990 files changed, 334353 insertions, 0 deletions
diff --git a/src/mesa/drivers/Makefile b/src/mesa/drivers/Makefile
new file mode 100644
index 0000000000..c5998413e8
--- /dev/null
+++ b/src/mesa/drivers/Makefile
@@ -0,0 +1,29 @@
+# src/mesa/drivers/Makefile
+
+TOP = ../../..
+include $(TOP)/configs/current
+# The targets below name actions, not files, so declare them phony.
+.PHONY: default clean install
+default:	# run $(MAKE) in each existing directory of DRIVER_DIRS; stop at the first failure
+	@for dir in $(DRIVER_DIRS) ; do \
+		if [ -d $$dir ] ; then \
+			(cd $$dir && $(MAKE)) || exit 1; \
+		fi \
+	done
+
+
+clean:	# run "$(MAKE) clean" in each existing directory of DRIVER_DIRS; stop at the first failure
+	@for dir in $(DRIVER_DIRS) ; do \
+		if [ -d $$dir ] ; then \
+			(cd $$dir && $(MAKE) clean) || exit 1; \
+		fi \
+	done
+
+
+install:	# run "$(MAKE) install" in each existing directory of DRIVER_DIRS; stop at the first failure
+	@for dir in $(DRIVER_DIRS) ; do \
+		if [ -d $$dir ] ; then \
+			(cd $$dir && $(MAKE) install) || exit 1; \
+		fi \
+	done
+
diff --git a/src/mesa/drivers/beos/GLView.cpp b/src/mesa/drivers/beos/GLView.cpp
new file mode 100644
index 0000000000..a029f6b200
--- /dev/null
+++ b/src/mesa/drivers/beos/GLView.cpp
@@ -0,0 +1,1573 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.1
+ * 
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include <assert.h>
+#include <stdio.h>
+
+extern "C" {
+
+#include "glheader.h"
+#include "version.h"
+#include "buffers.h"
+#include "bufferobj.h"
+#include "context.h"
+#include "colormac.h"
+#include "depth.h"
+#include "extensions.h"
+#include "macros.h"
+#include "matrix.h"
+#include "mtypes.h"
+#include "texformat.h"
+#include "texobj.h"
+#include "teximage.h"
+#include "texstore.h"
+#include "vbo/vbo.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "swrast/s_context.h"
+#include "swrast/s_depth.h"
+#include "swrast/s_lines.h"
+#include "swrast/s_triangle.h"
+#include "swrast/s_trispan.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+
+}	// extern "C"
+
+#include <interface/Screen.h>
+#include <GLView.h>
+
+// BeOS component ordering for B_RGBA32 bitmap format
+#if B_HOST_IS_LENDIAN
+	#define BE_RCOMP 2
+	#define BE_GCOMP 1
+	#define BE_BCOMP 0
+	#define BE_ACOMP 3
+
+	#define PACK_B_RGBA32(color) (color[BCOMP] | (color[GCOMP] << 8) | \
+							(color[RCOMP] << 16) | (color[ACOMP] << 24))
+
+	#define PACK_B_RGB32(color) (color[BCOMP] | (color[GCOMP] << 8) | \
+  							(color[RCOMP] << 16) | 0xFF000000)
+#else
+	// Big Endian B_RGBA32 bitmap format
+	#define BE_RCOMP 1
+	#define BE_GCOMP 2
+	#define BE_BCOMP 3
+	#define BE_ACOMP 0
+
+	#define PACK_B_RGBA32(color) (color[ACOMP] | (color[RCOMP] << 8) | \
+							(color[GCOMP] << 16) | (color[BCOMP] << 24))
+
+	#define PACK_B_RGB32(color) ((color[RCOMP] << 8) | (color[GCOMP] << 16) | \
+  							(color[BCOMP] << 24) | 0xFF)	/* opaque alpha in the low byte, where PACK_B_RGBA32 puts alpha */
+#endif
+
+#define FLIP(coord) (LIBGGI_MODE(ggi_ctx->ggi_visual)->visible.y-(coord) - 1) 
+
+const char * color_space_name(color_space space);
+
+//
+// This object hangs off of the BGLView object.  We have to use
+// Be's BGLView class as-is to maintain binary compatibility (we
+// can't add new members to it).  Instead we just put all our data
+// in this class and use BGLView::m_gc to point to it.
+//
+class MesaDriver
+{
+friend class BGLView;	// the BGLView constructor installs the static callbacks below
+public:
+	MesaDriver();
+	~MesaDriver();
+	
+	void 		Init(BGLView * bglview, GLcontext * c, GLvisual * v, GLframebuffer * b);
+
+	void 		LockGL();
+	void 		UnlockGL();
+	void 		SwapBuffers() const;
+	status_t 	CopyPixelsOut(BPoint source, BBitmap *dest);
+	status_t 	CopyPixelsIn(BBitmap *source, BPoint dest);
+	// Blit helpers: draw (part of) the back buffer bitmap into the view.
+	void CopySubBuffer(GLint x, GLint y, GLuint width, GLuint height) const;
+	void Draw(BRect updateRect) const;
+
+private:
+	MesaDriver(const MesaDriver &rhs);  // copy constructor illegal
+	MesaDriver &operator=(const MesaDriver &rhs);  // assignment oper. illegal
+	// Mesa objects handed over in Init() and destroyed by the destructor.
+	GLcontext * 	m_glcontext;
+	GLvisual * 		m_glvisual;
+	GLframebuffer *	m_glframebuffer;
+	// Owning view and the back buffer bitmap (NULL until GetBufferSize() allocates one).
+	BGLView *		m_bglview;
+	BBitmap *		m_bitmap;
+
+	GLchan 			m_clear_color[4];  // buffer clear color
+	GLuint 			m_clear_index;      // buffer clear color index
+	GLint 			m_bottom;           // used for flipping Y coords
+	GLuint 			m_width;            // last size reported by GetBufferSize()
+	GLuint			m_height;
+	
+   // Mesa Device Driver callback functions
+   static void 		UpdateState(GLcontext *ctx, GLuint new_state);
+   static void 		ClearIndex(GLcontext *ctx, GLuint index);
+   static void 		ClearColor(GLcontext *ctx, const GLfloat color[4]);
+   static void 		Clear(GLcontext *ctx, GLbitfield mask,
+                                GLboolean all, GLint x, GLint y,
+                                GLint width, GLint height);
+   static void 		ClearFront(GLcontext *ctx, GLboolean all, GLint x, GLint y,
+                          GLint width, GLint height);
+   static void 		ClearBack(GLcontext *ctx, GLboolean all, GLint x, GLint y,
+                         GLint width, GLint height);
+   static void 		Index(GLcontext *ctx, GLuint index);
+   static void 		Color(GLcontext *ctx, GLubyte r, GLubyte g,
+                     GLubyte b, GLubyte a);
+   static void 		SetBuffer(GLcontext *ctx, GLframebuffer *colorBuffer,
+                             GLenum mode);
+   static void 		GetBufferSize(GLframebuffer * framebuffer, GLuint *width,
+                             GLuint *height);
+   static void		Error(GLcontext *ctx);
+   static const GLubyte *	GetString(GLcontext *ctx, GLenum name);
+   static void          Viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h);
+
+   // Front-buffer functions (installed by UpdateState() when drawing to GL_FRONT)
+   static void 		WriteRGBASpanFront(const GLcontext *ctx, GLuint n,
+                                  GLint x, GLint y,
+                                  CONST GLubyte rgba[][4],
+                                  const GLubyte mask[]);
+   static void 		WriteRGBSpanFront(const GLcontext *ctx, GLuint n,
+                                 GLint x, GLint y,
+                                 CONST GLubyte rgba[][3],
+                                 const GLubyte mask[]);
+   static void 		WriteMonoRGBASpanFront(const GLcontext *ctx, GLuint n,
+                                      GLint x, GLint y,
+                                      const GLchan color[4],
+                                      const GLubyte mask[]);
+   static void 		WriteRGBAPixelsFront(const GLcontext *ctx, GLuint n,
+                                    const GLint x[], const GLint y[],
+                                    CONST GLubyte rgba[][4],
+                                    const GLubyte mask[]);
+   static void 		WriteMonoRGBAPixelsFront(const GLcontext *ctx, GLuint n,
+                                        const GLint x[], const GLint y[],
+                                        const GLchan color[4],
+                                        const GLubyte mask[]);
+   static void 		WriteCI32SpanFront(const GLcontext *ctx, GLuint n,
+                                  GLint x, GLint y,
+                                  const GLuint index[], const GLubyte mask[]);
+   static void 		WriteCI8SpanFront(const GLcontext *ctx, GLuint n,
+                                 GLint x, GLint y,
+                                 const GLubyte index[], const GLubyte mask[]);
+   static void 		WriteMonoCISpanFront(const GLcontext *ctx, GLuint n,
+                                    GLint x, GLint y,
+                                    GLuint colorIndex, const GLubyte mask[]);
+   static void 		WriteCI32PixelsFront(const GLcontext *ctx,
+                                    GLuint n, const GLint x[], const GLint y[],
+                                    const GLuint index[], const GLubyte mask[]);
+   static void 		WriteMonoCIPixelsFront(const GLcontext *ctx, GLuint n,
+                                      const GLint x[], const GLint y[],
+                                      GLuint colorIndex, const GLubyte mask[]);
+   static void 		ReadCI32SpanFront(const GLcontext *ctx,
+                                 GLuint n, GLint x, GLint y, GLuint index[]);
+   static void 		ReadRGBASpanFront(const GLcontext *ctx, GLuint n,
+                                 GLint x, GLint y,
+                                 GLubyte rgba[][4]);
+   static void 		ReadCI32PixelsFront(const GLcontext *ctx,
+                                   GLuint n, const GLint x[], const GLint y[],
+                                   GLuint indx[], const GLubyte mask[]);
+   static void 		ReadRGBAPixelsFront(const GLcontext *ctx,
+                                   GLuint n, const GLint x[], const GLint y[],
+                                   GLubyte rgba[][4], const GLubyte mask[]);
+
+   // Back buffer functions (installed by UpdateState() for any other draw buffer)
+   static void 		WriteRGBASpanBack(const GLcontext *ctx, GLuint n,
+                                  GLint x, GLint y,
+                                  CONST GLubyte rgba[][4],
+                                  const GLubyte mask[]);
+   static void 		WriteRGBSpanBack(const GLcontext *ctx, GLuint n,
+                                 GLint x, GLint y,
+                                 CONST GLubyte rgba[][3],
+                                 const GLubyte mask[]);
+   static void 		WriteMonoRGBASpanBack(const GLcontext *ctx, GLuint n,
+                                     GLint x, GLint y,
+                                     const GLchan color[4],
+                                     const GLubyte mask[]);
+   static void 		WriteRGBAPixelsBack(const GLcontext *ctx, GLuint n,
+                                   const GLint x[], const GLint y[],
+                                   CONST GLubyte rgba[][4],
+                                   const GLubyte mask[]);
+   static void 		WriteMonoRGBAPixelsBack(const GLcontext *ctx, GLuint n,
+                                       const GLint x[], const GLint y[],
+                                       const GLchan color[4],
+                                       const GLubyte mask[]);
+   static void 		WriteCI32SpanBack(const GLcontext *ctx, GLuint n,
+                                 GLint x, GLint y,
+                                 const GLuint index[], const GLubyte mask[]);
+   static void 		WriteCI8SpanBack(const GLcontext *ctx, GLuint n, GLint x, GLint y,
+                                const GLubyte index[], const GLubyte mask[]);
+   static void 		WriteMonoCISpanBack(const GLcontext *ctx, GLuint n,
+                                   GLint x, GLint y, GLuint colorIndex,
+                                   const GLubyte mask[]);
+   static void 		WriteCI32PixelsBack(const GLcontext *ctx,
+                                   GLuint n, const GLint x[], const GLint y[],
+                                   const GLuint index[], const GLubyte mask[]);
+   static void 		WriteMonoCIPixelsBack(const GLcontext *ctx,
+                                     GLuint n, const GLint x[], const GLint y[],
+                                     GLuint colorIndex, const GLubyte mask[]);
+   static void 		ReadCI32SpanBack(const GLcontext *ctx,
+                                GLuint n, GLint x, GLint y, GLuint index[]);
+   static void 		ReadRGBASpanBack(const GLcontext *ctx, GLuint n,
+                                GLint x, GLint y,
+                                GLubyte rgba[][4]);
+   static void 		ReadCI32PixelsBack(const GLcontext *ctx,
+                                  GLuint n, const GLint x[], const GLint y[],
+                                  GLuint indx[], const GLubyte mask[]);
+   static void 		ReadRGBAPixelsBack(const GLcontext *ctx,
+                                  GLuint n, const GLint x[], const GLint y[],
+                                  GLubyte rgba[][4], const GLubyte mask[]);
+
+};
+
+//------------------------------------------------------------------
+// Public interface methods
+//------------------------------------------------------------------
+
+
+//
+// Input:  rect - initial rectangle
+//         name - window name
+//         resizingMode - ignored: B_FOLLOW_ALL_SIDES is always passed to BView
+//         mode - usually 0 ?  (B_WILL_DRAW | B_FRAME_EVENTS are always added)
+//         options - Bitwise-OR of BGL_* tokens (BGL_DOUBLE is forced on)
+//
+BGLView::BGLView(BRect rect, char *name,
+                 ulong resizingMode, ulong mode,
+                 ulong options)
+   : BView(rect, name, B_FOLLOW_ALL_SIDES, mode | B_WILL_DRAW | B_FRAME_EVENTS) //  | B_FULL_UPDATE_ON_RESIZE)
+{
+	m_gc = NULL;	// stays NULL (never garbage) if context creation fails below
+
+	// We don't support single buffering (yet): double buffering forced.
+	options |= BGL_DOUBLE;
+
+   const GLboolean rgbFlag = ((options & BGL_INDEX) == 0);
+   const GLboolean alphaFlag = ((options & BGL_ALPHA) == BGL_ALPHA);
+   const GLboolean dblFlag = ((options & BGL_DOUBLE) == BGL_DOUBLE);
+   const GLboolean stereoFlag = false;
+   const GLint depth = (options & BGL_DEPTH) ? 16 : 0;
+   const GLint stencil = (options & BGL_STENCIL) ? 8 : 0;
+   const GLint accum = (options & BGL_ACCUM) ? 16 : 0;
+   const GLint red = rgbFlag ? 8 : 0;
+   const GLint green = rgbFlag ? 8 : 0;
+   const GLint blue = rgbFlag ? 8 : 0;
+   const GLint alpha = alphaFlag ? 8 : 0;
+
+	m_options = options | BGL_INDIRECT;
+
+   struct dd_function_table functions;
+ 
+   if (!rgbFlag) {
+      fprintf(stderr, "Mesa Warning: color index mode not supported\n");
+   }
+
+   // Allocate auxiliary data object
+   MesaDriver * md = new MesaDriver();
+
+   // examine option flags and create gl_context struct
+   GLvisual * visual = _mesa_create_visual( dblFlag,
+                                            stereoFlag,
+                                            red, green, blue, alpha,
+                                            depth,
+                                            stencil,
+                                            accum, accum, accum, accum,
+                                            1
+                                            );
+
+	// Initialize device driver function table
+	_mesa_init_driver_functions(&functions);
+
+	functions.GetString 	= md->GetString;
+	functions.UpdateState 	= md->UpdateState;
+	functions.GetBufferSize = md->GetBufferSize;
+	functions.Clear 		= md->Clear;
+	functions.ClearIndex 	= md->ClearIndex;
+	functions.ClearColor 	= md->ClearColor;
+	functions.Error			= md->Error;
+        functions.Viewport      = md->Viewport;
+
+	// create core context
+	GLcontext *ctx = _mesa_create_context(visual, NULL, &functions, md);
+	if (! ctx) {
+         // md was never Init()ed, so it does not own the visual yet.
+         _mesa_destroy_visual(visual);
+         delete md;
+         return;
+      }
+   _mesa_enable_sw_extensions(ctx);
+   _mesa_enable_1_3_extensions(ctx);
+   _mesa_enable_1_4_extensions(ctx);
+   _mesa_enable_1_5_extensions(ctx);
+
+   // create core framebuffer
+   GLframebuffer * buffer = _mesa_create_framebuffer(visual,
+                                              depth > 0 ? GL_TRUE : GL_FALSE,
+                                              stencil > 0 ? GL_TRUE: GL_FALSE,
+                                              accum > 0 ? GL_TRUE : GL_FALSE,
+                                              alphaFlag
+                                              );
+
+   /* Initialize the software rasterizer and helper modules.
+    */
+   _swrast_CreateContext(ctx);
+   _vbo_CreateContext(ctx);
+   _tnl_CreateContext(ctx);
+   _swsetup_CreateContext(ctx);
+   _swsetup_Wakeup(ctx);
+
+   md->Init(this, ctx, visual, buffer );
+
+   // Hook aux data into BGLView object
+   m_gc = md;
+
+   // some stupid applications (Quake2) don't even think about calling LockGL()
+   // before using glGetString and friends... so make sure there is at least a
+   // valid context.
+   if (!_mesa_get_current_context()) {
+      LockGL();
+      // not needed, we don't have a looper yet: UnlockLooper();
+   }
+}
+
+
+BGLView::~BGLView()
+{
+   // Destroy the auxiliary driver object that m_gc points to.
+   MesaDriver * driver = (MesaDriver *) m_gc;
+   assert(driver);
+   delete driver;
+}
+
+void BGLView::LockGL()
+{
+   MesaDriver * driver = (MesaDriver *) m_gc;	// aux object holding the Mesa state
+   assert(driver);
+   driver->LockGL();
+}
+
+void BGLView::UnlockGL()
+{
+   MesaDriver * driver = (MesaDriver *) m_gc;	// aux object holding the Mesa state
+   assert(driver);
+   driver->UnlockGL();
+}
+
+void BGLView::SwapBuffers()
+{
+	SwapBuffers(false);	// same as SwapBuffers(bool) without waiting for the retrace
+}
+
+void BGLView::SwapBuffers(bool vSync)
+{
+	// Let the driver present its buffer; with vSync, then wait for a retrace.
+	MesaDriver * driver = (MesaDriver *) m_gc;
+	assert(driver);
+	driver->SwapBuffers();
+	if (! vSync)
+		return;
+	BScreen screen(Window());
+	screen.WaitForRetrace();
+}
+
+
+#if 0
+void BGLView::CopySubBufferMESA(GLint x, GLint y, GLuint width, GLuint height)
+{
+   MesaDriver * md = (MesaDriver *) m_gc;
+   assert(md);
+   md->CopySubBuffer(x, y, width, height);
+}
+#endif
+
+BView *	BGLView::EmbeddedView()
+{
+	return NULL;	// this implementation always reports no embedded view
+}
+
+status_t BGLView::CopyPixelsOut(BPoint source, BBitmap *dest)
+{
+	const bool usable = (dest != NULL) && dest->Bounds().IsValid();
+	if (! usable)
+		return B_BAD_VALUE;	// missing destination or invalid bounds
+	MesaDriver * driver = (MesaDriver *) m_gc;
+	assert(driver);
+	return driver->CopyPixelsOut(source, dest);
+}
+
+status_t BGLView::CopyPixelsIn(BBitmap *source, BPoint dest)
+{
+	const bool usable = (source != NULL) && source->Bounds().IsValid();
+	if (! usable)
+		return B_BAD_VALUE;	// missing source or invalid bounds
+	MesaDriver * driver = (MesaDriver *) m_gc;
+	assert(driver);
+	return driver->CopyPixelsIn(source, dest);
+}
+
+
+void BGLView::ErrorCallback(unsigned long errorCode) // Mesa's GLenum is not ulong but uint!
+{
+	// Print the GL error code on stderr.  A 64-bit unsigned long formats to
+	// up to 16 hex digits (34 bytes in total), so the buffer is sized for
+	// that and the write is bounded with snprintf().
+	char msg[40];
+	snprintf(msg, sizeof(msg), "GL: Error code $%04lx.", errorCode);
+	fprintf(stderr, "%s\n", msg);
+}
+
+void BGLView::Draw(BRect updateRect)
+{
+   // Repainting is delegated to the driver object.
+   MesaDriver * driver = (MesaDriver *) m_gc;
+   assert(driver);
+   driver->Draw(updateRect);
+}
+
+void BGLView::AttachedToWindow()
+{
+   BView::AttachedToWindow();
+   // Use a transparent view color so the background is not painted white
+   // when the view is resized.
+   SetViewColor(B_TRANSPARENT_32_BIT);
+}
+
+void BGLView::AllAttached()
+{
+   BView::AllAttached();
+   // Nothing GL-specific to do here; the hook only forwards to BView.
+}
+
+void BGLView::DetachedFromWindow()
+{
+   BView::DetachedFromWindow();	// plain forward to BView
+}
+
+void BGLView::AllDetached()
+{
+   BView::AllDetached();
+   // Nothing GL-specific to do here; the hook only forwards to BView.
+}
+
+void BGLView::FrameResized(float width, float height)
+{
+   BView::FrameResized(width, height);	// plain forward to BView
+}
+
+status_t BGLView::Perform(perform_code d, void *arg)
+{
+   return BView::Perform(d, arg);	// plain forward to BView
+}
+
+
+status_t BGLView::Archive(BMessage *data, bool deep) const
+{
+   return BView::Archive(data, deep);	// only the BView part is archived here
+}
+
+void BGLView::MessageReceived(BMessage *msg)
+{
+   BView::MessageReceived(msg);	// no message is handled here; forward to BView
+}
+
+void BGLView::SetResizingMode(uint32 mode)
+{
+   BView::SetResizingMode(mode);	// plain forward to BView
+}
+
+void BGLView::Show()
+{
+   BView::Show();	// plain forward to BView
+}
+
+void BGLView::Hide()
+{
+   BView::Hide();	// plain forward to BView
+}
+
+BHandler *BGLView::ResolveSpecifier(BMessage *msg, int32 index,
+                                    BMessage *specifier, int32 form,
+                                    const char *property)
+{
+   return BView::ResolveSpecifier(msg, index, specifier, form, property);	// plain forward to BView
+}
+
+status_t BGLView::GetSupportedSuites(BMessage *data)
+{
+   return BView::GetSupportedSuites(data);	// plain forward to BView
+}
+
+void BGLView::DirectConnected( direct_buffer_info *info )
+{	// The whole body is compiled out (#if 0), so this is currently a no-op.
+#if 0
+	if (! m_direct_connected && m_direct_connection_disabled) 
+		return; 
+
+	direct_info_locker->Lock(); 
+	switch(info->buffer_state & B_DIRECT_MODE_MASK) { 
+	case B_DIRECT_START: 
+		m_direct_connected = true;	// no break: continues into B_DIRECT_MODIFY
+	case B_DIRECT_MODIFY: 
+		// Get clipping information 
+		if (m_clip_list)
+			free(m_clip_list); 
+		m_clip_list_count = info->clip_list_count; 
+		m_clip_list = (clipping_rect *) malloc(m_clip_list_count*sizeof(clipping_rect)); 
+		if (m_clip_list) { 
+			memcpy(m_clip_list, info->clip_list, m_clip_list_count*sizeof(clipping_rect));
+			fBits = (uint8 *) info->bits; 
+			fRowBytes = info->bytes_per_row; 
+			fFormat = info->pixel_format; 
+			fBounds = info->window_bounds; 
+			fDirty = true; 
+		} 
+		break; 
+	case B_DIRECT_STOP: 
+		fConnected = false; 
+		break; 
+	} 
+	direct_info_locker->Unlock(); 
+#endif
+}
+
+void BGLView::EnableDirectMode( bool enabled )
+{
+   // TODO: not implemented; 'enabled' is ignored.
+}
+
+
+//---- virtual reserved methods (empty bodies) ----------
+
+void BGLView::_ReservedGLView1() {}
+void BGLView::_ReservedGLView2() {}
+void BGLView::_ReservedGLView3() {}
+void BGLView::_ReservedGLView4() {}
+void BGLView::_ReservedGLView5() {}
+void BGLView::_ReservedGLView6() {}
+void BGLView::_ReservedGLView7() {}
+void BGLView::_ReservedGLView8() {}
+
+#if 0
+// Not implemented!!!
+
+BGLView::BGLView(const BGLView &v)
+	: BView(v)
+{
+   // XXX not sure how this should work
+   printf("Warning BGLView::copy constructor not implemented\n");
+}
+
+BGLView &BGLView::operator=(const BGLView &v)
+{
+   printf("Warning BGLView::operator= not implemented\n");
+	return *this;
+}
+#endif
+
+void BGLView::dither_front()
+{
+   // no-op: intentionally empty in this implementation
+}
+
+bool BGLView::confirm_dither()
+{
+   // no-op: always reports false
+   return false;
+}
+
+void BGLView::draw(BRect r)
+{
+   // XXX no-op ???  ('r' is ignored; drawing is done by Draw() above)
+}
+
+/* Direct Window stuff */
+void BGLView::drawScanline( int x1, int x2, int y, void *data )
+{
+   // no-op: all arguments are ignored
+}
+
+void BGLView::scanlineHandler(struct rasStateRec *state,
+                              GLint x1, GLint x2)
+{
+   // no-op: all arguments are ignored
+}
+
+void BGLView::lock_draw()
+{
+   // no-op: intentionally empty in this implementation
+}
+
+void BGLView::unlock_draw()
+{
+   // no-op: intentionally empty in this implementation
+}
+
+bool BGLView::validateView()
+{
+   // no-op: always reports true
+   return true;
+}
+
+// #pragma mark -
+
+MesaDriver::MesaDriver()
+{
+   // Nothing is attached until Init(); the bitmap comes from GetBufferSize().
+   m_glcontext 		= NULL;
+   m_glvisual		= NULL;
+   m_glframebuffer 	= NULL;
+   m_bglview 		= NULL;
+   m_bitmap 		= NULL;
+   m_clear_color[BE_RCOMP] = m_clear_color[BE_GCOMP] = 0;
+   m_clear_color[BE_BCOMP] = m_clear_color[BE_ACOMP] = 0;
+   m_clear_index = 0;
+   // GetBufferSize() compares against the stored size, so it must be defined.
+   m_width = m_height = 0;
+   m_bottom = 0;
+}
+
+
+MesaDriver::~MesaDriver()
+{
+   _mesa_destroy_visual(m_glvisual);
+   _mesa_destroy_framebuffer(m_glframebuffer);
+   _mesa_destroy_context(m_glcontext);
+   // The back buffer bitmap (possibly NULL) is owned by this object too.
+   delete m_bitmap;
+}
+
+
+void MesaDriver::Init(BGLView * bglview, GLcontext * ctx, GLvisual * visual, GLframebuffer * framebuffer)
+{
+	// Remember the view and the Mesa objects this driver instance serves.
+	m_glframebuffer = framebuffer;
+	m_glvisual 		= visual;
+	m_glcontext 	= ctx;
+	m_bglview 		= bglview;
+
+	MesaDriver * owner = (MesaDriver *) ctx->DriverCtx;
+	struct swrast_device_driver * rast = _swrast_GetDeviceDriverReference( ctx );
+	TNLcontext * tnlCtx = TNL_CONTEXT(ctx);
+	// Sanity checks: ctx must point back at a driver whose context is ctx.
+	assert(owner->m_glcontext == ctx );
+	assert(tnlCtx);
+	assert(rast);
+
+	// Use the default TNL pipeline and hook up our SetBuffer callback.
+	tnlCtx->Driver.RunPipeline = _tnl_run_pipeline;
+	rast->SetBuffer = this->SetBuffer;
+}
+
+
+void MesaDriver::LockGL()
+{
+	m_bglview->LockLooper();
+	// Re-select the span functions, then make this context current.
+   UpdateState(m_glcontext, 0);
+   _mesa_make_current(m_glcontext, m_glframebuffer);
+}
+
+
+void MesaDriver::UnlockGL()
+{
+	// Looper() is NULL while the view is detached.  The context is left
+	// current on purpose: _mesa_make_current(NULL, NULL) would only cost time.
+	BLooper *looper = m_bglview->Looper();
+	if (looper && looper->IsLocked())
+		m_bglview->UnlockLooper();
+}
+
+
+void MesaDriver::SwapBuffers() const
+{
+    _mesa_notifySwapBuffers(m_glcontext);
+
+	// Blit only when the looper lock was really taken (it fails while detached).
+	if (m_bitmap && m_bglview->LockLooper()) {
+		m_bglview->DrawBitmap(m_bitmap);
+		m_bglview->UnlockLooper();
+	}
+}
+
+
+void MesaDriver::CopySubBuffer(GLint x, GLint y, GLuint width, GLuint height) const
+{
+   if (! m_bitmap)
+      return;	// no back buffer bitmap to copy from
+
+   // Bitmap and view are the same size, so one rectangle is both source and
+   // destination.  GL's (0,0) is the lower-left corner, hence the Y flip.
+   BRect area;
+   area.left = x;
+   area.right = x + width - 1;
+   area.bottom = m_bottom - y;
+   area.top = area.bottom - height + 1;
+   m_bglview->DrawBitmap(m_bitmap, area, area);
+}
+
+status_t MesaDriver::CopyPixelsOut(BPoint location, BBitmap *bitmap)
+{
+	// Copy the part of the back buffer overlapped by 'bitmap' (its bounds
+	// shifted by 'location') into 'bitmap'.  Both are treated as 32-bit.
+	if (! m_bitmap)
+		return B_NO_INIT;	// no back buffer has been allocated yet
+
+	color_space scs = m_bitmap->ColorSpace();
+	color_space dcs = bitmap->ColorSpace();
+
+	if (scs != dcs && (scs != B_RGBA32 || dcs != B_RGB32)) {
+		printf("CopyPixelsOut(): incompatible color space: %s != %s\n",
+			color_space_name(scs),
+			color_space_name(dcs));
+		return B_BAD_TYPE;
+	}
+
+	BRect sr = m_bitmap->Bounds();
+	BRect dr = bitmap->Bounds();
+	sr = sr & dr.OffsetBySelf(location);
+	dr = sr.OffsetByCopy(-location.x, -location.y); 
+	if (! sr.IsValid())
+		return B_OK;	// the rectangles do not overlap: nothing to copy
+
+	const uint8 *ps = (const uint8 *) m_bitmap->Bits();
+	uint8 *pd = (uint8 *) bitmap->Bits();
+	const int32 rowShift = (int32) (dr.top - sr.top);	// signed: negative for a positive location
+	for (int32 y = (int32) sr.top; y <= (int32) sr.bottom; y++) {
+		const uint8 *s = ps + y * m_bitmap->BytesPerRow() + (int32) sr.left * 4;
+		uint8 *d = pd + (y + rowShift) * bitmap->BytesPerRow() + (int32) dr.left * 4;
+		// BRect bounds are inclusive: a row holds IntegerWidth() + 1 pixels.
+		memcpy(d, s, (dr.IntegerWidth() + 1) * 4);
+	}
+	return B_OK;
+}
+
+status_t MesaDriver::CopyPixelsIn(BBitmap *bitmap, BPoint location)
+{
+	// Copy the overlapping part of 'bitmap' into the back buffer.
+	// NOTE(review): the offset math mirrors CopyPixelsOut(); confirm the direction.
+	if (! m_bitmap)
+		return B_NO_INIT;	// no back buffer has been allocated yet
+
+	color_space scs = bitmap->ColorSpace();
+	color_space dcs = m_bitmap->ColorSpace();
+
+	if (scs != dcs && (dcs != B_RGBA32 || scs != B_RGB32)) {
+		printf("CopyPixelsIn(): incompatible color space: %s != %s\n",
+			color_space_name(scs),
+			color_space_name(dcs));
+		return B_BAD_TYPE;
+	}
+
+	BRect sr = bitmap->Bounds();
+	BRect dr = m_bitmap->Bounds();
+	sr = sr & dr.OffsetBySelf(location);
+	dr = sr.OffsetByCopy(-location.x, -location.y); 
+	if (! sr.IsValid())
+		return B_OK;	// the rectangles do not overlap: nothing to copy
+
+	const uint8 *ps = (const uint8 *) bitmap->Bits();
+	uint8 *pd = (uint8 *) m_bitmap->Bits();
+	const int32 rowShift = (int32) (dr.top - sr.top);	// signed: negative for a positive location
+	for (int32 y = (int32) sr.top; y <= (int32) sr.bottom; y++) {
+		const uint8 *s = ps + y * bitmap->BytesPerRow() + (int32) sr.left * 4;
+		uint8 *d = pd + (y + rowShift) * m_bitmap->BytesPerRow() + (int32) dr.left * 4;
+		// BRect bounds are inclusive: a row holds IntegerWidth() + 1 pixels.
+		memcpy(d, s, (dr.IntegerWidth() + 1) * 4);
+	}
+	return B_OK;
+}
+
+
+void MesaDriver::Draw(BRect updateRect) const
+{
+   if (! m_bitmap) return;	// no back buffer bitmap yet: nothing to repaint
+   m_bglview->DrawBitmap(m_bitmap, updateRect, updateRect);
+}
+
+
+void MesaDriver::Error(GLcontext *ctx)
+{
+	MesaDriver *driver = (MesaDriver *) ctx->DriverCtx;
+	BGLView *view = driver ? driver->m_bglview : NULL;
+	if (view) view->ErrorCallback((unsigned long) ctx->ErrorValue);	// pass the error value to the view
+}
+
+void MesaDriver::UpdateState( GLcontext *ctx, GLuint new_state )
+{
+	struct swrast_device_driver *	rast = _swrast_GetDeviceDriverReference( ctx );
+
+	_swrast_InvalidateState( ctx, new_state );
+	_swsetup_InvalidateState( ctx, new_state );
+	_vbo_InvalidateState( ctx, new_state );
+	_tnl_InvalidateState( ctx, new_state );
+
+	if (ctx->Color.DrawBuffer[0] != GL_FRONT) {
+      /* any draw buffer but GL_FRONT: use the back buffer span functions */
+      rast->WriteRGBASpan = WriteRGBASpanBack;
+      rast->WriteRGBSpan = WriteRGBSpanBack;
+      rast->WriteRGBAPixels = WriteRGBAPixelsBack;
+      rast->WriteMonoRGBASpan = WriteMonoRGBASpanBack;
+      rast->WriteMonoRGBAPixels = WriteMonoRGBAPixelsBack;
+      rast->WriteCI32Span = WriteCI32SpanBack;
+      rast->WriteCI8Span = WriteCI8SpanBack;
+      rast->WriteMonoCISpan = WriteMonoCISpanBack;
+      rast->WriteCI32Pixels = WriteCI32PixelsBack;
+      rast->WriteMonoCIPixels = WriteMonoCIPixelsBack;
+      rast->ReadRGBASpan = ReadRGBASpanBack;
+      rast->ReadRGBAPixels = ReadRGBAPixelsBack;
+      rast->ReadCI32Span = ReadCI32SpanBack;
+      rast->ReadCI32Pixels = ReadCI32PixelsBack;
+   }
+   else {
+      /* GL_FRONT: use the front buffer span functions */
+      rast->WriteRGBASpan = WriteRGBASpanFront;
+      rast->WriteRGBSpan = WriteRGBSpanFront;
+      rast->WriteRGBAPixels = WriteRGBAPixelsFront;
+      rast->WriteMonoRGBASpan = WriteMonoRGBASpanFront;
+      rast->WriteMonoRGBAPixels = WriteMonoRGBAPixelsFront;
+      rast->WriteCI32Span = WriteCI32SpanFront;
+      rast->WriteCI8Span = WriteCI8SpanFront;
+      rast->WriteMonoCISpan = WriteMonoCISpanFront;
+      rast->WriteCI32Pixels = WriteCI32PixelsFront;
+      rast->WriteMonoCIPixels = WriteMonoCIPixelsFront;
+      rast->ReadRGBASpan = ReadRGBASpanFront;
+      rast->ReadRGBAPixels = ReadRGBAPixelsFront;
+      rast->ReadCI32Span = ReadCI32SpanFront;
+      rast->ReadCI32Pixels = ReadCI32PixelsFront;
+    }
+}
+
+
+void MesaDriver::ClearIndex(GLcontext *ctx, GLuint index)
+{
+   // Store the clear index in the driver object attached to ctx.
+   ((MesaDriver *) ctx->DriverCtx)->m_clear_index = index;
+}
+
+
+void MesaDriver::ClearColor(GLcontext *ctx, const GLfloat color[4])
+{
+   MesaDriver *driver = (MesaDriver *) ctx->DriverCtx;	// clear color is kept in BE_*COMP order
+   CLAMPED_FLOAT_TO_CHAN(driver->m_clear_color[BE_RCOMP], color[0]);
+   CLAMPED_FLOAT_TO_CHAN(driver->m_clear_color[BE_GCOMP], color[1]);
+   CLAMPED_FLOAT_TO_CHAN(driver->m_clear_color[BE_BCOMP], color[2]);
+   CLAMPED_FLOAT_TO_CHAN(driver->m_clear_color[BE_ACOMP], color[3]);
+   assert(driver->m_bglview);
+}
+
+
+void MesaDriver::Clear(GLcontext *ctx, GLbitfield mask,
+                               GLboolean all, GLint x, GLint y,
+                               GLint width, GLint height)
+{
+   // The color buffers are cleared here; every other requested buffer is
+   // handed to the software rasterizer.
+   const GLbitfield colorBits = DD_FRONT_LEFT_BIT | DD_BACK_LEFT_BIT;
+   const GLbitfield otherBits = mask & ~colorBits;
+   if ((mask & DD_FRONT_LEFT_BIT) != 0)
+		ClearFront(ctx, all, x, y, width, height);
+   if ((mask & DD_BACK_LEFT_BIT) != 0)
+		ClearBack(ctx, all, x, y, width, height);
+   if (otherBits != 0)
+		_swrast_Clear( ctx, otherBits, all, x, y, width, height );
+}
+
+
+void MesaDriver::ClearFront(GLcontext *ctx,
+                         GLboolean all, GLint x, GLint y,
+                         GLint width, GLint height)
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   BGLView *bglview = md->m_bglview;
+   assert(bglview);
+   // Fill the view (or a sub-rectangle of it) with the stored clear color.
+   bglview->SetHighColor(md->m_clear_color[BE_RCOMP],
+                         md->m_clear_color[BE_GCOMP],
+                         md->m_clear_color[BE_BCOMP],
+                         md->m_clear_color[BE_ACOMP]);
+   bglview->SetLowColor(md->m_clear_color[BE_RCOMP],
+                        md->m_clear_color[BE_GCOMP],
+                        md->m_clear_color[BE_BCOMP],
+                        md->m_clear_color[BE_ACOMP]);
+   if (all) {
+      BRect b = bglview->Bounds();
+      bglview->FillRect(b);
+   }
+   else {
+      // XXX untested.  BRect edges are inclusive, hence the -1 / +1; Y is flipped.
+      BRect b;
+      b.left = x;
+      b.right = x + width - 1;
+      b.bottom = md->m_height - y - 1;
+      b.top = b.bottom - height + 1;
+      bglview->FillRect(b);
+   }
+
+   // restore drawing color
+#if 0
+   bglview->SetHighColor(md->mColor[BE_RCOMP],
+                         md->mColor[BE_GCOMP],
+                         md->mColor[BE_BCOMP],
+                         md->mColor[BE_ACOMP]);
+   bglview->SetLowColor(md->mColor[BE_RCOMP],
+                        md->mColor[BE_GCOMP],
+                        md->mColor[BE_BCOMP],
+                        md->mColor[BE_ACOMP]);
+#endif
+}
+
+
+void MesaDriver::ClearBack(GLcontext *ctx,
+                        GLboolean all, GLint x, GLint y,
+                        GLint width, GLint height)
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   BGLView *bglview = md->m_bglview;
+   assert(bglview);
+   BBitmap *bitmap = md->m_bitmap;
+   assert(bitmap);
+   GLuint *start = (GLuint *) bitmap->Bits();
+   const GLuint *clearPixelPtr = (const GLuint *) md->m_clear_color;
+   const GLuint clearPixel = B_LENDIAN_TO_HOST_INT32(*clearPixelPtr);
+   // Fill the back buffer bitmap (or a sub-rectangle) with the clear pixel.
+   if (all) {
+      const int numPixels = md->m_width * md->m_height;
+      if (clearPixel == 0) {
+         memset(start, 0, numPixels * 4);
+      }
+      else {
+         for (int i = 0; i < numPixels; i++) {
+             start[i] = clearPixel;
+         }
+      }
+   }
+   else {
+      // XXX untested.  GL's y counts from the bottom but bitmap rows run top-down: start at the region's top row.
+      start += ((GLint) md->m_height - y - height) * (GLint) md->m_width + x;
+      for (int i = 0; i < height; i++) {
+         for (int j = 0; j < width; j++) {
+            start[j] = clearPixel;
+         }
+         start += md->m_width;
+      }
+   }
+}
+
+
+// SetBuffer hook.  Currently a no-op: none of the arguments are used.
+void MesaDriver::SetBuffer(GLcontext *ctx, GLframebuffer *buffer,
+                            GLenum mode)
+{
+   /* TODO */
+   (void) mode;
+   (void) buffer;
+   (void) ctx;
+}
+
+// Report the size of the current context's BGLView through *width and
+// *height, and bring the driver's cached geometry (m_bottom, m_width,
+// m_height) and back bitmap in line with it.
+//
+// framebuffer is not consulted.  If there is no current context the
+// function returns without writing anything.
+void MesaDriver::GetBufferSize(GLframebuffer * framebuffer, GLuint *width,
+                            GLuint *height)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   if (!ctx)
+      return;
+
+   MesaDriver * md = (MesaDriver *) ctx->DriverCtx;
+   BGLView *bglview = md->m_bglview;
+   assert(bglview);
+
+   BRect bounds = bglview->Bounds();
+   *width = (GLuint) bounds.IntegerWidth() + 1;    // (right - left + 1)
+   *height = (GLuint) bounds.IntegerHeight() + 1;  // (bottom - top + 1)
+   md->m_bottom = (GLint) bounds.bottom;
+
+   if (!ctx->Visual.doubleBufferMode) {
+      // single buffered: no back bitmap is kept
+      md->m_bitmap = NULL;
+   }
+   else if (*width != md->m_width || *height != md->m_height) {
+      // size changed: replace the back buffer bitmap with one of the new size
+      delete md->m_bitmap;
+      BRect rect(0.0, 0.0, *width - 1, *height - 1);
+      md->m_bitmap = new BBitmap(rect, B_RGBA32);
+   }
+
+   md->m_width = *width;
+   md->m_height = *height;
+}
+
+
+// Viewport hook.  The viewport values themselves are not used; the call
+// is only taken as a chance to let Mesa re-check the buffer size.
+void MesaDriver::Viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+   (void) ctx;
+   (void) x;
+   (void) y;
+   (void) w;
+   (void) h;
+
+   /* poll for window size change and realloc software Z/stencil/etc if needed */
+   _mesa_ResizeBuffersMESA();
+}
+
+
+// Return the driver's GL_RENDERER string.  Any other name yields NULL so
+// that the core library handles it.
+const GLubyte *MesaDriver::GetString(GLcontext *ctx, GLenum name)
+{
+   if (name == GL_RENDERER) {
+      return (const GLubyte *) "Mesa " MESA_VERSION_STRING " powered BGLView (software)";
+   }
+
+   // Let core library handle all other cases
+   return NULL;
+}
+
+
+// Draw one pixel into the view with its current high color.
+// (0,0) is the upper-left corner.  Only the front-buffer functions use this.
+inline void Plot(BGLView *bglview, int x, int y)
+{
+   // XXX There's got to be a better way!
+   // The pixel is drawn as a stroked line from (x, y) to (x+1, y).
+   BPoint from(x, y);
+   BPoint to(x+1, y);
+   bglview->StrokeLine(from, to);
+}
+
+
+// Draw a horizontal run of n RGBA pixels into the view, starting at (x, y).
+//
+//   rgba   one color per pixel of the span.
+//   mask   optional; when non-NULL, pixel i is drawn only if mask[i] is set.
+//
+// y is flipped against m_bottom because Plot() treats (0,0) as the
+// upper-left corner.
+void MesaDriver::WriteRGBASpanFront(const GLcontext *ctx, GLuint n,
+                                 GLint x, GLint y,
+                                 CONST GLubyte rgba[][4],
+                                 const GLubyte mask[])
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   BGLView *bglview = md->m_bglview;
+   assert(bglview);
+   const int flippedY = md->m_bottom - y;
+
+   for (GLuint i = 0; i < n; i++) {
+      if (mask && !mask[i])
+         continue;
+      bglview->SetHighColor(rgba[i][0], rgba[i][1], rgba[i][2], rgba[i][3]);
+      // Pixel i always sits at column x + i.  The column must advance for
+      // masked-out pixels too, otherwise everything after them is drawn
+      // too far to the left.
+      Plot(bglview, x + (GLint) i, flippedY);
+   }
+}
+
+// Draw a horizontal run of n RGB pixels into the view, starting at (x, y).
+//
+//   rgba   one 3-component color per pixel of the span.
+//   mask   optional; when non-NULL, pixel i is drawn only if mask[i] is set.
+//
+// y is flipped against m_bottom because Plot() treats (0,0) as the
+// upper-left corner.
+void MesaDriver::WriteRGBSpanFront(const GLcontext *ctx, GLuint n,
+                                GLint x, GLint y,
+                                CONST GLubyte rgba[][3],
+                                const GLubyte mask[])
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   BGLView *bglview = md->m_bglview;
+   assert(bglview);
+   const int flippedY = md->m_bottom - y;
+
+   for (GLuint i = 0; i < n; i++) {
+      if (mask && !mask[i])
+         continue;
+      bglview->SetHighColor(rgba[i][0], rgba[i][1], rgba[i][2]);
+      // Column is x + i even when earlier pixels were masked out; advancing
+      // only on drawn pixels would shift the rest of the span left.
+      Plot(bglview, x + (GLint) i, flippedY);
+   }
+}
+
+// Draw a horizontal run of n pixels, all in the same color, into the view
+// starting at (x, y).
+//
+//   color  the color for every pixel; its alpha component is not passed on.
+//   mask   optional; when non-NULL, pixel i is drawn only if mask[i] is set.
+//
+// y is flipped against m_bottom because Plot() treats (0,0) as the
+// upper-left corner.
+void MesaDriver::WriteMonoRGBASpanFront(const GLcontext *ctx, GLuint n,
+                                     GLint x, GLint y,
+                                     const GLchan color[4],
+                                     const GLubyte mask[])
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   BGLView *bglview = md->m_bglview;
+   assert(bglview);
+   const int flippedY = md->m_bottom - y;
+
+   // one color for the whole span, so set it once
+   bglview->SetHighColor(color[RCOMP], color[GCOMP], color[BCOMP]);
+
+   for (GLuint i = 0; i < n; i++) {
+      if (mask && !mask[i])
+         continue;
+      // Column is x + i even when earlier pixels were masked out; advancing
+      // only on drawn pixels would shift the rest of the span left.
+      Plot(bglview, x + (GLint) i, flippedY);
+   }
+}
+
+// Draw n individually positioned pixels into the view.  Pixel i goes to
+// (x[i], y[i]) with color rgba[i]; the alpha component is not passed to
+// SetHighColor.  When mask is non-NULL only pixels with mask[i] set are
+// drawn.
+void MesaDriver::WriteRGBAPixelsFront(const GLcontext *ctx,
+                                   GLuint n, const GLint x[], const GLint y[],
+                                   CONST GLubyte rgba[][4],
+                                   const GLubyte mask[] )
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   BGLView *bglview = md->m_bglview;
+   assert(bglview);
+
+   for (GLuint i = 0; i < n; i++) {
+      if (mask && !mask[i])
+         continue;
+      bglview->SetHighColor(rgba[i][0], rgba[i][1], rgba[i][2]);
+      // flip y: Plot() counts rows from the top
+      Plot(bglview, x[i], md->m_bottom - y[i]);
+   }
+}
+
+
+// Draw n individually positioned pixels into the view, all in one color.
+// Pixel i goes to (x[i], y[i]).  When mask is non-NULL only pixels with
+// mask[i] set are drawn.
+void MesaDriver::WriteMonoRGBAPixelsFront(const GLcontext *ctx, GLuint n,
+                                       const GLint x[], const GLint y[],
+                                       const GLchan color[4],
+                                       const GLubyte mask[])
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   BGLView *bglview = md->m_bglview;
+   assert(bglview);
+
+   // every point shares the same color, so it is set only once
+   bglview->SetHighColor(color[RCOMP], color[GCOMP], color[BCOMP]);
+
+   for (GLuint i = 0; i < n; i++) {
+      if (mask && !mask[i])
+         continue;
+      // flip y: Plot() counts rows from the top
+      Plot(bglview, x[i], md->m_bottom - y[i]);
+   }
+}
+
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// None of the arguments are used.
+void MesaDriver::WriteCI32SpanFront( const GLcontext *ctx, GLuint n,
+                                     GLint x, GLint y,
+                                     const GLuint index[],
+                                     const GLubyte mask[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) index;
+   (void) mask;
+
+   // TODO
+   printf("WriteCI32SpanFront() not implemented yet!\n");
+}
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// None of the arguments are used.
+void MesaDriver::WriteCI8SpanFront( const GLcontext *ctx, GLuint n,
+                                    GLint x, GLint y,
+                                    const GLubyte index[],
+                                    const GLubyte mask[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) index;
+   (void) mask;
+
+   // TODO
+   printf("WriteCI8SpanFront() not implemented yet!\n");
+}
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// None of the arguments are used.
+void MesaDriver::WriteMonoCISpanFront( const GLcontext *ctx, GLuint n,
+                                       GLint x, GLint y,
+                                       GLuint colorIndex,
+                                       const GLubyte mask[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) colorIndex;
+   (void) mask;
+
+   // TODO
+   printf("WriteMonoCISpanFront() not implemented yet!\n");
+}
+
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// None of the arguments are used.
+void MesaDriver::WriteCI32PixelsFront( const GLcontext *ctx, GLuint n,
+                                       const GLint x[], const GLint y[],
+                                       const GLuint index[],
+                                       const GLubyte mask[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) index;
+   (void) mask;
+
+   // TODO
+   printf("WriteCI32PixelsFront() not implemented yet!\n");
+}
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// None of the arguments are used.
+void MesaDriver::WriteMonoCIPixelsFront( const GLcontext *ctx, GLuint n,
+                                         const GLint x[], const GLint y[],
+                                         GLuint colorIndex,
+                                         const GLubyte mask[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) colorIndex;
+   (void) mask;
+
+   // TODO
+   printf("WriteMonoCIPixelsFront() not implemented yet!\n");
+}
+
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// In particular index[] is left untouched.
+void MesaDriver::ReadCI32SpanFront( const GLcontext *ctx, GLuint n,
+                                    GLint x, GLint y,
+                                    GLuint index[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) index;
+
+   // TODO
+   printf("ReadCI32SpanFront() not implemented yet!\n");
+}
+
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// In particular rgba[] is left untouched.
+void MesaDriver::ReadRGBASpanFront( const GLcontext *ctx, GLuint n,
+                                    GLint x, GLint y,
+                                    GLubyte rgba[][4] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) rgba;
+
+   // TODO
+   printf("ReadRGBASpanFront() not implemented yet!\n");
+}
+
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// In particular indx[] is left untouched.
+void MesaDriver::ReadCI32PixelsFront( const GLcontext *ctx, GLuint n,
+                                      const GLint x[], const GLint y[],
+                                      GLuint indx[],
+                                      const GLubyte mask[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) indx;
+   (void) mask;
+
+   // TODO
+   printf("ReadCI32PixelsFront() not implemented yet!\n");
+}
+
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// In particular rgba[] is left untouched.
+void MesaDriver::ReadRGBAPixelsFront( const GLcontext *ctx, GLuint n,
+                                      const GLint x[], const GLint y[],
+                                      GLubyte rgba[][4],
+                                      const GLubyte mask[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) rgba;
+   (void) mask;
+
+   // TODO
+   printf("ReadRGBAPixelsFront() not implemented yet!\n");
+}
+
+
+
+
+// Store a horizontal run of n RGBA pixels into the back buffer bitmap,
+// starting at column x of bitmap row (m_bottom - y).  Each color is packed
+// with PACK_B_RGBA32.  When mask is non-NULL only pixels with mask[i] set
+// are written.
+void MesaDriver::WriteRGBASpanBack(const GLcontext *ctx, GLuint n,
+                                 GLint x, GLint y,
+                                 CONST GLubyte rgba[][4],
+                                 const GLubyte mask[])
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   BBitmap *bitmap = md->m_bitmap;
+
+   assert(bitmap);
+
+   // locate the first destination pixel: 4 bytes per pixel, rows top-down
+   const int row = md->m_bottom - y;
+   uint8 *rowStart = (uint8 *) bitmap->Bits() + (row * bitmap->BytesPerRow());
+   uint32 *dst = (uint32 *) (rowStart + x * 4);
+
+   for (GLuint i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         dst[i] = PACK_B_RGBA32(rgba[i]);
+      }
+   }
+}
+
+
+// Store a horizontal run of n RGB pixels into the back buffer bitmap,
+// starting at column x of bitmap row (m_bottom - y).  Each color is packed
+// with PACK_B_RGB32.  When mask is non-NULL only pixels with mask[i] set
+// are written.
+void MesaDriver::WriteRGBSpanBack(const GLcontext *ctx, GLuint n,
+                                GLint x, GLint y,
+                                CONST GLubyte rgb[][3],
+                                const GLubyte mask[])
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   BBitmap *bitmap = md->m_bitmap;
+
+   assert(bitmap);
+
+   // locate the first destination pixel: 4 bytes per pixel, rows top-down
+   const int row = md->m_bottom - y;
+   uint8 *rowStart = (uint8 *) bitmap->Bits() + (row * bitmap->BytesPerRow());
+   uint32 *dst = (uint32 *) (rowStart + x * 4);
+
+   for (GLuint i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         dst[i] = PACK_B_RGB32(rgb[i]);
+      }
+   }
+}
+
+
+
+
+// Store a horizontal run of n pixels, all in the same color, into the back
+// buffer bitmap, starting at column x of bitmap row (m_bottom - y).  When
+// mask is non-NULL only pixels with mask[i] set are written.
+void MesaDriver::WriteMonoRGBASpanBack(const GLcontext *ctx, GLuint n,
+                                    GLint x, GLint y,
+                                    const GLchan color[4], const GLubyte mask[])
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   BBitmap *bitmap = md->m_bitmap;
+
+   assert(bitmap);
+
+   // locate the first destination pixel: 4 bytes per pixel, rows top-down
+   const int row = md->m_bottom - y;
+   uint8 *rowStart = (uint8 *) bitmap->Bits() + (row * bitmap->BytesPerRow());
+   uint32 *dst = (uint32 *) (rowStart + x * 4);
+
+   // the color is the same for the whole span, so pack it once
+   const uint32 packed = PACK_B_RGBA32(color);
+
+   for (GLuint i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         dst[i] = packed;
+      }
+   }
+}
+
+
+// Store n individually positioned RGBA pixels into the back buffer bitmap.
+// Pixel i goes to column x[i] of bitmap row (m_bottom - y[i]); its four
+// components are written one byte at a time at the BE_*COMP offsets.  When
+// mask is non-NULL only pixels with mask[i] set are written.
+void MesaDriver::WriteRGBAPixelsBack(const GLcontext *ctx,
+                                   GLuint n, const GLint x[], const GLint y[],
+                                   CONST GLubyte rgba[][4],
+                                   const GLubyte mask[] )
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   BBitmap *bitmap = md->m_bitmap;
+
+   assert(bitmap);
+
+   for (GLuint i = 0; i < n; i++) {
+      if (mask && !mask[i])
+         continue;
+
+      const int row = md->m_bottom - y[i];
+      GLubyte *dst = (GLubyte *) bitmap->Bits()
+                   + (row * bitmap->BytesPerRow()) + x[i] * 4;
+      dst[BE_RCOMP] = rgba[i][RCOMP];
+      dst[BE_GCOMP] = rgba[i][GCOMP];
+      dst[BE_BCOMP] = rgba[i][BCOMP];
+      dst[BE_ACOMP] = rgba[i][ACOMP];
+   }
+}
+
+
+// Store n individually positioned pixels, all in the same color, into the
+// back buffer bitmap.  Pixel i goes to column x[i] of bitmap row
+// (m_bottom - y[i]).  When mask is non-NULL only pixels with mask[i] set
+// are written.
+void MesaDriver::WriteMonoRGBAPixelsBack(const GLcontext *ctx, GLuint n,
+                                      const GLint x[], const GLint y[],
+                                      const GLchan color[4],
+                                      const GLubyte mask[])
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   BBitmap *bitmap = md->m_bitmap;
+
+   assert(bitmap);
+
+   // same color everywhere, so pack it once up front
+   uint32 pixel_color = PACK_B_RGBA32(color);
+
+   for (GLuint i = 0; i < n; i++) {
+      if (mask && !mask[i])
+         continue;
+
+      const int row = md->m_bottom - y[i];
+      GLubyte *dst = (GLubyte *) bitmap->Bits()
+                   + (row * bitmap->BytesPerRow()) + x[i] * 4;
+      *((uint32 *) dst) = pixel_color;
+   }
+}
+
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// None of the arguments are used.
+void MesaDriver::WriteCI32SpanBack( const GLcontext *ctx, GLuint n,
+                                    GLint x, GLint y,
+                                    const GLuint index[],
+                                    const GLubyte mask[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) index;
+   (void) mask;
+
+   // TODO
+   printf("WriteCI32SpanBack() not implemented yet!\n");
+}
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// None of the arguments are used.
+void MesaDriver::WriteCI8SpanBack( const GLcontext *ctx, GLuint n,
+                                   GLint x, GLint y,
+                                   const GLubyte index[],
+                                   const GLubyte mask[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) index;
+   (void) mask;
+
+   // TODO
+   printf("WriteCI8SpanBack() not implemented yet!\n");
+}
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// None of the arguments are used.
+void MesaDriver::WriteMonoCISpanBack( const GLcontext *ctx, GLuint n,
+                                      GLint x, GLint y,
+                                      GLuint colorIndex,
+                                      const GLubyte mask[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) colorIndex;
+   (void) mask;
+
+   // TODO
+   printf("WriteMonoCISpanBack() not implemented yet!\n");
+}
+
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// None of the arguments are used.
+void MesaDriver::WriteCI32PixelsBack( const GLcontext *ctx, GLuint n,
+                                      const GLint x[], const GLint y[],
+                                      const GLuint index[],
+                                      const GLubyte mask[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) index;
+   (void) mask;
+
+   // TODO
+   printf("WriteCI32PixelsBack() not implemented yet!\n");
+}
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// None of the arguments are used.
+void MesaDriver::WriteMonoCIPixelsBack( const GLcontext *ctx, GLuint n,
+                                        const GLint x[], const GLint y[],
+                                        GLuint colorIndex,
+                                        const GLubyte mask[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) colorIndex;
+   (void) mask;
+
+   // TODO
+   printf("WriteMonoCIPixelsBack() not implemented yet!\n");
+}
+
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// In particular index[] is left untouched.
+void MesaDriver::ReadCI32SpanBack( const GLcontext *ctx, GLuint n,
+                                   GLint x, GLint y,
+                                   GLuint index[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) index;
+
+   // TODO
+   printf("ReadCI32SpanBack() not implemented yet!\n");
+}
+
+
+// Read a horizontal run of n pixels out of the back buffer bitmap into
+// rgba[], starting at column x of bitmap row (m_bottom - y).  Each pixel's
+// bytes at the BE_*COMP offsets are copied to the matching *COMP slots.
+void MesaDriver::ReadRGBASpanBack( const GLcontext *ctx, GLuint n,
+                                GLint x, GLint y, GLubyte rgba[][4] )
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   const BBitmap *bitmap = md->m_bitmap;
+   assert(bitmap);
+
+   const int row = md->m_bottom - y;
+   const GLubyte *src = (GLubyte *) bitmap->Bits()
+                      + (row * bitmap->BytesPerRow()) + x * 4;
+
+   // 4 bytes per pixel, so the source advances by 4 for every output entry
+   for (GLuint i = 0; i < n; i++, src += 4) {
+      rgba[i][RCOMP] = src[BE_RCOMP];
+      rgba[i][GCOMP] = src[BE_GCOMP];
+      rgba[i][BCOMP] = src[BE_BCOMP];
+      rgba[i][ACOMP] = src[BE_ACOMP];
+   }
+}
+
+
+// Placeholder: prints a "not implemented" notice and does nothing else.
+// In particular indx[] is left untouched.
+void MesaDriver::ReadCI32PixelsBack( const GLcontext *ctx, GLuint n,
+                                     const GLint x[], const GLint y[],
+                                     GLuint indx[],
+                                     const GLubyte mask[] )
+{
+   (void) ctx;
+   (void) n;
+   (void) x;
+   (void) y;
+   (void) indx;
+   (void) mask;
+
+   // TODO
+   printf("ReadCI32PixelsBack() not implemented yet!\n");
+}
+
+
+// Read n individually positioned pixels out of the back buffer bitmap.
+// Entry i comes from column x[i] of bitmap row (m_bottom - y[i]).  When
+// mask is non-NULL, entries with mask[i] clear are skipped and rgba[i] is
+// left as it was.
+void MesaDriver::ReadRGBAPixelsBack( const GLcontext *ctx,
+                                  GLuint n, const GLint x[], const GLint y[],
+                                  GLubyte rgba[][4], const GLubyte mask[] )
+{
+   MesaDriver *md = (MesaDriver *) ctx->DriverCtx;
+   const BBitmap *bitmap = md->m_bitmap;
+   assert(bitmap);
+
+   for (GLuint i = 0; i < n; i++) {
+      if (mask && !mask[i])
+         continue;
+
+      const int row = md->m_bottom - y[i];
+      GLubyte *src = (GLubyte *) bitmap->Bits()
+                   + (row * bitmap->BytesPerRow()) + x[i] * 4;
+      rgba[i][RCOMP] = src[BE_RCOMP];
+      rgba[i][GCOMP] = src[BE_GCOMP];
+      rgba[i][BCOMP] = src[BE_BCOMP];
+      rgba[i][ACOMP] = src[BE_ACOMP];
+   }
+}
+
+// Return the identifier of a color_space constant as a string, for the
+// constants listed below; anything else yields "Unknown!".
+const char * color_space_name(color_space space)
+{
+   switch (space) {
+   case B_RGB24:      return "B_RGB24";
+   case B_RGB32:      return "B_RGB32";
+   case B_RGBA32:     return "B_RGBA32";
+   case B_RGB32_BIG:  return "B_RGB32_BIG";
+   case B_RGBA32_BIG: return "B_RGBA32_BIG";
+   case B_GRAY8:      return "B_GRAY8";
+   case B_GRAY1:      return "B_GRAY1";
+   case B_RGB16:      return "B_RGB16";
+   case B_RGB15:      return "B_RGB15";
+   case B_RGBA15:     return "B_RGBA15";
+   case B_CMAP8:      return "B_CMAP8";
+   default:
+      return "Unknown!";
+   }
+}
+
+
diff --git a/src/mesa/drivers/beos/Makefile b/src/mesa/drivers/beos/Makefile
new file mode 100644
index 0000000000..c79dd24c39
--- /dev/null
+++ b/src/mesa/drivers/beos/Makefile
@@ -0,0 +1,199 @@
+# src/mesa/drivers/beos/Makefile
+
+TOP = ../../../..
+include $(TOP)/configs/current
+
+include $(TOP)/src/mesa/sources
+
+# Core Mesa archive that is linked into the GL library.
+MESA_MODULES = $(TOP)/src/mesa/mesa.a
+
+# GLU implementation selection.  The SGI implementation is built unless
+# GLU_DIR already has a value.  To switch back to the old Mesa GLU
+# implementation, uncomment the next line:
+#
+# GLU_DIR = $(TOP)/src/glu/mesa
+ifeq ($(GLU_DIR),)
+  GLU_DIR = $(TOP)/src/glu/sgi
+endif
+
+# Object files taken from the selected GLU implementation.
+ifeq ($(GLU_DIR), $(TOP)/src/glu/mesa)
+  GLU_MODULES = \
+    $(GLU_DIR)/glu.o \
+    $(GLU_DIR)/mipmap.o \
+    $(GLU_DIR)/nurbs.o \
+    $(GLU_DIR)/nurbscrv.o \
+    $(GLU_DIR)/nurbssrf.o \
+    $(GLU_DIR)/nurbsutl.o \
+    $(GLU_DIR)/polytest.o \
+    $(GLU_DIR)/project.o \
+    $(GLU_DIR)/quadric.o \
+    $(GLU_DIR)/tess.o \
+    $(GLU_DIR)/tesselat.o
+else
+  GLU_MODULES = \
+    $(GLU_DIR)/libutil/error.o \
+    $(GLU_DIR)/libutil/glue.o \
+    $(GLU_DIR)/libutil/mipmap.o \
+    $(GLU_DIR)/libutil/project.o \
+    $(GLU_DIR)/libutil/quad.o \
+    $(GLU_DIR)/libutil/registry.o \
+    $(GLU_DIR)/libtess/dict.o \
+    $(GLU_DIR)/libtess/geom.o \
+    $(GLU_DIR)/libtess/memalloc.o \
+    $(GLU_DIR)/libtess/mesh.o \
+    $(GLU_DIR)/libtess/normal.o \
+    $(GLU_DIR)/libtess/priorityq.o \
+    $(GLU_DIR)/libtess/render.o \
+    $(GLU_DIR)/libtess/sweep.o \
+    $(GLU_DIR)/libtess/tess.o \
+    $(GLU_DIR)/libtess/tessmono.o \
+    $(GLU_DIR)/libnurbs/interface/bezierEval.o \
+    $(GLU_DIR)/libnurbs/interface/bezierPatch.o \
+    $(GLU_DIR)/libnurbs/interface/bezierPatchMesh.o \
+    $(GLU_DIR)/libnurbs/interface/glcurveval.o \
+    $(GLU_DIR)/libnurbs/interface/glinterface.o \
+    $(GLU_DIR)/libnurbs/interface/glrenderer.o \
+    $(GLU_DIR)/libnurbs/interface/glsurfeval.o \
+    $(GLU_DIR)/libnurbs/interface/incurveeval.o \
+    $(GLU_DIR)/libnurbs/interface/insurfeval.o \
+    $(GLU_DIR)/libnurbs/internals/arc.o \
+    $(GLU_DIR)/libnurbs/internals/arcsorter.o \
+    $(GLU_DIR)/libnurbs/internals/arctess.o \
+    $(GLU_DIR)/libnurbs/internals/backend.o \
+    $(GLU_DIR)/libnurbs/internals/basiccrveval.o \
+    $(GLU_DIR)/libnurbs/internals/basicsurfeval.o \
+    $(GLU_DIR)/libnurbs/internals/bin.o \
+    $(GLU_DIR)/libnurbs/internals/bufpool.o \
+    $(GLU_DIR)/libnurbs/internals/cachingeval.o \
+    $(GLU_DIR)/libnurbs/internals/ccw.o \
+    $(GLU_DIR)/libnurbs/internals/coveandtiler.o \
+    $(GLU_DIR)/libnurbs/internals/curve.o \
+    $(GLU_DIR)/libnurbs/internals/curvelist.o \
+    $(GLU_DIR)/libnurbs/internals/curvesub.o \
+    $(GLU_DIR)/libnurbs/internals/dataTransform.o \
+    $(GLU_DIR)/libnurbs/internals/displaylist.o \
+    $(GLU_DIR)/libnurbs/internals/flist.o \
+    $(GLU_DIR)/libnurbs/internals/flistsorter.o \
+    $(GLU_DIR)/libnurbs/internals/hull.o \
+    $(GLU_DIR)/libnurbs/internals/intersect.o \
+    $(GLU_DIR)/libnurbs/internals/knotvector.o \
+    $(GLU_DIR)/libnurbs/internals/mapdesc.o \
+    $(GLU_DIR)/libnurbs/internals/mapdescv.o \
+    $(GLU_DIR)/libnurbs/internals/maplist.o \
+    $(GLU_DIR)/libnurbs/internals/mesher.o \
+    $(GLU_DIR)/libnurbs/internals/monoTriangulationBackend.o \
+    $(GLU_DIR)/libnurbs/internals/monotonizer.o \
+    $(GLU_DIR)/libnurbs/internals/mycode.o \
+    $(GLU_DIR)/libnurbs/internals/nurbsinterfac.o \
+    $(GLU_DIR)/libnurbs/internals/nurbstess.o \
+    $(GLU_DIR)/libnurbs/internals/patch.o \
+    $(GLU_DIR)/libnurbs/internals/patchlist.o \
+    $(GLU_DIR)/libnurbs/internals/quilt.o \
+    $(GLU_DIR)/libnurbs/internals/reader.o \
+    $(GLU_DIR)/libnurbs/internals/renderhints.o \
+    $(GLU_DIR)/libnurbs/internals/slicer.o \
+    $(GLU_DIR)/libnurbs/internals/sorter.o \
+    $(GLU_DIR)/libnurbs/internals/splitarcs.o \
+    $(GLU_DIR)/libnurbs/internals/subdivider.o \
+    $(GLU_DIR)/libnurbs/internals/tobezier.o \
+    $(GLU_DIR)/libnurbs/internals/trimline.o \
+    $(GLU_DIR)/libnurbs/internals/trimregion.o \
+    $(GLU_DIR)/libnurbs/internals/trimvertpool.o \
+    $(GLU_DIR)/libnurbs/internals/uarray.o \
+    $(GLU_DIR)/libnurbs/internals/varray.o \
+    $(GLU_DIR)/libnurbs/nurbtess/directedLine.o \
+    $(GLU_DIR)/libnurbs/nurbtess/gridWrap.o \
+    $(GLU_DIR)/libnurbs/nurbtess/monoChain.o \
+    $(GLU_DIR)/libnurbs/nurbtess/monoPolyPart.o \
+    $(GLU_DIR)/libnurbs/nurbtess/monoTriangulation.o \
+    $(GLU_DIR)/libnurbs/nurbtess/partitionX.o \
+    $(GLU_DIR)/libnurbs/nurbtess/partitionY.o \
+    $(GLU_DIR)/libnurbs/nurbtess/polyDBG.o \
+    $(GLU_DIR)/libnurbs/nurbtess/polyUtil.o \
+    $(GLU_DIR)/libnurbs/nurbtess/primitiveStream.o \
+    $(GLU_DIR)/libnurbs/nurbtess/quicksort.o \
+    $(GLU_DIR)/libnurbs/nurbtess/rectBlock.o \
+    $(GLU_DIR)/libnurbs/nurbtess/sampleComp.o \
+    $(GLU_DIR)/libnurbs/nurbtess/sampleCompBot.o \
+    $(GLU_DIR)/libnurbs/nurbtess/sampleCompRight.o \
+    $(GLU_DIR)/libnurbs/nurbtess/sampleCompTop.o \
+    $(GLU_DIR)/libnurbs/nurbtess/sampleMonoPoly.o \
+    $(GLU_DIR)/libnurbs/nurbtess/sampledLine.o \
+    $(GLU_DIR)/libnurbs/nurbtess/searchTree.o
+endif
+
+# Header search path handed to every compile below.
+# NOTE(review): "-I-" looks like the old GCC include-path separator --
+# confirm the toolchain used for this target still accepts it.
+INCLUDES = \
+  -I../common \
+  -I$(TOP)/src/mesa \
+  -I$(TOP)/src/mesa/array_cache \
+  -I$(TOP)/src/mesa/main \
+  -I$(TOP)/src/mesa/glapi \
+  -I$(TOP)/src/mesa/math \
+  -I$(TOP)/src/mesa/shader \
+  -I$(TOP)/src/mesa/swrast \
+  -I$(TOP)/src/mesa/swrast_setup \
+  -I$(TOP)/src/mesa/tnl \
+  -I$(TOP)/src/mesa/tnl_dd \
+  -I$(TOP)/src/mesa/x86 \
+  -I. \
+  -I- \
+  -I$(TOP)/include
+
+# Sources compiled in this directory: the BGLView driver, the shared
+# driver function table and the GL API dispatch files.
+DRIVER_SOURCES = \
+  GLView.cpp \
+  ../common/driverfuncs.c \
+  $(addprefix ../../, $(GLAPI_SOURCES))
+
+# Only x86 adds API sources of its own; no GL API PPC optimization yet.
+ifeq ($(CPU), x86)
+  DRIVER_SOURCES += $(addprefix ../../, $(X86_API))
+endif
+
+# Map every .c, .S and .cpp source to its .o file.
+DRIVER_OBJECTS := $(patsubst %.S,%.o,$(patsubst %.c,%.o,$(DRIVER_SOURCES)))
+
+OBJECTS := $(patsubst %.cpp,%.o,$(DRIVER_OBJECTS))
+
+
+# Rules
+
+# Suffix rules: C, C++ and assembler sources all go through the same
+# compiler driver with the same flags.
+.c.o:
+	$(CC) $(INCLUDES) $(CFLAGS) -c $< -o $@
+
+.cpp.o:
+	$(CC) $(INCLUDES) $(CFLAGS) -c $< -o $@
+
+.S.o:
+	$(CC) $(INCLUDES) $(CFLAGS) -c $< -o $@
+
+
+# "default" and "clean" name actions, not files; without this a stray file
+# called "default" or "clean" in this directory would stop them running.
+# "depend" is deliberately left out: it is a real file that gets included.
+.PHONY: default clean
+
+default: depend $(TOP)/$(LIB_DIR) $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME)
+
+# XXX FIXME: mesa.a might be libmesa.a now
+$(MESA_MODULES):
+	cd $(TOP)/src/mesa && $(MAKE) mesa.a ;
+	mimeset -f "$@"
+
+# Any missing GLU object triggers a sub-make in the GLU directory for the
+# whole module list (paths made relative to $(GLU_DIR)).
+$(GLU_MODULES):
+	cd $(GLU_DIR) && $(MAKE) $(subst $(GLU_DIR)/,,$(GLU_MODULES)) ;
+
+# -p: also create missing parent directories and do not fail if the
+# directory has appeared in the meantime.
+$(TOP)/$(LIB_DIR):
+	mkdir -p $(TOP)/$(LIB_DIR)
+
+# Link the GL library from the driver objects, core Mesa and GLU.
+$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(OBJECTS) $(MESA_MODULES) $(GLU_MODULES)
+	@$(MKLIB) -o $(GL_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+		-major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \
+		-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) $(GL_LIB_DEPS) \
+		$(OBJECTS) $(MESA_MODULES) $(GLU_MODULES)
+
+# $(GLU_OBJECTS):
+#	cd $(GLU_DIR) && $(MAKE) $< ;
+
+# Regenerate the dependency file that is included at the bottom.
+depend: $(DRIVER_SOURCES) $(GLU_SOURCES)
+	touch depend
+	$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(DRIVER_SOURCES) $(GLU_SOURCES) > /dev/null 
+
+clean:
+	-rm -f depend depend.bak $(OBJECTS)
+
+include depend
diff --git a/src/mesa/drivers/common/descrip.mms b/src/mesa/drivers/common/descrip.mms
new file mode 100644
index 0000000000..d5bbc69dfd
--- /dev/null
+++ b/src/mesa/drivers/common/descrip.mms
@@ -0,0 +1,42 @@
+# Makefile for core library for VMS
+# contributed by Jouk Jansen  joukj@hrem.nano.tudelft.nl
+# Last revision : 29 September 2008
+
+# NOTE(review): the "define" lines below presumably set up logical names
+# (gl, math, tnl, ...) so that the sources' include paths resolve to the
+# listed directories -- confirm against the MMS documentation.
+.first
+	define gl [----.include.gl]
+	define math [--.math]
+	define tnl [--.tnl]
+	define swrast [--.swrast]
+	define glapi [--.glapi]
+	define shader [--.shader]
+	define main [--.main]
+
+.include [----]mms-config.
+
+##### MACROS #####
+
+VPATH = RCS
+
+INCDIR = [----.include],[--.main],[--.glapi],[--.shader]
+LIBDIR = [----.lib]
+CFLAGS = /include=($(INCDIR),[])/define=(PTHREADS=1)/name=(as_is,short)\
+	/float=ieee/ieee=denorm/warn=disable=(PTRMISMATCH)
+
+SOURCES = driverfuncs.c
+
+OBJECTS =driverfuncs.obj
+
+##### RULES #####
+
+# NOTE(review): VERSION is not referenced anywhere else in this file, and
+# its value does not match the "Version: 7.1" header of driverfuncs.c --
+# confirm whether anything still reads it.
+VERSION=Mesa V3.4
+
+##### TARGETS #####
+# Make the library
+$(LIBDIR)$(GL_LIB) : $(OBJECTS)
+  @ library $(LIBDIR)$(GL_LIB) $(OBJECTS)
+
+clean :
+	purge
+	delete *.obj;*
+
+# Dependency only: no compile action is given here.
+driverfuncs.obj : driverfuncs.c
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
new file mode 100644
index 0000000000..ca5eb5c755
--- /dev/null
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -0,0 +1,344 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/arrayobj.h"
+#include "main/context.h"
+#include "main/framebuffer.h"
+#include "main/mipmap.h"
+#include "main/queryobj.h"
+#include "main/renderbuffer.h"
+#include "main/texcompress.h"
+#include "main/texformat.h"
+#include "main/texgetimage.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/texstore.h"
+#if FEATURE_ARB_vertex_buffer_object
+#include "main/bufferobj.h"
+#endif
+#if FEATURE_EXT_framebuffer_object
+#include "main/fbobject.h"
+#include "main/texrender.h"
+#endif
+#if FEATURE_ARB_sync
+#include "main/syncobj.h"
+#endif
+#if FEATURE_EXT_transform_feedback
+#include "main/transformfeedback.h"
+#endif
+
+#include "shader/program.h"
+#include "shader/shader_api.h"
+#include "tnl/tnl.h"
+#include "swrast/swrast.h"
+
+#include "driverfuncs.h"
+#include "meta.h"
+
+
+
+/**
+ * Plug in default functions for all pointers in the dd_function_table
+ * structure.  The table is zeroed first, so the explicit NULL assignments
+ * below only document which hooks have no default.
+ * Device drivers should call this function and then plug in any
+ * functions they want to override.
+ * Some functions (pointers) MUST be implemented by all drivers (REQUIRED).
+ * \param driver the dd_function_table to initialize
+ */
+void
+_mesa_init_driver_functions(struct dd_function_table *driver)
+{
+   memset(driver, 0, sizeof(*driver));
+
+   driver->GetString = NULL;  /* REQUIRED! */
+   driver->UpdateState = NULL;  /* REQUIRED! */
+   driver->GetBufferSize = NULL;  /* REQUIRED! */
+   driver->ResizeBuffers = _mesa_resize_framebuffer;
+   driver->Error = NULL;
+
+   driver->Finish = NULL;
+   driver->Flush = NULL;
+
+   /* framebuffer/image functions */
+   driver->Clear = _swrast_Clear;
+   driver->Accum = _swrast_Accum;
+   driver->RasterPos = _tnl_RasterPos;
+   driver->DrawPixels = _swrast_DrawPixels;
+   driver->ReadPixels = _swrast_ReadPixels;
+   driver->CopyPixels = _swrast_CopyPixels;
+   driver->Bitmap = _swrast_Bitmap;
+
+   /* Texture functions */
+   driver->ChooseTextureFormat = _mesa_choose_tex_format;
+   driver->TexImage1D = _mesa_store_teximage1d;
+   driver->TexImage2D = _mesa_store_teximage2d;
+   driver->TexImage3D = _mesa_store_teximage3d;
+   driver->TexSubImage1D = _mesa_store_texsubimage1d;
+   driver->TexSubImage2D = _mesa_store_texsubimage2d;
+   driver->TexSubImage3D = _mesa_store_texsubimage3d;
+   driver->GetTexImage = _mesa_get_teximage;
+   driver->CopyTexImage1D = _mesa_meta_CopyTexImage1D;
+   driver->CopyTexImage2D = _mesa_meta_CopyTexImage2D;
+   driver->CopyTexSubImage1D = _mesa_meta_CopyTexSubImage1D;
+   driver->CopyTexSubImage2D = _mesa_meta_CopyTexSubImage2D;
+   driver->CopyTexSubImage3D = _mesa_meta_CopyTexSubImage3D;
+   driver->GenerateMipmap = _mesa_meta_GenerateMipmap;
+   driver->TestProxyTexImage = _mesa_test_proxy_teximage;
+   driver->CompressedTexImage1D = _mesa_store_compressed_teximage1d;
+   driver->CompressedTexImage2D = _mesa_store_compressed_teximage2d;
+   driver->CompressedTexImage3D = _mesa_store_compressed_teximage3d;
+   driver->CompressedTexSubImage1D = _mesa_store_compressed_texsubimage1d;
+   driver->CompressedTexSubImage2D = _mesa_store_compressed_texsubimage2d;
+   driver->CompressedTexSubImage3D = _mesa_store_compressed_texsubimage3d;
+   driver->GetCompressedTexImage = _mesa_get_compressed_teximage;
+   driver->BindTexture = NULL;
+   driver->NewTextureObject = _mesa_new_texture_object;
+   driver->DeleteTexture = _mesa_delete_texture_object;
+   driver->NewTextureImage = _mesa_new_texture_image;
+   driver->FreeTexImageData = _mesa_free_texture_image_data; 
+   driver->MapTexture = NULL;
+   driver->UnmapTexture = NULL;
+   driver->TextureMemCpy = memcpy;
+   driver->IsTextureResident = NULL;
+   driver->UpdateTexturePalette = NULL;
+
+   /* imaging */
+   driver->CopyColorTable = _mesa_meta_CopyColorTable;
+   driver->CopyColorSubTable = _mesa_meta_CopyColorSubTable;
+   driver->CopyConvolutionFilter1D = _mesa_meta_CopyConvolutionFilter1D;
+   driver->CopyConvolutionFilter2D = _mesa_meta_CopyConvolutionFilter2D;
+
+   /* Vertex/fragment programs */
+   driver->BindProgram = NULL;
+   driver->NewProgram = _mesa_new_program;
+   driver->DeleteProgram = _mesa_delete_program;
+
+   /* simple state commands: no defaults, drivers plug in what they need */
+   driver->AlphaFunc = NULL;
+   driver->BlendColor = NULL;
+   driver->BlendEquationSeparate = NULL;
+   driver->BlendFuncSeparate = NULL;
+   driver->ClearColor = NULL;
+   driver->ClearDepth = NULL;
+   driver->ClearStencil = NULL;
+   driver->ClipPlane = NULL;
+   driver->ColorMask = NULL;
+   driver->ColorMaterial = NULL;
+   driver->CullFace = NULL;
+   driver->DrawBuffer = NULL;
+   driver->DrawBuffers = NULL;
+   driver->FrontFace = NULL;
+   driver->DepthFunc = NULL;
+   driver->DepthMask = NULL;
+   driver->DepthRange = NULL;
+   driver->Enable = NULL;
+   driver->Fogfv = NULL;
+   driver->Hint = NULL;
+   driver->Lightfv = NULL;
+   driver->LightModelfv = NULL;
+   driver->LineStipple = NULL;
+   driver->LineWidth = NULL;
+   driver->LogicOpcode = NULL;
+   driver->PointParameterfv = NULL;
+   driver->PointSize = NULL;
+   driver->PolygonMode = NULL;
+   driver->PolygonOffset = NULL;
+   driver->PolygonStipple = NULL;
+   driver->ReadBuffer = NULL;
+   driver->RenderMode = NULL;
+   driver->Scissor = NULL;
+   driver->ShadeModel = NULL;
+   driver->StencilFuncSeparate = NULL;
+   driver->StencilOpSeparate = NULL;
+   driver->StencilMaskSeparate = NULL;
+   driver->TexGen = NULL;
+   driver->TexEnv = NULL;
+   driver->TexParameter = NULL;
+   driver->Viewport = NULL;
+
+   /* buffer objects */
+   _mesa_init_buffer_object_functions(driver);
+
+   /* query objects */
+   _mesa_init_query_object_functions(driver);
+
+#if FEATURE_ARB_sync
+   _mesa_init_sync_object_functions(driver);
+#endif
+
+#if FEATURE_EXT_framebuffer_object
+   driver->NewFramebuffer = _mesa_new_framebuffer;
+   driver->NewRenderbuffer = _mesa_new_soft_renderbuffer;
+   driver->RenderTexture = _mesa_render_texture;
+   driver->FinishRenderTexture = _mesa_finish_render_texture;
+   driver->FramebufferRenderbuffer = _mesa_framebuffer_renderbuffer;
+#endif
+
+#if FEATURE_EXT_framebuffer_blit
+   driver->BlitFramebuffer = _swrast_BlitFramebuffer;
+#endif
+
+   /* APPLE_vertex_array_object */
+   driver->NewArrayObject = _mesa_new_array_object;
+   driver->DeleteArrayObject = _mesa_delete_array_object;
+   driver->BindArrayObject = NULL;
+
+#if FEATURE_EXT_transform_feedback
+   _mesa_init_transform_feedback_functions(driver);
+#endif
+
+   /* T&L stuff: flags and counters start cleared, most hooks unset */
+   driver->NeedValidate = GL_FALSE;
+   driver->ValidateTnlModule = NULL;
+   driver->CurrentExecPrimitive = 0;
+   driver->CurrentSavePrimitive = 0;
+   driver->NeedFlush = 0;
+   driver->SaveNeedFlush = 0;
+
+   driver->ProgramStringNotify = _tnl_program_string;
+   driver->FlushVertices = NULL;
+   driver->SaveFlushVertices = NULL;
+   driver->NotifySaveBegin = NULL;
+   driver->LightingSpaceChange = NULL;
+
+   /* display list */
+   driver->NewList = NULL;
+   driver->EndList = NULL;
+   driver->BeginCallList = NULL;
+   driver->EndCallList = NULL;
+
+
+   /* XXX temporary here */
+   _mesa_init_glsl_driver_functions(driver);
+}
+
+
+/**
+ * Call the ctx->Driver.* state functions with current values to initialize
+ * driver state.  Hooks other than ColorMaskIndexed are called unchecked.
+ * Only the Intel drivers use this so far.
+ * \param ctx  context whose current state is forwarded to ctx->Driver
+ */
+void
+_mesa_init_driver_state(GLcontext *ctx)
+{
+   ctx->Driver.AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
+   ctx->Driver.BlendColor(ctx, ctx->Color.BlendColor);
+   ctx->Driver.BlendEquationSeparate(ctx,
+                                     ctx->Color.BlendEquationRGB,
+                                     ctx->Color.BlendEquationA);
+   ctx->Driver.BlendFuncSeparate(ctx,
+                                 ctx->Color.BlendSrcRGB,
+                                 ctx->Color.BlendDstRGB,
+                                 ctx->Color.BlendSrcA, ctx->Color.BlendDstA);
+
+   if (ctx->Driver.ColorMaskIndexed) {
+      GLuint i;
+      /* each draw buffer has its own mask: send buffer i's, not buffer 0's */
+      for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
+         ctx->Driver.ColorMaskIndexed(ctx, i,
+                                      ctx->Color.ColorMask[i][RCOMP],
+                                      ctx->Color.ColorMask[i][GCOMP],
+                                      ctx->Color.ColorMask[i][BCOMP],
+                                      ctx->Color.ColorMask[i][ACOMP]);
+      }
+   }
+   else {
+      ctx->Driver.ColorMask(ctx,
+                            ctx->Color.ColorMask[0][RCOMP],
+                            ctx->Color.ColorMask[0][GCOMP],
+                            ctx->Color.ColorMask[0][BCOMP],
+                            ctx->Color.ColorMask[0][ACOMP]);
+   }
+
+   ctx->Driver.CullFace(ctx, ctx->Polygon.CullFaceMode);
+   ctx->Driver.DepthFunc(ctx, ctx->Depth.Func);
+   ctx->Driver.DepthMask(ctx, ctx->Depth.Mask);
+
+   ctx->Driver.Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled);
+   ctx->Driver.Enable(ctx, GL_BLEND, ctx->Color.BlendEnabled);
+   ctx->Driver.Enable(ctx, GL_COLOR_LOGIC_OP, ctx->Color.ColorLogicOpEnabled);
+   ctx->Driver.Enable(ctx, GL_COLOR_SUM, ctx->Fog.ColorSumEnabled);
+   ctx->Driver.Enable(ctx, GL_CULL_FACE, ctx->Polygon.CullFlag);
+   ctx->Driver.Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test);
+   ctx->Driver.Enable(ctx, GL_DITHER, ctx->Color.DitherFlag);
+   ctx->Driver.Enable(ctx, GL_FOG, ctx->Fog.Enabled);
+   ctx->Driver.Enable(ctx, GL_LIGHTING, ctx->Light.Enabled);
+   ctx->Driver.Enable(ctx, GL_LINE_SMOOTH, ctx->Line.SmoothFlag);
+   ctx->Driver.Enable(ctx, GL_POLYGON_STIPPLE, ctx->Polygon.StippleFlag);
+   ctx->Driver.Enable(ctx, GL_SCISSOR_TEST, ctx->Scissor.Enabled);
+   ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil._Enabled);
+   ctx->Driver.Enable(ctx, GL_TEXTURE_1D, GL_FALSE);
+   ctx->Driver.Enable(ctx, GL_TEXTURE_2D, GL_FALSE);
+   ctx->Driver.Enable(ctx, GL_TEXTURE_RECTANGLE_NV, GL_FALSE);
+   ctx->Driver.Enable(ctx, GL_TEXTURE_3D, GL_FALSE);
+   ctx->Driver.Enable(ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE);
+
+   ctx->Driver.Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
+   {
+      GLfloat mode = (GLfloat) ctx->Fog.Mode;  /* hook takes a pointer */
+      ctx->Driver.Fogfv(ctx, GL_FOG_MODE, &mode);
+   }
+   ctx->Driver.Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density);
+   ctx->Driver.Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start);
+   ctx->Driver.Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);
+
+   ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
+   {
+      GLfloat f = (GLfloat) ctx->Light.Model.ColorControl;
+      ctx->Driver.LightModelfv(ctx, GL_LIGHT_MODEL_COLOR_CONTROL, &f);
+   }
+
+   ctx->Driver.LineWidth(ctx, ctx->Line.Width);
+   ctx->Driver.LogicOpcode(ctx, ctx->Color.LogicOp);
+   ctx->Driver.PointSize(ctx, ctx->Point.Size);
+   ctx->Driver.PolygonStipple(ctx, (const GLubyte *) ctx->PolygonStipple);
+   ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
+                       ctx->Scissor.Width, ctx->Scissor.Height);
+   ctx->Driver.ShadeModel(ctx, ctx->Light.ShadeModel);
+   ctx->Driver.StencilFuncSeparate(ctx, GL_FRONT,
+                                   ctx->Stencil.Function[0],
+                                   ctx->Stencil.Ref[0],
+                                   ctx->Stencil.ValueMask[0]);
+   ctx->Driver.StencilFuncSeparate(ctx, GL_BACK,
+                                   ctx->Stencil.Function[1],
+                                   ctx->Stencil.Ref[1],
+                                   ctx->Stencil.ValueMask[1]);
+   ctx->Driver.StencilMaskSeparate(ctx, GL_FRONT, ctx->Stencil.WriteMask[0]);
+   ctx->Driver.StencilMaskSeparate(ctx, GL_BACK, ctx->Stencil.WriteMask[1]);
+   ctx->Driver.StencilOpSeparate(ctx, GL_FRONT,
+                                 ctx->Stencil.FailFunc[0],
+                                 ctx->Stencil.ZFailFunc[0],
+                                 ctx->Stencil.ZPassFunc[0]);
+   ctx->Driver.StencilOpSeparate(ctx, GL_BACK,
+                                 ctx->Stencil.FailFunc[1],
+                                 ctx->Stencil.ZFailFunc[1],
+                                 ctx->Stencil.ZPassFunc[1]);
+
+   ctx->Driver.DrawBuffer(ctx, ctx->Color.DrawBuffer[0]);
+}
diff --git a/src/mesa/drivers/common/driverfuncs.h b/src/mesa/drivers/common/driverfuncs.h
new file mode 100644
index 0000000000..4c90ed12f6
--- /dev/null
+++ b/src/mesa/drivers/common/driverfuncs.h
@@ -0,0 +1,37 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef DRIVERFUNCS_H
+#define DRIVERFUNCS_H
+
+extern void
+_mesa_init_driver_functions(struct dd_function_table *driver);
+
+
+extern void
+_mesa_init_driver_state(GLcontext *ctx);
+
+
+#endif
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
new file mode 100644
index 0000000000..3525583382
--- /dev/null
+++ b/src/mesa/drivers/common/meta.c
@@ -0,0 +1,2873 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.6
+ *
+ * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Meta operations.  Some GL operations can be expressed in terms of
+ * other GL operations.  For example, glBlitFramebuffer() can be done
+ * with texture mapping and glClear() can be done with polygon rendering.
+ *
+ * \author Brian Paul
+ */
+
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/arrayobj.h"
+#include "main/blend.h"
+#include "main/bufferobj.h"
+#include "main/buffers.h"
+#include "main/colortab.h"
+#include "main/convolve.h"
+#include "main/depth.h"
+#include "main/enable.h"
+#include "main/fbobject.h"
+#include "main/formats.h"
+#include "main/image.h"
+#include "main/macros.h"
+#include "main/matrix.h"
+#include "main/mipmap.h"
+#include "main/polygon.h"
+#include "main/readpix.h"
+#include "main/scissor.h"
+#include "main/shaders.h"
+#include "main/state.h"
+#include "main/stencil.h"
+#include "main/texobj.h"
+#include "main/texenv.h"
+#include "main/teximage.h"
+#include "main/texparam.h"
+#include "main/texstate.h"
+#include "main/varray.h"
+#include "main/viewport.h"
+#include "shader/program.h"
+#include "shader/arbprogram.h"
+#include "swrast/swrast.h"
+#include "drivers/common/meta.h"
+
+
+/** Return offset in bytes of the field within a vertex struct */
+#define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD))
+
+
+/**
+ * Flags passed to _mesa_meta_begin().
+ */
+/*@{*/
+#define META_ALL              ~0x0
+#define META_ALPHA_TEST        0x1
+#define META_BLEND             0x2  /**< includes logicop */
+#define META_COLOR_MASK        0x4
+#define META_DEPTH_TEST        0x8
+#define META_FOG              0x10
+#define META_PIXEL_STORE      0x20
+#define META_PIXEL_TRANSFER   0x40
+#define META_RASTERIZATION    0x80
+#define META_SCISSOR         0x100
+#define META_SHADER          0x200
+#define META_STENCIL_TEST    0x400
+#define META_TRANSFORM       0x800 /**< modelview, projection, clip planes */
+#define META_TEXTURE        0x1000
+#define META_VERTEX         0x2000
+#define META_VIEWPORT       0x4000
+/*@}*/
+
+
+/**
+ * State which we may save/restore across meta ops.
+ * XXX this may be incomplete...
+ */
+struct save_state
+{
+   GLbitfield SavedState;  /**< bitmask of META_* flags */
+
+   /** META_ALPHA_TEST */
+   GLboolean AlphaEnabled;
+
+   /** META_BLEND */
+   GLbitfield BlendEnabled;  /**< one bit per draw buffer */
+   GLboolean ColorLogicOpEnabled;
+
+   /** META_COLOR_MASK */
+   GLubyte ColorMask[MAX_DRAW_BUFFERS][4];
+
+   /** META_DEPTH_TEST */
+   struct gl_depthbuffer_attrib Depth;
+
+   /** META_FOG */
+   GLboolean Fog;
+
+   /** META_PIXEL_STORE */
+   struct gl_pixelstore_attrib Pack, Unpack;
+
+   /** META_PIXEL_TRANSFER */
+   GLfloat RedBias, RedScale;
+   GLfloat GreenBias, GreenScale;
+   GLfloat BlueBias, BlueScale;
+   GLfloat AlphaBias, AlphaScale;
+   GLfloat DepthBias, DepthScale;  /**< XXX unused by _mesa_meta_begin() */
+   GLboolean MapColorFlag;
+   GLboolean Convolution1DEnabled;
+   GLboolean Convolution2DEnabled;
+   GLboolean Separable2DEnabled;
+
+   /** META_RASTERIZATION */
+   GLenum FrontPolygonMode, BackPolygonMode;
+   GLboolean PolygonOffset;
+   GLboolean PolygonSmooth;
+   GLboolean PolygonStipple;
+   GLboolean PolygonCull;
+
+   /** META_SCISSOR */
+   struct gl_scissor_attrib Scissor;
+
+   /** META_SHADER */
+   GLboolean VertexProgramEnabled;
+   struct gl_vertex_program *VertexProgram;
+   GLboolean FragmentProgramEnabled;
+   struct gl_fragment_program *FragmentProgram;
+   GLuint Shader;  /**< name of ctx->Shader.CurrentProgram, or 0 */
+
+   /** META_STENCIL_TEST */
+   struct gl_stencil_attrib Stencil;
+
+   /** META_TRANSFORM */
+   GLenum MatrixMode;
+   GLfloat ModelviewMatrix[16];
+   GLfloat ProjectionMatrix[16];
+   GLfloat TextureMatrix[16];  /**< texture matrix stack [0] only */
+   GLbitfield ClipPlanesEnabled;
+
+   /** META_TEXTURE */
+   GLuint ActiveUnit;
+   GLuint ClientActiveUnit;
+   /** for unit[0] only */
+   struct gl_texture_object *CurrentTexture[NUM_TEXTURE_TARGETS];
+   /** mask of TEXTURE_2D_BIT, etc */
+   GLbitfield TexEnabled[MAX_TEXTURE_UNITS];
+   GLbitfield TexGenEnabled[MAX_TEXTURE_UNITS];
+   GLuint EnvMode;  /* unit[0] only */
+
+   /** META_VERTEX */
+   struct gl_array_object *ArrayObj;
+   struct gl_buffer_object *ArrayBufferObj;
+
+   /** META_VIEWPORT */
+   GLint ViewportX, ViewportY, ViewportW, ViewportH;
+   GLclampd DepthNear, DepthFar;
+
+   /** Miscellaneous (saved and disabled regardless of META_* flags) */
+   GLboolean Lighting;
+};
+
+
+/**
+ * Temporary texture used for glBlitFramebuffer, glDrawPixels, etc.
+ * This is currently shared by all the meta ops.  But we could create a
+ * separate one for each of glDrawPixel, glBlitFramebuffer, glCopyPixels, etc.
+ */
+struct temp_texture
+{
+   GLuint TexObj;
+   GLenum Target;         /**< GL_TEXTURE_2D or GL_TEXTURE_RECTANGLE */
+   GLsizei MinSize;       /**< Min texture size to allocate */
+   GLsizei MaxSize;       /**< Max possible texture size */
+   GLboolean NPOT;        /**< Non-power of two size OK? */
+   GLsizei Width, Height; /**< Current texture size */
+   GLenum IntFormat;
+   GLfloat Sright, Ttop;  /**< right, top texcoords */
+};
+
+
+/**
+ * State for glBlitFramebuffer()
+ */
+struct blit_state
+{
+   GLuint ArrayObj;
+   GLuint VBO;
+   GLuint DepthFP;
+};
+
+
+/**
+ * State for glClear()
+ */
+struct clear_state
+{
+   GLuint ArrayObj;
+   GLuint VBO;
+};
+
+
+/**
+ * State for glCopyPixels()
+ */
+struct copypix_state
+{
+   GLuint ArrayObj;
+   GLuint VBO;
+};
+
+
+/**
+ * State for glDrawPixels()
+ */
+struct drawpix_state
+{
+   GLuint ArrayObj;
+
+   GLuint StencilFP;  /**< Fragment program for drawing stencil images */
+   GLuint DepthFP;  /**< Fragment program for drawing depth images */
+};
+
+
+/**
+ * State for glBitmap()
+ */
+struct bitmap_state
+{
+   GLuint ArrayObj;
+   GLuint VBO;
+   struct temp_texture Tex;  /**< separate texture from other meta ops */
+};
+
+
+/**
+ * State for _mesa_meta_GenerateMipmap()
+ */
+struct gen_mipmap_state
+{
+   GLuint ArrayObj;
+   GLuint VBO;
+   GLuint FBO;
+};
+
+
+/**
+ * All per-context meta state.
+ */
+struct gl_meta_state
+{
+   struct save_state Save;    /**< state saved during meta-ops */
+
+   struct temp_texture TempTex;  /**< temporary texture shared by meta ops */
+
+   struct blit_state Blit;    /**< For _mesa_meta_BlitFramebuffer() */
+   struct clear_state Clear;  /**< For _mesa_meta_Clear() */
+   struct copypix_state CopyPix;  /**< For _mesa_meta_CopyPixels() */
+   struct drawpix_state DrawPix;  /**< For _mesa_meta_DrawPixels() */
+   struct bitmap_state Bitmap;    /**< For _mesa_meta_Bitmap() */
+   struct gen_mipmap_state Mipmap;    /**< For _mesa_meta_GenerateMipmap() */
+};
+
+
+/**
+ * Initialize meta-ops for a context by allocating ctx->Meta.
+ * To be called once during context creation (asserts ctx->Meta is unset).
+ */
+void
+_mesa_meta_init(GLcontext *ctx)
+{
+   ASSERT(!ctx->Meta);
+
+   ctx->Meta = CALLOC_STRUCT(gl_meta_state);  /* result not NULL-checked */
+}
+
+
+/**
+ * Free context meta-op state and reset ctx->Meta to NULL.
+ * To be called once during context destruction.
+ */
+void
+_mesa_meta_free(GLcontext *ctx)
+{
+   /* Note: Any textures, VBOs, etc, that we allocate should get
+    * freed by the normal context destruction code.  But this would be
+    * the place to free other meta data someday.
+    */
+   free(ctx->Meta);
+   ctx->Meta = NULL;
+}
+
+
+/**
+ * Enter meta state.  This is like a light-weight version of glPushAttrib
+ * but it also resets most GL state back to default values.
+ * \param ctx    context to modify; ctx->Meta must already be allocated
+ * \param state  bitmask of META_* flags indicating which attribute groups
+ *               to save and reset to their defaults
+ */
+static void
+_mesa_meta_begin(GLcontext *ctx, GLbitfield state)
+{
+   struct save_state *save = &ctx->Meta->Save;
+
+   save->SavedState = state;
+
+   if (state & META_ALPHA_TEST) {
+      save->AlphaEnabled = ctx->Color.AlphaEnabled;
+      if (ctx->Color.AlphaEnabled)
+         _mesa_set_enable(ctx, GL_ALPHA_TEST, GL_FALSE);
+   }
+
+   if (state & META_BLEND) {
+      save->BlendEnabled = ctx->Color.BlendEnabled;
+      if (ctx->Color.BlendEnabled) {
+         if (ctx->Extensions.EXT_draw_buffers2) {
+            GLuint i;
+            for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
+               _mesa_set_enablei(ctx, GL_BLEND, i, GL_FALSE);
+            }
+         }
+         else {
+            _mesa_set_enable(ctx, GL_BLEND, GL_FALSE);
+         }
+      }
+      save->ColorLogicOpEnabled = ctx->Color.ColorLogicOpEnabled;
+      if (ctx->Color.ColorLogicOpEnabled)
+         _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, GL_FALSE);
+   }
+
+   if (state & META_COLOR_MASK) {
+      memcpy(save->ColorMask, ctx->Color.ColorMask,
+             sizeof(ctx->Color.ColorMask));
+      if (!ctx->Color.ColorMask[0][0] ||
+          !ctx->Color.ColorMask[0][1] ||
+          !ctx->Color.ColorMask[0][2] ||
+          !ctx->Color.ColorMask[0][3])
+         _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+   }
+
+   if (state & META_DEPTH_TEST) {
+      save->Depth = ctx->Depth; /* struct copy */
+      if (ctx->Depth.Test)
+         _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_FALSE);
+   }
+
+   if (state & META_FOG) {
+      save->Fog = ctx->Fog.Enabled;
+      if (ctx->Fog.Enabled)
+         _mesa_set_enable(ctx, GL_FOG, GL_FALSE);
+   }
+
+   if (state & META_PIXEL_STORE) {
+      save->Pack = ctx->Pack;
+      save->Unpack = ctx->Unpack;
+      ctx->Pack = ctx->DefaultPacking;
+      ctx->Unpack = ctx->DefaultPacking;
+   }
+
+   if (state & META_PIXEL_TRANSFER) {
+      save->RedScale = ctx->Pixel.RedScale;
+      save->RedBias = ctx->Pixel.RedBias;
+      save->GreenScale = ctx->Pixel.GreenScale;
+      save->GreenBias = ctx->Pixel.GreenBias;
+      save->BlueScale = ctx->Pixel.BlueScale;
+      save->BlueBias = ctx->Pixel.BlueBias;
+      save->AlphaScale = ctx->Pixel.AlphaScale;
+      save->AlphaBias = ctx->Pixel.AlphaBias;
+      save->MapColorFlag = ctx->Pixel.MapColorFlag;
+      save->Convolution1DEnabled = ctx->Pixel.Convolution1DEnabled;
+      save->Convolution2DEnabled = ctx->Pixel.Convolution2DEnabled;
+      save->Separable2DEnabled = ctx->Pixel.Separable2DEnabled;
+      ctx->Pixel.RedScale = 1.0F;
+      ctx->Pixel.RedBias = 0.0F;
+      ctx->Pixel.GreenScale = 1.0F;
+      ctx->Pixel.GreenBias = 0.0F;
+      ctx->Pixel.BlueScale = 1.0F;
+      ctx->Pixel.BlueBias = 0.0F;
+      ctx->Pixel.AlphaScale = 1.0F;
+      ctx->Pixel.AlphaBias = 0.0F;
+      ctx->Pixel.MapColorFlag = GL_FALSE;
+      ctx->Pixel.Convolution1DEnabled = GL_FALSE;
+      ctx->Pixel.Convolution2DEnabled = GL_FALSE;
+      ctx->Pixel.Separable2DEnabled = GL_FALSE;
+      /* XXX more state (DepthScale/DepthBias are not saved or reset) */
+      ctx->NewState |=_NEW_PIXEL;
+   }
+
+   if (state & META_RASTERIZATION) {
+      save->FrontPolygonMode = ctx->Polygon.FrontMode;
+      save->BackPolygonMode = ctx->Polygon.BackMode;
+      save->PolygonOffset = ctx->Polygon.OffsetFill;
+      save->PolygonSmooth = ctx->Polygon.SmoothFlag;
+      save->PolygonStipple = ctx->Polygon.StippleFlag;
+      save->PolygonCull = ctx->Polygon.CullFlag;
+      _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+      _mesa_set_enable(ctx, GL_POLYGON_OFFSET_FILL, GL_FALSE);
+      _mesa_set_enable(ctx, GL_POLYGON_SMOOTH, GL_FALSE);
+      _mesa_set_enable(ctx, GL_POLYGON_STIPPLE, GL_FALSE);
+      _mesa_set_enable(ctx, GL_CULL_FACE, GL_FALSE);
+   }
+
+   if (state & META_SCISSOR) {
+      save->Scissor = ctx->Scissor; /* struct copy */
+      _mesa_set_enable(ctx, GL_SCISSOR_TEST, GL_FALSE);
+   }
+
+   if (state & META_SHADER) {
+      if (ctx->Extensions.ARB_vertex_program) {
+         save->VertexProgramEnabled = ctx->VertexProgram.Enabled;
+         _mesa_reference_vertprog(ctx, &save->VertexProgram,
+				  ctx->VertexProgram.Current);
+         _mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB, GL_FALSE);
+      }
+
+      if (ctx->Extensions.ARB_fragment_program) {
+         save->FragmentProgramEnabled = ctx->FragmentProgram.Enabled;
+         _mesa_reference_fragprog(ctx, &save->FragmentProgram,
+				  ctx->FragmentProgram.Current);
+         _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_FALSE);
+      }
+
+      if (ctx->Extensions.ARB_shader_objects) {
+         save->Shader = ctx->Shader.CurrentProgram ?
+            ctx->Shader.CurrentProgram->Name : 0;
+         _mesa_UseProgramObjectARB(0);
+      }
+   }
+
+   if (state & META_STENCIL_TEST) {
+      save->Stencil = ctx->Stencil; /* struct copy */
+      if (ctx->Stencil.Enabled)
+         _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_FALSE);
+      /* NOTE: other stencil state not reset */
+   }
+
+   if (state & META_TEXTURE) {
+      GLuint u, tgt;
+
+      save->ActiveUnit = ctx->Texture.CurrentUnit;
+      save->ClientActiveUnit = ctx->Array.ActiveTexture;
+      save->EnvMode = ctx->Texture.Unit[0].EnvMode;
+
+      /* Save enables, then disable texturing/texgen on units that use them */
+      for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
+         save->TexEnabled[u] = ctx->Texture.Unit[u].Enabled;
+         save->TexGenEnabled[u] = ctx->Texture.Unit[u].TexGenEnabled;
+         if (ctx->Texture.Unit[u].Enabled ||
+             ctx->Texture.Unit[u].TexGenEnabled) {
+            _mesa_ActiveTextureARB(GL_TEXTURE0 + u);
+            _mesa_set_enable(ctx, GL_TEXTURE_1D, GL_FALSE);
+            _mesa_set_enable(ctx, GL_TEXTURE_2D, GL_FALSE);
+            _mesa_set_enable(ctx, GL_TEXTURE_3D, GL_FALSE);
+            _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP, GL_FALSE);
+            _mesa_set_enable(ctx, GL_TEXTURE_RECTANGLE, GL_FALSE);
+            _mesa_set_enable(ctx, GL_TEXTURE_GEN_S, GL_FALSE);
+            _mesa_set_enable(ctx, GL_TEXTURE_GEN_T, GL_FALSE);
+            _mesa_set_enable(ctx, GL_TEXTURE_GEN_R, GL_FALSE);
+            _mesa_set_enable(ctx, GL_TEXTURE_GEN_Q, GL_FALSE);
+         }
+      }
+
+      /* save current texture objects for unit[0] only */
+      for (tgt = 0; tgt < NUM_TEXTURE_TARGETS; tgt++) {
+         _mesa_reference_texobj(&save->CurrentTexture[tgt],
+                                ctx->Texture.Unit[0].CurrentTex[tgt]);
+      }
+
+      /* set defaults for unit[0] */
+      _mesa_ActiveTextureARB(GL_TEXTURE0);
+      _mesa_ClientActiveTextureARB(GL_TEXTURE0);
+      _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+   }
+
+   if (state & META_TRANSFORM) {
+      GLuint activeTexture = ctx->Texture.CurrentUnit;
+      memcpy(save->ModelviewMatrix, ctx->ModelviewMatrixStack.Top->m,
+             16 * sizeof(GLfloat));
+      memcpy(save->ProjectionMatrix, ctx->ProjectionMatrixStack.Top->m,
+             16 * sizeof(GLfloat));
+      memcpy(save->TextureMatrix, ctx->TextureMatrixStack[0].Top->m,
+             16 * sizeof(GLfloat));
+      save->MatrixMode = ctx->Transform.MatrixMode;
+      /* set 1:1 vertex:pixel coordinate transform */
+      _mesa_ActiveTextureARB(GL_TEXTURE0);
+      _mesa_MatrixMode(GL_TEXTURE);
+      _mesa_LoadIdentity();
+      _mesa_ActiveTextureARB(GL_TEXTURE0 + activeTexture);
+      _mesa_MatrixMode(GL_MODELVIEW);
+      _mesa_LoadIdentity();
+      _mesa_MatrixMode(GL_PROJECTION);
+      _mesa_LoadIdentity();
+      _mesa_Ortho(0.0, ctx->DrawBuffer->Width,
+                  0.0, ctx->DrawBuffer->Height,
+                  -1.0, 1.0);
+      save->ClipPlanesEnabled = ctx->Transform.ClipPlanesEnabled;
+      if (ctx->Transform.ClipPlanesEnabled) {
+         GLuint i;
+         for (i = 0; i < ctx->Const.MaxClipPlanes; i++) {
+            _mesa_set_enable(ctx, GL_CLIP_PLANE0 + i, GL_FALSE);
+         }
+      }
+   }
+
+   if (state & META_VERTEX) {
+      /* save vertex array object state */
+      _mesa_reference_array_object(ctx, &save->ArrayObj,
+                                   ctx->Array.ArrayObj);
+      _mesa_reference_buffer_object(ctx, &save->ArrayBufferObj,
+                                    ctx->Array.ArrayBufferObj);
+      /* set some default state? */
+   }
+
+   if (state & META_VIEWPORT) {
+      /* save viewport state */
+      save->ViewportX = ctx->Viewport.X;
+      save->ViewportY = ctx->Viewport.Y;
+      save->ViewportW = ctx->Viewport.Width;
+      save->ViewportH = ctx->Viewport.Height;
+      /* set viewport to match window size */
+      if (ctx->Viewport.X != 0 ||
+          ctx->Viewport.Y != 0 ||
+          ctx->Viewport.Width != ctx->DrawBuffer->Width ||
+          ctx->Viewport.Height != ctx->DrawBuffer->Height) {
+         _mesa_set_viewport(ctx, 0, 0,
+                            ctx->DrawBuffer->Width, ctx->DrawBuffer->Height);
+      }
+      /* save depth range state */
+      save->DepthNear = ctx->Viewport.Near;
+      save->DepthFar = ctx->Viewport.Far;
+      /* set depth range to default */
+      _mesa_DepthRange(0.0, 1.0);
+   }
+
+   /* misc: lighting is saved and disabled regardless of 'state' */
+   {
+      save->Lighting = ctx->Light.Enabled;
+      if (ctx->Light.Enabled)
+         _mesa_set_enable(ctx, GL_LIGHTING, GL_FALSE);
+   }
+}
+
+
+/**
+ * Leave meta state.  This is like a light-weight version of glPopAttrib().
+ *
+ * Each group of state whose META_x bit is set in
+ * ctx->Meta->Save.SavedState is restored from the values held in that
+ * save_state struct.  Object references held by the save struct (vertex
+ * and fragment programs, texture objects, the array object and the array
+ * buffer object) are released once they have been restored.
+ *
+ * \param ctx  the GL context whose state is to be restored
+ */
+static void
+_mesa_meta_end(GLcontext *ctx)
+{
+   struct save_state *save = &ctx->Meta->Save;
+   const GLbitfield state = save->SavedState;
+
+   if (state & META_ALPHA_TEST) {
+      if (ctx->Color.AlphaEnabled != save->AlphaEnabled)
+         _mesa_set_enable(ctx, GL_ALPHA_TEST, save->AlphaEnabled);
+   }
+
+   if (state & META_BLEND) {
+      if (ctx->Color.BlendEnabled != save->BlendEnabled) {
+         if (ctx->Extensions.EXT_draw_buffers2) {
+            GLuint i;
+            for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
+               _mesa_set_enablei(ctx, GL_BLEND, i, (save->BlendEnabled >> i) & 1);
+            }
+         }
+         else {
+            _mesa_set_enable(ctx, GL_BLEND, (save->BlendEnabled & 1));
+         }
+      }
+      if (ctx->Color.ColorLogicOpEnabled != save->ColorLogicOpEnabled)
+         _mesa_set_enable(ctx, GL_COLOR_LOGIC_OP, save->ColorLogicOpEnabled);
+   }
+
+   if (state & META_COLOR_MASK) {
+      GLuint i;
+      for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
+         if (!TEST_EQ_4V(ctx->Color.ColorMask[i], save->ColorMask[i])) {
+            if (i == 0) {
+               _mesa_ColorMask(save->ColorMask[i][0], save->ColorMask[i][1],
+                               save->ColorMask[i][2], save->ColorMask[i][3]);
+            }
+            else {
+               _mesa_ColorMaskIndexed(i,
+                                      save->ColorMask[i][0],
+                                      save->ColorMask[i][1],
+                                      save->ColorMask[i][2],
+                                      save->ColorMask[i][3]);
+            }
+         }
+      }
+   }
+
+   if (state & META_DEPTH_TEST) {
+      if (ctx->Depth.Test != save->Depth.Test)
+         _mesa_set_enable(ctx, GL_DEPTH_TEST, save->Depth.Test);
+      _mesa_DepthFunc(save->Depth.Func);
+      _mesa_DepthMask(save->Depth.Mask);
+   }
+
+   if (state & META_FOG) {
+      _mesa_set_enable(ctx, GL_FOG, save->Fog);
+   }
+
+   if (state & META_PIXEL_STORE) {
+      ctx->Pack = save->Pack;
+      ctx->Unpack = save->Unpack;
+   }
+
+   if (state & META_PIXEL_TRANSFER) {
+      ctx->Pixel.RedScale = save->RedScale;
+      ctx->Pixel.RedBias = save->RedBias;
+      ctx->Pixel.GreenScale = save->GreenScale;
+      ctx->Pixel.GreenBias = save->GreenBias;
+      ctx->Pixel.BlueScale = save->BlueScale;
+      ctx->Pixel.BlueBias = save->BlueBias;
+      ctx->Pixel.AlphaScale = save->AlphaScale;
+      ctx->Pixel.AlphaBias = save->AlphaBias;
+      ctx->Pixel.MapColorFlag = save->MapColorFlag;
+      ctx->Pixel.Convolution1DEnabled = save->Convolution1DEnabled;
+      ctx->Pixel.Convolution2DEnabled = save->Convolution2DEnabled;
+      ctx->Pixel.Separable2DEnabled = save->Separable2DEnabled;
+      /* XXX more state */
+      ctx->NewState |=_NEW_PIXEL; /* fields above were written directly */
+   }
+
+   if (state & META_RASTERIZATION) {
+      _mesa_PolygonMode(GL_FRONT, save->FrontPolygonMode);
+      _mesa_PolygonMode(GL_BACK, save->BackPolygonMode);
+      _mesa_set_enable(ctx, GL_POLYGON_STIPPLE, save->PolygonStipple);
+      _mesa_set_enable(ctx, GL_POLYGON_OFFSET_FILL, save->PolygonOffset);
+      _mesa_set_enable(ctx, GL_POLYGON_SMOOTH, save->PolygonSmooth);
+      _mesa_set_enable(ctx, GL_CULL_FACE, save->PolygonCull);
+   }
+
+   if (state & META_SCISSOR) {
+      _mesa_set_enable(ctx, GL_SCISSOR_TEST, save->Scissor.Enabled);
+      _mesa_Scissor(save->Scissor.X, save->Scissor.Y,
+                    save->Scissor.Width, save->Scissor.Height);
+   }
+
+   if (state & META_SHADER) {
+      if (ctx->Extensions.ARB_vertex_program) {
+         _mesa_set_enable(ctx, GL_VERTEX_PROGRAM_ARB,
+                          save->VertexProgramEnabled);
+         _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, 
+                                  save->VertexProgram);
+	 _mesa_reference_vertprog(ctx, &save->VertexProgram, NULL); /* drop the saved reference */
+      }
+
+      if (ctx->Extensions.ARB_fragment_program) {
+         _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB,
+                          save->FragmentProgramEnabled);
+         _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current,
+                                  save->FragmentProgram);
+	 _mesa_reference_fragprog(ctx, &save->FragmentProgram, NULL); /* drop the saved reference */
+      }
+
+      if (ctx->Extensions.ARB_shader_objects) {
+         _mesa_UseProgramObjectARB(save->Shader);
+      }
+   }
+
+   if (state & META_STENCIL_TEST) {
+      const struct gl_stencil_attrib *stencil = &save->Stencil;
+
+      _mesa_set_enable(ctx, GL_STENCIL_TEST, stencil->Enabled);
+      _mesa_ClearStencil(stencil->Clear);
+      if (ctx->Extensions.EXT_stencil_two_side) {
+         _mesa_set_enable(ctx, GL_STENCIL_TEST_TWO_SIDE_EXT,
+                          stencil->TestTwoSide);
+         _mesa_ActiveStencilFaceEXT(stencil->ActiveFace
+                                    ? GL_BACK : GL_FRONT);
+      }
+      /* front state (index 0 of the saved per-face arrays) */
+      _mesa_StencilFuncSeparate(GL_FRONT,
+                                stencil->Function[0],
+                                stencil->Ref[0],
+                                stencil->ValueMask[0]);
+      _mesa_StencilMaskSeparate(GL_FRONT, stencil->WriteMask[0]);
+      _mesa_StencilOpSeparate(GL_FRONT, stencil->FailFunc[0],
+                              stencil->ZFailFunc[0],
+                              stencil->ZPassFunc[0]);
+      /* back state (index 1 of the saved per-face arrays) */
+      _mesa_StencilFuncSeparate(GL_BACK,
+                                stencil->Function[1],
+                                stencil->Ref[1],
+                                stencil->ValueMask[1]);
+      _mesa_StencilMaskSeparate(GL_BACK, stencil->WriteMask[1]);
+      _mesa_StencilOpSeparate(GL_BACK, stencil->FailFunc[1],
+                              stencil->ZFailFunc[1],
+                              stencil->ZPassFunc[1]);
+   }
+
+   if (state & META_TEXTURE) {
+      GLuint u, tgt;
+
+      ASSERT(ctx->Texture.CurrentUnit == 0);
+
+      /* restore texenv for unit[0] */
+      _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, save->EnvMode);
+
+      /* restore texture objects for unit[0] only, then drop the saved
+       * references
+       */
+      for (tgt = 0; tgt < NUM_TEXTURE_TARGETS; tgt++) {
+         _mesa_reference_texobj(&ctx->Texture.Unit[0].CurrentTex[tgt],
+                                save->CurrentTexture[tgt]);
+         _mesa_reference_texobj(&save->CurrentTexture[tgt], NULL);
+      }
+
+      /* Re-enable textures, texgen.  Only bits saved as enabled are
+       * turned back on; nothing is disabled here.
+       */
+      for (u = 0; u < ctx->Const.MaxTextureUnits; u++) {
+         if (save->TexEnabled[u]) {
+            _mesa_ActiveTextureARB(GL_TEXTURE0 + u);
+
+            if (save->TexEnabled[u] & TEXTURE_1D_BIT)
+               _mesa_set_enable(ctx, GL_TEXTURE_1D, GL_TRUE);
+            if (save->TexEnabled[u] & TEXTURE_2D_BIT)
+               _mesa_set_enable(ctx, GL_TEXTURE_2D, GL_TRUE);
+            if (save->TexEnabled[u] & TEXTURE_3D_BIT)
+               _mesa_set_enable(ctx, GL_TEXTURE_3D, GL_TRUE);
+            if (save->TexEnabled[u] & TEXTURE_CUBE_BIT)
+               _mesa_set_enable(ctx, GL_TEXTURE_CUBE_MAP, GL_TRUE);
+            if (save->TexEnabled[u] & TEXTURE_RECT_BIT)
+               _mesa_set_enable(ctx, GL_TEXTURE_RECTANGLE, GL_TRUE);
+         }
+
+         if (save->TexGenEnabled[u]) {
+            _mesa_ActiveTextureARB(GL_TEXTURE0 + u);
+
+            if (save->TexGenEnabled[u] & S_BIT)
+               _mesa_set_enable(ctx, GL_TEXTURE_GEN_S, GL_TRUE);
+            if (save->TexGenEnabled[u] & T_BIT)
+               _mesa_set_enable(ctx, GL_TEXTURE_GEN_T, GL_TRUE);
+            if (save->TexGenEnabled[u] & R_BIT)
+               _mesa_set_enable(ctx, GL_TEXTURE_GEN_R, GL_TRUE);
+            if (save->TexGenEnabled[u] & Q_BIT)
+               _mesa_set_enable(ctx, GL_TEXTURE_GEN_Q, GL_TRUE);
+         }
+      }
+
+      /* restore current unit state */
+      _mesa_ActiveTextureARB(GL_TEXTURE0 + save->ActiveUnit);
+      _mesa_ClientActiveTextureARB(GL_TEXTURE0 + save->ClientActiveUnit);
+   }
+
+   if (state & META_TRANSFORM) {
+      GLuint activeTexture = ctx->Texture.CurrentUnit;
+      _mesa_ActiveTextureARB(GL_TEXTURE0); /* only unit 0's texture matrix was saved */
+      _mesa_MatrixMode(GL_TEXTURE);
+      _mesa_LoadMatrixf(save->TextureMatrix);
+      _mesa_ActiveTextureARB(GL_TEXTURE0 + activeTexture);
+
+      _mesa_MatrixMode(GL_MODELVIEW);
+      _mesa_LoadMatrixf(save->ModelviewMatrix);
+
+      _mesa_MatrixMode(GL_PROJECTION);
+      _mesa_LoadMatrixf(save->ProjectionMatrix);
+
+      _mesa_MatrixMode(save->MatrixMode);
+
+      if (save->ClipPlanesEnabled) {
+         GLuint i;
+         for (i = 0; i < ctx->Const.MaxClipPlanes; i++) {
+            if (save->ClipPlanesEnabled & (1 << i)) {
+               _mesa_set_enable(ctx, GL_CLIP_PLANE0 + i, GL_TRUE);
+            }
+         }
+      }
+   }
+
+   if (state & META_VERTEX) {
+      /* restore vertex buffer object */
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, save->ArrayBufferObj->Name);
+      _mesa_reference_buffer_object(ctx, &save->ArrayBufferObj, NULL);
+
+      /* restore vertex array object */
+      _mesa_BindVertexArray(save->ArrayObj->Name);
+      _mesa_reference_array_object(ctx, &save->ArrayObj, NULL);
+   }
+
+   if (state & META_VIEWPORT) {
+      if (save->ViewportX != ctx->Viewport.X ||
+          save->ViewportY != ctx->Viewport.Y ||
+          save->ViewportW != ctx->Viewport.Width ||
+          save->ViewportH != ctx->Viewport.Height) {
+         _mesa_set_viewport(ctx, save->ViewportX, save->ViewportY,
+                            save->ViewportW, save->ViewportH);
+      }
+      _mesa_DepthRange(save->DepthNear, save->DepthFar);
+   }
+
+   /* misc: lighting is restored regardless of the SavedState bits */
+   if (save->Lighting) {
+      _mesa_set_enable(ctx, GL_LIGHTING, GL_TRUE);
+   }
+}
+
+
+/**
+ * Map a normalized depth value in [0, 1] to the object-space Z coordinate
+ * in [-1, +1] that, when drawn with the default/identity ortho projection,
+ * lands back on the original depth value (0 maps to +1, 1 maps to -1).
+ * The meta Clear, Draw/CopyPixels and Bitmap paths use this to turn a
+ * clear value or raster-position Z into a vertex Z.
+ *
+ * \param normZ  normalized depth value
+ * \return the corresponding object-space Z
+ */
+static INLINE GLfloat
+invert_z(GLfloat normZ)
+{
+   return (GLfloat) (1.0 - 2.0 * normZ);
+}
+
+
+/**
+ * One-time setup of a temp_texture object: pick the texture target,
+ * record the size limits and NPOT capability, then generate and bind
+ * the GL texture object.
+ *
+ * \param ctx  the GL context
+ * \param tex  the temp_texture to initialize
+ */
+static void
+init_temp_texture(GLcontext *ctx, struct temp_texture *tex)
+{
+   tex->MinSize = 16;  /* never smaller than 16 x 16 */
+
+   if (!ctx->Extensions.NV_texture_rectangle) {
+      /* no rectangle textures: fall back to 2D, NPOT when available */
+      tex->Target = GL_TEXTURE_2D;
+      tex->MaxSize = 1 << (ctx->Const.MaxTextureLevels - 1);
+      tex->NPOT = ctx->Extensions.ARB_texture_non_power_of_two;
+   }
+   else {
+      /* rectangle textures are the preferred choice */
+      tex->Target = GL_TEXTURE_RECTANGLE;
+      tex->MaxSize = ctx->Const.MaxTextureRectSize;
+      tex->NPOT = GL_TRUE;
+   }
+   assert(tex->MaxSize > 0);
+
+   _mesa_GenTextures(1, &tex->TexObj);
+   _mesa_BindTexture(tex->Target, tex->TexObj);
+}
+
+
+/**
+ * Fetch the context's temp_texture used by the non-bitmap meta ops,
+ * running the one-time initialization the first time it is requested.
+ *
+ * \param ctx  the GL context
+ * \return pointer to ctx->Meta->TempTex
+ */
+static struct temp_texture *
+get_temp_texture(GLcontext *ctx)
+{
+   struct temp_texture *tempTex = &ctx->Meta->TempTex;
+
+   /* a zero texture name means the object has not been set up yet */
+   if (tempTex->TexObj == 0)
+      init_temp_texture(ctx, tempTex);
+
+   return tempTex;
+}
+
+
+/**
+ * Fetch the temp_texture reserved for _mesa_meta_bitmap(), running the
+ * one-time initialization the first time it is requested.
+ * Bitmaps get their own texture to reduce texture
+ * allocation/deallocation.
+ *
+ * \param ctx  the GL context
+ * \return pointer to ctx->Meta->Bitmap.Tex
+ */
+static struct temp_texture *
+get_bitmap_temp_texture(GLcontext *ctx)
+{
+   struct temp_texture *bitmapTex = &ctx->Meta->Bitmap.Tex;
+
+   /* a zero texture name means the object has not been set up yet */
+   if (bitmapTex->TexObj == 0)
+      init_temp_texture(ctx, bitmapTex);
+
+   return bitmapTex;
+}
+
+
+/**
+ * Work out the texture dimensions required to draw an image of the given
+ * size and whether the existing texture can be re-used
+ * (glTexSubImage2D) or a new one has to be allocated (glTexImage2D).
+ * The right/top texcoords for drawing the image are stored in
+ * tex->Sright and tex->Ttop.
+ *
+ * \param tex        the temp texture to (re)size
+ * \param width      image width in pixels
+ * \param height     image height in pixels
+ * \param intFormat  requested internal format
+ * \return GL_TRUE if new texture is needed, GL_FALSE otherwise
+ */
+static GLboolean
+alloc_texture(struct temp_texture *tex,
+              GLsizei width, GLsizei height, GLenum intFormat)
+{
+   GLboolean newTex;
+
+   ASSERT(width <= tex->MaxSize);
+   ASSERT(height <= tex->MaxSize);
+
+   if (width > tex->Width ||
+       height > tex->Height ||
+       intFormat != tex->IntFormat) {
+      /* the current texture is too small or has the wrong format */
+      if (!tex->NPOT) {
+         /* grow from the minimum size in powers of two */
+         GLsizei potW, potH;
+         for (potW = tex->MinSize; potW < width; potW *= 2) {
+            /* empty */
+         }
+         for (potH = tex->MinSize; potH < height; potH *= 2) {
+            /* empty */
+         }
+         tex->Width = potW;
+         tex->Height = potH;
+      }
+      else {
+         /* any size will do, subject to the minimum */
+         tex->Width = MAX2(tex->MinSize, width);
+         tex->Height = MAX2(tex->MinSize, height);
+      }
+
+      tex->IntFormat = intFormat;
+      newTex = GL_TRUE;
+   }
+   else {
+      newTex = GL_FALSE;
+   }
+
+   /* texcoords: rectangle textures are addressed in texels, everything
+    * else in normalized coordinates
+    */
+   if (tex->Target != GL_TEXTURE_RECTANGLE) {
+      tex->Sright = (GLfloat) width / tex->Width;
+      tex->Ttop = (GLfloat) height / tex->Height;
+   }
+   else {
+      tex->Sright = (GLfloat) width;
+      tex->Ttop = (GLfloat) height;
+   }
+
+   return newTex;
+}
+
+
+/**
+ * Bind the temp texture, set its filtering/texenv and fill it from the
+ * framebuffer for glCopyPixels or glBlitFramebuffer.
+ *
+ * \param tex        the temp texture
+ * \param newTex     GL_TRUE if the texture image must be (re)created
+ * \param srcX       left edge of the framebuffer region to copy
+ * \param srcY       bottom edge of the framebuffer region to copy
+ * \param width      region width in pixels
+ * \param height     region height in pixels
+ * \param intFormat  format used when creating an empty texture image
+ * \param filter     min/mag filter to apply
+ */
+static void
+setup_copypix_texture(struct temp_texture *tex,
+                      GLboolean newTex,
+                      GLint srcX, GLint srcY,
+                      GLsizei width, GLsizei height, GLenum intFormat,
+                      GLenum filter)
+{
+   const GLboolean exactFit = (tex->Width == width && tex->Height == height);
+
+   _mesa_BindTexture(tex->Target, tex->TexObj);
+   _mesa_TexParameteri(tex->Target, GL_TEXTURE_MIN_FILTER, filter);
+   _mesa_TexParameteri(tex->Target, GL_TEXTURE_MAG_FILTER, filter);
+   _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+
+   if (newTex && exactFit) {
+      /* texture and region match: create and fill in a single call */
+      _mesa_CopyTexImage2D(tex->Target, 0, tex->IntFormat,
+                           srcX, srcY, width, height, 0);
+      return;
+   }
+
+   if (newTex) {
+      /* texture is larger than the region: allocate it empty first */
+      _mesa_TexImage2D(tex->Target, 0, tex->IntFormat,
+                       tex->Width, tex->Height, 0,
+                       intFormat, GL_UNSIGNED_BYTE, NULL);
+   }
+
+   /* copy the framebuffer region into the lower-left corner */
+   _mesa_CopyTexSubImage2D(tex->Target, 0,
+                           0, 0, srcX, srcY, width, height);
+}
+
+
+/**
+ * Setup/load texture for glDrawPixels.
+ *
+ * Binds the temp texture, selects GL_NEAREST filtering and GL_REPLACE
+ * texenv, then loads the pixel data.  When a new texture larger than the
+ * image is needed, an empty image is allocated first and the pixels are
+ * loaded with glTexSubImage2D.
+ *
+ * \param ctx           the GL context
+ * \param tex           the temp texture
+ * \param newTex        GL_TRUE if the texture image must be (re)created
+ * \param texIntFormat  unused here; tex->IntFormat is what gets used
+ * \param width         image width in pixels
+ * \param height        image height in pixels
+ * \param format        format of the pixel data
+ * \param type          datatype of the pixel data
+ * \param pixels        the pixel data (or PBO offset)
+ */
+static void
+setup_drawpix_texture(GLcontext *ctx,
+                      struct temp_texture *tex,
+                      GLboolean newTex,
+                      GLenum texIntFormat,
+                      GLsizei width, GLsizei height,
+                      GLenum format, GLenum type,
+                      const GLvoid *pixels)
+{
+   _mesa_BindTexture(tex->Target, tex->TexObj);
+   _mesa_TexParameteri(tex->Target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+   _mesa_TexParameteri(tex->Target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+   _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+
+   /* copy pixel data to texture */
+   if (newTex) {
+      /* create new tex image */
+      if (tex->Width == width && tex->Height == height) {
+         /* create new tex and load image data */
+         _mesa_TexImage2D(tex->Target, 0, tex->IntFormat,
+                          tex->Width, tex->Height, 0, format, type, pixels);
+      }
+      else {
+         struct gl_buffer_object *save_unpack_obj = NULL;
+
+         /* Hold a reference to the bound unpack buffer while it is
+          * temporarily unbound, so the NULL pointer passed below is not
+          * taken as an offset into it.
+          */
+         _mesa_reference_buffer_object(ctx, &save_unpack_obj,
+                                       ctx->Unpack.BufferObj);
+         _mesa_BindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
+         /* create empty texture */
+         _mesa_TexImage2D(tex->Target, 0, tex->IntFormat,
+                          tex->Width, tex->Height, 0, format, type, NULL);
+         if (save_unpack_obj != NULL) {
+            _mesa_BindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB,
+                                save_unpack_obj->Name);
+            /* release the temporary reference taken above; without this
+             * the buffer object's refcount grows on every call
+             */
+            _mesa_reference_buffer_object(ctx, &save_unpack_obj, NULL);
+         }
+         /* load image */
+         _mesa_TexSubImage2D(tex->Target, 0,
+                             0, 0, width, height, format, type, pixels);
+      }
+   }
+   else {
+      /* replace existing tex image */
+      _mesa_TexSubImage2D(tex->Target, 0,
+                          0, 0, width, height, format, type, pixels);
+   }
+}
+
+
+
+/**
+ * One-time init for drawing depth pixels: builds the ARB fragment
+ * program that writes the sampled texel to result.depth and stores its
+ * name in ctx->Meta->Blit.DepthFP.
+ *
+ * \param ctx  the GL context
+ */
+static void
+init_blit_depth_pixels(GLcontext *ctx)
+{
+   static const char *fpTemplate =
+      "!!ARBfp1.0\n"
+      "TEX result.depth, fragment.texcoord[0], texture[0], %s; \n"
+      "END \n";
+   struct blit_state *blit = &ctx->Meta->Blit;
+   struct temp_texture *tex = get_temp_texture(ctx);
+   /* the sampler target keyword must match the temp texture's target */
+   const char *targetName =
+      (tex->Target == GL_TEXTURE_RECTANGLE) ? "RECT" : "2D";
+   char fpText[200];
+
+   assert(blit->DepthFP == 0);
+
+   /* substitute "RECT" or "2D" for the %s in the template */
+   assert(strlen(fpTemplate) + 4 < sizeof(fpText));
+   _mesa_snprintf(fpText, sizeof(fpText), fpTemplate, targetName);
+
+   _mesa_GenPrograms(1, &blit->DepthFP);
+   _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, blit->DepthFP);
+   _mesa_ProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
+                          strlen(fpText), (const GLubyte *) fpText);
+}
+
+
+/**
+ * Try to do a glBlitFramebuffer using no-copy texturing.
+ * We can do this when the src renderbuffer is actually a texture.
+ * But if the src buffer == dst buffer we cannot do this.
+ *
+ * Only the color buffer is handled, and only when the read attachment is
+ * a GL_TEXTURE_2D or GL_TEXTURE_RECTANGLE_ARB texture.  The source
+ * rectangle is expressed directly through the texcoords, so src corner
+ * (srcX0, srcY0) is drawn at dst corner (dstX0, dstY0) and likewise for
+ * corner 1.  The caller must already have bound the blit VBO.
+ *
+ * The sampler parameters of the source texture are saved and restored
+ * here, and the texture target enabled for the draw is disabled again
+ * before returning.
+ *
+ * \return new buffer mask indicating the buffers left to blit using the
+ *         normal path.
+ */
+static GLbitfield
+blitframebuffer_texture(GLcontext *ctx,
+                        GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+                        GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+                        GLbitfield mask, GLenum filter)
+{
+   if (mask & GL_COLOR_BUFFER_BIT) {
+      const struct gl_framebuffer *drawFb = ctx->DrawBuffer;
+      const struct gl_framebuffer *readFb = ctx->ReadBuffer;
+      const struct gl_renderbuffer_attachment *drawAtt =
+         &drawFb->Attachment[drawFb->_ColorDrawBufferIndexes[0]];
+      const struct gl_renderbuffer_attachment *readAtt =
+         &readFb->Attachment[readFb->_ColorReadBufferIndex];
+
+      if (readAtt && readAtt->Texture) {
+         const struct gl_texture_object *texObj = readAtt->Texture;
+         const GLuint srcLevel = readAtt->TextureLevel;
+         const GLenum minFilterSave = texObj->MinFilter;
+         const GLenum magFilterSave = texObj->MagFilter;
+         const GLint baseLevelSave = texObj->BaseLevel;
+         const GLint maxLevelSave = texObj->MaxLevel;
+         const GLenum wrapSSave = texObj->WrapS;
+         const GLenum wrapTSave = texObj->WrapT;
+         const GLenum target = texObj->Target;
+
+         if (drawAtt->Texture == readAtt->Texture) {
+            /* Can't use same texture as both the source and dest.  We need
+             * to handle overlapping blits and besides, some hw may not
+             * support this.
+             */
+            return mask;
+         }
+
+         if (target != GL_TEXTURE_2D && target != GL_TEXTURE_RECTANGLE_ARB) {
+            /* Can't handle other texture types at this time */
+            return mask;
+         }
+
+         /* Prepare src texture state */
+         _mesa_BindTexture(target, texObj->Name);
+         _mesa_TexParameteri(target, GL_TEXTURE_MIN_FILTER, filter);
+         _mesa_TexParameteri(target, GL_TEXTURE_MAG_FILTER, filter);
+         if (target != GL_TEXTURE_RECTANGLE_ARB) {
+            /* sample only from the attached mipmap level */
+            _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, srcLevel);
+            _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, srcLevel);
+         }
+         _mesa_TexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+         _mesa_TexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+         _mesa_TexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+         _mesa_set_enable(ctx, target, GL_TRUE);
+
+         /* Prepare vertex data (the VBO was previously created and bound) */
+         {
+            struct vertex {
+               GLfloat x, y, s, t;
+            };
+            struct vertex verts[4];
+            GLfloat s0, t0, s1, t1;
+
+            if (target == GL_TEXTURE_2D) {
+               /* 2D textures use normalized texcoords */
+               const struct gl_texture_image *texImage
+                   = _mesa_select_tex_image(ctx, texObj, target, srcLevel);
+               s0 = srcX0 / (float) texImage->Width;
+               s1 = srcX1 / (float) texImage->Width;
+               t0 = srcY0 / (float) texImage->Height;
+               t1 = srcY1 / (float) texImage->Height;
+            }
+            else {
+               /* rectangle textures are addressed in texels */
+               assert(target == GL_TEXTURE_RECTANGLE_ARB);
+               s0 = (GLfloat) srcX0;
+               s1 = (GLfloat) srcX1;
+               t0 = (GLfloat) srcY0;
+               t1 = (GLfloat) srcY1;
+            }
+
+            verts[0].x = (GLfloat) dstX0;
+            verts[0].y = (GLfloat) dstY0;
+            verts[1].x = (GLfloat) dstX1;
+            verts[1].y = (GLfloat) dstY0;
+            verts[2].x = (GLfloat) dstX1;
+            verts[2].y = (GLfloat) dstY1;
+            verts[3].x = (GLfloat) dstX0;
+            verts[3].y = (GLfloat) dstY1;
+
+            verts[0].s = s0;
+            verts[0].t = t0;
+            verts[1].s = s1;
+            verts[1].t = t0;
+            verts[2].s = s1;
+            verts[2].t = t1;
+            verts[3].s = s0;
+            verts[3].t = t1;
+
+            _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
+         }
+
+         _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+         /* Restore texture object state, the texture binding will
+          * be restored by _mesa_meta_end().
+          */
+         _mesa_TexParameteri(target, GL_TEXTURE_MIN_FILTER, minFilterSave);
+         _mesa_TexParameteri(target, GL_TEXTURE_MAG_FILTER, magFilterSave);
+         if (target != GL_TEXTURE_RECTANGLE_ARB) {
+            _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, baseLevelSave);
+            _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, maxLevelSave);
+         }
+         _mesa_TexParameteri(target, GL_TEXTURE_WRAP_S, wrapSSave);
+         _mesa_TexParameteri(target, GL_TEXTURE_WRAP_T, wrapTSave);
+
+         /* _mesa_meta_end() only re-enables the texture targets that were
+          * enabled on entry and never disables any, so turn off the
+          * target enabled above to avoid leaking it into the
+          * application's state.
+          */
+         _mesa_set_enable(ctx, target, GL_FALSE);
+
+         /* Done with color buffer */
+         mask &= ~GL_COLOR_BUFFER_BIT;
+      }
+   }
+
+   return mask;
+}
+
+
+/**
+ * Meta implementation of ctx->Driver.BlitFramebuffer() in terms
+ * of texture mapping and polygon rendering.
+ *
+ * The direct texture path (blitframebuffer_texture()) is tried first for
+ * the color buffer.  Whatever remains is done by copying the source
+ * rectangle into a temporary texture and drawing a textured quad; depth
+ * goes through glReadPixels plus a fragment program that writes
+ * result.depth.  Buffers that still remain (stencil, or depth when the
+ * temporary allocation fails) are passed to _swrast_BlitFramebuffer().
+ * Blits whose source is larger than the temp texture limit go to swrast
+ * entirely.
+ *
+ * \param ctx     the GL context
+ * \param srcX0,srcY0,srcX1,srcY1  source rectangle; corner 1 may be less
+ *                than corner 0, which mirrors the image
+ * \param dstX0,dstY0,dstX1,dstY1  destination rectangle
+ * \param mask    bitmask of GL_x_BUFFER_BIT flags to blit
+ * \param filter  texture filter to use for the color blit
+ */
+void
+_mesa_meta_BlitFramebuffer(GLcontext *ctx,
+                           GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+                           GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+                           GLbitfield mask, GLenum filter)
+{
+   struct blit_state *blit = &ctx->Meta->Blit;
+   struct temp_texture *tex = get_temp_texture(ctx);
+   const GLsizei maxTexSize = tex->MaxSize;
+   const GLint srcX = MIN2(srcX0, srcX1);
+   const GLint srcY = MIN2(srcY0, srcY1);
+   const GLint srcW = abs(srcX1 - srcX0);
+   const GLint srcH = abs(srcY1 - srcY0);
+   const GLboolean srcFlipX = srcX1 < srcX0;
+   const GLboolean srcFlipY = srcY1 < srcY0;
+   /* Quad corners for the temp-texture path.  The temp texture always
+    * holds the source region starting at its minimum corner, so a
+    * reversed source rectangle is expressed by reversing the quad.
+    * The dst parameters themselves stay untouched: the direct texture
+    * path and the swrast fallback take the src coordinates as given and
+    * would otherwise see the flip applied twice.
+    */
+   const GLint quadX0 = srcFlipX ? dstX1 : dstX0;
+   const GLint quadX1 = srcFlipX ? dstX0 : dstX1;
+   const GLint quadY0 = srcFlipY ? dstY1 : dstY0;
+   const GLint quadY1 = srcFlipY ? dstY0 : dstY1;
+   struct vertex {
+      GLfloat x, y, s, t;
+   };
+   struct vertex verts[4];
+   GLboolean newTex;
+
+   if (srcW > maxTexSize || srcH > maxTexSize) {
+      /* XXX avoid this fallback */
+      _swrast_BlitFramebuffer(ctx, srcX0, srcY0, srcX1, srcY1,
+                              dstX0, dstY0, dstX1, dstY1, mask, filter);
+      return;
+   }
+
+   /* only scissor effects blit so save/clear all other relevant state */
+   _mesa_meta_begin(ctx, ~META_SCISSOR);
+
+   if (blit->ArrayObj == 0) {
+      /* one-time setup */
+
+      /* create vertex array object */
+      _mesa_GenVertexArrays(1, &blit->ArrayObj);
+      _mesa_BindVertexArray(blit->ArrayObj);
+
+      /* create vertex array buffer */
+      _mesa_GenBuffersARB(1, &blit->VBO);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, blit->VBO);
+      _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts),
+                          NULL, GL_DYNAMIC_DRAW_ARB);
+
+      /* setup vertex arrays */
+      _mesa_VertexPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
+      _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(s));
+      _mesa_EnableClientState(GL_VERTEX_ARRAY);
+      _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
+   }
+   else {
+      _mesa_BindVertexArray(blit->ArrayObj);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, blit->VBO);
+   }
+
+   /* Try faster, direct texture approach first */
+   mask = blitframebuffer_texture(ctx, srcX0, srcY0, srcX1, srcY1,
+                                  dstX0, dstY0, dstX1, dstY1, mask, filter);
+   if (mask == 0x0) {
+      _mesa_meta_end(ctx);
+      return;
+   }
+
+   /* Continue with "normal" approach which involves copying the src rect
+    * into a temporary texture and is "blitted" by drawing a textured quad.
+    */
+
+   newTex = alloc_texture(tex, srcW, srcH, GL_RGBA);
+
+   /* vertex positions/texcoords (after texture allocation!) */
+   {
+      verts[0].x = (GLfloat) quadX0;
+      verts[0].y = (GLfloat) quadY0;
+      verts[1].x = (GLfloat) quadX1;
+      verts[1].y = (GLfloat) quadY0;
+      verts[2].x = (GLfloat) quadX1;
+      verts[2].y = (GLfloat) quadY1;
+      verts[3].x = (GLfloat) quadX0;
+      verts[3].y = (GLfloat) quadY1;
+
+      verts[0].s = 0.0F;
+      verts[0].t = 0.0F;
+      verts[1].s = tex->Sright;
+      verts[1].t = 0.0F;
+      verts[2].s = tex->Sright;
+      verts[2].t = tex->Ttop;
+      verts[3].s = 0.0F;
+      verts[3].t = tex->Ttop;
+
+      /* upload new vertex data */
+      _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
+   }
+
+   _mesa_set_enable(ctx, tex->Target, GL_TRUE);
+
+   if (mask & GL_COLOR_BUFFER_BIT) {
+      setup_copypix_texture(tex, newTex, srcX, srcY, srcW, srcH,
+                            GL_RGBA, filter);
+      _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+      mask &= ~GL_COLOR_BUFFER_BIT;
+   }
+
+   if (mask & GL_DEPTH_BUFFER_BIT) {
+      GLuint *tmp = (GLuint *) malloc(srcW * srcH * sizeof(GLuint));
+      if (tmp) {
+         if (!blit->DepthFP)
+            init_blit_depth_pixels(ctx);
+
+         /* maybe change tex format here */
+         newTex = alloc_texture(tex, srcW, srcH, GL_DEPTH_COMPONENT);
+
+         /* A format change makes alloc_texture() pick a new texture
+          * size, which can change the right/top texcoords, so refresh
+          * the vertex data uploaded above before drawing.
+          */
+         verts[1].s = tex->Sright;
+         verts[2].s = tex->Sright;
+         verts[2].t = tex->Ttop;
+         verts[3].t = tex->Ttop;
+         _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
+
+         _mesa_ReadPixels(srcX, srcY, srcW, srcH,
+                          GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp);
+
+         setup_drawpix_texture(ctx, tex, newTex, GL_DEPTH_COMPONENT, srcW, srcH,
+                               GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, tmp);
+
+         /* write only depth: color writes off, depth test always passes */
+         _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, blit->DepthFP);
+         _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_TRUE);
+         _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
+         _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE);
+         _mesa_DepthFunc(GL_ALWAYS);
+         _mesa_DepthMask(GL_TRUE);
+
+         _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+         mask &= ~GL_DEPTH_BUFFER_BIT;
+
+         free(tmp);
+      }
+   }
+
+   if (mask & GL_STENCIL_BUFFER_BIT) {
+      /* XXX can't easily do stencil */
+   }
+
+   _mesa_set_enable(ctx, tex->Target, GL_FALSE);
+
+   _mesa_meta_end(ctx);
+
+   if (mask) {
+      /* whatever is left goes to swrast, with the caller's coordinates */
+      _swrast_BlitFramebuffer(ctx, srcX0, srcY0, srcX1, srcY1,
+                              dstX0, dstY0, dstX1, dstY1, mask, filter);
+   }
+}
+
+
+/**
+ * Meta implementation of ctx->Driver.Clear() in terms of polygon rendering.
+ *
+ * Draws one quad covering the draw buffer's _Xmin.._Xmax / _Ymin.._Ymax
+ * bounds.  The clear color is used as the vertex color, the clear depth
+ * (converted with invert_z()) as the vertex Z and, when stencil is being
+ * cleared, the stencil clear value as the reference value with
+ * GL_REPLACE ops.
+ *
+ * \param ctx      the GL context
+ * \param buffers  bitmask of BUFFER_BIT_x flags selecting what to clear
+ */
+void
+_mesa_meta_Clear(GLcontext *ctx, GLbitfield buffers)
+{
+   struct clear_state *clear = &ctx->Meta->Clear;
+   struct vertex {
+      GLfloat x, y, z, r, g, b, a;
+   };
+   struct vertex verts[4];
+   /* save all state but scissor, pixel pack/unpack */
+   GLbitfield metaSave = META_ALL - META_SCISSOR - META_PIXEL_STORE;
+
+   if (buffers & BUFFER_BITS_COLOR) {
+      /* if clearing color buffers, don't save/restore colormask */
+      metaSave -= META_COLOR_MASK;
+   }
+
+   _mesa_meta_begin(ctx, metaSave);
+
+   if (clear->ArrayObj == 0) {
+      /* one-time setup */
+
+      /* create vertex array object */
+      _mesa_GenVertexArrays(1, &clear->ArrayObj);
+      _mesa_BindVertexArray(clear->ArrayObj);
+
+      /* create vertex array buffer */
+      _mesa_GenBuffersARB(1, &clear->VBO);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO);
+
+      /* setup vertex arrays */
+      _mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
+      _mesa_ColorPointer(4, GL_FLOAT, sizeof(struct vertex), OFFSET(r));
+      _mesa_EnableClientState(GL_VERTEX_ARRAY);
+      _mesa_EnableClientState(GL_COLOR_ARRAY);
+   }
+   else {
+      _mesa_BindVertexArray(clear->ArrayObj);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, clear->VBO);
+   }
+
+   /* GL_COLOR_BUFFER_BIT */
+   if (buffers & BUFFER_BITS_COLOR) {
+      /* leave colormask, glDrawBuffer state as-is */
+   }
+   else {
+      ASSERT(metaSave & META_COLOR_MASK);
+      _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
+   }
+
+   /* GL_DEPTH_BUFFER_BIT */
+   if (buffers & BUFFER_BIT_DEPTH) {
+      _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE);
+      _mesa_DepthFunc(GL_ALWAYS);
+      _mesa_DepthMask(GL_TRUE);
+   }
+   else {
+      assert(!ctx->Depth.Test);
+   }
+
+   /* GL_STENCIL_BUFFER_BIT */
+   if (buffers & BUFFER_BIT_STENCIL) {
+      _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE);
+      _mesa_StencilOpSeparate(GL_FRONT_AND_BACK,
+                              GL_REPLACE, GL_REPLACE, GL_REPLACE);
+      _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS,
+                                ctx->Stencil.Clear & 0x7fffffff,
+                                ctx->Stencil.WriteMask[0]);
+   }
+   else {
+      assert(!ctx->Stencil.Enabled);
+   }
+
+   /* vertex positions/colors */
+   {
+      const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin;
+      const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin;
+      const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax;
+      const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax;
+      const GLfloat z = invert_z(ctx->Depth.Clear);
+      GLuint i;
+
+      verts[0].x = x0;
+      verts[0].y = y0;
+      verts[0].z = z;
+      verts[1].x = x1;
+      verts[1].y = y0;
+      verts[1].z = z;
+      verts[2].x = x1;
+      verts[2].y = y1;
+      verts[2].z = z;
+      verts[3].x = x0;
+      verts[3].y = y1;
+      verts[3].z = z;
+
+      /* vertex colors: every corner carries the clear color */
+      for (i = 0; i < 4; i++) {
+         verts[i].r = ctx->Color.ClearColor[0];
+         verts[i].g = ctx->Color.ClearColor[1];
+         verts[i].b = ctx->Color.ClearColor[2];
+         verts[i].a = ctx->Color.ClearColor[3];
+      }
+
+      /* upload new vertex data */
+      _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts,
+			  GL_DYNAMIC_DRAW_ARB);
+   }
+
+   /* draw quad */
+   _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+   _mesa_meta_end(ctx);
+}
+
+
+/**
+ * Meta implementation of ctx->Driver.CopyPixels() in terms of texture
+ * mapping and polygon rendering; unsupported cases fall back to swrast.
+ */
+void
+_mesa_meta_CopyPixels(GLcontext *ctx, GLint srcX, GLint srcY,
+                      GLsizei width, GLsizei height,
+                      GLint dstX, GLint dstY, GLenum type)
+{
+   struct copypix_state *copypix = &ctx->Meta->CopyPix;
+   struct temp_texture *tex = get_temp_texture(ctx);
+   struct vertex {
+      GLfloat x, y, z, s, t;   /* position, then texcoord */
+   };
+   struct vertex verts[4];     /* quad corners, drawn as a triangle fan */
+   GLboolean newTex;           /* alloc_texture() result */
+   GLenum intFormat = GL_RGBA;
+
+   if (type != GL_COLOR ||
+       ctx->_ImageTransferState ||
+       ctx->Fog.Enabled ||
+       width > tex->MaxSize ||
+       height > tex->MaxSize) {
+      /* XXX avoid this fallback: swrast handles every case we reject */
+      _swrast_CopyPixels(ctx, srcX, srcY, width, height, dstX, dstY, type);
+      return;
+   }
+
+   /* Most GL state applies to glCopyPixels, but there are a few things
+    * we need to override:
+    */
+   _mesa_meta_begin(ctx, (META_RASTERIZATION |
+                          META_SHADER |
+                          META_TEXTURE |
+                          META_TRANSFORM |
+                          META_VERTEX |
+                          META_VIEWPORT));
+
+   if (copypix->ArrayObj == 0) {
+      /* one-time setup (ArrayObj is still zero on the first call) */
+
+      /* create vertex array object */
+      _mesa_GenVertexArrays(1, &copypix->ArrayObj);
+      _mesa_BindVertexArray(copypix->ArrayObj);
+
+      /* create vertex array buffer; storage only, filled in below */
+      _mesa_GenBuffersARB(1, &copypix->VBO);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, copypix->VBO);
+      _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts),
+                          NULL, GL_DYNAMIC_DRAW_ARB);
+
+      /* setup vertex arrays */
+      _mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
+      _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(s));
+      _mesa_EnableClientState(GL_VERTEX_ARRAY);
+      _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
+   }
+   else {   /* later calls: re-bind the objects created by the first call */
+      _mesa_BindVertexArray(copypix->ArrayObj);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, copypix->VBO);
+   }
+
+   newTex = alloc_texture(tex, width, height, intFormat);
+
+   /* vertex positions, texcoords (after texture allocation!) */
+   {
+      const GLfloat dstX0 = (GLfloat) dstX;
+      const GLfloat dstY0 = (GLfloat) dstY;
+      const GLfloat dstX1 = dstX + width * ctx->Pixel.ZoomX;   /* zoomed */
+      const GLfloat dstY1 = dstY + height * ctx->Pixel.ZoomY;  /* zoomed */
+      const GLfloat z = invert_z(ctx->Current.RasterPos[2]);
+
+      verts[0].x = dstX0;
+      verts[0].y = dstY0;
+      verts[0].z = z;
+      verts[0].s = 0.0F;
+      verts[0].t = 0.0F;
+      verts[1].x = dstX1;
+      verts[1].y = dstY0;
+      verts[1].z = z;
+      verts[1].s = tex->Sright;
+      verts[1].t = 0.0F;
+      verts[2].x = dstX1;
+      verts[2].y = dstY1;
+      verts[2].z = z;
+      verts[2].s = tex->Sright;
+      verts[2].t = tex->Ttop;
+      verts[3].x = dstX0;
+      verts[3].y = dstY1;
+      verts[3].z = z;
+      verts[3].s = 0.0F;
+      verts[3].t = tex->Ttop;
+
+      /* upload new vertex data into the buffer bound above */
+      _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
+   }
+
+   /* Alloc/setup texture from the source rectangle at (srcX, srcY) */
+   setup_copypix_texture(tex, newTex, srcX, srcY, width, height,
+                         GL_RGBA, GL_NEAREST);
+
+   _mesa_set_enable(ctx, tex->Target, GL_TRUE);
+
+   /* draw textured quad */
+   _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+   _mesa_set_enable(ctx, tex->Target, GL_FALSE);
+
+   _mesa_meta_end(ctx);
+}
+
+
+
+/**
+ * Draw a glDrawPixels() image that is larger than the maximum texture
+ * size by splitting it into tiles of at most tileSize x tileSize pixels
+ * and issuing one _mesa_meta_DrawPixels() call per tile.
+ */
+static void
+tiled_draw_pixels(GLcontext *ctx,
+                  GLint tileSize,
+                  GLint x, GLint y, GLsizei width, GLsizei height,
+                  GLenum format, GLenum type,
+                  const struct gl_pixelstore_attrib *unpack,
+                  const GLvoid *pixels)
+{
+   struct gl_pixelstore_attrib subUnpack = *unpack;
+   GLint col, row;
+
+   /* each tile addresses the full image, so pin down the real row stride */
+   if (subUnpack.RowLength == 0) {
+      subUnpack.RowLength = width;
+   }
+   for (col = 0; col < width; col += tileSize) {
+      const GLint w = MIN2(tileSize, width - col);
+      const GLint winX = (GLint) (x + col * ctx->Pixel.ZoomX);
+
+      for (row = 0; row < height; row += tileSize) {
+         const GLint h = MIN2(tileSize, height - row);
+         const GLint winY = (GLint) (y + row * ctx->Pixel.ZoomY);
+
+         /* select this tile's origin inside the client image */
+         subUnpack.SkipPixels = unpack->SkipPixels + col;
+         subUnpack.SkipRows = unpack->SkipRows + row;
+         _mesa_meta_DrawPixels(ctx, winX, winY, w, h,
+                               format, type, &subUnpack, pixels);
+      }
+   }
+}
+
+
+/**
+ * One-time init for drawing stencil pixels: builds the fragment program
+ * used by the stencil path of _mesa_meta_DrawPixels() into StencilFP.
+ */
+static void
+init_draw_stencil_pixels(GLcontext *ctx)
+{
+   /* This program is executed once per stencil bit being written.
+    * The stencil values to draw are found in an 8-bit alpha texture.
+    * We fetch the texel and test whether bit 'b' of the value is set;
+    * if it is not, KIL discards the fragment.
+    * The stencil test then updates the stencil buffer for the survivors.
+    *
+    * A bit is tested like this:
+    *   if (is_odd(value / (1 << bit)))
+    *      result is one (or non-zero).
+    *   else
+    *      result is zero.
+    * The program parameter supplies three values:
+    *   parm.x = 255 / (1 << bit)
+    *   parm.y = 0.5
+    *   parm.z = 0.0
+    */
+   static const char *fpTemplate =
+      "!!ARBfp1.0\n"
+      "PARAM parm = program.local[0]; \n"
+      "TEMP t; \n"
+      "TEX t, fragment.texcoord[0], texture[0], %s; \n"   /* NOTE %s here! */
+      "# t = t * 255 / bit \n"
+      "MUL t.x, t.a, parm.x; \n"
+      "# t = (int) t \n"
+      "FRC t.y, t.x; \n"
+      "SUB t.x, t.x, t.y; \n"
+      "# t = t * 0.5 \n"
+      "MUL t.x, t.x, parm.y; \n"
+      "# t = fract(t.x) \n"
+      "FRC t.x, t.x; # if t.x != 0, then the bit is set \n"
+      "# t.x = (t.x == 0 ? 1 : 0) \n"
+      "SGE t.x, -t.x, parm.z; \n"
+      "KIL -t.x; \n"
+      "# for debug only \n"
+      "#MOV result.color, t.x; \n"
+      "END \n";
+   char fpText[1000];
+   struct drawpix_state *drawpix = &ctx->Meta->DrawPix;
+   struct temp_texture *tex = get_temp_texture(ctx);
+   const char *sampler;
+
+   /* must not have been built already */
+   assert(drawpix->StencilFP == 0);
+
+   /* substitute "RECT" or "2D" for the %s in the TEX instruction */
+   assert(strlen(fpTemplate) + 4 < sizeof(fpText));
+   sampler = (tex->Target == GL_TEXTURE_RECTANGLE) ? "RECT" : "2D";
+   _mesa_snprintf(fpText, sizeof(fpText), fpTemplate, sampler);
+
+   /* create the program object and hand it the generated source */
+   _mesa_GenPrograms(1, &drawpix->StencilFP);
+   _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, drawpix->StencilFP);
+   _mesa_ProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
+                          strlen(fpText), (const GLubyte *) fpText);
+}
+
+
+/**
+ * One-time init for drawing depth pixels: creates DepthFP, a fragment
+ * program that writes the texel to result.depth and local[0] to the color.
+ */
+static void
+init_draw_depth_pixels(GLcontext *ctx)
+{
+   static const char *fpTemplate =
+      "!!ARBfp1.0\n"
+      "PARAM color = program.local[0]; \n"
+      "TEX result.depth, fragment.texcoord[0], texture[0], %s; \n"
+      "MOV result.color, color; \n"
+      "END \n";
+   char fpText[200];
+   struct drawpix_state *drawpix = &ctx->Meta->DrawPix;
+   struct temp_texture *tex = get_temp_texture(ctx);
+   const char *sampler;
+
+   /* must not have been built already */
+   assert(drawpix->DepthFP == 0);
+
+   /* substitute "RECT" or "2D" for the %s in the TEX instruction */
+   assert(strlen(fpTemplate) + 4 < sizeof(fpText));
+   sampler = (tex->Target == GL_TEXTURE_RECTANGLE) ? "RECT" : "2D";
+   _mesa_snprintf(fpText, sizeof(fpText), fpTemplate, sampler);
+
+   /* create the program object and hand it the generated source */
+   _mesa_GenPrograms(1, &drawpix->DepthFP);
+   _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, drawpix->DepthFP);
+   _mesa_ProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
+                          strlen(fpText), (const GLubyte *) fpText);
+}
+
+
+/**
+ * Meta implementation of ctx->Driver.DrawPixels() in terms of texture
+ * mapping and polygon rendering; unsupported cases fall back to swrast.
+ */
+void
+_mesa_meta_DrawPixels(GLcontext *ctx,
+                      GLint x, GLint y, GLsizei width, GLsizei height,
+                      GLenum format, GLenum type,
+                      const struct gl_pixelstore_attrib *unpack,
+                      const GLvoid *pixels)
+{
+   struct drawpix_state *drawpix = &ctx->Meta->DrawPix;
+   struct temp_texture *tex = get_temp_texture(ctx);
+   const struct gl_pixelstore_attrib unpackSave = ctx->Unpack; /* restored below */
+   const GLuint origStencilMask = ctx->Stencil.WriteMask[0];
+   struct vertex {
+      GLfloat x, y, z, s, t;   /* position, then texcoord */
+   };
+   struct vertex verts[4];     /* quad corners, drawn as a triangle fan */
+   GLenum texIntFormat;        /* set on every non-fallback path below */
+   GLboolean fallback, newTex;
+   GLbitfield metaExtraSave = 0x0;   /* extra META_x bits for stencil/depth */
+   GLuint vbo;                 /* per-call vertex buffer, deleted below */
+
+   /*
+    * Determine if we can do the glDrawPixels with texture mapping.
+    */
+   fallback = GL_FALSE;
+   if (ctx->_ImageTransferState ||
+       ctx->Fog.Enabled) {
+      fallback = GL_TRUE;
+   }
+
+   if (_mesa_is_color_format(format)) {
+      /* use more compact format when possible */
+      /* XXX disable special case for GL_LUMINANCE for now to work around
+       * apparent i965 driver bug (see bug #23670).
+       */
+      if (/*format == GL_LUMINANCE ||*/ format == GL_LUMINANCE_ALPHA)
+         texIntFormat = format;
+      else
+         texIntFormat = GL_RGBA;
+   }
+   else if (_mesa_is_stencil_format(format)) {
+      if (ctx->Extensions.ARB_fragment_program &&
+          ctx->Pixel.IndexShift == 0 &&
+          ctx->Pixel.IndexOffset == 0 &&
+          type == GL_UNSIGNED_BYTE) {
+         /* We'll store stencil as alpha.  This only works for GLubyte
+          * image data because of how incoming values are mapped to alpha
+          * in [0,1].
+          */
+         texIntFormat = GL_ALPHA;
+         metaExtraSave = (META_COLOR_MASK |
+                          META_DEPTH_TEST |
+                          META_SHADER |
+                          META_STENCIL_TEST);
+      }
+      else {
+         fallback = GL_TRUE;
+      }
+   }
+   else if (_mesa_is_depth_format(format)) {
+      if (ctx->Extensions.ARB_depth_texture &&
+          ctx->Extensions.ARB_fragment_program) {
+         texIntFormat = GL_DEPTH_COMPONENT;
+         metaExtraSave = (META_SHADER);
+      }
+      else {
+         fallback = GL_TRUE;
+      }
+   }
+   else {
+      fallback = GL_TRUE;
+   }
+
+   if (fallback) {
+      _swrast_DrawPixels(ctx, x, y, width, height,
+                         format, type, unpack, pixels);
+      return;
+   }
+
+   /*
+    * Check image size against max texture size, draw as tiles if needed.
+    */
+   if (width > tex->MaxSize || height > tex->MaxSize) {
+      tiled_draw_pixels(ctx, tex->MaxSize, x, y, width, height,
+                        format, type, unpack, pixels);   /* re-enters per tile */
+      return;
+   }
+
+   /* Most GL state applies to glDrawPixels (like blending, stencil, etc),
+    * but there are a few things we need to override:
+    */
+   _mesa_meta_begin(ctx, (META_RASTERIZATION |
+                          META_SHADER |
+                          META_TEXTURE |
+                          META_TRANSFORM |
+                          META_VERTEX |
+                          META_VIEWPORT |
+                          metaExtraSave));
+
+   newTex = alloc_texture(tex, width, height, texIntFormat);
+
+   /* vertex positions, texcoords (after texture allocation!) */
+   {
+      const GLfloat x0 = (GLfloat) x;
+      const GLfloat y0 = (GLfloat) y;
+      const GLfloat x1 = x + width * ctx->Pixel.ZoomX;    /* zoomed */
+      const GLfloat y1 = y + height * ctx->Pixel.ZoomY;   /* zoomed */
+      const GLfloat z = invert_z(ctx->Current.RasterPos[2]);
+
+      verts[0].x = x0;
+      verts[0].y = y0;
+      verts[0].z = z;
+      verts[0].s = 0.0F;
+      verts[0].t = 0.0F;
+      verts[1].x = x1;
+      verts[1].y = y0;
+      verts[1].z = z;
+      verts[1].s = tex->Sright;
+      verts[1].t = 0.0F;
+      verts[2].x = x1;
+      verts[2].y = y1;
+      verts[2].z = z;
+      verts[2].s = tex->Sright;
+      verts[2].t = tex->Ttop;
+      verts[3].x = x0;
+      verts[3].y = y1;
+      verts[3].z = z;
+      verts[3].s = 0.0F;
+      verts[3].t = tex->Ttop;
+   }
+
+   if (drawpix->ArrayObj == 0) {
+      /* one-time setup: create vertex array object */
+      _mesa_GenVertexArrays(1, &drawpix->ArrayObj);
+   }
+   _mesa_BindVertexArray(drawpix->ArrayObj);
+
+   /* create vertex array buffer (a fresh one per call; deleted below) */
+   _mesa_GenBuffersARB(1, &vbo);
+   _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, vbo);
+   _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts),
+                       verts, GL_DYNAMIC_DRAW_ARB);
+
+   /* setup vertex arrays (redone each call since the VBO is new) */
+   _mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
+   _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(s));
+   _mesa_EnableClientState(GL_VERTEX_ARRAY);
+   _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
+
+   /* set given unpack params */
+   ctx->Unpack = *unpack;
+
+   _mesa_set_enable(ctx, tex->Target, GL_TRUE);
+
+   if (_mesa_is_stencil_format(format)) {
+      /* Drawing stencil: one pass to clear, then one pass per stencil bit */
+      GLint bit;
+
+      if (!drawpix->StencilFP)
+         init_draw_stencil_pixels(ctx);
+
+      setup_drawpix_texture(ctx, tex, newTex, texIntFormat, width, height,
+                            GL_ALPHA, type, pixels);
+
+      _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
+
+      _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE);
+
+      /* set all stencil bits to 0 */
+      _mesa_StencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE);
+      _mesa_StencilFunc(GL_ALWAYS, 0, 255);
+      _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+  
+      /* set stencil bits to 1 where needed (StencilFP kills the rest) */
+      _mesa_StencilOp(GL_KEEP, GL_KEEP, GL_REPLACE);
+
+      _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, drawpix->StencilFP);
+      _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_TRUE);
+
+      for (bit = 0; bit < ctx->DrawBuffer->Visual.stencilBits; bit++) {
+         const GLuint mask = 1 << bit;
+         if (mask & origStencilMask) {   /* skip bits masked off at entry */
+            _mesa_StencilFunc(GL_ALWAYS, mask, mask);
+            _mesa_StencilMask(mask);
+
+            _mesa_ProgramLocalParameter4fARB(GL_FRAGMENT_PROGRAM_ARB, 0,
+                                             255.0 / mask, 0.5, 0.0, 0.0); /* parm.xyz */
+
+            _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+         }
+      }
+   }
+   else if (_mesa_is_depth_format(format)) {
+      /* Drawing depth: DepthFP writes the texel to result.depth */
+      if (!drawpix->DepthFP)
+         init_draw_depth_pixels(ctx);
+
+      _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, drawpix->DepthFP);
+      _mesa_set_enable(ctx, GL_FRAGMENT_PROGRAM_ARB, GL_TRUE);
+
+      /* polygon color = current raster color */
+      _mesa_ProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0,
+                                        ctx->Current.RasterColor);
+
+      setup_drawpix_texture(ctx, tex, newTex, texIntFormat, width, height,
+                            format, type, pixels);
+
+      _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+   }
+   else {
+      /* Drawing RGBA */
+      setup_drawpix_texture(ctx, tex, newTex, texIntFormat, width, height,
+                            format, type, pixels);
+      _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+   }
+
+   _mesa_set_enable(ctx, tex->Target, GL_FALSE);
+
+   _mesa_DeleteBuffersARB(1, &vbo);
+
+   /* restore unpack params (saved at function entry) */
+   ctx->Unpack = unpackSave;
+
+   _mesa_meta_end(ctx);
+}
+
+
+/**
+ * Do glBitmap with an alpha-textured quad.  Use the alpha test to
+ * cull the 'off' bits.  If alpha test is already enabled, fall back
+ * to swrast (should be a rare case).
+ * A bitmap cache as in the gallium/mesa state tracker would
+ * improve performance a lot.
+ */
+void
+_mesa_meta_Bitmap(GLcontext *ctx,
+                  GLint x, GLint y, GLsizei width, GLsizei height,
+                  const struct gl_pixelstore_attrib *unpack,
+                  const GLubyte *bitmap1)
+{
+   struct bitmap_state *bitmap = &ctx->Meta->Bitmap;
+   struct temp_texture *tex = get_bitmap_temp_texture(ctx);
+   const GLenum texIntFormat = GL_ALPHA;
+   const struct gl_pixelstore_attrib unpackSave = *unpack;   /* private copy */
+   struct vertex {
+      GLfloat x, y, z, s, t, r, g, b, a;   /* position, texcoord, color */
+   };
+   struct vertex verts[4];     /* quad corners, drawn as a triangle fan */
+   GLboolean newTex;           /* alloc_texture() result */
+   GLubyte *bitmap8;           /* width*height byte expansion of the bitmap */
+
+   /*
+    * Check if swrast fallback is needed.
+    */
+   if (ctx->_ImageTransferState ||
+       ctx->Color.AlphaEnabled ||
+       ctx->Fog.Enabled ||
+       ctx->Texture._EnabledUnits ||
+       width > tex->MaxSize ||
+       height > tex->MaxSize) {
+      _swrast_Bitmap(ctx, x, y, width, height, unpack, bitmap1);
+      return;
+   }
+
+   /* Most GL state applies to glBitmap (like blending, stencil, etc),
+    * but there are a few things we need to override:
+    */
+   _mesa_meta_begin(ctx, (META_ALPHA_TEST |
+                          META_PIXEL_STORE |
+                          META_RASTERIZATION |
+                          META_SHADER |
+                          META_TEXTURE |
+                          META_TRANSFORM |
+                          META_VERTEX |
+                          META_VIEWPORT));
+
+   if (bitmap->ArrayObj == 0) {
+      /* one-time setup (ArrayObj is still zero on the first call) */
+
+      /* create vertex array object */
+      _mesa_GenVertexArraysAPPLE(1, &bitmap->ArrayObj);
+      _mesa_BindVertexArrayAPPLE(bitmap->ArrayObj);
+
+      /* create vertex array buffer; storage only, filled in below */
+      _mesa_GenBuffersARB(1, &bitmap->VBO);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, bitmap->VBO);
+      _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts),
+                          NULL, GL_DYNAMIC_DRAW_ARB);
+
+      /* setup vertex arrays */
+      _mesa_VertexPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
+      _mesa_TexCoordPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(s));
+      _mesa_ColorPointer(4, GL_FLOAT, sizeof(struct vertex), OFFSET(r));
+      _mesa_EnableClientState(GL_VERTEX_ARRAY);
+      _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
+      _mesa_EnableClientState(GL_COLOR_ARRAY);
+   }
+   else {
+      /* NOTE(review): object was created via the APPLE entry points above */
+      _mesa_BindVertexArray(bitmap->ArrayObj);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, bitmap->VBO);
+   }
+
+   newTex = alloc_texture(tex, width, height, texIntFormat);
+
+   /* vertex positions, texcoords, colors (after texture allocation!) */
+   {
+      const GLfloat x0 = (GLfloat) x;
+      const GLfloat y0 = (GLfloat) y;
+      const GLfloat x1 = (GLfloat) (x + width);    /* no pixel zoom here */
+      const GLfloat y1 = (GLfloat) (y + height);
+      const GLfloat z = invert_z(ctx->Current.RasterPos[2]);
+      GLuint i;
+
+      verts[0].x = x0;
+      verts[0].y = y0;
+      verts[0].z = z;
+      verts[0].s = 0.0F;
+      verts[0].t = 0.0F;
+      verts[1].x = x1;
+      verts[1].y = y0;
+      verts[1].z = z;
+      verts[1].s = tex->Sright;
+      verts[1].t = 0.0F;
+      verts[2].x = x1;
+      verts[2].y = y1;
+      verts[2].z = z;
+      verts[2].s = tex->Sright;
+      verts[2].t = tex->Ttop;
+      verts[3].x = x0;
+      verts[3].y = y1;
+      verts[3].z = z;
+      verts[3].s = 0.0F;
+      verts[3].t = tex->Ttop;
+
+      for (i = 0; i < 4; i++) {   /* every corner gets the raster color */
+         verts[i].r = ctx->Current.RasterColor[0];
+         verts[i].g = ctx->Current.RasterColor[1];
+         verts[i].b = ctx->Current.RasterColor[2];
+         verts[i].a = ctx->Current.RasterColor[3];
+      }
+
+      /* upload new vertex data into the buffer bound above */
+      _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
+   }
+
+   bitmap1 = _mesa_map_pbo_source(ctx, &unpackSave, bitmap1);
+   if (!bitmap1) {   /* no bitmap data to read: undo meta state and bail */
+      _mesa_meta_end(ctx);
+      return;
+   }
+
+   bitmap8 = (GLubyte *) calloc(1, width * height);
+   if (bitmap8) {   /* on allocation failure the draw is silently skipped */
+      _mesa_expand_bitmap(width, height, &unpackSave, bitmap1,
+                          bitmap8, width, 0xff);
+
+      _mesa_set_enable(ctx, tex->Target, GL_TRUE);
+
+      /* fragments with alpha == 0 fail the test and are dropped */
+      _mesa_set_enable(ctx, GL_ALPHA_TEST, GL_TRUE);
+      _mesa_AlphaFunc(GL_GREATER, 0.0);
+
+      setup_drawpix_texture(ctx, tex, newTex, texIntFormat, width, height,
+                            GL_ALPHA, GL_UNSIGNED_BYTE, bitmap8);
+
+      _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+      _mesa_set_enable(ctx, tex->Target, GL_FALSE);
+
+      free(bitmap8);
+   }
+
+   _mesa_unmap_pbo_source(ctx, &unpackSave);
+
+   _mesa_meta_end(ctx);
+}
+
+
+/**
+ * Determine whether _mesa_meta_GenerateMipmap() must take the software
+ * path for this texture.  The software path needs the texture images
+ * to be mapped.
+ * \return GL_TRUE if a fallback is needed, GL_FALSE otherwise
+ */
+GLboolean
+_mesa_meta_check_generate_mipmap_fallback(GLcontext *ctx, GLenum target,
+                                          struct gl_texture_object *texObj)
+{
+   const GLuint prevFbo = ctx->DrawBuffer->Name;
+   struct gen_mipmap_state *mipmap = &ctx->Meta->Mipmap;
+   struct gl_texture_image *level0;
+   GLuint srcLevel;
+   GLenum fbStatus;
+
+   /* no FBO support, or a 3D texture: always fall back */
+   if (!ctx->Extensions.EXT_framebuffer_object)
+      return GL_TRUE;
+   if (target == GL_TEXTURE_3D)
+      return GL_TRUE;
+
+   /* a missing or compressed base image also forces the fallback */
+   srcLevel = texObj->BaseLevel;
+   level0 = _mesa_select_tex_image(ctx, texObj, target, srcLevel);
+   if (!level0)
+      return GL_TRUE;
+   if (_mesa_is_format_compressed(level0->TexFormat))
+      return GL_TRUE;
+
+   /*
+    * Probe renderability: attach the base level to the mipmap FBO.
+    */
+   if (mipmap->FBO == 0) {
+      _mesa_GenFramebuffersEXT(1, &mipmap->FBO);
+   }
+   _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, mipmap->FBO);
+
+   if (target == GL_TEXTURE_1D) {
+      _mesa_FramebufferTexture1DEXT(GL_FRAMEBUFFER_EXT,
+                                    GL_COLOR_ATTACHMENT0_EXT,
+                                    target, texObj->Name, srcLevel);
+   }
+#if 0
+   /* other work is needed to enable 3D mipmap generation */
+   else if (target == GL_TEXTURE_3D) {
+      GLint zoffset = 0;
+      _mesa_FramebufferTexture3DEXT(GL_FRAMEBUFFER_EXT,
+                                    GL_COLOR_ATTACHMENT0_EXT,
+                                    target, texObj->Name, srcLevel, zoffset);
+   }
+#endif
+   else {
+      /* 2D / cube */
+      _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
+                                    GL_COLOR_ATTACHMENT0_EXT,
+                                    target, texObj->Name, srcLevel);
+   }
+
+   fbStatus = _mesa_CheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
+
+   /* re-bind the framebuffer that was current on entry */
+   _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, prevFbo);
+
+   return (fbStatus != GL_FRAMEBUFFER_COMPLETE_EXT) ? GL_TRUE : GL_FALSE;
+}
+
+
+/**
+ * Called via ctx->Driver.GenerateMipmap()
+ * Note: texture borders and 3D texture support not yet complete.
+ */
+void
+_mesa_meta_GenerateMipmap(GLcontext *ctx, GLenum target,
+                          struct gl_texture_object *texObj)
+{
+   struct gen_mipmap_state *mipmap = &ctx->Meta->Mipmap;
+   struct vertex {
+      GLfloat x, y, s, t, r;
+   };
+   struct vertex verts[4];
+   const GLuint baseLevel = texObj->BaseLevel;
+   const GLuint maxLevel = texObj->MaxLevel;
+   const GLenum minFilterSave = texObj->MinFilter;
+   const GLenum magFilterSave = texObj->MagFilter;
+   const GLint baseLevelSave = texObj->BaseLevel;
+   const GLint maxLevelSave = texObj->MaxLevel;
+   const GLboolean genMipmapSave = texObj->GenerateMipmap;
+   const GLenum wrapSSave = texObj->WrapS;
+   const GLenum wrapTSave = texObj->WrapT;
+   const GLenum wrapRSave = texObj->WrapR;
+   const GLuint fboSave = ctx->DrawBuffer->Name;
+   const GLuint original_active_unit = ctx->Texture.CurrentUnit;
+   GLenum faceTarget;
+   GLuint dstLevel;
+   GLuint border = 0;
+
+   if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, texObj)) {
+      _mesa_generate_mipmap(ctx, target, texObj);
+      return;
+   }
+
+   if (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X &&
+       target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z) {
+      faceTarget = target;
+      target = GL_TEXTURE_CUBE_MAP;
+   }
+   else {
+      faceTarget = target;
+   }
+
+   _mesa_meta_begin(ctx, META_ALL);
+
+   if (original_active_unit != 0)
+      _mesa_BindTexture(target, texObj->Name);
+
+   if (mipmap->ArrayObj == 0) {
+      /* one-time setup */
+
+      /* create vertex array object */
+      _mesa_GenVertexArraysAPPLE(1, &mipmap->ArrayObj);
+      _mesa_BindVertexArrayAPPLE(mipmap->ArrayObj);
+
+      /* create vertex array buffer */
+      _mesa_GenBuffersARB(1, &mipmap->VBO);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, mipmap->VBO);
+      _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(verts),
+                          NULL, GL_DYNAMIC_DRAW_ARB);
+
+      /* setup vertex arrays */
+      _mesa_VertexPointer(2, GL_FLOAT, sizeof(struct vertex), OFFSET(x));
+      _mesa_TexCoordPointer(3, GL_FLOAT, sizeof(struct vertex), OFFSET(s));
+      _mesa_EnableClientState(GL_VERTEX_ARRAY);
+      _mesa_EnableClientState(GL_TEXTURE_COORD_ARRAY);
+   }
+   else {
+      _mesa_BindVertexArray(mipmap->ArrayObj);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, mipmap->VBO);
+   }
+
+   if (!mipmap->FBO) {
+      _mesa_GenFramebuffersEXT(1, &mipmap->FBO);
+   }
+   _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, mipmap->FBO);
+
+   _mesa_TexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+   _mesa_TexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+   _mesa_TexParameteri(target, GL_GENERATE_MIPMAP, GL_FALSE);
+   _mesa_TexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+   _mesa_TexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+   _mesa_TexParameteri(target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
+
+   _mesa_set_enable(ctx, target, GL_TRUE);
+
+   /* setup texcoords once (XXX what about border?) */
+   switch (faceTarget) {
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+      verts[0].s = 0.0F;
+      verts[0].t = 0.0F;
+      verts[0].r = 0.0F;
+      verts[1].s = 1.0F;
+      verts[1].t = 0.0F;
+      verts[1].r = 0.0F;
+      verts[2].s = 1.0F;
+      verts[2].t = 1.0F;
+      verts[2].r = 0.0F;
+      verts[3].s = 0.0F;
+      verts[3].t = 1.0F;
+      verts[3].r = 0.0F;
+      break;
+   case GL_TEXTURE_3D:
+      abort();
+      break;
+   default:
+      /* cube face */
+      {
+         static const GLfloat st[4][2] = {
+            {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
+         };
+         GLuint i;
+
+         /* loop over quad verts */
+         for (i = 0; i < 4; i++) {
+            /* Compute sc = +/-scale and tc = +/-scale.
+             * Not +/-1 to avoid cube face selection ambiguity near the edges,
+             * though that can still sometimes happen with this scale factor...
+             */
+            const GLfloat scale = 0.9999f;
+            const GLfloat sc = (2.0f * st[i][0] - 1.0f) * scale;
+            const GLfloat tc = (2.0f * st[i][1] - 1.0f) * scale;
+
+            switch (faceTarget) {
+            case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+               verts[i].s = 1.0f;
+               verts[i].t = -tc;
+               verts[i].r = -sc;
+               break;
+            case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+               verts[i].s = -1.0f;
+               verts[i].t = -tc;
+               verts[i].r = sc;
+               break;
+            case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+               verts[i].s = sc;
+               verts[i].t = 1.0f;
+               verts[i].r = tc;
+               break;
+            case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+               verts[i].s = sc;
+               verts[i].t = -1.0f;
+               verts[i].r = -tc;
+               break;
+            case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+               verts[i].s = sc;
+               verts[i].t = -tc;
+               verts[i].r = 1.0f;
+               break;
+            case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+               verts[i].s = -sc;
+               verts[i].t = -tc;
+               verts[i].r = -1.0f;
+               break;
+            default:
+               assert(0);
+            }
+         }
+      }
+   }
+
+   _mesa_set_enable(ctx, target, GL_TRUE);
+
+   /* setup vertex positions */
+   {
+      verts[0].x = 0.0F;
+      verts[0].y = 0.0F;
+      verts[1].x = 1.0F;
+      verts[1].y = 0.0F;
+      verts[2].x = 1.0F;
+      verts[2].y = 1.0F;
+      verts[3].x = 0.0F;
+      verts[3].y = 1.0F;
+      
+      /* upload new vertex data */
+      _mesa_BufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
+   }
+
+   /* setup projection matrix */
+   _mesa_MatrixMode(GL_PROJECTION);
+   _mesa_LoadIdentity();
+   _mesa_Ortho(0.0, 1.0, 0.0, 1.0, -1.0, 1.0);
+
+   /* texture is already locked, unlock now */
+   _mesa_unlock_texture(ctx, texObj);
+
+   for (dstLevel = baseLevel + 1; dstLevel <= maxLevel; dstLevel++) {
+      const struct gl_texture_image *srcImage;
+      const GLuint srcLevel = dstLevel - 1;
+      GLsizei srcWidth, srcHeight, srcDepth;
+      GLsizei dstWidth, dstHeight, dstDepth;
+      GLenum status;
+
+      srcImage = _mesa_select_tex_image(ctx, texObj, faceTarget, srcLevel);
+      assert(srcImage->Border == 0); /* XXX we can fix this */
+
+      /* src size w/out border */
+      srcWidth = srcImage->Width - 2 * border;
+      srcHeight = srcImage->Height - 2 * border;
+      srcDepth = srcImage->Depth - 2 * border;
+
+      /* new dst size w/ border */
+      dstWidth = MAX2(1, srcWidth / 2) + 2 * border;
+      dstHeight = MAX2(1, srcHeight / 2) + 2 * border;
+      dstDepth = MAX2(1, srcDepth / 2) + 2 * border;
+
+      if (dstWidth == srcImage->Width &&
+          dstHeight == srcImage->Height &&
+          dstDepth == srcImage->Depth) {
+         /* all done */
+         break;
+      }
+
+      /* Set MaxLevel large enough to hold the new level when we allocate it  */
+      _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, dstLevel);
+
+      /* Create empty dest image */
+      if (target == GL_TEXTURE_1D) {
+         _mesa_TexImage1D(target, dstLevel, srcImage->InternalFormat,
+                          dstWidth, border,
+                          GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+      }
+      else if (target == GL_TEXTURE_3D) {
+         _mesa_TexImage3D(target, dstLevel, srcImage->InternalFormat,
+                          dstWidth, dstHeight, dstDepth, border,
+                          GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+      }
+      else {
+         /* 2D or cube */
+         _mesa_TexImage2D(faceTarget, dstLevel, srcImage->InternalFormat,
+                          dstWidth, dstHeight, border,
+                          GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+
+         if (target == GL_TEXTURE_CUBE_MAP) {
+            /* If texturing from a cube, we need to make sure all src faces
+             * have been defined (even if we're not sampling from them.)
+             * Otherwise the texture object will be 'incomplete' and
+             * texturing from it will not be allowed.
+             */
+            GLuint face;
+            for (face = 0; face < 6; face++) {
+               if (!texObj->Image[face][srcLevel] ||
+                   texObj->Image[face][srcLevel]->Width != srcWidth) {
+                  _mesa_TexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + face,
+                                   srcLevel, srcImage->InternalFormat,
+                                   srcWidth, srcHeight, border,
+                                   GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+               }
+            }
+         }
+      }
+
+      /* limit sampling to src level */
+      _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, srcLevel);
+      _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, srcLevel);
+
+      /* Set to draw into the current dstLevel */
+      if (target == GL_TEXTURE_1D) {
+         _mesa_FramebufferTexture1DEXT(GL_FRAMEBUFFER_EXT,
+                                       GL_COLOR_ATTACHMENT0_EXT,
+                                       target,
+                                       texObj->Name,
+                                       dstLevel);
+      }
+      else if (target == GL_TEXTURE_3D) {
+         GLint zoffset = 0; /* XXX unfinished */
+         _mesa_FramebufferTexture3DEXT(GL_FRAMEBUFFER_EXT,
+                                       GL_COLOR_ATTACHMENT0_EXT,
+                                       target,
+                                       texObj->Name,
+                                       dstLevel, zoffset);
+      }
+      else {
+         /* 2D / cube */
+         _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
+                                       GL_COLOR_ATTACHMENT0_EXT,
+                                       faceTarget,
+                                       texObj->Name,
+                                       dstLevel);
+      }
+
+      _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+
+      /* sanity check */
+      status = _mesa_CheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
+      if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
+         abort();
+         break;
+      }
+
+      assert(dstWidth == ctx->DrawBuffer->Width);
+      assert(dstHeight == ctx->DrawBuffer->Height);
+
+      /* setup viewport */
+      _mesa_set_viewport(ctx, 0, 0, dstWidth, dstHeight);
+
+      _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+   }
+
+   _mesa_lock_texture(ctx, texObj); /* relock */
+
+   _mesa_meta_end(ctx);
+
+   _mesa_TexParameteri(target, GL_TEXTURE_MIN_FILTER, minFilterSave);
+   _mesa_TexParameteri(target, GL_TEXTURE_MAG_FILTER, magFilterSave);
+   _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, baseLevelSave);
+   _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, maxLevelSave);
+   _mesa_TexParameteri(target, GL_GENERATE_MIPMAP, genMipmapSave);
+   _mesa_TexParameteri(target, GL_TEXTURE_WRAP_S, wrapSSave);
+   _mesa_TexParameteri(target, GL_TEXTURE_WRAP_T, wrapTSave);
+   _mesa_TexParameteri(target, GL_TEXTURE_WRAP_R, wrapRSave);
+
+   _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fboSave);
+}
+
+
+/**
+ * Determine the GL data type to use for the temporary image read with
+ * ReadPixels() and passed to Tex[Sub]Image().  Color formats get ubyte,
+ * ushort or float for up to 8, up to 16 or more red bits in the draw buffer.
+ */
+static GLenum
+get_temp_image_type(GLcontext *ctx, GLenum baseFormat)
+{
+   switch (baseFormat) {
+   case GL_RGBA:
+   case GL_RGB:
+   case GL_ALPHA:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE_ALPHA:
+   case GL_INTENSITY:
+      if (ctx->DrawBuffer->Visual.redBits <= 8)
+         return GL_UNSIGNED_BYTE;
+      if (ctx->DrawBuffer->Visual.redBits <= 16) /* 9..16 bits per channel */
+         return GL_UNSIGNED_SHORT;
+      return GL_FLOAT; /* more than 16 bits per channel */
+   case GL_DEPTH_COMPONENT:
+      return GL_UNSIGNED_INT;
+   case GL_DEPTH_STENCIL:
+      return GL_UNSIGNED_INT_24_8;
+   default:
+      _mesa_problem(ctx, "Unexpected format in get_temp_image_type()");
+      return 0; /* callers see this as a bad bytes-per-pixel lookup */
+   }
+}
+
+
+/**
+ * Helper for _mesa_meta_CopyTexImage1/2D() functions: reads a framebuffer
+ * rectangle into a temporary buffer, then stores it as the texture image.
+ * Have to be careful with locking and meta state for pixel transfer: the
+ * texture is unlocked around the driver calls and re-locked afterwards.
+ * \param dims  only used to name the GL entry point in the OOM error
+ * \param x, y, width, height  framebuffer rectangle given to ReadPixels()
+ */
+static void
+copy_tex_image(GLcontext *ctx, GLuint dims, GLenum target, GLint level,
+               GLenum internalFormat, GLint x, GLint y,
+               GLsizei width, GLsizei height, GLint border)
+{
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+   GLsizei postConvWidth = width, postConvHeight = height;
+   GLenum format, type;
+   GLint bpp;
+   void *buf;
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   texImage = _mesa_get_tex_image(ctx, texObj, target, level);
+
+   format = _mesa_base_tex_format(ctx, internalFormat);
+   type = get_temp_image_type(ctx, format);
+   bpp = _mesa_bytes_per_pixel(format, type);
+   if (bpp <= 0) {
+      _mesa_problem(ctx, "Bad bpp in meta copy_tex_image()");
+      return;
+   }
+
+   /* Alloc image buffer (XXX could use a PBO); size math done in size_t */
+   buf = malloc((size_t) width * height * bpp);
+   if (!buf) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexImage%uD", dims);
+      return;
+   }
+
+   if (texImage->TexFormat == MESA_FORMAT_NONE)
+      texImage->TexFormat =
+         ctx->Driver.ChooseTextureFormat(ctx, internalFormat, format, type);
+
+   _mesa_unlock_texture(ctx, texObj); /* need to unlock first */
+
+   /*
+    * Read image from framebuffer (disable pixel transfer ops)
+    */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   ctx->Driver.ReadPixels(ctx, x, y, width, height,
+                          format, type, &ctx->Pack, buf);
+   _mesa_meta_end(ctx);
+
+   /* Prepare for new texture image size/data.
+    * NOTE(review): always passes 2 dimensions, even when dims == 1 -- confirm.
+    */
+#if FEATURE_convolve
+   if (_mesa_is_color_format(internalFormat)) {
+      _mesa_adjust_image_for_convolution(ctx, 2,
+                                         &postConvWidth, &postConvHeight);
+   }
+#endif
+
+   if (texImage->Data) {
+      ctx->Driver.FreeTexImageData(ctx, texImage);
+   }
+
+   _mesa_init_teximage_fields(ctx, target, texImage,
+                              postConvWidth, postConvHeight, 1,
+                              border, internalFormat);
+
+   /*
+    * Store texture data (with pixel transfer ops)
+    */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE);
+
+   _mesa_update_state(ctx); /* to update pixel transfer state */
+
+   if (target == GL_TEXTURE_1D) {
+      ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
+                             width, border, format, type,
+                             buf, &ctx->Unpack, texObj, texImage);
+   }
+   else {
+      ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
+                             width, height, border, format, type,
+                             buf, &ctx->Unpack, texObj, texImage);
+   }
+   _mesa_meta_end(ctx);
+
+   _mesa_lock_texture(ctx, texObj); /* re-lock */
+
+   free(buf);
+}
+
+/** Meta CopyTexImage1D: calls copy_tex_image() with dims = 1, height = 1. */
+void
+_mesa_meta_CopyTexImage1D(GLcontext *ctx, GLenum target, GLint level,
+                          GLenum internalFormat, GLint x, GLint y,
+                          GLsizei width, GLint border)
+{
+   copy_tex_image(ctx, 1, target, level, internalFormat, x, y,
+                  width, 1, border);
+}
+
+/** Meta CopyTexImage2D: calls copy_tex_image() with dims = 2. */
+void
+_mesa_meta_CopyTexImage2D(GLcontext *ctx, GLenum target, GLint level,
+                          GLenum internalFormat, GLint x, GLint y,
+                          GLsizei width, GLsizei height, GLint border)
+{
+   copy_tex_image(ctx, 2, target, level, internalFormat, x, y,
+                  width, height, border);
+}
+
+
+
+/**
+ * Helper for _mesa_meta_CopyTexSubImage1/2/3D() functions: reads a
+ * framebuffer rectangle into a temporary buffer, then stores it into a
+ * region of the existing texture image (a single slice for 3D targets).
+ * Have to be careful with locking and meta state for pixel transfer.
+ * \param dims  only used to name the GL entry point in the OOM error
+ * \param zoffset  only used when \p target is GL_TEXTURE_3D
+ */
+static void
+copy_tex_sub_image(GLcontext *ctx, GLuint dims, GLenum target, GLint level,
+                   GLint xoffset, GLint yoffset, GLint zoffset,
+                   GLint x, GLint y,
+                   GLsizei width, GLsizei height)
+{
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+   GLenum format, type;
+   GLint bpp;
+   void *buf;
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   texImage = _mesa_select_tex_image(ctx, texObj, target, level);
+
+   format = _mesa_get_format_base_format(texImage->TexFormat);
+   type = get_temp_image_type(ctx, format);
+   bpp = _mesa_bytes_per_pixel(format, type);
+   if (bpp <= 0) {
+      _mesa_problem(ctx, "Bad bpp in meta copy_tex_sub_image()");
+      return;
+   }
+
+   /* Alloc image buffer (XXX could use a PBO); size math done in size_t */
+   buf = malloc((size_t) width * height * bpp);
+   if (!buf) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyTexSubImage%uD", dims);
+      return;
+   }
+
+   _mesa_unlock_texture(ctx, texObj); /* need to unlock first */
+
+   /* Read image from framebuffer (disable pixel transfer ops) */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   ctx->Driver.ReadPixels(ctx, x, y, width, height,
+                          format, type, &ctx->Pack, buf);
+   _mesa_meta_end(ctx);
+
+   _mesa_update_state(ctx); /* to update pixel transfer state */
+
+   /*
+    * Store texture data (with pixel transfer ops)
+    */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE);
+   if (target == GL_TEXTURE_1D) {
+      ctx->Driver.TexSubImage1D(ctx, target, level, xoffset,
+                                width, format, type, buf,
+                                &ctx->Unpack, texObj, texImage);
+   }
+   else if (target == GL_TEXTURE_3D) {
+      ctx->Driver.TexSubImage3D(ctx, target, level, xoffset, yoffset, zoffset,
+                                width, height, 1, format, type, buf,
+                                &ctx->Unpack, texObj, texImage);
+   }
+   else {
+      ctx->Driver.TexSubImage2D(ctx, target, level, xoffset, yoffset,
+                                width, height, format, type, buf,
+                                &ctx->Unpack, texObj, texImage);
+   }
+   _mesa_meta_end(ctx);
+
+   _mesa_lock_texture(ctx, texObj); /* re-lock */
+
+   free(buf);
+}
+
+/** Meta CopyTexSubImage1D: copy_tex_sub_image(), dims 1, one row, y/z = 0. */
+void
+_mesa_meta_CopyTexSubImage1D(GLcontext *ctx, GLenum target, GLint level,
+                             GLint xoffset,
+                             GLint x, GLint y, GLsizei width)
+{
+   copy_tex_sub_image(ctx, 1, target, level, xoffset, 0, 0,
+                      x, y, width, 1);
+}
+
+/** Meta CopyTexSubImage2D: copy_tex_sub_image() with dims 2, zoffset 0. */
+void
+_mesa_meta_CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
+                             GLint xoffset, GLint yoffset,
+                             GLint x, GLint y,
+                             GLsizei width, GLsizei height)
+{
+   copy_tex_sub_image(ctx, 2, target, level, xoffset, yoffset, 0,
+                      x, y, width, height);
+}
+
+/** Meta CopyTexSubImage3D: copy_tex_sub_image() with dims 3. */
+void
+_mesa_meta_CopyTexSubImage3D(GLcontext *ctx, GLenum target, GLint level,
+                             GLint xoffset, GLint yoffset, GLint zoffset,
+                             GLint x, GLint y,
+                             GLsizei width, GLsizei height)
+{
+   copy_tex_sub_image(ctx, 3, target, level, xoffset, yoffset, zoffset,
+                      x, y, width, height);
+}
+
+
+/**
+ * Meta CopyColorTable: reads one row of \p width RGBA float pixels at
+ * (\p x, \p y) and passes it to _mesa_ColorTable().
+ */
+void
+_mesa_meta_CopyColorTable(GLcontext *ctx,
+                          GLenum target, GLenum internalformat,
+                          GLint x, GLint y, GLsizei width)
+{
+   /* room for width RGBA float pixels */
+   GLfloat *rgba = malloc(width * 4 * sizeof(GLfloat));
+
+   if (rgba == NULL) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyColorTable");
+      return;
+   }
+
+   /* Read image from framebuffer (disable pixel transfer ops) */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   ctx->Driver.ReadPixels(ctx, x, y, width, 1,
+                          GL_RGBA, GL_FLOAT, &ctx->Pack, rgba);
+   _mesa_ColorTable(target, internalformat, width, GL_RGBA, GL_FLOAT, rgba);
+   _mesa_meta_end(ctx);
+
+   free(rgba);
+}
+
+
+/**
+ * Meta CopyColorSubTable: reads one row of \p width RGBA float pixels at
+ * (\p x, \p y) and passes it, with \p start, to _mesa_ColorSubTable().
+ */
+void
+_mesa_meta_CopyColorSubTable(GLcontext *ctx, GLenum target, GLsizei start,
+                             GLint x, GLint y, GLsizei width)
+{
+   GLfloat *rgba = malloc(width * 4 * sizeof(GLfloat));
+
+   if (rgba == NULL) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyColorSubTable");
+      return;
+   }
+
+   /* Read image from framebuffer (disable pixel transfer ops); the table
+    * update below also runs before the meta state is restored. */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   ctx->Driver.ReadPixels(ctx, x, y, width, 1,
+                          GL_RGBA, GL_FLOAT, &ctx->Pack, rgba);
+   _mesa_ColorSubTable(target, start, width, GL_RGBA, GL_FLOAT, rgba);
+   _mesa_meta_end(ctx);
+
+   free(rgba);
+}
+
+
+/**
+ * Meta CopyConvolutionFilter1D: reads one row of \p width RGBA float pixels
+ * at (\p x, \p y) and passes it to _mesa_ConvolutionFilter1D().
+ */
+void
+_mesa_meta_CopyConvolutionFilter1D(GLcontext *ctx, GLenum target,
+                                   GLenum internalFormat,
+                                   GLint x, GLint y, GLsizei width)
+{
+   GLfloat *buf;
+
+   buf = (GLfloat *) malloc(width * 4 * sizeof(GLfloat));
+   if (!buf) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyConvolutionFilter1D");
+      return;
+   }
+
+   /* Read image from framebuffer (disable pixel transfer ops) */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   _mesa_update_state(ctx);
+   ctx->Driver.ReadPixels(ctx, x, y, width, 1,
+                          GL_RGBA, GL_FLOAT, &ctx->Pack, buf);
+   _mesa_ConvolutionFilter1D(target, internalFormat, width,
+                             GL_RGBA, GL_FLOAT, buf);
+   _mesa_meta_end(ctx);
+
+   free(buf);
+}
+
+
+/**
+ * Meta CopyConvolutionFilter2D: reads a \p width x \p height block of RGBA
+ * float pixels at (\p x, \p y) and passes it to _mesa_ConvolutionFilter2D().
+ */
+void
+_mesa_meta_CopyConvolutionFilter2D(GLcontext *ctx, GLenum target,
+                                   GLenum internalFormat, GLint x, GLint y,
+                                   GLsizei width, GLsizei height)
+{
+   /* room for width * height RGBA float pixels */
+   GLfloat *rgba = malloc(width * height * 4 * sizeof(GLfloat));
+
+   if (rgba == NULL) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyConvolutionFilter2D");
+      return;
+   }
+
+   /* Read image from framebuffer (disable pixel transfer ops); the filter
+    * is defined below before the meta state is restored. */
+   _mesa_meta_begin(ctx, META_PIXEL_STORE | META_PIXEL_TRANSFER);
+   _mesa_update_state(ctx);
+   ctx->Driver.ReadPixels(ctx, x, y, width, height,
+                          GL_RGBA, GL_FLOAT, &ctx->Pack, rgba);
+   _mesa_ConvolutionFilter2D(target, internalFormat, width, height,
+                             GL_RGBA, GL_FLOAT, rgba);
+   _mesa_meta_end(ctx);
+
+   free(rgba);
+}
diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
new file mode 100644
index 0000000000..6225b94189
--- /dev/null
+++ b/src/mesa/drivers/common/meta.h
@@ -0,0 +1,118 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.6
+ *
+ * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef META_H
+#define META_H
+
+
+extern void
+_mesa_meta_init(GLcontext *ctx);
+
+extern void
+_mesa_meta_free(GLcontext *ctx);
+
+extern void
+_mesa_meta_BlitFramebuffer(GLcontext *ctx,
+                           GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+                           GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+                           GLbitfield mask, GLenum filter);
+
+extern void
+_mesa_meta_Clear(GLcontext *ctx, GLbitfield buffers);
+
+extern void
+_mesa_meta_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
+                      GLsizei width, GLsizei height,
+                      GLint dstx, GLint dsty, GLenum type);
+
+extern void
+_mesa_meta_DrawPixels(GLcontext *ctx,
+                      GLint x, GLint y, GLsizei width, GLsizei height,
+                      GLenum format, GLenum type,
+                      const struct gl_pixelstore_attrib *unpack,
+                      const GLvoid *pixels);
+
+extern void
+_mesa_meta_Bitmap(GLcontext *ctx,
+                  GLint x, GLint y, GLsizei width, GLsizei height,
+                  const struct gl_pixelstore_attrib *unpack,
+                  const GLubyte *bitmap);
+
+extern GLboolean
+_mesa_meta_check_generate_mipmap_fallback(GLcontext *ctx, GLenum target,
+                                          struct gl_texture_object *texObj);
+
+extern void
+_mesa_meta_GenerateMipmap(GLcontext *ctx, GLenum target,
+                          struct gl_texture_object *texObj);
+
+extern void
+_mesa_meta_CopyTexImage1D(GLcontext *ctx, GLenum target, GLint level,
+                          GLenum internalFormat, GLint x, GLint y,
+                          GLsizei width, GLint border);
+
+extern void
+_mesa_meta_CopyTexImage2D(GLcontext *ctx, GLenum target, GLint level,
+                          GLenum internalFormat, GLint x, GLint y,
+                          GLsizei width, GLsizei height, GLint border);
+
+extern void
+_mesa_meta_CopyTexSubImage1D(GLcontext *ctx, GLenum target, GLint level,
+                             GLint xoffset,
+                             GLint x, GLint y, GLsizei width);
+
+extern void
+_mesa_meta_CopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
+                             GLint xoffset, GLint yoffset,
+                             GLint x, GLint y,
+                             GLsizei width, GLsizei height);
+
+extern void
+_mesa_meta_CopyTexSubImage3D(GLcontext *ctx, GLenum target, GLint level,
+                             GLint xoffset, GLint yoffset, GLint zoffset,
+                             GLint x, GLint y,
+                             GLsizei width, GLsizei height);
+
+extern void
+_mesa_meta_CopyColorTable(GLcontext *ctx,
+                          GLenum target, GLenum internalformat,
+                          GLint x, GLint y, GLsizei width);
+
+extern void
+_mesa_meta_CopyColorSubTable(GLcontext *ctx,GLenum target, GLsizei start,
+                             GLint x, GLint y, GLsizei width);
+
+extern void
+_mesa_meta_CopyConvolutionFilter1D(GLcontext *ctx, GLenum target,
+                                   GLenum internalFormat,
+                                   GLint x, GLint y, GLsizei width);
+
+extern void
+_mesa_meta_CopyConvolutionFilter2D(GLcontext *ctx, GLenum target,
+                                   GLenum internalFormat, GLint x, GLint y,
+                                   GLsizei width, GLsizei height);
+
+
+#endif /* META_H */
diff --git a/src/mesa/drivers/dri/Makefile b/src/mesa/drivers/dri/Makefile
new file mode 100644
index 0000000000..264648c3fb
--- /dev/null
+++ b/src/mesa/drivers/dri/Makefile
@@ -0,0 +1,55 @@
+# src/mesa/drivers/dri/Makefile
+
+TOP = ../../../..
+
+include $(TOP)/configs/current
+
+
+
+default: $(TOP)/$(LIB_DIR) subdirs dri.pc
+
+
+$(TOP)/$(LIB_DIR):
+	-mkdir $(TOP)/$(LIB_DIR)
+
+
+subdirs:
+	@for dir in $(DRI_DIRS) ; do \
+		if [ -d $$dir ] ; then \
+			(cd $$dir && $(MAKE)) || exit 1 ; \
+		fi \
+	done
+# pcedit: sed command that substitutes the @NAME@ placeholders of dri.pc.in
+pcedit = sed \
+	-e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \
+	-e 's,@INSTALL_LIB_DIR@,$(INSTALL_LIB_DIR),' \
+	-e 's,@INSTALL_INC_DIR@,$(INSTALL_INC_DIR),' \
+	-e 's,@VERSION@,$(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY),' \
+	-e 's,@DRI_DRIVER_DIR@,$(DRI_DRIVER_INSTALL_DIR),' \
+	-e 's,@DRI_PC_REQ_PRIV@,$(DRI_PC_REQ_PRIV),'
+
+dri.pc: dri.pc.in
+	$(pcedit) $< > $@
+
+
+install: dri.pc
+	@for dir in $(DRI_DIRS) ; do \
+		if [ -d $$dir ] ; then \
+			(cd $$dir && $(MAKE) install) || exit 1 ; \
+		fi \
+	done
+	$(INSTALL) -d $(DESTDIR)$(INSTALL_INC_DIR)/GL/internal
+	$(INSTALL) -m 0644 $(TOP)/include/GL/internal/dri_interface.h \
+	  $(DESTDIR)$(INSTALL_INC_DIR)/GL/internal
+	$(INSTALL) -d $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
+	$(INSTALL) -m 0644 dri.pc $(DESTDIR)$(INSTALL_LIB_DIR)/pkgconfig
+
+
+clean:
+	-@for dir in $(DRI_DIRS) ; do \
+		if [ -d $$dir ] ; then \
+			(cd $$dir && $(MAKE) clean) ; \
+		fi \
+	done
+	-rm -f common/*.o
+	-rm -f *.pc
diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template
new file mode 100644
index 0000000000..8cb25439e4
--- /dev/null
+++ b/src/mesa/drivers/dri/Makefile.template
@@ -0,0 +1,108 @@
+# -*-makefile-*-
+
+MESA_MODULES = $(TOP)/src/mesa/libmesa.a
+
+COMMON_GALLIUM_SOURCES = \
+        ../common/utils.c \
+        ../common/vblank.c \
+        ../common/dri_util.c \
+        ../common/xmlconfig.c
+
+COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \
+        ../../common/driverfuncs.c \
+        ../common/texmem.c \
+        ../common/drirenderbuffer.c \
+	../common/dri_metaops.c
+
+INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES)
+
+OBJECTS = $(C_SOURCES:.c=.o) \
+	  $(ASM_SOURCES:.S=.o) 
+
+
+### Include directories
+SHARED_INCLUDES = \
+	-I. \
+	-I$(TOP)/src/mesa/drivers/dri/common \
+	-Iserver \
+	-I$(TOP)/include \
+	-I$(TOP)/src/mapi \
+	-I$(TOP)/src/mesa \
+	-I$(TOP)/src/egl/main \
+	-I$(TOP)/src/egl/drivers/dri \
+	$(LIBDRM_CFLAGS)
+
+CFLAGS += $(API_DEFINES)
+
+##### RULES #####
+
+.c.o:
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
+
+.S.o:
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
+
+
+##### TARGETS #####
+
+default: subdirs lib
+
+
+.PHONY: lib
+lib: symlinks subdirs depend
+	@$(MAKE) $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME)
+
+$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(EXTRA_MODULES) Makefile \
+		$(TOP)/src/mesa/drivers/dri/Makefile.template $(TOP)/src/mesa/drivers/dri/common/dri_test.o
+	$(MKLIB) -o $@.tmp -noprefix -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+		$(OBJECTS) $(MESA_MODULES) $(EXTRA_MODULES) $(DRI_LIB_DEPS)
+	$(CC) $(CFLAGS) -o $@.test $(TOP)/src/mesa/drivers/dri/common/dri_test.o $@.tmp $(DRI_LIB_DEPS)
+	@rm -f $@.test
+	mv -f $@.tmp $@
+
+
+$(TOP)/$(LIB_DIR)/$(LIBNAME): $(LIBNAME)
+	$(INSTALL) $(LIBNAME) $(TOP)/$(LIB_DIR) 
+
+
+# If the Makefile defined SUBDIRS, run make in each existing dir; stop on failure
+.PHONY: subdirs
+subdirs:
+	@if test -n "$(SUBDIRS)" ; then \
+		for dir in $(SUBDIRS) ; do \
+			if [ -d $$dir ] ; then \
+				(cd $$dir && $(MAKE)) || exit 1; \
+			fi \
+		done \
+	fi
+
+
+.PHONY: symlinks
+symlinks:
+
+
+depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS)
+	@ echo "running $(MKDEP)"
+	@ rm -f depend
+	@ touch depend
+	@ $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) \
+		$(ASM_SOURCES) > /dev/null 2>/dev/null
+
+
+# Emacs tags
+tags:
+	etags `find . -name \*.[ch]` `find ../include`
+
+
+# Remove objects, shared libraries, backup files, symlinks and dependency files
+clean:
+	-rm -f *.o */*.o server/*.o *.so *~ $(SYMLINKS)
+	-rm -f depend depend.bak
+
+
+install: $(LIBNAME)
+	$(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+	$(MINSTALL) -m 755 $(LIBNAME) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+
+
+-include depend
diff --git a/src/mesa/drivers/dri/common/.gitignore b/src/mesa/drivers/dri/common/.gitignore
new file mode 100644
index 0000000000..1edeb79fd1
--- /dev/null
+++ b/src/mesa/drivers/dri/common/.gitignore
@@ -0,0 +1 @@
+*.os
diff --git a/src/mesa/drivers/dri/common/depthtmp.h b/src/mesa/drivers/dri/common/depthtmp.h
new file mode 100644
index 0000000000..fd2dab3b42
--- /dev/null
+++ b/src/mesa/drivers/dri/common/depthtmp.h
@@ -0,0 +1,270 @@
+
+/*
+ * Notes:
+ * 1. These functions plug into the gl_renderbuffer structure.
+ * 2. The 'values' parameter always points to GLuint values, regardless of
+ *    the actual Z buffer depth.
+ */
+
+
+#include "spantmp_common.h"
+
+#ifndef DBG
+#define DBG 0
+#endif
+
+#ifndef HAVE_HW_DEPTH_SPANS
+#define HAVE_HW_DEPTH_SPANS 0
+#endif
+
+#ifndef HAVE_HW_DEPTH_PIXELS
+#define HAVE_HW_DEPTH_PIXELS 0
+#endif
+/* Write depth values to the span of n pixels starting at (x, y); mask optional. */
+static void TAG(WriteDepthSpan)( GLcontext *ctx,
+                                 struct gl_renderbuffer *rb,
+                                 GLuint n, GLint x, GLint y,
+				 const void *values,
+				 const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         const VALUE_TYPE *depth = (const VALUE_TYPE *) values;
+	 GLint x1; /* x1, n1: presumably filled in by CLIPSPAN -- TODO confirm */
+	 GLint n1;
+	 LOCAL_DEPTH_VARS;
+
+	 y = Y_FLIP( y );
+
+#if HAVE_HW_DEPTH_SPANS /* whole span handed to WRITE_DEPTH_SPAN() */
+	 (void) x1; (void) n1; /* unused in this path */
+
+	 if ( DBG ) fprintf( stderr, "WriteDepthSpan 0..%d (x1 %d)\n",
+			     (int)n, (int)x );
+
+	 WRITE_DEPTH_SPAN();
+#else /* per-pixel WRITE_DEPTH() inside the clip loop */
+	 HW_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN( x, y, n, x1, n1, i );
+
+	       if ( DBG ) fprintf( stderr, "WriteDepthSpan %d..%d (x1 %d) (mask %p)\n",
+				   (int)i, (int)n1, (int)x1, mask );
+
+	       if ( mask ) { /* write only pixels whose mask byte is non-zero */
+		  for ( ; n1>0 ; i++, x1++, n1-- ) {
+		     if ( mask[i] ) WRITE_DEPTH( x1, y, depth[i] );
+		  }
+	       } else { /* no mask: write all n1 pixels */
+		  for ( ; n1>0 ; i++, x1++, n1-- ) {
+		     WRITE_DEPTH( x1, y, depth[i] );
+		  }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+#endif
+      }
+   HW_WRITE_UNLOCK();
+}
+
+/* Write one depth value to the span of n pixels starting at (x, y). */
+#if HAVE_HW_DEPTH_SPANS
+/* implement WriteMonoDepthSpan() in terms of WriteDepthSpan() */
+static void
+TAG(WriteMonoDepthSpan)( GLcontext *ctx, struct gl_renderbuffer *rb,
+                         GLuint n, GLint x, GLint y,
+                         const void *value, const GLubyte mask[] )
+{
+   const GLuint depthVal = *((GLuint *) value); /* always read as GLuint */
+   GLuint depths[MAX_WIDTH]; /* assumes n <= MAX_WIDTH -- TODO confirm */
+   GLuint i;
+   for (i = 0; i < n; i++)
+      depths[i] = depthVal;
+   TAG(WriteDepthSpan)(ctx, rb, n, x, y, depths, mask);
+}
+#else /* no hardware span support: per-pixel WRITE_DEPTH() */
+static void TAG(WriteMonoDepthSpan)( GLcontext *ctx,
+                                     struct gl_renderbuffer *rb,
+                                     GLuint n, GLint x, GLint y,
+                                     const void *value,
+                                     const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         const GLuint depth = *((GLuint *) value); /* always read as GLuint */
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_DEPTH_VARS;
+
+	 y = Y_FLIP( y );
+
+	 HW_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN( x, y, n, x1, n1, i );
+
+	       if ( DBG ) fprintf( stderr, "%s %d..%d (x1 %d) = %u\n",
+				   __FUNCTION__, (int)i, (int)n1, (int)x1, (GLuint)depth );
+
+	       if ( mask ) { /* write only pixels whose mask byte is non-zero */
+		  for ( ; n1>0 ; i++, x1++, n1-- ) {
+		     if ( mask[i] ) WRITE_DEPTH( x1, y, depth );
+		  }
+	       } else {
+		  for ( ; n1>0 ; x1++, n1-- ) { /* i is not needed without a mask */
+		     WRITE_DEPTH( x1, y, depth );
+		  }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+#endif
+
+/* Write depth[i] at (x[i], y[i]) for each of the n pixels; mask is optional. */
+static void TAG(WriteDepthPixels)( GLcontext *ctx,
+                                   struct gl_renderbuffer *rb,
+				   GLuint n,
+				   const GLint x[],
+				   const GLint y[],
+				   const void *values,
+				   const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         const VALUE_TYPE *depth = (const VALUE_TYPE *) values;
+	 GLuint i;
+	 LOCAL_DEPTH_VARS;
+
+	 if ( DBG ) fprintf( stderr, "WriteDepthPixels\n" );
+
+#if HAVE_HW_DEPTH_PIXELS /* all pixels handed to WRITE_DEPTH_PIXELS() */
+	 (void) i; /* unused in this path */
+
+	 WRITE_DEPTH_PIXELS();
+#else /* per-pixel WRITE_DEPTH() inside the clip loop */
+	 HW_CLIPLOOP()
+	    {
+	       if ( mask ) {
+		  for ( i = 0 ; i < n ; i++ ) {
+		     if ( mask[i] ) { /* skip pixels whose mask byte is zero */
+			const int fy = Y_FLIP( y[i] );
+			if ( CLIPPIXEL( x[i], fy ) )
+			   WRITE_DEPTH( x[i], fy, depth[i] );
+		     }
+		  }
+	       }
+	       else {
+		  for ( i = 0 ; i < n ; i++ ) {
+		     const int fy = Y_FLIP( y[i] );
+		     if ( CLIPPIXEL( x[i], fy ) ) /* pixels failing the test are skipped */
+			WRITE_DEPTH( x[i], fy, depth[i] );
+		  }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+#endif
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/* Read depth spans and pixels.
+ * ReadDepthSpan: read the span of n pixels starting at (x, y) into values. */
+static void TAG(ReadDepthSpan)( GLcontext *ctx,
+                                struct gl_renderbuffer *rb,
+				GLuint n, GLint x, GLint y,
+				void *values )
+{
+   HW_READ_LOCK()
+      {
+         VALUE_TYPE *depth = (VALUE_TYPE *) values;
+	 GLint x1, n1;
+	 LOCAL_DEPTH_VARS;
+
+	 y = Y_FLIP( y );
+
+	 if ( DBG ) fprintf( stderr, "ReadDepthSpan\n" );
+
+#if HAVE_HW_DEPTH_SPANS /* whole span handed to READ_DEPTH_SPAN() */
+	 (void) x1; (void) n1; /* unused in this path */
+
+	 READ_DEPTH_SPAN();
+#else /* per-pixel READ_DEPTH() inside the clip loop */
+	 HW_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN( x, y, n, x1, n1, i );
+	       for ( ; n1>0 ; i++, n1-- ) {
+		  READ_DEPTH( depth[i], x+i, y ); /* x1 is not advanced; x+i is used */
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+#endif
+      }
+   HW_READ_UNLOCK();
+}
+/* Read the depth at (x[i], y[i]) into values[i] for each of the n pixels. */
+static void TAG(ReadDepthPixels)( GLcontext *ctx,
+                                  struct gl_renderbuffer *rb,
+                                  GLuint n,
+				  const GLint x[], const GLint y[],
+				  void *values )
+{
+   HW_READ_LOCK()
+      {
+         VALUE_TYPE *depth = (VALUE_TYPE *) values;
+	 GLuint i;
+	 LOCAL_DEPTH_VARS;
+
+	 if ( DBG ) fprintf( stderr, "ReadDepthPixels\n" );
+
+#if HAVE_HW_DEPTH_PIXELS /* all pixels handed to READ_DEPTH_PIXELS() */
+	 (void) i; /* unused in this path */
+
+	 READ_DEPTH_PIXELS();
+#else /* per-pixel READ_DEPTH() inside the clip loop */
+	 HW_CLIPLOOP()
+	    {
+	       for ( i = 0 ; i < n ;i++ ) {
+		  int fy = Y_FLIP( y[i] );
+		  if ( CLIPPIXEL( x[i], fy ) ) /* depth[i] untouched when this fails */
+		     READ_DEPTH( depth[i], x[i], fy );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+#endif
+      }
+   HW_READ_UNLOCK();
+}
+
+
+/**
+ * Initialize the given renderbuffer's span routines to point to
+ * the depth/z functions we generated above (PutRowRGB, PutMonoValues: NULL).
+ */
+static void TAG(InitDepthPointers)(struct gl_renderbuffer *rb)
+{
+   rb->GetRow = TAG(ReadDepthSpan);
+   rb->GetValues = TAG(ReadDepthPixels);
+   rb->PutRow = TAG(WriteDepthSpan);
+   rb->PutRowRGB = NULL; /* no RGB row writer is generated here */
+   rb->PutMonoRow = TAG(WriteMonoDepthSpan);
+   rb->PutValues = TAG(WriteDepthPixels);
+   rb->PutMonoValues = NULL; /* no mono-value writer is generated here */
+}
+
+
+#if HAVE_HW_DEPTH_SPANS
+#undef WRITE_DEPTH_SPAN
+#undef WRITE_DEPTH_PIXELS
+#undef READ_DEPTH_SPAN
+#undef READ_DEPTH_PIXELS
+#else
+#undef WRITE_DEPTH
+#undef READ_DEPTH
+#endif
+#undef TAG
+#undef VALUE_TYPE
diff --git a/src/mesa/drivers/dri/common/dri_metaops.c b/src/mesa/drivers/dri/common/dri_metaops.c
new file mode 100644
index 0000000000..dfb7d64040
--- /dev/null
+++ b/src/mesa/drivers/dri/common/dri_metaops.c
@@ -0,0 +1,290 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2009 Intel Corporation.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/arrayobj.h"
+#include "main/bufferobj.h"
+#include "main/enable.h"
+#include "main/matrix.h"
+#include "main/texstate.h"
+#include "main/varray.h"
+#include "main/viewport.h"
+#include "shader/arbprogram.h"
+#include "shader/program.h"
+#include "dri_metaops.h"
+
+void
+meta_set_passthrough_transform(struct dri_metaops *meta)
+{
+   GLcontext *ctx = meta->ctx;
+   /* Remember the state that meta_restore_transform() puts back. */
+   meta->saved_matrix_mode = ctx->Transform.MatrixMode;
+   meta->saved_vp_height = ctx->Viewport.Height;
+   meta->saved_vp_width = ctx->Viewport.Width;
+   meta->saved_vp_y = ctx->Viewport.Y;
+   meta->saved_vp_x = ctx->Viewport.X;
+   /* Viewport covers the whole draw buffer; the call is flagged as internal. */
+   meta->internal_viewport_call = GL_TRUE;
+   _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height);
+   meta->internal_viewport_call = GL_FALSE;
+   /* Projection: pushed, then replaced by an ortho matrix sized to the buffer. */
+   _mesa_MatrixMode(GL_PROJECTION);
+   _mesa_PushMatrix();
+   _mesa_LoadIdentity();
+   _mesa_Ortho(0, ctx->DrawBuffer->Width, 0, ctx->DrawBuffer->Height, 1, -1);
+   /* Modelview: pushed, then identity; GL_MODELVIEW stays the current mode. */
+   _mesa_MatrixMode(GL_MODELVIEW);
+   _mesa_PushMatrix();
+   _mesa_LoadIdentity();
+}
+/* Undo meta_set_passthrough_transform(): pop both matrices, then restore the matrix mode and viewport. */
+void
+meta_restore_transform(struct dri_metaops *meta)
+{
+   _mesa_MatrixMode(GL_PROJECTION);
+   _mesa_PopMatrix();
+   _mesa_MatrixMode(GL_MODELVIEW);
+   _mesa_PopMatrix();
+
+   _mesa_MatrixMode(meta->saved_matrix_mode);
+
+   meta->internal_viewport_call = GL_TRUE;
+   _mesa_Viewport(meta->saved_vp_x, meta->saved_vp_y,
+		  meta->saved_vp_width, meta->saved_vp_height);
+   meta->internal_viewport_call = GL_FALSE;
+}
+
+
+/**
+ * Bind (building and caching it on first use) an ARB vertex program that
+ * transforms the position by the MVP matrix and copies texcoord[0] and color.
+ */
+void
+meta_set_passthrough_vertex_program(struct dri_metaops *meta)
+{
+   GLcontext *ctx = meta->ctx;
+   static const char *vp =
+      "!!ARBvp1.0\n"
+      "TEMP vertexClip;\n"
+      "DP4 vertexClip.x, state.matrix.mvp.row[0], vertex.position;\n"
+      "DP4 vertexClip.y, state.matrix.mvp.row[1], vertex.position;\n"
+      "DP4 vertexClip.z, state.matrix.mvp.row[2], vertex.position;\n"
+      "DP4 vertexClip.w, state.matrix.mvp.row[3], vertex.position;\n"
+      "MOV result.position, vertexClip;\n"
+      "MOV result.texcoord[0], vertex.texcoord[0];\n"
+      "MOV result.color, vertex.color;\n"
+      "END\n";
+   /* meta_restore_vertex_program() resets saved_vp to NULL; calls must pair. */
+   assert(meta->saved_vp == NULL);
+
+   _mesa_reference_vertprog(ctx, &meta->saved_vp,
+			    ctx->VertexProgram.Current);
+   if (meta->passthrough_vp == NULL) {
+      GLuint prog_name;
+      _mesa_GenPrograms(1, &prog_name);
+      _mesa_BindProgram(GL_VERTEX_PROGRAM_ARB, prog_name);
+      _mesa_ProgramStringARB(GL_VERTEX_PROGRAM_ARB,
+			     GL_PROGRAM_FORMAT_ASCII_ARB,
+			     strlen(vp), (const GLubyte *)vp);
+      _mesa_reference_vertprog(ctx, &meta->passthrough_vp,
+			       ctx->VertexProgram.Current);
+      _mesa_DeletePrograms(1, &prog_name);
+   }
+   /* Make the cached program current and tell the driver about it. */
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+   _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current,
+			    meta->passthrough_vp);
+   ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
+			   &meta->passthrough_vp->Base);
+   /* Record the old enable bit; the restore only disables when it was off. */
+   meta->saved_vp_enable = ctx->VertexProgram.Enabled;
+   _mesa_Enable(GL_VERTEX_PROGRAM_ARB);
+}
+
+/**
+ * Rebinds the vertex program saved by meta_set_passthrough_vertex_program(),
+ * drops the saved reference, and restores the enable bit.
+ */
+void
+meta_restore_vertex_program(struct dri_metaops *meta)
+{
+   GLcontext *ctx = meta->ctx;
+
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+   _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current,
+			    meta->saved_vp);
+   _mesa_reference_vertprog(ctx, &meta->saved_vp, NULL);
+   ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB,
+			   &ctx->VertexProgram.Current->Base);
+   /* The set call enabled the target unconditionally; undo only if it was off. */
+   if (!meta->saved_vp_enable)
+      _mesa_Disable(GL_VERTEX_PROGRAM_ARB);
+}
+
+/**
+ * Binds the given program string to GL_FRAGMENT_PROGRAM_ARB, caching the
+ * program object in \p *prog and saving the previous program and enable bit.
+ */
+void
+meta_set_fragment_program(struct dri_metaops *meta,
+			  struct gl_fragment_program **prog,
+			  const char *prog_string)
+{
+   GLcontext *ctx = meta->ctx;
+   assert(meta->saved_fp == NULL);
+
+   _mesa_reference_fragprog(ctx, &meta->saved_fp,
+			    ctx->FragmentProgram.Current);
+   if (*prog == NULL) {
+      GLuint prog_name;
+      _mesa_GenPrograms(1, &prog_name);
+      _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, prog_name);
+      _mesa_ProgramStringARB(GL_FRAGMENT_PROGRAM_ARB,
+			     GL_PROGRAM_FORMAT_ASCII_ARB,
+			     strlen(prog_string), (const GLubyte *)prog_string);
+      _mesa_reference_fragprog(ctx, prog, ctx->FragmentProgram.Current);
+      /* Note that DeletePrograms unbinds the program on us */
+      _mesa_DeletePrograms(1, &prog_name);
+   }
+   /* Make *prog current and tell the driver about it. */
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+   _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, *prog);
+   ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, &((*prog)->Base));
+   /* Record the old enable bit for meta_restore_fragment_program(). */
+   meta->saved_fp_enable = ctx->FragmentProgram.Enabled;
+   _mesa_Enable(GL_FRAGMENT_PROGRAM_ARB);
+}
+
+/**
+ * Rebinds the fragment program saved by meta_set_fragment_program(),
+ * drops the saved reference, and restores the enable bit.
+ */
+void
+meta_restore_fragment_program(struct dri_metaops *meta)
+{
+   GLcontext *ctx = meta->ctx;
+
+   FLUSH_VERTICES(ctx, _NEW_PROGRAM);
+   _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current,
+			    meta->saved_fp);
+   _mesa_reference_fragprog(ctx, &meta->saved_fp, NULL);
+   ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB,
+			   &ctx->FragmentProgram.Current->Base);
+   /* The set call enabled the target unconditionally; undo only if it was off. */
+   if (!meta->saved_fp_enable)
+      _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB);
+}
+/* Corners of the unit square, (0,0) (1,0) (1,1) (0,1), two floats each; uploaded by meta_set_default_texrect(). */
+static const float default_texcoords[4][2] = { { 0.0, 0.0 },
+					       { 1.0, 0.0 },
+					       { 1.0, 1.0 },
+					       { 0.0, 1.0 } };
+/* Save texcoord array 0 and the array-buffer binding, then source texcoords from the cached unit-square VBO. */
+void
+meta_set_default_texrect(struct dri_metaops *meta)
+{
+   GLcontext *ctx = meta->ctx;
+   struct gl_client_array *old_texcoord_array;
+   /* Save the CLIENT active texture: meta_restore_texcoords() feeds this to ClientActiveTexture. */
+   meta->saved_active_texture = ctx->Array.ActiveTexture;
+   if (meta->saved_array_vbo == NULL) {
+      _mesa_reference_buffer_object(ctx, &meta->saved_array_vbo,
+				    ctx->Array.ArrayBufferObj);
+   }
+
+   old_texcoord_array = &ctx->Array.ArrayObj->TexCoord[0];
+   meta->saved_texcoord_type = old_texcoord_array->Type;
+   meta->saved_texcoord_size = old_texcoord_array->Size;
+   meta->saved_texcoord_stride = old_texcoord_array->Stride;
+   meta->saved_texcoord_enable = old_texcoord_array->Enabled;
+   meta->saved_texcoord_ptr = old_texcoord_array->Ptr;
+   _mesa_reference_buffer_object(ctx, &meta->saved_texcoord_vbo,
+				 old_texcoord_array->BufferObj);
+
+   _mesa_ClientActiveTextureARB(GL_TEXTURE0);
+   /* Build the texcoord VBO on first use; afterwards just bind it. */
+   if (meta->texcoord_vbo == NULL) {
+      GLuint vbo_name;
+
+      _mesa_GenBuffersARB(1, &vbo_name);
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, vbo_name);
+      _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(default_texcoords),
+			  default_texcoords, GL_STATIC_DRAW_ARB);
+      _mesa_reference_buffer_object(ctx, &meta->texcoord_vbo,
+				    ctx->Array.ArrayBufferObj);
+   } else {
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB,
+			  meta->texcoord_vbo->Name);
+   }
+   _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), NULL);
+
+   _mesa_Enable(GL_TEXTURE_COORD_ARRAY);
+}
+/* Undo meta_set_default_texrect(): restore texcoord array 0, the client texture unit and the array-buffer binding. */
+void
+meta_restore_texcoords(struct dri_metaops *meta)
+{
+   GLcontext *ctx = meta->ctx;
+
+   /* Restore the old TexCoordPointer */
+   if (meta->saved_texcoord_vbo) {
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB,
+			  meta->saved_texcoord_vbo->Name);
+      _mesa_reference_buffer_object(ctx, &meta->saved_texcoord_vbo, NULL);
+   } else {
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
+   }
+   /* Assumes the client texture unit is still GL_TEXTURE0, as meta_set_default_texrect() left it. */
+   _mesa_TexCoordPointer(meta->saved_texcoord_size,
+			 meta->saved_texcoord_type,
+			 meta->saved_texcoord_stride,
+			 meta->saved_texcoord_ptr);
+   if (!meta->saved_texcoord_enable)
+      _mesa_Disable(GL_TEXTURE_COORD_ARRAY);
+
+   _mesa_ClientActiveTextureARB(GL_TEXTURE0 +
+				meta->saved_active_texture);
+   /* Finally rebind whatever array buffer was bound before the set call. */
+   if (meta->saved_array_vbo) {
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB,
+			  meta->saved_array_vbo->Name);
+      _mesa_reference_buffer_object(ctx, &meta->saved_array_vbo, NULL);
+   } else {
+      _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0);
+   }
+}
+
+/* Stores ctx in meta; no other field is touched, so the caller presumably supplies a zeroed struct - TODO confirm. */
+void meta_init_metaops(GLcontext *ctx, struct dri_metaops *meta)
+{
+   meta->ctx = ctx;
+}
+/* Currently a no-op. */
+void meta_destroy_metaops(struct dri_metaops *meta)
+{
+   /* NOTE(review): the cached passthrough_vp and texcoord_vbo references are not released here - confirm who drops them. */
+}
diff --git a/src/mesa/drivers/dri/common/dri_metaops.h b/src/mesa/drivers/dri/common/dri_metaops.h
new file mode 100644
index 0000000000..2487145326
--- /dev/null
+++ b/src/mesa/drivers/dri/common/dri_metaops.h
@@ -0,0 +1,81 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2009 Intel Corporation.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef DRI_METAOPS_H
+#define DRI_METAOPS_H
+
+/* State shared by the meta_* helpers in dri_metaops.c: cached GL objects plus GL state held between a set call and its restore. */
+struct dri_metaops {
+    GLcontext *ctx; /* stored by meta_init_metaops() */
+    GLboolean internal_viewport_call; /* GL_TRUE only while a meta_* helper itself calls _mesa_Viewport() */
+    struct gl_fragment_program *bitmap_fp; /* NOTE(review): unused in dri_metaops.c; presumably a cache slot for meta_set_fragment_program() */
+    struct gl_vertex_program *passthrough_vp; /* built once by meta_set_passthrough_vertex_program() */
+    struct gl_buffer_object *texcoord_vbo; /* built once by meta_set_default_texrect() */
+    /* Programs and enable bits that were current before the set calls. */
+    struct gl_fragment_program *saved_fp;
+    GLboolean saved_fp_enable;
+    struct gl_vertex_program *saved_vp;
+    GLboolean saved_vp_enable;
+
+    struct gl_fragment_program *tex2d_fp; /* NOTE(review): unused in dri_metaops.c; presumably another caller-side cache slot */
+    /* Texcoord array 0, array-buffer binding and texture unit saved by meta_set_default_texrect(). */
+    GLboolean saved_texcoord_enable;
+    struct gl_buffer_object *saved_array_vbo, *saved_texcoord_vbo;
+    GLenum saved_texcoord_type;
+    GLsizei saved_texcoord_size, saved_texcoord_stride;
+    const void *saved_texcoord_ptr;
+    int saved_active_texture;
+    /* Viewport and matrix mode saved by meta_set_passthrough_transform(). */
+    GLint saved_vp_x, saved_vp_y;
+    GLsizei saved_vp_width, saved_vp_height;
+    GLenum saved_matrix_mode;
+};
+
+/* Save viewport and matrix mode, then set a full-buffer viewport, ortho projection and identity modelview. */
+void meta_set_passthrough_transform(struct dri_metaops *meta);
+/* Pop the matrices pushed by meta_set_passthrough_transform() and restore the saved matrix mode and viewport. */
+void meta_restore_transform(struct dri_metaops *meta);
+/* Save the current vertex program, then bind and enable the cached pass-through program. */
+void meta_set_passthrough_vertex_program(struct dri_metaops *meta);
+/* Rebind the vertex program saved by meta_set_passthrough_vertex_program(). */
+void meta_restore_vertex_program(struct dri_metaops *meta);
+/* Save the current fragment program, then bind and enable *prog, building it from prog_string if *prog is NULL. */
+void meta_set_fragment_program(struct dri_metaops *meta,
+			  struct gl_fragment_program **prog,
+			  const char *prog_string);
+/* Rebind the fragment program saved by meta_set_fragment_program(). */
+void meta_restore_fragment_program(struct dri_metaops *meta);
+/* Save texcoord array 0 state and point it at a cached VBO of unit-square texcoords. */
+void meta_set_default_texrect(struct dri_metaops *meta);
+/* Restore the texcoord array state saved by meta_set_default_texrect(). */
+void meta_restore_texcoords(struct dri_metaops *meta);
+/* meta_init_metaops() stores ctx in meta; meta_destroy_metaops() currently does nothing. */
+void meta_init_metaops(GLcontext *ctx, struct dri_metaops *meta);
+void meta_destroy_metaops(struct dri_metaops *meta);
+
+#endif
diff --git a/src/mesa/drivers/dri/common/dri_test.c b/src/mesa/drivers/dri/common/dri_test.c
new file mode 100644
index 0000000000..793f0c37d7
--- /dev/null
+++ b/src/mesa/drivers/dri/common/dri_test.c
@@ -0,0 +1,89 @@
+#include "main/glheader.h"
+#include "main/compiler.h"
+#include "glapi/glapi.h"
+
+/* This is just supposed to make sure we get a reference to
+   the driver entry symbol that the compiler doesn't optimize away */
+
+extern char __driDriverExtensions[];
+
+/* Define the glapi dispatch/context symbols locally: TLS variants under GLX_USE_TLS, plain globals otherwise. */
+
+#if defined(GLX_USE_TLS)
+
+PUBLIC __thread struct _glapi_table * _glapi_tls_Dispatch
+    __attribute__((tls_model("initial-exec")));
+
+PUBLIC __thread void * _glapi_tls_Context
+    __attribute__((tls_model("initial-exec")));
+
+PUBLIC const struct _glapi_table *_glapi_Dispatch;
+PUBLIC const void *_glapi_Context;
+
+#else
+
+PUBLIC struct _glapi_table *_glapi_Dispatch;
+PUBLIC void *_glapi_Context;
+
+#endif
+/* Stub: does nothing. */
+PUBLIC void
+_glapi_check_multithread(void)
+{}
+/* Stub: ignores \p context. */
+PUBLIC void
+_glapi_set_context(void *context)
+{}
+/* Stub: always returns a null pointer. */
+PUBLIC void *
+_glapi_get_context(void)
+{
+	return 0;
+}
+/* Stub: ignores \p dispatch. */
+PUBLIC void
+_glapi_set_dispatch(struct _glapi_table *dispatch)
+{}
+/* Stub: always returns a null pointer. */
+PUBLIC struct _glapi_table *
+_glapi_get_dispatch(void)
+{
+	return 0;
+}
+/* Stub: ignores both arguments and always returns 0. */
+PUBLIC int
+_glapi_add_dispatch( const char * const * function_names,
+		     const char * parameter_signature )
+{
+	return 0;
+}
+/* Stub: ignores \p funcName and always returns 0. */
+PUBLIC GLint
+_glapi_get_proc_offset(const char *funcName)
+{
+	return 0;
+}
+/* Stub: ignores \p funcName and always returns a null function pointer. */
+PUBLIC _glapi_proc
+_glapi_get_proc_address(const char *funcName)
+{
+	return 0;
+}
+/* Stub: always reports a table size of 0. */
+PUBLIC GLuint
+_glapi_get_dispatch_table_size(void)
+{
+	return 0;
+}
+/* Stub: always returns 0. */
+PUBLIC unsigned long
+_glthread_GetID(void)
+{
+   return 0;
+}
+/* Uses the address of __driDriverExtensions so the reference is kept; the exit status itself carries no meaning. */
+int main(int argc, char** argv)
+{
+   void* p = __driDriverExtensions;
+   return (int)(unsigned long)p;
+}
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
new file mode 100644
index 0000000000..18b9035248
--- /dev/null
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -0,0 +1,1034 @@
+/**
+ * \file dri_util.c
+ * DRI utility functions.
+ *
+ * This module acts as glue between GLX and the actual hardware driver.  A DRI
+ * driver doesn't really \e have to use any of this - it's optional.  But, some
+ * useful stuff is done here that otherwise would have to be duplicated in most
+ * drivers.
+ * 
+ * Basically, these utility functions take care of some of the dirty details of
+ * screen initialization, context creation, context binding, DRM setup, etc.
+ *
+ * These functions are compiled into each DRI driver so libGL.so knows nothing
+ * about them.
+ */
+
+
+#include <assert.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdio.h>
+
+#ifndef MAP_FAILED
+#define MAP_FAILED ((void *)-1)
+#endif
+
+#include "main/imports.h"
+#define None 0
+
+#include "dri_util.h"
+#include "drm_sarea.h"
+#include "utils.h"
+#include "xmlpool.h"
+/* Option description built from the DRI_CONF macros: one performance section holding only the vblank-mode option. */
+PUBLIC const char __dri2ConfigOptions[] =
+   DRI_CONF_BEGIN
+      DRI_CONF_SECTION_PERFORMANCE
+         DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_1)
+      DRI_CONF_SECTION_END
+   DRI_CONF_END;
+/* Presumably the number of options declared in __dri2ConfigOptions above; keep the two in sync. */
+static const uint __dri2NConfigOptions = 1;
+
+#ifndef GLX_OML_sync_control
+typedef GLboolean ( * PFNGLXGETMSCRATEOMLPROC) (__DRIdrawable *drawable, int32_t *numerator, int32_t *denominator);
+#endif
+
+static void dri_get_drawable(__DRIdrawable *pdp);
+static void dri_put_drawable(__DRIdrawable *pdp);
+
+/**
+ * This is just a token extension used to signal that the driver
+ * supports setting a read drawable.
+ */
+const __DRIextension driReadDrawableExtension = {
+    __DRI_READ_DRAWABLE, __DRI_READ_DRAWABLE_VERSION
+};
+/* Area of the overlap of two cliprects; 0 when they do not overlap. */
+GLint
+driIntersectArea( drm_clip_rect_t rect1, drm_clip_rect_t rect2 )
+{
+   /* rect1 is a by-value copy; shrink it in place to the intersection. */
+   rect1.x1 = (rect2.x1 > rect1.x1) ? rect2.x1 : rect1.x1;
+   rect1.x2 = (rect2.x2 < rect1.x2) ? rect2.x2 : rect1.x2;
+   rect1.y1 = (rect2.y1 > rect1.y1) ? rect2.y1 : rect1.y1;
+   rect1.y2 = (rect2.y2 < rect1.y2) ? rect2.y2 : rect1.y2;
+   if (rect1.x1 > rect1.x2 || rect1.y1 > rect1.y2)
+      return 0;
+   return (rect1.x2 - rect1.x1) * (rect1.y2 - rect1.y1);
+}
+
+/*****************************************************************/
+/** \name Context (un)binding functions                          */
+/*****************************************************************/
+/*@{*/
+
+/**
+ * Unbind context.
+ *
+ * \param pcp the context to unbind; \c NULL is rejected.
+ *
+ * \return \c GL_TRUE on success (including when nothing was bound), or
+ * \c GL_FALSE if \p pcp is \c NULL or a bound drawable already has a zero
+ * reference count.
+ *
+ * \internal
+ * Calls __DriverAPIRec::UnbindContext, then drops the reference that
+ * driBindContext() took on the draw drawable and, when it is a different
+ * non-NULL object, on the read drawable.  driBindContext() accepts a NULL
+ * read drawable, so that case is tolerated here instead of dereferenced.
+ * On success both drawable pointers in the context are cleared.
+ */
+static int driUnbindContext(__DRIcontext *pcp)
+{
+    __DRIscreen *psp;
+    __DRIdrawable *pdp;
+    __DRIdrawable *prp;
+    int separate_read;
+
+    /*
+    ** Assume error checking is done properly in glXMakeCurrent before
+    ** calling driUnbindContext.
+    */
+
+    if (pcp == NULL)
+        return GL_FALSE;
+
+    psp = pcp->driScreenPriv;
+    pdp = pcp->driDrawablePriv;
+    prp = pcp->driReadablePriv;
+
+    /* already unbound */
+    if (!pdp && !prp)
+      return GL_TRUE;
+    /* Let driver unbind drawable from context */
+    (*psp->DriverAPI.UnbindContext)(pcp);
+
+    assert(pdp);
+    if (pdp->refcount == 0) {
+	/* ERROR!!! */
+	return GL_FALSE;
+    }
+
+    /* Decide before the put below, which may free pdp. */
+    separate_read = (prp != NULL && prp != pdp);
+    dri_put_drawable(pdp);
+    if (separate_read) {
+        if (prp->refcount == 0) {
+	    /* ERROR!!! */
+	    return GL_FALSE;
+	}
+
+    	dri_put_drawable(prp);
+    }
+
+    /* Clear only the context's side of the binding.  The drawables keep
+     * their driContextPriv so SwapBuffers on an unbound window can still
+     * find the last bound context and use its lock (BrianP, 2-Dec-2000). */
+    pcp->driDrawablePriv = pcp->driReadablePriv = NULL;
+
+    return GL_TRUE;
+}
+
+/**
+ * Bind \p pcp to a draw drawable \p pdp and a read drawable \p prp, as
+ * \c glXMakeCurrentReadSGI and GLX 1.3's \c glXMakeContextCurrent need.
+ * Returns GL_FALSE for a NULL context, else the driver's MakeCurrent result.
+ */
+static int driBindContext(__DRIcontext *pcp,
+			  __DRIdrawable *pdp,
+			  __DRIdrawable *prp)
+{
+    /* The read drawable needs its own handling only when it is a second object. */
+    const int separate_read = (prp && pdp != prp);
+    __DRIscreen *psp;
+
+    /*
+    ** Assume error checking is done properly in glXMakeCurrent before
+    ** calling driBindContext.
+    */
+    if (!pcp)
+	return GL_FALSE;
+    psp = pcp->driScreenPriv;
+
+    /* Record the binding and take one reference per distinct drawable. */
+    pcp->driDrawablePriv = pdp;
+    pcp->driReadablePriv = prp;
+    if (pdp) {
+	pdp->driContextPriv = pcp;
+	dri_get_drawable(pdp);
+    }
+    if (separate_read)
+	dri_get_drawable(prp);
+
+    /*
+    ** Now that a context is associated with the drawables, fetch their
+    ** drawable information if that has not been done before (a NULL
+    ** pStamp).  Skipped when DRI2 is enabled.
+    */
+    if (!psp->dri2.enabled) {
+	if (pdp && !pdp->pStamp) {
+	    DRM_SPINLOCK(&psp->pSAREA->drawable_lock, psp->drawLockID);
+	    __driUtilUpdateDrawableInfo(pdp);
+	    DRM_SPINUNLOCK(&psp->pSAREA->drawable_lock, psp->drawLockID);
+	}
+	if (separate_read && !prp->pStamp) {
+	    DRM_SPINLOCK(&psp->pSAREA->drawable_lock, psp->drawLockID);
+	    __driUtilUpdateDrawableInfo(prp);
+	    DRM_SPINUNLOCK(&psp->pSAREA->drawable_lock, psp->drawLockID);
+	}
+    }
+
+    /* Call device-specific MakeCurrent */
+    return (*psp->DriverAPI.MakeCurrent)(pcp, pdp, prp);
+}
+
+/*@}*/
+
+
+/*****************************************************************/
+/** \name Drawable handling functions                            */
+/*****************************************************************/
+/*@{*/
+
+/**
+ * Update private drawable information.
+ *
+ * \param pdp pointer to the private drawable information to update.
+ * 
+ * This function basically updates the __DRIdrawable struct's
+ * cliprect information by calling \c __DRIinterfaceMethods::getDrawableInfo.
+ * This is usually called by the DRI_VALIDATE_DRAWABLE_INFO macro which
+ * compares the __DRIdrawable pStamp and lastStamp values.  If
+ * the values are different that means we have to update the clipping
+ * info.
+ */
+void
+__driUtilUpdateDrawableInfo(__DRIdrawable *pdp)
+{
+    __DRIscreen *psp = pdp->driScreenPriv;
+    __DRIcontext *pcp = pdp->driContextPriv;
+    
+    if (!pcp 
+	|| ((pdp != pcp->driDrawablePriv) && (pdp != pcp->driReadablePriv))) {
+	/* ERROR!!! 
+	 * ...but we must ignore it. There can be many contexts bound to a
+	 * drawable.
+	 */
+    }
+
+    if (pdp->pClipRects) {
+	free(pdp->pClipRects); 
+	pdp->pClipRects = NULL;
+    }
+
+    if (pdp->pBackClipRects) {
+	free(pdp->pBackClipRects); 
+	pdp->pBackClipRects = NULL;
+    }
+    /* The drawable lock is released around the loader callback and retaken at the end of this function. */
+    DRM_SPINUNLOCK(&psp->pSAREA->drawable_lock, psp->drawLockID);
+
+    if (! (*psp->getDrawableInfo->getDrawableInfo)(pdp,
+			  &pdp->index, &pdp->lastStamp,
+			  &pdp->x, &pdp->y, &pdp->w, &pdp->h,
+			  &pdp->numClipRects, &pdp->pClipRects,
+			  &pdp->backX,
+			  &pdp->backY,
+			  &pdp->numBackClipRects,
+			  &pdp->pBackClipRects,
+			  pdp->loaderPrivate)) {
+	/* Error -- eg the window may have been destroyed.  Keep going
+	 * with no cliprects.
+	 */
+        pdp->pStamp = &pdp->lastStamp; /* prevent endless loop */
+	pdp->numClipRects = 0;
+	pdp->pClipRects = NULL;
+	pdp->numBackClipRects = 0;
+	pdp->pBackClipRects = NULL;
+    }
+    else
+       pdp->pStamp = &(psp->pSAREA->drawableTable[pdp->index].stamp);
+
+    DRM_SPINLOCK(&psp->pSAREA->drawable_lock, psp->drawLockID);
+}
+
+/*@}*/
+
+/*****************************************************************/
+/** \name GLX callbacks                                          */
+/*****************************************************************/
+/*@{*/
+
+static void driReportDamage(__DRIdrawable *pdp,
+			    struct drm_clip_rect *pClipRects, int numClipRects)
+{
+    __DRIscreen *psp = pdp->driScreenPriv;
+
+    /* Nothing to do unless the new damage report method is present. */
+    if (!psp->damage)
+	return;
+
+    /* Report the damage.  Currently, all our drivers draw directly to
+     * the front buffer, so we report the damage there rather than to
+     * the backing store (if any).
+     */
+    (*psp->damage->reportDamage)(pdp, pdp->x, pdp->y,
+				 pClipRects, numClipRects,
+				 GL_TRUE, pdp->loaderPrivate);
+}
+
+
+/**
+ * Swap buffers.
+ *
+ * \param dPriv the drawable whose buffers are swapped.
+ * \internal
+ * Calls __DriverAPIRec::SwapBuffers, then reports every cliprect of the
+ * drawable, made relative to the drawable origin (x, y), as damage.
+ * Nothing is reported when there are no cliprects or the temporary array
+ * cannot be allocated.
+ */
+static void driSwapBuffers(__DRIdrawable *dPriv)
+{
+    __DRIscreen *psp = dPriv->driScreenPriv;
+    drm_clip_rect_t *damage;
+    int count, n;
+
+    psp->DriverAPI.SwapBuffers(dPriv);
+
+    /* The cliprect count is sampled after the driver's swap. */
+    count = dPriv->numClipRects;
+    if (!count)
+        return;
+    damage = malloc(sizeof(*damage) * count);
+    if (!damage)
+        return;
+
+    for (n = 0; n < count; n++) {
+        const drm_clip_rect_t *src = &dPriv->pClipRects[n];
+        damage[n].x1 = src->x1 - dPriv->x;
+        damage[n].y1 = src->y1 - dPriv->y;
+        damage[n].x2 = src->x2 - dPriv->x;
+        damage[n].y2 = src->y2 - dPriv->y;
+    }
+    driReportDamage(dPriv, damage, count);
+    free(damage);
+}
+/* Forward the query to the driver's GetDrawableMSC hook and return its result. */
+static int driDrawableGetMSC( __DRIscreen *sPriv, __DRIdrawable *dPriv,
+			      int64_t *msc )
+{
+    return sPriv->DriverAPI.GetDrawableMSC(sPriv, dPriv, msc);
+}
+
+/* Wait via the driver's WaitForMSC hook; when swap info is available and valid, also return the swap count in \p *sbc. */
+static int driWaitForMSC(__DRIdrawable *dPriv, int64_t target_msc,
+			 int64_t divisor, int64_t remainder,
+			 int64_t * msc, int64_t * sbc)
+{
+    __DRIswapInfo  sInfo;
+    int  status;
+
+    status = dPriv->driScreenPriv->DriverAPI.WaitForMSC( dPriv, target_msc,
+                                                         divisor, remainder,
+                                                         msc );
+    /* GetSwapInfo() may not be provided by the driver if GLX_SGI_video_sync
+     * is supported but GLX_OML_sync_control is not.  Therefore, don't return
+     * an error value if GetSwapInfo() is not implemented.
+    */
+    if ( status == 0
+         && dPriv->driScreenPriv->DriverAPI.GetSwapInfo ) {
+        status = dPriv->driScreenPriv->DriverAPI.GetSwapInfo( dPriv, & sInfo );
+        /* sInfo is only known to be filled in when the query succeeded. */
+        if ( status == 0 )
+            *sbc = sInfo.swap_count;
+    }
+    return status;
+}
+
+/* Media-stream-counter extension table: driWaitForMSC and driDrawableGetMSC are its entry points. */
+const __DRImediaStreamCounterExtension driMediaStreamCounterExtension = {
+    { __DRI_MEDIA_STREAM_COUNTER, __DRI_MEDIA_STREAM_COUNTER_VERSION },
+    driWaitForMSC,
+    driDrawableGetMSC,
+};
+
+
+static void driCopySubBuffer(__DRIdrawable *dPriv,
+			      int x, int y, int w, int h)
+{
+    drm_clip_rect_t damage;
+
+    /* Flip y against the drawable height; damage is reported before the copy. */
+    damage.x1 = x;
+    damage.x2 = x + w;
+    damage.y1 = dPriv->h - y - h;
+    damage.y2 = damage.y1 + h;
+    driReportDamage(dPriv, &damage, 1);
+    dPriv->driScreenPriv->DriverAPI.CopySubBuffer(dPriv, x, y, w, h);
+}
+/* Copy-sub-buffer extension table: driCopySubBuffer is its single entry point. */
+const __DRIcopySubBufferExtension driCopySubBufferExtension = {
+    { __DRI_COPY_SUB_BUFFER, __DRI_COPY_SUB_BUFFER_VERSION },
+    driCopySubBuffer
+};
+/* Store \p interval in the drawable; nothing else happens here. */
+static void driSetSwapInterval(__DRIdrawable *dPriv, unsigned int interval)
+{
+    dPriv->swap_interval = interval;
+}
+/* Return the drawable's stored swap interval; driCreateNewDrawable() starts it at (unsigned)-1. */
+static unsigned int driGetSwapInterval(__DRIdrawable *dPriv)
+{
+    return dPriv->swap_interval;
+}
+/* Swap-control extension table: the swap-interval setter and getter above. */
+const __DRIswapControlExtension driSwapControlExtension = {
+    { __DRI_SWAP_CONTROL, __DRI_SWAP_CONTROL_VERSION },
+    driSetSwapInterval,
+    driGetSwapInterval
+};
+
+
+/**
+ * Allocate a drawable, initialise it and run the driver's CreateBuffer hook (NULL on failure); called via __DRIscreenRec's createNewDrawable pointer.
+ */
+static __DRIdrawable *
+driCreateNewDrawable(__DRIscreen *psp, const __DRIconfig *config,
+		     drm_drawable_t hwDrawable, int renderType,
+		     const int *attrs, void *data)
+{
+    __DRIdrawable *pdp;
+
+    /* Since pbuffers are not yet supported, no drawable attributes are
+     * supported either.
+     */
+    (void) attrs;
+
+    pdp = malloc(sizeof *pdp);
+    if (!pdp) {
+	return NULL;
+    }
+    /* NOTE(review): malloc() does not zero; fields not assigned below (e.g. the dri2 members) stay uninitialised here. */
+    pdp->driContextPriv = NULL;
+    pdp->loaderPrivate = data;
+    pdp->hHWDrawable = hwDrawable;
+    pdp->refcount = 1;
+    pdp->pStamp = NULL;
+    pdp->lastStamp = 0;
+    pdp->index = 0;
+    pdp->x = 0;
+    pdp->y = 0;
+    pdp->w = 0;
+    pdp->h = 0;
+    pdp->numClipRects = 0;
+    pdp->numBackClipRects = 0;
+    pdp->pClipRects = NULL;
+    pdp->pBackClipRects = NULL;
+    pdp->vblSeq = 0;
+    pdp->vblFlags = 0;
+
+    pdp->driScreenPriv = psp;
+
+    if (!(*psp->DriverAPI.CreateBuffer)(psp, pdp, &config->modes,
+					renderType == GLX_PIXMAP_BIT)) {
+       free(pdp);
+       return NULL;
+    }
+    /* msc_base and swap_interval are only set once CreateBuffer has succeeded. */
+    pdp->msc_base = 0;
+
+    /* This special default value is replaced with the configured
+     * default value when the drawable is first bound to a direct
+     * rendering context. 
+     */
+    pdp->swap_interval = (unsigned)-1;
+
+    return pdp;
+}
+
+
+static __DRIdrawable *
+dri2CreateNewDrawable(__DRIscreen *screen,
+		      const __DRIconfig *config,
+		      void *loaderPrivate)
+{
+    __DRIdrawable *drawable =
+	driCreateNewDrawable(screen, config, 0, 0, NULL, loaderPrivate);
+
+    if (drawable == NULL)
+	return NULL;
+
+    /* Front and back cliprect lists both use the single embedded rect. */
+    drawable->pBackClipRects = &drawable->dri2.clipRect;
+    drawable->pClipRects = &drawable->dri2.clipRect;
+    /* Private stamp, started one past lastStamp so the two differ. */
+    drawable->pStamp = &drawable->dri2.stamp;
+    *drawable->pStamp = drawable->lastStamp + 1;
+    return drawable;
+}
+
+static int
+dri2ConfigQueryb(__DRIscreen *screen, const char *var, GLboolean *val)
+{
+   /* -1 when the DRI_BOOL check on var fails; *val is then left untouched. */
+   if (driCheckOption(&screen->optionCache, var, DRI_BOOL)) {
+      *val = driQueryOptionb(&screen->optionCache, var);
+      return 0;
+   }
+   return -1;
+}
+
+static int
+dri2ConfigQueryi(__DRIscreen *screen, const char *var, GLint *val)
+{
+   /* Integer and enum options are both read through driQueryOptioni(). */
+   if (driCheckOption(&screen->optionCache, var, DRI_INT) ||
+       driCheckOption(&screen->optionCache, var, DRI_ENUM)) {
+      *val = driQueryOptioni(&screen->optionCache, var);
+      return 0;
+   }
+   return -1;
+}
+
+static int
+dri2ConfigQueryf(__DRIscreen *screen, const char *var, GLfloat *val)
+{
+   /* -1 when the DRI_FLOAT check on var fails; *val is then left untouched. */
+   if (driCheckOption(&screen->optionCache, var, DRI_FLOAT)) {
+      *val = driQueryOptionf(&screen->optionCache, var);
+      return 0;
+   }
+   return -1;
+}
+
+/* Take one reference on \p pdp, which must not be NULL (it is dereferenced unconditionally). */
+static void dri_get_drawable(__DRIdrawable *pdp)
+{
+    pdp->refcount++;
+}
+/* Drop one reference on \p pdp (NULL is ignored); the last reference destroys the drawable. */
+static void dri_put_drawable(__DRIdrawable *pdp)
+{
+    __DRIscreen *psp;
+
+    if (pdp) {
+	pdp->refcount--;
+	if (pdp->refcount)
+	    return;
+	/* dri2CreateNewDrawable() aims both lists at the embedded dri2.clipRect, which is not heap memory. */
+	psp = pdp->driScreenPriv;
+        (*psp->DriverAPI.DestroyBuffer)(pdp);
+	if (pdp->pClipRects && pdp->pClipRects != &pdp->dri2.clipRect) {
+	    free(pdp->pClipRects);
+	    pdp->pClipRects = NULL;
+	}
+	if (pdp->pBackClipRects && pdp->pBackClipRects != &pdp->dri2.clipRect) {
+	    free(pdp->pBackClipRects);
+	    pdp->pBackClipRects = NULL;
+	}
+	free(pdp);
+    }
+}
+/* Drops one reference through dri_put_drawable(); the drawable is freed only when that was the last one. */
+static void
+driDestroyDrawable(__DRIdrawable *pdp)
+{
+    dri_put_drawable(pdp);
+}
+
+/*@}*/
+
+
+/*****************************************************************/
+/** \name Context handling functions                             */
+/*****************************************************************/
+/*@{*/
+
+/**
+ * Destroy the per-context private information.
+ * 
+ * \internal
+ * Calls __DriverAPIRec::DestroyContext on \p pcp and then frees \p pcp.
+ * A \c NULL \p pcp is ignored.
+ */
+static void
+driDestroyContext(__DRIcontext *pcp)
+{
+    if (pcp == NULL)
+	return;
+    (*pcp->driScreenPriv->DriverAPI.DestroyContext)(pcp);
+    free(pcp);
+}
+
+
+/**
+ * Create the per-context private driver information.
+ * 
+ * \param render_type   Type of rendering target.  \c GLX_RGBA is the only
+ *                      type likely to ever be supported for direct-rendering.
+ * \param shared        Context with which to share textures, etc. or NULL
+ *
+ * \returns An opaque pointer to the per-context private information on
+ *          success, or \c NULL on failure.
+ * 
+ * \internal
+ * This function allocates and fills a __DRIcontextRec structure.  It
+ * performs some device independent initialization and passes all the
+ * relevant information to __DriverAPIRec::CreateContext to create the
+ * context.  \p render_type is not used by this function.
+ *
+ */
+static __DRIcontext *
+driCreateNewContext(__DRIscreen *psp, const __DRIconfig *config,
+		    int render_type, __DRIcontext *shared, 
+		    drm_context_t hwContext, void *data)
+{
+    __DRIcontext *pcp;
+    void * const shareCtx = (shared != NULL) ? shared->driverPrivate : NULL;
+
+    pcp = malloc(sizeof *pcp);
+    if (!pcp)
+	return NULL;
+    /* malloc() does not zero: set both drawable pointers driUnbindContext() tests. */
+    pcp->driScreenPriv = psp;
+    pcp->driDrawablePriv = NULL;
+    pcp->driReadablePriv = NULL;
+    pcp->loaderPrivate = data;
+    pcp->dri2.draw_stamp = 0;
+    pcp->dri2.read_stamp = 0;
+
+    pcp->hHWContext = hwContext;
+
+    if ( !(*psp->DriverAPI.CreateContext)(API_OPENGL,
+					  &config->modes, pcp, shareCtx) ) {
+        free(pcp);
+        return NULL;
+    }
+
+    return pcp;
+}
+/* Return the screen's api_mask; dri2CreateNewContextForAPI() tests bit (1 << api) of it. */
+static unsigned int
+dri2GetAPIMask(__DRIscreen *screen)
+{
+    return screen->api_mask;
+}
+/* Create a DRI2 context for \p api (a __DRI_API_* value); returns NULL if the API is unknown or unsupported, or on failure. */
+static __DRIcontext *
+dri2CreateNewContextForAPI(__DRIscreen *screen, int api,
+			   const __DRIconfig *config,
+			   __DRIcontext *shared, void *data)
+{
+    __DRIcontext *context;
+    void *shareCtx = (shared != NULL) ? shared->driverPrivate : NULL;
+    gl_api mesa_api;
+
+    /* Validate api first: it is used as a shift count below. */
+    switch (api) {
+    case __DRI_API_OPENGL:
+	    mesa_api = API_OPENGL;
+	    break;
+    case __DRI_API_GLES:
+	    mesa_api = API_OPENGLES;
+	    break;
+    case __DRI_API_GLES2:
+	    mesa_api = API_OPENGLES2;
+	    break;
+    default:
+	    return NULL;
+    }
+    if (!(screen->api_mask & (1 << api)))
+	return NULL;
+    context = malloc(sizeof *context);
+    if (!context)
+	return NULL;
+    context->driScreenPriv = screen;
+    context->driDrawablePriv = context->driReadablePriv = NULL;
+    context->loaderPrivate = data;
+    context->dri2.draw_stamp = context->dri2.read_stamp = 0;
+    /* Hand the driver the Mesa API enum, not the raw __DRI_API_* value. */
+    if (!(*screen->DriverAPI.CreateContext)(mesa_api, &config->modes,
+					    context, shareCtx) ) {
+        free(context);
+        return NULL;
+    }
+    return context;
+}
+
+
+/**
+ * Create a DRI2 context for desktop OpenGL.
+ *
+ * Thin wrapper around dri2CreateNewContextForAPI() with the API fixed to
+ * \c __DRI_API_OPENGL; all other arguments are forwarded unchanged.
+ */
+static __DRIcontext *
+dri2CreateNewContext(__DRIscreen *screen, const __DRIconfig *config,
+		      __DRIcontext *shared, void *data)
+{
+   __DRIcontext *ctx;
+
+   ctx = dri2CreateNewContextForAPI(screen, __DRI_API_OPENGL,
+				    config, shared, data);
+   return ctx;
+}
+
+/**
+ * Context state copying is not implemented: all arguments are ignored and
+ * \c GL_FALSE is always returned.
+ */
+static int
+driCopyContext(__DRIcontext *dest, __DRIcontext *src, unsigned long mask)
+{
+    const int result = GL_FALSE;
+
+    (void) dest;
+    (void) src;
+    (void) mask;
+
+    return result;
+}
+
+/*@}*/
+
+
+/*****************************************************************/
+/** \name Screen handling functions                              */
+/*****************************************************************/
+/*@{*/
+
+/**
+ * Destroy the per-screen private information.
+ * 
+ * \internal
+ * Invokes __DriverAPIRec::DestroyScreen when the driver provides one.  For
+ * legacy (non-DRI2) screens the SAREA and framebuffer mappings are then
+ * released with drmUnmap() and the DRM file descriptor is handed to
+ * drmCloseOnce().  Finally \p psp itself is freed.  A \c NULL \p psp is
+ * ignored.
+ */
+static void driDestroyScreen(__DRIscreen *psp)
+{
+    if (psp == NULL)
+	return;
+
+    /* No interaction with the X-server is possible at this point.  This
+     * routine is called after XCloseDisplay, so there is no protocol
+     * stream open to the X-server anymore.
+     */
+    if (psp->DriverAPI.DestroyScreen != NULL)
+	psp->DriverAPI.DestroyScreen(psp);
+
+    /* Only the legacy path uses the SAREA/framebuffer mappings and the fd
+     * released here; failures of these calls are deliberately ignored.
+     */
+    if (!psp->dri2.enabled) {
+	(void) drmUnmap((drmAddress) psp->pSAREA, SAREA_MAX);
+	(void) drmUnmap((drmAddress) psp->pFB, psp->fbSize);
+	(void) drmCloseOnce(psp->fd);
+    }
+
+    free(psp);
+}
+
+/**
+ * Record the loader extensions this module knows about.
+ *
+ * Walks the NULL-terminated \p extensions array and, for every entry whose
+ * name matches a known loader extension, stores the entry in the matching
+ * field of \p psp.  Entries with other names are skipped.
+ */
+static void
+setupLoaderExtensions(__DRIscreen *psp,
+		      const __DRIextension **extensions)
+{
+    const __DRIextension **cursor;
+
+    for (cursor = extensions; *cursor != NULL; cursor++) {
+	const __DRIextension *ext = *cursor;
+	const char *name = ext->name;
+
+	if (strcmp(name, __DRI_GET_DRAWABLE_INFO) == 0)
+	    psp->getDrawableInfo = (__DRIgetDrawableInfoExtension *) ext;
+	if (strcmp(name, __DRI_DAMAGE) == 0)
+	    psp->damage = (__DRIdamageExtension *) ext;
+	if (strcmp(name, __DRI_SYSTEM_TIME) == 0)
+	    psp->systemTime = (__DRIsystemTimeExtension *) ext;
+	if (strcmp(name, __DRI_DRI2_LOADER) == 0)
+	    psp->dri2.loader = (__DRIdri2LoaderExtension *) ext;
+	if (strcmp(name, __DRI_IMAGE_LOOKUP) == 0)
+	    psp->dri2.image = (__DRIimageLookupExtension *) ext;
+	if (strcmp(name, __DRI_USE_INVALIDATE) == 0)
+	    psp->dri2.useInvalidate = (__DRIuseInvalidateExtension *) ext;
+    }
+}
+
+/**
+ * This is the bootstrap function for the driver.  libGL supplies all of the
+ * requisite information about the system, and the driver initializes itself.
+ * This routine also stores in \c *driver_modes the list of configs that the
+ * driver's __DriverAPIRec::InitScreen callback reports.
+ *
+ * For legacy DRI.
+ * 
+ * \param scrn  Index of the screen
+ * \param ddx_version Version of the 2D DDX.  This may not be meaningful for
+ *                    all drivers.
+ * \param dri_version Version of the "server-side" DRI.
+ * \param drm_version Version of the kernel DRM.
+ * \param frame_buffer Data describing the location and layout of the
+ *                     framebuffer.
+ * \param pSAREA       Pointer to the SAREA.
+ * \param fd           Device handle for the DRM.
+ * \param extensions   NULL-terminated list of extensions offered by the
+ *                     loader; the known ones are recorded in the screen.
+ * \param driver_modes  Returns the configs supported by the driver
+ * \param loaderPrivate  Loader private data; not used by this function.
+ *
+ * \returns the new screen, or \c NULL if memory allocation fails or the
+ *          driver's InitScreen callback returns \c NULL.
+ * 
+ * \note There is no need to check the minimum API version in this
+ * function.  Since the name of this function is versioned, it is
+ * impossible for a loader that is too old to even load this driver.
+ */
+static __DRIscreen *
+driCreateNewScreen(int scrn,
+		   const __DRIversion *ddx_version,
+		   const __DRIversion *dri_version,
+		   const __DRIversion *drm_version,
+		   const __DRIframebuffer *frame_buffer,
+		   drmAddress pSAREA, int fd, 
+		   const __DRIextension **extensions,
+		   const __DRIconfig ***driver_modes,
+		   void *loaderPrivate)
+{
+    static const __DRIextension *emptyExtensionList[] = { NULL };
+    __DRIscreen *psp;
+
+    psp = calloc(1, sizeof *psp);
+    if (!psp)
+	return NULL;
+
+    setupLoaderExtensions(psp, extensions);
+
+    /*
+    ** NOT_DONE: This is used by the X server to detect when the client
+    ** has died while holding the drawable lock.  The client sets the
+    ** drawable lock to this value.
+    */
+    psp->drawLockID = 1;
+
+    psp->drm_version = *drm_version;
+    psp->ddx_version = *ddx_version;
+    psp->dri_version = *dri_version;
+
+    psp->pSAREA = pSAREA;
+    psp->lock = (drmLock *) &psp->pSAREA->lock;
+
+    psp->pFB = frame_buffer->base;
+    psp->fbSize = frame_buffer->size;
+    psp->fbStride = frame_buffer->stride;
+    psp->fbWidth = frame_buffer->width;
+    psp->fbHeight = frame_buffer->height;
+    psp->devPrivSize = frame_buffer->dev_priv_size;
+    psp->pDevPriv = frame_buffer->dev_priv;
+
+    /* Bits per pixel are derived from the stride.  A zero-width framebuffer
+     * would make this an integer division by zero, so report 0 instead.
+     */
+    psp->fbBPP = (frame_buffer->width != 0)
+	? psp->fbStride * 8 / frame_buffer->width
+	: 0;
+
+    psp->extensions = emptyExtensionList;
+    psp->fd = fd;
+    psp->myNum = scrn;
+    psp->dri2.enabled = GL_FALSE;
+
+    /* The screen gets its own copy of the driver's callback table; only
+     * desktop OpenGL is advertised before the driver is initialized.
+     */
+    psp->DriverAPI = driDriverAPI;
+    psp->api_mask = (1 << __DRI_API_OPENGL);
+
+    *driver_modes = driDriverAPI.InitScreen(psp);
+    if (*driver_modes == NULL) {
+	free(psp);
+	return NULL;
+    }
+
+    return psp;
+}
+
+/**
+ * DRI2 counterpart of driCreateNewScreen().
+ *
+ * Allocates a zeroed screen, records the loader extensions, queries the DRM
+ * version from \p fd and lets the driver's __DriverAPIRec::InitScreen2
+ * callback initialize itself and report its configs in \c *driver_configs.
+ * Afterwards the "dri2" configuration options are parsed into the screen's
+ * option cache.
+ *
+ * \param scrn            Index of the screen.
+ * \param fd              Device handle for the DRM.
+ * \param extensions      NULL-terminated list of loader extensions.
+ * \param driver_configs  Returns the configs supported by the driver.
+ * \param data            Loader private data; not used by this function.
+ *
+ * \returns the new screen, or \c NULL if the driver has no InitScreen2
+ *          callback, memory allocation fails, or InitScreen2 returns NULL.
+ */
+static __DRIscreen *
+dri2CreateNewScreen(int scrn, int fd,
+		    const __DRIextension **extensions,
+		    const __DRIconfig ***driver_configs, void *data)
+{
+    static const __DRIextension *emptyExtensionList[] = { NULL };
+    __DRIscreen *psp;
+    drmVersionPtr version;
+    driOptionCache options;
+
+    /* A driver without a DRI2 entry point cannot be used through DRI2. */
+    if (driDriverAPI.InitScreen2 == NULL)
+        return NULL;
+
+    psp = calloc(1, sizeof(*psp));
+    if (!psp)
+	return NULL;
+
+    setupLoaderExtensions(psp, extensions);
+
+    /* If the version query fails, drm_version keeps the zeros written by
+     * calloc().
+     */
+    version = drmGetVersion(fd);
+    if (version) {
+	psp->drm_version.major = version->version_major;
+	psp->drm_version.minor = version->version_minor;
+	psp->drm_version.patch = version->version_patchlevel;
+	drmFreeVersion(version);
+    }
+
+    psp->extensions = emptyExtensionList;
+    psp->fd = fd;
+    psp->myNum = scrn;
+    psp->dri2.enabled = GL_TRUE;
+
+    /* Everything above must be in place before the driver is called. */
+    psp->DriverAPI = driDriverAPI;
+    psp->api_mask = (1 << __DRI_API_OPENGL);
+    *driver_configs = driDriverAPI.InitScreen2(psp);
+    if (*driver_configs == NULL) {
+	free(psp);
+	return NULL;
+    }
+
+    /* NOTE(review): this second copy overwrites any change InitScreen2 made
+     * to psp->DriverAPI - confirm whether that is intended or redundant.
+     */
+    psp->DriverAPI = driDriverAPI;
+
+    /* NOTE(review): 'options' is a local that is never explicitly released
+     * here; whether psp->optionCache keeps referring to it depends on
+     * driParseConfigFiles() - confirm the intended lifetime.
+     */
+    driParseOptionInfo(&options, __dri2ConfigOptions, __dri2NConfigOptions);
+    driParseConfigFiles(&psp->optionCache, &options, psp->myNum, "dri2");
+
+    return psp;
+}
+
+/**
+ * Return the extension list currently stored in the screen
+ * (__DRIscreenRec::extensions).
+ */
+static const __DRIextension **driGetExtensions(__DRIscreen *psp)
+{
+    const __DRIextension **list = psp->extensions;
+
+    return list;
+}
+
+/**
+ * Core interface
+ *
+ * The initializer is positional, so its order has to match the member order
+ * of __DRIcoreExtension (declared outside this file).
+ *
+ * NOTE(review): the three NULL slots are presumably the screen, drawable
+ * and context creation hooks, which driLegacyExtension and driDRI2Extension
+ * supply instead - confirm against the __DRIcoreExtension declaration.
+ */
+const __DRIcoreExtension driCoreExtension = {
+    { __DRI_CORE, __DRI_CORE_VERSION },
+    NULL,
+    driDestroyScreen,
+    driGetExtensions,
+    driGetConfigAttrib,
+    driIndexConfigAttrib,
+    NULL,
+    driDestroyDrawable,
+    driSwapBuffers,
+    NULL,
+    driCopyContext,
+    driDestroyContext,
+    driBindContext,
+    driUnbindContext
+};
+
+/**
+ * Legacy DRI interface
+ *
+ * Screen, drawable and context creation entry points for the legacy
+ * (non-DRI2) path.  The initializer is positional and has to match the
+ * member order of __DRIlegacyExtension (declared outside this file).
+ */
+const __DRIlegacyExtension driLegacyExtension = {
+    { __DRI_LEGACY, __DRI_LEGACY_VERSION },
+    driCreateNewScreen,
+    driCreateNewDrawable,
+    driCreateNewContext,
+};
+
+/**
+ * DRI2 interface
+ *
+ * Screen, drawable and context creation entry points for DRI2, plus the
+ * API mask query and the per-API context constructor.  The initializer is
+ * positional and has to match the member order of __DRIdri2Extension
+ * (declared outside this file).
+ */
+const __DRIdri2Extension driDRI2Extension = {
+    { __DRI_DRI2, __DRI_DRI2_VERSION },
+    dri2CreateNewScreen,
+    dri2CreateNewDrawable,
+    dri2CreateNewContext,
+    dri2GetAPIMask,
+    dri2CreateNewContextForAPI
+};
+
+/**
+ * DRI2 configuration query interface.
+ *
+ * NOTE(review): judging by their names, the three callbacks query boolean,
+ * integer and float options respectively - confirm against their
+ * definitions, which are outside this part of the file.
+ */
+const __DRI2configQueryExtension dri2ConfigQueryExtension = {
+   { __DRI2_CONFIG_QUERY, __DRI2_CONFIG_QUERY_VERSION },
+   dri2ConfigQueryb,
+   dri2ConfigQueryi,
+   dri2ConfigQueryf,
+};
+
+/**
+ * Enabling or disabling frame tracking is not supported: both arguments
+ * are ignored and \c GLX_BAD_CONTEXT is always returned.
+ */
+static int
+driFrameTracking(__DRIdrawable *drawable, GLboolean enable)
+{
+    const int status = GLX_BAD_CONTEXT;
+
+    (void) drawable;
+    (void) enable;
+
+    return status;
+}
+
+/**
+ * Query swap statistics for a drawable.
+ *
+ * Asks the driver's __DriverAPIRec::GetSwapInfo callback for the swap
+ * information of \p dpriv.  When the callback returns 0 the outputs are
+ * filled from it and \p usage is computed with driCalculateSwapUsage()
+ * using the loader's current UST; otherwise no output is written.
+ *
+ * \returns the status reported by GetSwapInfo.
+ */
+static int
+driQueryFrameTracking(__DRIdrawable *dpriv,
+		      int64_t * sbc, int64_t * missedFrames,
+		      float * lastMissedUsage, float * usage)
+{
+   __DRIscreen *screen = dpriv->driScreenPriv;
+   __DRIswapInfo info;
+   int64_t now;
+   const int rc = screen->DriverAPI.GetSwapInfo(dpriv, &info);
+
+   /* On failure the caller's output variables are left untouched. */
+   if (rc != 0)
+      return rc;
+
+   *sbc = info.swap_count;
+   *missedFrames = info.swap_missed_count;
+   *lastMissedUsage = info.swap_missed_usage;
+
+   screen->systemTime->getUST(&now);
+   *usage = driCalculateSwapUsage(dpriv, info.swap_ust, now);
+
+   return rc;
+}
+
+/**
+ * Frame tracking interface.
+ *
+ * Positional initializer: the enable/disable hook (which always fails with
+ * GLX_BAD_CONTEXT here) followed by the statistics query.
+ */
+const __DRIframeTrackingExtension driFrameTrackingExtension = {
+    { __DRI_FRAME_TRACKING, __DRI_FRAME_TRACKING_VERSION },
+    driFrameTracking,
+    driQueryFrameTracking    
+};
+
+/**
+ * Calculate the fraction of the swap interval used between two buffer swaps.
+ * 
+ * The usage value is the time elapsed since the last buffer swap, expressed
+ * as a fraction of the total swap interval time:
+ *
+ *            \f$p = t_d / (i * t_r)\f$
+ * 
+ * Where \f$t_d\f$ is the time since the last GLX buffer swap, \f$i\f$ is the
+ * swap interval (as set by \c glXSwapIntervalSGI), and \f$t_r\f$ is the time
+ * required for a single vertical refresh period (as returned by \c
+ * glXGetMscRateOML).
+ * 
+ * See the documentation for the GLX_MESA_swap_frame_usage extension for more
+ * details.
+ *
+ * \param   dPriv          Pointer to the private drawable structure.
+ * \param   last_swap_ust  UST of the previous buffer swap.
+ * \param   current_ust    Current UST.
+ * \return  If less than a single swap interval time period was required
+ *          between GLX buffer swaps, a number greater than 0 and less than
+ *          1.0 is returned.  If exactly one swap interval time period is
+ *          required, 1.0 is returned, and if more than one is required then
+ *          a number greater than 1.0 will be returned.  1.0 is also returned
+ *          when the loader's getMSCRate callback reports failure.
+ *
+ * \sa glXSwapIntervalSGI glXGetMscRateOML
+ * 
+ * \todo Instead of caching the \c glXGetMscRateOML function pointer, would it
+ *       be possible to cache the sync rate?
+ */
+float
+driCalculateSwapUsage( __DRIdrawable *dPriv, int64_t last_swap_ust,
+		       int64_t current_ust )
+{
+   __DRIscreen *screen = dPriv->driScreenPriv;
+   int32_t   numerator;
+   int32_t   denominator;
+   int       interval;
+   float     usage;
+
+   /* Without a refresh rate nothing can be computed; report full usage. */
+   if ( !(*screen->systemTime->getMSCRate)(dPriv, &numerator, &denominator,
+					    dPriv->loaderPrivate) ) {
+      return 1.0;
+   }
+
+   /* A swap interval of 0 is treated as 1. */
+   interval = (dPriv->swap_interval != 0) ? dPriv->swap_interval : 1;
+
+   /* The goal is
+    * (current_ust - last_swap_ust) / (interval * us_per_refresh).
+    *
+    * us_per_refresh is 1000000 / MSC_rate and the MSC rate is
+    * numerator / denominator, hence
+    * us_per_refresh = 1000000 * denominator / numerator.  Because that
+    * term sits in the denominator of the final result, the numerator of
+    * the rate is multiplied in and (interval * denominator) and 1000000
+    * are divided out.
+    */
+   usage = (current_ust - last_swap_ust);
+   usage *= numerator;
+   usage /= (interval * denominator);
+   usage /= 1000000.0;
+
+   return usage;
+}
+
+/**
+ * Mark a DRI2 drawable as changed by advancing its \c dri2.stamp counter.
+ */
+void
+dri2InvalidateDrawable(__DRIdrawable *drawable)
+{
+    drawable->dri2.stamp += 1;
+}
+
+/*@}*/
diff --git a/src/mesa/drivers/dri/common/dri_util.h b/src/mesa/drivers/dri/common/dri_util.h
new file mode 100644
index 0000000000..e4c590b132
--- /dev/null
+++ b/src/mesa/drivers/dri/common/dri_util.h
@@ -0,0 +1,553 @@
+/*
+ * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file dri_util.h
+ * DRI utility functions definitions.
+ *
+ * This module acts as glue between GLX and the actual hardware driver.  A DRI
+ * driver doesn't really \e have to use any of this - it's optional.  But, some
+ * useful stuff is done here that otherwise would have to be duplicated in most
+ * drivers.
+ * 
+ * Basically, these utility functions take care of some of the dirty details of
+ * screen initialization, context creation, context binding, DRM setup, etc.
+ *
+ * These functions are compiled into each DRI driver so libGL.so knows nothing
+ * about them.
+ *
+ * \sa dri_util.c.
+ * 
+ * \author Kevin E. Martin <kevin@precisioninsight.com>
+ * \author Brian Paul <brian@precisioninsight.com>
+ */
+
+#ifndef _DRI_UTIL_H_
+#define _DRI_UTIL_H_
+
+#include <GL/gl.h>
+#include <drm.h>
+#include <drm_sarea.h>
+#include <xf86drm.h>
+#include "xmlconfig.h"
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "GL/internal/glcore.h"
+#include "GL/internal/dri_interface.h"
+
+#define GLX_BAD_CONTEXT                    5
+
+typedef struct __DRIswapInfoRec        __DRIswapInfo;
+
+/**
+ * Extensions.
+ */
+extern const __DRIlegacyExtension driLegacyExtension;
+extern const __DRIcoreExtension driCoreExtension;
+extern const __DRIdri2Extension driDRI2Extension;
+extern const __DRIextension driReadDrawableExtension;
+extern const __DRIcopySubBufferExtension driCopySubBufferExtension;
+extern const __DRIswapControlExtension driSwapControlExtension;
+extern const __DRIframeTrackingExtension driFrameTrackingExtension;
+extern const __DRImediaStreamCounterExtension driMediaStreamCounterExtension;
+extern const __DRI2configQueryExtension dri2ConfigQueryExtension;
+
+/**
+ * Used by DRI_VALIDATE_DRAWABLE_INFO
+ *
+ * Refreshes the drawable information through __driUtilUpdateDrawableInfo()
+ * when the stamp that \c pStamp points at differs from \c lastStamp.
+ *
+ * \note The argument is evaluated more than once, so it must not have side
+ * effects.
+ */
+#define DRI_VALIDATE_DRAWABLE_INFO_ONCE(pDrawPriv)              \
+    do {                                                        \
+	if (*((pDrawPriv)->pStamp) != (pDrawPriv)->lastStamp) { \
+	    __driUtilUpdateDrawableInfo(pDrawPriv);             \
+	}                                                       \
+    } while (0)
+
+
+/**
+ * Utility macro to validate the drawable information.
+ *
+ * While the drawable's stamp is out of date, the hardware lock is released
+ * with DRM_UNLOCK, the drawable information is refreshed under the SAREA
+ * drawable spinlock, and the hardware lock is taken again with
+ * DRM_LIGHT_LOCK.  The loop repeats because the stamp may change again
+ * while the hardware lock is not held.
+ *
+ * See __DRIdrawable::pStamp and __DRIdrawable::lastStamp.
+ *
+ * \note Both arguments are evaluated more than once, so they must not have
+ * side effects.
+ */
+#define DRI_VALIDATE_DRAWABLE_INFO(psp, pdp)                            \
+do {                                                                    \
+    while (*((pdp)->pStamp) != (pdp)->lastStamp) {                      \
+        register unsigned int hwContext = (psp)->pSAREA->lock.lock &    \
+		     ~(DRM_LOCK_HELD | DRM_LOCK_CONT);                  \
+	DRM_UNLOCK((psp)->fd, &(psp)->pSAREA->lock, hwContext);         \
+                                                                        \
+	DRM_SPINLOCK(&(psp)->pSAREA->drawable_lock, (psp)->drawLockID); \
+	DRI_VALIDATE_DRAWABLE_INFO_ONCE(pdp);                           \
+	DRM_SPINUNLOCK(&(psp)->pSAREA->drawable_lock, (psp)->drawLockID); \
+                                                                        \
+	DRM_LIGHT_LOCK((psp)->fd, &(psp)->pSAREA->lock, hwContext);     \
+    }                                                                   \
+} while (0)
+
+/**
+ * Validate the drawable information of two drawables simultaneously.
+ *
+ * Loops while the stamp of either \p pdp or \p prp is out of date.  Each
+ * pass releases the hardware lock with DRM_UNLOCK, refreshes both drawables
+ * under the SAREA drawable spinlock, and then takes the hardware lock again
+ * with DRM_LIGHT_LOCK.
+ *
+ * \note The arguments are evaluated more than once, so they must not have
+ * side effects.
+ */
+
+#define DRI_VALIDATE_TWO_DRAWABLES_INFO(psp, pdp, prp)			\
+do {								\
+    while (*((pdp)->pStamp) != (pdp)->lastStamp ||			\
+	   *((prp)->pStamp) != (prp)->lastStamp) {			\
+        register unsigned int hwContext = (psp)->pSAREA->lock.lock &	\
+	    ~(DRM_LOCK_HELD | DRM_LOCK_CONT);				\
+	DRM_UNLOCK((psp)->fd, &(psp)->pSAREA->lock, hwContext);		\
+									\
+	DRM_SPINLOCK(&(psp)->pSAREA->drawable_lock, (psp)->drawLockID);	\
+	DRI_VALIDATE_DRAWABLE_INFO_ONCE(pdp);                           \
+	DRI_VALIDATE_DRAWABLE_INFO_ONCE(prp);				\
+	DRM_SPINUNLOCK(&(psp)->pSAREA->drawable_lock, (psp)->drawLockID); \
+									\
+	DRM_LIGHT_LOCK((psp)->fd, &(psp)->pSAREA->lock, hwContext);	\
+    }                                                                   \
+} while (0)
+
+
+/**
+ * Driver callback functions.
+ *
+ * Each DRI driver must have one of these structures with all the pointers set
+ * to appropriate functions within the driver.
+ * 
+ * When glXCreateContext() is called, for example, it'll call a helper function
+ * in dri_util.c which in turn will jump through the \a CreateContext pointer
+ * in this structure.
+ */
+struct __DriverAPIRec {
+    /**
+     * Legacy DRI screen initialization callback, invoked while the screen
+     * is created.  Returns the list of configs the driver supports; a NULL
+     * return makes screen creation fail.
+     */
+    const __DRIconfig **(*InitScreen) (__DRIscreen * priv);
+
+    /**
+     * Screen destruction callback
+     */
+    void (*DestroyScreen)(__DRIscreen *driScrnPriv);
+
+    /**
+     * Context creation callback
+     */
+    GLboolean (*CreateContext)(gl_api api,
+			       const __GLcontextModes *glVis,
+			       __DRIcontext *driContextPriv,
+                               void *sharedContextPrivate);
+
+    /**
+     * Context destruction callback
+     */
+    void (*DestroyContext)(__DRIcontext *driContextPriv);
+
+    /**
+     * Buffer (drawable) creation callback
+     */
+    GLboolean (*CreateBuffer)(__DRIscreen *driScrnPriv,
+                              __DRIdrawable *driDrawPriv,
+                              const __GLcontextModes *glVis,
+                              GLboolean pixmapBuffer);
+    
+    /**
+     * Buffer (drawable) destruction callback
+     */
+    void (*DestroyBuffer)(__DRIdrawable *driDrawPriv);
+
+    /**
+     * Buffer swapping callback 
+     */
+    void (*SwapBuffers)(__DRIdrawable *driDrawPriv);
+
+    /**
+     * Context activation callback
+     */
+    GLboolean (*MakeCurrent)(__DRIcontext *driContextPriv,
+                             __DRIdrawable *driDrawPriv,
+                             __DRIdrawable *driReadPriv);
+
+    /**
+     * Context unbinding callback
+     */
+    GLboolean (*UnbindContext)(__DRIcontext *driContextPriv);
+  
+    /**
+     * Retrieves statistics about buffer swap operations.  Required if
+     * GLX_OML_sync_control or GLX_MESA_swap_frame_usage is supported.
+     */
+    int (*GetSwapInfo)( __DRIdrawable *dPriv, __DRIswapInfo * sInfo );
+
+
+    /**
+     * These are required if GLX_OML_sync_control is supported.
+     */
+    /*@{*/
+    int (*WaitForMSC)( __DRIdrawable *priv, int64_t target_msc, 
+		       int64_t divisor, int64_t remainder,
+		       int64_t * msc );
+    int (*WaitForSBC)( __DRIdrawable *priv, int64_t target_sbc,
+		       int64_t * msc, int64_t * sbc );
+
+    int64_t (*SwapBuffersMSC)( __DRIdrawable *priv, int64_t target_msc,
+			       int64_t divisor, int64_t remainder );
+    /*@}*/
+
+    /**
+     * Sub-rectangle copy callback.
+     *
+     * NOTE(review): presumably the hook behind driCopySubBufferExtension -
+     * confirm against its definition.
+     */
+    void (*CopySubBuffer)(__DRIdrawable *driDrawPriv,
+			  int x, int y, int w, int h);
+
+    /**
+     * New version of GetMSC so we can pass drawable data to the low
+     * level DRM driver (e.g. pipe info).  Required if
+     * GLX_SGI_video_sync or GLX_OML_sync_control is supported.
+     */
+    int (*GetDrawableMSC) ( __DRIscreen * priv,
+			    __DRIdrawable *drawablePrivate,
+			    int64_t *count);
+
+
+
+    /**
+     * DRI2 Entry point.  DRI2 counterpart of InitScreen; when it is NULL,
+     * DRI2 screen creation fails.
+     */
+    const __DRIconfig **(*InitScreen2) (__DRIscreen * priv);
+};
+
+extern const struct __DriverAPIRec driDriverAPI;
+
+
+/**
+ * Buffer swap statistics, filled in by __DriverAPIRec::GetSwapInfo.
+ */
+struct __DRIswapInfoRec {
+    /** 
+     * Number of swapBuffers operations that have been *completed*. 
+     */
+    uint64_t swap_count;
+
+    /**
+     * Unadjusted system time of the last buffer swap.  This is the time
+     * when the swap completed, not the time when swapBuffers was called.
+     */
+    int64_t   swap_ust;
+
+    /**
+     * Number of swap operations that occurred after the swap deadline.  That
+     * is if a swap happens more than swap_interval frames after the previous
+     * swap, it has missed its deadline.  If swap_interval is 0, then the
+     * swap deadline is 1 frame after the previous swap.
+     */
+    uint64_t swap_missed_count;
+
+    /**
+     * Amount of time used by the last swap that missed its deadline.  This
+     * is calculated as (__glXGetUST() - swap_ust) / (swap_interval * 
+     * time_for_single_vrefresh).  If the actual value of swap_interval is
+     * 0, then 1 is used instead.  If swap_missed_count is non-zero, this
+     * should be greater than 1.0.
+     */
+    float     swap_missed_usage;
+};
+
+
+/**
+ * Per-drawable private DRI driver information.
+ */
+struct __DRIdrawableRec {
+    /**
+     * Kernel drawable handle
+     */
+    drm_drawable_t hHWDrawable;
+
+    /**
+     * Driver's private drawable information.  
+     *
+     * This structure is opaque.
+     */
+    void *driverPrivate;
+
+    /**
+     * Private data from the loader.  We just hold on to it and pass
+     * it back when calling into loader provided functions.
+     */
+    void *loaderPrivate;
+
+    /**
+     * Reference count for the number of contexts currently bound to this
+     * drawable.  
+     *
+     * Once it reaches zero, the drawable can be destroyed.
+     *
+     * \note This behavior will change with GLX 1.3.
+     */
+    int refcount;
+
+    /**
+     * Index of this drawable information in the SAREA.
+     */
+    unsigned int index;
+
+    /**
+     * Pointer to the "drawable has changed ID" stamp in the SAREA (or
+     * to dri2.stamp if DRI2 is being used).
+     */
+    unsigned int *pStamp;
+
+    /**
+     * Last value of the stamp.
+     *
+     * If this differs from the value stored at __DRIdrawable::pStamp,
+     * then the drawable information has been modified by the X server, and the
+     * drawable information (below) should be retrieved from the X server.
+     */
+    unsigned int lastStamp;
+
+    /**
+     * \name Drawable 
+     *
+     * Drawable information used in software fallbacks.
+     */
+    /*@{*/
+    int x;
+    int y;
+    int w;
+    int h;
+    int numClipRects;
+    drm_clip_rect_t *pClipRects;
+    /*@}*/
+
+    /**
+     * \name Back and depthbuffer
+     *
+     * Information about the back and depthbuffer where different from above.
+     */
+    /*@{*/
+    int backX;
+    int backY;
+    int backClipRectType;
+    int numBackClipRects;
+    drm_clip_rect_t *pBackClipRects;
+    /*@}*/
+
+    /**
+     * \name Vertical blank tracking information
+     * Used for waiting on vertical blank events.
+     */
+    /*@{*/
+    unsigned int vblSeq;
+    unsigned int vblFlags;
+    /*@}*/
+
+    /**
+     * \name Monotonic MSC tracking
+     *
+     * Low level driver is responsible for updating msc_base and
+     * vblSeq values so that higher level code can calculate
+     * a new msc value or msc target for a WaitMSC call.  The new value
+     * will be:
+     *   msc = msc_base + get_vblank_count() - vblank_base;
+     *
+     * And for waiting on a value, core code will use:
+     *   actual_target = target_msc - msc_base + vblank_base;
+     */
+    /*@{*/
+    int64_t vblank_base;
+    int64_t msc_base;
+    /*@}*/
+
+    /**
+     * Pointer to context to which this drawable is currently bound.
+     */
+    __DRIcontext *driContextPriv;
+
+    /**
+     * Pointer to screen on which this drawable was created.
+     */
+    __DRIscreen *driScreenPriv;
+
+    /**
+     * Controls swap interval as used by GLX_SGI_swap_control and
+     * GLX_MESA_swap_control.
+     */
+    unsigned int swap_interval;
+
+    /**
+     * DRI2 specific state.
+     *
+     * \c stamp is advanced by dri2InvalidateDrawable(); as described for
+     * __DRIdrawable::pStamp, pStamp points at it when DRI2 is being used.
+     *
+     * NOTE(review): \c clipRect is presumably the single clip rectangle
+     * used for DRI2 drawables - confirm against the code that fills it in.
+     */
+    struct {
+	unsigned int stamp;
+	drm_clip_rect_t clipRect;
+    } dri2;
+};
+
+/**
+ * Per-context private driver information.
+ */
+struct __DRIcontextRec {
+    /**
+     * Kernel context handle used to access the device lock.
+     */
+    drm_context_t hHWContext;
+
+    /**
+     * Device driver's private context data.  This structure is opaque.
+     */
+    void *driverPrivate;
+
+    /**
+     * Pointer to drawable currently bound to this context for drawing.
+     */
+    __DRIdrawable *driDrawablePriv;
+
+    /**
+     * Pointer to drawable currently bound to this context for reading.
+     */
+    __DRIdrawable *driReadablePriv;
+
+    /**
+     * Pointer to screen on which this context was created.
+     */
+    __DRIscreen *driScreenPriv;
+
+    /**
+     * The loader's private context data.  This structure is opaque.
+     */
+    void *loaderPrivate;
+
+    /**
+     * DRI2 specific state.
+     *
+     * NOTE(review): presumably the __DRIdrawable::dri2.stamp values last
+     * seen for the draw and read drawables - confirm against the drivers
+     * that use them.
+     */
+    struct {
+	int draw_stamp;
+	int read_stamp;
+    } dri2;
+};
+
+/**
+ * Per-screen private driver information.
+ */
+struct __DRIscreenRec {
+    /**
+     * Current screen's number
+     */
+    int myNum;
+
+    /**
+     * Callback functions into the hardware-specific DRI driver code.
+     */
+    struct __DriverAPIRec DriverAPI;
+
+    /**
+     * Extension list reported for this screen.  Screen creation points it
+     * at an empty, NULL-terminated list.
+     */
+    const __DRIextension **extensions;
+    /**
+     * DDX / 2D driver version information.
+     */
+    __DRIversion ddx_version;
+
+    /**
+     * DRI X extension version information.
+     */
+    __DRIversion dri_version;
+
+    /**
+     * DRM (kernel module) version information.
+     */
+    __DRIversion drm_version;
+
+    /**
+     * ID used when the client sets the drawable lock.
+     *
+     * The X server uses this value to detect if the client has died while
+     * holding the drawable lock.
+     */
+    int drawLockID;
+
+    /**
+     * File descriptor returned when the kernel device driver is opened.
+     * 
+     * Used to:
+     *   - authenticate client to kernel
+     *   - map the frame buffer, SAREA, etc.
+     *   - close the kernel device driver
+     */
+    int fd;
+
+    /**
+     * SAREA pointer 
+     *
+     * Used to access:
+     *   - the device lock
+     *   - the device-independent per-drawable and per-context(?) information
+     */
+    drm_sarea_t *pSAREA;
+
+    /**
+     * \name Direct frame buffer access information 
+     * Used for software fallbacks.
+     */
+    /*@{*/
+    unsigned char *pFB;
+    int fbSize;
+    int fbOrigin;
+    int fbStride;
+    int fbWidth;
+    int fbHeight;
+    int fbBPP;
+    /*@}*/
+
+    /**
+     * \name Device-dependent private information (stored in the SAREA).
+     *
+     * This data is accessed by the client driver only.
+     */
+    /*@{*/
+    void *pDevPriv;
+    int devPrivSize;
+    /*@}*/
+
+    /**
+     * Device-dependent private information (not stored in the SAREA).
+     * 
+     * This pointer is never touched by the DRI layer.
+     */
+    void *private;
+
+    /* Extensions provided by the loader. */
+    const __DRIgetDrawableInfoExtension *getDrawableInfo;
+    const __DRIsystemTimeExtension *systemTime;
+    const __DRIdamageExtension *damage;
+
+    struct {
+	/* Flag to indicate that this is a DRI2 screen.  Many of the above
+	 * fields will not be valid or initialized in that case. */
+	int enabled;
+	/* DRI2 related extensions provided by the loader. */
+	__DRIdri2LoaderExtension *loader;
+	__DRIimageLookupExtension *image;
+	__DRIuseInvalidateExtension *useInvalidate;
+    } dri2;
+
+    /* The lock actually in use, old sarea or DRI2 */
+    drmLock *lock;
+
+    /* Option values parsed from the configuration files. */
+    driOptionCache optionCache;
+
+   /* Bitmask of the client APIs contexts can be created for; bit
+    * (1 << __DRI_API_x) stands for API x.  Screen creation sets only the
+    * __DRI_API_OPENGL bit. */
+   unsigned int api_mask;
+};
+
+extern void
+__driUtilUpdateDrawableInfo(__DRIdrawable *pdp);
+
+extern float
+driCalculateSwapUsage( __DRIdrawable *dPriv,
+		       int64_t last_swap_ust, int64_t current_ust );
+
+extern GLint
+driIntersectArea( drm_clip_rect_t rect1, drm_clip_rect_t rect2 );
+
+extern void
+dri2InvalidateDrawable(__DRIdrawable *drawable);
+
+#endif /* _DRI_UTIL_H_ */
diff --git a/src/mesa/drivers/dri/common/drirenderbuffer.c b/src/mesa/drivers/dri/common/drirenderbuffer.c
new file mode 100644
index 0000000000..c9ce6e3cb6
--- /dev/null
+++ b/src/mesa/drivers/dri/common/drirenderbuffer.c
@@ -0,0 +1,200 @@
+
+#include "main/mtypes.h"
+#include "main/formats.h"
+#include "main/renderbuffer.h"
+#include "main/imports.h"
+#include "drirenderbuffer.h"
+
+
+/**
+ * AllocStorage hook for DRI window-system renderbuffers.
+ *
+ * Called when a window (gl_framebuffer) is resized (probably via
+ * driUpdateFramebufferSize(), below).  No memory is allocated here: only
+ * the width, height and internal format fields of \p rb are updated.
+ * That fits the present DRI drivers, which use statically-allocated
+ * full-screen buffers; a driver for which this is not the case should
+ * use a different AllocStorage method.
+ *
+ * \returns always \c GL_TRUE.
+ */
+static GLboolean
+driRenderbufferStorage(GLcontext *ctx, struct gl_renderbuffer *rb,
+                       GLenum internalFormat, GLuint width, GLuint height)
+{
+   /* ctx is not needed: nothing but bookkeeping happens here. */
+   rb->InternalFormat = internalFormat;
+   rb->Width = width;
+   rb->Height = height;
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Delete hook for DRI renderbuffers: releases the renderbuffer object only.
+ */
+static void
+driDeleteRenderbuffer(struct gl_renderbuffer *rb)
+{
+   void *object = rb;
+
+   /* rb->Data is deliberately left alone: chances are it is a memory
+    * mapped region for the dri drivers.
+    */
+   free(object);
+}
+
+
+/**
+ * Allocate a new driRenderbuffer object.
+ * Individual drivers are free to implement different versions of
+ * this function.
+ *
+ * At this time, this function can only be used for window-system
+ * renderbuffers, not user-created RBOs.
+ *
+ * \param format  One of MESA_FORMAT_ARGB8888, MESA_FORMAT_Z16,
+ *                MESA_FORMAT_Z32, MESA_FORMAT_Z24_S8, MESA_FORMAT_S8_Z24 or
+ *                MESA_FORMAT_S8 (for now).  MESA_FORMAT_ARGB8888 combined
+ *                with a \p cpp of 2 is overridden to MESA_FORMAT_RGB565.
+ * \param addr  address in main memory of the buffer.  Probably a memory
+ *              mapped region.
+ * \param cpp  chars or bytes per pixel
+ * \param offset  start of renderbuffer with respect to start of framebuffer
+ * \param pitch   pixels per row
+ * \param dPriv   drawable the renderbuffer belongs to
+ *
+ * \returns the new renderbuffer, or \c NULL if memory allocation fails or
+ *          \p format is not one of the formats listed above.
+ */
+driRenderbuffer *
+driNewRenderbuffer(gl_format format, GLvoid *addr,
+                   GLint cpp, GLint offset, GLint pitch,
+                   __DRIdrawable *dPriv)
+{
+   driRenderbuffer *drb;
+
+   assert(cpp > 0);
+   assert(pitch > 0);
+
+   drb = calloc(1, sizeof *drb);
+   if (drb) {
+      const GLuint name = 0;
+
+      _mesa_init_renderbuffer(&drb->Base, name);
+
+      /* Make sure we're using a null-valued GetPointer routine */
+      assert(drb->Base.GetPointer(NULL, &drb->Base, 0, 0) == NULL);
+
+      switch (format) {
+      case MESA_FORMAT_ARGB8888:
+         if (cpp == 2) {
+            /* override format */
+            format = MESA_FORMAT_RGB565;
+         }
+         drb->Base.DataType = GL_UNSIGNED_BYTE;
+         break;
+      case MESA_FORMAT_Z16:
+         /* Depth */
+         /* we always Get/Put 32-bit Z values */
+         drb->Base.DataType = GL_UNSIGNED_INT;
+         assert(cpp == 2);
+         break;
+      case MESA_FORMAT_Z32:
+         /* Depth */
+         /* we always Get/Put 32-bit Z values */
+         drb->Base.DataType = GL_UNSIGNED_INT;
+         assert(cpp == 4);
+         break;
+      case MESA_FORMAT_Z24_S8:
+         drb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+         assert(cpp == 4);
+         break;
+      case MESA_FORMAT_S8_Z24:
+         drb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+         assert(cpp == 4);
+         break;
+      case MESA_FORMAT_S8:
+         /* Stencil */
+         drb->Base.DataType = GL_UNSIGNED_BYTE;
+         break;
+      default:
+         _mesa_problem(NULL, "Bad format 0x%x in driNewRenderbuffer", format);
+         /* Don't leak the freshly allocated object on the error path. */
+         free(drb);
+         return NULL;
+      }
+
+      drb->Base.Format = format;
+
+      drb->Base.InternalFormat =
+      drb->Base._BaseFormat = _mesa_get_format_base_format(format);
+
+      drb->Base.AllocStorage = driRenderbufferStorage;
+      drb->Base.Delete = driDeleteRenderbuffer;
+
+      drb->Base.Data = addr;
+
+      /* DRI renderbuffer-specific fields: */
+      drb->dPriv = dPriv;
+      drb->offset = offset;
+      drb->pitch = pitch;
+      drb->cpp = cpp;
+
+      /* may be changed if page flipping is active: */
+      drb->flippedOffset = offset;
+      drb->flippedPitch = pitch;
+      drb->flippedData = addr;
+   }
+   return drb;
+}
+
+
+/**
+ * Update the flippedPitch/Offset/Data fields of the front and back
+ * renderbuffers.  This is used when double buffering is done via page
+ * flipping.  For a stereo visual both the left and the right pair are
+ * updated.
+ * \param fb  the framebuffer we're page flipping
+ * \param flipped  if true, each buffer takes over its partner's values,
+ *                 else each buffer gets its own (non-flipped) values
+ */
+void
+driFlipRenderbuffers(struct gl_framebuffer *fb, GLboolean flipped)
+{
+   const GLuint numPairs = fb->Visual.stereoMode ? 2 : 1;
+   GLuint side; /* 0 = left, 1 = right */
+
+   /* This function should not really be called for a single-buffered
+    * framebuffer, but play it safe.
+    */
+   if (!fb->Visual.doubleBufferMode)
+      return;
+
+   for (side = 0; side < numPairs; side++) {
+      const GLuint frontIdx =
+         (side == 0) ? BUFFER_FRONT_LEFT : BUFFER_FRONT_RIGHT;
+      const GLuint backIdx =
+         (side == 0) ? BUFFER_BACK_LEFT : BUFFER_BACK_RIGHT;
+      driRenderbuffer *front =
+         (driRenderbuffer *) fb->Attachment[frontIdx].Renderbuffer;
+      driRenderbuffer *back =
+         (driRenderbuffer *) fb->Attachment[backIdx].Renderbuffer;
+
+      /* Where each buffer's flipped values come from: the partner buffer
+       * when flipped, the buffer itself otherwise.
+       */
+      const driRenderbuffer *frontSrc = flipped ? back : front;
+      const driRenderbuffer *backSrc = flipped ? front : back;
+
+      front->flippedOffset = frontSrc->offset;
+      front->flippedPitch  = frontSrc->pitch;
+      front->flippedData   = frontSrc->Base.Data;
+
+      back->flippedOffset  = backSrc->offset;
+      back->flippedPitch   = backSrc->pitch;
+      back->flippedData    = backSrc->Base.Data;
+   }
+}
+
+
+/**
+ * Make sure the gl_framebuffer associated with dPriv has the drawable's
+ * size, resizing it through ctx->Driver.ResizeBuffers if it does not.
+ * dPriv->driverPrivate is expected to point to a gl_framebuffer object;
+ * nothing happens when it is NULL.
+ */
+void
+driUpdateFramebufferSize(GLcontext *ctx, const __DRIdrawable *dPriv)
+{
+   struct gl_framebuffer *fb = (struct gl_framebuffer *) dPriv->driverPrivate;
+
+   if (!fb)
+      return;
+
+   /* Sizes already agree: nothing to do. */
+   if (dPriv->w == fb->Width && dPriv->h == fb->Height)
+      return;
+
+   ctx->Driver.ResizeBuffers(ctx, fb, dPriv->w, dPriv->h);
+
+   /* if the driver needs the hw lock for ResizeBuffers, the drawable
+      might have changed again by now */
+   assert(fb->Width == dPriv->w);
+   assert(fb->Height == dPriv->h);
+}
diff --git a/src/mesa/drivers/dri/common/drirenderbuffer.h b/src/mesa/drivers/dri/common/drirenderbuffer.h
new file mode 100644
index 0000000000..677511334d
--- /dev/null
+++ b/src/mesa/drivers/dri/common/drirenderbuffer.h
@@ -0,0 +1,79 @@
+
+/**
+ * A driRenderbuffer is derived from gl_renderbuffer.
+ * It describes a color buffer (front or back), a depth buffer, or stencil
+ * buffer etc.
+ * Specific to DRI drivers are the offset and pitch fields.
+ */
+
+
+#ifndef DRIRENDERBUFFER_H
+#define DRIRENDERBUFFER_H
+
+#include "main/mtypes.h"
+#include "main/formats.h"
+#include "dri_util.h"
+
+
+typedef struct {
+   struct gl_renderbuffer Base;
+
+   /* Chars or bytes per pixel.  If Z and Stencil are stored together this
+    * will typically be 32 whether this is a depth or stencil renderbuffer.
+    * NOTE(review): 32 looks like a bit count, not bytes -- confirm units. */
+   GLint cpp;
+
+   /* Buffer position and pitch (row stride).  Recall that for today's DRI
+    * drivers, we have statically allocated color/depth/stencil buffers.
+    * So this information describes the whole screen, not just a window.
+    * To address pixels in a window, we need to know the window's position
+    * and size with respect to the screen.
+    */
+   GLint offset;  /* in bytes */
+   GLint pitch;   /* in pixels */
+
+   /* If the driver can do page flipping (full-screen double buffering)
+    * the current front/back buffers may get swapped.
+    * If page flipping is disabled, these fields are identical to
+    * offset/pitch above and to Base.Data.
+    * If page flipping is enabled, and this is the front(back) renderbuffer,
+    * flippedOffset/Pitch/Data will have the back(front) renderbuffer's values.
+    */
+   GLint flippedOffset;
+   GLint flippedPitch;
+   GLvoid *flippedData;  /* mmap'd address of buffer memory, if used */
+
+   /* Pointer to corresponding __DRIdrawable.  This is used to compute
+    * the window's position within the framebuffer.
+    */
+   __DRIdrawable *dPriv;
+
+   /* XXX this is for radeon/r200 only.  We should really create a new
+    * r200Renderbuffer class, derived from this class...  not a huge deal.
+    */
+   GLboolean depthHasSurface;
+
+   /**
+    * A handy flag to know if this is the back color buffer.
+    * 
+    * \note
+    * This is currently only used by tdfx.
+    */
+   GLboolean backBuffer;
+} driRenderbuffer;
+
+
+extern driRenderbuffer *
+driNewRenderbuffer(gl_format format, GLvoid *addr,
+                   GLint cpp, GLint offset, GLint pitch,
+                   __DRIdrawable *dPriv);
+
+extern void
+driFlipRenderbuffers(struct gl_framebuffer *fb, GLboolean flipped);
+
+
+extern void
+driUpdateFramebufferSize(GLcontext *ctx, const __DRIdrawable *dPriv);
+
+
+#endif /* DRIRENDERBUFFER_H */
diff --git a/src/mesa/drivers/dri/common/drisw_util.c b/src/mesa/drivers/dri/common/drisw_util.c
new file mode 100644
index 0000000000..1529c23b16
--- /dev/null
+++ b/src/mesa/drivers/dri/common/drisw_util.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2010 George Sapountzis <gsapountzis@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file drisw_util.c
+ *
+ * DRISW utility functions, i.e. dri_util.c stripped of drm-specific bits.
+ */
+
+#include "drisw_util.h"
+#include "utils.h"
+
+
+/**
+ * Screen functions
+ */
+
+static void
+setupLoaderExtensions(__DRIscreen *psp,
+		      const __DRIextension **extensions)
+{
+    const __DRIextension **ext;
+    /* Walk the NULL-terminated list looking for the swrast loader entry. */
+    for (ext = extensions; *ext != NULL; ext++) {
+	if (strcmp((*ext)->name, __DRI_SWRAST_LOADER) == 0)
+	    psp->swrast_loader = (__DRIswrastLoaderExtension *) *ext;
+    }
+}
+
+static __DRIscreen *
+driCreateNewScreen(int scrn, const __DRIextension **extensions,
+		   const __DRIconfig ***driver_configs, void *data)
+{
+    static const __DRIextension *emptyExtensionList[] = { NULL };
+    __DRIscreen *screen;
+
+    (void) data;  /* unused */
+    screen = CALLOC_STRUCT(__DRIscreenRec);
+    if (screen == NULL)
+	return NULL;
+
+    /* No file descriptor (fd = -1) and an empty driver extension list. */
+    screen->myNum = scrn;
+    screen->fd = -1;
+    screen->extensions = emptyExtensionList;
+
+    /* Remember the loader's swrast extension, if it passed one. */
+    setupLoaderExtensions(screen, extensions);
+
+    /* The driver builds the config list; a NULL list means failure. */
+    *driver_configs = driDriverAPI.InitScreen(screen);
+    if (*driver_configs != NULL)
+	return screen;
+
+    FREE(screen);
+    return NULL;
+}
+
+static void driDestroyScreen(__DRIscreen *psp)
+{
+    /* Destroying a NULL screen is a no-op. */
+    if (psp == NULL)
+	return;
+    driDriverAPI.DestroyScreen(psp);
+    FREE(psp);
+}
+
+static const __DRIextension **driGetExtensions(__DRIscreen *psp)
+{
+    return psp->extensions;  /* no NULL check: psp must be a valid screen */
+}
+
+
+/**
+ * Context functions
+ */
+
+static __DRIcontext *
+driCreateNewContext(__DRIscreen *psp, const __DRIconfig *config,
+		    __DRIcontext *shared, void *data)
+{
+    __DRIcontext *ctx = CALLOC_STRUCT(__DRIcontextRec);
+    void *sharedPriv = NULL;
+
+    if (ctx == NULL)
+	return NULL;
+
+    /* Hand the driver-private part of 'shared' (if any) to the driver. */
+    if (shared != NULL)
+	sharedPriv = shared->driverPrivate;
+    /* The new context starts with no drawables bound. */
+    ctx->driScreenPriv = psp;
+    ctx->loaderPrivate = data;
+    ctx->driDrawablePriv = NULL;
+    ctx->driReadablePriv = NULL;
+
+    if (driDriverAPI.CreateContext(API_OPENGL, &config->modes,
+				   ctx, sharedPriv))
+	return ctx;
+    FREE(ctx);
+    return NULL;
+}
+
+static void
+driDestroyContext(__DRIcontext *pcp)
+{
+    if (pcp == NULL)
+	return;  /* destroying a NULL context is a no-op */
+    driDriverAPI.DestroyContext(pcp);
+    FREE(pcp);
+}
+
+static int
+driCopyContext(__DRIcontext *dst, __DRIcontext *src, unsigned long mask)
+{
+    return GL_FALSE;  /* arguments are ignored; always returns GL_FALSE */
+}
+
+static void dri_get_drawable(__DRIdrawable *pdp);
+static void dri_put_drawable(__DRIdrawable *pdp);
+
+static int driBindContext(__DRIcontext *pcp,
+			  __DRIdrawable *pdp,
+			  __DRIdrawable *prp)
+{
+    if (pcp != NULL) {
+	/* Record the draw and read drawables on the context. */
+	pcp->driDrawablePriv = pdp;
+	pcp->driReadablePriv = prp;
+	/* Reference each distinct non-NULL drawable exactly once. */
+	if (pdp != NULL) {
+	    pdp->driContextPriv = pcp;
+	    dri_get_drawable(pdp);
+	}
+	if (prp != NULL && prp != pdp)
+	    dri_get_drawable(prp);
+    }
+
+    return driDriverAPI.MakeCurrent(pcp, pdp, prp);
+}
+
+static int driUnbindContext(__DRIcontext *pcp)
+{
+    __DRIdrawable *drawPriv;
+    __DRIdrawable *readPriv;
+
+    if (!pcp)
+	return GL_FALSE;
+
+    drawPriv = pcp->driDrawablePriv;
+    readPriv = pcp->driReadablePriv;
+
+    /* Nothing bound: already unbound, which counts as success. */
+    if (drawPriv == NULL && readPriv == NULL)
+	return GL_TRUE;
+
+    /* Let the driver unbind before the drawable references go away. */
+    driDriverAPI.UnbindContext(pcp);
+
+    /* Release the references taken at bind time; a drawable bound as
+     * both draw and read was referenced only once. */
+    dri_put_drawable(drawPriv);
+    if (readPriv != drawPriv)
+	dri_put_drawable(readPriv);
+
+    pcp->driDrawablePriv = NULL;
+    pcp->driReadablePriv = NULL;
+    return GL_TRUE;
+}
+
+
+/**
+ * Drawable functions
+ */
+
+static void dri_get_drawable(__DRIdrawable *pdp)
+{
+    pdp->refcount++;  /* take a reference; pdp is not NULL-checked here */
+}
+
+static void dri_put_drawable(__DRIdrawable *pdp)
+{
+    if (pdp == NULL)
+	return;
+
+    /* Drop one reference; other holders keep the drawable alive. */
+    if (--pdp->refcount != 0)
+	return;
+    /* Last reference gone: let the driver clean up, then free. */
+    driDriverAPI.DestroyBuffer(pdp);
+    FREE(pdp);
+}
+
+static __DRIdrawable *
+driCreateNewDrawable(__DRIscreen *psp,
+		     const __DRIconfig *config, void *data)
+{
+    __DRIdrawable *drawable = CALLOC_STRUCT(__DRIdrawableRec);
+
+    if (drawable == NULL)
+	return NULL;
+
+    drawable->driScreenPriv = psp;
+    drawable->driContextPriv = NULL;
+    drawable->loaderPrivate = data;
+
+    /* The creator owns the initial reference; driDestroyDrawable()
+     * gives it back. */
+    dri_get_drawable(drawable);
+
+    /* GL_FALSE: this is not a pixmap buffer. */
+    if (!driDriverAPI.CreateBuffer(psp, drawable, &config->modes, GL_FALSE)) {
+	FREE(drawable);
+	return NULL;
+    }
+
+    drawable->lastStamp = 1; /* const */
+    return drawable;
+}
+
+static void
+driDestroyDrawable(__DRIdrawable *pdp)
+{
+    dri_put_drawable(pdp);  /* drop one reference; freed at zero, NULL is ok */
+}
+
+static void driSwapBuffers(__DRIdrawable *pdp)
+{
+    driDriverAPI.SwapBuffers(pdp);  /* forwarded unchanged to the driver */
+}
+
+const __DRIcoreExtension driCoreExtension = {
+    { __DRI_CORE, __DRI_CORE_VERSION },
+    NULL, /* driCreateNewScreen: offered through driSWRastExtension */
+    driDestroyScreen,
+    driGetExtensions,
+    driGetConfigAttrib,
+    driIndexConfigAttrib,
+    NULL, /* driCreateNewDrawable: offered through driSWRastExtension */
+    driDestroyDrawable,
+    driSwapBuffers,
+    driCreateNewContext,
+    driCopyContext,
+    driDestroyContext,
+    driBindContext,
+    driUnbindContext
+};
+
+const __DRIswrastExtension driSWRastExtension = {
+    { __DRI_SWRAST, __DRI_SWRAST_VERSION },
+    driCreateNewScreen,     /* screen creation entry point */
+    driCreateNewDrawable    /* drawable creation entry point */
+};
diff --git a/src/mesa/drivers/dri/common/drisw_util.h b/src/mesa/drivers/dri/common/drisw_util.h
new file mode 100644
index 0000000000..9c3d01c99c
--- /dev/null
+++ b/src/mesa/drivers/dri/common/drisw_util.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * Copyright 2010 George Sapountzis <gsapountzis@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @file
+ * Binding of the DRI interface (dri_interface.h) for DRISW.
+ *
+ * The DRISW structs are 'base classes' of the corresponding DRI1 / DRI2 (DRM)
+ * structs. The bindings for SW and DRM can be unified by making the DRM structs
+ * 'sub-classes' of the SW structs, either proper or with field re-ordering.
+ *
+ * The code can also be unified but that requires cluttering the common code
+ * with ifdef's and guarding with (__DRIscreen::fd >= 0) for DRM.
+ */
+
+#ifndef _DRISW_UTIL_H
+#define _DRISW_UTIL_H
+
+#include "main/mtypes.h"
+
+#include <GL/gl.h>
+#include <GL/internal/glcore.h>
+#include <GL/internal/dri_interface.h>
+typedef struct _drmLock drmLock;
+
+
+/**
+ * Extensions
+ */
+extern const __DRIcoreExtension driCoreExtension;
+extern const __DRIswrastExtension driSWRastExtension;
+
+
+/**
+ * Driver callback functions
+ */
+struct __DriverAPIRec {
+    const __DRIconfig **(*InitScreen) (__DRIscreen * priv); /* NULL: failed */
+    /* Called by driDestroyScreen() just before the screen is freed. */
+    void (*DestroyScreen)(__DRIscreen *driScrnPriv);
+    /* GL_FALSE makes driCreateNewContext() free the context and fail. */
+    GLboolean (*CreateContext)(gl_api glapi,
+                               const __GLcontextModes *glVis,
+                               __DRIcontext *driContextPriv,
+                               void *sharedContextPrivate);
+    /* Called by driDestroyContext() just before the context is freed. */
+    void (*DestroyContext)(__DRIcontext *driContextPriv);
+    /* GL_FALSE makes driCreateNewDrawable() free the drawable and fail. */
+    GLboolean (*CreateBuffer)(__DRIscreen *driScrnPriv,
+                              __DRIdrawable *driDrawPriv,
+                              const __GLcontextModes *glVis,
+                              GLboolean pixmapBuffer);
+    /* Called when the last reference to a drawable is dropped. */
+    void (*DestroyBuffer)(__DRIdrawable *driDrawPriv);
+    /* Called by driSwapBuffers(). */
+    void (*SwapBuffers)(__DRIdrawable *driDrawPriv);
+    /* Called by driBindContext(); its result is that function's return. */
+    GLboolean (*MakeCurrent)(__DRIcontext *driContextPriv,
+                             __DRIdrawable *driDrawPriv,
+                             __DRIdrawable *driReadPriv);
+    /* Called by driUnbindContext(); the return value is ignored there. */
+    GLboolean (*UnbindContext)(__DRIcontext *driContextPriv);
+};
+
+extern const struct __DriverAPIRec driDriverAPI;
+
+
+/**
+ * Data types
+ */
+struct __DRIscreenRec {
+    int myNum;  /* screen number given to driCreateNewScreen() */
+    /* Set to -1 by driCreateNewScreen(). */
+    int fd;
+    /* NOTE(review): presumably driver-private screen data -- confirm. */
+    void *private;
+    /* Extension list returned by driGetExtensions(). */
+    const __DRIextension **extensions;
+    /* Loader's __DRI_SWRAST_LOADER extension; left zeroed if none is passed. */
+    const __DRIswrastLoaderExtension *swrast_loader;
+};
+
+struct __DRIcontextRec {
+    /* Given to CreateContext() as share data when this context is shared. */
+    void *driverPrivate;
+    /* Opaque 'data' pointer given to driCreateNewContext(). */
+    void *loaderPrivate;
+    /* Draw drawable set by driBindContext(); NULL when unbound. */
+    __DRIdrawable *driDrawablePriv;
+    /* Read drawable set by driBindContext(); NULL when unbound. */
+    __DRIdrawable *driReadablePriv;
+    /* Screen this context was created on. */
+    __DRIscreen *driScreenPriv;
+};
+
+struct __DRIdrawableRec {
+    /* NOTE(review): presumably filled in by the driver's CreateBuffer(). */
+    void *driverPrivate;
+    /* Opaque 'data' pointer given to driCreateNewDrawable(). */
+    void *loaderPrivate;
+    /* Context that last bound this as its draw drawable (driBindContext). */
+    __DRIcontext *driContextPriv;
+    /* Screen this drawable was created on. */
+    __DRIscreen *driScreenPriv;
+    /* Reference count; the drawable is destroyed when it drops to zero. */
+    int refcount;
+
+    /* gallium; set to 1 at creation and marked "const" there */
+    unsigned int lastStamp;
+    /* NOTE(review): presumably the drawable's width/height -- confirm. */
+    int w;
+    int h;
+};
+
+#endif /* _DRISW_UTIL_H */
diff --git a/src/mesa/drivers/dri/common/memops.h b/src/mesa/drivers/dri/common/memops.h
new file mode 100644
index 0000000000..9cd1d8ec3f
--- /dev/null
+++ b/src/mesa/drivers/dri/common/memops.h
@@ -0,0 +1,17 @@
+#ifndef DRIMEMSETIO_H
+#define DRIMEMSETIO_H
+/*
+* memset an area in I/O space
+* We need to be careful about this on some archs
+*/
+static INLINE void drimemsetio(void* address, int c, int size)
+{
+#if defined(__powerpc__) || defined(__ia64__)
+     int i;  /* volatile byte stores: the loop must not be turned into memset */
+     for(i=0;i<size;i++)
+        *((volatile char *)address + i)=c;
+#else
+     memset(address,c,size);
+#endif
+}
+#endif
diff --git a/src/mesa/drivers/dri/common/mmio.h b/src/mesa/drivers/dri/common/mmio.h
new file mode 100644
index 0000000000..ce95d8c907
--- /dev/null
+++ b/src/mesa/drivers/dri/common/mmio.h
@@ -0,0 +1,62 @@
+/*
+ * (C) Copyright IBM Corporation 2004
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file mmio.h
+ * Functions for properly handling memory mapped IO on various platforms.
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+
+#ifndef MMIO_H
+#define MMIO_H
+
+#include "main/glheader.h"
+
+#if defined( __powerpc__ )
+
+static INLINE uint32_t
+read_MMIO_LE32( volatile void * base, unsigned long offset )
+{
+   uint32_t val;
+   /* lwbrx: byte-reversed 32-bit load from base+offset; eieio orders I/O. */
+   __asm__ __volatile__( "lwbrx	%0, %1, %2 ; eieio"
+			 : "=r" (val)
+			 : "b" (base), "r" (offset) );
+   return val;
+}
+
+#else
+
+static INLINE uint32_t
+read_MMIO_LE32( volatile void * base, unsigned long offset )
+{
+   volatile char * const addr = ((volatile char *) base) + offset;
+   return LE32_TO_CPU( *(volatile uint32_t *) addr );  /* one 32-bit read */
+}
+
+#endif
+
+#endif /* MMIO_H */
diff --git a/src/mesa/drivers/dri/common/mmx.h b/src/mesa/drivers/dri/common/mmx.h
new file mode 100644
index 0000000000..49ce7e3e34
--- /dev/null
+++ b/src/mesa/drivers/dri/common/mmx.h
@@ -0,0 +1,560 @@
+/*	mmx.h
+
+	MultiMedia eXtensions GCC interface library for IA32.
+
+	To use this library, simply include this header file
+	and compile with GCC.  You MUST have inlining enabled
+	in order for mmx_ok() to work; this can be done by
+	simply using -O on the GCC command line.
+
+	Compiling with -DMMX_TRACE will cause detailed trace
+	output to be sent to stderr for each mmx operation.
+	This adds lots of code, and obviously slows execution to
+	a crawl, but can be very useful for debugging.
+
+	THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
+	EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
+	LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+	AND FITNESS FOR ANY PARTICULAR PURPOSE.
+
+	1997-98 by H. Dietz and R. Fisher
+
+ History:
+	97-98*	R.Fisher	Early versions
+	980501	R.Fisher	Original Release
+	980611*	H.Dietz		Rewrite, correctly implementing inlines, and
+		R.Fisher	 including direct register accesses.
+	980616	R.Fisher	Release of 980611 as 980616.
+	980714	R.Fisher	Minor corrections to Makefile, etc.
+	980715	R.Fisher	mmx_ok() now prevents optimizer from using
+				 clobbered values.
+				mmx_ok() now checks if cpuid instruction is
+				 available before trying to use it.
+	980726*	R.Fisher	mm_support() searches for AMD 3DNow, Cyrix
+				 Extended MMX, and standard MMX.  It returns a
+				 value which is positive if any of these are
+				 supported, and can be masked with constants to
+				 see which.  mmx_ok() is now a call to this
+	980726*	R.Fisher	Added i2r support for shift functions
+	980919	R.Fisher	Fixed AMD extended feature recognition bug.
+	980921	R.Fisher	Added definition/check for _MMX_H.
+				Added "float s[2]" to mmx_t for use with
+				  3DNow and EMMX.  So same mmx_t can be used.
+	981013	R.Fisher	Fixed cpuid function 1 bug (looked at wrong reg)
+				Fixed psllq_i2r error in mmxtest.c
+
+	* Unreleased (internal or interim) versions
+
+ Notes:
+	It appears that the latest gas has the pand problem fixed, therefore
+	  I'll undefine BROKEN_PAND by default.
+	String compares may be quicker than the multiple test/jumps in vendor
+	  test sequence in mmx_ok(), but I'm not concerned with that right now.
+
+ Acknowledgments:
+	Jussi Laako for pointing out the errors ultimately found to be
+	  connected to the failure to notify the optimizer of clobbered values.
+	Roger Hardiman for reminding us that CPUID isn't everywhere, and that
+	  someone may actually try to use this on a machine without CPUID.
+	  Also for suggesting code for checking this.
+	Robert Dale for pointing out the AMD recognition bug.
+	Jimmy Mayfield and Carl Witty for pointing out the Intel recognition
+	  bug.
+	Carl Witty for pointing out the psllq_i2r test bug.
+*/
+
+#ifndef _MMX_H
+#define _MMX_H
+
+//#define MMX_TRACE
+
+/*	Warning:  at this writing, the version of GAS packaged
+	with most Linux distributions does not handle the
+	parallel AND operation mnemonic correctly.  If the
+	symbol BROKEN_PAND is defined, a slower alternative
+	coding will be used.  If execution of mmxtest results
+	in an illegal instruction fault, define this symbol.
+*/
+#undef	BROKEN_PAND
+
+
+/*	The type of a value that fits in an MMX register
+	(note that long long constant values MUST be suffixed
+	 by LL and unsigned long long values by ULL, lest
+	 they be truncated by the compiler)
+*/
+typedef	union {
+	long long		q;	/* Quadword (64-bit) value */
+	unsigned long long	uq;	/* Unsigned Quadword */
+	int			d[2];	/* 2 Doubleword (32-bit) values */
+	unsigned int		ud[2];	/* 2 Unsigned Doubleword */
+	short			w[4];	/* 4 Word (16-bit) values */
+	unsigned short		uw[4];	/* 4 Unsigned Word */
+	char			b[8];	/* 8 Byte (8-bit) values */
+	unsigned char		ub[8];	/* 8 Unsigned Byte */
+	float			s[2];	/* 2 Single-precision (32-bit) values */
+} mmx_t;
+
+/*	Helper functions for the instruction macros that follow...
+	(note that memory-to-register, m2r, instructions are nearly
+	 as efficient as register-to-register, r2r, instructions;
+	 however, memory-to-memory instructions are really simulated
+	 as a convenience, and are only 1/3 as efficient)
+*/
+#ifdef	MMX_TRACE
+
+/*	Include the stuff for printing a trace to stderr...
+*/
+
+#include <stdio.h>
+
+#define	mmx_i2r(op, imm, reg) \
+	{ \
+		mmx_t mmx_trace; \
+		mmx_trace = (imm); \
+		fprintf(stderr, #op "_i2r(" #imm "=0x%016llx, ", mmx_trace.q); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ (#op " %0, %%" #reg \
+				      : /* nothing */ \
+				      : "X" (imm)); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \
+	}
+
+#define	mmx_m2r(op, mem, reg) \
+	{ \
+		mmx_t mmx_trace; \
+		mmx_trace = (mem); \
+		fprintf(stderr, #op "_m2r(" #mem "=0x%016llx, ", mmx_trace.q); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #reg "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ (#op " %0, %%" #reg \
+				      : /* nothing */ \
+				      : "X" (mem)); \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #reg "=0x%016llx\n", mmx_trace.q); \
+	}
+
+#define	mmx_r2m(op, reg, mem) \
+	{ \
+		mmx_t mmx_trace; \
+		__asm__ __volatile__ ("movq %%" #reg ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #op "_r2m(" #reg "=0x%016llx, ", mmx_trace.q); \
+		mmx_trace = (mem); \
+		fprintf(stderr, #mem "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ (#op " %%" #reg ", %0" \
+				      : "=X" (mem) \
+				      : /* nothing */ ); \
+		mmx_trace = (mem); \
+		fprintf(stderr, #mem "=0x%016llx\n", mmx_trace.q); \
+	}
+
+#define	mmx_r2r(op, regs, regd) \
+	{ \
+		mmx_t mmx_trace; \
+		__asm__ __volatile__ ("movq %%" #regs ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #op "_r2r(" #regs "=0x%016llx, ", mmx_trace.q); \
+		__asm__ __volatile__ ("movq %%" #regd ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #regd "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ (#op " %" #regs ", %" #regd); \
+		__asm__ __volatile__ ("movq %%" #regd ", %0" \
+				      : "=X" (mmx_trace) \
+				      : /* nothing */ ); \
+		fprintf(stderr, #regd "=0x%016llx\n", mmx_trace.q); \
+	}
+
+#define	mmx_m2m(op, mems, memd) \
+	{ \
+		mmx_t mmx_trace; \
+		mmx_trace = (mems); \
+		fprintf(stderr, #op "_m2m(" #mems "=0x%016llx, ", mmx_trace.q); \
+		mmx_trace = (memd); \
+		fprintf(stderr, #memd "=0x%016llx) => ", mmx_trace.q); \
+		__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
+				      #op " %1, %%mm0\n\t" \
+				      "movq %%mm0, %0" \
+				      : "=X" (memd) \
+				      : "X" (mems)); \
+		mmx_trace = (memd); \
+		fprintf(stderr, #memd "=0x%016llx\n", mmx_trace.q); \
+	}
+
+#else
+
+/*	These macros are a lot simpler without the tracing...
+*/
+
+#define	mmx_i2r(op, imm, reg) \
+	__asm__ __volatile__ (#op " $" #imm ", %%" #reg \
+			      : /* nothing */ \
+			      : /* no ';' here: the caller supplies it */)
+
+#define	mmx_m2r(op, mem, reg) \
+	__asm__ __volatile__ (#op " %0, %%" #reg \
+			      : /* nothing */ \
+			      : "X" (mem))
+
+#define	mmx_r2m(op, reg, mem) \
+	__asm__ __volatile__ (#op " %%" #reg ", %0" \
+			      : "=X" (mem) \
+			      : /* nothing */ )
+
+#define	mmx_r2r(op, regs, regd) \
+	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
+
+#define	mmx_m2m(op, mems, memd) \
+	__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
+			      #op " %1, %%mm0\n\t" \
+			      "movq %%mm0, %0" \
+			      : "=X" (memd) \
+			      : "X" (mems))
+
+#endif
+
+
+/*	1x64 MOVe Quadword
+	(this is both a load and a store...
+	 in fact, it is the only way to store)
+*/
+#define	movq_m2r(var, reg)	mmx_m2r(movq, var, reg)
+#define	movq_r2m(reg, var)	mmx_r2m(movq, reg, var)
+#define	movq_r2r(regs, regd)	mmx_r2r(movq, regs, regd)
+#define	movq(vars, vard) \
+	__asm__ __volatile__ ("movq %1, %%mm0\n\t" \
+			      "movq %%mm0, %0" \
+			      : "=X" (vard) \
+			      : "X" (vars))
+
+
+/*	1x32 MOVe Doubleword
+	(like movq, this is both load and store...
+	 but is most useful for moving things between
+	 mmx registers and ordinary registers)
+*/
+#define	movd_m2r(var, reg)	mmx_m2r(movd, var, reg)
+#define	movd_r2m(reg, var)	mmx_r2m(movd, reg, var)
+#define	movd_r2r(regs, regd)	mmx_r2r(movd, regs, regd)
+#define	movd(vars, vard) \
+	__asm__ __volatile__ ("movd %1, %%mm0\n\t" \
+			      "movd %%mm0, %0" \
+			      : "=X" (vard) \
+			      : "X" (vars))
+
+
+/*	2x32, 4x16, and 8x8 Parallel ADDs
+*/
+#define	paddd_m2r(var, reg)	mmx_m2r(paddd, var, reg)
+#define	paddd_r2r(regs, regd)	mmx_r2r(paddd, regs, regd)
+#define	paddd(vars, vard)	mmx_m2m(paddd, vars, vard)
+
+#define	paddw_m2r(var, reg)	mmx_m2r(paddw, var, reg)
+#define	paddw_r2r(regs, regd)	mmx_r2r(paddw, regs, regd)
+#define	paddw(vars, vard)	mmx_m2m(paddw, vars, vard)
+
+#define	paddb_m2r(var, reg)	mmx_m2r(paddb, var, reg)
+#define	paddb_r2r(regs, regd)	mmx_r2r(paddb, regs, regd)
+#define	paddb(vars, vard)	mmx_m2m(paddb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel ADDs using Saturation arithmetic
+*/
+#define	paddsw_m2r(var, reg)	mmx_m2r(paddsw, var, reg)
+#define	paddsw_r2r(regs, regd)	mmx_r2r(paddsw, regs, regd)
+#define	paddsw(vars, vard)	mmx_m2m(paddsw, vars, vard)
+
+#define	paddsb_m2r(var, reg)	mmx_m2r(paddsb, var, reg)
+#define	paddsb_r2r(regs, regd)	mmx_r2r(paddsb, regs, regd)
+#define	paddsb(vars, vard)	mmx_m2m(paddsb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
+*/
+#define	paddusw_m2r(var, reg)	mmx_m2r(paddusw, var, reg)
+#define	paddusw_r2r(regs, regd)	mmx_r2r(paddusw, regs, regd)
+#define	paddusw(vars, vard)	mmx_m2m(paddusw, vars, vard)
+
+#define	paddusb_m2r(var, reg)	mmx_m2r(paddusb, var, reg)
+#define	paddusb_r2r(regs, regd)	mmx_r2r(paddusb, regs, regd)
+#define	paddusb(vars, vard)	mmx_m2m(paddusb, vars, vard)
+
+
+/*	2x32, 4x16, and 8x8 Parallel SUBs
+*/
+#define	psubd_m2r(var, reg)	mmx_m2r(psubd, var, reg)
+#define	psubd_r2r(regs, regd)	mmx_r2r(psubd, regs, regd)
+#define	psubd(vars, vard)	mmx_m2m(psubd, vars, vard)
+
+#define	psubw_m2r(var, reg)	mmx_m2r(psubw, var, reg)
+#define	psubw_r2r(regs, regd)	mmx_r2r(psubw, regs, regd)
+#define	psubw(vars, vard)	mmx_m2m(psubw, vars, vard)
+
+#define	psubb_m2r(var, reg)	mmx_m2r(psubb, var, reg)
+#define	psubb_r2r(regs, regd)	mmx_r2r(psubb, regs, regd)
+#define	psubb(vars, vard)	mmx_m2m(psubb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel SUBs using Saturation arithmetic
+*/
+#define	psubsw_m2r(var, reg)	mmx_m2r(psubsw, var, reg)
+#define	psubsw_r2r(regs, regd)	mmx_r2r(psubsw, regs, regd)
+#define	psubsw(vars, vard)	mmx_m2m(psubsw, vars, vard)
+
+#define	psubsb_m2r(var, reg)	mmx_m2r(psubsb, var, reg)
+#define	psubsb_r2r(regs, regd)	mmx_r2r(psubsb, regs, regd)
+#define	psubsb(vars, vard)	mmx_m2m(psubsb, vars, vard)
+
+
+/*	4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
+*/
+#define	psubusw_m2r(var, reg)	mmx_m2r(psubusw, var, reg)
+#define	psubusw_r2r(regs, regd)	mmx_r2r(psubusw, regs, regd)
+#define	psubusw(vars, vard)	mmx_m2m(psubusw, vars, vard)
+
+#define	psubusb_m2r(var, reg)	mmx_m2r(psubusb, var, reg)
+#define	psubusb_r2r(regs, regd)	mmx_r2r(psubusb, regs, regd)
+#define	psubusb(vars, vard)	mmx_m2m(psubusb, vars, vard)
+
+
+/*	4x16 Parallel MULs giving Low 4x16 portions of results
+*/
+#define	pmullw_m2r(var, reg)	mmx_m2r(pmullw, var, reg)
+#define	pmullw_r2r(regs, regd)	mmx_r2r(pmullw, regs, regd)
+#define	pmullw(vars, vard)	mmx_m2m(pmullw, vars, vard)
+
+
+/*	4x16 Parallel MULs giving High 4x16 portions of results
+*/
+#define	pmulhw_m2r(var, reg)	mmx_m2r(pmulhw, var, reg)
+#define	pmulhw_r2r(regs, regd)	mmx_r2r(pmulhw, regs, regd)
+#define	pmulhw(vars, vard)	mmx_m2m(pmulhw, vars, vard)
+
+
+/*	4x16->2x32 Parallel Mul-ADD
+	(muls like pmullw, then adds adjacent 16-bit fields
+	 in the multiply result to make the final 2x32 result)
+*/
+#define	pmaddwd_m2r(var, reg)	mmx_m2r(pmaddwd, var, reg)
+#define	pmaddwd_r2r(regs, regd)	mmx_r2r(pmaddwd, regs, regd)
+#define	pmaddwd(vars, vard)	mmx_m2m(pmaddwd, vars, vard)
+
+
+/*	1x64 bitwise AND
+*/
+#ifdef	BROKEN_PAND
+#define	pand_m2r(var, reg) \
+	{ \
+		mmx_m2r(pandn, (mmx_t) -1LL, reg); \
+		mmx_m2r(pandn, var, reg); \
+	}
+#define	pand_r2r(regs, regd) \
+	{ /* two pandn steps: regd = ~regd, then regd = ~regd & regs */ \
+		mmx_m2r(pandn, (mmx_t) -1LL, regd); \
+		mmx_r2r(pandn, regs, regd); \
+	}
+#define	pand(vars, vard) \
+	{ \
+		movq_m2r(vard, mm0); \
+		mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
+		mmx_m2r(pandn, vars, mm0); \
+		movq_r2m(mm0, vard); \
+	}
+#else
+#define	pand_m2r(var, reg)	mmx_m2r(pand, var, reg)
+#define	pand_r2r(regs, regd)	mmx_r2r(pand, regs, regd)
+#define	pand(vars, vard)	mmx_m2m(pand, vars, vard)
+#endif
+
+
+/*	1x64 bitwise AND with Not the destination
+*/
+#define	pandn_m2r(var, reg)	mmx_m2r(pandn, var, reg)
+#define	pandn_r2r(regs, regd)	mmx_r2r(pandn, regs, regd)
+#define	pandn(vars, vard)	mmx_m2m(pandn, vars, vard)
+
+
+/*	1x64 bitwise OR
+*/
+#define	por_m2r(var, reg)	mmx_m2r(por, var, reg)
+#define	por_r2r(regs, regd)	mmx_r2r(por, regs, regd)
+#define	por(vars, vard)	mmx_m2m(por, vars, vard)
+
+
+/*	1x64 bitwise eXclusive OR
+*/
+#define	pxor_m2r(var, reg)	mmx_m2r(pxor, var, reg)
+#define	pxor_r2r(regs, regd)	mmx_r2r(pxor, regs, regd)
+#define	pxor(vars, vard)	mmx_m2m(pxor, vars, vard)
+
+
+/*	2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
+	(resulting fields are either 0 or -1)
+*/
+#define	pcmpeqd_m2r(var, reg)	mmx_m2r(pcmpeqd, var, reg)
+#define	pcmpeqd_r2r(regs, regd)	mmx_r2r(pcmpeqd, regs, regd)
+#define	pcmpeqd(vars, vard)	mmx_m2m(pcmpeqd, vars, vard)
+
+#define	pcmpeqw_m2r(var, reg)	mmx_m2r(pcmpeqw, var, reg)
+#define	pcmpeqw_r2r(regs, regd)	mmx_r2r(pcmpeqw, regs, regd)
+#define	pcmpeqw(vars, vard)	mmx_m2m(pcmpeqw, vars, vard)
+
+#define	pcmpeqb_m2r(var, reg)	mmx_m2r(pcmpeqb, var, reg)
+#define	pcmpeqb_r2r(regs, regd)	mmx_r2r(pcmpeqb, regs, regd)
+#define	pcmpeqb(vars, vard)	mmx_m2m(pcmpeqb, vars, vard)
+
+
+/*	2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
+	(resulting fields are either 0 or -1)
+*/
+#define	pcmpgtd_m2r(var, reg)	mmx_m2r(pcmpgtd, var, reg)
+#define	pcmpgtd_r2r(regs, regd)	mmx_r2r(pcmpgtd, regs, regd)
+#define	pcmpgtd(vars, vard)	mmx_m2m(pcmpgtd, vars, vard)
+
+#define	pcmpgtw_m2r(var, reg)	mmx_m2r(pcmpgtw, var, reg)
+#define	pcmpgtw_r2r(regs, regd)	mmx_r2r(pcmpgtw, regs, regd)
+#define	pcmpgtw(vars, vard)	mmx_m2m(pcmpgtw, vars, vard)
+
+#define	pcmpgtb_m2r(var, reg)	mmx_m2r(pcmpgtb, var, reg)
+#define	pcmpgtb_r2r(regs, regd)	mmx_r2r(pcmpgtb, regs, regd)
+#define	pcmpgtb(vars, vard)	mmx_m2m(pcmpgtb, vars, vard)
+
+
+/*	1x64, 2x32, and 4x16 Parallel Shift Left Logical
+*/
+#define	psllq_i2r(imm, reg)	mmx_i2r(psllq, imm, reg)
+#define	psllq_m2r(var, reg)	mmx_m2r(psllq, var, reg)
+#define	psllq_r2r(regs, regd)	mmx_r2r(psllq, regs, regd)
+#define	psllq(vars, vard)	mmx_m2m(psllq, vars, vard)
+
+#define	pslld_i2r(imm, reg)	mmx_i2r(pslld, imm, reg)
+#define	pslld_m2r(var, reg)	mmx_m2r(pslld, var, reg)
+#define	pslld_r2r(regs, regd)	mmx_r2r(pslld, regs, regd)
+#define	pslld(vars, vard)	mmx_m2m(pslld, vars, vard)
+
+#define	psllw_i2r(imm, reg)	mmx_i2r(psllw, imm, reg)
+#define	psllw_m2r(var, reg)	mmx_m2r(psllw, var, reg)
+#define	psllw_r2r(regs, regd)	mmx_r2r(psllw, regs, regd)
+#define	psllw(vars, vard)	mmx_m2m(psllw, vars, vard)
+
+
+/*	1x64, 2x32, and 4x16 Parallel Shift Right Logical
+*/
+#define	psrlq_i2r(imm, reg)	mmx_i2r(psrlq, imm, reg)
+#define	psrlq_m2r(var, reg)	mmx_m2r(psrlq, var, reg)
+#define	psrlq_r2r(regs, regd)	mmx_r2r(psrlq, regs, regd)
+#define	psrlq(vars, vard)	mmx_m2m(psrlq, vars, vard)
+
+#define	psrld_i2r(imm, reg)	mmx_i2r(psrld, imm, reg)
+#define	psrld_m2r(var, reg)	mmx_m2r(psrld, var, reg)
+#define	psrld_r2r(regs, regd)	mmx_r2r(psrld, regs, regd)
+#define	psrld(vars, vard)	mmx_m2m(psrld, vars, vard)
+
+#define	psrlw_i2r(imm, reg)	mmx_i2r(psrlw, imm, reg)
+#define	psrlw_m2r(var, reg)	mmx_m2r(psrlw, var, reg)
+#define	psrlw_r2r(regs, regd)	mmx_r2r(psrlw, regs, regd)
+#define	psrlw(vars, vard)	mmx_m2m(psrlw, vars, vard)
+
+
+/*	2x32 and 4x16 Parallel Shift Right Arithmetic
+*/
+#define	psrad_i2r(imm, reg)	mmx_i2r(psrad, imm, reg)
+#define	psrad_m2r(var, reg)	mmx_m2r(psrad, var, reg)
+#define	psrad_r2r(regs, regd)	mmx_r2r(psrad, regs, regd)
+#define	psrad(vars, vard)	mmx_m2m(psrad, vars, vard)
+
+#define	psraw_i2r(imm, reg)	mmx_i2r(psraw, imm, reg)
+#define	psraw_m2r(var, reg)	mmx_m2r(psraw, var, reg)
+#define	psraw_r2r(regs, regd)	mmx_r2r(psraw, regs, regd)
+#define	psraw(vars, vard)	mmx_m2m(psraw, vars, vard)
+
+
+/*	2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
+	(packs source and dest fields into dest in that order)
+*/
+#define	packssdw_m2r(var, reg)	mmx_m2r(packssdw, var, reg)
+#define	packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd)
+#define	packssdw(vars, vard)	mmx_m2m(packssdw, vars, vard)
+
+#define	packsswb_m2r(var, reg)	mmx_m2r(packsswb, var, reg)
+#define	packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd)
+#define	packsswb(vars, vard)	mmx_m2m(packsswb, vars, vard)
+
+
+/*	4x16->8x8 PACK and Unsigned Saturate
+	(packs source and dest fields into dest in that order)
+*/
+#define	packuswb_m2r(var, reg)	mmx_m2r(packuswb, var, reg)
+#define	packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd)
+#define	packuswb(vars, vard)	mmx_m2m(packuswb, vars, vard)
+
+
+/*	2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
+	(interleaves low half of dest with low half of source
+	 as padding in each result field)
+*/
+#define	punpckldq_m2r(var, reg)	mmx_m2r(punpckldq, var, reg)
+#define	punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd)
+#define	punpckldq(vars, vard)	mmx_m2m(punpckldq, vars, vard)
+
+#define	punpcklwd_m2r(var, reg)	mmx_m2r(punpcklwd, var, reg)
+#define	punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd)
+#define	punpcklwd(vars, vard)	mmx_m2m(punpcklwd, vars, vard)
+
+#define	punpcklbw_m2r(var, reg)	mmx_m2r(punpcklbw, var, reg)
+#define	punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd)
+#define	punpcklbw(vars, vard)	mmx_m2m(punpcklbw, vars, vard)
+
+
+/*	2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
+	(interleaves high half of dest with high half of source
+	 as padding in each result field)
+*/
+#define	punpckhdq_m2r(var, reg)	mmx_m2r(punpckhdq, var, reg)
+#define	punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd)
+#define	punpckhdq(vars, vard)	mmx_m2m(punpckhdq, vars, vard)
+
+#define	punpckhwd_m2r(var, reg)	mmx_m2r(punpckhwd, var, reg)
+#define	punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd)
+#define	punpckhwd(vars, vard)	mmx_m2m(punpckhwd, vars, vard)
+
+#define	punpckhbw_m2r(var, reg)	mmx_m2r(punpckhbw, var, reg)
+#define	punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd)
+#define	punpckhbw(vars, vard)	mmx_m2m(punpckhbw, vars, vard)
+
+
+/*	Empty MMx State
+	(used to clean-up when going from mmx to float use
+	 of the registers that are shared by both; note that
+	 there is no float-to-mmx operation needed, because
+	 only the float tag word info is corruptible)
+*/
+#ifdef	MMX_TRACE
+
+#define	emms() \
+	{ \
+		fprintf(stderr, "emms()\n"); \
+		__asm__ __volatile__ ("emms"); \
+	}
+
+#else
+
+#define	emms()			__asm__ __volatile__ ("emms")
+
+#endif
+
+#endif
+
diff --git a/src/mesa/drivers/dri/common/spantmp.h b/src/mesa/drivers/dri/common/spantmp.h
new file mode 100644
index 0000000000..cdc4f422ce
--- /dev/null
+++ b/src/mesa/drivers/dri/common/spantmp.h
@@ -0,0 +1,325 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * (C) Copyright IBM Corporation 2002, 2003
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keithw@tungstengraphics.com>
+ *    Gareth Hughes <gareth@nvidia.com>
+ */
+
+#include "spantmp_common.h"
+
+#ifndef DBG
+#define DBG 0	/* debug fprintf calls stay disabled unless the includer defined DBG */
+#endif
+
+#ifndef HW_READ_CLIPLOOP
+#define HW_READ_CLIPLOOP()	HW_CLIPLOOP()	/* default: reads use the common clip loop */
+#endif
+
+#ifndef HW_WRITE_CLIPLOOP
+#define HW_WRITE_CLIPLOOP()	HW_CLIPLOOP()	/* default: writes use the common clip loop */
+#endif
+
+
+/* Row writer (InitPointers installs it as rb->PutRow): for every clip rectangle,
+ * WRITE_RGBA stores rgba[i] at (x1, y); a non-NULL mask skips entries that are 0. */
+static void TAG(WriteRGBASpan)( GLcontext *ctx,
+                                struct gl_renderbuffer *rb,
+				GLuint n, GLint x, GLint y,
+				const void *values, const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_VARS;
+	 y = Y_FLIP(y);
+
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       if (DBG) fprintf(stderr, "WriteRGBASpan %d..%d (x1 %d)\n",
+				(int)i, (int)n1, (int)x1);
+	       /* presumably CLIPSPAN left i/x1/n1 describing the clipped part of the span */
+	       if (mask)
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     if (mask[i])
+			WRITE_RGBA( x1, y,
+				    rgba[i][0], rgba[i][1],
+				    rgba[i][2], rgba[i][3] );
+	       }
+	       else
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     WRITE_RGBA( x1, y,
+				 rgba[i][0], rgba[i][1],
+				 rgba[i][2], rgba[i][3] );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+/* Row writer for 3-byte RGB input (InitPointers installs it as rb->PutRowRGB):
+ * for every clip rectangle stores rgb[i] at (x1, y), passing 255 as alpha. */
+static void TAG(WriteRGBSpan)( GLcontext *ctx,
+                               struct gl_renderbuffer *rb,
+			       GLuint n, GLint x, GLint y,
+			       const void *values, const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_VARS;
+	 y = Y_FLIP(y);
+
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       if (DBG) fprintf(stderr, "WriteRGBSpan %d..%d (x1 %d)\n",
+				(int)i, (int)n1, (int)x1);
+
+	       if (mask)
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     if (mask[i])
+			WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
+	       }
+	       else
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+/* Scattered writer (rb->PutValues): stores rgba[i] at (x[i], Y_FLIP(y[i])) if CLIPPIXEL accepts it. */
+static void TAG(WriteRGBAPixels)( GLcontext *ctx,
+                                  struct gl_renderbuffer *rb,
+                                  GLuint n, const GLint x[], const GLint y[],
+                                  const void *values, const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+	 GLuint i;
+	 LOCAL_VARS;
+	 if (DBG) fprintf(stderr, "WriteRGBAPixels\n");
+
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       if (mask)
+	       {
+	          for (i=0;i<n;i++)
+	          {
+		     if (mask[i]) {
+		        const int fy = Y_FLIP(y[i]);
+		        if (CLIPPIXEL(x[i],fy))
+			   WRITE_RGBA( x[i], fy,
+				       rgba[i][0], rgba[i][1],
+				       rgba[i][2], rgba[i][3] );
+		     }
+	          }
+	       }
+	       else
+	       {
+	          for (i=0;i<n;i++)
+	          {
+		     const int fy = Y_FLIP(y[i]);
+		     if (CLIPPIXEL(x[i],fy))
+			WRITE_RGBA( x[i], fy,
+				    rgba[i][0], rgba[i][1],
+				    rgba[i][2], rgba[i][3] );
+	          }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/* Single-colour row writer (InitPointers installs it as rb->PutMonoRow):
+ * INIT_MONO_PIXEL sets up p from value once; WRITE_PIXEL stores p per pixel. */
+static void TAG(WriteMonoRGBASpan)( GLcontext *ctx,	
+                                    struct gl_renderbuffer *rb,
+				    GLuint n, GLint x, GLint y, 
+				    const void *value,
+				    const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         const GLubyte *color = (const GLubyte *) value;
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_VARS;
+	 INIT_MONO_PIXEL(p, color);
+	 y = Y_FLIP( y );
+	 if (DBG) fprintf(stderr, "WriteMonoRGBASpan\n");
+
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       if (mask)
+	       {
+	          for (;n1>0;i++,x1++,n1--)
+		     if (mask[i])
+		        WRITE_PIXEL( x1, y, p );
+	       }
+	       else
+	       {
+	          for (;n1>0;i++,x1++,n1--)
+		     WRITE_PIXEL( x1, y, p );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/* Single-colour scattered writer (rb->PutMonoValues): stores p at each unmasked, unclipped point. */
+static void TAG(WriteMonoRGBAPixels)( GLcontext *ctx,
+                                      struct gl_renderbuffer *rb,
+				      GLuint n,
+                                      const GLint x[], const GLint y[],
+				      const void *value,
+                                      const GLubyte mask[] ) 
+{
+   HW_WRITE_LOCK()
+      {
+         const GLubyte *color = (const GLubyte *) value;
+	 GLuint i;
+	 LOCAL_VARS;
+	 INIT_MONO_PIXEL(p, color);
+	 if (DBG) fprintf(stderr, "WriteMonoRGBAPixels\n");
+
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       if (mask)
+	       {
+		  for (i=0;i<n;i++)
+		     if (mask[i]) {
+			int fy = Y_FLIP(y[i]);
+			if (CLIPPIXEL( x[i], fy ))
+			   WRITE_PIXEL( x[i], fy, p );
+		     }
+	       }
+	       else
+	       {
+		  for (i=0;i<n;i++) {
+		     int fy = Y_FLIP(y[i]);
+		     if (CLIPPIXEL( x[i], fy ))
+			WRITE_PIXEL( x[i], fy, p );
+		  }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/* Row reader (InitPointers installs it as rb->GetRow): for every clip rectangle,
+ * READ_RGBA fills rgba[i] from (x1, y); entries outside the clip are not written. */
+static void TAG(ReadRGBASpan)( GLcontext *ctx,
+                               struct gl_renderbuffer *rb,
+			       GLuint n, GLint x, GLint y,
+			       void *values)
+{
+   HW_READ_LOCK()
+      {
+         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
+	 GLint x1,n1;
+	 LOCAL_VARS;
+	 y = Y_FLIP(y);
+
+	 if (DBG) fprintf(stderr, "ReadRGBASpan\n");
+	 HW_READ_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       for (;n1>0;i++,x1++,n1--)
+		  READ_RGBA( rgba[i], x1, y );
+	    }
+         HW_ENDCLIPLOOP();
+      }
+   HW_READ_UNLOCK();
+}
+
+
+/* Scattered reader (rb->GetValues): fills rgba[i] from (x[i], Y_FLIP(y[i])) if CLIPPIXEL accepts it. */
+static void TAG(ReadRGBAPixels)( GLcontext *ctx,
+                                 struct gl_renderbuffer *rb,
+				 GLuint n, const GLint x[], const GLint y[],
+				 void *values )
+{
+   HW_READ_LOCK()
+      {
+         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
+	 GLuint i;
+	 LOCAL_VARS;
+	 if (DBG) fprintf(stderr, "ReadRGBAPixels\n");
+
+	 HW_READ_CLIPLOOP()
+	    {
+               for (i=0;i<n;i++) {
+                  int fy = Y_FLIP( y[i] );
+                  if (CLIPPIXEL( x[i], fy ))
+                     READ_RGBA( rgba[i], x[i], fy );
+               }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_READ_UNLOCK();
+}
+
+/* Point the renderbuffer's row/value accessors at this template instance. */
+static void TAG(InitPointers)(struct gl_renderbuffer *rb)
+{
+   rb->GetRow        = TAG(ReadRGBASpan);		/* readers */
+   rb->GetValues     = TAG(ReadRGBAPixels);
+   rb->PutRow        = TAG(WriteRGBASpan);		/* per-pixel colour writers */
+   rb->PutRowRGB     = TAG(WriteRGBSpan);
+   rb->PutValues     = TAG(WriteRGBAPixels);
+   rb->PutMonoRow    = TAG(WriteMonoRGBASpan);	/* single-colour writers */
+   rb->PutMonoValues = TAG(WriteMonoRGBAPixels);
+}
+
+
+#undef WRITE_PIXEL
+#undef WRITE_RGBA
+#undef READ_RGBA
+#undef TAG
diff --git a/src/mesa/drivers/dri/common/spantmp2.h b/src/mesa/drivers/dri/common/spantmp2.h
new file mode 100644
index 0000000000..1dab7336b9
--- /dev/null
+++ b/src/mesa/drivers/dri/common/spantmp2.h
@@ -0,0 +1,916 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * (C) Copyright IBM Corporation 2004
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file spantmp2.h
+ *
+ * Template file of span read / write functions.
+ *
+ * \author Keith Whitwell <keithw@tungstengraphics.com>
+ * \author Gareth Hughes <gareth@nvidia.com>
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+#include "main/colormac.h"
+#include "spantmp_common.h"
+
+#ifndef DBG
+#define DBG 0	/* debug fprintf calls stay disabled unless the includer defined DBG */
+#endif
+
+#ifndef HW_READ_CLIPLOOP
+#define HW_READ_CLIPLOOP()	HW_CLIPLOOP()	/* default: reads use the common clip loop */
+#endif
+
+#ifndef HW_WRITE_CLIPLOOP
+#define HW_WRITE_CLIPLOOP()	HW_CLIPLOOP()	/* default: writes use the common clip loop */
+#endif
+
+#if (SPANTMP_PIXEL_FMT == GL_RGB)  && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
+
+/**
+ ** GL_RGB, GL_UNSIGNED_SHORT_5_6_5
+ ** (16-bit pixel: red in bits 15-11, green in bits 10-5, blue in bits 4-0)
+ **/
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+#define INIT_MONO_PIXEL(p, color) \
+  p = PACK_COLOR_565( color[0], color[1], color[2] )
+
+/* Alpha is not stored.  Arguments are parenthesized so any expression is safe. */
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   PUT_VALUE(_x, _y, ((((int)(r) & 0xf8) << 8) |			\
+		      (((int)(g) & 0xfc) << 3) |			\
+		      (((int)(b) & 0xf8) >> 3)))
+#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)
+
+#define READ_RGBA( rgba, _x, _y )					\
+   do {									\
+      GLushort p = GET_VALUE(_x, _y);					\
+      rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8;				\
+      rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc;				\
+      rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8;				\
+      rgba[3] = 0xff;							\
+   } while (0)
+
+#elif (SPANTMP_PIXEL_FMT == GL_RGB)  && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5_REV)
+
+/**
+ ** GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV
+ ** (READ_RGBA swaps the two bytes of the 16-bit value, then decodes it as 5_6_5)
+ **/
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+#define INIT_MONO_PIXEL(p, color) \
+  p = PACK_COLOR_565_REV( color[0], color[1], color[2] )
+
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   PUT_VALUE(_x, _y, PACK_COLOR_565_REV( r, g, b ))
+
+#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)
+
+#define READ_RGBA( rgba, _x, _y )					\
+   do {									\
+      GLushort p = GET_VALUE(_x, _y);					\
+      p = p << 8 | p >> 8;						\
+      rgba[0] = ((p >> 8) & 0xf8) * 255 / 0xf8;				\
+      rgba[1] = ((p >> 3) & 0xfc) * 255 / 0xfc;				\
+      rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8;				\
+      rgba[3] = 0xff;							\
+   } while (0)
+
+#elif (SPANTMP_PIXEL_FMT == GL_BGRA)  && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_4_4_4_4)
+
+/**
+ ** GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4
+ ** (READ_RGBA decodes: green from bits 15-12, alpha 11-8, blue 7-4, red 3-0)
+ **/
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+#define INIT_MONO_PIXEL(p, color) \
+   p = PACK_COLOR_4444_REV(color[3], color[0], color[1], color[2])
+
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   PUT_VALUE(_x, _y, PACK_COLOR_4444_REV(a, r, g, b))
+
+#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)
+/* NOTE(review): confirm the nibble order below against PACK_COLOR_4444_REV(a, r, g, b). */
+#define READ_RGBA( rgba, _x, _y )					\
+   do {									\
+      GLushort p = GET_VALUE(_x, _y);					\
+      rgba[0] = ((p >> 0) & 0xf) * 0x11;				\
+      rgba[1] = ((p >> 12) & 0xf) * 0x11;				\
+      rgba[2] = ((p >> 4) & 0xf) * 0x11;				\
+      rgba[3] = ((p >> 8) & 0xf) * 0x11;				\
+   } while (0)
+
+
+#elif (SPANTMP_PIXEL_FMT == GL_BGRA)  && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_4_4_4_4_REV)
+
+/**
+ ** GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV
+ ** (READ_RGBA decodes: alpha from bits 15-12, red 11-8, green 7-4, blue 3-0)
+ **/
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+#define INIT_MONO_PIXEL(p, color) \
+   p = PACK_COLOR_4444(color[3], color[0], color[1], color[2])
+
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   PUT_VALUE(_x, _y, PACK_COLOR_4444(a, r, g, b))
+
+#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)
+
+#define READ_RGBA( rgba, _x, _y )					\
+   do {									\
+      GLushort p = GET_VALUE(_x, _y);					\
+      rgba[0] = ((p >> 8) & 0xf) * 0x11;				\
+      rgba[1] = ((p >> 4) & 0xf) * 0x11;				\
+      rgba[2] = ((p >> 0) & 0xf) * 0x11;				\
+      rgba[3] = ((p >> 12) & 0xf) * 0x11;				\
+   } while (0)
+
+
+#elif (SPANTMP_PIXEL_FMT == GL_BGRA)  && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_1_5_5_5_REV)
+
+/**
+ ** GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV
+ ** (READ_RGBA decodes: alpha from bit 15, red 14-10, green 9-5, blue 4-0)
+ **/
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+#define INIT_MONO_PIXEL(p, color) \
+   p = PACK_COLOR_1555(color[3], color[0], color[1], color[2])
+
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   PUT_VALUE(_x, _y, PACK_COLOR_1555(a, r, g, b))
+
+#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)
+
+#define READ_RGBA( rgba, _x, _y )					\
+   do {									\
+      GLushort p = GET_VALUE(_x, _y);					\
+      rgba[0] = ((p >> 7) & 0xf8) * 255 / 0xf8;				\
+      rgba[1] = ((p >> 2) & 0xf8) * 255 / 0xf8;				\
+      rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8;				\
+      rgba[3] = ((p >> 15) & 0x1) * 0xff;				\
+   } while (0)
+
+#elif (SPANTMP_PIXEL_FMT == GL_BGRA)  && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_1_5_5_5)
+
+/**
+ ** GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5
+ ** (READ_RGBA swaps the two bytes, then decodes alpha 15, red 14-10, green 9-5, blue 4-0)
+ **/
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+#define INIT_MONO_PIXEL(p, color) \
+   p = PACK_COLOR_1555_REV(color[3], color[0], color[1], color[2])
+
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   PUT_VALUE(_x, _y, PACK_COLOR_1555_REV(a, r, g, b))
+
+#define WRITE_PIXEL( _x, _y, p ) PUT_VALUE(_x, _y, p)
+
+#define READ_RGBA( rgba, _x, _y )					\
+   do {									\
+      GLushort p = GET_VALUE(_x, _y);					\
+      p = p << 8 | p >> 8;						\
+      rgba[0] = ((p >> 7) & 0xf8) * 255 / 0xf8;				\
+      rgba[1] = ((p >> 2) & 0xf8) * 255 / 0xf8;				\
+      rgba[2] = ((p << 3) & 0xf8) * 255 / 0xf8;				\
+      rgba[3] = ((p >> 15) & 0x1) * 0xff;				\
+   } while (0)
+
+#elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
+
+/**
+ ** GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV
+ ** (32-bit pixel: alpha in bits 31-24, red 23-16, green 15-8, blue 7-0)
+ **/
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) (     buf + (_x) * 4 + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+# define INIT_MONO_PIXEL(p, color)                       \
+     p = PACK_COLOR_8888(color[3], color[0], color[1], color[2]) 
+
+/* Channels are widened to GLuint so that "<< 24" cannot overflow a signed int. */
+# define WRITE_RGBA(_x, _y, r, g, b, a)                                 \
+   PUT_VALUE(_x, _y, (((GLuint)(r) << 16) |				\
+		      ((GLuint)(g) << 8) |				\
+		      ((GLuint)(b) << 0) |				\
+		      ((GLuint)(a) << 24)))
+#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)
+
+# if defined( USE_X86_ASM )
+#  define READ_RGBA(rgba, _x, _y)                                       \
+    do {                                                                \
+       GLuint p = GET_VALUE(_x, _y);					\
+       __asm__ __volatile__( "bswap	%0; rorl $8, %0"                \
+				: "=r" (p) : "0" (p) );                 \
+       ((GLuint *)rgba)[0] = p;                                         \
+    } while (0)
+# elif defined( MESA_BIG_ENDIAN )
+    /* On PowerPC with GCC 3.4.2 the shift madness below becomes a single
+     * rotlwi instruction.  It also produces good code on SPARC.
+     */
+#  define READ_RGBA( rgba, _x, _y )				        \
+     do {								\
+        GLuint p = GET_VALUE(_x, _y);					\
+        GLuint t = p;                                                   \
+        *((uint32_t *) rgba) = (t >> 24) | (p << 8);                    \
+     } while (0)
+# else
+#  define READ_RGBA( rgba, _x, _y )				        \
+     do {								\
+        GLuint p = GET_VALUE(_x, _y);					\
+	rgba[0] = (p >> 16) & 0xff;					\
+	rgba[1] = (p >>  8) & 0xff;					\
+	rgba[2] = (p >>  0) & 0xff;					\
+	rgba[3] = (p >> 24) & 0xff;					\
+     } while (0)
+# endif
+
+#elif (SPANTMP_PIXEL_FMT == GL_BGRA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8)
+
+/**
+ ** GL_BGRA, GL_UNSIGNED_INT_8_8_8_8
+ ** (32-bit pixel: blue in bits 31-24, green 23-16, red 15-8, alpha 7-0)
+ **/
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) (     buf + (_x) * 4 + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+# define INIT_MONO_PIXEL(p, color)                       \
+     p = PACK_COLOR_8888(color[2], color[1], color[0], color[3]) 
+
+/* Channels are widened to GLuint so that "<< 24" cannot overflow a signed int. */
+# define WRITE_RGBA(_x, _y, r, g, b, a)                                 \
+   PUT_VALUE(_x, _y, (((GLuint)(r) << 8) |				\
+		      ((GLuint)(g) << 16) |				\
+		      ((GLuint)(b) << 24) |				\
+		      ((GLuint)(a) << 0)))
+#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)
+
+# if defined( USE_X86_ASM )
+#  define READ_RGBA(rgba, _x, _y)                                       \
+    do {                                                                \
+       GLuint p = GET_VALUE(_x, _y);					\
+       __asm__ __volatile__( "rorl $8, %0"				\
+				: "=r" (p) : "0" (p) );                 \
+       ((GLuint *)rgba)[0] = p;                                         \
+    } while (0)
+# elif defined( MESA_BIG_ENDIAN )
+    /* On PowerPC with GCC 3.4.2 the shift madness below becomes a single
+     * rotlwi instruction.  It also produces good code on SPARC.
+     */
+#  define READ_RGBA( rgba, _x, _y )				        \
+     do {								\
+        GLuint p = CPU_TO_LE32(GET_VALUE(_x, _y));                      \
+        GLuint t = p;                                                   \
+        *((uint32_t *) rgba) = (t >> 24) | (p << 8);                    \
+     } while (0)
+# else
+#  define READ_RGBA( rgba, _x, _y )				        \
+     do {								\
+        GLuint p = GET_VALUE(_x, _y);					\
+	rgba[0] = (p >>  8) & 0xff;					\
+	rgba[1] = (p >> 16) & 0xff;					\
+	rgba[2] = (p >> 24) & 0xff;					\
+	rgba[3] = (p >>  0) & 0xff;					\
+     } while (0)
+# endif
+
+#elif (SPANTMP_PIXEL_FMT == GL_BGR) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
+
+/**
+ ** GL_BGR, GL_UNSIGNED_INT_8_8_8_8_REV
+ **
+ ** This is really for MESA_FORMAT_XRGB8888.  The spantmp code needs to be
+ ** kicked to the curb, and we need to just code-gen this.
+ ** (red in bits 23-16, green 15-8, blue 7-0; bits 31-24 are always written as 0xff)
+ **/
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) (     buf + (_x) * 4 + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+# define INIT_MONO_PIXEL(p, color)                       \
+     p = PACK_COLOR_8888(0xff, color[0], color[1], color[2])
+
+/* The alpha argument is ignored; shifts are done as GLuint so 0xff << 24 is well defined. */
+# define WRITE_RGBA(_x, _y, r, g, b, a)					\
+   PUT_VALUE(_x, _y, (((GLuint)(r) << 16) |				\
+		      ((GLuint)(g) << 8) |				\
+		      ((GLuint)(b) << 0) |				\
+		      ((GLuint)0xff << 24)))
+#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)
+
+# if defined( USE_X86_ASM )
+#  define READ_RGBA(rgba, _x, _y)                                       \
+    do {                                                                \
+       GLuint p = GET_VALUE(_x, _y);					\
+       __asm__ __volatile__( "bswap	%0; rorl $8, %0"                \
+				: "=r" (p) : "0" (p) );                 \
+       ((GLuint *)rgba)[0] = p | 0xff000000;				\
+    } while (0)
+# elif defined( MESA_BIG_ENDIAN )
+    /* On PowerPC with GCC 3.4.2 the shift madness below becomes a single
+     * rotlwi instruction.  It also produces good code on SPARC.
+     */
+#  define READ_RGBA( rgba, _x, _y )				        \
+     do {								\
+        GLuint p = GET_VALUE(_x, _y);					\
+        *((uint32_t *) rgba) = (p << 8) | 0xff;				\
+     } while (0)
+# else
+#  define READ_RGBA( rgba, _x, _y )				        \
+     do {								\
+        GLuint p = GET_VALUE(_x, _y);					\
+	rgba[0] = (p >> 16) & 0xff;					\
+	rgba[1] = (p >>  8) & 0xff;					\
+	rgba[2] = (p >>  0) & 0xff;					\
+	rgba[3] = 0xff;							\
+     } while (0)
+# endif
+
+#elif (SPANTMP_PIXEL_FMT == GL_ALPHA) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_BYTE)
+
+/**
+ ** GL_ALPHA, GL_UNSIGNED_BYTE
+ ** (one byte per pixel holding alpha only; READ_RGBA returns red, green and blue as 0)
+ **/
+#ifndef GET_VALUE
+#ifndef GET_PTR
+#define GET_PTR(_x, _y) (     buf + (_x) + (_y) * pitch)
+#endif
+
+#define GET_VALUE(_x, _y) *(volatile GLubyte *)(GET_PTR(_x, _y))
+#define PUT_VALUE(_x, _y, _v) *(volatile GLubyte *)(GET_PTR(_x, _y)) = (_v)
+#endif /* GET_VALUE */
+
+# define INIT_MONO_PIXEL(p, color)                       \
+     p = color[3]
+
+# define WRITE_RGBA(_x, _y, r, g, b, a)                                 \
+   PUT_VALUE(_x, _y, a | (r & 0 /* quiet warnings */))
+
+#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p)
+
+#define READ_RGBA( rgba, _x, _y )				        \
+     do {								\
+        GLubyte p = GET_VALUE(_x, _y);					\
+	rgba[0] = 0;							\
+	rgba[1] = 0;							\
+	rgba[2] = 0;							\
+	rgba[3] = p;							\
+     } while (0)
+
+#else
+#error SPANTMP_PIXEL_FMT must be set to a valid value!
+#endif
+
+
+
+/**
+ ** Assembly routines.
+ **/
+
+#if defined( USE_MMX_ASM ) || defined( USE_SSE_ASM )
+#include "x86/read_rgba_span_x86.h"
+#include "x86/common_x86_asm.h"
+#endif
+
+/* Row writer (InitPointers installs it as rb->PutRow): for every clip rectangle,
+ * WRITE_RGBA stores rgba[i] at (x1, y); a non-NULL mask skips entries that are 0. */
+static void TAG(WriteRGBASpan)( GLcontext *ctx,
+                                struct gl_renderbuffer *rb,
+				GLuint n, GLint x, GLint y,
+				const void *values, const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_VARS;
+	 y = Y_FLIP(y);
+
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       if (DBG) fprintf(stderr, "WriteRGBASpan %d..%d (x1 %d)\n",
+				(int)i, (int)n1, (int)x1);
+	       /* presumably CLIPSPAN left i/x1/n1 describing the clipped part of the span */
+	       if (mask)
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     if (mask[i])
+			WRITE_RGBA( x1, y,
+				    rgba[i][0], rgba[i][1],
+				    rgba[i][2], rgba[i][3] );
+	       }
+	       else
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     WRITE_RGBA( x1, y,
+				 rgba[i][0], rgba[i][1],
+				 rgba[i][2], rgba[i][3] );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+/* Row writer for 3-byte RGB input (InitPointers installs it as rb->PutRowRGB):
+ * for every clip rectangle stores rgb[i] at (x1, y), passing 255 as alpha. */
+static void TAG(WriteRGBSpan)( GLcontext *ctx,
+                               struct gl_renderbuffer *rb,
+			       GLuint n, GLint x, GLint y,
+			       const void *values, const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_VARS;
+	 y = Y_FLIP(y);
+
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       if (DBG) fprintf(stderr, "WriteRGBSpan %d..%d (x1 %d)\n",
+				(int)i, (int)n1, (int)x1);
+
+	       if (mask)
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     if (mask[i])
+			WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
+	       }
+	       else
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     WRITE_RGBA( x1, y, rgb[i][0], rgb[i][1], rgb[i][2], 255 );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+/* Scattered writer (rb->PutValues): stores rgba[i] at (x[i], Y_FLIP(y[i])) if CLIPPIXEL accepts it. */
+static void TAG(WriteRGBAPixels)( GLcontext *ctx,
+                                  struct gl_renderbuffer *rb,
+                                  GLuint n, const GLint x[], const GLint y[],
+                                  const void *values, const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+	 GLuint i;	/* unsigned like n, so the i<n tests compare like types */
+	 LOCAL_VARS;
+	 if (DBG) fprintf(stderr, "WriteRGBAPixels\n");
+
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       if (mask)
+	       {
+	          for (i=0;i<n;i++)
+	          {
+		     if (mask[i]) {
+		        const int fy = Y_FLIP(y[i]);
+		        if (CLIPPIXEL(x[i],fy))
+			   WRITE_RGBA( x[i], fy,
+				       rgba[i][0], rgba[i][1],
+				       rgba[i][2], rgba[i][3] );
+		     }
+	          }
+	       }
+	       else
+	       {
+	          for (i=0;i<n;i++)
+	          {
+		     const int fy = Y_FLIP(y[i]);
+		     if (CLIPPIXEL(x[i],fy))
+			WRITE_RGBA( x[i], fy,
+				    rgba[i][0], rgba[i][1],
+				    rgba[i][2], rgba[i][3] );
+	          }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/* Single-colour row writer (InitPointers installs it as rb->PutMonoRow):
+ * INIT_MONO_PIXEL sets up p from value once; WRITE_PIXEL stores p per pixel. */
+static void TAG(WriteMonoRGBASpan)( GLcontext *ctx,	
+                                    struct gl_renderbuffer *rb,
+				    GLuint n, GLint x, GLint y, 
+				    const void *value, const GLubyte mask[] )
+{
+   HW_WRITE_LOCK()
+      {
+         const GLubyte *color = (const GLubyte *) value;
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_VARS;
+	 INIT_MONO_PIXEL(p, color);
+	 y = Y_FLIP( y );
+	 if (DBG) fprintf(stderr, "WriteMonoRGBASpan\n");
+
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       if (mask)
+	       {
+	          for (;n1>0;i++,x1++,n1--)
+		     if (mask[i])
+		        WRITE_PIXEL( x1, y, p );
+	       }
+	       else
+	       {
+	          for (;n1>0;i++,x1++,n1--)
+		     WRITE_PIXEL( x1, y, p );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/* Single-colour scattered writer (rb->PutMonoValues): stores p at each unmasked, unclipped point. */
+static void TAG(WriteMonoRGBAPixels)( GLcontext *ctx,
+                                      struct gl_renderbuffer *rb,
+				      GLuint n,
+				      const GLint x[], const GLint y[],
+				      const void *value,
+				      const GLubyte mask[] ) 
+{
+   HW_WRITE_LOCK()
+      {
+         const GLubyte *color = (const GLubyte *) value;
+	 GLuint i;	/* unsigned like n, so the i<n tests compare like types */
+	 LOCAL_VARS;
+	 INIT_MONO_PIXEL(p, color);
+	 if (DBG) fprintf(stderr, "WriteMonoRGBAPixels\n");
+
+	 HW_WRITE_CLIPLOOP()
+	    {
+	       if (mask)
+	       {
+		  for (i=0;i<n;i++)
+		     if (mask[i]) {
+			int fy = Y_FLIP(y[i]);
+			if (CLIPPIXEL( x[i], fy ))
+			   WRITE_PIXEL( x[i], fy, p );
+		     }
+	       }
+	       else
+	       {
+		  for (i=0;i<n;i++) {
+		     int fy = Y_FLIP(y[i]);
+		     if (CLIPPIXEL( x[i], fy ))
+			WRITE_PIXEL( x[i], fy, p );
+		  }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/* Row reader (InitPointers uses it as the plain C rb->GetRow): for every clip rectangle,
+ * READ_RGBA fills rgba[i] from (x1, y); entries outside the clip are not written. */
+static void TAG(ReadRGBASpan)( GLcontext *ctx,
+                               struct gl_renderbuffer *rb,
+			       GLuint n, GLint x, GLint y, void *values)
+{
+   HW_READ_LOCK()
+      {
+         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
+	 GLint x1,n1;
+	 LOCAL_VARS;
+	 y = Y_FLIP(y);
+
+	 if (DBG) fprintf(stderr, "ReadRGBASpan\n");
+	 HW_READ_CLIPLOOP()
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       for (;n1>0;i++,x1++,n1--)
+		  READ_RGBA( rgba[i], x1, y );
+	    }
+         HW_ENDCLIPLOOP();
+      }
+   HW_READ_UNLOCK();
+}
+
+
+/* MMX row reader: hands each clipped sub-span (GET_PTR source, rgba[i] destination,
+ * n1 pixels) to an assembly routine; emms is issued around it unless USE_INNER_EMMS. */
+#if defined(GET_PTR) && \
+   defined(USE_MMX_ASM) && \
+   (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
+	(SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
+    ((SPANTMP_PIXEL_FMT == GL_RGB) && \
+	(SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
+static void TAG2(ReadRGBASpan,_MMX)( GLcontext *ctx,
+                                     struct gl_renderbuffer *rb,
+                                     GLuint n, GLint x, GLint y, void *values)
+{
+#ifndef USE_INNER_EMMS
+   /* The EMMS instruction is directly in-lined here because using GCC's
+    * built-in _mm_empty function was found to utterly destroy performance.
+    */
+   __asm__ __volatile__( "emms" );
+#endif
+
+   HW_READ_LOCK()
+     {
+        GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
+	GLint x1,n1;
+	LOCAL_VARS;
+	y = Y_FLIP(y);
+
+	if (DBG) fprintf(stderr, "ReadRGBASpan\n");
+
+	HW_READ_CLIPLOOP()
+	  {
+	     GLint i = 0;
+	     CLIPSPAN(x,y,n,x1,n1,i);
+	       {
+		  const void * src = GET_PTR( x1, y );
+#if (SPANTMP_PIXEL_FMT == GL_RGB) && \
+		  (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)
+		  _generic_read_RGBA_span_RGB565_MMX( src, rgba[i], n1 );
+#else
+		  _generic_read_RGBA_span_BGRA8888_REV_MMX( src, rgba[i], n1 );
+#endif
+	       }
+	  }
+	HW_ENDCLIPLOOP();
+     }
+   HW_READ_UNLOCK();
+#ifndef USE_INNER_EMMS
+   __asm__ __volatile__( "emms" );
+#endif
+}
+#endif
+
+
+/* SSE2 row reader for BGRA8888_REV: hands each clipped sub-span (GET_PTR source,
+ * rgba[i] destination, n1 pixels) to _generic_read_RGBA_span_BGRA8888_REV_SSE2. */
+#if defined(GET_PTR) &&	\
+   defined(USE_SSE_ASM) && \
+   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
+     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
+static void TAG2(ReadRGBASpan,_SSE2)( GLcontext *ctx,
+                                      struct gl_renderbuffer *rb,
+                                      GLuint n, GLint x, GLint y,
+                                      void *values)
+{
+   HW_READ_LOCK()
+     {
+        GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
+	GLint x1,n1;
+	LOCAL_VARS;
+	y = Y_FLIP(y);
+
+	if (DBG) fprintf(stderr, "ReadRGBASpan\n");
+
+	HW_READ_CLIPLOOP()
+	  {
+	     GLint i = 0;
+	     CLIPSPAN(x,y,n,x1,n1,i);
+	       {
+		  const void * src = GET_PTR( x1, y );
+		  _generic_read_RGBA_span_BGRA8888_REV_SSE2( src, rgba[i], n1 );
+	       }
+	  }
+	HW_ENDCLIPLOOP();
+     }
+   HW_READ_UNLOCK();
+}
+#endif
+
+/* SSE row reader for BGRA8888_REV: hands each clipped sub-span to the assembly
+ * routine below; emms is issued before and after unless USE_INNER_EMMS is defined. */
+#if defined(GET_PTR) &&	\
+   defined(USE_SSE_ASM) && \
+   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
+     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
+static void TAG2(ReadRGBASpan,_SSE)( GLcontext *ctx,
+                                     struct gl_renderbuffer *rb,
+                                     GLuint n, GLint x, GLint y,
+                                     void *values)
+{
+#ifndef USE_INNER_EMMS
+   /* The EMMS instruction is directly in-lined here because using GCC's
+    * built-in _mm_empty function was found to utterly destroy performance.
+    */
+   __asm__ __volatile__( "emms" );
+#endif
+
+   HW_READ_LOCK()
+     {
+        GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
+	GLint x1,n1;
+	LOCAL_VARS;
+	y = Y_FLIP(y);
+
+	if (DBG) fprintf(stderr, "ReadRGBASpan\n");
+
+	HW_READ_CLIPLOOP()
+	  {
+	     GLint i = 0;
+	     CLIPSPAN(x,y,n,x1,n1,i);
+	       {
+		  const void * src = GET_PTR( x1, y );
+		  _generic_read_RGBA_span_BGRA8888_REV_SSE( src, rgba[i], n1 );
+	       }
+	  }
+	HW_ENDCLIPLOOP();
+     }
+   HW_READ_UNLOCK();
+#ifndef USE_INNER_EMMS
+   __asm__ __volatile__( "emms" );
+#endif
+}
+#endif
+
+
+/* Scattered reader (rb->GetValues): fills rgba[i] from (x[i], Y_FLIP(y[i])) if CLIPPIXEL accepts it. */
+static void TAG(ReadRGBAPixels)( GLcontext *ctx,
+                                 struct gl_renderbuffer *rb,
+				 GLuint n, const GLint x[], const GLint y[],
+				 void *values )
+{
+   HW_READ_LOCK()
+      {
+         GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
+	 GLuint i;	/* unsigned like n, so the i<n test compares like types */
+	 LOCAL_VARS;
+	 if (DBG) fprintf(stderr, "ReadRGBAPixels\n");
+
+	 HW_READ_CLIPLOOP()
+	    {
+               for (i=0;i<n;i++) {
+                  int fy = Y_FLIP( y[i] );
+                  if (CLIPPIXEL( x[i], fy ))
+                     READ_RGBA( rgba[i], x[i], fy );
+               }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_READ_UNLOCK();
+}
+
+/* Point the renderbuffer's row/value accessors at this template instance. */
+static void TAG(InitPointers)(struct gl_renderbuffer *rb)
+{
+   rb->PutRow = TAG(WriteRGBASpan);
+   rb->PutRowRGB = TAG(WriteRGBSpan);
+   rb->PutMonoRow = TAG(WriteMonoRGBASpan);
+   rb->PutValues = TAG(WriteRGBAPixels);
+   rb->PutMonoValues = TAG(WriteMonoRGBAPixels);
+   rb->GetValues = TAG(ReadRGBAPixels);
+   /* GetRow: each compiled-in "if (cpu_has_*) ... else" chains into the next; the C reader is last. */
+#if defined(GET_PTR)
+#if defined(USE_SSE_ASM) && \
+   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
+     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
+   if ( cpu_has_xmm2 ) {
+      if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "SSE2" );
+      rb->GetRow = TAG2(ReadRGBASpan, _SSE2);
+   }
+   else
+#endif
+#if defined(USE_SSE_ASM) && \
+   (SPANTMP_PIXEL_FMT == GL_BGRA) && \
+     (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)
+   if ( cpu_has_xmm ) {
+      if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "SSE" );
+      rb->GetRow = TAG2(ReadRGBASpan, _SSE);
+   }
+   else
+#endif
+#if defined(USE_MMX_ASM) && \
+   (((SPANTMP_PIXEL_FMT == GL_BGRA) && \
+	(SPANTMP_PIXEL_TYPE == GL_UNSIGNED_INT_8_8_8_8_REV)) || \
+    ((SPANTMP_PIXEL_FMT == GL_RGB) && \
+	(SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5)))
+   if ( cpu_has_mmx ) {
+      if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "MMX" );
+      rb->GetRow = TAG2(ReadRGBASpan, _MMX);
+   }
+   else
+#endif
+#endif /* GET_PTR */
+   {
+      if (DBG) fprintf( stderr, "Using %s version of GetRow\n", "C" );
+      rb->GetRow = TAG(ReadRGBASpan);
+   }
+}
+
+
+#undef INIT_MONO_PIXEL
+#undef WRITE_PIXEL
+#undef WRITE_RGBA
+#undef READ_RGBA
+#undef TAG
+#undef TAG2
+#undef GET_VALUE
+#undef PUT_VALUE
+#undef GET_PTR
+#undef SPANTMP_PIXEL_FMT
+#undef SPANTMP_PIXEL_TYPE
diff --git a/src/mesa/drivers/dri/common/spantmp_common.h b/src/mesa/drivers/dri/common/spantmp_common.h
new file mode 100644
index 0000000000..a4509a569d
--- /dev/null
+++ b/src/mesa/drivers/dri/common/spantmp_common.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * (C) Copyright IBM Corporation 2004
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file spantmp_common.h
+ *
+ * common macros for span read / write functions to be used in the depth,
+ * stencil and pixel span templates.
+ */
+
+#ifndef HW_WRITE_LOCK	/* each lock macro falls back to the generic HW_LOCK() / HW_UNLOCK() unless already defined */
+#define HW_WRITE_LOCK()		HW_LOCK()
+#endif /* HW_WRITE_LOCK */
+
+#ifndef HW_WRITE_UNLOCK
+#define HW_WRITE_UNLOCK()	HW_UNLOCK()
+#endif /* HW_WRITE_UNLOCK */
+
+#ifndef HW_READ_LOCK
+#define HW_READ_LOCK()		HW_LOCK()
+#endif /* HW_READ_LOCK */
+
+#ifndef HW_READ_UNLOCK
+#define HW_READ_UNLOCK()	HW_UNLOCK()
+#endif /* HW_READ_UNLOCK */
+
+#ifndef HW_CLIPLOOP	/* opens a loop over dPriv's cliprects, last to first; defines minx/miny/maxx/maxy with dPriv->x / dPriv->y subtracted */
+#define HW_CLIPLOOP()							\
+   do {									\
+      int _nc = dPriv->numClipRects;					\
+      while ( _nc-- ) {							\
+	 int minx = dPriv->pClipRects[_nc].x1 - dPriv->x;		\
+	 int miny = dPriv->pClipRects[_nc].y1 - dPriv->y;		\
+	 int maxx = dPriv->pClipRects[_nc].x2 - dPriv->x;		\
+	 int maxy = dPriv->pClipRects[_nc].y2 - dPriv->y;
+#endif /* HW_CLIPLOOP */
+/* HW_ENDCLIPLOOP() closes the while loop and the do-block left open by HW_CLIPLOOP(). */
+#ifndef HW_ENDCLIPLOOP
+#define HW_ENDCLIPLOOP()						\
+      }									\
+   } while (0)
+#endif /* HW_ENDCLIPLOOP */
+
+#ifndef CLIPPIXEL	/* nonzero when (_x,_y) is inside the current cliprect: min bounds inclusive, max bounds exclusive */
+#define CLIPPIXEL( _x, _y )						\
+   (((_x) >= minx) && ((_x) < maxx) && ((_y) >= miny) && ((_y) < maxy))
+#endif
+
+#ifndef CLIPSPAN	/* clip span (_x,_y,_n) to the current cliprect: results in _x1/_n1, _i is advanced past pixels cut off on the left */
+#define CLIPSPAN( _x, _y, _n, _x1, _n1, _i )				\
+   if ( (_y) < miny || (_y) >= maxy /*|| _x + _n < minx || _x >= maxx*/ ) { \
+      _n1 = 0, _x1 = (_x);	/* row outside the cliprect: empty span */ \
+   } else {								\
+      _n1 = (_n);							\
+      _x1 = (_x);							\
+      if ( _x1 < minx ) _i += (minx-_x1), _n1 -= (minx-_x1), _x1 = minx; \
+      if ( _x1 + _n1 >= maxx ) _n1 -= (_x1 + _n1 - maxx); /* may go <= 0 */ \
+   }
+#endif
diff --git a/src/mesa/drivers/dri/common/stenciltmp.h b/src/mesa/drivers/dri/common/stenciltmp.h
new file mode 100644
index 0000000000..2b10b9ecfe
--- /dev/null
+++ b/src/mesa/drivers/dri/common/stenciltmp.h
@@ -0,0 +1,245 @@
+
+#include "spantmp_common.h"
+
+#ifndef DBG		/* debug tracing to stderr; off unless the includer defines DBG */
+#define DBG 0
+#endif
+
+#ifndef HAVE_HW_STENCIL_SPANS	/* nonzero: span functions use WRITE_STENCIL_SPAN() / READ_STENCIL_SPAN() */
+#define HAVE_HW_STENCIL_SPANS 0
+#endif
+
+#ifndef HAVE_HW_STENCIL_PIXELS	/* nonzero: pixel functions use WRITE_STENCIL_PIXELS() / READ_STENCIL_PIXELS() */
+#define HAVE_HW_STENCIL_PIXELS 0
+#endif
+
+static void TAG(WriteStencilSpan)( GLcontext *ctx,
+                                   struct gl_renderbuffer *rb,
+				   GLuint n, GLint x, GLint y,
+				   const void *values, const GLubyte mask[] )
+{  /* Write n stencil bytes from values[] along row y starting at column x. */
+   HW_WRITE_LOCK()
+      {
+         const GLubyte *stencil = (const GLubyte *) values;
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_STENCIL_VARS;
+
+	 y = Y_FLIP(y);
+
+#if HAVE_HW_STENCIL_SPANS
+	 (void) x1; (void) n1;
+	 /* x1/n1 are never assigned on this path, so trace the caller's n and x. */
+	 if (DBG) fprintf(stderr, "WriteStencilSpan 0..%d (x1 %d)\n",
+			  (int)n, (int)x);
+
+	 WRITE_STENCIL_SPAN();
+#else /* HAVE_HW_STENCIL_SPANS */
+	 HW_CLIPLOOP() 
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+
+	       if (DBG) fprintf(stderr, "WriteStencilSpan %d..%d (x1 %d)\n",
+				(int)i, (int)n1, (int)x1);
+	       /* With a mask, pixel i is written only when mask[i] is nonzero. */
+	       if (mask)
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     if (mask[i])
+			WRITE_STENCIL( x1, y, stencil[i] );
+	       }
+	       else
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     WRITE_STENCIL( x1, y, stencil[i] );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+#endif /* !HAVE_HW_STENCIL_SPANS */
+      }
+   HW_WRITE_UNLOCK();
+}
+
+#if HAVE_HW_STENCIL_SPANS
+/* Implement WriteMonoStencilSpan() in terms of WriteStencilSpan(), which reads its buffer as GLubyte; n must not exceed MAX_WIDTH. */
+static void
+TAG(WriteMonoStencilSpan)( GLcontext *ctx, struct gl_renderbuffer *rb,
+                           GLuint n, GLint x, GLint y,
+                           const void *value, const GLubyte mask[] )
+{
+   const GLubyte stenVal = *((const GLubyte *) value);
+   GLubyte stens[MAX_WIDTH];
+   GLuint i;
+   for (i = 0; i < n; i++)
+      stens[i] = stenVal;
+   TAG(WriteStencilSpan)(ctx, rb, n, x, y, stens, mask);
+}
+#else /* HAVE_HW_STENCIL_SPANS */
+static void TAG(WriteMonoStencilSpan)( GLcontext *ctx,
+                                       struct gl_renderbuffer *rb,
+                                       GLuint n, GLint x, GLint y,
+                                       const void *value,
+                                       const GLubyte mask[] )
+{  /* Write the single GLubyte at *value to up to n pixels of row y from column x. */
+   HW_WRITE_LOCK()
+      {
+         const GLubyte stencil = *((const GLubyte *) value);
+	 GLint x1;
+	 GLint n1;
+	 LOCAL_STENCIL_VARS;
+
+	 y = Y_FLIP(y);
+
+	 HW_CLIPLOOP() 
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       /* The trace below reuses the "WriteStencilSpan" label. */
+	       if (DBG) fprintf(stderr, "WriteStencilSpan %d..%d (x1 %d)\n",
+				(int)i, (int)n1, (int)x1);
+	       /* With a mask, pixel i is written only when mask[i] is nonzero. */
+	       if (mask)
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     if (mask[i])
+			WRITE_STENCIL( x1, y, stencil );
+	       }
+	       else
+	       {
+		  for (;n1>0;i++,x1++,n1--)
+		     WRITE_STENCIL( x1, y, stencil );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+      }
+   HW_WRITE_UNLOCK();
+}
+#endif /* !HAVE_HW_STENCIL_SPANS */
+
+
+static void TAG(WriteStencilPixels)( GLcontext *ctx,
+                                     struct gl_renderbuffer *rb,
+				     GLuint n,
+				     const GLint x[], const GLint y[],
+				     const void *values, const GLubyte mask[] )
+{  /* Write values[i] (read as GLubyte) at (x[i], y[i]) for the n listed pixels. */
+   HW_WRITE_LOCK()
+      {
+         const GLubyte *stencil = (const GLubyte *) values;
+	 GLuint i;
+	 LOCAL_STENCIL_VARS;
+
+	 if (DBG) fprintf(stderr, "WriteStencilPixels\n");
+
+#if HAVE_HW_STENCIL_PIXELS
+	 (void) i;
+
+	 WRITE_STENCIL_PIXELS();
+#else /* HAVE_HW_STENCIL_PIXELS */
+	 HW_CLIPLOOP()
+	    {
+	       for (i=0;i<n;i++)
+	       {
+		  if (!mask || mask[i]) {  /* NULL mask: write every pixel, as the span writers do */
+		     const int fy = Y_FLIP(y[i]);
+		     if (CLIPPIXEL(x[i],fy))
+			WRITE_STENCIL( x[i], fy, stencil[i] );
+		  }
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+#endif /* !HAVE_HW_STENCIL_PIXELS */
+      }
+   HW_WRITE_UNLOCK();
+}
+
+
+/* Read stencil spans and pixels.  ReadStencilSpan stores GLubyte results in
+ * values[]; without HW spans, only entries inside a cliprect are written. */
+static void TAG(ReadStencilSpan)( GLcontext *ctx,
+                                  struct gl_renderbuffer *rb,
+				  GLuint n, GLint x, GLint y,
+				  void *values)
+{
+   HW_READ_LOCK()
+      {
+         GLubyte *stencil = (GLubyte *) values;
+	 GLint x1,n1;
+	 LOCAL_STENCIL_VARS;
+
+	 y = Y_FLIP(y);
+
+	 if (DBG) fprintf(stderr, "ReadStencilSpan\n");
+
+#if HAVE_HW_STENCIL_SPANS
+	 (void) x1; (void) n1;
+
+	 READ_STENCIL_SPAN();
+#else /* HAVE_HW_STENCIL_SPANS */
+	 HW_CLIPLOOP() 
+	    {
+	       GLint i = 0;
+	       CLIPSPAN(x,y,n,x1,n1,i);
+	       for (;n1>0;i++,n1--)  /* x1 is not advanced; the column is x+i */
+		  READ_STENCIL( stencil[i], (x+i), y );
+	    }
+	 HW_ENDCLIPLOOP();
+#endif /* !HAVE_HW_STENCIL_SPANS */
+      }
+   HW_READ_UNLOCK();
+}
+
+static void TAG(ReadStencilPixels)( GLcontext *ctx,
+                                    struct gl_renderbuffer *rb,
+                                    GLuint n, const GLint x[], const GLint y[],
+				    void *values )
+{  /* Read the stencil value at each (x[i], y[i]) into values[i] (as GLubyte). */
+   HW_READ_LOCK()
+      {
+         GLubyte *stencil = (GLubyte *) values;
+	 GLuint i;
+	 LOCAL_STENCIL_VARS;
+
+	 if (DBG) fprintf(stderr, "ReadStencilPixels\n");
+ 
+#if HAVE_HW_STENCIL_PIXELS
+	 (void) i;
+
+	 READ_STENCIL_PIXELS();
+#else /* HAVE_HW_STENCIL_PIXELS */
+	 HW_CLIPLOOP()
+	    {
+	       for (i=0;i<n;i++) {
+		  int fy = Y_FLIP( y[i] );
+		  if (CLIPPIXEL( x[i], fy ))  /* entries outside the cliprect are left untouched */
+		     READ_STENCIL( stencil[i], x[i], fy );
+	       }
+	    }
+	 HW_ENDCLIPLOOP();
+#endif /* !HAVE_HW_STENCIL_PIXELS */
+      }
+   HW_READ_UNLOCK();
+}
+
+
+
+/**
+ * Hook the stencil span functions generated by this template into a
+ * renderbuffer.  The RGB-row and mono-values writers are set to NULL.
+ */
+static void TAG(InitStencilPointers)(struct gl_renderbuffer *rb)
+{
+   /* writers */
+   rb->PutRow        = TAG(WriteStencilSpan);
+   rb->PutMonoRow    = TAG(WriteMonoStencilSpan);
+   rb->PutValues     = TAG(WriteStencilPixels);
+   rb->PutRowRGB     = NULL;
+   rb->PutMonoValues = NULL;
+   rb->GetRow = TAG(ReadStencilSpan); rb->GetValues = TAG(ReadStencilPixels);
+}
+
+
+#undef WRITE_STENCIL
+#undef READ_STENCIL
+#undef TAG
diff --git a/src/mesa/drivers/dri/common/texmem.c b/src/mesa/drivers/dri/common/texmem.c
new file mode 100644
index 0000000000..895139b55b
--- /dev/null
+++ b/src/mesa/drivers/dri/common/texmem.c
@@ -0,0 +1,1341 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * (C) Copyright IBM Corporation 2002, 2003
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <idr@us.ibm.com>
+ *    Keith Whitwell <keithw@tungstengraphics.com>
+ *    Kevin E. Martin <kem@users.sourceforge.net>
+ *    Gareth Hughes <gareth@nvidia.com>
+ */
+
+/** \file texmem.c
+ * Implements all of the device-independent texture memory management.
+ * 
+ * Currently, only a simple LRU texture memory management policy is
+ * implemented.  In the (hopefully very near) future, better policies will be
+ * implemented.  The idea is that the DRI should be able to run in one of two
+ * modes.  In the default mode the DRI will dynamically attempt to discover
+ * the best texture management policy for the running application.  In the
+ * other mode, the user (via some sort of as yet TBD mechanism) will select
+ * a texture management policy that is known to work well with the
+ * application.
+ */
+
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/simple_list.h"
+#include "texmem.h"
+
+
+static unsigned dummy_swap_counter;  /* swap counter used when driSetTextureSwapCounterLocation() is given NULL */
+
+
+/**
+ * Count how many bits are needed to represent \a n: the result is
+ * \f$\lfloor\log_2 n\rfloor + 1\f$ for \a n >= 1, and 1 for \a n == 0.
+ * One bit is shifted out per iteration; the original author considered
+ * that slowness irrelevant to overall system performance.
+ * 
+ * \param n Value to measure
+ * \return 1 plus the number of right shifts needed to bring \a n to <= 1
+ */
+static GLuint
+driLog2( GLuint n )
+{
+   GLuint bits = 1;
+
+   while ( n > 1 ) {
+      n >>= 1;
+      bits++;
+   }
+   return bits;
+}
+
+
+
+
+/**
+ * Report whether a texture currently has a block of texture memory
+ * assigned to it.  Which physical memory that block lives in (on-card,
+ * AGP, or any other memory the hardware can read texels from) depends
+ * on the driver.
+ * 
+ * This function is intended to be used as the \c IsTextureResident function
+ * in the device's \c dd_function_table.
+ * 
+ * \param ctx GL context pointer (currently unused)
+ * \param texObj Texture object to be tested
+ * \return Non-zero when \c texObj->DriverData is set and owns a memory block
+ */
+
+GLboolean
+driIsTextureResident( GLcontext * ctx, 
+		      struct gl_texture_object * texObj )
+{
+   const driTextureObject * const obj =
+      (const driTextureObject *) texObj->DriverData;
+   /* No driver object, or no memory block behind it: not resident. */
+   return ( obj == NULL ) ? 0 : ( obj->memBlock != NULL );
+}
+
+
+
+
+/**
+ * (Re)initialize the global circular LRU list.  The last element
+ * in the array (\a heap->nrRegions) is the sentinel.  Keeping it
+ * at the end of the array allows the other elements of the array
+ * to be addressed rationally when looking up objects at a particular
+ * location in texture memory.  Also resets the global age to 0.
+ * 
+ * \param heap Texture heap to be reset
+ */
+
+static void resetGlobalLRU( driTexHeap * heap )
+{
+   drmTextureRegionPtr list = heap->global_regions;
+   unsigned       sz = 1U << heap->logGranularity;
+   unsigned       i;
+   /* Chain, in index order, every whole sz-byte region that fits in heap->size. */
+   for (i = 0 ; (i+1) * sz <= heap->size ; i++) {
+      list[i].prev = i-1;
+      list[i].next = i+1;
+      list[i].age = 0;
+   }
+   /* Splice the sentinel between the last chained region and region 0. */
+   i--;  /* index of the last region chained by the loop */
+   list[0].prev = heap->nrRegions;
+   list[i].prev = i-1;
+   list[i].next = heap->nrRegions;
+   list[heap->nrRegions].prev = i;
+   list[heap->nrRegions].next = 0;
+   heap->global_age[0] = 0;
+}
+
+/**
+ * Print the heap's local texture list and its swapped-objects list to stderr.
+ *
+ * \param heap Texture heap to be printed
+ * \param callername Name of calling function
+ */
+static void printLocalLRU( driTexHeap * heap, const char *callername  )
+{
+   driTextureObject *t;
+   unsigned sz = 1U << heap->logGranularity;
+
+   fprintf( stderr, "%s in %s:\nLocal LRU, heap %d:\n", 
+	    __FUNCTION__, callername, heap->heapId );
+   /* Objects on the heap's own list; entries without a memory block are skipped. */
+   foreach ( t, &heap->texture_objects ) {
+      if (!t->memBlock)
+	 continue;
+      if (!t->tObj) {  /* no GL texture object attached: printed as a placeholder */
+	 fprintf( stderr, "Placeholder (%p) %d at 0x%x sz 0x%x\n",
+		  (void *)t,
+		  t->memBlock->ofs / sz,
+		  t->memBlock->ofs,
+		  t->memBlock->size );
+      } else {
+	 fprintf( stderr, "Texture (%p) at 0x%x sz 0x%x\n",
+		  (void *)t,
+		  t->memBlock->ofs,
+		  t->memBlock->size );
+      }
+   }
+   foreach ( t, heap->swapped_objects ) {
+      if (!t->tObj) {
+	 fprintf( stderr, "Swapped Placeholder (%p)\n", (void *)t );
+      } else {
+	 fprintf( stderr, "Swapped Texture (%p)\n", (void *)t );
+      }
+   }
+
+   fprintf( stderr, "\n" );
+}
+
+/**
+ * Print the global region list to stderr, following the \c next links from
+ * the sentinel.  If the walk does not return to the sentinel, dump all entries.
+ *
+ * \param heap Texture heap to be printed
+ * \param callername Name of calling function
+ */
+static void printGlobalLRU( driTexHeap * heap, const char *callername )
+{
+   drmTextureRegionPtr list = heap->global_regions;
+   unsigned int i, j;
+
+   fprintf( stderr, "%s in %s:\nGlobal LRU, heap %d list %p:\n", 
+	    __FUNCTION__, callername, heap->heapId, (void *)list );
+   /* i only bounds the walk to nrRegions steps; j is the entry being visited. */
+   for ( i = 0, j = heap->nrRegions ; i < heap->nrRegions ; i++ ) {
+      fprintf( stderr, "list[%d] age %d next %d prev %d in_use %d\n",
+	       j, list[j].age, list[j].next, list[j].prev, list[j].in_use );
+      j = list[j].next;
+      if ( j == heap->nrRegions ) break;
+   }
+   /* Not back at the sentinel after nrRegions steps: report and dump by index. */
+   if ( j != heap->nrRegions ) {
+      fprintf( stderr, "Loop detected in global LRU\n" );
+      for ( i = 0 ; i < heap->nrRegions ; i++ ) {
+	 fprintf( stderr, "list[%d] age %d next %d prev %d in_use %d\n",
+		  i, list[i].age, list[i].next, list[i].prev, list[i].in_use );
+      }
+   }
+
+   fprintf( stderr, "\n" );
+}
+
+
+/**
+ * Called by the client whenever it touches a local texture.  Moves \a t to
+ * the head of its heap's list and every region it covers to the list head.
+ * 
+ * \param t Texture object that the client has accessed
+ */
+void driUpdateTextureLRU( driTextureObject * t )
+{
+   driTexHeap   * heap;
+   drmTextureRegionPtr list;
+   unsigned   shift;
+   unsigned   start;
+   unsigned   end;
+   unsigned   i;
+
+   /* A texture that has no heap is left alone. */
+   heap = t->heap;
+   if ( heap != NULL ) {
+      shift = heap->logGranularity;
+      start = t->memBlock->ofs >> shift;
+      end = (t->memBlock->ofs + t->memBlock->size - 1) >> shift;
+
+      /* Increment the shared age counter and adopt it as this heap's local age. */
+      heap->local_age = ++heap->global_age[0];
+      list = heap->global_regions;
+
+
+      /* Update the context's local LRU 
+       */
+
+      move_to_head( & heap->texture_objects, t );
+
+      /* Regions start..end (inclusive) are the ones the memory block touches. */
+      for (i = start ; i <= end ; i++) {
+	 list[i].age = heap->local_age;
+
+	 /* remove_from_list(i)
+	  */
+	 list[(unsigned)list[i].next].prev = list[i].prev;
+	 list[(unsigned)list[i].prev].next = list[i].next;
+
+	 /* insert_at_head(list, i) -- the head is the sentinel at index nrRegions
+	  */
+	 list[i].prev = heap->nrRegions;
+	 list[i].next = list[heap->nrRegions].next;
+	 list[(unsigned)list[heap->nrRegions].next].prev = i;
+	 list[heap->nrRegions].next = i;
+      }
+
+      if ( 0 ) {
+	 printGlobalLRU( heap, __FUNCTION__ );
+	 printLocalLRU( heap, __FUNCTION__ );
+      }
+   }
+}
+
+
+
+
+/**
+ * Evict a texture object from its heap: free its memory block, move it to
+ * the heap's swapped-objects list, and mark the images of all six faces
+ * dirty.  An object without a memory block only gets the dirty marking.
+ * 
+ * \param t Texture object to be "swapped" out of its texture heap
+ */
+void driSwapOutTextureObject( driTextureObject * t )
+{
+   unsigned   f;
+
+   if ( t->memBlock == NULL ) {
+      assert( t->heap == NULL );
+   }
+   else {
+      assert( t->heap != NULL );
+      mmFreeMem( t->memBlock );
+      t->memBlock = NULL;
+
+      /* Let the heap inherit the newer of the two timestamps. */
+      if ( t->heap->timestamp < t->timestamp )
+	 t->heap->timestamp = t->timestamp;
+
+      t->heap->texture_swaps[0]++;
+      move_to_tail( t->heap->swapped_objects, t );
+      t->heap = NULL;
+   }
+
+   /* Setting every bit flags each image of the face as dirty. */
+   for ( f = 0 ; f < 6 ; f++ )
+      t->dirty_images[f] = ~0;
+}
+
+
+
+
+/**
+ * Destroy hardware state associated with texture \a t.  Calls the
+ * \a destroy_texture_object method associated with the heap from which
+ * \a t was allocated, but only while \a t still owns a memory block.
+ * 
+ * \param t Texture object to be destroyed; \c NULL is accepted and ignored
+ */
+
+void driDestroyTextureObject( driTextureObject * t )
+{
+   driTexHeap * heap;
+
+   /* Compiled-out debug trace. */
+   if ( 0 ) {
+      fprintf( stderr, "[%s:%d] freeing %p (tObj = %p, DriverData = %p)\n",
+	       __FILE__, __LINE__,
+	       (void *)t,
+	       (void *)((t != NULL) ? t->tObj : NULL),
+	       (void *)((t != NULL && t->tObj != NULL) ? t->tObj->DriverData : NULL ));
+   }
+
+   if ( t != NULL ) {
+      if ( t->memBlock ) {
+	 heap = t->heap;
+	 assert( heap != NULL );
+
+	 heap->texture_swaps[0]++;
+
+	 mmFreeMem( t->memBlock );
+	 t->memBlock = NULL;
+	 /* Keep the newer of the two timestamps on the heap. */
+	 if (t->timestamp > t->heap->timestamp)
+	    t->heap->timestamp = t->timestamp;
+
+	 heap->destroy_texture_object( heap->driverContext, t );
+	 t->heap = NULL;
+      }
+      /* Detach from the GL texture object so it no longer points at freed memory. */
+      if ( t->tObj != NULL ) {
+	 assert( t->tObj->DriverData == t );
+	 t->tObj->DriverData = NULL;
+      }
+
+      remove_from_list( t );
+      FREE( t );
+   }
+
+   if ( 0 ) {
+      fprintf( stderr, "[%s:%d] done freeing %p\n", __FILE__, __LINE__, (void *)t );
+   }
+}
+
+
+
+
+/**
+ * Update the local heap's representation of texture memory based on
+ * data in the SAREA.  This is done each time it is detected that some other
+ * direct rendering client has held the lock.  This pertains to both our local
+ * textures and the textures belonging to other clients.  Keep track of other
+ * clients' textures by pushing a placeholder texture onto the LRU list --
+ * these are denoted by \a tObj being \a NULL.
+ * 
+ * \param heap Heap whose state is to be updated
+ * \param offset Byte offset in the heap that has been stolen
+ * \param size Size, in bytes, of the stolen block
+ * \param in_use Non-zero if the block is pinned/reserved by the kernel
+ */
+
+static void driTexturesGone( driTexHeap * heap, int offset, int size, 
+			     int in_use )
+{
+   driTextureObject * t;
+   driTextureObject * tmp;
+
+   /* foreach_s keeps the next entry in tmp, so t may be unlinked inside the loop. */
+   foreach_s ( t, tmp, & heap->texture_objects ) {
+      if ( (t->memBlock->ofs < (offset + size))
+	   && ((t->memBlock->ofs + t->memBlock->size) > offset) ) {
+	 /* It overlaps - kick it out.  If the texture object is just a
+	  * placeholder, then destroy it altogether.  Otherwise, mark
+	  * it as being swapped out.
+	  */
+
+	 if ( t->tObj != NULL ) {
+	    driSwapOutTextureObject( t );
+	 }
+	 else {
+	    driDestroyTextureObject( t );
+	 }
+      }
+   }
+
+   /* Claim the stolen range with a zero-initialized placeholder (tObj stays NULL). */
+   {
+      t = (driTextureObject *) CALLOC( heap->texture_object_size );
+      if ( t == NULL ) return;
+
+      t->memBlock = mmAllocMem( heap->memory_heap, size, 0, offset );
+      if ( t->memBlock == NULL ) {
+	 fprintf( stderr, "Couldn't alloc placeholder: heap %u sz %x ofs %x\n", heap->heapId,
+		  (int)size, (int)offset );
+	 mmDumpMemInfo( heap->memory_heap );
+	 FREE(t);
+	 return;
+      }
+      t->heap = heap;
+      if (in_use) 
+	 t->reserved = 1; 
+      insert_at_head( & heap->texture_objects, t );
+   }
+}
+
+
+
+
+/**
+ * Called by the client on lock contention to determine whether textures have
+ * been stolen.  If another client has modified a region in which we have
+ * textures, then we need to figure out which of our textures have been
+ * removed and update our global LRU.
+ * 
+ * \param heap Texture heap to be updated
+ */
+
+void driAgeTextures( driTexHeap * heap )
+{
+   drmTextureRegionPtr list = heap->global_regions;
+   unsigned       sz = 1U << (heap->logGranularity);
+   unsigned       i, nr = 0;
+
+
+   /* Have to go right round from the back to ensure stuff ends up
+    * LRU in the local list...  Fix with a cursor pointer.
+    */
+   /* nr counts visited entries so a corrupt list cannot loop forever. */
+   for (i = list[heap->nrRegions].prev ; 
+	i != heap->nrRegions && nr < heap->nrRegions ; 
+	i = list[i].prev, nr++) {
+      /* If switching texturing schemes, then the SAREA might not have been
+       * properly cleared, so we need to reset the global texture LRU.
+       */
+
+      if ( (i * sz) > heap->size ) {
+	 nr = heap->nrRegions;  /* forces the reset branch below */
+	 break;
+      }
+      /* A region aged past our local age was touched by someone else. */
+      if (list[i].age > heap->local_age) 
+	  driTexturesGone( heap, i * sz, sz, list[i].in_use); 
+   }
+
+   /* Loop or uninitialized heap detected.  Reset.
+    */
+
+   if (nr == heap->nrRegions) {
+      driTexturesGone( heap, 0, heap->size, 0);
+      resetGlobalLRU( heap );
+   }
+
+   if ( 0 ) {
+      printGlobalLRU( heap, __FUNCTION__ );
+      printLocalLRU( heap, __FUNCTION__ );
+   }
+
+   heap->local_age = heap->global_age[0];
+}
+
+
+
+
+#define INDEX_ARRAY_SIZE 6 /* I'm not aware of driver with more than 2 heaps */
+
+/**
+ * Allocate memory from a texture heap to hold a texture object.  This
+ * routine will attempt to allocate memory for the texture from the heaps
+ * specified by \c heap_array in order.  That is, first it will try to
+ * allocate from \c heap_array[0], then \c heap_array[1], and so on.
+ *
+ * \param heap_array Array of pointers to texture heaps to use
+ * \param nr_heaps Number of heap pointers in \a heap_array
+ * \param t Texture object for which space is needed
+ * \return The ID of the heap from which memory was allocated, or -1 if
+ *         memory could not be allocated.
+ *
+ * \bug The replacement policy implemented by this function is horrible.
+ */
+
+
+int
+driAllocateTexture( driTexHeap * const * heap_array, unsigned nr_heaps,
+		    driTextureObject * t )
+{
+   driTexHeap       * heap;
+   driTextureObject * temp;
+   driTextureObject * cursor;
+   unsigned           id;
+
+
+   /* In case it already has texture space, initialize heap.  This also
+    * prevents GCC from issuing a warning that heap might be used
+    * uninitialized.
+    */
+
+   heap = t->heap;
+
+
+   /* Run through each of the existing heaps and try to allocate a buffer
+    * to hold the texture.
+    */
+
+   for ( id = 0 ; (t->memBlock == NULL) && (id < nr_heaps) ; id++ ) {
+      heap = heap_array[ id ];
+      if ( heap != NULL ) {
+	 t->memBlock = mmAllocMem( heap->memory_heap, t->totalSize, 
+				   heap->alignmentShift, 0 );
+      }
+   }
+
+
+   /* Kick textures out until the requested texture fits.
+    */
+
+   if ( t->memBlock == NULL ) {
+      unsigned index[INDEX_ARRAY_SIZE];
+      unsigned nrGoodHeaps = 0;
+
+      /* Trying to avoid dynamic memory allocation. If you have more
+       * heaps, increase INDEX_ARRAY_SIZE. I'm not aware of any
+       * drivers with more than 2 tex heaps. */
+      assert( nr_heaps < INDEX_ARRAY_SIZE );
+
+      /* Sort large enough heaps by duty (highest duty first). Insertion
+       * sort should be fast enough for such a short array. */
+      for ( id = 0 ; id < nr_heaps ; id++ ) {
+	 heap = heap_array[ id ];
+
+	 if ( heap != NULL && t->totalSize <= heap->size ) {
+	    unsigned j;
+
+	    for ( j = 0 ; j < nrGoodHeaps; j++ ) {
+	       if ( heap->duty > heap_array[ index[ j ] ]->duty )
+		  break;
+	    }
+
+	    if ( j < nrGoodHeaps ) {
+	       memmove( &index[ j+1 ], &index[ j ],
+			sizeof(index[ 0 ]) * (nrGoodHeaps - j) );
+	    }
+
+	    index[ j ] = id;
+
+	    nrGoodHeaps++;
+	 }
+      }
+
+      for ( id = 0 ; (t->memBlock == NULL) && (id < nrGoodHeaps) ; id++ ) {
+	 heap = heap_array[ index[ id ] ];
+
+	 for ( cursor = heap->texture_objects.prev, temp = cursor->prev;
+	       cursor != &heap->texture_objects ; 
+	       cursor = temp, temp = cursor->prev ) {
+	       
+	    /* Walk backwards from the tail of the list.  If the texture is
+	     * bound to a texture unit or reserved, we cannot kick it out.
+	     */
+	    if ( cursor->bound || cursor->reserved ) {
+	       continue;
+	    }
+
+	    if ( cursor->memBlock )
+	       heap->duty -= cursor->memBlock->size;
+
+	    /* If this is a placeholder, there's no need to keep it */
+	    if (cursor->tObj)
+	       driSwapOutTextureObject( cursor );
+	    else
+	       driDestroyTextureObject( cursor );
+
+	    t->memBlock = mmAllocMem( heap->memory_heap, t->totalSize, 
+				      heap->alignmentShift, 0 );
+
+	    if (t->memBlock)
+	       break;
+	 }
+      }
+
+      /* Rebalance duties. If a heap kicked more data than its duty,
+       * then all other heaps get that amount multiplied with their
+       * relative weight added to their duty. The negative duty is
+       * reset to 0. In the end all heaps have a duty >= 0.
+       *
+       * CAUTION: we must not change the heap pointer here, because it
+       * is used below to update the texture object.
+       */
+      for ( id = 0 ; id < nr_heaps ; id++ )
+	 if ( heap_array[ id ] != NULL && heap_array[ id ]->duty < 0) {
+	    int duty = -heap_array[ id ]->duty;
+	    double weight = heap_array[ id ]->weight;
+	    unsigned j;
+
+	    for ( j = 0 ; j < nr_heaps ; j++ )
+	       if ( j != id && heap_array[ j ] != NULL ) {
+		  heap_array[ j ]->duty += (double) duty *
+		     heap_array[ j ]->weight / weight;
+	       }
+
+	    heap_array[ id ]->duty = 0;
+	 }
+   }
+
+
+   if ( t->memBlock != NULL ) {
+      /* id and heap->heapId may or may not be the same value here.
+       */
+
+      assert( heap != NULL );
+      assert( (t->heap == NULL) || (t->heap == heap) );
+
+      t->heap = heap;
+      return heap->heapId;
+   }
+   else {
+      assert( t->heap == NULL );
+
+      fprintf( stderr, "[%s:%d] unable to allocate texture\n",
+	       __FUNCTION__, __LINE__ );
+      return -1;
+   }
+}
+
+
+
+
+
+
+/**
+ * Choose where the heap's texture-swap counter lives.  Passing \c NULL
+ * for \a counter selects the file-local dummy counter instead.
+ */
+void
+driSetTextureSwapCounterLocation( driTexHeap * heap, unsigned * counter )
+{
+   heap->texture_swaps = ( counter != NULL ) ? counter : & dummy_swap_counter;
+}
+
+
+
+
+/**
+ * Create a new heap for texture data.
+ * 
+ * \param heap_id             Device-dependent heap identifier.  This value
+ *                            will be returned by driAllocateTexture when
+ *                            memory is allocated from this heap.
+ * \param context             Device-dependent driver context.  This is
+ *                            supplied as the first parameter to the
+ *                            \c destroy_tex_obj function.
+ * \param size                Size, in bytes, of the texture region
+ * \param alignmentShift      Alignment requirement for textures.  If textures 
+ *                            must be allocated on a 4096 byte boundary, this
+ *                            would be 12.
+ * \param nr_regions          Number of regions into which this texture space
+ *                            should be partitioned
+ * \param global_regions      Array of \c drmTextureRegion structures in the SAREA
+ * \param global_age          Pointer to the global texture age in the SAREA
+ * \param swapped_objects     Pointer to the list of texture objects that are
+ *                            not in texture memory (i.e., have been swapped
+ *                            out).
+ * \param texture_object_size Size, in bytes, of a device-dependent texture
+ *                            object
+ * \param destroy_tex_obj     Function used to destroy a device-dependent
+ *                            texture object
+ * \return The new heap, or \c NULL if either allocation fails
+ * \sa driDestroyTextureHeap
+ */
+
+driTexHeap *
+driCreateTextureHeap( unsigned heap_id, void * context, unsigned size,
+		      unsigned alignmentShift, unsigned nr_regions,
+		      drmTextureRegionPtr global_regions, unsigned * global_age,
+		      driTextureObject * swapped_objects, 
+		      unsigned texture_object_size,
+		      destroy_texture_object_t * destroy_tex_obj
+		    )
+{
+   driTexHeap * heap;
+   unsigned     l;
+    
+    
+   if ( 0 )
+       fprintf( stderr, "%s( %u, %p, %u, %u, %u )\n",
+		__FUNCTION__,
+		heap_id, (void *)context, size, alignmentShift, nr_regions );
+
+   heap = (driTexHeap *) CALLOC( sizeof( driTexHeap ) );
+   if ( heap != NULL ) {
+      l = driLog2( (size - 1) / nr_regions );  /* region granularity as a shift count */
+      if ( l < alignmentShift )
+      {
+	 l = alignmentShift;
+      }
+
+      heap->logGranularity = l;
+      heap->size = size & ~((1L << l) - 1);  /* round size down to a multiple of 2^l */
+
+      heap->memory_heap = mmInit( 0, heap->size );
+      if ( heap->memory_heap != NULL ) {
+	 heap->heapId = heap_id;
+	 heap->driverContext = context;
+
+	 heap->alignmentShift = alignmentShift;
+	 heap->nrRegions = nr_regions;
+	 heap->global_regions = global_regions;
+	 heap->global_age = global_age;
+	 heap->swapped_objects = swapped_objects;
+	 heap->texture_object_size = texture_object_size;
+	 heap->destroy_texture_object = destroy_tex_obj;
+
+	 /* Force global heap init */
+	 if (heap->global_age[0] == 0)
+	     heap->local_age = ~0;
+	 else
+	     heap->local_age = 0;
+
+	 make_empty_list( & heap->texture_objects );
+	 driSetTextureSwapCounterLocation( heap, NULL );
+
+	 heap->weight = heap->size;
+	 heap->duty = 0;
+      }
+      else {
+	 FREE( heap );
+	 heap = NULL;
+      }
+   }
+
+
+   if ( 0 )
+       fprintf( stderr, "%s returning %p\n", __FUNCTION__, (void *)heap );
+
+   return heap;
+}
+
+
+
+
+/**
+ * Tear down a texture heap: destroy every object on the heap's own list
+ * and on its swapped-objects list, then release the memory manager state
+ * and the heap structure itself.
+ * 
+ * \param heap Texture heap to be destroyed; \c NULL is ignored
+ */
+void
+driDestroyTextureHeap( driTexHeap * heap )
+{
+   driTextureObject * obj;
+   driTextureObject * next;
+
+   if ( heap == NULL )
+      return;
+
+   foreach_s( obj, next, & heap->texture_objects )
+      driDestroyTextureObject( obj );
+
+   foreach_s( obj, next, heap->swapped_objects )
+      driDestroyTextureObject( obj );
+   mmDestroy( heap->memory_heap );
+   FREE( heap );
+}
+
+
+
+
+/****************************************************************************/
+/**
+ * Determine how many texels (including all mipmap levels) a texture map
+ * whose sides are \f$2^{\c base_size_log2}\f$ texels long would require.
+ *
+ * \param base_size_log2 \f$\log_2\f$ of the size of a side of the texture
+ * \param dimensions Number of dimensions of the texture.  Either 2 or 3.
+ * \param faces Number of faces of the texture.  Either 1 or 6 (for cube maps).
+ * \return Number of texels; 0 when \a base_size_log2 is negative
+ */
+
+static unsigned
+texels_this_map_size( int base_size_log2, unsigned dimensions, unsigned faces )
+{
+   unsigned  level0;
+
+   assert( (faces == 1) || (faces == 6) );
+   assert( (dimensions == 2) || (dimensions == 3) );
+
+   /* A negative size means there is no such level. */
+   if ( base_size_log2 < 0 )
+      return 0;
+
+   /* Texels in the base level alone. */
+   level0 = (1U << (dimensions * base_size_log2));
+
+   /* See http://www.mail-archive.com/dri-devel@lists.sourceforge.net/msg03636.html
+    * for the complete explanation of why this formulation is used.
+    * Basically, the smaller mipmap levels sum to 0.333 the size of the
+    * level 0 map.  The total size is therefore the size of the map
+    * multiplied by 1.333.  The +2 is there to round up.
+    */
+   return (level0 * 4 * faces + 2) / 3;
+}
+
+
+
+
+
+
+struct maps_per_heap {
+   unsigned  c[32];   /**< c[n]: how many textures of log2 size n fit in the heap */
+};
+
+static void
+fill_in_maximums( driTexHeap * const * heaps, unsigned nr_heaps,
+		  unsigned max_bytes_per_texel, unsigned max_size,
+		  unsigned mipmaps_at_once, unsigned dimensions,
+		  unsigned faces, struct maps_per_heap * max_textures )
+{
+   unsigned   heap;
+
+   /* For every heap, work out how many textures of each power-of-two
+    * size, from max_size down to 1, would fit.  Entry c[0] is not
+    * written for heaps that exist.
+    */
+
+   for ( heap = 0 ; heap < nr_heaps ; heap++ ) {
+      unsigned * const  fits = max_textures[ heap ].c;
+      unsigned   align_mask;
+      unsigned   log2_size;
+
+      /* A missing heap cannot hold anything. */
+      if ( heaps[ heap ] == NULL ) {
+         (void) memset( max_textures[ heap ].c, 0,
+                        sizeof( max_textures[ heap ].c ) );
+         continue;
+      }
+
+      /* Texture sizes are rounded up to the heap granularity. */
+      align_mask = (1U << heaps[ heap ]->logGranularity) - 1;
+
+      if ( 0 ) {
+         fprintf( stderr, "[%s:%d] heap[%u] = %u bytes, mask = 0x%08x\n",
+                  __FILE__, __LINE__,
+                  heap, heaps[ heap ]->size, align_mask );
+      }
+
+      for ( log2_size = max_size ; log2_size > 0 ; log2_size-- ) {
+         /* Texels needed at once: the full pyramid for this size minus
+          * the pyramid that starts mipmaps_at_once levels further down.
+          */
+         const unsigned  whole_pyramid =
+             texels_this_map_size( log2_size, dimensions, faces );
+         const unsigned  dropped_levels =
+             texels_this_map_size( log2_size - mipmaps_at_once,
+                                   dimensions, faces );
+         unsigned   bytes = whole_pyramid - dropped_levels;
+
+         bytes *= max_bytes_per_texel;
+         bytes = (bytes + align_mask) & ~align_mask;
+
+         /* Heap size divided by texture size gives the count. */
+         fits[ log2_size ] = heaps[ heap ]->size / bytes;
+
+         if ( 0 ) {
+            fprintf( stderr, "[%s:%d] max_textures[%u].c[%02u] "
+                     "= 0x%08x / 0x%08x "
+                     "= %u (%u)\n",
+                     __FILE__, __LINE__,
+                     heap, log2_size,
+                     heaps[ heap ]->size, bytes,
+                     heaps[ heap ]->size / bytes,
+                     fits[ log2_size ] );
+         }
+      }
+   }
+}
+
+
+static unsigned
+get_max_size( unsigned nr_heaps,
+	      unsigned texture_units,
+	      unsigned max_size,
+	      int all_textures_one_heap,
+	      struct maps_per_heap * max_textures )
+{
+   unsigned   heap;
+   unsigned   log2_size;
+
+   /* Walk from the largest candidate size downwards and stop at the first
+    * size for which every texture unit can have a texture of that size
+    * bound simultaneously.  Hardware that needs all bound textures in one
+    * heap only qualifies when a single heap can hold them all; otherwise
+    * the capacity accumulated across heaps counts as well.
+    */
+   for ( log2_size = max_size ; log2_size > 0 ; log2_size-- ) {
+      unsigned   across_heaps = 0;
+
+      for ( heap = 0 ; heap < nr_heaps ; heap++ ) {
+         const unsigned  this_heap = max_textures[ heap ].c[ log2_size ];
+
+         across_heaps += this_heap;
+
+         if ( 0 ) {
+            fprintf( stderr, "[%s:%d] max_textures[%u].c[%02u] = %u, "
+                     "total = %u\n", __FILE__, __LINE__, heap, log2_size,
+                     this_heap, across_heaps );
+         }
+
+         /* The level count is log2 of the size plus one: a 1x1 maximum
+          * (log2 == 0) still provides the base level.
+          */
+         if ( this_heap >= texture_units ) {
+            return log2_size + 1;
+         }
+         if ( !all_textures_one_heap && (across_heaps >= texture_units) ) {
+            return log2_size + 1;
+         }
+      }
+   }
+
+   /* Reaching this point means no size fits, which should be impossible:
+    * at least a 1x1 texture must always fit.  log2_size is 0 here, so the
+    * assertion fires in builds where assertions are enabled.
+    */
+   assert( log2_size != 0 );
+   return 0;
+}
+/* Store max_sizes[v] (a level count) in limits->f unless it is zero. */
+#define SET_MAX(f,v) \
+    do { if ( max_sizes[v] != 0 ) { limits-> f = max_sizes[v]; } } while( 0 )
+/* Same test, but store the size in texels: 2^(level count - 1). */
+#define SET_MAX_RECT(f,v) \
+    do { if ( max_sizes[v] != 0 ) { limits-> f = 1 << (max_sizes[v] - 1); } } while( 0 )
+
+
+/**
+ * Given the amount of texture memory, the number of texture units, and the
+ * maximum size of a texel, calculate the maximum texture size the driver can
+ * advertise.
+ * 
+ * \param heaps Texture heaps for this card
+ * \param nr_heaps Number of texture heaps
+ * \param limits OpenGL constants.  MaxTextureUnits must be set.
+ * \param max_bytes_per_texel Maximum size of a single texel, in bytes
+ * \param max_2D_size \f$\log_2\f$ of the maximum 2D texture size (i.e.,
+ *     1024x1024 textures, this would be 10)
+ * \param max_3D_size \f$\log_2\f$ of the maximum 3D texture size (i.e.,
+ *     1024x1024x1024 textures, this would be 10)
+ * \param max_cube_size \f$\log_2\f$ of the maximum cube texture size (i.e.,
+ *     1024x1024 textures, this would be 10)
+ * \param max_rect_size \f$\log_2\f$ of the maximum texture rectangle size
+ *     (i.e., 1024x1024 textures, this would be 10).  This is a power-of-2
+ *     even though texture rectangles need not be a power-of-2.
+ * \param mipmaps_at_once Total number of mipmaps that can be used
+ *     at one time.  For most hardware this will be \f$\c max_size + 1\f$.
+ *     For hardware that does not support mipmapping, this will be 1.
+ * \param all_textures_one_heap True if the hardware requires that all
+ *     textures be in a single texture heap for multitexturing.
+ * \param allow_larger_textures 0 conservative, 1 calculate limits
+ *     so at least one worst-case texture can fit, 2 just use hw limits.
+ */
+
+void
+driCalculateMaxTextureLevels( driTexHeap * const * heaps,
+			      unsigned nr_heaps,
+			      struct gl_constants * limits,
+			      unsigned max_bytes_per_texel,
+			      unsigned max_2D_size,
+			      unsigned max_3D_size,
+			      unsigned max_cube_size,
+			      unsigned max_rect_size,
+			      unsigned mipmaps_at_once,
+			      int all_textures_one_heap,
+			      int allow_larger_textures )
+{
+   struct maps_per_heap  max_textures[8];
+   unsigned         i;
+   const unsigned   dimensions[4] = { 2, 3, 2, 2 };
+   const unsigned   faces[4]      = { 1, 1, 6, 1 };
+   unsigned         max_sizes[4];
+   unsigned         mipmaps[4];
+
+   /* One max_textures slot per heap: bail out rather than overrun it. */
+   assert( nr_heaps <= sizeof( max_textures ) / sizeof( max_textures[0] ) );
+   if ( nr_heaps > sizeof( max_textures ) / sizeof( max_textures[0] ) ) {
+      return;
+   }
+
+   /* Index 0 = 2D, 1 = 3D, 2 = cube, 3 = rectangle in all tables. */
+   max_sizes[0] = max_2D_size;
+   max_sizes[1] = max_3D_size;
+   max_sizes[2] = max_cube_size;
+   max_sizes[3] = max_rect_size;
+   mipmaps[0] = mipmaps_at_once;
+   mipmaps[1] = mipmaps_at_once;
+   mipmaps[2] = mipmaps_at_once;
+   mipmaps[3] = 1;   /* rectangles use a single level */
+
+   /* Two passes per target: first count how many textures of each
+    * power-of-two size fit in each heap, then pick the largest size that
+    * can be bound to every texture unit at once.  With
+    * allow_larger_textures == 2 the hardware limit is used as is.
+    */
+   for ( i = 0 ; i < 4 ; i++ ) {
+      if ( (allow_larger_textures != 2) && (max_sizes[ i ] != 0) ) {
+	 fill_in_maximums( heaps, nr_heaps, max_bytes_per_texel,
+			   max_sizes[ i ], mipmaps[ i ],
+			   dimensions[ i ], faces[ i ],
+			   max_textures );
+	 max_sizes[ i ] = get_max_size( nr_heaps,
+					allow_larger_textures == 1 ?
+					1 : limits->MaxTextureUnits,
+					max_sizes[ i ],
+					all_textures_one_heap,
+					max_textures );
+      }
+      else if (max_sizes[ i ] != 0) {
+	 max_sizes[ i ] += 1;   /* convert log2 size to a level count */
+      }
+   }
+
+   SET_MAX( MaxTextureLevels,        0 );
+   SET_MAX( Max3DTextureLevels,      1 );
+   SET_MAX( MaxCubeTextureLevels,    2 );
+   SET_MAX_RECT( MaxTextureRectSize, 3 );
+}
+
+
+
+
+/**
+ * Perform initial binding of default texture objects on a per unit, per
+ * texture target basis.
+ *
+ * \param ctx Current OpenGL context
+ * \param swapped List of swapped-out textures
+ * \param targets Bit-mask of valid texture targets
+ */
+
+void driInitTextureObjects( GLcontext *ctx, driTextureObject * swapped,
+			    GLuint targets )
+{
+   /* One row per supported texture target, listed in the order in which
+    * the targets are bound on each unit.
+    */
+   static const struct {
+      GLuint   flag;     /* DRI_TEXMGR_DO_TEXTURE_* bit enabling the row */
+      GLuint   index;    /* slot in the unit's CurrentTex[] array */
+      GLenum   target;   /* target passed to Driver.BindTexture */
+   } binding[] = {
+      { DRI_TEXMGR_DO_TEXTURE_1D,   TEXTURE_1D_INDEX,   GL_TEXTURE_1D },
+      { DRI_TEXMGR_DO_TEXTURE_2D,   TEXTURE_2D_INDEX,   GL_TEXTURE_2D },
+      { DRI_TEXMGR_DO_TEXTURE_3D,   TEXTURE_3D_INDEX,   GL_TEXTURE_3D },
+      { DRI_TEXMGR_DO_TEXTURE_CUBE, TEXTURE_CUBE_INDEX, GL_TEXTURE_CUBE_MAP_ARB },
+      { DRI_TEXMGR_DO_TEXTURE_RECT, TEXTURE_RECT_INDEX, GL_TEXTURE_RECTANGLE_NV }
+   };
+   const unsigned   nr_bindings = sizeof( binding ) / sizeof( binding[0] );
+   const GLuint     saved_unit = ctx->Texture.CurrentUnit;
+   unsigned   unit;
+   unsigned   row;
+
+   for ( unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++ ) {
+      /* BindTexture is called with CurrentUnit pointing at this unit. */
+      ctx->Texture.CurrentUnit = unit;
+
+      for ( row = 0 ; row < nr_bindings ; row++ ) {
+         struct gl_texture_object *texObj;
+
+         if ( (targets & binding[row].flag) == 0 ) {
+            continue;
+         }
+         /* Bind the unit's current object for this target, then move
+          * its driver-private object to the tail of the swapped list.
+          */
+         texObj = ctx->Texture.Unit[unit].CurrentTex[ binding[row].index ];
+         ctx->Driver.BindTexture( ctx, binding[row].target, texObj );
+         move_to_tail( swapped, (driTextureObject *) texObj->DriverData );
+      }
+   }
+
+   /* Put back the unit that was current on entry. */
+   ctx->Texture.CurrentUnit = saved_unit;
+}
+
+
+
+
+/**
+ * Verify that the specified texture is in the specified heap.
+ * 
+ * \param tex   Texture to be tested.
+ * \param heap  Texture memory heap to be tested.
+ * \return True if the texture is in the heap, false otherwise.
+ */
+
+static GLboolean
+check_in_heap( const driTextureObject * tex, const driTexHeap * heap )
+{
+#if 1
+   /* The texture records the heap it lives in; compare that directly. */
+   return (heap == tex->heap) ? GL_TRUE : GL_FALSE;
+#else
+   /* Disabled alternative: search the heap's list for the texture. */
+   driTextureObject * curr;
+   foreach( curr, & heap->texture_objects ) {
+      if ( curr == tex ) {
+	 break;
+      }
+   }
+   return curr == tex;
+#endif
+}
+
+
+
+/****************************************************************************/
+/**
+ * Validate the consistency of a set of texture heaps.
+ * Original version by Keith Whitwell in r200/r200_sanity.c.
+ */
+
+GLboolean
+driValidateTextureHeaps( driTexHeap * const * texture_heaps,
+			 unsigned nr_heaps, const driTextureObject * swapped )
+{
+   driTextureObject *t;
+   unsigned  i;
+
+   for ( i = 0 ; i < nr_heaps ; i++ ) {
+      int last_end = 0;
+      unsigned textures_in_heap = 0;
+      unsigned blocks_in_mempool = 0;
+      const driTexHeap * heap = texture_heaps[i];
+      const struct mem_block *p;
+
+      /* Heap arrays may contain NULL entries; skip them. */
+      if ( heap == NULL ) {
+	 continue;
+      }
+      p = heap->memory_heap;
+
+      /* Check each texture object has a MemBlock, and is linked into
+       * the correct heap.  Check the texobj size fits within the
+       * MemBlock.  Count the number of texobj's using this heap.
+       */
+      foreach ( t, &heap->texture_objects ) {
+	 if ( !check_in_heap( t, heap ) ) {
+	    fprintf( stderr, "%s memory block for texture object @ %p not "
+		     "found in heap #%u\n",
+		     __FUNCTION__, (void *)t, i );
+	    return GL_FALSE;
+	 }
+
+	 if ( t->memBlock == NULL ) {
+	    fprintf( stderr, "%s: Texture object @ %p in heap #%u has no "
+		     "memory block\n", __FUNCTION__, (void *)t, i );
+	    return GL_FALSE;
+	 }
+	 /* Report the block size first, then the size the texture needs. */
+	 if ( t->totalSize > t->memBlock->size ) {
+	    fprintf( stderr, "%s: Memory block for texture object @ %p is "
+		     "only %u bytes, but %u are required\n",
+		     __FUNCTION__, (void *)t, t->memBlock->size, t->totalSize );
+	    return GL_FALSE;
+	 }
+
+	 textures_in_heap++;
+      }
+
+      /* Walk the memory blocks: none may be reserved and each must start
+       * where the previous one ended.  Count the blocks that are in use.
+       */
+      while ( p != NULL ) {
+	 if (p->reserved) {
+	    fprintf( stderr, "%s: Block (%08x,%x), is reserved?!\n",
+		     __FUNCTION__, p->ofs, p->size );
+	    return GL_FALSE;
+	 }
+
+	 if (p->ofs != last_end) {
+	    fprintf( stderr, "%s: blocks_in_mempool = %u, last_end = %d, p->ofs = %d\n",
+		     __FUNCTION__, blocks_in_mempool, last_end, p->ofs );
+	    return GL_FALSE;
+	 }
+
+	 if (!p->reserved && !p->free) {
+	    blocks_in_mempool++;
+	 }
+
+	 last_end = p->ofs + p->size;
+	 p = p->next;
+      }
+
+      if (textures_in_heap != blocks_in_mempool) {
+	 fprintf( stderr, "%s: Different number of textures objects (%u) and "
+		  "inuse memory blocks (%u)\n",
+		  __FUNCTION__, textures_in_heap, blocks_in_mempool );
+	 return GL_FALSE;
+      }
+
+#if 0
+      fprintf( stderr, "%s: textures_in_heap = %u\n",
+	       __FUNCTION__, textures_in_heap );
+#endif
+   }
+
+   /* Swapped-out texture objects must not own a memory block. */
+   i = 0;
+   foreach ( t, swapped ) {
+      if ( t->memBlock != NULL ) {
+	 fprintf( stderr, "%s: Swapped texobj %p has non-NULL memblock %p\n",
+		  __FUNCTION__, (void *)t, (void *)t->memBlock );
+	 return GL_FALSE;
+      }
+      i++;
+   }
+
+#if 0
+   fprintf( stderr, "%s: swapped texture count = %u\n", __FUNCTION__, i );
+#endif
+
+   return GL_TRUE;
+}
+
+
+
+
+/****************************************************************************/
+/**
+ * Compute which mipmap levels that really need to be sent to the hardware.
+ * This depends on the base image size, GL_TEXTURE_MIN_LOD,
+ * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL.
+ */
+
+void
+driCalculateTextureFirstLastLevel( driTextureObject * t )
+{
+   struct gl_texture_object * const tObj = t->tObj;
+   const struct gl_texture_image * const baseImage =
+       tObj->Image[0][tObj->BaseLevel];
+
+   /* Signed on purpose: MinLod and MaxLod may be negative, and signed
+    * level numbers make the clamping below work without extra checks.
+    */
+   int   firstLevel;
+   int   lastLevel;
+
+   switch (tObj->Target) {
+   case GL_TEXTURE_RECTANGLE_NV:
+   case GL_TEXTURE_4D_SGIS:
+      /* Only level 0 is used for these targets. */
+      firstLevel = 0;
+      lastLevel = 0;
+      break;
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+   case GL_TEXTURE_CUBE_MAP:
+      if (tObj->MinFilter != GL_NEAREST && tObj->MinFilter != GL_LINEAR) {
+         /* Mipmapping filter: turn the LOD bounds into level numbers and
+          * clamp both ends to the levels the base image can provide.
+          */
+         firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5);
+         firstLevel = MAX2(firstLevel, tObj->BaseLevel);
+         firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2);
+         lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5);
+         lastLevel = MAX2(lastLevel, tObj->BaseLevel);
+         lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2);
+         lastLevel = MIN2(lastLevel, tObj->MaxLevel);
+         lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+      }
+      else {
+         /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. */
+         firstLevel = tObj->BaseLevel;
+         lastLevel = tObj->BaseLevel;
+      }
+      break;
+   default:
+      /* Any other target: leave the stored levels untouched. */
+      return;
+   }
+
+   t->firstLevel = firstLevel;
+   t->lastLevel = lastLevel;
+}
+
+
+
+
+/**
+ * \name DRI texture formats.  The first six start out as MESA_FORMAT_NONE
+ * and receive their endian-specific value in driInitTextureFormats().
+ */
+/*@{*/
+gl_format _dri_texformat_rgba8888 = MESA_FORMAT_NONE;
+gl_format _dri_texformat_argb8888 = MESA_FORMAT_NONE;
+gl_format _dri_texformat_rgb565 = MESA_FORMAT_NONE;
+gl_format _dri_texformat_argb4444 = MESA_FORMAT_NONE;
+gl_format _dri_texformat_argb1555 = MESA_FORMAT_NONE;
+gl_format _dri_texformat_al88 = MESA_FORMAT_NONE;
+gl_format _dri_texformat_a8 = MESA_FORMAT_A8;
+gl_format _dri_texformat_ci8 = MESA_FORMAT_CI8;
+gl_format _dri_texformat_i8 = MESA_FORMAT_I8;
+gl_format _dri_texformat_l8 = MESA_FORMAT_L8;
+/*@}*/
+
+
+/**
+ * Initialize _dri_texformat_* vars according to whether we're on
+ * a big or little endian system.
+ */
+void
+driInitTextureFormats(void)
+{
+   /* Little-endian hosts get the plain Mesa formats, all other hosts
+    * the _REV variants. */
+   const int plain = (_mesa_little_endian() != 0);
+
+   _dri_texformat_rgba8888 =
+      plain ? MESA_FORMAT_RGBA8888 : MESA_FORMAT_RGBA8888_REV;
+   _dri_texformat_argb8888 =
+      plain ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB8888_REV;
+   _dri_texformat_rgb565 =
+      plain ? MESA_FORMAT_RGB565 : MESA_FORMAT_RGB565_REV;
+   _dri_texformat_argb4444 =
+      plain ? MESA_FORMAT_ARGB4444 : MESA_FORMAT_ARGB4444_REV;
+   _dri_texformat_argb1555 =
+      plain ? MESA_FORMAT_ARGB1555 : MESA_FORMAT_ARGB1555_REV;
+   _dri_texformat_al88 =
+      plain ? MESA_FORMAT_AL88 : MESA_FORMAT_AL88_REV;
+}
diff --git a/src/mesa/drivers/dri/common/texmem.h b/src/mesa/drivers/dri/common/texmem.h
new file mode 100644
index 0000000000..725ba2e119
--- /dev/null
+++ b/src/mesa/drivers/dri/common/texmem.h
@@ -0,0 +1,334 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <idr@us.ibm.com>
+ *    Keith Whitwell <keithw@tungstengraphics.com>
+ *    Kevin E. Martin <kem@users.sourceforge.net>
+ *    Gareth Hughes <gareth@nvidia.com>
+ */
+
+/** \file texmem.h
+ * Public interface to the DRI texture memory management routines.
+ * 
+ * \sa texmem.c
+ */
+
+#ifndef DRI_TEXMEM_H
+#define DRI_TEXMEM_H
+
+#include "main/mtypes.h"
+#include "main/formats.h"
+#include "main/mm.h"
+#include "xf86drm.h"
+
+struct dri_tex_heap;
+typedef struct dri_tex_heap driTexHeap;
+
+struct dri_texture_object;
+typedef struct dri_texture_object driTextureObject;
+
+
+/**
+ * Base texture object type.  Each driver will extend this type with its own
+ * private data members.
+ */
+
+struct dri_texture_object {
+	struct dri_texture_object * next;	/**< Next object in the list */
+	struct dri_texture_object * prev;	/**< Previous object in the list */
+
+	driTexHeap * heap;		/**< Texture heap currently stored in */
+	struct gl_texture_object * tObj;/**< Pointer to Mesa texture object
+					 * If NULL, this texture object is a
+					 * "placeholder" object representing
+					 * texture memory in use by another context.
+					 * A placeholder should have a heap and a memBlock.
+					 */
+	struct mem_block *memBlock;	/**< Memory block containing texture */
+
+        unsigned    reserved;	        /**< Cannot be swapped out by user contexts.  */
+
+	unsigned    bound;		/**< Bitmask indicating which tex units
+					 * this texture object is bound to.
+					 * Bit 0 = unit 0, Bit 1 = unit 1, etc
+					 */
+
+	unsigned    totalSize;		/**< Total size of the texture,
+					 * including all mipmap levels 
+					 */
+
+	unsigned    dirty_images[6];	/**< Flags for whether or not images
+					 * need to be uploaded to local or
+					 * AGP texture space.  One flag set
+					 * for each cube face for cubic
+					 * textures.  Bit zero corresponds to
+					 * the base-level, which may or may
+					 * not be the level zero mipmap.
+					 */
+
+        unsigned    timestamp;	        /**< Timestamp used to
+					 * synchronize with 3d engine
+					 * in hardware where textures
+					 * are uploaded directly to
+					 * the framebuffer.  
+					 */
+
+        unsigned    firstLevel;         /**< Image in \c tObj->Image[0] that
+					 * corresponds to the base-level of
+					 * this texture object.
+					 */
+
+        unsigned    lastLevel;          /**< Last image in \c tObj->Image[0] 
+					 * used by the
+					 * current LOD settings of
+					 * this texture object.  This
+					 * value must be greater than
+					 * or equal to \c firstLevel.
+					 */
+};
+
+/** Driver hook type; see dri_tex_heap::destroy_texture_object. */
+typedef void (destroy_texture_object_t)( void * driverContext,
+				        driTextureObject * t );
+
+/**
+ * Client-private representation of texture memory state.
+ *
+ * Clients will place one or more of these structs in their driver
+ * context struct to manage one or more global texture heaps.
+ */
+
+struct dri_tex_heap {
+
+	/** Client-supplied heap identifier
+	 */
+	unsigned heapId;	
+
+	/** Pointer to the client's private context
+	 */
+	void *driverContext;
+
+	/** Total size of the heap, in bytes
+	 */
+	unsigned size;
+
+	/** \brief \f$log_2\f$ of size of single heap region
+	 *
+	 * Each context takes memory from the global texture heap in
+	 * \f$2^{logGranularity}\f$ byte blocks.  The value of
+	 * \a logGranularity is based on the amount of memory represented
+	 * by the heap and the maximum number of regions in the SAREA.  Given
+	 * \a b bytes of texture memory and \a n regions in the SAREA,
+	 * \a logGranularity will be \f$\lfloor\log_2( b / n )\rfloor\f$.
+	 */
+	unsigned logGranularity;
+
+	/** \brief Required alignment of allocations in this heap
+	 * 
+	 * The alignment shift is supplied to \a mmAllocMem when memory is
+	 * allocated from this heap.  The value of \a alignmentShift will
+	 * typically reflect some requirement of the hardware.  This value has
+	 * \b no \b relation to \a logGranularity.  \a alignmentShift is a
+	 * per-context value.
+	 *
+	 * \sa mmAllocMem
+	 */
+	unsigned alignmentShift;
+
+	/** Number of elements in global list (the SAREA).
+	 */
+	unsigned nrRegions;	 
+
+	/** Pointer to SAREA \a driTexRegion array
+	 */
+	drmTextureRegionPtr global_regions;
+
+	/** Pointer to the texture state age (generation number) in the SAREA
+	 */
+	unsigned     * global_age;
+
+	/** Local age (generation number) of texture state
+	 */
+	unsigned local_age;
+
+	/** Memory heap used to manage texture memory represented by
+	 * this texture heap.
+	 */
+	struct mem_block * memory_heap;
+
+	/** List of objects that we currently believe to be in texture
+	 * memory.
+	 */
+	driTextureObject     texture_objects;
+    
+	/** Pointer to the list of texture objects that are not in
+	 * texture memory.
+	 */
+	driTextureObject   * swapped_objects;
+
+	/** Size of the driver-specific texture object.
+	 */
+	unsigned       texture_object_size;
+
+
+	/**
+	 * \brief Function to destroy driver-specific texture object data.
+	 * 
+	 * This function is supplied by the driver so that the texture manager
+	 * can release all resources associated with a texture object.  This
+	 * function should only release driver-specific data.  That is,
+	 * \a driDestroyTextureObject will release the texture memory
+	 * associated with the texture object, it will release the memory
+	 * for the texture object itself, and it will unlink the texture
+	 * object from the texture object lists.
+	 *
+	 * \param driverContext Pointer to the driver supplied context
+	 * \param t Texture object that is to be destroyed
+	 * \sa driDestroyTextureObject
+	 */
+
+	destroy_texture_object_t * destroy_texture_object;
+
+
+	/** Swap counter location; presumably the pointer installed by
+	 * driSetTextureSwapCounterLocation() -- TODO confirm in texmem.c */
+	unsigned * texture_swaps;
+
+        /**
+	 * Timestamp used to synchronize with 3d engine in hardware
+	 * where textures are uploaded directly to the
+	 * framebuffer.  
+	 */
+        unsigned timestamp;
+
+	/** \brief Kick/upload weight
+	 *
+	 * When not enough free space is available this weight
+	 * influences the choice of the heap from which textures are
+	 * kicked. By default the weight is equal to the heap size.
+	 */
+	double weight;
+
+	/** \brief Kick/upload duty
+	 *
+	 * The heap with the highest duty will be chosen for kicking
+	 * textures if not enough free space is available. The duty is
+	 * reduced by the amount of data kicked. Rebalancing of
+	 * negative duties takes the weights into account.
+	 */
+	int duty;
+};
+
+
+
+
+/**
+ * Called by the client on lock contention to determine whether textures have
+ * been stolen.  If another client has modified a region in which we have
+ * textures, then we need to figure out which of our textures have been
+ * removed and update our global LRU.  Expands to a call to driAgeTextures()
+ * only when \a heap is non-NULL and its local age differs from the global age.
+ * \param heap Texture heap to be updated; may be NULL.  Evaluated more than
+ *     once, so it must not have side effects.
+ * \hideinitializer
+ */
+#define DRI_AGE_TEXTURES( heap )				\
+   do {								\
+       if ( ((heap) != NULL)					\
+	    && ((heap)->local_age != (heap)->global_age[0]) )	\
+	   driAgeTextures( heap );				\
+   } while( 0 )
+
+
+
+
+/* This should be called whenever there has been contention on the hardware
+ * lock.  driAgeTextures should not be called directly.  Instead, clients
+ * should use DRI_AGE_TEXTURES, above.
+ */
+
+void driAgeTextures( driTexHeap * heap );
+
+void driUpdateTextureLRU( driTextureObject * t );
+void driSwapOutTextureObject( driTextureObject * t );
+void driDestroyTextureObject( driTextureObject * t );
+int driAllocateTexture( driTexHeap * const * heap_array, unsigned nr_heaps,
+    driTextureObject * t );
+
+GLboolean driIsTextureResident( GLcontext * ctx, 
+    struct gl_texture_object * texObj );
+
+driTexHeap * driCreateTextureHeap( unsigned heap_id, void * context,
+    unsigned size, unsigned alignmentShift, unsigned nr_regions,
+    drmTextureRegionPtr global_regions, unsigned * global_age,
+    driTextureObject * swapped_objects, unsigned texture_object_size,
+    destroy_texture_object_t * destroy_tex_obj );
+void driDestroyTextureHeap( driTexHeap * heap );
+
+void
+driCalculateMaxTextureLevels( driTexHeap * const * heaps,
+			      unsigned nr_heaps,
+			      struct gl_constants * limits,
+			      unsigned max_bytes_per_texel,
+			      unsigned max_2D_size,
+			      unsigned max_3D_size,
+			      unsigned max_cube_size,
+			      unsigned max_rect_size,
+			      unsigned mipmaps_at_once,
+			      int all_textures_one_heap,
+			      int allow_larger_textures );
+
+void
+driSetTextureSwapCounterLocation( driTexHeap * heap, unsigned * counter );
+
+#define DRI_TEXMGR_DO_TEXTURE_1D    0x0001
+#define DRI_TEXMGR_DO_TEXTURE_2D    0x0002
+#define DRI_TEXMGR_DO_TEXTURE_3D    0x0004
+#define DRI_TEXMGR_DO_TEXTURE_CUBE  0x0008
+#define DRI_TEXMGR_DO_TEXTURE_RECT  0x0010
+
+void driInitTextureObjects( GLcontext *ctx, driTextureObject * swapped,
+			    GLuint targets );
+
+GLboolean driValidateTextureHeaps( driTexHeap * const * texture_heaps,
+    unsigned nr_heaps, const driTextureObject * swapped );
+
+extern void driCalculateTextureFirstLastLevel( driTextureObject * t );
+
+
+extern gl_format _dri_texformat_rgba8888;
+extern gl_format _dri_texformat_argb8888;
+extern gl_format _dri_texformat_rgb565;
+extern gl_format _dri_texformat_argb4444;
+extern gl_format _dri_texformat_argb1555;
+extern gl_format _dri_texformat_al88;
+extern gl_format _dri_texformat_a8;
+extern gl_format _dri_texformat_ci8;
+extern gl_format _dri_texformat_i8;
+extern gl_format _dri_texformat_l8;
+
+extern void driInitTextureFormats( void );
+
+#endif /* DRI_TEXMEM_H */
diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c
new file mode 100644
index 0000000000..0dd879abc9
--- /dev/null
+++ b/src/mesa/drivers/dri/common/utils.c
@@ -0,0 +1,798 @@
+/*
+ * (C) Copyright IBM Corporation 2002, 2004
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file utils.c
+ * Utility functions for DRI drivers.
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include "main/mtypes.h"
+#include "main/cpuinfo.h"
+#include "main/extensions.h"
+#include "utils.h"
+
+
+/**
+ * Print message to \c stderr if the \c LIBGL_DEBUG environment variable
+ * is set. 
+ * 
+ * Is called from the drivers.
+ * 
+ * \param f \c printf like format string.
+ */
+void
+__driUtilMessage(const char *f, ...)
+{
+    va_list args;
+
+    if (getenv("LIBGL_DEBUG")) {
+        fprintf(stderr, "libGL: ");
+        va_start(args, f);
+        vfprintf(stderr, f, args);
+        va_end(args);
+        fprintf(stderr, "\n");
+    }
+}
+
+
+unsigned
+driParseDebugString( const char * debug, 
+		     const struct dri_debug_control * control  )
+{
+   unsigned   flag;
+   /* OR together the flag of every control entry (list ends at a NULL string)
+    * whose string occurs in debug; a debug of exactly "all" selects them all. */
+   flag = 0;
+   if ( debug != NULL ) {
+      while( control->string != NULL ) {
+	 if ( !strcmp( debug, "all" ) ||
+	      strstr( debug, control->string ) != NULL ) {
+	    flag |= control->flag;
+	 }
+
+	 control++;
+      }
+   }
+
+   return flag;
+}
+
+
+
+/**
+ * Create the \c GL_RENDERER string for DRI drivers.
+ * 
+ * Almost all DRI drivers use a \c GL_RENDERER string of the form:
+ *
+ *    "Mesa DRI <chip> <driver date> <AGP speed> <CPU information>"
+ *
+ * Using the supplied chip name, driver date, and AGP speed, this function
+ * creates the string.
+ * 
+ * \param buffer         Buffer to hold the \c GL_RENDERER string.
+ * \param hardware_name  Name of the hardware.
+ * \param driver_date    Driver date.
+ * \param agp_mode       AGP mode (speed).
+ * 
+ * \returns
+ * The length of the string stored in \c buffer.  This does \b not include
+ * the terminating \c NUL character.
+ */
+unsigned
+driGetRendererString( char * buffer, const char * hardware_name,
+		      const char * driver_date, GLuint agp_mode )
+{
+   unsigned offset;
+   char *cpu;
+
+   offset = sprintf( buffer, "Mesa DRI %s %s", hardware_name, driver_date );
+
+   /* Append any AGP-specific information.
+    */
+   switch ( agp_mode ) {
+   case 1:
+   case 2:
+   case 4:
+   case 8:
+      offset += sprintf( & buffer[ offset ], " AGP %ux", agp_mode );
+      break;
+	
+   default:
+      break;
+   }
+
+   /* Append any CPU-specific information.
+    */
+   cpu = _mesa_get_cpu_string();
+   if (cpu) {
+      offset += sprintf(buffer + offset, " %s", cpu);
+      free(cpu);
+   }
+
+   return offset;
+}
+
+
+
+
+#define need_GL_ARB_draw_buffers
+#define need_GL_ARB_multisample
+#define need_GL_ARB_texture_compression
+#define need_GL_ARB_transpose_matrix
+#define need_GL_ARB_vertex_buffer_object
+#define need_GL_ARB_window_pos
+#define need_GL_EXT_compiled_vertex_array
+#define need_GL_EXT_multi_draw_arrays
+#define need_GL_EXT_polygon_offset
+#define need_GL_EXT_texture_object
+#define need_GL_EXT_vertex_array
+#define need_GL_IBM_multimode_draw_arrays
+#define need_GL_MESA_window_pos
+
+/* These are needed in *all* drivers because Mesa internally implements
+ * certain functionality in terms of functions provided by these extensions.
+ * For example, glBlendFunc is implemented by calling glBlendFuncSeparateEXT.
+ */
+#define need_GL_EXT_blend_func_separate
+#define need_GL_NV_vertex_program
+
+#include "main/remap_helper.h"
+
+static const struct dri_extension all_mesa_extensions[] = {
+   { "GL_ARB_draw_buffers",          GL_ARB_draw_buffers_functions },
+   { "GL_ARB_multisample",           GL_ARB_multisample_functions },
+   { "GL_ARB_texture_compression",   GL_ARB_texture_compression_functions },
+   { "GL_ARB_transpose_matrix",      GL_ARB_transpose_matrix_functions },
+   { "GL_ARB_vertex_buffer_object",  GL_ARB_vertex_buffer_object_functions},
+   { "GL_ARB_window_pos",            GL_ARB_window_pos_functions },
+   { "GL_EXT_blend_func_separate",   GL_EXT_blend_func_separate_functions },
+   { "GL_EXT_compiled_vertex_array", GL_EXT_compiled_vertex_array_functions },
+   { "GL_EXT_multi_draw_arrays",     GL_EXT_multi_draw_arrays_functions },
+   { "GL_EXT_polygon_offset",        GL_EXT_polygon_offset_functions },
+   { "GL_EXT_texture_object",        GL_EXT_texture_object_functions },
+   { "GL_EXT_vertex_array",          GL_EXT_vertex_array_functions },
+   { "GL_IBM_multimode_draw_arrays", GL_IBM_multimode_draw_arrays_functions },
+   { "GL_MESA_window_pos",           GL_MESA_window_pos_functions },
+   { "GL_NV_vertex_program",         GL_NV_vertex_program_functions },
+   { NULL,                           NULL }
+};
+
+
+/**
+ * Enable and map extensions supported by the driver.
+ * 
+ * When ctx is NULL, extensions are not enabled, but their functions
+ * are still mapped.  When extensions_to_enable is NULL, all static
+ * functions known to mesa core are mapped.
+ *
+ * \bug
+ * ARB_imaging isn't handled properly.  In Mesa, enabling ARB_imaging also
+ * enables all the sub-extensions that are folded into it.  This means that
+ * we need to add entry-points (via \c driInitSingleExtension) for those
+ * new functions here.
+ */
+void driInitExtensions( GLcontext * ctx,
+			const struct dri_extension * extensions_to_enable,
+			GLboolean enable_imaging )
+{
+   static int first_time = 1;
+   unsigned   i;
+
+   if ( first_time ) {
+      first_time = 0;
+      driInitExtensions( NULL, all_mesa_extensions, GL_FALSE );
+   }
+
+   if ( (ctx != NULL) && enable_imaging ) {
+      _mesa_enable_imaging_extensions( ctx );
+   }
+
+   /* The caller is too lazy to list any extension */
+   if ( extensions_to_enable == NULL ) {
+      /* Map the static functions.  Together with those mapped by remap
+       * table, this should cover everything mesa core knows.
+       */
+      _mesa_map_static_functions();
+      return;
+   }
+
+   for ( i = 0 ; extensions_to_enable[i].name != NULL ; i++ ) {
+       driInitSingleExtension( ctx, & extensions_to_enable[i] );
+   }
+}
+
+
+
+
+/**
+ * Enable and map functions for a single extension
+ * 
+ * \param ctx  Context where extension is to be enabled.
+ * \param ext  Extension that is to be enabled.
+ * 
+ * \sa driInitExtensions, _mesa_enable_extension, _mesa_map_function_array
+ */
+void driInitSingleExtension( GLcontext * ctx,
+			     const struct dri_extension * ext )
+{
+    if ( ext->functions != NULL ) {
+       _mesa_map_function_array(ext->functions);
+    }
+
+    if ( ctx != NULL ) {
+	_mesa_enable_extension( ctx, ext->name );
+    }
+}
+
+
+/**
+ * Utility function used by drivers to test the versions of other components.
+ *
+ * \param driver_name  Name of the driver.  Used in error messages.
+ * \param driActual    Actual DRI version supplied __driCreateNewScreen.
+ * \param driExpected  Minimum DRI version required by the driver.
+ * \param ddxActual    Actual DDX version supplied __driCreateNewScreen.
+ * \param ddxExpected  Minimum DDX minor and range of DDX major version required by the driver.
+ * \param drmActual    Actual DRM version supplied __driCreateNewScreen.
+ * \param drmExpected  Minimum DRM version required by the driver.
+ * 
+ * \returns \c GL_TRUE if all version requirements are met.  Otherwise,
+ *          \c GL_FALSE is returned.
+ * 
+ * \sa __driCreateNewScreen, driCheckDriDdxDrmVersions2
+ *
+ * \todo
+ * Now that the old \c driCheckDriDdxDrmVersions function is gone, this
+ * function and \c driCheckDriDdxDrmVersions2 should be renamed.
+ */
+GLboolean
+driCheckDriDdxDrmVersions3(const char * driver_name,
+			   const __DRIversion * driActual,
+			   const __DRIversion * driExpected,
+			   const __DRIversion * ddxActual,
+			   const __DRIutilversion2 * ddxExpected,
+			   const __DRIversion * drmActual,
+			   const __DRIversion * drmExpected)
+{
+   static const char format[] = "%s DRI driver expected %s version %d.%d.x "
+       "but got version %d.%d.%d\n";
+   static const char format2[] = "%s DRI driver expected %s version %d-%d.%d.x "
+       "but got version %d.%d.%d\n";
+
+
+   /* Check the DRI version */
+   if ( (driActual->major != driExpected->major)
+	|| (driActual->minor < driExpected->minor) ) {
+      fprintf(stderr, format, driver_name, "DRI",
+		       driExpected->major, driExpected->minor,
+		       driActual->major, driActual->minor, driActual->patch);
+      return GL_FALSE;
+   }
+
+   /* Check that the DDX driver version is compatible */
+   if ( (ddxActual->major < ddxExpected->major_min)
+	|| (ddxActual->major > ddxExpected->major_max)
+	|| (ddxActual->minor < ddxExpected->minor) ) {
+      fprintf(stderr, format2, driver_name, "DDX",
+		       ddxExpected->major_min, ddxExpected->major_max, ddxExpected->minor,
+		       ddxActual->major, ddxActual->minor, ddxActual->patch);
+      return GL_FALSE;
+   }
+   
+   /* Check that the DRM driver version is compatible */
+   if ( (drmActual->major != drmExpected->major)
+	|| (drmActual->minor < drmExpected->minor) ) {
+      fprintf(stderr, format, driver_name, "DRM",
+		       drmExpected->major, drmExpected->minor,
+		       drmActual->major, drmActual->minor, drmActual->patch);
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+
+GLboolean
+driCheckDriDdxDrmVersions2(const char * driver_name,
+			   const __DRIversion * driActual,
+			   const __DRIversion * driExpected,
+			   const __DRIversion * ddxActual,
+			   const __DRIversion * ddxExpected,
+			   const __DRIversion * drmActual,
+			   const __DRIversion * drmExpected)
+{
+   __DRIutilversion2 ddx_expected;
+   ddx_expected.major_min = ddxExpected->major;
+   ddx_expected.major_max = ddxExpected->major;
+   ddx_expected.minor = ddxExpected->minor;
+   ddx_expected.patch = ddxExpected->patch;
+   return driCheckDriDdxDrmVersions3(driver_name, driActual,
+				driExpected, ddxActual, & ddx_expected,
+				drmActual, drmExpected);
+}
+
+GLboolean driClipRectToFramebuffer( const GLframebuffer *buffer,
+				    GLint *x, GLint *y,
+				    GLsizei *width, GLsizei *height )
+{
+   /* left clipping */
+   if (*x < buffer->_Xmin) {
+      *width -= (buffer->_Xmin - *x);
+      *x = buffer->_Xmin;
+   }
+
+   /* right clipping */
+   if (*x + *width > buffer->_Xmax)
+      *width -= (*x + *width - buffer->_Xmax - 1);
+
+   if (*width <= 0)
+      return GL_FALSE;
+
+   /* bottom clipping */
+   if (*y < buffer->_Ymin) {
+      *height -= (buffer->_Ymin - *y);
+      *y = buffer->_Ymin;
+   }
+
+   /* top clipping */
+   if (*y + *height > buffer->_Ymax)
+      *height -= (*y + *height - buffer->_Ymax - 1);
+
+   if (*height <= 0)
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+/**
+ * Creates the set of \c __DRIconfig configurations that a driver will expose.
+ *
+ * One configuration is created for each combination of the parameters, i.e.
+ * \c num_depth_stencil_bits * \c num_db_modes * \c num_msaa_modes
+ * configurations, doubled when \c enable_accum is set.
+ *
+ * For the most part, data is just copied from \c depth_bits, \c stencil_bits,
+ * \c db_modes, and \c msaa_samples into each \c __GLcontextModes element.
+ * However, the meanings of \c fb_format and \c fb_type require further
+ * explanation.  The \c fb_format specifies which color components are in
+ * each pixel and what the default order is.  For example, \c GL_RGB specifies
+ * that red, green, blue are available and red is in the "most significant"
+ * position and blue is in the "least significant".  The \c fb_type specifies
+ * the bit sizes of each component and the actual ordering.  For example, if
+ * \c GL_UNSIGNED_SHORT_5_6_5_REV is specified with \c GL_RGB, bits [15:11]
+ * are the blue value, bits [10:5] are the green value, and bits [4:0] are
+ * the red value.
+ *
+ * One subtle issue is the combination of \c GL_RGB or \c GL_BGR and either
+ * of the \c GL_UNSIGNED_INT_8_8_8_8 modes.  The resulting mask values in the
+ * \c __GLcontextModes structure are \b identical to the \c GL_RGBA or
+ * \c GL_BGRA case, except the \c alphaMask is zero.  This means that, as
+ * far as this routine is concerned, \c GL_RGB with \c GL_UNSIGNED_INT_8_8_8_8
+ * still uses 32-bits.
+ *
+ * If in doubt, look at the tables used in the function.
+ *
+ * \param fb_format     Format of the framebuffer.  Currently only \c GL_RGB,
+ *                      \c GL_RGBA, \c GL_BGR, and \c GL_BGRA are supported.
+ * \param fb_type       Type of the pixels in the framebuffer.  Currently only
+ *                      \c GL_UNSIGNED_BYTE_3_3_2,
+ *                      \c GL_UNSIGNED_BYTE_2_3_3_REV,
+ *                      \c GL_UNSIGNED_SHORT_5_6_5,
+ *                      \c GL_UNSIGNED_SHORT_5_6_5_REV,
+ *                      \c GL_UNSIGNED_INT_8_8_8_8, and
+ *                      \c GL_UNSIGNED_INT_8_8_8_8_REV are supported.
+ * \param depth_bits    Array of depth buffer sizes to be exposed.
+ * \param stencil_bits  Array of stencil buffer sizes to be exposed.
+ * \param num_depth_stencil_bits  Number of entries in both \c depth_bits and
+ *                      \c stencil_bits.
+ * \param db_modes      Array of buffer swap modes.  If an element has a
+ *                      value of \c GLX_NONE, then it represents a
+ *                      single-buffered mode.  Other valid values are
+ *                      \c GLX_SWAP_EXCHANGE_OML, \c GLX_SWAP_COPY_OML, and
+ *                      \c GLX_SWAP_UNDEFINED_OML.  See the
+ *                      GLX_OML_swap_method extension spec for more details.
+ * \param num_db_modes  Number of entries in \c db_modes.
+ * \param msaa_samples  Array of msaa sample count. 0 represents a visual
+ *                      without a multisample buffer.
+ * \param num_msaa_modes Number of entries in \c msaa_samples.
+ * \param enable_accum  If true, each combination is also exposed with 16-bit
+ *                      accumulation channels; those configurations are rated
+ *                      \c GLX_SLOW_CONFIG.
+ *
+ * \returns
+ * A \c NULL-terminated array of newly allocated \c __DRIconfig pointers, or
+ * \c NULL if \c fb_format or \c fb_type is unsupported or if memory cannot
+ * be allocated.
+ *
+ * \todo
+ * There is currently no way to support packed RGB modes (i.e., modes with
+ * exactly 3 bytes per pixel) or floating-point modes.  This could probably
+ * be done by creating some new, private enums with clever names likes
+ * \c GL_UNSIGNED_3BYTE_8_8_8, \c GL_4FLOAT_32_32_32_32, 
+ * \c GL_4HALF_16_16_16_16, etc.  We can cross that bridge when we come to it.
+ */
+__DRIconfig **
+driCreateConfigs(GLenum fb_format, GLenum fb_type,
+		 const uint8_t * depth_bits, const uint8_t * stencil_bits,
+		 unsigned num_depth_stencil_bits,
+		 const GLenum * db_modes, unsigned num_db_modes,
+		 const uint8_t * msaa_samples, unsigned num_msaa_modes,
+		 GLboolean enable_accum)
+{
+   static const uint8_t bits_table[4][4] = {
+     /* R  G  B  A */
+      { 3, 3, 2, 0 }, /* Any GL_UNSIGNED_BYTE_3_3_2 */
+      { 5, 6, 5, 0 }, /* Any GL_UNSIGNED_SHORT_5_6_5 */
+      { 8, 8, 8, 0 }, /* Any RGB with any GL_UNSIGNED_INT_8_8_8_8 */
+      { 8, 8, 8, 8 }  /* Any RGBA with any GL_UNSIGNED_INT_8_8_8_8 */
+   };
+
+   static const uint32_t masks_table_rgb[6][4] = {
+      { 0x000000E0, 0x0000001C, 0x00000003, 0x00000000 }, /* 3_3_2       */
+      { 0x00000007, 0x00000038, 0x000000C0, 0x00000000 }, /* 2_3_3_REV   */
+      { 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 }, /* 5_6_5       */
+      { 0x0000001F, 0x000007E0, 0x0000F800, 0x00000000 }, /* 5_6_5_REV   */
+      { 0xFF000000, 0x00FF0000, 0x0000FF00, 0x00000000 }, /* 8_8_8_8     */
+      { 0x000000FF, 0x0000FF00, 0x00FF0000, 0x00000000 }  /* 8_8_8_8_REV */
+   };
+
+   static const uint32_t masks_table_rgba[6][4] = {
+      { 0x000000E0, 0x0000001C, 0x00000003, 0x00000000 }, /* 3_3_2       */
+      { 0x00000007, 0x00000038, 0x000000C0, 0x00000000 }, /* 2_3_3_REV   */
+      { 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 }, /* 5_6_5       */
+      { 0x0000001F, 0x000007E0, 0x0000F800, 0x00000000 }, /* 5_6_5_REV   */
+      { 0xFF000000, 0x00FF0000, 0x0000FF00, 0x000000FF }, /* 8_8_8_8     */
+      { 0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000 }, /* 8_8_8_8_REV */
+   };
+
+   static const uint32_t masks_table_bgr[6][4] = {
+      { 0x00000007, 0x00000038, 0x000000C0, 0x00000000 }, /* 3_3_2       */
+      { 0x000000E0, 0x0000001C, 0x00000003, 0x00000000 }, /* 2_3_3_REV   */
+      { 0x0000001F, 0x000007E0, 0x0000F800, 0x00000000 }, /* 5_6_5       */
+      { 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 }, /* 5_6_5_REV   */
+      { 0x0000FF00, 0x00FF0000, 0xFF000000, 0x00000000 }, /* 8_8_8_8     */
+      { 0x00FF0000, 0x0000FF00, 0x000000FF, 0x00000000 }, /* 8_8_8_8_REV */
+   };
+
+   static const uint32_t masks_table_bgra[6][4] = {
+      { 0x00000007, 0x00000038, 0x000000C0, 0x00000000 }, /* 3_3_2       */
+      { 0x000000E0, 0x0000001C, 0x00000003, 0x00000000 }, /* 2_3_3_REV   */
+      { 0x0000001F, 0x000007E0, 0x0000F800, 0x00000000 }, /* 5_6_5       */
+      { 0x0000F800, 0x000007E0, 0x0000001F, 0x00000000 }, /* 5_6_5_REV   */
+      { 0x0000FF00, 0x00FF0000, 0xFF000000, 0x000000FF }, /* 8_8_8_8     */
+      { 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000 }, /* 8_8_8_8_REV */
+   };
+
+   static const uint8_t bytes_per_pixel[6] = {
+      1, /* 3_3_2       */
+      1, /* 2_3_3_REV   */
+      2, /* 5_6_5       */
+      2, /* 5_6_5_REV   */
+      4, /* 8_8_8_8     */
+      4  /* 8_8_8_8_REV */
+   };
+
+   const uint8_t  * bits;
+   const uint32_t * masks;
+   int index;
+   __DRIconfig **configs, **c;
+   __GLcontextModes *modes;
+   unsigned i, j, k, h;
+   unsigned num_modes;
+   unsigned num_accum_bits = (enable_accum) ? 2 : 1;
+
+   switch ( fb_type ) {
+      case GL_UNSIGNED_BYTE_3_3_2:
+	 index = 0;
+	 break;
+      case GL_UNSIGNED_BYTE_2_3_3_REV:
+	 index = 1;
+	 break;
+      case GL_UNSIGNED_SHORT_5_6_5:
+	 index = 2;
+	 break;
+      case GL_UNSIGNED_SHORT_5_6_5_REV:
+	 index = 3;
+	 break;
+      case GL_UNSIGNED_INT_8_8_8_8:
+	 index = 4;
+	 break;
+      case GL_UNSIGNED_INT_8_8_8_8_REV:
+	 index = 5;
+	 break;
+      default:
+	 fprintf( stderr, "[%s:%u] Unknown framebuffer type 0x%04x.\n",
+               __FUNCTION__, __LINE__, fb_type );
+	 return NULL;
+   }
+
+   /* Pick the mask table for the component order.  Valid formats are GL_RGB,
+    * GL_RGBA, GL_BGR, and GL_BGRA; index selects the row for fb_type.
+    */
+   switch ( fb_format ) {
+      case GL_RGB:
+         masks = masks_table_rgb[ index ];
+         break;
+
+      case GL_RGBA:
+         masks = masks_table_rgba[ index ];
+         break;
+
+      case GL_BGR:
+         masks = masks_table_bgr[ index ];
+         break;
+
+      case GL_BGRA:
+         masks = masks_table_bgra[ index ];
+         break;
+
+      default:
+         fprintf( stderr, "[%s:%u] Unknown framebuffer format 0x%04x.\n",
+               __FUNCTION__, __LINE__, fb_format );
+         return NULL;
+   }
+
+   switch ( bytes_per_pixel[ index ] ) {
+      case 1:
+	 bits = bits_table[0];
+	 break;
+      case 2:
+	 bits = bits_table[1];
+	 break;
+      default:
+	 bits = ((fb_format == GL_RGB) || (fb_format == GL_BGR))
+	    ? bits_table[2]
+	    : bits_table[3];
+	 break;
+   }
+
+   num_modes = num_depth_stencil_bits * num_db_modes * num_accum_bits * num_msaa_modes;
+   configs = calloc(1, (num_modes + 1) * sizeof *configs);
+   if (configs == NULL)
+       return NULL;
+
+    /* Emit one config per depth/stencil, swap mode, msaa and accum choice. */
+    c = configs;
+    for ( k = 0 ; k < num_depth_stencil_bits ; k++ ) {
+	for ( i = 0 ; i < num_db_modes ; i++ ) {
+	    for ( h = 0 ; h < num_msaa_modes; h++ ) {
+	    	for ( j = 0 ; j < num_accum_bits ; j++ ) {
+		    *c = malloc (sizeof **c);
+		    if (*c == NULL) {
+			/* Out of memory: release every config built so far. */
+			while (c != configs)
+			    free(*--c);
+			free(configs);
+			return NULL;
+		    }
+		    modes = &(*c)->modes;
+		    c++;
+
+		    memset(modes, 0, sizeof *modes);
+		    modes->redBits   = bits[0];
+		    modes->greenBits = bits[1];
+		    modes->blueBits  = bits[2];
+		    modes->alphaBits = bits[3];
+		    modes->redMask   = masks[0];
+		    modes->greenMask = masks[1];
+		    modes->blueMask  = masks[2];
+		    modes->alphaMask = masks[3];
+		    modes->rgbBits   = modes->redBits + modes->greenBits
+		    	+ modes->blueBits + modes->alphaBits;
+
+		    modes->accumRedBits   = 16 * j;
+		    modes->accumGreenBits = 16 * j;
+		    modes->accumBlueBits  = 16 * j;
+		    modes->accumAlphaBits = (masks[3] != 0) ? 16 * j : 0;
+		    modes->visualRating = (j == 0) ? GLX_NONE : GLX_SLOW_CONFIG;
+
+		    modes->stencilBits = stencil_bits[k];
+		    modes->depthBits = depth_bits[k];
+
+		    modes->transparentPixel = GLX_NONE;
+		    modes->transparentRed = GLX_DONT_CARE;
+		    modes->transparentGreen = GLX_DONT_CARE;
+		    modes->transparentBlue = GLX_DONT_CARE;
+		    modes->transparentAlpha = GLX_DONT_CARE;
+		    modes->transparentIndex = GLX_DONT_CARE;
+		    modes->visualType = GLX_DONT_CARE;
+		    modes->renderType = GLX_RGBA_BIT;
+		    modes->drawableType = GLX_WINDOW_BIT;
+		    modes->rgbMode = GL_TRUE;
+
+		    if ( db_modes[i] == GLX_NONE ) {
+		    	modes->doubleBufferMode = GL_FALSE;
+		    }
+		    else {
+		    	modes->doubleBufferMode = GL_TRUE;
+		    	modes->swapMethod = db_modes[i];
+		    }
+
+		    modes->samples = msaa_samples[h];
+		    modes->sampleBuffers = modes->samples ? 1 : 0;
+
+		    modes->haveAccumBuffer = ((modes->accumRedBits +
+					   modes->accumGreenBits +
+					   modes->accumBlueBits +
+					   modes->accumAlphaBits) > 0);
+		    modes->haveDepthBuffer = (modes->depthBits > 0);
+		    modes->haveStencilBuffer = (modes->stencilBits > 0);
+
+		    modes->bindToTextureRgb = GL_TRUE;
+		    modes->bindToTextureRgba = GL_TRUE;
+		    modes->bindToMipmapTexture = GL_FALSE;
+		    modes->bindToTextureTargets =
+			__DRI_ATTRIB_TEXTURE_1D_BIT |
+			__DRI_ATTRIB_TEXTURE_2D_BIT |
+			__DRI_ATTRIB_TEXTURE_RECTANGLE_BIT;
+		}
+	    }
+	}
+    }
+    *c = NULL;
+
+    return configs;
+}
+
+/* Concatenate two NULL-terminated config lists into a new list.  The arrays a
+ * and b are freed; their elements move to the result.  A NULL list counts as
+ * empty.  On allocation failure NULL is returned and a and b are untouched. */
+__DRIconfig **driConcatConfigs(__DRIconfig **a,
+			       __DRIconfig **b)
+{
+    __DRIconfig **all;
+    int i = 0, j = 0;
+
+    if (a == NULL)
+	return b;
+    if (b == NULL)
+	return a;
+    while (a[i] != NULL)
+	i++;
+    while (b[j] != NULL)
+	j++;
+    all = malloc((i + j + 1) * sizeof *all);
+    if (all == NULL)
+	return NULL;
+    memcpy(all, a, i * sizeof *all);
+    memcpy(all + i, b, (j + 1) * sizeof *all);	/* includes b's NULL */
+    free(a);
+    free(b);
+    return all;
+}
+
+#define __ATTRIB(attrib, field) \
+    { attrib, offsetof(__GLcontextModes, field) }
+/* Pairs each __DRI_ATTRIB_* token with the __GLcontextModes field backing it. */
+static const struct { unsigned int attrib, offset; } attribMap[] = {
+    __ATTRIB(__DRI_ATTRIB_BUFFER_SIZE,			rgbBits),
+    __ATTRIB(__DRI_ATTRIB_LEVEL,			level),
+    __ATTRIB(__DRI_ATTRIB_RED_SIZE,			redBits),
+    __ATTRIB(__DRI_ATTRIB_GREEN_SIZE,			greenBits),
+    __ATTRIB(__DRI_ATTRIB_BLUE_SIZE,			blueBits),
+    __ATTRIB(__DRI_ATTRIB_ALPHA_SIZE,			alphaBits),
+    __ATTRIB(__DRI_ATTRIB_DEPTH_SIZE,			depthBits),
+    __ATTRIB(__DRI_ATTRIB_STENCIL_SIZE,			stencilBits),
+    __ATTRIB(__DRI_ATTRIB_ACCUM_RED_SIZE,		accumRedBits),
+    __ATTRIB(__DRI_ATTRIB_ACCUM_GREEN_SIZE,		accumGreenBits),
+    __ATTRIB(__DRI_ATTRIB_ACCUM_BLUE_SIZE,		accumBlueBits),
+    __ATTRIB(__DRI_ATTRIB_ACCUM_ALPHA_SIZE,		accumAlphaBits),
+    __ATTRIB(__DRI_ATTRIB_SAMPLE_BUFFERS,		sampleBuffers),
+    __ATTRIB(__DRI_ATTRIB_SAMPLES,			samples),
+    __ATTRIB(__DRI_ATTRIB_DOUBLE_BUFFER,		doubleBufferMode),
+    __ATTRIB(__DRI_ATTRIB_STEREO,			stereoMode),
+    __ATTRIB(__DRI_ATTRIB_AUX_BUFFERS,			numAuxBuffers),
+    __ATTRIB(__DRI_ATTRIB_TRANSPARENT_TYPE,		transparentPixel),
+    __ATTRIB(__DRI_ATTRIB_TRANSPARENT_INDEX_VALUE,	transparentIndex),
+    __ATTRIB(__DRI_ATTRIB_TRANSPARENT_RED_VALUE,	transparentRed),
+    __ATTRIB(__DRI_ATTRIB_TRANSPARENT_GREEN_VALUE,	transparentGreen),
+    __ATTRIB(__DRI_ATTRIB_TRANSPARENT_BLUE_VALUE,	transparentBlue),
+    __ATTRIB(__DRI_ATTRIB_TRANSPARENT_ALPHA_VALUE,	transparentAlpha),
+    __ATTRIB(__DRI_ATTRIB_FLOAT_MODE,			floatMode),
+    __ATTRIB(__DRI_ATTRIB_RED_MASK,			redMask),
+    __ATTRIB(__DRI_ATTRIB_GREEN_MASK,			greenMask),
+    __ATTRIB(__DRI_ATTRIB_BLUE_MASK,			blueMask),
+    __ATTRIB(__DRI_ATTRIB_ALPHA_MASK,			alphaMask),
+    __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_WIDTH,		maxPbufferWidth),
+    __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_HEIGHT,		maxPbufferHeight),
+    __ATTRIB(__DRI_ATTRIB_MAX_PBUFFER_PIXELS,		maxPbufferPixels),
+    __ATTRIB(__DRI_ATTRIB_OPTIMAL_PBUFFER_WIDTH,	optimalPbufferWidth),
+    __ATTRIB(__DRI_ATTRIB_OPTIMAL_PBUFFER_HEIGHT,	optimalPbufferHeight),
+    __ATTRIB(__DRI_ATTRIB_SWAP_METHOD,			swapMethod),
+    __ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_RGB,		bindToTextureRgb),
+    __ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_RGBA,		bindToTextureRgba),
+    __ATTRIB(__DRI_ATTRIB_BIND_TO_MIPMAP_TEXTURE,	bindToMipmapTexture),
+    __ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_TARGETS,	bindToTextureTargets),
+    __ATTRIB(__DRI_ATTRIB_YINVERTED,			yInverted),
+
+    /* The struct field doesn't matter here, these are handled by the
+     * switch in driGetConfigAttribIndex.  We need them in the array
+     * so the iterator includes them though.*/
+    __ATTRIB(__DRI_ATTRIB_RENDER_TYPE,			level),
+    __ATTRIB(__DRI_ATTRIB_CONFIG_CAVEAT,		level),
+    __ATTRIB(__DRI_ATTRIB_SWAP_METHOD,			level)
+};
+
+#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+
+/* Store in *value the attribute that attribMap[index] describes for config.
+ * index must be a valid attribMap index.  Always returns GL_TRUE. */
+static int
+driGetConfigAttribIndex(const __DRIconfig *config,
+			unsigned int index, unsigned int *value)
+{
+    switch (attribMap[index].attrib) {
+    case __DRI_ATTRIB_RENDER_TYPE:
+	*value = __DRI_ATTRIB_RGBA_BIT;
+	break;
+    case __DRI_ATTRIB_CONFIG_CAVEAT:
+	if (config->modes.visualRating == GLX_NON_CONFORMANT_CONFIG)
+	    *value = __DRI_ATTRIB_NON_CONFORMANT_CONFIG;
+	else if (config->modes.visualRating == GLX_SLOW_CONFIG)
+	    *value = __DRI_ATTRIB_SLOW_BIT;
+	else
+	    *value = 0;
+	break;
+    case __DRI_ATTRIB_SWAP_METHOD:
+	/* Both attribMap rows for this token end up here; never leave *value unset. */
+	*value = config->modes.swapMethod;
+	break;
+    case __DRI_ATTRIB_FLOAT_MODE:
+        *value = config->modes.floatMode;
+        break;
+    default:
+	*value = *(unsigned int *)
+	    ((char *) &config->modes + attribMap[index].offset);
+	break;
+    }
+    return GL_TRUE;
+}
+
+int
+driGetConfigAttrib(const __DRIconfig *config,
+		   unsigned int attrib, unsigned int *value)
+{
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(attribMap); i++)
+	if (attribMap[i].attrib == attrib)
+	    return driGetConfigAttribIndex(config, i, value);
+
+    return GL_FALSE;
+}
+
+int
+driIndexConfigAttrib(const __DRIconfig *config, int index,
+		     unsigned int *attrib, unsigned int *value)
+{
+    if (index >= 0 && index < ARRAY_SIZE(attribMap)) {
+	*attrib = attribMap[index].attrib;
+	return driGetConfigAttribIndex(config, index, value);
+    }
+
+    return GL_FALSE;
+}
diff --git a/src/mesa/drivers/dri/common/utils.h b/src/mesa/drivers/dri/common/utils.h
new file mode 100644
index 0000000000..de6070c398
--- /dev/null
+++ b/src/mesa/drivers/dri/common/utils.h
@@ -0,0 +1,123 @@
+/*
+ * (C) Copyright IBM Corporation 2002, 2004
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <idr@us.ibm.com>
+ */
+
+#ifndef DRI_DEBUG_H
+#define DRI_DEBUG_H
+
+#include <GL/gl.h>
+#include <GL/internal/dri_interface.h>
+#include "main/context.h"
+#include "main/remap.h"
+
+typedef struct __DRIutilversionRec2    __DRIutilversion2;
+
+struct dri_debug_control {
+    const char * string;
+    unsigned     flag;
+};
+
+/**
+ * Description of the API for an extension to OpenGL.
+ */
+struct dri_extension {
+    /**
+     * Name of the extension.
+     */
+    const char * name;
+    
+
+    /**
+     * Pointer to a list of \c dri_extension_function structures.  The list
+     * is terminated by a structure with a \c NULL
+     * \c dri_extension_function::strings pointer.
+     */
+    const struct gl_function_remap * functions;
+};
+
+/**
+ * Used to store a version which includes a major range instead of a single
+ * major version number.
+ */
+struct __DRIutilversionRec2 {
+    int    major_min;    /**< min allowed Major version number. */
+    int    major_max;    /**< max allowed Major version number. */
+    int    minor;        /**< Minor version number. */
+    int    patch;        /**< Patch-level. */
+};
+
+extern void
+__driUtilMessage(const char *f, ...);
+
+extern unsigned driParseDebugString( const char * debug,
+    const struct dri_debug_control * control );
+
+extern unsigned driGetRendererString( char * buffer,
+    const char * hardware_name, const char * driver_date, GLuint agp_mode );
+
+extern void driInitExtensions( GLcontext * ctx, 
+    const struct dri_extension * card_extensions, GLboolean enable_imaging );
+
+extern void driInitSingleExtension( GLcontext * ctx,
+    const struct dri_extension * ext );
+
+extern GLboolean driCheckDriDdxDrmVersions2(const char * driver_name,
+    const __DRIversion * driActual, const __DRIversion * driExpected,
+    const __DRIversion * ddxActual, const __DRIversion * ddxExpected,
+    const __DRIversion * drmActual, const __DRIversion * drmExpected);
+
+extern GLboolean driCheckDriDdxDrmVersions3(const char * driver_name,
+    const __DRIversion * driActual, const __DRIversion * driExpected,
+    const __DRIversion * ddxActual, const __DRIutilversion2 * ddxExpected,
+    const __DRIversion * drmActual, const __DRIversion * drmExpected);
+
+extern GLboolean driClipRectToFramebuffer( const GLframebuffer *buffer,
+					   GLint *x, GLint *y,
+					   GLsizei *width, GLsizei *height );
+
+struct __DRIconfigRec {
+    __GLcontextModes modes;
+};
+
+extern __DRIconfig **
+driCreateConfigs(GLenum fb_format, GLenum fb_type,
+		 const uint8_t * depth_bits, const uint8_t * stencil_bits,
+		 unsigned num_depth_stencil_bits,
+		 const GLenum * db_modes, unsigned num_db_modes,
+		 const uint8_t * msaa_samples, unsigned num_msaa_modes,
+		 GLboolean enable_accum);
+
+__DRIconfig **driConcatConfigs(__DRIconfig **a,
+			       __DRIconfig **b);
+
+int
+driGetConfigAttrib(const __DRIconfig *config,
+		   unsigned int attrib, unsigned int *value);
+int
+driIndexConfigAttrib(const __DRIconfig *config, int index,
+		     unsigned int *attrib, unsigned int *value);
+
+#endif /* DRI_DEBUG_H */
diff --git a/src/mesa/drivers/dri/common/vblank.c b/src/mesa/drivers/dri/common/vblank.c
new file mode 100644
index 0000000000..49b22a2dc7
--- /dev/null
+++ b/src/mesa/drivers/dri/common/vblank.c
@@ -0,0 +1,434 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <idr@us.ibm.com>
+ */
+
+#include "main/glheader.h"
+#include "xf86drm.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/dd.h"
+#include "vblank.h"
+#include "xmlpool.h"
+
+/* Rebase msc from the drawable's msc_base onto its vblank_base. */
+static unsigned int msc_to_vblank(__DRIdrawable * dPriv, int64_t msc)
+{
+   return (unsigned int)(msc - dPriv->msc_base + dPriv->vblank_base);
+}
+
+/* Rebase vblank from the drawable's vblank_base onto its msc_base. */
+static int64_t vblank_to_msc(__DRIdrawable * dPriv, unsigned int vblank)
+{
+   return (int64_t)(vblank - dPriv->vblank_base + dPriv->msc_base);
+}
+/****************************************************************************/
+/**
+ * Get the current MSC refresh counter.
+ *
+ * Stores the 64-bit count of vertical refreshes since some (arbitrary)
+ * point in time in \c count.  Unless the value wraps around, which it
+ * may, it will never decrease for a given drawable.
+ *
+ * \warning This function is called from \c glXGetVideoSyncSGI, which expects
+ * a \c count of type \c unsigned (32-bit), and \c glXGetSyncValuesOML, which 
+ * expects a \c count of type \c int64_t (signed 64-bit).  The kernel ioctl 
+ * currently always returns a \c sequence of type \c unsigned.
+ *
+ * \param priv   Pointer to the DRI screen private struct.
+ * \param dPriv  Pointer to the DRI drawable private struct
+ * \param count  Storage to hold MSC counter.
+ * \return       Zero is returned on success.  A negative errno value
+ *               is returned on failure.
+ */
+int driDrawableGetMSC32( __DRIscreen * priv,
+			 __DRIdrawable * dPriv,
+			 int64_t * count)
+{
+   drmVBlank vbl;
+   int ret;
+
+   /* A relative request for zero vblanks: nothing to wait for, the reply
+    * simply carries the current refresh count. */
+   vbl.request.sequence = 0;
+   vbl.request.type = DRM_VBLANK_RELATIVE;
+   if ( dPriv && (dPriv->vblFlags & VBLANK_FLAG_SECONDARY) )
+      vbl.request.type |= DRM_VBLANK_SECONDARY;
+
+   ret = drmWaitVBlank( priv->fd, &vbl );
+
+   /* No drawable means an old driver with no knowledge of the drawable
+    * MSC callback: report the raw sequence, otherwise rebase it. */
+   if ( !dPriv )
+      *count = vbl.reply.sequence;
+   else
+      *count = vblank_to_msc(dPriv, vbl.reply.sequence);
+
+   return ret;
+}
+
+/****************************************************************************/
+/**
+ * Wait for a specified refresh count.  This implements most of the
+ * functionality of \c glXWaitForMscOML from the GLX_OML_sync_control spec.
+ * Waits for the \c target_msc refresh.  If that has already passed, it
+ * waits until \f$(MSC \bmod divisor)\f$ is equal to \c remainder.  If 
+ * \c target_msc is 0, use the behavior of glXWaitVideoSyncSGI(), which
+ * omits the initial check against a target MSC value.
+ * 
+ * This function is actually something of a hack.  The problem is that, at
+ * the time of this writing, none of the existing DRM modules support an
+ * ioctl that returns a 64-bit count (at least not on 32-bit platforms).
+ * However, this function exists to support a GLX function that requires
+ * the use of 64-bit counts.  As such, there is a little bit of ugly
+ * hackery at the end of this function to make the 32-bit count act like
+ * a 64-bit count.  There are still some cases where this will break, but
+ * I believe it catches the most common cases.
+ *
+ * The real solution is to provide an ioctl that uses a 64-bit count.
+ *
+ * \param priv        Pointer to the DRI drawable private; the DRM file
+ *                    descriptor of its screen is used for the wait.
+ * \param target_msc  Desired refresh count to wait for.  A value of 0
+ *                    means to use the glXWaitVideoSyncSGI() behavior.
+ * \param divisor     MSC divisor if \c target_msc is already reached.
+ * \param remainder   Desired MSC remainder if \c target_msc is already
+ *                    reached.
+ * \param msc         Buffer to hold MSC when done waiting.
+ *
+ * \return            Zero on success or \c GLX_BAD_CONTEXT on failure.
+ */
+
+int driWaitForMSC32( __DRIdrawable *priv,
+		     int64_t target_msc, int64_t divisor, int64_t remainder,
+		     int64_t * msc )
+{
+   drmVBlank vbl;
+
+   /* A non-zero divisor selects the (MSC % divisor) == remainder wait. */
+   if ( divisor != 0 ) {
+      int64_t next = target_msc;
+      int64_t r;
+      int dont_wait = (target_msc == 0);
+
+      do {
+         /* dont_wait means we're using the glXWaitVideoSyncSGI() behavior.
+          * The first time around, just get the current count and proceed
+          * to the test for (MSC % divisor) == remainder.
+          */
+         vbl.request.type = dont_wait ? DRM_VBLANK_RELATIVE :
+                                        DRM_VBLANK_ABSOLUTE;
+         vbl.request.sequence = next ? msc_to_vblank(priv, next) : 0;
+	 if ( priv->vblFlags & VBLANK_FLAG_SECONDARY )
+	    vbl.request.type |= DRM_VBLANK_SECONDARY;
+
+	 if ( drmWaitVBlank( priv->driScreenPriv->fd, &vbl ) != 0 ) {
+	    /* FIXME: This doesn't seem like the right thing to return here.
+	     */
+	    return GLX_BAD_CONTEXT;
+	 }
+
+	 *msc = vblank_to_msc(priv, vbl.reply.sequence);
+
+         if (!dont_wait && *msc == next)
+            break;
+         dont_wait = 0;
+
+         /* Assuming the wait-done test fails, the next refresh to wait for
+          * will be one that satisfies (MSC % divisor) == remainder.  The
+          * value (MSC - (MSC % divisor) + remainder) is the refresh value
+          * closest to the current value that would satisfy the equation.
+          * If this refresh has already happened, we add divisor to obtain
+          * the next refresh after the current one that will satisfy it.
+          */
+         r = ((uint64_t)*msc % divisor);
+         next = (*msc - r + remainder);
+         if (next <= *msc)
+	    next += divisor;
+
+      } while (r != remainder);
+   }
+   else {
+      /* If the \c divisor is zero, just wait until the MSC is greater
+       * than or equal to \c target_msc.
+       */
+
+      vbl.request.type = DRM_VBLANK_ABSOLUTE;
+      vbl.request.sequence = target_msc ? msc_to_vblank(priv, target_msc) : 0;
+
+      if ( priv->vblFlags & VBLANK_FLAG_SECONDARY )
+	 vbl.request.type |= DRM_VBLANK_SECONDARY;
+
+      if ( drmWaitVBlank( priv->driScreenPriv->fd, &vbl ) != 0 ) {
+	 /* FIXME: This doesn't seem like the right thing to return here.
+	  */
+	 return GLX_BAD_CONTEXT;
+      }
+   }
+
+   *msc = vblank_to_msc(priv, vbl.reply.sequence);
+
+   if ( *msc < target_msc ) {
+      *msc += 0x0000000100000000LL;   /* treat as one wrap of the 32-bit count */
+   }
+
+   return 0;
+}
+
+
+/****************************************************************************/
+/**
+ * Gets a set of default vertical-blank-wait flags based on the
+ * \c vblank_mode configuration option.
+ */
+
+GLuint driGetDefaultVBlankFlags( const driOptionCache *optionCache )
+{
+   int mode = DRI_CONF_VBLANK_DEF_INTERVAL_1;
+
+   /* The "interval 1" default only applies when the cache holds no
+    * vblank_mode enum option to query. */
+   if ( driCheckOption( optionCache, "vblank_mode", DRI_ENUM ) ) {
+      mode = driQueryOptioni( optionCache, "vblank_mode" );
+   }
+
+   /* Every mode but NEVER respects the swap interval.  DEF_INTERVAL_1 and
+    * ALWAYS_SYNC each add one more flag; DEF_INTERVAL_0 and any
+    * unrecognized mode add nothing on top of that. */
+   if ( mode == DRI_CONF_VBLANK_NEVER ) {
+      return 0;
+   }
+
+   if ( mode == DRI_CONF_VBLANK_DEF_INTERVAL_1 ) {
+      return VBLANK_FLAG_INTERVAL | VBLANK_FLAG_THROTTLE;
+   }
+
+   if ( mode == DRI_CONF_VBLANK_ALWAYS_SYNC ) {
+      return VBLANK_FLAG_INTERVAL | VBLANK_FLAG_SYNC;
+   }
+
+   return VBLANK_FLAG_INTERVAL;
+}
+
+
+/****************************************************************************/
+/**
+ * Wrapper to call \c drmWaitVBlank.  The main purpose of this function is to
+ * wrap the error message logging.  The error message should only be logged
+ * the first time the \c drmWaitVBlank fails.  If \c drmWaitVBlank is
+ * successful, \c vbl_seq will be set to the sequence value in the reply.
+ *
+ * \param vbl      Pointer to drmVBlank packet describing how to wait.
+ * \param vbl_seq  Location to store the current refresh counter.
+ * \param fd       File descriptor used to call into the DRM.
+ * \return         Zero on success or -1 on failure.
+ */
+
+static int do_wait( drmVBlank * vbl, GLuint * vbl_seq, int fd )
+{
+   static GLboolean first_time = GL_TRUE;
+   const int ret = drmWaitVBlank( fd, vbl );
+
+   if ( ret == 0 ) {
+      /* Success: pass the sequence number of the reply to the caller. */
+      *vbl_seq = vbl->reply.sequence;
+      return 0;
+   }
+
+   /* Failure: only the first one is reported, later ones stay silent.
+    * *vbl_seq is left untouched on failure. */
+   if ( first_time ) {
+      first_time = GL_FALSE;
+      fprintf(stderr,
+	      "%s: drmWaitVBlank returned %d, IRQs don't seem to be"
+	      " working correctly.\nTry adjusting the vblank_mode"
+	      " configuration parameter.\n", __FUNCTION__, ret);
+   }
+
+   return -1;
+}
+
+
+/****************************************************************************/
+/**
+ * Returns the default swap interval of the given drawable.
+ */
+
+static unsigned
+driGetDefaultVBlankInterval( const  __DRIdrawable *priv )
+{
+   const unsigned wait_flags = VBLANK_FLAG_THROTTLE | VBLANK_FLAG_SYNC;
+
+   /* Drawables flagged for throttling or syncing default to an interval
+    * of one; every other drawable defaults to an interval of zero,
+    * whatever its remaining flags say. */
+   return ( (priv->vblFlags & wait_flags) != 0 ) ? 1 : 0;
+}
+
+
+/****************************************************************************/
+/**
+ * Sets the default swap interval when the drawable is first bound to a
+ * direct rendering context.
+ */
+
+void driDrawableInitVBlank( __DRIdrawable *priv )
+{
+   drmVBlank vbl;
+   /* Already initialized, or no vblank IRQ to query: nothing to do. */
+   if ( priv->swap_interval != (unsigned)-1 ||
+	(priv->vblFlags & VBLANK_FLAG_NO_IRQ) != 0 )
+      return;
+
+   /* Sample the current vblank sequence and record it as vblank_base. */
+   vbl.request.sequence = 0;
+   vbl.request.type = DRM_VBLANK_RELATIVE;
+   if ( (priv->vblFlags & VBLANK_FLAG_SECONDARY) != 0 )
+      vbl.request.type |= DRM_VBLANK_SECONDARY;
+   do_wait( &vbl, &priv->vblSeq, priv->driScreenPriv->fd );
+   priv->vblank_base = priv->vblSeq;
+   priv->swap_interval = driGetDefaultVBlankInterval( priv );
+}
+
+
+/****************************************************************************/
+/**
+ * Returns the current swap interval of the given drawable.
+ */
+
+unsigned
+driGetVBlankInterval( const  __DRIdrawable *priv )
+{
+   /* Drawables that do not respect a swap interval setting always use
+    * the default derived from their flags. */
+   if ( (priv->vblFlags & VBLANK_FLAG_INTERVAL) == 0 )
+      return driGetDefaultVBlankInterval( priv );
+
+   /* swap_interval must have been initialized when the drawable was
+    * first bound to a direct rendering context. */
+   assert ( priv->swap_interval != (unsigned)-1 );
+   return priv->swap_interval;
+}
+
+
+/****************************************************************************/
+/**
+ * Stores the drawable's current vertical blank sequence in \c priv->vblSeq.
+ */
+
+void
+driGetCurrentVBlank( __DRIdrawable *priv )
+{
+   drmVBlank vbl;
+
+   /* Relative request for zero vblanks on the primary or secondary
+    * counter; a failing query is deliberately ignored. */
+   vbl.request.sequence = 0;
+   vbl.request.type = ( priv->vblFlags & VBLANK_FLAG_SECONDARY )
+      ? ( DRM_VBLANK_RELATIVE | DRM_VBLANK_SECONDARY ) : DRM_VBLANK_RELATIVE;
+
+   (void) do_wait( &vbl, &priv->vblSeq, priv->driScreenPriv->fd );
+}
+
+
+/****************************************************************************/
+/**
+ * Waits for the vertical blank for use with glXSwapBuffers.
+ * 
+ * \param missed_deadline  Set to \c GL_TRUE if the MSC after waiting is later
+ *                 than the "target" based on \c priv->vblFlags.  The idea is
+ *                 that if \c missed_deadline is set, then the application is
+ *                 not achieving its desired framerate.
+ * \return         Zero on success, -1 on error.
+ */
+
+int
+driWaitForVBlank( __DRIdrawable *priv, GLboolean * missed_deadline )
+{
+   drmVBlank vbl;
+   unsigned   original_seq;   /* priv->vblSeq as it was on entry */
+   unsigned   deadline;       /* original_seq + interval: sequence to reach */
+   unsigned   interval;       /* result of driGetVBlankInterval(priv) */
+   unsigned   diff;           /* priv->vblSeq - deadline, unsigned (wraps) */
+
+   *missed_deadline = GL_FALSE;
+   if ( (priv->vblFlags & (VBLANK_FLAG_INTERVAL |
+			   VBLANK_FLAG_THROTTLE |
+			   VBLANK_FLAG_SYNC)) == 0 ||
+	(priv->vblFlags & VBLANK_FLAG_NO_IRQ) != 0 ) {
+      return 0;
+   }
+
+
+   /* VBLANK_FLAG_SYNC means to wait for at least one vertical blank.  If
+    * that flag is not set, do a fake wait for zero vertical blanking
+    * periods so that we can get the current MSC.
+    *
+    * VBLANK_FLAG_INTERVAL and VBLANK_FLAG_THROTTLE mean to wait for at
+    * least one vertical blank since the last wait.  Since do_wait modifies
+    * priv->vblSeq, we have to save the original value of priv->vblSeq for the
+    * VBLANK_FLAG_INTERVAL / VBLANK_FLAG_THROTTLE calculation later.
+    */
+
+   original_seq = priv->vblSeq;
+   interval = driGetVBlankInterval(priv);
+   deadline = original_seq + interval;
+
+   vbl.request.type = DRM_VBLANK_RELATIVE;
+   if ( priv->vblFlags & VBLANK_FLAG_SECONDARY ) {
+      vbl.request.type |= DRM_VBLANK_SECONDARY;
+   }
+   vbl.request.sequence = ((priv->vblFlags & VBLANK_FLAG_SYNC) != 0) ? 1 : 0;
+
+   if ( do_wait( & vbl, &priv->vblSeq, priv->driScreenPriv->fd ) != 0 ) {
+      return -1;
+   }
+
+   diff = priv->vblSeq - deadline;
+
+   /* Already at/past the target (small unsigned diff): no second wait */
+   if (diff <= (1 << 23)) {
+      *missed_deadline = (priv->vblFlags & VBLANK_FLAG_SYNC) ? (diff > 0) :
+			 GL_TRUE;
+      return 0;
+   }
+
+   /* Wait until the target vertical blank. */
+   vbl.request.type = DRM_VBLANK_ABSOLUTE;
+   if ( priv->vblFlags & VBLANK_FLAG_SECONDARY ) {
+      vbl.request.type |= DRM_VBLANK_SECONDARY;
+   }
+   vbl.request.sequence = deadline;
+
+   if ( do_wait( & vbl, &priv->vblSeq, priv->driScreenPriv->fd ) != 0 ) {
+      return -1;
+   }
+
+   diff = priv->vblSeq - deadline;
+   *missed_deadline = diff > 0 && diff <= (1 << 23);   /* late, but not wrapped */
+
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/common/vblank.h b/src/mesa/drivers/dri/common/vblank.h
new file mode 100644
index 0000000000..29d1ad8003
--- /dev/null
+++ b/src/mesa/drivers/dri/common/vblank.h
@@ -0,0 +1,75 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEM, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <idr@us.ibm.com>
+ */
+
+#ifndef DRI_VBLANK_H
+#define DRI_VBLANK_H
+
+#include "main/context.h"
+#include "dri_util.h"
+#include "xmlconfig.h"
+
+#define VBLANK_FLAG_INTERVAL  (1U << 0)  /* Respect the swap_interval setting
+					  */
+#define VBLANK_FLAG_THROTTLE  (1U << 1)  /* Wait 1 refresh since last call.
+					  */
+#define VBLANK_FLAG_SYNC      (1U << 2)  /* Sync to the next refresh.
+					  */
+#define VBLANK_FLAG_NO_IRQ    (1U << 7)  /* DRM has no IRQ to wait on.
+					  */
+#define VBLANK_FLAG_SECONDARY (1U << 8)  /* Wait for secondary vblank.
+					  */
+
+extern int driGetMSC32( __DRIscreen * priv, int64_t * count );
+extern int driDrawableGetMSC32( __DRIscreen * priv,
+				__DRIdrawable * drawablePrivate,
+				int64_t * count);
+extern int driWaitForMSC32( __DRIdrawable *priv,
+    int64_t target_msc, int64_t divisor, int64_t remainder, int64_t * msc );
+extern GLuint driGetDefaultVBlankFlags( const driOptionCache *optionCache );
+extern void driDrawableInitVBlank ( __DRIdrawable *priv );
+extern unsigned driGetVBlankInterval( const  __DRIdrawable *priv );
+extern void driGetCurrentVBlank( __DRIdrawable *priv );
+extern int driWaitForVBlank( __DRIdrawable *priv,
+			     GLboolean * missed_deadline );
+
+#undef usleep
+#include <unistd.h>  /* for usleep() */
+#include <sched.h>   /* for sched_yield() */
+
+#ifdef linux
+#include <sched.h>   /* for sched_yield() */
+#endif
+/* Sleep nr microseconds, then yield; the fprintf is a disabled trace. */
+#define DO_USLEEP(nr)							\
+   do {								 	\
+      if (0) fprintf(stderr, "%s: usleep for %u\n", __FUNCTION__, nr );	\
+      if (1) usleep( nr );						\
+      sched_yield();							\
+   } while( 0 )
+
+#endif /* DRI_VBLANK_H */
diff --git a/src/mesa/drivers/dri/common/xmlconfig.c b/src/mesa/drivers/dri/common/xmlconfig.c
new file mode 100644
index 0000000000..738b1ae97f
--- /dev/null
+++ b/src/mesa/drivers/dri/common/xmlconfig.c
@@ -0,0 +1,1001 @@
+/*
+ * XML DRI client-side driver configuration
+ * Copyright (C) 2003 Felix Kuehling
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * FELIX KUEHLING, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ */
+/**
+ * \file xmlconfig.c
+ * \brief Driver-independent client-side part of the XML configuration
+ * \author Felix Kuehling
+ */
+
+#include "main/glheader.h"
+
+#include <string.h>
+#include <assert.h>
+#include <expat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include "main/imports.h"
+#include "utils.h"
+#include "xmlconfig.h"
+
+#undef GET_PROGRAM_NAME
+
+#if (defined(__GNU_LIBRARY__) || defined(__GLIBC__)) && !defined(__UCLIBC__)
+#    if !defined(__GLIBC__) || (__GLIBC__ < 2)
+/* These aren't declared in any libc5 header */
+extern char *program_invocation_name, *program_invocation_short_name;
+#    endif
+#    define GET_PROGRAM_NAME() program_invocation_short_name
+#elif defined(__FreeBSD__) && (__FreeBSD__ >= 2)
+#    include <osreldate.h>
+#    if (__FreeBSD_version >= 440000)
+#        include <stdlib.h>
+#        define GET_PROGRAM_NAME() getprogname()
+#    endif
+#elif defined(__NetBSD__) && defined(__NetBSD_Version) && (__NetBSD_Version >= 106000100)
+#    include <stdlib.h>
+#    define GET_PROGRAM_NAME() getprogname()
+#elif defined(__sun)
+/* Solaris has getexecname() which returns the full path - return just
+   the basename to match BSD getprogname() */
+#    include <stdlib.h>
+#    include <libgen.h>
+#    define GET_PROGRAM_NAME() basename(getexecname())
+#endif
+
+#if !defined(GET_PROGRAM_NAME)
+#    if defined(__OpenBSD__) || defined(NetBSD) || defined(__UCLIBC__)
+/* This is a hack. It's said to work on OpenBSD, NetBSD and GNU.
+ * Rogelio M.Serrano Jr. reported it's also working with UCLIBC. It's
+ * used as a last resort, if there is no documented facility available. */
+static const char *__getProgramName () {
+    extern const char *__progname;
+    /* keep only what follows the last '/' of the name; a name without
+     * any '/' is returned unchanged */
+    const char *slash = strrchr(__progname, '/');
+
+    return slash ? slash+1 : __progname;
+}
+#        define GET_PROGRAM_NAME() __getProgramName()
+#    else
+#        define GET_PROGRAM_NAME() ""
+#        warning "Per application configuration won't work with your OS version."
+#    endif
+#endif
+
+/** \brief Look up the hash table slot of an option by its name */
+static GLuint findOption (const driOptionCache *cache, const char *name) {
+    const GLuint nameLen = strlen (name);
+    const GLuint size = 1 << cache->tableSize, mask = size - 1;
+    GLuint slot = 0, shift = 0, pos, probes;
+
+  /* fold the bytes of the name into a sum, cycling the shift 0,8,16,24 */
+    for (pos = 0; pos < nameLen; ++pos) {
+	slot += (GLuint)name[pos] << shift;
+	shift = (shift+8) & 31;
+    }
+    slot *= slot;
+    slot = (slot >> (16-cache->tableSize/2)) & mask;
+
+  /* probe linearly from that slot: an empty entry means the option is
+   * not defined (yet), a matching name means it has been found */
+    for (probes = 0; probes < size; ++probes) {
+	const char *entry = cache->info[slot].name;
+	if (entry == 0 || !strcmp (name, entry))
+	    break;
+	slot = (slot+1) & mask;
+    }
+  /* every slot was probed without a hit: the hash table is full */
+    assert (probes < size);
+    return slot;
+}
+
+/** \brief Count the occupied entries of an option cache */
+static GLuint countOptions (const driOptionCache *cache) {
+    const GLuint size = 1 << cache->tableSize;
+    GLuint slot, used = 0;
+    /* an entry counts as occupied when it carries a name */
+    for (slot = 0; slot < size; ++slot)
+	used += cache->info[slot].name ? 1 : 0;
+    return used;
+}
+
+/** \brief Like strdup but using MALLOC; aborts when out of memory. */
+#define XSTRDUP(dest,source) do { \
+    GLuint len = strlen (source); \
+    if (!(dest = MALLOC (len+1))) { \
+	fprintf (stderr, "%s: %d: out of memory.\n", __FILE__, __LINE__); \
+	abort(); \
+    } \
+    memcpy (dest, source, len+1); \
+} while (0)
+
+/** \brief bsearch comparator: both arguments point at string pointers. */
+static int compare (const void *a, const void *b) {
+    const char *const *lhs = a, *const *rhs = b;
+    return strcmp (*lhs, *rhs);
+}
+/** \brief Binary search in a string array.
+ *  Yields the index of name in elems, or count when it is not found. */
+static GLuint bsearchStr (const XML_Char *name,
+			  const XML_Char *elems[], GLuint count) {
+    const XML_Char **hit =
+	bsearch (&name, elems, count, sizeof (XML_Char *), compare);
+    return hit ? (GLuint)(hit - elems) : count;
+}
+
+/** \brief Locale-independent integer parser.
+ *
+ * Works similar to strtol. Leading space is NOT skipped. The input
+ * number may have an optional sign. Radix is specified by base. If
+ * base is 0 then decimal is assumed unless the input starts with 0x
+ * or 0X plus a hex digit (hexadecimal) or with 0 (octal). After
+ * returning tail points to the first character that is not part of
+ * the integer number. If no number was found then tail points to the
+ * start of the input string. */
+static GLint strToI (const XML_Char *string, const XML_Char **tail, int base) {
+    GLint radix = base == 0 ? 10 : base;
+    GLint result = 0;
+    GLint sign = 1;
+    GLboolean numberFound = GL_FALSE;
+    const XML_Char *start = string;
+
+    assert (radix >= 2 && radix <= 36);
+
+    if (*string == '-') {
+	sign = -1;
+	string++;
+    } else if (*string == '+')
+	string++;
+    if (base == 0 && *string == '0') {
+	numberFound = GL_TRUE;
+	/* "0x" only starts a hex number when a hex digit follows it;
+	 * otherwise the number is the lone "0" and tail stops at the 'x' */
+	if ((string[1] == 'x' || string[1] == 'X') &&
+	    ((string[2] >= '0' && string[2] <= '9') ||
+	     (string[2] >= 'a' && string[2] <= 'f') ||
+	     (string[2] >= 'A' && string[2] <= 'F'))) {
+	    radix = 16;
+	    string += 2;
+	} else {
+	    radix = 8;
+	    string++;
+	}
+    }
+    for (;; string++) {
+	GLint digit;
+	if (*string >= '0' && *string <= '9')
+	    digit = *string - '0';
+	else if (*string >= 'a' && *string <= 'z')
+	    digit = *string - 'a' + 10;
+	else if (*string >= 'A' && *string <= 'Z')
+	    digit = *string - 'A' + 10;
+	else
+	    break;
+	if (digit >= radix)
+	    break; /* a digit character, but not one of this radix */
+	numberFound = GL_TRUE;
+	result = radix*result + digit;
+    }
+    *tail = numberFound ? string : start;
+    return sign * result;
+}
+
+/** \brief Locale-independent floating-point parser.
+ *
+ * Works similar to strtod. Leading space is NOT skipped. The input
+ * number may have an optional sign. '.' is interpreted as decimal
+ * point and may occur at most once. Optionally the number may end in
+ * [eE]<exponent>, where <exponent> is an integer as recognized by
+ * strToI. In that case the result is number * 10^exponent. After
+ * returning tail points to the first character that is not part of
+ * the floating point number. If no number was found then tail points
+ * to the start of the input string.
+ *
+ * Uses two passes for maximum accuracy. */
+static GLfloat strToF (const XML_Char *string, const XML_Char **tail) {
+    GLint nDigits = 0, pointPos, exponent;
+    GLfloat sign = 1.0f, result = 0.0f, scale;
+    const XML_Char *start = string, *numStart;
+
+    /* sign */
+    if (*string == '-') {
+	sign = -1.0f;
+	string++;
+    } else if (*string == '+')
+	string++;
+
+    /* first pass: determine position of decimal point, number of
+     * digits, exponent and the end of the number. */
+    numStart = string;
+    while (*string >= '0' && *string <= '9') {
+	string++;
+	nDigits++;
+    }
+    pointPos = nDigits; /* number of digits in front of the '.' */
+    if (*string == '.') {
+	string++;
+	while (*string >= '0' && *string <= '9') {
+	    string++;
+	    nDigits++;
+	}
+    }
+    if (nDigits == 0) {
+	/* no digits, no number */
+	*tail = start;
+	return 0.0f;
+    }
+    *tail = string;
+    if (*string == 'e' || *string == 'E') {
+	const XML_Char *expTail;
+	exponent = strToI (string+1, &expTail, 10);
+	if (expTail == string+1)
+	    exponent = 0; /* no exponent digits: tail stays at the 'e' */
+	else
+	    *tail = expTail;
+    } else
+	exponent = 0;
+    string = numStart;
+
+    /* scale of the first digit */
+    scale = sign * (GLfloat)pow (10.0, (GLdouble)(pointPos-1 + exponent));
+
+    /* second pass: parse digits */
+    do {
+	if (*string != '.') {
+	    assert (*string >= '0' && *string <= '9');
+	    result += scale * (GLfloat)(*string - '0');
+	    scale *= 0.1f; /* each further digit weighs a tenth */
+	    nDigits--;
+	}
+	string++;
+    } while (nDigits > 0);
+
+    return result;
+}
+
+/** \brief Parse a value of a given type.
+ *  \return GL_TRUE iff string holds one valid value and white-space only. */
+static GLboolean parseValue (driOptionValue *v, driOptionType type,
+			     const XML_Char *string) {
+    const XML_Char *tail = NULL;
+  /* skip leading white-space */
+    string += strspn (string, " \f\n\r\t\v");
+    switch (type) {
+      case DRI_BOOL:
+	/* prefix match only: whatever follows the keyword is vetted below,
+	 * so trailing white-space is tolerated as for the numeric types */
+	if (!strncmp (string, "false", 5)) {
+	    v->_bool = GL_FALSE;
+	    tail = string + 5;
+	} else if (!strncmp (string, "true", 4)) {
+	    v->_bool = GL_TRUE;
+	    tail = string + 4;
+	} else
+	    return GL_FALSE;
+	break;
+      case DRI_ENUM: /* enum is just a special integer */
+      case DRI_INT:
+	v->_int = strToI (string, &tail, 0);
+	break;
+      case DRI_FLOAT:
+	v->_float = strToF (string, &tail);
+	break;
+      default:
+	return GL_FALSE; /* unhandled type: tail would stay NULL below */
+    }
+
+    if (tail == string)
+	return GL_FALSE; /* empty string (or containing only white-space) */
+  /* skip trailing white space; anything left after it is not part of value */
+    tail += strspn (tail, " \f\n\r\t\v");
+    return *tail ? GL_FALSE : GL_TRUE;
+}
+
+/** \brief Parse a comma-separated list of ranges of type info->type. */
+static GLboolean parseRanges (driOptionInfo *info, const XML_Char *string) {
+    XML_Char *cp, *range;
+    GLuint nRanges, i;
+    driOptionRange *ranges;
+
+    XSTRDUP (cp, string); /* private copy: separators are overwritten below */
+  /* pass 1: determine the number of ranges (number of commas + 1) */
+    range = cp;
+    for (nRanges = 1; *range; ++range)
+	if (*range == ',')
+	    ++nRanges;
+
+    if ((ranges = MALLOC (nRanges*sizeof(driOptionRange))) == NULL) {
+	fprintf (stderr, "%s: %d: out of memory.\n", __FILE__, __LINE__);
+	abort();
+    }
+
+  /* pass 2: parse all ranges into preallocated array */
+    range = cp;
+    for (i = 0; i < nRanges; ++i) {
+	XML_Char *end, *sep;
+	assert (range);
+	end = strchr (range, ',');
+	if (end)
+	    *end = '\0';
+	sep = strchr (range, ':');
+	if (sep) { /* "start:end" interval */
+	    *sep = '\0';
+	    if (!parseValue (&ranges[i].start, info->type, range) ||
+		!parseValue (&ranges[i].end, info->type, sep+1))
+	        break;
+	    if (info->type == DRI_INT &&
+		ranges[i].start._int > ranges[i].end._int)
+		break;
+	    if (info->type == DRI_FLOAT &&
+		ranges[i].start._float > ranges[i].end._float)
+		break;
+	} else { /* single value: the range starts and ends on it */
+	    if (!parseValue (&ranges[i].start, info->type, range))
+		break;
+	    ranges[i].end = ranges[i].start;
+	}
+	if (end)
+	    range = end+1;
+	else
+	    range = NULL;
+    }
+    FREE (cp);
+    if (i < nRanges) { /* the loop stopped early on an invalid range */
+	FREE (ranges);
+	return GL_FALSE;
+    } else
+	assert (range == NULL);
+
+    info->nRanges = nRanges;
+    info->ranges = ranges;
+    return GL_TRUE;
+}
+
+/** \brief Tell whether a value lies inside one of info->ranges. */
+static GLboolean checkValue (const driOptionValue *v, const driOptionInfo *info) {
+    const GLboolean isFloat = info->type == DRI_FLOAT;
+    GLuint r;
+    assert (info->type != DRI_BOOL); /* should be caught by the parser */
+    /* an option without ranges accepts every value */
+    if (info->nRanges == 0)
+	return GL_TRUE;
+    /* only integer-like (int, enum) and float options are range-checked */
+    if (!isFloat && info->type != DRI_INT && info->type != DRI_ENUM) {
+	assert (0); /* should never happen */
+	return GL_FALSE;
+    }
+    for (r = 0; r < info->nRanges; ++r) {
+	const driOptionRange *range = &info->ranges[r];
+	if (isFloat) {
+	    if (v->_float >= range->start._float &&
+		v->_float <= range->end._float)
+		return GL_TRUE;
+	} else if (v->_int >= range->start._int &&
+		   v->_int <= range->end._int)
+	    return GL_TRUE;
+    }
+    return GL_FALSE;
+}
+
+/** \brief Output a warning message (XML_WARNING1: no format arguments). */
+#define XML_WARNING1(msg) do {\
+    __driUtilMessage ("Warning in %s line %d, column %d: "msg, data->name, \
+                      (int) XML_GetCurrentLineNumber(data->parser), \
+                      (int) XML_GetCurrentColumnNumber(data->parser)); \
+} while (0)
+#define XML_WARNING(msg,args...) do { \
+    __driUtilMessage ("Warning in %s line %d, column %d: "msg, data->name, \
+                      (int) XML_GetCurrentLineNumber(data->parser), \
+                      (int) XML_GetCurrentColumnNumber(data->parser), \
+                      args); \
+} while (0)
+/** \brief Output an error message (XML_ERROR1: no format arguments). */
+#define XML_ERROR1(msg) do { \
+    __driUtilMessage ("Error in %s line %d, column %d: "msg, data->name, \
+                      (int) XML_GetCurrentLineNumber(data->parser), \
+                      (int) XML_GetCurrentColumnNumber(data->parser)); \
+} while (0)
+#define XML_ERROR(msg,args...) do { \
+    __driUtilMessage ("Error in %s line %d, column %d: "msg, data->name, \
+                      (int) XML_GetCurrentLineNumber(data->parser), \
+                      (int) XML_GetCurrentColumnNumber(data->parser), \
+                      args); \
+} while (0)
+/** \brief Output a fatal error message to stderr and abort the process. */
+#define XML_FATAL1(msg) do { \
+    fprintf (stderr, "Fatal error in %s line %d, column %d: "msg"\n", \
+             data->name, \
+             (int) XML_GetCurrentLineNumber(data->parser),	\
+             (int) XML_GetCurrentColumnNumber(data->parser)); \
+    abort();\
+} while (0)
+#define XML_FATAL(msg,args...) do { \
+    fprintf (stderr, "Fatal error in %s line %d, column %d: "msg"\n", \
+             data->name, \
+             (int) XML_GetCurrentLineNumber(data->parser),	\
+             (int) XML_GetCurrentColumnNumber(data->parser),		\
+             args); \
+    abort();\
+} while (0)
+
+/** \brief Parser context for __driConfigOptions.
+ *
+ * Passed to the Expat element handlers as user data. The in* flags are
+ * set to GL_TRUE by the start-element handler and back to GL_FALSE by the
+ * end-element handler of the corresponding element. */
+struct OptInfoData {
+    const char *name;      /* document name printed in diagnostics */
+    XML_Parser parser;     /* parser queried for line/column in diagnostics */
+    driOptionCache *cache; /* option info and default values being filled in */
+    GLboolean inDriInfo;   /* currently inside <driinfo> */
+    GLboolean inSection;   /* currently inside <section> */
+    GLboolean inDesc;      /* currently inside <description> */
+    GLboolean inOption;    /* currently inside <option> */
+    GLboolean inEnum;      /* currently inside <enum> */
+    int curOption;         /* index in cache->info of the option being
+                            * parsed; written by parseOptInfoAttr and read
+                            * by parseEnumAttr */
+};
+
+/** \brief Elements in __driConfigOptions.
+ *
+ * The element handlers convert the result of bsearchStr() on OptInfoElems
+ * directly to an OptInfoElem, so enumerator N has to correspond to
+ * OptInfoElems[N]. OI_COUNT is the number of known elements; a lookup
+ * result that is none of the named elements ends up in the default case
+ * of the handlers' switch statements. */
+enum OptInfoElem {
+    OI_DESCRIPTION = 0, OI_DRIINFO, OI_ENUM, OI_OPTION, OI_SECTION, OI_COUNT
+};
+/* NOTE(review): presumably has to stay sorted for bsearchStr() -- confirm
+ * against its implementation before adding entries. */
+static const XML_Char *OptInfoElems[] = {
+    "description", "driinfo", "enum", "option", "section"
+};
+
+/** \brief Validate the attributes of an enum element.
+ *
+ * Nothing is stored; the point is to guarantee that the option info is
+ * well-formed for external configuration tools. Both \c value and
+ * \c text are required, no other attribute is allowed, and \c value has
+ * to parse as the type of the current option and pass its range check.
+ * Every violation is fatal.
+ */
+static void parseEnumAttr (struct OptInfoData *data, const XML_Char **attr) {
+    /* attr is a NULL-terminated sequence of name/value pairs */
+    const XML_Char **pair;
+    const XML_Char *valueStr = NULL;
+    const XML_Char *textStr = NULL;
+    driOptionInfo *optInfo;
+    driOptionValue parsed;
+    GLuint optIndex = data->curOption;
+
+    for (pair = attr; pair[0]; pair += 2) {
+        if (strcmp (pair[0], "value") == 0) {
+            valueStr = pair[1];
+        } else if (strcmp (pair[0], "text") == 0) {
+            textStr = pair[1];
+        } else {
+            XML_FATAL ("illegal enum attribute: %s.", pair[0]);
+        }
+    }
+
+    if (valueStr == NULL)
+        XML_FATAL1 ("value attribute missing in enum.");
+    if (textStr == NULL)
+        XML_FATAL1 ("text attribute missing in enum.");
+
+    optInfo = &data->cache->info[optIndex];
+    if (!parseValue (&parsed, optInfo->type, valueStr))
+        XML_FATAL ("illegal enum value: %s.", valueStr);
+    if (!checkValue (&parsed, optInfo))
+        XML_FATAL ("enum value out of valid range: %s.", valueStr);
+}
+
+/** \brief Validate the attributes of a description element.
+ *
+ * Nothing is stored; the point is to guarantee that the option info is
+ * well-formed for external configuration tools. Both \c lang and
+ * \c text are required and no other attribute is allowed. Every
+ * violation is fatal.
+ */
+static void parseDescAttr (struct OptInfoData *data, const XML_Char **attr) {
+    /* attr is a NULL-terminated sequence of name/value pairs */
+    const XML_Char **pair;
+    const XML_Char *langStr = NULL;
+    const XML_Char *textStr = NULL;
+
+    for (pair = attr; pair[0]; pair += 2) {
+        if (strcmp (pair[0], "lang") == 0) {
+            langStr = pair[1];
+        } else if (strcmp (pair[0], "text") == 0) {
+            textStr = pair[1];
+        } else {
+            XML_FATAL ("illegal description attribute: %s.", pair[0]);
+        }
+    }
+
+    if (langStr == NULL)
+        XML_FATAL1 ("lang attribute missing in description.");
+    if (textStr == NULL)
+        XML_FATAL1 ("text attribute missing in description.");
+}
+
+/** \brief Parse the attributes of an option element in the option info.
+ *
+ * Requires \c name, \c type and \c default; \c valid is optional except
+ * for enum options, where it is mandatory, and bool options, where it is
+ * forbidden. Registers the option in the cache (name, type, ranges),
+ * stores its default value and remembers its index in
+ * \c data->curOption. An environment variable with the same name as the
+ * option replaces the default from the XML. Every violation is fatal.
+ */
+static void parseOptInfoAttr (struct OptInfoData *data, const XML_Char **attr) {
+    enum OptAttr {OA_DEFAULT = 0, OA_NAME, OA_TYPE, OA_VALID, OA_COUNT};
+    static const XML_Char *optAttr[] = {"default", "name", "type", "valid"};
+    const XML_Char *attrVal[OA_COUNT] = {NULL, NULL, NULL, NULL};
+    const XML_Char **pair;
+    const XML_Char *typeName;
+    const char *defaultVal;
+    driOptionCache *cache = data->cache;
+    driOptionInfo *optInfo;
+    GLuint opt;
+
+    /* collect the attribute values, indexed by enum OptAttr */
+    for (pair = attr; pair[0]; pair += 2) {
+        GLuint which = bsearchStr (pair[0], optAttr, OA_COUNT);
+        if (which >= OA_COUNT)
+            XML_FATAL ("illegal option attribute: %s", pair[0]);
+        attrVal[which] = pair[1];
+    }
+    if (attrVal[OA_NAME] == NULL)
+        XML_FATAL1 ("name attribute missing in option.");
+    if (attrVal[OA_TYPE] == NULL)
+        XML_FATAL1 ("type attribute missing in option.");
+    if (attrVal[OA_DEFAULT] == NULL)
+        XML_FATAL1 ("default attribute missing in option.");
+
+    /* claim the slot of this option; a slot with a name is already taken */
+    opt = findOption (cache, attrVal[OA_NAME]);
+    optInfo = &cache->info[opt];
+    if (optInfo->name != NULL)
+        XML_FATAL ("option %s redefined.", attrVal[OA_NAME]);
+    data->curOption = opt;
+
+    XSTRDUP (cache->info[opt].name, attrVal[OA_NAME]);
+
+    typeName = attrVal[OA_TYPE];
+    if (strcmp (typeName, "bool") == 0) {
+        optInfo->type = DRI_BOOL;
+    } else if (strcmp (typeName, "enum") == 0) {
+        optInfo->type = DRI_ENUM;
+    } else if (strcmp (typeName, "int") == 0) {
+        optInfo->type = DRI_INT;
+    } else if (strcmp (typeName, "float") == 0) {
+        optInfo->type = DRI_FLOAT;
+    } else {
+        XML_FATAL ("illegal type in option: %s.", typeName);
+    }
+
+    /* the environment takes precedence over the default in the XML */
+    defaultVal = getenv (optInfo->name);
+    if (defaultVal == NULL) {
+        defaultVal = attrVal[OA_DEFAULT];
+    } else {
+        /* don't use XML_WARNING, we want the user to see this! */
+        fprintf (stderr,
+                 "ATTENTION: default value of option %s overridden by environment.\n",
+                 optInfo->name);
+    }
+    if (!parseValue (&cache->values[opt], optInfo->type, defaultVal))
+        XML_FATAL ("illegal default value: %s.", defaultVal);
+
+    if (attrVal[OA_VALID] == NULL) {
+        if (optInfo->type == DRI_ENUM)
+            XML_FATAL1 ("valid attribute missing in option (mandatory for enums).");
+        /* unrestricted option */
+        optInfo->nRanges = 0;
+        optInfo->ranges = NULL;
+        return;
+    }
+
+    if (optInfo->type == DRI_BOOL)
+        XML_FATAL1 ("boolean option with valid attribute.");
+    if (!parseRanges (optInfo, attrVal[OA_VALID]))
+        XML_FATAL ("illegal valid attribute: %s.", attrVal[OA_VALID]);
+    if (!checkValue (&cache->values[opt], optInfo))
+        XML_FATAL ("default value out of valid range '%s': %s.",
+                   attrVal[OA_VALID], defaultVal);
+}
+
+/** \brief Handler for start element events of the option info document.
+ *
+ * Enforces the nesting rules of __driConfigOptions and hands the
+ * attributes of description, option and enum elements to the matching
+ * parse function. Any violation is fatal (the process is aborted).
+ *
+ * \param userData  the struct OptInfoData registered with the parser
+ * \param name      element name
+ * \param attr      NULL-terminated list of attribute name/value pairs
+ */
+static void optInfoStartElem (void *userData, const XML_Char *name,
+                              const XML_Char **attr) {
+    struct OptInfoData *data = (struct OptInfoData *)userData;
+    enum OptInfoElem elem = bsearchStr (name, OptInfoElems, OI_COUNT);
+    switch (elem) {
+      case OI_DRIINFO:
+        if (data->inDriInfo)
+            XML_FATAL1 ("nested <driinfo> elements.");
+        if (attr[0])
+            XML_FATAL1 ("attributes specified on <driinfo> element.");
+        data->inDriInfo = GL_TRUE;
+        break;
+      case OI_SECTION:
+        if (!data->inDriInfo)
+            XML_FATAL1 ("<section> must be inside <driinfo>.");
+        if (data->inSection)
+            XML_FATAL1 ("nested <section> elements.");
+        if (attr[0])
+            XML_FATAL1 ("attributes specified on <section> element.");
+        data->inSection = GL_TRUE;
+        break;
+      case OI_DESCRIPTION:
+        /* the message names the two parents the condition actually tests */
+        if (!data->inSection && !data->inOption)
+            XML_FATAL1 ("<description> must be inside <section> or <option>.");
+        if (data->inDesc)
+            XML_FATAL1 ("nested <description> elements.");
+        data->inDesc = GL_TRUE;
+        parseDescAttr (data, attr);
+        break;
+      case OI_OPTION:
+        if (!data->inSection)
+            XML_FATAL1 ("<option> must be inside <section>.");
+        if (data->inDesc)
+            XML_FATAL1 ("<option> nested in <description> element.");
+        if (data->inOption)
+            XML_FATAL1 ("nested <option> elements.");
+        data->inOption = GL_TRUE;
+        parseOptInfoAttr (data, attr);
+        break;
+      case OI_ENUM:
+        if (!(data->inOption && data->inDesc))
+            XML_FATAL1 ("<enum> must be inside <option> and <description>.");
+        if (data->inEnum)
+            XML_FATAL1 ("nested <enum> elements.");
+        data->inEnum = GL_TRUE;
+        parseEnumAttr (data, attr);
+        break;
+      default:
+        XML_FATAL ("unknown element: %s.", name);
+    }
+}
+
+/** \brief Handler for end element events of the option info document.
+ *
+ * Clears the "inside element" flag that the start handler set for the
+ * element being closed.
+ */
+static void optInfoEndElem (void *userData, const XML_Char *name) {
+    struct OptInfoData *data = (struct OptInfoData *)userData;
+    enum OptInfoElem closed = bsearchStr (name, OptInfoElems, OI_COUNT);
+
+    if (closed == OI_DRIINFO) {
+        data->inDriInfo = GL_FALSE;
+    } else if (closed == OI_SECTION) {
+        data->inSection = GL_FALSE;
+    } else if (closed == OI_DESCRIPTION) {
+        data->inDesc = GL_FALSE;
+    } else if (closed == OI_OPTION) {
+        data->inOption = GL_FALSE;
+    } else if (closed == OI_ENUM) {
+        data->inEnum = GL_FALSE;
+    } else {
+        assert (0); /* should have been caught by StartElem */
+    }
+}
+
+/* Build the option info table and default values from the XML document
+ * configOptions.
+ *
+ * info            cache receiving the table size, the option infos and
+ *                 the default values
+ * configOptions   NUL-terminated XML document (parsed as UTF-8)
+ * nConfigOptions  expected number of options; determines the table size
+ *
+ * Running out of memory or finding an error in the XML aborts the
+ * process. A mismatch between nConfigOptions and the number of options
+ * actually defined is only reported on stderr. */
+void driParseOptionInfo (driOptionCache *info,
+                         const char *configOptions, GLuint nConfigOptions) {
+    XML_Parser p;
+    int status;
+    struct OptInfoData userData;
+    struct OptInfoData *data = &userData; /* required by XML_FATAL */
+    GLuint realNoptions;
+
+  /* determine hash table size and allocate memory:
+   * 3/2 of the number of options, rounded up, so there remains always
+   * at least one free entry. This is needed for detecting undefined
+   * options in configuration files without getting a hash table overflow.
+   * Round this up to a power of two. */
+    GLuint minSize = (nConfigOptions*3 + 1) / 2;
+    GLuint size, log2size;
+    for (size = 1, log2size = 0; size < minSize; size <<= 1, ++log2size);
+    info->tableSize = log2size;
+    info->info = CALLOC (size * sizeof (driOptionInfo));
+    info->values = CALLOC (size * sizeof (driOptionValue));
+    if (info->info == NULL || info->values == NULL) {
+        fprintf (stderr, "%s: %d: out of memory.\n", __FILE__, __LINE__);
+        abort();
+    }
+
+    p = XML_ParserCreate ("UTF-8"); /* always UTF-8 */
+    if (p == NULL) {
+      /* parser creation fails when memory is exhausted; treat it like the
+       * allocation failures above instead of handing NULL to Expat */
+        fprintf (stderr, "%s: %d: out of memory.\n", __FILE__, __LINE__);
+        abort();
+    }
+    XML_SetElementHandler (p, optInfoStartElem, optInfoEndElem);
+    XML_SetUserData (p, data);
+
+    userData.name = "__driConfigOptions";
+    userData.parser = p;
+    userData.cache = info;
+    userData.inDriInfo = GL_FALSE;
+    userData.inSection = GL_FALSE;
+    userData.inDesc = GL_FALSE;
+    userData.inOption = GL_FALSE;
+    userData.inEnum = GL_FALSE;
+    userData.curOption = -1;
+
+    status = XML_Parse (p, configOptions, strlen (configOptions), 1);
+    if (!status)
+        XML_FATAL ("%s.", XML_ErrorString(XML_GetErrorCode(p)));
+
+    XML_ParserFree (p);
+
+  /* Check if the actual number of options matches nConfigOptions.
+   * A mismatch is not fatal (a hash table overflow would be) but we
+   * want the driver developer's attention anyway. */
+    realNoptions = countOptions (info);
+    if (realNoptions != nConfigOptions) {
+        fprintf (stderr,
+                 "Error: nConfigOptions (%u) does not match the actual number of options in\n"
+                 "       __driConfigOptions (%u).\n",
+                 nConfigOptions, realNoptions);
+    }
+}
+
+/** \brief Parser context for configuration files.
+ *
+ * Passed to the Expat element handlers as user data. The in* members
+ * are nesting counters: incremented by the start-element handler and
+ * decremented by the end-element handler. ignoringDevice/ignoringApp
+ * hold the value of inDevice/inApp at which a non-matching section was
+ * entered, and are reset to 0 when that section is closed; 0 means
+ * nothing is being ignored. */
+struct OptConfData {
+    const char *name;       /* file name, opened and printed in diagnostics */
+    XML_Parser parser;      /* parser queried for line/column in diagnostics */
+    driOptionCache *cache;  /* option values updated from the file */
+    GLint screenNum;        /* compared with the screen attribute of <device> */
+    const char *driverName, *execName; /* compared with the driver attribute
+                                        * of <device> and the executable
+                                        * attribute of <application> */
+    GLuint ignoringDevice;
+    GLuint ignoringApp;
+    GLuint inDriConf;
+    GLuint inDevice;
+    GLuint inApp;
+    GLuint inOption;
+};
+
+/** \brief Elements in configuration files.
+ *
+ * The element handlers convert the result of bsearchStr() on OptConfElems
+ * directly to an OptConfElem, so enumerator N has to correspond to
+ * OptConfElems[N]. OC_COUNT is the number of known elements; a lookup
+ * result that is none of the named elements ends up in the default case
+ * of the handlers' switch statements. */
+enum OptConfElem {
+    OC_APPLICATION = 0, OC_DEVICE, OC_DRICONF, OC_OPTION, OC_COUNT
+};
+/* NOTE(review): presumably has to stay sorted for bsearchStr() -- confirm
+ * against its implementation before adding entries. */
+static const XML_Char *OptConfElems[] = {
+    "application", "device", "driconf", "option"
+};
+
+/** \brief Parse attributes of a device element.
+ *
+ * Recognises the optional \c driver and \c screen attributes; any other
+ * attribute only produces a warning. The section is marked as ignored,
+ * by recording the current device nesting depth in
+ * \c data->ignoringDevice, when \c driver differs from
+ * \c data->driverName, or otherwise when \c screen parses as an integer
+ * different from \c data->screenNum. A \c screen value that does not
+ * parse is warned about and does not cause the section to be ignored.
+ */
+static void parseDeviceAttr (struct OptConfData *data, const XML_Char **attr) {
+    GLuint i;
+    const XML_Char *driver = NULL, *screen = NULL;
+    for (i = 0; attr[i]; i += 2) {
+        if (!strcmp (attr[i], "driver")) driver = attr[i+1];
+        else if (!strcmp (attr[i], "screen")) screen = attr[i+1];
+        else XML_WARNING("unknown device attribute: %s.", attr[i]);
+    }
+    if (driver && strcmp (driver, data->driverName))
+        data->ignoringDevice = data->inDevice;
+    else if (screen) {
+        driOptionValue screenNum;
+        if (!parseValue (&screenNum, DRI_INT, screen))
+            XML_WARNING("illegal screen number: %s.", screen);
+        else if (screenNum._int != data->screenNum)
+            data->ignoringDevice = data->inDevice;
+    }
+}
+
+/** \brief Parse attributes of an application element.
+ *
+ * \c name is accepted but not evaluated. \c executable, when present,
+ * is compared with \c data->execName; on a mismatch the section is marked
+ * as ignored by recording the current application nesting depth in
+ * \c data->ignoringApp. Any other attribute only produces a warning.
+ */
+static void parseAppAttr (struct OptConfData *data, const XML_Char **attr) {
+    GLuint i;
+    const XML_Char *exec = NULL;
+    for (i = 0; attr[i]; i += 2) {
+        if (!strcmp (attr[i], "executable"))
+            exec = attr[i+1];
+        else if (strcmp (attr[i], "name") != 0)
+            /* "name" is a legal attribute, its value is simply not needed */
+            XML_WARNING("unknown application attribute: %s.", attr[i]);
+    }
+    if (exec && strcmp (exec, data->execName))
+        data->ignoringApp = data->inApp;
+}
+
+/** \brief Parse attributes of an option element in a configuration file.
+ *
+ * Requires \c name and \c value; a missing one or any other attribute
+ * produces a warning. If both are present the value is parsed according
+ * to the type of the named option and stored in the cache, unless the
+ * option is undefined (warning) or an environment variable with the name
+ * of the option exists (the value from the file is ignored and a notice
+ * is printed to stderr).
+ */
+static void parseOptConfAttr (struct OptConfData *data, const XML_Char **attr) {
+    GLuint i;
+    const XML_Char *name = NULL, *value = NULL;
+    for (i = 0; attr[i]; i += 2) {
+        if (!strcmp (attr[i], "name")) name = attr[i+1];
+        else if (!strcmp (attr[i], "value")) value = attr[i+1];
+        else XML_WARNING("unknown option attribute: %s.", attr[i]);
+    }
+    if (!name) XML_WARNING1 ("name attribute missing in option.");
+    if (!value) XML_WARNING1 ("value attribute missing in option.");
+    if (name && value) {
+        driOptionCache *cache = data->cache;
+        GLuint opt = findOption (cache, name);
+        if (cache->info[opt].name == NULL)
+            XML_WARNING ("undefined option: %s.", name);
+        else if (getenv (cache->info[opt].name))
+          /* don't use XML_WARNING, we want the user to see this! */
+            fprintf (stderr, "ATTENTION: option value of option %s ignored.\n",
+                     cache->info[opt].name);
+        else if (!parseValue (&cache->values[opt], cache->info[opt].type, value))
+            XML_WARNING ("illegal option value: %s.", value);
+    }
+}
+
+/** \brief Handler for start element events of a configuration file.
+ *
+ * Unexpected nesting only produces warnings. The nesting counter of the
+ * element is incremented in any case; the attributes of device,
+ * application and option elements are evaluated only while no enclosing
+ * device or application section is being ignored.
+ */
+static void optConfStartElem (void *userData, const XML_Char *name,
+                              const XML_Char **attr) {
+    struct OptConfData *data = (struct OptConfData *)userData;
+    enum OptConfElem opened = bsearchStr (name, OptConfElems, OC_COUNT);
+    /* the checks and counter updates below do not touch the ignore state */
+    const GLboolean active = !data->ignoringDevice && !data->ignoringApp;
+
+    switch (opened) {
+      case OC_DRICONF:
+        if (data->inDriConf) {
+            XML_WARNING1 ("nested <driconf> elements.");
+        }
+        if (attr[0]) {
+            XML_WARNING1 ("attributes specified on <driconf> element.");
+        }
+        data->inDriConf += 1;
+        break;
+
+      case OC_DEVICE:
+        if (!data->inDriConf) {
+            XML_WARNING1 ("<device> should be inside <driconf>.");
+        }
+        if (data->inDevice) {
+            XML_WARNING1 ("nested <device> elements.");
+        }
+        data->inDevice += 1;
+        if (active) {
+            parseDeviceAttr (data, attr);
+        }
+        break;
+
+      case OC_APPLICATION:
+        if (!data->inDevice) {
+            XML_WARNING1 ("<application> should be inside <device>.");
+        }
+        if (data->inApp) {
+            XML_WARNING1 ("nested <application> elements.");
+        }
+        data->inApp += 1;
+        if (active) {
+            parseAppAttr (data, attr);
+        }
+        break;
+
+      case OC_OPTION:
+        if (!data->inApp) {
+            XML_WARNING1 ("<option> should be inside <application>.");
+        }
+        if (data->inOption) {
+            XML_WARNING1 ("nested <option> elements.");
+        }
+        data->inOption += 1;
+        if (active) {
+            parseOptConfAttr (data, attr);
+        }
+        break;
+
+      default:
+        XML_WARNING ("unknown element: %s.", name);
+    }
+}
+
+/** \brief Handler for end element events of a configuration file.
+ *
+ * Decrements the nesting counter of the element being closed. Closing
+ * the device or application section that started an ignored region also
+ * ends that region.
+ */
+static void optConfEndElem (void *userData, const XML_Char *name) {
+    struct OptConfData *data = (struct OptConfData *)userData;
+    enum OptConfElem closed = bsearchStr (name, OptConfElems, OC_COUNT);
+
+    switch (closed) {
+      case OC_DRICONF:
+        data->inDriConf -= 1;
+        break;
+      case OC_DEVICE:
+        /* compare with the depth before leaving the element */
+        if (data->inDevice == data->ignoringDevice) {
+            data->ignoringDevice = 0;
+        }
+        data->inDevice -= 1;
+        break;
+      case OC_APPLICATION:
+        /* compare with the depth before leaving the element */
+        if (data->inApp == data->ignoringApp) {
+            data->ignoringApp = 0;
+        }
+        data->inApp -= 1;
+        break;
+      case OC_OPTION:
+        data->inOption -= 1;
+        break;
+      default:
+        /* unknown element, warning was produced on start tag */
+        break;
+    }
+}
+
+/** \brief Initialize an option cache based on info.
+ *
+ * The option info array is shared with \c info, whereas the values are
+ * copied into a newly allocated array of the same size. Aborts when that
+ * array cannot be allocated.
+ */
+static void initOptionCache (driOptionCache *cache, const driOptionCache *info) {
+    /* tableSize is log2 of the number of table entries */
+    const size_t valuesBytes = (1<<info->tableSize) * sizeof (driOptionValue);
+
+    cache->info = info->info;
+    cache->tableSize = info->tableSize;
+    cache->values = MALLOC (valuesBytes);
+    if (cache->values == NULL) {
+        fprintf (stderr, "%s: %d: out of memory.\n", __FILE__, __LINE__);
+        abort();
+    }
+    memcpy (cache->values, info->values, valuesBytes);
+}
+
+/** \brief Parse the configuration file named in the parser's user data.
+ *
+ * The file is fed to the parser in chunks of BUF_SIZE bytes. Failure to
+ * open or read the file, to obtain a parser buffer, or to parse a chunk
+ * is reported and ends processing of this file.
+ */
+static void parseOneConfigFile (XML_Parser p) {
+#define BUF_SIZE 0x1000
+    struct OptConfData *data = (struct OptConfData *)XML_GetUserData (p);
+    int fd;
+    int atEof = 0;
+
+    fd = open (data->name, O_RDONLY);
+    if (fd == -1) {
+        __driUtilMessage ("Can't open configuration file %s: %s.",
+                          data->name, strerror (errno));
+        return;
+    }
+
+    while (!atEof) {
+        int nRead;
+        void *chunk = XML_GetBuffer (p, BUF_SIZE);
+        if (chunk == NULL) {
+            __driUtilMessage ("Can't allocate parser buffer.");
+            break;
+        }
+        nRead = read (fd, chunk, BUF_SIZE);
+        if (nRead == -1) {
+            __driUtilMessage ("Error reading from configuration file %s: %s.",
+                              data->name, strerror (errno));
+            break;
+        }
+        /* an empty read marks the end of the file and the final chunk */
+        atEof = (nRead == 0);
+        if (!XML_ParseBuffer (p, nRead, atEof)) {
+            XML_ERROR ("%s.", XML_ErrorString(XML_GetErrorCode(p)));
+            break;
+        }
+    }
+
+    close (fd);
+#undef BUF_SIZE
+}
+
+/* Initialize cache from info and apply the configuration files.
+ *
+ * cache       option cache to initialize; its values start out as a copy
+ *             of the defaults stored in info
+ * info        option info created by driParseOptionInfo
+ * screenNum   screen number matched against <device screen="...">
+ * driverName  driver name matched against <device driver="...">
+ *
+ * /etc/drirc is read first, then $HOME/.drirc if HOME is set. A file
+ * that cannot be processed is reported and skipped. */
+void driParseConfigFiles (driOptionCache *cache, const driOptionCache *info,
+                          GLint screenNum, const char *driverName) {
+    char *filenames[2] = {"/etc/drirc", NULL};
+    char *home;
+    GLuint i;
+    struct OptConfData userData;
+
+    initOptionCache (cache, info);
+
+    userData.cache = cache;
+    userData.screenNum = screenNum;
+    userData.driverName = driverName;
+    userData.execName = GET_PROGRAM_NAME();
+
+    if ((home = getenv ("HOME"))) {
+        GLuint len = strlen (home);
+        /* room for home, "/.drirc" (7 characters) and the terminator */
+        filenames[1] = MALLOC (len + 7+1);
+        if (filenames[1] == NULL)
+            __driUtilMessage ("Can't allocate memory for %s/.drirc.", home);
+        else {
+            memcpy (filenames[1], home, len);
+            memcpy (filenames[1] + len, "/.drirc", 7+1);
+        }
+    }
+
+    for (i = 0; i < 2; ++i) {
+        XML_Parser p;
+        if (filenames[i] == NULL)
+            continue;
+
+        p = XML_ParserCreate (NULL); /* use encoding specified by file */
+        if (p == NULL) {
+            /* no parser, e.g. out of memory: skip this file rather than
+             * passing NULL on to Expat */
+            __driUtilMessage ("Can't create XML parser for %s.", filenames[i]);
+            continue;
+        }
+        XML_SetElementHandler (p, optConfStartElem, optConfEndElem);
+        XML_SetUserData (p, &userData);
+        userData.parser = p;
+        userData.name = filenames[i];
+        userData.ignoringDevice = 0;
+        userData.ignoringApp = 0;
+        userData.inDriConf = 0;
+        userData.inDevice = 0;
+        userData.inApp = 0;
+        userData.inOption = 0;
+
+        parseOneConfigFile (p);
+        XML_ParserFree (p);
+    }
+
+    if (filenames[1])
+        FREE (filenames[1]);
+}
+
+/* Release the values and the option info table of info, including the
+ * name and the ranges of every defined option. */
+void driDestroyOptionInfo (driOptionCache *info) {
+    GLuint slot, nSlots;
+
+    driDestroyOptionCache (info);
+    if (!info->info)
+        return;
+
+    /* tableSize is log2 of the number of table entries */
+    nSlots = 1 << info->tableSize;
+    for (slot = 0; slot < nSlots; ++slot) {
+        driOptionInfo *entry = &info->info[slot];
+        if (!entry->name)
+            continue; /* unused slot */
+        FREE (entry->name);
+        if (entry->ranges)
+            FREE (entry->ranges);
+    }
+    FREE (info->info);
+}
+
+/* Release the value array of cache, if there is one. */
+void driDestroyOptionCache (driOptionCache *cache) {
+    if (!cache->values)
+        return;
+    FREE (cache->values);
+}
+
+/* Return true if an option called name is defined and has the given type. */
+GLboolean driCheckOption (const driOptionCache *cache, const char *name,
+                          driOptionType type) {
+    const driOptionInfo *entry = &cache->info[findOption (cache, name)];
+
+    if (entry->name == NULL)
+        return GL_FALSE; /* slot unused: no such option */
+    return entry->type == type;
+}
+
+/* Return the value of the boolean option called name. */
+GLboolean driQueryOptionb (const driOptionCache *cache, const char *name) {
+    const GLuint slot = findOption (cache, name);
+    const driOptionInfo *entry = &cache->info[slot];
+
+    /* the option has to exist and to be a boolean */
+    assert (entry->name != NULL);
+    assert (entry->type == DRI_BOOL);
+    return cache->values[slot]._bool;
+}
+
+/* Return the value of the integer or enum option called name. */
+GLint driQueryOptioni (const driOptionCache *cache, const char *name) {
+    const GLuint slot = findOption (cache, name);
+    const driOptionInfo *entry = &cache->info[slot];
+
+    /* the option has to exist and to be an integer or an enum */
+    assert (entry->name != NULL);
+    assert (entry->type == DRI_INT || entry->type == DRI_ENUM);
+    return cache->values[slot]._int;
+}
+
+/* Return the value of the floating-point option called name. */
+GLfloat driQueryOptionf (const driOptionCache *cache, const char *name) {
+    const GLuint slot = findOption (cache, name);
+    const driOptionInfo *entry = &cache->info[slot];
+
+    /* the option has to exist and to be a float */
+    assert (entry->name != NULL);
+    assert (entry->type == DRI_FLOAT);
+    return cache->values[slot]._float;
+}
diff --git a/src/mesa/drivers/dri/common/xmlconfig.h b/src/mesa/drivers/dri/common/xmlconfig.h
new file mode 100644
index 0000000000..c363af764f
--- /dev/null
+++ b/src/mesa/drivers/dri/common/xmlconfig.h
@@ -0,0 +1,124 @@
+/*
+ * XML DRI client-side driver configuration
+ * Copyright (C) 2003 Felix Kuehling
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * FELIX KUEHLING, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ */
+/**
+ * \file xmlconfig.h
+ * \brief Driver-independent client-side part of the XML configuration
+ * \author Felix Kuehling
+ */
+
+#ifndef __XMLCONFIG_H
+#define __XMLCONFIG_H
+
+/** \brief Option data types
+ *
+ * Selected by the type attribute of an option element: "bool", "enum",
+ * "int" and "float" respectively. */
+typedef enum driOptionType {
+    DRI_BOOL, DRI_ENUM, DRI_INT, DRI_FLOAT
+} driOptionType;
+
+/** \brief Option value
+ *
+ * Which member is meaningful follows from the driOptionType of the
+ * option the value belongs to. */
+typedef union driOptionValue {
+    GLboolean _bool; /**< \brief Boolean */
+    GLint _int;      /**< \brief Integer or Enum */
+    GLfloat _float;  /**< \brief Floating-point */
+} driOptionValue;
+
+/** \brief Single range of valid values
+ *
+ * Both bounds are inclusive. A range consisting of a single value has
+ * start == end. */
+typedef struct driOptionRange {
+    driOptionValue start; /**< \brief Start (inclusive) */
+    driOptionValue end;   /**< \brief End (inclusive) */
+} driOptionRange;
+
+/** \brief Information about an option
+ *
+ * One entry of the option hash table. An entry whose name is NULL is
+ * unused. */
+typedef struct driOptionInfo {
+    char *name;             /**< \brief Name, NULL if the entry is unused */
+    driOptionType type;     /**< \brief Type */
+    driOptionRange *ranges; /**< \brief Array of ranges, NULL if the option
+                             *   has no valid attribute */
+    GLuint nRanges;         /**< \brief Number of ranges */
+} driOptionInfo;
+
+/** \brief Option cache
+ *
+ * \li One in <driver>Screen caching option info and the default values
+ * \li One in each <driver>Context with the actual values for that context
+ *
+ * info and values are parallel arrays with (1 << tableSize) entries
+ * each; the entry of an option is located with the same index in both. */
+typedef struct driOptionCache {
+    driOptionInfo *info;
+  /**< \brief Array of option infos
+   *
+   * Points to the same array in the screen and all contexts */
+    driOptionValue *values;	
+  /**< \brief Array of option values
+   *
+   * \li Default values in screen
+   * \li Actual values in contexts
+   */
+    GLuint tableSize;
+  /**< \brief Size of the arrays
+   *
+   * Depending on the hash function this may differ from __driNConfigOptions.
+   * In the current implementation it's not actually a size but log2(size).
+   * The value is the same in the screen and all contexts. */
+} driOptionCache;
+
+/** \brief Parse XML option info from configOptions
+ *
+ * To be called in <driver>CreateScreen
+ *
+ * \param info    pointer to a driOptionCache that will store the option info
+ * \param configOptions   XML document describing available configuration opts
+ * \param nConfigOptions  number of options, used to choose a hash table size
+ *
+ * For the option information to be available to external configuration tools
+ * it must be a public symbol __driConfigOptions. It is also passed as a
+ * parameter to driParseOptionInfo in order to avoid driver-independent code
+ * depending on symbols in driver-specific code.
+ *
+ * Errors in configOptions and failed allocations abort the process. If
+ * nConfigOptions differs from the number of options found, an error
+ * message is printed but parsing is not treated as failed. */
+void driParseOptionInfo (driOptionCache *info,
+			 const char *configOptions, GLuint nConfigOptions);
+/** \brief Initialize option cache from info and parse configuration files
+ *
+ * To be called in <driver>CreateContext. screenNum and driverName select
+ * device sections.
+ *
+ * The cache shares the option info array of info and receives its own
+ * copy of the values. /etc/drirc is read first, followed by
+ * $HOME/.drirc if HOME is set. */
+void driParseConfigFiles (driOptionCache *cache, const driOptionCache *info,
+			  GLint screenNum, const char *driverName);
+/** \brief Destroy option info
+ *
+ * To be called in <driver>DestroyScreen
+ *
+ * Frees the values as well (it calls driDestroyOptionCache on info), then
+ * the option names, ranges and the info array itself. */
+void driDestroyOptionInfo (driOptionCache *info);
+/** \brief Destroy option cache
+ *
+ * To be called in <driver>DestroyContext
+ *
+ * Frees only the values; the option info array is left alone. */
+void driDestroyOptionCache (driOptionCache *cache);
+
+/** \brief Check if there exists a certain option
+ *
+ * True only if an option called name is defined and has exactly the
+ * given type. */
+GLboolean driCheckOption (const driOptionCache *cache, const char *name,
+			  driOptionType type);
+
+/** \brief Query a boolean option value
+ *
+ * Asserts that the option is defined and of type DRI_BOOL. */
+GLboolean driQueryOptionb (const driOptionCache *cache, const char *name);
+/** \brief Query an integer option value
+ *
+ * Asserts that the option is defined and of type DRI_INT or DRI_ENUM. */
+GLint driQueryOptioni (const driOptionCache *cache, const char *name);
+/** \brief Query a floating-point option value
+ *
+ * Asserts that the option is defined and of type DRI_FLOAT. */
+GLfloat driQueryOptionf (const driOptionCache *cache, const char *name);
+
+#endif
diff --git a/src/mesa/drivers/dri/common/xmlpool.h b/src/mesa/drivers/dri/common/xmlpool.h
new file mode 100644
index 0000000000..587517ea10
--- /dev/null
+++ b/src/mesa/drivers/dri/common/xmlpool.h
@@ -0,0 +1,98 @@
+/*
+ * XML DRI client-side driver configuration
+ * Copyright (C) 2003 Felix Kuehling
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * FELIX KUEHLING, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ */
+/**
+ * \file xmlpool.h
+ * \brief Pool of common options
+ * \author Felix Kuehling
+ *
+ * This file defines macros that can be used to construct
+ * driConfigOptions in the drivers. Common options are defined in
+ * xmlpool/t_options.h from which xmlpool/options.h is generated with
+ * translations. This file defines generic helper macros and includes
+ * xmlpool/options.h.
+ */
+
+#ifndef __XMLPOOL_H
+#define __XMLPOOL_H
+
+/*
+ * generic macros
+ */
+
+/** \brief Begin __driConfigOptions */
+#define DRI_CONF_BEGIN \
+"<driinfo>\n"
+
+/** \brief End __driConfigOptions */
+#define DRI_CONF_END \
+"</driinfo>\n"
+
+/** \brief Begin a section of related options */
+#define DRI_CONF_SECTION_BEGIN \
+"<section>\n"
+
+/** \brief End a section of related options */
+#define DRI_CONF_SECTION_END \
+"</section>\n"
+
+/** \brief Begin an option definition
+ *
+ * name, type and def are stringified and placed between the quotes of the
+ * respective attribute, so they are passed as bare tokens. */
+#define DRI_CONF_OPT_BEGIN(name,type,def) \
+"<option name=\""#name"\" type=\""#type"\" default=\""#def"\">\n"
+
+/** \brief Begin an option definition with quoted default value
+ *
+ * No quotes are added around def here; the stringified argument has to
+ * bring its own, i.e. def is passed as a string literal. */
+#define DRI_CONF_OPT_BEGIN_Q(name,type,def) \
+"<option name=\""#name"\" type=\""#type"\" default="#def">\n"
+
+/** \brief Begin an option definition with restrictions on valid values
+ *
+ * valid is concatenated, not stringified, and therefore has to be a
+ * string literal. */
+#define DRI_CONF_OPT_BEGIN_V(name,type,def,valid) \
+"<option name=\""#name"\" type=\""#type"\" default=\""#def"\" valid=\""valid"\">\n"
+
+/** \brief End an option description */
+#define DRI_CONF_OPT_END \
+"</option>\n"
+
+/** \brief A verbal description in a specified language (empty version)
+ *
+ * Emits a self-closing description element. lang is stringified; text
+ * is concatenated and has to be a string literal. */
+#define DRI_CONF_DESC(lang,text) \
+"<description lang=\""#lang"\" text=\""text"\"/>\n"
+
+/** \brief A verbal description in a specified language
+ *
+ * Opens a description element that has to be closed with
+ * DRI_CONF_DESC_END. lang is stringified; text is concatenated and has
+ * to be a string literal. */
+#define DRI_CONF_DESC_BEGIN(lang,text) \
+"<description lang=\""#lang"\" text=\""text"\">\n"
+
+/** \brief End a description */
+#define DRI_CONF_DESC_END \
+"</description>\n"
+
+/** \brief A verbal description of an enum value
+ *
+ * value is stringified; text is concatenated and has to be a string
+ * literal. */
+#define DRI_CONF_ENUM(value,text) \
+"<enum value=\""#value"\" text=\""text"\"/>\n"
+
+
+/*
+ * Predefined option sections and options with multi-lingual descriptions
+ * are now automatically generated.
+ */
+#include "xmlpool/options.h"
+
+#endif
diff --git a/src/mesa/drivers/dri/common/xmlpool/.gitignore b/src/mesa/drivers/dri/common/xmlpool/.gitignore
new file mode 100644
index 0000000000..a5a437849b
--- /dev/null
+++ b/src/mesa/drivers/dri/common/xmlpool/.gitignore
@@ -0,0 +1,5 @@
+de
+es
+fr
+nl
+sv
diff --git a/src/mesa/drivers/dri/common/xmlpool/Makefile b/src/mesa/drivers/dri/common/xmlpool/Makefile
new file mode 100644
index 0000000000..62ec919ea6
--- /dev/null
+++ b/src/mesa/drivers/dri/common/xmlpool/Makefile
@@ -0,0 +1,96 @@
+# Convenient makefile for managing translations.
+
+# Prerequisites:
+# - GNU gettext
+# - Python
+
+# Adding new translations
+# -----------------------
+
+# To start working on a new translation edit the POS=... line
+# below. If you want to add for example a french translation, add
+# fr.po.
+
+# Then run "make po" to generate a fresh .po file from translatable
+# strings in t_options.h. Now you can edit the new .po file (fr.po in
+# the example above) to translate the strings. Please make sure that
+# your editor encodes the file in UTF-8.
+
+# Updating existing translations
+# ------------------------------
+
+# Run "make po" to update .po files with new translatable strings from
+# t_options.h. Now you can edit the .po files you're interested
+# in. Please make sure that your editor encodes the file in UTF-8.
+
+# Updating options.h
+# ------------------
+
+# Finally run "make" to generate options.h from t_options.h with all
+# translations. Now you can rebuild the drivers. Any common options
+# used by the drivers will have option descriptions with the latest
+# translations.
+
+# Publishing translations
+# -----------------------
+
+# To get your translation(s) into Mesa CVS, please send me your
+# <lang>.po file.
+
+# More information:
+# - info gettext
+
+# The set of supported languages. Add languages as needed.
+POS=de.po es.po nl.po fr.po sv.po
+
+#
+# Don't change anything below, unless you know what you're doing.
+#
+LANGS=$(POS:%.po=%)
+MOS=$(POS:%.po=%/LC_MESSAGES/options.mo)
+POT=xmlpool.pot
+
+.PHONY: all clean pot po mo
+
+all: options.h
+
+# Only intermediate files are cleaned up. options.h is not deleted because
+# it's in CVS. 
+clean:
+	-rm -f $(POT) *~
+	-rm -rf $(LANGS)
+
+# Default target options.h
+options.h: t_options.h mo
+	python gen_xmlpool.py $(LANGS) > options.h
+
+# Update .mo files from the corresponding .po files.
+mo:
+	@for mo in $(MOS); do \
+		lang=$${mo%%/*}; \
+		echo "Updating $$mo from $$lang.po."; \
+		mkdir -p $${mo%/*}; \
+		msgfmt -o $$mo $$lang.po; \
+	done
+
+# Use this target to create or update .po files with new messages in
+# t_options.h.
+po: $(POS)
+
+pot: $(POT)
+
+# Extract message catalog from t_options.h.
+$(POT): t_options.h
+	xgettext -L C --from-code utf-8 -o $(POT) t_options.h
+
+# Create or update a .po file for a specific language.
+%.po: $(POT)
+	@if [ -f $@ ]; then \
+		echo "Merging new strings from $(POT) into $@."; \
+		mv $@ $@~; \
+		msgmerge -o $@ $@~ $(POT); \
+	else \
+		echo "Initializing $@ from $(POT)."; \
+		msginit -i $(POT) -o $@~ --locale=$*; \
+		sed -e 's/charset=.*\\n/charset=UTF-8\\n/' $@~ > $@; \
+	fi
diff --git a/src/mesa/drivers/dri/common/xmlpool/de.po b/src/mesa/drivers/dri/common/xmlpool/de.po
new file mode 100644
index 0000000000..4ea82f9010
--- /dev/null
+++ b/src/mesa/drivers/dri/common/xmlpool/de.po
@@ -0,0 +1,240 @@
+# German translations for DRI driver options.
+# Copyright (C) 2005 Felix Kuehling
+# This file is distributed under the same license as the Mesa package.
+# Felix Kuehling <fxkuehl@gmx.de>, 2005.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: Mesa 6.3\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2005-04-11 23:19+0200\n"
+"PO-Revision-Date: 2005-04-11 01:34+0200\n"
+"Last-Translator: Felix Kuehling <fxkuehl@gmx.de>\n"
+"Language-Team: German <de@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+#: t_options.h:53
+msgid "Debugging"
+msgstr "Fehlersuche"
+
+#: t_options.h:57
+msgid "Disable 3D acceleration"
+msgstr "3D-Beschleunigung abschalten"
+
+#: t_options.h:62
+msgid "Show performance boxes"
+msgstr "Zeige Performanceboxen"
+
+#: t_options.h:69
+msgid "Image Quality"
+msgstr "Bildqualität"
+
+#: t_options.h:77
+msgid "Texture color depth"
+msgstr "Texturfarbtiefe"
+
+#: t_options.h:78
+msgid "Prefer frame buffer color depth"
+msgstr "Bevorzuge Farbtiefe des Framebuffers"
+
+#: t_options.h:79
+msgid "Prefer 32 bits per texel"
+msgstr "Bevorzuge 32 bits pro Texel"
+
+#: t_options.h:80
+msgid "Prefer 16 bits per texel"
+msgstr "Bevorzuge 16 bits pro Texel"
+
+#: t_options.h:81
+msgid "Force 16 bits per texel"
+msgstr "Erzwinge 16 bits pro Texel"
+
+#: t_options.h:87
+msgid "Initial maximum value for anisotropic texture filtering"
+msgstr "Initialer Maximalwert für anisotropische Texturfilterung"
+
+#: t_options.h:92
+msgid "Forbid negative texture LOD bias"
+msgstr "Verbiete negative Textur-Detailgradverschiebung"
+
+#: t_options.h:97
+msgid ""
+"Enable S3TC texture compression even if software support is not available"
+msgstr ""
+"Aktiviere S3TC Texturkomprimierung auch wenn die nötige "
+"Softwareunterstützung fehlt"
+
+#: t_options.h:104
+msgid "Initial color reduction method"
+msgstr "Initiale Farbreduktionsmethode"
+
+#: t_options.h:105
+msgid "Round colors"
+msgstr "Farben runden"
+
+#: t_options.h:106
+msgid "Dither colors"
+msgstr "Farben rastern"
+
+#: t_options.h:114
+msgid "Color rounding method"
+msgstr "Farbrundungsmethode"
+
+#: t_options.h:115
+msgid "Round color components downward"
+msgstr "Farbkomponenten abrunden"
+
+#: t_options.h:116
+msgid "Round to nearest color"
+msgstr "Zur ähnlichsten Farbe runden"
+
+#: t_options.h:125
+msgid "Color dithering method"
+msgstr "Farbrasterungsmethode"
+
+#: t_options.h:126
+msgid "Horizontal error diffusion"
+msgstr "Horizontale Fehlerstreuung"
+
+#: t_options.h:127
+msgid "Horizontal error diffusion, reset error at line start"
+msgstr "Horizontale Fehlerstreuung, Fehler am Zeilenanfang zurücksetzen"
+
+#: t_options.h:128
+msgid "Ordered 2D color dithering"
+msgstr "Geordnete 2D Farbrasterung"
+
+#: t_options.h:134
+msgid "Floating point depth buffer"
+msgstr "Fließkomma z-Puffer"
+
+#: t_options.h:140
+msgid "Performance"
+msgstr "Leistung"
+
+#: t_options.h:148
+msgid "TCL mode (Transformation, Clipping, Lighting)"
+msgstr "TCL-Modus (Transformation, Clipping, Licht)"
+
+#: t_options.h:149
+msgid "Use software TCL pipeline"
+msgstr "Benutze die Software-TCL-Pipeline"
+
+#: t_options.h:150
+msgid "Use hardware TCL as first TCL pipeline stage"
+msgstr "Benutze Hardware TCL als erste Stufe der TCL-Pipeline"
+
+#: t_options.h:151
+msgid "Bypass the TCL pipeline"
+msgstr "Umgehe die TCL-Pipeline"
+
+#: t_options.h:152
+msgid ""
+"Bypass the TCL pipeline with state-based machine code generated on-the-fly"
+msgstr ""
+"Umgehe die TCL-Pipeline mit zur Laufzeit erzeugtem, zustandsbasiertem "
+"Maschinencode"
+
+#: t_options.h:161
+msgid "Method to limit rendering latency"
+msgstr "Methode zur Begrenzung der Bildverzögerung"
+
+#: t_options.h:162
+msgid "Busy waiting for the graphics hardware"
+msgstr "Aktives Warten auf die Grafikhardware"
+
+#: t_options.h:163
+msgid "Sleep for brief intervals while waiting for the graphics hardware"
+msgstr "Kurze Schlafintervalle beim Warten auf die Grafikhardware"
+
+#: t_options.h:164
+msgid "Let the graphics hardware emit a software interrupt and sleep"
+msgstr ""
+"Die Grafikhardware eine Softwareunterbrechung erzeugen lassen und schlafen"
+
+#: t_options.h:174
+msgid "Synchronization with vertical refresh (swap intervals)"
+msgstr "Synchronisation mit der vertikalen Bildwiederholung"
+
+#: t_options.h:175
+msgid "Never synchronize with vertical refresh, ignore application's choice"
+msgstr ""
+"Niemals mit der Bildwiederholung synchronisieren, Anweisungen der Anwendung "
+"ignorieren"
+
+#: t_options.h:176
+msgid "Initial swap interval 0, obey application's choice"
+msgstr "Initiales Bildintervall 0, Anweisungen der Anwendung gehorchen"
+
+#: t_options.h:177
+msgid "Initial swap interval 1, obey application's choice"
+msgstr "Initiales Bildintervall 1, Anweisungen der Anwendung gehorchen"
+
+#: t_options.h:178
+msgid ""
+"Always synchronize with vertical refresh, application chooses the minimum "
+"swap interval"
+msgstr ""
+"Immer mit der Bildwiederholung synchronisieren, Anwendung wählt das minimale "
+"Bildintervall"
+
+#: t_options.h:186
+msgid "Use HyperZ to boost performance"
+msgstr "HyperZ zur Leistungssteigerung verwenden"
+
+#: t_options.h:191
+msgid "Number of texture units used"
+msgstr "Anzahl der benutzten Textureinheiten"
+
+#: t_options.h:196
+msgid "Support larger textures not guaranteed to fit into graphics memory"
+msgstr "Unterstütze grosse Texturen die evtl. nicht in den Grafikspeicher passen"
+
+#: t_options.h:197
+msgid "No"
+msgstr "Nein"
+
+#: t_options.h:198
+msgid "At least 1 texture must fit under worst-case assumptions"
+msgstr "Mindestens 1 Textur muss auch im schlechtesten Fall Platz haben"
+
+#: t_options.h:199
+msgid "Announce hardware limits"
+msgstr "Benutze Hardware-Limits"
+
+#: t_options.h:205
+msgid "Texture filtering quality vs. speed, AKA “brilinear” texture filtering"
+msgstr ""
+"Texturfilterqualität versus -geschwindigkeit, auch bekannt als „brilineare“ "
+"Texturfilterung"
+
+#: t_options.h:213
+msgid "Used types of texture memory"
+msgstr "Benutzte Arten von Texturspeicher"
+
+#: t_options.h:214
+msgid "All available memory"
+msgstr "Aller verfügbarer Speicher"
+
+#: t_options.h:215
+msgid "Only card memory (if available)"
+msgstr "Nur Grafikspeicher (falls verfügbar)"
+
+#: t_options.h:216
+msgid "Only GART (AGP/PCIE) memory (if available)"
+msgstr "Nur GART-Speicher (AGP/PCIE) (falls verfügbar)"
+
+#: t_options.h:224
+msgid "Features that are not hardware-accelerated"
+msgstr "Funktionalität, die nicht hardwarebeschleunigt ist"
+
+#: t_options.h:228
+msgid "Enable extension GL_ARB_vertex_program"
+msgstr "Erweiterung GL_ARB_vertex_program aktivieren"
+
+#: t_options.h:233
+msgid "Enable extension GL_NV_vertex_program"
+msgstr "Erweiterung GL_NV_vertex_program aktivieren"
diff --git a/src/mesa/drivers/dri/common/xmlpool/es.po b/src/mesa/drivers/dri/common/xmlpool/es.po
new file mode 100644
index 0000000000..82ad177cb2
--- /dev/null
+++ b/src/mesa/drivers/dri/common/xmlpool/es.po
@@ -0,0 +1,219 @@
+# translation of es.po to Spanish
+# Spanish translations for PACKAGE package.
+# Copyright (C) 2005 THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# David <deifo@ono.com>, 2005.
+# David Rubio Miguélez <deifo@ono.com>, 2005.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: es\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2005-04-12 12:18+0200\n"
+"PO-Revision-Date: 2005-04-12 20:26+0200\n"
+"Last-Translator: David Rubio Miguélez <deifo@ono.com>\n"
+"Language-Team: Spanish <es@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+"X-Generator: KBabel 1.10\n"
+
+#: t_options.h:53
+msgid "Debugging"
+msgstr "Depurando"
+
+#: t_options.h:57
+msgid "Disable 3D acceleration"
+msgstr "Desactivar aceleración 3D"
+
+#: t_options.h:62
+msgid "Show performance boxes"
+msgstr "Mostrar cajas de rendimiento"
+
+#: t_options.h:69
+msgid "Image Quality"
+msgstr "Calidad de imagen"
+
+#: t_options.h:77
+msgid "Texture color depth"
+msgstr "Profundidad de color de textura"
+
+#: t_options.h:78
+msgid "Prefer frame buffer color depth"
+msgstr "Preferir profundidad de color del \"framebuffer\""
+
+#: t_options.h:79
+msgid "Prefer 32 bits per texel"
+msgstr "Preferir 32 bits por texel"
+
+#: t_options.h:80
+msgid "Prefer 16 bits per texel"
+msgstr "Preferir 16 bits por texel"
+
+#: t_options.h:81
+msgid "Force 16 bits per texel"
+msgstr "Forzar a 16 bits por texel"
+
+#: t_options.h:87
+msgid "Initial maximum value for anisotropic texture filtering"
+msgstr "Valor máximo inicial para filtrado anisotrópico de textura"
+
+#: t_options.h:92
+msgid "Forbid negative texture LOD bias"
+msgstr "Prohibir valores negativos de Nivel De Detalle (LOD) de texturas"
+
+#: t_options.h:97
+msgid "Enable S3TC texture compression even if software support is not available"
+msgstr "Activar la compresión de texturas S3TC incluso si el soporte por software no está disponible"
+
+#: t_options.h:104
+msgid "Initial color reduction method"
+msgstr "Método inicial de reducción de color"
+
+#: t_options.h:105
+msgid "Round colors"
+msgstr "Colores redondeados"
+
+#: t_options.h:106
+msgid "Dither colors"
+msgstr "Colores suavizados"
+
+#: t_options.h:114
+msgid "Color rounding method"
+msgstr "Método de redondeo de colores"
+
+#: t_options.h:115
+msgid "Round color components downward"
+msgstr "Redondear hacia abajo los componentes de color"
+
+#: t_options.h:116
+msgid "Round to nearest color"
+msgstr "Redondear al color más cercano"
+
+#: t_options.h:125
+msgid "Color dithering method"
+msgstr "Método de suavizado de color"
+
+#: t_options.h:126
+msgid "Horizontal error diffusion"
+msgstr "Difusión de error horizontal"
+
+#: t_options.h:127
+msgid "Horizontal error diffusion, reset error at line start"
+msgstr "Difusión de error horizontal, reiniciar error al comienzo de línea"
+
+#: t_options.h:128
+msgid "Ordered 2D color dithering"
+msgstr "Suavizado de color 2D ordenado"
+
+#: t_options.h:134
+msgid "Floating point depth buffer"
+msgstr "Búfer de profundidad en coma flotante"
+
+#: t_options.h:140
+msgid "Performance"
+msgstr "Rendimiento"
+
+#: t_options.h:148
+msgid "TCL mode (Transformation, Clipping, Lighting)"
+msgstr "Modo TCL (Transformación, Recorte, Iluminación)"
+
+#: t_options.h:149
+msgid "Use software TCL pipeline"
+msgstr "Usar tubería TCL por software"
+
+#: t_options.h:150
+msgid "Use hardware TCL as first TCL pipeline stage"
+msgstr "Usar TCL por hardware en la primera fase de la tubería TCL"
+
+#: t_options.h:151
+msgid "Bypass the TCL pipeline"
+msgstr "Pasar por alto la tubería TCL"
+
+#: t_options.h:152
+msgid "Bypass the TCL pipeline with state-based machine code generated on-the-fly"
+msgstr "Pasar por alto la tubería TCL con código máquina basado en estados generado al vuelo"
+
+#: t_options.h:161
+msgid "Method to limit rendering latency"
+msgstr "Método para limitar la latencia de rénder"
+
+#: t_options.h:162
+msgid "Busy waiting for the graphics hardware"
+msgstr "Esperar activamente al hardware gráfico"
+
+#: t_options.h:163
+msgid "Sleep for brief intervals while waiting for the graphics hardware"
+msgstr "Dormir en intervalos cortos mientras se espera al hardware gráfico"
+
+#: t_options.h:164
+msgid "Let the graphics hardware emit a software interrupt and sleep"
+msgstr "Permitir que el hardware gráfico emita una interrupción de software y duerma"
+
+#: t_options.h:174
+msgid "Synchronization with vertical refresh (swap intervals)"
+msgstr "Sincronización con el refresco vertical (intervalos de intercambio)"
+
+#: t_options.h:175
+msgid "Never synchronize with vertical refresh, ignore application's choice"
+msgstr "No sincronizar nunca con el refresco vertical, ignorar la elección de la aplicación"
+
+#: t_options.h:176
+msgid "Initial swap interval 0, obey application's choice"
+msgstr "Intervalo de intercambio inicial 0, obedecer la elección de la aplicación"
+
+#: t_options.h:177
+msgid "Initial swap interval 1, obey application's choice"
+msgstr "Intervalo de intercambio inicial 1, obedecer la elección de la aplicación"
+
+#: t_options.h:178
+msgid ""
+"Always synchronize with vertical refresh, application chooses the minimum "
+"swap interval"
+msgstr "Sincronizar siempre con el refresco vertical, la aplicación elige el intervalo de intercambio mínimo"
+
+#: t_options.h:186
+msgid "Use HyperZ to boost performance"
+msgstr "Usar HyperZ para potenciar rendimiento"
+
+#: t_options.h:191
+msgid "Number of texture units used"
+msgstr "Número de unidades de textura usadas"
+
+#: t_options.h:196
+msgid "Enable hack to allow larger textures with texture compression on radeon/r200"
+msgstr "Activar \"hack\" para permitir texturas más grandes con compresión de textura activada en la Radeon/r200"
+
+#: t_options.h:201
+msgid "Texture filtering quality vs. speed, AKA “brilinear” texture filtering"
+msgstr "Calidad de filtrado de textura vs. velocidad, alias filtrado \"brilinear\" de textura"
+
+#: t_options.h:209
+msgid "Used types of texture memory"
+msgstr "Tipos de memoria de textura usados"
+
+#: t_options.h:210
+msgid "All available memory"
+msgstr "Toda la memoria disponible"
+
+#: t_options.h:211
+msgid "Only card memory (if available)"
+msgstr "Sólo la memoria de la tarjeta (si disponible)"
+
+#: t_options.h:212
+msgid "Only GART (AGP/PCIE) memory (if available)"
+msgstr "Sólo memoria GART (AGP/PCIE) (si disponible)"
+
+#: t_options.h:220
+msgid "Features that are not hardware-accelerated"
+msgstr "Características no aceleradas por hardware"
+
+#: t_options.h:224
+msgid "Enable extension GL_ARB_vertex_program"
+msgstr "Activar la extensión GL_ARB_vertex_program"
+
+#: t_options.h:229
+msgid "Enable extension GL_NV_vertex_program"
+msgstr "Activar extensión GL_NV_vertex_program"
+
diff --git a/src/mesa/drivers/dri/common/xmlpool/fr.po b/src/mesa/drivers/dri/common/xmlpool/fr.po
new file mode 100644
index 0000000000..19f3b4a4ed
--- /dev/null
+++ b/src/mesa/drivers/dri/common/xmlpool/fr.po
@@ -0,0 +1,225 @@
+# French translations for DRI driver options.
+# Copyright (C) 2005 Stephane Marchesin
+# This file is distributed under the same license as the Mesa package.
+# Stephane Marchesin <marchesin@icps.u-strasbg.fr>, 2005.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: Mesa 6.3\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2005-04-11 23:19+0200\n"
+"PO-Revision-Date: 2005-04-11 01:34+0200\n"
+"Last-Translator: Stephane Marchesin <marchesin@icps.u-strasbg.fr>\n"
+"Language-Team: French <fr@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+#: t_options.h:53
+msgid "Debugging"
+msgstr "Débogage"
+
+#: t_options.h:57
+msgid "Disable 3D acceleration"
+msgstr "Désactiver l'accélération 3D"
+
+#: t_options.h:62
+msgid "Show performance boxes"
+msgstr "Afficher les boîtes de performance"
+
+#: t_options.h:69
+msgid "Image Quality"
+msgstr "Qualité d'image"
+
+#: t_options.h:77
+msgid "Texture color depth"
+msgstr "Profondeur de texture"
+
+#: t_options.h:78
+msgid "Prefer frame buffer color depth"
+msgstr "Profondeur de couleur"
+
+#: t_options.h:79
+msgid "Prefer 32 bits per texel"
+msgstr "Préférer 32 bits par texel"
+
+#: t_options.h:80
+msgid "Prefer 16 bits per texel"
+msgstr "Préférer 16 bits par texel"
+
+#: t_options.h:81
+msgid "Force 16 bits per texel"
+msgstr "Forcer 16 bits par texel"
+
+#: t_options.h:87
+msgid "Initial maximum value for anisotropic texture filtering"
+msgstr "Valeur maximale initiale pour le filtrage anisotropique de texture"
+
+#: t_options.h:92
+msgid "Forbid negative texture LOD bias"
+msgstr "Interdire le LOD bias négatif"
+
+#: t_options.h:97
+msgid ""
+"Enable S3TC texture compression even if software support is not available"
+msgstr ""
+"Activer la compression de texture S3TC même si le support logiciel est absent"
+
+#: t_options.h:104
+msgid "Initial color reduction method"
+msgstr "Technique de réduction de couleurs"
+
+#: t_options.h:105
+msgid "Round colors"
+msgstr "Arrondir les valeurs de couleur"
+
+#: t_options.h:106
+msgid "Dither colors"
+msgstr "Tramer les couleurs"
+
+#: t_options.h:114
+msgid "Color rounding method"
+msgstr "Méthode d'arrondi des couleurs"
+
+#: t_options.h:115
+msgid "Round color components downward"
+msgstr "Arrondi à l'inférieur"
+
+#: t_options.h:116
+msgid "Round to nearest color"
+msgstr "Arrondi au plus proche"
+
+#: t_options.h:125
+msgid "Color dithering method"
+msgstr "Méthode de tramage"
+
+#: t_options.h:126
+msgid "Horizontal error diffusion"
+msgstr "Diffusion d'erreur horizontale"
+
+#: t_options.h:127
+msgid "Horizontal error diffusion, reset error at line start"
+msgstr "Diffusion d'erreur horizontale, réinitialisé pour chaque ligne"
+
+#: t_options.h:128
+msgid "Ordered 2D color dithering"
+msgstr "Tramage ordonné des couleurs"
+
+#: t_options.h:134
+msgid "Floating point depth buffer"
+msgstr "Z-buffer en virgule flottante"
+
+#: t_options.h:140
+msgid "Performance"
+msgstr "Performance"
+
+#: t_options.h:148
+msgid "TCL mode (Transformation, Clipping, Lighting)"
+msgstr "Mode de TCL (Transformation, Clipping, Eclairage)"
+
+#: t_options.h:149
+msgid "Use software TCL pipeline"
+msgstr "Utiliser un pipeline TCL logiciel"
+
+#: t_options.h:150
+msgid "Use hardware TCL as first TCL pipeline stage"
+msgstr "Utiliser le TCL matériel pour le premier niveau de pipeline"
+
+#: t_options.h:151
+msgid "Bypass the TCL pipeline"
+msgstr "Court-circuiter le pipeline TCL"
+
+#: t_options.h:152
+msgid ""
+"Bypass the TCL pipeline with state-based machine code generated on-the-fly"
+msgstr ""
+"Court-circuiter le pipeline TCL par une machine à états qui génère le code "
+"de TCL à la volée"
+
+#: t_options.h:161
+msgid "Method to limit rendering latency"
+msgstr "Méthode d'attente de la carte graphique"
+
+#: t_options.h:162
+msgid "Busy waiting for the graphics hardware"
+msgstr "Attente active de la carte graphique"
+
+#: t_options.h:163
+msgid "Sleep for brief intervals while waiting for the graphics hardware"
+msgstr "Attente utilisant usleep()"
+
+#: t_options.h:164
+msgid "Let the graphics hardware emit a software interrupt and sleep"
+msgstr "Utiliser les interruptions"
+
+#: t_options.h:174
+msgid "Synchronization with vertical refresh (swap intervals)"
+msgstr "Synchronisation de l'affichage avec le balayage vertical"
+
+#: t_options.h:175
+msgid "Never synchronize with vertical refresh, ignore application's choice"
+msgstr "Ne jamais synchroniser avec le balayage vertical, ignorer le choix de l'application"
+
+#: t_options.h:176
+msgid "Initial swap interval 0, obey application's choice"
+msgstr "Ne pas synchroniser avec le balayage vertical par défaut, mais obéir au choix de l'application"
+
+#: t_options.h:177
+msgid "Initial swap interval 1, obey application's choice"
+msgstr "Synchroniser avec le balayage vertical par défaut, mais obéir au choix de l'application"
+
+#: t_options.h:178
+msgid ""
+"Always synchronize with vertical refresh, application chooses the minimum "
+"swap interval"
+msgstr ""
+"Toujours synchroniser avec le balayage vertical, l'application choisit l'intervalle minimal"
+
+#: t_options.h:186
+msgid "Use HyperZ to boost performance"
+msgstr "Utiliser le HyperZ pour améliorer les performances"
+
+#: t_options.h:191
+msgid "Number of texture units used"
+msgstr "Nombre d'unités de texture"
+
+#: t_options.h:196
+msgid ""
+"Enable hack to allow larger textures with texture compression on radeon/r200"
+msgstr ""
+"Activer le hack permettant l'utilisation de textures de grande taille avec la "
+"compression de textures sur radeon/r200"
+
+#: t_options.h:201
+msgid "Texture filtering quality vs. speed, AKA “brilinear” texture filtering"
+msgstr ""
+"Qualité/performance du filtrage trilinéaire de texture (filtrage brilinéaire)"
+
+#: t_options.h:209
+msgid "Used types of texture memory"
+msgstr "Types de mémoire de texture"
+
+#: t_options.h:210
+msgid "All available memory"
+msgstr "Utiliser toute la mémoire disponible"
+
+#: t_options.h:211
+msgid "Only card memory (if available)"
+msgstr "Utiliser uniquement la mémoire graphique (si disponible)"
+
+#: t_options.h:212
+msgid "Only GART (AGP/PCIE) memory (if available)"
+msgstr "Utiliser uniquement la mémoire GART (AGP/PCIE) (si disponible)"
+
+#: t_options.h:220
+msgid "Features that are not hardware-accelerated"
+msgstr "Fonctionnalités ne bénéficiant pas d'une accélération matérielle"
+
+#: t_options.h:224
+msgid "Enable extension GL_ARB_vertex_program"
+msgstr "Activer l'extension GL_ARB_vertex_program"
+
+#: t_options.h:229
+msgid "Enable extension GL_NV_vertex_program"
+msgstr "Activer l'extension GL_NV_vertex_program"
diff --git a/src/mesa/drivers/dri/common/xmlpool/gen_xmlpool.py b/src/mesa/drivers/dri/common/xmlpool/gen_xmlpool.py
new file mode 100644
index 0000000000..7398c4cd0b
--- /dev/null
+++ b/src/mesa/drivers/dri/common/xmlpool/gen_xmlpool.py
@@ -0,0 +1,191 @@
+#!/usr/bin/python
+
+import sys
+import gettext
+import re
+
+# List of supported languages
+languages = sys.argv[1:]
+
+# Escape special characters in C strings
+def escapeCString (s):
+    escapeSeqs = {'\a' : '\\a', '\b' : '\\b', '\f' : '\\f', '\n' : '\\n',
+                  '\r' : '\\r', '\t' : '\\t', '\v' : '\\v', '\\' : '\\\\'}
+    # " -> '' is a hack. Quotes (") aren't possible in XML attributes.
+    # Better use Unicode characters for typographic quotes in option
+    # descriptions and translations.
+    i = 0
+    r = ''
+    while i < len(s):
+        # Special case: escape double quote with \u201c or \u201d, depending
+        # on whether it's an open or close quote. This is needed because plain
+        # double quotes are not possible in XML attributes.
+        if s[i] == '"':
+            if i == len(s)-1 or s[i+1].isspace():
+                # close quote
+                q = u'\u201c'
+            else:
+                # open quote
+                q = u'\u201d'
+            r = r + q
+        elif escapeSeqs.has_key(s[i]):
+            r = r + escapeSeqs[s[i]]
+        else:
+            r = r + s[i]
+        i = i + 1
+    return r
+
+# Expand escape sequences in C strings (needed for gettext lookup)
+def expandCString (s):
+    escapeSeqs = {'a' : '\a', 'b' : '\b', 'f' : '\f', 'n' : '\n',
+                  'r' : '\r', 't' : '\t', 'v' : '\v',
+                  '"' : '"', '\\' : '\\'}
+    i = 0
+    escape = False
+    hexa = False
+    octa = False
+    num = 0
+    digits = 0
+    r = ''
+    while i < len(s):
+        if not escape:
+            if s[i] == '\\':
+                escape = True
+            else:
+                r = r + s[i]
+        elif hexa:
+            if (s[i] >= '0' and s[i] <= '9') or \
+               (s[i] >= 'a' and s[i] <= 'f') or \
+               (s[i] >= 'A' and s[i] <= 'F'):
+                num = num * 16 + int(s[i],16)
+                digits = digits + 1
+            else:
+                digits = 2
+            if digits >= 2:
+                hexa = False
+                escape = False
+                r = r + chr(num)
+        elif octa:
+            if s[i] >= '0' and s[i] <= '7':
+                num = num * 8 + int(s[i],8)
+                digits = digits + 1
+            else:
+                digits = 3
+            if digits >= 3:
+                octa = False
+                escape = False
+                r = r + chr(num)
+        else:
+            if escapeSeqs.has_key(s[i]):
+                r = r + escapeSeqs[s[i]]
+                escape = False
+            elif s[i] >= '0' and s[i] <= '7':
+                octa = True
+                num = int(s[i],8)
+                if num <= 3:
+                    digits = 1
+                else:
+                    digits = 2
+            elif s[i] == 'x' or s[i] == 'X':
+                hexa = True
+                num = 0
+                digits = 0
+            else:
+                r = r + s[i]
+                escape = False
+        i = i + 1
+    return r
+
+# Expand matches. The first match is always a DESC or DESC_BEGIN match.
+# Subsequent matches are ENUM matches.
+#
+# DESC, DESC_BEGIN format: \1 \2=<lang> \3 \4=gettext(" \5=<text> \6=") \7
+# ENUM format:             \1 \2=gettext(" \3=<text> \4=") \5
+def expandMatches (matches, translations, end=None):
+    assert len(matches) > 0
+    nTranslations = len(translations)
+    i = 0
+    # Expand the description+enums for all translations
+    for lang,trans in translations:
+        i = i + 1
+        # Make sure that all but the last line of a simple description
+        # are extended with a backslash.
+        suffix = ''
+        if len(matches) == 1 and i < len(translations) and \
+               not matches[0].expand (r'\7').endswith('\\'):
+            suffix = ' \\'
+        # Expand the description line. Need to use ugettext in order to allow
+        # non-ascii unicode chars in the original English descriptions.
+        text = escapeCString (trans.ugettext (unicode (expandCString (
+            matches[0].expand (r'\5')), "utf-8"))).encode("utf-8")
+        print matches[0].expand (r'\1' + lang + r'\3"' + text + r'"\7') + suffix
+        # Expand any subsequent enum lines
+        for match in matches[1:]:
+            text = escapeCString (trans.ugettext (unicode (expandCString (
+                match.expand (r'\3')), "utf-8"))).encode("utf-8")
+            print match.expand (r'\1"' + text + r'"\5')
+
+        # Expand description end
+        if end:
+            print end,
+
+# Compile a list of translation classes to all supported languages.
+# The first translation is always a NullTranslations.
+translations = [("en", gettext.NullTranslations())]
+for lang in languages:
+    try:
+        trans = gettext.translation ("options", ".", [lang])
+    except IOError:
+        sys.stderr.write ("Warning: language '%s' not found.\n" % lang)
+        continue
+    translations.append ((lang, trans))
+
+# Regular expressions:
+reLibintl_h  = re.compile (r'#\s*include\s*<libintl.h>')
+reDESC       = re.compile (r'(\s*DRI_CONF_DESC\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
+reDESC_BEGIN = re.compile (r'(\s*DRI_CONF_DESC_BEGIN\s*\(\s*)([a-z]+)(\s*,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
+reENUM       = re.compile (r'(\s*DRI_CONF_ENUM\s*\([^,]+,\s*)(gettext\s*\(\s*")(.*)("\s*\))(\s*\)[ \t]*\\?)$')
+reDESC_END   = re.compile (r'\s*DRI_CONF_DESC_END')
+
+# Print a header
+print \
+"/***********************************************************************\n" \
+" ***        THIS FILE IS GENERATED AUTOMATICALLY. DON'T EDIT!        ***\n" \
+" ***********************************************************************/"
+
+# Process the options template line by line and print the expanded text with
+# all translations to stdout.
+template = file ("t_options.h", "r")
+descMatches = []  # DESC_BEGIN match plus its ENUM matches collected so far
+for line in template:
+    if len(descMatches) > 0:  # a DESC_BEGIN was seen and not yet closed
+        matchENUM     = reENUM    .match (line)
+        matchDESC_END = reDESC_END.match (line)
+        if matchENUM:
+            descMatches.append (matchENUM)
+        elif matchDESC_END:
+            expandMatches (descMatches, translations, line)
+            descMatches = []
+        else:
+            sys.stderr.write (
+                "Warning: unexpected line inside description dropped:\n%s\n" \
+                % line)
+        continue
+    if reLibintl_h.search (line):
+        # Ignore (comment out) #include <libintl.h>
+        print "/* %s * commented out by gen_xmlpool.py */" % line
+        continue
+    matchDESC       = reDESC      .match (line)
+    matchDESC_BEGIN = reDESC_BEGIN.match (line)
+    if matchDESC:
+        assert len(descMatches) == 0
+        expandMatches ([matchDESC], translations)  # single-line description
+    elif matchDESC_BEGIN:
+        assert len(descMatches) == 0
+        descMatches = [matchDESC_BEGIN]  # start collecting ENUM lines
+    else:
+        print line,  # any other template line is copied through unchanged
+
+if len(descMatches) > 0:
+    sys.stderr.write ("Warning: unterminated description at end of file.\n")
+    expandMatches (descMatches, translations)
diff --git a/src/mesa/drivers/dri/common/xmlpool/nl.po b/src/mesa/drivers/dri/common/xmlpool/nl.po
new file mode 100644
index 0000000000..1e4cf167bf
--- /dev/null
+++ b/src/mesa/drivers/dri/common/xmlpool/nl.po
@@ -0,0 +1,230 @@
+# Dutch translations for PACKAGE package.
+# Copyright (C) 2005 THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+#  <manfred.stienstra@dwerg.net>, 2005.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2005-04-12 20:09+0200\n"
+"PO-Revision-Date: 2005-04-12 20:09+0200\n"
+"Last-Translator:  Manfred Stienstra <manfred.stienstra@dwerg.net>\n"
+"Language-Team: Dutch <vertaling@nl.linux.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+#: t_options.h:53
+msgid "Debugging"
+msgstr "Debuggen"
+
+#: t_options.h:57
+msgid "Disable 3D acceleration"
+msgstr "3D versnelling uitschakelen"
+
+#: t_options.h:62
+msgid "Show performance boxes"
+msgstr "Laat prestatie boxjes zien"
+
+#: t_options.h:69
+msgid "Image Quality"
+msgstr "Beeldkwaliteit"
+
+#: t_options.h:77
+msgid "Texture color depth"
+msgstr "Textuurkleurendiepte"
+
+#: t_options.h:78
+msgid "Prefer frame buffer color depth"
+msgstr "Prefereer kaderbufferkleurdiepte"
+
+#: t_options.h:79
+msgid "Prefer 32 bits per texel"
+msgstr "Prefereer 32 bits per texel"
+
+#: t_options.h:80
+msgid "Prefer 16 bits per texel"
+msgstr "Prefereer 16 bits per texel"
+
+#: t_options.h:81
+msgid "Force 16 bits per texel"
+msgstr "Dwing 16 bits per texel af"
+
+#: t_options.h:87
+msgid "Initial maximum value for anisotropic texture filtering"
+msgstr "Initiële maximum waarde voor anisotropische textuur filtering"
+
+#: t_options.h:92
+msgid "Forbid negative texture LOD bias"
+msgstr "Verbied negatief niveau detailonderscheid (LOD) van texturen"
+
+#: t_options.h:97
+msgid ""
+"Enable S3TC texture compression even if software support is not available"
+msgstr ""
+"Schakel S3TC textuurcompressie in, zelfs als softwareondersteuning niet "
+"aanwezig is"
+
+#: t_options.h:104
+msgid "Initial color reduction method"
+msgstr "Initiële kleurreductie methode"
+
+#: t_options.h:105
+msgid "Round colors"
+msgstr "Rond kleuren af"
+
+#: t_options.h:106
+msgid "Dither colors"
+msgstr "Rasteriseer kleuren"
+
+#: t_options.h:114
+msgid "Color rounding method"
+msgstr "Kleurafrondingmethode"
+
+#: t_options.h:115
+msgid "Round color components downward"
+msgstr "Rond kleurencomponenten af naar beneden"
+
+#: t_options.h:116
+msgid "Round to nearest color"
+msgstr "Rond af naar dichtstbijzijnde kleur"
+
+#: t_options.h:125
+msgid "Color dithering method"
+msgstr "Kleurrasteriseringsmethode"
+
+#: t_options.h:126
+msgid "Horizontal error diffusion"
+msgstr "Horizontale foutdiffusie"
+
+#: t_options.h:127
+msgid "Horizontal error diffusion, reset error at line start"
+msgstr "Horizontale foutdiffusie, zet fout bij lijnbegin terug"
+
+#: t_options.h:128
+msgid "Ordered 2D color dithering"
+msgstr "Geordende 2D kleurrasterisering"
+
+#: t_options.h:134
+msgid "Floating point depth buffer"
+msgstr "Dieptebuffer als commagetal"
+
+#: t_options.h:140
+msgid "Performance"
+msgstr "Prestatie"
+
+#: t_options.h:148
+msgid "TCL mode (Transformation, Clipping, Lighting)"
+msgstr "TCL-modus (Transformatie, Clipping, Licht)"
+
+#: t_options.h:149
+msgid "Use software TCL pipeline"
+msgstr "Gebruik software TCL pijpleiding"
+
+#: t_options.h:150
+msgid "Use hardware TCL as first TCL pipeline stage"
+msgstr "Gebruik hardware TCL als eerste TCL pijpleiding trap"
+
+#: t_options.h:151
+msgid "Bypass the TCL pipeline"
+msgstr "Omzeil de TCL pijpleiding"
+
+#: t_options.h:152
+msgid ""
+"Bypass the TCL pipeline with state-based machine code generated on-the-fly"
+msgstr ""
+"Omzeil de TCL pijpleiding met staatgebaseerde machinecode die tijdens "
+"executie gegenereerd wordt"
+
+#: t_options.h:161
+msgid "Method to limit rendering latency"
+msgstr "Methode om beeldopbouwvertraging te onderdrukken"
+
+#: t_options.h:162
+msgid "Busy waiting for the graphics hardware"
+msgstr "Actief wachten voor de grafische hardware"
+
+#: t_options.h:163
+msgid "Sleep for brief intervals while waiting for the graphics hardware"
+msgstr "Slaap voor korte intervallen tijdens het wachten op de grafische "
+"hardware"
+
+#: t_options.h:164
+msgid "Let the graphics hardware emit a software interrupt and sleep"
+msgstr "Laat de grafische hardware een software onderbreking uitzenden en in "
+"slaap vallen"
+
+#: t_options.h:174
+msgid "Synchronization with vertical refresh (swap intervals)"
+msgstr "Synchronisatie met verticale verversing (interval omwisselen)"
+
+#: t_options.h:175
+msgid "Never synchronize with vertical refresh, ignore application's choice"
+msgstr "Nooit synchroniseren met verticale verversing, negeer de keuze van de "
+"applicatie"
+
+#: t_options.h:176
+msgid "Initial swap interval 0, obey application's choice"
+msgstr "Initieel omwisselingsinterval 0, honoreer de keuze van de applicatie"
+
+#: t_options.h:177
+msgid "Initial swap interval 1, obey application's choice"
+msgstr "Initieel omwisselingsinterval 1, honoreer de keuze van de applicatie"
+
+#: t_options.h:178
+msgid ""
+"Always synchronize with vertical refresh, application chooses the minimum "
+"swap interval"
+msgstr ""
+"Synchroniseer altijd met verticale verversing, de applicatie kiest het "
+"minimum omwisselingsinterval"
+
+#: t_options.h:186
+msgid "Use HyperZ to boost performance"
+msgstr "Gebruik HyperZ om de prestaties te verbeteren"
+
+#: t_options.h:191
+msgid "Number of texture units used"
+msgstr "Aantal textuureenheden in gebruik"
+
+#: t_options.h:196
+msgid ""
+"Enable hack to allow larger textures with texture compression on radeon/r200"
+msgstr ""
+"Schakel hack in om met textuurcompressie grotere texturen toe te staan op "
+"een radeon/r200"
+
+#: t_options.h:201
+msgid "Texture filtering quality vs. speed, AKA “brilinear” texture filtering"
+msgstr "Textuurfilterkwaliteit versus -snelheid, ook wel bekend als "
+"“brilineaire” textuurfiltering"
+
+#: t_options.h:209
+msgid "Used types of texture memory"
+msgstr "Gebruikte soorten textuurgeheugen"
+
+#: t_options.h:210
+msgid "All available memory"
+msgstr "Al het beschikbaar geheugen"
+
+#: t_options.h:211
+msgid "Only card memory (if available)"
+msgstr "Alleen geheugen op de kaart (als het aanwezig is)"
+
+#: t_options.h:212
+msgid "Only GART (AGP/PCIE) memory (if available)"
+msgstr "Alleen GART (AGP/PCIE) geheugen (als het aanwezig is)"
+
+#: t_options.h:220
+msgid "Features that are not hardware-accelerated"
+msgstr "Eigenschappen die niet hardwareversneld zijn"
+
+#: t_options.h:224
+msgid "Enable extension GL_ARB_vertex_program"
+msgstr "Zet uitbreiding GL_ARB_vertex_program aan"
+
+#: t_options.h:229
+msgid "Enable extension GL_NV_vertex_program"
+msgstr "Zet uitbreiding GL_NV_vertex_program aan"
diff --git a/src/mesa/drivers/dri/common/xmlpool/options.h b/src/mesa/drivers/dri/common/xmlpool/options.h
new file mode 100644
index 0000000000..d76595578c
--- /dev/null
+++ b/src/mesa/drivers/dri/common/xmlpool/options.h
@@ -0,0 +1,568 @@
+/***********************************************************************
+ ***        THIS FILE IS GENERATED AUTOMATICALLY. DON'T EDIT!        ***
+ ***********************************************************************/
+/*
+ * XML DRI client-side driver configuration
+ * Copyright (C) 2003 Felix Kuehling
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * FELIX KUEHLING, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ */
+/**
+ * \file t_options.h
+ * \brief Templates of common options
+ * \author Felix Kuehling
+ *
+ * This file defines macros for common options that can be used to
+ * construct driConfigOptions in the drivers. This file is only a
+ * template containing English descriptions for options wrapped in
+ * gettext(). xgettext can be used to extract translatable
+ * strings. These strings can then be translated by anyone familiar
+ * with GNU gettext. gen_xmlpool.py takes this template and fills in
+ * all the translations. The result (options.h) is included by
+ * xmlpool.h which in turn can be included by drivers.
+ *
+ * The macros used to describe options in this file are defined in
+ * ../xmlpool.h.
+ */
+
+/* This is needed for xgettext to extract translatable strings.
+ * gen_xmlpool.py will discard this line. */
+/* #include <libintl.h>
+ * commented out by gen_xmlpool.py */
+
+/*
+ * predefined option sections and options with multi-lingual descriptions
+ */
+
+/** \brief Debugging options */
+#define DRI_CONF_SECTION_DEBUG \
+DRI_CONF_SECTION_BEGIN \
+	DRI_CONF_DESC(en,"Debugging") \
+	DRI_CONF_DESC(de,"Fehlersuche") \
+	DRI_CONF_DESC(es,"Depurando") \
+	DRI_CONF_DESC(nl,"Debuggen") \
+	DRI_CONF_DESC(fr,"Debogage") \
+	DRI_CONF_DESC(sv,"Felsökning")
+
+#define DRI_CONF_NO_RAST(def) \
+DRI_CONF_OPT_BEGIN(no_rast,bool,def) \
+        DRI_CONF_DESC(en,"Disable 3D acceleration") \
+        DRI_CONF_DESC(de,"3D-Beschleunigung abschalten") \
+        DRI_CONF_DESC(es,"Desactivar aceleración 3D") \
+        DRI_CONF_DESC(nl,"3D versnelling uitschakelen") \
+        DRI_CONF_DESC(fr,"Désactiver l'accélération 3D") \
+        DRI_CONF_DESC(sv,"Inaktivera 3D-accelerering") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PERFORMANCE_BOXES(def) \
+DRI_CONF_OPT_BEGIN(performance_boxes,bool,def) \
+        DRI_CONF_DESC(en,"Show performance boxes") \
+        DRI_CONF_DESC(de,"Zeige Performanceboxen") \
+        DRI_CONF_DESC(es,"Mostrar cajas de rendimiento") \
+        DRI_CONF_DESC(nl,"Laat prestatie boxjes zien") \
+        DRI_CONF_DESC(fr,"Afficher les boîtes de performance") \
+        DRI_CONF_DESC(sv,"Visa prestandarutor") \
+DRI_CONF_OPT_END
+
+
+/** \brief Image quality-related options */
+#define DRI_CONF_SECTION_QUALITY \
+DRI_CONF_SECTION_BEGIN \
+	DRI_CONF_DESC(en,"Image Quality") \
+	DRI_CONF_DESC(de,"Bildqualität") \
+	DRI_CONF_DESC(es,"Calidad de imagen") \
+	DRI_CONF_DESC(nl,"Beeldkwaliteit") \
+	DRI_CONF_DESC(fr,"Qualité d'image") \
+	DRI_CONF_DESC(sv,"Bildkvalitet")
+
+#define DRI_CONF_EXCESS_MIPMAP(def) \
+DRI_CONF_OPT_BEGIN(excess_mipmap,bool,def) \
+	DRI_CONF_DESC(en,"Enable extra mipmap level") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_TEXTURE_DEPTH_FB       0
+#define DRI_CONF_TEXTURE_DEPTH_32       1
+#define DRI_CONF_TEXTURE_DEPTH_16       2
+#define DRI_CONF_TEXTURE_DEPTH_FORCE_16 3
+#define DRI_CONF_TEXTURE_DEPTH(def) \
+DRI_CONF_OPT_BEGIN_V(texture_depth,enum,def,"0:3") \
+	DRI_CONF_DESC_BEGIN(en,"Texture color depth") \
+                DRI_CONF_ENUM(0,"Prefer frame buffer color depth") \
+                DRI_CONF_ENUM(1,"Prefer 32 bits per texel") \
+                DRI_CONF_ENUM(2,"Prefer 16 bits per texel") \
+                DRI_CONF_ENUM(3,"Force 16 bits per texel") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(de,"Texturfarbtiefe") \
+                DRI_CONF_ENUM(0,"Bevorzuge Farbtiefe des Framebuffers") \
+                DRI_CONF_ENUM(1,"Bevorzuge 32 bits pro Texel") \
+                DRI_CONF_ENUM(2,"Bevorzuge 16 bits pro Texel") \
+                DRI_CONF_ENUM(3,"Erzwinge 16 bits pro Texel") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(es,"Profundidad de color de textura") \
+                DRI_CONF_ENUM(0,"Preferir profundidad de color del ”framebuffer“") \
+                DRI_CONF_ENUM(1,"Preferir 32 bits por texel") \
+                DRI_CONF_ENUM(2,"Preferir 16 bits por texel") \
+                DRI_CONF_ENUM(3,"Forzar a 16 bits por texel") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(nl,"Textuurkleurendiepte") \
+                DRI_CONF_ENUM(0,"Prefereer kaderbufferkleurdiepte") \
+                DRI_CONF_ENUM(1,"Prefereer 32 bits per texel") \
+                DRI_CONF_ENUM(2,"Prefereer 16 bits per texel") \
+                DRI_CONF_ENUM(3,"Dwing 16 bits per texel af") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(fr,"Profondeur de texture") \
+                DRI_CONF_ENUM(0,"Profondeur de couleur") \
+                DRI_CONF_ENUM(1,"Préférer 32 bits par texel") \
+                DRI_CONF_ENUM(2,"Préférer 16 bits par texel") \
+                DRI_CONF_ENUM(3,"Forcer 16 bits par texel") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(sv,"Färgdjup för texturer") \
+                DRI_CONF_ENUM(0,"Föredra färgdjupet för framebuffer") \
+                DRI_CONF_ENUM(1,"Föredra 32 bitar per texel") \
+                DRI_CONF_ENUM(2,"Föredra 16 bitar per texel") \
+                DRI_CONF_ENUM(3,"Tvinga 16 bitar per texel") \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_DEF_MAX_ANISOTROPY(def,range) \
+DRI_CONF_OPT_BEGIN_V(def_max_anisotropy,float,def,range) \
+        DRI_CONF_DESC(en,"Initial maximum value for anisotropic texture filtering") \
+        DRI_CONF_DESC(de,"Initialer Maximalwert für anisotropische Texturfilterung") \
+        DRI_CONF_DESC(es,"Valor máximo inicial para filtrado anisotrópico de textura") \
+        DRI_CONF_DESC(nl,"Initïele maximum waarde voor anisotrophische textuur filtering") \
+        DRI_CONF_DESC(fr,"Valeur maximale initiale pour le filtrage anisotropique de texture") \
+        DRI_CONF_DESC(sv,"Initialt maximalt värde för anisotropisk texturfiltrering") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_NO_NEG_LOD_BIAS(def) \
+DRI_CONF_OPT_BEGIN(no_neg_lod_bias,bool,def) \
+        DRI_CONF_DESC(en,"Forbid negative texture LOD bias") \
+        DRI_CONF_DESC(de,"Verbiete negative Textur-Detailgradverschiebung") \
+        DRI_CONF_DESC(es,"Prohibir valores negativos de Nivel De Detalle (LOD) de texturas") \
+        DRI_CONF_DESC(nl,"Verbied negatief niveau detailonderscheid (LOD) van texturen") \
+        DRI_CONF_DESC(fr,"Interdire le LOD bias negatif") \
+        DRI_CONF_DESC(sv,"Förbjud negativ LOD-kompensation för texturer") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_FORCE_S3TC_ENABLE(def) \
+DRI_CONF_OPT_BEGIN(force_s3tc_enable,bool,def) \
+        DRI_CONF_DESC(en,"Enable S3TC texture compression even if software support is not available") \
+        DRI_CONF_DESC(de,"Aktiviere S3TC Texturkomprimierung auch wenn die nötige Softwareunterstützung fehlt") \
+        DRI_CONF_DESC(es,"Activar la compresión de texturas S3TC incluso si el soporte por software no está disponible") \
+        DRI_CONF_DESC(nl,"Schakel S3TC textuurcompressie in, zelfs als softwareondersteuning niet aanwezig is") \
+        DRI_CONF_DESC(fr,"Activer la compression de texture S3TC même si le support logiciel est absent") \
+        DRI_CONF_DESC(sv,"Aktivera S3TC-texturkomprimering även om programvarustöd saknas") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_COLOR_REDUCTION_ROUND 0
+#define DRI_CONF_COLOR_REDUCTION_DITHER 1
+#define DRI_CONF_COLOR_REDUCTION(def) \
+DRI_CONF_OPT_BEGIN_V(color_reduction,enum,def,"0:1") \
+        DRI_CONF_DESC_BEGIN(en,"Initial color reduction method") \
+                DRI_CONF_ENUM(0,"Round colors") \
+                DRI_CONF_ENUM(1,"Dither colors") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(de,"Initiale Farbreduktionsmethode") \
+                DRI_CONF_ENUM(0,"Farben runden") \
+                DRI_CONF_ENUM(1,"Farben rastern") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(es,"Método inicial de reducción de color") \
+                DRI_CONF_ENUM(0,"Colores redondeados") \
+                DRI_CONF_ENUM(1,"Colores suavizados") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(nl,"Initïele kleurreductie methode") \
+                DRI_CONF_ENUM(0,"Rond kleuren af") \
+                DRI_CONF_ENUM(1,"Rasteriseer kleuren") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(fr,"Technique de réduction de couleurs") \
+                DRI_CONF_ENUM(0,"Arrondir les valeurs de couleur") \
+                DRI_CONF_ENUM(1,"Tramer les couleurs") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(sv,"Initial färgminskningsmetod") \
+                DRI_CONF_ENUM(0,"Avrunda färger") \
+                DRI_CONF_ENUM(1,"Utjämna färger") \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_ROUND_TRUNC 0
+#define DRI_CONF_ROUND_ROUND 1
+#define DRI_CONF_ROUND_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(round_mode,enum,def,"0:1") \
+	DRI_CONF_DESC_BEGIN(en,"Color rounding method") \
+                DRI_CONF_ENUM(0,"Round color components downward") \
+                DRI_CONF_ENUM(1,"Round to nearest color") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(de,"Farbrundungsmethode") \
+                DRI_CONF_ENUM(0,"Farbkomponenten abrunden") \
+                DRI_CONF_ENUM(1,"Zur ähnlichsten Farbe runden") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(es,"Método de redondeo de colores") \
+                DRI_CONF_ENUM(0,"Redondear hacia abajo los componentes de color") \
+                DRI_CONF_ENUM(1,"Redondear al color más cercano") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(nl,"Kleurafrondingmethode") \
+                DRI_CONF_ENUM(0,"Rond kleurencomponenten af naar beneden") \
+                DRI_CONF_ENUM(1,"Rond af naar dichtstbijzijnde kleur") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(fr,"Méthode d'arrondi des couleurs") \
+                DRI_CONF_ENUM(0,"Arrondi à l'inférieur") \
+                DRI_CONF_ENUM(1,"Arrondi au plus proche") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(sv,"Färgavrundningsmetod") \
+                DRI_CONF_ENUM(0,"Avrunda färgkomponenter nedåt") \
+                DRI_CONF_ENUM(1,"Avrunda till närmsta färg") \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_DITHER_XERRORDIFF 0
+#define DRI_CONF_DITHER_XERRORDIFFRESET 1
+#define DRI_CONF_DITHER_ORDERED 2
+#define DRI_CONF_DITHER_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(dither_mode,enum,def,"0:2") \
+	DRI_CONF_DESC_BEGIN(en,"Color dithering method") \
+                DRI_CONF_ENUM(0,"Horizontal error diffusion") \
+                DRI_CONF_ENUM(1,"Horizontal error diffusion, reset error at line start") \
+                DRI_CONF_ENUM(2,"Ordered 2D color dithering") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(de,"Farbrasterungsmethode") \
+                DRI_CONF_ENUM(0,"Horizontale Fehlerstreuung") \
+                DRI_CONF_ENUM(1,"Horizontale Fehlerstreuung, Fehler am Zeilenanfang zurücksetzen") \
+                DRI_CONF_ENUM(2,"Geordnete 2D Farbrasterung") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(es,"Método de suavizado de color") \
+                DRI_CONF_ENUM(0,"Difusión de error horizontal") \
+                DRI_CONF_ENUM(1,"Difusión de error horizontal, reiniciar error al comienzo de línea") \
+                DRI_CONF_ENUM(2,"Suavizado de color 2D ordenado") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(nl,"Kleurrasteriseringsmethode") \
+                DRI_CONF_ENUM(0,"Horizontale foutdiffusie") \
+                DRI_CONF_ENUM(1,"Horizontale foutdiffusie, zet fout bij lijnbegin terug") \
+                DRI_CONF_ENUM(2,"Geordende 2D kleurrasterisering") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(fr,"Méthode de tramage") \
+                DRI_CONF_ENUM(0,"Diffusion d'erreur horizontale") \
+                DRI_CONF_ENUM(1,"Diffusion d'erreur horizontale, réinitialisé pour chaque ligne") \
+                DRI_CONF_ENUM(2,"Tramage ordonné des couleurs") \
+        DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(sv,"Färgutjämningsmetod") \
+                DRI_CONF_ENUM(0,"Horisontell felspridning") \
+                DRI_CONF_ENUM(1,"Horisontell felspridning, återställ fel vid radbörjan") \
+                DRI_CONF_ENUM(2,"Ordnad 2D-färgutjämning") \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_FLOAT_DEPTH(def) \
+DRI_CONF_OPT_BEGIN(float_depth,bool,def) \
+        DRI_CONF_DESC(en,"Floating point depth buffer") \
+        DRI_CONF_DESC(de,"Fließkomma z-Puffer") \
+        DRI_CONF_DESC(es,"Búfer de profundidad en coma flotante") \
+        DRI_CONF_DESC(nl,"Dieptebuffer als commagetal") \
+        DRI_CONF_DESC(fr,"Z-buffer en virgule flottante") \
+        DRI_CONF_DESC(sv,"Buffert för flytande punktdjup") \
+DRI_CONF_OPT_END
+
+/** \brief Performance-related options */
+#define DRI_CONF_SECTION_PERFORMANCE \
+DRI_CONF_SECTION_BEGIN \
+        DRI_CONF_DESC(en,"Performance") \
+        DRI_CONF_DESC(de,"Leistung") \
+        DRI_CONF_DESC(es,"Rendimiento") \
+        DRI_CONF_DESC(nl,"Prestatie") \
+        DRI_CONF_DESC(fr,"Performance") \
+        DRI_CONF_DESC(sv,"Prestanda")
+
+#define DRI_CONF_TCL_SW 0
+#define DRI_CONF_TCL_PIPELINED 1
+#define DRI_CONF_TCL_VTXFMT 2
+#define DRI_CONF_TCL_CODEGEN 3
+#define DRI_CONF_TCL_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(tcl_mode,enum,def,"0:3") \
+        DRI_CONF_DESC_BEGIN(en,"TCL mode (Transformation, Clipping, Lighting)") \
+                DRI_CONF_ENUM(0,"Use software TCL pipeline") \
+                DRI_CONF_ENUM(1,"Use hardware TCL as first TCL pipeline stage") \
+                DRI_CONF_ENUM(2,"Bypass the TCL pipeline") \
+                DRI_CONF_ENUM(3,"Bypass the TCL pipeline with state-based machine code generated on-the-fly") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(de,"TCL-Modus (Transformation, Clipping, Licht)") \
+                DRI_CONF_ENUM(0,"Benutze die Software-TCL-Pipeline") \
+                DRI_CONF_ENUM(1,"Benutze Hardware TCL als erste Stufe der TCL-Pipeline") \
+                DRI_CONF_ENUM(2,"Umgehe die TCL-Pipeline") \
+                DRI_CONF_ENUM(3,"Umgehe die TCL-Pipeline mit zur Laufzeit erzeugtem, zustandsbasiertem Maschinencode") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(es,"Modo TCL (Transformación, Recorte, Iluminación)") \
+                DRI_CONF_ENUM(0,"Usar tubería TCL por software") \
+                DRI_CONF_ENUM(1,"Usar TCL por hardware en la primera fase de la tubería TCL") \
+                DRI_CONF_ENUM(2,"Pasar por alto la tubería TCL") \
+                DRI_CONF_ENUM(3,"Pasar por alto la tubería TCL con código máquina basado en estados generado al vuelo") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(nl,"TCL-modus (Transformatie, Clipping, Licht)") \
+                DRI_CONF_ENUM(0,"Gebruik software TCL pijpleiding") \
+                DRI_CONF_ENUM(1,"Gebruik hardware TCL als eerste TCL pijpleiding trap") \
+                DRI_CONF_ENUM(2,"Omzeil de TCL pijpleiding") \
+                DRI_CONF_ENUM(3,"Omzeil de TCL pijpleiding met staatgebaseerde machinecode die tijdens executie gegenereerd wordt") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(fr,"Mode de TCL (Transformation, Clipping, Eclairage)") \
+                DRI_CONF_ENUM(0,"Utiliser un pipeline TCL logiciel") \
+                DRI_CONF_ENUM(1,"Utiliser le TCL matériel pour le premier niveau de pipeline") \
+                DRI_CONF_ENUM(2,"Court-circuiter le pipeline TCL") \
+                DRI_CONF_ENUM(3,"Court-circuiter le pipeline TCL par une machine à états qui génère le code de TCL à la volée") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(sv,"TCL-läge (Transformation, Clipping, Lighting)") \
+                DRI_CONF_ENUM(0,"Använd programvaru-TCL-rörledning") \
+                DRI_CONF_ENUM(1,"Använd maskinvaru-TCL som första TCL-rörledningssteg") \
+                DRI_CONF_ENUM(2,"Kringgå TCL-rörledningen") \
+                DRI_CONF_ENUM(3,"Kringgå TCL-rörledningen med tillståndsbaserad maskinkod som direktgenereras") \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_FTHROTTLE_BUSY 0
+#define DRI_CONF_FTHROTTLE_USLEEPS 1
+#define DRI_CONF_FTHROTTLE_IRQS 2
+#define DRI_CONF_FTHROTTLE_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(fthrottle_mode,enum,def,"0:2") \
+        DRI_CONF_DESC_BEGIN(en,"Method to limit rendering latency") \
+                DRI_CONF_ENUM(0,"Busy waiting for the graphics hardware") \
+                DRI_CONF_ENUM(1,"Sleep for brief intervals while waiting for the graphics hardware") \
+                DRI_CONF_ENUM(2,"Let the graphics hardware emit a software interrupt and sleep") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(de,"Methode zur Begrenzung der Bildverzögerung") \
+                DRI_CONF_ENUM(0,"Aktives Warten auf die Grafikhardware") \
+                DRI_CONF_ENUM(1,"Kurze Schlafintervalle beim Warten auf die Grafikhardware") \
+                DRI_CONF_ENUM(2,"Die Grafikhardware eine Softwareunterbrechung erzeugen lassen und schlafen") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(es,"Método para limitar la latencia de rénder") \
+                DRI_CONF_ENUM(0,"Esperar activamente al hardware gráfico") \
+                DRI_CONF_ENUM(1,"Dormir en intervalos cortos mientras se espera al hardware gráfico") \
+                DRI_CONF_ENUM(2,"Permitir que el hardware gráfico emita una interrupción de software y duerma") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(nl,"Methode om beeldopbouwvertraging te onderdrukken") \
+                DRI_CONF_ENUM(0,"Actief wachten voor de grafische hardware") \
+                DRI_CONF_ENUM(1,"Slaap voor korte intervallen tijdens het wachten op de grafische hardware") \
+                DRI_CONF_ENUM(2,"Laat de grafische hardware een software onderbreking uitzenden en in slaap vallen") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(fr,"Méthode d'attente de la carte graphique") \
+                DRI_CONF_ENUM(0,"Attente active de la carte graphique") \
+                DRI_CONF_ENUM(1,"Attente utilisant usleep()") \
+                DRI_CONF_ENUM(2,"Utiliser les interruptions") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(sv,"Metod för att begränsa renderingslatens") \
+                DRI_CONF_ENUM(0,"Upptagen med att vänta på grafikhårdvaran") \
+                DRI_CONF_ENUM(1,"Sov i korta intervall under väntan på grafikhårdvaran") \
+                DRI_CONF_ENUM(2,"Låt grafikhårdvaran sända ut ett programvaruavbrott och sov") \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_VBLANK_NEVER 0
+#define DRI_CONF_VBLANK_DEF_INTERVAL_0 1
+#define DRI_CONF_VBLANK_DEF_INTERVAL_1 2
+#define DRI_CONF_VBLANK_ALWAYS_SYNC 3
+#define DRI_CONF_VBLANK_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(vblank_mode,enum,def,"0:3") \
+        DRI_CONF_DESC_BEGIN(en,"Synchronization with vertical refresh (swap intervals)") \
+                DRI_CONF_ENUM(0,"Never synchronize with vertical refresh, ignore application's choice") \
+                DRI_CONF_ENUM(1,"Initial swap interval 0, obey application's choice") \
+                DRI_CONF_ENUM(2,"Initial swap interval 1, obey application's choice") \
+                DRI_CONF_ENUM(3,"Always synchronize with vertical refresh, application chooses the minimum swap interval") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(de,"Synchronisation mit der vertikalen Bildwiederholung") \
+                DRI_CONF_ENUM(0,"Niemals mit der Bildwiederholung synchronisieren, Anweisungen der Anwendung ignorieren") \
+                DRI_CONF_ENUM(1,"Initiales Bildintervall 0, Anweisungen der Anwendung gehorchen") \
+                DRI_CONF_ENUM(2,"Initiales Bildintervall 1, Anweisungen der Anwendung gehorchen") \
+                DRI_CONF_ENUM(3,"Immer mit der Bildwiederholung synchronisieren, Anwendung wählt das minimale Bildintervall") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(es,"Sincronización con el refresco vertical (intervalos de intercambio)") \
+                DRI_CONF_ENUM(0,"No sincronizar nunca con el refresco vertical, ignorar la elección de la aplicación") \
+                DRI_CONF_ENUM(1,"Intervalo de intercambio inicial 0, obedecer la elección de la aplicación") \
+                DRI_CONF_ENUM(2,"Intervalo de intercambio inicial 1, obedecer la elección de la aplicación") \
+                DRI_CONF_ENUM(3,"Sincronizar siempre con el refresco vertical, la aplicación elige el intervalo de intercambio mínimo") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(nl,"Synchronisatie met verticale verversing (interval omwisselen)") \
+                DRI_CONF_ENUM(0,"Nooit synchroniseren met verticale verversing, negeer de keuze van de applicatie") \
+                DRI_CONF_ENUM(1,"Initïeel omwisselingsinterval 0, honoreer de keuze van de applicatie") \
+                DRI_CONF_ENUM(2,"Initïeel omwisselingsinterval 1, honoreer de keuze van de applicatie") \
+                DRI_CONF_ENUM(3,"Synchroniseer altijd met verticale verversing, de applicatie kiest het minimum omwisselingsinterval") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(fr,"Synchronisation de l'affichage avec le balayage vertical") \
+                DRI_CONF_ENUM(0,"Ne jamais synchroniser avec le balayage vertical, ignorer le choix de l'application") \
+                DRI_CONF_ENUM(1,"Ne pas synchroniser avec le balayage vertical par défaut, mais obéir au choix de l'application") \
+                DRI_CONF_ENUM(2,"Synchroniser avec le balayage vertical par défaut, mais obéir au choix de l'application") \
+                DRI_CONF_ENUM(3,"Toujours synchroniser avec le balayage vertical, l'application choisit l'intervalle minimal") \
+        DRI_CONF_DESC_END \
+        DRI_CONF_DESC_BEGIN(sv,"Synkronisering med vertikal uppdatering (växlingsintervall)") \
+                DRI_CONF_ENUM(0,"Synkronisera aldrig med vertikal uppdatering, ignorera programmets val") \
+                DRI_CONF_ENUM(1,"Initialt växlingsintervall 0, följ programmets val") \
+                DRI_CONF_ENUM(2,"Initialt växlingsintervall 1, följ programmets val") \
+                DRI_CONF_ENUM(3,"Synkronisera alltid med vertikal uppdatering, programmet väljer den minsta växlingsintervallen") \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_HYPERZ_DISABLED 0
+#define DRI_CONF_HYPERZ_ENABLED 1
+#define DRI_CONF_HYPERZ(def) \
+DRI_CONF_OPT_BEGIN(hyperz,bool,def) \
+        DRI_CONF_DESC(en,"Use HyperZ to boost performance") \
+        DRI_CONF_DESC(de,"HyperZ zur Leistungssteigerung verwenden") \
+        DRI_CONF_DESC(es,"Usar HyperZ para potenciar rendimiento") \
+        DRI_CONF_DESC(nl,"Gebruik HyperZ om de prestaties te verbeteren") \
+        DRI_CONF_DESC(fr,"Utiliser le HyperZ pour améliorer les performances") \
+        DRI_CONF_DESC(sv,"Använd HyperZ för att maximera prestandan") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,"Number of texture units used") \
+        DRI_CONF_DESC(de,"Anzahl der benutzten Textureinheiten") \
+        DRI_CONF_DESC(es,"Número de unidades de textura usadas") \
+        DRI_CONF_DESC(nl,"Aantal textuureenheden in gebruik") \
+        DRI_CONF_DESC(fr,"Nombre d'unités de texture") \
+        DRI_CONF_DESC(sv,"Antal använda texturenheter") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_ALLOW_LARGE_TEXTURES(def) \
+DRI_CONF_OPT_BEGIN_V(allow_large_textures,enum,def,"0:2") \
+	DRI_CONF_DESC_BEGIN(en,"Support larger textures not guaranteed to fit into graphics memory") \
+		DRI_CONF_ENUM(0,"No") \
+		DRI_CONF_ENUM(1,"At least 1 texture must fit under worst-case assumptions") \
+		DRI_CONF_ENUM(2,"Announce hardware limits") \
+	DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(de,"Unterstütze grosse Texturen die evtl. nicht in den Grafikspeicher passen") \
+		DRI_CONF_ENUM(0,"Nein") \
+		DRI_CONF_ENUM(1,"Mindestens 1 Textur muss auch im schlechtesten Fall Platz haben") \
+		DRI_CONF_ENUM(2,"Benutze Hardware-Limits") \
+	DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(es,"Support larger textures not guaranteed to fit into graphics memory") \
+		DRI_CONF_ENUM(0,"No") \
+		DRI_CONF_ENUM(1,"At least 1 texture must fit under worst-case assumptions") \
+		DRI_CONF_ENUM(2,"Announce hardware limits") \
+	DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(nl,"Support larger textures not guaranteed to fit into graphics memory") \
+		DRI_CONF_ENUM(0,"No") \
+		DRI_CONF_ENUM(1,"At least 1 texture must fit under worst-case assumptions") \
+		DRI_CONF_ENUM(2,"Announce hardware limits") \
+	DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(fr,"Support larger textures not guaranteed to fit into graphics memory") \
+		DRI_CONF_ENUM(0,"No") \
+		DRI_CONF_ENUM(1,"At least 1 texture must fit under worst-case assumptions") \
+		DRI_CONF_ENUM(2,"Announce hardware limits") \
+	DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(sv,"Stöd för större texturer är inte garanterat att passa i grafikminnet") \
+		DRI_CONF_ENUM(0,"Nej") \
+		DRI_CONF_ENUM(1,"Åtminstone en textur måste passa för antaget sämsta förhållande") \
+		DRI_CONF_ENUM(2,"Annonsera hårdvarubegränsningar") \
+	DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_TEXTURE_BLEND_QUALITY(def,range) \
+DRI_CONF_OPT_BEGIN_V(texture_blend_quality,float,def,range) \
+	DRI_CONF_DESC(en,"Texture filtering quality vs. speed, AKA “brilinear” texture filtering") \
+	DRI_CONF_DESC(de,"Texturfilterqualität versus -geschwindigkeit, auch bekannt als „brilineare“ Texturfilterung") \
+	DRI_CONF_DESC(es,"Calidad de filtrado de textura vs. velocidad, alias filtrado ”brilinear“ de textura") \
+	DRI_CONF_DESC(nl,"Textuurfilterkwaliteit versus -snelheid, ookwel bekend als “brilineaire” textuurfiltering") \
+	DRI_CONF_DESC(fr,"Qualité/performance du filtrage trilinéaire de texture (filtrage brilinéaire)") \
+	DRI_CONF_DESC(sv,"Texturfiltreringskvalitet mot hastighet, även kallad ”brilinear”-texturfiltrering") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_TEXTURE_HEAPS_ALL 0
+#define DRI_CONF_TEXTURE_HEAPS_CARD 1
+#define DRI_CONF_TEXTURE_HEAPS_GART 2
+#define DRI_CONF_TEXTURE_HEAPS(def) \
+DRI_CONF_OPT_BEGIN_V(texture_heaps,enum,def,"0:2") \
+	DRI_CONF_DESC_BEGIN(en,"Used types of texture memory") \
+		DRI_CONF_ENUM(0,"All available memory") \
+		DRI_CONF_ENUM(1,"Only card memory (if available)") \
+		DRI_CONF_ENUM(2,"Only GART (AGP/PCIE) memory (if available)") \
+	DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(de,"Benutzte Arten von Texturspeicher") \
+		DRI_CONF_ENUM(0,"Aller verfügbarer Speicher") \
+		DRI_CONF_ENUM(1,"Nur Grafikspeicher (falls verfügbar)") \
+		DRI_CONF_ENUM(2,"Nur GART-Speicher (AGP/PCIE) (falls verfügbar)") \
+	DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(es,"Tipos de memoria de textura usados") \
+		DRI_CONF_ENUM(0,"Toda la memoria disponible") \
+		DRI_CONF_ENUM(1,"Sólo la memoria de la tarjeta (si disponible)") \
+		DRI_CONF_ENUM(2,"Sólo memoria GART (AGP/PCIE) (si disponible)") \
+	DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(nl,"Gebruikte soorten textuurgeheugen") \
+		DRI_CONF_ENUM(0,"Al het beschikbaar geheugen") \
+		DRI_CONF_ENUM(1,"Alleen geheugen op de kaart (als het aanwezig is)") \
+		DRI_CONF_ENUM(2,"Alleen GART (AGP/PCIE) geheugen (als het aanwezig is)") \
+	DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(fr,"Types de mémoire de texture") \
+		DRI_CONF_ENUM(0,"Utiliser toute la mémoire disponible") \
+		DRI_CONF_ENUM(1,"Utiliser uniquement la mémoire graphique (si disponible)") \
+		DRI_CONF_ENUM(2,"Utiliser uniquement la mémoire GART (AGP/PCIE) (si disponible)") \
+	DRI_CONF_DESC_END \
+	DRI_CONF_DESC_BEGIN(sv,"Använda typer av texturminne") \
+		DRI_CONF_ENUM(0,"Allt tillgängligt minne") \
+		DRI_CONF_ENUM(1,"Endast kortminne (om tillgängligt)") \
+		DRI_CONF_ENUM(2,"Endast GART-minne (AGP/PCIE) (om tillgängligt)") \
+	DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+/* Options for features that are not done in hardware by the driver (like GL_ARB_vertex_program
+   on cards where there is no documentation (r200) or on rasterization-only hardware). */
+#define DRI_CONF_SECTION_SOFTWARE \
+DRI_CONF_SECTION_BEGIN \
+        DRI_CONF_DESC(en,"Features that are not hardware-accelerated") \
+        DRI_CONF_DESC(de,"Funktionalität, die nicht hardwarebeschleunigt ist") \
+        DRI_CONF_DESC(es,"Características no aceleradas por hardware") \
+        DRI_CONF_DESC(nl,"Eigenschappen die niet hardwareversneld zijn") \
+        DRI_CONF_DESC(fr,"Fonctionnalités ne bénéficiant pas d'une accélération matérielle") \
+        DRI_CONF_DESC(sv,"Funktioner som inte är hårdvaruaccelererade")
+
+#define DRI_CONF_ARB_VERTEX_PROGRAM(def) \
+DRI_CONF_OPT_BEGIN(arb_vertex_program,bool,def) \
+        DRI_CONF_DESC(en,"Enable extension GL_ARB_vertex_program") \
+        DRI_CONF_DESC(de,"Erweiterung GL_ARB_vertex_program aktivieren") \
+        DRI_CONF_DESC(es,"Activar la extensión GL_ARB_vertex_program") \
+        DRI_CONF_DESC(nl,"Zet uitbreiding GL_ARB_vertex_program aan") \
+        DRI_CONF_DESC(fr,"Activer l'extension GL_ARB_vertex_program") \
+        DRI_CONF_DESC(sv,"Aktivera tillägget GL_ARB_vertex_program") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_NV_VERTEX_PROGRAM(def) \
+DRI_CONF_OPT_BEGIN(nv_vertex_program,bool,def) \
+        DRI_CONF_DESC(en,"Enable extension GL_NV_vertex_program") \
+        DRI_CONF_DESC(de,"Erweiterung GL_NV_vertex_program aktivieren") \
+        DRI_CONF_DESC(es,"Activar extensión GL_NV_vertex_program") \
+        DRI_CONF_DESC(nl,"Zet uitbreiding GL_NV_vertex_program aan") \
+        DRI_CONF_DESC(fr,"Activer l'extension GL_NV_vertex_program") \
+        DRI_CONF_DESC(sv,"Aktivera tillägget GL_NV_vertex_program") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_ALWAYS_FLUSH_BATCH(def) \
+DRI_CONF_OPT_BEGIN(always_flush_batch,bool,def) \
+        DRI_CONF_DESC(en,"Enable flushing batchbuffer after each draw call") \
+        DRI_CONF_DESC(de,"Enable flushing batchbuffer after each draw call") \
+        DRI_CONF_DESC(es,"Enable flushing batchbuffer after each draw call") \
+        DRI_CONF_DESC(nl,"Enable flushing batchbuffer after each draw call") \
+        DRI_CONF_DESC(fr,"Enable flushing batchbuffer after each draw call") \
+        DRI_CONF_DESC(sv,"Enable flushing batchbuffer after each draw call") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_ALWAYS_FLUSH_CACHE(def) \
+DRI_CONF_OPT_BEGIN(always_flush_cache,bool,def) \
+        DRI_CONF_DESC(en,"Enable flushing GPU caches with each draw call") \
+        DRI_CONF_DESC(de,"Enable flushing GPU caches with each draw call") \
+        DRI_CONF_DESC(es,"Enable flushing GPU caches with each draw call") \
+        DRI_CONF_DESC(nl,"Enable flushing GPU caches with each draw call") \
+        DRI_CONF_DESC(fr,"Enable flushing GPU caches with each draw call") \
+        DRI_CONF_DESC(sv,"Enable flushing GPU caches with each draw call") \
+DRI_CONF_OPT_END
diff --git a/src/mesa/drivers/dri/common/xmlpool/sv.po b/src/mesa/drivers/dri/common/xmlpool/sv.po
new file mode 100644
index 0000000000..ba32b2ff15
--- /dev/null
+++ b/src/mesa/drivers/dri/common/xmlpool/sv.po
@@ -0,0 +1,226 @@
+# Swedish translation of DRI driver options.
+# Copyright (C) Free Software Foundation, Inc.
+# This file is distributed under the same license as the Mesa package.
+# Daniel Nylander <po@danielnylander.se>, 2006.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: Mesa DRI\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2005-04-11 23:19+0200\n"
+"PO-Revision-Date: 2006-09-18 10:56+0100\n"
+"Last-Translator: Daniel Nylander <po@danielnylander.se>\n"
+"Language-Team: Swedish <tp-sv@listor.tp-sv.se>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+#: t_options.h:53
+msgid "Debugging"
+msgstr "Felsökning"
+
+#: t_options.h:57
+msgid "Disable 3D acceleration"
+msgstr "Inaktivera 3D-accelerering"
+
+#: t_options.h:62
+msgid "Show performance boxes"
+msgstr "Visa prestandarutor"
+
+#: t_options.h:69
+msgid "Image Quality"
+msgstr "Bildkvalitet"
+
+#: t_options.h:77
+msgid "Texture color depth"
+msgstr "Färgdjup för texturer"
+
+#: t_options.h:78
+msgid "Prefer frame buffer color depth"
+msgstr "Föredra färgdjupet för framebuffer"
+
+#: t_options.h:79
+msgid "Prefer 32 bits per texel"
+msgstr "Föredra 32 bitar per texel"
+
+#: t_options.h:80
+msgid "Prefer 16 bits per texel"
+msgstr "Föredra 16 bitar per texel"
+
+#: t_options.h:81
+msgid "Force 16 bits per texel"
+msgstr "Tvinga 16 bitar per texel"
+
+#: t_options.h:87
+msgid "Initial maximum value for anisotropic texture filtering"
+msgstr "Initialt maximalt värde för anisotropisk texturfiltrering"
+
+#: t_options.h:92
+msgid "Forbid negative texture LOD bias"
+msgstr "Förbjud negativ LOD-kompensation för texturer"
+
+#: t_options.h:97
+msgid "Enable S3TC texture compression even if software support is not available"
+msgstr "Aktivera S3TC-texturkomprimering även om programvarustöd saknas"
+
+#: t_options.h:104
+msgid "Initial color reduction method"
+msgstr "Initial färgminskningsmetod"
+
+#: t_options.h:105
+msgid "Round colors"
+msgstr "Avrunda färger"
+
+#: t_options.h:106
+msgid "Dither colors"
+msgstr "Utjämna färger"
+
+#: t_options.h:114
+msgid "Color rounding method"
+msgstr "Färgavrundningsmetod"
+
+#: t_options.h:115
+msgid "Round color components downward"
+msgstr "Avrunda färgkomponenter nedåt"
+
+#: t_options.h:116
+msgid "Round to nearest color"
+msgstr "Avrunda till närmsta färg"
+
+#: t_options.h:125
+msgid "Color dithering method"
+msgstr "Färgutjämningsmetod"
+
+#: t_options.h:126
+msgid "Horizontal error diffusion"
+msgstr "Horisontell felspridning"
+
+#: t_options.h:127
+msgid "Horizontal error diffusion, reset error at line start"
+msgstr "Horisontell felspridning, återställ fel vid radbörjan"
+
+#: t_options.h:128
+msgid "Ordered 2D color dithering"
+msgstr "Ordnad 2D-färgutjämning"
+
+#: t_options.h:134
+msgid "Floating point depth buffer"
+msgstr "Buffert för flytande punktdjup"
+
+#: t_options.h:140
+msgid "Performance"
+msgstr "Prestanda"
+
+#: t_options.h:148
+msgid "TCL mode (Transformation, Clipping, Lighting)"
+msgstr "TCL-läge (Transformation, Clipping, Lighting)"
+
+#: t_options.h:149
+msgid "Use software TCL pipeline"
+msgstr "Använd programvaru-TCL-rörledning"
+
+#: t_options.h:150
+msgid "Use hardware TCL as first TCL pipeline stage"
+msgstr "Använd maskinvaru-TCL som första TCL-rörledningssteg"
+
+#: t_options.h:151
+msgid "Bypass the TCL pipeline"
+msgstr "Kringgå TCL-rörledningen"
+
+#: t_options.h:152
+msgid "Bypass the TCL pipeline with state-based machine code generated on-the-fly"
+msgstr "Kringgå TCL-rörledningen med tillståndsbaserad maskinkod som direktgenereras"
+
+#: t_options.h:161
+msgid "Method to limit rendering latency"
+msgstr "Metod för att begränsa renderingslatens"
+
+#: t_options.h:162
+msgid "Busy waiting for the graphics hardware"
+msgstr "Upptagen med att vänta på grafikhårdvaran"
+
+#: t_options.h:163
+msgid "Sleep for brief intervals while waiting for the graphics hardware"
+msgstr "Sov i korta intervall under väntan på grafikhårdvaran"
+
+#: t_options.h:164
+msgid "Let the graphics hardware emit a software interrupt and sleep"
+msgstr "Låt grafikhårdvaran sända ut ett programvaruavbrott och sov"
+
+#: t_options.h:174
+msgid "Synchronization with vertical refresh (swap intervals)"
+msgstr "Synkronisering med vertikal uppdatering (växlingsintervall)"
+
+#: t_options.h:175
+msgid "Never synchronize with vertical refresh, ignore application's choice"
+msgstr "Synkronisera aldrig med vertikal uppdatering, ignorera programmets val"
+
+#: t_options.h:176
+msgid "Initial swap interval 0, obey application's choice"
+msgstr "Initialt växlingsintervall 0, följ programmets val"
+
+#: t_options.h:177
+msgid "Initial swap interval 1, obey application's choice"
+msgstr "Initialt växlingsintervall 1, följ programmets val"
+
+#: t_options.h:178
+msgid "Always synchronize with vertical refresh, application chooses the minimum swap interval"
+msgstr "Synkronisera alltid med vertikal uppdatering, programmet väljer den minsta växlingsintervallen"
+
+#: t_options.h:186
+msgid "Use HyperZ to boost performance"
+msgstr "Använd HyperZ för att maximera prestandan"
+
+#: t_options.h:191
+msgid "Number of texture units used"
+msgstr "Antal använda texturenheter"
+
+#: t_options.h:196
+msgid "Support larger textures not guaranteed to fit into graphics memory"
+msgstr "Stöd för större texturer är inte garanterat att passa i grafikminnet"
+
+#: t_options.h:197
+msgid "No"
+msgstr "Nej"
+
+#: t_options.h:198
+msgid "At least 1 texture must fit under worst-case assumptions"
+msgstr "Åtminstone en textur måste passa för antaget sämsta förhållande"
+
+#: t_options.h:199
+msgid "Announce hardware limits"
+msgstr "Annonsera hårdvarubegränsningar"
+
+#: t_options.h:205
+msgid "Texture filtering quality vs. speed, AKA “brilinear” texture filtering"
+msgstr "Texturfiltreringskvalitet mot hastighet, även kallad \"brilinear\"-texturfiltrering"
+
+#: t_options.h:213
+msgid "Used types of texture memory"
+msgstr "Använda typer av texturminne"
+
+#: t_options.h:214
+msgid "All available memory"
+msgstr "Allt tillgängligt minne"
+
+#: t_options.h:215
+msgid "Only card memory (if available)"
+msgstr "Endast kortminne (om tillgängligt)"
+
+#: t_options.h:216
+msgid "Only GART (AGP/PCIE) memory (if available)"
+msgstr "Endast GART-minne (AGP/PCIE) (om tillgängligt)"
+
+#: t_options.h:224
+msgid "Features that are not hardware-accelerated"
+msgstr "Funktioner som inte är hårdvaruaccelererade"
+
+#: t_options.h:228
+msgid "Enable extension GL_ARB_vertex_program"
+msgstr "Aktivera tillägget GL_ARB_vertex_program"
+
+#: t_options.h:233
+msgid "Enable extension GL_NV_vertex_program"
+msgstr "Aktivera tillägget GL_NV_vertex_program"
+
diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h
new file mode 100644
index 0000000000..5fd6ec65bf
--- /dev/null
+++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h
@@ -0,0 +1,249 @@
+/*
+ * XML DRI client-side driver configuration
+ * Copyright (C) 2003 Felix Kuehling
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * FELIX KUEHLING, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ */
+/**
+ * \file t_options.h
+ * \brief Templates of common options
+ * \author Felix Kuehling
+ *
+ * This file defines macros for common options that can be used to
+ * construct driConfigOptions in the drivers. This file is only a
+ * template containing English descriptions for options wrapped in
+ * gettext(). xgettext can be used to extract translatable
+ * strings. These strings can then be translated by anyone familiar
+ * with GNU gettext. gen_xmlpool.py takes this template and fills in
+ * all the translations. The result (options.h) is included by
+ * xmlpool.h which in turn can be included by drivers.
+ *
+ * The macros used to describe options in this file are defined in
+ * ../xmlpool.h.
+ */
+
+/* This is needed for xgettext to extract translatable strings.
+ * gen_xmlpool.py will discard this line. */
+#include <libintl.h>
+
+/*
+ * predefined option sections and options with multi-lingual descriptions
+ */
+
+/** \brief Debugging options */
+#define DRI_CONF_SECTION_DEBUG \
+DRI_CONF_SECTION_BEGIN \
+	DRI_CONF_DESC(en,gettext("Debugging"))
+
+#define DRI_CONF_NO_RAST(def) \
+DRI_CONF_OPT_BEGIN(no_rast,bool,def) \
+        DRI_CONF_DESC(en,gettext("Disable 3D acceleration")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_PERFORMANCE_BOXES(def) \
+DRI_CONF_OPT_BEGIN(performance_boxes,bool,def) \
+        DRI_CONF_DESC(en,gettext("Show performance boxes")) \
+DRI_CONF_OPT_END
+
+
+/** \brief Texture-related options */
+#define DRI_CONF_SECTION_QUALITY \
+DRI_CONF_SECTION_BEGIN \
+	DRI_CONF_DESC(en,gettext("Image Quality"))
+
+#define DRI_CONF_EXCESS_MIPMAP(def) \
+DRI_CONF_OPT_BEGIN(excess_mipmap,bool,def) \
+	DRI_CONF_DESC(en,gettext("Enable extra mipmap level")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_TEXTURE_DEPTH_FB       0
+#define DRI_CONF_TEXTURE_DEPTH_32       1
+#define DRI_CONF_TEXTURE_DEPTH_16       2
+#define DRI_CONF_TEXTURE_DEPTH_FORCE_16 3
+#define DRI_CONF_TEXTURE_DEPTH(def) \
+DRI_CONF_OPT_BEGIN_V(texture_depth,enum,def,"0:3") \
+	DRI_CONF_DESC_BEGIN(en,gettext("Texture color depth")) \
+                DRI_CONF_ENUM(0,gettext("Prefer frame buffer color depth")) \
+                DRI_CONF_ENUM(1,gettext("Prefer 32 bits per texel")) \
+                DRI_CONF_ENUM(2,gettext("Prefer 16 bits per texel")) \
+                DRI_CONF_ENUM(3,gettext("Force 16 bits per texel")) \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_DEF_MAX_ANISOTROPY(def,range) \
+DRI_CONF_OPT_BEGIN_V(def_max_anisotropy,float,def,range) \
+        DRI_CONF_DESC(en,gettext("Initial maximum value for anisotropic texture filtering")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_NO_NEG_LOD_BIAS(def) \
+DRI_CONF_OPT_BEGIN(no_neg_lod_bias,bool,def) \
+        DRI_CONF_DESC(en,gettext("Forbid negative texture LOD bias")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_FORCE_S3TC_ENABLE(def) \
+DRI_CONF_OPT_BEGIN(force_s3tc_enable,bool,def) \
+        DRI_CONF_DESC(en,gettext("Enable S3TC texture compression even if software support is not available")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_COLOR_REDUCTION_ROUND 0
+#define DRI_CONF_COLOR_REDUCTION_DITHER 1
+#define DRI_CONF_COLOR_REDUCTION(def) \
+DRI_CONF_OPT_BEGIN_V(color_reduction,enum,def,"0:1") \
+        DRI_CONF_DESC_BEGIN(en,gettext("Initial color reduction method")) \
+                DRI_CONF_ENUM(0,gettext("Round colors")) \
+                DRI_CONF_ENUM(1,gettext("Dither colors")) \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_ROUND_TRUNC 0
+#define DRI_CONF_ROUND_ROUND 1
+#define DRI_CONF_ROUND_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(round_mode,enum,def,"0:1") \
+	DRI_CONF_DESC_BEGIN(en,gettext("Color rounding method")) \
+                DRI_CONF_ENUM(0,gettext("Round color components downward")) \
+                DRI_CONF_ENUM(1,gettext("Round to nearest color")) \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_DITHER_XERRORDIFF 0
+#define DRI_CONF_DITHER_XERRORDIFFRESET 1
+#define DRI_CONF_DITHER_ORDERED 2
+#define DRI_CONF_DITHER_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(dither_mode,enum,def,"0:2") \
+	DRI_CONF_DESC_BEGIN(en,gettext("Color dithering method")) \
+                DRI_CONF_ENUM(0,gettext("Horizontal error diffusion")) \
+                DRI_CONF_ENUM(1,gettext("Horizontal error diffusion, reset error at line start")) \
+                DRI_CONF_ENUM(2,gettext("Ordered 2D color dithering")) \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_FLOAT_DEPTH(def) \
+DRI_CONF_OPT_BEGIN(float_depth,bool,def) \
+        DRI_CONF_DESC(en,gettext("Floating point depth buffer")) \
+DRI_CONF_OPT_END
+
+/** \brief Performance-related options */
+#define DRI_CONF_SECTION_PERFORMANCE \
+DRI_CONF_SECTION_BEGIN \
+        DRI_CONF_DESC(en,gettext("Performance"))
+
+#define DRI_CONF_TCL_SW 0
+#define DRI_CONF_TCL_PIPELINED 1
+#define DRI_CONF_TCL_VTXFMT 2
+#define DRI_CONF_TCL_CODEGEN 3
+#define DRI_CONF_TCL_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(tcl_mode,enum,def,"0:3") \
+        DRI_CONF_DESC_BEGIN(en,gettext("TCL mode (Transformation, Clipping, Lighting)")) \
+                DRI_CONF_ENUM(0,gettext("Use software TCL pipeline")) \
+                DRI_CONF_ENUM(1,gettext("Use hardware TCL as first TCL pipeline stage")) \
+                DRI_CONF_ENUM(2,gettext("Bypass the TCL pipeline")) \
+                DRI_CONF_ENUM(3,gettext("Bypass the TCL pipeline with state-based machine code generated on-the-fly")) \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_FTHROTTLE_BUSY 0
+#define DRI_CONF_FTHROTTLE_USLEEPS 1
+#define DRI_CONF_FTHROTTLE_IRQS 2
+#define DRI_CONF_FTHROTTLE_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(fthrottle_mode,enum,def,"0:2") \
+        DRI_CONF_DESC_BEGIN(en,gettext("Method to limit rendering latency")) \
+                DRI_CONF_ENUM(0,gettext("Busy waiting for the graphics hardware")) \
+                DRI_CONF_ENUM(1,gettext("Sleep for brief intervals while waiting for the graphics hardware")) \
+                DRI_CONF_ENUM(2,gettext("Let the graphics hardware emit a software interrupt and sleep")) \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_VBLANK_NEVER 0
+#define DRI_CONF_VBLANK_DEF_INTERVAL_0 1
+#define DRI_CONF_VBLANK_DEF_INTERVAL_1 2
+#define DRI_CONF_VBLANK_ALWAYS_SYNC 3
+#define DRI_CONF_VBLANK_MODE(def) \
+DRI_CONF_OPT_BEGIN_V(vblank_mode,enum,def,"0:3") \
+        DRI_CONF_DESC_BEGIN(en,gettext("Synchronization with vertical refresh (swap intervals)")) \
+                DRI_CONF_ENUM(0,gettext("Never synchronize with vertical refresh, ignore application's choice")) \
+                DRI_CONF_ENUM(1,gettext("Initial swap interval 0, obey application's choice")) \
+                DRI_CONF_ENUM(2,gettext("Initial swap interval 1, obey application's choice")) \
+                DRI_CONF_ENUM(3,gettext("Always synchronize with vertical refresh, application chooses the minimum swap interval")) \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_HYPERZ_DISABLED 0
+#define DRI_CONF_HYPERZ_ENABLED 1
+#define DRI_CONF_HYPERZ(def) \
+DRI_CONF_OPT_BEGIN(hyperz,bool,def) \
+        DRI_CONF_DESC(en,gettext("Use HyperZ to boost performance")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_MAX_TEXTURE_UNITS(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(texture_units,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,gettext("Number of texture units used")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_ALLOW_LARGE_TEXTURES(def) \
+DRI_CONF_OPT_BEGIN_V(allow_large_textures,enum,def,"0:2") \
+	DRI_CONF_DESC_BEGIN(en,gettext("Support larger textures not guaranteed to fit into graphics memory")) \
+		DRI_CONF_ENUM(0,gettext("No")) \
+		DRI_CONF_ENUM(1,gettext("At least 1 texture must fit under worst-case assumptions")) \
+		DRI_CONF_ENUM(2,gettext("Announce hardware limits")) \
+	DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_TEXTURE_BLEND_QUALITY(def,range) \
+DRI_CONF_OPT_BEGIN_V(texture_blend_quality,float,def,range) \
+	DRI_CONF_DESC(en,gettext("Texture filtering quality vs. speed, AKA “brilinear” texture filtering")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_TEXTURE_HEAPS_ALL 0
+#define DRI_CONF_TEXTURE_HEAPS_CARD 1
+#define DRI_CONF_TEXTURE_HEAPS_GART 2
+#define DRI_CONF_TEXTURE_HEAPS(def) \
+DRI_CONF_OPT_BEGIN_V(texture_heaps,enum,def,"0:2") \
+	DRI_CONF_DESC_BEGIN(en,gettext("Used types of texture memory")) \
+		DRI_CONF_ENUM(0,gettext("All available memory")) \
+		DRI_CONF_ENUM(1,gettext("Only card memory (if available)")) \
+		DRI_CONF_ENUM(2,gettext("Only GART (AGP/PCIE) memory (if available)")) \
+	DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+/* Options for features that are not done in hardware by the driver (like GL_ARB_vertex_program
+   on cards where there is no documentation (r200) or on rasterization-only hardware). */
+#define DRI_CONF_SECTION_SOFTWARE \
+DRI_CONF_SECTION_BEGIN \
+        DRI_CONF_DESC(en,gettext("Features that are not hardware-accelerated"))
+
+#define DRI_CONF_ARB_VERTEX_PROGRAM(def) \
+DRI_CONF_OPT_BEGIN(arb_vertex_program,bool,def) \
+        DRI_CONF_DESC(en,gettext("Enable extension GL_ARB_vertex_program")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_NV_VERTEX_PROGRAM(def) \
+DRI_CONF_OPT_BEGIN(nv_vertex_program,bool,def) \
+        DRI_CONF_DESC(en,gettext("Enable extension GL_NV_vertex_program")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_ALWAYS_FLUSH_BATCH(def) \
+DRI_CONF_OPT_BEGIN(always_flush_batch,bool,def) \
+        DRI_CONF_DESC(en,gettext("Enable flushing batchbuffer after each draw call")) \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_ALWAYS_FLUSH_CACHE(def) \
+DRI_CONF_OPT_BEGIN(always_flush_cache,bool,def) \
+        DRI_CONF_DESC(en,gettext("Enable flushing GPU caches with each draw call")) \
+DRI_CONF_OPT_END
diff --git a/src/mesa/drivers/dri/dri.pc.in b/src/mesa/drivers/dri/dri.pc.in
new file mode 100644
index 0000000000..695aa6cfd6
--- /dev/null
+++ b/src/mesa/drivers/dri/dri.pc.in
@@ -0,0 +1,11 @@
+prefix=@INSTALL_DIR@
+exec_prefix=${prefix}
+libdir=@INSTALL_LIB_DIR@
+includedir=@INSTALL_INC_DIR@
+dridriverdir=@DRI_DRIVER_DIR@
+
+Name: dri
+Description: Direct Rendering Infrastructure
+Version: @VERSION@
+Requires.private: @DRI_PC_REQ_PRIV@
+Cflags: -I${includedir}
diff --git a/src/mesa/drivers/dri/i810/Makefile b/src/mesa/drivers/dri/i810/Makefile
new file mode 100644
index 0000000000..54a837d5ea
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/Makefile
@@ -0,0 +1,28 @@
+# src/mesa/drivers/dri/i810/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = i810_dri.so
+
+DRIVER_SOURCES = \
+	i810context.c \
+	i810ioctl.c \
+	i810render.c \
+	i810screen.c \
+	i810span.c \
+	i810state.c \
+	i810tex.c \
+	i810texmem.c \
+	i810texstate.c \
+	i810tris.c \
+	i810vb.c
+
+C_SOURCES = \
+	$(COMMON_SOURCES) \
+	$(DRIVER_SOURCES)
+
+ASM_SOURCES = 
+
+include ../Makefile.template
+
diff --git a/src/mesa/drivers/dri/i810/i810_3d_reg.h b/src/mesa/drivers/dri/i810/i810_3d_reg.h
new file mode 100644
index 0000000000..2fbeb64978
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810_3d_reg.h
@@ -0,0 +1,630 @@
+
+#ifndef I810_3D_REG_H
+#define I810_3D_REG_H
+
+#include "i810_reg.h"
+
+/* Registers not used in the X server
+ */
+
+#define I810_NOP_ID           0x2094
+#define I810_NOP_ID_MASK        ((1<<22)-1)
+
+
+/* 3D instructions
+ */
+
+
+/* GFXRENDERSTATE_PV_PIXELIZATION_RULE, p149
+ *
+ * Format:
+ *     0: GFX_OP_PV_RULE | PV_*
+ *
+ */
+#define GFX_OP_PV_RULE           ((0x3<<29)|(0x7<<24))
+#define PV_SMALL_TRI_FILTER_ENABLE   (0x1<<11)
+#define PV_UPDATE_PIXRULE            (0x1<<10)
+#define PV_PIXRULE_ENABLE            (0x1<<9)
+#define PV_UPDATE_LINELIST           (0x1<<8)
+#define PV_LINELIST_MASK             (0x3<<6)
+#define PV_LINELIST_PV0              (0x0<<6)
+#define PV_LINELIST_PV1              (0x1<<6)
+#define PV_UPDATE_TRIFAN             (0x1<<5)
+#define PV_TRIFAN_MASK               (0x3<<3)
+#define PV_TRIFAN_PV0                (0x0<<3)
+#define PV_TRIFAN_PV1                (0x1<<3)
+#define PV_TRIFAN_PV2                (0x2<<3)
+#define PV_UPDATE_TRISTRIP           (0x1<<2)
+#define PV_TRISTRIP_MASK             (0x3<<0)
+#define PV_TRISTRIP_PV0              (0x0<<0)
+#define PV_TRISTRIP_PV1              (0x1<<0)
+#define PV_TRISTRIP_PV2              (0x2<<0)
+
+
+/* GFXRENDERSTATE_SCISSOR_ENABLE, p146
+ *
+ * Format:
+ *     0: GFX_OP_SCISSOR | SC_*
+ */
+#define GFX_OP_SCISSOR         ((0x3<<29)|(0x1c<<24)|(0x10<<19))
+#define SC_UPDATE_SCISSOR       (0x1<<1)
+#define SC_ENABLE_MASK          (0x1<<0)
+#define SC_ENABLE               (0x1<<0)
+
+/* GFXRENDERSTATE_SCISSOR_INFO, p147
+ * Masks reaching bit 31 are unsigned: 0xffff<<16 overflows a signed int.
+ * Format:
+ *     0: GFX_OP_SCISSOR_INFO
+ *     1: SCI_YMIN_MASK | SCI_XMIN_MASK
+ *     2: SCI_YMAX_MASK | SCI_XMAX_MASK
+ */
+#define GFX_OP_SCISSOR_INFO    ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1))
+#define SCI_YMIN_MASK      (0xffffu<<16)
+#define SCI_XMIN_MASK      (0xffff<<0)
+#define SCI_YMAX_MASK      (0xffffu<<16)
+#define SCI_XMAX_MASK      (0xffff<<0)
+
+/* GFXRENDERSTATE_DRAWING_RECT_INFO, p144
+ * Values reaching bit 31 are unsigned: shifting a signed int there is undefined.
+ * Format:
+ *     0: GFX_OP_DRAWRECT_INFO
+ *     1: DR1_*
+ *     2: DR2_*
+ *     3: DR3_*
+ *     4: DR4_*
+ */
+#define GFX_OP_DRAWRECT_INFO   ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
+#define DR1_RECT_CLIP_ENABLE   (0x0<<31)
+#define DR1_RECT_CLIP_DISABLE  (0x1u<<31)
+#define DR1_X_DITHER_BIAS_MASK (0x3<<26)
+#define DR1_X_DITHER_BIAS_SHIFT      26
+#define DR1_Y_DITHER_BIAS_MASK (0x3<<24)
+#define DR1_Y_DITHER_BIAS_SHIFT      24
+#define DR2_YMIN_MASK          (0xffffu<<16)
+#define DR2_XMIN_MASK          (0xffff<<0)
+#define DR3_YMAX_MASK          (0xffffu<<16)
+#define DR3_XMAX_MASK          (0xffff<<0)
+#define DR4_YORG_MASK          (0x3ff<<16)
+#define DR4_XORG_MASK          (0x7ff<<0)
+
+
+/* GFXRENDERSTATE_LINEWIDTH_CULL_SHADE_MODE, p140
+ *
+ * Format:
+ *     0: GFX_OP_LINEWIDTH_CULL_SHADE_MODE | LCS_*
+ */
+#define GFX_OP_LINEWIDTH_CULL_SHADE_MODE  ((0x3<<29)|(0x2<<24))
+#define LCS_UPDATE_ZMODE        (0x1<<20)
+#define LCS_Z_MASK              (0xf<<16)
+#define LCS_Z_NEVER             (0x1<<16)
+#define LCS_Z_LESS              (0x2<<16)
+#define LCS_Z_EQUAL             (0x3<<16)
+#define LCS_Z_LEQUAL            (0x4<<16)
+#define LCS_Z_GREATER           (0x5<<16)
+#define LCS_Z_NOTEQUAL          (0x6<<16)
+#define LCS_Z_GEQUAL            (0x7<<16)
+#define LCS_Z_ALWAYS            (0x8<<16)
+#define LCS_UPDATE_LINEWIDTH    (0x1<<15)
+#define LCS_LINEWIDTH_MASK      (0x7<<12)
+#define LCS_LINEWIDTH_SHIFT           12
+#define LCS_LINEWIDTH_0_5       (0x1<<12)
+#define LCS_LINEWIDTH_1_0       (0x2<<12)
+#define LCS_LINEWIDTH_2_0       (0x4<<12)
+#define LCS_LINEWIDTH_3_0       (0x6<<12)
+#define LCS_UPDATE_ALPHA_INTERP (0x1<<11)
+#define LCS_ALPHA_FLAT          (0x1<<10)
+#define LCS_ALPHA_INTERP        (0x0<<10)
+#define LCS_UPDATE_FOG_INTERP   (0x1<<9)
+#define LCS_FOG_INTERP          (0x0<<8)
+#define LCS_FOG_FLAT            (0x1<<8)
+#define LCS_UPDATE_SPEC_INTERP  (0x1<<7)
+#define LCS_SPEC_INTERP         (0x0<<6)
+#define LCS_SPEC_FLAT           (0x1<<6)
+#define LCS_UPDATE_RGB_INTERP   (0x1<<5)
+#define LCS_RGB_INTERP          (0x0<<4)
+#define LCS_RGB_FLAT            (0x1<<4)
+#define LCS_UPDATE_CULL_MODE    (0x1<<3)
+#define LCS_CULL_MASK           (0x7<<0)
+#define LCS_CULL_DISABLE        (0x1<<0)
+#define LCS_CULL_CW             (0x2<<0)
+#define LCS_CULL_CCW            (0x3<<0)
+#define LCS_CULL_BOTH           (0x4<<0)
+
+#define LCS_INTERP_FLAT (LCS_ALPHA_FLAT|LCS_RGB_FLAT|LCS_SPEC_FLAT)
+#define LCS_UPDATE_INTERP (LCS_UPDATE_ALPHA_INTERP| 	\
+			   LCS_UPDATE_RGB_INTERP|	\
+			   LCS_UPDATE_SPEC_INTERP)
+
+
+/* GFXRENDERSTATE_BOOLEAN_ENA_1, p142
+ *
+ */
+#define GFX_OP_BOOL_1           ((0x3<<29)|(0x3<<24))
+#define B1_UPDATE_SPEC_SETUP_ENABLE   (1<<19)
+#define B1_SPEC_SETUP_ENABLE          (1<<18)
+#define B1_UPDATE_ALPHA_SETUP_ENABLE  (1<<17)
+#define B1_ALPHA_SETUP_ENABLE         (1<<16)
+#define B1_UPDATE_CI_KEY_ENABLE       (1<<15)
+#define B1_CI_KEY_ENABLE              (1<<14)
+#define B1_UPDATE_CHROMAKEY_ENABLE    (1<<13)
+#define B1_CHROMAKEY_ENABLE           (1<<12)
+#define B1_UPDATE_Z_BIAS_ENABLE       (1<<11)
+#define B1_Z_BIAS_ENABLE              (1<<10)
+#define B1_UPDATE_SPEC_ENABLE         (1<<9)
+#define B1_SPEC_ENABLE                (1<<8)
+#define B1_UPDATE_FOG_ENABLE          (1<<7)
+#define B1_FOG_ENABLE                 (1<<6)
+#define B1_UPDATE_ALPHA_TEST_ENABLE   (1<<5)
+#define B1_ALPHA_TEST_ENABLE          (1<<4)
+#define B1_UPDATE_BLEND_ENABLE        (1<<3)
+#define B1_BLEND_ENABLE               (1<<2)
+#define B1_UPDATE_Z_TEST_ENABLE       (1<<1)
+#define B1_Z_TEST_ENABLE              (1<<0)
+
+/* GFXRENDERSTATE_BOOLEAN_ENA_2, p143
+ *
+ */
+#define GFX_OP_BOOL_2          ((0x3<<29)|(0x4<<24))
+#define B2_UPDATE_MAP_CACHE_ENABLE     (1<<17)
+#define B2_MAP_CACHE_ENABLE            (1<<16)
+#define B2_UPDATE_ALPHA_DITHER_ENABLE  (1<<15)
+#define B2_ALPHA_DITHER_ENABLE         (1<<14)
+#define B2_UPDATE_FOG_DITHER_ENABLE    (1<<13)
+#define B2_FOG_DITHER_ENABLE           (1<<12)
+#define B2_UPDATE_SPEC_DITHER_ENABLE   (1<<11)
+#define B2_SPEC_DITHER_ENABLE          (1<<10)
+#define B2_UPDATE_RGB_DITHER_ENABLE    (1<<9)
+#define B2_RGB_DITHER_ENABLE           (1<<8)
+#define B2_UPDATE_FB_WRITE_ENABLE      (1<<3)
+#define B2_FB_WRITE_ENABLE             (1<<2)
+#define B2_UPDATE_ZB_WRITE_ENABLE      (1<<1)
+#define B2_ZB_WRITE_ENABLE             (1<<0)
+
+
+/* GFXRENDERSTATE_FOG_COLOR, p144
+ */
+#define GFX_OP_FOG_COLOR       ((0x3<<29)|(0x15<<24))
+#define FOG_RED_SHIFT          16
+#define FOG_GREEN_SHIFT        8
+#define FOG_BLUE_SHIFT         0
+#define FOG_RESERVED_MASK      ((0x7<<16)|(0x3<<8)|(0x3))
+
+
+/* GFXRENDERSTATE_Z_BIAS_ALPHA_FUNC_REF, p139
+ */
+#define GFX_OP_ZBIAS_ALPHAFUNC ((0x3<<29)|(0x14<<24))
+#define ZA_UPDATE_ZBIAS        (1<<22)
+#define ZA_ZBIAS_SHIFT         14
+#define ZA_ZBIAS_MASK          (0xff<<14)
+#define ZA_UPDATE_ALPHAFUNC    (1<<13)
+#define ZA_ALPHA_MASK          (0xf<<9)
+#define ZA_ALPHA_NEVER         (1<<9)
+#define ZA_ALPHA_LESS          (2<<9)
+#define ZA_ALPHA_EQUAL         (3<<9)
+#define ZA_ALPHA_LEQUAL        (4<<9)
+#define ZA_ALPHA_GREATER       (5<<9)
+#define ZA_ALPHA_NOTEQUAL      (6<<9)
+#define ZA_ALPHA_GEQUAL        (7<<9)
+#define ZA_ALPHA_ALWAYS        (8<<9)
+#define ZA_UPDATE_ALPHAREF     (1<<8)
+#define ZA_ALPHAREF_MASK       (0xff<<0)
+#define ZA_ALPHAREF_SHIFT      0
+#define ZA_ALPHAREF_RESERVED   (0x7<<0)
+
+
+/* GFXRENDERSTATE_SRC_DST_BLEND_MONO, p136
+ */
+#define GFX_OP_SRC_DEST_MONO    ((0x3<<29)|(0x8<<24))
+#define SDM_UPDATE_MONO_ENABLE      (1<<13)
+#define SDM_MONO_ENABLE             (1<<12)
+#define SDM_UPDATE_SRC_BLEND        (1<<11)
+#define SDM_SRC_MASK               (0xf<<6)
+#define SDM_SRC_ZERO               (0x1<<6)
+#define SDM_SRC_ONE                (0x2<<6)
+#define SDM_SRC_SRC_COLOR          (0x3<<6)
+#define SDM_SRC_INV_SRC_COLOR      (0x4<<6)
+#define SDM_SRC_SRC_ALPHA          (0x5<<6)
+#define SDM_SRC_INV_SRC_ALPHA      (0x6<<6)
+#define SDM_SRC_DST_COLOR          (0x9<<6)
+#define SDM_SRC_INV_DST_COLOR      (0xa<<6)
+#define SDM_SRC_BOTH_SRC_ALPHA     (0xc<<6)
+#define SDM_SRC_BOTH_INV_SRC_ALPHA (0xd<<6)
+#define SDM_UPDATE_DST_BLEND        (1<<5)
+#define SDM_DST_MASK               (0xf<<0)
+#define SDM_DST_ZERO               (0x1<<0)
+#define SDM_DST_ONE                (0x2<<0)
+#define SDM_DST_SRC_COLOR          (0x3<<0)
+#define SDM_DST_INV_SRC_COLOR      (0x4<<0)
+#define SDM_DST_SRC_ALPHA          (0x5<<0)
+#define SDM_DST_INV_SRC_ALPHA      (0x6<<0)
+#define SDM_DST_DST_COLOR          (0x9<<0)
+#define SDM_DST_INV_DST_COLOR      (0xa<<0)
+#define SDM_DST_BOTH_SRC_ALPHA     (0xc<<0)
+#define SDM_DST_BOTH_INV_SRC_ALPHA (0xd<<0)
+
+
+/* GFXRENDERSTATE_COLOR_FACTOR, p134
+ *
+ * Format:
+ *     0: GFX_OP_COLOR_FACTOR
+ *     1: ARGB8888 color factor
+ */
+#define GFX_OP_COLOR_FACTOR      ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0)
+
+/* GFXRENDERSTATE_MAP_ALPHA_BLEND_STAGES, p132
+ */
+#define GFX_OP_MAP_ALPHA_STAGES      ((0x3<<29)|(0x1<<24))
+#define MA_STAGE_SHIFT               20
+#define MA_STAGE_0                   (0<<20)
+#define MA_STAGE_1                   (1<<20)
+#define MA_STAGE_2                   (2<<20)
+
+#define MA_ARG_ONE                   (0x0<<2)
+#define MA_ARG_ALPHA_FACTOR          (0x1<<2)
+#define MA_ARG_ITERATED_ALPHA        (0x3<<2)
+#define MA_ARG_CURRENT_ALPHA         (0x5<<2)
+#define MA_ARG_TEX0_ALPHA            (0x6<<2)
+#define MA_ARG_TEX1_ALPHA            (0x7<<2)
+#define MA_ARG_INVERT                (0x1)
+#define MA_ARG_DONT_INVERT           (0x0)
+
+#define MA_UPDATE_ARG1               (1<<18)
+#define MA_ARG1_SHIFT                13
+#define MA_ARG1_MASK                 (0x1d << MA_ARG1_SHIFT)
+
+#define MA_UPDATE_ARG2               (1<<12)
+#define MA_ARG2_SHIFT                6
+#define MA_ARG2_MASK                 (0x1d << MA_ARG2_SHIFT)
+
+#define MA_UPDATE_OP                 (1<<5)
+#define MA_OP_MASK                   (0x1f) /* bits 4:0; opcodes below run up to 0x14 */
+#define MA_OP_ARG1                   (0x1)
+#define MA_OP_ARG2                   (0x2)
+#define MA_OP_MODULATE               (0x3)
+#define MA_OP_MODULATE_X2            (0x4)
+#define MA_OP_MODULATE_X4            (0x5)
+#define MA_OP_ADD                    (0x6)
+#define MA_OP_ADD_SIGNED             (0x7)
+#define MA_OP_LIN_BLEND_ITER_ALPHA   (0x8)
+#define MA_OP_LIN_BLEND_ALPHA_FACTOR (0xa)
+#define MA_OP_LIN_BLEND_TEX0_ALPHA   (0x10)
+#define MA_OP_LIN_BLEND_TEX1_ALPHA   (0x11)
+#define MA_OP_SUBTRACT               (0x14)
+
+
+/* GFXRENDERSTATE_MAP_COLOR_BLEND_STAGES, p129
+ */
+#define GFX_OP_MAP_COLOR_STAGES        ((0x3<<29)|(0x0<<24))
+#define MC_STAGE_SHIFT                 20
+#define MC_STAGE_0                     (0<<20)
+#define MC_STAGE_1                     (1<<20)
+#define MC_STAGE_2                     (2<<20)
+#define MC_UPDATE_DEST                 (1<<19)
+#define MC_DEST_MASK                   (1<<18)
+#define MC_DEST_CURRENT                (0<<18)
+#define MC_DEST_ACCUMULATOR            (1<<18)
+
+#define MC_ARG_ONE                     (0x0<<2)
+#define MC_ARG_COLOR_FACTOR            (0x1<<2)
+#define MC_ARG_ACCUMULATOR             (0x2<<2)
+#define MC_ARG_ITERATED_COLOR          (0x3<<2)
+#define MC_ARG_SPECULAR_COLOR          (0x4<<2)
+#define MC_ARG_CURRENT_COLOR           (0x5<<2)
+#define MC_ARG_TEX0_COLOR              (0x6<<2)
+#define MC_ARG_TEX1_COLOR              (0x7<<2)
+#define MC_ARG_DONT_REPLICATE_ALPHA    (0x0<<1)
+#define MC_ARG_REPLICATE_ALPHA         (0x1<<1)
+#define MC_ARG_DONT_INVERT             (0x0)
+#define MC_ARG_INVERT                  (0x1)
+
+#define MC_UPDATE_ARG1                 (1<<17)
+#define MC_ARG1_SHIFT                  12
+#define MC_ARG1_MASK                   (0x1f << MC_ARG1_SHIFT)
+
+#define MC_UPDATE_ARG2                 (1<<11)
+#define MC_ARG2_SHIFT                  6
+#define MC_ARG2_MASK                   (0x1f << MC_ARG2_SHIFT)
+
+#define MC_UPDATE_OP                   (1<<5)
+#define MC_OP_MASK                     (0x1f) /* bits 4:0; opcodes below run up to 0x14 */
+#define MC_OP_DISABLE                  (0x0)
+#define MC_OP_ARG1                     (0x1)
+#define MC_OP_ARG2                     (0x2)
+#define MC_OP_MODULATE                 (0x3)
+#define MC_OP_MODULATE_X2              (0x4)
+#define MC_OP_MODULATE_X4              (0x5)
+#define MC_OP_ADD                      (0x6)
+#define MC_OP_ADD_SIGNED               (0x7)
+#define MC_OP_LIN_BLEND_ITER_ALPHA     (0x8)
+#define MC_OP_LIN_BLEND_ALPHA_FACTOR   (0xa)
+#define MC_OP_LIN_BLEND_TEX0_ALPHA     (0x10)
+#define MC_OP_LIN_BLEND_TEX1_ALPHA     (0x11)
+#define MC_OP_LIN_BLEND_TEX0_COLOR     (0x12)
+#define MC_OP_LIN_BLEND_TEX1_COLOR     (0x13)
+#define MC_OP_SUBTRACT                 (0x14)
+
+/* GFXRENDERSTATE_MAP_PALETTE_LOAD, p128
+ *
+ * Format:
+ *     0:  GFX_OP_MAP_PALETTE_LOAD
+ *     1:  16bpp color[0]
+ *     ...
+ *     256: 16bpp color[255]
+ */
+#define GFX_OP_MAP_PALETTE_LOAD ((0x3<<29)|(0x1d<<24)|(0x82<<16)|0xff)
+
+/* GFXRENDERSTATE_MAP_LOD_CONTROL, p127
+ */
+#define GFX_OP_MAP_LOD_CTL       ((0x3<<29)|(0x1c<<24)|(0x4<<19))
+#define MLC_MAP_ID_SHIFT         16
+#define MLC_MAP_0                (0<<16)
+#define MLC_MAP_1                (1<<16)
+#define MLC_UPDATE_DITHER_WEIGHT (1<<10)
+#define MLC_DITHER_WEIGHT_MASK   (0x3<<8)
+#define MLC_DITHER_WEIGHT_FULL   (0x0<<8)
+#define MLC_DITHER_WEIGHT_50     (0x1<<8)
+#define MLC_DITHER_WEIGHT_25     (0x2<<8)
+#define MLC_DITHER_WEIGHT_12     (0x3<<8)
+#define MLC_UPDATE_LOD_BIAS      (1<<7)
+#define MLC_LOD_BIAS_MASK        ((1<<7)-1)
+
+/* GFXRENDERSTATE_MAP_LOD_LIMITS, p126
+ */
+#define GFX_OP_MAP_LOD_LIMITS   ((0x3<<29)|(0x1c<<24)|(0x3<<19))
+#define MLL_MAP_ID_SHIFT         16
+#define MLL_MAP_0                (0<<16)
+#define MLL_MAP_1                (1<<16)
+#define MLL_UPDATE_MAX_MIP       (1<<13)
+#define MLL_MAX_MIP_SHIFT        5
+#define MLL_MAX_MIP_MASK         (0xff<<5)
+#define MLL_MAX_MIP_ONE          (0x10<<5)
+#define MLL_UPDATE_MIN_MIP       (1<<4)
+#define MLL_MIN_MIP_SHIFT        0
+#define MLL_MIN_MIP_MASK         (0xf<<0)
+
+/* GFXRENDERSTATE_MAP_FILTER, p124
+ */
+#define GFX_OP_MAP_FILTER       ((0x3<<29)|(0x1c<<24)|(0x2<<19))
+#define MF_MAP_ID_SHIFT         16
+#define MF_MAP_0                (0<<16)
+#define MF_MAP_1                (1<<16)
+#define MF_UPDATE_ANISOTROPIC   (1<<12)
+#define MF_ANISOTROPIC_MASK     (1<<10)
+#define MF_ANISOTROPIC_ENABLE   (1<<10)
+#define MF_UPDATE_MIP_FILTER    (1<<9)
+#define MF_MIP_MASK             (0x3<<6)
+#define MF_MIP_NONE             (0x0<<6)
+#define MF_MIP_NEAREST          (0x1<<6)
+#define MF_MIP_DITHER           (0x2<<6)
+#define MF_MIP_LINEAR           (0x3<<6)
+#define MF_UPDATE_MAG_FILTER    (1<<5)
+#define MF_MAG_MASK             (1<<3)
+#define MF_MAG_LINEAR           (1<<3)
+#define MF_MAG_NEAREST          (0<<3)
+#define MF_UPDATE_MIN_FILTER    (1<<2)
+#define MF_MIN_MASK             (1<<0)
+#define MF_MIN_LINEAR           (1<<0)
+#define MF_MIN_NEAREST          (0<<0)
+
+/* GFXRENDERSTATE_MAP_INFO, p118
+ */
+#define GFX_OP_MAP_INFO      ((0x3<<29)|(0x1d<<24)|0x2)
+#define MI1_MAP_ID_SHIFT         28
+#define MI1_MAP_0                (0<<28)
+#define MI1_MAP_1                (1<<28)
+#define MI1_FMT_MASK             (0x7<<24)
+#define MI1_FMT_8CI              (0x0<<24)
+#define MI1_FMT_8BPP             (0x1<<24)
+#define MI1_FMT_16BPP            (0x2<<24)
+#define MI1_FMT_422              (0x5<<24)
+#define MI1_PF_MASK              (0x3<<21)
+#define MI1_PF_8CI_RGB565         (0x0<<21)
+#define MI1_PF_8CI_ARGB1555       (0x1<<21)
+#define MI1_PF_8CI_ARGB4444       (0x2<<21)
+#define MI1_PF_8CI_AY88           (0x3<<21)
+#define MI1_PF_16BPP_RGB565       (0x0<<21)
+#define MI1_PF_16BPP_ARGB1555     (0x1<<21)
+#define MI1_PF_16BPP_ARGB4444     (0x2<<21)
+#define MI1_PF_16BPP_AY88         (0x3<<21)
+#define MI1_PF_422_YCRCB_SWAP_Y   (0x0<<21)
+#define MI1_PF_422_YCRCB          (0x1<<21)
+#define MI1_PF_422_YCRCB_SWAP_UV  (0x2<<21)
+#define MI1_PF_422_YCRCB_SWAP_YUV (0x3<<21)
+#define MI1_OUTPUT_CHANNEL_MASK   (0x3<<19)
+#define MI1_COLOR_CONV_ENABLE     (1<<18)
+#define MI1_VERT_STRIDE_MASK      (1<<17)
+#define MI1_VERT_STRIDE_1         (1<<17)
+#define MI1_VERT_OFFSET_MASK      (1<<16)
+#define MI1_VERT_OFFSET_1         (1<<16)
+#define MI1_ENABLE_FENCE_REGS     (1<<10)
+#define MI1_TILED_SURFACE         (1<<9)
+#define MI1_TILE_WALK_X           (0<<8)
+#define MI1_TILE_WALK_Y           (1<<8)
+#define MI1_PITCH_MASK            (0xf<<0)
+#define MI2_DIMENSIONS_ARE_LOG2   (1<<31)
+#define MI2_DIMENSIONS_ARE_EXACT  (0<<31)
+#define MI2_HEIGHT_SHIFT          16
+#define MI2_HEIGHT_MASK           (0x1ff<<16)
+#define MI2_WIDTH_SHIFT           0
+#define MI2_WIDTH_MASK            (0x1ff<<0)
+#define MI3_BASE_ADDR_MASK        (~0xf)
+
+/* GFXRENDERSTATE_MAP_COORD_SETS, p116
+ */
+#define GFX_OP_MAP_COORD_SETS ((0x3<<29)|(0x1c<<24)|(0x1<<19))
+#define MCS_COORD_ID_SHIFT         16
+#define MCS_COORD_0                (0<<16)
+#define MCS_COORD_1                (1<<16)
+#define MCS_UPDATE_NORMALIZED      (1<<15)
+#define MCS_NORMALIZED_COORDS_MASK (1<<14)
+#define MCS_NORMALIZED_COORDS      (1<<14)
+#define MCS_UPDATE_V_STATE         (1<<7)
+#define MCS_V_STATE_MASK           (0x3<<4)
+#define MCS_V_WRAP                 (0x0<<4)
+#define MCS_V_MIRROR               (0x1<<4)
+#define MCS_V_CLAMP                (0x2<<4)
+#define MCS_V_WRAP_SHORTEST        (0x3<<4)
+#define MCS_UPDATE_U_STATE         (1<<3)
+#define MCS_U_STATE_MASK           (0x3<<0)
+#define MCS_U_WRAP                 (0x0<<0)
+#define MCS_U_MIRROR               (0x1<<0)
+#define MCS_U_CLAMP                (0x2<<0)
+#define MCS_U_WRAP_SHORTEST        (0x3<<0)
+
+/* GFXRENDERSTATE_MAP_TEXELS, p115
+ */
+#define GFX_OP_MAP_TEXELS   ((0x3<<29)|(0x1c<<24)|(0x0<<19))
+#define MT_UPDATE_TEXEL1_STATE     (1<<15)
+#define MT_TEXEL1_DISABLE          (0<<14)
+#define MT_TEXEL1_ENABLE           (1<<14)
+#define MT_TEXEL1_COORD0           (0<<11)
+#define MT_TEXEL1_COORD1           (1<<11)
+#define MT_TEXEL1_MAP0             (0<<8)
+#define MT_TEXEL1_MAP1             (1<<8)
+#define MT_UPDATE_TEXEL0_STATE     (1<<7)
+#define MT_TEXEL0_DISABLE          (0<<6)
+#define MT_TEXEL0_ENABLE           (1<<6)
+#define MT_TEXEL0_COORD0           (0<<3)
+#define MT_TEXEL0_COORD1           (1<<3)
+#define MT_TEXEL0_MAP0             (0<<0)
+#define MT_TEXEL0_MAP1             (1<<0)
+
+/* GFXRENDERSTATE_VERTEX_FORMAT, p110
+ */
+#define GFX_OP_VERTEX_FMT  ((0x3<<29)|(0x5<<24))
+#define VF_TEXCOORD_COUNT_SHIFT    8
+#define VF_TEXCOORD_COUNT_0        (0<<8)
+#define VF_TEXCOORD_COUNT_1        (1<<8)
+#define VF_TEXCOORD_COUNT_2        (2<<8)
+#define VF_SPEC_FOG_ENABLE         (1<<7)
+#define VF_RGBA_ENABLE             (1<<6)
+#define VF_Z_OFFSET_ENABLE         (1<<5)
+#define VF_XYZ                     (0x1<<1)
+#define VF_XYZW                    (0x2<<1)
+#define VF_XY                      (0x3<<1)
+#define VF_XYW                     (0x4<<1)
+
+
+#define VERT_X_MASK       (~0xf)
+#define VERT_X_EDGE_V2V0  (1<<2)
+#define VERT_X_EDGE_V1V2  (1<<1)
+#define VERT_X_EDGE_V0V1  (1<<0)
+
+/* Layout of a vertex with every optional field present.
+ *
+ * Not enabled fields should not be sent to hardware:
+ * presumably the VF_* bits of GFX_OP_VERTEX_FMT select which of the
+ * optional members are actually emitted -- TODO confirm against the
+ * vertex emit code.
+ */
+typedef struct {
+   /* The x coordinate shares its storage with the edge-flag word; see
+    * VERT_X_MASK and VERT_X_EDGE_* for the bits involved.
+    */
+   union {
+      float x;
+      unsigned int edge_flags;
+   } x;
+   float y;
+   float z;
+   float z_bias;
+   float oow;
+   unsigned int argb;
+   unsigned int fog_spec_rgb;	/* spec g and r ignored. */
+   float tu0;
+   float tv0;
+   float tu1;
+   float tv1;
+} i810_full_vertex;
+
+
+
+/* GFXCMDPARSER_BATCH_BUFFER, p105
+ *
+ * Not clear whether start address must be shifted or not.  Not clear
+ * whether address is physical system memory, or subject to GTT
+ * translation.  Because the address appears to be 32 bits long,
+ * perhaps it refers to physical system memory...
+ */
+#define CMD_OP_BATCH_BUFFER  ((0x0<<29)|(0x30<<23)|0x1)
+#define BB1_START_ADDR_MASK   (~0x7)
+#define BB1_PROTECTED         (1<<0)
+#define BB1_UNPROTECTED       (0<<0)
+#define BB2_END_ADDR_MASK     (~0x7)
+
+/* Hardware seems to barf on buffers larger than this (in strange ways)...
+ */
+#define MAX_BATCH (512*1024)
+
+
+/* GFXCMDPARSER_Z_BUFFER_INFO, p98
+ *
+ * Base address is in GTT space, and must be 4K aligned
+ */
+#define CMD_OP_Z_BUFFER_INFO  ((0x0<<29)|(0x16<<23))
+#define ZB_BASE_ADDR_SHIFT     0
+#define ZB_BASE_ADDR_MASK     (~((1<<12)-1))
+#define ZB_PITCH_512B         (0x0<<0)
+#define ZB_PITCH_1K           (0x1<<0)
+#define ZB_PITCH_2K           (0x2<<0)
+#define ZB_PITCH_4K           (0x3<<0)
+
+/* GFXCMDPARSER_FRONT_BUFFER_INFO, p97
+ *
+ * Format:
+ *     0:  CMD_OP_FRONT_BUFFER_INFO | (pitch<<FB0_PITCH_SHIFT) | FB0_*
+ *     1:  FB1_*
+ */
+#define CMD_OP_FRONT_BUFFER_INFO ((0x0<<29)|(0x14<<23))
+#define FB0_PITCH_SHIFT           8
+#define FB0_FLIP_SYNC            (0<<6)
+#define FB0_FLIP_ASYNC           (1<<6)
+#define FB0_BASE_ADDR_SHIFT       0
+#define FB0_BASE_ADDR_MASK        0x03FFFFF8
+
+/* GFXCMDPARSER_DEST_BUFFER_INFO, p96
+ *
+ * Format:
+ */
+#define CMD_OP_DESTBUFFER_INFO ((0x0<<29)|(0x15<<23))
+#define DB1_BASE_ADDR_SHIFT       0
+#define DB1_BASE_ADDR_MASK        0x03FFF000
+#define DB1_PITCH_512B            (0x0<<0)
+#define DB1_PITCH_1K              (0x1<<0)
+#define DB1_PITCH_2K              (0x2<<0)
+/* NOTE(review): 0x4 breaks the 0,1,2,3 sequence used by ZB_PITCH_* above
+ * (ZB_PITCH_4K is 0x3) -- confirm the encoding against the hardware manual.
+ */
+#define DB1_PITCH_4K              (0x4<<0)
+
+
+/* GFXRENDERSTATE_DEST_BUFFER_VARIABLES, p152
+ *
+ * Format:
+ *     0:  GFX_OP_DESTBUFFER_VARS
+ *     1:  DEST_*
+ */
+#define GFX_OP_DESTBUFFER_VARS   ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0)
+#define DV_HORG_BIAS_MASK      (0xf<<20)
+#define DV_HORG_BIAS_OGL       (0x0<<20)
+#define DV_VORG_BIAS_MASK      (0xf<<16)
+#define DV_VORG_BIAS_OGL       (0x0<<16)
+#define DV_PF_MASK             (0x7<<8)
+#define DV_PF_INDEX            (0x0<<8)
+#define DV_PF_555           (0x1<<8)
+#define DV_PF_565           (0x2<<8)
+
+#define GFX_OP_ANTIALIAS         ((0x3<<29)|(0x6<<24))
+#define AA_UPDATE_EDGEFLAG       (1<<13)
+#define AA_ENABLE_EDGEFLAG       (1<<12)
+#define AA_UPDATE_POLYWIDTH      (1<<11)
+#define AA_POLYWIDTH_05          (1<<9)
+#define AA_POLYWIDTH_10          (2<<9)
+#define AA_POLYWIDTH_20          (3<<9)
+#define AA_POLYWIDTH_40          (4<<9)
+#define AA_UPDATE_LINEWIDTH      (1<<8)
+#define AA_LINEWIDTH_05          (1<<6)
+#define AA_LINEWIDTH_10          (2<<6)
+#define AA_LINEWIDTH_20          (3<<6)
+#define AA_LINEWIDTH_40          (4<<6)
+#define AA_UPDATE_BB_EXPANSION   (1<<5)
+#define AA_BB_EXPANSION_SHIFT    2
+#define AA_UPDATE_AA_ENABLE      (1<<1)
+#define AA_ENABLE                (1<<0)
+
+#define GFX_OP_STIPPLE           ((0x3<<29)|(0x1d<<24)|(0x83<<16))
+#define ST1_ENABLE               (1<<16)
+#define ST1_MASK                 (0xffff)
+
+/* Clear the bits of `mask` in `var`, then OR in `value`.
+ *
+ * `value` is not masked, so the caller must pass a value that already lies
+ * inside `mask`.  `var` is evaluated twice: do not pass an expression with
+ * side effects.  All arguments are parenthesized so that compound
+ * expressions (e.g. `*p`, `a ? b : c`) expand safely.
+ */
+#define I810_SET_FIELD( var, mask, value ) ((var) &= ~(mask), (var) |= (value))
+
+#endif
diff --git a/src/mesa/drivers/dri/i810/i810context.c b/src/mesa/drivers/dri/i810/i810context.c
new file mode 100644
index 0000000000..49f3ee88a6
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810context.c
@@ -0,0 +1,573 @@
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+#include "main/points.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "vbo/vbo.h"
+
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "i810screen.h"
+#include "i810_dri.h"
+
+#include "i810state.h"
+#include "i810tex.h"
+#include "i810span.h"
+#include "i810tris.h"
+#include "i810vb.h"
+#include "i810ioctl.h"
+
+#include "drirenderbuffer.h"
+#include "utils.h"
+
+#ifndef I810_DEBUG
+int I810_DEBUG = (0);
+#endif
+
+PUBLIC const char __driConfigOptions[] = { 0 };
+const GLuint __driNConfigOptions = 0;
+
+#define DRIVER_DATE                     "20050821"
+
+/* Driver.GetString hook.
+ *
+ * Answers GL_VENDOR with a fixed string and GL_RENDERER with a string that
+ * driGetRendererString() formats into a static buffer from the chipset name
+ * (chosen by the screen's PCI device id) and DRIVER_DATE.  Any other name
+ * returns NULL.
+ */
+static const GLubyte *i810GetString( GLcontext *ctx, GLenum name )
+{
+   static char renderer[128];
+
+   if (name == GL_VENDOR)
+      return (GLubyte *)"Keith Whitwell";
+
+   if (name == GL_RENDERER) {
+      i810ContextPtr imesa = I810_CONTEXT(ctx);
+      const char *chipset = "Unknown i810-class Chipset";
+
+      switch (imesa->i810Screen->deviceID) {
+      case PCI_CHIP_I810:
+         chipset = "i810";
+         break;
+      case PCI_CHIP_I810_DC100:
+         chipset = "i810 DC-100";
+         break;
+      case PCI_CHIP_I810_E:
+         chipset = "i810E";
+         break;
+      case PCI_CHIP_I815:
+         chipset = "i815";
+         break;
+      default:
+         break;
+      }
+
+      (void) driGetRendererString( renderer, chipset, DRIVER_DATE, 0 );
+      return (GLubyte *) renderer;
+   }
+
+   return NULL;
+}
+
+/* Driver.GetBufferSize hook: report the size of the current context's
+ * drawable.  The `buffer` argument is not consulted; the drawable comes
+ * from the current context.
+ */
+static void i810BufferSize(GLframebuffer *buffer, GLuint *width, GLuint *height)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   __DRIdrawable *drawable;
+
+   /* The drawable info is only guaranteed current while the hardware
+    * lock is held, and Mesa sizes its software buffers from the values
+    * returned here, so read them under the lock.
+    */
+   LOCK_HARDWARE(imesa);
+   drawable = imesa->driDrawable;
+   *width = drawable->w;
+   *height = drawable->h;
+   UNLOCK_HARDWARE(imesa);
+}
+
+/* Extension strings exported by the i810 driver.
+ *
+ * Handed to driInitExtensions() by i810CreateContext().  Every entry has a
+ * NULL second member, and the table ends with a { NULL, NULL } sentinel.
+ *
+ * NOTE(review): GL_EXT_texture_rectangle is listed here, yet
+ * i810CreateContext() passes 0 ("texture rectangles unsupported") to
+ * driCalculateMaxTextureLevels() -- confirm which of the two is intended.
+ */
+static const struct dri_extension card_extensions[] =
+{
+    { "GL_ARB_multitexture",               NULL },
+    { "GL_ARB_texture_env_add",            NULL },
+    { "GL_ARB_texture_env_combine",        NULL },
+    { "GL_ARB_texture_env_crossbar",       NULL },
+    { "GL_ARB_texture_mirrored_repeat",    NULL },
+    { "GL_EXT_stencil_wrap",               NULL },
+    { "GL_EXT_texture_edge_clamp",         NULL },
+    { "GL_EXT_texture_env_combine",        NULL },
+    { "GL_EXT_texture_lod_bias",           NULL },
+    { "GL_EXT_texture_rectangle",          NULL },
+    { "GL_MESA_ycbcr_texture",             NULL },
+    { "GL_NV_blend_square",                NULL },
+    { "GL_SGIS_generate_mipmap",           NULL },
+    { NULL,                                NULL }
+};
+
+extern const struct tnl_pipeline_stage _i810_render_stage;
+
+/* TNL pipeline installed by i810CreateContext() through
+ * _tnl_install_pipeline().  The list is terminated by a 0 entry.  The
+ * driver's own _i810_render_stage sits directly ahead of the generic
+ * _tnl_render_stage.
+ */
+static const struct tnl_pipeline_stage *i810_pipeline[] = {
+   &_tnl_vertex_transform_stage,
+   &_tnl_normal_transform_stage,
+   &_tnl_lighting_stage,
+   &_tnl_fog_coordinate_stage,
+   &_tnl_texgen_stage,
+   &_tnl_texture_transform_stage,
+				/* REMOVE: point attenuation stage */
+#if 1
+   &_i810_render_stage,		/* ADD: unclipped rastersetup-to-dma */
+#endif
+   &_tnl_render_stage,
+   0,
+};
+
+/* Maps debug option names to DEBUG_* flag bits.  When DO_DEBUG is set,
+ * i810CreateContext() passes this table to driParseDebugString() together
+ * with the I810_DEBUG and INTEL_DEBUG environment variables and stores the
+ * combined result in I810_DEBUG.  Terminated by a { NULL, 0 } entry.
+ */
+static const struct dri_debug_control debug_control[] =
+{
+    { "fall",  DEBUG_FALLBACKS },
+    { "tex",   DEBUG_TEXTURE },
+    { "ioctl", DEBUG_IOCTL },
+    { "prim",  DEBUG_PRIMS },
+    { "vert",  DEBUG_VERTS },
+    { "state", DEBUG_STATE },
+    { "verb",  DEBUG_VERBOSE },
+    { "dri",   DEBUG_DRI },
+    { "dma",   DEBUG_DMA },
+    { "san",   DEBUG_SANITY },
+    { "sync",  DEBUG_SYNC },
+    { "sleep", DEBUG_SLEEP },
+    { NULL,    0 }
+};
+
+/* Create and initialize an i810 rendering context.
+ *
+ * api                  - requested GL API (not consulted here).
+ * mesaVis              - visual used to create the Mesa context.
+ * driContextPriv       - DRI context; on success its driverPrivate points
+ *                        at the new i810 context.
+ * sharedContextPrivate - optional i810 context to share objects with,
+ *                        or NULL.
+ *
+ * Returns GL_TRUE on success.  Returns GL_FALSE if either the driver
+ * context or the Mesa context cannot be allocated; in that case
+ * driContextPriv->driverPrivate is left NULL rather than pointing at
+ * freed memory.
+ */
+GLboolean
+i810CreateContext( gl_api api,
+		   const __GLcontextModes *mesaVis,
+                   __DRIcontext *driContextPriv,
+                   void *sharedContextPrivate )
+{
+   GLcontext *ctx, *shareCtx;
+   i810ContextPtr imesa;
+   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+   i810ScreenPrivate *i810Screen = (i810ScreenPrivate *)sPriv->private;
+   I810SAREAPtr saPriv = (I810SAREAPtr)
+      (((GLubyte *)sPriv->pSAREA) + i810Screen->sarea_priv_offset);
+   struct dd_function_table functions;
+
+   /* Allocate i810 context */
+   imesa = (i810ContextPtr) CALLOC_STRUCT(i810_context_t);
+   if (!imesa) {
+      return GL_FALSE;
+   }
+
+   driContextPriv->driverPrivate = imesa;
+
+   imesa->i810Screen = i810Screen;
+   imesa->driScreen = sPriv;
+   imesa->sarea = saPriv;
+   imesa->glBuffer = NULL;
+
+   /* Init default driver functions then plug in our I810-specific functions
+    * (the texture functions are especially important)
+    */
+   _mesa_init_driver_functions( &functions );
+   i810InitIoctlFuncs( &functions );
+   i810InitTextureFuncs( &functions );
+
+
+   /* Allocate the Mesa context */
+   if (sharedContextPrivate)
+      shareCtx = ((i810ContextPtr) sharedContextPrivate)->glCtx;
+   else
+      shareCtx = NULL;
+   imesa->glCtx = _mesa_create_context(mesaVis, shareCtx,
+                                       &functions, (void*) imesa);
+   if (!imesa->glCtx) {
+      /* Do not leave the DRI context pointing at the block we are
+       * about to free.
+       */
+      driContextPriv->driverPrivate = NULL;
+      FREE(imesa);
+      return GL_FALSE;
+   }
+
+   (void) memset( imesa->texture_heaps, 0, sizeof( imesa->texture_heaps ) );
+   make_empty_list( & imesa->swapped );
+   
+   imesa->nr_heaps = 1;
+   imesa->texture_heaps[0] = driCreateTextureHeap( 0, imesa,
+	    i810Screen->textureSize,
+	    12,
+	    I810_NR_TEX_REGIONS,
+	    imesa->sarea->texList,
+	    (unsigned *) & imesa->sarea->texAge, /* XXX we shouldn't cast! */
+	    & imesa->swapped,
+	    sizeof( struct i810_texture_object_t ),
+	    (destroy_texture_object_t *) i810DestroyTexObj );
+
+
+
+   /* Set the maximum texture size small enough that we can guarantee
+    * that both texture units can bind a maximal texture and have them
+    * in memory at once.
+    */
+
+
+
+   ctx = imesa->glCtx;
+   ctx->Const.MaxTextureUnits = 2;
+   ctx->Const.MaxTextureImageUnits = 2;
+   ctx->Const.MaxTextureCoordUnits = 2;
+
+
+   /* FIXME: driCalculateMaxTextureLevels assumes that mipmaps are tightly
+    * FIXME: packed, but they're not in Intel graphics hardware.
+    */
+   driCalculateMaxTextureLevels( imesa->texture_heaps,
+				 imesa->nr_heaps,
+				 & ctx->Const,
+				 4,
+				 11, /* max 2D texture size is 2048x2048 */
+				 0,  /* 3D textures unsupported */
+				 0,  /* cube textures unsupported. */
+				 0,  /* texture rectangles unsupported. */
+				 12,
+				 GL_FALSE,
+				 0 );
+
+   ctx->Const.MinLineWidth = 1.0;
+   ctx->Const.MinLineWidthAA = 1.0;
+   ctx->Const.MaxLineWidth = 3.0;
+   ctx->Const.MaxLineWidthAA = 3.0;
+   ctx->Const.LineWidthGranularity = 1.0;
+
+   ctx->Const.MinPointSize = 1.0;
+   ctx->Const.MinPointSizeAA = 1.0;
+   ctx->Const.MaxPointSize = 3.0;
+   ctx->Const.MaxPointSizeAA = 3.0;
+   ctx->Const.PointSizeGranularity = 1.0;
+
+   /* Reinitialize the context point state.
+    * It depends on constants in __GLcontextRec::Const
+    */
+   _mesa_init_point(ctx);
+
+   ctx->Driver.GetBufferSize = i810BufferSize;
+   ctx->Driver.GetString = i810GetString;
+
+   /* Who owns who?
+    */
+   ctx->DriverCtx = (void *) imesa;
+   imesa->glCtx = ctx;
+
+   /* Initialize the software rasterizer and helper modules.
+    */
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+
+   /* Install the customized pipeline:
+    */
+   _tnl_destroy_pipeline( ctx );
+   _tnl_install_pipeline( ctx, i810_pipeline );
+
+   /* Configure swrast and T&L to match hardware characteristics:
+    */
+   _swrast_allow_pixel_fog( ctx, GL_FALSE );
+   _swrast_allow_vertex_fog( ctx, GL_TRUE );
+   _tnl_allow_pixel_fog( ctx, GL_FALSE );
+   _tnl_allow_vertex_fog( ctx, GL_TRUE );
+
+   /* Dri stuff
+    */
+   imesa->hHWContext = driContextPriv->hHWContext;
+   imesa->driFd = sPriv->fd;
+   imesa->driHwLock = &sPriv->pSAREA->lock;
+
+   imesa->stipple_in_hw = 1;
+   imesa->RenderIndex = ~0;
+   imesa->dirty = I810_UPLOAD_CTX|I810_UPLOAD_BUFFERS;
+   imesa->upload_cliprects = GL_TRUE;
+
+   imesa->CurrentTexObj[0] = 0;
+   imesa->CurrentTexObj[1] = 0;
+
+   _math_matrix_ctr( &imesa->ViewportMatrix );
+
+   driInitExtensions( ctx, card_extensions, GL_TRUE );
+   /* XXX these should really go right after _mesa_init_driver_functions() */
+   i810InitStateFuncs( ctx );
+   i810InitTriFuncs( ctx );
+   i810InitSpanFuncs( ctx );
+   i810InitVB( ctx );
+   i810InitState( ctx );
+
+#if DO_DEBUG
+   I810_DEBUG  = driParseDebugString( getenv( "I810_DEBUG" ),
+				      debug_control );
+   I810_DEBUG |= driParseDebugString( getenv( "INTEL_DEBUG" ),
+				      debug_control );
+#endif
+
+   return GL_TRUE;
+}
+
+/* Tear down an i810 context: destroy the swsetup/tnl/vbo/swrast helper
+ * contexts, free the vertex buffer state and the Mesa context, release
+ * the texture heaps when this was the last context of its share group,
+ * and finally free the driver context itself.
+ */
+void
+i810DestroyContext(__DRIcontext *driContextPriv)
+{
+   i810ContextPtr imesa = (i810ContextPtr) driContextPriv->driverPrivate;
+   GLcontext *ctx;
+   GLboolean last_in_share_group;
+
+   assert(imesa); /* should never be null */
+   if (!imesa)
+      return;
+
+   ctx = imesa->glCtx;
+
+   /* Must be sampled before the Mesa context is destroyed. */
+   last_in_share_group = (ctx->Shared->RefCount == 1);
+
+   _swsetup_DestroyContext( ctx );
+   _tnl_DestroyContext( ctx );
+   _vbo_DestroyContext( ctx );
+   _swrast_DestroyContext( ctx );
+
+   i810FreeVB( ctx );
+
+   /* free the Mesa context */
+   ctx->DriverCtx = NULL;
+   _mesa_destroy_context( ctx );
+
+   if ( last_in_share_group ) {
+      /* The share group is gone, so drop our private texture object
+       * data along with it.
+       */
+      unsigned int heap;
+
+      for ( heap = 0 ; heap < imesa->nr_heaps ; heap++ ) {
+         driDestroyTextureHeap( imesa->texture_heaps[ heap ] );
+         imesa->texture_heaps[ heap ] = NULL;
+      }
+
+      assert( is_empty_list( & imesa->swapped ) );
+   }
+
+   FREE(imesa);
+}
+
+
+/* Point the context at the drawable's front-buffer origin and cliprects,
+ * emit the drawing rectangle, and flag the cliprects for upload.
+ */
+void i810XMesaSetFrontClipRects( i810ContextPtr imesa )
+{
+   __DRIdrawable *drawable = imesa->driDrawable;
+
+   imesa->drawX = drawable->x;
+   imesa->drawY = drawable->y;
+   imesa->pClipRects = drawable->pClipRects;
+   imesa->numClipRects = drawable->numClipRects;
+
+   i810EmitDrawingRectangle( imesa );
+   imesa->upload_cliprects = GL_TRUE;
+}
+
+
+/* Point the context at the origin and cliprects to use for back-buffer
+ * rendering, emit the drawing rectangle, and flag the cliprects for upload.
+ *
+ * The drawable's dedicated back cliprects are used when page flipping is
+ * enabled in the SAREA or when the drawable has any back cliprects;
+ * otherwise the front-buffer values are used.
+ */
+void i810XMesaSetBackClipRects( i810ContextPtr imesa )
+{
+   __DRIdrawable *drawable = imesa->driDrawable;
+   const GLboolean use_back_rects =
+      (imesa->sarea->pf_enabled != 0 || drawable->numBackClipRects != 0);
+
+   if (use_back_rects) {
+      imesa->drawX = drawable->backX;
+      imesa->drawY = drawable->backY;
+      imesa->pClipRects = drawable->pBackClipRects;
+      imesa->numClipRects = drawable->numBackClipRects;
+   }
+   else {
+      imesa->drawX = drawable->x;
+      imesa->drawY = drawable->y;
+      imesa->pClipRects = drawable->pClipRects;
+      imesa->numClipRects = drawable->numClipRects;
+   }
+
+   i810EmitDrawingRectangle( imesa );
+   imesa->upload_cliprects = GL_TRUE;
+}
+
+
+/* Refresh origin and cliprects for the current color draw buffer.
+ *
+ * Only BUFFER_BACK_LEFT selects the back-buffer cliprects.  Every other
+ * value, including glDrawBuffer(GL_NONE or GL_FRONT_AND_BACK), which is a
+ * software fallback, uses the front-buffer cliprects.
+ */
+static void i810XMesaWindowMoved( i810ContextPtr imesa )
+{
+   if (imesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT)
+      i810XMesaSetBackClipRects( imesa );
+   else
+      i810XMesaSetFrontClipRects( imesa );
+}
+
+
+/* Called when the context is unbound.  Marks the context and buffer
+ * state, plus the state of every texture unit that has a texture object
+ * bound, as dirty.  Always returns GL_TRUE.
+ */
+GLboolean
+i810UnbindContext(__DRIcontext *driContextPriv)
+{
+   i810ContextPtr imesa = (i810ContextPtr) driContextPriv->driverPrivate;
+
+   if (imesa) {
+      GLuint dirty = I810_UPLOAD_CTX|I810_UPLOAD_BUFFERS;
+
+      if (imesa->CurrentTexObj[0])
+         dirty |= I810_UPLOAD_TEX0;
+      if (imesa->CurrentTexObj[1])
+         dirty |= I810_UPLOAD_TEX1;
+
+      imesa->dirty = dirty;
+   }
+
+   return GL_TRUE;
+}
+
+
+/* Bind a context to a draw/read drawable pair, or unbind the current
+ * context when driContextPriv is NULL.  Always returns GL_TRUE.
+ */
+GLboolean
+i810MakeCurrent(__DRIcontext *driContextPriv,
+                __DRIdrawable *driDrawPriv,
+                __DRIdrawable *driReadPriv)
+{
+   i810ContextPtr imesa;
+
+   if (!driContextPriv) {
+      _mesa_make_current(NULL, NULL, NULL);
+      return GL_TRUE;
+   }
+
+   imesa = (i810ContextPtr) driContextPriv->driverPrivate;
+
+   /* Only the draw drawable is remembered.
+    * Shouldn't the readbuffer be stored also?
+    */
+   imesa->driDrawable = driDrawPriv;
+
+   _mesa_make_current(imesa->glCtx,
+                      (GLframebuffer *) driDrawPriv->driverPrivate,
+                      (GLframebuffer *) driReadPriv->driverPrivate);
+
+   /* Are these necessary?
+    */
+   i810XMesaWindowMoved( imesa );
+
+   return GL_TRUE;
+}
+
+/* Re-target rendering after a page flip.
+ *
+ * Works out whether the current color draw buffer now maps to the front
+ * or the back surface (the sense is inverted while the SAREA reports
+ * pf_current_page == 1), flips the renderbuffers to match, rewrites the
+ * destination buffer register and marks the buffer state dirty.  Does
+ * nothing when the draw buffer is neither front-left nor back-left.
+ */
+static void
+i810UpdatePageFlipping( i810ContextPtr imesa )
+{
+   GLcontext *ctx = imesa->glCtx;
+   i810ScreenPrivate *screen = imesa->i810Screen;
+   int front;
+
+   /* Determine current color drawing buffer */
+   if (ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT)
+      front = 1;
+   else if (ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT)
+      front = 0;
+   else
+      return;
+
+   if ( imesa->sarea->pf_current_page == 1 )
+      front = !front;
+
+   driFlipRenderbuffers(ctx->WinSysDrawBuffer, front);
+
+   /* Both branches use the back buffer's pitch bits. */
+   if (front)
+      imesa->BufferSetup[I810_DESTREG_DI1] = screen->fbOffset | screen->backPitchBits;
+   else
+      imesa->BufferSetup[I810_DESTREG_DI1] = screen->backOffset | screen->backPitchBits;
+
+   imesa->dirty |= I810_UPLOAD_BUFFERS;
+}
+
+/* Take the hardware lock through drmGetLock() and bring the context's view
+ * of shared state up to date.  LOCK_HARDWARE() calls this with flags == 0
+ * when its DRM_CAS attempt reports failure.
+ *
+ * imesa - context acquiring the lock.
+ * flags - passed straight through to drmGetLock().
+ *
+ * After the lock is held this revalidates the drawable info, re-dirties
+ * all hardware state if another context owned the hardware last, ages the
+ * texture heaps, and refreshes page-flip and cliprect state when the
+ * drawable's stamp has changed.
+ */
+void i810GetLock( i810ContextPtr imesa, GLuint flags )
+{
+   __DRIdrawable *dPriv = imesa->driDrawable;
+   __DRIscreen *sPriv = imesa->driScreen;
+   I810SAREAPtr sarea = imesa->sarea;
+   int me = imesa->hHWContext;
+   unsigned i;
+
+   drmGetLock(imesa->driFd, imesa->hHWContext, flags);
+
+   /* If the window moved, may need to set a new cliprect now.
+    *
+    * NOTE: This releases and regains the hw lock, so all state
+    * checking must be done *after* this call:
+    */
+   DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv);
+
+
+   /* If we lost context, need to dump all registers to hardware.
+    * Note that we don't care about 2d contexts, even if they perform
+    * accelerated commands, so the DRI locking in the X server is even
+    * more broken than usual.
+    */
+   if (sarea->ctxOwner != me) {
+      driUpdateFramebufferSize(imesa->glCtx, dPriv);
+      imesa->upload_cliprects = GL_TRUE;
+      imesa->dirty = I810_UPLOAD_CTX|I810_UPLOAD_BUFFERS;
+      if (imesa->CurrentTexObj[0]) imesa->dirty |= I810_UPLOAD_TEX0;
+      if (imesa->CurrentTexObj[1]) imesa->dirty |= I810_UPLOAD_TEX1;
+      /* Record ourselves as the owner so the next lock can skip this. */
+      sarea->ctxOwner = me;
+   }
+
+   /* Shared texture management - if another client has played with
+    * texture space, figure out which if any of our textures have been
+    * ejected, and update our global LRU.
+    */
+   for ( i = 0 ; i < imesa->nr_heaps ; i++ ) {
+      DRI_AGE_TEXTURES( imesa->texture_heaps[ i ] );
+   }
+
+   /* A changed stamp means the drawable info was refreshed: redo the
+    * page-flip target and the cliprects, then remember the new stamp.
+    */
+   if (imesa->lastStamp != dPriv->lastStamp) {
+      i810UpdatePageFlipping( imesa );
+      i810XMesaWindowMoved( imesa );
+      imesa->lastStamp = dPriv->lastStamp;
+   }
+}
+
+
+/* Swap the buffers of a drawable.
+ *
+ * Reports a Mesa problem when the drawable has no context bound.  For
+ * single-buffered visuals this is a no-op.  Otherwise pending rendering
+ * is flushed and the swap is done by page flip when the SAREA reports
+ * page flipping active, or by a buffer copy if not.
+ */
+void
+i810SwapBuffers( __DRIdrawable *dPriv )
+{
+   i810ContextPtr imesa;
+   GLcontext *ctx;
+
+   if (!dPriv->driContextPriv || !dPriv->driContextPriv->driverPrivate) {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      _mesa_problem(NULL, "i810SwapBuffers: drawable has no context!\n");
+      return;
+   }
+
+   imesa = (i810ContextPtr) dPriv->driContextPriv->driverPrivate;
+   ctx = imesa->glCtx;
+
+   if (!ctx->Visual.doubleBufferMode)
+      return;
+
+   _mesa_notifySwapBuffers( ctx );  /* flush pending rendering commands */
+
+   if ( imesa->sarea->pf_active )
+      i810PageFlip( dPriv );
+   else
+      i810CopyBuffer( dPriv );
+}
+
diff --git a/src/mesa/drivers/dri/i810/i810context.h b/src/mesa/drivers/dri/i810/i810context.h
new file mode 100644
index 0000000000..19529db020
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810context.h
@@ -0,0 +1,249 @@
+/*
+ * GLX Hardware Device Driver for Intel i810
+ * Copyright (C) 1999 Keith Whitwell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef I810CONTEXT_INC
+#define I810CONTEXT_INC
+
+typedef struct i810_context_t i810Context;
+typedef struct i810_context_t *i810ContextPtr;
+typedef struct i810_texture_object_t *i810TextureObjectPtr;
+
+#include "drm.h"
+#include "main/mtypes.h"
+#include "main/mm.h"
+
+#include "i810screen.h"
+#include "i810tex.h"
+
+
+/* Reasons to disable hardware rasterization. 
+ */
+#define I810_FALLBACK_TEXTURE        0x1
+#define I810_FALLBACK_DRAW_BUFFER    0x2
+#define I810_FALLBACK_READ_BUFFER    0x4
+#define I810_FALLBACK_COLORMASK      0x8  
+#define I810_FALLBACK_SPECULAR       0x20 
+#define I810_FALLBACK_LOGICOP        0x40
+#define I810_FALLBACK_RENDERMODE     0x80
+#define I810_FALLBACK_STENCIL        0x100
+#define I810_FALLBACK_BLEND_EQ       0x200
+#define I810_FALLBACK_BLEND_FUNC     0x400
+
+
+/* PCI device ids of the supported chipsets (only defined here when no
+ * earlier header has provided them).
+ */
+#ifndef PCI_CHIP_I810
+#define PCI_CHIP_I810              0x7121
+#define PCI_CHIP_I810_DC100        0x7123
+#define PCI_CHIP_I810_E            0x7125
+#define PCI_CHIP_I815              0x1132
+#endif
+
+/* Chipset family tests on a context pointer.  The argument is
+ * parenthesized so any pointer expression (e.g. `&ctx`) expands correctly.
+ * IS_I810 may evaluate it up to three times, so do not pass an expression
+ * with side effects.
+ */
+#define IS_I810(imesa) ((imesa)->i810Screen->deviceID == PCI_CHIP_I810 ||	\
+			(imesa)->i810Screen->deviceID == PCI_CHIP_I810_DC100 || \
+			(imesa)->i810Screen->deviceID == PCI_CHIP_I810_E)
+#define IS_I815(imesa) ((imesa)->i810Screen->deviceID == PCI_CHIP_I815)
+
+
+#define I810_UPLOAD_TEX(i) (I810_UPLOAD_TEX0<<(i))
+
+/* Use the templated vertex formats:
+ */
+#define TAG(x) i810##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+typedef void (*i810_tri_func)( i810ContextPtr, i810Vertex *, i810Vertex *,
+			       i810Vertex * );
+typedef void (*i810_line_func)( i810ContextPtr, i810Vertex *, i810Vertex * );
+typedef void (*i810_point_func)( i810ContextPtr, i810Vertex * );
+
+/* Per-context driver state.  Allocated zeroed by i810CreateContext() and
+ * reachable from a GLcontext through I810_CONTEXT().
+ */
+struct i810_context_t {
+   GLint refcount;   
+   GLcontext *glCtx;		/* the Mesa context this driver context wraps */
+
+   /* Texture object bookkeeping
+    */
+   unsigned              nr_heaps;	/* set to 1 by i810CreateContext() */
+   driTexHeap          * texture_heaps[1];
+   driTextureObject      swapped;	/* list head passed to driCreateTextureHeap() */
+
+   /* One slot per texture unit; a non-NULL slot makes the matching
+    * I810_UPLOAD_TEXn bit get set whenever state is re-dirtied.
+    */
+   struct i810_texture_object_t *CurrentTexObj[2];
+
+
+   /* Bit flag to keep track of fallbacks.
+    */
+   GLuint Fallback;		/* I810_FALLBACK_* */
+
+   /* State for i810vb.c and i810tris.c.
+    */
+   GLuint new_state;		/* _NEW_* flags */
+   GLuint SetupNewInputs;
+   GLuint SetupIndex;
+   GLuint RenderIndex;
+   GLmatrix ViewportMatrix;
+   GLenum render_primitive;
+   GLenum reduced_primitive;
+   GLuint hw_primitive;
+   GLubyte *verts;
+
+   drmBufPtr  vertex_buffer;
+   char *vertex_addr;
+   GLuint vertex_low;
+   GLuint vertex_high;
+   GLuint vertex_last_prim;
+   
+   /* Set whenever the cliprect list or drawable origin is replaced. */
+   GLboolean upload_cliprects;
+
+
+   /* Fallback rasterization functions 
+    */
+   i810_point_func draw_point;
+   i810_line_func draw_line;
+   i810_tri_func draw_tri;
+
+   /* Hardware state 
+    */
+   GLuint dirty;		/* I810_UPLOAD_* */
+   GLuint Setup[I810_CTX_SETUP_SIZE];
+   GLuint BufferSetup[I810_DEST_SETUP_SIZE];
+   int vertex_size;
+   int vertex_stride_shift;
+   unsigned int lastStamp;	/* drawable stamp last seen by i810GetLock() */
+   GLboolean stipple_in_hw;
+
+   GLenum TexEnvImageFmt[2];
+
+   /* State which can't be computed completely on the fly:
+    */
+   GLuint LcsCullMode;
+   GLuint LcsLineWidth;
+   GLuint LcsPointSize;
+
+   /* Funny mesa mirrors
+    */
+   GLushort ClearColor;
+
+   /* DRI stuff
+    */
+   GLuint needClip;
+   GLframebuffer *glBuffer;
+   GLboolean doPageFlip;
+
+   /* These refer to the current draw (front vs. back) buffer:
+    */
+   int drawX;			/* origin of drawable in draw buffer */
+   int drawY;
+   GLuint numClipRects;		/* cliprects for that buffer */
+   drm_clip_rect_t *pClipRects;
+
+   int lastSwap;
+   int texAge;
+   int ctxAge;
+   int dirtyAge;
+  
+ 
+   GLboolean scissor;
+   drm_clip_rect_t draw_rect;
+   drm_clip_rect_t scissor_rect;
+
+   /* Lock and device handles, copied from the DRI context and screen by
+    * i810CreateContext() and used by the LOCK/UNLOCK_HARDWARE macros.
+    */
+   drm_context_t hHWContext;
+   drm_hw_lock_t *driHwLock;
+   int driFd;
+
+   __DRIdrawable *driDrawable;	/* draw drawable set by i810MakeCurrent() */
+   __DRIscreen *driScreen;
+   i810ScreenPrivate *i810Screen; 
+   I810SAREAPtr sarea;		/* driver-private part of the SAREA */
+};
+
+
+#define I810_CONTEXT(ctx)    ((i810ContextPtr)(ctx->DriverCtx))
+
+#define GET_DISPATCH_AGE( imesa ) imesa->sarea->last_dispatch
+#define GET_ENQUEUE_AGE( imesa ) imesa->sarea->last_enqueue
+
+
+/* Lock the hardware and validate our state.
+ *
+ * Tries DRM_CAS on the hardware lock first; only when that reports failure
+ * (__ret non-zero) does it fall back to i810GetLock(), which takes the
+ * lock through drmGetLock() and revalidates drawable and texture state.
+ */
+#define LOCK_HARDWARE( imesa )				\
+  do {							\
+    char __ret=0;					\
+    DRM_CAS(imesa->driHwLock, imesa->hHWContext,	\
+	    (DRM_LOCK_HELD|imesa->hHWContext), __ret);	\
+    if (__ret)						\
+        i810GetLock( imesa, 0 );			\
+  } while (0)
+
+
+
+/* Release the kernel lock.
+ *
+ * NOTE(review): the expansion already ends in ';', so the usual
+ * "UNLOCK_HARDWARE(x);" produces an extra empty statement.  That is
+ * harmless as a plain statement but would break an unbraced if/else
+ * around the call -- keep call sites braced.
+ */
+#define UNLOCK_HARDWARE(imesa)					\
+    DRM_UNLOCK(imesa->driFd, imesa->driHwLock, imesa->hHWContext);	
+
+
+/* This is the wrong way to do it, I'm sure.  Otherwise the drm
+ * bitches that I've already got the heavyweight lock.  At worst,
+ * this is 3 ioctls.  The best solution probably only gets me down 
+ * to 2 ioctls in the worst case.
+ */
+#define LOCK_HARDWARE_QUIESCENT( imesa ) do {	\
+   LOCK_HARDWARE( imesa );			\
+   i810RegetLockQuiescent( imesa );		\
+} while(0)
+
+
+extern void i810GetLock( i810ContextPtr imesa, GLuint flags );
+extern void i810EmitHwStateLocked( i810ContextPtr imesa );
+extern void i810EmitScissorValues( i810ContextPtr imesa, int box_nr, int emit );
+extern void i810EmitDrawingRectangle( i810ContextPtr imesa );
+extern void i810XMesaSetBackClipRects( i810ContextPtr imesa );
+extern void i810XMesaSetFrontClipRects( i810ContextPtr imesa );
+
+#define SUBPIXEL_X -.5
+#define SUBPIXEL_Y -.5
+
+/* ================================================================
+ * Debugging:
+ */
+#define DO_DEBUG		1
+#if DO_DEBUG
+extern int I810_DEBUG;
+#else
+#define I810_DEBUG		0
+#endif
+
+#define DEBUG_TEXTURE	0x1
+#define DEBUG_STATE	0x2
+#define DEBUG_IOCTL	0x4
+#define DEBUG_PRIMS	0x8
+#define DEBUG_VERTS	0x10
+#define DEBUG_FALLBACKS	0x20
+#define DEBUG_VERBOSE	0x40
+#define DEBUG_DRI       0x80
+#define DEBUG_DMA       0x100
+#define DEBUG_SANITY    0x200
+#define DEBUG_SYNC      0x400
+#define DEBUG_SLEEP     0x800
+
+#endif
diff --git a/src/mesa/drivers/dri/i810/i810ioctl.c b/src/mesa/drivers/dri/i810/i810ioctl.c
new file mode 100644
index 0000000000..c631543d93
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810ioctl.c
@@ -0,0 +1,519 @@
+
+#include <unistd.h> /* for usleep() */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/dd.h"
+#include "swrast/swrast.h"
+#include "main/mm.h"
+
+#include "i810screen.h"
+#include "i810_dri.h"
+
+#include "main/context.h"
+#include "i810ioctl.h"
+#include "i810state.h"
+
+/* Ask the kernel for a DMA buffer, retrying until one is granted (a flush is
+ * issued after more than 1000 failures).  Returns its drmBuf with used = 0. */
+static drmBufPtr i810_get_buffer_ioctl( i810ContextPtr imesa )
+{
+   drmI810DMA dma;
+   drmBufPtr buf;
+   int retcode, i = 0;
+   /* Never hand indeterminate stack bytes to the ioctl or read them back. */
+   memset(&dma, 0, sizeof(dma));
+   while (1) {
+      retcode = drmCommandWriteRead(imesa->driFd, DRM_I810_GETBUF,
+                                    &dma, sizeof(drmI810DMA));
+      /* dma.granted is only meaningful when the ioctl itself succeeded. */
+      if (retcode == 0 && dma.granted == 1)
+	 break;
+      if (++i > 1000) {
+	 drmCommandNone(imesa->driFd, DRM_I810_FLUSH);
+	 i = 0;
+      }
+   }
+   buf = &(imesa->i810Screen->bufs->list[dma.request_idx]);
+   buf->idx = dma.request_idx;
+   buf->used = 0;
+   buf->total = dma.request_size;
+   buf->address = (drmAddress)dma.virtual;
+   return buf;
+}
+
+
+
+#define DEPTH_SCALE ((1<<16)-1)	/* 0xffff; scales ctx->Depth.Clear below */
+
+/* Driver Clear hook.  The front/back colour buffers (only when the colour
+ * mask is all ones) and the depth buffer are cleared with DRM_I810_CLEAR, one
+ * batch of cliprects per ioctl; whatever is left in mask goes to swrast.
+ */
+static void i810Clear( GLcontext *ctx, GLbitfield mask )
+{
+   i810ContextPtr imesa = I810_CONTEXT( ctx );
+   __DRIdrawable *dPriv = imesa->driDrawable;
+   const GLuint colorMask = *((GLuint *) &ctx->Color.ColorMask[0]);
+   drmI810Clear clear;
+   unsigned int i;
+
+   clear.flags = 0;
+   clear.clear_color = imesa->ClearColor;
+   clear.clear_depth = (GLuint) (ctx->Depth.Clear * DEPTH_SCALE);
+
+   I810_FIREVERTICES( imesa );
+   /* Claim each buffer the ioctl will clear and drop its bit from mask. */
+   if ((mask & BUFFER_BIT_FRONT_LEFT) && colorMask == ~0U) {
+      clear.flags |= I810_FRONT;
+      mask &= ~BUFFER_BIT_FRONT_LEFT;
+   }
+
+   if ((mask & BUFFER_BIT_BACK_LEFT) && colorMask == ~0U) {
+      clear.flags |= I810_BACK;
+      mask &= ~BUFFER_BIT_BACK_LEFT;
+   }
+
+   if (mask & BUFFER_BIT_DEPTH) {
+      if (ctx->Depth.Mask)
+	 clear.flags |= I810_DEPTH;
+      mask &= ~BUFFER_BIT_DEPTH;	/* bit is consumed even when depth writes are off */
+   }
+
+   if (clear.flags) {
+      GLint cx, cy, cw, ch;
+      LOCK_HARDWARE( imesa );
+      /* compute region after locking: */
+      cx = ctx->DrawBuffer->_Xmin;
+      cy = ctx->DrawBuffer->_Ymin;
+      cw = ctx->DrawBuffer->_Xmax - cx;
+      ch = ctx->DrawBuffer->_Ymax - cy;
+
+      /* flip top to bottom */
+      cy = dPriv->h-cy-ch;
+      cx += imesa->drawX;
+      cy += imesa->drawY;
+
+      for (i = 0 ; i < imesa->numClipRects ; )
+      {
+	 unsigned int nr = MIN2(i + I810_NR_SAREA_CLIPRECTS, imesa->numClipRects);
+	 drm_clip_rect_t *box = imesa->pClipRects;
+	 drm_clip_rect_t *b = (drm_clip_rect_t *)imesa->sarea->boxes;
+	 int n = 0;	/* boxes written to the SAREA for this batch */
+
+	 if (cw != dPriv->w || ch != dPriv->h) {
+            /* clear sub region */
+	    for ( ; i < nr ; i++) {
+	       GLint x = box[i].x1;
+	       GLint y = box[i].y1;
+	       GLint w = box[i].x2 - x;
+	       GLint h = box[i].y2 - y;
+
+	       if (x < cx) w -= cx - x, x = cx;
+	       if (y < cy) h -= cy - y, y = cy;
+	       if (x + w > cx + cw) w = cx + cw - x;
+	       if (y + h > cy + ch) h = cy + ch - y;
+	       if (w <= 0) continue;
+	       if (h <= 0) continue;
+
+	       b->x1 = x;
+	       b->y1 = y;
+	       b->x2 = x + w;
+	       b->y2 = y + h;
+	       b++;
+	       n++;
+	    }
+	 } else {
+            /* clear whole buffer */
+	    for ( ; i < nr ; i++) {
+	       *b++ = box[i];
+	       n++;
+	    }
+	 }
+	 imesa->sarea->nbox = n;
+         drmCommandWrite(imesa->driFd, DRM_I810_CLEAR,
+                         &clear, sizeof(drmI810Clear));
+      }
+      UNLOCK_HARDWARE( imesa );
+      imesa->upload_cliprects = GL_TRUE;
+   }
+
+   if (mask)
+      _swrast_Clear( ctx, mask );
+}
+
+
+
+
+/*
+ * Copy the back buffer to the front buffer: one DRM_I810_SWAP ioctl per
+ * batch of the drawable's cliprects, followed by a swap throttle.
+ */
+void i810CopyBuffer( const __DRIdrawable *dPriv ) 
+{
+   i810ContextPtr imesa;
+   drm_clip_rect_t *rects;
+   int total, first, enqueue_age;
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   imesa = (i810ContextPtr) dPriv->driContextPriv->driverPrivate;
+   I810_FIREVERTICES( imesa );
+   LOCK_HARDWARE( imesa );
+
+   rects = (drm_clip_rect_t *)dPriv->pClipRects;
+   total = dPriv->numClipRects;
+
+   /* Copy at most I810_NR_SAREA_CLIPRECTS boxes into the SAREA per ioctl. */
+   for (first = 0 ; first < total ; ) {
+      drm_clip_rect_t *dst = (drm_clip_rect_t *)imesa->sarea->boxes;
+      int end = MIN2(first + I810_NR_SAREA_CLIPRECTS, total);
+
+      imesa->sarea->nbox = end - first;
+      while (first < end)
+	 *dst++ = rects[first++];
+
+      drmCommandNone(imesa->driFd, DRM_I810_SWAP);
+   }
+
+   /* Sample the enqueue age before the lock is dropped. */
+   enqueue_age = GET_ENQUEUE_AGE(imesa);
+   UNLOCK_HARDWARE( imesa );
+
+   /* multiarb will suck the life out of the server without this throttle:
+    * wait for the dispatch age to reach the age recorded at the last swap.
+    */
+   if (GET_DISPATCH_AGE(imesa) < imesa->lastSwap)
+      i810WaitAge(imesa, imesa->lastSwap);
+
+   imesa->lastSwap = enqueue_age;
+   imesa->upload_cliprects = GL_TRUE;
+}
+
+
+/*
+ * Present by page flip: issue DRM_I810_FLIP, then throttle and re-select
+ * the draw buffer.  XXX implement when full-screen extension is done.
+ */
+void i810PageFlip( const __DRIdrawable *dPriv ) 
+{
+   i810ContextPtr imesa;
+   int enqueue_age, err;
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   imesa = (i810ContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   I810_FIREVERTICES( imesa );
+   LOCK_HARDWARE( imesa );
+
+   /* Only the drawable's first cliprect is copied into the SAREA. */
+   if (dPriv->pClipRects) {
+      memcpy(&(imesa->sarea->boxes[0]), &(dPriv->pClipRects[0]),
+             sizeof(drm_clip_rect_t));
+      imesa->sarea->nbox = 1;
+   }
+
+   err = drmCommandNone(imesa->driFd, DRM_I810_FLIP);
+   if (err) {
+      /* A failed flip is fatal: report it, drop the lock and exit. */
+      fprintf(stderr, "%s: %d\n", __FUNCTION__, err);
+      UNLOCK_HARDWARE( imesa );
+      exit(1);
+   }
+
+   enqueue_age = GET_ENQUEUE_AGE(imesa);
+   UNLOCK_HARDWARE( imesa );
+
+   /* multiarb will suck the life out of the server without this throttle: */
+   if (GET_DISPATCH_AGE(imesa) < imesa->lastSwap)
+      i810WaitAge(imesa, imesa->lastSwap);
+
+   i810DrawBuffer( imesa->glCtx, imesa->glCtx->Color.DrawBuffer[0] );
+   imesa->upload_cliprects = GL_TRUE;
+   imesa->lastSwap = enqueue_age;
+}
+
+
+/* Flush this context's vertices, then take the lock quiescent and release it.
+ * This waits for *everybody* to finish rendering -- overkill.
+ */
+void i810DmaFinish( i810ContextPtr imesa  ) 
+{
+   I810_FIREVERTICES( imesa );
+
+   LOCK_HARDWARE_QUIESCENT( imesa );
+   UNLOCK_HARDWARE( imesa );
+}
+
+/* Drop the lock (the unlock comes first, so it is expected to be held) and re-take it with DRM_LOCK_QUIESCENT. */
+void i810RegetLockQuiescent( i810ContextPtr imesa  ) 
+{
+   drmUnlock(imesa->driFd, imesa->hHWContext);
+   i810GetLock( imesa, DRM_LOCK_QUIESCENT ); 
+}
+
+/* Poll DRM_I810_GETAGE until the dispatch age reaches `age`; flush if it never does. */
+void i810WaitAgeLocked( i810ContextPtr imesa, int age  ) 
+{
+   int attempt, spin;
+   for (attempt = 1 ; attempt < 5000 ; attempt++) {
+      drmCommandNone(imesa->driFd, DRM_I810_GETAGE);
+      if (GET_DISPATCH_AGE(imesa) >= age)
+	 return;
+      for (spin = 0 ; spin < 1000 ; spin++)
+	 ;	/* NOTE(review): empty delay loop; an optimizer may remove it */
+   }
+
+   drmCommandNone(imesa->driFd, DRM_I810_FLUSH);
+}
+
+
+/* Wait for the dispatch age to reach `age`: busy-poll first, then poll with
+ * 1000 us sleeps, and finally issue a flush under the hardware lock. */
+void i810WaitAge( i810ContextPtr imesa, int age  ) 
+{
+   int attempt, spin;
+   for (attempt = 1 ; attempt < 5000 ; attempt++) {
+      drmCommandNone(imesa->driFd, DRM_I810_GETAGE);
+      if (GET_DISPATCH_AGE(imesa) >= age)
+	 return;
+      for (spin = 0 ; spin < 1000 ; spin++)
+	 ;
+   }
+
+   for (attempt = 1 ; attempt < 1000 ; attempt++) {
+      drmCommandNone(imesa->driFd, DRM_I810_GETAGE);
+      if (GET_DISPATCH_AGE(imesa) >= age)
+	 return;
+      usleep(1000);
+   }
+
+   LOCK_HARDWARE(imesa);
+   drmCommandNone(imesa->driFd, DRM_I810_FLUSH);
+   UNLOCK_HARDWARE(imesa);
+}
+
+
+/* Store the intersection of rectangles a and b in *out.  Returns 1 when it is
+ * non-empty and 0 otherwise (*out is then only partly clipped).  out may be a. */
+static int intersect_rect( drm_clip_rect_t *out,
+                           drm_clip_rect_t *a,
+                           drm_clip_rect_t *b )
+{
+   *out = *a;
+   /* Clip horizontally first; bail out before touching y if nothing is left. */
+   if (out->x1 < b->x1) out->x1 = b->x1;
+   if (out->x2 > b->x2) out->x2 = b->x2;
+   if (out->x2 <= out->x1) return 0;
+
+   if (out->y1 < b->y1) out->y1 = b->y1;
+   if (out->y2 > b->y2) out->y2 = b->y2;
+   return (out->y2 <= out->y1) ? 0 : 1;
+}
+
+
+/* Copy each state group flagged in imesa->dirty into the SAREA, record the
+ * flags in sarea->dirty and clear imesa->dirty. */
+static void emit_state( i810ContextPtr imesa )
+{
+   I810SAREAPtr sarea = imesa->sarea;
+   const GLuint pending = imesa->dirty;
+
+   if (pending & I810_UPLOAD_BUFFERS)
+      memcpy( sarea->BufferState, imesa->BufferSetup,
+	      sizeof(imesa->BufferSetup) );
+
+   if (pending & I810_UPLOAD_CTX)
+      memcpy( sarea->ContextState, imesa->Setup,
+	      sizeof(imesa->Setup) );
+
+   if (pending & I810_UPLOAD_TEX0)
+      memcpy( sarea->TexState[0],
+	      imesa->CurrentTexObj[0]->Setup,
+	      sizeof(imesa->CurrentTexObj[0]->Setup) );
+
+   if (pending & I810_UPLOAD_TEX1) {
+      GLuint *tex1 = sarea->TexState[1];
+
+      memcpy( tex1,
+	      imesa->CurrentTexObj[1]->Setup,
+	      sizeof(imesa->CurrentTexObj[1]->Setup) );
+
+      /* Need this for the case where both units are bound to the same
+       * texobj: XOR-ing with (x_0 ^ x_1) turns an x_0 encoding into x_1.
+       */
+      tex1[I810_TEXREG_MI1] ^= (MI1_MAP_0 ^ MI1_MAP_1);
+      tex1[I810_TEXREG_MLC] ^= (MLC_MAP_0 ^ MLC_MAP_1);
+      tex1[I810_TEXREG_MLL] ^= (MLL_MAP_0 ^ MLL_MAP_1);
+      tex1[I810_TEXREG_MCS] ^= (MCS_COORD_0 ^ MCS_COORD_1);
+      tex1[I810_TEXREG_MF]  ^= (MF_MAP_0 ^ MF_MAP_1);
+   }
+
+   /* Publish which groups were written, then mark the context clean. */
+   sarea->dirty = pending;
+   imesa->dirty = 0;
+}
+
+/* Stamp the texture object bound to each of the two units, if any, with `age`. */
+static void age_imesa( i810ContextPtr imesa, int age )
+{
+   int unit;
+   for (unit = 0 ; unit < 2 ; unit++)
+      if (imesa->CurrentTexObj[unit]) imesa->CurrentTexObj[unit]->base.timestamp = age;
+}
+
+/* Submit the current vertex buffer with DRM_I810_VERTEX, uploading dirty state
+ * first; callers hold the hardware lock.  When cliprects must be uploaded the
+ * buffer is sent once per batch of boxes and discarded with the last batch. */
+void i810FlushPrimsLocked( i810ContextPtr imesa )
+{
+   drm_clip_rect_t *pbox = imesa->pClipRects;
+   int nbox = imesa->numClipRects;
+   drmBufPtr buffer = imesa->vertex_buffer;
+   I810SAREAPtr sarea = imesa->sarea;
+   drmI810Vertex vertex;
+   int i;
+
+   if (I810_DEBUG & DEBUG_STATE)
+      i810PrintDirty( __FUNCTION__, imesa->dirty );
+   if (imesa->dirty)
+      emit_state( imesa );
+
+   vertex.idx = buffer->idx;
+   vertex.used = imesa->vertex_low;
+   vertex.discard = 0;
+   sarea->vertex_prim = imesa->hw_primitive;
+   if (!nbox) {
+      vertex.used = 0;	/* no cliprects: submit the buffer as empty */
+   }
+   else if (nbox > I810_NR_SAREA_CLIPRECTS) {
+      imesa->upload_cliprects = GL_TRUE;
+   }
+
+   if (!nbox || !imesa->upload_cliprects)
+   {
+      if (nbox == 1)
+	 sarea->nbox = 0;
+      else
+	 sarea->nbox = nbox;
+
+      vertex.discard = 1;	/* single submission: buffer is done afterwards */
+      drmCommandWrite(imesa->driFd, DRM_I810_VERTEX,
+                      &vertex, sizeof(drmI810Vertex));
+      age_imesa(imesa, sarea->last_enqueue);
+   }
+   else
+   {
+      for (i = 0 ; i < nbox ; )
+      {
+	 int nr = MIN2(i + I810_NR_SAREA_CLIPRECTS, nbox);
+	 drm_clip_rect_t *b = (drm_clip_rect_t *)sarea->boxes;
+
+	 if (imesa->scissor) {
+	    sarea->nbox = 0;
+
+	    for ( ; i < nr ; i++) {
+	       b->x1 = pbox[i].x1 - imesa->drawX;
+	       b->y1 = pbox[i].y1 - imesa->drawY;
+	       b->x2 = pbox[i].x2 - imesa->drawX;
+	       b->y2 = pbox[i].y2 - imesa->drawY;
+
+	       if (intersect_rect(b, b, &imesa->scissor_rect)) {
+		  sarea->nbox++;
+		  b++;
+	       }
+	    }
+
+	    /* Culled?
+	     */
+	    if (!sarea->nbox) {
+	       if (nr < nbox) continue;	/* more batches remain: skip this ioctl */
+	       vertex.used = 0;	/* last batch: still sent, empty, with discard set below */
+	    }
+	 } else {
+	    sarea->nbox = nr - i;
+	    for ( ; i < nr ; i++, b++) {
+	       b->x1 = pbox[i].x1 - imesa->drawX;
+	       b->y1 = pbox[i].y1 - imesa->drawY;
+	       b->x2 = pbox[i].x2 - imesa->drawX;
+	       b->y2 = pbox[i].y2 - imesa->drawY;
+	    }
+	 }
+
+	 /* Finished with the buffer?
+	  */
+	 if (nr == nbox)
+	    vertex.discard = 1;
+
+	 drmCommandWrite(imesa->driFd, DRM_I810_VERTEX,
+                         &vertex, sizeof(drmI810Vertex));
+	 age_imesa(imesa, imesa->sarea->last_enqueue);
+      }
+   }
+   /* Reset imesa vars:
+    */
+   imesa->vertex_buffer = 0;
+   imesa->vertex_addr = 0;
+   imesa->vertex_low = 0;
+   imesa->vertex_high = 0;
+   imesa->vertex_last_prim = 0;
+   imesa->dirty = 0;
+   imesa->upload_cliprects = GL_FALSE;
+}
+
+/* Under the hardware lock: flush any current vertex buffer, then start a new one. */
+void i810FlushPrimsGetBuffer( i810ContextPtr imesa )
+{
+   LOCK_HARDWARE(imesa);
+   if (imesa->vertex_buffer)
+      i810FlushPrimsLocked( imesa );
+
+   imesa->vertex_buffer = i810_get_buffer_ioctl( imesa );
+   imesa->vertex_addr = (char *)imesa->vertex_buffer->address;
+   imesa->vertex_high = imesa->vertex_buffer->total;
+   imesa->vertex_last_prim = imesa->vertex_low = 4; /* leave room for instruction header */
+
+   UNLOCK_HARDWARE(imesa);
+}
+
+/* Flush the current vertex buffer, if there is one, taking the lock around it. */
+void i810FlushPrims( i810ContextPtr imesa ) 
+{
+   if (!imesa->vertex_buffer)
+      return;
+   LOCK_HARDWARE( imesa );
+   i810FlushPrimsLocked( imesa );
+   UNLOCK_HARDWARE( imesa );
+}
+
+
+/* Issue the DRM_I810_DOCOPY command on `fd` and return drmCommandNone()'s result. */
+int i810_check_copy(int fd)
+{
+   return drmCommandNone(fd, DRM_I810_DOCOPY);
+}
+
+/* Flush hook: i810FlushPrims() itself does nothing without a vertex buffer. */
+static void i810Flush( GLcontext *ctx )
+{
+   i810FlushPrims( I810_CONTEXT( ctx ) );
+}
+
+/* Finish hook: flush and wait through i810DmaFinish(). */
+static void i810Finish( GLcontext *ctx  ) 
+{
+   i810DmaFinish( I810_CONTEXT( ctx ) );
+}
+/* Install this file's Clear, Finish and Flush hooks in the driver function table. */
+void i810InitIoctlFuncs( struct dd_function_table *functions )
+{
+   functions->Clear = i810Clear;
+   functions->Finish = i810Finish;
+   functions->Flush = i810Flush;
+}
diff --git a/src/mesa/drivers/dri/i810/i810ioctl.h b/src/mesa/drivers/dri/i810/i810ioctl.h
new file mode 100644
index 0000000000..926e38ce51
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810ioctl.h
@@ -0,0 +1,48 @@
+
+#ifndef I810_IOCTL_H
+#define I810_IOCTL_H
+
+#include "i810context.h"
+
+void i810EmitPrim( i810ContextPtr imesa );
+void i810FlushPrims( i810ContextPtr imesa );		/* takes the hw lock itself */
+void i810FlushPrimsLocked( i810ContextPtr imesa );	/* called with the hw lock held */
+void i810FlushPrimsGetBuffer( i810ContextPtr imesa );	/* flush, then fetch a new buffer */
+
+void i810WaitAgeLocked( i810ContextPtr imesa, int age );
+void i810WaitAge( i810ContextPtr imesa, int age );
+void i810DmaFinish( i810ContextPtr imesa );
+void i810RegetLockQuiescent( i810ContextPtr imesa );
+void i810InitIoctlFuncs( struct dd_function_table *functions );
+void i810CopyBuffer( const __DRIdrawable *dPriv );
+void i810PageFlip( const __DRIdrawable *dPriv );
+int i810_check_copy(int fd);
+
+/* If vertices were added past vertex_last_prim flush them, then OR in `flag`. */
+#define I810_STATECHANGE(imesa, flag)				\
+do {								\
+   if ((imesa)->vertex_low != (imesa)->vertex_last_prim)	\
+      i810FlushPrims(imesa);					\
+   (imesa)->dirty |= (flag);					\
+} while (0)
+
+/* Flush queued vertices if the context currently holds a vertex buffer. */
+#define I810_FIREVERTICES(imesa)				\
+do {								\
+   if ((imesa)->vertex_buffer)					\
+      i810FlushPrims(imesa);					\
+} while (0)
+
+/* Reserve `bytes` in the current vertex buffer (fetching a fresh buffer first
+ * if they would not fit) and return a pointer to the start of the reservation. */
+static INLINE GLuint *i810AllocDmaLow( i810ContextPtr imesa, int bytes )
+{
+   GLuint *start;
+   if (imesa->vertex_low + bytes > imesa->vertex_high)
+      i810FlushPrimsGetBuffer( imesa );
+   start = (GLuint *)(imesa->vertex_addr + imesa->vertex_low);
+   imesa->vertex_low += bytes;
+   return start;
+}
+
+#endif
diff --git a/src/mesa/drivers/dri/i810/i810render.c b/src/mesa/drivers/dri/i810/i810render.c
new file mode 100644
index 0000000000..b543d4f012
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810render.c
@@ -0,0 +1,172 @@
+/*
+ * Intel i810 DRI driver for Mesa 3.5
+ *
+ * Copyright (C) 1999-2000  Keith Whitwell   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL KEITH WHITWELL BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware acceleration where possible.
+ *
+ */
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+
+#include "tnl/t_context.h"
+
+#include "i810screen.h"
+#include "i810_dri.h"
+
+#include "i810context.h"
+#include "i810tris.h"
+#include "i810vb.h"
+#include "i810ioctl.h"
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to dma
+ * buffers, using strip/fan hardware primitives where possible.  The HAVE_*
+ * switches are defined ahead of tnl_dd/t_dd_dmatmp.h, which is included below.
+ */
+#define HAVE_POINTS      0
+#define HAVE_LINES       1
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0	/* has it, template can't use it yet */
+#define HAVE_TRI_FANS    1
+#define HAVE_POLYGONS    1
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+
+#define HAVE_ELTS        0
+
+/* Hardware primitive code for each GL primitive mode (the array index). */
+static const GLuint hw_prim[GL_POLYGON+1] = {
+   0,				/* GL_POINTS (HAVE_POINTS is 0) */
+   PR_LINES,			/* GL_LINES */
+   0,				/* GL_LINE_LOOP */
+   PR_LINESTRIP,		/* GL_LINE_STRIP */
+   PR_TRIANGLES,		/* GL_TRIANGLES */
+   PR_TRISTRIP_0,		/* GL_TRIANGLE_STRIP */
+   PR_TRIFAN,			/* GL_TRIANGLE_FAN */
+   0,				/* GL_QUADS (HAVE_QUADS is 0) */
+   0,				/* GL_QUAD_STRIP (HAVE_QUAD_STRIPS is 0) */
+   PR_POLYGON			/* GL_POLYGON */
+};
+
+static const GLenum reduced_prim[GL_POLYGON+1] = {	/* indexed by GL primitive mode */
+   GL_POINTS,		/* GL_POINTS */
+   GL_LINES,		/* GL_LINES */
+   GL_LINES,		/* GL_LINE_LOOP */
+   GL_LINES,		/* GL_LINE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLES */
+   GL_TRIANGLES,	/* GL_TRIANGLE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLE_FAN */
+   GL_TRIANGLES,	/* GL_QUADS */
+   GL_TRIANGLES,	/* GL_QUAD_STRIP */
+   GL_TRIANGLES		/* GL_POLYGON */
+};
+
+
+/* Macros defined for tnl_dd/t_dd_dmatmp.h (included below).  A vertex takes
+ * vertex_size*4 bytes; 4 bytes per DMA buffer go to the instruction header. */
+#define LOCAL_VARS i810ContextPtr imesa = I810_CONTEXT(ctx)
+#define INIT( prim ) do {						\
+   I810_STATECHANGE(imesa, 0);						\
+   i810RasterPrimitive( ctx, reduced_prim[prim], hw_prim[prim] );	\
+} while (0)
+#define GET_CURRENT_VB_MAX_VERTS() \
+  (((int)imesa->vertex_high - (int)imesa->vertex_low) / (imesa->vertex_size*4))
+#define GET_SUBSEQUENT_VB_MAX_VERTS() \
+  ((I810_DMA_BUF_SZ-4) / (imesa->vertex_size * 4))
+
+#define ALLOC_VERTS( nr ) \
+  i810AllocDmaLow( imesa, (nr) * imesa->vertex_size * 4)
+#define EMIT_VERTS( ctx, j, nr, buf ) \
+  i810_emit_contiguous_verts(ctx, j, (j)+(nr), buf)
+
+#define FLUSH()  I810_FIREVERTICES( imesa )
+
+
+#define TAG(x) i810_##x
+#include "tnl_dd/t_dd_dmatmp.h"
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+/* Pipeline stage callback: render each primitive of the vertex buffer.  Returns
+ * GL_TRUE without drawing when this path does not apply, GL_FALSE when done. */
+static GLboolean i810_run_render( GLcontext *ctx,
+				  struct tnl_pipeline_stage *stage )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint n;
+
+   /* Don't handle clipping or indexed vertices.
+    */
+   if (imesa->RenderIndex != 0)
+      return GL_TRUE;
+   if (!i810_validate_render( ctx, VB ))
+      return GL_TRUE;
+
+   imesa->SetupNewInputs = VERT_BIT_POS;
+
+   tnl->Driver.Render.Start( ctx );
+
+   for (n = 0 ; n < VB->PrimitiveCount ; n++) {
+      const GLuint mode = _tnl_translate_prim(&VB->Primitive[n]);
+      const GLuint first = VB->Primitive[n].start;
+      const GLuint count = VB->Primitive[n].count;
+
+      /* Empty primitives are skipped. */
+      if (count != 0)
+	 i810_render_tab_verts[mode & PRIM_MODE_MASK]( ctx, first,
+						       first + count, mode );
+   }
+
+   tnl->Driver.Render.Finish( ctx );
+
+   return GL_FALSE;		/* finished the pipe */
+}
+
+
+/* Pipeline stage descriptor: only the name and the run callback are filled in. */
+const struct tnl_pipeline_stage _i810_render_stage =
+{
+   "i810 render",
+   NULL,
+   NULL,
+   NULL,
+   NULL,
+   i810_run_render		/* run */
+};
diff --git a/src/mesa/drivers/dri/i810/i810screen.c b/src/mesa/drivers/dri/i810/i810screen.c
new file mode 100644
index 0000000000..56708c97cb
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810screen.c
@@ -0,0 +1,361 @@
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/simple_list.h"
+#include "utils.h"
+
+#include "i810screen.h"
+#include "i810_dri.h"
+
+#include "i810state.h"
+#include "i810tex.h"
+#include "i810span.h"
+
+#include "GL/internal/dri_interface.h"
+
+/* Build the RGB565 config list for this screen (stencil-less and stencil, one
+ * or two back-buffer modes).  Returns NULL when driCreateConfigs() fails. */
+static const __DRIconfig **
+i810FillInModes( __DRIscreen *psp,
+		 unsigned pixel_bits, unsigned depth_bits,
+		 unsigned stencil_bits, GLboolean have_back_buffer )
+{
+    /* Right now GLX_SWAP_COPY_OML isn't supported, but it would be easy
+     * enough to add support.  Basically, if a context is created with an
+     * fbconfig where the swap method is GLX_SWAP_COPY_OML, pageflipping
+     * will never be used.
+     */
+    static const GLenum back_buffer_modes[] = {
+	GLX_NONE, GLX_SWAP_UNDEFINED_OML /*, GLX_SWAP_COPY_OML */
+    };
+    uint8_t depth_bits_array[2];
+    uint8_t stencil_bits_array[2];
+    uint8_t msaa_samples_array[1];
+    unsigned depth_buffer_factor = 1;
+    unsigned back_buffer_factor = 1;
+    __DRIconfig **configs;
+    unsigned n;
+
+    depth_bits_array[0] = depth_bits_array[1] = depth_bits;
+
+    /* Just like with the accumulation buffer, always provide some modes
+     * with a stencil buffer.  It will be a sw fallback, but some apps won't
+     * care about that.
+     */
+    stencil_bits_array[0] = 0;
+    stencil_bits_array[1] = stencil_bits ? stencil_bits : 8;
+
+    msaa_samples_array[0] = 0;
+
+    if (depth_bits != 0 || stencil_bits != 0)
+	depth_buffer_factor = 2;
+    if (have_back_buffer)
+	back_buffer_factor = 2;
+
+    configs = driCreateConfigs(GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
+			       depth_bits_array, stencil_bits_array,
+			       depth_buffer_factor,
+			       back_buffer_modes, back_buffer_factor,
+                               msaa_samples_array, 1, GL_TRUE);
+    if (!configs) {
+	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
+		 __func__, __LINE__ );
+	return NULL;
+    }
+
+    /* Mark the visual as slow if there are "fake" stencil bits.
+     */
+    for (n = 0; configs[n] != NULL; n++) {
+	__GLcontextModes *modes = &configs[n]->modes;
+
+	if (modes->stencilBits != 0 && modes->stencilBits != stencil_bits)
+	    modes->visualRating = GLX_SLOW_CONFIG;
+    }
+
+    return (const __DRIconfig **) configs;
+}
+
+     
+/*  static int i810_malloc_proxy_buf(drmBufMapPtr buffers) */
+/*  { */
+/*     char *buffer; */
+/*     drmBufPtr buf; */
+/*     int i; */
+
+/*     buffer = CALLOC(I810_DMA_BUF_SZ); */
+/*     if(buffer == NULL) return -1; */
+/*     for(i = 0; i < I810_DMA_BUF_NR; i++) { */
+/*        buf = &(buffers->list[i]); */
+/*        buf->address = (drmAddress)buffer; */
+/*     } */
+/*     return 0; */
+/*  } */
+
+/* Allocate a zeroed drmBufMap with a zeroed list of I810_DMA_BUF_NR drmBufs; NULL on failure. */
+static drmBufMapPtr i810_create_empty_buffers(void)
+{
+   drmBufMapPtr map = (drmBufMapPtr)_mesa_align_malloc(sizeof(drmBufMap), 32);
+   if (map == NULL)
+      return NULL;
+   memset(map, 0, sizeof(drmBufMap));
+   map->list = (drmBufPtr)_mesa_align_malloc(sizeof(drmBuf) * I810_DMA_BUF_NR, 32);
+   if (map->list != NULL) {
+      memset(map->list, 0, sizeof(drmBuf) * I810_DMA_BUF_NR);
+      return map;
+   }
+   _mesa_align_free(map);
+   return NULL;
+}
+
+
+/* Screen-init hook: check versions, fill a new i810ScreenPrivate from the DDX
+ * record, map the back/depth/texture regions and build the config list.  On
+ * failure all of that is released, sPriv->private is NULL and NULL is returned. */
+static const __DRIconfig **
+i810InitScreen(__DRIscreen *sPriv)
+{
+   static const __DRIversion ddx_expected = { 1, 0, 0 };
+   static const __DRIversion dri_expected = { 4, 0, 0 };
+   static const __DRIversion drm_expected = { 1, 2, 0 };
+   const __DRIconfig **configs;
+   i810ScreenPrivate *i810Screen;
+   I810DRIPtr         gDRIPriv = (I810DRIPtr)sPriv->pDevPriv;
+
+   if ( ! driCheckDriDdxDrmVersions2( "i810",
+				      &sPriv->dri_version, & dri_expected,
+				      &sPriv->ddx_version, & ddx_expected,
+				      &sPriv->drm_version, & drm_expected ) ) {
+      return NULL;
+   }
+
+   if (sPriv->devPrivSize != sizeof(I810DRIRec)) {
+      fprintf(stderr,"\nERROR!  sizeof(I810DRIRec) does not match passed size from device driver\n");
+      return NULL;
+   }
+
+   /* Allocate the private area */
+   i810Screen = (i810ScreenPrivate *)CALLOC(sizeof(i810ScreenPrivate));
+   if (!i810Screen) {
+      __driUtilMessage("i810InitDriver: alloc i810ScreenPrivate struct failed");
+      return NULL;
+   }
+
+   i810Screen->driScrnPriv = sPriv;
+   sPriv->private = (void *)i810Screen;
+
+   i810Screen->deviceID=gDRIPriv->deviceID;
+   i810Screen->width=gDRIPriv->width;
+   i810Screen->height=gDRIPriv->height;
+   i810Screen->mem=gDRIPriv->mem;
+   i810Screen->cpp=gDRIPriv->cpp;
+   i810Screen->fbStride=gDRIPriv->fbStride;
+   i810Screen->fbOffset=gDRIPriv->fbOffset;
+
+   if (gDRIPriv->bitsPerPixel == 15)
+      i810Screen->fbFormat = DV_PF_555;
+   else
+      i810Screen->fbFormat = DV_PF_565;
+
+   i810Screen->backOffset=gDRIPriv->backOffset;
+   i810Screen->depthOffset=gDRIPriv->depthOffset;
+   i810Screen->backPitch = gDRIPriv->auxPitch;
+   i810Screen->backPitchBits = gDRIPriv->auxPitchBits;
+   i810Screen->textureOffset=gDRIPriv->textureOffset;
+   i810Screen->textureSize=gDRIPriv->textureSize;
+   i810Screen->logTextureGranularity = gDRIPriv->logTextureGranularity;
+
+   i810Screen->bufs = i810_create_empty_buffers();
+   if (i810Screen->bufs == NULL) {
+      __driUtilMessage("i810InitDriver: i810_create_empty_buffers() failed");
+      goto fail_screen;
+   }
+
+   i810Screen->back.handle = gDRIPriv->backbuffer;
+   i810Screen->back.size = gDRIPriv->backbufferSize;
+   if (drmMap(sPriv->fd, i810Screen->back.handle, i810Screen->back.size,
+	      (drmAddress *)&i810Screen->back.map) != 0) {
+      __driUtilMessage("i810InitDriver: drmMap failed");
+      goto fail_bufs;
+   }
+
+   i810Screen->depth.handle = gDRIPriv->depthbuffer;
+   i810Screen->depth.size = gDRIPriv->depthbufferSize;
+   if (drmMap(sPriv->fd, i810Screen->depth.handle, i810Screen->depth.size,
+	      (drmAddress *)&i810Screen->depth.map) != 0) {
+      __driUtilMessage("i810InitDriver: drmMap (2) failed");
+      goto fail_back;
+   }
+
+   i810Screen->tex.handle = gDRIPriv->textures;
+   i810Screen->tex.size = gDRIPriv->textureSize;
+   if (drmMap(sPriv->fd, i810Screen->tex.handle, i810Screen->tex.size,
+	      (drmAddress *)&i810Screen->tex.map) != 0) {
+      __driUtilMessage("i810InitDriver: drmMap (3) failed");
+      goto fail_depth;
+   }
+
+   i810Screen->sarea_priv_offset = gDRIPriv->sarea_priv_offset;
+   configs = i810FillInModes(sPriv, 16, 16, 0, 1);
+   if (configs != NULL)
+      return configs;
+
+   /* Unwind in reverse order of acquisition. */
+   drmUnmap(i810Screen->tex.map, i810Screen->tex.size);
+fail_depth:
+   drmUnmap(i810Screen->depth.map, i810Screen->depth.size);
+fail_back:
+   drmUnmap(i810Screen->back.map, i810Screen->back.size);
+fail_bufs:
+   _mesa_align_free(i810Screen->bufs->list);
+   _mesa_align_free(i810Screen->bufs);
+fail_screen:
+   FREE(i810Screen);
+   sPriv->private = NULL;
+   return NULL;
+}
+
+/* Undo i810InitScreen: unmap the regions, free the buffer list and the screen record. */
+static void
+i810DestroyScreen(__DRIscreen *sPriv)
+{
+   i810ScreenPrivate *i810Screen = (i810ScreenPrivate *)sPriv->private;
+
+   drmUnmap(i810Screen->back.map, i810Screen->back.size);
+   drmUnmap(i810Screen->depth.map, i810Screen->depth.size);
+   drmUnmap(i810Screen->tex.map, i810Screen->tex.size);
+   _mesa_align_free(i810Screen->bufs->list);	/* from i810_create_empty_buffers() */
+   _mesa_align_free(i810Screen->bufs);
+   FREE(i810Screen);
+   sPriv->private = NULL;
+}
+
+
+/**
+ * Create a buffer which corresponds to the window: a framebuffer with a front
+ * renderbuffer, plus back and 16-bit depth renderbuffers when the visual asks
+ * for them.  Returns GL_FALSE for pixmaps or when no framebuffer is created. */
+static GLboolean
+i810CreateBuffer( __DRIscreen *driScrnPriv,
+                  __DRIdrawable *driDrawPriv,
+                  const __GLcontextModes *mesaVis,
+                  GLboolean isPixmap )
+{
+   i810ScreenPrivate *screen = (i810ScreenPrivate *) driScrnPriv->private;
+   struct gl_framebuffer *fb;
+   if (isPixmap)
+      return GL_FALSE; /* not implemented */
+
+   fb = _mesa_create_framebuffer(mesaVis);	/* checked before any use below */
+   if (fb == NULL)
+      return GL_FALSE;
+
+   {
+      driRenderbuffer *frontRb
+         = driNewRenderbuffer(MESA_FORMAT_ARGB8888,
+                              driScrnPriv->pFB,
+                              screen->cpp,
+                              /*screen->frontOffset*/0, screen->backPitch,
+                              driDrawPriv);
+      i810SetSpanFunctions(frontRb, mesaVis);
+      _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
+   }
+
+   if (mesaVis->doubleBufferMode) {
+      driRenderbuffer *backRb
+         = driNewRenderbuffer(MESA_FORMAT_ARGB8888,
+                              screen->back.map,
+                              screen->cpp,
+                              screen->backOffset, screen->backPitch,
+                              driDrawPriv);
+      i810SetSpanFunctions(backRb, mesaVis);
+      _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
+   }
+
+   if (mesaVis->depthBits == 16) {
+      driRenderbuffer *depthRb
+         = driNewRenderbuffer(MESA_FORMAT_Z16,
+                              screen->depth.map,
+                              screen->cpp,
+                              screen->depthOffset, screen->backPitch,
+                              driDrawPriv);
+      i810SetSpanFunctions(depthRb, mesaVis);
+      _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+   }
+
+   _mesa_add_soft_renderbuffers(fb,
+                                GL_FALSE, /* color */
+                                GL_FALSE, /* depth */
+                                mesaVis->stencilBits > 0,
+                                mesaVis->accumRedBits > 0,
+                                GL_FALSE, /* alpha */
+                                GL_FALSE /* aux */);
+   driDrawPriv->driverPrivate = (void *) fb;
+   return GL_TRUE;
+}
+
+/* Release the framebuffer held in driverPrivate via _mesa_reference_framebuffer(..., NULL). */
+static void
+i810DestroyBuffer(__DRIdrawable *driDrawPriv)
+{
+   _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL);
+}
+
+const struct __DriverAPIRec driDriverAPI = {	/* i810 driver entry points */
+   .InitScreen      = i810InitScreen,
+   .DestroyScreen   = i810DestroyScreen,
+   .CreateContext   = i810CreateContext,
+   .DestroyContext  = i810DestroyContext,
+   .CreateBuffer    = i810CreateBuffer,
+   .DestroyBuffer   = i810DestroyBuffer,
+   .SwapBuffers     = i810SwapBuffers,
+   .MakeCurrent     = i810MakeCurrent,
+   .UnbindContext   = i810UnbindContext,
+   .GetSwapInfo     = NULL,	/* this and the MSC/SBC hooks below are not provided */
+   .GetDrawableMSC  = NULL,
+   .WaitForMSC      = NULL,
+   .WaitForSBC      = NULL,
+   .SwapBuffersMSC  = NULL
+};
+
+/* This is the NULL-terminated table of extensions that the loader will dlsym() for. */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+    &driCoreExtension.base,
+    &driLegacyExtension.base,
+    NULL
+};
diff --git a/src/mesa/drivers/dri/i810/i810screen.h b/src/mesa/drivers/dri/i810/i810screen.h
new file mode 100644
index 0000000000..fe6db7e6e1
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810screen.h
@@ -0,0 +1,100 @@
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#ifndef _I810_INIT_H_
+#define _I810_INIT_H_
+
+#include <sys/time.h>
+#include "dri_util.h"
+
+typedef struct {   /* one memory region: handle, size and mapping pointer */
+   drm_handle_t handle;
+   drmSize size;
+   char *map;   /* for the depth region this is what driNewRenderbuffer() receives */
+} i810Region, *i810RegionPtr;
+
+typedef struct {   /* per-screen driver state */
+   i810Region front;
+   i810Region back;
+   i810Region depth;   /* depth.map backs the MESA_FORMAT_Z16 depth renderbuffer */
+   i810Region tex;
+
+   int deviceID;
+   int width;   /* i810EmitDrawingRectangle() clips x1 to width-1 */
+   int height;   /* i810EmitDrawingRectangle() clips y1 to height-1 */
+   int mem;
+
+   int cpp;			/* for front and back buffers */
+   int bitsPerPixel;
+
+   int fbFormat;   /* format passed to i810PackColor() for the clear colour */
+   int fbOffset;   /* OR'ed with backPitchBits into DESTREG_DI1 for front-buffer drawing */
+   int fbStride;
+
+   int backOffset;   /* OR'ed with backPitchBits into DESTREG_DI1 for back-buffer drawing */
+   int depthOffset;   /* offset passed to driNewRenderbuffer() for the depth buffer */
+
+   int backPitch;   /* pitch passed to driNewRenderbuffer() for the depth buffer */
+   int backPitchBits;   /* pitch field of DESTREG_DI1, used for front and back alike */
+
+   int textureOffset;
+   int textureSize;
+   int logTextureGranularity;
+
+   __DRIscreen *driScrnPriv;
+   drmBufMapPtr  bufs;
+   unsigned int sarea_priv_offset;
+} i810ScreenPrivate;
+
+
+extern GLboolean   /* installed as driDriverAPI.CreateContext */
+i810CreateContext( gl_api api,
+		   const __GLcontextModes *mesaVis,
+                   __DRIcontext *driContextPriv,
+                   void *sharedContextPrivate );
+
+extern void   /* installed as driDriverAPI.DestroyContext */
+i810DestroyContext(__DRIcontext *driContextPriv);
+
+extern GLboolean   /* installed as driDriverAPI.UnbindContext */
+i810UnbindContext(__DRIcontext *driContextPriv);
+
+extern GLboolean   /* installed as driDriverAPI.MakeCurrent */
+i810MakeCurrent(__DRIcontext *driContextPriv,
+                __DRIdrawable *driDrawPriv,
+                __DRIdrawable *driReadPriv);
+
+extern void   /* installed as driDriverAPI.SwapBuffers */
+i810SwapBuffers(__DRIdrawable *driDrawPriv);
+
+#endif
diff --git a/src/mesa/drivers/dri/i810/i810span.c b/src/mesa/drivers/dri/i810/i810span.c
new file mode 100644
index 0000000000..6576f6745e
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810span.c
@@ -0,0 +1,138 @@
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "swrast/swrast.h"
+
+#include "i810screen.h"
+#include "i810_dri.h"
+
+#include "i810span.h"
+#include "i810ioctl.h"
+
+
+#define DBG 0
+/* Locals for the 565 colour span code: buf = flippedData offset by the drawable's (x, y) at 2 bytes/pixel. */
+#define LOCAL_VARS					\
+   i810ContextPtr imesa = I810_CONTEXT(ctx);	        \
+   __DRIdrawable *dPriv = imesa->driDrawable;	\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
+   GLuint pitch = drb->pitch;				\
+   GLuint height = dPriv->h;				\
+   GLushort p;						\
+   char *buf = (char *)(drb->flippedData +		\
+			dPriv->x * 2 +			\
+			dPriv->y * pitch);		\
+   (void) buf; (void) p
+/* Locals for the z16 depth span code: as LOCAL_VARS, but buf starts from drb->Base.Data and there is no 'p'. */
+#define LOCAL_DEPTH_VARS				\
+   i810ContextPtr imesa = I810_CONTEXT(ctx);	        \
+   __DRIdrawable *dPriv = imesa->driDrawable;	\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
+   GLuint pitch = drb->pitch;				\
+   GLuint height = dPriv->h;				\
+   char *buf = (char *)(drb->Base.Data +		\
+			dPriv->x * 2 +			\
+			dPriv->y * pitch)
+
+#define INIT_MONO_PIXEL(p, color) \
+   (p) = PACK_COLOR_565( (color)[0], (color)[1], (color)[2] )
+
+#define Y_FLIP(_y) (height - (_y) - 1)   /* mirror a row index within 'height' */
+/* No per-span locking: i810SpanRenderStart/Finish hold the hardware lock. */
+#define HW_LOCK()
+
+#define HW_UNLOCK()
+
+/* 16 bit, 565 rgb color spanline and pixel functions.  Every macro
+ * argument is parenthesized so expression arguments expand correctly. */
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch)  = ( (((int)(r) & 0xf8) << 8) | \
+		                               (((int)(g) & 0xfc) << 3) | \
+		                               (((int)(b) & 0xf8) >> 3))
+#define WRITE_PIXEL( _x, _y, p )  \
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = (p)
+
+#define READ_RGBA( rgba, _x, _y )					\
+do {									\
+   GLushort p = *(GLushort *)(buf + (_x)*2 + (_y)*pitch);		\
+   (rgba)[0] = ((p >> 8) & 0xf8) * 255 / 0xf8;				\
+   (rgba)[1] = ((p >> 3) & 0xfc) * 255 / 0xfc;				\
+   (rgba)[2] = ((p << 3) & 0xf8) * 255 / 0xf8;				\
+   (rgba)[3] = 255;							\
+} while(0)
+
+#define TAG(x) i810##x##_565
+#include "spantmp.h"
+
+/* 16 bit depthbuffer functions.  NOTE: WRITE_DEPTH and READ_DEPTH each
+ * expand with their own trailing ';'. */
+#define VALUE_TYPE GLushort
+
+#define WRITE_DEPTH( _x, _y, d ) \
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch)  = d;
+
+#define READ_DEPTH( d, _x, _y )	\
+   d = *(GLushort *)(buf + (_x)*2 + (_y)*pitch);
+
+#define TAG(x) i810##x##_z16
+#include "depthtmp.h"
+
+
+/* Lock once per span-rendering pass (HW_LOCK/HW_UNLOCK are empty) for reasonable performance. */
+void i810SpanRenderStart( GLcontext *ctx )
+{
+   i810ContextPtr imesa;
+   imesa = I810_CONTEXT(ctx);
+   I810_FIREVERTICES(imesa);
+   LOCK_HARDWARE(imesa);
+   i810RegetLockQuiescent( imesa );
+}
+
+void i810SpanRenderFinish( GLcontext *ctx )   /* counterpart of i810SpanRenderStart */
+{
+   i810ContextPtr imesa = I810_CONTEXT( ctx );
+   _swrast_flush( ctx );   /* called while the hardware lock is still held */
+   UNLOCK_HARDWARE( imesa );
+}
+
+void i810InitSpanFuncs( GLcontext *ctx )   /* install the swrast span start/finish hooks */
+{
+   struct swrast_device_driver *hooks = _swrast_GetDeviceDriverReference(ctx);
+   hooks->SpanRenderFinish = i810SpanRenderFinish;
+   hooks->SpanRenderStart = i810SpanRenderStart;
+}
+
+
+
+/**
+ * Plug in the Get/Put routines for the given driRenderbuffer.
+ *
+ * The choice is keyed on drb->Base.InternalFormat; \p vis is not consulted
+ * and formats not listed below leave the renderbuffer untouched.
+ */
+void
+i810SetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
+{
+   switch (drb->Base.InternalFormat) {
+   case GL_RGBA:
+      /* always 565 RGB */
+      i810InitPointers_565(&drb->Base);
+      break;
+   case GL_DEPTH_COMPONENT16:
+      i810InitDepthPointers_z16(&drb->Base);
+      break;
+   case GL_DEPTH_COMPONENT24:   /* should never get here */
+   case GL_STENCIL_INDEX8_EXT:
+      /* no accessors are provided for these formats */
+      drb->Base.GetRow        = NULL;
+      drb->Base.GetValues     = NULL;
+      drb->Base.PutRow        = NULL;
+      drb->Base.PutMonoRow    = NULL;
+      drb->Base.PutValues     = NULL;
+      drb->Base.PutMonoValues = NULL;
+      break;
+   default:
+      break;
+   }
+}
diff --git a/src/mesa/drivers/dri/i810/i810span.h b/src/mesa/drivers/dri/i810/i810span.h
new file mode 100644
index 0000000000..9aed253bd5
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810span.h
@@ -0,0 +1,14 @@
+#ifndef _I810_SPAN_H
+#define _I810_SPAN_H
+
+#include "drirenderbuffer.h"
+
+extern void i810InitSpanFuncs( GLcontext *ctx );   /* installs the two hooks below into swrast */
+
+extern void i810SpanRenderFinish( GLcontext *ctx );   /* flushes swrast, then unlocks the hardware */
+extern void i810SpanRenderStart( GLcontext *ctx );   /* fires vertices, then locks the hardware */
+
+extern void   /* picks span accessors from rb->Base.InternalFormat */
+i810SetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
+
+#endif
diff --git a/src/mesa/drivers/dri/i810/i810state.c b/src/mesa/drivers/dri/i810/i810state.c
new file mode 100644
index 0000000000..0c68e120b0
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810state.c
@@ -0,0 +1,1002 @@
+
+#include <stdio.h>
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/dd.h"
+#include "main/colormac.h"
+#include "swrast/swrast.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "vbo/vbo.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "texmem.h"
+
+#include "i810screen.h"
+#include "i810_dri.h"
+
+#include "i810context.h"
+#include "i810state.h"
+#include "i810tex.h"
+#include "i810ioctl.h"
+
+/* Pack r,g,b(,a) for a DV_PF_* pixel format; unknown formats are reported and give 0. */
+static INLINE GLuint i810PackColor(GLuint format,
+				       GLubyte r, GLubyte g,
+				       GLubyte b, GLubyte a)
+{
+   GLuint packed = 0;
+
+   if (I810_DEBUG&DEBUG_DRI)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (format == DV_PF_555)
+      packed = PACK_COLOR_1555( a, r, g, b );
+   else if (format == DV_PF_565)
+      packed = PACK_COLOR_565( r, g, b );
+   else
+      fprintf(stderr, "unknown format %d\n", (int)format);
+
+   return packed;
+}
+
+/* Program the alpha-test function and reference value into the ZA register image. */
+static void i810AlphaFunc(GLcontext *ctx, GLenum func, GLfloat ref)
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   GLuint za;
+   GLubyte ref8;
+
+   /* The comparison is read from ctx->Color.AlphaFunc, not from 'func'. */
+   switch (ctx->Color.AlphaFunc) {
+   case GL_NEVER:    za = ZA_ALPHA_NEVER;    break;
+   case GL_LESS:     za = ZA_ALPHA_LESS;     break;
+   case GL_GEQUAL:   za = ZA_ALPHA_GEQUAL;   break;
+   case GL_LEQUAL:   za = ZA_ALPHA_LEQUAL;   break;
+   case GL_GREATER:  za = ZA_ALPHA_GREATER;  break;
+   case GL_NOTEQUAL: za = ZA_ALPHA_NOTEQUAL; break;
+   case GL_EQUAL:    za = ZA_ALPHA_EQUAL;    break;
+   case GL_ALWAYS:   za = ZA_ALPHA_ALWAYS;   break;
+   default: return;
+   }
+
+   CLAMPED_FLOAT_TO_UBYTE(ref8, ref);
+   za |= ZA_UPDATE_ALPHAFUNC | ZA_UPDATE_ALPHAREF | ((ref8 & 0xfc) << ZA_ALPHAREF_SHIFT);
+
+   I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+   imesa->Setup[I810_CTXREG_ZA] &= ~(ZA_ALPHA_MASK|ZA_ALPHAREF_MASK);
+   imesa->Setup[I810_CTXREG_ZA] |= za;
+}
+
+static void i810BlendEquationSeparate(GLcontext *ctx,
+				      GLenum modeRGB, GLenum modeA)
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+
+   assert( modeRGB == modeA );
+
+   /* Hardware blending implements GL_FUNC_ADD only; anything else falls back. */
+   FALLBACK( imesa, I810_FALLBACK_BLEND_EQ, modeRGB != GL_FUNC_ADD );
+
+   /* Re-evaluate the logic-op fallback: BlendEquation sets
+    * ColorLogicOpEnabled in an unexpected manner.
+    */
+   FALLBACK( imesa, I810_FALLBACK_LOGICOP,
+	     (ctx->Color.ColorLogicOpEnabled && ctx->Color.LogicOp != GL_COPY) );
+}
+
+static void i810BlendFuncSeparate( GLcontext *ctx, GLenum sfactorRGB,
+				     GLenum dfactorRGB, GLenum sfactorA,
+				     GLenum dfactorA )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   GLuint a = SDM_UPDATE_SRC_BLEND | SDM_UPDATE_DST_BLEND;
+   GLboolean fallback = GL_FALSE;
+
+   switch (ctx->Color.BlendSrcRGB) {   /* factors come from ctx, not from the parameters */
+   case GL_ZERO:                a |= SDM_SRC_ZERO; break;
+   case GL_ONE:                 a |= SDM_SRC_ONE; break;
+   case GL_SRC_COLOR:           a |= SDM_SRC_SRC_COLOR; break;
+   case GL_ONE_MINUS_SRC_COLOR: a |= SDM_SRC_INV_SRC_COLOR; break;
+   case GL_SRC_ALPHA:           a |= SDM_SRC_SRC_ALPHA; break;
+   case GL_ONE_MINUS_SRC_ALPHA: a |= SDM_SRC_INV_SRC_ALPHA; break;
+   case GL_DST_ALPHA:           a |= SDM_SRC_ONE; break;   /* mapped to ONE */
+   case GL_ONE_MINUS_DST_ALPHA: a |= SDM_SRC_ZERO; break;   /* mapped to ZERO */
+   case GL_DST_COLOR:           a |= SDM_SRC_DST_COLOR; break;
+   case GL_ONE_MINUS_DST_COLOR: a |= SDM_SRC_INV_DST_COLOR; break;
+
+   /* (f, f, f, 1), f = min(As, 1 - Ad) = min(As, 1 - 1) = 0
+    * So (f, f, f, 1) = (0, 0, 0, 1).  Since there is no destination alpha and
+    * the only supported alpha operation is GL_FUNC_ADD, the result modulating
+    * the source alpha with the alpha factor is largely irrelevant.
+    */
+   case GL_SRC_ALPHA_SATURATE:  a |= SDM_SRC_ZERO; break;
+
+   case GL_CONSTANT_COLOR:
+   case GL_ONE_MINUS_CONSTANT_COLOR:
+   case GL_CONSTANT_ALPHA:
+   case GL_ONE_MINUS_CONSTANT_ALPHA:
+      fallback = GL_TRUE;   /* constant-colour factors take the fallback path */
+      break;
+   default:
+      return;   /* unrecognised factor: neither register nor fallback state is touched */
+   }
+
+   switch (ctx->Color.BlendDstRGB) {
+   case GL_ZERO:                a |= SDM_DST_ZERO; break;
+   case GL_ONE:                 a |= SDM_DST_ONE; break;
+   case GL_SRC_COLOR:           a |= SDM_DST_SRC_COLOR; break;
+   case GL_ONE_MINUS_SRC_COLOR: a |= SDM_DST_INV_SRC_COLOR; break;
+   case GL_SRC_ALPHA:           a |= SDM_DST_SRC_ALPHA; break;
+   case GL_ONE_MINUS_SRC_ALPHA: a |= SDM_DST_INV_SRC_ALPHA; break;
+   case GL_DST_ALPHA:           a |= SDM_DST_ONE; break;   /* mapped to ONE */
+   case GL_ONE_MINUS_DST_ALPHA: a |= SDM_DST_ZERO; break;   /* mapped to ZERO */
+   case GL_DST_COLOR:           a |= SDM_DST_DST_COLOR; break;
+   case GL_ONE_MINUS_DST_COLOR: a |= SDM_DST_INV_DST_COLOR; break;
+
+   case GL_CONSTANT_COLOR:
+   case GL_ONE_MINUS_CONSTANT_COLOR:
+   case GL_CONSTANT_ALPHA:
+   case GL_ONE_MINUS_CONSTANT_ALPHA:
+      fallback = GL_TRUE;
+      break;
+   default:
+      return;   /* note: GL_SRC_ALPHA_SATURATE as a destination factor lands here */
+   }
+
+   FALLBACK( imesa, I810_FALLBACK_BLEND_FUNC, fallback);
+   if (!fallback) {   /* the SDM register image is only rewritten on the hardware path */
+      I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+      imesa->Setup[I810_CTXREG_SDM] &= ~(SDM_SRC_MASK|SDM_DST_MASK);
+      imesa->Setup[I810_CTXREG_SDM] |= a;
+   }
+}
+
+
+/* Translate a GL depth comparison into the Z-mode field of the LCS register image. */
+static void i810DepthFunc(GLcontext *ctx, GLenum func)
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   int zfunc;
+
+   switch (func) {
+   case GL_NEVER:    zfunc = LCS_Z_NEVER;    break;
+   case GL_LESS:     zfunc = LCS_Z_LESS;     break;
+   case GL_EQUAL:    zfunc = LCS_Z_EQUAL;    break;
+   case GL_LEQUAL:   zfunc = LCS_Z_LEQUAL;   break;
+   case GL_GREATER:  zfunc = LCS_Z_GREATER;  break;
+   case GL_NOTEQUAL: zfunc = LCS_Z_NOTEQUAL; break;
+   case GL_GEQUAL:   zfunc = LCS_Z_GEQUAL;   break;
+   case GL_ALWAYS:   zfunc = LCS_Z_ALWAYS;   break;
+   default: return;
+   }
+
+   I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+   imesa->Setup[I810_CTXREG_LCS] =
+      (imesa->Setup[I810_CTXREG_LCS] & ~LCS_Z_MASK) | zfunc;
+}
+
+static void i810DepthMask(GLcontext *ctx, GLboolean flag)
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+
+   /* Clear the Z-write bit, then set it again only when writes are on. */
+   imesa->Setup[I810_CTXREG_B2] &= ~B2_ZB_WRITE_ENABLE;
+   if (flag)
+      imesa->Setup[I810_CTXREG_B2] |= B2_ZB_WRITE_ENABLE;
+}
+
+
+/* =============================================================
+ * Polygon stipple
+ *
+ * The i810 supports a 4x4 stipple natively, GL wants 32x32.
+ * Fortunately stipple is usually a repeating pattern; a mask that
+ * fails the byte-by-byte check below just clears stipple_in_hw.  */
+static void i810PolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   const GLubyte *m = mask;
+   GLubyte p[4];
+   int i,j,k;
+   int active = (ctx->Polygon.StippleFlag &&
+		 imesa->reduced_primitive == GL_TRIANGLES);
+   GLuint newMask;
+
+   if (active) {   /* stipple currently in use: switch it off while the pattern changes */
+      I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+      imesa->Setup[I810_CTXREG_ST1] &= ~ST1_ENABLE;
+   }
+
+   p[0] = mask[12] & 0xf; p[0] |= p[0] << 4;   /* low nibble of byte 12, duplicated into both nibbles */
+   p[1] = mask[8] & 0xf; p[1] |= p[1] << 4;
+   p[2] = mask[4] & 0xf; p[2] |= p[2] << 4;
+   p[3] = mask[0] & 0xf; p[3] |= p[3] << 4;
+
+   for (k = 0 ; k < 8 ; k++)   /* 8 * 4 * 4 = all 128 mask bytes, in order */
+      for (j = 0 ; j < 4; j++)   /* NOTE(review): bytes 0..3 are tested against p[0], which was built from mask[12] - confirm the order is intended */
+	 for (i = 0 ; i < 4 ; i++)
+	    if (*m++ != p[j]) {
+	       imesa->stipple_in_hw = 0;
+	       return;   /* ST1_ENABLE stays cleared if it was cleared above */
+	    }
+
+   newMask = ((p[0] & 0xf) << 0) |
+             ((p[1] & 0xf) << 4) |
+             ((p[2] & 0xf) << 8) |
+             ((p[3] & 0xf) << 12);
+
+   if (newMask == 0xffff) {
+      /* this is needed to make conform pass */
+      imesa->stipple_in_hw = 0;
+      return;
+   }
+
+   imesa->Setup[I810_CTXREG_ST1] &= ~0xffff;   /* replace the low 16 pattern bits */
+   imesa->Setup[I810_CTXREG_ST1] |= newMask;
+   imesa->stipple_in_hw = 1;
+
+   if (active)
+      imesa->Setup[I810_CTXREG_ST1] |= ST1_ENABLE;
+}
+
+
+
+/* =============================================================
+ * Hardware clipping
+ */
+
+/* Record the GL scissor box in imesa->scissor_rect, with y measured from the drawable's top. */
+static void i810Scissor( GLcontext *ctx, GLint x, GLint y,
+			 GLsizei w, GLsizei h )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+
+   if (ctx->Scissor.Enabled) {
+      I810_FIREVERTICES(imesa);	/* don't pipeline cliprect changes */
+      imesa->upload_cliprects = GL_TRUE;
+   }
+
+   imesa->scissor_rect.x1 = x;
+   imesa->scissor_rect.x2 = x + w;
+   imesa->scissor_rect.y1 = imesa->driDrawable->h - (y + h);
+   imesa->scissor_rect.y2 = imesa->driDrawable->h - y;
+}
+
+/* Any opcode other than GL_COPY, with logic-op enabled, needs the LOGICOP fallback. */
+static void i810LogicOp( GLcontext *ctx, GLenum opcode )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   FALLBACK( imesa, I810_FALLBACK_LOGICOP,
+	     (opcode != GL_COPY && ctx->Color.ColorLogicOpEnabled) );
+}
+
+/* Select and feedback are left to swrast: fall back for every mode
+ * except GL_RENDER. */
+static void i810RenderMode( GLcontext *ctx, GLenum mode )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   FALLBACK( imesa, I810_FALLBACK_RENDERMODE, !(mode == GL_RENDER) );
+}
+
+/* Select the front or back colour buffer as the render target, or fall back. */
+void i810DrawBuffer(GLcontext *ctx, GLenum mode )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   int front;
+
+   if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
+      /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */
+      FALLBACK( imesa, I810_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   if (ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+      front = 1;
+   }
+   else if (ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+      front = 0;
+   }
+   else {
+      /* any other single buffer also takes the fallback */
+      FALLBACK( imesa, I810_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   /* When the SAREA reports page 1 as current, front and back swap. */
+   if ( imesa->sarea->pf_current_page == 1 )
+      front = !front;
+
+   FALLBACK( imesa, I810_FALLBACK_DRAW_BUFFER, GL_FALSE );
+   I810_FIREVERTICES(imesa);
+   I810_STATECHANGE(imesa, I810_UPLOAD_BUFFERS);
+
+   /* Both targets use backPitchBits; only the base offset differs. */
+   if (!front) {
+      imesa->BufferSetup[I810_DESTREG_DI1] = (imesa->i810Screen->backOffset |
+					      imesa->i810Screen->backPitchBits);
+      i810XMesaSetBackClipRects( imesa );
+   }
+   else {
+      imesa->BufferSetup[I810_DESTREG_DI1] = (imesa->i810Screen->fbOffset |
+					      imesa->i810Screen->backPitchBits);
+      i810XMesaSetFrontClipRects( imesa );
+   }
+}
+
+/* ReadBuffer hook: currently an empty placeholder. */
+static void i810ReadBuffer(GLcontext *ctx, GLenum mode )
+{
+   /* XXX anything? */
+}
+
+/* Convert the float clear colour to bytes and cache it packed in the screen's fbFormat. */
+static void i810ClearColor(GLcontext *ctx, const GLfloat color[4] )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   GLubyte rgba[4];
+   int i;
+   for (i = 0; i < 4; i++) {
+      CLAMPED_FLOAT_TO_UBYTE(rgba[i], color[i]);
+   }
+   imesa->ClearColor = i810PackColor( imesa->i810Screen->fbFormat,
+				      rgba[0], rgba[1], rgba[2], rgba[3] );
+}
+
+
+/* =============================================================
+ * Culling - the i810 isn't quite as clean here as the rest of
+ *           its interfaces, but it's not bad.
+ */
+static void i810CullFaceFrontFace(GLcontext *ctx, GLenum unused)
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   GLuint mode;
+
+   if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) {
+      mode = LCS_CULL_BOTH;
+   }
+   else {
+      /* Start from CW; culling front faces and a non-CCW front winding
+       * each flip the choice, so the two together cancel out. */
+      const GLboolean cullFront = (ctx->Polygon.CullFaceMode == GL_FRONT);
+      const GLboolean frontNotCCW = (ctx->Polygon.FrontFace != GL_CCW);
+      mode = (cullFront != frontNotCCW) ? LCS_CULL_CCW : LCS_CULL_CW;
+   }
+
+   imesa->LcsCullMode = mode;
+   if (ctx->Polygon.CullFlag) {
+      I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+      imesa->Setup[I810_CTXREG_LCS] = (imesa->Setup[I810_CTXREG_LCS] & ~LCS_CULL_MASK) | mode;
+   }
+}
+
+/* Encode the clamped integer line width in the LCS 1.0 and 2.0 width bits. */
+static void i810LineWidth( GLcontext *ctx, GLfloat widthf )
+{
+   i810ContextPtr imesa = I810_CONTEXT( ctx );
+   /* AA, non-AA limits are same */
+   const int width = (int) CLAMP(ctx->Line.Width,
+                                 ctx->Const.MinLineWidth,
+                                 ctx->Const.MaxLineWidth);
+
+   imesa->LcsLineWidth = ((width & 1) ? LCS_LINEWIDTH_1_0 : 0) |
+                         ((width & 2) ? LCS_LINEWIDTH_2_0 : 0);
+
+   if (imesa->reduced_primitive != GL_LINES)
+      return;
+
+   I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+   imesa->Setup[I810_CTXREG_LCS] &= ~LCS_LINEWIDTH_3_0;
+   imesa->Setup[I810_CTXREG_LCS] |= imesa->LcsLineWidth;
+}
+
+static void i810PointSize( GLcontext *ctx, GLfloat sz )
+{
+   i810ContextPtr imesa = I810_CONTEXT( ctx );
+   /* AA, non-AA limits are same */
+   const int size = (int) CLAMP(ctx->Point.Size,
+                                ctx->Const.MinPointSize,
+                                ctx->Const.MaxPointSize);
+
+   /* Point size is encoded with the same LCS_LINEWIDTH_* bits as line width. */
+   imesa->LcsPointSize = ((size & 1) ? LCS_LINEWIDTH_1_0 : 0) |
+                         ((size & 2) ? LCS_LINEWIDTH_2_0 : 0);
+
+   if (imesa->reduced_primitive != GL_POINTS)
+      return;
+   I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+   imesa->Setup[I810_CTXREG_LCS] &= ~LCS_LINEWIDTH_3_0;
+   imesa->Setup[I810_CTXREG_LCS] |= imesa->LcsPointSize;
+}
+
+/* =============================================================
+ * Color masks
+ */
+
+static void i810ColorMask(GLcontext *ctx,
+			  GLboolean r, GLboolean g,
+			  GLboolean b, GLboolean a )
+{
+   i810ContextPtr imesa = I810_CONTEXT( ctx );
+   GLuint b2;
+
+   /* One FB write-enable bit covers r, g and b together ('a' is not
+    * consulted), so a partially enabled mask takes the fallback. */
+   if (r && g && b)
+      b2 = imesa->Setup[I810_CTXREG_B2] | B2_FB_WRITE_ENABLE;
+   else if (!(r || g || b))
+      b2 = imesa->Setup[I810_CTXREG_B2] & ~B2_FB_WRITE_ENABLE;
+   else {
+      FALLBACK( imesa, I810_FALLBACK_COLORMASK, GL_TRUE );
+      return;
+   }
+   FALLBACK( imesa, I810_FALLBACK_COLORMASK, GL_FALSE );
+   if (b2 == imesa->Setup[I810_CTXREG_B2])
+      return;
+   I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+   imesa->Setup[I810_CTXREG_B2] = b2;
+   imesa->dirty |= I810_UPLOAD_CTX;
+}
+
+/* Separate specular not fully implemented on the i810, so lighting with
+ * GL_SEPARATE_SPECULAR_COLOR raises the SPECULAR fallback. */
+static void i810LightModelfv(GLcontext *ctx, GLenum pname,
+			       const GLfloat *param)
+{
+   i810ContextPtr imesa;
+   if (pname != GL_LIGHT_MODEL_COLOR_CONTROL)
+      return;
+   imesa = I810_CONTEXT( ctx );
+   FALLBACK( imesa, I810_FALLBACK_SPECULAR,
+	     (ctx->Light.Enabled &&
+	      ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR));
+}
+
+/* But the 815 has it...
+ */
+static void i810LightModelfv_i815(GLcontext *ctx, GLenum pname,
+				    const GLfloat *param)
+{
+   i810ContextPtr imesa;
+
+   if (pname != GL_LIGHT_MODEL_COLOR_CONTROL)
+      return;
+
+   imesa = I810_CONTEXT( ctx );
+   I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+   imesa->Setup[I810_CTXREG_B1] &= ~B1_SPEC_ENABLE;
+   if (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR)
+      imesa->Setup[I810_CTXREG_B1] |= B1_SPEC_ENABLE;
+}
+
+/* In Mesa 3.5 we can reliably do native flatshading.
+ */
+static void i810ShadeModel(GLcontext *ctx, GLenum mode)
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+   /* GL_FLAT sets the flat-interpolation bit; every other mode leaves it clear. */
+   imesa->Setup[I810_CTXREG_LCS] &= ~LCS_INTERP_FLAT;
+   if (mode == GL_FLAT)
+      imesa->Setup[I810_CTXREG_LCS] |= LCS_INTERP_FLAT;
+}
+
+
+
+/* =============================================================
+ * Fog
+ */
+static void i810Fogfv(GLcontext *ctx, GLenum pname, const GLfloat *param)
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   GLuint color;
+   if (pname != GL_FOG_COLOR)
+      return;   /* only the fog colour is handled here */
+   /* Pack ctx->Fog.Color as 0xRRGGBB, each channel cast to a byte. */
+   color  = (GLubyte)(ctx->Fog.Color[0]*255.0F) << 16;
+   color |= (GLubyte)(ctx->Fog.Color[1]*255.0F) << 8;
+   color |= (GLubyte)(ctx->Fog.Color[2]*255.0F) << 0;
+   I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+   imesa->Setup[I810_CTXREG_FOG] = ((GFX_OP_FOG_COLOR | color) &
+				      ~FOG_RESERVED_MASK);
+}
+
+
+/* =============================================================
+ * Enable/disable: mirror 'cap' into the register images or fallback flags. */
+static void i810Enable(GLcontext *ctx, GLenum cap, GLboolean state)
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+
+   switch(cap) {
+   case GL_ALPHA_TEST:
+      I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+      imesa->Setup[I810_CTXREG_B1] &= ~B1_ALPHA_TEST_ENABLE;
+      if (state)
+	 imesa->Setup[I810_CTXREG_B1] |= B1_ALPHA_TEST_ENABLE;
+      break;
+   case GL_BLEND:
+      I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+      imesa->Setup[I810_CTXREG_B1] &= ~B1_BLEND_ENABLE;
+      if (state)
+	 imesa->Setup[I810_CTXREG_B1] |= B1_BLEND_ENABLE;
+
+      /* For some reason enable(GL_BLEND) affects ColorLogicOpEnabled.
+       */
+      FALLBACK( imesa, I810_FALLBACK_LOGICOP,
+		(ctx->Color.ColorLogicOpEnabled &&
+		 ctx->Color.LogicOp != GL_COPY));
+      break;
+   case GL_DEPTH_TEST:
+      I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+      imesa->Setup[I810_CTXREG_B1] &= ~B1_Z_TEST_ENABLE;
+      if (state)
+	 imesa->Setup[I810_CTXREG_B1] |= B1_Z_TEST_ENABLE;
+      break;
+   case GL_SCISSOR_TEST:
+      /* XXX without these next two lines, conform's scissor test fails */
+      I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+      I810_STATECHANGE(imesa, I810_UPLOAD_BUFFERS);
+      I810_FIREVERTICES(imesa);	/* don't pipeline cliprect changes */
+      imesa->upload_cliprects = GL_TRUE;
+      imesa->scissor = state;   /* kept in the context, not in a Setup[] register image */
+      break;
+   case GL_POLYGON_STIPPLE:
+      if (imesa->stipple_in_hw && imesa->reduced_primitive == GL_TRIANGLES)   /* only a hardware-loaded pattern while drawing triangles */
+      {
+	 I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+	 imesa->Setup[I810_CTXREG_ST1] &= ~ST1_ENABLE;
+	 if (state)
+	    imesa->Setup[I810_CTXREG_ST1] |= ST1_ENABLE;
+      }
+      break;
+   case GL_LINE_SMOOTH:
+      /* Need to fatten the lines by .5, or they disappear...
+       */
+      if (imesa->reduced_primitive == GL_LINES) {
+	 I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+	 imesa->Setup[I810_CTXREG_AA] &= ~AA_ENABLE;
+	 imesa->Setup[I810_CTXREG_LCS] &= ~LCS_LINEWIDTH_0_5;
+	 if (state) {
+	    imesa->Setup[I810_CTXREG_AA] |= AA_ENABLE;
+	    imesa->Setup[I810_CTXREG_LCS] |= LCS_LINEWIDTH_0_5;
+	 }
+      }
+      break;
+   case GL_POINT_SMOOTH:   /* same AA and 0.5-width bits as GL_LINE_SMOOTH, gated on GL_POINTS */
+      if (imesa->reduced_primitive == GL_POINTS) {
+	 I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+	 imesa->Setup[I810_CTXREG_AA] &= ~AA_ENABLE;
+	 imesa->Setup[I810_CTXREG_LCS] &= ~LCS_LINEWIDTH_0_5;
+	 if (state) {
+	    imesa->Setup[I810_CTXREG_AA] |= AA_ENABLE;
+	    imesa->Setup[I810_CTXREG_LCS] |= LCS_LINEWIDTH_0_5;
+	 }
+      }
+      break;
+   case GL_POLYGON_SMOOTH:   /* AA bit only; the width bits are not touched here */
+      if (imesa->reduced_primitive == GL_TRIANGLES) {
+	 I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+	 imesa->Setup[I810_CTXREG_AA] &= ~AA_ENABLE;
+	 if (state)
+	    imesa->Setup[I810_CTXREG_AA] |= AA_ENABLE;
+      }
+      break;
+   case GL_FOG:
+      I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+      imesa->Setup[I810_CTXREG_B1] &= ~B1_FOG_ENABLE;
+      if (state)
+	 imesa->Setup[I810_CTXREG_B1] |= B1_FOG_ENABLE;
+      break;
+   case GL_CULL_FACE:
+      I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+      imesa->Setup[I810_CTXREG_LCS] &= ~LCS_CULL_MASK;
+      if (state)
+	 imesa->Setup[I810_CTXREG_LCS] |= imesa->LcsCullMode;   /* mode cached by i810CullFaceFrontFace */
+      else
+	 imesa->Setup[I810_CTXREG_LCS] |= LCS_CULL_DISABLE;
+      break;
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_RECTANGLE_NV:
+      I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+      if (ctx->Texture.CurrentUnit == 0) {   /* unit 0 drives TEXEL0; any other unit drives TEXEL1 */
+	 imesa->Setup[I810_CTXREG_MT] &= ~MT_TEXEL0_ENABLE;
+	 if (state)
+	    imesa->Setup[I810_CTXREG_MT] |= MT_TEXEL0_ENABLE;
+      } else {
+	 imesa->Setup[I810_CTXREG_MT] &= ~MT_TEXEL1_ENABLE;
+	 if (state)
+	    imesa->Setup[I810_CTXREG_MT] |= MT_TEXEL1_ENABLE;
+      }
+      break;
+   case GL_COLOR_LOGIC_OP:
+      FALLBACK( imesa, I810_FALLBACK_LOGICOP,
+		(state && ctx->Color.LogicOp != GL_COPY));
+      break;
+   case GL_STENCIL_TEST:
+      FALLBACK( imesa, I810_FALLBACK_STENCIL, state );   /* enabling stencil always falls back */
+      break;
+   default:
+      ;   /* every other capability is ignored here */
+   }
+}
+
+
+
+
+
+
+
+/* =============================================================
+ */
+
+
+
+/* Load the window origin (DR4) and the screen-clipped drawing rectangle (DR2/DR3). */
+void i810EmitDrawingRectangle( i810ContextPtr imesa )
+{
+   __DRIdrawable *dPriv = imesa->driDrawable;
+   i810ScreenPrivate *i810Screen = imesa->i810Screen;
+   int x0 = imesa->drawX;
+   int y0 = imesa->drawY;
+   int x1 = x0 + dPriv->w;
+   int y1 = y0 + dPriv->h;
+
+   /* Coordinate origin of the window - may be offscreen, i.e. negative.
+    * Shift as unsigned: left-shifting a negative int is undefined in C.
+    */
+   imesa->BufferSetup[I810_DESTREG_DR4] = (((GLuint)y0 << 16) |
+					   ((GLuint)x0 & 0xFFFF));
+
+   /* Clip to screen.
+    */
+   if (x0 < 0) x0 = 0;
+   if (y0 < 0) y0 = 0;
+   if (x1 > i810Screen->width-1) x1 = i810Screen->width-1;
+   if (y1 > i810Screen->height-1) y1 = i810Screen->height-1;
+
+   /* Onscreen drawing rectangle; x1/y1 are not clamped at zero, so the
+    * same unsigned packing is used for the lower-right corner.
+    */
+   imesa->BufferSetup[I810_DESTREG_DR2] = (((GLuint)y0 << 16) | (GLuint)x0);
+   imesa->BufferSetup[I810_DESTREG_DR3] = (((GLuint)(y1+1) << 16) |
+					   (GLuint)(x1+1));
+
+   /* Flag the buffer registers dirty. */
+   imesa->dirty |= I810_UPLOAD_BUFFERS;
+}
+
+
+/* Build imesa->ViewportMatrix from ctx->Viewport._WindowMap: y negated, depth scaled by 1/0xffff. */
+static void i810CalcViewport( GLcontext *ctx )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   const GLfloat *win = ctx->Viewport._WindowMap.m;
+   GLfloat *hw = imesa->ViewportMatrix.m;
+
+   /* See also i810_translate_vertex.  SUBPIXEL adjustments can be done
+    * via state vars, too.
+    */
+   hw[MAT_SX] =   win[MAT_SX];
+   hw[MAT_SY] = - win[MAT_SY];
+   hw[MAT_SZ] =   win[MAT_SZ] * (1.0 / 0xffff);
+   hw[MAT_TX] =   win[MAT_TX] + SUBPIXEL_X;
+   hw[MAT_TY] = - win[MAT_TY] + imesa->driDrawable->h + SUBPIXEL_Y;
+   hw[MAT_TZ] =   win[MAT_TZ] * (1.0 / 0xffff);
+}
+
+static void i810Viewport( GLcontext *ctx,   /* x, y, width and height are not read here */
+			  GLint x, GLint y,
+			  GLsizei width, GLsizei height )
+{
+   i810CalcViewport( ctx );   /* recomputes the matrix from ctx->Viewport._WindowMap */
+}
+
+static void i810DepthRange( GLcontext *ctx,   /* nearval and farval are not read here */
+			    GLclampd nearval, GLclampd farval )
+{
+   i810CalcViewport( ctx );   /* recomputes the matrix from ctx->Viewport._WindowMap */
+}
+
+
+/* Print 'msg', the raw mask, and a label for each I810_UPLOAD_* bit set in 'state'. */
+void i810PrintDirty( const char *msg, GLuint state )
+{
+   const char *tex0 = (state & I810_UPLOAD_TEX0)    ? "upload-tex0, " : "";
+   const char *tex1 = (state & I810_UPLOAD_TEX1)    ? "upload-tex1, " : "";
+   const char *regs = (state & I810_UPLOAD_CTX)     ? "upload-ctx, " : "";
+   const char *bufs = (state & I810_UPLOAD_BUFFERS) ? "upload-bufs, " : "";
+
+   /* labels are printed in the fixed order tex0, tex1, ctx, bufs */
+   fprintf(stderr, "%s (0x%x): %s%s%s%s\n",
+	   msg, (unsigned int) state, tex0, tex1, regs, bufs);
+}
+
+
+
+void i810InitState( GLcontext *ctx )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   i810ScreenPrivate *i810Screen = imesa->i810Screen;
+
+   memset(imesa->Setup, 0, sizeof(imesa->Setup));
+
+   imesa->Setup[I810_CTXREG_VF] = 0;
+
+   imesa->Setup[I810_CTXREG_MT] = (GFX_OP_MAP_TEXELS |
+				   MT_UPDATE_TEXEL1_STATE |
+				   MT_TEXEL1_COORD1 |
+				   MT_TEXEL1_MAP1 |
+				   MT_TEXEL1_DISABLE |
+				   MT_UPDATE_TEXEL0_STATE |
+				   MT_TEXEL0_COORD0 |
+				   MT_TEXEL0_MAP0 |
+				   MT_TEXEL0_DISABLE);
+
+   imesa->Setup[I810_CTXREG_MC0] = ( GFX_OP_MAP_COLOR_STAGES |
+				     MC_STAGE_0 |
+				     MC_UPDATE_DEST |
+				     MC_DEST_CURRENT |
+				     MC_UPDATE_ARG1 |
+				     ((MC_ARG_ITERATED_COLOR |
+				       MC_ARG_DONT_REPLICATE_ALPHA |
+				       MC_ARG_DONT_INVERT) << MC_ARG1_SHIFT) |
+				     MC_UPDATE_ARG2 |
+				     ((MC_ARG_ONE |
+				       MC_ARG_DONT_REPLICATE_ALPHA |
+				       MC_ARG_DONT_INVERT) << MC_ARG2_SHIFT) |
+				     MC_UPDATE_OP |
+				     MC_OP_ARG1 );
+
+   imesa->Setup[I810_CTXREG_MC1] = ( GFX_OP_MAP_COLOR_STAGES |
+				     MC_STAGE_1 |
+				     MC_UPDATE_DEST |
+				     MC_DEST_CURRENT |
+				     MC_UPDATE_ARG1 |
+				     ((MC_ARG_ONE |
+				       MC_ARG_DONT_REPLICATE_ALPHA |
+				       MC_ARG_DONT_INVERT) << MC_ARG1_SHIFT) |
+				     MC_UPDATE_ARG2 |
+				     ((MC_ARG_ONE |
+				       MC_ARG_DONT_REPLICATE_ALPHA |
+				       MC_ARG_DONT_INVERT) << MC_ARG2_SHIFT) |
+				     MC_UPDATE_OP |
+				     MC_OP_DISABLE );
+
+
+   imesa->Setup[I810_CTXREG_MC2] = ( GFX_OP_MAP_COLOR_STAGES |
+				     MC_STAGE_2 |
+				     MC_UPDATE_DEST |
+				     MC_DEST_CURRENT |
+				     MC_UPDATE_ARG1 |
+				     ((MC_ARG_CURRENT_COLOR |
+				       MC_ARG_REPLICATE_ALPHA |
+				       MC_ARG_DONT_INVERT) << MC_ARG1_SHIFT) |
+				     MC_UPDATE_ARG2 |
+				     ((MC_ARG_ONE |
+				       MC_ARG_DONT_REPLICATE_ALPHA |
+				       MC_ARG_DONT_INVERT) << MC_ARG2_SHIFT) |
+				     MC_UPDATE_OP |
+				     MC_OP_DISABLE );
+
+
+   imesa->Setup[I810_CTXREG_MA0] = ( GFX_OP_MAP_ALPHA_STAGES |
+				     MA_STAGE_0 |
+				     MA_UPDATE_ARG1 |
+				     ((MA_ARG_ITERATED_ALPHA |
+				       MA_ARG_DONT_INVERT) << MA_ARG1_SHIFT) |
+				     MA_UPDATE_ARG2 |
+				     ((MA_ARG_CURRENT_ALPHA |
+				       MA_ARG_DONT_INVERT) << MA_ARG2_SHIFT) |
+				     MA_UPDATE_OP |
+				     MA_OP_ARG1 );
+
+
+   imesa->Setup[I810_CTXREG_MA1] = ( GFX_OP_MAP_ALPHA_STAGES |
+				     MA_STAGE_1 |
+				     MA_UPDATE_ARG1 |
+				     ((MA_ARG_CURRENT_ALPHA |
+				       MA_ARG_DONT_INVERT) << MA_ARG1_SHIFT) |
+				     MA_UPDATE_ARG2 |
+				     ((MA_ARG_CURRENT_ALPHA |
+				       MA_ARG_DONT_INVERT) << MA_ARG2_SHIFT) |
+				     MA_UPDATE_OP |
+				     MA_OP_ARG1 );
+
+
+   imesa->Setup[I810_CTXREG_MA2] = ( GFX_OP_MAP_ALPHA_STAGES |
+				     MA_STAGE_2 |
+				     MA_UPDATE_ARG1 |
+				     ((MA_ARG_CURRENT_ALPHA |
+				       MA_ARG_DONT_INVERT) << MA_ARG1_SHIFT) |
+				     MA_UPDATE_ARG2 |
+				     ((MA_ARG_CURRENT_ALPHA |
+				       MA_ARG_DONT_INVERT) << MA_ARG2_SHIFT) |
+				     MA_UPDATE_OP |
+				     MA_OP_ARG1 );
+
+
+   imesa->Setup[I810_CTXREG_SDM] = ( GFX_OP_SRC_DEST_MONO |
+				     SDM_UPDATE_MONO_ENABLE |
+				     0 |
+				     SDM_UPDATE_SRC_BLEND |
+				     SDM_SRC_ONE |
+				     SDM_UPDATE_DST_BLEND |
+				     SDM_DST_ZERO );
+
+   /* Use for colormask:
+    */
+   imesa->Setup[I810_CTXREG_CF0] = GFX_OP_COLOR_FACTOR;
+   imesa->Setup[I810_CTXREG_CF1] = 0xffffffff;
+
+   imesa->Setup[I810_CTXREG_ZA] = (GFX_OP_ZBIAS_ALPHAFUNC |
+				   ZA_UPDATE_ALPHAFUNC |
+				   ZA_ALPHA_ALWAYS |
+				   ZA_UPDATE_ZBIAS |
+				   0 |
+				   ZA_UPDATE_ALPHAREF |
+				   0x0);
+
+   imesa->Setup[I810_CTXREG_FOG] = (GFX_OP_FOG_COLOR |
+				    (0xffffff & ~FOG_RESERVED_MASK));
+
+   /* Choose a pipe
+    */
+   imesa->Setup[I810_CTXREG_B1] = ( GFX_OP_BOOL_1 |
+				    B1_UPDATE_SPEC_SETUP_ENABLE |
+				    0 |
+				    B1_UPDATE_ALPHA_SETUP_ENABLE |
+				    B1_ALPHA_SETUP_ENABLE |
+				    B1_UPDATE_CI_KEY_ENABLE |
+				    0 |
+				    B1_UPDATE_CHROMAKEY_ENABLE |
+				    0 |
+				    B1_UPDATE_Z_BIAS_ENABLE |
+				    0 |
+				    B1_UPDATE_SPEC_ENABLE |
+				    0 |
+				    B1_UPDATE_FOG_ENABLE |
+				    0 |
+				    B1_UPDATE_ALPHA_TEST_ENABLE |
+				    0 |
+				    B1_UPDATE_BLEND_ENABLE |
+				    0 |
+				    B1_UPDATE_Z_TEST_ENABLE |
+				    0 );
+
+   imesa->Setup[I810_CTXREG_B2] = ( GFX_OP_BOOL_2 |
+				    B2_UPDATE_MAP_CACHE_ENABLE |
+				    B2_MAP_CACHE_ENABLE |
+				    B2_UPDATE_ALPHA_DITHER_ENABLE |
+				    0 |
+				    B2_UPDATE_FOG_DITHER_ENABLE |
+				    0 |
+				    B2_UPDATE_SPEC_DITHER_ENABLE |
+				    0 |
+				    B2_UPDATE_RGB_DITHER_ENABLE |
+				    B2_RGB_DITHER_ENABLE |
+				    B2_UPDATE_FB_WRITE_ENABLE |
+				    B2_FB_WRITE_ENABLE |
+				    B2_UPDATE_ZB_WRITE_ENABLE |
+				    B2_ZB_WRITE_ENABLE );
+
+   imesa->Setup[I810_CTXREG_LCS] = ( GFX_OP_LINEWIDTH_CULL_SHADE_MODE |
+				     LCS_UPDATE_ZMODE |
+				     LCS_Z_LESS |
+				     LCS_UPDATE_LINEWIDTH |
+				     LCS_LINEWIDTH_1_0 |
+				     LCS_UPDATE_ALPHA_INTERP |
+				     LCS_ALPHA_INTERP |
+				     LCS_UPDATE_FOG_INTERP |
+				     0 |
+				     LCS_UPDATE_SPEC_INTERP |
+				     0 |
+				     LCS_UPDATE_RGB_INTERP |
+				     LCS_RGB_INTERP |
+				     LCS_UPDATE_CULL_MODE |
+				     LCS_CULL_DISABLE);
+
+   imesa->LcsCullMode = LCS_CULL_CW;
+   imesa->LcsLineWidth = LCS_LINEWIDTH_1_0;
+   imesa->LcsPointSize = LCS_LINEWIDTH_1_0;
+
+   imesa->Setup[I810_CTXREG_PV] = ( GFX_OP_PV_RULE |
+				    PV_UPDATE_PIXRULE |
+				    PV_PIXRULE_ENABLE |
+				    PV_UPDATE_LINELIST |
+				    PV_LINELIST_PV1 |
+				    PV_UPDATE_TRIFAN |
+				    PV_TRIFAN_PV2 |
+				    PV_UPDATE_TRISTRIP |
+				    PV_TRISTRIP_PV2 );
+
+
+   imesa->Setup[I810_CTXREG_ST0] = GFX_OP_STIPPLE;
+   imesa->Setup[I810_CTXREG_ST1] = 0;
+
+   imesa->Setup[I810_CTXREG_AA] = ( GFX_OP_ANTIALIAS |
+				    AA_UPDATE_EDGEFLAG |
+				    0 |
+				    AA_UPDATE_POLYWIDTH |
+				    AA_POLYWIDTH_05 |
+				    AA_UPDATE_LINEWIDTH |
+				    AA_LINEWIDTH_05 |
+				    AA_UPDATE_BB_EXPANSION |
+				    0 |
+				    AA_UPDATE_AA_ENABLE |
+				    0 );
+
+   memset(imesa->BufferSetup, 0, sizeof(imesa->BufferSetup));
+   imesa->BufferSetup[I810_DESTREG_DI0] = CMD_OP_DESTBUFFER_INFO;
+
+   if (imesa->glCtx->Visual.doubleBufferMode && imesa->sarea->pf_current_page == 0) {
+      /* use back buffer by default */
+      imesa->BufferSetup[I810_DESTREG_DI1] = (i810Screen->backOffset |
+					      i810Screen->backPitchBits);
+   } else {
+      /* use front buffer by default */
+      imesa->BufferSetup[I810_DESTREG_DI1] = (i810Screen->fbOffset |
+					      i810Screen->backPitchBits);
+   }
+
+   imesa->BufferSetup[I810_DESTREG_DV0] = GFX_OP_DESTBUFFER_VARS;
+   imesa->BufferSetup[I810_DESTREG_DV1] = (DV_HORG_BIAS_OGL |
+					   DV_VORG_BIAS_OGL |
+					   i810Screen->fbFormat);
+
+   imesa->BufferSetup[I810_DESTREG_DR0] = GFX_OP_DRAWRECT_INFO;
+   imesa->BufferSetup[I810_DESTREG_DR1] = DR1_RECT_CLIP_ENABLE;
+}
+
+/* Driver.UpdateState hook: pass new_state to swrast, swsetup, vbo and tnl, then record it. */
+static void i810InvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   I810_CONTEXT(ctx)->new_state |= new_state;
+}
+
+
+void i810InitStateFuncs(GLcontext *ctx)
+{
+   /* Plug the i810 state handlers into the context's driver hooks.  The
+    * assignments do not depend on one another; they are grouped by topic.
+    */
+
+   ctx->Driver.UpdateState = i810InvalidateState; /* internal Mesa events */
+
+   /* Enables, render mode, buffers and window-space transforms. */
+   ctx->Driver.Enable = i810Enable;
+   ctx->Driver.RenderMode = i810RenderMode;
+   ctx->Driver.DrawBuffer = i810DrawBuffer;
+   ctx->Driver.ReadBuffer = i810ReadBuffer;
+   ctx->Driver.ClearColor = i810ClearColor;
+   ctx->Driver.ColorMask = i810ColorMask;
+   ctx->Driver.Scissor = i810Scissor;
+   ctx->Driver.Viewport = i810Viewport;
+   ctx->Driver.DepthRange = i810DepthRange;
+   /* Per-fragment tests, blending, fog and logic op. */
+   ctx->Driver.AlphaFunc = i810AlphaFunc;
+   ctx->Driver.BlendEquationSeparate = i810BlendEquationSeparate;
+   ctx->Driver.BlendFuncSeparate = i810BlendFuncSeparate;
+   ctx->Driver.DepthFunc = i810DepthFunc;
+   ctx->Driver.DepthMask = i810DepthMask;
+   ctx->Driver.Fogfv = i810Fogfv;
+   ctx->Driver.LogicOpcode = i810LogicOp;
+   /* Primitive setup. */
+   ctx->Driver.CullFace = i810CullFaceFrontFace;
+   ctx->Driver.FrontFace = i810CullFaceFrontFace;
+   ctx->Driver.LineWidth = i810LineWidth;
+   ctx->Driver.PointSize = i810PointSize;
+   ctx->Driver.PolygonStipple = i810PolygonStipple;
+   ctx->Driver.ShadeModel = i810ShadeModel;
+   /* Light model: the i815 gets its own handler. */
+   ctx->Driver.LightModelfv = IS_I815(I810_CONTEXT(ctx))
+      ? i810LightModelfv_i815 : i810LightModelfv;
+}
diff --git a/src/mesa/drivers/dri/i810/i810state.h b/src/mesa/drivers/dri/i810/i810state.h
new file mode 100644
index 0000000000..118b075491
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810state.h
@@ -0,0 +1,15 @@
+#ifndef _I810_STATE_H
+#define _I810_STATE_H
+
+#include "i810context.h"
+/* State-related entry points exported by the i810 driver. */
+extern void i810InitState( GLcontext *ctx );
+extern void i810InitStateFuncs( GLcontext *ctx );
+extern void i810PrintDirty( const char *msg, GLuint state );
+extern void i810DrawBuffer(GLcontext *ctx, GLenum mode );
+
+extern void i810Fallback( i810ContextPtr imesa, GLuint bit, GLboolean mode );
+#define FALLBACK( imesa, bit, mode ) i810Fallback( imesa, bit, mode )
+/* FALLBACK() simply forwards its three arguments to i810Fallback(). */
+
+#endif
diff --git a/src/mesa/drivers/dri/i810/i810tex.c b/src/mesa/drivers/dri/i810/i810tex.c
new file mode 100644
index 0000000000..2ccb9562e9
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810tex.c
@@ -0,0 +1,547 @@
+/*
+ * GLX Hardware Device Driver for Intel i810
+ * Copyright (C) 1999 Keith Whitwell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/simple_list.h"
+#include "main/enums.h"
+#include "main/texstore.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/colormac.h"
+#include "main/texobj.h"
+#include "main/mm.h"
+
+#include "texmem.h"
+
+#include "i810screen.h"
+#include "i810_dri.h"
+
+#include "i810context.h"
+#include "i810tex.h"
+#include "i810ioctl.h"
+
+
+/*
+ * Convert the floating point OpenGL LOD bias into the hardware's 'S2.4'
+ * field: scale by 16, add 12, clamp to [-64, 63], then apply the field mask.
+ */
+static GLuint i810ComputeLodBias(GLfloat bias)
+{
+   int fixed = (int) (bias * 16.0) + 12;
+
+   fixed = (fixed > 63) ? 63 : ((fixed < -64) ? -64 : fixed);
+
+   return (GLuint) (fixed & MLC_LOD_BIAS_MASK);
+}
+
+
+static void i810SetTexWrapping(i810TextureObjectPtr tex,
+			       GLenum swrap, GLenum twrap)
+{
+   /* Translate the GL S/T wrap modes into the U/V addressing fields of the
+    * texture's MCS setup word.  Both fields are masked off first; GL_CLAMP
+    * and GL_CLAMP_TO_EDGE share the same hardware clamp mode.  An
+    * unrecognised mode is reported via _mesa_problem() and adds no bits.
+    */
+   GLuint mcs = tex->Setup[I810_TEXREG_MCS];
+
+   mcs &= ~(MCS_U_STATE_MASK | MCS_V_STATE_MASK);
+
+   /* S coordinate -> U state */
+   if (swrap == GL_REPEAT)
+      mcs |= MCS_U_WRAP;
+   else if (swrap == GL_CLAMP || swrap == GL_CLAMP_TO_EDGE)
+      mcs |= MCS_U_CLAMP;
+   else if (swrap == GL_MIRRORED_REPEAT)
+      mcs |= MCS_U_MIRROR;
+   else
+      _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
+
+   /* T coordinate -> V state */
+   if (twrap == GL_REPEAT)
+      mcs |= MCS_V_WRAP;
+   else if (twrap == GL_CLAMP || twrap == GL_CLAMP_TO_EDGE)
+      mcs |= MCS_V_CLAMP;
+   else if (twrap == GL_MIRRORED_REPEAT)
+      mcs |= MCS_V_MIRROR;
+   else
+      _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__);
+
+   /* Store the rebuilt word back into the texture object. */
+   tex->Setup[I810_TEXREG_MCS] = mcs;
+}
+
+
+static void i810SetTexFilter(i810ContextPtr imesa,
+			     i810TextureObjectPtr t,
+			     GLenum minf, GLenum magf,
+                             GLfloat bias)
+{
+   /* Program the min/mag/mip filter fields of the MF word and the LOD bias
+    * field of the MLC word from the GL filter modes.
+    *
+    * All four fields are cleared up front.  If minf or magf is not one of
+    * the filters handled below the function returns early, keeping whatever
+    * has been written so far (in particular the LOD bias stays cleared).
+    */
+   GLuint minBits;
+
+   t->Setup[I810_TEXREG_MF] &= ~(MF_MIN_MASK | MF_MAG_MASK | MF_MIP_MASK);
+   t->Setup[I810_TEXREG_MLC] &= ~(MLC_LOD_BIAS_MASK);
+
+   switch (minf) {
+   case GL_NEAREST:
+      minBits = MF_MIN_NEAREST | MF_MIP_NONE;
+      break;
+   case GL_LINEAR:
+      minBits = MF_MIN_LINEAR | MF_MIP_NONE;
+      break;
+   case GL_NEAREST_MIPMAP_NEAREST:
+      /* Subtracting 0.5 from the bias when magf is GL_LINEAR was tried
+       * here and dropped ("this doesn't work too good").
+       */
+      minBits = MF_MIN_NEAREST | MF_MIP_NEAREST;
+      break;
+   case GL_LINEAR_MIPMAP_NEAREST:
+      minBits = MF_MIN_LINEAR | MF_MIP_NEAREST;
+      break;
+   case GL_NEAREST_MIPMAP_LINEAR:
+      /* Only the i815 is given MF_MIP_LINEAR; other chips get
+       * MF_MIP_DITHER instead.
+       */
+      minBits = MF_MIN_NEAREST |
+                (IS_I815(imesa) ? MF_MIP_LINEAR : MF_MIP_DITHER);
+      bias -= 0.5; /* always biasing here looks better */
+      break;
+   case GL_LINEAR_MIPMAP_LINEAR:
+      minBits = MF_MIN_LINEAR |
+                (IS_I815(imesa) ? MF_MIP_LINEAR : MF_MIP_DITHER);
+      break;
+   default:
+      return;
+   }
+   t->Setup[I810_TEXREG_MF] |= minBits;
+
+   /* Magnification filter. */
+   if (magf == GL_NEAREST)
+      t->Setup[I810_TEXREG_MF] |= MF_MAG_NEAREST;
+   else if (magf == GL_LINEAR)
+      t->Setup[I810_TEXREG_MF] |= MF_MAG_LINEAR;
+   else
+      return;
+
+   /* Both filters were valid: install the (possibly adjusted) LOD bias. */
+   t->Setup[I810_TEXREG_MLC] |= i810ComputeLodBias(bias);
+}
+
+
+static void
+i810SetTexBorderColor( i810TextureObjectPtr t, const GLfloat color[4] )
+{
+   /* Intentionally empty: neither argument is used and no border color is
+    * programmed.  Need a fallback. */
+}
+
+
+static i810TextureObjectPtr
+i810AllocTexObj( GLcontext *ctx, struct gl_texture_object *texObj )
+{
+   /* Allocate the driver-private texture object for texObj (CALLOC_STRUCT),
+    * hook it up through texObj->DriverData and initialize the
+    * non-image-dependent parts of its register state.  On allocation
+    * failure DriverData is left NULL and NULL is returned.
+    */
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   i810TextureObjectPtr t = CALLOC_STRUCT( i810_texture_object_t );
+   GLfloat bias;
+
+   texObj->DriverData = t;
+   if ( t == NULL )
+      return t;
+
+   bias = ctx->Texture.Unit[ctx->Texture.CurrentUnit].LodBias;
+   t->base.tObj = texObj;
+   /* Map info words. */
+   t->Setup[I810_TEXREG_MI0] = GFX_OP_MAP_INFO;
+   t->Setup[I810_TEXREG_MI1] = MI1_MAP_0;
+   t->Setup[I810_TEXREG_MI2] = MI2_DIMENSIONS_ARE_LOG2;
+
+   /* LOD control; MLC_DITHER_WEIGHT_FULL is the alternative not used here. */
+   t->Setup[I810_TEXREG_MLC] = (GFX_OP_MAP_LOD_CTL | MLC_MAP_0 |
+                                MLC_DITHER_WEIGHT_12 | MLC_UPDATE_LOD_BIAS);
+
+   /* Coordinate set word; starts out with the wrap mode in both U and V. */
+   t->Setup[I810_TEXREG_MCS] = (GFX_OP_MAP_COORD_SETS | MCS_COORD_0 |
+                                MCS_UPDATE_NORMALIZED | MCS_NORMALIZED_COORDS |
+                                MCS_UPDATE_V_STATE | MCS_V_WRAP |
+                                MCS_UPDATE_U_STATE | MCS_U_WRAP);
+
+   /* Filter word; the min/mag/mip modes are ORed in by i810SetTexFilter(). */
+   t->Setup[I810_TEXREG_MF] = (GFX_OP_MAP_FILTER | MF_MAP_0 |
+                               MF_UPDATE_ANISOTROPIC | MF_UPDATE_MIP_FILTER |
+                               MF_UPDATE_MAG_FILTER | MF_UPDATE_MIN_FILTER);
+
+   make_empty_list( & t->base );
+   /* Derive the remaining fields from the GL object's current parameters
+    * (max anisotropy is not applied). */
+   i810SetTexWrapping( t, texObj->WrapS, texObj->WrapT );
+   i810SetTexFilter( imesa, t, texObj->MinFilter, texObj->MagFilter, bias );
+   i810SetTexBorderColor( t, texObj->BorderColor.f );
+
+   return t;
+}
+
+
+static void i810TexParameter( GLcontext *ctx, GLenum target,
+			      struct gl_texture_object *tObj,
+			      GLenum pname, const GLfloat *params )
+{
+   /* Fold a texture-parameter change into the driver's copy of the texture
+    * registers (t->Setup).
+    *
+    * Can't do the update now as we don't know whether to flush vertices
+    * or not.  Setting imesa->new_state means that i810UpdateTextureState()
+    * will be called before any triangles are rendered.  If a statechange
+    * has occurred, it will be detected at that point, and buffered vertices
+    * flushed.
+    * NOTE(review): nothing here writes imesa->new_state directly; confirm
+    * that I810_STATECHANGE (or the caller) takes care of it.
+    *
+    * Objects without driver data, targets other than GL_TEXTURE_2D and
+    * parameters not handled below are ignored.  'params' is not read: the
+    * new values are taken from tObj.
+    */
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   i810TextureObjectPtr t = (i810TextureObjectPtr) tObj->DriverData;
+
+   if ( t == NULL || target != GL_TEXTURE_2D )
+      return;
+
+   if ( pname == GL_TEXTURE_MIN_FILTER || pname == GL_TEXTURE_MAG_FILTER ) {
+      /* Both filters and the unit's LOD bias are re-applied together. */
+      const GLfloat bias = ctx->Texture.Unit[ctx->Texture.CurrentUnit].LodBias;
+
+      i810SetTexFilter( imesa, t, tObj->MinFilter, tObj->MagFilter, bias );
+   }
+   else if ( pname == GL_TEXTURE_WRAP_S || pname == GL_TEXTURE_WRAP_T ) {
+      /* Either wrap parameter re-derives both the U and V fields. */
+      i810SetTexWrapping( t, tObj->WrapS, tObj->WrapT );
+   }
+   else if ( pname == GL_TEXTURE_BORDER_COLOR ) {
+      i810SetTexBorderColor( t, tObj->BorderColor.f );
+   }
+   else if ( pname == GL_TEXTURE_BASE_LEVEL || pname == GL_TEXTURE_MAX_LEVEL ||
+             pname == GL_TEXTURE_MIN_LOD || pname == GL_TEXTURE_MAX_LOD ) {
+      /* This isn't the most efficient solution.  Since there's no LOD
+       * clamping, we just have to rely on loading the right subset of
+       * mipmap levels to simulate a clamped LOD.
+       */
+      I810_FIREVERTICES( imesa );
+      driSwapOutTextureObject( (driTextureObject *) t );
+   }
+   else {
+      /* Parameter this driver does not act on. */
+      return;
+   }
+
+   /* Flag a texture state change for whichever unit(s) currently use t. */
+   if (t == imesa->CurrentTexObj[0]) {
+      I810_STATECHANGE( imesa, I810_UPLOAD_TEX0 );
+   }
+
+   if (t == imesa->CurrentTexObj[1]) {
+      I810_STATECHANGE( imesa, I810_UPLOAD_TEX1 );
+   }
+}
+
+
+/**
+ * Setup hardware bits for new texture environment settings.
+ * 
+ * \todo
+ * Determine whether or not \c param can be used instead of
+ * \c texUnit->EnvColor in the \c GL_TEXTURE_ENV_COLOR case.
+ */
+static void i810TexEnv( GLcontext *ctx, GLenum target,
+			GLenum pname, const GLfloat *param )
+{
+   i810ContextPtr imesa = I810_CONTEXT( ctx );
+   const GLuint unit = ctx->Texture.CurrentUnit;
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+   switch (pname) {
+   case GL_TEXTURE_ENV_COLOR: {
+      /* Only one env color.  Need a fallback if env colors are different
+       * and texture setup references env color in both units.
+       */
+      GLubyte c[4];
+      GLuint envColor;
+
+      UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor );
+      envColor = PACK_COLOR_8888( c[3], c[0], c[1], c[2] );
+      if (imesa->Setup[I810_CTXREG_CF1] != envColor) {
+         I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+         imesa->Setup[I810_CTXREG_CF1] = envColor;
+      }
+      break;
+   }
+   case GL_TEXTURE_ENV_MODE:
+      imesa->TexEnvImageFmt[unit] = 0; /* force recalc of env state */
+      break;
+   case GL_TEXTURE_LOD_BIAS: {
+      const struct gl_texture_object *tObj = texUnit->_Current;
+      i810TextureObjectPtr t =
+         (tObj != NULL) ? (i810TextureObjectPtr) tObj->DriverData : NULL;
+
+      /* DriverData stays NULL when i810AllocTexObj() fails, so guard the
+       * dereference the same way i810TexParameter() does.
+       */
+      if ( t != NULL ) {
+         t->Setup[I810_TEXREG_MLC] &= ~(MLC_LOD_BIAS_MASK);
+         t->Setup[I810_TEXREG_MLC] |= i810ComputeLodBias(*param);
+      }
+      break;
+   }
+   default:
+      break;
+   }
+}
+
+
+
+#if 0 /* Disabled 1D hooks: i810TexImage1D uses 'imesa' without declaring it, so this cannot be enabled as-is. */
+static void i810TexImage1D( GLcontext *ctx, GLenum target, GLint level,
+			    GLint internalFormat,
+			    GLint width, GLint border,
+			    GLenum format, GLenum type, 
+			    const GLvoid *pixels,
+			    const struct gl_pixelstore_attrib *pack,
+			    struct gl_texture_object *texObj,
+			    struct gl_texture_image *texImage )
+{
+   i810TextureObjectPtr t = (i810TextureObjectPtr) texObj->DriverData;
+   if (t) {
+      i810SwapOutTexObj( imesa, t );
+   }
+}
+
+static void i810TexSubImage1D( GLcontext *ctx, 
+			       GLenum target,
+			       GLint level,	
+			       GLint xoffset,
+			       GLsizei width,
+			       GLenum format, GLenum type,
+			       const GLvoid *pixels,
+			       const struct gl_pixelstore_attrib *pack,
+			       struct gl_texture_object *texObj,
+			       struct gl_texture_image *texImage )
+{
+}
+#endif /* disabled 1D texture hooks */
+
+
+static void i810TexImage2D( GLcontext *ctx, GLenum target, GLint level,
+			    GLint internalFormat,
+			    GLint width, GLint height, GLint border,
+			    GLenum format, GLenum type, const GLvoid *pixels,
+			    const struct gl_pixelstore_attrib *packing,
+			    struct gl_texture_object *texObj,
+			    struct gl_texture_image *texImage )
+{
+   /* Ensure texObj has driver data (swapping out an existing driver object
+    * after firing vertices), then let core Mesa store the new image. */
+   driTextureObject *drvObj = (driTextureObject *) texObj->DriverData;
+
+   if (drvObj == NULL) {
+      if (i810AllocTexObj( ctx, texObj ) == NULL) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+         return;
+      }
+   } else {
+      I810_FIREVERTICES( I810_CONTEXT(ctx) );
+      driSwapOutTextureObject( drvObj );
+   }
+   _mesa_store_teximage2d( ctx, target, level, internalFormat, width, height,
+                           border, format, type, pixels, packing, texObj, texImage );
+}
+
+static void i810TexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+			       GLint xoffset, GLint yoffset,
+			       GLsizei width, GLsizei height,
+			       GLenum format, GLenum type,
+			       const GLvoid *pixels,
+			       const struct gl_pixelstore_attrib *packing,
+			       struct gl_texture_object *texObj,
+			       struct gl_texture_image *texImage )
+{
+   /* Fire vertices and swap out the driver object, if there is one, before
+    * core Mesa updates the stored sub-image. */
+   driTextureObject *drvObj = (driTextureObject *) texObj->DriverData;
+   if (drvObj != NULL) {
+      I810_FIREVERTICES( I810_CONTEXT(ctx) );
+      driSwapOutTextureObject( drvObj );
+   }
+   _mesa_store_texsubimage2d( ctx, target, level, xoffset, yoffset,
+                              width, height, format, type, pixels, packing,
+                              texObj, texImage );
+}
+
+/* BindTexture hook: only asserts that a GL_TEXTURE_2D object already has driver data. */
+static void i810BindTexture( GLcontext *ctx, GLenum target,
+			     struct gl_texture_object *tObj )
+{
+   assert( (target != GL_TEXTURE_2D) || (tObj->DriverData != NULL) );
+}
+
+
+static void i810DeleteTexture( GLcontext *ctx, struct gl_texture_object *tObj )
+{
+   /* Destroy the driver-private object (if any), then the GL object. */
+   i810ContextPtr imesa = I810_CONTEXT( ctx );
+   driTextureObject *drvObj = (driTextureObject *) tObj->DriverData;
+   if (drvObj != NULL) {
+      if (imesa != NULL)
+         I810_FIREVERTICES( imesa );   /* only possible with a driver context */
+      driDestroyTextureObject( drvObj );
+   }
+   _mesa_delete_texture_object(ctx, tObj);   /* frees mipmap images and tObj */
+}
+
+/**
+ * Choose a Mesa texture format to match the requested format.
+ * 
+ * The i810 only supports 5 texture modes that are useful to Mesa.  That
+ * makes this routine pretty simple.
+ */
+static gl_format
+i810ChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+			 GLenum format, GLenum type )
+{
+   /* RGBA family -> ARGB1555 (GL_RGB5_A1 or 1555/5551 packed source data),
+    * else ARGB4444; RGB family -> RGB565; alpha, luminance, luminance-alpha
+    * and intensity -> AL88; GL_YCBCR_MESA -> YCBCR or YCBCR_REV by 'type'.
+    * Anything else is reported on stderr and yields MESA_FORMAT_NONE.
+    */
+   const int isPacked1555 =
+      (format == GL_BGRA && type == GL_UNSIGNED_SHORT_1_5_5_5_REV) ||
+      (format == GL_RGBA && type == GL_UNSIGNED_SHORT_5_5_5_1);
+
+   switch ( internalFormat ) {
+   case 4:
+   case GL_RGBA:
+   case GL_RGBA2:
+   case GL_RGBA4:
+   case GL_RGB5_A1:
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+   case GL_COMPRESSED_RGBA:
+      return (isPacked1555 || internalFormat == GL_RGB5_A1)
+         ? MESA_FORMAT_ARGB1555 : MESA_FORMAT_ARGB4444;
+
+   case 3:
+   case GL_RGB:
+   case GL_COMPRESSED_RGB:
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return MESA_FORMAT_RGB565;
+
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      return MESA_FORMAT_AL88;
+
+   case GL_YCBCR_MESA:
+      return (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE)
+         ? MESA_FORMAT_YCBCR : MESA_FORMAT_YCBCR_REV;
+
+   default:
+      fprintf(stderr, "unexpected texture format in %s\n", __FUNCTION__);
+      return MESA_FORMAT_NONE;
+   }
+}
+
+/**
+ * Allocate a new texture object.
+ * Called via ctx->Driver.NewTextureObject.
+ * Note: this function will be called during context creation to
+ * allocate the default texture objects.
+ * Note: we could use containment here to 'derive' the driver-specific
+ * texture object from the core mesa gl_texture_object.  Not done at this time.
+ */
+static struct gl_texture_object *
+i810NewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
+{
+   struct gl_texture_object *obj = _mesa_new_texture_object(ctx, name, target);
+   if (obj != NULL)   /* i810AllocTexObj() writes obj->DriverData: never pass NULL */
+      i810AllocTexObj( ctx, obj );
+   return obj;
+}
+/* Install the i810 texture hooks into a driver function table. */
+void i810InitTextureFuncs( struct dd_function_table *functions )
+{
+   functions->NewTextureObject = i810NewTextureObject;
+   functions->BindTexture = i810BindTexture;
+   functions->DeleteTexture = i810DeleteTexture;
+   functions->IsTextureResident = driIsTextureResident;
+   functions->ChooseTextureFormat = i810ChooseTextureFormat;
+   functions->TexImage2D = i810TexImage2D;
+   functions->TexSubImage2D = i810TexSubImage2D;
+   functions->TexParameter = i810TexParameter;
+   functions->TexEnv = i810TexEnv;
+}
diff --git a/src/mesa/drivers/dri/i810/i810tex.h b/src/mesa/drivers/dri/i810/i810tex.h
new file mode 100644
index 0000000000..28958dcb4b
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810tex.h
@@ -0,0 +1,77 @@
+/*
+ * GLX Hardware Device Driver for Intel i810
+ * Copyright (C) 1999 Keith Whitwell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+
+#ifndef I810TEX_INC
+#define I810TEX_INC
+
+#include "main/mtypes.h"
+#include "main/mm.h"
+
+#include "i810_3d_reg.h"
+#include "texmem.h"
+
+#define I810_TEX_MAXLEVELS 11
+
+/* For shared texture space management, these texture objects may also
+ * be used as proxies for regions of texture memory containing other
+ * client's textures.  Such proxy textures (not to be confused with GL
+ * proxy textures) are subject to the same LRU aging we use for our
+ * own private textures, and thus we have a mechanism where we can
+ * fairly decide between kicking out our own textures and those of
+ * other clients.
+ *
+ * Non-local texture objects have a valid MemBlock to describe the
+ * region managed by the other client, and can be identified by
+ * 't->globj == 0' 
+ */
+struct i810_texture_object_t {
+   driTextureObject base;	/* must stay first: objects are cast to driTextureObject * */
+     
+   int Pitch;			/* destination row stride used when uploading, in bytes */
+   int Height;
+   int texelBytes;		/* bytes per texel */
+   char *BufAddr;		/* texture map base + this object's memory block offset */
+   
+   GLuint max_level;
+
+   struct { 
+      const struct gl_texture_image *image;	/* Mesa image to upload from */
+      int offset;		/* into BufAddr */
+      int height;
+      int internalFormat;
+   } image[I810_TEX_MAXLEVELS];	/* indexed by hardware level */
+
+   GLuint Setup[I810_TEX_SETUP_SIZE];	/* register words, indexed by I810_TEXREG_* */
+   GLuint dirty;
+
+};		
+
+void i810UpdateTextureState( GLcontext *ctx );
+void i810InitTextureFuncs( struct dd_function_table *functions );
+
+void i810DestroyTexObj( i810ContextPtr imesa, i810TextureObjectPtr t );
+int i810UploadTexImagesLocked( i810ContextPtr imesa, i810TextureObjectPtr t );
+
+#endif
diff --git a/src/mesa/drivers/dri/i810/i810texmem.c b/src/mesa/drivers/dri/i810/i810texmem.c
new file mode 100644
index 0000000000..6e6b21cf2b
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810texmem.c
@@ -0,0 +1,184 @@
+/*
+ * GLX Hardware Device Driver for Intel i810
+ * Copyright (C) 1999 Keith Whitwell
+ * Texmem interface changes (C) 2003 Dave Airlie
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/simple_list.h"
+#include "main/enums.h"
+#include "main/colormac.h"
+#include "main/mm.h"
+
+#include "i810screen.h"
+#include "i810_dri.h"
+#include "i810context.h"
+#include "i810tex.h"
+#include "i810ioctl.h"
+
+
+void i810DestroyTexObj(i810ContextPtr imesa, i810TextureObjectPtr t)
+{
+   /* Without a driver context there is nothing to unhook. */
+   if ( imesa == NULL )
+      return;
+   /* If t is a unit's current object, forget it and clear that unit's
+    * I810_UPLOAD_TEXn bit in imesa->dirty. */
+   if (imesa->CurrentTexObj[0] == t) {
+      imesa->CurrentTexObj[0] = 0;
+      imesa->dirty &= ~I810_UPLOAD_TEX0;
+   }
+   if (imesa->CurrentTexObj[1] == t) {
+      imesa->CurrentTexObj[1] = 0;
+      imesa->dirty &= ~I810_UPLOAD_TEX1;
+   }
+}
+
+
+
+#if defined(i386) || defined(__i386__)
+/* From linux kernel i386 header files, copes with odd sizes better
+ * than COPY_DWORDS would.  Copies n bytes from 'from' to 'to', returns 'to'.
+ */
+static INLINE void * __memcpy(void * to, const void * from, size_t n)
+{
+int d0, d1, d2;	/* dummy outputs: the asm modifies its count/dest/source registers */
+__asm__ __volatile__(
+	"rep ; movsl\n\t"	/* copy n/4 32-bit words (count is operand 0) */
+	"testb $2,%b4\n\t"	/* bit 1 of n (operand 4) set? */
+	"je 1f\n\t"
+	"movsw\n"		/* yes: copy one more 16-bit word */
+	"1:\ttestb $1,%b4\n\t"	/* bit 0 of n set? */
+	"je 2f\n\t"
+	"movsb\n"		/* yes: copy the final byte */
+	"2:"
+	: "=&c" (d0), "=&D" (d1), "=&S" (d2)
+	:"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
+	: "memory");
+return (to);
+}
+#else
+/* Allow compilation on other architectures */
+#define __memcpy memcpy
+#endif
+
+/* Upload one mipmap level from mesa's internal copy into texture memory.
+ *
+ * hwlevel indexes t->image[]; the level is skipped when it has no image or
+ * no pixel data.  The destination starts at
+ * t->BufAddr + t->image[hwlevel].offset.  When a source row is exactly
+ * t->Pitch bytes the whole image is copied at once; otherwise rows are
+ * copied one by one, which is only supported for 1- and 2-byte texels.
+ */
+static void i810UploadTexLevel( i810ContextPtr imesa,
+				i810TextureObjectPtr t, int hwlevel )
+{
+   const struct gl_texture_image *image = t->image[hwlevel].image;
+   GLubyte *dst;
+   const GLubyte *src;
+   GLuint texelBytes, rowBytes;
+   int dstStride;
+   int j;
+
+   if (!image || !image->Data)
+      return;
+
+   texelBytes = _mesa_get_format_bytes(image->TexFormat);
+   rowBytes = image->Width * texelBytes;
+
+   /* texelBytes is unsigned, hence %u rather than %d. */
+   if (rowBytes != t->Pitch && texelBytes != 1 && texelBytes != 2) {
+      fprintf(stderr, "%s: Not supported texel size %u\n",
+              __FUNCTION__, texelBytes);
+      return;
+   }
+
+   dst = (GLubyte *)(t->BufAddr + t->image[hwlevel].offset);
+   src = (const GLubyte *)image->Data;
+
+   if (rowBytes == t->Pitch) {
+      /* Source rows already match the destination pitch. */
+      memcpy( dst, src, t->Pitch * image->Height );
+      return;
+   }
+
+   /* 16-bit rows advance by whole texels, i.e. (Pitch/2)*2 bytes, exactly
+    * as stepping a GLushort pointer by Pitch/2 would. */
+   dstStride = (texelBytes == 2) ? (t->Pitch / 2) * 2 : t->Pitch;
+
+   for (j = 0 ; j < image->Height ; j++) {
+      __memcpy(dst, src, rowBytes);
+      src += rowBytes;
+      dst += dstStride;
+   }
+}
+
+/* Give texture t a memory block if it has none, then upload its dirty
+ * images.
+ *
+ * This is called with the lock held.  May have to eject our own and/or
+ * other client's texture objects to make room for the upload.
+ * Returns 0 on success, or -1 when driAllocateTexture() fails.
+ */
+int i810UploadTexImagesLocked( i810ContextPtr imesa, i810TextureObjectPtr t )
+{
+   int level, numLevels;
+
+   /* Do we need to eject LRU texture objects? */
+   if (t->base.memBlock == NULL) {
+      int ofs;
+
+      if (driAllocateTexture( imesa->texture_heaps, imesa->nr_heaps,
+                              &t->base ) == -1)
+         return -1;
+
+      assert(t->base.memBlock);
+      ofs = t->base.memBlock->ofs;
+      t->BufAddr = imesa->i810Screen->tex.map + ofs;
+      t->Setup[I810_TEXREG_MI3] = imesa->i810Screen->textureOffset + ofs;
+
+      /* MI3 changed, so flag a state change for any unit using t. */
+      if (imesa->CurrentTexObj[0] == t)
+         I810_STATECHANGE(imesa, I810_UPLOAD_TEX0);
+      if (imesa->CurrentTexObj[1] == t)
+         I810_STATECHANGE(imesa, I810_UPLOAD_TEX1);
+   }
+
+   driUpdateTextureLRU( &t->base );
+
+   /* If heap 0's timestamp is not older than the dispatch age, wait on it. */
+   if (imesa->texture_heaps[0]->timestamp >= GET_DISPATCH_AGE(imesa))
+      i810WaitAgeLocked( imesa, imesa->texture_heaps[0]->timestamp );
+
+   /* Upload every level flagged dirty, then clear the flags. */
+   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+   for (level = 0 ; level < numLevels ; level++) {
+      if (t->base.dirty_images[0] & (1<<level))
+         i810UploadTexLevel( imesa, t, level );
+   }
+
+   t->base.dirty_images[0] = 0;
+
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/i810/i810texstate.c b/src/mesa/drivers/dri/i810/i810texstate.c
new file mode 100644
index 0000000000..bff28c11c8
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810texstate.c
@@ -0,0 +1,750 @@
+/*
+ * GLX Hardware Device Driver for Intel i810
+ * Copyright (C) 1999 Keith Whitwell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/simple_list.h"
+#include "main/enums.h"
+#include "main/mm.h"
+
+#include "i810screen.h"
+#include "i810_dri.h"
+
+#include "i810context.h"
+#include "i810tex.h"
+#include "i810state.h"
+#include "i810ioctl.h"
+
+
+
+
+/**
+ * Derive the hardware layout for a texture object and upload its images:
+ * pick the MI1 format bits from the base image's TexFormat, stack the
+ * mipmap levels vertically at one common pitch, fill in the MI1/MLL setup
+ * words, then upload under the hardware lock.
+ */
+static void i810SetTexImages( i810ContextPtr imesa, 
+			      struct gl_texture_object *tObj )
+{
+   GLuint height, width, pitch, i, textureFormat, log_pitch;
+   i810TextureObjectPtr t = (i810TextureObjectPtr) tObj->DriverData;
+   const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+   GLint numLevels;
+
+   t->texelBytes = 2;	/* every format below is 16 bits per texel except CI8 */
+   switch (baseImage->TexFormat) {
+   case MESA_FORMAT_ARGB1555:
+      textureFormat = MI1_FMT_16BPP | MI1_PF_16BPP_ARGB1555;
+      break;
+   case MESA_FORMAT_ARGB4444:
+      textureFormat = MI1_FMT_16BPP | MI1_PF_16BPP_ARGB4444;
+      break;
+   case MESA_FORMAT_RGB565:
+      textureFormat = MI1_FMT_16BPP | MI1_PF_16BPP_RGB565;
+      break;
+   case MESA_FORMAT_AL88:
+      textureFormat = MI1_FMT_16BPP | MI1_PF_16BPP_AY88;
+      break;
+   case MESA_FORMAT_YCBCR:
+      textureFormat = MI1_FMT_422 | MI1_PF_422_YCRCB_SWAP_Y
+	  | MI1_COLOR_CONV_ENABLE;
+      break;
+   case MESA_FORMAT_YCBCR_REV:
+      textureFormat = MI1_FMT_422 | MI1_PF_422_YCRCB
+	  | MI1_COLOR_CONV_ENABLE;
+      break;
+   case MESA_FORMAT_CI8:
+      textureFormat = MI1_FMT_8CI | MI1_PF_8CI_ARGB4444;
+      t->texelBytes = 1;
+      break;
+
+   default:
+      fprintf(stderr, "i810SetTexImages: bad image->Format\n" );
+      return;	/* unsupported format: nothing is laid out or uploaded */
+   }
+
+   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
+
+   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+
+   /* Figure out the amount of memory required to hold all the mipmap
+    * levels.  Choose the smallest pitch to accommodate the largest
+    * mipmap; pitch starts at 32 bytes (log_pitch 2) and doubles:
+    */
+   width = tObj->Image[0][t->base.firstLevel]->Width * t->texelBytes;
+   for (pitch = 32, log_pitch=2 ; pitch < width ; pitch *= 2 )
+      log_pitch++;
+   
+   /* All images must be loaded at this pitch.  Count the number of
+    * lines required:
+    */
+   for ( height = i = 0 ; i < numLevels ; i++ ) {
+      t->image[i].image = tObj->Image[0][t->base.firstLevel + i];
+      t->image[i].offset = height * pitch;
+      t->image[i].internalFormat = baseImage->_BaseFormat;
+      height += t->image[i].image->Height;
+   }
+
+   t->Pitch = pitch;
+   t->base.totalSize = height*pitch;
+   t->max_level = i-1;
+   t->dirty = I810_UPLOAD_TEX0 | I810_UPLOAD_TEX1;   
+   t->Setup[I810_TEXREG_MI1] = (MI1_MAP_0 | textureFormat | log_pitch); 
+   t->Setup[I810_TEXREG_MLL] = (GFX_OP_MAP_LOD_LIMITS |
+				MLL_MAP_0  |
+				MLL_UPDATE_MAX_MIP | 
+				MLL_UPDATE_MIN_MIP |
+				((numLevels - 1) << MLL_MIN_MIP_SHIFT));
+
+   LOCK_HARDWARE( imesa );
+   i810UploadTexImagesLocked( imesa, t );
+   UNLOCK_HARDWARE( imesa );
+}
+
+/* ================================================================
+ * Texture combine functions
+ */
+
+
+static void set_color_stage( unsigned color, int stage, i810ContextPtr imesa )
+{
+   /* Nothing to change when the cached color stage word already matches. */
+   if ( imesa->Setup[I810_CTXREG_MC0 + stage] == color )
+      return;
+   I810_STATECHANGE( imesa, I810_UPLOAD_CTX );
+   imesa->Setup[I810_CTXREG_MC0 + stage] = color;
+}
+
+
+static void set_alpha_stage( unsigned alpha, int stage, i810ContextPtr imesa )
+{
+   /* Nothing to change when the cached alpha stage word already matches. */
+   if ( imesa->Setup[I810_CTXREG_MA0 + stage] == alpha )
+      return;
+   I810_STATECHANGE( imesa, I810_UPLOAD_CTX );
+   imesa->Setup[I810_CTXREG_MA0 + stage] = alpha;
+}
+
+
+static const unsigned operand_modifiers[] = {	/* index: combine operand enum minus GL_SRC_COLOR (RGB) or GL_SRC_ALPHA (alpha, 0..1 only) */
+   0,                       MC_ARG_INVERT,	/* [0] GL_SRC_COLOR, [1] GL_ONE_MINUS_SRC_COLOR */
+   MC_ARG_REPLICATE_ALPHA,  MC_ARG_INVERT | MC_ARG_REPLICATE_ALPHA	/* [2] GL_SRC_ALPHA, [3] GL_ONE_MINUS_SRC_ALPHA */
+};
+
+/**
+ * Configure the hardware bits for the specified texture environment.
+ *
+ * Configures the hardware bits for the texture environment state for the
+ * specified texture unit.  As combine stages are added, the values pointed
+ * to by \c color_stage and \c alpha_stage are incremented.
+ *
+ * \param ctx          GL context pointer.
+ * \param unit         Texture unit to be added.
+ * \param color_stage  Next available hardware color combine stage.
+ * \param alpha_stage  Next available hardware alpha combine stage.
+ *
+ * \returns
+ * If the combine mode for the specified texture unit could be added without
+ * requiring a software fallback, \c GL_TRUE is returned.  Otherwise,
+ * \c GL_FALSE is returned.
+ *
+ * \todo
+ * If the mode is (GL_REPLACE, GL_PREVIOUS), treat it as though the texture
+ * stage is disabled.  That is, don't emit any combine stages.
+ *
+ * \todo
+ * Add support for ATI_texture_env_combine3 modes.  This will require using
+ * two combine stages.
+ *
+ * \todo
+ * Add support for the missing \c GL_INTERPOLATE modes.  This will require
+ * using all three combine stages.  There is a comment in the function
+ * describing how this might work.
+ *
+ * \todo
+ * If, after all the combine stages have been emitted, a texture is never
+ * actually used, disable the texture unit.  That should save some texture
+ * memory bandwidth.  This won't happen in this function, but this seems like
+ * a reasonable place to make note of it.
+ */
+static GLboolean
+i810UpdateTexEnvCombine( GLcontext *ctx, GLuint unit, 
+			 int * color_stage, int * alpha_stage )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLuint color_arg[3] = {
+      MC_ARG_ONE,            MC_ARG_ONE,            MC_ARG_ONE
+   };
+   GLuint alpha_arg[3] = {
+      MA_ARG_ITERATED_ALPHA, MA_ARG_ITERATED_ALPHA, MA_ARG_ITERATED_ALPHA
+   };
+   GLuint i;
+   GLuint color_combine, alpha_combine;
+   const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
+   const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
+   GLuint RGBshift = texUnit->_CurrentCombine->ScaleShiftRGB;
+   GLuint Ashift = texUnit->_CurrentCombine->ScaleShiftA;
+
+
+   if ( !texUnit->_ReallyEnabled ) {	/* disabled unit: consume no stages */
+      return GL_TRUE;
+   }
+
+      
+   if ((*color_stage >= 3) || (*alpha_stage >= 3)) {	/* stage indices 0..2 are all taken */
+      return GL_FALSE;
+   }
+
+
+   /* Step 1:
+    * Extract the color and alpha combine function arguments.
+    */
+
+   for ( i = 0 ; i < numColorArgs ; i++ ) {
+      unsigned op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR;
+      assert(op >= 0);	/* always true: op is unsigned */
+      assert(op <= 3);	/* op indexes the four-entry operand_modifiers table */
+      switch ( texUnit->_CurrentCombine->SourceRGB[i] ) {
+      case GL_TEXTURE0:
+	 color_arg[i] = MC_ARG_TEX0_COLOR;
+	 break;
+      case GL_TEXTURE1:
+	 color_arg[i] = MC_ARG_TEX1_COLOR;
+	 break;
+      case GL_TEXTURE:
+	 color_arg[i] = (unit == 0) 
+	   ? MC_ARG_TEX0_COLOR : MC_ARG_TEX1_COLOR;
+	 break;
+      case GL_CONSTANT:
+	 color_arg[i] = MC_ARG_COLOR_FACTOR;
+	 break;
+      case GL_PRIMARY_COLOR:
+	 color_arg[i] = MC_ARG_ITERATED_COLOR;
+	 break;
+      case GL_PREVIOUS:
+	 color_arg[i] = (unit == 0)
+	   ? MC_ARG_ITERATED_COLOR : MC_ARG_CURRENT_COLOR;
+	 break;
+      case GL_ZERO:
+	 /* Toggle the low bit of the op value.  This is the 'invert' bit,
+	  * and it acts to convert GL_ZERO+op to the equivalent GL_ONE+op.
+	  */
+	 op ^= 1;
+
+	 /*FALLTHROUGH*/
+
+      case GL_ONE:
+	 color_arg[i] = MC_ARG_ONE;
+	 break;
+      default:
+	 return GL_FALSE;
+      }
+
+      color_arg[i] |= operand_modifiers[op];
+   }
+
+
+   for ( i = 0 ; i < numAlphaArgs ; i++ ) {
+      unsigned op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
+      assert(op >= 0);	/* always true: op is unsigned */
+      assert(op <= 1);	/* only entries 0 and 1 of operand_modifiers are used for alpha */
+      switch ( texUnit->_CurrentCombine->SourceA[i] ) {
+      case GL_TEXTURE0:
+	 alpha_arg[i] = MA_ARG_TEX0_ALPHA;
+	 break;
+      case GL_TEXTURE1:
+	 alpha_arg[i] = MA_ARG_TEX1_ALPHA;
+	 break;
+      case GL_TEXTURE:
+	 alpha_arg[i] = (unit == 0)
+	   ? MA_ARG_TEX0_ALPHA : MA_ARG_TEX1_ALPHA;
+	 break;
+      case GL_CONSTANT:
+	 alpha_arg[i] = MA_ARG_ALPHA_FACTOR;
+	 break;
+      case GL_PRIMARY_COLOR:
+	 alpha_arg[i] = MA_ARG_ITERATED_ALPHA;
+	 break;
+      case GL_PREVIOUS:
+	 alpha_arg[i] = (unit == 0)
+	   ? MA_ARG_ITERATED_ALPHA : MA_ARG_CURRENT_ALPHA;
+	 break;
+      case GL_ZERO:
+	 /* Toggle the low bit of the op value.  This is the 'invert' bit,
+	  * and it acts to convert GL_ZERO+op to the equivalent GL_ONE+op.
+	  */
+	 op ^= 1;
+
+	 /*FALLTHROUGH*/
+
+      case GL_ONE:
+	 if (i != 2) {	/* constant one/zero alpha is accepted only as the third argument */
+	    return GL_FALSE;
+	 }
+
+	 alpha_arg[i] = MA_ARG_ONE;
+	 break;
+      default:
+	 return GL_FALSE;
+      }
+
+      alpha_arg[i] |= operand_modifiers[op];
+   }
+
+
+   /* Step 2:
+    * Build up the color and alpha combine functions.
+    */
+   switch ( texUnit->_CurrentCombine->ModeRGB ) {
+   case GL_REPLACE:
+      color_combine = MC_OP_ARG1;
+      break;
+   case GL_MODULATE:
+      color_combine = MC_OP_MODULATE + RGBshift;
+      RGBshift = 0;	/* scale is folded into the op above, so step 3 adds nothing */
+      break;
+   case GL_ADD:
+      color_combine = MC_OP_ADD;
+      break;
+   case GL_ADD_SIGNED:
+      color_combine = MC_OP_ADD_SIGNED;
+      break;
+   case GL_SUBTRACT:
+      color_combine = MC_OP_SUBTRACT;
+      break;
+   case GL_INTERPOLATE:
+      /* For interpolation, the i810 hardware has some limitations.  It
+       * can't handle using the secondary or diffuse color (diffuse alpha
+       * is okay) for the third argument.
+       *
+       * It is possible to emulate the missing modes by using multiple
+       * combine stages.  Unfortunately it requires all three stages to
+       * emulate a single interpolate stage.  The (arg0*arg2) portion is
+       * done in stage zero and writes to MC_DEST_ACCUMULATOR.  The
+       * (arg1*(1-arg2)) portion is done in stage 1, and the final stage is
+       * (MC_ARG1_ACCUMULATOR | MC_ARG2_CURRENT_COLOR | MC_OP_ADD).
+       * 
+       * It can also be done without using the accumulator by rearranging
+       * the equation as (arg1 + (arg2 * (arg0 - arg1))).  Too bad the i810
+       * doesn't support the MODULATE_AND_ADD mode that the i830 supports.
+       * If it did, the interpolate could be done in only two stages.
+       */
+	 
+      if ( (color_arg[2] & MC_ARG_INVERT) != 0 ) {	/* inverted blend factor: swap arg0/arg1 and drop the invert bit */
+	 unsigned temp = color_arg[0];
+
+	 color_arg[0] = color_arg[1];
+	 color_arg[1] = temp;
+	 color_arg[2] &= ~MC_ARG_INVERT;
+      }
+
+      switch (color_arg[2]) {
+      case (MC_ARG_ONE):
+      case (MC_ARG_ONE | MC_ARG_REPLICATE_ALPHA):
+	 color_combine = MC_OP_ARG1;
+	 color_arg[1] = MC_ARG_ONE;
+	 break;
+
+      case (MC_ARG_COLOR_FACTOR):
+	 return GL_FALSE;
+
+      case (MC_ARG_COLOR_FACTOR | MC_ARG_REPLICATE_ALPHA):
+	 color_combine = MC_OP_LIN_BLEND_ALPHA_FACTOR;
+	 break;
+
+      case (MC_ARG_ITERATED_COLOR):
+	 return GL_FALSE;
+
+      case (MC_ARG_ITERATED_COLOR | MC_ARG_REPLICATE_ALPHA):
+	 color_combine = MC_OP_LIN_BLEND_ITER_ALPHA;
+	 break;
+
+      case (MC_ARG_SPECULAR_COLOR):
+      case (MC_ARG_SPECULAR_COLOR | MC_ARG_REPLICATE_ALPHA):
+	 return GL_FALSE;
+
+      case (MC_ARG_TEX0_COLOR):
+	 color_combine = MC_OP_LIN_BLEND_TEX0_COLOR;
+	 break;
+
+      case (MC_ARG_TEX0_COLOR | MC_ARG_REPLICATE_ALPHA):
+	 color_combine = MC_OP_LIN_BLEND_TEX0_ALPHA;
+	 break;
+
+      case (MC_ARG_TEX1_COLOR):
+	 color_combine = MC_OP_LIN_BLEND_TEX1_COLOR;
+	 break;
+
+      case (MC_ARG_TEX1_COLOR | MC_ARG_REPLICATE_ALPHA):
+	 color_combine = MC_OP_LIN_BLEND_TEX1_ALPHA;
+	 break;
+
+      default:
+	 return GL_FALSE;
+      }
+      break;
+
+   default:
+      return GL_FALSE;
+   }
+
+   
+   switch ( texUnit->_CurrentCombine->ModeA ) {
+   case GL_REPLACE:
+      alpha_combine = MA_OP_ARG1;
+      break;
+   case GL_MODULATE:
+      alpha_combine = MA_OP_MODULATE + Ashift;
+      Ashift = 0;	/* scale is folded into the op above, so step 3 adds nothing */
+      break;
+   case GL_ADD:
+      alpha_combine = MA_OP_ADD;
+      break;
+   case GL_ADD_SIGNED:
+      alpha_combine = MA_OP_ADD_SIGNED;
+      break;
+   case GL_SUBTRACT:
+      alpha_combine = MA_OP_SUBTRACT;
+      break;
+   case GL_INTERPOLATE:
+      if ( (alpha_arg[2] & MA_ARG_INVERT) != 0 ) {	/* same swap trick as the color path */
+	 unsigned temp = alpha_arg[0];
+
+	 alpha_arg[0] = alpha_arg[1];
+	 alpha_arg[1] = temp;
+	 alpha_arg[2] &= ~MA_ARG_INVERT;
+      }
+
+      switch (alpha_arg[2]) {
+      case MA_ARG_ONE:
+	 alpha_combine = MA_OP_ARG1;
+	 alpha_arg[1] = MA_ARG_ITERATED_ALPHA;
+	 break;
+
+      case MA_ARG_ALPHA_FACTOR:
+	 alpha_combine = MA_OP_LIN_BLEND_ALPHA_FACTOR;
+	 break;
+
+      case MA_ARG_ITERATED_ALPHA:
+	 alpha_combine = MA_OP_LIN_BLEND_ITER_ALPHA;
+	 break;
+
+      case MA_ARG_TEX0_ALPHA:
+	 alpha_combine = MA_OP_LIN_BLEND_TEX0_ALPHA;
+	 break;
+
+      case MA_ARG_TEX1_ALPHA:
+	 alpha_combine = MA_OP_LIN_BLEND_TEX1_ALPHA;
+	 break;
+
+      default:
+	 return GL_FALSE;
+      }
+      break;
+
+   default:
+      return GL_FALSE;
+   }
+
+
+   color_combine |= GFX_OP_MAP_COLOR_STAGES | (*color_stage << MC_STAGE_SHIFT)
+     | MC_UPDATE_DEST | MC_DEST_CURRENT
+     | MC_UPDATE_ARG1 | (color_arg[0] << MC_ARG1_SHIFT)
+     | MC_UPDATE_ARG2 | (color_arg[1] << MC_ARG2_SHIFT)
+     | MC_UPDATE_OP;
+
+   alpha_combine |= GFX_OP_MAP_ALPHA_STAGES | (*alpha_stage << MA_STAGE_SHIFT)
+     | MA_UPDATE_ARG1 | (alpha_arg[0] << MA_ARG1_SHIFT)
+     | MA_UPDATE_ARG2 | (alpha_arg[1] << MA_ARG2_SHIFT)
+     | MA_UPDATE_OP;
+
+   set_color_stage( color_combine, *color_stage, imesa );
+   set_alpha_stage( alpha_combine, *alpha_stage, imesa );
+   (*color_stage)++;
+   (*alpha_stage)++;
+
+
+   /* Step 3:
+    * Apply the scale factor.
+    */
+   /* The only operation where the i810 directly supports adding a post-
+    * scale factor is modulate.  For all the other modes the post-scale is
+    * emulated by inserting an extra modulate stage.  For the modulate
+    * case, the scaling is handled above when color_combine / alpha_combine
+    * are initially set.
+    */
+
+   if ( RGBshift != 0 ) {
+      const unsigned color_scale = GFX_OP_MAP_COLOR_STAGES
+	| (*color_stage << MC_STAGE_SHIFT)
+	| MC_UPDATE_DEST | MC_DEST_CURRENT
+	| MC_UPDATE_ARG1 | (MC_ARG_CURRENT_COLOR << MC_ARG1_SHIFT)
+	| MC_UPDATE_ARG2 | (MC_ARG_ONE           << MC_ARG2_SHIFT)
+	| MC_UPDATE_OP   | (MC_OP_MODULATE + RGBshift);
+
+      if ( *color_stage >= 3 ) {	/* no stage left for the extra scale pass */
+	 return GL_FALSE;
+      }
+
+      set_color_stage( color_scale, *color_stage, imesa );
+      (*color_stage)++;
+   }
+
+   
+   if ( Ashift != 0 ) {
+      const unsigned alpha_scale = GFX_OP_MAP_ALPHA_STAGES
+	| (*alpha_stage << MA_STAGE_SHIFT)
+	| MA_UPDATE_ARG1 | (MA_ARG_CURRENT_ALPHA << MA_ARG1_SHIFT)
+	| MA_UPDATE_ARG2 | (MA_ARG_ONE           << MA_ARG2_SHIFT)
+	| MA_UPDATE_OP   | (MA_OP_MODULATE + Ashift);
+
+      if ( *alpha_stage >= 3 ) {	/* no stage left for the extra scale pass */
+	 return GL_FALSE;
+      }
+
+      set_alpha_stage( alpha_scale, *alpha_stage, imesa );
+      (*alpha_stage)++;
+   }
+
+   return GL_TRUE;
+}
+
+/* Shared part of enabling a texture unit: rejects bordered textures,
+ * uploads dirty images and records the unit's current texture object.
+ * Returns GL_FALSE when any of that fails.
+ */
+static GLboolean enable_tex_common( GLcontext *ctx, GLuint unit )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   i810TextureObjectPtr t = (i810TextureObjectPtr)tObj->DriverData;
+
+   /* Texture borders are rejected outright. */
+   if (tObj->Image[0][tObj->BaseLevel]->Border > 0)
+      return GL_FALSE;
+
+   /* Upload teximages (not pipelined); give up if the object still has
+    * no memory block afterwards. */
+   if (t->base.dirty_images[0]) {
+      I810_FIREVERTICES(imesa);
+      i810SetTexImages( imesa, tObj );
+      if (!t->base.memBlock)
+	 return GL_FALSE;
+   }
+
+   /* Rebind only when the unit's texture object actually changed. */
+   if (t != imesa->CurrentTexObj[unit]) {
+      I810_STATECHANGE(imesa, (I810_UPLOAD_TEX0<<unit));
+      imesa->CurrentTexObj[unit] = t;
+      t->base.bound |= (1U << unit);
+
+      /* XXX: should be locked */
+      driUpdateTextureLRU( (driTextureObject *) t );
+   }
+
+   imesa->TexEnvImageFmt[unit] = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+   return GL_TRUE;
+}
+
+/* Rectangle-texture setup: clears MCS_NORMALIZED_COORDS and stores the
+ * exact (size - 1) dimensions in MI2.  Always returns GL_TRUE.
+ */
+static GLboolean enable_tex_rect( GLcontext *ctx, GLuint unit )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   i810TextureObjectPtr t = (i810TextureObjectPtr)tObj->DriverData;
+   const struct gl_texture_image *img = tObj->Image[0][t->base.firstLevel];
+   const GLint lastCol = img->Width - 1;
+   const GLint lastRow = img->Height - 1;
+
+   I810_STATECHANGE(imesa, (I810_UPLOAD_TEX0<<unit));
+   t->Setup[I810_TEXREG_MCS] = (t->Setup[I810_TEXREG_MCS]
+				& ~MCS_NORMALIZED_COORDS) | MCS_UPDATE_NORMALIZED;
+   t->Setup[I810_TEXREG_MI2] = (MI2_DIMENSIONS_ARE_EXACT |
+				(lastRow << MI2_HEIGHT_SHIFT) | lastCol);
+   return GL_TRUE;
+}
+
+/* 2D-texture setup: sets MCS_NORMALIZED_COORDS and stores the log2
+ * dimensions of the first level in MI2.  Always returns GL_TRUE.
+ */
+static GLboolean enable_tex_2d( GLcontext *ctx, GLuint unit )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   i810TextureObjectPtr t = (i810TextureObjectPtr)tObj->DriverData;
+   const struct gl_texture_image *img = tObj->Image[0][t->base.firstLevel];
+   const GLint widthBits = img->WidthLog2;
+   const GLint heightBits = img->HeightLog2;
+
+   I810_STATECHANGE(imesa, (I810_UPLOAD_TEX0<<unit));
+   t->Setup[I810_TEXREG_MCS] |= (MCS_NORMALIZED_COORDS |
+				 MCS_UPDATE_NORMALIZED);
+   t->Setup[I810_TEXREG_MI2] = (MI2_DIMENSIONS_ARE_LOG2 |
+				(heightBits << MI2_HEIGHT_SHIFT) | widthBits);
+   return GL_TRUE;
+}
+
+/* Forget the unit's texture object and image format, and drop the unit's
+ * upload flag from the dirty mask. */
+static void disable_tex( GLcontext *ctx, GLuint unit )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   imesa->CurrentTexObj[unit] = NULL;
+   imesa->TexEnvImageFmt[unit] = 0;
+   imesa->dirty &= ~(I810_UPLOAD_TEX0<<unit);
+}
+
+/**
+ * Update hardware state for a texture unit.  Unsupported targets and any
+ * setup or combine failure raise the I810_FALLBACK_TEXTURE fallback.
+ *
+ * \todo
+ * 1D textures should be supported!  Just use a 2D texture with the second
+ * texture coordinate value fixed at 0.0.
+ */
+static void i810UpdateTexUnit( GLcontext *ctx, GLuint unit, 
+			      int * next_color_stage, int * next_alpha_stage )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLboolean ret = GL_TRUE;
+
+   switch(texUnit->_ReallyEnabled) {
+   case TEXTURE_2D_BIT:
+     ret = enable_tex_common( ctx, unit);
+     ret &= enable_tex_2d(ctx, unit);
+     break;
+   case TEXTURE_RECT_BIT:
+     ret = enable_tex_common( ctx, unit);
+     ret &= enable_tex_rect(ctx, unit);
+     break;
+   case 0:
+     disable_tex(ctx, unit);
+     break;
+   default:
+     /* Any other enabled target has no hardware path in this function. */
+     ret = GL_FALSE;
+     break;
+   }
+
+   if (ret == GL_FALSE) {
+     FALLBACK( imesa, I810_FALLBACK_TEXTURE, GL_TRUE );
+   }
+
+   if (!i810UpdateTexEnvCombine( ctx, unit, 
+				 next_color_stage, next_alpha_stage )) {
+     FALLBACK( imesa, I810_FALLBACK_TEXTURE, GL_TRUE );
+   }
+}
+
+
+/* Rebuild texture and combine-stage state for both units, padding the
+ * color/alpha stage lists to equal length and terminating short lists.
+ */
+void i810UpdateTextureState( GLcontext *ctx )
+{
+   static const unsigned color_pass[3] = {
+      GFX_OP_MAP_COLOR_STAGES | MC_STAGE_0 | MC_UPDATE_DEST | MC_DEST_CURRENT
+	| MC_UPDATE_ARG1 | (MC_ARG_ITERATED_COLOR << MC_ARG1_SHIFT)
+	| MC_UPDATE_ARG2 | (MC_ARG_ONE            << MC_ARG2_SHIFT)
+	| MC_UPDATE_OP   | MC_OP_ARG1,
+      GFX_OP_MAP_COLOR_STAGES | MC_STAGE_1 | MC_UPDATE_DEST | MC_DEST_CURRENT
+	| MC_UPDATE_ARG1 | (MC_ARG_CURRENT_COLOR  << MC_ARG1_SHIFT)
+	| MC_UPDATE_ARG2 | (MC_ARG_ONE            << MC_ARG2_SHIFT)
+	| MC_UPDATE_OP   | MC_OP_ARG1,
+      GFX_OP_MAP_COLOR_STAGES | MC_STAGE_2 | MC_UPDATE_DEST | MC_DEST_CURRENT
+	| MC_UPDATE_ARG1 | (MC_ARG_CURRENT_COLOR  << MC_ARG1_SHIFT)
+	| MC_UPDATE_ARG2 | (MC_ARG_ONE            << MC_ARG2_SHIFT)
+	| MC_UPDATE_OP   | MC_OP_ARG1
+   };
+   static const unsigned alpha_pass[3] = {
+      GFX_OP_MAP_ALPHA_STAGES | MA_STAGE_0
+	| MA_UPDATE_ARG1 | (MA_ARG_ITERATED_ALPHA << MA_ARG1_SHIFT)
+	| MA_UPDATE_ARG2 | (MA_ARG_ITERATED_ALPHA << MA_ARG2_SHIFT)
+	| MA_UPDATE_OP   | MA_OP_ARG1,
+      GFX_OP_MAP_ALPHA_STAGES | MA_STAGE_1
+	| MA_UPDATE_ARG1 | (MA_ARG_CURRENT_ALPHA  << MA_ARG1_SHIFT)
+	| MA_UPDATE_ARG2 | (MA_ARG_CURRENT_ALPHA  << MA_ARG2_SHIFT)
+	| MA_UPDATE_OP   | MA_OP_ARG1,
+      GFX_OP_MAP_ALPHA_STAGES | MA_STAGE_2
+	| MA_UPDATE_ARG1 | (MA_ARG_CURRENT_ALPHA  << MA_ARG1_SHIFT)
+	| MA_UPDATE_ARG2 | (MA_ARG_CURRENT_ALPHA  << MA_ARG2_SHIFT)
+	| MA_UPDATE_OP   | MA_OP_ARG1
+   };
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   int next_color_stage = 0;
+   int next_alpha_stage = 0;
+
+   FALLBACK( imesa, I810_FALLBACK_TEXTURE, GL_FALSE );
+
+   i810UpdateTexUnit( ctx, 0, & next_color_stage, & next_alpha_stage );
+   i810UpdateTexUnit( ctx, 1, & next_color_stage, & next_alpha_stage );
+
+   /* There needs to be at least one combine stage emitted that just moves
+    * the incoming primary color to the current color register.  In addition,
+    * there must be the same number of color and alpha stages emitted.
+    * Finally, if there are less than 3 combine stages, a MC_OP_DISABLE stage
+    * must be emitted.
+    */
+   while ( (next_color_stage == 0) ||
+	   (next_color_stage < next_alpha_stage) ) {
+      set_color_stage( color_pass[ next_color_stage ], next_color_stage,
+		       imesa );
+      next_color_stage++;
+   }
+
+   assert( next_color_stage <= 3 );
+
+   while ( next_alpha_stage < next_color_stage ) {
+      set_alpha_stage( alpha_pass[ next_alpha_stage ], next_alpha_stage,
+		       imesa );
+      next_alpha_stage++;
+   }
+
+   assert( next_alpha_stage <= 3 );
+   assert( next_color_stage == next_alpha_stage );
+
+   if ( next_color_stage < 3 ) {
+      const unsigned color = GFX_OP_MAP_COLOR_STAGES
+	| (next_color_stage << MC_STAGE_SHIFT)
+	| MC_UPDATE_DEST | MC_DEST_CURRENT
+	| MC_UPDATE_ARG1 | (MC_ARG_ONE << MC_ARG1_SHIFT)
+	| MC_UPDATE_ARG2 | (MC_ARG_ONE << MC_ARG2_SHIFT)
+	| MC_UPDATE_OP   | (MC_OP_DISABLE);
+
+      /* Alpha words take the alpha stage index and the MA_ shift. */
+      const unsigned alpha = GFX_OP_MAP_ALPHA_STAGES
+	| (next_alpha_stage << MA_STAGE_SHIFT)
+	| MA_UPDATE_ARG1 | (MA_ARG_CURRENT_ALPHA << MA_ARG1_SHIFT)
+	| MA_UPDATE_ARG2 | (MA_ARG_CURRENT_ALPHA << MA_ARG2_SHIFT)
+	| MA_UPDATE_OP   | (MA_OP_ARG1);
+      set_color_stage( color, next_color_stage, imesa );
+      set_alpha_stage( alpha, next_alpha_stage, imesa );
+   }
+}
diff --git a/src/mesa/drivers/dri/i810/i810tris.c b/src/mesa/drivers/dri/i810/i810tris.c
new file mode 100644
index 0000000000..1492f711c9
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810tris.c
@@ -0,0 +1,872 @@
+/**************************************************************************
+
+Copyright 2001 VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "main/colormac.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "i810screen.h"
+#include "i810_dri.h"
+
+#include "i810tris.h"
+#include "i810state.h"
+#include "i810vb.h"
+#include "i810ioctl.h"
+
+static void i810RenderPrimitive( GLcontext *ctx, GLenum prim );
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+
+#if defined(USE_X86_ASM)	/* x86 path: copy vertsize dwords from v to vb with "rep ; movsl" */
+#define COPY_DWORDS( j, vb, vertsize, v )				\
+do {									\
+	int __tmp;							\
+	__asm__ __volatile__( "rep ; movsl"				\
+			      : "=%c" (j), "=D" (vb), "=S" (__tmp)	\
+			      : "0" (vertsize),				\
+			        "D" ((long)vb),				\
+			        "S" ((long)v) );			\
+} while (0)
+#else	/* portable C path: copy vertsize dwords, then advance vb past them; j is the loop counter */
+#define COPY_DWORDS( j, vb, vertsize, v )				\
+do {									\
+   for ( j = 0 ; j < vertsize ; j++ )					\
+      vb[j] = ((GLuint *)v)[j];						\
+   vb += vertsize;							\
+} while (0)
+#endif	/* USE_X86_ASM */
+
+/* Emit one triangle: get room for three vertices from i810AllocDmaLow()
+ * and copy v0, v1, v2 into it back to back. */
+static INLINE void i810_draw_triangle( i810ContextPtr imesa, i810VertexPtr v0,
+					   i810VertexPtr v1, i810VertexPtr v2 )
+{
+   GLuint dwords = imesa->vertex_size;
+   GLuint *out = i810AllocDmaLow( imesa, 3 * 4 * dwords );
+   int n;
+
+   COPY_DWORDS( n, out, dwords, v0 );
+   COPY_DWORDS( n, out, dwords, v1 );
+   COPY_DWORDS( n, out, dwords, v2 );
+}
+
+
+/* Emit a quad as two triangles, (v0, v1, v3) and (v1, v2, v3), copied back
+ * to back into space obtained from i810AllocDmaLow(). */
+static INLINE void i810_draw_quad( i810ContextPtr imesa, i810VertexPtr v0,
+				       i810VertexPtr v1, i810VertexPtr v2,
+				       i810VertexPtr v3 )
+{
+   GLuint dwords = imesa->vertex_size;
+   GLuint *out = i810AllocDmaLow( imesa, 6 * 4 * dwords );
+   int n;
+
+   COPY_DWORDS( n, out, dwords, v0 );	/* first triangle */
+   COPY_DWORDS( n, out, dwords, v1 );
+   COPY_DWORDS( n, out, dwords, v3 );
+   COPY_DWORDS( n, out, dwords, v1 );	/* second triangle */
+   COPY_DWORDS( n, out, dwords, v2 );
+   COPY_DWORDS( n, out, dwords, v3 );
+}
+
+
+/* Draw a point as a horizontal line between (x - sz) and (x + sz), both
+ * offset by 0.125, where sz is half the clamped point size.
+ */
+static INLINE void i810_draw_point( i810ContextPtr imesa,
+					i810VertexPtr tmp )
+{
+   GLfloat sz = 0.5 * CLAMP(imesa->glCtx->Point.Size,
+                            imesa->glCtx->Const.MinPointSize,
+                            imesa->glCtx->Const.MaxPointSize);
+   int vertsize = imesa->vertex_size;
+   GLuint *left = i810AllocDmaLow( imesa, 2 * 4 * vertsize );
+   GLuint *right = left + vertsize;
+   int k;
+
+   *(float *)&left[0] = tmp->v.x - sz + 0.125;
+   for (k = 1 ; k < vertsize ; k++)
+      left[k] = tmp->ui[k];
+
+   *(float *)&right[0] = tmp->v.x + sz + 0.125;
+   for (k = 1 ; k < vertsize ; k++)
+      right[k] = tmp->ui[k];
+}
+
+
+/* Emit one line segment: copy v0 then v1 into space for two vertices. */
+static INLINE void i810_draw_line( i810ContextPtr imesa,
+				       i810VertexPtr v0,
+				       i810VertexPtr v1 )
+{
+   GLuint dwords = imesa->vertex_size;
+   GLuint *out = i810AllocDmaLow( imesa, 2 * 4 * dwords );
+   int n;
+   COPY_DWORDS( n, out, dwords, v0 );
+   COPY_DWORDS( n, out, dwords, v1 );
+}
+
+
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+#define TRI( a, b, c )	/* DO_FALLBACK: imesa->draw_tri hook, else inline emit */	\
+do { 						\
+   if (0) fprintf(stderr, "hw TRI\n");		\
+   if (DO_FALLBACK)				\
+      imesa->draw_tri( imesa, a, b, c );	\
+   else						\
+      i810_draw_triangle( imesa, a, b, c );	\
+} while (0)
+
+#define QUAD( a, b, c, d )	/* DO_FALLBACK: two draw_tri calls (a,b,d), (b,c,d) */	\
+do { 						\
+   if (0) fprintf(stderr, "hw QUAD\n");		\
+   if (DO_FALLBACK) {				\
+      imesa->draw_tri( imesa, a, b, d );	\
+      imesa->draw_tri( imesa, b, c, d );	\
+   } else					\
+      i810_draw_quad( imesa, a, b, c, d );	\
+} while (0)
+
+#define LINE( v0, v1 )	/* DO_FALLBACK: imesa->draw_line hook, else inline emit */	\
+do { 						\
+   if (0) fprintf(stderr, "hw LINE\n");		\
+   if (DO_FALLBACK)				\
+      imesa->draw_line( imesa, v0, v1 );	\
+   else						\
+      i810_draw_line( imesa, v0, v1 );		\
+} while (0)
+
+#define POINT( v0 )	/* DO_FALLBACK: imesa->draw_point hook, else inline emit */	\
+do { 						\
+   if (0) fprintf(stderr, "hw POINT\n");	\
+   if (DO_FALLBACK)				\
+      imesa->draw_point( imesa, v0 );		\
+   else						\
+      i810_draw_point( imesa, v0 );		\
+} while (0)
+
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+#define I810_OFFSET_BIT 	0x01	/* tested by DO_OFFSET */
+#define I810_TWOSIDE_BIT	0x02	/* tested by DO_TWOSIDE */
+#define I810_UNFILLED_BIT	0x04	/* tested by DO_UNFILLED */
+#define I810_FALLBACK_BIT	0x08	/* tested by DO_FALLBACK */
+#define I810_MAX_TRIFUNC	0x10	/* rast_tab size: one slot per combination of the four bits above */
+
+
+static struct {	/* one set of primitive entry points per flag combination */
+   tnl_points_func	        points;
+   tnl_line_func		line;
+   tnl_triangle_func	triangle;
+   tnl_quad_func		quad;
+} rast_tab[I810_MAX_TRIFUNC];	/* presumably filled in by the t_dd_tritmp.h expansions (via TAB) -- confirm */
+
+
+#define DO_FALLBACK (IND & I810_FALLBACK_BIT)	/* IND is redefined before each template #include */
+#define DO_OFFSET   (IND & I810_OFFSET_BIT)
+#define DO_UNFILLED (IND & I810_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & I810_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_SPEC         1
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define VERTEX            i810Vertex
+#define TAB               rast_tab
+
+/* Macro parameters are parenthesized so expression arguments expand safely. */
+#define DEPTH_SCALE (1.0/0xffff)
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) (_v)->v.x
+#define VERT_Y(_v) (_v)->v.y
+#define VERT_Z(_v) (_v)->v.z
+#define AREA_IS_CCW( a ) ((a) > 0)
+#define GET_VERTEX(e) (imesa->verts + ((e) * imesa->vertex_size * sizeof(int)))
+
+#define VERT_SET_RGBA( v, c )  					\
+do {								\
+   i810_color_t *color = (i810_color_t *)&((v)->ui[coloroffset]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]);		\
+} while (0)
+
+#define VERT_COPY_RGBA( v0, v1 ) (v0)->ui[coloroffset] = (v1)->ui[coloroffset]
+
+#define VERT_SET_SPEC( v0, c )					\
+do {								\
+   if (havespec) {						\
+      UNCLAMPED_FLOAT_TO_UBYTE((v0)->v.specular.red, (c)[0]);	\
+      UNCLAMPED_FLOAT_TO_UBYTE((v0)->v.specular.green, (c)[1]);	\
+      UNCLAMPED_FLOAT_TO_UBYTE((v0)->v.specular.blue, (c)[2]);	\
+   }								\
+} while (0)
+#define VERT_COPY_SPEC( v0, v1 )			\
+do {							\
+   if (havespec) {					\
+      (v0)->v.specular.red   = (v1)->v.specular.red;	\
+      (v0)->v.specular.green = (v1)->v.specular.green;	\
+      (v0)->v.specular.blue  = (v1)->v.specular.blue; 	\
+   }							\
+} while (0)
+
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = v[idx]->ui[5]	/* dword 5: presumably the specular color -- confirm against i810Vertex */
+#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = spec[idx]	/* bare if: use only as a full statement */
+
+#define LOCAL_VARS(n)							\
+   i810ContextPtr imesa = I810_CONTEXT(ctx);				\
+   GLuint color[n] = { 0 };						\
+   GLuint spec[n] = { 0 };						\
+   GLuint coloroffset = (imesa->vertex_size == 4 ? 3 : 4);		\
+   GLboolean havespec = (imesa->vertex_size > 4);			\
+   (void) color; (void) spec; (void) coloroffset; (void) havespec;
+
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+static const GLuint hw_prim[GL_POLYGON+1] = {
+   PR_LINES,		/* GL_POINTS */
+   PR_LINES,		/* GL_LINES */
+   PR_LINES,		/* GL_LINE_LOOP */
+   PR_LINES,		/* GL_LINE_STRIP */
+   PR_TRIANGLES,	/* GL_TRIANGLES */
+   PR_TRIANGLES,	/* GL_TRIANGLE_STRIP */
+   PR_TRIANGLES,	/* GL_TRIANGLE_FAN */
+   PR_TRIANGLES,	/* GL_QUADS */
+   PR_TRIANGLES,	/* GL_QUAD_STRIP */
+   PR_TRIANGLES		/* GL_POLYGON */
+};
+/* Call i810RasterPrimitive only when the hardware primitive for x differs. */
+#define RASTERIZE(x) if (imesa->hw_primitive != hw_prim[x]) \
+                        i810RasterPrimitive( ctx, x, hw_prim[x] )
+#define RENDER_PRIMITIVE imesa->render_primitive
+#define TAG(x) x
+#define IND I810_FALLBACK_BIT
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_TWOSIDE_BIT|I810_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_OFFSET_BIT|I810_UNFILLED_BIT)
+#define TAG(x) x##_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_TWOSIDE_BIT|I810_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_TWOSIDE_BIT|I810_OFFSET_BIT|I810_UNFILLED_BIT)
+#define TAG(x) x##_twoside_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+/* The remaining eight variants repeat the set above with I810_FALLBACK_BIT added. */
+#define IND (I810_FALLBACK_BIT)
+#define TAG(x) x##_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_OFFSET_BIT|I810_FALLBACK_BIT)
+#define TAG(x) x##_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_TWOSIDE_BIT|I810_FALLBACK_BIT)
+#define TAG(x) x##_twoside_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_TWOSIDE_BIT|I810_OFFSET_BIT|I810_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_UNFILLED_BIT|I810_FALLBACK_BIT)
+#define TAG(x) x##_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_OFFSET_BIT|I810_UNFILLED_BIT|I810_FALLBACK_BIT)
+#define TAG(x) x##_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_TWOSIDE_BIT|I810_UNFILLED_BIT|I810_FALLBACK_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (I810_TWOSIDE_BIT|I810_OFFSET_BIT|I810_UNFILLED_BIT| \
+	     I810_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+static void init_rast_tab( void )
+{
+   /* One template-generated init per IND variant, run in the original order. */
+   static void (* const variant_init[])( void ) = {
+      init, init_offset, init_twoside, init_twoside_offset,
+      init_unfilled, init_offset_unfilled,
+      init_twoside_unfilled, init_twoside_offset_unfilled,
+      init_fallback, init_offset_fallback,
+      init_twoside_fallback, init_twoside_offset_fallback,
+      init_unfilled_fallback, init_offset_unfilled_fallback,
+      init_twoside_unfilled_fallback,
+      init_twoside_offset_unfilled_fallback
+   };
+   const GLuint total = sizeof(variant_init) / sizeof(variant_init[0]);
+   GLuint k;
+
+   for (k = 0 ; k < total ; k++)
+      variant_init[k]();
+}
+
+
+/***********************************************************************
+ *                    Rasterization fallback helpers                   *
+ ***********************************************************************/
+
+
+/* This code is hit only when a mix of accelerated and unaccelerated
+ * primitives are being drawn, and only for the unaccelerated
+ * primitives.
+ */
+static void
+i810_fallback_tri( i810ContextPtr imesa,
+		   i810Vertex *v0, i810Vertex *v1, i810Vertex *v2 )
+{
+   /* Translate the hardware vertices to SWvertex form, then let swrast
+    * rasterize the triangle. */
+   GLcontext *ctx = imesa->glCtx;
+   SWvertex a, b, c;
+   i810_translate_vertex( ctx, v0, &a );
+   i810_translate_vertex( ctx, v1, &b );
+   i810_translate_vertex( ctx, v2, &c );
+   _swrast_Triangle( ctx, &a, &b, &c );
+}
+
+
+static void
+i810_fallback_line( i810ContextPtr imesa, i810Vertex *v0, i810Vertex *v1 )
+{
+   /* Software line: convert both endpoints to SWvertex form and pass
+    * them to swrast. */
+   GLcontext *ctx = imesa->glCtx;
+   SWvertex from, to;
+   i810_translate_vertex( ctx, v0, &from );
+   i810_translate_vertex( ctx, v1, &to );
+   _swrast_Line( ctx, &from, &to );
+}
+
+
+static void
+i810_fallback_point( i810ContextPtr imesa, i810Vertex *v0 )
+{
+   /* Software point: convert the vertex to SWvertex form for swrast. */
+   GLcontext *ctx = imesa->glCtx;
+   SWvertex sw;
+   i810_translate_vertex( ctx, v0, &sw );
+   _swrast_Point( ctx, &sw );
+}
+
+
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+
+#define IND 0
+#define V(x) (i810Vertex *)(vertptr + ((x)*vertsize*sizeof(int))) /* needs vertptr, vertsize */
+#define RENDER_POINTS( start, count )	\
+   for ( ; start < count ; start++) POINT( V(ELT(start)) );
+#define RENDER_LINE( v0, v1 )         LINE( V(v0), V(v1) )
+#define RENDER_TRI(  v0, v1, v2 )     TRI(  V(v0), V(v1), V(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) QUAD( V(v0), V(v1), V(v2), V(v3) )
+#define INIT(x) i810RenderPrimitive( ctx, x )
+#undef LOCAL_VARS	/* replaces the LOCAL_VARS(n) defined earlier in this file */
+#define LOCAL_VARS						\
+    i810ContextPtr imesa = I810_CONTEXT(ctx);			\
+    GLubyte *vertptr = (GLubyte *)imesa->verts;			\
+    const GLuint vertsize = imesa->vertex_size;       	\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    (void) elt;
+#define RESET_STIPPLE	/* expands to nothing */
+#define RESET_OCCLUSION	/* expands to nothing */
+#define PRESERVE_VB_DEFS
+#define ELT(x) x	/* _verts variant: index vertices directly */
+#define TAG(x) i810_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) i810_##x##_elts
+#define ELT(x) elt[x]	/* _elts variant: index through the element array */
+#include "tnl/t_vb_rendertmp.h"
+
+/**********************************************************************/
+/*                   Render clipped primitives                        */
+/**********************************************************************/
+
+
+
+static void i810RenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+				   GLuint n )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *vbuf = &tnl->vb;
+   GLuint *saved_elts = vbuf->Elts;
+   const GLuint saved_prim = I810_CONTEXT(ctx)->render_primitive;
+
+   /* Temporarily point the vertex buffer at the caller's element list
+    * and draw it through the GL_POLYGON entry of the element render
+    * table, as an unclipped polygon.
+    */
+   vbuf->Elts = (GLuint *)elts;
+   tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n,
+					       PRIM_BEGIN|PRIM_END );
+   vbuf->Elts = saved_elts;
+
+   /* Restore the render primitive that was active on entry, unless it
+    * was GL_POLYGON already.
+    */
+   if (saved_prim != GL_POLYGON)
+      tnl->Driver.Render.PrimitiveNotify( ctx, saved_prim );
+}
+
+/* Clipped lines are drawn by whatever Line function TNL currently has. */
+static void i810RenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj )
+{
+   TNL_CONTEXT(ctx)->Driver.Render.Line( ctx, ii, jj );
+}
+
+static void i810FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+				       GLuint n )
+{
+   /* Copy the polygon to DMA as (n-2) triangles sharing vertex elts[0]. */
+   i810ContextPtr imesa = I810_CONTEXT( ctx );
+   GLuint vertsize = imesa->vertex_size;	/* dwords per vertex */
+   GLubyte *vertptr = (GLubyte *)imesa->verts;	/* read by V() */
+   GLuint *vb, i; int j;		/* j is scratch for COPY_DWORDS */
+   if (n < 3) return;		/* no triangle; also stops (n-2) wrapping */
+   vb = i810AllocDmaLow( imesa, (n-2) * 3 * 4 * vertsize );
+   for (i = 2 ; i < n ; i++) {
+      COPY_DWORDS( j, vb, vertsize, V(elts[i-1]) );
+      COPY_DWORDS( j, vb, vertsize, V(elts[i]) );
+      COPY_DWORDS( j, vb, vertsize, V(elts[0]) );
+   }
+}
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+/***********************************************************************
+ *                    Rasterization fallback helpers                   *
+ ***********************************************************************/
+
+
+
+#define _I810_NEW_RENDERSTATE (_DD_NEW_LINE_STIPPLE |		\
+			       _DD_NEW_TRI_UNFILLED |		\
+			       _DD_NEW_TRI_LIGHT_TWOSIDE |	\
+			       _DD_NEW_TRI_OFFSET |		\
+			       _DD_NEW_TRI_STIPPLE |		\
+			       _NEW_POLYGONSTIPPLE)
+/* _TriangleCaps bits for which i810ChooseRenderState hooks in software fallbacks. */
+#define POINT_FALLBACK (0)
+#define LINE_FALLBACK (DD_LINE_STIPPLE)
+#define TRI_FALLBACK (0)
+#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK|\
+                            DD_TRI_STIPPLE)
+#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED)
+
+static void i810ChooseRenderState(GLcontext *ctx)
+{
+   /* Select the rast_tab variant and draw hooks matching _TriangleCaps,
+    * then install the corresponding TNL render callbacks. */
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint variant = 0;
+
+   if (I810_DEBUG & DEBUG_STATE)
+     fprintf(stderr,"\n%s\n",__FUNCTION__);
+
+   if (caps & (ANY_FALLBACK_FLAGS|ANY_RASTER_FLAGS)) {
+      /* Each raster cap maps onto one bit of the variant index. */
+      if (caps & DD_TRI_LIGHT_TWOSIDE)
+         variant |= I810_TWOSIDE_BIT;
+      if (caps & DD_TRI_OFFSET)
+         variant |= I810_OFFSET_BIT;
+      if (caps & DD_TRI_UNFILLED)
+         variant |= I810_UNFILLED_BIT;
+      /* Start from the hardware draw functions... */
+      imesa->draw_point = i810_draw_point;
+      imesa->draw_line = i810_draw_line;
+      imesa->draw_tri = i810_draw_triangle;
+
+      /* ...and swap in software ones for the primitives that need them. */
+      if (caps & ANY_FALLBACK_FLAGS) {
+         variant |= I810_FALLBACK_BIT;
+         if (caps & POINT_FALLBACK)
+            imesa->draw_point = i810_fallback_point;
+         if (caps & LINE_FALLBACK)
+            imesa->draw_line = i810_fallback_line;
+         if ((caps & TRI_FALLBACK) ||
+             ((caps & DD_TRI_STIPPLE) && !imesa->stipple_in_hw))
+            imesa->draw_tri = i810_fallback_tri;
+      }
+   }
+
+   /* Nothing to re-hook when the variant is unchanged. */
+   if (imesa->RenderIndex == variant)
+      return;
+   imesa->RenderIndex = variant;
+
+   tnl->Driver.Render.Points = rast_tab[variant].points;
+   tnl->Driver.Render.Line = rast_tab[variant].line;
+   tnl->Driver.Render.Triangle = rast_tab[variant].triangle;
+   tnl->Driver.Render.Quad = rast_tab[variant].quad;
+
+   if (variant != 0) {
+      /* Some variant bit is set: use the _tnl_ render tables and clip paths. */
+      tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = i810RenderClippedLine;
+      tnl->Driver.Render.ClippedPolygon = i810RenderClippedPoly;
+   } else {
+      /* No variant bits: use the driver's own render tables. */
+      tnl->Driver.Render.PrimTabVerts = i810_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = i810_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = line; /* from tritmp.h */
+      tnl->Driver.Render.ClippedPolygon = i810FastRenderClippedPoly;
+   }
+}
+
+static const GLenum reduced_prim[GL_POLYGON+1] = {
+   GL_POINTS,		/* GL_POINTS */
+   GL_LINES,		/* GL_LINES */
+   GL_LINES,		/* GL_LINE_LOOP */
+   GL_LINES,		/* GL_LINE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLES */
+   GL_TRIANGLES,	/* GL_TRIANGLE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLE_FAN */
+   GL_TRIANGLES,	/* GL_QUADS */
+   GL_TRIANGLES,	/* GL_QUAD_STRIP */
+   GL_TRIANGLES		/* GL_POLYGON */
+};
+
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+
+
+/* Determine the rasterized primitive when not drawing unfilled
+ * polygons.
+ *
+ * Used only for the default render stage which always decomposes
+ * primitives to triangles/lines/points.  For the accelerated stage,
+ * which renders strips as strips, the equivalent calculations are
+ * performed in i810render.c.
+ */
+static void i810RenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   const GLuint reduced = reduced_prim[prim];
+   const GLuint hw = hw_prim[prim];
+
+   imesa->render_primitive = prim;
+
+   /* Triangle-class primitives drawn unfilled set no raster state here. */
+   if (reduced == GL_TRIANGLES && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
+      return;
+
+   if (imesa->reduced_primitive != reduced || imesa->hw_primitive != hw)
+      i810RasterPrimitive( ctx, reduced, hw );
+}
+
+static void i810RunPipeline( GLcontext *ctx )
+{
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+
+   /* Refresh driver state for pending state changes before running TNL. */
+   if (imesa->new_state != 0) {
+      if (imesa->new_state & _NEW_TEXTURE)
+         i810UpdateTextureState( ctx );	/* may modify imesa->new_state */
+
+      if (!imesa->Fallback) {	/* skipped while any fallback bit is set */
+         if (imesa->new_state & _I810_NEW_VERTEX)
+            i810ChooseVertexState( ctx );
+         if (imesa->new_state & _I810_NEW_RENDERSTATE)
+            i810ChooseRenderState( ctx );
+      }
+
+      imesa->new_state = 0;
+   }
+
+   _tnl_run_pipeline( ctx );
+}
+
+static void i810RenderStart( GLcontext *ctx )
+{
+   /* Check for projective texturing.  Make sure all texcoord
+    * pointers point to something.  (fix in mesa?)
+    */
+   i810CheckTexSizes( ctx );
+}
+
+/* Flush swrast only when the active render variant has the fallback bit. */
+static void i810RenderFinish( GLcontext *ctx )
+{
+   if (I810_CONTEXT(ctx)->RenderIndex & I810_FALLBACK_BIT) _swrast_flush( ctx );
+}
+
+
+
+
+/* System to flush dma and emit state changes based on the rasterized
+ * primitive.
+ */
+void i810RasterPrimitive( GLcontext *ctx,
+			  GLenum rprim,
+			  GLuint hwprim )
+{
+   /* Record the reduced/hardware primitive and recompute the ST1, AA and
+    * LCS context registers for it; I810_STATECHANGE is invoked only when
+    * a register value or the hardware primitive actually changes.
+    */
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   GLuint new_st1 = imesa->Setup[I810_CTXREG_ST1] & ~ST1_ENABLE;
+   GLuint new_aa = imesa->Setup[I810_CTXREG_AA] & ~AA_ENABLE;
+   GLuint new_lcs = imesa->Setup[I810_CTXREG_LCS];
+
+   if (I810_DEBUG & DEBUG_PRIMS) {
+      /* Prints reduced prim, and hw prim */
+      const char *hw_name;
+      switch(hwprim) {
+      case PR_LINES:
+         hw_name = "Lines";
+         break;
+      case PR_LINESTRIP:
+         hw_name = "LineStrip";
+         break;
+      case PR_TRIANGLES:
+         hw_name = "Triangles";
+         break;
+      case PR_TRISTRIP_0:
+         hw_name = "TriStrip_0";
+         break;
+      case PR_TRIFAN:
+         hw_name = "TriFan";
+         break;
+      case PR_POLYGON:
+         hw_name = "Polygons";
+         break;
+      default:
+         hw_name = "Unknown";
+         break;
+      }
+
+      fprintf(stderr, "%s : rprim(%s), hwprim(%s)\n",
+	      __FUNCTION__, _mesa_lookup_enum_by_nr(rprim), hw_name);
+   }
+
+   /* The enables were masked off above; turn them back on per GL state. */
+   switch (rprim) {
+   case GL_TRIANGLES:
+      if (ctx->Polygon.StippleFlag)
+         new_st1 |= ST1_ENABLE;
+      if (ctx->Polygon.SmoothFlag)
+         new_aa |= AA_ENABLE;
+      break;
+   case GL_LINES:
+      new_lcs &= ~(LCS_LINEWIDTH_3_0|LCS_LINEWIDTH_0_5);
+      new_lcs |= imesa->LcsLineWidth;
+      if (ctx->Line.SmoothFlag) {
+         new_aa |= AA_ENABLE;
+         new_lcs |= LCS_LINEWIDTH_0_5;
+      }
+      break;
+   case GL_POINTS:
+      /* Points reuse the line-width field, loaded with LcsPointSize. */
+      new_lcs &= ~(LCS_LINEWIDTH_3_0|LCS_LINEWIDTH_0_5);
+      new_lcs |= imesa->LcsPointSize;
+      if (ctx->Point.SmoothFlag) {
+         new_aa |= AA_ENABLE;
+         new_lcs |= LCS_LINEWIDTH_0_5;
+      }
+      break;
+   default:
+      return;
+   }
+
+   imesa->reduced_primitive = rprim;
+
+   if (new_st1 != imesa->Setup[I810_CTXREG_ST1] ||
+       new_aa != imesa->Setup[I810_CTXREG_AA] ||
+       new_lcs != imesa->Setup[I810_CTXREG_LCS]) {
+      I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+      imesa->hw_primitive = hwprim;
+      imesa->Setup[I810_CTXREG_LCS] = new_lcs;
+      imesa->Setup[I810_CTXREG_ST1] = new_st1;
+      imesa->Setup[I810_CTXREG_AA] = new_aa;
+   }
+   else if (hwprim != imesa->hw_primitive) {
+      I810_STATECHANGE(imesa, 0);
+      imesa->hw_primitive = hwprim;
+   }
+}
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+static char *fallbackStrings[] = {
+   "Texture",
+   "Draw buffer",
+   "Read buffer",
+   "Color mask",
+   "Render mode",
+   "Stencil",
+   "Stipple",
+   "User disable"
+};
+/* Name of a single fallback bit: the table index is the bit's position
+ * (bit 0x1 -> entry 0).  Bits past the end of the table yield "Unknown". */
+static char *getFallbackString(GLuint bit)
+{
+   GLuint i;
+   for (i = 0; bit > 1; bit >>= 1)
+      i++;
+   if (i >= sizeof(fallbackStrings) / sizeof(fallbackStrings[0]))
+      return "Unknown";
+   return fallbackStrings[i];
+}
+
+void i810Fallback( i810ContextPtr imesa, GLuint bit, GLboolean mode )
+{
+   /* Set (mode != 0) or clear one fallback reason.  Only the first reason
+    * set and the last one cleared switch between hardware and software. */
+   GLcontext *ctx = imesa->glCtx;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint before = imesa->Fallback;
+   if (0) fprintf(stderr, "%s old %x bit %x mode %d\n", __FUNCTION__,
+		  imesa->Fallback, bit, mode );
+
+   if (mode) {
+      imesa->Fallback = before | bit;
+      if (before != 0)
+         return;
+      I810_FIREVERTICES(imesa);
+      if (I810_DEBUG & DEBUG_FALLBACKS)
+         fprintf(stderr, "ENTER FALLBACK %s\n", getFallbackString( bit ));
+      _swsetup_Wakeup( ctx );
+      imesa->RenderIndex = ~0;
+      return;
+   }
+   imesa->Fallback = before & ~bit;
+   if (before != bit)
+      return;
+   _swrast_flush( ctx );
+   if (I810_DEBUG & DEBUG_FALLBACKS)
+      fprintf(stderr, "LEAVE FALLBACK %s\n", getFallbackString( bit ));
+   tnl->Driver.Render.Start = i810RenderStart;
+   tnl->Driver.Render.PrimitiveNotify = i810RenderPrimitive;
+   tnl->Driver.Render.Finish = i810RenderFinish;
+   tnl->Driver.Render.BuildVertices = i810BuildVertices;
+   imesa->new_state |= (_I810_NEW_RENDERSTATE|_I810_NEW_VERTEX);
+}
+
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+
+void i810InitTriFuncs( GLcontext *ctx )
+{
+   /* Build rast_tab on the first call only, then install the driver's hooks. */
+   static int tables_built = 0;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (!tables_built) {
+      init_rast_tab();
+      tables_built = 1;
+   }
+   tnl->Driver.RunPipeline = i810RunPipeline;
+   tnl->Driver.Render.Start = i810RenderStart;
+   tnl->Driver.Render.Finish = i810RenderFinish;
+   tnl->Driver.Render.PrimitiveNotify = i810RenderPrimitive;
+   tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple;
+   tnl->Driver.Render.BuildVertices = i810BuildVertices;
+}
diff --git a/src/mesa/drivers/dri/i810/i810tris.h b/src/mesa/drivers/dri/i810/i810tris.h
new file mode 100644
index 0000000000..ab026be0a5
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810tris.h
@@ -0,0 +1,35 @@
+/*
+ * GLX Hardware Device Driver for Intel i810
+ * Copyright (C) 1999 Keith Whitwell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+
+#ifndef I810TRIS_INC
+#define I810TRIS_INC
+
+#include "main/mtypes.h"
+/* Render-state entry points of the i810 driver. */
+extern void i810PrintRenderState( const char *msg, GLuint state );
+extern void i810InitTriFuncs( GLcontext *ctx );
+extern void i810RasterPrimitive( GLcontext *ctx, GLenum rprim, GLuint hwprim );
+
+#endif
diff --git a/src/mesa/drivers/dri/i810/i810vb.c b/src/mesa/drivers/dri/i810/i810vb.c
new file mode 100644
index 0000000000..70301a2d2e
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810vb.c
@@ -0,0 +1,486 @@
+/*
+ * GLX Hardware Device Driver for Intel i810
+ * Copyright (C) 1999 Keith Whitwell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+ 
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/colormac.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/t_context.h"
+
+#include "i810screen.h"
+#include "i810_dri.h"
+#include "i810context.h"
+#include "i810vb.h"
+#include "i810ioctl.h"
+#include "i810state.h"
+
+
+#define I810_TEX1_BIT       0x1
+#define I810_TEX0_BIT       0x2
+#define I810_RGBA_BIT       0x4
+#define I810_SPEC_BIT       0x8
+#define I810_FOG_BIT	    0x10
+#define I810_XYZW_BIT       0x20
+#define I810_PTEX_BIT       0x40	/* OR'ed into SetupIndex by i810CheckTexSizes */
+#define I810_MAX_SETUP      0x80	/* number of setup_tab entries */
+/* One entry per combination of the bits above, indexed by SetupIndex. */
+static struct {
+   void                (*emit)( GLcontext *, GLuint, GLuint, void *, GLuint );
+   tnl_interp_func		interp;
+   tnl_copy_pv_func	        copy_pv;
+   GLboolean           (*check_tex_sizes)( GLcontext *ctx );
+   GLuint               vertex_size;
+   GLuint               vertex_format;
+} setup_tab[I810_MAX_SETUP];
+
+#define TINY_VERTEX_FORMAT (GFX_OP_VERTEX_FMT |		\
+		            VF_TEXCOORD_COUNT_0 |	\
+		            VF_RGBA_ENABLE |		\
+		            VF_XYZ)
+/* The formats below use VF_XYZW and add VF_SPEC_FOG_ENABLE; they differ in texcoord count. */
+#define NOTEX_VERTEX_FORMAT (GFX_OP_VERTEX_FMT |	\
+		             VF_TEXCOORD_COUNT_0 |	\
+		             VF_SPEC_FOG_ENABLE |	\
+		             VF_RGBA_ENABLE |		\
+		             VF_XYZW)
+
+#define TEX0_VERTEX_FORMAT (GFX_OP_VERTEX_FMT |		\
+		            VF_TEXCOORD_COUNT_1 |	\
+		            VF_SPEC_FOG_ENABLE |	\
+		            VF_RGBA_ENABLE |		\
+		            VF_XYZW)
+
+#define TEX1_VERTEX_FORMAT (GFX_OP_VERTEX_FMT |		\
+		            VF_TEXCOORD_COUNT_2 |	\
+		            VF_SPEC_FOG_ENABLE |	\
+		            VF_RGBA_ENABLE |		\
+		            VF_XYZW)
+/* Formats defined as 0; the matching HAVE_*_VERTICES switches below are 0 too. */
+#define PROJ_TEX1_VERTEX_FORMAT 0
+#define TEX2_VERTEX_FORMAT      0
+#define TEX3_VERTEX_FORMAT      0
+#define PROJ_TEX3_VERTEX_FORMAT 0
+
+#define DO_XYZW (IND & I810_XYZW_BIT)
+#define DO_RGBA (IND & I810_RGBA_BIT)
+#define DO_SPEC (IND & I810_SPEC_BIT)
+#define DO_FOG  (IND & I810_FOG_BIT)
+#define DO_TEX0 (IND & I810_TEX0_BIT)
+#define DO_TEX1 (IND & I810_TEX1_BIT)
+#define DO_TEX2 0
+#define DO_TEX3 0
+#define DO_PTEX (IND & I810_PTEX_BIT)
+/* Types and context accessors; GET_VERTEX_SIZE() yields bytes (NOTE(review): expansion is unparenthesized). */
+#define VERTEX i810Vertex
+#define VERTEX_COLOR i810_color_t
+#define GET_VIEWPORT_MAT() I810_CONTEXT(ctx)->ViewportMatrix.m
+#define GET_TEXSOURCE(n)  n
+#define GET_VERTEX_FORMAT() I810_CONTEXT(ctx)->Setup[I810_CTXREG_VF]
+#define GET_VERTEX_STORE() I810_CONTEXT(ctx)->verts
+#define GET_VERTEX_SIZE() I810_CONTEXT(ctx)->vertex_size * sizeof(GLuint)
+#define INVALIDATE_STORED_VERTICES()
+/* Capability switches: no hw viewport/divide; tiny, notex, tex0 and tex1 vertices only. */
+#define HAVE_HW_VIEWPORT    0
+#define HAVE_HW_DIVIDE      0
+#define HAVE_RGBA_COLOR     0
+#define HAVE_TINY_VERTICES  1
+#define HAVE_NOTEX_VERTICES 1
+#define HAVE_TEX0_VERTICES  1
+#define HAVE_TEX1_VERTICES  1
+#define HAVE_TEX2_VERTICES  0
+#define HAVE_TEX3_VERTICES  0
+#define HAVE_PTEX_VERTICES  0
+/* Reverse mapping: remove the subpixel bias, flip Y against drawable height h, scale Z by 0xffff. */
+#define UNVIEWPORT_VARS  GLfloat h = I810_CONTEXT(ctx)->driDrawable->h
+#define UNVIEWPORT_X(x)  x - SUBPIXEL_X
+#define UNVIEWPORT_Y(y)  - y + h + SUBPIXEL_Y
+#define UNVIEWPORT_Z(z)  z * (float)0xffff
+
+#define PTEX_FALLBACK() FALLBACK(I810_CONTEXT(ctx), I810_FALLBACK_TEXTURE, 1)
+
+#define INTERP_VERTEX setup_tab[I810_CONTEXT(ctx)->SetupIndex].interp
+#define COPY_PV_VERTEX setup_tab[I810_CONTEXT(ctx)->SetupIndex].copy_pv
+
+
+/***********************************************************************
+ *         Generate  pv-copying and translation functions              *
+ ***********************************************************************/
+
+#define TAG(x) i810_##x
+#include "tnl_dd/t_dd_vb.c"
+
+/***********************************************************************
+ *             Generate vertex emit and interp functions               *
+ ***********************************************************************/
+
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT)
+#define TAG(x) x##_wg
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_SPEC_BIT)
+#define TAG(x) x##_wgs
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_TEX0_BIT)
+#define TAG(x) x##_wgt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_TEX0_BIT|I810_TEX1_BIT)
+#define TAG(x) x##_wgt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_TEX0_BIT|I810_PTEX_BIT)
+#define TAG(x) x##_wgpt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_SPEC_BIT|I810_TEX0_BIT)
+#define TAG(x) x##_wgst0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_SPEC_BIT|I810_TEX0_BIT|\
+             I810_TEX1_BIT)
+#define TAG(x) x##_wgst0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_SPEC_BIT|I810_TEX0_BIT|\
+             I810_PTEX_BIT)
+#define TAG(x) x##_wgspt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_FOG_BIT)
+#define TAG(x) x##_wgf
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_FOG_BIT|I810_SPEC_BIT)
+#define TAG(x) x##_wgfs
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_FOG_BIT|I810_TEX0_BIT)
+#define TAG(x) x##_wgft0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_FOG_BIT|I810_TEX0_BIT|\
+             I810_TEX1_BIT)
+#define TAG(x) x##_wgft0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_FOG_BIT|I810_TEX0_BIT|\
+             I810_PTEX_BIT)
+#define TAG(x) x##_wgfpt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_FOG_BIT|I810_SPEC_BIT|\
+             I810_TEX0_BIT)
+#define TAG(x) x##_wgfst0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_FOG_BIT|I810_SPEC_BIT|\
+             I810_TEX0_BIT|I810_TEX1_BIT)
+#define TAG(x) x##_wgfst0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_XYZW_BIT|I810_RGBA_BIT|I810_FOG_BIT|I810_SPEC_BIT|\
+             I810_TEX0_BIT|I810_PTEX_BIT)
+#define TAG(x) x##_wgfspt0
+#include "tnl_dd/t_dd_vbtmp.h"
+/* Variants without I810_XYZW_BIT; i810BuildVertices picks these when positions are unchanged. */
+#define IND (I810_TEX0_BIT)
+#define TAG(x) x##_t0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_TEX0_BIT|I810_TEX1_BIT)
+#define TAG(x) x##_t0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_FOG_BIT)
+#define TAG(x) x##_f
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_FOG_BIT|I810_TEX0_BIT)
+#define TAG(x) x##_ft0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_FOG_BIT|I810_TEX0_BIT|I810_TEX1_BIT)
+#define TAG(x) x##_ft0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_RGBA_BIT)
+#define TAG(x) x##_g
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_RGBA_BIT|I810_SPEC_BIT)
+#define TAG(x) x##_gs
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_RGBA_BIT|I810_TEX0_BIT)
+#define TAG(x) x##_gt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_RGBA_BIT|I810_TEX0_BIT|I810_TEX1_BIT)
+#define TAG(x) x##_gt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_RGBA_BIT|I810_SPEC_BIT|I810_TEX0_BIT)
+#define TAG(x) x##_gst0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_RGBA_BIT|I810_SPEC_BIT|I810_TEX0_BIT|I810_TEX1_BIT)
+#define TAG(x) x##_gst0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_RGBA_BIT|I810_FOG_BIT)
+#define TAG(x) x##_gf
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_RGBA_BIT|I810_FOG_BIT|I810_SPEC_BIT)
+#define TAG(x) x##_gfs
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_RGBA_BIT|I810_FOG_BIT|I810_TEX0_BIT)
+#define TAG(x) x##_gft0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_RGBA_BIT|I810_FOG_BIT|I810_TEX0_BIT|I810_TEX1_BIT)
+#define TAG(x) x##_gft0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_RGBA_BIT|I810_FOG_BIT|I810_SPEC_BIT|I810_TEX0_BIT)
+#define TAG(x) x##_gfst0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (I810_RGBA_BIT|I810_FOG_BIT|I810_SPEC_BIT|I810_TEX0_BIT|\
+             I810_TEX1_BIT)
+#define TAG(x) x##_gfst0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+
+static void init_setup_tab( void )
+{
+   /* Run every template-generated init_*() function, in the same order
+    * as the variants are instantiated above.
+    */
+   static void (* const setup_init[])( void ) = {
+      /* w = xyzw, g = rgba, s = spec, t0/t1 = texture units, pt0 = ptex */
+      init_wg, init_wgs,
+      init_wgt0, init_wgt0t1,
+      init_wgpt0, init_wgst0,
+      init_wgst0t1, init_wgspt0,
+      /* the same eight with fog (f) */
+      init_wgf, init_wgfs,
+      init_wgft0, init_wgft0t1,
+      init_wgfpt0, init_wgfst0,
+      init_wgfst0t1, init_wgfspt0,
+      /* texture-only variants */
+      init_t0, init_t0t1,
+      /* fog, alone and with textures */
+      init_f,
+      init_ft0, init_ft0t1,
+      /* colour variants without position */
+      init_g, init_gs,
+      init_gt0, init_gt0t1,
+      init_gst0, init_gst0t1,
+      /* colour + fog variants without position */
+      init_gf, init_gfs,
+      init_gft0, init_gft0t1,
+      init_gfst0, init_gfst0t1
+   };
+   const GLuint total = sizeof(setup_init) / sizeof(setup_init[0]);
+   GLuint k;
+
+   for (k = 0 ; k < total ; k++)
+      setup_init[k]();
+}
+
+
+
+static void i810PrintSetupFlags(const char *msg, GLuint flags )
+{
+   /* Debug aid: print msg, the raw flag word and a name per set bit. */
+   const char *xyzw = (flags & I810_XYZW_BIT) ? " xyzw," : "";
+   const char *rgba = (flags & I810_RGBA_BIT) ? " rgba," : "";
+   const char *spec = (flags & I810_SPEC_BIT) ? " spec," : "";
+   const char *fog  = (flags & I810_FOG_BIT)  ? " fog," : "";
+   const char *tex0 = (flags & I810_TEX0_BIT) ? " tex-0," : "";
+   const char *tex1 = (flags & I810_TEX1_BIT) ? " tex-1," : "";
+   fprintf(stderr, "%s(%x): %s%s%s%s%s%s\n",
+	   msg, (int)flags, xyzw, rgba, spec, fog, tex0, tex1);
+}
+
+
+
+void i810CheckTexSizes( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   i810ContextPtr imesa = I810_CONTEXT( ctx );
+
+   if (setup_tab[imesa->SetupIndex].check_tex_sizes(ctx))
+      return;
+
+   /* The check failed: invalidate stored verts and switch to the
+    * PTEX flavour of the current setup.
+    */
+   imesa->SetupNewInputs = ~0;
+   imesa->SetupIndex |= I810_PTEX_BIT;
+
+   if (imesa->Fallback) {
+      tnl->Driver.Render.Start(ctx);
+   } else if (!(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
+      tnl->Driver.Render.Interp = setup_tab[imesa->SetupIndex].interp;
+      tnl->Driver.Render.CopyPV = setup_tab[imesa->SetupIndex].copy_pv;
+   }
+}
+
+void i810BuildVertices( GLcontext *ctx,
+			GLuint start,
+			GLuint count,
+			GLuint newinputs )
+{
+   /* Re-emit hardware vertices start..count-1 for the inputs that
+    * changed, using the emit function of the matching setup variant.
+    */
+   i810ContextPtr imesa = I810_CONTEXT( ctx );
+   const GLuint stride = imesa->vertex_size * sizeof(int);
+   GLubyte *dest = (GLubyte *)imesa->verts + (start * stride);
+   GLuint which = 0;
+
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* Fold in inputs invalidated since the last build. */
+   newinputs |= imesa->SetupNewInputs;
+   imesa->SetupNewInputs = 0;
+
+   if (!newinputs)
+      return;
+
+   /* New positions: run the full emit function for the current setup. */
+   if (newinputs & VERT_BIT_POS) {
+      setup_tab[imesa->SetupIndex].emit( ctx, start, count, dest, stride );
+      return;
+   }
+
+   /* Otherwise collect just the attributes that changed... */
+   if (newinputs & VERT_BIT_COLOR0) which |= I810_RGBA_BIT;
+   if (newinputs & VERT_BIT_COLOR1) which |= I810_SPEC_BIT;
+   if (newinputs & VERT_BIT_TEX0)   which |= I810_TEX0_BIT;
+   if (newinputs & VERT_BIT_TEX1)   which |= I810_TEX1_BIT;
+   if (newinputs & VERT_BIT_FOG)    which |= I810_FOG_BIT;
+
+   /* ...but with PTEX set, select everything in the current setup
+    * rather than only the changed attributes.
+    */
+   if (imesa->SetupIndex & I810_PTEX_BIT)
+      which = ~0;
+
+   /* Keep only attributes that are part of the current setup. */
+   which &= imesa->SetupIndex;
+
+   if (which)
+      setup_tab[which].emit( ctx, start, count, dest, stride );
+}
+
+void i810ChooseVertexState( GLcontext *ctx )
+{
+   /* Derive the setup index from GL state, install the matching interp and
+    * copy-pv hooks, and update the hardware vertex format if it changed. */
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   i810ContextPtr imesa = I810_CONTEXT( ctx );
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint setup = I810_XYZW_BIT|I810_RGBA_BIT;
+
+   if (caps & DD_SEPARATE_SPECULAR)
+      setup |= I810_SPEC_BIT;
+   if (ctx->Fog.Enabled)
+      setup |= I810_FOG_BIT;
+
+   /* Unit 1 enabled selects both texcoord sets; unit 0 alone selects one. */
+   if (ctx->Texture._EnabledUnits & 0x2)
+      setup |= I810_TEX1_BIT|I810_TEX0_BIT;
+   else if (ctx->Texture._EnabledUnits & 0x1)
+      setup |= I810_TEX0_BIT;
+   imesa->SetupIndex = setup;
+   if (I810_DEBUG & (DEBUG_VERTS|DEBUG_STATE))
+      i810PrintSetupFlags( __FUNCTION__, setup );
+
+   if (caps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED)) {
+      tnl->Driver.Render.Interp = i810_interp_extras;
+      tnl->Driver.Render.CopyPV = i810_copy_pv_extras;
+   } else {
+      tnl->Driver.Render.Interp = setup_tab[setup].interp;
+      tnl->Driver.Render.CopyPV = setup_tab[setup].copy_pv;
+   }
+
+   /* Only touch hardware state when the vertex format really changes. */
+   if (setup_tab[setup].vertex_format == imesa->Setup[I810_CTXREG_VF])
+      return;
+   I810_STATECHANGE(imesa, I810_UPLOAD_CTX);
+   imesa->Setup[I810_CTXREG_VF] = setup_tab[setup].vertex_format;
+   imesa->vertex_size = setup_tab[setup].vertex_size;
+}
+
+
+
+void *i810_emit_contiguous_verts( GLcontext *ctx,
+				  GLuint start, GLuint count,
+				  void *dest )
+{
+   /* Emit vertices start..count-1 packed at dest; returns the end address. */
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   const GLuint stride = imesa->vertex_size * 4;
+   setup_tab[imesa->SetupIndex].emit( ctx, start, count, dest, stride );
+   return (char *)dest + stride * (count - start);
+}
+
+
+
+void i810InitVB( GLcontext *ctx )
+{
+   /* Allocate the vertex store (vb.Size * 4 * 16 bytes, with 32 passed as
+    * the alignment) and build setup_tab on the first call only.
+    */
+   static int setup_ready = 0;
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   const GLuint nverts = TNL_CONTEXT(ctx)->vb.Size;
+
+   imesa->verts = (GLubyte *)_mesa_align_malloc(nverts * 4 * 16, 32);
+   if (!setup_ready) {
+      init_setup_tab();
+      setup_ready = 1;
+   }
+}
+
+
+void i810FreeVB( GLcontext *ctx )
+{
+   /* Release the vertex store, if any, and clear the stale pointer. */
+   i810ContextPtr imesa = I810_CONTEXT(ctx);
+   if (imesa->verts == NULL) return;
+   _mesa_align_free(imesa->verts);
+   imesa->verts = NULL;
+}
diff --git a/src/mesa/drivers/dri/i810/i810vb.h b/src/mesa/drivers/dri/i810/i810vb.h
new file mode 100644
index 0000000000..1f704e4569
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/i810vb.h
@@ -0,0 +1,59 @@
+/*
+ * GLX Hardware Device Driver for Intel i810
+ * Copyright (C) 1999 Keith Whitwell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+
+#ifndef I810VB_INC
+#define I810VB_INC
+
+#include "main/mtypes.h"
+#include "swrast/swrast.h"
+
+#define _I810_NEW_VERTEX (_NEW_TEXTURE |			\
+			  _DD_NEW_SEPARATE_SPECULAR |		\
+			  _DD_NEW_TRI_UNFILLED |		\
+			  _DD_NEW_TRI_LIGHT_TWOSIDE |		\
+			  _NEW_FOG)	/* presumably the state changes after which the vertex setup must be re-chosen (texture, fog, two-side/unfilled are read in i810vb.c) -- confirm */
+
+
+extern void i810ChooseVertexState( GLcontext *ctx );
+extern void i810CheckTexSizes( GLcontext *ctx );
+extern void i810BuildVertices( GLcontext *ctx,
+			       GLuint start,
+			       GLuint count,
+			       GLuint newinputs );
+
+/* Emits through the active setup_tab entry; returns dest advanced by (count - start) vertices. */
+extern void *i810_emit_contiguous_verts( GLcontext *ctx,
+					 GLuint start,
+					 GLuint count,
+					 void *dest );
+/* NOTE(review): i810Vertex must already be declared here; presumably a driver header is included first -- confirm. */
+extern void i810_translate_vertex( GLcontext *ctx,
+				   const i810Vertex *src,
+				   SWvertex *dst );
+/* Allocate / free the imesa->verts storage (i810InitVB also builds setup_tab on its first call). */
+extern void i810InitVB( GLcontext *ctx );
+extern void i810FreeVB( GLcontext *ctx );
+
+#endif
diff --git a/src/mesa/drivers/dri/i810/server/i810_common.h b/src/mesa/drivers/dri/i810/server/i810_common.h
new file mode 100644
index 0000000000..29be444b45
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/server/i810_common.h
@@ -0,0 +1,191 @@
+/* i810_common.h -- common header definitions for I810 2D/3D/DRM suite
+ *
+ * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Converted to common header format:
+ *   Jens Owen <jens@tungstengraphics.com>
+ *
+ *
+ */
+
+/* WARNING: If you change any of these defines, make sure to change
+ * the kernel include file as well (i810_drm.h)
+ */
+
+#ifndef _I810_COMMON_H_
+#define _I810_COMMON_H_
+
+#ifndef _I810_DEFINES_
+#define _I810_DEFINES_
+#define I810_USE_BATCH 1
+
+#define I810_DMA_BUF_ORDER     12
+#define I810_DMA_BUF_SZ        (1<<I810_DMA_BUF_ORDER)
+#define I810_DMA_BUF_NR        256
+
+#define I810_NR_SAREA_CLIPRECTS 8
+
+/* Each region is a minimum of 64k, and there are at most 64 of them.
+ */
+#define I810_NR_TEX_REGIONS 64
+#define I810_LOG_MIN_TEX_REGION_SIZE 16
+
+/* Destbuffer state
+ *    - backbuffer linear offset and pitch -- invariant in the current dri
+ *    - zbuffer linear offset and pitch -- also invariant
+ *    - drawing origin in back and depth buffers.
+ *
+ * Keep the depth/back buffer state here to accommodate private buffers
+ * in the future.
+ */
+#define I810_DESTREG_DI0  0		/* CMD_OP_DESTBUFFER_INFO (2 dwords) */
+#define I810_DESTREG_DI1  1
+#define I810_DESTREG_DV0  2		/* GFX_OP_DESTBUFFER_VARS (2 dwords) */
+#define I810_DESTREG_DV1  3
+#define I810_DESTREG_DR0  4		/* GFX_OP_DRAWRECT_INFO (4 dwords) */
+#define I810_DESTREG_DR1  5
+#define I810_DESTREG_DR2  6
+#define I810_DESTREG_DR3  7
+#define I810_DESTREG_DR4  8		/* NOTE(review): DR0..DR4 is five slots vs. "4 dwords" above -- presumably opcode + 4 operands; confirm */
+#define I810_DEST_SETUP_SIZE 10		/* array length of the dest-buffer state; NOTE(review): indices above only reach 8 -- confirm */
+
+/* Context state
+ */
+#define I810_CTXREG_CF0   0		/* GFX_OP_COLOR_FACTOR */
+#define I810_CTXREG_CF1   1
+#define I810_CTXREG_ST0   2		/* GFX_OP_STIPPLE */
+#define I810_CTXREG_ST1   3
+#define I810_CTXREG_VF    4		/* GFX_OP_VERTEX_FMT */
+#define I810_CTXREG_MT    5		/* GFX_OP_MAP_TEXELS */
+#define I810_CTXREG_MC0   6		/* GFX_OP_MAP_COLOR_STAGES - stage 0 */
+#define I810_CTXREG_MC1   7		/* GFX_OP_MAP_COLOR_STAGES - stage 1 */
+#define I810_CTXREG_MC2   8		/* GFX_OP_MAP_COLOR_STAGES - stage 2 */
+#define I810_CTXREG_MA0   9		/* GFX_OP_MAP_ALPHA_STAGES - stage 0 */
+#define I810_CTXREG_MA1   10		/* GFX_OP_MAP_ALPHA_STAGES - stage 1 */
+#define I810_CTXREG_MA2   11		/* GFX_OP_MAP_ALPHA_STAGES - stage 2 */
+#define I810_CTXREG_SDM   12		/* GFX_OP_SRC_DEST_MONO */
+#define I810_CTXREG_FOG   13		/* GFX_OP_FOG_COLOR */
+#define I810_CTXREG_B1    14		/* GFX_OP_BOOL_1 */
+#define I810_CTXREG_B2    15		/* GFX_OP_BOOL_2 */
+#define I810_CTXREG_LCS   16		/* GFX_OP_LINEWIDTH_CULL_SHADE_MODE */
+#define I810_CTXREG_PV    17		/* GFX_OP_PV_RULE -- Invariant! */
+#define I810_CTXREG_ZA    18		/* GFX_OP_ZBIAS_ALPHAFUNC */
+#define I810_CTXREG_AA    19		/* GFX_OP_ANTIALIAS */
+#define I810_CTX_SETUP_SIZE 20		/* number of context-state slots (indices 0..19 above) */
+
+/* Texture state (per tex unit)
+ */
+#define I810_TEXREG_MI0  0		/* GFX_OP_MAP_INFO (4 dwords) */
+#define I810_TEXREG_MI1  1
+#define I810_TEXREG_MI2  2
+#define I810_TEXREG_MI3  3
+#define I810_TEXREG_MF   4		/* GFX_OP_MAP_FILTER */
+#define I810_TEXREG_MLC  5		/* GFX_OP_MAP_LOD_CTL */
+#define I810_TEXREG_MLL  6		/* GFX_OP_MAP_LOD_LIMITS */
+#define I810_TEXREG_MCS  7		/* GFX_OP_MAP_COORD_SETS ??? */
+#define I810_TEX_SETUP_SIZE 8
+
+/* Driver specific DRM command indices
+ * NOTE: these are not OS specific, but they are driver specific
+ */
+#define DRM_I810_INIT                     0x00
+#define DRM_I810_VERTEX                   0x01
+#define DRM_I810_CLEAR                    0x02
+#define DRM_I810_FLUSH                    0x03
+#define DRM_I810_GETAGE                   0x04
+#define DRM_I810_GETBUF                   0x05
+#define DRM_I810_SWAP                     0x06
+#define DRM_I810_COPY                     0x07
+#define DRM_I810_DOCOPY                   0x08
+#define DRM_I810_OV0INFO                  0x09
+#define DRM_I810_FSTATUS                  0x0a
+#define DRM_I810_OV0FLIP                  0x0b
+#define DRM_I810_MC                       0x0c
+#define DRM_I810_RSTATUS                  0x0d
+#define DRM_I810_FLIP                     0x0e
+
+#endif
+
+typedef enum _drmI810Initfunc {
+	I810_INIT_DMA = 0x01,
+	I810_CLEANUP_DMA = 0x02,
+	I810_INIT_DMA_1_4 = 0x03
+} drmI810Initfunc;
+
+typedef struct {
+   drmI810Initfunc func;
+   unsigned int mmio_offset;
+   unsigned int buffers_offset;
+   int sarea_priv_offset;
+   unsigned int ring_start;
+   unsigned int ring_end;
+   unsigned int ring_size;
+   unsigned int front_offset;
+   unsigned int back_offset;
+   unsigned int depth_offset;
+   unsigned int overlay_offset;
+   unsigned int overlay_physical;
+   unsigned int w;
+   unsigned int h;
+   unsigned int pitch;
+   unsigned int pitch_bits;
+} drmI810Init;
+
+typedef struct {
+   void *virtual;	/* NOTE: `virtual` is a C++ keyword, so this header cannot be compiled as C++ */
+   int request_idx;
+   int request_size;
+   int granted;
+} drmI810DMA;
+
+/* Flags for clear ioctl
+ */
+#define I810_FRONT   0x1
+#define I810_BACK    0x2
+#define I810_DEPTH   0x4
+/* Argument block for the clear ioctl (presumably DRM_I810_CLEAR -- confirm). */
+typedef struct {
+   int clear_color;
+   int clear_depth;
+   int flags;		/* presumably an OR of I810_FRONT / I810_BACK / I810_DEPTH -- confirm */
+} drmI810Clear;
+
+typedef struct {
+   int idx;				/* buffer index */
+   int used;				/* nr bytes in use */
+   int discard;				/* client is finished with the buffer? */
+} drmI810Vertex;
+
+/* Flags for vertex ioctl: primitive codes 0..7 placed at bit 18
+ */
+#define PR_TRIANGLES         (0x0<<18)
+#define PR_TRISTRIP_0        (0x1<<18)
+#define PR_TRISTRIP_1        (0x2<<18)
+#define PR_TRIFAN            (0x3<<18)
+#define PR_POLYGON           (0x4<<18)
+#define PR_LINES             (0x5<<18)
+#define PR_LINESTRIP         (0x6<<18)
+#define PR_RECTS             (0x7<<18)
+#define PR_MASK              (0x7<<18)	/* covers the 3-bit field (bits 18..20); same value as PR_RECTS */
+
+#endif
diff --git a/src/mesa/drivers/dri/i810/server/i810_dri.h b/src/mesa/drivers/dri/i810/server/i810_dri.h
new file mode 100644
index 0000000000..4a714f0306
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/server/i810_dri.h
@@ -0,0 +1,127 @@
+
+#ifndef _I810_DRI_
+#define _I810_DRI_
+
+#include "xf86drm.h"
+#include "i810_common.h"
+
+#define I810_MAX_DRAWABLES 256
+
+typedef struct {
+   drm_handle_t regs;
+   drmSize regsSize;
+
+   drmSize backbufferSize;
+   drm_handle_t backbuffer;
+
+   drmSize depthbufferSize;
+   drm_handle_t depthbuffer;
+
+   drm_handle_t textures;
+   int textureSize;
+
+   drm_handle_t agp_buffers;
+   drmSize agp_buf_size;
+
+   int deviceID;
+   int width;
+   int height;
+   int mem;
+   int cpp;
+   int bitsPerPixel;
+   int fbOffset;
+   int fbStride;
+
+   int backOffset;
+   int depthOffset;
+
+   int auxPitch;
+   int auxPitchBits;
+
+   int logTextureGranularity;
+   int textureOffset;
+
+   /* For non-dma direct rendering.
+    */
+   int ringOffset;
+   int ringSize;
+
+   drmBufMapPtr drmBufs;
+   int irq;
+   unsigned int sarea_priv_offset;
+
+} I810DRIRec, *I810DRIPtr;
+
+/* WARNING: Do not change the SAREA structure without changing the kernel
+ * as well */
+
+#define I810_UPLOAD_TEX0IMAGE  0x1	/* handled clientside */
+#define I810_UPLOAD_TEX1IMAGE  0x2	/* handled clientside */
+#define I810_UPLOAD_CTX        0x4
+#define I810_UPLOAD_BUFFERS    0x8
+#define I810_UPLOAD_TEX0       0x10
+#define I810_UPLOAD_TEX1       0x20
+#define I810_UPLOAD_CLIPRECTS  0x40
+
+typedef struct {
+   unsigned char next, prev;		/* indices to form a circular LRU  */
+   unsigned char in_use;		/* owned by a client, or free? */
+   int age;				/* tracked by clients to update local LRUs */
+} I810TexRegionRec, *I810TexRegionPtr;	/* NOTE(review): I810SAREARec.texList is declared as drmTextureRegion, not this type -- confirm this typedef is still used */
+
+typedef struct {
+   unsigned int ContextState[I810_CTX_SETUP_SIZE];
+   unsigned int BufferState[I810_DEST_SETUP_SIZE];
+   unsigned int TexState[2][I810_TEX_SETUP_SIZE];
+   unsigned int dirty;
+
+   unsigned int nbox;
+   drm_clip_rect_t boxes[I810_NR_SAREA_CLIPRECTS];
+
+   /* Maintain an LRU of contiguous regions of texture space.  If
+    * you think you own a region of texture memory, and it has an
+    * age different to the one you set, then you are mistaken and
+    * it has been stolen by another client.  If global texAge
+    * hasn't changed, there is no need to walk the list.
+    *
+    * These regions can be used as a proxy for the fine-grained
+    * texture information of other clients - by maintaining them
+    * in the same lru which is used to age their own textures,
+    * clients have an approximate lru for the whole of global
+    * texture space, and can make informed decisions as to which
+    * areas to kick out.  There is no need to choose whether to
+    * kick out your own texture or someone else's - simply eject
+    * them all in LRU order.
+    */
+ 
+   drmTextureRegion texList[I810_NR_TEX_REGIONS + 1];
+
+   /* The last element of texList (above) is the sentinel */
+   int texAge;				/* last time texture was uploaded */
+
+   int last_enqueue;			/* last time a buffer was enqueued */
+   int last_dispatch;			/* age of the most recently dispatched buffer */
+   int last_quiescent;			/*  */
+
+   int ctxOwner;			/* last context to upload state */
+
+   int vertex_prim;
+
+   int pf_enabled;                  /* is pageflipping allowed? */
+   int pf_active;                   /* is pageflipping active right now? */
+   int pf_current_page; 	    /* which buffer is being displayed? */
+
+
+} I810SAREARec, *I810SAREAPtr;
+
+typedef struct {
+   /* Nothing here yet */
+   int dummy;
+} I810ConfigPrivRec, *I810ConfigPrivPtr;
+
+typedef struct {
+   /* Nothing here yet */
+   int dummy;
+} I810DRIContextRec, *I810DRIContextPtr;
+
+#endif
diff --git a/src/mesa/drivers/dri/i810/server/i810_reg.h b/src/mesa/drivers/dri/i810/server/i810_reg.h
new file mode 100644
index 0000000000..e7e5081038
--- /dev/null
+++ b/src/mesa/drivers/dri/i810/server/i810_reg.h
@@ -0,0 +1,991 @@
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ *   based on the i740 driver by
+ *        Kevin E. Martin <kevin@precisioninsight.com> 
+ *   
+ *
+ */
+
+#ifndef _I810_REG_H
+#define _I810_REG_H
+
+/* I/O register offsets
+ */
+#define SRX 0x3C4		/* p208 */
+#define GRX 0x3CE		/* p213 */
+#define ARX 0x3C0		/* p224 */
+
+/* VGA Color Palette Registers */
+#define DACMASK  0x3C6		/* p232 */
+#define DACSTATE 0x3C7		/* p232 */
+#define DACRX    0x3C7		/* p233 */
+#define DACWX    0x3C8		/* p233 */
+#define DACDATA  0x3C9		/* p233 */
+
+/* CRT Controller Registers (CRX) */
+#define START_ADDR_HI        0x0C /* p246 */
+#define START_ADDR_LO        0x0D /* p247 */
+#define VERT_SYNC_END        0x11 /* p249 */
+#define EXT_VERT_TOTAL       0x30 /* p257 */
+#define EXT_VERT_DISPLAY     0x31 /* p258 */
+#define EXT_VERT_SYNC_START  0x32 /* p259 */
+#define EXT_VERT_BLANK_START 0x33 /* p260 */
+#define EXT_HORIZ_TOTAL      0x35 /* p261 */
+#define EXT_HORIZ_BLANK      0x39 /* p261 */
+#define EXT_START_ADDR       0x40 /* p262 */
+#define EXT_START_ADDR_ENABLE    0x80 
+#define EXT_OFFSET           0x41 /* p263 */
+#define EXT_START_ADDR_HI    0x42 /* p263 */
+#define INTERLACE_CNTL       0x70 /* p264 */
+#define INTERLACE_ENABLE         0x80 
+#define INTERLACE_DISABLE        0x00 
+
+/* Miscellaneous Output Register 
+ */
+#define MSR_R          0x3CC	/* p207 */
+#define MSR_W          0x3C2	/* p207 */
+#define IO_ADDR_SELECT     0x01
+
+#define MDA_BASE       0x3B0	/* p207 */
+#define CGA_BASE       0x3D0	/* p207 */
+
+/* CR80 - IO Control, p264
+ */
+#define IO_CTNL            0x80
+#define EXTENDED_ATTR_CNTL     0x02
+#define EXTENDED_CRTC_CNTL     0x01
+
+/* GR10 - Address mapping, p221
+ */
+#define ADDRESS_MAPPING    0x10
+#define PAGE_TO_LOCAL_MEM_ENABLE 0x10
+#define GTT_MEM_MAP_ENABLE     0x08
+#define PACKED_MODE_ENABLE     0x04
+#define LINEAR_MODE_ENABLE     0x02
+#define PAGE_MAPPING_ENABLE    0x01
+
+/* Blitter control, p378
+ */
+#define BITBLT_CNTL        0x7000c
+#define COLEXP_MODE            0x30
+#define COLEXP_8BPP            0x00
+#define COLEXP_16BPP           0x10
+#define COLEXP_24BPP           0x20
+#define COLEXP_RESERVED        0x30
+#define BITBLT_STATUS          0x01
+
+/* p375. 
+ */
+#define DISPLAY_CNTL       0x70008
+#define VGA_WRAP_MODE          0x02
+#define VGA_WRAP_AT_256KB      0x00
+#define VGA_NO_WRAP            0x02
+#define GUI_MODE               0x01
+#define STANDARD_VGA_MODE      0x00
+#define HIRES_MODE             0x01
+
+/* p375
+ */
+#define PIXPIPE_CONFIG_0   0x70009
+#define DAC_8_BIT              0x80
+#define DAC_6_BIT              0x00
+#define HW_CURSOR_ENABLE       0x10
+#define EXTENDED_PALETTE       0x01
+
+/* p375
+ */
+#define PIXPIPE_CONFIG_1   0x7000a
+#define DISPLAY_COLOR_MODE     0x0F
+#define DISPLAY_VGA_MODE       0x00
+#define DISPLAY_8BPP_MODE      0x02
+#define DISPLAY_15BPP_MODE     0x04
+#define DISPLAY_16BPP_MODE     0x05
+#define DISPLAY_24BPP_MODE     0x06
+#define DISPLAY_32BPP_MODE     0x07
+
+/* p375
+ */
+#define PIXPIPE_CONFIG_2   0x7000b
+#define DISPLAY_GAMMA_ENABLE   0x08
+#define DISPLAY_GAMMA_DISABLE  0x00
+#define OVERLAY_GAMMA_ENABLE   0x04
+#define OVERLAY_GAMMA_DISABLE  0x00
+
+
+/* p380
+ */
+#define DISPLAY_BASE       0x70020
+#define DISPLAY_BASE_MASK  0x03fffffc
+
+
+/* Cursor control registers, pp383-384
+ */
+/* Desktop (845G, 865G) */
+#define CURSOR_CONTROL     0x70080
+#define CURSOR_ENABLE          0x80000000
+#define CURSOR_GAMMA_ENABLE    0x40000000
+#define CURSOR_STRIDE_MASK     0x30000000
+#define CURSOR_FORMAT_SHIFT    24
+#define CURSOR_FORMAT_MASK     (0x07 << CURSOR_FORMAT_SHIFT)
+#define CURSOR_FORMAT_2C       (0x00 << CURSOR_FORMAT_SHIFT)
+#define CURSOR_FORMAT_3C       (0x01 << CURSOR_FORMAT_SHIFT)
+#define CURSOR_FORMAT_4C       (0x02 << CURSOR_FORMAT_SHIFT)
+#define CURSOR_FORMAT_ARGB     (0x04 << CURSOR_FORMAT_SHIFT)
+#define CURSOR_FORMAT_XRGB     (0x05 << CURSOR_FORMAT_SHIFT)
+
+/* Mobile and i810 */
+#define CURSOR_A_CONTROL   CURSOR_CONTROL
+#define CURSOR_ORIGIN_SCREEN   0x00	/* i810 only */
+#define CURSOR_ORIGIN_DISPLAY  0x1	/* i810 only */
+#define CURSOR_MODE            0x27
+#define CURSOR_MODE_DISABLE    0x00
+#define CURSOR_MODE_32_4C_AX   0x01	/* i810 only */
+#define CURSOR_MODE_64_3C      0x04
+#define CURSOR_MODE_64_4C_AX   0x05
+#define CURSOR_MODE_64_4C      0x06
+#define CURSOR_MODE_64_32B_AX  0x07
+#define CURSOR_MODE_64_ARGB_AX (0x20 | CURSOR_MODE_64_32B_AX)
+#define MCURSOR_PIPE_SELECT    (1 << 28)
+#define MCURSOR_PIPE_A         0x00
+#define MCURSOR_PIPE_B         (1 << 28)
+#define MCURSOR_GAMMA_ENABLE   (1 << 26)
+#define MCURSOR_MEM_TYPE_LOCAL (1 << 25)
+
+
+#define CURSOR_BASEADDR    0x70084
+#define CURSOR_A_BASE      CURSOR_BASEADDR
+#define CURSOR_BASEADDR_MASK 0x1FFFFF00
+#define CURSOR_A_POSITION  0x70088
+#define CURSOR_POS_SIGN        0x8000
+#define CURSOR_POS_MASK        0x007FF
+#define CURSOR_X_SHIFT	       0
+#define CURSOR_Y_SHIFT         16
+#define CURSOR_X_LO        0x70088
+#define CURSOR_X_HI        0x70089
+#define CURSOR_X_POS           0x00
+#define CURSOR_X_NEG           0x80
+#define CURSOR_Y_LO        0x7008A
+#define CURSOR_Y_HI        0x7008B
+#define CURSOR_Y_POS           0x00
+#define CURSOR_Y_NEG           0x80
+
+#define CURSOR_A_PALETTE0  0x70090
+#define CURSOR_A_PALETTE1  0x70094
+#define CURSOR_A_PALETTE2  0x70098
+#define CURSOR_A_PALETTE3  0x7009C
+
+#define CURSOR_SIZE	   0x700A0
+#define CURSOR_SIZE_MASK       0x3FF
+#define CURSOR_SIZE_HSHIFT     0
+#define CURSOR_SIZE_VSHIFT     12
+
+
+/* Similar registers exist in Device 0 on the i810 (pp55-65), but I'm
+ * not sure they refer to local (graphics) memory.
+ *
+ * These details are for the local memory control registers,
+ * (pp301-310).  The test machines are not equipped with local memory,
+ * so nothing is tested.  Only a single row seems to be supported.
+ */
+#define DRAM_ROW_TYPE      0x3000
+#define DRAM_ROW_0             0x01
+#define DRAM_ROW_0_SDRAM       0x01
+#define DRAM_ROW_0_EMPTY       0x00
+#define DRAM_ROW_CNTL_LO   0x3001
+#define DRAM_PAGE_MODE_CTRL    0x10
+#define DRAM_RAS_TO_CAS_OVRIDE 0x08
+#define DRAM_CAS_LATENCY       0x04
+#define DRAM_RAS_TIMING        0x02
+#define DRAM_RAS_PRECHARGE     0x01
+#define DRAM_ROW_CNTL_HI   0x3002
+#define DRAM_REFRESH_RATE      0x18
+#define DRAM_REFRESH_DISABLE   0x00
+#define DRAM_REFRESH_60HZ      0x08
+#define DRAM_REFRESH_FAST_TEST 0x10
+#define DRAM_REFRESH_RESERVED  0x18
+#define DRAM_SMS               0x07
+#define DRAM_SMS_NORMAL        0x00
+#define DRAM_SMS_NOP_ENABLE    0x01
+#define DRAM_SMS_ABPCE         0x02
+#define DRAM_SMS_MRCE          0x03
+#define DRAM_SMS_CBRCE         0x04
+
+/* p307
+ */
+#define DPMS_SYNC_SELECT   0x5002
+#define VSYNC_CNTL             0x08
+#define VSYNC_ON               0x00
+#define VSYNC_OFF              0x08
+#define HSYNC_CNTL             0x02
+#define HSYNC_ON               0x00
+#define HSYNC_OFF              0x02
+
+
+
+/* p317, 319
+ */
+#define VCLK2_VCO_M        0x6008 /* treat as 16 bit? (includes msbs) */
+#define VCLK2_VCO_N        0x600a
+#define VCLK2_VCO_DIV_SEL  0x6012
+
+#define VCLK_DIVISOR_VGA0   0x6000
+#define VCLK_DIVISOR_VGA1   0x6004
+#define VCLK_POST_DIV	    0x6010
+
+#define POST_DIV_SELECT        0x70
+#define POST_DIV_1             0x00
+#define POST_DIV_2             0x10
+#define POST_DIV_4             0x20
+#define POST_DIV_8             0x30
+#define POST_DIV_16            0x40
+#define POST_DIV_32            0x50
+#define VCO_LOOP_DIV_BY_4M     0x00
+#define VCO_LOOP_DIV_BY_16M    0x04
+
+
+/* Instruction Parser Mode Register 
+ *    - p281
+ *    - 2 new bits.
+ */
+#define INST_PM                  0x20c0	
+#define AGP_SYNC_PACKET_FLUSH_ENABLE 0x20 /* reserved */
+#define SYNC_PACKET_FLUSH_ENABLE     0x10
+#define TWO_D_INST_DISABLE           0x08
+#define THREE_D_INST_DISABLE         0x04
+#define STATE_VAR_UPDATE_DISABLE     0x02
+#define PAL_STIP_DISABLE             0x01
+
+#define INST_DONE                0x2090
+#define INST_PS                  0x20c4
+
+#define MEMMODE                  0x20dc
+
+
+/* Instruction parser error register.  p279
+ */
+#define IPEIR                  0x2088
+#define IPEHR                  0x208C
+
+
+/* General error reporting regs, p296
+ */
+#define EIR               0x20B0
+#define EMR               0x20B4
+#define ESR               0x20B8
+#define IP_ERR                    0x0001
+#define ERROR_RESERVED            0xffc6
+
+
+/* Interrupt Control Registers 
+ *   - new bits for i810
+ *   - new register hwstam (mask)
+ */
+#define HWSTAM               0x2098 /* p290 */
+#define IER                  0x20a0 /* p291 */
+#define IIR                  0x20a4 /* p292 */
+#define IMR                  0x20a8 /* p293 */
+#define ISR                  0x20ac /* p294 */
+#define HW_ERROR                 0x8000
+#define SYNC_STATUS_TOGGLE       0x1000
+#define DPY_0_FLIP_PENDING       0x0800
+#define DPY_1_FLIP_PENDING       0x0400	/* not implemented on i810 */
+#define OVL_0_FLIP_PENDING       0x0200
+#define OVL_1_FLIP_PENDING       0x0100	/* not implemented on i810 */
+#define DPY_0_VBLANK             0x0080
+#define DPY_0_EVENT              0x0040
+#define DPY_1_VBLANK             0x0020	/* not implemented on i810 */
+#define DPY_1_EVENT              0x0010	/* not implemented on i810 */
+#define HOST_PORT_EVENT          0x0008	/*  */
+#define CAPTURE_EVENT            0x0004	/*  */
+#define USER_DEFINED             0x0002
+#define BREAKPOINT               0x0001
+
+
+#define INTR_RESERVED            (0x6000 | 		\
+				  DPY_1_FLIP_PENDING |	\
+				  OVL_1_FLIP_PENDING |	\
+				  DPY_1_VBLANK |	\
+				  DPY_1_EVENT |		\
+				  HOST_PORT_EVENT |	\
+				  CAPTURE_EVENT )
+
+/* FIFO Watermark and Burst Length Control Register 
+ *
+ * - different offset and contents on i810 (p299) (fewer bits per field)
+ * - some overlay fields added
+ * - what does it all mean?
+ */
+#define FWATER_BLC       0x20d8
+#define FWATER_BLC2	 0x20dc
+#define MM_BURST_LENGTH     0x00700000
+#define MM_FIFO_WATERMARK   0x0001F000
+#define LM_BURST_LENGTH     0x00000700
+#define LM_FIFO_WATERMARK   0x0000001F
+
+
+/* Fence/Tiling ranges [0..7]
+ */
+#define FENCE            0x2000
+#define FENCE_NR         8
+
+#define I830_FENCE_START_MASK	0x07f80000
+
+#define FENCE_START_MASK    0x03F80000
+#define FENCE_X_MAJOR       0x00000000
+#define FENCE_Y_MAJOR       0x00001000
+#define FENCE_SIZE_MASK     0x00000700
+#define FENCE_SIZE_512K     0x00000000
+#define FENCE_SIZE_1M       0x00000100
+#define FENCE_SIZE_2M       0x00000200
+#define FENCE_SIZE_4M       0x00000300
+#define FENCE_SIZE_8M       0x00000400
+#define FENCE_SIZE_16M      0x00000500
+#define FENCE_SIZE_32M      0x00000600
+#define FENCE_SIZE_64M	    0x00000700
+#define FENCE_PITCH_MASK    0x00000070
+#define FENCE_PITCH_1       0x00000000
+#define FENCE_PITCH_2       0x00000010
+#define FENCE_PITCH_4       0x00000020
+#define FENCE_PITCH_8       0x00000030
+#define FENCE_PITCH_16      0x00000040
+#define FENCE_PITCH_32      0x00000050
+#define FENCE_PITCH_64	    0x00000060
+#define FENCE_VALID         0x00000001
+
+
+/* Registers to control page table, p274
+ */
+#define PGETBL_CTL       0x2020
+#define PGETBL_ADDR_MASK    0xFFFFF000
+#define PGETBL_ENABLE_MASK  0x00000001
+#define PGETBL_ENABLED      0x00000001
+
+/* Register containing page table error results, p276
+ */
+#define PGE_ERR          0x2024
+#define PGE_ERR_ADDR_MASK   0xFFFFF000
+#define PGE_ERR_ID_MASK     0x00000038
+#define PGE_ERR_CAPTURE     0x00000000
+#define PGE_ERR_OVERLAY     0x00000008
+#define PGE_ERR_DISPLAY     0x00000010
+#define PGE_ERR_HOST        0x00000018
+#define PGE_ERR_RENDER      0x00000020
+#define PGE_ERR_BLITTER     0x00000028
+#define PGE_ERR_MAPPING     0x00000030
+#define PGE_ERR_CMD_PARSER  0x00000038
+#define PGE_ERR_TYPE_MASK   0x00000007
+#define PGE_ERR_INV_TABLE   0x00000000
+#define PGE_ERR_INV_PTE     0x00000001
+#define PGE_ERR_MIXED_TYPES 0x00000002
+#define PGE_ERR_PAGE_MISS   0x00000003
+#define PGE_ERR_ILLEGAL_TRX 0x00000004
+#define PGE_ERR_LOCAL_MEM   0x00000005
+#define PGE_ERR_TILED       0x00000006
+
+
+
+/* Page table entries loaded via mmio region, p323
+ */
+#define PTE_BASE         0x10000
+#define PTE_ADDR_MASK       0x3FFFF000
+#define PTE_TYPE_MASK       0x00000006
+#define PTE_LOCAL           0x00000002
+#define PTE_MAIN_UNCACHED   0x00000000
+#define PTE_MAIN_CACHED     0x00000006
+#define PTE_VALID_MASK      0x00000001
+#define PTE_VALID           0x00000001
+
+
+/* Ring buffer registers, p277, overview p19
+ */
+#define LP_RING     0x2030
+#define HP_RING     0x2040
+
+#define RING_TAIL      0x00
+#define TAIL_ADDR           0x000FFFF8
+#define I830_TAIL_MASK	    0x001FFFF8
+
+#define RING_HEAD      0x04
+#define HEAD_WRAP_COUNT     0xFFE00000
+#define HEAD_WRAP_ONE       0x00200000
+#define HEAD_ADDR           0x001FFFFC
+#define I830_HEAD_MASK      0x001FFFFC
+
+#define RING_START     0x08
+#define START_ADDR          0x00FFFFF8
+#define I830_RING_START_MASK	0xFFFFF000
+
+#define RING_LEN       0x0C
+#define RING_NR_PAGES       0x000FF000 
+#define I830_RING_NR_PAGES	0x001FF000
+#define RING_REPORT_MASK    0x00000006
+#define RING_REPORT_64K     0x00000002
+#define RING_REPORT_128K    0x00000004
+#define RING_NO_REPORT      0x00000000
+#define RING_VALID_MASK     0x00000001
+#define RING_VALID          0x00000001
+#define RING_INVALID        0x00000000
+
+
+
+/* BitBlt Instructions
+ *
+ * There are many more masks & ranges yet to add.
+ */
+#define BR00_BITBLT_CLIENT   0x40000000
+#define BR00_OP_COLOR_BLT    0x10000000
+#define BR00_OP_SRC_COPY_BLT 0x10C00000
+#define BR00_OP_FULL_BLT     0x11400000
+#define BR00_OP_MONO_SRC_BLT 0x11800000
+#define BR00_OP_MONO_SRC_COPY_BLT 0x11000000
+#define BR00_OP_MONO_PAT_BLT 0x11C00000
+#define BR00_OP_MONO_SRC_COPY_IMMEDIATE_BLT (0x61 << 22)
+#define BR00_OP_TEXT_IMMEDIATE_BLT 0xc000000
+
+
+#define BR00_TPCY_DISABLE    0x00000000
+#define BR00_TPCY_ENABLE     0x00000010
+
+#define BR00_TPCY_ROP        0x00000000
+#define BR00_TPCY_NO_ROP     0x00000020
+#define BR00_TPCY_EQ         0x00000000
+#define BR00_TPCY_NOT_EQ     0x00000040
+
+#define BR00_PAT_MSB_FIRST   0x00000000	/* ? */
+
+#define BR00_PAT_VERT_ALIGN  0x000000e0
+
+#define BR00_LENGTH          0x0000000F
+
+#define BR09_DEST_ADDR       0x03FFFFFF
+
+#define BR11_SOURCE_PITCH    0x00003FFF
+
+#define BR12_SOURCE_ADDR     0x03FFFFFF
+
+#define BR13_SOLID_PATTERN   0x80000000
+#define BR13_RIGHT_TO_LEFT   0x40000000
+#define BR13_LEFT_TO_RIGHT   0x00000000
+#define BR13_MONO_TRANSPCY   0x20000000
+#define BR13_USE_DYN_DEPTH   0x04000000
+#define BR13_DYN_8BPP        0x00000000
+#define BR13_DYN_16BPP       0x01000000
+#define BR13_DYN_24BPP       0x02000000
+#define BR13_ROP_MASK        0x00FF0000
+#define BR13_DEST_PITCH      0x0000FFFF
+#define BR13_PITCH_SIGN_BIT  0x00008000
+
+#define BR14_DEST_HEIGHT     0xFFFF0000
+#define BR14_DEST_WIDTH      0x0000FFFF
+
+#define BR15_PATTERN_ADDR    0x03FFFFFF
+
+#define BR16_SOLID_PAT_COLOR 0x00FFFFFF
+#define BR16_BACKGND_PAT_CLR 0x00FFFFFF
+
+#define BR17_FGND_PAT_CLR    0x00FFFFFF
+
+#define BR18_SRC_BGND_CLR    0x00FFFFFF
+#define BR19_SRC_FGND_CLR    0x00FFFFFF
+
+
+/* Instruction parser instructions
+ */
+
+#define INST_PARSER_CLIENT   0x00000000
+#define INST_OP_FLUSH        0x02000000
+#define INST_FLUSH_MAP_CACHE 0x00000001
+
+
+#define GFX_OP_USER_INTERRUPT ((0<<29)|(2<<23))
+
+
+/* Registers in the i810 host-pci bridge pci config space which affect
+ * the i810 graphics operations.  
+ */
+#define SMRAM_MISCC         0x70
+#define GMS                    0x000000c0
+#define GMS_DISABLE            0x00000000
+#define GMS_ENABLE_BARE        0x00000040
+#define GMS_ENABLE_512K        0x00000080
+#define GMS_ENABLE_1M          0x000000c0
+#define USMM                   0x00000030 
+#define USMM_DISABLE           0x00000000
+#define USMM_TSEG_ZERO         0x00000010
+#define USMM_TSEG_512K         0x00000020
+#define USMM_TSEG_1M           0x00000030  
+#define GFX_MEM_WIN_SIZE       0x00010000
+#define GFX_MEM_WIN_32M        0x00010000
+#define GFX_MEM_WIN_64M        0x00000000
+
+/* Overkill?  I don't know.  Need to figure out top of mem to make the
+ * SMRAM calculations come out.  Linux seems to have problems
+ * detecting it all on its own, so this seems a reasonable double
+ * check to any user supplied 'mem=...' boot param.
+ *
+ * ... unfortunately this reg doesn't work according to spec on the
+ * test hardware.
+ */
+#define WHTCFG_PAMR_DRP      0x50
+#define SYS_DRAM_ROW_0_SHIFT    16
+#define SYS_DRAM_ROW_1_SHIFT    20
+#define DRAM_MASK           0x0f
+#define DRAM_VALUE_0        0
+#define DRAM_VALUE_1        8
+/* No 2 value defined */
+#define DRAM_VALUE_3        16
+#define DRAM_VALUE_4        16
+#define DRAM_VALUE_5        24
+#define DRAM_VALUE_6        32
+#define DRAM_VALUE_7        32
+#define DRAM_VALUE_8        48
+#define DRAM_VALUE_9        64
+#define DRAM_VALUE_A        64
+#define DRAM_VALUE_B        96
+#define DRAM_VALUE_C        128
+#define DRAM_VALUE_D        128
+#define DRAM_VALUE_E        192
+#define DRAM_VALUE_F        256	/* nice one, geezer */
+#define LM_FREQ_MASK        0x10
+#define LM_FREQ_133         0x10
+#define LM_FREQ_100         0x00
+
+
+
+
+/* These are 3d state registers, but the state is invariant, so we let
+ * the X server handle it:
+ */
+
+
+
+/* GFXRENDERSTATE_COLOR_CHROMA_KEY, p135
+ */
+#define GFX_OP_COLOR_CHROMA_KEY  ((0x3<<29)|(0x1d<<24)|(0x2<<16)|0x1)
+#define CC1_UPDATE_KILL_WRITE    (1<<28)
+#define CC1_ENABLE_KILL_WRITE    (1<<27)
+#define CC1_DISABLE_KILL_WRITE    0
+#define CC1_UPDATE_COLOR_IDX     (1<<26)
+#define CC1_UPDATE_CHROMA_LOW    (1<<25)
+#define CC1_UPDATE_CHROMA_HI     (1<<24)
+#define CC1_CHROMA_LOW_MASK      ((1<<24)-1)
+#define CC2_COLOR_IDX_SHIFT      24
+#define CC2_COLOR_IDX_MASK       (0xffu<<24)	/* unsigned: 0xff<<24 does not fit in a signed int */
+#define CC2_CHROMA_HI_MASK       ((1<<24)-1)
+
+
+/* Context select command word and the CS_* update/load/use field values. */
+#define GFX_CMD_CONTEXT_SEL      ((0<<29)|(0x5<<23))
+#define CS_UPDATE_LOAD           (1<<17)
+#define CS_UPDATE_USE            (1<<16)
+#define CS_LOAD_CTX0             0
+#define CS_LOAD_CTX1             (1<<8)
+#define CS_USE_CTX0              0
+#define CS_USE_CTX1              (1<<0)
+
+/* I810 LCD/TV registers (LCD_TV_HTOTAL and LCD_TV_OVRACT share their offsets with HTOTAL_A and PIPEASRC below) */
+#define LCD_TV_HTOTAL	0x60000
+#define LCD_TV_C	0x60018
+#define LCD_TV_OVRACT   0x6001C
+
+#define LCD_TV_ENABLE (1 << 31)	/* presumably a bit of LCD_TV_C - TODO confirm */
+#define LCD_TV_VGAMOD (1 << 28)	/* presumably a bit of LCD_TV_C - TODO confirm */
+
+/* I830 CRTC registers */
+#define HTOTAL_A	0x60000
+#define HBLANK_A	0x60004
+#define HSYNC_A 	0x60008
+#define VTOTAL_A	0x6000c
+#define VBLANK_A	0x60010
+#define VSYNC_A 	0x60014
+#define PIPEASRC	0x6001c
+#define BCLRPAT_A	0x60020
+
+#define HTOTAL_B	0x61000
+#define HBLANK_B	0x61004
+#define HSYNC_B 	0x61008
+#define VTOTAL_B	0x6100c
+#define VBLANK_B	0x61010
+#define VSYNC_B 	0x61014
+#define PIPEBSRC	0x6101c
+#define BCLRPAT_B	0x61020
+
+#define DPLL_A		0x06014
+#define DPLL_B		0x06018
+#define FPA0		0x06040
+#define FPA1		0x06044
+
+#define I830_HTOTAL_MASK 	0xfff0000
+#define I830_HACTIVE_MASK	0x7ff
+
+#define I830_HBLANKEND_MASK	0xfff0000
+#define I830_HBLANKSTART_MASK    0xfff
+
+#define I830_HSYNCEND_MASK	0xfff0000
+#define I830_HSYNCSTART_MASK    0xfff
+
+#define I830_VTOTAL_MASK 	0xfff0000
+#define I830_VACTIVE_MASK	0x7ff
+
+#define I830_VBLANKEND_MASK	0xfff0000
+#define I830_VBLANKSTART_MASK    0xfff
+
+#define I830_VSYNCEND_MASK	0xfff0000
+#define I830_VSYNCSTART_MASK    0xfff
+
+#define I830_PIPEA_HORZ_MASK	0x7ff0000
+#define I830_PIPEA_VERT_MASK	0x7ff
+
+#define ADPA			0x61100
+#define ADPA_DAC_ENABLE 	(1<<31)
+#define ADPA_DAC_DISABLE	0
+#define ADPA_PIPE_SELECT_MASK	(1<<30)
+#define ADPA_PIPE_A_SELECT	0
+#define ADPA_PIPE_B_SELECT	(1<<30)
+#define ADPA_USE_VGA_HVPOLARITY (1<<15)
+#define ADPA_SETS_HVPOLARITY	0
+#define ADPA_VSYNC_CNTL_DISABLE (1<<11)
+#define ADPA_VSYNC_CNTL_ENABLE	0
+#define ADPA_HSYNC_CNTL_DISABLE (1<<10)
+#define ADPA_HSYNC_CNTL_ENABLE	0
+#define ADPA_VSYNC_ACTIVE_HIGH	(1<<4)
+#define ADPA_VSYNC_ACTIVE_LOW	0
+#define ADPA_HSYNC_ACTIVE_HIGH	(1<<3)
+#define ADPA_HSYNC_ACTIVE_LOW	0
+
+
+#define DVOA			0x61120
+#define DVOB			0x61140
+#define DVOC			0x61160
+#define DVO_ENABLE		(1<<31)
+
+#define DVOA_SRCDIM		0x61124
+#define DVOB_SRCDIM		0x61144
+#define DVOC_SRCDIM		0x61164
+
+#define LVDS			0x61180
+
+#define PIPEACONF 0x70008
+#define PIPEACONF_ENABLE	(1<<31)
+#define PIPEACONF_DISABLE	0
+#define PIPEACONF_DOUBLE_WIDE	(1<<30)
+#define PIPEACONF_SINGLE_WIDE	0
+#define PIPEACONF_PIPE_UNLOCKED 0
+#define PIPEACONF_PIPE_LOCKED	(1<<25)
+#define PIPEACONF_PALETTE	0
+#define PIPEACONF_GAMMA 	(1<<24)
+
+#define PIPEBCONF 0x71008
+#define PIPEBCONF_ENABLE	(1<<31)
+#define PIPEBCONF_DISABLE	0
+#define PIPEBCONF_GAMMA 	(1<<24)
+#define PIPEBCONF_PALETTE	0
+
+#define DSPACNTR		0x70180
+#define DSPBCNTR		0x71180
+#define DISPLAY_PLANE_ENABLE 			(1<<31)
+#define DISPLAY_PLANE_DISABLE			0
+#define DISPPLANE_GAMMA_ENABLE			(1<<30)
+#define DISPPLANE_GAMMA_DISABLE			0
+#define DISPPLANE_PIXFORMAT_MASK		(0xf<<26)
+#define DISPPLANE_8BPP				(0x2<<26)
+#define DISPPLANE_15_16BPP			(0x4<<26)
+#define DISPPLANE_16BPP				(0x5<<26)
+#define DISPPLANE_32BPP_NO_ALPHA 		(0x6<<26)
+#define DISPPLANE_32BPP				(0x7<<26)
+#define DISPPLANE_STEREO_ENABLE			(1<<25)
+#define DISPPLANE_STEREO_DISABLE		0
+#define DISPPLANE_SEL_PIPE_MASK			(1<<24)
+#define DISPPLANE_SEL_PIPE_A			0
+#define DISPPLANE_SEL_PIPE_B			(1<<24)
+#define DISPPLANE_SRC_KEY_ENABLE		(1<<22)
+#define DISPPLANE_SRC_KEY_DISABLE		0
+#define DISPPLANE_LINE_DOUBLE			(1<<20)
+#define DISPPLANE_NO_LINE_DOUBLE		0
+#define DISPPLANE_STEREO_POLARITY_FIRST		0
+#define DISPPLANE_STEREO_POLARITY_SECOND	(1<<18)
+/* plane B only */
+#define DISPPLANE_ALPHA_TRANS_ENABLE		(1<<15)
+#define DISPPLANE_ALPHA_TRANS_DISABLE		0
+#define DISPPLANE_SPRITE_ABOVE_DISPLAYA		0
+#define DISPPLANE_SPRITE_ABOVE_OVERLAY		(1)
+
+#define DSPABASE		0x70184
+#define DSPASTRIDE		0x70188
+
+#define DSPBBASE		0x71184
+#define DSPBADDR		DSPBBASE
+#define DSPBSTRIDE		0x71188
+
+/* Various masks for reserved bits, etc. */
+#define I830_FWATER1_MASK        (~((1<<11)|(1<<10)|(1<<9)|      \
+        (1<<8)|(1<<26)|(1<<25)|(1<<24)|(1<<5)|(1<<4)|(1<<3)|    \
+        (1<<2)|(1<<1)|1|(1<<20)|(1<<19)|(1<<18)|(1<<17)|(1<<16)))
+#define I830_FWATER2_MASK ~(0)
+
+#define DV0A_RESERVED ((1<<26)|(1<<25)|(1<<24)|(1<<23)|(1<<22)|(1<<21)|(1<<20)|(1<<19)|(1<<18)|(1<<16)|(1<<5)|(1<<1)|1)
+#define DV0B_RESERVED ((1<<27)|(1<<26)|(1<<25)|(1<<24)|(1<<23)|(1<<22)|(1<<21)|(1<<20)|(1<<19)|(1<<18)|(1<<16)|(1<<5)|(1<<1)|1)
+#define VGA0_N_DIVISOR_MASK     ((1<<21)|(1<<20)|(1<<19)|(1<<18)|(1<<17)|(1<<16))
+#define VGA0_M1_DIVISOR_MASK    ((1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8))
+#define VGA0_M2_DIVISOR_MASK    ((1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|1)
+#define VGA0_M1M2N_RESERVED	~(VGA0_N_DIVISOR_MASK|VGA0_M1_DIVISOR_MASK|VGA0_M2_DIVISOR_MASK)
+#define VGA0_POSTDIV_MASK       ((1<<7)|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|1)
+#define VGA1_POSTDIV_MASK       ((1<<15)|(1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8))
+#define VGA_POSTDIV_RESERVED	~(VGA0_POSTDIV_MASK|VGA1_POSTDIV_MASK|(1<<7)|(1<<15))
+#define DPLLA_POSTDIV_MASK ((1<<23)|(1<<21)|(1<<20)|(1<<19)|(1<<18)|(1<<17)|(1<<16))
+#define DPLLA_RESERVED     ((1<<27)|(1<<26)|(1<<25)|(1<<24)|(1<<22)|(1<<15)|(1<<12)|(1<<11)|(1<<10)|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|1)
+#define ADPA_RESERVED	((1<<2)|(1<<1)|1|(1<<9)|(1<<8)|(1<<7)|(1<<6)|(1<<5)|(1<<30)|(1<<29)|(1<<28)|(1<<27)|(1<<26)|(1<<25)|(1<<24)|(1<<23)|(1<<22)|(1<<21)|(1<<20)|(1<<19)|(1<<18)|(1<<17)|(1<<16))
+#define SUPER_WORD              32
+#define BURST_A_MASK    ((1<<11)|(1<<10)|(1<<9)|(1<<8))
+#define BURST_B_MASK    ((1<<26)|(1<<25)|(1<<24))
+#define WATER_A_MASK    ((1<<5)|(1<<4)|(1<<3)|(1<<2)|(1<<1)|1)
+#define WATER_B_MASK    ((1<<20)|(1<<19)|(1<<18)|(1<<17)|(1<<16))
+#define WATER_RESERVED	((1<<31)|(1<<30)|(1<<29)|(1<<28)|(1<<27)|(1<<23)|(1<<22)|(1<<21)|(1<<15)|(1<<14)|(1<<13)|(1<<12)|(1<<7)|(1<<6))
+#define PIPEACONF_RESERVED ((1<<29)|(1<<28)|(1<<27)|(1<<23)|(1<<22)|(1<<21)|(1<<20)|(1<<19)|(1<<18)|(1<<17)|(1<<16)|0xffff)
+#define PIPEBCONF_RESERVED ((1<<30)|(1<<29)|(1<<28)|(1<<27)|(1<<26)|(1<<25)|(1<<23)|(1<<22)|(1<<21)|(1<<20)|(1<<19)|(1<<18)|(1<<17)|(1<<16)|0xffff)
+#define DSPACNTR_RESERVED ((1<<23)|(1<<19)|(1<<17)|(1<<16)|0xffff)
+#define DSPBCNTR_RESERVED ((1<<23)|(1<<19)|(1<<17)|(1<<16)|0x7ffe)
+
+#define I830_GMCH_CTRL		0x52
+
+#define I830_GMCH_ENABLED	0x4
+#define I830_GMCH_MEM_MASK	0x1
+#define I830_GMCH_MEM_64M	0x1
+#define I830_GMCH_MEM_128M	0
+
+#define I830_GMCH_GMS_MASK			0x70
+#define I830_GMCH_GMS_DISABLED		0x00
+#define I830_GMCH_GMS_LOCAL			0x10
+#define I830_GMCH_GMS_STOLEN_512	0x20
+#define I830_GMCH_GMS_STOLEN_1024	0x30
+#define I830_GMCH_GMS_STOLEN_8192	0x40
+
+#define I830_RDRAM_CHANNEL_TYPE		0x03010
+#define I830_RDRAM_ND(x)			(((x) & 0x20) >> 5)
+#define I830_RDRAM_DDT(x)			(((x) & 0x18) >> 3)
+
+#define I855_GMCH_GMS_MASK			(0x7 << 4)
+#define I855_GMCH_GMS_DISABLED			0x00
+#define I855_GMCH_GMS_STOLEN_1M			(0x1 << 4)
+#define I855_GMCH_GMS_STOLEN_4M			(0x2 << 4)
+#define I855_GMCH_GMS_STOLEN_8M			(0x3 << 4)
+#define I855_GMCH_GMS_STOLEN_16M		(0x4 << 4)
+#define I855_GMCH_GMS_STOLEN_32M		(0x5 << 4)
+
+#define I85X_CAPID			0x44
+#define I85X_VARIANT_MASK			0x7
+#define I85X_VARIANT_SHIFT			5
+#define I855_GME				0x0
+#define I855_GM					0x4
+#define I852_GME				0x2
+#define I852_GM					0x5
+
+/* BLT commands */
+#define COLOR_BLT_CMD		((2<<29)|(0x40<<22)|(0x3))
+#define COLOR_BLT_WRITE_ALPHA	(1<<21)
+#define COLOR_BLT_WRITE_RGB	(1<<20)
+
+#define XY_COLOR_BLT_CMD		((2<<29)|(0x50<<22)|(0x4))
+#define XY_COLOR_BLT_WRITE_ALPHA	(1<<21)
+#define XY_COLOR_BLT_WRITE_RGB		(1<<20)
+
+#define XY_SETUP_CLIP_BLT_CMD		((2<<29)|(3<<22)|1)
+
+#define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22)|6)
+#define XY_SRC_COPY_BLT_WRITE_ALPHA	(1<<21)
+#define XY_SRC_COPY_BLT_WRITE_RGB	(1<<20)
+
+#define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|0x4)
+#define SRC_COPY_BLT_WRITE_ALPHA	(1<<21)
+#define SRC_COPY_BLT_WRITE_RGB		(1<<20)
+
+#define XY_MONO_PAT_BLT_CMD		((0x2<<29)|(0x52<<22)|0x7)
+#define XY_MONO_PAT_VERT_SEED		((1<<10)|(1<<9)|(1<<8))
+#define XY_MONO_PAT_HORT_SEED		((1<<14)|(1<<13)|(1<<12))
+#define XY_MONO_PAT_BLT_WRITE_ALPHA	(1<<21)
+#define XY_MONO_PAT_BLT_WRITE_RGB	(1<<20)
+
+#define XY_MONO_SRC_BLT_CMD		((0x2<<29)|(0x54<<22)|(0x6))
+#define XY_MONO_SRC_BLT_WRITE_ALPHA	(1<<21)
+#define XY_MONO_SRC_BLT_WRITE_RGB	(1<<20)
+
+/* 3d state */
+#define STATE3D_FOG_MODE		((3<<29)|(0x1d<<24)|(0x89<<16)|2)
+#define FOG_MODE_VERTEX 		(1<<31)
+#define STATE3D_MAP_COORD_TRANSFORM	((3<<29)|(0x1d<<24)|(0x8c<<16))
+#define DISABLE_TEX_TRANSFORM		(1<<28)
+#define TEXTURE_SET(x)			((x)<<29)	/* argument parenthesized so a compound expression is shifted as a whole */
+#define STATE3D_RASTERIZATION_RULES	((3<<29)|(0x07<<24))
+#define POINT_RASTER_ENABLE		(1<<15)
+#define POINT_RASTER_OGL		(1<<13)
+#define STATE3D_VERTEX_TRANSFORM	((3<<29)|(0x1d<<24)|(0x8b<<16))
+#define DISABLE_VIEWPORT_TRANSFORM	(1<<31)
+#define DISABLE_PERSPECTIVE_DIVIDE	(1<<29)
+
+#define MI_SET_CONTEXT			(0x18<<23)
+#define CTXT_NO_RESTORE 		(1)
+#define CTXT_PALETTE_SAVE_DISABLE	(1<<3)
+#define CTXT_PALETTE_RESTORE_DISABLE	(1<<2)
+
+/* Dword 0 (the field macros parenthesize their argument before shifting) */
+#define MI_VERTEX_BUFFER		(0x17<<23)
+#define MI_VERTEX_BUFFER_IDX(x) 	((x)<<20)
+#define MI_VERTEX_BUFFER_PITCH(x)	((x)<<13)
+#define MI_VERTEX_BUFFER_WIDTH(x)	((x)<<6)
+/* Dword 1 */
+#define MI_VERTEX_BUFFER_DISABLE	(1)
+
+/* Overlay Flip */
+#define MI_OVERLAY_FLIP			(0x11<<23)
+#define MI_OVERLAY_FLIP_CONTINUE	(0<<21)
+#define MI_OVERLAY_FLIP_ON		(1<<21)
+#define MI_OVERLAY_FLIP_OFF		(2<<21)
+
+/* Wait for Events */
+#define MI_WAIT_FOR_EVENT		(0x03<<23)
+#define MI_WAIT_FOR_OVERLAY_FLIP	(1<<16)
+
+/* Flush */
+#define MI_FLUSH			(0x04<<23)
+#define MI_WRITE_DIRTY_STATE		(1<<4)
+#define MI_END_SCENE			(1<<3)
+#define MI_INHIBIT_RENDER_CACHE_FLUSH	(1<<2)
+#define MI_INVALIDATE_MAP_CACHE		(1<<0)
+
+/* Noop; MI_NOOP_ID_MASK covers bits 21:0, just below the MI_NOOP_WRITE_ID bit */
+#define MI_NOOP				0x00
+#define MI_NOOP_WRITE_ID		(1<<22)
+#define MI_NOOP_ID_MASK			((1<<22) - 1)
+
+#define STATE3D_COLOR_FACTOR	((0x3<<29)|(0x1d<<24)|(0x01<<16))
+
+/* STATE3D_FOG_MODE stuff */
+#define ENABLE_FOG_SOURCE	(1<<27)
+#define ENABLE_FOG_CONST	(1<<24)
+#define ENABLE_FOG_DENSITY	(1<<23)
+
+
+#define MAX_DISPLAY_PIPES	2
+
+typedef enum {                    /* values written out; numbering is unchanged */
+   CrtIndex = 0,
+   TvIndex = 1,
+   DfpIndex = 2,
+   LfpIndex = 3,
+   Tv2Index = 4,
+   Dfp2Index = 5,
+   UnknownIndex = 6,
+   Unknown2Index = 7,
+   NumDisplayTypes = 8,
+   NumKnownDisplayTypes = UnknownIndex   /* count of entries before the Unknown ones */
+} DisplayType;
+
+/* Pipe contents as reported by the BIOS: one bit per DisplayType index */
+#define PIPE_ACTIVE_MASK		0xff
+#define PIPE_CRT_ACTIVE			(1 << CrtIndex)
+#define PIPE_TV_ACTIVE			(1 << TvIndex)
+#define PIPE_DFP_ACTIVE			(1 << DfpIndex)
+#define PIPE_LCD_ACTIVE			(1 << LfpIndex)
+#define PIPE_TV2_ACTIVE			(1 << Tv2Index)
+#define PIPE_DFP2_ACTIVE		(1 << Dfp2Index)
+#define PIPE_UNKNOWN_ACTIVE		((1 << Unknown2Index) |	\
+					 (1 << UnknownIndex))
+
+#define PIPE_SIZED_DISP_MASK		(PIPE_DFP2_ACTIVE |	\
+					 PIPE_LCD_ACTIVE |	\
+					 PIPE_DFP_ACTIVE)
+
+#define PIPE_A_SHIFT			0
+#define PIPE_B_SHIFT			8
+#define PIPE_SHIFT(n)			((n) != 0 ? \
+					 PIPE_B_SHIFT : PIPE_A_SHIFT)
+
+/*
+ * Some BIOS scratch area registers.  The 845 (and 830?) store the amount
+ * of video memory available to the BIOS in SWF1.
+ */
+
+#define SWF0			0x71410
+#define SWF1			0x71414
+#define SWF2			0x71418
+#define SWF3			0x7141c
+#define SWF4			0x71420
+#define SWF5			0x71424
+#define SWF6			0x71428
+
+/*
+ * 855 scratch registers.
+ */
+#define SWF00			0x70410
+#define SWF01			0x70414
+#define SWF02			0x70418
+#define SWF03			0x7041c
+#define SWF04			0x70420
+#define SWF05			0x70424
+#define SWF06			0x70428
+/* SWF10..SWF16 are aliases for SWF0..SWF6 (offsets 0x71410..0x71428). */
+#define SWF10			SWF0
+#define SWF11			SWF1
+#define SWF12			SWF2
+#define SWF13			SWF3
+#define SWF14			SWF4
+#define SWF15			SWF5
+#define SWF16			SWF6
+
+#define SWF30			0x72414
+#define SWF31			0x72418
+#define SWF32			0x7241c
+
+/*
+ * Overlay registers.  These are overlay registers accessed via MMIO.
+ * Those loaded via the overlay register page are defined in i830_video.c.
+ */
+#define OVADD			0x30000
+
+#define DOVSTA			0x30008
+#define OC_BUF			(0x3<<20)
+
+#define OGAMC5			0x30010
+#define OGAMC4			0x30014
+#define OGAMC3			0x30018
+#define OGAMC2			0x3001c
+#define OGAMC1			0x30020
+#define OGAMC0			0x30024
+
+
+/*
+ * Palette registers
+ */
+#define PALETTE_A		0x0a000
+#define PALETTE_B		0x0a800
+
+#endif /* _I810_REG_H */
diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile
new file mode 100644
index 0000000000..71ee753748
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/Makefile
@@ -0,0 +1,71 @@
+# Makefile for i915_dri.so: lists the driver sources, then includes ../Makefile.template.
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = i915_dri.so
+# Driver-specific C files; merged with $(COMMON_SOURCES) into C_SOURCES further down.
+DRIVER_SOURCES = \
+	i830_context.c \
+	i830_state.c \
+	i830_texblend.c \
+	i830_texstate.c \
+	i830_vtbl.c \
+	intel_render.c \
+	intel_regions.c \
+	intel_buffer_objects.c \
+	intel_batchbuffer.c \
+	intel_clear.c \
+	intel_extensions.c \
+	intel_extensions_es2.c \
+	intel_mipmap_tree.c \
+	intel_tex_layout.c \
+	intel_tex_image.c \
+	intel_tex_subimage.c \
+	intel_tex_copy.c \
+	intel_tex_validate.c \
+	intel_tex_format.c \
+	intel_tex.c \
+	intel_pixel.c \
+	intel_pixel_bitmap.c \
+	intel_pixel_copy.c \
+	intel_pixel_draw.c \
+	intel_pixel_read.c \
+	intel_buffers.c \
+	intel_blit.c \
+	i915_tex_layout.c \
+	i915_texstate.c \
+	i915_context.c \
+	i915_debug.c \
+	i915_debug_fp.c \
+	i915_fragprog.c \
+	i915_program.c \
+	i915_state.c \
+	i915_vtbl.c \
+	intel_context.c \
+	intel_decode.c \
+	intel_screen.c \
+	intel_span.c \
+	intel_state.c \
+	intel_syncobj.c \
+	intel_tris.c \
+	intel_fbo.c
+
+C_SOURCES = \
+	$(COMMON_SOURCES) \
+	$(DRIVER_SOURCES)
+
+ASM_SOURCES = 
+# -DDRM_VBLANK_FLIP is only added when pkg-config reports libdrm >= 2.3.1.
+DRIVER_DEFINES = -I../intel -I../intel/server -DI915 \
+	$(shell pkg-config libdrm --atleast-version=2.3.1 \
+				&& echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP")
+
+INCLUDES += $(INTEL_CFLAGS)
+DRI_LIB_DEPS += $(INTEL_LIBS)
+
+include ../Makefile.template
+# Extra prerequisites: these two objects depend on sources kept in ../intel.
+intel_decode.o: ../intel/intel_decode.c
+
+intel_tex_layout.o: ../intel/intel_tex_layout.c
+
diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c
new file mode 100644
index 0000000000..d52ea9812f
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i830_context.c
@@ -0,0 +1,113 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "i830_context.h"
+#include "main/imports.h"
+#include "texmem.h"
+#include "tnl/tnl.h"
+#include "tnl/t_vertex.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+#include "intel_span.h"
+#include "intel_tris.h"
+
+/***************************************
+ * Mesa's Driver Functions
+ ***************************************/
+/* Pass `functions` to intelInitDriverFunctions() and then to i830InitStateFuncs(). */
+static void
+i830InitDriverFunctions(struct dd_function_table *functions)
+{
+   intelInitDriverFunctions(functions);   /* runs first on the caller's table */
+   i830InitStateFuncs(functions);         /* then runs on that same table */
+}
+
+extern const struct tnl_pipeline_stage *intel_pipeline[];
+
+/* Create an i830 context: allocate it, run intelInitContext(), then install
+ * the tnl pipeline, limits and initial state.  Returns GL_FALSE if the
+ * allocation or intelInitContext() fails, GL_TRUE otherwise. */
+GLboolean
+i830CreateContext(const __GLcontextModes * mesaVis,
+                  __DRIcontext * driContextPriv,
+                  void *sharedContextPrivate)
+{
+   struct dd_function_table functions;
+   struct i830_context *i830 = CALLOC_STRUCT(i830_context);
+   struct intel_context *intel;
+   GLcontext *ctx;
+
+   if (!i830)                   /* check before deriving member pointers */
+      return GL_FALSE;
+   intel = &i830->intel;
+   ctx = &intel->ctx;
+
+   i830InitVtbl(i830);
+   i830InitDriverFunctions(&functions);
+
+   if (!intelInitContext(intel, __DRI_API_OPENGL, mesaVis, driContextPriv,
+                         sharedContextPrivate, &functions)) {
+      FREE(i830);
+      return GL_FALSE;
+   }
+
+   _math_matrix_ctr(&intel->ViewportMatrix);
+
+   /* Initialize swrast, tnl driver tables: */
+   intelInitSpanFuncs(ctx);
+   intelInitTriFuncs(ctx);
+
+   /* Install the customized pipeline: */
+   _tnl_destroy_pipeline(ctx);
+   _tnl_install_pipeline(ctx, intel_pipeline);
+
+   if (intel->no_rast)
+      FALLBACK(intel, INTEL_FALLBACK_USER, 1);
+
+   /* Advertise the full hardware capabilities.  The new memory
+    * manager should cope much better with overload situations:
+    */
+   ctx->Const.MaxTextureUnits = I830_TEX_UNITS;
+   ctx->Const.MaxTextureImageUnits = I830_TEX_UNITS;
+   ctx->Const.MaxTextureCoordUnits = I830_TEX_UNITS;
+   ctx->Const.MaxTextureLevels = 12;
+   ctx->Const.Max3DTextureLevels = 9;
+   ctx->Const.MaxCubeTextureLevels = 11;
+   ctx->Const.MaxTextureRectSize = (1 << 11);
+   ctx->Const.MaxTextureMaxAnisotropy = 2.0;
+   ctx->Const.MaxDrawBuffers = 1;
+
+   _tnl_init_vertices(ctx, ctx->Const.MaxArrayLockSize + 12,
+                      18 * sizeof(GLfloat));
+   intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf;
+   i830InitState(i830);
+
+   _tnl_allow_vertex_fog(ctx, 1);
+   _tnl_allow_pixel_fog(ctx, 0);
+
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/i915/i830_context.h b/src/mesa/drivers/dri/i915/i830_context.h
new file mode 100644
index 0000000000..2100ffe6d9
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i830_context.h
@@ -0,0 +1,220 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef I830CONTEXT_INC
+#define I830CONTEXT_INC
+
+#include "intel_context.h"
+
+#define I830_FALLBACK_TEXTURE		 0x1000
+#define I830_FALLBACK_COLORMASK		 0x2000
+#define I830_FALLBACK_STENCIL		 0x4000
+#define I830_FALLBACK_STIPPLE		 0x8000
+#define I830_FALLBACK_LOGICOP		 0x20000
+#define I830_FALLBACK_DRAW_OFFSET	 0x200000
+
+#define I830_UPLOAD_CTX              0x1
+#define I830_UPLOAD_BUFFERS          0x2
+#define I830_UPLOAD_STIPPLE          0x4
+#define I830_UPLOAD_INVARIENT        0x8
+#define I830_UPLOAD_RASTER_RULES     0x10
+#define I830_UPLOAD_TEX(i)           (0x0100<<(i))   /* i = 0..3: bits 8..11, clear of I830_UPLOAD_RASTER_RULES */
+#define I830_UPLOAD_TEXBLEND(i)      (0x1000<<(i))   /* i = 0..3: bits 12..15 */
+#define I830_UPLOAD_TEX_ALL          (0x0f00)
+#define I830_UPLOAD_TEXBLEND_ALL     (0xf000)
+
+/* State structure offsets - these will probably disappear.
+ */
+#define I830_DESTREG_CBUFADDR0 0
+#define I830_DESTREG_CBUFADDR1 1
+#define I830_DESTREG_DBUFADDR0 2
+#define I830_DESTREG_DBUFADDR1 3
+#define I830_DESTREG_DV0 4
+#define I830_DESTREG_DV1 5
+#define I830_DESTREG_SENABLE 6
+#define I830_DESTREG_SR0 7
+#define I830_DESTREG_SR1 8
+#define I830_DESTREG_SR2 9
+#define I830_DESTREG_DRAWRECT0 10
+#define I830_DESTREG_DRAWRECT1 11
+#define I830_DESTREG_DRAWRECT2 12
+#define I830_DESTREG_DRAWRECT3 13
+#define I830_DESTREG_DRAWRECT4 14
+#define I830_DESTREG_DRAWRECT5 15
+#define I830_DEST_SETUP_SIZE 16
+
+#define I830_CTXREG_STATE1		0
+#define I830_CTXREG_STATE2		1
+#define I830_CTXREG_STATE3		2
+#define I830_CTXREG_STATE4		3
+#define I830_CTXREG_STATE5		4
+#define I830_CTXREG_IALPHAB		5
+#define I830_CTXREG_STENCILTST		6
+#define I830_CTXREG_ENABLES_1		7
+#define I830_CTXREG_ENABLES_2		8
+#define I830_CTXREG_AA			9
+#define I830_CTXREG_FOGCOLOR		10
+#define I830_CTXREG_BLENDCOLOR0		11
+#define I830_CTXREG_BLENDCOLOR1		12
+#define I830_CTXREG_VF			13
+#define I830_CTXREG_VF2			14
+#define I830_CTXREG_MCSB0		15
+#define I830_CTXREG_MCSB1		16
+#define I830_CTX_SETUP_SIZE		17
+
+#define I830_STPREG_ST0        0
+#define I830_STPREG_ST1        1
+#define I830_STP_SETUP_SIZE    2
+
+#define I830_TEXREG_TM0LI      0        /* load immediate 2 texture map n */
+#define I830_TEXREG_TM0S1      1
+#define I830_TEXREG_TM0S2      2
+#define I830_TEXREG_TM0S3      3
+#define I830_TEXREG_TM0S4      4
+#define I830_TEXREG_MCS	       5        /* _3DSTATE_MAP_COORD_SETS */
+#define I830_TEXREG_CUBE       6        /* _3DSTATE_MAP_SUBE */
+#define I830_TEX_SETUP_SIZE    7
+
+#define I830_TEXBLEND_SIZE	12      /* (4 args + op) * 2 + COLOR_FACTOR */
+
+enum {
+   I830_RASTER_RULES,
+   I830_RASTER_RULES_SIZE
+};
+
+struct i830_texture_object
+{
+   struct intel_texture_object intel;
+   GLuint Setup[I830_TEX_SETUP_SIZE];
+};
+
+#define I830_TEX_UNITS 4
+/* GLuint state arrays sized by the I830_*_SIZE constants, region/buffer pointers, and two flag words. */
+struct i830_hw_state
+{
+   GLuint Ctx[I830_CTX_SETUP_SIZE];                     /* one slot per I830_CTXREG_* index */
+   GLuint Buffer[I830_DEST_SETUP_SIZE];                 /* one slot per I830_DESTREG_* index */
+   GLuint Stipple[I830_STP_SETUP_SIZE];                 /* one slot per I830_STPREG_* index */
+   GLuint RasterRules[I830_RASTER_RULES_SIZE];
+   GLuint Tex[I830_TEX_UNITS][I830_TEX_SETUP_SIZE];     /* per unit, one slot per I830_TEXREG_* index */
+   GLuint TexBlend[I830_TEX_UNITS][I830_TEXBLEND_SIZE];
+   GLuint TexBlendWordsUsed[I830_TEX_UNITS];            /* presumably the used length of TexBlend[unit] - TODO confirm */
+
+   struct intel_region *draw_region;
+   struct intel_region *depth_region;
+
+   /* Regions aren't actually that appropriate here as the memory may
+    * be from a PBO or FBO.  Will have to do this for draw and depth for
+    * FBO's...
+    */
+   drm_intel_bo *tex_buffer[I830_TEX_UNITS];
+   GLuint tex_offset[I830_TEX_UNITS];
+
+   GLuint emitted;              /* flag bits cleared by I830_STATECHANGE; presumably I830_UPLOAD_* (comment used to say I810) */
+   GLuint active;               /* flag bits set or cleared by I830_ACTIVESTATE */
+};
+
+struct i830_context
+{
+   struct intel_context intel;
+
+   GLuint lodbias_tm0s3[MAX_TEXTURE_UNITS];
+     DECLARE_RENDERINPUTS(last_index_bitset);
+
+   struct i830_hw_state state;
+};
+
+
+/* Invoke INTEL_FIREVERTICES() on the context's intel member, then clear the
+ * `flag` bits in state.emitted.  `i830` is evaluated more than once. */
+#define I830_STATECHANGE(i830, flag)				\
+do {								\
+   INTEL_FIREVERTICES( &(i830)->intel );			\
+   (i830)->state.emitted &= ~(flag);				\
+} while (0)
+/* As above, but set (mode non-zero) or clear (mode zero) `flag` in state.active. */
+#define I830_ACTIVESTATE(i830, flag, mode)	\
+do {						\
+   INTEL_FIREVERTICES( &(i830)->intel );	\
+   if (mode)					\
+      (i830)->state.active |= (flag);		\
+   else						\
+      (i830)->state.active &= ~(flag);		\
+} while (0)
+
+/* i830_vtbl.c
+ */
+extern void i830InitVtbl(struct i830_context *i830);
+
+extern void
+i830_state_draw_region(struct intel_context *intel,
+                       struct i830_hw_state *state,
+                       struct intel_region *color_region,
+                       struct intel_region *depth_region);
+/* i830_context.c
+ */
+extern GLboolean
+i830CreateContext(const __GLcontextModes * mesaVis,
+                  __DRIcontext * driContextPriv,
+                  void *sharedContextPrivate);
+
+/* i830_tex.c, i830_texstate.c
+ */
+extern void i830UpdateTextureState(struct intel_context *intel);
+
+extern void i830InitTextureFuncs(struct dd_function_table *functions);
+
+/* i830_texblend.c
+ */
+extern GLuint i830SetTexEnvCombine(struct i830_context *i830,
+                                   const struct gl_tex_env_combine_state
+                                   *combine, GLint blendUnit, GLuint texel_op,
+                                   GLuint * state, const GLfloat * factor);
+
+extern void i830EmitTextureBlend(struct i830_context *i830);
+
+
+/* i830_state.c
+ */
+extern void i830InitStateFuncs(struct dd_function_table *functions);
+
+extern void i830EmitState(struct i830_context *i830);
+
+extern void i830InitState(struct i830_context *i830);
+extern void i830_update_provoking_vertex(GLcontext *ctx);
+
+/*======================================================================
+ * Typed inline conversion helpers, used in place of the earlier macros.
+ */
+static INLINE struct i830_context *
+i830_context(GLcontext * ctx)
+{
+   struct i830_context *const i830 = (struct i830_context *) ctx;
+   return i830;
+}
+
+#endif
diff --git a/src/mesa/drivers/dri/i915/i830_reg.h b/src/mesa/drivers/dri/i915/i830_reg.h
new file mode 100644
index 0000000000..ae1317029a
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i830_reg.h
@@ -0,0 +1,624 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#ifndef _I830_REG_H_
+#define _I830_REG_H_
+
+
+#include "intel_reg.h"
+
+/* Replace the bits selected by `mask` in `var` with `value`.
+ * `var` is evaluated twice, so it must be a side-effect-free lvalue.
+ * `value` is OR-ed in as given; it is not masked here.
+ */
+#define I830_SET_FIELD( var, mask, value ) \
+   ((var) &= ~(mask), (var) |= (value))
+
+#define _3DSTATE_AA_CMD			(CMD_3D | (0x06<<24))
+#define AA_LINE_ECAAR_WIDTH_ENABLE	(1<<16)
+#define AA_LINE_ECAAR_WIDTH_0_5 	0
+#define AA_LINE_ECAAR_WIDTH_1_0		(1<<14)
+#define AA_LINE_ECAAR_WIDTH_2_0 	(2<<14)
+#define AA_LINE_ECAAR_WIDTH_4_0 	(3<<14)
+#define AA_LINE_REGION_WIDTH_ENABLE	(1<<8)
+#define AA_LINE_REGION_WIDTH_0_5	0
+#define AA_LINE_REGION_WIDTH_1_0	(1<<6)
+#define AA_LINE_REGION_WIDTH_2_0	(2<<6)
+#define AA_LINE_REGION_WIDTH_4_0	(3<<6)
+#define AA_LINE_ENABLE			((1<<1) | 1)
+#define AA_LINE_DISABLE			(1<<1)
+
+#define _3DSTATE_COLOR_FACTOR_CMD	(CMD_3D | (0x1d<<24) | (0x1<<16))
+
+#define _3DSTATE_COLOR_FACTOR_N_CMD(stage)	(CMD_3D | (0x1d<<24) | \
+					         ((0x90+(stage))<<16))
+
+#define _3DSTATE_CONST_BLEND_COLOR_CMD	(CMD_3D | (0x1d<<24) | (0x88<<16))
+
+#define _3DSTATE_DFLT_DIFFUSE_CMD	(CMD_3D | (0x1d<<24) | (0x99<<16))
+
+#define _3DSTATE_DFLT_SPEC_CMD		(CMD_3D | (0x1d<<24) | (0x9a<<16))
+
+#define _3DSTATE_DFLT_Z_CMD		(CMD_3D | (0x1d<<24) | (0x98<<16))
+
+
+#define _3DSTATE_DST_BUF_VARS_CMD	(CMD_3D | (0x1d<<24) | (0x85<<16))
+/* Dword 1 */
+#define DSTORG_HORT_BIAS(x)		((x)<<20)
+#define DSTORG_VERT_BIAS(x)		((x)<<16)
+#define COLOR_4_2_2_CHNL_WRT_ALL	0
+#define COLOR_4_2_2_CHNL_WRT_Y		(1<<12)
+#define COLOR_4_2_2_CHNL_WRT_CR		(2<<12)
+#define COLOR_4_2_2_CHNL_WRT_CB		(3<<12)
+#define COLOR_4_2_2_CHNL_WRT_CRCB	(4<<12)
+#define COLR_BUF_8BIT			0
+#define COLR_BUF_RGB555 		(1<<8)
+#define COLR_BUF_RGB565 		(2<<8)
+#define COLR_BUF_ARGB8888		(3<<8)
+#define DEPTH_IS_Z			0
+#define DEPTH_IS_W			(1<<6)
+#define DEPTH_FRMT_16_FIXED		0
+#define DEPTH_FRMT_16_FLOAT		(1<<2)
+#define DEPTH_FRMT_24_FIXED_8_OTHER	(2<<2)
+#define DEPTH_FRMT_24_FLOAT_8_OTHER	(3<<2)
+#define VERT_LINE_STRIDE_1		(1<<1)
+#define VERT_LINE_STRIDE_0		0
+#define VERT_LINE_STRIDE_OFS_1		1
+#define VERT_LINE_STRIDE_OFS_0		0
+
+
+#define _3DSTATE_DRAW_RECT_CMD		(CMD_3D|(0x1d<<24)|(0x80<<16)|3)
+/* Dword 1 */
+#define DRAW_RECT_DIS_DEPTH_OFS 	(1<<30)
+#define DRAW_DITHER_OFS_X(x)		((x)<<26)
+#define DRAW_DITHER_OFS_Y(x)		((x)<<24)
+/* Dword 2 */
+#define DRAW_YMIN(x)			((x)<<16)
+#define DRAW_XMIN(x)			(x)
+/* Dword 3 */
+#define DRAW_YMAX(x)			((x)<<16)
+#define DRAW_XMAX(x)			(x)
+/* Dword 4 */
+#define DRAW_YORG(x)			((x)<<16)
+#define DRAW_XORG(x)			(x)
+
+
+/* _3DSTATE_ENABLES_1
+ *
+ * Every feature below occupies a two-bit field: ENABLE_x sets both bits of
+ * the field and DISABLE_x sets only the upper bit.  Where a *_MASK macro is
+ * provided it covers both bits, so it has the same value as the matching
+ * ENABLE_x.
+ * NOTE(review): presumably the upper bit is a per-field "modify" strobe and
+ * the lower bit the new setting -- confirm against the hardware docs.
+ */
+#define _3DSTATE_ENABLES_1_CMD		(CMD_3D|(0x3<<24))
+#define ENABLE_LOGIC_OP_MASK		((1<<23)|(1<<22))
+#define ENABLE_LOGIC_OP 		((1<<23)|(1<<22))
+#define DISABLE_LOGIC_OP		(1<<23)
+#define ENABLE_STENCIL_TEST		((1<<21)|(1<<20))
+#define DISABLE_STENCIL_TEST		(1<<21)
+#define ENABLE_DEPTH_BIAS		((1<<11)|(1<<10))
+#define DISABLE_DEPTH_BIAS		(1<<11)
+#define ENABLE_SPEC_ADD_MASK		((1<<9)|(1<<8))
+#define ENABLE_SPEC_ADD 		((1<<9)|(1<<8))
+#define DISABLE_SPEC_ADD		(1<<9)
+#define ENABLE_DIS_FOG_MASK		((1<<7)|(1<<6))
+#define ENABLE_FOG			((1<<7)|(1<<6))
+#define DISABLE_FOG			(1<<7)
+#define ENABLE_DIS_ALPHA_TEST_MASK	((1<<5)|(1<<4))
+#define ENABLE_ALPHA_TEST		((1<<5)|(1<<4))
+#define DISABLE_ALPHA_TEST		(1<<5)
+#define ENABLE_DIS_CBLEND_MASK		((1<<3)|(1<<2))
+#define ENABLE_COLOR_BLEND		((1<<3)|(1<<2))
+#define DISABLE_COLOR_BLEND		(1<<3)
+#define ENABLE_DIS_DEPTH_TEST_MASK	((1<<1)|1)
+#define ENABLE_DEPTH_TEST		((1<<1)|1)
+#define DISABLE_DEPTH_TEST		(1<<1)
+
+/* _3DSTATE_ENABLES_2, p138 */
+#define _3DSTATE_ENABLES_2_CMD		(CMD_3D|(0x4<<24))
+#define ENABLE_STENCIL_WRITE		((1<<21)|(1<<20))
+#define DISABLE_STENCIL_WRITE		(1<<21)
+#define ENABLE_TEX_CACHE		((1<<17)|(1<<16))
+#define DISABLE_TEX_CACHE		(1<<17)
+#define ENABLE_DITHER			((1<<9)|(1<<8))
+#define DISABLE_DITHER			(1<<9)
+#define ENABLE_COLOR_MASK		(1<<10)
+#define WRITEMASK_ALPHA			(1<<7)
+#define WRITEMASK_ALPHA_SHIFT		7
+#define WRITEMASK_RED			(1<<6)
+#define WRITEMASK_RED_SHIFT		6
+#define WRITEMASK_GREEN 		(1<<5)
+#define WRITEMASK_GREEN_SHIFT		5
+#define WRITEMASK_BLUE			(1<<4)
+#define WRITEMASK_BLUE_SHIFT		4
+#define WRITEMASK_MASK			((1<<4)|(1<<5)|(1<<6)|(1<<7))
+#define ENABLE_COLOR_WRITE		((1<<3)|(1<<2))
+#define DISABLE_COLOR_WRITE		(1<<3)
+#define ENABLE_DIS_DEPTH_WRITE_MASK	0x3
+#define ENABLE_DEPTH_WRITE		((1<<1)|1)
+#define DISABLE_DEPTH_WRITE		(1<<1)
+
+/* _3DSTATE_FOG_COLOR, p139 */
+#define _3DSTATE_FOG_COLOR_CMD		(CMD_3D|(0x15<<24))
+#define FOG_COLOR_RED(x)		((x)<<16)
+#define FOG_COLOR_GREEN(x)		((x)<<8)
+#define FOG_COLOR_BLUE(x)		(x)
+
+/* _3DSTATE_FOG_MODE, p140 */
+#define _3DSTATE_FOG_MODE_CMD		(CMD_3D|(0x1d<<24)|(0x89<<16)|2)
+/* Dword 1 */
+#define FOGFUNC_ENABLE			(1<<31)
+#define FOGFUNC_VERTEX			0
+#define FOGFUNC_PIXEL_EXP		(1<<28)
+#define FOGFUNC_PIXEL_EXP2		(2<<28)
+#define FOGFUNC_PIXEL_LINEAR		(3<<28)
+#define FOGSRC_INDEX_Z			(1<<27)
+#define FOGSRC_INDEX_W			((1<<27)|(1<<25))
+#define FOG_LINEAR_CONST		(1<<24)
+#define FOG_CONST_1(x)			((x)<<4)
+#define ENABLE_FOG_DENSITY		(1<<23)
+/* Dword 2 */
+#define FOG_CONST_2(x)			(x)
+/* Dword 3 */
+#define FOG_DENSITY(x)			(x)
+
+/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p142 */
+#define _3DSTATE_INDPT_ALPHA_BLEND_CMD	(CMD_3D|(0x0b<<24))
+#define ENABLE_INDPT_ALPHA_BLEND	((1<<23)|(1<<22))
+#define DISABLE_INDPT_ALPHA_BLEND	(1<<23)
+#define ALPHA_BLENDFUNC_MASK		0x3f0000
+#define ENABLE_ALPHA_BLENDFUNC		(1<<21)
+#define ABLENDFUNC_ADD			0
+#define ABLENDFUNC_SUB			(1<<16)
+#define ABLENDFUNC_RVSE_SUB		(2<<16)
+#define ABLENDFUNC_MIN			(3<<16)
+#define ABLENDFUNC_MAX			(4<<16)
+#define SRC_DST_ABLEND_MASK		0xfff
+#define ENABLE_SRC_ABLEND_FACTOR	(1<<11)
+#define SRC_ABLEND_FACT(x)		((x)<<6)
+#define ENABLE_DST_ABLEND_FACTOR	(1<<5)
+#define DST_ABLEND_FACT(x)		(x)
+
+
+/* _3DSTATE_MAP_BLEND_ARG, p152 */
+#define _3DSTATE_MAP_BLEND_ARG_CMD(stage)	(CMD_3D|(0x0e<<24)|((stage)<<20))
+
+#define TEXPIPE_COLOR			0
+#define TEXPIPE_ALPHA			(1<<18)
+#define TEXPIPE_KILL			(2<<18)
+#define TEXBLEND_ARG0			0
+#define TEXBLEND_ARG1			(1<<15)
+#define TEXBLEND_ARG2			(2<<15)
+#define TEXBLEND_ARG3			(3<<15)
+#define TEXBLENDARG_MODIFY_PARMS	(1<<6)
+#define TEXBLENDARG_REPLICATE_ALPHA 	(1<<5)
+#define TEXBLENDARG_INV_ARG 		(1<<4)
+#define TEXBLENDARG_ONE 		0
+#define TEXBLENDARG_FACTOR		0x01
+#define TEXBLENDARG_ACCUM		0x02
+#define TEXBLENDARG_DIFFUSE		0x03
+#define TEXBLENDARG_SPEC		0x04
+#define TEXBLENDARG_CURRENT		0x05
+#define TEXBLENDARG_TEXEL0		0x06
+#define TEXBLENDARG_TEXEL1		0x07
+#define TEXBLENDARG_TEXEL2		0x08
+#define TEXBLENDARG_TEXEL3		0x09
+#define TEXBLENDARG_FACTOR_N		0x0e
+
+/* _3DSTATE_MAP_BLEND_OP, p155 */
+#define _3DSTATE_MAP_BLEND_OP_CMD(stage)	(CMD_3D|(0x0d<<24)|((stage)<<20))
+/* The TEXPIPE_* selectors are shared with _3DSTATE_MAP_BLEND_ARG and are
+ * already defined above with these same values; the copy here is compiled
+ * out and kept for reference only.
+ */
+#if 0
+#   define TEXPIPE_COLOR		0
+#   define TEXPIPE_ALPHA		(1<<18)
+#   define TEXPIPE_KILL			(2<<18)
+#endif
+#define ENABLE_TEXOUTPUT_WRT_SEL	(1<<17)
+#define TEXOP_OUTPUT_CURRENT		0
+#define TEXOP_OUTPUT_ACCUM		(1<<15)
+#define ENABLE_TEX_CNTRL_STAGE		((1<<12)|(1<<11))
+#define DISABLE_TEX_CNTRL_STAGE		(1<<12)
+#define TEXOP_SCALE_SHIFT		9
+#define TEXOP_SCALE_1X			(0 << TEXOP_SCALE_SHIFT)
+#define TEXOP_SCALE_2X			(1 << TEXOP_SCALE_SHIFT)
+#define TEXOP_SCALE_4X			(2 << TEXOP_SCALE_SHIFT)
+#define TEXOP_MODIFY_PARMS		(1<<8)
+#define TEXOP_LAST_STAGE		(1<<7)
+/* NOTE(review): TEXBLENDOP_KILLPIXEL and TEXBLENDOP_ARG2 both expand to
+ * 0x02.  Confirm against the hardware docs which of the two is correct
+ * before relying on TEXBLENDOP_KILLPIXEL.
+ */
+#define TEXBLENDOP_KILLPIXEL		0x02
+#define TEXBLENDOP_ARG1 		0x01
+#define TEXBLENDOP_ARG2 		0x02
+#define TEXBLENDOP_MODULATE		0x03
+#define TEXBLENDOP_ADD			0x06
+#define TEXBLENDOP_ADDSIGNED		0x07
+#define TEXBLENDOP_BLEND		0x08
+#define TEXBLENDOP_BLEND_AND_ADD	0x09
+#define TEXBLENDOP_SUBTRACT		0x0a
+#define TEXBLENDOP_DOT3 		0x0b
+#define TEXBLENDOP_DOT4 		0x0c
+#define TEXBLENDOP_MODULATE_AND_ADD	0x0d
+#define TEXBLENDOP_MODULATE_2X_AND_ADD	0x0e
+#define TEXBLENDOP_MODULATE_4X_AND_ADD	0x0f
+
+/* _3DSTATE_MAP_BUMP_TABLE, p160 TODO */
+/* _3DSTATE_MAP_COLOR_CHROMA_KEY, p161 TODO */
+
+/* Texture-coordinate transform command.  The opcode is spelled with a
+ * literal (3<<29) here rather than CMD_3D.
+ */
+#define _3DSTATE_MAP_COORD_TRANSFORM	((3<<29)|(0x1d<<24)|(0x8c<<16))
+#define DISABLE_TEX_TRANSFORM		(1<<28)
+/* The argument is parenthesized so that expressions such as
+ * TEXTURE_SET(a | b) shift the whole value, not just its last operand.
+ */
+#define TEXTURE_SET(x)			((x)<<29)
+
+/* Vertex transform command.  The same opcode bits and the same
+ * DISABLE_VIEWPORT_TRANSFORM value are defined again in the
+ * _3DSTATE_VERTEX_TRANSFORM section further down this header.
+ */
+#define _3DSTATE_VERTEX_TRANSFORM	((3<<29)|(0x1d<<24)|(0x8b<<16))
+#define DISABLE_VIEWPORT_TRANSFORM	(1<<31)
+#define DISABLE_PERSPECTIVE_DIVIDE	(1<<29)
+
+
+/* _3DSTATE_MAP_COORD_SET_BINDINGS, p162 */
+#define _3DSTATE_MAP_COORD_SETBIND_CMD	(CMD_3D|(0x1d<<24)|(0x02<<16))
+#define TEXBIND_MASK3			((1<<15)|(1<<14)|(1<<13)|(1<<12))
+#define TEXBIND_MASK2			((1<<11)|(1<<10)|(1<<9)|(1<<8))
+#define TEXBIND_MASK1			((1<<7)|(1<<6)|(1<<5)|(1<<4))
+#define TEXBIND_MASK0			((1<<3)|(1<<2)|(1<<1)|1)
+
+#define TEXBIND_SET3(x) 		((x)<<12)
+#define TEXBIND_SET2(x) 		((x)<<8)
+#define TEXBIND_SET1(x) 		((x)<<4)
+#define TEXBIND_SET0(x) 		(x)
+
+#define TEXCOORDSRC_KEEP		0
+#define TEXCOORDSRC_DEFAULT		0x01
+#define TEXCOORDSRC_VTXSET_0		0x08
+#define TEXCOORDSRC_VTXSET_1		0x09
+#define TEXCOORDSRC_VTXSET_2		0x0a
+#define TEXCOORDSRC_VTXSET_3		0x0b
+#define TEXCOORDSRC_VTXSET_4		0x0c
+#define TEXCOORDSRC_VTXSET_5		0x0d
+#define TEXCOORDSRC_VTXSET_6		0x0e
+#define TEXCOORDSRC_VTXSET_7		0x0f
+
+#define MAP_UNIT(unit)			((unit)<<16)
+#define MAP_UNIT_MASK			(0x7<<16)
+
+/* _3DSTATE_MAP_COORD_SETS, p164 */
+#define _3DSTATE_MAP_COORD_SET_CMD	(CMD_3D|(0x1c<<24)|(0x01<<19))
+#define ENABLE_TEXCOORD_PARAMS		(1<<15)
+#define TEXCOORDS_ARE_NORMAL		(1<<14)
+#define TEXCOORDS_ARE_IN_TEXELUNITS	0
+#define TEXCOORDTYPE_CARTESIAN		0
+#define TEXCOORDTYPE_HOMOGENEOUS	(1<<11)
+#define TEXCOORDTYPE_VECTOR		(2<<11)
+#define TEXCOORDTYPE_MASK	        (0x7<<11)
+#define ENABLE_ADDR_V_CNTL		(1<<7)
+#define ENABLE_ADDR_U_CNTL		(1<<3)
+#define TEXCOORD_ADDR_V_MODE(x) 	((x)<<4)
+#define TEXCOORD_ADDR_U_MODE(x) 	(x)
+#define TEXCOORDMODE_WRAP		0
+#define TEXCOORDMODE_MIRROR		1
+#define TEXCOORDMODE_CLAMP		2
+#define TEXCOORDMODE_WRAP_SHORTEST	3
+#define TEXCOORDMODE_CLAMP_BORDER	4
+#define TEXCOORD_ADDR_V_MASK		0x70
+#define TEXCOORD_ADDR_U_MASK		0x7
+
+/* _3DSTATE_MAP_CUBE, p168 TODO */
+#define _3DSTATE_MAP_CUBE		(CMD_3D|(0x1c<<24)|(0x0a<<19))
+#define CUBE_NEGX_ENABLE                (1<<5)
+#define CUBE_POSX_ENABLE                (1<<4)
+#define CUBE_NEGY_ENABLE                (1<<3)
+#define CUBE_POSY_ENABLE                (1<<2)
+#define CUBE_NEGZ_ENABLE                (1<<1)
+#define CUBE_POSZ_ENABLE                (1<<0)
+
+
+/* _3DSTATE_MODES_1, p190 */
+#define _3DSTATE_MODES_1_CMD		(CMD_3D|(0x08<<24))
+#define BLENDFUNC_MASK			0x3f0000
+#define ENABLE_COLR_BLND_FUNC		(1<<21)
+#define BLENDFUNC_ADD			0
+#define BLENDFUNC_SUB			(1<<16)
+#define BLENDFUNC_RVRSE_SUB		(2<<16)
+#define BLENDFUNC_MIN			(3<<16)
+#define BLENDFUNC_MAX			(4<<16)
+#define SRC_DST_BLND_MASK		0xfff
+#define ENABLE_SRC_BLND_FACTOR		(1<<11)
+#define ENABLE_DST_BLND_FACTOR		(1<<5)
+#define SRC_BLND_FACT(x)		((x)<<6)
+#define DST_BLND_FACT(x)		(x)
+
+
+/* _3DSTATE_MODES_2, p192 */
+#define _3DSTATE_MODES_2_CMD		(CMD_3D|(0x0f<<24))
+#define ENABLE_GLOBAL_DEPTH_BIAS	(1<<22)
+#define GLOBAL_DEPTH_BIAS(x)		((x)<<14)
+#define ENABLE_ALPHA_TEST_FUNC		(1<<13)
+#define ENABLE_ALPHA_REF_VALUE		(1<<8)
+#define ALPHA_TEST_FUNC(x)		((x)<<9)
+#define ALPHA_REF_VALUE(x)		(x)
+
+/* Bits 0..13: the unshifted reference value, ENABLE_ALPHA_REF_VALUE (bit 8),
+ * the compare function (from bit 9) and ENABLE_ALPHA_TEST_FUNC (bit 13).
+ * ALPHA_REF_VALUE() does not mask its argument, so callers must keep the
+ * reference below bit 8.
+ */
+#define ALPHA_TEST_REF_MASK		0x3fff
+
+/* _3DSTATE_MODES_3, p193 */
+#define _3DSTATE_MODES_3_CMD		(CMD_3D|(0x02<<24))
+#define DEPTH_TEST_FUNC_MASK		0x1f0000
+#define ENABLE_DEPTH_TEST_FUNC		(1<<20)
+/* Uses COMPAREFUNC */
+#define DEPTH_TEST_FUNC(x)		((x)<<16)
+#define ENABLE_ALPHA_SHADE_MODE 	(1<<11)
+#define ENABLE_FOG_SHADE_MODE		(1<<9)
+#define ENABLE_SPEC_SHADE_MODE		(1<<7)
+#define ENABLE_COLOR_SHADE_MODE 	(1<<5)
+#define ALPHA_SHADE_MODE(x)		((x)<<10)
+#define FOG_SHADE_MODE(x)		((x)<<8)
+#define SPEC_SHADE_MODE(x)		((x)<<6)
+#define COLOR_SHADE_MODE(x)		((x)<<4)
+#define CULLMODE_MASK			0xf
+#define ENABLE_CULL_MODE		(1<<3)
+#define CULLMODE_BOTH			0
+#define CULLMODE_NONE			1
+#define CULLMODE_CW			2
+#define CULLMODE_CCW			3
+
+#define SHADE_MODE_LINEAR		0
+#define SHADE_MODE_FLAT 		0x1
+
+/* _3DSTATE_MODES_4, p195 */
+#define _3DSTATE_MODES_4_CMD		(CMD_3D|(0x16<<24))
+#define ENABLE_LOGIC_OP_FUNC		(1<<23)
+#define LOGIC_OP_FUNC(x)		((x)<<18)
+#define LOGICOP_MASK			((1<<18)|(1<<19)|(1<<20)|(1<<21))
+/* Unshifted 4-bit logic-op codes; pass through LOGIC_OP_FUNC() to place
+ * them in the LOGICOP_MASK field.
+ */
+#define LOGICOP_CLEAR			0
+#define LOGICOP_NOR			0x1
+#define LOGICOP_AND_INV 		0x2
+#define LOGICOP_COPY_INV		0x3
+#define LOGICOP_AND_RVRSE		0x4
+#define LOGICOP_INV			0x5
+#define LOGICOP_XOR			0x6
+#define LOGICOP_NAND			0x7
+#define LOGICOP_AND			0x8
+#define LOGICOP_EQUIV			0x9
+#define LOGICOP_NOOP			0xa
+#define LOGICOP_OR_INV			0xb
+#define LOGICOP_COPY			0xc
+#define LOGICOP_OR_RVRSE		0xd
+#define LOGICOP_OR			0xe
+#define LOGICOP_SET			0xf
+/* Stencil test (read) mask: enable bit 17 plus the 8-bit mask in bits 8..15.
+ * MODE4_ENABLE_STENCIL_TEST_MASK covers both and is what callers clear
+ * before writing a new mask.
+ */
+#define MODE4_ENABLE_STENCIL_TEST_MASK	((1<<17)|(0xff00))
+#define ENABLE_STENCIL_TEST_MASK	(1<<17)
+#define STENCIL_TEST_MASK(x)		(((x)&0xff)<<8)
+/* Stencil write mask: enable bit 16 plus the 8-bit mask in bits 0..7. */
+#define MODE4_ENABLE_STENCIL_WRITE_MASK	((1<<16)|(0x00ff))
+#define ENABLE_STENCIL_WRITE_MASK	(1<<16)
+#define STENCIL_WRITE_MASK(x)		((x)&0xff)
+
+/* _3DSTATE_MODES_5, p196 */
+#define _3DSTATE_MODES_5_CMD		(CMD_3D|(0x0c<<24))
+#define ENABLE_SPRITE_POINT_TEX 	(1<<23)
+#define SPRITE_POINT_TEX_ON		(1<<22)
+#define SPRITE_POINT_TEX_OFF		0
+#define FLUSH_RENDER_CACHE		(1<<18)
+#define FLUSH_TEXTURE_CACHE		(1<<16)
+#define FIXED_LINE_WIDTH_MASK		0xfc00
+#define ENABLE_FIXED_LINE_WIDTH 	(1<<15)
+#define FIXED_LINE_WIDTH(x)		((x)<<10)
+#define FIXED_POINT_WIDTH_MASK		0x3ff
+#define ENABLE_FIXED_POINT_WIDTH	(1<<9)
+#define FIXED_POINT_WIDTH(x)		(x)
+
+/* _3DSTATE_RASTERIZATION_RULES, p198 */
+#define _3DSTATE_RASTER_RULES_CMD	(CMD_3D|(0x07<<24))
+#define ENABLE_POINT_RASTER_RULE	(1<<15)
+#define OGL_POINT_RASTER_RULE		(1<<13)
+#define ENABLE_LINE_STRIP_PROVOKE_VRTX	(1<<8)
+#define ENABLE_TRI_FAN_PROVOKE_VRTX	(1<<5)
+#define ENABLE_TRI_STRIP_PROVOKE_VRTX	(1<<2)
+#define LINE_STRIP_PROVOKE_VRTX_MASK	(3<<6)
+#define LINE_STRIP_PROVOKE_VRTX(x)	((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX_MASK	(3<<3)
+#define TRI_FAN_PROVOKE_VRTX(x) 	((x)<<3)
+#define TRI_STRIP_PROVOKE_VRTX_MASK	(3<<0)
+#define TRI_STRIP_PROVOKE_VRTX(x)	(x)
+
+/* _3DSTATE_SCISSOR_ENABLE, p200 */
+#define _3DSTATE_SCISSOR_ENABLE_CMD	(CMD_3D|(0x1c<<24)|(0x10<<19))
+#define ENABLE_SCISSOR_RECT		((1<<1) | 1)
+#define DISABLE_SCISSOR_RECT		(1<<1)
+
+/* _3DSTATE_SCISSOR_RECTANGLE_0, p201 */
+#define _3DSTATE_SCISSOR_RECT_0_CMD	(CMD_3D|(0x1d<<24)|(0x81<<16)|1)
+/* Dword 1 */
+#define SCISSOR_RECT_0_YMIN(x)		((x)<<16)
+#define SCISSOR_RECT_0_XMIN(x)		(x)
+/* Dword 2 */
+#define SCISSOR_RECT_0_YMAX(x)		((x)<<16)
+#define SCISSOR_RECT_0_XMAX(x)		(x)
+
+/* _3DSTATE_STENCIL_TEST, p202 */
+#define _3DSTATE_STENCIL_TEST_CMD	(CMD_3D|(0x09<<24))
+#define ENABLE_STENCIL_PARMS		(1<<23)
+/* Bits 14..23: ENABLE_STENCIL_PARMS plus the three op fields below. */
+#define STENCIL_OPS_MASK		(0xffc000)
+#define STENCIL_FAIL_OP(x)		((x)<<20)
+#define STENCIL_PASS_DEPTH_FAIL_OP(x)	((x)<<17)
+#define STENCIL_PASS_DEPTH_PASS_OP(x)	((x)<<14)
+
+/* Bits 9..13: ENABLE_STENCIL_TEST_FUNC plus the compare function field. */
+#define ENABLE_STENCIL_TEST_FUNC_MASK	((1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9))
+#define ENABLE_STENCIL_TEST_FUNC	(1<<13)
+/* Uses COMPAREFUNC */
+#define STENCIL_TEST_FUNC(x)		((x)<<9)
+/* Bits 0..8: ENABLE_STENCIL_REF_VALUE plus the 8-bit reference value.
+ * STENCIL_REF_VALUE() does not mask its argument, so a value above 0xff
+ * spills into the enable and function bits.
+ */
+#define STENCIL_REF_VALUE_MASK		((1<<8)|0xff)
+#define ENABLE_STENCIL_REF_VALUE	(1<<8)
+#define STENCIL_REF_VALUE(x)		(x)
+
+/* _3DSTATE_VERTEX_FORMAT, p204 */
+#define _3DSTATE_VFT0_CMD	(CMD_3D|(0x05<<24))
+#define VFT0_POINT_WIDTH	(1<<12)
+#define VFT0_TEX_COUNT_MASK    	(7<<8)
+#define VFT0_TEX_COUNT_SHIFT    8
+#define VFT0_TEX_COUNT(x) 	((x)<<8)
+#define VFT0_SPEC		(1<<7)
+#define VFT0_DIFFUSE		(1<<6)
+#define VFT0_DEPTH_OFFSET  	(1<<5)
+#define VFT0_XYZ		(1<<1)
+#define VFT0_XYZW		(2<<1)
+#define VFT0_XY			(3<<1)
+#define VFT0_XYW		(4<<1)
+#define VFT0_XYZW_MASK          (7<<1)
+
+/* _3DSTATE_VERTEX_FORMAT_2, p206 */
+#define _3DSTATE_VFT1_CMD	(CMD_3D|(0x0a<<24))
+#define VFT1_TEX7_FMT(x)	((x)<<14)
+#define VFT1_TEX6_FMT(x)	((x)<<12)
+#define VFT1_TEX5_FMT(x)	((x)<<10)
+#define VFT1_TEX4_FMT(x)	((x)<<8)
+#define VFT1_TEX3_FMT(x)	((x)<<6)
+#define VFT1_TEX2_FMT(x)	((x)<<4)
+#define VFT1_TEX1_FMT(x)	((x)<<2)
+#define VFT1_TEX0_FMT(x)	(x)
+#define VFT1_TEX0_MASK          3
+#define VFT1_TEX1_SHIFT         2
+
+/*New stuff picked up along the way */
+
+#define MLC_LOD_BIAS_MASK ((1<<7)-1)
+
+
+/* _3DSTATE_VERTEX_TRANSFORM, p207 */
+/* The two command words differ only in their low bits (0 vs 6).
+ * NOTE(review): presumably a dword-count field selecting whether the matrix
+ * payload follows -- confirm against the hardware docs.
+ */
+#define _3DSTATE_VERTEX_TRANS_CMD	(CMD_3D|(0x1d<<24)|(0x8b<<16)|0)
+#define _3DSTATE_VERTEX_TRANS_MTX_CMD	(CMD_3D|(0x1d<<24)|(0x8b<<16)|6)
+/* Dword 1 */
+#define ENABLE_VIEWPORT_TRANSFORM	((1<<31)|(1<<30))
+/* DISABLE_VIEWPORT_TRANSFORM is also defined earlier in this header, next to
+ * _3DSTATE_VERTEX_TRANSFORM, with the identical replacement text; keep the
+ * two in sync or the redefinition stops being benign.
+ */
+#define DISABLE_VIEWPORT_TRANSFORM	(1<<31)
+#define ENABLE_PERSP_DIVIDE		((1<<29)|(1<<28))
+#define DISABLE_PERSP_DIVIDE		(1<<29)
+#define VRTX_TRANS_LOAD_MATRICES	0x7421
+#define VRTX_TRANS_NO_LOAD_MATRICES	0x0000
+/* Dword 2 -> 7  are matrix elements */
+
+/* _3DSTATE_W_STATE, p209 */
+#define _3DSTATE_W_STATE_CMD		(CMD_3D|(0x1d<<24)|(0x8d<<16)|1)
+/* Dword 1 */
+#define MAGIC_W_STATE_DWORD1		0x00000008
+/* Dword 2 */
+#define WFAR_VALUE(x)			(x)
+
+
+/* Stipple command, carried over from the i810, apparently:
+ */
+#define _3DSTATE_STIPPLE           ((0x3<<29)|(0x1d<<24)|(0x83<<16))
+#define ST1_ENABLE               (1<<16)
+#define ST1_MASK                 (0xffff)
+
+
+
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_2      ((0x3<<29)|(0x1d<<24)|(0x03<<16))
+#define LOAD_TEXTURE_MAP0                   (1<<11)
+#define LOAD_GLOBAL_COLOR_FACTOR            (1<<6)
+
+#define TM0S0_ADDRESS_MASK              0xfffffffc
+#define TM0S0_USE_FENCE                 (1<<1)
+
+#define TM0S1_HEIGHT_SHIFT              21
+#define TM0S1_WIDTH_SHIFT               10
+#define TM0S1_PALETTE_SELECT            (1<<9)
+#define TM0S1_MAPSURF_FORMAT_MASK       (0x7 << 6)
+#define TM0S1_MAPSURF_FORMAT_SHIFT      6
+#define    MAPSURF_8BIT_INDEXED		   (0<<6)
+#define    MAPSURF_8BIT		 	   (1<<6)
+#define    MAPSURF_16BIT		   (2<<6)
+#define    MAPSURF_32BIT		   (3<<6)
+#define    MAPSURF_411			   (4<<6)
+#define    MAPSURF_422			   (5<<6)
+#define    MAPSURF_COMPRESSED		   (6<<6)
+#define    MAPSURF_4BIT_INDEXED		   (7<<6)
+#define TM0S1_MT_FORMAT_MASK         (0x7 << 3)
+#define TM0S1_MT_FORMAT_SHIFT        3
+#define    MT_4BIT_IDX_ARGB8888	           (7<<3)       /* SURFACE_4BIT_INDEXED */
+#define    MT_8BIT_IDX_RGB565	           (0<<3)       /* SURFACE_8BIT_INDEXED */
+#define    MT_8BIT_IDX_ARGB1555	           (1<<3)
+#define    MT_8BIT_IDX_ARGB4444	           (2<<3)
+#define    MT_8BIT_IDX_AY88		   (3<<3)
+#define    MT_8BIT_IDX_ABGR8888	           (4<<3)
+#define    MT_8BIT_IDX_BUMP_88DVDU 	   (5<<3)
+#define    MT_8BIT_IDX_BUMP_655LDVDU	   (6<<3)
+#define    MT_8BIT_IDX_ARGB8888	           (7<<3)
+#define    MT_8BIT_I8		           (0<<3)       /* SURFACE_8BIT */
+#define    MT_8BIT_L8		           (1<<3)
+#define    MT_16BIT_RGB565 		   (0<<3)       /* SURFACE_16BIT */
+#define    MT_16BIT_ARGB1555		   (1<<3)
+#define    MT_16BIT_ARGB4444		   (2<<3)
+#define    MT_16BIT_AY88		   (3<<3)
+#define    MT_16BIT_DIB_ARGB1555_8888      (4<<3)
+#define    MT_16BIT_BUMP_88DVDU	           (5<<3)
+#define    MT_16BIT_BUMP_655LDVDU	   (6<<3)
+#define    MT_16BIT_DIB_RGB565_8888	   (7<<3)
+#define    MT_32BIT_ARGB8888		   (0<<3)       /* SURFACE_32BIT */
+#define    MT_32BIT_ABGR8888		   (1<<3)
+#define    MT_32BIT_XRGB8888		   (2<<3)       /* XXX: Guess from i915_reg.h */
+#define    MT_32BIT_BUMP_XLDVDU_8888	   (6<<3)
+#define    MT_32BIT_DIB_8888		   (7<<3)
+#define    MT_411_YUV411		   (0<<3)       /* SURFACE_411 */
+#define    MT_422_YCRCB_SWAPY	           (0<<3)       /* SURFACE_422 */
+#define    MT_422_YCRCB_NORMAL	           (1<<3)
+#define    MT_422_YCRCB_SWAPUV	           (2<<3)
+#define    MT_422_YCRCB_SWAPUVY	           (3<<3)
+#define    MT_COMPRESS_DXT1		   (0<<3)       /* SURFACE_COMPRESSED */
+#define    MT_COMPRESS_DXT2_3	           (1<<3)
+#define    MT_COMPRESS_DXT4_5	           (2<<3)
+#define    MT_COMPRESS_FXT1		   (3<<3)
+#define TM0S1_COLORSPACE_CONVERSION     (1 << 2)
+#define TM0S1_TILED_SURFACE             (1 << 1)
+#define TM0S1_TILE_WALK                 (1 << 0)
+
+/* TM0S2 field layout. */
+#define TM0S2_PITCH_SHIFT               21
+#define TM0S2_CUBE_FACE_ENA_SHIFT       15
+#define TM0S2_CUBE_FACE_ENA_MASK        (1<<15)
+#define TM0S2_MAP_FORMAT                (1<<14)
+#define TM0S2_VERTICAL_LINE_STRIDE      (1<<13)
+/* "VERITCAL" is a misspelling of "VERTICAL"; the identifier is left as-is
+ * because code elsewhere may already refer to it by this name.
+ */
+#define TM0S2_VERITCAL_LINE_STRIDE_OFF  (1<<12)
+#define TM0S2_OUTPUT_CHAN_SHIFT         10
+#define TM0S2_OUTPUT_CHAN_MASK          (3<<10)
+
+#define TM0S3_MIP_FILTER_MASK           (0x3<<30)
+#define TM0S3_MIP_FILTER_SHIFT          30
+#define MIPFILTER_NONE		0
+#define MIPFILTER_NEAREST	1
+#define MIPFILTER_LINEAR	3
+#define TM0S3_MAG_FILTER_MASK           (0x3<<28)
+#define TM0S3_MAG_FILTER_SHIFT          28
+#define TM0S3_MIN_FILTER_MASK           (0x3<<26)
+#define TM0S3_MIN_FILTER_SHIFT          26
+#define FILTER_NEAREST		0
+#define FILTER_LINEAR		1
+#define FILTER_ANISOTROPIC	2
+
+#define TM0S3_LOD_BIAS_SHIFT		17
+#define TM0S3_LOD_BIAS_MASK		(0x1ff<<17)
+#define TM0S3_MAX_MIP_SHIFT		9
+#define TM0S3_MAX_MIP_MASK		(0xff<<9)
+#define TM0S3_MIN_MIP_SHIFT		3
+#define TM0S3_MIN_MIP_MASK		(0x3f<<3)
+#define TM0S3_KILL_PIXEL		(1<<2)
+#define TM0S3_KEYED_FILTER		(1<<1)
+#define TM0S3_CHROMA_KEY		(1<<0)
+
+
+/* _3DSTATE_MAP_TEXEL_STREAM, p188 */
+#define _3DSTATE_MAP_TEX_STREAM_CMD	(CMD_3D|(0x1c<<24)|(0x05<<19))
+#define DISABLE_TEX_STREAM_BUMP 	(1<<12)
+#define ENABLE_TEX_STREAM_BUMP		((1<<12)|(1<<11))
+#define TEX_MODIFY_UNIT_0		0
+#define TEX_MODIFY_UNIT_1		(1<<8)
+#define ENABLE_TEX_STREAM_COORD_SET	(1<<7)
+#define TEX_STREAM_COORD_SET(x) 	((x)<<4)
+#define ENABLE_TEX_STREAM_MAP_IDX	(1<<3)
+#define TEX_STREAM_MAP_IDX(x)		(x)
+
+#endif
diff --git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c
new file mode 100644
index 0000000000..38e524e183
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i830_state.c
@@ -0,0 +1,1134 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "main/dd.h"
+
+#include "texmem.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "intel_screen.h"
+#include "intel_batchbuffer.h"
+#include "intel_fbo.h"
+#include "intel_buffers.h"
+
+#include "i830_context.h"
+#include "i830_reg.h"
+
+#define FILE_DEBUG_FLAG DEBUG_STATE
+
+/**
+ * Program the stencil comparison function, reference value and test mask
+ * into the cached context registers.
+ *
+ * \param ctx   GL context; converted to the i830 context with i830_context().
+ * \param face  Unused; the same registers are written for any face.
+ * \param func  GL comparison enum, translated by
+ *              intel_translate_compare_func().
+ * \param ref   Reference value; clamped to 0..0xff before being stored.
+ * \param mask  Test mask; only the low 8 bits are used.
+ */
+static void
+i830StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref,
+                        GLuint mask)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   int test = intel_translate_compare_func(func);
+
+   mask = mask & 0xff;
+
+   DBG("%s : func: %s, ref : 0x%x, mask: 0x%x\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(func), ref, mask);
+
+   /* STENCIL_REF_VALUE() inserts its argument unmasked.  The reference
+    * field is 8 bits wide with the enable and compare-function bits
+    * directly above it, so an out-of-range (or negative) value would
+    * corrupt those bits.  Clamp rather than mask, matching how GL treats
+    * out-of-range stencil references.
+    */
+   if (ref < 0)
+      ref = 0;
+   else if (ref > 0xff)
+      ref = 0xff;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STATE4] &= ~MODE4_ENABLE_STENCIL_TEST_MASK;
+   i830->state.Ctx[I830_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK |
+                                           STENCIL_TEST_MASK(mask));
+   i830->state.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_REF_VALUE_MASK |
+                                                ENABLE_STENCIL_TEST_FUNC_MASK);
+   i830->state.Ctx[I830_CTXREG_STENCILTST] |= (ENABLE_STENCIL_REF_VALUE |
+                                               ENABLE_STENCIL_TEST_FUNC |
+                                               STENCIL_REF_VALUE(ref) |
+                                               STENCIL_TEST_FUNC(test));
+}
+
+/**
+ * Store the stencil write mask in the cached STATE4 context register.
+ *
+ * \param ctx   GL context; converted to the i830 context with i830_context().
+ * \param face  Unused.
+ * \param mask  Write mask; logged as passed in, then reduced to 8 bits.
+ */
+static void
+i830StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   GLuint state4;
+
+   DBG("%s : mask 0x%x\n", __FUNCTION__, mask);
+
+   mask &= 0xff;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+   /* Drop the previous enable bit and mask, then install the new ones. */
+   state4 = i830->state.Ctx[I830_CTXREG_STATE4];
+   state4 &= ~MODE4_ENABLE_STENCIL_WRITE_MASK;
+   state4 |= ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(mask);
+   i830->state.Ctx[I830_CTXREG_STATE4] = state4;
+}
+
+/**
+ * Translate a GL stencil operation enum into the STENCILOP_* code written
+ * to the hardware.
+ *
+ * GL_INCR and GL_DECR map to the saturating codes, while the _WRAP variants
+ * map to the plain increment/decrement codes.  Unrecognised enums yield 0.
+ */
+static int
+i830_translate_stencil_op(GLenum op)
+{
+   switch (op) {
+   case GL_KEEP:
+      return STENCILOP_KEEP;
+   case GL_ZERO:
+      return STENCILOP_ZERO;
+   case GL_REPLACE:
+      return STENCILOP_REPLACE;
+   case GL_INCR:
+      return STENCILOP_INCRSAT;
+   case GL_DECR:
+      return STENCILOP_DECRSAT;
+   case GL_INCR_WRAP:
+      return STENCILOP_INCR;
+   case GL_DECR_WRAP:
+      return STENCILOP_DECR;
+   case GL_INVERT:
+      return STENCILOP_INVERT;
+   default:
+      return 0;
+   }
+}
+
+/**
+ * Program the three stencil operations into the cached STENCILTST context
+ * register.
+ *
+ * \param ctx    GL context; converted to the i830 context with
+ *               i830_context().
+ * \param face   Unused.
+ * \param fail   Operation when the stencil test fails.
+ * \param zfail  Operation when the stencil test passes and depth fails.
+ * \param zpass  Operation when both the stencil and depth tests pass.
+ */
+static void
+i830StencilOpSeparate(GLcontext * ctx, GLenum face, GLenum fail, GLenum zfail,
+                      GLenum zpass)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   int fop, dfop, dpop;
+
+   DBG("%s: fail : %s, zfail: %s, zpass : %s\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(fail),
+       _mesa_lookup_enum_by_nr(zfail), 
+       _mesa_lookup_enum_by_nr(zpass));
+
+   fop = i830_translate_stencil_op(fail);
+   dfop = i830_translate_stencil_op(zfail);
+   dpop = i830_translate_stencil_op(zpass);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STENCILTST] &= ~(STENCIL_OPS_MASK);
+   i830->state.Ctx[I830_CTXREG_STENCILTST] |= (ENABLE_STENCIL_PARMS |
+                                               STENCIL_FAIL_OP(fop) |
+                                               STENCIL_PASS_DEPTH_FAIL_OP
+                                               (dfop) |
+                                               STENCIL_PASS_DEPTH_PASS_OP
+                                               (dpop));
+}
+
+/**
+ * Program the alpha test function and reference value into the cached
+ * STATE2 context register.
+ *
+ * \param ctx   GL context; converted to the i830 context with i830_context().
+ * \param func  GL comparison enum, translated by
+ *              intel_translate_compare_func().
+ * \param ref   Reference alpha; converted to an unsigned byte with
+ *              UNCLAMPED_FLOAT_TO_UBYTE before being stored.
+ */
+static void
+i830AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   const int hw_func = intel_translate_compare_func(func);
+   GLubyte ref_ub;
+   GLuint alpha_bits;
+
+   UNCLAMPED_FLOAT_TO_UBYTE(ref_ub, ref);
+
+   /* Assemble the new field contents up front. */
+   alpha_bits = ENABLE_ALPHA_TEST_FUNC | ENABLE_ALPHA_REF_VALUE;
+   alpha_bits |= ALPHA_TEST_FUNC(hw_func);
+   alpha_bits |= ALPHA_REF_VALUE((GLuint) ref_ub);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STATE2] =
+      (i830->state.Ctx[I830_CTXREG_STATE2] & ~ALPHA_TEST_REF_MASK) |
+      alpha_bits;
+}
+
+/**
+ * Re-derive the colour-blend and logic-op enable bits in ENABLES_1 from the
+ * current GL state.  Exactly one of three combinations is written:
+ * logic op on / blend off, blend on / logic op off, or both off, with an
+ * enabled RGBA logic op taking precedence over blending.
+ *
+ * It needs to be called from the numerous places where the LogicOp or
+ * Independent Alpha Blend state could change without a subsequent call to
+ * glEnable.
+ *
+ * \todo
+ * This function is substantially different from the old i830-specific driver.
+ * I'm not sure which is correct.
+ */
+static void
+i830EvalLogicOpBlendState(GLcontext * ctx)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   GLuint enables;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+   if (RGBA_LOGICOP_ENABLED(ctx))
+      enables = DISABLE_COLOR_BLEND | ENABLE_LOGIC_OP;
+   else if (ctx->Color.BlendEnabled)
+      enables = ENABLE_COLOR_BLEND | DISABLE_LOGIC_OP;
+   else
+      enables = DISABLE_COLOR_BLEND | DISABLE_LOGIC_OP;
+
+   /* ENABLE_COLOR_BLEND spans both bits of the blend field, so it doubles
+    * as that field's mask here.
+    */
+   i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~(ENABLE_COLOR_BLEND |
+                                               ENABLE_LOGIC_OP_MASK);
+   i830->state.Ctx[I830_CTXREG_ENABLES_1] |= enables;
+}
+
+/**
+ * Store the constant blend colour in the cached BLENDCOLOR1 context
+ * register, packed as 8-bit A, R, G, B from the most significant byte down.
+ *
+ * \param ctx    GL context; converted to the i830 context with
+ *               i830_context().
+ * \param color  Float components, indexed by RCOMP/GCOMP/BCOMP/ACOMP; each
+ *               is converted with UNCLAMPED_FLOAT_TO_UBYTE.
+ */
+static void
+i830BlendColor(GLcontext * ctx, const GLfloat color[4])
+{
+   struct i830_context *i830 = i830_context(ctx);
+   GLubyte r, g, b, a;
+
+   DBG("%s\n", __FUNCTION__);
+   
+   UNCLAMPED_FLOAT_TO_UBYTE(r, color[RCOMP]);
+   UNCLAMPED_FLOAT_TO_UBYTE(g, color[GCOMP]);
+   UNCLAMPED_FLOAT_TO_UBYTE(b, color[BCOMP]);
+   UNCLAMPED_FLOAT_TO_UBYTE(a, color[ACOMP]);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   /* Widen to GLuint before shifting: a GLubyte promotes to signed int, and
+    * shifting a value >= 0x80 left by 24 would move a bit into the sign bit,
+    * which is undefined behaviour in C.
+    */
+   i830->state.Ctx[I830_CTXREG_BLENDCOLOR1] =
+      ((GLuint) a << 24) | ((GLuint) r << 16) | ((GLuint) g << 8) |
+      (GLuint) b;
+}
+
+/**
+ * Sets both the blend equation (called "function" in i830 docs) and the
+ * blend function (called "factor" in i830 docs).  This is done in a single
+ * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX)
+ * change the interpretation of the blend function.
+ *
+ * All inputs are read from \c ctx->Color.  If either blend equation is not
+ * one of the five values handled below, a message is printed to stderr and
+ * the function returns without touching the hardware state.
+ */
+static void
+i830_set_blend_state(GLcontext * ctx)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   int funcA;
+   int funcRGB;
+   int eqnA;
+   int eqnRGB;
+   int iab;
+   int s1;
+
+
+   /* RGB: translate the factors first; MIN/MAX override them with ONE/ONE. */
+   funcRGB =
+      SRC_BLND_FACT(intel_translate_blend_factor(ctx->Color.BlendSrcRGB))
+      | DST_BLND_FACT(intel_translate_blend_factor(ctx->Color.BlendDstRGB));
+
+   switch (ctx->Color.BlendEquationRGB) {
+   case GL_FUNC_ADD:
+      eqnRGB = BLENDFUNC_ADD;
+      break;
+   case GL_MIN:
+      eqnRGB = BLENDFUNC_MIN;
+      funcRGB = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE);
+      break;
+   case GL_MAX:
+      eqnRGB = BLENDFUNC_MAX;
+      funcRGB = SRC_BLND_FACT(BLENDFACT_ONE) | DST_BLND_FACT(BLENDFACT_ONE);
+      break;
+   case GL_FUNC_SUBTRACT:
+      eqnRGB = BLENDFUNC_SUB;
+      break;
+   case GL_FUNC_REVERSE_SUBTRACT:
+      eqnRGB = BLENDFUNC_RVRSE_SUB;
+      break;
+   default:
+      fprintf(stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
+              __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB);
+      return;
+   }
+
+
+   /* Alpha: same scheme, but always built with the *_ABLEND_FACT macros so
+    * the MIN/MAX override matches the default path just below.
+    * NOTE(review): the independent-alpha test further down compares the
+    * RGB and alpha encodings directly, so the two macro families are
+    * presumably laid out identically - confirm in i830_reg.h.
+    */
+   funcA = SRC_ABLEND_FACT(intel_translate_blend_factor(ctx->Color.BlendSrcA))
+      | DST_ABLEND_FACT(intel_translate_blend_factor(ctx->Color.BlendDstA));
+
+   switch (ctx->Color.BlendEquationA) {
+   case GL_FUNC_ADD:
+      eqnA = BLENDFUNC_ADD;
+      break;
+   case GL_MIN:
+      eqnA = BLENDFUNC_MIN;
+      funcA = SRC_ABLEND_FACT(BLENDFACT_ONE)
+         | DST_ABLEND_FACT(BLENDFACT_ONE);
+      break;
+   case GL_MAX:
+      eqnA = BLENDFUNC_MAX;
+      funcA = SRC_ABLEND_FACT(BLENDFACT_ONE)
+         | DST_ABLEND_FACT(BLENDFACT_ONE);
+      break;
+   case GL_FUNC_SUBTRACT:
+      eqnA = BLENDFUNC_SUB;
+      break;
+   case GL_FUNC_REVERSE_SUBTRACT:
+      eqnA = BLENDFUNC_RVRSE_SUB;
+      break;
+   default:
+      fprintf(stderr, "[%s:%u] Invalid alpha blend equation (0x%04x).\n",
+              __FUNCTION__, __LINE__, ctx->Color.BlendEquationA);
+      return;
+   }
+
+   /* Assemble the two command dwords. */
+   iab = eqnA | funcA
+      | _3DSTATE_INDPT_ALPHA_BLEND_CMD
+      | ENABLE_SRC_ABLEND_FACTOR | ENABLE_DST_ABLEND_FACTOR
+      | ENABLE_ALPHA_BLENDFUNC;
+   s1 = eqnRGB | funcRGB
+      | _3DSTATE_MODES_1_CMD
+      | ENABLE_SRC_BLND_FACTOR | ENABLE_DST_BLND_FACTOR
+      | ENABLE_COLR_BLND_FUNC;
+
+   /* Independent alpha blending is only needed when the alpha setup
+    * differs from the RGB setup.
+    */
+   if ((eqnA | funcA) != (eqnRGB | funcRGB))
+      iab |= ENABLE_INDPT_ALPHA_BLEND;
+   else
+      iab |= DISABLE_INDPT_ALPHA_BLEND;
+
+   /* Only flag the context packet dirty when something actually changed. */
+   if (iab != i830->state.Ctx[I830_CTXREG_IALPHAB] ||
+       s1 != i830->state.Ctx[I830_CTXREG_STATE1]) {
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+      i830->state.Ctx[I830_CTXREG_IALPHAB] = iab;
+      i830->state.Ctx[I830_CTXREG_STATE1] = s1;
+   }
+
+   /* This will catch a logicop blend equation.  It will also ensure
+    * independent alpha blend is really in the correct state (either enabled
+    * or disabled) if blending is already enabled.
+    */
+
+   i830EvalLogicOpBlendState(ctx);
+
+   if (0) {
+      fprintf(stderr,
+              "[%s:%u] STATE1: 0x%08x IALPHAB: 0x%08x blend is %sabled\n",
+              __FUNCTION__, __LINE__, i830->state.Ctx[I830_CTXREG_STATE1],
+              i830->state.Ctx[I830_CTXREG_IALPHAB],
+              (ctx->Color.BlendEnabled) ? "en" : "dis");
+   }
+}
+
+
+/**
+ * BlendEquationSeparate driver hook (installed by i830InitStateFuncs()).
+ *
+ * \param ctx      GL context
+ * \param modeRGB  new RGB blend equation; used only for the debug message
+ * \param modeA    new alpha blend equation; used only for the debug message
+ *
+ * The real work is done by i830_set_blend_state(), which reads the
+ * equations and factors from \c ctx->Color rather than from the arguments.
+ */
+static void
+i830BlendEquationSeparate(GLcontext * ctx, GLenum modeRGB, GLenum modeA)
+{
+   DBG("%s -> %s, %s\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(modeRGB),
+       _mesa_lookup_enum_by_nr(modeA));
+
+   (void) modeRGB;
+   (void) modeA;
+   i830_set_blend_state(ctx);
+}
+
+
+/**
+ * BlendFuncSeparate driver hook (installed by i830InitStateFuncs()).
+ *
+ * \param ctx         GL context
+ * \param sfactorRGB  RGB source factor; used only for the debug message
+ * \param dfactorRGB  RGB destination factor; used only for the debug message
+ * \param sfactorA    alpha source factor; used only for the debug message
+ * \param dfactorA    alpha destination factor; used only for the debug message
+ *
+ * The real work is done by i830_set_blend_state(), which reads the
+ * equations and factors from \c ctx->Color rather than from the arguments.
+ */
+static void
+i830BlendFuncSeparate(GLcontext * ctx, GLenum sfactorRGB,
+                      GLenum dfactorRGB, GLenum sfactorA, GLenum dfactorA)
+{
+   DBG("%s -> RGB(%s, %s) A(%s, %s)\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(sfactorRGB),
+       _mesa_lookup_enum_by_nr(dfactorRGB),
+       _mesa_lookup_enum_by_nr(sfactorA),
+       _mesa_lookup_enum_by_nr(dfactorA));
+
+   (void) sfactorRGB;
+   (void) dfactorRGB;
+   (void) sfactorA;
+   (void) dfactorA;
+   i830_set_blend_state(ctx);
+}
+
+
+
+/**
+ * DepthFunc driver hook: translate the GL comparison \p func and store it
+ * in the depth-test-function field of the STATE3 context register.
+ */
+static void
+i830DepthFunc(GLcontext * ctx, GLenum func)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   int hw_func = intel_translate_compare_func(func);
+
+   DBG("%s\n", __FUNCTION__);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+   /* Replace the old comparison field in a single read-modify-write. */
+   i830->state.Ctx[I830_CTXREG_STATE3] =
+      (i830->state.Ctx[I830_CTXREG_STATE3] & ~DEPTH_TEST_FUNC_MASK)
+      | ENABLE_DEPTH_TEST_FUNC
+      | DEPTH_TEST_FUNC(hw_func);
+}
+
+/**
+ * DepthMask driver hook.  Depth writes are enabled in hardware only when
+ * \p flag is set and the depth test itself is enabled in \p ctx.
+ */
+static void
+i830DepthMask(GLcontext * ctx, GLboolean flag)
+{
+   struct i830_context *i830 = i830_context(ctx);
+
+   DBG("%s flag (%d)\n", __FUNCTION__, flag);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+   /* Clear the field, then pick the enable or disable encoding. */
+   i830->state.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DIS_DEPTH_WRITE_MASK;
+   i830->state.Ctx[I830_CTXREG_ENABLES_2] |=
+      (flag && ctx->Depth.Test) ? ENABLE_DEPTH_WRITE : DISABLE_DEPTH_WRITE;
+}
+
+/**
+ * Called from ctx->Driver.Viewport()
+ *
+ * The \p x, \p y, \p width and \p height arguments are not used here;
+ * the function only calls intelCalcViewport() with \p ctx.
+ */
+static void
+i830Viewport(GLcontext * ctx,
+              GLint x, GLint y, GLsizei width, GLsizei height)
+{
+   intelCalcViewport(ctx);
+}
+
+
+/**
+ * Called from ctx->Driver.DepthRange()
+ *
+ * The \p nearval and \p farval arguments are not used here; the function
+ * only calls intelCalcViewport() with \p ctx.
+ */
+static void
+i830DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval)
+{
+   intelCalcViewport(ctx);
+}
+
+/* =============================================================
+ * Polygon stipple
+ *
+ * The i830 supports a 4x4 stipple natively, GL wants 32x32.
+ * Fortunately stipple is usually a repeating pattern.
+ */
+/**
+ * PolygonStipple driver hook: try to reduce the GL stipple pattern to the
+ * 4x4 pattern the hardware supports.
+ *
+ * \param ctx   GL context
+ * \param mask  stipple pattern; 128 bytes are read (8 * 4 * 4 in the loop
+ *              below)
+ *
+ * On success the 16-bit pattern is stored in the ST1 stipple register and
+ * \c hw_stipple is set to 1.  If the pattern does not reduce to 4x4, or
+ * reduces to all ones / all zeros, \c hw_stipple is set to 0 and the
+ * function returns early; in that case ST1_ENABLE stays cleared if it was
+ * cleared on entry to this function.
+ */
+static void
+i830PolygonStipple(GLcontext * ctx, const GLubyte * mask)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   const GLubyte *m = mask;
+   GLubyte p[4];
+   int i, j, k;
+   /* Stippling is "active" only while it is enabled in GL and the current
+    * reduced primitive is triangles.
+    */
+   int active = (ctx->Polygon.StippleFlag &&
+                 i830->intel.reduced_primitive == GL_TRIANGLES);
+   GLuint newMask;
+
+   /* Turn hardware stipple off while the pattern is being re-evaluated. */
+   if (active) {
+      I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE);
+      i830->state.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE;
+   }
+
+   /* Take the low nibble of the first byte of each of the first four
+    * 4-byte groups and replicate it into both nibbles of a byte.
+    */
+   p[0] = mask[12] & 0xf;
+   p[0] |= p[0] << 4;
+   p[1] = mask[8] & 0xf;
+   p[1] |= p[1] << 4;
+   p[2] = mask[4] & 0xf;
+   p[2] |= p[2] << 4;
+   p[3] = mask[0] & 0xf;
+   p[3] |= p[3] << 4;
+
+   /* Every byte of the pattern must equal the replicated nibble for its
+    * position in the 4-group cycle; otherwise the pattern is not a
+    * repeating 4x4 tile and hardware stipple cannot be used.
+    */
+   for (k = 0; k < 8; k++)
+      for (j = 3; j >= 0; j--)
+         for (i = 0; i < 4; i++, m++)
+            if (*m != p[j]) {
+               i830->intel.hw_stipple = 0;
+               return;
+            }
+
+   /* Pack the four nibbles into the 16-bit hardware pattern. */
+   newMask = (((p[0] & 0xf) << 0) |
+              ((p[1] & 0xf) << 4) |
+              ((p[2] & 0xf) << 8) | ((p[3] & 0xf) << 12));
+
+
+   if (newMask == 0xffff || newMask == 0x0) {
+      /* this is needed to make conform pass */
+      i830->intel.hw_stipple = 0;
+      return;
+   }
+
+   /* Install the new pattern in the low 16 bits of ST1. */
+   i830->state.Stipple[I830_STPREG_ST1] &= ~0xffff;
+   i830->state.Stipple[I830_STPREG_ST1] |= newMask;
+   i830->intel.hw_stipple = 1;
+
+   /* Re-enable only if stippling was active on entry. */
+   if (active)
+      i830->state.Stipple[I830_STPREG_ST1] |= ST1_ENABLE;
+}
+
+
+/* =============================================================
+ * Hardware clipping
+ */
+/**
+ * Scissor driver hook: turn the GL scissor box into an inclusive rectangle,
+ * clamp it to the draw buffer and store it in the SR1/SR2 buffer registers.
+ *
+ * When the draw buffer's Name is 0 the Y axis is flipped against the
+ * buffer height; for any other draw buffer (an FBO) it is used as given.
+ * Nothing is done if there is no draw buffer.
+ */
+static void
+i830Scissor(GLcontext * ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   int left, top, right, bottom;
+
+   if (!ctx->DrawBuffer)
+      return;
+
+   DBG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
+
+   /* The horizontal extent is the same in both orientations. */
+   left = x;
+   right = x + w - 1;
+
+   if (ctx->DrawBuffer->Name != 0) {
+      /* FBO - not inverted
+       */
+      top = y;
+      bottom = top + h - 1;
+      DBG("%s %d..%d,%d..%d (not inverted)\n", __FUNCTION__,
+          left, right, top, bottom);
+   }
+   else {
+      top = ctx->DrawBuffer->Height - (y + h);
+      bottom = top + h - 1;
+      DBG("%s %d..%d,%d..%d (inverted)\n", __FUNCTION__,
+          left, right, top, bottom);
+   }
+
+   left = CLAMP(left, 0, ctx->DrawBuffer->Width - 1);
+   top = CLAMP(top, 0, ctx->DrawBuffer->Height - 1);
+   right = CLAMP(right, 0, ctx->DrawBuffer->Width - 1);
+   bottom = CLAMP(bottom, 0, ctx->DrawBuffer->Height - 1);
+
+   DBG("%s %d..%d,%d..%d (clamped)\n", __FUNCTION__,
+       left, right, top, bottom);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
+   /* Y goes in the upper 16 bits, X in the lower 16 bits. */
+   i830->state.Buffer[I830_DESTREG_SR1] = (top << 16) | (left & 0xffff);
+   i830->state.Buffer[I830_DESTREG_SR2] = (bottom << 16) | (right & 0xffff);
+}
+
+/**
+ * LogicOpcode driver hook: translate the GL logic op \p opcode and store it
+ * in the logic-op field of the STATE4 context register.
+ */
+static void
+i830LogicOp(GLcontext * ctx, GLenum opcode)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   int hw_op = intel_translate_logic_op(opcode);
+
+   DBG("%s\n", __FUNCTION__);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+   /* Swap the old opcode for the new one in a single read-modify-write. */
+   i830->state.Ctx[I830_CTXREG_STATE4] =
+      (i830->state.Ctx[I830_CTXREG_STATE4] & ~LOGICOP_MASK)
+      | LOGIC_OP_FUNC(hw_op);
+}
+
+
+
+/**
+ * Shared CullFace / FrontFace driver hook (also called when GL_CULL_FACE is
+ * toggled).  Recomputes the hardware cull mode from \c ctx->Polygon; the
+ * \p unused argument is ignored.
+ */
+static void
+i830CullFaceFrontFace(GLcontext * ctx, GLenum unused)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   GLuint mode;
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (!ctx->Polygon.CullFlag) {
+      mode = CULLMODE_NONE;
+   }
+   else if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) {
+      mode = CULLMODE_BOTH;
+   }
+   else {
+      /* The base case (cull back faces, CCW front) culls CW.  Culling front
+       * faces flips the winding, and so does a non-CCW front face; when
+       * both apply the two flips cancel.
+       */
+      const int cull_front = (ctx->Polygon.CullFaceMode == GL_FRONT);
+      const int front_not_ccw = (ctx->Polygon.FrontFace != GL_CCW);
+
+      mode = (cull_front != front_not_ccw) ? CULLMODE_CCW : CULLMODE_CW;
+   }
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STATE3] &= ~CULLMODE_MASK;
+   i830->state.Ctx[I830_CTXREG_STATE3] |= ENABLE_CULL_MODE | mode;
+}
+
+/**
+ * LineWidth driver hook.  The hardware value is twice the GL width,
+ * truncated to an integer and clamped to 1..15.  The context packet is
+ * marked dirty only if STATE5 actually changes.
+ */
+static void
+i830LineWidth(GLcontext * ctx, GLfloat widthf)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   int hw_width;
+   int new_state5;
+
+   DBG("%s\n", __FUNCTION__);
+
+   hw_width = (int) (widthf * 2);
+   if (hw_width < 1)
+      hw_width = 1;
+   else if (hw_width > 15)
+      hw_width = 15;
+
+   new_state5 = i830->state.Ctx[I830_CTXREG_STATE5] & ~FIXED_LINE_WIDTH_MASK;
+   new_state5 |= (ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(hw_width));
+
+   /* Nothing to upload when the register value is unchanged. */
+   if (new_state5 == i830->state.Ctx[I830_CTXREG_STATE5])
+      return;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STATE5] = new_state5;
+}
+
+/**
+ * PointSize driver hook.  The size is truncated to an integer, clamped to
+ * 1..256 and stored in the fixed-point-width field of STATE5.
+ *
+ * NOTE(review): confirm that the upper clamp of 256 fits inside the field
+ * covered by FIXED_POINT_WIDTH_MASK.
+ */
+static void
+i830PointSize(GLcontext * ctx, GLfloat size)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   GLint width;
+
+   DBG("%s\n", __FUNCTION__);
+
+   width = (int) size;
+   if (width < 1)
+      width = 1;
+   else if (width > 256)
+      width = 256;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_STATE5] =
+      (i830->state.Ctx[I830_CTXREG_STATE5] & ~FIXED_POINT_WIDTH_MASK)
+      | ENABLE_FIXED_POINT_WIDTH
+      | FIXED_POINT_WIDTH(width);
+}
+
+
+/* =============================================================
+ * Color masks
+ */
+
+/**
+ * ColorMask driver hook.  Each channel whose mask is false gets its
+ * write-disable bit set in ENABLES_2.  The context packet is marked dirty
+ * only if the register value actually changes.
+ */
+static void
+i830ColorMask(GLcontext * ctx,
+              GLboolean r, GLboolean g, GLboolean b, GLboolean a)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   GLuint enables2;
+
+   DBG("%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, a);
+
+   /* Start from the current register with the write-mask field cleared. */
+   enables2 = i830->state.Ctx[I830_CTXREG_ENABLES_2] & ~WRITEMASK_MASK;
+   enables2 |= ENABLE_COLOR_MASK | ENABLE_COLOR_WRITE;
+
+   /* The hardware bits are "disable" bits, hence the negation. */
+   enables2 |= (!r) << WRITEMASK_RED_SHIFT;
+   enables2 |= (!g) << WRITEMASK_GREEN_SHIFT;
+   enables2 |= (!b) << WRITEMASK_BLUE_SHIFT;
+   enables2 |= (!a) << WRITEMASK_ALPHA_SHIFT;
+
+   if (enables2 == i830->state.Ctx[I830_CTXREG_ENABLES_2])
+      return;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_ENABLES_2] = enables2;
+}
+
+/**
+ * Enable or disable the hardware specular add according to
+ * NEED_SECONDARY_COLOR(ctx).
+ */
+static void
+update_specular(GLcontext * ctx)
+{
+   struct i830_context *i830 = i830_context(ctx);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+   /* Clear the field, then pick the enable or disable encoding. */
+   i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_SPEC_ADD_MASK;
+   i830->state.Ctx[I830_CTXREG_ENABLES_1] |=
+      NEED_SECONDARY_COLOR(ctx) ? ENABLE_SPEC_ADD : DISABLE_SPEC_ADD;
+}
+
+/**
+ * LightModelfv driver hook.  Only GL_LIGHT_MODEL_COLOR_CONTROL is acted
+ * on, by refreshing the specular-add state; \p param is not read.
+ */
+static void
+i830LightModelfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
+{
+   DBG("%s\n", __FUNCTION__);
+
+   if (pname != GL_LIGHT_MODEL_COLOR_CONTROL)
+      return;
+
+   update_specular(ctx);
+}
+
+/* In Mesa 3.5 we can reliably do native flatshading.
+ *
+ * ShadeModel driver hook: GL_FLAT selects flat shading for the alpha, fog,
+ * specular and colour interpolators; any other mode selects linear.
+ */
+static void
+i830ShadeModel(GLcontext * ctx, GLenum mode)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   GLuint shade_bits;
+
+#define SHADE_MODE_MASK ((1<<10)|(1<<8)|(1<<6)|(1<<4))
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+   if (mode == GL_FLAT)
+      shade_bits = (ALPHA_SHADE_MODE(SHADE_MODE_FLAT) |
+                    FOG_SHADE_MODE(SHADE_MODE_FLAT) |
+                    SPEC_SHADE_MODE(SHADE_MODE_FLAT) |
+                    COLOR_SHADE_MODE(SHADE_MODE_FLAT));
+   else
+      shade_bits = (ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) |
+                    FOG_SHADE_MODE(SHADE_MODE_LINEAR) |
+                    SPEC_SHADE_MODE(SHADE_MODE_LINEAR) |
+                    COLOR_SHADE_MODE(SHADE_MODE_LINEAR));
+
+   /* Clear the bits named by SHADE_MODE_MASK, then install the new ones. */
+   i830->state.Ctx[I830_CTXREG_STATE3] &= ~SHADE_MODE_MASK;
+   i830->state.Ctx[I830_CTXREG_STATE3] |= shade_bits;
+}
+
+/* =============================================================
+ * Fog
+ */
+/**
+ * Fogfv driver hook.  Only GL_FOG_COLOR is acted on: the colour is read
+ * from \c ctx->Fog.Color (not from \p param), scaled by 255, truncated to
+ * bytes and packed as R<<16 | G<<8 | B into the fog colour command.
+ */
+static void
+i830Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   GLubyte red, green, blue;
+   GLuint color;
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (pname != GL_FOG_COLOR)
+      return;
+
+   red = (GLubyte) (ctx->Fog.Color[0] * 255.0F);
+   green = (GLubyte) (ctx->Fog.Color[1] * 255.0F);
+   blue = (GLubyte) (ctx->Fog.Color[2] * 255.0F);
+   color = (red << 16) | (green << 8) | (blue << 0);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+   i830->state.Ctx[I830_CTXREG_FOGCOLOR] = (_3DSTATE_FOG_COLOR_CMD | color);
+}
+
+/* =============================================================
+ */
+
+/**
+ * Enable driver hook (installed by i830InitStateFuncs()): mirror a change
+ * of one GL capability into the cached i830 hardware state.
+ *
+ * \param ctx    GL context
+ * \param cap    capability being changed
+ * \param state  new value of the capability (non-zero = enabled)
+ *
+ * Capabilities without a case below are ignored.
+ */
+static void
+i830Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+{
+   struct i830_context *i830 = i830_context(ctx);
+
+   switch (cap) {
+   case GL_LIGHTING:
+   case GL_COLOR_SUM:
+      /* Both can change whether a secondary colour is needed. */
+      update_specular(ctx);
+      break;
+
+   case GL_ALPHA_TEST:
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+      i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_ALPHA_TEST_MASK;
+      if (state)
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_ALPHA_TEST;
+      else
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_ALPHA_TEST;
+
+      break;
+
+   case GL_BLEND:
+      /* 'state' is not used; the helper re-reads blend/logicop from ctx. */
+      i830EvalLogicOpBlendState(ctx);
+      break;
+
+   case GL_COLOR_LOGIC_OP:
+      i830EvalLogicOpBlendState(ctx);
+
+      /* Logicop doesn't seem to work at 16bpp:
+       */
+      if (i830->intel.ctx.Visual.rgbBits == 16)
+         FALLBACK(&i830->intel, I830_FALLBACK_LOGICOP, state);
+      break;
+
+   case GL_DITHER:
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+      i830->state.Ctx[I830_CTXREG_ENABLES_2] &= ~ENABLE_DITHER;
+
+      if (state)
+         i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_DITHER;
+      else
+         i830->state.Ctx[I830_CTXREG_ENABLES_2] |= DISABLE_DITHER;
+      break;
+
+   case GL_DEPTH_TEST:
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+      i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_DEPTH_TEST_MASK;
+
+      if (state)
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_DEPTH_TEST;
+      else
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_DEPTH_TEST;
+
+      /* Also turn off depth writes when GL_DEPTH_TEST is disabled:
+       */
+      i830DepthMask(ctx, ctx->Depth.Mask);
+      break;
+
+   case GL_SCISSOR_TEST:
+      I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
+
+      /* The whole SENABLE dword is rewritten, not just one field. */
+      if (state)
+         i830->state.Buffer[I830_DESTREG_SENABLE] =
+            (_3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT);
+      else
+         i830->state.Buffer[I830_DESTREG_SENABLE] =
+            (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+
+      break;
+
+   case GL_LINE_SMOOTH:
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+      i830->state.Ctx[I830_CTXREG_AA] &= ~AA_LINE_ENABLE;
+      if (state)
+         i830->state.Ctx[I830_CTXREG_AA] |= AA_LINE_ENABLE;
+      else
+         i830->state.Ctx[I830_CTXREG_AA] |= AA_LINE_DISABLE;
+      break;
+
+   case GL_FOG:
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+      i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_DIS_FOG_MASK;
+      if (state)
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_FOG;
+      else
+         i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_FOG;
+      break;
+
+   case GL_CULL_FACE:
+      /* The cull mode is recomputed from ctx->Polygon; the 0 is ignored. */
+      i830CullFaceFrontFace(ctx, 0);
+      break;
+
+   case GL_TEXTURE_2D:
+      /* Nothing to do here. */
+      break;
+
+   case GL_STENCIL_TEST:
+      {
+         /* Hardware stencil is used only when the draw buffer has a
+          * stencil renderbuffer with a region; otherwise fall back.
+          */
+         GLboolean hw_stencil = GL_FALSE;
+         if (ctx->DrawBuffer) {
+            struct intel_renderbuffer *irbStencil
+               = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+            hw_stencil = (irbStencil && irbStencil->region);
+         }
+         if (hw_stencil) {
+            I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+            if (state) {
+               i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_STENCIL_TEST;
+               i830->state.Ctx[I830_CTXREG_ENABLES_2] |= ENABLE_STENCIL_WRITE;
+            }
+            else {
+               i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_STENCIL_TEST;
+               i830->state.Ctx[I830_CTXREG_ENABLES_2] &=
+                  ~ENABLE_STENCIL_WRITE;
+               i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_STENCIL_TEST;
+               i830->state.Ctx[I830_CTXREG_ENABLES_2] |=
+                  DISABLE_STENCIL_WRITE;
+            }
+         }
+         else {
+            FALLBACK(&i830->intel, I830_FALLBACK_STENCIL, state);
+         }
+      }
+      break;
+
+   case GL_POLYGON_STIPPLE:
+      /* The stipple command worked on my 855GM box, but not my 845G.
+       * I'll do more testing later to find out exactly which hardware
+       * supports it.  Disabled for now.
+       *
+       * NOTE(review): despite the "Disabled for now" remark, the code
+       * below is live whenever hw_stipple is set and the reduced
+       * primitive is triangles.
+       */
+      if (i830->intel.hw_stipple &&
+          i830->intel.reduced_primitive == GL_TRIANGLES) {
+         I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE);
+         i830->state.Stipple[I830_STPREG_ST1] &= ~ST1_ENABLE;
+         if (state)
+            i830->state.Stipple[I830_STPREG_ST1] |= ST1_ENABLE;
+      }
+      break;
+
+   default:
+      ;
+   }
+}
+
+
+/**
+ * Reset every cached hardware state packet in \c i830->state to its
+ * default: the whole structure is zeroed, then each packet gets its
+ * command header and default field values.
+ *
+ * Only the in-memory copies are written here; nothing is emitted to the
+ * hardware by this function.
+ */
+static void
+i830_init_packets(struct i830_context *i830)
+{
+   /* Zero all state */
+   memset(&i830->state, 0, sizeof(i830->state));
+
+   /* Set default blend state */
+   /* Texture blend unit 0: four words (colour op, alpha op, colour arg,
+    * alpha arg), each selecting the diffuse colour as argument 1.
+    */
+   i830->state.TexBlend[0][0] = (_3DSTATE_MAP_BLEND_OP_CMD(0) |
+                                 TEXPIPE_COLOR |
+                                 ENABLE_TEXOUTPUT_WRT_SEL |
+                                 TEXOP_OUTPUT_CURRENT |
+                                 DISABLE_TEX_CNTRL_STAGE |
+                                 TEXOP_SCALE_1X |
+                                 TEXOP_MODIFY_PARMS |
+                                 TEXOP_LAST_STAGE | TEXBLENDOP_ARG1);
+   i830->state.TexBlend[0][1] = (_3DSTATE_MAP_BLEND_OP_CMD(0) |
+                                 TEXPIPE_ALPHA |
+                                 ENABLE_TEXOUTPUT_WRT_SEL |
+                                 TEXOP_OUTPUT_CURRENT |
+                                 TEXOP_SCALE_1X |
+                                 TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1);
+   i830->state.TexBlend[0][2] = (_3DSTATE_MAP_BLEND_ARG_CMD(0) |
+                                 TEXPIPE_COLOR |
+                                 TEXBLEND_ARG1 |
+                                 TEXBLENDARG_MODIFY_PARMS |
+                                 TEXBLENDARG_DIFFUSE);
+   i830->state.TexBlend[0][3] = (_3DSTATE_MAP_BLEND_ARG_CMD(0) |
+                                 TEXPIPE_ALPHA |
+                                 TEXBLEND_ARG1 |
+                                 TEXBLENDARG_MODIFY_PARMS |
+                                 TEXBLENDARG_DIFFUSE);
+
+   i830->state.TexBlendWordsUsed[0] = 4;
+
+
+   /* Context packet: vertex format words start out empty. */
+   i830->state.Ctx[I830_CTXREG_VF] = 0;
+   i830->state.Ctx[I830_CTXREG_VF2] = 0;
+
+   i830->state.Ctx[I830_CTXREG_AA] = (_3DSTATE_AA_CMD |
+                                      AA_LINE_ECAAR_WIDTH_ENABLE |
+                                      AA_LINE_ECAAR_WIDTH_1_0 |
+                                      AA_LINE_REGION_WIDTH_ENABLE |
+                                      AA_LINE_REGION_WIDTH_1_0 |
+                                      AA_LINE_DISABLE);
+
+   /* Everything controlled by ENABLES_1 starts disabled. */
+   i830->state.Ctx[I830_CTXREG_ENABLES_1] = (_3DSTATE_ENABLES_1_CMD |
+                                             DISABLE_LOGIC_OP |
+                                             DISABLE_STENCIL_TEST |
+                                             DISABLE_DEPTH_BIAS |
+                                             DISABLE_SPEC_ADD |
+                                             DISABLE_FOG |
+                                             DISABLE_ALPHA_TEST |
+                                             DISABLE_COLOR_BLEND |
+                                             DISABLE_DEPTH_TEST);
+
+#if 000                         /* XXX all the stencil enable state is set in i830Enable(), right? */
+   if (i830->intel.hw_stencil) {
+      i830->state.Ctx[I830_CTXREG_ENABLES_2] = (_3DSTATE_ENABLES_2_CMD |
+                                                ENABLE_STENCIL_WRITE |
+                                                ENABLE_TEX_CACHE |
+                                                ENABLE_DITHER |
+                                                ENABLE_COLOR_MASK |
+                                                /* set no color comps disabled */
+                                                ENABLE_COLOR_WRITE |
+                                                ENABLE_DEPTH_WRITE);
+   }
+   else
+#endif
+   {
+      /* With the block above compiled out, this is the only path taken:
+       * stencil writes start disabled.
+       */
+      i830->state.Ctx[I830_CTXREG_ENABLES_2] = (_3DSTATE_ENABLES_2_CMD |
+                                                DISABLE_STENCIL_WRITE |
+                                                ENABLE_TEX_CACHE |
+                                                ENABLE_DITHER |
+                                                ENABLE_COLOR_MASK |
+                                                /* set no color comps disabled */
+                                                ENABLE_COLOR_WRITE |
+                                                ENABLE_DEPTH_WRITE);
+   }
+
+   /* Default blend: ADD with source factor ONE, destination factor ZERO. */
+   i830->state.Ctx[I830_CTXREG_STATE1] = (_3DSTATE_MODES_1_CMD |
+                                          ENABLE_COLR_BLND_FUNC |
+                                          BLENDFUNC_ADD |
+                                          ENABLE_SRC_BLND_FACTOR |
+                                          SRC_BLND_FACT(BLENDFACT_ONE) |
+                                          ENABLE_DST_BLND_FACTOR |
+                                          DST_BLND_FACT(BLENDFACT_ZERO));
+
+   i830->state.Ctx[I830_CTXREG_STATE2] = (_3DSTATE_MODES_2_CMD |
+                                          ENABLE_GLOBAL_DEPTH_BIAS |
+                                          GLOBAL_DEPTH_BIAS(0) |
+                                          ENABLE_ALPHA_TEST_FUNC |
+                                          ALPHA_TEST_FUNC(COMPAREFUNC_ALWAYS)
+                                          | ALPHA_REF_VALUE(0));
+
+   /* Depth func LESS, linear shading for all interpolators, no culling. */
+   i830->state.Ctx[I830_CTXREG_STATE3] = (_3DSTATE_MODES_3_CMD |
+                                          ENABLE_DEPTH_TEST_FUNC |
+                                          DEPTH_TEST_FUNC(COMPAREFUNC_LESS) |
+                                          ENABLE_ALPHA_SHADE_MODE |
+                                          ALPHA_SHADE_MODE(SHADE_MODE_LINEAR)
+                                          | ENABLE_FOG_SHADE_MODE |
+                                          FOG_SHADE_MODE(SHADE_MODE_LINEAR) |
+                                          ENABLE_SPEC_SHADE_MODE |
+                                          SPEC_SHADE_MODE(SHADE_MODE_LINEAR) |
+                                          ENABLE_COLOR_SHADE_MODE |
+                                          COLOR_SHADE_MODE(SHADE_MODE_LINEAR)
+                                          | ENABLE_CULL_MODE | CULLMODE_NONE);
+
+   /* Logic op COPY; stencil test and write masks both 0xff. */
+   i830->state.Ctx[I830_CTXREG_STATE4] = (_3DSTATE_MODES_4_CMD |
+                                          ENABLE_LOGIC_OP_FUNC |
+                                          LOGIC_OP_FUNC(LOGICOP_COPY) |
+                                          ENABLE_STENCIL_TEST_MASK |
+                                          STENCIL_TEST_MASK(0xff) |
+                                          ENABLE_STENCIL_WRITE_MASK |
+                                          STENCIL_WRITE_MASK(0xff));
+
+   i830->state.Ctx[I830_CTXREG_STENCILTST] = (_3DSTATE_STENCIL_TEST_CMD |
+                                              ENABLE_STENCIL_PARMS |
+                                              STENCIL_FAIL_OP(STENCILOP_KEEP)
+                                              |
+                                              STENCIL_PASS_DEPTH_FAIL_OP
+                                              (STENCILOP_KEEP) |
+                                              STENCIL_PASS_DEPTH_PASS_OP
+                                              (STENCILOP_KEEP) |
+                                              ENABLE_STENCIL_TEST_FUNC |
+                                              STENCIL_TEST_FUNC
+                                              (COMPAREFUNC_ALWAYS) |
+                                              ENABLE_STENCIL_REF_VALUE |
+                                              STENCIL_REF_VALUE(0));
+
+   i830->state.Ctx[I830_CTXREG_STATE5] = (_3DSTATE_MODES_5_CMD | FLUSH_TEXTURE_CACHE | ENABLE_SPRITE_POINT_TEX | SPRITE_POINT_TEX_OFF | ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(0x2) |       /* 1.0 */
+                                          ENABLE_FIXED_POINT_WIDTH |
+                                          FIXED_POINT_WIDTH(1));
+
+   i830->state.Ctx[I830_CTXREG_IALPHAB] = (_3DSTATE_INDPT_ALPHA_BLEND_CMD |
+                                           DISABLE_INDPT_ALPHA_BLEND |
+                                           ENABLE_ALPHA_BLENDFUNC |
+                                           ABLENDFUNC_ADD);
+
+   i830->state.Ctx[I830_CTXREG_FOGCOLOR] = (_3DSTATE_FOG_COLOR_CMD |
+                                            FOG_COLOR_RED(0) |
+                                            FOG_COLOR_GREEN(0) |
+                                            FOG_COLOR_BLUE(0));
+
+   i830->state.Ctx[I830_CTXREG_BLENDCOLOR0] = _3DSTATE_CONST_BLEND_COLOR_CMD;
+   i830->state.Ctx[I830_CTXREG_BLENDCOLOR1] = 0;
+
+   /* Texture coordinate set N is bound to vertex set N. */
+   i830->state.Ctx[I830_CTXREG_MCSB0] = _3DSTATE_MAP_COORD_SETBIND_CMD;
+   i830->state.Ctx[I830_CTXREG_MCSB1] = (TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) |
+                                         TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) |
+                                         TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) |
+                                         TEXBIND_SET0(TEXCOORDSRC_VTXSET_0));
+
+   /* Same provoking-vertex values that i830_update_provoking_vertex()
+    * uses for GL_LAST_VERTEX_CONVENTION.
+    */
+   i830->state.RasterRules[I830_RASTER_RULES] = (_3DSTATE_RASTER_RULES_CMD |
+						 ENABLE_POINT_RASTER_RULE |
+						 OGL_POINT_RASTER_RULE |
+						 ENABLE_LINE_STRIP_PROVOKE_VRTX |
+						 ENABLE_TRI_FAN_PROVOKE_VRTX |
+						 ENABLE_TRI_STRIP_PROVOKE_VRTX |
+						 LINE_STRIP_PROVOKE_VRTX(1) |
+						 TRI_FAN_PROVOKE_VRTX(2) |
+						 TRI_STRIP_PROVOKE_VRTX(2));
+
+
+   i830->state.Stipple[I830_STPREG_ST0] = _3DSTATE_STIPPLE;
+
+   /* Buffer packet: scissor disabled, with an all-zero rectangle. */
+   i830->state.Buffer[I830_DESTREG_DV0] = _3DSTATE_DST_BUF_VARS_CMD;
+   i830->state.Buffer[I830_DESTREG_SENABLE] = (_3DSTATE_SCISSOR_ENABLE_CMD |
+                                               DISABLE_SCISSOR_RECT);
+   i830->state.Buffer[I830_DESTREG_SR0] = _3DSTATE_SCISSOR_RECT_0_CMD;
+   i830->state.Buffer[I830_DESTREG_SR1] = 0;
+   i830->state.Buffer[I830_DESTREG_SR2] = 0;
+}
+
+/**
+ * Reprogram the provoking-vertex fields of the raster rules packet from
+ * \c ctx->Light.ProvokingVertex.
+ */
+void
+i830_update_provoking_vertex(GLcontext * ctx)
+{
+   struct i830_context *i830 = i830_context(ctx);
+   GLuint provoke;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_RASTER_RULES);
+
+   /* _NEW_LIGHT */
+   if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION)
+      provoke = (LINE_STRIP_PROVOKE_VRTX(1) |
+                 TRI_FAN_PROVOKE_VRTX(2) |
+                 TRI_STRIP_PROVOKE_VRTX(2));
+   else
+      provoke = (LINE_STRIP_PROVOKE_VRTX(0) |
+                 TRI_FAN_PROVOKE_VRTX(1) |
+                 TRI_STRIP_PROVOKE_VRTX(0));
+
+   /* Clear the three provoking-vertex fields, then install the new ones. */
+   i830->state.RasterRules[I830_RASTER_RULES] &=
+      ~(LINE_STRIP_PROVOKE_VRTX_MASK |
+        TRI_FAN_PROVOKE_VRTX_MASK |
+        TRI_STRIP_PROVOKE_VRTX_MASK);
+   i830->state.RasterRules[I830_RASTER_RULES] |= provoke;
+}
+
+/**
+ * Plug the i830 state-setting functions into the driver function table.
+ *
+ * \param functions  table whose state hooks are overwritten
+ *
+ * Note that CullFace and FrontFace share one handler.
+ */
+void
+i830InitStateFuncs(struct dd_function_table *functions)
+{
+   functions->AlphaFunc = i830AlphaFunc;
+   functions->BlendColor = i830BlendColor;
+   functions->BlendEquationSeparate = i830BlendEquationSeparate;
+   functions->BlendFuncSeparate = i830BlendFuncSeparate;
+   functions->ColorMask = i830ColorMask;
+   functions->CullFace = i830CullFaceFrontFace;
+   functions->DepthFunc = i830DepthFunc;
+   functions->DepthMask = i830DepthMask;
+   functions->Enable = i830Enable;
+   functions->Fogfv = i830Fogfv;
+   functions->FrontFace = i830CullFaceFrontFace;
+   functions->LightModelfv = i830LightModelfv;
+   functions->LineWidth = i830LineWidth;
+   functions->LogicOpcode = i830LogicOp;
+   functions->PointSize = i830PointSize;
+   functions->PolygonStipple = i830PolygonStipple;
+   functions->Scissor = i830Scissor;
+   functions->ShadeModel = i830ShadeModel;
+   functions->StencilFuncSeparate = i830StencilFuncSeparate;
+   functions->StencilMaskSeparate = i830StencilMaskSeparate;
+   functions->StencilOpSeparate = i830StencilOpSeparate;
+   functions->DepthRange = i830DepthRange;
+   functions->Viewport = i830Viewport;
+}
+
+/**
+ * Set up the initial i830 hardware state for a context.
+ *
+ * The state packets are first reset to their defaults, then
+ * _mesa_init_driver_state() is called (presumably to replay the current GL
+ * state through the driver hooks - confirm against its definition).
+ * Finally the "emitted" set is cleared and the default set of active
+ * packets is selected.
+ */
+void
+i830InitState(struct i830_context *i830)
+{
+   GLcontext *ctx = &i830->intel.ctx;
+
+   i830_init_packets(i830);
+
+   _mesa_init_driver_state(ctx);
+
+   /* Nothing has been emitted yet. */
+   i830->state.emitted = 0;
+   i830->state.active = (I830_UPLOAD_INVARIENT |
+                         I830_UPLOAD_RASTER_RULES |
+                         I830_UPLOAD_TEXBLEND(0) |
+                         I830_UPLOAD_STIPPLE |
+                         I830_UPLOAD_CTX | I830_UPLOAD_BUFFERS);
+}
diff --git a/src/mesa/drivers/dri/i915/i830_texblend.c b/src/mesa/drivers/dri/i915/i830_texblend.c
new file mode 100644
index 0000000000..3f64be8c96
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i830_texblend.c
@@ -0,0 +1,460 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/simple_list.h"
+#include "main/enums.h"
+#include "main/texstore.h"
+#include "main/mm.h"
+
+#include "intel_screen.h"
+#include "intel_tex.h"
+
+#include "i830_context.h"
+#include "i830_reg.h"
+
+
+/* ================================================================
+ * Texture combine functions
+ */
+static GLuint
+pass_through(GLuint * state, GLuint blendUnit)
+{
+   /* Emit the four words of an identity blend stage for blendUnit: on
+    * both the colour and alpha pipes the op is ARG1 at 1x scale writing
+    * the current output, and ARG1 is the incoming "current" value.  Only
+    * the colour op word carries DISABLE_TEX_CNTRL_STAGE.  Returns the
+    * number of words written (always 4).
+    */
+   const GLuint op = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
+                      ENABLE_TEXOUTPUT_WRT_SEL |
+                      TEXOP_OUTPUT_CURRENT |
+                      TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1);
+   const GLuint arg = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
+                       TEXBLEND_ARG1 |
+                       TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_CURRENT);
+   GLuint n = 0;
+
+   state[n++] = op | TEXPIPE_COLOR | DISABLE_TEX_CNTRL_STAGE;
+   state[n++] = op | TEXPIPE_ALPHA;
+   state[n++] = arg | TEXPIPE_COLOR;
+   state[n++] = arg | TEXPIPE_ALPHA;
+
+   return n;
+}
+
+static GLuint
+emit_factor(GLuint blendUnit, GLuint * state, GLuint count,
+            const GLfloat * factor)
+{
+   /* Append blendUnit's constant colour (RGBA floats in factor) to state
+    * as a two-word packet at index count; returns the new word count.
+    */
+   GLubyte r, g, b, a;
+
+   if (0)
+      fprintf(stderr, "emit constant %d: %.2f %.2f %.2f %.2f\n",
+              blendUnit, factor[0], factor[1], factor[2], factor[3]);
+   UNCLAMPED_FLOAT_TO_UBYTE(r, factor[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(g, factor[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(b, factor[2]);
+   UNCLAMPED_FLOAT_TO_UBYTE(a, factor[3]);
+   /* Widen before shifting: a GLubyte promotes to int, and (a << 24)
+    * overflows a signed int whenever a >= 0x80. */
+   state[count++] = _3DSTATE_COLOR_FACTOR_N_CMD(blendUnit);
+   state[count++] = (((GLuint) a << 24) | ((GLuint) r << 16) |
+                     ((GLuint) g << 8) | (GLuint) b);
+   return count;
+}
+
+
+static INLINE GLuint
+GetTexelOp(GLint unit)
+{
+   /* Map a texture unit index to its TEXBLENDARG_TEXELn blend argument;
+    * any index outside 0..3 yields TEXBLENDARG_TEXEL0.
+    */
+   static const GLuint texel_arg[4] = {
+      TEXBLENDARG_TEXEL0,
+      TEXBLENDARG_TEXEL1,
+      TEXBLENDARG_TEXEL2,
+      TEXBLENDARG_TEXEL3,
+   };
+   if (unit < 0 || unit > 3)
+      return TEXBLENDARG_TEXEL0;
+   return texel_arg[unit];
+}
+
+
+/**
+ * Calculate the hardware instructions to set up the current texture
+ * environment settings.  Since \c gl_texture_unit::_CurrentCombine is used,
+ * both "classic" texture environments and GL_ARB_texture_env_combine type
+ * texture environments are treated identically.
+ *
+ * \todo
+ * This function should return \c GLboolean.  When \c GL_FALSE is returned,
+ * it means that an environment is selected that the hardware cannot do.  This
+ * is the way the Radeon and R200 drivers work.
+ * 
+ * \todo
+ * Looking at i830_3d_regs.h, it seems the i830 can do part of
+ * GL_ATI_texture_env_combine3.  It can handle using \c GL_ONE and
+ * \c GL_ZERO as combine inputs (which the code already supports).  It can
+ * also handle the \c GL_MODULATE_ADD_ATI mode.  Is it worth investigating
+ * partial support for the extension?
+ */
+GLuint
+i830SetTexEnvCombine(struct i830_context * i830,
+                     const struct gl_tex_env_combine_state * combine,
+                     GLint blendUnit,
+                     GLuint texel_op, GLuint * state, const GLfloat * factor)
+{
+   const GLuint numColorArgs = combine->_NumArgsRGB;
+   const GLuint numAlphaArgs = combine->_NumArgsA;
+   /* Returns the number of GLuint words written to state. */
+   GLuint blendop;
+   GLuint ablendop;
+   GLuint args_RGB[3];
+   GLuint args_A[3];
+   GLuint rgb_shift;
+   GLuint alpha_shift;
+   GLboolean need_factor = 0;
+   int i;
+   unsigned used;
+   static const GLuint tex_blend_rgb[3] = {
+      TEXPIPE_COLOR | TEXBLEND_ARG1 | TEXBLENDARG_MODIFY_PARMS,
+      TEXPIPE_COLOR | TEXBLEND_ARG2 | TEXBLENDARG_MODIFY_PARMS,
+      TEXPIPE_COLOR | TEXBLEND_ARG0 | TEXBLENDARG_MODIFY_PARMS,
+   };
+   static const GLuint tex_blend_a[3] = {
+      TEXPIPE_ALPHA | TEXBLEND_ARG1 | TEXBLENDARG_MODIFY_PARMS,
+      TEXPIPE_ALPHA | TEXBLEND_ARG2 | TEXBLENDARG_MODIFY_PARMS,
+      TEXPIPE_ALPHA | TEXBLEND_ARG0 | TEXBLENDARG_MODIFY_PARMS,
+   };
+
+   if (INTEL_DEBUG & DEBUG_TEXTURE)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+
+   /* The EXT version of the DOT3 extension does not support the
+    * scale factor, but the ARB version (and the version in OpenGL
+    * 1.3) does.
+    */
+   switch (combine->ModeRGB) {
+   case GL_DOT3_RGB_EXT:
+      alpha_shift = combine->ScaleShiftA;
+      rgb_shift = 0;
+      break;
+
+   case GL_DOT3_RGBA_EXT:
+      alpha_shift = 0;
+      rgb_shift = 0;
+      break;
+
+   default:
+      rgb_shift = combine->ScaleShiftRGB;
+      alpha_shift = combine->ScaleShiftA;
+      break;
+   }
+
+   /* Pick the hardware op for the RGB mode; unknown modes pass through. */
+   switch (combine->ModeRGB) {
+   case GL_REPLACE:
+      blendop = TEXBLENDOP_ARG1;
+      break;
+   case GL_MODULATE:
+      blendop = TEXBLENDOP_MODULATE;
+      break;
+   case GL_ADD:
+      blendop = TEXBLENDOP_ADD;
+      break;
+   case GL_ADD_SIGNED:
+      blendop = TEXBLENDOP_ADDSIGNED;
+      break;
+   case GL_INTERPOLATE:
+      blendop = TEXBLENDOP_BLEND;
+      break;
+   case GL_SUBTRACT:
+      blendop = TEXBLENDOP_SUBTRACT;
+      break;
+   case GL_DOT3_RGB_EXT:
+   case GL_DOT3_RGB:
+      blendop = TEXBLENDOP_DOT3;
+      break;
+   case GL_DOT3_RGBA_EXT:
+   case GL_DOT3_RGBA:
+      blendop = TEXBLENDOP_DOT3;
+      break;
+   default:
+      return pass_through(state, blendUnit);
+   }
+
+   blendop |= (rgb_shift << TEXOP_SCALE_SHIFT);
+
+   /* Handle RGB args.  All three slots are translated here; only the first
+    * _NumArgsRGB of them are emitted below. */
+   for (i = 0; i < 3; i++) {
+      switch (combine->SourceRGB[i]) {
+      case GL_TEXTURE:
+         args_RGB[i] = texel_op;
+         break;
+      case GL_TEXTURE0:
+      case GL_TEXTURE1:
+      case GL_TEXTURE2:
+      case GL_TEXTURE3:
+         args_RGB[i] = GetTexelOp(combine->SourceRGB[i] - GL_TEXTURE0);
+         break;
+      case GL_CONSTANT:
+         args_RGB[i] = TEXBLENDARG_FACTOR_N;
+         need_factor = 1;
+         break;
+      case GL_PRIMARY_COLOR:
+         args_RGB[i] = TEXBLENDARG_DIFFUSE;
+         break;
+      case GL_PREVIOUS:
+         args_RGB[i] = TEXBLENDARG_CURRENT;
+         break;
+      default:
+         return pass_through(state, blendUnit);
+      }
+
+      switch (combine->OperandRGB[i]) {
+      case GL_SRC_COLOR:
+         args_RGB[i] |= 0;
+         break;
+      case GL_ONE_MINUS_SRC_COLOR:
+         args_RGB[i] |= TEXBLENDARG_INV_ARG;
+         break;
+      case GL_SRC_ALPHA:
+         args_RGB[i] |= TEXBLENDARG_REPLICATE_ALPHA;
+         break;
+      case GL_ONE_MINUS_SRC_ALPHA:
+         args_RGB[i] |= (TEXBLENDARG_REPLICATE_ALPHA | TEXBLENDARG_INV_ARG);
+         break;
+      default:
+         return pass_through(state, blendUnit);
+      }
+   }
+
+
+   /* Need to knobble the alpha calculations of TEXBLENDOP_DOT4 to
+    * match the spec.  Can't use DOT3 as it won't propagate values
+    * into alpha as required:
+    *
+    * Note - the global factor is set up with alpha == .5, so
+    * the alpha part of the DOT4 calculation should be zero.
+    */
+   if (combine->ModeRGB == GL_DOT3_RGBA_EXT ||
+       combine->ModeRGB == GL_DOT3_RGBA) {
+      ablendop = TEXBLENDOP_DOT4;
+      args_A[0] = TEXBLENDARG_FACTOR;   /* the global factor */
+      args_A[1] = TEXBLENDARG_FACTOR;
+      args_A[2] = TEXBLENDARG_FACTOR;
+   }
+   else {
+      switch (combine->ModeA) {
+      case GL_REPLACE:
+         ablendop = TEXBLENDOP_ARG1;
+         break;
+      case GL_MODULATE:
+         ablendop = TEXBLENDOP_MODULATE;
+         break;
+      case GL_ADD:
+         ablendop = TEXBLENDOP_ADD;
+         break;
+      case GL_ADD_SIGNED:
+         ablendop = TEXBLENDOP_ADDSIGNED;
+         break;
+      case GL_INTERPOLATE:
+         ablendop = TEXBLENDOP_BLEND;
+         break;
+      case GL_SUBTRACT:
+         ablendop = TEXBLENDOP_SUBTRACT;
+         break;
+      default:
+         return pass_through(state, blendUnit);
+      }
+
+
+      ablendop |= (alpha_shift << TEXOP_SCALE_SHIFT);
+
+      /* Handle A args */
+      for (i = 0; i < 3; i++) {
+         switch (combine->SourceA[i]) {
+         case GL_TEXTURE:
+            args_A[i] = texel_op;
+            break;
+         case GL_TEXTURE0:
+         case GL_TEXTURE1:
+         case GL_TEXTURE2:
+         case GL_TEXTURE3:
+            args_A[i] = GetTexelOp(combine->SourceA[i] - GL_TEXTURE0);
+            break;
+         case GL_CONSTANT:
+            args_A[i] = TEXBLENDARG_FACTOR_N;
+            need_factor = 1;
+            break;
+         case GL_PRIMARY_COLOR:
+            args_A[i] = TEXBLENDARG_DIFFUSE;
+            break;
+         case GL_PREVIOUS:
+            args_A[i] = TEXBLENDARG_CURRENT;
+            break;
+         default:
+            return pass_through(state, blendUnit);
+         }
+
+         switch (combine->OperandA[i]) {
+         case GL_SRC_ALPHA:
+            args_A[i] |= 0;
+            break;
+         case GL_ONE_MINUS_SRC_ALPHA:
+            args_A[i] |= TEXBLENDARG_INV_ARG;
+            break;
+         default:
+            return pass_through(state, blendUnit);
+         }
+      }
+   }
+
+
+
+   /* Native Arg1 == Arg0 in GL_EXT_texture_env_combine spec */
+   /* Native Arg2 == Arg1 in GL_EXT_texture_env_combine spec */
+   /* Native Arg0 == Arg2 in GL_EXT_texture_env_combine spec */
+
+   /* When we render we need to figure out which is the last really enabled
+    * tex unit, and put last stage on it
+    */
+
+
+   /* Build color & alpha pipelines: the two op words first, then the
+    * color args, then the alpha args. */
+   used = 0;
+   state[used++] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
+                    TEXPIPE_COLOR |
+                    ENABLE_TEXOUTPUT_WRT_SEL |
+                    TEXOP_OUTPUT_CURRENT |
+                    DISABLE_TEX_CNTRL_STAGE | TEXOP_MODIFY_PARMS | blendop);
+   state[used++] = (_3DSTATE_MAP_BLEND_OP_CMD(blendUnit) |
+                    TEXPIPE_ALPHA |
+                    ENABLE_TEXOUTPUT_WRT_SEL |
+                    TEXOP_OUTPUT_CURRENT | TEXOP_MODIFY_PARMS | ablendop);
+
+   for (i = 0; i < numColorArgs; i++) {
+      state[used++] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
+                       tex_blend_rgb[i] | args_RGB[i]);
+   }
+
+   for (i = 0; i < numAlphaArgs; i++) {
+      state[used++] = (_3DSTATE_MAP_BLEND_ARG_CMD(blendUnit) |
+                       tex_blend_a[i] | args_A[i]);
+   }
+
+   /* A GL_CONSTANT source was seen: append the constant colour packet. */
+   if (need_factor)
+      return emit_factor(blendUnit, state, used, factor);
+   else
+      return used;
+}
+
+
+static void
+emit_texblend(struct i830_context *i830, GLuint unit, GLuint blendUnit,
+              GLboolean last_stage)
+{
+   /* Build the blend words for GL texture unit `unit` in a scratch buffer
+    * and store them as blend stage `blendUnit` only when they differ from
+    * the cached copy; the stage's upload is then marked active.
+    */
+   struct gl_texture_unit *texUnit = &i830->intel.ctx.Texture.Unit[unit];
+   GLuint *cached = i830->state.TexBlend[blendUnit];
+   GLuint words[I830_TEXBLEND_SIZE], nwords;
+
+   if (0)
+      fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit);
+
+   nwords = i830SetTexEnvCombine(i830, texUnit->_CurrentCombine, blendUnit,
+                                 GetTexelOp(unit), words, texUnit->EnvColor);
+   if (last_stage)
+      words[0] |= TEXOP_LAST_STAGE;
+
+   /* Compare lengths first, then contents. */
+   if (nwords != i830->state.TexBlendWordsUsed[blendUnit] ||
+       memcmp(words, cached, nwords * sizeof(GLuint)) != 0) {
+      I830_STATECHANGE(i830, I830_UPLOAD_TEXBLEND(blendUnit));
+      memcpy(cached, words, nwords * sizeof(GLuint));
+      i830->state.TexBlendWordsUsed[blendUnit] = nwords;
+   }
+
+   I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND(blendUnit), GL_TRUE);
+}
+
+static void
+emit_passthrough(struct i830_context *i830)
+{
+   /* Load blend stage 0 with the pass-through program, flagged as the
+    * last stage; the cached words are rewritten only if they changed.
+    */
+   const GLuint stage = 0;
+   GLuint words[I830_TEXBLEND_SIZE];
+   GLuint nwords = pass_through(words, stage);
+
+   words[0] |= TEXOP_LAST_STAGE;
+   if (nwords != i830->state.TexBlendWordsUsed[stage] ||
+       memcmp(words, i830->state.TexBlend[stage], nwords * sizeof(GLuint))) {
+      I830_STATECHANGE(i830, I830_UPLOAD_TEXBLEND(stage));
+      memcpy(i830->state.TexBlend[stage], words, nwords * sizeof(GLuint));
+      i830->state.TexBlendWordsUsed[stage] = nwords;
+   }
+   I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND(stage), GL_TRUE);
+}
+
+void
+i830EmitTextureBlend(struct i830_context *i830)
+{
+   GLcontext *ctx = &i830->intel.ctx;
+   GLuint u, final_unit = 0, stage = 0;
+
+   /* Deactivate every blend stage; the emitters re-activate what they use. */
+   I830_ACTIVESTATE(i830, I830_UPLOAD_TEXBLEND_ALL, GL_FALSE);
+   if (!ctx->Texture._EnabledUnits) {
+      emit_passthrough(i830);
+      return;
+   }
+
+   /* The highest enabled unit is the one flagged as the last stage. */
+   for (u = 0; u < ctx->Const.MaxTextureUnits; u++)
+      if (ctx->Texture.Unit[u]._ReallyEnabled)
+         final_unit = u;
+   for (u = 0; u < ctx->Const.MaxTextureUnits; u++)
+      if (ctx->Texture.Unit[u]._ReallyEnabled)
+         emit_texblend(i830, u, stage++, final_unit == u);
+}
diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c
new file mode 100644
index 0000000000..ace44430d9
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i830_texstate.c
@@ -0,0 +1,346 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "main/colormac.h"
+
+#include "intel_mipmap_tree.h"
+#include "intel_tex.h"
+
+#include "i830_context.h"
+#include "i830_reg.h"
+
+/* Translate a Mesa texture format into the MAPSURF_* | MT_* surface format
+ * bits.  Formats without a mapping here print a message and abort. */
+static GLuint
+translate_texture_format(GLuint mesa_format, GLuint internal_format)
+{
+   switch (mesa_format) {       /* internal_format is not consulted */
+   case MESA_FORMAT_L8:
+      return MAPSURF_8BIT | MT_8BIT_L8;
+   case MESA_FORMAT_I8:
+      return MAPSURF_8BIT | MT_8BIT_I8;
+   case MESA_FORMAT_A8:
+      return MAPSURF_8BIT | MT_8BIT_I8; /* Kludge! */
+   case MESA_FORMAT_AL88:
+      return MAPSURF_16BIT | MT_16BIT_AY88;
+   case MESA_FORMAT_RGB565:
+      return MAPSURF_16BIT | MT_16BIT_RGB565;
+   case MESA_FORMAT_ARGB1555:
+      return MAPSURF_16BIT | MT_16BIT_ARGB1555;
+   case MESA_FORMAT_ARGB4444:
+      return MAPSURF_16BIT | MT_16BIT_ARGB4444;
+   case MESA_FORMAT_ARGB8888:
+      return MAPSURF_32BIT | MT_32BIT_ARGB8888;
+   case MESA_FORMAT_XRGB8888:
+      return MAPSURF_32BIT | MT_32BIT_XRGB8888;
+   case MESA_FORMAT_YCBCR_REV:
+      return MAPSURF_422 | MT_422_YCRCB_NORMAL;
+   case MESA_FORMAT_YCBCR:
+      return MAPSURF_422 | MT_422_YCRCB_SWAPY;
+   case MESA_FORMAT_RGB_FXT1:
+   case MESA_FORMAT_RGBA_FXT1:
+      return MAPSURF_COMPRESSED | MT_COMPRESS_FXT1;
+   case MESA_FORMAT_RGB_DXT1:
+   case MESA_FORMAT_RGBA_DXT1:
+      return MAPSURF_COMPRESSED | MT_COMPRESS_DXT1;
+   case MESA_FORMAT_RGBA_DXT3:
+      return MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3;
+   case MESA_FORMAT_RGBA_DXT5:
+      return MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5;
+   default:
+      fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format);
+      abort();
+      return 0;                 /* only reached if abort() returns */
+   }
+}
+
+
+
+
+/* The i915 (and related graphics cores) do not support GL_CLAMP.  The
+ * Intel drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE, so the same is done here.
+ */
+static GLuint
+translate_wrap_mode(GLenum wrap)
+{
+   /* Convert a GL wrap enum to a TEXCOORDMODE_* value.  GL_CLAMP shares
+    * the edge-clamp mode, which is not really correct for it.
+    */
+
+   if (wrap == GL_CLAMP || wrap == GL_CLAMP_TO_EDGE)
+      return TEXCOORDMODE_CLAMP;
+   if (wrap == GL_CLAMP_TO_BORDER)
+      return TEXCOORDMODE_CLAMP_BORDER;
+   if (wrap == GL_MIRRORED_REPEAT)
+      return TEXCOORDMODE_MIRROR;
+
+   /* GL_REPEAT, and anything unrecognised, wraps. */
+   return TEXCOORDMODE_WRAP;
+}
+
+
+/* Recalculate all of one texture unit's hardware state from scratch --
+ * perhaps not the most efficient, but understandable and reliable.  ss3 is
+ * OR'ed into the MCS word.  Returns GL_FALSE if the unit can't be set up.
+ */
+static GLboolean
+i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
+{
+   GLcontext *ctx = &intel->ctx;
+   struct i830_context *i830 = i830_context(ctx);
+   struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit];
+   struct gl_texture_object *tObj = tUnit->_Current;
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   struct gl_texture_image *firstImage;
+   GLuint *state = i830->state.Tex[unit], format, pitch;
+   GLint lodbias;
+   GLubyte border[4];
+   GLuint dst_x, dst_y;
+
+   /* sizeof(state) is only the size of a pointer; clear the whole entry. */
+   memset(state, 0, sizeof(i830->state.Tex[unit]));
+
+   /* Drop the reference held on the buffer previously bound to this unit. */
+   if (i830->state.tex_buffer[unit] != NULL) {
+       drm_intel_bo_unreference(i830->state.tex_buffer[unit]);
+       i830->state.tex_buffer[unit] = NULL;
+   }
+
+   if (!intel_finalize_mipmap_tree(intel, unit))
+      return GL_FALSE;
+
+   /* Get first image here, since intelObj->firstLevel will get set in
+    * the intel_finalize_mipmap_tree() call above.
+    */
+   firstImage = tObj->Image[0][intelObj->firstLevel];
+
+   intel_miptree_get_image_offset(intelObj->mt, intelObj->firstLevel, 0, 0,
+                                  &dst_x, &dst_y);
+
+   drm_intel_bo_reference(intelObj->mt->region->buffer);
+   i830->state.tex_buffer[unit] = intelObj->mt->region->buffer;
+   pitch = intelObj->mt->region->pitch * intelObj->mt->cpp;
+
+   /* XXX: This calculation is probably broken for tiled images with
+    * a non-page-aligned offset.
+    */
+   i830->state.tex_offset[unit] = dst_x * intelObj->mt->cpp + dst_y * pitch;
+
+   format = translate_texture_format(firstImage->TexFormat,
+                                     firstImage->InternalFormat);
+
+   state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
+                               (LOAD_TEXTURE_MAP0 << unit) | 4);
+
+   state[I830_TEXREG_TM0S1] =
+      (((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) |
+       ((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format);
+
+   if (intelObj->mt->region->tiling != I915_TILING_NONE) {
+      state[I830_TEXREG_TM0S1] |= TM0S1_TILED_SURFACE;
+      if (intelObj->mt->region->tiling == I915_TILING_Y)
+         state[I830_TEXREG_TM0S1] |= TM0S1_TILE_WALK;
+   }
+
+   state[I830_TEXREG_TM0S2] =
+      ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK);
+
+   {
+      if (tObj->Target == GL_TEXTURE_CUBE_MAP)
+         state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit) |
+                                    CUBE_NEGX_ENABLE |
+                                    CUBE_POSX_ENABLE |
+                                    CUBE_NEGY_ENABLE |
+                                    CUBE_POSY_ENABLE |
+                                    CUBE_NEGZ_ENABLE | CUBE_POSZ_ENABLE);
+      else
+         state[I830_TEXREG_CUBE] = (_3DSTATE_MAP_CUBE | MAP_UNIT(unit));
+   }
+
+   /* Filtering and LOD: translate the GL min/mag filters, clamp the
+    * combined LOD bias to [-64, 63] and record the mip range, all in TM0S3.
+    */
+   {
+      GLuint minFilt, mipFilt, magFilt;
+
+      switch (tObj->MinFilter) {
+      case GL_NEAREST:
+         minFilt = FILTER_NEAREST;
+         mipFilt = MIPFILTER_NONE;
+         break;
+      case GL_LINEAR:
+         minFilt = FILTER_LINEAR;
+         mipFilt = MIPFILTER_NONE;
+         break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+         minFilt = FILTER_NEAREST;
+         mipFilt = MIPFILTER_NEAREST;
+         break;
+      case GL_LINEAR_MIPMAP_NEAREST:
+         minFilt = FILTER_LINEAR;
+         mipFilt = MIPFILTER_NEAREST;
+         break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+         minFilt = FILTER_NEAREST;
+         mipFilt = MIPFILTER_LINEAR;
+         break;
+      case GL_LINEAR_MIPMAP_LINEAR:
+         minFilt = FILTER_LINEAR;
+         mipFilt = MIPFILTER_LINEAR;
+         break;
+      default:
+         return GL_FALSE;
+      }
+
+      if (tObj->MaxAnisotropy > 1.0) {
+         minFilt = FILTER_ANISOTROPIC;
+         magFilt = FILTER_ANISOTROPIC;
+      }
+      else {
+         switch (tObj->MagFilter) {
+         case GL_NEAREST:
+            magFilt = FILTER_NEAREST;
+            break;
+         case GL_LINEAR:
+            magFilt = FILTER_LINEAR;
+            break;
+         default:
+            return GL_FALSE;
+         }
+      }
+
+      lodbias = (int) ((tUnit->LodBias + tObj->LodBias) * 16.0);
+      if (lodbias < -64)
+          lodbias = -64;
+      if (lodbias > 63)
+          lodbias = 63;
+      /* Cast first: left-shifting a negative int is undefined behaviour. */
+      state[I830_TEXREG_TM0S3] = (((GLuint) lodbias << TM0S3_LOD_BIAS_SHIFT) &
+                                  TM0S3_LOD_BIAS_MASK);
+#if 0
+      /* YUV conversion:
+       */
+      if (firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR ||
+          firstImage->TexFormat->MesaFormat == MESA_FORMAT_YCBCR_REV)
+         state[I830_TEXREG_TM0S3] |= SS2_COLORSPACE_CONVERSION;
+#endif
+
+      state[I830_TEXREG_TM0S3] |= ((intelObj->lastLevel -
+                                    intelObj->firstLevel) *
+                                   4) << TM0S3_MIN_MIP_SHIFT;
+
+      state[I830_TEXREG_TM0S3] |= ((minFilt << TM0S3_MIN_FILTER_SHIFT) |
+                                   (mipFilt << TM0S3_MIP_FILTER_SHIFT) |
+                                   (magFilt << TM0S3_MAG_FILTER_SHIFT));
+   }
+
+   {
+      GLenum ws = tObj->WrapS;
+      GLenum wt = tObj->WrapT;
+
+      /* 3D textures not available on i830.  (Checked late: the words
+       * above have already been written when this bails out.)
+       */
+      if (tObj->Target == GL_TEXTURE_3D)
+         return GL_FALSE;
+
+      state[I830_TEXREG_MCS] = (_3DSTATE_MAP_COORD_SET_CMD |
+                                MAP_UNIT(unit) |
+                                ENABLE_TEXCOORD_PARAMS |
+                                ss3 |
+                                ENABLE_ADDR_V_CNTL |
+                                TEXCOORD_ADDR_V_MODE(translate_wrap_mode(wt))
+                                | ENABLE_ADDR_U_CNTL |
+                                TEXCOORD_ADDR_U_MODE(translate_wrap_mode
+                                                     (ws)));
+   }
+
+   /* convert border color from float to ubyte */
+   CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor.f[0]);
+   CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor.f[1]);
+   CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor.f[2]);
+   CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor.f[3]);
+
+   state[I830_TEXREG_TM0S4] = PACK_COLOR_8888(border[3],
+                                              border[0],
+                                              border[1],
+                                              border[2]);
+
+   I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_TRUE);
+   /* memcmp was already disabled, but definitely won't work as the
+    * region might now change and that wouldn't be detected:
+    */
+   I830_STATECHANGE(i830, I830_UPLOAD_TEX(unit));
+   return GL_TRUE;
+}
+
+
+
+
+void
+i830UpdateTextureState(struct intel_context *intel)
+{
+   /* Refresh the hardware state of each texture unit, stopping at the
+    * first unit that cannot be handled.  The texture fallback is then set
+    * or cleared to match, and on success the blend stages are re-emitted.
+    */
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   GLboolean ok = GL_TRUE;
+   GLuint unit;
+
+   for (unit = 0; unit < I830_TEX_UNITS && ok; unit++) {
+      switch (intel->ctx.Texture.Unit[unit]._ReallyEnabled) {
+      case 0:       /* unit disabled: deactivate its upload, drop its buffer */
+         if (i830->state.active & I830_UPLOAD_TEX(unit))
+            I830_ACTIVESTATE(i830, I830_UPLOAD_TEX(unit), GL_FALSE);
+         if (i830->state.tex_buffer[unit] != NULL) {
+            drm_intel_bo_unreference(i830->state.tex_buffer[unit]);
+            i830->state.tex_buffer[unit] = NULL;
+         }
+         break;
+      case TEXTURE_1D_BIT:
+      case TEXTURE_2D_BIT:
+      case TEXTURE_CUBE_BIT:
+         ok = i830_update_tex_unit(intel, unit, TEXCOORDS_ARE_NORMAL);
+         break;
+      case TEXTURE_RECT_BIT:
+         ok = i830_update_tex_unit(intel, unit, TEXCOORDS_ARE_IN_TEXELUNITS);
+         break;
+      case TEXTURE_3D_BIT:
+      default:
+         ok = GL_FALSE;         /* 3D, or any other value: not handled */
+         break;
+      }
+   }
+
+   FALLBACK(intel, I830_FALLBACK_TEXTURE, !ok);
+   if (ok)
+      i830EmitTextureBlend(i830);
+}
diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c
new file mode 100644
index 0000000000..0775d7fe94
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i830_vtbl.c
@@ -0,0 +1,730 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "i830_context.h"
+#include "i830_reg.h"
+#include "intel_batchbuffer.h"
+#include "intel_regions.h"
+#include "intel_tris.h"
+#include "intel_fbo.h"
+#include "tnl/t_context.h"
+#include "tnl/t_vertex.h"
+
+#define FILE_DEBUG_FLAG DEBUG_STATE
+
+static GLboolean i830_check_vertex_size(struct intel_context *intel,
+                                        GLuint expected);
+
+#define SZ_TO_HW(sz)  ((sz-2)&0x3)   /* NOTE(review): sz is not parenthesized; pass a plain value */
+#define EMIT_SZ(sz)   (EMIT_1F + (sz) - 1)   /* presumably selects EMIT_<sz>F -- confirm the enum order */
+#define EMIT_ATTR( ATTR, STYLE, V0 )					\
+do {									\
+   intel->vertex_attrs[intel->vertex_attr_count].attrib = (ATTR);	\
+   intel->vertex_attrs[intel->vertex_attr_count].format = (STYLE);	\
+   intel->vertex_attr_count++;						\
+   v0 |= V0;								\
+} while (0)   /* EMIT_ATTR appends one attribute and ORs V0 into the caller's local v0 */
+/* EMIT_PAD appends an EMIT_PAD entry with offset N; like EMIT_ATTR it uses the caller's local intel. */
+#define EMIT_PAD( N )							\
+do {									\
+   intel->vertex_attrs[intel->vertex_attr_count].attrib = 0;		\
+   intel->vertex_attrs[intel->vertex_attr_count].format = EMIT_PAD;	\
+   intel->vertex_attrs[intel->vertex_attr_count].offset = (N);		\
+   intel->vertex_attr_count++;						\
+} while (0)
+
+/* Shift x into slot n: 2 bits per slot for VRTX_TEX_SET_FMT, 4 for TEXBIND_SET. */
+#define VRTX_TEX_SET_FMT(n, x)          ((x)<<((n)*2))
+#define TEXBIND_SET(n, x) 		((x)<<((n)*4))
+
+static void
+i830_render_prevalidate(struct intel_context *intel)
+{  /* No-op: this hook has an empty body on i830. */
+}
+
+static void
+i830_render_start(struct intel_context *intel)
+{
+   GLcontext *ctx = &intel->ctx;
+   struct i830_context *i830 = i830_context(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   DECLARE_RENDERINPUTS(index_bitset);
+   GLuint v0 = _3DSTATE_VFT0_CMD;   /* stored to Ctx[I830_CTXREG_VF] below */
+   GLuint v2 = _3DSTATE_VFT1_CMD;   /* stored to Ctx[I830_CTXREG_VF2] below */
+   GLuint mcsb1 = 0;                /* stored to Ctx[I830_CTXREG_MCSB1] below */
+   /* Build the vertex attribute list and the VF/VF2/MCSB1 words from tnl's render inputs. */
+   RENDERINPUTS_COPY(index_bitset, tnl->render_inputs_bitset);
+
+   /* Important: the position attribute is read from the NDC coordinates.
+    */
+   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+   intel->vertex_attr_count = 0;
+
+   /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+    * build up a hardware vertex.
+    */
+   if (RENDERINPUTS_TEST_RANGE(index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX)) {
+      EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, VFT0_XYZW);
+      intel->coloroffset = 4;
+   }
+   else {
+      EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, VFT0_XYZ);
+      intel->coloroffset = 3;
+   }
+
+   if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_POINTSIZE)) {
+      EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, VFT0_POINT_WIDTH);
+   }
+
+   EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, VFT0_DIFFUSE);
+
+   intel->specoffset = 0;
+   if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR1) ||
+       RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_FOG)) {
+      if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR1)) {
+         intel->specoffset = intel->coloroffset + 1;
+         EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, VFT0_SPEC);
+      }
+      else
+         EMIT_PAD(3);   /* no COLOR1: pad in place of the 3UB specular color */
+
+      if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_FOG))
+         EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1UB_1F, VFT0_SPEC);
+      else
+         EMIT_PAD(1);   /* no FOG: pad in place of the 1UB fog value */
+   }
+
+   if (RENDERINPUTS_TEST_RANGE(index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX)) {
+      int i, count = 0;   /* count: texture coordinate sets emitted so far */
+
+      for (i = 0; i < I830_TEX_UNITS; i++) {
+         if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_TEX(i))) {
+            GLuint sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
+            GLuint emit;
+            GLuint mcs = (i830->state.Tex[i][I830_TEXREG_MCS] &
+                          ~TEXCOORDTYPE_MASK);
+
+            switch (sz) {
+            case 1:
+            case 2:
+               emit = EMIT_2F;
+               sz = 2;
+               mcs |= TEXCOORDTYPE_CARTESIAN;
+               break;
+            case 3:
+               emit = EMIT_3F;
+               sz = 3;
+               mcs |= TEXCOORDTYPE_VECTOR;
+               break;
+            case 4:
+               emit = EMIT_3F_XYW;
+               sz = 3;
+               mcs |= TEXCOORDTYPE_HOMOGENEOUS;
+               break;
+            default:
+               continue;   /* applies to the for loop: this unit is not emitted */
+            };
+
+
+            EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, emit, 0);
+            v2 |= VRTX_TEX_SET_FMT(count, SZ_TO_HW(sz));
+            mcsb1 |= (count + 8) << (i * 4);   /* 4 bits per unit i, holding count + 8 */
+
+            if (mcs != i830->state.Tex[i][I830_TEXREG_MCS]) {
+               I830_STATECHANGE(i830, I830_UPLOAD_TEX(i));
+               i830->state.Tex[i][I830_TEXREG_MCS] = mcs;
+            }
+
+            count++;
+         }
+      }
+
+      v0 |= VFT0_TEX_COUNT(count);
+   }
+
+   /* Only need to change the vertex emit code if there has been a
+    * statechange to a new hardware vertex format:
+    */
+   if (v0 != i830->state.Ctx[I830_CTXREG_VF] ||
+       v2 != i830->state.Ctx[I830_CTXREG_VF2] ||
+       mcsb1 != i830->state.Ctx[I830_CTXREG_MCSB1] ||
+       !RENDERINPUTS_EQUAL(index_bitset, i830->last_index_bitset)) {
+      int k;
+
+      I830_STATECHANGE(i830, I830_UPLOAD_CTX);
+
+      /* Must do this *after* statechange, so as not to affect
+       * buffered vertices reliant on the old state:
+       */
+      intel->vertex_size =
+         _tnl_install_attrs(ctx,
+                            intel->vertex_attrs,
+                            intel->vertex_attr_count,
+                            intel->ViewportMatrix.m, 0);
+
+      intel->vertex_size >>= 2;   /* divide by 4; presumably bytes to dwords -- confirm */
+
+      i830->state.Ctx[I830_CTXREG_VF] = v0;
+      i830->state.Ctx[I830_CTXREG_VF2] = v2;
+      i830->state.Ctx[I830_CTXREG_MCSB1] = mcsb1;
+      RENDERINPUTS_COPY(i830->last_index_bitset, index_bitset);
+
+      k = i830_check_vertex_size(intel, intel->vertex_size);
+      assert(k);   /* NOTE(review): k is otherwise unused when NDEBUG is defined */
+   }
+}
+
+/* Record the reduced primitive type and set the stipple enable bit only
+ * for triangles drawn with polygon stipple requested and hw_stipple set.
+ */
+static void
+i830_reduced_primitive_state(struct intel_context *intel, GLenum rprim)
+{
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   const GLuint old_st1 = i830->state.Stipple[I830_STPREG_ST1];
+   GLuint new_st1 = old_st1 & ~ST1_ENABLE;
+
+   /* Lines, points and anything else leave the enable bit cleared. */
+   if (rprim == GL_TRIANGLES &&
+       intel->ctx.Polygon.StippleFlag && intel->hw_stipple) {
+      new_st1 |= ST1_ENABLE;
+   }
+
+   i830->intel.reduced_primitive = rprim;
+
+   /* Nothing to re-emit when the stipple register is unchanged. */
+   if (new_st1 == old_st1)
+      return;
+
+   /* Fire vertices first, then mark the stipple state for upload. */
+   INTEL_FIREVERTICES(intel);
+
+   I830_STATECHANGE(i830, I830_UPLOAD_STIPPLE);
+   i830->state.Stipple[I830_STPREG_ST1] = new_st1;
+}
+
+/* Decode the vertex format registers and compare the vertex size they
+ * describe with 'expected'; failures are reported on stderr.
+ */
+static GLboolean
+i830_check_vertex_size(struct intel_context *intel, GLuint expected)
+{
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   GLuint vft0 = i830->state.Ctx[I830_CTXREG_VF];
+   GLuint vft1 = i830->state.Ctx[I830_CTXREG_VF2];
+   GLuint nrtex = (vft0 & VFT0_TEX_COUNT_MASK) >> VFT0_TEX_COUNT_SHIFT;
+   GLuint i, sz;
+
+   switch (vft0 & VFT0_XYZW_MASK) {
+   case VFT0_XY:
+      sz = 2;
+      break;
+   case VFT0_XYZ:
+   case VFT0_XYW:
+      sz = 3;
+      break;
+   case VFT0_XYZW:
+      sz = 4;
+      break;
+   default:
+      fprintf(stderr, "no xyzw specified\n");
+      return GL_FALSE;
+   }
+
+   /* Each optional attribute present adds one to the size. */
+   if (vft0 & VFT0_SPEC)
+      sz++;
+   if (vft0 & VFT0_DIFFUSE)
+      sz++;
+   if (vft0 & VFT0_DEPTH_OFFSET)
+      sz++;
+   if (vft0 & VFT0_POINT_WIDTH)
+      sz++;
+
+   /* VF2 holds one coordinate format per texture set, lowest bits first. */
+   for (i = 0; i < nrtex; i++) {
+      switch (vft1 & VFT1_TEX0_MASK) {
+      case TEXCOORDFMT_2D:
+         sz += 2;
+         break;
+      case TEXCOORDFMT_3D:
+         sz += 3;
+         break;
+      case TEXCOORDFMT_4D:
+         sz += 4;
+         break;
+      case TEXCOORDFMT_1D:
+         sz += 1;
+         break;
+      }
+      vft1 >>= VFT1_TEX1_SHIFT;
+   }
+
+   if (sz != expected)
+      fprintf(stderr, "vertex size mismatch %u/%u\n", sz, expected);
+
+   return sz == expected;
+}
+
+static void
+i830_emit_invarient_state(struct intel_context *intel)
+{
+   BATCH_LOCALS;
+   /* Emit the fixed ("invariant") state commands as a single batch. */
+   BEGIN_BATCH(29);   /* matches the 29 OUT_BATCH calls below */
+
+   OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
+   OUT_BATCH(0);
+
+   OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
+   OUT_BATCH(0);
+
+   OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
+   OUT_BATCH(0);
+
+   OUT_BATCH(_3DSTATE_FOG_MODE_CMD);
+   OUT_BATCH(FOGFUNC_ENABLE |
+             FOG_LINEAR_CONST | FOGSRC_INDEX_Z | ENABLE_FOG_DENSITY);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+
+   /* One MAP_TEX_STREAM command per unit 0..3, each using coord set n and map index n. */
+   OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+             MAP_UNIT(0) |
+             DISABLE_TEX_STREAM_BUMP |
+             ENABLE_TEX_STREAM_COORD_SET |
+             TEX_STREAM_COORD_SET(0) |
+             ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(0));
+   OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+             MAP_UNIT(1) |
+             DISABLE_TEX_STREAM_BUMP |
+             ENABLE_TEX_STREAM_COORD_SET |
+             TEX_STREAM_COORD_SET(1) |
+             ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(1));
+   OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+             MAP_UNIT(2) |
+             DISABLE_TEX_STREAM_BUMP |
+             ENABLE_TEX_STREAM_COORD_SET |
+             TEX_STREAM_COORD_SET(2) |
+             ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(2));
+   OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD |
+             MAP_UNIT(3) |
+             DISABLE_TEX_STREAM_BUMP |
+             ENABLE_TEX_STREAM_COORD_SET |
+             TEX_STREAM_COORD_SET(3) |
+             ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(3));
+   /* Texture coordinate transform is disabled for texture sets 0..3. */
+   OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+   OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(0));
+   OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+   OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(1));
+   OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+   OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(2));
+   OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM);
+   OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3));
+
+   OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM);
+   OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE);
+
+   OUT_BATCH(_3DSTATE_W_STATE_CMD);
+   OUT_BATCH(MAGIC_W_STATE_DWORD1);
+   OUT_BATCH(0x3f800000 /* 1.0 in IEEE float */ );
+
+
+   OUT_BATCH(_3DSTATE_COLOR_FACTOR_CMD);
+   OUT_BATCH(0x80808080);       /* .5 required in alpha for GL_DOT3_RGBA_EXT */
+
+   ADVANCE_BATCH();
+}
+
+
+#define emit( intel, state, size )			\
+   intel_batchbuffer_data(intel->batch, state, size )   /* callers pass size in bytes (sizeof, or words * 4) */
+
+static GLuint
+get_dirty(struct i830_hw_state *state)
+{
+   return ~state->emitted & state->active;   /* active bits not yet emitted */
+}
+
+/* Estimate, in bytes, of the batch space needed to emit every state
+ * group that is currently dirty.
+ */
+static GLuint
+get_state_size(struct i830_hw_state *state)
+{
+   const GLuint pending = get_dirty(state);
+   GLuint bytes = 0;
+   GLuint unit;
+
+   /* The invariant block is budgeted at a fixed 40 ints. */
+   if (pending & I830_UPLOAD_INVARIENT)
+      bytes += 40 * sizeof(int);
+   if (pending & I830_UPLOAD_RASTER_RULES)
+      bytes += sizeof(state->RasterRules);
+   if (pending & I830_UPLOAD_CTX)
+      bytes += sizeof(state->Ctx);
+   if (pending & I830_UPLOAD_BUFFERS)
+      bytes += sizeof(state->Buffer);
+   if (pending & I830_UPLOAD_STIPPLE)
+      bytes += sizeof(state->Stipple);
+
+   /* Per-unit texture registers plus the blend words in use. */
+   for (unit = 0; unit < I830_TEX_UNITS; unit++) {
+      if (pending & I830_UPLOAD_TEX(unit))
+         bytes += sizeof(state->Tex[unit]);
+      if (pending & I830_UPLOAD_TEXBLEND(unit))
+         bytes += state->TexBlendWordsUsed[unit] * 4;
+   }
+
+   return bytes;
+}
+
+
+/* Emit every dirty hardware state group into the current batchbuffer.
+ */
+static void
+i830_emit_state(struct intel_context *intel)
+{
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   struct i830_hw_state *state = &i830->state;
+   int i, count;
+   GLuint dirty;
+   drm_intel_bo *aper_array[3 + I830_TEX_UNITS];   /* batch + draw + depth + one per texture unit */
+   int aper_count;
+   GET_CURRENT_CONTEXT(ctx);
+   BATCH_LOCALS;
+
+   /* We don't hold the lock at this point, so want to make sure that
+    * there won't be a buffer wrap between the state emits and the primitive
+    * emit header.
+    *
+    * It might be better to talk about explicit places where
+    * scheduling is allowed, rather than assume that it is whenever a
+    * batchbuffer fills up.
+    */
+   intel_batchbuffer_require_space(intel->batch,
+				   get_state_size(state) + INTEL_PRIM_EMIT_SIZE);
+   count = 0;   /* number of flush-and-retry attempts made so far */
+ again:
+   aper_count = 0;
+   dirty = get_dirty(state);
+
+   aper_array[aper_count++] = intel->batch->buf;
+   if (dirty & I830_UPLOAD_BUFFERS) {
+      aper_array[aper_count++] = state->draw_region->buffer;   /* NOTE(review): draw_region is not NULL-checked here */
+      if (state->depth_region)
+         aper_array[aper_count++] = state->depth_region->buffer;
+   }
+
+   for (i = 0; i < I830_TEX_UNITS; i++)
+     if (dirty & I830_UPLOAD_TEX(i)) {
+	if (state->tex_buffer[i]) {
+	   aper_array[aper_count++] = state->tex_buffer[i];
+	}
+     }
+
+   if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) {   /* nonzero: presumably the buffers do not fit -- confirm */
+       if (count == 0) {
+	   count++;
+	   intel_batchbuffer_flush(intel->batch);
+	   goto again;   /* retry exactly once after the flush */
+       } else {
+	   _mesa_error(ctx, GL_OUT_OF_MEMORY, "i830 emit state");
+	   assert(0);   /* NOTE(review): execution continues past here when NDEBUG is defined */
+       }
+   }
+
+
+   /* Do this here as we may have flushed the batchbuffer above,
+    * causing more state to be dirty!
+    */
+   dirty = get_dirty(state);
+   state->emitted |= dirty;
+   assert(get_dirty(state) == 0);
+
+   if (dirty & I830_UPLOAD_INVARIENT) {
+      DBG("I830_UPLOAD_INVARIENT:\n");
+      i830_emit_invarient_state(intel);
+   }
+
+   if (dirty & I830_UPLOAD_RASTER_RULES) {
+      DBG("I830_UPLOAD_RASTER_RULES:\n");
+      emit(intel, state->RasterRules, sizeof(state->RasterRules));
+   }
+
+   if (dirty & I830_UPLOAD_CTX) {
+      DBG("I830_UPLOAD_CTX:\n");
+      emit(intel, state->Ctx, sizeof(state->Ctx));
+
+   }
+
+   if (dirty & I830_UPLOAD_BUFFERS) {
+      GLuint count = 15;   /* batch entries emitted below; shadows the outer retry counter */
+
+      DBG("I830_UPLOAD_BUFFERS:\n");
+
+      if (state->depth_region)
+          count += 3;   /* depth buffer: two dwords plus one relocation */
+
+      BEGIN_BATCH(count);
+      OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR0]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_CBUFADDR1]);
+      OUT_RELOC(state->draw_region->buffer,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+
+      if (state->depth_region) {
+         OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR0]);
+         OUT_BATCH(state->Buffer[I830_DESTREG_DBUFADDR1]);
+         OUT_RELOC(state->depth_region->buffer,
+		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+      }
+
+      OUT_BATCH(state->Buffer[I830_DESTREG_DV0]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DV1]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_SENABLE]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_SR0]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_SR1]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_SR2]);
+
+      assert(state->Buffer[I830_DESTREG_DRAWRECT0] != MI_NOOP);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT0]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT1]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT2]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT3]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT4]);
+      OUT_BATCH(state->Buffer[I830_DESTREG_DRAWRECT5]);
+      ADVANCE_BATCH();
+   }
+   
+   if (dirty & I830_UPLOAD_STIPPLE) {
+      DBG("I830_UPLOAD_STIPPLE:\n");
+      emit(intel, state->Stipple, sizeof(state->Stipple));
+   }
+
+   for (i = 0; i < I830_TEX_UNITS; i++) {
+      if ((dirty & I830_UPLOAD_TEX(i))) {
+         DBG("I830_UPLOAD_TEX(%d):\n", i);
+
+         BEGIN_BATCH(I830_TEX_SETUP_SIZE + 1);
+         OUT_BATCH(state->Tex[i][I830_TEXREG_TM0LI]);
+
+         if (state->tex_buffer[i]) {
+            OUT_RELOC(state->tex_buffer[i],
+		      I915_GEM_DOMAIN_SAMPLER, 0,
+                      state->tex_offset[i]);
+         }
+	 else {
+	    OUT_BATCH(state->tex_offset[i]);   /* no buffer object: emit the raw offset */
+	 }
+
+         OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S1]);
+         OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S2]);
+         OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S3]);
+         OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S4]);
+         OUT_BATCH(state->Tex[i][I830_TEXREG_MCS]);
+         OUT_BATCH(state->Tex[i][I830_TEXREG_CUBE]);
+
+         ADVANCE_BATCH();
+      }
+
+      if (dirty & I830_UPLOAD_TEXBLEND(i)) {
+         DBG("I830_UPLOAD_TEXBLEND(%d): %d words\n", i,
+             state->TexBlendWordsUsed[i]);
+         emit(intel, state->TexBlend[i], state->TexBlendWordsUsed[i] * 4);   /* words * 4 = bytes */
+      }
+   }
+
+   intel->batch->dirty_state &= ~dirty;
+   assert(get_dirty(state) == 0);
+   assert((intel->batch->dirty_state & (1<<1)) == 0);
+}
+
+/* Drop the region and texture buffer references held in the hw state. */
+static void
+i830_destroy_context(struct intel_context *intel)
+{
+   struct i830_hw_state *hw = &i830_context(&intel->ctx)->state;
+   GLuint unit;
+
+   intel_region_release(&hw->draw_region);
+   intel_region_release(&hw->depth_region);
+   for (unit = 0; unit < I830_TEX_UNITS; unit++) {
+      if (!hw->tex_buffer[unit])
+         continue;
+      drm_intel_bo_unreference(hw->tex_buffer[unit]);
+      hw->tex_buffer[unit] = NULL;
+   }
+
+   _tnl_free_vertices(&intel->ctx);
+}
+
+static void
+i830_set_draw_region(struct intel_context *intel,
+                     struct intel_region *color_regions[],
+                     struct intel_region *depth_region,
+		     GLuint num_regions)   /* num_regions is not used in this function */
+{
+   struct i830_context *i830 = i830_context(&intel->ctx);
+   GLcontext *ctx = &intel->ctx;
+   struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];   /* only color draw buffer 0 is consulted */
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   GLuint value;
+   struct i830_hw_state *state = &i830->state;
+   uint32_t draw_x, draw_y;
+   /* Rebind the color/depth regions and recompute the Buffer[] destination state words. */
+   if (state->draw_region != color_regions[0]) {
+      intel_region_release(&state->draw_region);
+      intel_region_reference(&state->draw_region, color_regions[0]);
+   }
+   if (state->depth_region != depth_region) {
+      intel_region_release(&state->depth_region);
+      intel_region_reference(&state->depth_region, depth_region);
+   }
+
+   /*
+    * Set stride/cpp values
+    */
+   i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_CBUFADDR0],
+				color_regions[0], BUF_3D_ID_COLOR_BACK);
+
+   i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_DBUFADDR0],
+				depth_region, BUF_3D_ID_DEPTH);
+
+   /*
+    * Compute/set I830_DESTREG_DV1 value
+    */
+   value = (DSTORG_HORT_BIAS(0x8) |     /* .5 */
+            DSTORG_VERT_BIAS(0x8) | DEPTH_IS_Z);    /* .5 */
+
+   if (irb != NULL) {
+      switch (irb->Base.Format) {
+      case MESA_FORMAT_ARGB8888:
+      case MESA_FORMAT_XRGB8888:
+	 value |= DV_PF_8888;
+	 break;
+      case MESA_FORMAT_RGB565:
+	 value |= DV_PF_565;
+	 break;
+      case MESA_FORMAT_ARGB1555:
+	 value |= DV_PF_1555;
+	 break;
+      case MESA_FORMAT_ARGB4444:
+	 value |= DV_PF_4444;
+	 break;
+      default:
+	 _mesa_problem(ctx, "Bad renderbuffer format: %d\n",
+		       irb->Base.Format);
+      }
+   }
+
+   if (depth_region && depth_region->cpp == 4) {
+      value |= DEPTH_FRMT_24_FIXED_8_OTHER;
+   }
+   else {
+      value |= DEPTH_FRMT_16_FIXED;   /* also taken when there is no depth region */
+   }
+   state->Buffer[I830_DESTREG_DV1] = value;
+
+   /* We set up the drawing rectangle to be offset into the color
+    * region's location in the miptree.  If it doesn't match with
+    * depth's offsets, we can't render to it.
+    *
+    * (Well, not actually true -- the hw grew a bit to let depth's
+    * offset get forced to 0,0.  We may want to use that if people are
+    * hitting that case.  Also, some configurations may be supportable
+    * by tweaking the start offset of the buffers around, which we
+    * can't do in general due to tiling)
+    */
+   FALLBACK(intel, I830_FALLBACK_DRAW_OFFSET,
+	    (depth_region && color_regions[0]) &&
+	    (depth_region->draw_x != color_regions[0]->draw_x ||
+	     depth_region->draw_y != color_regions[0]->draw_y));
+
+   if (color_regions[0]) {
+      draw_x = color_regions[0]->draw_x;
+      draw_y = color_regions[0]->draw_y;
+   } else if (depth_region) {
+      draw_x = depth_region->draw_x;
+      draw_y = depth_region->draw_y;
+   } else {
+      draw_x = 0;
+      draw_y = 0;
+   }
+
+   state->Buffer[I830_DESTREG_DRAWRECT0] = _3DSTATE_DRAWRECT_INFO;
+   state->Buffer[I830_DESTREG_DRAWRECT1] = 0;
+   state->Buffer[I830_DESTREG_DRAWRECT2] = (draw_y << 16) | draw_x;   /* y in the high half, x in the low half */
+   state->Buffer[I830_DESTREG_DRAWRECT3] =
+      ((ctx->DrawBuffer->Width + draw_x) & 0xffff) |
+      ((ctx->DrawBuffer->Height + draw_y) << 16);
+   state->Buffer[I830_DESTREG_DRAWRECT4] = (draw_y << 16) | draw_x;
+   state->Buffer[I830_DESTREG_DRAWRECT5] = MI_NOOP;
+
+   I830_STATECHANGE(i830, I830_UPLOAD_BUFFERS);
+}
+
+/* New-batch hook (not really handled at the moment): clear the emitted
+ * mask so that every active state group counts as dirty again. */
+static void
+i830_new_batch(struct intel_context *intel)
+{
+   struct i830_hw_state *hw = &i830_context(&intel->ctx)->state;
+   hw->emitted = 0;
+}
+
+static void
+i830_assert_not_dirty(struct intel_context *intel)
+{
+   /* Every active state group must already have been emitted. */
+   assert(get_dirty(&i830_context(&intel->ctx)->state) == 0);
+}
+
+static void
+i830_invalidate_state(struct intel_context *intel, GLuint new_state)
+{
+   if ((new_state & _NEW_LIGHT) != 0)   /* only _NEW_LIGHT is acted on here */
+      i830_update_provoking_vertex(&intel->ctx);
+}
+
+void
+i830InitVtbl(struct i830_context *i830)
+{
+   i830->intel.vtbl.destroy = i830_destroy_context;   /* context teardown */
+   i830->intel.vtbl.new_batch = i830_new_batch;   /* batch hooks */
+   i830->intel.vtbl.finish_batch = intel_finish_vb;
+   i830->intel.vtbl.emit_state = i830_emit_state;   /* state tracking hooks */
+   i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty;
+   i830->intel.vtbl.invalidate_state = i830_invalidate_state;
+   i830->intel.vtbl.set_draw_region = i830_set_draw_region;
+   i830->intel.vtbl.update_texture_state = i830UpdateTextureState;
+   i830->intel.vtbl.reduced_primitive_state = i830_reduced_primitive_state;
+   i830->intel.vtbl.render_prevalidate = i830_render_prevalidate;   /* render hooks */
+   i830->intel.vtbl.render_start = i830_render_start;
+   i830->intel.vtbl.check_vertex_size = i830_check_vertex_size;
+}
diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
new file mode 100644
index 0000000000..b3fe1c05d6
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -0,0 +1,187 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "i915_context.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "intel_tris.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_vertex.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+
+#include "i915_reg.h"
+#include "i915_program.h"
+
+#include "intel_tris.h"
+#include "intel_span.h"
+
+/***************************************
+ * Mesa's Driver Functions
+ ***************************************/
+
+/* Override intel default: hand the dirty bits to swrast, swsetup, vbo and
+ * tnl, then refresh the i915 state that depends on them. */
+static void
+i915InvalidateState(GLcontext * ctx, GLuint new_state)
+{
+   struct i915_fragment_program *fp;
+   const GLuint program_bits = _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS;
+
+   _swrast_InvalidateState(ctx, new_state);
+   _swsetup_InvalidateState(ctx, new_state);
+   _vbo_InvalidateState(ctx, new_state);
+   _tnl_InvalidateState(ctx, new_state);
+   _tnl_invalidate_vertex_state(ctx, new_state);
+   intel_context(ctx)->NewGLState |= new_state;
+
+   /* Todo: gather state values under which tracked parameters become
+    * invalidated, add callbacks for things like
+    * ProgramLocalParameters, etc.
+    */
+   fp = (struct i915_fragment_program *) ctx->FragmentProgram._Current;
+   if (fp != NULL && fp->nr_params != 0)
+      fp->params_uptodate = 0;
+
+   if (new_state & (_NEW_FOG | _NEW_HINT | program_bits))
+      i915_update_fog(ctx);
+   if (new_state & (_NEW_STENCIL | _NEW_BUFFERS | _NEW_POLYGON))
+      i915_update_stencil(ctx);
+   if (new_state & _NEW_LIGHT)
+      i915_update_provoking_vertex(ctx);
+   if (new_state & program_bits)
+      i915_update_program(ctx);
+}
+
+
+static void
+i915InitDriverFunctions(struct dd_function_table *functions)
+{
+   intelInitDriverFunctions(functions);   /* shared intel setup runs first */
+   i915InitStateFunctions(functions);
+   i915InitFragProgFuncs(functions);
+   functions->UpdateState = i915InvalidateState;   /* assigned last, after the init calls above */
+}
+
+
+extern const struct tnl_pipeline_stage *intel_pipeline[];
+
+/* Allocate and initialise an i915 context.  Returns GL_TRUE on success,
+ * GL_FALSE when the allocation or intelInitContext() fails.
+ */
+GLboolean
+i915CreateContext(int api, const __GLcontextModes * mesaVis,
+                  __DRIcontext * driContextPriv, void *sharedContextPrivate)
+{
+   struct dd_function_table functions;
+   struct i915_context *i915 =
+      (struct i915_context *) CALLOC_STRUCT(i915_context);
+   struct intel_context *intel;
+   GLcontext *ctx;
+
+   if (!i915)
+      return GL_FALSE;
+
+   /* Derive the member pointers only once i915 is known to be non-NULL. */
+   intel = &i915->intel;
+   ctx = &intel->ctx;
+
+   i915InitVtbl(i915);
+   i915InitDriverFunctions(&functions);
+
+   if (!intelInitContext(intel, api, mesaVis, driContextPriv,
+                         sharedContextPrivate, &functions)) {
+      FREE(i915);
+      return GL_FALSE;
+   }
+
+   _math_matrix_ctr(&intel->ViewportMatrix);
+
+   /* Initialize swrast, tnl driver tables: */
+   intelInitSpanFuncs(ctx);
+   intelInitTriFuncs(ctx);
+
+   /* Install the customized pipeline: */
+   _tnl_destroy_pipeline(ctx);
+   _tnl_install_pipeline(ctx, intel_pipeline);
+
+   if (intel->no_rast)
+      FALLBACK(intel, INTEL_FALLBACK_USER, 1);
+
+   ctx->Const.MaxTextureUnits = I915_TEX_UNITS;
+   ctx->Const.MaxTextureImageUnits = I915_TEX_UNITS;
+   ctx->Const.MaxTextureCoordUnits = I915_TEX_UNITS;
+   ctx->Const.MaxVarying = I915_TEX_UNITS;
+   ctx->Const.MaxCombinedTextureImageUnits =
+      ctx->Const.MaxVertexTextureImageUnits +
+      ctx->Const.MaxTextureImageUnits;
+
+   /* Advertise the full hardware capabilities.  The new memory
+    * manager should cope much better with overload situations:
+    */
+   ctx->Const.MaxTextureLevels = 12;
+   ctx->Const.Max3DTextureLevels = 9;
+   ctx->Const.MaxCubeTextureLevels = 12;
+   ctx->Const.MaxTextureRectSize = (1 << 11);
+
+   ctx->Const.MaxTextureMaxAnisotropy = 4.0;
+
+   /* GL_ARB_fragment_program limits - don't think Mesa actually
+    * validates programs against these, and in any case one ARB
+    * instruction can translate to more than one HW instruction, so
+    * we'll still have to check and fallback each time.
+    */
+   ctx->Const.FragmentProgram.MaxNativeTemps = I915_MAX_TEMPORARY;
+   ctx->Const.FragmentProgram.MaxNativeAttribs = 11;    /* 8 tex, 2 color, fog */
+   ctx->Const.FragmentProgram.MaxNativeParameters = I915_MAX_CONSTANT;
+   ctx->Const.FragmentProgram.MaxNativeAluInstructions = I915_MAX_ALU_INSN;
+   ctx->Const.FragmentProgram.MaxNativeTexInstructions = I915_MAX_TEX_INSN;
+   ctx->Const.FragmentProgram.MaxNativeInstructions = (I915_MAX_ALU_INSN +
+                                                       I915_MAX_TEX_INSN);
+   ctx->Const.FragmentProgram.MaxNativeTexIndirections =
+      I915_MAX_TEX_INDIRECT;
+   ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* I don't think we have one */
+   ctx->Const.FragmentProgram.MaxEnvParams =
+      MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
+	   ctx->Const.FragmentProgram.MaxEnvParams);
+
+   ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+
+   ctx->Const.MaxDrawBuffers = 1;
+
+   _tnl_init_vertices(ctx, ctx->Const.MaxArrayLockSize + 12,
+                      36 * sizeof(GLfloat));
+
+   intel->verts = TNL_CONTEXT(ctx)->clipspace.vertex_buf;
+
+   i915InitState(i915);
+
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h
new file mode 100644
index 0000000000..33dad9a195
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i915_context.h
@@ -0,0 +1,373 @@
+ /**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef I915CONTEXT_INC
+#define I915CONTEXT_INC
+
+#include "intel_context.h"
+#include "i915_reg.h"
+
+/* Fallback reason flags specific to the i915 driver.  Each value is a
+ * distinct single bit, starting at 0x1000.
+ * NOTE(review): bits below 0x1000 are presumably reserved for fallback
+ * reasons shared with the common intel code - confirm in intel_context.h.
+ */
+#define I915_FALLBACK_TEXTURE		 0x1000
+#define I915_FALLBACK_COLORMASK		 0x2000
+#define I915_FALLBACK_STENCIL		 0x4000
+#define I915_FALLBACK_STIPPLE		 0x8000
+#define I915_FALLBACK_PROGRAM		 0x10000
+#define I915_FALLBACK_LOGICOP		 0x20000
+#define I915_FALLBACK_POLYGON_SMOOTH	 0x40000
+#define I915_FALLBACK_POINT_SMOOTH	 0x80000
+#define I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN	 0x100000
+#define I915_FALLBACK_DRAW_OFFSET	 0x200000
+
+/* Hardware state group flags.  These are the bits stored in the
+ * "active" and "emitted" words of struct i915_hw_state.
+ */
+#define I915_UPLOAD_CTX              0x1
+#define I915_UPLOAD_BUFFERS          0x2
+#define I915_UPLOAD_STIPPLE          0x4
+#define I915_UPLOAD_PROGRAM          0x8
+#define I915_UPLOAD_CONSTANTS        0x10
+#define I915_UPLOAD_FOG              0x20
+#define I915_UPLOAD_INVARIENT        0x40
+#define I915_UPLOAD_DEFAULTS         0x80
+#define I915_UPLOAD_RASTER_RULES     0x100
+/* One bit per texture unit: unit i is bit (I915_UPLOAD_TEX_0_SHIFT + i).
+ * I915_UPLOAD_TEX_ALL covers eight such bits (16..23).
+ */
+#define I915_UPLOAD_TEX(i)           (0x00010000<<(i))
+#define I915_UPLOAD_TEX_ALL          (0x00ff0000)
+#define I915_UPLOAD_TEX_0_SHIFT      16
+
+
+/* State structure offsets - these will probably disappear.
+ *
+ * Each group below ends with a *_SETUP_SIZE value, which struct
+ * i915_hw_state uses as the length of the matching dword array.  The
+ * other names in a group are presumably indices into that array.
+ */
+
+/* Sizes i915_hw_state.Buffer[].  Indices 2 and 5 are given no name here. */
+#define I915_DESTREG_CBUFADDR0 0
+#define I915_DESTREG_CBUFADDR1 1
+#define I915_DESTREG_DBUFADDR0 3
+#define I915_DESTREG_DBUFADDR1 4
+#define I915_DESTREG_DV0 6
+#define I915_DESTREG_DV1 7
+#define I915_DESTREG_SENABLE 8
+#define I915_DESTREG_SR0 9
+#define I915_DESTREG_SR1 10
+#define I915_DESTREG_SR2 11
+#define I915_DESTREG_DRAWRECT0 12
+#define I915_DESTREG_DRAWRECT1 13
+#define I915_DESTREG_DRAWRECT2 14
+#define I915_DESTREG_DRAWRECT3 15
+#define I915_DESTREG_DRAWRECT4 16
+#define I915_DESTREG_DRAWRECT5 17
+#define I915_DEST_SETUP_SIZE 18
+
+/* Sizes i915_hw_state.Ctx[]. */
+#define I915_CTXREG_STATE4		0
+#define I915_CTXREG_LI	        	1
+#define I915_CTXREG_LIS2		        2
+#define I915_CTXREG_LIS4	        	3
+#define I915_CTXREG_LIS5	        	4
+#define I915_CTXREG_LIS6	         	5
+#define I915_CTXREG_IAB   	 	6
+#define I915_CTXREG_BLENDCOLOR0		7
+#define I915_CTXREG_BLENDCOLOR1		8
+#define I915_CTXREG_BF_STENCIL_OPS	9
+#define I915_CTXREG_BF_STENCIL_MASKS	10
+#define I915_CTX_SETUP_SIZE		11
+
+/* Sizes i915_hw_state.Fog[]. */
+#define I915_FOGREG_COLOR		0
+#define I915_FOGREG_MODE0		1
+#define I915_FOGREG_MODE1		2
+#define I915_FOGREG_MODE2		3
+#define I915_FOGREG_MODE3		4
+#define I915_FOG_SETUP_SIZE		5
+
+/* Sizes i915_hw_state.Stipple[]. */
+#define I915_STPREG_ST0        0
+#define I915_STPREG_ST1        1
+#define I915_STP_SETUP_SIZE    2
+
+/* Sizes each row of i915_hw_state.Tex[][].  Index 0 is given no name here. */
+#define I915_TEXREG_MS3        1
+#define I915_TEXREG_MS4        2
+#define I915_TEXREG_SS2        3
+#define I915_TEXREG_SS3        4
+#define I915_TEXREG_SS4        5
+#define I915_TEX_SETUP_SIZE    6
+
+/* Sizes i915_hw_state.Defaults[]. */
+#define I915_DEFREG_C0    0
+#define I915_DEFREG_C1    1
+#define I915_DEFREG_S0    2
+#define I915_DEFREG_S1    3
+#define I915_DEFREG_Z0    4
+#define I915_DEFREG_Z1    5
+#define I915_DEF_SETUP_SIZE    6
+
+/* Layout of i915_hw_state.RasterRules[]: a single dword at index
+ * I915_RASTER_RULES, followed by the array length.
+ */
+enum
+{
+   I915_RASTER_RULES = 0,
+   I915_RASTER_RULES_SETUP_SIZE = 1
+};
+
+/* Number of fragment program constants tracked per program. */
+#define I915_MAX_CONSTANT      32
+/* Length of i915_hw_state.Constant[]: four dwords per constant plus two
+ * extra dwords (presumably the packet header - confirm against the
+ * code that emits the constants packet).
+ */
+#define I915_CONSTANT_SIZE     (2+(4*I915_MAX_CONSTANT))
+
+/* Total instruction budget: declarations + texture + ALU instructions. */
+#define I915_MAX_INSN          (I915_MAX_DECL_INSN + \
+				I915_MAX_TEX_INSN + \
+				I915_MAX_ALU_INSN)
+
+/* Maximum size of the program packet, which matches the limits on
+ * decl, tex, and ALU instructions: three dwords per instruction plus
+ * one more dword.
+ */
+#define I915_PROGRAM_SIZE      (I915_MAX_INSN * 3 + 1)
+
+/* Hardware version of a parsed fragment program.  "Derived" from the
+ * mesa fragment_program struct: the Mesa object is embedded as the
+ * first member, followed by the translated instruction stream and the
+ * scratch state used while building it.
+ */
+struct i915_fragment_program
+{
+   /* Embedded Mesa program object; kept as the first member. */
+   struct gl_fragment_program FragProg;
+
+   /* Status flags for this program. */
+   GLboolean translated;
+   GLboolean params_uptodate;
+   GLboolean on_hardware;
+   GLboolean error;             /* If program is malformed for any reason. */
+
+   /** Record of which phases R registers were last written in. */
+   GLuint register_phases[16];
+   /* Counters; the nr_*_insn names parallel the I915_MAX_*_INSN limits. */
+   GLuint indirections;
+   GLuint nr_tex_indirect;
+   GLuint nr_tex_insn;
+   GLuint nr_alu_insn;
+   GLuint nr_decl_insn;
+
+
+
+
+   /* TODO: split between the stored representation of a program and
+    * the state used to build that representation.
+    */
+   GLcontext *ctx;
+
+   /* declarations contains the packet header: three dwords per
+    * declaration plus one dword for that header.
+    */
+   GLuint declarations[I915_MAX_DECL_INSN * 3 + 1];
+   /* Three dwords per texture or ALU instruction. */
+   GLuint program[(I915_MAX_TEX_INSN + I915_MAX_ALU_INSN) * 3];
+
+   /* Constant values, four floats each, with a per-constant flags word. */
+   GLfloat constant[I915_MAX_CONSTANT][4];
+   GLuint constant_flags[I915_MAX_CONSTANT];
+   GLuint nr_constants;
+
+   GLuint *csr;                 /* Cursor, points into program.
+                                 */
+
+   GLuint *decl;                /* Cursor, points into declarations.
+                                 */
+
+   GLuint decl_s;               /* flags for which s regs need to be decl'd */
+   GLuint decl_t;               /* flags for which t regs need to be decl'd */
+
+   GLuint temp_flag;            /* Tracks temporary regs which are in
+                                 * use.
+                                 */
+
+   GLuint utemp_flag;           /* Tracks TYPE_U temporary regs which are in
+                                 * use.
+                                 */
+
+
+   /* Track which R registers are "live" for each instruction.
+    * A register is live between the time it's written to and the last time
+    * it's read. */
+   GLuint usedRegs[I915_MAX_INSN];
+
+   /* Helpers for i915_fragprog.c:
+    */
+   GLuint wpos_tex;
+   GLboolean depth_written;
+
+   /* Table tying hardware constant slots to the values they track;
+    * nr_params is presumably the number of entries in use.
+    */
+   struct
+   {
+      GLuint reg;               /* Hardware constant idx */
+      const GLfloat *values;    /* Pointer to tracked values */
+   } param[I915_MAX_CONSTANT];
+   GLuint nr_params;
+};
+
+
+
+
+
+
+
+/* Number of texture units; sizes the per-unit arrays below. */
+#define I915_TEX_UNITS 8
+
+
+/* A complete set of hardware state, held as raw dword arrays (one per
+ * state group) plus the buffers that state refers to.
+ */
+struct i915_hw_state
+{
+   /* One array per state group, each sized by its *_SETUP_SIZE constant. */
+   GLuint Ctx[I915_CTX_SETUP_SIZE];
+   GLuint Buffer[I915_DEST_SETUP_SIZE];
+   GLuint Stipple[I915_STP_SETUP_SIZE];
+   GLuint Fog[I915_FOG_SETUP_SIZE];
+   GLuint Defaults[I915_DEF_SETUP_SIZE];
+   GLuint RasterRules[I915_RASTER_RULES_SETUP_SIZE];
+   GLuint Tex[I915_TEX_UNITS][I915_TEX_SETUP_SIZE];
+   /* Constant and program storage, each with a companion size field
+    * (presumably the portion currently in use - confirm in i915_fragprog.c).
+    */
+   GLuint Constant[I915_CONSTANT_SIZE];
+   GLuint ConstantSize;
+   GLuint Program[I915_PROGRAM_SIZE];
+   GLuint ProgramSize;
+
+   /* Region pointers for relocation: 
+    */
+   struct intel_region *draw_region;
+   struct intel_region *depth_region;
+/*    struct intel_region *tex_region[I915_TEX_UNITS]; */
+
+   /* Regions aren't actually that appropriate here as the memory may
+    * be from a PBO or FBO.  Will have to do this for draw and depth for
+    * FBO's...
+    */
+   drm_intel_bo *tex_buffer[I915_TEX_UNITS];
+   GLuint tex_offset[I915_TEX_UNITS];
+
+
+   /* Bitmasks of I915_UPLOAD_* flags; see I915_ACTIVESTATE and
+    * I915_STATECHANGE for how they are updated.
+    */
+   GLuint active;               /* I915_UPLOAD_* */
+   GLuint emitted;              /* I915_UPLOAD_* */
+};
+
+/* Fog mode selectors.
+ * NOTE(review): presumably the values stored in i915_context.vertex_fog -
+ * confirm against i915_state.c.
+ */
+#define I915_FOG_PIXEL  2
+#define I915_FOG_VERTEX 1
+#define I915_FOG_NONE   0
+
+/* Per-context driver state for i915.  The shared intel_context is the
+ * first member; the i915-specific fields follow it.
+ */
+struct i915_context
+{
+   struct intel_context intel;
+
+   GLuint last_ReallyEnabled;
+   GLuint vertex_fog;
+   GLuint lodbias_ss2[MAX_TEXTURE_UNITS];   /* one entry per texture unit */
+
+
+   /* Fragment program currently selected for this context. */
+   struct i915_fragment_program *current_program;
+
+   /* Hardware state image and its active/emitted tracking words. */
+   struct i915_hw_state state;
+   uint32_t last_draw_offset;
+};
+
+
+/* Mark the state groups in "flag" (I915_UPLOAD_* bits) as changed:
+ * invoke INTEL_FIREVERTICES on the embedded intel context, then clear
+ * those bits from state.emitted.
+ * NOTE(review): INTEL_FIREVERTICES presumably flushes queued vertices
+ * so they are drawn with the old state - confirm in intel_context.h.
+ */
+#define I915_STATECHANGE(i915, flag)					\
+do {									\
+   INTEL_FIREVERTICES( &(i915)->intel );					\
+   (i915)->state.emitted &= ~(flag);					\
+} while (0)
+
+/* Switch the state groups in "flag" (I915_UPLOAD_* bits) on or off:
+ * invoke INTEL_FIREVERTICES on the embedded intel context, then set
+ * the bits in state.active when "mode" is non-zero and clear them
+ * otherwise.
+ */
+#define I915_ACTIVESTATE(i915, flag, mode)			\
+do {								\
+   INTEL_FIREVERTICES( &(i915)->intel );				\
+   if (mode)							\
+      (i915)->state.active |= (flag);				\
+   else								\
+      (i915)->state.active &= ~(flag);				\
+} while (0)
+
+
+/*======================================================================
+ * i915_vtbl.c
+ */
+extern void i915InitVtbl(struct i915_context *i915);
+
+extern void
+i915_state_draw_region(struct intel_context *intel,
+                       struct i915_hw_state *state,
+                       struct intel_region *color_region,
+                       struct intel_region *depth_region);
+
+
+
+/* Encode a size as a 2-bit field: (sz - 2) & 0x3.  The argument is
+ * parenthesized so that an expression argument (e.g. "c ? 4 : 2") is
+ * evaluated as a unit before the subtraction.
+ */
+#define SZ_TO_HW(sz)  (((sz)-2)&0x3)
+/* Vertex emit format for "sz" floats, computed as an offset from EMIT_1F.
+ * NOTE(review): assumes the EMIT_1F.. formats are consecutive - confirm
+ * in the tnl vertex header.
+ */
+#define EMIT_SZ(sz)   (EMIT_1F + (sz) - 1)
+/* Append one attribute to intel->vertex_attrs: record its attrib and
+ * format, OR S4 into "s4", and advance "offset" by SZ.
+ *
+ * The expansion site must have "intel", "s4" and "offset" in scope.
+ * Unlike EMIT_PAD, this does not write the entry's .offset field.
+ */
+#define EMIT_ATTR( ATTR, STYLE, S4, SZ )				\
+do {									\
+   intel->vertex_attrs[intel->vertex_attr_count].attrib = (ATTR);	\
+   intel->vertex_attrs[intel->vertex_attr_count].format = (STYLE);	\
+   s4 |= S4;								\
+   intel->vertex_attr_count++;						\
+   offset += (SZ);							\
+} while (0)
+
+/* Append an N-byte padding entry to intel->vertex_attrs and advance
+ * "offset" by N.  The expansion site must have "intel" and "offset" in
+ * scope.
+ *
+ * The bare EMIT_PAD used as the format value is not a recursive macro
+ * call: a function-like macro name that is not followed by "(" is left
+ * alone, so it refers to the EMIT_PAD identifier declared elsewhere.
+ */
+#define EMIT_PAD( N )							\
+do {									\
+   intel->vertex_attrs[intel->vertex_attr_count].attrib = 0;		\
+   intel->vertex_attrs[intel->vertex_attr_count].format = EMIT_PAD;	\
+   intel->vertex_attrs[intel->vertex_attr_count].offset = (N);		\
+   intel->vertex_attr_count++;						\
+   offset += (N);							\
+} while (0)
+
+
+
+/*======================================================================
+ * i915_context.c
+ */
+extern GLboolean i915CreateContext(int api,
+				   const __GLcontextModes * mesaVis,
+                                   __DRIcontext * driContextPriv,
+                                   void *sharedContextPrivate);
+
+
+/*======================================================================
+ * i915_debug.c
+ */
+extern void i915_disassemble_program(const GLuint * program, GLuint sz);
+extern void i915_print_ureg(const char *msg, GLuint ureg);
+
+
+/*======================================================================
+ * i915_state.c
+ */
+extern void i915InitStateFunctions(struct dd_function_table *functions);
+extern void i915InitState(struct i915_context *i915);
+extern void i915_update_fog(GLcontext * ctx);
+extern void i915_update_stencil(GLcontext * ctx);
+extern void i915_update_provoking_vertex(GLcontext *ctx);
+
+
+/*======================================================================
+ * i915_tex.c
+ */
+extern void i915UpdateTextureState(struct intel_context *intel);
+extern void i915InitTextureFuncs(struct dd_function_table *functions);
+
+/*======================================================================
+ * i915_fragprog.c
+ */
+extern void i915ValidateFragmentProgram(struct i915_context *i915);
+extern void i915InitFragProgFuncs(struct dd_function_table *functions);
+
+/*======================================================================
+ * Inline conversion functions.  These are better-typed than the
+ * macros used previously:
+ */
+/* Convert a GLcontext pointer to the i915 context by a plain pointer
+ * cast; no checking is performed.
+ * NOTE(review): relies on the GLcontext being located at the very start
+ * of struct i915_context (via its leading intel_context member) -
+ * confirm in intel_context.h.
+ */
+static INLINE struct i915_context *
+i915_context(GLcontext * ctx)
+{
+   struct i915_context *i915 = (struct i915_context *) ctx;
+
+   return i915;
+}
+
+
+
+/* Macro spelling of the conversion above. */
+#define I915_CONTEXT(ctx)	i915_context(ctx)
+
+
+
+#endif
diff --git a/src/mesa/drivers/dri/i915/i915_debug.c b/src/mesa/drivers/dri/i915/i915_debug.c
new file mode 100644
index 0000000000..4569fb918e
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i915_debug.c
@@ -0,0 +1,847 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/imports.h"
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_debug.h"
+
+/**
+ * Dump one packet as raw hex dwords and step the stream past it.
+ *
+ * stream: decode cursor.  stream->offset is a byte offset added to
+ *         stream->ptr; it is advanced by len dwords on success.
+ * name:   label printed ahead of the dump.
+ * len:    packet length in dwords.
+ *
+ * Returns GL_TRUE once the packet has been printed.  A zero length is
+ * reported as an error: GL_FALSE is returned and the stream is left
+ * where it was.
+ */
+static GLboolean debug( struct debug_stream *stream, const char *name, GLuint len )
+{
+   GLuint i;
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+
+   if (len == 0) {
+      /* Show the dword at the current offset.  stream->ptr[0] would be
+       * the first element at the base of the buffer rather than the
+       * header of the offending packet.
+       */
+      printf("Error - zero length packet (0x%08x)\n", ptr[0]);
+      assert(0);
+      return GL_FALSE;
+   }
+
+   if (stream->print_addresses)
+      printf("%08x:  ", stream->offset);
+
+
+   printf("%s (%d dwords):\n", name, len);
+   for (i = 0; i < len; i++)
+      printf("\t0x%08x\n",  ptr[i]);
+   printf("\n");
+
+   stream->offset += len * sizeof(GLuint);
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Translate the primitive-type field of a dword (val & PRIM3D_MASK)
+ * into a printable name.  Unrecognized values yield "????".
+ */
+static const char *get_prim_name( GLuint val )
+{
+   const char *label = "????";
+
+   switch (val & PRIM3D_MASK) {
+   case PRIM3D_TRILIST:        label = "TRILIST";        break;
+   case PRIM3D_TRISTRIP:       label = "TRISTRIP";       break;
+   case PRIM3D_TRISTRIP_RVRSE: label = "TRISTRIP_RVRSE"; break;
+   case PRIM3D_TRIFAN:         label = "TRIFAN";         break;
+   case PRIM3D_POLY:           label = "POLY";           break;
+   case PRIM3D_LINELIST:       label = "LINELIST";       break;
+   case PRIM3D_LINESTRIP:      label = "LINESTRIP";      break;
+   case PRIM3D_RECTLIST:       label = "RECTLIST";       break;
+   case PRIM3D_POINTLIST:      label = "POINTLIST";      break;
+   case PRIM3D_DIB:            label = "DIB";            break;
+   case PRIM3D_CLEAR_RECT:     label = "CLEAR_RECT";     break;
+   case PRIM3D_ZONE_INIT:      label = "ZONE_INIT";      break;
+   default:                                              break;
+   }
+
+   return label;
+}
+
+/**
+ * Dump a primitive packet: the header dword with its primitive name,
+ * then the remaining len - 1 dwords, optionally also shown as floats.
+ *
+ * stream:      decode cursor; advanced by len dwords.
+ * name:        label printed ahead of the primitive name.
+ * dump_floats: when true, each payload dword is also printed
+ *              reinterpreted as a GLfloat.
+ * len:         packet length in dwords.
+ *
+ * Always returns GL_TRUE.
+ */
+static GLboolean debug_prim( struct debug_stream *stream, const char *name, 
+			     GLboolean dump_floats,
+			     GLuint len )
+{
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   const char *prim = get_prim_name( ptr[0] );
+   GLuint i;
+
+   printf("%s %s (%d dwords):\n", name, prim, len);
+   printf("\t0x%08x\n",  ptr[0]);
+   for (i = 1; i < len; i++) {
+      if (dump_floats) {
+         /* Reinterpret the bits through a union; reading the dword via
+          * a casted GLfloat pointer breaks the effective-type rules.
+          */
+         union { GLuint u; GLfloat f; } pun;
+
+         pun.u = ptr[i];
+         printf("\t0x%08x // %f\n",  pun.u, pun.f);
+      }
+      else {
+         printf("\t0x%08x\n",  ptr[i]);
+      }
+   }
+
+   printf("\n");
+
+   stream->offset += len * sizeof(GLuint);
+
+   return GL_TRUE;
+}
+   
+
+
+
+/**
+ * Dump a pixel shader program packet by handing its dwords to
+ * i915_disassemble_program(), then step the stream past it.
+ *
+ * stream: decode cursor; advanced by len dwords on success.
+ * name:   label printed ahead of the disassembly.
+ * len:    packet length in dwords.
+ *
+ * Returns GL_TRUE on success.  A zero length is reported as an error:
+ * GL_FALSE is returned and the stream is left where it was.
+ */
+static GLboolean debug_program( struct debug_stream *stream, const char *name, GLuint len )
+{
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+
+   if (len == 0) {
+      /* Show the dword at the current offset.  stream->ptr[0] would be
+       * the first element at the base of the buffer rather than the
+       * header of the offending packet.
+       */
+      printf("Error - zero length packet (0x%08x)\n", ptr[0]);
+      assert(0);
+      return GL_FALSE;
+   }
+
+   if (stream->print_addresses)
+      printf("%08x:  ", stream->offset);
+
+   printf("%s (%d dwords):\n", name, len);
+   i915_disassemble_program( ptr, len );
+
+   stream->offset += len * sizeof(GLuint);
+   return GL_TRUE;
+}
+
+
+/**
+ * Dump a packet that redirects decoding, then follow it: the stream
+ * offset is replaced by the packet's second dword with its two low
+ * bits cleared.  A note is printed saying whether the jump goes
+ * backwards or forwards relative to the end of this packet.
+ *
+ * Always returns GL_TRUE.
+ */
+static GLboolean debug_chain( struct debug_stream *stream, const char *name, GLuint len )
+{
+   const GLuint *words = (GLuint *)(stream->ptr + stream->offset);
+   /* Offset just past this packet, i.e. where decoding would have resumed. */
+   const GLuint fallthrough = stream->offset + len * sizeof(GLuint);
+   GLuint k = 0;
+
+   printf("%s (%d dwords):\n", name, len);
+   while (k < len) {
+      printf("\t0x%08x\n",  words[k]);
+      k++;
+   }
+
+   stream->offset = words[1] & ~0x3;
+
+   if (!(stream->offset < fallthrough)) {
+      printf("\n... skipping from 0x%x --> 0x%x ...\n\n",
+             fallthrough, stream->offset );
+   }
+   else {
+      printf("\n... skipping backwards from 0x%x --> 0x%x ...\n\n",
+             fallthrough, stream->offset );
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Dump a primitive packet whose length is not encoded in its header:
+ * the 16-bit indices after the header dword are scanned up to the
+ * 0xffff terminator to find where the packet ends.
+ *
+ * Always returns GL_TRUE; the stream is advanced past the packet.
+ */
+static GLboolean debug_variable_length_prim( struct debug_stream *stream )
+{
+   GLuint *words = (GLuint *)(stream->ptr + stream->offset);
+   const char *prim_name = get_prim_name( words[0] );
+   const GLushort *indices = (GLushort *)(words + 1);
+   GLuint count = 0;
+   GLuint len, k;
+
+   while (indices[count] != 0xffff)
+      count++;
+
+   /* Header dword, plus the indices and their terminator packed two
+    * to a dword (rounded up).
+    */
+   len = 1 + (count + 2) / 2;
+
+   printf("3DPRIM, %s variable length %d indicies (%d dwords):\n", prim_name, count, len);
+   for (k = 0; k < len; k++) {
+      printf("\t0x%08x\n",  words[k]);
+   }
+   printf("\n");
+
+   stream->offset += len * sizeof(GLuint);
+   return GL_TRUE;
+}
+
+
+/* Print the bit field dw[hi:lo] (inclusive) as "\t\t <label>: 0x<value>".
+ * The label is given printf-style through the variadic arguments.
+ * himask keeps bits 0..hi of dw; the result is then shifted down by lo.
+ */
+#define BITS( dw, hi, lo, ... )				\
+do {							\
+   unsigned himask = 0xffffffffU >> (31 - (hi));		\
+   printf("\t\t ");				\
+   printf(__VA_ARGS__);			\
+   printf(": 0x%x\n", ((dw) & himask) >> (lo));	\
+} while (0)
+
+/* "Must be zero": assert that a bit range of dw is clear.
+ *
+ * NOTE(review): x is dw already shifted right by lo, but it is then
+ * masked with (1 << hi) - 1 and ~((1 << lo) - 1), which are positioned
+ * for the unshifted dword.  As written the assert therefore examines
+ * bits [2*lo, hi+lo) of dw rather than [lo, hi]; for example
+ * MBZ(v, 23, 18) can never fire.  Confirm the intended range, and audit
+ * the existing call sites, before tightening this.
+ */
+#define MBZ( dw, hi, lo) do {							\
+   unsigned x = (dw) >> (lo);				\
+   unsigned lomask = (1 << (lo)) - 1;			\
+   unsigned himask;					\
+   himask = (1UL << (hi)) - 1;				\
+   assert ((x & himask & ~lomask) == 0);	\
+} while (0)
+
+/* Print "\t\t <label>" on its own line when the given bit of dw is set;
+ * print nothing when it is clear.  The label is given printf-style
+ * through the variadic arguments.
+ */
+#define FLAG( dw, bit, ... )			\
+do {							\
+   if (((dw) >> (bit)) & 1) {				\
+      printf("\t\t ");				\
+      printf(__VA_ARGS__);			\
+      printf("\n");				\
+   }							\
+} while (0)
+
+/**
+ * Decode a load-state-immediate packet.  Bits 4..11 of the header say
+ * which of LIS0..LIS6 follow (bit 7 of that field is not decoded
+ * here); each one present occupies one dword and is printed field by
+ * field.
+ *
+ * Asserts that exactly len dwords were consumed, advances the stream
+ * by len dwords and returns GL_TRUE.
+ */
+static GLboolean debug_load_immediate( struct debug_stream *stream,
+				       const char *name,
+				       GLuint len )
+{
+   const GLuint *words = (GLuint *)(stream->ptr + stream->offset);
+   const GLuint present = (words[0] >> 4) & 0xff;
+   GLuint pos = 0;
+
+   printf("%s (%d dwords, flags: %x):\n", name, len, present);
+   printf("\t0x%08x\n",  words[pos++]);
+
+   if (present & (1<<0)) {
+      const GLuint lis0 = words[pos++];
+
+      printf("\t  LIS0: 0x%08x\n", lis0);
+      printf("\t vb address: 0x%08x\n", (lis0 & ~0x3));
+      BITS(lis0, 0, 0, "vb invalidate disable");
+   }
+
+   if (present & (1<<1)) {
+      const GLuint lis1 = words[pos++];
+
+      printf("\t  LIS1: 0x%08x\n", lis1);
+      BITS(lis1, 29, 24, "vb dword width");
+      BITS(lis1, 21, 16, "vb dword pitch");
+      BITS(lis1, 15, 0, "vb max index");
+   }
+
+   if (present & (1<<2)) {
+      const GLuint lis2 = words[pos++];
+      int unit;
+
+      printf("\t  LIS2: 0x%08x\n", lis2);
+      /* One nibble per texture coordinate set; 0xf entries are skipped. */
+      for (unit = 0; unit < 8; unit++) {
+         unsigned tc = (lis2 >> (unit * 4)) & 0xf;
+
+         if (tc != 0xf) {
+            BITS(tc, 3, 0, "tex coord %d", unit);
+         }
+      }
+   }
+
+   if (present & (1<<3)) {
+      const GLuint lis3 = words[pos++];
+
+      printf("\t  LIS3: 0x%08x\n", lis3);
+   }
+
+   if (present & (1<<4)) {
+      const GLuint lis4 = words[pos++];
+
+      printf("\t  LIS4: 0x%08x\n", lis4);
+      BITS(lis4, 31, 23, "point width");
+      BITS(lis4, 22, 19, "line width");
+      FLAG(lis4, 18, "alpha flatshade");
+      FLAG(lis4, 17, "fog flatshade");
+      FLAG(lis4, 16, "spec flatshade");
+      FLAG(lis4, 15, "rgb flatshade");
+      BITS(lis4, 14, 13, "cull mode");
+      FLAG(lis4, 12, "vfmt: point width");
+      FLAG(lis4, 11, "vfmt: specular/fog");
+      FLAG(lis4, 10, "vfmt: rgba");
+      FLAG(lis4, 9, "vfmt: depth offset");
+      BITS(lis4, 8, 6, "vfmt: position (2==xyzw)");
+      FLAG(lis4, 5, "force dflt diffuse");
+      FLAG(lis4, 4, "force dflt specular");
+      FLAG(lis4, 3, "local depth offset enable");
+      FLAG(lis4, 2, "vfmt: fp32 fog coord");
+      FLAG(lis4, 1, "sprite point");
+      FLAG(lis4, 0, "antialiasing");
+   }
+
+   if (present & (1<<5)) {
+      const GLuint lis5 = words[pos++];
+
+      printf("\t  LIS5: 0x%08x\n", lis5);
+      BITS(lis5, 31, 28, "rgba write disables");
+      FLAG(lis5, 27,     "force dflt point width");
+      FLAG(lis5, 26,     "last pixel enable");
+      FLAG(lis5, 25,     "global z offset enable");
+      FLAG(lis5, 24,     "fog enable");
+      BITS(lis5, 23, 16, "stencil ref");
+      BITS(lis5, 15, 13, "stencil test");
+      BITS(lis5, 12, 10, "stencil fail op");
+      BITS(lis5, 9, 7,   "stencil pass z fail op");
+      BITS(lis5, 6, 4,   "stencil pass z pass op");
+      FLAG(lis5, 3,      "stencil write enable");
+      FLAG(lis5, 2,      "stencil test enable");
+      FLAG(lis5, 1,      "color dither enable");
+      FLAG(lis5, 0,      "logiop enable");
+   }
+
+   if (present & (1<<6)) {
+      const GLuint lis6 = words[pos++];
+
+      printf("\t  LIS6: 0x%08x\n", lis6);
+      FLAG(lis6, 31,      "alpha test enable");
+      BITS(lis6, 30, 28,  "alpha func");
+      BITS(lis6, 27, 20,  "alpha ref");
+      FLAG(lis6, 19,      "depth test enable");
+      BITS(lis6, 18, 16,  "depth func");
+      FLAG(lis6, 15,      "blend enable");
+      BITS(lis6, 14, 12,  "blend func");
+      BITS(lis6, 11, 8,   "blend src factor");
+      BITS(lis6, 7,  4,   "blend dst factor");
+      FLAG(lis6, 3,       "depth write enable");
+      FLAG(lis6, 2,       "color write enable");
+      BITS(lis6, 1,  0,   "provoking vertex");
+   }
+
+   printf("\n");
+
+   /* The dwords decoded must account for the whole packet. */
+   assert(pos == len);
+
+   stream->offset += len * sizeof(GLuint);
+
+   return GL_TRUE;
+}
+ 
+
+
+/**
+ * Decode a load-indirect packet.  Bits 8..13 of the header select
+ * which indirect state blocks follow.  Every block contributes two
+ * dwords, except the dynamic one, which contributes one.  When no
+ * block is selected a single dummy dword is consumed instead.
+ *
+ * Asserts that exactly len dwords were consumed, advances the stream
+ * by len dwords and returns GL_TRUE.
+ */
+static GLboolean debug_load_indirect( struct debug_stream *stream,
+				      const char *name,
+				      GLuint len )
+{
+   const GLuint *words = (GLuint *)(stream->ptr + stream->offset);
+   const GLuint selected = (words[0] >> 8) & 0x3f;
+   GLuint pos = 0;
+   GLuint bit;
+
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  words[pos++]);
+
+   for (bit = 0; bit < 6; bit++) {
+      if (!(selected & (1<<bit)))
+         continue;
+
+      /* Match the header bit, in its original position, to the block. */
+      switch (1<<(8+bit)) {
+      case LI0_STATE_STATIC_INDIRECT:
+         printf("        STATIC: 0x%08x | %x\n", words[pos]&~3, words[pos]&3);
+         pos++;
+         printf("                0x%08x\n", words[pos++]);
+         break;
+      case LI0_STATE_DYNAMIC_INDIRECT:
+         printf("       DYNAMIC: 0x%08x | %x\n", words[pos]&~3, words[pos]&3);
+         pos++;
+         break;
+      case LI0_STATE_SAMPLER:
+         printf("       SAMPLER: 0x%08x | %x\n", words[pos]&~3, words[pos]&3);
+         pos++;
+         printf("                0x%08x\n", words[pos++]);
+         break;
+      case LI0_STATE_MAP:
+         printf("           MAP: 0x%08x | %x\n", words[pos]&~3, words[pos]&3);
+         pos++;
+         printf("                0x%08x\n", words[pos++]);
+         break;
+      case LI0_STATE_PROGRAM:
+         printf("       PROGRAM: 0x%08x | %x\n", words[pos]&~3, words[pos]&3);
+         pos++;
+         printf("                0x%08x\n", words[pos++]);
+         break;
+      case LI0_STATE_CONSTANTS:
+         printf("     CONSTANTS: 0x%08x | %x\n", words[pos]&~3, words[pos]&3);
+         pos++;
+         printf("                0x%08x\n", words[pos++]);
+         break;
+      default:
+         assert(0);
+         break;
+      }
+   }
+
+   if (selected == 0) {
+      printf("\t  DUMMY: 0x%08x\n", words[pos++]);
+   }
+
+   printf("\n");
+
+   /* The dwords decoded must account for the whole packet. */
+   assert(pos == len);
+
+   stream->offset += len * sizeof(GLuint);
+
+   return GL_TRUE;
+}
+ 	
+/* Print blitter dword BR13: the raw value, then the clipping flag,
+ * colour depth, raster op and destination pitch.  "stream" is unused.
+ */
+static void BR13( struct debug_stream *stream, GLuint val )
+{
+   (void) stream;
+
+   printf("\t0x%08x\n",  val);
+
+   FLAG(val, 30, "clipping enable");
+   BITS(val, 25, 24, "color depth (3==32bpp)");
+   BITS(val, 23, 16, "raster op");
+   BITS(val, 15, 0,  "dest pitch");
+}
+
+
+/* Print blitter dwords BR22 and BR23 (the two destination corners) and
+ * assert that each 16-bit half of the first is less than the matching
+ * half of the second, compared as signed shorts.  "stream" is unused.
+ */
+static void BR2223( struct debug_stream *stream,
+		    GLuint val22, GLuint val23 )
+{
+   /* Overlay each dword with its two 16-bit halves. */
+   union { GLuint val; short field[2]; } corner1, corner2;
+
+   (void) stream;
+
+   corner1.val = val22;
+   corner2.val = val23;
+
+   printf("\t0x%08x\n",  val22);
+   BITS(val22, 31, 16, "dest y1");
+   BITS(val22, 15, 0,  "dest x1");
+
+   printf("\t0x%08x\n",  val23);
+   BITS(val23, 31, 16, "dest y2");
+   BITS(val23, 15, 0,  "dest x2");
+
+   /* The blit engine may produce unexpected results when these aren't met */
+   assert(corner1.field[0] < corner2.field[0]);
+   assert(corner1.field[1] < corner2.field[1]);
+}
+
+/* Print blitter dword BR09, the destination address.  "stream" is unused. */
+static void BR09( struct debug_stream *stream, GLuint val )
+{
+   (void) stream;
+
+   printf("\t0x%08x -- dest address\n",  val);
+}
+
+/* Print blitter dword BR26: the raw value, then the source y1 and x1
+ * halves.  "stream" is unused.
+ */
+static void BR26( struct debug_stream *stream, GLuint val )
+{
+   (void) stream;
+
+   printf("\t0x%08x\n",  val);
+
+   BITS(val, 31, 16, "src y1");
+   BITS(val, 15, 0,  "src x1");
+}
+
+/* Print blitter dword BR11: the raw value, then the source pitch held
+ * in its low 16 bits.  "stream" is unused.
+ */
+static void BR11( struct debug_stream *stream, GLuint val )
+{
+   (void) stream;
+
+   printf("\t0x%08x\n",  val);
+
+   BITS(val, 15, 0,  "src pitch");
+}
+
+/* Print blitter dword BR12, the source address.  "stream" is unused. */
+static void BR12( struct debug_stream *stream, GLuint val )
+{
+   (void) stream;
+
+   printf("\t0x%08x -- src address\n",  val);
+}
+
+/* Print blitter dword BR16, the fill colour.  "stream" is unused. */
+static void BR16( struct debug_stream *stream, GLuint val )
+{
+   (void) stream;
+
+   printf("\t0x%08x -- color\n",  val);
+}
+   
+/**
+ * Decode a source-copy blit packet.  Eight dwords are decoded: header,
+ * BR13, BR22, BR23, BR09, BR26, BR11 and BR12.
+ *
+ * Advances the stream by len dwords, asserts that len equals the
+ * number of dwords decoded, and returns GL_TRUE.
+ */
+static GLboolean debug_copy_blit( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{
+   const GLuint *words = (GLuint *)(stream->ptr + stream->offset);
+   int used = 0;
+
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  words[used]);
+   used += 1;
+
+   BR13(stream, words[used]);
+   used += 1;
+
+   BR2223(stream, words[used], words[used + 1]);
+   used += 2;
+
+   BR09(stream, words[used]);
+   used += 1;
+   BR26(stream, words[used]);
+   used += 1;
+   BR11(stream, words[used]);
+   used += 1;
+   BR12(stream, words[used]);
+   used += 1;
+
+   stream->offset += len * sizeof(GLuint);
+   assert(used == len);
+   return GL_TRUE;
+}
+
+/**
+ * Decode a colour-fill blit packet.  Six dwords are decoded: header,
+ * BR13, BR22, BR23, BR09 and BR16.
+ *
+ * Advances the stream by len dwords, asserts that len equals the
+ * number of dwords decoded, and returns GL_TRUE.
+ */
+static GLboolean debug_color_blit( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{
+   const GLuint *words = (GLuint *)(stream->ptr + stream->offset);
+   int used = 0;
+
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  words[used]);
+   used += 1;
+
+   BR13(stream, words[used]);
+   used += 1;
+
+   BR2223(stream, words[used], words[used + 1]);
+   used += 2;
+
+   BR09(stream, words[used]);
+   used += 1;
+   BR16(stream, words[used]);
+   used += 1;
+
+   stream->offset += len * sizeof(GLuint);
+   assert(used == len);
+   return GL_TRUE;
+}
+
+/**
+ * Decode a single-dword MODES4 packet: logic op function, the two
+ * stencil mask modify-enable flags, and the stencil test and write
+ * masks.
+ *
+ * Advances the stream by len dwords, asserts that len is 1, and
+ * returns GL_TRUE.
+ */
+static GLboolean debug_modes4( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{
+   const GLuint *words = (GLuint *)(stream->ptr + stream->offset);
+   const int used = 1;
+
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  words[0]);
+
+   BITS(words[0], 21, 18, "logicop func");
+   FLAG(words[0], 17, "stencil test mask modify-enable");
+   FLAG(words[0], 16, "stencil write mask modify-enable");
+   BITS(words[0], 15, 8, "stencil test mask");
+   BITS(words[0], 7, 0,  "stencil write mask");
+
+   stream->offset += len * sizeof(GLuint);
+   assert(used == len);
+   return GL_TRUE;
+}
+
+/**
+ * Decode a map state packet: header, a map mask dword, then groups of
+ * three dwords (TMn.0, TMn.1, TMn.2) until len dwords have been
+ * covered.
+ *
+ * Advances the stream by len dwords, asserts that the groups end
+ * exactly at len, and returns GL_TRUE.
+ */
+static GLboolean debug_map_state( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{
+   const GLuint *words = (GLuint *)(stream->ptr + stream->offset);
+   GLuint pos;
+
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  words[0]);
+
+   printf("\t0x%08x\n",  words[1]);
+   BITS(words[1], 15, 0,   "map mask");
+
+   for (pos = 2; pos < len; pos += 3) {
+      /* TMn.0: base address and line stride flags. */
+      printf("\t  TMn.0: 0x%08x\n", words[pos]);
+      printf("\t map address: 0x%08x\n", (words[pos] & ~0x3));
+      FLAG(words[pos], 1, "vertical line stride");
+      FLAG(words[pos], 0, "vertical line stride offset");
+
+      /* TMn.1: dimensions, formats and tiling. */
+      printf("\t  TMn.1: 0x%08x\n", words[pos + 1]);
+      BITS(words[pos + 1], 31, 21, "height");
+      BITS(words[pos + 1], 20, 10, "width");
+      BITS(words[pos + 1], 9, 7, "surface format");
+      BITS(words[pos + 1], 6, 3, "texel format");
+      FLAG(words[pos + 1], 2, "use fence regs");
+      FLAG(words[pos + 1], 1, "tiled surface");
+      FLAG(words[pos + 1], 0, "tile walk ymajor");
+
+      /* TMn.2: pitch, cube faces, mip and depth. */
+      printf("\t  TMn.2: 0x%08x\n", words[pos + 2]);
+      BITS(words[pos + 2], 31, 21, "dword pitch");
+      BITS(words[pos + 2], 20, 15, "cube face enables");
+      BITS(words[pos + 2], 14, 9, "max lod");
+      FLAG(words[pos + 2], 8,     "mip layout right");
+      BITS(words[pos + 2], 7, 0, "depth");
+   }
+
+   stream->offset += len * sizeof(GLuint);
+   assert(pos == len);
+   return GL_TRUE;
+}
+
+/**
+ * Decode a sampler state packet: header, a sampler mask dword, then
+ * groups of three dwords (TSn.0, TSn.1, TSn.2) until len dwords have
+ * been covered.
+ *
+ * Advances the stream by len dwords, asserts that the groups end
+ * exactly at len, and returns GL_TRUE.
+ */
+static GLboolean debug_sampler_state( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{
+   const GLuint *words = (GLuint *)(stream->ptr + stream->offset);
+   GLuint pos;
+
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  words[0]);
+
+   printf("\t0x%08x\n",  words[1]);
+   BITS(words[1], 15, 0,   "sampler mask");
+
+   for (pos = 2; pos < len; pos += 3) {
+      /* TSn.0: filtering, lod bias and shadow compare. */
+      printf("\t  TSn.0: 0x%08x\n", words[pos]);
+      FLAG(words[pos], 31, "reverse gamma");
+      FLAG(words[pos], 30, "planar to packed");
+      FLAG(words[pos], 29, "yuv->rgb");
+      BITS(words[pos], 28, 27, "chromakey index");
+      BITS(words[pos], 26, 22, "base mip level");
+      BITS(words[pos], 21, 20, "mip mode filter");
+      BITS(words[pos], 19, 17, "mag mode filter");
+      BITS(words[pos], 16, 14, "min mode filter");
+      BITS(words[pos], 13, 5,  "lod bias (s4.4)");
+      FLAG(words[pos], 4,      "shadow enable");
+      FLAG(words[pos], 3,      "max-aniso-4");
+      BITS(words[pos], 2, 0,   "shadow func");
+
+      /* TSn.1: min lod, wrap modes and map index. */
+      printf("\t  TSn.1: 0x%08x\n", words[pos + 1]);
+      BITS(words[pos + 1], 31, 24, "min lod");
+      MBZ( words[pos + 1], 23, 18 );
+      FLAG(words[pos + 1], 17,     "kill pixel enable");
+      FLAG(words[pos + 1], 16,     "keyed tex filter mode");
+      FLAG(words[pos + 1], 15,     "chromakey enable");
+      BITS(words[pos + 1], 14, 12, "tcx wrap mode");
+      BITS(words[pos + 1], 11, 9,  "tcy wrap mode");
+      BITS(words[pos + 1], 8,  6,  "tcz wrap mode");
+      FLAG(words[pos + 1], 5,      "normalized coords");
+      BITS(words[pos + 1], 4,  1,  "map (surface) index");
+      FLAG(words[pos + 1], 0,      "EAST deinterlacer enable");
+
+      /* TSn.2: default colour, printed raw. */
+      printf("\t  TSn.2: 0x%08x  (default color)\n", words[pos + 2]);
+   }
+
+   stream->offset += len * sizeof(GLuint);
+   assert(pos == len);
+   return GL_TRUE;
+}
+
+/**
+ * Decode a destination buffer variables packet: the header dword
+ * followed by one dword of flags and format fields.
+ *
+ * stream: decode cursor; advanced by len dwords.
+ * name:   label printed ahead of the dump.
+ * len:    packet length in dwords; asserted to be 2.
+ *
+ * Always returns GL_TRUE.
+ */
+static GLboolean debug_dest_vars( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   int j = 0;
+
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  ptr[j++]);
+
+   {
+      printf("\t0x%08x\n",  ptr[j]);
+      FLAG(ptr[j], 31,     "early classic ztest");
+      FLAG(ptr[j], 30,     "opengl tex default color");
+      FLAG(ptr[j], 29,     "bypass iz");
+      FLAG(ptr[j], 28,     "lod preclamp");
+      BITS(ptr[j], 27, 26, "dither pattern");
+      FLAG(ptr[j], 25,     "linear gamma blend");
+      FLAG(ptr[j], 24,     "debug dither");
+      BITS(ptr[j], 23, 20, "dstorg x");
+      BITS(ptr[j], 19, 16, "dstorg y");
+      MBZ (ptr[j], 15, 15 );
+      BITS(ptr[j], 14, 12, "422 write select");
+      BITS(ptr[j], 11, 8,  "cbuf format");
+      BITS(ptr[j], 3, 2,   "zbuf format");
+      /* Stride is bit 1 and its offset is bit 0, the same pairing
+       * debug_map_state() uses; testing bit 1 for both would report
+       * the offset whenever the stride flag is set.
+       */
+      FLAG(ptr[j], 1,      "vert line stride");
+      FLAG(ptr[j], 0,      "vert line stride offset");
+      j++;
+   }
+
+   stream->offset += len * sizeof(GLuint);
+   assert(j == len);
+   return GL_TRUE;
+}
+
+/**
+ * Decode a buffer info packet: header, one dword of buffer id, tiling
+ * and pitch fields, then the buffer base address.
+ *
+ * Advances the stream by len dwords, asserts that len is 3, and
+ * returns GL_TRUE.
+ */
+static GLboolean debug_buf_info( struct debug_stream *stream,
+				  const char *name,
+				  GLuint len )
+{
+   const GLuint *words = (GLuint *)(stream->ptr + stream->offset);
+   const int used = 3;
+
+   printf("%s (%d dwords):\n", name, len);
+   printf("\t0x%08x\n",  words[0]);
+
+   /* Dword 1: which buffer this describes and how it is laid out. */
+   printf("\t0x%08x\n",  words[1]);
+   BITS(words[1], 28, 28, "aux buffer id");
+   BITS(words[1], 27, 24, "buffer id (7=depth, 3=back)");
+   FLAG(words[1], 23,     "use fence regs");
+   FLAG(words[1], 22,     "tiled surface");
+   FLAG(words[1], 21,     "tile walk ymajor");
+   MBZ (words[1], 20, 14);
+   BITS(words[1], 13, 2,  "dword pitch");
+   MBZ (words[1], 2,  0);
+
+   /* Dword 2: base address. */
+   printf("\t0x%08x -- buffer base address\n",  words[2]);
+
+   stream->offset += len * sizeof(GLuint);
+   assert(used == len);
+   return GL_TRUE;
+}
+
+/*
+ * Decode and print the packet at the current stream position.
+ *
+ * The first dword of the packet is examined: bits 31:29 pick the outer
+ * case, and further opcode fields inside the dword pick the specific
+ * printer.  Each printer is handed the packet length in dwords, mostly
+ * derived from a length field in the low bits of the header.
+ *
+ * Returns the printer's result, except for MI_BATCH_BUFFER_END which
+ * always yields GL_FALSE.  Header values that no case handles end up in
+ * assert(0) and return 0.
+ */
+static GLboolean i915_debug_packet( struct debug_stream *stream )
+{
+   GLuint *ptr = (GLuint *)(stream->ptr + stream->offset);
+   GLuint cmd = *ptr;
+   
+   switch (((cmd >> 29) & 0x7)) {
+   case 0x0:
+      /* MI_* commands, selected by bits 28:23 */
+      switch ((cmd >> 23) & 0x3f) {
+      case 0x0:
+	 return debug(stream, "MI_NOOP", 1);
+      case 0x3:
+	 return debug(stream, "MI_WAIT_FOR_EVENT", 1);
+      case 0x4:
+	 return debug(stream, "MI_FLUSH", 1);
+      case 0xA:
+	 /* Print it, then report GL_FALSE regardless of debug()'s result */
+	 debug(stream, "MI_BATCH_BUFFER_END", 1);
+	 return GL_FALSE;
+      case 0x22:
+	 return debug(stream, "MI_LOAD_REGISTER_IMM", 3);
+      case 0x31:
+	 return debug_chain(stream, "MI_BATCH_BUFFER_START", 2);
+      default:
+	 break;
+      }
+      break;
+   case 0x1:
+      /* Nothing decoded for this type; falls through to the final assert */
+      break;
+   case 0x2:
+      /* Blit commands, selected by bits 29:22; length is (bits 7:0) + 2 */
+      switch ((cmd >> 22) & 0xff) {	 
+      case 0x50:
+	 return debug_color_blit(stream, "XY_COLOR_BLT", (cmd & 0xff) + 2);
+      case 0x53:
+	 return debug_copy_blit(stream, "XY_SRC_COPY_BLT", (cmd & 0xff) + 2);
+      default:
+	 return debug(stream, "blit command", (cmd & 0xff) + 2);
+      }
+      break;
+   case 0x3:
+      /* 3D state and primitive commands, selected by bits 28:24 */
+      switch ((cmd >> 24) & 0x1f) {	 
+      case 0x6:
+	 return debug(stream, "3DSTATE_ANTI_ALIASING", 1);
+      case 0x7:
+	 return debug(stream, "3DSTATE_RASTERIZATION_RULES", 1);
+      case 0x8:
+	 return debug(stream, "3DSTATE_BACKFACE_STENCIL_OPS", 2);
+      case 0x9:
+	 return debug(stream, "3DSTATE_BACKFACE_STENCIL_MASKS", 1);
+      case 0xb:
+	 return debug(stream, "3DSTATE_INDEPENDENT_ALPHA_BLEND", 1);
+      case 0xc:
+	 return debug(stream, "3DSTATE_MODES5", 1);	 
+      case 0xd:
+	 return debug_modes4(stream, "3DSTATE_MODES4", 1);
+      case 0x15:
+	 return debug(stream, "3DSTATE_FOG_COLOR", 1);
+      case 0x16:
+	 return debug(stream, "3DSTATE_COORD_SET_BINDINGS", 1);
+      case 0x1c:
+	 /* 3DState16NP */
+	 /* Sub-opcode in bits 23:19; unknown values reach the final assert */
+	 switch((cmd >> 19) & 0x1f) {
+	 case 0x10:
+	    return debug(stream, "3DSTATE_SCISSOR_ENABLE", 1);
+	 case 0x11:
+	    return debug(stream, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE", 1);
+	 default:
+	    break;
+	 }
+	 break;
+      case 0x1d:
+	 /* 3DStateMW */
+	 /* Sub-opcode in bits 23:16; the width of the length field in the
+	  * low bits differs from packet to packet.
+	  */
+	 switch ((cmd >> 16) & 0xff) {
+	 case 0x0:
+	    return debug_map_state(stream, "3DSTATE_MAP_STATE", (cmd & 0x1f) + 2);
+	 case 0x1:
+	    return debug_sampler_state(stream, "3DSTATE_SAMPLER_STATE", (cmd & 0x1f) + 2);
+	 case 0x4:
+	    return debug_load_immediate(stream, "3DSTATE_LOAD_STATE_IMMEDIATE", (cmd & 0xf) + 2);
+	 case 0x5:
+	    return debug_program(stream, "3DSTATE_PIXEL_SHADER_PROGRAM", (cmd & 0x1ff) + 2);
+	 case 0x6:
+	    return debug(stream, "3DSTATE_PIXEL_SHADER_CONSTANTS", (cmd & 0xff) + 2);
+	 case 0x7:
+	    return debug_load_indirect(stream, "3DSTATE_LOAD_INDIRECT", (cmd & 0xff) + 2);
+	 case 0x80:
+	    return debug(stream, "3DSTATE_DRAWING_RECTANGLE", (cmd & 0xffff) + 2);
+	 case 0x81:
+	    return debug(stream, "3DSTATE_SCISSOR_RECTANGLE", (cmd & 0xffff) + 2);
+	 case 0x83:
+	    return debug(stream, "3DSTATE_SPAN_STIPPLE", (cmd & 0xffff) + 2);
+	 case 0x85:
+	    return debug_dest_vars(stream, "3DSTATE_DEST_BUFFER_VARS", (cmd & 0xffff) + 2);
+	 case 0x88:
+	    return debug(stream, "3DSTATE_CONSTANT_BLEND_COLOR", (cmd & 0xffff) + 2);
+	 case 0x89:
+	    return debug(stream, "3DSTATE_FOG_MODE", (cmd & 0xffff) + 2);
+	 case 0x8e:
+	    return debug_buf_info(stream, "3DSTATE_BUFFER_INFO", (cmd & 0xffff) + 2);
+	 case 0x97:
+	    return debug(stream, "3DSTATE_DEPTH_OFFSET_SCALE", (cmd & 0xffff) + 2);
+	 case 0x98:
+	    return debug(stream, "3DSTATE_DEFAULT_Z", (cmd & 0xffff) + 2);
+	 case 0x99:
+	    return debug(stream, "3DSTATE_DEFAULT_DIFFUSE", (cmd & 0xffff) + 2);
+	 case 0x9a:
+	    return debug(stream, "3DSTATE_DEFAULT_SPECULAR", (cmd & 0xffff) + 2);
+	 case 0x9c:
+	    return debug(stream, "3DSTATE_CLEAR_PARAMETERS", (cmd & 0xffff) + 2);
+	 default:
+	    assert(0);
+	    return 0;
+	 }
+	 break;
+      case 0x1e:
+	 /* Bit 23 set: length is (bits 15:0) + 1; otherwise a single dword */
+	 if (cmd & (1 << 23))
+	    return debug(stream, "???", (cmd & 0xffff) + 1);
+	 else
+	    return debug(stream, "", 1);
+	 break;
+      case 0x1f:
+	 /* Primitives: bit 23 clear means inline vertex data; otherwise
+	  * bit 17 distinguishes indexed from indirect sequential.
+	  */
+	 if ((cmd & (1 << 23)) == 0)	
+	    return debug_prim(stream, "3DPRIM (inline)", 1, (cmd & 0x1ffff) + 2);
+	 else if (cmd & (1 << 17)) 
+	 {
+	    /* A zero count in bits 15:0 selects the variable-length form */
+	    if ((cmd & 0xffff) == 0)
+	       return debug_variable_length_prim(stream);
+	    else
+	       return debug_prim(stream, "3DPRIM (indexed)", 0, (((cmd & 0xffff) + 1) / 2) + 1);
+	 }
+	 else
+	    return debug_prim(stream, "3DPRIM  (indirect sequential)", 0, 2); 
+	 break;
+      default:
+	 /* NOTE(review): a length of 0 is passed here; if debug() advances
+	  * the stream by its length argument this makes no progress -- confirm.
+	  */
+	 return debug(stream, "", 0);
+      }
+      break;
+   default:
+      assert(0);
+      return 0;
+   }
+
+   /* Reached by every header that broke out of the switch undecoded */
+   assert(0);
+   return 0;
+}
+
+
+
+/*
+ * Print a decoded listing of the batch buffer occupying [start, end).
+ *
+ * Packets are decoded back to back with i915_debug_packet() until it
+ * returns false or the decode position reaches the end of the buffer.
+ */
+void
+i915_dump_batchbuffer( GLuint *start,
+		       GLuint *end )
+{
+   struct debug_stream stream;
+   GLuint bytes = (end - start) * 4;
+
+   printf("\n\nBATCH: (%d)\n", bytes / 4);
+
+   /* Fill in every field so no decoder can read an indeterminate value */
+   stream.offset = 0;
+   stream.ptr = (char *)start;
+   stream.end = (char *)end;
+   stream.print_addresses = 0;
+
+   /* offset is unsigned, so only the upper bound can be violated */
+   while (stream.offset < bytes)
+   {
+      if (!i915_debug_packet( &stream ))
+	 break;
+
+      assert(stream.offset <= bytes);
+   }
+
+   printf("END-BATCH\n\n\n");
+}
+
+
diff --git a/src/mesa/drivers/dri/i915/i915_debug.h b/src/mesa/drivers/dri/i915/i915_debug.h
new file mode 100644
index 0000000000..0643a8c631
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i915_debug.h
@@ -0,0 +1,55 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/* Authors:  Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef I915_DEBUG_H
+#define I915_DEBUG_H
+
+struct i915_context;
+
+/* Decode cursor handed to the batch-buffer packet printers. */
+struct debug_stream 
+{
+   unsigned offset;		/* current gtt offset, in bytes; packets are read at ptr + offset */
+   char *ptr;		/* pointer to gtt offset zero */
+   /* NOTE(review): the original comment here repeated the one on ptr;
+    * presumably this marks the end of the buffer ptr points into -- confirm.
+    */
+   char *end;
+   unsigned print_addresses;
+};
+
+
+
+/* Print a disassembly of a fragment program to stdout.  program points at
+ * the packet header dword and sz is the packet's total length in dwords.
+ */
+extern void i915_disassemble_program(const unsigned *program, unsigned sz);
+/* NOTE(review): not defined in i915_debug_fp.c; presumably prints msg
+ * followed by a decoded ureg -- confirm against the definition.
+ */
+extern void i915_print_ureg(const char *msg, unsigned ureg);
+
+
+/* Print a decoded listing of the batch-buffer dwords in [start, end). */
+void
+i915_dump_batchbuffer( unsigned *start,
+		       unsigned *end );
+
+
+#endif
diff --git a/src/mesa/drivers/dri/i915/i915_debug_fp.c b/src/mesa/drivers/dri/i915/i915_debug_fp.c
new file mode 100644
index 0000000000..adfc9e8945
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i915_debug_fp.c
@@ -0,0 +1,328 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "i915_reg.h"
+#include "i915_debug.h"
+#include "main/imports.h"
+
+/* Instruction mnemonics, indexed by the 5-bit opcode taken from bits
+ * 28:24 of an instruction's first dword.  Opcodes without a known name
+ * are shown as their hex value.
+ */
+static const char *opcodes[0x20] = {
+   "NOP",                       /* 0x00 */
+   "ADD",
+   "MOV",
+   "MUL",
+   "MAD",                       /* 0x04 */
+   "DP2ADD",
+   "DP3",
+   "DP4",
+   "FRC",                       /* 0x08 */
+   "RCP",
+   "RSQ",
+   "EXP",
+   "LOG",                       /* 0x0c */
+   "CMP",
+   "MIN",
+   "MAX",
+   "FLR",                       /* 0x10 */
+   "MOD",
+   "TRC",
+   "SGE",
+   "SLT",                       /* 0x14 */
+   "TEXLD",
+   "TEXLDP",
+   "TEXLDB",
+   "TEXKILL",                   /* 0x18 */
+   "DCL",
+   "0x1a",
+   "0x1b",
+   "0x1c",                      /* 0x1c */
+   "0x1d",
+   "0x1e",
+   "0x1f",
+};
+
+
+/* Number of source operands per opcode, in the same order as opcodes[].
+ * Only print_arith_op() consults this table.
+ */
+static const int args[0x20] = {
+   0,                           /* 0 nop */
+   2,                           /* 1 add */
+   1,                           /* 2 mov */
+   2,                           /* 3 mul */
+   3,                           /* 4 mad */
+   3,                           /* 5 dp2add */
+   2,                           /* 6 dp3 */
+   2,                           /* 7 dp4 */
+   1,                           /* 8 frc */
+   1,                           /* 9 rcp */
+   1,                           /* a rsq */
+   1,                           /* b exp */
+   1,                           /* c log */
+   3,                           /* d cmp */
+   2,                           /* e min */
+   2,                           /* f max */
+   1,                           /* 10 flr */
+   1,                           /* 11 mod */
+   1,                           /* 12 trc */
+   2,                           /* 13 sge */
+   2,                           /* 14 slt */
+   1,                           /* 15 texld */
+   1,                           /* 16 texldp */
+   1,                           /* 17 texldb */
+   1,                           /* 18 texkill */
+   0,                           /* 19 dcl */
+   0,                           /* 1a */
+   0,                           /* 1b */
+   0,                           /* 1c */
+   0,                           /* 1d */
+   0,                           /* 1e */
+   0,                           /* 1f */
+};
+
+
+/* Register-file prefixes, indexed by the register type field; used by
+ * print_reg_type_nr() for registers that have no symbolic name.
+ */
+static const char *regname[0x8] = {
+   "R",
+   "T",
+   "CONST",
+   "S",
+   "OC",
+   "OD",
+   "U",
+   "UNKNOWN",
+};
+
+/*
+ * Print a register reference.  T registers are shown by symbolic name,
+ * register 0 of the OC and OD files as "oC" / "oD", and everything else
+ * as "<file>[<nr>]".
+ */
+static void
+print_reg_type_nr(GLuint type, GLuint nr)
+{
+   if (type == REG_TYPE_T) {
+      if (nr == T_DIFFUSE)
+         printf("T_DIFFUSE");
+      else if (nr == T_SPECULAR)
+         printf("T_SPECULAR");
+      else if (nr == T_FOG_W)
+         printf("T_FOG_W");
+      else
+         printf("T_TEX%d", nr);
+      return;
+   }
+
+   if (type == REG_TYPE_OC && nr == 0) {
+      printf("oC");
+      return;
+   }
+
+   if (type == REG_TYPE_OD && nr == 0) {
+      printf("oD");
+      return;
+   }
+
+   /* Generic form, also used for OC/OD registers other than 0 */
+   printf("%s[%d]", regname[type], nr);
+}
+
+/* A source operand carries one 4-bit field per channel in its low 16
+ * bits: the low three bits of each field select the channel and the top
+ * bit negates it (see print_reg_neg_swizzle()).
+ */
+#define REG_SWIZZLE_MASK 0x7777
+#define REG_NEGATE_MASK 0x8888
+
+/* Channel selects for the identity swizzle .xyzw */
+#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) |	\
+		      (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) |	\
+		      (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) |	\
+		      (SRC_W << A2_SRC2_CHANNEL_W_SHIFT))
+
+
+/*
+ * Print the ".xyzw"-style suffix of a source operand, with a '-' in
+ * front of every negated channel.  Nothing is printed for the identity
+ * swizzle with no negation.
+ */
+static void
+print_reg_neg_swizzle(GLuint reg)
+{
+   /* Text for each 3-bit channel select; 6 and 7 have no known meaning */
+   static const char *const select_name[8] = {
+      "x", "y", "z", "w", "0", "1", "?", "?"
+   };
+   int shift;
+
+   if ((reg & REG_NEGATE_MASK) == 0 &&
+       (reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW)
+      return;
+
+   printf(".");
+
+   /* One 4-bit field per channel, most significant field first */
+   for (shift = 12; shift >= 0; shift -= 4) {
+      const GLuint field = (reg >> shift) & 0xf;
+
+      if (field & 0x8)
+         printf("-");
+
+      printf("%s", select_name[field & 0x7]);
+   }
+}
+
+
+/*
+ * Print a source operand laid out like source 2 of an arithmetic
+ * instruction: the register first, then its swizzle/negate suffix.
+ */
+static void
+print_src_reg(GLuint dword)
+{
+   print_reg_type_nr((dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK,
+                     (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK);
+   print_reg_neg_swizzle(dword);
+}
+
+
+/*
+ * Print the destination register encoded in an instruction's first
+ * dword, followed by a write-mask suffix unless all four channels are
+ * enabled.
+ */
+static void
+print_dest_reg(GLuint dword)
+{
+   const GLuint reg_type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK;
+   const GLuint reg_nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK;
+
+   print_reg_type_nr(reg_type, reg_nr);
+
+   if ((dword & A0_DEST_CHANNEL_ALL) != A0_DEST_CHANNEL_ALL) {
+      printf(".%s%s%s%s",
+             (dword & A0_DEST_CHANNEL_X) ? "x" : "",
+             (dword & A0_DEST_CHANNEL_Y) ? "y" : "",
+             (dword & A0_DEST_CHANNEL_Z) ? "z" : "",
+             (dword & A0_DEST_CHANNEL_W) ? "w" : "");
+   }
+}
+
+
+/* Build the dword that print_src_reg() decodes (it uses the A2_SRC2_*
+ * shifts).  Sources 0 and 1 are assembled from two consecutive
+ * instruction dwords; source 2 is passed through unchanged.  Arguments
+ * are parenthesised so that any expression can be passed safely.
+ */
+#define GET_SRC0_REG(r0, r1) (((r0)<<14)|((r1)>>A1_SRC0_CHANNEL_W_SHIFT))
+#define GET_SRC1_REG(r0, r1) (((r0)<<8)|((r1)>>A2_SRC1_CHANNEL_W_SHIFT))
+#define GET_SRC2_REG(r)      (r)
+
+
+/*
+ * Print one arithmetic instruction as "dest = [SATURATE ]OP src0, src1, src2".
+ *
+ * opcode is the already-shifted 5-bit opcode and program points at the
+ * instruction's three dwords.  The destination is omitted for NOP, and
+ * only as many sources as args[opcode] declares are printed, so an opcode
+ * that takes no operands prints none.
+ */
+static void
+print_arith_op(GLuint opcode, const GLuint * program)
+{
+   const int num_src = args[opcode];
+
+   if (opcode != A0_NOP) {
+      print_dest_reg(program[0]);
+      if (program[0] & A0_DEST_SATURATE)
+         printf(" = SATURATE ");
+      else
+         printf(" = ");
+   }
+
+   printf("%s ", opcodes[opcode]);
+
+   if (num_src >= 1)
+      print_src_reg(GET_SRC0_REG(program[0], program[1]));
+
+   if (num_src >= 2) {
+      printf(", ");
+      print_src_reg(GET_SRC1_REG(program[1], program[2]));
+   }
+
+   if (num_src >= 3) {
+      printf(", ");
+      print_src_reg(GET_SRC2_REG(program[2]));
+   }
+
+   printf("\n");
+}
+
+
+/*
+ * Print one texture instruction as "dest = OP S[sampler],addr".  The
+ * destination is always printed without a write-mask suffix.
+ */
+static void
+print_tex_op(GLuint opcode, const GLuint * program)
+{
+   const GLuint addr_type =
+      (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & REG_TYPE_MASK;
+   const GLuint addr_nr =
+      (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK;
+
+   print_dest_reg(program[0] | A0_DEST_CHANNEL_ALL);
+   printf(" = %s S[%d],", opcodes[opcode], program[0] & T0_SAMPLER_NR_MASK);
+   print_reg_type_nr(addr_type, addr_nr);
+   printf("\n");
+}
+
+/*
+ * Print one declaration as "OP reg".  All channel bits are forced on so
+ * the register is printed without a write-mask suffix.
+ */
+static void
+print_dcl_op(GLuint opcode, const GLuint * program)
+{
+   const GLuint declared = program[0] | A0_DEST_CHANNEL_ALL;
+
+   printf("%s ", opcodes[opcode]);
+   print_dest_reg(declared);
+   printf("\n");
+}
+
+
+/*
+ * Print a disassembly of a fragment program between BEGIN and END lines.
+ *
+ * program points at the packet header, whose low 9 bits hold the packet
+ * length minus two; sz is the total length in dwords and must agree with
+ * the header (asserted).  The header is followed by instructions of
+ * three dwords each, printed one per line.
+ */
+void
+i915_disassemble_program(const GLuint * program, GLuint sz)
+{
+   GLuint i;
+
+   printf("\t\tBEGIN\n");
+
+   assert((program[0] & 0x1ff) + 2 == sz);
+
+   program++;
+   /* Stop before a truncated trailing instruction rather than reading
+    * past the end of the buffer.
+    */
+   for (i = 1; i + 3 <= sz; i += 3, program += 3) {
+      GLuint opcode = program[0] & (0x1f << 24);
+
+      printf("\t\t");
+
+      /* The opcode ranges are compared in place; the printers get the
+       * opcode shifted down so it can index the name tables.
+       */
+      if ((GLint) opcode >= A0_NOP && opcode <= A0_SLT)
+         print_arith_op(opcode >> 24, program);
+      else if (opcode >= T0_TEXLD && opcode <= T0_TEXKILL)
+         print_tex_op(opcode >> 24, program);
+      else if (opcode == D0_DCL)
+         print_dcl_op(opcode >> 24, program);
+      else
+         printf("Unknown opcode 0x%x\n", opcode);
+   }
+
+   printf("\t\tEND\n\n");
+}
+
+
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
new file mode 100644
index 0000000000..e60157f377
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -0,0 +1,1390 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+#include "shader/program.h"
+#include "shader/programopt.h"
+#include "shader/prog_print.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+#include "intel_batchbuffer.h"
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_program.h"
+
+/* Constants for the SIN/COS approximation emitted by upload_program().
+ * Row 0 supplies the range-reduction scale and bias values (2, -1, .5,
+ * .75); row 1 supplies the dot-product weights (4, -4), the 1/(2*pi)
+ * factor and the .2225 weight of the final correction step.
+ */
+static const GLfloat sin_quad_constants[2][4] = {
+   {
+      2.0,
+      -1.0,
+      .5,
+      .75
+   },
+   {
+      4.0,
+      -4.0,
+      1.0 / (2.0 * M_PI),
+      .2225
+   }
+};
+
+/* 1, -1/3!, 1/5!, -1/7! */
+static const GLfloat sin_constants[4] = { 1.0,
+   -1.0 / (3 * 2 * 1),
+   1.0 / (5 * 4 * 3 * 2 * 1),
+   -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1)
+};
+
+/* 1, -1/2!, 1/4!, -1/6! */
+static const GLfloat cos_constants[4] = { 1.0,
+   -1.0 / (2 * 1),
+   1.0 / (4 * 3 * 2 * 1),
+   -1.0 / (6 * 5 * 4 * 3 * 2 * 1)
+};
+
+/**
+ * Retrieve a ureg for the given source register.  Will emit
+ * constants, apply swizzling and negation as needed.
+ *
+ * Returns 0 after reporting an error through i915_program_error() when
+ * the register file or index is not supported.
+ */
+static GLuint
+src_vector(struct i915_fragment_program *p,
+           const struct prog_src_register *source,
+           const struct gl_fragment_program *program)
+{
+   GLuint src;
+
+   switch (source->File) {
+
+      /* Registers:
+       */
+   case PROGRAM_TEMPORARY:
+      if (source->Index >= I915_MAX_TEMPORARY) {
+         i915_program_error(p, "Exceeded max temporary reg: %d/%d",
+			    source->Index, I915_MAX_TEMPORARY);
+         return 0;
+      }
+      src = UREG(REG_TYPE_R, source->Index);
+      break;
+   case PROGRAM_INPUT:
+      /* Every fragment input is read through a declared T register */
+      switch (source->Index) {
+      case FRAG_ATTRIB_WPOS:
+         src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
+         break;
+      case FRAG_ATTRIB_COL0:
+         src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
+         break;
+      case FRAG_ATTRIB_COL1:
+         /* Only XYZ is declared; W is replaced by the constant ONE */
+         src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
+         src = swizzle(src, X, Y, Z, ONE);
+         break;
+      case FRAG_ATTRIB_FOGC:
+         /* Only W is declared; it is presented as (W, 0, 0, 1) */
+         src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
+         src = swizzle(src, W, ZERO, ZERO, ONE);
+         break;
+      case FRAG_ATTRIB_TEX0:
+      case FRAG_ATTRIB_TEX1:
+      case FRAG_ATTRIB_TEX2:
+      case FRAG_ATTRIB_TEX3:
+      case FRAG_ATTRIB_TEX4:
+      case FRAG_ATTRIB_TEX5:
+      case FRAG_ATTRIB_TEX6:
+      case FRAG_ATTRIB_TEX7:
+         src = i915_emit_decl(p, REG_TYPE_T,
+                              T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
+                              D0_CHANNEL_ALL);
+	 break;
+
+      /* NOTE(review): varyings 0..7 are mapped onto the same T_TEX0..
+       * registers as FRAG_ATTRIB_TEX0..7 above -- presumably a program
+       * never uses both for the same slot; confirm.
+       */
+      case FRAG_ATTRIB_VAR0:
+      case FRAG_ATTRIB_VAR0 + 1:
+      case FRAG_ATTRIB_VAR0 + 2:
+      case FRAG_ATTRIB_VAR0 + 3:
+      case FRAG_ATTRIB_VAR0 + 4:
+      case FRAG_ATTRIB_VAR0 + 5:
+      case FRAG_ATTRIB_VAR0 + 6:
+      case FRAG_ATTRIB_VAR0 + 7:
+         src = i915_emit_decl(p, REG_TYPE_T,
+                              T_TEX0 + (source->Index - FRAG_ATTRIB_VAR0),
+                              D0_CHANNEL_ALL);
+         break;
+
+      default:
+         i915_program_error(p, "Bad source->Index: %d", source->Index);
+         return 0;
+      }
+      break;
+
+      /* Various parameters and env values.  All emitted to
+       * hardware as program constants.
+       */
+   case PROGRAM_LOCAL_PARAM:
+      src = i915_emit_param4fv(p, program->Base.LocalParams[source->Index]);
+      break;
+
+   case PROGRAM_ENV_PARAM:
+      src =
+         i915_emit_param4fv(p,
+                            p->ctx->FragmentProgram.Parameters[source->
+                                                               Index]);
+      break;
+
+   case PROGRAM_CONSTANT:
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_NAMED_PARAM:
+   case PROGRAM_UNIFORM:
+      src =
+         i915_emit_param4fv(p,
+                            program->Base.Parameters->ParameterValues[source->
+                                                                      Index]);
+      break;
+
+   default:
+      i915_program_error(p, "Bad source->File: %d", source->File);
+      return 0;
+   }
+
+   /* Apply the instruction's own swizzle on top of any swizzle set above */
+   src = swizzle(src,
+                 GET_SWZ(source->Swizzle, 0),
+                 GET_SWZ(source->Swizzle, 1),
+                 GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3));
+
+   /* Negate is a per-channel bit mask; bit n applies to channel n */
+   if (source->Negate)
+      src = negate(src,
+                   GET_BIT(source->Negate, 0),
+                   GET_BIT(source->Negate, 1),
+                   GET_BIT(source->Negate, 2),
+                   GET_BIT(source->Negate, 3));
+
+   return src;
+}
+
+
+/**
+ * Return the ureg an instruction writes to: an R register for
+ * temporaries, OC[0] or OD[0] for the colour and depth outputs.  Writing
+ * depth also sets p->depth_written.  Any other destination is reported
+ * through i915_program_error() and yields 0.
+ */
+static GLuint
+get_result_vector(struct i915_fragment_program *p,
+                  const struct prog_instruction *inst)
+{
+   if (inst->DstReg.File == PROGRAM_TEMPORARY)
+      return UREG(REG_TYPE_R, inst->DstReg.Index);
+
+   if (inst->DstReg.File != PROGRAM_OUTPUT) {
+      i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File);
+      return 0;
+   }
+
+   /* Output file: only colour and depth are supported */
+   switch (inst->DstReg.Index) {
+   case FRAG_RESULT_COLOR:
+      return UREG(REG_TYPE_OC, 0);
+   case FRAG_RESULT_DEPTH:
+      p->depth_written = 1;
+      return UREG(REG_TYPE_OD, 0);
+   default:
+      i915_program_error(p, "Bad inst->DstReg.Index: %d",
+			 inst->DstReg.Index);
+      return 0;
+   }
+}
+
+/**
+ * Translate an instruction's saturate mode and destination write mask
+ * into the A0_DEST_* flag bits.
+ */
+static GLuint
+get_result_flags(const struct prog_instruction *inst)
+{
+   GLuint flags =
+      (inst->SaturateMode == SATURATE_ZERO_ONE) ? A0_DEST_SATURATE : 0;
+
+   flags |= (inst->DstReg.WriteMask & WRITEMASK_X) ? A0_DEST_CHANNEL_X : 0;
+   flags |= (inst->DstReg.WriteMask & WRITEMASK_Y) ? A0_DEST_CHANNEL_Y : 0;
+   flags |= (inst->DstReg.WriteMask & WRITEMASK_Z) ? A0_DEST_CHANNEL_Z : 0;
+   flags |= (inst->DstReg.WriteMask & WRITEMASK_W) ? A0_DEST_CHANNEL_W : 0;
+
+   return flags;
+}
+
+/**
+ * Map a texture target index to the D0_SAMPLE_TYPE_* value used when
+ * declaring a sampler.  1D, 2D and rectangle targets all use the 2D
+ * sample type.  An unknown index is reported through
+ * i915_program_error() and yields 0.
+ */
+static GLuint
+translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit)
+{
+   switch (bit) {
+   case TEXTURE_1D_INDEX:
+   case TEXTURE_2D_INDEX:
+   case TEXTURE_RECT_INDEX:
+      return D0_SAMPLE_TYPE_2D;
+
+   case TEXTURE_3D_INDEX:
+      return D0_SAMPLE_TYPE_VOLUME;
+
+   case TEXTURE_CUBE_INDEX:
+      return D0_SAMPLE_TYPE_CUBE;
+
+   default:
+      break;
+   }
+
+   i915_program_error(p, "TexSrcBit: %d", bit);
+   return 0;
+}
+
+/* Emit a texture instruction OP for the current instruction.  Expects
+ * `p` and `inst` in the enclosing scope; declares its own `program`
+ * (the current fragment program), the sampler for the instruction's
+ * texture unit, and takes the coordinate from source register 0.
+ */
+#define EMIT_TEX( OP )						\
+do {								\
+   GLuint dim = translate_tex_src_target( p, inst->TexSrcTarget );	\
+   const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current; \
+   GLuint unit = program->Base.SamplerUnits[inst->TexSrcUnit];	\
+   GLuint sampler = i915_emit_decl(p, REG_TYPE_S,		\
+				   unit, dim);			\
+   GLuint coord = src_vector( p, &inst->SrcReg[0], program);	\
+   /* Texel lookup */						\
+								\
+   i915_emit_texld( p, get_live_regs(p, inst),						\
+	       get_result_vector( p, inst ),			\
+	       get_result_flags( inst ),			\
+	       sampler,						\
+	       coord,						\
+	       OP);						\
+} while (0)
+
+/* Emit arithmetic instruction OP using the first N source registers of
+ * the current instruction; unused source slots are passed as 0.  Expects
+ * `p`, `inst` and `program` in the enclosing scope.
+ */
+#define EMIT_ARITH( OP, N )						\
+do {									\
+   i915_emit_arith( p,							\
+	       OP,							\
+	       get_result_vector( p, inst ), 				\
+	       get_result_flags( inst ), 0,			\
+	       (N<1)?0:src_vector( p, &inst->SrcReg[0], program),	\
+	       (N<2)?0:src_vector( p, &inst->SrcReg[1], program),	\
+	       (N<3)?0:src_vector( p, &inst->SrcReg[2], program));	\
+} while (0)
+
+/* Fixed-arity shorthands for EMIT_ARITH */
+#define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
+#define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
+#define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
+
+/*
+ * Fill p->usedRegs[] with one temporary-register bit mask per
+ * instruction of the current fragment program.
+ *
+ * The program is walked from the last instruction to the first.  At each
+ * instruction the bit of a temporary it writes is cleared, then the bits
+ * of the temporaries it reads are set, and the resulting mask is stored.
+ * The upper 16 bits of the mask start out set and are never cleared by
+ * temporaries 0..15.
+ *
+ * TODO: consider moving this into core
+ */
+static void calc_live_regs( struct i915_fragment_program *p )
+{
+    const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
+    GLuint live = 0xffff0000;
+    GLint idx = program->Base.NumInstructions;
+
+    while (--idx >= 0) {
+        struct prog_instruction *inst = &program->Base.Instructions[idx];
+        const int num_srcs = _mesa_num_inst_src_regs(inst->Opcode);
+        int s;
+
+        /* A write ends the register's liveness for this and preceding ops */
+        if (inst->DstReg.File == PROGRAM_TEMPORARY)
+            live &= ~(1 << inst->DstReg.Index);
+
+        /* A read makes the register live for this and preceding ops */
+        for (s = 0; s < num_srcs; s++) {
+            if (inst->SrcReg[s].File == PROGRAM_TEMPORARY)
+                live |= 1 << inst->SrcReg[s].Index;
+        }
+
+        p->usedRegs[idx] = live;
+    }
+}
+
+/*
+ * Look up the register mask that calc_live_regs() stored for inst.
+ * inst must point into the current fragment program's instruction
+ * array, since its index is derived by pointer subtraction.
+ */
+static GLuint get_live_regs( struct i915_fragment_program *p, 
+                             const struct prog_instruction *inst )
+{
+    const struct prog_instruction *first =
+        p->ctx->FragmentProgram._Current->Base.Instructions;
+    const GLuint slot = inst - first;
+
+    return p->usedRegs[slot];
+}
+ 
+
+/* Possible concerns:
+ *
+ * SIN, COS -- could use another taylor step?
+ * LIT      -- results seem a little different to sw mesa
+ * LOG      -- different to mesa on negative numbers, but this is conformant.
+ * 
+ * Parse failures -- Mesa doesn't currently give a good indication
+ * internally whether a particular program string parsed or not.  This
+ * can lead to confusion -- hopefully we cope with it ok now.
+ *
+ */
+static void
+upload_program(struct i915_fragment_program *p)
+{
+   const struct gl_fragment_program *program =
+      p->ctx->FragmentProgram._Current;
+   const struct prog_instruction *inst = program->Base.Instructions;
+
+   if (INTEL_DEBUG & DEBUG_WM)
+      _mesa_print_program(&program->Base);
+
+   /* Is this a parse-failed program?  Ensure a valid program is
+    * loaded, as the flagging of an error isn't sufficient to stop
+    * this being uploaded to hardware.
+    */
+   if (inst[0].Opcode == OPCODE_END) {
+      GLuint tmp = i915_get_utemp(p);
+      i915_emit_arith(p,
+                      A0_MOV,
+                      UREG(REG_TYPE_OC, 0),
+                      A0_DEST_CHANNEL_ALL, 0,
+                      swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0);
+      return;
+   }
+
+   if (program->Base.NumInstructions > I915_MAX_INSN) {
+      i915_program_error(p, "Exceeded max instructions (%d out of %d)",
+			 program->Base.NumInstructions, I915_MAX_INSN);
+      return;
+   }
+
+   /* Not always needed:
+    */
+   calc_live_regs(p);
+
+   while (1) {
+      GLuint src0, src1, src2, flags;
+      GLuint tmp = 0, dst, consts0 = 0, consts1 = 0;
+
+      switch (inst->Opcode) {
+      case OPCODE_ABS:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         i915_emit_arith(p,
+                         A0_MAX,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         src0, negate(src0, 1, 1, 1, 1), 0);
+         break;
+
+      case OPCODE_ADD:
+         EMIT_2ARG_ARITH(A0_ADD);
+         break;
+
+      case OPCODE_CMP:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+         src2 = src_vector(p, &inst->SrcReg[2], program);
+         i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1);   /* NOTE: order of src2, src1 */
+         break;
+
+      case OPCODE_COS:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         tmp = i915_get_utemp(p);
+	 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
+	 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
+
+	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
+         i915_emit_arith(p,
+                         A0_MAD,
+                         tmp, A0_DEST_CHANNEL_X, 0,
+                         src0,
+			 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
+			 swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */
+
+         i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
+
+	 i915_emit_arith(p,
+			 A0_MAD,
+			 tmp, A0_DEST_CHANNEL_X, 0,
+			 tmp,
+			 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
+			 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
+
+	 /* Compute COS with the same calculation used for SIN, but a
+	  * different source range has been mapped to [-1,1] this time.
+	  */
+
+	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
+	 i915_emit_arith(p,
+                         A0_MAX,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
+			 0);
+
+	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
+	 i915_emit_arith(p,
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 tmp,
+			 0);
+
+	 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
+         i915_emit_arith(p,
+                         A0_DP3,
+                         tmp, A0_DEST_CHANNEL_X, 0,
+			 tmp,
+                         swizzle(consts1, X, Y, ZERO, ZERO),
+			 0);
+
+	 /* tmp.x now contains a first approximation (y).  Now, weight it
+	  * against tmp.y**2 to get closer.
+	  */
+	 i915_emit_arith(p,
+                         A0_MAX,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
+			 0);
+
+	 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
+	 i915_emit_arith(p,
+			 A0_MAD,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
+
+	 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
+	 i915_emit_arith(p,
+			 A0_MAD,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+			 swizzle(consts1, W, W, W, W),
+			 swizzle(tmp, Y, Y, Y, Y),
+			 swizzle(tmp, X, X, X, X));
+         break;
+
+      case OPCODE_DP3:
+         EMIT_2ARG_ARITH(A0_DP3);
+         break;
+
+      case OPCODE_DP4:
+         EMIT_2ARG_ARITH(A0_DP4);
+         break;
+
+      case OPCODE_DPH:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+
+         i915_emit_arith(p,
+                         A0_DP4,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, X, Y, Z, ONE), src1, 0);
+         break;
+
+      case OPCODE_DST:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+
+         /* result[0] = 1    * 1;
+          * result[1] = a[1] * b[1];
+          * result[2] = a[2] * 1;
+          * result[3] = 1    * b[3];
+          */
+         i915_emit_arith(p,
+                         A0_MUL,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, ONE, Y, Z, ONE),
+                         swizzle(src1, ONE, Y, ONE, W), 0);
+         break;
+
+      case OPCODE_EX2:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+
+         i915_emit_arith(p,
+                         A0_EXP,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, X, X, X, X), 0, 0);
+         break;
+
+      case OPCODE_FLR:
+         EMIT_1ARG_ARITH(A0_FLR);
+         break;
+
+      case OPCODE_TRUNC:
+	 EMIT_1ARG_ARITH(A0_TRC);
+	 break;
+
+      case OPCODE_FRC:
+         EMIT_1ARG_ARITH(A0_FRC);
+         break;
+
+      case OPCODE_KIL:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         tmp = i915_get_utemp(p);
+
+         i915_emit_texld(p, get_live_regs(p, inst),
+                         tmp, A0_DEST_CHANNEL_ALL,   /* use a dummy dest reg */
+                         0, src0, T0_TEXKILL);
+         break;
+
+      case OPCODE_KIL_NV:
+	 if (inst->DstReg.CondMask == COND_TR) {
+	    tmp = i915_get_utemp(p);
+
+	    i915_emit_texld(p, get_live_regs(p, inst),
+			    tmp, A0_DEST_CHANNEL_ALL,
+			    0, /* use a dummy dest reg */
+			    swizzle(tmp, ONE, ONE, ONE, ONE), /* always */
+			    T0_TEXKILL);
+	 } else {
+	    p->error = 1;
+	    i915_program_error(p, "Unsupported KIL_NV condition code: %d",
+			       inst->DstReg.CondMask);
+	 }
+	 break;
+
+      case OPCODE_LG2:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+
+         i915_emit_arith(p,
+                         A0_LOG,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, X, X, X, X), 0, 0);
+         break;
+
+      case OPCODE_LIT:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         tmp = i915_get_utemp(p);
+
+         /* tmp = max( a.xyzw, a.00zw )
+          * XXX: Clamp tmp.w to -128..128
+          * tmp.y = log(tmp.y)
+          * tmp.y = tmp.w * tmp.y
+          * tmp.y = exp(tmp.y)
+          * result = cmp (a.11-x1, a.1x01, a.1xy1 )
+          */
+         i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
+                         src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
+
+         i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
+                         swizzle(tmp, Y, Y, Y, Y), 0, 0);
+
+         i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
+                         swizzle(tmp, ZERO, Y, ZERO, ZERO),
+                         swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
+
+         i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
+                         swizzle(tmp, Y, Y, Y, Y), 0, 0);
+
+         i915_emit_arith(p, A0_CMP,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
+                         swizzle(tmp, ONE, X, ZERO, ONE),
+                         swizzle(tmp, ONE, X, Y, ONE));
+
+         break;
+
+      case OPCODE_LRP:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+         src2 = src_vector(p, &inst->SrcReg[2], program);
+         flags = get_result_flags(inst);
+         tmp = i915_get_utemp(p);
+
+         /* b*a + c*(1-a)
+          *
+          * b*a + c - ca 
+          *
+          * tmp = b*a + c, 
+          * result = (-c)*a + tmp 
+          */
+         i915_emit_arith(p, A0_MAD, tmp,
+                         flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
+
+         i915_emit_arith(p, A0_MAD,
+                         get_result_vector(p, inst),
+                         flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
+         break;
+
+      case OPCODE_MAD:
+         EMIT_3ARG_ARITH(A0_MAD);
+         break;
+
+      case OPCODE_MAX:
+         EMIT_2ARG_ARITH(A0_MAX);
+         break;
+
+      case OPCODE_MIN:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+         tmp = i915_get_utemp(p);
+         flags = get_result_flags(inst);
+
+         i915_emit_arith(p,
+                         A0_MAX,
+                         tmp, flags & A0_DEST_CHANNEL_ALL, 0,
+                         negate(src0, 1, 1, 1, 1),
+                         negate(src1, 1, 1, 1, 1), 0);
+
+         i915_emit_arith(p,
+                         A0_MOV,
+                         get_result_vector(p, inst),
+                         flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
+         break;
+
+      case OPCODE_MOV:
+         EMIT_1ARG_ARITH(A0_MOV);
+         break;
+
+      case OPCODE_MUL:
+         EMIT_2ARG_ARITH(A0_MUL);
+         break;
+
+      case OPCODE_NOISE1:
+      case OPCODE_NOISE2:
+      case OPCODE_NOISE3:
+      case OPCODE_NOISE4:
+	 /* Don't implement noise because we just don't have the instructions
+	  * to spare.  We aren't the first vendor to do so.
+	  */
+	 i915_program_error(p, "Stubbed-out noise functions");
+	 i915_emit_arith(p,
+			 A0_MOV,
+			 get_result_vector(p, inst),
+			 get_result_flags(inst), 0,
+			 swizzle(tmp, ZERO, ZERO, ZERO, ZERO), 0, 0);
+	 break;
+
+      case OPCODE_POW:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+         tmp = i915_get_utemp(p);
+         flags = get_result_flags(inst);
+
+         /* XXX: masking on intermediate values, here and elsewhere.
+          */
+         i915_emit_arith(p,
+                         A0_LOG,
+                         tmp, A0_DEST_CHANNEL_X, 0,
+                         swizzle(src0, X, X, X, X), 0, 0);
+
+         i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
+
+
+         i915_emit_arith(p,
+                         A0_EXP,
+                         get_result_vector(p, inst),
+                         flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
+
+         break;
+
+      case OPCODE_RCP:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+
+         i915_emit_arith(p,
+                         A0_RCP,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, X, X, X, X), 0, 0);
+         break;
+
+      case OPCODE_RSQ:
+
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+
+         i915_emit_arith(p,
+                         A0_RSQ,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, X, X, X, X), 0, 0);
+         break;
+
+      case OPCODE_SCS:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         tmp = i915_get_utemp(p);
+
+         /* 
+          * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
+          * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
+          * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
+          * scs.x = DP4 t1, sin_constants
+          * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
+          * scs.y = DP4 t1, cos_constants
+          */
+         i915_emit_arith(p,
+                         A0_MUL,
+                         tmp, A0_DEST_CHANNEL_XY, 0,
+                         swizzle(src0, X, X, ONE, ONE),
+                         swizzle(src0, X, ONE, ONE, ONE), 0);
+
+         i915_emit_arith(p,
+                         A0_MUL,
+                         tmp, A0_DEST_CHANNEL_ALL, 0,
+                         swizzle(tmp, X, Y, X, Y),
+                         swizzle(tmp, X, X, ONE, ONE), 0);
+
+         if (inst->DstReg.WriteMask & WRITEMASK_Y) {
+            GLuint tmp1;
+
+            if (inst->DstReg.WriteMask & WRITEMASK_X)
+               tmp1 = i915_get_utemp(p);
+            else
+               tmp1 = tmp;
+
+            i915_emit_arith(p,
+                            A0_MUL,
+                            tmp1, A0_DEST_CHANNEL_ALL, 0,
+                            swizzle(tmp, X, Y, Y, W),
+                            swizzle(tmp, X, Z, ONE, ONE), 0);
+
+            i915_emit_arith(p,
+                            A0_DP4,
+                            get_result_vector(p, inst),
+                            A0_DEST_CHANNEL_Y, 0,
+                            swizzle(tmp1, W, Z, Y, X),
+                            i915_emit_const4fv(p, sin_constants), 0);
+         }
+
+         if (inst->DstReg.WriteMask & WRITEMASK_X) {
+            i915_emit_arith(p,
+                            A0_MUL,
+                            tmp, A0_DEST_CHANNEL_XYZ, 0,
+                            swizzle(tmp, X, X, Z, ONE),
+                            swizzle(tmp, Z, ONE, ONE, ONE), 0);
+
+            i915_emit_arith(p,
+                            A0_DP4,
+                            get_result_vector(p, inst),
+                            A0_DEST_CHANNEL_X, 0,
+                            swizzle(tmp, ONE, Z, Y, X),
+                            i915_emit_const4fv(p, cos_constants), 0);
+         }
+         break;
+
+      case OPCODE_SEQ:
+	 tmp = i915_get_utemp(p);
+	 flags = get_result_flags(inst);
+	 dst = get_result_vector(p, inst);
+
+	 /* dst = src1 >= src2 */
+	 i915_emit_arith(p,
+			 A0_SGE,
+			 dst,
+			 flags, 0,
+			 src_vector(p, &inst->SrcReg[0], program),
+			 src_vector(p, &inst->SrcReg[1], program),
+			 0);
+	 /* tmp = src1 <= src2 */
+	 i915_emit_arith(p,
+			 A0_SGE,
+			 tmp,
+			 flags, 0,
+			 negate(src_vector(p, &inst->SrcReg[0], program),
+				1, 1, 1, 1),
+			 negate(src_vector(p, &inst->SrcReg[1], program),
+				1, 1, 1, 1),
+			 0);
+	 /* dst = tmp && dst */
+	 i915_emit_arith(p,
+			 A0_MUL,
+			 dst,
+			 flags, 0,
+			 dst,
+			 tmp,
+			 0);
+	 break;
+
+      case OPCODE_SIN:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         tmp = i915_get_utemp(p);
+	 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
+	 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
+
+	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
+         i915_emit_arith(p,
+                         A0_MAD,
+                         tmp, A0_DEST_CHANNEL_X, 0,
+                         src0,
+			 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
+			 swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */
+
+         i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
+
+	 i915_emit_arith(p,
+			 A0_MAD,
+			 tmp, A0_DEST_CHANNEL_X, 0,
+			 tmp,
+			 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
+			 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
+
+	 /* Compute sin using a quadratic and quartic.  It gives continuity
+	  * that repeating the Taylor series lacks every 2*pi, and has
+	  * reduced error.
+	  *
+	  * The idea was described at:
+	  * http://www.devmaster.net/forums/showthread.php?t=5784
+	  */
+
+	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
+	 i915_emit_arith(p,
+                         A0_MAX,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
+			 0);
+
+	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
+	 i915_emit_arith(p,
+			 A0_MUL,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 tmp,
+			 0);
+
+	 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
+         i915_emit_arith(p,
+                         A0_DP3,
+                         tmp, A0_DEST_CHANNEL_X, 0,
+			 tmp,
+                         swizzle(consts1, X, Y, ZERO, ZERO),
+			 0);
+
+	 /* tmp.x now contains a first approximation (y).  Now, weight it
+	  * against tmp.y**2 to get closer.
+	  */
+	 i915_emit_arith(p,
+                         A0_MAX,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
+			 0);
+
+	 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
+	 i915_emit_arith(p,
+			 A0_MAD,
+			 tmp, A0_DEST_CHANNEL_Y, 0,
+			 swizzle(tmp, ZERO, X, ZERO, ZERO),
+			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
+			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
+
+	 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
+	 i915_emit_arith(p,
+			 A0_MAD,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+			 swizzle(consts1, W, W, W, W),
+			 swizzle(tmp, Y, Y, Y, Y),
+			 swizzle(tmp, X, X, X, X));
+
+         break;
+
+      case OPCODE_SGE:
+	 EMIT_2ARG_ARITH(A0_SGE);
+	 break;
+
+      case OPCODE_SGT:
+	 i915_emit_arith(p,
+			 A0_SLT,
+			 get_result_vector( p, inst ),
+			 get_result_flags( inst ), 0,
+			 negate(src_vector( p, &inst->SrcReg[0], program),
+				1, 1, 1, 1),
+			 negate(src_vector( p, &inst->SrcReg[1], program),
+				1, 1, 1, 1),
+			 0);
+         break;
+
+      case OPCODE_SLE:
+	 i915_emit_arith(p,
+			 A0_SGE,
+			 get_result_vector( p, inst ),
+			 get_result_flags( inst ), 0,
+			 negate(src_vector( p, &inst->SrcReg[0], program),
+				1, 1, 1, 1),
+			 negate(src_vector( p, &inst->SrcReg[1], program),
+				1, 1, 1, 1),
+			 0);
+         break;
+
+      case OPCODE_SLT:
+         EMIT_2ARG_ARITH(A0_SLT);
+         break;
+
+      case OPCODE_SNE:
+	 tmp = i915_get_utemp(p);
+	 flags = get_result_flags(inst);
+	 dst = get_result_vector(p, inst);
+
+	 /* dst = src1 < src2 */
+	 i915_emit_arith(p,
+			 A0_SLT,
+			 dst,
+			 flags, 0,
+			 src_vector(p, &inst->SrcReg[0], program),
+			 src_vector(p, &inst->SrcReg[1], program),
+			 0);
+	 /* tmp = src1 > src2 */
+	 i915_emit_arith(p,
+			 A0_SLT,
+			 tmp,
+			 flags, 0,
+			 negate(src_vector(p, &inst->SrcReg[0], program),
+				1, 1, 1, 1),
+			 negate(src_vector(p, &inst->SrcReg[1], program),
+				1, 1, 1, 1),
+			 0);
+	 /* dst = tmp || dst */
+	 i915_emit_arith(p,
+			 A0_ADD,
+			 dst,
+			 flags | A0_DEST_SATURATE, 0,
+			 dst,
+			 tmp,
+			 0);
+         break;
+
+      case OPCODE_SUB:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+
+         i915_emit_arith(p,
+                         A0_ADD,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         src0, negate(src1, 1, 1, 1, 1), 0);
+         break;
+
+      case OPCODE_SWZ:
+         EMIT_1ARG_ARITH(A0_MOV);       /* extended swizzle handled natively */
+         break;
+
+      case OPCODE_TEX:
+         EMIT_TEX(T0_TEXLD);
+         break;
+
+      case OPCODE_TXB:
+         EMIT_TEX(T0_TEXLDB);
+         break;
+
+      case OPCODE_TXP:
+         EMIT_TEX(T0_TEXLDP);
+         break;
+
+      case OPCODE_XPD:
+         /* Cross product:
+          *      result.x = src0.y * src1.z - src0.z * src1.y;
+          *      result.y = src0.z * src1.x - src0.x * src1.z;
+          *      result.z = src0.x * src1.y - src0.y * src1.x;
+          *      result.w = undef;
+          */
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+         tmp = i915_get_utemp(p);
+
+         i915_emit_arith(p,
+                         A0_MUL,
+                         tmp, A0_DEST_CHANNEL_ALL, 0,
+                         swizzle(src0, Z, X, Y, ONE),
+                         swizzle(src1, Y, Z, X, ONE), 0);
+
+         i915_emit_arith(p,
+                         A0_MAD,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                         swizzle(src0, Y, Z, X, ONE),
+                         swizzle(src1, Z, X, Y, ONE),
+                         negate(tmp, 1, 1, 1, 0));
+         break;
+
+      case OPCODE_END:
+         return;
+
+      case OPCODE_BGNLOOP:
+      case OPCODE_BGNSUB:
+      case OPCODE_BRA:
+      case OPCODE_BRK:
+      case OPCODE_CAL:
+      case OPCODE_CONT:
+      case OPCODE_DDX:
+      case OPCODE_DDY:
+      case OPCODE_ELSE:
+      case OPCODE_ENDIF:
+      case OPCODE_ENDLOOP:
+      case OPCODE_ENDSUB:
+      case OPCODE_IF:
+      case OPCODE_RET:
+	 p->error = 1;
+	 i915_program_error(p, "Unsupported opcode: %s",
+			    _mesa_opcode_string(inst->Opcode));
+	 return;
+
+      case OPCODE_EXP:
+      case OPCODE_LOG:
+	 /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in
+	  * prog_instruction.h, but apparently GLSL doesn't ever emit them.
+	  * Instead, it translates to EX2 or LG2.
+	  */
+      case OPCODE_TXD:
+      case OPCODE_TXL:
+	 /* These opcodes are claimed by GLSL in prog_instruction.h, but
+	  * only NV_vp/fp appears to emit them.
+	  */
+      default:
+         i915_program_error(p, "bad opcode: %s",
+			    _mesa_opcode_string(inst->Opcode));
+         return;
+      }
+
+      inst++;
+      i915_release_utemps(p);
+   }
+}
+
+/* Depth writes are not intercepted while instructions are emitted.
+ * Instead, once the program is complete and it wrote depth, append one
+ * MOV on REG_TYPE_OD register 0 with a W-only write mask and an
+ * X,Y,Z,Z source swizzle, which puts the value where it belongs.
+ */
+static void
+fixup_depth_write(struct i915_fragment_program *p)
+{
+   const GLuint od = UREG(REG_TYPE_OD, 0);
+
+   if (!p->depth_written)
+      return;
+
+   i915_emit_arith(p, A0_MOV, od, A0_DEST_CHANNEL_W, 0,
+                   swizzle(od, X, Y, Z, Z), 0, 0);
+}
+
+
+/* Reserve the first texcoord unit that is read neither as TEX nor as VAR
+ * to carry WPOS; p->wpos_tex stays -1 if WPOS is unread or none is free. */
+static void
+check_wpos(struct i915_fragment_program *p)
+{
+   const GLuint read_mask = p->FragProg.Base.InputsRead;
+   GLint unit;
+
+   p->wpos_tex = -1;
+   if (!(read_mask & FRAG_BIT_WPOS))
+      return;
+
+   for (unit = 0; unit < p->ctx->Const.MaxTextureCoordUnits; unit++) {
+      if (!(read_mask & (FRAG_BIT_TEX(unit) | FRAG_BIT_VAR(unit)))) {
+         p->wpos_tex = unit;
+         return;
+      }
+   }
+   /* WPOS is read but every unit is occupied. */
+   i915_program_error(p, "No free texcoord for wpos value");
+}
+
+/* Translate p into i915 code and mark it translated.  The DEBUG_WM dump
+ * prints p itself, since p need not be the context's current program. */
+static void
+translate_program(struct i915_fragment_program *p)
+{
+   struct i915_context *i915 = I915_CONTEXT(p->ctx);
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      printf("fp:\n");
+      _mesa_print_program(&p->FragProg.Base);
+      printf("\n");
+   }
+
+   i915_init_program(i915, p);
+   check_wpos(p);
+   upload_program(p);
+   fixup_depth_write(p);
+   i915_fini_program(p);
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      printf("i915:\n");
+      i915_disassemble_program(i915->state.Program, i915->state.ProgramSize);
+   }
+   p->translated = 1;
+}
+
+
+/* Refresh the parameter list via _mesa_load_state_parameters() and copy
+ * each tracked parameter into its constant slot; forces a re-upload. */
+static void
+track_params(struct i915_fragment_program *p)
+{
+   GLint idx;
+
+   if (p->nr_params) {
+      _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters);
+      for (idx = 0; idx < p->nr_params; idx++) {
+         COPY_4V(p->constant[p->param[idx].reg], p->param[idx].values);
+      }
+   }
+   p->params_uptodate = 1;
+   p->on_hardware = 0;          /* overkill */
+}
+
+
+/* BindProgram hook.  For a fragment program that is not already current,
+ * flag the previously bound program as off-hardware with stale
+ * parameters, then record prog as the current program. */
+static void
+i915BindProgram(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   struct i915_fragment_program *fp = (struct i915_fragment_program *) prog;
+   struct i915_fragment_program *prev = i915->current_program;
+
+   if (target != GL_FRAGMENT_PROGRAM_ARB || prev == fp)
+      return;
+
+   if (prev) {
+      prev->on_hardware = 0;
+      prev->params_uptodate = 0;
+   }
+   i915->current_program = fp;
+
+   assert(fp->on_hardware == 0);
+   assert(fp->params_uptodate == 0);
+}
+
+/* NewProgram hook.  Fragment programs are allocated as the driver's
+ * i915_fragment_program (which embeds FragProg); vertex programs use
+ * gl_vertex_program; any other target goes to _mesa_new_program(). */
+static struct gl_program *
+i915NewProgram(GLcontext * ctx, GLenum target, GLuint id)
+{
+   if (target == GL_VERTEX_PROGRAM_ARB) {
+      struct gl_vertex_program *vp = CALLOC_STRUCT(gl_vertex_program);
+
+      return _mesa_init_vertex_program(ctx, vp, target, id);
+   }
+
+   if (target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct i915_fragment_program *fp =
+         CALLOC_STRUCT(i915_fragment_program);
+
+      if (!fp)
+         return NULL;
+
+      i915_init_program(I915_CONTEXT(ctx), fp);
+
+      return _mesa_init_fragment_program(ctx, &fp->FragProg, target, id);
+   }
+
+   /* Every other target falls back to the core allocator. */
+   return _mesa_new_program(ctx, target, id);
+}
+
+/* DeleteProgram hook: forget prog if it is the driver's current fragment
+ * program, then hand it to _mesa_delete_program(). */
+static void
+i915DeleteProgram(GLcontext * ctx, struct gl_program *prog)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+
+   if (prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+       i915->current_program == (struct i915_fragment_program *) prog)
+      i915->current_program = NULL;
+
+   _mesa_delete_program(ctx, prog);
+}
+
+
+/* IsProgramNative hook: a fragment program is native when translating it
+ * (done here on demand) left no error; other targets always are. */
+static GLboolean
+i915IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+   struct i915_fragment_program *fp = (struct i915_fragment_program *) prog;
+
+   if (target != GL_FRAGMENT_PROGRAM_ARB)
+      return GL_TRUE;
+
+   if (!fp->translated)
+      translate_program(fp);
+   return !fp->error;
+}
+
+/* ProgramStringNotify hook.  Flags a fragment program for re-translation
+ * and replaces its fog option with appended fog instructions; every
+ * target is then passed on to _tnl_program_string(). */
+static GLboolean
+i915ProgramStringNotify(GLcontext * ctx,
+                        GLenum target, struct gl_program *prog)
+{
+   struct i915_fragment_program *fp = (struct i915_fragment_program *) prog;
+
+   if (target == GL_FRAGMENT_PROGRAM_ARB) {
+      fp->translated = 0;
+      /* Hack: express the fog option as extra instructions, then clear
+       * the FogOption field. */
+      if (fp->FragProg.FogOption != GL_NONE) {
+         _mesa_append_fog_code(ctx, &fp->FragProg);
+         fp->FragProg.FogOption = GL_NONE;
+      }
+   }
+
+   (void) _tnl_program_string(ctx, target, prog);
+   /* XXX check if program is legal, within limits */
+   return GL_TRUE;
+}
+
+/* Adopt the context's current fragment program (invalidating the one it
+ * replaces), translate it if needed, then set the fallback from its error. */
+void
+i915_update_program(GLcontext *ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   struct i915_fragment_program *fp =
+      (struct i915_fragment_program *) ctx->FragmentProgram._Current;
+   struct i915_fragment_program *prev = i915->current_program;
+
+   if (prev != fp) {
+      if (prev != NULL) {
+         prev->on_hardware = 0;
+         prev->params_uptodate = 0;
+      }
+      i915->current_program = fp;
+   }
+   if (!fp->translated)
+      translate_program(fp);
+   FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error);
+}
+/* Set up the vertex format for the current fragment program and upload it. */
+void
+i915ValidateFragmentProgram(struct i915_context *i915)
+{
+   GLcontext *ctx = &i915->intel.ctx;
+   struct intel_context *intel = intel_context(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+
+   struct i915_fragment_program *p =
+      (struct i915_fragment_program *) ctx->FragmentProgram._Current;
+
+   const GLuint inputsRead = p->FragProg.Base.InputsRead;
+   GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
+   GLuint s2 = S2_TEXCOORD_NONE;
+   int i, offset = 0;
+
+   /* Important: the position attribute must read the NDC coordinates.
+    */
+   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+
+   if (!p->translated)
+      translate_program(p);
+   /* Reset the layout bookkeeping before attributes are emitted again. */
+   intel->vertex_attr_count = 0;
+   intel->wpos_offset = 0;
+   intel->wpos_size = 0;
+   intel->coloroffset = 0;
+   intel->specoffset = 0;
+   /* Position: 4 floats when any texcoord input is read, otherwise 3. */
+   if (inputsRead & FRAG_BITS_TEX_ANY) {
+      EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);
+   }
+   else {
+      EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12);
+   }
+
+   if (inputsRead & FRAG_BIT_COL0) {
+      intel->coloroffset = offset / 4;
+      EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
+   }
+
+   if (inputsRead & FRAG_BIT_COL1) {
+       intel->specoffset = offset / 4;
+       EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4);
+   }
+
+   if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) {
+      EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4);
+   }
+   /* Per unit: a read TEX(i), else a read VAR(i), else the WPOS slot. */
+   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
+      if (inputsRead & FRAG_BIT_TEX(i)) {
+         int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
+
+         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
+         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+
+         EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
+      }
+      else if (inputsRead & FRAG_BIT_VAR(i)) {
+         int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;
+
+         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
+         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+
+         EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
+      }
+      else if (i == p->wpos_tex) {
+
+         /* If WPOS is required, duplicate the XYZ position data in an
+          * unused texture coordinate:
+          */
+         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
+         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3));
+
+         intel->wpos_offset = offset;
+         intel->wpos_size = 3 * sizeof(GLuint);
+
+         EMIT_PAD(intel->wpos_size);
+      }
+   }
+   /* Only touch context state when the LIS2/LIS4 values changed. */
+   if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
+       s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
+      int k;
+
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+
+      /* Must do this *after* statechange, so as not to affect
+       * buffered vertices reliant on the old state:
+       */
+      intel->vertex_size = _tnl_install_attrs(&intel->ctx,
+                                              intel->vertex_attrs,
+                                              intel->vertex_attr_count,
+                                              intel->ViewportMatrix.m, 0);
+
+      intel->vertex_size >>= 2;
+
+      i915->state.Ctx[I915_CTXREG_LIS2] = s2;
+      i915->state.Ctx[I915_CTXREG_LIS4] = s4;
+
+      k = intel->vtbl.check_vertex_size(intel, intel->vertex_size);
+      assert(k);
+   }
+   /* Reload tracked constants and re-upload the program when stale. */
+   if (!p->params_uptodate)
+      track_params(p);
+
+   if (!p->on_hardware)
+      i915_upload_program(i915, p);
+}
+/* Install this file's program hooks into the driver function table. */
+void
+i915InitFragProgFuncs(struct dd_function_table *functions)
+{
+   functions->NewProgram = i915NewProgram;
+   functions->BindProgram = i915BindProgram;
+   functions->IsProgramNative = i915IsProgramNative;
+   functions->ProgramStringNotify = i915ProgramStringNotify;
+   functions->DeleteProgram = i915DeleteProgram;
+}
diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c
new file mode 100644
index 0000000000..670c713785
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i915_program.c
@@ -0,0 +1,586 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <strings.h>
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "tnl/t_context.h"
+#include "intel_batchbuffer.h"
+
+#include "i915_reg.h"
+#include "i915_context.h"
+#include "i915_program.h"
+
+
+#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
+#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT)
+#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT)
+#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT)
+#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT)
+#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT)
+
+/* These are special, and don't have swizzle/negate bits: only the
+ * register number (plus the register type for T1) is encoded. */
+#define T0_SAMPLER( reg )     (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT)
+#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \
+			       (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT))
+
+
+/* Shift distances between UREG fields and the A0/A1/A2 instruction fields.
+ * Despite the names, *_SHIFT_LEFT is applied with >> and *_SHIFT_RIGHT
+ * with << above; D0_DEST/T0_DEST reuse the A0 dest distance. */
+#define UREG_A0_DEST_SHIFT_LEFT  (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT)
+#define UREG_A0_SRC0_SHIFT_LEFT  (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT)
+#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
+#define UREG_A1_SRC1_SHIFT_LEFT  (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT)
+#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
+#define UREG_A2_SRC2_SHIFT_LEFT  (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT)
+/* UREG_MASK drops the low 8 bits; UREG_TYPE_NR_MASK keeps type + number. */
+#define UREG_MASK         0xffffff00
+#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \
+  			   (REG_NR_MASK << UREG_NR_SHIFT))
+
+
+#define I915_CONSTFLAG_PARAM 0x1f
+
+/* Allocate the lowest free R temporary; exits the process if none is left. */
+GLuint
+i915_get_temp(struct i915_fragment_program *p)
+{
+   const int bit = ffs(~p->temp_flag);   /* 1-based index, 0 if none free */
+   if (!bit) {
+      fprintf(stderr, "%s: out of temporaries\n", __FILE__);
+      exit(1);
+   }
+   p->temp_flag |= 1u << (bit - 1);      /* unsigned: 1 << 31 would overflow */
+   return UREG(REG_TYPE_R, (bit - 1));
+}
+
+
+/* Allocate the lowest free U temporary; exits the process if none is left. */
+GLuint
+i915_get_utemp(struct i915_fragment_program * p)
+{
+   const int bit = ffs(~p->utemp_flag);  /* 1-based index, 0 if none free */
+   if (!bit) {
+      fprintf(stderr, "%s: out of temporaries\n", __FILE__);
+      exit(1);
+   }
+   p->utemp_flag |= 1u << (bit - 1);     /* unsigned: 1 << 31 would overflow */
+   return UREG(REG_TYPE_U, (bit - 1));
+}
+
+void
+i915_release_utemps(struct i915_fragment_program *p)
+{
+   p->utemp_flag = ~0x7;   /* only bits 0-2 clear: utemps 0..2 are free again */
+}
+
+
+/* Emit a DCL for register (type, nr) the first time it is requested.
+ * Only T and S registers are declared; others just return their UREG. */
+GLuint
+i915_emit_decl(struct i915_fragment_program *p,
+               GLuint type, GLuint nr, GLuint d0_flags)
+{
+   const GLuint reg = UREG(type, nr);
+
+   if (type == REG_TYPE_T) {
+      if (p->decl_t & (1u << nr))
+         return reg;
+      p->decl_t |= (1u << nr);
+   }
+   else if (type == REG_TYPE_S) {
+      if (p->decl_s & (1u << nr))
+         return reg;
+      p->decl_s |= (1u << nr);
+   }
+   else
+      return reg;
+
+   /* Make sure the three dwords fit before storing them, not after. */
+   assert(p->declarations + ARRAY_SIZE(p->declarations) - p->decl >= 3);
+   *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags);
+   *(p->decl++) = D1_MBZ;
+   *(p->decl++) = D2_MBZ;
+   p->nr_decl_insn++;
+   return reg;
+}
+
+/* Emit one three-dword ALU instruction writing dest (stripped to its
+ * type and number) under mask/saturate.  Returns that dest UREG, or
+ * UREG_BAD after flagging a program error when p->program is full.
+ */
+GLuint
+i915_emit_arith(struct i915_fragment_program * p,
+                GLuint op,
+                GLuint dest,
+                GLuint mask,
+                GLuint saturate, GLuint src0, GLuint src1, GLuint src2)
+{
+   GLuint srcs[3];
+   GLuint const_slot[3];
+   GLuint nr_const = 0;
+   GLuint i;
+
+   assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
+   dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest));
+   assert(dest);
+
+   srcs[0] = src0;
+   srcs[1] = src1;
+   srcs[2] = src2;
+
+   /* Note which source slots read a constant register. */
+   for (i = 0; i < 3; i++) {
+      if (GET_UREG_TYPE(srcs[i]) == REG_TYPE_CONST)
+         const_slot[nr_const++] = i;
+   }
+
+   /* With several constant sources, each one whose register number
+    * differs from the first constant's is copied into a utemp by a
+    * recursive MOV.  Those utemps are handed back once we are done. */
+   if (nr_const > 1) {
+      const GLuint saved_utemp_flag = p->utemp_flag;
+      const GLuint first_nr = GET_UREG_NR(srcs[const_slot[0]]);
+
+      for (i = 1; i < nr_const; i++) {
+         const GLuint slot = const_slot[i];
+
+         if (GET_UREG_NR(srcs[slot]) != first_nr) {
+            const GLuint tmp = i915_get_utemp(p);
+
+            i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
+                            srcs[slot], 0, 0);
+            srcs[slot] = tmp;
+         }
+      }
+      p->utemp_flag = saved_utemp_flag;
+   }
+
+   if (p->csr >= p->program + ARRAY_SIZE(p->program)) {
+      i915_program_error(p, "Program contains too many instructions");
+      return UREG_BAD;
+   }
+
+   *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(srcs[0]));
+   *(p->csr++) = (A1_SRC0(srcs[0]) | A1_SRC1(srcs[1]));
+   *(p->csr++) = (A2_SRC1(srcs[1]) | A2_SRC2(srcs[2]));
+
+   if (GET_UREG_TYPE(dest) == REG_TYPE_R)
+      p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect;
+
+   p->nr_alu_insn++;
+   return dest;
+}
+
+/* Lowest-numbered R register whose live_regs bit is clear, else UREG_BAD. */
+static GLuint get_free_rreg (struct i915_fragment_program *p,
+                             GLuint live_regs)
+{
+    const int first_free = ffs(~live_regs);
+    if (first_free != 0)
+        return UREG(REG_TYPE_R, first_free - 1);
+    i915_program_error(p, "Can't find free R reg");
+    return UREG_BAD;
+}
+
+/* Emit texture-load instruction `op`, sampling `sampler` at `coord` into
+ * `dest`.  live_regs is the bitmask of R registers that must survive if the
+ * coordinate has to be copied into a scratch R register first.  Returns
+ * dest; on failure a program error is flagged and 0 (no free R register) or
+ * UREG_BAD (instruction buffer full) may be returned instead.
+ */
+GLuint i915_emit_texld( struct i915_fragment_program *p,
+                        GLuint live_regs, GLuint dest, GLuint destmask,
+                        GLuint sampler, GLuint coord, GLuint op )
+{
+    if (coord != UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord))) {
+        /* With the help of the "needed registers" table created earlier, pick
+         * a register we can MOV the swizzled TC to (since TEX doesn't support
+         * swizzled sources) */
+        GLuint swizCoord = get_free_rreg(p, live_regs);
+        if (swizCoord == UREG_BAD)
+            return 0;
+
+        i915_emit_arith( p, A0_MOV, swizCoord, A0_DEST_CHANNEL_ALL, 0, coord, 0, 0 );
+        coord = swizCoord;
+    }
+
+   /* Don't worry about saturate as we only support texture formats
+    * that are always in the 0..1 range.
+    */
+   if (destmask != A0_DEST_CHANNEL_ALL) {
+      /* Partial write: sample into a scratch utemp, then MOV the wanted
+       * channels.  live_regs is forwarded (not 0) so that a coordinate copy
+       * made by the nested call cannot land on a live R register.
+       */
+      GLuint tmp = i915_get_utemp(p);
+      i915_emit_texld( p, live_regs, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, op );
+      i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
+      return dest;
+   }
+   else {
+      assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);
+      assert(dest == UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));
+      /* Can't use unsaved temps for coords, as the phase boundary would result
+       * in the contents becoming undefined. */
+      assert(GET_UREG_TYPE(coord) != REG_TYPE_U);
+
+      if ((GET_UREG_TYPE(coord) != REG_TYPE_R) &&
+          (GET_UREG_TYPE(coord) != REG_TYPE_OC) &&
+          (GET_UREG_TYPE(coord) != REG_TYPE_OD) &&
+          (GET_UREG_TYPE(coord) != REG_TYPE_T)) {
+          GLuint tmpCoord = get_free_rreg(p, live_regs);
+          if (tmpCoord == UREG_BAD)
+              return 0;
+
+          i915_emit_arith(p, A0_MOV, tmpCoord, A0_DEST_CHANNEL_ALL, 0, coord, 0, 0);
+          coord = tmpCoord;
+      }
+
+      /* Output register being oC or oD defines a phase boundary */
+      if (GET_UREG_TYPE(dest) == REG_TYPE_OC ||
+          GET_UREG_TYPE(dest) == REG_TYPE_OD)
+         p->nr_tex_indirect++;
+
+      /* Reading from an r# register whose contents depend on output of the
+       * current phase defines a phase boundary.
+       */
+      if (GET_UREG_TYPE(coord) == REG_TYPE_R &&
+          p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect)
+         p->nr_tex_indirect++;
+
+      if (p->csr >= p->program + ARRAY_SIZE(p->program)) {
+         i915_program_error(p, "Program contains too many instructions");
+         return UREG_BAD;
+      }
+
+      *(p->csr++) = (op | T0_DEST( dest ) | T0_SAMPLER( sampler ));
+      *(p->csr++) = T1_ADDRESS_REG( coord );
+      *(p->csr++) = T2_MBZ;
+
+      if (GET_UREG_TYPE(dest) == REG_TYPE_R)
+         p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect;
+      p->nr_tex_insn++;
+      return dest;
+   }
+}
+
+/* Return a ureg that reads scalar c0 in .x.  0.0 and 1.0 become ZERO/ONE
+ * swizzles of R0; other values reuse or claim one constant channel. */
+GLuint
+i915_emit_const1f(struct i915_fragment_program * p, GLfloat c0)
+{
+   GLint r, chan;
+
+   if (c0 == 0.0)
+      return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);
+   if (c0 == 1.0)
+      return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);
+
+   for (r = 0; r < I915_MAX_CONSTANT; r++) {
+      if (p->constant_flags[r] == I915_CONSTFLAG_PARAM)
+         continue;              /* register is bound to a parameter */
+      for (chan = 0; chan < 4; chan++) {
+         if ((p->constant_flags[r] & (1 << chan)) && p->constant[r][chan] != c0)
+            continue;           /* channel holds a different value */
+         p->constant[r][chan] = c0;
+         p->constant_flags[r] |= 1 << chan;
+         if (p->nr_constants < r + 1)
+            p->nr_constants = r + 1;
+         return swizzle(UREG(REG_TYPE_CONST, r), chan, ZERO, ZERO, ONE);
+      }
+   }
+
+   fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
+   p->error = 1;
+   return 0;
+}
+/* Return a ureg reading c0 in .x and c1 in .y.  A component equal to 0.0 or
+ * 1.0 comes from a swizzle selector, so only the other one needs storage. */
+GLuint
+i915_emit_const2f(struct i915_fragment_program * p, GLfloat c0, GLfloat c1)
+{
+   GLint r, chan;
+
+   if (c0 == 0.0)
+      return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);
+   if (c0 == 1.0)
+      return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W);
+   if (c1 == 0.0)
+      return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);
+   if (c1 == 1.0)
+      return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);
+
+   /* Otherwise claim two adjacent unused channels of one register. */
+   for (r = 0; r < I915_MAX_CONSTANT; r++) {
+      const GLuint flags = p->constant_flags[r];
+      if (flags == 0xf || flags == I915_CONSTFLAG_PARAM)
+         continue;
+      for (chan = 0; chan < 3; chan++) {
+         if (flags & (3 << chan))
+            continue;
+         p->constant[r][chan] = c0;
+         p->constant[r][chan + 1] = c1;
+         p->constant_flags[r] |= 3 << chan;
+         if (p->nr_constants < r + 1)
+            p->nr_constants = r + 1;
+         return swizzle(UREG(REG_TYPE_CONST, r), chan, chan + 1, ZERO, ONE);
+      }
+   }
+
+   fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
+   p->error = 1;
+   return 0;
+}
+
+/* Return a ureg for the vec4 (c0, c1, c2, c3).  Registers are scanned in order
+ * and the first one that already holds these values, or is unused, is taken. */
+GLuint
+i915_emit_const4f(struct i915_fragment_program * p,
+                  GLfloat c0, GLfloat c1, GLfloat c2, GLfloat c3)
+{
+   GLint r;
+
+   for (r = 0; r < I915_MAX_CONSTANT; r++) {
+      switch (p->constant_flags[r]) {
+      case 0xf:                 /* all four channels set: reuse on exact match */
+         if (p->constant[r][0] != c0 || p->constant[r][1] != c1 ||
+             p->constant[r][2] != c2 || p->constant[r][3] != c3)
+            continue;
+         return UREG(REG_TYPE_CONST, r);
+      case 0:                   /* untouched: claim the whole register */
+         p->constant[r][0] = c0;
+         p->constant[r][1] = c1;
+         p->constant[r][2] = c2;
+         p->constant[r][3] = c3;
+         p->constant_flags[r] = 0xf;
+         if (p->nr_constants < r + 1)
+            p->nr_constants = r + 1;
+         return UREG(REG_TYPE_CONST, r);
+      }
+   }
+
+   fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
+   p->error = 1;
+   return 0;
+}
+
+/* Array form of i915_emit_const4f(): reads c[0] through c[3]. */
+GLuint
+i915_emit_const4fv(struct i915_fragment_program * p, const GLfloat * c)
+{
+   return i915_emit_const4f(p, c[0], c[1], c[2], c[3]);
+}
+
+/* Bind a whole constant register to the parameter vector at `values` (tracked
+ * by pointer, not copied).  Returns its ureg, or 0 with p->error set if full. */
+GLuint
+i915_emit_param4fv(struct i915_fragment_program * p, const GLfloat * values)
+{
+   GLint r, n;
+
+   /* Same pointer seen before: hand back the register already bound to it. */
+   for (n = 0; n < p->nr_params; n++) {
+      if (p->param[n].values == values)
+         return UREG(REG_TYPE_CONST, p->param[n].reg);
+   }
+
+   for (r = 0; r < I915_MAX_CONSTANT; r++) {
+      if (p->constant_flags[r] != 0)
+         continue;               /* already holds constants or a parameter */
+
+      p->constant_flags[r] = I915_CONSTFLAG_PARAM;
+      n = p->nr_params++;
+      p->param[n].values = values;
+      p->param[n].reg = r;
+      p->params_uptodate = 0;
+      if (p->nr_constants < r + 1)
+         p->nr_constants = r + 1;
+      return UREG(REG_TYPE_CONST, r);
+   }
+
+   fprintf(stderr, "%s: out of constants\n", __FUNCTION__);
+   p->error = 1;
+   return 0;
+}
+
+/* Print a printf-style diagnostic to stderr and mark the program as failed.
+ * Warning the user about program errors has proven valuable in bug reports,
+ * but it also makes piglit report errors when we merely fall back to
+ * software for an unsupportable program. */
+void
+i915_program_error(struct i915_fragment_program *p, const char *fmt, ...)
+{
+   va_list args;
+
+   fprintf(stderr, "i915_program_error: ");
+   va_start(args, fmt);
+   vfprintf(stderr, fmt, args);
+   va_end(args);
+
+   fprintf(stderr, "\n");
+   p->error = 1;   /* makes i915_upload_program() skip this program */
+}
+
+/* Reset p to an empty, untranslated program bound to i915's GL context and
+ * start its declaration stream with the PIXEL_SHADER_PROGRAM header dword. */
+void
+i915_init_program(struct i915_context *i915, struct i915_fragment_program *p)
+{
+   p->ctx = &i915->intel.ctx;
+   p->error = 0;
+   p->translated = 0;
+   p->on_hardware = 0;
+   p->params_uptodate = 0;
+
+   /* Instruction counters; nr_tex_indirect starts at 1. */
+   p->nr_tex_indirect = 1;
+   p->nr_tex_insn = 0;
+   p->nr_alu_insn = 0;
+   p->nr_decl_insn = 0;
+   memset(&p->register_phases, 0, sizeof(p->register_phases));
+
+   p->nr_params = 0;
+   p->nr_constants = 0;
+   memset(p->constant_flags, 0, sizeof(p->constant_flags));
+   p->temp_flag = 0xffff000;
+   p->utemp_flag = ~0x7;
+   p->wpos_tex = -1;
+   p->depth_written = 0;
+   p->decl_s = 0;
+   p->decl_t = 0;
+
+   /* Rewind both output streams, then emit the packet header dword. */
+   p->csr = p->program;
+   p->decl = p->declarations;
+   *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
+}
+
+
+void
+i915_fini_program(struct i915_fragment_program *p)
+{
+   GLuint program_size = p->csr - p->program;
+   GLuint decl_size = p->decl - p->declarations;
+
+   if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) {
+      i915_program_error(p, "Exceeded max nr indirect texture lookups "
+			 "(%d out of %d)",
+			 p->nr_tex_indirect, I915_MAX_TEX_INDIRECT);
+   }
+
+   if (p->nr_tex_insn > I915_MAX_TEX_INSN) {
+      i915_program_error(p, "Exceeded max TEX instructions (%d out of %d)",
+			 p->nr_tex_insn, I915_MAX_TEX_INSN);
+   }
+
+   if (p->nr_alu_insn > I915_MAX_ALU_INSN)
+      i915_program_error(p, "Exceeded max ALU instructions (%d out of %d)",
+			 p->nr_alu_insn, I915_MAX_ALU_INSN);
+
+   if (p->nr_decl_insn > I915_MAX_DECL_INSN) {
+      i915_program_error(p, "Exceeded max DECL instructions (%d out of %d)",
+			 p->nr_decl_insn, I915_MAX_DECL_INSN);
+   }
+
+   if (p->error) {
+      p->FragProg.Base.NumNativeInstructions = 0;
+      p->FragProg.Base.NumNativeAluInstructions = 0;
+      p->FragProg.Base.NumNativeTexInstructions = 0;
+      p->FragProg.Base.NumNativeTexIndirections = 0;
+   }
+   else {
+      p->FragProg.Base.NumNativeInstructions = (p->nr_alu_insn +
+                                                p->nr_tex_insn +
+                                                p->nr_decl_insn);
+      p->FragProg.Base.NumNativeAluInstructions = p->nr_alu_insn;
+      p->FragProg.Base.NumNativeTexInstructions = p->nr_tex_insn;
+      p->FragProg.Base.NumNativeTexIndirections = p->nr_tex_indirect;
+   }
+
+   p->declarations[0] |= program_size + decl_size - 2;
+}
+/* Copy p's declarations, instructions and constants into i915->state and mark
+ * the matching state atoms dirty.  Does nothing if translation failed. */
+void
+i915_upload_program(struct i915_context *i915,
+                    struct i915_fragment_program *p)
+{
+   GLuint program_size = p->csr - p->program;
+   GLuint decl_size = p->decl - p->declarations;
+
+   if (p->error)
+      return;
+
+   /* Could just go straight to the batchbuffer from here.  Declarations are
+    * compared as well as instructions: programs may differ only in their
+    * declarations, and the cached copy must not be left stale then. */
+   if (i915->state.ProgramSize != (program_size + decl_size) ||
+       memcmp(i915->state.Program, p->declarations,
+              decl_size * sizeof(int)) != 0 ||
+       memcmp(i915->state.Program + decl_size, p->program,
+              program_size * sizeof(int)) != 0) {
+      I915_STATECHANGE(i915, I915_UPLOAD_PROGRAM);
+      memcpy(i915->state.Program, p->declarations, decl_size * sizeof(int));
+      memcpy(i915->state.Program + decl_size, p->program,
+             program_size * sizeof(int));
+      i915->state.ProgramSize = decl_size + program_size;
+   }
+
+   /* Always seemed to get a failure if I used memcmp() to
+    * shortcircuit this state upload.  Needs further investigation? */
+   if (p->nr_constants) {
+      GLuint nr = p->nr_constants;
+
+      I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 1);
+      I915_STATECHANGE(i915, I915_UPLOAD_CONSTANTS);
+      i915->state.Constant[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS | ((nr) * 4);
+      /* Mask with bits 0..nr-1 set; unsigned so nr == 32 cannot overflow. */
+      i915->state.Constant[1] = (1u << (nr - 1)) | ((1u << (nr - 1)) - 1);
+      memcpy(&i915->state.Constant[2], p->constant, 4 * sizeof(int) * (nr));
+      i915->state.ConstantSize = 2 + (nr) * 4;
+
+      if (0) {   /* debug dump, compiled out */
+         GLuint i;
+         for (i = 0; i < nr; i++) {
+            fprintf(stderr, "const[%d]: %f %f %f %f\n", i, p->constant[i][0],
+                    p->constant[i][1], p->constant[i][2], p->constant[i][3]);
+         }
+      }
+   }
+   else {
+      I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 0);
+   }
+
+   p->on_hardware = 1;
+}
diff --git a/src/mesa/drivers/dri/i915/i915_program.h b/src/mesa/drivers/dri/i915/i915_program.h
new file mode 100644
index 0000000000..0d17d04865
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i915_program.h
@@ -0,0 +1,160 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#ifndef I915_PROGRAM_H
+#define I915_PROGRAM_H
+
+#include "i915_context.h"
+#include "i915_reg.h"
+
+
+
+/* Bit layout of a ureg: register type and number, then one selector field
+ * (with a negate bit above it) per channel.  Having zero and one in here
+ * makes the definition of swizzle a lot easier. */
+#define UREG_TYPE_SHIFT               29
+#define UREG_NR_SHIFT                 24
+#define UREG_CHANNEL_X_NEGATE_SHIFT   23
+#define UREG_CHANNEL_X_SHIFT          20
+#define UREG_CHANNEL_Y_NEGATE_SHIFT   19
+#define UREG_CHANNEL_Y_SHIFT          16
+#define UREG_CHANNEL_Z_NEGATE_SHIFT   15
+#define UREG_CHANNEL_Z_SHIFT          12
+#define UREG_CHANNEL_W_NEGATE_SHIFT   11
+#define UREG_CHANNEL_W_SHIFT          8
+#define UREG_CHANNEL_ZERO_NEGATE_MBZ  5
+#define UREG_CHANNEL_ZERO_SHIFT       4
+#define UREG_CHANNEL_ONE_NEGATE_MBZ   1
+#define UREG_CHANNEL_ONE_SHIFT        0
+
+#define UREG_BAD          0xffffffff    /* not a valid ureg */
+
+#define X    SRC_X
+#define Y    SRC_Y
+#define Z    SRC_Z
+#define W    SRC_W
+#define ZERO SRC_ZERO
+#define ONE  SRC_ONE
+
+/* Construct a ureg with the identity swizzle (x, y, z, w):
+ */
+#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) |		\
+			  ((nr)  << UREG_NR_SHIFT) |		\
+			  (X     << UREG_CHANNEL_X_SHIFT) |	\
+			  (Y     << UREG_CHANNEL_Y_SHIFT) |	\
+			  (Z     << UREG_CHANNEL_Z_SHIFT) |	\
+			  (W     << UREG_CHANNEL_W_SHIFT) |	\
+			  (ZERO  << UREG_CHANNEL_ZERO_SHIFT) |	\
+			  (ONE   << UREG_CHANNEL_ONE_SHIFT))
+/* Move field `channel` of reg into the X slot / an X-slot value to `channel`: */
+#define GET_CHANNEL_SRC( reg, channel ) (((reg)<<((channel)*4)) & (0xf<<20))
+#define CHANNEL_SRC( src, channel ) ((src)>>((channel)*4))
+
+#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&REG_TYPE_MASK)
+#define GET_UREG_NR(reg)   (((reg)>>UREG_NR_SHIFT)&REG_NR_MASK)
+
+
+
+#define UREG_XYZW_CHANNEL_MASK 0x00ffff00
+/* Rearrange channels: output x/y/z/w each take the 4-bit selector field
+ * (negate bit included) that reg currently holds for the named source. */
+static INLINE int
+swizzle(int reg, int x, int y, int z, int w)
+{
+   const unsigned u = (unsigned) reg;   /* unsigned: shifting bit 31 is UB on int */
+   return (int) ((u & ~UREG_XYZW_CHANNEL_MASK) |
+                 CHANNEL_SRC(GET_CHANNEL_SRC(u, x), 0) |
+                 CHANNEL_SRC(GET_CHANNEL_SRC(u, y), 1) |
+                 CHANNEL_SRC(GET_CHANNEL_SRC(u, z), 2) |
+                 CHANNEL_SRC(GET_CHANNEL_SRC(u, w), 3));
+}
+/* Flip the negate bit of every channel whose argument has bit 0 set; XOR
+ * means a channel that was already negated becomes positive again. */
+static INLINE int
+negate(int reg, int x, int y, int z, int w)
+{
+   const int nx = (x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT;
+   const int ny = (y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT;
+   const int nz = (z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT;
+   const int nw = (w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT;
+   return reg ^ (nx | ny | nz | nw);
+}
+
+
+extern GLuint i915_get_temp(struct i915_fragment_program *p);
+extern GLuint i915_get_utemp(struct i915_fragment_program *p);
+extern void i915_release_utemps(struct i915_fragment_program *p);
+
+/* Emit a texture load; a swizzled coord is first MOVed to a free R register. */
+extern GLuint i915_emit_texld(struct i915_fragment_program *p,
+                              GLuint live_regs,
+                              GLuint dest,
+                              GLuint destmask,
+                              GLuint sampler, GLuint coord, GLuint op);
+/* Emit an ALU instruction; surplus constant sources are MOVed into utemps. */
+extern GLuint i915_emit_arith(struct i915_fragment_program *p,
+                              GLuint op,
+                              GLuint dest,
+                              GLuint mask,
+                              GLuint saturate,
+                              GLuint src0, GLuint src1, GLuint src2);
+
+extern GLuint i915_emit_decl(struct i915_fragment_program *p,
+                             GLuint type, GLuint nr, GLuint d0_flags);
+
+/* Immediate constants: each returns a ureg that reads the given value(s). */
+extern GLuint i915_emit_const1f(struct i915_fragment_program *p, GLfloat c0);
+
+extern GLuint i915_emit_const2f(struct i915_fragment_program *p,
+                                GLfloat c0, GLfloat c1);
+
+extern GLuint i915_emit_const4fv(struct i915_fragment_program *p,
+                                 const GLfloat * c);
+
+extern GLuint i915_emit_const4f(struct i915_fragment_program *p,
+                                GLfloat c0, GLfloat c1,
+                                GLfloat c2, GLfloat c3);
+
+/* Reserve a constant register for a parameter vector tracked by pointer. */
+extern GLuint i915_emit_param4fv(struct i915_fragment_program *p,
+                                 const GLfloat * values);
+/* printf-style diagnostic on stderr; also sets p->error. */
+extern void i915_program_error(struct i915_fragment_program *p,
+                               const char *fmt, ...);
+/* init resets p; fini checks limits and sets the length; upload copies to i915->state. */
+extern void i915_init_program(struct i915_context *i915,
+                              struct i915_fragment_program *p);
+
+extern void i915_upload_program(struct i915_context *i915,
+                                struct i915_fragment_program *p);
+
+extern void i915_fini_program(struct i915_fragment_program *p);
+
+extern void i915_update_program(GLcontext *ctx);
+
+#endif
diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h
new file mode 100644
index 0000000000..7f31ff674f
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i915_reg.h
@@ -0,0 +1,737 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#ifndef _I915_REG_H_
+#define _I915_REG_H_
+
+
+#include "intel_reg.h"
+/* Clear the bits of var selected by mask, then OR in value (not masked). */
+#define I915_SET_FIELD( var, mask, value ) ((var) &= ~(mask), (var) |= (value))
+
+#define PRIM3D_INLINE		(CMD_3D | (0x1f<<24))
+#define PRIM3D_TRILIST		(0x0<<18)
+#define PRIM3D_TRISTRIP 	(0x1<<18)
+#define PRIM3D_TRISTRIP_RVRSE	(0x2<<18)
+#define PRIM3D_TRIFAN		(0x3<<18)
+#define PRIM3D_POLY		(0x4<<18)
+#define PRIM3D_LINELIST 	(0x5<<18)
+#define PRIM3D_LINESTRIP	(0x6<<18)
+#define PRIM3D_RECTLIST 	(0x7<<18)
+#define PRIM3D_POINTLIST	(0x8<<18)
+#define PRIM3D_DIB		(0x9<<18)
+#define PRIM3D_CLEAR_RECT	(0xa<<18)
+#define PRIM3D_ZONE_INIT	(0xd<<18)
+#define PRIM3D_MASK		(0x1f<<18)
+
+/* p137 */
+#define _3DSTATE_AA_CMD			(CMD_3D | (0x06<<24))
+#define AA_LINE_ECAAR_WIDTH_ENABLE	(1<<16)
+#define AA_LINE_ECAAR_WIDTH_0_5 	0
+#define AA_LINE_ECAAR_WIDTH_1_0		(1<<14)
+#define AA_LINE_ECAAR_WIDTH_2_0 	(2<<14)
+#define AA_LINE_ECAAR_WIDTH_4_0 	(3<<14)
+#define AA_LINE_REGION_WIDTH_ENABLE	(1<<8)
+#define AA_LINE_REGION_WIDTH_0_5	0
+#define AA_LINE_REGION_WIDTH_1_0	(1<<6)
+#define AA_LINE_REGION_WIDTH_2_0	(2<<6)
+#define AA_LINE_REGION_WIDTH_4_0	(3<<6)
+
+/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/
+#define _3DSTATE_BACKFACE_STENCIL_OPS    (CMD_3D | (0x8<<24))
+#define BFO_ENABLE_STENCIL_REF          (1<<23)
+#define BFO_STENCIL_REF_SHIFT           15
+#define BFO_STENCIL_REF_MASK            (0xff<<15)
+#define BFO_ENABLE_STENCIL_FUNCS        (1<<14)
+#define BFO_STENCIL_TEST_SHIFT          11
+#define BFO_STENCIL_TEST_MASK           (0x7<<11)
+#define BFO_STENCIL_FAIL_SHIFT          8
+#define BFO_STENCIL_FAIL_MASK           (0x7<<8)
+#define BFO_STENCIL_PASS_Z_FAIL_SHIFT   5
+#define BFO_STENCIL_PASS_Z_FAIL_MASK    (0x7<<5)
+#define BFO_STENCIL_PASS_Z_PASS_SHIFT   2
+#define BFO_STENCIL_PASS_Z_PASS_MASK    (0x7<<2)
+#define BFO_ENABLE_STENCIL_TWO_SIDE     (1<<1)
+#define BFO_STENCIL_TWO_SIDE            (1<<0)
+
+
+/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */
+#define _3DSTATE_BACKFACE_STENCIL_MASKS    (CMD_3D | (0x9<<24))
+#define BFM_ENABLE_STENCIL_TEST_MASK      (1<<17)
+#define BFM_ENABLE_STENCIL_WRITE_MASK     (1<<16)
+#define BFM_STENCIL_TEST_MASK_SHIFT       8
+#define BFM_STENCIL_TEST_MASK_MASK        (0xff<<8)
+#define BFM_STENCIL_TEST_MASK(x)	  (((x)&0xff) << 8)
+#define BFM_STENCIL_WRITE_MASK_SHIFT      0
+#define BFM_STENCIL_WRITE_MASK_MASK       (0xff<<0)
+#define BFM_STENCIL_WRITE_MASK(x)	  ((x)&0xff)
+
+
+
+/* 3DSTATE_BIN_CONTROL p141 */
+
+/* 3DSTATE_CHROMA_KEY */
+
+/* 3DSTATE_CLEAR_PARAMETERS, p150 */
+/* 
+ * Sets the color, depth and stencil clear values used by the
+ * CLEAR_RECT and ZONE_INIT primitive types, respectively.  These
+ * primitives override most 3d state and only take a minimal x/y
+ * vertex.  The color/z/stencil information is supplied here and
+ * therefore cannot vary per vertex.
+ */
+#define _3DSTATE_CLEAR_PARAMETERS	(CMD_3D | (0x1d<<24) | (0x9c<<16) | 5)
+/* Dword 1 */
+#define CLEARPARAM_CLEAR_RECT		(1 << 16)
+#define CLEARPARAM_ZONE_INIT		(0 << 16)
+#define CLEARPARAM_WRITE_COLOR		(1 << 2)
+#define CLEARPARAM_WRITE_DEPTH		(1 << 1)
+#define CLEARPARAM_WRITE_STENCIL	(1 << 0)
+
+/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */
+#define _3DSTATE_CONST_BLEND_COLOR_CMD	(CMD_3D | (0x1d<<24) | (0x88<<16))
+
+
+
+/* 3DSTATE_COORD_SET_BINDINGS, p154 */
+#define _3DSTATE_COORD_SET_BINDINGS      (CMD_3D | (0x16<<24))
+#define CSB_TCB(iunit, eunit)           ((eunit)<<((iunit)*3))  /* 3 bits per iunit */
+
+/* p156 */
+#define _3DSTATE_DFLT_DIFFUSE_CMD	(CMD_3D | (0x1d<<24) | (0x99<<16))
+
+/* p157 */
+#define _3DSTATE_DFLT_SPEC_CMD		(CMD_3D | (0x1d<<24) | (0x9a<<16))
+
+/* p158 */
+#define _3DSTATE_DFLT_Z_CMD		(CMD_3D | (0x1d<<24) | (0x98<<16))
+
+
+/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */
+#define _3DSTATE_DEPTH_OFFSET_SCALE       (CMD_3D | (0x1d<<24) | (0x97<<16))
+/* scale in dword 1 */
+
+
+/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */
+#define _3DSTATE_DEPTH_SUBRECT_DISABLE    (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2)
+
+/* p161 */
+#define _3DSTATE_DST_BUF_VARS_CMD	(CMD_3D | (0x1d<<24) | (0x85<<16))
+/* Dword 1 */
+#define CLASSIC_EARLY_DEPTH		(1<<31)
+#define TEX_DEFAULT_COLOR_OGL           (0<<30)
+#define TEX_DEFAULT_COLOR_D3D           (1<<30)
+#define ZR_EARLY_DEPTH                  (1<<29)
+#define LOD_PRECLAMP_OGL                (1<<28)
+#define LOD_PRECLAMP_D3D                (0<<28)
+#define DITHER_FULL_ALWAYS              (0<<26)
+#define DITHER_FULL_ON_FB_BLEND         (1<<26)
+#define DITHER_CLAMPED_ALWAYS           (2<<26)
+#define LINEAR_GAMMA_BLEND_32BPP        (1<<25)
+#define DEBUG_DISABLE_ENH_DITHER        (1<<24)
+#define DSTORG_HORT_BIAS(x)		((x)<<20)
+#define DSTORG_VERT_BIAS(x)		((x)<<16)
+#define COLOR_4_2_2_CHNL_WRT_ALL	0
+#define COLOR_4_2_2_CHNL_WRT_Y		(1<<12)
+#define COLOR_4_2_2_CHNL_WRT_CR		(2<<12)
+#define COLOR_4_2_2_CHNL_WRT_CB		(3<<12)
+#define COLOR_4_2_2_CHNL_WRT_CRCB	(4<<12)
+#define COLR_BUF_8BIT			0
+#define COLR_BUF_RGB555 		(1<<8)
+#define COLR_BUF_RGB565 		(2<<8)
+#define COLR_BUF_ARGB8888		(3<<8)
+#define DEPTH_FRMT_16_FIXED		0
+#define DEPTH_FRMT_16_FLOAT		(1<<2)
+#define DEPTH_FRMT_24_FIXED_8_OTHER	(2<<2)
+#define VERT_LINE_STRIDE_1		(1<<1)
+#define VERT_LINE_STRIDE_0		(0<<1)
+#define VERT_LINE_STRIDE_OFS_1		1
+#define VERT_LINE_STRIDE_OFS_0		0
+
+/* p166 */
+#define _3DSTATE_DRAW_RECT_CMD		(CMD_3D|(0x1d<<24)|(0x80<<16)|3)
+/* Dword 1 */
+#define DRAW_RECT_DIS_DEPTH_OFS 	(1<<30)
+#define DRAW_DITHER_OFS_X(x)		((x)<<26)
+#define DRAW_DITHER_OFS_Y(x)		((x)<<24)
+/* Dword 2 */
+#define DRAW_YMIN(x)			((x)<<16)
+#define DRAW_XMIN(x)			(x)
+/* Dword 3 */
+#define DRAW_YMAX(x)			((x)<<16)
+#define DRAW_XMAX(x)			(x)
+/* Dword 4 */
+#define DRAW_YORG(x)			((x)<<16)
+#define DRAW_XORG(x)			(x)
+
+
+/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */
+
+/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */
+
+
+/* _3DSTATE_FOG_COLOR, p173 */
+#define _3DSTATE_FOG_COLOR_CMD		(CMD_3D|(0x15<<24))
+#define FOG_COLOR_RED(x)		((x)<<16)
+#define FOG_COLOR_GREEN(x)		((x)<<8)
+#define FOG_COLOR_BLUE(x)		(x)
+
+/* _3DSTATE_FOG_MODE, p174 */
+#define _3DSTATE_FOG_MODE_CMD		(CMD_3D|(0x1d<<24)|(0x89<<16)|2)
+/* Dword 1 */
+#define FMC1_FOGFUNC_MODIFY_ENABLE	(1<<31)
+#define FMC1_FOGFUNC_VERTEX		(0<<28)
+#define FMC1_FOGFUNC_PIXEL_EXP		(1<<28)
+#define FMC1_FOGFUNC_PIXEL_EXP2		(2<<28)
+#define FMC1_FOGFUNC_PIXEL_LINEAR	(3<<28)
+#define FMC1_FOGFUNC_MASK		(3<<28)
+#define FMC1_FOGINDEX_MODIFY_ENABLE     (1<<27)
+#define FMC1_FOGINDEX_Z		        (0<<25)
+#define FMC1_FOGINDEX_W   		(1<<25)
+#define FMC1_C1_C2_MODIFY_ENABLE	(1<<24)
+#define FMC1_DENSITY_MODIFY_ENABLE	(1<<23)
+#define FMC1_C1_ONE      	        (1<<13)
+#define FMC1_C1_MASK		        (0xffff<<4)
+/* Dword 2 */
+#define FMC2_C2_ONE		        (1<<16)
+/* Dword 3 */
+#define FMC3_D_ONE      		(1<<16)
+
+
+
+/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */
+#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD	(CMD_3D|(0x0b<<24))
+#define IAB_MODIFY_ENABLE	        (1<<23)
+#define IAB_ENABLE       	        (1<<22)
+#define IAB_MODIFY_FUNC         	(1<<21)
+#define IAB_FUNC_SHIFT          	16
+#define IAB_MODIFY_SRC_FACTOR   	(1<<11)
+#define IAB_SRC_FACTOR_SHIFT		6
+#define IAB_SRC_FACTOR_MASK		(BLENDFACT_MASK<<6)
+#define IAB_MODIFY_DST_FACTOR	        (1<<5)
+#define IAB_DST_FACTOR_SHIFT		0
+#define IAB_DST_FACTOR_MASK		(BLENDFACT_MASK<<0)
+
+
+#define BLENDFUNC_ADD			0x0
+#define BLENDFUNC_SUBTRACT		0x1
+#define BLENDFUNC_REVERSE_SUBTRACT	0x2
+#define BLENDFUNC_MIN			0x3
+#define BLENDFUNC_MAX			0x4
+#define BLENDFUNC_MASK			0x7
+
+/* 3DSTATE_LOAD_INDIRECT, p180 */
+
+#define _3DSTATE_LOAD_INDIRECT	        (CMD_3D|(0x1d<<24)|(0x7<<16))
+#define LI0_STATE_STATIC_INDIRECT       (0x01<<8)
+#define LI0_STATE_DYNAMIC_INDIRECT      (0x02<<8)
+#define LI0_STATE_SAMPLER               (0x04<<8)
+#define LI0_STATE_MAP                   (0x08<<8)
+#define LI0_STATE_PROGRAM               (0x10<<8)
+#define LI0_STATE_CONSTANTS             (0x20<<8)
+
+#define SIS0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define SIS0_FORCE_LOAD                 (1<<1)
+#define SIS0_BUFFER_VALID               (1<<0)
+#define SIS1_BUFFER_LENGTH(x)           ((x)&0xff)
+
+#define DIS0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define DIS0_BUFFER_RESET               (1<<1)
+#define DIS0_BUFFER_VALID               (1<<0)
+
+#define SSB0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define SSB0_FORCE_LOAD                 (1<<1)
+#define SSB0_BUFFER_VALID               (1<<0)
+#define SSB1_BUFFER_LENGTH(x)           ((x)&0xff)
+
+#define MSB0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define MSB0_FORCE_LOAD                 (1<<1)
+#define MSB0_BUFFER_VALID               (1<<0)
+#define MSB1_BUFFER_LENGTH(x)           ((x)&0xff)
+
+#define PSP0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define PSP0_FORCE_LOAD                 (1<<1)
+#define PSP0_BUFFER_VALID               (1<<0)
+#define PSP1_BUFFER_LENGTH(x)           ((x)&0xff)
+
+#define PSC0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define PSC0_FORCE_LOAD                 (1<<1)
+#define PSC0_BUFFER_VALID               (1<<0)
+#define PSC1_BUFFER_LENGTH(x)           ((x)&0xff)
+
+
+
+
+
+/* _3DSTATE_RASTERIZATION_RULES */
+#define _3DSTATE_RASTER_RULES_CMD	(CMD_3D|(0x07<<24))
+#define ENABLE_POINT_RASTER_RULE	(1<<15)
+#define OGL_POINT_RASTER_RULE		(1<<13)
+#define ENABLE_TEXKILL_3D_4D            (1<<10)
+#define TEXKILL_3D                      (0<<9)
+#define TEXKILL_4D                      (1<<9)
+#define ENABLE_LINE_STRIP_PROVOKE_VRTX	(1<<8)
+#define ENABLE_TRI_FAN_PROVOKE_VRTX	(1<<5)
+#define LINE_STRIP_PROVOKE_VRTX_MASK	(3 << 6)
+#define LINE_STRIP_PROVOKE_VRTX(x)	((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX_MASK	(3 << 3)
+#define TRI_FAN_PROVOKE_VRTX(x) 	((x)<<3)
+
+/* _3DSTATE_SCISSOR_ENABLE, p256 */
+#define _3DSTATE_SCISSOR_ENABLE_CMD	(CMD_3D|(0x1c<<24)|(0x10<<19))
+#define ENABLE_SCISSOR_RECT		((1<<1) | 1)
+#define DISABLE_SCISSOR_RECT		(1<<1)
+
+/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */
+#define _3DSTATE_SCISSOR_RECT_0_CMD	(CMD_3D|(0x1d<<24)|(0x81<<16)|1)
+/* Dword 1 */
+#define SCISSOR_RECT_0_YMIN(x)		((x)<<16)
+#define SCISSOR_RECT_0_XMIN(x)		(x)
+/* Dword 2 */
+#define SCISSOR_RECT_0_YMAX(x)		((x)<<16)
+#define SCISSOR_RECT_0_XMAX(x)		(x)
+
+/* Helper macros for blend factors
+ */
+#define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT)
+#define SRC_BLND_FACT(f) ((f)<<S6_CBUF_SRC_BLEND_FACT_SHIFT)
+#define DST_ABLND_FACT(f) ((f)<<IAB_DST_FACTOR_SHIFT)
+#define SRC_ABLND_FACT(f) ((f)<<IAB_SRC_FACTOR_SHIFT)
+
+
+
+
+/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */
+
+/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */
+#define _3DSTATE_MAP_PALETTE_LOAD_32    (CMD_3D|(0x1d<<24)|(0x8f<<16))
+/* subsequent dwords up to length (max 16) are ARGB8888 color values */
+
+/* _3DSTATE_MODES_4, p218 */
+#define _3DSTATE_MODES_4_CMD		(CMD_3D|(0x0d<<24))
+#define ENABLE_LOGIC_OP_FUNC		(1<<23)
+#define LOGIC_OP_FUNC(x)		((x)<<18)
+#define LOGICOP_MASK			(0xf<<18)
+#define MODE4_ENABLE_STENCIL_TEST_MASK	((1<<17)|(0xff00))
+#define ENABLE_STENCIL_TEST_MASK	(1<<17)
+#define STENCIL_TEST_MASK(x)		(((x)&0xff)<<8)
+#define MODE4_ENABLE_STENCIL_WRITE_MASK	((1<<16)|(0x00ff))
+#define ENABLE_STENCIL_WRITE_MASK	(1<<16)
+#define STENCIL_WRITE_MASK(x)		((x)&0xff)
+
+/* _3DSTATE_MODES_5, p220 */
+#define _3DSTATE_MODES_5_CMD		(CMD_3D|(0x0c<<24))
+#define PIPELINE_FLUSH_RENDER_CACHE	(1<<18)
+#define PIPELINE_FLUSH_TEXTURE_CACHE	(1<<16)
+
+
+/* p221 */
+#define _3DSTATE_PIXEL_SHADER_CONSTANTS  (CMD_3D|(0x1d<<24)|(0x6<<16))
+#define PS1_REG(n)                      (1<<(n))
+#define PS2_CONST_X(n)                  (n)
+#define PS3_CONST_Y(n)                  (n)
+#define PS4_CONST_Z(n)                  (n)
+#define PS5_CONST_W(n)                  (n)
+
+/* p222 */
+
+
+#define I915_MAX_TEX_INDIRECT 4
+#define I915_MAX_TEX_INSN     32
+#define I915_MAX_ALU_INSN     64
+#define I915_MAX_DECL_INSN    27
+#define I915_MAX_TEMPORARY    16
+
+
+/* Each instruction is 3 dwords long, though most don't require all
+ * this space.  Maximum of 123 instructions.  Smaller maxes per insn
+ * type.
+ */
+#define _3DSTATE_PIXEL_SHADER_PROGRAM    (CMD_3D|(0x1d<<24)|(0x5<<16))
+
+#define REG_TYPE_R                 0    /* temporary regs, no need to
+                                         * dcl, must be written before
+                                         * read -- Preserved between
+                                         * phases. 
+                                         */
+#define REG_TYPE_T                 1    /* Interpolated values, must be
+                                         * dcl'ed before use.
+                                         *
+                                         * 0..7: texture coord,
+                                         * 8: diffuse color,
+                                         * 9: specular color,
+                                         * 10: fog parameter in w.
+                                         */
+#define REG_TYPE_CONST             2    /* Restriction: only one const
+                                         * can be referenced per
+                                         * instruction, though it may be
+                                         * selected for multiple inputs.
+                                         * Constants not initialized
+                                         * default to zero.
+                                         */
+#define REG_TYPE_S                 3    /* sampler */
+#define REG_TYPE_OC                4    /* output color (rgba) */
+#define REG_TYPE_OD                5    /* output depth (w), xyz are
+                                         * temporaries.  If not written,
+                                         * interpolated depth is used?
+                                         */
+#define REG_TYPE_U                 6    /* unpreserved temporaries */
+#define REG_TYPE_MASK              0x7
+#define REG_NR_MASK                0xf
+
+
+/* REG_TYPE_T:
+ */
+#define T_TEX0     0
+#define T_TEX1     1
+#define T_TEX2     2
+#define T_TEX3     3
+#define T_TEX4     4
+#define T_TEX5     5
+#define T_TEX6     6
+#define T_TEX7     7
+#define T_DIFFUSE  8
+#define T_SPECULAR 9
+#define T_FOG_W    10           /* interpolated fog is in W coord */
+
+/* Arithmetic instructions */
+
+/* .replicate_swizzle == selection and replication of a particular
+ * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww 
+ */
+#define A0_NOP    (0x0<<24)     /* no operation */
+#define A0_ADD    (0x1<<24)     /* dst = src0 + src1 */
+#define A0_MOV    (0x2<<24)     /* dst = src0 */
+#define A0_MUL    (0x3<<24)     /* dst = src0 * src1 */
+#define A0_MAD    (0x4<<24)     /* dst = src0 * src1 + src2 */
+#define A0_DP2ADD (0x5<<24)     /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
+#define A0_DP3    (0x6<<24)     /* dst.xyzw = src0.xyz dot src1.xyz */
+#define A0_DP4    (0x7<<24)     /* dst.xyzw = src0.xyzw dot src1.xyzw */
+#define A0_FRC    (0x8<<24)     /* dst = src0 - floor(src0) */
+#define A0_RCP    (0x9<<24)     /* dst.xyzw = 1/(src0.replicate_swizzle) */
+#define A0_RSQ    (0xa<<24)     /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
+#define A0_EXP    (0xb<<24)     /* dst.xyzw = exp2(src0.replicate_swizzle) */
+#define A0_LOG    (0xc<<24)     /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
+#define A0_CMP    (0xd<<24)     /* dst = (src0 >= 0.0) ? src1 : src2 */
+#define A0_MIN    (0xe<<24)     /* dst = (src0 < src1) ? src0 : src1 */
+#define A0_MAX    (0xf<<24)     /* dst = (src0 >= src1) ? src0 : src1 */
+#define A0_FLR    (0x10<<24)    /* dst = floor(src0) */
+#define A0_MOD    (0x11<<24)    /* dst = src0 fmod 1.0 */
+#define A0_TRC    (0x12<<24)    /* dst = int(src0) */
+#define A0_SGE    (0x13<<24)    /* dst = src0 >= src1 ? 1.0 : 0.0 */
+#define A0_SLT    (0x14<<24)    /* dst = src0 < src1 ? 1.0 : 0.0 */
+#define A0_DEST_SATURATE                 (1<<22)
+#define A0_DEST_TYPE_SHIFT                19
+/* Allow: R, OC, OD, U */
+#define A0_DEST_NR_SHIFT                 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define A0_DEST_CHANNEL_X                (1<<10)
+#define A0_DEST_CHANNEL_Y                (2<<10)
+#define A0_DEST_CHANNEL_Z                (4<<10)
+#define A0_DEST_CHANNEL_W                (8<<10)
+#define A0_DEST_CHANNEL_ALL              (0xf<<10)
+#define A0_DEST_CHANNEL_SHIFT            10
+#define A0_SRC0_TYPE_SHIFT               7
+#define A0_SRC0_NR_SHIFT                 2
+
+#define A0_DEST_CHANNEL_XY              (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
+#define A0_DEST_CHANNEL_XYZ             (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
+
+
+#define SRC_X        0
+#define SRC_Y        1
+#define SRC_Z        2
+#define SRC_W        3
+#define SRC_ZERO     4
+#define SRC_ONE      5
+
+#define A1_SRC0_CHANNEL_X_NEGATE         (1<<31)
+#define A1_SRC0_CHANNEL_X_SHIFT          28
+#define A1_SRC0_CHANNEL_Y_NEGATE         (1<<27)
+#define A1_SRC0_CHANNEL_Y_SHIFT          24
+#define A1_SRC0_CHANNEL_Z_NEGATE         (1<<23)
+#define A1_SRC0_CHANNEL_Z_SHIFT          20
+#define A1_SRC0_CHANNEL_W_NEGATE         (1<<19)
+#define A1_SRC0_CHANNEL_W_SHIFT          16
+#define A1_SRC1_TYPE_SHIFT               13
+#define A1_SRC1_NR_SHIFT                 8
+#define A1_SRC1_CHANNEL_X_NEGATE         (1<<7)
+#define A1_SRC1_CHANNEL_X_SHIFT          4
+#define A1_SRC1_CHANNEL_Y_NEGATE         (1<<3)
+#define A1_SRC1_CHANNEL_Y_SHIFT          0
+
+#define A2_SRC1_CHANNEL_Z_NEGATE         (1<<31)
+#define A2_SRC1_CHANNEL_Z_SHIFT          28
+#define A2_SRC1_CHANNEL_W_NEGATE         (1<<27)
+#define A2_SRC1_CHANNEL_W_SHIFT          24
+#define A2_SRC2_TYPE_SHIFT               21
+#define A2_SRC2_NR_SHIFT                 16
+#define A2_SRC2_CHANNEL_X_NEGATE         (1<<15)
+#define A2_SRC2_CHANNEL_X_SHIFT          12
+#define A2_SRC2_CHANNEL_Y_NEGATE         (1<<11)
+#define A2_SRC2_CHANNEL_Y_SHIFT          8
+#define A2_SRC2_CHANNEL_Z_NEGATE         (1<<7)
+#define A2_SRC2_CHANNEL_Z_SHIFT          4
+#define A2_SRC2_CHANNEL_W_NEGATE         (1<<3)
+#define A2_SRC2_CHANNEL_W_SHIFT          0
+
+
+
+/* Texture instructions */
+#define T0_TEXLD     (0x15<<24) /* Sample texture using predeclared
+                                 * sampler and address, and output
+                                 * filtered texel data to destination
+                                 * register */
+#define T0_TEXLDP    (0x16<<24) /* Same as texld but performs a
+                                 * perspective divide of the texture
+                                 * coordinate .xyz values by .w before
+                                 * sampling. */
+#define T0_TEXLDB    (0x17<<24) /* Same as texld but biases the
+                                 * computed LOD by w.  Only S4.6 two's
+                                 * comp is used.  This implies that a
+                                 * float to fixed conversion is
+                                 * done. */
+#define T0_TEXKILL   (0x18<<24) /* Does not perform a sampling
+                                 * operation.  Simply kills the pixel
+                                 * if any channel of the address
+                                 * register is < 0.0. */
+#define T0_DEST_TYPE_SHIFT                19
+/* Allow: R, OC, OD, U */
+/* Note: U (unpreserved) regs do not retain their values between
+ * phases (cannot be used for feedback) 
+ *
+ * Note: OC and OD registers can only be used as the destination of a
+ * texture instruction once per phase (this is an implementation
+ * restriction). 
+ */
+#define T0_DEST_NR_SHIFT                 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define T0_SAMPLER_NR_SHIFT              0      /* This field ignored for TEXKILL */
+#define T0_SAMPLER_NR_MASK               (0xf<<0)
+
+#define T1_ADDRESS_REG_TYPE_SHIFT        24     /* Reg to use as texture coord */
+/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
+#define T1_ADDRESS_REG_NR_SHIFT          17
+#define T2_MBZ                           0
+
+/* Declaration instructions */
+#define D0_DCL       (0x19<<24) /* Declare a t (interpolated attrib)
+                                 * register or an s (sampler)
+                                 * register. */
+#define D0_SAMPLE_TYPE_SHIFT              22
+#define D0_SAMPLE_TYPE_2D                 (0x0<<22)
+#define D0_SAMPLE_TYPE_CUBE               (0x1<<22)
+#define D0_SAMPLE_TYPE_VOLUME             (0x2<<22)
+#define D0_SAMPLE_TYPE_MASK               (0x3<<22)
+
+#define D0_TYPE_SHIFT                19
+/* Allow: T, S */
+#define D0_NR_SHIFT                  14
+/* Allow T: 0..10, S: 0..15 */
+#define D0_CHANNEL_X                (1<<10)
+#define D0_CHANNEL_Y                (2<<10)
+#define D0_CHANNEL_Z                (4<<10)
+#define D0_CHANNEL_W                (8<<10)
+#define D0_CHANNEL_ALL              (0xf<<10)
+#define D0_CHANNEL_NONE             (0<<10)
+
+#define D0_CHANNEL_XY               (D0_CHANNEL_X|D0_CHANNEL_Y)
+#define D0_CHANNEL_XYZ              (D0_CHANNEL_XY|D0_CHANNEL_Z)
+
+/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
+ * or specular declarations. 
+ *
+ * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) 
+ *
+ * Must be zero for S (sampler) dcls
+ */
+#define D1_MBZ                          0
+#define D2_MBZ                          0
+
+
+
+/* p207 */
+#define _3DSTATE_MAP_STATE               (CMD_3D|(0x1d<<24)|(0x0<<16))
+
+#define MS1_MAPMASK_SHIFT               0
+#define MS1_MAPMASK_MASK                (0x8fff<<0)  /* NOTE(review): skips bits 12-14; 0xffff intended? confirm */
+
+#define MS2_UNTRUSTED_SURFACE           (1<<31)
+#define MS2_ADDRESS_MASK                0xfffffffc
+#define MS2_VERTICAL_LINE_STRIDE        (1<<1)
+#define MS2_VERTICAL_OFFSET             (1<<1)       /* NOTE(review): same bit as MS2_VERTICAL_LINE_STRIDE -- confirm */
+
+#define MS3_HEIGHT_SHIFT              21
+#define MS3_WIDTH_SHIFT               10
+#define MS3_PALETTE_SELECT            (1<<9)
+#define MS3_MAPSURF_FORMAT_SHIFT      7
+#define MS3_MAPSURF_FORMAT_MASK       (0x7<<7)
+#define    MAPSURF_8BIT		 	   (1<<7)
+#define    MAPSURF_16BIT		   (2<<7)
+#define    MAPSURF_32BIT		   (3<<7)
+#define    MAPSURF_422			   (5<<7)
+#define    MAPSURF_COMPRESSED		   (6<<7)
+#define    MAPSURF_4BIT_INDEXED		   (7<<7)
+#define MS3_MT_FORMAT_MASK         (0x7 << 3)
+#define MS3_MT_FORMAT_SHIFT        3
+#define    MT_4BIT_IDX_ARGB8888	           (7<<3)       /* SURFACE_4BIT_INDEXED */
+#define    MT_8BIT_I8		           (0<<3)       /* SURFACE_8BIT */
+#define    MT_8BIT_L8		           (1<<3)
+#define    MT_8BIT_A8		           (4<<3)
+#define    MT_8BIT_MONO8	           (5<<3)
+#define    MT_16BIT_RGB565 		   (0<<3)       /* SURFACE_16BIT */
+#define    MT_16BIT_ARGB1555		   (1<<3)
+#define    MT_16BIT_ARGB4444		   (2<<3)
+#define    MT_16BIT_AY88		   (3<<3)
+#define    MT_16BIT_88DVDU	           (5<<3)
+#define    MT_16BIT_BUMP_655LDVDU	   (6<<3)
+#define    MT_16BIT_I16	                   (7<<3)
+#define    MT_16BIT_L16	                   (8<<3)
+#define    MT_16BIT_A16	                   (9<<3)
+#define    MT_32BIT_ARGB8888		   (0<<3)       /* SURFACE_32BIT */
+#define    MT_32BIT_ABGR8888		   (1<<3)
+#define    MT_32BIT_XRGB8888		   (2<<3)
+#define    MT_32BIT_XBGR8888		   (3<<3)
+#define    MT_32BIT_QWVU8888		   (4<<3)
+#define    MT_32BIT_AXVU8888		   (5<<3)
+#define    MT_32BIT_LXVU8888	           (6<<3)
+#define    MT_32BIT_XLVU8888	           (7<<3)
+#define    MT_32BIT_ARGB2101010	           (8<<3)
+#define    MT_32BIT_ABGR2101010	           (9<<3)
+#define    MT_32BIT_AWVU2101010	           (0xA<<3)
+#define    MT_32BIT_GR1616	           (0xB<<3)
+#define    MT_32BIT_VU1616	           (0xC<<3)
+#define    MT_32BIT_x8I24	           (0xD<<3)
+#define    MT_32BIT_x8L24	           (0xE<<3)
+#define    MT_32BIT_x8A24	           (0xF<<3)
+#define    MT_422_YCRCB_SWAPY	           (0<<3)       /* SURFACE_422 */
+#define    MT_422_YCRCB_NORMAL	           (1<<3)
+#define    MT_422_YCRCB_SWAPUV	           (2<<3)
+#define    MT_422_YCRCB_SWAPUVY	           (3<<3)
+#define    MT_COMPRESS_DXT1		   (0<<3)       /* SURFACE_COMPRESSED */
+#define    MT_COMPRESS_DXT2_3	           (1<<3)
+#define    MT_COMPRESS_DXT4_5	           (2<<3)
+#define    MT_COMPRESS_FXT1		   (3<<3)
+#define    MT_COMPRESS_DXT1_RGB		   (4<<3)
+#define MS3_USE_FENCE_REGS              (1<<2)
+#define MS3_TILED_SURFACE             (1<<1)
+#define MS3_TILE_WALK                 (1<<0)
+
+#define MS4_PITCH_SHIFT                 21
+#define MS4_CUBE_FACE_ENA_NEGX          (1<<20)
+#define MS4_CUBE_FACE_ENA_POSX          (1<<19)
+#define MS4_CUBE_FACE_ENA_NEGY          (1<<18)
+#define MS4_CUBE_FACE_ENA_POSY          (1<<17)
+#define MS4_CUBE_FACE_ENA_NEGZ          (1<<16)
+#define MS4_CUBE_FACE_ENA_POSZ          (1<<15)
+#define MS4_CUBE_FACE_ENA_MASK          (0x3f<<15)
+#define MS4_MAX_LOD_SHIFT		9
+#define MS4_MAX_LOD_MASK		(0x3f<<9)
+#define MS4_MIP_LAYOUT_LEGACY           (0<<8)
+#define MS4_MIP_LAYOUT_BELOW_LPT        (0<<8)
+#define MS4_MIP_LAYOUT_RIGHT_LPT        (1<<8)
+#define MS4_VOLUME_DEPTH_SHIFT          0
+#define MS4_VOLUME_DEPTH_MASK           (0xff<<0)
+
+/* p244 */
+#define _3DSTATE_SAMPLER_STATE         (CMD_3D|(0x1d<<24)|(0x1<<16))
+
+#define SS1_MAPMASK_SHIFT               0
+#define SS1_MAPMASK_MASK                (0x8fff<<0)  /* NOTE(review): skips bits 12-14; 0xffff intended? confirm */
+
+#define SS2_REVERSE_GAMMA_ENABLE        (1<<31)
+#define SS2_PACKED_TO_PLANAR_ENABLE     (1<<30)
+#define SS2_COLORSPACE_CONVERSION       (1<<29)
+#define SS2_CHROMAKEY_SHIFT             27
+#define SS2_BASE_MIP_LEVEL_SHIFT        22
+#define SS2_BASE_MIP_LEVEL_MASK         (0x1f<<22)
+#define SS2_MIP_FILTER_SHIFT            20
+#define SS2_MIP_FILTER_MASK             (0x3<<20)
+#define   MIPFILTER_NONE       	0
+#define   MIPFILTER_NEAREST	1
+#define   MIPFILTER_LINEAR	3
+#define SS2_MAG_FILTER_SHIFT          17
+#define SS2_MAG_FILTER_MASK           (0x7<<17)
+#define   FILTER_NEAREST	0
+#define   FILTER_LINEAR		1
+#define   FILTER_ANISOTROPIC	2
+#define   FILTER_4X4_1    	3
+#define   FILTER_4X4_2    	4
+#define   FILTER_4X4_FLAT 	5
+#define   FILTER_6X5_MONO   	6       /* XXX - check */
+#define SS2_MIN_FILTER_SHIFT          14
+#define SS2_MIN_FILTER_MASK           (0x7<<14)
+#define SS2_LOD_BIAS_SHIFT            5
+#define SS2_LOD_BIAS_ONE              (0x10<<5)
+#define SS2_LOD_BIAS_MASK             (0x1ff<<5)
+/* Shadow requires:
+ *  MT_32BIT_x8{I,L,A}24 or MT_16BIT_{I,L,A}16 texture format
+ *  FILTER_4X4_x  MIN and MAG filters
+ */
+#define SS2_SHADOW_ENABLE             (1<<4)
+#define SS2_MAX_ANISO_MASK            (1<<3)
+#define SS2_MAX_ANISO_2               (0<<3)
+#define SS2_MAX_ANISO_4               (1<<3)
+#define SS2_SHADOW_FUNC_SHIFT         0
+#define SS2_SHADOW_FUNC_MASK          (0x7<<0)
+/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */
+
+#define SS3_MIN_LOD_SHIFT            24
+#define SS3_MIN_LOD_ONE              (0x10<<24)
+#define SS3_MIN_LOD_MASK             (0xff<<24)
+#define SS3_KILL_PIXEL_ENABLE        (1<<17)
+#define SS3_TCX_ADDR_MODE_SHIFT      12
+#define SS3_TCX_ADDR_MODE_MASK       (0x7<<12)
+#define   TEXCOORDMODE_WRAP		0
+#define   TEXCOORDMODE_MIRROR		1
+#define   TEXCOORDMODE_CLAMP_EDGE	2
+#define   TEXCOORDMODE_CUBE       	3
+#define   TEXCOORDMODE_CLAMP_BORDER	4
+#define   TEXCOORDMODE_MIRROR_ONCE      5
+#define SS3_TCY_ADDR_MODE_SHIFT      9
+#define SS3_TCY_ADDR_MODE_MASK       (0x7<<9)
+#define SS3_TCZ_ADDR_MODE_SHIFT      6
+#define SS3_TCZ_ADDR_MODE_MASK       (0x7<<6)
+#define SS3_NORMALIZED_COORDS        (1<<5)
+#define SS3_TEXTUREMAP_INDEX_SHIFT   1
+#define SS3_TEXTUREMAP_INDEX_MASK    (0xf<<1)
+#define SS3_DEINTERLACER_ENABLE      (1<<0)
+
+#define SS4_BORDER_COLOR_MASK        (~0)
+
+/* 3DSTATE_SPAN_STIPPLE, p258
+ */
+#define _3DSTATE_STIPPLE           ((0x3<<29)|(0x1d<<24)|(0x83<<16))
+#define ST1_ENABLE               (1<<16)
+#define ST1_MASK                 (0xffff)
+
+#define _3DSTATE_DEFAULT_Z          ((0x3<<29)|(0x1d<<24)|(0x98<<16))
+#define _3DSTATE_DEFAULT_DIFFUSE    ((0x3<<29)|(0x1d<<24)|(0x99<<16))
+#define _3DSTATE_DEFAULT_SPECULAR   ((0x3<<29)|(0x1d<<24)|(0x9a<<16))
+
+#endif
diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c
new file mode 100644
index 0000000000..26d387f383
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i915_state.c
@@ -0,0 +1,1158 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "main/dd.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+#include "texmem.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "intel_fbo.h"
+#include "intel_screen.h"
+#include "intel_batchbuffer.h"
+#include "intel_buffers.h"
+
+#include "i915_context.h"
+#include "i915_reg.h"
+
+#define FILE_DEBUG_FLAG DEBUG_STATE
+
+void
+i915_update_stencil(GLcontext * ctx)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   GLuint front_ref, front_writemask, front_mask;
+   GLenum front_func, front_fail, front_pass_z_fail, front_pass_z_pass;
+   GLuint back_ref, back_writemask, back_mask;
+   GLenum back_func, back_fail, back_pass_z_fail, back_pass_z_pass;
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+
+   /* Rebuild the front/back stencil registers from ctx->Stencil.  The 915
+    * considers CW to be "front" for two-sided stencil, so pick the GL face
+    * that matches.  Reads _NEW_POLYGON | _NEW_STENCIL state.
+    */
+   if (ctx->Polygon.FrontFace == GL_CW) {
+      front_ref = ctx->Stencil.Ref[0];
+      front_mask = ctx->Stencil.ValueMask[0];
+      front_writemask = ctx->Stencil.WriteMask[0];
+      front_func = ctx->Stencil.Function[0];
+      front_fail = ctx->Stencil.FailFunc[0];
+      front_pass_z_fail = ctx->Stencil.ZFailFunc[0];
+      front_pass_z_pass = ctx->Stencil.ZPassFunc[0];
+      back_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+      back_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
+      back_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
+      back_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
+      back_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace];
+      back_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
+      back_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
+   } else {
+      front_ref = ctx->Stencil.Ref[ctx->Stencil._BackFace];
+      front_mask = ctx->Stencil.ValueMask[ctx->Stencil._BackFace];
+      front_writemask = ctx->Stencil.WriteMask[ctx->Stencil._BackFace];
+      front_func = ctx->Stencil.Function[ctx->Stencil._BackFace];
+      front_fail = ctx->Stencil.FailFunc[ctx->Stencil._BackFace];
+      front_pass_z_fail = ctx->Stencil.ZFailFunc[ctx->Stencil._BackFace];
+      front_pass_z_pass = ctx->Stencil.ZPassFunc[ctx->Stencil._BackFace];
+      back_ref = ctx->Stencil.Ref[0];
+      back_mask = ctx->Stencil.ValueMask[0];
+      back_writemask = ctx->Stencil.WriteMask[0];
+      back_func = ctx->Stencil.Function[0];
+      back_fail = ctx->Stencil.FailFunc[0];
+      back_pass_z_fail = ctx->Stencil.ZFailFunc[0];
+      back_pass_z_pass = ctx->Stencil.ZPassFunc[0];
+   }
+
+   /* Front face: enables and masks in STATE4; ref, func and ops in LIS5. */
+   i915->state.Ctx[I915_CTXREG_STATE4] &= ~(MODE4_ENABLE_STENCIL_TEST_MASK |
+					    MODE4_ENABLE_STENCIL_WRITE_MASK);
+   i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK |
+					   ENABLE_STENCIL_WRITE_MASK |
+					   STENCIL_TEST_MASK(front_mask) |
+					   STENCIL_WRITE_MASK(front_writemask));
+
+   i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK |
+					  S5_STENCIL_TEST_FUNC_MASK |
+					  S5_STENCIL_FAIL_MASK |
+					  S5_STENCIL_PASS_Z_FAIL_MASK |
+					  S5_STENCIL_PASS_Z_PASS_MASK);
+
+   i915->state.Ctx[I915_CTXREG_LIS5] |=
+      (front_ref << S5_STENCIL_REF_SHIFT) |
+      (intel_translate_compare_func(front_func) << S5_STENCIL_TEST_FUNC_SHIFT) |
+      (intel_translate_stencil_op(front_fail) << S5_STENCIL_FAIL_SHIFT) |
+      (intel_translate_stencil_op(front_pass_z_fail) <<
+       S5_STENCIL_PASS_Z_FAIL_SHIFT) |
+      (intel_translate_stencil_op(front_pass_z_pass) <<
+       S5_STENCIL_PASS_Z_PASS_SHIFT);
+
+   /* Back face: only if _TestTwoSide; otherwise just clear the two-side bit. */
+   if (ctx->Stencil._TestTwoSide) {
+      i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &=
+	 ~(BFO_STENCIL_REF_MASK |
+	   BFO_STENCIL_TEST_MASK |
+	   BFO_STENCIL_FAIL_MASK |
+	   BFO_STENCIL_PASS_Z_FAIL_MASK |
+	   BFO_STENCIL_PASS_Z_PASS_MASK);
+      i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] |= BFO_STENCIL_TWO_SIDE |
+	 (back_ref << BFO_STENCIL_REF_SHIFT) |
+	 (intel_translate_compare_func(back_func) << BFO_STENCIL_TEST_SHIFT) |
+	 (intel_translate_stencil_op(back_fail) << BFO_STENCIL_FAIL_SHIFT) |
+	 (intel_translate_stencil_op(back_pass_z_fail) <<
+	  BFO_STENCIL_PASS_Z_FAIL_SHIFT) |
+	 (intel_translate_stencil_op(back_pass_z_pass) <<
+	  BFO_STENCIL_PASS_Z_PASS_SHIFT);
+
+      i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] &=
+	 ~(BFM_STENCIL_TEST_MASK_MASK |
+	   BFM_STENCIL_WRITE_MASK_MASK);
+      i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] |=
+	 BFM_STENCIL_TEST_MASK(back_mask) |
+	 BFM_STENCIL_WRITE_MASK(back_writemask);
+   } else {
+      i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &= ~BFO_STENCIL_TWO_SIDE;
+   }
+}
+
+static void
+i915StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref,
+                        GLuint mask)
+{  /* Empty; presumably covered by i915_update_stencil() -- TODO confirm. */
+}
+
+static void
+i915StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
+{  /* Empty; presumably covered by i915_update_stencil() -- TODO confirm. */
+}
+
+static void
+i915StencilOpSeparate(GLcontext * ctx, GLenum face, GLenum fail, GLenum zfail,
+                      GLenum zpass)
+{  /* Empty; presumably covered by i915_update_stencil() -- TODO confirm. */
+}
+
+static void
+i915AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   int test = intel_translate_compare_func(func);
+   GLubyte refByte;
+   /* Program the alpha test: compare func and a GLubyte reference in LIS6. */
+   UNCLAMPED_FLOAT_TO_UBYTE(refByte, ref);
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS6] &= ~(S6_ALPHA_TEST_FUNC_MASK |
+                                          S6_ALPHA_REF_MASK);
+   i915->state.Ctx[I915_CTXREG_LIS6] |= ((test << S6_ALPHA_TEST_FUNC_SHIFT) |
+                                         (((GLuint) refByte) <<
+                                          S6_ALPHA_REF_SHIFT));
+}
+
+/* This function makes sure that the proper enables are
+ * set for LogicOp, Independent Alpha Blend, and Blending.
+ * It needs to be called from numerous places where we
+ * could change the LogicOp or Independent Alpha Blend without subsequent
+ * calls to glEnable.
+ */
+static void
+i915EvalLogicOpBlendState(GLcontext * ctx)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   /* An enabled RGBA logic op overrides blending: the two are exclusive. */
+   if (RGBA_LOGICOP_ENABLED(ctx)) {
+      i915->state.Ctx[I915_CTXREG_LIS5] |= S5_LOGICOP_ENABLE;
+      i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_CBUF_BLEND_ENABLE;
+   }
+   else {
+      i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_LOGICOP_ENABLE;
+
+      if (ctx->Color.BlendEnabled) {
+         i915->state.Ctx[I915_CTXREG_LIS6] |= S6_CBUF_BLEND_ENABLE;
+      }
+      else {
+         i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_CBUF_BLEND_ENABLE;
+      }
+   }
+}
+
+static void
+i915BlendColor(GLcontext * ctx, const GLfloat color[4])
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   GLubyte r, g, b, a;
+
+   DBG("%s\n", __FUNCTION__);
+   /* Convert the GL blend color to bytes and store it in BLENDCOLOR1. */
+   UNCLAMPED_FLOAT_TO_UBYTE(r, color[RCOMP]);
+   UNCLAMPED_FLOAT_TO_UBYTE(g, color[GCOMP]);
+   UNCLAMPED_FLOAT_TO_UBYTE(b, color[BCOMP]);
+   UNCLAMPED_FLOAT_TO_UBYTE(a, color[ACOMP]);
+   /* Pack as ARGB; alpha is widened so the shift into bit 31 is unsigned. */
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] =
+      ((GLuint) a << 24) | (r << 16) | (g << 8) | b;
+}
+
+
+#define DST_BLND_FACT(f) ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT)	/* LIS6 dst factor */
+#define SRC_BLND_FACT(f) ((f)<<S6_CBUF_SRC_BLEND_FACT_SHIFT)	/* LIS6 src factor */
+#define DST_ABLND_FACT(f) ((f)<<IAB_DST_FACTOR_SHIFT)	/* IAB dst factor */
+#define SRC_ABLND_FACT(f) ((f)<<IAB_SRC_FACTOR_SHIFT)	/* IAB src factor */
+
+
+
+static GLuint
+translate_blend_equation(GLenum mode)
+{
+   switch (mode) {              /* GL blend equation -> BLENDFUNC_* code */
+   case GL_FUNC_ADD:
+      return BLENDFUNC_ADD;
+   case GL_MIN:
+      return BLENDFUNC_MIN;
+   case GL_MAX:
+      return BLENDFUNC_MAX;
+   case GL_FUNC_SUBTRACT:
+      return BLENDFUNC_SUBTRACT;
+   case GL_FUNC_REVERSE_SUBTRACT:
+      return BLENDFUNC_REVERSE_SUBTRACT;
+   default:                     /* any other mode maps to 0 */
+      return 0;
+   }
+}
+
+static void
+i915UpdateBlendState(GLcontext * ctx)
+{  /* Recompute the LIS6 and IAB blend factors/equations from ctx->Color. */
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   GLuint iab = (i915->state.Ctx[I915_CTXREG_IAB] &
+                 ~(IAB_SRC_FACTOR_MASK |
+                   IAB_DST_FACTOR_MASK |
+                   (BLENDFUNC_MASK << IAB_FUNC_SHIFT) | IAB_ENABLE));
+
+   GLuint lis6 = (i915->state.Ctx[I915_CTXREG_LIS6] &
+                  ~(S6_CBUF_SRC_BLEND_FACT_MASK |
+                    S6_CBUF_DST_BLEND_FACT_MASK | S6_CBUF_BLEND_FUNC_MASK));
+
+   GLuint eqRGB = ctx->Color.BlendEquationRGB;
+   GLuint eqA = ctx->Color.BlendEquationA;
+   GLuint srcRGB = ctx->Color.BlendSrcRGB;
+   GLuint dstRGB = ctx->Color.BlendDstRGB;
+   GLuint srcA = ctx->Color.BlendSrcA;
+   GLuint dstA = ctx->Color.BlendDstA;
+   /* For MIN/MAX equations both factors are forced to GL_ONE. */
+   if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+      srcRGB = dstRGB = GL_ONE;
+   }
+
+   if (eqA == GL_MIN || eqA == GL_MAX) {
+      srcA = dstA = GL_ONE;
+   }
+
+   lis6 |= SRC_BLND_FACT(intel_translate_blend_factor(srcRGB));
+   lis6 |= DST_BLND_FACT(intel_translate_blend_factor(dstRGB));
+   lis6 |= translate_blend_equation(eqRGB) << S6_CBUF_BLEND_FUNC_SHIFT;
+
+   iab |= SRC_ABLND_FACT(intel_translate_blend_factor(srcA));
+   iab |= DST_ABLND_FACT(intel_translate_blend_factor(dstA));
+   iab |= translate_blend_equation(eqA) << IAB_FUNC_SHIFT;
+   /* IAB_ENABLE is set only when the alpha setup differs from RGB. */
+   if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB)
+      iab |= IAB_ENABLE;
+   /* Flag a state change only when a register value actually changed. */
+   if (iab != i915->state.Ctx[I915_CTXREG_IAB] ||
+       lis6 != i915->state.Ctx[I915_CTXREG_LIS6]) {
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+      i915->state.Ctx[I915_CTXREG_IAB] = iab;
+      i915->state.Ctx[I915_CTXREG_LIS6] = lis6;
+   }
+
+   /* This will catch a logicop blend equation */
+   i915EvalLogicOpBlendState(ctx);
+}
+
+
+static void
+i915BlendFuncSeparate(GLcontext * ctx, GLenum srcRGB,
+                      GLenum dstRGB, GLenum srcA, GLenum dstA)
+{  /* Factor arguments unused: i915UpdateBlendState() reads ctx->Color. */
+   i915UpdateBlendState(ctx);
+}
+
+
+static void
+i915BlendEquationSeparate(GLcontext * ctx, GLenum eqRGB, GLenum eqA)
+{  /* Equation arguments unused: i915UpdateBlendState() reads ctx->Color. */
+   i915UpdateBlendState(ctx);
+}
+
+
+static void
+i915DepthFunc(GLcontext * ctx, GLenum func)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   int test = intel_translate_compare_func(func);
+
+   DBG("%s\n", __FUNCTION__);
+   /* Replace the depth compare function field in LIS6. */
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_FUNC_MASK;
+   i915->state.Ctx[I915_CTXREG_LIS6] |= test << S6_DEPTH_TEST_FUNC_SHIFT;
+}
+
+static void
+i915DepthMask(GLcontext * ctx, GLboolean flag)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+
+   DBG("%s flag (%d)\n", __FUNCTION__, flag);
+   /* Depth writes are enabled only when both flag and Depth.Test are set. */
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+
+   if (flag && ctx->Depth.Test)
+      i915->state.Ctx[I915_CTXREG_LIS6] |= S6_DEPTH_WRITE_ENABLE;
+   else
+      i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_WRITE_ENABLE;
+}
+
+
+
+/**
+ * Update intel->ViewportMatrix from ctx->Viewport._WindowMap.  Depends on:
+ *  - viewport pos/size
+ *  - depthrange
+ *  - window pos/size or FBO size
+ */
+void
+intelCalcViewport(GLcontext * ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   GLfloat *m = intel->ViewportMatrix.m;
+   GLfloat yScale, yBias;
+
+   if (ctx->DrawBuffer->Name) {
+      /* User created FBO */
+      /* y=0=bottom */
+      yScale = 1.0;
+      yBias = 0.0;
+   }
+   else {
+      /* window buffer, y=0=top */
+      yScale = -1.0;
+      yBias = ctx->DrawBuffer->Height;
+   }
+   /* X is copied through; Y may be flipped; Z is scaled by 1/_DepthMaxF. */
+   m[MAT_SX] = v[MAT_SX];
+   m[MAT_TX] = v[MAT_TX];
+
+   m[MAT_SY] = v[MAT_SY] * yScale;
+   m[MAT_TY] = v[MAT_TY] * yScale + yBias;
+
+   m[MAT_SZ] = v[MAT_SZ] * depthScale;
+   m[MAT_TZ] = v[MAT_TZ] * depthScale;
+}
+
+
+/** Called from ctx->Driver.Viewport() */
+static void
+i915Viewport(GLcontext * ctx,
+              GLint x, GLint y, GLsizei width, GLsizei height)
+{  /* Arguments unused: intelCalcViewport() reads everything from ctx. */
+   intelCalcViewport(ctx);
+}
+
+
+/** Called from ctx->Driver.DepthRange() */
+static void
+i915DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval)
+{  /* Arguments unused: intelCalcViewport() reads everything from ctx. */
+   intelCalcViewport(ctx);
+}
+
+
+/* =============================================================
+ * Polygon stipple
+ *
+ * The i915 supports a 4x4 stipple natively, GL wants 32x32.
+ * Fortunately stipple is usually a repeating pattern.
+ */
+static void
+i915PolygonStipple(GLcontext * ctx, const GLubyte * mask)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   const GLubyte *m;
+   GLubyte p[4];
+   int i, j, k;
+   int active = (ctx->Polygon.StippleFlag &&
+                 i915->intel.reduced_primitive == GL_TRIANGLES);
+   GLuint newMask;
+   /* Clear ST1_ENABLE first; it is set again at the end if the pattern fits. */
+   if (active) {
+      I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
+      i915->state.Stipple[I915_STPREG_ST1] &= ~ST1_ENABLE;
+   }
+
+   /* Use the already unpacked stipple data from the context rather than the
+    * uninterpreted mask passed in.
+    */
+   mask = (const GLubyte *)ctx->PolygonStipple;
+   m = mask;
+   /* Low nibble of bytes 12, 8, 4 and 0, each replicated into a full byte. */
+   p[0] = mask[12] & 0xf;
+   p[0] |= p[0] << 4;
+   p[1] = mask[8] & 0xf;
+   p[1] |= p[1] << 4;
+   p[2] = mask[4] & 0xf;
+   p[2] |= p[2] << 4;
+   p[3] = mask[0] & 0xf;
+   p[3] |= p[3] << 4;
+   /* All 128 bytes must repeat p[3..0] in groups of four, else give up. */
+   for (k = 0; k < 8; k++)
+      for (j = 3; j >= 0; j--)
+         for (i = 0; i < 4; i++, m++)
+            if (*m != p[j]) {
+               i915->intel.hw_stipple = 0;
+               return;
+            }
+
+   newMask = (((p[0] & 0xf) << 0) |
+              ((p[1] & 0xf) << 4) |
+              ((p[2] & 0xf) << 8) | ((p[3] & 0xf) << 12));
+
+
+   if (newMask == 0xffff || newMask == 0x0) {
+      /* this is needed to make conform pass */
+      i915->intel.hw_stipple = 0;
+      return;
+   }
+
+   i915->state.Stipple[I915_STPREG_ST1] &= ~0xffff;
+   i915->state.Stipple[I915_STPREG_ST1] |= newMask;
+   i915->intel.hw_stipple = 1;
+
+   if (active)
+      i915->state.Stipple[I915_STPREG_ST1] |= ST1_ENABLE;
+}
+
+
+/* =============================================================
+ * Hardware clipping
+ */
+static void
+i915Scissor(GLcontext * ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   int x1, y1, x2, y2;
+
+   if (!ctx->DrawBuffer)
+      return;
+
+   DBG("%s %d,%d %dx%d\n", __FUNCTION__, x, y, w, h);
+   /* For the window buffer (Name == 0) y is flipped against the height. */
+   if (ctx->DrawBuffer->Name == 0) {
+      x1 = x;
+      y1 = ctx->DrawBuffer->Height - (y + h);
+      x2 = x + w - 1;
+      y2 = y1 + h - 1;
+      DBG("%s %d..%d,%d..%d (inverted)\n", __FUNCTION__, x1, x2, y1, y2);
+   }
+   else {
+      /* FBO - not inverted
+       */
+      x1 = x;
+      y1 = y;
+      x2 = x + w - 1;
+      y2 = y + h - 1;
+      DBG("%s %d..%d,%d..%d (not inverted)\n", __FUNCTION__, x1, x2, y1, y2);
+   }
+   /* Clamp the inclusive rectangle to the draw buffer bounds. */
+   x1 = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1);
+   y1 = CLAMP(y1, 0, ctx->DrawBuffer->Height - 1);
+   x2 = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1);
+   y2 = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1);
+   
+   DBG("%s %d..%d,%d..%d (clamped)\n", __FUNCTION__, x1, x2, y1, y2);
+   /* SR1 = min corner, SR2 = max corner; y goes in the bits above bit 15. */
+   I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
+   i915->state.Buffer[I915_DESTREG_SR1] = (y1 << 16) | (x1 & 0xffff);
+   i915->state.Buffer[I915_DESTREG_SR2] = (y2 << 16) | (x2 & 0xffff);
+}
+
+static void
+i915LogicOp(GLcontext * ctx, GLenum opcode)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   int tmp = intel_translate_logic_op(opcode);
+
+   DBG("%s\n", __FUNCTION__);
+   /* Replace the logic op function field in STATE4. */
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_STATE4] &= ~LOGICOP_MASK;
+   i915->state.Ctx[I915_CTXREG_STATE4] |= LOGIC_OP_FUNC(tmp);
+}
+
+
+
+/**
+ * Shared driver hook for glCullFace and glFrontFace (also called when
+ * GL_CULL_FACE is toggled): recompute the S4 cull mode from current state.
+ *
+ * \param ctx     GL context whose Polygon and DrawBuffer state is read.
+ * \param unused  ignored; the mode is always derived from ctx.
+ *
+ * Culling disabled selects CULLMODE_NONE and GL_FRONT_AND_BACK selects
+ * CULLMODE_BOTH.  Otherwise the mode starts as CW and is swapped with CCW
+ * once for each of: drawing to an FBO (DrawBuffer->Name != 0), culling
+ * GL_FRONT, and a front-face winding other than GL_CCW.
+ */
+static void
+i915CullFaceFrontFace(GLcontext * ctx, GLenum unused)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   GLuint mode;
+
+   DBG("%s %d\n", __FUNCTION__,
+       ctx->DrawBuffer ? ctx->DrawBuffer->Name : 0);
+
+   if (!ctx->Polygon.CullFlag) {
+      mode = S4_CULLMODE_NONE;
+   }
+   else if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) {
+      mode = S4_CULLMODE_BOTH;
+   }
+   else {
+      /* Count the CW<->CCW swaps; only the parity matters. */
+      int swaps = 0;
+
+      if (ctx->DrawBuffer && ctx->DrawBuffer->Name != 0)
+         swaps++;
+      if (ctx->Polygon.CullFaceMode == GL_FRONT)
+         swaps++;
+      if (ctx->Polygon.FrontFace != GL_CCW)
+         swaps++;
+
+      if (swaps & 1)
+         mode = S4_CULLMODE_CCW;
+      else
+         mode = S4_CULLMODE_CW;
+   }
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS4] =
+      (i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_CULLMODE_MASK) | mode;
+}
+
+/**
+ * Driver hook for glLineWidth.
+ *
+ * The hardware field holds twice the GL width, clamped to 1..0xf.  The
+ * LIS4 context word is only marked dirty when its value actually changes.
+ */
+static void
+i915LineWidth(GLcontext * ctx, GLfloat widthf)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   int new_lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_LINE_WIDTH_MASK;
+   int hw_width;
+
+   DBG("%s\n", __FUNCTION__);
+
+   hw_width = (int) (widthf * 2);
+   hw_width = CLAMP(hw_width, 1, 0xf);
+   new_lis4 |= hw_width << S4_LINE_WIDTH_SHIFT;
+
+   /* Nothing to upload if the packed word is unchanged. */
+   if (new_lis4 == i915->state.Ctx[I915_CTXREG_LIS4])
+      return;
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS4] = new_lis4;
+}
+
+/**
+ * Driver hook for glPointSize.
+ *
+ * The size is rounded to the nearest integer, clamped to 1..255 and stored
+ * in the point-width field of LIS4.  The context word is only marked dirty
+ * when its value actually changes.
+ */
+static void
+i915PointSize(GLcontext * ctx, GLfloat size)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   int new_lis4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_POINT_WIDTH_MASK;
+   GLint hw_size = (int) round(size);
+
+   DBG("%s\n", __FUNCTION__);
+
+   hw_size = CLAMP(hw_size, 1, 255);
+   new_lis4 |= hw_size << S4_POINT_WIDTH_SHIFT;
+
+   /* Nothing to upload if the packed word is unchanged. */
+   if (new_lis4 == i915->state.Ctx[I915_CTXREG_LIS4])
+      return;
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS4] = new_lis4;
+}
+
+
+/**
+ * Driver hook for glPointParameterfv.
+ *
+ * Only GL_POINT_SPRITE_COORD_ORIGIN is acted on: any origin other than
+ * GL_UPPER_LEFT raises the I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN
+ * fallback, and GL_UPPER_LEFT clears it.  All other pnames are ignored.
+ */
+static void
+i915PointParameterfv(GLcontext * ctx, GLenum pname, const GLfloat *params)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+
+   if (pname != GL_POINT_SPRITE_COORD_ORIGIN)
+      return;
+
+   /* A lower-left origin could be supported, but it would require
+    * modifying the fragment program to invert the y component of the
+    * texture coordinate by inserting a 'SUB tc.y, {1.0}.xxxx, tc'
+    * instruction.
+    */
+   FALLBACK(&i915->intel, I915_FALLBACK_POINT_SPRITE_COORD_ORIGIN,
+            (params[0] != GL_UPPER_LEFT));
+}
+
+
+/* =============================================================
+ * Color masks
+ */
+
+/**
+ * Driver hook for glColorMask.
+ *
+ * Each channel whose write flag is false gets its write-disable bit set in
+ * LIS5.  The context word is only marked dirty when its value changes.
+ */
+static void
+i915ColorMask(GLcontext * ctx,
+              GLboolean r, GLboolean g, GLboolean b, GLboolean a)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   GLuint lis5 = i915->state.Ctx[I915_CTXREG_LIS5] & ~S5_WRITEDISABLE_MASK;
+
+   DBG("%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b,
+       a);
+
+   /* The hardware bits are "disable" bits, hence the inverted sense. */
+   lis5 |= r ? 0 : S5_WRITEDISABLE_RED;
+   lis5 |= g ? 0 : S5_WRITEDISABLE_GREEN;
+   lis5 |= b ? 0 : S5_WRITEDISABLE_BLUE;
+   lis5 |= a ? 0 : S5_WRITEDISABLE_ALPHA;
+
+   if (lis5 == i915->state.Ctx[I915_CTXREG_LIS5])
+      return;
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS5] = lis5;
+}
+
+/**
+ * Flag _NEW_TEXTURE in the intel context's NewGLState.
+ *
+ * This is a hack: the flag is raised only to trigger a rebuild of the
+ * fragment program.
+ */
+static void
+update_specular(GLcontext * ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+
+   intel->NewGLState |= _NEW_TEXTURE;
+}
+
+/**
+ * Driver hook for glLightModelfv.
+ *
+ * Only GL_LIGHT_MODEL_COLOR_CONTROL has an effect here: it calls
+ * update_specular().  The parameter values themselves are not read.
+ */
+static void
+i915LightModelfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
+{
+   DBG("%s\n", __FUNCTION__);
+
+   if (pname != GL_LIGHT_MODEL_COLOR_CONTROL)
+      return;
+
+   update_specular(ctx);
+}
+
+/**
+ * Driver hook for glShadeModel.
+ *
+ * GL_SMOOTH clears the alpha, color and specular flat-shade bits in LIS4;
+ * any other mode sets all three.
+ */
+static void
+i915ShadeModel(GLcontext * ctx, GLenum mode)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   const GLuint flat_bits = (S4_FLATSHADE_ALPHA |
+                             S4_FLATSHADE_COLOR |
+                             S4_FLATSHADE_SPECULAR);
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+
+   if (mode != GL_SMOOTH)
+      i915->state.Ctx[I915_CTXREG_LIS4] |= flat_bits;
+   else
+      i915->state.Ctx[I915_CTXREG_LIS4] &= ~flat_bits;
+}
+
+/* =============================================================
+ * Fog
+ *
+ * i915_update_fog() recomputes the fog state words from the context.
+ *
+ * When a fragment program is current, the fog mode is taken from the
+ * program's FogOption and fog counts as enabled unless that is GL_NONE.
+ * Otherwise ctx->Fog.Enabled and ctx->Fog.Mode are used.  The pixel-fog
+ * attempt is compiled out (#if 0), so try_pixel_fog is always 0 and
+ * enabled fog always selects the vertex fog function.
+ *
+ * Afterwards S5_FOG_ENABLE in LIS5 and the I915_UPLOAD_FOG active flag are
+ * made to match 'enabled', and the tnl module is told to disallow vertex
+ * fog and allow pixel fog.
+ */
+void
+i915_update_fog(GLcontext * ctx)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   GLenum mode;
+   GLboolean enabled;
+   GLboolean try_pixel_fog;
+
+   if (ctx->FragmentProgram._Current) {
+      /* Pull in static fog state from program */
+      mode = ctx->FragmentProgram._Current->FogOption;
+      enabled = (mode != GL_NONE);
+      try_pixel_fog = 0;
+   }
+   else {
+      enabled = ctx->Fog.Enabled;
+      mode = ctx->Fog.Mode;
+#if 0
+      /* XXX - DISABLED -- Need ortho fallback */
+      try_pixel_fog = (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT
+                       && ctx->Hint.Fog == GL_NICEST);
+#else
+      try_pixel_fog = 0;
+#endif
+   }
+
+   if (!enabled) {
+      i915->vertex_fog = I915_FOG_NONE;
+   }
+   else if (try_pixel_fog) { /* not reached while try_pixel_fog is forced to 0 above */
+      I915_STATECHANGE(i915, I915_UPLOAD_FOG);
+      i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_FOGFUNC_MASK;
+      i915->vertex_fog = I915_FOG_PIXEL;
+
+      switch (mode) {
+      case GL_LINEAR:
+         if (ctx->Fog.End <= ctx->Fog.Start) {
+            /* XXX - this won't work with fragment programs.  Need to
+             * either fallback or append fog instructions to end of
+             * program in the case of linear fog.
+             */
+            printf("vertex fog!\n");
+            i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_VERTEX;
+            i915->vertex_fog = I915_FOG_VERTEX;
+         }
+         else {
+            GLfloat c2 = 1.0 / (ctx->Fog.End - ctx->Fog.Start);
+            GLfloat c1 = ctx->Fog.End * c2;
+
+            i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_C1_MASK;
+            i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_LINEAR;
+            i915->state.Fog[I915_FOGREG_MODE1] |=
+               ((GLuint) (c1 * FMC1_C1_ONE)) & FMC1_C1_MASK;
+
+            if (i915->state.Fog[I915_FOGREG_MODE1] & FMC1_FOGINDEX_Z) {
+               i915->state.Fog[I915_FOGREG_MODE2]
+                  = (GLuint) (c2 * FMC2_C2_ONE);
+            }
+            else {
+               fi_type fi;
+               fi.f = c2;
+               i915->state.Fog[I915_FOGREG_MODE2] = fi.i; /* store the float's raw bits */
+            }
+         }
+         break;
+      case GL_EXP:
+         i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_EXP;
+         break;
+      case GL_EXP2:
+         i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_PIXEL_EXP2;
+         break;
+      default:
+         break;
+      }
+   }
+   else { /* if (i915->vertex_fog != I915_FOG_VERTEX) */
+      I915_STATECHANGE(i915, I915_UPLOAD_FOG);
+      i915->state.Fog[I915_FOGREG_MODE1] &= ~FMC1_FOGFUNC_MASK;
+      i915->state.Fog[I915_FOGREG_MODE1] |= FMC1_FOGFUNC_VERTEX;
+      i915->vertex_fog = I915_FOG_VERTEX;
+   }
+
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   I915_ACTIVESTATE(i915, I915_UPLOAD_FOG, enabled);
+   if (enabled)
+      i915->state.Ctx[I915_CTXREG_LIS5] |= S5_FOG_ENABLE;
+   else
+      i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_FOG_ENABLE;
+
+   /* Always enable pixel fog.  Vertex fog using fog coord will conflict
+    * with fog code appended onto fragment program.
+    */
+    _tnl_allow_vertex_fog( ctx, 0 );
+    _tnl_allow_pixel_fog( ctx, 1 );
+}
+
+/**
+ * Driver hook for glFogfv.
+ *
+ * GL_FOG_DENSITY writes the MODE3 fog word: a fixed-point value scaled by
+ * FMC3_D_ONE when MODE1 has FMC1_FOGINDEX_Z set, otherwise the raw bits of
+ * the float density.  GL_FOG_COLOR packs the red, green and blue fog
+ * components, scaled by 255, into the fog color command word.  Every other
+ * pname is ignored here.  Values are read from ctx->Fog, not from 'param'.
+ */
+static void
+i915Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+
+   switch (pname) {
+   case GL_FOG_DENSITY:
+      {
+         GLuint density_bits;
+
+         I915_STATECHANGE(i915, I915_UPLOAD_FOG);
+
+         if (i915->state.Fog[I915_FOGREG_MODE1] & FMC1_FOGINDEX_Z) {
+            density_bits = (GLuint) (ctx->Fog.Density * FMC3_D_ONE);
+         }
+         else {
+            /* Reinterpret the float's bit pattern as an integer. */
+            fi_type fi;
+            fi.f = ctx->Fog.Density;
+            density_bits = fi.i;
+         }
+
+         i915->state.Fog[I915_FOGREG_MODE3] = density_bits;
+      }
+      break;
+
+   case GL_FOG_COLOR:
+      {
+         const GLubyte red = (GLubyte) (ctx->Fog.Color[0] * 255.0F);
+         const GLubyte green = (GLubyte) (ctx->Fog.Color[1] * 255.0F);
+         const GLubyte blue = (GLubyte) (ctx->Fog.Color[2] * 255.0F);
+
+         I915_STATECHANGE(i915, I915_UPLOAD_FOG);
+         i915->state.Fog[I915_FOGREG_COLOR] =
+            (_3DSTATE_FOG_COLOR_CMD | (red << 16) | (green << 8) | (blue << 0));
+      }
+      break;
+
+   default:
+      /* GL_FOG_COORDINATE_SOURCE_EXT, GL_FOG_MODE, GL_FOG_START,
+       * GL_FOG_END and anything else: nothing to do in this hook.
+       */
+      break;
+   }
+}
+
+/**
+ * Driver hook for glHint.
+ *
+ * No hint target, GL_FOG_HINT included, changes any driver state here;
+ * the function is a deliberate no-op.
+ */
+static void
+i915Hint(GLcontext * ctx, GLenum target, GLenum state)
+{
+   (void) ctx;
+   (void) target;
+   (void) state;
+}
+
+/* =============================================================
+ */
+
+/**
+ * Driver hook for glEnable/glDisable.
+ *
+ * \param ctx    GL context.
+ * \param cap    capability being toggled.
+ * \param state  new value of the capability (non-zero = enabled).
+ *
+ * Capabilities that map to a single hardware bit set or clear that bit in
+ * the matching state word after marking the word dirty.  Others delegate
+ * to a helper or raise a software fallback.  Capabilities not listed in
+ * the switch need no work in this hook.
+ */
+static void
+i915Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+
+   switch (cap) {
+   case GL_LIGHTING:
+   case GL_COLOR_SUM:
+      update_specular(ctx);
+      break;
+
+   case GL_ALPHA_TEST:
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+      i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_ALPHA_TEST_ENABLE;
+      if (state)
+         i915->state.Ctx[I915_CTXREG_LIS6] |= S6_ALPHA_TEST_ENABLE;
+      break;
+
+   case GL_BLEND:
+   case GL_COLOR_LOGIC_OP:
+      i915EvalLogicOpBlendState(ctx);
+
+      /* Logicop doesn't seem to work at 16bpp:
+       */
+      if (cap == GL_COLOR_LOGIC_OP && ctx->Visual.rgbBits == 16)
+         FALLBACK(&i915->intel, I915_FALLBACK_LOGICOP, state);
+      break;
+
+   case GL_DITHER:
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+      i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_COLOR_DITHER_ENABLE;
+      if (state)
+         i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE;
+      break;
+
+   case GL_DEPTH_TEST:
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+      i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_ENABLE;
+      if (state)
+         i915->state.Ctx[I915_CTXREG_LIS6] |= S6_DEPTH_TEST_ENABLE;
+
+      /* Re-run the depth mask hook with the current mask. */
+      i915DepthMask(ctx, ctx->Depth.Mask);
+      break;
+
+   case GL_SCISSOR_TEST:
+      I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
+      i915->state.Buffer[I915_DESTREG_SENABLE] = state
+         ? (_3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT)
+         : (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+      break;
+
+   case GL_LINE_SMOOTH:
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+      i915->state.Ctx[I915_CTXREG_LIS4] &= ~S4_LINE_ANTIALIAS_ENABLE;
+      if (state)
+         i915->state.Ctx[I915_CTXREG_LIS4] |= S4_LINE_ANTIALIAS_ENABLE;
+      break;
+
+   case GL_CULL_FACE:
+      i915CullFaceFrontFace(ctx, 0);
+      break;
+
+   case GL_STENCIL_TEST:
+      {
+         /* Hardware stencil needs a stencil renderbuffer with a region. */
+         GLboolean have_hw_stencil = GL_FALSE;
+
+         if (ctx->DrawBuffer) {
+            struct intel_renderbuffer *stencil_rb
+               = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+            if (stencil_rb && stencil_rb->region)
+               have_hw_stencil = GL_TRUE;
+         }
+
+         if (!have_hw_stencil) {
+            FALLBACK(&i915->intel, I915_FALLBACK_STENCIL, state);
+            break;
+         }
+
+         I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+         i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_TEST_ENABLE |
+                                                S5_STENCIL_WRITE_ENABLE);
+         if (state)
+            i915->state.Ctx[I915_CTXREG_LIS5] |= (S5_STENCIL_TEST_ENABLE |
+                                                  S5_STENCIL_WRITE_ENABLE);
+      }
+      break;
+
+   case GL_POLYGON_STIPPLE:
+      /* The stipple command worked on my 855GM box, but not my 845G.
+       * I'll do more testing later to find out exactly which hardware
+       * supports it.  Disabled for now.
+       */
+      if (i915->intel.hw_stipple &&
+          i915->intel.reduced_primitive == GL_TRIANGLES) {
+         I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
+         i915->state.Stipple[I915_STPREG_ST1] &= ~ST1_ENABLE;
+         if (state)
+            i915->state.Stipple[I915_STPREG_ST1] |= ST1_ENABLE;
+      }
+      break;
+
+   case GL_POINT_SPRITE:
+      /* NOTE(review): the original comment here said this state change is
+       * handled in i915_reduced_primitive_state because the hardware bit
+       * should only be set when rendering points, yet the bit is written
+       * directly below.  Confirm which is intended.
+       */
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+      i915->state.Ctx[I915_CTXREG_LIS4] &= ~S4_SPRITE_POINT_ENABLE;
+      if (state)
+         i915->state.Ctx[I915_CTXREG_LIS4] |= S4_SPRITE_POINT_ENABLE;
+      break;
+
+   default:
+      /* GL_TEXTURE_2D, GL_FRAGMENT_PROGRAM_ARB, GL_FOG, GL_POLYGON_SMOOTH,
+       * GL_POINT_SMOOTH and all remaining capabilities: nothing to do.
+       */
+      break;
+   }
+}
+
+/**
+ * Reset the cached hardware state in i915->state to its defaults.
+ *
+ * The whole state structure is zeroed first.  The context, stipple, fog,
+ * buffer (scissor) and raster-rules words are then filled with their
+ * command headers and default values: dithering is on for 16-bit visuals,
+ * the logic op is COPY, the scissor rectangle is disabled and fog starts
+ * in vertex mode.  Finally state.active is set to the groups listed at the
+ * end of the function.
+ */
+static void
+i915_init_packets(struct i915_context *i915)
+{
+   /* Zero all state */
+   memset(&i915->state, 0, sizeof(i915->state));
+
+
+   {
+      I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+      /* Probably don't want to upload all this stuff every time one 
+       * piece changes.
+       */
+      i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+                                         I1_LOAD_S(2) |
+                                         I1_LOAD_S(4) |
+                                         I1_LOAD_S(5) | I1_LOAD_S(6) | (3));
+      i915->state.Ctx[I915_CTXREG_LIS2] = 0;
+      i915->state.Ctx[I915_CTXREG_LIS4] = 0;
+      i915->state.Ctx[I915_CTXREG_LIS5] = 0;
+
+      if (i915->intel.ctx.Visual.rgbBits == 16)
+         i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE;
+
+
+      i915->state.Ctx[I915_CTXREG_LIS6] = (S6_COLOR_WRITE_ENABLE |
+                                           (2 << S6_TRISTRIP_PV_SHIFT));
+
+      i915->state.Ctx[I915_CTXREG_STATE4] = (_3DSTATE_MODES_4_CMD |
+                                             ENABLE_LOGIC_OP_FUNC |
+                                             LOGIC_OP_FUNC(LOGICOP_COPY) |
+                                             ENABLE_STENCIL_TEST_MASK |
+                                             STENCIL_TEST_MASK(0xff) |
+                                             ENABLE_STENCIL_WRITE_MASK |
+                                             STENCIL_WRITE_MASK(0xff));
+
+      i915->state.Ctx[I915_CTXREG_IAB] =
+         (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
+          IAB_MODIFY_FUNC | IAB_MODIFY_SRC_FACTOR | IAB_MODIFY_DST_FACTOR);
+
+      i915->state.Ctx[I915_CTXREG_BLENDCOLOR0] =
+         _3DSTATE_CONST_BLEND_COLOR_CMD;
+      i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = 0;
+
+      i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] =
+	 _3DSTATE_BACKFACE_STENCIL_MASKS |
+	 BFM_ENABLE_STENCIL_TEST_MASK |
+	 BFM_ENABLE_STENCIL_WRITE_MASK |
+	 (0xff << BFM_STENCIL_WRITE_MASK_SHIFT) |
+	 (0xff << BFM_STENCIL_TEST_MASK_SHIFT);
+      i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] =
+	 _3DSTATE_BACKFACE_STENCIL_OPS |
+	 BFO_ENABLE_STENCIL_REF |
+	 BFO_ENABLE_STENCIL_FUNCS |
+	 BFO_ENABLE_STENCIL_TWO_SIDE;
+   }
+
+   {
+      I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
+      i915->state.Stipple[I915_STPREG_ST0] = _3DSTATE_STIPPLE;
+   }
+
+
+   {
+      I915_STATECHANGE(i915, I915_UPLOAD_FOG);
+      i915->state.Fog[I915_FOGREG_MODE0] = _3DSTATE_FOG_MODE_CMD;
+      i915->state.Fog[I915_FOGREG_MODE1] = (FMC1_FOGFUNC_MODIFY_ENABLE |
+                                            FMC1_FOGFUNC_VERTEX |
+                                            FMC1_FOGINDEX_MODIFY_ENABLE |
+                                            FMC1_FOGINDEX_W |
+                                            FMC1_C1_C2_MODIFY_ENABLE |
+                                            FMC1_DENSITY_MODIFY_ENABLE);
+      i915->state.Fog[I915_FOGREG_COLOR] = _3DSTATE_FOG_COLOR_CMD;
+   }
+
+   {
+      i915->state.Buffer[I915_DESTREG_DV0] = _3DSTATE_DST_BUF_VARS_CMD;
+
+      /* scissor: disabled, with an all-zero rectangle */
+      i915->state.Buffer[I915_DESTREG_SENABLE] =
+         (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+      i915->state.Buffer[I915_DESTREG_SR0] = _3DSTATE_SCISSOR_RECT_0_CMD;
+      i915->state.Buffer[I915_DESTREG_SR1] = 0;
+      i915->state.Buffer[I915_DESTREG_SR2] = 0;
+   }
+
+   i915->state.RasterRules[I915_RASTER_RULES] = _3DSTATE_RASTER_RULES_CMD |
+      ENABLE_POINT_RASTER_RULE |
+      OGL_POINT_RASTER_RULE |
+      ENABLE_LINE_STRIP_PROVOKE_VRTX |
+      ENABLE_TRI_FAN_PROVOKE_VRTX |
+      LINE_STRIP_PROVOKE_VRTX(1) |
+      TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D;
+
+#if 0
+   {
+      I915_STATECHANGE(i915, I915_UPLOAD_DEFAULTS);
+      i915->state.Default[I915_DEFREG_C0] = _3DSTATE_DEFAULT_DIFFUSE;
+      i915->state.Default[I915_DEFREG_C1] = 0;
+      i915->state.Default[I915_DEFREG_S0] = _3DSTATE_DEFAULT_SPECULAR;
+      i915->state.Default[I915_DEFREG_S1] = 0;
+      i915->state.Default[I915_DEFREG_Z0] = _3DSTATE_DEFAULT_Z;
+      i915->state.Default[I915_DEFREG_Z1] = 0;
+   }
+#endif
+
+
+   /* These will be emitted at the head of every buffer, unless
+    * we get hardware contexts working.
+    */
+   i915->state.active = (I915_UPLOAD_PROGRAM |
+                         I915_UPLOAD_STIPPLE |
+                         I915_UPLOAD_CTX |
+                         I915_UPLOAD_BUFFERS |
+			 I915_UPLOAD_INVARIENT |
+			 I915_UPLOAD_RASTER_RULES);
+}
+
+/**
+ * Program the provoking-vertex selection from ctx->Light.ProvokingVertex.
+ *
+ * Updates the tri-strip field of LIS6 and the line-strip / tri-fan fields
+ * of the raster rules word.  GL_LAST_VERTEX_CONVENTION selects vertex
+ * indices 2 (tri strip), 1 (line strip) and 2 (tri fan); any other
+ * convention selects 0, 0 and 1.
+ */
+void
+i915_update_provoking_vertex(GLcontext * ctx)
+{
+   struct i915_context *i915 = I915_CONTEXT(ctx);
+   int tristrip_pv, line_strip_pv, tri_fan_pv;
+
+   /* _NEW_LIGHT */
+   if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION) {
+      tristrip_pv = 2;
+      line_strip_pv = 1;
+      tri_fan_pv = 2;
+   }
+   else {
+      tristrip_pv = 0;
+      line_strip_pv = 0;
+      tri_fan_pv = 1;
+   }
+
+   /* Clear the old selections first ... */
+   I915_STATECHANGE(i915, I915_UPLOAD_CTX);
+   i915->state.Ctx[I915_CTXREG_LIS6] &= ~(S6_TRISTRIP_PV_MASK);
+
+   I915_STATECHANGE(i915, I915_UPLOAD_RASTER_RULES);
+   i915->state.RasterRules[I915_RASTER_RULES] &=
+      ~(LINE_STRIP_PROVOKE_VRTX_MASK | TRI_FAN_PROVOKE_VRTX_MASK);
+
+   /* ... then install the new ones. */
+   i915->state.RasterRules[I915_RASTER_RULES] |=
+      (LINE_STRIP_PROVOKE_VRTX(line_strip_pv) |
+       TRI_FAN_PROVOKE_VRTX(tri_fan_pv));
+   i915->state.Ctx[I915_CTXREG_LIS6] |= (tristrip_pv << S6_TRISTRIP_PV_SHIFT);
+}
+
+/**
+ * Install this file's state-change callbacks into a driver function table.
+ *
+ * \param functions  table to fill in; only the state hooks listed below
+ *                   are written, all other entries are left untouched.
+ */
+void
+i915InitStateFunctions(struct dd_function_table *functions)
+{
+   /* Alpha test, blending, color writes and logic op */
+   functions->AlphaFunc = i915AlphaFunc;
+   functions->BlendColor = i915BlendColor;
+   functions->BlendEquationSeparate = i915BlendEquationSeparate;
+   functions->BlendFuncSeparate = i915BlendFuncSeparate;
+   functions->ColorMask = i915ColorMask;
+   functions->LogicOpcode = i915LogicOp;
+
+   /* Depth and stencil */
+   functions->DepthFunc = i915DepthFunc;
+   functions->DepthMask = i915DepthMask;
+   functions->DepthRange = i915DepthRange;
+   functions->StencilFuncSeparate = i915StencilFuncSeparate;
+   functions->StencilMaskSeparate = i915StencilMaskSeparate;
+   functions->StencilOpSeparate = i915StencilOpSeparate;
+
+   /* Rasterization; CullFace and FrontFace share one handler */
+   functions->CullFace = i915CullFaceFrontFace;
+   functions->FrontFace = i915CullFaceFrontFace;
+   functions->LineWidth = i915LineWidth;
+   functions->PointSize = i915PointSize;
+   functions->PointParameterfv = i915PointParameterfv;
+   functions->PolygonStipple = i915PolygonStipple;
+   functions->ShadeModel = i915ShadeModel;
+
+   /* Viewport and scissor */
+   functions->Scissor = i915Scissor;
+   functions->Viewport = i915Viewport;
+
+   /* Enables, fog, hints and lighting */
+   functions->Enable = i915Enable;
+   functions->Fogfv = i915Fogfv;
+   functions->Hint = i915Hint;
+   functions->LightModelfv = i915LightModelfv;
+}
+
+
+/**
+ * Set up the initial i915 state: reset the cached state packets, then run
+ * _mesa_init_driver_state() on the embedded GL context.
+ */
+void
+i915InitState(struct i915_context *i915)
+{
+   i915_init_packets(i915);
+
+   _mesa_init_driver_state(&i915->intel.ctx);
+}
diff --git a/src/mesa/drivers/dri/i915/i915_tex_layout.c b/src/mesa/drivers/dri/i915/i915_tex_layout.c
new file mode 100644
index 0000000000..6e4512129c
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c
@@ -0,0 +1,497 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/** @file i915_tex_layout.c
+ * Code to lay out images in a mipmap tree for i830M-GM915 and G945 and beyond.
+ */
+
+#include "intel_mipmap_tree.h"
+#include "intel_tex_layout.h"
+#include "main/macros.h"
+#include "intel_context.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+/* Starting {x, y} position of each cube face's first image, expressed in
+ * units of the base face dimension: the cube layout functions multiply
+ * each entry by dim to get the offset.
+ */
+static GLint initial_offsets[6][2] = {
+   [FACE_POS_X] = {0, 0},
+   [FACE_POS_Y] = {1, 0},
+   [FACE_POS_Z] = {1, 1},
+   [FACE_NEG_X] = {0, 2},
+   [FACE_NEG_Y] = {1, 2},
+   [FACE_NEG_Z] = {1, 3},
+};
+
+/* Per-face {x, y} step from one mip level to the next, expressed in units
+ * of the next level's (halved) dimension: the cube layout loops add
+ * step * d to the running offset after halving d.
+ */
+static GLint step_offsets[6][2] = {
+   [FACE_POS_X] = {0, 2},
+   [FACE_POS_Y] = {-1, 2},
+   [FACE_POS_Z] = {-1, 1},
+   [FACE_NEG_X] = {0, 2},
+   [FACE_NEG_Y] = {-1, 2},
+   [FACE_NEG_Z] = {-1, 1},
+};
+
+/* Per-face x offset used by i945_miptree_layout_cube() for the image
+ * placed once the halved dimension reaches 2; those images go in the
+ * bottom row (y = total_height - 4).  Entries start at 16 and are 8 apart.
+ */
+static GLint bottom_offsets[6] = {
+   [FACE_POS_X] = 16 + 0 * 8,
+   [FACE_POS_Y] = 16 + 1 * 8,
+   [FACE_POS_Z] = 16 + 2 * 8,
+   [FACE_NEG_X] = 16 + 3 * 8,
+   [FACE_NEG_Y] = 16 + 4 * 8,
+   [FACE_NEG_Z] = 16 + 5 * 8,
+};
+
+
+/**
+ * Cube texture map layout for i830M-GM915 and
+ * non-compressed cube texture map on GM945.
+ *
+ * Hardware layout looks like:
+ *
+ * +-------+-------+
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * |  +x   |  +y   |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * +---+---+-------+
+ * |   |   |       |
+ * | +x| +y|       |
+ * |   |   |       |
+ * |   |   |       |
+ * +-+-+---+  +z   |
+ * | | |   |       |
+ * +-+-+ +z|       |
+ *   | |   |       |
+ * +-+-+---+-------+
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * |  -x   |  -y   |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * +---+---+-------+
+ * |   |   |       |
+ * | -x| -y|       |
+ * |   |   |       |
+ * |   |   |       |
+ * +-+-+---+  -z   |
+ * | | |   |       |
+ * +-+-+ -z|       |
+ *   | |   |       |
+ *   +-+---+-------+
+ *
+ */
+/* Lay out all faces and levels of a cube map using the layout pictured
+ * in the comment above.
+ *
+ * The tree is 2 * dim wide and 4 * dim tall, where dim is the base face
+ * width.  Every level is recorded with six images at origin (0, 0); the
+ * per-face image offsets then start at initial_offsets[face] * dim and
+ * advance by step_offsets[face] * d, with d halved for each level.
+ *
+ * 'intel' and 'tiling' are not used here.
+ */
+static void
+i915_miptree_layout_cube(struct intel_context *intel,
+                         struct intel_mipmap_tree * mt,
+                         uint32_t tiling)
+{
+   const GLuint dim = mt->width0;
+   GLuint lvlWidth = mt->width0, lvlHeight = mt->height0;
+   GLuint face;
+   GLint level;
+
+   assert(lvlWidth == lvlHeight); /* cubemap images are square */
+
+   /* double pitch for cube layouts */
+   mt->total_width = dim * 2;
+   mt->total_height = dim * 4;
+
+   /* Record the size of every level; image offsets are filled in below. */
+   level = mt->first_level;
+   while (level <= mt->last_level) {
+      intel_miptree_set_level_info(mt, level, 6, 0, 0,
+                                   lvlWidth, lvlHeight, 1);
+      lvlWidth >>= 1;
+      lvlHeight >>= 1;
+      level++;
+   }
+
+   for (face = 0; face < 6; face++) {
+      const GLint *start = initial_offsets[face];
+      const GLint *step = step_offsets[face];
+      GLuint x = start[0] * dim;
+      GLuint y = start[1] * dim;
+      GLuint d = dim;
+
+      for (level = mt->first_level; level <= mt->last_level; level++) {
+         intel_miptree_set_image_offset(mt, level, face, x, y);
+
+         /* Report levels requested beyond the 1x1 image. */
+         if (d == 0)
+            printf("cube mipmap %d/%d (%d..%d) is 0x0\n",
+                   face, level, mt->first_level, mt->last_level);
+
+         d /= 2;
+         x += step[0] * d;
+         y += step[1] * d;
+      }
+   }
+}
+
+/* Lay out a 3D texture: the levels of one depth slice are stacked
+ * vertically, and the slices follow each other at multiples of that
+ * stack height.
+ *
+ * 'intel' and 'tiling' are not used here.
+ */
+static void
+i915_miptree_layout_3d(struct intel_context *intel,
+                       struct intel_mipmap_tree * mt,
+                       uint32_t tiling)
+{
+   GLuint w = mt->width0;
+   GLuint h = mt->height0;
+   GLuint d = mt->depth0;
+   GLuint slice_height = 0;
+   GLint level;
+
+   /* Calculate the size of a single slice. */
+   mt->total_width = mt->width0;
+
+   /* XXX: hardware expects/requires 9 levels at minimum.
+    *
+    * NOTE(review): mt->total_height is passed as each level's y origin but
+    * is not written by this function until the very end, so every level
+    * is recorded with whatever value the field held on entry.  Confirm
+    * this is intended rather than the running slice height.
+    */
+   for (level = mt->first_level; level <= MAX2(8, mt->last_level); level++) {
+      intel_miptree_set_level_info(mt, level, d, 0, mt->total_height,
+                                   w, h, d);
+
+      slice_height += MAX2(2, h);
+
+      w = minify(w);
+      h = minify(h);
+      d = minify(d);
+   }
+
+   /* Fixup depth image_offsets: image i of a level sits i slices down. */
+   for (level = mt->first_level, d = mt->depth0;
+        level <= mt->last_level;
+        level++, d = minify(d)) {
+      GLuint slice;
+
+      for (slice = 0; slice < d; slice++)
+         intel_miptree_set_image_offset(mt, level, slice,
+                                        0, slice * slice_height);
+   }
+
+   /* Multiply slice size by texture depth for total size.  It's
+    * remarkable how wasteful of memory the i915 texture layouts
+    * are.  They are largely fixed in the i945.
+    */
+   mt->total_height = slice_height * mt->depth0;
+}
+
+/* Lay out a 1D/2D/rectangle texture: the levels are stacked vertically,
+ * each starting at the running total height.
+ *
+ * A compressed level occupies max(1, height / 4) rows; an uncompressed
+ * level occupies max(2, height) rows rounded up to an even count.
+ *
+ * 'intel' and 'tiling' are not used here.
+ */
+static void
+i915_miptree_layout_2d(struct intel_context *intel,
+                       struct intel_mipmap_tree * mt,
+                       uint32_t tiling)
+{
+   GLuint w = mt->width0;
+   GLuint h = mt->height0;
+   GLint level;
+
+   mt->total_width = mt->width0;
+   mt->total_height = 0;
+
+   level = mt->first_level;
+   while (level <= mt->last_level) {
+      GLuint rows;
+
+      intel_miptree_set_level_info(mt, level, 1,
+                                   0, mt->total_height,
+                                   w, h, 1);
+
+      rows = mt->compressed ? MAX2(1, h / 4)
+                            : ((MAX2(2, h) + 1) & ~1);
+      mt->total_height += rows;
+
+      w = minify(w);
+      h = minify(h);
+      level++;
+   }
+}
+
+/**
+ * Compute the i915 mipmap tree layout for mt, dispatching on its target.
+ *
+ * Cube maps, 3D textures and 1D/2D/rectangle textures each go to their
+ * own layout routine; any other target is reported with _mesa_problem().
+ *
+ * \return GL_TRUE always, including after an unexpected target.
+ */
+GLboolean
+i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt,
+                    uint32_t tiling)
+{
+   if (mt->target == GL_TEXTURE_CUBE_MAP) {
+      i915_miptree_layout_cube(intel, mt, tiling);
+   }
+   else if (mt->target == GL_TEXTURE_3D) {
+      i915_miptree_layout_3d(intel, mt, tiling);
+   }
+   else if (mt->target == GL_TEXTURE_1D ||
+            mt->target == GL_TEXTURE_2D ||
+            mt->target == GL_TEXTURE_RECTANGLE_ARB) {
+      i915_miptree_layout_2d(intel, mt, tiling);
+   }
+   else {
+      _mesa_problem(NULL, "Unexpected tex target in i915_miptree_layout()");
+   }
+
+   DBG("%s: %dx%dx%d\n", __FUNCTION__,
+       mt->total_width, mt->total_height, mt->cpp);
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Compressed cube texture map layout for GM945 and later.
+ *
+ * The hardware layout looks like the 830-915 layout, except for the small
+ * sizes.  A zoomed in view of the layout for 945 is:
+ *
+ * +-------+-------+
+ * |  8x8  |  8x8  |
+ * |       |       |
+ * |       |       |
+ * |  +x   |  +y   |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * +---+---+-------+
+ * |4x4|   |  8x8  |
+ * | +x|   |       |
+ * |   |   |       |
+ * |   |   |       |
+ * +---+   |  +z   |
+ * |4x4|   |       |
+ * | +y|   |       |
+ * |   |   |       |
+ * +---+   +-------+
+ *
+ * ...
+ *
+ * +-------+-------+
+ * |  8x8  |  8x8  |
+ * |       |       |
+ * |       |       |
+ * |  -x   |  -y   |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * |       |       |
+ * +---+---+-------+
+ * |4x4|   |  8x8  |
+ * | -x|   |       |
+ * |   |   |       |
+ * |   |   |       |
+ * +---+   |  -z   |
+ * |4x4|   |       |
+ * | -y|   |       |
+ * |   |   |       |
+ * +---+   +---+---+---+---+---+---+---+---+---+
+ * |4x4|   |4x4|   |2x2|   |2x2|   |2x2|   |2x2|
+ * | +z|   | -z|   | +x|   | +y|   | +z|   | -x| ...
+ * |   |   |   |   |   |   |   |   |   |   |   |
+ * +---+   +---+   +---+   +---+   +---+   +---+
+ *
+ * The bottom row continues with the remaining 2x2 then the 1x1 mip contents
+ * in order, with each of them aligned to a 8x8 block boundary.  Thus, for
+ * 32x32 cube maps and smaller, the bottom row layout is going to dictate the
+ * pitch of the tree.  For a tree with 4x4 images, the pitch is at least
+ * 14 * 8 = 112 texels, for 2x2 it is at least 12 * 8 texels, and for 1x1
+ * it is 6 * 8 texels.
+ */
+/* Lay out a cube map using the scheme described in the comment above.
+ *
+ * The width is 2 * dim when dim > 32 and 14 * 8 otherwise; the height is
+ * 4 * dim + 4 when dim >= 4 and 4 otherwise.  Each face starts at
+ * initial_offsets[face] * dim, except that small base sizes start in the
+ * bottom row (y = total_height - 4).  After each level d is halved and
+ * its new value selects how the offset advances to the next image.
+ *
+ * 'intel' and 'tiling' are not used here.
+ */
+static void
+i945_miptree_layout_cube(struct intel_context *intel,
+			 struct intel_mipmap_tree * mt,
+			 uint32_t tiling)
+{
+   const GLuint dim = mt->width0;
+   GLuint face;
+   GLuint lvlWidth = mt->width0, lvlHeight = mt->height0;
+   GLint level;
+
+   assert(lvlWidth == lvlHeight); /* cubemap images are square */
+
+   /* Depending on the size of the largest images, pitch can be
+    * determined either by the old-style packing of cubemap faces,
+    * or the final row of 4x4, 2x2 and 1x1 faces below this.
+    */
+   if (dim > 32)
+      mt->total_width = dim * 2;
+   else
+      mt->total_width = 14 * 8;
+
+   if (dim >= 4)
+      mt->total_height = dim * 4 + 4;
+   else
+      mt->total_height = 4;
+
+   /* Set all the levels to effectively occupy the whole rectangular region. */
+   for (level = mt->first_level; level <= mt->last_level; level++) {
+      intel_miptree_set_level_info(mt, level, 6,
+				   0, 0,
+				   lvlWidth, lvlHeight, 1);
+      lvlWidth /= 2;
+      lvlHeight /= 2;
+   }
+
+   for (face = 0; face < 6; face++) {
+      GLuint x = initial_offsets[face][0] * dim;
+      GLuint y = initial_offsets[face][1] * dim;
+      GLuint d = dim;
+
+      if (dim == 4 && face >= 4) { /* 4x4 base: last two faces start in the bottom row */
+	 y = mt->total_height - 4;
+	 x = (face - 4) * 8;
+      } else if (dim < 4 && (face > 0 || mt->first_level > 0)) { /* base smaller than 4x4 */
+	 y = mt->total_height - 4;
+	 x = face * 8;
+      }
+
+      for (level = mt->first_level; level <= mt->last_level; level++) {
+	 intel_miptree_set_image_offset(mt, level, face, x, y);
+
+	 d >>= 1; /* d is now the size of the NEXT level's image */
+
+	 switch (d) {
+	 case 4: /* next image is 4x4: placement depends on the face */
+	    switch (face) {
+	    case FACE_POS_X:
+	    case FACE_NEG_X:
+	       x += step_offsets[face][0] * d;
+	       y += step_offsets[face][1] * d;
+	       break;
+	    case FACE_POS_Y:
+	    case FACE_NEG_Y:
+	       y += 12;
+	       x -= 8;
+	       break;
+	    case FACE_POS_Z:
+	    case FACE_NEG_Z:
+	       y = mt->total_height - 4;
+	       x = (face - 4) * 8;
+	       break;
+	    }
+	    break;
+
+	 case 2: /* next image is 2x2: bottom row, fixed per-face x */
+	    y = mt->total_height - 4;
+	    x = bottom_offsets[face];
+	    break;
+
+	 case 1: /* next image is 1x1: same row, 48 further along */
+	    x += 48;
+	    break;
+
+	 default: /* larger images (and d == 0) use the regular step table */
+	    x += step_offsets[face][0] * d;
+	    y += step_offsets[face][1] * d;
+	    break;
+	 }
+      }
+   }
+}
+
+/* Lay out a 3D texture for i945: the depth slices of one level are packed
+ * side by side in rows, and the levels follow each other vertically.
+ *
+ * pack_x_nr slices fit in one row, pack_x_pitch apart; rows are
+ * pack_y_pitch apart.  After each level the x pitch is halved (and the
+ * slices per row doubled) while it is above 4, and the y pitch is halved
+ * while it is above 2.
+ *
+ * 'intel' and 'tiling' are not used here.
+ */
+static void
+i945_miptree_layout_3d(struct intel_context *intel,
+                       struct intel_mipmap_tree * mt,
+                       uint32_t tiling)
+{
+   GLuint w = mt->width0;
+   GLuint h = mt->height0;
+   GLuint d = mt->depth0;
+   GLuint pack_x_pitch, pack_x_nr;
+   GLuint pack_y_pitch;
+   GLuint level;
+
+   mt->total_width = mt->width0;
+   mt->total_height = 0;
+
+   pack_x_nr = 1;
+   pack_x_pitch = mt->total_width;
+   pack_y_pitch = MAX2(mt->height0, 2);
+
+   for (level = mt->first_level; level <= mt->last_level; level++) {
+      GLint row_y = 0;
+      GLint slice = 0;
+
+      intel_miptree_set_level_info(mt, level, d,
+                                   0, mt->total_height,
+                                   w, h, d);
+
+      /* Emit one row of up to pack_x_nr slices per pass. */
+      while (slice < d) {
+         GLint col_x = 0;
+         GLint col;
+
+         for (col = 0; col < pack_x_nr && slice < d; col++, slice++) {
+            intel_miptree_set_image_offset(mt, level, slice, col_x, row_y);
+            col_x += pack_x_pitch;
+         }
+
+         row_y += pack_y_pitch;
+      }
+
+      /* row_y is now the height taken by this level */
+      mt->total_height += row_y;
+
+      if (pack_x_pitch > 4) {
+         pack_x_pitch >>= 1;
+         pack_x_nr <<= 1;
+         assert(pack_x_pitch * pack_x_nr <= mt->total_width);
+      }
+
+      if (pack_y_pitch > 2)
+         pack_y_pitch >>= 1;
+
+      w = minify(w);
+      h = minify(h);
+      d = minify(d);
+   }
+}
+
+/**
+ * Compute the i945 mipmap tree layout for mt, dispatching on its target.
+ *
+ * Compressed cube maps use the i945 cube layout and uncompressed ones the
+ * i915 cube layout.  3D textures use the i945 3D layout, and
+ * 1D/2D/rectangle textures go to i945_miptree_layout_2d().  Any other
+ * target is reported with _mesa_problem().
+ *
+ * \return GL_TRUE always, including after an unexpected target.
+ */
+GLboolean
+i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt,
+                    uint32_t tiling)
+{
+   if (mt->target == GL_TEXTURE_CUBE_MAP) {
+      if (!mt->compressed)
+         i915_miptree_layout_cube(intel, mt, tiling);
+      else
+         i945_miptree_layout_cube(intel, mt, tiling);
+   }
+   else if (mt->target == GL_TEXTURE_3D) {
+      i945_miptree_layout_3d(intel, mt, tiling);
+   }
+   else if (mt->target == GL_TEXTURE_1D ||
+            mt->target == GL_TEXTURE_2D ||
+            mt->target == GL_TEXTURE_RECTANGLE_ARB) {
+      i945_miptree_layout_2d(intel, mt, tiling, 1);
+   }
+   else {
+      _mesa_problem(NULL, "Unexpected tex target in i945_miptree_layout()");
+   }
+
+   DBG("%s: %dx%dx%d\n", __FUNCTION__,
+       mt->total_width, mt->total_height, mt->cpp);
+
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c
new file mode 100644
index 0000000000..e0e7f3bc3d
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i915_texstate.c
@@ -0,0 +1,413 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/colormac.h"
+
+#include "intel_mipmap_tree.h"
+#include "intel_tex.h"
+
+#include "i915_context.h"
+#include "i915_reg.h"
+
+
+static GLuint
+translate_texture_format(gl_format mesa_format, GLuint internal_format,
+			 GLenum DepthMode)
+{
+   switch (mesa_format) {
+   /* Formats on the 8-bit surface type. */
+   case MESA_FORMAT_L8:
+      return MAPSURF_8BIT | MT_8BIT_L8;
+   case MESA_FORMAT_I8:
+      return MAPSURF_8BIT | MT_8BIT_I8;
+   case MESA_FORMAT_A8:
+      return MAPSURF_8BIT | MT_8BIT_A8;
+   /* Formats on the 16-bit surface type. */
+   case MESA_FORMAT_AL88:
+      return MAPSURF_16BIT | MT_16BIT_AY88;
+   case MESA_FORMAT_RGB565:
+      return MAPSURF_16BIT | MT_16BIT_RGB565;
+   case MESA_FORMAT_ARGB1555:
+      return MAPSURF_16BIT | MT_16BIT_ARGB1555;
+   case MESA_FORMAT_ARGB4444:
+      return MAPSURF_16BIT | MT_16BIT_ARGB4444;
+   /* Formats on the 32-bit surface type. */
+   case MESA_FORMAT_ARGB8888:
+      return MAPSURF_32BIT | MT_32BIT_ARGB8888;
+   case MESA_FORMAT_XRGB8888:
+      return MAPSURF_32BIT | MT_32BIT_XRGB8888;
+   /* YCbCr formats use the 4:2:2 surface type. */
+   case MESA_FORMAT_YCBCR_REV:
+      return MAPSURF_422 | MT_422_YCRCB_NORMAL;
+   case MESA_FORMAT_YCBCR:
+      return MAPSURF_422 | MT_422_YCRCB_SWAPY;
+   case MESA_FORMAT_RGB_FXT1:
+   case MESA_FORMAT_RGBA_FXT1:
+      return MAPSURF_COMPRESSED | MT_COMPRESS_FXT1;
+   /* Depth: DepthMode picks the alpha, intensity or luminance variant. */
+   case MESA_FORMAT_Z16:
+      return MAPSURF_16BIT | (DepthMode == GL_ALPHA ? MT_16BIT_A16 :
+                              DepthMode == GL_INTENSITY ? MT_16BIT_I16 :
+                              MT_16BIT_L16);
+   case MESA_FORMAT_RGBA_DXT1:
+   case MESA_FORMAT_RGB_DXT1:
+      return MAPSURF_COMPRESSED | MT_COMPRESS_DXT1;
+   case MESA_FORMAT_RGBA_DXT3:
+      return MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3;
+   case MESA_FORMAT_RGBA_DXT5:
+      return MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5;
+   /* Same DepthMode selection for the 24-bit depth format. */
+   case MESA_FORMAT_S8_Z24:
+      return MAPSURF_32BIT | (DepthMode == GL_ALPHA ? MT_32BIT_x8A24 :
+                              DepthMode == GL_INTENSITY ? MT_32BIT_x8I24 :
+                              MT_32BIT_x8L24);
+   default:
+      fprintf(stderr, "%s: bad image format %x\n", __FUNCTION__, mesa_format);
+      abort();
+      return 0;
+   }
+}
+
+
+
+
+/* The i915 (and related graphics cores) do not support GL_CLAMP.  The
+ * Intel drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE, so the same is done here.
+ */
+static GLuint
+translate_wrap_mode(GLenum wrap)
+{
+   /* GL_CLAMP has no mode of its own here; it shares the CLAMP_TO_EDGE
+    * mode (not quite correct).
+    */
+   if (wrap == GL_CLAMP || wrap == GL_CLAMP_TO_EDGE)
+      return TEXCOORDMODE_CLAMP_EDGE;
+
+   if (wrap == GL_CLAMP_TO_BORDER)
+      return TEXCOORDMODE_CLAMP_BORDER;
+
+   if (wrap == GL_MIRRORED_REPEAT)
+      return TEXCOORDMODE_MIRROR;
+
+   /* GL_REPEAT, and any mode not recognised above, wraps. */
+   return TEXCOORDMODE_WRAP;
+}
+
+
+
+/* Recalculate, from scratch, the map and sampler state words of texture
+ * unit `unit`; `ss3` seeds the SS3 word.  Returns GL_FALSE when the state
+ * cannot be programmed (the caller then sets I915_FALLBACK_TEXTURE).
+ */
+static GLboolean
+i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3)
+{
+   GLcontext *ctx = &intel->ctx;
+   struct i915_context *i915 = i915_context(ctx);
+   struct gl_texture_unit *tUnit = &ctx->Texture.Unit[unit];
+   struct gl_texture_object *tObj = tUnit->_Current;
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   struct gl_texture_image *firstImage;
+   GLuint *state = i915->state.Tex[unit], format, pitch;
+   GLint lodbias, aniso = 0;
+   GLubyte border[4];
+   GLfloat maxlod;
+   /* Clear the unit's whole state row (sizeof(state) is just a pointer). */
+   memset(state, 0, sizeof(i915->state.Tex[unit]));
+
+   /* We need to refcount these. */
+
+   if (i915->state.tex_buffer[unit] != NULL) {
+       drm_intel_bo_unreference(i915->state.tex_buffer[unit]);
+       i915->state.tex_buffer[unit] = NULL;
+   }
+
+   if (!intel_finalize_mipmap_tree(intel, unit))
+      return GL_FALSE;
+
+   /* Get first image here, since intelObj->firstLevel will get set in
+    * the intel_finalize_mipmap_tree() call above.
+    */
+   firstImage = tObj->Image[0][intelObj->firstLevel];
+
+   drm_intel_bo_reference(intelObj->mt->region->buffer);
+   i915->state.tex_buffer[unit] = intelObj->mt->region->buffer;
+   i915->state.tex_offset[unit] = 0; /* Always the origin of the miptree */
+
+   format = translate_texture_format(firstImage->TexFormat,
+				     firstImage->InternalFormat,
+				     tObj->DepthMode);
+   pitch = intelObj->mt->region->pitch * intelObj->mt->cpp;
+
+   state[I915_TEXREG_MS3] =
+      (((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) |
+       ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format);
+
+   if (intelObj->mt->region->tiling != I915_TILING_NONE) {
+      state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE;
+      if (intelObj->mt->region->tiling == I915_TILING_Y)
+	 state[I915_TEXREG_MS3] |= MS3_TILE_WALK;
+   }
+
+   /* We get one field with fraction bits for the maximum addressable
+    * (lowest resolution) LOD.  Use it to cover both MAX_LEVEL and
+    * MAX_LOD.
+    */
+   maxlod = MIN2(tObj->MaxLod, tObj->_MaxLevel - tObj->BaseLevel);
+   state[I915_TEXREG_MS4] =
+      ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) |
+       MS4_CUBE_FACE_ENA_MASK |
+       (U_FIXED(CLAMP(maxlod, 0.0, 11.0), 2) << MS4_MAX_LOD_SHIFT) |
+       ((firstImage->Depth - 1) << MS4_VOLUME_DEPTH_SHIFT));
+
+
+   {
+      GLuint minFilt, mipFilt, magFilt;
+
+      switch (tObj->MinFilter) {
+      case GL_NEAREST:
+         minFilt = FILTER_NEAREST;
+         mipFilt = MIPFILTER_NONE;
+         break;
+      case GL_LINEAR:
+         minFilt = FILTER_LINEAR;
+         mipFilt = MIPFILTER_NONE;
+         break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+         minFilt = FILTER_NEAREST;
+         mipFilt = MIPFILTER_NEAREST;
+         break;
+      case GL_LINEAR_MIPMAP_NEAREST:
+         minFilt = FILTER_LINEAR;
+         mipFilt = MIPFILTER_NEAREST;
+         break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+         minFilt = FILTER_NEAREST;
+         mipFilt = MIPFILTER_LINEAR;
+         break;
+      case GL_LINEAR_MIPMAP_LINEAR:
+         minFilt = FILTER_LINEAR;
+         mipFilt = MIPFILTER_LINEAR;
+         break;
+      default:
+         return GL_FALSE;
+      }
+
+      if (tObj->MaxAnisotropy > 1.0) {
+         minFilt = FILTER_ANISOTROPIC;
+         magFilt = FILTER_ANISOTROPIC;
+         if (tObj->MaxAnisotropy > 2.0)
+            aniso = SS2_MAX_ANISO_4;
+         else
+            aniso = SS2_MAX_ANISO_2;
+      }
+      else {
+         switch (tObj->MagFilter) {
+         case GL_NEAREST:
+            magFilt = FILTER_NEAREST;
+            break;
+         case GL_LINEAR:
+            magFilt = FILTER_LINEAR;
+            break;
+         default:
+            return GL_FALSE;
+         }
+      }
+
+      lodbias = (int) ((tUnit->LodBias + tObj->LodBias) * 16.0);
+      if (lodbias < -256)
+          lodbias = -256;
+      if (lodbias > 255)
+          lodbias = 255;
+      state[I915_TEXREG_SS2] = ((lodbias << SS2_LOD_BIAS_SHIFT) & 
+                                SS2_LOD_BIAS_MASK);
+
+      /* YUV conversion:
+       */
+      if (firstImage->TexFormat == MESA_FORMAT_YCBCR ||
+          firstImage->TexFormat == MESA_FORMAT_YCBCR_REV)
+         state[I915_TEXREG_SS2] |= SS2_COLORSPACE_CONVERSION;
+
+      /* Shadow:
+       */
+      if (tObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB &&
+          tObj->Target != GL_TEXTURE_3D) {
+         if (tObj->Target == GL_TEXTURE_1D) 
+            return GL_FALSE;
+
+         state[I915_TEXREG_SS2] |=
+            (SS2_SHADOW_ENABLE |
+             intel_translate_shadow_compare_func(tObj->CompareFunc));
+
+         minFilt = FILTER_4X4_FLAT;
+         magFilt = FILTER_4X4_FLAT;
+      }
+
+      state[I915_TEXREG_SS2] |= ((minFilt << SS2_MIN_FILTER_SHIFT) |
+                                 (mipFilt << SS2_MIP_FILTER_SHIFT) |
+                                 (magFilt << SS2_MAG_FILTER_SHIFT) |
+                                 aniso);
+   }
+
+   {
+      GLenum ws = tObj->WrapS;
+      GLenum wt = tObj->WrapT;
+      GLenum wr = tObj->WrapR;
+      float minlod;
+
+      /* We program 1D textures as 2D textures, so the 2D texcoord could
+       * result in sampling border values if we don't set the T wrap to
+       * repeat.
+       */
+      if (tObj->Target == GL_TEXTURE_1D)
+	 wt = GL_REPEAT;
+
+      /* 3D textures don't seem to respect the border color.
+       * Fallback if there's ever a danger that they might refer to
+       * it.  
+       * 
+       * Effectively this means fallback on 3D clamp or
+       * clamp_to_border.
+       */
+      if (tObj->Target == GL_TEXTURE_3D &&
+          (tObj->MinFilter != GL_NEAREST ||
+           tObj->MagFilter != GL_NEAREST) &&
+          (ws == GL_CLAMP ||
+           wt == GL_CLAMP ||
+           wr == GL_CLAMP ||
+           ws == GL_CLAMP_TO_BORDER ||
+           wt == GL_CLAMP_TO_BORDER || wr == GL_CLAMP_TO_BORDER))
+         return GL_FALSE;
+
+      /* Only support TEXCOORDMODE_CLAMP_EDGE and TEXCOORDMODE_CUBE (not 
+       * used) when using cube map texture coordinates
+       */
+      if (tObj->Target == GL_TEXTURE_CUBE_MAP_ARB &&
+          (((ws != GL_CLAMP) && (ws != GL_CLAMP_TO_EDGE)) ||
+           ((wt != GL_CLAMP) && (wt != GL_CLAMP_TO_EDGE))))
+          return GL_FALSE;
+
+      state[I915_TEXREG_SS3] = ss3;     /* SS3_NORMALIZED_COORDS */
+
+      state[I915_TEXREG_SS3] |=
+         ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) |
+          (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) |
+          (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT));
+
+      minlod = MIN2(tObj->MinLod, tObj->_MaxLevel - tObj->BaseLevel);
+      state[I915_TEXREG_SS3] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT);
+      state[I915_TEXREG_SS3] |= (U_FIXED(CLAMP(minlod, 0.0, 11.0), 4) <<
+				 SS3_MIN_LOD_SHIFT);
+
+   }
+
+   /* convert border color from float to ubyte */
+   CLAMPED_FLOAT_TO_UBYTE(border[0], tObj->BorderColor.f[0]);
+   CLAMPED_FLOAT_TO_UBYTE(border[1], tObj->BorderColor.f[1]);
+   CLAMPED_FLOAT_TO_UBYTE(border[2], tObj->BorderColor.f[2]);
+   CLAMPED_FLOAT_TO_UBYTE(border[3], tObj->BorderColor.f[3]);
+
+   if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+      /* GL specs that border color for depth textures is taken from the
+       * R channel, while the hardware uses A.  Spam R into all the channels
+       * for safety.
+       */
+      state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[0],
+					       border[0],
+					       border[0],
+					       border[0]);
+   } else {
+      state[I915_TEXREG_SS4] = PACK_COLOR_8888(border[3],
+					       border[0],
+					       border[1],
+					       border[2]);
+   }
+
+
+   I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), GL_TRUE);
+   /* memcmp was already disabled, but definitely won't work as the
+    * region might now change and that wouldn't be detected:
+    */
+   I915_STATECHANGE(i915, I915_UPLOAD_TEX(unit));
+
+
+#if 0
+   DBG(TEXTURE, "state[I915_TEXREG_SS2] = 0x%x\n", state[I915_TEXREG_SS2]);
+   DBG(TEXTURE, "state[I915_TEXREG_SS3] = 0x%x\n", state[I915_TEXREG_SS3]);
+   DBG(TEXTURE, "state[I915_TEXREG_SS4] = 0x%x\n", state[I915_TEXREG_SS4]);
+   DBG(TEXTURE, "state[I915_TEXREG_MS2] = 0x%x\n", state[I915_TEXREG_MS2]);
+   DBG(TEXTURE, "state[I915_TEXREG_MS3] = 0x%x\n", state[I915_TEXREG_MS3]);
+   DBG(TEXTURE, "state[I915_TEXREG_MS4] = 0x%x\n", state[I915_TEXREG_MS4]);
+#endif
+
+   return GL_TRUE;
+}
+
+
+
+
+void
+i915UpdateTextureState(struct intel_context *intel)
+{
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   GLboolean ok = GL_TRUE;
+   GLuint unit;
+
+   /* Walk the units in order, stopping at the first one that fails. */
+   for (unit = 0; unit < I915_TEX_UNITS && ok; unit++) {
+      const GLuint enabled = intel->ctx.Texture.Unit[unit]._ReallyEnabled;
+
+      if (enabled == 0) {
+         /* Unit is off: deactivate its upload and drop its buffer ref. */
+         if (i915->state.active & I915_UPLOAD_TEX(unit))
+            I915_ACTIVESTATE(i915, I915_UPLOAD_TEX(unit), GL_FALSE);
+
+         if (i915->state.tex_buffer[unit] != NULL) {
+            drm_intel_bo_unreference(i915->state.tex_buffer[unit]);
+            i915->state.tex_buffer[unit] = NULL;
+         }
+      }
+      else if (enabled == TEXTURE_RECT_BIT) {
+         /* Rectangle textures: SS3_NORMALIZED_COORDS is left clear. */
+         ok = i915_update_tex_unit(intel, unit, 0);
+      }
+      else if (enabled == TEXTURE_1D_BIT || enabled == TEXTURE_2D_BIT ||
+               enabled == TEXTURE_CUBE_BIT || enabled == TEXTURE_3D_BIT) {
+         ok = i915_update_tex_unit(intel, unit, SS3_NORMALIZED_COORDS);
+      }
+      else {
+         /* Any other value of _ReallyEnabled is not handled here. */
+         ok = GL_FALSE;
+      }
+   }
+
+   FALLBACK(intel, I915_FALLBACK_TEXTURE, !ok);
+}
diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c
new file mode 100644
index 0000000000..d7828a296a
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/i915_vtbl.c
@@ -0,0 +1,688 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/colormac.h"
+
+#include "tnl/t_context.h"
+#include "tnl/t_vertex.h"
+
+#include "intel_batchbuffer.h"
+#include "intel_regions.h"
+#include "intel_tris.h"
+#include "intel_fbo.h"
+
+#include "i915_reg.h"
+#include "i915_context.h"
+
+static void
+i915_render_prevalidate(struct intel_context *intel)
+{
+   /* Installed as vtbl.render_prevalidate: validate the fragment program
+    * of the i915 context obtained from this intel context. */
+   i915ValidateFragmentProgram(i915_context(&intel->ctx));
+}
+/* vtbl.render_start hook (see i915InitVtbl): calls intel_prepare_render(). */
+static void
+i915_render_start(struct intel_context *intel)
+{
+   intel_prepare_render(intel);
+}
+
+
+static void
+i915_reduced_primitive_state(struct intel_context *intel, GLenum rprim)
+{
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   const GLuint old_st1 = i915->state.Stipple[I915_STPREG_ST1];
+   GLuint new_st1 = old_st1 & ~ST1_ENABLE;
+
+   /* The stipple enable bit is set only for triangles (and GL_QUADS,
+    * which comes from RASTERIZE(GL_QUADS) in t_dd_tritemp.h), and only
+    * while polygon stipple is on and intel->hw_stipple is set.  Lines,
+    * points and any other primitive leave it cleared.
+    */
+   if ((rprim == GL_QUADS || rprim == GL_TRIANGLES) &&
+       intel->ctx.Polygon.StippleFlag &&
+       intel->hw_stipple) {
+      new_st1 |= ST1_ENABLE;
+   }
+
+   /* Recorded unconditionally, even when ST1 does not change. */
+   i915->intel.reduced_primitive = rprim;
+
+   /* Only touch the stipple state when the ST1 word really changes. */
+   if (new_st1 != old_st1) {
+      INTEL_FIREVERTICES(intel);
+
+      I915_STATECHANGE(i915, I915_UPLOAD_STIPPLE);
+      i915->state.Stipple[I915_STPREG_ST1] = new_st1;
+   }
+}
+
+
+/* Pull apart the vertex format registers and figure out how large a
+ * vertex is supposed to be. 
+ */
+static GLboolean
+i915_check_vertex_size(struct intel_context *intel, GLuint expected)
+{
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   int texfmts = i915->state.Ctx[I915_CTXREG_LIS2];
+   int vfmt = i915->state.Ctx[I915_CTXREG_LIS4];
+   int unit, size = 0;
+
+   /* Rebuild the vertex size from the format bits programmed in LIS2 and
+    * LIS4, then compare it with what the caller expects.
+    */
+
+   /* Position: counted as 2, 3 or 4 depending on the XYZW format. */
+   switch (vfmt & S4_VFMT_XYZW_MASK) {
+   case S4_VFMT_XY:
+      size = 2;
+      break;
+   case S4_VFMT_XYZ:
+   case S4_VFMT_XYW:
+      size = 3;
+      break;
+   case S4_VFMT_XYZW:
+      size = 4;
+      break;
+   default:
+      fprintf(stderr, "no xyzw specified\n");
+      return GL_FALSE;
+   }
+
+   /* Each optional attribute flagged in LIS4 adds one to the size. */
+   size += (vfmt & S4_VFMT_SPEC_FOG) ? 1 : 0;
+   size += (vfmt & S4_VFMT_COLOR) ? 1 : 0;
+   size += (vfmt & S4_VFMT_DEPTH_OFFSET) ? 1 : 0;
+   size += (vfmt & S4_VFMT_POINT_WIDTH) ? 1 : 0;
+   size += (vfmt & S4_VFMT_FOG_PARAM) ? 1 : 0;
+
+   /* Eight texcoord format fields are read from LIS2: look at the lowest
+    * field, then shift the next one down into its place.
+    */
+   for (unit = 0; unit < 8; unit++) {
+      switch (texfmts & S2_TEXCOORD_FMT0_MASK) {
+      case TEXCOORDFMT_NOT_PRESENT:
+         break;
+      case TEXCOORDFMT_1D:
+      case TEXCOORDFMT_2D_16:
+         size += 1;
+         break;
+      case TEXCOORDFMT_2D:
+      case TEXCOORDFMT_4D_16:
+         size += 2;
+         break;
+      case TEXCOORDFMT_3D:
+         size += 3;
+         break;
+      case TEXCOORDFMT_4D:
+         size += 4;
+         break;
+      default:
+         fprintf(stderr, "bad texcoord fmt %d\n", unit);
+         return GL_FALSE;
+      }
+      texfmts >>= S2_TEXCOORD_FMT1_SHIFT;
+   }
+
+   if (size != expected) {
+      fprintf(stderr, "vertex size mismatch %d/%d\n", size, expected);
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+
+
+static void
+i915_emit_invarient_state(struct intel_context *intel)
+{
+   BATCH_LOCALS;
+
+   BEGIN_BATCH(17);   /* 17 = number of OUT_BATCH() dwords emitted below */
+
+   OUT_BATCH(_3DSTATE_AA_CMD |
+             AA_LINE_ECAAR_WIDTH_ENABLE |
+             AA_LINE_ECAAR_WIDTH_1_0 |
+             AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+
+   OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
+   OUT_BATCH(0);
+
+   OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
+   OUT_BATCH(0);
+
+   OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
+   OUT_BATCH(0);
+
+   /* Don't support texture crossbar yet: coord set N is bound to N. */
+   OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
+             CSB_TCB(0, 0) |
+             CSB_TCB(1, 1) |
+             CSB_TCB(2, 2) |
+             CSB_TCB(3, 3) |
+             CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
+
+   /* Need to initialize this (the S3 word) to zero.
+    */
+   OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0));
+   OUT_BATCH(0);
+
+   /* XXX: Use this */
+   OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+
+   OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+
+   OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
+
+   OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);       /* disable indirect state */
+   OUT_BATCH(0);
+
+   ADVANCE_BATCH();
+}
+
+/* Shorthand: pass `state` and `size` to intel_batchbuffer_data() on intel->batch. */
+#define emit(intel, state, size )		     \
+   intel_batchbuffer_data(intel->batch, state, size)
+
+static GLuint
+get_dirty(struct i915_hw_state *state)
+{
+   const GLuint pending = state->active & ~state->emitted;
+
+   /* Workaround for the multitex hang: if any texture unit is pending,
+    * forget that the others were emitted so all of them go out again.
+    */
+   if (pending & I915_UPLOAD_TEX_ALL)
+      state->emitted &= ~I915_UPLOAD_TEX_ALL;
+
+   /* Recompute, since the workaround may have cleared emitted bits. */
+   return state->active & ~state->emitted;
+}
+
+
+static GLuint
+get_state_size(struct i915_hw_state *state)
+{
+   const GLuint dirty = get_dirty(state);
+   GLuint bytes = 0;
+   GLuint unit;
+
+   /* Fixed-size pieces: the size of each state array, except for the
+    * invariant state, which is counted as a flat 30 * 4 bytes. */
+   if (dirty & I915_UPLOAD_INVARIENT)
+      bytes += 30 * 4;
+   if (dirty & I915_UPLOAD_RASTER_RULES)
+      bytes += sizeof(state->RasterRules);
+   if (dirty & I915_UPLOAD_CTX)
+      bytes += sizeof(state->Ctx);
+   if (dirty & I915_UPLOAD_BUFFERS)
+      bytes += sizeof(state->Buffer);
+   if (dirty & I915_UPLOAD_STIPPLE)
+      bytes += sizeof(state->Stipple);
+   if (dirty & I915_UPLOAD_FOG)
+      bytes += sizeof(state->Fog);
+
+   /* Textures: room for the two packets of (2 + 3 * nr) dwords that
+    * i915_emit_state() builds, one for map state and one for samplers.
+    */
+   if (dirty & I915_UPLOAD_TEX_ALL) {
+      int nr_units = 0;
+
+      for (unit = 0; unit < I915_TEX_UNITS; unit++)
+         if (dirty & I915_UPLOAD_TEX(unit))
+            nr_units++;
+      bytes += (2 + nr_units * 3) * sizeof(GLuint) * 2;
+   }
+
+   /* Constants and program: sizes are kept as counts of GLuints. */
+   if (dirty & I915_UPLOAD_CONSTANTS)
+      bytes += state->ConstantSize * sizeof(GLuint);
+   if (dirty & I915_UPLOAD_PROGRAM)
+      bytes += state->ProgramSize * sizeof(GLuint);
+
+   return bytes;
+}
+
+/* Push the state into the sarea and/or texture memory.
+ */
+static void
+i915_emit_state(struct intel_context *intel)
+{
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   struct i915_hw_state *state = &i915->state;
+   int i, count, aper_count;
+   GLuint dirty;
+   drm_intel_bo *aper_array[3 + I915_TEX_UNITS];
+   GET_CURRENT_CONTEXT(ctx);
+   BATCH_LOCALS;
+
+   /* We don't hold the lock at this point, so want to make sure that
+    * there won't be a buffer wrap between the state emits and the primitive
+    * emit header.
+    *
+    * It might be better to talk about explicit places where
+    * scheduling is allowed, rather than assume that it is whenever a
+    * batchbuffer fills up.
+    */
+   intel_batchbuffer_require_space(intel->batch,
+				   get_state_size(state) + INTEL_PRIM_EMIT_SIZE);
+   count = 0;   /* number of flush-and-retry attempts made so far */
+ again:
+   aper_count = 0;
+   dirty = get_dirty(state);
+
+   aper_array[aper_count++] = intel->batch->buf;
+   if (dirty & I915_UPLOAD_BUFFERS) {
+      aper_array[aper_count++] = state->draw_region->buffer;
+      if (state->depth_region)
+	 aper_array[aper_count++] = state->depth_region->buffer;
+   }
+
+   if (dirty & I915_UPLOAD_TEX_ALL) {
+      for (i = 0; i < I915_TEX_UNITS; i++) {
+	 if (dirty & I915_UPLOAD_TEX(i)) {
+	    if (state->tex_buffer[i]) {
+	       aper_array[aper_count++] = state->tex_buffer[i];
+	    }
+	 }
+      }
+   }
+
+   if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) {
+       if (count == 0) {
+	   count++;
+	   intel_batchbuffer_flush(intel->batch);
+	   goto again;
+       } else {
+	   _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state");
+	   assert(0);   /* NOTE(review): with NDEBUG, emission continues below */
+       }
+   }
+
+   /* work out list of buffers to emit */
+   
+   /* Do this here as we may have flushed the batchbuffer above,
+    * causing more state to be dirty!
+    */
+   dirty = get_dirty(state);
+   state->emitted |= dirty;
+   assert(get_dirty(state) == 0);
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty);
+
+   if (dirty & I915_UPLOAD_INVARIENT) {
+      if (INTEL_DEBUG & DEBUG_STATE)
+         fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
+      i915_emit_invarient_state(intel);
+   }
+
+   if (dirty & I915_UPLOAD_RASTER_RULES) {
+      if (INTEL_DEBUG & DEBUG_STATE)
+         fprintf(stderr, "I915_UPLOAD_RASTER_RULES:\n");
+      emit(intel, state->RasterRules, sizeof(state->RasterRules));
+   }
+
+   if (dirty & I915_UPLOAD_CTX) {
+      if (INTEL_DEBUG & DEBUG_STATE)
+         fprintf(stderr, "I915_UPLOAD_CTX:\n");
+
+      emit(intel, state->Ctx, sizeof(state->Ctx));
+   }
+
+   if (dirty & I915_UPLOAD_BUFFERS) {
+      GLuint count;   /* NOTE(review): shadows the retry counter above */
+
+      if (INTEL_DEBUG & DEBUG_STATE)
+         fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
+
+      count = 14;
+      if (state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP)
+         count++;
+      if (state->depth_region)
+         count += 3;
+
+      BEGIN_BATCH(count);
+      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
+      OUT_RELOC(state->draw_region->buffer,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+
+      if (state->depth_region) {
+         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
+         OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
+         OUT_RELOC(state->depth_region->buffer,
+		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+      }
+
+      OUT_BATCH(state->Buffer[I915_DESTREG_DV0]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_DV1]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
+
+      if (state->Buffer[I915_DESTREG_DRAWRECT0] != MI_NOOP)
+         OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT0]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT1]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT2]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT3]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT4]);
+      OUT_BATCH(state->Buffer[I915_DESTREG_DRAWRECT5]);
+
+      ADVANCE_BATCH();
+   }
+
+   if (dirty & I915_UPLOAD_STIPPLE) {
+      if (INTEL_DEBUG & DEBUG_STATE)
+         fprintf(stderr, "I915_UPLOAD_STIPPLE:\n");
+      emit(intel, state->Stipple, sizeof(state->Stipple));
+   }
+
+   if (dirty & I915_UPLOAD_FOG) {
+      if (INTEL_DEBUG & DEBUG_STATE)
+         fprintf(stderr, "I915_UPLOAD_FOG:\n");
+      emit(intel, state->Fog, sizeof(state->Fog));
+   }
+
+   /* Combine all the dirty texture state into a single command to
+    * avoid lockups on I915 hardware. 
+    */
+   if (dirty & I915_UPLOAD_TEX_ALL) {
+      int nr = 0;
+
+      for (i = 0; i < I915_TEX_UNITS; i++)
+         if (dirty & I915_UPLOAD_TEX(i))
+            nr++;
+
+      BEGIN_BATCH(2 + nr * 3);
+      OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
+      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
+      for (i = 0; i < I915_TEX_UNITS; i++)
+         if (dirty & I915_UPLOAD_TEX(i)) {
+
+            if (state->tex_buffer[i]) {
+               OUT_RELOC(state->tex_buffer[i],
+			 I915_GEM_DOMAIN_SAMPLER, 0,
+                         state->tex_offset[i]);
+            }
+            else {
+               OUT_BATCH(state->tex_offset[i]);
+            }
+
+            OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
+            OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
+         }
+      ADVANCE_BATCH();
+
+      BEGIN_BATCH(2 + nr * 3);
+      OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr));
+      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
+      for (i = 0; i < I915_TEX_UNITS; i++)
+         if (dirty & I915_UPLOAD_TEX(i)) {
+            OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
+            OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
+            OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
+         }
+      ADVANCE_BATCH();
+   }
+
+   if (dirty & I915_UPLOAD_CONSTANTS) {
+      if (INTEL_DEBUG & DEBUG_STATE)
+         fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n");
+      emit(intel, state->Constant, state->ConstantSize * sizeof(GLuint));
+   }
+
+   if (dirty & I915_UPLOAD_PROGRAM) {
+      if (state->ProgramSize) {
+         if (INTEL_DEBUG & DEBUG_STATE)
+            fprintf(stderr, "I915_UPLOAD_PROGRAM:\n");
+
+         assert((state->Program[0] & 0x1ff) + 2 == state->ProgramSize);
+
+         emit(intel, state->Program, state->ProgramSize * sizeof(GLuint));
+         if (INTEL_DEBUG & DEBUG_STATE)
+            i915_disassemble_program(state->Program, state->ProgramSize);
+      }
+   }
+
+   intel->batch->dirty_state &= ~dirty;
+   assert(get_dirty(state) == 0);
+   assert((intel->batch->dirty_state & (1<<1)) == 0);
+}
+
+static void
+i915_destroy_context(struct intel_context *intel)
+{
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   struct i915_hw_state *hw = &i915->state;
+   GLuint unit;
+
+   /* Release the regions and texture buffers the hw state refers to. */
+   intel_region_release(&hw->draw_region);
+   intel_region_release(&hw->depth_region);
+   for (unit = 0; unit < I915_TEX_UNITS; unit++) {
+      if (hw->tex_buffer[unit] == NULL)
+         continue;
+      drm_intel_bo_unreference(hw->tex_buffer[unit]);
+      hw->tex_buffer[unit] = NULL;
+   }
+   _tnl_free_vertices(&intel->ctx);
+}
+
+void
+i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region,
+			     uint32_t buffer_id)
+{
+   uint32_t info = buffer_id;
+
+   /* With no region, only the command and the buffer id are written. */
+   if (region != NULL) {
+      info |= BUF_3D_PITCH(region->pitch * region->cpp);
+      if (region->tiling != I915_TILING_NONE)
+         info |= BUF_3D_TILED_SURFACE |
+            (region->tiling == I915_TILING_Y ? BUF_3D_TILE_WALK_Y : 0);
+   }
+
+   state[0] = _3DSTATE_BUF_INFO_CMD;
+   state[1] = info;
+}
+/* Point the destination-buffer state at the given color/depth regions. */
+static void
+i915_set_draw_region(struct intel_context *intel,
+                     struct intel_region *color_regions[],
+                     struct intel_region *depth_region,
+		     GLuint num_regions)
+{
+   struct i915_context *i915 = i915_context(&intel->ctx);
+   GLcontext *ctx = &intel->ctx;
+   struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   GLuint value;
+   struct i915_hw_state *state = &i915->state;
+   uint32_t draw_x, draw_y, draw_offset;
+
+   if (state->draw_region != color_regions[0]) {
+      intel_region_release(&state->draw_region);
+      intel_region_reference(&state->draw_region, color_regions[0]);
+   }
+   if (state->depth_region != depth_region) {
+      intel_region_release(&state->depth_region);
+      intel_region_reference(&state->depth_region, depth_region);
+   }
+
+   /*
+    * Set stride/cpp values
+    */
+   i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_CBUFADDR0],
+				color_regions[0], BUF_3D_ID_COLOR_BACK);
+
+   i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_DBUFADDR0],
+				depth_region, BUF_3D_ID_DEPTH);
+
+   /*
+    * Compute/set I915_DESTREG_DV1 value
+    */
+   value = (DSTORG_HORT_BIAS(0x8) |     /* .5 */
+            DSTORG_VERT_BIAS(0x8) |     /* .5 */
+            LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL);
+   if (irb != NULL) {
+      switch (irb->Base.Format) {
+      case MESA_FORMAT_ARGB8888:
+      case MESA_FORMAT_XRGB8888:
+	 value |= DV_PF_8888;
+	 break;
+      case MESA_FORMAT_RGB565:
+	 value |= DV_PF_565 | DITHER_FULL_ALWAYS;
+	 break;
+      case MESA_FORMAT_ARGB1555:
+	 value |= DV_PF_1555 | DITHER_FULL_ALWAYS;
+	 break;
+      case MESA_FORMAT_ARGB4444:
+	 value |= DV_PF_4444 | DITHER_FULL_ALWAYS;
+	 break;
+      default:
+	 _mesa_problem(ctx, "Bad renderbuffer format: %d\n",
+		       irb->Base.Format);
+      }
+   }
+
+   /* Not quite safe, hence hidden behind an option: changing this bit
+    * needs an MI_FLUSH of the pipeline, and it may only be set when a depth
+    * buffer is defined -- so depth_region is checked for NULL first.
+    */
+   if (intel->is_945 && intel->use_early_z &&
+       depth_region && depth_region->tiling != I915_TILING_NONE)
+      value |= CLASSIC_EARLY_DEPTH;
+
+   if (depth_region && depth_region->cpp == 4) {
+      value |= DEPTH_FRMT_24_FIXED_8_OTHER;
+   }
+   else {
+      value |= DEPTH_FRMT_16_FIXED;
+   }
+   state->Buffer[I915_DESTREG_DV1] = value;
+
+   /* We set up the drawing rectangle to be offset into the color
+    * region's location in the miptree.  If it doesn't match with
+    * depth's offsets, we can't render to it.
+    *
+    * (Well, not actually true -- the hw grew a bit to let depth's
+    * offset get forced to 0,0.  We may want to use that if people are
+    * hitting that case.  Also, some configurations may be supportable
+    * by tweaking the start offset of the buffers around, which we
+    * can't do in general due to tiling)
+    */
+   FALLBACK(intel, I915_FALLBACK_DRAW_OFFSET,
+	    (depth_region && color_regions[0]) &&
+	    (depth_region->draw_x != color_regions[0]->draw_x ||
+	     depth_region->draw_y != color_regions[0]->draw_y));
+
+   if (color_regions[0]) {
+      draw_x = color_regions[0]->draw_x;
+      draw_y = color_regions[0]->draw_y;
+   } else if (depth_region) {
+      draw_x = depth_region->draw_x;
+      draw_y = depth_region->draw_y;
+   } else {
+      draw_x = 0;
+      draw_y = 0;
+   }
+
+   draw_offset = (draw_y << 16) | draw_x;
+
+   /* When changing drawing rectangle offset, an MI_FLUSH is first required. */
+   if (draw_offset != i915->last_draw_offset) {
+      FALLBACK(intel, I915_FALLBACK_DRAW_OFFSET,
+               (ctx->DrawBuffer->Width + draw_x > 2048) ||
+               (ctx->DrawBuffer->Height + draw_y > 2048));
+
+      state->Buffer[I915_DESTREG_DRAWRECT0] = MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE;
+      i915->last_draw_offset = draw_offset;
+   } else
+      state->Buffer[I915_DESTREG_DRAWRECT0] = MI_NOOP;
+
+   state->Buffer[I915_DESTREG_DRAWRECT1] = _3DSTATE_DRAWRECT_INFO;
+   state->Buffer[I915_DESTREG_DRAWRECT2] = 0;
+   state->Buffer[I915_DESTREG_DRAWRECT3] = draw_offset;
+   state->Buffer[I915_DESTREG_DRAWRECT4] =
+      ((ctx->DrawBuffer->Width + draw_x - 1) & 0xffff) |
+      ((ctx->DrawBuffer->Height + draw_y - 1) << 16);
+   state->Buffer[I915_DESTREG_DRAWRECT5] = draw_offset;
+
+   I915_STATECHANGE(i915, I915_UPLOAD_BUFFERS);
+}
+
+
+
+static void
+i915_new_batch(struct intel_context *intel)
+{
+   struct i915_context *i915 = i915_context(&intel->ctx);
+
+   /* Starting a new batchbuffer: clear `emitted` so that all state is
+    * emitted again, and reset the remembered draw offset.  Hardware
+    * contexts would be an alternative, but they have some difficulties
+    * (physical address requirements). */
+   i915->last_draw_offset = 0;
+   i915->state.emitted = 0;
+}
+
+static void
+i915_assert_not_dirty(struct intel_context *intel)
+{
+   /* get_dirty() must report nothing left to emit. */
+   const GLuint pending = get_dirty(&i915_context(&intel->ctx)->state);
+   assert(pending == 0);
+}
+/* Fill the intel_context vtbl of `i915` with the hooks used by this driver. */
+void
+i915InitVtbl(struct i915_context *i915)
+{
+   i915->intel.vtbl.check_vertex_size = i915_check_vertex_size;
+   i915->intel.vtbl.destroy = i915_destroy_context;
+   i915->intel.vtbl.emit_state = i915_emit_state;
+   i915->intel.vtbl.new_batch = i915_new_batch;
+   i915->intel.vtbl.reduced_primitive_state = i915_reduced_primitive_state;
+   i915->intel.vtbl.render_start = i915_render_start;
+   i915->intel.vtbl.render_prevalidate = i915_render_prevalidate;
+   i915->intel.vtbl.set_draw_region = i915_set_draw_region;
+   i915->intel.vtbl.update_texture_state = i915UpdateTextureState;
+   i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty;
+   i915->intel.vtbl.finish_batch = intel_finish_vb;
+}
diff --git a/src/mesa/drivers/dri/i915/intel_batchbuffer.c b/src/mesa/drivers/dri/i915/intel_batchbuffer.c
new file mode 120000
index 0000000000..d38cdf31cc
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_batchbuffer.c
@@ -0,0 +1 @@
+../intel/intel_batchbuffer.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_blit.c b/src/mesa/drivers/dri/i915/intel_blit.c
new file mode 120000
index 0000000000..dd6c8d17c2
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_blit.c
@@ -0,0 +1 @@
+../intel/intel_blit.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_buffer_objects.c b/src/mesa/drivers/dri/i915/intel_buffer_objects.c
new file mode 120000
index 0000000000..e06dd3c8d3
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_buffer_objects.c
@@ -0,0 +1 @@
+../intel/intel_buffer_objects.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_buffers.c b/src/mesa/drivers/dri/i915/intel_buffers.c
new file mode 120000
index 0000000000..c86daa49f4
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_buffers.c
@@ -0,0 +1 @@
+../intel/intel_buffers.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_clear.c b/src/mesa/drivers/dri/i915/intel_clear.c
new file mode 120000
index 0000000000..9a2a742a0d
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_clear.c
@@ -0,0 +1 @@
+../intel/intel_clear.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_context.c b/src/mesa/drivers/dri/i915/intel_context.c
new file mode 120000
index 0000000000..27a1cbb255
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_context.c
@@ -0,0 +1 @@
+../intel/intel_context.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_decode.c b/src/mesa/drivers/dri/i915/intel_decode.c
new file mode 120000
index 0000000000..f671b6cbb1
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_decode.c
@@ -0,0 +1 @@
+../intel/intel_decode.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_extensions.c b/src/mesa/drivers/dri/i915/intel_extensions.c
new file mode 120000
index 0000000000..a2f3e8cd20
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_extensions.c
@@ -0,0 +1 @@
+../intel/intel_extensions.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_extensions_es2.c b/src/mesa/drivers/dri/i915/intel_extensions_es2.c
new file mode 120000
index 0000000000..0ec1ceee78
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_extensions_es2.c
@@ -0,0 +1 @@
+../intel/intel_extensions_es2.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_fbo.c b/src/mesa/drivers/dri/i915/intel_fbo.c
new file mode 120000
index 0000000000..a19f86dcc5
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_fbo.c
@@ -0,0 +1 @@
+../intel/intel_fbo.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_mipmap_tree.c b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
new file mode 120000
index 0000000000..242fed0b6a
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_mipmap_tree.c
@@ -0,0 +1 @@
+../intel/intel_mipmap_tree.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_pixel.c b/src/mesa/drivers/dri/i915/intel_pixel.c
new file mode 120000
index 0000000000..d733c5e874
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_pixel.c
@@ -0,0 +1 @@
+../intel/intel_pixel.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c
new file mode 120000
index 0000000000..9085c7b039
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_pixel_bitmap.c
@@ -0,0 +1 @@
+../intel/intel_pixel_bitmap.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_pixel_copy.c b/src/mesa/drivers/dri/i915/intel_pixel_copy.c
new file mode 120000
index 0000000000..ee43360590
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_pixel_copy.c
@@ -0,0 +1 @@
+../intel/intel_pixel_copy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_pixel_draw.c b/src/mesa/drivers/dri/i915/intel_pixel_draw.c
new file mode 120000
index 0000000000..8431a24edf
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_pixel_draw.c
@@ -0,0 +1 @@
+../intel/intel_pixel_draw.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_pixel_read.c b/src/mesa/drivers/dri/i915/intel_pixel_read.c
new file mode 120000
index 0000000000..cc4589f4d4
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_pixel_read.c
@@ -0,0 +1 @@
+../intel/intel_pixel_read.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_regions.c b/src/mesa/drivers/dri/i915/intel_regions.c
new file mode 120000
index 0000000000..89b2f15c10
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_regions.c
@@ -0,0 +1 @@
+../intel/intel_regions.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c
new file mode 100644
index 0000000000..ec209391ab
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -0,0 +1,280 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware acceleration where possible.
+ *
+ */
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
+
+#include "tnl/t_context.h"
+#include "tnl/t_vertex.h"
+#include "tnl/t_pipeline.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_tris.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Try to simulate missing primitives with indexed vertices.
+ */
+/* Primitive capability switches, defined ahead of the
+ * tnl_dd/t_dd_dmatmp.h include further down (presumably read by that
+ * template -- TODO confirm).  1 = enabled, 0 = disabled.
+ */
+#define HAVE_POINTS      0      /* Has it, but can't use because subpixel has to
+                                 * be adjusted for points on the INTEL/I845G
+                                 */
+#define HAVE_LINES       1
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0      /* has it, template can't use it yet */
+#define HAVE_TRI_FANS    1
+#define HAVE_POLYGONS    1
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+
+#define HAVE_ELTS        0      /* indexed vertices are not handled by this path */
+
+/* Hardware primitive code for each GL primitive mode, indexed by the GL
+ * enum value (GL_POINTS .. GL_POLYGON).  Entries of 0 are modes this path
+ * does not emit directly (see the HAVE_* switches above).
+ * Read-only lookup table, hence const like its sibling tables below.
+ */
+static const uint32_t hw_prim[GL_POLYGON + 1] = {
+   0,                           /* GL_POINTS */
+   PRIM3D_LINELIST,             /* GL_LINES */
+   PRIM3D_LINESTRIP,            /* GL_LINE_LOOP */
+   PRIM3D_LINESTRIP,            /* GL_LINE_STRIP */
+   PRIM3D_TRILIST,              /* GL_TRIANGLES */
+   PRIM3D_TRISTRIP,             /* GL_TRIANGLE_STRIP */
+   PRIM3D_TRIFAN,               /* GL_TRIANGLE_FAN */
+   0,                           /* GL_QUADS */
+   0,                           /* GL_QUAD_STRIP */
+   PRIM3D_POLY                  /* GL_POLYGON */
+};
+
+/* Reduced primitive class (points, lines or triangles) for each GL
+ * primitive mode, indexed by the GL enum value.  The result is handed to
+ * vtbl.reduced_primitive_state() and compared in choose_render().
+ */
+static const GLenum reduced_prim[GL_POLYGON + 1] = {
+   GL_POINTS,                   /* GL_POINTS */
+   GL_LINES,                    /* GL_LINES */
+   GL_LINES,                    /* GL_LINE_LOOP */
+   GL_LINES,                    /* GL_LINE_STRIP */
+   GL_TRIANGLES,                /* GL_TRIANGLES */
+   GL_TRIANGLES,                /* GL_TRIANGLE_STRIP */
+   GL_TRIANGLES,                /* GL_TRIANGLE_FAN */
+   GL_TRIANGLES,                /* GL_QUADS */
+   GL_TRIANGLES,                /* GL_QUAD_STRIP */
+   GL_TRIANGLES                 /* GL_POLYGON */
+};
+
+/* Per-mode multiplier, indexed by the GL enum value.  choose_render()
+ * multiplies a primitive's vertex count by this factor when estimating
+ * how many vertices the fallback path would emit.
+ */
+static const int scale_prim[GL_POLYGON + 1] = {
+   0,                           /* GL_POINTS: fallback case */
+   1,                           /* GL_LINES */
+   2,                           /* GL_LINE_LOOP */
+   2,                           /* GL_LINE_STRIP */
+   1,                           /* GL_TRIANGLES */
+   3,                           /* GL_TRIANGLE_STRIP */
+   3,                           /* GL_TRIANGLE_FAN */
+   0,                           /* GL_QUADS: fallback case */
+   0,                           /* GL_QUAD_STRIP: fallback case */
+   3                            /* GL_POLYGON */
+};
+
+
+/**
+ * Switch the DMA render path to a new GL primitive mode.
+ *
+ * Fires any pending vertices, then programs the reduced-primitive state
+ * and the hardware primitive type looked up for \p prim.
+ *
+ * \param prim  GL primitive mode used to index reduced_prim[] / hw_prim[].
+ */
+static void
+intelDmaPrimitive(struct intel_context *intel, GLenum prim)
+{
+   /* Debug trace, compiled out. */
+   if (0) {
+      fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim));
+   }
+
+   INTEL_FIREVERTICES(intel);
+
+   {
+      const GLenum rprim = reduced_prim[prim];
+      const uint32_t hwprim = hw_prim[prim];
+
+      intel->vtbl.reduced_primitive_state(intel, rprim);
+      intel_set_prim(intel, hwprim);
+   }
+}
+
+/**
+ * Number of whole vertices that fit in an empty vertex store.
+ *
+ * Without VBOs the store is the batchbuffer minus 1500 bytes; otherwise it
+ * is INTEL_VB_SIZE bytes.  Each vertex occupies vertex_size * 4 bytes.
+ */
+static INLINE GLuint intel_get_vb_max(struct intel_context *intel)
+{
+   GLuint capacity = INTEL_VB_SIZE;
+
+   if (intel->intelScreen->no_vbo)
+      capacity = intel->batch->size - 1500;
+
+   capacity /= (intel->vertex_size * 4);
+
+   return capacity;
+}
+
+/**
+ * Number of whole vertices that still fit in the current vertex store.
+ *
+ * With VBOs this is the space left after prim.current_offset; without
+ * them it is the same value as intel_get_vb_max().
+ */
+static INLINE GLuint intel_get_current_max(struct intel_context *intel)
+{
+   if (!intel->intelScreen->no_vbo) {
+      return (INTEL_VB_SIZE - intel->prim.current_offset) / (intel->vertex_size * 4);
+   }
+
+   return intel_get_vb_max(intel);
+}
+
+/* Hook macros defined for the tnl_dd/t_dd_dmatmp.h template included at
+ * the end of this group.  TAG() prefixes names with intel_; the
+ * intel_validate_render and intel_render_tab_verts symbols used further
+ * down are presumably generated by that template -- TODO confirm.
+ */
+#define LOCAL_VARS struct intel_context *intel = intel_context(ctx)
+#define INIT( prim ) 				\
+do {						\
+   intelDmaPrimitive( intel, prim );		\
+} while (0)
+
+#define FLUSH() INTEL_FIREVERTICES(intel)
+
+/* Vertex capacity of a fresh store and of the store currently in use. */
+#define GET_SUBSEQUENT_VB_MAX_VERTS() intel_get_vb_max(intel)
+#define GET_CURRENT_VB_MAX_VERTS() intel_get_current_max(intel)
+
+/* Reserve space for nr vertices; see intel_get_prim_space(). */
+#define ALLOC_VERTS(nr) intel_get_prim_space(intel, nr)
+
+/* Emit vertices j .. j+nr-1 into buf. */
+#define EMIT_VERTS( ctx, j, nr, buf ) \
+  _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf )
+
+#define TAG(x) intel_##x
+#include "tnl_dd/t_dd_dmatmp.h"
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+/**
+ * Heuristic to choose between the two render paths.
+ *
+ * Walks the primitives in \p VB and scores both paths: one point per
+ * primitive (render) or per reduced-primitive change (fallback), plus one
+ * point per 1024 dwords of estimated DMA.
+ *
+ * \return GL_TRUE to render on this path, GL_FALSE when its cost exceeds
+ *         the fallback's.
+ */
+static GLboolean
+choose_render(struct intel_context *intel, struct vertex_buffer *VB)
+{
+   int vertsz = intel->vertex_size;
+   int rprim = intel->reduced_primitive;
+   int nr_prims = 0;
+   int nr_rprims = 0;
+   int nr_rverts = 0;
+   int cost_render;
+   int cost_fallback;
+   int i;
+
+   for (i = 0; i < VB->PrimitiveCount; i++) {
+      GLuint mode = VB->Primitive[i].mode;
+      GLuint length = VB->Primitive[i].count;
+
+      /* Empty primitives cost nothing on either path. */
+      if (length == 0)
+         continue;
+
+      mode &= PRIM_MODE_MASK;
+
+      nr_prims++;
+      nr_rverts += length * scale_prim[mode];
+
+      /* Count every change of reduced primitive class. */
+      if (reduced_prim[mode] != rprim) {
+         rprim = reduced_prim[mode];
+         nr_rprims++;
+      }
+   }
+
+   /* One point for each generated primitive, plus one point for every
+    * 1024 dwords (4k) of dma.
+    * NOTE(review): i equals VB->PrimitiveCount here, not a vertex count --
+    * confirm this is the intended input to the render cost.
+    */
+   cost_render = nr_prims + (vertsz * i) / 1024;
+   cost_fallback = nr_rprims + (vertsz * nr_rverts) / 1024;
+
+   if (0)
+      fprintf(stderr, "cost render: %d fallback: %d\n",
+              cost_render, cost_fallback);
+
+   return (cost_render > cost_fallback) ? GL_FALSE : GL_TRUE;
+}
+
+
+/**
+ * Run callback of the "intel render" pipeline stage.
+ *
+ * Renders every primitive in the TNL vertex buffer through the
+ * intel_render_tab_verts[] table, or declines by returning GL_TRUE so
+ * that the following stage in intel_pipeline[] handles the work.
+ *
+ * \return GL_FALSE when rendering was completed here, GL_TRUE otherwise.
+ */
+static GLboolean
+intel_run_render(GLcontext * ctx, struct tnl_pipeline_stage *stage)
+{
+   struct intel_context *intel = intel_context(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint n;
+
+   intel->vtbl.render_prevalidate(intel);
+
+   /* Don't handle clipping or indexed vertices.  The three checks run in
+    * this order and stop at the first one that declines.
+    */
+   if (intel->RenderIndex != 0)
+      return GL_TRUE;
+   if (!intel_validate_render(ctx, VB))
+      return GL_TRUE;
+   if (!choose_render(intel, VB))
+      return GL_TRUE;
+
+   tnl->clipspace.new_inputs |= VERT_BIT_POS;
+
+   tnl->Driver.Render.Start(ctx);
+
+   for (n = 0; n < VB->PrimitiveCount; n++) {
+      const GLuint prim = _tnl_translate_prim(&VB->Primitive[n]);
+      const GLuint first = VB->Primitive[n].start;
+      const GLuint count = VB->Primitive[n].count;
+
+      /* Skip empty primitives. */
+      if (count != 0) {
+         intel_render_tab_verts[prim & PRIM_MODE_MASK] (ctx, first,
+                                                        first + count, prim);
+      }
+   }
+
+   tnl->Driver.Render.Finish(ctx);
+
+   INTEL_FIREVERTICES(intel);
+
+   return GL_FALSE;             /* finished the pipe */
+}
+
+/* Pipeline stage descriptor for the direct-to-dma render path.  Only the
+ * name and the run callback are provided; the four slots in between are
+ * left NULL.
+ */
+static const struct tnl_pipeline_stage _intel_render_stage = {
+   "intel render",
+   NULL,
+   NULL,
+   NULL,
+   NULL,
+   intel_run_render             /* run */
+};
+
+/* TNL pipeline exported by this file: the _tnl_* stages with
+ * _intel_render_stage inserted immediately before _tnl_render_stage.
+ * The list is terminated by a 0 entry.
+ */
+const struct tnl_pipeline_stage *intel_pipeline[] = {
+   &_tnl_vertex_transform_stage,
+   &_tnl_vertex_cull_stage,
+   &_tnl_normal_transform_stage,
+   &_tnl_lighting_stage,
+   &_tnl_fog_coordinate_stage,
+   &_tnl_texgen_stage,
+   &_tnl_texture_transform_stage,
+   &_tnl_point_attenuation_stage,
+   &_tnl_vertex_program_stage,
+#if 1
+   &_intel_render_stage,        /* ADD: unclipped rastersetup-to-dma */
+#endif
+   &_tnl_render_stage,
+   0,                           /* end of list */
+};
diff --git a/src/mesa/drivers/dri/i915/intel_screen.c b/src/mesa/drivers/dri/i915/intel_screen.c
new file mode 120000
index 0000000000..f2db48272b
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_screen.c
@@ -0,0 +1 @@
+../intel/intel_screen.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_span.c b/src/mesa/drivers/dri/i915/intel_span.c
new file mode 120000
index 0000000000..05e5e8e583
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_span.c
@@ -0,0 +1 @@
+../intel/intel_span.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_state.c b/src/mesa/drivers/dri/i915/intel_state.c
new file mode 120000
index 0000000000..519672fc35
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_state.c
@@ -0,0 +1 @@
+../intel/intel_state.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_structs.h b/src/mesa/drivers/dri/i915/intel_structs.h
new file mode 100644
index 0000000000..522e3bd92c
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_structs.h
@@ -0,0 +1,132 @@
+#ifndef INTEL_STRUCTS_H
+#define INTEL_STRUCTS_H
+
+/* Bitfield view of the first dword of the xy_*_blit packets declared
+ * below.  The field widths add up to 32 bits (8+3+1+8+1+1+7+3).
+ * NOTE(review): bitfield bit order is ABI-dependent; presumably this
+ * layout assumes LSB-first allocation -- confirm for new targets.
+ */
+struct br0 {
+   GLuint length:8;
+   GLuint pad0:3;
+   GLuint dst_tiled:1;
+   GLuint pad1:8;
+   GLuint write_rgb:1;
+   GLuint write_alpha:1;
+   GLuint opcode:7;             /* see the OPCODE_XY_* defines */
+   GLuint client:3;             /* see CLIENT_2D */
+};
+
+   
+/* Bitfield view of the second dword of the xy_*_blit packets declared
+ * below.  The field widths add up to 32 bits (16+8+2+3+1+1+1).
+ */
+struct br13 {
+   GLint dest_pitch:16;         /* signed 16-bit field */
+   GLuint rop:8;
+   GLuint color_depth:2;
+   GLuint pad1:3;
+   GLuint mono_source_transparency:1;
+   GLuint clipping_enable:1;
+   GLuint pad0:1;
+};
+
+
+
+/* This is an attempt to move some of the 2D interaction in this
+ * driver to using structs for packets rather than a bunch of #defines
+ * and dwords.
+ */
+
+/* Colour-fill blit packet: br0, br13, two coordinate dwords, the
+ * destination base address and the fill colour (six members, each
+ * presumably one 32-bit dword).
+ */
+struct xy_color_blit {
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw2;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw3;
+   
+   GLuint dest_base_addr;
+   GLuint color;
+};
+
+/* Source-copy blit packet: the destination part matches xy_color_blit
+ * (br0, br13, dw2, dw3, dest_base_addr) and is followed by the source
+ * origin, source pitch and source base address.
+ */
+struct xy_src_copy_blit {
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw2;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw3;
+   
+   GLuint dest_base_addr;
+
+   struct {
+      GLuint src_x1:16;
+      GLuint src_y1:16;
+   } dw5;
+
+   struct {
+      GLint src_pitch:16;       /* signed, like br13.dest_pitch */
+      GLuint pad:16;
+   } dw6;
+   
+   GLuint src_base_addr;
+};
+
+/* Setup blit packet: br0, br13, a clip rectangle (dw2/dw3), then the
+ * destination base address, background and foreground colours and the
+ * pattern base address.
+ */
+struct xy_setup_blit {
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint clip_x1:16;
+      GLuint clip_y1:16;
+   } dw2;
+
+   struct {
+      GLuint clip_x2:16;
+      GLuint clip_y2:16;
+   } dw3;
+      
+   GLuint dest_base_addr;
+   GLuint background_color;
+   GLuint foreground_color;
+   GLuint pattern_base_addr;
+};
+
+
+/* Header of a text-immediate blit packet.  Unlike the other packets it
+ * does not start with struct br0: dw0 has its own layout (field widths
+ * 8+3+1+4+1+5+7+3 = 32 bits).  There is no br13 member either.
+ */
+struct xy_text_immediate_blit {
+   struct {
+      GLuint length:8;
+      GLuint pad2:3;
+      GLuint dst_tiled:1;
+      GLuint pad1:4;
+      GLuint byte_packed:1;
+      GLuint pad0:5;
+      GLuint opcode:7;
+      GLuint client:3;
+   } dw0;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw1;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw2;   
+
+   /* Src bitmap data follows as inline dwords.
+    */
+};
+
+
+/* Values for the client (3-bit) and opcode (7-bit) fields of the packet
+ * headers above.  Note that this header defines no opcode for
+ * xy_src_copy_blit.
+ */
+#define CLIENT_2D 0x2
+#define OPCODE_XY_SETUP_BLT 0x1
+#define OPCODE_XY_COLOR_BLT 0x50
+#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31
+
+#endif
diff --git a/src/mesa/drivers/dri/i915/intel_syncobj.c b/src/mesa/drivers/dri/i915/intel_syncobj.c
new file mode 120000
index 0000000000..0b2e56ab24
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_syncobj.c
@@ -0,0 +1 @@
+../intel/intel_syncobj.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_tex.c b/src/mesa/drivers/dri/i915/intel_tex.c
new file mode 120000
index 0000000000..d77ce749a3
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_tex.c
@@ -0,0 +1 @@
+../intel/intel_tex.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_tex_copy.c b/src/mesa/drivers/dri/i915/intel_tex_copy.c
new file mode 120000
index 0000000000..87196c5d1e
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_tex_copy.c
@@ -0,0 +1 @@
+../intel/intel_tex_copy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_tex_format.c b/src/mesa/drivers/dri/i915/intel_tex_format.c
new file mode 120000
index 0000000000..3415f75470
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_tex_format.c
@@ -0,0 +1 @@
+../intel/intel_tex_format.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_tex_image.c b/src/mesa/drivers/dri/i915/intel_tex_image.c
new file mode 120000
index 0000000000..567abe4974
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_tex_image.c
@@ -0,0 +1 @@
+../intel/intel_tex_image.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_tex_layout.c b/src/mesa/drivers/dri/i915/intel_tex_layout.c
new file mode 120000
index 0000000000..fe61b44194
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_tex_layout.c
@@ -0,0 +1 @@
+../intel/intel_tex_layout.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_tex_subimage.c b/src/mesa/drivers/dri/i915/intel_tex_subimage.c
new file mode 120000
index 0000000000..b3a8a3d7ca
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_tex_subimage.c
@@ -0,0 +1 @@
+../intel/intel_tex_subimage.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_tex_validate.c b/src/mesa/drivers/dri/i915/intel_tex_validate.c
new file mode 120000
index 0000000000..41a75674c2
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_tex_validate.c
@@ -0,0 +1 @@
+../intel/intel_tex_validate.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c
new file mode 100644
index 0000000000..ede111b87a
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_tris.c
@@ -0,0 +1,1274 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/** @file intel_tris.c
+ *
+ * This file contains functions for managing the vertex buffer and emitting
+ * primitives into it.
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "main/texobj.h"
+#include "main/state.h"
+#include "main/dd.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_vertex.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_tris.h"
+#include "intel_batchbuffer.h"
+#include "intel_buffers.h"
+#include "intel_reg.h"
+#include "intel_span.h"
+#include "i830_context.h"
+#include "i830_reg.h"
+
+static void intelRenderPrimitive(GLcontext * ctx, GLenum prim);
+static void intelRasterPrimitive(GLcontext * ctx, GLenum rprim,
+                                 GLuint hwprim);
+
+/**
+ * Close the inline primitive opened by intel_start_inline().
+ *
+ * If at least 8 bytes were written since prim.start_ptr (the placeholder
+ * dword plus something after it), the placeholder is overwritten with the
+ * primitive command; otherwise the bytes are dropped by rewinding the
+ * batch pointer.  In both cases the inline-primitive state is cleared.
+ */
+static void
+intel_flush_inline_primitive(struct intel_context *intel)
+{
+   GLuint used = intel->batch->ptr - intel->prim.start_ptr;
+
+   assert(intel->prim.primitive != ~0);
+
+   if (used >= 8) {
+      /* Patch the reserved slot: command | primitive type | length. */
+      *(int *) intel->prim.start_ptr = (_3DPRIMITIVE |
+                                        intel->prim.primitive |
+                                        (used / 4 - 2));
+   }
+   else {
+      /* Nothing but the placeholder was emitted: discard it. */
+      intel->batch->ptr -= used;
+   }
+
+   intel->prim.flush = NULL;
+   intel->prim.start_ptr = NULL;
+   intel->prim.primitive = ~0;
+}
+
+/**
+ * Begin an inline primitive of hardware type \p prim in the batchbuffer.
+ *
+ * Emits state, then writes a single placeholder dword whose address is
+ * remembered in prim.start_ptr; intel_flush_inline_primitive() is
+ * registered as the flush callback and later fills that dword in.
+ */
+static void intel_start_inline(struct intel_context *intel, uint32_t prim)
+{
+   BATCH_LOCALS;
+
+   intel->vtbl.emit_state(intel);
+
+   /* Set for the duration of the placeholder emit and cleared below, so
+    * that the slot lands in the same batch as the state just emitted.
+    */
+   intel->no_batch_wrap = GL_TRUE;
+
+   /*printf("%s *", __progname);*/
+
+   /* Emit a slot which will be filled with the inline primitive
+    * command later.
+    */
+   BEGIN_BATCH(1);
+
+   assert((intel->batch->dirty_state & (1<<1)) == 0);
+
+   /* Record where the slot is before writing it. */
+   intel->prim.start_ptr = intel->batch->ptr;
+   intel->prim.primitive = prim;
+   intel->prim.flush = intel_flush_inline_primitive;
+
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   intel->no_batch_wrap = GL_FALSE;
+/*    printf(">"); */
+}
+
+/**
+ * Continue the current inline primitive in a new batchbuffer: close it,
+ * flush the batch, and open a primitive of the same type again.
+ */
+static void intel_wrap_inline(struct intel_context *intel)
+{
+   /* Saved up front: closing the primitive resets prim.primitive to ~0. */
+   const GLuint saved_prim = intel->prim.primitive;
+
+   intel_flush_inline_primitive(intel);
+   intel_batchbuffer_flush(intel->batch);
+   intel_start_inline(intel, saved_prim);  /* ??? */
+}
+
+/**
+ * Reserve \p dwords dwords at the end of the current inline primitive.
+ *
+ * Wraps to a new batch first when the current one has too little space.
+ *
+ * \return pointer to the reserved space inside the batchbuffer.
+ */
+static GLuint *intel_extend_inline(struct intel_context *intel, GLuint dwords)
+{
+   const GLuint bytes = dwords * sizeof(GLuint);
+   GLuint *dst;
+
+   /* Only valid while an inline primitive is open. */
+   assert(intel->prim.flush == intel_flush_inline_primitive);
+
+   if (intel_batchbuffer_space(intel->batch) < bytes) {
+      intel_wrap_inline(intel);
+   }
+
+   intel->vtbl.assert_not_dirty(intel);
+
+   /* Hand out the current write position and advance past it. */
+   dst = (GLuint *) intel->batch->ptr;
+   intel->batch->ptr += bytes;
+
+   return dst;
+}
+
+/**
+ * Sets the primitive type for a primitive sequence, flushing as needed.
+ *
+ * Without VBOs a new inline primitive is always started.  With VBOs the
+ * pending vertices are fired only when the type actually changes.
+ */
+void intel_set_prim(struct intel_context *intel, uint32_t prim)
+{
+   if (intel->intelScreen->no_vbo) {
+      /* No VBOs: vertices are emitted inline in the batchbuffer. */
+      intel_start_inline(intel, prim);
+   }
+   else if (intel->prim.primitive != prim) {
+      INTEL_FIREVERTICES(intel);
+      intel->prim.primitive = prim;
+   }
+}
+
+/**
+ * Returns mapped VB space for the given number of vertices.
+ *
+ * Without VBOs the space comes from the batchbuffer via
+ * intel_extend_inline().  Otherwise it is carved out of the local staging
+ * buffer prim.vb; a new vertex buffer object is started when none exists,
+ * when the request would not fit in INTEL_VB_SIZE, or when the running
+ * vertex count would reach 1 << 16.
+ *
+ * \param count  number of vertices (each vertex_size * 4 bytes).
+ * \return pointer where the caller writes the vertex data.
+ */
+uint32_t *intel_get_prim_space(struct intel_context *intel, unsigned int count)
+{
+   uint32_t *addr;
+
+   if (intel->intelScreen->no_vbo) {
+      return intel_extend_inline(intel, count * intel->vertex_size);
+   }
+
+   /* Check for space in the existing VB */
+   if (intel->prim.vb_bo == NULL ||
+       (intel->prim.current_offset +
+	count * intel->vertex_size * 4) > INTEL_VB_SIZE ||
+       (intel->prim.count + count) >= (1 << 16)) {
+      /* Flush existing prim if any */
+      INTEL_FIREVERTICES(intel);
+
+      intel_finish_vb(intel);
+
+      /* Start a new VB.  The staging buffer is allocated once and reused.
+       * NOTE(review): neither malloc() nor drm_intel_bo_alloc() is checked
+       * for failure; a NULL prim.vb would be written through below.
+       */
+      if (intel->prim.vb == NULL)
+	 intel->prim.vb = malloc(INTEL_VB_SIZE);
+      intel->prim.vb_bo = drm_intel_bo_alloc(intel->bufmgr, "vb",
+					     INTEL_VB_SIZE, 4);
+      intel->prim.start_offset = 0;
+      intel->prim.current_offset = 0;
+   }
+
+   intel->prim.flush = intel_flush_prim;
+
+   /* Hand out the next free bytes of the staging buffer and account for
+    * them in both the byte offset and the vertex count.
+    */
+   addr = (uint32_t *)(intel->prim.vb + intel->prim.current_offset);
+   intel->prim.current_offset += intel->vertex_size * 4 * count;
+   intel->prim.count += count;
+
+   return addr;
+}
+
+/**
+ * Dispatches the accumulated primitive to the batchbuffer.
+ *
+ * Emits state, then a 5-dword sequence that points the hardware at the
+ * vertex buffer object (starting at the primitive's start offset) and
+ * issues an indirect sequential _3DPRIMITIVE for the accumulated vertex
+ * count.  Does nothing when no vertices are pending.
+ */
+void intel_flush_prim(struct intel_context *intel)
+{
+   drm_intel_bo *aper_array[2];
+   drm_intel_bo *vb_bo;
+   unsigned int offset, count;
+   BATCH_LOCALS;
+
+   /* A primitive type must have been set before flushing. */
+   assert(intel->prim.primitive != ~0);
+
+   if (intel->prim.count == 0)
+      return;
+
+   /* Clear the current prims out of the context state so that a batch
+    * flush triggered by emit_state doesn't loop back to flush_prim again.
+    * A reference on the buffer object is held until the end of this
+    * function.
+    */
+   vb_bo = intel->prim.vb_bo;
+   drm_intel_bo_reference(vb_bo);
+   count = intel->prim.count;
+   intel->prim.count = 0;
+   offset = intel->prim.start_offset;
+   /* The next primitive starts where this one ended, rounded up to a
+    * multiple of 128 on gen < 3.
+    */
+   intel->prim.start_offset = intel->prim.current_offset;
+   if (intel->gen < 3)
+      intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128);
+   intel->prim.flush = NULL;
+
+   intel->vtbl.emit_state(intel);
+
+   /* When the aperture check on batch + VB returns non-zero, flush the
+    * batch and emit the state again into the new one.
+    */
+   aper_array[0] = intel->batch->buf;
+   aper_array[1] = vb_bo;
+   if (dri_bufmgr_check_aperture_space(aper_array, 2)) {
+      intel_batchbuffer_flush(intel->batch);
+      intel->vtbl.emit_state(intel);
+   }
+
+   /* Ensure that we don't start a new batch for the following emit, which
+    * depends on the state just emitted. emit_state should be making sure we
+    * have the space for this.
+    */
+   intel->no_batch_wrap = GL_TRUE;
+
+   /* Check that we actually emitted the state into this batch, using the
+    * UPLOAD_CTX bit as the signal.
+    */
+   assert((intel->batch->dirty_state & (1<<1)) == 0);
+
+#if 0
+   printf("emitting %d..%d=%d vertices size %d\n", offset,
+	  intel->prim.current_offset, count,
+	  intel->vertex_size * 4);
+#endif
+
+   if (intel->gen >= 3) {
+      BEGIN_BATCH(5);
+      OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+		I1_LOAD_S(0) | I1_LOAD_S(1) | 1);
+      /* S0: relocated address of the first vertex */
+      assert((offset & ~S0_VB_OFFSET_MASK) == 0);
+      OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, offset);
+      /* S1: vertex width and pitch, both vertex_size */
+      OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) |
+		(intel->vertex_size << S1_VERTEX_PITCH_SHIFT));
+
+      OUT_BATCH(_3DPRIMITIVE |
+		PRIM_INDIRECT |
+		PRIM_INDIRECT_SEQUENTIAL |
+		intel->prim.primitive |
+		count);
+      OUT_BATCH(0); /* Beginning vertex index */
+      ADVANCE_BATCH();
+   } else {
+      struct i830_context *i830 = i830_context(&intel->ctx);
+
+      BEGIN_BATCH(5);
+      OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+		I1_LOAD_S(0) | I1_LOAD_S(2) | 1);
+      /* S0 */
+      assert((offset & ~S0_VB_OFFSET_MASK_830) == 0);
+      OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0,
+		offset | (intel->vertex_size << S0_VB_PITCH_SHIFT_830) |
+		S0_VB_ENABLE_830);
+      /* S2
+       * This is somewhat unfortunate -- VB width is tied up with
+       * vertex format data that we've already uploaded through
+       * _3DSTATE_VFT[01]_CMD.  We may want to replace emits of VFT state with
+       * STATE_IMMEDIATE_1 like this to avoid duplication.
+       */
+      OUT_BATCH((i830->state.Ctx[I830_CTXREG_VF] & VFT0_TEX_COUNT_MASK) >>
+		VFT0_TEX_COUNT_SHIFT << S2_TEX_COUNT_SHIFT_830 |
+		(i830->state.Ctx[I830_CTXREG_VF2] << 16) |
+		intel->vertex_size << S2_VERTEX_0_WIDTH_SHIFT_830);
+
+      OUT_BATCH(_3DPRIMITIVE |
+		PRIM_INDIRECT |
+		PRIM_INDIRECT_SEQUENTIAL |
+		intel->prim.primitive |
+		count);
+      OUT_BATCH(0); /* Beginning vertex index */
+      ADVANCE_BATCH();
+   }
+
+   intel->no_batch_wrap = GL_FALSE;
+
+   /* Drop the reference taken at the top. */
+   drm_intel_bo_unreference(vb_bo);
+}
+
+/**
+ * Uploads the locally-accumulated VB into the buffer object and releases
+ * the context's reference to that object.
+ *
+ * Staging vertices locally avoids thrashing the cachelines in and out as
+ * the buffer gets filled, dispatched, then reused as the hardware completes
+ * rendering from it, and also lets us clflush less if we dispatch with a
+ * partially-filled VB.
+ *
+ * This is called normally from get_space when we're finishing a BO, but also
+ * at batch flush time so that we don't try accessing the contents of a
+ * just-dispatched buffer.
+ *
+ * The upload covers bytes [0, prim.start_offset) of the staging buffer.
+ * Nothing happens when there is no current buffer object.
+ */
+void intel_finish_vb(struct intel_context *intel)
+{
+   drm_intel_bo *bo = intel->prim.vb_bo;
+
+   if (bo != NULL) {
+      drm_intel_bo_subdata(bo, 0, intel->prim.start_offset, intel->prim.vb);
+      drm_intel_bo_unreference(bo);
+      intel->prim.vb_bo = NULL;
+   }
+}
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+
+/* COPY_DWORDS(j, vb, vertsize, v)
+ *
+ * Copy vertsize dwords from v to vb and leave vb pointing just past the
+ * copied data, so consecutive invocations append vertices back to back.
+ * j is a scratch variable supplied by the caller; vb must be an lvalue.
+ *
+ * i386 builds use an inline "rep ; movsl"; all other targets use a plain
+ * loop.
+ */
+#ifdef __i386__
+#define COPY_DWORDS( j, vb, vertsize, v )			\
+do {								\
+   int __tmp;							\
+   __asm__ __volatile__( "rep ; movsl"				\
+			 : "=%c" (j), "=D" (vb), "=S" (__tmp)	\
+			 : "0" (vertsize),			\
+			 "D" ((long)vb),			\
+			 "S" ((long)v) );			\
+} while (0)
+#else
+#define COPY_DWORDS( j, vb, vertsize, v )	\
+do {						\
+   for ( j = 0 ; j < vertsize ; j++ ) {		\
+      vb[j] = ((GLuint *)v)[j];			\
+   }						\
+   vb += vertsize;				\
+} while (0)
+#endif
+
+/**
+ * Emit a quad as six vertices (two triangles).
+ *
+ * Flat shading:   (v0, v1, v3) (v1, v2, v3) -- v3 is in third position of
+ *                 both triangles, as required for the provoking vertex.
+ * Smooth shading: (v0, v1, v2) (v0, v2, v3) -- trifan order, which gives
+ *                 better rasterization.
+ */
+static void
+intel_draw_quad(struct intel_context *intel,
+                intelVertexPtr v0,
+                intelVertexPtr v1, intelVertexPtr v2, intelVertexPtr v3)
+{
+   GLuint dwords = intel->vertex_size;
+   GLuint *dst = intel_get_prim_space(intel, 6);
+   const GLboolean flat = (intel->ctx.Light.ShadeModel == GL_FLAT);
+   /* Only the third and fourth emitted vertices depend on the shade model. */
+   intelVertexPtr third = flat ? v3 : v2;
+   intelVertexPtr fourth = flat ? v1 : v0;
+   int n;
+
+   COPY_DWORDS(n, dst, dwords, v0);
+   COPY_DWORDS(n, dst, dwords, v1);
+   COPY_DWORDS(n, dst, dwords, third);
+   COPY_DWORDS(n, dst, dwords, fourth);
+   COPY_DWORDS(n, dst, dwords, v2);
+   COPY_DWORDS(n, dst, dwords, v3);
+}
+
+/**
+ * Emit one triangle: reserve space for three vertices and copy v0, v1 and
+ * v2 into it in that order.
+ */
+static void
+intel_draw_triangle(struct intel_context *intel,
+                    intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2)
+{
+   GLuint dwords = intel->vertex_size;
+   GLuint *dst = intel_get_prim_space(intel, 3);
+   int n;
+
+   COPY_DWORDS(n, dst, dwords, v0);
+   COPY_DWORDS(n, dst, dwords, v1);
+   COPY_DWORDS(n, dst, dwords, v2);
+}
+
+
+/**
+ * Emit one line: reserve space for two vertices and copy v0 then v1 into
+ * it.
+ */
+static void
+intel_draw_line(struct intel_context *intel,
+                intelVertexPtr v0, intelVertexPtr v1)
+{
+   GLuint dwords = intel->vertex_size;
+   GLuint *dst = intel_get_prim_space(intel, 2);
+   int n;
+
+   COPY_DWORDS(n, dst, dwords, v0);
+   COPY_DWORDS(n, dst, dwords, v1);
+}
+
+
+/**
+ * Emit one point vertex.
+ *
+ * The first two dwords are written as floats from v0->v.x and v0->v.y;
+ * the remaining dwords are copied from v0->ui[].
+ *
+ * NOTE(review): this code was labelled "adjust for sub pixel position --
+ * still required for conform", yet x and y are stored unmodified here.
+ * Confirm whether an adjustment is still expected.
+ */
+static void
+intel_draw_point(struct intel_context *intel, intelVertexPtr v0)
+{
+   GLuint dwords = intel->vertex_size;
+   GLuint *dst = intel_get_prim_space(intel, 1);
+   int k = 2;
+
+   *(float *) &dst[0] = v0->v.x;
+   *(float *) &dst[1] = v0->v.y;
+
+   while (k < dwords) {
+      dst[k] = v0->ui[k];
+      k++;
+   }
+}
+
+
+
+/***********************************************************************
+ *                Fixup for ARB_point_parameters                       *
+ ***********************************************************************/
+
+/**
+ * Draw a point after applying the point-size threshold/clamp fixup.
+ *
+ * Currently not working - VERT_ATTRIB_POINTSIZE isn't correctly
+ * represented in the fragment program InputsRead field.
+ *
+ * Sizes at or above Point.Threshold are clamped to Point.MaxSize.  Smaller
+ * sizes are replaced by max(Threshold, MinSize) and alpha is scaled by
+ * (size / Threshold)^2.  The final size is never below 1.0.  When either
+ * value changed, the vertex is patched for the draw and then restored.
+ */
+static void
+intel_atten_point(struct intel_context *intel, intelVertexPtr v0)
+{
+   GLcontext *ctx = &intel->ctx;
+   GLfloat psz[4], col[4];
+   GLfloat orig_psz, orig_alpha;
+   GLboolean modified;
+
+   _tnl_get_attr(ctx, v0, _TNL_ATTRIB_POINTSIZE, psz);
+   _tnl_get_attr(ctx, v0, _TNL_ATTRIB_COLOR0, col);
+
+   orig_psz = psz[0];
+   orig_alpha = col[3];
+
+   if (psz[0] >= ctx->Point.Threshold) {
+      psz[0] = MIN2(psz[0], ctx->Point.MaxSize);
+   }
+   else {
+      /* Fade instead of shrinking below the threshold. */
+      GLfloat dsize = psz[0] / ctx->Point.Threshold;
+      psz[0] = MAX2(ctx->Point.Threshold, ctx->Point.MinSize);
+      col[3] *= dsize * dsize;
+   }
+
+   if (psz[0] < 1.0)
+      psz[0] = 1.0;
+
+   modified = (orig_psz != psz[0] || orig_alpha != col[3]);
+
+   if (modified) {
+      _tnl_set_attr(ctx, v0, _TNL_ATTRIB_POINTSIZE, psz);
+      _tnl_set_attr(ctx, v0, _TNL_ATTRIB_COLOR0, col);
+   }
+
+   intel_draw_point(intel, v0);
+
+   if (modified) {
+      /* Put the caller's vertex back the way it was. */
+      psz[0] = orig_psz;
+      col[3] = orig_alpha;
+
+      _tnl_set_attr(ctx, v0, _TNL_ATTRIB_POINTSIZE, psz);
+      _tnl_set_attr(ctx, v0, _TNL_ATTRIB_COLOR0, col);
+   }
+}
+
+
+
+
+
+/***********************************************************************
+ *                Fixup for I915 WPOS texture coordinate                *
+ ***********************************************************************/
+
+
+
+static void
+intel_wpos_triangle(struct intel_context *intel,
+                    intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2)
+{
+   /* Copy each vertex's first wpos_size bytes into its slot wpos_offset
+    * bytes in (all copies first), then rewrite each slot's Y as Height - Y. */
+   const GLuint slot_off = intel->wpos_offset;
+   const GLuint nbytes = intel->wpos_size;
+   intelVertexPtr tri[3];
+   GLfloat *slot[3];
+   GLuint k;
+
+   tri[0] = v0, tri[1] = v1, tri[2] = v2;
+   for (k = 0; k < 3; k++) {
+      slot[k] = (GLfloat *) ((char *) tri[k] + slot_off);
+      __memcpy(slot[k], tri[k], nbytes);
+   }
+   for (k = 0; k < 3; k++)
+      slot[k][1] = -slot[k][1] + intel->ctx.DrawBuffer->Height;
+   intel_draw_triangle(intel, v0, v1, v2);
+}
+
+
+static void
+intel_wpos_line(struct intel_context *intel,
+                intelVertexPtr v0, intelVertexPtr v1)
+{
+   const GLuint slot_off = intel->wpos_offset;
+   const GLuint nbytes = intel->wpos_size;
+   GLfloat *a = (GLfloat *) ((char *) v0 + slot_off);
+   GLfloat *b = (GLfloat *) ((char *) v1 + slot_off);
+
+   /* Seed each endpoint's WPOS slot from the start of its own vertex ... */
+   __memcpy(a, v0, nbytes);
+   __memcpy(b, v1, nbytes);
+   /* ... then rewrite the slot's Y as Height - Y, and draw. */
+   a[1] = -a[1] + intel->ctx.DrawBuffer->Height;
+   b[1] = -b[1] + intel->ctx.DrawBuffer->Height;
+   intel_draw_line(intel, v0, v1);
+}
+
+
+static void
+intel_wpos_point(struct intel_context *intel, intelVertexPtr v0)
+{
+   const GLuint nbytes = intel->wpos_size;
+   GLfloat *slot = (GLfloat *) ((char *) v0 + intel->wpos_offset);
+
+   /* Seed the WPOS slot from the vertex start, then make its Y Height - Y. */
+   __memcpy(slot, v0, nbytes);
+   slot[1] = -slot[1] + intel->ctx.DrawBuffer->Height;
+   /* Emit the patched vertex through the plain point path. */
+   intel_draw_point(intel, v0);
+}
+
+
+
+
+
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+#define TRI( a, b, c )		/* hook if DO_FALLBACK, else direct */	\
+do { 						\
+   if (DO_FALLBACK)				\
+      intel->draw_tri( intel, a, b, c );	\
+   else						\
+      intel_draw_triangle( intel, a, b, c );	\
+} while (0)
+/* In the fallback case a quad is drawn as triangles (a,b,d) and (b,c,d). */
+#define QUAD( a, b, c, d )			\
+do { 						\
+   if (DO_FALLBACK) {				\
+      intel->draw_tri( intel, a, b, d );	\
+      intel->draw_tri( intel, b, c, d );	\
+   } else					\
+      intel_draw_quad( intel, a, b, c, d );	\
+} while (0)
+/* LINE and POINT: intel->draw_* hook when DO_FALLBACK, intel_draw_* otherwise. */
+#define LINE( v0, v1 )				\
+do { 						\
+   if (DO_FALLBACK)				\
+      intel->draw_line( intel, v0, v1 );	\
+   else						\
+      intel_draw_line( intel, v0, v1 );		\
+} while (0)
+
+#define POINT( v0 )				\
+do { 						\
+   if (DO_FALLBACK)				\
+      intel->draw_point( intel, v0 );		\
+   else						\
+      intel_draw_point( intel, v0 );		\
+} while (0)
+
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+#define INTEL_OFFSET_BIT 	0x01	/* set for DD_TRI_OFFSET */
+#define INTEL_TWOSIDE_BIT	0x02	/* set for DD_TRI_LIGHT_TWOSIDE */
+#define INTEL_UNFILLED_BIT	0x04	/* set for DD_TRI_UNFILLED */
+#define INTEL_FALLBACK_BIT	0x08	/* draw through the intel->draw_* hooks */
+#define INTEL_MAX_TRIFUNC	0x10	/* number of bit combinations */
+
+/* One set of tnl entry points per bit combination; indexed by RenderIndex. */
+static struct
+{
+   tnl_points_func points;
+   tnl_line_func line;
+   tnl_triangle_func triangle;
+   tnl_quad_func quad;
+} rast_tab[INTEL_MAX_TRIFUNC];
+
+
+#define DO_FALLBACK (IND & INTEL_FALLBACK_BIT)	/* IND is set before each template include */
+#define DO_OFFSET   (IND & INTEL_OFFSET_BIT)
+#define DO_UNFILLED (IND & INTEL_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & INTEL_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+/* Switches read by the tnl_dd templates; exact meaning is defined there. */
+#define HAVE_SPEC         1
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define VERTEX            intelVertex
+#define TAB               rast_tab
+
+/* Only used to pull back colors into vertices (ie, we know color is
+ * floating point).  Note the swizzle: dst[0] <- src[2], dst[2] <- src[0].
+ */
+#define INTEL_COLOR( dst, src )				\
+do {							\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]);	\
+} while (0)
+/* Same swizzle for three components only; dst[3] is left untouched. */
+#define INTEL_SPEC( dst, src )				\
+do {							\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]);	\
+} while (0)
+
+
+#define DEPTH_SCALE intel->polygon_offset_scale
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) ((_v)->v.x)	/* arguments are parenthesized so that */
+#define VERT_Y(_v) ((_v)->v.y)	/* any expression may be passed; the   */
+#define VERT_Z(_v) ((_v)->v.z)	/* results are still usable as lvalues */
+#define AREA_IS_CCW( a ) ((a) > 0)
+#define GET_VERTEX(e) (intel->verts + ((e) * intel->vertex_size * sizeof(GLuint)))
+
+#define VERT_SET_RGBA( v, c )    if (coloroffset) INTEL_COLOR( v->ub4[coloroffset], c )
+#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset]
+#define VERT_SAVE_RGBA( idx )    if (coloroffset) color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx]
+/* Each macro is a no-op when its offset is 0.  NOTE(review): they expand to a bare `if`, so use them only as complete statements. */
+#define VERT_SET_SPEC( v, c )    if (specoffset) INTEL_SPEC( v->ub4[specoffset], c )
+#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset])
+#define VERT_SAVE_SPEC( idx )    if (specoffset) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
+
+#define LOCAL_VARS(n)	/* locals used by the VERT_* macros */		\
+   struct intel_context *intel = intel_context(ctx);			\
+   GLuint color[n] = { 0, }, spec[n] = { 0, };				\
+   GLuint coloroffset = intel->coloroffset;				\
+   GLuint specoffset = intel->specoffset;	/* an index, not a flag */	\
+   (void) color; (void) spec; (void) coloroffset; (void) specoffset;
+
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+static const GLuint hw_prim[GL_POLYGON + 1] = {
+   PRIM3D_POINTLIST,	/* GL_POINTS */
+   PRIM3D_LINELIST,	/* GL_LINES */
+   PRIM3D_LINELIST,	/* GL_LINE_LOOP */
+   PRIM3D_LINELIST,	/* GL_LINE_STRIP */
+   PRIM3D_TRILIST,	/* GL_TRIANGLES */
+   PRIM3D_TRILIST,	/* GL_TRIANGLE_STRIP */
+   PRIM3D_TRILIST,	/* GL_TRIANGLE_FAN */
+   PRIM3D_TRILIST,	/* GL_QUADS */
+   PRIM3D_TRILIST,	/* GL_QUAD_STRIP */
+   PRIM3D_TRILIST	/* GL_POLYGON */
+};
+/* With IND = INTEL_FALLBACK_BIT, DO_FALLBACK is nonzero inside t_dd_unfilled.h. */
+#define RASTERIZE(x) intelRasterPrimitive( ctx, x, hw_prim[x] )
+#define RENDER_PRIMITIVE intel->render_primitive
+#define TAG(x) x
+#define IND INTEL_FALLBACK_BIT
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+/* One t_dd_tritmp.h expansion per IND value (all 16 bit combinations); TAG() suffixes the generated names.  IND/TAG are presumably #undef'd by the template - TODO confirm. */
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_OFFSET_BIT|INTEL_UNFILLED_BIT)
+#define TAG(x) x##_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_OFFSET_BIT|INTEL_UNFILLED_BIT)
+#define TAG(x) x##_twoside_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_FALLBACK_BIT)
+#define TAG(x) x##_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_OFFSET_BIT|INTEL_FALLBACK_BIT)
+#define TAG(x) x##_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_FALLBACK_BIT)
+#define TAG(x) x##_twoside_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_OFFSET_BIT|INTEL_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_UNFILLED_BIT|INTEL_FALLBACK_BIT)
+#define TAG(x) x##_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_OFFSET_BIT|INTEL_UNFILLED_BIT|INTEL_FALLBACK_BIT)
+#define TAG(x) x##_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_UNFILLED_BIT|INTEL_FALLBACK_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (INTEL_TWOSIDE_BIT|INTEL_OFFSET_BIT|INTEL_UNFILLED_BIT| \
+	     INTEL_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+static void
+init_rast_tab(void)
+{
+   init();   /* one init*() per t_dd_tritmp.h include; presumably each fills its rast_tab[IND] slot - TODO confirm */
+   init_offset();
+   init_twoside();
+   init_twoside_offset();
+   init_unfilled();
+   init_offset_unfilled();
+   init_twoside_unfilled();
+   init_twoside_offset_unfilled();
+   init_fallback();
+   init_offset_fallback();
+   init_twoside_fallback();
+   init_twoside_offset_fallback();
+   init_unfilled_fallback();
+   init_offset_unfilled_fallback();
+   init_twoside_unfilled_fallback();
+   init_twoside_offset_unfilled_fallback();
+}
+
+
+/***********************************************************************
+ *                    Rasterization fallback helpers                   *
+ ***********************************************************************/
+
+
+/* This code is hit only when a mix of accelerated and unaccelerated
+ * primitives are being drawn, and only for the unaccelerated
+ * primitives.
+ */
+static void
+intel_fallback_tri(struct intel_context *intel,
+                   intelVertex * v0, intelVertex * v1, intelVertex * v2)
+{
+   GLcontext *ctx = &intel->ctx;
+   SWvertex v[3];
+
+   if (0)
+      fprintf(stderr, "\n%s\n", __FUNCTION__);
+   /* Presumably flushes vertices already queued for the hardware - TODO confirm. */
+   INTEL_FIREVERTICES(intel);
+   /* Translate to swrast vertices, then draw between span start/finish. */
+   _swsetup_Translate(ctx, v0, &v[0]);
+   _swsetup_Translate(ctx, v1, &v[1]);
+   _swsetup_Translate(ctx, v2, &v[2]);
+   intelSpanRenderStart(ctx);
+   _swrast_Triangle(ctx, &v[0], &v[1], &v[2]);
+   intelSpanRenderFinish(ctx);
+}
+
+
+static void
+intel_fallback_line(struct intel_context *intel,
+                    intelVertex * v0, intelVertex * v1)
+{
+   GLcontext *ctx = &intel->ctx;
+   SWvertex v[2];
+   /* Software path for one line: fire vertices, translate, draw with swrast. */
+   if (0)
+      fprintf(stderr, "\n%s\n", __FUNCTION__);
+
+   INTEL_FIREVERTICES(intel);
+
+   _swsetup_Translate(ctx, v0, &v[0]);
+   _swsetup_Translate(ctx, v1, &v[1]);
+   intelSpanRenderStart(ctx);
+   _swrast_Line(ctx, &v[0], &v[1]);
+   intelSpanRenderFinish(ctx);
+}
+
+static void
+intel_fallback_point(struct intel_context *intel,
+		     intelVertex * v0)
+{
+   GLcontext *ctx = &intel->ctx;
+   SWvertex v[1];
+   /* Software path for one point: fire vertices, translate, draw with swrast. */
+   if (0)
+      fprintf(stderr, "\n%s\n", __FUNCTION__);
+
+   INTEL_FIREVERTICES(intel);
+
+   _swsetup_Translate(ctx, v0, &v[0]);
+   intelSpanRenderStart(ctx);
+   _swrast_Point(ctx, &v[0]);
+   intelSpanRenderFinish(ctx);
+}
+
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+
+#define IND 0	/* DO_FALLBACK is 0: POINT/LINE/TRI/QUAD call intel_draw_* directly */
+#define V(x) (intelVertex *)(vertptr + ((x)*vertsize*sizeof(GLuint)))	/* needs locals vertptr, vertsize */
+#define RENDER_POINTS( start, count )	\
+   for ( ; start < count ; start++) POINT( V(ELT(start)) );
+#define RENDER_LINE( v0, v1 )         LINE( V(v0), V(v1) )
+#define RENDER_TRI(  v0, v1, v2 )     TRI(  V(v0), V(v1), V(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) QUAD( V(v0), V(v1), V(v2), V(v3) )
+#define INIT(x) intelRenderPrimitive( ctx, x )
+#undef LOCAL_VARS	/* replaces the LOCAL_VARS(n) used for the tritmp templates */
+#define LOCAL_VARS						\
+    struct intel_context *intel = intel_context(ctx);			\
+    GLubyte *vertptr = (GLubyte *)intel->verts;			\
+    const GLuint vertsize = intel->vertex_size;       	\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    (void) elt;
+#define RESET_STIPPLE
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS	/* presumably keeps these macros alive for the second include - TODO confirm */
+#define ELT(x) x	/* first expansion: vertices addressed directly */
+#define TAG(x) intel_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) intel_##x##_elts
+#define ELT(x) elt[x]	/* second expansion: vertices addressed through elt[] */
+#include "tnl/t_vb_rendertmp.h"
+
+/**********************************************************************/
+/*                   Render clipped primitives                        */
+/**********************************************************************/
+
+
+
+static void
+intelRenderClippedPoly(GLcontext * ctx, const GLuint * elts, GLuint n)
+{
+   struct intel_context *intel = intel_context(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *vbuf = &TNL_CONTEXT(ctx)->vb;
+   const GLuint saved_prim = intel->render_primitive;
+   GLuint *saved_elts = vbuf->Elts;
+
+   /* Point the vertex buffer's element list at the clipped indices and
+    * run them through the GL_POLYGON element renderer as one complete
+    * begin/end primitive; then put the original list back. */
+   vbuf->Elts = (GLuint *) elts;
+   tnl->Driver.Render.PrimTabElts[GL_POLYGON] (ctx, 0, n,
+                                               PRIM_BEGIN | PRIM_END);
+   vbuf->Elts = saved_elts;
+
+   /* Restore the render primitive, unless it already was GL_POLYGON.
+    */
+   if (saved_prim == GL_POLYGON)
+      return;
+   tnl->Driver.Render.PrimitiveNotify(ctx, saved_prim);
+}
+
+static void
+intelRenderClippedLine(GLcontext * ctx, GLuint ii, GLuint jj)
+{
+   /* A clipped line simply goes through the currently installed Line hook. */
+   TNLcontext *const tnl_ctx = TNL_CONTEXT(ctx);
+   (*tnl_ctx->Driver.Render.Line) (ctx, ii, jj);
+}
+
+static void
+intelFastRenderClippedPoly(GLcontext * ctx, const GLuint * elts, GLuint n)
+{
+   struct intel_context *intel = intel_context(ctx);
+   const GLuint vertsize = intel->vertex_size;   /* name is used by V() */
+   GLuint *vb = intel_get_prim_space(intel, (n - 2) * 3);   /* NOTE(review): assumes n >= 2; n - 2 wraps otherwise */
+   GLubyte *vertptr = (GLubyte *) intel->verts;   /* name is used by V() */
+   const GLuint *start = (const GLuint *) V(elts[0]);
+   int i, j;
+   /* Emit n - 2 triangles, each (elts[i-1], elts[i], elts[0]). */
+   for (i = 2; i < n; i++) {
+      COPY_DWORDS(j, vb, vertsize, V(elts[i - 1]));
+      COPY_DWORDS(j, vb, vertsize, V(elts[i]));
+      COPY_DWORDS(j, vb, vertsize, start);
+   }
+}
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+
+
+
+#define ANY_FALLBACK_FLAGS (DD_LINE_STIPPLE | DD_TRI_STIPPLE | DD_POINT_ATTEN | DD_POINT_SMOOTH | DD_TRI_SMOOTH)
+#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE | DD_TRI_OFFSET | DD_TRI_UNFILLED)
+/* Choose the rast_tab entry, the intel->draw_* hooks and the tnl render tables from ctx->_TriangleCaps and fragment-program WPOS use. */
+void
+intelChooseRenderState(GLcontext * ctx)
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct intel_context *intel = intel_context(ctx);
+   GLuint flags = ctx->_TriangleCaps;
+   const struct gl_fragment_program *fprog = ctx->FragmentProgram._Current;
+   GLboolean have_wpos = (fprog && (fprog->Base.InputsRead & FRAG_BIT_WPOS));
+   GLuint index = 0;
+   /* index accumulates INTEL_*_BIT flags and selects the rast_tab entry below. */
+   if (INTEL_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "\n%s\n", __FUNCTION__);
+
+   if ((flags & (ANY_FALLBACK_FLAGS | ANY_RASTER_FLAGS)) || have_wpos) {
+
+      if (flags & ANY_RASTER_FLAGS) {
+         if (flags & DD_TRI_LIGHT_TWOSIDE)
+            index |= INTEL_TWOSIDE_BIT;
+         if (flags & DD_TRI_OFFSET)
+            index |= INTEL_OFFSET_BIT;
+         if (flags & DD_TRI_UNFILLED)
+            index |= INTEL_UNFILLED_BIT;
+      }
+      /* A fragment program reading WPOS gets the intel_wpos_* hooks. */
+      if (have_wpos) {
+         intel->draw_point = intel_wpos_point;
+         intel->draw_line = intel_wpos_line;
+         intel->draw_tri = intel_wpos_triangle;
+
+         /* Make sure these get called:
+          */
+         index |= INTEL_FALLBACK_BIT;
+      }
+      else {
+         intel->draw_point = intel_draw_point;
+         intel->draw_line = intel_draw_line;
+         intel->draw_tri = intel_draw_triangle;
+      }
+
+      /* Hook in fallbacks for specific primitives.
+       */
+      if (flags & ANY_FALLBACK_FLAGS) {
+         if (flags & DD_LINE_STIPPLE)
+            intel->draw_line = intel_fallback_line;
+
+         if ((flags & DD_TRI_STIPPLE) && !intel->hw_stipple)
+            intel->draw_tri = intel_fallback_tri;
+
+         if (flags & DD_TRI_SMOOTH) {
+	    if (intel->conformance_mode > 0)
+	       intel->draw_tri = intel_fallback_tri;
+	 }
+
+         if (flags & DD_POINT_ATTEN) {
+	    if (0)
+	       intel->draw_point = intel_atten_point;
+	    else
+	       intel->draw_point = intel_fallback_point;
+	 }
+
+	 if (flags & DD_POINT_SMOOTH) {
+	    if (intel->conformance_mode > 0)
+	       intel->draw_point = intel_fallback_point;
+	 }
+
+         index |= INTEL_FALLBACK_BIT;
+      }
+   }
+   /* Reinstall the tnl render hooks only when the selected index changed. */
+   if (intel->RenderIndex != index) {
+      intel->RenderIndex = index;
+
+      tnl->Driver.Render.Points = rast_tab[index].points;
+      tnl->Driver.Render.Line = rast_tab[index].line;
+      tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+      tnl->Driver.Render.Quad = rast_tab[index].quad;
+      /* index 0 (no special cases): use this file's own render tables. */
+      if (index == 0) {
+         tnl->Driver.Render.PrimTabVerts = intel_render_tab_verts;
+         tnl->Driver.Render.PrimTabElts = intel_render_tab_elts;
+         tnl->Driver.Render.ClippedLine = line; /* from tritmp.h */
+         tnl->Driver.Render.ClippedPolygon = intelFastRenderClippedPoly;
+      }
+      else {
+         tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+         tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+         tnl->Driver.Render.ClippedLine = intelRenderClippedLine;
+         tnl->Driver.Render.ClippedPolygon = intelRenderClippedPoly;
+      }
+   }
+}
+
+static const GLenum reduced_prim[GL_POLYGON + 1] = {
+   GL_POINTS,		/* GL_POINTS */
+   GL_LINES,		/* GL_LINES */
+   GL_LINES,		/* GL_LINE_LOOP */
+   GL_LINES,		/* GL_LINE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLES */
+   GL_TRIANGLES,	/* GL_TRIANGLE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLE_FAN */
+   GL_TRIANGLES,	/* GL_QUADS */
+   GL_TRIANGLES,	/* GL_QUAD_STRIP */
+   GL_TRIANGLES		/* GL_POLYGON */
+};
+
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+
+
+
+static void
+intelRunPipeline(GLcontext * ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   /* Context textures stay locked for the whole run; unlocked at the end. */
+   _mesa_lock_context_textures(ctx);
+   
+   if (ctx->NewState)
+      _mesa_update_state_locked(ctx);
+   /* Consume the driver's dirty bits: texture state, then render-state choice. */
+   if (intel->NewGLState) {
+      if (intel->NewGLState & _NEW_TEXTURE) {
+         intel->vtbl.update_texture_state(intel);
+      }
+
+      if (!intel->Fallback) {
+         if (intel->NewGLState & _INTEL_NEW_RENDERSTATE)
+            intelChooseRenderState(ctx);
+      }
+
+      intel->NewGLState = 0;
+   }
+   /* Vertex-shader textures are mapped only around the tnl pipeline run. */
+   intel_map_vertex_shader_textures(ctx);
+   _tnl_run_pipeline(ctx);
+   intel_unmap_vertex_shader_textures(ctx);
+
+   _mesa_unlock_context_textures(ctx);
+}
+
+static void
+intelRenderStart(GLcontext * ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   /* tnl Render.Start hook: front-buffer check, vtbl render_start, emit state. */
+   intel_check_front_buffer_rendering(intel);
+   intel->vtbl.render_start(intel_context(ctx));
+   intel->vtbl.emit_state(intel);
+}
+
+static void
+intelRenderFinish(GLcontext * ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   /* tnl Render.Finish hook: flush swrast if fallback hooks were in use. */
+   if (intel->RenderIndex & INTEL_FALLBACK_BIT)
+      _swrast_flush(ctx);
+
+   INTEL_FIREVERTICES(intel);
+}
+
+
+
+
+ /* Switch to reduced primitive rprim / hardware primitive hwprim: update
+  * primitive-dependent state and, if the hardware primitive changes, fire
+  * the vertices queued so far before selecting the new one. */
+static void
+intelRasterPrimitive(GLcontext * ctx, GLenum rprim, GLuint hwprim)
+{
+   struct intel_context *intel = intel_context(ctx);
+
+   if (0)
+      fprintf(stderr, "%s %s %x\n", __FUNCTION__,
+              _mesa_lookup_enum_by_nr(rprim), hwprim);
+
+   intel->vtbl.reduced_primitive_state(intel, rprim);
+
+   /* Start a new primitive.  Arrange to have it flushed later on.
+    */
+   if (hwprim != intel->prim.primitive) {
+      INTEL_FIREVERTICES(intel);
+
+      intel_set_prim(intel, hwprim);
+   }
+}
+
+
+ /* tnl PrimitiveNotify hook: record the GL primitive about to be rendered
+  * and, unless unfilled triangles are active, select its hardware primitive. */
+static void
+intelRenderPrimitive(GLcontext * ctx, GLenum prim)
+{
+   struct intel_context *intel = intel_context(ctx);
+
+   if (0)
+      fprintf(stderr, "%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(prim));
+
+   /* Let some clipping routines know which primitive they're dealing
+    * with.
+    */
+   intel->render_primitive = prim;
+
+   /* Shortcircuit this when called from t_dd_rendertmp.h for unfilled
+    * triangles.  The rasterized primitive will always be reset by
+    * lower level functions in that case, potentially pingponging the
+    * state:
+    */
+   if (reduced_prim[prim] == GL_TRIANGLES &&
+       (ctx->_TriangleCaps & DD_TRI_UNFILLED))
+      return;
+
+   /* Set some primitive-dependent state and Start? a new primitive.
+    */
+   intelRasterPrimitive(ctx, reduced_prim[prim], hw_prim[prim]);
+}
+
+
+ /**********************************************************************/
+ /*           Transition to/from hardware rasterization.               */
+ /**********************************************************************/
+
+static char *fallbackStrings[] = {
+   [0] = "Draw buffer",
+   [1] = "Read buffer",
+   [2] = "Depth buffer",
+   [3] = "Stencil buffer",
+   [4] = "User disable",
+   [5] = "Render mode",
+   /* slots 6..11 are unnamed (NULL) */
+   [12] = "Texture",
+   [13] = "Color mask",
+   [14] = "Stencil",
+   [15] = "Stipple",
+   [16] = "Program",
+   [17] = "Logic op",
+   [18] = "Smooth polygon",
+   [19] = "Smooth point",
+   [20] = "point sprite coord origin",
+   [21] = "depth/color drawing offset",
+};
+/* Name for a single-bit fallback mask (index = bit position).  Unnamed
+ * slots, and positions past the end of the table, give "(unknown)". */
+static char *
+getFallbackString(GLuint bit)
+{
+   const GLuint count = sizeof(fallbackStrings) / sizeof(fallbackStrings[0]);
+   GLuint i = 0;
+   for (; bit > 1; bit >>= 1)
+      i++;
+   if (i >= count || !fallbackStrings[i]) return "(unknown)";
+   return fallbackStrings[i];
+}
+
+
+
+/**
+ * Enable/disable a fallback flag.
+ * \param bit  one of INTEL_FALLBACK_x flags.  \param mode  nonzero sets it.
+ */
+void
+intelFallback(struct intel_context *intel, GLbitfield bit, GLboolean mode)
+{
+   GLcontext *ctx = &intel->ctx;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLbitfield oldfallback = intel->Fallback;
+   /* Work happens only when the first bit is set or the last one is cleared. */
+   if (mode) {
+      intel->Fallback |= bit;
+      if (oldfallback == 0) {
+         intel_flush(ctx);
+         if (INTEL_DEBUG & DEBUG_FALLBACKS)
+            fprintf(stderr, "ENTER FALLBACK %x: %s\n",
+                    bit, getFallbackString(bit));
+         _swsetup_Wakeup(ctx);
+         intel->RenderIndex = ~0;   /* matches no index, so intelChooseRenderState reinstalls hooks */
+      }
+   }
+   else {
+      intel->Fallback &= ~bit;
+      if (oldfallback == bit) {
+         _swrast_flush(ctx);
+         if (INTEL_DEBUG & DEBUG_FALLBACKS)
+            fprintf(stderr, "LEAVE FALLBACK %s\n", getFallbackString(bit));
+         tnl->Driver.Render.Start = intelRenderStart;
+         tnl->Driver.Render.PrimitiveNotify = intelRenderPrimitive;
+         tnl->Driver.Render.Finish = intelRenderFinish;
+         tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+         tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+         tnl->Driver.Render.Interp = _tnl_interp;
+         /* Invalidate tnl vertex state and install this driver's attributes. */
+         _tnl_invalidate_vertex_state(ctx, ~0);
+         _tnl_invalidate_vertices(ctx, ~0);
+         _tnl_install_attrs(ctx,
+                            intel->vertex_attrs,
+                            intel->vertex_attr_count,
+                            intel->ViewportMatrix.m, 0);
+
+         intel->NewGLState |= _INTEL_NEW_RENDERSTATE;
+      }
+   }
+}
+
+union fi	/* overlays a GLfloat and a GLint; no use is visible in the code that follows */
+{
+   GLfloat f;
+   GLint i;
+};
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+
+void
+intelInitTriFuncs(GLcontext * ctx)
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   static int firsttime = 1;
+   /* rast_tab is a file-scope static, so it is filled only on the first call. */
+   if (firsttime) {
+      init_rast_tab();
+      firsttime = 0;
+   }
+   /* Install this driver's pipeline runner and render hooks into tnl. */
+   tnl->Driver.RunPipeline = intelRunPipeline;
+   tnl->Driver.Render.Start = intelRenderStart;
+   tnl->Driver.Render.Finish = intelRenderFinish;
+   tnl->Driver.Render.PrimitiveNotify = intelRenderPrimitive;
+   tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple;
+   tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+   tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+   tnl->Driver.Render.Interp = _tnl_interp;
+}
diff --git a/src/mesa/drivers/dri/i915/intel_tris.h b/src/mesa/drivers/dri/i915/intel_tris.h
new file mode 100644
index 0000000000..55b60a47f9
--- /dev/null
+++ b/src/mesa/drivers/dri/i915/intel_tris.h
@@ -0,0 +1,54 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTELTRIS_INC
+#define INTELTRIS_INC
+
+#include "main/mtypes.h"
+/* 32 * 1024; NOTE(review): presumably the vertex buffer size in bytes - confirm. */
+#define INTEL_VB_SIZE		(32 * 1024)
+/** 3 dwords of state_immediate and 2 of 3dprim, in intel_flush_prim */
+#define INTEL_PRIM_EMIT_SIZE	(5 * 4)
+/* State changes that make the driver re-run intelChooseRenderState(). */
+#define _INTEL_NEW_RENDERSTATE (_DD_NEW_LINE_STIPPLE |		\
+			       _DD_NEW_TRI_UNFILLED |		\
+			       _DD_NEW_TRI_LIGHT_TWOSIDE |	\
+			       _DD_NEW_TRI_OFFSET |		\
+			       _DD_NEW_TRI_STIPPLE |		\
+			       _NEW_PROGRAM |		\
+			       _NEW_POLYGONSTIPPLE)
+/* NOTE(review): intelChooseRenderState also tests point/smooth caps; confirm the bits above cover those changes. */
+extern void intelInitTriFuncs(GLcontext * ctx);
+
+extern void intelChooseRenderState(GLcontext * ctx);
+/* Primitive emission helpers; their definitions are not in this header. */
+void intel_set_prim(struct intel_context *intel, uint32_t prim);
+GLuint *intel_get_prim_space(struct intel_context *intel, unsigned int count);
+void intel_flush_prim(struct intel_context *intel);
+void intel_finish_vb(struct intel_context *intel);
+
+#endif /* INTELTRIS_INC */
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
new file mode 100644
index 0000000000..831981558d
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -0,0 +1,117 @@
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+# LIBNAME is presumably consumed by ../Makefile.template as the output name - confirm.
+LIBNAME = i965_dri.so
+
+DRIVER_SOURCES = \
+	intel_batchbuffer.c \
+	intel_blit.c \
+	intel_buffer_objects.c \
+	intel_buffers.c \
+	intel_clear.c \
+	intel_context.c \
+	intel_decode.c \
+	intel_extensions.c \
+	intel_extensions_es2.c \
+	intel_fbo.c \
+	intel_mipmap_tree.c \
+	intel_regions.c \
+	intel_screen.c \
+	intel_span.c \
+	intel_pixel.c \
+	intel_pixel_bitmap.c \
+	intel_pixel_copy.c \
+	intel_pixel_draw.c \
+	intel_pixel_read.c \
+	intel_state.c \
+	intel_syncobj.c \
+	intel_tex.c \
+	intel_tex_copy.c \
+	intel_tex_format.c \
+	intel_tex_image.c \
+	intel_tex_layout.c \
+	intel_tex_subimage.c \
+	intel_tex_validate.c \
+	brw_cc.c \
+	brw_clip.c \
+	brw_clip_line.c \
+	brw_clip_point.c \
+	brw_clip_state.c \
+	brw_clip_tri.c \
+	brw_clip_unfilled.c \
+	brw_clip_util.c \
+	brw_context.c \
+	brw_curbe.c \
+	brw_disasm.c \
+	brw_draw.c \
+	brw_draw_upload.c \
+	brw_eu.c \
+	brw_eu_debug.c \
+	brw_eu_emit.c \
+	brw_eu_util.c \
+	brw_fallback.c \
+	brw_gs.c \
+	brw_gs_emit.c \
+	brw_gs_state.c \
+	brw_misc_state.c \
+	brw_optimize.c \
+	brw_program.c \
+	brw_queryobj.c \
+	brw_sf.c \
+	brw_sf_emit.c \
+	brw_sf_state.c \
+	brw_state.c \
+	brw_state_batch.c \
+	brw_state_cache.c \
+	brw_state_dump.c \
+	brw_state_upload.c \
+	brw_tex.c \
+	brw_tex_layout.c \
+	brw_urb.c \
+	brw_util.c \
+	brw_vs.c \
+	brw_vs_constval.c \
+	brw_vs_emit.c \
+	brw_vs_state.c \
+	brw_vs_surface_state.c \
+	brw_vtbl.c \
+	brw_wm.c \
+	brw_wm_debug.c \
+	brw_wm_emit.c \
+	brw_wm_fp.c \
+	brw_wm_iz.c \
+	brw_wm_glsl.c \
+	brw_wm_pass0.c \
+	brw_wm_pass1.c \
+	brw_wm_pass2.c \
+	brw_wm_sampler_state.c \
+	brw_wm_state.c \
+	brw_wm_surface_state.c \
+	gen6_cc.c \
+	gen6_clip_state.c \
+	gen6_depthstencil.c \
+	gen6_gs_state.c \
+	gen6_sampler_state.c \
+	gen6_scissor_state.c \
+	gen6_sf_state.c \
+	gen6_urb.c \
+	gen6_viewport_state.c \
+	gen6_vs_state.c \
+	gen6_wm_state.c
+
+C_SOURCES = \
+	$(COMMON_SOURCES) \
+	$(DRIVER_SOURCES)
+
+ASM_SOURCES = 
+
+DRIVER_DEFINES = -I../intel -I../intel/server
+
+INCLUDES += $(INTEL_CFLAGS)
+DRI_LIB_DEPS += $(INTEL_LIBS)
+# Build rules come from ../Makefile.template (not shown here).
+include ../Makefile.template
+# These two objects are built from sources that live in ../intel.
+intel_decode.o: ../intel/intel_decode.c
+intel_tex_layout.o: ../intel/intel_tex_layout.c
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c
new file mode 100644
index 0000000000..cfce5d3140
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_cc.c
@@ -0,0 +1,233 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "main/macros.h"
+
+/**
+ * Rebuild the CC viewport (the depth range limits) from current GL state and
+ * store the resulting buffer from the state cache in brw->cc.vp_bo.
+ */
+void
+brw_update_cc_vp(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_cc_viewport ccv;
+
+   memset(&ccv, 0, sizeof(ccv));
+
+   /* Without depth clamping the full [0, 1] range is used. */
+   ccv.min_depth = 0.0;
+   ccv.max_depth = 1.0;
+
+   /* _NEW_TRANSFORM */
+   if (ctx->Transform.DepthClamp) {
+      /* _NEW_VIEWPORT: Near and Far may be given in either order. */
+      ccv.min_depth = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
+      ccv.max_depth = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
+   }
+
+   /* Release the previous buffer before storing the new one. */
+   drm_intel_bo_unreference(brw->cc.vp_bo);
+   brw->cc.vp_bo = brw_cache_data(&brw->cache, BRW_CC_VP, &ccv, sizeof(ccv));
+}
+
+/**
+ * Rewrite a blend factor so that destination alpha behaves as 1.0.
+ *
+ * Render targets without alpha bits (xRGB) have no stored destination alpha,
+ * so any factor in \c function that reads it is replaced by the constant it
+ * would evaluate to with destination alpha fixed at 1.0.  All other factors
+ * are returned unchanged.
+ */
+static GLenum
+fix_xRGB_alpha(GLenum function)
+{
+   if (function == GL_DST_ALPHA)
+      return GL_ONE;
+
+   if (function == GL_ONE_MINUS_DST_ALPHA ||
+       function == GL_SRC_ALPHA_SATURATE)
+      return GL_ZERO;
+
+   return function;
+}
+
+/**
+ * Prepare hook of the CC unit atom: registers the CC viewport buffer
+ * (brw->cc.vp_bo) through brw_add_validated_bo().
+ */
+static void
+prepare_cc_unit(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->cc.vp_bo);
+}
+
+/**
+ * Build the color calculator (CC) unit state from the current GL stencil,
+ * color and depth state, copy it into space obtained from brw_state_batch(),
+ * and emit a relocation so that the cc4 dword refers to the CC viewport
+ * buffer (brw->cc.vp_bo).
+ *
+ * Flags CACHE_NEW_CC_UNIT in brw->state.dirty.cache once the state is written.
+ */
+static void upload_cc_unit(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_cc_unit_state cc;
+   void *map;
+
+   /* Start from all-zero state; only enabled features are filled in below. */
+   memset(&cc, 0, sizeof(cc));
+
+   /* _NEW_STENCIL */
+   if (ctx->Stencil._Enabled) {
+      /* Index of the stencil state set to use for back faces. */
+      const unsigned back = ctx->Stencil._BackFace;
+
+      /* Front-face stencil state always comes from index 0. */
+      cc.cc0.stencil_enable = 1;
+      cc.cc0.stencil_func =
+	 intel_translate_compare_func(ctx->Stencil.Function[0]);
+      cc.cc0.stencil_fail_op =
+	 intel_translate_stencil_op(ctx->Stencil.FailFunc[0]);
+      cc.cc0.stencil_pass_depth_fail_op =
+	 intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]);
+      cc.cc0.stencil_pass_depth_pass_op =
+	 intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]);
+      cc.cc1.stencil_ref = ctx->Stencil.Ref[0];
+      cc.cc1.stencil_write_mask = ctx->Stencil.WriteMask[0];
+      cc.cc1.stencil_test_mask = ctx->Stencil.ValueMask[0];
+
+      /* Back-face state is only programmed when two-sided testing is on. */
+      if (ctx->Stencil._TestTwoSide) {
+	 cc.cc0.bf_stencil_enable = 1;
+	 cc.cc0.bf_stencil_func =
+	    intel_translate_compare_func(ctx->Stencil.Function[back]);
+	 cc.cc0.bf_stencil_fail_op =
+	    intel_translate_stencil_op(ctx->Stencil.FailFunc[back]);
+	 cc.cc0.bf_stencil_pass_depth_fail_op =
+	    intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]);
+	 cc.cc0.bf_stencil_pass_depth_pass_op =
+	    intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]);
+	 cc.cc1.bf_stencil_ref = ctx->Stencil.Ref[back];
+	 cc.cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back];
+	 cc.cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back];
+      }
+
+      /* Not really sure about this:
+       * stencil writes are enabled if either face in use has a non-zero
+       * write mask.
+       */
+      if (ctx->Stencil.WriteMask[0] ||
+	  (ctx->Stencil._TestTwoSide && ctx->Stencil.WriteMask[back]))
+	 cc.cc0.stencil_write_enable = 1;
+   }
+
+   /* _NEW_COLOR
+    *
+    * Logic op and blending are mutually exclusive here: blending is only
+    * considered when no logic op other than GL_COPY is enabled.
+    */
+   if (ctx->Color._LogicOpEnabled && ctx->Color.LogicOp != GL_COPY) {
+      cc.cc2.logicop_enable = 1;
+      cc.cc5.logicop_func = intel_translate_logic_op(ctx->Color.LogicOp);
+   } else if (ctx->Color.BlendEnabled) {
+      GLenum eqRGB = ctx->Color.BlendEquationRGB;
+      GLenum eqA = ctx->Color.BlendEquationA;
+      GLenum srcRGB = ctx->Color.BlendSrcRGB;
+      GLenum dstRGB = ctx->Color.BlendDstRGB;
+      GLenum srcA = ctx->Color.BlendSrcA;
+      GLenum dstA = ctx->Color.BlendDstA;
+
+      /* If the renderbuffer is XRGB, we have to frob the blend function to
+       * force the destination alpha to 1.0.  This means replacing GL_DST_ALPHA
+       * with GL_ONE and GL_ONE_MINUS_DST_ALPHA with GL_ZERO.
+       */
+      if (ctx->DrawBuffer->Visual.alphaBits == 0) {
+	 srcRGB = fix_xRGB_alpha(srcRGB);
+	 srcA   = fix_xRGB_alpha(srcA);
+	 dstRGB = fix_xRGB_alpha(dstRGB);
+	 dstA   = fix_xRGB_alpha(dstA);
+      }
+
+      /* For MIN/MAX equations both factors are forced to GL_ONE. */
+      if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+	 srcRGB = dstRGB = GL_ONE;
+      }
+
+      if (eqA == GL_MIN || eqA == GL_MAX) {
+	 srcA = dstA = GL_ONE;
+      }
+
+      cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+      cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
+      cc.cc6.blend_function = brw_translate_blend_equation(eqRGB);
+
+      cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+      cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
+      cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA);
+
+      cc.cc3.blend_enable = 1;
+      /* The separate alpha ("ia") settings are enabled only when they differ
+       * from the RGB ones.
+       */
+      cc.cc3.ia_blend_enable = (srcA != srcRGB ||
+				dstA != dstRGB ||
+				eqA != eqRGB);
+   }
+
+   if (ctx->Color.AlphaEnabled) {
+      cc.cc3.alpha_test = 1;
+      cc.cc3.alpha_test_func =
+	 intel_translate_compare_func(ctx->Color.AlphaFunc);
+      cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+
+      /* The reference value is stored as a single byte to match UNORM8. */
+      UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], ctx->Color.AlphaRef);
+   }
+
+   if (ctx->Color.DitherFlag) {
+      cc.cc5.dither_enable = 1;
+      cc.cc6.y_dither_offset = 0;
+      cc.cc6.x_dither_offset = 0;
+   }
+
+   /* _NEW_DEPTH */
+   if (ctx->Depth.Test) {
+      cc.cc2.depth_test = 1;
+      cc.cc2.depth_test_function =
+	 intel_translate_compare_func(ctx->Depth.Func);
+      cc.cc2.depth_write_enable = ctx->Depth.Mask;
+   }
+
+   /* CACHE_NEW_CC_VP
+    *
+    * Presumed buffer offset; it is patched by the relocation emitted at the
+    * end of this function.  NOTE(review): the >> 5 suggests the field is in
+    * 32-byte units -- confirm against the hardware documentation.
+    */
+   cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      cc.cc5.statistics_enable = 1;
+
+   /* Obtain space for the state (size, then an alignment-like argument of 64)
+    * and copy the finished structure into it.
+    */
+   map = brw_state_batch(brw, sizeof(cc), 64,
+			 &brw->cc.state_bo, &brw->cc.state_offset);
+   memcpy(map, &cc, sizeof(cc));
+   brw->state.dirty.cache |= CACHE_NEW_CC_UNIT;
+
+   /* Emit CC viewport relocation: the cc4 dword inside the state just
+    * written must refer to vp_bo.
+    */
+   drm_intel_bo_emit_reloc(brw->cc.state_bo, (brw->cc.state_offset +
+					      offsetof(struct brw_cc_unit_state,
+						       cc4)),
+			   brw->cc.vp_bo, 0,
+			   I915_GEM_DOMAIN_INSTRUCTION, 0);
+}
+
+/**
+ * Tracked-state atom for the CC unit.  The dirty flags mirror the state read
+ * by upload_cc_unit(): stencil, color and depth GL state, a new batch, and a
+ * new CC viewport buffer.
+ */
+const struct brw_tracked_state brw_cc_unit = {
+   .dirty = {
+      .mesa  = (_NEW_STENCIL |
+		_NEW_COLOR |
+		_NEW_DEPTH),
+      .brw   = (BRW_NEW_BATCH),
+      .cache = (CACHE_NEW_CC_VP)
+   },
+   .prepare = prepare_cc_unit,
+   .emit = upload_cc_unit,
+};
diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
new file mode 100644
index 0000000000..228ee3f3be
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -0,0 +1,283 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_clip.h"
+
+#define FRONT_UNFILLED_BIT  0x1
+#define BACK_UNFILLED_BIT   0x2
+
+
+/**
+ * Compile the clip program described by \p key and upload it to the program
+ * cache, leaving the resulting buffer in brw->clip.prog_bo and its auxiliary
+ * data pointer in brw->clip.prog_data.
+ */
+static void
+compile_clip_prog(struct brw_context *brw, struct brw_clip_prog_key *key)
+{
+   struct intel_context *intel = &brw->intel;
+   /* Registers in front of the first attribute: three on gen5, else one. */
+   const GLuint header_regs = (intel->gen == 5) ? 3 : 1;
+   struct brw_clip_compile c;
+   struct brw_instruction *insns;
+   const GLuint *program;
+   GLuint program_size;
+   GLuint nr_insns;
+   GLuint next_offset;
+   GLuint i;
+
+   memset(&c, 0, sizeof(c));
+
+   /* Begin the compilation. */
+   brw_init_compile(brw, &c.func);
+   c.func.single_program_flow = 1;
+   c.key = *key;
+
+   /* Need to locate the two positions present in vertex + header.
+    * These are currently hardcoded.
+    */
+   c.header_position_offset = ATTR_SIZE;
+
+   /* Enabled attributes are laid out back to back after the header. */
+   next_offset = header_regs * REG_SIZE;
+   for (i = 0; i < VERT_RESULT_MAX; i++) {
+      if (!(c.key.attrs & BITFIELD64_BIT(i)))
+	 continue;
+
+      c.offset[i] = next_offset;
+      next_offset += ATTR_SIZE;
+   }
+
+   /* Vertex size in attributes, registers and bytes.
+    * (are vertices packed, or reg-aligned?)
+    */
+   c.nr_attrs = brw_count_bits(c.key.attrs);
+   c.nr_regs = (c.nr_attrs + 1) / 2 + header_regs;
+   c.nr_bytes = c.nr_regs * REG_SIZE;
+
+   c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
+
+   /* For some reason the thread is spawned with only 4 channels
+    * unmasked.
+    */
+   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+   /* Would ideally have the option of producing a program which could
+    * do all three primitive types.
+    */
+   if (key->primitive == GL_TRIANGLES) {
+      if (key->do_unfilled)
+	 brw_emit_unfilled_clip(&c);
+      else
+	 brw_emit_tri_clip(&c);
+   }
+   else if (key->primitive == GL_LINES) {
+      brw_emit_line_clip(&c);
+   }
+   else if (key->primitive == GL_POINTS) {
+      brw_emit_point_clip(&c);
+   }
+   else {
+      assert(0);
+      return;
+   }
+
+   /* Fetch the generated program. */
+   program = brw_get_program(&c.func, &program_size);
+
+   if (INTEL_DEBUG & DEBUG_CLIP) {
+      insns = (struct brw_instruction *)program;
+      nr_insns = program_size / sizeof(struct brw_instruction);
+
+      printf("clip:\n");
+      for (i = 0; i < nr_insns; i++)
+	 brw_disasm(stdout, &insns[i], intel->gen);
+      printf("\n");
+   }
+
+   /* Upload, replacing whatever program buffer was bound before. */
+   drm_intel_bo_unreference(brw->clip.prog_bo);
+   brw->clip.prog_bo = brw_upload_cache_with_auxdata(&brw->cache,
+						     BRW_CLIP_PROG,
+						     &c.key, sizeof(c.key),
+						     NULL, 0,
+						     program, program_size,
+						     &c.prog_data,
+						     sizeof(c.prog_data),
+						     &brw->clip.prog_data);
+}
+
+/* Translate the polygon mode of one face into a CLIP_* fill mode and the
+ * polygon-offset enable that goes with it.  Any other mode leaves *fill and
+ * *offset as the caller initialised them.
+ */
+static void
+clip_face_fill_mode(const GLcontext *ctx, GLenum mode,
+		    GLuint *fill, GLuint *offset)
+{
+   if (mode == GL_FILL) {
+      *fill = CLIP_FILL;
+      *offset = 0;
+   }
+   else if (mode == GL_LINE) {
+      *fill = CLIP_LINE;
+      *offset = ctx->Polygon.OffsetLine;
+   }
+   else if (mode == GL_POINT) {
+      *fill = CLIP_POINT;
+      *offset = ctx->Polygon.OffsetPoint;
+   }
+}
+
+/* Build the clip program key from current state, look it up in the program
+ * cache and compile a new program on a miss.
+ */
+static void upload_clip_prog(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   struct brw_clip_prog_key key;
+
+   /* The key is compared by size, so every byte must start out zero. */
+   memset(&key, 0, sizeof(key));
+
+   /* BRW_NEW_REDUCED_PRIMITIVE */
+   key.primitive = intel->reduced_primitive;
+   /* CACHE_NEW_VS_PROG */
+   key.attrs = brw->vs.prog_data->outputs_written;
+   /* _NEW_LIGHT */
+   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
+   key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
+   /* _NEW_TRANSFORM */
+   key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
+
+   key.clip_mode = (intel->gen == 5) ? BRW_CLIPMODE_KERNEL_CLIP
+				     : BRW_CLIPMODE_NORMAL;
+
+   /* _NEW_POLYGON */
+   if (key.primitive == GL_TRIANGLES) {
+      const GLboolean cull_enabled = ctx->Polygon.CullFlag;
+      const GLenum cull_face = ctx->Polygon.CullFaceMode;
+      const GLenum front_mode = ctx->Polygon.FrontMode;
+      const GLenum back_mode = ctx->Polygon.BackMode;
+
+      if (cull_enabled && cull_face == GL_FRONT_AND_BACK) {
+	 key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
+      }
+      else {
+	 /* A face that is culled keeps CLIP_CULL and no offset. */
+	 GLuint fill_front = CLIP_CULL;
+	 GLuint fill_back = CLIP_CULL;
+	 GLuint offset_front = 0;
+	 GLuint offset_back = 0;
+
+	 if (!cull_enabled || cull_face != GL_FRONT)
+	    clip_face_fill_mode(ctx, front_mode, &fill_front, &offset_front);
+
+	 if (!cull_enabled || cull_face != GL_BACK)
+	    clip_face_fill_mode(ctx, back_mode, &fill_back, &offset_back);
+
+	 if (back_mode != GL_FILL || front_mode != GL_FILL) {
+	    key.do_unfilled = 1;
+
+	    /* Most cases the fixed function units will handle.  Cases where
+	     * one or more polygon faces are unfilled will require help:
+	     */
+	    key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
+
+	    if (offset_back || offset_front) {
+	       /* _NEW_POLYGON, _NEW_BUFFERS */
+	       key.offset_units = ctx->Polygon.OffsetUnits * intel->polygon_offset_scale;
+	       key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
+	    }
+
+	    /* Map front/back onto the winding order the program works with. */
+	    if (ctx->Polygon.FrontFace == GL_CCW) {
+	       key.fill_ccw = fill_front;
+	       key.fill_cw = fill_back;
+	       key.offset_ccw = offset_front;
+	       key.offset_cw = offset_back;
+	       if (ctx->Light.Model.TwoSide && key.fill_cw != CLIP_CULL)
+		  key.copy_bfc_cw = 1;
+	    }
+	    else if (ctx->Polygon.FrontFace == GL_CW) {
+	       key.fill_cw = fill_front;
+	       key.fill_ccw = fill_back;
+	       key.offset_cw = offset_front;
+	       key.offset_ccw = offset_back;
+	       if (ctx->Light.Model.TwoSide && key.fill_ccw != CLIP_CULL)
+		  key.copy_bfc_ccw = 1;
+	    }
+	 }
+      }
+   }
+
+   /* Swap the bound program for the cached one; compile on a miss. */
+   drm_intel_bo_unreference(brw->clip.prog_bo);
+   brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG,
+					&key, sizeof(key),
+					NULL, 0,
+					&brw->clip.prog_data);
+   if (brw->clip.prog_bo == NULL)
+      compile_clip_prog(brw, &key);
+}
+
+
+/* Tracked-state atom for the clip program.  The dirty flags mirror the state
+ * read while building the key in upload_clip_prog().  Only a prepare hook is
+ * provided.
+ */
+const struct brw_tracked_state brw_clip_prog = {
+   .dirty = {
+      .mesa  = (_NEW_LIGHT | _NEW_TRANSFORM | _NEW_POLYGON | _NEW_BUFFERS),
+      .brw   = BRW_NEW_REDUCED_PRIMITIVE,
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .prepare = upload_clip_prog
+};
diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h
new file mode 100644
index 0000000000..68222c6c27
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_clip.h
@@ -0,0 +1,175 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#ifndef BRW_CLIP_H
+#define BRW_CLIP_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_VERTS (3+6+6)	
+
+/* Note that if unfilled primitives are being emitted, we have to fix
+ * up polygon offset and flatshading at this point:
+ *
+ * This structure is the lookup key for cached clip programs.  brw_clip.c
+ * zeroes it with memset() and passes it to the cache by address and size,
+ * so the bitfields are padded explicitly (pad0) to a full 32 bits.
+ */
+struct brw_clip_prog_key {
+   GLbitfield64 attrs;		/* vertex shader outputs written */
+   GLuint primitive:4;		/* reduced primitive: GL_POINTS/LINES/TRIANGLES */
+   GLuint nr_userclip:3;	/* number of enabled user clip planes */
+   GLuint do_flat_shading:1;	/* shade model is GL_FLAT */
+   GLuint pv_first:1;		/* provoking vertex is the first vertex */
+   GLuint do_unfilled:1;	/* front or back polygon mode is not GL_FILL */
+   GLuint fill_cw:2;		/* includes cull information */
+   GLuint fill_ccw:2;		/* includes cull information */
+   GLuint offset_cw:1;		/* polygon offset enabled for CW faces */
+   GLuint offset_ccw:1;		/* polygon offset enabled for CCW faces */
+   GLuint copy_bfc_cw:1;	/* two-sided lighting and CW is an unculled back face */
+   GLuint copy_bfc_ccw:1;	/* two-sided lighting and CCW is an unculled back face */
+   GLuint clip_mode:3;		/* one of the BRW_CLIPMODE_* values */
+   GLuint pad0:11;		/* pads the bitfields above to 32 bits */
+
+   /* Only filled in when an unfilled face has polygon offset enabled. */
+   GLfloat offset_factor;
+   GLfloat offset_units;
+};
+
+
+/* Per-face fill modes stored in the 2-bit fill_cw/fill_ccw key fields.
+ * CLIP_CULL marks a face that is culled rather than drawn.
+ */
+#define CLIP_LINE   0
+#define CLIP_POINT  1
+#define CLIP_FILL   2
+#define CLIP_CULL   3
+
+
+/* NOTE(review): presumably masks the primitive type out of a header dword;
+ * no use is visible in this part of the driver -- confirm.
+ */
+#define PRIM_MASK  (0x1f)
+
+/* Working state for compiling one clip program. */
+struct brw_clip_compile {
+   struct brw_compile func;		/* instruction emitter state */
+   struct brw_clip_prog_key key;	/* copy of the key being compiled */
+   struct brw_clip_prog_data prog_data;	/* uploaded alongside the program */
+   
+   /* Registers assigned by the per-primitive alloc_regs routines. */
+   struct {
+      struct brw_reg R0;
+      struct brw_reg vertex[MAX_VERTS];
+
+      struct brw_reg t;
+      struct brw_reg t0, t1;
+      struct brw_reg dp0, dp1;
+
+      struct brw_reg dpPrev;
+      struct brw_reg dp;
+      struct brw_reg loopcount;
+      struct brw_reg nr_verts;
+      struct brw_reg planemask;
+
+      struct brw_reg inlist;
+      struct brw_reg outlist;
+      struct brw_reg freelist;
+
+      struct brw_reg dir;
+      struct brw_reg tmp0, tmp1;
+      struct brw_reg offset;
+      
+      struct brw_reg fixed_planes;
+      struct brw_reg plane_equation;
+       
+      struct brw_reg ff_sync;
+   } reg;
+
+   /* 3 different ways of expressing vertex size:
+    */
+   GLuint nr_attrs;
+   GLuint nr_regs;
+   GLuint nr_bytes;
+
+   GLuint first_tmp;
+   GLuint last_tmp;
+
+   GLboolean need_direction;
+
+   GLuint header_position_offset;
+
+   /* Byte offset of each vertex result within a vertex.  The table is
+    * filled by a loop over VERT_RESULT_MAX entries and indexed with
+    * VERT_RESULT_* values, so it is sized by VERT_RESULT_MAX rather than
+    * by the vertex *input* count VERT_ATTRIB_MAX.
+    */
+   GLuint offset[VERT_RESULT_MAX];
+};
+
+/* Byte stride between consecutive attributes in the vertex layout computed
+ * by compile_clip_prog(); also used there as the header position offset.
+ * NOTE(review): presumably four 32-bit components per attribute -- confirm.
+ */
+#define ATTR_SIZE  (4*4)
+
+/* Per-primitive program emitters.  compile_clip_prog() picks one based on
+ * the key's primitive type (and do_unfilled for triangles).
+ *
+ * Points are only culled, so no need for a clip routine, however it
+ * works out easier to have a dummy one.
+ */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c );
+void brw_emit_tri_clip( struct brw_clip_compile *c );
+void brw_emit_line_clip( struct brw_clip_compile *c );
+void brw_emit_point_clip( struct brw_clip_compile *c );
+
+/* brw_clip_tri.c, for use by the unfilled clip routine:
+ */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c );
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c );
+void brw_clip_tri( struct brw_clip_compile *c );
+void brw_clip_tri_emit_polygon( struct brw_clip_compile *c );
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
+			      GLuint nr_verts );
+
+
+/* Utils:
+ */
+
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+			     struct brw_indirect dest_ptr,
+			     struct brw_indirect v0_ptr, /* from */
+			     struct brw_indirect v1_ptr, /* to */
+			     struct brw_reg t0,
+			     GLboolean force_edgeflag );
+
+void brw_clip_init_planes( struct brw_clip_compile *c );
+
+void brw_clip_emit_vue(struct brw_clip_compile *c, 
+		       struct brw_indirect vert,
+		       GLboolean allocate,
+		       GLboolean eot,
+		       GLuint header);
+
+void brw_clip_kill_thread(struct brw_clip_compile *c);
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c );
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c );
+
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+			   GLuint to, GLuint from );
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c );
+
+struct brw_reg get_tmp( struct brw_clip_compile *c );
+
+void brw_clip_project_position(struct brw_clip_compile *c,
+             struct brw_reg pos );
+void brw_clip_ff_sync(struct brw_clip_compile *c);
+void brw_clip_init_ff_sync(struct brw_clip_compile *c);
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c
new file mode 100644
index 0000000000..ceb62a3116
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_clip_line.c
@@ -0,0 +1,281 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_clip.h"
+
+
+
+/* Assign the GRF registers used by the line clip program and record the
+ * resulting CURB/URB read lengths and total register count in prog_data.
+ */
+static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
+{
+   struct intel_context *intel = &c->func.brw->intel;
+   GLuint grf = 0;
+   GLuint v;
+
+   /* Register usage is static, precompute here:
+    */
+   c->reg.R0 = retype(brw_vec8_grf(grf, 0), BRW_REGISTER_TYPE_UD);
+   grf++;
+
+   /* With user clip planes, the plane equations (6 fixed + user planes,
+    * two per register) follow R0 and set the CURB read length.
+    */
+   c->prog_data.curb_read_length = 0;
+   if (c->key.nr_userclip) {
+      const GLuint plane_regs = (6 + c->key.nr_userclip + 1) / 2;
+
+      c->reg.fixed_planes = brw_vec4_grf(grf, 0);
+      grf += plane_regs;
+      c->prog_data.curb_read_length = plane_regs;
+   }
+
+   /* Payload vertices plus space for more generated vertices:
+    */
+   for (v = 0; v < 4; v++, grf += c->nr_regs)
+      c->reg.vertex[v] = brw_vec4_grf(grf, 0);
+
+   /* One register shared by t, t0, t1, the plane mask and the current
+    * plane equation.
+    */
+   c->reg.t              = brw_vec1_grf(grf, 0);
+   c->reg.t0             = brw_vec1_grf(grf, 1);
+   c->reg.t1             = brw_vec1_grf(grf, 2);
+   c->reg.planemask      = retype(brw_vec1_grf(grf, 3), BRW_REGISTER_TYPE_UD);
+   c->reg.plane_equation = brw_vec4_grf(grf, 4);
+   grf++;
+
+   /* fixme - dp4 will clobber r.1,2,3 */
+   c->reg.dp0 = brw_vec1_grf(grf, 0);
+   c->reg.dp1 = brw_vec1_grf(grf, 4);
+   grf++;
+
+   /* Without user clip planes, fixed_planes gets a register of its own. */
+   if (!c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec8_grf(grf, 0);
+      grf++;
+   }
+
+   if (intel->needs_ff_sync) {
+      c->reg.ff_sync = retype(brw_vec1_grf(grf, 0), BRW_REGISTER_TYPE_UD);
+      grf++;
+   }
+
+   /* No temporaries are handed out yet. */
+   c->first_tmp = grf;
+   c->last_tmp = grf;
+
+   c->prog_data.urb_read_length = c->nr_regs; /* ? */
+   c->prog_data.total_grf = grf;
+}
+
+
+
+/* Line clipping, more or less following the following algorithm:
+ *
+ *  for (p=0;p<MAX_PLANES;p++) {
+ *     if (clipmask & (1 << p)) {
+ *        GLfloat dp0 = DOTPROD( vtx0, plane[p] );
+ *        GLfloat dp1 = DOTPROD( vtx1, plane[p] );
+ *
+ *        if (IS_NEGATIVE(dp1)) {
+ *           GLfloat t = dp1 / (dp1 - dp0);
+ *           if (t > t1) t1 = t;
+ *        } else {
+ *           GLfloat t = dp0 / (dp0 - dp1);
+ *           if (t > t0) t0 = t;
+ *        }
+ *  
+ *        if (t0 + t1 >= 1.0)
+ *           return;
+ *     }
+ *  }
+ *
+ *  interp( ctx, newvtx0, vtx0, vtx1, t0 );
+ *  interp( ctx, newvtx1, vtx1, vtx0, t1 );
+ *
+ * Differences between the emitted program and the pseudo-code above:
+ *  - the t0 + t1 test is emitted once, after the plane loop, instead of
+ *    inside it;
+ *  - the loop consumes the plane mask by shifting it right one bit per
+ *    iteration and stops when it becomes zero;
+ *  - extra checks are emitted when brw->has_negative_rhw_bug is set.
+ *
+ * The emission order matters: several instructions rely on the conditional
+ * modifier / predicate state set up by the instruction before them.
+ */
+static void clip_and_emit_line( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
+   /* Address registers: the two input vertices, the two output vertices and
+    * the current clip plane.
+    */
+   struct brw_indirect vtx0     = brw_indirect(0, 0);
+   struct brw_indirect vtx1      = brw_indirect(1, 0);
+   struct brw_indirect newvtx0   = brw_indirect(2, 0);
+   struct brw_indirect newvtx1   = brw_indirect(3, 0);
+   struct brw_indirect plane_ptr = brw_indirect(4, 0);
+   struct brw_instruction *plane_loop;
+   struct brw_instruction *plane_active;
+   struct brw_instruction *is_negative;
+   /* Only used when brw->has_negative_rhw_bug is set. */
+   struct brw_instruction *is_neg2 = NULL;
+   struct brw_instruction *not_culled;
+   struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
+
+   brw_MOV(p, get_addr_reg(vtx0),      brw_address(c->reg.vertex[0]));
+   brw_MOV(p, get_addr_reg(vtx1),      brw_address(c->reg.vertex[1]));
+   brw_MOV(p, get_addr_reg(newvtx0),   brw_address(c->reg.vertex[2]));
+   brw_MOV(p, get_addr_reg(newvtx1),   brw_address(c->reg.vertex[3]));
+   brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+
+   /* Note: init t0, t1 together: 
+    * a single vec2 move covers both because brw_clip_line_alloc_regs()
+    * places them in adjacent elements of the same register.
+    */
+   brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));
+
+   brw_clip_init_planes(c);
+   brw_clip_init_clipmask(c);
+
+   /* -ve rhw workaround
+    * NOTE(review): the OR appears intended to run only when bit 20 of R0.2
+    * is set (the AND's NZ result); confirm how the emitter predicates it.
+    */
+   if (brw->has_negative_rhw_bug) {
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+              brw_imm_ud(1<<20));
+      brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
+   }
+
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   plane_loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      /* if (planemask & 1)
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
+      
+      plane_active = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 /* Load the current plane; a different deref is used for user clip
+	  * planes (4f) than for the fixed planes only (4b).
+	  */
+	 if (c->key.nr_userclip)
+	    brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+	 else
+	    brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+	 /* dp = DP4(vtx->position, plane) 
+	  */
+	 brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+
+	 /* if (IS_NEGATIVE(dp1)) 
+	  * The "less than" conditional modifier is set on the DP4 that
+	  * computes dp1, and the IF below consumes it.
+	  */
+	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+	 brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	 is_negative = brw_IF(p, BRW_EXECUTE_1);
+	 {
+             /*
+              * Both can be negative on GM965/G965 due to RHW workaround
+              * if so, this object should be rejected.
+              */
+             if (brw->has_negative_rhw_bug) {
+                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
+                 is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+                 {
+                     brw_clip_kill_thread(c);
+                 }
+                 brw_ENDIF(p, is_neg2);
+             }
+
+             /* t = dp1 / (dp1 - dp0) */
+             brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
+             brw_math_invert(p, c->reg.t, c->reg.t);
+             brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
+
+             /* if (t > t1) t1 = t;
+              * NOTE(review): the MOV is presumably predicated on the CMP
+              * result, with the predicate cleared right after -- confirm.
+              */
+             brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
+             brw_MOV(p, c->reg.t1, c->reg.t);
+             brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+	 } 
+	 is_negative = brw_ELSE(p, is_negative);
+	 {
+             /* Coming back in.  We know that both cannot be negative
+              * because the line would have been culled in that case.
+              */
+
+             /* If both are positive, do nothing */
+             /* Only on GM965/G965 */
+             if (brw->has_negative_rhw_bug) {
+                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
+                 is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+             }
+
+             {
+                 /* t = dp0 / (dp0 - dp1) */
+                 brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
+                 brw_math_invert(p, c->reg.t, c->reg.t);
+                 brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
+
+                 /* if (t > t0) t0 = t;  (same CMP/MOV pairing as for t1) */
+                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
+                 brw_MOV(p, c->reg.t0, c->reg.t);
+                 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+             }
+
+             /* Closes the IF opened under the same condition above. */
+             if (brw->has_negative_rhw_bug) {
+                 brw_ENDIF(p, is_neg2);
+             }
+         }
+	 brw_ENDIF(p, is_negative);	 
+      }
+      brw_ENDIF(p, plane_active);
+      
+      /* plane_ptr++;
+       */
+      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+      /* while (planemask>>=1) != 0
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+   }
+   brw_WHILE(p, plane_loop);
+
+   /* Emit the clipped line only if t0 + t1 < 1.0. */
+   brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
+   not_culled = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE);
+      brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, GL_FALSE);
+
+      /* Two vertices forming one line strip: the first marks the primitive
+       * start, the second the primitive end.
+       */
+      brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+      brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); 
+   }
+   brw_ENDIF(p, not_culled);
+   /* Emitted after the ENDIF, so it follows both outcomes of the test. */
+   brw_clip_kill_thread(c);
+}
+
+
+
+/* Emit the complete line clip program: register allocation, FF_SYNC setup,
+ * optional flat-shade color copy, then the clip-and-emit code itself.
+ */
+void brw_emit_line_clip( struct brw_clip_compile *c )
+{
+   brw_clip_line_alloc_regs(c);
+   brw_clip_init_ff_sync(c);
+
+   if (c->key.do_flat_shading) {
+      /* Copy colors from the provoking vertex to the other vertex. */
+      const GLuint from = c->key.pv_first ? 0 : 1;
+      const GLuint to = 1 - from;
+
+      brw_clip_copy_colors(c, to, from);
+   }
+
+   clip_and_emit_line(c);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_point.c b/src/mesa/drivers/dri/i965/brw_clip_point.c
new file mode 100644
index 0000000000..7f47634dca
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_clip_point.c
@@ -0,0 +1,55 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_clip.h"
+
+
+/* Point clip program: performs no clipping work of its own, only register
+ * allocation, the ff_sync setup and a thread kill. */
+void brw_emit_point_clip( struct brw_clip_compile *c )
+{
+   /* Borrow the triangle register layout with zero payload vertices, then
+    * send an empty message to kill the thread: */
+   brw_clip_tri_alloc_regs(c, 0);
+   brw_clip_init_ff_sync(c);
+
+   brw_clip_kill_thread(c);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c
new file mode 100644
index 0000000000..856d8f0c6c
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_clip_state.c
@@ -0,0 +1,180 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+struct brw_clip_unit_key {   /* inputs the clip unit state is built from; also the state-cache lookup key */
+   unsigned int total_grf;                /* from clip prog_data->total_grf */
+   unsigned int urb_entry_read_length;    /* from clip prog_data->urb_read_length */
+   unsigned int curb_entry_read_length;   /* from clip prog_data->curb_read_length */
+   unsigned int clip_mode;                /* from clip prog_data->clip_mode */
+
+   unsigned int curbe_offset;             /* from brw->curbe.clip_start */
+
+   unsigned int nr_urb_entries, urb_size; /* from brw->urb.nr_clip_entries and brw->urb.vsize */
+
+   GLboolean depth_clamp;                 /* from ctx->Transform.DepthClamp; when set, viewport Z clipping stays off */
+};
+
+static void
+clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
+{  /* Fill *key from current context state; each group below names the dirty flag that covers it. */
+   GLcontext *ctx = &brw->intel.ctx;
+   memset(key, 0, sizeof(*key));   /* key is later handed to the cache as (pointer, sizeof) - presumably padding must be zero too */
+
+   /* CACHE_NEW_CLIP_PROG */
+   key->total_grf = brw->clip.prog_data->total_grf;
+   key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+   key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
+   key->clip_mode = brw->clip.prog_data->clip_mode;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   key->curbe_offset = brw->curbe.clip_start;
+
+   /* BRW_NEW_URB_FENCE */
+   key->nr_urb_entries = brw->urb.nr_clip_entries;
+   key->urb_size = brw->urb.vsize;
+
+   /* _NEW_TRANSFORM */
+   key->depth_clamp = ctx->Transform.DepthClamp;
+}
+
+static drm_intel_bo *
+clip_unit_create_from_key(struct brw_context *brw,
+			  struct brw_clip_unit_key *key)
+{  /* Fill a brw_clip_unit_state from key, upload it through the state cache and return the resulting BO. */
+   struct intel_context *intel = &brw->intel;
+   struct brw_clip_unit_state clip;
+   drm_intel_bo *bo;
+
+   assert(brw->clip.prog_bo);   /* must hold before prog_bo->offset is read just below */
+   memset(&clip, 0, sizeof(clip));
+
+   clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;   /* number of 16-register blocks, minus one */
+   /* reloc */
+   clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;   /* program offset in 64-byte units */
+
+   clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   clip.thread1.single_program_flow = 1;
+
+   clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+   clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+   clip.thread3.dispatch_grf_start_reg = 1;
+   clip.thread3.urb_entry_read_offset = 0;
+
+   clip.thread4.nr_urb_entries = key->nr_urb_entries;
+   clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+   /* If we have enough clip URB entries to run two threads, do so.
+    */
+   if (key->nr_urb_entries >= 10) {
+      /* Half of the URB entries go to each thread, and it has to be an
+       * even number.
+       */
+      assert(key->nr_urb_entries % 2 == 0);
+      
+      /* Although up to 16 concurrent Clip threads are allowed on Ironlake,
+       * only 2 threads can output VUEs at a time.
+       */
+      if (intel->gen == 5)
+         clip.thread4.max_threads = 16 - 1;   /* NOTE(review): comment above mentions a 2-thread VUE limit - confirm 16 is intended */
+      else
+         clip.thread4.max_threads = 2 - 1;    /* field is written as thread count minus one */
+   } else {
+      assert(key->nr_urb_entries >= 5);
+      clip.thread4.max_threads = 1 - 1;
+   }
+
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+      clip.thread4.max_threads = 0;   /* debug override: same value as the single-thread case above */
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      clip.thread4.stats_enable = 1;
+
+   clip.clip5.userclip_enable_flags = 0x7f;
+   clip.clip5.userclip_must_clip = 1;
+   clip.clip5.guard_band_enable = 0;
+   if (!key->depth_clamp)
+      clip.clip5.viewport_z_clip_enable = 1;   /* left at 0 from the memset when depth clamp is on */
+   clip.clip5.viewport_xy_clip_enable = 1;
+   clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+   clip.clip5.api_mode = BRW_CLIP_API_OGL;
+   clip.clip5.clip_mode = key->clip_mode;
+
+   if (intel->is_g4x)
+      clip.clip5.negative_w_clip_test = 1;
+
+   clip.clip6.clipper_viewport_state_ptr = 0;
+   clip.viewport_xmin = -1;
+   clip.viewport_xmax = 1;
+   clip.viewport_ymin = -1;
+   clip.viewport_ymax = 1;
+
+   bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
+			 key, sizeof(*key),
+			 &brw->clip.prog_bo, 1,
+			 &clip, sizeof(clip));
+
+   /* Emit clip program relocation */
+   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_clip_unit_state, thread0),
+			   brw->clip.prog_bo, clip.thread0.grf_reg_count << 1,   /* delta presumably keeps grf_reg_count in the low bits of thread0 - confirm layout */
+			   I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+   return bo;
+}
+
+static void upload_clip_unit( struct brw_context *brw )
+{
+   struct brw_clip_unit_key unit_key;
+
+   /* Describe the wanted unit state, then drop the reference to the old BO. */
+   clip_unit_populate_key(brw, &unit_key);
+   drm_intel_bo_unreference(brw->clip.state_bo);
+
+   /* Look for a cached unit matching the key and the clip program BO. */
+   brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
+                                         &unit_key, sizeof(unit_key), &brw->clip.prog_bo, 1, NULL);
+   /* Cache miss: build the unit state and upload it. */
+   if (!brw->clip.state_bo)
+      brw->clip.state_bo = clip_unit_create_from_key(brw, &unit_key);
+}
+
+const struct brw_tracked_state brw_clip_unit = {   /* tracked-state entry for the clip unit state */
+   .dirty = {
+      .mesa  = _NEW_TRANSFORM,              /* key reads ctx->Transform.DepthClamp */
+      .brw   = (BRW_NEW_CURBE_OFFSETS |     /* key reads brw->curbe.clip_start */
+		BRW_NEW_URB_FENCE),          /* key reads brw->urb.nr_clip_entries / vsize */
+      .cache = CACHE_NEW_CLIP_PROG          /* key reads brw->clip.prog_data */
+   },
+   .prepare = upload_clip_unit,
+};
diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c
new file mode 100644
index 0000000000..916a99ea00
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c
@@ -0,0 +1,624 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_clip.h"
+
+static void release_tmps( struct brw_clip_compile *c )
+{  /* Rewind last_tmp to first_tmp - presumably frees every register handed out by get_tmp(). */
+   c->last_tmp = c->first_tmp;
+}
+
+
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
+			      GLuint nr_verts )
+{  /* Assign a fixed GRF to every value used by the triangle clip program and record the totals in prog_data. */
+   struct intel_context *intel = &c->func.brw->intel;
+   GLuint i = 0,j;
+
+   /* Register usage is static, precompute here.  i is the next free GRF
+    * number; R0 takes register 0: */
+   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+   if (c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec4_grf(i, 0);
+      i += (6 + c->key.nr_userclip + 1) / 2;   /* (6 + nr_userclip) planes at two per register, rounded up */
+
+      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+   }
+   else
+      c->prog_data.curb_read_length = 0;   /* no CURB payload; fixed_planes gets its register further down */
+
+
+   /* Payload vertices plus space for more generated vertices, nr_regs
+    * registers apiece: */
+   for (j = 0; j < nr_verts; j++) {
+      c->reg.vertex[j] = brw_vec4_grf(i, 0);
+      i += c->nr_regs;
+   }
+
+   if (c->nr_attrs & 1) {   /* odd attribute count: emit a MOV of 0.0 at byte offset delta in each of vertex[0..2] */
+      for (j = 0; j < 3; j++) {   /* NOTE(review): always 3 iterations, even when nr_verts < 3 - confirm intended */
+	 GLuint delta = c->nr_attrs*16 + 32;
+
+         if (intel->gen == 5)
+             delta = c->nr_attrs * 16 + 32 * 3;   /* gen 5 uses a larger fixed offset (96 bytes instead of 32) */
+
+	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
+      }
+   }
+
+   c->reg.t          = brw_vec1_grf(i, 0);   /* the next five values share one register, at elements 0, 1, 2, 3 and 4.. */
+   c->reg.loopcount  = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
+   c->reg.nr_verts   = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
+   c->reg.planemask  = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+   c->reg.plane_equation = brw_vec4_grf(i, 4);
+   i++;
+
+   c->reg.dpPrev     = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+   c->reg.dp         = brw_vec1_grf(i, 4);
+   i++;
+
+   c->reg.inlist     = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);   /* one register each for the three vertex lists */
+   i++;
+
+   c->reg.outlist    = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   c->reg.freelist   = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   if (!c->key.nr_userclip) {   /* without user clip planes, fixed_planes is a single register allocated here */
+      c->reg.fixed_planes = brw_vec8_grf(i, 0); 
+      i++;
+   }
+
+   if (c->key.do_unfilled) {   /* extra registers needed only when do_unfilled is set */
+      c->reg.dir     = brw_vec4_grf(i, 0);
+      c->reg.offset  = brw_vec4_grf(i, 4);
+      i++;
+      c->reg.tmp0    = brw_vec4_grf(i, 0);
+      c->reg.tmp1    = brw_vec4_grf(i, 4);
+      i++;
+   }
+
+   if (intel->needs_ff_sync) {
+      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+      i++;
+   }
+
+   c->first_tmp = i;   /* temporaries start right after the fixed registers */
+   c->last_tmp = i;
+
+   c->prog_data.urb_read_length = c->nr_regs; /* ? */
+   c->prog_data.total_grf = i;   /* count of fixed registers; temporaries are not included at this point */
+}
+
+
+
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
+{  /* Emit code that fills inlist with the three payload vertex addresses, clears outlist and sets nr_verts = 3. */
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+   struct brw_instruction *is_rev;
+
+   /* Initial list of indices for incoming vertexes.  The primitive type
+    * is taken from R0 element 2, masked with PRIM_MASK: */
+   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); 
+   brw_CMP(p, 
+	   vec1(brw_null_reg()), 
+	   BRW_CONDITIONAL_EQ, 
+	   tmp0,
+	   brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
+
+   /* XXX: Is there an easier way to do this?  Need to reverse every
+    * second tristrip element:  Can ignore sometimes?
+    */
+   is_rev = brw_IF(p, BRW_EXECUTE_1);
+   {   /* reversed strip triangle: vertices 0 and 1 swap places in inlist */
+      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[1]) );
+      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[0]) );
+      if (c->need_direction)
+	 brw_MOV(p, c->reg.dir, brw_imm_f(-1));
+   }
+   is_rev = brw_ELSE(p, is_rev);
+   {   /* any other primitive: keep payload order */
+      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[0]) );
+      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[1]) );
+      if (c->need_direction)
+	 brw_MOV(p, c->reg.dir, brw_imm_f(1));
+   }
+   brw_ENDIF(p, is_rev);
+
+   brw_MOV(p, get_element(c->reg.inlist, 2),  brw_address(c->reg.vertex[2]) );   /* third vertex is the same in both cases */
+   brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));   /* zero the outlist register */
+   brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
+}
+
+
+
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
+{  /* Emit the flat-shading colour copies; which vertex pairs are used depends on primitive type and key.pv_first. */
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *is_poly, *is_trifan;
+   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));   /* tmp0 = primitive type from R0 element 2 */
+   brw_CMP(p, 
+	   vec1(brw_null_reg()), 
+	   BRW_CONDITIONAL_EQ, 
+	   tmp0,
+	   brw_imm_ud(_3DPRIM_POLYGON));
+
+   is_poly = brw_IF(p, BRW_EXECUTE_1);
+   {   /* polygon: pairs (1, 0) and (2, 0) - NOTE(review): argument order is presumably (to, from); confirm */
+      brw_clip_copy_colors(c, 1, 0);
+      brw_clip_copy_colors(c, 2, 0);
+   }
+   is_poly = brw_ELSE(p, is_poly);
+   {
+      if (c->key.pv_first) {   /* compile-time choice: the trifan test is only emitted when pv_first is set */
+	 brw_CMP(p,
+		 vec1(brw_null_reg()),
+		 BRW_CONDITIONAL_EQ,
+		 tmp0,
+		 brw_imm_ud(_3DPRIM_TRIFAN));
+	 is_trifan = brw_IF(p, BRW_EXECUTE_1);
+	 {   /* trifan with pv_first: pairs (0, 1) and (2, 1) */
+	    brw_clip_copy_colors(c, 0, 1);
+	    brw_clip_copy_colors(c, 2, 1);
+	 }
+	 is_trifan = brw_ELSE(p, is_trifan);
+	 {   /* other primitives with pv_first: same pairs as the polygon case */
+	    brw_clip_copy_colors(c, 1, 0);
+	    brw_clip_copy_colors(c, 2, 0);
+	 }
+	 brw_ENDIF(p, is_trifan);
+      }
+      else {   /* pv_first not set: pairs (0, 2) and (1, 2) */
+         brw_clip_copy_colors(c, 0, 2);
+         brw_clip_copy_colors(c, 1, 2);
+      }
+   }
+   brw_ENDIF(p, is_poly);
+}
+
+
+
+/* Clip the polygon held in inlist against each plane whose planemask bit
+ * is set, using mesa's clipping algorithms translated to GEN4 assembly. */
+void brw_clip_tri( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_indirect vtx = brw_indirect(0, 0);           /* current input vertex */
+   struct brw_indirect vtxPrev = brw_indirect(1, 0);       /* previous input vertex */
+   struct brw_indirect vtxOut = brw_indirect(2, 0);        /* slot for a newly generated vertex, 0 once used */
+   struct brw_indirect plane_ptr = brw_indirect(3, 0);     /* walks the plane equations */
+   struct brw_indirect inlist_ptr = brw_indirect(4, 0);    /* read cursor in inlist */
+   struct brw_indirect outlist_ptr = brw_indirect(5, 0);   /* write cursor in outlist */
+   struct brw_indirect freelist_ptr = brw_indirect(6, 0);  /* next unused vertex slot */
+   struct brw_instruction *plane_loop;
+   struct brw_instruction *plane_active;
+   struct brw_instruction *vertex_loop;
+   struct brw_instruction *next_test;
+   struct brw_instruction *prev_test;
+   
+   brw_MOV(p, get_addr_reg(vtxPrev),     brw_address(c->reg.vertex[2]) );   /* "previous" starts at the last input vertex */
+   brw_MOV(p, get_addr_reg(plane_ptr),   brw_clip_plane0_address(c));
+   brw_MOV(p, get_addr_reg(inlist_ptr),  brw_address(c->reg.inlist));
+   brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+
+   brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );   /* free slots begin after the three inputs */
+
+   plane_loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      /* if (planemask & 1)
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
+      
+      plane_active = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 /* vtxOut = freelist_ptr++ 
+	  */
+	 brw_MOV(p, get_addr_reg(vtxOut),       get_addr_reg(freelist_ptr) );
+	 brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));
+
+	 if (c->key.nr_userclip)   /* plane fetch differs by key: deref_4f with user clip planes, deref_4b without */
+	    brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+	 else
+	    brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+	    
+	 brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);   /* iterate over the current vertices ... */
+	 brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));      /* ... while recounting the output */
+
+	 vertex_loop = brw_DO(p, BRW_EXECUTE_1);
+	 {
+	    /* vtx = *input_ptr;
+	     */
+	    brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));
+
+	    /* IS_NEGATIVE(prev) */
+	    brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+	    brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	    prev_test = brw_IF(p, BRW_EXECUTE_1);
+	    {
+	       /* IS_POSITIVE(next)
+		*/
+	       brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
+	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	       next_test = brw_IF(p, BRW_EXECUTE_1);
+	       {
+
+		  /* Coming back in.  t = dpPrev / (dpPrev - dp)
+		   */
+		  brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
+		  brw_math_invert(p, c->reg.t, c->reg.t);
+		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);
+
+		  /* If (vtxOut == 0) vtxOut = vtxPrev
+		   */
+		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+		  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
+		  brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+		  brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE);
+
+		  /* *outlist_ptr++ = vtxOut;
+		   * nr_verts++; 
+		   * vtxOut = 0;
+		   */
+		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+	       }
+	       brw_ENDIF(p, next_test);
+	       
+	    }
+	    prev_test = brw_ELSE(p, prev_test);
+	    {
+	       /* *outlist_ptr++ = vtxPrev;
+		* nr_verts++;
+		*/
+	       brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
+	       brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+	       brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+
+	       /* IS_NEGATIVE(next)
+		*/
+	       brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	       next_test = brw_IF(p, BRW_EXECUTE_1);
+	       {
+		  /* Going out of bounds.  Avoid division by zero as we
+		   * know dp != dpPrev from DIFFERENT_SIGNS, above.
+		   */
+		  brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
+		  brw_math_invert(p, c->reg.t, c->reg.t);
+		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);   /* t = dp / (dp - dpPrev) */
+
+		  /* If (vtxOut == 0) vtxOut = vtx
+		   */
+		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+		  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
+		  brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+		  brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE);		  
+
+		  /* *outlist_ptr++ = vtxOut;
+		   * nr_verts++; 
+		   * vtxOut = 0;
+		   */
+		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+	       } 	       
+	       brw_ENDIF(p, next_test);
+	    }
+	    brw_ENDIF(p, prev_test);
+	    
+	    /* vtxPrev = vtx;
+	     * inlist_ptr++;
+	     */
+	    brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
+	    brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));
+
+	    /* while (--loopcount != 0)
+	     */
+	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+	    brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+	 } 
+	 brw_WHILE(p, vertex_loop);
+
+	 /* vtxPrev = *(outlist_ptr-1)  OR: outlist[nr_verts-1]
+	  * inlist = outlist
+	  * inlist_ptr = &inlist[0]
+	  * outlist_ptr = &outlist[0]
+	  */
+	 brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));   /* step back one 2-byte list entry */
+	 brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
+	 brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
+	 brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+	 brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+      }
+      brw_ENDIF(p, plane_active);
+      
+      /* plane_ptr++;
+       */
+      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+      /* nr_verts >= 3 
+       */
+      brw_CMP(p,
+	      vec1(brw_null_reg()),
+	      BRW_CONDITIONAL_GE,
+	      c->reg.nr_verts,
+	      brw_imm_ud(3));
+   
+      /* && (planemask>>=1) != 0
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+   }
+   brw_WHILE(p, plane_loop);
+}
+
+
+
+void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
+{  /* Emit the vertices listed in inlist as one TRIFAN, provided nr_verts - 2 > 0. */
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *loop, *if_insn;
+
+   /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
+    */
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+   brw_ADD(p,
+	   c->reg.loopcount,
+	   c->reg.nr_verts,
+	   brw_imm_d(-2));
+
+   if_insn = brw_IF(p, BRW_EXECUTE_1);
+   {
+      struct brw_indirect v0 = brw_indirect(0, 0);     /* vertex being emitted */
+      struct brw_indirect vptr = brw_indirect(1, 0);   /* cursor into inlist */
+
+      brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist));
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+      brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START));   /* first vertex carries PRIM_START */
+      
+      brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));   /* list entries are 2 bytes apart */
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+      loop = brw_DO(p, BRW_EXECUTE_1);
+      {   /* loopcount middle vertices, emitted with neither START nor END */
+	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2));
+  
+	 brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+	 brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+	 brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+	 brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+      }
+      brw_WHILE(p, loop);
+
+      brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END));   /* last vertex carries PRIM_END; note the (0, 1) flags */
+   }
+   brw_ENDIF(p, if_insn);
+}
+
+static void do_clip_tri( struct brw_clip_compile *c )
+{  /* Unconditional clip: initialise the planes, then emit the clip loop. */
+   brw_clip_init_planes(c);
+
+   brw_clip_tri(c);
+}
+
+
+static void maybe_do_clip_tri( struct brw_clip_compile *c )
+{  /* Emit the clip code wrapped in a run-time test so that it only executes when planemask != 0. */
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *do_clip;
+
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+   do_clip = brw_IF(p, BRW_EXECUTE_1);
+   {
+      do_clip_tri(c);
+   }
+   brw_ENDIF(p, do_clip);
+}
+
+static void brw_clip_test( struct brw_clip_compile *c )
+{   /* Emit a clip test on the three vertex positions: kill the thread on trivial reject, else rebuild planemask bits 0-5. */
+    struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+    struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);   /* t1..t3: per-component compare results for vertices 0..2 */
+    struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+    struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+
+    struct brw_reg v0 = get_tmp(c);   /* v0..v2: copies of the three vertex positions */
+    struct brw_reg v1 = get_tmp(c);
+    struct brw_reg v2 = get_tmp(c);
+
+    struct brw_indirect vt0 = brw_indirect(0, 0);
+    struct brw_indirect vt1 = brw_indirect(1, 0);
+    struct brw_indirect vt2 = brw_indirect(2, 0);
+
+    struct brw_compile *p = &c->func;
+    struct brw_instruction *is_outside;
+    struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+    brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
+    brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
+    brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
+    brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS]));
+    brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS]));
+    brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS]));
+    brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));   /* clear bits 0-5; they are recomputed below */
+
+    /* test nearz, xmin, ymin plane */
+    /* clip.xyz < -clip.w */
+    brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    /* All vertices are outside of a plane, rejected */
+    brw_AND(p, t, t1, t2);
+    brw_AND(p, t, t, t3);
+    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));   /* fold elements 0..2 into one value */
+    brw_OR(p, tmp0, tmp0, get_element(t, 2));
+    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+    is_outside = brw_IF(p, BRW_EXECUTE_1);
+    {
+        brw_clip_kill_thread(c);
+    }
+    brw_ENDIF(p, is_outside);
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    /* some vertices are inside a plane, some are outside,need to clip */
+    brw_XOR(p, t, t1, t2);    /* t = results that differ between vertices ... */
+    brw_XOR(p, t1, t2, t3);   /* (t1 is reused as scratch from here on) */
+    brw_OR(p, t, t, t1);
+    brw_AND(p, t, t, brw_imm_ud(0x1));
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+            get_element(t, 0), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));   /* element 0 -> bit 5; presumably predicated on the CMP above */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+            get_element(t, 1), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));   /* element 1 -> bit 3 */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+            get_element(t, 2), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));   /* element 2 -> bit 1 */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    /* test farz, xmax, ymax plane */
+    /* clip.xyz > clip.w */
+    brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    /* All vertices are outside of a plane, rejected */
+    brw_AND(p, t, t1, t2);
+    brw_AND(p, t, t, t3);
+    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
+    brw_OR(p, tmp0, tmp0, get_element(t, 2));
+    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+    is_outside = brw_IF(p, BRW_EXECUTE_1);
+    {
+        brw_clip_kill_thread(c);
+    }
+    brw_ENDIF(p, is_outside);
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    /* some vertices are inside a plane, some are outside,need to clip */
+    brw_XOR(p, t, t1, t2);
+    brw_XOR(p, t1, t2, t3);
+    brw_OR(p, t, t, t1);
+    brw_AND(p, t, t, brw_imm_ud(0x1));
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+            get_element(t, 0), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));   /* element 0 -> bit 4 */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+            get_element(t, 1), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));   /* element 1 -> bit 2 */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+            get_element(t, 2), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));   /* element 2 -> bit 0 */
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    release_tmps(c);   /* hand back the seven temporaries taken at the top */
+}
+
+
+void brw_emit_tri_clip( struct brw_clip_compile *c )
+{  /* Emit the whole triangle clip program: setup, optional clip test and flat shading, clipping, trifan output, thread kill. */
+   struct brw_instruction *neg_rhw;
+   struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
+   brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);   /* 3 payload vertices plus (nr_userclip + 6) slots for generated ones */
+   brw_clip_tri_init_vertices(c);
+   brw_clip_init_clipmask(c);
+   brw_clip_init_ff_sync(c);
+
+   /* if -ve rhw workaround bit is set, 
+      do cliptest */
+   if (brw->has_negative_rhw_bug) {   /* compile-time: the test below is only emitted for affected hardware */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), 
+              brw_imm_ud(1<<20));   /* run-time: bit 20 of R0 element 2 */
+      neg_rhw = brw_IF(p, BRW_EXECUTE_1); 
+      {
+         brw_clip_test(c);
+      }
+      brw_ENDIF(p, neg_rhw);
+   }
+   /* Can't push into do_clip_tri because with polygon (or quad)
+    * flatshading, need to apply the flatshade here because we don't
+    * respect the PV when converting to trifan for emit:
+    */
+   if (c->key.do_flat_shading) 
+      brw_clip_tri_flat_shade(c); 
+      
+   if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) ||
+       (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP))
+      do_clip_tri(c);         /* these modes always run the clip loop */
+   else 
+      maybe_do_clip_tri(c);   /* other modes guard it with a run-time planemask != 0 test */
+
+   brw_clip_tri_emit_polygon(c);
+
+   /* Send an empty message to kill the thread:
+    */
+   brw_clip_kill_thread(c);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
new file mode 100644
index 0000000000..f36d22fdbf
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c
@@ -0,0 +1,504 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_clip.h"
+
+
+
+/* Compute the facing direction of the triangle in c->reg.vertex[0..2] and
+ * multiply it into c->reg.dir.  The vertices are read directly, with no
+ * inlist indirection (the original author flagged that choice "BZZZT!").
+ */
+static void compute_tri_direction( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg e = c->reg.tmp0;
+   struct brw_reg f = c->reg.tmp1;
+   struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); 
+   struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); 
+   struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); 
+
+   /* NOTE(review): these three temporaries are not released in this function. */
+   struct brw_reg v0n = get_tmp(c);
+   struct brw_reg v1n = get_tmp(c);
+   struct brw_reg v2n = get_tmp(c);
+
+   /* Convert to NDC.
+    * NOTE: We can't modify the original vertex coordinates,
+    * as it may impact further operations.
+    * So, we have to keep normalized coordinates in temp registers.
+    *
+    * TBD-KC
+    * Try to optimize unnecessary MOV's.
+    */
+   brw_MOV(p, v0n, v0);
+   brw_MOV(p, v1n, v1);
+   brw_MOV(p, v2n, v2);
+
+   brw_clip_project_position(c, v0n);
+   brw_clip_project_position(c, v1n);
+   brw_clip_project_position(c, v2n);
+
+   /* Calculate the vectors of two edges of the triangle:
+    */
+   brw_ADD(p, e, v0n, negate(v2n));   /* e = v0n - v2n */
+   brw_ADD(p, f, v1n, negate(v2n));   /* f = v1n - v2n */
+
+   /* Take their crossproduct into e (the null-dst MUL presumably feeds the
+    * MAC through the accumulator -- NOTE(review): confirm): */
+   brw_set_access_mode(p, BRW_ALIGN_16);
+   brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3),  brw_swizzle(f,2,0,1,3));
+   brw_MAC(p, vec4(e),  negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
+   brw_set_access_mode(p, BRW_ALIGN_1);
+
+   brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));   /* dir *= cross product */
+}
+
+/* Emit code that kills the thread when dir.z has the sign being culled. */
+static void cull_direction( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *ccw;
+   GLuint conditional;
+
+   assert (!(c->key.fill_ccw == CLIP_CULL &&
+	     c->key.fill_cw == CLIP_CULL));
+
+   if (c->key.fill_ccw == CLIP_CULL)
+      conditional = BRW_CONDITIONAL_GE;   /* kill when dir.z >= 0 */
+   else
+      conditional = BRW_CONDITIONAL_L;    /* kill when dir.z < 0 */
+
+   brw_CMP(p,
+	   vec1(brw_null_reg()),
+	   conditional,
+	   get_element(c->reg.dir, 2),
+	   brw_imm_f(0));
+   
+   ccw = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_kill_thread(c);
+   }
+   brw_ENDIF(p, ccw);
+}
+
+
+/* Emit a dir.z test under which BFC0/BFC1 are copied over COL0/COL1. */
+static void copy_bfc( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *ccw;
+   GLuint conditional;
+
+   /* Do we have any colors to copy?  A pair counts only when both its
+    * front and back offsets are nonzero. */
+   if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) &&
+       !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]))
+      return;
+
+   /* In some weird degenerate cases we can end up testing the
+    * direction twice, once for culling and once for bfc copying.  Oh
+    * well, that's what you get for setting weird GL state.
+    */
+   if (c->key.copy_bfc_ccw)
+      conditional = BRW_CONDITIONAL_GE;   /* copy when dir.z >= 0 */
+   else
+      conditional = BRW_CONDITIONAL_L;    /* copy when dir.z < 0 */
+
+   brw_CMP(p,
+	   vec1(brw_null_reg()),
+	   conditional,
+	   get_element(c->reg.dir, 2),
+	   brw_imm_f(0));
+   
+   ccw = brw_IF(p, BRW_EXECUTE_1);
+   {
+      GLuint i;
+
+      for (i = 0; i < 3; i++) {   /* the three original triangle vertices */
+	 if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0])
+	    brw_MOV(p, 
+		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]),
+		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0]));
+
+	 if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])
+	    brw_MOV(p, 
+		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]),
+		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1]));
+      }
+   }
+   brw_ENDIF(p, ccw);
+}
+
+
+
+
+/* Compute the polygon offset into c->reg.offset, following this reference:
+  GLfloat iz	= 1.0 / dir.z;
+  GLfloat ac	= dir.x * iz;
+  GLfloat bc	= dir.y * iz;
+  offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
+  offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
+  offset *= MRD;
+  (DEPTH_SCALE/MRD are not applied here; presumably folded into the key.) */
+static void compute_offset( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg off = c->reg.offset;
+   struct brw_reg dir = c->reg.dir;
+   
+   brw_math_invert(p, get_element(off, 2), get_element(dir, 2));  /* iz */
+   brw_MUL(p, vec2(off), dir, get_element(off, 2));   /* off.xy = dir.xy * iz */
+
+   brw_CMP(p, 
+	   vec1(brw_null_reg()), 
+	   BRW_CONDITIONAL_GE,
+	   brw_abs(get_element(off, 0)), 
+	   brw_abs(get_element(off, 1)));
+
+   brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1)));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor));
+   brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units));
+}
+
+/* For polygon input, conditionally clear the edge flags of vertex 0 and 2. */
+static void merge_edgeflags( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *is_poly;
+   struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);
+
+   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); 
+   brw_CMP(p, 
+	   vec1(brw_null_reg()), 
+	   BRW_CONDITIONAL_EQ, 
+	   tmp0,
+	   brw_imm_ud(_3DPRIM_POLYGON));
+
+   /* Get away with using reg.vertex because we know that this is not
+    * a _3DPRIM_TRISTRIP_REVERSE:
+    */
+   is_poly = brw_IF(p, BRW_EXECUTE_1);
+   {   
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);   /* tests R0.2 bit 8 */
+      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
+      brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);   /* tests R0.2 bit 9 */
+      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
+      brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   }
+   brw_ENDIF(p, is_poly);
+}
+
+
+/* Add c->reg.offset to the z component of the vertex header position. */
+static void apply_one_offset( struct brw_clip_compile *c,
+			  struct brw_indirect vert )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg z = deref_1f(vert, c->header_position_offset +
+			       2 * type_sz(BRW_REGISTER_TYPE_F));   /* third float */
+
+   brw_ADD(p, z, z, vec1(c->reg.offset));
+}
+
+
+
+/***********************************************************************
+ * Output clipped polygon as an unfilled primitive:
+ * emit_lines() sends every edge whose first vertex has a nonzero edge flag. */
+static void emit_lines(struct brw_clip_compile *c,
+		       GLboolean do_offset)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *loop;
+   struct brw_instruction *draw_edge;
+   struct brw_indirect v0 = brw_indirect(0, 0);
+   struct brw_indirect v1 = brw_indirect(1, 0);
+   struct brw_indirect v0ptr = brw_indirect(2, 0);
+   struct brw_indirect v1ptr = brw_indirect(3, 0);
+
+   /* Need a separate loop for offset: every inlist vertex gets the offset
+    * applied once, before any edge is emitted. */
+   if (do_offset) {
+      brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+      brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+      loop = brw_DO(p, BRW_EXECUTE_1);
+      {
+	 brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+	 brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+	    
+	 apply_one_offset(c, v0);
+	    
+	 brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+	 brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+      }
+      brw_WHILE(p, loop);
+   }
+
+   /* v1ptr = &inlist[nr_verts] (entries are 2 bytes, hence nr_verts is
+    * added twice); *v1ptr = inlist[0], so the last edge wraps around.
+    */
+   brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+   brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+   brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+   brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+   brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0));
+
+   loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));   /* this vertex */
+      brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2));   /* next vertex */
+      brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+      /* draw edge if edgeflag != 0 */
+      brw_CMP(p, 
+	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, 
+	      deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+	      brw_imm_f(0));
+      draw_edge = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+	 brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+      }
+      brw_ENDIF(p, draw_edge);
+
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+   }
+   brw_WHILE(p, loop);
+}
+
+
+/* Emit each inlist vertex with a nonzero edge flag as a one-vertex point. */
+static void emit_points(struct brw_clip_compile *c,
+			GLboolean do_offset )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *loop;
+   struct brw_instruction *draw_point;
+
+   struct brw_indirect v0 = brw_indirect(0, 0);
+   struct brw_indirect v0ptr = brw_indirect(2, 0);
+
+   brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+   brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+   loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+      brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+      /* draw if edgeflag != 0 
+       */
+      brw_CMP(p, 
+	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, 
+	      deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+	      brw_imm_f(0));
+      draw_point = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 if (do_offset)   /* the offset is applied only to vertices that are drawn */
+	    apply_one_offset(c, v0);
+
+	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END);
+      }
+      brw_ENDIF(p, draw_point);
+
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+   }
+   brw_WHILE(p, loop);
+}
+
+
+
+
+
+
+/* Emit the polygon as filled, as lines, or as points, according to 'mode'. */
+static void emit_primitives( struct brw_clip_compile *c,
+			     GLuint mode, 
+			     GLboolean do_offset )
+{
+   if (mode == CLIP_FILL) {
+      brw_clip_tri_emit_polygon(c);
+      return;
+   }
+   if (mode == CLIP_LINE) {
+      emit_lines(c, do_offset);
+      return;
+   }
+   if (mode == CLIP_POINT) {
+      emit_points(c, do_offset);
+      return;
+   }
+
+   /* Culled faces must never get this far; any other value of 'mode'
+    * falls through without emitting anything. */
+   if (mode == CLIP_CULL)
+      assert(0);
+} 
+
+
+/* Emit the polygon in the fill mode (and offset setting) of its facing. */
+static void emit_unfilled_primitives( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *ccw;
+
+   /* Direction culling has already been done.  A run-time facing test is
+    * only needed when both faces are drawn and in different modes. */
+   if (c->key.fill_ccw != c->key.fill_cw &&
+       c->key.fill_ccw != CLIP_CULL &&
+       c->key.fill_cw != CLIP_CULL)
+   {
+      brw_CMP(p,
+	      vec1(brw_null_reg()),
+	      BRW_CONDITIONAL_GE,
+	      get_element(c->reg.dir, 2),
+	      brw_imm_f(0));
+   
+      ccw = brw_IF(p, BRW_EXECUTE_1);   /* dir.z >= 0 */
+      {
+	 emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+      }
+      ccw = brw_ELSE(p, ccw);
+      {
+	 emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+      }
+      brw_ENDIF(p, ccw);
+   }
+   else if (c->key.fill_cw != CLIP_CULL) {   /* same mode, or only cw is drawn */
+      emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+   }
+   else if (c->key.fill_ccw != CLIP_CULL) {  /* only ccw is drawn */
+      emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+   }
+}
+
+
+
+/* Emit code that kills the thread when nr_verts is less than 3. */
+static void check_nr_verts( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *if_insn;
+
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3));      
+   if_insn = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_kill_thread(c);
+   }
+   brw_ENDIF(p, if_insn);
+}
+
+/* Emit the clip program used when at least one face is not plainly filled. */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *do_clip;
+   
+   /* The facing is needed for offset, per-face modes, culling or bfc copy. */
+   c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) ||
+			(c->key.fill_ccw != c->key.fill_cw) ||
+			c->key.fill_ccw == CLIP_CULL ||
+			c->key.fill_cw == CLIP_CULL ||
+			c->key.copy_bfc_cw ||
+			c->key.copy_bfc_ccw);
+
+   brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+   brw_clip_tri_init_vertices(c);
+   brw_clip_init_ff_sync(c);
+
+   assert(c->offset[VERT_RESULT_EDGE]);
+
+   if (c->key.fill_ccw == CLIP_CULL &&
+       c->key.fill_cw == CLIP_CULL) {
+      brw_clip_kill_thread(c);   /* both faces culled: the program just ends */
+      return;
+   }
+
+   merge_edgeflags(c);
+
+   /* Need to use the inlist indirection here: 
+    */
+   if (c->need_direction) 
+      compute_tri_direction(c);
+   
+   if (c->key.fill_ccw == CLIP_CULL ||
+       c->key.fill_cw == CLIP_CULL)
+      cull_direction(c);
+
+   if (c->key.offset_ccw ||
+       c->key.offset_cw)
+      compute_offset(c);
+
+   if (c->key.copy_bfc_ccw ||
+       c->key.copy_bfc_cw)
+      copy_bfc(c);
+
+   /* Need to do this whether we clip or not:
+    */
+   if (c->key.do_flat_shading)
+      brw_clip_tri_flat_shade(c);
+   
+   brw_clip_init_clipmask(c);
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+   do_clip = brw_IF(p, BRW_EXECUTE_1);   /* clip only if planemask != 0 */
+   {
+      brw_clip_init_planes(c);
+      brw_clip_tri(c);
+      check_nr_verts(c);
+   }
+   brw_ENDIF(p, do_clip);
+   
+   emit_unfilled_primitives(c);
+   brw_clip_kill_thread(c);
+}
+
+
+
diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c
new file mode 100644
index 0000000000..2148bc8244
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -0,0 +1,382 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_clip.h"
+
+
+
+/* Hand out GRF number last_tmp as a vec4 temporary and advance last_tmp. */
+struct brw_reg get_tmp( struct brw_clip_compile *c )
+{
+   struct brw_reg reg = brw_vec4_grf(c->last_tmp, 0);
+   /* Claim the register, then raise total_grf if we have passed it. */
+   c->last_tmp++;
+   if (c->prog_data.total_grf < c->last_tmp)
+      c->prog_data.total_grf = c->last_tmp;
+   return reg;
+}
+
+static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )
+{  /* Stack discipline: only the most recently allocated temporary is freed. */
+   if (c->last_tmp-1 == tmp.nr)
+      c->last_tmp -= 1;
+}
+
+static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)
+{  /* x stays in the low bits; y, z and w are shifted up by 8, 16 and 24. */
+   const GLuint packed = x | (y<<8) | (z<<16) | (w<<24);
+   return brw_imm_ud(packed);
+}
+
+/* With no user clip planes, load six packed planes into reg.fixed_planes. */
+void brw_clip_init_planes( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+
+   if (!c->key.nr_userclip) {   /* NOTE(review): 0xff presumably encodes -1 as a signed byte -- confirm */
+      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0,    0, 0xff, 1));
+      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0,    0,    1, 1));
+      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff,    0, 1));
+      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0,    1,    0, 1));
+      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff,  0,    0, 1));
+      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1,    0,    0, 1));
+   }
+}
+
+
+
+#define W 3
+
+/* Project 'pos' to screen space (or back again), overwrite with results:
+ * w is replaced by 1/w, then x, y and z are multiplied by the new w. */
+void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
+{
+   struct brw_compile *p = &c->func;
+
+   /* calc rhw 
+    */
+   brw_math_invert(p, get_element(pos, W), get_element(pos, W));
+
+   /* value.xyz *= value.rhw
+    */
+   brw_set_access_mode(p, BRW_ALIGN_16);
+   brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
+   brw_set_access_mode(p, BRW_ALIGN_1);
+}
+
+/* Rebuild the projected position in the header of the vertex at vert_addr. */
+static void brw_clip_project_vertex( struct brw_clip_compile *c, 
+				     struct brw_indirect vert_addr )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = get_tmp(c);
+
+   /* Fixup position.  Extract from the original vertex and re-project
+    * to screen space: HPOS is read, projected in a temporary and the
+    * result stored at header_position_offset. */
+   brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS]));
+   brw_clip_project_position(c, tmp);
+   brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp);
+	 
+   release_tmp(c, tmp);
+}
+
+
+
+
+/* Interpolate between the vertices at v0_ptr and v1_ptr by t0 and write
+ * the new vertex at dest_ptr.  (An older note here said the result goes
+ * to a0.0, which is then incremented; no increment is visible below.) */
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+			     struct brw_indirect dest_ptr,
+			     struct brw_indirect v0_ptr, /* from */
+			     struct brw_indirect v1_ptr, /* to */
+			     struct brw_reg t0,
+			     GLboolean force_edgeflag)
+{
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_reg tmp = get_tmp(c);
+   GLuint i;
+
+   /* Just copy the vertex header:
+    */
+   /*
+    * After CLIP stage, only first 256 bits of the VUE are read
+    * back on Ironlake, so needn't change it
+    */
+   brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
+      
+   /* Iterate over each attribute (could be done in pairs?)
+    */
+   for (i = 0; i < c->nr_attrs; i++) {
+      GLuint delta = i*16 + 32;   /* 16 bytes per attribute, starting at 32 */
+
+      if (intel->gen == 5)
+          delta = i * 16 + 32 * 3;   /* attributes start at byte 96 on gen 5 */
+
+      if (delta == c->offset[VERT_RESULT_EDGE]) {
+	 if (force_edgeflag) 
+	    brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
+	 else
+	    brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
+      }
+      else {
+	 /* Interpolate: 
+	  *
+	  *        New = attr0 + t*attr1 - t*attr0
+	  */
+	 brw_MUL(p, 
+		 vec4(brw_null_reg()),
+		 deref_4f(v1_ptr, delta),
+		 t0);
+
+	 brw_MAC(p, 
+		 tmp,	      
+		 negate(deref_4f(v0_ptr, delta)),
+		 t0); 
+	      
+	 brw_ADD(p,
+		 deref_4f(dest_ptr, delta), 
+		 deref_4f(v0_ptr, delta),
+		 tmp);
+      }
+   }
+
+   if (i & 1) {   /* odd attribute count: zero one more 16-byte slot */
+      GLuint delta = i*16 + 32;
+
+      if (intel->gen == 5)
+          delta = i * 16 + 32 * 3;
+
+      brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
+   }
+
+   release_tmp(c, tmp);
+
+   /* Recreate the projected (NDC) coordinate in the new vertex
+    * header:
+    */
+   brw_clip_project_vertex(c, dest_ptr );
+}
+
+
+
+
+#define MAX_MRF 16
+/* Emit a URB write of the vertex at 'vert', with 'header' stored in R0.2. */
+void brw_clip_emit_vue(struct brw_clip_compile *c, 
+		       struct brw_indirect vert,
+		       GLboolean allocate,
+		       GLboolean eot,
+		       GLuint header)
+{
+   struct brw_compile *p = &c->func;
+
+   brw_clip_ff_sync(c);
+
+   assert(!(allocate && eot));   /* the two flags are mutually exclusive */
+
+   /* Copy the vertex from vertn into m1..mN+1:
+    */
+   brw_copy_from_indirect(p, brw_message_reg(1), vert, c->nr_regs);
+
+   /* Overwrite PrimType and PrimStart in the message header, for
+    * each vertex in turn:
+    */
+   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+
+   /* Send each vertex as a separate write to the urb.  This
+    * is different to the concept in brw_sf_emit.c, where
+    * subsequent writes are used to build up a single urb
+    * entry.  Each of these writes instantiates a separate
+    * urb entry - (I think... what about 'allocate'?)
+    */
+   brw_urb_WRITE(p, 
+		 allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+		 0,
+		 c->reg.R0,
+		 allocate,
+		 1,		/* used */
+		 c->nr_regs + 1, /* msg length */
+		 allocate ? 1 : 0, /* response_length */ 
+		 eot,		/* eot */
+		 1,		/* writes_complete */
+		 0,		/* urb offset */
+		 BRW_URB_SWIZZLE_NONE);
+}
+
+
+/* Emit the end-of-thread URB write (after the FF_SYNC check, if needed). */
+void brw_clip_kill_thread(struct brw_clip_compile *c)
+{
+   struct brw_compile *p = &c->func;
+
+   brw_clip_ff_sync(c);
+   /* Send an empty message to kill the thread and release any
+    * allocated urb entry:
+    */
+   brw_urb_WRITE(p, 
+		 retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+		 0,
+		 c->reg.R0,
+		 0,		/* allocate */
+		 0,		/* used */
+		 1, 		/* msg len */
+		 0, 		/* response len */
+		 1, 		/* eot */
+		 1,		/* writes complete */
+		 0,
+		 BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+/* Return brw_address() of reg.fixed_planes, i.e. where plane 0 is stored. */
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
+{
+   return brw_address(c->reg.fixed_planes);
+}
+
+/* Return the distance between consecutive clip planes as a UW immediate. */
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )
+{
+   /* 16 when user clip planes are in use, otherwise 4.
+    * NOTE(review): presumably a byte stride -- full float vec4 planes
+    * versus the packed one-dword planes written by
+    * brw_clip_init_planes(); confirm against the plane loader.
+    */
+   return brw_imm_uw(c->key.nr_userclip ? 16 : 4);
+}
+
+
+/* If flatshading, distribute color from provoking vertex prior to
+ * clipping.
+ */
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+			   GLuint to, GLuint from )
+{
+   /* Front and back, primary and secondary colors, in emission order. */
+   static const GLuint color_slots[] = {
+      VERT_RESULT_COL0,
+      VERT_RESULT_COL1,
+      VERT_RESULT_BFC0,
+      VERT_RESULT_BFC1
+   };
+   struct brw_compile *p = &c->func;
+   GLuint n;
+
+   for (n = 0; n < sizeof(color_slots) / sizeof(color_slots[0]); n++) {
+      const GLuint offset = c->offset[color_slots[n]];
+
+      /* Slots whose offset is zero are skipped. */
+      if (!offset)
+         continue;
+
+      brw_MOV(p, 
+	      byte_offset(c->reg.vertex[to], offset),
+	      byte_offset(c->reg.vertex[from], offset));
+   }
+}
+
+
+/* Build c->reg.planemask from the outcode bits delivered in R0.2. */
+void brw_clip_init_clipmask( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg incoming = get_element_ud(c->reg.R0, 2);
+   
+   /* Shift so that lowest outcode bit is rightmost: 
+    * bits 26..31 of R0.2 become bits 0..5 of planemask. */
+   brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
+
+   if (c->key.nr_userclip) {
+      struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
+
+      /* Rearrange userclip outcodes so that they come directly after
+       * the fixed plane bits: bits 14..19 move down to bits 6..11.
+       */
+      brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));
+      brw_SHR(p, tmp, tmp, brw_imm_ud(8));
+      brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);
+      
+      release_tmp(c, tmp);
+   }
+}
+
+void brw_clip_ff_sync(struct brw_clip_compile *c)
+{
+    struct intel_context *intel = &c->func.brw->intel;
+    /* Emit a guarded FF_SYNC: bit 0 of reg.ff_sync records that it was sent. */
+    if (intel->needs_ff_sync) {
+        struct brw_compile *p = &c->func;
+        struct brw_instruction *need_ff_sync;
+
+        brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);   /* is bit 0 still clear? */
+        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
+        need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
+        {
+            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
+            brw_ff_sync(p,
+			c->reg.R0,
+			0,
+			c->reg.R0,
+			1, /* allocate */
+			1, /* response length */
+			0 /* eot */);
+        }
+        brw_ENDIF(p, need_ff_sync);
+        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    }
+}
+
+void brw_clip_init_ff_sync(struct brw_clip_compile *c)
+{
+    /* Zero the reg.ff_sync flag register.  Nothing is emitted when the
+     * hardware does not need FF_SYNC.
+     */
+    if (!c->func.brw->intel.needs_ff_sync)
+        return;
+
+    brw_MOV(&c->func, c->reg.ff_sync, brw_imm_ud(0));
+}
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
new file mode 100644
index 0000000000..d13b9ae298
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -0,0 +1,199 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#include "main/imports.h"
+#include "main/api_noop.h"
+#include "main/macros.h"
+#include "main/simple_list.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_draw.h"
+#include "brw_state.h"
+#include "intel_span.h"
+#include "tnl/t_pipeline.h"
+
+
+/***************************************
+ * Mesa's Driver Functions
+ ***************************************/
+
+static void brwInitDriverFunctions( struct dd_function_table *functions )
+{
+   intelInitDriverFunctions( functions );   /* shared Intel setup goes first */
+
+   brwInitFragProgFuncs( functions );
+   brw_init_queryobj_functions(functions);
+   /* These two hooks are assigned after all of the init calls above. */
+   functions->Enable = brw_enable;
+   functions->DepthRange = brw_depth_range;
+}
+
+/* Create and initialize an i965 (brw) rendering context; returns GL_TRUE on
+ * success, GL_FALSE if allocation or intelInitContext() fails. */
+GLboolean brwCreateContext( int api,
+			    const __GLcontextModes *mesaVis,
+			    __DRIcontext *driContextPriv,
+			    void *sharedContextPrivate)
+{
+   struct dd_function_table functions;
+   struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
+   struct intel_context *intel;
+   GLcontext *ctx;
+
+   if (!brw) {
+      printf("%s: failed to alloc context\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   /* Derive the embedded contexts only once brw is known to be non-NULL. */
+   intel = &brw->intel;
+   ctx = &intel->ctx;
+
+   brwInitVtbl( brw );
+   brwInitDriverFunctions( &functions );
+
+   if (!intelInitContext( intel, api, mesaVis, driContextPriv,
+			  sharedContextPrivate, &functions )) {
+      printf("%s: failed to init intel context\n", __FUNCTION__);
+      FREE(brw);
+      return GL_FALSE;
+   }
+
+   /* Initialize swrast, tnl driver tables: */
+   intelInitSpanFuncs(ctx);
+   TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+
+   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
+   ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
+   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
+   ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
+                                     ctx->Const.MaxTextureImageUnits);
+   ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */
+   ctx->Const.MaxCombinedTextureImageUnits =
+      ctx->Const.MaxVertexTextureImageUnits +
+      ctx->Const.MaxTextureImageUnits;
+
+   /* Mesa limits textures to 4kx4k; it would be nice to fix that someday */
+   ctx->Const.MaxTextureLevels = 13;
+   ctx->Const.Max3DTextureLevels = 9;
+   ctx->Const.MaxCubeTextureLevels = 12;
+   ctx->Const.MaxTextureRectSize = (1<<12);
+   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+
+   /* if conformance mode is set, swrast can handle any size AA point */
+   ctx->Const.MaxPointSizeAA = 255.0;
+
+   /* We want the GLSL compiler to emit code that uses condition codes */
+   ctx->Shader.EmitCondCodes = GL_TRUE;
+   ctx->Shader.EmitNVTempInitialization = GL_TRUE;
+
+   ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
+   ctx->Const.VertexProgram.MaxAluInstructions = 0;
+   ctx->Const.VertexProgram.MaxTexInstructions = 0;
+   ctx->Const.VertexProgram.MaxTexIndirections = 0;
+   ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
+   ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
+   ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
+   ctx->Const.VertexProgram.MaxNativeAttribs = 16;
+   ctx->Const.VertexProgram.MaxNativeTemps = 256;
+   ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
+   ctx->Const.VertexProgram.MaxNativeParameters = 1024;
+   ctx->Const.VertexProgram.MaxEnvParams =
+      MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
+	   ctx->Const.VertexProgram.MaxEnvParams);
+
+   ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024);
+   ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024);
+   ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024);
+   ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024);
+   ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
+   ctx->Const.FragmentProgram.MaxNativeTemps = 256;
+   ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
+   ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
+   ctx->Const.FragmentProgram.MaxEnvParams =
+      MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
+	   ctx->Const.FragmentProgram.MaxEnvParams);
+
+   if (intel->is_g4x || intel->gen >= 5) {
+      brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45;
+      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
+      brw->has_surface_tile_offset = GL_TRUE;
+      brw->has_compr4 = GL_TRUE;
+      brw->has_aa_line_parameters = GL_TRUE;
+      brw->has_pln = GL_TRUE;
+   } else {
+      brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965;
+      brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
+   }
+
+   /* WM maximum threads is number of EUs times number of threads per EU. */
+   if (intel->gen == 5) {
+      brw->urb.size = 1024;
+      brw->vs_max_threads = 72;
+      brw->wm_max_threads = 12 * 6;
+   } else if (intel->is_g4x) {
+      brw->urb.size = 384;
+      brw->vs_max_threads = 32;
+      brw->wm_max_threads = 10 * 5;
+   } else if (intel->gen < 6) {
+      brw->urb.size = 256;
+      brw->vs_max_threads = 16;
+      brw->wm_max_threads = 8 * 4;
+      brw->has_negative_rhw_bug = GL_TRUE;
+   }
+
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) {
+      brw->vs_max_threads = 1;
+      brw->wm_max_threads = 1;
+   }
+
+   brw_init_state( brw );
+
+   /* NOTE(review): neither curbe allocation is checked for failure here. */
+   brw->curbe.last_buf = calloc(1, 4096);
+   brw->curbe.next_buf = calloc(1, 4096);
+
+   brw->state.dirty.mesa = ~0;
+   brw->state.dirty.brw = ~0;
+   brw->emit_state_always = 0;
+
+   ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+   ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+   brw_draw_init( brw );
+
+   /* Most driver functions are hooked up now; set up some immediate state. */
+   brw_update_cc_vp(brw);
+
+   return GL_TRUE;
+}
+
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
new file mode 100644
index 0000000000..cc4e6638e8
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -0,0 +1,782 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#ifndef BRWCONTEXT_INC
+#define BRWCONTEXT_INC
+
+#include "intel_context.h"
+#include "brw_structs.h"
+#include "main/imports.h"
+
+
+/* Glossary:
+ *
+ * URB - uniform resource buffer.  A mid-sized buffer which is
+ * partitioned between the fixed function units and used for passing
+ * values (vertices, primitives, constants) between them.
+ *
+ * CURBE - constant URB entry.  An urb region (entry) used to hold
+ * constant values which the fixed function units can be instructed to
+ * preload into the GRF when spawning a thread.
+ *
+ * VUE - vertex URB entry.  An urb entry holding a vertex and usually
+ * a vertex header.  The header contains control information and
+ * things like primitive type, Begin/end flags and clip codes.  
+ *
+ * PUE - primitive URB entry.  An urb entry produced by the setup (SF)
+ * unit holding rasterization and interpolation parameters.
+ *
+ * GRF - general register file.  One of several register files
+ * addressable by programmed threads.  The inputs (r0, payload, curbe,
+ * urb) of the thread are preloaded to this area before the thread is
+ * spawned.  The registers are individually 8 dwords wide and suitable
+ * for general usage.  Registers holding thread input values are not
+ * special and may be overwritten.
+ *
+ * MRF - message register file.  Threads communicate (and terminate)
+ * by sending messages.  Message parameters are placed in contiguous
+ * MRF registers.  All program output is via these messages.  URB
+ * entries are populated by sending a message to the shared URB
+ * function containing the new data, together with a control word,
+ * often an unmodified copy of R0.
+ *
+ * R0 - GRF register 0.  Typically holds control information used when
+ * sending messages to other threads.
+ *
+ * EU or GEN4 EU: The name of the programmable subsystem of the
+ * i965 hardware.  Threads are executed by the EU, the registers
+ * described above are part of the EU architecture.
+ *
+ * Fixed function units:
+ *
+ * CS - Command streamer.  Notional first unit, little software
+ * interaction.  Holds the URB entries used for constant data, ie the
+ * CURBEs.
+ *
+ * VF/VS - Vertex Fetch / Vertex Shader.  The fixed function part of
+ * this unit is responsible for pulling vertices out of vertex buffers
+ * in vram and injecting them into the processing pipe as VUEs.  If
+ * enabled, it first passes them to a VS thread which is a good place
+ * for the driver to implement any active vertex shader.
+ *
+ * GS - Geometry Shader.  This corresponds to a new DX10 concept.  If
+ * enabled, incoming strips etc are passed to GS threads in individual
+ * line/triangle/point units.  The GS thread may perform arbitrary
+ * computation and emit whatever primitives with whatever vertices it
+ * chooses.  This makes GS an excellent place to implement GL's
+ * unfilled polygon modes, though of course it is capable of much
+ * more.  Additionally, GS is used to translate away primitives not
+ * handled by later units, including Quads and Lineloops.
+ *
+ * CLIP - Clipper.  Mesa's clipping algorithms are imported to run on
+ * this unit.  The fixed function part performs cliptesting against
+ * the 6 fixed clipplanes and makes decisions on whether or not the
+ * incoming primitive needs to be passed to a thread for clipping.
+ * User clip planes are handled via cooperation with the VS thread.
+ *
+ * SF - Strips Fans or Setup: Triangles are prepared for
+ * rasterization.  Interpolation coefficients are calculated.
+ * Flatshading and two-side lighting usually performed here.
+ *
+ * WM - Windower.  Interpolation of vertex attributes performed here.
+ * Fragment shader implemented here.  SIMD aspects of EU taken full
+ * advantage of, as pixels are processed in blocks of 16.
+ *
+ * CC - Color Calculator.  No EU threads associated with this unit.
+ * Handles blending and (presumably) depth and stencil testing.
+ */
+
+
+/* Upper bound on the number of float constants tracked per program;
+ * sizes the brw_wm_prog_data::param[] array.
+ */
+#define BRW_MAX_CURBE                    (32*16)
+
+struct brw_context;
+
+/* Driver-internal dirty bits, accumulated in brw_state_flags::brw.
+ * Every flag must own a distinct bit so that state atoms can depend on
+ * them independently of one another.
+ */
+#define BRW_NEW_URB_FENCE               0x1
+#define BRW_NEW_FRAGMENT_PROGRAM        0x2
+#define BRW_NEW_VERTEX_PROGRAM          0x4
+#define BRW_NEW_INPUT_DIMENSIONS        0x8
+#define BRW_NEW_CURBE_OFFSETS           0x10
+#define BRW_NEW_REDUCED_PRIMITIVE       0x20
+#define BRW_NEW_PRIMITIVE               0x40
+#define BRW_NEW_CONTEXT                 0x80
+#define BRW_NEW_WM_INPUT_DIMENSIONS     0x100
+#define BRW_NEW_PSP                     0x800
+#define BRW_NEW_WM_SURFACES		0x1000
+#define BRW_NEW_BINDING_TABLE		0x2000
+#define BRW_NEW_INDICES			0x4000
+#define BRW_NEW_VERTICES		0x8000
+/**
+ * Used for any batch entry with a relocated pointer that will be used
+ * by any 3D rendering.
+ */
+#define BRW_NEW_BATCH			0x10000
+/** brw->depth_region updated */
+#define BRW_NEW_DEPTH_BUFFER		0x20000
+#define BRW_NEW_NR_WM_SURFACES		0x40000
+#define BRW_NEW_NR_VS_SURFACES		0x80000
+#define BRW_NEW_INDEX_BUFFER		0x100000
+#define BRW_NEW_VS_CONSTBUF		0x200000
+/* Previously shared bit 0x200000 with BRW_NEW_VS_CONSTBUF, which made
+ * the two flags indistinguishable; it now has a bit of its own.
+ */
+#define BRW_NEW_WM_CONSTBUF		0x400000
+
+/**
+ * Three parallel dirty-bit masks.  Used both for the context's pending
+ * state (brw->state.dirty) and, in brw_tracked_state, to declare which
+ * flags a state atom depends on.
+ */
+struct brw_state_flags {
+   /** State update flags signalled by mesa internals */
+   GLuint mesa;
+   /**
+    * State update flags signalled as the result of brw_tracked_state updates
+    * (the BRW_NEW_* bits)
+    */
+   GLuint brw;
+   /** State update flags signalled by brw_state_cache.c searches
+    * (the CACHE_NEW_* bits)
+    */
+   GLuint cache;
+};
+
+
+/** Subclass of Mesa vertex program */
+struct brw_vertex_program {
+   /** Base class.  Kept as the first member because brw_vertex_program()
+    * converts between the two types with a plain pointer cast.
+    */
+   struct gl_vertex_program program;
+   GLuint id;  /* presumably a serial number like brw_fragment_program::id -- confirm */
+   /** When set, prepare_constant_buffer() pushes only those parameters
+    * whose brw->vs.constant_map[] entry is not -1.
+    */
+   GLboolean use_const_buffer;
+};
+
+
+/** Subclass of Mesa fragment program */
+struct brw_fragment_program {
+   /** Base class.  Kept as the first member because brw_fragment_program()
+    * converts between the two types with a plain pointer cast.
+    */
+   struct gl_fragment_program program;
+   GLuint id;  /**< serial no. to identify frag progs, never re-used */
+   GLboolean isGLSL;  /**< really, any IF/LOOP/CONT/BREAK instructions */
+
+   /* NOTE(review): presumably the fragment-side counterpart of
+    * brw_vertex_program::use_const_buffer -- confirm against brw_wm code.
+    */
+   GLboolean use_const_buffer;
+
+   /** for debugging, which texture units are referenced */
+   GLbitfield tex_units_used;
+};
+
+
+/* Data about a particular attempt to compile a program.  Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+struct brw_wm_prog_data {
+   GLuint curb_read_length;
+   GLuint urb_read_length;
+
+   GLuint first_curbe_grf;
+   GLuint total_grf;
+   GLuint total_scratch;
+
+   /* calculate_curbe_offsets() rounds this up to a multiple of 16 to get
+    * the number of CURBE registers reserved for the WM.
+    */
+   GLuint nr_params;       /**< number of float params/constants */
+   GLboolean error;
+
+   /* Pointer to tracked values (only valid once
+    * _mesa_load_state_parameters has been called at runtime).
+    * The first nr_params entries are dereferenced when the constant
+    * buffer is built.
+    */
+   const GLfloat *param[BRW_MAX_CURBE];
+};
+
+/* Per-compile data for the SF (setup) program. */
+struct brw_sf_prog_data {
+   GLuint urb_read_length;
+   GLuint total_grf;
+
+   /* Each vertex may have up to 12 attributes, 4 components each,
+    * except WPOS which requires only 2.  (11*4 + 2) == 46, which
+    * would fit in 12 rows of 4.
+    *
+    * Actually we use 4 for each, so call it 12 rows.
+    */
+   GLuint urb_entry_size;
+};
+
+/* Per-compile data for the CLIP program. */
+struct brw_clip_prog_data {
+   GLuint curb_read_length;	/* user planes? */
+   GLuint clip_mode;
+   GLuint urb_read_length;
+   GLuint total_grf;
+};
+
+/* Per-compile data for the GS program. */
+struct brw_gs_prog_data {
+   GLuint urb_read_length;
+   GLuint total_grf;
+};
+
+/* Per-compile data for the VS program. */
+struct brw_vs_prog_data {
+   GLuint curb_read_length;
+   GLuint urb_read_length;
+   GLuint total_grf;
+   GLbitfield64 outputs_written;
+   /* calculate_curbe_offsets() rounds this up to a multiple of 16 to get
+    * the number of CURBE registers reserved for the VS.
+    */
+   GLuint nr_params;       /**< number of float params/constants */
+
+   GLuint inputs_read;
+
+   /* Used for calculating urb partitions:
+    */
+   GLuint urb_entry_size;
+};
+
+
+/* Per-output component counts for the vertex shader, one byte per
+ * VERT_RESULT_* slot.
+ * Size == 0 if output either not written, or always [0,0,0,1]
+ *
+ * NOTE(review): "ouput" in the tag looks like a misspelling of "output";
+ * left untouched here because users of the struct are outside this view.
+ */
+struct brw_vs_ouput_sizes {
+   GLubyte output_size[VERT_RESULT_MAX];
+};
+
+
+/** Number of texture sampler units */
+#define BRW_MAX_TEX_UNIT 16
+
+/** Max number of render targets in a shader */
+#define BRW_MAX_DRAW_BUFFERS 8
+
+/**
+ * Size of our surface binding table for the WM.
+ * This contains pointers to the drawing surfaces, the current texture
+ * objects and one shader constant buffer (+1).
+ */
+#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+
+/**
+ * Helpers to convert drawing buffers, textures and constant buffers
+ * to surface binding table indexes, for WM.
+ *
+ * Layout: draw buffers first, then the single fragment constant
+ * buffer, then the textures.
+ */
+#define SURF_INDEX_DRAW(d)           (d)
+#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS) 
+#define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 1 + (t))
+
+/**
+ * Size of surface binding table for the VS.
+ * Only one constant buffer for now.
+ */
+#define BRW_VS_MAX_SURF 1
+
+/**
+ * Only a VS constant buffer
+ */
+#define SURF_INDEX_VERT_CONST_BUFFER 0
+
+
+/**
+ * Kinds of state buffer held in the state cache.
+ *
+ * Each enumerator's value is also the bit position of the matching
+ * CACHE_NEW_* mask (1 << id), so new entries must keep BRW_MAX_CACHE
+ * within the width of brw_state_flags::cache.
+ */
+enum brw_cache_id {
+   BRW_BLEND_STATE,
+   BRW_DEPTH_STENCIL_STATE,
+   BRW_COLOR_CALC_STATE,
+   BRW_CC_VP,
+   BRW_CC_UNIT,
+   BRW_WM_PROG,
+   BRW_SAMPLER_DEFAULT_COLOR,
+   BRW_SAMPLER,
+   BRW_WM_UNIT,
+   BRW_SF_PROG,
+   BRW_SF_VP,
+   BRW_SF_UNIT, /* scissor state on gen6 */
+   BRW_VS_UNIT,
+   BRW_VS_PROG,
+   BRW_GS_UNIT,
+   BRW_GS_PROG,
+   BRW_CLIP_VP,
+   BRW_CLIP_UNIT,
+   BRW_CLIP_PROG,
+
+   /* Number of cache ids; sizes the per-id arrays in struct brw_cache. */
+   BRW_MAX_CACHE
+};
+
+/** One entry in the state cache; entries are chained through 'next'. */
+struct brw_cache_item {
+   /**
+    * Effectively part of the key, cache_id identifies what kind of state
+    * buffer is involved, and also which brw->state.dirty.cache flag should
+    * be set when this cache item is chosen.
+    */
+   enum brw_cache_id cache_id;
+   /** 32-bit hash of the key data */
+   GLuint hash;
+   GLuint key_size;		/* for variable-sized keys */
+   const void *key;
+   /* Array of nr_reloc_bufs buffer objects associated with this item. */
+   drm_intel_bo **reloc_bufs;
+   GLuint nr_reloc_bufs;
+
+   /* The cached state buffer itself. */
+   drm_intel_bo *bo;
+
+   struct brw_cache_item *next;
+};   
+
+
+
+/** The state cache: a table of brw_cache_item chains plus per-id bookkeeping. */
+struct brw_cache {
+   struct brw_context *brw;
+
+   /* NOTE(review): presumably 'size' is the number of slots in items[]
+    * and 'n_items' the number of stored entries -- confirm in
+    * brw_state_cache.c.
+    */
+   struct brw_cache_item **items;
+   GLuint size, n_items;
+
+   /* One string per cache id (presumably a human-readable name). */
+   char *name[BRW_MAX_CACHE];
+
+   /* Record of the last BOs chosen for each cache_id.  Used to set
+    * brw->state.dirty.cache when a new cache item is chosen.
+    */
+   drm_intel_bo *last_bo[BRW_MAX_CACHE];
+};
+
+
+/* A state atom: the set of dirty flags it depends on, plus the hooks
+ * to run when any of them is raised.
+ *
+ * Considered adding a member to this struct to document which flags
+ * an update might raise so that ordering of the state atoms can be
+ * checked or derived at runtime.  Dropped the idea in favor of having
+ * a debug mode where the state is monitored for flags which are
+ * raised that have already been tested against.
+ */
+struct brw_tracked_state {
+   /* Flags this atom depends on, per category. */
+   struct brw_state_flags dirty;
+   /* Either hook may be left unset; e.g. brw_curbe_offsets provides
+    * only 'prepare'.
+    */
+   void (*prepare)( struct brw_context *brw );
+   void (*emit)( struct brw_context *brw );
+};
+
+/* Flags for brw->state.dirty.cache.
+ *
+ * One bit per enum brw_cache_id value; the bit position is the
+ * enumerator's value.
+ */
+#define CACHE_NEW_BLEND_STATE            (1<<BRW_BLEND_STATE)
+#define CACHE_NEW_DEPTH_STENCIL_STATE    (1<<BRW_DEPTH_STENCIL_STATE)
+#define CACHE_NEW_COLOR_CALC_STATE       (1<<BRW_COLOR_CALC_STATE)
+#define CACHE_NEW_CC_VP                  (1<<BRW_CC_VP)
+#define CACHE_NEW_CC_UNIT                (1<<BRW_CC_UNIT)
+#define CACHE_NEW_WM_PROG                (1<<BRW_WM_PROG)
+#define CACHE_NEW_SAMPLER_DEFAULT_COLOR  (1<<BRW_SAMPLER_DEFAULT_COLOR)
+#define CACHE_NEW_SAMPLER                (1<<BRW_SAMPLER)
+#define CACHE_NEW_WM_UNIT                (1<<BRW_WM_UNIT)
+#define CACHE_NEW_SF_PROG                (1<<BRW_SF_PROG)
+#define CACHE_NEW_SF_VP                  (1<<BRW_SF_VP)
+#define CACHE_NEW_SF_UNIT                (1<<BRW_SF_UNIT)
+#define CACHE_NEW_VS_UNIT                (1<<BRW_VS_UNIT)
+#define CACHE_NEW_VS_PROG                (1<<BRW_VS_PROG)
+#define CACHE_NEW_GS_UNIT                (1<<BRW_GS_UNIT)
+#define CACHE_NEW_GS_PROG                (1<<BRW_GS_PROG)
+#define CACHE_NEW_CLIP_VP                (1<<BRW_CLIP_VP)
+#define CACHE_NEW_CLIP_UNIT              (1<<BRW_CLIP_UNIT)
+#define CACHE_NEW_CLIP_PROG              (1<<BRW_CLIP_PROG)
+
+/* Node in the singly linked list hanging off
+ * brw_context::cached_batch_items.
+ */
+struct brw_cached_batch_item {
+   struct header *header;
+   GLuint sz;   /* presumably the size in bytes of the data at 'header' -- confirm */
+   struct brw_cached_batch_item *next;
+};
+   
+
+
+/* Number of 32-bit words needed to hold one bit per vertex attribute
+ * (VERT_ATTRIB_MAX rounded up to a multiple of 32, divided by 32).
+ *
+ * Protect against a future where VERT_ATTRIB_MAX > 32.  Wouldn't life
+ * be easier if C allowed arrays of packed elements?
+ */
+#define ATTRIB_BIT_DWORDS  ((VERT_ATTRIB_MAX+31)/32)
+
+/** Driver-side description of one vertex input and where its data was uploaded. */
+struct brw_vertex_element {
+   /** The client array this input is sourced from */
+   const struct gl_client_array *glarray;
+
+   /** The corresponding Mesa vertex attribute */
+   gl_vert_attrib attrib;
+   /** Size of a complete element */
+   GLuint element_size;
+   /** Number of uploaded elements for this input. */
+   GLuint count;
+   /** Byte stride between elements in the uploaded array */
+   GLuint stride;
+   /** Offset of the first element within the buffer object */
+   unsigned int offset;
+   /** Buffer object containing the uploaded vertex data */
+   drm_intel_bo *bo;
+};
+
+
+
+/* Packed per-attribute size summary: the array provides two bits for
+ * each of the VERT_ATTRIB_MAX attributes (twice ATTRIB_BIT_DWORDS words).
+ */
+struct brw_vertex_info {
+   GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
+};
+
+/** Driver subclass of Mesa's query object. */
+struct brw_query_object {
+   /* Base class, declared first. */
+   struct gl_query_object Base;
+
+   /** Last query BO associated with this query. */
+   drm_intel_bo *bo;
+   /** First index in bo with query data for this object. */
+   int first_index;
+   /** Last index in bo with query data for this object. */
+   int last_index;
+};
+
+
+/**
+ * brw_context is derived from intel_context.
+ *
+ * Holds all i965-specific driver state: dirty flags, the state cache,
+ * uploaded vertex/index data, URB and CURBE layout, and the per-unit
+ * (VS, GS, CLIP, SF, WM, CC) program and state buffer objects.
+ */
+struct brw_context
+{
+   struct intel_context intel;  /**< base class, must be first field */
+   GLuint primitive;
+
+   /* Hardware capability / workaround switches, chosen at context
+    * creation from the chipset generation.
+    */
+   GLboolean emit_state_always;
+   GLboolean has_surface_tile_offset;
+   GLboolean has_compr4;
+   GLboolean has_negative_rhw_bug;
+   GLboolean has_aa_line_parameters;
+   GLboolean has_pln;
+
+   struct {
+      struct brw_state_flags dirty;
+
+      GLuint nr_color_regions;
+      struct intel_region *color_regions[MAX_DRAW_BUFFERS];
+      struct intel_region *depth_region;
+
+      /**
+       * List of buffers accumulated in brw_validate_state to receive
+       * drm_intel_bo_check_aperture treatment before exec, so we can
+       * know if we should flush the batch and try again before
+       * emitting primitives.
+       *
+       * This can be a fixed number as we only have a limited number of
+       * objects referenced from the batchbuffer in a primitive emit,
+       * consisting of the vertex buffers, pipelined state pointers,
+       * the CURBE, the depth buffer, and a query BO.
+       */
+      drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
+      int validated_bo_count;
+   } state;
+
+   struct brw_cache cache;
+   struct brw_cached_batch_item *cached_batch_items;
+
+   struct {
+      struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
+
+      struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
+      GLuint nr_enabled;
+
+#define BRW_NR_UPLOAD_BUFS 17
+#define BRW_UPLOAD_INIT_SIZE (128*1024)
+
+      struct {
+	 drm_intel_bo *bo;
+	 GLuint offset;
+      } upload;
+
+      /* Summary of size and varying of active arrays, so we can check
+       * for changes to this state:
+       */
+      struct brw_vertex_info info;
+      unsigned int min_index, max_index;
+   } vb;
+
+   struct {
+      /**
+       * Index buffer for this draw_prims call.
+       *
+       * Updates are signaled by BRW_NEW_INDICES.
+       */
+      const struct _mesa_index_buffer *ib;
+
+      /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
+      drm_intel_bo *bo;
+      unsigned int offset;
+      unsigned int size;
+      /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
+       * avoid re-uploading the IB packet over and over if we're actually
+       * referencing the same index buffer.
+       */
+      unsigned int start_vertex_offset;
+   } ib;
+
+   /* Active vertex and fragment programs:
+    */
+   const struct gl_vertex_program *vertex_program;
+   const struct gl_fragment_program *fragment_program;
+
+
+   /* For populating the gtt:
+    */
+   GLuint next_free_page;
+
+   /* hw-dependent 3DSTATE_VF_STATISTICS opcode */
+   uint32_t CMD_VF_STATISTICS;
+   /* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */
+   uint32_t CMD_PIPELINE_SELECT;
+   /* Thread limits chosen per chipset at context creation; both are
+    * forced to 1 when DEBUG_SINGLE_THREAD is set.
+    */
+   int vs_max_threads;
+   int wm_max_threads;
+
+   /* URB partitioning.  Readers in this driver tag their use of these
+    * fields with BRW_NEW_URB_FENCE (no BRW_NEW_URB_ALLOCATIONS flag is
+    * defined in this header).
+    */
+   struct {
+      GLuint vsize;		/* vertex size plus header in urb registers */
+      GLuint csize;		/* constant buffer size in urb registers */
+      GLuint sfsize;		/* setup data size in urb registers */
+
+      GLboolean constrained;
+
+      GLuint nr_vs_entries;
+      GLuint nr_gs_entries;
+      GLuint nr_clip_entries;
+      GLuint nr_sf_entries;
+      GLuint nr_cs_entries;
+
+      /* gen6 */
+      GLuint vs_size;
+/*       GLuint gs_size; */
+/*       GLuint clip_size; */
+/*       GLuint sf_size; */
+/*       GLuint cs_size; */
+
+      GLuint vs_start;
+      GLuint gs_start;
+      GLuint clip_start;
+      GLuint sf_start;
+      GLuint cs_start;
+      GLuint size; /* Hardware URB size, in KB. */
+   } urb;
+
+
+   /* BRW_NEW_CURBE_OFFSETS:
+    *
+    * Start/size values are in CURBE registers of 16 floats each, laid
+    * out by calculate_curbe_offsets() in the order WM, CLIP, VS.
+    */
+   struct {
+      GLuint wm_start;  /**< pos of first wm const in CURBE buffer */
+      GLuint wm_size;   /**< number of float[4] consts, multiple of 16 */
+      GLuint clip_start;
+      GLuint clip_size;
+      GLuint vs_start;
+      GLuint vs_size;
+      GLuint total_size;
+
+      drm_intel_bo *curbe_bo;
+      /** Offset within curbe_bo of space for current curbe entry */
+      GLuint curbe_offset;
+      /** Offset within curbe_bo of space for next curbe entry */
+      GLuint curbe_next_offset;
+
+      /**
+       * Copy of the last set of CURBEs uploaded.  Frequently we'll end up
+       * in brw_curbe.c with the same set of constant data to be uploaded,
+       * so we'd rather not upload new constants in that case (it can cause
+       * a pipeline bubble since only up to 4 can be pipelined at a time).
+       */
+      GLfloat *last_buf;
+      /**
+       * Allocation for where to calculate the next set of CURBEs.
+       * It's a hot enough path that malloc/free of that data matters.
+       */
+      GLfloat *next_buf;
+      /** Size in bytes of the data held in last_buf */
+      GLuint last_bufsz;
+   } curbe;
+
+   struct {
+      struct brw_vs_prog_data *prog_data;
+      int8_t *constant_map; /* variable array following prog_data */
+
+      drm_intel_bo *prog_bo;
+      drm_intel_bo *state_bo;
+      drm_intel_bo *const_bo;
+
+      /** Binding table of pointers to surf_bo entries */
+      drm_intel_bo *bind_bo;
+      uint32_t bind_bo_offset;
+      drm_intel_bo *surf_bo[BRW_VS_MAX_SURF];
+      uint32_t surf_offset[BRW_VS_MAX_SURF];
+      GLuint nr_surfaces;
+   } vs;
+
+   struct {
+      struct brw_gs_prog_data *prog_data;
+
+      GLboolean prog_active;
+      drm_intel_bo *prog_bo;
+      drm_intel_bo *state_bo;
+   } gs;
+
+   struct {
+      struct brw_clip_prog_data *prog_data;
+
+      drm_intel_bo *prog_bo;
+      drm_intel_bo *state_bo;
+      drm_intel_bo *vp_bo;
+   } clip;
+
+
+   struct {
+      struct brw_sf_prog_data *prog_data;
+
+      drm_intel_bo *prog_bo;
+      drm_intel_bo *state_bo;
+      drm_intel_bo *vp_bo;
+   } sf;
+
+   struct {
+      struct brw_wm_prog_data *prog_data;
+      struct brw_wm_compile *compile_data;
+
+      /** Input sizes, calculated from active vertex program.
+       * One bit per fragment program input attribute.
+       */
+      GLbitfield input_size_masks[4];
+
+      /** Array of surface default colors (texture border color) */
+      drm_intel_bo *sdc_bo[BRW_MAX_TEX_UNIT];
+
+      GLuint render_surf;
+      GLuint nr_surfaces;
+
+      GLuint max_threads;
+      drm_intel_bo *scratch_bo;
+
+      GLuint sampler_count;
+      drm_intel_bo *sampler_bo;
+
+      /** Binding table of pointers to surf_bo entries */
+      drm_intel_bo *bind_bo;
+      uint32_t bind_bo_offset;
+      drm_intel_bo *surf_bo[BRW_WM_MAX_SURF];
+      uint32_t surf_offset[BRW_WM_MAX_SURF];
+
+      drm_intel_bo *prog_bo;
+      drm_intel_bo *state_bo;
+      drm_intel_bo *const_bo;
+   } wm;
+
+
+   struct {
+      /* gen4 */
+      drm_intel_bo *prog_bo;
+      drm_intel_bo *vp_bo;
+
+      /* gen6 */
+      drm_intel_bo *blend_state_bo;
+      drm_intel_bo *depth_stencil_state_bo;
+      drm_intel_bo *color_calc_state_bo;
+
+      drm_intel_bo *state_bo;
+      uint32_t state_offset;
+   } cc;
+
+   struct {
+      struct brw_query_object *obj;
+      drm_intel_bo *bo;
+      int index;
+      GLboolean active;
+   } query;
+   /* Used to give every program string a unique id
+    */
+   GLuint program_id;
+};
+
+
+/* Pack four channel values into one word as r<<24 | g<<16 | b<<8 | a.
+ * Every argument is parenthesized so that argument expressions using
+ * low-precedence operators (|, ?:, ...) are shifted and OR'ed as a unit.
+ */
+#define BRW_PACKCOLOR8888(r,g,b,a)  (((r)<<24) | ((g)<<16) | ((b)<<8) | (a))
+
+
+
+/*======================================================================
+ * brw_vtbl.c
+ */
+void brwInitVtbl( struct brw_context *brw );
+
+/*======================================================================
+ * brw_context.c
+ */
+GLboolean brwCreateContext( int api,
+			    const __GLcontextModes *mesaVis,
+			    __DRIcontext *driContextPriv,
+			    void *sharedContextPrivate);
+
+/*======================================================================
+ * brw_queryobj.c
+ */
+void brw_init_queryobj_functions(struct dd_function_table *functions);
+void brw_prepare_query_begin(struct brw_context *brw);
+void brw_emit_query_begin(struct brw_context *brw);
+void brw_emit_query_end(struct brw_context *brw);
+
+/*======================================================================
+ * brw_state_dump.c
+ */
+void brw_debug_batch(struct intel_context *intel);
+
+/*======================================================================
+ * brw_tex.c
+ */
+void brw_validate_textures( struct brw_context *brw );
+
+
+/*======================================================================
+ * brw_program.c
+ */
+void brwInitFragProgFuncs( struct dd_function_table *functions );
+
+
+/* brw_urb.c
+ */
+void brw_upload_urb_fence(struct brw_context *brw);
+
+/* brw_cc.c */
+void brw_update_cc_vp(struct brw_context *brw);
+
+/* brw_curbe.c
+ */
+void brw_upload_cs_urb_state(struct brw_context *brw);
+
+/* brw_disasm.c */
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
+
+/* brw_state.c */
+void brw_enable(GLcontext * ctx, GLenum cap, GLboolean state);
+void brw_depth_range(GLcontext *ctx, GLclampd nearval, GLclampd farval);
+
+/*======================================================================
+ * Inline conversion functions.  These are better-typed than the
+ * macros used previously:
+ */
+/* Downcast a GLcontext to the brw_context that contains it.
+ * NOTE(review): relies on the GLcontext living at the very start of
+ * brw_context (via its leading intel_context member) -- confirm in
+ * intel_context.h.
+ */
+static INLINE struct brw_context *
+brw_context( GLcontext *ctx )
+{
+   struct brw_context *brw = (struct brw_context *) ctx;
+
+   return brw;
+}
+
+/* Downcast a Mesa vertex program to the driver subclass; valid because
+ * 'program' is the first member of struct brw_vertex_program.
+ */
+static INLINE struct brw_vertex_program *
+brw_vertex_program(struct gl_vertex_program *p)
+{
+   struct brw_vertex_program *vp = (struct brw_vertex_program *) p;
+
+   return vp;
+}
+
+/* Const-preserving variant of brw_vertex_program(). */
+static INLINE const struct brw_vertex_program *
+brw_vertex_program_const(const struct gl_vertex_program *p)
+{
+   const struct brw_vertex_program *vp = (const struct brw_vertex_program *) p;
+
+   return vp;
+}
+
+/* Downcast a Mesa fragment program to the driver subclass; valid because
+ * 'program' is the first member of struct brw_fragment_program.
+ */
+static INLINE struct brw_fragment_program *
+brw_fragment_program(struct gl_fragment_program *p)
+{
+   struct brw_fragment_program *fp = (struct brw_fragment_program *) p;
+
+   return fp;
+}
+
+/* Const-preserving variant of brw_fragment_program(). */
+static INLINE const struct brw_fragment_program *
+brw_fragment_program_const(const struct gl_fragment_program *p)
+{
+   const struct brw_fragment_program *fp =
+      (const struct brw_fragment_program *) p;
+
+   return fp;
+}
+
+#endif
+
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
new file mode 100644
index 0000000000..6c0b79f724
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -0,0 +1,372 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+#include "intel_regions.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_util.h"
+
+
+/**
+ * Partition the CURBE between the various users of constant values:
+ * Note that vertex and fragment shaders can now fetch constants out
+ * of constant buffers.  We no longer allocate a block of the GRF for
+ * constants.  That greatly reduces the demand for space in the CURBE.
+ * Some of the comments within are dated...
+ */
+static void calculate_curbe_offsets( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   /* CACHE_NEW_WM_PROG: WM constants, rounded up to whole 16-float regs */
+   const GLuint wm_regs = (brw->wm.prog_data->nr_params + 15) / 16;
+   /* BRW_NEW_VERTEX_PROGRAM: likewise for the VS */
+   const GLuint vs_regs = (brw->vs.prog_data->nr_params + 15) / 16;
+   GLuint clip_regs = 0;
+   GLuint total;
+
+   /* _NEW_TRANSFORM: once any user plane is enabled, the six fixed
+    * planes are sent along with the enabled user planes, 4 floats each.
+    */
+   if (ctx->Transform.ClipPlanesEnabled) {
+      const GLuint planes =
+	 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled);
+      clip_regs = (planes * 4 + 15) / 16;
+   }
+
+   total = wm_regs + vs_regs + clip_regs;
+
+   /* Exceeding the limit can happen - what to do?  Probably rather than
+    * falling back, the best thing to do is emit programs which code
+    * the constants as immediate values, either as a static cap on WM
+    * and VS, or adaptively.  For WM that depends on the results of
+    * program generation, so it would mean re-generating programs after
+    * a curbe allocation failure.
+    *
+    * Max size is 32 - just large enough to hold the 128 parameters
+    * allowed by the fragment and vertex program api's.  It's not clear
+    * what happens when both VP and FP want to use 128 parameters.
+    */
+   assert(total <= 32);
+
+   /* Lazy resize: keep the current layout while everything still fits,
+    * the clip section is unchanged, and the allocation is not more than
+    * four times larger than needed (tiny layouts are never shrunk).
+    */
+   if (wm_regs <= brw->curbe.wm_size &&
+       vs_regs <= brw->curbe.vs_size &&
+       clip_regs == brw->curbe.clip_size &&
+       !(total < brw->curbe.total_size / 4 &&
+	 brw->curbe.total_size > 16))
+      return;
+
+   /* New layout, packed back to back in the order WM, CLIP, VS. */
+   brw->curbe.wm_start = 0;
+   brw->curbe.wm_size = wm_regs;
+   brw->curbe.clip_start = wm_regs;
+   brw->curbe.clip_size = clip_regs;
+   brw->curbe.vs_start = wm_regs + clip_regs;
+   brw->curbe.vs_size = vs_regs;
+   brw->curbe.total_size = total;
+
+   if (0)
+      printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
+	     brw->curbe.wm_start,
+	     brw->curbe.wm_size,
+	     brw->curbe.clip_start,
+	     brw->curbe.clip_size,
+	     brw->curbe.vs_start,
+	     brw->curbe.vs_size );
+
+   brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
+}
+
+
+/* State atom that recomputes the CURBE layout.  The dirty flags below
+ * mirror the inputs read by calculate_curbe_offsets(): the transform
+ * state (clip planes), the vertex program, and the cached WM program.
+ * Only a prepare hook is provided; 'emit' is left unset.
+ */
+const struct brw_tracked_state brw_curbe_offsets = {
+   .dirty = {
+      .mesa = _NEW_TRANSFORM,
+      .brw  = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_CONTEXT,
+      .cache = CACHE_NEW_WM_PROG
+   },
+   .prepare = calculate_curbe_offsets
+};
+
+
+
+
+/* Define the number of curbes within CS's urb allocation.  Multiple
+ * urb entries -> multiple curbes.  These will be used by
+ * fixed-function hardware in a double-buffering scheme to avoid a
+ * pipeline stall each time the contents of the curbe is changed.
+ */
+/* Build a CS_URB_STATE packet from brw->urb.nr_cs_entries and
+ * brw->urb.csize and hand it to BRW_CACHED_BATCH_STRUCT.
+ */
+void brw_upload_cs_urb_state(struct brw_context *brw)
+{
+   struct brw_cs_urb_state cs_urb;
+   /* Start from an all-zero packet so unset fields are 0. */
+   memset(&cs_urb, 0, sizeof(cs_urb));
+
+   /* It appears that this is the state packet for the CS unit, ie. the
+    * urb entries detailed here are housed in the CS range from the
+    * URB_FENCE command.
+    */
+   cs_urb.header.opcode = CMD_CS_URB_STATE;
+   /* Packet size in dwords, minus 2. */
+   cs_urb.header.length = sizeof(cs_urb)/4 - 2;
+
+   /* BRW_NEW_URB_FENCE */
+   cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
+   /* The entry size is programmed as (size - 1). */
+   cs_urb.bits0.urb_entry_size = brw->urb.csize - 1;
+
+   /* At least one CS URB entry must have been allocated. */
+   assert(brw->urb.nr_cs_entries);
+   BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
+}
+
+/* The six fixed clip planes, one 4-float row per plane.  When any user
+ * clip plane is enabled, prepare_constant_buffer() copies these into the
+ * clip section of the CURBE ahead of the enabled user planes.
+ */
+static GLfloat fixed_plane[6][4] = {
+   { 0,    0,   -1, 1 },
+   { 0,    0,    1, 1 },
+   { 0,   -1,    0, 1 },
+   { 0,    1,    0, 1 },
+   {-1,    0,    0, 1 },
+   { 1,    0,    0, 1 }
+};
+
+/* Build the CURBE constants (WM params, clip planes, VS params) in
+ * curbe.next_buf and copy them into curbe_bo unless they are identical
+ * to the last uploaded set.  Too much variability for the cache mechanism.
+ */
+static void prepare_constant_buffer(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const struct brw_vertex_program *vp =
+      brw_vertex_program_const(brw->vertex_program);
+   const GLuint sz = brw->curbe.total_size;
+   const GLuint bufsz = sz * 16 * sizeof(GLfloat); /* bytes; sz counts units of 16 floats */
+   GLfloat *buf;
+   GLuint i;
+
+   if (sz == 0) {
+      brw->curbe.last_bufsz  = 0;
+      return; /* nothing to upload; curbe_bo is not validated on this path */
+   }
+
+   buf = brw->curbe.next_buf; /* staging copy, compared against last_buf below */
+
+   /* fragment shader (WM) constants: one float per prog_data param */
+   if (brw->curbe.wm_size) {
+      GLuint offset = brw->curbe.wm_start * 16; /* index into buf, in floats */
+
+      /* each param[] entry is a pointer to the float value to copy */
+      for (i = 0; i < brw->wm.prog_data->nr_params; i++) 
+	 buf[offset + i] = *brw->wm.prog_data->param[i];
+   }
+
+
+   /* The clipplanes are actually delivered to both CLIP and VS units.
+    * VS uses them to calculate the outcode bitmasks.
+    */
+   if (brw->curbe.clip_size) {
+      GLuint offset = brw->curbe.clip_start * 16; /* index into buf, in floats */
+      GLuint j;
+
+      /* If any planes go this way, send them all: the six fixed planes first.
+       */
+      for (i = 0; i < 6; i++) {
+	 buf[offset + i * 4 + 0] = fixed_plane[i][0];
+	 buf[offset + i * 4 + 1] = fixed_plane[i][1];
+	 buf[offset + i * 4 + 2] = fixed_plane[i][2];
+	 buf[offset + i * 4 + 3] = fixed_plane[i][3];
+      }
+
+      /* User clip planes (_NEW_TRANSFORM plus _NEW_PROJECTION to get to
+       * clip-space): enabled ones are packed after the fixed six (i == 6).
+       */
+      assert(MAX_CLIP_PLANES == 6);
+      for (j = 0; j < MAX_CLIP_PLANES; j++) {
+	 if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
+	    buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0];
+	    buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1];
+	    buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2];
+	    buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3];
+	    i++; /* i is the output slot, j the source plane */
+	 }
+      }
+   }
+
+   /* vertex shader constants, copied as four-float vectors */
+   if (brw->curbe.vs_size) {
+      GLuint offset = brw->curbe.vs_start * 16; /* index into buf, in floats */
+      GLuint nr = brw->vs.prog_data->nr_params / 4; /* number of vec4 slots */
+
+      if (vp->use_const_buffer) {
+	 /* Push-constant subset used alongside a pull constant buffer:
+	  * constant_map[i] is parameter i's vec4 slot, or -1 to skip it.
+	  */
+	 for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) {
+	    if (brw->vs.constant_map[i] != -1) {
+	       assert(brw->vs.constant_map[i] <= nr); /* NOTE(review): slot == nr lies past the nr vec4s; should this be '<'? */
+	       memcpy(buf + offset + brw->vs.constant_map[i] * 4,
+		      vp->program.Base.Parameters->ParameterValues[i],
+		      4 * sizeof(float));
+	    }
+	 }
+      } else { /* no remapping: the first nr parameters, in order */
+	 for (i = 0; i < nr; i++) {
+	    memcpy(buf + offset + i * 4,
+		   vp->program.Base.Parameters->ParameterValues[i],
+		   4 * sizeof(float));
+	 }
+      }
+   }
+
+   if (0) { /* debug dump: compiled but never executed */
+      for (i = 0; i < sz*16; i+=4) 
+	 printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+		buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
+
+      printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+	     brw->curbe.last_buf, buf,
+	     bufsz, brw->curbe.last_bufsz,
+	     brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
+   }
+
+   if (brw->curbe.curbe_bo != NULL &&
+       bufsz == brw->curbe.last_bufsz &&
+       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
+      /* constants have not changed: curbe_offset is left as it was */
+   } else {
+      /* Update the record of what our last set of constants was.  We
+       * don't just flip the pointers because we don't fill in the
+       * data in the padding between the entries.
+       */
+      memcpy(brw->curbe.last_buf, buf, bufsz);
+      brw->curbe.last_bufsz = bufsz;
+
+      if (brw->curbe.curbe_bo != NULL &&
+	  brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)
+      { /* this entry would not fit in the current bo: drop it */
+	 drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
+	 drm_intel_bo_unreference(brw->curbe.curbe_bo);
+	 brw->curbe.curbe_bo = NULL;
+      }
+
+      if (brw->curbe.curbe_bo == NULL) {
+	 /* Allocate a single page for CURBE entries for this batchbuffer.
+	  * They're generally around 64b.
+	  */
+	 brw->curbe.curbe_bo = drm_intel_bo_alloc(brw->intel.bufmgr, "CURBE",
+						  4096, 1 << 6); /* size, alignment (per libdrm prototype) */
+	 brw->curbe.curbe_next_offset = 0;
+	 drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo);
+	 assert(bufsz < 4096); /* one entry must fit in the page just allocated */
+      }
+
+      brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
+      brw->curbe.curbe_next_offset += bufsz;
+      brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64); /* next entry starts 64-byte aligned */
+
+      /* Copy data to the buffer:
+       */
+      memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset,
+	     buf,
+	     bufsz);
+   }
+
+   brw_add_validated_bo(brw, brw->curbe.curbe_bo);
+
+   /* Because this provokes an action (ie copy the constants into the
+    * URB), it shouldn't be shortcircuited if identical to the
+    * previous time - because eg. the urb destination may have
+    * changed, or the urb contents different to last time.
+    *
+    * Note that the data referred to is actually copied internally,
+    * not just used in place according to passed pointer.
+    *
+    * It appears that the CS unit takes care of using each available
+    * URB entry (Const URB Entry == CURBE) in turn, and issuing
+    * flushes as necessary when doublebuffering of CURBEs isn't
+    * possible.
+    */
+}
+
+static void emit_constant_buffer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel; /* not named below; presumably used by the batch macros -- confirm */
+   GLuint sz = brw->curbe.total_size;
+   /* Emit a two-dword CMD_CONST_BUFFER packet pointing at the data staged by prepare_constant_buffer(). */
+   BEGIN_BATCH(2);
+   if (sz == 0) {
+      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); /* bit 8 clear, second dword is zero */
+      OUT_BATCH(0);
+   } else {
+      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); /* bit 8 set: second dword is a relocation */
+      OUT_RELOC(brw->curbe.curbe_bo,
+		I915_GEM_DOMAIN_INSTRUCTION, 0,
+		(sz - 1) + brw->curbe.curbe_offset); /* delta = (size - 1) + offset into the bo */
+   }
+   ADVANCE_BATCH();
+}
+
+/* This tracked state is unique in that the state it monitors varies
+ * dynamically depending on the parameters tracked by the fragment and
+ * vertex programs.  This is the template used as a starting point,
+ * each context will maintain a copy of this internally and update as
+ * required.
+ */
+const struct brw_tracked_state brw_constant_buffer = {
+   .dirty = {
+      .mesa = _NEW_PROGRAM_CONSTANTS,
+      .brw  = (BRW_NEW_FRAGMENT_PROGRAM |
+	       BRW_NEW_VERTEX_PROGRAM |
+	       BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
+	       BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
+	       BRW_NEW_CURBE_OFFSETS | /* presumably covers curbe.*_start/*_size read in prepare -- confirm */
+	       BRW_NEW_BATCH),
+      .cache = (CACHE_NEW_WM_PROG) /* prepare reads brw->wm.prog_data */
+   },
+   .prepare = prepare_constant_buffer, /* stages the constants and copies them into curbe_bo */
+   .emit = emit_constant_buffer, /* writes the CMD_CONST_BUFFER packet to the batch */
+};
+
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
new file mode 100644
index 0000000000..39bf5b63fc
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -0,0 +1,1084 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+#ifndef BRW_DEFINES_H
+#define BRW_DEFINES_H
+
+/* 3D state: _3DOP_* values first, then three groups numbered from 0x00.
+ */
+#define _3DOP_3DSTATE_PIPELINED       0x0
+#define _3DOP_3DSTATE_NONPIPELINED    0x1
+#define _3DOP_3DCONTROL               0x2
+#define _3DOP_3DPRIMITIVE             0x3
+
+#define _3DSTATE_PIPELINED_POINTERS       0x00 /* group 1: presumably used with _3DOP_3DSTATE_PIPELINED -- confirm */
+#define _3DSTATE_BINDING_TABLE_POINTERS   0x01
+#define _3DSTATE_VERTEX_BUFFERS           0x08
+#define _3DSTATE_VERTEX_ELEMENTS          0x09
+#define _3DSTATE_INDEX_BUFFER             0x0A
+#define _3DSTATE_VF_STATISTICS            0x0B
+#define _3DSTATE_DRAWING_RECTANGLE            0x00 /* numbering restarts: presumably _3DOP_3DSTATE_NONPIPELINED -- confirm */
+#define _3DSTATE_CONSTANT_COLOR               0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD         0x02
+#define _3DSTATE_CHROMA_KEY                   0x04
+#define _3DSTATE_DEPTH_BUFFER                 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET          0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN         0x07
+#define _3DSTATE_LINE_STIPPLE                 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP    0x09
+#define _3DCONTROL    0x00 /* presumably the only value under _3DOP_3DCONTROL -- confirm */
+
+#define PIPE_CONTROL_NOWRITE          0x00
+#define PIPE_CONTROL_WRITEIMMEDIATE   0x01
+#define PIPE_CONTROL_WRITEDEPTH       0x02
+#define PIPE_CONTROL_WRITETIMESTAMP   0x03
+
+#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
+#define PIPE_CONTROL_GTTWRITE_GLOBAL        0x01
+
+#define _3DPRIM_POINTLIST         0x01
+#define _3DPRIM_LINELIST          0x02
+#define _3DPRIM_LINESTRIP         0x03
+#define _3DPRIM_TRILIST           0x04
+#define _3DPRIM_TRISTRIP          0x05
+#define _3DPRIM_TRIFAN            0x06
+#define _3DPRIM_QUADLIST          0x07
+#define _3DPRIM_QUADSTRIP         0x08
+#define _3DPRIM_LINELIST_ADJ      0x09
+#define _3DPRIM_LINESTRIP_ADJ     0x0A
+#define _3DPRIM_TRILIST_ADJ       0x0B
+#define _3DPRIM_TRISTRIP_ADJ      0x0C
+#define _3DPRIM_TRISTRIP_REVERSE  0x0D
+#define _3DPRIM_POLYGON           0x0E
+#define _3DPRIM_RECTLIST          0x0F
+#define _3DPRIM_LINELOOP          0x10
+#define _3DPRIM_POINTLIST_BF      0x11
+#define _3DPRIM_LINESTRIP_CONT    0x12
+#define _3DPRIM_LINESTRIP_BF      0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     1
+
+#define BRW_ANISORATIO_2     0 
+#define BRW_ANISORATIO_4     1 
+#define BRW_ANISORATIO_6     2 
+#define BRW_ANISORATIO_8     3 
+#define BRW_ANISORATIO_10    4 
+#define BRW_ANISORATIO_12    5 
+#define BRW_ANISORATIO_14    6 
+#define BRW_ANISORATIO_16    7
+
+#define BRW_BLENDFACTOR_ONE                 0x1
+#define BRW_BLENDFACTOR_SRC_COLOR           0x2
+#define BRW_BLENDFACTOR_SRC_ALPHA           0x3
+#define BRW_BLENDFACTOR_DST_ALPHA           0x4
+#define BRW_BLENDFACTOR_DST_COLOR           0x5
+#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE  0x6 /* no 0x16 counterpart is defined */
+#define BRW_BLENDFACTOR_CONST_COLOR         0x7
+#define BRW_BLENDFACTOR_CONST_ALPHA         0x8
+#define BRW_BLENDFACTOR_SRC1_COLOR          0x9
+#define BRW_BLENDFACTOR_SRC1_ALPHA          0x0A
+#define BRW_BLENDFACTOR_ZERO                0x11 /* ONE | 0x10 */
+#define BRW_BLENDFACTOR_INV_SRC_COLOR       0x12 /* each INV_* is its non-inverted value | 0x10 */
+#define BRW_BLENDFACTOR_INV_SRC_ALPHA       0x13
+#define BRW_BLENDFACTOR_INV_DST_ALPHA       0x14
+#define BRW_BLENDFACTOR_INV_DST_COLOR       0x15
+#define BRW_BLENDFACTOR_INV_CONST_COLOR     0x17
+#define BRW_BLENDFACTOR_INV_CONST_ALPHA     0x18
+#define BRW_BLENDFACTOR_INV_SRC1_COLOR      0x19
+#define BRW_BLENDFACTOR_INV_SRC1_ALPHA      0x1A
+
+#define BRW_BLENDFUNCTION_ADD               0
+#define BRW_BLENDFUNCTION_SUBTRACT          1
+#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT  2
+#define BRW_BLENDFUNCTION_MIN               3
+#define BRW_BLENDFUNCTION_MAX               4
+
+#define BRW_ALPHATEST_FORMAT_UNORM8         0
+#define BRW_ALPHATEST_FORMAT_FLOAT32        1
+
+#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH  0
+#define BRW_CHROMAKEY_REPLACE_BLACK      1
+
+#define BRW_CLIP_API_OGL     0
+#define BRW_CLIP_API_DX      1
+
+#define BRW_CLIPMODE_NORMAL              0
+#define BRW_CLIPMODE_CLIP_ALL            1
+#define BRW_CLIPMODE_CLIP_NON_REJECTED   2
+#define BRW_CLIPMODE_REJECT_ALL          3
+#define BRW_CLIPMODE_ACCEPT_ALL          4
+#define BRW_CLIPMODE_KERNEL_CLIP         5
+
+#define BRW_CLIP_NDCSPACE     0
+#define BRW_CLIP_SCREENSPACE  1
+
+#define BRW_COMPAREFUNCTION_ALWAYS       0
+#define BRW_COMPAREFUNCTION_NEVER        1
+#define BRW_COMPAREFUNCTION_LESS         2
+#define BRW_COMPAREFUNCTION_EQUAL        3
+#define BRW_COMPAREFUNCTION_LEQUAL       4
+#define BRW_COMPAREFUNCTION_GREATER      5
+#define BRW_COMPAREFUNCTION_NOTEQUAL     6
+#define BRW_COMPAREFUNCTION_GEQUAL       7
+
+#define BRW_COVERAGE_PIXELS_HALF     0
+#define BRW_COVERAGE_PIXELS_1        1
+#define BRW_COVERAGE_PIXELS_2        2
+#define BRW_COVERAGE_PIXELS_4        3
+
+#define BRW_CULLMODE_BOTH        0
+#define BRW_CULLMODE_NONE        1
+#define BRW_CULLMODE_FRONT       2
+#define BRW_CULLMODE_BACK        3
+
+#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM      0
+#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT  1
+
+#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT     0
+#define BRW_DEPTHFORMAT_D32_FLOAT                1
+#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT        2
+#define BRW_DEPTHFORMAT_D16_UNORM                5
+
+#define BRW_FLOATING_POINT_IEEE_754        0
+#define BRW_FLOATING_POINT_NON_IEEE_754    1
+
+#define BRW_FRONTWINDING_CW      0
+#define BRW_FRONTWINDING_CCW     1
+
+#define BRW_SPRITE_POINT_ENABLE  16
+
+#define BRW_INDEX_BYTE     0
+#define BRW_INDEX_WORD     1
+#define BRW_INDEX_DWORD    2
+
+#define BRW_LOGICOPFUNCTION_CLEAR            0
+#define BRW_LOGICOPFUNCTION_NOR              1
+#define BRW_LOGICOPFUNCTION_AND_INVERTED     2
+#define BRW_LOGICOPFUNCTION_COPY_INVERTED    3
+#define BRW_LOGICOPFUNCTION_AND_REVERSE      4
+#define BRW_LOGICOPFUNCTION_INVERT           5
+#define BRW_LOGICOPFUNCTION_XOR              6
+#define BRW_LOGICOPFUNCTION_NAND             7
+#define BRW_LOGICOPFUNCTION_AND              8
+#define BRW_LOGICOPFUNCTION_EQUIV            9
+#define BRW_LOGICOPFUNCTION_NOOP             10
+#define BRW_LOGICOPFUNCTION_OR_INVERTED      11
+#define BRW_LOGICOPFUNCTION_COPY             12
+#define BRW_LOGICOPFUNCTION_OR_REVERSE       13
+#define BRW_LOGICOPFUNCTION_OR               14
+#define BRW_LOGICOPFUNCTION_SET              15  
+
+#define BRW_MAPFILTER_NEAREST        0x0 
+#define BRW_MAPFILTER_LINEAR         0x1 
+#define BRW_MAPFILTER_ANISOTROPIC    0x2
+
+#define BRW_MIPFILTER_NONE        0   
+#define BRW_MIPFILTER_NEAREST     1   
+#define BRW_MIPFILTER_LINEAR      3
+
+#define BRW_POLYGON_FRONT_FACING     0 /* same value as BRW_POLYGON_FACING_FRONT, defined later in this header */
+#define BRW_POLYGON_BACK_FACING      1 /* same value as BRW_POLYGON_FACING_BACK, defined later in this header */
+
+#define BRW_PREFILTER_ALWAYS     0x0 
+#define BRW_PREFILTER_NEVER      0x1
+#define BRW_PREFILTER_LESS       0x2
+#define BRW_PREFILTER_EQUAL      0x3
+#define BRW_PREFILTER_LEQUAL     0x4
+#define BRW_PREFILTER_GREATER    0x5
+#define BRW_PREFILTER_NOTEQUAL   0x6
+#define BRW_PREFILTER_GEQUAL     0x7
+
+#define BRW_PROVOKING_VERTEX_0    0
+#define BRW_PROVOKING_VERTEX_1    1 
+#define BRW_PROVOKING_VERTEX_2    2
+
+#define BRW_RASTRULE_UPPER_LEFT  0    
+#define BRW_RASTRULE_UPPER_RIGHT 1
+/* The two LOWER_* rules below are listed as "Reserved, but not seen as useful"
+ * in Intel documentation (page 212, "Point Rasterization Rule",
+ * section 7.4 "SF Pipeline State Summary", of document
+ * "Intel® 965 Express Chipset Family and Intel® G35 Express
+ * Chipset Graphics Controller Programmer's Reference Manual,
+ * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+ * available at 
+ *     http://intellinuxgraphics.org/documentation.html
+ * at the time of this writing).
+ *
+ * These appear to be supported on at least some
+ * i965-family devices, and BRW_RASTRULE_LOWER_RIGHT
+ * is useful when using OpenGL to render to an FBO
+ * (which has the pixel coordinate Y orientation inverted
+ * with respect to the normal OpenGL pixel coordinate system).
+ */
+#define BRW_RASTRULE_LOWER_LEFT  2
+#define BRW_RASTRULE_LOWER_RIGHT 3
+
+#define BRW_RENDERTARGET_CLAMPRANGE_UNORM    0
+#define BRW_RENDERTARGET_CLAMPRANGE_SNORM    1
+#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT   2
+
+#define BRW_STENCILOP_KEEP               0
+#define BRW_STENCILOP_ZERO               1
+#define BRW_STENCILOP_REPLACE            2
+#define BRW_STENCILOP_INCRSAT            3
+#define BRW_STENCILOP_DECRSAT            4
+#define BRW_STENCILOP_INCR               5
+#define BRW_STENCILOP_DECR               6
+#define BRW_STENCILOP_INVERT             7
+
+#define BRW_SURFACE_MIPMAPLAYOUT_BELOW   0
+#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT   1
+
+#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT             0x000 
+#define BRW_SURFACEFORMAT_R32G32B32A32_SINT              0x001 
+#define BRW_SURFACEFORMAT_R32G32B32A32_UINT              0x002 
+#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM             0x003 
+#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM             0x004 
+#define BRW_SURFACEFORMAT_R64G64_FLOAT                   0x005 
+#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT             0x006 
+#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED           0x007
+#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED           0x008
+#define BRW_SURFACEFORMAT_R32G32B32_FLOAT                0x040 
+#define BRW_SURFACEFORMAT_R32G32B32_SINT                 0x041 
+#define BRW_SURFACEFORMAT_R32G32B32_UINT                 0x042 
+#define BRW_SURFACEFORMAT_R32G32B32_UNORM                0x043 
+#define BRW_SURFACEFORMAT_R32G32B32_SNORM                0x044 
+#define BRW_SURFACEFORMAT_R32G32B32_SSCALED              0x045 
+#define BRW_SURFACEFORMAT_R32G32B32_USCALED              0x046 
+#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM             0x080 
+#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM             0x081 
+#define BRW_SURFACEFORMAT_R16G16B16A16_SINT              0x082 
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT              0x083 
+#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT             0x084 
+#define BRW_SURFACEFORMAT_R32G32_FLOAT                   0x085 
+#define BRW_SURFACEFORMAT_R32G32_SINT                    0x086 
+#define BRW_SURFACEFORMAT_R32G32_UINT                    0x087 
+#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS       0x088 
+#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT        0x089 
+#define BRW_SURFACEFORMAT_L32A32_FLOAT                   0x08A 
+#define BRW_SURFACEFORMAT_R32G32_UNORM                   0x08B 
+#define BRW_SURFACEFORMAT_R32G32_SNORM                   0x08C 
+#define BRW_SURFACEFORMAT_R64_FLOAT                      0x08D 
+#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM             0x08E 
+#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT             0x08F 
+#define BRW_SURFACEFORMAT_A32X32_FLOAT                   0x090 
+#define BRW_SURFACEFORMAT_L32X32_FLOAT                   0x091 
+#define BRW_SURFACEFORMAT_I32X32_FLOAT                   0x092 
+#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED           0x093
+#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED           0x094
+#define BRW_SURFACEFORMAT_R32G32_SSCALED                 0x095
+#define BRW_SURFACEFORMAT_R32G32_USCALED                 0x096
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM                 0x0C0 
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB            0x0C1 
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM              0x0C2 
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB         0x0C3 
+#define BRW_SURFACEFORMAT_R10G10B10A2_UINT               0x0C4 
+#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM       0x0C5 
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM                 0x0C7 
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB            0x0C8 
+#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM                 0x0C9 
+#define BRW_SURFACEFORMAT_R8G8B8A8_SINT                  0x0CA 
+#define BRW_SURFACEFORMAT_R8G8B8A8_UINT                  0x0CB 
+#define BRW_SURFACEFORMAT_R16G16_UNORM                   0x0CC 
+#define BRW_SURFACEFORMAT_R16G16_SNORM                   0x0CD 
+#define BRW_SURFACEFORMAT_R16G16_SINT                    0x0CE 
+#define BRW_SURFACEFORMAT_R16G16_UINT                    0x0CF 
+#define BRW_SURFACEFORMAT_R16G16_FLOAT                   0x0D0 
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM              0x0D1 
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB         0x0D2 
+#define BRW_SURFACEFORMAT_R11G11B10_FLOAT                0x0D3 
+#define BRW_SURFACEFORMAT_R32_SINT                       0x0D6 
+#define BRW_SURFACEFORMAT_R32_UINT                       0x0D7 
+#define BRW_SURFACEFORMAT_R32_FLOAT                      0x0D8 
+#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS          0x0D9 
+#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT           0x0DA 
+#define BRW_SURFACEFORMAT_L16A16_UNORM                   0x0DF 
+#define BRW_SURFACEFORMAT_I24X8_UNORM                    0x0E0 
+#define BRW_SURFACEFORMAT_L24X8_UNORM                    0x0E1 
+#define BRW_SURFACEFORMAT_A24X8_UNORM                    0x0E2 
+#define BRW_SURFACEFORMAT_I32_FLOAT                      0x0E3 
+#define BRW_SURFACEFORMAT_L32_FLOAT                      0x0E4 
+#define BRW_SURFACEFORMAT_A32_FLOAT                      0x0E5 
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM                 0x0E9 
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB            0x0EA 
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM                 0x0EB 
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB            0x0EC 
+#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP             0x0ED 
+#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM              0x0EE 
+#define BRW_SURFACEFORMAT_L16A16_FLOAT                   0x0F0 
+#define BRW_SURFACEFORMAT_R32_UNORM                      0x0F1 
+#define BRW_SURFACEFORMAT_R32_SNORM                      0x0F2 
+#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED            0x0F3
+#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED               0x0F4
+#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED               0x0F5
+#define BRW_SURFACEFORMAT_R16G16_SSCALED                 0x0F6
+#define BRW_SURFACEFORMAT_R16G16_USCALED                 0x0F7
+#define BRW_SURFACEFORMAT_R32_SSCALED                    0x0F8
+#define BRW_SURFACEFORMAT_R32_USCALED                    0x0F9
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM                   0x100 
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB              0x101 
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM                 0x102 
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB            0x103 
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM                 0x104 
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB            0x105 
+#define BRW_SURFACEFORMAT_R8G8_UNORM                     0x106 
+#define BRW_SURFACEFORMAT_R8G8_SNORM                     0x107 
+#define BRW_SURFACEFORMAT_R8G8_SINT                      0x108 
+#define BRW_SURFACEFORMAT_R8G8_UINT                      0x109 
+#define BRW_SURFACEFORMAT_R16_UNORM                      0x10A 
+#define BRW_SURFACEFORMAT_R16_SNORM                      0x10B 
+#define BRW_SURFACEFORMAT_R16_SINT                       0x10C 
+#define BRW_SURFACEFORMAT_R16_UINT                       0x10D 
+#define BRW_SURFACEFORMAT_R16_FLOAT                      0x10E 
+#define BRW_SURFACEFORMAT_I16_UNORM                      0x111 
+#define BRW_SURFACEFORMAT_L16_UNORM                      0x112 
+#define BRW_SURFACEFORMAT_A16_UNORM                      0x113 
+#define BRW_SURFACEFORMAT_L8A8_UNORM                     0x114 
+#define BRW_SURFACEFORMAT_I16_FLOAT                      0x115
+#define BRW_SURFACEFORMAT_L16_FLOAT                      0x116
+#define BRW_SURFACEFORMAT_A16_FLOAT                      0x117
+#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB                0x118
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM            0x119
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM                 0x11A
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB            0x11B
+#define BRW_SURFACEFORMAT_R8G8_SSCALED                   0x11C
+#define BRW_SURFACEFORMAT_R8G8_USCALED                   0x11D
+#define BRW_SURFACEFORMAT_R16_SSCALED                    0x11E
+#define BRW_SURFACEFORMAT_R16_USCALED                    0x11F
+#define BRW_SURFACEFORMAT_R8_UNORM                       0x140 
+#define BRW_SURFACEFORMAT_R8_SNORM                       0x141 
+#define BRW_SURFACEFORMAT_R8_SINT                        0x142 
+#define BRW_SURFACEFORMAT_R8_UINT                        0x143 
+#define BRW_SURFACEFORMAT_A8_UNORM                       0x144 
+#define BRW_SURFACEFORMAT_I8_UNORM                       0x145 
+#define BRW_SURFACEFORMAT_L8_UNORM                       0x146 
+#define BRW_SURFACEFORMAT_P4A4_UNORM                     0x147 
+#define BRW_SURFACEFORMAT_A4P4_UNORM                     0x148
+#define BRW_SURFACEFORMAT_R8_SSCALED                     0x149
+#define BRW_SURFACEFORMAT_R8_USCALED                     0x14A
+#define BRW_SURFACEFORMAT_L8_UNORM_SRGB                  0x14C
+#define BRW_SURFACEFORMAT_R1_UINT                        0x181 
+#define BRW_SURFACEFORMAT_YCRCB_NORMAL                   0x182 
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY                  0x183 
+#define BRW_SURFACEFORMAT_BC1_UNORM                      0x186 
+#define BRW_SURFACEFORMAT_BC2_UNORM                      0x187 
+#define BRW_SURFACEFORMAT_BC3_UNORM                      0x188 
+#define BRW_SURFACEFORMAT_BC4_UNORM                      0x189 
+#define BRW_SURFACEFORMAT_BC5_UNORM                      0x18A 
+#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB                 0x18B 
+#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB                 0x18C 
+#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB                 0x18D 
+#define BRW_SURFACEFORMAT_MONO8                          0x18E 
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUV                   0x18F 
+#define BRW_SURFACEFORMAT_YCRCB_SWAPY                    0x190 
+#define BRW_SURFACEFORMAT_DXT1_RGB                       0x191 
+#define BRW_SURFACEFORMAT_FXT1                           0x192 
+#define BRW_SURFACEFORMAT_R8G8B8_UNORM                   0x193 
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM                   0x194 
+#define BRW_SURFACEFORMAT_R8G8B8_SSCALED                 0x195 
+#define BRW_SURFACEFORMAT_R8G8B8_USCALED                 0x196 
+#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT             0x197 
+#define BRW_SURFACEFORMAT_R64G64B64_FLOAT                0x198 
+#define BRW_SURFACEFORMAT_BC4_SNORM                      0x199 
+#define BRW_SURFACEFORMAT_BC5_SNORM                      0x19A 
+#define BRW_SURFACEFORMAT_R16G16B16_UNORM                0x19C 
+#define BRW_SURFACEFORMAT_R16G16B16_SNORM                0x19D 
+#define BRW_SURFACEFORMAT_R16G16B16_SSCALED              0x19E 
+#define BRW_SURFACEFORMAT_R16G16B16_USCALED              0x19F
+
+#define BRW_SURFACERETURNFORMAT_FLOAT32  0
+#define BRW_SURFACERETURNFORMAT_S1       1
+
+#define BRW_SURFACE_1D      0
+#define BRW_SURFACE_2D      1
+#define BRW_SURFACE_3D      2
+#define BRW_SURFACE_CUBE    3
+#define BRW_SURFACE_BUFFER  4
+#define BRW_SURFACE_NULL    7
+
+#define BRW_TEXCOORDMODE_WRAP            0
+#define BRW_TEXCOORDMODE_MIRROR          1
+#define BRW_TEXCOORDMODE_CLAMP           2
+#define BRW_TEXCOORDMODE_CUBE            3
+#define BRW_TEXCOORDMODE_CLAMP_BORDER    4
+#define BRW_TEXCOORDMODE_MIRROR_ONCE     5
+
+#define BRW_THREAD_PRIORITY_NORMAL   0
+#define BRW_THREAD_PRIORITY_HIGH     1
+
+#define BRW_TILEWALK_XMAJOR                 0
+#define BRW_TILEWALK_YMAJOR                 1
+
+#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS  0
+#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS  1
+
+/* Execution Unit (EU) defines
+ */
+
+#define BRW_ALIGN_1   0
+#define BRW_ALIGN_16  1
+
+#define BRW_ADDRESS_DIRECT                        0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER    1
+
+#define BRW_CHANNEL_X     0
+#define BRW_CHANNEL_Y     1
+#define BRW_CHANNEL_Z     2
+#define BRW_CHANNEL_W     3
+
+#define BRW_COMPRESSION_NONE          0
+#define BRW_COMPRESSION_2NDHALF       1
+#define BRW_COMPRESSION_COMPRESSED    2
+
+#define BRW_CONDITIONAL_NONE  0
+#define BRW_CONDITIONAL_Z     1
+#define BRW_CONDITIONAL_NZ    2
+#define BRW_CONDITIONAL_EQ    1	/* Z */
+#define BRW_CONDITIONAL_NEQ   2	/* NZ */
+#define BRW_CONDITIONAL_G     3
+#define BRW_CONDITIONAL_GE    4
+#define BRW_CONDITIONAL_L     5
+#define BRW_CONDITIONAL_LE    6
+#define BRW_CONDITIONAL_R     7
+#define BRW_CONDITIONAL_O     8
+#define BRW_CONDITIONAL_U     9
+
+#define BRW_DEBUG_NONE        0
+#define BRW_DEBUG_BREAKPOINT  1
+
+#define BRW_DEPENDENCY_NORMAL         0
+#define BRW_DEPENDENCY_NOTCLEARED     1
+#define BRW_DEPENDENCY_NOTCHECKED     2
+#define BRW_DEPENDENCY_DISABLE        3
+
+#define BRW_EXECUTE_1     0
+#define BRW_EXECUTE_2     1
+#define BRW_EXECUTE_4     2
+#define BRW_EXECUTE_8     3
+#define BRW_EXECUTE_16    4
+#define BRW_EXECUTE_32    5
+
+#define BRW_HORIZONTAL_STRIDE_0   0
+#define BRW_HORIZONTAL_STRIDE_1   1
+#define BRW_HORIZONTAL_STRIDE_2   2
+#define BRW_HORIZONTAL_STRIDE_4   3
+
+#define BRW_INSTRUCTION_NORMAL    0
+#define BRW_INSTRUCTION_SATURATE  1
+
+#define BRW_MASK_ENABLE   0
+#define BRW_MASK_DISABLE  1
+
+#define BRW_OPCODE_MOV        1
+#define BRW_OPCODE_SEL        2
+#define BRW_OPCODE_NOT        4
+#define BRW_OPCODE_AND        5
+#define BRW_OPCODE_OR         6
+#define BRW_OPCODE_XOR        7
+#define BRW_OPCODE_SHR        8
+#define BRW_OPCODE_SHL        9
+#define BRW_OPCODE_RSR        10
+#define BRW_OPCODE_RSL        11
+#define BRW_OPCODE_ASR        12
+#define BRW_OPCODE_CMP        16
+#define BRW_OPCODE_CMPN       17
+#define BRW_OPCODE_JMPI       32
+#define BRW_OPCODE_IF         34
+#define BRW_OPCODE_IFF        35
+#define BRW_OPCODE_ELSE       36
+#define BRW_OPCODE_ENDIF      37
+#define BRW_OPCODE_DO         38
+#define BRW_OPCODE_WHILE      39
+#define BRW_OPCODE_BREAK      40
+#define BRW_OPCODE_CONTINUE   41
+#define BRW_OPCODE_HALT       42
+#define BRW_OPCODE_MSAVE      44
+#define BRW_OPCODE_MRESTORE   45
+#define BRW_OPCODE_PUSH       46
+#define BRW_OPCODE_POP        47
+#define BRW_OPCODE_WAIT       48
+#define BRW_OPCODE_SEND       49
+#define BRW_OPCODE_MATH       56
+#define BRW_OPCODE_ADD        64
+#define BRW_OPCODE_MUL        65
+#define BRW_OPCODE_AVG        66
+#define BRW_OPCODE_FRC        67
+#define BRW_OPCODE_RNDU       68
+#define BRW_OPCODE_RNDD       69
+#define BRW_OPCODE_RNDE       70
+#define BRW_OPCODE_RNDZ       71
+#define BRW_OPCODE_MAC        72
+#define BRW_OPCODE_MACH       73
+#define BRW_OPCODE_LZD        74
+#define BRW_OPCODE_SAD2       80
+#define BRW_OPCODE_SADA2      81
+#define BRW_OPCODE_DP4        84
+#define BRW_OPCODE_DPH        85
+#define BRW_OPCODE_DP3        86
+#define BRW_OPCODE_DP2        87
+#define BRW_OPCODE_DPA2       88
+#define BRW_OPCODE_LINE       89
+#define BRW_OPCODE_PLN        90
+#define BRW_OPCODE_NOP        126
+
+#define BRW_PREDICATE_NONE             0
+#define BRW_PREDICATE_NORMAL           1
+#define BRW_PREDICATE_ALIGN1_ANYV             2 /* ALIGN1_* (2..11) and ALIGN16_* (2..7) reuse the same values */
+#define BRW_PREDICATE_ALIGN1_ALLV             3
+#define BRW_PREDICATE_ALIGN1_ANY2H            4
+#define BRW_PREDICATE_ALIGN1_ALL2H            5
+#define BRW_PREDICATE_ALIGN1_ANY4H            6
+#define BRW_PREDICATE_ALIGN1_ALL4H            7
+#define BRW_PREDICATE_ALIGN1_ANY8H            8
+#define BRW_PREDICATE_ALIGN1_ALL8H            9
+#define BRW_PREDICATE_ALIGN1_ANY16H           10
+#define BRW_PREDICATE_ALIGN1_ALL16H           11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2 /* presumably chosen by access mode (BRW_ALIGN_1 / BRW_ALIGN_16) -- confirm */
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5
+#define BRW_PREDICATE_ALIGN16_ANY4H           6
+#define BRW_PREDICATE_ALIGN16_ALL4H           7
+
+#define BRW_ARCHITECTURE_REGISTER_FILE    0
+#define BRW_GENERAL_REGISTER_FILE         1
+#define BRW_MESSAGE_REGISTER_FILE         2
+#define BRW_IMMEDIATE_VALUE               3
+
+#define BRW_REGISTER_TYPE_UD  0
+#define BRW_REGISTER_TYPE_D   1
+#define BRW_REGISTER_TYPE_UW  2
+#define BRW_REGISTER_TYPE_W   3
+#define BRW_REGISTER_TYPE_UB  4
+#define BRW_REGISTER_TYPE_B   5
+#define BRW_REGISTER_TYPE_VF  5	/* packed float vector, immediates only?  Same value as _B */
+#define BRW_REGISTER_TYPE_HF  6
+#define BRW_REGISTER_TYPE_V   6	/* packed int vector, immediates only, uword dest only; same value as _HF */
+#define BRW_REGISTER_TYPE_F   7
+
+#define BRW_ARF_NULL                  0x00
+#define BRW_ARF_ADDRESS               0x10
+#define BRW_ARF_ACCUMULATOR           0x20   
+#define BRW_ARF_FLAG                  0x30
+#define BRW_ARF_MASK                  0x40
+#define BRW_ARF_MASK_STACK            0x50
+#define BRW_ARF_MASK_STACK_DEPTH      0x60
+#define BRW_ARF_STATE                 0x70
+#define BRW_ARF_CONTROL               0x80
+#define BRW_ARF_NOTIFICATION_COUNT    0x90
+#define BRW_ARF_IP                    0xA0
+
+#define BRW_AMASK   0
+#define BRW_IMASK   1
+#define BRW_LMASK   2
+#define BRW_CMASK   3
+
+
+
+#define BRW_THREAD_NORMAL     0
+#define BRW_THREAD_ATOMIC     1
+#define BRW_THREAD_SWITCH     2
+
+#define BRW_VERTICAL_STRIDE_0                 0
+#define BRW_VERTICAL_STRIDE_1                 1
+#define BRW_VERTICAL_STRIDE_2                 2
+#define BRW_VERTICAL_STRIDE_4                 3
+#define BRW_VERTICAL_STRIDE_8                 4
+#define BRW_VERTICAL_STRIDE_16                5
+#define BRW_VERTICAL_STRIDE_32                6
+#define BRW_VERTICAL_STRIDE_64                7
+#define BRW_VERTICAL_STRIDE_128               8
+#define BRW_VERTICAL_STRIDE_256               9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF
+
+#define BRW_WIDTH_1       0
+#define BRW_WIDTH_2       1
+#define BRW_WIDTH_4       2
+#define BRW_WIDTH_8       3
+#define BRW_WIDTH_16      4
+
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K      0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K      1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K      2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K      3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K     4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K     5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K     6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K    7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K    8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K    9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M      10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M      11
+
+#define BRW_POLYGON_FACING_FRONT      0
+#define BRW_POLYGON_FACING_BACK       1
+
+#define BRW_MESSAGE_TARGET_NULL               0
+#define BRW_MESSAGE_TARGET_MATH               1
+#define BRW_MESSAGE_TARGET_SAMPLER            2
+#define BRW_MESSAGE_TARGET_GATEWAY            3
+#define BRW_MESSAGE_TARGET_DATAPORT_READ      4
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE     5
+#define BRW_MESSAGE_TARGET_URB                6
+#define BRW_MESSAGE_TARGET_THREAD_SPAWNER     7
+
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32     0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32      2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32      3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE              0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS        0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX             1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD        1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD         1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS  2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
+#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO             2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD                 3
+
+#define BRW_SAMPLER_MESSAGE_SAMPLE_GEN5            0
+#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5       1
+#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5        2
+#define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5    3
+
+/* for GEN5 only */
+#define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
+#define BRW_SAMPLER_SIMD_MODE_SIMD8                     1
+#define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
+#define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3
+
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     4
+
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2
+
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3
+
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ          2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
+
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE      0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE    1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE   2
+
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01         2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23         3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01       4
+
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE                0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE           1
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE                2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE            3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE              4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE     5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE               7
+
+#define BRW_MATH_FUNCTION_INV                              1
+#define BRW_MATH_FUNCTION_LOG                              2
+#define BRW_MATH_FUNCTION_EXP                              3
+#define BRW_MATH_FUNCTION_SQRT                             4
+#define BRW_MATH_FUNCTION_RSQ                              5
+#define BRW_MATH_FUNCTION_SIN                              6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS                              7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS                           8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN                              9 /* gen4 */
+#define BRW_MATH_FUNCTION_FDIV                             9 /* gen6+ */
+#define BRW_MATH_FUNCTION_POW                              10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER                13
+
+#define BRW_MATH_INTEGER_UNSIGNED     0
+#define BRW_MATH_INTEGER_SIGNED       1
+
+#define BRW_MATH_PRECISION_FULL        0
+#define BRW_MATH_PRECISION_PARTIAL     1
+
+#define BRW_MATH_SATURATE_NONE         0
+#define BRW_MATH_SATURATE_SATURATE     1
+
+#define BRW_MATH_DATA_VECTOR  0
+#define BRW_MATH_DATA_SCALAR  1
+
+#define BRW_URB_OPCODE_WRITE  0
+
+#define BRW_URB_SWIZZLE_NONE          0
+#define BRW_URB_SWIZZLE_INTERLEAVE    1
+#define BRW_URB_SWIZZLE_TRANSPOSE     2
+
+#define BRW_SCRATCH_SPACE_SIZE_1K     0
+#define BRW_SCRATCH_SPACE_SIZE_2K     1
+#define BRW_SCRATCH_SPACE_SIZE_4K     2
+#define BRW_SCRATCH_SPACE_SIZE_8K     3
+#define BRW_SCRATCH_SPACE_SIZE_16K    4
+#define BRW_SCRATCH_SPACE_SIZE_32K    5
+#define BRW_SCRATCH_SPACE_SIZE_64K    6
+#define BRW_SCRATCH_SPACE_SIZE_128K   7
+#define BRW_SCRATCH_SPACE_SIZE_256K   8
+#define BRW_SCRATCH_SPACE_SIZE_512K   9
+#define BRW_SCRATCH_SPACE_SIZE_1M     10
+#define BRW_SCRATCH_SPACE_SIZE_2M     11
+
+
+
+
+#define CMD_URB_FENCE                 0x6000
+#define CMD_CS_URB_STATE              0x6001
+#define CMD_CONST_BUFFER              0x6002
+
+#define CMD_STATE_BASE_ADDRESS        0x6101
+#define CMD_STATE_INSN_POINTER        0x6102
+#define CMD_PIPELINE_SELECT_965       0x6104
+#define CMD_PIPELINE_SELECT_GM45      0x6904
+
+#define CMD_PIPELINED_STATE_POINTERS  0x7800
+#define CMD_BINDING_TABLE_PTRS        0x7801
+# define GEN6_BINDING_TABLE_MODIFY_VS	(1 << 8)
+# define GEN6_BINDING_TABLE_MODIFY_GS	(1 << 9)
+# define GEN6_BINDING_TABLE_MODIFY_PS	(1 << 12)
+
+#define CMD_3D_SAMPLER_STATE_POINTERS			0x7802 /* SNB+ */
+# define PS_SAMPLER_STATE_CHANGE				(1 << 12)
+# define GS_SAMPLER_STATE_CHANGE				(1 << 9)
+# define VS_SAMPLER_STATE_CHANGE				(1 << 8)
+/* DW1: VS */
+/* DW2: GS */
+/* DW3: PS */
+
+#define CMD_VERTEX_BUFFER             0x7808
+# define BRW_VB0_INDEX_SHIFT		27
+# define GEN6_VB0_INDEX_SHIFT		26
+# define BRW_VB0_ACCESS_VERTEXDATA	(0 << 26)
+# define BRW_VB0_ACCESS_INSTANCEDATA	(1 << 26)
+# define GEN6_VB0_ACCESS_VERTEXDATA	(0 << 20)
+# define GEN6_VB0_ACCESS_INSTANCEDATA	(1 << 20)
+# define BRW_VB0_PITCH_SHIFT		0
+
+#define CMD_VERTEX_ELEMENT            0x7809
+# define BRW_VE0_INDEX_SHIFT		27
+# define GEN6_VE0_INDEX_SHIFT		26
+# define BRW_VE0_FORMAT_SHIFT		16
+# define BRW_VE0_VALID			(1 << 26)
+# define GEN6_VE0_VALID			(1 << 25)
+# define BRW_VE0_SRC_OFFSET_SHIFT	0
+# define BRW_VE1_COMPONENT_NOSTORE	0
+# define BRW_VE1_COMPONENT_STORE_SRC	1
+# define BRW_VE1_COMPONENT_STORE_0	2
+# define BRW_VE1_COMPONENT_STORE_1_FLT	3
+# define BRW_VE1_COMPONENT_STORE_1_INT	4
+# define BRW_VE1_COMPONENT_STORE_VID	5
+# define BRW_VE1_COMPONENT_STORE_IID	6
+# define BRW_VE1_COMPONENT_STORE_PID	7
+# define BRW_VE1_COMPONENT_0_SHIFT	28
+# define BRW_VE1_COMPONENT_1_SHIFT	24
+# define BRW_VE1_COMPONENT_2_SHIFT	20
+# define BRW_VE1_COMPONENT_3_SHIFT	16
+# define BRW_VE1_DST_OFFSET_SHIFT	0
+
+#define CMD_INDEX_BUFFER              0x780a
+#define CMD_VF_STATISTICS_965          0x780b
+#define CMD_VF_STATISTICS_GM45        0x680b
+#define CMD_3D_CC_STATE_POINTERS      0x780e /* GEN6+ */
+
+#define CMD_URB					0x7805 /* GEN6+ */
+# define GEN6_URB_VS_SIZE_SHIFT				16
+# define GEN6_URB_VS_ENTRIES_SHIFT			0
+# define GEN6_URB_GS_ENTRIES_SHIFT			8
+# define GEN6_URB_GS_SIZE_SHIFT				0
+
+#define CMD_VIEWPORT_STATE_POINTERS			0x780d /* GEN6+ */
+# define GEN6_CC_VIEWPORT_MODIFY			(1 << 12)
+# define GEN6_SF_VIEWPORT_MODIFY			(1 << 11)
+# define GEN6_CLIP_VIEWPORT_MODIFY			(1 << 10)
+
+#define CMD_3D_SCISSOR_STATE_POINTERS		0x780f /* GEN6+ */
+
+#define CMD_3D_VS_STATE		      0x7810 /* GEN6+ */
+/* DW2 */
+# define GEN6_VS_SPF_MODE				(1 << 31)
+# define GEN6_VS_VECTOR_MASK_ENABLE			(1 << 30)
+# define GEN6_VS_SAMPLER_COUNT_SHIFT			27
+# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+/* DW4 */
+# define GEN6_VS_DISPATCH_START_GRF_SHIFT		20
+# define GEN6_VS_URB_READ_LENGTH_SHIFT			11
+# define GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT		4
+/* DW5 */
+# define GEN6_VS_MAX_THREADS_SHIFT			25
+# define GEN6_VS_STATISTICS_ENABLE			(1 << 10)
+# define GEN6_VS_CACHE_DISABLE				(1 << 1)
+# define GEN6_VS_ENABLE					(1 << 0)
+
+#define CMD_3D_GS_STATE		      0x7811 /* GEN6+ */
+/* DW2 */
+# define GEN6_GS_SPF_MODE				(1 << 31)
+# define GEN6_GS_VECTOR_MASK_ENABLE			(1 << 30)
+# define GEN6_GS_SAMPLER_COUNT_SHIFT			27
+# define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+/* DW4 */
+# define GEN6_GS_URB_READ_LENGTH_SHIFT			11
+# define GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT		4
+# define GEN6_GS_DISPATCH_START_GRF_SHIFT		0
+/* DW5 */
+# define GEN6_GS_MAX_THREADS_SHIFT			25
+# define GEN6_GS_STATISTICS_ENABLE			(1 << 10)
+# define GEN6_GS_SO_STATISTICS_ENABLE			(1 << 9)
+# define GEN6_GS_RENDERING_ENABLE			(1 << 8)
+/* DW6 */
+# define GEN6_GS_ENABLE					(1 << 15)
+
+#define CMD_3D_CLIP_STATE		      0x7812 /* GEN6+ */
+/* DW1 */
+# define GEN6_CLIP_STATISTICS_ENABLE			(1 << 10)
+/* DW2 */
+# define GEN6_CLIP_ENABLE				(1 << 31)
+# define GEN6_CLIP_API_OGL				(0 << 30)
+# define GEN6_CLIP_API_D3D				(1 << 30)
+# define GEN6_CLIP_XY_TEST				(1 << 28)
+# define GEN6_CLIP_Z_TEST				(1 << 27)
+# define GEN6_CLIP_GB_TEST				(1 << 26)
+# define GEN6_CLIP_MODE_NORMAL				(0 << 13)
+# define GEN6_CLIP_MODE_REJECT_ALL			(3 << 13)
+# define GEN6_CLIP_MODE_ACCEPT_ALL			(4 << 13)
+# define GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE		(1 << 9)
+# define GEN6_CLIP_BARYCENTRIC_ENABLE			(1 << 8)
+# define GEN6_CLIP_TRI_PROVOKE_SHIFT			4
+# define GEN6_CLIP_LINE_PROVOKE_SHIFT			2
+# define GEN6_CLIP_TRIFAN_PROVOKE_SHIFT			0
+/* DW3 */
+# define GEN6_CLIP_MIN_POINT_WIDTH_SHIFT		17
+# define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT		6
+
+#define CMD_3D_SF_STATE				0x7813 /* GEN6+ */
+/* DW1 */
+# define GEN6_SF_NUM_OUTPUTS_SHIFT			22
+# define GEN6_SF_SWIZZLE_ENABLE				(1 << 21)
+# define GEN6_SF_POINT_SPRITE_LOWERLEFT			(1 << 20)
+# define GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT		11
+# define GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT		4
+/* DW2 */
+# define GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS		(1 << 11)
+# define GEN6_SF_STATISTICS_ENABLE			(1 << 10)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID		(1 << 9)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME		(1 << 8)
+# define GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT		(1 << 7)
+# define GEN6_SF_FRONT_SOLID				(0 << 5)
+# define GEN6_SF_FRONT_WIREFRAME			(1 << 5)
+# define GEN6_SF_FRONT_POINT				(2 << 5)
+# define GEN6_SF_BACK_SOLID				(0 << 3)
+# define GEN6_SF_BACK_WIREFRAME				(1 << 3)
+# define GEN6_SF_BACK_POINT				(2 << 3)
+# define GEN6_SF_VIEWPORT_TRANSFORM_ENABLE		(1 << 1)
+# define GEN6_SF_WINDING_CCW				(1 << 0)
+/* DW3 */
+# define GEN6_SF_LINE_AA_ENABLE				(1 << 31)
+# define GEN6_SF_CULL_BOTH				(0 << 29)
+# define GEN6_SF_CULL_NONE				(1 << 29)
+# define GEN6_SF_CULL_FRONT				(2 << 29)
+# define GEN6_SF_CULL_BACK				(3 << 29)
+# define GEN6_SF_LINE_WIDTH_SHIFT			18 /* U3.7 */
+# define GEN6_SF_LINE_END_CAP_WIDTH_0_5			(0 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_1_0			(1 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_2_0			(2 << 16)
+# define GEN6_SF_LINE_END_CAP_WIDTH_4_0			(3 << 16)
+# define GEN6_SF_SCISSOR_ENABLE				(1 << 11)
+# define GEN6_SF_MSRAST_OFF_PIXEL			(0 << 8)
+# define GEN6_SF_MSRAST_OFF_PATTERN			(1 << 8)
+# define GEN6_SF_MSRAST_ON_PIXEL			(2 << 8)
+# define GEN6_SF_MSRAST_ON_PATTERN			(3 << 8)
+/* DW4 */
+# define GEN6_SF_TRI_PROVOKE_SHIFT			29
+# define GEN6_SF_LINE_PROVOKE_SHIFT			27
+# define GEN6_SF_TRIFAN_PROVOKE_SHIFT			25
+# define GEN6_SF_LINE_AA_MODE_MANHATTAN			(0 << 14)
+# define GEN6_SF_LINE_AA_MODE_TRUE			(1 << 14)
+# define GEN6_SF_VERTEX_SUBPIXEL_8BITS			(0 << 12)
+# define GEN6_SF_VERTEX_SUBPIXEL_4BITS			(1 << 12)
+# define GEN6_SF_USE_STATE_POINT_WIDTH			(1 << 11)
+# define GEN6_SF_POINT_WIDTH_SHIFT			0 /* U8.3 */
+/* DW5: depth offset constant */
+/* DW6: depth offset scale */
+/* DW7: depth offset clamp */
+/* DW8 */
+# define ATTRIBUTE_1_OVERRIDE_W				(1 << 31)
+# define ATTRIBUTE_1_OVERRIDE_Z				(1 << 30)
+# define ATTRIBUTE_1_OVERRIDE_Y				(1 << 29)
+# define ATTRIBUTE_1_OVERRIDE_X				(1 << 28)
+# define ATTRIBUTE_1_CONST_SOURCE_SHIFT			25
+# define ATTRIBUTE_1_SWIZZLE_SHIFT			22
+# define ATTRIBUTE_1_SOURCE_SHIFT			16
+# define ATTRIBUTE_0_OVERRIDE_W				(1 << 15)
+# define ATTRIBUTE_0_OVERRIDE_Z				(1 << 14)
+# define ATTRIBUTE_0_OVERRIDE_Y				(1 << 13)
+# define ATTRIBUTE_0_OVERRIDE_X				(1 << 12)
+# define ATTRIBUTE_0_CONST_SOURCE_SHIFT			9
+# define ATTRIBUTE_0_SWIZZLE_SHIFT			6
+# define ATTRIBUTE_0_SOURCE_SHIFT			0
+/* DW16: Point sprite texture coordinate enables */
+/* DW17: Constant interpolation enables */
+/* DW18: attr 0-7 wrap shortest enables */
+/* DW19: attr 8-15 wrap shortest enables */
+
+#define CMD_3D_WM_STATE		      0x7814 /* GEN6+ */
+/* DW1: kernel pointer */
+/* DW2 */
+# define GEN6_WM_SPF_MODE				(1 << 31)
+# define GEN6_WM_VECTOR_MASK_ENABLE			(1 << 30)
+# define GEN6_WM_SAMPLER_COUNT_SHIFT			27
+# define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+/* DW3: scratch space */
+/* DW4 */
+# define GEN6_WM_STATISTICS_ENABLE			(1 << 31)
+# define GEN6_WM_DEPTH_CLEAR				(1 << 30)
+# define GEN6_WM_DEPTH_RESOLVE				(1 << 28)
+# define GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE		(1 << 27)
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_0		16
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_1		8
+# define GEN6_WM_DISPATCH_START_GRF_SHIFT_2		0
+/* DW5 */
+# define GEN6_WM_MAX_THREADS_SHIFT			25
+# define GEN6_WM_KILL_ENABLE				(1 << 22)
+# define GEN6_WM_COMPUTED_DEPTH				(1 << 21)
+# define GEN6_WM_USES_SOURCE_DEPTH			(1 << 20)
+# define GEN6_WM_DISPATCH_ENABLE			(1 << 19)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5		(0 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0		(1 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_2_0		(2 << 16)
+# define GEN6_WM_LINE_END_CAP_AA_WIDTH_4_0		(3 << 16)
+# define GEN6_WM_LINE_AA_WIDTH_0_5			(0 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_1_0			(1 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_2_0			(2 << 14)
+# define GEN6_WM_LINE_AA_WIDTH_4_0			(3 << 14)
+# define GEN6_WM_POLYGON_STIPPLE_ENABLE			(1 << 13)
+# define GEN6_WM_LINE_STIPPLE_ENABLE			(1 << 11)
+# define GEN6_WM_OMASK_TO_RENDER_TARGET			(1 << 9)
+# define GEN6_WM_USES_SOURCE_W				(1 << 8)
+# define GEN6_WM_DUAL_SOURCE_BLEND_ENABLE		(1 << 7)
+# define GEN6_WM_32_DISPATCH_ENABLE			(1 << 2)
+# define GEN6_WM_16_DISPATCH_ENABLE			(1 << 1)
+# define GEN6_WM_8_DISPATCH_ENABLE			(1 << 0)
+/* DW6 */
+# define GEN6_WM_NUM_SF_OUTPUTS_SHIFT			20
+# define GEN6_WM_POSOFFSET_NONE				(0 << 18)
+# define GEN6_WM_POSOFFSET_CENTROID			(2 << 18)
+# define GEN6_WM_POSOFFSET_SAMPLE			(3 << 18)
+# define GEN6_WM_POSITION_ZW_PIXEL			(0 << 16)
+# define GEN6_WM_POSITION_ZW_CENTROID			(2 << 16)
+# define GEN6_WM_POSITION_ZW_SAMPLE			(3 << 16)
+# define GEN6_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC	(1 << 15)
+# define GEN6_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 14)
+# define GEN6_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC	(1 << 13)
+# define GEN6_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC		(1 << 12)
+# define GEN6_WM_PERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 11)
+# define GEN6_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		(1 << 10)
+# define GEN6_WM_POINT_RASTRULE_UPPER_RIGHT		(1 << 9)
+# define GEN6_WM_MSRAST_OFF_PIXEL			(0 << 1)
+# define GEN6_WM_MSRAST_OFF_PATTERN			(1 << 1)
+# define GEN6_WM_MSRAST_ON_PIXEL			(2 << 1)
+# define GEN6_WM_MSRAST_ON_PATTERN			(3 << 1)
+# define GEN6_WM_MSDISPMODE_PERPIXEL			(1 << 0)
+/* DW7: kernel 1 pointer */
+/* DW8: kernel 2 pointer */
+
+#define CMD_3D_CONSTANT_VS_STATE	      0x7815 /* GEN6+ */
+#define CMD_3D_CONSTANT_GS_STATE	      0x7816 /* GEN6+ */
+#define CMD_3D_CONSTANT_PS_STATE	      0x7817 /* GEN6+ */
+# define GEN6_CONSTANT_BUFFER_3_ENABLE			(1 << 15)
+# define GEN6_CONSTANT_BUFFER_2_ENABLE			(1 << 14)
+# define GEN6_CONSTANT_BUFFER_1_ENABLE			(1 << 13)
+# define GEN6_CONSTANT_BUFFER_0_ENABLE			(1 << 12)
+
+#define CMD_3D_SAMPLE_MASK			0x7818 /* GEN6+ */
+
+#define CMD_DRAW_RECT                 0x7900
+#define CMD_BLEND_CONSTANT_COLOR      0x7901
+#define CMD_CHROMA_KEY                0x7904
+#define CMD_DEPTH_BUFFER              0x7905
+#define CMD_POLY_STIPPLE_OFFSET       0x7906
+#define CMD_POLY_STIPPLE_PATTERN      0x7907
+#define CMD_LINE_STIPPLE_PATTERN      0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+#define CMD_AA_LINE_PARAMETERS        0x790a
+
+#define CMD_GS_SVB_INDEX			0x790b /* CTG+ */
+/* DW1 */
+# define SVB_INDEX_SHIFT				29
+# define SVB_LOAD_INTERNAL_VERTEX_COUNT			(1 << 0) /* SNB+ */
+/* DW2: SVB index */
+/* DW3: SVB maximum index */
+
+#define CMD_3D_MULTISAMPLE			0x790d /* SNB+ */
+/* DW1 */
+# define MS_PIXEL_LOCATION_CENTER			(0 << 4)
+# define MS_PIXEL_LOCATION_UPPER_LEFT			(1 << 4)
+# define MS_NUMSAMPLES_1				(0 << 1)
+# define MS_NUMSAMPLES_4				(2 << 1)
+# define MS_NUMSAMPLES_8				(3 << 1)
+
+#define CMD_3D_CLEAR_PARAMS			0x7910 /* ILK+ */
+# define DEPTH_CLEAR_VALID				(1 << 15)
+/* DW1: depth clear value */
+
+#define CMD_PIPE_CONTROL              0x7a00
+
+#define CMD_3D_PRIM                   0x7b00
+
+#define CMD_MI_FLUSH                  0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END    0x1
+#define R02_PRIM_START  0x2
+
+#include "intel_chipset.h"
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
new file mode 100644
index 0000000000..ff12daf497
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -0,0 +1,932 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+#include "main/mtypes.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+
+struct {	/* per-opcode disassembly info, indexed by BRW_OPCODE_* value */
+    char    *name;	/* mnemonic; a NULL (unset) slot makes print_opcode() report an invalid opcode */
+    int	    nsrc;	/* source operand count -- NOTE(review): consumer lies outside this chunk */
+    int	    ndst;	/* destination operand count -- same caveat */
+} opcode[128] = {
+    [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+    [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
+
+    [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+    [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+    [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+    [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+
+char *conditional_modifier[16] = {	/* suffix per BRW_CONDITIONAL_* code; slots not listed stay NULL */
+    [BRW_CONDITIONAL_NONE] = "",
+    [BRW_CONDITIONAL_Z] = ".e",
+    [BRW_CONDITIONAL_NZ] = ".ne",
+    [BRW_CONDITIONAL_G] = ".g",
+    [BRW_CONDITIONAL_GE] = ".ge",
+    [BRW_CONDITIONAL_L] = ".l",
+    [BRW_CONDITIONAL_LE] = ".le",
+    [BRW_CONDITIONAL_R] = ".r",
+    [BRW_CONDITIONAL_O] = ".o",
+    [BRW_CONDITIONAL_U] = ".u",
+};
+
+char *negate[2] = {	/* prefix for the source negate flag; printed first by src_da1()/src_ia1() */
+    [0] = "",
+    [1] = "-",
+};
+
+char *_abs[2] = {	/* prefix for the source abs flag; printed right after the negate prefix */
+    [0] = "",
+    [1] = "(abs)",
+};
+
+char *vert_stride[16] = {	/* vertical-stride field -> text in "<v,w,h>" regions; 7..14 are left NULL */
+    [0] = "0",
+    [1] = "1",
+    [2] = "2",
+    [3] = "4",
+    [4] = "8",
+    [5] = "16",
+    [6] = "32",
+    [15] = "VxH",
+};
+
+char *width[8] = {	/* width field -> text in "<v,w,h>" regions; 5..7 are left NULL */
+    [0] = "1",
+    [1] = "2",
+    [2] = "4",
+    [3] = "8",
+    [4] = "16",
+};
+
+char *horiz_stride[4] = {	/* horizontal-stride field -> text in "<v,w,h>" regions */
+    [0] = "0",
+    [1] = "1",
+    [2] = "2",
+    [3] = "4"
+};
+
+char *chan_sel[4] = {	/* component letters for indices 0..3 */
+    [0] = "x",
+    [1] = "y",
+    [2] = "z",
+    [3] = "w",
+};
+
+char *dest_condmod[16] = {	/* no entries: all 16 slots are NULL (empty braces are a GNU extension before C23) */
+};
+
+char *debug_ctrl[2] = {	/* 1 -> ".breakpoint" suffix, 0 -> nothing */
+    [0] = "",
+    [1] = ".breakpoint"
+};
+
+char *saturate[2] = {	/* 1 -> ".sat" suffix, 0 -> nothing */
+    [0] = "",
+    [1] = ".sat"
+};
+
+char *exec_size[8] = {	/* encoded execution size -> printed size; 6..7 are left NULL */
+    [0] = "1",
+    [1] = "2",
+    [2] = "4",
+    [3] = "8",
+    [4] = "16",
+    [5] = "32"
+};
+
+char *pred_inv[2] = {	/* sign printed for the predicate-inverse flag: "+" for 0, "-" for 1 */
+    [0] = "+",
+    [1] = "-"
+};
+
+char *pred_ctrl_align16[16] = {	/* [7] lines up with BRW_PREDICATE_ALIGN16_ALL4H; slot 0 and 8..15 are NULL */
+    [1] = "",
+    [2] = ".x",
+    [3] = ".y",
+    [4] = ".z",
+    [5] = ".w",
+    [6] = ".any4h",
+    [7] = ".all4h",
+};
+
+char *pred_ctrl_align1[16] = {	/* align1 counterpart of the table above; slot 0 and 12..15 are NULL */
+    [1] = "",
+    [2] = ".anyv",
+    [3] = ".allv",
+    [4] = ".any2h",
+    [5] = ".all2h",
+    [6] = ".any4h",
+    [7] = ".all4h",
+    [8] = ".any8h",
+    [9] = ".all8h",
+    [10] = ".any16h",
+    [11] = ".all16h",
+};
+
+char *thread_ctrl[4] = {	/* NOTE(review): BRW_THREAD_ATOMIC (1) has no string; control() would call it invalid */
+    [0] = "",
+    [2] = "switch"
+};
+
+char *compr_ctrl[4] = {	/* compression-control value -> option keyword; [3] is left NULL */
+    [0] = "",
+    [1] = "sechalf",
+    [2] = "compr",
+};
+
+char *dep_ctrl[4] = {	/* all four values covered; 3 prints both keywords comma-separated */
+    [0] = "",
+    [1] = "NoDDClr",
+    [2] = "NoDDChk",
+    [3] = "NoDDClr,NoDDChk",
+};
+
+char *mask_ctrl[4] = {	/* 1 -> "nomask"; [2] and [3] are left NULL */
+    [0] = "",
+    [1] = "nomask",
+};
+
+char *access_mode[2] = {	/* access-mode value -> "align1"/"align16" keyword */
+    [0] = "align1",
+    [1] = "align16",
+};
+
+char *reg_encoding[8] = {	/* BRW_REGISTER_TYPE_* -> type suffix for register operands; [6] is left NULL */
+    [0] = "UD",
+    [1] = "D",
+    [2] = "UW",
+    [3] = "W",
+    [4] = "UB",
+    [5] = "B",
+    [7] = "F"
+};
+
+char *imm_encoding[8] = {	/* type suffix for immediates: 5/6 read as VF/V here; [4] is left NULL */
+    [0] = "UD",
+    [1] = "D",
+    [2] = "UW",
+    [3] = "W",
+    [5] = "VF",
+    [6] = "V",
+    [7] = "F"
+};
+
+char *reg_file[4] = {	/* register-file value -> name prefix; reg() prints it before the register number */
+    [0] = "A",
+    [1] = "g",
+    [2] = "m",
+    [3] = "imm",
+};
+
+char *writemask[16] = {	/* 4-bit mask (x = bit 0 ... w = bit 3) -> suffix; the full mask 0xf prints nothing */
+    [0x0] = ".",
+    [0x1] = ".x",
+    [0x2] = ".y",
+    [0x3] = ".xy",
+    [0x4] = ".z",
+    [0x5] = ".xz",
+    [0x6] = ".yz",
+    [0x7] = ".xyz",
+    [0x8] = ".w",
+    [0x9] = ".xw",
+    [0xa] = ".yw",
+    [0xb] = ".xyw",
+    [0xc] = ".zw",
+    [0xd] = ".xzw",
+    [0xe] = ".yzw",
+    [0xf] = "",
+};
+
+char *end_of_thread[2] = {	/* 1 -> "EOT" keyword, 0 -> nothing */
+    [0] = "",
+    [1] = "EOT"
+};
+
+char *target_function[16] = {	/* BRW_MESSAGE_TARGET_* (0..7) -> printed name; 8..15 are left NULL */
+    [BRW_MESSAGE_TARGET_NULL] = "null",
+    [BRW_MESSAGE_TARGET_MATH] = "math",
+    [BRW_MESSAGE_TARGET_SAMPLER] = "sampler",
+    [BRW_MESSAGE_TARGET_GATEWAY] = "gateway",
+    [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read",
+    [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write",
+    [BRW_MESSAGE_TARGET_URB] = "urb",
+    [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
+};
+
+char *math_function[16] = {	/* BRW_MATH_FUNCTION_* -> printed name; slots 0, 14 and 15 are left NULL */
+    [BRW_MATH_FUNCTION_INV] = "inv",
+    [BRW_MATH_FUNCTION_LOG] = "log",
+    [BRW_MATH_FUNCTION_EXP] = "exp",
+    [BRW_MATH_FUNCTION_SQRT] = "sqrt",
+    [BRW_MATH_FUNCTION_RSQ] = "rsq",
+    [BRW_MATH_FUNCTION_SIN] = "sin",
+    [BRW_MATH_FUNCTION_COS] = "cos",
+    [BRW_MATH_FUNCTION_SINCOS] = "sincos",
+    [BRW_MATH_FUNCTION_TAN] = "tan",	/* value 9 is shared with BRW_MATH_FUNCTION_FDIV (gen6+) */
+    [BRW_MATH_FUNCTION_POW] = "pow",
+    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
+    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv",	/* quotient only */
+    [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",	/* remainder only */
+};
+
+char *math_saturate[2] = {	/* 1 -> "sat" keyword (BRW_MATH_SATURATE_SATURATE is 1) */
+    [0] = "",
+    [1] = "sat"
+};
+
+char *math_signed[2] = {	/* 1 -> "signed" keyword (BRW_MATH_INTEGER_SIGNED is 1) */
+    [0] = "",
+    [1] = "signed"
+};
+
+char *math_scalar[2] = {	/* 1 -> "scalar" keyword (BRW_MATH_DATA_SCALAR is 1) */
+    [0] = "",
+    [1] = "scalar"
+};
+
+char *math_precision[2] = {	/* 1 -> "partial_precision" (BRW_MATH_PRECISION_PARTIAL is 1) */
+    [0] = "",
+    [1] = "partial_precision"
+};
+
+char *urb_opcode[2] = {	/* 0 matches BRW_URB_OPCODE_WRITE; 1 is printed as "ff_sync" */
+    [0] = "urb_write",
+    [1] = "ff_sync",
+};
+
+char *urb_swizzle[4] = {	/* BRW_URB_SWIZZLE_* -> keyword; [3] is left NULL */
+    [BRW_URB_SWIZZLE_NONE] = "",
+    [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
+    [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
+};
+
+char *urb_allocate[2] = {	/* 1 -> "allocate" keyword */
+    [0] = "",
+    [1] = "allocate"
+};
+
+char *urb_used[2] = {	/* 1 -> "used" keyword */
+    [0] = "",
+    [1] = "used"
+};
+
+char *urb_complete[2] = {	/* 1 -> "complete" keyword */
+    [0] = "",
+    [1] = "complete"
+};
+
+char *sampler_target_format[4] = {	/* indices follow BRW_SAMPLER_RETURN_FORMAT_* (0, 2, 3); [1] is NULL */
+    [0] = "F",
+    [2] = "UD",
+    [3] = "D"
+};
+
+
+static int column;	/* characters written on the current line: bumped by string(), zeroed by newline(), read by pad() */
+
+static int string (FILE *file, char *string)
+{
+    column += strlen (string);	/* keep the running column count in step with the output */
+    fputs (string, file);		/* written as-is; no newline is appended */
+    return 0;			/* always reports success */
+}
+
+static int format (FILE *f, char *format, ...)
+{
+    char    text[1024];
+    va_list	ap;
+
+    /* Render into a bounded buffer, then emit through string() so column is updated. */
+    va_start (ap, format);
+    vsnprintf (text, sizeof (text) - 1, format, ap);
+    va_end (ap);
+    return string (f, text);	/* string() always returns 0 */
+}
+
+static int newline (FILE *f)
+{
+    column = 0;			/* the next character lands at the start of a line */
+    fputc ('\n', f);		/* terminate the current output line */
+    return 0;			/* always reports success */
+}
+
+static int pad (FILE *f, int c)
+{
+    string (f, " ");		/* one space is written even if column is already >= c */
+    while (column < c)		/* then keep filling until column c is reached */
+	string (f, " ");
+    return 0;
+}
+
+/* Print ctrl[id]: a NULL entry prints a diagnostic and returns 1; "" prints nothing; returns 0 otherwise. */
+static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space)
+{
+    char *text = ctrl[id];	/* NOTE: id is not range-checked against the table here */
+    if (!text) {
+	format (file, "*** invalid %s value %u ", name, id);	/* via format() so column stays in sync */
+	return 1;
+    }
+    if (!text[0])
+	return 0;
+    if (space && *space)		/* something was printed earlier: separate with a blank */
+	string (file, " ");
+    string (file, text);
+    if (space)
+	*space = 1;			/* record that output was produced */
+    return 0;
+}
+
+/* Print the mnemonic for opcode id; for an unset table slot print a diagnostic and return 1. */
+static int print_opcode (FILE *file, int id)
+{
+    char *mnemonic = opcode[id].name;
+    if (mnemonic)
+	return string (file, mnemonic);	/* string() always returns 0 */
+    format (file, "*** invalid opcode value %d ", id);
+    return 1;
+}
+
+/* Print the register selected by (_reg_file, _reg_nr).
+ * Returns -1 after printing "null" or "ip" (callers then skip the
+ * subreg/region/type suffix), otherwise 0, or 1 if _reg_file has no
+ * entry in reg_file[]. */
+static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
+{
+    char *arf_name;
+
+    if (_reg_file != BRW_ARCHITECTURE_REGISTER_FILE) {
+	int err = control (file, "src reg file", reg_file, _reg_file, NULL);
+	format (file, "%d", _reg_nr);
+	return err;
+    }
+
+    /* Architecture registers: the high nibble of the register number
+     * picks the register class, the low nibble the index within it. */
+    switch (_reg_nr & 0xf0) {
+    case BRW_ARF_NULL:
+	string (file, "null");
+	return -1;
+    case BRW_ARF_IP:
+	string (file, "ip");
+	return -1;
+    case BRW_ARF_ADDRESS:		arf_name = "a";    break;
+    case BRW_ARF_ACCUMULATOR:		arf_name = "acc";  break;
+    /* Flag registers print as f<n> instead of falling to the raw ARF form. */
+    case BRW_ARF_FLAG:			arf_name = "f";    break;
+    case BRW_ARF_MASK:			arf_name = "mask"; break;
+    /* NOTE(review): "msd" reads like mask-stack *depth*; BRW_ARF_MASK_STACK_DEPTH
+     * itself has no case and falls to the default -- confirm against the PRM. */
+    case BRW_ARF_MASK_STACK:		arf_name = "msd";  break;
+    case BRW_ARF_STATE:			arf_name = "sr";   break;
+    case BRW_ARF_CONTROL:		arf_name = "cr";   break;
+    case BRW_ARF_NOTIFICATION_COUNT:	arf_name = "n";    break;
+    default:
+	/* Unrecognised class: show the raw register number. */
+	format (file, "ARF%d", _reg_nr);
+	return 0;
+    }
+
+    format (file, "%s%d", arf_name, _reg_nr & 0x0f);
+    return 0;
+}
+
+/* Print the destination operand of inst; returns nonzero if any field could not be decoded. */
+static int dest (FILE *file, struct brw_instruction *inst)
+{
+    int	err = 0;
+
+    if (inst->header.access_mode == BRW_ALIGN_1)
+    {
+	if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
+	{
+	    err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
+	    if (err == -1)	/* reg() printed "null"/"ip": no subreg, stride or type follows */
+		return 0;
+	    if (inst->bits1.da1.dest_subreg_nr)
+		format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+	    format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
+	    err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
+	}
+	else
+	{
+	    string (file, "g[a0");	/* register-indirect form: g[a0.<subreg> <offset>] */
+	    if (inst->bits1.ia1.dest_subreg_nr)
+		format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+	    if (inst->bits1.ia1.dest_indirect_offset)
+		format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
+	    string (file, "]");
+	    format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride);
+	    err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
+	}
+    }
+    else
+    {
+	if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
+	{
+	    err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
+	    if (err == -1)
+		return 0;
+	    if (inst->bits1.da16.dest_subreg_nr)
+		format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+	    string (file, "<1>");
+	    err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
+	    err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
+	}
+	else
+	{
+	    err = 1;
+	    string (file, "Indirect align16 address mode not supported");
+	}
+    }
+    return err;	/* report the accumulated status rather than unconditional success */
+}
+
+/*
+ * Print an align1 source region as "<vert_stride,width,horiz_stride>".
+ * Returns the OR of the status codes from the three field printers.
+ */
+static int src_align1_region (FILE *file,
+			      GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
+{
+    int status;
+
+    string (file, "<");
+    status = control (file, "vert stride", vert_stride, _vert_stride, NULL);
+
+    string (file, ",");
+    status |= control (file, "width", width, _width, NULL);
+
+    string (file, ",");
+    status |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
+
+    string (file, ">");
+    return status;
+}
+
+/*
+ * Print a directly addressed align1 source operand: optional negate/abs
+ * modifiers, the register, an optional ".subreg", the region descriptor
+ * and the register type.
+ *
+ * Returns 0 on success and non-zero if any field printer reported an
+ * error.  When reg() returns -1 (it does so after printing "ip") the
+ * operand is complete and 0 is returned.
+ */
+static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
+		    GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
+		    GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    err |= reg (file, _reg_file, reg_num);
+    if (err == -1)
+	return 0;
+    if (sub_reg_num)
+	format (file, ".%d", sub_reg_num);
+    /* The region printer reports errors too; do not drop them. */
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride);
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+
+/*
+ * Print a register-indirect align1 source operand in the form
+ * "g[a0.<subreg> <offset>]<region><type>", preceded by the optional
+ * negate/abs modifiers.  The ".<subreg>" and " <offset>" parts are only
+ * printed when non-zero.
+ *
+ * _reg_file and _addr_mode are accepted for symmetry with the caller but
+ * are not used here.
+ *
+ * Returns 0 on success and non-zero if any field printer reported an error.
+ */
+static int src_ia1 (FILE *file,
+		    GLuint type,
+		    GLuint _reg_file,
+		    GLint _addr_imm,
+		    GLuint _addr_subreg_nr,
+		    GLuint _negate,
+		    GLuint __abs,
+		    GLuint _addr_mode,
+		    GLuint _horiz_stride,
+		    GLuint _width,
+		    GLuint _vert_stride)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    string (file, "g[a0");
+    if (_addr_subreg_nr)
+	format (file, ".%d", _addr_subreg_nr);
+    if (_addr_imm)
+	format (file, " %d", _addr_imm);
+    string (file, "]");
+    /* The region printer reports errors too; do not drop them. */
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride);
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+
+/*
+ * Print a directly addressed align16 source operand: optional negate/abs
+ * modifiers, the register, an optional ".subreg", the "<vstride,1,1>"
+ * region, the register type and finally the swizzle.
+ *
+ * Returns the OR of the field printers' status codes, or 0 when reg()
+ * returned -1 and the operand was cut short.
+ */
+static int src_da16 (FILE *file,
+		     GLuint _reg_type,
+		     GLuint _reg_file,
+		     GLuint _vert_stride,
+		     GLuint _reg_nr,
+		     GLuint _subreg_nr,
+		     GLuint __abs,
+		     GLuint _negate,
+		     GLuint swz_x,
+		     GLuint swz_y,
+		     GLuint swz_z,
+		     GLuint swz_w)
+{
+    int status = 0;
+    int is_identity;
+    int is_replicate;
+
+    status |= control (file, "negate", negate, _negate, NULL);
+    status |= control (file, "abs", _abs, __abs, NULL);
+
+    status |= reg (file, _reg_file, _reg_nr);
+    if (status == -1)
+	return 0;
+
+    if (_subreg_nr != 0)
+	format (file, ".%d", _subreg_nr);
+
+    string (file, "<");
+    status |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+    string (file, ",1,1>");
+    status |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+
+    /*
+     * Swizzle output comes in three flavours:
+     *  xyzw (identity)     - print nothing
+     *  one channel for all - print that channel once
+     *  anything else       - print all four selected channels
+     */
+    is_identity = (swz_x == BRW_CHANNEL_X && swz_y == BRW_CHANNEL_Y &&
+		   swz_z == BRW_CHANNEL_Z && swz_w == BRW_CHANNEL_W);
+    if (is_identity)
+	return status;
+
+    string (file, ".");
+    status |= control (file, "channel select", chan_sel, swz_x, NULL);
+
+    is_replicate = (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w);
+    if (!is_replicate)
+    {
+	status |= control (file, "channel select", chan_sel, swz_y, NULL);
+	status |= control (file, "channel select", chan_sel, swz_z, NULL);
+	status |= control (file, "channel select", chan_sel, swz_w, NULL);
+    }
+
+    return status;
+}
+
+
+/*
+ * Print the immediate operand stored in inst->bits3, formatted according
+ * to the register type \p type.  Types without a case below print nothing.
+ * Always returns 0.
+ */
+static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
+    switch (type) {
+    case BRW_REGISTER_TYPE_UD:
+	format (file, "0x%08xUD", inst->bits3.ud);
+	break;
+    case BRW_REGISTER_TYPE_D:
+	format (file, "%dD", inst->bits3.d);
+	break;
+    case BRW_REGISTER_TYPE_UW:
+	format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
+	break;
+    case BRW_REGISTER_TYPE_W:
+	format (file, "%dW", (int16_t) inst->bits3.d);
+	break;
+    case BRW_REGISTER_TYPE_UB:
+	/* Must be an unsigned narrowing: a signed 8-bit value >= 0x80 would
+	 * be sign-extended by the default argument promotions and print as
+	 * 0xffffffXX instead of 0xXX.
+	 */
+	format (file, "0x%02xUB", (uint8_t) inst->bits3.ud);
+	break;
+    case BRW_REGISTER_TYPE_VF:
+	format (file, "Vector Float");
+	break;
+    case BRW_REGISTER_TYPE_V:
+	format (file, "0x%08xV", inst->bits3.ud);
+	break;
+    case BRW_REGISTER_TYPE_F:
+	format (file, "%-gF", inst->bits3.f);
+	break;
+    default:
+	/* Other types: nothing is printed. */
+	break;
+    }
+    return 0;
+}
+
+/*
+ * Print the first source operand of \p inst.
+ *
+ * Dispatches on the operand kind: immediate, align1 direct, align1
+ * register-indirect or align16 direct.  Align16 indirect addressing is not
+ * supported; a message is printed and 1 is returned.  Otherwise the status
+ * of the selected printer is returned.
+ */
+static int src0 (FILE *file, struct brw_instruction *inst)
+{
+    if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
+	return imm (file, inst->bits1.da1.src0_reg_type, inst);
+
+    if (inst->header.access_mode != BRW_ALIGN_1)
+    {
+	/* Align16: only direct addressing can be decoded. */
+	if (inst->bits2.da16.src0_address_mode != BRW_ADDRESS_DIRECT)
+	{
+	    string (file, "Indirect align16 address mode not supported");
+	    return 1;
+	}
+
+	return src_da16 (file,
+			 inst->bits1.da16.src0_reg_type,
+			 inst->bits1.da16.src0_reg_file,
+			 inst->bits2.da16.src0_vert_stride,
+			 inst->bits2.da16.src0_reg_nr,
+			 inst->bits2.da16.src0_subreg_nr,
+			 inst->bits2.da16.src0_abs,
+			 inst->bits2.da16.src0_negate,
+			 inst->bits2.da16.src0_swz_x,
+			 inst->bits2.da16.src0_swz_y,
+			 inst->bits2.da16.src0_swz_z,
+			 inst->bits2.da16.src0_swz_w);
+    }
+
+    /* Align1, register-indirect. */
+    if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
+	return src_ia1 (file,
+			inst->bits1.ia1.src0_reg_type,
+			inst->bits1.ia1.src0_reg_file,
+			inst->bits2.ia1.src0_indirect_offset,
+			inst->bits2.ia1.src0_subreg_nr,
+			inst->bits2.ia1.src0_negate,
+			inst->bits2.ia1.src0_abs,
+			inst->bits2.ia1.src0_address_mode,
+			inst->bits2.ia1.src0_horiz_stride,
+			inst->bits2.ia1.src0_width,
+			inst->bits2.ia1.src0_vert_stride);
+
+    /* Align1, direct. */
+    return src_da1 (file,
+		    inst->bits1.da1.src0_reg_type,
+		    inst->bits1.da1.src0_reg_file,
+		    inst->bits2.da1.src0_vert_stride,
+		    inst->bits2.da1.src0_width,
+		    inst->bits2.da1.src0_horiz_stride,
+		    inst->bits2.da1.src0_reg_nr,
+		    inst->bits2.da1.src0_subreg_nr,
+		    inst->bits2.da1.src0_abs,
+		    inst->bits2.da1.src0_negate);
+}
+
+/*
+ * Print the second source operand of \p inst.
+ *
+ * Same dispatch as src0, but the operand fields are read from bits3
+ * instead of bits2.  Align16 indirect addressing is not supported; a
+ * message is printed and 1 is returned.  Otherwise the status of the
+ * selected printer is returned.
+ */
+static int src1 (FILE *file, struct brw_instruction *inst)
+{
+    if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+	return imm (file, inst->bits1.da1.src1_reg_type, inst);
+
+    if (inst->header.access_mode != BRW_ALIGN_1)
+    {
+	/* Align16: only direct addressing can be decoded. */
+	if (inst->bits3.da16.src1_address_mode != BRW_ADDRESS_DIRECT)
+	{
+	    string (file, "Indirect align16 address mode not supported");
+	    return 1;
+	}
+
+	return src_da16 (file,
+			 inst->bits1.da16.src1_reg_type,
+			 inst->bits1.da16.src1_reg_file,
+			 inst->bits3.da16.src1_vert_stride,
+			 inst->bits3.da16.src1_reg_nr,
+			 inst->bits3.da16.src1_subreg_nr,
+			 inst->bits3.da16.src1_abs,
+			 inst->bits3.da16.src1_negate,
+			 inst->bits3.da16.src1_swz_x,
+			 inst->bits3.da16.src1_swz_y,
+			 inst->bits3.da16.src1_swz_z,
+			 inst->bits3.da16.src1_swz_w);
+    }
+
+    /* Align1, register-indirect. */
+    if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT)
+	return src_ia1 (file,
+			inst->bits1.ia1.src1_reg_type,
+			inst->bits1.ia1.src1_reg_file,
+			inst->bits3.ia1.src1_indirect_offset,
+			inst->bits3.ia1.src1_subreg_nr,
+			inst->bits3.ia1.src1_negate,
+			inst->bits3.ia1.src1_abs,
+			inst->bits3.ia1.src1_address_mode,
+			inst->bits3.ia1.src1_horiz_stride,
+			inst->bits3.ia1.src1_width,
+			inst->bits3.ia1.src1_vert_stride);
+
+    /* Align1, direct. */
+    return src_da1 (file,
+		    inst->bits1.da1.src1_reg_type,
+		    inst->bits1.da1.src1_reg_file,
+		    inst->bits3.da1.src1_vert_stride,
+		    inst->bits3.da1.src1_width,
+		    inst->bits3.da1.src1_horiz_stride,
+		    inst->bits3.da1.src1_reg_nr,
+		    inst->bits3.da1.src1_subreg_nr,
+		    inst->bits3.da1.src1_abs,
+		    inst->bits3.da1.src1_negate);
+}
+
+/**
+ * Disassemble one instruction and write its text to \p file.
+ *
+ * Output order: optional predicate, opcode with saturate/debug modifiers,
+ * math function or conditional modifier, execution size, then the
+ * destination and source operands (each preceded by a pad() call for 16, 32
+ * and 48).  SEND instructions get their message descriptor on a second
+ * line.  Every instruction except NOP ends with its options in braces; all
+ * instructions are terminated by ";" and a newline.
+ *
+ * \param file  stream to print to
+ * \param inst  instruction to decode
+ * \param gen   hardware generation; values >= 5 select the gen5 layouts for
+ *              the SEND target and the URB message fields
+ * \return 0 on success, non-zero if any field printer reported an error
+ */
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
+{
+    int	err = 0;
+    int space = 0;
+
+    /* Predicate: "(+f0.N<ctrl>) " style prefix. */
+    if (inst->header.predicate_control) {
+	string (file, "(");
+	err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
+	string (file, "f0");
+	if (inst->bits2.da1.flag_reg_nr)
+	    format (file, ".%d", inst->bits2.da1.flag_reg_nr);
+	if (inst->header.access_mode == BRW_ALIGN_1)
+	    err |= control (file, "predicate control align1", pred_ctrl_align1,
+			    inst->header.predicate_control, NULL);
+	else
+	    err |= control (file, "predicate control align16", pred_ctrl_align16,
+			    inst->header.predicate_control, NULL);
+	string (file, ") ");
+    }
+
+    err |= print_opcode (file, inst->header.opcode);
+    err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
+    err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
+
+    /* The destreg__conditionalmod field is shared: math function for MATH,
+     * a plain number for SEND (printed below), conditional modifier
+     * otherwise.
+     */
+    if (inst->header.opcode == BRW_OPCODE_MATH) {
+	string (file, " ");
+	err |= control (file, "function", math_function,
+			inst->header.destreg__conditionalmod, NULL);
+    } else if (inst->header.opcode != BRW_OPCODE_SEND)
+	err |= control (file, "conditional modifier", conditional_modifier,
+			inst->header.destreg__conditionalmod, NULL);
+
+    if (inst->header.opcode != BRW_OPCODE_NOP) {
+	string (file, "(");
+	err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
+	string (file, ")");
+    }
+
+    if (inst->header.opcode == BRW_OPCODE_SEND)
+	format (file, " %d", inst->header.destreg__conditionalmod);
+
+    /* Operands, as many as the opcode table says the opcode takes. */
+    if (opcode[inst->header.opcode].ndst > 0) {
+	pad (file, 16);
+	err |= dest (file, inst);
+    }
+    if (opcode[inst->header.opcode].nsrc > 0) {
+	pad (file, 32);
+	err |= src0 (file, inst);
+    }
+    if (opcode[inst->header.opcode].nsrc > 1) {
+	pad (file, 48);
+	err |= src1 (file, inst);
+    }
+
+    if (inst->header.opcode == BRW_OPCODE_SEND) {
+	int target;
+
+	/* The message target lives in a different field on gen5+. */
+	if (gen >= 5)
+	   target = inst->bits2.send_gen5.sfid;
+	else
+	   target = inst->bits3.generic.msg_target;
+
+	newline (file);
+	pad (file, 16);
+	space = 0;
+	err |= control (file, "target function", target_function,
+			target, &space);
+
+	switch (target) {
+	case BRW_MESSAGE_TARGET_MATH:
+	    err |= control (file, "math function", math_function,
+			    inst->bits3.math.function, &space);
+	    err |= control (file, "math saturate", math_saturate,
+			    inst->bits3.math.saturate, &space);
+	    err |= control (file, "math signed", math_signed,
+			    inst->bits3.math.int_type, &space);
+	    err |= control (file, "math scalar", math_scalar,
+			    inst->bits3.math.data_type, &space);
+	    err |= control (file, "math precision", math_precision,
+			    inst->bits3.math.precision, &space);
+	    break;
+	case BRW_MESSAGE_TARGET_SAMPLER:
+	    format (file, " (%d, %d, ",
+		    inst->bits3.sampler.binding_table_index,
+		    inst->bits3.sampler.sampler);
+	    err |= control (file, "sampler target format", sampler_target_format,
+			    inst->bits3.sampler.return_format, NULL);
+	    string (file, ")");
+	    break;
+	case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
+	    format (file, " (%d, %d, %d, %d)",
+		    inst->bits3.dp_write.binding_table_index,
+		    (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
+		    inst->bits3.dp_write.msg_control,
+		    inst->bits3.dp_write.msg_type,
+		    inst->bits3.dp_write.send_commit_msg);
+	    break;
+	case BRW_MESSAGE_TARGET_URB:
+	    if (gen >= 5) {
+		format (file, " %d", inst->bits3.urb_gen5.offset);
+	    } else {
+		format (file, " %d", inst->bits3.urb.offset);
+	    }
+
+	    space = 1;
+	    if (gen >= 5) {
+		err |= control (file, "urb opcode", urb_opcode,
+				inst->bits3.urb_gen5.opcode, &space);
+	    }
+	    err |= control (file, "urb swizzle", urb_swizzle,
+			    inst->bits3.urb.swizzle_control, &space);
+	    err |= control (file, "urb allocate", urb_allocate,
+			    inst->bits3.urb.allocate, &space);
+	    err |= control (file, "urb used", urb_used,
+			    inst->bits3.urb.used, &space);
+	    err |= control (file, "urb complete", urb_complete,
+			    inst->bits3.urb.complete, &space);
+	    break;
+	case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
+	    break;
+	default:
+	    /* Report the value that was actually switched on; on gen5+ it
+	     * does not come from bits3.generic.msg_target.
+	     */
+	    format (file, "unsupported target %d", target);
+	    break;
+	}
+	if (space)
+	    string (file, " ");
+	format (file, "mlen %d",
+		inst->bits3.generic.msg_length);
+	format (file, " rlen %d",
+		inst->bits3.generic.response_length);
+    }
+    pad (file, 64);
+    /* Instruction options: "{ ... }". */
+    if (inst->header.opcode != BRW_OPCODE_NOP) {
+	string (file, "{");
+	space = 1;
+	err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
+	err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+	err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
+	err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
+	err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+	if (inst->header.opcode == BRW_OPCODE_SEND)
+	    err |= control (file, "end of thread", end_of_thread,
+			    inst->bits3.generic.end_of_thread, &space);
+	if (space)
+	    string (file, " ");
+	string (file, "}");
+    }
+    string (file, ";");
+    newline (file);
+    return err;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
new file mode 100644
index 0000000000..16331cc3ac
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -0,0 +1,487 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/enums.h"
+#include "tnl/tnl.h"
+#include "vbo/vbo_context.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+
+#include "intel_batchbuffer.h"
+
+#define FILE_DEBUG_FLAG DEBUG_BATCH
+
+/* Hardware primitive topology for each GL primitive mode, indexed by the
+ * GL mode value (GL_POINTS .. GL_POLYGON).  Read-only lookup table.
+ */
+static const GLuint prim_to_hw_prim[GL_POLYGON+1] = {
+   _3DPRIM_POINTLIST,	/* GL_POINTS */
+   _3DPRIM_LINELIST,	/* GL_LINES */
+   _3DPRIM_LINELOOP,	/* GL_LINE_LOOP */
+   _3DPRIM_LINESTRIP,	/* GL_LINE_STRIP */
+   _3DPRIM_TRILIST,	/* GL_TRIANGLES */
+   _3DPRIM_TRISTRIP,	/* GL_TRIANGLE_STRIP */
+   _3DPRIM_TRIFAN,	/* GL_TRIANGLE_FAN */
+   _3DPRIM_QUADLIST,	/* GL_QUADS */
+   _3DPRIM_QUADSTRIP,	/* GL_QUAD_STRIP */
+   _3DPRIM_POLYGON	/* GL_POLYGON */
+};
+
+
+/* Reduced primitive class (points, lines or triangles) for each GL
+ * primitive mode, indexed by the GL mode value (GL_POINTS .. GL_POLYGON).
+ */
+static const GLenum reduced_prim[GL_POLYGON+1] = {  
+   GL_POINTS,		/* GL_POINTS */
+   GL_LINES,		/* GL_LINES */
+   GL_LINES,		/* GL_LINE_LOOP */
+   GL_LINES,		/* GL_LINE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLES */
+   GL_TRIANGLES,	/* GL_TRIANGLE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLE_FAN */
+   GL_TRIANGLES,	/* GL_QUADS */
+   GL_TRIANGLES,	/* GL_QUAD_STRIP */
+   GL_TRIANGLES		/* GL_POLYGON */
+};
+
+
+/* Record the primitive mode about to be drawn and return the matching
+ * hardware topology.
+ *
+ * A change of primitive raises BRW_NEW_PRIMITIVE (and
+ * BRW_NEW_REDUCED_PRIMITIVE when the reduced class changes too) so that
+ * state is re-validated.  Not the nicest approach; it would be better if
+ * all the programs were immune to the active primitive (ie. coped with all
+ * possibilities), but that may not be realistic.
+ */
+static GLuint brw_set_prim(struct brw_context *brw,
+			   const struct _mesa_prim *prim)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLenum mode = prim->mode;
+   GLboolean smooth_and_filled;
+
+   if (INTEL_DEBUG & DEBUG_PRIMS)
+      printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
+
+   /* Slight optimization to avoid the GS program when not needed: with
+    * non-flat shading and both faces filled, a quad strip is drawn as a
+    * triangle strip and a single quad as a triangle fan.
+    */
+   smooth_and_filled = (ctx->Light.ShadeModel != GL_FLAT &&
+                        ctx->Polygon.FrontMode == GL_FILL &&
+                        ctx->Polygon.BackMode == GL_FILL);
+   if (smooth_and_filled) {
+      if (mode == GL_QUAD_STRIP)
+         mode = GL_TRIANGLE_STRIP;
+      else if (mode == GL_QUADS && prim->count == 4)
+         mode = GL_TRIANGLE_FAN;
+   }
+
+   /* Nothing to flag when the primitive is unchanged. */
+   if (mode == brw->primitive)
+      return prim_to_hw_prim[mode];
+
+   brw->primitive = mode;
+   brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
+
+   if (brw->intel.reduced_primitive != reduced_prim[mode]) {
+      brw->intel.reduced_primitive = reduced_prim[mode];
+      brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
+   }
+
+   return prim_to_hw_prim[mode];
+}
+
+
+/* Drop trailing vertices that cannot form a complete primitive.
+ *
+ * Quad strips are cut to an even count (and to zero below 4 vertices),
+ * quads to a multiple of 4; every other mode is returned unchanged.
+ */
+static GLuint trim(GLenum prim, GLuint length)
+{
+   switch (prim) {
+   case GL_QUAD_STRIP:
+      if (length <= 3)
+         return 0;
+      return length - length % 2;
+
+   case GL_QUADS:
+      return length - length % 4;
+
+   default:
+      return length;
+   }
+}
+
+
+/* Build a 3DPRIMITIVE packet for one primitive and append it to the batch.
+ *
+ * The vertex count is passed through trim() first; if nothing is left the
+ * packet is not emitted.  For indexed primitives the start location is
+ * offset by brw->ib.start_vertex_offset.
+ */
+static void brw_emit_prim(struct brw_context *brw,
+			  const struct _mesa_prim *prim,
+			  uint32_t hw_prim)
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_3d_primitive prim_packet;
+
+   if (INTEL_DEBUG & DEBUG_PRIMS)
+      printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 
+		   prim->start, prim->count);
+
+   /* Packet header. */
+   prim_packet.header.opcode = CMD_3D_PRIM;
+   prim_packet.header.length = sizeof(prim_packet)/4 - 2;
+   prim_packet.header.pad = 0;
+   prim_packet.header.topology = hw_prim;
+   prim_packet.header.indexed = prim->indexed;
+
+   /* Packet payload. */
+   prim_packet.verts_per_instance = trim(prim->mode, prim->count);
+   prim_packet.start_vert_location = prim->start;
+   if (prim->indexed)
+      prim_packet.start_vert_location += brw->ib.start_vertex_offset;
+   prim_packet.instance_count = 1;
+   prim_packet.start_instance_location = 0;
+   prim_packet.base_vert_location = prim->basevertex;
+
+   /* When always_flush_cache is set, flush both before and after the
+    * primitive.  This is meant to catch missed flushes that hurt the
+    * instruction/state cache as well as missed flushes of the render
+    * cache on its way to parts of the driver other than the draw code.
+    */
+   if (intel->always_flush_cache)
+      intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   if (prim_packet.verts_per_instance)
+      intel_batchbuffer_data(intel->batch, &prim_packet, sizeof(prim_packet));
+
+   if (intel->always_flush_cache)
+      intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+/* Rebind all vertex inputs to the given client arrays.
+ *
+ * Drops the buffer references held by the previous inputs, clears the
+ * input and size records, then records each array and packs (Size - 1) of
+ * every array with a non-zero StrideB into vb.info.sizes, two bits per
+ * attribute and 16 attributes per word.  BRW_NEW_INPUT_DIMENSIONS is
+ * raised when the packed sizes differ from the previous ones.
+ */
+static void brw_merge_inputs( struct brw_context *brw,
+		       const struct gl_client_array *arrays[])
+{
+   const struct brw_vertex_info previous = brw->vb.info;
+   GLuint attr;
+
+   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++)
+      drm_intel_bo_unreference(brw->vb.inputs[attr].bo);
+
+   memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs));
+   memset(&brw->vb.info, 0, sizeof(brw->vb.info));
+
+   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
+      const struct gl_client_array *array = arrays[attr];
+
+      brw->vb.inputs[attr].glarray = array;
+      brw->vb.inputs[attr].attrib = (gl_vert_attrib) attr;
+
+      /* Arrays with a zero stride do not contribute a size. */
+      if (array->StrideB == 0)
+         continue;
+
+      brw->vb.info.sizes[attr / 16] |= (array->Size - 1) << ((attr % 16) * 2);
+   }
+
+   /* Raise statechanges if input sizes have changed. */
+   if (memcmp(brw->vb.info.sizes, previous.sizes, sizeof(previous.sizes)) != 0)
+      brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
+}
+
+/* Decide whether this draw must be handed to the software path.
+ *
+ * Returns GL_TRUE when a fallback is required, GL_FALSE when the hardware
+ * path may be used.
+ *
+ * XXX: could split the primitive list to fallback only on the
+ * non-conformant primitives.
+ */
+static GLboolean check_fallbacks( struct brw_context *brw,
+				  const struct _mesa_prim *prim,
+				  GLuint nr_prims )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLuint p;
+   int u;
+
+   /* Without strict OpenGL conformance (mode 0) fallbacks are never used;
+    * when fallbacks are forced (mode 2) they are always used.
+    */
+   if (brw->intel.conformance_mode == 0)
+      return GL_FALSE;
+   if (brw->intel.conformance_mode == 2)
+      return GL_TRUE;
+
+   /* Per-primitive checks. */
+   for (p = 0; p < nr_prims; p++) {
+      const GLenum mode = prim[p].mode;
+
+      /* Smooth polygons. */
+      if (ctx->Polygon.SmoothFlag && reduced_prim[mode] == GL_TRIANGLES)
+         return GL_TRUE;
+
+      /* BRW hardware will do AA lines, but they are non-conformant it
+       * seems.  TBD whether we keep this fallback.
+       */
+      if (ctx->Line.SmoothFlag && reduced_prim[mode] == GL_LINES)
+         return GL_TRUE;
+
+      /* Stipple -- these fallbacks could be resolved with a little bit
+       * of work?
+       */
+      if (ctx->Line.StippleFlag) {
+         /* GS doesn't get enough information to know when to reset the
+          * stipple counter?!?
+          */
+         if (mode == GL_LINE_LOOP || mode == GL_LINE_STRIP)
+            return GL_TRUE;
+
+         if (mode == GL_POLYGON &&
+             (ctx->Polygon.FrontMode == GL_LINE ||
+              ctx->Polygon.BackMode == GL_LINE))
+            return GL_TRUE;
+      }
+
+      /* Smooth points. */
+      if (ctx->Point.SmoothFlag && mode == GL_POINTS)
+         return GL_TRUE;
+   }
+
+   /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
+    * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP as
+    * GL_CLAMP_TO_EDGE instead.  Under strict conformance, force the
+    * fallback when an enabled 1D, 2D or 3D texture uses GL_CLAMP on any
+    * of its wrap axes.
+    */
+   for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
+      struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
+
+      if (!texUnit->Enabled)
+         continue;
+
+      if ((texUnit->Enabled & TEXTURE_1D_BIT) &&
+          texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP)
+         return GL_TRUE;
+
+      if ((texUnit->Enabled & TEXTURE_2D_BIT) &&
+          (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP ||
+           texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP))
+         return GL_TRUE;
+
+      if ((texUnit->Enabled & TEXTURE_3D_BIT) &&
+          (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP ||
+           texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP ||
+           texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP))
+         return GL_TRUE;
+   }
+
+   /* Nothing stopping us from the fast path now */
+   return GL_FALSE;
+}
+
+/* Attempt to draw the given primitives on the hardware.
+ *
+ * May fail if out of video memory for texture or vbo upload, or on
+ * fallback conditions.
+ *
+ * Returns GL_TRUE once at least one primitive has been emitted, GL_FALSE
+ * if check_fallbacks() asked for a fallback or state validation set
+ * intel.Fallback before anything was emitted.
+ */
+static GLboolean brw_try_draw_prims( GLcontext *ctx,
+				     const struct gl_client_array *arrays[],
+				     const struct _mesa_prim *prim,
+				     GLuint nr_prims,
+				     const struct _mesa_index_buffer *ib,
+				     GLuint min_index,
+				     GLuint max_index )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct brw_context *brw = brw_context(ctx);
+   GLboolean retval = GL_FALSE;
+   GLboolean warn = GL_FALSE;
+   GLboolean first_time = GL_TRUE;
+   GLuint i;
+
+   if (ctx->NewState)
+      _mesa_update_state( ctx );
+
+   /* We have to validate the textures *before* checking for fallbacks;
+    * otherwise, the software fallback won't be able to rely on the
+    * texture state, the firstLevel and lastLevel fields won't be
+    * set in the intel texture object (they'll both be 0), and the 
+    * software fallback will segfault if it attempts to access any
+    * texture level other than level 0.
+    */
+   brw_validate_textures( brw );
+
+   if (check_fallbacks(brw, prim, nr_prims))
+      return GL_FALSE;
+
+   /* Bind all inputs, derive varying and size information:
+    */
+   brw_merge_inputs( brw, arrays );
+
+   brw->ib.ib = ib;
+   brw->state.dirty.brw |= BRW_NEW_INDICES;
+
+   brw->vb.min_index = min_index;
+   brw->vb.max_index = max_index;
+   brw->state.dirty.brw |= BRW_NEW_VERTICES;
+
+   /* Have to validate state quite late.  Will rebuild tnl_program,
+    * which depends on varying information.  
+    * 
+    * Note this is where brw->vs->prog_data.inputs_read is calculated,
+    * so can't access it earlier.
+    */
+
+   intel_prepare_render(intel);
+
+   for (i = 0; i < nr_prims; i++) {
+      uint32_t hw_prim;
+
+      /* Flush the batch if it's approaching full, so that we don't wrap while
+       * we've got validated state that needs to be in the same batch as the
+       * primitives.  This fraction is just a guess (minimal full state plus
+       * a primitive is around 512 bytes), and would be better if we had
+       * an upper bound of how much we might emit in a single
+       * brw_try_draw_prims().
+       */
+      intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4);
+
+      hw_prim = brw_set_prim(brw, &prim[i]);
+
+      /* State is (re)validated and uploaded for the first primitive and
+       * whenever brw_set_prim() flagged a primitive change.
+       */
+      if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) {
+	 first_time = GL_FALSE;
+
+	 brw_validate_state(brw);
+
+	 /* Various fallback checks:  */
+	 if (brw->intel.Fallback)
+	    goto out;
+
+	 /* Check that we can fit our state in with our existing batchbuffer, or
+	  * flush otherwise.
+	  */
+	 if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+					     brw->state.validated_bo_count)) {
+	    /* Static: the aperture warning is issued at most once. */
+	    static GLboolean warned;
+	    intel_batchbuffer_flush(intel->batch);
+
+	    /* Validate the state after we flushed the batch (which would have
+	     * changed the set of dirty state).  If we still fail to
+	     * check_aperture, warn of what's happening, but attempt to continue
+	     * on since it may succeed anyway, and the user would probably rather
+	     * see a failure and a warning than a fallback.
+	     */
+	    brw_validate_state(brw);
+	    if (!warned &&
+		dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+						brw->state.validated_bo_count)) {
+	       warn = GL_TRUE;
+	       warned = GL_TRUE;
+	    }
+	 }
+
+	 /* NOTE(review): presumably keeps the batch from wrapping between
+	  * the state upload and the primitive emit below -- confirm.
+	  */
+	 intel->no_batch_wrap = GL_TRUE;
+	 brw_upload_state(brw);
+      }
+
+      brw_emit_prim(brw, &prim[i], hw_prim);
+
+      intel->no_batch_wrap = GL_FALSE;
+
+      retval = GL_TRUE;
+   }
+
+   if (intel->always_flush_batch)
+      intel_batchbuffer_flush(intel->batch);
+ out:
+
+   /* Reached on both the normal and the fallback exit. */
+   brw_state_cache_check_size(brw);
+
+   if (warn)
+      fprintf(stderr, "i965: Single primitive emit potentially exceeded "
+	      "available aperture space\n");
+
+   if (!retval)
+      DBG("%s failed\n", __FUNCTION__);
+
+   return retval;
+}
+
+/* Draw entry point: try the hardware path first and hand the draw to the
+ * software tnl module if that fails.
+ *
+ * When not all varyings live in VBOs the index bounds are needed; they are
+ * computed here unless the caller marked them valid, and a non-zero
+ * min_index causes the primitives to be rebased, which calls back into
+ * this function.
+ */
+void brw_draw_prims( GLcontext *ctx,
+		     const struct gl_client_array *arrays[],
+		     const struct _mesa_prim *prim,
+		     GLuint nr_prims,
+		     const struct _mesa_index_buffer *ib,
+		     GLboolean index_bounds_valid,
+		     GLuint min_index,
+		     GLuint max_index )
+{
+   if (!vbo_all_varyings_in_vbos(arrays)) {
+      if (!index_bounds_valid)
+         vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+
+      /* Decide if we want to rebase.  If so we end up recursing once
+       * only into this function.
+       */
+      if (min_index != 0) {
+         vbo_rebase_prims(ctx, arrays,
+                          prim, nr_prims,
+                          ib, min_index, max_index,
+                          brw_draw_prims );
+         return;
+      }
+   }
+
+   /* Make a first attempt at drawing; done if it succeeded. */
+   if (brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index))
+      return;
+
+   /* The hardware path failed.  Pass the drawing command to the software
+    * tnl module, which will in turn call swrast to do the drawing.
+    */
+   _swsetup_Wakeup(ctx);
+   _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+}
+
+/* Install brw_draw_prims() as the vbo module's drawing callback for this
+ * context.
+ */
+void brw_draw_init( struct brw_context *brw )
+{
+   struct vbo_context *vbo = vbo_context(&brw->intel.ctx);
+
+   vbo->draw_prims = brw_draw_prims;
+}
+
+/* Drop the buffer object references held by the draw module: the vertex
+ * upload buffer, every vertex input buffer and the index buffer.  Each
+ * pointer is cleared after its reference is released.
+ */
+void brw_draw_destroy( struct brw_context *brw )
+{
+   int attr;
+
+   if (brw->vb.upload.bo) {
+      drm_intel_bo_unreference(brw->vb.upload.bo);
+      brw->vb.upload.bo = NULL;
+   }
+
+   for (attr = 0; attr < VERT_ATTRIB_MAX; ++attr) {
+      drm_intel_bo_unreference(brw->vb.inputs[attr].bo);
+      brw->vb.inputs[attr].bo = NULL;
+   }
+
+   drm_intel_bo_unreference(brw->ib.bo);
+   brw->ib.bo = NULL;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_draw.h b/src/mesa/drivers/dri/i965/brw_draw.h
new file mode 100644
index 0000000000..2a14db217f
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_draw.h
@@ -0,0 +1,54 @@
+ /**************************************************************************
+ * 
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef BRW_DRAW_H
+#define BRW_DRAW_H
+
+#include "main/mtypes.h"		/* for GLcontext... */
+#include "vbo/vbo.h"
+
+struct brw_context;
+
+
+/* Draw callback registered with the vbo module by brw_draw_init().
+ * Attempts a hardware draw and falls back to the software tnl path when
+ * that fails.  min_index/max_index are only trusted as given when
+ * index_bounds_valid is set or all varyings are in VBOs.
+ */
+void brw_draw_prims( GLcontext *ctx,
+		     const struct gl_client_array *arrays[],
+		     const struct _mesa_prim *prims,
+		     GLuint nr_prims,
+		     const struct _mesa_index_buffer *ib,
+		     GLboolean index_bounds_valid,
+		     GLuint min_index,
+		     GLuint max_index );
+
+/* brw_draw_init() installs brw_draw_prims() as the vbo draw callback;
+ * brw_draw_destroy() releases the buffer objects held for drawing.
+ */
+void brw_draw_init( struct brw_context *brw );
+void brw_draw_destroy( struct brw_context *brw );
+
+/* brw_draw_current.c
+ *
+ * NOTE(review): presumably fills 'arrays' with the context's current
+ * attribute values -- the implementation is not in this file; confirm
+ * against brw_draw_current.c.
+ */
+void brw_init_current_values(GLcontext *ctx,
+			     struct gl_client_array *arrays);
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
new file mode 100644
index 0000000000..f07aab86e9
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -0,0 +1,725 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "main/glheader.h"
+#include "main/bufferobj.h"
+#include "main/context.h"
+#include "main/enums.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+
+#include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
+
+static const GLuint double_types[5] = {
+   0, /* all tables here are indexed by component count; see get_surface_type() */
+   BRW_SURFACEFORMAT_R64_FLOAT,
+   BRW_SURFACEFORMAT_R64G64_FLOAT,
+   BRW_SURFACEFORMAT_R64G64B64_FLOAT,
+   BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
+};
+
+static const GLuint float_types[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_FLOAT,
+   BRW_SURFACEFORMAT_R32G32_FLOAT,
+   BRW_SURFACEFORMAT_R32G32B32_FLOAT,
+   BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
+};
+
+static const GLuint half_float_types[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_FLOAT,
+   BRW_SURFACEFORMAT_R16G16_FLOAT,
+   BRW_SURFACEFORMAT_R16G16B16A16_FLOAT, /* NOTE(review): 3-component slot reuses the RGBA format - confirm intent */
+   BRW_SURFACEFORMAT_R16G16B16A16_FLOAT
+};
+
+static const GLuint uint_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_UNORM,
+   BRW_SURFACEFORMAT_R32G32_UNORM,
+   BRW_SURFACEFORMAT_R32G32B32_UNORM,
+   BRW_SURFACEFORMAT_R32G32B32A32_UNORM
+};
+
+static const GLuint uint_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_USCALED,
+   BRW_SURFACEFORMAT_R32G32_USCALED,
+   BRW_SURFACEFORMAT_R32G32B32_USCALED,
+   BRW_SURFACEFORMAT_R32G32B32A32_USCALED
+};
+
+static const GLuint int_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_SNORM,
+   BRW_SURFACEFORMAT_R32G32_SNORM,
+   BRW_SURFACEFORMAT_R32G32B32_SNORM,
+   BRW_SURFACEFORMAT_R32G32B32A32_SNORM
+};
+
+static const GLuint int_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_SSCALED,
+   BRW_SURFACEFORMAT_R32G32_SSCALED,
+   BRW_SURFACEFORMAT_R32G32B32_SSCALED,
+   BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
+};
+
+static const GLuint ushort_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_UNORM,
+   BRW_SURFACEFORMAT_R16G16_UNORM,
+   BRW_SURFACEFORMAT_R16G16B16_UNORM,
+   BRW_SURFACEFORMAT_R16G16B16A16_UNORM
+};
+
+static const GLuint ushort_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_USCALED,
+   BRW_SURFACEFORMAT_R16G16_USCALED,
+   BRW_SURFACEFORMAT_R16G16B16_USCALED,
+   BRW_SURFACEFORMAT_R16G16B16A16_USCALED
+};
+
+static const GLuint short_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_SNORM,
+   BRW_SURFACEFORMAT_R16G16_SNORM,
+   BRW_SURFACEFORMAT_R16G16B16_SNORM,
+   BRW_SURFACEFORMAT_R16G16B16A16_SNORM
+};
+
+static const GLuint short_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_SSCALED,
+   BRW_SURFACEFORMAT_R16G16_SSCALED,
+   BRW_SURFACEFORMAT_R16G16B16_SSCALED,
+   BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
+};
+
+static const GLuint ubyte_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_UNORM,
+   BRW_SURFACEFORMAT_R8G8_UNORM,
+   BRW_SURFACEFORMAT_R8G8B8_UNORM,
+   BRW_SURFACEFORMAT_R8G8B8A8_UNORM
+};
+
+static const GLuint ubyte_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_USCALED,
+   BRW_SURFACEFORMAT_R8G8_USCALED,
+   BRW_SURFACEFORMAT_R8G8B8_USCALED,
+   BRW_SURFACEFORMAT_R8G8B8A8_USCALED
+};
+
+static const GLuint byte_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_SNORM,
+   BRW_SURFACEFORMAT_R8G8_SNORM,
+   BRW_SURFACEFORMAT_R8G8B8_SNORM,
+   BRW_SURFACEFORMAT_R8G8B8A8_SNORM
+};
+
+static const GLuint byte_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_SSCALED,
+   BRW_SURFACEFORMAT_R8G8_SSCALED,
+   BRW_SURFACEFORMAT_R8G8B8_SSCALED,
+   BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
+};
+
+
+/**
+ * Given vertex array type/size/format/normalized info, return
+ * the appropriate hardware surface type.
+ * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
+ */
+static GLuint get_surface_type( GLenum type, GLuint size,
+                                GLenum format, GLboolean normalized )
+{
+   assert(size <= 4); /* every format table indexed below has five slots (0..4) */
+   if (INTEL_DEBUG & DEBUG_VERTS)
+      printf("type %s size %u normalized %d\n",
+             _mesa_lookup_enum_by_nr(type), size, normalized);
+   if (normalized) {
+      switch (type) {
+      case GL_DOUBLE: return double_types[size];
+      case GL_FLOAT: return float_types[size];
+      case GL_HALF_FLOAT: return half_float_types[size];
+      case GL_INT: return int_types_norm[size];
+      case GL_SHORT: return short_types_norm[size];
+      case GL_BYTE: return byte_types_norm[size];
+      case GL_UNSIGNED_INT: return uint_types_norm[size];
+      case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
+      case GL_UNSIGNED_BYTE:
+         if (format == GL_BGRA) {
+            /* See GL_EXT_vertex_array_bgra */
+            assert(size == 4);
+            return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+         }
+         else {
+            return ubyte_types_norm[size];
+         }
+      default: assert(0); return 0;
+      }
+   }
+   else {
+      assert(format == GL_RGBA); /* sanity check */
+      switch (type) {
+      case GL_DOUBLE: return double_types[size];
+      case GL_FLOAT: return float_types[size];
+      case GL_HALF_FLOAT: return half_float_types[size];
+      case GL_INT: return int_types_scale[size];
+      case GL_SHORT: return short_types_scale[size];
+      case GL_BYTE: return byte_types_scale[size];
+      case GL_UNSIGNED_INT: return uint_types_scale[size];
+      case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
+      case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
+      default: assert(0); return 0;
+      }
+   }
+}
+
+
+static GLuint get_size( GLenum type )
+{
+   switch (type) {   /* bytes per component; 0 for a type not listed here */
+   case GL_UNSIGNED_BYTE:  return sizeof(GLubyte);
+   case GL_BYTE:           return sizeof(GLbyte);
+   case GL_UNSIGNED_SHORT: return sizeof(GLushort);
+   case GL_SHORT:          return sizeof(GLshort);
+   case GL_HALF_FLOAT:     return sizeof(GLhalfARB);
+   case GL_UNSIGNED_INT:   return sizeof(GLuint);
+   case GL_INT:            return sizeof(GLint);
+   case GL_FLOAT:          return sizeof(GLfloat);
+   case GL_DOUBLE:         return sizeof(GLdouble);
+   default:                return 0;
+   }
+}
+
+static GLuint get_index_type(GLenum type)
+{
+   /* Translate the GL index data type into the BRW_INDEX_* format code. */
+   if (type == GL_UNSIGNED_BYTE)  return BRW_INDEX_BYTE;
+   if (type == GL_UNSIGNED_SHORT) return BRW_INDEX_WORD;
+   if (type == GL_UNSIGNED_INT)   return BRW_INDEX_DWORD;
+   assert(0);   /* no other index type is handled */
+   return 0;
+}
+
+static void wrap_buffers( struct brw_context *brw,
+                          GLuint size )
+{
+   /* Never allocate less than the initial upload-buffer size. */
+   const GLuint alloc_size =
+      (size < BRW_UPLOAD_INIT_SIZE) ? BRW_UPLOAD_INIT_SIZE : size;
+   /* Drop our reference to the previous buffer, then start a new, empty one.
+    * NOTE(review): the allocation result is not checked here. */
+   if (brw->vb.upload.bo != NULL)
+      drm_intel_bo_unreference(brw->vb.upload.bo);
+   brw->vb.upload.bo = drm_intel_bo_alloc(brw->intel.bufmgr, "temporary VBO",
+                                          alloc_size, 1);
+   brw->vb.upload.offset = 0;
+}
+
+static void get_space( struct brw_context *brw,
+                       GLuint size,
+                       drm_intel_bo **bo_return,
+                       GLuint *offset_return )
+{
+   const GLuint aligned = ALIGN(size, 64);   /* sub-allocations are 64-byte multiples */
+   /* Switch to a fresh upload buffer if none exists or the request won't fit. */
+   if (!brw->vb.upload.bo ||
+       brw->vb.upload.offset + aligned > brw->vb.upload.bo->size)
+      wrap_buffers(brw, aligned);
+
+   /* Hand back the current write position plus a new reference on the
+    * upload buffer; the caller's slot must be empty on entry. */
+   assert(*bo_return == NULL);
+   *offset_return = brw->vb.upload.offset;
+   *bo_return = brw->vb.upload.bo;
+   drm_intel_bo_reference(*bo_return);
+   brw->vb.upload.offset += aligned;
+}
+
+static void
+copy_array_to_vbo_array( struct brw_context *brw,
+                         struct brw_vertex_element *element,
+                         GLuint dst_stride)
+{
+   const struct gl_client_array *glarray = element->glarray;
+   const GLuint size = element->count * dst_stride;
+   char *dest;
+
+   /* Reserve room in the upload buffer; this fills element->bo / ->offset. */
+   get_space(brw, size, &element->bo, &element->offset);
+
+   /* A zero source stride means a single element, emitted with stride 0. */
+   if (glarray->StrideB == 0) {
+      assert(element->count == 1);
+      element->stride = 0;
+   } else {
+      element->stride = dst_stride;
+   }
+
+   drm_intel_gem_bo_map_gtt(element->bo);
+   dest = (char *)element->bo->virtual + element->offset;
+
+   if (dst_stride == glarray->StrideB) {
+      /* Source already uses the destination stride: one bulk copy. */
+      memcpy(dest, glarray->Ptr, size);
+   } else {
+      /* Repack: dst_stride bytes per element, stepping by the source stride. */
+      const unsigned char *src = glarray->Ptr;
+      int i;
+      for (i = 0; i < element->count; i++) {
+         memcpy(dest, src, dst_stride);
+         src += glarray->StrideB;
+         dest += dst_stride;
+      }
+   }
+
+   drm_intel_gem_bo_unmap_gtt(element->bo);
+}
+
+static void brw_prepare_vertices(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = intel_context(ctx);
+   GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; 
+   GLuint i;
+   const unsigned char *ptr = NULL;
+   GLuint interleave = 0;
+   unsigned int min_index = brw->vb.min_index;
+   unsigned int max_index = brw->vb.max_index;
+
+   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
+   GLuint nr_uploads = 0;
+
+   /* Collect the arrays the VS reads; buffer-object backed ones are referenced
+    * in place, the rest are copied into the temporary upload buffer. */
+   if (0)
+      printf("%s %u..%u\n", __FUNCTION__, min_index, max_index);
+
+   /* Accumulate the list of enabled arrays. */
+   brw->vb.nr_enabled = 0;
+   while (vs_inputs) {
+      GLuint attr = _mesa_ffsll(vs_inputs) - 1;
+      struct brw_vertex_element *input = &brw->vb.inputs[attr];
+
+      vs_inputs &= ~(1u << attr);   /* unsigned shift: bit 31 must not hit signed overflow */
+      brw->vb.enabled[brw->vb.nr_enabled++] = input;
+   }
+
+   /* XXX: In the rare cases where this happens we fallback all
+    * the way to software rasterization, although a tnl fallback
+    * would be sufficient.  I don't know of *any* real world
+    * cases with > 17 vertex attributes enabled, so it probably
+    * isn't an issue at this point.
+    */
+   if (brw->vb.nr_enabled >= BRW_VEP_MAX) {
+      intel->Fallback = GL_TRUE; /* boolean, not bitfield */
+      return;
+   }
+
+   for (i = 0; i < brw->vb.nr_enabled; i++) {
+      struct brw_vertex_element *input = brw->vb.enabled[i];
+
+      input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
+
+      if (_mesa_is_bufferobj(input->glarray->BufferObj)) {
+	 struct intel_buffer_object *intel_buffer =
+	    intel_buffer_object(input->glarray->BufferObj);
+
+	 /* Named buffer object: Just reference its contents directly. */
+	 drm_intel_bo_unreference(input->bo);
+	 input->bo = intel_bufferobj_buffer(intel, intel_buffer,
+					    INTEL_READ);
+	 drm_intel_bo_reference(input->bo);
+	 input->offset = (unsigned long)input->glarray->Ptr;
+	 input->stride = input->glarray->StrideB;
+	 input->count = input->glarray->_MaxElement;
+
+	 /* This is a common place to reach if the user mistakenly supplies
+	  * a pointer in place of a VBO offset.  If we just let it go through,
+	  * we may end up dereferencing a pointer beyond the bounds of the
+	  * GTT.  We would hope that the VBO's max_index would save us, but
+	  * Mesa appears to hand us min/max values not clipped to the
+	  * array object's _MaxElement, and _MaxElement frequently appears
+	  * to be wrong anyway.
+	  *
+	  * The VBO spec allows application termination in this case, and it's
+	  * probably a service to the poor programmer to do so rather than
+	  * trying to just not render.
+	  */
+	 assert(input->offset < input->bo->size);
+      } else {
+	 input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
+	 if (input->bo != NULL) {
+	    /* Already-uploaded vertex data is present from a previous
+	     * prepare_vertices, but we had to re-validate state due to
+	     * check_aperture failing and a new batch being produced.
+	     */
+	    continue;
+	 }
+
+	 /* Queue the buffer object up to be uploaded in the next pass,
+	  * when we've decided if we're doing interleaved or not.
+	  */
+	 if (input->attrib == VERT_ATTRIB_POS) {
+	    /* Position array not properly enabled:
+	     */
+            if (input->glarray->StrideB == 0) {
+               intel->Fallback = GL_TRUE; /* boolean, not bitfield */
+               return;
+            }
+
+	    interleave = input->glarray->StrideB;
+	    ptr = input->glarray->Ptr;
+	 }
+	 else if (interleave != input->glarray->StrideB ||
+		  (const unsigned char *)input->glarray->Ptr - ptr < 0 ||
+		  (const unsigned char *)input->glarray->Ptr - ptr > interleave)
+	 {
+	    interleave = 0;
+	 }
+
+	 upload[nr_uploads++] = input;
+	 
+	 /* We rebase drawing to start at element zero only when
+	  * varyings are not in vbos, which means we can end up
+	  * uploading non-varying arrays (stride != 0) when min_index
+	  * is zero.  This doesn't matter as the amount to upload is
+	  * the same for these arrays whether the draw call is rebased
+	  * or not - we just have to upload the one element.
+	  */
+	 assert(min_index == 0 || input->glarray->StrideB == 0);
+      }
+   }
+
+   /* Handle any arrays to be uploaded. */
+   if (nr_uploads > 1 && interleave && interleave <= 256) {
+      /* All uploads are interleaved, so upload the arrays together as
+       * interleaved.  First, upload the contents and set up upload[0].
+       */
+      copy_array_to_vbo_array(brw, upload[0], interleave);
+
+      for (i = 1; i < nr_uploads; i++) {
+	 /* Then, just point upload[i] at upload[0]'s buffer. */
+	 upload[i]->stride = interleave;
+	 upload[i]->offset = upload[0]->offset +
+	    ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
+	 upload[i]->bo = upload[0]->bo;
+	 drm_intel_bo_reference(upload[i]->bo);
+      }
+   }
+   else {
+      /* Upload non-interleaved arrays */
+      for (i = 0; i < nr_uploads; i++) {
+          copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size);
+      }
+   }
+
+   brw_prepare_query_begin(brw);
+
+   for (i = 0; i < brw->vb.nr_enabled; i++) {
+      struct brw_vertex_element *input = brw->vb.enabled[i];
+
+      brw_add_validated_bo(brw, input->bo);
+   }
+}
+
+static void brw_emit_vertices(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = intel_context(ctx);
+   GLuint i;
+
+   brw_emit_query_begin(brw);
+
+   /* If the VS doesn't read any inputs (calculating vertex position from
+    * a state variable for some reason, for example), emit a single pad
+    * VERTEX_ELEMENT struct and bail.
+    *
+    * The stale VB state stays in place, but they don't do anything unless
+    * a VE loads from them.
+    */
+   if (brw->vb.nr_enabled == 0) {
+      BEGIN_BATCH(3); /* header dword + one two-dword element */
+      OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
+      if (IS_GEN6(intel->intelScreen->deviceID)) { /* NOTE(review): gen6 tested via deviceID here, via intel->gen further down */
+	 OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
+		   GEN6_VE0_VALID |
+		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      } else {
+	 OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
+		   BRW_VE0_VALID |
+		   (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      }
+      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | /* pad element stores (0, 0, 0, 1.0) */
+		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+		(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
+      ADVANCE_BATCH();
+      return;
+   }
+
+   /* Now emit VB and VEP state packets.
+    *
+    * This still defines a hardware VB for each input, even if they
+    * are interleaved or from the same VBO.  TBD if this makes a
+    * performance difference.
+    */
+   BEGIN_BATCH(1 + brw->vb.nr_enabled * 4); /* header + four dwords per enabled input */
+   OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
+	     ((1 + brw->vb.nr_enabled * 4) - 2));
+
+   for (i = 0; i < brw->vb.nr_enabled; i++) {
+      struct brw_vertex_element *input = brw->vb.enabled[i];
+      uint32_t dw0;
+
+      if (intel->gen >= 6) {
+	 dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
+	    (i << GEN6_VB0_INDEX_SHIFT);
+      } else {
+	 dw0 = BRW_VB0_ACCESS_VERTEXDATA |
+	    (i << BRW_VB0_INDEX_SHIFT);
+      }
+
+      OUT_BATCH(dw0 |
+		(input->stride << BRW_VB0_PITCH_SHIFT));
+      OUT_RELOC(input->bo,
+		I915_GEM_DOMAIN_VERTEX, 0,
+		input->offset);
+      if (intel->gen >= 5) { /* gen5+: third dword is a relocation to the buffer's last byte */
+	 OUT_RELOC(input->bo,
+		   I915_GEM_DOMAIN_VERTEX, 0,
+		   input->bo->size - 1);
+      } else /* older parts: third dword is the element count, or 0 when stride is 0 */
+          OUT_BATCH(input->stride ? input->count : 0);
+      OUT_BATCH(0); /* Instance data step rate */
+   }
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(1 + brw->vb.nr_enabled * 2); /* header + two dwords per enabled input */
+   OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2));
+   for (i = 0; i < brw->vb.nr_enabled; i++) {
+      struct brw_vertex_element *input = brw->vb.enabled[i];
+      uint32_t format = get_surface_type(input->glarray->Type,
+					 input->glarray->Size,
+					 input->glarray->Format,
+					 input->glarray->Normalized);
+      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
+
+      switch (input->glarray->Size) { /* components the array lacks become 0, 0, 0, 1.0 */
+      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+	 break;
+      }
+
+      if (IS_GEN6(intel->intelScreen->deviceID)) {
+	 OUT_BATCH((i << GEN6_VE0_INDEX_SHIFT) |
+		   GEN6_VE0_VALID |
+		   (format << BRW_VE0_FORMAT_SHIFT) |
+		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      } else {
+	 OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) |
+		   BRW_VE0_VALID |
+		   (format << BRW_VE0_FORMAT_SHIFT) |
+		   (0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      }
+
+      if (intel->gen >= 5) /* same dword as the else branch, minus the DST_OFFSET field */
+          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
+      else
+          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
+                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
+   }
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_vertices = { /* state atom: vertex buffer + vertex element packets */
+   .dirty = { /* presumably the flags that cause this atom to be re-run - confirm in brw_state.h */
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES,
+      .cache = 0,
+   },
+   .prepare = brw_prepare_vertices,
+   .emit = brw_emit_vertices,
+};
+
+static void brw_prepare_indices(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
+   GLuint ib_size;
+   drm_intel_bo *bo = NULL;
+   struct gl_buffer_object *bufferobj;
+   GLuint offset;
+   GLuint ib_type_size;
+
+   if (index_buffer == NULL)
+      return;
+
+   ib_type_size = get_size(index_buffer->type);
+   ib_size = ib_type_size * index_buffer->count;
+   bufferobj = index_buffer->obj;
+
+   /* Turn into a proper VBO:
+    */
+   if (!_mesa_is_bufferobj(bufferobj)) {
+      brw->ib.start_vertex_offset = 0;
+
+      /* Get new bufferobj, offset:
+       */
+      get_space(brw, ib_size, &bo, &offset);
+
+      /* Straight upload
+       */
+      drm_intel_gem_bo_map_gtt(bo);
+      memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
+      drm_intel_gem_bo_unmap_gtt(bo);
+   } else {
+      offset = (GLuint) (unsigned long) index_buffer->ptr;
+      brw->ib.start_vertex_offset = 0;
+
+      /* If the index buffer isn't aligned to its element size, we have to
+       * rebase it into a temporary.
+       */
+       if ((ib_type_size - 1) & offset) {
+           GLubyte *map = ctx->Driver.MapBuffer(ctx,
+                                                GL_ELEMENT_ARRAY_BUFFER_ARB,
+                                                GL_READ_ONLY_ARB, /* access mode: the data is only read */
+                                                bufferobj);
+           map += offset; /* NOTE(review): a failed (NULL) mapping is not handled here */
+
+	   get_space(brw, ib_size, &bo, &offset);
+
+	   drm_intel_bo_subdata(bo, offset, ib_size, map);
+
+           ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
+       } else {
+	  bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
+				      INTEL_READ);
+	  drm_intel_bo_reference(bo);
+
+	  /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
+	   * the index buffer state when we're just moving the start index
+	   * of our drawing.
+	   */
+	  brw->ib.start_vertex_offset = offset / ib_type_size;
+	  offset = 0;
+	  ib_size = bo->size;
+       }
+   }
+
+   if (brw->ib.bo != bo ||
+       brw->ib.offset != offset ||
+       brw->ib.size != ib_size)
+   {
+      drm_intel_bo_unreference(brw->ib.bo);
+      brw->ib.bo = bo;
+      brw->ib.offset = offset;
+      brw->ib.size = ib_size;
+
+      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
+   } else {
+      drm_intel_bo_unreference(bo); /* unchanged: drop the extra reference taken above */
+   }
+
+   brw_add_validated_bo(brw, brw->ib.bo);
+}
+
+const struct brw_tracked_state brw_indices = { /* state atom: prepares the index buffer; has no emit hook */
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_INDICES,
+      .cache = 0,
+   },
+   .prepare = brw_prepare_indices,
+};
+
+static void brw_emit_index_buffer(struct brw_context *brw)
+{
+   /* 'intel' is not named below; presumably the batch macros use it - confirm. */
+   struct intel_context *intel = &brw->intel;
+   const struct _mesa_index_buffer *indices = brw->ib.ib;
+   struct brw_indexbuffer packet;
+
+   /* No index buffer recorded for this draw: nothing to emit. */
+   if (!indices)
+      return;
+
+   /* Build the header in a zeroed struct so unset bits stay 0. */
+   memset(&packet, 0, sizeof(packet));
+   packet.header.bits.opcode = CMD_INDEX_BUFFER;
+   packet.header.bits.length = sizeof(packet)/4 - 2;
+   packet.header.bits.index_format = get_index_type(indices->type);
+   packet.header.bits.cut_index_enable = 0;
+
+   /* Four dwords: header, relocated start address, relocated address of
+    * the last byte (offset + size - 1), and a trailing zero dword.
+    */
+   BEGIN_BATCH(4);
+   OUT_BATCH( packet.header.dword );
+   OUT_RELOC(brw->ib.bo,
+             I915_GEM_DOMAIN_VERTEX, 0,
+             brw->ib.offset);
+   OUT_RELOC(brw->ib.bo,
+             I915_GEM_DOMAIN_VERTEX, 0,
+             brw->ib.offset + brw->ib.size - 1);
+   OUT_BATCH( 0 );
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_index_buffer = { /* state atom: emits the index buffer packet; has no prepare hook */
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER,
+      .cache = 0,
+   },
+   .emit = brw_emit_index_buffer,
+};
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
new file mode 100644
index 0000000000..4e7c1226ad
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -0,0 +1,254 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+  
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+/* How does predicate control work when execution_size != 8?  Do I
+ * need to test/set for 0xffff when execution_size is 16?
+ */
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
+{
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+   if (value == 0xff)
+      return;   /* 0xff leaves predication switched off */
+
+   /* Load the flag register only when the cached value is different. */
+   if (p->flag_value != value) {
+      brw_push_insn_state(p);
+      brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
+      p->flag_value = value;
+      brw_pop_insn_state(p);
+   }
+   p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+}
+
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc )
+{  /* stores pc in the header of p->current, the top entry of the insn-state stack */
+   p->current->header.predicate_control = pc;
+}
+
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
+{  /* NOTE(review): the field name suggests it is shared with a destreg field - confirm */
+   p->current->header.destreg__conditionalmod = conditional;
+}
+
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
+{  /* stores access_mode in the header of p->current */
+   p->current->header.access_mode = access_mode;
+}
+
+void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control )
+{  /* NOTE(review): typed GLboolean, yet brw_init_compile passes a BRW_COMPRESSION_* value */
+   p->current->header.compression_control = compression_control;
+}
+
+void brw_set_mask_control( struct brw_compile *p, GLuint value )
+{  /* stores value in the mask_control field of p->current's header */
+   p->current->header.mask_control = value;
+}
+
+void brw_set_saturate( struct brw_compile *p, GLuint value )
+{  /* stores value in the saturate field of p->current's header */
+   p->current->header.saturate = value;
+}
+
+void brw_push_insn_state( struct brw_compile *p )
+{
+   assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);   /* stack must not be full */
+   p->current++;   /* claim the next slot, then seed it from the entry below */
+   memcpy(p->current, p->current - 1, sizeof(struct brw_instruction));
+}
+
+void brw_pop_insn_state( struct brw_compile *p )
+{
+   assert(p->stack != p->current);   /* the bottom entry is never popped */
+   --p->current;                     /* the entry below becomes current again */
+}
+
+
+/***********************************************************************
+ */
+void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
+{
+   /* Empty program; the bottom stack entry is zeroed and made current. */
+   memset(&p->stack[0], 0, sizeof(p->stack[0]));
+   p->current = &p->stack[0];
+   p->nr_insn = 0;
+   p->brw = brw;
+
+   /* Initial instruction state; flag value 0xff leaves predication off. */
+   brw_set_predicate_control_flag_value(p, 0xff);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_saturate(p, 0);
+   brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
+}
+
+
+const GLuint *brw_get_program( struct brw_compile *p,
+                               GLuint *sz )
+{
+   GLuint pad = 8;
+   /* Emit eight NOPs, then report the size derived from p->nr_insn. */
+   while (pad--)
+      brw_NOP(p);
+
+   *sz = p->nr_insn * sizeof(struct brw_instruction);
+   return (const GLuint *)p->store;
+}
+
+
+
+/**
+ * Subroutine calls require special attention.
+ * Mesa instructions may be expanded into multiple hardware instructions
+ * so the prog_instruction::BranchTarget field can't be used as an index
+ * into the hardware instructions.
+ *
+ * The BranchTarget field isn't needed, however.  Mesa's GLSL compiler
+ * emits CAL and BGNSUB instructions with labels that can be used to map
+ * subroutine calls to actual subroutine code blocks.
+ *
+ * The structures and function here implement patching of CAL instructions
+ * so they jump to the right subroutine code...
+ */
+
+
+/**
+ * For each OPCODE_BGNSUB we create one of these (see brw_save_label()).
+ */
+struct brw_glsl_label
+{
+   const char *name; /**< the label string; brw_save_label() stores the pointer, no copy */
+   GLuint position;  /**< index into the compile's store[] of this label's instruction */
+   struct brw_glsl_label *next;  /**< next in linked list */
+};
+
+
+/**
+ * For each OPCODE_CAL we create one of these (see brw_save_call()).
+ */
+struct brw_glsl_call
+{
+   GLuint call_inst_pos;  /**< index into the compile's store[] of the CAL instruction */
+   const char *sub_name;  /**< name of subroutine to call; pointer stored, no copy */
+   struct brw_glsl_call *next;  /**< next in linked list */
+};
+
+
+/**
+ * Called for each OPCODE_BGNSUB; `name` is stored by pointer, not copied.
+ */
+void
+brw_save_label(struct brw_compile *c, const char *name, GLuint position)
+{
+   struct brw_glsl_label *node = CALLOC_STRUCT(brw_glsl_label); /* NOTE(review): result not checked */
+   node->next = c->first_label;   /* new node goes to the head of the list */
+   node->position = position;
+   node->name = name;
+   c->first_label = node;
+}
+
+
+/**
+ * Called for each OPCODE_CAL; `name` is stored by pointer, not copied.
+ */
+void
+brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos)
+{
+   struct brw_glsl_call *node = CALLOC_STRUCT(brw_glsl_call); /* NOTE(review): result not checked */
+   node->next = c->first_call;   /* new node goes to the head of the list */
+   node->sub_name = name;
+   node->call_inst_pos = call_pos;
+   c->first_call = node;
+}
+
+
+/**
+ * Find the saved label whose name equals `name` and return its position.
+ * Aborts the process when no such label has been saved.
+ */
+static GLuint
+brw_lookup_label(struct brw_compile *c, const char *name)
+{
+   const struct brw_glsl_label *cur = c->first_label;
+   while (cur != NULL && strcmp(name, cur->name) != 0)
+      cur = cur->next;
+   if (cur != NULL)
+      return cur->position;
+   abort();  /* should never happen */
+   return ~0;
+}
+
+
+/**
+ * Called once code generation is finished: patch every saved CAL with the
+ * offset of its subroutine, then free the call and label lists.
+ */
+void
+brw_resolve_cals(struct brw_compile *c)
+{
+    struct brw_glsl_call *call_node, *call_next;
+    struct brw_glsl_label *label_node, *label_next;
+
+    /* Pass 1: point each recorded CAL at the start of its subroutine. */
+    for (call_node = c->first_call; call_node; call_node = call_node->next) {
+        struct brw_instruction *cal = &c->store[call_node->call_inst_pos];
+        struct brw_instruction *target =
+            &c->store[brw_lookup_label(c, call_node->sub_name)];
+        GLint offset = target - cal;
+
+        /* offset counts instructions; the immediate written is offset * 16 */
+        brw_set_src1(cal, brw_imm_d(offset * 16));
+    }
+
+    /* Pass 2: release the list of calls. */
+    call_node = c->first_call;
+    while (call_node) {
+        call_next = call_node->next;
+        free(call_node);
+        call_node = call_next;
+    }
+    c->first_call = NULL;
+
+    /* Pass 3: release the list of labels. */
+    label_node = c->first_label;
+    while (label_node) {
+        label_next = label_node->next;
+        free(label_node);
+        label_node = label_next;
+    }
+    c->first_label = NULL;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
new file mode 100644
index 0000000000..3a32ad26c1
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -0,0 +1,969 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+   
+
+#ifndef BRW_EU_H
+#define BRW_EU_H
+
+#include "brw_structs.h"
+#include "brw_defines.h"
+#include "shader/prog_instruction.h"
+
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
+
+#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
+
+
+#define REG_SIZE (8*4)
+
+
+/* These aren't hardware structs, just something useful for us to pass around.
+ * The bitfields ahead of dw1 add up to 32 bits; dw1 is the second dword.
+ * Align1 operation has a lot of control over input ranges.  Used in
+ * WM programs to implement shaders decomposed into "channel serial"
+ * or "structure of array" form:
+ */
+struct brw_reg
+{
+   GLuint type:4;		/* one of BRW_REGISTER_TYPE_x */
+   GLuint file:2;		/* one of the BRW_x_REGISTER_FILE values */
+   GLuint nr:8;			/* register number/index */
+   GLuint subnr:5;		/* :1 in align16 */
+   GLuint negate:1;		/* source only */
+   GLuint abs:1;		/* source only */
+   GLuint vstride:4;		/* source only */
+   GLuint width:3;		/* src only, align1 only */
+   GLuint hstride:2;   		/* align1 only */
+   GLuint address_mode:1;	/* relative addressing, hopefully! */
+   GLuint pad0:1;		/* zeroed by brw_reg() */
+
+   union {      
+      struct {
+	 GLuint swizzle:8;		/* src only, align16 only */
+	 GLuint writemask:4;		/* dest only, align16 only */
+	 GLint  indirect_offset:10;	/* relative addressing offset */
+	 GLuint pad1:10;		/* two dwords total */
+      } bits;
+
+      GLfloat f;	/* value written by brw_imm_f() */
+      GLint   d;	/* value written by brw_imm_d() */
+      GLuint ud;	/* value written by brw_imm_ud() and the packed immediates */
+   } dw1;      
+};
+
+
+struct brw_indirect {   /* address sub-register number plus a signed offset */
+   GLuint addr_subnr:4;   /* handed to brw_address_reg() by get_addr_reg() */
+   GLint addr_offset:10;  /* accumulated by brw_indirect_offset() */
+   GLuint pad:18;         /* zeroed by brw_indirect() */
+};
+
+
+struct brw_glsl_label;
+struct brw_glsl_call;
+
+
+
+#define BRW_EU_MAX_INSN_STACK 5
+#define BRW_EU_MAX_INSN 10000
+
+struct brw_compile {
+   struct brw_instruction store[BRW_EU_MAX_INSN];  /**< instruction buffer; label/call positions index it */
+   GLuint nr_insn;   /**< index of the slot current_insn() returns */
+
+   /* Allow clients to push/pop instruction state:
+    */
+   struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+   struct brw_instruction *current;
+
+   GLuint flag_value;
+   GLboolean single_program_flow;
+   struct brw_context *brw;
+
+   struct brw_glsl_label *first_label;  /**< linked list of labels; freed by brw_resolve_cals() */
+   struct brw_glsl_call *first_call;    /**< linked list of CALs; freed by brw_resolve_cals() */
+};
+
+
+void
+brw_save_label(struct brw_compile *c, const char *name, GLuint position);
+
+void
+brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos);
+
+void
+brw_resolve_cals(struct brw_compile *c);
+
+
+
+/* Size in bytes of one element of a BRW_REGISTER_TYPE_x value; 0 if unknown. */
+static INLINE int type_sz( GLuint type )
+{
+   if (type == BRW_REGISTER_TYPE_B ||
+       type == BRW_REGISTER_TYPE_UB)
+      return 1;
+
+   if (type == BRW_REGISTER_TYPE_W ||
+       type == BRW_REGISTER_TYPE_UW ||
+       type == BRW_REGISTER_TYPE_HF)
+      return 2;
+
+   if (type == BRW_REGISTER_TYPE_F ||
+       type == BRW_REGISTER_TYPE_D ||
+       type == BRW_REGISTER_TYPE_UD)
+      return 4;
+   return 0;
+}
+
+/**
+ * Build a brw_reg for a directly addressed register region.
+ * \param file  one of the BRW_x_REGISTER_FILE values
+ * \param nr  register number/index
+ * \param subnr  register sub number in elements; stored times type_sz(type)
+ * \param type  one of BRW_REGISTER_TYPE_x
+ * \param vstride  one of BRW_VERTICAL_STRIDE_x
+ * \param width  one of BRW_WIDTH_x
+ * \param hstride  one of BRW_HORIZONTAL_STRIDE_x
+ * \param swizzle  one of BRW_SWIZZLE_x
+ * \param writemask  WRITEMASK_X/Y/Z/W bitfield
+ */
+static INLINE struct brw_reg brw_reg( GLuint file, GLuint nr, GLuint subnr,
+                                      GLuint type,
+                                      GLuint vstride, GLuint width, GLuint hstride,
+                                      GLuint swizzle, GLuint writemask )
+{
+   struct brw_reg r;
+
+   switch (file) {
+   case BRW_GENERAL_REGISTER_FILE:
+      assert(nr < BRW_MAX_GRF);
+      break;
+   case BRW_MESSAGE_REGISTER_FILE:
+      assert((nr & ~(1 << 7)) < BRW_MAX_MRF);   /* bit 7 is masked off first */
+      break;
+   case BRW_ARCHITECTURE_REGISTER_FILE:
+      assert(nr <= BRW_ARF_IP);
+      break;
+   }
+
+   r.file = file;
+   r.nr = nr;
+   r.type = type;
+   r.subnr = subnr * type_sz(type);
+   r.vstride = vstride;
+   r.width = width;
+   r.hstride = hstride;
+   r.abs = 0;
+   r.negate = 0;
+   r.address_mode = BRW_ADDRESS_DIRECT;
+   r.pad0 = 0;
+
+   /* Could do better: for a reg such as r5.3<0;1,0> we would probably want
+    * swizzle and writemask set to W, because the low bits of subnr are lost
+    * on conversion to align16.  Tracking that through suboffset() etc. is
+    * probably too much; perhaps fix it up when converting to align16?
+    */
+   r.dw1.bits.writemask = writemask;
+   r.dw1.bits.swizzle = swizzle;
+   r.dw1.bits.pad1 = 0;
+   r.dw1.bits.indirect_offset = 0;
+   return r;
+}
+
+/** Construct float[16] register: vstride 16, width 16, hstride 1, XYZW swizzle/mask */
+static INLINE struct brw_reg brw_vec16_reg( GLuint file,
+					      GLuint nr,
+					      GLuint subnr )
+{
+   struct brw_reg vec16 = brw_reg(file, nr, subnr,
+                                  BRW_REGISTER_TYPE_F,
+                                  BRW_VERTICAL_STRIDE_16,
+                                  BRW_WIDTH_16,
+                                  BRW_HORIZONTAL_STRIDE_1,
+                                  BRW_SWIZZLE_XYZW,
+                                  WRITEMASK_XYZW);
+
+   return vec16;
+}
+
+/** Construct float[8] register: vstride 8, width 8, hstride 1, XYZW swizzle/mask */
+static INLINE struct brw_reg brw_vec8_reg( GLuint file,
+					     GLuint nr,
+					     GLuint subnr )
+{
+   struct brw_reg vec8 = brw_reg(file, nr, subnr,
+                                 BRW_REGISTER_TYPE_F,
+                                 BRW_VERTICAL_STRIDE_8,
+                                 BRW_WIDTH_8,
+                                 BRW_HORIZONTAL_STRIDE_1,
+                                 BRW_SWIZZLE_XYZW,
+                                 WRITEMASK_XYZW);
+
+   return vec8;
+}
+
+/** Construct float[4] register: vstride 4, width 4, hstride 1, XYZW swizzle/mask */
+static INLINE struct brw_reg brw_vec4_reg( GLuint file,
+					      GLuint nr,
+					      GLuint subnr )
+{
+   struct brw_reg vec4 = brw_reg(file, nr, subnr,
+                                 BRW_REGISTER_TYPE_F,
+                                 BRW_VERTICAL_STRIDE_4,
+                                 BRW_WIDTH_4,
+                                 BRW_HORIZONTAL_STRIDE_1,
+                                 BRW_SWIZZLE_XYZW,
+                                 WRITEMASK_XYZW);
+
+   return vec4;
+}
+
+/** Construct float[2] register: vstride 2, width 2, hstride 1, XYXY swizzle, XY mask */
+static INLINE struct brw_reg brw_vec2_reg( GLuint file,
+					      GLuint nr,
+					      GLuint subnr )
+{
+   struct brw_reg vec2 = brw_reg(file, nr, subnr,
+                                 BRW_REGISTER_TYPE_F,
+                                 BRW_VERTICAL_STRIDE_2,
+                                 BRW_WIDTH_2,
+                                 BRW_HORIZONTAL_STRIDE_1,
+                                 BRW_SWIZZLE_XYXY,
+                                 WRITEMASK_XY);
+
+   return vec2;
+}
+
+/** Construct float[1] register: vstride 0, width 1, hstride 0, XXXX swizzle, X mask */
+static INLINE struct brw_reg brw_vec1_reg( GLuint file,
+					     GLuint nr,
+					     GLuint subnr )
+{
+   struct brw_reg vec1 = brw_reg(file, nr, subnr,
+                                 BRW_REGISTER_TYPE_F,
+                                 BRW_VERTICAL_STRIDE_0,
+                                 BRW_WIDTH_1,
+                                 BRW_HORIZONTAL_STRIDE_0,
+                                 BRW_SWIZZLE_XXXX,
+                                 WRITEMASK_X);
+
+   return vec1;
+}
+
+
+/** Return \p reg with its type field replaced by \p type; subnr is not rescaled. */
+static INLINE struct brw_reg retype( struct brw_reg reg, GLuint type )
+{
+   reg.type = type;
+   return reg;
+}
+
+/** Advance subnr by \p delta elements of the register's current type. */
+static INLINE struct brw_reg suboffset( struct brw_reg reg, GLuint delta )
+{
+   reg.subnr = reg.subnr + delta * type_sz(reg.type);
+   return reg;
+}
+
+
+/** Advance the register number by \p delta whole registers. */
+static INLINE struct brw_reg offset( struct brw_reg reg, GLuint delta )
+{
+   reg.nr = reg.nr + delta;
+   return reg;
+}
+
+
+/** Move \p reg forward by \p bytes, carrying any overflow of subnr into nr. */
+static INLINE struct brw_reg byte_offset( struct brw_reg reg, GLuint bytes )
+{
+   const GLuint linear = reg.nr * REG_SIZE + reg.subnr + bytes;
+   reg.subnr = linear % REG_SIZE;
+   reg.nr = linear / REG_SIZE;
+   return reg;
+}
+   
+
+/** Construct unsigned word[16] register; \p subnr counts UW elements */
+static INLINE struct brw_reg brw_uw16_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   struct brw_reg base = brw_vec16_reg(file, nr, 0);
+
+   return suboffset(retype(base, BRW_REGISTER_TYPE_UW), subnr);
+}
+
+/** Construct unsigned word[8] register; \p subnr counts UW elements */
+static INLINE struct brw_reg brw_uw8_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   struct brw_reg base = brw_vec8_reg(file, nr, 0);
+
+   return suboffset(retype(base, BRW_REGISTER_TYPE_UW), subnr);
+}
+
+/** Construct unsigned word[1] register; \p subnr counts UW elements */
+static INLINE struct brw_reg brw_uw1_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   struct brw_reg base = brw_vec1_reg(file, nr, 0);
+
+   return suboffset(retype(base, BRW_REGISTER_TYPE_UW), subnr);
+}
+
+/** Common base of the brw_imm_*() helpers: immediate-file reg of \p type, value unset. */
+static INLINE struct brw_reg brw_imm_reg( GLuint type )
+{
+   struct brw_reg imm = brw_reg(BRW_IMMEDIATE_VALUE, 0, 0,
+                                type,
+                                BRW_VERTICAL_STRIDE_0,
+                                BRW_WIDTH_1,
+                                BRW_HORIZONTAL_STRIDE_0,
+                                0, 0);   /* swizzle and writemask both 0 */
+
+   return imm;
+}
+
+/** Construct float immediate register: type F, value stored in dw1.f */
+static INLINE struct brw_reg brw_imm_f( GLfloat f )
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
+   imm.dw1.f = f;
+   return imm;
+}
+
+/** Construct integer immediate register: type D, value stored in dw1.d */
+static INLINE struct brw_reg brw_imm_d( GLint d )
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
+   imm.dw1.d = d;
+   return imm;
+}
+
+/** Construct uint immediate register: type UD, value stored in dw1.ud */
+static INLINE struct brw_reg brw_imm_ud( GLuint ud )
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
+   imm.dw1.ud = ud;
+   return imm;
+}
+
+/** Construct ushort immediate register (value replicated in both 16-bit halves of dw1) */
+static INLINE struct brw_reg brw_imm_uw( GLushort uw )
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
+   imm.dw1.ud = (GLuint) uw | ((GLuint) uw << 16);   /* shift as unsigned: int would overflow when bit 15 is set */
+   return imm;
+}
+
+/** Construct short immediate register (16-bit pattern replicated in both halves of dw1) */
+static INLINE struct brw_reg brw_imm_w( GLshort w )
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
+   imm.dw1.ud = (GLuint)(GLushort) w | ((GLuint)(GLushort) w << 16);   /* via GLushort: no sign extension into the high half */
+   return imm;
+}
+
+/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
+ * numbers alias with _V and _VF below:
+ */
+
+/** Construct vector of eight signed half-byte values, all packed in \p v */
+static INLINE struct brw_reg brw_imm_v( GLuint v )
+{
+   struct brw_reg vec = brw_imm_reg(BRW_REGISTER_TYPE_V);
+   vec.dw1.ud = v;
+   vec.hstride = BRW_HORIZONTAL_STRIDE_1;
+   vec.width = BRW_WIDTH_8;
+   vec.vstride = BRW_VERTICAL_STRIDE_0;
+   return vec;
+}
+
+/** Construct vector of four 8-bit float values, all packed in \p v */
+static INLINE struct brw_reg brw_imm_vf( GLuint v )
+{
+   struct brw_reg vec = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+   vec.dw1.ud = v;
+   vec.hstride = BRW_HORIZONTAL_STRIDE_1;
+   vec.width = BRW_WIDTH_4;
+   vec.vstride = BRW_VERTICAL_STRIDE_0;
+   return vec;
+}
+
+#define VF_ZERO 0x0
+#define VF_ONE  0x30
+#define VF_NEG  (1<<7)
+
+/** VF immediate from four separate values: v0 goes in the lowest byte, v3 in the highest. */
+static INLINE struct brw_reg brw_imm_vf4( GLuint v0, GLuint v1,
+					    GLuint v2, GLuint v3)
+{
+   struct brw_reg quad = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+   GLuint packed = v0 << 0;
+   packed |= v1 << 8;
+   packed |= v2 << 16;
+   packed |= v3 << 24;
+   quad.dw1.ud = packed;
+   quad.hstride = BRW_HORIZONTAL_STRIDE_1;
+   quad.width = BRW_WIDTH_4;
+   quad.vstride = BRW_VERTICAL_STRIDE_0;
+   return quad;
+}
+
+
+static INLINE struct brw_reg brw_address( struct brw_reg reg )
+{
+   return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);   /* UW immediate: nr scaled by REG_SIZE, plus subnr */
+}
+
+/** Construct float[1] general-purpose register: brw_vec1_reg() on the GRF file */
+static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr )
+{
+   return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[2] general-purpose register: brw_vec2_reg() on the GRF file */
+static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr )
+{
+   return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[4] general-purpose register: brw_vec4_reg() on the GRF file */
+static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr )
+{
+   return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[8] general-purpose register: brw_vec8_reg() on the GRF file */
+static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr )
+{
+   return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr )
+{
+   return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);   /* unsigned word[8] on the GRF file */
+}
+
+static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr )
+{
+   return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);   /* unsigned word[16] on the GRF file */
+}
+
+
+/** Construct null register (usually used for setting condition codes): float[8] on ARF BRW_ARF_NULL */
+static INLINE struct brw_reg brw_null_reg( void )
+{
+   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, 
+		       BRW_ARF_NULL, 
+		       0);
+}
+
+static INLINE struct brw_reg brw_address_reg( GLuint subnr )
+{
+   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,   /* unsigned word[1] on ARF ... */
+		      BRW_ARF_ADDRESS,   /* ... register BRW_ARF_ADDRESS ... */
+		      subnr);   /* ... at UW element subnr */
+}
+
+/* Construct the UD-typed register for ARF BRW_ARF_IP.
+ * If/else instructions break in align16 mode if writemask & swizzle
+ * aren't xyzw, so unlike other scalar regs this one uses XYZW for both:
+ */
+static INLINE struct brw_reg brw_ip_reg( void )
+{
+   return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, 
+		  BRW_ARF_IP, 
+		  0,
+		  BRW_REGISTER_TYPE_UD,
+		  BRW_VERTICAL_STRIDE_4, /* ? */
+		  BRW_WIDTH_1,
+		  BRW_HORIZONTAL_STRIDE_0,
+		  BRW_SWIZZLE_XYZW, /* NOTE! */
+		  WRITEMASK_XYZW); /* NOTE! */
+}
+
+static INLINE struct brw_reg brw_acc_reg( void )
+{
+   return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,   /* float[8] on ARF ... */
+		       BRW_ARF_ACCUMULATOR,   /* ... register BRW_ARF_ACCUMULATOR */
+		       0);
+}
+
+
+static INLINE struct brw_reg brw_flag_reg( void )
+{
+   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,   /* unsigned word[1] on ARF ... */
+		      BRW_ARF_FLAG,   /* ... register BRW_ARF_FLAG, element 0 */
+		      0);
+}
+
+
+static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
+{
+   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,   /* unsigned word[1] on ARF ... */
+		      BRW_ARF_MASK,   /* ... register BRW_ARF_MASK ... */
+		      subnr);   /* ... at UW element subnr */
+}
+
+static INLINE struct brw_reg brw_message_reg( GLuint nr )
+{
+   assert((nr & ~(1 << 7)) < BRW_MAX_MRF);   /* bit 7 of nr is masked off before the range check */
+   return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE,   /* float[8] on the message register file */
+		       nr,
+		       0);
+}
+
+
+
+
+/* Map the counts 0, 1, 2, 4, 8, 16, 32 to the codes 0, 1, 2, 3, 4, 5, 6;
+ * any other value maps to 0.  This is almost always called with a numeric
+ * constant argument, so make things easy to evaluate at compile time:
+ */
+static INLINE GLuint cvt( GLuint val )
+{
+   if (val == 1)  return 1;
+   if (val == 2)  return 2;
+   if (val == 4)  return 3;
+   if (val == 8)  return 4;
+   if (val == 16) return 5;
+   if (val == 32) return 6;
+
+   /* zero and every unlisted value */
+   return 0;
+}
+
+/** Re-region \p reg: strides and width are given as counts and encoded with cvt(). */
+static INLINE struct brw_reg stride( struct brw_reg reg, GLuint vstride,
+				       GLuint width, GLuint hstride )
+{
+   const GLuint width_code = cvt(width);
+   reg.hstride = cvt(hstride);
+   reg.width = width_code - 1;   /* width field is cvt()'s code minus one */
+   reg.vstride = cvt(vstride);
+   return reg;
+}
+
+
+static INLINE struct brw_reg vec16( struct brw_reg reg )
+{
+   return stride(reg, 16,16,1);   /* vstride 16, width 16, hstride 1 */
+}
+
+static INLINE struct brw_reg vec8( struct brw_reg reg )
+{
+   return stride(reg, 8,8,1);   /* vstride 8, width 8, hstride 1 */
+}
+
+static INLINE struct brw_reg vec4( struct brw_reg reg )
+{
+   return stride(reg, 4,4,1);   /* vstride 4, width 4, hstride 1 */
+}
+
+static INLINE struct brw_reg vec2( struct brw_reg reg )
+{
+   return stride(reg, 2,2,1);   /* vstride 2, width 2, hstride 1 */
+}
+
+static INLINE struct brw_reg vec1( struct brw_reg reg )
+{
+   return stride(reg, 0,1,0);   /* vstride 0, width 1, hstride 0 */
+}
+
+
+static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt )
+{
+   return vec1(suboffset(reg, elt));   /* step elt elements of reg's type, then narrow to a vec1 region */
+}
+
+static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
+{
+   return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));   /* as get_element(), but retyped to UD first */
+}
+
+
+/** Compose swizzles: each new channel is what the current swizzle selects for x/y/z/w. */
+static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
+					    GLuint x, GLuint y, GLuint z, GLuint w)
+{
+   const GLuint old = reg.dw1.bits.swizzle;
+   const GLuint sx = BRW_GET_SWZ(old, x);
+   const GLuint sy = BRW_GET_SWZ(old, y);
+   const GLuint sz = BRW_GET_SWZ(old, z);
+   const GLuint sw = BRW_GET_SWZ(old, w);
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(sx, sy, sz, sw);
+   return reg;
+}
+
+
+static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg,
+					     GLuint x )
+{
+   return brw_swizzle(reg, x, x, x, x);   /* same selector on all four channels */
+}
+
+static INLINE struct brw_reg brw_writemask( struct brw_reg reg,
+					      GLuint mask )
+{
+   reg.dw1.bits.writemask &= mask;   /* ANDs with the existing mask, so it can only narrow it */
+   return reg;
+}
+
+static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg,
+						  GLuint mask )
+{
+   reg.dw1.bits.writemask = mask;   /* replaces the existing mask outright */
+   return reg;
+}
+
+static INLINE struct brw_reg negate( struct brw_reg reg )
+{
+   reg.negate ^= 1;   /* toggles the flag, so negating twice cancels out */
+   return reg;
+}
+
+static INLINE struct brw_reg brw_abs( struct brw_reg reg )
+{
+   reg.abs = 1;   /* sets the abs flag; the negate flag is left as it was */
+   return reg;
+}
+
+/***********************************************************************
+ */
+/** float[4] GRF region in register-indirect mode; \p subnr and \p offset are stored as given. */
+static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr, GLint offset )
+{
+   struct brw_reg ind = brw_vec4_grf(0, 0);
+   ind.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   ind.dw1.bits.indirect_offset = offset;
+   ind.subnr = subnr;   /* callers in this header pass an address sub-register number */
+   return ind;
+}
+
+/** float[1] GRF region in register-indirect mode; \p subnr and \p offset are stored as given. */
+static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr, GLint offset )
+{
+   struct brw_reg ind = brw_vec1_grf(0, 0);
+   ind.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   ind.dw1.bits.indirect_offset = offset;
+   ind.subnr = subnr;   /* callers in this header pass an address sub-register number */
+   return ind;
+}
+
+static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
+{
+   return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);   /* offset is added to the pointer's own offset */
+}
+
+static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
+{
+   return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);   /* offset is added to the pointer's own offset */
+}
+
+static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
+{
+   return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);   /* deref_4f() region retyped to B */
+}
+
+static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
+{
+   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);   /* deref_1f() region retyped to UW */
+}
+
+static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset)
+{
+   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);   /* deref_1f() region retyped to D */
+}
+
+static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset)
+{
+   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);   /* deref_1f() region retyped to UD */
+}
+
+static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr)
+{
+   return brw_address_reg(ptr.addr_subnr);   /* the address register element ptr refers to; addr_offset is not used */
+}
+
+static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
+{
+   ptr.addr_offset += offset;   /* addr_offset is a signed 10-bit field */
+   return ptr;
+}
+
+static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
+{
+   struct brw_indirect ind;   /* all three fields are assigned below */
+   ind.pad = 0;
+   ind.addr_offset = offset;
+   ind.addr_subnr = addr_subnr;
+   return ind;
+}
+
+/** Do two brw_regs refer to the same register?  Only file and nr are compared. */
+static INLINE GLboolean
+brw_same_reg(struct brw_reg r1, struct brw_reg r2)
+{
+   return r1.nr == r2.nr && r1.file == r2.file;
+}
+
+static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
+{
+   return &p->store[p->nr_insn];   /* address of the store[] slot at index nr_insn */
+}
+
+void brw_pop_insn_state( struct brw_compile *p );
+void brw_push_insn_state( struct brw_compile *p );
+void brw_set_mask_control( struct brw_compile *p, GLuint value );
+void brw_set_saturate( struct brw_compile *p, GLuint value );
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
+void brw_set_compression_control( struct brw_compile *p, GLboolean control );
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
+
+void brw_init_compile( struct brw_context *, struct brw_compile *p );
+const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
+
+
+/* Helpers for regular instructions:
+ */
+#define ALU1(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0);
+
+#define ALU2(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0,			\
+	      struct brw_reg src1);
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(JMPI)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDZ)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+ALU2(PLN)
+
+#undef ALU1
+#undef ALU2
+
+
+
+/* Helpers for SEND instruction:
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   GLboolean allocate,
+		   GLboolean used,
+		   GLuint msg_length,
+		   GLuint response_length,
+		   GLboolean eot,
+		   GLboolean writes_complete,
+		   GLuint offset,
+		   GLuint swizzle);
+
+void brw_ff_sync(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   GLboolean allocate,
+		   GLuint response_length,
+		   GLboolean eot);
+
+void brw_fb_WRITE(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   GLuint binding_table_index,
+		   GLuint msg_length,
+		   GLuint response_length,
+		   GLboolean eot);
+
+void brw_SAMPLE(struct brw_compile *p,
+		struct brw_reg dest,
+		GLuint msg_reg_nr,
+		struct brw_reg src0,
+		GLuint binding_table_index,
+		GLuint sampler,
+		GLuint writemask,
+		GLuint msg_type,
+		GLuint response_length,
+		GLuint msg_length,
+		GLboolean eot,
+		GLuint header_present,
+		GLuint simd_mode);
+
+void brw_math_16( struct brw_compile *p,
+		  struct brw_reg dest,
+		  GLuint function,
+		  GLuint saturate,
+		  GLuint msg_reg_nr,
+		  struct brw_reg src,
+		  GLuint precision );
+
+void brw_math( struct brw_compile *p,
+	       struct brw_reg dest,
+	       GLuint function,
+	       GLuint saturate,
+	       GLuint msg_reg_nr,
+	       struct brw_reg src,
+	       GLuint data_type,
+	       GLuint precision );
+
+void brw_dp_READ_16( struct brw_compile *p,
+		     struct brw_reg dest,
+		     GLuint scratch_offset );
+
+void brw_dp_READ_4( struct brw_compile *p,
+                    struct brw_reg dest,
+                    GLboolean relAddr,
+                    GLuint location,
+                    GLuint bind_table_index );
+
+void brw_dp_READ_4_vs( struct brw_compile *p,
+                       struct brw_reg dest,
+                       GLuint oword,
+                       GLboolean relAddr,
+                       struct brw_reg addrReg,
+                       GLuint location,
+                       GLuint bind_table_index );
+
+void brw_dp_WRITE_16( struct brw_compile *p,
+		      struct brw_reg src,
+		      GLuint scratch_offset );
+
+/* If/else/endif.  Works by manipulating the execution flags on each
+ * channel.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, 
+			       GLuint execute_size);
+
+struct brw_instruction *brw_ELSE(struct brw_compile *p, 
+				 struct brw_instruction *if_insn);
+
+void brw_ENDIF(struct brw_compile *p, 
+	       struct brw_instruction *if_or_else_insn);
+
+
+/* DO/WHILE loops:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p,
+			       GLuint execute_size);
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p, 
+	       struct brw_instruction *patch_insn);
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p);
+struct brw_instruction *brw_CONT(struct brw_compile *p);
+/* Forward jumps:
+ */
+void brw_land_fwd_jump(struct brw_compile *p, 
+		       struct brw_instruction *jmp_insn);
+
+
+
+void brw_NOP(struct brw_compile *p);
+
+/* Special case: there is never a destination, execution size will be
+ * taken from src0:
+ */
+void brw_CMP(struct brw_compile *p,
+	     struct brw_reg dest,
+	     GLuint conditional,
+	     struct brw_reg src0,
+	     struct brw_reg src1);
+
+void brw_print_reg( struct brw_reg reg );
+
+
+/*********************************************************************** 
+ * brw_eu_util.c:
+ */
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+				   struct brw_indirect dst_ptr,
+				   struct brw_indirect src_ptr,
+				   GLuint count);
+
+void brw_copy_from_indirect(struct brw_compile *p,
+			    struct brw_reg dst,
+			    struct brw_indirect ptr,
+			    GLuint count);
+
+void brw_copy4(struct brw_compile *p,
+	       struct brw_reg dst,
+	       struct brw_reg src,
+	       GLuint count);
+
+void brw_copy8(struct brw_compile *p,
+	       struct brw_reg dst,
+	       struct brw_reg src,
+	       GLuint count);
+
+void brw_math_invert( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg src);
+
+void brw_set_src1( struct brw_instruction *insn,
+                          struct brw_reg reg );
+
+
+/* brw_optimize.c */
+void brw_optimize(struct brw_compile *p);
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_eu_debug.c b/src/mesa/drivers/dri/i965/brw_eu_debug.c
new file mode 100644
index 0000000000..99453afdca
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_eu_debug.c
@@ -0,0 +1,95 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+    
+
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "brw_eu.h"
+
+/**
+ * Print a short human-readable description of \p hwreg to stdout (no newline).
+ * Unknown register types print as "?" with the raw sub-register number.
+ */
+void brw_print_reg( struct brw_reg hwreg )
+{
+   static const char *file[] = { "arf", "grf", "msg", "imm" };
+
+   static const char *type[] = {
+      "ud",
+      "d",
+      "uw",
+      "w",
+      "ub",
+      "vf",
+      "hf",
+      "f"
+   };
+
+   printf("%s%s", hwreg.abs ? "abs/" : "", hwreg.negate ? "-" : "");
+
+   if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+       hwreg.nr % 2 == 0 &&
+       hwreg.subnr == 0 &&
+       hwreg.vstride == BRW_VERTICAL_STRIDE_8 &&
+       hwreg.width == BRW_WIDTH_8 &&
+       hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+       hwreg.type == BRW_REGISTER_TYPE_F) {
+      /* vector register */
+      printf("vec%d", hwreg.nr);
+   }
+   else if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
+	    hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
+	    hwreg.width == BRW_WIDTH_1 &&
+	    hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 &&
+	    hwreg.type == BRW_REGISTER_TYPE_F) {
+      /* "scalar" register */
+      printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+   }
+   else if (hwreg.file == BRW_IMMEDIATE_VALUE) {
+      printf("imm %f", hwreg.dw1.f);   /* always shown as a float, whatever the type */
+   }
+   else {
+      /* unknown types: type_sz() is 0 and type[] has no entry, so guard both */
+      const int elt_sz = type_sz(hwreg.type);
+      const GLuint ntypes = sizeof(type) / sizeof(type[0]);
+      printf("%s%d.%d<%d;%d,%d>:%s",
+		   file[hwreg.file],
+		   hwreg.nr,
+		   elt_sz ? hwreg.subnr / elt_sz : hwreg.subnr,
+		   hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0,
+		   1<<hwreg.width,
+		   hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0,
+		   hwreg.type < ntypes ? type[hwreg.type] : "?");
+   }
+}
+
+
+
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
new file mode 100644
index 0000000000..34dfe10cb9
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -0,0 +1,1485 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+     
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+
+/***********************************************************************
+ * Internal helper for constructing instructions
+ */
+
+/**
+ * Derive an instruction's execution size from a register's width.
+ *
+ * A width-8 register on an instruction whose compression control is
+ * BRW_COMPRESSION_COMPRESSED yields BRW_EXECUTE_16; otherwise the width
+ * encoding is stored unchanged as the execution size.
+ */
+static void guess_execution_size( struct brw_instruction *insn,
+                                  struct brw_reg reg )
+{
+   const int compressed_width8 =
+      (reg.width == BRW_WIDTH_8 &&
+       insn->header.compression_control == BRW_COMPRESSION_COMPRESSED);
+
+   /* note - the width and execution-size definitions are compatible,
+    * so the width can be stored directly.
+    */
+   insn->header.execution_size = compressed_width8 ? BRW_EXECUTE_16
+                                                   : reg.width;
+}
+
+
+/**
+ * Encode the destination operand of an instruction.
+ *
+ * Fills in the destination file, type and address mode, then the
+ * register number / sub-register (direct addressing) or the
+ * sub-register / indirect offset (indirect addressing), using the
+ * align1 or align16 layout according to insn->header.access_mode.
+ * Finally the execution size is derived from the destination's width.
+ */
+static void brw_set_dest( struct brw_instruction *insn,
+                          struct brw_reg dest )
+{
+   const int align1 = (insn->header.access_mode == BRW_ALIGN_1);
+
+   /* Register numbers are range-checked for everything except the
+    * ARF and message register files.
+    */
+   assert(dest.file == BRW_ARCHITECTURE_REGISTER_FILE ||
+          dest.file == BRW_MESSAGE_REGISTER_FILE ||
+          dest.nr < 128);
+
+   /* In align1 mode a horizontal stride of 0 is never written for a
+    * destination; it is promoted to 1 (applies to both address modes).
+    */
+   if (align1 && dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+      dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+
+   insn->bits1.da1.dest_reg_file = dest.file;
+   insn->bits1.da1.dest_reg_type = dest.type;
+   insn->bits1.da1.dest_address_mode = dest.address_mode;
+
+   if (dest.address_mode != BRW_ADDRESS_DIRECT) {
+      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
+
+      /* These are different sizes in align1 vs align16:
+       */
+      if (align1) {
+         insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+         insn->bits1.ia1.dest_horiz_stride = dest.hstride;
+      }
+      else {
+         insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+      }
+   }
+   else {
+      insn->bits1.da1.dest_reg_nr = dest.nr;
+
+      if (align1) {
+         insn->bits1.da1.dest_subreg_nr = dest.subnr;
+         insn->bits1.da1.dest_horiz_stride = dest.hstride;
+      }
+      else {
+         /* align16 stores the sub-register in units of 16 and carries
+          * a writemask instead of a horizontal stride.
+          */
+         insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
+         insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+      }
+   }
+
+   /* Set the execution size based on dest.width and
+    * insn->compression_control:
+    */
+   guess_execution_size(insn, dest);
+}
+
+/**
+ * Encode the first source operand of an instruction.
+ *
+ * Writes the source file, type, abs/negate modifiers and address mode.
+ * For an immediate, the 32-bit value goes into bits3 and the src1 file
+ * and type fields are filled in as well.  For a register, the register
+ * number / sub-register (direct) or sub-register / indirect offset
+ * (indirect) is written, followed by the region description (align1)
+ * or the swizzle and vertical stride (align16).
+ */
+static void brw_set_src0( struct brw_instruction *insn,
+                          struct brw_reg reg )
+{
+   /* The register-number range check is skipped for ARF registers.
+    * This has to test the register *file*: comparing reg.type against
+    * a register-file constant made the check depend on the data type
+    * of the operand instead of on where it lives.
+    */
+   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(reg.nr < 128);
+
+   insn->bits1.da1.src0_reg_file = reg.file;
+   insn->bits1.da1.src0_reg_type = reg.type;
+   insn->bits2.da1.src0_abs = reg.abs;
+   insn->bits2.da1.src0_negate = reg.negate;
+   insn->bits2.da1.src0_address_mode = reg.address_mode;
+
+   if (reg.file == BRW_IMMEDIATE_VALUE) {
+      insn->bits3.ud = reg.dw1.ud;
+   
+      /* Required to set some fields in src1 as well:
+       */
+      insn->bits1.da1.src1_reg_file = 0; /* arf */
+      insn->bits1.da1.src1_reg_type = reg.type;
+   }
+   else 
+   {
+      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+	 if (insn->header.access_mode == BRW_ALIGN_1) {
+	    insn->bits2.da1.src0_subreg_nr = reg.subnr;
+	    insn->bits2.da1.src0_reg_nr = reg.nr;
+	 }
+	 else {
+	    /* align16 stores the sub-register in units of 16. */
+	    insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+	    insn->bits2.da16.src0_reg_nr = reg.nr;
+	 }
+      }
+      else {
+	 insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+
+	 if (insn->header.access_mode == BRW_ALIGN_1) {
+	    insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; 
+	 }
+	 else {
+	    /* NOTE(review): the indirect offset is stored in the ia16
+	     * sub-register field here rather than in an indirect-offset
+	     * field; left unchanged - confirm against the ia16 layout.
+	     */
+	    insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
+	 }
+      }
+
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+	 /* A width-1 source on a single-channel instruction is always
+	  * encoded with all strides 0 and width 1, whatever the register
+	  * description says.
+	  */
+	 if (reg.width == BRW_WIDTH_1 && 
+	     insn->header.execution_size == BRW_EXECUTE_1) {
+	    insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+	    insn->bits2.da1.src0_width = BRW_WIDTH_1;
+	    insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+	 }
+	 else {
+	    insn->bits2.da1.src0_horiz_stride = reg.hstride;
+	    insn->bits2.da1.src0_width = reg.width;
+	    insn->bits2.da1.src0_vert_stride = reg.vstride;
+	 }
+      }
+      else {
+	 insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+	 insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+	 insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+	 insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+	 /* This is an oddity of the fact we're using the same
+	  * descriptions for registers in align_16 as align_1:
+	  */
+	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+	    insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+	 else
+	    insn->bits2.da16.src0_vert_stride = reg.vstride;
+      }
+   }
+}
+
+
+/**
+ * Encode the second source operand of an instruction.
+ *
+ * The operand must not live in the message register file and its
+ * register number must be below 128.  An immediate is stored in bits3;
+ * a register must use direct addressing and is encoded with the align1
+ * region description or the align16 swizzle / vertical stride,
+ * according to insn->header.access_mode.  src0 must not already be an
+ * immediate.
+ */
+void brw_set_src1( struct brw_instruction *insn,
+                   struct brw_reg reg )
+{
+   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+   assert(reg.nr < 128);
+
+   insn->bits1.da1.src1_reg_file = reg.file;
+   insn->bits1.da1.src1_reg_type = reg.type;
+   insn->bits3.da1.src1_abs = reg.abs;
+   insn->bits3.da1.src1_negate = reg.negate;
+
+   /* Only src1 can be immediate in two-argument instructions.
+    */
+   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
+
+   if (reg.file == BRW_IMMEDIATE_VALUE) {
+      insn->bits3.ud = reg.dw1.ud;
+      return;
+   }
+
+   /* This is a hardware restriction, which may or may not be lifted
+    * in the future:
+    */
+   assert (reg.address_mode == BRW_ADDRESS_DIRECT);
+
+   if (insn->header.access_mode == BRW_ALIGN_1) {
+      const int scalar_on_exec1 =
+         (reg.width == BRW_WIDTH_1 &&
+          insn->header.execution_size == BRW_EXECUTE_1);
+
+      insn->bits3.da1.src1_subreg_nr = reg.subnr;
+      insn->bits3.da1.src1_reg_nr = reg.nr;
+
+      if (scalar_on_exec1) {
+         /* Width-1 source on a single-channel instruction: force the
+          * all-zero-stride, width-1 region.
+          */
+         insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+         insn->bits3.da1.src1_width = BRW_WIDTH_1;
+         insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
+      }
+      else {
+         insn->bits3.da1.src1_horiz_stride = reg.hstride;
+         insn->bits3.da1.src1_width = reg.width;
+         insn->bits3.da1.src1_vert_stride = reg.vstride;
+      }
+   }
+   else {
+      /* align16 stores the sub-register in units of 16. */
+      insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+      insn->bits3.da16.src1_reg_nr = reg.nr;
+
+      insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+      insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+      insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+      insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+      /* This is an oddity of the fact we're using the same
+       * descriptions for registers in align_16 as align_1:
+       */
+      if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+         insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+      else
+         insn->bits3.da16.src1_vert_stride = reg.vstride;
+   }
+}
+
+
+
+/**
+ * Fill in the message descriptor of a SEND addressed to the math unit.
+ *
+ * src1 is first set from brw_imm_d(0); the descriptor fields are then
+ * written into bits3 using the gen5 layout when brw->intel.gen == 5
+ * (which also sets the SFID in bits2) and the original layout
+ * otherwise.  End-of-thread is always cleared.
+ */
+static void brw_set_math_message( struct brw_context *brw,
+                                  struct brw_instruction *insn,
+                                  GLuint msg_length,
+                                  GLuint response_length,
+                                  GLuint function,
+                                  GLuint integer_type,
+                                  GLboolean low_precision,
+                                  GLboolean saturate,
+                                  GLuint dataType )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   if (brw->intel.gen != 5) {
+      /* Original layout: the message target lives in bits3. */
+      insn->bits3.math.function = function;
+      insn->bits3.math.int_type = integer_type;
+      insn->bits3.math.precision = low_precision;
+      insn->bits3.math.saturate = saturate;
+      insn->bits3.math.data_type = dataType;
+      insn->bits3.math.response_length = response_length;
+      insn->bits3.math.msg_length = msg_length;
+      insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
+      insn->bits3.math.end_of_thread = 0;
+      return;
+   }
+
+   /* gen5 layout: the target (SFID) moves to bits2. */
+   insn->bits3.math_gen5.function = function;
+   insn->bits3.math_gen5.int_type = integer_type;
+   insn->bits3.math_gen5.precision = low_precision;
+   insn->bits3.math_gen5.saturate = saturate;
+   insn->bits3.math_gen5.data_type = dataType;
+   insn->bits3.math_gen5.snapshot = 0;
+   insn->bits3.math_gen5.header_present = 0;
+   insn->bits3.math_gen5.response_length = response_length;
+   insn->bits3.math_gen5.msg_length = msg_length;
+   insn->bits3.math_gen5.end_of_thread = 0;
+   insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_MATH;
+   insn->bits2.send_gen5.end_of_thread = 0;
+}
+
+
+/**
+ * Fill in the message descriptor of an FF_SYNC SEND to the URB unit.
+ *
+ * Always uses the urb_gen5 descriptor layout with opcode 1, a message
+ * length of 1 and a header.  On gen >= 6 the URB target is written to
+ * the conditional-modifier bits of the header; on earlier parts it is
+ * written, together with end_of_thread, into bits2.
+ */
+static void brw_set_ff_sync_message(struct brw_context *brw,
+                                    struct brw_instruction *insn,
+                                    GLboolean allocate,
+                                    GLuint response_length,
+                                    GLboolean end_of_thread)
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
+   insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
+   insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
+   insn->bits3.urb_gen5.allocate = allocate;
+   insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
+   insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
+   insn->bits3.urb_gen5.header_present = 1;
+   insn->bits3.urb_gen5.response_length = response_length; /* may be 1 or 0 */
+   insn->bits3.urb_gen5.msg_length = 1;
+   insn->bits3.urb_gen5.end_of_thread = end_of_thread;
+
+   if (brw->intel.gen < 6) {
+      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
+      insn->bits2.send_gen5.end_of_thread = end_of_thread;
+   } else {
+      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
+   }
+}
+
+/**
+ * Fill in the message descriptor of a SEND addressed to the URB unit.
+ *
+ * src1 is first set from brw_imm_d(0).  Parts before gen5 use the
+ * original descriptor layout with the target in bits3.  gen5 and later
+ * use the urb_gen5 layout with a header; the target is then written to
+ * bits2 (gen5) or to the conditional-modifier bits of the header
+ * (gen >= 6).
+ */
+static void brw_set_urb_message( struct brw_context *brw,
+                                 struct brw_instruction *insn,
+                                 GLboolean allocate,
+                                 GLboolean used,
+                                 GLuint msg_length,
+                                 GLuint response_length,
+                                 GLboolean end_of_thread,
+                                 GLboolean complete,
+                                 GLuint offset,
+                                 GLuint swizzle_control )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   if (brw->intel.gen < 5) {
+      insn->bits3.urb.opcode = 0;	/* ? */
+      insn->bits3.urb.offset = offset;
+      insn->bits3.urb.swizzle_control = swizzle_control;
+      insn->bits3.urb.allocate = allocate;
+      insn->bits3.urb.used = used;	/* ? */
+      insn->bits3.urb.complete = complete;
+      insn->bits3.urb.response_length = response_length;
+      insn->bits3.urb.msg_length = msg_length;
+      insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
+      insn->bits3.urb.end_of_thread = end_of_thread;
+      return;
+   }
+
+   insn->bits3.urb_gen5.opcode = 0;	/* ? */
+   insn->bits3.urb_gen5.offset = offset;
+   insn->bits3.urb_gen5.swizzle_control = swizzle_control;
+   insn->bits3.urb_gen5.allocate = allocate;
+   insn->bits3.urb_gen5.used = used;	/* ? */
+   insn->bits3.urb_gen5.complete = complete;
+   insn->bits3.urb_gen5.header_present = 1;
+   insn->bits3.urb_gen5.response_length = response_length;
+   insn->bits3.urb_gen5.msg_length = msg_length;
+   insn->bits3.urb_gen5.end_of_thread = end_of_thread;
+
+   if (brw->intel.gen < 6) {
+      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_URB;
+      insn->bits2.send_gen5.end_of_thread = end_of_thread;
+   } else {
+      /* For SNB, the SFID bits moved to the condmod bits, and
+       * EOT stayed in bits3 above.  Does the EOT bit setting
+       * in bits2 on Ironlake even do anything?
+       */
+      insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
+   }
+}
+
+/**
+ * Fill in the message descriptor of a SEND addressed to the dataport
+ * write unit.
+ *
+ * src1 is first set from brw_imm_d(0).  The gen5 layout (with a header
+ * and the target in bits2) is used when brw->intel.gen == 5, the
+ * original layout otherwise.  send_commit_msg is always cleared.
+ */
+static void brw_set_dp_write_message( struct brw_context *brw,
+                                      struct brw_instruction *insn,
+                                      GLuint binding_table_index,
+                                      GLuint msg_control,
+                                      GLuint msg_type,
+                                      GLuint msg_length,
+                                      GLuint pixel_scoreboard_clear,
+                                      GLuint response_length,
+                                      GLuint end_of_thread )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   if (brw->intel.gen != 5) {
+      insn->bits3.dp_write.binding_table_index = binding_table_index;
+      insn->bits3.dp_write.msg_control = msg_control;
+      insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
+      insn->bits3.dp_write.msg_type = msg_type;
+      insn->bits3.dp_write.send_commit_msg = 0;
+      insn->bits3.dp_write.response_length = response_length;
+      insn->bits3.dp_write.msg_length = msg_length;
+      insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+      insn->bits3.dp_write.end_of_thread = end_of_thread;
+      return;
+   }
+
+   insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
+   insn->bits3.dp_write_gen5.msg_control = msg_control;
+   insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
+   insn->bits3.dp_write_gen5.msg_type = msg_type;
+   insn->bits3.dp_write_gen5.send_commit_msg = 0;
+   insn->bits3.dp_write_gen5.header_present = 1;
+   insn->bits3.dp_write_gen5.response_length = response_length;
+   insn->bits3.dp_write_gen5.msg_length = msg_length;
+   insn->bits3.dp_write_gen5.end_of_thread = end_of_thread;
+   insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+   insn->bits2.send_gen5.end_of_thread = end_of_thread;
+}
+
+/**
+ * Fill in the message descriptor of a SEND addressed to the dataport
+ * read unit.
+ *
+ * src1 is first set from brw_imm_d(0).  The gen5 layout (with a header
+ * and the target in bits2) is used when brw->intel.gen == 5, the
+ * original layout otherwise.
+ */
+static void brw_set_dp_read_message( struct brw_context *brw,
+                                     struct brw_instruction *insn,
+                                     GLuint binding_table_index,
+                                     GLuint msg_control,
+                                     GLuint msg_type,
+                                     GLuint target_cache,
+                                     GLuint msg_length,
+                                     GLuint response_length,
+                                     GLuint end_of_thread )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+
+   if (brw->intel.gen != 5) {
+      insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
+      insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
+      insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
+      insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
+      insn->bits3.dp_read.response_length = response_length;  /*16:19*/
+      insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
+      insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
+      insn->bits3.dp_read.pad1 = 0;  /*28:30*/
+      insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
+      return;
+   }
+
+   insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
+   insn->bits3.dp_read_gen5.msg_control = msg_control;
+   insn->bits3.dp_read_gen5.msg_type = msg_type;
+   insn->bits3.dp_read_gen5.target_cache = target_cache;
+   insn->bits3.dp_read_gen5.header_present = 1;
+   insn->bits3.dp_read_gen5.response_length = response_length;
+   insn->bits3.dp_read_gen5.msg_length = msg_length;
+   insn->bits3.dp_read_gen5.pad1 = 0;
+   insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
+   insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+   insn->bits2.send_gen5.end_of_thread = end_of_thread;
+}
+
+/**
+ * Fill in the message descriptor of a SEND addressed to the sampler.
+ *
+ * eot must be 0.  src1 is first set from brw_imm_d(0).  One of three
+ * descriptor layouts is used: gen5 (brw->intel.gen == 5), g4x
+ * (brw->intel.is_g4x), or the original one, which also sets the return
+ * format to BRW_SAMPLER_RETURN_FORMAT_FLOAT32.  header_present and
+ * simd_mode are only written in the gen5 layout.
+ */
+static void brw_set_sampler_message(struct brw_context *brw,
+                                    struct brw_instruction *insn,
+                                    GLuint binding_table_index,
+                                    GLuint sampler,
+                                    GLuint msg_type,
+                                    GLuint response_length,
+                                    GLuint msg_length,
+                                    GLboolean eot,
+                                    GLuint header_present,
+                                    GLuint simd_mode)
+{
+   assert(eot == 0);
+   brw_set_src1(insn, brw_imm_d(0));
+
+   if (brw->intel.gen == 5) {
+      insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
+      insn->bits3.sampler_gen5.sampler = sampler;
+      insn->bits3.sampler_gen5.msg_type = msg_type;
+      insn->bits3.sampler_gen5.simd_mode = simd_mode;
+      insn->bits3.sampler_gen5.header_present = header_present;
+      insn->bits3.sampler_gen5.response_length = response_length;
+      insn->bits3.sampler_gen5.msg_length = msg_length;
+      insn->bits3.sampler_gen5.end_of_thread = eot;
+      insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER;
+      insn->bits2.send_gen5.end_of_thread = eot;
+      return;
+   }
+
+   if (brw->intel.is_g4x) {
+      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
+      insn->bits3.sampler_g4x.sampler = sampler;
+      insn->bits3.sampler_g4x.msg_type = msg_type;
+      insn->bits3.sampler_g4x.response_length = response_length;
+      insn->bits3.sampler_g4x.msg_length = msg_length;
+      insn->bits3.sampler_g4x.end_of_thread = eot;
+      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+      return;
+   }
+
+   insn->bits3.sampler.binding_table_index = binding_table_index;
+   insn->bits3.sampler.sampler = sampler;
+   insn->bits3.sampler.msg_type = msg_type;
+   insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+   insn->bits3.sampler.response_length = response_length;
+   insn->bits3.sampler.msg_length = msg_length;
+   insn->bits3.sampler.end_of_thread = eot;
+   insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+}
+
+
+
+/**
+ * Allocate the next instruction slot in p->store and initialise it.
+ *
+ * The new instruction starts as a copy of the current default
+ * instruction state (p->current) with its opcode set to 'opcode'.
+ * If the defaults carried a non-zero conditional modifier, that
+ * modifier is cleared from the defaults and their predicate control
+ * is set to BRW_PREDICATE_NORMAL, so it only applies to this one
+ * instruction.
+ *
+ * \return pointer to the newly appended instruction.
+ */
+static struct brw_instruction *next_insn( struct brw_compile *p,
+                                          GLuint opcode )
+{
+   struct brw_instruction *slot;
+
+   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
+
+   slot = &p->store[p->nr_insn];
+   p->nr_insn++;
+   memcpy(slot, p->current, sizeof(*slot));
+
+   /* Reset this one-shot flag in the defaults: */
+   if (p->current->header.destreg__conditionalmod != 0) {
+      p->current->header.destreg__conditionalmod = 0;
+      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+   }
+
+   slot->header.opcode = opcode;
+   return slot;
+}
+
+
+/**
+ * Emit a one-source instruction: allocate it with the given opcode,
+ * then encode the destination followed by the source operand.
+ *
+ * \return the emitted instruction.
+ */
+static struct brw_instruction *brw_alu1( struct brw_compile *p,
+                                         GLuint opcode,
+                                         struct brw_reg dest,
+                                         struct brw_reg src )
+{
+   struct brw_instruction *inst = next_insn(p, opcode);
+
+   /* The destination goes first: it also sets the execution size. */
+   brw_set_dest(inst, dest);
+   brw_set_src0(inst, src);
+
+   return inst;
+}
+
+/**
+ * Emit a two-source instruction: allocate it with the given opcode,
+ * then encode the destination, src0 and src1 in that order.
+ *
+ * \return the emitted instruction.
+ */
+static struct brw_instruction *brw_alu2(struct brw_compile *p,
+                                        GLuint opcode,
+                                        struct brw_reg dest,
+                                        struct brw_reg src0,
+                                        struct brw_reg src1 )
+{
+   struct brw_instruction *inst = next_insn(p, opcode);
+
+   /* The destination goes first: it also sets the execution size. */
+   brw_set_dest(inst, dest);
+   brw_set_src0(inst, src0);
+   brw_set_src1(inst, src1);
+
+   return inst;
+}
+
+
+/***********************************************************************
+ * Convenience routines.
+ */
+/* ALU1(OP) defines the public emitter brw_OP(p, dest, src0), which
+ * forwards to brw_alu1() with opcode BRW_OPCODE_OP.
+ */
+#define ALU1(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0)   			\
+{							\
+   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
+}
+
+/* ALU2(OP) defines the public emitter brw_OP(p, dest, src0, src1),
+ * which forwards to brw_alu2() with opcode BRW_OPCODE_OP.
+ */
+#define ALU2(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0,			\
+	      struct brw_reg src1)   			\
+{							\
+   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
+}
+
+
+/* Instantiate one emitter per opcode; each line below expands to a
+ * complete function definition named brw_<OP>.
+ */
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDZ)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+ALU2(PLN)
+
+
+
+/**
+ * Emit a NOP instruction.
+ *
+ * Destination and src0 are both brw_vec4_grf(0,0) retyped to UD, and
+ * src1 is the unsigned immediate 0.
+ */
+void brw_NOP(struct brw_compile *p)
+{
+   struct brw_instruction *nop = next_insn(p, BRW_OPCODE_NOP);
+   const struct brw_reg g0_ud = retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD);
+
+   brw_set_dest(nop, g0_ud);
+   brw_set_src0(nop, g0_ud);
+   brw_set_src1(nop, brw_imm_ud(0x0));
+}
+
+
+
+
+
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+
+/**
+ * Emit a JMPI instruction with the given operands.
+ *
+ * The instruction is emitted uncompressed with masking disabled, and
+ * the default predicate control for following instructions is reset to
+ * BRW_PREDICATE_NONE.
+ *
+ * \return the emitted instruction, so the caller can patch it later.
+ */
+struct brw_instruction *brw_JMPI(struct brw_compile *p,
+                                 struct brw_reg dest,
+                                 struct brw_reg src0,
+                                 struct brw_reg src1)
+{
+   struct brw_instruction *jmp = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+
+   /* NOTE(review): the execution size is written as the raw value 1
+    * rather than a BRW_EXECUTE_* name - confirm the intended encoding.
+    */
+   jmp->header.execution_size = 1;
+   jmp->header.compression_control = BRW_COMPRESSION_NONE;
+   jmp->header.mask_control = BRW_MASK_DISABLE;
+
+   /* The predicate applies to the jump only, not to what follows. */
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   return jmp;
+}
+
+/* EU takes the value from the flag register and pushes it onto some
+ * sort of a stack (presumably merging with any flag value already on
+ * the stack).  Within an if block, the flags at the top of the stack
+ * control execution on each channel of the unit, eg. on each of the
+ * 16 pixel values in our wm programs.
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+ * functions), the relevant flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off.  If the stack is now empty, normal execution resumes.
+ *
+ * No attempt is made to deal with stack overflow (14 elements?).
+ *
+ * In single-program-flow mode no IF is emitted: a predicate-inverted
+ * ADD on the IP register is emitted instead (execute_size must then be
+ * BRW_EXECUTE_1).  The immediate operand is left at 0 and is patched
+ * later by brw_ELSE() / brw_ENDIF().
+ *
+ * Returns the emitted instruction for later patching.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
+{
+   struct brw_instruction *insn;
+
+   if (!p->single_program_flow) {
+      insn = next_insn(p, BRW_OPCODE_IF);
+   } else {
+      assert(execute_size == BRW_EXECUTE_1);
+
+      insn = next_insn(p, BRW_OPCODE_ADD);
+      insn->header.predicate_inverse = 1;
+   }
+
+   /* Operands: ip = ip + 0 (the immediate is patched later). */
+   brw_set_dest(insn, brw_ip_reg());
+   brw_set_src0(insn, brw_ip_reg());
+   brw_set_src1(insn, brw_imm_d(0x0));
+
+   /* Override the defaults for this instruction:
+    */
+   insn->header.execution_size = execute_size;
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
+   insn->header.mask_control = BRW_MASK_ENABLE;
+   if (!p->single_program_flow)
+      insn->header.thread_control = BRW_THREAD_SWITCH;
+
+   /* Instructions inside the block are not predicated by default. */
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   return insn;
+}
+
+
+/**
+ * Emit the ELSE of an if/else/endif construct and patch the matching
+ * IF so that it jumps here.
+ *
+ * In single-program-flow mode an ADD on the IP register is emitted
+ * instead of an ELSE, and the earlier ADD (if_insn) receives a byte
+ * offset of 16 per instruction.  Otherwise if_insn's jump count is set
+ * in instruction units, doubled on gen5.
+ *
+ * \param if_insn  instruction returned by the matching brw_IF().
+ * \return the emitted instruction, to be patched by brw_ENDIF().
+ */
+struct brw_instruction *brw_ELSE(struct brw_compile *p,
+                                 struct brw_instruction *if_insn)
+{
+   /* Jump-count scale: 2 on gen5, 1 everywhere else. */
+   const GLuint br = (p->brw->intel.gen == 5) ? 2 : 1;
+   struct brw_instruction *insn =
+      next_insn(p, p->single_program_flow ? BRW_OPCODE_ADD : BRW_OPCODE_ELSE);
+
+   brw_set_dest(insn, brw_ip_reg());
+   brw_set_src0(insn, brw_ip_reg());
+   brw_set_src1(insn, brw_imm_d(0x0));
+
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.execution_size = if_insn->header.execution_size;
+   insn->header.mask_control = BRW_MASK_ENABLE;
+
+   /* Patch the if instruction to point at this instruction.
+    */
+   if (p->single_program_flow) {
+      assert(if_insn->header.opcode == BRW_OPCODE_ADD);
+
+      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
+      return insn;
+   }
+
+   insn->header.thread_control = BRW_THREAD_SWITCH;
+
+   assert(if_insn->header.opcode == BRW_OPCODE_IF);
+
+   if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
+   if_insn->bits3.if_else.pop_count = 0;
+   if_insn->bits3.if_else.pad0 = 0;
+
+   return insn;
+}
+
+/**
+ * Close an if/else/endif construct and patch the pending IF or ELSE.
+ *
+ * \param patch_insn  the instruction returned by brw_IF() or, when
+ *                    there is an else branch, by brw_ELSE().
+ *
+ * In single-program-flow mode nothing is emitted; patch_insn (an ADD)
+ * simply receives the byte offset, 16 per instruction, to the next
+ * instruction slot.  Otherwise an ENDIF is emitted and patch_insn's
+ * jump count is set in instruction units (doubled on gen5); an IF
+ * without an ELSE is additionally rewritten into an IFF.
+ */
+void brw_ENDIF(struct brw_compile *p,
+               struct brw_instruction *patch_insn)
+{
+   /* Jump-count scale: 2 on gen5, 1 everywhere else. */
+   const GLuint br = (p->brw->intel.gen == 5) ? 2 : 1;
+   struct brw_instruction *insn;
+
+   if (p->single_program_flow) {
+      /* In single program flow mode, there's no need to execute an ENDIF,
+       * since we don't need to do any stack operations, and if we're executing
+       * currently, we want to just continue executing.
+       */
+      struct brw_instruction *next = &p->store[p->nr_insn];
+
+      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
+
+      patch_insn->bits3.ud = (next - patch_insn) * 16;
+      return;
+   }
+
+   insn = next_insn(p, BRW_OPCODE_ENDIF);
+
+   brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+   brw_set_src1(insn, brw_imm_d(0x0));
+
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.execution_size = patch_insn->header.execution_size;
+   insn->header.mask_control = BRW_MASK_ENABLE;
+   insn->header.thread_control = BRW_THREAD_SWITCH;
+
+   /* The instruction being patched must not have been patched yet. */
+   assert(patch_insn->bits3.if_else.jump_count == 0);
+
+   /* Patch the if or else instructions to point at this or the next
+    * instruction respectively.
+    */
+   switch (patch_insn->header.opcode) {
+   case BRW_OPCODE_IF:
+      /* Automagically turn it into an IFF:
+       */
+      patch_insn->header.opcode = BRW_OPCODE_IFF;
+      patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
+      patch_insn->bits3.if_else.pop_count = 0;
+      patch_insn->bits3.if_else.pad0 = 0;
+      break;
+   case BRW_OPCODE_ELSE:
+      patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
+      patch_insn->bits3.if_else.pop_count = 1;
+      patch_insn->bits3.if_else.pad0 = 0;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   /* Also pop item off the stack in the endif instruction:
+    */
+   insn->bits3.if_else.jump_count = 0;
+   insn->bits3.if_else.pop_count = 1;
+   insn->bits3.if_else.pad0 = 0;
+}
+
+/**
+ * Emit a BREAK instruction operating on the IP register with a zero
+ * immediate, uncompressed, with execution size BRW_EXECUTE_8.
+ *
+ * \return the emitted instruction.
+ */
+struct brw_instruction *brw_BREAK(struct brw_compile *p)
+{
+   struct brw_instruction *brk = next_insn(p, BRW_OPCODE_BREAK);
+
+   brw_set_dest(brk, brw_ip_reg());
+   brw_set_src0(brk, brw_ip_reg());
+   brw_set_src1(brk, brw_imm_d(0x0));
+
+   /* Mask control is deliberately left at its default value. */
+   brk->header.compression_control = BRW_COMPRESSION_NONE;
+   brk->header.execution_size = BRW_EXECUTE_8;
+   brk->bits3.if_else.pad0 = 0;
+
+   return brk;
+}
+
+/**
+ * Emit a CONTINUE instruction operating on the IP register with a zero
+ * immediate, uncompressed, with execution size BRW_EXECUTE_8.
+ *
+ * \return the emitted instruction.
+ */
+struct brw_instruction *brw_CONT(struct brw_compile *p)
+{
+   struct brw_instruction *cont = next_insn(p, BRW_OPCODE_CONTINUE);
+
+   brw_set_dest(cont, brw_ip_reg());
+   brw_set_src0(cont, brw_ip_reg());
+   brw_set_src1(cont, brw_imm_d(0x0));
+
+   /* Mask control is deliberately left at its default value. */
+   cont->header.compression_control = BRW_COMPRESSION_NONE;
+   cont->header.execution_size = BRW_EXECUTE_8;
+   cont->bits3.if_else.pad0 = 0;
+
+   return cont;
+}
+
+/* DO/WHILE loop:
+ *
+ * brw_DO() marks the top of a loop.  In single-program-flow mode no
+ * instruction is emitted and the address of the next instruction slot
+ * is returned.  Otherwise an unpredicated, uncompressed DO with null
+ * operands and the requested execution size is emitted and returned.
+ * The result is meant to be handed to brw_WHILE().
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
+{
+   struct brw_instruction *insn;
+
+   if (p->single_program_flow)
+      return &p->store[p->nr_insn];
+
+   insn = next_insn(p, BRW_OPCODE_DO);
+
+   /* Override the defaults for this instruction:
+    */
+   brw_set_dest(insn, brw_null_reg());
+   brw_set_src0(insn, brw_null_reg());
+   brw_set_src1(insn, brw_null_reg());
+
+   /* Mask control is deliberately left at its default value. */
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.execution_size = execute_size;
+   insn->header.predicate_control = BRW_PREDICATE_NONE;
+
+   return insn;
+}
+
+
+
+/**
+ * Emit the instruction that closes a DO/WHILE loop and jumps back to
+ * its start.
+ *
+ * In single-program-flow mode this is an ADD on the IP register with a
+ * byte offset of 16 per instruction back to do_insn and an execution
+ * size of BRW_EXECUTE_1.  Otherwise a WHILE is emitted with do_insn's
+ * execution size and a jump count in instruction units (doubled on
+ * gen5).  The default predicate control for following instructions is
+ * reset to BRW_PREDICATE_NONE.
+ *
+ * \param do_insn  value returned by the matching brw_DO().
+ * \return the emitted instruction.
+ */
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+                                  struct brw_instruction *do_insn)
+{
+   /* Jump-count scale: 2 on gen5, 1 everywhere else. */
+   const GLuint br = (p->brw->intel.gen == 5) ? 2 : 1;
+   struct brw_instruction *insn =
+      next_insn(p, p->single_program_flow ? BRW_OPCODE_ADD : BRW_OPCODE_WHILE);
+
+   brw_set_dest(insn, brw_ip_reg());
+   brw_set_src0(insn, brw_ip_reg());
+   brw_set_src1(insn, brw_imm_d(0x0));
+
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+   if (!p->single_program_flow) {
+      insn->header.execution_size = do_insn->header.execution_size;
+
+      assert(do_insn->header.opcode == BRW_OPCODE_DO);
+      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
+      insn->bits3.if_else.pop_count = 0;
+      insn->bits3.if_else.pad0 = 0;
+   } else {
+      insn->header.execution_size = BRW_EXECUTE_1;
+
+      /* Negative byte distance back to the top of the loop. */
+      insn->bits3.d = (do_insn - insn) * 16;
+   }
+
+   /* Mask control is deliberately left at its default value. */
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+   return insn;
+}
+
+
+/* FORWARD JUMPS:
+ *
+ * Patch a previously emitted JMPI so that it lands on the next
+ * instruction to be emitted.  jmp_insn must be a JMPI whose src1 is an
+ * immediate; the immediate becomes the number of instructions between
+ * the jump and the landing point, doubled on gen5.
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+                       struct brw_instruction *jmp_insn)
+{
+   struct brw_instruction *target = &p->store[p->nr_insn];
+   /* Jump-distance scale: 2 on gen5, 1 everywhere else. */
+   const GLuint jmpi = (p->brw->intel.gen == 5) ? 2 : 1;
+
+   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
+
+   /* The distance excludes the jump instruction itself. */
+   jmp_insn->bits3.ud = jmpi * ((target - jmp_insn) - 1);
+}
+
+
+
+/* To integrate with the above, it makes sense that the comparison
+ * instruction should populate the flag register.  It might be simpler
+ * just to use the flag reg for most WM tasks?
+ *
+ * Emits a CMP of src0 against src1 using the given conditional
+ * modifier.  When the destination is register 0 of the architecture
+ * register file, following instructions are predicated on the result
+ * by default and p->flag_value is set to 0xff.
+ */
+void brw_CMP(struct brw_compile *p,
+             struct brw_reg dest,
+             GLuint conditional,
+             struct brw_reg src0,
+             struct brw_reg src1)
+{
+   struct brw_instruction *cmp = next_insn(p, BRW_OPCODE_CMP);
+   const int dest_is_arf0 = (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+                             dest.nr == 0);
+
+   cmp->header.destreg__conditionalmod = conditional;
+   brw_set_dest(cmp, dest);
+   brw_set_src0(cmp, src0);
+   brw_set_src1(cmp, src1);
+
+   /* Make it so that future instructions will use the computed flag
+    * value until brw_set_predicate_control_flag_value() is called
+    * again.
+    */
+   if (dest_is_arf0) {
+      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+      p->flag_value = 0xff;
+   }
+}
+
+
+
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+
+/** Extended math function, float[8].
+ *
+ * On gen >= 6 a MATH instruction is emitted, with the function code
+ * carried in the conditional-modifier bits and a null src1.  On earlier
+ * parts an unpredicated SEND to the math unit is emitted, with
+ * msg_reg_nr in the conditional-modifier bits; POW uses a message
+ * length of 2 and SINCOS a response length of 2, everything else 1.
+ */
+void brw_math( struct brw_compile *p,
+               struct brw_reg dest,
+               GLuint function,
+               GLuint saturate,
+               GLuint msg_reg_nr,
+               struct brw_reg src,
+               GLuint data_type,
+               GLuint precision )
+{
+   struct brw_instruction *insn;
+   GLuint msg_length;
+   GLuint response_length;
+
+   if (p->brw->intel.gen >= 6) {
+      insn = next_insn(p, BRW_OPCODE_MATH);
+
+      /* Math is the same ISA format as other opcodes, except that CondModifier
+       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
+       */
+      insn->header.destreg__conditionalmod = function;
+
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, src);
+      brw_set_src1(insn, brw_null_reg());
+      return;
+   }
+
+   insn = next_insn(p, BRW_OPCODE_SEND);
+   msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
+   response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
+
+   /* Example code doesn't set predicate_control for send
+    * instructions.
+    */
+   insn->header.predicate_control = 0;
+   insn->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_dest(insn, dest);
+   brw_set_src0(insn, src);
+   brw_set_math_message(p->brw,
+                        insn,
+                        msg_length, response_length,
+                        function,
+                        BRW_MATH_INTEGER_UNSIGNED,
+                        precision,
+                        saturate,
+                        data_type);
+}
+
+/**
+ * Extended math function, float[16].
+ * Gen6+ emits a single MATH instruction; earlier gens use 2 send instructions.
+ */
+void brw_math_16( struct brw_compile *p,
+		  struct brw_reg dest,
+		  GLuint function,
+		  GLuint saturate,
+		  GLuint msg_reg_nr,
+		  struct brw_reg src,
+		  GLuint precision )
+{
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_instruction *insn;
+   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 
+   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 
+
+   if (intel->gen >= 6) {
+      insn = next_insn(p, BRW_OPCODE_MATH);
+
+      /* Math is the same ISA format as other opcodes, except that CondModifier
+       * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
+       */
+      insn->header.destreg__conditionalmod = function;
+
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, src);
+      brw_set_src1(insn, brw_null_reg());
+      return;
+   }
+   /* Pre-gen6 only: msg_reg_nr, saturate and precision are used from here on. */
+   /* First instruction: emitted with compression disabled, after
+    * brw_set_predicate_control_flag_value(p, 0xff); uses msg_reg_nr. */
+   brw_push_insn_state(p);
+   brw_set_predicate_control_flag_value(p, 0xff);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+   insn = next_insn(p, BRW_OPCODE_SEND);
+   insn->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_dest(insn, dest);
+   brw_set_src0(insn, src);
+   brw_set_math_message(p->brw,
+			insn, 
+			msg_length, response_length, 
+			function,
+			BRW_MATH_INTEGER_UNSIGNED,
+			precision,
+			saturate,
+			BRW_MATH_DATA_VECTOR);
+
+   /* Second instruction: marked BRW_COMPRESSION_2NDHALF, using message
+    * register msg_reg_nr+1 and destination offset(dest,1); same src. */
+   insn = next_insn(p, BRW_OPCODE_SEND);
+   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
+   insn->header.destreg__conditionalmod = msg_reg_nr+1;
+
+   brw_set_dest(insn, offset(dest,1));
+   brw_set_src0(insn, src);
+   brw_set_math_message(p->brw, 
+			insn, 
+			msg_length, response_length, 
+			function,
+			BRW_MATH_INTEGER_UNSIGNED,
+			precision,
+			saturate,
+			BRW_MATH_DATA_VECTOR);
+
+   brw_pop_insn_state(p);
+}
+
+
+/**
+ * Write block of 16 dwords/floats to the data port Render Cache scratch
+ * buffer.  'scratch_offset' should be a multiple of 64.
+ * Used for register spilling.
+ */
+void brw_dp_WRITE_16( struct brw_compile *p,
+		      struct brw_reg src,
+		      GLuint scratch_offset )
+{
+   const GLuint mrf = 1;		/* message register number */
+   const GLuint mlen = 3;	/* message length */
+   const GLuint stateless = 255;	/* binding table index (255=stateless) */
+   struct brw_reg hdr_offset, null_uw;
+   struct brw_instruction *send;
+
+   /* Header setup: store the scratch offset into the message header
+    * global offset field (reg 0, element 2), with masking disabled and
+    * no compression.
+    */
+   hdr_offset = retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D);
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_MOV(p, hdr_offset, brw_imm_d(scratch_offset));
+   brw_pop_insn_state(p);
+
+   /* The write itself: a SEND with a null (UW) destination. */
+   null_uw = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   send->header.predicate_control = 0; /* XXX */
+   send->header.compression_control = BRW_COMPRESSION_NONE;
+   send->header.destreg__conditionalmod = mrf;
+
+   brw_set_dest(send, null_uw);
+   brw_set_src0(send, src);
+   brw_set_dp_write_message(p->brw,
+			    send,
+			    stateless,
+			    BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
+			    BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+			    mlen,
+			    0, /* pixel scoreboard */
+			    0, /* response_length */
+			    0); /* eot */
+}
+
+
+/**
+ * Read block of 16 dwords/floats from the data port Render Cache scratch
+ * buffer.  'scratch_offset' should be a multiple of 64.
+ * Used for register spilling.
+ */
+void brw_dp_READ_16( struct brw_compile *p,
+		      struct brw_reg dest,
+		      GLuint scratch_offset )
+{
+   const GLuint mrf = 1;		/* message register number */
+   const GLuint stateless = 255;	/* binding table index (255=stateless) */
+   struct brw_reg hdr_offset, payload;
+   struct brw_instruction *send;
+
+   /* Header setup: store the scratch offset into the message header global
+    * offset field (reg 0, element 2), uncompressed, masking disabled. */
+   hdr_offset = retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D);
+   brw_push_insn_state(p);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_MOV(p, hdr_offset, brw_imm_d(scratch_offset));
+   brw_pop_insn_state(p);
+
+   /* The read itself: GRF 0 (retyped to UW) is the source of the SEND. */
+   send = next_insn(p, BRW_OPCODE_SEND);
+   payload = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+   send->header.predicate_control = 0; /* XXX */
+   send->header.compression_control = BRW_COMPRESSION_NONE;
+   send->header.destreg__conditionalmod = mrf;
+
+   brw_set_dest(send, dest);	/* UW? */
+   brw_set_src0(send, payload);
+
+   brw_set_dp_read_message(p->brw,
+			   send,
+			   stateless,
+			   3,  /* msg_control (3 means 4 Owords) */
+			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+			   1, /* target cache (render/scratch) */
+			   1, /* msg_length */
+			   2, /* response_length */
+			   0); /* eot */
+}
+
+
+/**
+ * Read a float[4] vector from the data port Data Cache (const buffer).
+ * 'location' (in buffer) should be a multiple of 16.
+ * Used for fetching shader constants.
+ * XXX: 'relAddr' is accepted but indirect fetches are not implemented;
+ * the argument is ignored here.
+ */
+void brw_dp_READ_4( struct brw_compile *p,
+                    struct brw_reg dest,
+                    GLboolean relAddr,
+                    GLuint location,
+                    GLuint bind_table_index )
+{
+   const GLuint mrf = 1;	/* message register number */
+   struct brw_reg header, dest_uw;
+   struct brw_instruction *send;
+
+   /* Setup MRF[1] with location/offset into const buffer.  The MOV is
+    * emitted unpredicated, uncompressed and with masking disabled.
+    */
+   brw_push_insn_state(p);
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+   header = retype(brw_message_reg(mrf), BRW_REGISTER_TYPE_UD);
+   /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+    * when the docs say only dword[2] should be set.  Hmmm.  But it works.
+    */
+   brw_MOV(p, header, brw_imm_ud(location));
+   brw_pop_insn_state(p);
+
+   /* Emit the read; these header fields are set on the SEND directly. */
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   send->header.predicate_control = BRW_PREDICATE_NONE;
+   send->header.compression_control = BRW_COMPRESSION_NONE;
+   send->header.destreg__conditionalmod = mrf;
+   send->header.mask_control = BRW_MASK_DISABLE;
+
+   /* cast dest to a uword[8] vector */
+   dest_uw = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+
+   brw_set_dest(send, dest_uw);
+   brw_set_src0(send, brw_null_reg());
+
+   brw_set_dp_read_message(p->brw,
+			   send,
+			   bind_table_index,
+			   0,  /* msg_control (0 means 1 Oword) */
+			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+			   0, /* source cache = data cache */
+			   1, /* msg_length */
+			   1, /* response_length (1 Oword) */
+			   0); /* eot */
+}
+
+
+/**
+ * Read float[4] constant(s) from VS constant buffer.
+ * The buffer offset is first written to MRF[1] ('location', plus
+ * 'addrReg' when 'relAddr' is set); a SEND then issues an Oword block
+ * read from the data cache into 'dest'.
+ * 'oword' must be 0 (lower Oword) or 1 (upper Oword).
+ */
+void brw_dp_READ_4_vs(struct brw_compile *p,
+                      struct brw_reg dest,
+                      GLuint oword,
+                      GLboolean relAddr,
+                      struct brw_reg addrReg,
+                      GLuint location,
+                      GLuint bind_table_index)
+{
+   const GLuint mrf = 1;	/* message register number */
+   struct brw_reg header;
+   struct brw_instruction *send;
+
+   assert(oword < 2);
+
+   /* Setup MRF[1] with location/offset into const buffer.  The ALU
+    * instruction is emitted uncompressed, with masking disabled and
+    * unpredicated; the access mode is not changed.
+    */
+   brw_push_insn_state(p);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+    * when the docs say only dword[2] should be set.  Hmmm.  But it works.
+    * (The whole register is used; it is not narrowed to element 2.)
+    */
+   header = retype(brw_message_reg(mrf), BRW_REGISTER_TYPE_UD);
+
+   if (!relAddr) {
+      /* absolute: offset is just 'location' */
+      brw_MOV(p, header, brw_imm_ud(location));
+   }
+   else {
+      /* relative: offset is addrReg + location */
+      brw_ADD(p, header, addrReg, brw_imm_ud(location));
+   }
+
+   brw_pop_insn_state(p);
+
+   /* Emit the read.  These header fields are set on the SEND directly;
+    * its access mode is not overridden either.
+    */
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   send->header.predicate_control = BRW_PREDICATE_NONE;
+   send->header.compression_control = BRW_COMPRESSION_NONE;
+   send->header.destreg__conditionalmod = mrf;
+   send->header.mask_control = BRW_MASK_DISABLE;
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, brw_null_reg());
+
+   brw_set_dp_read_message(p->brw,
+			   send,
+			   bind_table_index,
+			   oword,  /* 0 = lower Oword, 1 = upper Oword */
+			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+			   0, /* source cache = data cache */
+			   1, /* msg_length */
+			   1, /* response_length (1 Oword) */
+			   0); /* eot */
+}
+
+
+
+void brw_fb_WRITE(struct brw_compile *p,
+                  struct brw_reg dest,
+                  GLuint msg_reg_nr,
+                  struct brw_reg src0,
+                  GLuint binding_table_index,
+                  GLuint msg_length,
+                  GLuint response_length,
+                  GLboolean eot)
+{
+   /* Render-target write: one SEND using the SIMD16 single-source form. */
+   const GLuint msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
+   const GLuint msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+   struct brw_instruction *send = next_insn(p, BRW_OPCODE_SEND);
+
+   send->header.predicate_control = 0; /* XXX */
+   send->header.compression_control = BRW_COMPRESSION_NONE;
+   send->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, src0);
+   brw_set_dp_write_message(p->brw, send,
+			    binding_table_index,
+			    msg_control, msg_type,
+			    msg_length,
+			    1,	/* pixel scoreboard */
+			    response_length, eot);
+}
+
+
+/**
+ * Texture sample instruction.  Emits nothing if 'writemask' is zero.
+ * Note: the msg_type plus msg_length values determine exactly what kind
+ * of sampling operation is performed.  See volume 4, page 161 of docs.
+ */
+void brw_SAMPLE(struct brw_compile *p,
+		struct brw_reg dest,
+		GLuint msg_reg_nr,
+		struct brw_reg src0,
+		GLuint binding_table_index,
+		GLuint sampler,
+		GLuint writemask,
+		GLuint msg_type,
+		GLuint response_length,
+		GLuint msg_length,
+		GLboolean eot,
+		GLuint header_present,
+		GLuint simd_mode)
+{
+   GLboolean need_stall = 0;
+
+   if (writemask == 0) {
+      /* no channel is written: skip the sample entirely */
+      return;
+   }
+   
+   /* Hardware doesn't do destination dependency checking on send
+    * instructions properly.  Add a workaround which generates the
+    * dependency by other means.  In practice it seems like this bug
+    * only crops up for texture samples, and only where registers are
+    * written by the send and then written again later without being
+    * read in between.  Luckily for us, we already track that
+    * information and use it to modify the writemask for the
+    * instruction, so that is a guide for whether a workaround is
+    * needed.
+    */
+   if (writemask != WRITEMASK_XYZW) {
+      GLuint dst_offset = 0;
+      GLuint i, newmask = 0, len = 0;
+      /* Skip leading unset channels (dst_offset += 2 each), then gather the first contiguous run. */
+      for (i = 0; i < 4; i++) {
+	 if (writemask & (1<<i))
+	    break;
+	 dst_offset += 2;
+      }
+      for (; i < 4; i++) {
+	 if (!(writemask & (1<<i)))
+	    break;
+	 newmask |= 1<<i;
+	 len++;
+      }
+
+      if (newmask != writemask) {
+	 need_stall = 1;
+         /* writemask has set bits beyond the first run: use the MOV after the send */
+      }
+      else {
+	 GLboolean dispatch_16 = GL_FALSE;
+
+	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
+
+	 guess_execution_size(p->current, dest);
+	 if (p->current->header.execution_size == BRW_EXECUTE_16)
+	    dispatch_16 = GL_TRUE;
+
+	 newmask = ~newmask & WRITEMASK_XYZW;
+
+	 brw_push_insn_state(p);
+
+	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+	 brw_set_mask_control(p, BRW_MASK_DISABLE);
+	 /* m1 = GRF 0, then dword 2 of m1 = inverted mask shifted left by 12 */
+	 brw_MOV(p, m1, brw_vec8_grf(0,0));	 
+  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 
+
+	 brw_pop_insn_state(p);
+
+  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
+	 dest = offset(dest, dst_offset);
+
+	 /* For 16-wide dispatch, masked channels are skipped in the
+	  * response.  For 8-wide, masked channels still take up slots,
+	  * and are just not written to.
+	  */
+	 if (dispatch_16)
+	    response_length = len * 2;
+      }
+   }
+   /* The sample message itself. */
+   {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   
+      insn->header.predicate_control = 0; /* XXX */
+      insn->header.compression_control = BRW_COMPRESSION_NONE;
+      insn->header.destreg__conditionalmod = msg_reg_nr;
+
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, src0);
+      brw_set_sampler_message(p->brw, insn,
+			      binding_table_index,
+			      sampler,
+			      msg_type,
+			      response_length, 
+			      msg_length,
+			      eot,
+			      header_present,
+			      simd_mode);
+   }
+   /* Workaround path: MOV the last response register onto itself. */
+   if (need_stall) {
+      struct brw_reg reg = vec8(offset(dest, response_length-1));
+
+      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
+       */
+      brw_push_insn_state(p);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_MOV(p, reg, reg);	      
+      brw_pop_insn_state(p);
+   }
+
+}
+
+/* Emit a URB write message.  The argument list is pretty confusing - we
+ * might be better off using bitmasks and macros for this, in the old
+ * style, or just having the caller instantiate the fields in dword3.
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   GLboolean allocate,
+		   GLboolean used,
+		   GLuint msg_length,
+		   GLuint response_length,
+		   GLboolean eot,
+		   GLboolean writes_complete,
+		   GLuint offset,
+		   GLuint swizzle)
+{
+   const GLboolean gen6_plus = (p->brw->intel.gen >= 6);
+   struct brw_instruction *send;
+
+   if (gen6_plus) {
+      /* Sandybridge doesn't have the implied move for SENDs, and the
+       * first message register index comes from src0: do the move by
+       * hand (masking disabled) and send from the message register.
+       */
+      const struct brw_reg mrf = brw_message_reg(msg_reg_nr);
+
+      brw_push_insn_state(p);
+      brw_set_mask_control( p, BRW_MASK_DISABLE );
+      brw_MOV(p, mrf, src0);
+      brw_pop_insn_state(p);
+      src0 = mrf;
+   }
+
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   assert(msg_length < BRW_MAX_MRF);
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, src0);
+   brw_set_src1(send, brw_imm_d(0));
+
+   /* Before gen6 the message register number goes in the header. */
+   if (!gen6_plus)
+      send->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_urb_message(p->brw,
+		       send,
+		       allocate, used,
+		       msg_length, response_length,
+		       eot, writes_complete,
+		       offset, swizzle);
+}
+
+void brw_ff_sync(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   GLboolean allocate,
+		   GLuint response_length,
+		   GLboolean eot)
+{
+   /* Emit a SEND and let brw_set_ff_sync_message() fill in the message
+    * fields; src1 is set to an immediate 0 beforehand.
+    */
+   struct brw_instruction *sync = next_insn(p, BRW_OPCODE_SEND);
+
+   brw_set_dest(sync, dest);
+   brw_set_src0(sync, src0);
+   brw_set_src1(sync, brw_imm_d(0));
+
+   /* message register number */
+   sync->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_ff_sync_message(p->brw, sync, allocate, response_length, eot);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_eu_util.c b/src/mesa/drivers/dri/i965/brw_eu_util.c
new file mode 100644
index 0000000000..5405cf17a4
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_eu_util.c
@@ -0,0 +1,126 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+      
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+void brw_math_invert( struct brw_compile *p,
+			     struct brw_reg dst,
+			     struct brw_reg src)
+{
+   /* dst = BRW_MATH_FUNCTION_INV of src; brw_math() takes data_type, then precision. */
+   brw_math( p, dst,
+	     BRW_MATH_FUNCTION_INV,
+	     BRW_MATH_SATURATE_NONE,
+	     0,				/* msg_reg_nr */
+	     src,
+	     BRW_MATH_DATA_VECTOR,	/* data_type */
+	     BRW_MATH_PRECISION_FULL );	/* precision */
+}
+
+
+
+/* Copy src to dst, 32 bytes per step, as two vec4 MOVs (+0 and +16). */
+void brw_copy4(struct brw_compile *p,
+	       struct brw_reg dst,
+	       struct brw_reg src,
+	       GLuint count)
+{
+   const struct brw_reg to = vec4(dst);
+   const struct brw_reg from = vec4(src);
+   GLuint n, lo;
+
+   /* 'lo' is the byte offset of the current step, 'hi' its upper half */
+   for (n = 0, lo = 0; n < count; n++, lo += 32) {
+      const GLuint hi = lo + 16;
+      brw_MOV(p, byte_offset(to, lo), byte_offset(from, lo));
+      brw_MOV(p, byte_offset(to, hi), byte_offset(from, hi));
+   }
+}
+
+
+/* Copy src to dst with 'count' vec8 MOVs, one per successive 32-byte
+ * offset. */
+void brw_copy8(struct brw_compile *p,
+	       struct brw_reg dst,
+	       struct brw_reg src,
+	       GLuint count)
+{
+   const struct brw_reg to = vec8(dst);
+   const struct brw_reg from = vec8(src);
+   GLuint n, at;
+
+   /* 'at' is the byte offset applied to both registers */
+   for (n = 0, at = 0; n < count; n++, at += 32) {
+      brw_MOV(p, byte_offset(to, at), byte_offset(from, at));
+   }
+}
+
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+				   struct brw_indirect dst_ptr,
+				   struct brw_indirect src_ptr,
+				   GLuint count)
+{
+   GLuint n, lo;
+
+   /* 32 bytes per iteration, moved as two deref_4f() halves (+0, +16) */
+   for (n = 0, lo = 0; n < count; n++, lo += 32) {
+      const GLuint hi = lo + 16;
+      brw_MOV(p, deref_4f(dst_ptr, lo), deref_4f(src_ptr, lo));
+      brw_MOV(p, deref_4f(dst_ptr, hi), deref_4f(src_ptr, hi));
+   }
+}
+
+
+void brw_copy_from_indirect(struct brw_compile *p,
+			    struct brw_reg dst,
+			    struct brw_indirect ptr,
+			    GLuint count)
+{
+   const struct brw_reg to = vec4(dst);
+   GLuint n, lo;
+
+   /* 32 bytes per iteration: two MOVs from deref_4f(ptr, ...) into the
+    * vec4 view of dst, at byte offsets +0 and +16 */
+   for (n = 0, lo = 0; n < count; n++, lo += 32) {
+      const GLuint hi = lo + 16;
+      brw_MOV(p, byte_offset(to, lo), deref_4f(ptr, lo));
+      brw_MOV(p, byte_offset(to, hi), deref_4f(ptr, hi));
+   }
+}
+
+
+
+
diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c
new file mode 100644
index 0000000000..ba401c215c
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fallback.c
@@ -0,0 +1,141 @@
+/**************************************************************************
+ * 
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "swrast/swrast.h"
+#include "tnl/tnl.h"
+#include "brw_context.h"
+#include "intel_fbo.h"
+#include "intel_regions.h"
+
+#define FILE_DEBUG_FLAG DEBUG_FALLBACKS
+
+static GLboolean do_check_fallback(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLuint i;
+   /* Returns GL_TRUE as soon as one fallback condition below holds, else GL_FALSE. */
+   if (brw->intel.no_rast) {
+      DBG("FALLBACK: rasterization disabled\n");
+      return GL_TRUE;
+   }
+
+   /* _NEW_RENDERMODE: anything other than GL_RENDER falls back
+    */
+   if (ctx->RenderMode != GL_RENDER) {
+      DBG("FALLBACK: render mode\n");
+      return GL_TRUE;
+   }
+
+   /* _NEW_TEXTURE: an enabled unit whose firstLevel image has a border
+    */
+   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+      struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
+      if (texUnit->_ReallyEnabled) {
+	 struct intel_texture_object *intelObj = intel_texture_object(texUnit->_Current);
+	 struct gl_texture_image *texImage = intelObj->base.Image[0][intelObj->firstLevel];
+	 if (texImage->Border) {
+	    DBG("FALLBACK: texture border\n");
+	    return GL_TRUE;
+	 }
+      }
+   }
+   
+   /* _NEW_STENCIL: stencil enabled, DrawBuffer->Name == 0, no hw_stencil
+    */
+   if (ctx->Stencil._Enabled &&
+       (ctx->DrawBuffer->Name == 0 && !brw->intel.hw_stencil)) {
+      DBG("FALLBACK: stencil\n");
+      return GL_TRUE;
+   }
+
+   /* _NEW_BUFFERS: only checked when has_surface_tile_offset is unset */
+   if (!brw->has_surface_tile_offset) {
+      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+	 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+	 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+	 /* The original gen4 hardware couldn't set up WM surfaces pointing
+	  * at an offset within a tile, which can happen when rendering to
+	  * anything but the base level of a texture or the +X face/0 depth.
+	  * This was fixed with the 4 Series hardware.
+	  *
+	  * For these original chips, you would have to make the depth and
+	  * color destination surfaces include information on the texture
+	  * type, LOD, face, and various limits to use them as a destination.
+	  * I would have done this, but there's also a nasty requirement that
+	  * the depth and the color surfaces all be of the same LOD, which
+	  * may be a worse requirement than this alignment.  (Also, we may
+	  * want to just demote the texture to untiled, instead).
+	  */
+	 if (irb->region && irb->region->tiling != I915_TILING_NONE &&
+	     (irb->region->draw_offset & 4095)) {
+	    DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n");
+	    return GL_TRUE;
+	 }
+      }
+   }
+
+   return GL_FALSE;
+}
+
+static void check_fallback(struct brw_context *brw)
+{
+   brw->intel.Fallback = do_check_fallback(brw); /* store the verdict on the context */
+}
+
+const struct brw_tracked_state brw_check_fallback = {
+   .dirty = {	/* .mesa lists the _NEW_* flags tagged in do_check_fallback() */
+      .mesa = _NEW_BUFFERS | _NEW_RENDERMODE | _NEW_TEXTURE | _NEW_STENCIL,
+      .brw  = 0,
+      .cache = 0
+   },
+   .prepare = check_fallback	/* writes brw->intel.Fallback */
+};
+
+
+
+
+/**
+ * Called by the INTEL_FALLBACK() macro.
+ * NOTE: this is a no-op for the i965 driver; all three arguments are
+ * ignored.  The brw->intel.Fallback field is treated as a boolean, not a
+ * bitmask.  It's only set in a couple of places (check_fallback() here).
+ */
+void intelFallback( struct intel_context *intel, GLuint bit, GLboolean mode )
+{
+}
+
+
+
diff --git a/src/mesa/drivers/dri/i965/brw_fallback.h b/src/mesa/drivers/dri/i965/brw_fallback.h
new file mode 100644
index 0000000000..50dcdacd17
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fallback.h
@@ -0,0 +1,47 @@
+ /**************************************************************************
+ * 
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef BRW_FALLBACK_H
+#define BRW_FALLBACK_H
+
+#include "main/mtypes.h"		/* for GLcontext... */
+
+struct brw_context;
+struct vbo_prim;
+/* NOTE(review): brw_fallback.c as added here defines none of these - confirm they are still implemented. */
+void brw_fallback( GLcontext *ctx );
+void brw_unfallback( GLcontext *ctx );
+
+void brw_loopback_vertex_list( GLcontext *ctx,
+			       const GLfloat *buffer,
+			       const GLubyte *attrsz,
+			       const struct vbo_prim *prim,
+			       GLuint prim_count,
+			       GLuint wrap_count,
+			       GLuint vertex_size);
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
new file mode 100644
index 0000000000..5409e55788
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -0,0 +1,222 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+      
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_gs.h"
+
+
+
+static void compile_gs_prog( struct brw_context *brw,
+			     struct brw_gs_prog_key *key )
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_gs_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   /* Generate the GS program selected by key->primitive and upload it to the cache. */
+   memset(&c, 0, sizeof(c));
+   
+   c.key = *key;
+   /* Need to locate the two positions present in vertex + header.
+    * These are currently hardcoded:
+    */
+   c.nr_attrs = brw_count_bits(c.key.attrs);
+
+   if (intel->gen == 5)
+       c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
+   else
+       c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
+
+   c.nr_bytes = c.nr_regs * REG_SIZE;
+
+   
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+
+   c.func.single_program_flow = 1;
+
+   /* For some reason the thread is spawned with only 4 channels
+    * unmasked.  
+    */
+   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+   /* Note that primitives which don't require a GS program have
+    * already been weeded out by this stage; the bare 'return' cases
+    * below upload nothing and leave brw->gs.prog_bo as the caller set it. */
+   switch (key->primitive) {
+   case GL_QUADS:
+      brw_gs_quads( &c, key );
+      break;
+   case GL_QUAD_STRIP:
+      brw_gs_quad_strip( &c, key );
+      break;
+   case GL_LINE_LOOP:
+      brw_gs_lines( &c );
+      break;
+   case GL_LINES:
+      if (key->hint_gs_always)
+	 brw_gs_lines( &c );
+      else {
+	 return;
+      }
+      break;
+   case GL_TRIANGLES:
+      if (key->hint_gs_always)
+	 brw_gs_tris( &c );
+      else {
+	 return;
+      }
+      break;
+   case GL_POINTS:
+      if (key->hint_gs_always)
+	 brw_gs_points( &c );
+      else {
+	 return;
+      }
+      break;      
+   default:
+      return;
+   }
+
+   /* get the program (pointer to the instruction words plus its size)
+    */
+   program = brw_get_program(&c.func, &program_size);
+
+    if (INTEL_DEBUG & DEBUG_GS) {
+       int i;
+
+      printf("gs:\n");
+      for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
+	 brw_disasm(stdout, &((struct brw_instruction *)program)[i],
+		    intel->gen);
+      printf("\n");
+    }
+
+   /* Upload: drop the reference held in brw->gs.prog_bo and replace it
+    * with the buffer returned by brw_upload_cache_with_auxdata(). */
+   drm_intel_bo_unreference(brw->gs.prog_bo);
+   brw->gs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_GS_PROG,
+						   &c.key, sizeof(c.key),
+						   NULL, 0,
+						   program, program_size,
+						   &c.prog_data,
+						   sizeof(c.prog_data),
+						   &brw->gs.prog_data);
+}
+
+static const GLenum gs_prim[GL_POLYGON+1] = {	/* indexed by GL primitive */
+   [GL_POINTS]         = GL_POINTS,
+   [GL_LINES]          = GL_LINES,
+   [GL_LINE_LOOP]      = GL_LINE_LOOP,
+   [GL_LINE_STRIP]     = GL_LINES,
+   [GL_TRIANGLES]      = GL_TRIANGLES,
+   [GL_TRIANGLE_STRIP] = GL_TRIANGLES,
+   [GL_TRIANGLE_FAN]   = GL_TRIANGLES,
+   [GL_QUADS]          = GL_QUADS,
+   [GL_QUAD_STRIP]     = GL_QUAD_STRIP,
+   [GL_POLYGON]        = GL_TRIANGLES
+};
+
+static void populate_key( struct brw_context *brw,
+			  struct brw_gs_prog_key *key )
+{
+   const GLcontext *ctx = &brw->intel.ctx;
+   GLboolean nonflat_quads;
+
+   memset(key, 0, sizeof(*key));
+
+   /* CACHE_NEW_VS_PROG */
+   key->attrs = brw->vs.prog_data->outputs_written;
+
+   /* BRW_NEW_PRIMITIVE */
+   key->primitive = gs_prim[brw->primitive];
+   key->hint_gs_always = 0;	/* debug code? */
+
+   /* _NEW_LIGHT: non-flat-shaded quads always get pv_first, to provide
+    * consistent primitive order with brw_set_prim's optimization of
+    * single quads to trifans. */
+   nonflat_quads = (key->primitive == GL_QUADS &&
+                    ctx->Light.ShadeModel != GL_FLAT);
+   key->pv_first = (nonflat_quads ||
+                    ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
+
+   key->need_gs_prog = (key->hint_gs_always ||
+			brw->primitive == GL_QUADS ||
+			brw->primitive == GL_QUAD_STRIP ||
+			brw->primitive == GL_LINE_LOOP);
+}
+
+/* Select the GS program for the current state: build the key, track
+ * whether a GS program is active, and fetch or compile it. */
+static void prepare_gs_prog(struct brw_context *brw)
+{
+   struct brw_gs_prog_key key;
+
+   populate_key(brw, &key);
+
+   /* Flag CACHE_NEW_GS_PROG when prog_active changes. */
+   if (brw->gs.prog_active != key.need_gs_prog) {
+      brw->state.dirty.cache |= CACHE_NEW_GS_PROG;
+      brw->gs.prog_active = key.need_gs_prog;
+   }
+
+   if (!brw->gs.prog_active)
+      return;
+
+   /* Drop the held program BO, look the key up in the cache, compile on a miss. */
+   drm_intel_bo_unreference(brw->gs.prog_bo);
+   brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
+				      &key, sizeof(key), NULL, 0, &brw->gs.prog_data);
+   if (brw->gs.prog_bo == NULL)
+      compile_gs_prog( brw, &key );
+}
+
+
+const struct brw_tracked_state brw_gs_prog = {
+   .dirty = {	/* the same state flags are tagged in populate_key() */
+      .mesa  = _NEW_LIGHT,
+      .brw   = BRW_NEW_PRIMITIVE,
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .prepare = prepare_gs_prog	/* looks up or compiles the GS program */
+};
diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h
new file mode 100644
index 0000000000..813b8d447a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_gs.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+#ifndef BRW_GS_H
+#define BRW_GS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_GS_VERTS (4)	     /* size of brw_gs_compile.reg.vertex[]; quads use all four */
+
+struct brw_gs_prog_key {
+   GLbitfield64 attrs;		/* copy of the VS program's outputs_written */
+   GLuint primitive:4;		/* gs_prim[] entry for the current primitive */
+   GLuint hint_gs_always:1;	/* forces need_gs_prog; populate_key() always stores 0 */
+   GLuint pv_first:1;		/* first-vertex provoking convention (also forced for smooth quads) */
+   GLuint need_gs_prog:1;	/* set for GL_QUADS, GL_QUAD_STRIP and GL_LINE_LOOP */
+   GLuint pad:25;		/* fills the bitfields out to 32 bits */
+};
+
+struct brw_gs_compile {
+   struct brw_compile func;	/* passed as 'p' to the brw_MOV()/brw_urb_WRITE() emitters */
+   struct brw_gs_prog_key key;
+   struct brw_gs_prog_data prog_data;	/* urb_read_length and total_grf are filled by brw_gs_alloc_regs() */
+   
+   struct {
+      struct brw_reg R0;	/* GRF 0 viewed as UD; doubles as the URB message header */
+      struct brw_reg vertex[MAX_GS_VERTS];	/* first GRF of each payload vertex */
+   } reg;
+
+   /* 3 different ways of expressing vertex size:
+    */
+   GLuint nr_attrs;
+   GLuint nr_regs;	/* GRFs per vertex; the stride used when laying out vertex[] */
+   GLuint nr_bytes;
+};
+
+#define ATTR_SIZE  (4*4)	/* NOTE(review): presumably bytes per attribute (4 components x 4 bytes) - confirm */
+/* Program emitters, one per input primitive type; defined in brw_gs_emit.c. */
+void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
+void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
+void brw_gs_tris( struct brw_gs_compile *c );
+void brw_gs_lines( struct brw_gs_compile *c );
+void brw_gs_points( struct brw_gs_compile *c );
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c
new file mode 100644
index 0000000000..99a6f6be11
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c
@@ -0,0 +1,206 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "shader/program.h"
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_gs.h"
+
+static void brw_gs_alloc_regs( struct brw_gs_compile *c,
+			       GLuint nr_verts )
+{
+   GLuint next_grf;
+   GLuint v;
+
+   /* The register layout is static: GRF 0 holds R0 (viewed as UD) and
+    * the payload vertices follow, each one c->nr_regs registers apart.
+    */
+   c->reg.R0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
+   next_grf = 1;
+
+   for (v = 0; v < nr_verts; v++, next_grf += c->nr_regs)
+      c->reg.vertex[v] = brw_vec4_grf(next_grf, 0);
+
+   /* Publish the per-vertex URB read length and the GRF total. */
+   c->prog_data.total_grf = next_grf;
+   c->prog_data.urb_read_length = c->nr_regs;
+}
+
+
+static void brw_gs_emit_vue(struct brw_gs_compile *c, 
+			    struct brw_reg vert,	/* first register of the vertex to send */
+			    GLboolean last,	/* nonzero for the final vertex of the primitive */
+			    GLuint header)	/* value stored in element 2 of R0 before the write */
+{
+   struct brw_compile *p = &c->func;
+   GLboolean allocate = !last;	/* every write except the last requests another URB entry */
+
+   /* Overwrite PrimType and PrimStart in the message header, for
+    * each vertex in turn:
+    */
+   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+   /* Copy the vertex from vertn into m1..mN+1:
+    */
+   brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);
+
+   /* Send each vertex as a separate write to the urb.  This is
+    * different to the concept in brw_sf_emit.c, where subsequent
+    * writes are used to build up a single urb entry.  Each of these
+    * writes instantiates a separate urb entry, and a new one must be
+    * allocated each time.
+    */
+   brw_urb_WRITE(p, 
+		 allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+		 0,
+		 c->reg.R0,
+		 allocate,
+		 1,		/* used */
+		 c->nr_regs + 1, /* msg length */
+		 allocate ? 1 : 0, /* response length */
+		 allocate ? 0 : 1, /* eot */
+		 1,		/* writes_complete */
+		 0,		/* urb offset */
+		 BRW_URB_SWIZZLE_NONE);
+}
+
+static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
+{
+	struct brw_compile *p = &c->func;
+	brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); /* element 1 of R0 = num_prim */
+	brw_ff_sync(p,
+		    c->reg.R0,	/* NOTE(review): presumably the destination - confirm against brw_ff_sync() */
+		    0,
+		    c->reg.R0,
+		    1, /* allocate */
+		    1, /* response length */
+		    0 /* eot */);
+}
+
+
+void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
+{
+   struct intel_context *intel = &c->func.brw->intel;
+   /* Emits the four vertices of a quad as one _3DPRIM_POLYGON. */
+   brw_gs_alloc_regs(c, 4);
+   
+   /* Use polygons for correct edgeflag behaviour. Note that vertex 3
+    * is the PV for quads, but vertex 0 for polygons:
+    */
+   if (intel->needs_ff_sync)
+	   brw_gs_ff_sync(c, 1);
+   if (key->pv_first) {	/* keep the incoming order 0,1,2,3 */
+      brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+      brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+      brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2));
+      brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+   }
+   else {	/* rotate so that vertex 3 is emitted first: 3,0,1,2 */
+      brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+      brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+      brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+      brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+   }
+}
+
+void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key )
+{
+   struct intel_context *intel = &c->func.brw->intel;
+   /* Emits four payload vertices as one _3DPRIM_POLYGON. */
+   brw_gs_alloc_regs(c, 4);
+   
+   if (intel->needs_ff_sync)
+	   brw_gs_ff_sync(c, 1);      
+   if (key->pv_first) {	/* keep the incoming order 0,1,2,3 */
+      brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+      brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+      brw_gs_emit_vue(c, c->reg.vertex[2], 0, (_3DPRIM_POLYGON << 2));
+      brw_gs_emit_vue(c, c->reg.vertex[3], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+   }
+   else {	/* rotated order 2,3,0,1 */
+      brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+      brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
+      brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+      brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+   }
+}
+
+void brw_gs_tris( struct brw_gs_compile *c )
+{
+   struct intel_context *intel = &c->func.brw->intel;
+   /* Emits the three payload vertices, in order, as one _3DPRIM_TRILIST. */
+   brw_gs_alloc_regs(c, 3);
+
+   if (intel->needs_ff_sync)
+	   brw_gs_ff_sync(c, 1);      
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START));
+   brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2));
+   brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END));
+}
+
+void brw_gs_lines( struct brw_gs_compile *c )
+{
+   struct intel_context *intel = &c->func.brw->intel;
+   /* Emits the two payload vertices, in order, as one _3DPRIM_LINESTRIP. */
+   brw_gs_alloc_regs(c, 2);
+
+   if (intel->needs_ff_sync)
+	   brw_gs_ff_sync(c, 1);      
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START));
+   brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END));
+}
+
+void brw_gs_points( struct brw_gs_compile *c )
+{
+   struct intel_context *intel = &c->func.brw->intel;
+   /* Emits the single payload vertex as a _3DPRIM_POINTLIST that both starts and ends. */
+   brw_gs_alloc_regs(c, 1);
+
+   if (intel->needs_ff_sync)
+	   brw_gs_ff_sync(c, 1);      
+   brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END));
+}
+
+
+
+
+
+
+
+
diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c
new file mode 100644
index 0000000000..63562ebcfc
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_gs_state.c
@@ -0,0 +1,146 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+struct brw_gs_unit_key {
+   unsigned int total_grf;	/* from gs.prog_data when a program is active, else 1 */
+   unsigned int urb_entry_read_length;	/* from gs.prog_data when a program is active, else 1 */
+
+   unsigned int curbe_offset;	/* copy of brw->curbe.clip_start */
+
+   unsigned int nr_urb_entries, urb_size;	/* brw->urb.nr_gs_entries and brw->urb.vsize */
+   GLboolean prog_active;	/* copy of brw->gs.prog_active */
+};
+
+static void
+gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+{
+   memset(key, 0, sizeof(*key));
+
+   /* BRW_NEW_URB_FENCE */
+   key->urb_size = brw->urb.vsize;
+   key->nr_urb_entries = brw->urb.nr_gs_entries;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   key->curbe_offset = brw->curbe.clip_start;
+
+   /* CACHE_NEW_GS_PROG: with no active program, fall back to one
+    * GRF and a URB entry read length of one.
+    */
+   key->prog_active = brw->gs.prog_active;
+
+   key->total_grf = key->prog_active ?
+      brw->gs.prog_data->total_grf : 1;
+   key->urb_entry_read_length = key->prog_active ?
+      brw->gs.prog_data->urb_read_length : 1;
+}
+
+static drm_intel_bo *
+gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_gs_unit_state gs;
+   drm_intel_bo *bo;
+   /* Builds a GS unit state from 'key', uploads it to the cache and returns the resulting bo. */
+   memset(&gs, 0, sizeof(gs));
+
+   gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; /* 16-register blocks, minus one */
+   if (key->prog_active) /* reloc */
+      gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
+
+   gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   gs.thread1.single_program_flow = 1;
+
+   gs.thread3.dispatch_grf_start_reg = 1;
+   gs.thread3.const_urb_entry_read_offset = 0;
+   gs.thread3.const_urb_entry_read_length = 0;
+   gs.thread3.urb_entry_read_offset = 0;
+   gs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+
+   gs.thread4.nr_urb_entries = key->nr_urb_entries;
+   gs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+
+   if (key->nr_urb_entries >= 8)
+      gs.thread4.max_threads = 1;
+   else
+      gs.thread4.max_threads = 0;
+
+   if (intel->gen == 5)
+      gs.thread4.rendering_enable = 1;
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      gs.thread4.stats_enable = 1;
+
+   bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
+			 key, sizeof(*key),
+			 &brw->gs.prog_bo, 1,
+			 &gs, sizeof(gs));
+
+   if (key->prog_active) {
+      /* Emit GS program relocation */
+      drm_intel_bo_emit_reloc(bo, offsetof(struct brw_gs_unit_state, thread0),
+			      brw->gs.prog_bo, gs.thread0.grf_reg_count << 1, /* NOTE(review): delta presumably preserves the grf_reg_count bits of thread0 - confirm */
+			      I915_GEM_DOMAIN_INSTRUCTION, 0);
+   }
+
+   return bo;
+}
+
+static void prepare_gs_unit(struct brw_context *brw)
+{
+   struct brw_gs_unit_key key;
+   /* Refreshes brw->gs.state_bo: reuse a cached unit state for the key, or build one. */
+   gs_unit_populate_key(brw, &key);
+
+   drm_intel_bo_unreference(brw->gs.state_bo); /* release the previously held state */
+   brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT,
+				       &key, sizeof(key),
+				       &brw->gs.prog_bo, 1,
+				       NULL);
+   if (brw->gs.state_bo == NULL) { /* nothing cached for this key */
+      brw->gs.state_bo = gs_unit_create_from_key(brw, &key);
+   }
+}
+
+const struct brw_tracked_state brw_gs_unit = {
+   .dirty = {
+      .mesa  = 0,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |	/* key reads brw->curbe.clip_start */
+		BRW_NEW_URB_FENCE),	/* key reads brw->urb.nr_gs_entries and vsize */
+      .cache = CACHE_NEW_GS_PROG	/* key reads gs.prog_active and gs.prog_data */
+   },
+   .prepare = prepare_gs_unit,
+};
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
new file mode 100644
index 0000000000..572175f463
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -0,0 +1,636 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+
+#include "intel_batchbuffer.h"
+#include "intel_regions.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+
+
+
+/***********************************************************************
+ * Blend color
+ */
+
+static void upload_blend_constant_color(struct brw_context *brw)
+{
+   const GLcontext *gl = &brw->intel.ctx;
+   struct brw_blend_constant_color packet;
+   GLuint chan;
+
+   /* Build the packet from scratch: header, then the four components. */
+   memset(&packet, 0, sizeof(packet));
+   packet.header.length = sizeof(packet)/4-2;
+   packet.header.opcode = CMD_BLEND_CONSTANT_COLOR;
+   for (chan = 0; chan < 4; chan++)
+      packet.blend_constant_color[chan] = gl->Color.BlendColor[chan];
+
+   BRW_CACHED_BATCH_STRUCT(brw, &packet);
+}
+
+
+const struct brw_tracked_state brw_blend_constant_color = {
+   .dirty = {
+      .mesa = _NEW_COLOR,	/* the emit function reads ctx->Color.BlendColor */
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .emit = upload_blend_constant_color
+};
+
+/* Constant single cliprect for framebuffer object or DRI2 drawing */
+static void upload_drawing_rect(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel; /* NOTE(review): presumably used by the batch macros - confirm */
+   GLcontext *ctx = &intel->ctx;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
+   OUT_BATCH(0); /* xmin, ymin */
+   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |	/* width - 1 in the low 16 bits */
+	    ((ctx->DrawBuffer->Height - 1) << 16));	/* height - 1 in the high 16 bits */
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_drawing_rect = {
+   .dirty = {
+      .mesa = _NEW_BUFFERS,	/* the emit function reads ctx->DrawBuffer Width/Height */
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .emit = upload_drawing_rect
+};
+
+/**
+ * Upload the binding table pointers, which point to each stage's array of
+ * surface state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which points at the batchbuffer containing the streamed batch state.
+ */
+static void upload_binding_table_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel; /* NOTE(review): presumably used by the batch macros - confirm */
+
+   BEGIN_BATCH(6);
+   OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
+   OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
+   OUT_BATCH(0); /* gs */
+   OUT_BATCH(0); /* clip */
+   OUT_BATCH(0); /* sf */
+   OUT_BATCH(brw->wm.bind_bo_offset); /* wm */
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_binding_table_pointers = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE, /* new batch, or new binding tables */
+      .cache = 0,
+   },
+   .emit = upload_binding_table_pointers,
+};
+
+/**
+ * Gen6 variant of the binding table pointer upload: the packet carries
+ * only VS, GS and PS entries, each flagged by its MODIFY bit.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which points at the batchbuffer containing the streamed batch state.
+ */
+static void upload_gen6_binding_table_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel; /* NOTE(review): presumably used by the batch macros - confirm */
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 |
+	     GEN6_BINDING_TABLE_MODIFY_VS |
+	     GEN6_BINDING_TABLE_MODIFY_GS |
+	     GEN6_BINDING_TABLE_MODIFY_PS |
+	     (4 - 2));
+   OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
+   OUT_BATCH(0); /* gs */
+   OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state gen6_binding_table_pointers = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH | BRW_NEW_BINDING_TABLE, /* new batch, or new binding tables */
+      .cache = 0,
+   },
+   .emit = upload_gen6_binding_table_pointers,
+};
+
+/**
+ * Upload pointers to the per-stage state.
+ *
+ * The state pointers in this packet are all relative to the general state
+ * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
+ */
+static void upload_pipelined_state_pointers(struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+
+   if (intel->gen == 5) {
+      /* Need to flush before changing clip max threads for errata. */
+      BEGIN_BATCH(1);
+      OUT_BATCH(MI_FLUSH);
+      ADVANCE_BATCH();
+   }
+
+   BEGIN_BATCH(7);
+   OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
+   OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   if (brw->gs.prog_active)
+      OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); /* NOTE(review): delta 1 is presumably an enable bit - confirm */
+   else
+      OUT_BATCH(0); /* no GS state when no GS program is active */
+   OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+   OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+	     brw->cc.state_offset);
+   ADVANCE_BATCH();
+
+   brw->state.dirty.brw |= BRW_NEW_PSP; /* record that the pointers were just re-emitted */
+}
+
+
+static void prepare_psp_urb_cbs(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->vs.state_bo); /* one call per unit-state bo referenced by the emit step */
+   brw_add_validated_bo(brw, brw->gs.state_bo);
+   brw_add_validated_bo(brw, brw->clip.state_bo);
+   brw_add_validated_bo(brw, brw->sf.state_bo);
+   brw_add_validated_bo(brw, brw->wm.state_bo); /* NOTE(review): cc.state_bo is relocated at emit time but not added here - confirm intended */
+}
+
+static void upload_psp_urb_cbs(struct brw_context *brw )
+{
+   upload_pipelined_state_pointers(brw); /* emitted first; URB fence and CS URB state follow */
+   brw_upload_urb_fence(brw);
+   brw_upload_cs_urb_state(brw);
+}
+
+const struct brw_tracked_state brw_psp_urb_cbs = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH,
+      .cache = (CACHE_NEW_VS_UNIT | 
+		CACHE_NEW_GS_UNIT | 
+		CACHE_NEW_GS_PROG | 	/* the GS pointer depends on gs.prog_active */
+		CACHE_NEW_CLIP_UNIT | 
+		CACHE_NEW_SF_UNIT | 
+		CACHE_NEW_WM_UNIT | 
+		CACHE_NEW_CC_UNIT)
+   },
+   .prepare = prepare_psp_urb_cbs,
+   .emit = upload_psp_urb_cbs,
+};
+
+static void prepare_depthbuffer(struct brw_context *brw)
+{
+   struct intel_region *depth = brw->state.depth_region;
+
+   if (depth)	/* nothing to validate without a depth region */
+      brw_add_validated_bo(brw, depth->buffer);
+}
+
+static void emit_depthbuffer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct intel_region *region = brw->state.depth_region;
+   unsigned int len; /* entry count of the CMD_DEPTH_BUFFER packet; varies by hardware generation */
+
+   if (intel->gen >= 6)
+      len = 7;
+   else if (intel->is_g4x || intel->gen == 5)
+      len = 6;
+   else
+      len = 5;
+
+   if (region == NULL) { /* no depth region: describe a null surface */
+      BEGIN_BATCH(len);
+      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+		(BRW_SURFACE_NULL << 29));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+
+      if (intel->is_g4x || intel->gen >= 5) /* sixth entry */
+         OUT_BATCH(0);
+
+      if (intel->gen >= 6) /* seventh entry */
+	 OUT_BATCH(0);
+
+      ADVANCE_BATCH();
+   } else {
+      unsigned int format;
+
+      switch (region->cpp) { /* depth format is chosen from the region's cpp */
+      case 2:
+	 format = BRW_DEPTHFORMAT_D16_UNORM;
+	 break;
+      case 4:
+	 if (intel->depth_buffer_is_float)
+	    format = BRW_DEPTHFORMAT_D32_FLOAT;
+	 else
+	    format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+	 break;
+      default:
+	 assert(0);
+	 return;
+      }
+
+      assert(region->tiling != I915_TILING_X); /* X-tiled depth regions are rejected */
+      if (IS_GEN6(intel->intelScreen->deviceID))
+	 assert(region->tiling != I915_TILING_NONE); /* gen6 additionally rejects untiled depth */
+
+      BEGIN_BATCH(len);
+      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+      OUT_BATCH(((region->pitch * region->cpp) - 1) |
+		(format << 18) |
+		(BRW_TILEWALK_YMAJOR << 26) |
+		((region->tiling != I915_TILING_NONE) << 27) |
+		(BRW_SURFACE_2D << 29));
+      OUT_RELOC(region->buffer,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		0);
+      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+		((region->pitch - 1) << 6) |
+		((region->height - 1) << 19));
+      OUT_BATCH(0);
+
+      if (intel->is_g4x || intel->gen >= 5) /* sixth entry */
+         OUT_BATCH(0);
+
+      if (intel->gen >= 6) /* seventh entry */
+	 OUT_BATCH(0);
+
+      ADVANCE_BATCH();
+   }
+
+   /* Initialize it for safety. */
+   if (intel->gen >= 6) {
+      BEGIN_BATCH(2);
+      OUT_BATCH(CMD_3D_CLEAR_PARAMS << 16 | (2 - 2));
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+}
+
+const struct brw_tracked_state brw_depthbuffer = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH,
+      .cache = 0,
+   },
+   .prepare = prepare_depthbuffer,	/* adds the depth region's buffer to the validation list */
+   .emit = emit_depthbuffer,	/* writes CMD_DEPTH_BUFFER, plus clear params on gen6+ */
+};
+
+
+
+/***********************************************************************
+ * Polygon stipple packet
+ */
+
+static void upload_polygon_stipple(struct brw_context *brw)
+{
+   const GLcontext *gl = &brw->intel.ctx;
+   /* Framebuffer name 0 is the window-system buffer: flip the rows. */
+   const GLboolean flip_rows = (gl->DrawBuffer->Name == 0);
+   struct brw_polygon_stipple packet;
+   GLuint row;
+
+   memset(&packet, 0, sizeof(packet));
+   packet.header.length = sizeof(packet)/4-2;
+   packet.header.opcode = CMD_POLY_STIPPLE_PATTERN;
+
+   /* Polygon stipple arrives in OpenGL order, i.e. bottom row
+    * first.  Rendering to a window (the default framebuffer
+    * object, 0) requires inverting it to match our pixel layout,
+    * so the 32 rows are copied in reverse.  Rendering to a named
+    * FBO already matches the layout and the rows are copied
+    * unchanged.
+    */
+   for (row = 0; row < 32; row++) {
+      if (flip_rows)
+         packet.stipple[row] = gl->PolygonStipple[31 - row]; /* invert */
+      else
+         packet.stipple[row] = gl->PolygonStipple[row]; /* don't invert */
+   }
+
+   BRW_CACHED_BATCH_STRUCT(brw, &packet);
+}
+
+const struct brw_tracked_state brw_polygon_stipple = {
+   .dirty = {
+      .mesa = _NEW_POLYGONSTIPPLE,	/* the emit function reads ctx->PolygonStipple */
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .emit = upload_polygon_stipple
+};
+
+
+/***********************************************************************
+ * Polygon stipple offset packet
+ */
+
+static void upload_polygon_stipple_offset(struct brw_context *brw)
+{
+   const GLcontext *gl = &brw->intel.ctx;
+   struct brw_polygon_stipple_offset packet;
+   GLuint y_off;
+
+   /* Drawing to a system window (framebuffer name 0) means the Y
+    * axis is inverted to match the OpenGL pixel coordinate system,
+    * so the pattern needs a vertical offset derived from the buffer
+    * height modulo 32.  When drawing to an FBO (non-zero name) the
+    * native pixel coordinate system works just fine and there is no
+    * window system to worry about, hence no offset.  The horizontal
+    * offset is zero in both cases.
+    */
+   if (gl->DrawBuffer->Name != 0)
+      y_off = 0;
+   else
+      y_off = (32 - (gl->DrawBuffer->Height & 31)) & 31;
+
+   /* Assemble and emit the packet. */
+   memset(&packet, 0, sizeof(packet));
+   packet.header.opcode = CMD_POLY_STIPPLE_OFFSET;
+   packet.header.length = sizeof(packet)/4-2;
+   packet.bits0.x_offset = 0;
+   packet.bits0.y_offset = y_off;
+
+   BRW_CACHED_BATCH_STRUCT(brw, &packet);
+}
+
+#define _NEW_WINDOW_POS 0x40000000	/* NOTE(review): locally defined dirty bit; confirm it does not collide with a core _NEW_* flag */
+
+const struct brw_tracked_state brw_polygon_stipple_offset = {
+   .dirty = {
+      .mesa = _NEW_WINDOW_POS,
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .emit = upload_polygon_stipple_offset
+};
+
+/**********************************************************************
+ * AA Line parameters
+ */
+static void upload_aa_line_parameters(struct brw_context *brw)
+{
+   struct brw_aa_line_parameters balp;
+
+   if (!brw->has_aa_line_parameters) /* skip when the context lacks this packet */
+      return;
+
+   /* use legacy aa line coverage computation */
+   memset(&balp, 0, sizeof(balp)); /* every field other than the header stays zero */
+   balp.header.opcode = CMD_AA_LINE_PARAMETERS;
+   balp.header.length = sizeof(balp) / 4 - 2;
+   
+   BRW_CACHED_BATCH_STRUCT(brw, &balp);
+}
+
+const struct brw_tracked_state brw_aa_line_parameters = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,	/* the packet contents do not depend on GL state */
+      .cache = 0
+   },
+   .emit = upload_aa_line_parameters
+};
+
+/***********************************************************************
+ * Line stipple packet
+ */
+
+static void upload_line_stipple(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_line_stipple bls;
+   GLfloat tmp;
+   GLint tmpi;
+
+   memset(&bls, 0, sizeof(bls));
+   bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
+   bls.header.length = sizeof(bls)/4 - 2;
+
+   bls.bits0.pattern = ctx->Line.StipplePattern;
+   bls.bits1.repeat_count = ctx->Line.StippleFactor;
+   /* The packet also carries 1/StippleFactor, scaled by 2^13 and truncated to an integer. */
+   tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor; /* NOTE(review): assumes StippleFactor is never 0 - confirm */
+   tmpi = tmp * (1<<13);
+
+
+   bls.bits1.inverse_repeat_count = tmpi;
+
+   BRW_CACHED_BATCH_STRUCT(brw, &bls);
+}
+
+const struct brw_tracked_state brw_line_stipple = {
+   .dirty = {
+      .mesa = _NEW_LINE,	/* the emit function reads ctx->Line.StipplePattern and StippleFactor */
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .emit = upload_line_stipple
+};
+
+
+/***********************************************************************
+ * Misc invariant state packets
+ */
+
+static void upload_invarient_state( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+
+   {
+      /* 0x61040000  Pipeline Select */
+      /*     PipelineSelect            : 0 */
+      struct brw_pipeline_select ps;
+
+      memset(&ps, 0, sizeof(ps));
+      ps.header.opcode = brw->CMD_PIPELINE_SELECT; /* opcode is read from the context, not a fixed macro */
+      ps.header.pipeline_select = 0;
+      BRW_BATCH_STRUCT(brw, &ps);
+   }
+
+   if (intel->gen < 6) { /* this packet is only sent before gen6 */
+      struct brw_global_depth_offset_clamp gdo;
+      memset(&gdo, 0, sizeof(gdo));
+
+      /* Disable depth offset clamping. 
+       */
+      gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
+      gdo.header.length = sizeof(gdo)/4 - 2;
+      gdo.depth_offset_clamp = 0.0;
+
+      BRW_BATCH_STRUCT(brw, &gdo);
+   }
+
+   if (intel->gen >= 6) { /* gen6+ only: multisample, sample mask and SVB index packets */
+      int i;
+
+      intel_batchbuffer_emit_mi_flush(intel->batch);
+
+      BEGIN_BATCH(3);
+      OUT_BATCH(CMD_3D_MULTISAMPLE << 16 | (3 - 2));
+      OUT_BATCH(MS_PIXEL_LOCATION_CENTER |
+		MS_NUMSAMPLES_1);
+      OUT_BATCH(0); /* positions for 4/8-sample */
+      ADVANCE_BATCH();
+
+      BEGIN_BATCH(2);
+      OUT_BATCH(CMD_3D_SAMPLE_MASK << 16 | (2 - 2));
+      OUT_BATCH(1); /* mask value 1 */
+      ADVANCE_BATCH();
+
+      for (i = 0; i < 4; i++) { /* one CMD_GS_SVB_INDEX packet for each of indices 0..3 */
+	 BEGIN_BATCH(4);
+	 OUT_BATCH(CMD_GS_SVB_INDEX << 16 | (4 - 2));
+	 OUT_BATCH(i << SVB_INDEX_SHIFT);
+	 OUT_BATCH(0);
+	 OUT_BATCH(0xffffffff);
+	 ADVANCE_BATCH();
+      }
+   }
+
+   /* 0x61020000  State Instruction Pointer */
+   {
+      struct brw_system_instruction_pointer sip;
+      memset(&sip, 0, sizeof(sip));
+
+      sip.header.opcode = CMD_STATE_INSN_POINTER;
+      sip.header.length = 0;
+      sip.bits0.pad = 0;
+      sip.bits0.system_instruction_pointer = 0;
+      BRW_BATCH_STRUCT(brw, &sip);
+   }
+
+
+   {
+      struct brw_vf_statistics vfs;
+      memset(&vfs, 0, sizeof(vfs));
+
+      vfs.opcode = brw->CMD_VF_STATISTICS; /* opcode is read from the context, not a fixed macro */
+      if (INTEL_DEBUG & DEBUG_STATS)
+	 vfs.statistics_enable = 1; 
+
+      BRW_BATCH_STRUCT(brw, &vfs);
+   }
+}
+
+const struct brw_tracked_state brw_invarient_state = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,	/* the only dirty bit: nothing here follows GL state */
+      .cache = 0
+   },
+   .emit = upload_invarient_state
+};
+
+/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations for the objects,
+ * and is actually required for binding table pointers on gen6.
+ *
+ * Surface state base address covers binding table pointers and
+ * surface state objects, but not the surfaces that the surface state
+ * objects point to.
+ */
+static void upload_state_base_address( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+   /* NOTE(review): the literal 1 in each entry is presumably a modify-enable bit on a zero address - confirm. */
+   if (intel->gen >= 6) { /* 10-entry form */
+       BEGIN_BATCH(10);
+       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
+       OUT_BATCH(1); /* General state base address */
+       OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0,
+		 1); /* Surface state base address */
+       OUT_BATCH(1); /* Dynamic state base address */
+       OUT_BATCH(1); /* Indirect object base address */
+       OUT_BATCH(1); /* Instruction base address */
+       OUT_BATCH(1); /* General state upper bound */
+       OUT_BATCH(1); /* Dynamic state upper bound */
+       OUT_BATCH(1); /* Indirect object upper bound */
+       OUT_BATCH(1); /* Instruction access upper bound */
+       ADVANCE_BATCH();
+   } else if (intel->gen == 5) { /* 8-entry form: no dynamic state entries */
+       BEGIN_BATCH(8);
+       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
+       OUT_BATCH(1); /* General state base address */
+       OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0,
+		 1); /* Surface state base address */
+       OUT_BATCH(1); /* Indirect object base address */
+       OUT_BATCH(1); /* Instruction base address */
+       OUT_BATCH(1); /* General state upper bound */
+       OUT_BATCH(1); /* Indirect object upper bound */
+       OUT_BATCH(1); /* Instruction access upper bound */
+       ADVANCE_BATCH();
+   } else { /* 6-entry form: no dynamic state or instruction entries */
+       BEGIN_BATCH(6);
+       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+       OUT_BATCH(1); /* General state base address */
+       OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0,
+		 1); /* Surface state base address */
+       OUT_BATCH(1); /* Indirect object base address */
+       OUT_BATCH(1); /* General state upper bound */
+       OUT_BATCH(1); /* Indirect object upper bound */
+       ADVANCE_BATCH();
+   }
+}
+
+const struct brw_tracked_state brw_state_base_address = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,	/* the surface state base is a relocation to intel->batch->buf */
+      .cache = 0,
+   },
+   .emit = upload_state_base_address
+};
diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c
new file mode 100644
index 0000000000..e79b3ddea3
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_optimize.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "main/macros.h"
+#include "shader/program.h"
+#include "shader/prog_print.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+/* GL_TRUE for an align16, 8-wide DP4 writing a GRF destination whose
+ * destination writemask satisfies is_power_of_two().
+ */
+static GLboolean
+is_single_channel_dp4(struct brw_instruction *insn)
+{
+   const GLboolean grf_dp4 =
+      insn->header.opcode == BRW_OPCODE_DP4 &&
+      insn->header.execution_size == BRW_EXECUTE_8 &&
+      insn->header.access_mode == BRW_ALIGN_16 &&
+      insn->bits1.da1.dest_reg_file == BRW_GENERAL_REGISTER_FILE;
+
+   return (grf_dp4 && is_power_of_two(insn->bits1.da16.dest_writemask)) ? GL_TRUE : GL_FALSE;
+}
+
+/**
+ * Sets the dependency control fields on DP4 instructions.
+ *
+ * The hardware only tracks dependencies on a register basis, so when
+ * you do:
+ *
+ * DP4 dst.x src1 src2
+ * DP4 dst.y src1 src3
+ * DP4 dst.z src1 src4
+ * DP4 dst.w src1 src5
+ *
+ * It will wait to do the DP4 dst.y until the dst.x is resolved, etc.
+ * For adjacent DP4s that write different channels of one register, and
+ * whose second instruction doesn't read that register, we flag the pair.
+ *
+ * We may want to extend this to other instructions that are used to
+ * fill in a channel at a time of the destination register.
+ */
+static void
+brw_set_dp4_dependency_control(struct brw_compile *p)
+{
+   int i;
+
+   for (i = 1; i < p->nr_insn; i++) {
+      struct brw_instruction *insn = &p->store[i];
+      struct brw_instruction *prev = &p->store[i - 1];
+
+      if (!is_single_channel_dp4(prev))
+	 continue;
+
+      if (!is_single_channel_dp4(insn)) {
+	 i++;	/* insn can't be the first of a pair either, so skip it */
+	 continue;
+      }
+
+      /* Only avoid hw dep control if the write masks are different
+       * channels of one reg.
+       */
+      if (insn->bits1.da16.dest_writemask == prev->bits1.da16.dest_writemask)
+	 continue;
+      if (insn->bits1.da16.dest_reg_nr != prev->bits1.da16.dest_reg_nr)
+	 continue;
+
+      /* Check if the second instruction depends on the previous one
+       * for a src.  Indirectly addressed GRF sources count as dependent.
+       */
+      if (insn->bits1.da1.src0_reg_file == BRW_GENERAL_REGISTER_FILE &&
+	  (insn->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT ||
+	   insn->bits2.da1.src0_reg_nr == insn->bits1.da16.dest_reg_nr))
+	  continue;
+      if (insn->bits1.da1.src1_reg_file == BRW_GENERAL_REGISTER_FILE &&
+	  (insn->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT ||
+	   insn->bits3.da1.src1_reg_nr == insn->bits1.da16.dest_reg_nr))
+	  continue;
+
+      prev->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
+      insn->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
+   }
+}
+
+void
+brw_optimize(struct brw_compile *p)
+{
+   brw_set_dp4_dependency_control(p);	/* the only pass run from here */
+}
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
new file mode 100644
index 0000000000..bd560acdad
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -0,0 +1,195 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+  
+#include "main/imports.h"
+#include "main/enums.h"
+#include "shader/prog_parameter.h"
+#include "shader/program.h"
+#include "shader/programopt.h"
+#include "shader/shader_api.h"
+#include "tnl/tnl.h"
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+/**
+ * BindProgram driver hook: sets BRW_NEW_VERTEX_PROGRAM or
+ * BRW_NEW_FRAGMENT_PROGRAM in brw->state.dirty.brw according to target.
+ * Any other target is ignored; prog itself is not examined.
+ */
+static void brwBindProgram( GLcontext *ctx,
+			    GLenum target, 
+			    struct gl_program *prog )
+{
+   struct brw_context *brw = brw_context(ctx);
+   if (target == GL_VERTEX_PROGRAM_ARB)
+      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+   else if (target == GL_FRAGMENT_PROGRAM_ARB)
+      brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+}
+
+/**
+ * NewProgram driver hook.  Vertex and fragment programs are allocated as
+ * the brw_* wrapper structs and stamped with the next brw->program_id;
+ * any other target is handed to _mesa_new_program().
+ *
+ * Returns NULL when the wrapper allocation fails.
+ */
+static struct gl_program *brwNewProgram( GLcontext *ctx,
+				      GLenum target, 
+				      GLuint id )
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   if (target == GL_VERTEX_PROGRAM_ARB) {
+      struct brw_vertex_program *vp = CALLOC_STRUCT(brw_vertex_program);
+
+      if (!vp)
+	 return NULL;
+
+      vp->id = brw->program_id++;
+      return _mesa_init_vertex_program( ctx, &vp->program, target, id );
+   }
+
+   if (target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct brw_fragment_program *fp = CALLOC_STRUCT(brw_fragment_program);
+
+      if (!fp)
+	 return NULL;
+
+      fp->id = brw->program_id++;
+      return _mesa_init_fragment_program( ctx, &fp->program, target, id );
+   }
+
+   return _mesa_new_program(ctx, target, id);
+}
+
+static void brwDeleteProgram( GLcontext *ctx,
+			      struct gl_program *prog )
+{
+   _mesa_delete_program( ctx, prog );	/* no driver-side teardown is done here */
+}
+
+
+static GLboolean brwIsProgramNative( GLcontext *ctx,
+				     GLenum target, 
+				     struct gl_program *prog )
+{
+   return GL_TRUE;	/* unconditional: target and prog are not inspected */
+}
+
+/* If a shader program with id prog->Id exists, replace its info log with
+ * a copy of msg and set its LinkStatus to GL_FALSE.
+ */
+static void
+shader_error(GLcontext *ctx, struct gl_program *prog, const char *msg)
+{
+   struct gl_shader_program *shProg = _mesa_lookup_shader_program(ctx, prog->Id);
+
+   if (shProg == NULL)
+      return;
+
+   free(shProg->InfoLog);	/* free(NULL) is a no-op */
+   shProg->InfoLog = _mesa_strdup(msg);
+   shProg->LinkStatus = GL_FALSE;
+}
+
+static GLboolean brwProgramStringNotify( GLcontext *ctx,
+                                         GLenum target,
+                                         struct gl_program *prog )
+{
+   struct brw_context *brw = brw_context(ctx);
+   int i;
+
+   if (target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
+      struct brw_fragment_program *newFP = brw_fragment_program(fprog);
+      const struct brw_fragment_program *curFP =
+         brw_fragment_program_const(brw->fragment_program);
+
+      if (fprog->FogOption) {
+         _mesa_append_fog_code(ctx, fprog);
+         fprog->FogOption = GL_NONE;
+      }
+      /* Only the program held in brw->fragment_program needs the dirty flag. */
+      if (newFP == curFP)
+	 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+      newFP->id = brw->program_id++;      
+      newFP->isGLSL = brw_wm_is_glsl(fprog);
+   }
+   else if (target == GL_VERTEX_PROGRAM_ARB) {
+      struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
+      struct brw_vertex_program *newVP = brw_vertex_program(vprog);
+      const struct brw_vertex_program *curVP =
+         brw_vertex_program_const(brw->vertex_program);
+      /* Only the program held in brw->vertex_program needs the dirty flag. */
+      if (newVP == curVP)
+	 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+      if (newVP->program.IsPositionInvariant) {
+	 _mesa_insert_mvp_code(ctx, &newVP->program);
+      }
+      newVP->id = brw->program_id++;      
+
+      /* Also tell tnl about it:
+       */
+      _tnl_program_string(ctx, target, prog);
+   }
+
+   /* Reject programs with subroutines, which are totally broken at the moment
+    * (all program flows return when any program flow returns, and
+    * the VS also hangs if a function call calls a function).
+    *
+    * See piglit glsl-{vs,fs}-functions-[23] tests.
+    */
+   for (i = 0; i < prog->NumInstructions; i++) {
+      if (prog->Instructions[i].Opcode == OPCODE_CAL) {
+	 shader_error(ctx, prog,
+		      "i965 driver doesn't yet support uninlined function "
+		      "calls.  Move to using a single return statement at "
+		      "the end of the function to work around it.");
+	 return GL_FALSE;
+      }
+   }
+
+   return GL_TRUE;
+}
+
+void brwInitFragProgFuncs( struct dd_function_table *functions )
+{
+   assert(functions->ProgramStringNotify == _tnl_program_string); 
+   /* The hook is replaced below; brwProgramStringNotify calls it for VPs. */
+   functions->BindProgram = brwBindProgram;
+   functions->NewProgram = brwNewProgram;
+   functions->DeleteProgram = brwDeleteProgram;
+   functions->IsProgramNative = brwIsProgramNative;
+   functions->ProgramStringNotify = brwProgramStringNotify;
+}
+
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
new file mode 100644
index 0000000000..f6868c83ac
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -0,0 +1,287 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file support for ARB_occlusion_query
+ *
+ * ARB_occlusion_query is implemented by using the PIPE_CONTROL command to stall
+ * execution on the completion of previous depth tests, and write the
+ * current PS_DEPTH_COUNT to a buffer object.
+ *
+ * We use before and after counts when drawing during a query so that
+ * we don't pick up other clients' query data in ours.  To reduce overhead,
+ * a single BO is used to record the query data for all active queries at
+ * once.  This also gives us a simple bound on how much batchbuffer space is
+ * required for handling queries, so that we can be sure that we won't
+ * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT.
+ */
+#include "main/imports.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+
+/** Maps the query's BO, adds its counts to query->Base.Result, then
+ *  releases the BO.  Does nothing when the query has no BO. */
+static void
+brw_queryobj_get_results(struct brw_query_object *query)
+{
+   const uint64_t *counts;
+   int slot;
+
+   if (!query->bo)
+      return;
+
+   drm_intel_bo_map(query->bo, GL_FALSE);
+   counts = query->bo->virtual;
+   if (query->Base.Target != GL_TIME_ELAPSED_EXT) {
+      /* Sum (end - begin) over every pair recorded for this query. */
+      for (slot = query->first_index; slot <= query->last_index; slot++)
+	 query->Base.Result += counts[2 * slot + 1] - counts[2 * slot];
+   } else {
+      /* Begin/end values sit at offsets 0 and 8; only the upper halves count. */
+      query->Base.Result += 1000 * ((counts[1] >> 32) - (counts[0] >> 32));
+   }
+   drm_intel_bo_unmap(query->bo);
+   drm_intel_bo_unreference(query->bo);
+   query->bo = NULL;
+}
+
+/** Allocates a query object for id; returns NULL if the allocation fails. */
+static struct gl_query_object *
+brw_new_query_object(GLcontext *ctx, GLuint id)
+{
+   struct brw_query_object *query = calloc(1, sizeof(*query));
+
+   if (query == NULL)
+      return NULL;	/* out of memory: nothing to initialize */
+   query->Base.Id = id;
+   query->Base.Result = 0;
+   query->Base.Active = GL_FALSE;
+   query->Base.Ready = GL_TRUE;
+   return &query->Base;
+}
+
+static void
+brw_delete_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct brw_query_object *query = (struct brw_query_object *)q;
+   /* Drop the reference to the query's BO, then free the object itself. */
+   drm_intel_bo_unreference(query->bo);
+   free(query);
+}
+
+static void
+brw_begin_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct intel_context *intel = intel_context(ctx);
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   if (query->Base.Target == GL_TIME_ELAPSED_EXT) {
+      drm_intel_bo_unreference(query->bo);
+      query->bo = drm_intel_bo_alloc(intel->bufmgr, "timer query",
+				     4096, 4096);
+      /* Have the starting timestamp written at offset 0 of the new BO. */
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+		PIPE_CONTROL_WRITE_TIMESTAMP);
+      OUT_RELOC(query->bo,
+		I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+		PIPE_CONTROL_GLOBAL_GTT_WRITE |
+		0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      /* Reset our driver's tracking of query state. */
+      drm_intel_bo_unreference(query->bo);
+      query->bo = NULL;
+      query->first_index = -1;
+      query->last_index = -1;
+
+      brw->query.obj = query;	/* read back by brw_emit_query_begin() */
+      intel->stats_wm++;
+   }
+}
+
+/**
+ * End the query on a query object (timer or depth-count based).
+ */
+static void
+brw_end_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct intel_context *intel = intel_context(ctx);
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   if (query->Base.Target == GL_TIME_ELAPSED_EXT) {
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+		PIPE_CONTROL_WRITE_TIMESTAMP);
+      OUT_RELOC(query->bo,
+		I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+		PIPE_CONTROL_GLOBAL_GTT_WRITE |
+		8);	/* byte offset 8; brw_begin_query() used offset 0 */
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      intel_batchbuffer_flush(intel->batch);
+   } else {
+      /* Flush the batchbuffer in case it has writes to our query BO.
+       * Have later queries write to a new query BO so that further rendering
+       * doesn't delay the collection of our results.
+       */
+      if (query->bo) {
+	 brw_emit_query_end(brw);
+	 intel_batchbuffer_flush(intel->batch);
+
+	 drm_intel_bo_unreference(brw->query.bo);
+	 brw->query.bo = NULL;
+      }
+
+      brw->query.obj = NULL;
+
+      intel->stats_wm--;
+   }
+}
+
+static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   brw_queryobj_get_results(query);	/* totals the BO contents, if a BO is attached */
+   query->Base.Ready = GL_TRUE;
+}
+
+static void brw_check_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct brw_query_object *query = (struct brw_query_object *)q;
+   /* Leave the query untouched while its BO exists and is still busy. */
+   if (query->bo != NULL && drm_intel_bo_busy(query->bo))
+      return;
+   brw_queryobj_get_results(query);
+   query->Base.Ready = GL_TRUE;
+}
+
+/** Ensures brw->query.bo has room for another pair and validates the BO */
+void
+brw_prepare_query_begin(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   /* Nothing to prepare unless a query object is current. */
+   if (brw->query.obj == NULL)
+      return;
+
+   /* Replace the BO when there is none yet or the next begin/end pair
+    * would no longer fit in its 4096 bytes.
+    */
+   if (!brw->query.bo ||
+       brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
+      drm_intel_bo_unreference(brw->query.bo);
+      brw->query.bo = drm_intel_bo_alloc(intel->bufmgr, "query", 4096, 1);
+      brw->query.index = 0;
+   }
+
+   brw_add_validated_bo(brw, brw->query.bo);
+}
+
+/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
+void
+brw_emit_query_begin(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_query_object *query = brw->query.obj;
+
+   /* Skip if we're not doing any queries, or we've emitted the start. */
+   if (!query || brw->query.active)
+      return;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+	     PIPE_CONTROL_DEPTH_STALL |
+	     PIPE_CONTROL_WRITE_DEPTH_COUNT);
+   /* This object could be mapped cacheable, but we don't have an exposed
+    * mechanism to support that.  Since it's going uncached, tell GEM that
+    * we're writing to it.  The usual clflush should be all that's required
+    * to pick up the results.
+    */
+   OUT_RELOC(brw->query.bo,
+	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+	     PIPE_CONTROL_GLOBAL_GTT_WRITE |
+	     ((brw->query.index * 2) * sizeof(uint64_t)));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+   /* On a BO switch, total what the old BO recorded before retargeting. */
+   if (query->bo != brw->query.bo) {
+      if (query->bo != NULL)
+	 brw_queryobj_get_results(query);
+      drm_intel_bo_reference(brw->query.bo);
+      query->bo = brw->query.bo;
+      query->first_index = brw->query.index;
+   }
+   query->last_index = brw->query.index;
+   brw->query.active = GL_TRUE;
+}
+
+/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */
+void
+brw_emit_query_end(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   if (!brw->query.active)
+      return;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+	     PIPE_CONTROL_DEPTH_STALL |
+	     PIPE_CONTROL_WRITE_DEPTH_COUNT);
+   OUT_RELOC(brw->query.bo,
+	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+	     PIPE_CONTROL_GLOBAL_GTT_WRITE |
+	     ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+   /* This begin/end pair is complete; the next begin uses the following one. */
+   brw->query.active = GL_FALSE;
+   brw->query.index++;
+}
+
+void brw_init_queryobj_functions(struct dd_function_table *functions)
+{
+   functions->NewQueryObject = brw_new_query_object;
+   functions->DeleteQuery = brw_delete_query;
+   functions->BeginQuery = brw_begin_query;
+   functions->EndQuery = brw_end_query;
+   functions->CheckQuery = brw_check_query;	/* collects only if the BO is idle or absent */
+   functions->WaitQuery = brw_wait_query;	/* collects unconditionally */
+}
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
new file mode 100644
index 0000000000..7d005d278f
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -0,0 +1,212 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+  
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+#include "brw_state.h"
+
+static void compile_sf_prog( struct brw_context *brw,
+			     struct brw_sf_prog_key *key )
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_sf_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   GLuint i, idx;
+
+   memset(&c, 0, sizeof(c));
+
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+
+   c.key = *key;
+   c.nr_attrs = brw_count_bits(c.key.attrs);
+   c.nr_attr_regs = (c.nr_attrs+1)/2;	/* attributes pair up, rounding up */
+   c.nr_setup_attrs = brw_count_bits(c.key.attrs);
+   c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
+
+   c.prog_data.urb_read_length = c.nr_attr_regs;
+   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+
+   /* Construct map from attribute number to position in the vertex.
+    */
+   for (i = idx = 0; i < VERT_RESULT_MAX; i++) {
+      if (c.key.attrs & BITFIELD64_BIT(i)) {
+	 c.attr_to_idx[i] = idx;
+	 c.idx_to_attr[idx] = i;
+	 idx++;
+      }
+   }
+
+   /* Emit setup code for the key's primitive (SF_UNFILLED_TRIS gets the
+    * any-primitive variant). */
+   switch (key->primitive) {
+   case SF_TRIANGLES:
+      c.nr_verts = 3;
+      brw_emit_tri_setup( &c, GL_TRUE );
+      break;
+   case SF_LINES:
+      c.nr_verts = 2;
+      brw_emit_line_setup( &c, GL_TRUE );
+      break;
+   case SF_POINTS:
+      c.nr_verts = 1;
+      if (key->do_point_sprite)
+	  brw_emit_point_sprite_setup( &c, GL_TRUE );
+      else
+	  brw_emit_point_setup( &c, GL_TRUE );
+      break;
+   case SF_UNFILLED_TRIS:
+      c.nr_verts = 3;
+      brw_emit_anyprim_setup( &c );
+      break;
+   default:
+      assert(0);	/* unknown primitive: returns without uploading anything */
+      return;
+   }
+
+   /* get the program
+    */
+   program = brw_get_program(&c.func, &program_size);
+
+   if (INTEL_DEBUG & DEBUG_SF) {
+      printf("sf:\n");
+      for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
+	 brw_disasm(stdout, &((struct brw_instruction *)program)[i],
+		    intel->gen);
+      printf("\n");
+   }
+
+   /* Upload the program and its prog_data to the cache, keyed by c.key.
+    */
+   drm_intel_bo_unreference(brw->sf.prog_bo);
+   brw->sf.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_SF_PROG,
+						   &c.key, sizeof(c.key),
+						   NULL, 0,
+						   program, program_size,
+						   &c.prog_data,
+						   sizeof(c.prog_data),
+						   &brw->sf.prog_data);
+}
+
+/* Select the SF program (which calculates interpolants for rasterization):
+ * build its key from current state, search the cache, compile on a miss. */
+static void upload_sf_prog(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_sf_prog_key key;
+
+   memset(&key, 0, sizeof(key));
+
+   /* Populate the key, noting state dependencies:
+    */
+   /* CACHE_NEW_VS_PROG */
+   key.attrs = brw->vs.prog_data->outputs_written; 
+
+   /* BRW_NEW_REDUCED_PRIMITIVE */
+   switch (brw->intel.reduced_primitive) {
+   case GL_TRIANGLES: 
+      /* NOTE: We just use the edgeflag attribute as an indicator that
+       * unfilled triangles are active.  We don't actually do the
+       * edgeflag testing here, it is already done in the clip
+       * program.
+       */
+      if (key.attrs & BITFIELD64_BIT(VERT_RESULT_EDGE))
+	 key.primitive = SF_UNFILLED_TRIS;
+      else
+	 key.primitive = SF_TRIANGLES;
+      break;
+   case GL_LINES: 
+      key.primitive = SF_LINES; 
+      break;
+   case GL_POINTS: 
+      key.primitive = SF_POINTS; 
+      break;
+   }	/* any other value leaves primitive at the memset's 0 */
+
+   /* _NEW_POINT */
+   key.do_point_sprite = ctx->Point.PointSprite;
+   if (key.do_point_sprite) {
+      int i;
+
+      for (i = 0; i < 8; i++) {
+	 if (ctx->Point.CoordReplace[i])
+	    key.point_sprite_coord_replace |= (1 << i);
+      }
+   }
+   key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT);
+   /* _NEW_LIGHT */
+   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
+   key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
+
+   /* _NEW_HINT */
+   key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+
+   /* _NEW_POLYGON */
+   if (key.do_twoside_color) {
+      /* If we're rendering to a FBO, we have to invert the polygon
+       * face orientation, just as we invert the viewport in
+       * sf_unit_create_from_key().  ctx->DrawBuffer->Name will be
+       * nonzero if we're rendering to such an FBO.
+       */
+      key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0);
+   }
+
+   drm_intel_bo_unreference(brw->sf.prog_bo);
+   brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG,
+				      &key, sizeof(key),
+				      NULL, 0,
+				      &brw->sf.prog_data);
+   if (brw->sf.prog_bo == NULL)
+      compile_sf_prog( brw, &key );
+}
+
+
+const struct brw_tracked_state brw_sf_prog = {
+   .dirty = {
+      .mesa  = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT),
+      .brw   = (BRW_NEW_REDUCED_PRIMITIVE),
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .prepare = upload_sf_prog	/* its comments tag where each flag's state is read */
+};
+
diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h
new file mode 100644
index 0000000000..a0680a56f2
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_sf.h
@@ -0,0 +1,109 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+   
+
+#ifndef BRW_SF_H
+#define BRW_SF_H
+
+
+#include "shader/program.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+
+
+#define SF_POINTS    0
+#define SF_LINES     1
+#define SF_TRIANGLES 2
+#define SF_UNFILLED_TRIS   3
+
+struct brw_sf_prog_key {
+   GLbitfield64 attrs;	/**< copied from the VS prog_data's outputs_written */
+   uint8_t point_sprite_coord_replace;	/**< bit i mirrors Point.CoordReplace[i] */
+   GLuint primitive:2;	/**< one of the SF_* values above */
+   GLuint do_twoside_color:1;
+   GLuint do_flat_shading:1;
+   GLuint frontface_ccw:1;	/**< only filled in when do_twoside_color is set */
+   GLuint do_point_sprite:1;
+   GLuint linear_color:1;  /**< linear interp vs. perspective interp */
+   GLuint sprite_origin_lower_left:1;
+   GLuint pad:24;
+};
+
+struct brw_sf_compile {
+   struct brw_compile func;
+   struct brw_sf_prog_key key;
+   struct brw_sf_prog_data prog_data;
+   
+   struct brw_reg pv;
+   struct brw_reg det;
+   struct brw_reg dx0;
+   struct brw_reg dx2;
+   struct brw_reg dy0;
+   struct brw_reg dy2;
+
+   /* z and 1/w passed in separately:
+    */
+   struct brw_reg z[3];
+   struct brw_reg inv_w[3];
+   
+   /* The vertices:
+    */
+   struct brw_reg vert[3];
+
+    /* Temporaries, allocated after last vertex reg.
+    */
+   struct brw_reg inv_det;
+   struct brw_reg a1_sub_a0;
+   struct brw_reg a2_sub_a0;
+   struct brw_reg tmp;
+
+   struct brw_reg m1Cx;
+   struct brw_reg m2Cy;
+   struct brw_reg m3C0;
+
+   GLuint nr_verts;
+   GLuint nr_attrs;
+   GLuint nr_attr_regs;
+   GLuint nr_setup_attrs;
+   GLuint nr_setup_regs;
+
+   GLubyte attr_to_idx[VERT_RESULT_MAX];	/**< attribute number -> position in the vertex */
+   GLubyte idx_to_attr[VERT_RESULT_MAX];	/**< inverse of attr_to_idx */
+};
+
+ 
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_anyprim_setup( struct brw_sf_compile *c );
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c
new file mode 100644
index 0000000000..d3c975690c
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@@ -0,0 +1,774 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+   
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+
+
+static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
+				    struct brw_reg vert,
+				    GLuint attr)
+{
+   /* Slots are packed two vec4s per register: even slot low, odd slot high. */
+   const GLuint slot = c->attr_to_idx[attr];
+
+   return brw_vec4_grf(vert.nr + slot / 2, (slot % 2) * 4);
+}
+
+/* Nonzero when the program key's attribute mask includes 'attr'. */
+static GLboolean have_attr(struct brw_sf_compile *c, GLuint attr)
+{
+   return (c->key.attrs & BITFIELD64_BIT(attr)) != 0;
+}
+
+/*********************************************************************** 
+ * Twoside lighting
+ */
+/* For each COLn/BFCn pair present in the key, copy BFCn over COLn. */
+static void copy_bfc( struct brw_sf_compile *c, struct brw_reg vert )
+{
+   struct brw_compile *p = &c->func;
+   GLuint n;
+
+   for (n = 0; n < 2; n++) {
+      const GLuint front = VERT_RESULT_COL0 + n;
+      const GLuint back = VERT_RESULT_BFC0 + n;
+
+      if (have_attr(c, front) && have_attr(c, back))
+	 brw_MOV(p, get_vert_attr(c, vert, front), get_vert_attr(c, vert, back));
+   }
+}
+
+/* Emit code that copies back-face colors over front colors on back faces. */
+static void do_twoside_color( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *if_insn;
+   GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
+
+   /* Already done in clip program:
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   /* XXX: What happens if BFC isn't present?  This could only happen
+    * for user-supplied vertex programs, as t_vp_build.c always does
+    * the right thing.
+    */
+   if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
+       !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
+      return;
+   
+   /* Need to use BRW_EXECUTE_4 and also do a 4-wide compare in order
+    * to get all channels active inside the IF.  In the clipping code
+    * we run with NoMask, so it's not an option and we can use
+    * BRW_EXECUTE_1 for all comparisons.
+    */
+   brw_push_insn_state(p);
+   brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
+   if_insn = brw_IF(p, BRW_EXECUTE_4); 
+   {
+      switch (c->nr_verts) {
+      case 3: copy_bfc(c, c->vert[2]); /* intentional fallthrough */
+      case 2: copy_bfc(c, c->vert[1]); /* intentional fallthrough */
+      case 1: copy_bfc(c, c->vert[0]);
+      }
+   }
+   brw_ENDIF(p, if_insn);
+   brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Flat shading
+ */
+/* Bits of the COL0 and COL1 vertex results within a 64-bit attribute mask. */
+#define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \
+				BITFIELD64_BIT(VERT_RESULT_COL1))
+
+/* Emit one MOV per color attribute present in the key, copying it
+ * from vertex 'src' to vertex 'dst'. */
+static void copy_colors( struct brw_sf_compile *c,
+		     struct brw_reg dst, struct brw_reg src)
+{
+   struct brw_compile *p = &c->func;
+   GLuint attr = VERT_RESULT_COL0;
+
+   while (attr <= VERT_RESULT_COL1) {
+      if (have_attr(c, attr))
+	 brw_MOV(p, get_vert_attr(c, dst, attr), get_vert_attr(c, src, attr));
+      attr++;
+   }
+}
+
+
+
+/* Need to use a computed jump to copy flatshaded attributes as the
+ * vertices are ordered according to y-coordinate before reaching this
+ * point, so the PV could be anywhere.
+ */
+static void do_flatshade_triangle( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_reg ip = brw_ip_reg();
+   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); /* MOVs per copy_colors() */
+   GLuint jmpi = 1;
+
+   if (!nr)
+      return;
+
+   /* Already done in clip program:
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   if (intel->gen == 5)
+       jmpi = 2; /* every jump distance is doubled on gen 5 */
+
+   brw_push_insn_state(p);
+   /* Scale pv by the length of one case (2*nr MOVs + 1 JMPI) and jump: */
+   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
+   brw_JMPI(p, ip, ip, c->pv);
+   /* Case entered with jump distance 0: vert[0] supplies the colors. */
+   copy_colors(c, c->vert[1], c->vert[0]);
+   copy_colors(c, c->vert[2], c->vert[0]);
+   brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1))); /* skip both remaining cases */
+   /* Next case: vert[1] supplies the colors. */
+   copy_colors(c, c->vert[0], c->vert[1]);
+   copy_colors(c, c->vert[2], c->vert[1]);
+   brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2)); /* skip the last case */
+   /* Last case: vert[2] supplies the colors; nothing left to skip. */
+   copy_colors(c, c->vert[0], c->vert[2]);
+   copy_colors(c, c->vert[1], c->vert[2]);
+
+   brw_pop_insn_state(p);
+}
+	
+/* Line variant of the computed jump: one vertex's colors overwrite the other's. */
+static void do_flatshade_line( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_reg ip = brw_ip_reg();
+   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); /* MOVs per copy_colors() */
+   GLuint jmpi = 1;
+
+   if (!nr)
+      return;
+
+   /* Already done in clip program: 
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   if (intel->gen == 5)
+       jmpi = 2; /* every jump distance is doubled on gen 5 */
+
+   brw_push_insn_state(p);
+   /* Scale pv by the length of the first case (nr MOVs + 1 JMPI) and jump: */
+   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
+   brw_JMPI(p, ip, ip, c->pv);
+   copy_colors(c, c->vert[1], c->vert[0]);
+   /* Skip the second case (nr MOVs): */
+   brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
+   copy_colors(c, c->vert[0], c->vert[1]);
+
+   brw_pop_insn_state(p);
+}
+
+	
+
+/***********************************************************************
+ * Triangle setup.
+ */
+
+/* Lay out the payload, vertex, temporary and message registers for c->nr_verts. */
+static void alloc_regs( struct brw_sf_compile *c )
+{
+   GLuint reg, i;
+
+   /* Values computed by fixed function unit:
+    */
+   c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
+   c->det = brw_vec1_grf(1, 2);
+   c->dx0 = brw_vec1_grf(1, 3);
+   c->dx2 = brw_vec1_grf(1, 4);
+   c->dy0 = brw_vec1_grf(1, 5);
+   c->dy2 = brw_vec1_grf(1, 6);
+
+   /* z and 1/w passed in separately:
+    */
+   c->z[0]     = brw_vec1_grf(2, 0);
+   c->inv_w[0] = brw_vec1_grf(2, 1);
+   c->z[1]     = brw_vec1_grf(2, 2);
+   c->inv_w[1] = brw_vec1_grf(2, 3);
+   c->z[2]     = brw_vec1_grf(2, 4);
+   c->inv_w[2] = brw_vec1_grf(2, 5);
+   
+   /* The vertices:
+    */
+   reg = 3; /* vertices start in the register after the z / 1/w payload */
+   for (i = 0; i < c->nr_verts; i++) {
+      c->vert[i] = brw_vec8_grf(reg, 0);
+      reg += c->nr_attr_regs;
+   }
+
+   /* Temporaries, allocated after last vertex reg.
+    */
+   c->inv_det = brw_vec1_grf(reg, 0);  reg++;
+   c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
+   c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
+   c->tmp = brw_vec8_grf(reg, 0);  reg++;
+
+   /* Note grf allocation:
+    */
+   c->prog_data.total_grf = reg;
+   
+
+   /* Outputs of this program - interpolation coefficients for
+    * rasterization:
+    */
+   c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
+   c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
+   c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
+}
+
+
+/* Copy each vertex's z and 1/w from the payload into its first register. */
+static void copy_z_inv_w( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   GLuint v;
+
+   brw_push_insn_state(p);
+   for (v = 0; v < c->nr_verts; v++) {
+      /* z[v] and inv_w[v] are adjacent, so a 2-wide MOV takes both. */
+      struct brw_reg dst = vec2(suboffset(c->vert[v], 2));
+      brw_MOV(p, dst, vec2(c->z[v]));
+   }
+   brw_pop_insn_state(p);
+}
+
+
+/* Emit a math instruction computing inv_det = 1 / det. */
+static void invert_det( struct brw_sf_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   /* Looks like we invert all 8 elements just to get 1/det in
+    * position 2 !?!
+    */
+   brw_math(p, c->inv_det,
+	    BRW_MATH_FUNCTION_INV,
+	    BRW_MATH_SATURATE_NONE,
+	    0,
+	    c->det,
+	    BRW_MATH_DATA_SCALAR,
+	    BRW_MATH_PRECISION_FULL);
+}
+
+
+/* Work out the channel masks for setup register 'reg', which holds up to
+ * two vec4 attributes (low nibble = first, high nibble = second):
+ * *pc marks every attribute present, *pc_persp those multiplied by 1/w,
+ * *pc_linear those that get gradients.  Returns true for the last register. */
+static GLboolean calculate_masks( struct brw_sf_compile *c,
+				  GLuint reg,
+				  GLushort *pc,
+				  GLushort *pc_persp,
+				  GLushort *pc_linear)
+{
+   GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
+   GLbitfield64 persp_mask;
+   GLbitfield64 linear_mask;
+
+   /* key.attrs is a mask of VERT_RESULT_* bits (see have_attr()), so the
+    * exclusions are spelled with VERT_RESULT bits, not FRAG_BIT_* ones.
+    */
+   persp_mask = c->key.attrs & ~BITFIELD64_BIT(VERT_RESULT_HPOS);
+   if (c->key.do_flat_shading || c->key.linear_color)
+      persp_mask &= ~VERT_RESULT_COLOR_BITS;
+
+   linear_mask = c->key.attrs;
+   if (c->key.do_flat_shading)
+      linear_mask &= ~VERT_RESULT_COLOR_BITS;
+
+   *pc_persp = 0;
+   *pc_linear = 0;
+   *pc = 0xf;
+
+   if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2]))
+      *pc_persp = 0xf;
+   if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2]))
+      *pc_linear = 0xf;
+
+   /* Maybe only process one attribute on the final round:
+    */
+   if (reg*2+1 < c->nr_setup_attrs) {
+      *pc |= 0xf0;
+      if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1]))
+	 *pc_persp |= 0xf0;
+      if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1]))
+	 *pc_linear |= 0xf0;
+   }
+
+   return is_last_attr;
+}
+
+/* Calculates the predicate control for which channels of a reg
+ * (containing 2 attrs) to do point sprite coordinate replacement on.
+ */
+static uint16_t
+calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
+{
+   uint16_t pc = 0;
+   GLuint half;
+
+   /* Low nibble covers attribute reg*2, high nibble attribute reg*2+1. */
+   for (half = 0; half < 2; half++) {
+      const GLuint slot = reg * 2 + half;
+      int attr;
+      /* The second slot may not exist on the final register. */
+      if (half == 1 && slot >= c->nr_setup_attrs)
+	 break;
+
+      attr = c->idx_to_attr[slot];
+      if (attr < VERT_RESULT_TEX0 || attr > VERT_RESULT_TEX7)
+	 continue;
+      if (c->key.point_sprite_coord_replace & (1 << (attr - VERT_RESULT_TEX0)))
+	 pc |= 0x0f << (4 * half);
+   }
+
+   return pc;
+}
+
+
+/* Emit triangle setup: interpolation coefficients per attribute pair, written to the URB. */
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   c->nr_verts = 3;
+
+   if (allocate)
+      alloc_regs(c);
+
+   invert_det(c);
+   copy_z_inv_w(c);
+
+   if (c->key.do_twoside_color) 
+      do_twoside_color(c);
+
+   if (c->key.do_flat_shading)
+      do_flatshade_triangle(c);
+      
+   /* One pass per setup register, i.e. per pair of attributes: */
+   for (i = 0; i < c->nr_setup_regs; i++)
+   {
+      /* Pair of incoming attributes:
+       */
+      struct brw_reg a0 = offset(c->vert[0], i);
+      struct brw_reg a1 = offset(c->vert[1], i);
+      struct brw_reg a2 = offset(c->vert[2], i);
+      GLushort pc, pc_persp, pc_linear;
+      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+      if (pc_persp)
+      {
+	 /* Multiply the flagged channels of each vertex by its 1/w. */
+	 brw_set_predicate_control_flag_value(p, pc_persp);
+	 brw_MUL(p, a0, a0, c->inv_w[0]);
+	 brw_MUL(p, a1, a1, c->inv_w[1]);
+	 brw_MUL(p, a2, a2, c->inv_w[2]);
+      }
+      
+      
+      /* Calculate coefficients for interpolated values:
+       */      
+      if (pc_linear)
+      {
+	 brw_set_predicate_control_flag_value(p, pc_linear);
+
+	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
+
+	 /* calculate dA/dx = (a1_sub_a0*dy2 - a2_sub_a0*dy0) * inv_det,
+	  * presumably via the accumulator fed by the MUL to null. */
+	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
+	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
+	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+		
+	 /* calculate dA/dy = (a2_sub_a0*dx0 - a1_sub_a0*dx2) * inv_det,
+	  * same MUL-to-null + MAC pattern as above. */
+	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
+	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
+	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+      }
+
+      {
+	 brw_set_predicate_control_flag_value(p, pc); 
+	 /* start point for interpolation
+	  */
+	 brw_MOV(p, c->m3C0, a0);
+      
+	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
+	  * the send instruction:
+	  */	 
+	 brw_urb_WRITE(p, 
+		       brw_null_reg(),
+		       0,
+		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+		       0, 	/* allocate */
+		       1,	/* used */
+		       4, 	/* msg len */
+		       0,	/* response len */
+		       last,	/* eot */
+		       last, 	/* writes complete */
+		       i*4,	/* offset */
+		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
+      }
+   }
+}
+
+
+/* Emit line setup: interpolation coefficients per attribute pair, written to the URB. */
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+
+   c->nr_verts = 2;
+
+   if (allocate)
+      alloc_regs(c);
+
+   invert_det(c);
+   copy_z_inv_w(c);
+
+   if (c->key.do_flat_shading)
+      do_flatshade_line(c);
+
+   for (i = 0; i < c->nr_setup_regs; i++)
+   {
+      /* Pair of incoming attributes:
+       */
+      struct brw_reg a0 = offset(c->vert[0], i);
+      struct brw_reg a1 = offset(c->vert[1], i);
+      GLushort pc, pc_persp, pc_linear;
+      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+      if (pc_persp)
+      {
+	 /* Multiply the flagged channels of each vertex by its 1/w. */
+	 brw_set_predicate_control_flag_value(p, pc_persp);
+	 brw_MUL(p, a0, a0, c->inv_w[0]);
+	 brw_MUL(p, a1, a1, c->inv_w[1]);
+      }
+
+      /* Calculate coefficients for position, color:
+       */
+      if (pc_linear) {
+	 brw_set_predicate_control_flag_value(p, pc_linear); 
+
+	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+	 /* m1Cx = a1_sub_a0 * dx0 * inv_det */
+ 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); 
+	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+	 /* m2Cy = a1_sub_a0 * dy0 * inv_det */
+	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
+	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+      }
+
+      {
+	 brw_set_predicate_control_flag_value(p, pc); 
+
+	 /* start point for interpolation
+	  */
+	 brw_MOV(p, c->m3C0, a0);
+
+	 /* Copy m0..m3 to URB. 
+	  */
+	 brw_urb_WRITE(p, 
+		       brw_null_reg(),
+		       0,
+		       brw_vec8_grf(0, 0),
+		       0, 	/* allocate */
+		       1, 	/* used */
+		       4, 	/* msg len */
+		       0,	/* response len */
+		       last, 	/* eot */
+		       last, 	/* writes complete */
+		       i*4,	/* urb destination offset */
+		       BRW_URB_SWIZZLE_TRANSPOSE); 
+      }
+   } 
+}
+/* Emit point setup in which texcoords flagged for replacement get sprite coordinates. */
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   c->nr_verts = 1;
+
+   if (allocate)
+      alloc_regs(c);
+
+   copy_z_inv_w(c);
+   for (i = 0; i < c->nr_setup_regs; i++)
+   {
+      struct brw_reg a0 = offset(c->vert[0], i);
+      GLushort pc, pc_persp, pc_linear, pc_coord_replace;
+      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+      pc_coord_replace = calculate_point_sprite_mask(c, i);
+      pc_persp &= ~pc_coord_replace; /* replaced texcoords skip the 1/w multiply */
+
+      if (pc_persp) {
+	 brw_set_predicate_control_flag_value(p, pc_persp);
+	 brw_MUL(p, a0, a0, c->inv_w[0]);
+      }
+
+      /* Point sprite coordinate replacement: A texcoord with this
+       * enabled gets replaced with the value (x, y, 0, 1) where x and
+       * y vary from 0 to 1 across the horizontal and vertical of the
+       * point.
+       */
+      if (pc_coord_replace) {
+	 brw_set_predicate_control_flag_value(p, pc_coord_replace);
+	 /* Calculate 1.0/PointWidth */
+	 brw_math(&c->func,
+		  c->tmp,
+		  BRW_MATH_FUNCTION_INV,
+		  BRW_MATH_SATURATE_NONE,
+		  0,
+		  c->dx0,
+		  BRW_MATH_DATA_SCALAR,
+		  BRW_MATH_PRECISION_FULL);
+
+	 brw_set_access_mode(p, BRW_ALIGN_16);
+
+	 /* dA/dx, dA/dy */
+	 brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
+	 brw_MOV(p, c->m2Cy, brw_imm_f(0.0));
+	 brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp);
+	 if (c->key.sprite_origin_lower_left) {
+	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp));
+	 } else {
+	    brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp);
+	 }
+
+	 /* attribute constant offset */
+	 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+	 if (c->key.sprite_origin_lower_left) {
+	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0));
+	 } else {
+	    brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
+	 }
+
+	 brw_set_access_mode(p, BRW_ALIGN_1);
+      }
+
+      if (pc & ~pc_coord_replace) {
+	 /* Attributes without replacement: zero gradients, constant a0. */
+	 brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace);
+	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
+	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
+	 brw_MOV(p, c->m3C0, a0); /* constant value */
+      }
+
+      brw_set_predicate_control_flag_value(p, pc);
+      /* Copy m0..m3 to URB. */
+      brw_urb_WRITE(p,
+		    brw_null_reg(),
+		    0,
+		    brw_vec8_grf(0, 0),
+		    0, 	/* allocate */
+		    1,	/* used */
+		    4, 	/* msg len */
+		    0,	/* response len */
+		    last, 	/* eot */
+		    last, 	/* writes complete */
+		    i*4,	/* urb destination offset */
+		    BRW_URB_SWIZZLE_TRANSPOSE);
+   }
+}
+
+/* Points setup - several simplifications as all attributes are
+ * constant across the face of the point (point sprites excluded!)
+ */
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   c->nr_verts = 1;
+   
+   if (allocate)
+      alloc_regs(c);
+
+   copy_z_inv_w(c);
+   /* Both gradient registers are written once here, ahead of the loop. */
+   brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
+   brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
+
+   for (i = 0; i < c->nr_setup_regs; i++)
+   {
+      struct brw_reg a0 = offset(c->vert[0], i);
+      GLushort pc, pc_persp, pc_linear;
+      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+            
+      if (pc_persp)
+      {				
+	 /* This seems odd as the values are all constant, but the
+	  * fragment shader will be expecting it:
+	  */
+	 brw_set_predicate_control_flag_value(p, pc_persp);
+	 brw_MUL(p, a0, a0, c->inv_w[0]);
+      }
+
+
+      /* The delta values are always zero, just send the starting
+       * coordinate.  Again, this is to fit in with the interpolation
+       * code in the fragment shader.
+       */
+      {
+	 brw_set_predicate_control_flag_value(p, pc); 
+
+	 brw_MOV(p, c->m3C0, a0); /* constant value */
+
+	 /* Copy m0..m3 to URB. 
+	  */
+	 brw_urb_WRITE(p, 
+		       brw_null_reg(),
+		       0,
+		       brw_vec8_grf(0, 0),
+		       0, 	/* allocate */
+		       1,	/* used */
+		       4, 	/* msg len */
+		       0,	/* response len */
+		       last, 	/* eot */
+		       last, 	/* writes complete */
+		       i*4,	/* urb destination offset */
+		       BRW_URB_SWIZZLE_TRANSPOSE);
+      }
+   }
+}
+/* Emit one program that tests the payload primitive type and runs the matching setup. */
+void brw_emit_anyprim_setup( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg ip = brw_ip_reg();
+   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
+   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); 
+   struct brw_reg primmask;
+   struct brw_instruction *jmp;
+   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+   
+   GLuint saveflag;
+
+   c->nr_verts = 3; /* allocate for the largest case, triangles */
+   alloc_regs(c);
+
+   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
+   /* primmask = 1 << payload_prim, tested below against sets of _3DPRIM_* bits. */
+   brw_MOV(p, primmask, brw_imm_ud(1));
+   brw_SHL(p, primmask, primmask, payload_prim);
+   /* Triangle-class primitives; presumably the JMPI skips the block when the AND is zero. */
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
+					       (1<<_3DPRIM_TRISTRIP) |
+					       (1<<_3DPRIM_TRIFAN) |
+					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
+					       (1<<_3DPRIM_POLYGON) |
+					       (1<<_3DPRIM_RECTLIST) |
+					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
+   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+   {
+      saveflag = p->flag_value;
+      brw_push_insn_state(p); 
+      brw_emit_tri_setup( c, GL_FALSE );
+      brw_pop_insn_state(p);
+      p->flag_value = saveflag;
+      /* note - thread killed in subroutine, so must
+       * restore the flag which is changed when building
+       * the subroutine. fix #13240
+       */
+   }
+   brw_land_fwd_jump(p, jmp);
+   /* Line-class primitives, same pattern: */
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
+					       (1<<_3DPRIM_LINESTRIP) |
+					       (1<<_3DPRIM_LINELOOP) |
+					       (1<<_3DPRIM_LINESTRIP_CONT) |
+					       (1<<_3DPRIM_LINESTRIP_BF) |
+					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
+   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+   {
+      saveflag = p->flag_value;
+      brw_push_insn_state(p); 
+      brw_emit_line_setup( c, GL_FALSE );
+      brw_pop_insn_state(p);
+      p->flag_value = saveflag;
+      /* note - thread killed in subroutine */
+   }
+   brw_land_fwd_jump(p, jmp); 
+   /* Point sprites, selected by the BRW_SPRITE_POINT_ENABLE payload bit: */
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
+   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+   {
+      saveflag = p->flag_value;
+      brw_push_insn_state(p); 
+      brw_emit_point_sprite_setup( c, GL_FALSE );
+      brw_pop_insn_state(p);
+      p->flag_value = saveflag;
+   }
+   brw_land_fwd_jump(p, jmp); 
+   /* Everything else falls through to plain point setup. */
+   brw_emit_point_setup( c, GL_FALSE );
+}
+
+
+
+
diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
new file mode 100644
index 0000000000..e290ca92f6
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
@@ -0,0 +1,375 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+   
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "main/macros.h"
+/* Build the SF viewport state (viewport matrix + scissor) and store it in the state cache. */
+static void upload_sf_vp(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   struct brw_sf_viewport sfv;
+   GLfloat y_scale, y_bias;
+   const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+   memset(&sfv, 0, sizeof(sfv));
+   /* Window rendering (Name == 0) flips Y: negate the scale, bias by the height. */
+   if (render_to_fbo) {
+      y_scale = 1.0;
+      y_bias = 0;
+   }
+   else {
+      y_scale = -1.0;
+      y_bias = ctx->DrawBuffer->Height;
+   }
+
+   /* _NEW_VIEWPORT */
+
+   sfv.viewport.m00 = v[MAT_SX];
+   sfv.viewport.m11 = v[MAT_SY] * y_scale;
+   sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
+   sfv.viewport.m30 = v[MAT_TX];
+   sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+   sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
+
+   /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT
+    * for DrawBuffer->_[XY]{min,max}
+    */
+
+   /* The scissor only needs to handle the intersection of drawable
+    * and scissor rect, since there are no longer cliprects for shared
+    * buffers with DRI2.
+    *
+    * Note that the hardware's coordinates are inclusive, while Mesa's min is
+    * inclusive but max is exclusive.
+    */
+
+   if (ctx->DrawBuffer->_Xmin == ctx->DrawBuffer->_Xmax ||
+       ctx->DrawBuffer->_Ymin == ctx->DrawBuffer->_Ymax) {
+      /* If the scissor was out of bounds and got clamped to 0
+       * width/height at the bounds, the subtraction of 1 from
+       * maximums could produce a negative number and thus not clip
+       * anything.  Instead, just provide a min > max scissor inside
+       * the bounds, which produces the expected no rendering.
+       */
+      sfv.scissor.xmin = 1;
+      sfv.scissor.xmax = 0;
+      sfv.scissor.ymin = 1;
+      sfv.scissor.ymax = 0;
+   } else if (render_to_fbo) {
+      /* texmemory: Y=0=bottom */
+      sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
+      sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+      sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
+      sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
+   }
+   else {
+      /* memory: Y=0=top */
+      sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
+      sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+      sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
+      sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
+   }
+   /* Drop the reference to the previous viewport buffer before replacing it. */
+   drm_intel_bo_unreference(brw->sf.vp_bo);
+   brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv));
+}
+/* Registers upload_sf_vp as the .prepare hook with the Mesa state bits it reads. */
+const struct brw_tracked_state brw_sf_vp = {
+   .dirty = {
+      .mesa  = (_NEW_VIEWPORT | 
+		_NEW_SCISSOR |
+		_NEW_BUFFERS),
+      .brw   = 0,
+      .cache = 0
+   },
+   .prepare = upload_sf_vp
+};
+/* Cache key for the SF unit state; filled in by sf_unit_populate_key(). */
+struct brw_sf_unit_key {
+   unsigned int total_grf;
+   unsigned int urb_entry_read_length;
+   /* Copied from brw->urb (nr_sf_entries, vsize, sfsize): */
+   unsigned int nr_urb_entries, urb_size, sfsize;
+   /* cull_face is GL_NONE when culling is disabled: */
+   GLenum front_face, cull_face;
+   unsigned pv_first:1;  /* provoking vertex is GL_FIRST_VERTEX_CONVENTION */
+   unsigned scissor:1;
+   unsigned line_smooth:1;
+   unsigned point_sprite:1;
+   unsigned point_attenuated:1;
+   unsigned render_to_fbo:1;  /* DrawBuffer->Name != 0 */
+   float line_width;
+   float point_size;  /* already clamped to Point.MinSize..MaxSize */
+};
+/* Zero 'key' and fill it from the current GL context and SF program/URB state. */
+static void
+sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   memset(key, 0, sizeof(*key)); /* whole struct, including padding, is zeroed */
+
+   /* CACHE_NEW_SF_PROG */
+   key->total_grf = brw->sf.prog_data->total_grf;
+   key->urb_entry_read_length = brw->sf.prog_data->urb_read_length;
+
+   /* BRW_NEW_URB_FENCE */
+   key->nr_urb_entries = brw->urb.nr_sf_entries;
+   key->urb_size = brw->urb.vsize;
+   key->sfsize = brw->urb.sfsize;
+
+   key->scissor = ctx->Scissor.Enabled;
+   key->front_face = ctx->Polygon.FrontFace;
+
+   if (ctx->Polygon.CullFlag)
+      key->cull_face = ctx->Polygon.CullFaceMode;
+   else
+      key->cull_face = GL_NONE;
+
+   key->line_width = ctx->Line.Width;
+   key->line_smooth = ctx->Line.SmoothFlag;
+
+   key->point_sprite = ctx->Point.PointSprite;
+   key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
+   key->point_attenuated = ctx->Point._Attenuated;
+
+   /* _NEW_LIGHT */
+   key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
+
+   key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+}
+/* Build the SF unit state for 'key', upload it to the cache and emit its two relocations. */
+static drm_intel_bo *
+sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
+			drm_intel_bo **reloc_bufs)
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_sf_unit_state sf;
+   drm_intel_bo *bo;
+   int chipset_max_threads;
+   memset(&sf, 0, sizeof(sf));
+
+   sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
+
+   sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+   sf.thread3.dispatch_grf_start_reg = 3;
+
+   if (intel->gen == 5)
+       sf.thread3.urb_entry_read_offset = 3;
+   else
+       sf.thread3.urb_entry_read_offset = 1;
+
+   sf.thread3.urb_entry_read_length = key->urb_entry_read_length;
+
+   sf.thread4.nr_urb_entries = key->nr_urb_entries;
+   sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
+
+   /* Each SF thread produces 1 PUE, and there can be up to 24 (Pre-Ironlake) or
+    * 48 (Ironlake) threads.
+    */
+   if (intel->gen == 5)
+      chipset_max_threads = 48;
+   else
+      chipset_max_threads = 24;
+
+   sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;
+
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+      sf.thread4.max_threads = 0;
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      sf.thread4.stats_enable = 1;
+
+   /* CACHE_NEW_SF_VP */
+   sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */
+
+   sf.sf5.viewport_transform = 1;
+
+   /* _NEW_SCISSOR */
+   if (key->scissor)
+      sf.sf6.scissor = 1;
+
+   /* _NEW_POLYGON */
+   if (key->front_face == GL_CCW)
+      sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
+   else
+      sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+
+   /* The viewport is inverted for rendering to a FBO, and that inverts
+    * polygon front/back orientation.
+    */
+   sf.sf5.front_winding ^= key->render_to_fbo;
+
+   switch (key->cull_face) {
+   case GL_FRONT:
+      sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
+      break;
+   case GL_BACK:
+      sf.sf6.cull_mode = BRW_CULLMODE_BACK;
+      break;
+   case GL_FRONT_AND_BACK:
+      sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
+      break;
+   case GL_NONE:
+      sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   /* _NEW_LINE */
+   /* XXX use ctx->Const.Min/MaxLineWidth here */
+   sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1);
+
+   sf.sf6.line_endcap_aa_region_width = 1;
+   if (key->line_smooth)
+      sf.sf6.aa_enable = 1;
+   else if (sf.sf6.line_width <= 0x2)
+       sf.sf6.line_width = 0;
+
+   /* _NEW_BUFFERS -- NOTE(review): re-derives a field sf_unit_populate_key() already set. */
+   key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+   if (!key->render_to_fbo) {
+      /* Rendering to an OpenGL window */
+      sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
+   }
+   else {
+      /* If rendering to an FBO, the pixel coordinate system is
+       * inverted with respect to the normal OpenGL coordinate
+       * system, so BRW_RASTRULE_LOWER_RIGHT is correct.
+       * But this value is listed as "Reserved, but not seen as useful"
+       * in Intel documentation (page 212, "Point Rasterization Rule",
+       * section 7.4 "SF Pipeline State Summary", of document
+       * "Intel® 965 Express Chipset Family and Intel® G35 Express
+       * Chipset Graphics Controller Programmer's Reference Manual,
+       * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+       * available at 
+       *     http://intellinuxgraphics.org/documentation.html
+       * at the time of this writing).
+       *
+       * It does work on at least some devices, if not all;
+       * if devices that don't support it can be identified,
+       * the likely failure case is that points are rasterized
+       * incorrectly, which is no worse than occurs without
+       * the value, so we're using it here.
+       */
+      sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
+   }
+   /* XXX clamp max depends on AA vs. non-AA */
+
+   /* _NEW_POINT */
+   sf.sf7.sprite_point = key->point_sprite;
+   sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
+   sf.sf7.use_point_size_state = !key->point_attenuated;
+   sf.sf7.aa_line_distance_mode = 0;
+
+   /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
+    */
+   if (!key->pv_first) {
+      sf.sf7.trifan_pv = 2;
+      sf.sf7.linestrip_pv = 1;
+      sf.sf7.tristrip_pv = 2;
+   } else {
+      sf.sf7.trifan_pv = 1;
+      sf.sf7.linestrip_pv = 0;
+      sf.sf7.tristrip_pv = 0;
+   }
+   sf.sf7.line_last_pixel_enable = 0;
+
+   /* Set bias for OpenGL rasterization rules:
+    */
+   sf.sf6.dest_org_vbias = 0x8;
+   sf.sf6.dest_org_hbias = 0x8;
+
+   bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
+			 key, sizeof(*key),
+			 reloc_bufs, 2,
+			 &sf, sizeof(sf));
+
+   /* STATE_PREFETCH command description describes this state as being
+    * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
+    */
+   /* Emit SF program relocation */
+   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_sf_unit_state, thread0),
+			   brw->sf.prog_bo, sf.thread0.grf_reg_count << 1,
+			   I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+   /* Emit SF viewport relocation */
+   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_sf_unit_state, sf5),
+			   brw->sf.vp_bo, (sf.sf5.front_winding |
+					   (sf.sf5.viewport_transform << 1)),
+			   I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+   return bo;
+}
+/* Look up the SF unit state for the current key in the cache, building it on a miss. */
+static void upload_sf_unit( struct brw_context *brw )
+{
+   struct brw_sf_unit_key key;
+   drm_intel_bo *reloc_bufs[2];
+
+   sf_unit_populate_key(brw, &key);
+   /* The program and viewport buffers are passed to the cache alongside the key. */
+   reloc_bufs[0] = brw->sf.prog_bo;
+   reloc_bufs[1] = brw->sf.vp_bo;
+
+   drm_intel_bo_unreference(brw->sf.state_bo);
+   brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT,
+				       &key, sizeof(key),
+				       reloc_bufs, 2,
+				       NULL);
+   if (brw->sf.state_bo == NULL) {
+      brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs);
+   }
+}
+/* Registers upload_sf_unit as the .prepare hook with the state bits it reads. */
+const struct brw_tracked_state brw_sf_unit = {
+   .dirty = {
+      .mesa  = (_NEW_POLYGON | 
+		_NEW_LIGHT |
+		_NEW_LINE | 
+		_NEW_POINT | 
+		_NEW_SCISSOR |
+		_NEW_BUFFERS),
+      .brw   = BRW_NEW_URB_FENCE,
+      .cache = (CACHE_NEW_SF_VP |
+		CACHE_NEW_SF_PROG)
+   },
+   .prepare = upload_sf_unit,
+};
diff --git a/src/mesa/drivers/dri/i965/brw_state.c b/src/mesa/drivers/dri/i965/brw_state.c
new file mode 100644
index 0000000000..1e77e427d3
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_state.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+
+/* Enable/disable hook: GL_DEPTH_CLAMP is the only cap handled; it calls
+ * brw_update_cc_vp() whichever way it was toggled (state is not consulted). */
+void
+brw_enable(GLcontext *ctx, GLenum cap, GLboolean state)
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   (void) state;
+   if (cap == GL_DEPTH_CLAMP)
+      brw_update_cc_vp(brw);
+}
+
+void
+brw_depth_range(GLcontext *ctx, GLclampd nearval, GLclampd farval)
+{
+   struct brw_context *brw = brw_context(ctx);
+   /* nearval/farval are unused here; only the depth-clamp flag is checked. */
+   if (ctx->Transform.DepthClamp)
+      brw_update_cc_vp(brw);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
new file mode 100644
index 0000000000..40eece276b
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -0,0 +1,186 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+    
+
+#ifndef BRW_STATE_H
+#define BRW_STATE_H
+
+#include "brw_context.h"
+
+/* Appends bo (taking a reference) to brw->state.validated_bos; NULL is ignored. */
+static INLINE void
+brw_add_validated_bo(struct brw_context *brw, drm_intel_bo *bo)
+{
+   assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos));
+   if (bo != NULL) {
+      drm_intel_bo_reference(bo);
+      brw->state.validated_bos[brw->state.validated_bo_count++] = bo;
+   }
+}
+
+extern const struct brw_tracked_state brw_blend_constant_color; /* each atom is defined once, in its own .c file */
+extern const struct brw_tracked_state brw_cc_unit;
+extern const struct brw_tracked_state brw_check_fallback;
+extern const struct brw_tracked_state brw_clip_prog;
+extern const struct brw_tracked_state brw_clip_unit;
+extern const struct brw_tracked_state brw_vs_constants;
+extern const struct brw_tracked_state brw_wm_constants;
+extern const struct brw_tracked_state brw_constant_buffer;
+extern const struct brw_tracked_state brw_curbe_offsets;
+extern const struct brw_tracked_state brw_invarient_state;
+extern const struct brw_tracked_state brw_gs_prog;
+extern const struct brw_tracked_state brw_gs_unit;
+extern const struct brw_tracked_state brw_line_stipple;
+extern const struct brw_tracked_state brw_aa_line_parameters;
+extern const struct brw_tracked_state brw_pipelined_state_pointers;
+extern const struct brw_tracked_state brw_binding_table_pointers;
+extern const struct brw_tracked_state brw_depthbuffer;
+extern const struct brw_tracked_state brw_polygon_stipple_offset;
+extern const struct brw_tracked_state brw_polygon_stipple;
+extern const struct brw_tracked_state brw_program_parameters;
+extern const struct brw_tracked_state brw_recalculate_urb_fence;
+extern const struct brw_tracked_state brw_sf_prog;
+extern const struct brw_tracked_state brw_sf_unit;
+extern const struct brw_tracked_state brw_sf_vp;
+extern const struct brw_tracked_state brw_state_base_address;
+extern const struct brw_tracked_state brw_urb_fence;
+extern const struct brw_tracked_state brw_vertex_state;
+extern const struct brw_tracked_state brw_vs_surfaces;
+extern const struct brw_tracked_state brw_vs_prog;
+extern const struct brw_tracked_state brw_vs_unit;
+extern const struct brw_tracked_state brw_wm_input_sizes;
+extern const struct brw_tracked_state brw_wm_prog;
+extern const struct brw_tracked_state brw_wm_samplers;
+extern const struct brw_tracked_state brw_wm_constant_surface;
+extern const struct brw_tracked_state brw_wm_surfaces;
+extern const struct brw_tracked_state brw_wm_binding_table;
+extern const struct brw_tracked_state brw_wm_unit;
+
+extern const struct brw_tracked_state brw_psp_urb_cbs;
+
+extern const struct brw_tracked_state brw_pipe_control;
+
+extern const struct brw_tracked_state brw_drawing_rect;
+extern const struct brw_tracked_state brw_indices;
+extern const struct brw_tracked_state brw_vertices;
+extern const struct brw_tracked_state brw_index_buffer;
+extern const struct brw_tracked_state gen6_binding_table_pointers;
+extern const struct brw_tracked_state gen6_blend_state;
+extern const struct brw_tracked_state gen6_cc_state_pointers;
+extern const struct brw_tracked_state gen6_clip_state;
+extern const struct brw_tracked_state gen6_clip_vp;
+extern const struct brw_tracked_state gen6_color_calc_state;
+extern const struct brw_tracked_state gen6_depth_stencil_state;
+extern const struct brw_tracked_state gen6_gs_state;
+extern const struct brw_tracked_state gen6_sampler_state;
+extern const struct brw_tracked_state gen6_scissor_state;
+extern const struct brw_tracked_state gen6_sf_state;
+extern const struct brw_tracked_state gen6_sf_vp;
+extern const struct brw_tracked_state gen6_urb;
+extern const struct brw_tracked_state gen6_viewport_state;
+extern const struct brw_tracked_state gen6_vs_state;
+extern const struct brw_tracked_state gen6_wm_state;
+
+/***********************************************************************
+ * brw_state.c
+ */
+void brw_validate_state(struct brw_context *brw);
+void brw_upload_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+void brw_clear_validated_bos(struct brw_context *brw);
+
+/***********************************************************************
+ * brw_state_cache.c
+ */
+drm_intel_bo *brw_cache_data(struct brw_cache *cache,
+		       enum brw_cache_id cache_id,
+		       const void *data,
+		       GLuint size);
+
+drm_intel_bo *brw_upload_cache(struct brw_cache *cache,
+			       enum brw_cache_id cache_id,
+			       const void *key,
+			       GLuint key_sz,
+			       drm_intel_bo **reloc_bufs,
+			       GLuint nr_reloc_bufs,
+			       const void *data,
+			       GLuint data_sz);
+
+drm_intel_bo *brw_upload_cache_with_auxdata(struct brw_cache *cache,
+					    enum brw_cache_id cache_id,
+					    const void *key,
+					    GLuint key_sz,
+					    drm_intel_bo **reloc_bufs,
+					    GLuint nr_reloc_bufs,
+					    const void *data,
+					    GLuint data_sz,
+					    const void *aux,
+					    GLuint aux_sz,
+					    void *aux_return);
+
+drm_intel_bo *brw_search_cache( struct brw_cache *cache,
+			  enum brw_cache_id cache_id,
+			  const void *key,
+			  GLuint key_size,
+			  drm_intel_bo **reloc_bufs,
+			  GLuint nr_reloc_bufs,
+			  void *aux_return);
+void brw_state_cache_check_size( struct brw_context *brw );
+
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
+
+/***********************************************************************
+ * brw_state_batch.c
+ */
+#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( (brw)->intel.batch, (s), sizeof(*(s))) /* args parenthesized */
+#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( (brw), (s), sizeof(*(s)) )
+
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+				   const void *data,
+				   GLuint sz );
+void brw_destroy_batch_cache( struct brw_context *brw );
+void brw_clear_batch_cache( struct brw_context *brw );
+void *brw_state_batch(struct brw_context *brw,
+		      int size,
+		      int alignment,
+		      drm_intel_bo **out_bo,
+		      uint32_t *out_offset);
+
+/* brw_wm_surface_state.c */
+void brw_create_constant_surface(struct brw_context *brw,
+				 drm_intel_bo *bo,
+				 int width,
+				 drm_intel_bo **out_bo,
+				 uint32_t *out_offset);
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c
new file mode 100644
index 0000000000..be3989eb7d
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_state_batch.c
@@ -0,0 +1,148 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+     
+
+
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+#include "main/imports.h"
+
+
+
+/* Avoids re-emitting a state packet when the last packet emitted with the
+ * same header opcode was byte-for-byte identical.  Returns GL_TRUE if data
+ * was written to the batch, GL_FALSE if it was skipped as a duplicate. */
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+				   const void *data,
+				   GLuint sz )
+{
+   struct brw_cached_batch_item *item = brw->cached_batch_items;
+   struct header *newheader = (struct header *)data;
+
+   if (brw->emit_state_always) {
+      intel_batchbuffer_data(brw->intel.batch, data, sz);
+      return GL_TRUE;
+   }
+   /* Find the remembered packet with this opcode (at most one is kept). */
+   while (item) {
+      if (item->header->opcode == newheader->opcode) {
+	 if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
+	    return GL_FALSE;
+	 if (item->sz != sz) {
+	    free(item->header);
+	    item->header = malloc(sz); /* NOTE(review): result is not checked */
+	    item->sz = sz;
+	 }
+	 goto emit;
+      }
+      item = item->next;
+   }
+   /* No entry for this opcode yet: start remembering it. */
+   assert(!item);
+   item = CALLOC_STRUCT(brw_cached_batch_item); /* NOTE(review): unchecked */
+   item->header = malloc(sz); /* NOTE(review): unchecked */
+   item->sz = sz;
+   item->next = brw->cached_batch_items;
+   brw->cached_batch_items = item;
+
+ emit:
+   memcpy(item->header, newheader, sz); /* remember what is being emitted */
+   intel_batchbuffer_data(brw->intel.batch, data, sz);
+   return GL_TRUE;
+}
+
+/* Frees every remembered batch packet and empties the list. */
+void brw_clear_batch_cache( struct brw_context *brw )
+{
+   struct brw_cached_batch_item *cur, *following;
+
+   for (cur = brw->cached_batch_items; cur != NULL; cur = following) {
+      following = cur->next;
+      free((void *)cur->header);
+      free(cur);
+   }
+
+   brw->cached_batch_items = NULL;
+}
+
+void brw_destroy_batch_cache( struct brw_context *brw )
+{
+   brw_clear_batch_cache(brw); /* destroying is just freeing every cached item */
+}
+
+/**
+ * Allocates a block of space in the batchbuffer for indirect state.
+ *
+ * We don't want to allocate separate BOs for every bit of indirect
+ * state in the driver.  It means overallocating by a significant
+ * margin (4096 bytes, even if the object is just a 20-byte surface
+ * state), and more buffers to walk and count for aperture size checking.
+ *
+ * However, due to the restrictions imposed by the aperture size
+ * checking performance hacks, we can't have the batch point at a
+ * separate indirect state buffer, because once the batch points at
+ * it, no more relocations can be added to it.  So, we sneak these
+ * buffers in at the top of the batchbuffer.
+ */
+void *
+brw_state_batch(struct brw_context *brw,
+		int size,
+		int alignment,
+		drm_intel_bo **out_bo,
+		uint32_t *out_offset)
+{
+   struct intel_batchbuffer *batch = brw->intel.batch;
+   uint32_t offset;
+
+   assert(size < batch->buf->size);
+   offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
+
+   /* If allocating from the top would wrap below the batchbuffer, or
+    * if the batch's used space (plus the reserved pad) collides with our
+    * space, then flush and try again.
+    */
+   if (batch->state_batch_offset < size ||
+       offset < batch->ptr - batch->map + batch->reserved_space) {
+      intel_batchbuffer_flush(batch);
+      offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment);
+   }
+   /* Claim the space: the next allocation is placed below this offset. */
+   batch->state_batch_offset = offset;
+   /* Point *out_bo at the batch BO, swapping references if it changed. */
+   if (*out_bo != batch->buf) {
+      drm_intel_bo_unreference(*out_bo);
+      drm_intel_bo_reference(batch->buf);
+      *out_bo = batch->buf;
+   }
+   /* Report the offset within the batch BO and return its mapped address. */
+   *out_offset = offset;
+   return batch->map + offset;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
new file mode 100644
index 0000000000..b31d84953a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -0,0 +1,471 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+/** @file brw_state_cache.c
+ *
+ * This file implements a simple static state cache for 965.  The consumers
+ * can query the hash table of state using a cache_id, opaque key data,
+ * and list of buffers that will be used in relocations, and receive the
+ * corresponding state buffer object of state (plus associated auxiliary
+ * data) in return.
+ *
+ * The inner workings are a simple hash table based on an XOR/rotate hash of
+ * the key data.  The cache_id and the relocation target buffer pointers
+ * associated with the state buffer are folded into the hash value as well,
+ * and are compared along with the key when probing (see hash_key() and
+ * brw_cache_item_equals()).
+ *
+ * Replacement is not implemented.  Instead, when the cache gets too big, at
+ * a safe point (unlock) we throw out all of the cache data and let it
+ * regenerate for the next rendering operation.
+ *
+ * The reloc_buf pointers need to be included as key data, otherwise the
+ * non-unique values stuffed in the offset in key data through
+ * brw_cache_data() may result in successful probe for state buffers
+ * even when the buffer being referenced doesn't match.  The result would be
+ * that the same state cache entry is used twice for different buffers,
+ * only one of the two buffers referenced gets put into the offset, and the
+ * incorrect program is run for the other instance.
+ */
+
+#include "main/imports.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+#include "brw_wm.h"
+
+
+/* Rotate-and-XOR hash seeded with the cache id, over key and reloc pointers. */
+static GLuint
+hash_key(struct brw_cache_item *item)
+{
+   GLuint *words = (GLuint *)item->key;
+   GLuint h = item->cache_id;
+   GLuint n, mixed;
+
+   assert(item->key_size % 4 == 0);
+
+   for (n = 0; n < item->key_size/4; n++) {
+      mixed = h ^ words[n];
+      h = (mixed << 5) | (mixed >> 27);
+   }
+
+   /* The BO pointers are treated as more key dwords. */
+   words = (GLuint *)item->reloc_bufs;
+   for (n = 0; n < item->nr_reloc_bufs * sizeof(drm_intel_bo *) / 4; n++) {
+      mixed = h ^ words[n];
+      h = (mixed << 5) | (mixed >> 27);
+   }
+
+   return h;
+}
+
+
+/**
+ * Makes bo the current buffer for cache_id, flagging the id dirty on change.
+ */
+static void
+update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
+		  drm_intel_bo *bo)
+{
+   drm_intel_bo **slot = &cache->last_bo[cache_id];
+   if (*slot != bo) {
+      drm_intel_bo_unreference(*slot);
+      *slot = bo;
+      drm_intel_bo_reference(*slot);
+      cache->brw->state.dirty.cache |= 1 << cache_id;
+   }
+}
+
+/* Two items match when id, hash, key bytes and reloc pointers all agree. */
+static int
+brw_cache_item_equals(const struct brw_cache_item *a,
+		      const struct brw_cache_item *b)
+{
+   if (a->cache_id != b->cache_id || a->hash != b->hash ||
+       a->key_size != b->key_size || memcmp(a->key, b->key, a->key_size) != 0)
+      return 0;
+   return a->nr_reloc_bufs == b->nr_reloc_bufs &&
+      memcmp(a->reloc_bufs, b->reloc_bufs,
+	     a->nr_reloc_bufs * sizeof(drm_intel_bo *)) == 0;
+}
+
+static struct brw_cache_item *
+search_cache(struct brw_cache *cache, GLuint hash,
+	     struct brw_cache_item *lookup)
+{
+   struct brw_cache_item *c;
+   /* Disabled debug aid: prints the chain length of the probed bucket. */
+#if 0
+   int bucketcount = 0;
+
+   for (c = cache->items[hash % cache->size]; c; c = c->next)
+      bucketcount++;
+
+   fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
+	   cache->size, bucketcount, cache->n_items);
+#endif
+   /* Walk the bucket chain selected by hash; the first full match wins. */
+   for (c = cache->items[hash % cache->size]; c; c = c->next) {
+      if (brw_cache_item_equals(lookup, c))
+	 return c;
+   }
+
+   return NULL;
+}
+
+
+static void
+rehash(struct brw_cache *cache)
+{
+   struct brw_cache_item **items;
+   struct brw_cache_item *c, *next;
+   GLuint size = cache->size * 3, i;
+   /* Triple the bucket count; on allocation failure keep the old table. */
+   items = (struct brw_cache_item **) calloc(size, sizeof(*items));
+   if (items == NULL)
+      return;
+   /* Relink every item into its new bucket using the stored hash. */
+   for (i = 0; i < cache->size; i++)
+      for (c = cache->items[i]; c; c = next) {
+	 next = c->next;
+	 c->next = items[c->hash % size];
+	 items[c->hash % size] = c;
+      }
+   FREE(cache->items);
+   cache->items = items;
+   cache->size = size;
+}
+
+
+/** Returns the BO matching cache_id, key and reloc_bufs with an extra reference
+ * taken for the caller, or NULL.  *aux_return is set to the bytes after the key.
+ */
+drm_intel_bo *
+brw_search_cache(struct brw_cache *cache,
+                 enum brw_cache_id cache_id,
+                 const void *key,
+                 GLuint key_size,
+                 drm_intel_bo **reloc_bufs, GLuint nr_reloc_bufs,
+                 void *aux_return)
+{
+   struct brw_cache_item *item;
+   struct brw_cache_item lookup;
+   GLuint hash;
+   /* Build a stack-only probe item that borrows the caller's pointers. */
+   lookup.cache_id = cache_id;
+   lookup.key = key;
+   lookup.key_size = key_size;
+   lookup.reloc_bufs = reloc_bufs;
+   lookup.nr_reloc_bufs = nr_reloc_bufs;
+   hash = hash_key(&lookup);
+   lookup.hash = hash;
+
+   item = search_cache(cache, hash, &lookup);
+
+   if (item == NULL)
+      return NULL;
+   /* Aux data is stored directly after the key bytes of a cached item. */
+   if (aux_return)
+      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+
+   update_cache_last(cache, cache_id, item->bo);
+   /* The caller receives its own reference. */
+   drm_intel_bo_reference(item->bo);
+   return item->bo;
+}
+
+/* Uploads data into a new BO and records it in the cache under key/reloc_bufs. */
+drm_intel_bo *
+brw_upload_cache_with_auxdata(struct brw_cache *cache,
+			      enum brw_cache_id cache_id,
+			      const void *key,
+			      GLuint key_size,
+			      drm_intel_bo **reloc_bufs,
+			      GLuint nr_reloc_bufs,
+			      const void *data,
+			      GLuint data_size,
+			      const void *aux,
+			      GLuint aux_size,
+			      void *aux_return)
+{
+   struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+   GLuint hash;
+   GLuint relocs_size = nr_reloc_bufs * sizeof(drm_intel_bo *);
+   char *tmp;
+   drm_intel_bo *bo;
+   GLuint i;
+
+   item->cache_id = cache_id;
+   item->key = key;
+   item->key_size = key_size;
+   item->reloc_bufs = reloc_bufs;
+   item->nr_reloc_bufs = nr_reloc_bufs;
+   hash = hash_key(item);
+   item->hash = hash;
+
+   /* Create the buffer object to contain the data */
+   bo = drm_intel_bo_alloc(cache->brw->intel.bufmgr,
+			   cache->name[cache_id], data_size, 1 << 6);
+
+   /* Key, aux data and reloc_bufs share one allocation; empty parts may be NULL. */
+   tmp = malloc(key_size + aux_size + relocs_size);
+   memcpy(tmp, key, key_size);
+   if (aux_size != 0)
+      memcpy(tmp + key_size, aux, aux_size);
+   if (relocs_size != 0)
+      memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
+   for (i = 0; i < nr_reloc_bufs; i++) {
+      if (reloc_bufs[i] != NULL)
+	 drm_intel_bo_reference(reloc_bufs[i]);
+   }
+
+   item->key = tmp;
+   item->reloc_bufs = (drm_intel_bo **)(tmp + key_size + aux_size);
+   /* The cache keeps its own reference; the caller owns the original one. */
+   item->bo = bo;
+   drm_intel_bo_reference(bo);
+
+   if (cache->n_items > cache->size * 1.5)
+      rehash(cache);
+
+   hash %= cache->size;
+   item->next = cache->items[hash];
+   cache->items[hash] = item;
+   cache->n_items++;
+
+   if (aux_return) {
+      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+   }
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      printf("upload %s: %d bytes to cache id %d\n",
+		   cache->name[cache_id],
+		   data_size, cache_id);
+
+   /* Copy data to the buffer */
+   drm_intel_bo_subdata(bo, 0, data_size, data);
+
+   update_cache_last(cache, cache_id, bo);
+
+   return bo;
+}
+
+drm_intel_bo *
+brw_upload_cache(struct brw_cache *cache,
+		 enum brw_cache_id cache_id,
+		 const void *key,
+		 GLuint key_size,
+		 drm_intel_bo **reloc_bufs,
+		 GLuint nr_reloc_bufs,
+		 const void *data,
+		 GLuint data_size)
+{
+   return brw_upload_cache_with_auxdata(cache, cache_id,
+					key, key_size,
+					reloc_bufs, nr_reloc_bufs,
+					data, data_size,
+					NULL, 0, /* no aux data */
+					NULL); /* so no aux pointer to return */
+}
+
+/**
+ * Caches a blob of state using the data itself as the lookup key: returns
+ * the existing BO on a hit (with a new reference), else uploads a new one.
+ *
+ * For state that needs relocations, brw_search_cache()/brw_upload_cache() are
+ * better to use, as the potentially changing offsets in the data-used-as-key
+ * will result in excessive cache misses.
+ *
+ * If aux data is involved, use search/upload instead.
+ */
+drm_intel_bo *
+brw_cache_data(struct brw_cache *cache,
+	       enum brw_cache_id cache_id,
+	       const void *data,
+	       GLuint data_size)
+{
+   drm_intel_bo *bo;
+   struct brw_cache_item *item, lookup;
+   GLuint hash;
+   /* Probe with the data as key and no relocation buffers. */
+   lookup.cache_id = cache_id;
+   lookup.key = data;
+   lookup.key_size = data_size;
+   lookup.reloc_bufs = NULL;
+   lookup.nr_reloc_bufs = 0;
+   hash = hash_key(&lookup);
+   lookup.hash = hash;
+
+   item = search_cache(cache, hash, &lookup);
+   if (item) {
+      update_cache_last(cache, cache_id, item->bo);
+      drm_intel_bo_reference(item->bo);
+      return item->bo;
+   }
+   /* Miss: the same bytes serve as both the key and the BO contents. */
+   bo = brw_upload_cache(cache, cache_id,
+			 data, data_size,
+			 NULL, 0,
+			 data, data_size);
+
+   return bo;
+}
+
+enum pool_type { /* NOTE(review): not referenced by the code visible in this file section */
+   DW_SURFACE_STATE,
+   DW_GENERAL_STATE
+};
+
+
+static void
+brw_init_cache_id(struct brw_cache *cache,
+                  const char *name,
+                  enum brw_cache_id id)
+{
+   cache->name[id] = strdup(name); /* heap copy; freed in brw_destroy_cache() */
+}
+
+
+/* Sets up the (initially empty, 7-bucket) state cache for this context and
+ * registers a debug name for every cache id.
+ */
+static void
+brw_init_non_surface_cache(struct brw_context *brw)
+{
+   struct brw_cache *cache = &brw->cache;
+
+   cache->brw = brw;
+
+   /* items[] is an array of bucket-head pointers, so size it per pointer. */
+   cache->size = 7;
+   cache->n_items = 0;
+   cache->items = (struct brw_cache_item **)
+      calloc(cache->size, sizeof(*cache->items));
+
+   brw_init_cache_id(cache, "CC_VP", BRW_CC_VP);
+   brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT);
+   brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG);
+   brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR);
+   brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER);
+   brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT);
+
+   brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG);
+   brw_init_cache_id(cache, "SF_VP", BRW_SF_VP);
+   brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT);
+
+   brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT);
+   brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG);
+   brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT);
+   brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG);
+   brw_init_cache_id(cache, "CLIP_VP", BRW_CLIP_VP);
+   brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT);
+   brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG);
+
+   brw_init_cache_id(cache, "BLEND_STATE", BRW_BLEND_STATE);
+   brw_init_cache_id(cache, "COLOR_CALC_STATE", BRW_COLOR_CALC_STATE);
+   brw_init_cache_id(cache, "DEPTH_STENCIL_STATE", BRW_DEPTH_STENCIL_STATE);
+}
+
+void
+brw_init_caches(struct brw_context *brw)
+{
+   brw_init_non_surface_cache(brw); /* the only cache set up here */
+}
+
+
+static void
+brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+   struct brw_cache_item *c, *next;
+   GLuint i;
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      printf("%s\n", __FUNCTION__);
+   /* Drop each item's references (reloc targets and its BO) and free it. */
+   for (i = 0; i < cache->size; i++) {
+      for (c = cache->items[i]; c; c = next) {
+	 int j;
+
+	 next = c->next;
+	 for (j = 0; j < c->nr_reloc_bufs; j++)
+	    drm_intel_bo_unreference(c->reloc_bufs[j]);
+	 drm_intel_bo_unreference(c->bo);
+	 free((void *)c->key); /* key, aux and reloc_bufs share this allocation */
+	 free(c);
+      }
+      cache->items[i] = NULL;
+   }
+
+   cache->n_items = 0;
+   /* Mark every mesa/brw/cache dirty bit; cache->last_bo[] is left untouched. */
+   brw->state.dirty.mesa |= ~0;
+   brw->state.dirty.brw |= ~0;
+   brw->state.dirty.cache |= ~0;
+}
+
+void
+brw_state_cache_check_size(struct brw_context *brw)
+{
+   if (INTEL_DEBUG & DEBUG_STATE)
+      printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
+   /* Empty the whole cache once it holds more than 1000 items. */
+   /* un-tuned guess.  Each object is generally a page, so 1000 of them is 4 MB of
+    * state cache.
+    */
+   if (brw->cache.n_items > 1000)
+      brw_clear_cache(brw, &brw->cache);
+}
+
+
+static void
+brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+   GLuint i;
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      printf("%s\n", __FUNCTION__);
+   /* Free the items first, then the per-id last-BO references and names. */
+   brw_clear_cache(brw, cache);
+   for (i = 0; i < BRW_MAX_CACHE; i++) {
+      drm_intel_bo_unreference(cache->last_bo[i]);
+      free(cache->name[i]);
+   }
+   free(cache->items); /* finally the bucket array itself */
+   cache->items = NULL;
+   cache->size = 0;
+}
+
+
+void
+brw_destroy_caches(struct brw_context *brw)
+{
+   brw_destroy_cache(brw, &brw->cache); /* counterpart of brw_init_caches() */
+}
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c
new file mode 100644
index 0000000000..d410861bdf
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "main/mtypes.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+
+/**
+ * Prints out a header, the contents, and the message associated with
+ * the hardware state data given.
+ * \param name Name of the state object
+ * \param data Pointer to the base of the state object
+ * \param hw_offset Hardware offset of the base of the state data.
+ * \param index Index of the DWORD being output.
+ * \param fmt printf-style format (plus arguments) for the trailing message.
+ */
+static void
+state_out(const char *name, void *data, uint32_t hw_offset, int index,
+	  const char *fmt, ...)
+{
+    va_list va;
+
+    fprintf(stderr, "%8s: 0x%08x: 0x%08x: ",
+	    name, hw_offset + index * 4, ((uint32_t *)data)[index]);
+    va_start(va, fmt);
+    vfprintf(stderr, fmt, va);
+    va_end(va);
+}
+
+/** Generic, undecoded state buffer debug printout */
+static void
+state_struct_out(const char *name, drm_intel_bo *buffer, unsigned int state_size)
+{
+   int i;
+
+   if (buffer == NULL)
+      return;
+   /* Map (GL_FALSE: presumably without write access) and print each dword. */
+   drm_intel_bo_map(buffer, GL_FALSE);
+   for (i = 0; i < state_size / 4; i++) {
+      state_out(name, buffer->virtual, buffer->offset, i,
+		"dword %d\n", i);
+   }
+   drm_intel_bo_unmap(buffer);
+}
+
+/* Maps a surface-type field value to a short printable name. */
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+    static const char *const names[] = {
+	"1D", "2D", "3D", "CUBE", "BUFFER", NULL, NULL, "NULL"
+    };
+
+    if (surfacetype < sizeof(names) / sizeof(names[0]) && names[surfacetype])
+	return names[surfacetype];
+
+    return "unknown";
+}
+
+/* Names the handful of surface formats this dumper knows about. */
+static const char *
+get_965_surface_format(unsigned int surface_format)
+{
+    if (surface_format == 0x000) return "r32g32b32a32_float";
+    if (surface_format == 0x0c1) return "b8g8r8a8_unorm";
+    if (surface_format == 0x100) return "b5g6r5_unorm";
+    if (surface_format == 0x102) return "b5g5r5a1_unorm";
+    if (surface_format == 0x104) return "b4g4r4a4_unorm";
+
+    return "unknown";
+}
+
+static void dump_wm_surface_state(struct brw_context *brw)
+{
+   int i;
+
+   for (i = 0; i < brw->wm.nr_surfaces; i++) {
+      drm_intel_bo *surf_bo = brw->wm.surf_bo[i];
+      unsigned int surfoff;
+      struct brw_surface_state *surf;
+      char name[20];
+
+      if (surf_bo == NULL) {
+	 fprintf(stderr, "  WM SS%d: NULL\n", i);
+	 continue;
+      }
+      drm_intel_bo_map(surf_bo, GL_FALSE);
+      surfoff = surf_bo->offset + brw->wm.surf_offset[i];
+      surf = (struct brw_surface_state *)(surf_bo->virtual + brw->wm.surf_offset[i]);
+      /* Decode the six surface-state dwords, one output line each. */
+      sprintf(name, "WM SS%d", i); /* "WM SS" plus any int fits in name[20] */
+      state_out(name, surf, surfoff, 0, "%s %s\n",
+		get_965_surfacetype(surf->ss0.surface_type),
+		get_965_surface_format(surf->ss0.surface_format));
+      state_out(name, surf, surfoff, 1, "offset\n");
+      state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n",
+		surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count);
+      state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n",
+		surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not ");
+      state_out(name, surf, surfoff, 4, "mip base %d\n",
+		surf->ss4.min_lod);
+      state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n",
+		surf->ss5.x_offset, surf->ss5.y_offset);
+
+      drm_intel_bo_unmap(surf_bo);
+   }
+}
+
+static void dump_sf_viewport_state(struct brw_context *brw)
+{
+   const char *name = "SF VP";
+   struct brw_sf_viewport *vp;
+   uint32_t vp_off;
+
+   if (brw->sf.vp_bo == NULL)
+      return;
+
+   drm_intel_bo_map(brw->sf.vp_bo, GL_FALSE);
+
+   vp = brw->sf.vp_bo->virtual;
+   vp_off = brw->sf.vp_bo->offset;
+   /* Dwords 0-5: viewport matrix terms; dwords 6-7: scissor rectangle. */
+   state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00);
+   state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11);
+   state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22);
+   state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30);
+   state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31);
+   state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32);
+
+   state_out(name, vp, vp_off, 6, "top left = %d,%d\n",
+	     vp->scissor.xmin, vp->scissor.ymin);
+   state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n",
+	     vp->scissor.xmax, vp->scissor.ymax);
+
+   drm_intel_bo_unmap(brw->sf.vp_bo);
+}
+
+static void brw_debug_prog(const char *name, drm_intel_bo *prog)
+{
+   unsigned int i;
+   uint32_t *data;
+
+   if (prog == NULL)
+      return;
+
+   drm_intel_bo_map(prog, GL_FALSE);
+
+   data = prog->virtual;
+   /* Print four dwords (16 bytes) of the program per output line. */
+   for (i = 0; i < prog->size / 4 / 4; i++) {
+      fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+	      name, (unsigned int)prog->offset + i * 4 * 4,
+	      data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
+      /* Stop at the end of the program.  It'd be nice to keep track of the actual
+       * intended program size instead of guessing like this.
+       */
+      if (data[i * 4 + 0] == 0 &&
+	  data[i * 4 + 1] == 0 &&
+	  data[i * 4 + 2] == 0 &&
+	  data[i * 4 + 3] == 0)
+	 break;
+   }
+
+   drm_intel_bo_unmap(prog);
+}
+
+
+/**
+ * Print additional debug information associated with the batchbuffer
+ * when DEBUG_BATCH is set.
+ *
+ * For 965, this means mapping the state buffers that would have been referenced
+ * by the batchbuffer and dumping them.
+ *
+ * The buffer offsets printed rely on the buffer containing the last offset
+ * it was validated at.
+ */
+void brw_debug_batch(struct intel_context *intel)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   /* Binding table is dumped as one 4-byte entry per WM surface. */
+   state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces);
+   dump_wm_surface_state(brw);
+
+   state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state));
+   brw_debug_prog("VS prog", brw->vs.prog_bo);
+
+   state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state));
+   brw_debug_prog("GS prog", brw->gs.prog_bo);
+
+   state_struct_out("SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state));
+   dump_sf_viewport_state(brw);
+   brw_debug_prog("SF prog", brw->sf.prog_bo);
+
+   state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state));
+   brw_debug_prog("WM prog", brw->wm.prog_bo);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
new file mode 100644
index 0000000000..f92a19c2aa
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -0,0 +1,504 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+       
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+#include "intel_buffers.h"
+#include "intel_chipset.h"
+
+/* Ordered atom list for non-gen6 devices.  brw_validate_state() and
+ * brw_upload_state() walk it front to back, so an atom must be listed after
+ * the atoms it depends on.  NOTE(review): this comment used to say the list
+ * only seeds brw->state.atoms[] because brw_constant_buffer has a non-static
+ * .dirty value; nothing visible here makes such a copy -- confirm obsolete.
+ */
+static const struct brw_tracked_state *gen4_atoms[] =
+{
+   &brw_check_fallback,
+
+   &brw_wm_input_sizes,
+   &brw_vs_prog,
+   &brw_gs_prog, 
+   &brw_clip_prog, 
+   &brw_sf_prog,
+   &brw_wm_prog,
+
+   /* Once all the programs are done, we know how large urb entry
+    * sizes need to be and can decide if we need to change the urb
+    * layout.
+    */
+   &brw_curbe_offsets,
+   &brw_recalculate_urb_fence,
+
+   &brw_cc_unit,
+
+   &brw_vs_constants, /* Before vs_surfaces and constant_buffer */
+   &brw_wm_constants, /* Before wm_surfaces and constant_buffer */
+
+   &brw_vs_surfaces,		/* must do before unit */
+   &brw_wm_constant_surface,	/* must do before wm surfaces/bind bo */
+   &brw_wm_surfaces,		/* must do before samplers and unit */
+   &brw_wm_binding_table,
+   &brw_wm_samplers,
+
+   &brw_wm_unit,
+   &brw_sf_vp,
+   &brw_sf_unit,
+   &brw_vs_unit,		/* always required, enabled or not */
+   &brw_clip_unit,
+   &brw_gs_unit,  
+
+   /* Command packets:
+    */
+   &brw_invarient_state,
+   &brw_state_base_address,
+
+   &brw_binding_table_pointers,
+   &brw_blend_constant_color,
+
+   &brw_depthbuffer,
+
+   &brw_polygon_stipple,
+   &brw_polygon_stipple_offset,
+
+   &brw_line_stipple,
+   &brw_aa_line_parameters,
+
+   &brw_psp_urb_cbs,
+
+   &brw_drawing_rect,
+   &brw_indices,
+   &brw_index_buffer,
+   &brw_vertices,
+
+   &brw_constant_buffer
+};
+
+const struct brw_tracked_state *gen6_atoms[] =
+{  /* atoms walked in order when IS_GEN6() matches the device */
+   &brw_check_fallback,
+
+   &brw_wm_input_sizes,
+   &brw_vs_prog,
+   &brw_gs_prog,
+   &brw_wm_prog,
+
+   &gen6_clip_vp,
+   &gen6_sf_vp,
+
+   /* Command packets: */
+   &brw_invarient_state,
+
+   &gen6_viewport_state,	/* must do after *_vp stages */
+
+   &gen6_urb,
+   &gen6_blend_state,		/* must do before cc unit */
+   &gen6_color_calc_state,	/* must do before cc unit */
+   &gen6_depth_stencil_state,	/* must do before cc unit */
+   &gen6_cc_state_pointers,
+
+   &brw_vs_constants, /* Before vs_surfaces and constant_buffer */
+   &brw_wm_constants, /* Before wm_surfaces and constant_buffer */
+
+   &brw_vs_surfaces,		/* must do before unit */
+   &brw_wm_constant_surface,	/* must do before wm surfaces/bind bo */
+   &brw_wm_surfaces,		/* must do before samplers and unit */
+   &brw_wm_binding_table,
+
+   &brw_wm_samplers,
+   &gen6_sampler_state,
+
+   &gen6_vs_state,
+   &gen6_gs_state,
+   &gen6_clip_state,
+   &gen6_sf_state,
+   &gen6_wm_state,
+
+   &gen6_scissor_state,
+
+   &brw_state_base_address,
+
+   &gen6_binding_table_pointers,
+
+   &brw_depthbuffer,
+
+   &brw_polygon_stipple,
+   &brw_polygon_stipple_offset,
+
+   &brw_line_stipple,
+   &brw_aa_line_parameters,
+
+   &brw_drawing_rect,
+
+   &brw_indices,
+   &brw_index_buffer,
+   &brw_vertices,
+};  /* NOTE(review): not static, unlike gen4_atoms -- confirm this is intended */
+
+void brw_init_state( struct brw_context *brw )
+{  /* state initialization here consists solely of brw_init_caches() */
+   brw_init_caches(brw);
+}
+
+
+void brw_destroy_state( struct brw_context *brw )
+{  /* tears down the caches first, then the batch cache */
+   brw_destroy_caches(brw);
+   brw_destroy_batch_cache(brw);
+}
+
+/***********************************************************************
+ */
+
+/* Return true when a and b share at least one dirty bit in any category. */
+static GLboolean
+check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
+{
+   return (a->mesa & b->mesa) != 0 || (a->brw & b->brw) != 0 ||
+          (a->cache & b->cache) != 0;
+}
+
+static void
+accumulate_state(struct brw_state_flags *a, const struct brw_state_flags *b)
+{  /* OR every dirty bit of b into a; b itself is only read */
+   a->cache |= b->cache;
+   a->brw |= b->brw;
+   a->mesa |= b->mesa;
+}
+
+
+static void xor_states(struct brw_state_flags *result,
+                       const struct brw_state_flags *a,
+                       const struct brw_state_flags *b)
+{  /* result = a ^ b, computed separately for each flag category */
+   result->cache = a->cache ^ b->cache;
+   result->brw = a->brw ^ b->brw;
+   result->mesa = a->mesa ^ b->mesa;
+}
+
+/* Unreference every bo recorded by the last validation and empty the list. */
+void
+brw_clear_validated_bos(struct brw_context *brw)
+{
+   int n = 0;
+
+   while (n < brw->state.validated_bo_count) {
+      drm_intel_bo_unreference(brw->state.validated_bos[n]);
+      brw->state.validated_bos[n++] = NULL;
+   }
+   brw->state.validated_bo_count = 0;
+}
+
+struct dirty_bit_map {
+   uint32_t bit;       /* dirty-flag mask; 0 marks the end of a table */
+   const char *name;   /* flag name, a string literal from DEFINE_BIT; read-only */
+   uint32_t count;     /* how often brw_update_dirty_count() saw the flag set */
+};
+
+#define DEFINE_BIT(name) {name, #name, 0} /* {flag value, flag spelled as text, count 0} */
+
+static struct dirty_bit_map mesa_bits[] = {  /* counters for the _NEW_* flags */
+   DEFINE_BIT(_NEW_MODELVIEW),
+   DEFINE_BIT(_NEW_PROJECTION),
+   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
+   DEFINE_BIT(_NEW_COLOR_MATRIX),
+   DEFINE_BIT(_NEW_ACCUM),
+   DEFINE_BIT(_NEW_COLOR),
+   DEFINE_BIT(_NEW_DEPTH),
+   DEFINE_BIT(_NEW_EVAL),
+   DEFINE_BIT(_NEW_FOG),
+   DEFINE_BIT(_NEW_HINT),
+   DEFINE_BIT(_NEW_LIGHT),
+   DEFINE_BIT(_NEW_LINE),
+   DEFINE_BIT(_NEW_PIXEL),
+   DEFINE_BIT(_NEW_POINT),
+   DEFINE_BIT(_NEW_POLYGON),
+   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
+   DEFINE_BIT(_NEW_SCISSOR),
+   DEFINE_BIT(_NEW_STENCIL),
+   DEFINE_BIT(_NEW_TEXTURE),
+   DEFINE_BIT(_NEW_TRANSFORM),
+   DEFINE_BIT(_NEW_VIEWPORT),
+   DEFINE_BIT(_NEW_PACKUNPACK),
+   DEFINE_BIT(_NEW_ARRAY),
+   DEFINE_BIT(_NEW_RENDERMODE),
+   DEFINE_BIT(_NEW_BUFFERS),
+   DEFINE_BIT(_NEW_MULTISAMPLE),
+   DEFINE_BIT(_NEW_TRACK_MATRIX),
+   DEFINE_BIT(_NEW_PROGRAM),
+   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
+   {0, 0, 0}  /* terminator: the scan loops stop at bit == 0 */
+};
+
+static struct dirty_bit_map brw_bits[] = {  /* counters for the BRW_NEW_* flags */
+   DEFINE_BIT(BRW_NEW_URB_FENCE),
+   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
+   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
+   DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS),
+   DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
+   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
+   DEFINE_BIT(BRW_NEW_PRIMITIVE),
+   DEFINE_BIT(BRW_NEW_CONTEXT),
+   DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
+   DEFINE_BIT(BRW_NEW_PSP),
+   DEFINE_BIT(BRW_NEW_WM_SURFACES),
+   DEFINE_BIT(BRW_NEW_BINDING_TABLE),
+   DEFINE_BIT(BRW_NEW_INDICES),
+   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
+   DEFINE_BIT(BRW_NEW_VERTICES),
+   DEFINE_BIT(BRW_NEW_BATCH),
+   DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),
+   {0, 0, 0}  /* terminator: the scan loops stop at bit == 0 */
+};
+
+static struct dirty_bit_map cache_bits[] = {  /* counters for the CACHE_NEW_* flags */
+   DEFINE_BIT(CACHE_NEW_BLEND_STATE),
+   DEFINE_BIT(CACHE_NEW_CC_VP),
+   DEFINE_BIT(CACHE_NEW_CC_UNIT),
+   DEFINE_BIT(CACHE_NEW_WM_PROG),
+   DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR),
+   DEFINE_BIT(CACHE_NEW_SAMPLER),
+   DEFINE_BIT(CACHE_NEW_WM_UNIT),
+   DEFINE_BIT(CACHE_NEW_SF_PROG),
+   DEFINE_BIT(CACHE_NEW_SF_VP),
+   DEFINE_BIT(CACHE_NEW_SF_UNIT),
+   DEFINE_BIT(CACHE_NEW_VS_UNIT),
+   DEFINE_BIT(CACHE_NEW_VS_PROG),
+   DEFINE_BIT(CACHE_NEW_GS_UNIT),
+   DEFINE_BIT(CACHE_NEW_GS_PROG),
+   DEFINE_BIT(CACHE_NEW_CLIP_VP),
+   DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
+   DEFINE_BIT(CACHE_NEW_CLIP_PROG),
+   {0, 0, 0}  /* terminator: the scan loops stop at bit == 0 */
+};
+
+
+/* Bump the counter of every bit_map entry whose flag is set in bits. */
+static void
+brw_update_dirty_count(struct dirty_bit_map *bit_map, uint32_t bits)
+{
+   int i;
+   /* bits is unsigned like the flag words passed in (they may be ~0), and
+    * the table ends at its first bit == 0 entry; 32 only caps the scan. */
+   for (i = 0; i < 32; i++) {
+      if (bit_map[i].bit == 0)
+	 return;
+      if (bit_map[i].bit & bits)
+	 bit_map[i].count++;
+   }
+}
+
+/* Print "<flag>: <count> (<name>)" to stderr for each bit_map entry. */
+static void
+brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+   int i;
+
+   (void) bits;   /* unused; kept so existing callers compile unchanged */
+   for (i = 0; i < 32 && bit_map[i].bit != 0; i++) {
+      /* count is uint32_t, so it is printed with an unsigned conversion */
+      fprintf(stderr, "0x%08x: %12u (%s)\n",
+	      bit_map[i].bit, bit_map[i].count, bit_map[i].name);
+   }
+}
+
+/***********************************************************************
+ * Validate state: fold in new GL dirty bits, then run the prepare() hooks.
+ */
+void brw_validate_state( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   struct brw_state_flags *state = &brw->state.dirty;
+   GLuint i;
+   const struct brw_tracked_state **atoms;
+   int num_atoms;
+
+   brw_clear_validated_bos(brw);
+
+   state->mesa |= brw->intel.NewGLState; /* take over the pending GL dirty bits */
+   brw->intel.NewGLState = 0;
+
+   brw_add_validated_bo(brw, intel->batch->buf);
+
+   if (IS_GEN6(intel->intelScreen->deviceID)) {
+      atoms = gen6_atoms;
+      num_atoms = ARRAY_SIZE(gen6_atoms);
+   } else {
+      atoms = gen4_atoms;
+      num_atoms = ARRAY_SIZE(gen4_atoms);
+   }
+
+   if (brw->emit_state_always) { /* mark every flag dirty so all atoms match */
+      state->mesa |= ~0;
+      state->brw |= ~0;
+      state->cache |= ~0;
+   }
+
+   if (brw->fragment_program != ctx->FragmentProgram._Current) {
+      brw->fragment_program = ctx->FragmentProgram._Current;
+      brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+   }
+
+   if (brw->vertex_program != ctx->VertexProgram._Current) {
+      brw->vertex_program = ctx->VertexProgram._Current;
+      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+   }
+
+   if (state->mesa == 0 &&
+       state->cache == 0 &&
+       state->brw == 0)
+      return; /* nothing dirty: the prepare pass and checks below are skipped */
+
+   if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
+      brw_clear_batch_cache(brw);
+
+   brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */
+
+   /* do prepare stage for all atoms */
+   for (i = 0; i < num_atoms; i++) {
+      const struct brw_tracked_state *atom = atoms[i];
+
+      if (brw->intel.Fallback) /* raised since the reset above: stop preparing */
+         break;
+
+      if (check_state(state, &atom->dirty)) {
+         if (atom->prepare) {
+            atom->prepare(brw);
+        }
+      }
+   }
+
+   intel_check_front_buffer_rendering(intel);
+
+   /* Make sure that the textures which are referenced by the current
+    * brw fragment program are actually present/valid.
+    * If this fails, we can experience GPU lock-ups.
+    */
+   {
+      const struct brw_fragment_program *fp;
+      fp = brw_fragment_program_const(brw->fragment_program);
+      if (fp) {
+         assert((fp->tex_units_used & ctx->Texture._EnabledUnits)
+                == fp->tex_units_used);
+      }
+   }
+}
+
+
+void brw_upload_state(struct brw_context *brw)
+{  /* run emit() for each atom whose dirty bits are set, then clear the flags */
+   struct intel_context *intel = &brw->intel;
+   struct brw_state_flags *state = &brw->state.dirty;
+   int i;
+   static int dirty_count = 0; /* DEBUG_STATE passes so far; throttles printing */
+   const struct brw_tracked_state **atoms;
+   int num_atoms;
+
+   if (IS_GEN6(intel->intelScreen->deviceID)) {
+      atoms = gen6_atoms;
+      num_atoms = ARRAY_SIZE(gen6_atoms);
+   } else {
+      atoms = gen4_atoms;
+      num_atoms = ARRAY_SIZE(gen4_atoms);
+   }
+
+   brw_clear_validated_bos(brw);
+
+   if (INTEL_DEBUG) {
+      /* Debug version which enforces various sanity checks on the
+       * state flags which are generated and checked to help ensure
+       * state atoms are ordered correctly in the list.
+       */
+      struct brw_state_flags examined, prev;      
+      memset(&examined, 0, sizeof(examined));
+      prev = *state;
+
+      for (i = 0; i < num_atoms; i++) {
+	 const struct brw_tracked_state *atom = atoms[i];
+	 struct brw_state_flags generated;
+
+	 assert(atom->dirty.mesa ||
+		atom->dirty.brw ||
+		atom->dirty.cache);
+
+	 if (brw->intel.Fallback)
+	    break;
+
+	 if (check_state(state, &atom->dirty)) {
+	    if (atom->emit) {
+	       atom->emit( brw );
+	    }
+	 }
+
+	 accumulate_state(&examined, &atom->dirty);
+
+	 /* generated = (prev ^ state)
+	  * if (examined & generated)
+	  *     fail;
+	  */
+	 xor_states(&generated, &prev, state);
+	 assert(!check_state(&examined, &generated));
+	 prev = *state;
+      }
+   }
+   else {
+      for (i = 0; i < num_atoms; i++) {
+	 const struct brw_tracked_state *atom = atoms[i];
+
+	 if (brw->intel.Fallback)
+	    break;
+
+	 if (check_state(state, &atom->dirty)) {
+	    if (atom->emit) {
+	       atom->emit( brw );
+	    }
+	 }
+      }
+   }
+
+   if (INTEL_DEBUG & DEBUG_STATE) {
+      brw_update_dirty_count(mesa_bits, state->mesa);
+      brw_update_dirty_count(brw_bits, state->brw);
+      brw_update_dirty_count(cache_bits, state->cache);
+      if (dirty_count++ % 1000 == 0) { /* first pass, then every 1000th */
+	 brw_print_dirty_count(mesa_bits, state->mesa);
+	 brw_print_dirty_count(brw_bits, state->brw);
+	 brw_print_dirty_count(cache_bits, state->cache);
+	 fprintf(stderr, "\n");
+      }
+   }
+
+   if (!brw->intel.Fallback) /* the dirty flags are left set on fallback */
+      memset(state, 0, sizeof(*state));
+}
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
new file mode 100644
index 0000000000..2a7fa5b699
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -0,0 +1,1685 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+        
+
+#ifndef BRW_STRUCTS_H
+#define BRW_STRUCTS_H
+
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+
+
+/* Command packets:
+ */
+struct header 
+{  /* bitfields total 32 bits: 16-bit length, 16-bit opcode */
+   GLuint length:16; 
+   GLuint opcode:16; 
+};
+
+
+union header_union
+{  /* a struct header accessed either by field or as one raw GLuint */
+   struct header bits;
+   GLuint dword;
+};
+
+struct brw_3d_control
+{  /* four words: header bitfields, dest bitfields, dword2, dword3 */
+   struct 
+   {
+      GLuint length:8;
+      GLuint notify_enable:1;
+      GLuint pad:3;
+      GLuint wc_flush_enable:1; 
+      GLuint depth_stall_enable:1; 
+      GLuint operation:2; 
+      GLuint opcode:16; 
+   } header;
+   
+   struct
+   {
+      GLuint pad:2;
+      GLuint dest_addr_type:1; 
+      GLuint dest_addr:29; 
+   } dest;
+   
+   GLuint dword2;   
+   GLuint dword3;   
+};
+
+
+struct brw_3d_primitive
+{  /* header bitfields (32 bits) followed by five plain GLuint parameters */
+   struct
+   {
+      GLuint length:8; 
+      GLuint pad:2;
+      GLuint topology:5; 
+      GLuint indexed:1; 
+      GLuint opcode:16; 
+   } header;
+
+   GLuint verts_per_instance;  
+   GLuint start_vert_location;  
+   GLuint instance_count;  
+   GLuint start_instance_location;  
+   GLuint base_vert_location;  
+};
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines:
+ */
+#define BRW_FLUSH_READ_CACHE           0x1
+#define BRW_FLUSH_STATE_CACHE          0x2
+#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define BRW_FLUSH_SNAPSHOT_COUNTERS    0x8
+
+struct brw_mi_flush
+{  /* 32 bits; flags presumably holds the BRW_*FLUSH* values -- TODO confirm */
+   GLuint flags:4;
+   GLuint pad:12;
+   GLuint opcode:16;
+};
+
+struct brw_vf_statistics
+{  /* 32 bits: one enable bit, 15 bits of padding, 16-bit opcode */
+   GLuint statistics_enable:1;
+   GLuint pad:15;
+   GLuint opcode:16;
+};
+
+
+
+struct brw_binding_table_pointers
+{  /* header followed by one GLuint each for vs, gs, clp, sf and wm */
+   struct header header;
+   GLuint vs; 
+   GLuint gs; 
+   GLuint clp; 
+   GLuint sf; 
+   GLuint wm; 
+};
+
+
+struct brw_blend_constant_color
+{  /* header followed by four floats */
+   struct header header;
+   GLfloat blend_constant_color[4];  
+};
+
+
+struct brw_depthbuffer
+{  /* five words: header, dword1, base address, dword3, dword4 */
+   union header_union header;
+   
+   union {  /* each union below exposes one word as fields or as a raw dword */
+      struct {
+	 GLuint pitch:18; 
+	 GLuint format:3; 
+	 GLuint pad:2;
+	 GLuint software_tiled_rendering_mode:2;
+	 GLuint depth_offset_disable:1; 
+	 GLuint tile_walk:1; 
+	 GLuint tiled_surface:1; 
+	 GLuint pad2:1;
+	 GLuint surface_type:3; 
+      } bits;
+      GLuint dword;
+   } dword1;
+   
+   GLuint dword2_base_addr; 
+ 
+   union {
+      struct {
+	 GLuint pad:1;
+	 GLuint mipmap_layout:1; 
+	 GLuint lod:4; 
+	 GLuint width:13; 
+	 GLuint height:13; 
+      } bits;
+      GLuint dword;
+   } dword3;
+
+   union {
+      struct {
+	 GLuint pad:10;
+	 GLuint min_array_element:11; 
+	 GLuint depth:11; 
+      } bits;
+      GLuint dword;
+   } dword4;
+};
+
+struct brw_depthbuffer_g4x
+{  /* same members as struct brw_depthbuffer, plus a trailing dword5 */
+   union header_union header;
+   
+   union {
+      struct {
+	 GLuint pitch:18; 
+	 GLuint format:3; 
+	 GLuint pad:2;
+	 GLuint software_tiled_rendering_mode:2;
+	 GLuint depth_offset_disable:1; 
+	 GLuint tile_walk:1; 
+	 GLuint tiled_surface:1; 
+	 GLuint pad2:1;
+	 GLuint surface_type:3; 
+      } bits;
+      GLuint dword;
+   } dword1;
+   
+   GLuint dword2_base_addr; 
+ 
+   union {
+      struct {
+	 GLuint pad:1;
+	 GLuint mipmap_layout:1; 
+	 GLuint lod:4; 
+	 GLuint width:13; 
+	 GLuint height:13; 
+      } bits;
+      GLuint dword;
+   } dword3;
+
+   union {
+      struct {
+	 GLuint pad:10;
+	 GLuint min_array_element:11; 
+	 GLuint depth:11; 
+      } bits;
+      GLuint dword;
+   } dword4;
+
+   union {
+      struct {
+         GLuint xoffset:16;
+         GLuint yoffset:16;
+      } bits;
+      GLuint dword;
+   } dword5;   /* NEW in Integrated Graphics Device */
+};
+
+struct brw_drawrect
+{  /* header plus three pairs of 16-bit x/y values: min, max and origin */
+   struct header header;
+   GLuint xmin:16; 
+   GLuint ymin:16; 
+   GLuint xmax:16; 
+   GLuint ymax:16; 
+   GLuint xorg:16;  
+   GLuint yorg:16;  
+};
+
+
+
+
+struct brw_global_depth_offset_clamp
+{  /* header followed by a single float */
+   struct header header;
+   GLfloat depth_offset_clamp;  
+};
+
+struct brw_indexbuffer
+{  /* header (as fields or raw dword), then buffer_start and buffer_end */
+   union {
+      struct
+      {
+	 GLuint length:8; 
+	 GLuint index_format:2; 
+	 GLuint cut_index_enable:1; 
+	 GLuint pad:5; 
+	 GLuint opcode:16; 
+      } bits;
+      GLuint dword;
+
+   } header;
+
+   GLuint buffer_start; 
+   GLuint buffer_end; 
+};
+
+/* NEW in Integrated Graphics Device */
+struct brw_aa_line_parameters
+{  /* header plus two words, each holding an 8-bit slope and an 8-bit bias */
+   struct header header;
+
+   struct {
+      GLuint aa_coverage_slope:8;
+      GLuint pad0:8;
+      GLuint aa_coverage_bias:8;
+      GLuint pad1:8;
+   } bits0;
+
+   struct {
+      GLuint aa_coverage_endcap_slope:8;
+      GLuint pad0:8;
+      GLuint aa_coverage_endcap_bias:8;
+      GLuint pad1:8;
+   } bits1;
+};
+
+struct brw_line_stipple
+{  /* header, a 16-bit pattern word, then repeat count and its inverse */
+   struct header header;
+  
+   struct
+   {
+      GLuint pattern:16; 
+      GLuint pad:16;
+   } bits0;
+   
+   struct
+   {
+      GLuint repeat_count:9; 
+      GLuint pad:7;
+      GLuint inverse_repeat_count:16; 
+   } bits1;
+};
+
+
+struct brw_pipelined_state_pointers
+{  /* header plus one word per unit; only gs and clp carry an enable bit */
+   struct header header;
+   
+   struct {
+      GLuint pad:5;
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+   } vs;
+   
+   struct
+   {
+      GLuint enable:1;
+      GLuint pad:4;
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+   } gs;
+   
+   struct
+   {
+      GLuint enable:1;
+      GLuint pad:4;
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+   } clp;
+   
+   struct
+   {
+      GLuint pad:5;
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+   } sf;
+
+   struct
+   {
+      GLuint pad:5;
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+   } wm;
+   
+   struct
+   {
+      GLuint pad:5;
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! */
+   } cc;
+};
+
+
+struct brw_polygon_stipple_offset
+{  /* header plus one word holding 5-bit y and x offsets */
+   struct header header;
+
+   struct {
+      GLuint y_offset:5; 
+      GLuint pad:3;
+      GLuint x_offset:5; 
+      GLuint pad0:19;
+   } bits0;
+};
+
+
+
+struct brw_polygon_stipple
+{  /* header followed by 32 GLuint pattern words */
+   struct header header;
+   GLuint stipple[32];
+};
+
+
+
+struct brw_pipeline_select
+{  /* a single 32-bit header whose lowest-declared field is the 1-bit select */
+   struct
+   {
+      GLuint pipeline_select:1;   
+      GLuint pad:15;
+      GLuint opcode:16;   
+   } header;
+};
+
+
+struct brw_pipe_control
+{  /* four words: header bitfields, destination address word, data0, data1 */
+   struct
+   {
+      GLuint length:8;
+      GLuint notify_enable:1;
+      GLuint texture_cache_flush_enable:1;
+      GLuint indirect_state_pointers_disable:1;
+      GLuint instruction_state_cache_flush_enable:1;
+      GLuint write_cache_flush_enable:1;
+      GLuint depth_stall_enable:1;
+      GLuint post_sync_operation:2;
+
+      GLuint opcode:16;
+   } header;
+
+   struct
+   {
+      GLuint pad:2;
+      GLuint dest_addr_type:1;
+      GLuint dest_addr:29;
+   } bits1;
+
+   GLuint data0;
+   GLuint data1;
+};
+
+
+struct brw_urb_fence
+{  /* header with per-unit realloc bits, then two words of fence values */
+   struct
+   {
+      GLuint length:8;   
+      GLuint vs_realloc:1;   
+      GLuint gs_realloc:1;   
+      GLuint clp_realloc:1;   
+      GLuint sf_realloc:1;   
+      GLuint vfe_realloc:1;   
+      GLuint cs_realloc:1;   
+      GLuint pad:2;
+      GLuint opcode:16;   
+   } header;
+
+   struct
+   {
+      GLuint vs_fence:10;  
+      GLuint gs_fence:10;  
+      GLuint clp_fence:10;  
+      GLuint pad:2;
+   } bits0;
+
+   struct
+   {
+      GLuint sf_fence:10;  
+      GLuint vf_fence:10;  
+      GLuint cs_fence:11;  /* one bit wider than the other fences */
+      GLuint pad:1;
+   } bits1;
+};
+
+struct brw_cs_urb_state
+{  /* header plus one word: 3-bit entry count and 5-bit entry size */
+   struct header header;
+
+   struct
+   {
+      GLuint nr_urb_entries:3;   
+      GLuint pad:1;
+      GLuint urb_entry_size:5;   
+      GLuint pad0:23;
+   } bits0;
+};
+
+struct brw_constant_buffer
+{  /* header with a valid bit, then a 6-bit length and 26-bit address */
+   struct
+   {
+      GLuint length:8;   
+      GLuint valid:1;   
+      GLuint pad:7;
+      GLuint opcode:16;   
+   } header;
+
+   struct
+   {
+      GLuint buffer_length:6;   
+      GLuint buffer_address:26;  
+   } bits0;
+};
+
+struct brw_state_base_address
+{  /* header, three 27-bit base addresses, then two 20-bit upper bounds */
+   struct header header;
+
+   struct
+   {
+      GLuint modify_enable:1;  /* every word below starts with this bit */
+      GLuint pad:4;
+      GLuint general_state_address:27;  
+   } bits0;
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:4;
+      GLuint surface_state_address:27;  
+   } bits1;
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:4;
+      GLuint indirect_object_state_address:27;  
+   } bits2;
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:11;
+      GLuint general_state_upper_bound:20;  
+   } bits3;
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:11;
+      GLuint indirect_object_state_upper_bound:20;  
+   } bits4;
+};
+
+struct brw_state_prefetch
+{  /* header plus one word: 3-bit prefetch count and 26-bit pointer */
+   struct header header;
+
+   struct
+   {
+      GLuint prefetch_count:3;   
+      GLuint pad:3;
+      GLuint prefetch_pointer:26;  
+   } bits0;
+};
+
+struct brw_system_instruction_pointer
+{  /* header plus one word holding a 28-bit pointer above 4 pad bits */
+   struct header header;
+
+   struct
+   {
+      GLuint pad:4;
+      GLuint system_instruction_pointer:28;  
+   } bits0;
+};
+
+
+
+
+/* State structs for the various fixed function units:
+ */
+
+
+struct thread0
+{  /* 32 bits, used as the first member of the *_unit_state structs in this file */
+   GLuint pad0:1;
+   GLuint grf_reg_count:3; 
+   GLuint pad1:2;
+   GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
+};
+
+struct thread1
+{  /* 32 bits: exception enables, then FP mode, priority and table size */
+   GLuint ext_halt_exception_enable:1; 
+   GLuint sw_exception_enable:1; 
+   GLuint mask_stack_exception_enable:1; 
+   GLuint timeout_exception_enable:1; 
+   GLuint illegal_op_exception_enable:1; 
+   GLuint pad0:3;
+   GLuint depth_coef_urb_read_offset:6;	/* WM only */
+   GLuint pad1:2;
+   GLuint floating_point_mode:1; 
+   GLuint thread_priority:1; 
+   GLuint binding_table_entry_count:8; 
+   GLuint pad3:5;
+   GLuint single_program_flow:1; 
+};
+
+struct thread2
+{  /* 32 bits: 4-bit per-thread scratch size and 22-bit scratch base pointer */
+   GLuint per_thread_scratch_space:4; 
+   GLuint pad0:6;
+   GLuint scratch_space_base_pointer:22; 
+};
+
+   
+struct thread3
+{  /* 32 bits: dispatch start register plus URB and constant-URB read ranges */
+   GLuint dispatch_grf_start_reg:4; 
+   GLuint urb_entry_read_offset:6; 
+   GLuint pad0:1;
+   GLuint urb_entry_read_length:6; 
+   GLuint pad1:1;
+   GLuint const_urb_entry_read_offset:6; 
+   GLuint pad2:1;
+   GLuint const_urb_entry_read_length:6; 
+   GLuint pad3:1;
+};
+
+
+
+struct brw_clip_unit_state
+{  /* thread0-thread4, clip5, clip6, then four viewport floats */
+   struct thread0 thread0;
+   struct
+   {  /* clip-specific layout instead of the shared struct thread1 */
+      GLuint pad0:7;
+      GLuint sw_exception_enable:1;
+      GLuint pad1:3;
+      GLuint mask_stack_exception_enable:1;
+      GLuint pad2:1;
+      GLuint illegal_op_exception_enable:1;
+      GLuint pad3:2;
+      GLuint floating_point_mode:1;
+      GLuint thread_priority:1;
+      GLuint binding_table_entry_count:8;
+      GLuint pad4:5;
+      GLuint single_program_flow:1;
+   } thread1;
+
+   struct thread2 thread2;
+   struct thread3 thread3;
+
+   struct
+   {
+      GLuint pad0:9;
+      GLuint gs_output_stats:1; /* not always */
+      GLuint stats_enable:1; 
+      GLuint nr_urb_entries:7; 
+      GLuint pad1:1;
+      GLuint urb_entry_allocation_size:5; 
+      GLuint pad2:1;
+      GLuint max_threads:5; 	/* may be less */
+      GLuint pad3:2;
+   } thread4;   
+      
+   struct
+   {
+      GLuint pad0:13;
+      GLuint clip_mode:3; 
+      GLuint userclip_enable_flags:8; 
+      GLuint userclip_must_clip:1; 
+      GLuint negative_w_clip_test:1;
+      GLuint guard_band_enable:1; 
+      GLuint viewport_z_clip_enable:1; 
+      GLuint viewport_xy_clip_enable:1; 
+      GLuint vertex_position_space:1; 
+      GLuint api_mode:1; 
+      GLuint pad2:1;
+   } clip5;
+   
+   struct
+   {
+      GLuint pad0:5;
+      GLuint clipper_viewport_state_ptr:27; 
+   } clip6;
+
+   
+   GLfloat viewport_xmin;  
+   GLfloat viewport_xmax;  
+   GLfloat viewport_ymin;  
+   GLfloat viewport_ymax;  
+};
+
+struct gen6_blend_state
+{  /* two 32-bit words: blend0 (factors/functions) and blend1 (the rest) */
+   struct {
+      GLuint dest_blend_factor:5;
+      GLuint source_blend_factor:5;
+      GLuint pad3:1;
+      GLuint blend_func:3;
+      GLuint pad2:1;
+      GLuint ia_dest_blend_factor:5;
+      GLuint ia_source_blend_factor:5;
+      GLuint pad1:1;
+      GLuint ia_blend_func:3;
+      GLuint pad0:1;
+      GLuint ia_blend_enable:1;
+      GLuint blend_enable:1;
+   } blend0;
+
+   struct {
+      GLuint post_blend_clamp_enable:1;
+      GLuint pre_blend_clamp_enable:1;
+      GLuint clamp_range:2;
+      GLuint pad0:4;
+      GLuint x_dither_offset:2;
+      GLuint y_dither_offset:2;
+      GLuint dither_enable:1;
+      GLuint alpha_test_func:3;
+      GLuint alpha_test_enable:1;
+      GLuint pad1:1;
+      GLuint logic_op_func:4;
+      GLuint logic_op_enable:1;
+      GLuint pad2:1;
+      GLuint write_disable_b:1;
+      GLuint write_disable_g:1;
+      GLuint write_disable_r:1;
+      GLuint write_disable_a:1;
+      GLuint pad3:1;
+      GLuint alpha_to_coverage_dither:1;
+      GLuint alpha_to_one:1;
+      GLuint alpha_to_coverage:1;
+   } blend1;
+};
+
+struct gen6_color_calc_state
+{  /* cc0 bitfields, alpha reference word, then four constant floats */
+   struct {
+      GLuint alpha_test_format:1;
+      GLuint pad0:14;
+      GLuint round_disable:1;
+      GLuint bf_stencil_ref:8;
+      GLuint stencil_ref:8;
+   } cc0;
+
+   union {  /* alpha reference as a float or as an 8-bit unsigned value */
+      GLfloat alpha_ref_f;
+      struct {
+	 GLuint ui:8;
+	 GLuint pad0:24;
+      } alpha_ref_fi;
+   } cc1;
+
+   GLfloat constant_r;
+   GLfloat constant_g;
+   GLfloat constant_b;
+   GLfloat constant_a;
+};
+
+struct gen6_depth_stencil_state
+{  /* three 32-bit words: ds0 stencil ops, ds1 stencil masks, ds2 depth */
+   struct {
+      GLuint pad0:3;
+      GLuint bf_stencil_pass_depth_pass_op:3;
+      GLuint bf_stencil_pass_depth_fail_op:3;
+      GLuint bf_stencil_fail_op:3;
+      GLuint bf_stencil_func:3;
+      GLuint bf_stencil_enable:1;
+      GLuint pad1:2;
+      GLuint stencil_write_enable:1;
+      GLuint stencil_pass_depth_pass_op:3;
+      GLuint stencil_pass_depth_fail_op:3;
+      GLuint stencil_fail_op:3;
+      GLuint stencil_func:3;
+      GLuint stencil_enable:1;
+   } ds0;
+
+   struct {
+      GLuint bf_stencil_write_mask:8;
+      GLuint bf_stencil_test_mask:8;
+      GLuint stencil_write_mask:8;
+      GLuint stencil_test_mask:8;
+   } ds1;
+
+   struct {
+      GLuint pad0:26;  /* was 25: the word summed to 31 bits, shifting the fields */
+      GLuint depth_write_enable:1;  /* 27th bit of the word */
+      GLuint depth_test_func:3;
+      GLuint pad1:1;
+      GLuint depth_test_enable:1;   /* last (32nd) bit of the word */
+   } ds2;
+};
+
+struct brw_cc_unit_state
+{  /* eight 32-bit words, cc0 through cc7 */
+   struct
+   {
+      GLuint pad0:3;
+      GLuint bf_stencil_pass_depth_pass_op:3; 
+      GLuint bf_stencil_pass_depth_fail_op:3; 
+      GLuint bf_stencil_fail_op:3; 
+      GLuint bf_stencil_func:3; 
+      GLuint bf_stencil_enable:1; 
+      GLuint pad1:2;
+      GLuint stencil_write_enable:1; 
+      GLuint stencil_pass_depth_pass_op:3; 
+      GLuint stencil_pass_depth_fail_op:3; 
+      GLuint stencil_fail_op:3; 
+      GLuint stencil_func:3; 
+      GLuint stencil_enable:1; 
+   } cc0;
+
+   
+   struct
+   {
+      GLuint bf_stencil_ref:8; 
+      GLuint stencil_write_mask:8; 
+      GLuint stencil_test_mask:8; 
+      GLuint stencil_ref:8; 
+   } cc1;
+
+   
+   struct
+   {
+      GLuint logicop_enable:1; 
+      GLuint pad0:10;
+      GLuint depth_write_enable:1; 
+      GLuint depth_test_function:3; 
+      GLuint depth_test:1; 
+      GLuint bf_stencil_write_mask:8; 
+      GLuint bf_stencil_test_mask:8; 
+   } cc2;
+
+   
+   struct
+   {
+      GLuint pad0:8;
+      GLuint alpha_test_func:3; 
+      GLuint alpha_test:1; 
+      GLuint blend_enable:1; 
+      GLuint ia_blend_enable:1; 
+      GLuint pad1:1;
+      GLuint alpha_test_format:1;
+      GLuint pad2:16;
+   } cc3;
+   
+   struct
+   {
+      GLuint pad0:5; 
+      GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
+   } cc4;
+   
+   struct
+   {
+      GLuint pad0:2;
+      GLuint ia_dest_blend_factor:5; 
+      GLuint ia_src_blend_factor:5; 
+      GLuint ia_blend_function:3; 
+      GLuint statistics_enable:1; 
+      GLuint logicop_func:4; 
+      GLuint pad1:11;
+      GLuint dither_enable:1; 
+   } cc5;
+
+   struct
+   {
+      GLuint clamp_post_alpha_blend:1; 
+      GLuint clamp_pre_alpha_blend:1; 
+      GLuint clamp_range:2; 
+      GLuint pad0:11;
+      GLuint y_dither_offset:2; 
+      GLuint x_dither_offset:2; 
+      GLuint dest_blend_factor:5; 
+      GLuint src_blend_factor:5; 
+      GLuint blend_function:3; 
+   } cc6;
+
+   struct {
+      union {  /* alpha reference as one float or as four bytes */
+	 GLfloat f;  
+	 GLubyte ub[4];
+      } alpha_ref;
+   } cc7;
+};
+
+struct brw_sf_unit_state
+{  /* eight 32-bit words: thread0-thread4, then sf5, sf6 and sf7 */
+   struct thread0 thread0;
+   struct thread1 thread1;
+   struct thread2 thread2;
+   struct thread3 thread3;
+
+   struct
+   {
+      GLuint pad0:10;
+      GLuint stats_enable:1; 
+      GLuint nr_urb_entries:7; 
+      GLuint pad1:1;
+      GLuint urb_entry_allocation_size:5; 
+      GLuint pad2:1;
+      GLuint max_threads:6; 
+      GLuint pad3:1;
+   } thread4;   
+
+   struct
+   {
+      GLuint front_winding:1; 
+      GLuint viewport_transform:1; 
+      GLuint pad0:3;
+      GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
+   } sf5;
+   
+   struct
+   {
+      GLuint pad0:9;
+      GLuint dest_org_vbias:4; 
+      GLuint dest_org_hbias:4; 
+      GLuint scissor:1; 
+      GLuint disable_2x2_trifilter:1; 
+      GLuint disable_zero_pix_trifilter:1; 
+      GLuint point_rast_rule:2; 
+      GLuint line_endcap_aa_region_width:2; 
+      GLuint line_width:4; 
+      GLuint fast_scissor_disable:1; 
+      GLuint cull_mode:2; 
+      GLuint aa_enable:1; 
+   } sf6;
+
+   struct
+   {
+      GLuint point_size:11; 
+      GLuint use_point_size_state:1; 
+      GLuint subpixel_precision:1; 
+      GLuint sprite_point:1; 
+      GLuint pad0:10;
+      GLuint aa_line_distance_mode:1;
+      GLuint trifan_pv:2; 
+      GLuint linestrip_pv:2; 
+      GLuint tristrip_pv:2; 
+      GLuint line_last_pixel_enable:1; 
+   } sf7;
+
+};
+
+struct gen6_scissor_state   /* four GLuint bounds; note y is declared before x in each pair */
+{
+   GLuint ymin, xmin;
+   GLuint ymax, xmax;
+};
+
+struct brw_gs_unit_state   /* GS unit state: thread0-3 members, then three packed 32-bit words */
+{
+   struct thread0 thread0;   /* struct thread0..thread3 are declared outside this excerpt */
+   struct thread1 thread1;
+   struct thread2 thread2;
+   struct thread3 thread3;
+
+   struct   /* thread4: 8+1+1+1+7+1+5+1+5+2 = 32 bits */
+   {
+      GLuint pad0:8;
+      GLuint rendering_enable:1; /* for Ironlake */
+      GLuint pad4:1;
+      GLuint stats_enable:1; 
+      GLuint nr_urb_entries:7; 
+      GLuint pad1:1;
+      GLuint urb_entry_allocation_size:5; 
+      GLuint pad2:1;
+      GLuint max_threads:5;   /* 5 bits here; the SF and VS thread4 words use 6 */
+      GLuint pad3:2;
+   } thread4;   
+      
+   struct   /* gs5: 3+2+27 = 32 bits */
+   {
+      GLuint sampler_count:3; 
+      GLuint pad0:2;
+      GLuint sampler_state_pointer:27; 
+   } gs5;
+
+   
+   struct   /* gs6: 4+12+10+1+1+1+1+1+1 = 32 bits */
+   {
+      GLuint max_vp_index:4; 
+      GLuint pad0:12;
+      GLuint svbi_post_inc_value:10;
+      GLuint pad1:1;
+      GLuint svbi_post_inc_enable:1;
+      GLuint svbi_payload:1;
+      GLuint discard_adjaceny:1;   /* sic: the misspelling is part of the field name */
+      GLuint reorder_enable:1; 
+      GLuint pad2:1;
+   } gs6;
+};
+
+
+struct brw_vs_unit_state   /* VS unit state: thread0-3 members, then three packed 32-bit words */
+{
+   struct thread0 thread0;   /* struct thread0..thread3 are declared outside this excerpt */
+   struct thread1 thread1;
+   struct thread2 thread2;
+   struct thread3 thread3;
+   
+   struct   /* thread4: 10+1+7+1+5+1+6+1 = 32 bits */
+   {
+      GLuint pad0:10;
+      GLuint stats_enable:1; 
+      GLuint nr_urb_entries:7; 
+      GLuint pad1:1;
+      GLuint urb_entry_allocation_size:5; 
+      GLuint pad2:1;
+      GLuint max_threads:6; 
+      GLuint pad3:1;
+   } thread4;   
+
+   struct   /* vs5: 3+2+27 = 32 bits */
+   {
+      GLuint sampler_count:3; 
+      GLuint pad0:2;
+      GLuint sampler_state_pointer:27; 
+   } vs5;
+
+   struct   /* vs6: 1+1+30 = 32 bits */
+   {
+      GLuint vs_enable:1; 
+      GLuint vert_cache_disable:1; 
+      GLuint pad0:30;
+   } vs6;
+};
+
+
+struct brw_wm_unit_state   /* WM unit state: thread0-3, wm4/wm5, two floats, then wm8-wm10 */
+{
+   struct thread0 thread0;   /* struct thread0..thread3 are declared outside this excerpt */
+   struct thread1 thread1;
+   struct thread2 thread2;
+   struct thread3 thread3;
+   
+   struct {   /* wm4: 1+1+3+27 = 32 bits */
+      GLuint stats_enable:1; 
+      GLuint depth_buffer_clear:1;
+      GLuint sampler_count:3; 
+      GLuint sampler_state_pointer:27; 
+   } wm4;
+   
+   struct   /* wm5: thirteen 1-bit flags plus pad and width fields, 32 bits in total */
+   {
+      GLuint enable_8_pix:1; 
+      GLuint enable_16_pix:1; 
+      GLuint enable_32_pix:1; 
+      GLuint enable_con_32_pix:1;
+      GLuint enable_con_64_pix:1;
+      GLuint pad0:5;
+      GLuint legacy_global_depth_bias:1; 
+      GLuint line_stipple:1; 
+      GLuint depth_offset:1; 
+      GLuint polygon_stipple:1; 
+      GLuint line_aa_region_width:2; 
+      GLuint line_endcap_aa_region_width:2; 
+      GLuint early_depth_test:1; 
+      GLuint thread_dispatch_enable:1; 
+      GLuint program_uses_depth:1; 
+      GLuint program_computes_depth:1; 
+      GLuint program_uses_killpixel:1; 
+      GLuint legacy_line_rast: 1; 
+      GLuint transposed_urb_read_enable:1; 
+      GLuint max_threads:7; 
+   } wm5;
+   
+   GLfloat global_depth_offset_constant;  
+   GLfloat global_depth_offset_scale;   
+   
+   /* for Ironlake only */
+   struct {   /* wm8: 1+3+2+26 = 32 bits; wm9 and wm10 repeat the same shape */
+      GLuint pad0:1;
+      GLuint grf_reg_count_1:3; 
+      GLuint pad1:2;
+      GLuint kernel_start_pointer_1:26;
+   } wm8;       
+
+   struct {   /* wm9: 1+3+2+26 = 32 bits */
+      GLuint pad0:1;
+      GLuint grf_reg_count_2:3; 
+      GLuint pad1:2;
+      GLuint kernel_start_pointer_2:26;
+   } wm9;       
+
+   struct {   /* wm10: 1+3+2+26 = 32 bits */
+      GLuint pad0:1;
+      GLuint grf_reg_count_3:3; 
+      GLuint pad1:2;
+      GLuint kernel_start_pointer_3:26;
+   } wm10;       
+};
+
+struct brw_sampler_default_color {   /* one color stored as four GLfloat components */
+   GLfloat color[4];   /* NOTE(review): component order (presumably RGBA) is not shown here -- confirm */
+};
+
+struct brw_sampler_state   /* sampler state: four packed 32-bit words, ss0..ss3 */
+{
+   
+   struct   /* ss0: 3+11+3+3+2+5+1+1+1+1+1 = 32 bits */
+   {
+      GLuint shadow_function:3; 
+      GLuint lod_bias:11; 
+      GLuint min_filter:3; 
+      GLuint mag_filter:3; 
+      GLuint mip_filter:2; 
+      GLuint base_level:5; 
+      GLuint pad:1;
+      GLuint lod_preclamp:1; 
+      GLuint default_color_mode:1; 
+      GLuint pad0:1;
+      GLuint disable:1; 
+   } ss0;
+
+   struct   /* ss1: 3+3+3+3+10+10 = 32 bits */
+   {
+      GLuint r_wrap_mode:3; 
+      GLuint t_wrap_mode:3; 
+      GLuint s_wrap_mode:3; 
+      GLuint pad:3;
+      GLuint max_lod:10; 
+      GLuint min_lod:10; 
+   } ss1;
+
+   
+   struct   /* ss2: 5+27 = 32 bits */
+   {
+      GLuint pad:5;
+      GLuint default_color_pointer:27; 
+   } ss2;
+   
+   struct   /* ss3: 19+3+1+2+1+3+3 = 32 bits */
+   {
+      GLuint pad:19;
+      GLuint max_aniso:3; 
+      GLuint chroma_key_mode:1; 
+      GLuint chroma_key_index:2; 
+      GLuint chroma_key_enable:1; 
+      GLuint monochrome_filter_width:3; 
+      GLuint monochrome_filter_height:3; 
+   } ss3;
+};
+
+
+struct brw_clipper_viewport   /* four GLfloat bounds, declared x-range first, then y-range */
+{
+   GLfloat xmin;  
+   GLfloat xmax;  
+   GLfloat ymin;  
+   GLfloat ymax;  
+};
+
+struct brw_cc_viewport   /* depth range as two GLfloats: minimum first, then maximum */
+{
+   GLfloat min_depth;  
+   GLfloat max_depth;  
+};
+
+struct brw_sf_viewport   /* six GLfloat viewport terms followed by a GLshort scissor rectangle */
+{
+   struct {   /* NOTE(review): m00/m11/m22 look like scale and m30/m31/m32 like translate terms -- confirm */
+      GLfloat m00;  
+      GLfloat m11;  
+      GLfloat m22;  
+      GLfloat m30;  
+      GLfloat m31;  
+      GLfloat m32;  
+   } viewport;
+
+   /* scissor coordinates are inclusive */
+   struct {   /* signed 16-bit values, min corner first, then max corner */
+      GLshort xmin;
+      GLshort ymin;
+      GLshort xmax;
+      GLshort ymax;
+   } scissor;
+};
+
+struct gen6_sf_viewport {   /* same six GLfloat terms as brw_sf_viewport.viewport, with no scissor part */
+   GLfloat m00;
+   GLfloat m11;
+   GLfloat m22;
+   GLfloat m30;
+   GLfloat m31;
+   GLfloat m32;
+};
+
+/* Documented in the subsystem/shared-functions/sampler chapter...
+ */
+struct brw_surface_state   /* surface state: six 32-bit words, ss0..ss5 */
+{
+   struct {   /* ss0: 6 cube-face bits + 4 pad + 8 single-bit flags + 9+1+1+3 = 32 bits */
+      GLuint cube_pos_z:1; 
+      GLuint cube_neg_z:1; 
+      GLuint cube_pos_y:1; 
+      GLuint cube_neg_y:1; 
+      GLuint cube_pos_x:1; 
+      GLuint cube_neg_x:1; 
+      GLuint pad:4;
+      GLuint mipmap_layout_mode:1; 
+      GLuint vert_line_stride_ofs:1; 
+      GLuint vert_line_stride:1; 
+      GLuint color_blend:1; 
+      GLuint writedisable_blue:1; 
+      GLuint writedisable_green:1; 
+      GLuint writedisable_red:1; 
+      GLuint writedisable_alpha:1; 
+      GLuint surface_format:9;     /**< BRW_SURFACEFORMAT_x */
+      GLuint data_return_format:1; 
+      GLuint pad0:1;
+      GLuint surface_type:3;       /**< BRW_SURFACE_1D/2D/3D/CUBE */
+   } ss0;
+   
+   struct {   /* ss1: one full, unpacked 32-bit word */
+      GLuint base_addr;  
+   } ss1;
+   
+   struct {   /* ss2: 2+4+13+13 = 32 bits */
+      GLuint pad:2;
+      GLuint mip_count:4; 
+      GLuint width:13; 
+      GLuint height:13; 
+   } ss2;
+
+   struct {   /* ss3: 1+1+1+18+11 = 32 bits */
+      GLuint tile_walk:1; 
+      GLuint tiled_surface:1; 
+      GLuint pad:1; 
+      GLuint pitch:18; 
+      GLuint depth:11; 
+   } ss3;
+   
+   struct {   /* ss4: 3+1+3+1+9+11+4 = 32 bits */
+      GLuint multisample_position_palette_index:3;
+      GLuint pad1:1;
+      GLuint num_multisamples:3;
+      GLuint pad0:1;
+      GLuint render_target_view_extent:9;
+      GLuint min_array_elt:11;
+      GLuint min_lod:4; 
+   } ss4;
+
+   struct {   /* ss5: 16+1+1+1+1+4+1+7 = 32 bits */
+      GLuint pad1:16;
+      GLuint llc_mapping:1;
+      GLuint mlc_mapping:1;
+      GLuint gfdt:1;
+      GLuint gfdt_src:1;
+      GLuint y_offset:4;
+      GLuint pad0:1;
+      GLuint x_offset:7;
+   } ss5;   /* New in G4X */
+
+};
+
+
+
+struct brw_vertex_buffer_state   /* one packed word (vb0) followed by three plain GLuint words */
+{
+   struct {   /* vb0: 11+15+1+5 = 32 bits */
+      GLuint pitch:11; 
+      GLuint pad:15;
+      GLuint access_type:1; 
+      GLuint vb_index:5; 
+   } vb0;
+   
+   GLuint start_addr; 
+   GLuint max_index;   
+#if 1   /* always compiled in; the guard only marks the field as questionable */
+   GLuint instance_data_step_rate; /* not included for sequential/random vertices? */
+#endif
+};
+
+#define BRW_VBP_MAX 17   /* element count of brw_vb_array_state.vb[] */
+
+struct brw_vb_array_state {   /* a header followed by a fixed array of vertex buffer states */
+   struct header header;   /* struct header is declared outside this excerpt */
+   struct brw_vertex_buffer_state vb[BRW_VBP_MAX];
+};
+
+
+struct brw_vertex_element_state   /* vertex element description: two packed 32-bit words */
+{
+   struct   /* ve0: 11+5+9+1+1+5 = 32 bits */
+   {
+      GLuint src_offset:11; 
+      GLuint pad:5;
+      GLuint src_format:9; 
+      GLuint pad0:1;
+      GLuint valid:1; 
+      GLuint vertex_buffer_index:5; 
+   } ve0;
+   
+   struct   /* ve1: 8+8+4+4+4+4 = 32 bits; note component 3 is declared first */
+   {
+      GLuint dst_offset:8; 
+      GLuint pad:8;
+      GLuint vfcomponent3:4; 
+      GLuint vfcomponent2:4; 
+      GLuint vfcomponent1:4; 
+      GLuint vfcomponent0:4; 
+   } ve1;
+};
+
+#define BRW_VEP_MAX 18   /* element count of brw_vertex_element_packet.ve[] */
+
+struct brw_vertex_element_packet {   /* a header followed by a fixed array of vertex elements */
+   struct header header;   /* struct header is declared outside this excerpt */
+   struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
+};
+
+
+struct brw_urb_immediate {   /* 32 bits in total; also embedded as brw_instruction.bits3.urb */
+   GLuint opcode:4;
+   GLuint offset:6;
+   GLuint swizzle_control:2; 
+   GLuint pad:1;
+   GLuint allocate:1;
+   GLuint used:1;
+   GLuint complete:1;
+   GLuint response_length:4;   /* the last five fields match bits3.generic's tail layout */
+   GLuint msg_length:4;
+   GLuint msg_target:4;
+   GLuint pad1:3;
+   GLuint end_of_thread:1;
+};
+
+/* Instruction format for the execution units:
+ */
+ 
+struct brw_instruction   /* four 32-bit words: header, bits1, bits2, bits3 */
+{
+   struct   /* header: 7+1+1+1+2+2+2+4+1+3+4+2+1+1 = 32 bits */
+   {
+      GLuint opcode:7;
+      GLuint pad:1;
+      GLuint access_mode:1;
+      GLuint mask_control:1;
+      GLuint dependency_control:2;
+      GLuint compression_control:2;
+      GLuint thread_control:2;
+      GLuint predicate_control:4;
+      GLuint predicate_inverse:1;
+      GLuint execution_size:3;
+      GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */
+      GLuint pad0:2;
+      GLuint debug_control:1;
+      GLuint saturate:1;
+   } header;
+
+   union {   /* bits1: four alternative 32-bit views of the destination word */
+      struct   /* da1: destination named by dest_reg_nr/dest_subreg_nr */
+      {
+	 GLuint dest_reg_file:2;
+	 GLuint dest_reg_type:3;
+	 GLuint src0_reg_file:2;
+	 GLuint src0_reg_type:3;
+	 GLuint src1_reg_file:2;
+	 GLuint src1_reg_type:3;
+	 GLuint pad:1;
+	 GLuint dest_subreg_nr:5;
+	 GLuint dest_reg_nr:8;
+	 GLuint dest_horiz_stride:2;
+	 GLuint dest_address_mode:1;
+      } da1;
+
+      struct   /* ia1: destination given as a signed offset from an address subregister */
+      {
+	 GLuint dest_reg_file:2;
+	 GLuint dest_reg_type:3;
+	 GLuint src0_reg_file:2;
+	 GLuint src0_reg_type:3;
+	 GLuint src1_reg_file:2;        /* 0x00000c00 */
+	 GLuint src1_reg_type:3;        /* 0x00007000 */
+	 GLuint pad:1;
+	 GLint dest_indirect_offset:10;	/* offset against the deref'd address reg */
+	 GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */
+	 GLuint dest_horiz_stride:2;
+	 GLuint dest_address_mode:1;
+      } ia1;
+
+      struct   /* da16: like da1 but with a 4-bit writemask and a 1-bit subreg number */
+      {
+	 GLuint dest_reg_file:2;
+	 GLuint dest_reg_type:3;
+	 GLuint src0_reg_file:2;
+	 GLuint src0_reg_type:3;
+	 GLuint src1_reg_file:2;
+	 GLuint src1_reg_type:3;
+	 GLuint pad:1;
+	 GLuint dest_writemask:4;
+	 GLuint dest_subreg_nr:1;
+	 GLuint dest_reg_nr:8;
+	 GLuint pad1:2;
+	 GLuint dest_address_mode:1;
+      } da16;
+
+      struct   /* ia16: writemask plus 6-bit signed offset; no src1 file/type fields here */
+      {
+	 GLuint dest_reg_file:2;
+	 GLuint dest_reg_type:3;
+	 GLuint src0_reg_file:2;
+	 GLuint src0_reg_type:3;
+	 GLuint pad0:6;
+	 GLuint dest_writemask:4;
+	 GLint dest_indirect_offset:6;
+	 GLuint dest_subreg_nr:3;
+	 GLuint pad1:2;
+	 GLuint dest_address_mode:1;
+      } ia16;
+   } bits1;
+
+
+   union {   /* bits2: alternative 32-bit views of the src0 word, plus send_gen5 */
+      struct   /* da1: src0 named by src0_reg_nr/src0_subreg_nr with region fields */
+      {
+	 GLuint src0_subreg_nr:5;
+	 GLuint src0_reg_nr:8;
+	 GLuint src0_abs:1;
+	 GLuint src0_negate:1;
+	 GLuint src0_address_mode:1;
+	 GLuint src0_horiz_stride:2;
+	 GLuint src0_width:3;
+	 GLuint src0_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad:6;
+      } da1;
+
+      struct   /* ia1: src0 given as a 10-bit signed offset */
+      {
+	 GLint src0_indirect_offset:10;
+	 GLuint src0_subreg_nr:3;
+	 GLuint src0_abs:1;
+	 GLuint src0_negate:1;
+	 GLuint src0_address_mode:1;
+	 GLuint src0_horiz_stride:2;
+	 GLuint src0_width:3;
+	 GLuint src0_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad:6;	
+      } ia1;
+
+      struct   /* da16: swizzle x/y in the low bits, z/w after the address mode bit */
+      {
+	 GLuint src0_swz_x:2;
+	 GLuint src0_swz_y:2;
+	 GLuint src0_subreg_nr:1;
+	 GLuint src0_reg_nr:8;
+	 GLuint src0_abs:1;
+	 GLuint src0_negate:1;
+	 GLuint src0_address_mode:1;
+	 GLuint src0_swz_z:2;
+	 GLuint src0_swz_w:2;
+	 GLuint pad0:1;
+	 GLuint src0_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad1:6;
+      } da16;
+
+      struct   /* ia16: swizzles plus a 6-bit signed offset */
+      {
+	 GLuint src0_swz_x:2;
+	 GLuint src0_swz_y:2;
+	 GLint src0_indirect_offset:6;
+	 GLuint src0_subreg_nr:3;
+	 GLuint src0_abs:1;
+	 GLuint src0_negate:1;
+	 GLuint src0_address_mode:1;
+	 GLuint src0_swz_z:2;
+	 GLuint src0_swz_w:2;
+	 GLuint pad0:1;
+	 GLuint src0_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad1:6;
+      } ia16;
+
+       struct   /* send_gen5: 26+1+1+4 = 32 bits; only the top six bits carry fields */
+       {
+           GLuint pad:26;
+           GLuint end_of_thread:1;
+           GLuint pad1:1;
+           GLuint sfid:4;
+       } send_gen5;  /* for Ironlake only */
+
+   } bits2;
+
+   union   /* bits3: src1 views, if_else, message-style views, or a raw 32-bit value */
+   {
+      struct   /* da1: src1 named by src1_reg_nr/src1_subreg_nr; no flag_reg_nr field */
+      {
+	 GLuint src1_subreg_nr:5;
+	 GLuint src1_reg_nr:8;
+	 GLuint src1_abs:1;
+	 GLuint src1_negate:1;
+	 GLuint src1_address_mode:1;
+	 GLuint src1_horiz_stride:2;
+	 GLuint src1_width:3;
+	 GLuint src1_vert_stride:4;
+	 GLuint pad0:7;
+      } da1;
+
+      struct   /* da16: swizzled src1; no flag_reg_nr field */
+      {
+	 GLuint src1_swz_x:2;
+	 GLuint src1_swz_y:2;
+	 GLuint src1_subreg_nr:1;
+	 GLuint src1_reg_nr:8;
+	 GLuint src1_abs:1;
+	 GLuint src1_negate:1;
+	 GLuint src1_address_mode:1;
+	 GLuint src1_swz_z:2;
+	 GLuint src1_swz_w:2;
+	 GLuint pad1:1;
+	 GLuint src1_vert_stride:4;
+	 GLuint pad2:7;
+      } da16;
+
+      struct   /* ia1: src1 given as a 10-bit signed offset */
+      {
+	 GLint  src1_indirect_offset:10;
+	 GLuint src1_subreg_nr:3;
+	 GLuint src1_abs:1;
+	 GLuint src1_negate:1;
+	 GLuint src1_address_mode:1;
+	 GLuint src1_horiz_stride:2;
+	 GLuint src1_width:3;
+	 GLuint src1_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad1:6;	
+      } ia1;
+
+      struct   /* ia16: pad0 sits where the other views have an address_mode bit */
+      {
+	 GLuint src1_swz_x:2;
+	 GLuint src1_swz_y:2;
+	 GLint  src1_indirect_offset:6;
+	 GLuint src1_subreg_nr:3;
+	 GLuint src1_abs:1;
+	 GLuint src1_negate:1;
+	 GLuint pad0:1;
+	 GLuint src1_swz_z:2;
+	 GLuint src1_swz_w:2;
+	 GLuint pad1:1;
+	 GLuint src1_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad2:6;
+      } ia16;
+
+
+      struct   /* if_else: 16+4+12 = 32 bits */
+      {
+	 GLint  jump_count:16;	/* note: signed */
+	 GLuint  pop_count:4;
+	 GLuint  pad0:12;
+      } if_else;
+
+      struct {   /* math: 8 function bits, 8 pad, then the generic 16-bit tail */
+	 GLuint function:4;
+	 GLuint int_type:1;
+	 GLuint precision:1;
+	 GLuint saturate:1;
+	 GLuint data_type:1;
+	 GLuint pad0:8;
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } math;
+
+      struct {   /* math_gen5: adds snapshot; tail follows the generic_gen5 layout */
+	 GLuint function:4;
+	 GLuint int_type:1;
+	 GLuint precision:1;
+	 GLuint saturate:1;
+	 GLuint data_type:1;
+	 GLuint snapshot:1;
+	 GLuint pad0:10;
+	 GLuint header_present:1;
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;
+      } math_gen5;
+
+      struct {   /* sampler: 2-bit return_format and 2-bit msg_type */
+	 GLuint binding_table_index:8;
+	 GLuint sampler:4;
+	 GLuint return_format:2; 
+	 GLuint msg_type:2;   
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } sampler;
+
+      struct {   /* sampler_g4x: a 4-bit msg_type occupies the return_format+msg_type bits */
+         GLuint binding_table_index:8;
+         GLuint sampler:4;
+         GLuint msg_type:4;
+         GLuint response_length:4;
+         GLuint msg_length:4;
+         GLuint msg_target:4;
+         GLuint pad1:3;
+         GLuint end_of_thread:1;
+      } sampler_g4x;
+
+      struct {   /* sampler_gen5: adds simd_mode; tail follows the generic_gen5 layout */
+	 GLuint binding_table_index:8;
+	 GLuint sampler:4;
+	 GLuint msg_type:4;
+	 GLuint simd_mode:2;
+	 GLuint pad0:1;
+	 GLuint header_present:1;
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;
+      } sampler_gen5;
+
+      struct brw_urb_immediate urb;
+
+      struct {   /* urb_gen5: same low 16 bits as brw_urb_immediate, generic_gen5 tail */
+	 GLuint opcode:4;
+	 GLuint offset:6;
+	 GLuint swizzle_control:2; 
+	 GLuint pad:1;
+	 GLuint allocate:1;
+	 GLuint used:1;
+	 GLuint complete:1;
+	 GLuint pad0:3;
+	 GLuint header_present:1;
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;
+      } urb_gen5;
+
+      struct {   /* dp_read: 4-bit msg_control, 2-bit msg_type */
+	 GLuint binding_table_index:8;
+	 GLuint msg_control:4;  
+	 GLuint msg_type:2;  
+	 GLuint target_cache:2;    
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } dp_read;
+
+      struct {   /* dp_read_gen5: 3-bit msg_control, 3-bit msg_type */
+	 GLuint binding_table_index:8;
+	 GLuint msg_control:3;  
+	 GLuint msg_type:3;  
+	 GLuint target_cache:2;    
+	 GLuint pad0:3;
+	 GLuint header_present:1;
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;
+      } dp_read_gen5;
+
+      struct {   /* dp_write: 8+3+1+3+1 low bits, then the generic 16-bit tail */
+	 GLuint binding_table_index:8;
+	 GLuint msg_control:3;
+	 GLuint pixel_scoreboard_clear:1;
+	 GLuint msg_type:3;    
+	 GLuint send_commit_msg:1;
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } dp_write;
+
+      struct {   /* dp_write_gen5: same low 16 bits as dp_write, generic_gen5 tail */
+	 GLuint binding_table_index:8;
+	 GLuint msg_control:3;
+	 GLuint pixel_scoreboard_clear:1;
+	 GLuint msg_type:3;    
+	 GLuint send_commit_msg:1;
+	 GLuint pad0:3;
+	 GLuint header_present:1;
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;
+      } dp_write_gen5;
+
+      struct {   /* generic: 16+4+4+4+3+1 = 32 bits; low 16 bits left unnamed */
+	 GLuint pad:16;
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } generic;
+
+      struct {   /* generic_gen5: 19+1+5+4+2+1 = 32 bits; it has no msg_target field */
+	 GLuint pad:19;
+	 GLuint header_present:1;
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;
+      } generic_gen5;
+
+      GLint d;     /* raw views of the whole word: signed, unsigned and float */
+      GLuint ud;
+      float f;
+   } bits3;
+};
+
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_tex.c b/src/mesa/drivers/dri/i965/brw_tex.c
new file mode 100644
index 0000000000..e911b105b2
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_tex.c
@@ -0,0 +1,59 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+        
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/teximage.h"
+
+#include "intel_context.h"
+#include "intel_regions.h"
+#include "intel_tex.h"
+#include "brw_context.h"
+
+/**
+ * Finalizes all textures, completing any rendering that needs to be done
+ * to prepare them.
+ */
+void brw_validate_textures( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+   const struct gl_texture_unit *units = intel->ctx.Texture.Unit;
+   int unit;
+
+   /* Only units that are really enabled need their miptree finalized. */
+   for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+      if (!units[unit]._ReallyEnabled)
+	 continue;
+
+      intel_finalize_mipmap_tree(intel, unit);
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
new file mode 100644
index 0000000000..768ccfd79c
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -0,0 +1,173 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+/* Code to layout images in a mipmap tree for i965.
+ */
+
+#include "intel_mipmap_tree.h"
+#include "intel_tex_layout.h"
+#include "intel_context.h"
+#include "main/macros.h"
+
+#define FILE_DEBUG_FLAG DEBUG_MIPTREE
+
+GLboolean brw_miptree_layout(struct intel_context *intel,
+			     struct intel_mipmap_tree *mt,
+			     uint32_t tiling)
+{  /* Sets per-level/per-image offsets and the total size of mt; always returns GL_TRUE. */
+   /* XXX: these vary depending on image format: */
+   /* GLint align_w = 4; */
+
+   switch (mt->target) {
+   case GL_TEXTURE_CUBE_MAP:
+      if (intel->gen == 5) {
+          GLuint align_h = 2;
+          GLuint level;
+          GLuint qpitch = 0;
+	  int h0, h1, q;
+
+	  /* On Ironlake, cube maps are finally represented as just a series
+	   * of MIPLAYOUT_BELOW 2D textures (like 2D texture arrays), separated
+	   * by a pitch of qpitch rows, where qpitch is defined by the equation
+	   * given in Volume 1 of the BSpec.
+	   */
+	  h0 = ALIGN(mt->height0, align_h);
+	  h1 = ALIGN(minify(h0), align_h);
+	  qpitch = (h0 + h1 + 11 * align_h);
+          if (mt->compressed)
+	     qpitch /= 4;
+
+	  i945_miptree_layout_2d(intel, mt, tiling, 6);
+
+          for (level = mt->first_level; level <= mt->last_level; level++) {
+	     for (q = 0; q < 6; q++) {   /* every level of face q gets y offset q * qpitch */
+		intel_miptree_set_image_offset(mt, level, q, 0, q * qpitch);
+	     }
+          }
+	  mt->total_height = qpitch * 6;
+
+          break;
+      }
+      /* fall through: when gen != 5, cube maps are packed by the GL_TEXTURE_3D case */
+   case GL_TEXTURE_3D: {
+      GLuint width  = mt->width0;
+      GLuint height = mt->height0;
+      GLuint depth = mt->depth0;
+      GLuint pack_x_pitch, pack_x_nr;
+      GLuint pack_y_pitch;
+      GLuint level;
+      GLuint align_h = 2;
+      GLuint align_w = 4;
+
+      mt->total_height = 0;
+      intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+
+      if (mt->compressed) {
+          mt->total_width = ALIGN(width, align_w);
+          pack_y_pitch = (height + 3) / 4;
+      } else {
+	 mt->total_width = mt->width0;
+	 pack_y_pitch = ALIGN(mt->height0, align_h);
+      }
+
+      pack_x_pitch = width;
+      pack_x_nr = 1;
+
+      for (level = mt->first_level ; level <= mt->last_level ; level++) {
+	 GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6;   /* 6 for a cube map */
+	 GLint x = 0;
+	 GLint y = 0;
+	 GLint q, j;
+
+	 intel_miptree_set_level_info(mt, level, nr_images,
+				      0, mt->total_height,
+				      width, height, depth);
+
+	 for (q = 0; q < nr_images;) {   /* q advances in the inner loop, one row per pass */
+	    for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+	       intel_miptree_set_image_offset(mt, level, q, x, y);
+	       x += pack_x_pitch;
+	    }
+
+	    x = 0;
+	    y += pack_y_pitch;
+	 }
+
+
+	 mt->total_height += y;
+	 width  = minify(width);
+	 height = minify(height);
+	 depth  = minify(depth);
+
+	 if (mt->compressed) {
+	    pack_y_pitch = (height + 3) / 4;
+
+	    if (pack_x_pitch > ALIGN(width, align_w)) {
+	       pack_x_pitch = ALIGN(width, align_w);
+	       pack_x_nr <<= 1;
+	    }
+	 } else {
+	    if (pack_x_pitch > 4) {   /* halve the pitch and double the images per row */
+	       pack_x_pitch >>= 1;
+	       pack_x_nr <<= 1;
+	       assert(pack_x_pitch * pack_x_nr <= mt->total_width);
+	    }
+
+	    if (pack_y_pitch > 2) {
+	       pack_y_pitch >>= 1;
+	       pack_y_pitch = ALIGN(pack_y_pitch, align_h);
+	    }
+	 }
+
+      }
+      /* The 965's sampler lays cachelines out according to how accesses
+       * in the texture surfaces run, so they may be "vertical" through
+       * memory.  As a result, the docs say in Surface Padding Requirements:
+       * Sampling Engine Surfaces that two extra rows of padding are required.
+       * We don't know of similar requirements for pre-965, but given that
+       * those docs are silent on padding requirements in general, let's play
+       * it safe.
+       */
+      if (mt->target == GL_TEXTURE_CUBE_MAP)   /* the padding is added for cube maps only */
+	 mt->total_height += 2;
+      break;
+   }
+
+   default:
+      i945_miptree_layout_2d(intel, mt, tiling, 1);
+      break;
+   }
+   DBG("%s: %dx%dx%d\n", __FUNCTION__,
+       mt->total_width, mt->total_height, mt->cpp);
+
+   return GL_TRUE;
+}
+
diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c
new file mode 100644
index 0000000000..0f597184b4
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_urb.c
@@ -0,0 +1,252 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+        
+
+
+#include "intel_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#define VS 0   /* VS..CS are the row indices of the limits[] table in this file */
+#define GS 1
+#define CLP 2
+#define SF 3
+#define CS 4   /* highest index: limits[] is declared with CS+1 rows */
+
+/** @file brw_urb.c
+ *
+ * Manages the division of the URB space between the various fixed-function
+ * units.
+ *
+ * See the Thread Initiation Management section of the GEN4 B-Spec, and
+ * the individual *_STATE structures for restrictions on numbers of
+ * entries and threads.
+ */
+
+/*
+ * Generally, a unit requires a min_nr_entries based on how many entries
+ * it produces before the downstream unit gets unblocked and can use and
+ * dereference some of its handles.
+ *
+ * The SF unit preallocates a PUE at the start of thread dispatch, and only
+ * uses that one.  So it requires one entry per thread.
+ *
+ * For CLIP, the SF unit will hold the previous primitive while the
+ * next is getting assembled, meaning that linestrips require 3 CLIP VUEs
+ * (vertices) to ensure continued processing, trifans require 4, and tristrips
+ * require 5.  There can be 1 or 2 threads, and each has the same requirement.
+ *
+ * GS has the same requirement as CLIP, but it never handles tristrips,
+ * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
+ * We only run it single-threaded.
+ *
+ * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
+ * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
+ * get streamed down as soon as threads processing earlier vertices get
+ * theirs accepted.
+ *
+ * Each unit will take the number of URB entries we give it (based on the
+ * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
+ * and brw_curbe.c for the CURBEs) and decide the maximum number of
+ * threads it can support based on that, in brw_*_state.c.
+ *
+ * XXX: Are the min_entry_size numbers useful?
+ * XXX: Verify min_nr_entries, esp for VS.
+ * XXX: Verify SF min_entry_size.
+ */
+static const struct {   /* one row per unit, indexed by VS, GS, CLP, SF, CS */
+   GLuint min_nr_entries;         /* fallback count when the preferred layout does not fit */
+   GLuint preferred_nr_entries;   /* count tried first by recalculate_urb_fence() */
+   GLuint min_entry_size;         /* lower clamp for the VS, SF and CS entry sizes */
+   GLuint max_entry_size;         /* never read anywhere in this file */
+} limits[CS+1] = {
+   { 16, 32, 1, 5 },			/* vs */
+   { 4, 8,  1, 5 },			/* gs */
+   { 5, 10,  1, 5 },			/* clp */
+   { 1, 8,  1, 12 },		        /* sf */
+   { 1, 4,  1, 32 }			/* cs */
+};
+
+
+static GLboolean check_urb_layout( struct brw_context *brw )
+{  /* Places VS, GS, CLIP, SF, CS back to back from 0 and reports whether they fit. */
+   brw->urb.vs_start = 0;
+   brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
+   brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;   /* GS uses vsize */
+   brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;   /* so does CLIP */
+   brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
+
+   return brw->urb.cs_start + brw->urb.nr_cs_entries *   /* true when the CS end is <= urb.size */
+      brw->urb.csize <= brw->urb.size;
+}
+
+/* Most minimal update, forces re-emit of URB fence packet after GS
+ * unit turned on/off.
+ */
+static void recalculate_urb_fence( struct brw_context *brw )
+{  /* Re-plans the URB split when entry sizes change; sets BRW_NEW_URB_FENCE when it does. */
+   struct intel_context *intel = &brw->intel;
+   GLuint csize = brw->curbe.total_size;
+   GLuint vsize = brw->vs.prog_data->urb_entry_size;
+   GLuint sfsize = brw->sf.prog_data->urb_entry_size;
+
+   if (csize < limits[CS].min_entry_size)   /* clamp each size up to its table minimum */
+      csize = limits[CS].min_entry_size;
+
+   if (vsize < limits[VS].min_entry_size)
+      vsize = limits[VS].min_entry_size;
+
+   if (sfsize < limits[SF].min_entry_size)
+      sfsize = limits[SF].min_entry_size;
+
+   if (brw->urb.vsize < vsize ||
+       brw->urb.sfsize < sfsize ||
+       brw->urb.csize < csize ||
+       (brw->urb.constrained && (brw->urb.vsize > vsize ||
+				 brw->urb.sfsize > sfsize ||
+				 brw->urb.csize > csize))) {
+      /* Reached when a size grew, or when constrained and a size shrank. */
+
+      brw->urb.csize = csize;
+      brw->urb.sfsize = sfsize;
+      brw->urb.vsize = vsize;
+
+      brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;	
+      brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;	
+      brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
+      brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;	
+      brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;	
+
+      brw->urb.constrained = 0;
+
+      if (intel->gen == 5) {   /* first try 128 VS and 48 SF entries */
+         brw->urb.nr_vs_entries = 128;
+         brw->urb.nr_sf_entries = 48;
+         if (check_urb_layout(brw)) {
+            goto done;
+         } else {
+            brw->urb.constrained = 1;
+            brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+            brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+         }
+      } else if (intel->is_g4x) {   /* first try 64 VS entries */
+	 brw->urb.nr_vs_entries = 64;
+	 if (check_urb_layout(brw)) {
+	    goto done;
+	 } else {
+	    brw->urb.constrained = 1;
+	    brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+	 }
+      }
+
+      if (!check_urb_layout(brw)) {   /* preferred counts do not fit: drop to the minimums */
+	 brw->urb.nr_vs_entries = limits[VS].min_nr_entries;	
+	 brw->urb.nr_gs_entries = limits[GS].min_nr_entries;	
+	 brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
+	 brw->urb.nr_sf_entries = limits[SF].min_nr_entries;	
+	 brw->urb.nr_cs_entries = limits[CS].min_nr_entries;	
+
+	 /* Mark us as operating with constrained nr_entries, so that next
+	  * time we recalculate we'll resize the fences in the hope of
+	  * escaping constrained mode and getting back to normal performance.
+	  */
+	 brw->urb.constrained = 1;
+	 
+	 if (!check_urb_layout(brw)) {
+	    /* This is impossible, given the maximal sizes of urb
+	     * entries and the values for minimum nr of entries
+	     * provided above.
+	     */
+	    printf("couldn't calculate URB layout!\n");
+	    exit(1);   /* terminates the whole process, not just this call */
+	 }
+	 
+	 if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
+	    printf("URB CONSTRAINED\n");
+      }
+
+done:   /* the *_start fields printed here were written by the last check_urb_layout() call */
+      if (INTEL_DEBUG & DEBUG_URB)
+	 printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+		      brw->urb.vs_start,
+		      brw->urb.gs_start,
+		      brw->urb.clip_start,
+		      brw->urb.sf_start,
+		      brw->urb.cs_start, 
+		      brw->urb.size);
+      
+      brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
+   }
+}
+
+
+const struct brw_tracked_state brw_recalculate_urb_fence = {   /* binds recalculate_urb_fence to dirty bits */
+   .dirty = {
+      .mesa = 0,   /* no .mesa bits are listed */
+      .brw = BRW_NEW_CURBE_OFFSETS,
+      .cache = (CACHE_NEW_VS_PROG |
+		CACHE_NEW_SF_PROG)
+   },
+   .prepare = recalculate_urb_fence   /* only .prepare is set in this initializer */
+};
+
+
+
+
+
+void brw_upload_urb_fence(struct brw_context *brw)
+{
+   struct brw_urb_fence fence;
+
+   memset(&fence, 0, sizeof(fence));
+
+   /* Header: set every *_realloc flag, then the opcode and length. */
+   fence.header.cs_realloc = 1;
+   fence.header.vfe_realloc = 1;
+   fence.header.sf_realloc = 1;
+   fence.header.clp_realloc = 1;
+   fence.header.gs_realloc = 1;
+   fence.header.vs_realloc = 1;
+   fence.header.opcode = CMD_URB_FENCE;
+   fence.header.length = sizeof(fence)/4-2;
+
+   /* Each unit's fence is the start offset of the next unit, and the
+    * last one is the total URB size.  This ordering is correct, not the
+    * layout in the instruction.  There are 256/384 urb reg pairs in total. */
+   fence.bits1.cs_fence  = brw->urb.size;
+   fence.bits1.sf_fence  = brw->urb.cs_start;
+   fence.bits0.clp_fence = brw->urb.sf_start;
+   fence.bits0.gs_fence  = brw->urb.clip_start;
+   fence.bits0.vs_fence  = brw->urb.gs_start;
+
+   BRW_BATCH_STRUCT(brw, &fence);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c
new file mode 100644
index 0000000000..bba9249d1b
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_util.c
@@ -0,0 +1,104 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+         
+
+#include "main/mtypes.h"
+#include "shader/prog_parameter.h"
+#include "brw_util.h"
+#include "brw_defines.h"
+
+GLuint brw_count_bits(uint64_t val)
+{
+   GLuint nbits = 0;
+   /* val &= val - 1 clears the lowest set bit, so this runs once per set bit. */
+   for (; val != 0; val &= val - 1)
+      nbits++;
+   return nbits;
+}
+
+
+GLuint brw_translate_blend_equation( GLenum mode )
+{
+   /* Map a GL blend equation enum onto the matching BRW_BLENDFUNCTION_*
+    * value.  An enum that is not listed trips the assert below.
+    */
+   switch (mode) {
+   case GL_FUNC_ADD:              return BRW_BLENDFUNCTION_ADD;
+   case GL_FUNC_SUBTRACT:         return BRW_BLENDFUNCTION_SUBTRACT;
+   case GL_FUNC_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
+   case GL_MIN:                   return BRW_BLENDFUNCTION_MIN;
+   case GL_MAX:                   return BRW_BLENDFUNCTION_MAX;
+   default:
+      break;
+   }
+
+   assert(0);
+   return BRW_BLENDFUNCTION_ADD;
+}
+
+GLuint brw_translate_blend_factor( GLenum factor )
+{
+   /* Map a GL blend factor enum onto the matching BRW_BLENDFACTOR_*
+    * value.  An enum that is not listed trips the assert below and,
+    * when asserts are compiled out, yields BRW_BLENDFACTOR_ZERO.
+    */
+   switch(factor) {
+   /* fixed factors */
+   case GL_ZERO:                     return BRW_BLENDFACTOR_ZERO;
+   case GL_ONE:                      return BRW_BLENDFACTOR_ONE;
+
+   /* source factors */
+   case GL_SRC_COLOR:                return BRW_BLENDFACTOR_SRC_COLOR;
+   case GL_ONE_MINUS_SRC_COLOR:      return BRW_BLENDFACTOR_INV_SRC_COLOR;
+   case GL_SRC_ALPHA:                return BRW_BLENDFACTOR_SRC_ALPHA;
+   case GL_ONE_MINUS_SRC_ALPHA:      return BRW_BLENDFACTOR_INV_SRC_ALPHA;
+   case GL_SRC_ALPHA_SATURATE:       return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
+
+   /* destination factors */
+   case GL_DST_COLOR:                return BRW_BLENDFACTOR_DST_COLOR;
+   case GL_ONE_MINUS_DST_COLOR:      return BRW_BLENDFACTOR_INV_DST_COLOR;
+   case GL_DST_ALPHA:                return BRW_BLENDFACTOR_DST_ALPHA;
+   case GL_ONE_MINUS_DST_ALPHA:      return BRW_BLENDFACTOR_INV_DST_ALPHA;
+
+   /* blend-constant factors */
+   case GL_CONSTANT_COLOR:           return BRW_BLENDFACTOR_CONST_COLOR;
+   case GL_ONE_MINUS_CONSTANT_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
+   case GL_CONSTANT_ALPHA:           return BRW_BLENDFACTOR_CONST_ALPHA;
+   case GL_ONE_MINUS_CONSTANT_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
+
+   default:
+      break;
+   }
+
+   /* Not one of the factors handled above. */
+   assert(0);
+   return BRW_BLENDFACTOR_ZERO;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h
new file mode 100644
index 0000000000..04f3175d3e
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_util.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+          
+
+#ifndef BRW_UTIL_H
+#define BRW_UTIL_H
+
+#include "main/mtypes.h"
+
+extern GLuint brw_count_bits(uint64_t val);	/* number of set bits in val */
+extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList);	/* NOTE(review): no definition in the brw_util.c added here -- confirm it exists elsewhere */
+extern GLuint brw_translate_blend_factor( GLenum factor );	/* GL blend factor -> BRW_BLENDFACTOR_* */
+extern GLuint brw_translate_blend_equation( GLenum mode );	/* GL blend equation -> BRW_BLENDFUNCTION_* */
+
+
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
new file mode 100644
index 0000000000..3c12f11ea7
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -0,0 +1,164 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+           
+
+#include "brw_context.h"
+#include "brw_vs.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "shader/prog_print.h"
+#include "shader/prog_parameter.h"
+
+
+
+static void do_vs_prog( struct brw_context *brw, 
+			struct brw_vertex_program *vp,
+			struct brw_vs_prog_key *key )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLuint program_size;
+   const GLuint *program;
+   struct brw_vs_compile c;
+   int aux_size;
+   int i;
+   /* Compile vp for *key, then pass the code and prog_data to the cache. */
+   memset(&c, 0, sizeof(c));
+   memcpy(&c.key, key, sizeof(*key));
+
+   brw_init_compile(brw, &c.func);
+   c.vp = vp;
+
+   c.prog_data.outputs_written = vp->program.Base.OutputsWritten;
+   c.prog_data.inputs_read = vp->program.Base.InputsRead;
+
+   if (c.key.copy_edgeflag) {	/* add the edge flag to both outputs written and inputs read */
+      c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_EDGE);
+      c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
+   }
+
+   /* Put dummy slots into the VUE for the SF to put the replaced
+    * point sprite coords in.  We shouldn't need these dummy slots,
+    * which take up precious URB space, but it would mean that the SF
+    * doesn't get nice aligned pairs of input coords into output
+    * coords, which would be a pain to handle.
+    */
+   for (i = 0; i < 8; i++) {
+      if (c.key.point_coord_replace & (1 << i))
+	 c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i);
+   }
+
+   if (0)	/* debug aid, compiled out: dump the Mesa program */
+      _mesa_print_program(&c.vp->program.Base);
+
+
+
+   /* Emit GEN4 code.
+    */
+   brw_vs_emit(&c);
+
+   /* get the program
+    */
+   program = brw_get_program(&c.func, &program_size);
+
+   /* We upload from &c.prog_data including the constant_map assuming
+    * they're packed together.  It would be nice to have a
+    * compile-time assert macro here.
+    */
+   assert(c.constant_map == (int8_t *)&c.prog_data +
+	  sizeof(c.prog_data));
+   assert(ctx->Const.VertexProgram.MaxNativeParameters ==
+	  ARRAY_SIZE(c.constant_map));
+   /* Aux data: prog_data, plus NumParameters bytes of constant_map if needed. */
+   aux_size = sizeof(c.prog_data);
+   if (c.vp->use_const_buffer)
+      aux_size += c.vp->program.Base.Parameters->NumParameters;
+   /* Release the old BO reference before storing the newly uploaded one. */
+   drm_intel_bo_unreference(brw->vs.prog_bo);
+   brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG,
+						   &c.key, sizeof(c.key),
+						   NULL, 0,
+						   program, program_size,
+						   &c.prog_data,
+						   aux_size,
+						   &brw->vs.prog_data);
+}
+
+
+static void brw_upload_vs_prog(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_vertex_program *vp =
+      (struct brw_vertex_program *)brw->vertex_program;
+   struct brw_vs_prog_key key;
+   int unit;
+
+   /* Start from an all-zero key: it is passed to the cache lookup as
+    * sizeof(key) raw bytes.
+    */
+   memset(&key, 0, sizeof(key));
+
+   key.program_string_id = vp->id;
+   key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
+   key.copy_edgeflag = !(ctx->Polygon.FrontMode == GL_FILL &&
+			 ctx->Polygon.BackMode == GL_FILL);
+
+   /* _NEW_POINT: one bit per set CoordReplace[] entry, sprites only. */
+   for (unit = 0; ctx->Point.PointSprite && unit < 8; unit++) {
+      if (ctx->Point.CoordReplace[unit])
+	 key.point_coord_replace |= (1 << unit);
+   }
+
+   /* Look the key up first and only compile on a miss.  The previous
+    * program BO reference is released before it is overwritten.
+    */
+   drm_intel_bo_unreference(brw->vs.prog_bo);
+   brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG,
+				      &key, sizeof(key),
+				      NULL, 0,
+				      &brw->vs.prog_data);
+   if (!brw->vs.prog_bo)
+      do_vs_prog(brw, vp, &key);
+   /* The constant map is addressed directly behind prog_data. */
+   brw->vs.constant_map = ((int8_t *)brw->vs.prog_data +
+			   sizeof(*brw->vs.prog_data));
+}
+
+
+/* Tracked state whose prepare hook is brw_upload_vs_prog() above:
+ */
+const struct brw_tracked_state brw_vs_prog = {
+   .dirty = {
+      .mesa  = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT,	/* ctx->Transform, ->Polygon and ->Point are read */
+      .brw   = BRW_NEW_VERTEX_PROGRAM,
+      .cache = 0
+   },
+   .prepare = brw_upload_vs_prog
+};
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
new file mode 100644
index 0000000000..6493744f3e
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -0,0 +1,91 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+            
+
+#ifndef BRW_VS_H
+#define BRW_VS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "shader/program.h"
+
+
+struct brw_vs_prog_key {
+   GLuint program_string_id;	/* vp->id of the vertex program (brw_upload_vs_prog) */
+   GLuint nr_userclip:4;	/* number of bits set in Transform.ClipPlanesEnabled */
+   GLuint copy_edgeflag:1;	/* set when front or back polygon mode is not GL_FILL */
+   GLuint point_coord_replace:8;	/* bit i mirrors Point.CoordReplace[i] when PointSprite is on */
+};
+
+
+struct brw_vs_compile {
+   struct brw_compile func;
+   struct brw_vs_prog_key key;	/* copy of the key being compiled (do_vs_prog) */
+   struct brw_vs_prog_data prog_data;
+   int8_t constant_map[1024];	/* must directly follow prog_data: do_vs_prog asserts it and uploads both together */
+
+   struct brw_vertex_program *vp;
+
+   GLuint nr_inputs;
+
+   GLuint first_output;
+   GLuint nr_outputs;
+   GLuint first_overflow_output; /**< VERT_RESULT_x of the first output put in a GRF, 0 if none */
+
+   GLuint first_tmp;	/* first GRF usable for scratch temporaries */
+   GLuint last_tmp;	/* next free scratch GRF (see get_tmp) */
+
+   struct brw_reg r0;
+   struct brw_reg r1;
+   struct brw_reg regs[PROGRAM_ADDRESS+1][128];	/* indexed [register file][index] */
+   struct brw_reg tmp;
+   struct brw_reg stack;
+
+   struct {	
+       GLboolean used_in_src;
+       struct brw_reg reg;
+   } output_regs[128];
+
+   struct brw_reg userplane[6];
+
+   /** we may need up to 3 constants per instruction (if use_const_buffer) */
+   struct {
+      GLint index;
+      struct brw_reg reg;
+   } current_const[3];
+
+   GLboolean needs_stack;
+};
+
+void brw_vs_emit( struct brw_vs_compile *c );	/* called by do_vs_prog() to emit GEN4 code for c */
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c
new file mode 100644
index 0000000000..249a800bf4
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c
@@ -0,0 +1,246 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+            
+
+#include "main/macros.h"
+#include "brw_context.h"
+#include "brw_vs.h"
+
+/* Component is active if it may diverge from [0,0,0,1].  Undef values
+ * are promoted to [0,0,0,1] for the purposes of this analysis.
+ */
+struct tracker {
+   GLboolean twoside;	/* set when ctx->Light.Model.TwoSide is enabled */
+   GLubyte active[PROGRAM_OUTPUT+1][MAX_PROGRAM_TEMPS];	/* [file][index]: bit n set = component n active */
+   GLbitfield size_masks[4];  /**< [n-1]: one bit per fragment program input attrib of size >= n */
+};
+
+
+static void set_active_component( struct tracker *t,
+				  GLuint file,
+				  GLuint index,
+				  GLubyte active )
+{
+   /* Only temporaries, inputs and outputs are tracked; a write to any
+    * other register file is ignored.
+    */
+   if (file != PROGRAM_TEMPORARY &&
+       file != PROGRAM_INPUT &&
+       file != PROGRAM_OUTPUT)
+      return;
+
+   assert(file < PROGRAM_OUTPUT + 1);
+   assert(index < Elements(t->active[0]));
+   t->active[file][index] |= active;
+}
+
+static void set_active( struct tracker *t, struct prog_dst_register dst,
+			GLuint active )
+{
+   const GLubyte written = active & dst.WriteMask;   /* only what dst writes */
+   set_active_component( t, dst.File, dst.Index, written );
+}
+
+
+static GLubyte get_active_component( struct tracker *t,
+				     GLuint file,
+				     GLuint index,
+				     GLuint component,
+				     GLubyte swz )
+{
+   const GLubyte bit = 1 << component;
+
+   /* A constant 0 matches the [0,0,0,1] default everywhere but in w. */
+   if (swz == SWIZZLE_ZERO)
+      return component < 3 ? 0 : bit;
+
+   /* A constant 1 matches the default only in w. */
+   if (swz == SWIZZLE_ONE)
+      return component == 3 ? 0 : bit;
+
+   /* Tracked files report their recorded bit; any other file is active. */
+   if (file == PROGRAM_TEMPORARY || file == PROGRAM_INPUT ||
+       file == PROGRAM_OUTPUT)
+      return t->active[file][index] & bit;
+   return bit;
+}
+
+
+static GLubyte get_active( struct tracker *t,
+			   struct prog_src_register src )
+{
+   GLubyte mask;
+   GLuint chan;
+   /* Relatively addressed source: all four components count as active. */
+   if (src.RelAddr)
+      return 0xf;
+
+   mask = src.Negate;   /* NOTE! negated channels start out active */
+   for (chan = 0; chan < 4; chan++)
+      mask |= get_active_component(t, src.File, src.Index, chan,
+				   GET_SWZ(src.Swizzle, chan));
+   return mask;
+}
+
+/**
+ * Return the size (1,2,3 or 4) of the output/result for VERT_RESULT_idx.
+ */
+static GLubyte get_output_size( struct tracker *t,
+				GLuint idx )
+{
+   GLubyte active, size;
+   assert(idx < VERT_RESULT_MAX);
+   active = t->active[PROGRAM_OUTPUT][idx];
+   for (size = 4; size > 0; size--) {   /* highest active component wins */
+      if (active & (1 << (size - 1)))
+         break;
+   }
+   return size;
+}
+
+/* Note the potential copying that occurs in the setup program:
+ */
+static void calc_sizes( struct tracker *t )
+{
+   GLubyte *outputs = t->active[PROGRAM_OUTPUT];
+   GLint vertRes;
+
+   /* In two-sided mode, OR the BFC0/BFC1 masks into COL0/COL1. */
+   if (t->twoside) {
+      outputs[VERT_RESULT_COL0] |= outputs[VERT_RESULT_BFC0];
+      outputs[VERT_RESULT_COL1] |= outputs[VERT_RESULT_BFC1];
+   }
+
+   /* Record in size_masks[] how large each fragment program input is,
+    * going by the size of the vertex program output that feeds it.
+    *
+    * NOTE(review): only TEX0..TEX7 and VAR0 onwards are mapped below, so
+    * the COL0/COL1 masks merged above look unused here -- confirm.
+    */
+   for (vertRes = VERT_RESULT_TEX0; vertRes < VERT_RESULT_MAX; vertRes++) {
+      GLint fragAttrib;
+      GLuint size, n;
+
+      /* vertex program output index -> fragment program input index */
+      if (vertRes <= VERT_RESULT_TEX7)
+         fragAttrib = FRAG_ATTRIB_TEX0 + vertRes - VERT_RESULT_TEX0;
+      else if (vertRes >= VERT_RESULT_VAR0)
+         fragAttrib = FRAG_ATTRIB_VAR0 + vertRes - VERT_RESULT_VAR0;
+      else
+         continue;
+      assert(fragAttrib >= FRAG_ATTRIB_TEX0);
+      assert(fragAttrib <= FRAG_ATTRIB_MAX);
+
+      /* An attrib of size N sets its bit in size_masks[0..N-1]. */
+      size = get_output_size(t, vertRes);
+      for (n = 0; n < size; n++)
+         t->size_masks[n] |= 1 << fragAttrib;
+   }
+}
+
+static const GLubyte szflag[4+1] = {	/* size -> component mask; read-only */
+   0,		/* size 0: nothing */
+   0x1,		/* size 1: x */
+   0x3,		/* size 2: xy */
+   0x7,		/* size 3: xyz */
+   0xf		/* size 4: xyzw */
+};
+
+/* Pull a size out of the packed array:
+ */
+static GLuint get_input_size(struct brw_context *brw,
+			     GLuint attr)
+{
+   /* Each dword of sizes[] packs sixteen 2-bit fields holding size - 1. */
+   const GLuint dword = brw->vb.info.sizes[attr / 16];
+   const GLuint shift = (attr % 16) * 2;
+   return ((dword >> shift) & 0x3) + 1;
+}
+
+/* Calculate sizes of vertex program outputs.  Size is the largest
+ * component index which might vary from [0,0,0,1]
+ */
+static void calc_wm_input_sizes( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   /* BRW_NEW_VERTEX_PROGRAM */
+   const struct brw_vertex_program *vp =
+      brw_vertex_program_const(brw->vertex_program);
+   struct tracker t;
+   GLuint attr, pc;
+
+   memset(&t, 0, sizeof(t));
+
+   /* _NEW_LIGHT */
+   t.twoside = ctx->Light.Model.TwoSide ? 1 : 0;
+
+   /* BRW_NEW_INPUT_DIMENSIONS: seed every input the program reads with
+    * the component mask for its size from brw->vb.info.sizes.
+    */
+   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
+      if (!(vp->program.Base.InputsRead & (1<<attr)))
+	 continue;
+      set_active_component(&t, PROGRAM_INPUT, attr,
+			   szflag[get_input_size(brw, attr)]);
+   }
+
+   /* Walk the instructions: ARL is skipped, MOV forwards its source's
+    * mask, anything else marks all four components (before WriteMask).
+    */
+   for (pc = 0; pc < vp->program.Base.NumInstructions; pc++) {
+      struct prog_instruction *inst = &vp->program.Base.Instructions[pc];
+
+      if (inst->Opcode == OPCODE_ARL)
+	 continue;
+      if (inst->Opcode == OPCODE_MOV)
+	 set_active(&t, inst->DstReg, get_active(&t, inst->SrcReg[0]));
+      else
+	 set_active(&t, inst->DstReg, 0xf);
+   }
+
+   calc_sizes(&t);
+
+   /* Only copy and flag when the masks actually changed. */
+   if (memcmp(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks)) == 0)
+      return;
+   memcpy(brw->wm.input_size_masks, t.size_masks, sizeof(t.size_masks));
+   brw->state.dirty.brw |= BRW_NEW_WM_INPUT_DIMENSIONS;
+}
+
+const struct brw_tracked_state brw_wm_input_sizes = {
+   .dirty = {
+      .mesa  = _NEW_LIGHT,	/* calc_wm_input_sizes reads ctx->Light.Model.TwoSide */
+      .brw   = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_INPUT_DIMENSIONS,
+      .cache = 0
+   },
+   .prepare = calc_wm_input_sizes	/* may raise BRW_NEW_WM_INPUT_DIMENSIONS */
+};
+
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
new file mode 100644
index 0000000000..0b44deeb63
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -0,0 +1,1888 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+            
+
+#include "main/macros.h"
+#include "shader/program.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "brw_context.h"
+#include "brw_vs.h"
+
+/* Return whether source operand 'arg' (0-based) of 'opcode' may be an
+ * immediate float instead of a PROGRAM_CONSTANT value through push/pull.
+ */
+static GLboolean
+brw_vs_arg_can_be_immediate(enum prog_opcode opcode, int arg)
+{
+   static const int opcode_array[] = {	/* 1-based arg number; 0 = none */
+      [OPCODE_ADD] = 2,
+      [OPCODE_CMP] = 3,
+      [OPCODE_DP3] = 2,
+      [OPCODE_DP4] = 2,
+      [OPCODE_DPH] = 2,
+      [OPCODE_MAX] = 2,
+      [OPCODE_MIN] = 2,
+      [OPCODE_MUL] = 2,
+      [OPCODE_SEQ] = 2,
+      [OPCODE_SGE] = 2,
+      [OPCODE_SGT] = 2,
+      [OPCODE_SLE] = 2,
+      [OPCODE_SLT] = 2,
+      [OPCODE_SNE] = 2,
+      [OPCODE_XPD] = 2,
+   };
+
+   /* These opcodes get broken down in a way that allows two
+    * args to be immediates.
+    */
+   if (opcode == OPCODE_MAD || opcode == OPCODE_LRP) {
+      if (arg == 1 || arg == 2)
+	 return GL_TRUE;
+   }
+   /* Valid indices end at ARRAY_SIZE - 1, so opcode == ARRAY_SIZE is out too. */
+   if (opcode >= ARRAY_SIZE(opcode_array))
+      return GL_FALSE;
+
+   return arg == opcode_array[opcode] - 1;
+}
+
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+   /* The next free GRF becomes the new temporary. */
+   struct brw_reg reg = brw_vec8_grf(c->last_tmp, 0);
+   c->last_tmp++;
+   if (c->last_tmp > c->prog_data.total_grf)   /* raise the GRF count if needed */
+      c->prog_data.total_grf = c->last_tmp;
+   return reg;
+}
+
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+   if (c->last_tmp - 1 == tmp.nr)   /* only the newest temporary is handed back */
+      c->last_tmp -= 1;
+}
+			       
+static void release_tmps( struct brw_vs_compile *c )
+{
+   c->last_tmp = c->first_tmp;	/* rewind the cursor: every temporary is free again */
+}
+
+
+/**
+ * Preallocate GRF registers before code emit.
+ * Do things as simply as possible.  Allocate and populate all regs
+ * ahead of time.
+ */
+static void brw_vs_alloc_regs( struct brw_vs_compile *c )
+{
+   struct intel_context *intel = &c->func.brw->intel;
+   GLuint i, reg = 0, mrf;
+   int attributes_in_vue;
+
+   /* Determine whether to use a real constant buffer or use a block
+    * of GRF registers for constants.  The latter is faster but only
+    * works if everything fits in the GRF.
+    * XXX this heuristic/check may need some fine tuning...
+    */
+   if (c->vp->program.Base.Parameters->NumParameters +
+       c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
+      c->vp->use_const_buffer = GL_TRUE;
+   else
+      c->vp->use_const_buffer = GL_FALSE;
+
+   /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
+
+   /* r0 -- reserved as usual
+    */
+   c->r0 = brw_vec8_grf(reg, 0);
+   reg++;
+
+   /* User clip planes from curbe:
+    */
+   if (c->key.nr_userclip) {
+      for (i = 0; i < c->key.nr_userclip; i++) {
+	 c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
+      }     
+
+      /* Deal with curbe alignment:
+       */
+      reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;
+   }
+
+   /* Vertex program parameters from curbe:
+    */
+   if (c->vp->use_const_buffer) {
+      int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
+      int constant = 0;
+
+      /* We've got more constants than we can load with the push
+       * mechanism.  This is often correlated with reladdr loads where
+       * we should probably be using a pull mechanism anyway to avoid
+       * excessive reading.  However, the pull mechanism is slow in
+       * general.  So, we try to allocate as many non-reladdr-loaded
+       * constants through the push buffer as we can before giving up.
+       */
+      memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters);	/* -1 = no push slot assigned yet */
+      for (i = 0;
+	   i < c->vp->program.Base.NumInstructions && constant < max_constant;
+	   i++) {
+	 struct prog_instruction *inst = &c->vp->program.Base.Instructions[i];
+	 int arg;
+
+	 for (arg = 0; arg < 3 && constant < max_constant; arg++) {
+	    if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR &&
+		 inst->SrcReg[arg].File != PROGRAM_CONSTANT &&
+		 inst->SrcReg[arg].File != PROGRAM_UNIFORM &&
+		 inst->SrcReg[arg].File != PROGRAM_ENV_PARAM &&
+		 inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) ||
+		inst->SrcReg[arg].RelAddr)
+	       continue;
+
+	    if (c->constant_map[inst->SrcReg[arg].Index] == -1) {
+	       c->constant_map[inst->SrcReg[arg].Index] = constant++;
+	    }
+	 }
+      }
+
+      for (i = 0; i < constant; i++) {
+         c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2,
+							      (i%2) * 4),
+						 0, 4, 1);
+      }
+      reg += (constant + 1) / 2;
+      c->prog_data.curb_read_length = reg - 1;
+      /* XXX 0 causes a bug elsewhere... */
+      c->prog_data.nr_params = MAX2(constant * 4, 4);
+   }
+   else {
+      /* use a section of the GRF for constants */
+      GLuint nr_params = c->vp->program.Base.Parameters->NumParameters;
+      for (i = 0; i < nr_params; i++) {
+         c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+      }
+      reg += (nr_params + 1) / 2;
+      c->prog_data.curb_read_length = reg - 1;
+
+      c->prog_data.nr_params = nr_params * 4;
+   }
+
+   /* Allocate input regs:
+    */
+   c->nr_inputs = 0;
+   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+      if (c->prog_data.inputs_read & (1 << i)) {
+	 c->nr_inputs++;
+	 c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0);
+	 reg++;
+      }
+   }
+   /* If there are no inputs, we'll still be reading one attribute's worth
+    * because it's required -- see urb_read_length setting.
+    */
+   if (c->nr_inputs == 0)
+      reg++;
+
+   /* Allocate outputs.  The non-position outputs go straight into message regs.
+    */
+   c->nr_outputs = 0;
+   c->first_output = reg;
+   c->first_overflow_output = 0;	/* 0 also means "no overflow output yet" */
+
+   if (intel->gen >= 6)
+      mrf = 6;
+   else if (intel->gen == 5)
+      mrf = 8;
+   else
+      mrf = 4;
+
+   for (i = 0; i < VERT_RESULT_MAX; i++) {
+      if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
+	 c->nr_outputs++;
+         assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
+	 if (i == VERT_RESULT_HPOS) {
+	    c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+	    reg++;
+	 }
+	 else if (i == VERT_RESULT_PSIZ) {
+	    c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+	    reg++;
+	    mrf++;		/* just a placeholder?  XXX fix later stages & remove this */
+	 }
+	 else {
+            if (mrf < 16) {
+               c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
+               mrf++;
+            }
+            else {
+               /* too many vertex results to fit in MRF, use GRF for overflow */
+               if (!c->first_overflow_output)
+                  c->first_overflow_output = i;
+               c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+               reg++;
+            }
+	 }
+      }
+   }     
+
+   /* Allocate program temporaries:
+    */
+   for (i = 0; i < c->vp->program.Base.NumTemporaries; i++) {
+      c->regs[PROGRAM_TEMPORARY][i] = brw_vec8_grf(reg, 0);
+      reg++;
+   }
+
+   /* Address reg(s).  Don't try to use the internal address reg until
+    * deref time.
+    */
+   for (i = 0; i < c->vp->program.Base.NumAddressRegs; i++) {
+      c->regs[PROGRAM_ADDRESS][i] =  brw_reg(BRW_GENERAL_REGISTER_FILE,
+					     reg,
+					     0,
+					     BRW_REGISTER_TYPE_D,
+					     BRW_VERTICAL_STRIDE_8,
+					     BRW_WIDTH_8,
+					     BRW_HORIZONTAL_STRIDE_1,
+					     BRW_SWIZZLE_XXXX,
+					     WRITEMASK_X);
+      reg++;
+   }
+
+   if (c->vp->use_const_buffer) {
+      for (i = 0; i < 3; i++) {
+         c->current_const[i].index = -1;
+         c->current_const[i].reg = brw_vec8_grf(reg, 0);
+         reg++;
+      }
+   }
+
+   for (i = 0; i < 128; i++) {	/* 128 == number of entries in c->output_regs[] */
+      if (c->output_regs[i].used_in_src) {
+         c->output_regs[i].reg = brw_vec8_grf(reg, 0);
+         reg++;
+      }
+   }
+
+   if (c->needs_stack) {
+      c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+      reg += 2;
+   }
+
+   /* Some opcodes need an internal temporary:
+    */
+   c->first_tmp = reg;
+   c->last_tmp = reg;		/* for allocation purposes */
+
+   /* Each input reg holds data from two vertices.  The
+    * urb_read_length is the number of registers read from *each*
+    * vertex urb, so is half the amount:
+    */
+   c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
+   /* Setting this field to 0 leads to undefined behavior according to
+    * the VS_STATE docs.  Our VUEs will always have at least one attribute
+    * sitting in them, even if it's padding.
+    */
+   if (c->prog_data.urb_read_length == 0)
+      c->prog_data.urb_read_length = 1;
+
+   /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size
+    * them to fit the biggest thing they need to.
+    */
+   attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
+
+   if (intel->gen >= 6)
+      c->prog_data.urb_entry_size = (attributes_in_vue + 4 + 7) / 8;
+   else if (intel->gen == 5)
+      c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
+   else
+      c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
+
+   c->prog_data.total_grf = reg;
+
+   if (INTEL_DEBUG & DEBUG_VS) {
+      printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
+      printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
+      printf("%s reg = %d\n", __FUNCTION__, reg);
+   }
+}
+
+
+/**
+ * Run a one-source emitter while protecting it from dst/src aliasing.
+ *
+ * If dst and arg0 name the same register (same file and same register
+ * number), the emitter writes into a scratch register that carries dst's
+ * writemask, and the result is then copied into dst.  Otherwise the
+ * emitter is invoked on dst directly.
+ *
+ * \param c     vertex shader compile state
+ * \param dst   destination register
+ * \param arg0  source operand
+ * \param func  emitter that must not see dst overlapping its source
+ */
+static void unalias1( struct brw_vs_compile *c,
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      void (*func)( struct brw_vs_compile *,
+				    struct brw_reg,
+				    struct brw_reg ))
+{
+   struct brw_reg scratch;
+
+   if (dst.file != arg0.file || dst.nr != arg0.nr) {
+      /* No overlap: emit straight into the destination. */
+      func(c, dst, arg0);
+      return;
+   }
+
+   scratch = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+   func(c, scratch, arg0);
+   brw_MOV(&c->func, dst, scratch);
+   release_tmp(c, scratch);
+}
+
+/**
+ * \sa unalias1
+ * Two-source variant: checks whether dst overlaps either operand and, if
+ * so, routes the emitter's result through a scratch register that carries
+ * dst's writemask before copying it into dst.
+ *
+ * \param c     vertex shader compile state
+ * \param dst   destination register
+ * \param arg0  first source operand
+ * \param arg1  second source operand
+ * \param func  emitter that must not see dst overlapping a source
+ */
+static void unalias2( struct brw_vs_compile *c,
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1,
+		      void (*func)( struct brw_vs_compile *,
+				    struct brw_reg,
+				    struct brw_reg,
+				    struct brw_reg ))
+{
+   const GLboolean hits_arg0 = (dst.file == arg0.file && dst.nr == arg0.nr);
+   const GLboolean hits_arg1 = (dst.file == arg1.file && dst.nr == arg1.nr);
+   struct brw_reg scratch;
+
+   if (!hits_arg0 && !hits_arg1) {
+      /* No overlap: emit straight into the destination. */
+      func(c, dst, arg0, arg1);
+      return;
+   }
+
+   scratch = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+   func(c, scratch, arg0, arg1);
+   brw_MOV(&c->func, dst, scratch);
+   release_tmp(c, scratch);
+}
+
+/**
+ * \sa unalias2
+ * Three-source variant: checks whether dst overlaps any operand and, if
+ * so, routes the emitter's result through a scratch register that carries
+ * dst's writemask before copying it into dst.
+ *
+ * \param c     vertex shader compile state
+ * \param dst   destination register
+ * \param arg0  first source operand
+ * \param arg1  second source operand
+ * \param arg2  third source operand
+ * \param func  emitter that must not see dst overlapping a source
+ */
+static void unalias3( struct brw_vs_compile *c,
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1,
+		      struct brw_reg arg2,
+		      void (*func)( struct brw_vs_compile *,
+				    struct brw_reg,
+				    struct brw_reg,
+				    struct brw_reg,
+				    struct brw_reg ))
+{
+   const GLboolean hits_arg0 = (dst.file == arg0.file && dst.nr == arg0.nr);
+   const GLboolean hits_arg1 = (dst.file == arg1.file && dst.nr == arg1.nr);
+   const GLboolean hits_arg2 = (dst.file == arg2.file && dst.nr == arg2.nr);
+   struct brw_reg scratch;
+
+   if (!hits_arg0 && !hits_arg1 && !hits_arg2) {
+      /* No overlap: emit straight into the destination. */
+      func(c, dst, arg0, arg1, arg2);
+      return;
+   }
+
+   scratch = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+   func(c, scratch, arg0, arg1, arg2);
+   brw_MOV(&c->func, dst, scratch);
+   release_tmp(c, scratch);
+}
+
+/**
+ * Shared body of the "set on <condition>" opcodes.
+ *
+ * Emits: MOV dst, 0.0; CMP (null destination) of arg0 against arg1 with
+ * the given condition; MOV dst, 1.0; then sets the predicate control flag
+ * value to 0xff.
+ * NOTE(review): presumably the CMP predicates the second MOV so that only
+ * channels passing the test receive 1.0 -- confirm against brw_CMP().
+ *
+ * \param cond  one of the BRW_CONDITIONAL_* codes
+ */
+static void emit_sop( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1,
+                      GLuint cond)
+{
+   struct brw_compile *codegen = &c->func;
+
+   /* Default result for every channel. */
+   brw_MOV(codegen, dst, brw_imm_f(0.0f));
+
+   /* Compare, then overwrite with 1.0. */
+   brw_CMP(codegen, brw_null_reg(), cond, arg0, arg1);
+   brw_MOV(codegen, dst, brw_imm_f(1.0f));
+
+   brw_set_predicate_control_flag_value(codegen, 0xff);
+}
+
+/** SEQ: emit_sop() with the "equal" condition. */
+static void emit_seq( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   const GLuint cond = BRW_CONDITIONAL_EQ;
+
+   emit_sop(c, dst, arg0, arg1, cond);
+}
+
+/** SNE: emit_sop() with the "not equal" condition. */
+static void emit_sne( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   const GLuint cond = BRW_CONDITIONAL_NEQ;
+
+   emit_sop(c, dst, arg0, arg1, cond);
+}
+
+/** SLT: emit_sop() with the "less than" condition. */
+static void emit_slt( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   const GLuint cond = BRW_CONDITIONAL_L;
+
+   emit_sop(c, dst, arg0, arg1, cond);
+}
+
+/** SLE: emit_sop() with the "less than or equal" condition. */
+static void emit_sle( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   const GLuint cond = BRW_CONDITIONAL_LE;
+
+   emit_sop(c, dst, arg0, arg1, cond);
+}
+
+/** SGT: emit_sop() with the "greater than" condition. */
+static void emit_sgt( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   const GLuint cond = BRW_CONDITIONAL_G;
+
+   emit_sop(c, dst, arg0, arg1, cond);
+}
+
+/** SGE: emit_sop() with the "greater than or equal" condition. */
+static void emit_sge( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   const GLuint cond = BRW_CONDITIONAL_GE;
+
+   emit_sop(c, dst, arg0, arg1, cond);
+}
+
+/**
+ * CMP: emit a "less than 0.0" compare of arg0 (null destination),
+ * followed by a SEL of arg1/arg2 into dst, then reset predicate control
+ * to BRW_PREDICATE_NONE.
+ * NOTE(review): presumably yields dst = (arg0 < 0) ? arg1 : arg2, relying
+ * on brw_CMP() arming predication for the SEL -- confirm.
+ */
+static void emit_cmp( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1,
+                      struct brw_reg arg2 )
+{
+   const GLuint cond = BRW_CONDITIONAL_L;
+
+   brw_CMP(p, brw_null_reg(), cond, arg0, brw_imm_f(0));
+   brw_SEL(p, dst, arg1, arg2);
+
+   /* Leave no predication behind for later instructions. */
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+/**
+ * MAX: compare arg0 >= arg1 (null destination), SEL arg0/arg1 into dst,
+ * then reset predicate control to BRW_PREDICATE_NONE.
+ */
+static void emit_max( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   const GLuint cond = BRW_CONDITIONAL_GE;
+
+   brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
+   brw_SEL(p, dst, arg0, arg1);
+
+   /* Leave no predication behind for later instructions. */
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+/**
+ * MIN: compare arg0 < arg1 (null destination), SEL arg0/arg1 into dst,
+ * then reset predicate control to BRW_PREDICATE_NONE.
+ */
+static void emit_min( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   const GLuint cond = BRW_CONDITIONAL_L;
+
+   brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
+   brw_SEL(p, dst, arg0, arg1);
+
+   /* Leave no predication behind for later instructions. */
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+
+/**
+ * Emit a one-operand math instruction via brw_math().
+ *
+ * On hardware generations before 6, a result that is writemasked (mask
+ * other than 0xf) or that does not live in the general register file is
+ * computed into a scratch register first and then MOVed into dst.
+ *
+ * \param function   BRW_MATH_FUNCTION_* selector
+ * \param dst        destination register
+ * \param arg0       operand
+ * \param precision  BRW_MATH_PRECISION_* selector
+ */
+static void emit_math1( struct brw_vs_compile *c,
+			GLuint function,
+			struct brw_reg dst,
+			struct brw_reg arg0,
+			GLuint precision)
+{
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
+   const GLboolean partial_write = (dst.dw1.bits.writemask != 0xf);
+   const GLboolean outside_grf = (dst.file != BRW_GENERAL_REGISTER_FILE);
+   /* There are various odd behaviours with SEND on the simulator.  In
+    * addition there are documented issues with the fact that the GEN4
+    * processor doesn't do dependency control properly on SEND
+    * results.  So, on balance, this kludge to get around failures
+    * with writemasked math results looks like it might be necessary
+    * whether that turns out to be a simulator bug or not:
+    */
+   const GLboolean via_scratch = (intel->gen < 6 &&
+                                  (partial_write || outside_grf));
+   struct brw_reg result = via_scratch ? get_tmp(c) : dst;
+
+   brw_math(p,
+            result,
+            function,
+            BRW_MATH_SATURATE_NONE,
+            2,      /* presumably the message register number -- confirm */
+            arg0,
+            BRW_MATH_DATA_SCALAR,
+            precision);
+
+   if (!via_scratch)
+      return;
+
+   brw_MOV(p, dst, result);
+   release_tmp(c, result);
+}
+
+
+/**
+ * Emit a two-operand math instruction via brw_math().
+ *
+ * arg1 is first copied into message register 3; arg0 is handed to
+ * brw_math() directly.  As in emit_math1(), on generations before 6 a
+ * writemasked or non-GRF destination is computed into a scratch register
+ * and then MOVed into dst.
+ *
+ * \param function   BRW_MATH_FUNCTION_* selector
+ * \param dst        destination register
+ * \param arg0       first operand
+ * \param arg1       second operand
+ * \param precision  BRW_MATH_PRECISION_* selector
+ */
+static void emit_math2( struct brw_vs_compile *c,
+			GLuint function,
+			struct brw_reg dst,
+			struct brw_reg arg0,
+			struct brw_reg arg1,
+			GLuint precision)
+{
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
+   const GLboolean partial_write = (dst.dw1.bits.writemask != 0xf);
+   const GLboolean outside_grf = (dst.file != BRW_GENERAL_REGISTER_FILE);
+   const GLboolean via_scratch = (intel->gen < 6 &&
+                                  (partial_write || outside_grf));
+   struct brw_reg result = via_scratch ? get_tmp(c) : dst;
+
+   /* Second operand travels in the message register after the first. */
+   brw_MOV(p, brw_message_reg(3), arg1);
+
+   brw_math(p,
+            result,
+            function,
+            BRW_MATH_SATURATE_NONE,
+            2,      /* presumably the message register number -- confirm */
+            arg0,
+            BRW_MATH_DATA_SCALAR,
+            precision);
+
+   if (!via_scratch)
+      return;
+
+   brw_MOV(p, dst, result);
+   release_tmp(c, result);
+}
+
+
+/**
+ * EXP: emit each enabled component of the result from arg0.x.
+ * The caller must ensure dst does not alias arg0.
+ *
+ *   dst.x  bit pattern (floor(arg0.x) + 127) << 23, i.e. the floored
+ *          exponent installed directly in the float exponent field
+ *   dst.y  FRC(arg0.x)
+ *   dst.z  full-precision mathbox EXP of arg0.x
+ *   dst.w  1.0
+ */
+static void emit_exp_noalias( struct brw_vs_compile *c,
+			      struct brw_reg dst,
+			      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   const GLuint mask = dst.dw1.bits.writemask;
+
+   if (mask & WRITEMASK_X) {
+      struct brw_reg scratch = get_tmp(c);
+      struct brw_reg scratch_d = retype(scratch, BRW_REGISTER_TYPE_D);
+      struct brw_reg dst_d = retype(dst, BRW_REGISTER_TYPE_D);
+
+      /* scratch_d = floor(arg0.x), as an integer */
+      brw_RNDD(p, scratch_d, brw_swizzle1(arg0, 0));
+
+      /* Bias the exponent for floating point: exp += 127 */
+      brw_ADD(p, brw_writemask(scratch_d, WRITEMASK_X), scratch_d,
+              brw_imm_d(127));
+
+      /* Shift into the exponent field; excess drops off the edge. */
+      brw_SHL(p, brw_writemask(dst_d, WRITEMASK_X), scratch_d,
+              brw_imm_d(23));
+
+      release_tmp(c, scratch);
+   }
+
+   if (mask & WRITEMASK_Y) {
+      /* result[1] = arg0.x - floor(arg0.x) */
+      brw_FRC(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0, 0));
+   }
+
+   if (mask & WRITEMASK_Z) {
+      /* As with the LOG instruction, a taylor expansion might be the
+       * better choice here, given all the prep work already done.
+       *
+       * If mathbox partial precision is too low, consider also
+       * combining result[0] with EXP(result[1]).
+       */
+      emit_math1(c,
+                 BRW_MATH_FUNCTION_EXP,
+                 brw_writemask(dst, WRITEMASK_Z),
+                 brw_swizzle1(arg0, 0),
+                 BRW_MATH_PRECISION_FULL);
+   }
+
+   if (mask & WRITEMASK_W) {
+      /* result[3] = 1.0 */
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_W), brw_imm_f(1));
+   }
+}
+
+
+/**
+ * LOG: emit each enabled component of the result from arg0.x.
+ * The caller must ensure dst does not alias arg0.
+ *
+ * With "bits" standing for arg0.x reinterpreted as an unsigned dword:
+ *
+ *   x  ((bits & 0x7fffffff) >> 23) - 127      (unbiased exponent)
+ *   y  (bits & ((1<<23)-1)) | (127<<23)       (mantissa, exponent forced)
+ *   z  x + mathbox LOG of y, full precision
+ *   w  1.0
+ *
+ * x is also produced when only z is requested, and likewise y, because z
+ * is built from both.  The work is done in a scratch register whenever
+ * dst is writemasked or is not in the general register file.
+ */
+static void emit_log_noalias( struct brw_vs_compile *c,
+			      struct brw_reg dst,
+			      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   const GLuint mask = dst.dw1.bits.writemask;
+   const GLboolean need_tmp = (mask != 0xf ||
+                               dst.file != BRW_GENERAL_REGISTER_FILE);
+   struct brw_reg work = need_tmp ? get_tmp(c) : dst;
+   struct brw_reg work_ud = retype(work, BRW_REGISTER_TYPE_UD);
+   struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
+
+   /* Perform mant = frexpf(fabsf(x), &exp), adjusting exp and mant
+    * according to spec.  The two halves almost look like they could be
+    * joined up, but that is not really practical.
+    */
+   if (mask & WRITEMASK_XZ) {
+      /* Drop the sign bit. */
+      brw_AND(p,
+              brw_writemask(work_ud, WRITEMASK_X),
+              brw_swizzle1(arg0_ud, 0),
+              brw_imm_ud((1U<<31)-1));
+
+      /* Bring the exponent field down to bit 0. */
+      brw_SHR(p,
+              brw_writemask(work_ud, WRITEMASK_X),
+              work_ud,
+              brw_imm_ud(23));
+
+      /* Remove the bias; the destination is the float-typed register. */
+      brw_ADD(p,
+              brw_writemask(work, WRITEMASK_X),
+              retype(work_ud, BRW_REGISTER_TYPE_D),	/* does it matter? */
+              brw_imm_d(-127));
+   }
+
+   if (mask & WRITEMASK_YZ) {
+      /* Keep the mantissa bits only. */
+      brw_AND(p,
+              brw_writemask(work_ud, WRITEMASK_Y),
+              brw_swizzle1(arg0_ud, 0),
+              brw_imm_ud((1<<23)-1));
+
+      /* Install a biased exponent of 127. */
+      brw_OR(p,
+             brw_writemask(work_ud, WRITEMASK_Y),
+             work_ud,
+             brw_imm_ud(127<<23));
+   }
+
+   if (mask & WRITEMASK_Z) {
+      /* result[2] = result[0] + LOG2(result[1]);
+       *
+       * Why bother?  The x/y values are just a hint how to do this with
+       * a taylor series.  Maybe we *should* use a taylor series, as by
+       * the time all the above has been done it's almost certainly
+       * quicker than calling the mathbox, even with low precision.
+       *
+       * Options are:
+       *    - result[0] + mathbox.LOG2(result[1])
+       *    - mathbox.LOG2(arg0.x)
+       *    - result[0] + inline_taylor_approx(result[1])
+       */
+      emit_math1(c,
+                 BRW_MATH_FUNCTION_LOG,
+                 brw_writemask(work, WRITEMASK_Z),
+                 brw_swizzle1(work, 1),
+                 BRW_MATH_PRECISION_FULL);
+
+      brw_ADD(p,
+              brw_writemask(work, WRITEMASK_Z),
+              brw_swizzle1(work, 2),
+              brw_swizzle1(work, 0));
+   }
+
+   if (mask & WRITEMASK_W) {
+      /* result[3] = 1.0 */
+      brw_MOV(p, brw_writemask(work, WRITEMASK_W), brw_imm_f(1));
+   }
+
+   if (need_tmp) {
+      brw_MOV(p, dst, work);
+      release_tmp(c, work);
+   }
+}
+
+
+/**
+ * DST (distance vector): for each enabled channel emit
+ *   x = 1.0,  y = arg0 * arg1,  z = arg0,  w = arg1.
+ *
+ * Callers need to unalias - consider swizzles:   r0 = DST r0.xxxx r1
+ */
+static void emit_dst_noalias( struct brw_vs_compile *c,
+			      struct brw_reg dst,
+			      struct brw_reg arg0,
+			      struct brw_reg arg1)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint mask = dst.dw1.bits.writemask;
+
+   /* One instruction per enabled channel; there must be a better way
+    * to do this.
+    */
+   if (mask & WRITEMASK_X) {
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_X), brw_imm_f(1.0));
+   }
+   if (mask & WRITEMASK_Y) {
+      brw_MUL(p, brw_writemask(dst, WRITEMASK_Y), arg0, arg1);
+   }
+   if (mask & WRITEMASK_Z) {
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_Z), arg0);
+   }
+   if (mask & WRITEMASK_W) {
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_W), arg1);
+   }
+}
+
+
+/**
+ * XPD (cross product): a MUL of t.yzxw by u.zxyw into the null register,
+ * followed by a MAC of -t.zxyw by u.yzxw into dst.
+ * NOTE(review): presumably the MUL leaves its product in the accumulator
+ * for the MAC to add to -- confirm against brw_MUL()/brw_MAC().
+ */
+static void emit_xpd( struct brw_compile *p,
+		      struct brw_reg dst,
+		      struct brw_reg t,
+		      struct brw_reg u)
+{
+   const struct brw_reg t_yzx = brw_swizzle(t, 1,2,0,3);
+   const struct brw_reg u_zxy = brw_swizzle(u, 2,0,1,3);
+   const struct brw_reg neg_t_zxy = negate(brw_swizzle(t, 2,0,1,3));
+   const struct brw_reg u_yzx = brw_swizzle(u, 1,2,0,3);
+
+   brw_MUL(p, brw_null_reg(), t_yzx, u_zxy);
+   brw_MAC(p, dst, neg_t_zxy, u_yzx);
+}
+
+
+/**
+ * LIT: emit the lighting-coefficient sequence.  The caller must ensure
+ * dst does not alias arg0.
+ *
+ * dst.yz is first set to 0 and dst.xw to 1.  Inside an IF on arg0.x > 0,
+ * dst.y receives arg0.x, and dst.z receives a partial-precision
+ * POW(work.z, arg0.w) where work.z is loaded from arg0.y after an
+ * "arg0.y > 0" compare.
+ *
+ * A scratch register is allocated only when dst is not in the general
+ * register file; release_tmp() is paired with that get_tmp() so that dst
+ * itself is never handed to release_tmp().
+ *
+ * NOTE(review): when a scratch register is used, work.z is only written
+ * after the "arg0.y > 0" compare and is never cleared, so POW presumably
+ * sees an undefined base when arg0.y <= 0 -- confirm.
+ */
+static void emit_lit_noalias( struct brw_vs_compile *c,
+			      struct brw_reg dst,
+			      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *if_insn;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+
+   if (need_tmp)
+      tmp = get_tmp(c);
+
+   brw_MOV(p, brw_writemask(dst, WRITEMASK_YZ), brw_imm_f(0));
+   brw_MOV(p, brw_writemask(dst, WRITEMASK_XW), brw_imm_f(1));
+
+   /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
+    * to get all channels active inside the IF.  In the clipping code
+    * we run with NoMask, so it's not an option and we can use
+    * BRW_EXECUTE_1 for all comparisons.
+    */
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
+   if_insn = brw_IF(p, BRW_EXECUTE_8);
+   {
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0,0));
+
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
+      brw_MOV(p, brw_writemask(tmp, WRITEMASK_Z),  brw_swizzle1(arg0,1));
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+      emit_math2(c,
+		 BRW_MATH_FUNCTION_POW,
+		 brw_writemask(dst, WRITEMASK_Z),
+		 brw_swizzle1(tmp, 2),
+		 brw_swizzle1(arg0, 3),
+		 BRW_MATH_PRECISION_PARTIAL);
+   }
+
+   brw_ENDIF(p, if_insn);
+
+   /* Only give back what was actually taken from the temporary pool. */
+   if (need_tmp)
+      release_tmp(c, tmp);
+}
+
+/**
+ * LRP: emit ADD dst = 1.0 - arg0, MUL (null destination) of dst by arg2,
+ * then MAC dst with arg0 and arg1.  The caller must ensure dst does not
+ * alias the operands, since dst is overwritten by the first instruction.
+ * NOTE(review): presumably yields arg0 * arg1 + (1 - arg0) * arg2 via the
+ * accumulator -- confirm against brw_MUL()/brw_MAC().
+ */
+static void emit_lrp_noalias(struct brw_vs_compile *c,
+			     struct brw_reg dst,
+			     struct brw_reg arg0,
+			     struct brw_reg arg1,
+			     struct brw_reg arg2)
+{
+   struct brw_compile *codegen = &c->func;
+   const struct brw_reg minus_weight = negate(arg0);
+
+   /* dst = 1.0 - arg0 */
+   brw_ADD(codegen, dst, minus_weight, brw_imm_f(1.0));
+
+   /* product of dst and arg2, written to the null register */
+   brw_MUL(codegen, brw_null_reg(), dst, arg2);
+
+   /* multiply-accumulate arg0 * arg1 into dst */
+   brw_MAC(codegen, dst, arg0, arg1);
+}
+
+/**
+ * 3 or 4-component vector normalization.
+ *
+ * Emits a dot product of arg0 with itself (DP3 when num_comps is 3, DP4
+ * for any other value), a full-precision RSQ of that, and a MUL of arg0
+ * by the result into dst.
+ */
+static void emit_nrm( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      int num_comps)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg inv_len = get_tmp(c);
+
+   /* inv_len = dot(arg0, arg0) */
+   if (num_comps != 3) {
+      brw_DP4(p, inv_len, arg0, arg0);
+   }
+   else {
+      brw_DP3(p, inv_len, arg0, arg0);
+   }
+
+   /* inv_len = 1 / sqrt(inv_len) */
+   emit_math1(c, BRW_MATH_FUNCTION_RSQ, inv_len, inv_len,
+              BRW_MATH_PRECISION_FULL);
+
+   /* dst = arg0 * inv_len */
+   brw_MUL(p, dst, arg0, inv_len);
+
+   release_tmp(c, inv_len);
+}
+
+
+/**
+ * Return the register holding the constant referenced by source operand
+ * argIndex, fetching it from the vertex constant buffer if the slot for
+ * this operand does not already hold that constant.
+ *
+ * Each of the three operand positions has its own slot in
+ * c->current_const[]; the slot remembers the index of the constant it
+ * last loaded so that a repeated reference issues no new read.
+ *
+ * \param c         vertex shader compile state
+ * \param inst      instruction whose operand is being resolved
+ * \param argIndex  operand position, must be less than 3
+ * \return the slot register, restrided so the low four floats are
+ *         replicated into the upper half (XYZWXYZW)
+ */
+static struct brw_reg
+get_constant(struct brw_vs_compile *c,
+             const struct prog_instruction *inst,
+             GLuint argIndex)
+{
+   const struct prog_src_register *src;
+   struct brw_compile *p = &c->func;
+   struct brw_reg const_reg;
+
+   /* Validate the operand position before it is used as an index. */
+   assert(argIndex < 3);
+
+   src = &inst->SrcReg[argIndex];
+   const_reg = c->current_const[argIndex].reg;
+
+   if (c->current_const[argIndex].index != src->Index) {
+      struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+
+      /* Keep track of the last constant loaded in this slot, for reuse. */
+      c->current_const[argIndex].index = src->Index;
+
+#if 0
+      printf("  fetch const[%d] for arg %d into reg %d\n",
+             src->Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+      /* need to fetch the constant now */
+      brw_dp_READ_4_vs(p,
+                       const_reg,                     /* writeback dest */
+                       0,                             /* oword */
+                       0,                             /* relative indexing? */
+                       addrReg,                       /* address register */
+                       16 * src->Index,               /* byte offset */
+                       SURF_INDEX_VERT_CONST_BUFFER   /* binding table index */
+                       );
+   }
+
+   /* replicate lower four floats into upper half (to get XYZWXYZW) */
+   const_reg = stride(const_reg, 0, 4, 0);
+   const_reg.subnr = 0;
+
+   return const_reg;
+}
+
+/**
+ * Fetch a relatively-addressed constant for source operand argIndex from
+ * the vertex constant buffer.
+ *
+ * Two reads are issued, one per oword, the second using the upper half of
+ * the program address register.  The upper four dwords of the second
+ * result are then merged into the operand's slot register.  The slot is
+ * marked as holding no reusable constant, because the value depends on
+ * the address register.
+ *
+ * \param c         vertex shader compile state
+ * \param inst      instruction whose operand is being resolved
+ * \param argIndex  operand position, must be less than 3
+ * \return the slot register containing both fetched halves
+ */
+static struct brw_reg
+get_reladdr_constant(struct brw_vs_compile *c,
+		     const struct prog_instruction *inst,
+		     GLuint argIndex)
+{
+   const struct prog_src_register *src;
+   struct brw_compile *p = &c->func;
+   struct brw_reg const_reg;
+   struct brw_reg const2_reg;
+   struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+
+   /* Validate the operand position before it is used as an index. */
+   assert(argIndex < 3);
+
+   src = &inst->SrcReg[argIndex];
+   const_reg = c->current_const[argIndex].reg;
+
+   /* Can't reuse a reladdr constant load. */
+   c->current_const[argIndex].index = -1;
+
+#if 0
+   printf("  fetch const[a0.x+%d] for arg %d into reg %d\n",
+	  src->Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+
+   /* fetch the first vec4 */
+   brw_dp_READ_4_vs(p,
+		    const_reg,                     /* writeback dest */
+		    0,                             /* oword */
+		    1,                             /* relative indexing? */
+		    addrReg,                       /* address register */
+		    16 * src->Index,               /* byte offset */
+		    SURF_INDEX_VERT_CONST_BUFFER   /* binding table index */
+		    );
+   /* second vec4 */
+   const2_reg = get_tmp(c);
+
+   /* use upper half of address reg for second read */
+   addrReg = stride(addrReg, 0, 4, 0);
+   addrReg.subnr = 16;
+
+   brw_dp_READ_4_vs(p,
+		    const2_reg,              /* writeback dest */
+		    1,                       /* oword */
+		    1,                       /* relative indexing? */
+		    addrReg,                 /* address register */
+		    16 * src->Index,         /* byte offset */
+		    SURF_INDEX_VERT_CONST_BUFFER
+		    );
+
+   /* merge the two Owords into the constant register */
+   /* const_reg[7..4] = const2_reg[7..4] */
+   brw_MOV(p,
+	   suboffset(stride(const_reg, 0, 4, 1), 4),
+	   suboffset(stride(const2_reg, 0, 4, 1), 4));
+   release_tmp(c, const2_reg);
+
+   return const_reg;
+}
+
+
+
+/**
+ * Look up the hardware register assigned to (file, index).
+ *
+ * State variables, constants and uniforms all resolve through the
+ * PROGRAM_STATE_VAR table.  PROGRAM_UNDEFINED yields the null register.
+ * Any other file not listed below trips an assertion and yields the null
+ * register.
+ *
+ * TODO: relative addressing!
+ */
+static struct brw_reg get_reg( struct brw_vs_compile *c,
+			       gl_register_file file,
+			       GLuint index )
+{
+   switch (file) {
+   case PROGRAM_UNDEFINED:			/* undef values */
+      return brw_null_reg();
+
+   case PROGRAM_ADDRESS:
+      assert(index == 0);
+      break;
+
+   case PROGRAM_TEMPORARY:
+   case PROGRAM_INPUT:
+   case PROGRAM_OUTPUT:
+      assert(c->regs[file][index].nr != 0);
+      break;
+
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:
+   case PROGRAM_UNIFORM:
+      /* All three share the state-var register table. */
+      file = PROGRAM_STATE_VAR;
+      assert(c->regs[file][index].nr != 0);
+      break;
+
+   case PROGRAM_LOCAL_PARAM:
+   case PROGRAM_ENV_PARAM:
+   case PROGRAM_WRITE_ONLY:
+   default:
+      assert(0);
+      return brw_null_reg();
+   }
+
+   return c->regs[file][index];
+}
+
+
+/**
+ * Indirect addressing:  get reg[[arg] + offset].
+ *
+ * Emits, in align1 access mode, two loads of hardware address register 0
+ * and two indirect MOVs that gather the value into a newly allocated
+ * temporary, four dwords at a time.  The constant added to the program's
+ * address register each time is arg.nr * 32 + arg.subnr + offset * 16.
+ *
+ * \param c       vertex shader compile state
+ * \param arg     base register of the array being indexed
+ * \param offset  constant index added to the relative address
+ * \return the temporary, widened with vec8(); it is NOT released here
+ */
+static struct brw_reg deref( struct brw_vs_compile *c,
+			     struct brw_reg arg,
+			     GLint offset)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = vec4(get_tmp(c));
+   struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+   struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
+   GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
+   struct brw_reg indirect = brw_vec4_indirect(0,0);
+
+   {
+      brw_push_insn_state(p);
+      brw_set_access_mode(p, BRW_ALIGN_1);
+
+      /* This is pretty clunky - load the address register twice and
+       * fetch each 4-dword value in turn.  There must be a way to do
+       * this in a single pass, but I couldn't get it to work.
+       *
+       * The second load reads the program address 8 UW elements further
+       * on -- presumably the second vertex's copy; confirm.
+       */
+      brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
+      brw_MOV(p, tmp, indirect);
+
+      brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
+      brw_MOV(p, suboffset(tmp, 4), indirect);
+
+      brw_pop_insn_state(p);
+   }
+   
+   /* NOTE: tmp not released */
+   return vec8(tmp);
+}
+
+
+/**
+ * Get brw reg corresponding to the instruction's [argIndex] src reg.
+ *
+ * Where brw_vs_arg_can_be_immediate() allows it, an all-ZERO swizzle, an
+ * all-ONE swizzle, or a PROGRAM_CONSTANT operand with a replicated
+ * single-component swizzle is returned as a float immediate, with Negate
+ * (and, for constants, Abs) already folded into the value.
+ *
+ * Otherwise the operand is looked up in c->regs[].  Relatively-addressed
+ * operands go through deref(), or through get_reladdr_constant() when the
+ * constant buffer is in use.  Swizzle and negate are not applied to the
+ * register results returned from here.
+ */
+static struct brw_reg
+get_src_reg( struct brw_vs_compile *c,
+             const struct prog_instruction *inst,
+             GLuint argIndex )
+{
+   const GLuint file = inst->SrcReg[argIndex].File;
+   const GLint index = inst->SrcReg[argIndex].Index;
+   const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr;
+
+   if (brw_vs_arg_can_be_immediate(inst->Opcode, argIndex)) {
+      const struct prog_src_register *src = &inst->SrcReg[argIndex];
+
+      if (src->Swizzle == MAKE_SWIZZLE4(SWIZZLE_ZERO,
+					SWIZZLE_ZERO,
+					SWIZZLE_ZERO,
+					SWIZZLE_ZERO)) {
+	  return brw_imm_f(0.0f);
+      } else if (src->Swizzle == MAKE_SWIZZLE4(SWIZZLE_ONE,
+					       SWIZZLE_ONE,
+					       SWIZZLE_ONE,
+					       SWIZZLE_ONE)) {
+	 if (src->Negate)
+	    return brw_imm_f(-1.0F);
+	 else
+	    return brw_imm_f(1.0F);
+      } else if (src->File == PROGRAM_CONSTANT) {
+	 const struct gl_program_parameter_list *params;
+	 float f;
+	 int component = -1;	/* stays -1 unless the swizzle replicates one channel */
+
+	 switch (src->Swizzle) {
+	 case SWIZZLE_XXXX:
+	    component = 0;
+	    break;
+	 case SWIZZLE_YYYY:
+	    component = 1;
+	    break;
+	 case SWIZZLE_ZZZZ:
+	    component = 2;
+	    break;
+	 case SWIZZLE_WWWW:
+	    component = 3;
+	    break;
+	 }
+
+	 if (component >= 0) {
+	    params = c->vp->program.Base.Parameters;
+	    f = params->ParameterValues[src->Index][component];
+
+	    if (src->Abs)
+	       f = fabs(f);
+	    if (src->Negate)
+	       f = -f;
+	    return brw_imm_f(f);
+	 }
+      }
+   }
+
+   switch (file) {
+   case PROGRAM_TEMPORARY:
+   case PROGRAM_INPUT:
+   case PROGRAM_OUTPUT:
+      if (relAddr) {
+         return deref(c, c->regs[file][0], index);
+      }
+      else {
+         assert(c->regs[file][index].nr != 0);
+         return c->regs[file][index];
+      }
+
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:
+   case PROGRAM_UNIFORM:
+   case PROGRAM_ENV_PARAM:
+   case PROGRAM_LOCAL_PARAM:
+      if (c->vp->use_const_buffer) {
+	 if (!relAddr && c->constant_map[index] != -1) {	/* constant already has a register */
+	    assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0);
+	    return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]];
+	 } else if (relAddr)
+	    return get_reladdr_constant(c, inst, argIndex);
+	 else
+	    return get_constant(c, inst, argIndex);
+      }
+      else if (relAddr) {
+         return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+      }
+      else {
+         assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+         return c->regs[PROGRAM_STATE_VAR][index];
+      }
+   case PROGRAM_ADDRESS:
+      assert(index == 0);
+      return c->regs[file][index];
+
+   case PROGRAM_UNDEFINED:
+      /* this is a normal case since we loop over all three src args */
+      return brw_null_reg();
+
+   case PROGRAM_WRITE_ONLY:
+   default:
+      assert(0);
+      return brw_null_reg();
+   }
+}
+
+
+/**
+ * ARL (address register load): emit RNDD of arg0 followed by a multiply
+ * by 16 into dst.  When dst is not in the general register file the RNDD
+ * result goes through a scratch register; otherwise dst itself holds the
+ * intermediate value.
+ */
+static void emit_arl( struct brw_vs_compile *c,
+		      struct brw_reg dst,
+		      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   const GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+   struct brw_reg rounded = need_tmp ? get_tmp(c) : dst;
+
+   /* rounded = round(arg0) */
+   brw_RNDD(p, rounded, arg0);
+
+   /* dst = rounded * 16 */
+   brw_MUL(p, dst, rounded, brw_imm_d(16));
+
+   if (need_tmp) {
+      release_tmp(c, rounded);
+   }
+}
+
+
+/**
+ * Return the brw reg for the given instruction's src argument, with the
+ * operand's swizzle and negate applied.
+ *
+ * Will return mangled results for SWZ op.  The emit_swz() function
+ * ignores this result and recalculates taking extended swizzles into
+ * account.
+ *
+ * Immediates produced by get_src_reg() are returned untouched: their
+ * value already reflects the ZERO/ONE/constant selection and any
+ * negation, so applying the swizzle or negate a second time would be
+ * wrong.
+ *
+ * \param c         vertex shader compile state
+ * \param inst      instruction whose operand is being resolved
+ * \param argIndex  operand position within inst->SrcReg[]
+ * \return the null register for PROGRAM_UNDEFINED operands, otherwise the
+ *         resolved register or immediate
+ */
+static struct brw_reg get_arg( struct brw_vs_compile *c,
+                               const struct prog_instruction *inst,
+                               GLuint argIndex )
+{
+   const struct prog_src_register *src = &inst->SrcReg[argIndex];
+   struct brw_reg reg;
+
+   if (src->File == PROGRAM_UNDEFINED)
+      return brw_null_reg();
+
+   reg = get_src_reg(c, inst, argIndex);
+
+   /* NOTE(review): dw1 presumably overlays the swizzle bits with the
+    * immediate payload (struct brw_reg is declared elsewhere), so writing
+    * a swizzle here would corrupt the immediate's value -- confirm.
+    */
+   if (reg.file == BRW_IMMEDIATE_VALUE)
+      return reg;
+
+   /* Convert 3-bit swizzle to 2-bit.
+    */
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
+				       GET_SWZ(src->Swizzle, 1),
+				       GET_SWZ(src->Swizzle, 2),
+				       GET_SWZ(src->Swizzle, 3));
+
+   /* Note this is ok for non-swizzle instructions:
+    */
+   reg.negate = src->Negate ? 1 : 0;
+
+   return reg;
+}
+
+
+/**
+ * Get brw register for the given program dest register.
+ *
+ * The result carries the destination's writemask.  PROGRAM_UNDEFINED
+ * maps to the null register; any file other than TEMPORARY, OUTPUT,
+ * ADDRESS or UNDEFINED trips an assertion and also maps to the null
+ * register.
+ */
+static struct brw_reg get_dst( struct brw_vs_compile *c,
+			       struct prog_dst_register dst )
+{
+   struct brw_reg out;
+
+   switch (dst.File) {
+   case PROGRAM_ADDRESS:
+      assert(dst.Index == 0);
+      out = c->regs[dst.File][dst.Index];
+      break;
+
+   case PROGRAM_TEMPORARY:
+   case PROGRAM_OUTPUT:
+      assert(c->regs[dst.File][dst.Index].nr != 0);
+      out = c->regs[dst.File][dst.Index];
+      break;
+
+   case PROGRAM_UNDEFINED:
+      /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
+      out = brw_null_reg();
+      break;
+
+   default:
+      assert(0);
+      out = brw_null_reg();
+      break;
+   }
+
+   out.dw1.bits.writemask = dst.WriteMask;
+
+   return out;
+}
+
+
+/**
+ * SWZ: extended swizzle of source operand 0 into dst.
+ *
+ * Each enabled destination channel is classified as taking a source
+ * component (X/Y/Z/W), the constant 0, or the constant 1.  Up to three
+ * writemasked MOVs are emitted - source channels first, in case dst
+ * aliases src - followed by a MOV of the negated result for the channels
+ * named in the operand's Negate mask.
+ *
+ * A scratch register is used when negation is requested and dst is not
+ * in the general register file.
+ *
+ * \param c     vertex shader compile state
+ * \param dst   destination register, with writemask
+ * \param inst  the SWZ instruction; only SrcReg[0] is consulted
+ */
+static void emit_swz( struct brw_vs_compile *c,
+		      struct brw_reg dst,
+                      const struct prog_instruction *inst)
+{
+   const GLuint argIndex = 0;
+   const struct prog_src_register src = inst->SrcReg[argIndex];
+   struct brw_compile *p = &c->func;
+   GLuint zeros_mask = 0;
+   GLuint ones_mask = 0;
+   GLuint src_mask = 0;
+   /* Channels that do not read the source never get an entry below, yet
+    * all four entries are handed to brw_swizzle(); give them a defined
+    * value.  Those channels are excluded by src_mask in the MOV.
+    */
+   GLubyte src_swz[4] = { 0, 0, 0, 0 };
+   GLboolean need_tmp = (src.Negate &&
+			 dst.file != BRW_GENERAL_REGISTER_FILE);
+   struct brw_reg tmp = dst;
+   GLuint i;
+
+   if (need_tmp)
+      tmp = get_tmp(c);
+
+   for (i = 0; i < 4; i++) {
+      if (dst.dw1.bits.writemask & (1<<i)) {
+	 GLubyte s = GET_SWZ(src.Swizzle, i);
+	 switch (s) {
+	 case SWIZZLE_X:
+	 case SWIZZLE_Y:
+	 case SWIZZLE_Z:
+	 case SWIZZLE_W:
+	    src_mask |= 1<<i;
+	    src_swz[i] = s;
+	    break;
+	 case SWIZZLE_ZERO:
+	    zeros_mask |= 1<<i;
+	    break;
+	 case SWIZZLE_ONE:
+	    ones_mask |= 1<<i;
+	    break;
+	 }
+      }
+   }
+
+   /* Do src first, in case dst aliases src:
+    */
+   if (src_mask) {
+      struct brw_reg arg0;
+
+      arg0 = get_src_reg(c, inst, argIndex);
+
+      arg0 = brw_swizzle(arg0,
+			 src_swz[0], src_swz[1],
+			 src_swz[2], src_swz[3]);
+
+      brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
+   }
+
+   if (zeros_mask)
+      brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
+
+   if (ones_mask)
+      brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
+
+   /* Negate is a per-channel mask, used directly as the writemask. */
+   if (src.Negate)
+      brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
+
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp);
+      release_tmp(c, tmp);
+   }
+}
+
+
+/**
+ * Post-vertex-program processing.  Send the results to the URB.
+ *
+ * Steps, in emission order:
+ *  - optionally copy the edge flag input to the edge flag output
+ *    (key.copy_edgeflag);
+ *  - before gen6, build NDC coordinates from the HPOS output;
+ *  - build the first header dword group (point size, user clip flags,
+ *    negative-rhw workaround flag) into message register 1, or zero it
+ *    when none of those apply;
+ *  - lay out the remaining generation-specific vertex header in the
+ *    message registers;
+ *  - issue the URB write, plus a second one when some outputs did not
+ *    fit (first_overflow_output > 0).
+ *
+ * The ndc temporary allocated here is not released in this function.
+ */
+static void emit_vertex_write( struct brw_vs_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
+   struct brw_reg m0 = brw_message_reg(0);
+   struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
+   struct brw_reg ndc;
+   int eot;
+   GLuint len_vertex_header = 2;	/* overwritten per generation below */
+
+   if (c->key.copy_edgeflag) {
+      brw_MOV(p, 
+	      get_reg(c, PROGRAM_OUTPUT, VERT_RESULT_EDGE),
+	      get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG));
+   }
+
+   if (intel->gen < 6) {
+      /* Build ndc coords */
+      ndc = get_tmp(c);
+      /* ndc = 1.0 / pos.w */
+      emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+      /* ndc.xyz = pos * ndc */
+      brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+   }
+
+   /* Update the header for point size, user clipping flags, and -ve rhw
+    * workaround.
+    */
+   if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
+       c->key.nr_userclip || brw->has_negative_rhw_bug)
+   {
+      struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+      GLuint i;
+
+      brw_MOV(p, header1, brw_imm_ud(0));
+
+      brw_set_access_mode(p, BRW_ALIGN_16);	
+
+      if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+	 struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ];
+	 brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
+	 brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
+      }
+
+      for (i = 0; i < c->key.nr_userclip; i++) {
+	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+	 brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
+	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<i));
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      }
+
+      /* i965 clipping workaround: 
+       * 1) Test for -ve rhw
+       * 2) If set, 
+       *      set ndc = (0,0,0,0)
+       *      set ucp[6] = 1
+       *
+       * Later, clipping will detect ucp[6] and ensure the primitive is
+       * clipped against all fixed planes.
+       *
+       * NOTE(review): ndc is only assigned when intel->gen < 6 but is
+       * read here whenever has_negative_rhw_bug is set -- presumably the
+       * flag is never set on gen6+; confirm.
+       */
+      if (brw->has_negative_rhw_bug) {
+	 brw_CMP(p,
+		 vec8(brw_null_reg()),
+		 BRW_CONDITIONAL_L,
+		 brw_swizzle1(ndc, 3),
+		 brw_imm_f(0));
+   
+	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
+	 brw_MOV(p, ndc, brw_imm_f(0));
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      }
+
+      brw_set_access_mode(p, BRW_ALIGN_1);	/* why? */
+      brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
+      brw_set_access_mode(p, BRW_ALIGN_16);
+
+      release_tmp(c, header1);
+   }
+   else {
+      brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
+   }
+
+   /* Emit the (interleaved) headers for the two vertices - an 8-reg
+    * of zeros followed by two sets of NDC coordinates:
+    */
+   brw_set_access_mode(p, BRW_ALIGN_1);
+
+   if (intel->gen >= 6) {
+      /* There are 16 DWs (D0-D15) in VUE header on Sandybridge:
+       * dword 0-3 (m1) of the header is indices, point width, clip flags.
+       * dword 4-7 (m2) is the 4D space position
+       * dword 8-15 (m3,m4) of the vertex header is the user clip distance.
+       * m5 is the first vertex data we fill, which is the vertex position.
+       */
+      brw_MOV(p, offset(m0, 2), pos);
+      brw_MOV(p, offset(m0, 5), pos);
+      len_vertex_header = 4;
+   } else if (intel->gen == 5) {
+      /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
+       * dword 0-3 (m1) of the header is indices, point width, clip flags.
+       * dword 4-7 (m2) is the ndc position (set above)
+       * dword 8-11 (m3) of the vertex header is the 4D space position
+       * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
+       * m6 is a pad so that the vertex element data is aligned
+       * m7 is the first vertex data we fill, which is the vertex position.
+       */
+      brw_MOV(p, offset(m0, 2), ndc);
+      brw_MOV(p, offset(m0, 3), pos);
+      brw_MOV(p, offset(m0, 7), pos);
+      len_vertex_header = 6;
+   } else {
+      /* There are 8 dwords in VUE header pre-Ironlake:
+       * dword 0-3 (m1) is indices, point width, clip flags.
+       * dword 4-7 (m2) is ndc position (set above)
+       *
+       * dword 8-11 (m3) is the first vertex data, which we always have be the
+       * vertex position.
+       */
+      brw_MOV(p, offset(m0, 2), ndc);
+      brw_MOV(p, offset(m0, 3), pos);
+      len_vertex_header = 2;
+   }
+
+   eot = (c->first_overflow_output == 0);	/* end the thread here only if nothing overflowed */
+
+   brw_urb_WRITE(p, 
+		 brw_null_reg(), /* dest */
+		 0,		/* starting mrf reg nr */
+		 c->r0,		/* src */
+		 0,		/* allocate */
+		 1,		/* used */
+		 MIN2(c->nr_outputs + 1 + len_vertex_header, (BRW_MAX_MRF-1)), /* msg len */
+		 0,		/* response len */
+		 eot, 		/* eot */
+		 eot, 		/* writes complete */
+		 0, 		/* urb destination offset */
+		 BRW_URB_SWIZZLE_INTERLEAVE);
+
+   if (c->first_overflow_output > 0) {
+      /* Not all of the vertex outputs/results fit into the MRF.
+       * Move the overflowed attributes from the GRF to the MRF and
+       * issue another brw_urb_WRITE().
+       */
+      /* XXX I'm not 100% sure about which MRF regs to use here.  Starting
+       * at mrf[4] atm...
+       */
+      GLuint i, mrf = 0;	/* mrf counts the overflow outputs copied so far */
+      for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) {
+         if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
+            /* move from GRF to MRF */
+            brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
+            mrf++;
+         }
+      }
+
+      brw_urb_WRITE(p,
+                    brw_null_reg(), /* dest */
+                    4,              /* starting mrf reg nr */
+                    c->r0,          /* src */
+                    0,              /* allocate */
+                    1,              /* used */
+                    mrf+1,          /* msg len */
+                    0,              /* response len */
+                    1,              /* eot */
+                    1,              /* writes complete */
+                    BRW_MAX_MRF-1,  /* urb destination offset */
+                    BRW_URB_SWIZZLE_INTERLEAVE);
+   }
+}
+
+/* Report whether the most recently emitted instruction is an align16
+ * MOV/MAC/MUL that wrote all four channels of the directly addressed
+ * register val (presumably leaving val's value in the accumulator).
+ */
+static GLboolean
+accumulator_contains(struct brw_vs_compile *c, struct brw_reg val)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *prev_insn;
+
+   /* Test for an empty store before forming &p->store[nr_insn - 1]. */
+   if (p->nr_insn == 0 || val.address_mode != BRW_ADDRESS_DIRECT)
+      return GL_FALSE;
+
+   prev_insn = &p->store[p->nr_insn - 1];
+   switch (prev_insn->header.opcode) {
+   case BRW_OPCODE_MOV:
+   case BRW_OPCODE_MAC:
+   case BRW_OPCODE_MUL:
+      return (prev_insn->header.access_mode == BRW_ALIGN_16 &&
+	      prev_insn->header.execution_size == val.width &&
+	      prev_insn->bits1.da1.dest_reg_file == val.file &&
+	      prev_insn->bits1.da1.dest_reg_type == val.type &&
+	      prev_insn->bits1.da1.dest_address_mode == val.address_mode &&
+	      prev_insn->bits1.da1.dest_reg_nr == val.nr &&
+	      prev_insn->bits1.da16.dest_subreg_nr == val.subnr / 16 &&
+	      prev_insn->bits1.da16.dest_writemask == 0xf);
+   default:
+      return GL_FALSE;
+   }
+}
+
+/* Map the Mesa IR condition on inst to a BRW_PREDICATE_* value. */
+static uint32_t
+get_predicate(const struct prog_instruction *inst)
+{
+   if (inst->DstReg.CondMask == COND_TR)
+      return BRW_PREDICATE_NONE;
+
+   /* All of GLSL only produces predicates for COND_NE and one channel per
+    * vector.  Fail badly if someone starts doing something else, as it might
+    * mean infinite looping or something.
+    *
+    * We'd like to support all the condition codes, but our hardware doesn't
+    * quite match the Mesa IR, which is modeled after the NV extensions.  For
+    * those, the instruction may update the condition codes or not, then any
+    * later instruction may use one of those condition codes.  For gen4, the
+    * instruction may update the flags register based on one of the condition
+    * codes output by the instruction, and then further instructions may
+    * predicate on that.  We can probably support this, but it won't
+    * necessarily be easy.
+    */
+   assert(inst->DstReg.CondMask == COND_NE);
+   switch (inst->DstReg.CondSwizzle) {
+   case SWIZZLE_XXXX:
+      return BRW_PREDICATE_ALIGN16_REPLICATE_X;
+   case SWIZZLE_YYYY:
+      return BRW_PREDICATE_ALIGN16_REPLICATE_Y;
+   case SWIZZLE_ZZZZ:
+      return BRW_PREDICATE_ALIGN16_REPLICATE_Z;
+   case SWIZZLE_WWWW:
+      return BRW_PREDICATE_ALIGN16_REPLICATE_W;
+   default:
+      _mesa_problem(NULL, "Unexpected predicate swizzle: 0x%08x\n",
+		    inst->DstReg.CondSwizzle);
+      return BRW_PREDICATE_NORMAL;
+   }
+}
+
+/* Emit native code for the vertex program in c->vp: flag outputs that are
+ * read back as sources, allocate registers, then translate each instruction.
+ */
+void brw_vs_emit(struct brw_vs_compile *c )
+{
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
+   struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
+   struct intel_context *intel = &brw->intel;
+   const GLuint nr_insns = c->vp->program.Base.NumInstructions;
+   GLuint insn, if_depth = 0, loop_depth = 0;
+   struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH] = { 0 };
+   const struct brw_indirect stack_index = brw_indirect(0, 0);
+   GLuint index;
+   GLuint file;
+
+   if (INTEL_DEBUG & DEBUG_VS) {
+      printf("vs-mesa:\n");
+      _mesa_print_program(&c->vp->program.Base);
+      printf("\n");
+   }
+
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+
+   for (insn = 0; insn < nr_insns; insn++) {
+       GLuint i;
+       struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+
+       /* Message registers can't be read, so copy the output into GRF
+	* register if they are used in source registers
+	*/
+       for (i = 0; i < 3; i++) {
+	   struct prog_src_register *src = &inst->SrcReg[i];
+	   GLuint index = src->Index;
+	   GLuint file = src->File;
+	   if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS)
+	       c->output_regs[index].used_in_src = GL_TRUE;
+       }
+
+       switch (inst->Opcode) {
+       case OPCODE_CAL:
+       case OPCODE_RET:
+	  c->needs_stack = GL_TRUE;
+	  break;
+       default:
+	  break;
+       }
+   }
+
+   /* Static register allocation
+    */
+   brw_vs_alloc_regs(c);
+
+   if (c->needs_stack)
+      brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+
+   for (insn = 0; insn < nr_insns; insn++) {
+      const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+      struct brw_reg args[3], dst;
+      GLuint i;
+
+#if 0
+      printf("%d: ", insn);
+      _mesa_print_instruction(inst);
+#endif
+
+      /* Get argument regs.  SWZ is special and does this itself.
+       */
+      if (inst->Opcode != OPCODE_SWZ)
+	  for (i = 0; i < 3; i++) {
+	      const struct prog_src_register *src = &inst->SrcReg[i];
+	      index = src->Index;
+	      file = src->File;
+	      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
+		  args[i] = c->output_regs[index].reg;
+	      else
+                  args[i] = get_arg(c, inst, i);
+	  }
+
+      /* Get dest regs.  Note that it is possible for a reg to be both
+       * dst and arg, given the static allocation of registers.  So
+       * care needs to be taken emitting multi-operation instructions.
+       */
+      index = inst->DstReg.Index;
+      file = inst->DstReg.File;
+      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
+	  dst = c->output_regs[index].reg;
+      else
+	  dst = get_dst(c, inst->DstReg);
+
+      if (inst->SaturateMode != SATURATE_OFF) {
+	 _mesa_problem(NULL, "Unsupported saturate %d in vertex shader",
+                       inst->SaturateMode);
+      }
+
+      switch (inst->Opcode) {
+      case OPCODE_ABS:
+	 brw_MOV(p, dst, brw_abs(args[0]));
+	 break;
+      case OPCODE_ADD:
+	 brw_ADD(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_COS:
+	 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_DP3:
+	 brw_DP3(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_DP4:
+	 brw_DP4(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_DPH:
+	 brw_DPH(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_NRM3:
+	 emit_nrm(c, dst, args[0], 3);
+	 break;
+      case OPCODE_NRM4:
+	 emit_nrm(c, dst, args[0], 4);
+	 break;
+      case OPCODE_DST:
+	 unalias2(c, dst, args[0], args[1], emit_dst_noalias);
+	 break;
+      case OPCODE_EXP:
+	 unalias1(c, dst, args[0], emit_exp_noalias);
+	 break;
+      case OPCODE_EX2:
+	 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_ARL:
+	 emit_arl(c, dst, args[0]);
+	 break;
+      case OPCODE_FLR:
+	 brw_RNDD(p, dst, args[0]);
+	 break;
+      case OPCODE_FRC:
+	 brw_FRC(p, dst, args[0]);
+	 break;
+      case OPCODE_LOG:
+	 unalias1(c, dst, args[0], emit_log_noalias);
+	 break;
+      case OPCODE_LG2:
+	 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_LIT:
+	 unalias1(c, dst, args[0], emit_lit_noalias);
+	 break;
+      case OPCODE_LRP:
+	 unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
+	 break;
+      case OPCODE_MAD:
+	 if (!accumulator_contains(c, args[2]))
+	    brw_MOV(p, brw_acc_reg(), args[2]);
+	 brw_MAC(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_CMP:
+	 emit_cmp(p, dst, args[0], args[1], args[2]);
+	 break;
+      case OPCODE_MAX:
+	 emit_max(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_MIN:
+	 emit_min(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_MOV:
+	 brw_MOV(p, dst, args[0]);
+	 break;
+      case OPCODE_MUL:
+	 brw_MUL(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_POW:
+	 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_RCP:
+	 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_RSQ:
+	 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_SEQ:
+         unalias2(c, dst, args[0], args[1], emit_seq);
+         break;
+      case OPCODE_SIN:
+	 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_SNE:
+         unalias2(c, dst, args[0], args[1], emit_sne);
+         break;
+      case OPCODE_SGE:
+         unalias2(c, dst, args[0], args[1], emit_sge);
+	 break;
+      case OPCODE_SGT:
+         unalias2(c, dst, args[0], args[1], emit_sgt);
+         break;
+      case OPCODE_SLT:
+         unalias2(c, dst, args[0], args[1], emit_slt);
+	 break;
+      case OPCODE_SLE:
+         unalias2(c, dst, args[0], args[1], emit_sle);
+         break;
+      case OPCODE_SUB:
+	 brw_ADD(p, dst, args[0], negate(args[1]));
+	 break;
+      case OPCODE_SWZ:
+	 /* The args[0] value can't be used here as it won't have
+	  * correctly encoded the full swizzle:
+	  */
+	 emit_swz(c, dst, inst);
+	 break;
+      case OPCODE_TRUNC:
+         /* round toward zero */
+	 brw_RNDZ(p, dst, args[0]);
+	 break;
+      case OPCODE_XPD:
+	 emit_xpd(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_IF:
+	 assert(if_depth < MAX_IF_DEPTH);
+	 if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
+	 /* Note that brw_IF smashes the predicate_control field. */
+	 if_inst[if_depth]->header.predicate_control = get_predicate(inst);
+	 if_depth++;
+	 break;
+      case OPCODE_ELSE:
+	 assert(if_depth > 0);
+	 if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
+	 break;
+      case OPCODE_ENDIF:
+         assert(if_depth > 0);
+	 brw_ENDIF(p, if_inst[--if_depth]);
+	 break;
+      case OPCODE_BGNLOOP:
+	 assert(loop_depth < MAX_LOOP_DEPTH);
+         loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+         break;
+      case OPCODE_BRK:
+	 brw_set_predicate_control(p, get_predicate(inst));
+         brw_BREAK(p);
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+         break;
+      case OPCODE_CONT:
+	 brw_set_predicate_control(p, get_predicate(inst));
+         brw_CONT(p);
+         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+         break;
+      case OPCODE_ENDLOOP:
+         {
+            struct brw_instruction *inst0, *inst1;
+	    GLuint br = 1;
+
+            assert(loop_depth > 0);
+            loop_depth--;
+	    if (intel->gen == 5)
+	       br = 2;
+
+            inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+            /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+            while (inst0 > loop_inst[loop_depth]) {
+               inst0--;
+               if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+		   inst0->bits3.if_else.jump_count == 0) {
+                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+                  inst0->bits3.if_else.pop_count = 0;
+               }
+               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+			inst0->bits3.if_else.jump_count == 0) {
+                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+                  inst0->bits3.if_else.pop_count = 0;
+               }
+            }
+         }
+         break;
+      case OPCODE_BRA:
+	 brw_set_predicate_control(p, get_predicate(inst));
+         brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+         break;
+      case OPCODE_CAL:
+	 brw_set_access_mode(p, BRW_ALIGN_1);
+	 brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+	 brw_set_access_mode(p, BRW_ALIGN_16);
+	 brw_ADD(p, get_addr_reg(stack_index),
+			 get_addr_reg(stack_index), brw_imm_d(4));
+         brw_save_call(p, inst->Comment, p->nr_insn);
+	 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+         break;
+      case OPCODE_RET:
+	 brw_ADD(p, get_addr_reg(stack_index),
+			 get_addr_reg(stack_index), brw_imm_d(-4));
+	 brw_set_access_mode(p, BRW_ALIGN_1);
+         brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));
+	 brw_set_access_mode(p, BRW_ALIGN_16);
+	 break;
+      case OPCODE_END:
+	 emit_vertex_write(c);
+         break;
+      case OPCODE_PRINT:
+         /* no-op */
+         break;
+      case OPCODE_BGNSUB:
+         brw_save_label(p, inst->Comment, p->nr_insn);
+         break;
+      case OPCODE_ENDSUB:
+         /* no-op */
+         break;
+      default:
+	 _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader",
+                       inst->Opcode, inst->Opcode < MAX_OPCODE ?
+				    _mesa_opcode_string(inst->Opcode) :
+				    "unknown");
+      }
+
+      /* Set the predication update on the last instruction of the native
+       * instruction sequence.
+       *
+       * This would be problematic if it was set on a math instruction,
+       * but that shouldn't be the case with the current GLSL compiler.
+       */
+      if (inst->CondUpdate) {
+	 struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
+
+	 assert(hw_insn->header.destreg__conditionalmod == 0);
+	 hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
+      }
+
+      if ((inst->DstReg.File == PROGRAM_OUTPUT)
+          && (inst->DstReg.Index != VERT_RESULT_HPOS)
+          && c->output_regs[inst->DstReg.Index].used_in_src) {
+         brw_MOV(p, get_dst(c, inst->DstReg), dst);
+      }
+
+      /* Result color clamping.
+       *
+       * When destination register is an output register and
+       * it's primary/secondary front/back color, we have to clamp
+       * the result to [0,1]. This is done by enabling the
+       * saturation bit for the last instruction.
+       *
+       * We don't use brw_set_saturate() as it modifies
+       * p->current->header.saturate, which affects all the subsequent
+       * instructions. Instead, we directly modify the header
+       * of the last (already stored) instruction.
+       */
+      if (inst->DstReg.File == PROGRAM_OUTPUT) {
+         if ((inst->DstReg.Index == VERT_RESULT_COL0)
+             || (inst->DstReg.Index == VERT_RESULT_COL1)
+             || (inst->DstReg.Index == VERT_RESULT_BFC0)
+             || (inst->DstReg.Index == VERT_RESULT_BFC1)) {
+            p->store[p->nr_insn-1].header.saturate = 1;
+         }
+      }
+
+      release_tmps(c);
+   }
+
+   brw_resolve_cals(p);
+
+   brw_optimize(p);
+
+   if (INTEL_DEBUG & DEBUG_VS) {
+      int i;
+
+      printf("vs-native:\n");
+      for (i = 0; i < p->nr_insn; i++)
+	 brw_disasm(stderr, &p->store[i], intel->gen);
+      printf("\n");
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
new file mode 100644
index 0000000000..9b2dd5b3d1
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -0,0 +1,202 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+            
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "main/macros.h"
+
+struct brw_vs_unit_key {   /* key for BRW_VS_UNIT state cache lookups */
+   unsigned int total_grf;                /* brw->vs.prog_data->total_grf */
+   unsigned int urb_entry_read_length;    /* prog_data->urb_read_length */
+   unsigned int curb_entry_read_length;   /* prog_data->curb_read_length */
+
+   unsigned int curbe_offset;   /* curbe.clip_start or curbe.vs_start */
+
+   unsigned int nr_urb_entries, urb_size;   /* urb.nr_vs_entries, urb.vsize */
+
+   unsigned int nr_surfaces;   /* brw->vs.nr_surfaces */
+};
+
+/* Fill *key with the state that vs_unit_create_from_key() consumes.
+ * The key is zeroed first; the tags below name the dirty bits involved.
+ */
+static void
+vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+{
+   const GLcontext *ctx = &brw->intel.ctx;
+
+   memset(key, 0, sizeof(*key));
+
+   /* BRW_NEW_NR_VS_SURFACES */
+   key->nr_surfaces = brw->vs.nr_surfaces;
+
+   /* BRW_NEW_URB_FENCE */
+   key->urb_size = brw->urb.vsize;
+   key->nr_urb_entries = brw->urb.nr_vs_entries;
+
+   /* CACHE_NEW_VS_PROG */
+   key->curb_entry_read_length = brw->vs.prog_data->curb_read_length;
+   key->urb_entry_read_length = brw->vs.prog_data->urb_read_length;
+   key->total_grf = brw->vs.prog_data->total_grf;
+
+   /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM
+    *
+    * With user clip planes enabled the read starts at clip_start so that
+    * the userclip planes are read in as well.
+    */
+   key->curbe_offset = ctx->Transform.ClipPlanesEnabled ?
+      brw->curbe.clip_start : brw->curbe.vs_start;
+}
+
+/* Build the VS unit state described by key, upload it through the state
+ * cache and emit the relocation for the kernel start pointer.  Returns the
+ * buffer object handed back by brw_upload_cache().
+ */
+static drm_intel_bo *
+vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+{
+   struct intel_context *intel = &brw->intel;
+   const GLboolean is_gen5 = (intel->gen == 5);
+   struct brw_vs_unit_state unit;
+   drm_intel_bo *bo;
+
+   memset(&unit, 0, sizeof(unit));
+
+   unit.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
+   unit.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   unit.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   /* Multiple program flow is selected: we may then get 2-vertex threads,
+    * which have the channel mask for dwords 4-7 enabled in the thread, and
+    * those dwords are written to the second URB handle when we
+    * brw_urb_WRITE() results.
+    */
+   unit.thread1.single_program_flow = 0;
+
+   /* Gen5 takes a zero binding table entry count (hardware requirement). */
+   unit.thread1.binding_table_entry_count = is_gen5 ? 0 : key->nr_surfaces;
+
+   unit.thread3.urb_entry_read_offset = 0;
+   unit.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   unit.thread3.dispatch_grf_start_reg = 1;
+   unit.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+   unit.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+
+   if (is_gen5) {
+      switch (key->nr_urb_entries) {
+      case 8:
+      case 12:
+      case 16:
+      case 32:
+      case 64:
+      case 96:
+      case 128:
+      case 168:
+      case 192:
+      case 224:
+      case 256:
+	 unit.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
+	 break;
+      default:
+	 assert(0);
+      }
+   } else {
+      switch (key->nr_urb_entries) {
+      case 8:
+      case 12:
+      case 16:
+      case 32:
+	 break;
+      case 64:
+	 assert(intel->is_g4x);
+	 break;
+      default:
+	 assert(0);
+      }
+      unit.thread4.nr_urb_entries = key->nr_urb_entries;
+   }
+
+   unit.thread4.urb_entry_allocation_size = key->urb_size - 1;
+   unit.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
+				    1, brw->vs_max_threads) - 1;
+
+   /* No samplers for ARB_vp programs, and the count has to be 0 for
+    * Ironlake in any case.
+    */
+   unit.vs5.sampler_count = 0;
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      unit.thread4.stats_enable = 1;
+
+   /* Vertex program always enabled. */
+   unit.vs6.vs_enable = 1;
+
+   bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
+			 key, sizeof(*key),
+			 &brw->vs.prog_bo, 1,
+			 &unit, sizeof(unit));
+
+   /* Emit VS program relocation */
+   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_vs_unit_state, thread0),
+			   brw->vs.prog_bo, unit.thread0.grf_reg_count << 1,
+			   I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+   return bo;
+}
+
+/* Look up the VS unit state for the current key in the state cache,
+ * building and uploading it on a miss; the result lands in vs.state_bo.
+ */
+static void prepare_vs_unit(struct brw_context *brw)
+{
+   struct brw_vs_unit_key key;
+
+   vs_unit_populate_key(brw, &key);
+   drm_intel_bo_unreference(brw->vs.state_bo);
+   brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT, &key,
+                                       sizeof(key), &brw->vs.prog_bo, 1, NULL);
+   if (brw->vs.state_bo != NULL)
+      return;
+   brw->vs.state_bo = vs_unit_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state brw_vs_unit = {
+   .dirty = {   /* the flags tagged in vs_unit_populate_key()'s comments */
+      .mesa  = _NEW_TRANSFORM,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+                BRW_NEW_NR_VS_SURFACES |
+		BRW_NEW_URB_FENCE),
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .prepare = prepare_vs_unit,   /* no .emit hook is set for this atom */
+};
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
new file mode 100644
index 0000000000..be9e415cb0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -0,0 +1,198 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/mtypes.h"
+#include "main/texstore.h"
+#include "shader/prog_parameter.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+
+/* Rebuild the VS constant buffer from the current vertex program's
+ * parameter values, if the program uses a constant buffer.
+ *
+ * Programs that do not use one get their constants through the CURBEs
+ * using the brw_constant_buffer state atom.
+ */
+static void
+prepare_vs_constants(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   struct brw_vertex_program *vp =
+      (struct brw_vertex_program *) brw->vertex_program;
+   const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+   const int size = params->NumParameters * 4 * sizeof(GLfloat);
+   char *dst;
+   int slot;
+
+   if (vp->program.IsNVProgram)
+      _mesa_load_tracked_matrices(ctx);
+
+   /* Updates the ParameterValues[i] pointers for all parameters of the
+    * basic type of PROGRAM_STATE_VAR.
+    */
+   _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+
+   /* BRW_NEW_VERTEX_PROGRAM */
+   if (!vp->use_const_buffer) {
+      if (brw->vs.const_bo != NULL) {
+	 drm_intel_bo_unreference(brw->vs.const_bo);
+	 brw->vs.const_bo = NULL;
+	 brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
+      }
+      return;
+   }
+
+   /* _NEW_PROGRAM_CONSTANTS */
+   drm_intel_bo_unreference(brw->vs.const_bo);
+   brw->vs.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
+					 size, 64);
+
+   drm_intel_gem_bo_map_gtt(brw->vs.const_bo);
+   dst = brw->vs.const_bo->virtual;
+   for (slot = 0; slot < params->NumParameters; slot++)
+      memcpy(dst + slot * 4 * sizeof(float),
+	     params->ParameterValues[slot], 4 * sizeof(float));
+   drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo);
+   brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
+}
+
+const struct brw_tracked_state brw_vs_constants = {
+   .dirty = {   /* matches the tags in prepare_vs_constants()'s comments */
+      .mesa = (_NEW_PROGRAM_CONSTANTS),
+      .brw = (BRW_NEW_VERTEX_PROGRAM),
+      .cache = 0
+   },
+   .prepare = prepare_vs_constants,   /* no .emit hook is set for this atom */
+};
+
+/**
+ * Update the surface state for a VS constant buffer.
+ *
+ * When a constant buffer exists, brw_create_constant_surface() is handed
+ * brw->vs.surf_bo[surf] and brw->vs.surf_offset[surf] to fill in; when it
+ * does not, brw->vs.surf_bo[surf] is released and cleared.
+ */
+static void
+brw_update_vs_constant_surface(GLcontext *ctx, GLuint surf)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_vertex_program *vp =
+      (struct brw_vertex_program *) brw->vertex_program;
+   const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+
+   assert(surf == 0);
+
+   if (brw->vs.const_bo != NULL) {
+      brw_create_constant_surface(brw, brw->vs.const_bo, params->NumParameters,
+                                  &brw->vs.surf_bo[surf],
+                                  &brw->vs.surf_offset[surf]);
+      return;
+   }
+
+   /* There's no constant buffer, so no surface BO is needed to point at
+    * it. */
+   drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
+   brw->vs.surf_bo[surf] = NULL;
+}
+
+
+/* Validate the VS constant buffer, if there is one, and flag a change in
+ * the number of VS surfaces.
+ */
+static void
+prepare_vs_surfaces(struct brw_context *brw)
+{
+   const int nr_surfaces = (brw->vs.const_bo != NULL) ? 1 : 0;
+
+   if (nr_surfaces)
+      brw_add_validated_bo(brw, brw->vs.const_bo);
+   if (brw->vs.nr_surfaces == nr_surfaces)
+      return;
+   brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+   brw->vs.nr_surfaces = nr_surfaces;
+}
+
+/**
+ * Vertex shader surfaces (constant buffer).
+ *
+ * Consumes the constant-buffer and surface-count updates.  With no VS
+ * surfaces any existing binding table BO is dropped; otherwise the
+ * constant surface is refreshed and a new binding table is obtained from
+ * brw_state_batch() and filled in.  BRW_NEW_BINDING_TABLE is flagged
+ * whenever the table is dropped or rebuilt.
+ */
+static void upload_vs_surfaces(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   uint32_t *bind;
+   int i;
+
+   /* BRW_NEW_NR_VS_SURFACES */
+   if (brw->vs.nr_surfaces == 0) {
+      /* Nothing to bind: let go of a binding table BO left from before. */
+      if (brw->vs.bind_bo != NULL) {
+	 drm_intel_bo_unreference(brw->vs.bind_bo);
+	 brw->vs.bind_bo = NULL;
+	 brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
+      }
+      return;
+   }
+
+   brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
+
+   /* Might want to calculate nr_surfaces first, to avoid taking up so much
+    * space for the binding table. (once we have vs samplers)
+    */
+   bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_VS_MAX_SURF,
+			  32, &brw->vs.bind_bo, &brw->vs.bind_bo_offset);
+
+   /* BRW_NEW_VS_CONSTBUF: entries that have no surface BO are written
+    * as 0. */
+   for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+      bind[i] = brw->vs.surf_bo[i] ? brw->vs.surf_offset[i] : 0;
+   }
+
+   brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
+}
+
+const struct brw_tracked_state brw_vs_surfaces = {
+   .dirty = {
+      .mesa = 0,
+      .brw = (BRW_NEW_VS_CONSTBUF |
+	      BRW_NEW_NR_VS_SURFACES |
+	      BRW_NEW_BATCH),
+      .cache = 0
+   },
+   .prepare = prepare_vs_surfaces,   /* validates const_bo, counts surfaces */
+   .emit = upload_vs_surfaces,       /* fills the VS binding table */
+};
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
new file mode 100644
index 0000000000..14227a5133
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -0,0 +1,206 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+**********************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/colormac.h"
+
+#include "intel_batchbuffer.h" 
+#include "intel_regions.h" 
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_draw.h"
+#include "brw_state.h"
+#include "brw_vs.h"
+#include "brw_wm.h"
+
+/* Drop the reference held through *bo and clear the pointer. */
+static void dri_bo_release(drm_intel_bo **bo)
+{
+   drm_intel_bo_unreference(*bo);
+   *bo = NULL;
+}
+
+
+/**
+ * called from intelDestroyContext(); releases the context's regions and BOs
+ */
+static void brw_destroy_context( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   int i;
+
+   brw_destroy_state(brw);
+   brw_draw_destroy( brw );
+   brw_clear_validated_bos(brw);
+   if (brw->wm.compile_data) {
+      free(brw->wm.compile_data->instruction);
+      free(brw->wm.compile_data->vreg);
+      free(brw->wm.compile_data->refs);
+      free(brw->wm.compile_data->prog_instructions);
+      free(brw->wm.compile_data);
+   }
+   for (i = 0; i < brw->state.nr_color_regions; i++)
+      intel_region_release(&brw->state.color_regions[i]);
+   brw->state.nr_color_regions = 0;
+   intel_region_release(&brw->state.depth_region);
+
+   dri_bo_release(&brw->curbe.curbe_bo);
+   dri_bo_release(&brw->vs.prog_bo);
+   dri_bo_release(&brw->vs.state_bo);
+   dri_bo_release(&brw->vs.bind_bo);
+   dri_bo_release(&brw->vs.const_bo);
+   for (i = 0; i < BRW_VS_MAX_SURF; i++)
+      dri_bo_release(&brw->vs.surf_bo[i]);
+   dri_bo_release(&brw->gs.prog_bo);
+   dri_bo_release(&brw->gs.state_bo);
+   dri_bo_release(&brw->clip.prog_bo);
+   dri_bo_release(&brw->clip.state_bo);
+   dri_bo_release(&brw->clip.vp_bo);
+   dri_bo_release(&brw->sf.prog_bo);
+   dri_bo_release(&brw->sf.state_bo);
+   dri_bo_release(&brw->sf.vp_bo);
+   for (i = 0; i < BRW_MAX_TEX_UNIT; i++)
+      dri_bo_release(&brw->wm.sdc_bo[i]);
+   dri_bo_release(&brw->wm.bind_bo);
+   for (i = 0; i < BRW_WM_MAX_SURF; i++)
+      dri_bo_release(&brw->wm.surf_bo[i]);
+   dri_bo_release(&brw->wm.sampler_bo);
+   dri_bo_release(&brw->wm.prog_bo);
+   dri_bo_release(&brw->wm.state_bo);
+   dri_bo_release(&brw->wm.const_bo);
+   dri_bo_release(&brw->cc.prog_bo);
+   dri_bo_release(&brw->cc.state_bo);
+   dri_bo_release(&brw->cc.vp_bo);
+   dri_bo_release(&brw->cc.blend_state_bo);
+   dri_bo_release(&brw->cc.depth_stencil_state_bo);
+   dri_bo_release(&brw->cc.color_calc_state_bo);
+   free(brw->curbe.last_buf);
+   free(brw->curbe.next_buf);
+}
+
+
+/**
+ * Called from intelDrawBuffer(): swap the tracked color/depth regions for
+ * the given ones, flagging BRW_NEW_DEPTH_BUFFER if the depth region differs.
+ */
+static void brw_set_draw_region(struct intel_context *intel,
+                                struct intel_region *color_regions[],
+                                struct intel_region *depth_region,
+                                GLuint num_color_regions)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   GLuint n;
+
+   /* Compare the depth region before the old one is released. */
+   if (brw->state.depth_region != depth_region)
+      brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER;
+   for (n = 0; n < brw->state.nr_color_regions; n++)
+      intel_region_release(&brw->state.color_regions[n]);
+   intel_region_release(&brw->state.depth_region);
+   /* Reference the new color/depth regions. */
+   for (n = 0; n < num_color_regions; n++)
+      intel_region_reference(&brw->state.color_regions[n], color_regions[n]);
+   intel_region_reference(&brw->state.depth_region, depth_region);
+   brw->state.nr_color_regions = num_color_regions;
+}
+
+
+/**
+ * Called from intel_batchbuffer_flush and children before sending a
+ * batchbuffer off: ends the query and lets go of the mapped curbe BO.
+ */
+static void brw_finish_batch(struct intel_context *intel)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+
+   brw_emit_query_end(brw);
+   if (brw->curbe.curbe_bo == NULL)
+      return;
+
+   drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
+   dri_bo_release(&brw->curbe.curbe_bo);
+}
+
+
+/**
+ * Called from intelFlushBatchLocked: dirty all state, drop the upload BO.
+ */
+static void brw_new_batch( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+
+   /* Mark all context state as needing to be re-emitted.
+    * This is probably not as severe as on 915, since almost all of our state
+    * is just in referenced buffers.
+    */
+   brw->state.dirty.brw |= BRW_NEW_CONTEXT;
+
+   brw->state.dirty.mesa |= ~0;
+   brw->state.dirty.brw |= ~0;
+   brw->state.dirty.cache |= ~0;
+
+   /* Move to the end of the current upload buffer so that we'll force
+    * choosing a new buffer next time.
+    */
+   if (brw->vb.upload.bo == NULL)
+      return;
+
+   dri_bo_release(&brw->vb.upload.bo);
+   brw->vb.upload.offset = 0;
+}
+
+static void brw_invalidate_state( struct intel_context *intel, GLuint new_state )
+{
+   /* nothing to do; brwInitVtbl() installs this as vtbl.invalidate_state */
+}
+
+/* Install the i965 hooks in brw->intel.vtbl; five hooks are left NULL. */
+void brwInitVtbl( struct brw_context *brw )
+{
+   brw->intel.vtbl.invalidate_state = brw_invalidate_state;
+   brw->intel.vtbl.new_batch = brw_new_batch;
+   brw->intel.vtbl.finish_batch = brw_finish_batch;
+   brw->intel.vtbl.destroy = brw_destroy_context;
+   brw->intel.vtbl.set_draw_region = brw_set_draw_region;
+   brw->intel.vtbl.debug_batch = brw_debug_batch;
+
+   brw->intel.vtbl.check_vertex_size = NULL;
+   brw->intel.vtbl.emit_state = NULL;
+   brw->intel.vtbl.reduced_primitive_state = NULL;
+   brw->intel.vtbl.render_start = NULL;
+   brw->intel.vtbl.update_texture_state = NULL;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
new file mode 100644
index 0000000000..e182fc3202
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -0,0 +1,390 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+             
+#include "brw_context.h"
+#include "brw_wm.h"
+#include "brw_state.h"
+
+
+/** Return the number of source arguments that \p opcode takes. */
+GLuint brw_wm_nr_args( GLuint opcode )
+{
+   switch (opcode) {
+   case WM_PINTERP:
+   case WM_FB_WRITE:
+      return 3;
+   case WM_PIXELW:
+   case WM_LINTERP:
+      return 2;
+   case WM_DELTAXY:
+   case WM_WPOSXY:
+   case WM_CINTERP:
+      return 1;
+   case WM_PIXELXY:
+   case WM_FRONTFACING:
+      return 0;
+   }
+   /* Not one of the WM_* opcodes handled above: ask Mesa about it. */
+   assert(opcode < MAX_OPCODE);
+   return _mesa_num_inst_src_regs(opcode);
+}
+
+
+GLuint brw_wm_is_scalar_result( GLuint opcode )
+{
+   /* Returns 1 for the opcodes listed below and 0 for every other one. */
+   switch (opcode) {
+   default:
+      return 0;
+   case OPCODE_DST:
+   case OPCODE_DPH:
+   case OPCODE_DP4:
+   case OPCODE_DP3:
+   case OPCODE_SIN:
+   case OPCODE_RSQ:
+   case OPCODE_RCP:
+   case OPCODE_POW:
+   case OPCODE_LG2:
+   case OPCODE_EX2:
+   case OPCODE_COS:
+      return 1;
+   }
+}
+
+
+/**
+ * Code generation for non-GLSL shaders.  Such shaders have no flow control
+ * instructions, which makes SSA-style optimizations easier to do.
+ *
+ * The passes run in the fixed order below before the GEN4 code is emitted.
+ */
+static void
+brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+   /* Step 1: augment the fragment program with instructions for pre- and
+    * post-fragment-program tasks such as interpolation and fogging.
+    */
+   brw_wm_pass_fp(c);
+
+   /* Step 2: translate to the intermediate representation and build the
+    * register usage chains.
+    */
+   brw_wm_pass0(c);
+
+   /* Step 3: dead code removal.
+    */
+   brw_wm_pass1(c);
+
+   /* Step 4: register allocation.  Only half of the GRF file is offered to
+    * the allocator because we operate on 16 pixels at a time and need two
+    * GRF entries for each logical shader register.
+    */
+   c->grf_limit = BRW_WM_MAX_GRF / 2;
+
+   brw_wm_pass2(c);
+
+   /* Publish how many general-purpose registers ended up being used.
+    */
+   c->prog_data.total_grf = c->max_wm_grf;
+
+   /* Scratch space is used for register spilling; the total is zero when
+    * c->last_scratch is zero and last_scratch + 0x40 otherwise.
+    */
+   c->prog_data.total_scratch =
+      c->last_scratch ? c->last_scratch + 0x40 : 0;
+
+   /* Step 5: emit GEN4 code.
+    */
+   brw_wm_emit(c);
+}
+
+
+/**
+ * Compile fragment program \p fp for \p key and upload it to brw->wm.prog_bo.
+ * fp->isGLSL selects between the GLSL and the non-GLSL code generator.  If
+ * the compile scratch data cannot be allocated, nothing is compiled.
+ */
+static void do_wm_prog( struct brw_context *brw,
+			struct brw_fragment_program *fp, 
+			struct brw_wm_prog_key *key)
+{
+   struct brw_wm_compile *c;
+   const GLuint *program;
+   GLuint program_size;
+
+   c = brw->wm.compile_data;
+   if (c == NULL) {
+      c = calloc(1, sizeof(*brw->wm.compile_data));
+      if (c == NULL)
+         return;   /* out of memory, and there is no way to signal it */
+      c->instruction = calloc(BRW_WM_MAX_INSN, sizeof(*c->instruction));
+      c->prog_instructions = calloc(BRW_WM_MAX_INSN,
+					  sizeof(*c->prog_instructions));
+      c->vreg = calloc(BRW_WM_MAX_VREG, sizeof(*c->vreg));
+      c->refs = calloc(BRW_WM_MAX_REF, sizeof(*c->refs));
+      if (!c->instruction || !c->prog_instructions || !c->vreg || !c->refs) {
+         /* A partial context would crash the passes; free it, retry later. */
+         free(c->instruction);
+         free(c->prog_instructions);
+         free(c->vreg);
+         free(c->refs);
+         free(c);
+         return;
+      }
+      brw->wm.compile_data = c;
+   } else {
+      void *instruction = c->instruction;
+      void *prog_instructions = c->prog_instructions;
+      void *vreg = c->vreg;
+      void *refs = c->refs;
+      memset(c, 0, sizeof(*brw->wm.compile_data));   /* keep only the arrays */
+      c->instruction = instruction;
+      c->prog_instructions = prog_instructions;
+      c->vreg = vreg;
+      c->refs = refs;
+   }
+   memcpy(&c->key, key, sizeof(*key));
+
+   c->fp = fp;
+   c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
+   brw_init_compile(brw, &c->func);
+
+   /* temporary sanity check assertion */
+   ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
+
+   /* Shaders which use GLSL features such as flow control are handled
+    * differently from "simple" shaders. */
+   if (fp->isGLSL) {
+      c->dispatch_width = 8;
+      brw_wm_glsl_emit(brw, c);
+   }
+   else {
+      c->dispatch_width = 16;
+      brw_wm_non_glsl_emit(brw, c);
+   }
+
+   if (INTEL_DEBUG & DEBUG_WM)
+      fprintf(stderr, "\n");
+
+   /* get the program */
+   program = brw_get_program(&c->func, &program_size);
+
+   drm_intel_bo_unreference(brw->wm.prog_bo);
+   brw->wm.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_WM_PROG,
+						   &c->key, sizeof(c->key),
+						   NULL, 0,
+						   program, program_size,
+						   &c->prog_data,
+						   sizeof(c->prog_data),
+						   &brw->wm.prog_data);
+}
+
+/* Fill *key from the current GL and driver state; brw_prepare_wm_prog() */
+/* uses the result to look up (or compile) the matching WM program.      */
+static void brw_wm_populate_key( struct brw_context *brw,
+				 struct brw_wm_prog_key *key )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   const struct brw_fragment_program *fp = 
+      (struct brw_fragment_program *)brw->fragment_program;
+   GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
+   GLuint lookup = 0;   /* IZ_* bits accumulated below */
+   GLuint line_aa;
+   GLuint i;
+
+   memset(key, 0, sizeof(*key));   /* clears padding too; presumably the cache compares raw bytes -- confirm */
+
+   /* Build the IZ_* bit index that is handed to brw_wm_lookup_iz() below.
+    */
+   /* _NEW_COLOR */
+   if (fp->program.UsesKill ||
+       ctx->Color.AlphaEnabled)
+      lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+   if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+      lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+   /* _NEW_DEPTH */
+   if (ctx->Depth.Test)
+      lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+
+   if (ctx->Depth.Test &&  
+       ctx->Depth.Mask) /* ?? -- write bit is only set while the test is enabled */
+      lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+
+   /* _NEW_STENCIL */
+   if (ctx->Stencil._Enabled) {
+      lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
+
+      if (ctx->Stencil.WriteMask[0] ||
+	  ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
+	 lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
+   }
+
+   line_aa = AA_NEVER;   /* stays AA_NEVER unless line smoothing applies below */
+
+   /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
+   if (ctx->Line.SmoothFlag) {
+      if (brw->intel.reduced_primitive == GL_LINES) {
+	 line_aa = AA_ALWAYS;
+      }
+      else if (brw->intel.reduced_primitive == GL_TRIANGLES) {
+	 if (ctx->Polygon.FrontMode == GL_LINE) {
+	    line_aa = AA_SOMETIMES;
+
+	    if (ctx->Polygon.BackMode == GL_LINE ||
+		(ctx->Polygon.CullFlag &&
+		 ctx->Polygon.CullFaceMode == GL_BACK))
+	       line_aa = AA_ALWAYS;
+	 }
+	 else if (ctx->Polygon.BackMode == GL_LINE) {
+	    line_aa = AA_SOMETIMES;
+
+	    if ((ctx->Polygon.CullFlag &&
+		 ctx->Polygon.CullFaceMode == GL_FRONT))
+	       line_aa = AA_ALWAYS;
+	 }
+      }
+   }
+	 
+   brw_wm_lookup_iz(line_aa,
+		    lookup,
+		    uses_depth,
+		    key);
+
+
+   /* BRW_NEW_WM_INPUT_DIMENSIONS (entry [4-1]; presumably the 4-component inputs -- confirm) */
+   key->proj_attrib_mask = brw->wm.input_size_masks[4-1];
+
+   /* _NEW_LIGHT */
+   key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
+
+   /* _NEW_HINT */
+   key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+
+   /* _NEW_TEXTURE */
+   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+      const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
+
+      if (unit->_ReallyEnabled) {
+         const struct gl_texture_object *t = unit->_Current;
+         const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
+	 if (img->InternalFormat == GL_YCBCR_MESA) {
+	    key->yuvtex_mask |= 1 << i;
+	    if (img->TexFormat == MESA_FORMAT_YCBCR)
+		key->yuvtex_swap_mask |= 1 << i;
+	 }
+
+         key->tex_swizzles[i] = t->_Swizzle;
+      }
+      else {
+         key->tex_swizzles[i] = SWIZZLE_NOOP;
+      }
+   }
+
+   /* Shadow samplers, taken straight from the program */
+   key->shadowtex_mask = fp->program.Base.ShadowSamplers;
+
+   /* _NEW_BUFFERS */
+   /*
+    * Include the draw buffer origin and height so that we can calculate
+    * fragment position values relative to the bottom left of the drawable,
+    * from the incoming screen origin relative position we get as part of our
+    * payload.
+    *
+    * This is only needed for the WM_WPOSXY opcode when the fragment program
+    * uses the gl_FragCoord input.
+    *
+    * We could avoid recompiling by including this as a constant referenced by
+    * our program, but if we were to do that it would also be nice to handle
+    * getting that constant updated at batchbuffer submit time (when we
+    * hold the lock and know where the buffer really is) rather than at emit
+    * time when we don't hold the lock and are just guessing.  We could also
+    * just avoid using this as key data if the program doesn't use
+    * fragment.position.
+    *
+    * For DRI2 the origin_x/y will always be (0,0) but we still need the
+    * drawable height in order to invert the Y axis.
+    */
+   if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) {
+      key->drawable_height = ctx->DrawBuffer->Height;
+   }
+
+   key->nr_color_regions = brw->state.nr_color_regions;
+
+   /* CACHE_NEW_VS_PROG */
+   key->vp_outputs_written = brw->vs.prog_data->outputs_written;
+
+   /* The unique fragment program ID */
+   key->program_string_id = fp->id;
+}
+
+
+static void brw_prepare_wm_prog(struct brw_context *brw)
+{
+   struct brw_fragment_program *fp =
+      (struct brw_fragment_program *) brw->fragment_program;
+   struct brw_wm_prog_key key;
+   /* Describe the current state, drop the old program reference and look
+    * for an already uploaded program that matches the description.
+    */
+   brw_wm_populate_key(brw, &key);
+   drm_intel_bo_unreference(brw->wm.prog_bo);
+   brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
+				      &key, sizeof(key), NULL, 0,
+				      &brw->wm.prog_data);
+   if (brw->wm.prog_bo != NULL)
+      return;
+   /* Nothing in the cache for this key: compile the program now. */
+   do_wm_prog(brw, fp, &key);
+}
+
+
+const struct brw_tracked_state brw_wm_prog = {
+   /* One flag for each state group that brw_wm_populate_key() reads. */
+   .dirty = {
+      .mesa  = (_NEW_BUFFERS |
+                _NEW_COLOR |
+                _NEW_DEPTH |
+                _NEW_HINT |
+                _NEW_LIGHT |
+                _NEW_LINE |
+                _NEW_POLYGON |
+                _NEW_STENCIL |
+                _NEW_TEXTURE),
+      .brw   = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_REDUCED_PRIMITIVE |
+                BRW_NEW_WM_INPUT_DIMENSIONS),
+      .cache = CACHE_NEW_VS_PROG,
+   },
+   .prepare = brw_prepare_wm_prog
+};
+
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
new file mode 100644
index 0000000000..277b6de442
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -0,0 +1,453 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+              
+
+#ifndef BRW_WM_H
+#define BRW_WM_H
+
+
+#include "shader/prog_instruction.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define SATURATE (1<<5)	/* used in masks next to WRITEMASK_*; defined again, identically, further down */
+
+/* A big lookup table is used to figure out which and how many
+ * additional regs will be inserted before the main payload in the WM
+ * program execution.  These mainly relate to depth and stencil
+ * processing and the early-depth-test optimization.
+ */
+#define IZ_PS_KILL_ALPHATEST_BIT    (1 << 0)
+#define IZ_PS_COMPUTES_DEPTH_BIT    (1 << 1)
+#define IZ_DEPTH_WRITE_ENABLE_BIT   (1 << 2)
+#define IZ_DEPTH_TEST_ENABLE_BIT    (1 << 3)
+#define IZ_STENCIL_WRITE_ENABLE_BIT (1 << 4)
+#define IZ_STENCIL_TEST_ENABLE_BIT  (1 << 5)
+#define IZ_BIT_MAX                  (1 << 6)
+
+#define AA_NEVER     0
+#define AA_SOMETIMES 1
+#define AA_ALWAYS    2
+
+struct brw_wm_prog_key {	/* WM program cache key; filled in by brw_wm_populate_key() */
+   GLuint source_depth_reg:3;
+   GLuint aa_dest_stencil_reg:3;
+   GLuint dest_depth_reg:3;
+   GLuint nr_depth_regs:3;
+   GLuint computes_depth:1;	/* could be derived from program string */
+   GLuint source_depth_to_render_target:1;
+   GLuint flat_shade:1;	/* ctx->Light.ShadeModel == GL_FLAT */
+   GLuint linear_color:1;  /**< linear interpolation vs perspective interp */
+   GLuint runtime_check_aads_emit:1;
+   GLuint nr_color_regions:5;	/* copied from brw->state.nr_color_regions */
+   
+   GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
+   GLuint shadowtex_mask:16;	/* the program's ShadowSamplers mask */
+   GLuint yuvtex_mask:16;	/* bit i set: unit i has a GL_YCBCR_MESA texture */
+   GLuint yuvtex_swap_mask:16;	/* UV swapped (TexFormat is MESA_FORMAT_YCBCR) */
+
+   GLushort tex_swizzles[BRW_MAX_TEX_UNIT];	/* SWIZZLE_NOOP for disabled units */
+
+   GLushort drawable_height;	/* only set when the program reads WPOS */
+   GLbitfield64 vp_outputs_written;	/* from brw->vs.prog_data */
+   GLuint program_string_id:32;	/* the unique fragment program ID (fp->id) */
+};
+
+
+/* A bit of a glossary:
+ *
+ * brw_wm_value: A computed value or program input.  Values are
+ * constant, they are created once and are never modified.  When a
+ * fragment program register is written or overwritten, new values are
+ * created fresh, preserving the rule that values are constant.
+ *
+ * brw_wm_ref: A reference to a value.  Wherever a value is used by an
+ * instruction or as a program output, that is tracked with an
+ * instance of this struct.  All references to a value occur after it
+ * is created.  After the last reference, a value is dead and can be
+ * discarded.
+ *
+ * brw_wm_grf: Represents a physical hardware register.  May be either
+ * empty or hold a value.  Register allocation is the process of
+ * assigning values to grf registers.  This occurs in pass2 and the
+ * brw_wm_grf struct is not used before that.
+ *
+ * Fragment program registers: These are time-varying constructs that
+ * are hard to reason about and which we translate away in pass0.  A
+ * single fragment program register element (eg. temp[0].x) will be
+ * translated to one or more brw_wm_value structs, one for each time
+ * that temp[0].x is written to during the program. 
+ */
+
+
+
+/* Used in pass2 to track register allocation.
+ */
+struct brw_wm_grf {
+   struct brw_wm_value *value;	/* value held by this register, if any */
+   GLuint nextuse;	/* NOTE(review): presumably where the value is used next -- confirm in pass2 */
+};
+
+struct brw_wm_value {	/* a computed value or program input, see the glossary above */
+   struct brw_reg hw_reg;	/* emitted to this reg, may not always be there */
+   struct brw_wm_ref *lastuse;	/* last reference to this value (cf. brw_wm_ref::prevuse) */
+   struct brw_wm_grf *resident; 	/* NOTE(review): presumably the grf currently holding it -- confirm */
+   GLuint contributes_to_output:1;
+   GLuint spill_slot:16;	/* if non-zero, spill immediately after calculation */
+};
+
+struct brw_wm_ref {	/* one use of a value, see the glossary above */
+   struct brw_reg hw_reg;	/* nr filled in in pass2, everything else, pass0 */
+   struct brw_wm_value *value;	/* the value being referenced */
+   struct brw_wm_ref *prevuse;	/* NOTE(review): presumably the previous use of the same value -- confirm */
+   GLuint unspill_reg:7;	/* unspill to reg */
+   GLuint emitted:1;
+   GLuint insn:24;	/* NOTE(review): presumably the index of the using instruction -- confirm */
+};
+
+struct brw_wm_constref {	/* element type of brw_wm_compile::constref[] */
+   const struct brw_wm_ref *ref;	/* reference paired with constval */
+   GLfloat constval;	/* the float constant itself */
+};
+
+
+struct brw_wm_instruction {
+   struct brw_wm_value *dst[4];	/* four destination slots; a slot may be NULL */
+   struct brw_wm_ref *src[3][4];	/* [arg][slot]; up to 3 args, entries may be NULL */
+   GLuint opcode:8;	/* a Mesa opcode or one of the WM_* opcodes below */
+   GLuint saturate:1;
+   GLuint writemask:4;
+   GLuint tex_unit:4;   /* texture unit for TEX, TXD, TXP instructions */
+   GLuint tex_idx:3;    /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
+   GLuint tex_shadow:1; /* do shadow comparison? */
+   GLuint eot:1;    	/* End of thread indicator for FB_WRITE*/
+   GLuint target:10;    /* target binding table index for FB_WRITE*/
+};
+
+
+#define BRW_WM_MAX_INSN  (MAX_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3)	/* entries in c->instruction */
+#define BRW_WM_MAX_GRF   128		/* hardware limit */
+#define BRW_WM_MAX_VREG  (BRW_WM_MAX_INSN * 4)	/* entries in c->vreg; 4 per instruction, cf. dst[4] */
+#define BRW_WM_MAX_REF   (BRW_WM_MAX_INSN * 12)	/* entries in c->refs; 12 per instruction, cf. src[3][4] */
+#define BRW_WM_MAX_PARAM 256	/* size of brw_wm_compile::creg[] */
+#define BRW_WM_MAX_CONST 256	/* size of brw_wm_compile::constref[] */
+#define BRW_WM_MAX_SUBROUTINE 16	/* size of brw_wm_compile::subroutines[] */
+
+/* used in masks next to WRITEMASK_*.  Repeats, with the same value, the #define near the top of this header. */
+#define SATURATE (1<<5)
+
+
+/* New opcodes to track internal operations required for WM unit.
+ * These are added early so that the registers used can be tracked,
+ * freed and reused like those of other instructions.
+ */
+#define WM_PIXELXY        (MAX_OPCODE)	/* numbering starts right after Mesa's own opcodes */
+#define WM_DELTAXY        (MAX_OPCODE + 1)
+#define WM_PIXELW         (MAX_OPCODE + 2)
+#define WM_LINTERP        (MAX_OPCODE + 3)
+#define WM_PINTERP        (MAX_OPCODE + 4)
+#define WM_CINTERP        (MAX_OPCODE + 5)
+#define WM_WPOSXY         (MAX_OPCODE + 6)
+#define WM_FB_WRITE       (MAX_OPCODE + 7)
+#define WM_FRONTFACING    (MAX_OPCODE + 8)
+#define MAX_WM_OPCODE     (MAX_OPCODE + 9)	/* one past the last WM_* opcode */
+
+#define PROGRAM_PAYLOAD   (PROGRAM_FILE_MAX)	/* extra register file index, used as [PROGRAM_PAYLOAD+1] array bound */
+#define PAYLOAD_DEPTH     (FRAG_ATTRIB_MAX)	/* NOTE(review): presumably the payload index just after the inputs -- confirm */
+
+struct brw_wm_compile {	/* per-compile scratch state; allocated once and reused by do_wm_prog() */
+   struct brw_compile func;
+   struct brw_wm_prog_key key;	/* copy of the key being compiled for */
+   struct brw_wm_prog_data prog_data;	/* uploaded next to the program as aux data */
+
+   struct brw_fragment_program *fp;
+
+   GLfloat (*env_param)[4];	/* ctx->FragmentProgram.Parameters */
+
+   enum {
+      START,
+      PASS2_DONE	/* from here on the debug printers show hardware registers */
+   } state;
+
+   /* Initial pass - translate fp instructions to fp instructions,
+    * simplifying and adding instructions for interpolation and
+    * framebuffer writes.
+    */
+   struct prog_instruction *prog_instructions;	/* BRW_WM_MAX_INSN entries */
+   GLuint nr_fp_insns;
+   GLuint fp_temp;
+   GLuint fp_interp_emitted;
+   GLuint fp_fragcolor_emitted;
+
+   struct prog_src_register pixel_xy;
+   struct prog_src_register delta_xy;
+   struct prog_src_register pixel_w;
+
+
+   struct brw_wm_value *vreg;	/* BRW_WM_MAX_VREG entries */
+   GLuint nr_vreg;
+
+   struct brw_wm_value creg[BRW_WM_MAX_PARAM];
+   GLuint nr_creg;
+
+   struct {
+      struct brw_wm_value depth[4]; /* includes r0/r1 */
+      struct brw_wm_value input_interp[FRAG_ATTRIB_MAX];
+   } payload;
+
+
+   const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4];
+
+   struct brw_wm_ref undef_ref;
+   struct brw_wm_value undef_value;	/* printed as "undef" by brw_wm_print_value() */
+
+   struct brw_wm_ref *refs;	/* BRW_WM_MAX_REF entries */
+   GLuint nr_refs;
+
+   struct brw_wm_instruction *instruction;	/* BRW_WM_MAX_INSN entries */
+   GLuint nr_insns;	/* number of valid entries in instruction[] */
+
+   struct brw_wm_constref constref[BRW_WM_MAX_CONST];
+   GLuint nr_constrefs;
+
+   struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
+
+   GLuint grf_limit;	/* set to BRW_WM_MAX_GRF / 2 before pass2 on the non-GLSL path */
+   GLuint max_wm_grf;	/* becomes prog_data.total_grf on the non-GLSL path */
+   GLuint last_scratch;	/* non-zero means scratch space is needed */
+
+   GLuint cur_inst;  /**< index of current instruction */
+
+   GLboolean out_of_regs;  /**< ran out of GRF registers? */
+
+   /** Mapping from Mesa registers to hardware registers */
+   struct {
+      GLboolean inited;
+      struct brw_reg reg;
+   } wm_regs[PROGRAM_PAYLOAD+1][256][4];
+
+   GLboolean used_grf[BRW_WM_MAX_GRF];
+   GLuint first_free_grf;
+   struct brw_reg stack;
+   struct brw_reg emit_mask_reg;
+   GLuint tmp_regs[BRW_WM_MAX_GRF];
+   GLuint tmp_index;
+   GLuint tmp_max;
+   GLuint subroutines[BRW_WM_MAX_SUBROUTINE];
+   GLuint dispatch_width;	/* 8 for GLSL programs, 16 otherwise (see do_wm_prog) */
+
+   /** we may need up to 3 constants per instruction (if use_const_buffer) */
+   struct {
+      GLint index;
+      struct brw_reg reg;
+   } current_const[3];
+};
+
+
+/** Bits for prog_instruction::Aux field: bit 0 is the EOT flag, the target is stored above it */
+#define INST_AUX_EOT      0x1
+#define INST_AUX_TARGET(T)  ((T) << 1)
+#define INST_AUX_GET_TARGET(AUX) ((AUX) >> 1)
+
+
+GLuint brw_wm_nr_args( GLuint opcode );
+GLuint brw_wm_is_scalar_result( GLuint opcode );
+
+void brw_wm_pass_fp( struct brw_wm_compile *c );
+void brw_wm_pass0( struct brw_wm_compile *c );
+void brw_wm_pass1( struct brw_wm_compile *c );
+void brw_wm_pass2( struct brw_wm_compile *c );
+void brw_wm_emit( struct brw_wm_compile *c );
+GLboolean brw_wm_arg_can_be_immediate(enum prog_opcode, int arg);
+void brw_wm_print_value( struct brw_wm_compile *c,
+			 struct brw_wm_value *value );
+
+void brw_wm_print_ref( struct brw_wm_compile *c,
+		       struct brw_wm_ref *ref );
+
+void brw_wm_print_insn( struct brw_wm_compile *c,
+			struct brw_wm_instruction *inst );
+
+void brw_wm_print_program( struct brw_wm_compile *c,
+			   const char *stage );
+
+void brw_wm_lookup_iz( GLuint line_aa,
+		       GLuint lookup,
+		       GLboolean ps_uses_depth,
+		       struct brw_wm_prog_key *key );
+
+GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
+void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
+
+/* brw_wm_emit.c */
+void emit_alu1(struct brw_compile *p,
+	       struct brw_instruction *(*func)(struct brw_compile *,
+					       struct brw_reg,
+					       struct brw_reg),
+	       const struct brw_reg *dst,
+	       GLuint mask,
+	       const struct brw_reg *arg0);
+void emit_alu2(struct brw_compile *p,
+	       struct brw_instruction *(*func)(struct brw_compile *,
+					       struct brw_reg,
+					       struct brw_reg,
+					       struct brw_reg),
+	       const struct brw_reg *dst,
+	       GLuint mask,
+	       const struct brw_reg *arg0,
+	       const struct brw_reg *arg1);
+void emit_cinterp(struct brw_compile *p,
+		  const struct brw_reg *dst,
+		  GLuint mask,
+		  const struct brw_reg *arg0);
+void emit_cmp(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1,
+	      const struct brw_reg *arg2);
+void emit_ddxy(struct brw_compile *p,
+	       const struct brw_reg *dst,
+	       GLuint mask,
+	       GLboolean is_ddx,
+	       const struct brw_reg *arg0);
+void emit_delta_xy(struct brw_compile *p,
+		   const struct brw_reg *dst,
+		   GLuint mask,
+		   const struct brw_reg *arg0);
+void emit_dp3(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1);
+void emit_dp4(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1);
+void emit_dph(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1);
+void emit_fb_write(struct brw_wm_compile *c,
+		   struct brw_reg *arg0,
+		   struct brw_reg *arg1,
+		   struct brw_reg *arg2,
+		   GLuint target,
+		   GLuint eot);
+void emit_frontfacing(struct brw_compile *p,
+		      const struct brw_reg *dst,
+		      GLuint mask);
+void emit_linterp(struct brw_compile *p,
+		  const struct brw_reg *dst,
+		  GLuint mask,
+		  const struct brw_reg *arg0,
+		  const struct brw_reg *deltas);
+void emit_lrp(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1,
+	      const struct brw_reg *arg2);
+void emit_mad(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1,
+	      const struct brw_reg *arg2);
+void emit_math1(struct brw_wm_compile *c,
+		GLuint function,
+		const struct brw_reg *dst,
+		GLuint mask,
+		const struct brw_reg *arg0);
+void emit_math2(struct brw_wm_compile *c,
+		GLuint function,
+		const struct brw_reg *dst,
+		GLuint mask,
+		const struct brw_reg *arg0,
+		const struct brw_reg *arg1);
+void emit_min(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1);
+void emit_max(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1);
+void emit_pinterp(struct brw_compile *p,
+		  const struct brw_reg *dst,
+		  GLuint mask,
+		  const struct brw_reg *arg0,
+		  const struct brw_reg *deltas,
+		  const struct brw_reg *w);
+void emit_pixel_xy(struct brw_wm_compile *c,
+		   const struct brw_reg *dst,
+		   GLuint mask);
+void emit_pixel_w(struct brw_wm_compile *c,
+		  const struct brw_reg *dst,
+		  GLuint mask,
+		  const struct brw_reg *arg0,
+		  const struct brw_reg *deltas);
+void emit_sop(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      GLuint cond,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1);
+void emit_tex(struct brw_wm_compile *c,
+	      struct brw_reg *dst,
+	      GLuint dst_flags,
+	      struct brw_reg *arg,
+	      struct brw_reg depth_payload,
+	      GLuint tex_idx,
+	      GLuint sampler,
+	      GLboolean shadow);
+void emit_txb(struct brw_wm_compile *c,
+	      struct brw_reg *dst,
+	      GLuint dst_flags,
+	      struct brw_reg *arg,
+	      struct brw_reg depth_payload,
+	      GLuint tex_idx,
+	      GLuint sampler);
+void emit_wpos_xy(struct brw_wm_compile *c,
+		  const struct brw_reg *dst,
+		  GLuint mask,
+		  const struct brw_reg *arg0);
+void emit_xpd(struct brw_compile *p,
+	      const struct brw_reg *dst,
+	      GLuint mask,
+	      const struct brw_reg *arg0,
+	      const struct brw_reg *arg1);
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm_debug.c b/src/mesa/drivers/dri/i965/brw_wm_debug.c
new file mode 100644
index 0000000000..a78cc8b54e
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_wm_debug.c
@@ -0,0 +1,174 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+               
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+/* Debug helper: print a short name for 'value' on stdout (hw reg, undef, rN, cN, iN, dN or ?). */
+void brw_wm_print_value( struct brw_wm_compile *c,
+		       struct brw_wm_value *value )
+{
+   assert(value);
+   if (c->state >= PASS2_DONE)   /* pass2 is done: show the hardware register */
+      brw_print_reg(value->hw_reg);
+   else if( value == &c->undef_value )
+      printf("undef");
+   else if( value - c->vreg >= 0 &&   /* rN: entry N of c->vreg */
+	    value - c->vreg < BRW_WM_MAX_VREG)
+      printf("r%ld", (long) (value - c->vreg));   /* the difference is a ptrdiff_t, not an int */
+   else if (value - c->creg >= 0 &&   /* cN: entry N of c->creg */
+	    value - c->creg < BRW_WM_MAX_PARAM)
+      printf("c%ld", (long) (value - c->creg));
+   else if (value - c->payload.input_interp >= 0 &&   /* iN: entry N of payload.input_interp */
+	    value - c->payload.input_interp < FRAG_ATTRIB_MAX)
+      printf("i%ld", (long) (value - c->payload.input_interp));
+   else if (value - c->payload.depth >= 0 &&   /* dN: entry N of payload.depth */
+	    value - c->payload.depth < (long) (sizeof c->payload.depth / sizeof c->payload.depth[0]))
+      printf("d%ld", (long) (value - c->payload.depth));
+   else 
+      printf("?");
+}
+
+void brw_wm_print_ref( struct brw_wm_compile *c,
+		       struct brw_wm_ref *ref )
+{
+   const struct brw_reg reg = ref->hw_reg;
+   /* References with an unspill register get a prefix naming the slot. */
+   if (ref->unspill_reg)
+      printf("UNSPILL(%x)/", ref->value->spill_slot);
+   /* Once pass2 is done, print the hardware register itself. */
+   if (c->state >= PASS2_DONE) {
+      brw_print_reg(ref->hw_reg);
+      return;
+   }
+   /* Before that: modifiers, the symbolic value, then any register offset. */
+   printf("%s", reg.negate ? "-" : "");
+   printf("%s", reg.abs ? "abs/" : "");
+   brw_wm_print_value(c, ref->value);
+   if ((reg.nr&1) || reg.subnr)
+      printf("->%d.%d", (reg.nr&1), reg.subnr);
+}
+
+void brw_wm_print_insn( struct brw_wm_compile *c,
+			struct brw_wm_instruction *inst )
+{
+   const GLuint nr_args = brw_wm_nr_args(inst->opcode);
+   GLuint arg, chan;
+
+   /* Destination slots; a NULL slot is shown as '#'. */
+   printf("[");
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_wm_value *dst = inst->dst[chan];
+      if (dst == NULL)
+	 printf("#");
+      else {
+	 brw_wm_print_value(c, dst);
+	 if (dst->spill_slot)
+	    printf("/SPILL(%x)",dst->spill_slot);
+      }
+      if (chan < 3)
+	 printf(",");
+   }
+   printf("]");
+
+   /* Writemask suffix, left out when the mask is WRITEMASK_XYZW. */
+   if (inst->writemask != WRITEMASK_XYZW)
+      printf(".%s%s%s%s", 
+		   GET_BIT(inst->writemask, 0) ? "x" : "",
+		   GET_BIT(inst->writemask, 1) ? "y" : "",
+		   GET_BIT(inst->writemask, 2) ? "z" : "",
+		   GET_BIT(inst->writemask, 3) ? "w" : "");
+
+   /* Opcode name: WM_* opcodes are named here, the rest through Mesa. */
+   switch (inst->opcode) {
+   case WM_PIXELXY:
+      printf(" = PIXELXY");
+      break;
+   case WM_DELTAXY:
+      printf(" = DELTAXY");
+      break;
+   case WM_PIXELW:
+      printf(" = PIXELW");
+      break;
+   case WM_LINTERP:
+      printf(" = LINTERP");
+      break;
+   case WM_PINTERP:
+      printf(" = PINTERP");
+      break;
+   case WM_CINTERP:
+      printf(" = CINTERP");
+      break;
+   case WM_WPOSXY:
+      printf(" = WPOSXY");
+      break;
+   case WM_FB_WRITE:
+      printf(" = FB_WRITE");
+      break;
+   case WM_FRONTFACING:
+      printf(" = FRONTFACING");
+      break;
+   default:
+      printf(" = %s", _mesa_opcode_string(inst->opcode));
+      break;
+   }
+   if (inst->saturate)
+      printf("_SAT");
+   /* Source arguments, one bracketed group each; NULL refs print as '%'. */
+   for (arg = 0; arg < nr_args; arg++) {
+      printf(" [");
+      for (chan = 0; chan < 4; chan++) {
+	 struct brw_wm_ref *src = inst->src[arg][chan];
+	 if (src != NULL)
+	    brw_wm_print_ref(c, src);
+	 else
+	    printf("%%");
+	 if (chan < 3)
+	    printf(",");
+	 else
+	    printf("]");
+      }
+   }
+   printf("\n");
+}
+
+void brw_wm_print_program( struct brw_wm_compile *c,
+			   const char *stage )
+{
+   GLuint idx = 0;
+   /* Stage name as a heading, one line per instruction, then a blank line. */
+   printf("%s:\n", stage);
+   while (idx < c->nr_insns)
+      brw_wm_print_insn(c, &c->instruction[idx++]);
+   printf("\n");
+}
+
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
new file mode 100644
index 0000000000..323cfac8fa
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -0,0 +1,1723 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+               
+
+#include "main/macros.h"
+#include "brw_context.h"
+#include "brw_wm.h"
+
+/* PLN is usable when brw->has_pln is set, the two delta registers are
+ * consecutive and, before gen6, the first of them is even-numbered.
+ */
+static GLboolean can_do_pln(struct intel_context *intel,
+                            const struct brw_reg *deltas)
+{
+   const GLuint first = deltas[0].nr;
+
+   if (!brw_context(&intel->ctx)->has_pln)
+      return GL_FALSE;
+   if (deltas[1].nr != first + 1)
+      return GL_FALSE;
+   if (intel->gen < 6 && (first & 1))
+      return GL_FALSE;
+   return GL_TRUE;
+}
+
+/* Steps to the next register when the region has a non-zero vertical
+ * stride (second half of a 16-wide operand).  Author's caveat: unsure how
+ * correct this is with respect to horizontal vs. vertical strides.
+ */
+static INLINE struct brw_reg sechalf( struct brw_reg reg )
+{
+   reg.nr += reg.vstride ? 1 : 0;
+   return reg;
+}
+
+/* Returns GL_TRUE when source argument 'arg' of 'opcode' can be an immediate
+ * float operand instead of a PROGRAM_CONSTANT value through push/pull.
+ */
+GLboolean
+brw_wm_arg_can_be_immediate(enum prog_opcode opcode, int arg)
+{
+   /* 1-based index of the immediate-capable arg; unlisted opcodes are 0. */
+   static const int opcode_array[] = {
+      [OPCODE_ADD] = 2,
+      [OPCODE_CMP] = 3,
+      [OPCODE_DP3] = 2,
+      [OPCODE_DP4] = 2,
+      [OPCODE_DPH] = 2,
+      [OPCODE_MAX] = 2,
+      [OPCODE_MIN] = 2,
+      [OPCODE_MOV] = 1,
+      [OPCODE_MUL] = 2,
+      [OPCODE_SEQ] = 2,
+      [OPCODE_SGE] = 2,
+      [OPCODE_SGT] = 2,
+      [OPCODE_SLE] = 2,
+      [OPCODE_SLT] = 2,
+      [OPCODE_SNE] = 2,
+      [OPCODE_XPD] = 2,
+   };
+
+   /* MAD and LRP get broken down in a way that allows two
+    * args (1 and 2) to be immediates. */
+   if ((opcode == OPCODE_MAD || opcode == OPCODE_LRP) &&
+       (arg == 1 || arg == 2))
+      return GL_TRUE;
+
+   /* '>=', not '>': index ARRAY_SIZE itself is already out of bounds. */
+   if (opcode >= ARRAY_SIZE(opcode_array))
+      return GL_FALSE;
+
+   return arg == opcode_array[opcode] - 1;
+}
+
+/**
+ * Computes the screen-space x,y position of the pixels.
+ *
+ * This will be used by emit_delta_xy() or emit_wpos_xy() for
+ * interpolation of attributes.
+ *
+ * Payload R0:
+ *
+ * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
+ *         corresponding to each of the 16 execution channels.
+ * R0.1..8 -- ?
+ * R1.0 -- triangle vertex 0.X
+ * R1.1 -- triangle vertex 0.Y
+ * R1.2 -- tile 0 x,y coords (2 packed uwords)
+ * R1.3 -- tile 1 x,y coords (2 packed uwords)
+ * R1.4 -- tile 2 x,y coords (2 packed uwords)
+ * R1.5 -- tile 3 x,y coords (2 packed uwords)
+ * R1.6 -- ?
+ * R1.7 -- ?
+ * R1.8 -- ?
+ */
+void emit_pixel_xy(struct brw_wm_compile *c,
+                   const struct brw_reg *dst,
+                   GLuint mask)
+{
+   struct brw_compile *p = &c->func;
+   /* r1 as unsigned words: each dword of r1.2..r1.5 packs a tile's x,y. */
+   struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
+   struct brw_reg dst_uw[2];
+   GLuint i;
+
+   brw_push_insn_state(p);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+   /* View both destinations as UW regions as wide as the dispatch. */
+   for (i = 0; i < 2; i++) {
+      struct brw_reg uw = retype(dst[i], BRW_REGISTER_TYPE_UW);
+
+      dst_uw[i] = (c->dispatch_width == 16) ? vec16(uw) : vec8(uw);
+   }
+
+   /* Calculate pixel centers by adding 1 or 0 to each of the
+    * micro-tile coordinates passed in r1.  X words start at word 4 of r1,
+    * Y words at word 5; the immediates presumably pack the per-pixel
+    * 0/1 offsets as 4-bit vector elements.
+    */
+   if (mask & WRITEMASK_X) {
+      brw_ADD(p, dst_uw[0], stride(suboffset(r1_uw, 4), 2, 4, 0),
+              brw_imm_v(0x10101010));
+   }
+
+   if (mask & WRITEMASK_Y) {
+      brw_ADD(p, dst_uw[1], stride(suboffset(r1_uw, 5), 2, 4, 0),
+              brw_imm_v(0x11001100));
+   }
+
+   /* Undo the compression override made above. */
+   brw_pop_insn_state(p);
+}
+
+/**
+ * Computes the screen-space x,y distance of the pixels from the start
+ * vertex.
+ *
+ * This will be used in linterp or pinterp with the start vertex value
+ * and the Cx, Cy, and C0 coefficients passed in from the setup engine
+ * to produce interpolated attribute values.
+ */
+void emit_delta_xy(struct brw_compile *p,
+                   const struct brw_reg *dst,
+                   GLuint mask,
+                   const struct brw_reg *arg0)
+{
+   /* Start-vertex X is r1.0 and Y is r1.1 (payload notes, emit_pixel_xy). */
+   const struct brw_reg origin_x = brw_vec1_grf(1, 0);
+   const struct brw_reg origin_y = suboffset(origin_x, 1);
+
+   /* Nothing requested: emit nothing. */
+   if (!mask)
+      return;
+
+   /* Only the combined X+Y write is supported. */
+   assert(mask == WRITEMASK_XY);
+
+   /* Calc delta X,Y by subtracting origin in r1 from the pixel
+    * centers produced by emit_pixel_xy(), which are read as UW.
+    */
+   brw_ADD(p, dst[0],
+           retype(arg0[0], BRW_REGISTER_TYPE_UW), negate(origin_x));
+   brw_ADD(p, dst[1],
+           retype(arg0[1], BRW_REGISTER_TYPE_UW), negate(origin_y));
+}
+
+/**
+ * Computes the window-relative pixel position for gl_FragCoord.xy.
+ *
+ * arg0[0] and arg0[1] (the pixel x,y) are read as signed words.  Half a
+ * pixel is added unless the program requests integer pixel centers, and
+ * Y is flipped against the drawable height unless the program requests
+ * an upper-left origin.  Only the X and Y writemask bits are examined.
+ */
+void emit_wpos_xy(struct brw_wm_compile *c,
+                  const struct brw_reg *dst,
+                  GLuint mask,
+                  const struct brw_reg *arg0)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg x_w, y_w;
+
+   if (mask & WRITEMASK_X) {
+      x_w = retype(arg0[0], BRW_REGISTER_TYPE_W);
+
+      /* X' = X, or X + 0.5 to address the pixel center. */
+      if (c->fp->program.PixelCenterInteger) {
+         brw_MOV(p, dst[0], x_w);
+      } else {
+         brw_ADD(p, dst[0], x_w, brw_imm_f(0.5));
+      }
+   }
+
+   /* Nothing further to emit when Y is not requested. */
+   if (!(mask & WRITEMASK_Y))
+      return;
+
+   /* Pixel Y viewed as a signed word. */
+   y_w = retype(arg0[1], BRW_REGISTER_TYPE_W);
+
+   if (!c->fp->program.OriginUpperLeft) {
+      /* Lower-left origin, so flip against the drawable height:
+       * Y' = (height - 1) - Y + center
+       */
+      float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5;
+
+      brw_ADD(p, dst[1], negate(y_w),
+              brw_imm_f(c->key.drawable_height - 1 + center_offset));
+   } else if (c->fp->program.PixelCenterInteger) {
+      /* Y' = Y */
+      brw_MOV(p, dst[1], y_w);
+   } else {
+      /* Y' = Y + 0.5 */
+      brw_ADD(p, dst[1], y_w, brw_imm_f(0.5));
+   }
+}
+
+
+/* Computes w into dst[3]: 1/w is interpolated straight into message
+ * register 2 and then inverted by the math unit.  No-op without W. */
+void emit_pixel_w(struct brw_wm_compile *c,
+                  const struct brw_reg *dst,
+                  GLuint mask,
+                  const struct brw_reg *arg0,
+                  const struct brw_reg *deltas)
+{
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_reg interp3;
+
+   /* Don't need this if all you are doing is interpolating color, for
+    * instance: only the W channel produces any code.
+    */
+   if (!(mask & WRITEMASK_W))
+      return;
+
+   /* Coefficients for wpos[3]: register after arg0[0], subregister 4. */
+   interp3 = brw_vec1_grf(arg0[0].nr + 1, 4);
+
+   /* Calc 1/w - just linterp wpos[3] optimized by putting the
+    * result straight into a message reg.
+    */
+   if (!can_do_pln(intel, deltas)) {
+      brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
+      brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+   } else {
+      brw_PLN(p, brw_message_reg(2), interp3, deltas[0]);
+   }
+
+   /* Calc w with a full-precision INV on the value left in m2. */
+   if (c->dispatch_width != 16) {
+      brw_math(p, dst[3], BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE,
+               2, brw_null_reg(),
+               BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);
+   } else {
+      brw_math_16(p, dst[3], BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE,
+                  2, brw_null_reg(),
+                  BRW_MATH_PRECISION_FULL);
+   }
+}
+
+
+/* Linear interpolation of the enabled channels: evaluates each channel's
+ * plane equation (coefficients from registers nr, nr+1) at 'deltas'. */
+void emit_linterp(struct brw_compile *p,
+                  const struct brw_reg *dst,
+                  GLuint mask,
+                  const struct brw_reg *arg0,
+                  const struct brw_reg *deltas)
+{
+   struct intel_context *intel = &p->brw->intel;
+   const GLuint nr = arg0[0].nr;
+   GLuint i;
+
+   for (i = 0; i < 4; i++) {
+      /* Two channels per register: x,y in 'nr', z,w in 'nr + 1'. */
+      struct brw_reg interp = brw_vec1_grf(nr + i / 2, (i & 1) * 4);
+
+      if (!(mask & (1<<i)))
+         continue;
+
+      if (can_do_pln(intel, deltas)) {
+         brw_PLN(p, dst[i], interp, deltas[0]);
+      } else {
+         brw_LINE(p, brw_null_reg(), interp, deltas[0]);
+         brw_MAC(p, dst[i], suboffset(interp, 1), deltas[1]);
+      }
+   }
+}
+
+
+/* Perspective interpolation: as emit_linterp(), then multiply by w[3]. */
+void emit_pinterp(struct brw_compile *p,
+                  const struct brw_reg *dst,
+                  GLuint mask,
+                  const struct brw_reg *arg0,
+                  const struct brw_reg *deltas,
+                  const struct brw_reg *w)
+{
+   struct intel_context *intel = &p->brw->intel;
+   const GLuint nr = arg0[0].nr;
+   GLuint i;
+
+   /* Pass 1: evaluate the plane equation for every enabled channel. */
+   for (i = 0; i < 4; i++) {
+      struct brw_reg interp = brw_vec1_grf(nr + i / 2, (i & 1) * 4);
+
+      if (!(mask & (1<<i)))
+         continue;
+
+      if (can_do_pln(intel, deltas)) {
+         brw_PLN(p, dst[i], interp, deltas[0]);
+      } else {
+         brw_LINE(p, brw_null_reg(), interp, deltas[0]);
+         brw_MAC(p, dst[i], suboffset(interp, 1), deltas[1]);
+      }
+   }
+
+   /* Pass 2: scale each result by w[3], emitted after all of pass 1. */
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i))
+         brw_MUL(p, dst[i], dst[i], w[3]);
+   }
+}
+
+
+/* Constant interpolation: copies element 3 of each channel's coefficients. */
+void emit_cinterp(struct brw_compile *p,
+                  const struct brw_reg *dst,
+                  GLuint mask,
+                  const struct brw_reg *arg0)
+{
+   const GLuint nr = arg0[0].nr;
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      /* Channels x,y come from register 'nr'; z,w from 'nr + 1'. */
+      struct brw_reg coef = brw_vec1_grf(nr + chan / 2, (chan & 1) * 4);
+
+      if (!(mask & (1<<chan)))
+         continue;
+
+      /* TODO: optimize away like other moves */
+      brw_MOV(p, dst[chan], suboffset(coef, 3));
+   }
+}
+
+/* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
+void emit_frontfacing(struct brw_compile *p,
+                      const struct brw_reg *dst,
+                      GLuint mask)
+{
+   struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
+   GLuint i;
+
+   if (!(mask & WRITEMASK_XYZW))
+      return;
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i))
+         brw_MOV(p, dst[i], brw_imm_f(0.0));
+   }
+
+   /* bit 31 is "primitive is back face", so checking < (1u << 31) gives
+    * us front face; the 1.0 moves below follow that compare.  '1u' keeps
+    * the shift unsigned: 1 << 31 overflows a 32-bit signed int.
+    */
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1u << 31));
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i))
+         brw_MOV(p, dst[i], brw_imm_f(1.0));
+   }
+   /* Done with the compare result: set the flag value to 0xff. */
+   brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
+ * looking like:
+ *
+ * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ *
+ * and we're trying to produce:
+ *
+ *           DDX                     DDY
+ * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
+ *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
+ *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
+ *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
+ *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
+ *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
+ *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
+ *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
+ *
+ * and add another set of two more subspans if in 16-pixel dispatch mode.
+ *
+ * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
+ * for each pair, and vertstride = 2 jumps us 2 elements after processing a
+ * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
+ * between each other.  We could probably do it like ddx and swizzle the right
+ * order later, but bail for now and just produce
+ * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ */
+void emit_ddxy(struct brw_compile *p,
+               const struct brw_reg *dst,
+               GLuint mask,
+               GLboolean is_ddx,
+               const struct brw_reg *arg0)
+{
+   /* Region shape and the two elements being differenced.  DDX reads
+    * <2;2,0> regions and computes element 1 - element 0 of each pair;
+    * DDY reads <4;4,0> regions and computes element 0 - element 2 of
+    * each subspan.
+    */
+   const GLuint vstride = is_ddx ? BRW_VERTICAL_STRIDE_2
+                                 : BRW_VERTICAL_STRIDE_4;
+   const GLuint width = is_ddx ? BRW_WIDTH_2 : BRW_WIDTH_4;
+   const GLuint subnr0 = is_ddx ? 1 : 0;
+   const GLuint subnr1 = is_ddx ? 0 : 2;
+   const GLboolean sat = (mask & SATURATE) != 0;
+   int chan;
+
+   /* Saturation, when requested, covers every ADD emitted below. */
+   if (sat)
+      brw_set_saturate(p, 1);
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg src0, src1;
+
+      if (!(mask & (1<<chan)))
+         continue;
+
+      /* Both sources address the same float register of arg0[chan]. */
+      src0 = brw_reg(arg0[chan].file, arg0[chan].nr, subnr0,
+                     BRW_REGISTER_TYPE_F, vstride, width,
+                     BRW_HORIZONTAL_STRIDE_0,
+                     BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+      src1 = brw_reg(arg0[chan].file, arg0[chan].nr, subnr1,
+                     BRW_REGISTER_TYPE_F, vstride, width,
+                     BRW_HORIZONTAL_STRIDE_0,
+                     BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+
+      /* dst = src0 - src1 */
+      brw_ADD(p, dst[chan], src0, negate(src1));
+   }
+
+   if (sat)
+      brw_set_saturate(p, 0);
+}
+
+/* Runs 'func' (dst, src) on each enabled channel; SATURATE wraps the group. */
+void emit_alu1(struct brw_compile *p,
+               struct brw_instruction *(*func)(struct brw_compile *,
+                                               struct brw_reg,
+                                               struct brw_reg),
+               const struct brw_reg *dst, GLuint mask,
+               const struct brw_reg *arg0)
+{
+   const GLboolean sat = (mask & SATURATE) != 0;
+   GLuint chan;
+
+   if (sat)
+      brw_set_saturate(p, 1);
+
+   for (chan = 0; chan < 4; chan++) {
+      if (mask & (1<<chan))
+         func(p, dst[chan], arg0[chan]);
+   }
+
+   if (sat)
+      brw_set_saturate(p, 0);
+}
+
+
+/* Runs 'func' (dst, src0, src1) on each enabled channel; SATURATE wraps it. */
+void emit_alu2(struct brw_compile *p,
+               struct brw_instruction *(*func)(struct brw_compile *,
+                                               struct brw_reg,
+                                               struct brw_reg,
+                                               struct brw_reg),
+               const struct brw_reg *dst, GLuint mask,
+               const struct brw_reg *arg0,
+               const struct brw_reg *arg1)
+{
+   const GLboolean sat = (mask & SATURATE) != 0;
+   GLuint chan;
+
+   if (sat)
+      brw_set_saturate(p, 1);
+
+   for (chan = 0; chan < 4; chan++) {
+      if (mask & (1<<chan))
+         func(p, dst[chan], arg0[chan], arg1[chan]);
+   }
+
+   if (sat)
+      brw_set_saturate(p, 0);
+}
+
+
+/* MAD: dst = arg0 * arg1 + arg2 per channel; only the ADD may saturate. */
+void emit_mad(struct brw_compile *p,
+              const struct brw_reg *dst, GLuint mask,
+              const struct brw_reg *arg0,
+              const struct brw_reg *arg1,
+              const struct brw_reg *arg2)
+{
+   const GLuint sat = (mask & SATURATE) ? 1 : 0;
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(mask & (1<<chan)))
+         continue;
+      brw_MUL(p, dst[chan], arg0[chan], arg1[chan]);
+      brw_set_saturate(p, sat);
+      brw_ADD(p, dst[chan], dst[chan], arg2[chan]);
+      brw_set_saturate(p, 0);
+   }
+}
+
+/* LRP: dst = arg0 * arg1 + (1 - arg0) * arg2, per enabled channel. */
+void emit_lrp(struct brw_compile *p,
+              const struct brw_reg *dst, GLuint mask,
+              const struct brw_reg *arg0,
+              const struct brw_reg *arg1,
+              const struct brw_reg *arg2)
+{
+   const GLuint sat = (mask & SATURATE) ? 1 : 0;
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(mask & (1<<chan)))
+         continue;
+
+      /* Uses dst as a temporary for (1 - arg0).  The MUL targets the null
+       * register, so the product presumably reaches the MAC through the
+       * accumulator.  Can I use the LINE instruction for this? */
+      brw_ADD(p, dst[chan], negate(arg0[chan]), brw_imm_f(1.0));
+      brw_MUL(p, brw_null_reg(), dst[chan], arg2[chan]);
+      brw_set_saturate(p, sat);
+      brw_MAC(p, dst[chan], arg0[chan], arg1[chan]);
+      brw_set_saturate(p, 0);
+   }
+}
+
+/* Set-on-compare: dst = (arg0 <cond> arg1) ? 1.0 : 0.0 per enabled channel. */
+void emit_sop(struct brw_compile *p,
+              const struct brw_reg *dst, GLuint mask, GLuint cond,
+              const struct brw_reg *arg0,
+              const struct brw_reg *arg1)
+{
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(mask & (1<<chan)))
+         continue;
+      brw_push_insn_state(p);
+      brw_CMP(p, brw_null_reg(), cond, arg0[chan], arg1[chan]);
+      /* Unpredicated write of 0, then a predicated overwrite with 1.0. */
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      brw_MOV(p, dst[chan], brw_imm_f(0));
+      brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+      brw_MOV(p, dst[chan], brw_imm_f(1.0));
+      brw_pop_insn_state(p);
+   }
+}
+
+/* SLT: dst = (arg0 < arg1) ? 1.0 : 0.0 on the enabled channels. */
+static void emit_slt(struct brw_compile *p,
+                     const struct brw_reg *dst, GLuint mask,
+                     const struct brw_reg *arg0,
+                     const struct brw_reg *arg1)
+{
+   emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
+}
+
+/* SLE: dst = (arg0 <= arg1) ? 1.0 : 0.0 on the enabled channels. */
+static void emit_sle(struct brw_compile *p,
+                     const struct brw_reg *dst, GLuint mask,
+                     const struct brw_reg *arg0,
+                     const struct brw_reg *arg1)
+{
+   emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
+}
+
+/* SGT: dst = (arg0 > arg1) ? 1.0 : 0.0 on the enabled channels. */
+static void emit_sgt(struct brw_compile *p,
+                     const struct brw_reg *dst, GLuint mask,
+                     const struct brw_reg *arg0,
+                     const struct brw_reg *arg1)
+{
+   emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
+}
+
+/* SGE: dst = (arg0 >= arg1) ? 1.0 : 0.0 on the enabled channels. */
+static void emit_sge(struct brw_compile *p,
+                     const struct brw_reg *dst, GLuint mask,
+                     const struct brw_reg *arg0,
+                     const struct brw_reg *arg1)
+{
+   emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
+}
+
+/* SEQ: dst = (arg0 == arg1) ? 1.0 : 0.0 on the enabled channels. */
+static void emit_seq(struct brw_compile *p,
+                     const struct brw_reg *dst, GLuint mask,
+                     const struct brw_reg *arg0,
+                     const struct brw_reg *arg1)
+{
+   emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
+}
+
+/* SNE: dst = (arg0 != arg1) ? 1.0 : 0.0 on the enabled channels. */
+static void emit_sne(struct brw_compile *p,
+                     const struct brw_reg *dst, GLuint mask,
+                     const struct brw_reg *arg0,
+                     const struct brw_reg *arg1)
+{
+   emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
+}
+
+/* CMP: dst = (arg0 < 0) ? arg1 : arg2, per enabled channel. */
+void emit_cmp(struct brw_compile *p,
+              const struct brw_reg *dst, GLuint mask,
+              const struct brw_reg *arg0,
+              const struct brw_reg *arg1,
+              const struct brw_reg *arg2)
+{
+   const GLuint sat = (mask & SATURATE) ? 1 : 0;
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(mask & (1<<chan)))
+         continue;
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[chan], brw_imm_f(0));
+      brw_set_saturate(p, sat);
+      brw_SEL(p, dst[chan], arg1[chan], arg2[chan]);
+      brw_set_saturate(p, 0);
+      brw_set_predicate_control_flag_value(p, 0xff);
+   }
+}
+
+/* MAX: compares arg0 >= arg1, then selects between arg0 and arg1. */
+void emit_max(struct brw_compile *p,
+              const struct brw_reg *dst, GLuint mask,
+              const struct brw_reg *arg0,
+              const struct brw_reg *arg1)
+{
+   const GLuint sat = (mask & SATURATE) ? 1 : 0;
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(mask & (1<<chan)))
+         continue;
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[chan], arg1[chan]);
+      brw_set_saturate(p, sat);
+      brw_SEL(p, dst[chan], arg0[chan], arg1[chan]);
+      brw_set_saturate(p, 0);
+      brw_set_predicate_control_flag_value(p, 0xff);
+   }
+}
+
+/* MIN: compares arg0 < arg1, then selects between arg0 and arg1. */
+void emit_min(struct brw_compile *p,
+              const struct brw_reg *dst, GLuint mask,
+              const struct brw_reg *arg0,
+              const struct brw_reg *arg1)
+{
+   const GLuint sat = (mask & SATURATE) ? 1 : 0;
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(mask & (1<<chan)))
+         continue;
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[chan], arg1[chan]);
+      brw_set_saturate(p, sat);
+      brw_SEL(p, dst[chan], arg0[chan], arg1[chan]);
+      brw_set_saturate(p, 0);
+      brw_set_predicate_control_flag_value(p, 0xff);
+   }
+}
+
+
+/* DP3: 3-component dot product written to the one enabled channel. */
+void emit_dp3(struct brw_compile *p,
+              const struct brw_reg *dst, GLuint mask,
+              const struct brw_reg *arg0,
+              const struct brw_reg *arg1)
+{
+   const GLuint chans = mask & WRITEMASK_XYZW;
+   int dst_chan;
+
+   if (!chans)
+      return; /* Do not emit dead code */
+   assert(is_power_of_two(chans));
+   dst_chan = _mesa_ffs(chans) - 1;
+
+   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+   brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+   brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+   brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
+   brw_set_saturate(p, 0);
+}
+
+
+/* DP4: 4-component dot product written to the one enabled channel. */
+void emit_dp4(struct brw_compile *p,
+              const struct brw_reg *dst, GLuint mask,
+              const struct brw_reg *arg0,
+              const struct brw_reg *arg1)
+{
+   const GLuint chans = mask & WRITEMASK_XYZW;
+   int dst_chan;
+
+   if (!chans)
+      return; /* Do not emit dead code */
+   assert(is_power_of_two(chans));
+   dst_chan = _mesa_ffs(chans) - 1;
+
+   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+   brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+   brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
+   brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+   brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
+   brw_set_saturate(p, 0);
+}
+
+
+/* DPH: 3-component dot product plus arg1[3], to the one enabled channel. */
+void emit_dph(struct brw_compile *p,
+              const struct brw_reg *dst, GLuint mask,
+              const struct brw_reg *arg0,
+              const struct brw_reg *arg1)
+{
+   const GLuint chans = mask & WRITEMASK_XYZW;
+   int dst_chan;
+
+   if (!chans)
+      return; /* Do not emit dead code */
+   assert(is_power_of_two(chans));
+   dst_chan = _mesa_ffs(chans) - 1;
+
+   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+   brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+   brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
+   brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+   brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
+   brw_set_saturate(p, 0);
+}
+
+
+/* XPD: dst.xyz = arg0 x arg1 (cross product); W must not be requested. */
+void emit_xpd(struct brw_compile *p,
+              const struct brw_reg *dst, GLuint mask,
+              const struct brw_reg *arg0,
+              const struct brw_reg *arg1)
+{
+   const GLuint sat = (mask & SATURATE) ? 1 : 0;
+   GLuint chan;
+
+   assert((mask & WRITEMASK_W) != WRITEMASK_W);
+
+   for (chan = 0; chan < 3; chan++) {
+      const GLuint next = (chan + 1) % 3;
+      const GLuint prev = (chan + 2) % 3;
+
+      if (!(mask & (1<<chan)))
+         continue;
+      /* dst = arg0[next] * arg1[prev] - arg0[prev] * arg1[next] */
+      brw_MUL(p, brw_null_reg(), negate(arg0[prev]), arg1[next]);
+      brw_set_saturate(p, sat);
+      brw_MAC(p, dst[chan], arg0[next], arg1[prev]);
+      brw_set_saturate(p, 0);
+   }
+}
+
+
+/* Emits a one-operand math-unit function of arg0[0], writing the single
+ * destination channel selected by the writemask.  The operand is passed
+ * in message register 2 (and 3 for the second half when 16-wide).
+ */
+void emit_math1(struct brw_wm_compile *c,
+                GLuint function,
+                const struct brw_reg *dst,
+                GLuint mask,
+                const struct brw_reg *arg0)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint chans = mask & WRITEMASK_XYZW;
+   const GLuint saturate = (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE
+                                             : BRW_MATH_SATURATE_NONE;
+   struct brw_reg result;
+
+   if (!chans)
+      return; /* Do not emit dead code */
+
+   /* Exactly one destination channel may be requested. */
+   assert(is_power_of_two(chans));
+   result = dst[_mesa_ffs(chans) - 1];
+
+   /* If compressed, this will write message reg 2,3 from arg0.x's 16
+    * channels.
+    */
+   brw_MOV(p, brw_message_reg(2), arg0[0]);
+
+   /* Send two messages to perform all 16 operations: the math
+    * instructions are emitted uncompressed, one per half.
+    */
+   brw_push_insn_state(p);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   /* First (or only) half: operand in m2. */
+   brw_math(p, result, function, saturate,
+            2, brw_null_reg(),
+            BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);
+
+   if (c->dispatch_width == 16) {
+      /* Second half: operand in m3, result in the following register. */
+      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+      brw_math(p, offset(result, 1), function, saturate,
+               3, brw_null_reg(),
+               BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);
+   }
+
+   brw_pop_insn_state(p);
+}
+
+
+/* Emits a two-operand math-unit function (POW, for emit_lit()) of arg0[0]
+ * and arg1[0], writing the single destination channel selected by the
+ * writemask.
+ */
+void emit_math2(struct brw_wm_compile *c,
+                GLuint function,
+                const struct brw_reg *dst,
+                GLuint mask,
+                const struct brw_reg *arg0,
+                const struct brw_reg *arg1)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint chans = mask & WRITEMASK_XYZW;
+   const GLuint saturate = (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE
+                                             : BRW_MATH_SATURATE_NONE;
+   struct brw_reg result;
+   GLboolean wide;
+   GLuint k;
+
+   if (!chans)
+      return; /* Do not emit dead code */
+
+   /* Exactly one destination channel may be requested. */
+   assert(is_power_of_two(chans));
+   result = dst[_mesa_ffs(chans) - 1];
+   wide = (c->dispatch_width == 16);
+
+   brw_push_insn_state(p);
+
+   /* Load the operands with uncompressed moves: operand 0 goes to m2
+    * (second half to m4), operand 1 to m3 (second half to m5).
+    */
+   for (k = 0; k < 2; k++) {
+      const struct brw_reg src = (k == 0) ? arg0[0] : arg1[0];
+
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_MOV(p, brw_message_reg(2 + k), src);
+      if (wide) {
+         brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+         brw_MOV(p, brw_message_reg(4 + k), sechalf(src));
+      }
+   }
+
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_math(p, result, function, saturate,
+            2, brw_null_reg(),
+            BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);
+
+   /* Send two messages to perform all 16 operations:
+    */
+   if (wide) {
+      brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+      brw_math(p, offset(result, 1), function, saturate,
+               4, brw_null_reg(),
+               BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL);
+   }
+
+   brw_pop_insn_state(p);
+}
+
+
+/* Emits a texture sample.  The coordinates (plus, for shadow lookups, the
+ * comparison reference taken from arg[2]) are loaded into message
+ * registers starting at m2, then the sampler message is sent with its
+ * response directed at dst[0].
+ */
+void emit_tex(struct brw_wm_compile *c,
+              struct brw_reg *dst,
+              GLuint dst_flags,
+              struct brw_reg *arg,
+              struct brw_reg depth_payload,
+              GLuint tex_idx,
+              GLuint sampler,
+              GLboolean shadow)
+{
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
+   const GLboolean wide = (c->dispatch_width == 16);
+   /* Registers per message channel and per response, by dispatch width. */
+   const GLuint mrf_per_channel = wide ? 2 : 1;
+   const GLuint response_length = wide ? 8 : 4;
+   const GLuint simd_mode = wide ? BRW_SAMPLER_SIMD_MODE_SIMD16
+                                 : BRW_SAMPLER_SIMD_MODE_SIMD8;
+   struct brw_reg dst_retyped;
+   GLuint cur_mrf = 2;
+   GLuint i, nr_texcoords;
+   GLuint emit;
+   GLuint msg_type;
+
+   if (wide)
+      dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
+   else
+      dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
+
+   /* How many input regs are there?  'emit' marks the coordinates that
+    * come from arg[]; any further ones that get sent are zero-filled.
+    */
+   switch (tex_idx) {
+   case TEXTURE_1D_INDEX:
+      nr_texcoords = 1;
+      emit = WRITEMASK_X;
+      break;
+   case TEXTURE_2D_INDEX:
+   case TEXTURE_RECT_INDEX:
+      nr_texcoords = 2;
+      emit = WRITEMASK_XY;
+      break;
+   case TEXTURE_3D_INDEX:
+   case TEXTURE_CUBE_INDEX:
+      nr_texcoords = 3;
+      emit = WRITEMASK_XYZ;
+      break;
+   default:
+      /* unexpected target */
+      abort();
+   }
+
+   /* Pre-Ironlake, the 8-wide sampler always took u,v,r.
+    * For shadow comparisons, we have to supply u,v,r.
+    */
+   if (shadow || (intel->gen < 5 && c->dispatch_width == 8))
+      nr_texcoords = 3;
+
+   /* Emit the texcoords. */
+   for (i = 0; i < nr_texcoords; i++) {
+      struct brw_reg mrf = brw_message_reg(cur_mrf);
+
+      if (emit & (1<<i))
+         brw_MOV(p, mrf, arg[i]);
+      else
+         brw_MOV(p, mrf, brw_imm_f(0));
+      cur_mrf += mrf_per_channel;
+   }
+
+   /* Fill in the shadow comparison reference value. */
+   if (shadow) {
+      /* A zero goes first on gen5 (cube map array index) and in 8-wide
+       * mode on the other generations (LOD bias).
+       */
+      if (intel->gen == 5 || c->dispatch_width == 8) {
+         brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0));
+         cur_mrf += mrf_per_channel;
+      }
+      brw_MOV(p, brw_message_reg(cur_mrf), arg[2]);
+      cur_mrf += mrf_per_channel;
+   }
+
+   /* Note that G45 and older determines shadow compare and dispatch width
+    * from message length for most messages.
+    */
+   if (intel->gen == 5) {
+      msg_type = shadow ? BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5
+                        : BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
+   } else if (wide && shadow) {
+      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
+   } else {
+      msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
+   }
+
+   /* Presumably the message starts at m1, so cur_mrf - 1 is the register
+    * count from m1 through the last register written above - TODO confirm. */
+   brw_SAMPLE(p,
+              dst_retyped,
+              1,
+              retype(depth_payload, BRW_REGISTER_TYPE_UW),
+              SURF_INDEX_TEXTURE(sampler),
+              sampler,
+              dst_flags & WRITEMASK_XYZW,
+              msg_type,
+              response_length,
+              cur_mrf - 1,
+              0,
+              1,
+              simd_mode);
+}
+
+
+/* Emits a LOD-biased texture sample.  Three coordinate channels (unused
+ * ones zeroed) and the bias from arg[3] are loaded from m2 onwards before
+ * the sampler message is sent with its response directed at dst[0].
+ */
+void emit_txb(struct brw_wm_compile *c,
+              struct brw_reg *dst,
+              GLuint dst_flags,
+              struct brw_reg *arg,
+              struct brw_reg depth_payload,
+              GLuint tex_idx,
+              GLuint sampler)
+{
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &p->brw->intel;
+   struct brw_reg dst_retyped;
+   GLuint msgLength, msg_type;
+   GLuint mrf_per_channel, response_length;
+   GLuint i, nr_texcoords;
+
+   /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased
+    * samples, so we'll use the 16-wide instruction, leave the second halves
+    * undefined, and trust the execution mask to keep the undefined pixels
+    * from mattering.
+    */
+   if (c->dispatch_width != 16 && intel->gen >= 5) {
+      msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
+      mrf_per_channel = 1;
+      response_length = 4;
+      dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW);
+   } else {
+      msg_type = (intel->gen == 5) ? BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5
+                                   : BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+      mrf_per_channel = 2;
+      response_length = 8;
+      dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW);
+   }
+
+   /* Shadow ignored for txb.  Count the coordinates the target supplies. */
+   switch (tex_idx) {
+   case TEXTURE_1D_INDEX:
+      nr_texcoords = 1;
+      break;
+   case TEXTURE_2D_INDEX:
+   case TEXTURE_RECT_INDEX:
+      nr_texcoords = 2;
+      break;
+   case TEXTURE_3D_INDEX:
+   case TEXTURE_CUBE_INDEX:
+      nr_texcoords = 3;
+      break;
+   default:
+      /* unexpected target */
+      abort();
+   }
+
+   /* Channels 0..2 carry the coordinates, zero-filled past the last one. */
+   for (i = 0; i < 3; i++) {
+      struct brw_reg mrf = brw_message_reg(2 + i * mrf_per_channel);
+
+      if (i < nr_texcoords)
+         brw_MOV(p, mrf, arg[i]);
+      else
+         brw_MOV(p, mrf, brw_imm_f(0));
+   }
+
+   /* Channel 3 carries arg[3], presumably the LOD bias. */
+   brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]);
+   msgLength = 2 + 4 * mrf_per_channel - 1;
+
+   /* NOTE(review): SIMD16 is passed even on the 8-wide path - confirm. */
+   brw_SAMPLE(p, dst_retyped, 1,
+              retype(depth_payload, BRW_REGISTER_TYPE_UW),
+              SURF_INDEX_TEXTURE(sampler), sampler,
+              dst_flags & WRITEMASK_XYZW,
+              msg_type, response_length, msgLength, 0, 1,
+              BRW_SAMPLER_SIMD_MODE_SIMD16);
+}
+
+
+/* LIT: Y = arg0.x and Z = pow(arg0.y, arg0.w); after a compare of arg0.x
+ * against <= 0 both are rewritten with 0.  X and W must not be requested. */
+static void emit_lit(struct brw_wm_compile *c,
+                     const struct brw_reg *dst,
+                     GLuint mask,
+                     const struct brw_reg *arg0)
+{
+   struct brw_compile *p = &c->func;
+   const GLboolean want_y = (mask & WRITEMASK_Y) != 0;
+   const GLboolean want_z = (mask & WRITEMASK_Z) != 0;
+
+   assert((mask & WRITEMASK_XW) == 0);
+
+   if (want_y) {
+      brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+      brw_MOV(p, dst[1], arg0[0]);
+      brw_set_saturate(p, 0);
+   }
+
+   /* emit_math2() takes its operands from element 0 of each array and
+    * writes the channel named by its mask, hence the shifted pointers.
+    */
+   if (want_z)
+      emit_math2(c, BRW_MATH_FUNCTION_POW, &dst[2],
+                 WRITEMASK_X | (mask & SATURATE), &arg0[1], &arg0[3]);
+
+   /* Ordinarily you'd use an iff statement to skip or shortcircuit
+    * some of the POW calculations above, but 16-wide iff statements
+    * seem to lock c1 hardware, so this is a nasty workaround:
+    */
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
+   if (want_y)
+      brw_MOV(p, dst[1], brw_imm_f(0));
+   if (want_z)
+      brw_MOV(p, dst[2], brw_imm_f(0)); 
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+
+/* Kill pixel - set execution mask to zero for those pixels which
+ * fail the "component >= 0" test on any of the four arg0 registers.
+ */
+static void emit_kil( struct brw_wm_compile *c,
+                      struct brw_reg *arg0)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+   GLuint chan, prev;
+
+   for (chan = 0; chan < 4; chan++) {
+      /* Check if we've already done the comparison for this reg
+       * -- common when someone does KIL TEMP.wwww.
+       */
+      GLboolean repeated = GL_FALSE;
+      for (prev = 0; prev < chan && !repeated; prev++)
+         repeated = (memcmp(&arg0[prev], &arg0[chan], sizeof(arg0[0])) == 0);
+      if (repeated)
+         continue;
+
+      /* AND the compare result (flag register) into r0.0. */
+      brw_push_insn_state(p);
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[chan], brw_imm_f(0));
+      brw_set_predicate_control_flag_value(p, 0xff);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_AND(p, r0uw, brw_flag_reg(), r0uw);
+      brw_pop_insn_state(p);
+   }
+}
+
+/**
+ * KIL_NV: kill the pixels that are currently executing, rather than
+ * testing the instruction's arguments.
+ *
+ * The inverse of mask register 1 (IMASK) is written to c->emit_mask_reg
+ * and then ANDed into the UW word at r0.0, all with mask control
+ * disabled.
+ */
+static void emit_kil_nv( struct brw_wm_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg mask_word = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+   brw_push_insn_state(p);
+   {
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+      /* emit_mask_reg = ~IMASK */
+      brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1));
+
+      /* r0.0:uw &= emit_mask_reg */
+      brw_AND(p, mask_word, c->emit_mask_reg, mask_word);
+   }
+   brw_pop_insn_state(p);
+}
+
+/**
+ * Copy the r1 header into the message and send the framebuffer write.
+ *
+ * \param c         WM compile state; code is appended to c->func
+ * \param base_reg  first message register of the write message
+ * \param nr        message length, passed through to brw_fb_WRITE
+ * \param target    render target index, passed through to brw_fb_WRITE
+ * \param eot       end-of-thread flag, passed through to brw_fb_WRITE
+ */
+static void fire_fb_write( struct brw_wm_compile *c,
+                           GLuint base_reg,
+                           GLuint nr,
+                           GLuint target,
+                           GLuint eot )
+{
+   struct brw_compile *p = &c->func;
+
+   /* The send has no real destination: use a null register whose width
+    * matches the dispatch width.
+    */
+   struct brw_reg null_dst =
+      retype((c->dispatch_width == 16) ? vec16(brw_null_reg())
+                                       : vec8(brw_null_reg()),
+             BRW_REGISTER_TYPE_UW);
+
+   /* Pass through control information:
+    *
+    *   mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask }
+    */
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_MOV(p, brw_message_reg(base_reg + 1), brw_vec8_grf(1, 0));
+   brw_pop_insn_state(p);
+
+   /* Send framebuffer write message:
+    *
+    *   send (16) null.0<1>:uw m0  r0.0<8;8,1>:uw  0x85a04000:ud  { Align1 EOT }
+    */
+   brw_fb_WRITE(p,
+                null_dst,
+                base_reg,
+                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+                target,
+                nr,
+                0,
+                eot);
+}
+
+
+/**
+ * Copy the AA/dest-stencil payload register into a message register.
+ *
+ * c->key.aa_dest_stencil_reg selects the source: its upper part (reg / 2)
+ * indexes arg1[], its low bit (reg % 2) is applied as a register offset.
+ *
+ * \param c     WM compile state; code is appended to c->func
+ * \param arg1  payload registers to pick the source from
+ * \param reg   number of the destination message register
+ */
+static void emit_aa( struct brw_wm_compile *c,
+                     struct brw_reg *arg1,
+                     GLuint reg )
+{
+   struct brw_compile *p = &c->func;
+   const GLuint aa_reg = c->key.aa_dest_stencil_reg;
+   struct brw_reg aa_src = offset(arg1[aa_reg / 2], aa_reg % 2);
+
+   brw_push_insn_state(p);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
+   brw_MOV(p, brw_message_reg(reg), aa_src);
+   brw_pop_insn_state(p);
+}
+
+
+/**
+ * Post-fragment-program processing.  Send the results to the
+ * framebuffer.
+ *
+ * The message is assembled in message registers starting at m2 (m3 when
+ * a slot is reserved for AA data): eight registers of color, then
+ * optionally source depth and destination depth, and is finally sent
+ * through fire_fb_write().
+ *
+ * \param c       WM compile state; code is appended to c->func
+ * \param arg0    the fragment color
+ * \param arg1    the pass-through depth value
+ * \param arg2    the shader-computed depth value
+ * \param target  render target index, forwarded to fire_fb_write()
+ * \param eot     end-of-thread flag, forwarded to fire_fb_write()
+ */
+void emit_fb_write(struct brw_wm_compile *c,
+		   struct brw_reg *arg0,
+		   struct brw_reg *arg1,
+		   struct brw_reg *arg2,
+		   GLuint target,
+		   GLuint eot)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
+   GLuint nr = 2;   /* next free message register */
+   GLuint channel;
+
+   /* Reserve a space for AA - may not be needed:
+    */
+   if (c->key.aa_dest_stencil_reg)
+      nr += 1;
+
+   /* I don't really understand how this achieves the color interleave
+    * (ie RGBARGBA) in the result:  [Do the saturation here]
+    */
+   brw_push_insn_state(p);
+
+   for (channel = 0; channel < 4; channel++) {
+      if (c->dispatch_width == 16 && brw->has_compr4) {
+	 /* By setting the high bit of the MRF register number, we indicate
+	  * that we want COMPR4 mode - instead of doing the usual destination
+	  * + 1 for the second half we get destination + 4.
+	  */
+	 brw_MOV(p,
+		 brw_message_reg(nr + channel + (1 << 7)),
+		 arg0[channel]);
+      } else {
+	 /* No COMPR4: write the first half uncompressed, then (16-wide
+	  * only) the second half four registers further on.
+	  */
+	 /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
+	 /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
+	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+	 brw_MOV(p,
+		 brw_message_reg(nr + channel),
+		 arg0[channel]);
+
+	 if (c->dispatch_width == 16) {
+	    brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+	    brw_MOV(p,
+		    brw_message_reg(nr + channel + 4),
+		    sechalf(arg0[channel]));
+	 }
+      }
+   }
+   /* skip over the regs populated above:
+    */
+   nr += 8;
+   brw_pop_insn_state(p);
+
+   /* Source depth: the shader-computed value if the program writes
+    * depth, otherwise the pass-through payload value.
+    */
+   if (c->key.source_depth_to_render_target)
+   {
+      if (c->key.computes_depth) 
+	 brw_MOV(p, brw_message_reg(nr), arg2[2]);
+      else 
+	 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
+
+      nr += 2;
+   }
+
+   /* Destination depth: dest_depth_reg is split into an index into arg1[]
+    * (reg / 2) and a half-pair selector (reg % 2).
+    */
+   if (c->key.dest_depth_reg)
+   {
+      GLuint comp = c->key.dest_depth_reg / 2;
+      GLuint off = c->key.dest_depth_reg % 2;
+
+      if (off != 0) {
+         /* Odd register: the value straddles two entries of arg1[], so
+          * copy it with two uncompressed MOVs.
+          */
+         brw_push_insn_state(p);
+         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+         brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
+         /* 2nd half? */
+         brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
+         brw_pop_insn_state(p);
+      }
+      else {
+         brw_MOV(p, brw_message_reg(nr), arg1[comp]);
+      }
+      nr += 2;
+   }
+
+   if (!c->key.runtime_check_aads_emit) {
+      /* Statically known: fill the reserved m2 slot if AA data is wanted
+       * and send the message starting at m0.
+       */
+      if (c->key.aa_dest_stencil_reg)
+	 emit_aa(c, arg1, 2);
+
+      fire_fb_write(c, 0, nr, target, eot);
+   }
+   else {
+      struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+      struct brw_reg ip = brw_ip_reg();
+      struct brw_instruction *jmp;
+      
+      /* Runtime check: test bit 26 of r1.6 with a Z conditional modifier;
+       * the JMPI below skips the AA variant of the write accordingly.
+       */
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+      brw_AND(p, 
+	      v1_null_ud, 
+	      get_element_ud(brw_vec8_grf(1,0), 6), 
+	      brw_imm_ud(1<<26)); 
+
+      jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+      {
+	 emit_aa(c, arg1, 2);
+	 fire_fb_write(c, 0, nr, target, eot);
+	 /* note - thread killed in subroutine */
+      }
+      brw_land_fwd_jump(p, jmp);
+
+      /* ELSE: Shuffle up one register to fill in the hole left for AA:
+       */
+      fire_fb_write(c, 1, nr-1, target, eot);
+   }
+}
+
+/**
+ * Spill: write a GPR out to scratch memory.
+ *
+ * \param c     WM compile state; code is appended to c->func
+ * \param reg   register whose contents are written out
+ * \param slot  scratch slot, passed through to brw_dp_WRITE_16
+ */
+static void emit_spill( struct brw_wm_compile *c,
+                        struct brw_reg reg,
+                        GLuint slot )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg header = retype(vec16(brw_vec8_grf(0, 0)),
+                                  BRW_REGISTER_TYPE_UW);
+
+   /* Stage the data in m2:
+    *
+    *   mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
+    */
+   brw_MOV(p, brw_message_reg(2), reg);
+
+   /* ...and issue the scratch write:
+    *
+    *   mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
+    *   send (16) null.0<1>:uw m1  r0.0<8;8,1>:uw  0x053003ff:ud  { Align1 }
+    */
+   brw_dp_WRITE_16(p, header, slot);
+}
+
+
+/**
+ * Unspill: reload a GPR from scratch memory.
+ *
+ * \param c     WM compile state; code is appended to c->func
+ * \param reg   register to load into
+ * \param slot  scratch slot to read; slot 0 stands for the undef value
+ *              and is materialised as 0.0f instead of being read
+ */
+static void emit_unspill( struct brw_wm_compile *c,
+                          struct brw_reg reg,
+                          GLuint slot )
+{
+   struct brw_compile *p = &c->func;
+
+   if (slot != 0) {
+      /*
+        mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
+        send (16) r110.0<1>:uw m1  r0.0<8;8,1>:uw  0x041243ff:ud  { Align1 }
+      */
+      brw_dp_READ_16(p,
+                     retype(vec16(reg), BRW_REGISTER_TYPE_UW),
+                     slot);
+   }
+   else {
+      /* Slot 0 is the undef value. */
+      brw_MOV(p, reg, brw_imm_f(0));
+   }
+}
+
+
+/**
+ * Resolve the (up to four) channel references of one source argument
+ * into hardware registers.
+ *
+ * References with unspill_reg != 0 are first reloaded from their value's
+ * spill slot into that register.  Missing references yield the null
+ * register.
+ *
+ * \param c     WM compile state
+ * \param arg   four channel references, entries may be NULL
+ * \param regs  output: four hardware registers
+ */
+static void get_argument_regs( struct brw_wm_compile *c,
+                               struct brw_wm_ref *arg[],
+                               struct brw_reg *regs )
+{
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_wm_ref *ref = arg[chan];
+
+      if (!ref) {
+         regs[chan] = brw_null_reg();
+         continue;
+      }
+
+      if (ref->unspill_reg) {
+         emit_unspill(c,
+                      brw_vec8_grf(ref->unspill_reg, 0),
+                      ref->value->spill_slot);
+      }
+
+      regs[chan] = ref->hw_reg;
+   }
+}
+
+
+/**
+ * Write every value that owns a spill slot (spill_slot != 0) out to
+ * scratch memory.
+ *
+ * \param c       WM compile state
+ * \param values  array of values to inspect
+ * \param nr      number of entries in \p values
+ */
+static void spill_values( struct brw_wm_compile *c,
+                          struct brw_wm_value *values,
+                          GLuint nr )
+{
+   struct brw_wm_value *val = values;
+   struct brw_wm_value *const end = values + nr;
+
+   for (; val != end; val++) {
+      if (val->spill_slot)
+         emit_spill(c, val->hw_reg, val->spill_slot);
+   }
+}
+
+
+/**
+ * Emit the fragment program instructions here.
+ *
+ * Walks c->instruction[0 .. c->nr_insns), resolves each instruction's
+ * source and destination references to hardware registers (reloading
+ * spilled sources as needed), dispatches on the opcode to the matching
+ * emit_*() helper, and finally spills any destination that owns a spill
+ * slot.  Payload, constant and interpolant registers that own a spill
+ * slot are written out before the first instruction.
+ *
+ * Unsupported opcodes are reported on stdout and otherwise skipped.
+ * With DEBUG_WM set in INTEL_DEBUG the generated native code is
+ * disassembled afterwards.
+ *
+ * \param c  WM compile state; native code is appended to c->func
+ */
+void brw_wm_emit( struct brw_wm_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   GLuint insn;
+
+   brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+   /* Check if any of the payload regs need to be spilled:
+    */
+   spill_values(c, c->payload.depth, 4);
+   spill_values(c, c->creg, c->nr_creg);
+   spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
+   
+
+   for (insn = 0; insn < c->nr_insns; insn++) {
+
+      struct brw_wm_instruction *inst = &c->instruction[insn];
+      struct brw_reg args[3][4], dst[4];
+      GLuint i, dst_flags;
+      
+      /* Get argument regs:
+       */
+      for (i = 0; i < 3; i++) 
+	 get_argument_regs(c, inst->src[i], args[i]);
+
+      /* Get dest regs:
+       */
+      for (i = 0; i < 4; i++)
+	 if (inst->dst[i])
+	    dst[i] = inst->dst[i]->hw_reg;
+	 else
+	    dst[i] = brw_null_reg();
+      
+      /* Flags: the writemask with SATURATE folded in.
+       */
+      dst_flags = inst->writemask;
+      if (inst->saturate) 
+	 dst_flags |= SATURATE;
+
+      switch (inst->opcode) {
+	 /* Generated instructions for calculating triangle interpolants:
+	  */
+      case WM_PIXELXY:
+	 emit_pixel_xy(c, dst, dst_flags);
+	 break;
+
+      case WM_DELTAXY:
+	 emit_delta_xy(p, dst, dst_flags, args[0]);
+	 break;
+
+      case WM_WPOSXY:
+	 emit_wpos_xy(c, dst, dst_flags, args[0]);
+	 break;
+
+      case WM_PIXELW:
+	 emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case WM_LINTERP:
+	 emit_linterp(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case WM_PINTERP:
+	 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
+	 break;
+
+      case WM_CINTERP:
+	 emit_cinterp(p, dst, dst_flags, args[0]);
+	 break;
+
+      case WM_FB_WRITE:
+	 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
+	 break;
+
+      case WM_FRONTFACING:
+	 emit_frontfacing(p, dst, dst_flags);
+	 break;
+
+	 /* Straightforward arithmetic:
+	  */
+      case OPCODE_ADD:
+	 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_FRC:
+	 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_FLR:
+	 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_DDX:
+	 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
+	 break;
+
+      case OPCODE_DDY:
+	 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
+	 break;
+
+      case OPCODE_DP3:
+	 emit_dp3(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_DP4:
+	 emit_dp4(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_DPH:
+	 emit_dph(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_TRUNC:
+	 emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_LRP:
+	 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
+	 break;
+
+      case OPCODE_MAD:	
+	 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
+	 break;
+
+      case OPCODE_MOV:
+      case OPCODE_SWZ:
+	 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_MUL:
+	 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_XPD:
+	 emit_xpd(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+	 /* Higher math functions:
+	  */
+      case OPCODE_RCP:
+	 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_RSQ:
+	 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_SIN:
+	 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_COS:
+	 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_EX2:
+	 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_LG2:
+	 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_SCS:
+	 /* There is an scs math function, but it would need some
+	  * fixup for 16-element execution.  Emit COS into dst.x and SIN
+	  * into dst.y separately instead; each call sees a one-channel
+	  * destination, hence the WRITEMASK_X.
+	  */
+	 if (dst_flags & WRITEMASK_X)
+	    emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+	 if (dst_flags & WRITEMASK_Y)
+	    emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+	 break;
+
+      case OPCODE_POW:
+	 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
+	 break;
+
+	 /* Comparisons:
+	  */
+      case OPCODE_CMP:
+	 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
+	 break;
+
+      case OPCODE_MAX:
+	 emit_max(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_MIN:
+	 emit_min(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_SLT:
+	 emit_slt(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_SLE:
+	 emit_sle(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_SGT:
+	 emit_sgt(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_SGE:
+	 emit_sge(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_SEQ:
+	 emit_seq(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_SNE:
+	 emit_sne(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_LIT:
+	 emit_lit(c, dst, dst_flags, args[0]);
+	 break;
+
+	 /* Texturing operations:
+	  */
+      case OPCODE_TEX:
+	 emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
+		  inst->tex_idx, inst->tex_unit,
+		  inst->tex_shadow);
+	 break;
+
+      case OPCODE_TXB:
+	 emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg,
+		  inst->tex_idx, inst->tex_unit);
+	 break;
+
+      case OPCODE_KIL:
+	 emit_kil(c, args[0]);
+	 break;
+
+      case OPCODE_KIL_NV:
+	 emit_kil_nv(c);
+	 break;
+
+      default:
+	 printf("Unsupported opcode %i (%s) in fragment shader\n",
+		inst->opcode, inst->opcode < MAX_OPCODE ?
+		_mesa_opcode_string(inst->opcode) :
+		"unknown");
+	 break;
+      }
+      
+      /* Write back any destination that lives in scratch memory. */
+      for (i = 0; i < 4; i++)
+	if (inst->dst[i] && inst->dst[i]->spill_slot) 
+	   emit_spill(c, 
+		      inst->dst[i]->hw_reg, 
+		      inst->dst[i]->spill_slot);
+   }
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      int i;
+
+      /* Keep the header, the disassembly and the trailer on the same
+       * stream: mixing stdout and stderr lets them come out in the wrong
+       * order (or in different files) as soon as output is redirected.
+       */
+      printf("wm-native:\n");
+      for (i = 0; i < p->nr_insn; i++)
+	 brw_disasm(stdout, &p->store[i], p->brw->intel.gen);
+      printf("\n");
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
new file mode 100644
index 0000000000..d73c391582
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -0,0 +1,1176 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+               
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "brw_context.h"
+#include "brw_wm.h"
+#include "brw_util.h"
+
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_statevars.h"
+
+
+/** An invalid texture target */
+#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS
+
+/** An invalid texture unit */
+#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT
+
+#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
+
+#define X    0
+#define Y    1
+#define Z    2
+#define W    3
+
+
+/* Printable names of the driver-internal WM_* opcodes.
+ *
+ * NOTE(review): presumably indexed by (opcode - first WM_* opcode), in
+ * which case the order here must match the WM_* opcode definitions --
+ * confirm against brw_wm.h before reordering or adding entries.
+ */
+static const char *wm_opcode_strings[] = {   
+   "PIXELXY",
+   "DELTAXY",
+   "PIXELW",
+   "LINTERP",
+   "PINTERP",
+   "CINTERP",
+   "WPOSXY",
+   "FB_WRITE",
+   "FRONTFACING",
+};
+
+#if 0
+static const char *wm_file_strings[] = {   
+   "PAYLOAD"
+};
+#endif
+
+
+/***********************************************************************
+ * Source regs
+ */
+
+/**
+ * Build a plain source register: identity swizzle, no relative
+ * addressing, no negation, no absolute value.
+ *
+ * \param file  register file
+ * \param idx   index within that file
+ */
+static struct prog_src_register src_reg(GLuint file, GLuint idx)
+{
+   struct prog_src_register src;
+
+   /* Which register... */
+   src.File = file;
+   src.Index = idx;
+   src.RelAddr = 0;
+
+   /* ...and how it is read: unmodified. */
+   src.Swizzle = SWIZZLE_NOOP;
+   src.Negate = NEGATE_NONE;
+   src.Abs = 0;
+
+   return src;
+}
+
+/**
+ * Build a plain source register naming the same file/index as \p dst.
+ * The destination's writemask and condition fields are not carried over.
+ */
+static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
+{
+   const GLuint file = dst.File;
+   const GLuint index = dst.Index;
+
+   return src_reg(file, index);
+}
+
+/**
+ * Build the "no operand" source register: file PROGRAM_UNDEFINED,
+ * index 0.
+ */
+static struct prog_src_register src_undef( void )
+{
+   struct prog_src_register undef = src_reg(PROGRAM_UNDEFINED, 0);
+
+   return undef;
+}
+
+/**
+ * Tell whether \p src is an undefined operand, i.e. lives in the
+ * PROGRAM_UNDEFINED file.
+ */
+static GLboolean src_is_undef(struct prog_src_register src)
+{
+   if (src.File == PROGRAM_UNDEFINED)
+      return GL_TRUE;
+
+   return GL_FALSE;
+}
+
+/**
+ * Return a copy of \p reg whose swizzle is replaced by (x, y, z, w).
+ * The new swizzle overwrites the old one; it is not composed with it.
+ */
+static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
+{
+   struct prog_src_register swizzled = reg;
+
+   swizzled.Swizzle = MAKE_SWIZZLE4(x, y, z, w);
+   return swizzled;
+}
+
+/**
+ * Return a copy of \p reg that reads component \p x on all four
+ * channels (swizzle x,x,x,x).
+ */
+static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
+{
+   reg.Swizzle = MAKE_SWIZZLE4(x, x, x, x);
+   return reg;
+}
+
+/**
+ * Return a copy of \p reg whose swizzle is replaced by the already
+ * packed four-component \p swizzle value.
+ *
+ * The parameter is a GLuint like everywhere else in this file; the
+ * non-standard 'uint' typedef is not available on every platform.
+ */
+static struct prog_src_register src_swizzle4( struct prog_src_register reg, GLuint swizzle )
+{
+   reg.Swizzle = swizzle;
+   return reg;
+}
+
+
+/***********************************************************************
+ * Dest regs
+ */
+
+/**
+ * Build a plain destination register: all four channels writable, no
+ * relative addressing, condition always true (COND_TR).
+ *
+ * \param file  register file
+ * \param idx   index within that file
+ */
+static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
+{
+   struct prog_dst_register dst;
+
+   /* Which register and which channels: */
+   dst.File = file;
+   dst.Index = idx;
+   dst.RelAddr = 0;
+   dst.WriteMask = WRITEMASK_XYZW;
+
+   /* Unconditional write: */
+   dst.CondMask = COND_TR;
+   dst.CondSwizzle = 0;
+   dst.CondSrc = 0;
+
+   return dst;
+}
+
+/**
+ * Return a copy of \p reg whose writemask is narrowed to the channels
+ * also present in \p mask.  Channels can only be removed, never added.
+ */
+static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
+{
+   struct prog_dst_register narrowed = reg;
+
+   narrowed.WriteMask = narrowed.WriteMask & mask;
+   return narrowed;
+}
+
+/**
+ * Build the "no destination" register: file PROGRAM_UNDEFINED, index 0.
+ */
+static struct prog_dst_register dst_undef( void )
+{
+   struct prog_dst_register undef = dst_reg(PROGRAM_UNDEFINED, 0);
+
+   return undef;
+}
+
+
+
+/**
+ * Allocate an internal temporary register.
+ *
+ * c->fp_temp is a bitmask of the internal temporaries in use; the lowest
+ * clear bit is claimed and mapped to PROGRAM_TEMPORARY index
+ * FIRST_INTERNAL_TEMP + bit.  If every bit is taken a message is printed
+ * and the process exits.
+ *
+ * \return a destination register naming the new temporary; hand it back
+ *         with release_temp() when done.
+ */
+static struct prog_dst_register get_temp( struct brw_wm_compile *c )
+{
+   /* _mesa_ffs() is 1-based; 0 means no free bit was found. */
+   int bit = _mesa_ffs( ~c->fp_temp );
+
+   if (!bit) {
+      printf("%s: out of temporaries\n", __FILE__);
+      exit(1);
+   }
+
+   /* Unsigned shift: '1 << 31' on a signed int is undefined behaviour,
+    * and bit can legitimately be 32 here.
+    */
+   c->fp_temp |= 1u << (bit - 1);
+   return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
+}
+
+
+/**
+ * Give back an internal temporary obtained from get_temp() by clearing
+ * its bit in c->fp_temp.
+ *
+ * \param temp  register previously returned by get_temp(); its Index is
+ *              expected to be >= FIRST_INTERNAL_TEMP
+ */
+static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
+{
+   /* Unsigned shift: '1 << 31' on a signed int is undefined behaviour. */
+   c->fp_temp &= ~(1u << (temp.Index - FIRST_INTERNAL_TEMP));
+}
+
+
+/***********************************************************************
+ * Instructions 
+ */
+
+/**
+ * Claim the next free slot of c->prog_instructions, zero it and bump
+ * c->nr_fp_insns.
+ *
+ * The capacity (BRW_WM_MAX_INSN) is only checked by an assert.
+ *
+ * \return pointer to the freshly cleared instruction
+ */
+static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
+{
+   struct prog_instruction *slot;
+
+   assert(c->nr_fp_insns < BRW_WM_MAX_INSN);
+
+   slot = &c->prog_instructions[c->nr_fp_insns];
+   c->nr_fp_insns++;
+
+   memset(slot, 0, sizeof(*slot));
+   return slot;
+}
+
+/**
+ * Append a verbatim copy of \p inst0 to the instruction list being built.
+ *
+ * \return pointer to the copy inside c->prog_instructions
+ */
+static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
+                                          const struct prog_instruction *inst0)
+{
+   struct prog_instruction *copy = get_fp_inst(c);
+
+   *copy = *inst0;
+
+   return copy;
+}
+
+/**
+ * Append a new instruction, including its texture fields, to the
+ * instruction list being built.
+ *
+ * \param c               WM compile state
+ * \param op              opcode
+ * \param dest            destination register
+ * \param saturate        stored as the instruction's SaturateMode
+ * \param tex_src_unit    texture unit, or TEX_UNIT_NONE
+ * \param tex_src_target  texture target, or TEX_TARGET_NONE
+ * \param tex_shadow      stored as the instruction's TexShadow
+ * \param src0,src1,src2  source operands (src_undef() when unused)
+ * \return pointer to the new instruction
+ *
+ * As a side effect the bit for \p tex_src_unit is set in
+ * c->fp->tex_units_used, unless the unit is TEX_UNIT_NONE.
+ */
+static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
+                                             GLuint op,
+                                             struct prog_dst_register dest,
+                                             GLuint saturate,
+                                             GLuint tex_src_unit,
+                                             GLuint tex_src_target,
+                                             GLuint tex_shadow,
+                                             struct prog_src_register src0,
+                                             struct prog_src_register src1,
+                                             struct prog_src_register src2 )
+{
+   struct prog_instruction *out = get_fp_inst(c);
+
+   assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
+          tex_src_unit == TEX_UNIT_NONE);
+   assert(tex_src_target < NUM_TEXTURE_TARGETS ||
+          tex_src_target == TEX_TARGET_NONE);
+
+   /* Record which texture units this program references. */
+   if (tex_src_unit != TEX_UNIT_NONE) {
+      c->fp->tex_units_used |= (1 << tex_src_unit);
+   }
+
+   memset(out, 0, sizeof(*out));
+
+   /* Operation and result: */
+   out->Opcode = op;
+   out->DstReg = dest;
+   out->SaturateMode = saturate;
+
+   /* Operands: */
+   out->SrcReg[0] = src0;
+   out->SrcReg[1] = src1;
+   out->SrcReg[2] = src2;
+
+   /* Texture state: */
+   out->TexSrcUnit = tex_src_unit;
+   out->TexSrcTarget = tex_src_target;
+   out->TexShadow = tex_shadow;
+
+   return out;
+}
+   
+
+/**
+ * Append a non-texture instruction: emit_tex_op() with no texture unit,
+ * no texture target and shadow disabled.
+ *
+ * \return pointer to the new instruction
+ */
+static struct prog_instruction * emit_op(struct brw_wm_compile *c,
+                                         GLuint op,
+                                         struct prog_dst_register dest,
+                                         GLuint saturate,
+                                         struct prog_src_register src0,
+                                         struct prog_src_register src1,
+                                         struct prog_src_register src2 )
+{
+   const GLuint no_unit = TEX_UNIT_NONE;
+   const GLuint no_target = TEX_TARGET_NONE;
+   const GLuint no_shadow = 0;
+
+   return emit_tex_op(c, op, dest, saturate,
+                      no_unit, no_target, no_shadow,
+                      src0, src1, src2);
+}
+
+
+/**
+ * Emit an opcode whose result is the same on every written channel.
+ *
+ * Many Mesa opcodes splat one value across all result channels.  Rather
+ * than supporting that in each opcode implementation, the opcode is
+ * emitted for the lowest enabled channel only and a MOV then copies that
+ * channel to the remaining enabled ones -- a pattern brw_wm_pass*.c
+ * already understands and can optimize away.
+ *
+ * \return the last instruction emitted (the MOV if one was needed,
+ *         otherwise the opcode itself), or NULL if the writemask is empty
+ */
+static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
+                                                 const struct prog_instruction *inst0)
+{
+   const unsigned int full_mask = inst0->DstReg.WriteMask;
+   struct prog_instruction *last;
+   unsigned int first_chan;
+   unsigned int first_bit;
+   unsigned int remaining;
+
+   /* Nothing is written: nothing to emit. */
+   if (full_mask == 0)
+      return NULL;
+
+   first_chan = _mesa_ffs(full_mask) - 1;
+   first_bit = 1 << first_chan;
+
+   /* The opcode itself, restricted to the first enabled channel. */
+   last = emit_insn(c, inst0);
+   last->DstReg.WriteMask = first_bit;
+
+   /* Replicate that channel into whatever else was requested. */
+   remaining = full_mask & ~first_bit;
+   if (remaining != 0) {
+      last = emit_op(c,
+                     OPCODE_MOV,
+                     dst_mask(inst0->DstReg, remaining),
+                     0,
+                     src_swizzle1(src_reg_from_dst(inst0->DstReg), first_chan),
+                     src_undef(),
+                     src_undef());
+   }
+
+   return last;
+}
+
+
+/***********************************************************************
+ * Special instructions for interpolation and other tasks
+ */
+
+/**
+ * Return the register holding the WM_PIXELXY result, emitting the
+ * instruction on first use and caching the register in c->pixel_xy.
+ */
+static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
+{
+   struct prog_dst_register xy_tmp;
+   struct prog_src_register depth_payload;
+
+   /* Already computed earlier in this program. */
+   if (!src_is_undef(c->pixel_xy))
+      return c->pixel_xy;
+
+   xy_tmp = get_temp(c);
+   depth_payload = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+
+   /* Emit the calculation once and hold onto the result:
+    *
+    *   xy_tmp.xy = PIXELXY payload[depth]
+    */
+   emit_op(c,
+           WM_PIXELXY,
+           dst_mask(xy_tmp, WRITEMASK_XY),
+           0,
+           depth_payload,
+           src_undef(),
+           src_undef());
+
+   c->pixel_xy = src_reg_from_dst(xy_tmp);
+   return c->pixel_xy;
+}
+
+/**
+ * Return the register holding the WM_DELTAXY result, emitting the
+ * instruction (and, if needed, WM_PIXELXY before it) on first use and
+ * caching the register in c->delta_xy.
+ */
+static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
+{
+   struct prog_dst_register delta_tmp;
+   struct prog_src_register xy;
+   struct prog_src_register depth_payload;
+
+   /* Already computed earlier in this program. */
+   if (!src_is_undef(c->delta_xy))
+      return c->delta_xy;
+
+   /* Take our own temporary first, then make sure pixel_xy exists. */
+   delta_tmp = get_temp(c);
+   xy = get_pixel_xy(c);
+   depth_payload = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+
+   /* delta_tmp.xy = DELTAXY pixel_xy, payload[depth]
+    */
+   emit_op(c,
+           WM_DELTAXY,
+           dst_mask(delta_tmp, WRITEMASK_XY),
+           0,
+           xy,
+           depth_payload,
+           src_undef());
+
+   c->delta_xy = src_reg_from_dst(delta_tmp);
+   return c->delta_xy;
+}
+
+/**
+ * Return the register holding the WM_PIXELW result, emitting the
+ * instruction (and its WM_DELTAXY / WM_PIXELXY prerequisites) on first
+ * use and caching the register in c->pixel_w.
+ */
+static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
+{
+   struct prog_dst_register w_tmp;
+   struct prog_src_register delta;
+   struct prog_src_register wpos_interp;
+
+   /* Already computed earlier in this program. */
+   if (!src_is_undef(c->pixel_w))
+      return c->pixel_w;
+
+   /* Take our own temporary first, then make sure the deltas exist. */
+   w_tmp = get_temp(c);
+   delta = get_delta_xy(c);
+   wpos_interp = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
+
+   /* w_tmp.w = PIXELW payload.interp[wpos], deltas
+    */
+   emit_op(c,
+           WM_PIXELW,
+           dst_mask(w_tmp, WRITEMASK_W),
+           0,
+           wpos_interp,
+           delta,
+           src_undef());
+
+   c->pixel_w = src_reg_from_dst(w_tmp);
+   return c->pixel_w;
+}
+
+/**
+ * Emit the interpolation instructions that compute fragment input
+ * attribute \p idx into PROGRAM_INPUT[idx] from its payload setup data,
+ * and record the attribute in c->fp_interp_emitted.
+ *
+ * \param c    WM compile state
+ * \param idx  FRAG_ATTRIB_* index of the attribute to interpolate
+ */
+static void emit_interp( struct brw_wm_compile *c,
+			 GLuint idx )
+{
+   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
+   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
+   struct prog_src_register deltas = get_delta_xy(c);
+
+   /* Need to use PINTERP on attributes which have been
+    * multiplied by 1/W in the SF program, and LINTERP on those
+    * which have not:
+    */
+   switch (idx) {
+   case FRAG_ATTRIB_WPOS:
+      /* Have to treat wpos.xy specially:
+       */
+      emit_op(c,
+	      WM_WPOSXY,
+	      dst_mask(dst, WRITEMASK_XY),
+	      0,
+	      get_pixel_xy(c),
+	      src_undef(),
+	      src_undef());
+      
+      dst = dst_mask(dst, WRITEMASK_ZW);
+
+      /* PROGRAM_INPUT.wpos.zw = LINTERP payload.interp[wpos], deltas
+       */
+      emit_op(c,
+	      WM_LINTERP,
+	      dst,
+	      0,
+	      interp,
+	      deltas,
+	      src_undef());
+      break;
+   case FRAG_ATTRIB_COL0:
+   case FRAG_ATTRIB_COL1:
+      /* Colors: constant under flat shading, otherwise linear or
+       * perspective-correct depending on the program key.
+       */
+      if (c->key.flat_shade) {
+	 emit_op(c,
+		 WM_CINTERP,
+		 dst,
+		 0,
+		 interp,
+		 src_undef(),
+		 src_undef());
+      }
+      else {
+         if (c->key.linear_color) {
+            emit_op(c,
+                    WM_LINTERP,
+                    dst,
+                    0,
+                    interp,
+                    deltas,
+                    src_undef());
+         }
+         else {
+            /* perspective-corrected color interpolation */
+            emit_op(c,
+                    WM_PINTERP,
+                    dst,
+                    0,
+                    interp,
+                    deltas,
+                    get_pixel_w(c));
+         }
+      }
+      break;
+   case FRAG_ATTRIB_FOGC:
+      /* Interpolate the fog coordinate */
+      emit_op(c,
+	      WM_PINTERP,
+	      dst_mask(dst, WRITEMASK_X),
+	      0,
+	      interp,
+	      deltas,
+	      get_pixel_w(c));
+
+      /* Fill the unused channels with the constants (0, 0, 1): */
+      emit_op(c,
+	      OPCODE_MOV,
+	      dst_mask(dst, WRITEMASK_YZW),
+	      0,
+	      src_swizzle(interp,
+			  SWIZZLE_ZERO,
+			  SWIZZLE_ZERO,
+			  SWIZZLE_ZERO,
+			  SWIZZLE_ONE),
+	      src_undef(),
+	      src_undef());
+      break;
+
+   case FRAG_ATTRIB_FACE:
+      /* Facing is not interpolated; WM_FRONTFACING takes no sources. */
+      emit_op(c,
+              WM_FRONTFACING,
+              dst_mask(dst, WRITEMASK_X),
+              0,
+              src_undef(),
+              src_undef(),
+              src_undef());
+      break;
+
+   case FRAG_ATTRIB_PNTC:
+      /* XXX review/test this case */
+      emit_op(c,
+	      WM_PINTERP,
+	      dst_mask(dst, WRITEMASK_XY),
+	      0,
+	      interp,
+	      deltas,
+	      get_pixel_w(c));
+
+      /* Fill the unused channels with the constants (0, 1): */
+      emit_op(c,
+	      OPCODE_MOV,
+	      dst_mask(dst, WRITEMASK_ZW),
+	      0,
+	      src_swizzle(interp,
+			  SWIZZLE_ZERO,
+			  SWIZZLE_ZERO,
+			  SWIZZLE_ZERO,
+			  SWIZZLE_ONE),
+	      src_undef(),
+	      src_undef());
+      break;
+
+   default:
+      /* Everything else: perspective-correct on all four channels. */
+      emit_op(c,
+	      WM_PINTERP,
+	      dst,
+	      0,
+	      interp,
+	      deltas,
+	      get_pixel_w(c));
+      break;
+   }
+
+   /* Remember that this attribute has been set up. */
+   c->fp_interp_emitted |= 1<<idx;
+}
+
+/***********************************************************************
+ * Hacks to extend the program parameter and constant lists.
+ */
+
+/**
+ * Find, or create, the state-variable parameter described by the five
+ * state tokens s0..s4 and return a PROGRAM_STATE_VAR source register
+ * for it.
+ *
+ * The parameter is added to the original program's own parameter list
+ * rather than to a new list.  Doesn't really do any harm and it's not
+ * as if the parameter handling isn't a big hack anyway.
+ *
+ * NOTE(review): only tokens[0..4] are filled in but all STATE_LENGTH
+ * entries are compared -- this relies on STATE_LENGTH being 5; confirm.
+ */
+static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, 
+                                                     GLint s0,
+                                                     GLint s1,
+                                                     GLint s2,
+                                                     GLint s3,
+                                                     GLint s4)
+{
+   struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
+   gl_state_index tokens[STATE_LENGTH];
+   GLuint slot;
+
+   tokens[0] = s0;
+   tokens[1] = s1;
+   tokens[2] = s2;
+   tokens[3] = s3;
+   tokens[4] = s4;
+
+   /* Reuse an existing reference to the same state if there is one. */
+   for (slot = 0; slot < params->NumParameters; slot++) {
+      if (params->Parameters[slot].Type != PROGRAM_STATE_VAR)
+         continue;
+
+      if (memcmp(params->Parameters[slot].StateIndexes, tokens, sizeof(tokens)) != 0)
+         continue;
+
+      return src_reg(PROGRAM_STATE_VAR, slot);
+   }
+
+   /* Not there yet: append it. */
+   slot = _mesa_add_state_reference( params, tokens );
+
+   return src_reg(PROGRAM_STATE_VAR, slot);
+}
+
+
+/**
+ * Find, or create, the four-float constant (s0, s1, s2, s3) in the
+ * program's parameter list and return a source register for it.
+ *
+ * Existing entries are matched by a bytewise compare of the four floats.
+ * The register is reported as PROGRAM_STATE_VAR, not PROGRAM_CONSTANT
+ * (see the XXX note below).
+ */
+static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, 
+                                                       GLfloat s0,
+                                                       GLfloat s1,
+                                                       GLfloat s2,
+                                                       GLfloat s3)
+{
+   struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
+   GLfloat wanted[4];
+   GLuint slot;
+   GLuint swizzle;
+
+   wanted[0] = s0;
+   wanted[1] = s1;
+   wanted[2] = s2;
+   wanted[3] = s3;
+
+   /* Have to search, otherwise multiple compilations will each grow
+    * the parameter list.
+    */
+   for (slot = 0; slot < params->NumParameters; slot++) {
+      if (params->Parameters[slot].Type != PROGRAM_CONSTANT)
+         continue;
+
+      if (memcmp(params->ParameterValues[slot], wanted, sizeof(wanted)) != 0)
+         continue;
+
+      /* XXX: this mimics the mesa bug which puts all constants and
+       * parameters into the "PROGRAM_STATE_VAR" category:
+       */
+      return src_reg(PROGRAM_STATE_VAR, slot);
+   }
+
+   /* Not found: append a new unnamed constant. */
+   slot = _mesa_add_unnamed_constant( params, wanted, 4, &swizzle );
+   assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
+   return src_reg(PROGRAM_STATE_VAR, slot);
+}
+
+
+
+/***********************************************************************
+ * Expand various instructions here to simpler forms.  
+ */
+/**
+ * Expand a DST instruction into simpler ones:
+ *
+ *   dst.y  = src0.y * src1.y      (MUL)
+ *   dst.xz = (1, src0.z)          (SWZ)
+ *   dst.w  = src1.w               (MOV)
+ *
+ * Only the channels enabled in the writemask are emitted; each piece
+ * carries the original instruction's saturate mode.
+ */
+static void precalc_dst( struct brw_wm_compile *c,
+                         const struct prog_instruction *inst )
+{
+   const struct prog_src_register a = inst->SrcReg[0];
+   const struct prog_src_register b = inst->SrcReg[1];
+   const struct prog_dst_register result = inst->DstReg;
+
+   if (result.WriteMask & WRITEMASK_Y) {
+      /* result.y = mul a.y, b.y */
+      emit_op(c,
+              OPCODE_MUL,
+              dst_mask(result, WRITEMASK_Y),
+              inst->SaturateMode,
+              a,
+              b,
+              src_undef());
+   }
+
+   if (result.WriteMask & WRITEMASK_XZ) {
+      /* Whatever component src0 reads for its Z channel. */
+      const GLuint z = GET_SWZ(a.Swizzle, Z);
+
+      /* result.xz = swz a.1zzz */
+      struct prog_instruction *swz =
+         emit_op(c,
+                 OPCODE_SWZ,
+                 dst_mask(result, WRITEMASK_XZ),
+                 inst->SaturateMode,
+                 src_swizzle(a, SWIZZLE_ONE, z, z, z),
+                 src_undef(),
+                 src_undef());
+
+      /* Avoid letting negation flag of src0 affect our 1 constant. */
+      swz->SrcReg[0].Negate &= ~NEGATE_X;
+   }
+
+   if (result.WriteMask & WRITEMASK_W) {
+      /* result.w = mov b.w */
+      emit_op(c,
+              OPCODE_MOV,
+              dst_mask(result, WRITEMASK_W),
+              inst->SaturateMode,
+              b,
+              src_undef(),
+              src_undef());
+   }
+}
+
+
+/**
+ * Expand a LIT instruction: the constant X and W results (both 1) are
+ * produced by a SWZ, leaving only the Y and Z channels to the real LIT
+ * opcode.
+ *
+ * The SWZ is emitted without saturation; the LIT keeps the original
+ * instruction's saturate mode.
+ */
+static void precalc_lit( struct brw_wm_compile *c,
+                         const struct prog_instruction *inst )
+{
+   const struct prog_src_register src = inst->SrcReg[0];
+   const struct prog_dst_register result = inst->DstReg;
+
+   if (result.WriteMask & WRITEMASK_XW) {
+      /* result.xw = swz src.1111 */
+      struct prog_instruction *ones =
+         emit_op(c,
+                 OPCODE_SWZ,
+                 dst_mask(result, WRITEMASK_XW),
+                 0,
+                 src_swizzle1(src, SWIZZLE_ONE),
+                 src_undef(),
+                 src_undef());
+
+      /* Avoid letting the negation flag of src0 affect our 1 constant. */
+      ones->SrcReg[0].Negate = NEGATE_NONE;
+   }
+
+   if (result.WriteMask & WRITEMASK_YZ) {
+      /* result.yz = lit src */
+      emit_op(c,
+              OPCODE_LIT,
+              dst_mask(result, WRITEMASK_YZ),
+              inst->SaturateMode,
+              src,
+              src_undef(),
+              src_undef());
+   }
+}
+
+
+/**
+ * Some TEX instructions require extra code, cube map coordinate
+ * normalization, or coordinate scaling for RECT textures, etc.
+ * This function emits those extra instructions and the TEX
+ * instruction itself.
+ *
+ * Steps, in emission order:
+ *  - cube maps: scale the coordinate by 1 / max(|x|, |y|, |z|);
+ *  - RECT textures: multiply the coordinate by the STATE_TEXRECT_SCALE
+ *    state vector of the texture unit;
+ *  - emit the TEX itself, followed by a YCbCr -> RGB conversion when the
+ *    unit's bit is set in key.yuvtex_mask;
+ *  - apply key.tex_swizzles[unit] to the result unless it is SWIZZLE_NOOP.
+ *
+ * \param c  compile state; instructions are appended through emit_op()
+ *           and emit_tex_op()
+ * \param inst  the original texture instruction (read only).  Only its
+ *              registers, target, shadow and saturate fields are used;
+ *              the emitted opcode is always OPCODE_TEX.
+ */
+static void precalc_tex( struct brw_wm_compile *c,
+			 const struct prog_instruction *inst )
+{
+   struct prog_src_register coord;
+   struct prog_dst_register tmpcoord;
+   /* Translate the instruction's sampler index into a texture unit. */
+   const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+
+   assert(unit < BRW_MAX_TEX_UNIT);
+
+   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
+       struct prog_instruction *out;
+       struct prog_dst_register tmp0 = get_temp(c);
+       struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
+       struct prog_dst_register tmp1 = get_temp(c);
+       struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
+       struct prog_src_register src0 = inst->SrcReg[0];
+
+       /* find longest component of coord vector and normalize it */
+       tmpcoord = get_temp(c);
+       coord = src_reg_from_dst(tmpcoord);
+
+       /* tmpcoord = |src0|: the MOV source has its negate flags cleared
+        * and Abs set right after emission.
+        */
+       out = emit_op(c, OPCODE_MOV,
+                     tmpcoord,
+                     0,
+                     src0,
+                     src_undef(),
+                     src_undef());
+       out->SrcReg[0].Negate = NEGATE_NONE;
+       out->SrcReg[0].Abs = 1;
+
+       /* tmp0 = MAX(coord.X, coord.Y) */
+       emit_op(c, OPCODE_MAX,
+               tmp0,
+               0,
+               src_swizzle1(coord, X),
+               src_swizzle1(coord, Y),
+               src_undef());
+
+       /* tmp1 = MAX(tmp0, coord.Z) */
+       emit_op(c, OPCODE_MAX,
+               tmp1,
+               0,
+               tmp0src,
+               src_swizzle1(coord, Z),
+               src_undef());
+
+       /* tmp0.x = 1 / tmp1 */
+       emit_op(c, OPCODE_RCP,
+               dst_mask(tmp0, WRITEMASK_X),
+               0,
+               tmp1src,
+               src_undef(),
+               src_undef());
+
+       /* tmpcoord = src0 * tmp0.x  (the original, signed coordinate) */
+       emit_op(c, OPCODE_MUL,
+               tmpcoord,
+               0,
+               src0,
+               src_swizzle1(tmp0src, SWIZZLE_X),
+               src_undef());
+
+       /* tmpcoord stays live until the end of the function */
+       release_temp(c, tmp0);
+       release_temp(c, tmp1);
+   }
+   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+      struct prog_src_register scale = 
+	 search_or_add_param5( c, 
+			       STATE_INTERNAL, 
+			       STATE_TEXRECT_SCALE,
+			       unit,
+			       0,0 );
+
+      tmpcoord = get_temp(c);
+
+      /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
+       * (the Z and W factors are the constant 1)
+       */
+      emit_op(c,
+	      OPCODE_MUL,
+	      tmpcoord,
+	      0,
+	      inst->SrcReg[0],
+	      src_swizzle(scale,
+			  SWIZZLE_X,
+			  SWIZZLE_Y,
+			  SWIZZLE_ONE,
+			  SWIZZLE_ONE),
+	      src_undef());
+
+      coord = src_reg_from_dst(tmpcoord);
+   }
+   else {
+      /* no adjustment needed: sample with the original coordinate */
+      coord = inst->SrcReg[0];
+   }
+
+   /* Need to emit YUV texture conversions by hand.  Probably need to
+    * do this here - the alternative is in brw_wm_emit.c, but the
+    * conversion requires allocating a temporary variable which we
+    * don't have the facility to do that late in the compilation.
+    */
+   if (c->key.yuvtex_mask & (1 << unit)) {
+      /* convert ycbcr to RGBA */
+      GLboolean  swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
+
+      /* 
+	 CONST C0 = { -.5, -.0625,  -.5, 1.164 }
+	 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
+	 UYV     = TEX ...
+	 UYV.xyz = ADD UYV,     C0
+	 UYV.y   = MUL UYV.y,   C0.w
+	 if (UV swapped)
+	    RGB.xyz = MAD UYV.zzx, C1,   UYV.y
+	 else
+	    RGB.xyz = MAD UYV.xxz, C1,   UYV.y 
+	 RGB.y   = MAD UYV.z,   C1.w, RGB.y
+      */
+      struct prog_dst_register dst = inst->DstReg;
+      struct prog_dst_register tmp = get_temp(c);
+      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
+      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
+      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
+
+      /* tmp     = TEX ...
+       */
+      emit_tex_op(c, 
+                  OPCODE_TEX,
+                  tmp,
+                  inst->SaturateMode,
+                  unit,
+                  inst->TexSrcTarget,
+                  inst->TexShadow,
+                  coord,
+                  src_undef(),
+                  src_undef());
+
+      /* tmp.xyz =  ADD TMP, C0
+       */
+      emit_op(c,
+	      OPCODE_ADD,
+	      dst_mask(tmp, WRITEMASK_XYZ),
+	      0,
+	      tmpsrc,
+	      C0,
+	      src_undef());
+
+      /* YUV.y   = MUL YUV.y, C0.w
+       */
+
+      emit_op(c,
+	      OPCODE_MUL,
+	      dst_mask(tmp, WRITEMASK_Y),
+	      0,
+	      tmpsrc,
+	      src_swizzle1(C0, W),
+	      src_undef());
+
+      /* 
+       * if (UV swapped)
+       *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
+       * else
+       *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
+       */
+
+      emit_op(c,
+	      OPCODE_MAD,
+	      dst_mask(dst, WRITEMASK_XYZ),
+	      0,
+	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
+	      C1,
+	      src_swizzle1(tmpsrc, Y));
+
+      /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
+       */
+      emit_op(c,
+	      OPCODE_MAD,
+	      dst_mask(dst, WRITEMASK_Y),
+	      0,
+	      src_swizzle1(tmpsrc, Z),
+	      src_swizzle1(C1, W),
+	      src_swizzle1(src_reg_from_dst(dst), Y));
+
+      /* NOTE(review): the conversion instructions are emitted with
+       * saturate 0 and the destination W channel is not written on this
+       * path - confirm both are intended.
+       */
+      release_temp(c, tmp);
+   }
+   else {
+      /* ordinary RGBA tex instruction */
+      emit_tex_op(c, 
+                  OPCODE_TEX,
+                  inst->DstReg,
+                  inst->SaturateMode,
+                  unit,
+                  inst->TexSrcTarget,
+                  inst->TexShadow,
+                  coord,
+                  src_undef(),
+                  src_undef());
+   }
+
+   /* For GL_EXT_texture_swizzle: */
+   if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
+      /* swizzle the result of the TEX instruction */
+      struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
+      emit_op(c, OPCODE_SWZ,
+              inst->DstReg,
+              SATURATE_OFF, /* saturate already done above */
+              src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
+              src_undef(),
+              src_undef());
+   }
+
+   /* tmpcoord was only allocated on the RECT and cube-map paths */
+   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
+       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
+      release_temp(c, tmpcoord);
+}
+
+
+/**
+ * Decide whether a TXP instruction really needs its coordinate divided
+ * by W.
+ *
+ * Only the simplest cases are recognised; anything else is assumed to
+ * need the divide.  More complex cases typically only arise from
+ * user-provided fragment programs.
+ *
+ * \return GL_FALSE for cube-map targets, and for a PROGRAM_INPUT source
+ *         whose W channel is not swizzled and whose bit is clear in
+ *         key.proj_attrib_mask; GL_TRUE otherwise.
+ */
+static GLboolean projtex( struct brw_wm_compile *c,
+                          const struct prog_instruction *inst )
+{
+   const struct prog_src_register coord = inst->SrcReg[0];
+
+   assert(inst->Opcode == OPCODE_TXP);
+
+   /* ut2004 gun rendering !?! */
+   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
+      return GL_FALSE;
+
+   /* Anything that is not a plain input attribute gets the divide. */
+   if (coord.File != PROGRAM_INPUT)
+      return GL_TRUE;
+
+   /* W is taken from some other component of the attribute. */
+   if (GET_SWZ(coord.Swizzle, W) != W)
+      return GL_TRUE;
+
+   /* The attribute is flagged in the key's projective-attribute mask. */
+   if ((c->key.proj_attrib_mask & (1 << coord.Index)) != 0)
+      return GL_TRUE;
+
+   return GL_FALSE;
+}
+
+
+/**
+ * Emit code for TXP.
+ *
+ * When projtex() reports that the divide is needed, the coordinate is
+ * multiplied by 1/w into a temporary and the regular TEX lowering is run
+ * on a copy of the instruction that samples from that temporary.
+ * Otherwise the instruction is handed to precalc_tex() unchanged.
+ */
+static void precalc_txp( struct brw_wm_compile *c,
+                         const struct prog_instruction *inst )
+{
+   struct prog_src_register coord = inst->SrcReg[0];
+   struct prog_dst_register proj;
+   struct prog_instruction tex;
+
+   if (!projtex(c, inst)) {
+      /* dst = precalc(TEX coord) */
+      precalc_tex(c, inst);
+      return;
+   }
+
+   proj = get_temp(c);
+
+   /* proj.w = RCP coord.w */
+   emit_op(c, OPCODE_RCP, dst_mask(proj, WRITEMASK_W), 0,
+           src_swizzle1(coord, GET_SWZ(coord.Swizzle, W)),
+           src_undef(), src_undef());
+
+   /* proj.xyz = MUL coord, proj.wwww */
+   emit_op(c, OPCODE_MUL, dst_mask(proj, WRITEMASK_XYZ), 0,
+           coord,
+           src_swizzle1(src_reg_from_dst(proj), W),
+           src_undef());
+
+   /* dst = precalc(TEX proj): same instruction, new coordinate source */
+   tex = *inst;
+   tex.SrcReg[0] = src_reg_from_dst(proj);
+   precalc_tex(c, &tex);
+
+   release_temp(c, proj);
+}
+
+
+
+/**
+ * Emit the WM_FB_WRITE instruction(s) that end the program.
+ *
+ * The Aux field of each emitted instruction carries the render target
+ * index and, on the final write, the EOT marker.
+ *
+ * With more than one colour region, one write per region is emitted from
+ * FRAG_RESULT_DATA0 + i; when the program wrote FRAG_RESULT_COLOR a
+ * second write from that output follows for the same target.  With a
+ * single region, FRAG_RESULT_DATA0 is used if the program writes it,
+ * FRAG_RESULT_COLOR otherwise.
+ */
+static void emit_render_target_writes( struct brw_wm_compile *c )
+{
+   struct prog_src_register r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+   struct prog_src_register frag_depth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
+   struct prog_src_register color;
+   struct prog_instruction *write;
+   GLuint target;
+
+   if (!(c->key.nr_color_regions > 1)) {
+      /* single target: prefer gl_FragData[0] when it is written */
+      if (c->fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0))
+         color = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
+      else
+         color = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
+
+      write = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
+                      0, color, r0_depth, frag_depth);
+      write->Aux = INST_AUX_EOT | INST_AUX_TARGET(0);
+      return;
+   }
+
+   for (target = 0; target < c->key.nr_color_regions; target++) {
+      color = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + target);
+      write = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
+                      0, color, r0_depth, frag_depth);
+      write->Aux = INST_AUX_TARGET(target);
+
+      if (c->fp_fragcolor_emitted) {
+         color = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
+         write = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0),
+                         0, color, r0_depth, frag_depth);
+         write->Aux = INST_AUX_TARGET(target);
+      }
+   }
+
+   /* the most recently emitted write terminates the thread */
+   write->Aux |= INST_AUX_EOT;
+}
+
+
+
+
+/***********************************************************************
+ * Emit INTERP instructions ahead of first use of each attrib.
+ */
+
+/**
+ * For every PROGRAM_INPUT register read by \p inst, call emit_interp()
+ * unless the attribute's bit is already set in c->fp_interp_emitted.
+ * NOTE(review): presumably emit_interp() sets that bit itself - confirm.
+ */
+static void validate_src_regs( struct brw_wm_compile *c,
+                               const struct prog_instruction *inst )
+{
+   const GLuint num_srcs = brw_wm_nr_args( inst->Opcode );
+   GLuint arg;
+
+   for (arg = 0; arg < num_srcs; arg++) {
+      GLuint attrib;
+
+      if (inst->SrcReg[arg].File != PROGRAM_INPUT)
+         continue;
+
+      attrib = inst->SrcReg[arg].Index;
+      if (c->fp_interp_emitted & (1<<attrib))
+         continue;   /* already handled */
+
+      emit_interp(c, attrib);
+   }
+}
+	 
+/**
+ * Record in c->fp_fragcolor_emitted whether \p inst writes the
+ * FRAG_RESULT_COLOR output.  The flag is only ever set here, never
+ * cleared.
+ */
+static void validate_dst_regs( struct brw_wm_compile *c,
+                               const struct prog_instruction *inst )
+{
+   GLuint output;
+
+   if (inst->DstReg.File != PROGRAM_OUTPUT)
+      return;
+
+   output = inst->DstReg.Index;
+   if (output == FRAG_RESULT_COLOR)
+      c->fp_fragcolor_emitted = 1;
+}
+
+/**
+ * Debug helper: print \p nr instructions starting at \p insn, one per
+ * line, each prefixed with its position.  Opcodes below MAX_OPCODE go
+ * through the generic Mesa printer, opcodes below MAX_WM_OPCODE are
+ * printed with their name from wm_opcode_strings[], and anything else is
+ * printed as a raw number.
+ */
+static void print_insns( const struct prog_instruction *insn,
+                         GLuint nr )
+{
+   GLuint n;
+
+   for (n = 0; n < nr; n++) {
+      const struct prog_instruction *cur = &insn[n];
+
+      printf("%3d: ", n);
+
+      if (cur->Opcode < MAX_OPCODE) {
+         _mesa_print_instruction(cur);
+      }
+      else if (cur->Opcode < MAX_WM_OPCODE) {
+         /* driver-private opcode: index the name table from MAX_OPCODE */
+         GLuint name = cur->Opcode - MAX_OPCODE;
+
+         _mesa_print_alu_instruction(cur, wm_opcode_strings[name], 3);
+      }
+      else {
+         printf("965 Opcode %d\n", cur->Opcode);
+      }
+   }
+}
+
+
+/**
+ * Initial pass for fragment program code generation.
+ * This function is used by both the GLSL and non-GLSL paths.
+ *
+ * The pass resets the per-compile state it owns (pixel_xy, delta_xy,
+ * pixel_w, nr_fp_insns, tex_units_used), walks the program once to emit
+ * the preamble and note FRAG_RESULT_COLOR writes, then walks it again
+ * translating each instruction into the driver's instruction list.
+ * With DEBUG_WM set, the program is printed before and after.
+ *
+ * \param c  compile state; c->fp is the program being translated
+ */
+void brw_wm_pass_fp( struct brw_wm_compile *c )
+{
+   struct brw_fragment_program *fp = c->fp;
+   GLuint insn;
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      printf("pre-fp:\n");
+      _mesa_print_program(&fp->program.Base); 
+      printf("\n");
+   }
+
+   c->pixel_xy = src_undef();
+   c->delta_xy = src_undef();
+   c->pixel_w = src_undef();
+   c->nr_fp_insns = 0;
+   c->fp->tex_units_used = 0x0;
+
+   /* Emit preamble instructions.  This is where special instructions such as
+    * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
+    * compute shader inputs from varying vars.
+    */
+   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
+      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
+      validate_src_regs(c, inst);
+      validate_dst_regs(c, inst);
+   }
+
+   /* Loop over all instructions doing assorted simplifications and
+    * transformations.
+    */
+   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
+      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
+      struct prog_instruction *out;
+
+      /* Input interpolation was already emitted by the preamble loop
+       * above; from here on each opcode is either copied, rewritten in
+       * terms of a simpler opcode, or expanded by a precalc_*() helper.
+       */
+
+      switch (inst->Opcode) {
+      case OPCODE_SWZ: 
+	 /* SWZ becomes a MOV that keeps the extended swizzle */
+	 out = emit_insn(c, inst);
+	 out->Opcode = OPCODE_MOV;
+	 break;
+	 
+      case OPCODE_ABS:
+	 /* ABS becomes a MOV with the Abs source modifier */
+	 out = emit_insn(c, inst);
+	 out->Opcode = OPCODE_MOV;
+	 out->SrcReg[0].Negate = NEGATE_NONE;
+	 out->SrcReg[0].Abs = 1;
+	 break;
+
+      case OPCODE_SUB: 
+	 /* a - b  ==>  a + (-b): flip every negate bit of the second source */
+	 out = emit_insn(c, inst);
+	 out->Opcode = OPCODE_ADD;
+	 out->SrcReg[1].Negate ^= NEGATE_XYZW;
+	 break;
+
+      case OPCODE_SCS: 
+	 out = emit_insn(c, inst);
+	 /* This should probably be done in the parser. 
+	  */
+	 out->DstReg.WriteMask &= WRITEMASK_XY;
+	 break;
+	 
+      case OPCODE_DST:
+	 precalc_dst(c, inst);
+	 break;
+
+      case OPCODE_LIT:
+	 precalc_lit(c, inst);
+	 break;
+
+      case OPCODE_TEX:
+	 precalc_tex(c, inst);
+	 break;
+
+      case OPCODE_TXP:
+	 precalc_txp(c, inst);
+	 break;
+
+      case OPCODE_TXB:
+	 /* copied as-is, but with the sampler index replaced by its unit */
+	 out = emit_insn(c, inst);
+	 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+         assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
+	 break;
+
+      case OPCODE_XPD: 
+	 out = emit_insn(c, inst);
+	 /* This should probably be done in the parser. 
+	  */
+	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
+	 break;
+
+      case OPCODE_KIL: 
+	 out = emit_insn(c, inst);
+	 /* This should probably be done in the parser. 
+	  */
+	 out->DstReg.WriteMask = 0;
+	 break;
+      case OPCODE_END:
+	 /* END is replaced by the framebuffer write(s) */
+	 emit_render_target_writes(c);
+	 break;
+      case OPCODE_PRINT:
+	 /* dropped: nothing is emitted for PRINT */
+	 break;
+      default:
+	 if (brw_wm_is_scalar_result(inst->Opcode))
+	    emit_scalar_insn(c, inst);
+	 else
+	    emit_insn(c, inst);
+	 break;
+      }
+   }
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      printf("pass_fp:\n");
+      print_insns( c->prog_instructions, c->nr_fp_insns );
+      printf("\n");
+   }
+}
+
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
new file mode 100644
index 0000000000..fe3c89b721
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -0,0 +1,2141 @@
+#include "main/macros.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_optimize.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+
+/**
+ * Identifiers for shared instruction sequences.  invoke_subroutine()
+ * uses these values to index c->subroutines[].
+ * NOTE(review): presumably one entry per OPCODE_NOISE1..4 - confirm
+ * against the callers.
+ */
+enum _subroutine {
+    SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4
+};
+
+/* Forward declaration: prealloc_reg() calls this ahead of its definition. */
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+                                  const struct prog_instruction *inst,
+                                  GLuint component);
+
+/**
+ * Report whether a fragment program has to be compiled through the GLSL
+ * code path.
+ *
+ * That is the case when DEBUG_GLSL_FORCE is set, or when the program
+ * contains any of the opcodes listed below (conditionals, loops,
+ * subroutine calls, address register loads, noise).  Programs produced
+ * from GLSL that use none of them return GL_FALSE.
+ */
+GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
+{
+   int n;
+
+   if (INTEL_DEBUG & DEBUG_GLSL_FORCE)
+      return GL_TRUE;
+
+   for (n = 0; n < fp->Base.NumInstructions; n++) {
+      switch (fp->Base.Instructions[n].Opcode) {
+      /* flow control */
+      case OPCODE_IF:
+      case OPCODE_ENDIF:
+      case OPCODE_BGNLOOP:
+      case OPCODE_BRK:
+      case OPCODE_CAL:
+      case OPCODE_RET:
+      /* address register load */
+      case OPCODE_ARL:
+      /* noise functions */
+      case OPCODE_NOISE1:
+      case OPCODE_NOISE2:
+      case OPCODE_NOISE3:
+      case OPCODE_NOISE4:
+         return GL_TRUE;
+      default:
+         break;
+      }
+   }
+
+   return GL_FALSE;
+}
+
+
+
+/* Forward declaration: alloc_grf() calls this ahead of its definition. */
+static void
+reclaim_temps(struct brw_wm_compile *c);
+
+
+/**
+ * Flag GRF register \p r as occupied so that alloc_grf() will not hand
+ * it out.  \p r is used unchecked as an index into c->used_grf[].
+ */
+static void
+prealloc_grf(struct brw_wm_compile *c, int r)
+{
+   const int grf = r;
+
+   c->used_grf[grf] = GL_TRUE;
+}
+
+
+/**
+ * Return GRF register \p r to the free pool and pull the allocation
+ * search hint back, so the next alloc_grf() scan starts no later than
+ * \p r.  Releasing a register that is not marked used is tolerated.
+ */
+static void
+release_grf(struct brw_wm_compile *c, int r)
+{
+   c->used_grf[r] = GL_FALSE;
+
+   if (r < c->first_free_grf)
+      c->first_free_grf = r;
+}
+
+
+/**
+ * Find a free GRF, mark it used and return its index.
+ *
+ * The register file is scanned from c->first_free_grf.  If that finds
+ * nothing, reclaim_temps() is asked to free dead temporaries and the
+ * whole file is scanned once more from register 0.
+ *
+ * \return the GRF index, or -1 when every register is in use.  A
+ *         warning is printed the first time that happens in a
+ *         compilation (tracked through c->out_of_regs).
+ */
+static int
+alloc_grf(struct brw_wm_compile *c)
+{
+   GLuint grf;
+   int pass;
+
+   for (pass = 0; pass < 2; pass++) {
+      if (pass == 1) {
+         /* no free temps, try to reclaim some and rescan everything */
+         reclaim_temps(c);
+         c->first_free_grf = 0;
+      }
+
+      for (grf = c->first_free_grf; grf < BRW_WM_MAX_GRF; grf++) {
+         if (c->used_grf[grf])
+            continue;
+
+         c->used_grf[grf] = GL_TRUE;
+         c->first_free_grf = grf + 1;  /* a guess */
+         return grf;
+      }
+   }
+
+   /* sanity check: at this point every register must be taken */
+   for (grf = 0; grf < BRW_WM_MAX_GRF; grf++) {
+      assert(c->used_grf[grf]);
+   }
+
+   /* really, no free GRF regs found: warn once per compilation */
+   if (!c->out_of_regs) {
+      _mesa_warning(NULL, "i965: ran out of registers for fragment program");
+      c->out_of_regs = GL_TRUE;
+   }
+
+   return -1;
+}
+
+
+/**
+ * Return the size of the used part of the register file: the index of
+ * the highest GRF marked used, plus one, or 0 when none is used.
+ */
+static int
+num_grf_used(const struct brw_wm_compile *c)
+{
+   int count = BRW_WM_MAX_GRF;
+
+   /* drop unused registers from the top of the file */
+   while (count > 0 && !c->used_grf[count - 1])
+      count--;
+
+   return count;
+}
+
+
+
+/**
+ * Bind one component of a Mesa register to a hardware register and flag
+ * the binding as valid.
+ *
+ * \param file  register file, first index into c->wm_regs
+ * \param index  register number within the file
+ * \param component  channel of the register
+ * \param reg  hardware register to associate with that channel
+ */
+static void set_reg(struct brw_wm_compile *c, int file, int index,
+                    int component, struct brw_reg reg)
+{
+    c->wm_regs[file][index][component].inited = GL_TRUE;
+    c->wm_regs[file][index][component].reg = reg;
+}
+
+/**
+ * Hand out the next scratch register from c->tmp_regs[].
+ *
+ * Entries below c->tmp_max were allocated earlier and are reused; a new
+ * GRF is only requested from alloc_grf() once all of them are handed
+ * out.  Pair with mark_tmps() / release_tmps() to give registers back.
+ */
+static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
+{
+    struct brw_reg tmp;
+
+    /* every cached temp is in use: grow tmp_regs[] by one GRF */
+    if (c->tmp_index == c->tmp_max) {
+       int grf = alloc_grf(c);
+
+       if (grf < 0)
+          grf = 50; /* XXX random register! */
+
+       c->tmp_regs[ c->tmp_max ] = grf;
+       c->tmp_max++;
+    }
+
+    /* form the GRF register and advance to the next slot */
+    tmp = brw_vec8_grf(c->tmp_regs[ c->tmp_index ], 0);
+    c->tmp_index++;
+
+    assert(tmp.nr < BRW_WM_MAX_GRF);
+    return tmp;
+}
+
+/**
+ * Take a snapshot of the scratch register position.
+ * Pass the returned value to release_tmps() to give back every register
+ * obtained from alloc_tmp() since the snapshot.
+ */
+static int mark_tmps(struct brw_wm_compile *c)
+{
+    const int mark = c->tmp_index;
+
+    return mark;
+}
+
+/**
+ * Build the vec8 register for slot \p index of c->tmp_regs[] without
+ * changing the allocation state.  \p index is not range checked.
+ */
+static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index )
+{
+    struct brw_reg tmp = brw_vec8_grf( c->tmp_regs[ index ], 0 );
+
+    return tmp;
+}
+
+/**
+ * Rewind the scratch register position to \p mark, a value obtained
+ * from mark_tmps().  The GRFs stay recorded in c->tmp_regs[] and are
+ * handed out again by later alloc_tmp() calls.
+ */
+static void release_tmps(struct brw_wm_compile *c, int mark)
+{
+    const int restored = mark;
+
+    c->tmp_index = restored;
+}
+
+/**
+ * Convert Mesa src register to brw register.
+ *
+ * Since we're running in SOA mode each Mesa register corresponds to four
+ * hardware registers.  We allocate the hardware registers as needed here.
+ *
+ * PROGRAM_CONSTANT and PROGRAM_UNIFORM are folded into PROGRAM_STATE_VAR,
+ * so all three share one set of hardware registers.  PROGRAM_UNDEFINED
+ * and unknown files yield the null register.
+ *
+ * \param file  register file, one of PROGRAM_x
+ * \param index  register number (asserted to be below 256)
+ * \param component  src component (X=0, Y=1, Z=2, W=3)
+ * \param nr  unused by this function
+ * \param neg  negate bitmask; the bit tested is (1 << component)
+ * \param abs  take absolute value?
+ * \return the hardware register with the requested source modifiers.
+ *         When both are requested the result is -|reg|.
+ */
+static struct brw_reg 
+get_reg(struct brw_wm_compile *c, int file, int index, int component,
+        int nr, GLuint neg, GLuint abs)
+{
+    struct brw_reg reg;
+    switch (file) {
+	case PROGRAM_STATE_VAR:
+	case PROGRAM_CONSTANT:
+	case PROGRAM_UNIFORM:
+	    file = PROGRAM_STATE_VAR;
+	    break;
+	case PROGRAM_UNDEFINED:
+	    return brw_null_reg();	
+	case PROGRAM_TEMPORARY:
+	case PROGRAM_INPUT:
+	case PROGRAM_OUTPUT:
+	case PROGRAM_PAYLOAD:
+	    break;
+	default:
+	    _mesa_problem(NULL, "Unexpected file in get_reg()");
+	    return brw_null_reg();
+    }
+
+    assert(index < 256);
+    assert(component < 4);
+
+    /* see if we've already allocated a HW register for this Mesa register */
+    if (c->wm_regs[file][index][component].inited) {
+       /* yes, re-use */
+       reg = c->wm_regs[file][index][component].reg;
+    }
+    else {
+	/* no, allocate new register */
+       int grf = alloc_grf(c);
+       if (grf < 0) {
+          /* totally out of temps */
+          grf = 51; /* XXX random register! */
+       }
+
+       reg = brw_vec8_grf(grf, 0);
+
+       set_reg(c, file, index, component, reg);
+    }
+
+    /* Negation is applied to the absolute value, so set the abs modifier
+     * first and negate afterwards.  With the opposite order the negation
+     * would be lost if brw_abs() resets the negate modifier
+     * (NOTE(review): confirm against brw_abs() in brw_eu.h).
+     */
+    if (abs)
+	reg = brw_abs(reg);
+    if (neg & (1 << component)) {
+	reg = negate(reg);
+    }
+    return reg;
+}
+
+
+
+/**
+ * This is called if we run out of GRF registers.  Examine the live intervals
+ * of temp regs in the program and free those which won't be used again.
+ *
+ * A temporary counts as dead when its interval ends before c->cur_inst.
+ * Every hardware register bound to one of its components is released
+ * and the binding in c->wm_regs is invalidated.
+ *
+ * If the interval analysis reports failure nothing is reclaimed: the
+ * interval arrays cannot be trusted in that case, and releasing a
+ * register that is still live would silently corrupt the program.
+ */
+static void
+reclaim_temps(struct brw_wm_compile *c)
+{
+   GLint intBegin[MAX_PROGRAM_TEMPS];
+   GLint intEnd[MAX_PROGRAM_TEMPS];
+   int index;
+
+   if (!_mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
+                                  intBegin, intEnd)) {
+      /* analysis failed: keep every temporary allocated */
+      return;
+   }
+
+   for (index = 0; index < MAX_PROGRAM_TEMPS; index++) {
+      int component;
+
+      /* -1 marks a temporary the program never references */
+      if (intEnd[index] == -1 || intEnd[index] >= c->cur_inst)
+         continue;
+
+      /* program temp[index] can be freed */
+      for (component = 0; component < 4; component++) {
+         if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) {
+            int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr;
+            release_grf(c, r);
+            c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE;
+         }
+      }
+   }
+}
+
+
+
+
+/**
+ * Preallocate registers.  This sets up the Mesa to hardware register
+ * mapping for certain registers, such as constants (uniforms/state vars)
+ * and shader inputs.
+ *
+ * GRFs are assigned from register 0 upwards in this order: depth
+ * payload, constants (unless a constant buffer is used), shader inputs,
+ * the emit mask register and the stack.  Everything below the final
+ * reg_index is then marked used, as are GRF 126 and 127.  After that,
+ * destinations of TEX/TXB and WM_DELTAXY instructions are bound, and
+ * three registers are set aside for constant fetches when a constant
+ * buffer is in use.
+ *
+ * Also decides c->fp->use_const_buffer and fills in the c->prog_data
+ * fields nr_params, param[], first_curbe_grf, urb_read_length and
+ * curb_read_length.
+ */
+static void prealloc_reg(struct brw_wm_compile *c)
+{
+    struct intel_context *intel = &c->func.brw->intel;
+    int i, j;
+    struct brw_reg reg;
+    int urb_read_length = 0;
+    /* window position is always treated as an input */
+    GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
+    GLuint reg_index = 0;
+
+    /* start with an empty register file */
+    memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
+    c->first_free_grf = 0;
+
+    /* depth payload: component i lives in GRF 2*i when that depth
+     * register is present, otherwise it is pointed at GRF 0
+     */
+    for (i = 0; i < 4; i++) {
+        if (i < c->key.nr_depth_regs) 
+            reg = brw_vec8_grf(i * 2, 0);
+        else
+            reg = brw_vec8_grf(0, 0);
+	set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
+    }
+    reg_index += 2 * c->key.nr_depth_regs;
+
+    /* constants */
+    {
+        const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
+        const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
+
+        /* use a real constant buffer, or just use a section of the GRF? */
+        /* XXX this heuristic may need adjustment... */
+        if ((nr_params + nr_temps) * 4 + reg_index > 80)
+           c->fp->use_const_buffer = GL_TRUE;
+        else
+           c->fp->use_const_buffer = GL_FALSE;
+        /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
+
+        if (c->fp->use_const_buffer) {
+           /* We'll use a real constant buffer and fetch constants from
+            * it with a dataport read message.
+            */
+
+           /* number of float constants in CURBE */
+           c->prog_data.nr_params = 0;
+           /* NOTE(review): c->nr_creg is not assigned on this path but is
+            * copied into curb_read_length below - presumably it is zero
+            * from initialization; confirm.
+            */
+        }
+        else {
+           const struct gl_program_parameter_list *plist = 
+              c->fp->program.Base.Parameters;
+           int index = 0;
+
+           /* number of float constants in CURBE */
+           c->prog_data.nr_params = 4 * nr_params;
+
+           /* loop over program constants (float[4]) */
+           for (i = 0; i < nr_params; i++) {
+              /* loop over XYZW channels */
+              for (j = 0; j < 4; j++, index++) {
+                 /* one float per channel, eight floats per GRF */
+                 reg = brw_vec1_grf(reg_index + index / 8, index % 8);
+                 /* Save pointer to parameter/constant value.
+                  * Constants will be copied in prepare_constant_buffer()
+                  */
+                 c->prog_data.param[index] = &plist->ParameterValues[i][j];
+                 set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
+              }
+           }
+           /* number of constant regs used (each reg is float[8]);
+            * the count is rounded up to an even number of registers
+            */
+           c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
+           reg_index += c->nr_creg;
+        }
+    }
+
+    /* fragment shader inputs */
+    for (i = 0; i < VERT_RESULT_MAX; i++) {
+       int fp_input;
+
+       /* translate the vertex result slot into a fragment attribute;
+        * slots between TEX7 and VAR0 have no fragment counterpart here
+        */
+       if (i >= VERT_RESULT_VAR0)
+	  fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
+       else if (i <= VERT_RESULT_TEX7)
+	  fp_input = i;
+       else
+	  fp_input = -1;
+
+       if (fp_input >= 0 && inputs & (1 << fp_input)) {
+	  /* all four components of the payload entry share one register */
+	  urb_read_length = reg_index;
+	  reg = brw_vec8_grf(reg_index, 0);
+	  for (j = 0; j < 4; j++)
+	     set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
+       }
+       /* every written vertex output takes two registers, whether or
+        * not the fragment program reads it
+        */
+       if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
+	  reg_index += 2;
+       }
+    }
+
+    c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+    c->prog_data.urb_read_length = urb_read_length;
+    c->prog_data.curb_read_length = c->nr_creg;
+    c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+    reg_index++;
+    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+    reg_index += 2;
+
+    /* mark GRF regs [0..reg_index-1] as in-use */
+    for (i = 0; i < reg_index; i++)
+       prealloc_grf(c, i);
+
+    /* Don't use GRF 126, 127.  Using them seems to lead to GPU lock-ups */
+    prealloc_grf(c, 126);
+    prealloc_grf(c, 127);
+
+    for (i = 0; i < c->nr_fp_insns; i++) {
+	const struct prog_instruction *inst = &c->prog_instructions[i];
+	struct brw_reg dst[4];
+
+	switch (inst->Opcode) {
+	case OPCODE_TEX:
+	case OPCODE_TXB:
+	    /* Allocate the channels of texture results contiguously,
+	     * since they are written out that way by the sampler unit.
+	     * Contiguity is only checked by the assert, not enforced.
+	     */
+	    for (j = 0; j < 4; j++) {
+		dst[j] = get_dst_reg(c, inst, j);
+		if (j != 0)
+		    assert(dst[j].nr == dst[j - 1].nr + 1);
+	    }
+	    break;
+	default:
+	    break;
+	}
+    }
+
+    for (i = 0; i < c->nr_fp_insns; i++) {
+	const struct prog_instruction *inst = &c->prog_instructions[i];
+
+	switch (inst->Opcode) {
+	case WM_DELTAXY:
+	    /* Allocate WM_DELTAXY destination on G45/GM45 to an
+	     * even-numbered GRF if possible so that we can use the PLN
+	     * instruction.
+	     */
+	    if (inst->DstReg.WriteMask == WRITEMASK_XY &&
+		!c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited &&
+		!c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited &&
+		(IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) {
+		int grf;
+
+		/* look for a free even/odd register pair */
+		for (grf = c->first_free_grf & ~1;
+		     grf < BRW_WM_MAX_GRF;
+		     grf += 2)
+		{
+		    if (!c->used_grf[grf] && !c->used_grf[grf + 1]) {
+			c->used_grf[grf] = GL_TRUE;
+			c->used_grf[grf + 1] = GL_TRUE;
+			c->first_free_grf = grf + 2;  /* a guess */
+
+			set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0,
+				brw_vec8_grf(grf, 0));
+			set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1,
+				brw_vec8_grf(grf + 1, 0));
+			break;
+		    }
+		}
+	    }
+	    /* falls through to default, which only breaks */
+	default:
+	    break;
+	}
+    }
+
+    /* An instruction may reference up to three constants.
+     * They'll be found in these registers.
+     * XXX alloc these on demand!
+     */
+    if (c->fp->use_const_buffer) {
+       for (i = 0; i < 3; i++) {
+          c->current_const[i].index = -1;
+          /* NOTE(review): alloc_grf() returns -1 when the register file
+           * is exhausted; that result is used here unchecked.
+           */
+          c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
+       }
+    }
+#if 0
+    printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
+    printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
+#endif
+}
+
+
+/**
+ * Load the constants an instruction reads from the constant buffer.
+ *
+ * Each of the three source operands that lives in the constant, uniform
+ * or state-var file gets its index recorded in c->current_const[i] and
+ * a brw_dp_READ_4() emitted into that slot's register.  The read is
+ * emitted unconditionally; no check is made for a constant that is
+ * already present in the slot.
+ */
+static void fetch_constants(struct brw_wm_compile *c,
+                            const struct prog_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   GLuint arg;
+
+   /* loop over instruction src regs */
+   for (arg = 0; arg < 3; arg++) {
+      const struct prog_src_register *src = &inst->SrcReg[arg];
+
+      switch (src->File) {
+      case PROGRAM_STATE_VAR:
+      case PROGRAM_CONSTANT:
+      case PROGRAM_UNIFORM:
+         break;
+      default:
+         continue;   /* not read from the constant buffer */
+      }
+
+      c->current_const[arg].index = src->Index;
+
+      /* need to fetch the constant now */
+      brw_dp_READ_4(p,
+                    c->current_const[arg].reg,   /* writeback dest */
+                    src->RelAddr,                /* relative indexing? */
+                    16 * src->Index,             /* byte offset */
+                    SURF_INDEX_FRAG_CONST_BUFFER /* binding table index */
+                    );
+   }
+}
+
+
+/**
+ * Return the hardware register bound to one component of an
+ * instruction's destination, allocating it on first use (see get_reg()).
+ * Destinations never carry negate or abs modifiers.
+ */
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+                                  const struct prog_instruction *inst,
+                                  GLuint component)
+{
+    return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component,
+                   1,    /* nr */
+                   0,    /* neg */
+                   0);   /* abs */
+}
+
+
+/**
+ * Build the register operand for a source that lives in the constant
+ * buffer.
+ *
+ * The constant must already have been loaded by fetch_constants() into
+ * c->current_const[srcRegIndex]; this only describes how to read it:
+ * the requested float is selected with a byte sub-register offset and
+ * replicated across the vector by a zero stride.
+ *
+ * \param srcRegIndex  source operand slot of the instruction (0..2)
+ * \param component  constant component to read (X=0 .. W=3)
+ * \return the register with the operand's abs/negate modifiers applied;
+ *         when both are set the result is -|const|.
+ */
+static struct brw_reg
+get_src_reg_const(struct brw_wm_compile *c,
+                  const struct prog_instruction *inst,
+                  GLuint srcRegIndex, GLuint component)
+{
+   const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+   struct brw_reg const_reg;
+
+   assert(component < 4);
+   assert(srcRegIndex < 3);
+   assert(c->current_const[srcRegIndex].index != -1);
+   const_reg = c->current_const[srcRegIndex].reg;
+
+   /* extract desired float from the const_reg, and smear */
+   const_reg = stride(const_reg, 0, 1, 0);
+   const_reg.subnr = component * 4;
+
+   /* Negation is applied to the absolute value, so set the abs modifier
+    * first and negate afterwards.  With the opposite order the negation
+    * would be lost if brw_abs() resets the negate modifier
+    * (NOTE(review): confirm against brw_abs() in brw_eu.h).
+    */
+   if (src->Abs)
+      const_reg = brw_abs(const_reg);
+   if (src->Negate & (1 << component))
+      const_reg = negate(const_reg);
+
+   return const_reg;
+}
+
+
+/**
+ * Convert Mesa src register to brw register.
+ *
+ * \param srcRegIndex  source operand slot of the instruction (0..2)
+ * \param channel  result channel being computed (X=0 .. W=3); the source
+ *                 component actually read is GET_SWZ(src->Swizzle, channel)
+ * \return an immediate when the operand slot allows one and the value is
+ *         known at compile time (extended swizzle 0/1 or a
+ *         PROGRAM_CONSTANT), otherwise a register from the constant
+ *         buffer slots or from get_reg().
+ *
+ * The Negate field is a bitmask indexed by result channel, not by source
+ * component: precalc_dst() clears NEGATE_X to protect the constant it
+ * places in channel X.  The negation test therefore uses \p channel.
+ *
+ * NOTE(review): SWIZZLE_ZERO / SWIZZLE_ONE are only handled on the
+ * immediate path; elsewhere the component value is passed on unchanged.
+ */
+static struct brw_reg get_src_reg(struct brw_wm_compile *c, 
+                                  const struct prog_instruction *inst,
+                                  GLuint srcRegIndex, GLuint channel)
+{
+    const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+    const GLuint nr = 1;
+    const GLuint component = GET_SWZ(src->Swizzle, channel);
+    /* is this particular result channel negated? */
+    const GLboolean negate_chan = (src->Negate & (1 << channel)) != 0;
+
+    /* Only one immediate value can be used per native opcode, and it
+     * has be in the src1 slot, so not all Mesa instructions will get
+     * to take advantage of immediate constants.
+     */
+    if (brw_wm_arg_can_be_immediate(inst->Opcode, srcRegIndex)) {
+       /* Extended swizzle terms */
+       if (component == SWIZZLE_ZERO) {
+	  return brw_imm_f(0.0F);
+       } else if (component == SWIZZLE_ONE) {
+	  return brw_imm_f(negate_chan ? -1.0F : 1.0F);
+       }
+
+       if (src->File == PROGRAM_CONSTANT) {
+	  const struct gl_program_parameter_list *params =
+	     c->fp->program.Base.Parameters;
+	  float f = params->ParameterValues[src->Index][component];
+
+	  /* abs first, negation is applied to the absolute value */
+	  if (src->Abs)
+	     f = fabs(f);
+	  if (negate_chan)
+	     f = -f;
+
+	  return brw_imm_f(f);
+       }
+    }
+
+    if (c->fp->use_const_buffer &&
+        (src->File == PROGRAM_STATE_VAR ||
+         src->File == PROGRAM_CONSTANT ||
+         src->File == PROGRAM_UNIFORM)) {
+       /* NOTE(review): get_src_reg_const() is only given the component,
+        * so its negate test is still indexed by source component.
+        */
+       return get_src_reg_const(c, inst, srcRegIndex, component);
+    }
+    else {
+       /* other type of source register.  get_reg() tests the negate
+        * mask by component, so hand it an all-or-nothing mask that
+        * already reflects this channel's negate bit.
+        */
+       return get_reg(c, src->File, src->Index, component, nr, 
+                      negate_chan ? NEGATE_XYZW : NEGATE_NONE, src->Abs);
+    }
+}
+
+/**
+ * Subroutines are minimal support for reusable instruction sequences.
+ * They are implemented as simply as possible to minimise overhead: the
+ * only thing passed between caller and callee is the return address,
+ * which is kept in a temporary register.  There is no automatic local
+ * storage, so great care is required before attempting reentrancy or
+ * any kind of nested subroutine invocation.
+ *
+ * \param c           compile state; c->subroutines[] remembers the
+ *                    instruction index at which each body was emitted
+ *                    (zero meaning "not emitted yet")
+ * \param subroutine  which subroutine to invoke
+ * \param emit        callback that emits the subroutine body the first
+ *                    time it is needed
+ */
+static void invoke_subroutine( struct brw_wm_compile *c,
+			       enum _subroutine subroutine,
+			       void (*emit)( struct brw_wm_compile * ) )
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg ret_addr;
+    int tmp_mark;
+
+    assert( subroutine < BRW_WM_MAX_SUBROUTINE );
+
+    /* Both cases need one temporary to hold the return address. */
+    tmp_mark = mark_tmps( c );
+    ret_addr = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );
+
+    if( !c->subroutines[ subroutine ] ) {
+        /* First use: emit the body inline and remember where it starts. */
+        struct brw_instruction *link;
+        const int first = p->nr_insn;
+
+        /* Placeholder for "ret_addr = ip + <size>"; the offset is not
+           known until the body has been emitted, so it is patched below. */
+        brw_push_insn_state( p );
+        brw_set_mask_control( p, BRW_MASK_DISABLE );
+        link = brw_ADD( p, ret_addr, brw_ip_reg(), brw_imm_ud( 0 ) );
+        brw_pop_insn_state( p );
+
+        c->subroutines[ subroutine ] = p->nr_insn;
+
+        emit( c );
+
+        /* Return: jump to whatever address the caller stored. */
+        brw_push_insn_state( p );
+        brw_set_mask_control( p, BRW_MASK_DISABLE );
+        brw_MOV( p, brw_ip_reg(), ret_addr );
+        brw_pop_insn_state( p );
+
+        /* Instruction counts become IP offsets via << 4 (x16). */
+        brw_set_src1( link, brw_imm_ud( ( p->nr_insn - first ) << 4 ) );
+    } else {
+        /* Body already exists: store the return address and jump to it. */
+        const int call_site = p->nr_insn;
+
+        brw_push_insn_state( p );
+        brw_set_mask_control( p, BRW_MASK_DISABLE );
+        /* Resume two instructions on, i.e. just past the jump below. */
+        brw_ADD( p, ret_addr, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
+        /* Relative jump, measured from the jump instruction itself
+           (call_site + 1). */
+        brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
+                 brw_imm_d( ( c->subroutines[ subroutine ] -
+                              call_site - 1 ) << 4 ) );
+        brw_pop_insn_state( p );
+    }
+
+    release_tmps( c, tmp_mark );
+}
+
+/**
+ * Emit an ARL (address register load): move channel 0 of source operand 0
+ * into the address register, honouring the instruction's saturate mode.
+ */
+static void emit_arl(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg a0, value;
+
+    /* Saturation applies to the MOV only; it is switched off again below. */
+    if (inst->SaturateMode != SATURATE_OFF)
+        brw_set_saturate(p, 1);
+    else
+        brw_set_saturate(p, 0);
+
+    a0 = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, 0);
+    value = get_src_reg(c, inst, 0, 0); /* channel 0 */
+    brw_MOV(p, a0, value);
+
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit a fragment kill.
+ *
+ * For GLSL shaders, this KIL will be unconditional.
+ * It may be contained inside an IF/ENDIF structure of course.
+ *
+ * The inverse of mask register 1 (labelled IMASK below) is ANDed into the
+ * unsigned word at g0.0.  Both instructions run with the execution mask
+ * disabled.
+ */
+static void emit_kil(struct brw_wm_compile *c)
+{
+    struct brw_compile *p = &c->func;
+    const struct brw_reg g0_uw =
+        retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+    brw_push_insn_state(p);
+    brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+    /* emit_mask_reg = ~IMASK */
+    brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1));
+    /* g0.0 &= emit_mask_reg */
+    brw_AND(p, g0_uw, c->emit_mask_reg, g0_uw);
+
+    brw_pop_insn_state(p);
+}
+
+/**
+ * View \p reg as 16-bit signed words and address every second word
+ * starting at word 1 (region arguments 0, 8, 2), i.e. the odd-numbered
+ * words of the register.
+ */
+static INLINE struct brw_reg high_words( struct brw_reg reg )
+{
+    struct brw_reg words = retype( reg, BRW_REGISTER_TYPE_W );
+
+    words = suboffset( words, 1 );
+    return stride( words, 0, 8, 2 );
+}
+
+/**
+ * View \p reg as 16-bit signed words and address every second word
+ * starting at word 0 (region arguments 0, 8, 2), i.e. the even-numbered
+ * words of the register.
+ */
+static INLINE struct brw_reg low_words( struct brw_reg reg )
+{
+    const struct brw_reg words = retype( reg, BRW_REGISTER_TYPE_W );
+
+    return stride( words, 0, 8, 2 );
+}
+
+/**
+ * View \p reg as signed bytes and address every second byte starting at
+ * byte 0 (region arguments 0, 16, 2).
+ */
+static INLINE struct brw_reg even_bytes( struct brw_reg reg )
+{
+    const struct brw_reg bytes = retype( reg, BRW_REGISTER_TYPE_B );
+
+    return stride( bytes, 0, 16, 2 );
+}
+
+/**
+ * View \p reg as signed bytes and address every second byte starting at
+ * byte 1 (region arguments 0, 16, 2).
+ */
+static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
+{
+    struct brw_reg bytes = retype( reg, BRW_REGISTER_TYPE_B );
+
+    bytes = suboffset( bytes, 1 );
+    return stride( bytes, 0, 16, 2 );
+}
+
+/* One-, two- and three-dimensional Perlin noise, similar to the description
+   in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3.
+
+   noise1_sub() emits the body of the one-dimensional noise subroutine;
+   emit_noise1() passes it to invoke_subroutine() as the emit callback.
+
+   The coordinate is read from, and the result is written back to, the
+   temporary returned by lookup_tmp( c, mark - 2 ).  Presumably that is
+   the temporary emit_noise1() allocates for its argument, sitting just
+   below the return-address temporary allocated by invoke_subroutine() --
+   TODO confirm against mark_tmps()/lookup_tmp().
+
+   The access mode is switched to BRW_ALIGN_1 and is not restored within
+   this function.  All temporaries allocated here are released before
+   returning. */
+static void noise1_sub( struct brw_wm_compile *c ) {
+
+    struct brw_compile *p = &c->func;
+    struct brw_reg param,
+	x0, x1, /* gradients at each end */       
+	t, tmp[ 2 ], /* float temporaries */
+	itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
+    int i;
+    int mark = mark_tmps( c );
+
+    x0 = alloc_tmp( c );
+    x1 = alloc_tmp( c );
+    t = alloc_tmp( c );
+    tmp[ 0 ] = alloc_tmp( c );
+    tmp[ 1 ] = alloc_tmp( c );
+    /* itmp[ 0..1 ] alias tmp[ 0..1 ] (the two hashes); itmp[ 2..4 ] alias
+       x0, x1 and t, which are free to hold the hash multipliers until the
+       gradients are set up further down. */
+    itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD );
+    itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD );
+    itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD );
+    itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD );
+    itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD );
+    
+    param = lookup_tmp( c, mark - 2 ); /* input; also receives the result */
+
+    brw_set_access_mode( p, BRW_ALIGN_1 );
+
+    brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
+
+    /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
+       be hashed.  Also compute the remainder (offset within the unit
+       length), interleaved to reduce register dependency penalties. */
+    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param );
+    brw_FRC( p, param, param );
+    brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) );
+    brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
+    brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
+
+    /* We're now ready to perform the hashing.  The two hashes are
+       interleaved for performance.  The hash function used is
+       designed to rapidly achieve avalanche and require only 32x16
+       bit multiplication, and 16-bit swizzles (which we get for
+       free).  We can't use immediate operands in the multiplies,
+       because immediates are permitted only in src1 and the 16-bit
+       factor is permitted only in src0.
+
+       Three rounds follow, one per multiplier (itmp[ 2 ], itmp[ 3 ],
+       itmp[ 4 ]): multiply, then XOR the high word into the low word. */
+    for( i = 0; i < 2; i++ )
+	brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] );
+    for( i = 0; i < 2; i++ )
+       brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+		high_words( itmp[ i ] ) );
+    for( i = 0; i < 2; i++ )
+	brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] );
+    for( i = 0; i < 2; i++ )
+       brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+		high_words( itmp[ i ] ) );
+    for( i = 0; i < 2; i++ )
+	brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
+    for( i = 0; i < 2; i++ )
+       brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+		high_words( itmp[ i ] ) );
+
+    /* Now we want to initialise the two gradients based on the
+       hashes.  Format conversion from signed integer to float leaves
+       everything scaled too high by a factor of pow( 2, 31 ), but
+       we correct for that right at the end.
+
+       From here on x0, x1 and t are used as floats again; the hash
+       multipliers they held are overwritten. */
+    brw_ADD( p, t, param, brw_imm_f( -1.0 ) );
+    brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) );
+    brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) );
+
+    brw_MUL( p, x0, x0, param );
+    brw_MUL( p, x1, x1, t );
+    
+    /* We interpolate between the gradients using the polynomial
+       6t^5 - 15t^4 + 10t^3 (Perlin).  It is evaluated in Horner form as
+       ((6t - 15)t + 10) * t * t * t, with t being the fraction in param. */
+    brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) );
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+    brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the
+					   pipeline */
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+    brw_MUL( p, param, tmp[ 0 ], param ); /* param = interpolation weight */
+    brw_MUL( p, x1, x1, param ); /* x1 = ( x1 - x0 ) * weight */
+    brw_ADD( p, x0, x0, x1 );    
+    /* scale by pow( 2, -30 ), to compensate for the format conversion
+       above and an extra factor of 2 so that a single gradient covers
+       the [-1,1] range */
+    brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) );
+
+    release_tmps( c, mark );
+}
+
+/**
+ * Emit code for one-dimensional noise.
+ *
+ * Copies channel 0 of source operand 0 into a fresh temporary, runs the
+ * shared noise1 subroutine on it, and then writes that temporary to every
+ * destination channel enabled in the write mask.  No other temporaries may
+ * be live on entry (asserted), presumably because the subroutine finds its
+ * argument by temporary index.
+ */
+static void emit_noise1( struct brw_wm_compile *c,
+			 const struct prog_instruction *inst )
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const int saturate = ( inst->SaturateMode == SATURATE_ZERO_ONE );
+    const int mark = mark_tmps( c );
+    struct brw_reg coord, arg;
+    int chan;
+
+    assert( mark == 0 );
+
+    /* Fetch the source before allocating the argument temporary. */
+    coord = get_src_reg( c, inst, 0, 0 );
+    arg = alloc_tmp( c );
+    brw_MOV( p, arg, coord );
+
+    /* The subroutine leaves its result in the argument temporary. */
+    invoke_subroutine( c, SUB_NOISE1, noise1_sub );
+
+    /* Fill in the result: */
+    brw_set_saturate( p, saturate );
+    for( chan = 0; chan < 4; chan++ ) {
+        if( !( writemask & ( 1 << chan ) ) )
+            continue;
+        brw_MOV( p, get_dst_reg( c, inst, chan ), arg );
+    }
+    if( saturate )
+        brw_set_saturate( p, 0 );
+
+    release_tmps( c, mark );
+}
+    
+static void noise2_sub( struct brw_wm_compile *c ) {
+    /* Emits the body of the two-dimensional noise subroutine;
+       emit_noise2() passes this function to invoke_subroutine().
+
+       The two coordinates are read from the temporaries returned by
+       lookup_tmp( c, mark - 3 ) and lookup_tmp( c, mark - 2 ), and the
+       result is written to the first of them.  Presumably these are the
+       two argument temporaries allocated by emit_noise2(), sitting below
+       the return-address temporary of invoke_subroutine() -- TODO confirm
+       against mark_tmps()/lookup_tmp().
+
+       The access mode is switched to BRW_ALIGN_1 and is not restored
+       within this function. */
+    struct brw_compile *p = &c->func;
+    struct brw_reg param0, param1,
+	x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */       
+	t, tmp[ 4 ], /* float temporaries */
+	itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
+    int i;
+    int mark = mark_tmps( c );
+
+    x0y0 = alloc_tmp( c );
+    x0y1 = alloc_tmp( c );
+    x1y0 = alloc_tmp( c );
+    x1y1 = alloc_tmp( c );
+    t = alloc_tmp( c );
+    for( i = 0; i < 4; i++ ) {
+	tmp[ i ] = alloc_tmp( c );
+	itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+    }
+    /* itmp[ 4..6 ] alias three of the gradient registers, which hold the
+       hash multipliers until the gradients are initialised below. */
+    itmp[ 4 ] = retype( x0y0, BRW_REGISTER_TYPE_UD );
+    itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD );
+    itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD );
+    
+    param0 = lookup_tmp( c, mark - 3 ); /* first input; receives the result */
+    param1 = lookup_tmp( c, mark - 2 ); /* second input */
+
+    brw_set_access_mode( p, BRW_ALIGN_1 );
+    
+    /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
+       be hashed.  Also compute the remainders (offsets within the unit
+       square), interleaved to reduce register dependency penalties.
+
+       The low word of floor( param1 ) is added into the high word of
+       floor( param0 ), so one 32-bit value carries both coordinates;
+       adding 0x10000 and/or 0x1 then steps to the neighbouring corners. */
+    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );
+    brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );
+    brw_FRC( p, param0, param0 );
+    brw_FRC( p, param1, param1 );
+    brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
+    brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ),
+	     low_words( itmp[ 1 ] ) );
+    brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
+    brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
+    brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) );
+    brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) );
+    brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) );
+
+    /* We're now ready to perform the hashing.  The four hashes are
+       interleaved for performance.  The hash function used is
+       designed to rapidly achieve avalanche and require only 32x16
+       bit multiplication, and 16-bit swizzles (which we get for
+       free).  We can't use immediate operands in the multiplies,
+       because immediates are permitted only in src1 and the 16-bit
+       factor is permitted only in src0.
+
+       Three rounds follow, one per multiplier (itmp[ 4 ], itmp[ 5 ],
+       itmp[ 6 ]): multiply, then XOR the high word into the low word. */
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+		 high_words( itmp[ i ] ) );
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+		 high_words( itmp[ i ] ) );
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+		 high_words( itmp[ i ] ) );
+
+    /* Now we want to initialise the four gradients based on the
+       hashes.  Format conversion from signed integer to float leaves
+       everything scaled too high by a factor of pow( 2, 15 ), but
+       we correct for that right at the end.
+
+       The low word of each hash supplies the gradient component that is
+       multiplied by the param0 offset and the high word the component
+       multiplied by the param1 offset.  The gradient registers are
+       overwritten here, so the hash multipliers they held are gone. */
+    brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
+    brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+    brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+    brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) );
+    brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) );
+    
+    brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) );
+    
+    brw_MUL( p, x1y0, x1y0, t );
+    brw_MUL( p, x1y1, x1y1, t );
+    brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
+    brw_MUL( p, x0y0, x0y0, param0 );
+    brw_MUL( p, x0y1, x0y1, param0 );
+
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 );
+    brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 );
+    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t );
+    brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t );
+
+    brw_ADD( p, x0y0, x0y0, tmp[ 0 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 2 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 1 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 3 ] );
+    
+    /* We interpolate between the gradients using the polynomial
+       6t^5 - 15t^4 + 10t^3 (Perlin).  Both weights are evaluated in
+       Horner form and end up in param0 and param1, replacing the
+       fractions. */
+    brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) );
+    brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) );
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
+    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) );
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the
+						 pipeline */
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
+    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) );
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the
+						 pipeline */
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+    brw_MUL( p, param0, tmp[ 0 ], param0 );
+    brw_MUL( p, param1, tmp[ 1 ], param1 );
+    
+    /* Here we interpolate in the y dimension... */
+    brw_MUL( p, x0y1, x0y1, param1 );
+    brw_MUL( p, x1y1, x1y1, param1 );
+    brw_ADD( p, x0y0, x0y0, x0y1 );
+    brw_ADD( p, x1y0, x1y0, x1y1 );
+
+    /* And now in x.  There are horrible register dependencies here,
+       but we have nothing else to do. */
+    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+    brw_MUL( p, x1y0, x1y0, param0 );
+    brw_ADD( p, x0y0, x0y0, x1y0 );
+    
+    /* scale by pow( 2, -15 ), as described above */
+    brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) );
+
+    release_tmps( c, mark );
+}
+
+/**
+ * Emit code for two-dimensional noise.
+ *
+ * Copies channels 0 and 1 of source operand 0 into two fresh temporaries,
+ * runs the shared noise2 subroutine on them, and then writes the first
+ * temporary (which holds the result) to every destination channel enabled
+ * in the write mask.  No other temporaries may be live on entry (asserted).
+ */
+static void emit_noise2( struct brw_wm_compile *c,
+			 const struct prog_instruction *inst )
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg coord[ 2 ], arg[ 2 ];
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const int saturate = ( inst->SaturateMode == SATURATE_ZERO_ONE );
+    const int mark = mark_tmps( c );
+    int i;
+
+    assert( mark == 0 );
+
+    /* Fetch both sources first, then allocate the argument temporaries,
+       then copy -- the same order as the unrolled form. */
+    for( i = 0; i < 2; i++ )
+        coord[ i ] = get_src_reg( c, inst, 0, i );
+    for( i = 0; i < 2; i++ )
+        arg[ i ] = alloc_tmp( c );
+    for( i = 0; i < 2; i++ )
+        brw_MOV( p, arg[ i ], coord[ i ] );
+
+    invoke_subroutine( c, SUB_NOISE2, noise2_sub );
+
+    /* Fill in the result: */
+    brw_set_saturate( p, saturate );
+    for( i = 0; i < 4; i++ ) {
+        if( !( writemask & ( 1 << i ) ) )
+            continue;
+        brw_MOV( p, get_dst_reg( c, inst, i ), arg[ 0 ] );
+    }
+    if( saturate )
+        brw_set_saturate( p, 0 );
+
+    release_tmps( c, mark );
+}
+
+/**
+ * The three-dimensional case is much like the one- and two- versions above,
+ * but since the number of corners is rapidly growing we now pack 16 16-bit
+ * hashes into each register to extract more parallelism from the EUs.
+ *
+ * noise3_sub() emits the body of the three-dimensional noise subroutine;
+ * emit_noise3() passes it to invoke_subroutine().
+ *
+ * The three coordinates are read from the temporaries returned by
+ * lookup_tmp( c, mark - 4 ), lookup_tmp( c, mark - 3 ) and
+ * lookup_tmp( c, mark - 2 ), and the result is written to the first of
+ * them.  Presumably these are the argument temporaries allocated by
+ * emit_noise3(), sitting below the return-address temporary of
+ * invoke_subroutine() -- TODO confirm against mark_tmps()/lookup_tmp().
+ *
+ * The cube is processed as two faces: the hashes in tmp[ 0 ] and tmp[ 1 ]
+ * give the "rear" four gradients, whose interpolated value is parked in
+ * tmp[ 0 ]; the hashes in tmp[ 2 ] and tmp[ 3 ] give the "front" four,
+ * whose value goes to tmp[ 1 ]; the two are finally blended with zi.
+ * tmp[ 4..7 ] are scratch registers for the gradient products.
+ *
+ * The access mode is switched to BRW_ALIGN_1 and is not restored within
+ * this function.
+ */
+static void noise3_sub( struct brw_wm_compile *c ) {
+
+    struct brw_compile *p = &c->func;
+    struct brw_reg param0, param1, param2,
+	x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
+	xi, yi, zi, /* interpolation coefficients */
+	t, tmp[ 8 ], /* float temporaries */
+	itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
+	wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
+    int i;
+    int mark = mark_tmps( c );
+
+    x0y0 = alloc_tmp( c );
+    x0y1 = alloc_tmp( c );
+    x1y0 = alloc_tmp( c );
+    x1y1 = alloc_tmp( c );
+    xi = alloc_tmp( c );
+    yi = alloc_tmp( c );
+    zi = alloc_tmp( c );
+    t = alloc_tmp( c );
+    for( i = 0; i < 8; i++ ) {
+	tmp[ i ] = alloc_tmp( c );
+	itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+	wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );
+    }
+    
+    param0 = lookup_tmp( c, mark - 4 ); /* first input; receives the result */
+    param1 = lookup_tmp( c, mark - 3 ); /* second input */
+    param2 = lookup_tmp( c, mark - 2 ); /* third input */
+
+    brw_set_access_mode( p, BRW_ALIGN_1 );
+    
+    /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
+       be hashed.  Also compute the remainders (offsets within the unit
+       cube), interleaved to reduce register dependency penalties. */
+    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );
+    brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );
+    brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 );
+    brw_FRC( p, param0, param0 );
+    brw_FRC( p, param1, param1 );
+    brw_FRC( p, param2, param2 );
+    /* Since we now have only 16 bits of precision in the hash, we must
+       be more careful about thorough mixing to maintain entropy as we
+       squash the input vector into a small scalar.
+
+       The MUL/MAC/MAC chain forms a weighted sum of the three integer
+       coordinates in the low word of itmp[ 0 ]; adding the first weight
+       (0xBC8F) again yields the value for the next integer along the
+       first axis, which is stored in the high word. */
+    brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) );
+    brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) );
+    brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ),
+	     brw_imm_uw( 0x9B93 ) );
+    brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),
+	     brw_imm_uw( 0xBC8F ) );
+
+    /* Temporarily disable the execution mask while we work with ExecSize=16
+       channels (the mask is set for ExecSize=8 and is probably incorrect).
+       Although this might cause execution of unwanted channels, the code
+       writes only to temporary registers and has no side effects, so
+       disabling the mask is harmless.
+
+       Adding the second and third weights steps along the second and
+       third axes, giving the remaining corners in wtmp[ 1..3 ]. */
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) );
+    brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) );
+    brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) );
+
+    /* We're now ready to perform the hashing.  The eight hashes are
+       interleaved for performance.  The hash function used is
+       designed to rapidly achieve avalanche and require only 16x16
+       bit multiplication, and 8-bit swizzles (which we get for
+       free). */
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+		 odd_bytes( wtmp[ i ] ) );
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+		 odd_bytes( wtmp[ i ] ) );
+    brw_pop_insn_state( p );
+
+    /* Now we want to initialise the four rear gradients based on the
+       hashes.  Format conversion from signed integer to float leaves
+       everything scaled too high by a factor of pow( 2, 15 ), but
+       we correct for that right at the end. */
+    /* x component */
+    brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
+    brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+    brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+    brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );
+    brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );
+
+    /* NOTE(review): the hashes are shifted left by 5 between gradient
+       components, presumably so that each component is derived from
+       different hash bits -- confirm. */
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) );
+    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
+    brw_pop_insn_state( p );
+    
+    brw_MUL( p, x1y0, x1y0, t );
+    brw_MUL( p, x1y1, x1y1, t );
+    brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
+    brw_MUL( p, x0y0, x0y0, param0 );
+    brw_MUL( p, x0y1, x0y1, param0 );
+
+    /* y component */
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+    
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) );
+    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); /* ready for the front face's x component */
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
+    
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    
+    /* z component */
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 );
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 );
+    
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+    
+    /* We interpolate between the gradients using the polynomial
+       6t^5 - 15t^4 + 10t^3 (Perlin).  The three weights are evaluated in
+       Horner form into xi, yi and zi; unlike the 1D and 2D versions the
+       fractions in param0..param2 are left intact, because the front
+       face still needs them. */
+    brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) );
+    brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) );
+    brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) );
+    brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) );
+    brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) );
+    brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) );
+    brw_MUL( p, xi, xi, param0 );
+    brw_MUL( p, yi, yi, param1 );
+    brw_MUL( p, zi, zi, param2 );
+    brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) );
+    brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) );
+    brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) );
+    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */
+    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */
+    brw_MUL( p, xi, xi, param0 );
+    brw_MUL( p, yi, yi, param1 );
+    brw_MUL( p, zi, zi, param2 );
+    brw_MUL( p, xi, xi, param0 );
+    brw_MUL( p, yi, yi, param1 );
+    brw_MUL( p, zi, zi, param2 );
+    brw_MUL( p, xi, xi, param0 );
+    brw_MUL( p, yi, yi, param1 );
+    brw_MUL( p, zi, zi, param2 );
+    
+    /* Here we interpolate in the y dimension... */
+    brw_MUL( p, x0y1, x0y1, yi );
+    brw_MUL( p, x1y1, x1y1, yi );
+    brw_ADD( p, x0y0, x0y0, x0y1 );
+    brw_ADD( p, x1y0, x1y0, x1y1 );
+
+    /* And now in x.  Leave the result in tmp[ 0 ] (see below)...
+       (the hashes that were in tmp[ 0 ] are no longer needed). */
+    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+    brw_MUL( p, x1y0, x1y0, xi );
+    brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );
+
+    /* Now do the same thing for the front four gradients... */
+    /* x component */
+    brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );
+    brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );
+    brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );
+    brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );
+
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) );
+    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, x1y0, x1y0, t );
+    brw_MUL( p, x1y1, x1y1, t );
+    brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
+    brw_MUL( p, x0y0, x0y0, param0 );
+    brw_MUL( p, x0y1, x0y1, param0 );
+
+    /* y component */
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+    
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) );
+    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    brw_ADD( p, t, param2, brw_imm_f( -1.0 ) ); /* front face: z offset is param2 - 1 */
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
+    
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    
+    /* z component */
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+    
+    /* The interpolation coefficients are still around from last time, so
+       again interpolate in the y dimension... */
+    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
+    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
+    brw_MUL( p, x0y1, x0y1, yi );
+    brw_MUL( p, x1y1, x1y1, yi );
+    brw_ADD( p, x0y0, x0y0, x0y1 );
+    brw_ADD( p, x1y0, x1y0, x1y1 );
+
+    /* And now in x.  The rear face is in tmp[ 0 ] (see above), so this
+       time put the front face in tmp[ 1 ] and we're nearly there... */
+    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+    brw_MUL( p, x1y0, x1y0, xi );
+    brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );
+
+    /* The final interpolation, in the z dimension: */
+    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );    
+    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi );
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );
+    
+    /* scale by pow( 2, -15 ), as described above */
+    brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );
+
+    release_tmps( c, mark );
+}
+
+/**
+ * Emit code for three-dimensional noise.
+ *
+ * Copies channels 0..2 of source operand 0 into three fresh temporaries,
+ * runs the shared noise3 subroutine on them, and then writes the first
+ * temporary (which holds the result) to every destination channel enabled
+ * in the write mask.  No other temporaries may be live on entry (asserted).
+ */
+static void emit_noise3( struct brw_wm_compile *c,
+			 const struct prog_instruction *inst )
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg coord[ 3 ], arg[ 3 ];
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const int saturate = ( inst->SaturateMode == SATURATE_ZERO_ONE );
+    const int mark = mark_tmps( c );
+    int i;
+
+    assert( mark == 0 );
+
+    /* Fetch all sources first, then allocate the argument temporaries,
+       then copy -- the same order as the unrolled form. */
+    for( i = 0; i < 3; i++ )
+        coord[ i ] = get_src_reg( c, inst, 0, i );
+    for( i = 0; i < 3; i++ )
+        arg[ i ] = alloc_tmp( c );
+    for( i = 0; i < 3; i++ )
+        brw_MOV( p, arg[ i ], coord[ i ] );
+
+    invoke_subroutine( c, SUB_NOISE3, noise3_sub );
+
+    /* Fill in the result: */
+    brw_set_saturate( p, saturate );
+    for( i = 0; i < 4; i++ ) {
+        if( !( writemask & ( 1 << i ) ) )
+            continue;
+        brw_MOV( p, get_dst_reg( c, inst, i ), arg[ 0 ] );
+    }
+    if( saturate )
+        brw_set_saturate( p, 0 );
+
+    release_tmps( c, mark );
+}
+    
+/**
+ * For the four-dimensional case, the little micro-optimisation benefits
+ * we obtain by unrolling all the loops aren't worth the massive bloat it
+ * now causes.  Instead, we loop twice around performing a similar operation
+ * to noise3, once for the w=0 cube and once for the w=1, with a bit more
+ * code to glue it all together.
+ */
+static void noise4_sub( struct brw_wm_compile *c )
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg param[ 4 ],
+	x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
+	w0, /* noise for the w=0 cube */
+	floors[ 2 ], /* integer coordinates of base corner of hypercube */
+	interp[ 4 ], /* interpolation coefficients */
+	t, tmp[ 8 ], /* float temporaries */
+	itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
+	wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
+    int i, j;
+    int mark = mark_tmps( c );
+    GLuint loop, origin;
+    
+    x0y0 = alloc_tmp( c );
+    x0y1 = alloc_tmp( c );
+    x1y0 = alloc_tmp( c );
+    x1y1 = alloc_tmp( c );
+    t = alloc_tmp( c );
+    w0 = alloc_tmp( c );    
+    floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );
+    floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );
+
+    for( i = 0; i < 4; i++ ) {
+	param[ i ] = lookup_tmp( c, mark - 5 + i );
+	interp[ i ] = alloc_tmp( c );
+    }
+    
+    for( i = 0; i < 8; i++ ) {
+	tmp[ i ] = alloc_tmp( c );
+	itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+	wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );
+    }
+
+    brw_set_access_mode( p, BRW_ALIGN_1 );
+
+    /* We only want 16 bits of precision from the integral part of each
+       co-ordinate, but unfortunately the RNDD semantics would saturate
+       at 16 bits if we performed the operation directly to a 16-bit
+       destination.  Therefore, we round to 32-bit temporaries where
+       appropriate, and then store only the lower 16 bits. */
+    brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] );
+    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] );
+    brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] );
+    brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] );
+    brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) );
+    brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) );
+
+    /* Modify the flag register here, because the side effect is useful
+       later (see below).  We know for certain that all flags will be
+       cleared, since the FRC instruction cannot possibly generate
+       negative results.  Even for exceptional inputs (infinities, denormals,
+       NaNs), the architecture guarantees that the L conditional is false. */
+    brw_set_conditionalmod( p, BRW_CONDITIONAL_L );
+    brw_FRC( p, param[ 0 ], param[ 0 ] );
+    brw_set_predicate_control( p, BRW_PREDICATE_NONE );
+    for( i = 1; i < 4; i++ )	
+	brw_FRC( p, param[ i ], param[ i ] );
+    
+    /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
+       of all. */
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) );
+    for( i = 0; i < 4; i++ )
+	brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) );
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );
+    for( i = 0; i < 4; i++ )
+	brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) );
+    for( j = 0; j < 3; j++ )
+	for( i = 0; i < 4; i++ )
+	    brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );
+
+    /* Mark the current address, as it will be a jump destination.  The
+       following code will be executed twice: first, with the flag
+       register clear indicating the w=0 case, and second with flags
+       set for w=1. */
+    loop = p->nr_insn;
+    
+    /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
+       be hashed.  Since we have only 16 bits of precision in the hash, we
+       must be careful about thorough mixing to maintain entropy as we
+       squash the input vector into a small scalar. */
+    brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ),
+	     brw_imm_uw( 0xBC8F ) );
+    brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ),
+	     brw_imm_uw( 0xD0BD ) );
+    brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ),
+	     brw_imm_uw( 0x9B93 ) );
+    brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ),
+	     brw_imm_uw( 0xA359 ) );
+    brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),
+	     brw_imm_uw( 0xBC8F ) );
+
+    /* Temporarily disable the execution mask while we work with ExecSize=16
+       channels (the mask is set for ExecSize=8 and is probably incorrect).
+       Although this might cause execution of unwanted channels, the code
+       writes only to temporary registers and has no side effects, so
+       disabling the mask is harmless. */
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) );
+    brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) );
+    brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) );
+
+    /* We're now ready to perform the hashing.  The eight hashes are
+       interleaved for performance.  The hash function used is
+       designed to rapidly achieve avalanche and require only 16x16
+       bit multiplication, and 8-bit swizzles (which we get for
+       free). */
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+		 odd_bytes( wtmp[ i ] ) );
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+		 odd_bytes( wtmp[ i ] ) );
+    brw_pop_insn_state( p );
+
+    /* Now we want to initialise the four rear gradients based on the
+       hashes.  Format conversion from signed integer to float leaves
+       everything scaled too high by a factor of pow( 2, 15 ), but
+       we correct for that right at the end. */
+    /* x component */
+    brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );
+    brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+    brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+    brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );
+    brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );
+
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
+    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
+    brw_pop_insn_state( p );
+    
+    brw_MUL( p, x1y0, x1y0, t );
+    brw_MUL( p, x1y1, x1y1, t );
+    brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );
+    brw_MUL( p, x0y0, x0y0, param[ 0 ] );
+    brw_MUL( p, x0y1, x0y1, param[ 0 ] );
+
+    /* y component */
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+    
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
+    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );    
+    /* prepare t for the w component (used below): w the first time through
+       the loop; w - 1 the second time) */
+    brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
+    brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );
+    p->current->header.predicate_inverse = 1;
+    brw_MOV( p, t, param[ 3 ] );
+    p->current->header.predicate_inverse = 0;
+    brw_set_predicate_control( p, BRW_PREDICATE_NONE );
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );
+    
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    
+    /* z component */
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
+    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] );
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] );
+    
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+    /* w component */
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );
+    
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+    /* Here we interpolate in the y dimension... */
+    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
+    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
+    brw_MUL( p, x0y1, x0y1, interp[ 1 ] );
+    brw_MUL( p, x1y1, x1y1, interp[ 1 ] );
+    brw_ADD( p, x0y0, x0y0, x0y1 );
+    brw_ADD( p, x1y0, x1y0, x1y1 );
+
+    /* And now in x.  Leave the result in tmp[ 0 ] (see below)... */
+    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+    brw_MUL( p, x1y0, x1y0, interp[ 0 ] );
+    brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );
+
+    /* Now do the same thing for the front four gradients... */
+    /* x component */
+    brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );
+    brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );
+    brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );
+    brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );
+
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
+    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, x1y0, x1y0, t );
+    brw_MUL( p, x1y1, x1y1, t );
+    brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );
+    brw_MUL( p, x0y0, x0y0, param[ 0 ] );
+    brw_MUL( p, x0y1, x0y1, param[ 0 ] );
+
+    /* y component */
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+    
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
+    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) );
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );
+    
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    
+    /* z component */
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
+    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    /* prepare t for the w component (used below): w the first time through
+       the loop; w - 1 the second time) */
+    brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
+    brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );
+    p->current->header.predicate_inverse = 1;
+    brw_MOV( p, t, param[ 3 ] );
+    p->current->header.predicate_inverse = 0;
+    brw_set_predicate_control( p, BRW_PREDICATE_NONE );
+    
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+    /* w component */
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+    /* Interpolate in the y dimension: */
+    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
+    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
+    brw_MUL( p, x0y1, x0y1, interp[ 1 ] );
+    brw_MUL( p, x1y1, x1y1, interp[ 1 ] );
+    brw_ADD( p, x0y0, x0y0, x0y1 );
+    brw_ADD( p, x1y0, x1y0, x1y1 );
+
+    /* And now in x.  The rear face is in tmp[ 0 ] (see above), so this
+       time put the front face in tmp[ 1 ] and we're nearly there... */
+    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+    brw_MUL( p, x1y0, x1y0, interp[ 0 ] );
+    brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );
+
+    /* Another interpolation, in the z dimension: */
+    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );    
+    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] );
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );
+
+    /* Exit the loop if we've computed both cubes... */
+    origin = p->nr_insn;
+    brw_push_insn_state( p );
+    brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
+    brw_pop_insn_state( p );
+
+    /* Save the result for the w=0 case, and increment the w coordinate: */
+    brw_MOV( p, w0, tmp[ 0 ] );
+    brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ),
+	     brw_imm_uw( 1 ) );
+
+    /* Loop around for the other cube.  Explicitly set the flag register
+       (unfortunately we must spend an extra instruction to do this: we
+       can't rely on a side effect of the previous MOV or ADD because
+       conditional modifiers which are normally true might be false in
+       exceptional circumstances, e.g. given a NaN input; the add to
+       brw_ip_reg() is not suitable because the IP is not an 8-vector). */
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) );
+    brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
+	     brw_imm_d( ( loop - p->nr_insn ) << 4 ) );
+    brw_pop_insn_state( p );
+
+    /* Patch the previous conditional branch now that we know the
+       destination address. */
+    brw_set_src1( p->store + origin,
+		  brw_imm_d( ( p->nr_insn - origin ) << 4 ) );
+
+    /* The very last interpolation. */
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) );    
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] );
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 );
+
+    /* scale by pow( 2, -15 ), as described above */
+    brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );
+
+    release_tmps( c, mark );
+}
+
+/**
+ * Emit code for a four-component noise instruction.
+ *
+ * The four channels of source operand 0 are copied into four freshly
+ * allocated temporaries, the SUB_NOISE4 subroutine is invoked, and the
+ * value left in the first temporary is then replicated into every
+ * destination channel enabled in the write mask (saturated if the
+ * instruction asks for it).
+ */
+static void emit_noise4( struct brw_wm_compile *c,
+			 const struct prog_instruction *inst )
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const int saturate = ( inst->SaturateMode == SATURATE_ZERO_ONE );
+    struct brw_reg src[ 4 ], param[ 4 ];
+    int chan;
+    int mark = mark_tmps( c );
+
+    /* The temporaries below must be the very first ones handed out. */
+    assert( mark == 0 );
+
+    /* Look up all four source channels before allocating anything. */
+    for( chan = 0; chan < 4; chan++ )
+	src[ chan ] = get_src_reg( c, inst, 0, chan );
+
+    for( chan = 0; chan < 4; chan++ )
+	param[ chan ] = alloc_tmp( c );
+
+    for( chan = 0; chan < 4; chan++ )
+	brw_MOV( p, param[ chan ], src[ chan ] );
+
+    invoke_subroutine( c, SUB_NOISE4, noise4_sub );
+
+    /* Copy the scalar result to each enabled destination channel. */
+    brw_set_saturate( p, saturate );
+    for( chan = 0; chan < 4; chan++ ) {
+	struct brw_reg dst;
+
+	if( !( writemask & ( 1 << chan ) ) )
+	    continue;
+
+	dst = get_dst_reg( c, inst, chan );
+	brw_MOV( p, dst, param[ 0 ] );
+    }
+    if( saturate )
+	brw_set_saturate( p, 0 );
+
+    release_tmps( c, mark );
+}
+
+/**
+ * Resolve subroutine calls after code emit is done.
+ *
+ * Simply forwards the compile state in c->func to brw_resolve_cals().
+ * NOTE(review): presumably this matches the call sites recorded with
+ * brw_save_call() against the labels recorded with brw_save_label()
+ * during emission -- confirm against brw_resolve_cals().
+ */
+static void post_wm_emit( struct brw_wm_compile *c )
+{
+    brw_resolve_cals(&c->func);
+}
+
+/**
+ * Fetch the registers holding one source operand of an instruction.
+ *
+ * For every channel selected by \p mask, regs[channel] receives the
+ * register for source operand \p index.  If such a register lives in the
+ * general register file and is bit-for-bit identical to one of the four
+ * destination registers in \p dst, its value is first copied into a new
+ * temporary and the temporary is used instead, so that writing the
+ * destination cannot clobber a source that is still needed.
+ */
+static void
+get_argument_regs(struct brw_wm_compile *c,
+		  const struct prog_instruction *inst,
+		  int index,
+		  struct brw_reg *dst,
+		  struct brw_reg *regs,
+		  int mask)
+{
+    struct brw_compile *p = &c->func;
+    int chan;
+
+    for (chan = 0; chan < 4; chan++) {
+	struct brw_reg *src = &regs[chan];
+	int d;
+
+	if (!(mask & (1 << chan)))
+	    continue;
+
+	*src = get_src_reg(c, inst, index, chan);
+
+	/* Only general registers can alias a destination. */
+	if (src->file != BRW_GENERAL_REGISTER_FILE)
+	    continue;
+
+	/* Unalias: stop at the first destination this source matches. */
+	for (d = 0; d < 4; d++) {
+	    if (memcmp(src, &dst[d], sizeof(*src)) == 0) {
+		struct brw_reg copy = alloc_tmp(c);
+
+		brw_MOV(p, copy, *src);
+		*src = copy;
+		break;
+	    }
+	}
+    }
+}
+
+/**
+ * Generate native code for every instruction in c->prog_instructions.
+ *
+ * For each instruction the destination and source registers are looked
+ * up (sources that alias a destination are copied to temporaries by
+ * get_argument_regs()), the conditional modifier is set from CondUpdate,
+ * and the opcode is dispatched to the matching emit_* helper.
+ *
+ * IF/ELSE/ENDIF and BGNLOOP/ENDLOOP nesting is tracked with two
+ * fixed-size stacks (if_inst / loop_inst).  CAL/RET keep return addresses
+ * in memory addressed through an address register that is initialised
+ * from c->stack.  Call targets are resolved by post_wm_emit() once all
+ * instructions have been emitted.
+ *
+ * \param brw  driver context (used for the hardware generation)
+ * \param c    WM compile state holding the program and the output buffer
+ */
+static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
+{
+   struct intel_context *intel = &brw->intel;
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
+    struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+    GLuint i, if_depth = 0, loop_depth = 0;
+    struct brw_compile *p = &c->func;
+    struct brw_indirect stack_index = brw_indirect(0, 0);
+
+    c->out_of_regs = GL_FALSE;
+
+    prealloc_reg(c);
+    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+    brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+
+    for (i = 0; i < c->nr_fp_insns; i++) {
+        const struct prog_instruction *inst = &c->prog_instructions[i];
+	int dst_flags;
+	struct brw_reg args[3][4], dst[4];
+	int j;
+	int mark = mark_tmps( c );
+
+        c->cur_inst = i;
+
+#if 0
+        printf("Inst %d: ", i);
+        _mesa_print_instruction(inst);
+#endif
+
+        /* fetch any constants that this instruction needs */
+        if (c->fp->use_const_buffer)
+           fetch_constants(c, inst);
+
+	/* Give every dst[] slot a defined value.  OPCODE_ARL is handled by
+	 * emit_arl() and gets no destination registers here, but
+	 * get_argument_regs() below still compares each source against
+	 * dst[0..3], so the array must never be left uninitialized.  The
+	 * null register can never match a general-register source.
+	 */
+	for (j = 0; j < 4; j++) {
+	   if (inst->Opcode != OPCODE_ARL &&
+	       (inst->DstReg.WriteMask & (1 << j)))
+	      dst[j] = get_dst_reg(c, inst, j);
+	   else
+	      dst[j] = brw_null_reg();
+	}
+	for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++)
+	    get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW);
+
+	dst_flags = inst->DstReg.WriteMask;
+	if (inst->SaturateMode == SATURATE_ZERO_ONE)
+	    dst_flags |= SATURATE;
+
+	if (inst->CondUpdate)
+	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+	else
+	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+
+	switch (inst->Opcode) {
+	    case WM_PIXELXY:
+		emit_pixel_xy(c, dst, dst_flags);
+		break;
+	    case WM_DELTAXY: 
+		emit_delta_xy(p, dst, dst_flags, args[0]);
+		break;
+	    case WM_PIXELW:
+		emit_pixel_w(c, dst, dst_flags, args[0], args[1]);
+		break;	
+	    case WM_LINTERP:
+		emit_linterp(p, dst, dst_flags, args[0], args[1]);
+		break;
+	    case WM_PINTERP:
+		emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
+		break;
+	    case WM_CINTERP:
+		emit_cinterp(p, dst, dst_flags, args[0]);
+		break;
+	    case WM_WPOSXY:
+		emit_wpos_xy(c, dst, dst_flags, args[0]);
+		break;
+	    case WM_FB_WRITE:
+		emit_fb_write(c, args[0], args[1], args[2],
+			      INST_AUX_GET_TARGET(inst->Aux),
+			      inst->Aux & INST_AUX_EOT);
+		break;
+	    case WM_FRONTFACING:
+		emit_frontfacing(p, dst, dst_flags);
+		break;
+	    case OPCODE_ADD:
+		emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
+		break;
+	    case OPCODE_ARL:
+		emit_arl(c, inst);
+		break;
+	    case OPCODE_FRC:
+		emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
+		break;
+	    case OPCODE_FLR:
+		emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
+		break;
+	    case OPCODE_LRP:
+		emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
+		break;
+	    case OPCODE_TRUNC:
+		emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]);
+		break;
+	    case OPCODE_MOV:
+	    case OPCODE_SWZ:
+		emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
+		break;
+	    case OPCODE_DP3:
+		emit_dp3(p, dst, dst_flags, args[0], args[1]);
+		break;
+	    case OPCODE_DP4:
+		emit_dp4(p, dst, dst_flags, args[0], args[1]);
+		break;
+	    case OPCODE_XPD:
+		emit_xpd(p, dst, dst_flags, args[0], args[1]);
+		break;
+	    case OPCODE_DPH:
+		emit_dph(p, dst, dst_flags, args[0], args[1]);
+		break;
+	    case OPCODE_RCP:
+		emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
+		break;
+	    case OPCODE_RSQ:
+		emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
+		break;
+	    case OPCODE_SIN:
+		emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
+		break;
+	    case OPCODE_COS:
+		emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
+		break;
+	    case OPCODE_EX2:
+		emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
+		break;
+	    case OPCODE_LG2:
+		emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
+		break;
+	    case OPCODE_CMP:
+		emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
+		break;
+	    case OPCODE_MIN:	
+		emit_min(p, dst, dst_flags, args[0], args[1]);
+		break;
+	    case OPCODE_MAX:	
+		emit_max(p, dst, dst_flags, args[0], args[1]);
+		break;
+	    case OPCODE_DDX:
+	    case OPCODE_DDY:
+		emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
+			  args[0]);
+                break;
+	    case OPCODE_SLT:
+		emit_sop(p, dst, dst_flags,
+			 BRW_CONDITIONAL_L, args[0], args[1]);
+		break;
+	    case OPCODE_SLE:
+		emit_sop(p, dst, dst_flags,
+			 BRW_CONDITIONAL_LE, args[0], args[1]);
+		break;
+	    case OPCODE_SGT:
+		emit_sop(p, dst, dst_flags,
+			 BRW_CONDITIONAL_G, args[0], args[1]);
+		break;
+	    case OPCODE_SGE:
+		emit_sop(p, dst, dst_flags,
+			 BRW_CONDITIONAL_GE, args[0], args[1]);
+		break;
+	    case OPCODE_SEQ:
+		emit_sop(p, dst, dst_flags,
+			 BRW_CONDITIONAL_EQ, args[0], args[1]);
+		break;
+	    case OPCODE_SNE:
+		emit_sop(p, dst, dst_flags,
+			 BRW_CONDITIONAL_NEQ, args[0], args[1]);
+		break;
+	    case OPCODE_MUL:
+		emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
+		break;
+	    case OPCODE_POW:
+		emit_math2(c, BRW_MATH_FUNCTION_POW,
+			   dst, dst_flags, args[0], args[1]);
+		break;
+	    case OPCODE_MAD:
+		emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
+		break;
+	    case OPCODE_NOISE1:
+		emit_noise1(c, inst);
+		break;
+	    case OPCODE_NOISE2:
+		emit_noise2(c, inst);
+		break;
+	    case OPCODE_NOISE3:
+		emit_noise3(c, inst);
+		break;
+	    case OPCODE_NOISE4:
+		emit_noise4(c, inst);
+		break;
+	    case OPCODE_TEX:
+		emit_tex(c, dst, dst_flags, args[0],
+			 get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
+				 0, 1, 0, 0),
+			 inst->TexSrcTarget,
+			 inst->TexSrcUnit,
+			 (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0);
+		break;
+	    case OPCODE_TXB:
+		emit_txb(c, dst, dst_flags, args[0],
+			 get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH,
+				 0, 1, 0, 0),
+			 inst->TexSrcTarget,
+			 c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]);
+		break;
+	    case OPCODE_KIL_NV:
+		emit_kil(c);
+		break;
+	    case OPCODE_IF:
+		assert(if_depth < MAX_IF_DEPTH);
+		if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
+		break;
+	    case OPCODE_ELSE:
+		assert(if_depth > 0);
+		if_inst[if_depth-1]  = brw_ELSE(p, if_inst[if_depth-1]);
+		break;
+	    case OPCODE_ENDIF:
+		assert(if_depth > 0);
+		brw_ENDIF(p, if_inst[--if_depth]);
+		break;
+	    case OPCODE_BGNSUB:
+		brw_save_label(p, inst->Comment, p->nr_insn);
+		break;
+	    case OPCODE_ENDSUB:
+		/* no-op */
+		break;
+	    case OPCODE_CAL: 
+		/* Store the return address (IP plus three instructions) at
+		 * the current stack slot, advance the stack pointer, record
+		 * the call site for later resolution and emit the jump
+		 * whose offset brw_resolve_cals() is expected to patch.
+		 */
+		brw_push_insn_state(p);
+		brw_set_mask_control(p, BRW_MASK_DISABLE);
+                brw_set_access_mode(p, BRW_ALIGN_1);
+                brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+                brw_set_access_mode(p, BRW_ALIGN_16);
+                brw_ADD(p, get_addr_reg(stack_index),
+                         get_addr_reg(stack_index), brw_imm_d(4));
+		brw_save_call(&c->func, inst->Comment, p->nr_insn);
+                brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+                brw_pop_insn_state(p);
+		break;
+
+	    case OPCODE_RET:
+		/* Step the stack pointer back and load IP from that slot. */
+		brw_push_insn_state(p);
+		brw_set_mask_control(p, BRW_MASK_DISABLE);
+                brw_ADD(p, get_addr_reg(stack_index),
+                        get_addr_reg(stack_index), brw_imm_d(-4));
+                brw_set_access_mode(p, BRW_ALIGN_1);
+                brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
+                brw_set_access_mode(p, BRW_ALIGN_16);
+		brw_pop_insn_state(p);
+
+		break;
+	    case OPCODE_BGNLOOP:
+                /* XXX may need to invalidate the current_constant regs */
+		/* Guard the fixed-size loop stack, as OPCODE_IF does. */
+		assert(loop_depth < MAX_LOOP_DEPTH);
+		loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+		break;
+	    case OPCODE_BRK:
+		brw_BREAK(p);
+		brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+		break;
+	    case OPCODE_CONT:
+		brw_CONT(p);
+		brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+		break;
+	    case OPCODE_ENDLOOP: 
+               {
+                  struct brw_instruction *inst0, *inst1;
+                  /* jump counts are scaled by 2 on gen 5 */
+                  GLuint br = 1;
+
+                  if (intel->gen == 5)
+                     br = 2;
+
+		  assert(loop_depth > 0);
+                  loop_depth--;
+                  inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+                  /* patch all the BREAK/CONT instructions from last BGNLOOP */
+                  while (inst0 > loop_inst[loop_depth]) {
+                     inst0--;
+                     if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+			 inst0->bits3.if_else.jump_count == 0) {
+			inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+			inst0->bits3.if_else.pop_count = 0;
+                     }
+                     else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+			      inst0->bits3.if_else.jump_count == 0) {
+                        inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+                        inst0->bits3.if_else.pop_count = 0;
+                     }
+                  }
+               }
+               break;
+	    default:
+		printf("unsupported opcode %d (%s) in fragment shader\n",
+		       inst->Opcode, inst->Opcode < MAX_OPCODE ?
+		       _mesa_opcode_string(inst->Opcode) : "unknown");
+		break;
+	}
+
+	/* Release temporaries containing any unaliased source regs. */
+	release_tmps( c, mark );
+
+	/* Instructions following a CondUpdate are emitted predicated. */
+	if (inst->CondUpdate)
+	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+	else
+	    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    }
+    post_wm_emit(c);
+
+    if (INTEL_DEBUG & DEBUG_WM) {
+      printf("wm-native:\n");
+      for (i = 0; i < p->nr_insn; i++)
+	 brw_disasm(stderr, &p->store[i], intel->gen);
+      printf("\n");
+    }
+}
+
+/**
+ * Do GPU code generation for shaders that use GLSL features such as
+ * flow control.
+ *
+ * Runs the initial translation pass (brw_wm_pass_fp), then the GLSL code
+ * generator (brw_wm_emit_glsl), and finally records the number of GRF
+ * registers used; no scratch space is requested.
+ *
+ * NOTE(review): the original comment ended mid-sentence ("Other shaders
+ * will be compiled with the ..."); presumably other shaders go through
+ * the non-GLSL WM code generator -- confirm against the caller.
+ */
+void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+    if (INTEL_DEBUG & DEBUG_WM) {
+        printf("brw_wm_glsl_emit:\n");
+    }
+
+    /* initial instruction translation/simplification */
+    brw_wm_pass_fp(c);
+
+    /* actual code generation */
+    brw_wm_emit_glsl(brw, c);
+
+    if (INTEL_DEBUG & DEBUG_WM) {
+        brw_wm_print_program(c, "brw_wm_glsl_emit done");
+    }
+
+    c->prog_data.total_grf = num_grf_used(c);
+    c->prog_data.total_scratch = 0;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c
new file mode 100644
index 0000000000..5e399ac62a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c
@@ -0,0 +1,157 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                
+
+#include "main/mtypes.h"
+#include "brw_wm.h"
+
+
+#undef P			/* promoted depth */
+#undef C			/* computed */
+#undef N			/* non-promoted? */
+
+#define P 0
+#define C 1
+#define N 2
+
+/*
+ * Per-state depth/stencil requirements, indexed by the bitmask of IZ_*
+ * flags that brw_wm_lookup_iz() receives as 'lookup' (one entry for each
+ * of the IZ_BIT_MAX combinations).
+ *
+ *   mode        - one of P, C or N as defined above (not read by
+ *                 brw_wm_lookup_iz())
+ *   sd_present  - a source depth register is needed
+ *   sd_to_rt    - sets key->source_depth_to_render_target
+ *   dd_present  - a destination depth register is needed
+ *   ds_present  - the AA/destination-stencil register is needed
+ *
+ * NOTE(review): the entries look like a transcription of a hardware
+ * documentation table; check against the hardware docs before editing.
+ */
+const struct {
+   GLuint mode:2;
+   GLuint sd_present:1;
+   GLuint sd_to_rt:1;
+   GLuint dd_present:1;
+   GLuint ds_present:1;
+} wm_iz_table[IZ_BIT_MAX] =
+{
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 0 }, 
+ { N, 0, 1, 0, 0 }, 
+ { N, 0, 1, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 0 }, 
+ { N, 0, 1, 0, 0 }, 
+ { N, 0, 1, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 1 }, 
+ { N, 0, 1, 0, 1 }, 
+ { N, 0, 1, 0, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 0, 0, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 0, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 1, 1, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 } 
+};
+
+/**
+ * Work out which depth/stencil registers a WM program needs and store
+ * the layout in the program key.
+ *
+ * Registers are handed out in a fixed order starting at register 2:
+ * source depth (two registers), AA/destination stencil (one register),
+ * then destination depth (two registers).  Key fields belonging to items
+ * that are not needed are left untouched.
+ *
+ * \param line_aa  AA_NEVER, AA_ALWAYS or AA_SOMETIMES
+ * \param lookup  bitmask of IZ_* flags, used as index into wm_iz_table
+ * \param ps_uses_depth  request a source depth register even when the
+ *                       table entry does not
+ * \param key  program key that receives the result
+ */
+void brw_wm_lookup_iz( GLuint line_aa,
+		       GLuint lookup,
+		       GLboolean ps_uses_depth,
+		       struct brw_wm_prog_key *key )
+{
+   GLuint next_reg = 2;
+   GLuint src_depth, src_depth_to_rt, dest_depth, dest_stencil;
+
+   assert (lookup < IZ_BIT_MAX);
+
+   /* Pull the table entry apart once, up front. */
+   src_depth       = wm_iz_table[lookup].sd_present;
+   src_depth_to_rt = wm_iz_table[lookup].sd_to_rt;
+   dest_depth      = wm_iz_table[lookup].dd_present;
+   dest_stencil    = wm_iz_table[lookup].ds_present;
+
+   if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
+      key->computes_depth = 1;
+
+   if (src_depth || ps_uses_depth) {
+      key->source_depth_reg = next_reg;
+      next_reg += 2;
+   }
+
+   if (src_depth_to_rt)
+      key->source_depth_to_render_target = 1;
+
+   if (dest_stencil || line_aa != AA_NEVER) {
+      key->aa_dest_stencil_reg = next_reg;
+      next_reg++;
+
+      /* Runtime check applies only when the register is there purely
+       * because of "sometimes" line antialiasing.
+       */
+      key->runtime_check_aads_emit = (!dest_stencil &&
+				      line_aa == AA_SOMETIMES);
+   }
+
+   if (dest_depth) {
+      key->dest_depth_reg = next_reg;
+      next_reg += 2;
+   }
+
+   /* Round up to whole register pairs. */
+   key->nr_depth_regs = (next_reg + 1) / 2;
+}
+
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
new file mode 100644
index 0000000000..60bd92ed22
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -0,0 +1,445 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                 
+
+#include "brw_context.h"
+#include "brw_wm.h"
+#include "shader/prog_parameter.h"
+
+
+
+/***********************************************************************
+ */
+
+static struct brw_wm_ref *get_ref( struct brw_wm_compile *c )
+{  /* Claim the next unused entry of c->refs[] and return it zero-filled. */
+   assert(c->nr_refs < BRW_WM_MAX_REF);   /* pool bound; checked by assert only */
+   memset(&c->refs[c->nr_refs], 0, sizeof(*c->refs));
+   return &c->refs[c->nr_refs++];         /* post-increment marks the entry used */
+}
+
+static struct brw_wm_value *get_value( struct brw_wm_compile *c)
+{  /* Claim the next unused entry of c->vreg[] and return it zero-filled. */
+   assert(c->nr_vreg < BRW_WM_MAX_VREG);  /* bound the vreg counter (was nr_refs) */
+   memset(&c->vreg[c->nr_vreg], 0, sizeof(*c->vreg));
+   return &c->vreg[c->nr_vreg++];         /* post-increment marks the entry used */
+}
+
+/** Return a pointer to the next unused, zero-filled entry of c->instruction[]. */
+static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c )
+{
+   assert(c->nr_insns < BRW_WM_MAX_INSN);   /* pool bound; checked by assert only */
+   memset(&c->instruction[c->nr_insns], 0, sizeof(*c->instruction));
+   return &c->instruction[c->nr_insns++];   /* post-increment marks the entry used */
+}
+
+/***********************************************************************
+ */
+
+/** Set up c->undef_ref, the ref pass0_get_reg() returns for registers with no ref. */
+static void pass0_init_undef( struct brw_wm_compile *c)
+{
+   struct brw_wm_ref *ref = &c->undef_ref;
+   ref->value = &c->undef_value;       /* backed by the dedicated undef value */
+   ref->hw_reg = brw_vec8_grf(0, 0);
+   ref->insn = 0;
+   ref->prevuse = NULL;                /* not linked into any use chain */
+}
+
+/** Bind FP register [file][idx][component] to a freshly allocated ref for value. */
+static void pass0_set_fpreg_value( struct brw_wm_compile *c,
+				   GLuint file,
+				   GLuint idx,
+				   GLuint component,
+				   struct brw_wm_value *value )
+{
+   struct brw_wm_ref *ref = get_ref(c);   /* new ref from the c->refs[] pool */
+   ref->value = value;
+   ref->hw_reg = brw_vec8_grf(0, 0);
+   ref->insn = 0;
+   ref->prevuse = NULL;                   /* this ref is not put on value's use chain */
+   c->pass0_fp_reg[file][idx][component] = ref;   /* replaces any earlier binding */
+}
+
+/** Bind FP register [file][idx][component] to an existing ref; nothing is allocated. */
+static void pass0_set_fpreg_ref( struct brw_wm_compile *c,
+				 GLuint file,
+				 GLuint idx,
+				 GLuint component,
+				 const struct brw_wm_ref *src_ref )
+{
+   c->pass0_fp_reg[file][idx][component] = src_ref;   /* pointer is stored, not copied */
+}
+
+static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, 
+					       const GLfloat *param_ptr )
+{  /* Append param_ptr to prog_data.param[] and return a new ref for it; NULL when full. */
+   GLuint i = c->prog_data.nr_params++;
+   /* NOTE: nr_params has already been incremented even when the check below fails. */
+   if (i >= BRW_WM_MAX_PARAM) {
+      printf("%s: out of params\n", __FUNCTION__);
+      c->prog_data.error = 1;
+      return NULL;
+   }
+   else {
+      struct brw_wm_ref *ref = get_ref(c);
+
+      c->prog_data.param[i] = param_ptr;   /* the pointer is kept, not the float it points to */
+      c->nr_creg = (i+16)/16;              /* number of 16-parameter groups in use */
+
+      /* Push the offsets into hw_reg.  These will be added to the
+       * real register numbers once one is allocated in pass2.
+       */
+      ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8);   /* args: bit 3 of i as 0/1, then i%8 */
+      ref->value = &c->creg[i/16];                  /* one creg value per 16 parameters */
+      ref->insn = 0;
+      ref->prevuse = NULL;
+
+      return ref;
+   }
+}
+
+
+/** Return a ref for the constant *constval, reusing an earlier equal one; NULL on failure. */
+static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
+					       const GLfloat *constval )
+{
+   GLuint i;
+
+   /* Search for an existing const value matching the request
+    * (exact floating-point == comparison): */
+   for (i = 0; i < c->nr_constrefs; i++) {
+      if (c->constref[i].constval == *constval) 
+	 return c->constref[i].ref;
+   }
+
+   /* Else try to add a new one:
+    */
+   if (c->nr_constrefs < BRW_WM_MAX_CONST) {
+      GLuint i = c->nr_constrefs++;   /* shadows the outer i; index of the new entry */
+
+      /* A constant is a special type of parameter.  If get_param_ref()
+       * fails, the NULL it returns is what gets cached and returned. */
+      c->constref[i].constval = *constval;
+      c->constref[i].ref = get_param_ref(c, constval);
+
+      return c->constref[i].ref;
+   }
+   else {
+      printf("%s: out of constrefs\n", __FUNCTION__);
+      c->prog_data.error = 1;
+      return NULL;
+   }
+}
+
+
+/* Look up the ref currently bound to FP register [file][idx][component],
+ * creating parameter/constant refs on first use.  Never returns NULL. */
+static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
+					       GLuint file,
+					       GLuint idx,
+					       GLuint component )
+{
+   const struct brw_wm_ref *ref = c->pass0_fp_reg[file][idx][component];
+
+   if (!ref) {
+      switch (file) {
+      case PROGRAM_INPUT:
+      case PROGRAM_PAYLOAD:
+      case PROGRAM_TEMPORARY:
+      case PROGRAM_OUTPUT:
+      case PROGRAM_VARYING:
+	 break;   /* nothing is created on demand for these files */
+
+      case PROGRAM_LOCAL_PARAM:
+	 ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]);
+	 break;
+
+      case PROGRAM_ENV_PARAM:
+	 ref = get_param_ref(c, &c->env_param[idx][component]);
+	 break;
+
+      case PROGRAM_STATE_VAR:
+      case PROGRAM_UNIFORM:
+      case PROGRAM_CONSTANT:
+      case PROGRAM_NAMED_PARAM: {
+	 struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
+	 
+	 /* There's something really hokey about parameters parsed in
+	  * arb programs - they all end up in here, whether they be
+	  * state values, parameters or constants.  This duplicates the
+	  * structure above & also seems to subvert the limits set for
+	  * each type of constant/param.
+	  */ 
+	 switch (plist->Parameters[idx].Type) {
+	 case PROGRAM_NAMED_PARAM:
+	 case PROGRAM_CONSTANT:
+	    /* These are invariant, so equal values can share one ref:
+	     */
+	    ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
+	    break;
+
+	 case PROGRAM_STATE_VAR:
+	 case PROGRAM_UNIFORM:
+	    /* These may change from run to run:
+	     */
+	    ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
+	    break;
+
+	 default:
+	    assert(0);
+	    break;
+	 }
+	 break;
+      }
+
+      default:
+	 assert(0);
+	 break;
+      }
+
+      c->pass0_fp_reg[file][idx][component] = ref;   /* remember the result (may still be NULL) */
+   }
+
+   if (!ref)
+      ref = &c->undef_ref;   /* no binding and none created: hand back the undef ref */
+
+   return ref;
+}
+
+
+
+/***********************************************************************
+ * Straight translation to internal instruction format
+ */
+
+static void pass0_set_dst( struct brw_wm_compile *c,
+			   struct brw_wm_instruction *out,
+			   const struct prog_instruction *inst,
+			   GLuint writemask )
+{  /* Give each written component of out a new value and rebind the FP dst register to it. */
+   const struct prog_dst_register *dst = &inst->DstReg;
+   GLuint i;
+
+   for (i = 0; i < 4; i++) {
+      if (writemask & (1<<i)) {
+	 out->dst[i] = get_value(c);   /* a separate value per written component */
+	 pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]);
+      }
+   }
+
+   out->writemask = writemask;   /* components outside the mask keep a NULL dst[] */
+}
+
+
+static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
+						    struct prog_src_register src,
+						    GLuint i )
+{  /* Resolve swizzle slot i of src to the ref that supplies it; never returns NULL. */
+   GLuint component = GET_SWZ(src.Swizzle,i);
+   const struct brw_wm_ref *src_ref;
+   static const GLfloat const_zero = 0.0;
+   static const GLfloat const_one = 1.0;
+
+   if (component == SWIZZLE_ZERO) 
+      src_ref = get_const_ref(c, &const_zero);
+   else if (component == SWIZZLE_ONE) 
+      src_ref = get_const_ref(c, &const_one);
+   else 
+      src_ref = pass0_get_reg(c, src.File, src.Index, component);
+   /* get_const_ref() yields NULL when out of slots; use the undef ref like pass0_get_reg(). */
+   return src_ref ? src_ref : &c->undef_ref;
+}
+
+
+static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
+				       struct prog_src_register src,
+				       GLuint i,
+				       struct brw_wm_instruction *insn)
+{  /* Make a private copy of the ref for swizzle slot i of src, with negate/abs applied. */
+   const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i);
+   struct brw_wm_ref *newref = get_ref(c);
+
+   newref->value = ref->value;     /* ref is dereferenced without a NULL check */
+   newref->hw_reg = ref->hw_reg;
+
+   if (insn) {   /* only refs owned by an instruction are pushed on the value's use chain */
+      newref->insn = insn - c->instruction;
+      newref->prevuse = newref->value->lastuse;
+      newref->value->lastuse = newref;
+   }
+
+   if (src.Negate & (1 << i))
+      newref->hw_reg.negate ^= 1;
+
+   if (src.Abs) {   /* abs clears whatever negate state was set above */
+      newref->hw_reg.negate = 0;
+      newref->hw_reg.abs = 1;
+   }
+
+   return newref;
+}
+
+
+static void
+translate_insn(struct brw_wm_compile *c,
+               const struct prog_instruction *inst)
+{  /* Append one internal instruction that is a direct translation of inst. */
+   struct brw_wm_instruction *out = get_instruction(c);
+   GLuint writemask = inst->DstReg.WriteMask;
+   GLuint nr_args = brw_wm_nr_args(inst->Opcode);
+   GLuint i, j;
+
+   /* Copy some data out of the instruction
+    */
+   out->opcode = inst->Opcode;
+   out->saturate = (inst->SaturateMode != SATURATE_OFF);
+   out->tex_unit = inst->TexSrcUnit;
+   out->tex_idx = inst->TexSrcTarget;
+   out->tex_shadow = inst->TexShadow;
+   out->eot = inst->Aux & INST_AUX_EOT;
+   out->target = INST_AUX_GET_TARGET(inst->Aux);
+
+   /* Args: one new ref per argument component, linked to this instruction:
+    */
+   for (i = 0; i < nr_args; i++) {
+      for (j = 0; j < 4; j++) {
+	 out->src[i][j] = get_new_ref(c, inst->SrcReg[i], j, out);
+      }
+   }
+
+   /* Dst: rebound after the args, so the sources above saw the old bindings:
+    */
+   pass0_set_dst(c, out, inst, writemask);
+}
+
+
+
+/***********************************************************************
+ * Optimize moves and swizzles away (no instruction is emitted for them):
+ */ 
+static void pass0_precalc_mov( struct brw_wm_compile *c,
+			       const struct prog_instruction *inst )
+{
+   const struct prog_dst_register *dst = &inst->DstReg;
+   GLuint writemask = inst->DstReg.WriteMask;
+   struct brw_wm_ref *refs[4];
+   GLuint i;
+
+   /* Get the effect of a MOV by manipulating our register table:
+    * First get all refs, then assign refs.  This ensures that "in-place"
+    * swizzles such as:
+    *   MOV t, t.xxyx
+    * are handled correctly.  Previously, these two steps were done in
+    * one loop and the above case was incorrectly handled.
+    */
+   for (i = 0; i < 4; i++) {
+      refs[i] = get_new_ref(c, inst->SrcReg[0], i, NULL);   /* NULL insn: not put on a use chain */
+   }
+   for (i = 0; i < 4; i++) {
+      if (writemask & (1 << i)) {	    
+         pass0_set_fpreg_ref( c, dst->File, dst->Index, i, refs[i]);
+      }
+   }
+}
+
+
+/* Initialize payload "registers": bind the PROGRAM_PAYLOAD file entries
+ * to the values in c->payload. */
+static void pass0_init_payload( struct brw_wm_compile *c )
+{
+   GLuint i;
+
+   for (i = 0; i < 4; i++) {
+      GLuint j = i >= c->key.nr_depth_regs ? 0 : i;   /* components past nr_depth_regs use depth[0] */
+      pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, 
+			     &c->payload.depth[j] );
+   }
+
+#if 0
+   /* This seems to be an alternative to the INTERP_WPOS stuff I do
+    * elsewhere:
+    */
+   if (c->key.source_depth_reg)
+      pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2,
+			    &c->payload.depth[c->key.source_depth_reg/2]);
+#endif
+   /* Component 0 of payload register i is bound to interpolant value i: */
+   for (i = 0; i < FRAG_ATTRIB_MAX; i++)
+      pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0, 
+			     &c->payload.input_interp[i] );      
+}
+
+
+/***********************************************************************
+ * PASS 0
+ *
+ * Work forwards to give each calculated value a unique number.  Where
+ * an instruction produces duplicate values (eg DP3), all are given
+ * the same number.
+ *
+ * Translate away swizzling and eliminate non-saturating moves.
+ */
+void brw_wm_pass0( struct brw_wm_compile *c )
+{
+   GLuint insn;
+
+   c->nr_vreg = 0;    /* restart the value and instruction pools; */
+   c->nr_insns = 0;   /* c->nr_refs is not reset here */
+
+   pass0_init_undef(c);
+   pass0_init_payload(c);
+
+   for (insn = 0; insn < c->nr_fp_insns; insn++) {
+      const struct prog_instruction *inst = &c->prog_instructions[insn];
+
+      /* Optimize away moves, otherwise emit translated instruction:
+       */      
+      switch (inst->Opcode) {
+      case OPCODE_MOV: 
+      case OPCODE_SWZ: 
+	 if (!inst->SaturateMode) {
+	    pass0_precalc_mov(c, inst);   /* register-table update only, nothing emitted */
+	 }
+	 else {
+	    translate_insn(c, inst);      /* a saturating move is kept as an instruction */
+	 }
+	 break;
+      default:
+	 translate_insn(c, inst);
+	 break;
+      }
+   }
+ 
+   if (INTEL_DEBUG & DEBUG_WM) {
+      brw_wm_print_program(c, "pass0");
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
new file mode 100644
index 0000000000..b449394029
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
@@ -0,0 +1,291 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                  
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+
+static GLuint get_tracked_mask(struct brw_wm_compile *c,
+			       struct brw_wm_instruction *inst)
+{  /* Drop written components whose value is not marked contributes_to_output. */
+   GLuint i;
+   for (i = 0; i < 4; i++) {
+      if (inst->writemask & (1<<i)) {
+	 if (!inst->dst[i]->contributes_to_output) {
+	    inst->writemask &= ~(1<<i);   /* modifies the instruction in place */
+	    inst->dst[i] = 0;
+	 }
+      }
+   }
+
+   return inst->writemask;   /* the mask that remains after pruning */
+}
+
+/* Remove a reference from a value's usage chain (lastuse -> prevuse -> ...).
+ * The ref is expected to be on the chain; the walk below has no NULL check. */
+static void unlink_ref(struct brw_wm_ref *ref)
+{
+   struct brw_wm_value *value = ref->value;
+
+   if (ref == value->lastuse) {
+      value->lastuse = ref->prevuse;   /* ref was the head of the chain */
+   }
+   else {
+      struct brw_wm_ref *i = value->lastuse;
+      while (i->prevuse != ref) i = i->prevuse;   /* find the link that points at ref */
+      i->prevuse = ref->prevuse;
+   }
+}
+
+static void track_arg(struct brw_wm_compile *c,
+		      struct brw_wm_instruction *inst,
+		      GLuint arg,
+		      GLuint readmask)
+{  /* Keep the components of argument arg named in readmask; discard the others. */
+   GLuint i;
+
+   for (i = 0; i < 4; i++) {
+      struct brw_wm_ref *ref = inst->src[arg][i];
+      if (ref) {
+	 if (readmask & (1<<i)) {
+	    ref->value->contributes_to_output = 1;   /* the producer of this value must be kept */
+         }
+	 else {
+	    unlink_ref(ref);            /* take the unused ref off its value's use chain */
+	    inst->src[arg][i] = NULL;
+	 }
+      }
+   }
+}
+
+static GLuint get_texcoord_mask( GLuint tex_idx )
+{  /* Map a TEXTURE_*_INDEX to the WRITEMASK_* of coordinate components it reads. */
+   switch (tex_idx) {
+   case TEXTURE_1D_INDEX:
+      return WRITEMASK_X;
+   case TEXTURE_2D_INDEX:
+      return WRITEMASK_XY;
+   case TEXTURE_3D_INDEX:
+      return WRITEMASK_XYZ;
+   case TEXTURE_CUBE_INDEX:
+      return WRITEMASK_XYZ;
+   case TEXTURE_RECT_INDEX:
+      return WRITEMASK_XY;
+   default: return 0;   /* any other index: no components */
+   }
+}
+
+
+/* Step two: Basically this is dead code elimination.  
+ *
+ * Iterate backwards over instructions, noting which values
+ * contribute to the final result.  Adjust writemasks to only
+ * calculate these values.
+ */
+void brw_wm_pass1( struct brw_wm_compile *c )
+{
+   GLint insn;   /* signed so that the countdown loop below can terminate */
+
+   for (insn = c->nr_insns-1; insn >= 0; insn--) {
+      struct brw_wm_instruction *inst = &c->instruction[insn];
+      GLuint writemask;
+      GLuint read0, read1, read2;   /* components read from args 0, 1 and 2 */
+
+      if (inst->opcode == OPCODE_KIL) {
+	 track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */
+	 continue;
+      }
+
+      if (inst->opcode == WM_FB_WRITE) {
+	 track_arg(c, inst, 0, WRITEMASK_XYZW); 
+	 track_arg(c, inst, 1, WRITEMASK_XYZW); 
+	 if (c->key.source_depth_to_render_target &&
+	     c->key.computes_depth)
+	    track_arg(c, inst, 2, WRITEMASK_Z); 
+	 else
+	    track_arg(c, inst, 2, 0); 
+	 continue;
+      }
+
+      /* Lookup all the registers which were written by this
+       * instruction and get a mask of those that contribute to the output:
+       */
+      writemask = get_tracked_mask(c, inst);
+      if (!writemask) {   /* nothing written is needed: release every argument */
+	 GLuint arg;
+	 for (arg = 0; arg < 3; arg++)
+	    track_arg(c, inst, arg, 0);
+	 continue;
+      }
+
+      read0 = 0;
+      read1 = 0;
+      read2 = 0;
+
+      /* Mark all inputs which contribute to the marked outputs:
+       */
+      switch (inst->opcode) {
+      case OPCODE_ABS:
+      case OPCODE_FLR:
+      case OPCODE_FRC:
+      case OPCODE_MOV:
+      case OPCODE_SWZ:
+      case OPCODE_TRUNC:
+	 read0 = writemask;
+	 break;
+
+      case OPCODE_SUB:
+      case OPCODE_SLT:
+      case OPCODE_SLE:
+      case OPCODE_SGE:
+      case OPCODE_SGT:
+      case OPCODE_SEQ:
+      case OPCODE_SNE:
+      case OPCODE_ADD:
+      case OPCODE_MAX:
+      case OPCODE_MIN:
+      case OPCODE_MUL:
+	 read0 = writemask;
+	 read1 = writemask;
+	 break;
+
+      case OPCODE_DDX:
+      case OPCODE_DDY:
+	 read0 = writemask;
+	 break;
+
+      case OPCODE_MAD:	
+      case OPCODE_CMP:
+      case OPCODE_LRP:
+	 read0 = writemask;
+	 read1 = writemask;	
+	 read2 = writemask;	
+	 break;
+
+      case OPCODE_XPD: 
+	 if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ;	 
+	 if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ;	 
+	 if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY;
+	 read1 = read0;
+	 break;
+
+      case OPCODE_COS:
+      case OPCODE_EX2:
+      case OPCODE_LG2:
+      case OPCODE_RCP:
+      case OPCODE_RSQ:
+      case OPCODE_SIN:
+      case OPCODE_SCS:
+      case WM_CINTERP:
+      case WM_PIXELXY:
+	 read0 = WRITEMASK_X;
+	 break;
+
+      case OPCODE_POW:
+	 read0 = WRITEMASK_X;
+	 read1 = WRITEMASK_X;
+	 break;
+
+      case OPCODE_TEX:
+      case OPCODE_TXP:
+	 read0 = get_texcoord_mask(inst->tex_idx);
+
+         if (inst->tex_shadow)
+	    read0 |= WRITEMASK_Z;
+	 break;
+
+      case OPCODE_TXB:
+	 /* Shadow ignored for txb.
+	  */
+	 read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W;
+	 break;
+
+      case WM_WPOSXY:
+	 read0 = writemask & WRITEMASK_XY;
+	 break;
+
+      case WM_DELTAXY:
+	 read0 = writemask & WRITEMASK_XY;
+	 read1 = WRITEMASK_X;
+	 break;
+
+      case WM_PIXELW:
+	 read0 = WRITEMASK_X;
+	 read1 = WRITEMASK_XY;
+	 break;
+
+      case WM_LINTERP:
+	 read0 = WRITEMASK_X;
+	 read1 = WRITEMASK_XY;
+	 break;
+
+      case WM_PINTERP:
+	 read0 = WRITEMASK_X; /* interpolant */
+	 read1 = WRITEMASK_XY; /* deltas */
+	 read2 = WRITEMASK_W; /* pixel w */
+	 break;
+
+      case OPCODE_DP3:	
+	 read0 = WRITEMASK_XYZ;
+	 read1 = WRITEMASK_XYZ;
+	 break;
+
+      case OPCODE_DPH:
+	 read0 = WRITEMASK_XYZ;
+	 read1 = WRITEMASK_XYZW;
+	 break;
+
+      case OPCODE_DP4:
+	 read0 = WRITEMASK_XYZW;
+	 read1 = WRITEMASK_XYZW;
+	 break;
+
+      case OPCODE_LIT: 
+	 read0 = WRITEMASK_XYW;
+	 break;
+
+      case OPCODE_DST:
+      case WM_FRONTFACING:
+      case OPCODE_KIL_NV:
+      default:
+	 break;   /* read masks stay 0, so every argument of these opcodes is dropped below */
+      }
+
+      track_arg(c, inst, 0, read0);
+      track_arg(c, inst, 1, read1);
+      track_arg(c, inst, 2, read2);
+   }
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      brw_wm_print_program(c, "pass1");
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
new file mode 100644
index 0000000000..31303febf0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
@@ -0,0 +1,343 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                   
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+
+/* Set either of these to 1 to force spilling, so that the spill paths
+ * can be tested with known-good examples rather than having to
+ * construct new tests.
+ */
+#define TEST_PAYLOAD_SPILLS 0   /* nonzero: prealloc_reg() spills each value it binds */
+#define TEST_DST_SPILLS 0       /* nonzero: brw_wm_pass2() spills every dst, except for WM_PIXELXY */
+
+static void spill_value(struct brw_wm_compile *c,
+			struct brw_wm_value *value);   /* declared early: prealloc_reg() calls it */
+
+static void prealloc_reg(struct brw_wm_compile *c,
+			 struct brw_wm_value *value,
+			 GLuint reg)
+{  /* Bind value to pass2_grf[reg], but only if the value has at least one recorded use. */
+   if (value->lastuse) {
+      /* Set nextuse to zero, it will be corrected by
+       * update_register_usage().
+       */
+      c->pass2_grf[reg].value = value;
+      c->pass2_grf[reg].nextuse = 0;
+
+      value->resident = &c->pass2_grf[reg];
+      value->hw_reg = brw_vec8_grf(reg*2, 0);   /* register number is twice the pass2_grf index */
+
+      if (TEST_PAYLOAD_SPILLS)
+	 spill_value(c, value);
+   }
+}
+
+
+/* Initialize all the register values.  Do the initial setup
+ * calculations for interpolants.  Slots are handed out in order:
+ * depth payload, constant (creg) values, then interpolants. */
+static void init_registers( struct brw_wm_compile *c )
+{
+   GLuint nr_interp_regs = 0;
+   GLuint i = 0;   /* next pass2_grf index to preallocate */
+   GLuint j;
+
+   for (j = 0; j < c->grf_limit; j++) 
+      c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN;   /* BRW_WM_MAX_INSN marks a free slot */
+
+   for (j = 0; j < c->key.nr_depth_regs; j++) 
+      prealloc_reg(c, &c->payload.depth[j], i++);
+
+   for (j = 0; j < c->nr_creg; j++) 
+      prealloc_reg(c, &c->creg[j], i++);
+
+   for (j = 0; j < VERT_RESULT_MAX; j++) {
+      if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) {
+	 int fp_index;   /* matching fragment attribute index, or -1 if there is none */
+
+	 if (j >= VERT_RESULT_VAR0)
+	    fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
+	 else if (j <= VERT_RESULT_TEX7)
+	    fp_index = j;
+	 else
+	    fp_index = -1;
+	 /* NOTE(review): counted even when fp_index < 0, while i is not advanced - confirm. */
+	 nr_interp_regs++;
+	 if (fp_index >= 0)
+	    prealloc_reg(c, &c->payload.input_interp[fp_index], i++);
+      }
+   }
+
+   assert(nr_interp_regs >= 1);
+
+   c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+   c->prog_data.urb_read_length = nr_interp_regs * 2;
+   c->prog_data.curb_read_length = c->nr_creg * 2;
+
+   c->max_wm_grf = i * 2;   /* twice the number of slots handed out so far */
+}
+
+
+/* Update the nextuse value for each register in our file, releasing
+ * registers whose value has no use at or after thisinsn. */
+static void update_register_usage(struct brw_wm_compile *c,
+				  GLuint thisinsn)
+{
+   GLuint i;
+
+   for (i = 1; i < c->grf_limit; i++) {   /* slot 0 is skipped */
+      struct brw_wm_grf *grf = &c->pass2_grf[i];
+
+      /* Only search those which can change:
+       */
+      if (grf->nextuse < thisinsn) {
+	 const struct brw_wm_ref *ref = grf->value->lastuse;   /* grf->value is not NULL-checked */
+
+	 /* Has last use of value been passed?
+	  */
+	 if (ref->insn < thisinsn) {
+	    grf->value->resident = 0;
+	    grf->value = 0;
+	    grf->nextuse = BRW_WM_MAX_INSN;   /* slot is free again */
+	 }
+	 else {
+	    /* Else loop through chain to update:
+	     */
+	    while (ref->prevuse && ref->prevuse->insn >= thisinsn)
+	       ref = ref->prevuse;
+
+	    grf->nextuse = ref->insn;   /* earliest use that is not before thisinsn */
+	 }
+      }
+   }
+}
+
+
+static void spill_value(struct brw_wm_compile *c,
+			struct brw_wm_value *value)
+{	/* Give value a scratch slot if it has none, then release its register. */
+   /* Allocate a spill slot.  Note that allocations start from 0x40 -
+    * the first slot is reserved to mean "undef" in brw_wm_emit.c
+    */
+   if (!value->spill_slot) {
+      c->last_scratch += 0x40;	
+      value->spill_slot = c->last_scratch;
+   }
+
+   /* The spill will be done in brw_wm_emit.c immediately after the
+    * value is calculated, so we can just take this reg without any
+    * further work.  value->resident is dereferenced without a NULL check.
+    */
+   value->resident->value = NULL;
+   value->resident->nextuse = BRW_WM_MAX_INSN;
+   value->resident = NULL;
+}
+
+
+
+/* Search for contiguous region with the most distant nearest
+ * member.  Free regs count as very distant.
+ *
+ * TODO: implement spill-to-reg so that we can rearrange discontiguous
+ * free regs and then spill the oldest non-free regs in sequence.
+ * This would mean inserting instructions in this pass.
+ */
+static GLuint search_contiguous_regs(struct brw_wm_compile *c,
+				     GLuint nr,
+				     GLuint thisinsn)
+{
+   struct brw_wm_grf *grf = c->pass2_grf;
+   GLuint furthest = 0;   /* best (largest) group nextuse found so far */
+   GLuint reg = 0;        /* first slot of the best group */
+   GLuint i, j;
+
+   /* Start search at 1: r0 is special and can't be used or spilled.
+    * Stop while the whole window grf[i..i+nr-1] is still below grf_limit. */
+   for (i = 1; i + nr <= c->grf_limit && furthest < BRW_WM_MAX_INSN; i++) {
+      GLuint group_nextuse = BRW_WM_MAX_INSN;
+
+      for (j = 0; j < nr; j++) {
+	 if (grf[i+j].nextuse < group_nextuse)
+	    group_nextuse = grf[i+j].nextuse;
+      }
+
+      if (group_nextuse > furthest) {
+	 furthest = group_nextuse;
+	 reg = i;
+      }
+   }
+
+   assert(furthest != thisinsn);
+
+   /* Any non-empty regs will need to be spilled:
+    */
+   for (j = 0; j < nr; j++) 
+      if (grf[reg+j].value)
+	 spill_value(c, grf[reg+j].value);
+
+   return reg;
+}
+
+
+static void alloc_contiguous_dest(struct brw_wm_compile *c, 
+				  struct brw_wm_value *dst[],
+				  GLuint nr,
+				  GLuint thisinsn)
+{  /* Place dst[0..nr-1] in nr consecutive slots found by search_contiguous_regs(). */
+   GLuint reg = search_contiguous_regs(c, nr, thisinsn);
+   GLuint i;
+
+   for (i = 0; i < nr; i++) {
+      if (!dst[i]) {
+	 /* Need to grab a dummy value in TEX case.  Don't introduce
+	  * it into the tracking scheme.  The slot is taken from c->vreg[]
+	  * directly: no bounds assert and no memset, unlike in pass0. */
+	 dst[i] = &c->vreg[c->nr_vreg++];
+      }
+      else {
+	 assert(!dst[i]->resident);
+	 assert(c->pass2_grf[reg+i].nextuse != thisinsn);
+
+	 c->pass2_grf[reg+i].value = dst[i];
+	 c->pass2_grf[reg+i].nextuse = thisinsn;
+
+	 dst[i]->resident = &c->pass2_grf[reg+i];
+      }
+
+      dst[i]->hw_reg = brw_vec8_grf((reg+i)*2, 0);   /* register number is twice the slot index */
+   }
+
+   if ((reg+nr)*2 > c->max_wm_grf)   /* keep max_wm_grf as the running high-water mark */
+      c->max_wm_grf = (reg+nr) * 2;
+}
+
+
+static void load_args(struct brw_wm_compile *c, 
+		      struct brw_wm_instruction *inst)
+{  /* Make every source value of inst resident and point each ref's hw_reg at it. */
+   GLuint thisinsn = inst - c->instruction;
+   GLuint i,j;
+
+   for (i = 0; i < 3; i++) {
+      for (j = 0; j < 4; j++) {
+	 struct brw_wm_ref *ref = inst->src[i][j];
+
+	 if (ref) {
+	    if (!ref->value->resident) {
+	       /* Need to bring the value in from scratch space.  The code for
+		* this will be done in brw_wm_emit.c, here we just do the
+		* register allocation and mark the ref as requiring a fill.
+		*/
+	       GLuint reg = search_contiguous_regs(c, 1, thisinsn);
+
+	       c->pass2_grf[reg].value = ref->value;
+	       c->pass2_grf[reg].nextuse = thisinsn;
+
+	       ref->value->resident = &c->pass2_grf[reg];
+
+	       /* Note that a fill is required:
+		*/
+	       ref->unspill_reg = reg*2;
+	    }
+
+	    /* Adjust the hw_reg to point at the value's current location
+	     * (twice the slot index is added to the number already in hw_reg): */
+	    assert(ref->value == ref->value->resident->value);
+	    ref->hw_reg.nr += (ref->value->resident - c->pass2_grf) * 2;
+	 }
+      }
+   }
+}
+
+
+
+/* Step 3: Work forwards once again.  Perform register allocations,
+ * taking into account instructions like TEX which require contiguous
+ * result registers.  Where necessary spill registers to scratch space
+ * and reload later.
+ */
+void brw_wm_pass2( struct brw_wm_compile *c )
+{
+   GLuint insn;
+   GLuint i;
+
+   init_registers(c);
+
+   for (insn = 0; insn < c->nr_insns; insn++) {
+      struct brw_wm_instruction *inst = &c->instruction[insn];
+
+      /* Update registers' nextuse values:
+       */
+      update_register_usage(c, insn);
+
+      /* May need to unspill some args.
+       */
+      load_args(c, inst);
+
+      /* Allocate registers to hold results:
+       */
+      switch (inst->opcode) {
+      case OPCODE_TEX:
+      case OPCODE_TXB:
+      case OPCODE_TXP:
+	 alloc_contiguous_dest(c, inst->dst, 4, insn);   /* all four results in consecutive slots */
+	 break;
+
+      default:
+	 for (i = 0; i < 4; i++) {
+	    if (inst->writemask & (1<<i)) {
+	       assert(inst->dst[i]);
+	       alloc_contiguous_dest(c, &inst->dst[i], 1, insn);   /* one slot per written component */
+	    }
+	 }
+	 break;
+      }
+
+      if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) {
+	 for (i = 0; i < 4; i++)	
+	    if (inst->dst[i])
+	       spill_value(c, inst->dst[i]);
+      }
+   }
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      brw_wm_print_program(c, "pass2");
+   }
+
+   c->state = PASS2_DONE;
+
+   if (INTEL_DEBUG & DEBUG_WM) {   /* second dump, printed after the state change */
+       brw_wm_print_program(c, "pass2/done");
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
new file mode 100644
index 0000000000..1fc802cfa6
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -0,0 +1,364 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                   
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#include "main/macros.h"
+
+
+
+/* Samplers aren't strictly wm state from the hardware's perspective,
+ * but that is the only situation in which we use them in this driver.
+ */
+
+
+
+/* The brw (and related graphics cores) do not support GL_CLAMP.  The
+ * Intel drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE, so the same is done here.  Unrecognized wrap
+ * enums fall back to BRW_TEXCOORDMODE_WRAP. */
+static GLuint translate_wrap_mode( GLenum wrap )
+{
+   switch( wrap ) {
+   case GL_REPEAT: 
+      return BRW_TEXCOORDMODE_WRAP;
+   case GL_CLAMP:  
+      return BRW_TEXCOORDMODE_CLAMP; /* same value as GL_CLAMP_TO_EDGE */
+   case GL_CLAMP_TO_EDGE: 
+      return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
+   case GL_CLAMP_TO_BORDER: 
+      return BRW_TEXCOORDMODE_CLAMP_BORDER;
+   case GL_MIRRORED_REPEAT: 
+      return BRW_TEXCOORDMODE_MIRROR;
+   default: 
+      return BRW_TEXCOORDMODE_WRAP;
+   }
+}
+/* Stores a 4-float color as BRW_SAMPLER_DEFAULT_COLOR data via brw_cache_data(). */
+static drm_intel_bo *upload_default_color( struct brw_context *brw,
+				     const GLfloat *color )
+{
+   struct brw_sampler_default_color sdc;
+
+   COPY_4V(sdc.color, color); 
+   /* The result is whatever buffer object brw_cache_data() hands back. */
+   return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR,
+			 &sdc, sizeof(sdc));
+}
+
+/* Key for the BRW_SAMPLER cache lookup: a unit count plus per-unit entries. */
+struct wm_sampler_key {
+   int sampler_count;   /* index of the highest enabled unit + 1 */
+
+   struct wm_sampler_entry {
+      GLenum tex_target;
+      GLenum wrap_r, wrap_s, wrap_t;
+      float maxlod, minlod;
+      float lod_bias;   /* texture unit bias + texture object bias */
+      float max_aniso;
+      GLenum minfilter, magfilter;
+      GLenum comparemode, comparefunc;
+
+      /** If target is cubemap, take context setting.
+       */
+      GLboolean seamless_cube_map;
+   } sampler[BRW_MAX_TEX_UNIT];
+};
+
+/**
+ * Sets the sampler state for a single unit based off of the sampler key
+ * entry.  The state is zeroed first; sdc_bo gives the default color offset.
+ */
+static void brw_update_sampler_state(struct wm_sampler_entry *key,
+				     drm_intel_bo *sdc_bo,
+				     struct brw_sampler_state *sampler)
+{
+   memset(sampler, 0, sizeof(*sampler));
+   /* Translate the GL minification filter into the min and mip filter fields. */
+   switch (key->minfilter) {
+   case GL_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      break;
+   case GL_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      break;
+   case GL_NEAREST_MIPMAP_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+      break;
+   case GL_LINEAR_MIPMAP_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+      break;
+   case GL_NEAREST_MIPMAP_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+      break;
+   case GL_LINEAR_MIPMAP_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+      break;
+   default:
+      break; /* unknown enum: fields keep the zero written by the memset */
+   }
+
+   /* Set Anisotropy: when max_aniso > 1.0 both filters are forced to
+    * anisotropic and key->magfilter is not consulted at all. */
+   if (key->max_aniso > 1.0) {
+      sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; 
+      sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
+
+      if (key->max_aniso > 2.0) {
+	 sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2,
+				       BRW_ANISORATIO_16);
+      }
+   }
+   else {
+      switch (key->magfilter) {
+      case GL_NEAREST:
+	 sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+	 break;
+      case GL_LINEAR:
+	 sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+	 break;
+      default:
+	 break;
+      }  
+   }
+
+   sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
+   sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
+   sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
+
+   /* Cube-maps on 965 and later must use the same wrap mode for all 3
+    * coordinate dimensions.  Further, only CUBE and CLAMP are valid.
+    */
+   if (key->tex_target == GL_TEXTURE_CUBE_MAP) {
+      if (key->seamless_cube_map &&
+	  (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) {
+	 sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+	 sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+	 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+      } else {
+	 sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+	 sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+	 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+      }
+   } else if (key->tex_target == GL_TEXTURE_1D) {
+      /* There's a bug in 1D texture sampling - it actually pays
+       * attention to the wrap_t value, though it should not.
+       * Override the wrap_t value here to GL_REPEAT to keep
+       * any nonexistent border pixels from floating in.
+       */
+      sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+   }
+
+
+   /* Set shadow function: 
+    */
+   if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) {
+      /* Shadowing is "enabled" by emitting a particular sampler
+       * message (sample_c).  So need to recompile WM program when
+       * shadow comparison is enabled on each/any texture unit.
+       */
+      sampler->ss0.shadow_function =
+	 intel_translate_shadow_compare_func(key->comparefunc);
+   }
+
+   /* Set LOD bias (clamped to [-16, 15] before the S_FIXED conversion):
+    */
+   sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6);
+
+   sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+   sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+   /* Set BaseMipLevel, MaxLOD, MinLOD: 
+    *
+    * XXX: I don't think that using firstLevel, lastLevel works,
+    * because we always setup the surface state as if firstLevel ==
+    * level zero.  Probably have to subtract firstLevel from each of
+    * these:
+    */
+   sampler->ss0.base_level = U_FIXED(0, 1);
+
+   sampler->ss1.max_lod = U_FIXED(CLAMP(key->maxlod, 0, 13), 6);
+   sampler->ss1.min_lod = U_FIXED(CLAMP(key->minlod, 0, 13), 6);
+   /* Only the BO's current offset is stored; ss2 is marked as a reloc field. */
+   sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */
+}
+
+
+/** Sets up the sampler cache key for all texture units and refreshes sdc_bo[] */
+static void
+brw_wm_sampler_populate_key(struct brw_context *brw,
+			    struct wm_sampler_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   int unit;
+   char *last_entry_end = ((char*)&key->sampler_count) + 
+      sizeof(key->sampler_count);
+   /* last_entry_end marks how far into the key the bytes have been zeroed. */
+   key->sampler_count = 0;
+   /* NOTE(review): zeroing presumably keeps the raw key bytes deterministic. */
+   for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+      if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+	 struct wm_sampler_entry *entry = &key->sampler[unit];
+	 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+	 struct gl_texture_object *texObj = texUnit->_Current;
+	 struct intel_texture_object *intelObj = intel_texture_object(texObj);
+	 struct gl_texture_image *firstImage =
+	    texObj->Image[0][intelObj->firstLevel];
+	 /* Zero everything from the last filled entry through this one. */
+	 memset(last_entry_end, 0, 
+		(char*)entry - last_entry_end + sizeof(*entry));
+	 last_entry_end = ((char*)entry) + sizeof(*entry);
+
+         entry->tex_target = texObj->Target;
+
+	 entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP)
+	    ? ctx->Texture.CubeMapSeamless : GL_FALSE;
+
+	 entry->wrap_r = texObj->WrapR;
+	 entry->wrap_s = texObj->WrapS;
+	 entry->wrap_t = texObj->WrapT;
+
+	 entry->maxlod = texObj->MaxLod;
+	 entry->minlod = texObj->MinLod;
+	 entry->lod_bias = texUnit->LodBias + texObj->LodBias;
+	 entry->max_aniso = texObj->MaxAnisotropy;
+	 entry->minfilter = texObj->MinFilter;
+	 entry->magfilter = texObj->MagFilter;
+	 entry->comparemode = texObj->CompareMode;
+         entry->comparefunc = texObj->CompareFunc;
+	 /* Replace this unit's default (border) color BO with a fresh upload. */
+	 drm_intel_bo_unreference(brw->wm.sdc_bo[unit]);
+	 if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+	    float bordercolor[4] = {
+	       texObj->BorderColor.f[0],
+	       texObj->BorderColor.f[0],
+	       texObj->BorderColor.f[0],
+	       texObj->BorderColor.f[0]
+	    };
+	    /* GL specs that border color for depth textures is taken from the
+	     * R channel, while the hardware uses A.  Spam R into all the
+	     * channels for safety.
+	     */
+	    brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor);
+	 } else {
+	    brw->wm.sdc_bo[unit] = upload_default_color(brw,
+							texObj->BorderColor.f);
+	 }
+	 key->sampler_count = unit + 1;
+      }
+   }
+   struct wm_sampler_entry *entry = &key->sampler[key->sampler_count];
+   memset(last_entry_end, 0, (char*)entry - last_entry_end); /* zero any tail gap */
+}
+
+/* All samplers must be uploaded in a single contiguous array, which
+ * complicates various things.  However, this is still too confusing -
+ * FIXME: simplify all the different new texture state flags.
+ */
+static void upload_wm_samplers( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct wm_sampler_key key;
+   int i, sampler_key_size;
+
+   brw_wm_sampler_populate_key(brw, &key);
+   /* Record a changed sampler count and flag CACHE_NEW_SAMPLER. */
+   if (brw->wm.sampler_count != key.sampler_count) {
+      brw->wm.sampler_count = key.sampler_count;
+      brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
+   }
+   /* Release the previous sampler BO; with no samplers there is nothing to do. */
+   drm_intel_bo_unreference(brw->wm.sampler_bo);
+   brw->wm.sampler_bo = NULL;
+   if (brw->wm.sampler_count == 0)
+      return;
+
+   /* Only include the populated portion of the key in the search. */
+   sampler_key_size = offsetof(struct wm_sampler_key,
+			       sampler[key.sampler_count]);
+   brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER,
+					 &key, sampler_key_size,
+					 brw->wm.sdc_bo, key.sampler_count,
+					 NULL);
+
+   /* If we didn't find it in the cache, compute the state and put it in the
+    * cache.
+    */
+   if (brw->wm.sampler_bo == NULL) {
+      struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
+
+      memset(sampler, 0, sizeof(sampler));
+      for (i = 0; i < key.sampler_count; i++) {
+	 if (brw->wm.sdc_bo[i] == NULL)
+	    continue; /* this slot stays zeroed from the memset above */
+
+	 brw_update_sampler_state(&key.sampler[i], brw->wm.sdc_bo[i],
+				  &sampler[i]);
+      }
+
+      brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER,
+					    &key, sampler_key_size,
+					    brw->wm.sdc_bo, key.sampler_count,
+					    &sampler, sizeof(sampler));
+
+      /* Emit SDC relocations */
+      for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+	 if (!ctx->Texture.Unit[i]._ReallyEnabled)
+	    continue;
+
+	 drm_intel_bo_emit_reloc(brw->wm.sampler_bo,
+				 i * sizeof(struct brw_sampler_state) +
+				 offsetof(struct brw_sampler_state, ss2),
+				 brw->wm.sdc_bo[i], 0,
+				 I915_GEM_DOMAIN_SAMPLER, 0);
+      }
+   }
+}
+/* Tracked-state entry pairing upload_wm_samplers() with the _NEW_TEXTURE flag. */
+const struct brw_tracked_state brw_wm_samplers = {
+   .dirty = {
+      .mesa = _NEW_TEXTURE,
+      .brw = 0,
+      .cache = 0
+   },
+   .prepare = upload_wm_samplers,
+};
+
+
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
new file mode 100644
index 0000000000..1789b21451
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -0,0 +1,299 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                   
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_wm.h"
+
+/***********************************************************************
+ * WM unit - fragment programs and rasterization
+ */
+/* Values the WM unit state is built from; also used as its cache key. */
+struct brw_wm_unit_key {
+   unsigned int total_grf, total_scratch;
+   unsigned int urb_entry_read_length;
+   unsigned int curb_entry_read_length;
+   unsigned int dispatch_grf_start_reg;
+
+   unsigned int curbe_offset;
+   unsigned int urb_size;
+
+   unsigned int nr_surfaces, sampler_count;
+   GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
+   GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
+   GLfloat offset_units, offset_factor;
+};
+/* Fills the WM unit key from the current program data and GL/driver state. */
+static void
+wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const struct gl_fragment_program *fp = brw->fragment_program;
+   const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
+   struct intel_context *intel = &brw->intel;
+
+   memset(key, 0, sizeof(*key));
+
+   /* CACHE_NEW_WM_PROG */
+   key->total_grf = brw->wm.prog_data->total_grf;
+   key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
+   key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
+   key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
+   key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
+
+   /* BRW_NEW_URB_FENCE */
+   key->urb_size = brw->urb.vsize;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   key->curbe_offset = brw->curbe.wm_start;
+
+   /* BRW_NEW_NR_WM_SURFACES */
+   key->nr_surfaces = brw->wm.nr_surfaces;
+
+   /* CACHE_NEW_SAMPLER */
+   key->sampler_count = brw->wm.sampler_count;
+
+   /* _NEW_POLYGONSTIPPLE */
+   key->polygon_stipple = ctx->Polygon.StippleFlag;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
+
+   /* as far as we can tell */
+   key->computes_depth =
+      (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0;
+   /* BRW_NEW_DEPTH_BUFFER
+    * Override for NULL depthbuffer case, required by the Pixel Shader Computed
+    * Depth field.
+    */
+   if (brw->state.depth_region == NULL)
+      key->computes_depth = 0;
+
+   /* _NEW_COLOR */
+   key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
+   key->is_glsl = bfp->isGLSL;
+
+   /* temporary sanity check assertion */
+   ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
+
+   /* _NEW_DEPTH */
+   key->stats_wm = intel->stats_wm;
+
+   /* _NEW_LINE */
+   key->line_stipple = ctx->Line.StippleFlag;
+
+   /* _NEW_POLYGON */
+   key->offset_enable = ctx->Polygon.OffsetFill;
+   key->offset_units = ctx->Polygon.OffsetUnits;
+   key->offset_factor = ctx->Polygon.OffsetFactor;
+}
+
+/**
+ * Setup wm hardware state.  See page 225 of Volume 2.  Builds the unit state
+ * from the key, uploads it to the cache, emits relocs and returns the new BO. */
+static drm_intel_bo *
+wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
+			drm_intel_bo **reloc_bufs)
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_wm_unit_state wm;
+   drm_intel_bo *bo;
+
+   memset(&wm, 0, sizeof(wm));
+
+   wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
+   wm.thread1.depth_coef_urb_read_offset = 1;
+   wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+   if (intel->gen == 5)
+      wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
+   else
+      wm.thread1.binding_table_entry_count = key->nr_surfaces;
+
+   if (key->total_scratch != 0) {
+      wm.thread2.scratch_space_base_pointer =
+	 brw->wm.scratch_bo->offset >> 10; /* reloc */
+      wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
+   } else {
+      wm.thread2.scratch_space_base_pointer = 0;
+      wm.thread2.per_thread_scratch_space = 0;
+   }
+
+   wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg;
+   wm.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   wm.thread3.urb_entry_read_offset = 0;
+   wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+   wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+
+   if (intel->gen == 5)
+      wm.wm4.sampler_count = 0; /* hardware requirement */
+   else
+      wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
+
+   if (brw->wm.sampler_bo != NULL) {
+      /* reloc */
+      wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
+   } else {
+      wm.wm4.sampler_state_pointer = 0;
+   }
+
+   wm.wm5.program_uses_depth = key->uses_depth;
+   wm.wm5.program_computes_depth = key->computes_depth;
+   wm.wm5.program_uses_killpixel = key->uses_kill;
+
+   if (key->is_glsl)
+      wm.wm5.enable_8_pix = 1;
+   else
+      wm.wm5.enable_16_pix = 1;
+
+   wm.wm5.max_threads = brw->wm_max_threads - 1;
+   wm.wm5.thread_dispatch_enable = 1;	/* AKA: color_write */
+   wm.wm5.legacy_line_rast = 0;
+   wm.wm5.legacy_global_depth_bias = 0;
+   wm.wm5.early_depth_test = 1;	        /* never need to disable */
+   wm.wm5.line_aa_region_width = 0;
+   wm.wm5.line_endcap_aa_region_width = 1;
+
+   wm.wm5.polygon_stipple = key->polygon_stipple;
+
+   if (key->offset_enable) {
+      wm.wm5.depth_offset = 1;
+      /* Something weird going on with legacy_global_depth_bias,
+       * offset_constant, scaling and MRD.  This value passes glean
+       * but gives some odd results elsewhere (eg. the
+       * quad-offset-units test).
+       */
+      wm.global_depth_offset_constant = key->offset_units * 2;
+
+      /* This is the only value that passes glean:
+       */
+      wm.global_depth_offset_scale = key->offset_factor;
+   }
+
+   wm.wm5.line_stipple = key->line_stipple;
+
+   if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
+      wm.wm4.stats_enable = 1;
+   /* Upload the finished state, keyed on the key bytes and the three reloc BOs. */
+   bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
+			 key, sizeof(*key),
+			 reloc_bufs, 3,
+			 &wm, sizeof(wm));
+
+   /* Emit WM program relocation */
+   drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread0),
+			   brw->wm.prog_bo, wm.thread0.grf_reg_count << 1,
+			   I915_GEM_DOMAIN_INSTRUCTION, 0);
+
+   /* Emit scratch space relocation */
+   if (key->total_scratch != 0) {
+      drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread2),
+			      brw->wm.scratch_bo,
+			      wm.thread2.per_thread_scratch_space,
+			      0, 0);
+   }
+
+   /* Emit sampler state relocation */
+   if (key->sampler_count != 0) {
+      drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, wm4),
+			      brw->wm.sampler_bo, (wm.wm4.stats_enable |
+						   (wm.wm4.sampler_count << 2)),
+			      I915_GEM_DOMAIN_INSTRUCTION, 0);
+   }
+
+   return bo;
+}
+
+/* Looks up, or creates on a cache miss, the WM unit state BO for the current key. */
+static void upload_wm_unit( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_wm_unit_key key;
+   drm_intel_bo *reloc_bufs[3];
+   wm_unit_populate_key(brw, &key);
+
+   /* Allocate the necessary scratch space if we haven't already.  Don't
+    * bother reducing the allocation later, since we use scratch so
+    * rarely.
+    */
+   assert(key.total_scratch <= 12 * 1024);
+   if (key.total_scratch) {
+      GLuint total = key.total_scratch * brw->wm_max_threads;
+
+      if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
+	 drm_intel_bo_unreference(brw->wm.scratch_bo);
+	 brw->wm.scratch_bo = NULL;
+      }
+      if (brw->wm.scratch_bo == NULL) {
+	 brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
+						 "wm scratch",
+						 total,
+						 4096);
+      }
+   }
+   /* BOs the unit state points at; handed to the cache along with the key. */
+   reloc_bufs[0] = brw->wm.prog_bo;
+   reloc_bufs[1] = brw->wm.scratch_bo;
+   reloc_bufs[2] = brw->wm.sampler_bo;
+
+   drm_intel_bo_unreference(brw->wm.state_bo);
+   brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
+				       &key, sizeof(key),
+				       reloc_bufs, 3,
+				       NULL);
+   if (brw->wm.state_bo == NULL) {
+      brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
+   }
+}
+/* Tracked-state entry for the WM unit: flags below, upload_wm_unit() as .prepare. */
+const struct brw_tracked_state brw_wm_unit = {
+   .dirty = {
+      .mesa = (_NEW_POLYGON | 
+	       _NEW_POLYGONSTIPPLE | 
+	       _NEW_LINE | 
+	       _NEW_COLOR |
+	       _NEW_DEPTH),
+
+      .brw = (BRW_NEW_FRAGMENT_PROGRAM | 
+	      BRW_NEW_CURBE_OFFSETS |
+	      BRW_NEW_DEPTH_BUFFER |
+	      BRW_NEW_NR_WM_SURFACES),
+
+      .cache = (CACHE_NEW_WM_PROG |
+		CACHE_NEW_SAMPLER)
+   },
+   .prepare = upload_wm_unit,
+};
+
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
new file mode 100644
index 0000000000..77898dbbe7
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -0,0 +1,676 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                   
+
+#include "main/mtypes.h"
+#include "main/texstore.h"
+#include "shader/prog_parameter.h"
+
+#include "intel_mipmap_tree.h"
+#include "intel_batchbuffer.h"
+#include "intel_tex.h"
+#include "intel_fbo.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/* Maps a GL texture target enum to a BRW_SURFACE_* type; asserts on any other. */
+static GLuint translate_tex_target( GLenum target )
+{
+   switch (target) {
+   case GL_TEXTURE_1D: 
+      return BRW_SURFACE_1D;
+
+   case GL_TEXTURE_RECTANGLE_NV: 
+      return BRW_SURFACE_2D;
+
+   case GL_TEXTURE_2D: 
+      return BRW_SURFACE_2D;
+
+   case GL_TEXTURE_3D: 
+      return BRW_SURFACE_3D;
+
+   case GL_TEXTURE_CUBE_MAP: 
+      return BRW_SURFACE_CUBE;
+
+   default: 
+      assert(0); 
+      return 0; /* reached only when assertions are compiled out */
+   }
+}
+
+/* Maps a Mesa texture format to a BRW_SURFACEFORMAT_* value; asserts if unknown. */
+static GLuint translate_tex_format( gl_format mesa_format,
+                                    GLenum internal_format, /* not read here */
+				    GLenum depth_mode )
+{
+   switch( mesa_format ) {
+   case MESA_FORMAT_L8:
+      return BRW_SURFACEFORMAT_L8_UNORM;
+
+   case MESA_FORMAT_I8:
+      return BRW_SURFACEFORMAT_I8_UNORM;
+
+   case MESA_FORMAT_A8:
+      return BRW_SURFACEFORMAT_A8_UNORM; 
+
+   case MESA_FORMAT_AL88:
+      return BRW_SURFACEFORMAT_L8A8_UNORM;
+
+   case MESA_FORMAT_AL1616:
+      return BRW_SURFACEFORMAT_L16A16_UNORM;
+
+   case MESA_FORMAT_RGB888:
+      assert(0);		/* not supported for sampling */
+      return BRW_SURFACEFORMAT_R8G8B8_UNORM;      
+
+   case MESA_FORMAT_ARGB8888:
+      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+   case MESA_FORMAT_XRGB8888:
+      return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
+
+   case MESA_FORMAT_RGBA8888_REV:
+      _mesa_problem(NULL, "unexpected format in i965:translate_tex_format()");
+      return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+   case MESA_FORMAT_RGB565:
+      return BRW_SURFACEFORMAT_B5G6R5_UNORM;
+
+   case MESA_FORMAT_ARGB1555:
+      return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+
+   case MESA_FORMAT_ARGB4444:
+      return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+
+   case MESA_FORMAT_YCBCR_REV:
+      return BRW_SURFACEFORMAT_YCRCB_NORMAL;
+
+   case MESA_FORMAT_YCBCR:
+      return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
+
+   case MESA_FORMAT_RGB_FXT1:
+   case MESA_FORMAT_RGBA_FXT1:
+      return BRW_SURFACEFORMAT_FXT1;
+
+   case MESA_FORMAT_Z16:
+      if (depth_mode == GL_INTENSITY) 
+	  return BRW_SURFACEFORMAT_I16_UNORM;
+      else if (depth_mode == GL_ALPHA)
+	  return BRW_SURFACEFORMAT_A16_UNORM;
+      else
+	  return BRW_SURFACEFORMAT_L16_UNORM;
+
+   case MESA_FORMAT_RGB_DXT1:
+       return BRW_SURFACEFORMAT_DXT1_RGB;
+
+   case MESA_FORMAT_RGBA_DXT1:
+       return BRW_SURFACEFORMAT_BC1_UNORM;
+       
+   case MESA_FORMAT_RGBA_DXT3:
+       return BRW_SURFACEFORMAT_BC2_UNORM;
+       
+   case MESA_FORMAT_RGBA_DXT5:
+       return BRW_SURFACEFORMAT_BC3_UNORM;
+
+   case MESA_FORMAT_SARGB8:
+      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
+
+   case MESA_FORMAT_SLA8:
+      return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
+
+   case MESA_FORMAT_SL8:
+      return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
+
+   case MESA_FORMAT_SRGB_DXT1:
+      return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
+
+   case MESA_FORMAT_S8_Z24:
+      /* XXX: these different surface formats don't seem to
+       * make any difference for shadow sampler/compares.
+       */
+      if (depth_mode == GL_INTENSITY) 
+         return BRW_SURFACEFORMAT_I24X8_UNORM;
+      else if (depth_mode == GL_ALPHA)
+         return BRW_SURFACEFORMAT_A24X8_UNORM;
+      else
+         return BRW_SURFACEFORMAT_L24X8_UNORM;
+
+   case MESA_FORMAT_DUDV8:
+      return BRW_SURFACEFORMAT_R8G8_SNORM;
+
+   case MESA_FORMAT_SIGNED_RGBA8888_REV:
+      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+   default:
+      assert(0);
+      return 0;
+   }
+}
+/* Sets ss3.tiled_surface and ss3.tile_walk from an I915_TILING_* value. */
+static void
+brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
+{
+   switch (tiling) {
+   case I915_TILING_NONE:
+      surf->ss3.tiled_surface = 0;
+      surf->ss3.tile_walk = 0;
+      break;
+   case I915_TILING_X:
+      surf->ss3.tiled_surface = 1;
+      surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+      break;
+   case I915_TILING_Y:
+      surf->ss3.tiled_surface = 1;
+      surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+      break;
+   } /* no default: any other value leaves both fields untouched */
+}
+/* Builds the surface state for the current texture of 'unit' and emits it. */
+static void
+brw_update_texture_surface( GLcontext *ctx, GLuint unit )
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
+   const GLuint surf_index = SURF_INDEX_TEXTURE(unit);
+   struct brw_surface_state surf;
+   void *map;
+
+   memset(&surf, 0, sizeof(surf));
+
+   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   surf.ss0.surface_type = translate_tex_target(tObj->Target);
+   surf.ss0.surface_format = translate_tex_format(firstImage->TexFormat,
+						  firstImage->InternalFormat,
+						  tObj->DepthMode);
+
+   /* This is ok for all textures with channel width 8bit or less:
+    */
+/*    surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+   surf.ss1.base_addr = intelObj->mt->region->buffer->offset; /* reloc */
+   /* Sizes and counts below are stored relative to the first image level. */
+   surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel;
+   surf.ss2.width = firstImage->Width - 1;
+   surf.ss2.height = firstImage->Height - 1;
+   brw_set_surface_tiling(&surf, intelObj->mt->region->tiling);
+   surf.ss3.pitch = (intelObj->mt->region->pitch * intelObj->mt->cpp) - 1;
+   surf.ss3.depth = firstImage->Depth - 1;
+
+   surf.ss4.min_lod = 0;
+ 
+   if (tObj->Target == GL_TEXTURE_CUBE_MAP) {
+      surf.ss0.cube_pos_x = 1;
+      surf.ss0.cube_pos_y = 1;
+      surf.ss0.cube_pos_z = 1;
+      surf.ss0.cube_neg_x = 1;
+      surf.ss0.cube_neg_y = 1;
+      surf.ss0.cube_neg_z = 1;
+   }
+   /* Copy the finished state into the space handed out by brw_state_batch(). */
+   map = brw_state_batch(brw, sizeof(surf), 32,
+			 &brw->wm.surf_bo[surf_index],
+			 &brw->wm.surf_offset[surf_index]);
+   memcpy(map, &surf, sizeof(surf));
+
+   /* Emit relocation to surface contents */
+   drm_intel_bo_emit_reloc(brw->wm.surf_bo[surf_index],
+			   brw->wm.surf_offset[surf_index] +
+			   offsetof(struct brw_surface_state, ss1),
+			   intelObj->mt->region->buffer, 0,
+			   I915_GEM_DOMAIN_SAMPLER, 0);
+}
+
+/**
+ * Create the constant buffer surface.  Vertex/fragment shader constants will be
+ * read from this buffer with Data Port Read instructions/messages.  (width - 1)
+ * is split across the ss2.width, ss2.height and ss3.depth fields. */
+void
+brw_create_constant_surface(struct brw_context *brw,
+			    drm_intel_bo *bo,
+			    int width,
+			    drm_intel_bo **out_bo,
+			    uint32_t *out_offset)
+{
+   const GLint w = width - 1;
+   struct brw_surface_state surf;
+   void *map;
+
+   memset(&surf, 0, sizeof(surf));
+
+   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   surf.ss0.surface_type = BRW_SURFACE_BUFFER;
+   surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+   assert(bo);
+   surf.ss1.base_addr = bo->offset; /* reloc */
+
+   surf.ss2.width = w & 0x7f;            /* bits 6:0 of size or width */
+   surf.ss2.height = (w >> 7) & 0x1fff;  /* bits 19:7 of size or width */
+   surf.ss3.depth = (w >> 20) & 0x7f;    /* bits 26:20 of size or width */
+   surf.ss3.pitch = (width * 16) - 1; /* ignored?? */
+   brw_set_surface_tiling(&surf, I915_TILING_NONE); /* always untiled here */
+
+   map = brw_state_batch(brw, sizeof(surf), 32, out_bo, out_offset);
+   memcpy(map, &surf, sizeof(surf));
+
+   /* Emit relocation to surface contents.  Section 5.1.1 of the gen4
+    * bspec ("Data Cache") says that the data cache does not exist as
+    * a separate cache and is just the sampler cache.
+    */
+   drm_intel_bo_emit_reloc(*out_bo, (*out_offset +
+				     offsetof(struct brw_surface_state, ss1)),
+			   bo, 0,
+			   I915_GEM_DOMAIN_SAMPLER, 0);
+}
+
+/* Creates a new WM constant buffer reflecting the current fragment program's
+ * constants, if needed by the fragment program (fp->use_const_buffer).
+ *
+ * Otherwise, constants go through the CURBEs using the brw_constant_buffer
+ * state atom; any existing buffer is released and BRW_NEW_WM_CONSTBUF flagged.
+ */
+static void
+prepare_wm_constants(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   struct brw_fragment_program *fp =
+      (struct brw_fragment_program *) brw->fragment_program;
+   const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
+   const int size = params->NumParameters * 4 * sizeof(GLfloat);
+
+   _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   if (!fp->use_const_buffer) {
+      if (brw->wm.const_bo) {
+	 drm_intel_bo_unreference(brw->wm.const_bo);
+	 brw->wm.const_bo = NULL;
+	 brw->state.dirty.brw |= BRW_NEW_WM_CONSTBUF;
+      }
+      return;
+   }
+   /* Replace the buffer wholesale; the debug name marks it as the WM one. */
+   drm_intel_bo_unreference(brw->wm.const_bo);
+   brw->wm.const_bo = drm_intel_bo_alloc(intel->bufmgr, "wm_const_buffer",
+					 size, 64);
+
+   /* _NEW_PROGRAM_CONSTANTS */
+   drm_intel_bo_subdata(brw->wm.const_bo, 0, size, params->ParameterValues);
+}
+
+const struct brw_tracked_state brw_wm_constants = {
+   .prepare = prepare_wm_constants,
+   /* Re-run when constant values or the fragment program change. */
+   .dirty = {
+      .brw = BRW_NEW_FRAGMENT_PROGRAM,
+      .mesa = _NEW_PROGRAM_CONSTANTS,
+   },
+};
+
+/**
+ * Keeps the surface for the fragment shader constant buffer in step with
+ * brw->wm.const_bo: (re)built while a buffer exists, dropped otherwise.
+ *
+ * Either change raises BRW_NEW_WM_SURFACES so that the binding table,
+ * which is keyed on that flag, gets re-emitted with the new entry.
+ * Nothing is flagged when there was no surface and none is needed.
+ */
+static void upload_wm_constant_surface(struct brw_context *brw )
+{
+   const GLuint slot = SURF_INDEX_FRAG_CONST_BUFFER;
+   struct brw_fragment_program *fp =
+      (struct brw_fragment_program *) brw->fragment_program;
+   const struct gl_program_parameter_list *params =
+      fp->program.Base.Parameters;
+   drm_intel_bo **surf_bo = &brw->wm.surf_bo[slot];
+
+   if (brw->wm.const_bo != NULL) {
+      /* One surface element per program parameter. */
+      brw_create_constant_surface(brw, brw->wm.const_bo,
+				  params->NumParameters,
+				  surf_bo, &brw->wm.surf_offset[slot]);
+      brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+      return;
+   }
+
+   /* No constant buffer, so no surface BO is needed to point at it. */
+   if (*surf_bo != NULL) {
+      drm_intel_bo_unreference(*surf_bo);
+      *surf_bo = NULL;
+      brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+   }
+}
+
+const struct brw_tracked_state brw_wm_constant_surface = {
+   .emit = upload_wm_constant_surface,
+   /* The surface state is written into the batch, so it is rebuilt for
+    * every new batch as well as whenever the constant buffer changes.
+    */
+   .dirty = {
+      .brw = BRW_NEW_BATCH | BRW_NEW_WM_CONSTBUF,
+   },
+};
+
+
+/**
+ * Builds the surface state for color draw buffer \p unit from \p rb and puts
+ * it in the batch (brw->wm.surf_bo[unit] / surf_offset[unit]).  Without a
+ * region, a 1x1 BRW_SURFACE_NULL surface with no relocation is set up.
+ */
+static void
+brw_update_renderbuffer_surface(struct brw_context *brw,
+				struct gl_renderbuffer *rb,
+				unsigned int unit)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   drm_intel_bo *region_bo = NULL;
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   struct intel_region *region = irb ? irb->region : NULL;
+   struct {   /* local description of the surface, filled in below */
+      unsigned int surface_type;
+      unsigned int surface_format;
+      unsigned int width, height, pitch, cpp; /* pitch counts pixels */
+      GLubyte color_mask[4];
+      GLboolean color_blend;
+      uint32_t tiling;
+      uint32_t draw_x;
+      uint32_t draw_y;
+   } key;
+   struct brw_surface_state surf;
+   void *map;
+
+   memset(&key, 0, sizeof(key));
+
+   if (region != NULL) {
+      region_bo = region->buffer;
+
+      key.surface_type = BRW_SURFACE_2D;
+      switch (irb->Base.Format) {
+      /* XRGB and ARGB are treated the same here because the chips in this
+       * family cannot render to XRGB targets.  This means that we have to
+       * mask writes to alpha (ala glColorMask) and reconfigure the alpha
+       * blending hardware to use GL_ONE (or GL_ZERO) for cases where
+       * GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is used.
+       */
+      case MESA_FORMAT_ARGB8888:
+      case MESA_FORMAT_XRGB8888:
+	 key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+	 break;
+      case MESA_FORMAT_RGB565:
+	 key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+	 break;
+      case MESA_FORMAT_ARGB1555:
+	 key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+	 break;
+      case MESA_FORMAT_ARGB4444:
+	 key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+	 break;
+      case MESA_FORMAT_A8:
+	 key.surface_format = BRW_SURFACEFORMAT_A8_UNORM;
+	 break;
+      default: /* unknown format: surface_format stays 0 from the memset */
+	 _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->Base.Format);
+      }
+      key.tiling = region->tiling;
+      key.width = rb->Width;
+      key.height = rb->Height;
+      key.pitch = region->pitch;
+      key.cpp = region->cpp;
+      key.draw_x = region->draw_x;
+      key.draw_y = region->draw_y;
+   } else {
+      key.surface_type = BRW_SURFACE_NULL;
+      key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+      key.tiling = I915_TILING_X;
+      key.width = 1;
+      key.height = 1;
+      key.cpp = 4;
+      key.draw_x = 0;
+      key.draw_y = 0;
+   }
+   /* Before gen6, color mask and blend enable live in the surface state. */
+   if (intel->gen < 6) {
+      /* _NEW_COLOR */
+      memcpy(key.color_mask, ctx->Color.ColorMask[unit],
+	     sizeof(key.color_mask));
+
+      /* As mentioned above, disable writes to the alpha component when the
+       * renderbuffer is XRGB.
+       */
+      if (ctx->DrawBuffer->Visual.alphaBits == 0)
+	 key.color_mask[3] = GL_FALSE;
+
+      key.color_blend = (!ctx->Color._LogicOpEnabled &&
+			 (ctx->Color.BlendEnabled & (1 << unit)));
+   }
+
+   memset(&surf, 0, sizeof(surf));
+
+   surf.ss0.surface_format = key.surface_format;
+   surf.ss0.surface_type = key.surface_type;
+   if (key.tiling == I915_TILING_NONE) { /* linear: byte offset of draw origin */
+      surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp;
+   } else {
+      uint32_t tile_base, tile_x, tile_y;
+      uint32_t pitch = key.pitch * key.cpp;
+      /* Split the draw origin into a tile-aligned base plus x/y in the tile. */
+      if (key.tiling == I915_TILING_X) { /* 512 bytes x 8 rows per 4096 bytes */
+	 tile_x = key.draw_x % (512 / key.cpp);
+	 tile_y = key.draw_y % 8;
+	 tile_base = ((key.draw_y / 8) * (8 * pitch));
+	 tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096;
+      } else {
+	 /* Y: 128 bytes x 32 rows per 4096-byte tile */
+	 tile_x = key.draw_x % (128 / key.cpp);
+	 tile_y = key.draw_y % 32;
+	 tile_base = ((key.draw_y / 32) * (32 * pitch));
+	 tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096;
+      }
+      assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
+      assert(tile_x % 4 == 0);
+      assert(tile_y % 2 == 0);
+      /* Note that the low bits of these fields are missing, so
+       * there's the possibility of getting in trouble.
+       */
+      surf.ss1.base_addr = tile_base;
+      surf.ss5.x_offset = tile_x / 4;
+      surf.ss5.y_offset = tile_y / 2;
+   }
+   if (region_bo != NULL)
+      surf.ss1.base_addr += region_bo->offset; /* reloc */
+
+   surf.ss2.width = key.width - 1;
+   surf.ss2.height = key.height - 1;
+   brw_set_surface_tiling(&surf, key.tiling);
+   surf.ss3.pitch = (key.pitch * key.cpp) - 1;
+
+   if (intel->gen < 6) {
+      /* _NEW_COLOR */
+      surf.ss0.color_blend = key.color_blend;
+      surf.ss0.writedisable_red =   !key.color_mask[0];
+      surf.ss0.writedisable_green = !key.color_mask[1];
+      surf.ss0.writedisable_blue =  !key.color_mask[2];
+      surf.ss0.writedisable_alpha = !key.color_mask[3];
+   }
+
+   map = brw_state_batch(brw, sizeof(surf), 32,
+			 &brw->wm.surf_bo[unit],
+			 &brw->wm.surf_offset[unit]);
+   memcpy(map, &surf, sizeof(surf));
+   /* Relocate ss1, passing the intra-buffer offset computed above as delta. */
+   if (region_bo != NULL) {
+      drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
+			      brw->wm.surf_offset[unit] +
+			      offsetof(struct brw_surface_state, ss1),
+			      region_bo,
+			      surf.ss1.base_addr - region_bo->offset,
+			      I915_GEM_DOMAIN_RENDER,
+			      I915_GEM_DOMAIN_RENDER);
+   }
+}
+
+/**
+ * Validates the BOs behind the WM surfaces and recomputes brw->wm.nr_surfaces,
+ * raising BRW_NEW_NR_WM_SURFACES when the count changes.
+ */
+static void
+prepare_wm_surfaces(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   int i, nr_surfaces = 0;
+
+   for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+      struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+      struct intel_region *region = irb ? irb->region : NULL;
+      /* No region means a null surface is emitted: nothing to validate. */
+      if (region != NULL)
+	 brw_add_validated_bo(brw, region->buffer);
+      nr_surfaces = SURF_INDEX_DRAW(i) + 1;
+   }
+
+   if (brw->wm.const_bo) {
+      brw_add_validated_bo(brw, brw->wm.const_bo);
+      nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
+   }
+
+   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
+      struct gl_texture_object *tObj = texUnit->_Current;
+      struct intel_texture_object *intelObj = intel_texture_object(tObj);
+
+      if (texUnit->_ReallyEnabled) {
+	 brw_add_validated_bo(brw, intelObj->mt->region->buffer);
+	 nr_surfaces = SURF_INDEX_TEXTURE(i) + 1;
+      }
+   }
+
+   /* Updated here, in prepare, since the unit's prepare relies on it. */
+   if (brw->wm.nr_surfaces != nr_surfaces) {
+      brw->wm.nr_surfaces = nr_surfaces;
+      brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
+   }
+}
+
+/**
+ * Emits the surface state for every color draw buffer and every enabled
+ * texture unit, i.e. the objects the binding table refers to.
+ */
+static void
+upload_wm_surfaces(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLuint unit, buf;
+
+   /* _NEW_BUFFERS | _NEW_COLOR: drawing buffers. */
+   if (ctx->DrawBuffer->_NumColorDrawBuffers < 1) {
+      /* Nothing bound for drawing: slot 0 gets a surface built from NULL. */
+      brw_update_renderbuffer_surface(brw, NULL, 0);
+   } else {
+      for (buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) {
+	 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[buf];
+
+	 brw_update_renderbuffer_surface(brw, rb, buf);
+      }
+   }
+
+   /* _NEW_TEXTURE: texture units. */
+   for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+      const GLuint slot = SURF_INDEX_TEXTURE(unit);
+
+      if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+	 brw_update_texture_surface(ctx, unit);
+	 continue;
+      }
+
+      /* Disabled unit: release whatever surface it had. */
+      drm_intel_bo_unreference(brw->wm.surf_bo[slot]);
+      brw->wm.surf_bo[slot] = NULL;
+   }
+
+   brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+}
+
+const struct brw_tracked_state brw_wm_surfaces = {
+   .prepare = prepare_wm_surfaces,
+   .emit = upload_wm_surfaces,
+   /* Draw buffers, color mask/blend and texture bindings all feed the
+    * surface state, which is also re-emitted into each new batch.
+    */
+   .dirty = {
+      .mesa = _NEW_BUFFERS | _NEW_COLOR | _NEW_TEXTURE,
+      .brw = BRW_NEW_BATCH,
+   },
+};
+
+/**
+ * Constructs the binding table for the WM surface state: entry i holds the
+ * batch offset of surface i's state, or 0 when that slot has no surface.
+ */
+static void
+brw_wm_upload_binding_table(struct brw_context *brw)
+{
+   uint32_t *bind;
+   int i;
+
+   /* Might want to calculate nr_surfaces first, to avoid taking up so much
+    * space for the binding table.
+    */
+   bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_WM_MAX_SURF,
+			  32, &brw->wm.bind_bo, &brw->wm.bind_bo_offset);
+
+   for (i = 0; i < BRW_WM_MAX_SURF; i++) {
+      /* BRW_NEW_WM_SURFACES: a slot whose surface BO is gone gets 0 rather
+       * than the offset left behind in surf_offset[]. */
+      if (brw->wm.surf_bo[i]) {
+	 bind[i] = brw->wm.surf_offset[i];
+      } else {
+	 bind[i] = 0;
+      }
+   }
+
+   brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE;
+}
+
+const struct brw_tracked_state brw_wm_binding_table = {
+   .emit = brw_wm_upload_binding_table,
+   /* The table is written into the batch and lists surface offsets, so
+    * it follows both new batches and surface changes.
+    */
+   .dirty = {
+      .brw = BRW_NEW_WM_SURFACES | BRW_NEW_BATCH,
+   },
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
new file mode 100644
index 0000000000..f7acad6912
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "intel_batchbuffer.h"
+#include "main/macros.h"
+
+struct gen6_blend_state_key { /* cache key for BRW_BLEND_STATE uploads */
+   GLboolean color_blend, alpha_enabled;
+   GLboolean dither;
+   /* GL_COPY stands for "logic op disabled". */
+   GLenum logic_op;
+   /* Blend equations and factors; left zero unless color_blend is set. */
+   GLenum blend_eq_rgb, blend_eq_a;
+   GLenum blend_src_rgb, blend_src_a;
+   GLenum blend_dst_rgb, blend_dst_a;
+   /* Left zero unless alpha_enabled is set. */
+   GLenum alpha_func;
+};
+
+/**
+ * Fills \p key with the _NEW_COLOR state that determines the gen6 blend
+ * state.  Fields belonging to a disabled feature are left zeroed.
+ */
+static void
+blend_state_populate_key(struct brw_context *brw,
+			 struct gen6_blend_state_key *key)
+{
+   const GLcontext *ctx = &brw->intel.ctx;
+
+   /* Zero everything, padding included, before filling in. */
+   memset(key, 0, sizeof(*key));
+
+   /* Everything below is _NEW_COLOR state. */
+   key->dither = ctx->Color.DitherFlag;
+   key->logic_op = ctx->Color._LogicOpEnabled ? ctx->Color.LogicOp : GL_COPY;
+
+   key->alpha_enabled = ctx->Color.AlphaEnabled;
+   if (key->alpha_enabled)
+      key->alpha_func = ctx->Color.AlphaFunc;
+
+   key->color_blend = ctx->Color.BlendEnabled;
+   if (!key->color_blend)
+      return;
+
+   /* Equations and factors only matter while blending is on. */
+   key->blend_eq_rgb = ctx->Color.BlendEquationRGB;
+   key->blend_src_rgb = ctx->Color.BlendSrcRGB;
+   key->blend_dst_rgb = ctx->Color.BlendDstRGB;
+
+   key->blend_eq_a = ctx->Color.BlendEquationA;
+   key->blend_src_a = ctx->Color.BlendSrcA;
+   key->blend_dst_a = ctx->Color.BlendDstA;
+}
+
+/**
+ * Translates \p key into a gen6 blend state and uploads it to the state
+ * cache as BRW_BLEND_STATE, returning what brw_upload_cache() hands back.
+ */
+static drm_intel_bo *
+blend_state_create_from_key(struct brw_context *brw,
+			    struct gen6_blend_state_key *key)
+{
+   const GLboolean use_logic_op = (key->logic_op != GL_COPY);
+   struct gen6_blend_state hw;
+
+   memset(&hw, 0, sizeof(hw));
+
+   /* A logic op takes precedence: blending is then left disabled. */
+   if (use_logic_op) {
+      hw.blend1.logic_op_enable = 1;
+      hw.blend1.logic_op_func = intel_translate_logic_op(key->logic_op);
+   }
+
+   if (!use_logic_op && key->color_blend) {
+      const GLenum rgb_eq = key->blend_eq_rgb;
+      const GLenum a_eq = key->blend_eq_a;
+      GLenum rgb_src = key->blend_src_rgb, rgb_dst = key->blend_dst_rgb;
+      GLenum a_src = key->blend_src_a, a_dst = key->blend_dst_a;
+
+      /* MIN and MAX are programmed with GL_ONE for both factors. */
+      if (rgb_eq == GL_MIN || rgb_eq == GL_MAX)
+	 rgb_src = rgb_dst = GL_ONE;
+      if (a_eq == GL_MIN || a_eq == GL_MAX)
+	 a_src = a_dst = GL_ONE;
+
+      hw.blend0.dest_blend_factor = brw_translate_blend_factor(rgb_dst);
+      hw.blend0.source_blend_factor = brw_translate_blend_factor(rgb_src);
+      hw.blend0.blend_func = brw_translate_blend_equation(rgb_eq);
+
+      hw.blend0.ia_dest_blend_factor = brw_translate_blend_factor(a_dst);
+      hw.blend0.ia_source_blend_factor = brw_translate_blend_factor(a_src);
+      hw.blend0.ia_blend_func = brw_translate_blend_equation(a_eq);
+
+      /* Independent alpha blending only when alpha differs from RGB. */
+      hw.blend0.blend_enable = 1;
+      hw.blend0.ia_blend_enable =
+	 (a_src != rgb_src || a_dst != rgb_dst || a_eq != rgb_eq);
+   }
+
+   /* Alpha test. */
+   if (key->alpha_enabled) {
+      hw.blend1.alpha_test_enable = 1;
+      hw.blend1.alpha_test_func =
+	 intel_translate_compare_func(key->alpha_func);
+   }
+
+   /* Dithering, with both pattern offsets at zero. */
+   if (key->dither) {
+      hw.blend1.dither_enable = 1;
+      hw.blend1.y_dither_offset = 0;
+      hw.blend1.x_dither_offset = 0;
+   }
+
+   return brw_upload_cache(&brw->cache, BRW_BLEND_STATE,
+			   key, sizeof(*key),
+			   NULL, 0,
+			   &hw, sizeof(hw));
+}
+
+/* Points brw->cc.blend_state_bo at the cached blend state for the current
+ * GL state, uploading a new entry when the cache has none. */
+static void
+prepare_blend_state(struct brw_context *brw)
+{
+   struct gen6_blend_state_key key;
+
+   blend_state_populate_key(brw, &key);
+
+   drm_intel_bo_unreference(brw->cc.blend_state_bo);
+   brw->cc.blend_state_bo = brw_search_cache(&brw->cache, BRW_BLEND_STATE,
+					     &key, sizeof(key), NULL, 0, NULL);
+   if (brw->cc.blend_state_bo != NULL)
+      return;   /* cache hit */
+   brw->cc.blend_state_bo = blend_state_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state gen6_blend_state = {
+   .prepare = prepare_blend_state,
+   /* Every input of the blend key is _NEW_COLOR state. */
+   .dirty = {
+      .mesa = _NEW_COLOR,
+      .brw = 0,
+   },
+};
+
+struct gen6_color_calc_state_key {
+   GLfloat blend_constant_color[4]; /* GL floats in [0, 1], not bytes */
+   GLclampf alpha_ref;
+   GLubyte stencil_ref[2];          /* [0] front face, [1] back face */
+};
+
+/* Fills \p key; values belonging to a disabled test are left zeroed. */
+static void
+color_calc_state_populate_key(struct brw_context *brw,
+			      struct gen6_color_calc_state_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+
+   memset(key, 0, sizeof(*key));
+
+   /* _NEW_STENCIL */
+   if (ctx->Stencil._Enabled) {
+      const unsigned back = ctx->Stencil._BackFace;
+      key->stencil_ref[0] = ctx->Stencil.Ref[0];
+      if (ctx->Stencil._TestTwoSide)
+	 key->stencil_ref[1] = ctx->Stencil.Ref[back];
+   }
+
+   /* _NEW_COLOR */
+   if (ctx->Color.AlphaEnabled)
+      key->alpha_ref = ctx->Color.AlphaRef;
+
+   key->blend_constant_color[0] = ctx->Color.BlendColor[0];
+   key->blend_constant_color[1] = ctx->Color.BlendColor[1];
+   key->blend_constant_color[2] = ctx->Color.BlendColor[2];
+   key->blend_constant_color[3] = ctx->Color.BlendColor[3];
+}
+
+/**
+ * Uploads the color calc state described by \p key to the state cache as
+ * BRW_COLOR_CALC_STATE and returns what brw_upload_cache() hands back.
+ */
+static drm_intel_bo *
+color_calc_state_create_from_key(struct brw_context *brw,
+				 struct gen6_color_calc_state_key *key)
+{
+   struct gen6_color_calc_state hw;
+
+   memset(&hw, 0, sizeof(hw));
+
+   /* Stencil reference values, front then back face. */
+   hw.cc0.stencil_ref = key->stencil_ref[0];
+   hw.cc0.bf_stencil_ref = key->stencil_ref[1];
+
+   /* The alpha reference is programmed as an 8-bit UNORM value. */
+   hw.cc0.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+   UNCLAMPED_FLOAT_TO_UBYTE(hw.cc1.alpha_ref_fi.ui, key->alpha_ref);
+
+   hw.constant_r = key->blend_constant_color[0];
+   hw.constant_g = key->blend_constant_color[1];
+   hw.constant_b = key->blend_constant_color[2];
+   hw.constant_a = key->blend_constant_color[3];
+
+   return brw_upload_cache(&brw->cache, BRW_COLOR_CALC_STATE,
+			   key, sizeof(*key),
+			   NULL, 0,
+			   &hw, sizeof(hw));
+}
+
+/* Points brw->cc.state_bo at the cached color calc state for the current
+ * GL state, uploading a new entry when the cache has none. */
+static void
+prepare_color_calc_state(struct brw_context *brw)
+{
+   struct gen6_color_calc_state_key key;
+
+   color_calc_state_populate_key(brw, &key);
+
+   drm_intel_bo_unreference(brw->cc.state_bo);
+   brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE,
+				       &key, sizeof(key), NULL, 0, NULL);
+   if (brw->cc.state_bo != NULL)
+      return;   /* cache hit */
+   brw->cc.state_bo = color_calc_state_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state gen6_color_calc_state = {
+   .dirty = {
+      /* The key holds the stencil reference values as well as color state. */
+      .mesa = _NEW_COLOR | _NEW_STENCIL,
+      .brw = 0,
+   },
+   .prepare = prepare_color_calc_state,
+};
+
+static void upload_cc_state_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   /* The packet takes its pointers in the order BLEND_STATE,
+    * DEPTH_STENCIL_STATE, COLOR_CALC_STATE; delta 1 sets each change bit. */
+   BEGIN_BATCH(4);
+   OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2));
+   OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+   OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+   OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+   ADVANCE_BATCH();
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+
+static void prepare_cc_state_pointers(struct brw_context *brw)
+{ /* Registers the three BOs that upload_cc_state_pointers() relocates. */
+   brw_add_validated_bo(brw, brw->cc.state_bo);               /* color calc */
+   brw_add_validated_bo(brw, brw->cc.blend_state_bo);         /* blend */
+   brw_add_validated_bo(brw, brw->cc.depth_stencil_state_bo); /* depth/stencil */
+}
+
+const struct brw_tracked_state gen6_cc_state_pointers = {
+   .prepare = prepare_cc_state_pointers,
+   .emit = upload_cc_state_pointers,
+   /* Re-emitted for every new batch and on the CACHE_NEW_* flags of the
+    * three state objects the packet points at. */
+   .dirty = {
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_COLOR_CALC_STATE | CACHE_NEW_BLEND_STATE |
+	       CACHE_NEW_DEPTH_STENCIL_STATE,
+   },
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
new file mode 100644
index 0000000000..acc4b7f101
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+/* Emits the gen6 clip state packet; the Z clip test is left out while
+ * depth clamping is on, and provoking-vertex selects follow the GL setting.
+ */
+static void
+upload_clip_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   const uint32_t z_test = ctx->Transform.DepthClamp ? 0 : GEN6_CLIP_Z_TEST;
+   uint32_t provoking = 0;
+   uint32_t dw2;
+
+   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
+      provoking = (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) |
+		  (2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) |
+		  (1 << GEN6_CLIP_LINE_PROVOKE_SHIFT);
+   }
+
+   dw2 = GEN6_CLIP_ENABLE |
+	 GEN6_CLIP_API_OGL |
+	 GEN6_CLIP_MODE_NORMAL |
+	 GEN6_CLIP_XY_TEST |
+	 z_test |
+	 provoking;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2));
+   OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE);
+   OUT_BATCH(dw2);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_clip_state = {
+   .dirty = {
+      /* _NEW_LIGHT: the emit reads ctx->Light.ProvokingVertex. */
+      .mesa  = _NEW_TRANSFORM | _NEW_LIGHT,
+      .brw   = BRW_NEW_CONTEXT,
+   },
+   .emit = upload_clip_state,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_depthstencil.c b/src/mesa/drivers/dri/i965/gen6_depthstencil.c
new file mode 100644
index 0000000000..d9eca9af35
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_depthstencil.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+
+struct brw_depth_stencil_state_key { /* cache key for BRW_DEPTH_STENCIL_STATE */
+   GLenum depth_func;
+   GLboolean depth_test, depth_write;
+   GLboolean stencil, stencil_two_side;
+   GLenum stencil_func[2], stencil_fail_op[2]; /* [0] front face, [1] back face */
+   GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2];
+   GLubyte stencil_write_mask[2], stencil_test_mask[2];
+};
+
+/* Fills \p key from the current stencil and depth state.  Slot 0 of the
+ * per-face arrays takes face 0, slot 1 takes ctx->Stencil._BackFace. */
+static void
+depth_stencil_state_populate_key(struct brw_context *brw,
+				 struct brw_depth_stencil_state_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   unsigned slot;
+
+   memset(key, 0, sizeof(*key));
+
+   /* _NEW_STENCIL */
+   key->stencil = ctx->Stencil._Enabled;
+   key->stencil_two_side = ctx->Stencil._TestTwoSide;
+
+   for (slot = 0; slot < 2; slot++) {
+      const unsigned face = slot ? ctx->Stencil._BackFace : 0;
+      /* Slot 0 is gated on the stencil test, slot 1 on two-sided mode. */
+      if (!(slot ? key->stencil_two_side : key->stencil))
+	 continue;
+
+      key->stencil_func[slot] = ctx->Stencil.Function[face];
+      key->stencil_fail_op[slot] = ctx->Stencil.FailFunc[face];
+      key->stencil_pass_depth_fail_op[slot] = ctx->Stencil.ZFailFunc[face];
+      key->stencil_pass_depth_pass_op[slot] = ctx->Stencil.ZPassFunc[face];
+      key->stencil_write_mask[slot] = ctx->Stencil.WriteMask[face];
+      key->stencil_test_mask[slot] = ctx->Stencil.ValueMask[face];
+   }
+
+   /* _NEW_DEPTH */
+   key->depth_test = ctx->Depth.Test;
+   if (key->depth_test) {
+      key->depth_func = ctx->Depth.Func;
+      key->depth_write = ctx->Depth.Mask;
+   }
+}
+
+/**
+ * Translates \p key into a gen6 depth/stencil state and uploads it to the
+ * state cache as BRW_DEPTH_STENCIL_STATE.
+ */
+static drm_intel_bo *
+depth_stencil_state_create_from_key(struct brw_context *brw,
+				    struct brw_depth_stencil_state_key *key)
+{
+   const GLboolean back_face = key->stencil && key->stencil_two_side;
+   struct gen6_depth_stencil_state hw;
+
+   memset(&hw, 0, sizeof(hw));
+
+   /* _NEW_STENCIL: front face. */
+   if (key->stencil) {
+      hw.ds0.stencil_enable = 1;
+      hw.ds0.stencil_func =
+	 intel_translate_compare_func(key->stencil_func[0]);
+      hw.ds0.stencil_fail_op =
+	 intel_translate_stencil_op(key->stencil_fail_op[0]);
+      hw.ds0.stencil_pass_depth_fail_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
+      hw.ds0.stencil_pass_depth_pass_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
+      hw.ds1.stencil_write_mask = key->stencil_write_mask[0];
+      hw.ds1.stencil_test_mask = key->stencil_test_mask[0];
+   }
+
+   /* Back face, programmed only for two-sided stencil. */
+   if (back_face) {
+      hw.ds0.bf_stencil_enable = 1;
+      hw.ds0.bf_stencil_func =
+	 intel_translate_compare_func(key->stencil_func[1]);
+      hw.ds0.bf_stencil_fail_op =
+	 intel_translate_stencil_op(key->stencil_fail_op[1]);
+      hw.ds0.bf_stencil_pass_depth_fail_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
+      hw.ds0.bf_stencil_pass_depth_pass_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
+      hw.ds1.bf_stencil_write_mask = key->stencil_write_mask[1];
+      hw.ds1.bf_stencil_test_mask = key->stencil_test_mask[1];
+   }
+
+   /* Writes go on when an active face has a nonzero write mask (marked
+    * "Not really sure about this" by the original author). */
+   if (key->stencil &&
+       (key->stencil_write_mask[0] ||
+	(key->stencil_two_side && key->stencil_write_mask[1])))
+      hw.ds0.stencil_write_enable = 1;
+
+   /* _NEW_DEPTH */
+   if (key->depth_test) {
+      hw.ds2.depth_test_enable = 1;
+      hw.ds2.depth_test_func = intel_translate_compare_func(key->depth_func);
+      hw.ds2.depth_write_enable = key->depth_write;
+   }
+
+   return brw_upload_cache(&brw->cache, BRW_DEPTH_STENCIL_STATE,
+			   key, sizeof(*key), NULL, 0,
+			   &hw, sizeof(hw));
+}
+
+/* Points brw->cc.depth_stencil_state_bo at the cached state matching the
+ * current depth/stencil settings, uploading a new entry on a cache miss. */
+static void
+prepare_depth_stencil_state(struct brw_context *brw)
+{
+   struct brw_depth_stencil_state_key key;
+
+   depth_stencil_state_populate_key(brw, &key);
+
+   drm_intel_bo_unreference(brw->cc.depth_stencil_state_bo);
+   brw->cc.depth_stencil_state_bo =
+      brw_search_cache(&brw->cache, BRW_DEPTH_STENCIL_STATE,
+		       &key, sizeof(key), NULL, 0, NULL);
+   if (brw->cc.depth_stencil_state_bo != NULL)
+      return;   /* cache hit */
+   brw->cc.depth_stencil_state_bo =
+      depth_stencil_state_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state gen6_depth_stencil_state = {
+   .prepare = prepare_depth_stencil_state,
+   /* The key is built from depth and stencil state only. */
+   .dirty = {
+      .mesa = _NEW_STENCIL | _NEW_DEPTH,
+      .brw = 0,
+   },
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
new file mode 100644
index 0000000000..cefc93ba48
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_gs_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   /* Emits the GS constant-buffer packet, then the GS state packet. */
+   /* Disable all the constant buffers. */
+   BEGIN_BATCH(5);
+   OUT_BATCH(CMD_3D_CONSTANT_GS_STATE << 16 | (5 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+   /* With a GS program BO: relocate to it and set GEN6_GS_ENABLE. */
+   if (brw->gs.prog_bo) {
+      BEGIN_BATCH(7);
+      OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2));
+      OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+      OUT_BATCH(GEN6_GS_SPF_MODE |
+		(0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
+		(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+      OUT_BATCH(0); /* scratch space base offset */
+      OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+		(brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT) |
+		(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
+      OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
+		GEN6_GS_STATISTICS_ENABLE |
+		GEN6_GS_RENDERING_ENABLE);
+      OUT_BATCH(GEN6_GS_ENABLE);
+      ADVANCE_BATCH();
+   } else { /* no GS program: same packet with a zero kernel and no enable */
+      BEGIN_BATCH(7);
+      OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2));
+      OUT_BATCH(0); /* prog_bo */
+      OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
+		(0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+      OUT_BATCH(0); /* scratch space base offset */
+      OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+		(0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
+		(0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
+      OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
+		GEN6_GS_STATISTICS_ENABLE |
+		GEN6_GS_RENDERING_ENABLE);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+}
+
+const struct brw_tracked_state gen6_gs_state = {
+   .emit = upload_gs_state,
+   .dirty = {
+      /* NOTE(review): CACHE_NEW_GS_PROG is presumably what signals a new
+       * brw->gs.prog_bo, which the emit relocates to -- confirm. */
+      .cache = CACHE_NEW_GS_PROG,
+      .brw   = BRW_NEW_CONTEXT | BRW_NEW_URB_FENCE | BRW_NEW_CURBE_OFFSETS,
+      .mesa  = _NEW_TRANSFORM,
+   },
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
new file mode 100644
index 0000000000..ab8e7516d2
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_sampler_state_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(4); /* header dword + VS, GS and PS pointer dwords */
+   OUT_BATCH(CMD_3D_SAMPLER_STATE_POINTERS << 16 |
+	     VS_SAMPLER_STATE_CHANGE |
+	     GS_SAMPLER_STATE_CHANGE |
+	     PS_SAMPLER_STATE_CHANGE |
+	     (4 - 2));
+   OUT_BATCH(0); /* VS: always written as 0 here */
+   OUT_BATCH(0); /* GS: always written as 0 here */
+   if (brw->wm.sampler_bo)
+      OUT_RELOC(brw->wm.sampler_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* PS */
+   else
+      OUT_BATCH(0); /* PS: brw->wm.sampler_bo is NULL */
+
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+
+static void
+prepare_sampler_state_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->wm.sampler_bo); /* the BO relocated by upload_sampler_state_pointers() */
+}
+
+const struct brw_tracked_state gen6_sampler_state = {
+   .dirty = { /* flag sets this atom is keyed on */
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_SAMPLER /* the emit hook reads brw->wm.sampler_bo */
+   },
+   .prepare = prepare_sampler_state_pointers, /* validates the sampler BO */
+   .emit = upload_sampler_state_pointers, /* writes the pointer packet */
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
new file mode 100644
index 0000000000..34a9dc234c
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+static void
+prepare_scissor_state(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   struct gen6_scissor_state scissor;
+
+   /* Depends on _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT. */
+
+   /* The scissor only needs to handle the intersection of drawable and
+    * scissor rect.  Clipping to the boundaries of static shared buffers
+    * for front/back/depth is covered by looping over cliprects in brw_draw.c.
+    *
+    * Note that the hardware's coordinates are inclusive, while Mesa's min is
+    * inclusive but max is exclusive, hence the "- 1" on every max below.
+    */
+   if (render_to_fbo) {
+      /* FBO (texmemory, Y=0=bottom): Mesa's Y bounds are used unflipped */
+      scissor.xmin = ctx->DrawBuffer->_Xmin;
+      scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+      scissor.ymin = ctx->DrawBuffer->_Ymin;
+      scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
+   }
+   else {
+      /* window (memory, Y=0=top): Y bounds are flipped about the height */
+      scissor.xmin = ctx->DrawBuffer->_Xmin;
+      scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+      scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
+      scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
+   }
+
+   drm_intel_bo_unreference(brw->sf.state_bo); /* drop the previous BO */
+   brw->sf.state_bo = brw_cache_data(&brw->cache, BRW_SF_UNIT,
+				     &scissor, sizeof(scissor)); /* scissor rect lives in sf.state_bo under BRW_SF_UNIT */
+}
+
+const struct brw_tracked_state gen6_scissor_state = {
+   .dirty = { /* flag sets this atom is keyed on */
+      .mesa = _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_scissor_state, /* prepare-only atom: no .emit hook is set */
+};
+
+static void upload_scissor_state_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(2); /* header dword + one pointer dword */
+   OUT_BATCH(CMD_3D_SCISSOR_STATE_POINTERS << 16 | (2 - 2));
+   OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* BO filled by prepare_scissor_state() */
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+
+static void prepare_scissor_state_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->sf.state_bo); /* the BO relocated by upload_scissor_state_pointers() */
+}
+
+const struct brw_tracked_state gen6_scissor_state_pointers = {
+   .dirty = { /* flag sets this atom is keyed on */
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_SF_UNIT /* prepare_scissor_state() caches under BRW_SF_UNIT */
+   },
+   .prepare = prepare_scissor_state_pointers, /* validates brw->sf.state_bo */
+   .emit = upload_scissor_state_pointers, /* writes the pointer packet */
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
new file mode 100644
index 0000000000..51940efb44
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "main/macros.h"
+#include "intel_batchbuffer.h"
+
+static uint32_t
+get_attr_override(struct brw_context *brw, int attr)
+{
+   int attr_index = 0, i;
+
+   /* Find the source index (0 = first attribute after the 4D position)
+    * for this output attribute.  attr is currently a VERT_RESULT_* but should
+    * be FRAG_ATTRIB_*.
+    *
+    * The index is the number of VS outputs written in slots below attr; it
+    * is returned as-is, with no other override bits set.
+    */
+   for (i = 0; i < attr; i++) {
+      if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(i))
+	 attr_index++;
+   }
+   return attr_index;
+}
+
+static void
+upload_sf_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   /* CACHE_NEW_VS_PROG */
+   uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written);
+   /* This should probably be FS inputs read */
+   uint32_t num_outputs = brw_count_bits(brw->vs.prog_data->outputs_written);
+   uint32_t dw1, dw2, dw3, dw4;
+   int i;
+   /* _NEW_BUFFERS */
+   GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+   int attr = 0; /* next VS output slot to scan; carried across the 8 override dwords */
+
+   dw1 =
+      num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT |
+      (num_inputs + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | /* half of num_inputs, rounded up */
+      3 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
+   dw2 = GEN6_SF_VIEWPORT_TRANSFORM_ENABLE |
+      GEN6_SF_STATISTICS_ENABLE;
+   dw3 = 0;
+   dw4 = 0;
+
+   /* _NEW_POLYGON: winding is inverted when rendering to an FBO */
+   if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo)
+      dw2 |= GEN6_SF_WINDING_CCW;
+
+   /* _NEW_SCISSOR */
+   if (ctx->Scissor.Enabled)
+      dw3 |= GEN6_SF_SCISSOR_ENABLE;
+
+   /* _NEW_POLYGON */
+   if (ctx->Polygon.CullFlag) {
+      switch (ctx->Polygon.CullFaceMode) {
+      case GL_FRONT:
+	 dw3 |= GEN6_SF_CULL_FRONT;
+	 break;
+      case GL_BACK:
+	 dw3 |= GEN6_SF_CULL_BACK;
+	 break;
+      case GL_FRONT_AND_BACK:
+	 dw3 |= GEN6_SF_CULL_BOTH;
+	 break;
+      default:
+	 assert(0);
+	 break;
+      }
+   } else {
+      dw3 |= GEN6_SF_CULL_NONE;
+   }
+
+   /* _NEW_LINE: width is clamped to [0.0, 7.99] before U_FIXED(..., 7) */
+   dw3 |= U_FIXED(CLAMP(ctx->Line.Width, 0.0, 7.99), 7) <<
+      GEN6_SF_LINE_WIDTH_SHIFT;
+   if (ctx->Line.SmoothFlag) {
+      dw3 |= GEN6_SF_LINE_AA_ENABLE;
+      dw3 |= GEN6_SF_LINE_AA_MODE_TRUE;
+      dw3 |= GEN6_SF_LINE_END_CAP_WIDTH_1_0;
+   }
+
+   /* _NEW_POINT */
+   if (ctx->Point._Attenuated) /* NOTE(review): confirm this polarity against the SF_STATE docs */
+      dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH;
+
+   dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 225.875), 3) <<
+      GEN6_SF_POINT_WIDTH_SHIFT;
+   if (render_to_fbo)
+      dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
+
+   /* _NEW_LIGHT: provoking-vertex selects per primitive type */
+   if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
+      dw4 |=
+	 (2 << GEN6_SF_TRI_PROVOKE_SHIFT) |
+	 (2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT) |
+	 (1 << GEN6_SF_LINE_PROVOKE_SHIFT);
+   } else {
+      dw4 |=
+	 (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT);
+   }
+
+   BEGIN_BATCH(20); /* 1 header + dw1-dw4 + 3 floats + 8 overrides + 4 trailing */
+   OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2));
+   OUT_BATCH(dw1);
+   OUT_BATCH(dw2);
+   OUT_BATCH(dw3);
+   OUT_BATCH(dw4);
+   OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
+   OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
+   OUT_BATCH_F(0.0); /* XXX: global depth offset clamp */
+   for (i = 0; i < 8; i++) { /* each dword packs two overrides: low half, then << 16 */
+      uint32_t attr_overrides = 0;
+
+      /* These should be generating FS inputs read instead of VS
+       * outputs written
+       */
+      for (; attr < 64; attr++) {
+	 if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) {
+	    attr_overrides |= get_attr_override(brw, attr);
+	    attr++;
+	    break;
+	 }
+      }
+
+      for (; attr < 64; attr++) {
+	 if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(attr)) {
+	    attr_overrides |= get_attr_override(brw, attr) << 16;
+	    attr++;
+	    break;
+	 }
+      }
+      OUT_BATCH(attr_overrides);
+   }
+   OUT_BATCH(0); /* point sprite texcoord bitmask */
+   OUT_BATCH(0); /* constant interp bitmask */
+   OUT_BATCH(0); /* wrapshortest enables 0-7 */
+   OUT_BATCH(0); /* wrapshortest enables 8-15 */
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_sf_state = {
+   .dirty = { /* every GL state group upload_sf_state() reads must be listed */
+      .mesa  = (_NEW_LIGHT |
+		_NEW_POLYGON |
+		_NEW_LINE |
+		_NEW_POINT | /* ctx->Point._Attenuated and ctx->Point.Size */
+		_NEW_SCISSOR | _NEW_BUFFERS),
+      .brw   = BRW_NEW_CONTEXT,
+      .cache = CACHE_NEW_VS_PROG /* brw->vs.prog_data->outputs_written */
+   },
+   .emit = upload_sf_state,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c
new file mode 100644
index 0000000000..5445e4035a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "main/macros.h"
+#include "intel_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+static void
+prepare_urb( struct brw_context *brw )
+{
+   brw->urb.nr_vs_entries = 24;
+   if (brw->gs.prog_bo)
+      brw->urb.nr_gs_entries = 4;
+   else
+      brw->urb.nr_gs_entries = 0;
+   /* CACHE_NEW_VS_PROG: at least 1, because upload_urb() emits vs_size - 1 */
+   brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
+
+   /* Check that the number of URB rows (8 floats each) allocated is less
+    * than the URB space.
+    */
+   assert((brw->urb.nr_vs_entries +
+	   brw->urb.nr_gs_entries) * brw->urb.vs_size * 8 < 64 * 1024);
+}
+
+static void
+upload_urb(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   assert(brw->urb.nr_vs_entries % 4 == 0); /* entry counts must be multiples of 4 */
+   assert(brw->urb.nr_gs_entries % 4 == 0);
+   /* GS requirement: with a GS program bound, vs_size must stay below 5 */
+   assert(!brw->gs.prog_bo || brw->urb.vs_size < 5);
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   BEGIN_BATCH(3); /* header dword + VS dword + GS dword */
+   OUT_BATCH(CMD_URB << 16 | (3 - 2));
+   OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) | /* size is written minus one */
+	     ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
+   OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | /* GS size reuses vs_size */
+	     ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT));
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_urb = {
+   .dirty = { /* prepare_urb() reads both vs.prog_data and gs.prog_bo */
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,
+      .cache = CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG,
+   },
+   .prepare = prepare_urb, /* picks entry counts and the VS entry size */
+   .emit = upload_urb, /* writes the CMD_URB packet */
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
new file mode 100644
index 0000000000..301c68e7f9
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+
+/* The clip VP defines the guardband region where expensive clipping is skipped
+ * and fragments are allowed to be generated and clipped out cheaply by the SF.
+ *
+ * By setting it to NDC bounds of [-1,1], we don't do GB clipping.  GB clipping
+ * is supposed to cause seams to become visible in apps due to shared edges
+ * taking different clip/no clip paths depending on whether the rest of the
+ * prim ends up in the guardband or not.
+ */
+static void
+prepare_clip_vp(struct brw_context *brw)
+{
+   struct brw_clipper_viewport vp;
+
+   vp.xmin = -1.0; /* constant [-1, 1] bounds on both axes */
+   vp.xmax = 1.0;
+   vp.ymin = -1.0;
+   vp.ymax = 1.0;
+
+   drm_intel_bo_unreference(brw->clip.vp_bo); /* drop the previous BO */
+   brw->clip.vp_bo = brw_cache_data(&brw->cache, BRW_CLIP_VP,
+				    &vp, sizeof(vp));
+}
+
+const struct brw_tracked_state gen6_clip_vp = {
+   .dirty = { /* flag sets this atom is keyed on */
+      .mesa = _NEW_VIEWPORT, /* XXX: not really, but we need nonzero */
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_clip_vp, /* prepare-only atom: no .emit hook is set */
+};
+
+static void
+prepare_sf_vp(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   struct brw_sf_viewport sfv;
+   GLfloat y_scale, y_bias;
+   const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+   memset(&sfv, 0, sizeof(sfv)); /* fields not assigned below stay zero */
+
+   /* _NEW_BUFFERS: non-FBO drawables get Y negated and biased by the height */
+   if (render_to_fbo) {
+      y_scale = 1.0;
+      y_bias = 0;
+   } else {
+      y_scale = -1.0;
+      y_bias = ctx->DrawBuffer->Height;
+   }
+
+   /* _NEW_VIEWPORT: scale/translate terms from Mesa's window map matrix */
+   sfv.viewport.m00 = v[MAT_SX];
+   sfv.viewport.m11 = v[MAT_SY] * y_scale;
+   sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
+   sfv.viewport.m30 = v[MAT_TX];
+   sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+   sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
+
+   drm_intel_bo_unreference(brw->sf.vp_bo); /* drop the previous BO */
+   brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP,
+				  &sfv, sizeof(sfv));
+}
+
+const struct brw_tracked_state gen6_sf_vp = {
+   .dirty = { /* flag sets this atom is keyed on */
+      .mesa = _NEW_VIEWPORT | _NEW_BUFFERS,
+      .brw = 0,
+      .cache = 0,
+   },
+   .prepare = prepare_sf_vp, /* prepare-only atom: no .emit hook is set */
+};
+
+static void prepare_viewport_state_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->clip.vp_bo); /* validate the BOs relocated below */
+   brw_add_validated_bo(brw, brw->sf.vp_bo);
+   brw_add_validated_bo(brw, brw->cc.vp_bo);
+}
+
+static void upload_viewport_state_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(4); /* header dword + CLIP, SF and CC viewport pointers */
+   OUT_BATCH(CMD_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) |
+	     GEN6_CC_VIEWPORT_MODIFY | GEN6_SF_VIEWPORT_MODIFY |
+	     GEN6_CLIP_VIEWPORT_MODIFY);
+   OUT_RELOC(brw->clip.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->sf.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->cc.vp_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   ADVANCE_BATCH();
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_viewport_state = {
+   .dirty = { /* flag sets this atom is keyed on */
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = (CACHE_NEW_CLIP_VP | /* one cache flag per viewport BO relocated */
+		CACHE_NEW_SF_VP |
+		CACHE_NEW_CC_VP)
+   },
+   .prepare = prepare_viewport_state_pointers,
+   .emit = upload_viewport_state_pointers,
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
new file mode 100644
index 0000000000..5916a13994
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_vs_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   const struct brw_vertex_program *vp =
+      brw_vertex_program_const(brw->vertex_program);
+   unsigned int nr_params = vp->program.Base.Parameters->NumParameters;
+   drm_intel_bo *constant_bo;
+   int i;
+
+   if (vp->use_const_buffer || nr_params == 0) {
+      /* Disable the push constant buffers. */
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      if (brw->vertex_program->IsNVProgram)
+	 _mesa_load_tracked_matrices(ctx);
+
+      /* Updates the ParameterValues[i] pointers for all parameters of the
+       * basic type of PROGRAM_STATE_VAR.
+       */
+      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+
+      constant_bo = drm_intel_bo_alloc(intel->bufmgr, "VS constant_bo",
+				       nr_params * 4 * sizeof(float),
+				       4096); /* a fresh BO per emit; results of alloc/map are not checked */
+      drm_intel_gem_bo_map_gtt(constant_bo);
+      for (i = 0; i < nr_params; i++) { /* copy 4 floats per parameter, packed back to back */
+	 memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
+		vp->program.Base.Parameters->ParameterValues[i],
+		4 * sizeof(float));
+      }
+      drm_intel_gem_bo_unmap_gtt(constant_bo);
+
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 |
+		GEN6_CONSTANT_BUFFER_0_ENABLE |
+		(5 - 2));
+      OUT_RELOC(constant_bo,
+		I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
+		ALIGN(nr_params, 2) / 2 - 1); /* presumably the buffer length in parameter pairs, minus one; confirm */
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      drm_intel_bo_unreference(constant_bo); /* drop the local reference taken by the alloc */
+   }
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   BEGIN_BATCH(6); /* header dword + 5 state dwords */
+   OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2));
+   OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
+	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+   OUT_BATCH(0); /* scratch space base offset */
+   OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
+	     (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
+	     (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
+   OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) |
+	     GEN6_VS_STATISTICS_ENABLE |
+	     GEN6_VS_ENABLE);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_vs_state = {
+   .dirty = { /* flag sets this atom is keyed on */
+      .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+                BRW_NEW_NR_VS_SURFACES |
+		BRW_NEW_URB_FENCE |
+		BRW_NEW_CONTEXT),
+      .cache = CACHE_NEW_VS_PROG /* upload_vs_state() reads brw->vs.prog_bo and prog_data */
+   },
+   .emit = upload_vs_state, /* emit-only atom: no .prepare hook is set */
+};
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
new file mode 100644
index 0000000000..ed1a72f03b
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+
+static void
+upload_wm_state(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   const struct brw_fragment_program *fp =
+      brw_fragment_program_const(brw->fragment_program);
+   unsigned int nr_params = fp->program.Base.Parameters->NumParameters;
+   drm_intel_bo *constant_bo;
+   int i;
+   uint32_t dw2, dw4, dw5, dw6;
+
+   if (fp->use_const_buffer || nr_params == 0) {
+      /* Disable the push constant buffers. */
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   } else {
+      /* Updates the ParameterValues[i] pointers for all parameters of the
+       * basic type of PROGRAM_STATE_VAR.
+       */
+      _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+
+      constant_bo = drm_intel_bo_alloc(intel->bufmgr, "WM constant_bo",
+				       nr_params * 4 * sizeof(float),
+				       4096); /* a fresh BO per emit; results of alloc/map are not checked */
+      drm_intel_gem_bo_map_gtt(constant_bo);
+      for (i = 0; i < nr_params; i++) { /* copy 4 floats per parameter, packed back to back */
+	 memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
+		fp->program.Base.Parameters->ParameterValues[i],
+		4 * sizeof(float));
+      }
+      drm_intel_gem_bo_unmap_gtt(constant_bo);
+
+      BEGIN_BATCH(5);
+      OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 |
+		GEN6_CONSTANT_BUFFER_0_ENABLE |
+		(5 - 2));
+      OUT_RELOC(constant_bo,
+		I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
+		ALIGN(nr_params, 2) / 2 - 1); /* presumably the buffer length in parameter pairs, minus one; confirm */
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+
+      drm_intel_bo_unreference(constant_bo); /* drop the local reference taken by the alloc */
+   }
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   dw2 = dw4 = dw5 = dw6 = 0;
+   dw4 |= GEN6_WM_STATISTICS_ENABLE;
+   dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
+   dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
+
+   /* BRW_NEW_NR_WM_SURFACES */
+   dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT;
+
+   /* CACHE_NEW_SAMPLER: sampler count is written in groups of 4, rounded up */
+   dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
+   dw4 |= (brw->wm.prog_data->first_curbe_grf <<
+	   GEN6_WM_DISPATCH_START_GRF_SHIFT_0);
+
+   dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT; /* hard-coded 40, written minus one */
+   dw5 |= GEN6_WM_DISPATCH_ENABLE;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM: GLSL programs use 8-wide dispatch, others 16-wide */
+   if (fp->isGLSL)
+      dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
+   else
+      dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
+
+   /* _NEW_LINE */
+   if (ctx->Line.StippleFlag)
+      dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
+
+   /* _NEW_POLYGONSTIPPLE */
+   if (ctx->Polygon.StippleFlag)
+      dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   if (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
+      dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
+   if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+      dw5 |= GEN6_WM_COMPUTED_DEPTH;
+
+   /* _NEW_COLOR */
+   if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
+      dw5 |= GEN6_WM_KILL_ENABLE;
+
+   /* This should probably be FS inputs read */
+   dw6 |= brw_count_bits(brw->vs.prog_data->outputs_written) <<
+      GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
+
+   BEGIN_BATCH(9); /* header dword + 8 state dwords */
+   OUT_BATCH(CMD_3D_WM_STATE << 16 | (9 - 2));
+   OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_BATCH(dw2);
+   OUT_BATCH(0); /* scratch space base offset */
+   OUT_BATCH(dw4);
+   OUT_BATCH(dw5);
+   OUT_BATCH(dw6);
+   OUT_BATCH(0); /* kernel 1 pointer */
+   OUT_BATCH(0); /* kernel 2 pointer */
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+const struct brw_tracked_state gen6_wm_state = {
+   .dirty = { /* cache flags cover wm.sampler_*, wm.prog_* and vs.prog_data reads */
+      .mesa  = _NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+		BRW_NEW_FRAGMENT_PROGRAM |
+                BRW_NEW_NR_WM_SURFACES |
+		BRW_NEW_URB_FENCE |
+		BRW_NEW_BATCH),
+      .cache = (CACHE_NEW_SAMPLER | CACHE_NEW_WM_PROG | CACHE_NEW_VS_PROG)
+   },
+   .emit = upload_wm_state, /* emit-only atom: no .prepare hook is set */
+};
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
new file mode 120000
index 0000000000..d38cdf31cc
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -0,0 +1 @@
+../intel/intel_batchbuffer.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_blit.c b/src/mesa/drivers/dri/i965/intel_blit.c
new file mode 120000
index 0000000000..dd6c8d17c2
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -0,0 +1 @@
+../intel/intel_blit.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
new file mode 120000
index 0000000000..e06dd3c8d3
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c
@@ -0,0 +1 @@
+../intel/intel_buffer_objects.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_buffers.c b/src/mesa/drivers/dri/i965/intel_buffers.c
new file mode 120000
index 0000000000..c86daa49f4
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_buffers.c
@@ -0,0 +1 @@
+../intel/intel_buffers.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_clear.c b/src/mesa/drivers/dri/i965/intel_clear.c
new file mode 120000
index 0000000000..9a2a742a0d
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_clear.c
@@ -0,0 +1 @@
+../intel/intel_clear.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_context.c b/src/mesa/drivers/dri/i965/intel_context.c
new file mode 120000
index 0000000000..27a1cbb255
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_context.c
@@ -0,0 +1 @@
+../intel/intel_context.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_decode.c b/src/mesa/drivers/dri/i965/intel_decode.c
new file mode 120000
index 0000000000..f671b6cbb1
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_decode.c
@@ -0,0 +1 @@
+../intel/intel_decode.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c
new file mode 120000
index 0000000000..a2f3e8cd20
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -0,0 +1 @@
+../intel/intel_extensions.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_extensions_es2.c b/src/mesa/drivers/dri/i965/intel_extensions_es2.c
new file mode 120000
index 0000000000..0ec1ceee78
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_extensions_es2.c
@@ -0,0 +1 @@
+../intel/intel_extensions_es2.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c b/src/mesa/drivers/dri/i965/intel_fbo.c
new file mode 120000
index 0000000000..a19f86dcc5
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -0,0 +1 @@
+../intel/intel_fbo.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
new file mode 120000
index 0000000000..242fed0b6a
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -0,0 +1 @@
+../intel/intel_mipmap_tree.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_pixel.c b/src/mesa/drivers/dri/i965/intel_pixel.c
new file mode 120000
index 0000000000..d733c5e874
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_pixel.c
@@ -0,0 +1 @@
+../intel/intel_pixel.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
new file mode 120000
index 0000000000..9085c7b039
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
@@ -0,0 +1 @@
+../intel/intel_pixel_bitmap.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_copy.c b/src/mesa/drivers/dri/i965/intel_pixel_copy.c
new file mode 120000
index 0000000000..ee43360590
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_pixel_copy.c
@@ -0,0 +1 @@
+../intel/intel_pixel_copy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_draw.c b/src/mesa/drivers/dri/i965/intel_pixel_draw.c
new file mode 120000
index 0000000000..8431a24edf
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_pixel_draw.c
@@ -0,0 +1 @@
+../intel/intel_pixel_draw.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
new file mode 120000
index 0000000000..cc4589f4d4
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -0,0 +1 @@
+../intel/intel_pixel_read.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_regions.c b/src/mesa/drivers/dri/i965/intel_regions.c
new file mode 120000
index 0000000000..89b2f15c10
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_regions.c
@@ -0,0 +1 @@
+../intel/intel_regions.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
new file mode 120000
index 0000000000..f2db48272b
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -0,0 +1 @@
+../intel/intel_screen.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_span.c b/src/mesa/drivers/dri/i965/intel_span.c
new file mode 120000
index 0000000000..05e5e8e583
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_span.c
@@ -0,0 +1 @@
+../intel/intel_span.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_state.c b/src/mesa/drivers/dri/i965/intel_state.c
new file mode 120000
index 0000000000..519672fc35
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_state.c
@@ -0,0 +1 @@
+../intel/intel_state.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_structs.h b/src/mesa/drivers/dri/i965/intel_structs.h
new file mode 100644
index 0000000000..522e3bd92c
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_structs.h
@@ -0,0 +1,132 @@
+#ifndef INTEL_STRUCTS_H
+#define INTEL_STRUCTS_H
+
+struct br0 {		/* first member of xy_color_blit, xy_src_copy_blit and xy_setup_blit; widths sum to 32 bits */
+   GLuint length:8;
+   GLuint pad0:3;
+   GLuint dst_tiled:1;
+   GLuint pad1:8;
+   GLuint write_rgb:1;
+   GLuint write_alpha:1;
+   GLuint opcode:7;		/* presumably one of the OPCODE_XY_* values defined below -- confirm */
+   GLuint client:3;		/* presumably CLIENT_2D for these packets -- confirm */
+};
+
+   
+struct br13 {		/* second member of the packets that start with br0; widths sum to 32 bits */
+   GLint dest_pitch:16;		/* declared signed, unlike the remaining fields */
+   GLuint rop:8;
+   GLuint color_depth:2;
+   GLuint pad1:3;
+   GLuint mono_source_transparency:1;
+   GLuint clipping_enable:1;
+   GLuint pad0:1;
+};
+
+
+
+/* This is an attempt to move some of the 2D interaction in this
+ * driver to using structs for packets rather than a bunch of #defines
+ * and dwords.  Members named dwN sit at dword index N of the packet.
+ */
+struct xy_color_blit {
+   struct br0 br0;		/* dword 0 */
+   struct br13 br13;		/* dword 1 */
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw2;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw3;
+   
+   GLuint dest_base_addr;	/* dword 4 */
+   GLuint color;		/* dword 5 */
+};
+
+struct xy_src_copy_blit {	/* br0, br13, then dwords 2-7 */
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw2;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw3;
+   
+   GLuint dest_base_addr;	/* dword 4 (dw5 follows) */
+
+   struct {
+      GLuint src_x1:16;
+      GLuint src_y1:16;
+   } dw5;
+
+   struct {
+      GLint src_pitch:16;	/* declared signed, like br13.dest_pitch */
+      GLuint pad:16;
+   } dw6;
+   
+   GLuint src_base_addr;	/* dword 7 */
+};
+
+struct xy_setup_blit {		/* br0, br13, then dwords 2-7 */
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint clip_x1:16;
+      GLuint clip_y1:16;
+   } dw2;
+
+   struct {
+      GLuint clip_x2:16;
+      GLuint clip_y2:16;
+   } dw3;
+      
+   GLuint dest_base_addr;	/* dword 4 */
+   GLuint background_color;	/* dword 5 */
+   GLuint foreground_color;	/* dword 6 */
+   GLuint pattern_base_addr;	/* dword 7 */
+};
+
+
+struct xy_text_immediate_blit {	/* fixed three-dword header; has no br13 member */
+   struct {			/* own layout rather than struct br0; widths sum to 32 bits */
+      GLuint length:8;
+      GLuint pad2:3;
+      GLuint dst_tiled:1;
+      GLuint pad1:4;
+      GLuint byte_packed:1;
+      GLuint pad0:5;
+      GLuint opcode:7;		/* presumably OPCODE_XY_TEXT_IMMEDIATE_BLT -- confirm */
+      GLuint client:3;
+   } dw0;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw1;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw2;   
+
+   /* Src bitmap data follows as inline dwords.
+    */
+};
+
+
+#define CLIENT_2D 0x2
+#define OPCODE_XY_SETUP_BLT 0x1
+#define OPCODE_XY_COLOR_BLT 0x50
+#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31
+
+#endif
diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c b/src/mesa/drivers/dri/i965/intel_syncobj.c
new file mode 120000
index 0000000000..0b2e56ab24
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_syncobj.c
@@ -0,0 +1 @@
+../intel/intel_syncobj.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c
new file mode 120000
index 0000000000..d77ce749a3
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_tex.c
@@ -0,0 +1 @@
+../intel/intel_tex.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c b/src/mesa/drivers/dri/i965/intel_tex_copy.c
new file mode 120000
index 0000000000..87196c5d1e
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
@@ -0,0 +1 @@
+../intel/intel_tex_copy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_tex_format.c b/src/mesa/drivers/dri/i965/intel_tex_format.c
new file mode 120000
index 0000000000..3415f75470
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_tex_format.c
@@ -0,0 +1 @@
+../intel/intel_tex_format.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
new file mode 120000
index 0000000000..567abe4974
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -0,0 +1 @@
+../intel/intel_tex_image.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_tex_layout.c b/src/mesa/drivers/dri/i965/intel_tex_layout.c
new file mode 120000
index 0000000000..fe61b44194
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_tex_layout.c
@@ -0,0 +1 @@
+../intel/intel_tex_layout.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
new file mode 120000
index 0000000000..b3a8a3d7ca
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -0,0 +1 @@
+../intel/intel_tex_subimage.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/i965/intel_tex_validate.c b/src/mesa/drivers/dri/i965/intel_tex_validate.c
new file mode 120000
index 0000000000..41a75674c2
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/intel_tex_validate.c
@@ -0,0 +1 @@
+../intel/intel_tex_validate.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
new file mode 100644
index 0000000000..698445c526
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c
@@ -0,0 +1,289 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_decode.h"
+#include "intel_reg.h"
+#include "intel_bufmgr.h"
+#include "intel_buffers.h"
+
+void
+intel_batchbuffer_reset(struct intel_batchbuffer *batch)
+{  /* Swap in a freshly allocated buffer object and rewind the batch to empty. */
+   struct intel_context *intel = batch->intel;
+
+   if (batch->buf != NULL) {
+      drm_intel_bo_unreference(batch->buf);
+      batch->buf = NULL;
+   }
+
+   batch->buf = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
+				   intel->maxBatchSize, 4096);
+   batch->map = batch->buffer;   /* dwords are written to the malloc'ed staging copy */
+   batch->size = intel->maxBatchSize;
+   batch->ptr = batch->map;
+   batch->reserved_space = BATCH_RESERVED;
+   batch->dirty_state = ~0;
+   batch->state_batch_offset = batch->size;   /* == size: nothing stored at the tail yet */
+}
+
+struct intel_batchbuffer *
+intel_batchbuffer_alloc(struct intel_context *intel)
+{  /* Create a zeroed batch for 'intel' with a staging buffer of maxBatchSize bytes, then reset it. */
+   struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1);   /* NOTE(review): result is not checked */
+
+   batch->intel = intel;
+   batch->buffer = malloc(intel->maxBatchSize);   /* NOTE(review): result is not checked either */
+   intel_batchbuffer_reset(batch);
+
+   return batch;
+}
+
+void
+intel_batchbuffer_free(struct intel_batchbuffer *batch)
+{  /* Release the staging buffer, drop the buffer-object reference and free the batch itself. */
+   free (batch->buffer);
+   drm_intel_bo_unreference(batch->buf);
+   batch->buf = NULL;
+   free(batch);
+}
+
+
+
+/* Upload the staged batch into batch->buf, execute it and call the new_batch
+ * hook.  Exits the process if execution fails.
+ * TODO: Push this whole function into bufmgr. */
+static void
+do_flush_locked(struct intel_batchbuffer *batch, GLuint used)
+{
+   struct intel_context *intel = batch->intel;
+   int ret = 0;
+   int x_off = 0, y_off = 0;
+
+   drm_intel_bo_subdata(batch->buf, 0, used, batch->buffer);
+   if (batch->state_batch_offset != batch->size) {
+      drm_intel_bo_subdata(batch->buf,
+			   batch->state_batch_offset,
+			   batch->size - batch->state_batch_offset,
+			   batch->buffer + batch->state_batch_offset);
+   }
+
+   batch->ptr = NULL;
+   if (!intel->no_hw) {
+      ret = drm_intel_bo_exec(batch->buf, used, NULL, 0,
+			      (x_off & 0xffff) | (y_off << 16));
+   }
+
+   if (INTEL_DEBUG & DEBUG_BATCH) {
+      drm_intel_bo_map(batch->buf, GL_FALSE);
+      intel_decode(batch->buf->virtual, used / 4, batch->buf->offset,
+		   intel->intelScreen->deviceID);
+      drm_intel_bo_unmap(batch->buf);
+      if (intel->vtbl.debug_batch != NULL)
+	 intel->vtbl.debug_batch(intel);
+   }
+
+   if (ret != 0) {
+      fprintf(stderr, "do_flush_locked: batch submission failed: %d\n", ret);
+      exit(1);
+   }
+   intel->vtbl.new_batch(intel);
+}
+
+void
+_intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file,
+			 int line)
+{  /* Close the batch, submit it and reset; file/line only label the DEBUG_BATCH message. */
+   struct intel_context *intel = batch->intel;
+   GLuint used = batch->ptr - batch->map;   /* bytes written so far */
+
+   if (intel->first_post_swapbuffers_batch == NULL) {
+      intel->first_post_swapbuffers_batch = intel->batch->buf;
+      drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
+   }
+
+   if (used == 0)   /* nothing queued: no submission and no reset */
+      return;
+
+   if (INTEL_DEBUG & DEBUG_BATCH)
+      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
+	      used);
+
+   batch->reserved_space = 0;   /* presumably frees the reserve for the closing dwords below */
+
+   if (intel->always_flush_cache) {
+      intel_batchbuffer_emit_mi_flush(batch);
+      used = batch->ptr - batch->map;
+   }
+
+   /* Round batchbuffer usage to 2 DWORDs: pad with a noop here whenever that
+    * leaves the size 8-byte aligned after MI_BATCH_BUFFER_END is added. */
+   if ((used & 4) == 0) {
+      *(GLuint *) (batch->ptr) = 0; /* noop */
+      batch->ptr += 4;
+      used = batch->ptr - batch->map;
+   }
+
+   /* Mark the end of the buffer. */
+   *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END;
+   batch->ptr += 4;
+   used = batch->ptr - batch->map;
+   assert (used <= batch->buf->size);
+
+   /* Workaround for recursive batchbuffer flushing: If the window is
+    * moved, we can get into a case where we try to flush during a
+    * flush.  What happens is that when we try to grab the lock for
+    * the first flush, we detect that the window moved which then
+    * causes another flush (from the intel_draw_buffer() call in
+    * intelUpdatePageFlipping()).  To work around this we reset the
+    * batchbuffer tail pointer before trying to get the lock.  This
+    * prevents the nested buffer flush, but a better fix would be to
+    * avoid that in the first place. */
+   batch->ptr = batch->map;
+
+   if (intel->vtbl.finish_batch)
+      intel->vtbl.finish_batch(intel);
+
+   /* Check that we didn't just wrap our batchbuffer at a bad time. */
+   assert(!intel->no_batch_wrap);
+
+   do_flush_locked(batch, used);
+
+   if (INTEL_DEBUG & DEBUG_SYNC) {
+      fprintf(stderr, "waiting for idle\n");
+      drm_intel_bo_map(batch->buf, GL_TRUE);   /* presumably blocks until the GPU is done with the buffer */
+      drm_intel_bo_unmap(batch->buf);
+   }
+
+   /* Reset the buffer:
+    */
+   intel_batchbuffer_reset(batch);
+}
+
+
+/* Record a relocation to 'buffer' + delta at the current batch position and
+ * emit the presumed address as the next dword.  Always returns GL_TRUE; the
+ * drm_intel_bo_emit_reloc() result is not acted on.
+ * This is the only way buffers get added to the validate list.
+ */
+GLboolean
+intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+                             drm_intel_bo *buffer,
+                             uint32_t read_domains, uint32_t write_domain,
+			     uint32_t delta)
+{
+   int ret;
+
+   assert(delta < buffer->size);
+
+   if (batch->ptr - batch->map > batch->buf->size)
+    printf ("bad relocation ptr %p map %p offset %d size %lu\n",
+	    (void *) batch->ptr, (void *) batch->map,
+	    (int) (batch->ptr - batch->map), batch->buf->size);
+   ret = drm_intel_bo_emit_reloc(batch->buf, batch->ptr - batch->map,
+				 buffer, delta,
+				 read_domains, write_domain);
+   (void) ret;
+   /* Write the address computed from the old buffer offset so the kernel can
+    * skip relocation processing if the buffer does not move. */
+   intel_batchbuffer_emit_dword (batch, buffer->offset + delta);
+   return GL_TRUE;
+}
+
+/* Same as intel_batchbuffer_emit_reloc(), but records the relocation with
+ * drm_intel_bo_emit_reloc_fence().  Always returns GL_TRUE. */
+GLboolean
+intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch,
+				    drm_intel_bo *buffer,
+				    uint32_t read_domains, uint32_t write_domain,
+				    uint32_t delta)
+{
+   int ret;
+
+   assert(delta < buffer->size);
+
+   if (batch->ptr - batch->map > batch->buf->size)
+    printf ("bad relocation ptr %p map %p offset %d size %lu\n",
+	    (void *) batch->ptr, (void *) batch->map,
+	    (int) (batch->ptr - batch->map), batch->buf->size);
+   ret = drm_intel_bo_emit_reloc_fence(batch->buf, batch->ptr - batch->map,
+				       buffer, delta,
+				       read_domains, write_domain);
+   (void) ret;
+
+   /* Write the address computed from the old buffer offset so the kernel can
+    * skip relocation processing if the buffer does not move. */
+   intel_batchbuffer_emit_dword (batch, buffer->offset + delta);
+   return GL_TRUE;
+}
+
+void
+intel_batchbuffer_data(struct intel_batchbuffer *batch,
+                       const void *data, GLuint bytes)
+{  /* Append 'bytes' bytes (asserted to be a multiple of 4) from 'data' after ensuring space. */
+   assert((bytes & 3) == 0);
+   intel_batchbuffer_require_space(batch, bytes);   /* may flush the batch first */
+   __memcpy(batch->ptr, data, bytes);
+   batch->ptr += bytes;
+}
+
+/* Emit a pipelined flush to either flush render and texture cache for
+ * reading from a FBO-drawn texture, or flush so that frontbuffer
+ * render appears on the screen in DRI1.
+ *
+ * This is also used for the always_flush_cache driconf debug option.
+ */
+void
+intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
+{
+   struct intel_context *intel = batch->intel;   /* the BEGIN/OUT/ADVANCE macros write to intel->batch */
+
+   if (intel->gen >= 6) {   /* opcode and flag bits go out as separate dwords */
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_PIPE_CONTROL);
+      OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
+		PIPE_CONTROL_WRITE_FLUSH |
+		PIPE_CONTROL_NO_WRITE);
+      OUT_BATCH(0); /* write address */
+      OUT_BATCH(0); /* write data */
+      ADVANCE_BATCH();
+   } else if (intel->gen >= 4) {   /* flag bits are OR'ed into the opcode dword */
+      BEGIN_BATCH(4);
+      OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+		PIPE_CONTROL_WRITE_FLUSH |
+		PIPE_CONTROL_NO_WRITE);
+      OUT_BATCH(0); /* write address */
+      OUT_BATCH(0); /* write data */
+      OUT_BATCH(0); /* write data */
+      ADVANCE_BATCH();
+   } else {   /* earlier generations: a single MI_FLUSH dword */
+      BEGIN_BATCH(1);
+      OUT_BATCH(MI_FLUSH);
+      ADVANCE_BATCH();
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
new file mode 100644
index 0000000000..ae53f45511
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h
@@ -0,0 +1,173 @@
+#ifndef INTEL_BATCHBUFFER_H
+#define INTEL_BATCHBUFFER_H
+
+#include "main/mtypes.h"
+
+#include "intel_context.h"
+#include "intel_bufmgr.h"
+#include "intel_reg.h"
+
+#define BATCH_SZ 16384
+#define BATCH_RESERVED 16
+
+
+struct intel_batchbuffer
+{
+   struct intel_context *intel;	/* context this batch was allocated for */
+
+   drm_intel_bo *buf;		/* buffer object the batch is uploaded to and executed from */
+
+   GLubyte *buffer;		/* malloc'ed CPU staging copy of the batch */
+
+   GLubyte *map;		/* start of the write area; reset points it at 'buffer' */
+   GLubyte *ptr;		/* next write position */
+
+   GLuint size;			/* batch size in bytes (intel->maxBatchSize) */
+   uint32_t state_batch_offset;	/* start of data kept at the tail; == size when there is none */
+
+#ifdef DEBUG
+   /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */
+   struct {
+      GLuint total;		/* bytes announced by intel_batchbuffer_begin() */
+      GLubyte *start_ptr;	/* 'ptr' at begin time; NULL outside a begin/advance pair */
+   } emit;
+#endif
+
+   GLuint dirty_state;		/* set to ~0 on every reset */
+   GLuint reserved_space;	/* bytes withheld from intel_batchbuffer_space() */
+};
+
+struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context
+                                                  *intel);
+
+void intel_batchbuffer_free(struct intel_batchbuffer *batch);
+
+
+void _intel_batchbuffer_flush(struct intel_batchbuffer *batch,
+			      const char *file, int line);
+
+#define intel_batchbuffer_flush(batch) \
+	_intel_batchbuffer_flush(batch, __FILE__, __LINE__)
+
+void intel_batchbuffer_reset(struct intel_batchbuffer *batch);
+
+
+/* Unlike bmBufferData, this currently requires the buffer be mapped.
+ * Consider it a convenience function wrapping multiple
+ * intel_batchbuffer_emit_dword() calls.
+ */
+void intel_batchbuffer_data(struct intel_batchbuffer *batch,
+                            const void *data, GLuint bytes);
+
+void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
+                                     GLuint bytes);
+
+GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+                                       drm_intel_bo *buffer,
+				       uint32_t read_domains,
+				       uint32_t write_domain,
+				       uint32_t offset);
+GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch,
+					      drm_intel_bo *buffer,
+					      uint32_t read_domains,
+					      uint32_t write_domain,
+					      uint32_t offset);
+void intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch);
+
+static INLINE uint32_t float_as_int(float f)
+{  /* Return the bits of 'f' reinterpreted as a uint32_t (no numeric conversion). */
+   union {
+      float f;
+      uint32_t d;
+   } fi;
+
+   fi.f = f;
+   return fi.d;
+}
+
+/* Inline functions - might actually be better off with these
+ * non-inlined.  Certainly better off switching all command packets to
+ * be passed as structs rather than dwords, but that's a little bit of
+ * work...
+ */
+static INLINE GLint
+intel_batchbuffer_space(struct intel_batchbuffer *batch)
+{  /* Free bytes: the area below state_batch_offset, minus the reserve, minus what is written. */
+   return (batch->state_batch_offset - batch->reserved_space) -
+      (batch->ptr - batch->map);
+}
+
+
+static INLINE void
+intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
+{  /* Store one dword at the write pointer and advance it by 4 bytes. */
+#ifdef DEBUG
+   assert(intel_batchbuffer_space(batch) >= 4);   /* space is only checked in DEBUG builds */
+#endif
+   *(GLuint *) (batch->ptr) = dword;
+   batch->ptr += 4;
+}
+
+static INLINE void
+intel_batchbuffer_emit_float(struct intel_batchbuffer *batch, float f)
+{  /* Emit the raw bit pattern of 'f' as one dword. */
+   intel_batchbuffer_emit_dword(batch, float_as_int(f));
+}
+
+static INLINE void
+intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
+                                GLuint sz)
+{  /* Flush the batch if fewer than 'sz' bytes are free. */
+#ifdef DEBUG
+   assert(sz < batch->size - 8);
+#endif
+   if (intel_batchbuffer_space(batch) < sz)   /* NOTE(review): compares a GLint result with a GLuint */
+      intel_batchbuffer_flush(batch);
+}
+
+static INLINE void
+intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n)
+{  /* Make room for n dwords; DEBUG builds also record the expected size for the advance check. */
+   intel_batchbuffer_require_space(batch, n * 4);
+#ifdef DEBUG
+   assert(batch->map);
+   assert(batch->emit.start_ptr == NULL);   /* a previous begin must have been closed by advance */
+   batch->emit.total = n * 4;
+   batch->emit.start_ptr = batch->ptr;
+#endif
+}
+
+static INLINE void	/* DEBUG builds: abort unless exactly the size given to begin was emitted */
+intel_batchbuffer_advance(struct intel_batchbuffer *batch)
+{
+#ifdef DEBUG
+   unsigned int _n = batch->ptr - batch->emit.start_ptr;
+   assert(batch->emit.start_ptr != NULL);
+   if (_n != batch->emit.total) {
+      fprintf(stderr, "ADVANCE_BATCH: %u of %u dwords emitted\n",
+	      _n / 4, batch->emit.total / 4);   /* both counters are kept in bytes */
+      abort();
+   }
+   batch->emit.start_ptr = NULL;
+#endif
+}
+
+/* Here are the crusty old macros, to be removed.  Apart from BATCH_LOCALS
+ * (which expands to nothing) they all operate on intel->batch. */
+#define BATCH_LOCALS
+
+#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n)
+#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
+#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel->batch,f)
+#define OUT_RELOC(buf, read_domains, write_domain, delta) do {		\
+   intel_batchbuffer_emit_reloc(intel->batch, buf,			\
+				read_domains, write_domain, delta);	\
+} while (0)
+#define OUT_RELOC_FENCED(buf, read_domains, write_domain, delta) do {	\
+   intel_batchbuffer_emit_reloc_fenced(intel->batch, buf,		\
+				       read_domains, write_domain, delta); \
+} while (0)
+/* NOTE(review): the expansion of ADVANCE_BATCH() already ends in ';'. */
+#define ADVANCE_BATCH() intel_batchbuffer_advance(intel->batch);
+
+#endif
diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c
new file mode 100644
index 0000000000..2c85ad3c36
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_blit.c
@@ -0,0 +1,512 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "main/mtypes.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/colormac.h"
+
+#include "intel_blit.h"
+#include "intel_buffers.h"
+#include "intel_context.h"
+#include "intel_fbo.h"
+#include "intel_reg.h"
+#include "intel_regions.h"
+#include "intel_batchbuffer.h"
+
+#define FILE_DEBUG_FLAG DEBUG_BLIT
+
+static GLuint translate_raster_op(GLenum logicop)
+{  /* GL logic op -> 8-bit raster-op code; the table matches source = 0xCC, destination = 0xAA */
+   switch(logicop) {
+   case GL_CLEAR: return 0x00;
+   case GL_AND: return 0x88;
+   case GL_AND_REVERSE: return 0x44;
+   case GL_COPY: return 0xCC;
+   case GL_AND_INVERTED: return 0x22;
+   case GL_NOOP: return 0xAA;
+   case GL_XOR: return 0x66;
+   case GL_OR: return 0xEE;
+   case GL_NOR: return 0x11;
+   case GL_EQUIV: return 0x99;
+   case GL_INVERT: return 0x55;
+   case GL_OR_REVERSE: return 0xDD;
+   case GL_COPY_INVERTED: return 0x33;
+   case GL_OR_INVERTED: return 0xBB;
+   case GL_NAND: return 0x77;
+   case GL_SET: return 0xFF;
+   default: return 0;   /* unlisted values get the same code as GL_CLEAR */
+   }
+}
+
+/* Return the BR13 colour-depth bits for a surface with 'cpp' bytes per
+ * pixel.  Only 1, 2 and 4 are handled; anything else asserts and yields 0.
+ */
+static uint32_t
+br13_for_cpp(int cpp)
+{
+   switch (cpp) {
+   case 1:
+      return BR13_8;
+   case 2:
+      return BR13_565;
+   case 4:
+      return BR13_8888;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+/* Copy BitBlt: emit an XY_SRC_COPY_BLT of a w x h rectangle from src_buffer to
+ * dst_buffer.  Returns GL_FALSE when the blit cannot be emitted. */
+GLboolean
+intelEmitCopyBlit(struct intel_context *intel,
+		  GLuint cpp,
+		  GLshort src_pitch,
+		  drm_intel_bo *src_buffer,
+		  GLuint src_offset,
+		  uint32_t src_tiling,
+		  GLshort dst_pitch,
+		  drm_intel_bo *dst_buffer,
+		  GLuint dst_offset,
+		  uint32_t dst_tiling,
+		  GLshort src_x, GLshort src_y,
+		  GLshort dst_x, GLshort dst_y,
+		  GLshort w, GLshort h,
+		  GLenum logic_op)
+{
+   GLuint CMD, BR13, pass = 0;
+   int dst_y2 = dst_y + h;
+   int dst_x2 = dst_x + w;
+   /* Byte pitches are kept in int: pitch * cpp can exceed the GLshort range. */
+   int src_pitch_bytes = src_pitch * (int) cpp;
+   int dst_pitch_bytes = dst_pitch * (int) cpp;
+   drm_intel_bo *aper_array[3];
+   BATCH_LOCALS;
+
+   /* Blits are in a different ringbuffer so we don't use them. */
+   if (intel->gen >= 6)
+      return GL_FALSE;
+
+   if (dst_tiling != I915_TILING_NONE) {
+      if (dst_offset & 4095)
+	 return GL_FALSE;
+      if (dst_tiling == I915_TILING_Y)
+	 return GL_FALSE;
+   }
+   if (src_tiling != I915_TILING_NONE) {
+      if (src_offset & 4095)
+	 return GL_FALSE;
+      if (src_tiling == I915_TILING_Y)
+	 return GL_FALSE;
+   }
+
+   /* do space check before going any further */
+   do {
+       aper_array[0] = intel->batch->buf;
+       aper_array[1] = dst_buffer;
+       aper_array[2] = src_buffer;
+
+       if (drm_intel_bufmgr_check_aperture_space(aper_array,
+						 ARRAY_SIZE(aper_array)) != 0) {
+           intel_batchbuffer_flush(intel->batch);
+           pass++;
+       } else
+           break;
+   } while (pass < 2);
+
+   if (pass >= 2)
+      return GL_FALSE;
+
+   intel_batchbuffer_require_space(intel->batch, 8 * 4);
+   DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+       __FUNCTION__,
+       src_buffer, src_pitch, src_offset, src_x, src_y,
+       dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
+
+   BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16;
+
+   switch (cpp) {
+   case 1:
+   case 2:
+      CMD = XY_SRC_COPY_BLT_CMD;
+      break;
+   case 4:
+      CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+      break;
+   default:
+      return GL_FALSE;
+   }
+
+#ifndef I915
+   if (dst_tiling != I915_TILING_NONE) {
+      CMD |= XY_DST_TILED;
+      dst_pitch_bytes /= 4;
+   }
+   if (src_tiling != I915_TILING_NONE) {
+      CMD |= XY_SRC_TILED;
+      src_pitch_bytes /= 4;
+   }
+#endif
+
+   if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
+      return GL_TRUE;
+   }
+
+   assert(dst_x < dst_x2);
+   assert(dst_y < dst_y2);
+
+   BEGIN_BATCH(8);
+   OUT_BATCH(CMD);
+   OUT_BATCH(BR13 | (uint16_t)dst_pitch_bytes);
+   OUT_BATCH((dst_y << 16) | dst_x);
+   OUT_BATCH((dst_y2 << 16) | dst_x2);
+   OUT_RELOC_FENCED(dst_buffer,
+		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		    dst_offset);
+   OUT_BATCH((src_y << 16) | src_x);
+   OUT_BATCH((uint16_t)src_pitch_bytes);
+   OUT_RELOC_FENCED(src_buffer,
+		    I915_GEM_DOMAIN_RENDER, 0,
+		    src_offset);
+   ADVANCE_BATCH();
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+   return GL_TRUE;
+}
+
+
+/**
+ * Use blitting to clear the renderbuffers named by 'mask'.
+ * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field
+ * since that might include software renderbuffers or renderbuffers
+ * which we're clearing with triangles.
+ * \param mask  bitmask of BUFFER_BIT_* values indicating buffers to clear
+ */
+void
+intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   GLuint clear_depth;
+   GLuint buf;
+   GLboolean all;
+   GLint cx, cy, cw, ch;
+   BATCH_LOCALS;
+
+   /* Blits are in a different ringbuffer so we don't use them. */
+   assert(intel->gen < 6);
+
+   /*
+    * Compute values for clearing the buffers.
+    */
+   clear_depth = 0;
+   if (mask & BUFFER_BIT_DEPTH) {
+      clear_depth = (GLuint) (fb->_DepthMax * ctx->Depth.Clear);
+   }
+   if (mask & BUFFER_BIT_STENCIL) {
+      clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;   /* stencil goes in the top byte */
+   }
+
+   cx = fb->_Xmin;
+   if (fb->Name == 0)   /* presumably the window-system framebuffer: Y is flipped */
+      cy = ctx->DrawBuffer->Height - fb->_Ymax;
+   else
+      cy = fb->_Ymin;
+   cw = fb->_Xmax - fb->_Xmin;
+   ch = fb->_Ymax - fb->_Ymin;
+
+   if (cw == 0 || ch == 0)
+      return;
+
+   all = (cw == fb->Width && ch == fb->Height);
+
+   /* Loop over all renderbuffers */
+   for (buf = 0; buf < BUFFER_COUNT && mask; buf++) {
+      const GLbitfield bufBit = 1 << buf;
+      struct intel_renderbuffer *irb;
+      drm_intel_bo *write_buffer;
+      int x1, y1, x2, y2;
+      uint32_t clear_val;
+      uint32_t BR13, CMD;
+      int pitch, cpp;
+      drm_intel_bo *aper_array[2];
+
+      if (!(mask & bufBit))
+	 continue;
+
+      /* OK, clear this renderbuffer */
+      irb = intel_get_renderbuffer(fb, buf);
+      write_buffer = intel_region_buffer(intel, irb->region,
+					 all ? INTEL_WRITE_FULL :
+					 INTEL_WRITE_PART);
+      x1 = cx + irb->region->draw_x;
+      y1 = cy + irb->region->draw_y;
+      x2 = cx + cw + irb->region->draw_x;
+      y2 = cy + ch + irb->region->draw_y;
+
+      pitch = irb->region->pitch;
+      cpp = irb->region->cpp;
+
+      DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n",
+	  __FUNCTION__,
+	  irb->region->buffer, (pitch * cpp),
+	  x1, y1, x2 - x1, y2 - y1);
+
+      BR13 = br13_for_cpp(cpp) | 0xf0 << 16;   /* 0xf0: fixed raster op used for the fill */
+      CMD = XY_COLOR_BLT_CMD;
+
+      /* Setup the blit command */
+      if (cpp == 4) {
+	 if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
+	    if (mask & BUFFER_BIT_DEPTH)
+	       CMD |= XY_BLT_WRITE_RGB;
+	    if (mask & BUFFER_BIT_STENCIL)
+	       CMD |= XY_BLT_WRITE_ALPHA;
+	 } else {
+	    /* clearing RGBA */
+	    CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+	 }
+      }
+
+      assert(irb->region->tiling != I915_TILING_Y);
+
+#ifndef I915
+      if (irb->region->tiling != I915_TILING_NONE) {
+	 CMD |= XY_DST_TILED;
+	 pitch /= 4;
+      }
+#endif
+      BR13 |= (pitch * cpp);
+
+      if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
+	 clear_val = clear_depth;
+      } else {
+	 uint8_t clear[4];
+	 GLclampf *color = ctx->Color.ClearColor;
+
+	 CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]);
+	 CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]);
+	 CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]);
+	 CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]);
+
+	 switch (irb->Base.Format) {
+	 case MESA_FORMAT_ARGB8888:
+	 case MESA_FORMAT_XRGB8888:
+	    clear_val = PACK_COLOR_8888(clear[3], clear[0],
+					clear[1], clear[2]);
+	    break;
+	 case MESA_FORMAT_RGB565:
+	    clear_val = PACK_COLOR_565(clear[0], clear[1], clear[2]);
+	    break;
+	 case MESA_FORMAT_ARGB4444:
+	    clear_val = PACK_COLOR_4444(clear[3], clear[0],
+					clear[1], clear[2]);
+	    break;
+	 case MESA_FORMAT_ARGB1555:
+	    clear_val = PACK_COLOR_1555(clear[3], clear[0],
+					clear[1], clear[2]);
+	    break;
+	 case MESA_FORMAT_A8:
+	    clear_val = PACK_COLOR_8888(clear[3], clear[3],
+					clear[3], clear[3]);
+	    break;
+	 default:
+	    _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n",
+			  irb->Base.Format);
+	    clear_val = 0;
+	 }
+      }
+
+      assert(x1 < x2);
+      assert(y1 < y2);
+
+      /* do space check before going any further */
+      aper_array[0] = intel->batch->buf;
+      aper_array[1] = write_buffer;
+
+      if (drm_intel_bufmgr_check_aperture_space(aper_array,
+						ARRAY_SIZE(aper_array)) != 0) {
+	 intel_batchbuffer_flush(intel->batch);
+      }
+
+      BEGIN_BATCH(6);
+      OUT_BATCH(CMD);
+      OUT_BATCH(BR13);
+      OUT_BATCH((y1 << 16) | x1);
+      OUT_BATCH((y2 << 16) | x2);
+      OUT_RELOC_FENCED(write_buffer,
+		       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		       0);
+      OUT_BATCH(clear_val);
+      ADVANCE_BATCH();
+
+      if (intel->always_flush_cache)
+	 intel_batchbuffer_emit_mi_flush(intel->batch);
+
+      if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL)   /* one blit covers both depth and stencil */
+	 mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
+      else
+	 mask &= ~bufBit;    /* turn off bit, for faster loop exit */
+   }
+}
+
+GLboolean
+intelEmitImmediateColorExpandBlit(struct intel_context *intel,
+				  GLuint cpp,
+				  GLubyte *src_bits, GLuint src_size,
+				  GLuint fg_color,
+				  GLshort dst_pitch,
+				  drm_intel_bo *dst_buffer,
+				  GLuint dst_offset,
+				  uint32_t dst_tiling,
+				  GLshort x, GLshort y,
+				  GLshort w, GLshort h,
+				  GLenum logic_op)
+{
+   int dwords = ALIGN(src_size, 8) / 4;   /* src_size aligned to 8, in dwords */
+   uint32_t opcode, br13, blit_cmd;
+   /* Queue a setup blit plus an immediate text blit with src_bits inline. */
+   /* Blits are in a different ringbuffer so we don't use them. */
+   if (intel->gen >= 6)
+      return GL_FALSE;
+   /* Tiled targets need a 4096-byte aligned offset, and Y tiling is refused. */
+   if (dst_tiling != I915_TILING_NONE) {
+      if (dst_offset & 4095)
+	 return GL_FALSE;
+      if (dst_tiling == I915_TILING_Y)
+	 return GL_FALSE;
+   }
+
+   assert( logic_op - GL_CLEAR >= 0 );
+   assert( logic_op - GL_CLEAR < 0x10 );
+   assert(dst_pitch > 0);
+   /* Negative extents draw nothing; that is still reported as success. */
+   if (w < 0 || h < 0)
+      return GL_TRUE;
+   /* NOTE(review): presumably converts a pixel pitch to bytes - confirm. */
+   dst_pitch *= cpp;
+
+   DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
+       __FUNCTION__,
+       dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
+   /* Room for both command headers (8 + 3 dwords) and the inline bitmap. */
+   intel_batchbuffer_require_space( intel->batch,
+				    (8 * 4) +
+				    (3 * 4) +
+				    dwords * 4 );
+
+   opcode = XY_SETUP_BLT_CMD;
+   if (cpp == 4)
+      opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+#ifndef I915
+   if (dst_tiling != I915_TILING_NONE) {
+      opcode |= XY_DST_TILED;
+      dst_pitch /= 4;
+   }
+#endif
+
+   br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
+   br13 |= br13_for_cpp(cpp);
+
+   blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
+   if (dst_tiling != I915_TILING_NONE)
+      blit_cmd |= XY_DST_TILED;
+   /* NOTE(review): the clip rectangle below is hard-coded to (0,0)-(100,100). */
+   BEGIN_BATCH(8 + 3);
+   OUT_BATCH(opcode);
+   OUT_BATCH(br13);
+   OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
+   OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
+   OUT_RELOC_FENCED(dst_buffer,
+		    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		    dst_offset);
+   OUT_BATCH(0); /* bg */
+   OUT_BATCH(fg_color); /* fg */
+   OUT_BATCH(0); /* pattern base addr */
+
+   OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
+   OUT_BATCH((y << 16) | x);
+   OUT_BATCH(((y + h) << 16) | (x + w));
+   ADVANCE_BATCH();
+   /* NOTE(review): copies dwords * 4 bytes, which can exceed src_size. */
+   intel_batchbuffer_data( intel->batch,
+			   src_bits,
+			   dwords * 4 );
+
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   return GL_TRUE;
+}
+
+/* No memmove-type blit exists on this hardware, so copy size bytes with one
+ * rectangular blit of full rows plus a 1-scanline blit for any remainder.
+ */
+void
+intel_emit_linear_blit(struct intel_context *intel,
+		       drm_intel_bo *dst_bo,
+		       unsigned int dst_offset,
+		       drm_intel_bo *src_bo,
+		       unsigned int src_offset,
+		       unsigned int size)
+{
+   GLuint pitch, height;
+   GLboolean ok;
+
+   /* Blits are in a different ringbuffer so we don't use them. */
+   assert(intel->gen < 6);
+   if (size == 0)
+      return;   /* nothing to copy; a zero pitch would divide by zero below */
+
+   /* The pitch is a signed value. */
+   pitch = MIN2(size, (1 << 15) - 1);
+   height = size / pitch;
+   ok = intelEmitCopyBlit(intel, 1,
+			  pitch, src_bo, src_offset, I915_TILING_NONE,
+			  pitch, dst_bo, dst_offset, I915_TILING_NONE,
+			  0, 0, /* src x/y */
+			  0, 0, /* dst x/y */
+			  pitch, height, /* w, h */
+			  GL_COPY);
+   assert(ok);
+   src_offset += pitch * height;
+   dst_offset += pitch * height;
+   size -= pitch * height;
+   assert (size < (1 << 15));
+   if (size != 0) {
+      ok = intelEmitCopyBlit(intel, 1,
+			     size, src_bo, src_offset, I915_TILING_NONE,
+			     size, dst_bo, dst_offset, I915_TILING_NONE,
+			     0, 0, /* src x/y */
+			     0, 0, /* dst x/y */
+			     size, 1, /* w, h */
+			     GL_COPY);
+      assert(ok);
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h
new file mode 100644
index 0000000000..70d277df3c
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_blit.h
@@ -0,0 +1,73 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_BLIT_H
+#define INTEL_BLIT_H
+
+#include "intel_context.h"
+
+extern void intelCopyBuffer(const __DRIdrawable * dpriv,
+                            const drm_clip_rect_t * rect);
+
+extern void intelClearWithBlit(GLcontext * ctx, GLbitfield mask);
+
+GLboolean
+intelEmitCopyBlit(struct intel_context *intel,
+                              GLuint cpp,
+                              GLshort src_pitch,
+                              drm_intel_bo *src_buffer,
+                              GLuint src_offset,
+			      uint32_t src_tiling,
+                              GLshort dst_pitch,
+                              drm_intel_bo *dst_buffer,
+                              GLuint dst_offset,
+			      uint32_t dst_tiling,
+                              GLshort srcx, GLshort srcy,
+                              GLshort dstx, GLshort dsty,
+                              GLshort w, GLshort h,
+			      GLenum logicop );
+
+GLboolean
+intelEmitImmediateColorExpandBlit(struct intel_context *intel,
+				  GLuint cpp,
+				  GLubyte *src_bits, GLuint src_size,
+				  GLuint fg_color,
+				  GLshort dst_pitch,
+				  drm_intel_bo *dst_buffer,
+				  GLuint dst_offset,
+				  uint32_t dst_tiling,
+				  GLshort x, GLshort y,
+				  GLshort w, GLshort h,
+				  GLenum logic_op);
+void intel_emit_linear_blit(struct intel_context *intel,
+			    drm_intel_bo *dst_bo,
+			    unsigned int dst_offset,
+			    drm_intel_bo *src_bo,
+			    unsigned int src_offset,
+			    unsigned int size);
+
+#endif
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
new file mode 100644
index 0000000000..8ab41f8d27
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -0,0 +1,734 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/bufferobj.h"
+
+#include "intel_blit.h"
+#include "intel_buffer_objects.h"
+#include "intel_batchbuffer.h"
+#include "intel_context.h"
+#include "intel_fbo.h"
+#include "intel_mipmap_tree.h"
+#include "intel_regions.h"
+
+static GLboolean
+intel_bufferobj_unmap(GLcontext * ctx,
+                      GLenum target, struct gl_buffer_object *obj);
+
+/** Give the object a fresh drm_intel_bo of Base.Size bytes in ->buffer. */
+static void
+intel_bufferobj_alloc_buffer(struct intel_context *intel,
+			     struct intel_buffer_object *intel_obj)
+{
+   const GLsizeiptrARB bytes = intel_obj->Base.Size;
+   intel_obj->buffer = drm_intel_bo_alloc(intel->bufmgr, "bufferobj", bytes, 64);
+}
+
+/**
+ * Create the driver buffer object for name/target (no bo is allocated yet).
+ * Returns NULL when out of memory.  There is some duplication between mesa's
+ * bufferobjects and our bufmgr buffers: both have an integer handle and a
+ * hashtable to look up an opaque structure; sharing them would be nice.
+ */
+static struct gl_buffer_object *
+intel_bufferobj_alloc(GLcontext * ctx, GLuint name, GLenum target)
+{
+   struct intel_buffer_object *obj = CALLOC_STRUCT(intel_buffer_object);
+
+   if (!obj)
+      return NULL;
+   _mesa_initialize_buffer_object(&obj->Base, name, target);
+   obj->buffer = NULL;
+   return &obj->Base;
+}
+
+/* Detach the buffer object from its region: the region keeps the data and
+ * the object gives up its reference to the shared bo. */
+void
+intel_bufferobj_release_region(struct intel_context *intel,
+                               struct intel_buffer_object *intel_obj)
+{
+   struct intel_region *const shared = intel_obj->region;
+   assert(shared->buffer == intel_obj->buffer);
+   shared->pbo = NULL;
+   intel_obj->region = NULL;
+   drm_intel_bo_unreference(intel_obj->buffer);
+   intel_obj->buffer = NULL;
+}
+
+/* Break the COW tie through intel_region_cow(); both the pbo and the region
+ * end up with a copy of the data. */
+void
+intel_bufferobj_cow(struct intel_context *intel,
+                    struct intel_buffer_object *intel_obj)
+{
+   struct intel_region *const tied = intel_obj->region;
+   assert(tied);
+   intel_region_cow(intel, tied);
+}
+
+
+/**
+ * Destroy a vertex/pixel buffer object together with the storage it holds.
+ * Called via glDeleteBuffersARB().
+ */
+static void
+intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+   assert(intel_obj);
+
+   /* The spec says deleting a buffer object unmaps it, but Mesa only
+    * issues UnmapBuffer for us from glDeleteBuffers, not at context
+    * destroy, so drop any live mapping here.
+    */
+   if (obj->Pointer != NULL)
+      intel_bufferobj_unmap(ctx, 0, obj);
+
+   free(intel_obj->sys_buffer);
+
+   /* release_region() also unreferences the bo; otherwise do it directly. */
+   if (intel_obj->region != NULL)
+      intel_bufferobj_release_region(intel, intel_obj);
+   else if (intel_obj->buffer != NULL)
+      drm_intel_bo_unreference(intel_obj->buffer);
+
+   free(intel_obj);
+}
+
+
+
+/**
+ * Allocate space for and store data in a buffer object.  Any data that was
+ * previously stored in the buffer object is lost.  If data is NULL,
+ * memory will be allocated, but no copy will occur.
+ * Called via ctx->Driver.BufferData().
+ * \return GL_TRUE for success, GL_FALSE if out of memory
+ */
+static GLboolean
+intel_bufferobj_data(GLcontext * ctx,
+                     GLenum target,
+                     GLsizeiptrARB size,
+                     const GLvoid * data,
+                     GLenum usage, struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+   /* NOTE(review): Size/Usage are updated before any allocation succeeds. */
+   intel_obj->Base.Size = size;
+   intel_obj->Base.Usage = usage;
+
+   assert(!obj->Pointer); /* Mesa should have unmapped it */
+   /* Release the old storage, whichever form it took. */
+   if (intel_obj->region)
+      intel_bufferobj_release_region(intel, intel_obj);
+
+   if (intel_obj->buffer != NULL) {
+      drm_intel_bo_unreference(intel_obj->buffer);
+      intel_obj->buffer = NULL;
+   }
+   free(intel_obj->sys_buffer);
+   intel_obj->sys_buffer = NULL;
+   /* A size of zero leaves the object with neither a bo nor a system copy. */
+   if (size != 0) {
+#ifdef I915
+      /* On pre-965, stick VBOs in system memory, as we're always doing swtnl
+       * with their contents anyway.
+       */
+      if (target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER) {
+	 intel_obj->sys_buffer = malloc(size);
+	 if (intel_obj->sys_buffer != NULL) {
+	    if (data != NULL)
+	       memcpy(intel_obj->sys_buffer, data, size);
+	    return GL_TRUE;
+	 } /* malloc failed: fall through and try a bo instead */
+      }
+#endif
+      intel_bufferobj_alloc_buffer(intel, intel_obj);
+      if (!intel_obj->buffer)
+         return GL_FALSE;
+      /* Upload the caller's initial contents into the new bo. */
+      if (data != NULL)
+	 drm_intel_bo_subdata(intel_obj->buffer, 0, size, data);
+   }
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Replace size bytes at offset in the buffer object with data.  A zero size
+ * or NULL data is a no-op.  The range itself is not validated here.  A bo
+ * that is busy or referenced by the current batch is updated by blitting
+ * from a temporary bo.  Called via glBufferSubDataARB().
+ */
+static void
+intel_bufferobj_subdata(GLcontext * ctx,
+                        GLenum target,
+                        GLintptrARB offset,
+                        GLsizeiptrARB size,
+                        const GLvoid * data, struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+   assert(intel_obj);
+
+   /* Nothing to write; also keeps empty requests off the bo/blit paths. */
+   if (size == 0 || data == NULL)
+      return;
+
+   if (intel_obj->region)
+      intel_bufferobj_cow(intel, intel_obj);
+
+   if (intel_obj->sys_buffer)
+      memcpy((char *)intel_obj->sys_buffer + offset, data, size);
+   else {
+      /* Busy or batch-referenced bo: upload via a temporary bo and a blit. */
+      if (drm_intel_bo_busy(intel_obj->buffer) ||
+	  drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) {
+	 drm_intel_bo *temp_bo;
+	 temp_bo = drm_intel_bo_alloc(intel->bufmgr, "subdata temp", size, 64);
+	 drm_intel_bo_subdata(temp_bo, 0, size, data);
+	 intel_emit_linear_blit(intel,
+				intel_obj->buffer, offset,
+				temp_bo, 0,
+				size);
+	 drm_intel_bo_unreference(temp_bo);
+      } else {
+	 drm_intel_bo_subdata(intel_obj->buffer, offset, size, data);
+      }
+   }
+}
+
+
+/**
+ * Read size bytes at offset back into data.  Via glGetBufferSubDataARB().
+ */
+static void
+intel_bufferobj_get_subdata(GLcontext * ctx,
+                            GLenum target,
+                            GLintptrARB offset,
+                            GLsizeiptrARB size,
+                            GLvoid * data, struct gl_buffer_object *obj)
+{
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+   assert(intel_obj);
+   if (intel_obj->sys_buffer == NULL)
+      drm_intel_bo_get_subdata(intel_obj->buffer, offset, size, data);
+   else
+      memcpy(data, (const char *) intel_obj->sys_buffer + offset, size);
+}
+
+
+
+/**
+ * Map the whole buffer for CPU access.  Called via glMapBufferARB().
+ */
+static void *
+intel_bufferobj_map(GLcontext * ctx,
+                    GLenum target,
+                    GLenum access, struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+   GLboolean read_only = (access == GL_READ_ONLY_ARB);
+   GLboolean write_only = (access == GL_WRITE_ONLY_ARB);
+
+   assert(intel_obj);
+   /* System-memory storage needs no mapping; expose it directly. */
+   if (intel_obj->sys_buffer) {
+      obj->Pointer = intel_obj->sys_buffer;
+      obj->Length = obj->Size;
+      obj->Offset = 0;
+      return obj->Pointer;
+   }
+
+   /* Flush any existing batchbuffer that might reference this data. */
+   if (drm_intel_bo_references(intel->batch->buf, intel_obj->buffer))
+      intel_flush(ctx);
+
+   if (intel_obj->region)
+      intel_bufferobj_cow(intel, intel_obj);
+   /* Without a bo there is nothing to map: the map fails with NULL. */
+   if (intel_obj->buffer == NULL) {
+      obj->Pointer = NULL;
+      return NULL;
+   }
+   /* Write-only access uses the GTT map; otherwise drm_intel_bo_map(). */
+   if (write_only) {
+      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
+      intel_obj->mapped_gtt = GL_TRUE;
+   } else {
+      drm_intel_bo_map(intel_obj->buffer, !read_only);
+      intel_obj->mapped_gtt = GL_FALSE;
+   }
+   /* NOTE(review): neither map call's result is checked before use. */
+   obj->Pointer = intel_obj->buffer->virtual;
+   obj->Length = obj->Size;
+   obj->Offset = 0;
+
+   return obj->Pointer;
+}
+
+/**
+ * Called via glMapBufferRange().
+ *
+ * The goal of this extension is to allow apps to accumulate their rendering
+ * at the same time as they accumulate their buffer object.  Without it,
+ * you'd end up blocking on execution of rendering every time you mapped
+ * the buffer to put new data in.
+ *
+ * We support it in 3 ways: If unsynchronized, then don't bother
+ * flushing the batchbuffer before mapping the buffer, which can save blocking
+ * in many cases.  If we would still block, and they allow the whole buffer
+ * to be invalidated, then just allocate a new buffer to replace the old one.
+ * If not, and we'd block, and they allow the subrange of the buffer to be
+ * invalidated, then we can make a new little BO, let them write into that,
+ * and blit it into the real BO at unmap time.
+ */
+static void *
+intel_bufferobj_map_range(GLcontext * ctx,
+			  GLenum target, GLintptr offset, GLsizeiptr length,
+			  GLbitfield access, struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+   assert(intel_obj);
+
+   /* _mesa_MapBufferRange (GL entrypoint) sets these, but the vbo module also
+    * internally uses our functions directly.
+    */
+   obj->Offset = offset;
+   obj->Length = length;
+   obj->AccessFlags = access;
+   /* System-memory storage: return a pointer into it, nothing to map. */
+   if (intel_obj->sys_buffer) {
+      obj->Pointer = intel_obj->sys_buffer + offset; /* NOTE(review): void * arithmetic (GNU extension) */
+      return obj->Pointer;
+   }
+
+   if (intel_obj->region)
+      intel_bufferobj_cow(intel, intel_obj);
+
+   /* If the mapping is synchronized with other GL operations, flush
+    * the batchbuffer so that GEM knows about the buffer access for later
+    * syncing.
+    */
+   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
+       drm_intel_bo_references(intel->batch->buf, intel_obj->buffer))
+      intel_flush(ctx);
+
+   if (intel_obj->buffer == NULL) {
+      obj->Pointer = NULL;
+      return NULL;
+   }
+
+   /* If the user doesn't care about existing buffer contents and mapping
+    * would cause us to block, then throw out the old buffer.
+    */
+   if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) &&
+       (access & GL_MAP_INVALIDATE_BUFFER_BIT) &&
+       drm_intel_bo_busy(intel_obj->buffer)) {
+      drm_intel_bo_unreference(intel_obj->buffer);
+      intel_obj->buffer = drm_intel_bo_alloc(intel->bufmgr, "bufferobj",
+					     intel_obj->Base.Size, 64); /* NOTE(review): result unchecked */
+   }
+
+   /* If the user is mapping a range of an active buffer object but
+    * doesn't require the current contents of that range, make a new
+    * BO, and we'll copy what they put in there out at unmap or
+    * FlushRange time.
+    */
+   if ((access & GL_MAP_INVALIDATE_RANGE_BIT) &&
+       drm_intel_bo_busy(intel_obj->buffer)) {
+      if (access & GL_MAP_FLUSH_EXPLICIT_BIT) {
+	 intel_obj->range_map_buffer = malloc(length); /* NOTE(review): may be NULL */
+	 obj->Pointer = intel_obj->range_map_buffer;
+      } else {
+	 intel_obj->range_map_bo = drm_intel_bo_alloc(intel->bufmgr,
+						      "range map",
+						      length, 64);
+	 if (!(access & GL_MAP_READ_BIT)) {
+	    drm_intel_gem_bo_map_gtt(intel_obj->range_map_bo);
+	    intel_obj->mapped_gtt = GL_TRUE;
+	 } else {
+	    drm_intel_bo_map(intel_obj->range_map_bo,
+			     (access & GL_MAP_WRITE_BIT) != 0);
+	    intel_obj->mapped_gtt = GL_FALSE;
+	 }
+	 obj->Pointer = intel_obj->range_map_bo->virtual;
+      }
+      return obj->Pointer;
+   }
+   /* Plain mapping of the bo: GTT map unless read access was requested. */
+   if (!(access & GL_MAP_READ_BIT)) {
+      drm_intel_gem_bo_map_gtt(intel_obj->buffer);
+      intel_obj->mapped_gtt = GL_TRUE;
+   } else {
+      drm_intel_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
+      intel_obj->mapped_gtt = GL_FALSE;
+   }
+
+   obj->Pointer = intel_obj->buffer->virtual + offset;
+   return obj->Pointer;
+}
+
+/* Upload length bytes starting at offset (relative to the mapped range) from
+ * the malloc'ed staging buffer into the real bo.  A temporary bo is used per
+ * flush because FlushMappedBufferRange may be followed by further writes to
+ * the pointer, so a bo-backed mapping would have to be re-mapped each time.
+ */
+static void
+intel_bufferobj_flush_mapped_range(GLcontext *ctx, GLenum target,
+				   GLintptr offset, GLsizeiptr length,
+				   struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+   drm_intel_bo *temp_bo;
+
+   /* Unless we're in the range map using a temporary system buffer,
+    * there's no work to do.  An empty range needs no upload either.
+    */
+   if (intel_obj->range_map_buffer == NULL || length == 0)
+      return;
+
+   temp_bo = drm_intel_bo_alloc(intel->bufmgr, "range map flush", length, 64);
+   /* The staging buffer begins at the mapping's start: skip offset bytes. */
+   drm_intel_bo_subdata(temp_bo, 0, length,
+			(const char *) intel_obj->range_map_buffer + offset);
+   intel_emit_linear_blit(intel,
+			  intel_obj->buffer, obj->Offset + offset,
+			  temp_bo, 0,
+			  length);
+
+   drm_intel_bo_unreference(temp_bo);
+}
+
+
+/**
+ * Tear down the active mapping.  Called via glUnmapBuffer().
+ */
+static GLboolean
+intel_bufferobj_unmap(GLcontext * ctx,
+                      GLenum target, struct gl_buffer_object *obj)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+
+   assert(intel_obj);
+   assert(obj->Pointer);
+   if (intel_obj->sys_buffer != NULL) {
+      /* always keep the mapping around. */
+   } else if (intel_obj->range_map_buffer != NULL) {
+      /* Since we've emitted some blits to buffers that will (likely) be used
+       * in rendering operations in other cache domains in this batch, emit a
+       * flush.  Once again, we wish for a domain tracker in libdrm to cover
+       * usage inside of a batchbuffer.
+       */
+      intel_batchbuffer_emit_mi_flush(intel->batch);
+      free(intel_obj->range_map_buffer); /* not copied back here */
+      intel_obj->range_map_buffer = NULL;
+   } else if (intel_obj->range_map_bo != NULL) {
+      if (intel_obj->mapped_gtt) {
+	 drm_intel_gem_bo_unmap_gtt(intel_obj->range_map_bo);
+      } else {
+	 drm_intel_bo_unmap(intel_obj->range_map_bo);
+      }
+      /* Blit what was written to the staging bo into the real buffer. */
+      intel_emit_linear_blit(intel,
+			     intel_obj->buffer, obj->Offset,
+			     intel_obj->range_map_bo, 0,
+			     obj->Length);
+
+      /* Since we've emitted some blits to buffers that will (likely) be used
+       * in rendering operations in other cache domains in this batch, emit a
+       * flush.  Once again, we wish for a domain tracker in libdrm to cover
+       * usage inside of a batchbuffer.
+       */
+      intel_batchbuffer_emit_mi_flush(intel->batch);
+
+      drm_intel_bo_unreference(intel_obj->range_map_bo);
+      intel_obj->range_map_bo = NULL;
+   } else if (intel_obj->buffer != NULL) {
+      if (intel_obj->mapped_gtt) {
+	 drm_intel_gem_bo_unmap_gtt(intel_obj->buffer);
+      } else {
+	 drm_intel_bo_unmap(intel_obj->buffer);
+      }
+   }
+   obj->Pointer = NULL;
+   obj->Offset = 0;
+   obj->Length = 0;
+   /* Every path reports success. */
+   return GL_TRUE;
+}
+
+drm_intel_bo *
+intel_bufferobj_buffer(struct intel_context *intel,
+                       struct intel_buffer_object *intel_obj, GLuint flag)
+{
+   if (intel_obj->region) { /* tied to a region: break the tie per flag */
+      if (flag == INTEL_WRITE_PART)
+         intel_bufferobj_cow(intel, intel_obj);
+      else if (flag == INTEL_WRITE_FULL) {
+         intel_bufferobj_release_region(intel, intel_obj);
+	 intel_bufferobj_alloc_buffer(intel, intel_obj);
+      }
+   }
+   /* No bo (data may be in sys_buffer): allocate one and upload into it. */
+   if (intel_obj->buffer == NULL) {
+      void *sys_buffer = intel_obj->sys_buffer;
+
+      /* only one of buffer and sys_buffer could be non-NULL */
+      intel_bufferobj_alloc_buffer(intel, intel_obj);
+      intel_obj->sys_buffer = NULL;
+      /* sys_buffer is cleared first so subdata() writes to the new bo. */
+      intel_bufferobj_subdata(&intel->ctx,
+			      GL_ARRAY_BUFFER_ARB,
+			      0,
+			      intel_obj->Base.Size,
+			      sys_buffer,
+			      &intel_obj->Base);
+      free(sys_buffer);
+      intel_obj->sys_buffer = NULL;
+   }
+
+   return intel_obj->buffer;
+}
+
+/* CopyBufferSubData hook: memcpy when either object is in system memory,
+ * otherwise blit between the two bos. */
+static void
+intel_bufferobj_copy_subdata(GLcontext *ctx,
+			     struct gl_buffer_object *src,
+			     struct gl_buffer_object *dst,
+			     GLintptr read_offset, GLintptr write_offset,
+			     GLsizeiptr size)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_buffer_object *intel_src = intel_buffer_object(src);
+   struct intel_buffer_object *intel_dst = intel_buffer_object(dst);
+   drm_intel_bo *src_bo, *dst_bo;
+
+   if (size == 0)
+      return;
+
+   /* If we're in system memory, just map and memcpy. */
+   if (intel_src->sys_buffer || intel_dst->sys_buffer) {
+      /* The same buffer may be used, but note that regions copied may
+       * not overlap.
+       */
+      if (src == dst) {
+	 char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
+					 GL_READ_WRITE, dst);
+	 memcpy(ptr + write_offset, ptr + read_offset, size);
+	 intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+      } else {
+	 const char *src_ptr;
+	 char *dst_ptr;
+
+	 src_ptr = intel_bufferobj_map(ctx, GL_COPY_READ_BUFFER,
+				       GL_READ_ONLY, src);
+	 dst_ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER,
+				       GL_WRITE_ONLY, dst);
+	 memcpy(dst_ptr + write_offset, src_ptr + read_offset, size);
+	 intel_bufferobj_unmap(ctx, GL_COPY_READ_BUFFER, src);
+	 intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst);
+      }
+      /* The copy is done; do not fall through and blit it a second time. */
+      return;
+   }
+
+   /* Otherwise, we have real BOs, so blit them. */
+   dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART);
+   src_bo = intel_bufferobj_buffer(intel, intel_src, INTEL_READ);
+   intel_emit_linear_blit(intel,
+			  dst_bo, write_offset,
+			  src_bo, read_offset, size);
+
+   /* Since we've emitted some blits to buffers that will (likely) be used
+    * in rendering operations in other cache domains in this batch, emit a
+    * flush.  Once again, we wish for a domain tracker in libdrm to cover
+    * usage inside of a batchbuffer.
+    */
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+#if FEATURE_APPLE_object_purgeable
+static GLenum
+intel_buffer_purgeable(GLcontext * ctx,
+                       drm_intel_bo *buffer,
+                       GLenum option)
+{
+   /* Without a bo there is nothing to keep: report it as released. */
+   if (buffer == NULL)
+      return GL_RELEASED_APPLE;
+   /* A non-zero madvise(DONTNEED) result is reported as volatile. */
+   return drm_intel_bo_madvise(buffer, I915_MADV_DONTNEED) ?
+      GL_VOLATILE_APPLE : GL_RELEASED_APPLE;
+}
+
+/* BufferObjectPurgeable hook.  With a bo, defer to intel_buffer_purgeable();
+ * otherwise either drop the system-memory copy (GL_RELEASED_APPLE) or get
+ * a bo through intel_bufferobj_buffer() first and mark that one. */
+static GLenum
+intel_buffer_object_purgeable(GLcontext * ctx,
+                              struct gl_buffer_object *obj,
+                              GLenum option)
+{
+   struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+   drm_intel_bo *bo = intel_obj->buffer;
+
+   /* Objects that already have a bo just forward to the bo helper. */
+   if (bo != NULL)
+      return intel_buffer_purgeable(ctx, bo, option);
+
+   /* Released: the system-memory copy can simply be thrown away. */
+   if (option == GL_RELEASED_APPLE) {
+      free(intel_obj->sys_buffer);
+      intel_obj->sys_buffer = NULL;
+      return GL_RELEASED_APPLE;
+   }
+
+   /* XXX Create the buffer and madvise(MADV_DONTNEED)? */
+   bo = intel_bufferobj_buffer(intel_context(ctx), intel_obj, INTEL_READ);
+   return intel_buffer_purgeable(ctx, bo, option);
+}
+
+static GLenum
+intel_texture_object_purgeable(GLcontext * ctx,
+                               struct gl_texture_object *obj,
+                               GLenum option)
+{
+   struct intel_texture_object *tex = intel_texture_object(obj);
+
+   /* A texture with no miptree or no region has no bo to mark. */
+   if (!tex->mt || !tex->mt->region)
+      return GL_RELEASED_APPLE;
+
+   return intel_buffer_purgeable(ctx, tex->mt->region->buffer, option);
+}
+
+static GLenum
+intel_render_object_purgeable(GLcontext * ctx,
+                              struct gl_renderbuffer *obj,
+                              GLenum option)
+{
+   struct intel_renderbuffer *irb = intel_renderbuffer(obj);
+
+   /* A renderbuffer without a region has no bo to mark. */
+   if (!irb->region)
+      return GL_RELEASED_APPLE;
+
+   return intel_buffer_purgeable(ctx, irb->region->buffer, option);
+}
+
+static GLenum
+intel_buffer_unpurgeable(GLcontext * ctx,
+                         drm_intel_bo *buffer,
+                         GLenum option)
+{
+   /* Without a bo nothing can have been retained. */
+   if (buffer == NULL)
+      return GL_UNDEFINED_APPLE;
+
+   /* A non-zero madvise(WILLNEED) result is reported as retained. */
+   return drm_intel_bo_madvise(buffer, I915_MADV_WILLNEED) ?
+      GL_RETAINED_APPLE : GL_UNDEFINED_APPLE;
+}
+
+static GLenum
+intel_buffer_object_unpurgeable(GLcontext * ctx,
+                                struct gl_buffer_object *obj,
+                                GLenum option)
+{  /* forwards the object's bo; a NULL bo yields GL_UNDEFINED_APPLE */
+   return intel_buffer_unpurgeable (ctx, intel_buffer_object (obj)->buffer, option);
+}
+
+static GLenum
+intel_texture_object_unpurgeable(GLcontext * ctx,
+                                 struct gl_texture_object *obj,
+                                 GLenum option)
+{
+   struct intel_texture_object *tex = intel_texture_object(obj);
+
+   /* No miptree or region means no bo whose contents could be kept. */
+   if (!tex->mt || !tex->mt->region)
+      return GL_UNDEFINED_APPLE;
+
+   return intel_buffer_unpurgeable(ctx, tex->mt->region->buffer, option);
+}
+
+static GLenum
+intel_render_object_unpurgeable(GLcontext * ctx,
+                                struct gl_renderbuffer *obj,
+                                GLenum option)
+{
+   struct intel_renderbuffer *irb = intel_renderbuffer(obj);
+
+   /* No region means no bo whose contents could be kept. */
+   if (!irb->region)
+      return GL_UNDEFINED_APPLE;
+
+   return intel_buffer_unpurgeable(ctx, irb->region->buffer, option);
+}
+#endif
+
+void
+intelInitBufferObjectFuncs(struct dd_function_table *functions)
+{
+   /* Object lifetime and data transfer. */
+   functions->NewBufferObject = intel_bufferobj_alloc;
+   functions->DeleteBuffer = intel_bufferobj_free;
+   functions->BufferData = intel_bufferobj_data;
+   functions->BufferSubData = intel_bufferobj_subdata;
+   functions->GetBufferSubData = intel_bufferobj_get_subdata;
+   functions->CopyBufferSubData = intel_bufferobj_copy_subdata;
+   /* CPU mappings. */
+   functions->MapBuffer = intel_bufferobj_map;
+   functions->MapBufferRange = intel_bufferobj_map_range;
+   functions->FlushMappedBufferRange = intel_bufferobj_flush_mapped_range;
+   functions->UnmapBuffer = intel_bufferobj_unmap;
+#if FEATURE_APPLE_object_purgeable
+   functions->BufferObjectPurgeable = intel_buffer_object_purgeable;
+   functions->BufferObjectUnpurgeable = intel_buffer_object_unpurgeable;
+   functions->TextureObjectPurgeable = intel_texture_object_purgeable;
+   functions->TextureObjectUnpurgeable = intel_texture_object_unpurgeable;
+   functions->RenderObjectPurgeable = intel_render_object_purgeable;
+   functions->RenderObjectUnpurgeable = intel_render_object_unpurgeable;
+#endif
+}
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
new file mode 100644
index 0000000000..b15c192106
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h
@@ -0,0 +1,92 @@
+/**************************************************************************
+ * 
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_BUFFEROBJ_H
+#define INTEL_BUFFEROBJ_H
+
+#include "main/mtypes.h"
+
+struct intel_context;
+struct intel_region;
+struct gl_buffer_object;
+
+
+/**
+ * Intel vertex/pixel buffer object, derived from Mesa's gl_buffer_object.
+ */
+struct intel_buffer_object
+{
+   struct gl_buffer_object Base; /* first member; intel_buffer_object() casts Base pointers to this struct */
+   drm_intel_bo *buffer;     /* the low-level buffer manager's buffer handle */
+   /** System memory buffer data, if not using a BO to store the data. */
+   void *sys_buffer;
+
+   struct intel_region *region; /* Is there a zero-copy texture
+                                   associated with this (pixel)
+                                   buffer object? */
+   /* NOTE(review): range_map_* look like MapBufferRange state -- confirm */
+   drm_intel_bo *range_map_bo;
+   void *range_map_buffer;
+   unsigned int range_map_offset;
+   GLsizei range_map_size;
+
+   GLboolean mapped_gtt; /* NOTE(review): presumably set while mapped via the GTT -- confirm */
+};
+
+
+/* Get the bm buffer associated with a GL bufferobject:
+ */
+drm_intel_bo *intel_bufferobj_buffer(struct intel_context *intel,
+				     struct intel_buffer_object *obj,
+				     GLuint flag);
+
+/* Hook the bufferobject implementation into mesa: 
+ */
+void intelInitBufferObjectFuncs(struct dd_function_table *functions);
+
+
+
+/* Downcast a core gl_buffer_object to the driver's intel_buffer_object.
+ * NOTE(review): this comment used to say obj->Name == 0 tests are needed
+ * because Mesa allocates a couple of gl_buffer_object structs statically;
+ * the cast below is unconditional, so confirm callers never pass those.
+ */
+static INLINE struct intel_buffer_object *
+intel_buffer_object(struct gl_buffer_object *obj)
+{
+   return (struct intel_buffer_object *) obj;
+}
+
+/* Helpers for zerocopy image uploads.  See also intel_regions.h:
+ */
+void intel_bufferobj_cow(struct intel_context *intel,
+                         struct intel_buffer_object *intel_obj);
+void intel_bufferobj_release_region(struct intel_context *intel,
+                                    struct intel_buffer_object *intel_obj);
+
+
+#endif
diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c
new file mode 100644
index 0000000000..1bff344a45
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_buffers.c
@@ -0,0 +1,325 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "intel_context.h"
+#include "intel_buffers.h"
+#include "intel_fbo.h"
+#include "main/framebuffer.h"
+
+/**
+ * Look up the region behind the first color draw buffer of the current
+ * draw framebuffer.  Yields NULL when intel_renderbuffer() gives none.
+ */
+struct intel_region *
+intel_drawbuf_region(struct intel_context *intel)
+{
+   struct gl_framebuffer *draw_fb = intel->ctx.DrawBuffer;
+   struct intel_renderbuffer *color_rb;
+
+   color_rb = intel_renderbuffer(draw_fb->_ColorDrawBuffers[0]);
+   return color_rb ? color_rb->region : NULL;
+}
+
+/**
+ * Look up the region behind the color read buffer of the current read
+ * framebuffer.  Yields NULL when intel_renderbuffer() gives none.
+ */
+struct intel_region *
+intel_readbuf_region(struct intel_context *intel)
+{
+   struct gl_framebuffer *read_fb = intel->ctx.ReadBuffer;
+   struct intel_renderbuffer *read_rb;
+
+   read_rb = intel_renderbuffer(read_fb->_ColorReadBuffer);
+   return read_rb ? read_rb->region : NULL;
+}
+
+/**
+ * Flag the front buffer as dirty when the next draw will land in it.
+ * Only window-system framebuffers (Name == 0) whose first color draw
+ * buffer is BUFFER_FRONT_LEFT set intel->front_buffer_dirty.
+ */
+void
+intel_check_front_buffer_rendering(struct intel_context *intel)
+{
+   const struct gl_framebuffer *draw_fb = intel->ctx.DrawBuffer;
+
+   /* user FBOs and framebuffers without color draw buffers: nothing to do */
+   if (draw_fb->Name != 0 || !(draw_fb->_NumColorDrawBuffers > 0))
+      return;
+
+   if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT)
+      intel->front_buffer_dirty = GL_TRUE;
+}
+
+
+/**
+ * Update the hardware state for drawing into a window or framebuffer object.
+ *
+ * Called by glDrawBuffer, glBindFramebufferEXT, MakeCurrent, and other
+ * places within the driver; it must run whenever the current framebuffer
+ * or its renderbuffers change, or different color buffers are drawn into.
+ *
+ * \param ctx  GL context whose driver state is refreshed
+ * \param fb   framebuffer to draw into; a NULL fb makes this a no-op
+ */
+void
+intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb)
+{
+   struct intel_context *intel = intel_context(ctx);
+   /* Zeroed up front: set_draw_region() gets _NumColorDrawBuffers entries. */
+   struct intel_region *colorRegions[MAX_DRAW_BUFFERS] = { NULL };
+   struct intel_region *depthRegion = NULL;
+   struct intel_renderbuffer *irbDepth = NULL, *irbStencil = NULL;
+
+   if (!fb) {
+      /* this can happen during the initial context initialization */
+      return;
+   }
+
+   /* Do this here, not core Mesa, since this function is called from
+    * many places within the driver.
+    */
+   if (ctx->NewState & _NEW_BUFFERS) {
+      /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
+      _mesa_update_framebuffer(ctx);
+      /* this updates the DrawBuffer's Width/Height if it's a FBO */
+      _mesa_update_draw_buffer_bounds(ctx);
+   }
+
+   if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
+      /* this may occur when we're called by glBindFrameBuffer() during
+       * the process of someone setting up renderbuffers, etc.
+       */
+      /*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/
+      return;
+   }
+
+   /* How many color buffers are we drawing into?
+    *
+    * If there are zero buffers or the buffer is too big, don't configure any
+    * regions for hardware drawing.  We'll fallback to software below.  Not
+    * having regions set makes some of the software fallback paths faster.
+    */
+   if ((fb->Width > ctx->Const.MaxRenderbufferSize)
+       || (fb->Height > ctx->Const.MaxRenderbufferSize)
+       || (fb->_NumColorDrawBuffers == 0)) {
+      /* no hardware color target; every other slot is NULL already */
+      colorRegions[0] = NULL;
+   }
+   else if (fb->_NumColorDrawBuffers > 1) {
+       int i;
+       struct intel_renderbuffer *irb;
+
+       for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
+           irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]);
+           colorRegions[i] = irb ? irb->region : NULL;
+       }
+   }
+   else {
+      /* Get the intel_renderbuffer for the single colorbuffer we're drawing
+       * into.
+       */
+      if (fb->Name == 0) {
+         /* drawing to window system buffer */
+         if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT)
+            colorRegions[0] = intel_get_rb_region(fb, BUFFER_FRONT_LEFT);
+         else
+            colorRegions[0] = intel_get_rb_region(fb, BUFFER_BACK_LEFT);
+      }
+      else {
+         /* drawing to user-created FBO */
+         struct intel_renderbuffer *irb;
+         irb = intel_renderbuffer(fb->_ColorDrawBuffers[0]);
+         colorRegions[0] = (irb && irb->region) ? irb->region : NULL;
+      }
+   }
+
+   if (!colorRegions[0]) {
+      FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_TRUE);
+   }
+   else {
+      FALLBACK(intel, INTEL_FALLBACK_DRAW_BUFFER, GL_FALSE);
+   }
+
+   /***
+    *** Get depth buffer region and check if we need a software fallback.
+    *** Note that the depth buffer is usually a DEPTH_STENCIL buffer.
+    ***/
+   if (fb->_DepthBuffer && fb->_DepthBuffer->Wrapped) {
+      irbDepth = intel_renderbuffer(fb->_DepthBuffer->Wrapped);
+      if (irbDepth && irbDepth->region) {
+         FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE);
+         depthRegion = irbDepth->region;
+      }
+      else {
+         FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_TRUE);
+         depthRegion = NULL;
+      }
+   }
+   else {
+      /* not using depth buffer */
+      FALLBACK(intel, INTEL_FALLBACK_DEPTH_BUFFER, GL_FALSE);
+      depthRegion = NULL;
+   }
+
+   /***
+    *** Stencil buffer
+    *** This can only be hardware accelerated if we're using a
+    *** combined DEPTH_STENCIL buffer.
+    ***/
+   if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) {
+      irbStencil = intel_renderbuffer(fb->_StencilBuffer->Wrapped);
+      if (irbStencil && irbStencil->region) {
+         ASSERT(irbStencil->Base.Format == MESA_FORMAT_S8_Z24);
+         FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE);
+      }
+      else {
+         FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_TRUE);
+      }
+   }
+   else {
+      /* XXX FBO: instead of FALSE, pass ctx->Stencil._Enabled ??? */
+      FALLBACK(intel, INTEL_FALLBACK_STENCIL_BUFFER, GL_FALSE);
+   }
+
+   /* If we have a (packed) stencil buffer attached but no depth buffer,
+    * we still need to set up the shared depth/stencil state so we can use it.
+    */
+   if (depthRegion == NULL && irbStencil && irbStencil->region)
+      depthRegion = irbStencil->region;
+
+   /*
+    * Update depth and stencil test state
+    */
+   if (ctx->Driver.Enable) {
+      ctx->Driver.Enable(ctx, GL_DEPTH_TEST,
+                         (ctx->Depth.Test && fb->Visual.depthBits > 0));
+      ctx->Driver.Enable(ctx, GL_STENCIL_TEST,
+                         (ctx->Stencil.Enabled && fb->Visual.stencilBits > 0));
+   }
+   else {
+      /* Mesa's Stencil._Enabled field is updated when
+       * _NEW_BUFFERS | _NEW_STENCIL, but i965 code assumes that the value
+       * only changes with _NEW_STENCIL (which seems sensible).  So flag it
+       * here since this is the _NEW_BUFFERS path.
+       */
+      intel->NewGLState |= (_NEW_DEPTH | _NEW_STENCIL);
+   }
+
+   intel->vtbl.set_draw_region(intel, colorRegions, depthRegion, 
+                               fb->_NumColorDrawBuffers);
+   intel->NewGLState |= _NEW_BUFFERS;
+
+   /* update viewport since it depends on window size */
+#ifdef I915
+   intelCalcViewport(ctx);
+#else
+   intel->NewGLState |= _NEW_VIEWPORT;
+#endif
+   /* Set state we know depends on drawable parameters:
+    */
+   if (ctx->Driver.Scissor)
+      ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
+                          ctx->Scissor.Width, ctx->Scissor.Height);
+   intel->NewGLState |= _NEW_SCISSOR;
+
+   if (ctx->Driver.DepthRange)
+      ctx->Driver.DepthRange(ctx, ctx->Viewport.Near, ctx->Viewport.Far);
+
+   /* Update culling direction which changes depending on the
+    * orientation of the buffer:
+    */
+   if (ctx->Driver.FrontFace)
+      ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
+   else
+      intel->NewGLState |= _NEW_POLYGON;
+}
+
+
+/* Driver.DrawBuffer hook: track front rendering, then intel_draw_buffer(). */
+static void
+intelDrawBuffer(GLcontext * ctx, GLenum mode)
+{
+   struct gl_framebuffer *const draw_fb = ctx->DrawBuffer;
+
+   if (draw_fb != NULL && draw_fb->Name == 0) {
+      struct intel_context *const intel = intel_context(ctx);
+      const GLboolean now_front = (mode == GL_FRONT_LEFT || mode == GL_FRONT);
+      const GLboolean was_front = intel->is_front_buffer_rendering;
+
+      intel->is_front_buffer_rendering = now_front;
+
+      /* A switch from non-front to front rendering needs fresh buffers
+       * (including the fake front), so invalidate the DRI drawable. */
+      if (now_front && !was_front)
+         dri2InvalidateDrawable(intel->driContext->driDrawablePriv);
+   }
+
+   intel_draw_buffer(ctx, ctx->DrawBuffer);
+}
+
+
+/* Driver.ReadBuffer hook: track front-buffer reading for winsys buffers. */
+static void
+intelReadBuffer(GLcontext * ctx, GLenum mode)
+{
+   /* NOTE(review): this tests ctx->DrawBuffer although the drawable that
+    * gets invalidated is the readable one -- confirm ReadBuffer was not
+    * intended here.
+    */
+   if (ctx->DrawBuffer != NULL && ctx->DrawBuffer->Name == 0) {
+      struct intel_context *const intel = intel_context(ctx);
+      const GLboolean now_front = (mode == GL_FRONT_LEFT || mode == GL_FRONT);
+      const GLboolean was_front = intel->is_front_buffer_reading;
+
+      intel->is_front_buffer_reading = now_front;
+
+      /* Going from non-front to front reading requires new buffers
+       * (including the fake front): invalidate the readable drawable. */
+      if (now_front && !was_front)
+         dri2InvalidateDrawable(intel->driContext->driReadablePriv);
+   }
+
+   /* When both bindings are the same framebuffer, re-run the draw-buffer
+    * update: it refreshes FBO completeness, which a glReadBuffer call can
+    * change when GL_READ_BUFFER referred to a missing renderbuffer.
+    */
+   if (ctx->ReadBuffer == ctx->DrawBuffer)
+      intel_draw_buffer(ctx, ctx->DrawBuffer);
+   /* Pixel-reading entry points (glReadPixels, glCopyPixels, etc) look at
+    * ctx->ReadBuffer themselves and do their own state checks.
+    */
+}
+
+
+void
+intelInitBufferFuncs(struct dd_function_table *functions)
+{
+   functions->ReadBuffer = intelReadBuffer;   /* read-buffer selection hook */
+   functions->DrawBuffer = intelDrawBuffer;   /* draw-buffer selection hook */
+}
diff --git a/src/mesa/drivers/dri/intel/intel_buffers.h b/src/mesa/drivers/dri/intel/intel_buffers.h
new file mode 100644
index 0000000000..abb86aade6
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_buffers.h
@@ -0,0 +1,56 @@
+
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_BUFFERS_H
+#define INTEL_BUFFERS_H
+
+#include "dri_util.h"
+#include "drm.h"
+
+struct intel_context;
+struct intel_framebuffer;
+
+extern struct intel_region *intel_readbuf_region(struct intel_context *intel);
+
+extern struct intel_region *intel_drawbuf_region(struct intel_context *intel);
+
+extern void intel_check_front_buffer_rendering(struct intel_context *intel);
+
+extern void intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb);
+
+extern void intelInitBufferFuncs(struct dd_function_table *functions);
+
+void intel_get_cliprects(struct intel_context *intel,
+			 struct drm_clip_rect **cliprects,
+			 unsigned int *num_cliprects,
+			 int *x_off, int *y_off);
+#ifdef I915
+void intelCalcViewport(GLcontext * ctx);
+#endif
+
+#endif /* INTEL_BUFFERS_H */
diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
new file mode 100644
index 0000000000..cd614c59e5
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -0,0 +1,128 @@
+ /*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#define PCI_CHIP_I810			0x7121
+#define PCI_CHIP_I810_DC100		0x7123
+#define PCI_CHIP_I810_E			0x7125
+#define PCI_CHIP_I815			0x1132
+
+#define PCI_CHIP_I830_M			0x3577
+#define PCI_CHIP_845_G			0x2562
+#define PCI_CHIP_I855_GM		0x3582
+#define PCI_CHIP_I865_G			0x2572
+
+#define PCI_CHIP_I915_G			0x2582
+#define PCI_CHIP_E7221_G		0x258A
+#define PCI_CHIP_I915_GM		0x2592
+#define PCI_CHIP_I945_G			0x2772
+#define PCI_CHIP_I945_GM		0x27A2
+#define PCI_CHIP_I945_GME		0x27AE
+
+#define PCI_CHIP_Q35_G			0x29B2
+#define PCI_CHIP_G33_G			0x29C2
+#define PCI_CHIP_Q33_G			0x29D2
+
+#define PCI_CHIP_IGD_GM			0xA011
+#define PCI_CHIP_IGD_G			0xA001
+
+#define IS_IGDGM(devid)	((devid) == PCI_CHIP_IGD_GM)
+#define IS_IGDG(devid)	((devid) == PCI_CHIP_IGD_G)
+#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid))
+
+#define PCI_CHIP_I965_G			0x29A2
+#define PCI_CHIP_I965_Q			0x2992
+#define PCI_CHIP_I965_G_1		0x2982
+#define PCI_CHIP_I946_GZ		0x2972
+#define PCI_CHIP_I965_GM                0x2A02
+#define PCI_CHIP_I965_GME               0x2A12
+
+#define PCI_CHIP_GM45_GM                0x2A42
+
+#define PCI_CHIP_IGD_E_G                0x2E02
+#define PCI_CHIP_Q45_G                  0x2E12
+#define PCI_CHIP_G45_G                  0x2E22
+#define PCI_CHIP_G41_G                  0x2E32
+#define PCI_CHIP_B43_G                  0x2E42
+
+#define PCI_CHIP_ILD_G                  0x0042
+#define PCI_CHIP_ILM_G                  0x0046
+
+#define PCI_CHIP_SANDYBRIDGE		0x0102
+#define PCI_CHIP_SANDYBRIDGE_M		0x0106
+/* Chipset family predicates.  devid may be any integer expression. */
+#define IS_MOBILE(devid)	((devid) == PCI_CHIP_I855_GM || \
+				 (devid) == PCI_CHIP_I915_GM || \
+				 (devid) == PCI_CHIP_I945_GM || \
+				 (devid) == PCI_CHIP_I945_GME || \
+				 (devid) == PCI_CHIP_I965_GM || \
+				 (devid) == PCI_CHIP_I965_GME || \
+				 (devid) == PCI_CHIP_GM45_GM || \
+				 IS_IGD(devid) || \
+				 (devid) == PCI_CHIP_ILM_G)
+
+#define IS_G45(devid)           ((devid) == PCI_CHIP_IGD_E_G || \
+                                 (devid) == PCI_CHIP_Q45_G || \
+                                 (devid) == PCI_CHIP_G45_G || \
+                                 (devid) == PCI_CHIP_G41_G || \
+                                 (devid) == PCI_CHIP_B43_G)
+#define IS_GM45(devid)          ((devid) == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid)		(IS_G45(devid) || IS_GM45(devid))
+
+#define IS_ILD(devid)           ((devid) == PCI_CHIP_ILD_G)
+#define IS_ILM(devid)           ((devid) == PCI_CHIP_ILM_G)
+#define IS_GEN5(devid)          (IS_ILD(devid) || IS_ILM(devid))
+
+#define IS_915(devid)		((devid) == PCI_CHIP_I915_G || \
+				 (devid) == PCI_CHIP_E7221_G || \
+				 (devid) == PCI_CHIP_I915_GM)
+
+#define IS_945(devid)		((devid) == PCI_CHIP_I945_G || \
+				 (devid) == PCI_CHIP_I945_GM || \
+				 (devid) == PCI_CHIP_I945_GME || \
+				 (devid) == PCI_CHIP_G33_G || \
+				 (devid) == PCI_CHIP_Q33_G || \
+				 (devid) == PCI_CHIP_Q35_G || IS_IGD(devid))
+
+#define IS_GEN4(devid)		((devid) == PCI_CHIP_I965_G || \
+				 (devid) == PCI_CHIP_I965_Q || \
+				 (devid) == PCI_CHIP_I965_G_1 || \
+				 (devid) == PCI_CHIP_I965_GM || \
+				 (devid) == PCI_CHIP_I965_GME || \
+				 (devid) == PCI_CHIP_I946_GZ || \
+				 IS_G4X(devid))
+
+#define IS_GEN6(devid)		((devid) == PCI_CHIP_SANDYBRIDGE || \
+				 (devid) == PCI_CHIP_SANDYBRIDGE_M)
+
+#define IS_965(devid)		(IS_GEN4(devid) || \
+				 IS_G4X(devid) || \
+				 IS_GEN5(devid) || \
+				 IS_GEN6(devid))
+
+#define IS_9XX(devid)		(IS_915(devid) || \
+				 IS_945(devid) || \
+				 IS_965(devid))
diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c
new file mode 100644
index 0000000000..3c22118866
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_clear.c
@@ -0,0 +1,207 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2009 Intel Corporation.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "swrast/swrast.h"
+#include "drivers/common/meta.h"
+
+#include "intel_context.h"
+#include "intel_blit.h"
+#include "intel_clear.h"
+#include "intel_fbo.h"
+#include "intel_regions.h"
+
+#define FILE_DEBUG_FLAG DEBUG_BLIT
+
+static const char *buffer_names[] = {   /* debug labels, indexed by BUFFER_* */
+   [BUFFER_FRONT_LEFT] = "front",
+   [BUFFER_BACK_LEFT] = "back",
+   [BUFFER_FRONT_RIGHT] = "front right",
+   [BUFFER_BACK_RIGHT] = "back right",
+   [BUFFER_DEPTH] = "depth",
+   [BUFFER_STENCIL] = "stencil",
+   [BUFFER_ACCUM] = "accum",
+   [BUFFER_AUX0] = "aux0",
+   [BUFFER_COLOR0] = "color0",
+   [BUFFER_COLOR1] = "color1",
+   [BUFFER_COLOR2] = "color2",
+   [BUFFER_COLOR3] = "color3",
+   [BUFFER_COLOR4] = "color4",
+   [BUFFER_COLOR5] = "color5",
+   [BUFFER_COLOR6] = "color6",
+   [BUFFER_COLOR7] = "color7",
+};
+
+/**
+ * Called by ctx->Driver.Clear.  Sorts the requested buffers into blitter,
+ * 3D-engine (meta) and swrast clears, then performs each group.
+ */
+static void
+intelClear(GLcontext *ctx, GLbitfield mask)
+{
+   struct intel_context *intel = intel_context(ctx);
+   const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]);
+   GLbitfield tri_mask = 0;
+   GLbitfield blit_mask = 0;
+   GLbitfield swrast_mask = 0;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   GLuint i;
+
+   if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
+      intel->front_buffer_dirty = GL_TRUE;
+   }
+
+   /* HW color buffers (front, back, aux, generic FBO, etc) */
+   if (colorMask == ~0) {
+      /* clear all R,G,B,A */
+      /* XXX FBO: need to check if colorbuffers are software RBOs! */
+      blit_mask |= (mask & BUFFER_BITS_COLOR);
+   }
+   else {
+      /* glColorMask in effect */
+      tri_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT));
+   }
+
+   /* Make sure we have up to date buffers before we start looking at
+    * the tiling bits to determine how to clear. */
+   intel_prepare_render(intel);
+
+   /* HW stencil */
+   if (mask & BUFFER_BIT_STENCIL) {
+      const struct intel_region *stencilRegion
+         = intel_get_rb_region(fb, BUFFER_STENCIL);
+      if (stencilRegion) {
+         /* have hw stencil */
+         if (stencilRegion->tiling == I915_TILING_Y ||
+             (ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
+            /* We have to use the 3D engine if we're clearing a partial mask
+             * of the stencil buffer, or if we're on a 965 which has a tiled
+             * depth/stencil buffer in a layout we can't blit to.
+             */
+            tri_mask |= BUFFER_BIT_STENCIL;
+         }
+         else {
+            /* clearing all stencil bits, use blitting */
+            blit_mask |= BUFFER_BIT_STENCIL;
+         }
+      }
+   }
+
+   /* HW depth */
+   if (mask & BUFFER_BIT_DEPTH) {
+      const struct intel_region *irb = intel_get_rb_region(fb, BUFFER_DEPTH);
+
+      /* Clear depth with whatever method is used for stencil (see above).
+       * Without a HW region the bit is left for the swrast fallback. */
+      if (irb && (irb->tiling == I915_TILING_Y ||
+                  (tri_mask & BUFFER_BIT_STENCIL)))
+         tri_mask |= BUFFER_BIT_DEPTH;
+      else if (irb)
+         blit_mask |= BUFFER_BIT_DEPTH;
+   }
+
+   /* If we're doing a tri pass for depth/stencil, include a likely color
+    * buffer with it.
+    */
+   if (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) {
+      int color_bit = _mesa_ffs(mask & BUFFER_BITS_COLOR);
+      if (color_bit != 0) {
+         tri_mask |= blit_mask & (1 << (color_bit - 1));
+         blit_mask &= ~(1 << (color_bit - 1));
+      }
+   }
+
+   if (intel->gen >= 6) {
+      /* Blits are in a different ringbuffer so we don't use them. */
+      tri_mask |= blit_mask;
+      blit_mask = 0;
+   }
+
+   /* SW fallback clearing */
+   swrast_mask = mask & ~tri_mask & ~blit_mask;
+
+   {
+      /* look for non-Intel renderbuffers (clear them with swrast) */
+      GLbitfield blit_or_tri = blit_mask | tri_mask;
+      while (blit_or_tri) {
+         GLuint buf = _mesa_ffs(blit_or_tri) - 1;
+         GLbitfield bufBit = 1 << buf;
+         if (!fb->Attachment[buf].Renderbuffer->ClassID) {
+            blit_mask &= ~bufBit;
+            tri_mask &= ~bufBit;
+            swrast_mask |= bufBit;
+         }
+         blit_or_tri ^= bufBit;
+      }
+   }
+
+   if (blit_mask) {
+      if (INTEL_DEBUG & DEBUG_BLIT) {
+         DBG("blit clear:");
+         for (i = 0; i < BUFFER_COUNT; i++) {
+            if (blit_mask & (1 << i))
+               DBG(" %s", buffer_names[i]);
+         }
+         DBG("\n");
+      }
+      intelClearWithBlit(ctx, blit_mask);
+   }
+
+   if (tri_mask) {
+      if (INTEL_DEBUG & DEBUG_BLIT) {
+         DBG("tri clear:");
+         for (i = 0; i < BUFFER_COUNT; i++) {
+            if (tri_mask & (1 << i))
+               DBG(" %s", buffer_names[i]);
+         }
+         DBG("\n");
+      }
+
+      _mesa_meta_Clear(&intel->ctx, tri_mask);
+   }
+
+   if (swrast_mask) {
+      if (INTEL_DEBUG & DEBUG_BLIT) {
+         DBG("swrast clear:");
+         for (i = 0; i < BUFFER_COUNT; i++) {
+            if (swrast_mask & (1 << i))
+               DBG(" %s", buffer_names[i]);
+         }
+         DBG("\n");
+      }
+      _swrast_Clear(ctx, swrast_mask);
+   }
+}
+
+
+void
+intelInitClearFuncs(struct dd_function_table *functions)
+{
+   functions->Clear = intelClear;   /* hook invoked through ctx->Driver.Clear */
+}
diff --git a/src/mesa/drivers/dri/intel/intel_clear.h b/src/mesa/drivers/dri/intel/intel_clear.h
new file mode 100644
index 0000000000..7fd6b310a9
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_clear.h
@@ -0,0 +1,38 @@
+
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_CLEAR_H
+#define INTEL_CLEAR_H
+
+struct dd_function_table;
+
+extern void
+intelInitClearFuncs(struct dd_function_table *functions);
+
+
+#endif /* INTEL_CLEAR_H */
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
new file mode 100644
index 0000000000..5f2035d79c
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -0,0 +1,912 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/extensions.h"
+#include "main/framebuffer.h"
+#include "main/imports.h"
+#include "main/points.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
+
+#include "i830_dri.h"
+
+#include "intel_chipset.h"
+#include "intel_buffers.h"
+#include "intel_tex.h"
+#include "intel_batchbuffer.h"
+#include "intel_clear.h"
+#include "intel_extensions.h"
+#include "intel_pixel.h"
+#include "intel_regions.h"
+#include "intel_buffer_objects.h"
+#include "intel_fbo.h"
+#include "intel_bufmgr.h"
+#include "intel_screen.h"
+
+#include "drirenderbuffer.h"
+#include "utils.h"
+
+
+#ifndef INTEL_DEBUG
+int INTEL_DEBUG = (0);   /* debug flag mask; assigned in intelInitContext() */
+#endif
+
+
+#define DRIVER_DATE                     "20100330 DEVELOPMENT"
+#define DRIVER_DATE_GEM                 "GEM " DRIVER_DATE
+
+
+/**
+ * GetString driver hook.  Answers GL_VENDOR with a fixed string and
+ * GL_RENDERER with a string built from the chipset name for the screen's
+ * PCI device ID; any other name yields NULL.
+ */
+static const GLubyte *
+intelGetString(GLcontext * ctx, GLenum name)
+{
+   const struct intel_context *const intel = intel_context(ctx);
+   const char *chip_name;
+   static char renderer[128];
+
+   if (name == GL_VENDOR)
+      return (GLubyte *) "Tungsten Graphics, Inc";
+   if (name != GL_RENDERER)
+      return NULL;
+
+   switch (intel->intelScreen->deviceID) {
+   case PCI_CHIP_845_G:
+      chip_name = "Intel(R) 845G";
+      break;
+   case PCI_CHIP_I830_M:
+      chip_name = "Intel(R) 830M";
+      break;
+   case PCI_CHIP_I855_GM:
+      chip_name = "Intel(R) 852GM/855GM";
+      break;
+   case PCI_CHIP_I865_G:
+      chip_name = "Intel(R) 865G";
+      break;
+   case PCI_CHIP_I915_G:
+      chip_name = "Intel(R) 915G";
+      break;
+   case PCI_CHIP_E7221_G:
+      chip_name = "Intel (R) E7221G (i915)";
+      break;
+   case PCI_CHIP_I915_GM:
+      chip_name = "Intel(R) 915GM";
+      break;
+   case PCI_CHIP_I945_G:
+      chip_name = "Intel(R) 945G";
+      break;
+   case PCI_CHIP_I945_GM:
+      chip_name = "Intel(R) 945GM";
+      break;
+   case PCI_CHIP_I945_GME:
+      chip_name = "Intel(R) 945GME";
+      break;
+   case PCI_CHIP_G33_G:
+      chip_name = "Intel(R) G33";
+      break;
+   case PCI_CHIP_Q35_G:
+      chip_name = "Intel(R) Q35";
+      break;
+   case PCI_CHIP_Q33_G:
+      chip_name = "Intel(R) Q33";
+      break;
+   case PCI_CHIP_IGD_GM:
+   case PCI_CHIP_IGD_G:
+      chip_name = "Intel(R) IGD";
+      break;
+   case PCI_CHIP_I965_Q:
+      chip_name = "Intel(R) 965Q";
+      break;
+   case PCI_CHIP_I965_G:
+   case PCI_CHIP_I965_G_1:
+      chip_name = "Intel(R) 965G";
+      break;
+   case PCI_CHIP_I946_GZ:
+      chip_name = "Intel(R) 946GZ";
+      break;
+   case PCI_CHIP_I965_GM:
+      chip_name = "Intel(R) 965GM";
+      break;
+   case PCI_CHIP_I965_GME:
+      chip_name = "Intel(R) 965GME/GLE";
+      break;
+   case PCI_CHIP_GM45_GM:
+      chip_name = "Mobile Intel® GM45 Express Chipset";
+      break;
+   case PCI_CHIP_IGD_E_G:
+      chip_name = "Intel(R) Integrated Graphics Device";
+      break;
+   case PCI_CHIP_G45_G:
+      chip_name = "Intel(R) G45/G43";
+      break;
+   case PCI_CHIP_Q45_G:
+      chip_name = "Intel(R) Q45/Q43";
+      break;
+   case PCI_CHIP_G41_G:
+      chip_name = "Intel(R) G41";
+      break;
+   case PCI_CHIP_B43_G:
+      chip_name = "Intel(R) B43";
+      break;
+   case PCI_CHIP_ILD_G:
+      chip_name = "Intel(R) Ironlake Desktop";
+      break;
+   case PCI_CHIP_ILM_G:
+      chip_name = "Intel(R) Ironlake Mobile";
+      break;
+   default:
+      chip_name = "Unknown Intel Chipset";
+      break;
+   }
+
+   (void) driGetRendererString(renderer, chip_name, DRIVER_DATE_GEM, 0);
+   return (GLubyte *) renderer;
+}
+
+/** Ask the DRI2 loader to flush a dirty window-system front buffer. */
+static void
+intel_flush_front(GLcontext *ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   __DRIscreen *const scrn = intel->intelScreen->driScrnPriv;
+   __DRIdrawable *draw;
+
+   if (ctx->DrawBuffer->Name != 0 || !intel->front_buffer_dirty)
+      return;
+
+   /* The loader hook is only consulted for loader versions >= 2. */
+   if (!scrn->dri2.loader || scrn->dri2.loader->base.version < 2 ||
+       scrn->dri2.loader->flushFrontBuffer == NULL)
+      return;
+
+   draw = intel->driContext->driDrawablePriv;
+   if (!draw || !draw->loaderPrivate)
+      return;
+
+   (*scrn->dri2.loader->flushFrontBuffer)(draw, draw->loaderPrivate);
+   intel->front_buffer_dirty = GL_FALSE; /* intel_prepare_render() re-arms it */
+}
+
+static unsigned
+intel_bits_per_pixel(const struct intel_renderbuffer *rb)
+{  /* byte size of the renderbuffer's format, expressed in bits */
+   return 8 * _mesa_get_format_bytes(rb->Base.Format);
+}
+
+/**
+ * Ask the DRI2 loader for the drawable's current buffers and attach them
+ * to the matching renderbuffers of the drawable's framebuffer.  Returns
+ * early, without attaching anything, when the loader hands back no buffers.
+ */
+void
+intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
+{
+   struct gl_framebuffer *fb = drawable->driverPrivate;
+   struct intel_renderbuffer *rb;
+   struct intel_region *region = NULL, *depth_region;
+   struct intel_context *intel = context->driverPrivate;
+   struct intel_renderbuffer *front_rb, *back_rb, *depth_rb, *stencil_rb;
+   __DRIbuffer *buffers = NULL;
+   __DRIscreen *screen;
+   int i, count;
+   unsigned int attachments[10];
+   const char *region_name;
+
+   /* If we're rendering to the fake front buffer, make sure all the
+    * pending drawing has landed on the real front buffer.  Otherwise
+    * when we eventually get to DRI2GetBuffersWithFormat the stale
+    * real front buffer contents will get copied to the new fake front
+    * buffer.
+    */
+   if (intel->is_front_buffer_rendering) {
+      intel_flush(&intel->ctx);
+      intel_flush_front(&intel->ctx);
+   }
+
+   /* Set this up front, so that in case our buffers get invalidated
+    * while we're getting new buffers, we don't clobber the stamp and
+    * thus ignore the invalidate. */
+   drawable->lastStamp = drawable->dri2.stamp;
+
+   if (INTEL_DEBUG & DEBUG_DRI)
+      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
+
+   screen = intel->intelScreen->driScrnPriv;
+
+   if (screen->dri2.loader
+       && (screen->dri2.loader->base.version > 2)
+       && (screen->dri2.loader->getBuffersWithFormat != NULL)) {
+      /* This entry point takes (attachment, bits per pixel) pairs. */
+      front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
+      back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
+      depth_rb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+      stencil_rb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
+
+      i = 0;
+      if ((intel->is_front_buffer_rendering ||
+           intel->is_front_buffer_reading ||
+           !back_rb) && front_rb) {
+         attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
+         attachments[i++] = intel_bits_per_pixel(front_rb);
+      }
+
+      if (back_rb) {
+         attachments[i++] = __DRI_BUFFER_BACK_LEFT;
+         attachments[i++] = intel_bits_per_pixel(back_rb);
+      }
+
+      if ((depth_rb != NULL) && (stencil_rb != NULL)) {
+         attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL;
+         attachments[i++] = intel_bits_per_pixel(depth_rb);
+      } else if (depth_rb != NULL) {
+         attachments[i++] = __DRI_BUFFER_DEPTH;
+         attachments[i++] = intel_bits_per_pixel(depth_rb);
+      } else if (stencil_rb != NULL) {
+         attachments[i++] = __DRI_BUFFER_STENCIL;
+         attachments[i++] = intel_bits_per_pixel(stencil_rb);
+      }
+
+      buffers = (*screen->dri2.loader->getBuffersWithFormat)(
+         drawable, &drawable->w, &drawable->h,
+         attachments, i / 2, &count, drawable->loaderPrivate);
+   } else if (screen->dri2.loader) {
+      i = 0;
+      if (intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT))
+         attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
+      if (intel_get_renderbuffer(fb, BUFFER_BACK_LEFT))
+         attachments[i++] = __DRI_BUFFER_BACK_LEFT;
+      if (intel_get_renderbuffer(fb, BUFFER_DEPTH))
+         attachments[i++] = __DRI_BUFFER_DEPTH;
+      if (intel_get_renderbuffer(fb, BUFFER_STENCIL))
+         attachments[i++] = __DRI_BUFFER_STENCIL;
+
+      buffers = (*screen->dri2.loader->getBuffers)(
+         drawable, &drawable->w, &drawable->h,
+         attachments, i, &count, drawable->loaderPrivate);
+   }
+
+   if (buffers == NULL)
+      return;
+
+   drawable->x = 0;
+   drawable->y = 0;
+   drawable->backX = 0;
+   drawable->backY = 0;
+   drawable->numClipRects = 1;
+   drawable->pClipRects[0].x1 = 0;
+   drawable->pClipRects[0].y1 = 0;
+   drawable->pClipRects[0].x2 = drawable->w;
+   drawable->pClipRects[0].y2 = drawable->h;
+   drawable->numBackClipRects = 1;
+   drawable->pBackClipRects[0].x1 = 0;
+   drawable->pBackClipRects[0].y1 = 0;
+   drawable->pBackClipRects[0].x2 = drawable->w;
+   drawable->pBackClipRects[0].y2 = drawable->h;
+
+   depth_region = NULL;
+   for (i = 0; i < count; i++) {
+      switch (buffers[i].attachment) {
+      case __DRI_BUFFER_FRONT_LEFT:
+         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
+         region_name = "dri2 front buffer";
+         break;
+
+      case __DRI_BUFFER_FAKE_FRONT_LEFT:
+         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
+         region_name = "dri2 fake front buffer";
+         break;
+
+      case __DRI_BUFFER_BACK_LEFT:
+         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
+         region_name = "dri2 back buffer";
+         break;
+
+      case __DRI_BUFFER_DEPTH:
+         rb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+         region_name = "dri2 depth buffer";
+         break;
+
+      case __DRI_BUFFER_DEPTH_STENCIL:
+         rb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+         region_name = "dri2 depth / stencil buffer";
+         break;
+
+      case __DRI_BUFFER_STENCIL:
+         rb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
+         region_name = "dri2 stencil buffer";
+         break;
+
+      case __DRI_BUFFER_ACCUM:
+      default:
+         fprintf(stderr,
+                 "unhandled buffer attach event, attachment type %d\n",
+                 buffers[i].attachment);
+         return;
+      }
+
+      if (rb == NULL)
+         continue;
+
+      if (rb->region && rb->region->name == buffers[i].name)
+         continue;
+
+      if (INTEL_DEBUG & DEBUG_DRI)
+         fprintf(stderr,
+                 "attaching buffer %d, at %d, cpp %d, pitch %d\n",
+                 buffers[i].name, buffers[i].attachment,
+                 buffers[i].cpp, buffers[i].pitch);
+
+      if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_region) {
+         if (INTEL_DEBUG & DEBUG_DRI)
+            fprintf(stderr, "(reusing depth buffer as stencil)\n");
+         intel_region_reference(&region, depth_region);
+      }
+      else
+         region = intel_region_alloc_for_handle(intel, buffers[i].cpp,
+                                                drawable->w,
+                                                drawable->h,
+                                                buffers[i].pitch / buffers[i].cpp,
+                                                buffers[i].name,
+                                                region_name);
+
+      if (buffers[i].attachment == __DRI_BUFFER_DEPTH)
+         depth_region = region;
+
+      intel_renderbuffer_set_region(intel, rb, region);
+
+      /* A packed depth/stencil buffer also backs the stencil renderbuffer.
+       * Share it before intel_region_release(&region) gives up our handle.
+       */
+      if (buffers[i].attachment == __DRI_BUFFER_DEPTH_STENCIL) {
+         rb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
+         if (rb != NULL &&
+             !(rb->region && rb->region->name == buffers[i].name)) {
+            struct intel_region *stencil_region = NULL;
+
+            intel_region_reference(&stencil_region, region);
+            intel_renderbuffer_set_region(intel, rb, stencil_region);
+            intel_region_release(&stencil_region);
+         }
+      }
+
+      intel_region_release(&region);
+   }
+
+   driUpdateFramebufferSize(&intel->ctx, drawable);
+}
+
+/**
+ * intel_prepare_render should be called anywhere that current read/drawbuffer
+ * state is required.
+ */
+void
+intel_prepare_render(struct intel_context *intel)
+{
+   __DRIcontext *driContext = intel->driContext;
+   __DRIdrawable *drawable;
+
+   drawable = driContext->driDrawablePriv;
+   if (drawable->dri2.stamp != driContext->dri2.draw_stamp) {
+      if (drawable->lastStamp != drawable->dri2.stamp)
+	 intel_update_renderbuffers(driContext, drawable);
+      intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
+      driContext->dri2.draw_stamp = drawable->dri2.stamp;
+   }
+
+   drawable = driContext->driReadablePriv;
+   if (drawable->dri2.stamp != driContext->dri2.read_stamp) {
+      if (drawable->lastStamp != drawable->dri2.stamp)
+	 intel_update_renderbuffers(driContext, drawable);
+      driContext->dri2.read_stamp = drawable->dri2.stamp;
+   }
+
+   /* If we're currently rendering to the front buffer, the rendering
+    * that will happen next will probably dirty the front buffer.  So
+    * mark it as dirty here.
+    */
+   if (intel->is_front_buffer_rendering)
+      intel->front_buffer_dirty = GL_TRUE;
+
+   /* Wait for the swapbuffers before the one we just emitted, so we
+    * don't get too many swaps outstanding for apps that are GPU-heavy
+    * but not CPU-heavy.
+    *
+    * We're using intelDRI2Flush (called from the loader before
+    * swapbuffer) and glFlush (for front buffer rendering) as the
+    * indicator that a frame is done and then throttle when we get
+    * here as we prepare to render the next frame.  At this point the
+    * round trips for swap/copy and getting new buffers are done and
+    * we'll spend less time waiting on the GPU.
+    *
+    * Unfortunately, we don't have a handle to the batch containing
+    * the swap, and getting our hands on that doesn't seem worth it,
+    * so we just use the first batch we emitted after the last swap.
+    */
+   if (intel->need_throttle && intel->first_post_swapbuffers_batch) {
+      drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch);
+      drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
+      intel->first_post_swapbuffers_batch = NULL;
+      intel->need_throttle = GL_FALSE;
+   }
+}
+
+/** Viewport hook used when the loader sends no invalidate events. */
+static void
+intel_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+   struct intel_context *intel = intel_context(ctx);
+   /* Chain to the Viewport hook that was installed before this one. */
+   if (intel->saved_viewport)
+      intel->saved_viewport(ctx, x, y, w, h);
+
+   if (intel->meta.internal_viewport_call || ctx->DrawBuffer->Name != 0)
+      return;
+   dri2InvalidateDrawable(intel->driContext->driDrawablePriv);
+   dri2InvalidateDrawable(intel->driContext->driReadablePriv);
+}
+
+static const struct dri_debug_control debug_control[] = { /* $INTEL_DEBUG names */
+   { "tex",   DEBUG_TEXTURE},
+   { "state", DEBUG_STATE},
+   { "ioctl", DEBUG_IOCTL},
+   { "blit",  DEBUG_BLIT},
+   { "mip",   DEBUG_MIPTREE},
+   { "fall",  DEBUG_FALLBACKS},
+   { "verb",  DEBUG_VERBOSE},
+   { "bat",   DEBUG_BATCH},
+   { "pix",   DEBUG_PIXEL},
+   { "buf",   DEBUG_BUFMGR},
+   { "reg",   DEBUG_REGION},
+   { "fbo",   DEBUG_FBO},
+   { "gs",    DEBUG_GS},
+   { "sync",  DEBUG_SYNC},
+   { "prim",  DEBUG_PRIMS },
+   { "vert",  DEBUG_VERTS },
+   { "dri",   DEBUG_DRI },
+   { "sf",    DEBUG_SF },
+   { "san",   DEBUG_SANITY },
+   { "sleep", DEBUG_SLEEP },
+   { "stats", DEBUG_STATS },
+   { "tile",  DEBUG_TILE },
+   { "sing",  DEBUG_SINGLE_THREAD },
+   { "thre",  DEBUG_SINGLE_THREAD },   /* same flag as "sing" */
+   { "wm",    DEBUG_WM },
+   { "glsl_force", DEBUG_GLSL_FORCE },
+   { "urb",   DEBUG_URB },
+   { "vs",    DEBUG_VS },
+   { "clip",  DEBUG_CLIP },
+   { NULL,    0 }   /* last entry; presumably the terminator -- TODO confirm */
+};
+
+
+/** UpdateState hook: pass new_state on to the helper modules and the vtbl. */
+static void
+intelInvalidateState(GLcontext * ctx, GLuint new_state)
+{
+   struct intel_context *const intel = intel_context(ctx);
+
+   _swrast_InvalidateState(ctx, new_state);
+   _swsetup_InvalidateState(ctx, new_state);
+   _vbo_InvalidateState(ctx, new_state);
+   _tnl_InvalidateState(ctx, new_state);
+   _tnl_invalidate_vertex_state(ctx, new_state);
+
+   intel->NewGLState |= new_state;   /* accumulate the dirty bits */
+   if (intel->vtbl.invalidate_state != NULL)
+      intel->vtbl.invalidate_state(intel, new_state);
+}
+
+/** Flush swrast when in fallback, fire vertices on gen < 4, flush the batch. */
+void
+intel_flush(GLcontext *ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+
+   if (intel->Fallback)
+      _swrast_flush(ctx);
+   if (intel->gen < 4)
+      INTEL_FIREVERTICES(intel);
+
+   if (intel->batch->ptr != intel->batch->map)
+      intel_batchbuffer_flush(intel->batch);
+}
+
+/** Flush hook: flush rendering and the front buffer, then request throttling. */
+static void
+intel_glFlush(GLcontext *ctx)
+{
+   struct intel_context *const intel = intel_context(ctx);
+   intel_flush(ctx);
+   intel_flush_front(ctx);
+   intel->need_throttle = GL_TRUE;   /* consumed by intel_prepare_render() */
+}
+
+/** Finish hook: flush, then wait for rendering to each color draw buffer. */
+void
+intelFinish(GLcontext * ctx)
+{
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   int buf;
+
+   intel_flush(ctx);
+   intel_flush_front(ctx);
+
+   for (buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
+      struct intel_renderbuffer *irb =
+         intel_renderbuffer(fb->_ColorDrawBuffers[buf]);
+
+      if (irb == NULL || irb->region == NULL)
+         continue;
+      drm_intel_bo_wait_rendering(irb->region->buffer);
+   }
+
+   /* XXX: the depth buffer (fb->_DepthBuffer) is not waited on. */
+}
+
+/** Run _mesa_init_driver_functions(), then install the intel hooks. */
+void
+intelInitDriverFunctions(struct dd_function_table *functions)
+{
+   _mesa_init_driver_functions(functions);
+   functions->GetString = intelGetString;
+   functions->UpdateState = intelInvalidateState;
+   functions->Flush = intel_glFlush;
+   functions->Finish = intelFinish;
+
+   intelInitTextureFuncs(functions);
+   intelInitTextureImageFuncs(functions);
+   intelInitTextureSubImageFuncs(functions);
+   intelInitTextureCopyImageFuncs(functions);
+   intelInitStateFuncs(functions);
+   intelInitClearFuncs(functions);
+   intelInitBufferFuncs(functions);
+   intelInitPixelFuncs(functions);
+   intelInitBufferObjectFuncs(functions);
+   intel_init_syncobj_functions(functions);
+}
+
+
+/**
+ * Set up \p intel: the core Mesa context, hardware generation flags,
+ * driconf options, implementation limits and the software helper modules.
+ * Returns GL_FALSE when the screen has no buffer manager or the Mesa
+ * context cannot be initialized, GL_TRUE otherwise.
+ */
+GLboolean
+intelInitContext(struct intel_context *intel,
+                 int api,
+                 const __GLcontextModes * mesaVis,
+                 __DRIcontext * driContextPriv,
+                 void *sharedContextPrivate,
+                 struct dd_function_table *functions)
+{
+   GLcontext *ctx = &intel->ctx;
+   GLcontext *shareCtx = (GLcontext *) sharedContextPrivate;
+   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+   struct intel_screen *intelScreen = sPriv->private;
+   const char *strict_conformance;
+   int bo_reuse_mode;
+
+   /* we can't do anything without a connection to the device */
+   if (intelScreen->bufmgr == NULL)
+      return GL_FALSE;
+
+   /* Can't rely on invalidate events, fall back to glViewport hack */
+   if (!driContextPriv->driScreenPriv->dri2.useInvalidate) {
+      intel->saved_viewport = functions->Viewport;
+      functions->Viewport = intel_viewport;
+   }
+
+   if (!_mesa_initialize_context_for_api(&intel->ctx, api, mesaVis, shareCtx,
+                                         functions, (void *) intel)) {
+      fprintf(stderr, "%s: failed to init mesa context\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   driContextPriv->driverPrivate = intel;
+   intel->intelScreen = intelScreen;
+   intel->driContext = driContextPriv;
+   intel->driFd = sPriv->fd;
+
+   intel->has_xrgb_textures = GL_TRUE;
+   if (IS_GEN6(intel->intelScreen->deviceID)) {
+      intel->gen = 6;
+      intel->needs_ff_sync = GL_TRUE;
+      intel->has_luminance_srgb = GL_TRUE;
+   } else if (IS_GEN5(intel->intelScreen->deviceID)) {
+      intel->gen = 5;
+      intel->needs_ff_sync = GL_TRUE;
+      intel->has_luminance_srgb = GL_TRUE;
+   } else if (IS_965(intel->intelScreen->deviceID)) {
+      intel->gen = 4;
+      if (IS_G4X(intel->intelScreen->deviceID)) {
+         intel->has_luminance_srgb = GL_TRUE;
+         intel->is_g4x = GL_TRUE;
+      }
+   } else if (IS_9XX(intel->intelScreen->deviceID)) {
+      intel->gen = 3;
+      if (IS_945(intel->intelScreen->deviceID)) {
+         intel->is_945 = GL_TRUE;
+      }
+   } else {
+      intel->gen = 2;
+      if (intel->intelScreen->deviceID == PCI_CHIP_I830_M ||
+          intel->intelScreen->deviceID == PCI_CHIP_845_G) {
+         intel->has_xrgb_textures = GL_FALSE;
+      }
+   }
+
+   driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache,
+                       sPriv->myNum, (intel->gen >= 4) ? "i965" : "i915");
+   if (intelScreen->deviceID == PCI_CHIP_I865_G)
+      intel->maxBatchSize = 4096;
+   else
+      intel->maxBatchSize = BATCH_SZ;
+
+   intel->bufmgr = intelScreen->bufmgr;
+   bo_reuse_mode = driQueryOptioni(&intel->optionCache, "bo_reuse");
+   switch (bo_reuse_mode) {
+   case DRI_CONF_BO_REUSE_DISABLED:
+      break;
+   case DRI_CONF_BO_REUSE_ALL:
+      intel_bufmgr_gem_enable_reuse(intel->bufmgr);
+      break;
+   }
+
+   /* This doesn't yet catch all non-conformant rendering, but it's a
+    * start.
+    */
+   strict_conformance = getenv("INTEL_STRICT_CONFORMANCE");
+   if (strict_conformance != NULL) {
+      /* Parse as signed: zero, negative and non-numeric settings all
+       * select mode 1 instead of wrapping to a huge unsigned mode. */
+      const int value = atoi(strict_conformance);
+
+      intel->conformance_mode = (value > 0) ? value : 1;
+   }
+
+   ctx->Const.MinLineWidth = 1.0;
+   ctx->Const.MinLineWidthAA = 1.0;
+   if (intel->conformance_mode > 0) {
+      ctx->Const.MaxLineWidth = 1.0;
+      ctx->Const.MaxLineWidthAA = 1.0;
+      ctx->Const.LineWidthGranularity = 1.0;
+   } else {
+      ctx->Const.MaxLineWidth = 5.0;
+      ctx->Const.MaxLineWidthAA = 5.0;
+      ctx->Const.LineWidthGranularity = 0.5;
+   }
+
+   ctx->Const.MinPointSize = 1.0;
+   ctx->Const.MinPointSizeAA = 1.0;
+   ctx->Const.MaxPointSize = 255.0;
+   ctx->Const.MaxPointSizeAA = 3.0;
+   ctx->Const.PointSizeGranularity = 1.0;
+
+   /* reinitialize the context point state.
+    * It depends on constants in __GLcontextRec::Const
+    */
+   _mesa_init_point(ctx);
+
+   meta_init_metaops(ctx, &intel->meta);
+   if (intel->gen >= 4) {
+      if (MAX_WIDTH > 8192)
+         ctx->Const.MaxRenderbufferSize = 8192;
+   } else {
+      if (MAX_WIDTH > 2048)
+         ctx->Const.MaxRenderbufferSize = 2048;
+   }
+
+   /* Initialize the software rasterizer and helper modules. */
+   _swrast_CreateContext(ctx);
+   _vbo_CreateContext(ctx);
+   _tnl_CreateContext(ctx);
+   _swsetup_CreateContext(ctx);
+
+   /* Configure swrast to match hardware characteristics: */
+   _swrast_allow_pixel_fog(ctx, GL_FALSE);
+   _swrast_allow_vertex_fog(ctx, GL_TRUE);
+
+   _mesa_meta_init(ctx);
+   intel->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
+   intel->hw_stipple = 1;
+
+   /* XXX FBO: this doesn't seem to be used anywhere */
+   switch (mesaVis->depthBits) {
+   case 0:                     /* what to do in this case? */
+   case 16:
+      intel->polygon_offset_scale = 1.0;
+      break;
+   case 24:
+      intel->polygon_offset_scale = 2.0;     /* req'd to pass glean */
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   if (intel->gen >= 4)
+      intel->polygon_offset_scale /= 0xffff;
+   intel->RenderIndex = ~0;
+
+   switch (ctx->API) {
+   case API_OPENGL:
+      intelInitExtensions(ctx);
+      break;
+   case API_OPENGLES:
+      break;
+   case API_OPENGLES2:
+      intelInitExtensionsES2(ctx);
+      break;
+   }
+
+   INTEL_DEBUG = driParseDebugString(getenv("INTEL_DEBUG"), debug_control);
+   if (INTEL_DEBUG & DEBUG_BUFMGR)
+      dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE);
+
+   intel->batch = intel_batchbuffer_alloc(intel);
+
+   intel_fbo_init(intel);
+
+   if (intel->ctx.Mesa_DXTn) {
+      _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+      _mesa_enable_extension(ctx, "GL_S3_s3tc");
+   }
+   else if (driQueryOptionb(&intel->optionCache, "force_s3tc_enable")) {
+      _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+   }
+   intel->use_texture_tiling = driQueryOptionb(&intel->optionCache,
+                                               "texture_tiling");
+   intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z");
+
+   intel->prim.primitive = ~0;
+
+   /* Force all software fallbacks */
+   if (driQueryOptionb(&intel->optionCache, "no_rast")) {
+      fprintf(stderr, "disabling 3D rasterization\n");
+      intel->no_rast = 1;
+   }
+
+   if (driQueryOptionb(&intel->optionCache, "always_flush_batch")) {
+      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
+      intel->always_flush_batch = 1;
+   }
+
+   if (driQueryOptionb(&intel->optionCache, "always_flush_cache")) {
+      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
+      intel->always_flush_cache = 1;
+   }
+
+   /* Disable all hardware rendering (skip emitting batches and fences/waits
+    * to the kernel)
+    */
+   intel->no_hw = getenv("INTEL_NO_HW") != NULL;
+
+   return GL_TRUE;
+}
+
+/** Destroy the intel_context attached to \p driContextPriv and free it. */
+void
+intelDestroyContext(__DRIcontext * driContextPriv)
+{
+   struct intel_context *intel = driContextPriv->driverPrivate;
+
+   assert(intel);               /* should never be null */
+   if (!intel)
+      return;
+
+   INTEL_FIREVERTICES(intel);
+
+   _mesa_meta_free(&intel->ctx);
+   meta_destroy_metaops(&intel->meta);
+
+   intel->vtbl.destroy(intel);
+
+   _swsetup_DestroyContext(&intel->ctx);
+   _tnl_DestroyContext(&intel->ctx);
+   _vbo_DestroyContext(&intel->ctx);
+
+   _swrast_DestroyContext(&intel->ctx);
+   intel->Fallback = 0x0;      /* don't call _swrast_Flush later */
+
+   intel_batchbuffer_free(intel->batch);
+   intel->batch = NULL;
+
+   free(intel->prim.vb);
+   intel->prim.vb = NULL;
+   drm_intel_bo_unreference(intel->prim.vb_bo);
+   intel->prim.vb_bo = NULL;
+   drm_intel_bo_unreference(intel->first_post_swapbuffers_batch);
+   intel->first_post_swapbuffers_batch = NULL;
+
+   driDestroyOptionCache(&intel->optionCache);
+
+   /* free the Mesa context */
+   _mesa_free_context_data(&intel->ctx);
+
+   FREE(intel);
+   driContextPriv->driverPrivate = NULL;
+}
+
+GLboolean
+intelUnbindContext(__DRIcontext * driContextPriv)
+{
+   return GL_TRUE;   /* no work is done on unbind; always reports success */
+}
+
+/** Make \p driContextPriv current, or release the current context if NULL. */
+GLboolean
+intelMakeCurrent(__DRIcontext * driContextPriv,
+                 __DRIdrawable * driDrawPriv,
+                 __DRIdrawable * driReadPriv)
+{
+   struct intel_context *intel = driContextPriv ?
+      (struct intel_context *) driContextPriv->driverPrivate : NULL;
+   struct gl_framebuffer *fb, *readFb;
+   GET_CURRENT_CONTEXT(curCtx);
+
+   /* According to the glXMakeCurrent() man page: "Pending commands to
+    * the previous context, if any, are flushed before it is released."
+    * But only flush if we're actually changing contexts.
+    */
+   if (intel_context(curCtx) && intel_context(curCtx) != intel) {
+      _mesa_flush(curCtx);
+   }
+
+   if (!driContextPriv) {
+      _mesa_make_current(NULL, NULL, NULL);
+      return GL_TRUE;
+   }
+
+   fb = driDrawPriv->driverPrivate;
+   readFb = driReadPriv->driverPrivate;
+
+   /* Stamps one behind the drawables' make intel_prepare_render() refresh. */
+   driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
+   driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
+   intel_prepare_render(intel);
+   _mesa_make_current(&intel->ctx, fb, readFb);
+
+   /* intel_prepare_render() already does this, but intel->ctx.DrawBuffer
+    * is NULL at that point, and _mesa_make_current() cannot be called
+    * first because the buffer size is needed for the initial viewport.
+    * So call intel_draw_buffer() once more here.
+    */
+   intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
+
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
new file mode 100644
index 0000000000..c7ac2de01e
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -0,0 +1,478 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTELCONTEXT_INC
+#define INTELCONTEXT_INC
+
+
+
+#include "main/mtypes.h"
+#include "main/mm.h"
+#include "texmem.h"
+#include "dri_metaops.h"
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+#include "intel_screen.h"
+#include "intel_tex_obj.h"
+#include "i915_drm.h"
+#include "tnl/t_vertex.h"
+
+#define TAG(x) intel##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+#define DV_PF_555  (1<<8)
+#define DV_PF_565  (2<<8)
+#define DV_PF_8888 (3<<8)
+#define DV_PF_4444 (8<<8)
+#define DV_PF_1555 (9<<8)
+
+struct intel_region;
+struct intel_context;
+
+typedef void (*intel_tri_func) (struct intel_context *, intelVertex *,
+                                intelVertex *, intelVertex *);
+typedef void (*intel_line_func) (struct intel_context *, intelVertex *,
+                                 intelVertex *);
+typedef void (*intel_point_func) (struct intel_context *, intelVertex *);
+
+/**
+ * Bits for intel->Fallback field
+ */
+/*@{*/
+#define INTEL_FALLBACK_DRAW_BUFFER	 0x1
+#define INTEL_FALLBACK_READ_BUFFER	 0x2
+#define INTEL_FALLBACK_DEPTH_BUFFER      0x4
+#define INTEL_FALLBACK_STENCIL_BUFFER    0x8
+#define INTEL_FALLBACK_USER		 0x10
+#define INTEL_FALLBACK_RENDERMODE	 0x20
+#define INTEL_FALLBACK_TEXTURE   	 0x40
+#define INTEL_FALLBACK_DRIVER            0x1000  /**< first for drivers */
+/*@}*/
+
+extern void intelFallback(struct intel_context *intel, GLbitfield bit,
+                          GLboolean mode);
+#define FALLBACK( intel, bit, mode ) intelFallback( intel, bit, mode )
+
+
+#define INTEL_WRITE_PART  0x1
+#define INTEL_WRITE_FULL  0x2
+#define INTEL_READ        0x4
+
+#define INTEL_MAX_FIXUP 64
+
+struct intel_sync_object {
+   struct gl_sync_object Base;
+
+   /** Batch associated with this sync object */
+   drm_intel_bo *bo;
+};
+
+/**
+ * intel_context is derived from Mesa's context class: GLcontext.
+ */
+struct intel_context
+{
+   GLcontext ctx;  /**< base class, must be first field */
+
+   struct
+   {
+      void (*destroy) (struct intel_context * intel);
+      void (*emit_state) (struct intel_context * intel);
+      void (*finish_batch) (struct intel_context * intel);
+      void (*new_batch) (struct intel_context * intel);
+      void (*emit_invarient_state) (struct intel_context * intel);
+      void (*update_texture_state) (struct intel_context * intel);
+
+      void (*render_start) (struct intel_context * intel);
+      void (*render_prevalidate) (struct intel_context * intel);
+      void (*set_draw_region) (struct intel_context * intel,
+                               struct intel_region * draw_regions[],
+                               struct intel_region * depth_region,
+			       GLuint num_regions);
+
+      void (*reduced_primitive_state) (struct intel_context * intel,
+                                       GLenum rprim);
+
+      GLboolean (*check_vertex_size) (struct intel_context * intel,
+				      GLuint expected);
+      void (*invalidate_state) (struct intel_context *intel,
+				GLuint new_state);
+
+      void (*assert_not_dirty) (struct intel_context *intel);
+
+      void (*debug_batch)(struct intel_context *intel);
+   } vtbl;
+
+   struct dri_metaops meta;
+
+   GLbitfield Fallback;  /**< mask of INTEL_FALLBACK_x bits */
+   GLuint NewGLState;
+
+   dri_bufmgr *bufmgr;
+   unsigned int maxBatchSize;
+
+   /**
+    * Generation number of the hardware: 2 is 8xx, 3 is 9xx pre-965, 4 is 965.
+    */
+   int gen;
+   GLboolean needs_ff_sync;
+   GLboolean is_g4x;
+   GLboolean is_945;
+   GLboolean has_luminance_srgb;
+   GLboolean has_xrgb_textures;
+
+   int urb_size;
+
+   struct intel_batchbuffer *batch;
+   drm_intel_bo *first_post_swapbuffers_batch;
+   GLboolean need_throttle;
+   GLboolean no_batch_wrap;
+
+   struct
+   {
+      GLuint id;
+      uint32_t primitive;	/**< Current hardware primitive type */
+      void (*flush) (struct intel_context *);
+      GLubyte *start_ptr; /**< for i8xx */
+      drm_intel_bo *vb_bo;
+      uint8_t *vb;
+      unsigned int start_offset; /**< Byte offset of primitive sequence */
+      unsigned int current_offset; /**< Byte offset of next vertex */
+      unsigned int count;	/**< Number of vertices in current primitive */
+   } prim;
+
+   GLuint stats_wm;
+   GLboolean locked;
+   char *prevLockFile;
+   int prevLockLine;
+
+   /* Offsets of fields within the current vertex:
+    */
+   GLuint coloroffset;
+   GLuint specoffset;
+   GLuint wpos_offset;
+   GLuint wpos_size;
+
+   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+   GLuint vertex_attr_count;
+
+   GLfloat polygon_offset_scale;        /* dependent on depth_scale, bpp */
+
+   GLboolean hw_stencil;
+   GLboolean hw_stipple;
+   GLboolean depth_buffer_is_float;
+   GLboolean no_rast;
+   GLboolean no_hw;
+   GLboolean always_flush_batch;
+   GLboolean always_flush_cache;
+
+   /* 0 - nonconformant, best performance;
+    * 1 - fallback to sw for known conformance bugs
+    * 2 - always fallback to sw
+    */
+   GLuint conformance_mode;
+
+   /* State for intelvb.c and inteltris.c.
+    */
+   GLuint RenderIndex;
+   GLmatrix ViewportMatrix;
+   GLenum render_primitive;
+   GLenum reduced_primitive;
+   GLuint vertex_size;
+   GLubyte *verts;              /* points to tnl->clipspace.vertex_buf */
+
+   /* Fallback rasterization functions 
+    */
+   intel_point_func draw_point;
+   intel_line_func draw_line;
+   intel_tri_func draw_tri;
+
+   /**
+    * Set if rendering has occurred to the drawable's front buffer.
+    *
+    * This is used in the DRI2 case to detect that glFlush should also copy
+    * the contents of the fake front buffer to the real front buffer.
+    */
+   GLboolean front_buffer_dirty;
+
+   /**
+    * Track whether front-buffer rendering is currently enabled
+    *
+    * A separate flag is used to track this in order to support MRT more
+    * easily.
+    */
+   GLboolean is_front_buffer_rendering;
+   /**
+    * Track whether front-buffer is the current read target.
+    *
+    * This is closely associated with is_front_buffer_rendering, but may
+    * be set separately.  The DRI2 fake front buffer must be referenced
+    * either way.
+    */
+   GLboolean is_front_buffer_reading;
+
+   GLboolean use_texture_tiling;
+   GLboolean use_early_z;
+
+   int driFd;
+
+   __DRIcontext *driContext;
+   struct intel_screen *intelScreen;
+   void (*saved_viewport)(GLcontext * ctx,
+			  GLint x, GLint y, GLsizei width, GLsizei height);
+
+   /**
+    * Configuration cache
+    */
+   driOptionCache optionCache;
+};
+
+extern char *__progname;
+
+
+#define SUBPIXEL_X 0.125
+#define SUBPIXEL_Y 0.125
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))	/* x must be a real array, not a pointer */
+#define ALIGN(value, alignment)  (((value) + (alignment) - 1) & ~((alignment) - 1))	/* round up; alignment must be a power of two */
+/* Largest multiple of alignment (a power of two) that is <= value. */
+#define ROUND_DOWN_TO(value, alignment) ((value) & ~((alignment) - 1))
+#define IS_POWER_OF_TWO(val) (((val) & ((val) - 1)) == 0)	/* note: also true for 0 */
+
+static INLINE uint32_t
+U_FIXED(float value, uint32_t frac_bits)
+{
+   const float scaled = value * (1 << frac_bits);   /* move frac_bits fractional bits above the binary point */
+   return (scaled < 0) ? 0 : scaled;                /* negative inputs clamp to 0; the rest convert to uint32_t */
+}
+
+static INLINE uint32_t
+S_FIXED(float value, uint32_t frac_bits)
+{
+   return (uint32_t) (int32_t) (value * (1 << frac_bits));   /* via int32_t: converting a negative float straight to unsigned is undefined */
+}
+
+#define INTEL_FIREVERTICES(intel)	/* call the prim.flush hook if one is installed */	\
+do {						\
+   if ((intel)->prim.flush)			\
+      (intel)->prim.flush(intel);		\
+} while (0)
+
+/* ================================================================
+ * From linux kernel i386 header files, copes with odd sizes better
+ * than COPY_DWORDS would:
+ * XXX Put this in src/mesa/main/imports.h ???
+ */
+#if defined(i386) || defined(__i386__)
+static INLINE void * __memcpy(void * to, const void * from, size_t n)
+{
+   int d0, d1, d2;   /* dummy outputs: tell the compiler ECX/EDI/ESI are modified */
+   __asm__ __volatile__(
+      "rep ; movsl\n\t"          /* copy n/4 dwords (ECX is preloaded with n/4) */
+      "testb $2,%b4\n\t"         /* bit 1 of n set: one 16-bit word remains */
+      "je 1f\n\t"
+      "movsw\n"
+      "1:\ttestb $1,%b4\n\t"     /* bit 0 of n set: one byte remains */
+      "je 2f\n\t"
+      "movsb\n"
+      "2:"
+      : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+      :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
+      : "memory");
+   return (to);   /* returns the destination pointer */
+}
+#else
+#define __memcpy(a,b,c) memcpy(a,b,c)   /* non-i386 builds use memcpy directly */
+#endif
+
+
+/* ================================================================
+ * Debugging:
+ */
+extern int INTEL_DEBUG;
+
+#define DEBUG_TEXTURE	0x1
+#define DEBUG_STATE	0x2
+#define DEBUG_IOCTL	0x4
+#define DEBUG_BLIT	0x8
+#define DEBUG_MIPTREE   0x10
+#define DEBUG_FALLBACKS	0x20
+#define DEBUG_VERBOSE	0x40
+#define DEBUG_BATCH     0x80
+#define DEBUG_PIXEL     0x100
+#define DEBUG_BUFMGR    0x200
+#define DEBUG_REGION    0x400
+#define DEBUG_FBO       0x800
+#define DEBUG_GS        0x1000
+#define DEBUG_SYNC	0x2000
+#define DEBUG_PRIMS	0x4000
+#define DEBUG_VERTS	0x8000
+#define DEBUG_DRI       0x10000
+#define DEBUG_SF        0x20000
+#define DEBUG_SANITY    0x40000
+#define DEBUG_SLEEP     0x80000
+#define DEBUG_STATS     0x100000
+#define DEBUG_TILE      0x200000
+#define DEBUG_SINGLE_THREAD   0x400000
+#define DEBUG_WM        0x800000
+#define DEBUG_URB       0x1000000
+#define DEBUG_VS        0x2000000
+#define DEBUG_GLSL_FORCE 0x4000000
+#define DEBUG_CLIP      0x8000000
+
+#define DBG(...) do {	/* printf(...) only when INTEL_DEBUG has a FILE_DEBUG_FLAG bit set; FILE_DEBUG_FLAG is not defined in this header - presumably each including .c file defines it */	\
+	if (INTEL_DEBUG & FILE_DEBUG_FLAG)			\
+		printf(__VA_ARGS__);			\
+} while(0)
+
+#define PCI_CHIP_845_G			0x2562
+#define PCI_CHIP_I830_M			0x3577
+#define PCI_CHIP_I855_GM		0x3582
+#define PCI_CHIP_I865_G			0x2572
+#define PCI_CHIP_I915_G			0x2582
+#define PCI_CHIP_I915_GM		0x2592
+#define PCI_CHIP_I945_G			0x2772
+#define PCI_CHIP_I945_GM		0x27A2
+#define PCI_CHIP_I945_GME		0x27AE
+#define PCI_CHIP_G33_G			0x29C2
+#define PCI_CHIP_Q35_G			0x29B2
+#define PCI_CHIP_Q33_G			0x29D2
+
+
+/* ================================================================
+ * intel_context.c:
+ */
+
+extern GLboolean intelInitContext(struct intel_context *intel,
+				  int api,
+                                  const __GLcontextModes * mesaVis,
+                                  __DRIcontext * driContextPriv,
+                                  void *sharedContextPrivate,
+                                  struct dd_function_table *functions);
+
+extern void intelFinish(GLcontext * ctx);
+extern void intel_flush(GLcontext * ctx);
+
+extern void intelInitDriverFunctions(struct dd_function_table *functions);
+
+void intel_init_syncobj_functions(struct dd_function_table *functions);
+
+
+/* ================================================================
+ * intel_state.c:
+ */
+extern void intelInitStateFuncs(struct dd_function_table *functions);
+
+#define COMPAREFUNC_ALWAYS		0
+#define COMPAREFUNC_NEVER		0x1
+#define COMPAREFUNC_LESS		0x2
+#define COMPAREFUNC_EQUAL		0x3
+#define COMPAREFUNC_LEQUAL		0x4
+#define COMPAREFUNC_GREATER		0x5
+#define COMPAREFUNC_NOTEQUAL		0x6
+#define COMPAREFUNC_GEQUAL		0x7
+
+#define STENCILOP_KEEP			0
+#define STENCILOP_ZERO			0x1
+#define STENCILOP_REPLACE		0x2
+#define STENCILOP_INCRSAT		0x3
+#define STENCILOP_DECRSAT		0x4
+#define STENCILOP_INCR			0x5
+#define STENCILOP_DECR			0x6
+#define STENCILOP_INVERT		0x7
+
+#define LOGICOP_CLEAR			0
+#define LOGICOP_NOR			0x1
+#define LOGICOP_AND_INV 		0x2
+#define LOGICOP_COPY_INV		0x3
+#define LOGICOP_AND_RVRSE		0x4
+#define LOGICOP_INV			0x5
+#define LOGICOP_XOR			0x6
+#define LOGICOP_NAND			0x7
+#define LOGICOP_AND			0x8
+#define LOGICOP_EQUIV			0x9
+#define LOGICOP_NOOP			0xa
+#define LOGICOP_OR_INV			0xb
+#define LOGICOP_COPY			0xc
+#define LOGICOP_OR_RVRSE		0xd
+#define LOGICOP_OR			0xe
+#define LOGICOP_SET			0xf
+
+#define BLENDFACT_ZERO			0x01
+#define BLENDFACT_ONE			0x02
+#define BLENDFACT_SRC_COLR		0x03
+#define BLENDFACT_INV_SRC_COLR 		0x04
+#define BLENDFACT_SRC_ALPHA		0x05
+#define BLENDFACT_INV_SRC_ALPHA 	0x06
+#define BLENDFACT_DST_ALPHA		0x07
+#define BLENDFACT_INV_DST_ALPHA 	0x08
+#define BLENDFACT_DST_COLR		0x09
+#define BLENDFACT_INV_DST_COLR		0x0a
+#define BLENDFACT_SRC_ALPHA_SATURATE	0x0b
+#define BLENDFACT_CONST_COLOR		0x0c
+#define BLENDFACT_INV_CONST_COLOR	0x0d
+#define BLENDFACT_CONST_ALPHA		0x0e
+#define BLENDFACT_INV_CONST_ALPHA	0x0f
+#define BLENDFACT_MASK          	0x0f
+
+enum {
+   DRI_CONF_BO_REUSE_DISABLED,
+   DRI_CONF_BO_REUSE_ALL
+};
+
+extern int intel_translate_shadow_compare_func(GLenum func);
+extern int intel_translate_compare_func(GLenum func);
+extern int intel_translate_stencil_op(GLenum op);
+extern int intel_translate_blend_factor(GLenum factor);
+extern int intel_translate_logic_op(GLenum opcode);
+
+void intel_update_renderbuffers(__DRIcontext *context,
+				__DRIdrawable *drawable);
+void intel_prepare_render(struct intel_context *intel);
+
+void i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region,
+				  uint32_t buffer_id);
+
+/*======================================================================
+ * Inline conversion functions.  
+ * These are better-typed than the macros used previously:
+ */
+static INLINE struct intel_context *
+intel_context(GLcontext * ctx)
+{
+   return (struct intel_context *) ctx;   /* plain downcast: GLcontext ctx is the first field of struct intel_context; NULL stays NULL */
+}
+
+static INLINE GLboolean
+is_power_of_two(uint32_t value)
+{
+   return (value & (value - 1)) == 0;   /* true when at most one bit is set, so 0 also yields true */
+}
+
+#endif
diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c
new file mode 100644
index 0000000000..650010ac9c
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_decode.c
@@ -0,0 +1,1822 @@
+/* -*- c-basic-offset: 4 -*- */
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file intel_decode.c
+ * This file contains code to print out batchbuffer contents in a
+ * human-readable format.
+ *
+ * The current version only supports i915 packets, and only pretty-prints a
+ * subset of them.  The intention is for it to make just a best attempt to
+ * decode, but never crash in the process.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "intel_decode.h"
+#include "intel_chipset.h"
+
+#define BUFFER_FAIL(_count, _len, _name) do {	/* for use inside a decoder: prints to the file-level `out`, bumps the caller's `failures`, and returns the caller's `count` from the calling function */	\
+    fprintf(out, "Buffer size too small in %s (%d < %d)\n",	\
+	    (_name), (_count), (_len));				\
+    (*failures)++;						\
+    return count;						\
+} while (0)
+
+static FILE *out;
+static uint32_t saved_s2 = 0, saved_s4 = 0;
+static char saved_s2_set = 0, saved_s4_set = 0;
+
+static float
+int_as_float(uint32_t intval)
+{
+    float result;
+
+    /* Reinterpret the 32 bits of intval as a float: a byte-for-byte
+     * copy, with no numeric conversion of the value.
+     */
+    memcpy(&result, &intval, sizeof(result));
+    return result;
+}
+
+static void
+instr_out(uint32_t *data, uint32_t hw_offset, unsigned int index,
+	  char *fmt, ...)
+{
+    const char *pad = (index != 0) ? "  " : "";	/* dwords after the first get two extra spaces */
+    va_list args;
+
+    fprintf(out, "0x%08x: 0x%08x:%s ", hw_offset + index * 4, data[index], pad);	/* byte offset, raw dword */
+    va_start(args, fmt);
+    vfprintf(out, fmt, args);	/* caller-supplied description */
+    va_end(args);
+}
+
+
+static int
+decode_mi(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    unsigned int opcode;
+    /* Columns: opcode (dword 0 bits 28:23), length-field mask, min/max total dwords, name. */
+    struct {
+	uint32_t opcode;
+	int len_mask;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_mi[] = {
+	{ 0x08, 0, 1, 1, "MI_ARB_ON_OFF" },
+	{ 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
+	{ 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" },
+	{ 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" },
+	{ 0x04, 0, 1, 1, "MI_FLUSH" },
+	{ 0x22, 0x3f, 3, 3, "MI_LOAD_REGISTER_IMM" },	/* needs a real mask: with 0 the computed len is always 2, never the required 3 */
+	{ 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" },
+	{ 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" },
+	{ 0x00, 0, 1, 1, "MI_NOOP" },
+	{ 0x11, 0x3f, 2, 2, "MI_OVERLAY_FLIP" },
+	{ 0x07, 0, 1, 1, "MI_REPORT_HEAD" },
+	{ 0x18, 0x3f, 2, 2, "MI_SET_CONTEXT" },
+	{ 0x20, 0x3f, 3, 4, "MI_STORE_DATA_IMM" },
+	{ 0x21, 0x3f, 3, 4, "MI_STORE_DATA_INDEX" },
+	{ 0x24, 0x3f, 3, 3, "MI_STORE_REGISTER_MEM" },
+	{ 0x02, 0, 1, 1, "MI_USER_INTERRUPT" },
+	{ 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" },
+    };
+
+    /* Returns the dwords consumed (count itself if the buffer ends early); an unknown opcode counts as a failure and consumes 1. */
+    for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]);
+	 opcode++) {
+	if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) {
+	    unsigned int len = 1, i;
+
+	    instr_out(data, hw_offset, 0, "%s\n", opcodes_mi[opcode].name);
+	    if (opcodes_mi[opcode].max_len > 1) {
+		len = (data[0] & opcodes_mi[opcode].len_mask) + 2;	/* length field stores total dwords minus 2 */
+		if (len < opcodes_mi[opcode].min_len ||
+		    len > opcodes_mi[opcode].max_len)
+		{
+		    fprintf(out, "Bad length (%d) in %s, [%d, %d]\n",
+			    len, opcodes_mi[opcode].name,
+			    opcodes_mi[opcode].min_len,
+			    opcodes_mi[opcode].max_len);
+		}
+	    }
+
+	    for (i = 1; i < len; i++) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len, opcodes_mi[opcode].name);
+		instr_out(data, hw_offset, i, "dword %d\n", i);
+	    }
+
+	    return len;
+	}
+    }
+
+    instr_out(data, hw_offset, 0, "MI UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+static int
+decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    unsigned int opcode, len;
+    char *format = NULL;
+    /* Generic fallback table: opcode (dword 0 bits 28:22), min/max total dwords, name. */
+    struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_2d[] = {
+	{ 0x40, 5, 5, "COLOR_BLT" },
+	{ 0x43, 6, 6, "SRC_COPY_BLT" },
+	{ 0x01, 8, 8, "XY_SETUP_BLT" },
+	{ 0x11, 9, 9, "XY_SETUP_MONO_PATTERN_SL_BLT" },
+	{ 0x03, 3, 3, "XY_SETUP_CLIP_BLT" },
+	{ 0x24, 2, 2, "XY_PIXEL_BLT" },
+	{ 0x25, 3, 3, "XY_SCANLINES_BLT" },
+	{ 0x26, 4, 4, "XY_TEXT_BLT" },
+	{ 0x31, 5, 134, "XY_TEXT_IMMEDIATE_BLT" },
+	{ 0x50, 6, 6, "XY_COLOR_BLT" },	/* normally handled by the switch below */
+	{ 0x51, 6, 6, "XY_PAT_BLT" },
+	{ 0x76, 8, 8, "XY_PAT_CHROMA_BLT" },
+	{ 0x72, 7, 135, "XY_PAT_BLT_IMMEDIATE" },
+	{ 0x77, 9, 137, "XY_PAT_CHROMA_BLT_IMMEDIATE" },
+	{ 0x52, 9, 9, "XY_MONO_PAT_BLT" },
+	{ 0x59, 7, 7, "XY_MONO_PAT_FIXED_BLT" },
+	{ 0x53, 8, 8, "XY_SRC_COPY_BLT" },	/* normally handled by the switch below */
+	{ 0x54, 8, 8, "XY_MONO_SRC_COPY_BLT" },
+	{ 0x71, 9, 137, "XY_MONO_SRC_COPY_IMMEDIATE_BLT" },
+	{ 0x55, 9, 9, "XY_FULL_BLT" },
+	{ 0x74, 9, 137, "XY_FULL_IMMEDIATE_PATTERN_BLT" },	/* must not share 0x55 with XY_FULL_BLT, or this row can never match */
+	{ 0x56, 9, 9, "XY_FULL_MONO_SRC_BLT" },
+	{ 0x75, 10, 138, "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT" },
+	{ 0x57, 12, 12, "XY_FULL_MONO_PATTERN_BLT" },
+	{ 0x58, 12, 12, "XY_FULL_MONO_PATTERN_MONO_SRC_BLT" },
+    };
+
+    switch ((data[0] & 0x1fc00000) >> 22) {	/* opcodes with a field-by-field decode */
+    case 0x50:
+	instr_out(data, hw_offset, 0,
+		  "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n",
+		  (data[0] & (1 << 20)) ? "en" : "dis",
+		  (data[0] & (1 << 21)) ? "en" : "dis",
+		  (data[0] >> 11) & 1);
+
+	len = (data[0] & 0x000000ff) + 2;
+	if (len != 6)
+	    fprintf(out, "Bad count in XY_COLOR_BLT\n");
+	if (count < 6)
+	    BUFFER_FAIL(count, len, "XY_COLOR_BLT");
+
+	switch ((data[1] >> 24) & 0x3) {	/* 2-bit field: all four values are covered */
+	case 0:
+	    format="8";
+	    break;
+	case 1:
+	    format="565";
+	    break;
+	case 2:
+	    format="1555";
+	    break;
+	case 3:
+	    format="8888";
+	    break;
+	}
+
+	instr_out(data, hw_offset, 1, "format %s, pitch %d, "
+		  "clipping %sabled\n", format,
+		  (short)(data[1] & 0xffff),
+		  data[1] & (1 << 30) ? "en" : "dis");
+	instr_out(data, hw_offset, 2, "(%d,%d)\n",
+		  data[2] & 0xffff, data[2] >> 16);
+	instr_out(data, hw_offset, 3, "(%d,%d)\n",
+		  data[3] & 0xffff, data[3] >> 16);
+	instr_out(data, hw_offset, 4, "offset 0x%08x\n", data[4]);
+	instr_out(data, hw_offset, 5, "color\n");
+	return len;
+    case 0x53:
+	instr_out(data, hw_offset, 0,
+		  "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, "
+		  "src tile %d, dst tile %d)\n",
+		  (data[0] & (1 << 20)) ? "en" : "dis",
+		  (data[0] & (1 << 21)) ? "en" : "dis",
+		  (data[0] >> 15) & 1,
+		  (data[0] >> 11) & 1);
+
+	len = (data[0] & 0x000000ff) + 2;
+	if (len != 8)
+	    fprintf(out, "Bad count in XY_SRC_COPY_BLT\n");
+	if (count < 8)
+	    BUFFER_FAIL(count, len, "XY_SRC_COPY_BLT");
+
+	switch ((data[1] >> 24) & 0x3) {	/* 2-bit field: all four values are covered */
+	case 0:
+	    format="8";
+	    break;
+	case 1:
+	    format="565";
+	    break;
+	case 2:
+	    format="1555";
+	    break;
+	case 3:
+	    format="8888";
+	    break;
+	}
+
+	instr_out(data, hw_offset, 1, "format %s, dst pitch %d, "
+		  "clipping %sabled\n", format,
+		  (short)(data[1] & 0xffff),
+		  data[1] & (1 << 30) ? "en" : "dis");
+	instr_out(data, hw_offset, 2, "dst (%d,%d)\n",
+		  data[2] & 0xffff, data[2] >> 16);
+	instr_out(data, hw_offset, 3, "dst (%d,%d)\n",
+		  data[3] & 0xffff, data[3] >> 16);
+	instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]);
+	instr_out(data, hw_offset, 5, "src (%d,%d)\n",
+		  data[5] & 0xffff, data[5] >> 16);
+	instr_out(data, hw_offset, 6, "src pitch %d\n",
+		  (short)(data[6] & 0xffff));
+	instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]);
+	return len;
+    }
+
+    for (opcode = 0; opcode < sizeof(opcodes_2d) / sizeof(opcodes_2d[0]);
+	 opcode++) {
+	if ((data[0] & 0x1fc00000) >> 22 == opcodes_2d[opcode].opcode) {
+	    unsigned int i;
+
+	    len = 1;
+	    instr_out(data, hw_offset, 0, "%s\n", opcodes_2d[opcode].name);
+	    if (opcodes_2d[opcode].max_len > 1) {
+		len = (data[0] & 0x000000ff) + 2;	/* length field stores total dwords minus 2 */
+		if (len < opcodes_2d[opcode].min_len ||
+		    len > opcodes_2d[opcode].max_len)
+		{
+		    fprintf(out, "Bad count in %s\n", opcodes_2d[opcode].name);
+		}
+	    }
+
+	    for (i = 1; i < len; i++) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len, opcodes_2d[opcode].name);
+		instr_out(data, hw_offset, i, "dword %d\n", i);
+	    }
+
+	    return len;
+	}
+    }
+
+    instr_out(data, hw_offset, 0, "2D UNKNOWN\n");	/* no table row matched: count a failure, consume one dword */
+    (*failures)++;
+    return 1;
+}
+
+static int
+decode_3d_1c(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    switch ((data[0] & 0x00f80000) >> 19) {	/* sub-opcode: dword 0 bits 23:19; every known packet is one dword */
+    case 0x11:
+	instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n");
+	return 1;
+    case 0x10:
+	instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n");
+	return 1;
+    case 0x01:
+	instr_out(data, hw_offset, 0, "3DSTATE_MAP_COORD_SET_I830\n");
+	return 1;
+    case 0x0a:
+	instr_out(data, hw_offset, 0, "3DSTATE_MAP_CUBE_I830\n");
+	return 1;
+    case 0x05:
+	instr_out(data, hw_offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n");
+	return 1;
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");	/* unrecognised: count a failure, still consume one dword */
+    (*failures)++;
+    return 1;
+}
+
+/** Formats the destination operand of the PS instruction at data[i] into dstname; the write mask and ".sat" suffix are appended only when do_mask is nonzero. */
+static void
+i915_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask)
+{
+    uint32_t a0 = data[i];
+    int dst_nr = (a0 >> 14) & 0xf;	/* 4-bit field, so always 0..15 */
+    char dstmask[8];	/* longest content: '.', three channel letters, NUL */
+    char *sat;
+
+    if (do_mask) {
+	if (((a0 >> 10) & 0xf) == 0xf) {	/* all four channels enabled: print no mask */
+	    dstmask[0] = 0;
+	} else {
+	    int dstmask_index = 0;
+
+	    dstmask[dstmask_index++] = '.';
+	    if (a0 & (1 << 10))
+		dstmask[dstmask_index++] = 'x';
+	    if (a0 & (1 << 11))
+		dstmask[dstmask_index++] = 'y';
+	    if (a0 & (1 << 12))
+		dstmask[dstmask_index++] = 'z';
+	    if (a0 & (1 << 13))
+		dstmask[dstmask_index++] = 'w';
+	    dstmask[dstmask_index++] = 0;
+	}
+
+	if (a0 & (1 << 22))	/* bit 22 selects the ".sat" suffix */
+	    sat = ".sat";
+	else
+	    sat = "";
+    } else {
+	dstmask[0] = 0;
+	sat = "";
+    }
+
+    switch ((a0 >> 19) & 0x7) {	/* destination register type: bits 21:19 */
+    case 0:
+	if (dst_nr > 15)	/* cannot trigger: dst_nr is masked to 4 bits */
+	    fprintf(out, "bad destination reg R%d\n", dst_nr);
+	sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat);
+	break;
+    case 4:
+	if (dst_nr > 0)
+	    fprintf(out, "bad destination reg oC%d\n", dst_nr);
+	sprintf(dstname, "oC%s%s", dstmask, sat);
+	break;
+    case 5:
+	if (dst_nr > 0)
+	    fprintf(out, "bad destination reg oD%d\n", dst_nr);
+	sprintf(dstname, "oD%s%s",  dstmask, sat);
+	break;
+    case 6:
+	if (dst_nr > 2)
+	    fprintf(out, "bad destination reg U%d\n", dst_nr);
+	sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat);
+	break;
+    default:
+	sprintf(dstname, "RESERVED");	/* types 1, 2, 3 and 7 have no destination form here */
+	break;
+    }
+}
+
+static char *
+i915_get_channel_swizzle(uint32_t select)
+{
+    /* Rows: channel selector (bits 2:0); columns: plain, negated (bit 3). */
+    static char *const names[6][2] = {
+	{ "x", "-x" },
+	{ "y", "-y" },
+	{ "z", "-z" },
+	{ "w", "-w" },
+	{ "0", "-0" },
+	{ "1", "-1" },
+    };
+    const uint32_t channel = select & 0x7;
+    const int negated = (select & 8) != 0;
+
+    /* Selectors 6 and 7 have no table row and are reported as "bad". */
+    if (channel >= 6)
+	return negated ? "-bad" : "bad";
+    return names[channel][negated];
+}
+
+static void
+i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
+{
+    switch (src_type) {	/* writes the register name into name; out-of-range numbers are also reported on `out` */
+    case 0:
+	sprintf(name, "R%d", src_nr);
+	if (src_nr > 15)
+	    fprintf(out, "bad src reg %s\n", name);
+	break;
+    case 1:	/* T0..T7, then three named inputs at 8, 9 and 10 */
+	if (src_nr < 8)
+	    sprintf(name, "T%d", src_nr);
+	else if (src_nr == 8)
+	    sprintf(name, "DIFFUSE");
+	else if (src_nr == 9)
+	    sprintf(name, "SPECULAR");
+	else if (src_nr == 10)
+	    sprintf(name, "FOG");
+	else {
+	    fprintf(out, "bad src reg T%d\n", src_nr);
+	    sprintf(name, "RESERVED");
+	}
+	break;
+    case 2:
+	sprintf(name, "C%d", src_nr);
+	if (src_nr > 31)
+	    fprintf(out, "bad src reg %s\n", name);
+	break;
+    case 4:	/* oC and oD take no index: the name omits it and any nonzero number is flagged */
+	sprintf(name, "oC");
+	if (src_nr > 0)
+	    fprintf(out, "bad src reg oC%d\n", src_nr);
+	break;
+    case 5:
+	sprintf(name, "oD");
+	if (src_nr > 0)
+	    fprintf(out, "bad src reg oD%d\n", src_nr);
+	break;
+    case 6:
+	sprintf(name, "U%d", src_nr);
+	if (src_nr > 2)
+	    fprintf(out, "bad src reg %s\n", name);
+	break;
+    default:	/* types 3 and 7 */
+	fprintf(out, "bad src reg type %d\n", src_type);
+	sprintf(name, "RESERVED");
+	break;
+    }
+}
+
+static void
+i915_get_instruction_src0(uint32_t *data, int i, char *srcname)
+{
+    const uint32_t dw0 = data[i], dw1 = data[i + 1];
+    char suffix[100];
+
+    /* Register type: dword 0 bits 9:7; register number: bits 6:2. */
+    i915_get_instruction_src_name((dw0 >> 7) & 0x7, (dw0 >> 2) & 0x1f, srcname);
+    /* One 4-bit selector per channel, x first, in dword 1 bits 31:16. */
+    sprintf(suffix, ".%s%s%s%s",
+	    i915_get_channel_swizzle((dw1 >> 28) & 0xf),
+	    i915_get_channel_swizzle((dw1 >> 24) & 0xf),
+	    i915_get_channel_swizzle((dw1 >> 20) & 0xf),
+	    i915_get_channel_swizzle((dw1 >> 16) & 0xf));
+    if (strcmp(suffix, ".xyzw") != 0)	/* the identity swizzle is not printed */
+	strcat(srcname, suffix);
+}
+
+static void
+i915_get_instruction_src1(uint32_t *data, int i, char *srcname)
+{
+    const uint32_t dw1 = data[i + 1], dw2 = data[i + 2];
+    char suffix[100];
+
+    /* Register type: dword 1 bits 15:13; register number: bits 12:8. */
+    i915_get_instruction_src_name((dw1 >> 13) & 0x7, (dw1 >> 8) & 0x1f, srcname);
+    /* x/y selectors sit in dword 1 bits 7:0, z/w in dword 2 bits 31:24. */
+    sprintf(suffix, ".%s%s%s%s",
+	    i915_get_channel_swizzle((dw1 >> 4) & 0xf),
+	    i915_get_channel_swizzle((dw1 >> 0) & 0xf),
+	    i915_get_channel_swizzle((dw2 >> 28) & 0xf),
+	    i915_get_channel_swizzle((dw2 >> 24) & 0xf));
+    if (strcmp(suffix, ".xyzw") != 0)	/* the identity swizzle is not printed */
+	strcat(srcname, suffix);
+}
+
+static void
+i915_get_instruction_src2(uint32_t *data, int i, char *srcname)
+{
+    const uint32_t dw2 = data[i + 2];
+    char suffix[100];
+
+    /* All in dword 2: type bits 23:21, number bits 20:16, selectors bits 15:0. */
+    i915_get_instruction_src_name((dw2 >> 21) & 0x7, (dw2 >> 16) & 0x1f, srcname);
+    sprintf(suffix, ".%s%s%s%s",
+	    i915_get_channel_swizzle((dw2 >> 12) & 0xf),
+	    i915_get_channel_swizzle((dw2 >> 8) & 0xf),
+	    i915_get_channel_swizzle((dw2 >> 4) & 0xf),
+	    i915_get_channel_swizzle((dw2 >> 0) & 0xf));
+    if (strcmp(suffix, ".xyzw") != 0)	/* the identity swizzle is not printed */
+	strcat(srcname, suffix);
+}
+
+static void
+i915_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name)
+{
+    switch (src_type) {	/* same naming as i915_get_instruction_src_name, but types 2 and 6 are rejected here */
+    case 0:
+	sprintf(name, "R%d", src_nr);
+	if (src_nr > 15)
+	    fprintf(out, "bad src reg %s\n", name);
+	break;
+    case 1:	/* T0..T7, then three named inputs at 8, 9 and 10 */
+	if (src_nr < 8)
+	    sprintf(name, "T%d", src_nr);
+	else if (src_nr == 8)
+	    sprintf(name, "DIFFUSE");
+	else if (src_nr == 9)
+	    sprintf(name, "SPECULAR");
+	else if (src_nr == 10)
+	    sprintf(name, "FOG");
+	else {
+	    fprintf(out, "bad src reg T%d\n", src_nr);
+	    sprintf(name, "RESERVED");
+	}
+	break;
+    case 4:	/* oC and oD take no index: any nonzero number is flagged */
+	sprintf(name, "oC");
+	if (src_nr > 0)
+	    fprintf(out, "bad src reg oC%d\n", src_nr);
+	break;
+    case 5:
+	sprintf(name, "oD");
+	if (src_nr > 0)
+	    fprintf(out, "bad src reg oD%d\n", src_nr);
+	break;
+    default:	/* types 2, 3, 6 and 7 */
+	fprintf(out, "bad src reg type %d\n", src_type);
+	sprintf(name, "RESERVED");
+	break;
+    }
+}
+
+static void
+i915_decode_alu1(uint32_t *data, uint32_t hw_offset,
+		 int i, char *instr_prefix, char *op_name)
+{
+    char dest_text[100];
+    char arg0_text[100];
+    /* Format the operands first; only the first of the three dwords carries the disassembly. */
+    i915_get_instruction_dst(data, i, dest_text, 1);
+    i915_get_instruction_src0(data, i, arg0_text);
+    instr_out(data, hw_offset, i, "%s: %s %s, %s\n",
+	      instr_prefix, op_name, dest_text, arg0_text);
+    instr_out(data, hw_offset, i + 1, "%s\n", instr_prefix);
+    instr_out(data, hw_offset, i + 2, "%s\n", instr_prefix);
+}
+
+static void
+i915_decode_alu2(uint32_t *data, uint32_t hw_offset,
+		 int i, char *instr_prefix, char *op_name)
+{
+    char dest_text[100];
+    char arg0_text[100], arg1_text[100];
+    /* Format the operands first; only the first of the three dwords carries the disassembly. */
+    i915_get_instruction_dst(data, i, dest_text, 1);
+    i915_get_instruction_src0(data, i, arg0_text);
+    i915_get_instruction_src1(data, i, arg1_text);
+    instr_out(data, hw_offset, i, "%s: %s %s, %s, %s\n",
+	      instr_prefix, op_name, dest_text, arg0_text, arg1_text);
+    instr_out(data, hw_offset, i + 1, "%s\n", instr_prefix);
+    instr_out(data, hw_offset, i + 2, "%s\n", instr_prefix);
+}
+
+static void
+i915_decode_alu3(uint32_t *data, uint32_t hw_offset,
+		 int i, char *instr_prefix, char *op_name)
+{
+    char dest_text[100], arg0_text[100];
+    char arg1_text[100], arg2_text[100];
+    /* Format the operands first; only the first of the three dwords carries the disassembly. */
+    i915_get_instruction_dst(data, i, dest_text, 1);
+    i915_get_instruction_src0(data, i, arg0_text);
+    i915_get_instruction_src1(data, i, arg1_text);
+    i915_get_instruction_src2(data, i, arg2_text);
+    instr_out(data, hw_offset, i, "%s: %s %s, %s, %s, %s\n",
+	      instr_prefix, op_name, dest_text, arg0_text, arg1_text, arg2_text);
+    instr_out(data, hw_offset, i + 1, "%s\n", instr_prefix);
+    instr_out(data, hw_offset, i + 2, "%s\n", instr_prefix);
+}
+
+/**
+ * Print an i915 texture instruction in the form
+ * "OP dst, S<sampler>, <address register>".
+ *
+ * The sampler number is the low nibble of dword i; the address register
+ * type and number are taken from bits 26:24 and 20:17 of dword i + 1.
+ */
+static void
+i915_decode_tex(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix,
+		char *tex_name)
+{
+    const uint32_t word0 = data[i];
+    const uint32_t word1 = data[i + 1];
+    const uint32_t addr_type = (word1 >> 24) & 0x7;
+    const uint32_t addr_nr = (word1 >> 17) & 0xf;
+    const int sampler_nr = word0 & 0xf;
+    char dst_name[100];
+    char addr_name[100];
+    int word;
+
+    i915_get_instruction_dst(data, i, dst_name, 0);
+    i915_get_instruction_addr(addr_type, addr_nr, addr_name);
+
+    /* First dword carries the full text of the instruction. */
+    instr_out(data, hw_offset, i, "%s: %s %s, S%d, %s\n", instr_prefix,
+	      tex_name, dst_name, sampler_nr, addr_name);
+
+    /* Remaining two dwords are listed with the prefix alone. */
+    for (word = i + 1; word <= i + 2; word++)
+	instr_out(data, hw_offset, word, "%s\n", instr_prefix);
+}
+
+/**
+ * Print an i915 DCL (declaration) instruction occupying dwords
+ * i .. i + 2.
+ *
+ * Bits 20:19 of the first dword select what is declared:
+ *   1 - an input register (T0..T7, DIFFUSE, SPECULAR or FOG) with a
+ *       channel mask taken from bits 13:10;
+ *   3 - a sampler S<n> with a sample type taken from bits 23:22;
+ *   anything else is printed as reserved.
+ * The register number comes from bits 17:14.  Suspicious encodings are
+ * reported on `out` but never stop the decode.
+ */
+static void
+i915_decode_dcl(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix)
+{
+    uint32_t d0 = data[i];
+    char *sampletype;
+    int dcl_nr = (d0 >> 14) & 0xf;
+    char *dcl_x = d0 & (1 << 10) ? "x" : "";
+    char *dcl_y = d0 & (1 << 11) ? "y" : "";
+    char *dcl_z = d0 & (1 << 12) ? "z" : "";
+    char *dcl_w = d0 & (1 << 13) ? "w" : "";
+    char dcl_mask[10];
+
+    switch ((d0 >> 19) & 0x3) {
+    case 1:
+	/* dcl_mask always starts with '.', e.g. ".xy" or ".xyzw". */
+	sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w);
+	if (strcmp(dcl_mask, ".") == 0)
+	    fprintf(out, "bad (empty) dcl mask\n");
+
+	if (dcl_nr > 10)
+	    fprintf(out, "bad T%d dcl register number\n", dcl_nr);
+	if (dcl_nr < 8) {
+	    if (strcmp(dcl_mask, ".x") != 0 &&
+		strcmp(dcl_mask, ".xy") != 0 &&
+		strcmp(dcl_mask, ".xz") != 0 &&
+		strcmp(dcl_mask, ".w") != 0 &&
+		strcmp(dcl_mask, ".xyzw") != 0) {
+		/* The mask already carries its leading dot. */
+		fprintf(out, "bad T%d%s dcl mask\n", dcl_nr, dcl_mask);
+	    }
+	    instr_out(data, hw_offset, i++, "%s: DCL T%d%s\n", instr_prefix,
+		      dcl_nr, dcl_mask);
+	} else {
+	    if (strcmp(dcl_mask, ".xz") == 0)
+		fprintf(out, "errataed bad dcl mask %s\n", dcl_mask);
+	    else if (strcmp(dcl_mask, ".xw") == 0)
+		fprintf(out, "errataed bad dcl mask %s\n", dcl_mask);
+	    else if (strcmp(dcl_mask, ".xzw") == 0)
+		fprintf(out, "errataed bad dcl mask %s\n", dcl_mask);
+
+	    if (dcl_nr == 8) {
+		instr_out(data, hw_offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix,
+			  dcl_mask);
+	    } else if (dcl_nr == 9) {
+		instr_out(data, hw_offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix,
+			  dcl_mask);
+	    } else if (dcl_nr == 10) {
+		instr_out(data, hw_offset, i++, "%s: DCL FOG%s\n", instr_prefix,
+			  dcl_mask);
+	    } else {
+		/* Register numbers 11..15: still emit a line for the first
+		 * dword so that i advances and the two trailing dwords are
+		 * printed at the right offsets.
+		 */
+		instr_out(data, hw_offset, i++, "%s: DCL RESERVED%d%s\n",
+			  instr_prefix, dcl_nr, dcl_mask);
+	    }
+	}
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	break;
+    case 3:
+	switch ((d0 >> 22) & 0x3) {
+	case 0:
+	    sampletype = "2D";
+	    break;
+	case 1:
+	    sampletype = "CUBE";
+	    break;
+	case 2:
+	    sampletype = "3D";
+	    break;
+	default:
+	    sampletype = "RESERVED";
+	    break;
+	}
+	/* dcl_nr is a 4-bit field, so this check cannot currently fire. */
+	if (dcl_nr > 15)
+	    fprintf(out, "bad S%d dcl register number\n", dcl_nr);
+	instr_out(data, hw_offset, i++, "%s: DCL S%d %s\n", instr_prefix,
+		  dcl_nr, sampletype);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	break;
+    default:
+	instr_out(data, hw_offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	break;
+    }
+}
+
+/**
+ * Disassemble one three-dword i915 pixel shader instruction at data[i].
+ *
+ * The opcode is bits 28:24 of the first dword.  ALU opcodes are routed
+ * to the helper matching their number of source operands, texture
+ * opcodes to i915_decode_tex() and DCL to i915_decode_dcl().  NOP and
+ * unrecognised opcodes are printed directly.
+ */
+static void
+i915_decode_instruction(uint32_t *data, uint32_t hw_offset,
+			int i, char *instr_prefix)
+{
+    switch ((data[i] >> 24) & 0x1f) {
+    case 0x0:
+	instr_out(data, hw_offset, i++, "%s: NOP\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	break;
+    case 0x01:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "ADD");
+	break;
+    case 0x02:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "MOV");
+	break;
+    case 0x03:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "MUL");
+	break;
+    case 0x04:
+	i915_decode_alu3(data, hw_offset, i, instr_prefix, "MAD");
+	break;
+    case 0x05:
+	i915_decode_alu3(data, hw_offset, i, instr_prefix, "DP2ADD");
+	break;
+    case 0x06:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "DP3");
+	break;
+    case 0x07:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "DP4");
+	break;
+    case 0x08:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "FRC");
+	break;
+    case 0x09:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "RCP");
+	break;
+    case 0x0a:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "RSQ");
+	break;
+    case 0x0b:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "EXP");
+	break;
+    case 0x0c:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "LOG");
+	break;
+    case 0x0d:
+	/* CMP selects between src1 and src2 based on src0, so it takes
+	 * three sources; decoding it with two would drop src2.
+	 */
+	i915_decode_alu3(data, hw_offset, i, instr_prefix, "CMP");
+	break;
+    case 0x0e:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "MIN");
+	break;
+    case 0x0f:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "MAX");
+	break;
+    case 0x10:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "FLR");
+	break;
+    case 0x11:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "MOD");
+	break;
+    case 0x12:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "TRC");
+	break;
+    case 0x13:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "SGE");
+	break;
+    case 0x14:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "SLT");
+	break;
+    case 0x15:
+	i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLD");
+	break;
+    case 0x16:
+	i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLDP");
+	break;
+    case 0x17:
+	i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLDB");
+	break;
+    case 0x19:
+	i915_decode_dcl(data, hw_offset, i, instr_prefix);
+	break;
+    default:
+	instr_out(data, hw_offset, i++, "%s: unknown\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	break;
+    }
+}
+
+/**
+ * Decode one "3D 1D"-class state command starting at data[0].
+ *
+ * The sub-opcode is bits 23:16 of data[0].  Commands with a variable
+ * layout (LOAD_INDIRECT, LOAD_STATE_IMMEDIATE_1, MAP_STATE,
+ * PIXEL_SHADER_CONSTANTS, PIXEL_SHADER_PROGRAM, SAMPLER_STATE and
+ * DEST_BUFFER_VARIABLES) are decoded field by field; everything else is
+ * looked up in a table and dumped as "dword N".
+ *
+ * @param data       command dwords; data[0] is the header
+ * @param count      number of dwords available in data
+ * @param hw_offset  passed through to instr_out()
+ * @param failures   incremented when a length mismatch or an unknown
+ *                   opcode is found
+ * @param i830       non-zero to enable the i830-only table entries and
+ *                   to skip the i915 interpretation of sub-opcode 0x01
+ * @return the command length in dwords as encoded in the header, or 1
+ *         for an unknown opcode.  The BUFFER_FAIL() macro, defined
+ *         outside this function, is used when the buffer is too short.
+ */
+static int
+decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830)
+{
+    unsigned int len, i, c, opcode, word, map, sampler, instr;
+    char *format;
+
+    struct {
+	uint32_t opcode;
+	int i830_only;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_3d_1d[] = {
+	{ 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
+	{ 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" },
+	{ 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" },
+	{ 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
+	{ 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
+	{ 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
+	{ 0x98, 0, 2, 2, "3DSTATE_DEFAULT_Z" },
+	{ 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
+	{ 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" },
+	{ 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" },
+	{ 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
+	{ 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
+	{ 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" },
+	{ 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" },
+	{ 0x8f, 0, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" },
+	{ 0x81, 0, 3, 3, "3DSTATE_SCISSOR_RECTANGLE" },
+	{ 0x83, 0, 2, 2, "3DSTATE_SPAN_STIPPLE" },
+	{ 0x8c, 1, 2, 2, "3DSTATE_MAP_COORD_TRANSFORM_I830" },
+	{ 0x8b, 1, 2, 2, "3DSTATE_MAP_VERTEX_TRANSFORM_I830" },
+	{ 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" },
+	{ 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" },
+	{ 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" },
+    };
+
+    switch ((data[0] & 0x00ff0000) >> 16) {
+    case 0x07:
+	/* This instruction is unusual.  A 0 length means just 1 DWORD instead of
+	 * 2.  The 0 length is specified in one place to be unsupported, but
+	 * stated to be required in another, and 0 length LOAD_INDIRECTs appear
+	 * to cause no harm at least.
+	 */
+	instr_out(data, hw_offset, 0, "3DSTATE_LOAD_INDIRECT\n");
+	len = (data[0] & 0x000000ff) + 1;
+	i = 1;
+	/* Bits 13:8 of the header select which state blocks follow. */
+	if (data[0] & (0x01 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "SIS.0\n");
+	    instr_out(data, hw_offset, i++, "SIS.1\n");
+	}
+	if (data[0] & (0x02 << 8)) {
+	    if (i + 1 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "DIS.0\n");
+	}
+	if (data[0] & (0x04 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "SSB.0\n");
+	    instr_out(data, hw_offset, i++, "SSB.1\n");
+	}
+	if (data[0] & (0x08 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "MSB.0\n");
+	    instr_out(data, hw_offset, i++, "MSB.1\n");
+	}
+	if (data[0] & (0x10 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "PSP.0\n");
+	    instr_out(data, hw_offset, i++, "PSP.1\n");
+	}
+	if (data[0] & (0x20 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "PSC.0\n");
+	    instr_out(data, hw_offset, i++, "PSC.1\n");
+	}
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n");
+	    (*failures)++;
+	}
+	return len;
+    case 0x04:
+	instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+	len = (data[0] & 0x0000000f) + 2;
+	i = 1;
+	/* Bits 11:4 of the header flag which of S0..S7 are present. */
+	for (word = 0; word <= 7; word++) {
+	    if (data[0] & (1 << (4 + word))) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1");
+
+		/* save vertex state for decode */
+		if (word == 2) {
+		    saved_s2_set = 1;
+		    saved_s2 = data[i];
+		}
+		if (word == 4) {
+		    saved_s4_set = 1;
+		    saved_s4 = data[i];
+		}
+
+		instr_out(data, hw_offset, i++, "S%d\n", word);
+	    }
+	}
+	if (len != i) {
+	    /* Report the command actually being decoded. */
+	    fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+	    (*failures)++;
+	}
+	return len;
+    case 0x00:
+	instr_out(data, hw_offset, 0, "3DSTATE_MAP_STATE\n");
+	len = (data[0] & 0x0000003f) + 2;
+	instr_out(data, hw_offset, 1, "mask\n");
+
+	/* data[1] is a 16-bit map mask; three dwords follow per set bit. */
+	i = 2;
+	for (map = 0; map <= 15; map++) {
+	    if (data[1] & (1 << map)) {
+		if (i + 3 >= count)
+		    BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE");
+		instr_out(data, hw_offset, i++, "map %d MS2\n", map);
+		instr_out(data, hw_offset, i++, "map %d MS3\n", map);
+		instr_out(data, hw_offset, i++, "map %d MS4\n", map);
+	    }
+	}
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_MAP_STATE\n");
+	    (*failures)++;
+	}
+	return len;
+    case 0x06:
+	instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n");
+	len = (data[0] & 0x000000ff) + 2;
+
+	/* data[1] is a 32-bit constant mask; four floats follow per set
+	 * bit.  The shift is done unsigned because c reaches 31.
+	 */
+	i = 2;
+	for (c = 0; c <= 31; c++) {
+	    if (data[1] & (1u << c)) {
+		if (i + 4 >= count)
+		    BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_CONSTANTS");
+		instr_out(data, hw_offset, i, "C%d.X = %f\n",
+			  c, int_as_float(data[i]));
+		i++;
+		instr_out(data, hw_offset, i, "C%d.Y = %f\n",
+			  c, int_as_float(data[i]));
+		i++;
+		instr_out(data, hw_offset, i, "C%d.Z = %f\n",
+			  c, int_as_float(data[i]));
+		i++;
+		instr_out(data, hw_offset, i, "C%d.W = %f\n",
+			  c, int_as_float(data[i]));
+		i++;
+	    }
+	}
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_CONSTANTS\n");
+	    (*failures)++;
+	}
+	return len;
+    case 0x05:
+	instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n");
+	len = (data[0] & 0x000000ff) + 2;
+	/* The payload must be a whole number of 3-dword instructions. */
+	if ((len - 1) % 3 != 0 || len > 370) {
+	    fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_PROGRAM\n");
+	    (*failures)++;
+	}
+	i = 1;
+	for (instr = 0; instr < (len - 1) / 3; instr++) {
+	    char instr_prefix[10];
+
+	    if (i + 3 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_PROGRAM");
+	    sprintf(instr_prefix, "PS%03u", instr);
+	    i915_decode_instruction(data, hw_offset, i, instr_prefix);
+	    i += 3;
+	}
+	return len;
+    case 0x01:
+	/* On i830 this sub-opcode is COLOR_FACTOR; let the table handle it. */
+	if (i830)
+	    break;
+	instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n");
+	instr_out(data, hw_offset, 1, "mask\n");
+	len = (data[0] & 0x0000003f) + 2;
+	/* data[1] is a 16-bit sampler mask; three dwords follow per set bit. */
+	i = 2;
+	for (sampler = 0; sampler <= 15; sampler++) {
+	    if (data[1] & (1 << sampler)) {
+		if (i + 3 >= count)
+		    BUFFER_FAIL(count, len, "3DSTATE_SAMPLER_STATE");
+		instr_out(data, hw_offset, i++, "sampler %d SS2\n",
+			  sampler);
+		instr_out(data, hw_offset, i++, "sampler %d SS3\n",
+			  sampler);
+		instr_out(data, hw_offset, i++, "sampler %d SS4\n",
+			  sampler);
+	    }
+	}
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_SAMPLER_STATE\n");
+	    (*failures)++;
+	}
+	return len;
+    case 0x85:
+	len = (data[0] & 0x0000000f) + 2;
+
+	if (len != 2)
+	    fprintf(out, "Bad count in 3DSTATE_DEST_BUFFER_VARIABLES\n");
+	if (count < 2)
+	    BUFFER_FAIL(count, len, "3DSTATE_DEST_BUFFER_VARIABLES");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_DEST_BUFFER_VARIABLES\n");
+
+	/* Bits 11:8 of data[1] select the colour buffer format. */
+	switch ((data[1] >> 8) & 0xf) {
+	case 0x0: format = "g8"; break;
+	case 0x1: format = "x1r5g5b5"; break;
+	case 0x2: format = "r5g6b5"; break;
+	case 0x3: format = "a8r8g8b8"; break;
+	case 0x4: format = "ycrcb_swapy"; break;
+	case 0x5: format = "ycrcb_normal"; break;
+	case 0x6: format = "ycrcb_swapuv"; break;
+	case 0x7: format = "ycrcb_swapuvy"; break;
+	case 0x8: format = "a4r4g4b4"; break;
+	case 0x9: format = "a1r5g5b5"; break;
+	case 0xa: format = "a2r10g10b10"; break;
+	default: format = "BAD"; break;
+	}
+	/* Bit 31 is tested with an unsigned constant to avoid shifting
+	 * into the sign bit of an int.
+	 */
+	instr_out(data, hw_offset, 1, "%s format, early Z %sabled\n",
+		  format,
+		  (data[1] & (1u << 31)) ? "en" : "dis");
+	return len;
+    }
+
+    /* Generic path: find the sub-opcode in the table and dump raw dwords. */
+    for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]);
+	 opcode++)
+    {
+	if (opcodes_3d_1d[opcode].i830_only && !i830)
+	    continue;
+
+	if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) {
+	    len = 1;
+
+	    instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name);
+	    if (opcodes_3d_1d[opcode].max_len > 1) {
+		len = (data[0] & 0x0000ffff) + 2;
+		if (len < opcodes_3d_1d[opcode].min_len ||
+		    len > opcodes_3d_1d[opcode].max_len)
+		{
+		    fprintf(out, "Bad count in %s\n",
+			    opcodes_3d_1d[opcode].name);
+		    (*failures)++;
+		}
+	    }
+
+	    for (i = 1; i < len; i++) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len,  opcodes_3d_1d[opcode].name);
+		instr_out(data, hw_offset, i, "dword %d\n", i);
+	    }
+
+	    return len;
+	}
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/**
+ * Decode a 3DPRIMITIVE command starting at data[0].
+ *
+ * Bit 23 of the header clear means the vertex data is inline; it is
+ * then printed per component using the vertex layout captured in
+ * saved_s2 / saved_s4 (raw floats are printed if that layout has not
+ * been seen).  Bit 23 set means the vertices are indirect: either a
+ * list of 16-bit indices packed two per dword (bit 17 set), or a
+ * sequential range described by a single extra dword.
+ *
+ * @param data       command dwords; data[0] is the header
+ * @param count      number of dwords available in data
+ * @param hw_offset  passed through to instr_out()
+ * @param failures   incremented when no index terminator is found
+ * @return the value computed for the command's extent in dwords (see
+ *         the individual return statements).  The BUFFER_FAIL() macro,
+ *         defined outside this function, is used when the buffer is
+ *         too short.
+ */
+static int
+decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset,
+		    int *failures)
+{
+    char immediate = (data[0] & (1 << 23)) == 0;
+    unsigned int len, i;
+    char *primtype;
+
+    switch ((data[0] >> 18) & 0xf) {
+    case 0x0: primtype = "TRILIST"; break;
+    case 0x1: primtype = "TRISTRIP"; break;
+    case 0x2: primtype = "TRISTRIP_REVERSE"; break;
+    case 0x3: primtype = "TRIFAN"; break;
+    case 0x4: primtype = "POLYGON"; break;
+    case 0x5: primtype = "LINELIST"; break;
+    case 0x6: primtype = "LINESTRIP"; break;
+    case 0x7: primtype = "RECTLIST"; break;
+    case 0x8: primtype = "POINTLIST"; break;
+    case 0x9: primtype = "DIB"; break;
+    case 0xa: primtype = "CLEAR_RECT"; break;
+    default: primtype = "unknown"; break;
+    }
+
+    /* XXX: 3DPRIM_DIB not supported */
+    if (immediate) {
+	len = (data[0] & 0x0003ffff) + 2;
+	instr_out(data, hw_offset, 0, "3DPRIMITIVE inline %s\n", primtype);
+	if (count < len)
+	    BUFFER_FAIL(count, len,  "3DPRIMITIVE inline");
+	if (!saved_s2_set || !saved_s4_set) {
+	    /* Without S2/S4 the layout is unknown: dump every dword. */
+	    fprintf(out, "unknown vertex format\n");
+	    for (i = 1; i < len; i++) {
+		instr_out(data, hw_offset, i,
+			  "           vertex data (%f float)\n",
+			  int_as_float(data[i]));
+	    }
+	} else {
+	    unsigned int vertex = 0;
+	    for (i = 1; i < len;) {
+		unsigned int tc;
+
+/* Print one vertex component at dword i and advance i; if the command
+ * ends mid-vertex, report the missing data instead of reading past len.
+ */
+#define VERTEX_OUT(fmt, ...) do {					\
+    if (i < len)							\
+	instr_out(data, hw_offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \
+    else								\
+	fprintf(out, " missing data in V%d\n", vertex);			\
+    i++;								\
+} while (0)
+
+		VERTEX_OUT("X = %f", int_as_float(data[i]));
+		VERTEX_OUT("Y = %f", int_as_float(data[i]));
+		/* Bits 8:6 of S4 select which of Z/W follow X and Y. */
+	        switch (saved_s4 >> 6 & 0x7) {
+		case 0x1:
+		    VERTEX_OUT("Z = %f", int_as_float(data[i]));
+		    break;
+		case 0x2:
+		    VERTEX_OUT("Z = %f", int_as_float(data[i]));
+		    VERTEX_OUT("W = %f", int_as_float(data[i]));
+		    break;
+		case 0x3:
+		    break;
+		case 0x4:
+		    VERTEX_OUT("W = %f", int_as_float(data[i]));
+		    break;
+		default:
+		    fprintf(out, "bad S4 position mask\n");
+		}
+
+		if (saved_s4 & (1 << 10)) {
+		    VERTEX_OUT("color = (A=0x%02x, R=0x%02x, G=0x%02x, "
+			       "B=0x%02x)",
+			       data[i] >> 24,
+			       (data[i] >> 16) & 0xff,
+			       (data[i] >> 8) & 0xff,
+			       data[i] & 0xff);
+		}
+		if (saved_s4 & (1 << 11)) {
+		    VERTEX_OUT("spec = (A=0x%02x, R=0x%02x, G=0x%02x, "
+			       "B=0x%02x)",
+			       data[i] >> 24,
+			       (data[i] >> 16) & 0xff,
+			       (data[i] >> 8) & 0xff,
+			       data[i] & 0xff);
+		}
+		if (saved_s4 & (1 << 12))
+		    VERTEX_OUT("width = 0x%08x", data[i]);
+
+		/* S2 holds one 4-bit format code per texcoord set. */
+		for (tc = 0; tc <= 7; tc++) {
+		    switch ((saved_s2 >> (tc * 4)) & 0xf) {
+		    case 0x0:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x1:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x2:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.W = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x3:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x4:
+			VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]);
+			break;
+		    case 0x5:
+			VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]);
+			VERTEX_OUT("T%d.ZW = 0x%08x half-float", tc, data[i]);
+			break;
+		    case 0xf:
+			break;
+		    default:
+			fprintf(out, "bad S2.T%d format\n", tc);
+		    }
+		}
+		vertex++;
+	    }
+	}
+    } else {
+	/* indirect vertices */
+	len = data[0] & 0x0000ffff; /* index count */
+	if (data[0] & (1 << 17)) {
+	    /* random vertex access */
+	    if (count < (len + 1) / 2 + 1) {
+		BUFFER_FAIL(count, (len + 1) / 2 + 1,
+			    "3DPRIMITIVE random indirect");
+	    }
+	    instr_out(data, hw_offset, 0,
+		      "3DPRIMITIVE random indirect %s (%d)\n", primtype, len);
+	    if (len == 0) {
+		/* vertex indices continue until 0xffff is found */
+		/* NOTE(review): the value returned here is the position of
+		 * the terminator dword, not a count that includes it --
+		 * confirm this is what the caller expects.
+		 */
+		for (i = 1; i < count; i++) {
+		    if ((data[i] & 0xffff) == 0xffff) {
+			instr_out(data, hw_offset, i,
+				  "            indices: (terminator)\n");
+			return i;
+		    } else if ((data[i] >> 16) == 0xffff) {
+			instr_out(data, hw_offset, i,
+				  "            indices: 0x%04x, "
+				  "(terminator)\n",
+				  data[i] & 0xffff);
+			return i;
+		    } else {
+			instr_out(data, hw_offset, i,
+				  "            indices: 0x%04x, 0x%04x\n",
+				  data[i] & 0xffff, data[i] >> 16);
+		    }
+		}
+		fprintf(out,
+			"3DPRIMITIVE: no terminator found in index buffer\n");
+		(*failures)++;
+		return count;
+	    } else {
+		/* fixed size vertex index buffer: the indices are packed two
+		 * per dword starting at data[1], so walk the dwords
+		 * separately from the index counter i.
+		 */
+		unsigned int dword = 1;
+
+		for (i = 0; i < len; i += 2, dword++) {
+		    if (i == len - 1) {
+			/* odd index count: only the low half is valid */
+			instr_out(data, hw_offset, dword,
+				  "            indices: 0x%04x\n",
+				  data[dword] & 0xffff);
+		    } else {
+			instr_out(data, hw_offset, dword,
+				  "            indices: 0x%04x, 0x%04x\n",
+				  data[dword] & 0xffff, data[dword] >> 16);
+		    }
+		}
+	    }
+	    /* header dword plus ceil(len / 2) index dwords */
+	    return (len + 1) / 2 + 1;
+	} else {
+	    /* sequential vertex access */
+	    if (count < 2)
+		BUFFER_FAIL(count, 2, "3DPRIMITIVE seq indirect");
+	    instr_out(data, hw_offset, 0,
+		      "3DPRIMITIVE sequential indirect %s, %d starting from "
+		      "%d\n", primtype, len, data[1] & 0xffff);
+	    instr_out(data, hw_offset, 1, "           start\n");
+	    return 2;
+	}
+    }
+
+    return len;
+}
+
+/**
+ * Decode one i915-class 3D command starting at data[0].
+ *
+ * Bits 28:24 of the header select the command class.  Classes 0x1f,
+ * 0x1d and 0x1c are handed to their dedicated decoders; any other
+ * value is looked up in a small table of named commands.
+ *
+ * Returns the length reported by the decoder that handled the command,
+ * or 1 (after bumping *failures) when the opcode is not recognised.
+ */
+static int
+decode_3d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_3d[] = {
+	{ 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" },
+	{ 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" },
+	{ 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" },
+	{ 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" },
+	{ 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
+	{ 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" },
+	{ 0x0d, 1, 1, "3DSTATE_MODES_4" },
+	{ 0x0c, 1, 1, "3DSTATE_MODES_5" },
+	{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
+    };
+    const uint32_t cmd_class = (data[0] >> 24) & 0x1f;
+    unsigned int entry;
+
+    /* Command classes that have their own decoder. */
+    if (cmd_class == 0x1f)
+	return decode_3d_primitive(data, count, hw_offset, failures);
+    if (cmd_class == 0x1d)
+	return decode_3d_1d(data, count, hw_offset, failures, 0);
+    if (cmd_class == 0x1c)
+	return decode_3d_1c(data, count, hw_offset, failures);
+
+    /* Everything else: name it from the table and dump raw dwords. */
+    for (entry = 0; entry < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
+	 entry++) {
+	unsigned int len, i;
+
+	if (opcodes_3d[entry].opcode != cmd_class)
+	    continue;
+
+	instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[entry].name);
+
+	len = 1;
+	if (opcodes_3d[entry].max_len > 1) {
+	    len = (data[0] & 0xff) + 2;
+	    if (len < opcodes_3d[entry].min_len ||
+		len > opcodes_3d[entry].max_len)
+	    {
+		fprintf(out, "Bad count in %s\n", opcodes_3d[entry].name);
+	    }
+	}
+
+	for (i = 1; i < len; i++) {
+	    if (i >= count)
+		BUFFER_FAIL(count, len, opcodes_3d[entry].name);
+	    instr_out(data, hw_offset, i, "dword %d\n", i);
+	}
+	return len;
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/**
+ * Map a 965 surface type code to a printable name.
+ *
+ * Codes 0..4 and 7 have names; every other value yields "unknown".
+ */
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+    static const char *const names[] = {
+	"1D",		/* 0 */
+	"2D",		/* 1 */
+	"3D",		/* 2 */
+	"CUBE",		/* 3 */
+	"BUFFER",	/* 4 */
+	"unknown",	/* 5 */
+	"unknown",	/* 6 */
+	"NULL",		/* 7 */
+    };
+
+    if (surfacetype >= sizeof(names) / sizeof(names[0]))
+	return "unknown";
+    return names[surfacetype];
+}
+
+/**
+ * Map a 965 depth buffer format code to a printable name.
+ *
+ * Codes 0, 1, 2 and 5 have names; every other value yields "unknown".
+ */
+static const char *
+get_965_depthformat(unsigned int depthformat)
+{
+    static const char *const names[] = {
+	"s8_z24float",	/* 0 */
+	"z32float",	/* 1 */
+	"z24s8",	/* 2 */
+	"unknown",	/* 3 */
+	"unknown",	/* 4 */
+	"z16",		/* 5 */
+    };
+
+    if (depthformat >= sizeof(names) / sizeof(names[0]))
+	return "unknown";
+    return names[depthformat];
+}
+
+/**
+ * Describe how one component of a 965 vertex element is sourced.
+ *
+ * The 3-bit control for `component` (0..3) is read from `data` at bit
+ * position 16 + (3 - component) * 4.  A control value of 1 means the
+ * component is taken from the vertex data and is reported by its axis
+ * letter; the other recognised values are fixed descriptions.
+ * Anything unrecognised yields "fail".
+ */
+static const char *
+get_965_element_component(uint32_t data, int component)
+{
+    static const char *const axis_names[4] = { "X", "Y", "Z", "W" };
+    const uint32_t control = (data >> (16 + (3 - component) * 4)) & 0x7;
+
+    if (control == 0)
+	return "nostore";
+    if (control == 1) {
+	if (component >= 0 && component <= 3)
+	    return axis_names[component];
+	return "fail";
+    }
+    if (control == 2)
+	return "0.0";
+    if (control == 3)
+	return "1.0";
+    if (control == 4)
+	return "0x1";
+    if (control == 5)
+	return "VID";
+    return "fail";
+}
+
+/**
+ * Name the primitive topology encoded in bits 14:10 of a 965
+ * 3DPRIMITIVE header dword.
+ *
+ * Values 0x01..0x15 have names; every other value yields "fail".
+ */
+static const char *
+get_965_prim_type(uint32_t data)
+{
+    static const char *const names[] = {
+	"fail",			/* 0x00 */
+	"point list",		/* 0x01 */
+	"line list",		/* 0x02 */
+	"line strip",		/* 0x03 */
+	"tri list",		/* 0x04 */
+	"tri strip",		/* 0x05 */
+	"tri fan",		/* 0x06 */
+	"quad list",		/* 0x07 */
+	"quad strip",		/* 0x08 */
+	"line list adj",	/* 0x09 */
+	"line strip adj",	/* 0x0a */
+	"tri list adj",		/* 0x0b */
+	"tri strip adj",	/* 0x0c */
+	"tri strip reverse",	/* 0x0d */
+	"polygon",		/* 0x0e */
+	"rect list",		/* 0x0f */
+	"line loop",		/* 0x10 */
+	"point list bf",	/* 0x11 */
+	"line strip cont",	/* 0x12 */
+	"line strip bf",	/* 0x13 */
+	"line strip cont bf",	/* 0x14 */
+	"tri fan no stipple",	/* 0x15 */
+    };
+    const uint32_t topology = (data >> 10) & 0x1f;
+
+    if (topology >= sizeof(names) / sizeof(names[0]))
+	return "fail";
+    return names[topology];
+}
+
+/**
+ * Decode one 965-class 3D command starting at data[0].
+ *
+ * The opcode is the upper 16 bits of the header.  Commands listed in
+ * the switch are decoded field by field; any other opcode is looked up
+ * in opcodes_3d[] and its payload dumped as "dword N".
+ *
+ * @param data       command dwords; data[0] is the header
+ * @param count      number of dwords available in data
+ * @param hw_offset  passed through to instr_out()
+ * @param failures   incremented when the opcode is not recognised
+ * @return the command length in dwords derived from the header, or 1
+ *         for an unknown opcode.  The BUFFER_FAIL() macro, defined
+ *         outside this function, is used when the buffer is too short.
+ */
+static int
+decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    unsigned int opcode, len;
+    int i;
+    char *desc1;
+
+    /* Name and expected length range for commands handled generically.
+     * Opcodes that also appear in the switch below never reach this table.
+     */
+    struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_3d[] = {
+	{ 0x6000, 3, 3, "URB_FENCE" },
+	{ 0x6001, 2, 2, "CS_URB_STATE" },
+	{ 0x6002, 2, 2, "CONSTANT_BUFFER" },
+	{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
+	{ 0x6102, 2, 2 , "STATE_SIP" },
+	{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+	{ 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
+	{ 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+	{ 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
+	{ 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
+	{ 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
+	{ 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
+	{ 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
+	{ 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
+	{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
+	{ 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
+	{ 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
+	{ 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
+	{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
+	{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
+	{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
+	{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
+	{ 0x7b00, 6, 6, "3DPRIMITIVE" },
+	{ 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" },
+	{ 0x7810, 6, 6, "3DSTATE_VS_STATE" },
+	{ 0x7811, 6, 6, "3DSTATE_GS_STATE" },
+	{ 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
+	{ 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
+	{ 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
+    };
+
+    /* Default length: low 16 bits of the header plus 2.  Several cases
+     * below recompute it from the low 8 bits only.
+     */
+    len = (data[0] & 0x0000ffff) + 2;
+
+    switch ((data[0] & 0xffff0000) >> 16) {
+    case 0x6101:
+	if (len != 6)
+	    fprintf(out, "Bad count in STATE_BASE_ADDRESS\n");
+	if (count < 6)
+	    BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS");
+
+	instr_out(data, hw_offset, 0,
+		  "STATE_BASE_ADDRESS\n");
+
+	/* For each dword, bit 0 says whether the value is being updated;
+	 * the remaining bits are printed as the address or bound.
+	 */
+	if (data[1] & 1) {
+	    instr_out(data, hw_offset, 1, "General state at 0x%08x\n",
+		      data[1] & ~1);
+	} else
+	    instr_out(data, hw_offset, 1, "General state not updated\n");
+
+	if (data[2] & 1) {
+	    instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n",
+		      data[2] & ~1);
+	} else
+	    instr_out(data, hw_offset, 2, "Surface state not updated\n");
+
+	if (data[3] & 1) {
+	    instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n",
+		      data[3] & ~1);
+	} else
+	    instr_out(data, hw_offset, 3, "Indirect state not updated\n");
+
+	if (data[4] & 1) {
+	    instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n",
+		      data[4] & ~1);
+	} else
+	    instr_out(data, hw_offset, 4, "General state not updated\n");
+
+	if (data[5] & 1) {
+	    instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n",
+		      data[5] & ~1);
+	} else
+	    instr_out(data, hw_offset, 5, "Indirect state not updated\n");
+
+	return len;
+    case 0x7800:
+	if (len != 7)
+	    fprintf(out, "Bad count in 3DSTATE_PIPELINED_POINTERS\n");
+	if (count < 7)
+	    BUFFER_FAIL(count, len, "3DSTATE_PIPELINED_POINTERS");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_PIPELINED_POINTERS\n");
+	instr_out(data, hw_offset, 1, "VS state\n");
+	instr_out(data, hw_offset, 2, "GS state\n");
+	instr_out(data, hw_offset, 3, "Clip state\n");
+	instr_out(data, hw_offset, 4, "SF state\n");
+	instr_out(data, hw_offset, 5, "WM state\n");
+	instr_out(data, hw_offset, 6, "CC state\n");
+	return len;
+    case 0x7801:
+	if (len != 6)
+	    fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n");
+	if (count < 6)
+	    BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_BINDING_TABLE_POINTERS\n");
+	instr_out(data, hw_offset, 1, "VS binding table\n");
+	instr_out(data, hw_offset, 2, "GS binding table\n");
+	instr_out(data, hw_offset, 3, "Clip binding table\n");
+	instr_out(data, hw_offset, 4, "SF binding table\n");
+	instr_out(data, hw_offset, 5, "WM binding table\n");
+
+	return len;
+
+    case 0x7808:
+	len = (data[0] & 0xff) + 2;
+	/* The payload is expected to be groups of four dwords per buffer. */
+	if ((len - 1) % 4 != 0)
+	    fprintf(out, "Bad count in 3DSTATE_VERTEX_BUFFERS\n");
+	if (count < len)
+	    BUFFER_FAIL(count, len, "3DSTATE_VERTEX_BUFFERS");
+	instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
+
+	/* NOTE(review): the loop always consumes four dwords per pass, so a
+	 * malformed length can step past len -- confirm whether that matters.
+	 */
+	for (i = 1; i < len;) {
+	    instr_out(data, hw_offset, i, "buffer %d: %s, pitch %db\n",
+		      data[i] >> 27,
+		      data[i] & (1 << 26) ? "random" : "sequential",
+		      data[i] & 0x07ff);
+	    i++;
+	    instr_out(data, hw_offset, i++, "buffer address\n");
+	    instr_out(data, hw_offset, i++, "max index\n");
+	    instr_out(data, hw_offset, i++, "mbz\n");
+	}
+	return len;
+
+    case 0x7809:
+	len = (data[0] & 0xff) + 2;
+	/* The payload is expected to be pairs of dwords per element. */
+	if ((len + 1) % 2 != 0)
+	    fprintf(out, "Bad count in 3DSTATE_VERTEX_ELEMENTS\n");
+	if (count < len)
+	    BUFFER_FAIL(count, len, "3DSTATE_VERTEX_ELEMENTS");
+	instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
+
+	for (i = 1; i < len;) {
+	    /* First dword: source buffer, valid bit, type and offset. */
+	    instr_out(data, hw_offset, i, "buffer %d: %svalid, type 0x%04x, "
+		      "src offset 0x%04x bytes\n",
+		      data[i] >> 27,
+		      data[i] & (1 << 26) ? "" : "in",
+		      (data[i] >> 16) & 0x1ff,
+		      data[i] & 0x07ff);
+	    i++;
+	    /* Second dword: per-component controls and destination offset. */
+	    instr_out(data, hw_offset, i, "(%s, %s, %s, %s), "
+		      "dst offset 0x%02x bytes\n",
+		      get_965_element_component(data[i], 0),
+		      get_965_element_component(data[i], 1),
+		      get_965_element_component(data[i], 2),
+		      get_965_element_component(data[i], 3),
+		      (data[i] & 0xff) * 4);
+	    i++;
+	}
+	return len;
+
+    case 0x780a:
+	len = (data[0] & 0xff) + 2;
+	if (len != 3)
+	    fprintf(out, "Bad count in 3DSTATE_INDEX_BUFFER\n");
+	if (count < len)
+	    BUFFER_FAIL(count, len, "3DSTATE_INDEX_BUFFER");
+	instr_out(data, hw_offset, 0, "3DSTATE_INDEX_BUFFER\n");
+	instr_out(data, hw_offset, 1, "beginning buffer address\n");
+	instr_out(data, hw_offset, 2, "ending buffer address\n");
+	return len;
+
+    case 0x7900:
+	if (len != 4)
+	    fprintf(out, "Bad count in 3DSTATE_DRAWING_RECTANGLE\n");
+	if (count < 4)
+	    BUFFER_FAIL(count, len, "3DSTATE_DRAWING_RECTANGLE");
+
+	/* Each coordinate pair is printed low 16 bits first, then high. */
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_DRAWING_RECTANGLE\n");
+	instr_out(data, hw_offset, 1, "top left: %d,%d\n",
+		  data[1] & 0xffff,
+		  (data[1] >> 16) & 0xffff);
+	instr_out(data, hw_offset, 2, "bottom right: %d,%d\n",
+		  data[2] & 0xffff,
+		  (data[2] >> 16) & 0xffff);
+	instr_out(data, hw_offset, 3, "origin: %d,%d\n",
+		  (int)data[3] & 0xffff,
+		  ((int)data[3] >> 16) & 0xffff);
+
+	return len;
+
+    case 0x7905:
+	if (len < 5 || len > 7)
+	    fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n");
+	if (count < len)
+	    BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_DEPTH_BUFFER\n");
+	/* The pitch and both dimensions are printed as field value + 1. */
+	instr_out(data, hw_offset, 1, "%s, %s, pitch = %d bytes, %stiled\n",
+		  get_965_surfacetype(data[1] >> 29),
+		  get_965_depthformat((data[1] >> 18) & 0x7),
+		  (data[1] & 0x0001ffff) + 1,
+		  data[1] & (1 << 27) ? "" : "not ");
+	instr_out(data, hw_offset, 2, "depth offset\n");
+	instr_out(data, hw_offset, 3, "%dx%d\n",
+		  ((data[3] & 0x0007ffc0) >> 6) + 1,
+		  ((data[3] & 0xfff80000) >> 19) + 1);
+	instr_out(data, hw_offset, 4, "volume depth\n");
+	/* Only the final dword of a 6- or 7-dword command is printed here. */
+	if (len == 6)
+	    instr_out(data, hw_offset, 5, "\n");
+	if (len == 7)
+	    instr_out(data, hw_offset, 6, "render target view extent\n");
+
+	return len;
+
+    case 0x7a00:
+	len = (data[0] & 0xff) + 2;
+	if (len != 4)
+	    fprintf(out, "Bad count in PIPE_CONTROL\n");
+	if (count < len)
+	    BUFFER_FAIL(count, len, "PIPE_CONTROL");
+
+	/* Bits 15:14 select the write operation; all four values of the
+	 * 2-bit field are covered, so desc1 is always assigned.
+	 */
+	switch ((data[0] >> 14) & 0x3) {
+	case 0: desc1 = "no write"; break;
+	case 1: desc1 = "qword write"; break;
+	case 2: desc1 = "PS_DEPTH_COUNT write"; break;
+	case 3: desc1 = "TIMESTAMP write"; break;
+	}
+	instr_out(data, hw_offset, 0,
+		  "PIPE_CONTROL: %s, %sdepth stall, %sRC write flush, "
+		  "%sinst flush, %stexture flush\n",
+		  desc1,
+		  data[0] & (1 << 13) ? "" : "no ",
+		  data[0] & (1 << 12) ? "" : "no ",
+		  data[0] & (1 << 11) ? "" : "no ",
+		  data[0] & (1 << 9) ? "" : "no ");
+	instr_out(data, hw_offset, 1, "destination address\n");
+	instr_out(data, hw_offset, 2, "immediate dword low\n");
+	instr_out(data, hw_offset, 3, "immediate dword high\n");
+	return len;
+
+    case 0x7b00:
+	len = (data[0] & 0xff) + 2;
+	if (len != 6)
+	    fprintf(out, "Bad count in 3DPRIMITIVE\n");
+	if (count < len)
+	    BUFFER_FAIL(count, len, "3DPRIMITIVE");
+
+	/* Topology comes from the header; bit 15 picks random vs sequential. */
+	instr_out(data, hw_offset, 0,
+		  "3DPRIMITIVE: %s %s\n",
+		  get_965_prim_type(data[0]),
+		  (data[0] & (1 << 15)) ? "random" : "sequential");
+	instr_out(data, hw_offset, 1, "vertex count\n");
+	instr_out(data, hw_offset, 2, "start vertex\n");
+	instr_out(data, hw_offset, 3, "instance count\n");
+	instr_out(data, hw_offset, 4, "start instance\n");
+	instr_out(data, hw_offset, 5, "index bias\n");
+	return len;
+    }
+
+    /* Generic path: name the command from the table and dump raw dwords. */
+    for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
+	 opcode++) {
+	if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) {
+	    unsigned int i;
+	    len = 1;
+
+	    instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
+	    /* Single-dword commands carry no length field. */
+	    if (opcodes_3d[opcode].max_len > 1) {
+		len = (data[0] & 0xff) + 2;
+		if (len < opcodes_3d[opcode].min_len ||
+		    len > opcodes_3d[opcode].max_len)
+		{
+		    fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+		}
+	    }
+
+	    for (i = 1; i < len; i++) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+		instr_out(data, hw_offset, i, "dword %d\n", i);
+	    }
+	    return len;
+	}
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/* Decodes the 3D instruction at data[0] for i830-class parts; returns its length. */
+static int
+decode_3d_i830(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    unsigned int idx;
+    uint32_t op = (data[0] & 0x1f000000) >> 24;
+
+    /* Opcodes (header bits 28:24) that are only named, not decoded further. */
+    struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_3d[] = {
+	{ 0x02, 1, 1, "3DSTATE_MODES_3" },
+	{ 0x03, 1, 1, "3DSTATE_ENABLES_1"},
+	{ 0x04, 1, 1, "3DSTATE_ENABLES_2"},
+	{ 0x05, 1, 1, "3DSTATE_VFT0"},
+	{ 0x06, 1, 1, "3DSTATE_AA"},
+	{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
+	{ 0x08, 1, 1, "3DSTATE_MODES_1" },
+	{ 0x09, 1, 1, "3DSTATE_STENCIL_TEST" },
+	{ 0x0a, 1, 1, "3DSTATE_VFT1"},
+	{ 0x0b, 1, 1, "3DSTATE_INDPT_ALPHA_BLEND" },
+	{ 0x0c, 1, 1, "3DSTATE_MODES_5" },
+	{ 0x0d, 1, 1, "3DSTATE_MAP_BLEND_OP" },
+	{ 0x0e, 1, 1, "3DSTATE_MAP_BLEND_ARG" },
+	{ 0x0f, 1, 1, "3DSTATE_MODES_2" },
+	{ 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
+	{ 0x16, 1, 1, "3DSTATE_MODES_4" },
+    };
+
+    switch (op) {
+    case 0x1f:
+	return decode_3d_primitive(data, count, hw_offset, failures);
+    case 0x1d:
+	return decode_3d_1d(data, count, hw_offset, failures, 1);
+    case 0x1c:
+	return decode_3d_1c(data, count, hw_offset, failures);
+    }
+
+    for (idx = 0; idx < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); idx++) {
+	unsigned int len = 1, i;
+
+	if (op != opcodes_3d[idx].opcode)
+	    continue;
+	instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[idx].name);
+	if (opcodes_3d[idx].max_len > 1) {
+	    len = (data[0] & 0xff) + 2;
+	    if (len < opcodes_3d[idx].min_len ||
+		len > opcodes_3d[idx].max_len) {
+		fprintf(out, "Bad count in %s\n", opcodes_3d[idx].name);
+	    }
+	}
+	for (i = 1; i < len; i++) {
+	    if (i >= count)
+		BUFFER_FAIL(count, len, opcodes_3d[idx].name);
+	    instr_out(data, hw_offset, i, "dword %d\n", i);
+	}
+	return len;
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/**
+ * Decodes an i830 through i965 batch buffer, writing the output to stderr.
+ *
+ * \param data batch buffer contents
+ * \param count number of DWORDs to decode in the batch buffer
+ * \param hw_offset hardware address for the buffer
+ * \param devid device id; picks the 965, 9xx or i830 3D decoder
+ * \return number of failures counted while decoding
+ */
+int
+intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid)
+{
+    int index = 0;
+    int failures = 0;
+
+    out = stderr;
+
+    while (index < count) {
+	uint32_t *inst = data + index;
+	uint32_t addr = hw_offset + index * 4;
+	int remaining = count - index;
+
+	switch ((*inst & 0xe0000000) >> 29) {
+	case 0x0:
+	    index += decode_mi(inst, remaining, addr, &failures);
+	    break;
+	case 0x2:
+	    index += decode_2d(inst, remaining, addr, &failures);
+	    break;
+	case 0x3:
+	    if (IS_965(devid))
+		index += decode_3d_965(inst, remaining, addr, &failures);
+	    else if (IS_9XX(devid))
+		index += decode_3d(inst, remaining, addr, &failures);
+	    else
+		index += decode_3d_i830(inst, remaining, addr, &failures);
+	    break;
+	default:
+	    instr_out(data, hw_offset, index, "UNKNOWN\n");
+	    failures++;
+	    index++;
+	    break;
+	}
+	fflush(out);
+    }
+
+    return failures;
+}
+
+void intel_decode_context_reset(void)	/* resets the saved S2/S4 decoder flags */
+{
+    saved_s2_set = 0;	/* NOTE(review): presumably "no S2 value saved" -- confirm */
+    saved_s4_set = 1;	/* NOTE(review): set to 1, unlike S2 above -- confirm intent */
+}
+
diff --git a/src/mesa/drivers/dri/intel/intel_decode.h b/src/mesa/drivers/dri/intel/intel_decode.h
new file mode 100644
index 0000000000..c50644a46b
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_decode.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+int intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid);
+void intel_decode_context_reset(void);
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
new file mode 100644
index 0000000000..edba1fc2f2
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -0,0 +1,229 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "intel_chipset.h"
+#include "intel_context.h"
+#include "intel_extensions.h"
+#include "utils.h"
+
+
+#define need_GL_ARB_copy_buffer
+#define need_GL_ARB_draw_elements_base_vertex
+#define need_GL_ARB_framebuffer_object
+#define need_GL_ARB_map_buffer_range
+#define need_GL_ARB_occlusion_query
+#define need_GL_ARB_point_parameters
+#define need_GL_ARB_shader_objects
+#define need_GL_ARB_sync
+#define need_GL_ARB_vertex_array_object
+#define need_GL_ARB_vertex_program
+#define need_GL_ARB_vertex_shader
+#define need_GL_ARB_window_pos
+#define need_GL_EXT_blend_color
+#define need_GL_EXT_blend_equation_separate
+#define need_GL_EXT_blend_func_separate
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_cull_vertex
+#define need_GL_EXT_draw_buffers2
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_framebuffer_object
+#define need_GL_EXT_framebuffer_blit
+#define need_GL_EXT_gpu_program_parameters
+#define need_GL_EXT_point_parameters
+#define need_GL_EXT_provoking_vertex
+#define need_GL_EXT_secondary_color
+#define need_GL_EXT_stencil_two_side
+#define need_GL_EXT_timer_query
+#define need_GL_APPLE_vertex_array_object
+#define need_GL_APPLE_object_purgeable
+#define need_GL_ATI_separate_stencil
+#define need_GL_ATI_envmap_bumpmap
+#define need_GL_NV_point_sprite
+#define need_GL_NV_vertex_program
+#define need_GL_OES_EGL_image
+#define need_GL_VERSION_2_0
+#define need_GL_VERSION_2_1
+
+#include "main/remap_helper.h"
+
+
+/**
+ * Extensions enabled for every chip handled by i830_dri, i915_dri or
+ * i965_dri.  Each entry pairs the extension string with its function table
+ * (NULL when there is none -- presumably extensions without new entry
+ * points; confirm); the list ends with a { NULL, NULL } sentinel.
+ */
+static const struct dri_extension card_extensions[] = {
+   { "GL_ARB_copy_buffer",                GL_ARB_copy_buffer_functions },
+   { "GL_ARB_draw_elements_base_vertex",  GL_ARB_draw_elements_base_vertex_functions },
+   { "GL_ARB_half_float_pixel",           NULL },
+   { "GL_ARB_map_buffer_range",           GL_ARB_map_buffer_range_functions },
+   { "GL_ARB_multitexture",               NULL },
+   { "GL_ARB_pixel_buffer_object",      NULL },
+   { "GL_ARB_point_parameters",           GL_ARB_point_parameters_functions },
+   { "GL_ARB_point_sprite",               NULL },
+   { "GL_ARB_shader_objects",             GL_ARB_shader_objects_functions },
+   { "GL_ARB_shading_language_100",       GL_VERSION_2_0_functions },
+   { "GL_ARB_shading_language_120",       GL_VERSION_2_1_functions },
+   { "GL_ARB_sync",                       GL_ARB_sync_functions },
+   { "GL_ARB_texture_border_clamp",       NULL },
+   { "GL_ARB_texture_cube_map",           NULL },
+   { "GL_ARB_texture_env_add",            NULL },
+   { "GL_ARB_texture_env_combine",        NULL },
+   { "GL_ARB_texture_env_crossbar",       NULL },
+   { "GL_ARB_texture_env_dot3",           NULL },
+   { "GL_ARB_texture_mirrored_repeat",    NULL },
+   { "GL_ARB_texture_rectangle",          NULL },
+   { "GL_ARB_vertex_array_object",        GL_ARB_vertex_array_object_functions},
+   { "GL_ARB_vertex_program",             GL_ARB_vertex_program_functions },
+   { "GL_ARB_vertex_shader",              GL_ARB_vertex_shader_functions },
+   { "GL_ARB_window_pos",                 GL_ARB_window_pos_functions },
+   { "GL_EXT_blend_color",                GL_EXT_blend_color_functions },
+   { "GL_EXT_blend_equation_separate",    GL_EXT_blend_equation_separate_functions },
+   { "GL_EXT_blend_func_separate",        GL_EXT_blend_func_separate_functions },
+   { "GL_EXT_blend_minmax",               GL_EXT_blend_minmax_functions },
+   { "GL_EXT_blend_logic_op",             NULL },
+   { "GL_EXT_blend_subtract",             NULL },
+   { "GL_EXT_cull_vertex",                GL_EXT_cull_vertex_functions },
+   { "GL_EXT_framebuffer_blit",         GL_EXT_framebuffer_blit_functions },
+   { "GL_EXT_framebuffer_object",       GL_EXT_framebuffer_object_functions },
+   { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
+   { "GL_EXT_gpu_program_parameters",     GL_EXT_gpu_program_parameters_functions },
+   { "GL_EXT_packed_depth_stencil",       NULL },
+   { "GL_EXT_provoking_vertex",           GL_EXT_provoking_vertex_functions },
+   { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
+   { "GL_EXT_stencil_wrap",               NULL },
+   { "GL_EXT_texture_edge_clamp",         NULL },
+   { "GL_EXT_texture_env_combine",        NULL },
+   { "GL_EXT_texture_env_dot3",           NULL },
+   { "GL_EXT_texture_filter_anisotropic", NULL },
+   { "GL_EXT_texture_lod_bias",           NULL },
+   { "GL_3DFX_texture_compression_FXT1",  NULL },
+   { "GL_APPLE_client_storage",           NULL },
+   { "GL_APPLE_object_purgeable",         GL_APPLE_object_purgeable_functions },
+   { "GL_APPLE_vertex_array_object",      GL_APPLE_vertex_array_object_functions},
+   { "GL_MESA_pack_invert",               NULL },
+   { "GL_MESA_ycbcr_texture",             NULL },
+   { "GL_NV_blend_square",                NULL },
+   { "GL_NV_vertex_program",              GL_NV_vertex_program_functions },
+   { "GL_NV_vertex_program1_1",           NULL },
+   { "GL_SGIS_generate_mipmap",           NULL },
+#if FEATURE_OES_EGL_image
+   { "GL_OES_EGL_image",                  GL_OES_EGL_image_functions },
+#endif
+   { NULL, NULL }
+};
+
+
+/** i915 / i945-only extensions (enabled when intel->gen == 3) */
+static const struct dri_extension i915_extensions[] = {
+   { "GL_ARB_depth_texture",              NULL },
+   { "GL_ARB_fragment_program",           NULL },
+   { "GL_ARB_shadow",                     NULL },
+   { "GL_ARB_texture_non_power_of_two",   NULL },
+   { "GL_ATI_separate_stencil",           GL_ATI_separate_stencil_functions },
+   { "GL_ATI_texture_env_combine3",       NULL },
+   { "GL_EXT_shadow_funcs",               NULL },
+   { "GL_EXT_stencil_two_side",           GL_EXT_stencil_two_side_functions },
+   { "GL_NV_texture_env_combine4",        NULL },
+   { NULL,                                NULL }
+};
+
+
+/** i965-and-later extensions (enabled when intel->gen >= 4) */
+static const struct dri_extension brw_extensions[] = {
+   { "GL_ARB_depth_clamp",                NULL },
+   { "GL_ARB_depth_texture",              NULL },
+   { "GL_ARB_fragment_coord_conventions", NULL },
+   { "GL_ARB_fragment_program",           NULL },
+   { "GL_ARB_fragment_program_shadow",    NULL },
+   { "GL_ARB_fragment_shader",            NULL },
+   { "GL_ARB_framebuffer_object",         GL_ARB_framebuffer_object_functions},
+   { "GL_ARB_half_float_vertex",          NULL },
+   { "GL_ARB_occlusion_query",            GL_ARB_occlusion_query_functions },
+   { "GL_ARB_point_sprite", 		  NULL },
+   { "GL_ARB_seamless_cube_map",          NULL },
+   { "GL_ARB_shadow",                     NULL },
+   { "GL_MESA_texture_signed_rgba",       NULL },
+   { "GL_ARB_texture_non_power_of_two",   NULL },
+   { "GL_EXT_draw_buffers2",              GL_EXT_draw_buffers2_functions },
+   { "GL_EXT_shadow_funcs",               NULL },
+   { "GL_EXT_stencil_two_side",           GL_EXT_stencil_two_side_functions },
+   { "GL_EXT_texture_sRGB",		  NULL },
+   { "GL_EXT_texture_swizzle",		  NULL },
+   { "GL_EXT_vertex_array_bgra",	  NULL },
+   { "GL_ATI_envmap_bumpmap",             GL_ATI_envmap_bumpmap_functions },
+   { "GL_ATI_separate_stencil",           GL_ATI_separate_stencil_functions },
+   { "GL_ATI_texture_env_combine3",       NULL },
+   { "GL_NV_texture_env_combine4",        NULL },
+   { NULL,                                NULL }
+};
+
+static const struct dri_extension ironlake_extensions[] = { /* gen >= 5 only */
+   { "GL_EXT_timer_query",                GL_EXT_timer_query_functions },
+   { NULL, NULL }   /* sentinel: tables are passed without a length */
+};
+
+static const struct dri_extension arb_oq_extensions[] = { /* gen 3, opt-in */
+   { "GL_ARB_occlusion_query",            GL_ARB_occlusion_query_functions },
+   { NULL, NULL }
+};
+
+static const struct dri_extension fragment_shader_extensions[] = { /* gen 3 */
+   { "GL_ARB_fragment_shader",            NULL }, /* "fragment_shader" option */
+   { NULL, NULL }
+};
+
+/**
+ * Enables the driver's GL extensions on \p ctx: the common table first, then
+ * the tables selected by the chipset generation in intel->gen.
+ * NOTE(review): intel->gen is dereferenced, so ctx == NULL looks unsupported.
+ */
+void
+intelInitExtensions(GLcontext *ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+
+   /* GL_FALSE here and below: keep the imaging extension disabled until
+    * convolution is working in teximage paths. */
+   driInitExtensions(ctx, card_extensions, GL_FALSE);
+
+   if (intel->gen >= 5)
+      driInitExtensions(ctx, ironlake_extensions, GL_FALSE);
+   if (intel->gen >= 4)
+      driInitExtensions(ctx, brw_extensions, GL_FALSE);
+
+   if (intel->gen != 3)
+      return;
+
+   /* gen 3: its own table plus two tables gated by driver options */
+   driInitExtensions(ctx, i915_extensions, GL_FALSE);
+   if (driQueryOptionb(&intel->optionCache, "fragment_shader"))
+      driInitExtensions(ctx, fragment_shader_extensions, GL_FALSE);
+   if (driQueryOptionb(&intel->optionCache, "stub_occlusion_query"))
+      driInitExtensions(ctx, arb_oq_extensions, GL_FALSE);
+}
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.h b/src/mesa/drivers/dri/intel/intel_extensions.h
new file mode 100644
index 0000000000..236442a4d6
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_extensions.h
@@ -0,0 +1,39 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_EXTENSIONS_H
+#define INTEL_EXTENSIONS_H
+
+
+extern void
+intelInitExtensions(GLcontext *ctx);
+
+extern void
+intelInitExtensionsES2(GLcontext *ctx);
+
+
+#endif
diff --git a/src/mesa/drivers/dri/intel/intel_extensions_es2.c b/src/mesa/drivers/dri/intel/intel_extensions_es2.c
new file mode 100644
index 0000000000..baf8e13001
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_extensions_es2.c
@@ -0,0 +1,94 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/extensions.h"
+
+#include "intel_extensions.h"
+#include "utils.h"
+
+static const char *es2_extensions[] = {   /* NULL-terminated name list */
+   /* Used by mesa internally (cf all_mesa_extensions in ../common/utils.c) */
+   "GL_ARB_draw_buffers",
+   "GL_ARB_multisample",
+   "GL_ARB_texture_compression",
+   "GL_ARB_transpose_matrix",
+   "GL_ARB_vertex_buffer_object",
+   "GL_ARB_window_pos",
+   "GL_EXT_blend_func_separate",
+   "GL_EXT_compiled_vertex_array",
+   "GL_EXT_multi_draw_arrays",
+   "GL_EXT_polygon_offset",
+   "GL_EXT_texture_object",
+   "GL_EXT_vertex_array",
+   "GL_IBM_multimode_draw_arrays",
+   "GL_MESA_window_pos",
+   "GL_NV_vertex_program",
+
+   /* Required by GLES2 */
+   "GL_ARB_fragment_program",
+   "GL_ARB_fragment_shader",
+   "GL_ARB_multitexture",
+   "GL_ARB_shader_objects",
+   "GL_ARB_texture_cube_map",
+   "GL_ARB_texture_mirrored_repeat",
+   "GL_ARB_texture_non_power_of_two",
+   "GL_ARB_vertex_shader",
+   "GL_EXT_blend_color",
+   "GL_EXT_blend_equation_separate",
+   "GL_EXT_blend_minmax",
+   "GL_EXT_blend_subtract",
+   "GL_EXT_stencil_wrap",
+
+   /* Optional in GLES2 */
+   "GL_ARB_framebuffer_object",
+   "GL_EXT_texture_filter_anisotropic",
+   "GL_ARB_depth_texture",
+   "GL_EXT_packed_depth_stencil",
+   "GL_EXT_framebuffer_object",
+
+#if FEATURE_OES_EGL_image
+   "GL_OES_EGL_image",   /* only when built with FEATURE_OES_EGL_image */
+#endif
+
+   NULL,
+};
+
+/**
+ * Enables every extension named in es2_extensions[] on \p ctx, in table
+ * order, stopping at the NULL terminator.
+ */
+void
+intelInitExtensionsES2(GLcontext *ctx)
+{
+   const char **name;
+
+   /* driInitExtensions() can't be used here: when called the first time it
+    * uses extensions from main/remap_helper.h. */
+
+   for (name = es2_extensions; *name != NULL; name++)
+      _mesa_enable_extension(ctx, *name);
+}
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
new file mode 100644
index 0000000000..4a83886fc1
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -0,0 +1,697 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/fbobject.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/context.h"
+#include "main/texrender.h"
+#include "drivers/common/meta.h"
+
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_buffers.h"
+#include "intel_fbo.h"
+#include "intel_mipmap_tree.h"
+#include "intel_regions.h"
+
+#define FILE_DEBUG_FLAG DEBUG_FBO
+
+
+/**
+ * Create a new framebuffer object named \p name.
+ */
+static struct gl_framebuffer *
+intel_new_framebuffer(GLcontext * ctx, GLuint name)
+{
+   /* intel_framebuffer only carries drawable state at this time, so a plain
+    * core-Mesa framebuffer is used. */
+   struct gl_framebuffer *fb = _mesa_new_framebuffer(ctx, name);
+   return fb;
+}
+
+
+/** gl_renderbuffer::Delete() hook: drops the region, frees the renderbuffer. */
+static void
+intel_delete_renderbuffer(struct gl_renderbuffer *rb)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+   ASSERT(irb);
+
+   /* The region is released only when an intel_context is available. */
+   if (intel != NULL && irb->region != NULL)
+      intel_region_release(&irb->region);
+
+   free(irb);
+}
+
+
+/**
+ * GetPointer hook for intel renderbuffers: never exposes a direct pixel
+ * address for (x, y); all arguments are ignored.
+ */
+static void *
+intel_get_pointer(GLcontext * ctx, struct gl_renderbuffer *rb,
+                  GLint x, GLint y)
+{
+   /* Returning NULL forces all software rendering to go through the span
+    * routines instead. */
+   return NULL;
+}
+
+
+/**
+ * AllocStorage hook for user-created renderbuffers, reached via
+ * glRenderbufferStorageEXT().  Picks the hardware format for
+ * \p internalFormat and allocates an untiled region of width x height.
+ *
+ * \return GL_TRUE on success; GL_FALSE if the format is not handled or the
+ *         region could not be allocated.
+ */
+static GLboolean
+intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+                                 GLenum internalFormat,
+                                 GLuint width, GLuint height)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   int cpp;
+
+   ASSERT(rb->Name != 0);
+
+   switch (internalFormat) {
+   /* small RGB formats map to RGB565 */
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+      rb->Format = MESA_FORMAT_RGB565;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      break;
+   /* every other RGB format maps to XRGB8888 */
+   case GL_RGB:
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      rb->Format = MESA_FORMAT_XRGB8888;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      break;
+   /* every RGBA format maps to ARGB8888 */
+   case GL_RGBA:
+   case GL_RGBA2:
+   case GL_RGBA4:
+   case GL_RGB5_A1:
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      rb->Format = MESA_FORMAT_ARGB8888;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      break;
+   case GL_ALPHA:
+   case GL_ALPHA8:
+      rb->Format = MESA_FORMAT_A8;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      break;
+   case GL_DEPTH_COMPONENT16:
+      rb->Format = MESA_FORMAT_Z16;
+      rb->DataType = GL_UNSIGNED_SHORT;
+      break;
+   /* Stencil-only, generic/24/32-bit depth and packed depth/stencil
+    * requests all get a combined depth+stencil buffer.
+    */
+   case GL_STENCIL_INDEX:
+   case GL_STENCIL_INDEX1_EXT:
+   case GL_STENCIL_INDEX4_EXT:
+   case GL_STENCIL_INDEX8_EXT:
+   case GL_STENCIL_INDEX16_EXT:
+   case GL_DEPTH_COMPONENT:
+   case GL_DEPTH_COMPONENT24:
+   case GL_DEPTH_COMPONENT32:
+   case GL_DEPTH_STENCIL_EXT:
+   case GL_DEPTH24_STENCIL8_EXT:
+      rb->Format = MESA_FORMAT_S8_Z24;
+      rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
+      break;
+   default:
+      _mesa_problem(ctx,
+                    "Unexpected format in intel_alloc_renderbuffer_storage");
+      return GL_FALSE;
+   }
+
+   rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat);
+   cpp = _mesa_get_format_bytes(rb->Format);
+
+   intel_flush(ctx);
+
+   /* Drop the previous storage, if any, before allocating the new one. */
+   if (irb->region)
+      intel_region_release(&irb->region);
+
+   DBG("Allocating %d x %d Intel RBO\n", width, height);
+
+   irb->region = intel_region_alloc(intel, I915_TILING_NONE, cpp,
+				    width, height, GL_TRUE);
+   if (irb->region == NULL)
+      return GL_FALSE;       /* out of memory? */
+
+   ASSERT(irb->region->buffer);
+
+   /* Only a successful allocation updates the recorded size. */
+   rb->Width = width;
+   rb->Height = height;
+
+   return GL_TRUE;
+}
+
+
+#if FEATURE_OES_EGL_image
+/** Make \p rb share the region of the EGLImage behind \p image_handle. */
+static void
+intel_image_target_renderbuffer_storage(GLcontext *ctx,
+					struct gl_renderbuffer *rb,
+					void *image_handle)
+{
+   struct intel_context *intel = intel_context(ctx);
+   __DRIscreen *screen = intel->intelScreen->driScrnPriv;
+   struct intel_renderbuffer *irb;
+   __DRIimage *image;
+
+   image = screen->dri2.image->lookupEGLImage(intel->driContext, image_handle,
+					      intel->driContext->loaderPrivate);
+   if (!image)
+      return;   /* lookup failed: renderbuffer is left untouched */
+
+   irb = intel_renderbuffer(rb);
+   if (irb->region != NULL)
+      intel_region_release(&irb->region);
+   intel_region_reference(&irb->region, image->region);
+
+   rb->Format = image->format;
+   rb->DataType = image->data_type;
+   rb->InternalFormat = image->internal_format;
+   rb->_BaseFormat = _mesa_base_fbo_format(&intel->ctx,
+					   image->internal_format);
+   rb->Width = image->region->width;
+   rb->Height = image->region->height;
+}
+#endif
+
+/**
+ * AllocStorage hook for window-system renderbuffers (rb->Name == 0), run
+ * when the window is resized.  Allocates nothing: it only records the new
+ * size and internal format.  Never used for user-created renderbuffers.
+ */
+static GLboolean
+intel_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+                           GLenum internalFormat, GLuint width, GLuint height)
+{
+   ASSERT(rb->Name == 0);
+
+   rb->InternalFormat = internalFormat;
+   rb->Width = width;
+   rb->Height = height;
+   return GL_TRUE;
+}
+
+
+/** Resize \p fb; a window-system fb also gets its front/back buffers resized. */
+static void
+intel_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb,
+		     GLuint width, GLuint height)
+{
+   int buf;
+
+   _mesa_resize_framebuffer(ctx, fb, width, height);
+
+   fb->Initialized = GL_TRUE; /* XXX remove someday */
+
+   /* Framebuffers with a non-zero name need nothing more. */
+   if (fb->Name != 0)
+      return;
+
+   /* Make sure all window system renderbuffers are up to date */
+   for (buf = BUFFER_FRONT_LEFT; buf <= BUFFER_BACK_RIGHT; buf++) {
+      struct gl_renderbuffer *rb = fb->Attachment[buf].Renderbuffer;
+
+      /* skip empty slots and buffers that already have the right size */
+      if (!rb || (rb->Width == width && rb->Height == height))
+	 continue;
+      rb->AllocStorage(ctx, rb, rb->InternalFormat, width, height);
+   }
+}
+
+
+/** Placeholder AllocStorage hook: reports a problem and fails if reached. */
+static GLboolean
+intel_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+                        GLenum internalFormat, GLuint width, GLuint height)
+{
+   _mesa_problem(ctx, "intel_nop_alloc_storage should never be called.");
+   return GL_FALSE;
+}
+
+
+/** Rebind \p rb to \p region: reference the new one, then release the old. */
+void
+intel_renderbuffer_set_region(struct intel_context *intel,
+			      struct intel_renderbuffer *rb,
+			      struct intel_region *region)
+{
+   struct intel_region *previous = rb->region;
+
+   rb->region = NULL;
+   intel_region_reference(&rb->region, region);
+   intel_region_release(&previous);
+}
+
+
+/**
+ * Create a new intel_renderbuffer for a window-system (on-screen) buffer of
+ * the given Mesa \p format; not used for user-created renderbuffers.
+ *
+ * \return the new renderbuffer, or NULL on allocation failure or when
+ *         \p format is not one of the formats handled below.
+ */
+struct intel_renderbuffer *
+intel_create_renderbuffer(gl_format format)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct intel_renderbuffer *irb = CALLOC_STRUCT(intel_renderbuffer);
+   struct gl_renderbuffer *rb;
+
+   if (irb == NULL) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer");
+      return NULL;
+   }
+
+   rb = &irb->Base;
+   _mesa_init_renderbuffer(rb, 0);
+   rb->ClassID = INTEL_RB_CLASS;
+
+   switch (format) {
+   case MESA_FORMAT_RGB565:
+   case MESA_FORMAT_XRGB8888:
+      rb->_BaseFormat = GL_RGB;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      break;
+   case MESA_FORMAT_ARGB8888:
+      rb->_BaseFormat = GL_RGBA;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      break;
+   case MESA_FORMAT_A8:
+      rb->_BaseFormat = GL_ALPHA;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      break;
+   case MESA_FORMAT_Z16:
+      rb->_BaseFormat = GL_DEPTH_COMPONENT;
+      rb->DataType = GL_UNSIGNED_SHORT;
+      break;
+   case MESA_FORMAT_X8_Z24:
+      rb->_BaseFormat = GL_DEPTH_COMPONENT;
+      rb->DataType = GL_UNSIGNED_INT;
+      break;
+   case MESA_FORMAT_S8_Z24:
+      rb->_BaseFormat = GL_DEPTH_STENCIL;
+      rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
+      break;
+   default:
+      _mesa_problem(NULL,
+                    "Unexpected intFormat in intel_create_renderbuffer");
+      free(irb);
+      return NULL;
+   }
+
+   rb->Format = format;
+   rb->InternalFormat = rb->_BaseFormat;
+
+   /* intel-specific methods */
+   rb->Delete = intel_delete_renderbuffer;
+   rb->AllocStorage = intel_alloc_window_storage;
+   rb->GetPointer = intel_get_pointer;
+
+   return irb;
+}
+
+
+/**
+ * Driver hook for creating a user renderbuffer object with the given name.
+ * Typically called via glBindRenderbufferEXT().
+ * \return the gl_renderbuffer base of the new object, or NULL if out of memory.
+ */
+static struct gl_renderbuffer *
+intel_new_renderbuffer(GLcontext * ctx, GLuint name)
+{
+   struct intel_renderbuffer *irb = CALLOC_STRUCT(intel_renderbuffer);
+
+   if (irb == NULL) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer");
+      return NULL;
+   }
+
+   _mesa_init_renderbuffer(&irb->Base, name);
+   irb->Base.ClassID = INTEL_RB_CLASS;
+
+   /* Install the intel-specific methods.  Span routines are not set
+    * here; they are set in the alloc_storage function.
+    */
+   irb->Base.Delete = intel_delete_renderbuffer;
+   irb->Base.AllocStorage = intel_alloc_renderbuffer_storage;
+   irb->Base.GetPointer = intel_get_pointer;
+
+   return &irb->Base;
+}
+
+
+/**
+ * Called via glBindFramebufferEXT().  Only a change of the draw framebuffer
+ * needs driver work; \p fbread is not used.
+ */
+static void
+intel_bind_framebuffer(GLcontext * ctx, GLenum target,
+                       struct gl_framebuffer *fb, struct gl_framebuffer *fbread)
+{
+   /* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */
+   if (target != GL_FRAMEBUFFER_EXT && target != GL_DRAW_FRAMEBUFFER_EXT)
+      return;
+
+   intel_draw_buffer(ctx, fb);
+}
+
+
+/**
+ * Called via glFramebufferRenderbufferEXT().
+ */
+static void
+intel_framebuffer_renderbuffer(GLcontext * ctx,
+                               struct gl_framebuffer *fb,
+                               GLenum attachment, struct gl_renderbuffer *rb)
+{
+   DBG("Intel FramebufferRenderbuffer %u %u\n", fb->Name, rb ? rb->Name : 0);
+   /* flush before the attachment is changed (rb may be NULL) */
+   intel_flush(ctx);
+   /* attach via _mesa_framebuffer_renderbuffer(), then re-run intel_draw_buffer() for fb */
+   _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
+   intel_draw_buffer(ctx, fb);
+}
+
+/** Make \p irb describe \p texImage (format, type, size); GL_FALSE if unsupported. */
+static GLboolean
+intel_update_wrapper(GLcontext *ctx, struct intel_renderbuffer *irb,
+		     struct gl_texture_image *texImage)
+{
+   switch (texImage->TexFormat) {
+   case MESA_FORMAT_ARGB8888:
+      irb->Base.DataType = GL_UNSIGNED_BYTE;
+      DBG("Render to RGBA8 texture OK\n");
+      break;
+   case MESA_FORMAT_XRGB8888:
+      irb->Base.DataType = GL_UNSIGNED_BYTE;
+      DBG("Render to XRGB8 texture OK\n");
+      break;
+   case MESA_FORMAT_RGB565:
+      irb->Base.DataType = GL_UNSIGNED_BYTE;
+      DBG("Render to RGB5 texture OK\n");
+      break;
+   case MESA_FORMAT_ARGB1555:
+      irb->Base.DataType = GL_UNSIGNED_BYTE;
+      DBG("Render to ARGB1555 texture OK\n");
+      break;
+   case MESA_FORMAT_ARGB4444:
+      irb->Base.DataType = GL_UNSIGNED_BYTE;
+      DBG("Render to ARGB4444 texture OK\n");
+      break;
+   case MESA_FORMAT_A8:
+      irb->Base.DataType = GL_UNSIGNED_BYTE;
+      DBG("Render to A8 texture OK\n");
+      break;
+   case MESA_FORMAT_Z16:
+      irb->Base.DataType = GL_UNSIGNED_SHORT;
+      DBG("Render to DEPTH16 texture OK\n");
+      break;
+   case MESA_FORMAT_S8_Z24:
+      irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+      DBG("Render to DEPTH_STENCIL texture OK\n");
+      break;
+   default:
+      DBG("Render to texture BAD FORMAT %s\n",
+          _mesa_get_format_name(texImage->TexFormat));
+      return GL_FALSE;
+   }
+
+   irb->Base.Format = texImage->TexFormat;
+   irb->Base.InternalFormat = texImage->InternalFormat;
+   irb->Base._BaseFormat = _mesa_base_fbo_format(ctx, irb->Base.InternalFormat);
+   irb->Base.Width = texImage->Width;
+   irb->Base.Height = texImage->Height;
+
+   irb->Base.Delete = intel_delete_renderbuffer;
+   irb->Base.AllocStorage = intel_nop_alloc_storage;
+
+   return GL_TRUE;
+}
+
+
+/**
+ * When glFramebufferTexture[123]D is called this function sets up the
+ * gl_renderbuffer wrapper around the texture image.
+ * This will have the region info needed for hardware rendering.
+ * \return the wrapper, or NULL on allocation failure or unsupported format.
+ */
+static struct intel_renderbuffer *
+intel_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage)
+{
+   /* make an intel_renderbuffer to wrap the texture image */
+   struct intel_renderbuffer *wrapper = CALLOC_STRUCT(intel_renderbuffer);
+
+   if (wrapper == NULL) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glFramebufferTexture");
+      return NULL;
+   }
+
+   /* the name ~0 is not significant, but distinct for debugging */
+   _mesa_init_renderbuffer(&wrapper->Base, (GLuint) ~0);
+   wrapper->Base.ClassID = INTEL_RB_CLASS;
+
+   if (intel_update_wrapper(ctx, wrapper, texImage))
+      return wrapper;
+
+   /* intel_update_wrapper() rejected the texture format */
+   free(wrapper);
+   return NULL;
+}
+
+
+/**
+ * Called by glFramebufferTexture[123]DEXT() (and other places) to
+ * prepare for rendering into texture memory.  This might be called
+ * many times to choose different texture levels, cube faces, etc
+ * before intel_finish_render_texture() is ever called.
+ */
+static void
+intel_render_texture(GLcontext * ctx,
+                     struct gl_framebuffer *fb,
+                     struct gl_renderbuffer_attachment *att)
+{
+   struct gl_texture_image *newImage
+      = att->Texture->Image[att->CubeMapFace][att->TextureLevel];
+   struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer);
+   struct intel_texture_image *intel_image;
+   GLuint dst_x, dst_y;
+
+   (void) fb;
+
+   ASSERT(newImage);
+
+   intel_image = intel_texture_image(newImage);
+   if (!intel_image->mt) {
+      /* Fallback on drawing to a texture that doesn't have a miptree
+       * (has a border, width/height 0, etc.)
+       */
+      _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
+      _mesa_render_texture(ctx, fb, att);
+      return;
+   }
+   else if (!irb) {
+      irb = intel_wrap_texture(ctx, newImage);
+      if (irb) {
+         /* bind the wrapper to the attachment point */
+         _mesa_reference_renderbuffer(&att->Renderbuffer, &irb->Base);
+      }
+      else {
+         /* fallback to software rendering */
+         _mesa_render_texture(ctx, fb, att);
+         return;
+      }
+   }
+   /* re-describe the wrapper for the image selected now; fall back if its format is rejected */
+   if (!intel_update_wrapper(ctx, irb, newImage)) {
+       _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
+       _mesa_render_texture(ctx, fb, att);
+       return;
+   }
+
+   DBG("Begin render texture tid %lx tex=%u w=%d h=%d refcount=%d\n",
+       _glthread_GetID(),
+       att->Texture->Name, newImage->Width, newImage->Height,
+       irb->Base.RefCount);
+
+   /* point the renderbuffer's region to the texture image region */
+   if (irb->region != intel_image->mt->region) {
+      if (irb->region)
+	 intel_region_release(&irb->region);
+      intel_region_reference(&irb->region, intel_image->mt->region);
+   }
+
+   /* compute offset of the particular 2D image within the texture region */
+   intel_miptree_get_image_offset(intel_image->mt,
+				  att->TextureLevel,
+				  att->CubeMapFace,
+				  att->Zoffset,
+				  &dst_x, &dst_y);
+   /* record on the miptree's region where that image starts: byte offset and x/y */
+   intel_image->mt->region->draw_offset = (dst_y * intel_image->mt->region->pitch +
+					   dst_x) * intel_image->mt->cpp;
+   intel_image->mt->region->draw_x = dst_x;
+   intel_image->mt->region->draw_y = dst_y;
+   intel_image->used_as_render_target = GL_TRUE;
+
+   /* update drawing region, etc */
+   intel_draw_buffer(ctx, fb);
+}
+
+
+/**
+ * Called by Mesa when rendering to a texture is done.
+ * Clears the render-target flag on the attached image and emits an MI flush
+ * into intel->batch.
+ */
+static void
+intel_finish_render_texture(GLcontext * ctx,
+                            struct gl_renderbuffer_attachment *att)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct gl_texture_image *rendered =
+      att->Texture->Image[att->CubeMapFace][att->TextureLevel];
+
+   /* Flag that this image may now be validated into the object's miptree. */
+   intel_texture_image(rendered)->used_as_render_target = GL_FALSE;
+
+   /* Since we've (probably) rendered to the texture and will (likely) use
+    * it in the texture domain later on in this batchbuffer, flush the
+    * batch.  Once again, we wish for a domain tracker in libdrm to cover
+    * usage inside of a batchbuffer like GEM does in the kernel.
+    */
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+}
+
+/**
+ * Do additional "completeness" testing of a framebuffer object.
+ * Marks \p fb GL_FRAMEBUFFER_UNSUPPORTED_EXT for configurations this driver
+ * does not handle.  Every test looks at \p fb itself, which need not be
+ * the context's current draw buffer.
+ */
+static void
+intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
+{
+   const struct intel_renderbuffer *depthRb =
+      intel_get_renderbuffer(fb, BUFFER_DEPTH);
+   const struct intel_renderbuffer *stencilRb =
+      intel_get_renderbuffer(fb, BUFFER_STENCIL);
+   int i;
+
+   if (depthRb && stencilRb && stencilRb != depthRb) {
+      /* We only support combined depth/stencil: distinct renderbuffers are
+       * accepted only when both wrap the same texture. */
+      if (fb->Attachment[BUFFER_DEPTH].Type != GL_TEXTURE ||
+          fb->Attachment[BUFFER_STENCIL].Type != GL_TEXTURE ||
+          (fb->Attachment[BUFFER_DEPTH].Texture->Name !=
+           fb->Attachment[BUFFER_STENCIL].Texture->Name)) {
+         DBG("Only supports combined depth/stencil (found %s, %s)\n",
+             _mesa_get_format_name(depthRb->Base.Format),
+             _mesa_get_format_name(stencilRb->Base.Format));
+         fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
+      }
+   }
+
+   for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) {
+      struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
+      struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+      if (rb == NULL)
+         continue;
+
+      if (irb == NULL) {
+         DBG("software rendering renderbuffer\n");
+         fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
+         continue;
+      }
+
+      switch (irb->Base.Format) {
+      case MESA_FORMAT_ARGB8888:
+      case MESA_FORMAT_XRGB8888:
+      case MESA_FORMAT_RGB565:
+      case MESA_FORMAT_ARGB1555:
+      case MESA_FORMAT_ARGB4444:
+      case MESA_FORMAT_A8:
+         break;
+      default:
+         fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT;
+      }
+   }
+}
+
+
+/**
+ * Do one-time context initializations related to GL_EXT_framebuffer_object.
+ * Hook in device driver functions (the ctx.Driver entries assigned below).
+ */
+void
+intel_fbo_init(struct intel_context *intel)
+{
+   intel->ctx.Driver.NewFramebuffer = intel_new_framebuffer;
+   intel->ctx.Driver.NewRenderbuffer = intel_new_renderbuffer;
+   intel->ctx.Driver.BindFramebuffer = intel_bind_framebuffer;
+   intel->ctx.Driver.FramebufferRenderbuffer = intel_framebuffer_renderbuffer;
+   intel->ctx.Driver.RenderTexture = intel_render_texture;
+   intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture;
+   intel->ctx.Driver.ResizeBuffers = intel_resize_buffers;
+   intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer;
+   intel->ctx.Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer;
+   /* the EGL image hook is installed only when FEATURE_OES_EGL_image is enabled */
+#if FEATURE_OES_EGL_image
+   intel->ctx.Driver.EGLImageTargetRenderbufferStorage =
+      intel_image_target_renderbuffer_storage;
+#endif   
+}
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
new file mode 100644
index 0000000000..028f657d12
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -0,0 +1,113 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_FBO_H
+#define INTEL_FBO_H
+
+#include "main/formats.h"
+#include "intel_screen.h"
+
+struct intel_context;
+
+/**
+ * Intel renderbuffer, derived from gl_renderbuffer.
+ */
+struct intel_renderbuffer
+{
+   struct gl_renderbuffer Base;   /* first member: intel_renderbuffer() casts a gl_renderbuffer * to this type */
+   struct intel_region *region;   /* referenced region; see intel_renderbuffer_set_region() */
+};
+
+
+/**
+ * gl_renderbuffer is a base class which we subclass.  The ClassID field
+ * is used for simple run-time type checking.
+ */
+#define INTEL_RB_CLASS 0x12345678
+
+
+/**
+ * Downcast a gl_renderbuffer pointer to intel_renderbuffer.
+ * The ClassID tag is checked at run time: NULL is returned for a NULL
+ * \p rb and for any renderbuffer whose ClassID is not INTEL_RB_CLASS,
+ * i.e. one that isn't really an intel_renderbuffer.
+ */
+static INLINE struct intel_renderbuffer *
+intel_renderbuffer(struct gl_renderbuffer *rb)
+{
+   /* not one of ours (or NULL) */
+   if (rb == NULL || rb->ClassID != INTEL_RB_CLASS)
+      return NULL;
+
+   /* Base is the first member, so the pointers are interchangeable */
+   return (struct intel_renderbuffer *) rb;
+}
+
+
+/**
+ * Return a framebuffer's renderbuffer, named by a BUFFER_x index.
+ * A negative index yields NULL.
+ */
+static INLINE struct intel_renderbuffer *
+intel_get_renderbuffer(struct gl_framebuffer *fb, int attIndex)
+{
+   if (attIndex < 0)
+      return NULL;
+   return intel_renderbuffer(fb->Attachment[attIndex].Renderbuffer);
+}
+
+
+extern void
+intel_renderbuffer_set_region(struct intel_context *intel,
+			      struct intel_renderbuffer *irb,
+			      struct intel_region *region);
+
+
+extern struct intel_renderbuffer *
+intel_create_renderbuffer(gl_format format);
+
+
+extern void
+intel_fbo_init(struct intel_context *intel);
+
+
+extern void
+intel_flip_renderbuffers(struct gl_framebuffer *fb);
+
+
+static INLINE struct intel_region *
+intel_get_rb_region(struct gl_framebuffer *fb, GLuint attIndex)
+{
+   /* Region of the attachment's intel renderbuffer; NULL when the
+    * attachment has no intel renderbuffer.
+    */
+   struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, attIndex);
+
+   return irb ? irb->region : NULL;
+}
+
+
+#endif /* INTEL_FBO_H */
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
new file mode 100644
index 0000000000..39ac0205fa
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -0,0 +1,461 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_regions.h"
+#include "intel_tex_layout.h"
+#include "main/enums.h"
+
+#define FILE_DEBUG_FLAG DEBUG_MIPTREE
+
+/** Map each cube-map face target to GL_TEXTURE_CUBE_MAP_ARB; others pass through. */
+static GLenum
+target_to_target(GLenum target)
+{
+   switch (target) {
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB:
+   case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB:
+   case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB:
+      return GL_TEXTURE_CUBE_MAP_ARB;
+   default:
+      return target;
+   }
+}
+
+/** Allocate a miptree and run the layout code; NULL if allocation or layout fails. */
+static struct intel_mipmap_tree *
+intel_miptree_create_internal(struct intel_context *intel,
+			      GLenum target,
+			      GLenum internal_format,
+			      GLuint first_level,
+			      GLuint last_level,
+			      GLuint width0,
+			      GLuint height0,
+			      GLuint depth0, GLuint cpp, GLuint compress_byte,
+			      uint32_t tiling)
+{
+   GLboolean ok;
+   struct intel_mipmap_tree *mt = calloc(1, sizeof(*mt));
+
+   DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(target),
+       _mesa_lookup_enum_by_nr(internal_format),
+       first_level, last_level, mt);
+   if (!mt)
+      return NULL;   /* out of memory: nothing to fill in or lay out */
+
+   mt->target = target_to_target(target);
+   mt->internal_format = internal_format;
+   mt->first_level = first_level;
+   mt->last_level = last_level;
+   mt->width0 = width0;
+   mt->height0 = height0;
+   mt->depth0 = depth0;
+   mt->cpp = compress_byte ? compress_byte : cpp;
+   mt->compressed = compress_byte ? 1 : 0;
+   mt->refcount = 1;
+#ifdef I915
+   if (intel->is_945)
+      ok = i945_miptree_layout(intel, mt, tiling);
+   else
+      ok = i915_miptree_layout(intel, mt, tiling);
+#else
+   ok = brw_miptree_layout(intel, mt, tiling);
+#endif
+
+   if (ok)
+      return mt;
+
+   free(mt);
+   DBG("%s not okay - returning NULL\n", __FUNCTION__);
+   return NULL;
+}
+
+
+/**
+ * Create a miptree together with a newly allocated region to back it.
+ * \return the tree, or NULL for the null texture or on allocation failure.
+ */
+struct intel_mipmap_tree *
+intel_miptree_create(struct intel_context *intel,
+		     GLenum target,
+		     GLenum base_format,
+		     GLenum internal_format,
+		     GLuint first_level,
+		     GLuint last_level,
+		     GLuint width0,
+		     GLuint height0,
+		     GLuint depth0, GLuint cpp, GLuint compress_byte,
+		     GLboolean expect_accelerated_upload)
+{
+   struct intel_mipmap_tree *mt;
+   uint32_t tiling = I915_TILING_NONE;
+
+   if (intel->use_texture_tiling && compress_byte == 0) {
+      if (intel->gen >= 4 &&
+	  (base_format == GL_DEPTH_COMPONENT ||
+	   base_format == GL_DEPTH_STENCIL_EXT))
+	 tiling = I915_TILING_Y;
+      else if (width0 >= 64)
+	 tiling = I915_TILING_X;
+   }
+
+   mt = intel_miptree_create_internal(intel, target, internal_format,
+				      first_level, last_level, width0,
+				      height0, depth0, cpp, compress_byte,
+				      tiling);
+   /* Zero height indicates the null texture.  Release (not free) the tree:
+    * the layout code may already have allocated per-level offset arrays. */
+   if (!mt || !mt->total_height) {
+      intel_miptree_release(intel, &mt);
+      return NULL;
+   }
+
+   mt->region = intel_region_alloc(intel,
+				   tiling,
+				   mt->cpp,
+				   mt->total_width,
+				   mt->total_height,
+				   expect_accelerated_upload);
+   if (!mt->region)
+      intel_miptree_release(intel, &mt);   /* frees the tree, sets mt to NULL */
+
+   return mt;
+}
+
+
+struct intel_mipmap_tree *
+intel_miptree_create_for_region(struct intel_context *intel,
+				GLenum target,
+				GLenum internal_format,
+				GLuint first_level,
+				GLuint last_level,
+				struct intel_region *region,
+				GLuint depth0,
+				GLuint compress_byte)
+{
+   /* NOTE(review): depth0 is not used; the layout is created with depth 1. */
+   struct intel_mipmap_tree *mt =
+      intel_miptree_create_internal(intel, target, internal_format,
+                                    first_level, last_level,
+                                    region->width, region->height, 1,
+                                    region->cpp, compress_byte,
+                                    I915_TILING_NONE);
+
+   /* take a reference on the caller's region instead of allocating one */
+   if (mt)
+      intel_region_reference(&mt->region, region);
+
+   return mt;
+}
+
+void
+intel_miptree_reference(struct intel_mipmap_tree **dst,
+                        struct intel_mipmap_tree *src)
+{
+   src->refcount++;   /* src is dereferenced unconditionally: it must not be NULL */
+   *dst = src;        /* whatever *dst held before is overwritten, not released */
+   DBG("%s %p refcount now %d\n", __FUNCTION__, src, src->refcount);
+}
+
+
+void
+intel_miptree_release(struct intel_context *intel,
+                      struct intel_mipmap_tree **mt)
+{
+   struct intel_mipmap_tree *tree = *mt;
+   GLuint lvl;
+
+   if (tree == NULL)
+      return;
+
+   /* the caller's pointer is cleared even if other references remain */
+   *mt = NULL;
+   DBG("%s %p refcount will be %d\n", __FUNCTION__, tree, tree->refcount - 1);
+   if (--tree->refcount > 0)
+      return;
+
+   DBG("%s deleting %p\n", __FUNCTION__, tree);
+   intel_region_release(&tree->region);
+   for (lvl = 0; lvl < MAX_TEXTURE_LEVELS; lvl++) {
+      free(tree->level[lvl].x_offset);
+      free(tree->level[lvl].y_offset);
+   }
+   free(tree);
+}
+
+
+/**
+ * Can the image be pulled into a unified mipmap tree?  This mirrors
+ * the completeness test in a lot of ways.
+ *
+ * Not sure whether I want to pass gl_texture_image here.
+ * \return GL_TRUE when \p image could live in \p mt at the image's own
+ *         level, GL_FALSE otherwise.
+ */
+GLboolean
+intel_miptree_match_image(struct intel_mipmap_tree *mt,
+                          struct gl_texture_image *image)
+{
+   const GLboolean compressed = _mesa_is_format_compressed(image->TexFormat);
+   const GLuint level = intel_texture_image(image)->level;
+
+   /* Images with borders are never pulled into mipmap trees. */
+   if (image->Border)
+      return GL_FALSE;
+
+   /* internal format and compression state must agree with the tree */
+   if (image->InternalFormat != mt->internal_format)
+      return GL_FALSE;
+   if (compressed != mt->compressed)
+      return GL_FALSE;
+
+   /* for uncompressed data the bytes per texel must match too */
+   if (!compressed && !mt->compressed &&
+       _mesa_get_format_bytes(image->TexFormat) != mt->cpp)
+      return GL_FALSE;
+
+   /* Test image dimensions against the base level image adjusted for
+    * minification.  This will also catch images not present in the
+    * tree, changed targets, etc.
+    */
+   return (image->Width == mt->level[level].width &&
+           image->Height == mt->level[level].height &&
+           image->Depth == mt->level[level].depth);
+}
+
+
+void
+intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
+			     GLuint level,
+			     GLuint nr_images,
+			     GLuint x, GLuint y,
+			     GLuint w, GLuint h, GLuint d)
+{
+   mt->level[level].width = w;
+   mt->level[level].height = h;
+   mt->level[level].depth = d;
+   mt->level[level].level_x = x;
+   mt->level[level].level_y = y;
+   mt->level[level].nr_images = nr_images;
+
+   DBG("%s level %d size: %d,%d,%d offset %d,%d\n", __FUNCTION__,
+       level, w, h, d, x, y);
+   /* a level needs at least one image and must not already have an offset array */
+   assert(nr_images);
+   assert(!mt->level[level].x_offset);
+   /* NOTE(review): malloc results are used unchecked.  Image 0 starts at the level origin. */
+   mt->level[level].x_offset = malloc(nr_images * sizeof(GLuint));
+   mt->level[level].x_offset[0] = mt->level[level].level_x;
+   mt->level[level].y_offset = malloc(nr_images * sizeof(GLuint));
+   mt->level[level].y_offset[0] = mt->level[level].level_y;
+}
+
+
+void
+intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
+			       GLuint level, GLuint img,
+			       GLuint x, GLuint y)
+{
+   if (img == 0 && level == 0)
+      assert(x == 0 && y == 0);
+   /* img must index an image announced via intel_miptree_set_level_info() */
+   assert(img < mt->level[level].nr_images);
+   /* x and y are relative to the level's own origin (level_x, level_y) */
+   mt->level[level].x_offset[img] = mt->level[level].level_x + x;
+   mt->level[level].y_offset[img] = mt->level[level].level_y + y;
+
+   DBG("%s level %d img %d pos %d,%d\n",
+       __FUNCTION__, level, img,
+       mt->level[level].x_offset[img], mt->level[level].y_offset[img]);
+}
+
+
+void
+intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
+			       GLuint level, GLuint face, GLuint depth,
+			       GLuint *x, GLuint *y)
+{
+   /* pick the image index within the level, then read its stored offset */
+   GLuint img = 0;   /* all other targets use image 0 */
+
+   if (mt->target == GL_TEXTURE_CUBE_MAP_ARB)
+      img = face;    /* cube maps are indexed by face */
+   else if (mt->target == GL_TEXTURE_3D)
+      img = depth;   /* 3D textures are indexed by depth slice */
+
+   *x = mt->level[level].x_offset[img];
+   *y = mt->level[level].y_offset[img];
+}
+
+/**
+ * Map a teximage in a mipmap tree.
+ * \param row_stride  returns row stride in bytes (may be NULL)
+ * \param image_offsets  returns, for 3D textures, the offset in pixels of each
+ *	  depth image from the returned pointer; otherwise entry 0 is set to 0
+ * \return address of mapping: the region base for 3D textures, else the
+ *	  start of the selected image
+ */
+GLubyte *
+intel_miptree_image_map(struct intel_context * intel,
+                        struct intel_mipmap_tree * mt,
+                        GLuint face,
+                        GLuint level,
+                        GLuint * row_stride, GLuint * image_offsets)
+{
+   GLuint x, y;
+   DBG("%s \n", __FUNCTION__);
+
+   if (row_stride)
+      *row_stride = mt->region->pitch * mt->cpp;
+
+   if (mt->target == GL_TEXTURE_3D) {
+      int i;
+
+      for (i = 0; i < mt->level[level].depth; i++) {
+
+	 intel_miptree_get_image_offset(mt, level, face, i,
+					&x, &y);
+	 image_offsets[i] = x + y * mt->region->pitch;
+      }
+
+      return intel_region_map(intel, mt->region);
+   } else {
+      assert(mt->level[level].depth == 1);
+      intel_miptree_get_image_offset(mt, level, face, 0,
+				     &x, &y);
+      image_offsets[0] = 0;
+
+      return intel_region_map(intel, mt->region) +
+	 (x + y * mt->region->pitch) * mt->cpp;
+   }
+}
+
+/** Undo intel_miptree_image_map(): unmaps the tree's region. */
+void
+intel_miptree_image_unmap(struct intel_context *intel,
+                          struct intel_mipmap_tree *mt)
+{
+   DBG("%s\n", __FUNCTION__);
+   intel_region_unmap(intel, mt->region);
+}
+
+
+/**
+ * Upload data for a particular image, one depth slice per iteration.
+ * For compressed trees the row count is the level height / 4, rounded up.
+ */
+void
+intel_miptree_image_data(struct intel_context *intel,
+			 struct intel_mipmap_tree *dst,
+			 GLuint face,
+			 GLuint level,
+			 void *src,
+			 GLuint src_row_pitch,
+			 GLuint src_image_pitch)
+{
+   const GLuint depth = dst->level[level].depth;
+   const GLuint width = dst->level[level].width;
+   GLuint rows = dst->level[level].height;
+   GLuint slice;
+
+   if (dst->compressed)
+      rows = (rows + 3) / 4;
+
+   DBG("%s: %d/%d\n", __FUNCTION__, face, level);
+   for (slice = 0; slice < depth; slice++) {
+      GLuint dst_x, dst_y;
+
+      intel_miptree_get_image_offset(dst, level, face, slice, &dst_x, &dst_y);
+      intel_region_data(intel,
+                        dst->region, 0, dst_x, dst_y,
+                        src,
+                        src_row_pitch,
+                        0, 0,                 /* source x, y */
+                        width, rows);         /* width, height */
+      src = (char *)src + src_image_pitch * dst->cpp;
+   }
+}
+
+
+/**
+ * Copy one mipmap image (every depth slice of it) from src to dst.
+ */
+void
+intel_miptree_image_copy(struct intel_context *intel,
+                         struct intel_mipmap_tree *dst,
+                         GLuint face, GLuint level,
+                         struct intel_mipmap_tree *src)
+{
+   GLuint width = src->level[level].width;
+   GLuint height = src->level[level].height;
+   GLuint depth = src->level[level].depth;
+   GLuint src_x, src_y, dst_x, dst_y;
+   GLuint i;
+   GLboolean success;
+
+   if (dst->compressed) {
+       GLuint align_w, align_h;
+       /* NOTE(review): align_h is fetched but unused; rows use a fixed /4 */
+       intel_get_texture_alignment_unit(dst->internal_format,
+                                        &align_w, &align_h);
+       height = (height + 3) / 4;
+       width = ALIGN(width, align_w);
+   }
+
+   intel_prepare_render(intel);
+   /* try intel_region_copy() first; if it fails, map both regions and copy by hand */
+   for (i = 0; i < depth; i++) {
+      intel_miptree_get_image_offset(src, level, face, i, &src_x, &src_y);
+      intel_miptree_get_image_offset(dst, level, face, i, &dst_x, &dst_y);
+      success = intel_region_copy(intel,
+				  dst->region, 0, dst_x, dst_y,
+				  src->region, 0, src_x, src_y,
+				  width, height, GL_FALSE,
+				  GL_COPY);
+      if (!success) {
+	 GLubyte *src_ptr, *dst_ptr;
+
+	 src_ptr = intel_region_map(intel, src->region);
+	 dst_ptr = intel_region_map(intel, dst->region);
+
+	 _mesa_copy_rect(dst_ptr,
+			 dst->cpp,
+			 dst->region->pitch,
+			 dst_x, dst_y, width, height,
+			 src_ptr,
+			 src->region->pitch,
+			 src_x, src_y);
+	 intel_region_unmap(intel, src->region);
+	 intel_region_unmap(intel, dst->region);
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
new file mode 100644
index 0000000000..21db2f4d3b
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h
@@ -0,0 +1,217 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_MIPMAP_TREE_H
+#define INTEL_MIPMAP_TREE_H
+
+#include "intel_regions.h"
+
+/* A layer on top of the intel_regions code which adds:
+ *
+ * - Code to size and layout a region to hold a set of mipmaps.
+ * - Query to determine if a new image fits in an existing tree.
+ * - More refcounting 
+ *     - maybe able to remove refcounting from intel_region?
+ * - ?
+ *
+ * The fixed mipmap layout of intel hardware where one offset
+ * specifies the position of all images in a mipmap hierarchy
+ * complicates the implementation of GL texture image commands,
+ * compared to hardware where each image is specified with an
+ * independent offset.
+ *
+ * In an ideal world, each texture object would be associated with a
+ * single bufmgr buffer or 2d intel_region, and all the images within
+ * the texture object would slot into the tree as they arrive.  The
+ * reality can be a little messier, as images can arrive from the user
+ * with sizes that don't fit in the existing tree, or in an order
+ * where the tree layout cannot be guessed immediately.  
+ * 
+ * This structure encodes an idealized mipmap tree.  The GL image
+ * commands build these where possible, otherwise store the images in
+ * temporary system buffers.
+ */
+
+
+/**
+ * Describes the location of each texture image within a texture region.
+ */
+struct intel_mipmap_level
+{
+   /** Offset to this miptree level, used in computing x_offset. */
+   GLuint level_x;
+   /** Offset to this miptree level, used in computing y_offset. */
+   GLuint level_y;
+   GLuint width;   /**< Image width at this level */
+   GLuint height;  /**< Image height at this level */
+   /** Depth of the mipmap at this level: 1 for 1D/2D/CUBE, n for 3D. */
+   GLuint depth;
+   /** Number of images at this level: 1 for 1D/2D, 6 for CUBE, depth for 3D */
+   GLuint nr_images;
+
+   /** @{
+    * offsets from level_[xy] to the image for each cube face or depth
+    * level.
+    *
+    * Pretty much have to accept that hardware formats
+    * are going to be so diverse that there is no unified way to
+    * compute the offsets of depth/cube images within a mipmap level,
+    * so have to store them as a lookup table.
+    */
+   GLuint *x_offset, *y_offset;  /* presumably nr_images entries each -- confirm */
+   /** @} */
+};
+
+struct intel_mipmap_tree
+{
+   /* One tree describes a set of mipmap images laid out in one region.
+    * Effectively the key: */
+   GLenum target;
+   GLenum internal_format;
+
+   GLuint first_level;  /* presumably lowest level held -- confirm */
+   GLuint last_level;   /* presumably highest level held -- confirm */
+
+   GLuint width0, height0, depth0; /**< Level zero image dimensions */
+   GLuint cpp;            /* presumably bytes per texel -- confirm */
+   GLboolean compressed;  /* when set, image copies count height in 4-row groups */
+
+   /* Derived from the above:
+    */
+   GLuint total_width;
+   GLuint total_height;
+
+   /* Includes image offset tables:
+    */
+   struct intel_mipmap_level level[MAX_TEXTURE_LEVELS];
+
+   /* The data is held here:
+    */
+   struct intel_region *region;
+
+   /* These are also refcounted (presumably through
+    * intel_miptree_reference()/intel_miptree_release() -- confirm): */
+   GLuint refcount;
+};
+
+
+
+struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel,
+                                               GLenum target,
+                                               GLenum base_format,
+                                               GLenum internal_format,
+                                               GLuint first_level,
+                                               GLuint last_level,
+                                               GLuint width0,
+                                               GLuint height0,
+                                               GLuint depth0,
+                                               GLuint cpp,
+                                               GLuint compress_byte,
+					       GLboolean expect_accelerated_upload);
+
+struct intel_mipmap_tree *
+intel_miptree_create_for_region(struct intel_context *intel,
+				GLenum target,
+				GLenum internal_format,
+				GLuint first_level,
+				GLuint last_level,
+				struct intel_region *region,
+				GLuint depth0,
+				GLuint compress_byte);
+
+int intel_miptree_pitch_align (struct intel_context *intel,
+			       struct intel_mipmap_tree *mt,
+			       uint32_t tiling,
+			       int pitch);
+
+void intel_miptree_reference(struct intel_mipmap_tree **dst,
+                             struct intel_mipmap_tree *src);
+
+void intel_miptree_release(struct intel_context *intel,
+                           struct intel_mipmap_tree **mt);
+
+/* Check if an image fits an existing mipmap tree layout
+ */
+GLboolean intel_miptree_match_image(struct intel_mipmap_tree *mt,
+                                    struct gl_texture_image *image);
+
+/* Return a pointer to an image within a tree.  Return image stride as
+ * well.
+ */
+GLubyte *intel_miptree_image_map(struct intel_context *intel,
+                                 struct intel_mipmap_tree *mt,
+                                 GLuint face,
+                                 GLuint level,
+                                 GLuint * row_stride, GLuint * image_stride);
+
+void intel_miptree_image_unmap(struct intel_context *intel,
+                               struct intel_mipmap_tree *mt);
+
+void
+intel_miptree_get_image_offset(struct intel_mipmap_tree *mt,
+			       GLuint level, GLuint face, GLuint depth,
+			       GLuint *x, GLuint *y);
+
+void intel_miptree_set_level_info(struct intel_mipmap_tree *mt,
+                                  GLuint level,
+                                  GLuint nr_images,
+                                  GLuint x, GLuint y,
+                                  GLuint w, GLuint h, GLuint d);
+
+void intel_miptree_set_image_offset(struct intel_mipmap_tree *mt,
+                                    GLuint level,
+                                    GLuint img, GLuint x, GLuint y);
+
+/* Upload an image into a tree
+ */
+void intel_miptree_image_data(struct intel_context *intel,
+                              struct intel_mipmap_tree *dst,
+                              GLuint face,
+                              GLuint level,
+                              void *src,
+                              GLuint src_row_pitch, GLuint src_image_pitch);
+
+/* Copy an image between two trees
+ */
+void intel_miptree_image_copy(struct intel_context *intel,
+                              struct intel_mipmap_tree *dst,
+                              GLuint face, GLuint level,
+                              struct intel_mipmap_tree *src);
+
+/* i915_mipmap_tree.c:
+ */
+GLboolean i915_miptree_layout(struct intel_context *intel,
+			      struct intel_mipmap_tree *mt,
+			      uint32_t tiling);
+GLboolean i945_miptree_layout(struct intel_context *intel,
+			      struct intel_mipmap_tree *mt,
+			      uint32_t tiling);
+GLboolean brw_miptree_layout(struct intel_context *intel,
+			     struct intel_mipmap_tree *mt,
+			     uint32_t tiling);
+
+#endif
diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c
new file mode 100644
index 0000000000..cb088e4032
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel.c
@@ -0,0 +1,169 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/enums.h"
+#include "main/state.h"
+#include "main/bufferobj.h"
+#include "main/context.h"
+#include "swrast/swrast.h"
+
+#include "intel_context.h"
+#include "intel_pixel.h"
+#include "intel_regions.h"
+
+#define FILE_DEBUG_FLAG DEBUG_PIXEL
+
+/* Simplify a blend factor when the source alpha is known to be 1.0. */
+static GLenum
+effective_func(GLenum func, GLboolean src_alpha_is_one)
+{
+   if (!src_alpha_is_one)
+      return func;
+   switch (func) {
+   case GL_SRC_ALPHA:           return GL_ONE;
+   case GL_ONE_MINUS_SRC_ALPHA: return GL_ZERO;
+   default:                     return func;
+   }
+}
+
+/**
+ * Check if any fragment operations are in effect which might affect
+ * glDraw/CopyPixels; returns GL_FALSE (after a DBG note) if one is.
+ */
+GLboolean
+intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one)
+{
+   if (ctx->NewState)   /* refresh state before it is inspected below */
+      _mesa_update_state(ctx);
+
+   if (ctx->FragmentProgram._Enabled) {
+      DBG("fallback due to fragment program\n");
+      return GL_FALSE;
+   }
+   /* Blending passes only if it reduces to ONE/ZERO factors with FUNC_ADD. */
+   if (ctx->Color.BlendEnabled &&
+       (effective_func(ctx->Color.BlendSrcRGB, src_alpha_is_one) != GL_ONE ||
+	effective_func(ctx->Color.BlendDstRGB, src_alpha_is_one) != GL_ZERO ||
+	ctx->Color.BlendEquationRGB != GL_FUNC_ADD ||
+	effective_func(ctx->Color.BlendSrcA, src_alpha_is_one) != GL_ONE ||
+	effective_func(ctx->Color.BlendDstA, src_alpha_is_one) != GL_ZERO ||
+	ctx->Color.BlendEquationA != GL_FUNC_ADD)) {
+      DBG("fallback due to blend\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->Texture._EnabledUnits) {
+      DBG("fallback due to texturing\n");
+      return GL_FALSE;
+   }
+
+   /* Only the first color mask (index 0) is examined; all four channels
+    * must be writable. */
+   if (!(ctx->Color.ColorMask[0][0] &&
+	 ctx->Color.ColorMask[0][1] &&
+	 ctx->Color.ColorMask[0][2] &&
+	 ctx->Color.ColorMask[0][3])) {
+      DBG("fallback due to color masking\n");
+      return GL_FALSE;
+   }
+   if (ctx->Color.AlphaEnabled) {
+      DBG("fallback due to alpha\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->Depth.Test) {
+      DBG("fallback due to depth test\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->Fog.Enabled) {
+      DBG("fallback due to fog\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->_ImageTransferState) {
+      DBG("fallback due to image transfer\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->Stencil._Enabled) {
+      DBG("fallback due to image stencil\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->RenderMode != GL_RENDER) {
+      DBG("fallback due to render mode\n");
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;   /* none of the checked state is active */
+}
+
+/* The intel_region struct does not record the pixel format of its
+ * contents, so for now the region is assumed to be a display surface
+ * holding either ARGB8888 or RGB565 data.
+ * XXX FBO: If we'd pass in the intel_renderbuffer instead of region, we'd
+ * know the buffer's pixel format.
+ * \param format  as given to glDraw/ReadPixels
+ * \param type  as given to glDraw/ReadPixels
+ */
+GLboolean
+intel_check_blit_format(struct intel_region * region,
+                        GLenum format, GLenum type)
+{
+   switch (region->cpp) {
+   case 4:
+      if (format == GL_BGRA &&
+          (type == GL_UNSIGNED_INT_8_8_8_8_REV || type == GL_UNSIGNED_BYTE))
+         return GL_TRUE;
+      break;
+   case 2:
+      if (format == GL_BGR && type == GL_UNSIGNED_SHORT_5_6_5_REV)
+         return GL_TRUE;
+      break;
+   default:
+      break;
+   }
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s: bad format for blit (cpp %d, type %s format %s)\n",
+              __FUNCTION__, region->cpp,
+              _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
+   return GL_FALSE;
+}
+
+/* Install pixel hooks; Bitmap/Copy/DrawPixels are skipped if INTEL_NO_BLIT is set. */
+void
+intelInitPixelFuncs(struct dd_function_table *functions)
+{
+   functions->Accum = _swrast_Accum;
+   functions->ReadPixels = intelReadPixels;
+   if (getenv("INTEL_NO_BLIT") != NULL)
+      return;
+   functions->Bitmap = intelBitmap;
+   functions->CopyPixels = intelCopyPixels;
+   functions->DrawPixels = intelDrawPixels;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_pixel.h b/src/mesa/drivers/dri/intel/intel_pixel.h
new file mode 100644
index 0000000000..743b6497c5
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel.h
@@ -0,0 +1,67 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_PIXEL_H
+#define INTEL_PIXEL_H
+
+#include "main/mtypes.h"
+
+void intelInitPixelFuncs(struct dd_function_table *functions);
+GLboolean intel_check_blit_fragment_ops(GLcontext * ctx,
+					GLboolean src_alpha_is_one);
+
+GLboolean intel_check_blit_format(struct intel_region *region,
+                                  GLenum format, GLenum type);
+
+
+void intelReadPixels(GLcontext * ctx,
+                     GLint x, GLint y,
+                     GLsizei width, GLsizei height,
+                     GLenum format, GLenum type,
+                     const struct gl_pixelstore_attrib *pack,
+                     GLvoid * pixels);
+
+void intelDrawPixels(GLcontext * ctx,
+                     GLint x, GLint y,
+                     GLsizei width, GLsizei height,
+                     GLenum format,
+                     GLenum type,
+                     const struct gl_pixelstore_attrib *unpack,
+                     const GLvoid * pixels);
+
+void intelCopyPixels(GLcontext * ctx,
+                     GLint srcx, GLint srcy,
+                     GLsizei width, GLsizei height,
+                     GLint destx, GLint desty, GLenum type);
+
+void intelBitmap(GLcontext * ctx,
+		 GLint x, GLint y,
+		 GLsizei width, GLsizei height,
+		 const struct gl_pixelstore_attrib *unpack,
+		 const GLubyte * pixels);
+
+#endif
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
new file mode 100644
index 0000000000..076fee89bd
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -0,0 +1,522 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/colormac.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/bufferobj.h"
+#include "main/polygon.h"
+#include "main/pixelstore.h"
+#include "main/polygon.h"
+#include "main/state.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/texstate.h"
+#include "main/texparam.h"
+#include "main/varray.h"
+#include "main/attrib.h"
+#include "main/enable.h"
+#include "main/viewport.h"
+#include "shader/arbprogram.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_blit.h"
+#include "intel_regions.h"
+#include "intel_buffers.h"
+#include "intel_pixel.h"
+#include "intel_reg.h"
+
+
+#define FILE_DEBUG_FLAG DEBUG_PIXEL
+
+
+/* Map the unpack PBO and return the CPU address of the bitmap within it,
+ * or NULL after raising a GL error.
+ *
+ * The XY_TEXT blit path gains nothing from the data being in a PBO; a
+ * path built on XY_MONO_SRC_COPY_BLIT might, but PBO bitmaps are
+ * expected to be rare.
+ */
+static const GLubyte *map_pbo( GLcontext *ctx,
+			       GLsizei width, GLsizei height,
+			       const struct gl_pixelstore_attrib *unpack,
+			       const GLubyte *bitmap )
+{
+   if (_mesa_validate_pbo_access(2, unpack, width, height, 1,
+                                 GL_COLOR_INDEX, GL_BITMAP,
+                                 (GLvoid *) bitmap)) {
+      GLubyte *mapped =
+         (GLubyte *) ctx->Driver.MapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+                                           GL_READ_ONLY_ARB,
+                                           unpack->BufferObj);
+
+      if (mapped)
+         return ADD_POINTERS(mapped, bitmap);
+
+      _mesa_error(ctx, GL_INVALID_OPERATION, "glBitmap(PBO is mapped)");
+   }
+   else {
+      _mesa_error(ctx, GL_INVALID_OPERATION,"glBitmap(invalid PBO access)");
+   }
+
+   return NULL;
+}
+
+static GLboolean test_bit( const GLubyte *src, GLuint bit )
+{  /* bit N lives in byte N/8 at position N%8, counted from the LSB */
+   return (src[bit >> 3] >> (bit & 7)) & 1;
+}
+
+static void set_bit( GLubyte *dest, GLuint bit )
+{  /* bit N lives in byte N/8 at position N%8, counted from the LSB */
+   dest[bit >> 3] |= 1 << (bit & 7);
+}
+
+/* Copy the w x h sub-rectangle at (x, y) of the bitmap into dest (which
+ * must be pre-zeroed) and return how many set bits were copied.  Called
+ * per chunk of HW-sized bitmap. */
+static GLuint get_bitmap_rect(GLsizei width, GLsizei height,
+			      const struct gl_pixelstore_attrib *unpack,
+			      const GLubyte *bitmap,
+			      GLuint x, GLuint y, 
+			      GLuint w, GLuint h,
+			      GLubyte *dest,
+			      GLuint row_align,
+			      GLboolean invert)
+{
+   GLuint src_offset = (x + unpack->SkipPixels) & 0x7; /* low 3 bits of the source column */
+   GLuint mask = unpack->LsbFirst ? 0 : 7;   /* XOR by 7 reverses bit order within a byte */
+   GLuint bit = 0;      /* write position in dest, in bits */
+   GLint row, col;
+   GLint first, last;
+   GLint incr;
+   GLuint count = 0;    /* number of set bits copied */
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      printf("%s %d,%d %dx%d bitmap %dx%d skip %d src_offset %d mask %d\n",
+		   __FUNCTION__, x,y,w,h,width,height,unpack->SkipPixels, src_offset, mask);
+
+   if (invert) {   /* visit rows h-1 .. 0 instead of 0 .. h-1 */
+      first = h-1;
+      last = 0;
+      incr = -1;
+   }
+   else {
+      first = 0;
+      last = h-1;
+      incr = 1;
+   }
+
+   /* Require that dest be pre-zero'd.
+    */
+   for (row = first; row != (last+incr); row += incr) {
+      const GLubyte *rowsrc = _mesa_image_address2d(unpack, bitmap, 
+						    width, height, 
+						    GL_COLOR_INDEX, GL_BITMAP, 
+						    y + row, x);
+
+      for (col = 0; col < w; col++, bit++) {
+	 if (test_bit(rowsrc, (col + src_offset) ^ mask)) {
+	    set_bit(dest, bit ^ 7);   /* ^ 7: MSB-first within each dest byte */
+	    count++;
+	 }
+      }
+
+      if (row_align)   /* pad the row to row_align bits (ALIGN assumed to round up) */
+	 bit = ALIGN(bit, row_align);
+   }
+
+   return count;
+}
+
+/**
+ * Return the low Y of the span [y, y + height): unchanged for a named
+ * framebuffer (fb->Name != 0), otherwise mirrored within fb->Height.
+ */
+static INLINE int
+y_flip(struct gl_framebuffer *fb, int y, int height)
+{
+   if (fb->Name == 0)
+      return fb->Height - y - height;
+
+   return y;
+}
+
+/*
+ * Render a bitmap with immediate color-expand blits, one per non-empty
+ * chunk of at most DX x DY bits, after clipping to the draw buffer.
+ *
+ * Returns GL_TRUE when the call is finished (including the case where
+ * mapping the unpack PBO raised a GL error) and GL_FALSE when the caller
+ * must use another path.  A PBO mapped here is unmapped before returning.
+ */
+static GLboolean
+do_blit_bitmap( GLcontext *ctx,
+		GLint dstx, GLint dsty,
+		GLsizei width, GLsizei height,
+		const struct gl_pixelstore_attrib *unpack,
+		const GLubyte *bitmap )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst = intel_drawbuf_region(intel);
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   GLfloat tmpColor[4];
+   GLubyte ubcolor[4];
+   GLuint color;
+   const GLsizei bitmap_width = width, bitmap_height = height;
+   const GLint orig_dstx = dstx, orig_dsty = dsty;
+   GLint px, py;
+   GLuint stipple[32];
+   GLboolean handled = GL_TRUE;
+
+   /* Update draw buffer bounds */
+   _mesa_update_state(ctx);
+
+   if (ctx->Depth.Test) {
+      /* The blit path produces incorrect results when depth testing is on:
+       * the blit Z coord seems to always be 1.0 (the far plane), so the
+       * fragments will likely be obscured by other, closer geometry. */
+      return GL_FALSE;
+   }
+
+   if (!dst)
+       return GL_FALSE;
+
+   COPY_4V(tmpColor, ctx->Current.RasterColor);
+
+   if (NEED_SECONDARY_COLOR(ctx)) {
+       ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor);
+   }
+
+   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[0], tmpColor[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[1], tmpColor[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]);
+   UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]);
+
+   if (dst->cpp == 2)
+      color = PACK_COLOR_565(ubcolor[0], ubcolor[1], ubcolor[2]);
+   else
+      color = PACK_COLOR_8888(ubcolor[3], ubcolor[0], ubcolor[1], ubcolor[2]);
+
+   if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F))
+      return GL_FALSE;
+
+   /* Map the PBO only after the last plain "return GL_FALSE", so no
+    * fallback return can leave the buffer mapped. */
+   if (_mesa_is_bufferobj(unpack->BufferObj)) {
+      bitmap = map_pbo(ctx, width, height, unpack, bitmap);
+      if (bitmap == NULL)
+	 return GL_TRUE;	/* even though this is an error, we're done */
+   }
+
+   intel_prepare_render(intel);
+
+   /* Clip to buffer bounds and scissor. */
+   if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
+			     fb->_Xmax, fb->_Ymax,
+			     &dstx, &dsty, &width, &height))
+      goto out;
+
+   dsty = y_flip(fb, dsty, height);
+
+#define DY 32
+#define DX 32
+   /* Chop it all into chunks that can be digested by hardware: */
+   for (py = 0; py < height; py += DY) {
+      for (px = 0; px < width; px += DX) {
+	 int h = MIN2(DY, height - py);
+	 int w = MIN2(DX, width - px);
+	 GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8;
+	 GLenum logic_op = ctx->Color.ColorLogicOpEnabled ?
+	    ctx->Color.LogicOp : GL_COPY;
+
+	 assert(sz <= sizeof(stipple));
+	 memset(stipple, 0, sz);
+
+	 /* Translate destination coordinates back into source coordinates
+	  * (may need adjusting if more padding is introduced in sz). */
+	 if (get_bitmap_rect(bitmap_width, bitmap_height, unpack,
+			     bitmap,
+			     -orig_dstx + (dstx + px),
+			     -orig_dsty + y_flip(fb, dsty + py, h),
+			     w, h,
+			     (GLubyte *)stipple,
+			     8,
+			     fb->Name == 0 ? GL_TRUE : GL_FALSE) == 0)
+	    continue;
+
+	 if (!intelEmitImmediateColorExpandBlit(intel,
+						dst->cpp,
+						(GLubyte *)stipple,
+						sz,
+						color,
+						dst->pitch,
+						dst->buffer,
+						0,
+						dst->tiling,
+						dstx + px,
+						dsty + py,
+						w, h,
+						logic_op)) {
+	    /* Report failure, but still release the PBO mapping below. */
+	    handled = GL_FALSE;
+	    goto out;
+	 }
+      }
+   }
+out:
+   if (INTEL_DEBUG & DEBUG_SYNC)
+      intel_batchbuffer_flush(intel->batch);
+
+   if (_mesa_is_bufferobj(unpack->BufferObj)) {
+      /* done with PBO so unmap it now */
+      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+                              unpack->BufferObj);
+   }
+   intel_check_front_buffer_rendering(intel);
+
+   return handled;
+}
+
+/*
+ * Render a bitmap by uploading it as an alpha texture and drawing a quad
+ * with a fragment program that KILs fragments whose alpha is below 0.5.
+ *
+ * Returns GL_TRUE when the bitmap was drawn (or mapping the unpack PBO
+ * raised a GL error) and GL_FALSE when the caller should fall back, i.e.
+ * unsupported GL state or failure to allocate the A8 staging buffer.
+ */
+static GLboolean
+intel_texture_bitmap(GLcontext * ctx,
+		     GLint dst_x, GLint dst_y,
+		     GLsizei width, GLsizei height,
+		     const struct gl_pixelstore_attrib *unpack,
+		     const GLubyte *bitmap)
+{
+   struct intel_context *intel = intel_context(ctx);
+   static const char *fp =
+      "!!ARBfp1.0\n"
+      "TEMP val;\n"
+      "PARAM color=program.local[0];\n"
+      "TEX val, fragment.texcoord[0], texture[0], 2D;\n"
+      "ADD val, val.wwww, {-.5, -.5, -.5, -.5};\n"
+      "KIL val;\n"
+      "MOV result.color, color;\n"
+      "END\n";
+   GLuint texname, i;
+   GLfloat vertices[4][4], dst_z;
+   GLint old_active_texture;
+   GLubyte *a8_bitmap;
+
+   /* We need a fragment program for the KIL effect */
+   if (!ctx->Extensions.ARB_fragment_program ||
+       !ctx->Extensions.ARB_vertex_program) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "glBitmap fallback: No fragment/vertex program support\n");
+      return GL_FALSE;
+   }
+
+   /* We're going to mess with texturing with no regard to existing texture
+    * state, so if there is some set up we have to bail.
+    */
+   if (ctx->Texture._EnabledUnits != 0) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "glBitmap fallback: texturing enabled\n");
+      return GL_FALSE;
+   }
+
+   /* Can't do textured DrawPixels with a fragment program, unless we were
+    * to generate a new program that sampled our texture and put the results
+    * in the fragment color before the user's program started.
+    */
+   if (ctx->FragmentProgram.Enabled) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "glBitmap fallback: fragment program enabled\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->VertexProgram.Enabled) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "glBitmap fallback: vertex program enabled\n");
+      return GL_FALSE;
+   }
+
+   if (!ctx->Extensions.ARB_texture_non_power_of_two &&
+       (!is_power_of_two(width) || !is_power_of_two(height))) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "glBitmap() fallback: NPOT texture\n");
+      return GL_FALSE;
+   }
+
+   if (ctx->Fog.Enabled) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "glBitmap() fallback: fog\n");
+      return GL_FALSE;
+   }
+
+   /* Check that we can load in a texture this big. */
+   if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) ||
+       height > (1 << (ctx->Const.MaxTextureLevels - 1))) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "glBitmap fallback: bitmap too large (%dx%d)\n",
+		 width, height);
+      return GL_FALSE;
+   }
+
+   if (_mesa_is_bufferobj(unpack->BufferObj)) {
+      bitmap = map_pbo(ctx, width, height, unpack, bitmap);
+      if (bitmap == NULL)
+	 return GL_TRUE;	/* even though this is an error, we're done */
+   }
+
+   /* Convert the A1 bitmap to an A8 format suitable for glTexImage */
+   a8_bitmap = calloc(1, width * height);
+   if (a8_bitmap)
+      _mesa_expand_bitmap(width, height, unpack, bitmap, a8_bitmap, width, 0xff);
+
+   if (_mesa_is_bufferobj(unpack->BufferObj)) {
+      /* done with PBO so unmap it now */
+      ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+                              unpack->BufferObj);
+   }
+
+   if (!a8_bitmap)
+      return GL_FALSE;	/* no GL state touched yet: caller can fall back */
+
+   /* Save GL state before we start setting up our drawing */
+   _mesa_PushAttrib(GL_ENABLE_BIT | GL_CURRENT_BIT | GL_POLYGON_BIT |
+                    GL_TEXTURE_BIT | GL_VIEWPORT_BIT);
+   _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT |
+			  GL_CLIENT_PIXEL_STORE_BIT);
+   old_active_texture = ctx->Texture.CurrentUnit;
+
+   _mesa_Disable(GL_POLYGON_STIPPLE);
+   _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+
+   /* Upload our bitmap data to an alpha texture */
+   _mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
+   _mesa_Enable(GL_TEXTURE_2D);
+   _mesa_GenTextures(1, &texname);
+   _mesa_BindTexture(GL_TEXTURE_2D, texname);
+   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
+   _mesa_PixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
+   _mesa_PixelStorei(GL_UNPACK_LSB_FIRST, GL_FALSE);
+   _mesa_PixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+   _mesa_PixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
+   _mesa_PixelStorei(GL_UNPACK_SKIP_ROWS, 0);
+   _mesa_PixelStorei(GL_UNPACK_ALIGNMENT, 1);
+   _mesa_TexImage2D(GL_TEXTURE_2D, 0, GL_ALPHA, width, height, 0,
+		    GL_ALPHA, GL_UNSIGNED_BYTE, a8_bitmap);
+   free(a8_bitmap);
+
+   meta_set_fragment_program(&intel->meta, &intel->meta.bitmap_fp, fp);
+   _mesa_ProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0,
+				     ctx->Current.RasterColor);
+   meta_set_passthrough_vertex_program(&intel->meta);
+   meta_set_passthrough_transform(&intel->meta);
+
+   /* convert rasterpos Z from [0,1] to NDC coord in [-1,1] */
+   dst_z = -1.0 + 2.0 * ctx->Current.RasterPos[2];
+   /* RasterPos[2] already takes into account the DepthRange mapping. */
+   _mesa_DepthRange(0.0, 1.0);
+
+   vertices[0][0] = vertices[3][0] = dst_x;
+   vertices[1][0] = vertices[2][0] = dst_x + width;
+   vertices[0][1] = vertices[1][1] = dst_y;
+   vertices[2][1] = vertices[3][1] = dst_y + height;
+   for (i = 0; i < 4; i++) {
+      vertices[i][2] = dst_z;
+      vertices[i][3] = 1.0;
+   }
+
+   _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices);
+   _mesa_Enable(GL_VERTEX_ARRAY);
+   meta_set_default_texrect(&intel->meta);
+   _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+   meta_restore_texcoords(&intel->meta);
+   meta_restore_transform(&intel->meta);
+   meta_restore_fragment_program(&intel->meta);
+   meta_restore_vertex_program(&intel->meta);
+
+   _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
+   _mesa_PopClientAttrib();
+   _mesa_PopAttrib();
+
+   _mesa_DeleteTextures(1, &texname);
+
+   return GL_TRUE;
+}
+
+/* Driver hook for glBitmap.  The blit path is tried first, then the
+ * textured-quad path, and swrast handles whatever both decline.
+ *
+ * There are many possible ways to implement bitmap on this hardware,
+ * most with some sort of drawback.  A few that spring to mind:
+ *
+ * Blit:
+ *    - XY_MONO_SRC_BLT_CMD
+ *         - use XY_SETUP_CLIP_BLT for cliprect clipping.
+ *    - XY_TEXT_BLT
+ *    - XY_TEXT_IMMEDIATE_BLT
+ *         - blit per cliprect, subject to maximum immediate data size.
+ *    - XY_COLOR_BLT
+ *         - per pixel or run of pixels
+ *    - XY_PIXEL_BLT
+ *         - good for sparse bitmaps
+ *
+ * 3D engine:
+ *    - Point per pixel
+ *    - Translate bitmap to an alpha texture and render as a quad
+ *    - Chop bitmap up into 32x32 squares and render w/polygon stipple.
+ */
+void
+intelBitmap(GLcontext * ctx,
+	    GLint x, GLint y,
+	    GLsizei width, GLsizei height,
+	    const struct gl_pixelstore_attrib *unpack,
+	    const GLubyte * pixels)
+{
+   const GLboolean handled =
+      do_blit_bitmap(ctx, x, y, width, height, unpack, pixels) ||
+      intel_texture_bitmap(ctx, x, y, width, height, unpack, pixels);
+   if (handled)
+      return;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      printf("%s: fallback to swrast\n", __FUNCTION__);
+
+   _swrast_Bitmap(ctx, x, y, width, height, unpack, pixels);
+}
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
new file mode 100644
index 0000000000..2008a4c2be
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c
@@ -0,0 +1,214 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/glheader.h"
+#include "main/image.h"
+#include "main/state.h"
+#include "main/mtypes.h"
+#include "drivers/common/meta.h"
+
+#include "intel_context.h"
+#include "intel_buffers.h"
+#include "intel_regions.h"
+#include "intel_pixel.h"
+#include "intel_fbo.h"
+
+#define FILE_DEBUG_FLAG DEBUG_PIXEL
+
+/**
+ * Select the region glCopyPixels should read from for the given copy type.
+ *
+ * \param intel  driver context
+ * \param type   GL_COLOR, GL_DEPTH, GL_STENCIL or GL_DEPTH_STENCIL_EXT
+ *
+ * \return the source region, or NULL when no region can be used for this
+ *         type.
+ */
+static struct intel_region *
+copypix_src_region(struct intel_context *intel, GLenum type)
+{
+   /* Attachment[].Renderbuffer is itself a pointer to the renderbuffer:
+    * cast its value, not the address of the field.  Taking the address
+    * produced a pointer that was never NULL and did not point at an
+    * intel_renderbuffer.
+    */
+   struct intel_renderbuffer *depth = (struct intel_renderbuffer *)
+      intel->ctx.DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
+
+   switch (type) {
+   case GL_COLOR:
+      return intel_readbuf_region(intel);
+   case GL_DEPTH:
+      /* Don't think this is really possible except at 16bpp, when we
+       * have no stencil. */
+      if (depth && depth->region && depth->region->cpp == 2)
+         return depth->region;
+      break;
+   case GL_STENCIL:
+      /* Don't think this is really possible. */
+      break;
+   case GL_DEPTH_STENCIL_EXT:
+      /* Does it matter whether it is stencil/depth or depth/stencil?
+       */
+      if (depth)
+         return depth->region;
+      break;
+   default:
+      break;
+   }
+
+   return NULL;
+}
+
+
+/**
+ * Check whether any fragment operation is in effect that might affect
+ * glCopyPixels.  Differs from intel_check_blit_fragment_ops in that
+ * Scissor is allowed.
+ *
+ * \return GL_TRUE when none of the tested state is active, GL_FALSE
+ *         otherwise.
+ */
+static GLboolean
+intel_check_copypixel_blit_fragment_ops(GLcontext * ctx)
+{
+   if (ctx->NewState)
+      _mesa_update_state(ctx);
+
+   /* Pixel transfer state. */
+   if (ctx->_ImageTransferState)
+      return GL_FALSE;
+
+   /* Alpha test, depth test, fog and stencil. */
+   if (ctx->Color.AlphaEnabled || ctx->Depth.Test ||
+       ctx->Fog.Enabled || ctx->Stencil._Enabled)
+      return GL_FALSE;
+
+   /* Every component of ColorMask[0] has to be set. */
+   if (!(ctx->Color.ColorMask[0][0] && ctx->Color.ColorMask[0][1] &&
+         ctx->Color.ColorMask[0][2] && ctx->Color.ColorMask[0][3]))
+      return GL_FALSE;
+
+   /* Texturing, fragment programs and blending.  Logic op is not tested:
+    * could do logicop with the blitter.
+    */
+   if (ctx->Texture._EnabledUnits || ctx->FragmentProgram._Enabled ||
+       ctx->Color.BlendEnabled)
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+
+/**
+ * CopyPixels with the blitter.  Don't support zooming, pixel transfer, etc.
+ *
+ * \param ctx            GL context
+ * \param srcx, srcy     origin of the source rectangle
+ * \param width, height  size of the rectangle
+ * \param dstx, dsty     origin of the destination rectangle
+ * \param type           kind of data to copy (GL_COLOR, GL_DEPTH, ...)
+ *
+ * \return GL_TRUE when the copy was issued, or when clipping left nothing
+ *         to copy; GL_FALSE when the request is not handled here (depth or
+ *         stencil type, fragment ops or zoom active, missing region, or
+ *         intel_region_copy() failure).
+ *
+ * NOTE(review): for GL_DEPTH_STENCIL_EXT the source comes from the depth
+ * attachment (see copypix_src_region) while the destination is whatever
+ * intel_drawbuf_region() returns -- confirm these refer to matching
+ * buffers.
+ */
+static GLboolean
+do_blit_copypixels(GLcontext * ctx,
+                   GLint srcx, GLint srcy,
+                   GLsizei width, GLsizei height,
+                   GLint dstx, GLint dsty, GLenum type)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *dst;
+   struct intel_region *src;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   struct gl_framebuffer *read_fb = ctx->ReadBuffer;
+   /* Pre-clip coordinates, kept so the opposite rectangle can be shifted
+    * by the amount each clip step moved the origin.
+    */
+   GLint orig_dstx;
+   GLint orig_dsty;
+   GLint orig_srcx;
+   GLint orig_srcy;
+   /* Toggled once per window-system framebuffer involved; passed on to
+    * intel_region_copy().
+    */
+   GLboolean flip = GL_FALSE;
+
+   if (type == GL_DEPTH || type == GL_STENCIL) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "glCopyPixels() fallback: GL_DEPTH || GL_STENCIL\n");
+      return GL_FALSE;
+   }
+
+   /* Update draw buffer bounds */
+   _mesa_update_state(ctx);
+
+   /* Copypixels can be more than a straight copy.  Ensure all the
+    * extra operations are disabled:
+    */
+   if (!intel_check_copypixel_blit_fragment_ops(ctx) ||
+       ctx->Pixel.ZoomX != 1.0F || ctx->Pixel.ZoomY != 1.0F)
+      return GL_FALSE;
+
+   intel_prepare_render(intel);
+
+   dst = intel_drawbuf_region(intel);
+   src = copypix_src_region(intel, type);
+
+   if (!src || !dst)
+      return GL_FALSE;
+
+   intel_flush(&intel->ctx);
+
+   /* Clip to destination buffer. */
+   orig_dstx = dstx;
+   orig_dsty = dsty;
+   if (!_mesa_clip_to_region(fb->_Xmin, fb->_Ymin,
+			     fb->_Xmax, fb->_Ymax,
+			     &dstx, &dsty, &width, &height))
+      goto out;
+   /* Adjust src coords for our post-clipped destination origin */
+   srcx += dstx - orig_dstx;
+   srcy += dsty - orig_dsty;
+
+   /* Clip to source buffer. */
+   orig_srcx = srcx;
+   orig_srcy = srcy;
+   if (!_mesa_clip_to_region(0, 0,
+			     read_fb->Width, read_fb->Height,
+			     &srcx, &srcy, &width, &height))
+      goto out;
+   /* Adjust dst coords for our post-clipped source origin */
+   dstx += srcx - orig_srcx;
+   dsty += srcy - orig_srcy;
+
+   /* Flip dest Y if it's a window system framebuffer. */
+   if (fb->Name == 0) {
+      /* copypixels to a window system framebuffer */
+      dsty = fb->Height - dsty - height;
+      flip = !flip;
+   }
+
+   /* Flip source Y if it's a window system framebuffer. */
+   if (read_fb->Name == 0) {
+      srcy = read_fb->Height - srcy - height;
+      flip = !flip;
+   }
+
+   /* The logic op is handed to the copy rather than forcing a fallback;
+    * GL_COPY is used when color logic op is disabled.
+    */
+   if (!intel_region_copy(intel,
+			  dst, 0, dstx, dsty,
+			  src, 0, srcx, srcy,
+			  width, height, flip,
+			  ctx->Color.ColorLogicOpEnabled ?
+			  ctx->Color.LogicOp : GL_COPY)) {
+      DBG("%s: blit failure\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   /* Also reached directly when clipping removed the whole rectangle. */
+out:
+   intel_check_front_buffer_rendering(intel);
+
+   DBG("%s: success\n", __FUNCTION__);
+   return GL_TRUE;
+}
+
+
+/**
+ * Driver glCopyPixels entry point: use the blitter when it can handle the
+ * request, otherwise hand the call to _mesa_meta_CopyPixels().
+ */
+void
+intelCopyPixels(GLcontext * ctx,
+                GLint srcx, GLint srcy,
+                GLsizei width, GLsizei height,
+                GLint destx, GLint desty, GLenum type)
+{
+   GLboolean blitted;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   blitted = do_blit_copypixels(ctx, srcx, srcy, width, height,
+                                destx, desty, type);
+
+   /* this will use swrast if needed */
+   if (!blitted)
+      _mesa_meta_CopyPixels(ctx, srcx, srcy, width, height,
+                            destx, desty, type);
+}
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
new file mode 100644
index 0000000000..a40b232fff
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c
@@ -0,0 +1,279 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/mtypes.h"
+#include "main/teximage.h"
+#include "main/texenv.h"
+#include "main/texobj.h"
+#include "main/texstate.h"
+#include "main/texparam.h"
+#include "main/varray.h"
+#include "main/attrib.h"
+#include "main/enable.h"
+#include "main/buffers.h"
+#include "main/fbobject.h"
+#include "main/depth.h"
+#include "main/hash.h"
+#include "main/blend.h"
+#include "swrast/swrast.h"
+#include "drivers/common/meta.h"
+
+#include "intel_context.h"
+#include "intel_pixel.h"
+#include "intel_fbo.h"
+
+
+/**
+ * glDrawPixels(GL_STENCIL_INDEX) using the 3D engine.
+ *
+ * The stencil values are unpacked to bytes, uploaded as a GL_INTENSITY
+ * texture and drawn as a quad into a temporary framebuffer object whose
+ * color attachment is an ARGB8888 renderbuffer wrapping the depth/stencil
+ * region, with only the alpha channel enabled for writing.
+ *
+ * \param ctx            GL context
+ * \param x, y           destination position
+ * \param width, height  image size
+ * \param format         must be GL_STENCIL_INDEX, otherwise GL_FALSE is
+ *                       returned immediately
+ * \param type           data type of \p pixels
+ * \param unpack         pixel unpacking parameters for \p pixels
+ * \param pixels         source image
+ *
+ * \return GL_TRUE when the image was drawn (or the stencil write mask is
+ *         zero so there is nothing to do), GL_FALSE when the caller has to
+ *         fall back to another path.
+ *
+ * XXX compare perf of this vs. _mesa_meta_DrawPixels(STENCIL)
+ */
+static GLboolean
+intel_stencil_drawpixels(GLcontext * ctx,
+                         GLint x, GLint y,
+                         GLsizei width, GLsizei height,
+                         GLenum format,
+                         GLenum type,
+                         const struct gl_pixelstore_attrib *unpack,
+                         const GLvoid *pixels)
+{
+   struct intel_context *intel = intel_context(ctx);
+   GLuint texname, rb_name, fb_name, old_fb_name;
+   GLfloat vertices[4][2];
+   struct intel_renderbuffer *irb;
+   struct intel_renderbuffer *depth_irb;
+   struct gl_pixelstore_attrib old_unpack;
+   GLstencil *stencil_pixels = NULL;
+   int row, y1, y2;
+   GLint old_active_texture;
+   GLboolean rendering_to_fbo = ctx->DrawBuffer->Name != 0;
+
+   if (format != GL_STENCIL_INDEX)
+      return GL_FALSE;
+
+   /* If there's nothing to write, we're done. */
+   if (ctx->Stencil.WriteMask[0] == 0)
+      return GL_TRUE;
+
+   /* Can't do a per-bit writemask while treating stencil as rgba data. */
+   if ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+         fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: "
+                 "stencil mask enabled\n");
+      return GL_FALSE;
+   }
+
+   /* We don't support stencil testing/ops here */
+   if (ctx->Stencil._Enabled)
+      return GL_FALSE;
+
+   /* We use FBOs for our wrapping of the depthbuffer into a color
+    * destination.
+    */
+   if (!ctx->Extensions.EXT_framebuffer_object)
+      return GL_FALSE;
+
+   /* We're going to mess with texturing with no regard to existing texture
+    * state, so if there is some set up we have to bail.
+    */
+   if (ctx->Texture._EnabledUnits != 0) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+         fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: "
+                 "texturing enabled\n");
+      return GL_FALSE;
+   }
+
+   /* Can't do textured DrawPixels with a fragment program, unless we were
+    * to generate a new program that sampled our texture and put the results
+    * in the fragment color before the user's program started.
+    */
+   if (ctx->FragmentProgram.Enabled) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+         fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: "
+                 "fragment program enabled\n");
+      return GL_FALSE;
+   }
+
+   /* Check that we can load in a texture this big. */
+   if (width > (1 << (ctx->Const.MaxTextureLevels - 1)) ||
+       height > (1 << (ctx->Const.MaxTextureLevels - 1))) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+         fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: "
+                 "bitmap too large (%dx%d)\n",
+                 width, height);
+      return GL_FALSE;
+   }
+
+   if (!ctx->Extensions.ARB_texture_non_power_of_two &&
+       (!is_power_of_two(width) || !is_power_of_two(height))) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+         fprintf(stderr,
+                 "glDrawPixels(GL_STENCIL_INDEX) fallback: NPOT texture\n");
+      return GL_FALSE;
+   }
+
+   /* The wrapper renderbuffer created below shares the depth/stencil
+    * region, so look it up before any GL state is touched and bail out if
+    * the draw buffer has none.
+    */
+   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
+   if (!depth_irb || !depth_irb->region) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+         fprintf(stderr, "glDrawPixels(STENCIL_INDEX) fallback: "
+                 "no depth/stencil region\n");
+      return GL_FALSE;
+   }
+
+   _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT |
+                    GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+   _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT);
+   old_fb_name = ctx->DrawBuffer->Name;
+   old_active_texture = ctx->Texture.CurrentUnit;
+
+   _mesa_Disable(GL_POLYGON_STIPPLE);
+   _mesa_Disable(GL_DEPTH_TEST);
+   _mesa_Disable(GL_STENCIL_TEST);
+
+   /* Unpack the supplied stencil values into a ubyte buffer. */
+   assert(sizeof(GLstencil) == sizeof(GLubyte));
+   stencil_pixels = malloc(width * height * sizeof(GLstencil));
+   if (!stencil_pixels)
+      goto fail;
+
+   for (row = 0; row < height; row++) {
+      GLvoid *source = _mesa_image_address2d(unpack, pixels,
+                                             width, height,
+                                             GL_COLOR_INDEX, type,
+                                             row, 0);
+      _mesa_unpack_stencil_span(ctx, width, GL_UNSIGNED_BYTE,
+                                stencil_pixels +
+                                row * width * sizeof(GLstencil),
+                                type, source, unpack, ctx->_ImageTransferState);
+   }
+
+   /* Take the current depth/stencil renderbuffer, and make a new one wrapping
+    * it which will be treated as GL_RGBA8 so we can render to it as a color
+    * buffer.
+    */
+   irb = intel_create_renderbuffer(MESA_FORMAT_ARGB8888);
+   if (!irb)
+      goto fail;
+   irb->Base.Width = depth_irb->Base.Width;
+   irb->Base.Height = depth_irb->Base.Height;
+   intel_renderbuffer_set_region(intel, irb, depth_irb->region);
+
+   /* Create a name for our renderbuffer, which lets us use other mesa
+    * rb functions for convenience.
+    */
+   _mesa_GenRenderbuffersEXT(1, &rb_name);
+   irb->Base.RefCount++;
+   _mesa_HashInsert(ctx->Shared->RenderBuffers, rb_name, &irb->Base);
+
+   /* Bind the new renderbuffer to the color attachment point. */
+   _mesa_GenFramebuffersEXT(1, &fb_name);
+   _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name);
+   _mesa_FramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT,
+                                    GL_COLOR_ATTACHMENT0_EXT,
+                                    GL_RENDERBUFFER_EXT,
+                                    rb_name);
+   /* Choose to render to the color attachment. */
+   _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
+
+   /* Only the alpha channel of the wrapper is written. */
+   _mesa_DepthMask(GL_FALSE);
+   _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_TRUE);
+
+   _mesa_ActiveTextureARB(GL_TEXTURE0_ARB);
+   _mesa_Enable(GL_TEXTURE_2D);
+   _mesa_GenTextures(1, &texname);
+   _mesa_BindTexture(GL_TEXTURE_2D, texname);
+   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+   _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+   _mesa_TexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
+   /* The staging buffer is tightly packed bytes, so upload it with the
+    * default unpack state and put the application's state back afterwards.
+    */
+   old_unpack = ctx->Unpack;
+   ctx->Unpack = ctx->DefaultPacking;
+   _mesa_TexImage2D(GL_TEXTURE_2D, 0, GL_INTENSITY, width, height, 0,
+                    GL_RED, GL_UNSIGNED_BYTE, stencil_pixels);
+   ctx->Unpack = old_unpack;
+   free(stencil_pixels);
+   stencil_pixels = NULL;
+
+   meta_set_passthrough_transform(&intel->meta);
+
+   /* Since we're rendering to the framebuffer as if it was an FBO,
+    * if it's the window system we have to flip the coordinates.
+    */
+   if (rendering_to_fbo) {
+      y1 = y;
+      y2 = y + height * ctx->Pixel.ZoomY;
+   } else {
+      y1 = irb->Base.Height - (y + height * ctx->Pixel.ZoomY);
+      y2 = irb->Base.Height - y;
+   }
+   vertices[0][0] = x;
+   vertices[0][1] = y1;
+   vertices[1][0] = x + width * ctx->Pixel.ZoomX;
+   vertices[1][1] = y1;
+   vertices[2][0] = x + width * ctx->Pixel.ZoomX;
+   vertices[2][1] = y2;
+   vertices[3][0] = x;
+   vertices[3][1] = y2;
+
+   _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices);
+   _mesa_Enable(GL_VERTEX_ARRAY);
+   meta_set_default_texrect(&intel->meta);
+
+   _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
+
+   meta_restore_texcoords(&intel->meta);
+   meta_restore_transform(&intel->meta);
+
+   _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture);
+   _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, old_fb_name);
+
+   _mesa_PopClientAttrib();
+   _mesa_PopAttrib();
+
+   _mesa_DeleteTextures(1, &texname);
+   _mesa_DeleteFramebuffersEXT(1, &fb_name);
+   _mesa_DeleteRenderbuffersEXT(1, &rb_name);
+
+   return GL_TRUE;
+
+fail:
+   /* Only reached before any GL object was created or bound: release the
+    * staging buffer and pop the attribute stacks pushed above so the
+    * caller can fall back.
+    */
+   free(stencil_pixels);
+   _mesa_PopClientAttrib();
+   _mesa_PopAttrib();
+   return GL_FALSE;
+}
+
+/**
+ * Driver glDrawPixels entry point.
+ *
+ * As compiled (the "#if 0" branch below is disabled), GL_STENCIL_INDEX
+ * images are drawn with _swrast_DrawPixels() and every other format is
+ * passed to _mesa_meta_DrawPixels().
+ */
+void
+intelDrawPixels(GLcontext * ctx,
+                GLint x, GLint y,
+                GLsizei width, GLsizei height,
+                GLenum format,
+                GLenum type,
+                const struct gl_pixelstore_attrib *unpack,
+                const GLvoid * pixels)
+{
+#if 0
+   /* XXX this function doesn't seem to work reliably even when all
+    * the pre-requisite conditions are met.
+    * Note that this function is never hit with conform.
+    * Fall back to swrast because even the _mesa_meta_DrawPixels() approach
+    * isn't working because of an apparent stencil bug.
+    */
+   if (intel_stencil_drawpixels(ctx, x, y, width, height, format, type,
+				unpack, pixels))
+      return;
+#else
+   /* Reference the otherwise unused static function so it is still
+    * compiled without an unused-function warning.
+    */
+   (void) intel_stencil_drawpixels; /* silence warning */
+   if (format == GL_STENCIL_INDEX) {
+      _swrast_DrawPixels(ctx, x, y, width, height, format, type,
+                         unpack, pixels);
+      return;
+   }
+#endif
+
+   _mesa_meta_DrawPixels(ctx, x, y, width, height, format, type,
+                         unpack, pixels);
+}
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c b/src/mesa/drivers/dri/intel/intel_pixel_read.c
new file mode 100644
index 0000000000..21d2a7a93e
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c
@@ -0,0 +1,207 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/glheader.h"
+#include "main/enums.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/image.h"
+#include "main/bufferobj.h"
+#include "main/state.h"
+#include "swrast/swrast.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_blit.h"
+#include "intel_buffers.h"
+#include "intel_regions.h"
+#include "intel_pixel.h"
+#include "intel_buffer_objects.h"
+
+/* For many applications, the new ability to pull the source buffers
+ * back out of the GTT and then do the packing/conversion operations
+ * in software will be as much of an improvement as trying to get the
+ * blitter and/or texture engine to do the work.
+ *
+ * This step is gated on private backbuffers.
+ *
+ * Obviously the frontbuffer can't be pulled back, so that is either
+ * an argument for blit/texture readpixels, or for blitting to a
+ * temporary and then pulling that back.
+ *
+ * When the destination is a pbo, however, it's not clear if it is
+ * ever going to be pulled to main memory (though the access param
+ * will be a good hint).  So it sounds like we do want to be able to
+ * choose between blit/texture implementation on the gpu and pullback
+ * and cpu-based copying.
+ *
+ * Unless you can magically turn client memory into a PBO for the
+ * duration of this call, there will be a cpu-based copying step in
+ * any case.
+ */
+
+/**
+ * Try to perform glReadPixels as a blit from the read buffer region into
+ * the pixel-pack buffer object.
+ *
+ * The request is only handled when the destination is a buffer object, no
+ * pixel transfer state is set, intel_check_blit_format() accepts the
+ * format/type, the pack alignment is 1, and none of SwapBytes, LsbFirst or
+ * Invert is set.
+ *
+ * \param ctx            GL context
+ * \param x, y           origin of the rectangle to read
+ * \param width, height  size of the rectangle to read
+ * \param format, type   requested pixel format and data type
+ * \param pack           pixel packing parameters (supplies the buffer object)
+ * \param pixels         with a buffer object bound, used to compute the
+ *                       destination offset
+ *
+ * \return GL_TRUE when the blit was emitted or clipping left nothing to
+ *         read; GL_FALSE when the caller must fall back.
+ */
+static GLboolean
+do_blit_readpixels(GLcontext * ctx,
+                   GLint x, GLint y, GLsizei width, GLsizei height,
+                   GLenum format, GLenum type,
+                   const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_region *src = intel_readbuf_region(intel);
+   struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj);
+   GLuint dst_offset;
+   GLuint rowLength;
+   drm_intel_bo *dst_buffer;
+   GLboolean all;
+   /* These are handed to _mesa_clip_copytexsubimage() by address as in/out
+    * values, so they must hold a defined value before the call.
+    */
+   GLint dst_x = 0, dst_y = 0;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      printf("%s\n", __FUNCTION__);
+
+   if (!src)
+      return GL_FALSE;
+
+   if (!_mesa_is_bufferobj(pack->BufferObj)) {
+      /* PBO only for now:
+       */
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         printf("%s - not PBO\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+
+   if (ctx->_ImageTransferState ||
+       !intel_check_blit_format(src, format, type)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         printf("%s - bad format for blit\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (pack->Alignment != 1 || pack->SwapBytes || pack->LsbFirst) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         printf("%s: bad packing params\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if (pack->RowLength > 0)
+      rowLength = pack->RowLength;
+   else
+      rowLength = width;
+
+   if (pack->Invert) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         printf("%s: MESA_PACK_INVERT not done yet\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+   else {
+      /* Reading from the window-system framebuffer (Name == 0): negate the
+       * destination row length passed to the blit.
+       */
+      if (ctx->ReadBuffer->Name == 0)
+         rowLength = -rowLength;
+   }
+
+   dst_offset = (GLintptr) _mesa_image_address(2, pack, pixels, width, height,
+                                               format, type, 0, 0, 0);
+
+   /* Nothing left after clipping counts as handled. */
+   if (!_mesa_clip_copytexsubimage(ctx,
+                                   &dst_x, &dst_y,
+                                   &x, &y,
+                                   &width, &height)) {
+      return GL_TRUE;
+   }
+
+   intel_prepare_render(intel);
+
+   /* Decide whether the write is treated as covering the whole buffer
+    * object (INTEL_WRITE_FULL) or only part of it.
+    */
+   all = (width * height * src->cpp == dst->Base.Size &&
+          x == 0 && dst_offset == 0);
+
+   /* The blit always targets the origin of the destination; any adjustment
+    * the clip made to dst_x/dst_y is discarded, as before.
+    */
+   dst_x = 0;
+   dst_y = 0;
+
+   dst_buffer = intel_bufferobj_buffer(intel, dst,
+                                       all ? INTEL_WRITE_FULL :
+                                       INTEL_WRITE_PART);
+
+   /* Convert y for the window-system framebuffer. */
+   if (ctx->ReadBuffer->Name == 0)
+      y = ctx->ReadBuffer->Height - (y + height);
+
+   if (!intelEmitCopyBlit(intel,
+                          src->cpp,
+                          src->pitch, src->buffer, 0, src->tiling,
+                          rowLength, dst_buffer, dst_offset, GL_FALSE,
+                          x, y,
+                          dst_x, dst_y,
+                          width, height,
+                          GL_COPY)) {
+      return GL_FALSE;
+   }
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      printf("%s - DONE\n", __FUNCTION__);
+
+   return GL_TRUE;
+}
+
+/**
+ * Driver glReadPixels entry point: try the blit path first and fall back
+ * to _swrast_ReadPixels() when it declines.  The front-buffer dirty flag
+ * is saved up front and written back after each step that may change it.
+ */
+void
+intelReadPixels(GLcontext * ctx,
+                GLint x, GLint y, GLsizei width, GLsizei height,
+                GLenum format, GLenum type,
+                const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
+{
+   struct intel_context *intel = intel_context(ctx);
+   GLboolean saved_front_dirty;
+
+   if (INTEL_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   intel_flush(ctx);
+
+   /* glReadPixels() won't dirty the front buffer, so put the dirty flag
+    * back to its previous value after calling intel_prepare_render().
+    */
+   saved_front_dirty = intel->front_buffer_dirty;
+   intel_prepare_render(intel);
+   intel->front_buffer_dirty = saved_front_dirty;
+
+   if (!do_blit_readpixels(ctx, x, y, width, height,
+                           format, type, pack, pixels)) {
+      if (INTEL_DEBUG & DEBUG_PIXEL)
+         printf("%s: fallback to swrast\n", __FUNCTION__);
+
+      /* Update Mesa state before calling down into _swrast_ReadPixels, as
+       * the spans code requires the computed buffer states to be up to
+       * date, but _swrast_ReadPixels only updates Mesa state after setting
+       * up the spans code.
+       */
+      if (ctx->NewState)
+         _mesa_update_state(ctx);
+
+      _swrast_ReadPixels(ctx, x, y, width, height,
+                         format, type, pack, pixels);
+
+      /* There's an intel_prepare_render() call in intelSpanRenderStart(). */
+      intel->front_buffer_dirty = saved_front_dirty;
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h
new file mode 100644
index 0000000000..c1a281f261
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_reg.h
@@ -0,0 +1,248 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#define CMD_MI				(0x0 << 29)
+#define CMD_2D				(0x2 << 29)
+#define CMD_3D				(0x3 << 29)
+
+#define MI_NOOP				(CMD_MI | 0)
+
+#define MI_BATCH_BUFFER_END		(CMD_MI | 0xA << 23)
+
+#define MI_FLUSH			(CMD_MI | (4 << 23))
+#define FLUSH_MAP_CACHE				(1 << 0)
+#define INHIBIT_FLUSH_RENDER_CACHE		(1 << 2)
+
+/* Stalls command execution waiting for the given events to have occurred. */
+#define MI_WAIT_FOR_EVENT               (CMD_MI | (0x3 << 23))
+#define MI_WAIT_FOR_PLANE_B_FLIP        (1<<6)
+#define MI_WAIT_FOR_PLANE_A_FLIP        (1<<2)
+
+/* p189 */
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_1   (CMD_3D | (0x1d<<24) | (0x04<<16))
+/* The argument is parenthesized so that an expression (e.g. "a << b")
+ * is added to 4 as a whole. */
+#define I1_LOAD_S(n)                      (1<<(4+(n)))
+
+#define _3DSTATE_DRAWRECT_INFO		(CMD_3D | (0x1d<<24) | (0x80<<16) | 0x3)
+#define _3DSTATE_DRAWRECT_INFO_I965	(CMD_3D | (3 << 27) | (1 << 24) | 0x2)
+
+/** @{
+ *
+ * PIPE_CONTROL operation, a combination MI_FLUSH and register write with
+ * additional flushing control.
+ */
+#define _3DSTATE_PIPE_CONTROL		(CMD_3D | (3 << 27) | (2 << 24) | 2)
+#define PIPE_CONTROL_NO_WRITE		(0 << 14)
+#define PIPE_CONTROL_WRITE_IMMEDIATE	(1 << 14)
+#define PIPE_CONTROL_WRITE_DEPTH_COUNT	(2 << 14)
+#define PIPE_CONTROL_WRITE_TIMESTAMP	(3 << 14)
+#define PIPE_CONTROL_DEPTH_STALL	(1 << 13)
+#define PIPE_CONTROL_WRITE_FLUSH	(1 << 12)
+#define PIPE_CONTROL_INSTRUCTION_FLUSH	(1 << 11)
+#define PIPE_CONTROL_INTERRUPT_ENABLE	(1 << 8)
+#define PIPE_CONTROL_PPGTT_WRITE	(0 << 2)
+#define PIPE_CONTROL_GLOBAL_GTT_WRITE	(1 << 2)
+
+/** @} */
+
+/** @{
+ * 915 definitions
+ *
+ * 915 documents say that bits 31:28 and 1 are "undefined, must be zero."
+ */
+#define S0_VB_OFFSET_MASK		0x0ffffffc
+#define S0_AUTO_CACHE_INV_DISABLE	(1<<0)
+/** @} */
+
+/** @{
+ * 830 definitions
+ */
+#define S0_VB_OFFSET_MASK_830		0xffffff80
+#define S0_VB_PITCH_SHIFT_830		1
+#define S0_VB_ENABLE_830		(1<<0)
+/** @} */
+
+#define S1_VERTEX_WIDTH_SHIFT          24
+#define S1_VERTEX_WIDTH_MASK           (0x3f<<24)
+#define S1_VERTEX_PITCH_SHIFT          16
+#define S1_VERTEX_PITCH_MASK           (0x3f<<16)
+
+#define TEXCOORDFMT_2D                 0x0
+#define TEXCOORDFMT_3D                 0x1
+#define TEXCOORDFMT_4D                 0x2
+#define TEXCOORDFMT_1D                 0x3
+#define TEXCOORDFMT_2D_16              0x4
+#define TEXCOORDFMT_4D_16              0x5
+#define TEXCOORDFMT_NOT_PRESENT        0xf
+#define S2_TEXCOORD_FMT0_MASK            0xf
+#define S2_TEXCOORD_FMT1_SHIFT           4
+/* Shift a TEXCOORDFMT_* value by 4 bits per unit.  "unit" is parenthesized
+ * so that an expression such as "i + 1" is multiplied as a whole. */
+#define S2_TEXCOORD_FMT(unit, type)    ((type)<<((unit)*4))
+#define S2_TEXCOORD_NONE               (~0)
+#define S2_TEX_COUNT_SHIFT_830		12
+#define S2_VERTEX_1_WIDTH_SHIFT_830	0
+#define S2_VERTEX_0_WIDTH_SHIFT_830	6
+/* S3 not interesting */
+
+#define S4_POINT_WIDTH_SHIFT           23
+#define S4_POINT_WIDTH_MASK            (0x1ff<<23)
+#define S4_LINE_WIDTH_SHIFT            19
+#define S4_LINE_WIDTH_ONE              (0x2<<19)
+#define S4_LINE_WIDTH_MASK             (0xf<<19)
+#define S4_FLATSHADE_ALPHA             (1<<18)
+#define S4_FLATSHADE_FOG               (1<<17)
+#define S4_FLATSHADE_SPECULAR          (1<<16)
+#define S4_FLATSHADE_COLOR             (1<<15)
+#define S4_CULLMODE_BOTH	       (0<<13)
+#define S4_CULLMODE_NONE	       (1<<13)
+#define S4_CULLMODE_CW		       (2<<13)
+#define S4_CULLMODE_CCW		       (3<<13)
+#define S4_CULLMODE_MASK	       (3<<13)
+#define S4_VFMT_POINT_WIDTH            (1<<12)
+#define S4_VFMT_SPEC_FOG               (1<<11)
+#define S4_VFMT_COLOR                  (1<<10)
+#define S4_VFMT_DEPTH_OFFSET           (1<<9)
+#define S4_VFMT_XYZ     	       (1<<6)
+#define S4_VFMT_XYZW     	       (2<<6)
+#define S4_VFMT_XY     		       (3<<6)
+#define S4_VFMT_XYW     	       (4<<6)
+#define S4_VFMT_XYZW_MASK              (7<<6)
+#define S4_FORCE_DEFAULT_DIFFUSE       (1<<5)
+#define S4_FORCE_DEFAULT_SPECULAR      (1<<4)
+#define S4_LOCAL_DEPTH_OFFSET_ENABLE   (1<<3)
+#define S4_VFMT_FOG_PARAM              (1<<2)
+#define S4_SPRITE_POINT_ENABLE         (1<<1)
+#define S4_LINE_ANTIALIAS_ENABLE       (1<<0)
+
+#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH   | 	\
+		      S4_VFMT_SPEC_FOG      |	\
+		      S4_VFMT_COLOR         |	\
+		      S4_VFMT_DEPTH_OFFSET  |	\
+		      S4_VFMT_XYZW_MASK     |	\
+		      S4_VFMT_FOG_PARAM)
+
+
+#define S5_WRITEDISABLE_ALPHA          (1<<31)
+#define S5_WRITEDISABLE_RED            (1<<30)
+#define S5_WRITEDISABLE_GREEN          (1<<29)
+#define S5_WRITEDISABLE_BLUE           (1<<28)
+#define S5_WRITEDISABLE_MASK           (0xf<<28)
+#define S5_FORCE_DEFAULT_POINT_SIZE    (1<<27)
+#define S5_LAST_PIXEL_ENABLE           (1<<26)
+#define S5_GLOBAL_DEPTH_OFFSET_ENABLE  (1<<25)
+#define S5_FOG_ENABLE                  (1<<24)
+#define S5_STENCIL_REF_SHIFT           16
+#define S5_STENCIL_REF_MASK            (0xff<<16)
+#define S5_STENCIL_TEST_FUNC_SHIFT     13
+#define S5_STENCIL_TEST_FUNC_MASK      (0x7<<13)
+#define S5_STENCIL_FAIL_SHIFT          10
+#define S5_STENCIL_FAIL_MASK           (0x7<<10)
+#define S5_STENCIL_PASS_Z_FAIL_SHIFT   7
+#define S5_STENCIL_PASS_Z_FAIL_MASK    (0x7<<7)
+#define S5_STENCIL_PASS_Z_PASS_SHIFT   4
+#define S5_STENCIL_PASS_Z_PASS_MASK    (0x7<<4)
+#define S5_STENCIL_WRITE_ENABLE        (1<<3)
+#define S5_STENCIL_TEST_ENABLE         (1<<2)
+#define S5_COLOR_DITHER_ENABLE         (1<<1)
+#define S5_LOGICOP_ENABLE              (1<<0)
+
+
+#define S6_ALPHA_TEST_ENABLE           (1<<31)
+#define S6_ALPHA_TEST_FUNC_SHIFT       28
+#define S6_ALPHA_TEST_FUNC_MASK        (0x7<<28)
+#define S6_ALPHA_REF_SHIFT             20
+#define S6_ALPHA_REF_MASK              (0xff<<20)
+#define S6_DEPTH_TEST_ENABLE           (1<<19)
+#define S6_DEPTH_TEST_FUNC_SHIFT       16
+#define S6_DEPTH_TEST_FUNC_MASK        (0x7<<16)
+#define S6_CBUF_BLEND_ENABLE           (1<<15)
+#define S6_CBUF_BLEND_FUNC_SHIFT       12
+#define S6_CBUF_BLEND_FUNC_MASK        (0x7<<12)
+#define S6_CBUF_SRC_BLEND_FACT_SHIFT   8
+#define S6_CBUF_SRC_BLEND_FACT_MASK    (0xf<<8)
+#define S6_CBUF_DST_BLEND_FACT_SHIFT   4
+#define S6_CBUF_DST_BLEND_FACT_MASK    (0xf<<4)
+#define S6_DEPTH_WRITE_ENABLE          (1<<3)
+#define S6_COLOR_WRITE_ENABLE          (1<<2)
+#define S6_TRISTRIP_PV_SHIFT           0
+#define S6_TRISTRIP_PV_MASK            (0x3<<0)
+
+#define S7_DEPTH_OFFSET_CONST_MASK     ~0
+
+/* p143 */
+#define _3DSTATE_BUF_INFO_CMD	(CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
+/* Dword 1 */
+#define BUF_3D_ID_COLOR_BACK	(0x3<<24)
+#define BUF_3D_ID_DEPTH 	(0x7<<24)
+#define BUF_3D_USE_FENCE	(1<<23)
+#define BUF_3D_TILED_SURFACE	(1<<22)
+#define BUF_3D_TILE_WALK_X	0
+#define BUF_3D_TILE_WALK_Y	(1<<21)
+#define BUF_3D_PITCH(x)         (((x)/4)<<2)
+/* Dword 2 */
+#define BUF_3D_ADDR(x)		((x) & ~0x3)
+
+/* Primitive dispatch on 830-945 */
+#define _3DPRIMITIVE			(CMD_3D | (0x1f << 24))
+#define PRIM_INDIRECT            (1<<23)
+#define PRIM_INLINE              (0<<23)
+#define PRIM_INDIRECT_SEQUENTIAL (0<<17)
+#define PRIM_INDIRECT_ELTS       (1<<17)
+
+#define PRIM3D_TRILIST		(0x0<<18)
+#define PRIM3D_TRISTRIP 	(0x1<<18)
+#define PRIM3D_TRISTRIP_RVRSE	(0x2<<18)
+#define PRIM3D_TRIFAN		(0x3<<18)
+#define PRIM3D_POLY		(0x4<<18)
+#define PRIM3D_LINELIST 	(0x5<<18)
+#define PRIM3D_LINESTRIP	(0x6<<18)
+#define PRIM3D_RECTLIST 	(0x7<<18)
+#define PRIM3D_POINTLIST	(0x8<<18)
+#define PRIM3D_DIB		(0x9<<18)
+#define PRIM3D_MASK		(0x1f<<18)
+
+#define XY_SETUP_BLT_CMD		(CMD_2D | (0x01 << 22) | 6)
+
+#define XY_COLOR_BLT_CMD		(CMD_2D | (0x50 << 22) | 4)
+
+#define XY_SRC_COPY_BLT_CMD             (CMD_2D | (0x53 << 22) | 6)
+
+#define XY_TEXT_IMMEDIATE_BLIT_CMD	(CMD_2D | (0x31 << 22))
+# define XY_TEXT_BYTE_PACKED		(1 << 16)
+
+/* BR00 */
+#define XY_BLT_WRITE_ALPHA	(1 << 21)
+#define XY_BLT_WRITE_RGB	(1 << 20)
+#define XY_SRC_TILED		(1 << 15)
+#define XY_DST_TILED		(1 << 11)
+
+/* BR13 */
+#define BR13_8			(0x0 << 24)
+#define BR13_565		(0x1 << 24)
+#define BR13_8888		(0x3 << 24)
+
+#define FENCE_LINEAR 0
+#define FENCE_XMAJOR 1
+#define FENCE_YMAJOR 2
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
new file mode 100644
index 0000000000..fe4de18960
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -0,0 +1,507 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/* Provide additional functionality on top of bufmgr buffers:
+ *   - 2d semantics and blit operations
+ *   - refcounting of buffers for multiple images in a buffer.
+ *   - refcounting of buffer mappings.
+ *   - some logic for moving the buffers to the best memory pools for
+ *     given operations.
+ *
+ * Most of this is to make it easier to implement the fixed-layout
+ * mipmap tree required by intel hardware in the face of GL's
+ * programming interface where each image can be specified in random
+ * order and it isn't clear what layout the tree should have until the
+ * last moment.
+ */
+
+#include <sys/ioctl.h>
+#include <errno.h>
+
+#include "main/hash.h"
+#include "intel_context.h"
+#include "intel_regions.h"
+#include "intel_blit.h"
+#include "intel_buffer_objects.h"
+#include "intel_bufmgr.h"
+#include "intel_batchbuffer.h"
+
+#define FILE_DEBUG_FLAG DEBUG_REGION
+
+/* This should be set to the maximum backtrace size desired.
+ * Set it to 0 to disable backtrace debugging.
+ */
+#define DEBUG_BACKTRACE_SIZE 0
+
+#if DEBUG_BACKTRACE_SIZE == 0
+/* Backtracing disabled: _DBG() is a plain alias for the standard DBG(). */
+#define _DBG(...) DBG(__VA_ARGS__)
+#else
+/* Backtracing enabled: print the call chain, then the regular message.
+ * The do { } while (0) wrapper makes the expansion a single statement, so
+ * an unbraced "if (x) _DBG(...); else ..." still parses as intended.
+ */
+#define _DBG(...) do { debug_backtrace(); DBG(__VA_ARGS__); } while (0)
+
+/* Backtracing debug support */
+#include <execinfo.h>
+
+/**
+ * Print the current call stack through DBG(), one "symbol:" entry per frame.
+ *
+ * At most DEBUG_BACKTRACE_SIZE frames are captured.  The frame belonging to
+ * debug_backtrace() itself is skipped, and each entry is trimmed to the text
+ * that follows its last '/' to avoid really long lines.
+ */
+static void
+debug_backtrace(void)
+{
+   void *trace[DEBUG_BACKTRACE_SIZE];
+   char **strings;
+   int traceSize;
+   int i;
+
+   traceSize = backtrace(trace, DEBUG_BACKTRACE_SIZE);
+   strings = backtrace_symbols(trace, traceSize);
+   if (strings == NULL) {
+      DBG("no backtrace:");
+      return;
+   }
+
+   /* Start at 1: entry 0 is the call to debug_backtrace() itself. */
+   for (i = 1; i < traceSize; i++) {
+      const char *name = strings[i];
+      const char *p;
+
+      /* Remember the character following the last '/' seen so far. */
+      for (p = strings[i]; *p != '\0'; p++) {
+         if (*p == '/')
+            name = p + 1;
+      }
+
+      DBG("%s:", name);
+   }
+
+   /* Free up the memory, and we're done */
+   free(strings);
+}
+
+#endif
+
+
+
+/**
+ * Map a region's buffer and return the mapped address.
+ *
+ * Mappings are reference counted: only the first outstanding request maps
+ * the buffer; later requests bump the count and return the cached pointer.
+ * intel_flush() is called on the context before anything else.
+ *
+ * XXX: Thread safety?
+ */
+GLubyte *
+intel_region_map(struct intel_context *intel, struct intel_region *region)
+{
+   intel_flush(&intel->ctx);
+
+   _DBG("%s %p\n", __FUNCTION__, region);
+
+   /* Already mapped: the post-increment has taken another map reference. */
+   if (region->map_refcount++ != 0)
+      return region->map;
+
+   /* Break the tie to an attached pbo before touching the storage. */
+   if (region->pbo)
+      intel_region_cow(intel, region);
+
+   /* Untiled buffers use the plain map, tiled ones go through the GTT. */
+   if (region->tiling == I915_TILING_NONE)
+      drm_intel_bo_map(region->buffer, GL_TRUE);
+   else
+      drm_intel_gem_bo_map_gtt(region->buffer);
+
+   region->map = region->buffer->virtual;
+   return region->map;
+}
+
+/**
+ * Drop one map reference taken by intel_region_map().
+ *
+ * The buffer is really unmapped, and region->map cleared, only when the
+ * last outstanding map reference goes away.
+ */
+void
+intel_region_unmap(struct intel_context *intel, struct intel_region *region)
+{
+   _DBG("%s %p\n", __FUNCTION__, region);
+
+   /* Other users still hold the mapping. */
+   if (--region->map_refcount != 0)
+      return;
+
+   if (region->tiling == I915_TILING_NONE)
+      drm_intel_bo_unmap(region->buffer);
+   else
+      drm_intel_gem_bo_unmap_gtt(region->buffer);
+
+   region->map = NULL;
+}
+
+/**
+ * Wrap an existing buffer object in a newly allocated region.
+ *
+ * \param intel   context (not used by this function)
+ * \param cpp     bytes per pixel
+ * \param width   region width
+ * \param height  region height
+ * \param pitch   region pitch, stored exactly as passed in
+ * \param buffer  buffer object to wrap; the pointer is stored in the region
+ *                without taking an additional reference
+ *
+ * \return a region with refcount 1 and tiling I915_TILING_NONE, or NULL if
+ *         \p buffer is NULL or the region itself could not be allocated.
+ *         When NULL is returned the buffer has not been touched and still
+ *         belongs to the caller.
+ */
+static struct intel_region *
+intel_region_alloc_internal(struct intel_context *intel,
+			    GLuint cpp,
+			    GLuint width, GLuint height, GLuint pitch,
+			    drm_intel_bo *buffer)
+{
+   struct intel_region *region;
+
+   if (buffer == NULL) {
+      _DBG("%s <-- NULL\n", __FUNCTION__);
+      return NULL;
+   }
+
+   region = calloc(1, sizeof(*region));
+   if (region == NULL) {
+      /* Out of memory: report failure instead of writing through NULL. */
+      _DBG("%s <-- NULL\n", __FUNCTION__);
+      return NULL;
+   }
+
+   region->cpp = cpp;
+   region->width = width;
+   region->height = height;
+   region->pitch = pitch;
+   region->refcount = 1;
+   region->buffer = buffer;
+
+   /* Default to no tiling */
+   region->tiling = I915_TILING_NONE;
+
+   _DBG("%s <-- %p\n", __FUNCTION__, region);
+   return region;
+}
+
+/**
+ * Allocate a new buffer object and wrap it in a region.
+ *
+ * \param intel   context whose bufmgr is used for the allocation
+ * \param tiling  requested tiling mode; the value handed back by
+ *                drm_intel_bo_alloc_tiled() is what gets recorded in the
+ *                region
+ * \param cpp     bytes per pixel
+ * \param width   region width
+ * \param height  region height
+ * \param expect_accelerated_upload  when true, BO_ALLOC_FOR_RENDER is passed
+ *                to the allocator
+ *
+ * \return the new region (refcount 1), or NULL if either the buffer or the
+ *         region could not be allocated.
+ */
+struct intel_region *
+intel_region_alloc(struct intel_context *intel,
+		   uint32_t tiling,
+                   GLuint cpp, GLuint width, GLuint height,
+		   GLboolean expect_accelerated_upload)
+{
+   drm_intel_bo *buffer;
+   struct intel_region *region;
+   unsigned long flags = 0;
+   unsigned long aligned_pitch;
+
+   if (expect_accelerated_upload)
+      flags |= BO_ALLOC_FOR_RENDER;
+
+   buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region",
+				     width, height, cpp,
+				     &tiling, &aligned_pitch, flags);
+
+   /* The region stores its pitch in pixels, hence the division by cpp. */
+   region = intel_region_alloc_internal(intel, cpp, width, height,
+					aligned_pitch / cpp, buffer);
+   if (region == NULL) {
+      /* No region took ownership of the buffer, so release it here rather
+       * than leaking it (and rather than dereferencing NULL below).
+       */
+      if (buffer != NULL)
+	 drm_intel_bo_unreference(buffer);
+      return NULL;
+   }
+
+   region->tiling = tiling;
+
+   return region;
+}
+
+/**
+ * Return a region for the buffer object with global name \p handle.
+ *
+ * If the screen's named_regions table already holds a region for the name,
+ * a new reference to it is returned, provided its width, height, cpp and
+ * pitch all match the arguments.  Otherwise the buffer is opened by name,
+ * wrapped in a new region, its tiling mode is queried, and the region is
+ * inserted into the table.
+ *
+ * \param name  label passed to intel_bo_gem_create_from_name() and used in
+ *              the error message
+ *
+ * \return the region, or NULL when the existing region is incompatible, the
+ *         buffer or region cannot be created, or the tiling query fails.
+ */
+struct intel_region *
+intel_region_alloc_for_handle(struct intel_context *intel,
+			      GLuint cpp,
+			      GLuint width, GLuint height, GLuint pitch,
+			      GLuint handle, const char *name)
+{
+   struct intel_region *region;
+   drm_intel_bo *buffer;
+   int ret;
+   uint32_t bit_6_swizzle;
+
+   region = _mesa_HashLookup(intel->intelScreen->named_regions, handle);
+   if (region != NULL) {
+      struct intel_region *ref = NULL;
+
+      if (region->width != width || region->height != height ||
+	  region->cpp != cpp || region->pitch != pitch) {
+	 /* handle is unsigned, so print it with %u. */
+	 fprintf(stderr,
+		 "Region for name %u already exists but is not compatible\n",
+		 handle);
+	 return NULL;
+      }
+      intel_region_reference(&ref, region);
+      return ref;
+   }
+
+   buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, handle);
+
+   region = intel_region_alloc_internal(intel, cpp,
+					width, height, pitch, buffer);
+   if (region == NULL) {
+      /* The region never took ownership of the buffer; don't leak it. */
+      if (buffer != NULL)
+	 drm_intel_bo_unreference(buffer);
+      return NULL;
+   }
+
+   ret = drm_intel_bo_get_tiling(region->buffer, &region->tiling,
+				 &bit_6_swizzle);
+   if (ret != 0) {
+      fprintf(stderr, "Couldn't get tiling of buffer %u (%s): %s\n",
+	      handle, name, strerror(-ret));
+      /* region->name is still 0 here, so release won't touch the table. */
+      intel_region_release(&region);
+      return NULL;
+   }
+
+   region->name = handle;
+   region->screen = intel->intelScreen;
+   _mesa_HashInsert(intel->intelScreen->named_regions, handle, region);
+
+   return region;
+}
+
+/**
+ * Store a new reference to \p src in \p *dst.
+ *
+ * \p *dst must be NULL on entry (asserted).  A NULL \p src is accepted and
+ * leaves \p *dst untouched.
+ */
+void
+intel_region_reference(struct intel_region **dst, struct intel_region *src)
+{
+   if (src != NULL)
+      _DBG("%s %p %d\n", __FUNCTION__, src, src->refcount);
+
+   assert(*dst == NULL);
+
+   if (src == NULL)
+      return;
+
+   src->refcount++;
+   *dst = src;
+}
+
+/**
+ * Drop the reference held in \p *region_handle and set it to NULL.
+ *
+ * When the last reference goes away the region is detached from its pbo,
+ * its buffer is unreferenced, it is removed from the screen's named_regions
+ * table if it has a name, and the region is freed.  A NULL \p *region_handle
+ * is a no-op.
+ */
+void
+intel_region_release(struct intel_region **region_handle)
+{
+   struct intel_region *rgn = *region_handle;
+
+   if (!rgn) {
+      _DBG("%s NULL\n", __FUNCTION__);
+      return;
+   }
+
+   _DBG("%s %p %d\n", __FUNCTION__, rgn, rgn->refcount - 1);
+
+   ASSERT(rgn->refcount > 0);
+
+   if (--rgn->refcount == 0) {
+      /* Nobody may still hold a mapping of a region that is going away. */
+      assert(rgn->map_refcount == 0);
+
+      /* Break the link in both directions before dropping the buffer. */
+      if (rgn->pbo)
+	 rgn->pbo->region = NULL;
+      rgn->pbo = NULL;
+      drm_intel_bo_unreference(rgn->buffer);
+
+      if (rgn->name > 0)
+	 _mesa_HashRemove(rgn->screen->named_regions, rgn->name);
+
+      free(rgn);
+   }
+
+   *region_handle = NULL;
+}
+
+/**
+ * Copy a width x height rectangle of pixels between two linear images.
+ *
+ * Pitches, positions and width are given in pixels and are scaled by
+ * \p cpp (bytes per pixel) internally.  When both images are exactly as
+ * wide as the rectangle the copy is done with a single memcpy(); otherwise
+ * it proceeds one row at a time.
+ *
+ * XXX Move this into core Mesa?
+ */
+void
+_mesa_copy_rect(GLubyte * dst,
+                GLuint cpp,
+                GLuint dst_pitch,
+                GLuint dst_x,
+                GLuint dst_y,
+                GLuint width,
+                GLuint height,
+                const GLubyte * src,
+                GLuint src_pitch, GLuint src_x, GLuint src_y)
+{
+   /* Everything below works in bytes. */
+   const GLuint dst_stride = dst_pitch * cpp;
+   const GLuint src_stride = src_pitch * cpp;
+   const GLuint row_bytes = width * cpp;
+   GLubyte *out = dst;
+   const GLubyte *in = src;
+   GLuint row;
+
+   /* Step to the first pixel of the rectangle in each image. */
+   out += dst_x * cpp;
+   in += src_x * cpp;
+   out += dst_y * dst_stride;
+   in += src_y * src_stride;
+
+   if (row_bytes == dst_stride && row_bytes == src_stride) {
+      /* Rows are contiguous in both images: one bulk copy. */
+      memcpy(out, in, height * row_bytes);
+      return;
+   }
+
+   for (row = 0; row < height; row++) {
+      memcpy(out, in, row_bytes);
+      out += dst_stride;
+      in += src_stride;
+   }
+}
+
+
+/* Upload data to a rectangular sub-region.  Lots of choices how to do this:
+ *
+ * - memcpy by span to current destination
+ * - upload data as new buffer and blit
+ *
+ * Currently always memcpy: the destination is mapped, the rectangle is
+ * copied with _mesa_copy_rect(), and the destination is unmapped again.
+ * Does nothing when intel is NULL.
+ */
+void
+intel_region_data(struct intel_context *intel,
+                  struct intel_region *dst,
+                  GLuint dst_offset,
+                  GLuint dstx, GLuint dsty,
+                  const void *src, GLuint src_pitch,
+                  GLuint srcx, GLuint srcy, GLuint width, GLuint height)
+{
+   GLubyte *dst_map;
+
+   _DBG("%s\n", __FUNCTION__);
+
+   if (intel == NULL)
+      return;
+
+   if (dst->pbo) {
+      const GLboolean covers_all = (dstx == 0 && dsty == 0 &&
+                                    width == dst->pitch &&
+                                    height == dst->height);
+
+      /* A write covering the whole region can drop the pbo's contents;
+       * a partial one has to copy them first.
+       */
+      if (covers_all)
+         intel_region_release_pbo(intel, dst);
+      else
+         intel_region_cow(intel, dst);
+   }
+
+   intel_prepare_render(intel);
+
+   dst_map = intel_region_map(intel, dst);
+   _mesa_copy_rect(dst_map + dst_offset,
+                   dst->cpp,
+                   dst->pitch,
+                   dstx, dsty, width, height, src, src_pitch, srcx, srcy);
+
+   intel_region_unmap(intel, dst);
+}
+
+/* Copy rectangular sub-regions with intelEmitCopyBlit().  Need better
+ * logic about when to push buffers into AGP - will currently do so
+ * whenever possible.
+ *
+ * When flip is set the source pitch is negated before it is handed to the
+ * blit.  Returns GL_FALSE if intel is NULL, otherwise the blit's result.
+ */
+GLboolean
+intel_region_copy(struct intel_context *intel,
+                  struct intel_region *dst,
+                  GLuint dst_offset,
+                  GLuint dstx, GLuint dsty,
+                  struct intel_region *src,
+                  GLuint src_offset,
+                  GLuint srcx, GLuint srcy, GLuint width, GLuint height,
+		  GLboolean flip,
+		  GLenum logicop)
+{
+   uint32_t blit_src_pitch = src->pitch;
+
+   _DBG("%s\n", __FUNCTION__);
+
+   if (intel == NULL)
+      return GL_FALSE;
+
+   if (dst->pbo) {
+      const GLboolean covers_all = (dstx == 0 && dsty == 0 &&
+                                    width == dst->pitch &&
+                                    height == dst->height);
+
+      /* Overwriting everything: no need to preserve the pbo's data. */
+      if (covers_all)
+         intel_region_release_pbo(intel, dst);
+      else
+         intel_region_cow(intel, dst);
+   }
+
+   assert(src->cpp == dst->cpp);
+
+   if (flip)
+      blit_src_pitch = -blit_src_pitch;
+
+   return intelEmitCopyBlit(intel,
+			    dst->cpp,
+			    blit_src_pitch, src->buffer, src_offset,
+			    src->tiling,
+			    dst->pitch, dst->buffer, dst_offset,
+			    dst->tiling,
+			    srcx, srcy, dstx, dsty, width, height,
+			    logicop);
+}
+
+/* Attach to a pbo, discarding our data.  Effectively zero-copy upload
+ * the pbo's data: afterwards the region shares the pbo's buffer (holding
+ * its own reference to it) and is marked untiled.
+ */
+void
+intel_region_attach_pbo(struct intel_context *intel,
+                        struct intel_region *region,
+                        struct intel_buffer_object *pbo)
+{
+   drm_intel_bo *pbo_bo;
+
+   /* Already attached to this very pbo: nothing to do. */
+   if (pbo == region->pbo)
+      return;
+
+   _DBG("%s %p %p\n", __FUNCTION__, region, pbo);
+
+   /* If there is already a pbo attached, break the cow tie now.
+    * Don't call intel_region_release_pbo() as that would
+    * unnecessarily allocate a new buffer we would have to immediately
+    * discard.
+    */
+   if (region->pbo != NULL) {
+      region->pbo->region = NULL;
+      region->pbo = NULL;
+   }
+
+   /* Throw away whatever storage the region had. */
+   if (region->buffer != NULL) {
+      drm_intel_bo_unreference(region->buffer);
+      region->buffer = NULL;
+   }
+
+   /* make sure pbo has a buffer of its own */
+   pbo_bo = intel_bufferobj_buffer(intel, pbo, INTEL_WRITE_FULL);
+
+   /* Link region and pbo in both directions. */
+   region->pbo = pbo;
+   region->pbo->region = region;
+
+   drm_intel_bo_reference(pbo_bo);
+   region->buffer = pbo_bo;
+   region->tiling = I915_TILING_NONE;
+}
+
+
+/* Break the COW tie to the pbo and allocate a new buffer.
+ * The pbo gets to keep the data; the region's new buffer is sized
+ * pitch * cpp * height and its contents are not initialized here.
+ * The region must currently be attached to a pbo.
+ */
+void
+intel_region_release_pbo(struct intel_context *intel,
+                         struct intel_region *region)
+{
+   _DBG("%s %p\n", __FUNCTION__, region);
+   assert(region->buffer == region->pbo->buffer);
+
+   /* Unlink in both directions. */
+   region->pbo->region = NULL;
+   region->pbo = NULL;
+
+   /* Give up our reference to the shared buffer ... */
+   drm_intel_bo_unreference(region->buffer);
+
+   /* ... and replace it with private storage. */
+   region->buffer = drm_intel_bo_alloc(intel->bufmgr, "region",
+				       region->pitch * region->cpp *
+				       region->height,
+				       64);
+}
+
+/* Break the COW tie to the pbo.  Both the pbo and the region end up
+ * with a copy of the data: the region gets a fresh buffer through
+ * intel_region_release_pbo() and the pbo's contents are blitted into it.
+ */
+void
+intel_region_cow(struct intel_context *intel, struct intel_region *region)
+{
+   /* Grab the pbo first: releasing it clears region->pbo. */
+   struct intel_buffer_object *src_pbo = region->pbo;
+   GLboolean blit_ok;
+
+   intel_region_release_pbo(intel, region);
+
+   assert(region->cpp * region->pitch * region->height == src_pbo->Base.Size);
+
+   _DBG("%s %p (%d bytes)\n", __FUNCTION__, region, src_pbo->Base.Size);
+
+   /* Now blit from the pbo's buffer to the region's new buffer. */
+   intel_prepare_render(intel);
+   blit_ok = intelEmitCopyBlit(intel,
+                               region->cpp,
+                               region->pitch, src_pbo->buffer, 0,
+                               region->tiling,
+                               region->pitch, region->buffer, 0,
+                               region->tiling,
+                               0, 0, 0, 0,
+                               region->pitch, region->height,
+                               GL_COPY);
+   assert(blit_ok);
+}
+
+/**
+ * Return the region's buffer, first breaking any pbo tie as required by
+ * the intended access.
+ *
+ * \param flag  INTEL_WRITE_PART copies the pbo data into a private buffer,
+ *              INTEL_WRITE_FULL just replaces the buffer; any other value
+ *              leaves an attached pbo in place
+ */
+drm_intel_bo *
+intel_region_buffer(struct intel_context *intel,
+                    struct intel_region *region, GLuint flag)
+{
+   /* Without a pbo the region already owns its storage. */
+   if (!region->pbo)
+      return region->buffer;
+
+   if (flag == INTEL_WRITE_PART) {
+      intel_region_cow(intel, region);
+   }
+   else if (flag == INTEL_WRITE_FULL) {
+      intel_region_release_pbo(intel, region);
+   }
+
+   return region->buffer;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h
new file mode 100644
index 0000000000..6bbed32f2a
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_regions.h
@@ -0,0 +1,160 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTEL_REGIONS_H
+#define INTEL_REGIONS_H
+
+/** @file intel_regions.h
+ *
+ * Structure definitions and prototypes for intel_region handling,
+ * which is the basic structure for rectangular collections of pixels
+ * stored in a drm_intel_bo.
+ */
+
+#include <xf86drm.h>
+
+#include "main/mtypes.h"
+#include "intel_bufmgr.h"
+
+struct intel_context;
+struct intel_buffer_object;
+
+/**
+ * A layer on top of the bufmgr buffers that adds a few useful things:
+ *
+ * - Refcounting for local buffer references.
+ * - Refcounting for buffer maps
+ * - Buffer dimensions - pitch and height.
+ * - Blitter commands for copying 2D regions between buffers. (really???)
+ */
+struct intel_region
+{
+   drm_intel_bo *buffer;  /**< buffer manager's buffer */
+   GLuint refcount; /**< Reference count for region */
+   GLuint cpp;      /**< bytes per pixel */
+   GLuint width;    /**< in pixels */
+   GLuint height;   /**< in pixels */
+   GLuint pitch;    /**< in pixels */
+   GLubyte *map;    /**< only non-NULL when region is actually mapped */
+   GLuint map_refcount;  /**< Reference count for mapping */
+
+   GLuint draw_offset; /**< Offset of drawing address within the region */
+   GLuint draw_x, draw_y; /**< Offset of drawing within the region */
+
+   uint32_t tiling; /**< Which tiling mode the region is in */
+   struct intel_buffer_object *pbo;     /**< pbo sharing its buffer with this region (zero-copy uploads); NULL when none */
+
+   uint32_t name; /**< Global name for the bo; set by intel_region_alloc_for_handle() */
+   struct intel_screen *screen; /**< screen whose named_regions table holds this region; set together with name */
+};
+
+
+/* Allocate a refcounted region.  Pointers to regions should only be
+ * copied by calling intel_region_reference().
+ */
+struct intel_region *intel_region_alloc(struct intel_context *intel,
+                                        uint32_t tiling,
+					GLuint cpp, GLuint width,
+                                        GLuint height,
+					GLboolean expect_accelerated_upload);
+
+struct intel_region *
+intel_region_alloc_for_handle(struct intel_context *intel,
+			      GLuint cpp,
+			      GLuint width, GLuint height, GLuint pitch,
+			      unsigned int handle, const char *name);
+
+void intel_region_reference(struct intel_region **dst,
+                            struct intel_region *src);
+
+void intel_region_release(struct intel_region **ib);
+
+void intel_recreate_static_regions(struct intel_context *intel);
+
+/* Map/unmap regions.  This is refcounted also: 
+ */
+GLubyte *intel_region_map(struct intel_context *intel,
+                          struct intel_region *ib);
+
+void intel_region_unmap(struct intel_context *intel, struct intel_region *ib);
+
+
+/* Upload data to a rectangular sub-region
+ */
+void intel_region_data(struct intel_context *intel,
+                       struct intel_region *dest,
+                       GLuint dest_offset,
+                       GLuint destx, GLuint desty,
+                       const void *src, GLuint src_stride,
+                       GLuint srcx, GLuint srcy, GLuint width, GLuint height);
+
+/* Copy rectangular sub-regions
+ */
+GLboolean
+intel_region_copy(struct intel_context *intel,
+		  struct intel_region *dest,
+		  GLuint dest_offset,
+		  GLuint destx, GLuint desty,
+		  struct intel_region *src,
+		  GLuint src_offset,
+		  GLuint srcx, GLuint srcy, GLuint width, GLuint height,
+		  GLboolean flip,
+		  GLenum logicop);
+
+/* Helpers for zerocopy uploads, particularly texture image uploads:
+ */
+void intel_region_attach_pbo(struct intel_context *intel,
+                             struct intel_region *region,
+                             struct intel_buffer_object *pbo);
+void intel_region_release_pbo(struct intel_context *intel,
+                              struct intel_region *region);
+void intel_region_cow(struct intel_context *intel,
+                      struct intel_region *region);
+
+drm_intel_bo *intel_region_buffer(struct intel_context *intel,
+				  struct intel_region *region,
+				  GLuint flag);
+
+void _mesa_copy_rect(GLubyte * dst,
+                GLuint cpp,
+                GLuint dst_pitch,
+                GLuint dst_x,
+                GLuint dst_y,
+                GLuint width,
+                GLuint height,
+                const GLubyte * src,
+                GLuint src_pitch, GLuint src_x, GLuint src_y);
+
+struct __DRIimageRec {
+   struct intel_region *region; /* region backing the image; a reference is held until intel_destroy_image() */
+   GLenum internal_format;
+   GLuint format;
+   GLenum data_type;
+   void *data; /* the loaderPrivate pointer supplied when the image was created */
+};
+
+#endif
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
new file mode 100644
index 0000000000..15a465c640
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -0,0 +1,574 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/hash.h"
+#include "main/fbobject.h"
+
+#include "utils.h"
+#include "xmlpool.h"
+
+#include "intel_batchbuffer.h"
+#include "intel_buffers.h"
+#include "intel_bufmgr.h"
+#include "intel_chipset.h"
+#include "intel_fbo.h"
+#include "intel_screen.h"
+#include "intel_tex.h"
+#include "intel_regions.h"
+
+#include "i915_drm.h"
+
+#define DRI_CONF_TEXTURE_TILING(def) \
+
+PUBLIC const char __driConfigOptions[] =
+   DRI_CONF_BEGIN
+   DRI_CONF_SECTION_PERFORMANCE
+      DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_ALWAYS_SYNC)
+      /* Options correspond to DRI_CONF_BO_REUSE_DISABLED,
+       * DRI_CONF_BO_REUSE_ALL
+       */
+      DRI_CONF_OPT_BEGIN_V(bo_reuse, enum, 1, "0:1")
+	 DRI_CONF_DESC_BEGIN(en, "Buffer object reuse")
+	    DRI_CONF_ENUM(0, "Disable buffer object reuse")
+	    DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects")
+	 DRI_CONF_DESC_END
+      DRI_CONF_OPT_END
+
+      DRI_CONF_OPT_BEGIN(texture_tiling, bool, true)
+	 DRI_CONF_DESC(en, "Enable texture tiling")
+      DRI_CONF_OPT_END
+
+      DRI_CONF_OPT_BEGIN(early_z, bool, false)
+	 DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).")
+      DRI_CONF_OPT_END
+
+      DRI_CONF_OPT_BEGIN(fragment_shader, bool, false)
+	 DRI_CONF_DESC(en, "Enable limited ARB_fragment_shader support on 915/945.")
+      DRI_CONF_OPT_END
+
+   DRI_CONF_SECTION_END
+   DRI_CONF_SECTION_QUALITY
+      DRI_CONF_FORCE_S3TC_ENABLE(false)
+      DRI_CONF_ALLOW_LARGE_TEXTURES(2)
+   DRI_CONF_SECTION_END
+   DRI_CONF_SECTION_DEBUG
+     DRI_CONF_NO_RAST(false)
+     DRI_CONF_ALWAYS_FLUSH_BATCH(false)
+     DRI_CONF_ALWAYS_FLUSH_CACHE(false)
+
+      DRI_CONF_OPT_BEGIN(stub_occlusion_query, bool, false)
+	 DRI_CONF_DESC(en, "Enable stub ARB_occlusion_query support on 915/945.")
+      DRI_CONF_OPT_END
+   DRI_CONF_SECTION_END
+DRI_CONF_END;
+
+const GLuint __driNConfigOptions = 11;
+
+#ifdef USE_NEW_INTERFACE
+static PFNGLXCREATECONTEXTMODES create_context_modes = NULL;
+#endif /*USE_NEW_INTERFACE */
+
+static const __DRItexBufferExtension intelTexBufferExtension = {
+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+   intelSetTexBuffer,
+   intelSetTexBuffer2,
+};
+
+/**
+ * DRI2 flush hook: push out any pending rendering for the drawable's
+ * context and request throttling.
+ */
+static void
+intelDRI2Flush(__DRIdrawable *drawable)
+{
+   struct intel_context *intel = drawable->driContextPriv->driverPrivate;
+
+   /* Only pre-gen4 hardware goes through INTEL_FIREVERTICES here. */
+   if (intel->gen < 4) {
+      INTEL_FIREVERTICES(intel);
+   }
+
+   intel->need_throttle = GL_TRUE;
+
+   /* Flush only when the batch write pointer has moved off the start. */
+   if (intel->batch->ptr != intel->batch->map) {
+      intel_batchbuffer_flush(intel->batch);
+   }
+}
+
+static const struct __DRI2flushExtensionRec intelFlushExtension = {
+    { __DRI2_FLUSH, __DRI2_FLUSH_VERSION },
+    intelDRI2Flush,
+    dri2InvalidateDrawable,
+};
+
+/**
+ * Create a __DRIimage for the buffer object with global name \p name.
+ *
+ * \param format         one of the __DRI_IMAGE_FORMAT_* values handled in
+ *                       the switch below (RGB565, XRGB8888, ARGB8888)
+ * \param pitch          passed through to intel_region_alloc_for_handle()
+ * \param loaderPrivate  stored in the image's data field
+ *
+ * \return the image, or NULL on allocation failure, unsupported format, or
+ *         when no region can be obtained for the name.
+ */
+static __DRIimage *
+intel_create_image_from_name(__DRIcontext *context,
+			     int width, int height, int format,
+			     int name, int pitch, void *loaderPrivate)
+{
+    __DRIimage *image;
+    struct intel_context *intel = context->driverPrivate;
+    int cpp;
+
+    image = CALLOC(sizeof *image);
+    if (image == NULL)
+	return NULL;
+
+    switch (format) {
+    case __DRI_IMAGE_FORMAT_RGB565:
+       image->format = MESA_FORMAT_RGB565;
+       image->internal_format = GL_RGB;
+       image->data_type = GL_UNSIGNED_BYTE;
+       break;
+    case __DRI_IMAGE_FORMAT_XRGB8888:
+       image->format = MESA_FORMAT_XRGB8888;
+       image->internal_format = GL_RGB;
+       image->data_type = GL_UNSIGNED_BYTE;
+       break;
+    case __DRI_IMAGE_FORMAT_ARGB8888:
+       image->format = MESA_FORMAT_ARGB8888;
+       image->internal_format = GL_RGBA;
+       image->data_type = GL_UNSIGNED_BYTE;
+       break;
+    default:
+       /* Release with FREE() to pair with the CALLOC() above, as the other
+	* error path in this function does.
+	*/
+       FREE(image);
+       return NULL;
+    }
+
+    image->data = loaderPrivate;
+    cpp = _mesa_get_format_bytes(image->format);
+
+    image->region = intel_region_alloc_for_handle(intel, cpp, width, height,
+						  pitch, name, "image");
+    if (image->region == NULL) {
+       FREE(image);
+       return NULL;
+    }
+
+    return image;
+}
+
+/**
+ * Create a __DRIimage that shares the region of an existing renderbuffer.
+ *
+ * Records GL_INVALID_OPERATION and returns NULL if \p renderbuffer does not
+ * name a renderbuffer; returns NULL if the image cannot be allocated.
+ */
+static __DRIimage *
+intel_create_image_from_renderbuffer(__DRIcontext *context,
+				     int renderbuffer, void *loaderPrivate)
+{
+   struct intel_context *intel = context->driverPrivate;
+   struct gl_renderbuffer *rb = _mesa_lookup_renderbuffer(&intel->ctx,
+							  renderbuffer);
+   struct intel_renderbuffer *irb;
+   __DRIimage *image;
+
+   if (rb == NULL) {
+      _mesa_error(&intel->ctx,
+		  GL_INVALID_OPERATION, "glRenderbufferExternalMESA");
+      return NULL;
+   }
+
+   irb = intel_renderbuffer(rb);
+
+   image = CALLOC(sizeof *image);
+   if (!image)
+      return NULL;
+
+   /* Copy the format description straight from the renderbuffer. */
+   image->internal_format = rb->InternalFormat;
+   image->format = rb->Format;
+   image->data_type = rb->DataType;
+   image->data = loaderPrivate;
+
+   /* The image takes its own reference on the renderbuffer's region. */
+   intel_region_reference(&image->region, irb->region);
+
+   return image;
+}
+
+/**
+ * Destroy an image: drop its region reference, then free the image itself.
+ */
+static void
+intel_destroy_image(__DRIimage *image)
+{
+    struct intel_region **held = &image->region;
+
+    intel_region_release(held);
+    FREE(image);
+}
+
+static struct __DRIimageExtensionRec intelImageExtension = {
+    { __DRI_IMAGE, __DRI_IMAGE_VERSION },
+    intel_create_image_from_name,
+    intel_create_image_from_renderbuffer,
+    intel_destroy_image,
+};
+
+static const __DRIextension *intelScreenExtensions[] = {
+    &driReadDrawableExtension,
+    &intelTexBufferExtension.base,
+    &intelFlushExtension.base,
+    &intelImageExtension.base,
+    &dri2ConfigQueryExtension.base,
+    NULL
+};
+
+/**
+ * Query an i915 kernel parameter through DRM_I915_GETPARAM.
+ *
+ * \param param  parameter id placed in the request
+ * \param value  location placed in the request to receive the result
+ * \return GL_TRUE on success; on failure a warning is emitted and GL_FALSE
+ *         is returned.
+ */
+static GLboolean
+intel_get_param(__DRIscreen *psp, int param, int *value)
+{
+   struct drm_i915_getparam gp;
+   int err;
+
+   gp.param = param;
+   gp.value = value;
+
+   err = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp));
+   if (err == 0)
+      return GL_TRUE;
+
+   _mesa_warning(NULL, "drm_i915_getparam: %d", err);
+   return GL_FALSE;
+}
+
+/**
+ * Hash-table callback that intentionally does nothing; used by
+ * intelDestroyScreen() to empty named_regions.
+ */
+static void
+nop_callback(GLuint key, void *data, void *userData)
+{
+   (void) key;
+   (void) data;
+   (void) userData;
+}
+
+/**
+ * Tear down the driver-private screen: buffer manager, option cache and
+ * the named-regions table, then the screen structure itself.
+ */
+static void
+intelDestroyScreen(__DRIscreen * sPriv)
+{
+   struct intel_screen *screen = sPriv->private;
+
+   dri_bufmgr_destroy(screen->bufmgr);
+   driDestroyOptionInfo(&screen->optionCache);
+
+   /* Some regions may still have references to them at this point, so
+    * empty the hash table first; otherwise _mesa_DeleteHashTable() would
+    * complain about the hash not being empty.
+    */
+   _mesa_HashDeleteAll(screen->named_regions, nop_callback, NULL);
+   _mesa_DeleteHashTable(screen->named_regions);
+
+   FREE(screen);
+   sPriv->private = NULL;
+}
+
+
+/**
+ * This is called when we need to set up GL rendering to a new X window.
+ *
+ * Builds a gl_framebuffer for the visual: hardware color buffer(s), a
+ * hardware depth (and, for 24-bit depth, combined stencil) buffer, plus
+ * whatever software renderbuffers are still needed.  The framebuffer is
+ * stored in driDrawPriv->driverPrivate.
+ *
+ * \return GL_TRUE on success, GL_FALSE for pixmaps (not implemented) or if
+ *         the framebuffer cannot be allocated.
+ */
+static GLboolean
+intelCreateBuffer(__DRIscreen * driScrnPriv,
+                  __DRIdrawable * driDrawPriv,
+                  const __GLcontextModes * mesaVis, GLboolean isPixmap)
+{
+   struct gl_framebuffer *fb;
+   struct intel_renderbuffer *rb;
+   gl_format rgbFormat;
+   GLboolean swStencil;
+
+   if (isPixmap)
+      return GL_FALSE;          /* not implemented */
+
+   /* Stencil is only done in hardware as part of a 24-bit depth buffer. */
+   swStencil = (mesaVis->stencilBits > 0 && mesaVis->depthBits != 24);
+
+   fb = CALLOC_STRUCT(gl_framebuffer);
+   if (fb == NULL)
+      return GL_FALSE;
+
+   _mesa_initialize_window_framebuffer(fb, mesaVis);
+
+   if (mesaVis->redBits == 5)
+      rgbFormat = MESA_FORMAT_RGB565;
+   else
+      rgbFormat = (mesaVis->alphaBits == 0) ? MESA_FORMAT_XRGB8888
+                                            : MESA_FORMAT_ARGB8888;
+
+   /* setup the hardware-based renderbuffers */
+   rb = intel_create_renderbuffer(rgbFormat);
+   _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &rb->Base);
+
+   if (mesaVis->doubleBufferMode) {
+      rb = intel_create_renderbuffer(rgbFormat);
+      _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &rb->Base);
+   }
+
+   switch (mesaVis->depthBits) {
+   case 24: {
+      /* combined depth/stencil buffer */
+      struct intel_renderbuffer *depthStencilRb;
+
+      assert(mesaVis->stencilBits == 8);
+      depthStencilRb = intel_create_renderbuffer(MESA_FORMAT_S8_Z24);
+      /* note: bind RB to two attachment points */
+      _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthStencilRb->Base);
+      _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &depthStencilRb->Base);
+      break;
+   }
+   case 16: {
+      /* just 16-bit depth buffer, no hw stencil */
+      struct intel_renderbuffer *depthRb
+         = intel_create_renderbuffer(MESA_FORMAT_Z16);
+      _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+      break;
+   }
+   default:
+      /* no hardware depth buffer for other depths */
+      break;
+   }
+
+   /* now add any/all software-based renderbuffers we may need */
+   _mesa_add_soft_renderbuffers(fb,
+                                GL_FALSE, /* never sw color */
+                                GL_FALSE, /* never sw depth */
+                                swStencil, mesaVis->accumRedBits > 0,
+                                GL_FALSE, /* never sw alpha */
+                                GL_FALSE  /* never sw aux */ );
+   driDrawPriv->driverPrivate = fb;
+
+   return GL_TRUE;
+}
+
+/**
+ * Drop the drawable's reference to its framebuffer.
+ */
+static void
+intelDestroyBuffer(__DRIdrawable * driDrawPriv)
+{
+    struct gl_framebuffer *framebuffer = driDrawPriv->driverPrivate;
+
+    /* Re-pointing the local at NULL releases the reference it carried. */
+    _mesa_reference_framebuffer(&framebuffer, NULL);
+}
+
+/* There are probably better ways to do this, such as an
+ * init-designated function to register chipids and createcontext
+ * functions.
+ */
+extern GLboolean i830CreateContext(const __GLcontextModes * mesaVis,
+                                   __DRIcontext * driContextPriv,
+                                   void *sharedContextPrivate);
+
+extern GLboolean i915CreateContext(int api,
+				   const __GLcontextModes * mesaVis,
+                                   __DRIcontext * driContextPriv,
+                                   void *sharedContextPrivate);
+extern GLboolean brwCreateContext(int api,
+				  const __GLcontextModes * mesaVis,
+				  __DRIcontext * driContextPriv,
+				  void *sharedContextPrivate);
+
+/**
+ * Dispatch context creation to the chipset-specific driver.
+ *
+ * Built with I915 defined: non-9xx devices use i830CreateContext() (with
+ * no_vbo set on the screen) and 9xx devices that are not 965 use
+ * i915CreateContext().  Built without it: 965 devices use
+ * brwCreateContext().  Anything else is reported as unrecognized and
+ * GL_FALSE is returned.
+ */
+static GLboolean
+intelCreateContext(gl_api api,
+		   const __GLcontextModes * mesaVis,
+                   __DRIcontext * driContextPriv,
+                   void *sharedContextPrivate)
+{
+   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+   struct intel_screen *intelScreen = sPriv->private;
+
+#ifdef I915
+   if (!IS_9XX(intelScreen->deviceID)) {
+      intelScreen->no_vbo = GL_TRUE;
+      return i830CreateContext(mesaVis, driContextPriv, sharedContextPrivate);
+   }
+
+   if (!IS_965(intelScreen->deviceID)) {
+      return i915CreateContext(api, mesaVis, driContextPriv,
+			       sharedContextPrivate);
+   }
+#else
+   if (IS_965(intelScreen->deviceID)) {
+      return brwCreateContext(api, mesaVis,
+			      driContextPriv, sharedContextPrivate);
+   }
+#endif
+
+   /* No driver in this build handles the device. */
+   fprintf(stderr, "Unrecognized deviceID %x\n", intelScreen->deviceID);
+   return GL_FALSE;
+}
+
+/**
+ * Set up the GEM buffer manager and related per-screen state.
+ *
+ * Records whether INTEL_NO_HW is set in the environment, creates the GEM
+ * buffer manager on the screen's DRM fd, verifies that the kernel reports
+ * a non-zero number of available fences, enables fenced relocations and
+ * creates the named-regions hash table.
+ *
+ * \param intelScreen  screen whose driScrnPriv has already been set.
+ *
+ * \return GL_TRUE on success.  GL_FALSE on failure, in which case no
+ *         buffer manager is left attached to the screen.
+ */
+static GLboolean
+intel_init_bufmgr(struct intel_screen *intelScreen)
+{
+   __DRIscreen *spriv = intelScreen->driScrnPriv;
+   int num_fences = 0;
+
+   intelScreen->no_hw = getenv("INTEL_NO_HW") != NULL;
+
+   /* GEM is the only buffer manager used here; there is no fallback. */
+   intelScreen->bufmgr = intel_bufmgr_gem_init(spriv->fd, BATCH_SZ);
+   if (intelScreen->bufmgr == NULL) {
+      fprintf(stderr, "[%s:%u] Error initializing buffer manager.\n",
+	      __func__, __LINE__);
+      return GL_FALSE;
+   }
+
+   if (!intel_get_param(spriv, I915_PARAM_NUM_FENCES_AVAIL, &num_fences) ||
+       num_fences == 0) {
+      fprintf(stderr, "[%s: %u] Kernel 2.6.29 required.\n", __func__, __LINE__);
+      /* Don't leak the buffer manager we just created. */
+      drm_intel_bufmgr_destroy(intelScreen->bufmgr);
+      intelScreen->bufmgr = NULL;
+      return GL_FALSE;
+   }
+
+   drm_intel_bufmgr_gem_enable_fenced_relocs(intelScreen->bufmgr);
+
+   intelScreen->named_regions = _mesa_NewHashTable();
+
+   return GL_TRUE;
+}
+
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ * Called when using DRI2.
+ *
+ * Allocates the per-screen private data, queries the chipset ID, sets up
+ * the buffer manager and builds the list of framebuffer configs: a full
+ * set without accumulation buffers, plus a minimal set with them.
+ *
+ * \param psp  DRI screen being initialized.  On success psp->private
+ *             points at the newly allocated struct intel_screen.
+ *
+ * \return the __GLcontextModes supported by this driver, or NULL on
+ *         failure.  If the chipset query or buffer manager setup fails,
+ *         the private data is released and psp->private is reset to NULL.
+ */
+static const
+__DRIconfig **intelInitScreen2(__DRIscreen *psp)
+{
+   struct intel_screen *intelScreen;
+   GLenum fb_format[3];
+   GLenum fb_type[3];
+   unsigned int api_mask;
+
+   static const GLenum back_buffer_modes[] = {
+       GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
+   };
+   uint8_t depth_bits[4], stencil_bits[4], msaa_samples_array[1];
+   int color;
+   __DRIconfig **configs = NULL;
+
+   /* Allocate the private area */
+   intelScreen = CALLOC(sizeof *intelScreen);
+   if (!intelScreen) {
+      fprintf(stderr, "\nERROR!  Allocating private area failed\n");
+      return NULL;
+   }
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo(&intelScreen->optionCache,
+                      __driConfigOptions, __driNConfigOptions);
+
+   intelScreen->driScrnPriv = psp;
+   psp->private = (void *) intelScreen;
+
+   /* Determine chipset ID */
+   if (!intel_get_param(psp, I915_PARAM_CHIPSET_ID,
+                        &intelScreen->deviceID))
+      goto fail;
+
+   api_mask = (1 << __DRI_API_OPENGL);
+#if FEATURE_ES1
+   api_mask |= (1 << __DRI_API_GLES);
+#endif
+#if FEATURE_ES2
+   api_mask |= (1 << __DRI_API_GLES2);
+#endif
+
+   if (IS_9XX(intelScreen->deviceID) || IS_965(intelScreen->deviceID))
+      psp->api_mask = api_mask;
+
+   if (!intel_init_bufmgr(intelScreen))
+      goto fail;
+
+   psp->extensions = intelScreenExtensions;
+
+   msaa_samples_array[0] = 0;
+
+   fb_format[0] = GL_RGB;
+   fb_type[0] = GL_UNSIGNED_SHORT_5_6_5;
+
+   fb_format[1] = GL_BGR;
+   fb_type[1] = GL_UNSIGNED_INT_8_8_8_8_REV;
+
+   fb_format[2] = GL_BGRA;
+   fb_type[2] = GL_UNSIGNED_INT_8_8_8_8_REV;
+
+   /* Slot 0 is the "no depth, no stencil" combination. */
+   depth_bits[0] = 0;
+   stencil_bits[0] = 0;
+
+   /* Generate a rich set of useful configs that do not include an
+    * accumulation buffer.
+    */
+   for (color = 0; color < ARRAY_SIZE(fb_format); color++) {
+      __DRIconfig **new_configs;
+      int depth_factor;
+
+      /* Starting with DRI2 protocol version 1.1 we can request a depth/stencil
+       * buffer that has a different number of bits per pixel than the color
+       * buffer.  This isn't yet supported here.
+       */
+      if (fb_type[color] == GL_UNSIGNED_SHORT_5_6_5) {
+         depth_bits[1] = 16;
+         stencil_bits[1] = 0;
+      } else {
+         depth_bits[1] = 24;
+         stencil_bits[1] = 8;
+      }
+
+      /* Two depth/stencil combinations: slot 0 (none) and slot 1. */
+      depth_factor = 2;
+
+      new_configs = driCreateConfigs(fb_format[color], fb_type[color],
+                                     depth_bits,
+                                     stencil_bits,
+                                     depth_factor,
+                                     back_buffer_modes,
+                                     ARRAY_SIZE(back_buffer_modes),
+                                     msaa_samples_array,
+                                     ARRAY_SIZE(msaa_samples_array),
+                                     GL_FALSE);
+      if (configs == NULL)
+         configs = new_configs;
+      else
+         configs = driConcatConfigs(configs, new_configs);
+   }
+
+   /* Generate the minimum possible set of configs that include an
+    * accumulation buffer.
+    */
+   for (color = 0; color < ARRAY_SIZE(fb_format); color++) {
+      __DRIconfig **new_configs;
+
+      /* Only slot 0 is passed below, so overwrite it per color format. */
+      if (fb_type[color] == GL_UNSIGNED_SHORT_5_6_5) {
+         depth_bits[0] = 16;
+         stencil_bits[0] = 0;
+      } else {
+         depth_bits[0] = 24;
+         stencil_bits[0] = 8;
+      }
+
+      /* back_buffer_modes + 1 with a count of 1 selects only
+       * GLX_SWAP_UNDEFINED_OML.
+       */
+      new_configs = driCreateConfigs(fb_format[color], fb_type[color],
+                                     depth_bits, stencil_bits, 1,
+                                     back_buffer_modes + 1, 1,
+                                     msaa_samples_array, 1,
+                                     GL_TRUE);
+      if (configs == NULL)
+         configs = new_configs;
+      else
+         configs = driConcatConfigs(configs, new_configs);
+   }
+
+   if (configs == NULL) {
+      /* NOTE(review): the buffer manager and named-regions table created by
+       * intel_init_bufmgr() are not torn down on this path -- confirm
+       * whether the caller is expected to invoke DestroyScreen.
+       */
+      fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
+              __LINE__);
+      return NULL;
+   }
+
+   return (const __DRIconfig **)configs;
+
+fail:
+   /* Undo the allocations made above so nothing leaks or dangles. */
+   driDestroyOptionInfo(&intelScreen->optionCache);
+   psp->private = NULL;
+   FREE(intelScreen);
+   return NULL;
+}
+
+/*
+ * Table of this driver's entry points, filled in with designated
+ * initializers; members not named here are left zero-initialized.
+ * NOTE(review): presumably consumed by the shared DRI utility layer --
+ * confirm against dri_util.h.
+ */
+const struct __DriverAPIRec driDriverAPI = {
+   .DestroyScreen	 = intelDestroyScreen,
+   .CreateContext	 = intelCreateContext,
+   .DestroyContext	 = intelDestroyContext,
+   .CreateBuffer	 = intelCreateBuffer,
+   .DestroyBuffer	 = intelDestroyBuffer,
+   .MakeCurrent		 = intelMakeCurrent,
+   .UnbindContext	 = intelUnbindContext,
+   .InitScreen2		 = intelInitScreen2,
+};
+
+/* This is the table of extensions that the loader will dlsym() for.
+ * It lists the core and DRI2 extensions and ends with a NULL entry.
+ */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+    &driCoreExtension.base,
+    &driDRI2Extension.base,
+    NULL
+};
diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h
new file mode 100644
index 0000000000..5863093f00
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_screen.h
@@ -0,0 +1,68 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef _INTEL_INIT_H_
+#define _INTEL_INIT_H_
+
+#include <sys/time.h>
+#include "dri_util.h"
+#include "intel_bufmgr.h"
+#include "i915_drm.h"
+#include "xmlconfig.h"
+
+/**
+ * Per-screen driver state.  An instance is allocated by the screen
+ * initialization code and stored in __DRIscreen::private.
+ */
+struct intel_screen
+{
+   /** Chipset ID, queried from the kernel via I915_PARAM_CHIPSET_ID. */
+   int deviceID;
+
+   /* NOTE(review): not referenced in the code visible here -- confirm use. */
+   int logTextureGranularity;
+
+   /** Back pointer to the DRI screen that owns this structure. */
+   __DRIscreen *driScrnPriv;
+
+   /** True when the INTEL_NO_HW environment variable is set. */
+   GLboolean no_hw;
+
+   /** Set to GL_TRUE on the i830 context-creation path. */
+   GLboolean no_vbo;
+   /** GEM buffer manager created on the screen's DRM fd. */
+   dri_bufmgr *bufmgr;
+   /* Hash table created during buffer manager setup.
+    * NOTE(review): presumably keyed by region name -- confirm in the
+    * region code.
+    */
+   struct _mesa_HashTable *named_regions;
+
+   /**
+   * Configuration cache with default values for all contexts
+   */
+   driOptionCache optionCache;
+};
+
+extern GLboolean intelMapScreenRegions(__DRIscreen * sPriv);
+
+extern void intelDestroyContext(__DRIcontext * driContextPriv);
+
+extern GLboolean intelUnbindContext(__DRIcontext * driContextPriv);
+
+extern GLboolean
+intelMakeCurrent(__DRIcontext * driContextPriv,
+                 __DRIdrawable * driDrawPriv,
+                 __DRIdrawable * driReadPriv);
+
+#endif
diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c
new file mode 100644
index 0000000000..fb840c1020
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_span.c
@@ -0,0 +1,384 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+
+#include "intel_buffers.h"
+#include "intel_fbo.h"
+#include "intel_screen.h"
+#include "intel_span.h"
+#include "intel_regions.h"
+#include "intel_tex.h"
+
+#include "swrast/swrast.h"
+
+static void
+intel_set_span_functions(struct intel_context *intel,
+			 struct gl_renderbuffer *rb);
+
+#undef DBG
+#define DBG 0
+
+/*
+ * Hooks for the span templates included below.
+ *
+ * LOCAL_VARS declares the per-call locals: the intel renderbuffer, the
+ * clip bounds (the whole renderbuffer), the row pitch in bytes and the
+ * mapped buffer address.  yScale/yBias implement the vertical flip used
+ * by Y_FLIP(): when rb->Name is zero, row y maps to (Height - 1 - y);
+ * otherwise rows are used as given.
+ * NOTE(review): rb->Name == 0 presumably identifies window-system
+ * buffers -- confirm.
+ */
+#define LOCAL_VARS							\
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);		\
+   const GLint yScale = rb->Name ? 1 : -1;				\
+   const GLint yBias = rb->Name ? 0 : rb->Height - 1;			\
+   int minx = 0, miny = 0;						\
+   int maxx = rb->Width;						\
+   int maxy = rb->Height;						\
+   int pitch = irb->region->pitch * irb->region->cpp;			\
+   void *buf = irb->region->buffer->virtual;				\
+   GLuint p;								\
+   (void) p;								\
+   (void)buf; (void)pitch; /* unused for non-gttmap. */			\
+
+/* No clip-rectangle iteration is needed: both hooks expand to nothing. */
+#define HW_CLIPLOOP()
+#define HW_ENDCLIPLOOP()
+
+#define Y_FLIP(_y) ((_y) * yScale + yBias)
+
+/* Locking hooks expand to nothing as well. */
+#define HW_LOCK()
+
+#define HW_UNLOCK()
+
+/* Convenience macros to avoid typing the address argument over and over */
+/* NO_TILE yields the byte offset of pixel (_X, _Y) in a linear layout:
+ * region->pitch is counted in pixels here, cpp is bytes per pixel.
+ */
+#define NO_TILE(_X, _Y) (((_Y) * irb->region->pitch + (_X)) * irb->region->cpp)
+
+/* r5g6b5 color span and pixel functions */
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+#define TAG(x) intel_##x##_RGB565
+/* y must be pasted as its own token: written as "y_RGB565" it is a plain
+ * identifier, so the macro parameter would never be substituted.
+ */
+#define TAG2(x,y) intel_##x##y##_RGB565
+#include "spantmp2.h"
+
+/* a4r4g4b4 color span and pixel functions */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
+#define TAG(x) intel_##x##_ARGB4444
+/* y must be pasted as its own token: written as "y_ARGB4444" it is a plain
+ * identifier, so the macro parameter would never be substituted.
+ */
+#define TAG2(x,y) intel_##x##y##_ARGB4444
+#include "spantmp2.h"
+
+/* a1r5g5b5 color span and pixel functions */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
+#define TAG(x) intel_##x##_ARGB1555
+#define TAG2(x,y) intel_##x##y##_ARGB1555
+#include "spantmp2.h"
+
+/* a8r8g8b8 color span and pixel functions */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+#define TAG(x) intel_##x##_ARGB8888
+#define TAG2(x,y) intel_##x##y##_ARGB8888
+#include "spantmp2.h"
+
+/* x8r8g8b8 color span and pixel functions */
+#define SPANTMP_PIXEL_FMT GL_BGR
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+#define TAG(x) intel_##x##_xRGB8888
+#define TAG2(x,y) intel_##x##y##_xRGB8888
+#include "spantmp2.h"
+
+/* a8 color span and pixel functions */
+#define SPANTMP_PIXEL_FMT GL_ALPHA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_BYTE
+#define TAG(x) intel_##x##_A8
+#define TAG2(x,y) intel_##x##y##_A8
+#include "spantmp2.h"
+
+/*
+ * Per-call locals for the depth/stencil span templates.  Same set as the
+ * color LOCAL_VARS minus the scratch pixel variable.
+ */
+#define LOCAL_DEPTH_VARS						\
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);		\
+   const GLint yScale = rb->Name ? 1 : -1;				\
+   const GLint yBias = rb->Name ? 0 : rb->Height - 1;			\
+   int minx = 0, miny = 0;						\
+   int maxx = rb->Width;						\
+   int maxy = rb->Height;						\
+   int pitch = irb->region->pitch * irb->region->cpp;			\
+   void *buf = irb->region->buffer->virtual;				\
+   (void)buf; (void)pitch; /* unused for non-gttmap. */			\
+
+#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
+
+/* z16 depthbuffer functions. */
+/* Each access addresses the mapped buffer at the linear NO_TILE() byte
+ * offset and reads or writes one 16-bit value.
+ */
+#define VALUE_TYPE GLushort
+#define WRITE_DEPTH(_x, _y, d) \
+   (*(uint16_t *)(irb->region->buffer->virtual + NO_TILE(_x, _y)) = d)
+#define READ_DEPTH(d, _x, _y) \
+   d = *(uint16_t *)(irb->region->buffer->virtual + NO_TILE(_x, _y))
+#define TAG(x) intel_##x##_z16
+#include "depthtmp.h"
+
+/* z24_s8 and z24_x8 depthbuffer functions. */
+/* Same addressing as above, one 32-bit value per pixel; the value is
+ * stored and loaded whole, without masking.
+ */
+#define VALUE_TYPE GLuint
+#define WRITE_DEPTH(_x, _y, d) \
+   (*(uint32_t *)(irb->region->buffer->virtual + NO_TILE(_x, _y)) = d)
+#define READ_DEPTH(d, _x, _y) \
+   d = *(uint32_t *)(irb->region->buffer->virtual + NO_TILE(_x, _y))
+#define TAG(x) intel_##x##_z24_s8
+#include "depthtmp.h"
+
+/**
+ * Map a renderbuffer's region through the GTT and install the span
+ * functions that match its format.
+ *
+ * Does nothing when \p rb is not an intel renderbuffer or has no region.
+ * The result of the mapping call is not inspected.
+ */
+void
+intel_renderbuffer_map(struct intel_context *intel, struct gl_renderbuffer *rb)
+{
+   struct intel_renderbuffer *intel_rb = intel_renderbuffer(rb);
+
+   if (intel_rb != NULL && intel_rb->region != NULL) {
+      drm_intel_gem_bo_map_gtt(intel_rb->region->buffer);
+      intel_set_span_functions(intel, rb);
+   }
+}
+
+/**
+ * Undo intel_renderbuffer_map(): release the GTT mapping of the
+ * renderbuffer's region and clear its row accessors.
+ *
+ * Does nothing when \p rb is not an intel renderbuffer or has no region.
+ */
+void
+intel_renderbuffer_unmap(struct intel_context *intel,
+                         struct gl_renderbuffer *rb)
+{
+   struct intel_renderbuffer *intel_rb = intel_renderbuffer(rb);
+
+   if (intel_rb != NULL && intel_rb->region != NULL) {
+      drm_intel_gem_bo_unmap_gtt(intel_rb->region->buffer);
+
+      /* only the row accessors are reset here */
+      rb->GetRow = NULL;
+      rb->PutRow = NULL;
+   }
+}
+
+/**
+ * Map or unmap all the renderbuffers which we may need during
+ * software rendering.
+ * XXX in the future, we could probably convey extra information to
+ * reduce the number of mappings needed.  I.e. if doing a glReadPixels
+ * from the depth buffer, we really only need one mapping.
+ *
+ * XXX Rewrite this function someday.
+ * We can probably just loop over all the renderbuffer attachments,
+ * map/unmap all of them, and not worry about the _ColorDrawBuffers
+ * _ColorReadBuffer, _DepthBuffer or _StencilBuffer fields.
+ */
+static void
+intel_map_unmap_framebuffer(struct intel_context *intel,
+                            struct gl_framebuffer *fb,
+                            GLboolean map)
+{
+   /* Pick the renderbuffer operation once; every renderbuffer below gets
+    * the same treatment.
+    */
+   void (*rb_op)(struct intel_context *, struct gl_renderbuffer *) =
+      map ? intel_renderbuffer_map : intel_renderbuffer_unmap;
+   GLuint idx;
+
+   /* color draw buffers */
+   for (idx = 0; idx < fb->_NumColorDrawBuffers; idx++)
+      rb_op(intel, fb->_ColorDrawBuffers[idx]);
+
+   /* color read buffer */
+   rb_op(intel, fb->_ColorReadBuffer);
+
+   /* attachments backed by a texture (render to texture) */
+   for (idx = 0; idx < BUFFER_COUNT; idx++) {
+      struct gl_renderbuffer_attachment *att = &fb->Attachment[idx];
+      struct gl_texture_object *tex = att->Texture;
+
+      if (!tex)
+         continue;
+
+      ASSERT(att->Renderbuffer);
+      if (map)
+         intel_tex_map_images(intel, intel_texture_object(tex));
+      else
+         intel_tex_unmap_images(intel, intel_texture_object(tex));
+   }
+
+   /* depth and stencil go through a wrapper; operate on the wrapped buffer */
+   if (fb->_DepthBuffer)
+      rb_op(intel, fb->_DepthBuffer->Wrapped);
+
+   if (fb->_StencilBuffer)
+      rb_op(intel, fb->_StencilBuffer->Wrapped);
+
+   intel_check_front_buffer_rendering(intel);
+}
+
+/**
+ * Prepare for software rendering.  Map current read/draw framebuffers'
+ * renderbuffers and all currently bound texture objects.
+ *
+ * Old note: Moved locking out to get reasonable span performance.
+ */
+void
+intelSpanRenderStart(GLcontext * ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   GLuint unit;
+
+   /* Flush and prepare before anything gets mapped. */
+   intel_flush(&intel->ctx);
+   intel_prepare_render(intel);
+
+   /* Map the images of every enabled texture unit. */
+   for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
+      struct gl_texture_object *texObj;
+
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+         continue;
+
+      texObj = ctx->Texture.Unit[unit]._Current;
+      intel_finalize_mipmap_tree(intel, unit);
+      intel_tex_map_images(intel, intel_texture_object(texObj));
+   }
+
+   /* Map the draw framebuffer, and the read framebuffer if it differs. */
+   intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_TRUE);
+   if (ctx->ReadBuffer != ctx->DrawBuffer) {
+      intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_TRUE);
+   }
+}
+
+/**
+ * Called when done software rendering.  Unmap the buffers we mapped in
+ * the above function.
+ */
+void
+intelSpanRenderFinish(GLcontext * ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   GLuint unit;
+
+   _swrast_flush(ctx);
+
+   /* Unmap the images of every enabled texture unit. */
+   for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
+      struct gl_texture_object *texObj;
+
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+         continue;
+
+      texObj = ctx->Texture.Unit[unit]._Current;
+      intel_tex_unmap_images(intel, intel_texture_object(texObj));
+   }
+
+   /* Unmap the draw framebuffer, and the read framebuffer if it differs. */
+   intel_map_unmap_framebuffer(intel, ctx->DrawBuffer, GL_FALSE);
+   if (ctx->ReadBuffer != ctx->DrawBuffer) {
+      intel_map_unmap_framebuffer(intel, ctx->ReadBuffer, GL_FALSE);
+   }
+}
+
+
+/**
+ * Install this driver's span render start/finish hooks in the swrast
+ * device driver table of \p ctx.
+ */
+void
+intelInitSpanFuncs(GLcontext * ctx)
+{
+   struct swrast_device_driver *hooks;
+
+   hooks = _swrast_GetDeviceDriverReference(ctx);
+   hooks->SpanRenderStart = intelSpanRenderStart;
+   hooks->SpanRenderFinish = intelSpanRenderFinish;
+}
+
+/**
+ * Map the images of every enabled texture unit that the current vertex
+ * program uses.  Does nothing when there is no current vertex program.
+ */
+void
+intel_map_vertex_shader_textures(GLcontext *ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   int unit;
+
+   if (ctx->VertexProgram._Current == NULL)
+      return;
+
+   for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
+      struct gl_texture_object *texObj;
+
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+         continue;
+      if (ctx->VertexProgram._Current->Base.TexturesUsed[unit] == 0)
+         continue;
+
+      texObj = ctx->Texture.Unit[unit]._Current;
+      intel_tex_map_images(intel, intel_texture_object(texObj));
+   }
+}
+
+/**
+ * Unmap the images of every enabled texture unit that the current vertex
+ * program uses.  Does nothing when there is no current vertex program.
+ */
+void
+intel_unmap_vertex_shader_textures(GLcontext *ctx)
+{
+   struct intel_context *intel = intel_context(ctx);
+   int unit;
+
+   if (ctx->VertexProgram._Current == NULL)
+      return;
+
+   for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
+      struct gl_texture_object *texObj;
+
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+         continue;
+      if (ctx->VertexProgram._Current->Base.TexturesUsed[unit] == 0)
+         continue;
+
+      texObj = ctx->Texture.Unit[unit]._Current;
+      intel_tex_unmap_images(intel, intel_texture_object(texObj));
+   }
+}
+
+/**
+ * Plug in appropriate span read/write functions for the given renderbuffer.
+ * These are used for the software fallbacks.
+ */
+/* Selects the span template instance by the renderbuffer's Mesa format.
+ * Color formats get the generic pointer initializers, depth formats the
+ * depth ones (X8_Z24 and S8_Z24 share one).  An unhandled format is
+ * reported through _mesa_problem() and the renderbuffer is left untouched.
+ * The intel parameter is currently unused.
+ */
+static void
+intel_set_span_functions(struct intel_context *intel,
+			 struct gl_renderbuffer *rb)
+{
+   struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb;
+
+   switch (irb->Base.Format) {
+   case MESA_FORMAT_A8:
+      intel_InitPointers_A8(rb);
+      break;
+   case MESA_FORMAT_RGB565:
+      intel_InitPointers_RGB565(rb);
+      break;
+   case MESA_FORMAT_ARGB4444:
+      intel_InitPointers_ARGB4444(rb);
+      break;
+   case MESA_FORMAT_ARGB1555:
+      intel_InitPointers_ARGB1555(rb);
+      break;
+   case MESA_FORMAT_XRGB8888:
+      intel_InitPointers_xRGB8888(rb);
+      break;
+   case MESA_FORMAT_ARGB8888:
+      intel_InitPointers_ARGB8888(rb);
+      break;
+   case MESA_FORMAT_Z16:
+      intel_InitDepthPointers_z16(rb);
+      break;
+   case MESA_FORMAT_X8_Z24:
+   case MESA_FORMAT_S8_Z24:
+      intel_InitDepthPointers_z24_s8(rb);
+      break;
+   default:
+      /* Report under this function's actual name so the message can be
+       * found by searching the source.
+       */
+      _mesa_problem(NULL,
+		    "Unexpected MesaFormat %d in intel_set_span_functions",
+		    irb->Base.Format);
+      break;
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h
new file mode 100644
index 0000000000..bffe109aa5
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_span.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef _INTEL_SPAN_H
+#define _INTEL_SPAN_H
+
+extern void intelInitSpanFuncs(GLcontext * ctx);
+
+extern void intelSpanRenderFinish(GLcontext * ctx);
+extern void intelSpanRenderStart(GLcontext * ctx);
+void intel_renderbuffer_map(struct intel_context *intel,
+			    struct gl_renderbuffer *rb);
+void intel_renderbuffer_unmap(struct intel_context *intel,
+			      struct gl_renderbuffer *rb);
+void intel_map_vertex_shader_textures(GLcontext *ctx);
+void intel_unmap_vertex_shader_textures(GLcontext *ctx);
+
+#endif
diff --git a/src/mesa/drivers/dri/intel/intel_state.c b/src/mesa/drivers/dri/intel/intel_state.c
new file mode 100644
index 0000000000..c5ef909dbf
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_state.c
@@ -0,0 +1,211 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "main/colormac.h"
+#include "main/dd.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+
+/**
+ * Translate a GL comparison function to the hardware COMPAREFUNC value
+ * used for shadow comparisons.
+ *
+ * The mapping is deliberately NOT the identity: NEVER/ALWAYS and
+ * EQUAL/NOTEQUAL are exchanged, as are LESS/LEQUAL and GREATER/GEQUAL.
+ * NOTE(review): this looks like the negated test with operands swapped --
+ * confirm against the hardware documentation before changing it.
+ *
+ * Unknown values are reported on stderr and yield COMPAREFUNC_NEVER.
+ */
+int
+intel_translate_shadow_compare_func(GLenum func)
+{
+   int hw_func;
+
+   switch (func) {
+   case GL_NEVER:
+      hw_func = COMPAREFUNC_ALWAYS;
+      break;
+   case GL_LESS:
+      hw_func = COMPAREFUNC_LEQUAL;
+      break;
+   case GL_LEQUAL:
+      hw_func = COMPAREFUNC_LESS;
+      break;
+   case GL_GREATER:
+      hw_func = COMPAREFUNC_GEQUAL;
+      break;
+   case GL_GEQUAL:
+      hw_func = COMPAREFUNC_GREATER;
+      break;
+   case GL_NOTEQUAL:
+      hw_func = COMPAREFUNC_EQUAL;
+      break;
+   case GL_EQUAL:
+      hw_func = COMPAREFUNC_NOTEQUAL;
+      break;
+   case GL_ALWAYS:
+      hw_func = COMPAREFUNC_NEVER;
+      break;
+   default:
+      fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
+      hw_func = COMPAREFUNC_NEVER;
+      break;
+   }
+
+   return hw_func;
+}
+
+/**
+ * Translate a GL comparison function to the same-named hardware
+ * COMPAREFUNC value.
+ *
+ * Unknown values are reported on stderr and yield COMPAREFUNC_ALWAYS.
+ */
+int
+intel_translate_compare_func(GLenum func)
+{
+   int hw_func;
+
+   switch (func) {
+   case GL_NEVER:
+      hw_func = COMPAREFUNC_NEVER;
+      break;
+   case GL_LESS:
+      hw_func = COMPAREFUNC_LESS;
+      break;
+   case GL_LEQUAL:
+      hw_func = COMPAREFUNC_LEQUAL;
+      break;
+   case GL_GREATER:
+      hw_func = COMPAREFUNC_GREATER;
+      break;
+   case GL_GEQUAL:
+      hw_func = COMPAREFUNC_GEQUAL;
+      break;
+   case GL_NOTEQUAL:
+      hw_func = COMPAREFUNC_NOTEQUAL;
+      break;
+   case GL_EQUAL:
+      hw_func = COMPAREFUNC_EQUAL;
+      break;
+   case GL_ALWAYS:
+      hw_func = COMPAREFUNC_ALWAYS;
+      break;
+   default:
+      fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
+      hw_func = COMPAREFUNC_ALWAYS;
+      break;
+   }
+
+   return hw_func;
+}
+
+/**
+ * Translate a GL stencil operation to the hardware STENCILOP value.
+ *
+ * GL_INCR and GL_DECR map to the saturating hardware ops, while the
+ * GL *_WRAP variants map to the plain INCR/DECR ops.  Anything
+ * unrecognized silently becomes STENCILOP_ZERO.
+ */
+int
+intel_translate_stencil_op(GLenum op)
+{
+   int hw_op = STENCILOP_ZERO;   /* also the result for unknown ops */
+
+   switch (op) {
+   case GL_KEEP:
+      hw_op = STENCILOP_KEEP;
+      break;
+   case GL_ZERO:
+      hw_op = STENCILOP_ZERO;
+      break;
+   case GL_REPLACE:
+      hw_op = STENCILOP_REPLACE;
+      break;
+   case GL_INCR:
+      hw_op = STENCILOP_INCRSAT;
+      break;
+   case GL_DECR:
+      hw_op = STENCILOP_DECRSAT;
+      break;
+   case GL_INCR_WRAP:
+      hw_op = STENCILOP_INCR;
+      break;
+   case GL_DECR_WRAP:
+      hw_op = STENCILOP_DECR;
+      break;
+   case GL_INVERT:
+      hw_op = STENCILOP_INVERT;
+      break;
+   default:
+      break;
+   }
+
+   return hw_op;
+}
+
+/**
+ * Translate a GL blend factor to the hardware BLENDFACT value.
+ *
+ * Unknown values are reported on stderr and yield BLENDFACT_ZERO.
+ */
+int
+intel_translate_blend_factor(GLenum factor)
+{
+   int hw_factor;
+
+   switch (factor) {
+   case GL_ZERO:
+      hw_factor = BLENDFACT_ZERO;
+      break;
+   case GL_ONE:
+      hw_factor = BLENDFACT_ONE;
+      break;
+   case GL_SRC_COLOR:
+      hw_factor = BLENDFACT_SRC_COLR;
+      break;
+   case GL_ONE_MINUS_SRC_COLOR:
+      hw_factor = BLENDFACT_INV_SRC_COLR;
+      break;
+   case GL_DST_COLOR:
+      hw_factor = BLENDFACT_DST_COLR;
+      break;
+   case GL_ONE_MINUS_DST_COLOR:
+      hw_factor = BLENDFACT_INV_DST_COLR;
+      break;
+   case GL_SRC_ALPHA:
+      hw_factor = BLENDFACT_SRC_ALPHA;
+      break;
+   case GL_ONE_MINUS_SRC_ALPHA:
+      hw_factor = BLENDFACT_INV_SRC_ALPHA;
+      break;
+   case GL_DST_ALPHA:
+      hw_factor = BLENDFACT_DST_ALPHA;
+      break;
+   case GL_ONE_MINUS_DST_ALPHA:
+      hw_factor = BLENDFACT_INV_DST_ALPHA;
+      break;
+   case GL_SRC_ALPHA_SATURATE:
+      hw_factor = BLENDFACT_SRC_ALPHA_SATURATE;
+      break;
+   case GL_CONSTANT_COLOR:
+      hw_factor = BLENDFACT_CONST_COLOR;
+      break;
+   case GL_ONE_MINUS_CONSTANT_COLOR:
+      hw_factor = BLENDFACT_INV_CONST_COLOR;
+      break;
+   case GL_CONSTANT_ALPHA:
+      hw_factor = BLENDFACT_CONST_ALPHA;
+      break;
+   case GL_ONE_MINUS_CONSTANT_ALPHA:
+      hw_factor = BLENDFACT_INV_CONST_ALPHA;
+      break;
+   default:
+      fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, factor);
+      hw_factor = BLENDFACT_ZERO;
+      break;
+   }
+
+   return hw_factor;
+}
+
+/**
+ * Translate a GL logic op to the hardware LOGICOP value.
+ *
+ * Anything unrecognized silently becomes LOGICOP_SET.
+ */
+int
+intel_translate_logic_op(GLenum opcode)
+{
+   int hw_op = LOGICOP_SET;   /* also the result for unknown opcodes */
+
+   switch (opcode) {
+   case GL_CLEAR:
+      hw_op = LOGICOP_CLEAR;
+      break;
+   case GL_AND:
+      hw_op = LOGICOP_AND;
+      break;
+   case GL_AND_REVERSE:
+      hw_op = LOGICOP_AND_RVRSE;
+      break;
+   case GL_COPY:
+      hw_op = LOGICOP_COPY;
+      break;
+   case GL_COPY_INVERTED:
+      hw_op = LOGICOP_COPY_INV;
+      break;
+   case GL_AND_INVERTED:
+      hw_op = LOGICOP_AND_INV;
+      break;
+   case GL_NOOP:
+      hw_op = LOGICOP_NOOP;
+      break;
+   case GL_XOR:
+      hw_op = LOGICOP_XOR;
+      break;
+   case GL_OR:
+      hw_op = LOGICOP_OR;
+      break;
+   case GL_OR_INVERTED:
+      hw_op = LOGICOP_OR_INV;
+      break;
+   case GL_NOR:
+      hw_op = LOGICOP_NOR;
+      break;
+   case GL_EQUIV:
+      hw_op = LOGICOP_EQUIV;
+      break;
+   case GL_INVERT:
+      hw_op = LOGICOP_INV;
+      break;
+   case GL_OR_REVERSE:
+      hw_op = LOGICOP_OR_RVRSE;
+      break;
+   case GL_NAND:
+      hw_op = LOGICOP_NAND;
+      break;
+   case GL_SET:
+      hw_op = LOGICOP_SET;
+      break;
+   default:
+      break;
+   }
+
+   return hw_op;
+}
+
+/* Fallback to swrast for select and feedback.
+ */
+static void
+intelRenderMode(GLcontext *ctx, GLenum mode)
+{
+   struct intel_context *intel = intel_context(ctx);
+   /* The condition passed is true for every mode other than GL_RENDER.
+    * NOTE(review): FALLBACK presumably sets or clears the
+    * INTEL_FALLBACK_RENDERMODE bit accordingly -- confirm in its definition.
+    */
+   FALLBACK(intel, INTEL_FALLBACK_RENDERMODE, (mode != GL_RENDER));
+}
+
+
+/**
+ * Plug this file's state callbacks into the driver function table.
+ * Only the RenderMode hook is installed here.
+ */
+void
+intelInitStateFuncs(struct dd_function_table *functions)
+{
+   functions->RenderMode = intelRenderMode;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_syncobj.c b/src/mesa/drivers/dri/intel/intel_syncobj.c
new file mode 100644
index 0000000000..c2d86432ff
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_syncobj.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file intel_syncobj.c
+ *
+ * Support for ARB_sync
+ *
+ * ARB_sync is implemented by flushing the current batchbuffer and keeping a
+ * reference on it.  We can then check for completion or wait for completion
+ * using the normal buffer object mechanisms.  This does mean that if an
+ * application is using many sync objects, it will emit small batchbuffers
+ * which may end up being a significant overhead.  In other tests of removing
+ * gratuitous batchbuffer syncs in Mesa, it hasn't appeared to be a significant
+ * performance bottleneck, though.
+ */
+
+#include "main/simple_list.h"
+#include "main/imports.h"
+
+#include "intel_context.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+
+/**
+ * Allocate a zero-initialized driver sync object.
+ *
+ * \param ctx  unused.
+ * \param id   unused; the object is returned zero-filled.
+ *
+ * \return pointer to the embedded gl_sync_object, or NULL when the
+ *         allocation fails.
+ *         NOTE(review): confirm the core caller checks for NULL.
+ */
+static struct gl_sync_object *
+intel_new_sync_object(GLcontext *ctx, GLuint id)
+{
+   struct intel_sync_object *sync;
+
+   (void) ctx;
+   (void) id;
+
+   sync = calloc(1, sizeof(*sync));
+   if (sync == NULL) {
+      /* Never form a member address from a NULL pointer. */
+      return NULL;
+   }
+
+   return &sync->Base;
+}
+
+/**
+ * Destroy a sync object: drop its buffer-object reference and free the
+ * driver structure that embeds \p s.
+ */
+static void
+intel_delete_sync_object(GLcontext *ctx, struct gl_sync_object *s)
+{
+   struct intel_sync_object *intel_sync = (struct intel_sync_object *) s;
+
+   (void) ctx;
+
+   drm_intel_bo_unreference(intel_sync->bo);
+   free(intel_sync);
+}
+
+/**
+ * Insert a fence: emit an MI flush into the current batch, keep a
+ * reference to that batch's buffer in the sync object, then flush.
+ *
+ * Only GL_SYNC_GPU_COMMANDS_COMPLETE is accepted (asserted); \p flags is
+ * not examined.
+ */
+static void
+intel_fence_sync(GLcontext *ctx, struct gl_sync_object *s,
+                 GLenum condition, GLbitfield flags)
+{
+   struct intel_sync_object *intel_sync = (struct intel_sync_object *) s;
+   struct intel_context *intel = intel_context(ctx);
+
+   (void) flags;
+
+   assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE);
+   intel_batchbuffer_emit_mi_flush(intel->batch);
+
+   /* Remember the batch buffer that carries the flush and pin it. */
+   intel_sync->bo = intel->batch->buf;
+   drm_intel_bo_reference(intel_sync->bo);
+
+   intel_flush(ctx);
+}
+
+/* We ignore the user-supplied timeout.  This is weaselly -- we're allowed to
+ * round to an implementation-dependent accuracy, and right now our
+ * implementation "rounds" to the wait-forever value.
+ *
+ * The fix would be a new kernel function to do the GTT transition with a
+ * timeout.
+ */
+static void intel_client_wait_sync(GLcontext *ctx, struct gl_sync_object *s,
+                                   GLbitfield flags, GLuint64 timeout)
+{
+   struct intel_sync_object *intel_sync = (struct intel_sync_object *) s;
+
+   (void) ctx;
+   (void) flags;
+   (void) timeout;
+
+   /* No buffer attached: nothing is pending for this object. */
+   if (!intel_sync->bo)
+      return;
+
+   /* Block until rendering completes, then mark signaled and let go of
+    * the buffer.
+    */
+   drm_intel_bo_wait_rendering(intel_sync->bo);
+   s->StatusFlag = 1;
+   drm_intel_bo_unreference(intel_sync->bo);
+   intel_sync->bo = NULL;
+}
+
+/* We have nothing to do for WaitSync.  Our GL command stream is sequential,
+ * so given that the sync object has already flushed the batchbuffer,
+ * any batchbuffers coming after this waitsync will naturally not occur until
+ * the previous one is done.
+ */
+static void intel_server_wait_sync(GLcontext *ctx, struct gl_sync_object *s,
+                                   GLbitfield flags, GLuint64 timeout)
+{
+   /* Intentionally a no-op; none of the arguments are used. */
+   (void) ctx;
+   (void) s;
+   (void) flags;
+   (void) timeout;
+}
+
+/**
+ * Non-blocking status poll: if the sync object still holds a buffer and
+ * that buffer is no longer busy, release it and mark the object signaled.
+ */
+static void intel_check_sync(GLcontext *ctx, struct gl_sync_object *s)
+{
+   struct intel_sync_object *intel_sync = (struct intel_sync_object *) s;
+
+   (void) ctx;
+
+   if (!intel_sync->bo || drm_intel_bo_busy(intel_sync->bo))
+      return;
+
+   drm_intel_bo_unreference(intel_sync->bo);
+   intel_sync->bo = NULL;
+   s->StatusFlag = 1;
+}
+
+/**
+ * Plug the sync-object callbacks defined in this file into the driver
+ * function table.
+ */
+void intel_init_syncobj_functions(struct dd_function_table *functions)
+{
+   /* object lifetime */
+   functions->NewSyncObject = intel_new_sync_object;
+   functions->DeleteSyncObject = intel_delete_sync_object;
+
+   /* fence insertion and polling */
+   functions->FenceSync = intel_fence_sync;
+   functions->CheckSync = intel_check_sync;
+
+   /* waits */
+   functions->ServerWaitSync = intel_server_wait_sync;
+   functions->ClientWaitSync = intel_client_wait_sync;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c
new file mode 100644
index 0000000000..8bb6ae99fb
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_tex.c
@@ -0,0 +1,228 @@
+#include "swrast/swrast.h"
+#include "main/texobj.h"
+#include "main/teximage.h"
+#include "main/mipmap.h"
+#include "drivers/common/meta.h"
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_tex.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+/* IsTextureResident hook: the real check is compiled out; always true. */
+static GLboolean
+intelIsTextureResident(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+#if 0
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+
+   return
+      intelObj->mt &&
+      intelObj->mt->region &&
+      intel_is_region_resident(intel, intelObj->mt->region);
+#endif
+   return GL_TRUE;
+}
+
+/* NewTextureImage hook: allocate the driver's intel_texture_image with
+ * CALLOC_STRUCT and return it as the generic gl_texture_image pointer. */
+static struct gl_texture_image *
+intelNewTextureImage(GLcontext * ctx)
+{
+   (void) ctx;
+   DBG("%s\n", __FUNCTION__);
+   return (struct gl_texture_image *) CALLOC_STRUCT(intel_texture_image);
+}
+
+/* NewTextureObject hook.  Returns NULL when the allocation fails. */
+static struct gl_texture_object *
+intelNewTextureObject(GLcontext * ctx, GLuint name, GLenum target)
+{
+   struct intel_texture_object *obj = CALLOC_STRUCT(intel_texture_object);
+   DBG("%s\n", __FUNCTION__);
+   if (!obj)
+      return NULL;   /* never initialize through a NULL pointer */
+   _mesa_initialize_texture_object(&obj->base, name, target);
+   return &obj->base;
+}
+
+/* DeleteTexture hook: release our miptree, then let core Mesa free texObj. */
+static void
+intelDeleteTextureObject(GLcontext *ctx, struct gl_texture_object *texObj)
+{
+   struct intel_context *intel_ctx = intel_context(ctx);
+   struct intel_texture_object *tex = intel_texture_object(texObj);
+
+   if (tex->mt != NULL) {
+      intel_miptree_release(intel_ctx, &tex->mt);
+   }
+   _mesa_delete_texture_object(ctx, texObj);
+}
+
+/* FreeTexImageData hook: release the image's miptree and free its Data. */
+static void
+intelFreeTextureImageData(GLcontext * ctx, struct gl_texture_image *texImage)
+{
+   struct intel_context *intel_ctx = intel_context(ctx);
+   struct intel_texture_image *image = intel_texture_image(texImage);
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (image->mt != NULL) {
+      intel_miptree_release(intel_ctx, &image->mt);
+   }
+
+   if (texImage->Data != NULL) {
+      _mesa_free_texmemory(texImage->Data);
+      texImage->Data = NULL;
+   }
+}
+
+
+/* The system memcpy (at least on ubuntu 5.10) has problems copying
+ * to agp (writecombined) memory from a source which isn't 64-byte
+ * aligned - there is a 4x performance falloff.
+ *
+ * The x86 __memcpy is immune to this but is slightly slower
+ * (10%-ish) than the system memcpy, so it is used only when src or
+ * dest is not 64-byte aligned; aligned copies go to memcpy.
+ *
+ * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
+ * isn't much faster than x86_memcpy for agp copies.
+ * TODO: switch dynamically.
+ */
+static void *
+do_memcpy(void *dest, const void *src, size_t n)
+{
+   const unsigned long addr_bits = (unsigned long) src | (unsigned long) dest;
+
+   if ((addr_bits & 63) == 0)
+      return memcpy(dest, src, n);
+   return __memcpy(dest, src, n);
+}
+
+
+#if DO_DEBUG && !defined(__ia64__)
+/* fastrdtsc(): cpuid followed by rdtsc (.byte 0x0f, 0x31); returns eax. */
+#ifndef __x86_64__
+static unsigned
+fastrdtsc(void)
+{
+   unsigned eax;
+   __asm__ volatile ("\t"
+                     "pushl  %%ebx\n\t"
+                     "cpuid\n\t" ".byte 0x0f, 0x31\n\t"
+                     "popl %%ebx\n":"=a" (eax)
+                     :"0"(0)
+                     :"ecx", "edx", "cc");
+   /* ebx is pushed/popped by hand above rather than listed as a clobber */
+   return eax;
+}
+#else
+static unsigned
+fastrdtsc(void)
+{
+   unsigned eax;
+   __asm__ volatile ("\t" "cpuid\n\t" ".byte 0x0f, 0x31\n\t":"=a" (eax)
+                     :"0"(0)
+                     :"ecx", "edx", "ebx", "cc");
+   /* in this variant ebx is simply declared as clobbered */
+   return eax;
+}
+#endif
+/* Ticks from t to t2, allowing for a single 32-bit wrap of the counter. */
+static unsigned
+time_diff(unsigned t, unsigned t2)
+{
+   const unsigned wrapped = 0xFFFFFFFFU - (t - t2 - 1);
+   return (t < t2) ? t2 - t : wrapped;
+}
+
+/* Debug TextureMemCpy: do_memcpy() timed with fastrdtsc(), result printed. */
+static void *
+timed_memcpy(void *dest, const void *src, size_t n)
+{
+   void *ret;
+   unsigned t1, t2;
+   double rate;
+
+   /* unsigned long, as in do_memcpy(): unsigned is too narrow on x86_64 */
+   if ((((unsigned long) src) & 63) || (((unsigned long) dest) & 63))
+      printf("Warning - non-aligned texture copy!\n");
+   t1 = fastrdtsc();
+   ret = do_memcpy(dest, src, n);
+   t2 = fastrdtsc();
+   rate = time_diff(t1, t2);
+   rate /= (double) n;
+   printf("timed_memcpy: %u %u --> %f clocks/byte\n", t1, t2, rate);
+   return ret;
+}
+#endif /* DO_DEBUG */
+
+
+/**
+ * Called via ctx->Driver.GenerateMipmap()
+ * This is basically a wrapper for _mesa_meta_GenerateMipmap() which checks
+ * if we'll be using software mipmap generation.  In that case, we need to
+ * map/unmap the base level texture image around _mesa_generate_mipmap().
+ */
+static void
+intelGenerateMipmap(GLcontext *ctx, GLenum target,
+                    struct gl_texture_object *texObj)
+{
+   if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, texObj)) {
+      /* sw path: need to map texture images */
+      struct intel_context *intel = intel_context(ctx);
+      struct intel_texture_object *intelObj = intel_texture_object(texObj);
+      intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel);
+      _mesa_generate_mipmap(ctx, target, texObj);
+      intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel);
+      /* NOTE(review): the loop below stops before MaxLevel -- intended? */
+      {
+         GLuint nr_faces = (texObj->Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+         GLuint face, i;
+         /* Update the level information in our private data in the new images,
+          * since it didn't get set as part of a normal TexImage path.
+          */
+         for (face = 0; face < nr_faces; face++) {
+            for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
+               struct intel_texture_image *intelImage =
+                  intel_texture_image(texObj->Image[face][i]);
+               if (!intelImage)
+                  break;   /* no image at this level: done with this face */
+               intelImage->level = i;
+               intelImage->face = face;
+               /* Unreference the miptree to signal that the new Data is a
+                * bare pointer from mesa.
+                */
+               intel_miptree_release(intel, &intelImage->mt);
+            }
+         }
+      }
+   }
+   else {
+      _mesa_meta_GenerateMipmap(ctx, target, texObj);
+   }
+}
+
+
+/* Install the texture object/image hooks and pick the TextureMemCpy routine. */
+void
+intelInitTextureFuncs(struct dd_function_table *functions)
+{
+   functions->NewTextureObject = intelNewTextureObject;
+   functions->DeleteTexture = intelDeleteTextureObject;
+   functions->NewTextureImage = intelNewTextureImage;
+   functions->FreeTexImageData = intelFreeTextureImageData;
+   functions->IsTextureResident = intelIsTextureResident;
+   functions->UpdateTexturePalette = NULL;
+
+   functions->ChooseTextureFormat = intelChooseTextureFormat;
+   functions->GenerateMipmap = intelGenerateMipmap;
+
+   functions->TextureMemCpy = do_memcpy;
+#if DO_DEBUG && !defined(__ia64__)
+   if (INTEL_DEBUG & DEBUG_BUFMGR)
+      functions->TextureMemCpy = timed_memcpy;
+#endif
+}
diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h
new file mode 100644
index 0000000000..4bb012dc65
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_tex.h
@@ -0,0 +1,71 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef INTELTEX_INC
+#define INTELTEX_INC
+
+#include "main/mtypes.h"
+#include "main/formats.h"
+#include "intel_context.h"
+#include "texmem.h"
+
+/* Installs the texture object/image, mipmap and TextureMemCpy hooks. */
+void intelInitTextureFuncs(struct dd_function_table *functions);
+
+void intelInitTextureImageFuncs(struct dd_function_table *functions);
+
+void intelInitTextureSubImageFuncs(struct dd_function_table *functions);
+/* Installs the CopyTexImage1D/2D and CopyTexSubImage1D/2D hooks. */
+void intelInitTextureCopyImageFuncs(struct dd_function_table *functions);
+/* Picks the gl_format used to store a texture of the given internalFormat. */
+gl_format intelChooseTextureFormat(GLcontext *ctx, GLint internalFormat,
+                                   GLenum format, GLenum type);
+
+void intelSetTexBuffer(__DRIcontext *pDRICtx,
+		       GLint target, __DRIdrawable *pDraw);
+void intelSetTexBuffer2(__DRIcontext *pDRICtx,
+			GLint target, GLint format, __DRIdrawable *pDraw);
+
+GLuint intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit);
+
+void intel_tex_map_level_images(struct intel_context *intel,
+				struct intel_texture_object *intelObj,
+				int level);
+
+void intel_tex_unmap_level_images(struct intel_context *intel,
+				  struct intel_texture_object *intelObj,
+				  int level);
+
+void intel_tex_map_images(struct intel_context *intel,
+                          struct intel_texture_object *intelObj);
+
+void intel_tex_unmap_images(struct intel_context *intel,
+                            struct intel_texture_object *intelObj);
+/* Returns the format's bytes per block divided by its block width. */
+int intel_compressed_num_bytes(GLuint mesaFormat);
+
+#endif /* INTELTEX_INC */
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
new file mode 100644
index 0000000000..224b506c05
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -0,0 +1,330 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/teximage.h"
+#include "main/texstate.h"
+#include "main/mipmap.h"
+
+#include "drivers/common/meta.h"
+
+#include "intel_screen.h"
+#include "intel_context.h"
+#include "intel_buffers.h"
+#include "intel_mipmap_tree.h"
+#include "intel_regions.h"
+#include "intel_fbo.h"
+#include "intel_tex.h"
+#include "intel_blit.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+/**
+ * Get the intel_region which is the source for any glCopyTex[Sub]Image call,
+ * or NULL when there is none the blitter path can use (callers fall back).
+ * Do the best we can using the blitter.  A future project is to use
+ * the texture engine and fragment programs for these copies.
+ */
+static const struct intel_region *
+get_teximage_source(struct intel_context *intel, GLenum internalFormat)
+{
+   struct intel_renderbuffer *irb;
+
+   DBG("%s %s\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(internalFormat));
+
+   switch (internalFormat) {
+   case GL_DEPTH_COMPONENT:
+   case GL_DEPTH_COMPONENT16:
+      irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH);
+      if (irb && irb->region && irb->region->cpp == 2)
+         return irb->region;
+      return NULL;
+   case GL_DEPTH24_STENCIL8_EXT:
+   case GL_DEPTH_STENCIL_EXT:
+      irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH);
+      if (irb && irb->region && irb->region->cpp == 4)
+         return irb->region;
+      return NULL;
+   case GL_RGBA:
+   case GL_RGBA8:
+      irb = intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer);
+      /* Guard against a missing renderbuffer, as the depth cases do.  For
+       * an RGB buffer we're required to set alpha to 1.0, but we can't do
+       * that with the blitter, so fall back.  We could use the 3D engine or
+       * do two passes with the blitter, but it doesn't seem worth it. */
+      if (!irb || irb->Base._BaseFormat == GL_RGB)
+	 return NULL;
+      return irb->region;
+   case GL_RGB:
+   case GL_RGB8:
+      return intel_readbuf_region(intel);
+   default:
+      return NULL;
+   }
+}
+
+/* Blitter copy from the read buffer into intelImage; GL_FALSE => fall back. */
+static GLboolean
+do_copy_texsubimage(struct intel_context *intel,
+		    GLenum target,
+                    struct intel_texture_image *intelImage,
+                    GLenum internalFormat,
+                    GLint dstx, GLint dsty,
+                    GLint x, GLint y, GLsizei width, GLsizei height)
+{
+   GLcontext *ctx = &intel->ctx;
+   const struct intel_region *src = get_teximage_source(intel, internalFormat);
+   /* Need both a destination miptree and a usable source region. */
+   if (!intelImage->mt || !src) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "%s fail %p %p (0x%08x)\n",
+		 __FUNCTION__, intelImage->mt, src, internalFormat);
+      return GL_FALSE;
+   }
+   /* Source and destination cpp must match for the copy blit. */
+   if (intelImage->mt->cpp != src->cpp) {
+      if (INTEL_DEBUG & DEBUG_FALLBACKS)
+	 fprintf(stderr, "%s fail %d vs %d cpp\n",
+		 __FUNCTION__, intelImage->mt->cpp, src->cpp);
+      return GL_FALSE;
+   }
+
+   /* intel_flush(ctx); */
+   intel_prepare_render(intel);
+   {
+      drm_intel_bo *dst_bo = intel_region_buffer(intel,
+						 intelImage->mt->region,
+						 INTEL_WRITE_PART);
+      GLuint image_x, image_y;
+      GLshort src_pitch;
+
+      /* get dest x/y in destination texture */
+      intel_miptree_get_image_offset(intelImage->mt,
+				     intelImage->level,
+				     intelImage->face,
+				     0,
+				     &image_x, &image_y);
+
+      /* The blitter can't handle Y-tiled buffers. */
+      if (intelImage->mt->region->tiling == I915_TILING_Y) {
+	 return GL_FALSE;
+      }
+
+      if (ctx->ReadBuffer->Name == 0) {
+	 /* Flip vertical orientation for system framebuffers */
+	 y = ctx->ReadBuffer->Height - (y + height);
+	 src_pitch = -src->pitch;
+      } else {
+	 /* reading from a FBO, y is already oriented the way we like */
+	 src_pitch = src->pitch;
+      }
+
+      /* blit from src buffer to texture */
+      if (!intelEmitCopyBlit(intel,
+			     intelImage->mt->cpp,
+			     src_pitch,
+			     src->buffer,
+			     0,
+			     src->tiling,
+			     intelImage->mt->region->pitch,
+			     dst_bo,
+			     0,
+			     intelImage->mt->region->tiling,
+			     src->draw_x + x, src->draw_y + y,
+			     image_x + dstx, image_y + dsty,
+			     width, height,
+			     GL_COPY)) {
+	 return GL_FALSE;
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
+/* CopyTexImage1D hook; falls back to meta when the blitter can't do it. */
+static void
+intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level,
+                    GLenum internalFormat,
+                    GLint x, GLint y, GLsizei width, GLint border)
+{
+   struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
+   struct gl_texture_object *texObj =
+      _mesa_select_tex_object(ctx, texUnit, target);
+   struct gl_texture_image *texImage =
+      _mesa_select_tex_image(ctx, texObj, target, level);
+   /* Clipping works on copies so that the fallback path still sees the
+    * caller's original x/y/width. */
+   GLint srcx = x, srcy = y, dstx = 0, dsty = 0;
+   GLsizei clip_width = width, clip_height = 1;
+
+   if (border)
+      goto fail;
+
+   /* Setup or redefine the texture object, mipmap tree and texture
+    * image.  Don't populate yet.
+    */
+   ctx->Driver.TexImage1D(ctx, target, level, internalFormat,
+                          width, border,
+                          GL_RGBA, CHAN_TYPE, NULL,
+                          &ctx->DefaultPacking, texObj, texImage);
+
+   /* Clip reported nothing to copy: the image stays defined but unfilled. */
+   if (!_mesa_clip_copytexsubimage(ctx,
+				   &dstx, &dsty,
+				   &srcx, &srcy,
+				   &clip_width, &clip_height))
+      return;
+
+   if (do_copy_texsubimage(intel_context(ctx), target,
+                           intel_texture_image(texImage),
+                           internalFormat, dstx, dsty, srcx, srcy,
+                           clip_width, clip_height))
+      return;
+
+ fail:
+   if (INTEL_DEBUG & DEBUG_FALLBACKS)
+      fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__);
+   _mesa_meta_CopyTexImage1D(ctx, target, level, internalFormat, x, y,
+                             width, border);
+}
+
+
+/* CopyTexImage2D hook; falls back to meta when the blitter can't do it. */
+static void
+intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level,
+                    GLenum internalFormat,
+                    GLint x, GLint y, GLsizei width, GLsizei height,
+                    GLint border)
+{
+   struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
+   struct gl_texture_object *texObj =
+      _mesa_select_tex_object(ctx, texUnit, target);
+   struct gl_texture_image *texImage =
+      _mesa_select_tex_image(ctx, texObj, target, level);
+   /* Clipping works on copies so that the fallback path still sees the
+    * caller's original x/y/width/height. */
+   GLint srcx = x, srcy = y, dstx = 0, dsty = 0;
+   GLsizei clip_width = width, clip_height = height;
+
+   if (border)
+      goto fail;
+
+   /* Setup or redefine the texture object, mipmap tree and texture
+    * image.  Don't populate yet.
+    */
+   ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
+                          width, height, border,
+                          GL_RGBA, GL_UNSIGNED_BYTE, NULL,
+                          &ctx->DefaultPacking, texObj, texImage);
+
+   /* Clip reported nothing to copy: the image stays defined but unfilled. */
+   if (!_mesa_clip_copytexsubimage(ctx,
+				   &dstx, &dsty,
+				   &srcx, &srcy,
+				   &clip_width, &clip_height))
+      return;
+
+   if (do_copy_texsubimage(intel_context(ctx), target,
+                           intel_texture_image(texImage),
+                           internalFormat, dstx, dsty, srcx, srcy,
+                           clip_width, clip_height))
+      return;
+
+ fail:
+   if (INTEL_DEBUG & DEBUG_FALLBACKS)
+      fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__);
+   _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
+                             width, height, border);
+}
+
+
+/* CopyTexSubImage1D hook: try the blitter first, otherwise use meta. */
+static void
+intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+                       GLint xoffset, GLint x, GLint y, GLsizei width)
+{
+   struct gl_texture_unit *unit = _mesa_get_current_tex_unit(ctx);
+   struct gl_texture_object *obj =
+      _mesa_select_tex_object(ctx, unit, target);
+   struct gl_texture_image *image =
+      _mesa_select_tex_image(ctx, obj, target, level);
+   GLenum internalFormat = image->InternalFormat;
+
+   /* XXX need to check <border> as in the CopyTexImage hooks? */
+   /* Need to check texture is compatible with source format. */
+
+   if (do_copy_texsubimage(intel_context(ctx), target,
+                           intel_texture_image(image),
+                           internalFormat, xoffset, 0, x, y, width, 1))
+      return;
+
+   if (INTEL_DEBUG & DEBUG_FALLBACKS)
+      fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__);
+   _mesa_meta_CopyTexSubImage1D(ctx, target, level, xoffset, x, y, width);
+}
+
+
+/* CopyTexSubImage2D hook: try the blitter first, otherwise use meta. */
+static void
+intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+                       GLint xoffset, GLint yoffset,
+                       GLint x, GLint y, GLsizei width, GLsizei height)
+{
+   struct gl_texture_unit *unit = _mesa_get_current_tex_unit(ctx);
+   struct gl_texture_object *obj =
+      _mesa_select_tex_object(ctx, unit, target);
+   struct gl_texture_image *image =
+      _mesa_select_tex_image(ctx, obj, target, level);
+   GLenum internalFormat = image->InternalFormat;
+
+   /* Need to check texture is compatible with source format. */
+
+   if (do_copy_texsubimage(intel_context(ctx), target,
+                           intel_texture_image(image),
+                           internalFormat,
+                           xoffset, yoffset, x, y, width, height))
+      return;
+
+   if (INTEL_DEBUG & DEBUG_FALLBACKS)
+      fprintf(stderr, "%s - fallback to swrast\n", __FUNCTION__);
+   _mesa_meta_CopyTexSubImage2D(ctx, target, level,
+                                xoffset, yoffset, x, y, width, height);
+}
+
+/* Install the glCopyTex[Sub]Image driver hooks defined in this file. */
+void
+intelInitTextureCopyImageFuncs(struct dd_function_table *functions)
+{
+   functions->CopyTexSubImage2D = intelCopyTexSubImage2D;
+   functions->CopyTexSubImage1D = intelCopyTexSubImage1D;
+   functions->CopyTexImage2D = intelCopyTexImage2D;
+   functions->CopyTexImage1D = intelCopyTexImage1D;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c
new file mode 100644
index 0000000000..5f813c0efa
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_tex_format.c
@@ -0,0 +1,228 @@
+#include "intel_context.h"
+#include "intel_tex.h"
+#include "main/enums.h"
+#include "main/formats.h"
+
+/**
+ * Choose hardware texture format given the user's glTexImage parameters.
+ * Unhandled internalFormat values are reported on stderr and yield
+ * MESA_FORMAT_NONE.
+ *
+ * It works out that this function is fine for all the supported
+ * hardware.  However, there is still a need to map the formats onto
+ * hardware descriptors.  Note that the i915 can actually support many
+ * more formats than these if we take the step of simply swizzling the
+ * colors immediately after sampling...
+ */
+gl_format
+intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
+                         GLenum format, GLenum type)
+{
+   struct intel_context *intel = intel_context(ctx);
+   const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24);
+   /* do32bpt: visual has >= 24 rgb bits, so 8888 is chosen over 4444/565 */
+#if 0
+   printf("%s intFmt=0x%x format=0x%x type=0x%x\n",
+          __FUNCTION__, internalFormat, format, type);
+#endif
+
+   switch (internalFormat) {
+   case 4:
+   case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
+      if (format == GL_BGRA) {
+         if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) {
+            return MESA_FORMAT_ARGB8888;
+         }
+         else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) {
+            return MESA_FORMAT_ARGB4444;
+         }
+         else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) {
+            return MESA_FORMAT_ARGB1555;
+         }
+      }
+      return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+
+   case 3:
+   case GL_RGB:
+   case GL_COMPRESSED_RGB:
+      if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
+         return MESA_FORMAT_RGB565;
+      }
+      if (do32bpt) {
+	 if (intel->has_xrgb_textures)
+	    return MESA_FORMAT_XRGB8888;
+	 else
+	    return MESA_FORMAT_ARGB8888;
+      } else {
+	 return MESA_FORMAT_RGB565;
+      }
+
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+
+   case GL_RGBA4:
+   case GL_RGBA2:
+      return MESA_FORMAT_ARGB4444;
+
+   case GL_RGB5_A1:
+      return MESA_FORMAT_ARGB1555;
+
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      if (intel->has_xrgb_textures)
+	 return MESA_FORMAT_XRGB8888;
+      else
+	 return MESA_FORMAT_ARGB8888;
+
+   case GL_RGB5:
+   case GL_RGB4:
+   case GL_R3_G3_B2:
+      return MESA_FORMAT_RGB565;
+
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+      return MESA_FORMAT_A8;
+
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+      return MESA_FORMAT_L8;
+
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+#ifndef I915
+      return MESA_FORMAT_AL1616;
+#else
+      /* FALLTHROUGH */
+#endif
+
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return MESA_FORMAT_AL88;
+
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      return MESA_FORMAT_I8;
+
+   case GL_YCBCR_MESA:
+      if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE)
+         return MESA_FORMAT_YCBCR;
+      else
+         return MESA_FORMAT_YCBCR_REV;
+
+   case GL_COMPRESSED_RGB_FXT1_3DFX:
+      return MESA_FORMAT_RGB_FXT1;
+   case GL_COMPRESSED_RGBA_FXT1_3DFX:
+      return MESA_FORMAT_RGBA_FXT1;
+
+   case GL_RGB_S3TC:
+   case GL_RGB4_S3TC:
+   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+      return MESA_FORMAT_RGB_DXT1;
+
+   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+      return MESA_FORMAT_RGBA_DXT1;
+
+   case GL_RGBA_S3TC:
+   case GL_RGBA4_S3TC:
+   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+      return MESA_FORMAT_RGBA_DXT3;
+
+   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+      return MESA_FORMAT_RGBA_DXT5;
+
+   case GL_DEPTH_COMPONENT:
+   case GL_DEPTH_COMPONENT16:
+   case GL_DEPTH_COMPONENT24:
+   case GL_DEPTH_COMPONENT32:
+#if 0
+      return MESA_FORMAT_Z16;
+#else
+      /* fall-through.
+       * 16bpp depth texture can't be paired with a stencil buffer so
+       * always used combined depth/stencil format.
+       */
+#endif
+   case GL_DEPTH_STENCIL_EXT:
+   case GL_DEPTH24_STENCIL8_EXT:
+      return MESA_FORMAT_S8_Z24;
+
+#ifndef I915
+   case GL_SRGB_EXT:
+   case GL_SRGB8_EXT:
+   case GL_SRGB_ALPHA_EXT:
+   case GL_SRGB8_ALPHA8_EXT:
+   case GL_COMPRESSED_SRGB_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_EXT:
+   case GL_COMPRESSED_SLUMINANCE_EXT:
+   case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
+      return MESA_FORMAT_SARGB8;
+   case GL_SLUMINANCE_EXT:
+   case GL_SLUMINANCE8_EXT:
+      if (intel->has_luminance_srgb)
+         return MESA_FORMAT_SL8;
+      else
+         return MESA_FORMAT_SARGB8;
+   case GL_SLUMINANCE_ALPHA_EXT:
+   case GL_SLUMINANCE8_ALPHA8_EXT:
+      if (intel->has_luminance_srgb)
+         return MESA_FORMAT_SLA8;
+      else
+         return MESA_FORMAT_SARGB8;
+   case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+      return MESA_FORMAT_SRGB_DXT1;
+   /* NOTE(review): all four sRGB S3TC cases share SRGB_DXT1 -- confirm. */
+   /* i915 could also do this */
+   case GL_DUDV_ATI:
+   case GL_DU8DV8_ATI:
+      return MESA_FORMAT_DUDV8;
+   case GL_RGBA_SNORM:
+   case GL_RGBA8_SNORM:
+      return MESA_FORMAT_SIGNED_RGBA8888_REV;
+#endif
+
+   default:
+      fprintf(stderr, "unexpected texture format %s in %s\n",
+              _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__);
+      return MESA_FORMAT_NONE;
+   }
+
+   return MESA_FORMAT_NONE;       /* never get here */
+}
+
+/* Bytes per block of mesaFormat divided by that format's block width. */
+int intel_compressed_num_bytes(GLuint mesaFormat)
+{
+   GLuint block_w, block_h;
+   const GLuint bytes_per_block = _mesa_get_format_bytes(mesaFormat);
+
+   _mesa_get_format_block_size(mesaFormat, &block_w, &block_h);
+
+   return bytes_per_block / block_w;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c
new file mode 100644
index 0000000000..7d33df3599
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_tex_image.c
@@ -0,0 +1,856 @@
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "main/bufferobj.h"
+#include "main/convolve.h"
+#include "main/context.h"
+#include "main/formats.h"
+#include "main/texcompress.h"
+#include "main/texstore.h"
+#include "main/texgetimage.h"
+#include "main/texobj.h"
+#include "main/texstore.h"
+#include "main/teximage.h"
+
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_buffer_objects.h"
+#include "intel_batchbuffer.h"
+#include "intel_tex.h"
+#include "intel_blit.h"
+#include "intel_fbo.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+/* Functions to store texture images.  Where possible, mipmap_tree's
+ * will be created or further instantiated with image data, otherwise
+ * images will be stored in malloc'd memory.  A validation step is
+ * required to pull those images into a mipmap tree, or otherwise
+ * decide a fallback is required.
+ */
+
+
+static int
+logbase2(int n)
+{
+   GLint exponent = 0;
+   GLint pow2;
+
+   /* Smallest exponent with (1 << exponent) >= n; 0 for any n <= 1. */
+   for (pow2 = 1; pow2 < n; pow2 *= 2) {
+      exponent++;
+   }
+
+   return exponent;
+}
+
+
+/* Try to create intelObj->mt, guessing its level range from one image.
+ *
+ * No tree is created when the image has a border, is a depth texture
+ * wrapped with GL_CLAMP_TO_BORDER, or lies above BaseLevel and already
+ * has a relevant dimension of 1; intelObj->mt is then left untouched.
+ *
+ * Otherwise the tree starts at BaseLevel (or at level 0 if the image
+ * is below BaseLevel) and either stops there or runs down to 1x1.
+ * Consider pruning this tree at validation if the saving is worth it.
+ */
+static void
+guess_and_alloc_mipmap_tree(struct intel_context *intel,
+                            struct intel_texture_object *intelObj,
+                            struct intel_texture_image *intelImage,
+			    GLboolean expect_accelerated_upload)
+{
+   GLuint firstLevel;
+   GLuint lastLevel;
+   GLuint width = intelImage->base.Width;
+   GLuint height = intelImage->base.Height;
+   GLuint depth = intelImage->base.Depth;
+   GLuint l2width, l2height, l2depth;
+   GLuint i, comp_byte = 0;   /* comp_byte stays 0 for uncompressed formats */
+   GLuint texelBytes;
+
+   DBG("%s\n", __FUNCTION__);
+
+   if (intelImage->base.Border ||
+       ((intelImage->base._BaseFormat == GL_DEPTH_COMPONENT) && 
+        ((intelObj->base.WrapS == GL_CLAMP_TO_BORDER) ||
+         (intelObj->base.WrapT == GL_CLAMP_TO_BORDER))))
+      return;   /* no tree: intelObj->mt stays as it was */
+
+   if (intelImage->level > intelObj->base.BaseLevel &&
+       (intelImage->base.Width == 1 ||
+        (intelObj->base.Target != GL_TEXTURE_1D &&
+         intelImage->base.Height == 1) ||
+        (intelObj->base.Target == GL_TEXTURE_3D &&
+         intelImage->base.Depth == 1)))
+      return;   /* presumably: a size of 1 cannot be scaled back up reliably */
+
+   /* If this image disrespects BaseLevel, allocate from level zero.
+    * Usually BaseLevel == 0, so it's unlikely to happen.
+    */
+   if (intelImage->level < intelObj->base.BaseLevel)
+      firstLevel = 0;
+   else
+      firstLevel = intelObj->base.BaseLevel;
+
+
+   /* Figure out image dimensions at start level: double each dimension
+    * (height and depth only when not 1) once per level above firstLevel. */
+   for (i = intelImage->level; i > firstLevel; i--) {
+      width <<= 1;
+      if (height != 1)
+         height <<= 1;
+      if (depth != 1)
+         depth <<= 1;
+   }
+
+   /* Guess a reasonable value for lastLevel.  This is probably going
+    * to be wrong fairly often and might mean that we have to look at
+    * resizable buffers, or require that buffers implement lazy
+    * pagetable arrangements.
+    */
+   if ((intelObj->base.MinFilter == GL_NEAREST ||
+        intelObj->base.MinFilter == GL_LINEAR) &&
+       intelImage->level == firstLevel &&
+       (intel->gen < 4 || firstLevel == 0)) {
+      lastLevel = firstLevel;   /* non-mipmapping min filter: one level only */
+   }
+   else {
+      l2width = logbase2(width);
+      l2height = logbase2(height);
+      l2depth = logbase2(depth);
+      lastLevel = firstLevel + MAX2(MAX2(l2width, l2height), l2depth);
+   }
+
+   assert(!intelObj->mt);   /* only valid to call while the object has no tree */
+   if (_mesa_is_format_compressed(intelImage->base.TexFormat))
+      comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat);
+
+   texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat);
+
+   intelObj->mt = intel_miptree_create(intel,
+                                       intelObj->base.Target,
+                                       intelImage->base._BaseFormat,
+                                       intelImage->base.InternalFormat,
+                                       firstLevel,
+                                       lastLevel,
+                                       width,
+                                       height,
+                                       depth,
+                                       texelBytes,
+                                       comp_byte,
+				       expect_accelerated_upload);
+
+   DBG("%s - success\n", __FUNCTION__);   /* NOTE(review): logged without checking intelObj->mt */
+}
+
+
+
+
+static GLuint
+target_to_face(GLenum target)
+{
+   /* The six cube map face enums are consecutive, starting at +X, so a
+    * face target maps to its index by subtraction.  Every other target
+    * yields face 0.
+    */
+   const GLboolean is_cube_face =
+      (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB &&
+       target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB);
+
+   if (!is_cube_face)
+      return 0;
+   return ((GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X);
+}
+
+/* There are actually quite a few combinations this will work for,
+ * more than what I've listed here.
+ */
+static GLboolean
+check_pbo_format(GLint internalFormat,
+                 GLenum format, GLenum type,
+                 gl_format mesa_format)
+{
+   /* GL_TRUE only for the internalFormat / format / type / mesa_format
+    * combinations spelled out below; anything else is rejected. */
+   switch (internalFormat) {
+   case 4:
+   case GL_RGBA:
+   case GL_RGBA8:
+      if (format != GL_BGRA || mesa_format != MESA_FORMAT_ARGB8888)
+         return GL_FALSE;
+      return (type == GL_UNSIGNED_BYTE ||
+              type == GL_UNSIGNED_INT_8_8_8_8_REV);
+   case 3:
+   case GL_RGB:
+      return (mesa_format == MESA_FORMAT_RGB565 &&
+              format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5);
+   case 1:
+   case GL_LUMINANCE:
+      return (mesa_format == MESA_FORMAT_L8 &&
+              format == GL_LUMINANCE && type == GL_UNSIGNED_BYTE);
+   case GL_YCBCR_MESA:
+      return (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE);
+   default:
+      return GL_FALSE;
+   }
+}
+
+
+/* Blit PBO contents into intelImage->mt; returns GL_FALSE if not possible.
+ * XXX: Do this for TexSubImage also. */
+static GLboolean
+try_pbo_upload(struct intel_context *intel,
+               struct intel_texture_image *intelImage,
+               const struct gl_pixelstore_attrib *unpack,
+               GLint internalFormat,
+               GLint width, GLint height,
+               GLenum format, GLenum type, const void *pixels)
+{
+   struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
+   GLuint src_offset, src_stride;
+   GLuint dst_x, dst_y, dst_stride;
+   drm_intel_bo *dst_buffer = intel_region_buffer(intel,
+						  intelImage->mt->region,
+						  INTEL_WRITE_FULL);
+   /* NOTE(review): dst_buffer is requested above before any check below runs. */
+   if (!_mesa_is_bufferobj(unpack->BufferObj) ||
+       intel->ctx._ImageTransferState ||
+       unpack->SkipPixels || unpack->SkipRows) {
+      DBG("%s: failure 1\n", __FUNCTION__);
+      return GL_FALSE;   /* no PBO, pixel transfer ops active, or skip offsets set */
+   }
+
+   /* note: potential 64-bit ptr to 32-bit int cast */
+   src_offset = (GLuint) (unsigned long) pixels;   /* pixels is used as an offset into the PBO */
+
+   if (unpack->RowLength > 0)
+      src_stride = unpack->RowLength;
+   else
+      src_stride = width;   /* no explicit row length: rows are width pixels long */
+
+   intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
+				  intelImage->face, 0,
+				  &dst_x, &dst_y);
+
+   dst_stride = intelImage->mt->region->pitch;   /* presumably in pixels, like src_stride -- confirm */
+
+   if (drm_intel_bo_references(intel->batch->buf, dst_buffer))
+      intel_flush(&intel->ctx);   /* the batch still references the destination */
+
+   {
+      drm_intel_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ);
+
+      if (!intelEmitCopyBlit(intel,
+			     intelImage->mt->cpp,
+			     src_stride, src_buffer, src_offset, GL_FALSE,
+			     dst_stride, dst_buffer, 0,
+			     intelImage->mt->region->tiling,
+			     0, 0, dst_x, dst_y, width, height,
+			     GL_COPY)) {
+	 return GL_FALSE;   /* blit was refused; caller falls back */
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
+static GLboolean
+try_pbo_zcopy(struct intel_context *intel,
+              struct intel_texture_image *intelImage,
+              const struct gl_pixelstore_attrib *unpack,
+              GLint internalFormat,
+              GLint width, GLint height,
+              GLenum format, GLenum type, const void *pixels)
+{
+   struct intel_buffer_object *pbo = intel_buffer_object(unpack->BufferObj);
+   GLuint src_offset, src_stride;
+   GLuint dst_x, dst_y, dst_stride;
+   /* Zero-copy path: hand the PBO to intel_region_attach_pbo() rather than copy. */
+   if (!_mesa_is_bufferobj(unpack->BufferObj) ||
+       intel->ctx._ImageTransferState ||
+       unpack->SkipPixels || unpack->SkipRows) {
+      DBG("%s: failure 1\n", __FUNCTION__);
+      return GL_FALSE;   /* no PBO, pixel transfer ops active, or skip offsets set */
+   }
+
+   /* note: potential 64-bit ptr to 32-bit int cast */
+   src_offset = (GLuint) (unsigned long) pixels;   /* pixels is used as an offset into the PBO */
+
+   if (unpack->RowLength > 0)
+      src_stride = unpack->RowLength;
+   else
+      src_stride = width;
+
+   intel_miptree_get_image_offset(intelImage->mt, intelImage->level,
+				  intelImage->face, 0,
+				  &dst_x, &dst_y);
+
+   dst_stride = intelImage->mt->region->pitch;
+
+   if (src_stride != dst_stride || dst_x != 0 || dst_y != 0 ||
+       src_offset != 0) {
+      DBG("%s: failure 2\n", __FUNCTION__);
+      return GL_FALSE;   /* strides differ, or data/image not at offset 0 */
+   }
+
+   intel_region_attach_pbo(intel, intelImage->mt->region, pbo);
+
+   return GL_TRUE;
+}
+
+
+static void
+intelTexImage(GLcontext * ctx,
+              GLint dims,
+              GLenum target, GLint level,
+              GLint internalFormat,
+              GLint width, GLint height, GLint depth,
+              GLint border,
+              GLenum format, GLenum type, const void *pixels,
+              const struct gl_pixelstore_attrib *unpack,
+              struct gl_texture_object *texObj,
+              struct gl_texture_image *texImage, GLsizei imageSize,
+              GLboolean compressed)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+   GLint postConvWidth = width;
+   GLint postConvHeight = height;
+   GLint texelBytes, sizeInBytes;
+   GLuint dstRowStride = 0, srcRowStride = texImage->RowStride;
+   /* Common body of the TexImage1D/2D/3D and CompressedTexImage2D hooks. */
+   DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(target), level, width, height, depth, border);
+
+   intelImage->face = target_to_face(target);
+   intelImage->level = level;
+
+   if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) {
+      _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth,
+                                         &postConvHeight);
+   }
+
+   if (_mesa_is_format_compressed(texImage->TexFormat)) {
+      texelBytes = 0;   /* compressed: no per-texel size is used on this path */
+   }
+   else {
+      texelBytes = _mesa_get_format_bytes(texImage->TexFormat);
+      
+      /* Minimum pitch of 32 bytes */
+      if (postConvWidth * texelBytes < 32) {
+	 postConvWidth = 32 / texelBytes;
+	 texImage->RowStride = postConvWidth;
+      }
+
+      if (!intelImage->mt) {      
+	  assert(texImage->RowStride == postConvWidth);
+      }
+   }
+
+   /* Release the reference to a potentially orphaned buffer.   
+    * Release any old malloced memory.
+    */
+   if (intelImage->mt) {
+      intel_miptree_release(intel, &intelImage->mt);
+      assert(!texImage->Data);
+   }
+   else if (texImage->Data) {
+      _mesa_free_texmemory(texImage->Data);
+      texImage->Data = NULL;
+   }
+
+   /* If this is the only texture image in the tree, could call
+    * bmBufferData with NULL data to free the old block and avoid
+    * waiting on any outstanding fences.
+    */
+   if (intelObj->mt &&
+       intelObj->mt->first_level == level &&
+       intelObj->mt->last_level == level &&
+       intelObj->mt->target != GL_TEXTURE_CUBE_MAP_ARB &&
+       !intel_miptree_match_image(intelObj->mt, &intelImage->base)) {
+
+      DBG("release it\n");
+      intel_miptree_release(intel, &intelObj->mt);
+      assert(!intelObj->mt);
+   }
+
+   if (!intelObj->mt) {
+      guess_and_alloc_mipmap_tree(intel, intelObj, intelImage, pixels == NULL);
+      if (!intelObj->mt) {
+	 DBG("guess_and_alloc_mipmap_tree: failed\n");
+      }
+   }
+
+   assert(!intelImage->mt);   /* any previous image tree was released above */
+
+   if (intelObj->mt &&
+       intel_miptree_match_image(intelObj->mt, &intelImage->base)) {
+
+      intel_miptree_reference(&intelImage->mt, intelObj->mt);
+      assert(intelImage->mt);
+   } else if (intelImage->base.Border == 0) {
+      int comp_byte = 0;   /* note: texelBytes below shadows the outer one */
+      GLuint texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat);
+      GLenum baseFormat = _mesa_get_format_base_format(intelImage->base.TexFormat);
+      if (_mesa_is_format_compressed(intelImage->base.TexFormat)) {
+	 comp_byte =
+	    intel_compressed_num_bytes(intelImage->base.TexFormat);
+      }
+
+      /* Didn't fit in the object miptree, but it's suitable for inclusion in
+       * a miptree, so create one just for our level and store it in the image.
+       * It'll get moved into the object miptree at validate time.
+       */
+      intelImage->mt = intel_miptree_create(intel, target,
+					    baseFormat,
+					    internalFormat,
+					    level, level,
+					    width, height, depth,
+					    texelBytes,
+					    comp_byte, pixels == NULL);
+
+   }
+
+   /* PBO fastpaths:
+    */
+   if (dims <= 2 &&
+       intelImage->mt &&
+       _mesa_is_bufferobj(unpack->BufferObj) &&
+       check_pbo_format(internalFormat, format,
+                        type, intelImage->base.TexFormat)) {
+
+      DBG("trying pbo upload\n");
+
+      /* Attempt to texture directly from PBO data (zero copy upload).
+       *
+       * NOTE(review): an earlier note called this path disabled because
+       * it can lead to worse as well as better performance (in particular
+       * when intel_region_cow() is required), yet the call below is live.
+       */
+      if (intelObj->mt == intelImage->mt &&
+          intelObj->mt->first_level == level &&
+          intelObj->mt->last_level == level) {
+
+         if (try_pbo_zcopy(intel, intelImage, unpack,
+                           internalFormat,
+                           width, height, format, type, pixels)) {
+
+            DBG("pbo zcopy upload succeeded\n");
+            return;
+         }
+      }
+
+
+      /* Otherwise, attempt to use the blitter for PBO image uploads.
+       */
+      if (try_pbo_upload(intel, intelImage, unpack,
+                         internalFormat,
+                         width, height, format, type, pixels)) {
+         DBG("pbo upload succeeded\n");
+         return;
+      }
+
+      DBG("pbo upload failed\n");
+   }
+
+   /* intelCopyTexImage calls this function with pixels == NULL, with
+    * the expectation that the mipmap tree will be set up but nothing
+    * more will be done.  NOTE(review): there is no early return here;
+    * a NULL pixels appears to just skip the map and copy steps below. */
+   if (compressed) {
+      pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize, pixels,
+						      unpack,
+						      "glCompressedTexImage");
+   } else {
+      pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, 1,
+					   format, type,
+					   pixels, unpack, "glTexImage");
+   }
+
+   if (intelImage->mt) {
+      if (pixels != NULL) {
+	 /* Flush any queued rendering with the texture before mapping. */
+	 if (drm_intel_bo_references(intel->batch->buf,
+				     intelImage->mt->region->buffer)) {
+	    intel_flush(ctx);
+	 }
+         texImage->Data = intel_miptree_image_map(intel,
+                                                  intelImage->mt,
+                                                  intelImage->face,
+                                                  intelImage->level,
+                                                  &dstRowStride,
+                                                  intelImage->base.ImageOffsets);
+      }
+
+      texImage->RowStride = dstRowStride / intelImage->mt->cpp;   /* 0 if nothing was mapped */
+   }
+   else {
+      /* Allocate regular memory and store the image there temporarily.   */
+      if (_mesa_is_format_compressed(texImage->TexFormat)) {
+         sizeInBytes = _mesa_format_image_size(texImage->TexFormat,
+                                               texImage->Width,
+                                               texImage->Height,
+                                               texImage->Depth);
+         dstRowStride =
+            _mesa_format_row_stride(texImage->TexFormat, width);
+         assert(dims != 3);
+      }
+      else {
+         dstRowStride = postConvWidth * texelBytes;
+         sizeInBytes = depth * dstRowStride * postConvHeight;
+      }
+
+      texImage->Data = _mesa_alloc_texmemory(sizeInBytes);   /* NOTE(review): result is not checked */
+   }
+
+   DBG("Upload image %dx%dx%d row_len %d "
+       "pitch %d pixels %d compressed %d\n",
+       width, height, depth, width * texelBytes, dstRowStride,
+       pixels ? 1 : 0, compressed);
+
+   /* Copy data.  Would like to know when it's ok for us to eg. use
+    * the blitter to copy.  Or, use the hardware to do the format
+    * conversion and copy:
+    */
+   if (pixels) {
+       if (compressed) {
+	   if (intelImage->mt) {
+	       struct intel_region *dst = intelImage->mt->region;
+	       _mesa_copy_rect(texImage->Data, dst->cpp, dst->pitch,
+			       0, 0,
+			       intelImage->mt->level[level].width,
+			       (intelImage->mt->level[level].height+3)/4,   /* rows, assuming 4-texel-high blocks */
+			       pixels,
+			       srcRowStride,
+			       0, 0);
+	   }
+           else {
+	       memcpy(texImage->Data, pixels, imageSize);
+           }
+       }
+       else if (!_mesa_texstore(ctx, dims, 
+                                texImage->_BaseFormat, 
+                                texImage->TexFormat, 
+                                texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */
+                                dstRowStride,
+                                texImage->ImageOffsets,
+                                width, height, depth,
+                                format, type, pixels, unpack)) {
+          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+       }
+   }
+
+   _mesa_unmap_teximage_pbo(ctx, unpack);
+
+   if (intelImage->mt) {
+      if (pixels != NULL)
+         intel_miptree_image_unmap(intel, intelImage->mt);
+      texImage->Data = NULL;   /* with a miptree, Data only holds the temporary mapping */
+   }
+}
+
+
+static void
+intelTexImage3D(GLcontext * ctx, GLenum target, GLint level,
+                GLint internalFormat, GLint width, GLint height,
+                GLint depth, GLint border,
+                GLenum format, GLenum type, const void *pixels,
+                const struct gl_pixelstore_attrib *unpack,
+                struct gl_texture_object *texObj,
+                struct gl_texture_image *texImage)
+{
+   /* TexImage3D hook: uncompressed upload, so there is no imageSize. */
+   const GLsizei imageSize = 0;
+   intelTexImage(ctx, 3, target, level, internalFormat,
+                 width, height, depth, border, format, type, pixels,
+                 unpack, texObj, texImage, imageSize, GL_FALSE);
+}
+
+
+static void
+intelTexImage2D(GLcontext * ctx, GLenum target, GLint level,
+                GLint internalFormat, GLint width, GLint height,
+                GLint border, GLenum format, GLenum type, const void *pixels,
+                const struct gl_pixelstore_attrib *unpack,
+                struct gl_texture_object *texObj,
+                struct gl_texture_image *texImage)
+{
+   /* TexImage2D hook: depth is fixed at 1, no imageSize (uncompressed). */
+   const GLsizei imageSize = 0;
+   intelTexImage(ctx, 2, target, level, internalFormat,
+                 width, height, 1, border, format, type, pixels,
+                 unpack, texObj, texImage, imageSize, GL_FALSE);
+}
+
+
+static void
+intelTexImage1D(GLcontext * ctx, GLenum target, GLint level,
+                GLint internalFormat, GLint width, GLint border,
+                GLenum format, GLenum type, const void *pixels,
+                const struct gl_pixelstore_attrib *unpack,
+                struct gl_texture_object *texObj,
+                struct gl_texture_image *texImage)
+{
+   /* TexImage1D hook: height and depth are fixed at 1, no imageSize. */
+   const GLsizei imageSize = 0;
+   intelTexImage(ctx, 1, target, level, internalFormat,
+                 width, 1, 1, border, format, type, pixels,
+                 unpack, texObj, texImage, imageSize, GL_FALSE);
+}
+
+
+static void
+intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+                           GLint internalFormat, GLint width, GLint height,
+                           GLint border, GLsizei imageSize, const GLvoid *data,
+                           struct gl_texture_object *texObj,
+                           struct gl_texture_image *texImage )
+{
+   /* CompressedTexImage2D hook: no format/type, context unpack state. */
+   intelTexImage(ctx, 2, target, level, internalFormat,
+                 width, height, 1, border, 0 /* format */, 0 /* type */, data,
+                 &ctx->Unpack, texObj, texImage, imageSize, GL_TRUE);
+}
+
+
+/**
+ * Need to map texture image into memory before copying image data,
+ * then unmap it.
+ */
+static void
+intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
+		    GLenum format, GLenum type, GLvoid * pixels,
+		    struct gl_texture_object *texObj,
+		    struct gl_texture_image *texImage, GLboolean compressed)
+{
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+   struct intel_context *intel = intel_context(ctx);
+   struct gl_texture_image *baseImage = &intelImage->base;
+
+   /* The texture may have been rendered to, so queued rendering has to
+    * be flushed before its contents are read back.  This could probably
+    * be predicated on texObj->_RenderToTexture.
+    */
+   intel_flush(ctx);
+
+   /* Step 1: make the texels reachable through base.Data. */
+   if (!intelImage->mt) {
+      /* No miptree: the image should already be stored in base.Data.
+       * Data doing double duty as both main-memory storage and the
+       * mapping of a hardware image is confusing; a separate
+       * texImage->Map field, leaving Data purely for storage, would
+       * be cleaner.
+       */
+      assert(intelImage->base.Data);
+   }
+   else {
+      /* The image is stored in hardware format in a kernel-managed
+       * buffer, so it must be mapped (and later unmapped) explicitly.
+       * The map call writes into RowStride, which is then divided by
+       * the miptree's cpp.
+       */
+      baseImage->Data = intel_miptree_image_map(intel,
+                                                intelImage->mt,
+                                                intelImage->face,
+                                                intelImage->level,
+                                                &baseImage->RowStride,
+                                                baseImage->ImageOffsets);
+      baseImage->RowStride /= intelImage->mt->cpp;
+   }
+
+
+   /* Step 2: hand the copy into `pixels` to the core Mesa helpers. */
+   if (!compressed) {
+      _mesa_get_teximage(ctx, target, level, format, type, pixels,
+                         texObj, texImage);
+   }
+   else {
+      _mesa_get_compressed_teximage(ctx, target, level, pixels,
+                                    texObj, texImage);
+   }
+
+   /* Step 3: undo the mapping made in step 1. */
+   if (intelImage->mt) {
+      intel_miptree_image_unmap(intel, intelImage->mt);
+      baseImage->Data = NULL;
+   }
+}
+
+
+static void
+intelGetTexImage(GLcontext * ctx, GLenum target, GLint level, GLenum format,
+                 GLenum type, GLvoid * pixels, struct gl_texture_object *texObj,
+                 struct gl_texture_image *texImage)
+{
+   const GLboolean compressed = GL_FALSE;   /* GetTexImage hook */
+   intel_get_tex_image(ctx, target, level, format, type, pixels,
+                       texObj, texImage, compressed);
+}
+
+
+static void
+intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
+                           GLvoid *pixels, struct gl_texture_object *texObj,
+                           struct gl_texture_image *texImage)
+{
+   /* GetCompressedTexImage hook: format and type are unused, pass 0. */
+   intel_get_tex_image(ctx, target, level, 0 /* format */, 0 /* type */,
+                       pixels, texObj, texImage, GL_TRUE);
+}
+
+/**
+ * Bind the front-left color buffer of drawable dPriv as level 0 of the
+ * texture currently bound to target.  Returns without touching the
+ * texture when there is no current texture object, the drawable has no
+ * front-left renderbuffer region, or no miptree can be created for it.
+ */
+void
+intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
+		   GLint texture_format, __DRIdrawable *dPriv)
+{
+   struct gl_framebuffer *fb = dPriv->driverPrivate;
+   struct intel_context *intel = pDRICtx->driverPrivate;
+   GLcontext *ctx = &intel->ctx;
+   struct intel_texture_object *intelObj;
+   struct intel_texture_image *intelImage;
+   struct intel_mipmap_tree *mt;
+   struct intel_renderbuffer *rb;
+   struct gl_texture_object *texObj;
+   struct gl_texture_image *texImage;
+   int level = 0, internalFormat;
+
+   texObj = _mesa_get_current_tex_object(ctx, target);
+   intelObj = intel_texture_object(texObj);
+
+   if (!intelObj)
+      return;
+
+   if (dPriv->lastStamp != dPriv->dri2.stamp ||
+       !pDRICtx->driScreenPriv->dri2.useInvalidate)
+      intel_update_renderbuffers(pDRICtx, dPriv);
+
+   rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
+   /* If rb is missing or its region isn't set, intel_update_renderbuffers
+    * was unable to get the buffers for the drawable; nothing to bind.
+    */
+   if (rb == NULL || rb->region == NULL)
+      return;
+
+   internalFormat =
+      (texture_format == __DRI_TEXTURE_FORMAT_RGB) ? GL_RGB : GL_RGBA;
+
+   mt = intel_miptree_create_for_region(intel, target,
+					internalFormat,
+					0, 0, rb->region, 1, 0);
+   if (mt == NULL)
+       return;
+
+   _mesa_lock_texture(&intel->ctx, texObj);
+
+   texImage = _mesa_get_tex_image(&intel->ctx, texObj, target, level);
+   intelImage = intel_texture_image(texImage);
+
+   if (intelImage->mt) {
+      intel_miptree_release(intel, &intelImage->mt);
+      assert(!texImage->Data);
+   }
+   if (intelObj->mt)
+      intel_miptree_release(intel, &intelObj->mt);
+
+   intelObj->mt = mt;   /* the object takes over the reference from creation */
+   _mesa_init_teximage_fields(&intel->ctx, target, texImage,
+			      rb->region->width, rb->region->height, 1,
+			      0, internalFormat);
+
+   intelImage->face = target_to_face(target);
+   intelImage->level = level;
+   texImage->TexFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB)
+      ? MESA_FORMAT_XRGB8888 : MESA_FORMAT_ARGB8888;
+   texImage->RowStride = rb->region->pitch;
+   intel_miptree_reference(&intelImage->mt, intelObj->mt);
+
+   if (!intel_miptree_match_image(intelObj->mt, &intelImage->base))
+      fprintf(stderr, "miptree doesn't match image\n");
+
+   _mesa_unlock_texture(&intel->ctx, texObj);
+}
+
+void
+intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+{
+   /* The old interface had no format argument; always use RGBA, which
+    * is what the implementation did at the time. */
+   const GLint legacy_format = __DRI_TEXTURE_FORMAT_RGBA;
+   intelSetTexBuffer2(pDRICtx, target, legacy_format, dPriv);
+}
+
+#if FEATURE_OES_EGL_image
+static void
+intel_image_target_texture_2d(GLcontext *ctx, GLenum target,
+			      struct gl_texture_object *texObj,
+			      struct gl_texture_image *texImage,
+			      GLeglImageOES image_handle)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_object *intelObj = intel_texture_object(texObj);
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+   struct intel_mipmap_tree *mt;
+   __DRIscreen *screen;
+   __DRIimage *image;
+   /* Look up the EGLImage handle, then bind its region as level 0 of texObj. */
+   screen = intel->intelScreen->driScrnPriv;
+   image = screen->dri2.image->lookupEGLImage(intel->driContext, image_handle,
+					      intel->driContext->loaderPrivate);
+   if (image == NULL)
+      return;   /* lookup failed: the texture is left as it was */
+
+   mt = intel_miptree_create_for_region(intel, target,
+					image->internal_format,
+					0, 0, image->region, 1, 0);
+   if (mt == NULL)
+       return;
+
+   if (intelImage->mt) {
+      intel_miptree_release(intel, &intelImage->mt);
+      assert(!texImage->Data);
+   }
+   if (intelObj->mt)
+      intel_miptree_release(intel, &intelObj->mt);
+
+   intelObj->mt = mt;   /* the new tree replaces the object's old one */
+   _mesa_init_teximage_fields(&intel->ctx, target, texImage,
+			      image->region->width, image->region->height, 1,
+			      0, image->internal_format);
+
+   intelImage->face = target_to_face(target);
+   intelImage->level = 0;
+   texImage->TexFormat = image->format;
+   texImage->RowStride = image->region->pitch;
+   intel_miptree_reference(&intelImage->mt, intelObj->mt);   /* image shares the object's tree */
+
+   if (!intel_miptree_match_image(intelObj->mt, &intelImage->base))
+      fprintf(stderr, "miptree doesn't match image\n");   /* reported only; not treated as fatal */
+}
+#endif
+
+void
+intelInitTextureImageFuncs(struct dd_function_table *functions)
+{
+   /* Uploads: uncompressed 1D/2D/3D and compressed 2D. */
+   functions->TexImage1D = intelTexImage1D;
+   functions->TexImage2D = intelTexImage2D;
+   functions->TexImage3D = intelTexImage3D;
+   functions->CompressedTexImage2D = intelCompressedTexImage2D;
+   /* Readback. */
+   functions->GetTexImage = intelGetTexImage;
+   functions->GetCompressedTexImage = intelGetCompressedTexImage;
+#if FEATURE_OES_EGL_image
+   functions->EGLImageTargetTexture2D = intel_image_target_texture_2d;
+#endif
+}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.c b/src/mesa/drivers/dri/intel/intel_tex_layout.c
new file mode 100644
index 0000000000..d39733b6c5
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_tex_layout.c
@@ -0,0 +1,136 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  *   Michel Dänzer <michel@tungstengraphics.com>
+  */
+
+#include "intel_mipmap_tree.h"
+#include "intel_tex_layout.h"
+#include "intel_context.h"
+#include "main/macros.h"
+
+void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h)
+{
+    /* Alignment unit: 8x4 for the FXT1 formats, 4x4 for the S3TC/DXT
+     * formats, 4x2 for every other internal format. */
+    GLuint unit_w = 4, unit_h = 2;
+    switch (internalFormat) {
+    case GL_COMPRESSED_RGB_FXT1_3DFX:
+    case GL_COMPRESSED_RGBA_FXT1_3DFX:
+        unit_w = 8;
+        unit_h = 4;
+        break;
+    case GL_RGB_S3TC:
+    case GL_RGB4_S3TC:
+    case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+    case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+    case GL_RGBA_S3TC:
+    case GL_RGBA4_S3TC:
+    case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+    case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+        unit_h = 4;
+        break;
+    default:
+        break;
+    }
+    *w = unit_w;
+    *h = unit_h;
+}
+
+/**
+ * Lay out a 2D miptree: first level at the origin, second level below it,
+ * later levels in a column to the right of the second.  Sets
+ * mt->total_width/total_height and records each level's position.
+ */
+void i945_miptree_layout_2d(struct intel_context *intel,
+			    struct intel_mipmap_tree *mt,
+			    uint32_t tiling, int nr_images)
+{
+   GLuint align_h = 2, align_w = 4;
+   GLuint level;
+   GLuint x = 0, y = 0;
+   GLuint width = mt->width0;
+   GLuint height = mt->height0;
+
+   mt->total_width = mt->width0;
+   intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+
+   if (mt->compressed)
+       mt->total_width = ALIGN(mt->width0, align_w);
+
+   /* May need to adjust width to accommodate the placement of
+    * the 2nd mipmap.  This occurs when the alignment
+    * constraints of mipmap placement push the right edge of the
+    * 2nd mipmap out past the width of its parent.
+    */
+   if (mt->first_level != mt->last_level) {
+       GLuint mip1_width;
+
+       if (mt->compressed) {
+           mip1_width = ALIGN(minify(mt->width0), align_w)
+               + ALIGN(minify(minify(mt->width0)), align_w);
+       } else {
+           mip1_width = ALIGN(minify(mt->width0), align_w)
+               + minify(minify(mt->width0));
+       }
+
+       if (mip1_width > mt->total_width)
+           mt->total_width = mip1_width;
+   }
+
+   mt->total_height = 0;
+
+   for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
+      GLuint img_height;
+
+      intel_miptree_set_level_info(mt, level, nr_images, x, y, width,
+				   height, 1);
+
+      /* Compressed: height in rows of 4-texel-high blocks, rounded up. */
+      if (mt->compressed)
+	 img_height = MAX2(1, (height + 3) / 4);
+      else
+	 img_height = ALIGN(height, align_h);
+
+      /* Because the images are packed better, the final offset
+       * might not be the maximal one:
+       */
+      mt->total_height = MAX2(mt->total_height, y + img_height);
+
+      /* Layout_below: step right after second mipmap.
+       */
+      if (level == mt->first_level + 1)
+	 x += ALIGN(width, align_w);
+      else
+	 y += img_height;
+
+      width  = minify(width);
+      height = minify(height);
+   }
+}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.h b/src/mesa/drivers/dri/intel/intel_tex_layout.h
new file mode 100644
index 0000000000..1c8c53e545
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_tex_layout.h
@@ -0,0 +1,44 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  *   Michel Dänzer <michel@tungstengraphics.com>
+  */
+
+#include "main/macros.h"
+
+/* Size of the next-smaller mipmap level: half of d, but never less than 1. */
+static INLINE GLuint minify( GLuint d )
+{
+   return (d >> 1) ? (d >> 1) : 1;
+}
+/* Alignment-unit query args: internal format, out align_w, out align_h. */
+extern void i945_miptree_layout_2d(struct intel_context *intel,
+				   struct intel_mipmap_tree *mt,
+				   uint32_t tiling, int nr_images);
+extern void intel_get_texture_alignment_unit(GLenum, GLuint *, GLuint *);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h
new file mode 100644
index 0000000000..5f60e0ea4f
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h
@@ -0,0 +1,80 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef _INTEL_TEX_OBJ_H
+#define _INTEL_TEX_OBJ_H
+
+struct intel_texture_object
+{
+   struct gl_texture_object base;       /* "Parent" object; must stay first */
+
+   /* Inclusive level range the mipmap tree must cover once validated;
+    * written by intel_calculate_first_last_level().
+    */
+   GLuint firstLevel;
+   GLuint lastLevel;
+
+   /* Offset for firstLevel image:
+    */
+   GLuint textureOffset;
+
+   /* On validation any active images held in main memory or in other
+    * regions will be copied to this region and the old storage freed.
+    */
+   struct intel_mipmap_tree *mt;
+};
+
+struct intel_texture_image
+{
+   struct gl_texture_image base;   /* core image; first, so casts work */
+
+   /* Mipmap level and cube face of this image; these aren't stored in
+    * gl_texture_image. */
+   GLuint level;
+   GLuint face;
+
+   /* If intelImage->mt != NULL, image data is stored here.
+    * Else if intelImage->base.Data != NULL, image is stored there.
+    * Else there is no image data.
+    */
+   struct intel_mipmap_tree *mt;
+   GLboolean used_as_render_target;   /* set: validation won't copy it */
+};
+
+static INLINE struct intel_texture_object *
+intel_texture_object(struct gl_texture_object *obj)
+{
+   return (struct intel_texture_object *) obj;   /* 'base' is first member */
+}
+
+static INLINE struct intel_texture_image *
+intel_texture_image(struct gl_texture_image *img)
+{
+   return (struct intel_texture_image *) img;   /* 'base' is first member */
+}
+
+#endif /* _INTEL_TEX_OBJ_H */
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
new file mode 100644
index 0000000000..b7ce50a820
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -0,0 +1,277 @@
+
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "main/mtypes.h"
+#include "main/texobj.h"
+#include "main/texstore.h"
+#include "main/texcompress.h"
+#include "main/enums.h"
+
+#include "intel_context.h"
+#include "intel_tex.h"
+#include "intel_mipmap_tree.h"
+#include "intel_blit.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+/* Common worker behind TexSubImage1D/2D/3D and CompressedTexSubImage2D:
+ * stores 'pixels' (client memory or PBO) into texImage at the given offsets.
+ * 'imageSize' is only consulted when 'compressed' is set. */
+static void
+intelTexSubimage(GLcontext * ctx, GLint dims, GLenum target, GLint level,
+                 GLint xoffset, GLint yoffset, GLint zoffset,
+                 GLint width, GLint height, GLint depth, GLsizei imageSize,
+                 GLenum format, GLenum type, const void *pixels,
+                 const struct gl_pixelstore_attrib *packing,
+                 struct gl_texture_object *texObj,
+                 struct gl_texture_image *texImage, GLboolean compressed)
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct intel_texture_image *intelImage = intel_texture_image(texImage);
+   GLuint dstRowStride = 0;
+   drm_intel_bo *temp_bo = NULL, *dst_bo = NULL;
+   unsigned int blit_x = 0, blit_y = 0;
+
+   DBG("%s target %s level %d offset %d,%d %dx%d\n", __FUNCTION__,
+       _mesa_lookup_enum_by_nr(target),
+       level, xoffset, yoffset, width, height);
+
+   intel_flush(ctx);
+
+   /* Validate/resolve the source pointer; NULL means nothing to upload. */
+   if (compressed)
+      pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize,
+                                                      pixels, packing,
+                                                      "glCompressedTexSubImage");
+   else
+      pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth,
+                                           format, type, pixels, packing,
+                                           "glTexSubImage");
+   if (!pixels)
+      return;
+
+   intel_prepare_render(intel);
+
+   /* Map buffer if necessary.  Need to lock to prevent other contexts
+    * from uploading the buffer under us.
+    */
+   if (intelImage->mt) {
+      dst_bo = intel_region_buffer(intel, intelImage->mt->region,
+                                   INTEL_WRITE_PART);
+
+      /* Destination still busy: stage the texels in a linear temporary
+       * buffer and blit them in afterwards rather than mapping dst_bo. */
+      if (!compressed &&
+          intelImage->mt->region->tiling != I915_TILING_Y &&
+          intel->gen < 6 && target == GL_TEXTURE_2D &&
+          drm_intel_bo_busy(dst_bo)) {
+         unsigned long pitch;
+         uint32_t tiling_mode = I915_TILING_NONE;
+         temp_bo = drm_intel_bo_alloc_tiled(intel->bufmgr, "subimage blit bo",
+                                            width, height, intelImage->mt->cpp,
+                                            &tiling_mode, &pitch, 0);
+         /* No usable staging buffer: fall back to the direct map below. */
+         if (temp_bo && drm_intel_gem_bo_map_gtt(temp_bo) != 0) {
+            drm_intel_bo_unreference(temp_bo);
+            temp_bo = NULL;
+         }
+         if (temp_bo) {
+            texImage->Data = temp_bo->virtual;
+            texImage->ImageOffsets[0] = 0;
+            dstRowStride = pitch;
+            intel_miptree_get_image_offset(intelImage->mt, level,
+                                           intelImage->face, 0,
+                                           &blit_x, &blit_y);
+            /* The blit applies the offsets; texstore then writes at 0,0. */
+            blit_x += xoffset;
+            blit_y += yoffset;
+            xoffset = 0;
+            yoffset = 0;
+         }
+      }
+      if (!temp_bo) {
+         texImage->Data = intel_miptree_image_map(intel, intelImage->mt,
+                                                  intelImage->face,
+                                                  intelImage->level,
+                                                  &dstRowStride,
+                                                  texImage->ImageOffsets);
+      }
+   } else {
+      if (_mesa_is_format_compressed(texImage->TexFormat)) {
+         dstRowStride =
+            _mesa_format_row_stride(texImage->TexFormat, width);
+         assert(dims != 3);
+      }
+      else {
+         dstRowStride = texImage->RowStride * _mesa_get_format_bytes(texImage->TexFormat);
+      }
+   }
+
+   assert(dstRowStride);
+
+   if (compressed) {
+      if (intelImage->mt) {
+         struct intel_region *dst = intelImage->mt->region;
+
+         _mesa_copy_rect(texImage->Data, dst->cpp, dst->pitch,
+                         xoffset, yoffset / 4,
+                         (width + 3)  & ~3, (height + 3) / 4,
+                         pixels, (width + 3) & ~3, 0, 0);
+      }
+      else {
+        memcpy(texImage->Data, pixels, imageSize);
+      }
+   }
+   else {
+      if (!_mesa_texstore(ctx, dims, texImage->_BaseFormat,
+                          texImage->TexFormat, texImage->Data,
+                          xoffset, yoffset, zoffset,
+                          dstRowStride, texImage->ImageOffsets,
+                          width, height, depth,
+                          format, type, pixels, packing)) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage");
+      }
+
+      if (temp_bo) {
+         GLboolean ret;
+         unsigned int dst_pitch = intelImage->mt->region->pitch *
+            intelImage->mt->cpp;
+
+         drm_intel_gem_bo_unmap_gtt(temp_bo);
+         texImage->Data = NULL;
+
+         ret = intelEmitCopyBlit(intel, intelImage->mt->cpp,
+                                 dstRowStride / intelImage->mt->cpp,
+                                 temp_bo, 0, GL_FALSE,
+                                 dst_pitch / intelImage->mt->cpp, dst_bo, 0,
+                                 intelImage->mt->region->tiling,
+                                 0, 0, blit_x, blit_y, width, height,
+                                 GL_COPY);
+         assert(ret);
+      }
+   }
+
+   _mesa_unmap_teximage_pbo(ctx, packing);
+
+   if (temp_bo) {
+      drm_intel_bo_unreference(temp_bo);
+      temp_bo = NULL;
+   } else if (intelImage->mt) {
+      intel_miptree_image_unmap(intel, intelImage->mt);
+      texImage->Data = NULL;
+   }
+}
+
+
+/* dd_function_table::TexSubImage3D hook: forwards to the shared worker as
+ * an uncompressed three-dimensional update.
+ */
+static void
+intelTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
+                   GLint xoffset, GLint yoffset, GLint zoffset,
+                   GLsizei width, GLsizei height, GLsizei depth,
+                   GLenum format, GLenum type, const GLvoid * pixels,
+                   const struct gl_pixelstore_attrib *packing,
+                   struct gl_texture_object *texObj,
+                   struct gl_texture_image *texImage)
+{
+   const GLint dims = 3;
+
+   intelTexSubimage(ctx, dims, target, level, xoffset, yoffset, zoffset,
+                    width, height, depth, 0 /* imageSize */,
+                    format, type, pixels, packing, texObj, texImage, GL_FALSE);
+}
+
+
+/* dd_function_table::TexSubImage2D hook: forwards to the shared worker as
+ * an uncompressed two-dimensional update (zoffset 0, depth 1).
+ */
+static void
+intelTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+                   GLint xoffset, GLint yoffset,
+                   GLsizei width, GLsizei height,
+                   GLenum format, GLenum type, const GLvoid * pixels,
+                   const struct gl_pixelstore_attrib *packing,
+                   struct gl_texture_object *texObj,
+                   struct gl_texture_image *texImage)
+{
+   const GLint dims = 2;
+
+   intelTexSubimage(ctx, dims, target, level, xoffset, yoffset, 0,
+                    width, height, 1, 0 /* imageSize */,
+                    format, type, pixels, packing, texObj, texImage, GL_FALSE);
+}
+
+
+/* dd_function_table::TexSubImage1D hook: forwards to the shared worker as
+ * an uncompressed one-dimensional update (height and depth of 1).
+ */
+static void
+intelTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+                   GLint xoffset, GLsizei width,
+                   GLenum format, GLenum type, const GLvoid * pixels,
+                   const struct gl_pixelstore_attrib *packing,
+                   struct gl_texture_object *texObj,
+                   struct gl_texture_image *texImage)
+{
+   const GLint dims = 1;
+
+   intelTexSubimage(ctx, dims, target, level,
+                    xoffset, 0, 0,   /* no y/z offset */
+                    width, 1, 1, 0 /* imageSize */,
+                    format, type, pixels, packing, texObj, texImage, GL_FALSE);
+}
+
+/* dd_function_table::CompressedTexSubImage2D hook: forwards to the shared
+ * worker with the compressed flag set and the context's unpack state. */
+static void
+intelCompressedTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+                             GLint xoffset, GLint yoffset,
+                             GLsizei width, GLsizei height,
+                             GLenum format, GLsizei imageSize,
+                             const GLvoid * pixels,
+                             struct gl_texture_object *texObj,
+                             struct gl_texture_image *texImage)
+{
+   const struct gl_pixelstore_attrib *unpack = &ctx->Unpack;
+
+   intelTexSubimage(ctx, 2, target, level, xoffset, yoffset, 0,
+                    width, height, 1, imageSize, format, 0 /* type */,
+                    pixels, unpack, texObj, texImage, GL_TRUE);
+}
+
+
+/* Plug this file's sub-image upload hooks into the driver function table. */
+void
+intelInitTextureSubImageFuncs(struct dd_function_table *functions)
+{
+   functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D;
+   functions->TexSubImage3D = intelTexSubImage3D;
+   functions->TexSubImage2D = intelTexSubImage2D;
+   functions->TexSubImage1D = intelTexSubImage1D;
+}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
new file mode 100644
index 0000000000..ed5c5d896b
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -0,0 +1,304 @@
+#include "main/mtypes.h"
+#include "main/macros.h"
+
+#include "intel_context.h"
+#include "intel_mipmap_tree.h"
+#include "intel_tex.h"
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+/**
+ * Work out the range of mipmap levels the hardware actually needs and
+ * record it in intelObj->firstLevel / intelObj->lastLevel.
+ * The result depends on the target, the minification filter, the base
+ * image size, GL_TEXTURE_BASE_LEVEL and GL_TEXTURE_MAX_LEVEL and, on
+ * gen2 only, GL_TEXTURE_MIN_LOD / GL_TEXTURE_MAX_LOD.  Targets not listed
+ * below leave intelObj untouched.
+ */
+static void
+intel_calculate_first_last_level(struct intel_context *intel,
+                                 struct intel_texture_object *intelObj)
+{
+   struct gl_texture_object *tObj = &intelObj->base;
+   const struct gl_texture_image *const baseImage =
+      tObj->Image[0][tObj->BaseLevel];
+   /* Signed on purpose: MinLod/MaxLod may be negative, and signed
+    * intermediates spare us extra sign checks.
+    */
+   int lo, hi;
+
+   switch (tObj->Target) {
+   case GL_TEXTURE_RECTANGLE_NV:
+   case GL_TEXTURE_4D_SGIS:
+      /* These targets always use level 0 only. */
+      lo = hi = 0;
+      break;
+
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+   case GL_TEXTURE_CUBE_MAP:
+      if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
+         /* Non-mipmapped filters only care about GL_TEXTURE_BASE_LEVEL. */
+         lo = hi = tObj->BaseLevel;
+      }
+      else if (intel->gen == 2) {
+         /* Fold the LOD clamps into the level range: both ends are clamped
+          * to [BaseLevel, BaseLevel + MaxLog2], then hi to MaxLevel.
+          */
+         lo = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5);
+         lo = MAX2(lo, tObj->BaseLevel);
+         lo = MIN2(lo, tObj->BaseLevel + baseImage->MaxLog2);
+         hi = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5);
+         hi = MAX2(hi, tObj->BaseLevel);
+         hi = MIN2(hi, tObj->BaseLevel + baseImage->MaxLog2);
+         hi = MIN2(hi, tObj->MaxLevel);
+         hi = MAX2(lo, hi);       /* need at least one level */
+      }
+      else {
+         /* Min/max LOD are handled in sampler state.  Re-laying out the
+          * texture just because clamping changed would cost a bunch of
+          * blitting and probably save no memory, since the other levels
+          * have to be kept around anyway.
+          */
+         lo = tObj->BaseLevel;
+         hi = MIN2(tObj->BaseLevel + baseImage->MaxLog2, tObj->MaxLevel);
+         hi = MAX2(lo, hi);       /* need at least one level */
+      }
+      break;
+
+   default:
+      return;
+   }
+
+   /* Publish the computed range. */
+   intelObj->firstLevel = lo;
+   intelObj->lastLevel = hi;
+}
+
+/**
+ * Move one image's texels into the texture object's miptree and repoint
+ * the image at that tree.  The source is either the image's own miptree
+ * (released afterwards) or its base.Data buffer (freed afterwards).
+ */
+static void
+copy_image_data_to_tree(struct intel_context *intel,
+                        struct intel_texture_object *intelObj,
+                        struct intel_texture_image *intelImage)
+{
+   struct intel_mipmap_tree *const dst_tree = intelObj->mt;
+   const GLuint face = intelImage->face;
+   const GLuint level = intelImage->level;
+
+   if (!intelImage->mt) {
+      struct gl_texture_image *const img = &intelImage->base;
+
+      assert(img->Data != NULL);
+      /* Plain upload from main memory; the last two arguments are
+       * RowStride and RowStride * Height.
+       */
+      intel_miptree_image_data(intel, dst_tree, face, level,
+                               img->Data,
+                               img->RowStride,
+                               img->RowStride * img->Height);
+      _mesa_align_free(img->Data);
+      img->Data = NULL;
+   }
+   else {
+      /* Tree-to-tree copy, potentially done with the blitter. */
+      intel_miptree_image_copy(intel, dst_tree, face, level, intelImage->mt);
+
+      /* Drop the image's reference to its old tree. */
+      intel_miptree_release(intel, &intelImage->mt);
+   }
+
+   intel_miptree_reference(&intelImage->mt, dst_tree);
+}
+
+
+/* Validate the texture bound to 'unit': make intelObj->mt a single miptree
+ * holding every active level.  GL_FALSE means no usable tree (fallback). */
+GLuint
+intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
+{
+   struct gl_texture_object *tObj = intel->ctx.Texture.Unit[unit]._Current;
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   int comp_byte = 0;
+   int cpp;
+   GLuint face, i;
+   GLuint nr_faces = 0;
+   struct intel_texture_image *firstImage;
+
+   /* We know/require this is true by now: */
+   assert(intelObj->base._Complete);
+
+   /* What levels must the tree include at a minimum? */
+   intel_calculate_first_last_level(intel, intelObj);
+   firstImage = intel_texture_image(tObj->Image[0][intelObj->firstLevel]);
+
+   /* Fallback case: bordered textures get no miptree at all.
+    */
+   if (firstImage->base.Border) {
+      if (intelObj->mt) {
+         intel_miptree_release(intel, &intelObj->mt);
+      }
+      return GL_FALSE;
+   }
+
+   /* If both firstImage and intelObj have a tree which can contain
+    * all active images, favour firstImage.  Note that because of the
+    * completeness requirement, we know that the image dimensions
+    * will match.
+    */
+   if (firstImage->mt &&
+       firstImage->mt != intelObj->mt &&
+       firstImage->mt->first_level <= intelObj->firstLevel &&
+       firstImage->mt->last_level >= intelObj->lastLevel) {
+
+      if (intelObj->mt)
+         intel_miptree_release(intel, &intelObj->mt);
+
+      intel_miptree_reference(&intelObj->mt, firstImage->mt);
+   }
+
+   if (_mesa_is_format_compressed(firstImage->base.TexFormat)) {
+      comp_byte = intel_compressed_num_bytes(firstImage->base.TexFormat);
+      cpp = comp_byte;
+   }
+   else
+      cpp = _mesa_get_format_bytes(firstImage->base.TexFormat);
+
+   /* Check tree can hold all active levels.  Check tree matches
+    * target, imageFormat, etc.
+    *
+    * XXX: For some layouts (eg i945?), the test might have to be
+    * first_level == firstLevel, as the tree isn't valid except at the
+    * original start level.  Hope to get around this by
+    * programming minLod, maxLod, baseLevel into the hardware and
+    * leaving the tree alone.
+    */
+   if (intelObj->mt &&
+       (intelObj->mt->target != intelObj->base.Target ||
+        intelObj->mt->internal_format != firstImage->base.InternalFormat ||
+        intelObj->mt->first_level != intelObj->firstLevel ||
+        intelObj->mt->last_level != intelObj->lastLevel ||
+        intelObj->mt->width0 != firstImage->base.Width ||
+        intelObj->mt->height0 != firstImage->base.Height ||
+        intelObj->mt->depth0 != firstImage->base.Depth ||
+        intelObj->mt->cpp != cpp ||
+        intelObj->mt->compressed != _mesa_is_format_compressed(firstImage->base.TexFormat))) {
+      intel_miptree_release(intel, &intelObj->mt);
+   }
+
+   /* May need to create a new tree:
+    */
+   if (!intelObj->mt) {
+      intelObj->mt = intel_miptree_create(intel,
+                                          intelObj->base.Target,
+                                          firstImage->base._BaseFormat,
+                                          firstImage->base.InternalFormat,
+                                          intelObj->firstLevel,
+                                          intelObj->lastLevel,
+                                          firstImage->base.Width,
+                                          firstImage->base.Height,
+                                          firstImage->base.Depth,
+                                          cpp,
+                                          comp_byte,
+                                          GL_TRUE);
+      /* Creation may fail; report a fallback rather than copying images
+       * into a NULL tree below. */
+      if (!intelObj->mt)
+         return GL_FALSE;
+   }
+
+   /* Pull in any images not in the object's tree:
+    */
+   nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   for (face = 0; face < nr_faces; face++) {
+      for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) {
+         struct intel_texture_image *intelImage =
+            intel_texture_image(intelObj->base.Image[face][i]);
+
+         /* Need to import images in main memory or held in other trees.
+          * If it's a render target, then its data isn't needed to be in
+          * the object tree (otherwise we'd be FBO incomplete), and we need
+          * to keep track of the image's MT as needing to be pulled in still,
+          * or we'll lose the rendering that's done to it.
+          */
+         if (intelObj->mt != intelImage->mt &&
+             !intelImage->used_as_render_target) {
+            copy_image_data_to_tree(intel, intelObj, intelImage);
+         }
+      }
+   }
+
+   return GL_TRUE;
+}
+
+/* Map the given level of every face (six for cube maps, otherwise one)
+ * that lives in a miptree, pointing base.Data at the mapping and storing
+ * the row stride in texels.  Images without a miptree are skipped.
+ */
+void
+intel_tex_map_level_images(struct intel_context *intel,
+                           struct intel_texture_object *intelObj,
+                           int level)
+{
+   const GLuint face_count =
+      (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   GLuint f;
+
+   for (f = 0; f < face_count; f++) {
+      struct intel_texture_image *img =
+         intel_texture_image(intelObj->base.Image[f][level]);
+
+      if (!img || !img->mt)
+         continue;
+      img->base.Data = intel_miptree_image_map(intel, img->mt,
+                                               img->face, img->level,
+                                               &img->base.RowStride,
+                                               img->base.ImageOffsets);
+      img->base.RowStride /= img->mt->cpp;   /* bytes -> texels */
+   }
+}
+
+/* Unmap one level of every face held in a miptree and clear base.Data. */
+void
+intel_tex_unmap_level_images(struct intel_context *intel,
+                             struct intel_texture_object *intelObj, int level)
+{
+   const GLuint face_count =
+      (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+   GLuint f;
+
+   for (f = 0; f < face_count; f++) {
+      struct intel_texture_image *img =
+         intel_texture_image(intelObj->base.Image[f][level]);
+      if (!img || !img->mt)
+         continue;
+      intel_miptree_image_unmap(intel, img->mt);
+      img->base.Data = NULL;
+   }
+}
+
+/* Map every level in [firstLevel, lastLevel] of the texture object. */
+void
+intel_tex_map_images(struct intel_context *intel,
+                     struct intel_texture_object *intelObj)
+{
+   int level = intelObj->firstLevel;
+
+   DBG("%s\n", __FUNCTION__);
+   for (; level <= intelObj->lastLevel; level++)
+      intel_tex_map_level_images(intel, intelObj, level);
+}
+
+/* Unmap every level in [firstLevel, lastLevel] of the texture object. */
+void
+intel_tex_unmap_images(struct intel_context *intel,
+                       struct intel_texture_object *intelObj)
+{
+   int level = intelObj->firstLevel;
+   while (level <= intelObj->lastLevel)
+      intel_tex_unmap_level_images(intel, intelObj, level++);
+}
diff --git a/src/mesa/drivers/dri/intel/server/i830_dri.h b/src/mesa/drivers/dri/intel/server/i830_dri.h
new file mode 100644
index 0000000000..def049e7a6
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/server/i830_dri.h
@@ -0,0 +1,62 @@
+/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.6 2003/09/28 20:15:59 alanh Exp $ */
+
+#ifndef _I830_DRI_H
+#define _I830_DRI_H
+
+#include "xf86drm.h"
+
+#define I830_MAX_DRAWABLES 256
+/* Version triple 1.9.0; NOTE(review): which interface it versions -- confirm. */
+#define I830_MAJOR_VERSION 1
+#define I830_MINOR_VERSION 9
+#define I830_PATCHLEVEL 0
+
+#define I830_REG_SIZE 0x80000   /* 0x80000 == 512 * 1024 */
+
+typedef struct _I830DRIRec {
+   drm_handle_t regs;
+   drmSize regsSize;
+   /* unusedN: retired fields; trailing comments give the former names. */
+   drmSize unused1; /* backbufferSize */
+   drm_handle_t unused2; /* backbuffer */
+
+   drmSize unused3; /* depthbufferSize */
+   drm_handle_t unused4; /* depthbuffer */
+
+   drmSize unused5; /* rotatedSize */
+   drm_handle_t unused6; /* rotatedbuffer */
+
+   drm_handle_t unused7; /* textures */
+   int unused8; /* textureSize */
+
+   drm_handle_t unused9; /* agp_buffers */
+   drmSize unused10; /* agp_buf_size */
+
+   int deviceID;
+   int width;
+   int height;
+   int mem;
+   int cpp;
+   int bitsPerPixel;
+
+   int unused11[8]; /* was front/back/depth/rotated offset/pitch */
+
+   int unused12; /* logTextureGranularity */
+   int unused13; /* textureOffset */
+
+   int irq;
+   int sarea_priv_offset;
+} I830DRIRec, *I830DRIPtr;
+
+typedef struct {
+   /* Nothing here yet; 'dummy' is the only member, a placeholder. */
+   int dummy;
+} I830ConfigPrivRec, *I830ConfigPrivPtr;
+
+typedef struct {
+   /* Nothing here yet; 'dummy' is the only member, a placeholder. */
+   int dummy;
+} I830DRIContextRec, *I830DRIContextPtr;
+
+
+#endif
diff --git a/src/mesa/drivers/dri/intel/server/intel.h b/src/mesa/drivers/dri/intel/server/intel.h
new file mode 100644
index 0000000000..6ea72499c1
--- /dev/null
+++ b/src/mesa/drivers/dri/intel/server/intel.h
@@ -0,0 +1,331 @@
+#ifndef _INTEL_H_
+#define _INTEL_H_
+
+#include "xf86drm.h"		/* drm_handle_t, etc */
+
+/* Intel */
+#ifndef PCI_CHIP_I810
+#define PCI_CHIP_I810              0x7121
+#define PCI_CHIP_I810_DC100        0x7123
+#define PCI_CHIP_I810_E            0x7125
+#define PCI_CHIP_I815              0x1132
+#define PCI_CHIP_I810_BRIDGE       0x7120
+#define PCI_CHIP_I810_DC100_BRIDGE 0x7122
+#define PCI_CHIP_I810_E_BRIDGE     0x7124
+#define PCI_CHIP_I815_BRIDGE       0x1130
+#endif
+
+#define PCI_CHIP_845_G			0x2562
+#define PCI_CHIP_I830_M			0x3577
+
+#ifndef PCI_CHIP_I855_GM
+#define PCI_CHIP_I855_GM	   0x3582
+#define PCI_CHIP_I855_GM_BRIDGE	   0x3580
+#endif
+
+#ifndef PCI_CHIP_I865_G
+#define PCI_CHIP_I865_G		   0x2572
+#define PCI_CHIP_I865_G_BRIDGE	   0x2570
+#endif
+
+#ifndef PCI_CHIP_I915_G
+#define PCI_CHIP_I915_G		   0x2582
+#define PCI_CHIP_I915_G_BRIDGE	   0x2580
+#endif
+
+#ifndef PCI_CHIP_I915_GM
+#define PCI_CHIP_I915_GM	   0x2592
+#define PCI_CHIP_I915_GM_BRIDGE	   0x2590
+#endif
+
+#ifndef PCI_CHIP_E7221_G
+#define PCI_CHIP_E7221_G	   0x258A
+/* Same as I915_G_BRIDGE */
+#define PCI_CHIP_E7221_G_BRIDGE	   0x2580
+#endif
+
+#ifndef PCI_CHIP_I945_G
+#define PCI_CHIP_I945_G        0x2772
+#define PCI_CHIP_I945_G_BRIDGE 0x2770
+#endif
+
+#ifndef PCI_CHIP_I945_GM
+#define PCI_CHIP_I945_GM        0x27A2
+#define PCI_CHIP_I945_GM_BRIDGE 0x27A0
+#endif
+/* Chipset tests on a pointer to a record with a Chipset member; the argument is parenthesized so any pointer expression may be passed (it is still evaluated more than once). */
+#define IS_I810(pI810) ((pI810)->Chipset == PCI_CHIP_I810 ||	\
+			(pI810)->Chipset == PCI_CHIP_I810_DC100 || \
+			(pI810)->Chipset == PCI_CHIP_I810_E)
+#define IS_I815(pI810) ((pI810)->Chipset == PCI_CHIP_I815)
+#define IS_I830(pI810) ((pI810)->Chipset == PCI_CHIP_I830_M)
+#define IS_845G(pI810) ((pI810)->Chipset == PCI_CHIP_845_G)
+#define IS_I85X(pI810)  ((pI810)->Chipset == PCI_CHIP_I855_GM)
+#define IS_I852(pI810)  ((pI810)->Chipset == PCI_CHIP_I855_GM && ((pI810)->variant == I852_GM || (pI810)->variant == I852_GME))
+#define IS_I855(pI810)  ((pI810)->Chipset == PCI_CHIP_I855_GM && ((pI810)->variant == I855_GM || (pI810)->variant == I855_GME))
+#define IS_I865G(pI810) ((pI810)->Chipset == PCI_CHIP_I865_G)
+/* NOTE(review): IS_I852/IS_I855 need a `variant` member and I852_GM, I852_GME, I855_GM, I855_GME; none of these are declared in this header itself. */
+#define IS_I915G(pI810) ((pI810)->Chipset == PCI_CHIP_I915_G || (pI810)->Chipset == PCI_CHIP_E7221_G)
+#define IS_I915GM(pI810) ((pI810)->Chipset == PCI_CHIP_I915_GM)
+#define IS_I945G(pI810) ((pI810)->Chipset == PCI_CHIP_I945_G)
+#define IS_I945GM(pI810) ((pI810)->Chipset == PCI_CHIP_I945_GM)
+#define IS_I9XX(pI810) (IS_I915G(pI810) || IS_I915GM(pI810) || IS_I945G(pI810) || IS_I945GM(pI810))
+
+#define IS_MOBILE(pI810) (IS_I830(pI810) || IS_I85X(pI810) || IS_I915GM(pI810) || IS_I945GM(pI810))
+
+#define I830_GMCH_CTRL		0x52
+
+#define I830_GMCH_MEM_MASK      0x1
+#define I830_GMCH_MEM_64M       0x1
+#define I830_GMCH_MEM_128M      0
+
+#define I830_GMCH_GMS_MASK			0x70
+#define I830_GMCH_GMS_DISABLED		0x00
+#define I830_GMCH_GMS_LOCAL			0x10
+#define I830_GMCH_GMS_STOLEN_512	0x20
+#define I830_GMCH_GMS_STOLEN_1024	0x30
+#define I830_GMCH_GMS_STOLEN_8192	0x40
+
+#define I855_GMCH_GMS_MASK			(0x7 << 4)
+#define I855_GMCH_GMS_DISABLED			0x00
+#define I855_GMCH_GMS_STOLEN_1M			(0x1 << 4)
+#define I855_GMCH_GMS_STOLEN_4M			(0x2 << 4)
+#define I855_GMCH_GMS_STOLEN_8M			(0x3 << 4)
+#define I855_GMCH_GMS_STOLEN_16M		(0x4 << 4)
+#define I855_GMCH_GMS_STOLEN_32M		(0x5 << 4)
+#define I915G_GMCH_GMS_STOLEN_48M		(0x6 << 4)
+#define I915G_GMCH_GMS_STOLEN_64M		(0x7 << 4)
+
+typedef unsigned char Bool;
+#define TRUE 1
+#define FALSE 0
+/* PIPE_* bit flags; each value is parenthesized so it stays one operand next to ==, &, | or arithmetic. */
+#define PIPE_NONE	(0<<0)
+#define PIPE_CRT	(1<<0)
+#define PIPE_TV		(1<<1)
+#define PIPE_DFP	(1<<2)
+#define PIPE_LFP	(1<<3)
+#define PIPE_CRT2	(1<<4)
+#define PIPE_TV2	(1<<5)
+#define PIPE_DFP2	(1<<6)
+#define PIPE_LFP2	(1<<7)
+
+typedef struct _I830MemPool *I830MemPoolPtr;
+typedef struct _I830MemRange *I830MemRangePtr;
+typedef struct _I830MemRange {
+   long Start;
+   long End;
+   long Size;
+   unsigned long Physical;
+   unsigned long Offset;		/* Offset of AGP-allocated portion */
+   unsigned long Alignment;
+   drm_handle_t Key;
+   unsigned long Pitch; // add pitch
+   I830MemPoolPtr Pool;
+} I830MemRange;
+
+typedef struct _I830MemPool {
+   I830MemRange Total;
+   I830MemRange Free;
+   I830MemRange Fixed;
+   I830MemRange Allocated;
+} I830MemPool;
+
+typedef struct {
+   int tail_mask;
+   I830MemRange mem;
+   unsigned char *virtual_start;
+   int head;
+   int tail;
+   int space;
+} I830RingBuffer;
+
+typedef struct _I830Rec {
+   unsigned char *MMIOBase;
+   unsigned char *FbBase;
+   int cpp;
+   uint32_t aper_size;
+   unsigned int bios_version;
+
+   /* These are set in PreInit and never changed. */
+   long FbMapSize;
+   long TotalVideoRam;
+   I830MemRange StolenMemory;		/* pre-allocated memory */
+   long BIOSMemorySize;			/* min stolen pool size */
+   int BIOSMemSizeLoc;
+
+   /* These change according to what has been allocated. */
+   long FreeMemory;
+   I830MemRange MemoryAperture;
+   I830MemPool StolenPool;
+   long allocatedMemory;
+
+   /* Regions allocated either from the above pools, or from agpgart. */
+   /* for single and dual head configurations */
+   I830MemRange FrontBuffer;
+   I830MemRange FrontBuffer2;
+   I830MemRange Scratch;
+   I830MemRange Scratch2;
+
+   I830RingBuffer *LpRing;
+
+   I830MemRange BackBuffer;
+   I830MemRange DepthBuffer;
+   I830MemRange TexMem;
+   int TexGranularity;
+   I830MemRange ContextMem;
+   int drmMinor;
+   Bool have3DWindows;
+
+   Bool NeedRingBufferLow;
+   Bool allowPageFlip;
+   Bool disableTiling;
+
+   int Chipset;
+   unsigned long LinearAddr;
+   unsigned long MMIOAddr;
+
+   drmSize           registerSize;     /**< \brief MMIO register map size */
+   drm_handle_t         registerHandle;   /**< \brief MMIO register map handle */
+  //   IOADDRESS ioBase;
+   int               irq;              /**< \brief IRQ number */
+   int GttBound;
+
+   drm_handle_t ring_map;
+   unsigned int Fence[8];
+
+} I830Rec;
+
+/*
+ * 12288 is set as the maximum, chosen because it is enough for
+ * 1920x1440@32bpp with a 2048 pixel line pitch with some to spare.
+ */
+#define I830_MAXIMUM_VBIOS_MEM		12288
+#define I830_DEFAULT_VIDEOMEM_2D	(MB(32) / 1024)
+#define I830_DEFAULT_VIDEOMEM_3D	(MB(64) / 1024)
+
+/* Flags for memory allocation function */
+#define FROM_ANYWHERE			0x00000000
+#define FROM_POOL_ONLY			0x00000001
+#define FROM_NEW_ONLY			0x00000002
+#define FROM_MASK			0x0000000f
+
+#define ALLOCATE_AT_TOP			0x00000010
+#define ALLOCATE_AT_BOTTOM		0x00000020
+#define FORCE_GAPS			0x00000040
+
+#define NEED_PHYSICAL_ADDR		0x00000100
+#define ALIGN_BOTH_ENDS			0x00000200
+#define FORCE_LOW			0x00000400
+
+#define ALLOC_NO_TILING			0x00001000
+#define ALLOC_INITIAL			0x00002000
+
+#define ALLOCATE_DRY_RUN		0x80000000
+
+/* Chipset registers for VIDEO BIOS memory RW access */
+#define _855_DRAM_RW_CONTROL 0x58
+#define _845_DRAM_RW_CONTROL 0x90
+#define DRAM_WRITE    0x33330000
+
+#define KB(x) ((x) * 1024)
+#define MB(x) ((x) * KB(1024))
+
+#define GTT_PAGE_SIZE			KB(4)
+#define ROUND_TO(x, y)			(((x) + (y) - 1) / (y) * (y))
+#define ROUND_DOWN_TO(x, y)		((x) / (y) * (y))
+#define ROUND_TO_PAGE(x)		ROUND_TO((x), GTT_PAGE_SIZE)
+#define ROUND_TO_MB(x)			ROUND_TO((x), MB(1))
+#define PRIMARY_RINGBUFFER_SIZE		KB(128)
+
+
+/* Ring buffer registers, p277, overview p19
+ */
+#define LP_RING     0x2030
+#define HP_RING     0x2040
+
+#define RING_TAIL      0x00
+#define TAIL_ADDR           0x000FFFF8
+#define I830_TAIL_MASK	    0x001FFFF8
+
+#define RING_HEAD      0x04
+#define HEAD_WRAP_COUNT     0xFFE00000
+#define HEAD_WRAP_ONE       0x00200000
+#define HEAD_ADDR           0x001FFFFC
+#define I830_HEAD_MASK      0x001FFFFC
+
+#define RING_START     0x08
+#define START_ADDR          0x03FFFFF8
+#define I830_RING_START_MASK	0xFFFFF000
+
+#define RING_LEN       0x0C
+#define RING_NR_PAGES       0x001FF000 
+#define I830_RING_NR_PAGES	0x001FF000
+#define RING_REPORT_MASK    0x00000006
+#define RING_REPORT_64K     0x00000002
+#define RING_REPORT_128K    0x00000004
+#define RING_NO_REPORT      0x00000000
+#define RING_VALID_MASK     0x00000001
+#define RING_VALID          0x00000001
+#define RING_INVALID        0x00000000
+
+
+/* Fence/Tiling ranges [0..7]
+ */
+#define FENCE            0x2000
+#define FENCE_NR         8
+
+#define I915G_FENCE_START_MASK	0x0ff00000
+
+#define I830_FENCE_START_MASK	0x07f80000
+
+#define FENCE_START_MASK    0x03F80000
+#define FENCE_X_MAJOR       0x00000000
+#define FENCE_Y_MAJOR       0x00001000
+#define FENCE_SIZE_MASK     0x00000700
+#define FENCE_SIZE_512K     0x00000000
+#define FENCE_SIZE_1M       0x00000100
+#define FENCE_SIZE_2M       0x00000200
+#define FENCE_SIZE_4M       0x00000300
+#define FENCE_SIZE_8M       0x00000400
+#define FENCE_SIZE_16M      0x00000500
+#define FENCE_SIZE_32M      0x00000600
+#define FENCE_SIZE_64M	    0x00000700
+#define I915G_FENCE_SIZE_1M       0x00000000
+#define I915G_FENCE_SIZE_2M       0x00000100
+#define I915G_FENCE_SIZE_4M       0x00000200
+#define I915G_FENCE_SIZE_8M       0x00000300
+#define I915G_FENCE_SIZE_16M      0x00000400
+#define I915G_FENCE_SIZE_32M      0x00000500
+#define I915G_FENCE_SIZE_64M	0x00000600
+#define I915G_FENCE_SIZE_128M	0x00000700
+#define FENCE_PITCH_1       0x00000000
+#define FENCE_PITCH_2       0x00000010
+#define FENCE_PITCH_4       0x00000020
+#define FENCE_PITCH_8       0x00000030
+#define FENCE_PITCH_16      0x00000040
+#define FENCE_PITCH_32      0x00000050
+#define FENCE_PITCH_64	    0x00000060
+#define FENCE_VALID         0x00000001
+
+#include <mmio.h>
+/* Raw MMIO accessors relative to `base`; every expansion is wrapped in parentheses so it behaves as a single expression. */
+#  define MMIO_IN8(base, offset) \
+	(*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)))
+#  define MMIO_IN32(base, offset) \
+	read_MMIO_LE32(base, offset)
+#  define MMIO_OUT8(base, offset, val) \
+	(*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val))
+#  define MMIO_OUT32(base, offset, val) \
+	(*(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val))
+
+
+				/* Memory mapped register access macros */
+#define INREG8(addr)        MMIO_IN8(MMIO, addr)
+#define INREG(addr)         MMIO_IN32(MMIO, addr)
+#define OUTREG8(addr, val)  MMIO_OUT8(MMIO, addr, val)
+#define OUTREG(addr, val)   MMIO_OUT32(MMIO, addr, val)
+
+#define DSPABASE		0x70184
+
+#endif
diff --git a/src/mesa/drivers/dri/mach64/Makefile b/src/mesa/drivers/dri/mach64/Makefile
new file mode 100644
index 0000000000..c20fdece29
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/Makefile
@@ -0,0 +1,29 @@
+# src/mesa/drivers/dri/mach64/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = mach64_dri.so
+
+DRIVER_SOURCES = \
+	mach64_context.c \
+	mach64_ioctl.c \
+	mach64_screen.c \
+	mach64_span.c \
+	mach64_state.c \
+	mach64_tex.c \
+	mach64_texmem.c \
+	mach64_texstate.c \
+	mach64_tris.c \
+	mach64_vb.c \
+	mach64_dd.c \
+	mach64_lock.c
+
+C_SOURCES = \
+	$(COMMON_SOURCES) \
+	$(DRIVER_SOURCES)
+
+ASM_SOURCES = 
+
+include ../Makefile.template
+
diff --git a/src/mesa/drivers/dri/mach64/mach64_context.c b/src/mesa/drivers/dri/mach64/mach64_context.c
new file mode 100644
index 0000000000..72a44d9642
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_context.c
@@ -0,0 +1,357 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_dd.h"
+#include "mach64_span.h"
+#include "mach64_state.h"
+#include "mach64_tex.h"
+#include "mach64_tris.h"
+#include "mach64_vb.h"
+
+#include "utils.h"
+#include "vblank.h"
+
+#ifndef MACH64_DEBUG
+int MACH64_DEBUG = (0);
+#endif
+/* Name -> DEBUG_* flag table handed to driParseDebugString() together with the MACH64_DEBUG environment variable. */
+static const struct dri_debug_control debug_control[] =
+{
+    { "sync",   DEBUG_ALWAYS_SYNC },
+    { "api",    DEBUG_VERBOSE_API },
+    { "msg",    DEBUG_VERBOSE_MSG },
+    { "lru",    DEBUG_VERBOSE_LRU },
+    { "dri",    DEBUG_VERBOSE_DRI },
+    { "ioctl",  DEBUG_VERBOSE_IOCTL },
+    { "prims",  DEBUG_VERBOSE_PRIMS },
+    { "count",  DEBUG_VERBOSE_COUNT },
+    { "nowait", DEBUG_NOWAIT },
+    { "fall",   DEBUG_VERBOSE_FALLBACK },
+    { NULL,    0 }
+};
+/* Extension list passed to driInitExtensions() by mach64CreateContext(). */
+static const struct dri_extension card_extensions[] =
+{
+    { "GL_ARB_multitexture",               NULL },
+    { "GL_EXT_texture_edge_clamp",         NULL },
+    { "GL_MESA_ycbcr_texture",             NULL },
+    { "GL_SGIS_generate_mipmap",           NULL },
+    { NULL,                                NULL }
+};
+
+
+/* Create the device specific context.
+ *
+ * Allocates the mach64 context and its Mesa GLcontext, creates the texture
+ * heaps and the vertex buffer, and publishes the result through
+ * driContextPriv->driverPrivate.  Returns GL_TRUE on success; on failure
+ * returns GL_FALSE after freeing the heaps and contexts created here.
+ */
+GLboolean mach64CreateContext( gl_api api,
+			       const __GLcontextModes *glVisual,
+			       __DRIcontext *driContextPriv,
+                               void *sharedContextPrivate )
+{
+   GLcontext *ctx, *shareCtx;
+   __DRIscreen *driScreen = driContextPriv->driScreenPriv;
+   struct dd_function_table functions;
+   mach64ContextPtr mmesa;
+   mach64ScreenPtr mach64Screen;
+   int i, heap;
+   GLuint *c_textureSwapsPtr = NULL;
+
+#if DO_DEBUG
+   MACH64_DEBUG = driParseDebugString(getenv("MACH64_DEBUG"), debug_control);
+#endif
+
+   /* Allocate the mach64 context */
+   mmesa = (mach64ContextPtr) CALLOC( sizeof(*mmesa) );
+   if ( !mmesa )
+      return GL_FALSE;
+
+   /* Init default driver functions then plug in our Mach64-specific functions
+    * (the texture functions are especially important)
+    */
+   _mesa_init_driver_functions( &functions );
+   mach64InitDriverFuncs( &functions );
+   mach64InitIoctlFuncs( &functions );
+   mach64InitTextureFuncs( &functions );
+
+   /* Allocate the Mesa context */
+   shareCtx = sharedContextPrivate
+      ? ((mach64ContextPtr) sharedContextPrivate)->glCtx : NULL;
+   mmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
+					&functions, (void *)mmesa);
+   if (!mmesa->glCtx) {
+      FREE(mmesa);
+      return GL_FALSE;
+   }
+   driContextPriv->driverPrivate = mmesa;
+   ctx = mmesa->glCtx;
+
+   mmesa->driContext = driContextPriv;
+   mmesa->driScreen = driScreen;
+   mmesa->driDrawable = NULL;
+   mmesa->hHWContext = driContextPriv->hHWContext;
+   mmesa->driHwLock = &driScreen->pSAREA->lock;
+   mmesa->driFd = driScreen->fd;
+
+   mach64Screen = mmesa->mach64Screen = (mach64ScreenPtr)driScreen->private;
+
+   /* Parse configuration files */
+   driParseConfigFiles (&mmesa->optionCache, &mach64Screen->optionCache,
+                        mach64Screen->driScreen->myNum, "mach64");
+
+   mmesa->sarea = (drm_mach64_sarea_t *)((char *)driScreen->pSAREA +
+				    sizeof(drm_sarea_t));
+
+   mmesa->CurrentTexObj[0] = NULL;
+   mmesa->CurrentTexObj[1] = NULL;
+
+   (void) memset( mmesa->texture_heaps, 0, sizeof( mmesa->texture_heaps ) );
+   make_empty_list( &mmesa->swapped );
+
+   mmesa->firstTexHeap = mach64Screen->firstTexHeap;
+   mmesa->lastTexHeap = mach64Screen->firstTexHeap + mach64Screen->numTexHeaps;
+
+   for ( i = mmesa->firstTexHeap ; i < mmesa->lastTexHeap ; i++ ) {
+      mmesa->texture_heaps[i] = driCreateTextureHeap( i, mmesa,
+	    mach64Screen->texSize[i],
+	    6, /* align to 64-byte boundary, use 12 for page-size boundary */
+	    MACH64_NR_TEX_REGIONS,
+	    (drmTextureRegionPtr)mmesa->sarea->tex_list[i],
+	    &mmesa->sarea->tex_age[i],
+	    &mmesa->swapped,
+	    sizeof( mach64TexObj ),
+	    (destroy_texture_object_t *) mach64DestroyTexObj );
+
+#if ENABLE_PERF_BOXES
+      c_textureSwapsPtr = & mmesa->c_textureSwaps;
+#endif
+      driSetTextureSwapCounterLocation( mmesa->texture_heaps[i],
+					c_textureSwapsPtr );
+   }
+
+   mmesa->RenderIndex = -1;		/* Impossible value */
+   mmesa->vert_buf = NULL;
+   mmesa->num_verts = 0;
+   mmesa->new_state = MACH64_NEW_ALL;
+   mmesa->dirty = MACH64_UPLOAD_ALL;
+
+   /* Set the maximum texture size small enough that we can
+    * guarantee that both texture units can bind a maximal texture
+    * and have them both in memory (on-card or AGP) at once.
+    * Test for 2 textures * bytes/texel * size * size.  There's no
+    * need to account for mipmaps since we only upload one level.
+    */
+   ctx->Const.MaxTextureUnits = 2;
+   ctx->Const.MaxTextureImageUnits = 2;
+   ctx->Const.MaxTextureCoordUnits = 2;
+   ctx->Const.MaxDrawBuffers = 1;
+
+   heap = mach64Screen->IsPCI ? MACH64_CARD_HEAP : MACH64_AGP_HEAP;
+
+   driCalculateMaxTextureLevels( & mmesa->texture_heaps[heap],
+				 1,
+				 & ctx->Const,
+				 mach64Screen->cpp,
+				 10, /* max 2D texture size is 1024x1024 */
+				 0,  /* 3D textures unsupported. */
+				 0,  /* cube textures unsupported. */
+				 0,  /* texture rectangles unsupported. */
+				 1,  /* mipmapping unsupported. */
+				 GL_TRUE, /* need to have both textures in
+					     either local or AGP memory */
+				 0 );
+
+#if ENABLE_PERF_BOXES
+   mmesa->boxes = ( getenv( "LIBGL_PERFORMANCE_BOXES" ) != NULL );
+#endif
+
+   /* Allocate the vertex buffer */
+   mmesa->vert_buf = _mesa_align_malloc(MACH64_BUFFER_SIZE, 32);
+   if ( !mmesa->vert_buf ) {
+      /* Free the texture heaps and both contexts instead of leaking them. */
+      for ( i = mmesa->firstTexHeap ; i < mmesa->lastTexHeap ; i++ )
+	 driDestroyTextureHeap( mmesa->texture_heaps[i] );
+      driContextPriv->driverPrivate = NULL;
+      mmesa->glCtx->DriverCtx = NULL;
+      _mesa_destroy_context( mmesa->glCtx );
+      FREE( mmesa );
+      return GL_FALSE;
+   }
+   mmesa->vert_used = 0;
+   mmesa->vert_total = MACH64_BUFFER_SIZE;
+
+   /* Initialize the software rasterizer and helper modules. */
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+
+   /* Configure swrast and T&L to match hardware characteristics: */
+   _swrast_allow_pixel_fog( ctx, GL_FALSE );
+   _swrast_allow_vertex_fog( ctx, GL_TRUE );
+   _tnl_allow_pixel_fog( ctx, GL_FALSE );
+   _tnl_allow_vertex_fog( ctx, GL_TRUE );
+
+   driInitExtensions( ctx, card_extensions, GL_TRUE );
+
+   mach64InitVB( ctx );
+   mach64InitTriFuncs( ctx );
+   mach64DDInitStateFuncs( ctx );
+   mach64DDInitSpanFuncs( ctx );
+   mach64DDInitState( mmesa );
+
+   mmesa->do_irqs = (mmesa->mach64Screen->irq && !getenv("MACH64_NO_IRQS"));
+
+   if (driQueryOptionb(&mmesa->optionCache, "no_rast")) {
+      fprintf(stderr, "disabling 3D acceleration\n");
+      FALLBACK(mmesa, MACH64_FALLBACK_DISABLE, 1);
+   }
+
+   return GL_TRUE;
+}
+
+/* Tear down the device specific context and free what it owns.
+ */
+void mach64DestroyContext( __DRIcontext *driContextPriv  )
+{
+   mach64ContextPtr mmesa = (mach64ContextPtr) driContextPriv->driverPrivate;
+   GLcontext *glCtx;
+   GLboolean lastUser;
+   int heapIdx;
+
+   assert(mmesa);  /* should never be null */
+   if ( !mmesa )
+      return;
+   glCtx = mmesa->glCtx;
+
+   /* Sample the share-group reference count before any teardown runs. */
+   lastUser = (glCtx->Shared->RefCount == 1);
+
+   _swsetup_DestroyContext( glCtx );
+   _tnl_DestroyContext( glCtx );
+   _vbo_DestroyContext( glCtx );
+   _swrast_DestroyContext( glCtx );
+
+   if ( lastUser ) {
+      /* This share group is about to go away, free our private
+       * texture object data.
+       */
+      for ( heapIdx = mmesa->firstTexHeap ; heapIdx < mmesa->lastTexHeap ; heapIdx++ ) {
+	 driDestroyTextureHeap( mmesa->texture_heaps[heapIdx] );
+	 mmesa->texture_heaps[heapIdx] = NULL;
+      }
+      assert( is_empty_list( & mmesa->swapped ) );
+   }
+
+   mach64FreeVB( glCtx );
+
+   /* Free the vertex buffer */
+   if ( mmesa->vert_buf )
+      _mesa_align_free( mmesa->vert_buf );
+
+   /* free the Mesa context */
+   glCtx->DriverCtx = NULL;
+   _mesa_destroy_context( glCtx );
+   FREE( mmesa );
+}
+
+/* Make the given context current and bind the draw and read drawables to
+ * it; with a NULL context, just release whatever is current.
+ */
+GLboolean
+mach64MakeCurrent( __DRIcontext *driContextPriv,
+                 __DRIdrawable *driDrawPriv,
+                 __DRIdrawable *driReadPriv )
+{
+   if ( !driContextPriv ) {
+      _mesa_make_current( NULL, NULL, NULL );
+   } else {
+      GET_CURRENT_CONTEXT(ctx);
+      mach64ContextPtr next = (mach64ContextPtr) driContextPriv->driverPrivate;
+      mach64ContextPtr prev = NULL;
+      if ( ctx )
+	 prev = MACH64_CONTEXT(ctx);
+      /* A different context is becoming current: flag all state for upload. */
+      if ( prev != next ) {
+	 next->new_state |= MACH64_NEW_CONTEXT;
+	 next->dirty = MACH64_UPLOAD_ALL;
+      }
+
+      if ( next->driDrawable != driDrawPriv ) {
+	 /* swap_interval == (unsigned)-1: vblank state gets set up here. */
+	 if ( driDrawPriv->swap_interval == (unsigned)-1 ) {
+	    if ( next->do_irqs )
+	       driDrawPriv->vblFlags = driGetDefaultVBlankFlags(&next->optionCache);
+	    else
+	       driDrawPriv->vblFlags = VBLANK_FLAG_NO_IRQ;
+	    driDrawableInitVBlank( driDrawPriv );
+	 }
+
+	 next->driDrawable = driDrawPriv;
+	 mach64CalcViewport( next->glCtx );
+      }
+
+      _mesa_make_current( next->glCtx,
+                          (GLframebuffer *) driDrawPriv->driverPrivate,
+                          (GLframebuffer *) driReadPriv->driverPrivate );
+      next->new_state |= MACH64_NEW_CLIP;
+   }
+
+   return GL_TRUE;
+}
+
+
+/* Unbind hook for the context.  Nothing is released or changed here:
+ * the argument is ignored and GL_TRUE is always returned. */
+GLboolean
+mach64UnbindContext( __DRIcontext *driContextPriv )
+{
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_context.h b/src/mesa/drivers/dri/mach64/mach64_context.h
new file mode 100644
index 0000000000..893fc8daee
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_context.h
@@ -0,0 +1,369 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_CONTEXT_H__
+#define __MACH64_CONTEXT_H__
+
+#include "dri_util.h"
+#include "drm.h"
+#include "mach64_drm.h"
+
+#include "main/mtypes.h"
+
+#include "mach64_reg.h"
+
+#include "texmem.h"
+
+struct mach64_context;
+typedef struct mach64_context mach64ContextRec;
+typedef struct mach64_context *mach64ContextPtr;
+
+#include "mach64_lock.h"
+#include "mach64_screen.h"
+
+/* Experimental driver options */
+#define MACH64_CLIENT_STATE_EMITS       0
+
+/* Performance monitoring */
+#define ENABLE_PERF_BOXES               1
+
+/* Native vertex format */
+#define MACH64_NATIVE_VTXFMT		1
+
+/* Flags for what context state needs to be updated:
+ */
+#define MACH64_NEW_ALPHA		0x0001
+#define MACH64_NEW_DEPTH		0x0002
+#define MACH64_NEW_FOG			0x0004
+#define MACH64_NEW_CLIP			0x0008
+#define MACH64_NEW_CULL			0x0010
+#define MACH64_NEW_MASKS		0x0020
+#define MACH64_NEW_RENDER_UNUSED	0x0040
+#define MACH64_NEW_WINDOW		0x0080
+#define MACH64_NEW_TEXTURE		0x0100
+#define MACH64_NEW_CONTEXT		0x0200
+#define MACH64_NEW_ALL			0x03ff
+
+/* Flags for software fallback cases:
+ */
+#define MACH64_FALLBACK_TEXTURE		0x0001
+#define MACH64_FALLBACK_DRAW_BUFFER	0x0002
+#define MACH64_FALLBACK_READ_BUFFER	0x0004
+#define MACH64_FALLBACK_STENCIL		0x0008
+#define MACH64_FALLBACK_RENDER_MODE	0x0010
+#define MACH64_FALLBACK_LOGICOP		0x0020
+#define MACH64_FALLBACK_SEP_SPECULAR	0x0040
+#define MACH64_FALLBACK_BLEND_EQ	0x0080
+#define MACH64_FALLBACK_BLEND_FUNC	0x0100
+#define MACH64_FALLBACK_DISABLE		0x0200
+
+#define CARD32 GLuint		/* KW: For building in mesa tree */
+
+#if MACH64_NATIVE_VTXFMT
+
+/* The vertex structures.
+ */
+
+/* The size of this union is not of relevance:
+ */
+union mach64_vertex_t {
+   GLfloat f[16];
+   GLuint ui[16];
+   GLushort us2[16][2];
+   GLubyte ub4[16][4];
+};
+
+typedef union mach64_vertex_t mach64Vertex, *mach64VertexPtr;
+
+#else
+
+/* Use the templated vertex format:
+ */
+#define TAG(x) mach64##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+#endif /* MACH64_NATIVE_VTXFMT */
+
+/* Subpixel offsets for window coordinates:
+ * These are enough to fix most glean tests except polygonOffset.
+ * There are also still some gaps that show in e.g. the tunnel Mesa demo
+ * or the lament xscreensaver hack.
+ */
+#define SUBPIXEL_X	(0.0125F)
+#define SUBPIXEL_Y	(0.15F)
+
+
+typedef void (*mach64_tri_func)( mach64ContextPtr,
+				   mach64Vertex *,
+				   mach64Vertex *,
+				   mach64Vertex * );
+
+typedef void (*mach64_line_func)( mach64ContextPtr,
+				    mach64Vertex *,
+				    mach64Vertex * );
+
+typedef void (*mach64_point_func)( mach64ContextPtr,
+				     mach64Vertex * );
+
+struct mach64_texture_object {
+   driTextureObject   base;
+
+   GLuint bufAddr;
+
+   GLint heap; /* same as base.heap->heapId */
+
+   /* For communicating values from mach64AllocTexObj(), mach64SetTexImages()
+    * to mach64UpdateTextureUnit(). Alternately, we can use the tObj values or
+    * set the context registers directly.
+    */
+   GLint widthLog2;
+   GLint heightLog2;
+   GLint maxLog2;
+
+   GLint hasAlpha;
+   GLint textureFormat;
+
+   GLboolean BilinearMin;
+   GLboolean BilinearMag;
+   GLboolean ClampS;
+   GLboolean ClampT;
+};
+
+typedef struct mach64_texture_object mach64TexObj, *mach64TexObjPtr;
+
+struct mach64_context {
+   GLcontext *glCtx;			/* Mesa context created in mach64CreateContext() */
+
+   /* Driver and hardware state management
+    */
+   GLuint new_state;			/* MACH64_NEW_* bits */
+   GLuint dirty;			/* Hardware state to be updated */
+   drm_mach64_context_regs_t setup;
+
+   GLuint NewGLState;
+   GLuint Fallback;			/* presumably MACH64_FALLBACK_* bits -- TODO confirm */
+   GLuint SetupIndex;
+   GLuint SetupNewInputs;
+   GLuint RenderIndex;			/* set to -1 ("impossible value") at context creation */
+   GLfloat hw_viewport[16];
+   GLfloat depth_scale;
+   GLuint vertex_size;
+   GLuint vertex_stride_shift;
+   GLuint vertex_format;
+   GLuint num_verts;
+   GLubyte *verts;		
+
+   CARD32 Color;			/* Current draw color */
+   CARD32 ClearColor;			/* Color used to clear color buffer */
+   CARD32 ClearDepth;			/* Value used to clear depth buffer */
+
+   /* Map GL texture units onto hardware
+    */
+   GLint multitex;
+   GLint tmu_source[2];
+   GLint tex_dest[2];
+
+   /* Texture object bookkeeping
+    */
+   mach64TexObjPtr CurrentTexObj[2];
+
+   GLint firstTexHeap, lastTexHeap;	/* lastTexHeap is one past the last heap index (loops use i < lastTexHeap) */
+   driTexHeap *texture_heaps[MACH64_NR_TEX_HEAPS];
+   driTextureObject swapped;		/* list head passed to driCreateTextureHeap() for every heap */
+
+   /* Fallback rasterization functions
+    */
+   mach64_point_func draw_point;
+   mach64_line_func draw_line;
+   mach64_tri_func draw_tri;
+
+   /* Culling */
+   GLfloat backface_sign;
+
+   /* DMA buffers
+    */
+   void *vert_buf;			/* MACH64_BUFFER_SIZE bytes from _mesa_align_malloc(..., 32) */
+   size_t vert_total;			/* set to MACH64_BUFFER_SIZE at context creation */
+   unsigned vert_used;			/* DMAADVANCE() lowers it by 4 bytes per unwritten dword */
+
+   GLuint hw_primitive;
+   GLenum render_primitive;
+
+   /* Visual, drawable, cliprect and scissor information
+    */
+   GLint drawOffset, drawPitch;
+   GLint drawX, drawY;                  /* origin of drawable in draw buffer */
+   GLint readOffset, readPitch;
+
+   GLuint numClipRects;			/* Cliprects for the draw buffer */
+   drm_clip_rect_t *pClipRects;
+
+   GLint scissor;
+   drm_clip_rect_t ScissorRect;	/* Current software scissor */
+
+   /* Mirrors of some DRI state
+    */
+   __DRIcontext	*driContext;	/* DRI context */
+   __DRIscreen	*driScreen;	/* DRI screen */
+   __DRIdrawable	*driDrawable;	/* DRI drawable bound to this ctx */
+
+   unsigned int lastStamp;		/* mirror driDrawable->lastStamp */
+
+   drm_context_t hHWContext;
+   drm_hw_lock_t *driHwLock;
+   int driFd;
+
+   mach64ScreenPtr mach64Screen;	/* Screen private DRI data */
+   drm_mach64_sarea_t *sarea;		/* Private SAREA data */
+
+   GLuint hardwareWentIdle;
+
+#if ENABLE_PERF_BOXES
+   /* Performance counters
+    */
+   GLuint boxes;			/* Draw performance boxes */
+   GLuint c_clears;
+   GLuint c_drawWaits;
+   GLuint c_textureSwaps;
+   GLuint c_textureBytes;
+   GLuint c_agpTextureBytes;
+   GLuint c_texsrc_agp;
+   GLuint c_texsrc_card;
+   GLuint c_vertexBuffers;
+#endif
+
+   /* VBI
+    */
+   GLuint do_irqs;			/* screen has an irq and MACH64_NO_IRQS is unset in the environment */
+
+   /* Configuration cache
+    */
+   driOptionCache optionCache;		/* filled by driParseConfigFiles() in mach64CreateContext() */
+};
+/* Driver-private mach64 context stored in a GLcontext's DriverCtx field; the argument is parenthesized so any pointer expression works. */
+#define MACH64_CONTEXT(ctx)		((mach64ContextPtr)((ctx)->DriverCtx))
+
+
+extern GLboolean mach64CreateContext( gl_api api,
+				      const __GLcontextModes *glVisual,
+				      __DRIcontext *driContextPriv,
+                                      void *sharedContextPrivate );
+
+extern void mach64DestroyContext( __DRIcontext * );
+
+extern GLboolean mach64MakeCurrent( __DRIcontext *driContextPriv,
+                                    __DRIdrawable *driDrawPriv,
+                                    __DRIdrawable *driReadPriv );
+
+extern GLboolean mach64UnbindContext( __DRIcontext *driContextPriv );
+
+/* ================================================================
+ * Byte ordering: LE32_* read/write a 32-bit value through a pointer,
+ * byte-swapping with bswap_32 unless MESA_LITTLE_ENDIAN == 1. */
+#if MESA_LITTLE_ENDIAN == 1
+#define LE32_IN( x )		( *(GLuint *)(x) )
+#define LE32_IN_FLOAT( x )	( *(GLfloat *)(x) )
+#define LE32_OUT( x, y )	do { *(GLuint *)(x) = (y); } while (0)
+#define LE32_OUT_FLOAT( x, y )	do { *(GLfloat *)(x) = (y); } while (0)
+#else
+#ifndef __OpenBSD__
+#include <byteswap.h>
+#else
+#include <machine/endian.h>
+#define bswap_32 bswap32
+#endif
+
+#define LE32_IN( x )		bswap_32( *(GLuint *)(x) )
+#define LE32_IN_FLOAT( x )						\
+({									\
+   GLuint __tmp = bswap_32( *(GLuint *)(x) );				\
+   *(GLfloat *)&__tmp;							\
+})
+#define LE32_OUT( x, y )	do { *(GLuint *)(x) = bswap_32( y ); } while (0)
+#define LE32_OUT_FLOAT( x, y )						\
+do {									\
+   GLuint __tmp;							\
+   *(GLfloat *)&__tmp = (y);						\
+   *(GLuint *)(x) = bswap_32( __tmp );					\
+} while (0)
+#endif
+
+/* ================================================================
+ * DMA buffers: the macros below use a local `mmesa` plus the DMALOCALS locals
+ */
+/* Declares the locals buf, requested and outcount used by the other DMA macros. */
+#define DMALOCALS       CARD32 *buf=NULL; int requested=0; int outcount=0
+
+/* called while locked for interleaved client-side state emits */
+#define DMAGETPTR( dwords )					\
+do {								\
+   requested = (dwords);					\
+   buf = (CARD32 *)mach64AllocDmaLocked( mmesa, ((dwords)*4) );	\
+   outcount = 0;						\
+} while(0)
+/* Writes two dwords into buf: ADRINDEX( reg ), then val. */
+#define DMAOUTREG( reg, val )				\
+do {							\
+   LE32_OUT( &buf[outcount++], ADRINDEX( reg ) );	\
+   LE32_OUT( &buf[outcount++], ( val ) );		\
+} while(0)
+/* Subtracts from mmesa->vert_used the bytes (4 per dword) that were requested but not written. */
+#define DMAADVANCE()						\
+do {								\
+   if (outcount < requested) {					\
+      mmesa->vert_used -= (requested - outcount) * 4;	\
+   }								\
+} while(0)
+
+/* ================================================================
+ * Debugging:
+ */
+
+#define DO_DEBUG		1
+
+#if DO_DEBUG
+extern int MACH64_DEBUG;
+#else
+#define MACH64_DEBUG		0
+#endif
+
+#define DEBUG_ALWAYS_SYNC	0x001
+#define DEBUG_VERBOSE_API	0x002
+#define DEBUG_VERBOSE_MSG	0x004
+#define DEBUG_VERBOSE_LRU	0x008
+#define DEBUG_VERBOSE_DRI	0x010
+#define DEBUG_VERBOSE_IOCTL	0x020
+#define DEBUG_VERBOSE_PRIMS	0x040
+#define DEBUG_VERBOSE_COUNT	0x080
+#define DEBUG_NOWAIT		0x100
+#define DEBUG_VERBOSE_FALLBACK	0x200
+#endif /* __MACH64_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_dd.c b/src/mesa/drivers/dri/mach64/mach64_dd.c
new file mode 100644
index 0000000000..ca713e2de5
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_dd.c
@@ -0,0 +1,132 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_dd.h"
+
+#include "main/context.h"
+
+#include "utils.h"
+
+#define DRIVER_DATE	"20051019"
+
+/* Report the current drawable's size, read under the hardware lock; the buffer argument is unused. */
+static void mach64DDGetBufferSize( GLframebuffer *buffer,
+				   GLuint *width, GLuint *height )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   __DRIdrawable *dPriv;
+   LOCK_HARDWARE( mmesa );
+   dPriv = mmesa->driDrawable;
+   *width  = dPriv->w;
+   *height = dPriv->h;
+   UNLOCK_HARDWARE( mmesa );
+}
+
+/* Return driver strings for glGetString(): GL_VENDOR is a fixed author
+ * list, GL_RENDERER is built by driGetRendererString() in a static buffer
+ * (overwritten by the next such query), and any other name yields NULL.
+ */
+static const GLubyte *mach64DDGetString( GLcontext *ctx, GLenum name )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   static char buffer[128];
+   const char * card_name = "Mach64 [Rage Pro]";
+   GLuint agp_mode = mmesa->mach64Screen->IsPCI ? 0 :
+      mmesa->mach64Screen->AGPMode;
+
+   switch ( name ) {
+   case GL_VENDOR:
+      return (GLubyte*)"Gareth Hughes, Leif Delgass, Jos� Fonseca";
+
+   case GL_RENDERER:
+      /* only the formatted buffer matters; the returned value is unused */
+      (void) driGetRendererString( buffer, card_name, DRIVER_DATE, agp_mode );
+      return (GLubyte *)buffer;
+
+   default:
+      return NULL;
+   }
+}
+
+/* Send all commands to the hardware.  If vertex buffers or indirect
+ * buffers are in use, then we need to make sure they are sent to the
+ * hardware.  All commands that are normally sent to the ring are
+ * already considered `flushed'.
+ */
+static void mach64DDFlush( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* The flush macro is run with the hardware lock held. */
+   LOCK_HARDWARE( mmesa );
+   FLUSH_DMA_LOCKED( mmesa );
+   UNLOCK_HARDWARE( mmesa );
+
+#if ENABLE_PERF_BOXES
+   if ( mmesa->boxes ) {
+      LOCK_HARDWARE( mmesa );
+      mach64PerformanceBoxesLocked( mmesa );
+      UNLOCK_HARDWARE( mmesa );
+   }
+   /* The lock above is taken a second time just for drawing the overlay boxes. */
+   /* Log the performance counters if necessary */
+   mach64PerformanceCounters( mmesa );
+#endif
+}
+
+/* Make sure all commands have been sent to the hardware and have
+ * completed processing.
+ */
+static void mach64DDFinish( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* c_drawWaits is what makes the overlay draw its red "finish was called" box. */
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   mmesa->c_drawWaits++;
+#endif
+   /* Flush first, then wait; mach64WaitForIdle is defined outside this file (presumably the locking wrapper of mach64WaitForIdleLocked -- confirm). */
+   mach64DDFlush( ctx );
+   mach64WaitForIdle( mmesa );
+}
+
+/* Plug this file's flush, finish, string and buffer-size callbacks into
+ * the device-driver function table. */
+void mach64InitDriverFuncs( struct dd_function_table *functions )
+{
+   /* synchronization hooks first, then the query hooks */
+   functions->Flush = mach64DDFlush;
+   functions->Finish = mach64DDFinish;
+   functions->GetString = mach64DDGetString;
+   functions->GetBufferSize = mach64DDGetBufferSize;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_dd.h b/src/mesa/drivers/dri/mach64/mach64_dd.h
new file mode 100644
index 0000000000..0a2ce06412
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_dd.h
@@ -0,0 +1,36 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __MACH64_DD_H__
+#define __MACH64_DD_H__
+/* Installs the mach64 GetBufferSize, GetString, Finish and Flush callbacks. */
+extern void mach64InitDriverFuncs( struct dd_function_table *functions );
+
+#endif
diff --git a/src/mesa/drivers/dri/mach64/mach64_ioctl.c b/src/mesa/drivers/dri/mach64/mach64_ioctl.c
new file mode 100644
index 0000000000..03587c44fd
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_ioctl.c
@@ -0,0 +1,932 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+#include <errno.h>
+
+#include "mach64_context.h"
+#include "mach64_state.h"
+#include "mach64_ioctl.h"
+#include "mach64_tex.h"
+
+#include "main/imports.h"
+#include "main/macros.h"
+
+#include "swrast/swrast.h"
+
+#include "vblank.h"
+
+#define MACH64_TIMEOUT        10 /* the DRM already has a timeout, so keep this small */
+
+
+/* =============================================================
+ * Hardware vertex buffer handling
+ */
+
+/* Get a new VB from the pool of vertex buffers in AGP space.
+ *
+ * One buffer of MACH64_BUFFER_SIZE is requested through drmDMA(), with up
+ * to MACH64_TIMEOUT attempts.  The granted buffer is returned with its
+ * used count cleared.  If no attempt succeeds, DRM_MACH64_RESET is issued,
+ * the hardware lock is released and the process exits -- NULL is never
+ * returned to the caller.
+ */
+drmBufPtr mach64GetBufferLocked( mach64ContextPtr mmesa )
+{
+   int fd = mmesa->mach64Screen->driScreen->fd;
+   int index = 0;
+   int size = 0;
+   drmDMAReq dma;
+   int to;
+
+   dma.context = mmesa->hHWContext;
+   dma.send_count = 0;
+   dma.send_list = NULL;
+   dma.send_sizes = NULL;
+   dma.flags = 0;
+   dma.request_count = 1;
+   dma.request_size = MACH64_BUFFER_SIZE;
+   dma.request_list = &index;
+   dma.request_sizes = &size;
+   dma.granted_count = 0;
+
+   for ( to = 0 ; to < MACH64_TIMEOUT ; to++ ) {
+      if ( drmDMA( fd, &dma ) == 0 ) {
+	 /* index is written by the request through dma.request_list */
+	 drmBufPtr buf = &mmesa->mach64Screen->buffers->list[index];
+	 buf->used = 0;
+#if ENABLE_PERF_BOXES
+	 /* Bump the performance counter */
+	 mmesa->c_vertexBuffers++;
+#endif
+	 return buf;
+      }
+   }
+
+   drmCommandNone( fd, DRM_MACH64_RESET );
+   UNLOCK_HARDWARE( mmesa );
+   fprintf( stderr, "Error: Could not get new VB... exiting\n" );
+   exit( -1 );
+   return NULL;	/* not reached */
+}
+/* Submit the context's pending vertex buffer through DRM_MACH64_VERTEX, once per batch of cliprects. */
+void mach64FlushVerticesLocked( mach64ContextPtr mmesa )
+{
+   drm_clip_rect_t *pbox = mmesa->pClipRects;
+   int nbox = mmesa->numClipRects;
+   void *buffer = mmesa->vert_buf;
+   int count = mmesa->vert_used;
+   int prim = mmesa->hw_primitive;
+   int fd = mmesa->driScreen->fd;
+   drm_mach64_vertex_t vertex;
+   int i;
+   /* The fill counters are reset up front; count keeps the amount to submit. */
+   mmesa->num_verts = 0;
+   mmesa->vert_used = 0;
+   /* Nothing was queued. */
+   if ( !count )
+      return;
+   /* Emit any dirty hardware state other than cliprects before the vertices. */
+   if ( mmesa->dirty & ~MACH64_UPLOAD_CLIPRECTS )
+      mach64EmitHwStateLocked( mmesa );
+   /* With no cliprects, submit a used size of 0; the single-ioctl path below still discards the buffer. */
+   if ( !nbox )
+      count = 0;
+   /* More boxes than the SAREA can hold forces the batched path below. */
+   if ( nbox > MACH64_NR_SAREA_CLIPRECTS )
+      mmesa->dirty |= MACH64_UPLOAD_CLIPRECTS;
+
+   if ( !count || !(mmesa->dirty & MACH64_UPLOAD_CLIPRECTS) ) {
+      int to = 0;
+      int ret;
+
+      /* FIXME: Is this really necessary */
+      if ( nbox == 1 )
+	 mmesa->sarea->nbox = 0;
+      else
+	 mmesa->sarea->nbox = nbox;
+
+      vertex.prim = prim;
+      vertex.buf = buffer;
+      vertex.used = count;
+      vertex.discard = 1;
+      do {
+	 ret = drmCommandWrite( fd, DRM_MACH64_VERTEX,
+				&vertex, sizeof(drm_mach64_vertex_t) );
+      } while ( ( ret == -EAGAIN ) && ( to++ < MACH64_TIMEOUT ) );
+      if ( ret ) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "Error flushing vertex buffer: return = %d\n", ret );
+	 exit( -1 );
+      }
+
+   } else {
+      /* Re-submit the same buffer once per group of at most MACH64_NR_SAREA_CLIPRECTS boxes. */
+      for ( i = 0 ; i < nbox ; ) {
+	 int nr = MIN2( i + MACH64_NR_SAREA_CLIPRECTS, nbox );
+	 drm_clip_rect_t *b = mmesa->sarea->boxes;
+	 int discard = 0;
+	 int to = 0;
+	 int ret;
+
+	 mmesa->sarea->nbox = nr - i;
+	 for ( ; i < nr ; i++ ) {
+	    *b++ = pbox[i];
+	 }
+
+	 /* Finished with the buffer?
+	  */
+	 if ( nr == nbox ) {
+	    discard = 1;
+	 }
+
+	 mmesa->sarea->dirty |= MACH64_UPLOAD_CLIPRECTS;
+	 
+	 vertex.prim = prim;
+	 vertex.buf = buffer;
+	 vertex.used = count;
+	 vertex.discard = discard;
+	 do {
+	    ret = drmCommandWrite( fd, DRM_MACH64_VERTEX,
+				   &vertex, sizeof(drm_mach64_vertex_t) );
+	 } while ( ( ret == -EAGAIN ) && ( to++ < MACH64_TIMEOUT ) );
+	 if ( ret ) {
+	    UNLOCK_HARDWARE( mmesa );
+	    fprintf( stderr, "Error flushing vertex buffer: return = %d\n", ret );
+	    exit( -1 );
+	 }
+      }
+   }
+   /* The cliprect dirty bit is cleared on both paths. */
+   mmesa->dirty &= ~MACH64_UPLOAD_CLIPRECTS;
+}
+
+/* ================================================================
+ * Texture uploads
+ */
+
+void mach64FireBlitLocked( mach64ContextPtr mmesa, void *buffer,
+			   GLint offset, GLint pitch, GLint format,
+			   GLint x, GLint y, GLint width, GLint height )
+{
+   drm_mach64_blit_t req;
+   int attempts = 0;
+   int status;
+   /* Copy the caller's arguments into the DRM_MACH64_BLIT request. */
+   req.buf = buffer;
+   req.offset = offset;
+   req.pitch = pitch;
+   req.format = format;
+   req.x = x;
+   req.y = y;
+   req.width = width;
+   req.height = height;
+   /* Retry while the DRM answers -EAGAIN, at most MACH64_TIMEOUT times; any other failure is fatal. */
+   for ( ;; ) {
+      status = drmCommandWrite( mmesa->driFd, DRM_MACH64_BLIT, &req, sizeof(req) );
+      if ( status != -EAGAIN || attempts++ >= MACH64_TIMEOUT )
+	 break;
+   }
+   if ( !status )
+      return;
+   UNLOCK_HARDWARE( mmesa );
+   fprintf( stderr, "DRM_MACH64_BLIT: return = %d\n", status );
+   exit( -1 );
+}
+
+
+/* ================================================================
+ * SwapBuffers with client-side throttling
+ */
+static void delay( void ) {
+   __asm__ __volatile__( "" : : : "memory" );	/* compiler barrier: an empty body lets the optimizer drop the caller's spin loop */
+}
+
+/* Throttle the frame rate: poll the DRM until fewer than
+ * MACH64_MAX_QUEUED_FRAMES swap requests are pending.
+ *
+ * Returns 0 if no polling was needed, otherwise the number of polls made
+ * (or 1 straight away when DEBUG_NOWAIT is set).  There is no timeout on
+ * purpose: a frame that never completes means the card has locked.
+ * A failing GETPARAM ioctl releases the lock and terminates the process.
+ */
+static int mach64WaitForFrameCompletion( mach64ContextPtr mmesa )
+{
+   const int fd = mmesa->driFd;
+   int polls = 0;
+   int frames;
+
+   while ( mmesa->sarea->frames_queued >= MACH64_MAX_QUEUED_FRAMES ) {
+      drm_mach64_getparam_t gp;
+      int spin;
+      int ret;
+
+      /* Debug override: report "had to wait" without waiting. */
+      if ( MACH64_DEBUG & DEBUG_NOWAIT )
+	 return 1;
+
+      /* Ask the DRM for the queue depth.  The answer lands in frames,
+       * and the DRM also copies it into sarea->frames_queued, which is
+       * what the loop condition re-reads.
+       */
+      gp.value = &frames;
+      gp.param = MACH64_PARAM_FRAMES_QUEUED;
+
+      ret = drmCommandWriteRead( fd, DRM_MACH64_GETPARAM, &gp, sizeof(gp) );
+      if ( ret != 0 ) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_GETPARAM: return = %d\n", ret );
+	 exit( -1 );
+      }
+
+      polls++;
+
+      /* Spin in place a bit so we aren't hammering the register */
+      for ( spin = 1024 ; spin > 0 ; spin-- ) {
+	 delay();
+      }
+   }
+
+   /* zero means the queue was never full */
+   return polls;
+}
+
+/* Copy the back color buffer to the front color buffer: throttle, wait for
+ * vblank, then issue one DRM_MACH64_SWAP per batch of front cliprects. */
+void mach64CopyBuffer( __DRIdrawable *dPriv )
+{
+   mach64ContextPtr mmesa;
+   GLint nbox, i, ret;
+   drm_clip_rect_t *pbox;
+   GLboolean missed_target;
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   mmesa = (mach64ContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "\n********************************\n" );
+      fprintf( stderr, "\n%s( %p )\n\n",
+	       __FUNCTION__, mmesa->glCtx );
+      fflush( stderr );
+   }
+
+   /* Flush any outstanding vertex buffers */
+   FLUSH_BATCH( mmesa );
+
+   LOCK_HARDWARE( mmesa );
+
+   /* Throttle the frame rate -- only allow one pending swap buffers
+    * request at a time.
+    */
+   if ( !mach64WaitForFrameCompletion( mmesa ) ) {
+      mmesa->hardwareWentIdle = 1;
+   } else {
+      mmesa->hardwareWentIdle = 0;
+   }
+
+#if ENABLE_PERF_BOXES
+   if ( mmesa->boxes ) {
+      mach64PerformanceBoxesLocked( mmesa );
+   }
+#endif
+   /* The lock is dropped across the vblank wait and taken again afterwards. */
+   UNLOCK_HARDWARE( mmesa );
+   driWaitForVBlank( dPriv, &missed_target );
+   LOCK_HARDWARE( mmesa );
+
+   /* use front buffer cliprects */
+   nbox = dPriv->numClipRects;
+   pbox = dPriv->pClipRects;
+   /* Copy at most MACH64_NR_SAREA_CLIPRECTS boxes into the SAREA per swap ioctl. */
+   for ( i = 0 ; i < nbox ; ) {
+      GLint nr = MIN2( i + MACH64_NR_SAREA_CLIPRECTS , nbox );
+      drm_clip_rect_t *b = mmesa->sarea->boxes;
+      GLint n = 0;
+
+      for ( ; i < nr ; i++ ) {
+	 *b++ = pbox[i];
+	 n++;
+      }
+      mmesa->sarea->nbox = n;
+
+      ret = drmCommandNone( mmesa->driFd, DRM_MACH64_SWAP );
+
+      if ( ret ) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_SWAP: return = %d\n", ret );
+	 exit( -1 );
+      }
+   }
+
+   if ( MACH64_DEBUG & DEBUG_ALWAYS_SYNC ) {
+      mach64WaitForIdleLocked( mmesa );
+   }
+
+   UNLOCK_HARDWARE( mmesa );
+   /* Flag context, misc and cliprect state as dirty after the swap. */
+   mmesa->dirty |= (MACH64_UPLOAD_CONTEXT |
+		    MACH64_UPLOAD_MISC |
+		    MACH64_UPLOAD_CLIPRECTS);
+
+#if ENABLE_PERF_BOXES
+   /* Log the performance counters if necessary */
+   mach64PerformanceCounters( mmesa );
+#endif
+}
+
+#if ENABLE_PERF_BOXES
+/* ================================================================
+ * Performance monitoring
+ */
+
+void mach64PerformanceCounters( mach64ContextPtr mmesa )
+{
+   /* Print (when DEBUG_VERBOSE_COUNT is set) and then reset the counters. */
+   const int verbose = MACH64_DEBUG & DEBUG_VERBOSE_COUNT;
+   const int texture_activity = mmesa->c_textureSwaps ||
+      mmesa->c_textureBytes || mmesa->c_agpTextureBytes;
+
+   if ( verbose ) {
+      fprintf( stderr, "mach64CopyBuffer: vertexBuffers:%i drawWaits:%i clears:%i\n",
+	       mmesa->c_vertexBuffers, mmesa->c_drawWaits, mmesa->c_clears );
+   }
+   /* the basic counters always start over */
+   mmesa->c_clears = 0;
+   mmesa->c_drawWaits = 0;
+   mmesa->c_vertexBuffers = 0;
+
+   if ( texture_activity ) {
+      if ( verbose ) {
+	 fprintf( stderr, "    textureSwaps:%i  textureBytes:%i agpTextureBytes:%i\n",
+		  mmesa->c_textureSwaps, mmesa->c_textureBytes, mmesa->c_agpTextureBytes );
+      }
+      mmesa->c_textureSwaps = mmesa->c_textureBytes = mmesa->c_agpTextureBytes = 0;
+   }
+   mmesa->c_texsrc_agp = mmesa->c_texsrc_card = 0;
+
+   if ( verbose ) {
+      fprintf( stderr, "---------------------------------------------------------\n" );
+   }
+}
+
+/* Draw the performance overlay into the back buffer: small colored boxes and bars, each one a DRM_MACH64_CLEAR on a single cliprect. */
+void mach64PerformanceBoxesLocked( mach64ContextPtr mmesa )
+{
+   GLint ret;
+   drm_mach64_clear_t clear;
+   GLint x, y, w, h;
+   GLuint color;
+   GLint nbox;
+   GLint x1, y1, x2, y2;
+   drm_clip_rect_t *b = mmesa->sarea->boxes;
+   /* sarea->nbox and boxes[0] are borrowed for the overlay and restored at the end */
+   /* save cliprects */
+   nbox = mmesa->sarea->nbox;
+   x1 = b[0].x1;
+   y1 = b[0].y1;
+   x2 = b[0].x2;
+   y2 = b[0].y2;
+ 
+   /* setup a single cliprect and call the clear ioctl for each box */
+   mmesa->sarea->nbox = 1;
+   /* first box: 8x8 at the drawable origin */
+   w = h = 8;
+   x = mmesa->drawX;
+   y = mmesa->drawY;
+   b[0].x1 = x;
+   b[0].x2 = x + w;
+   b[0].y1 = y;
+   b[0].y2 = y + h;
+   /* every overlay clear targets the back buffer only */
+   clear.flags = MACH64_BACK;
+   clear.clear_depth = 0;
+
+   /* Red box if DDFinish was called to wait for rendering to complete */
+   if ( mmesa->c_drawWaits ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 255, 0, 0, 0 );
+      
+      clear.x = x;
+      clear.y = y;
+      clear.w = w;
+      clear.h = h;
+      clear.clear_color = color;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+			     &clear, sizeof(drm_mach64_clear_t) );
+
+      if (ret < 0) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+
+   }
+   /* advance one box to the right */
+   x += w;
+   b[0].x1 = x;
+   b[0].x2 = x + w;
+
+   /* draw a green box if we had to wait for previous frame(s) to complete */
+   if ( !mmesa->hardwareWentIdle ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 0, 255, 0, 0 );
+      
+      clear.x = x;
+      clear.y = y;
+      clear.w = w;
+      clear.h = h;
+      clear.clear_color = color;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+			     &clear, sizeof(drm_mach64_clear_t) );
+
+      if (ret < 0) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+
+   }
+   /* next comes a 20-pixel-wide strip shared by two colors */
+   x += w;
+   w = 20;
+   b[0].x1 = x;
+
+   /* show approx. ratio of AGP/card textures used - Blue = AGP, Purple = Card */
+   if ( mmesa->c_texsrc_agp || mmesa->c_texsrc_card ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 0, 0, 255, 0 );
+      w = ((GLfloat)mmesa->c_texsrc_agp / (GLfloat)(mmesa->c_texsrc_agp + mmesa->c_texsrc_card))*20;
+      if (w > 1) {
+
+	 b[0].x2 = x + w;
+
+	 clear.x = x;
+	 clear.y = y;
+	 clear.w = w;
+	 clear.h = h;
+	 clear.clear_color = color;
+
+	 ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+				&clear, sizeof(drm_mach64_clear_t) );
+
+	 if (ret < 0) {
+	    UNLOCK_HARDWARE( mmesa );
+	    fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	    exit( -1 );
+	 }
+      }
+
+      x += w;
+      w = 20 - w;
+
+      if (w > 1) {
+	 b[0].x1 = x;
+	 b[0].x2 = x + w;
+
+	 color = mach64PackColor( mmesa->mach64Screen->cpp, 255, 0, 255, 0 );
+
+	 clear.x = x;
+	 clear.y = y;
+	 clear.w = w;
+	 clear.h = h;
+	 clear.clear_color = color;
+
+	 ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+				&clear, sizeof(drm_mach64_clear_t) );
+
+	 if (ret < 0) {
+	    UNLOCK_HARDWARE( mmesa );
+	    fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	    exit( -1 );
+	 }
+      }
+   }  
+   /* on either path x has now moved 20 pixels past the start of the strip */
+   x += w;
+   w = 8;
+   b[0].x1 = x;
+   b[0].x2 = x + w;
+
+   /* Yellow box if we swapped textures */
+   if ( mmesa->c_textureSwaps ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 255, 255, 0, 0 );
+
+      clear.x = x;
+      clear.y = y;
+      clear.w = w;
+      clear.h = h;
+      clear.clear_color = color;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+				&clear, sizeof(drm_mach64_clear_t) );
+
+      if (ret < 0) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+      
+   }
+   /* the two texture-upload bars are 4 rows high and stacked vertically */
+   h = 4;
+   x += 8;
+   b[0].x1 = x;
+   b[0].y2 = y + h;
+
+   /* Purple bar for card memory texture blits/uploads */
+   if ( mmesa->c_textureBytes ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 255, 0, 255, 0 );
+      w = mmesa->c_textureBytes / 16384;	/* one pixel of width per 16384 bytes */
+      if ( w <= 0 ) 
+	 w = 1; 
+      if (w > (mmesa->driDrawable->w - 44))
+	 w = mmesa->driDrawable->w - 44;
+
+      b[0].x2 = x + w;
+
+      clear.x = x;
+      clear.y = y;
+      clear.w = w;
+      clear.h = h;
+      clear.clear_color = color;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+				&clear, sizeof(drm_mach64_clear_t) );
+
+      if (ret < 0) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+   }
+
+   /* Blue bar for AGP memory texture blits/uploads */
+   if ( mmesa->c_agpTextureBytes ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 0, 0, 255, 0 );
+      w = mmesa->c_agpTextureBytes / 16384;	/* one pixel of width per 16384 bytes */
+      if ( w <= 0 ) 
+	 w = 1; 
+      if (w > (mmesa->driDrawable->w - 44))
+	 w = mmesa->driDrawable->w - 44;
+
+      y += 4;
+      b[0].x2 = x + w;
+      b[0].y1 = y;
+      b[0].y2 = y + h;
+
+      clear.x = x;
+      clear.y = y;
+      clear.w = w;
+      clear.h = h;
+      clear.clear_color = color;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+				&clear, sizeof(drm_mach64_clear_t) );
+
+      if (ret < 0) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+   }
+
+   /* Pink bar for number of vertex buffers used */
+   if ( mmesa->c_vertexBuffers ) {
+      color = mach64PackColor( mmesa->mach64Screen->cpp, 196, 128, 128, 0 );
+      /* one pixel of width per vertex buffer, capped at the drawable width */
+      w = mmesa->c_vertexBuffers;
+      if (w > (mmesa->driDrawable->w))
+	 w = mmesa->driDrawable->w;
+      /* this bar starts a second row, 8 pixels below the drawable origin */
+      h = 8;
+      x = mmesa->drawX;
+      y = mmesa->drawY + 8;
+      b[0].x1 = x;
+      b[0].x2 = x + w;
+      b[0].y1 = y;
+      b[0].y2 = y + h;
+
+      clear.x = x;
+      clear.y = y;
+      clear.w = w;
+      clear.h = h;
+      clear.clear_color = color;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+				&clear, sizeof(drm_mach64_clear_t) );
+
+      if (ret < 0) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+   }
+
+   /* restore cliprects */
+   mmesa->sarea->nbox = nbox;
+   b[0].x1 = x1;
+   b[0].y1 = y1;
+   b[0].x2 = x2;
+   b[0].y2 = y2;
+
+}
+
+#endif
+
+/* ================================================================
+ * Buffer clear
+ */
+/* Clear callback: front/back/depth go through DRM_MACH64_CLEAR per cliprect batch; any other requested buffers are passed to _swrast_Clear(). */
+static void mach64DDClear( GLcontext *ctx, GLbitfield mask )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   __DRIdrawable *dPriv = mmesa->driDrawable;
+   drm_mach64_clear_t clear;
+   GLuint flags = 0;
+   GLint i;
+   GLint ret;
+   GLint cx, cy, cw, ch;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "mach64DDClear\n");
+   }
+
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   mmesa->c_clears++;
+#endif
+
+   FLUSH_BATCH( mmesa );
+
+   /* The only state changes we care about here are the RGBA colormask
+    * and scissor/clipping.  We'll just update that state, if needed.
+    */
+   if ( mmesa->new_state & (MACH64_NEW_MASKS | MACH64_NEW_CLIP) ) {
+      const GLuint save_state = mmesa->new_state;
+      mmesa->new_state &= (MACH64_NEW_MASKS | MACH64_NEW_CLIP);
+      mach64DDUpdateHWState( ctx );
+      mmesa->new_state = save_state & ~(MACH64_NEW_MASKS | MACH64_NEW_CLIP);
+   }
+   /* Peel off the buffers cleared through the DRM; whatever stays in mask is left to swrast. */
+   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
+      flags |= MACH64_FRONT;
+      mask &= ~BUFFER_BIT_FRONT_LEFT;
+   }
+
+   if ( mask & BUFFER_BIT_BACK_LEFT ) {
+      flags |= MACH64_BACK;
+      mask &= ~BUFFER_BIT_BACK_LEFT;
+   }
+   /* depth takes the DRM path only when depth writes are enabled */
+   if ( ( mask & BUFFER_BIT_DEPTH ) && ctx->Depth.Mask ) {
+      flags |= MACH64_DEPTH;
+      mask &= ~BUFFER_BIT_DEPTH;
+   }
+
+   if ( mask )
+      _swrast_Clear( ctx, mask );
+   /* nothing left for the DRM clear */
+   if ( !flags )
+      return;
+
+   LOCK_HARDWARE( mmesa );
+
+   /* compute region after locking: */
+   cx = ctx->DrawBuffer->_Xmin;
+   cy = ctx->DrawBuffer->_Ymin;
+   cw = ctx->DrawBuffer->_Xmax - cx;
+   ch = ctx->DrawBuffer->_Ymax - cy;
+
+   /* Flip top to bottom */
+   cx += mmesa->drawX;
+   cy  = mmesa->drawY + dPriv->h - cy - ch;
+
+   /* HACK?
+    */
+   if ( mmesa->dirty & ~MACH64_UPLOAD_CLIPRECTS ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   /* One clear ioctl per batch of at most MACH64_NR_SAREA_CLIPRECTS boxes. */
+   for ( i = 0 ; i < mmesa->numClipRects ; ) {
+      int nr = MIN2( i + MACH64_NR_SAREA_CLIPRECTS, mmesa->numClipRects );
+      drm_clip_rect_t *box = mmesa->pClipRects;
+      drm_clip_rect_t *b = mmesa->sarea->boxes;
+      GLint n = 0;
+      /* a clear rectangle smaller than the drawable needs each cliprect intersected with it */
+      if (cw != dPriv->w || ch != dPriv->h) {
+         /* clear subregion */
+	 for ( ; i < nr ; i++ ) {
+	    GLint x = box[i].x1;
+	    GLint y = box[i].y1;
+	    GLint w = box[i].x2 - x;
+	    GLint h = box[i].y2 - y;
+
+	    if ( x < cx ) w -= cx - x, x = cx;
+	    if ( y < cy ) h -= cy - y, y = cy;
+	    if ( x + w > cx + cw ) w = cx + cw - x;
+	    if ( y + h > cy + ch ) h = cy + ch - y;
+	    if ( w <= 0 ) continue;
+	    if ( h <= 0 ) continue;
+
+	    b->x1 = x;
+	    b->y1 = y;
+	    b->x2 = x + w;
+	    b->y2 = y + h;
+	    b++;
+	    n++;
+	 }
+      } else {
+         /* clear whole window */
+	 for ( ; i < nr ; i++ ) {
+	    *b++ = box[i];
+	    n++;
+	 }
+      }
+      /* n can be smaller than the batch size when boxes fell outside the clear rectangle */
+      mmesa->sarea->nbox = n;
+
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_IOCTL ) {
+	 fprintf( stderr,
+		  "DRM_MACH64_CLEAR: flag 0x%x color %x depth %x nbox %d\n",
+		  flags,
+		  (GLuint)mmesa->ClearColor,
+		  (GLuint)mmesa->ClearDepth,
+		  mmesa->sarea->nbox );
+      }
+
+      clear.flags = flags;
+      clear.x = cx;
+      clear.y = cy;
+      clear.w = cw;
+      clear.h = ch;
+      clear.clear_color = mmesa->ClearColor;
+      clear.clear_depth = mmesa->ClearDepth;
+
+      ret = drmCommandWrite( mmesa->driFd, DRM_MACH64_CLEAR,
+			     &clear, sizeof(drm_mach64_clear_t) );
+
+      if ( ret ) {
+	 UNLOCK_HARDWARE( mmesa );
+	 fprintf( stderr, "DRM_MACH64_CLEAR: return = %d\n", ret );
+	 exit( -1 );
+      }
+   }
+
+   UNLOCK_HARDWARE( mmesa );
+   /* Flag context, misc and cliprect state as dirty after the clear. */
+   mmesa->dirty |= (MACH64_UPLOAD_CONTEXT |
+		    MACH64_UPLOAD_MISC |
+		    MACH64_UPLOAD_CLIPRECTS);
+
+}
+
+
+void mach64WaitForIdleLocked( mach64ContextPtr mmesa )
+{
+   int fd = mmesa->driFd;
+   int to = 0;
+   int ret;
+
+   do {
+      ret = drmCommandNone( fd, DRM_MACH64_IDLE );
+   } while ( ( ret == -EBUSY ) && ( to++ < MACH64_TIMEOUT ) );
+
+   if ( ret < 0 ) {
+      drmCommandNone( fd, DRM_MACH64_RESET );
+      UNLOCK_HARDWARE( mmesa );
+      fprintf( stderr, "Error: Mach64 timed out... exiting\n" );
+      exit( -1 );
+   }
+}
+
+/* Flush the DMA queue to the hardware.
+ *
+ * A failing DRM_MACH64_FLUSH triggers DRM_MACH64_RESET, releases the lock
+ * and exits.  Otherwise the context, misc and cliprect dirty bits are set.
+ */
+void mach64FlushDMALocked( mach64ContextPtr mmesa )
+{
+   const int fd = mmesa->driFd;
+
+   if ( drmCommandNone( fd, DRM_MACH64_FLUSH ) < 0 ) {
+      drmCommandNone( fd, DRM_MACH64_RESET );
+      UNLOCK_HARDWARE( mmesa );
+      fprintf( stderr, "Error flushing DMA... exiting\n" );
+      exit( -1 );
+   }
+
+   mmesa->dirty |= MACH64_UPLOAD_CONTEXT;
+   mmesa->dirty |= MACH64_UPLOAD_MISC;
+   mmesa->dirty |= MACH64_UPLOAD_CLIPRECTS;
+}
+
+/* For client-side state emits - currently unused */
+void mach64UploadHwStateLocked( mach64ContextPtr mmesa )
+{
+   drm_mach64_sarea_t *sarea = mmesa->sarea;
+   
+   drm_mach64_context_regs_t *regs = &sarea->context_state;
+   unsigned int dirty = sarea->dirty;
+   CARD32 offset = ((regs->tex_size_pitch & 0xf0) >> 2);
+   /* NOTE(review): offset is added to MACH64_TEX_0_OFF below, presumably selecting a per-level texture offset register -- confirm. */
+   DMALOCALS;
+   /* Reserve room for the worst case: 19 register writes of two dwords each. */
+   DMAGETPTR( 19*2 );
+   /* Each group below is emitted only if its bit was set in the dirty snapshot taken above. */
+   if ( dirty & MACH64_UPLOAD_MISC ) {
+      DMAOUTREG( MACH64_DP_MIX, regs->dp_mix );
+      DMAOUTREG( MACH64_DP_SRC, regs->dp_src );
+      DMAOUTREG( MACH64_CLR_CMP_CNTL, regs->clr_cmp_cntl );
+      DMAOUTREG( MACH64_GUI_TRAJ_CNTL, regs->gui_traj_cntl );
+      DMAOUTREG( MACH64_SC_LEFT_RIGHT, regs->sc_left_right );
+      DMAOUTREG( MACH64_SC_TOP_BOTTOM, regs->sc_top_bottom );
+      sarea->dirty &= ~MACH64_UPLOAD_MISC;
+   }
+
+   if ( dirty & MACH64_UPLOAD_DST_OFF_PITCH ) {
+      DMAOUTREG( MACH64_DST_OFF_PITCH, regs->dst_off_pitch );
+      sarea->dirty &= ~MACH64_UPLOAD_DST_OFF_PITCH;
+   }
+   if ( dirty & MACH64_UPLOAD_Z_OFF_PITCH ) {
+      DMAOUTREG( MACH64_Z_OFF_PITCH, regs->z_off_pitch );
+      sarea->dirty &= ~MACH64_UPLOAD_Z_OFF_PITCH;
+   }
+   if ( dirty & MACH64_UPLOAD_Z_ALPHA_CNTL ) {
+      DMAOUTREG( MACH64_Z_CNTL, regs->z_cntl );
+      DMAOUTREG( MACH64_ALPHA_TST_CNTL, regs->alpha_tst_cntl );
+      sarea->dirty &= ~MACH64_UPLOAD_Z_ALPHA_CNTL;
+   }
+   if ( dirty & MACH64_UPLOAD_SCALE_3D_CNTL ) {
+      DMAOUTREG( MACH64_SCALE_3D_CNTL, regs->scale_3d_cntl );
+      sarea->dirty &= ~MACH64_UPLOAD_SCALE_3D_CNTL;
+   }
+   if ( dirty & MACH64_UPLOAD_DP_FOG_CLR ) {
+      DMAOUTREG( MACH64_DP_FOG_CLR, regs->dp_fog_clr );
+      sarea->dirty &= ~MACH64_UPLOAD_DP_FOG_CLR;
+   }
+   if ( dirty & MACH64_UPLOAD_DP_WRITE_MASK ) {
+      DMAOUTREG( MACH64_DP_WRITE_MASK, regs->dp_write_mask );
+      sarea->dirty &= ~MACH64_UPLOAD_DP_WRITE_MASK;
+   }
+   if ( dirty & MACH64_UPLOAD_DP_PIX_WIDTH ) {
+      DMAOUTREG( MACH64_DP_PIX_WIDTH, regs->dp_pix_width );
+      sarea->dirty &= ~MACH64_UPLOAD_DP_PIX_WIDTH;
+   }
+   if ( dirty & MACH64_UPLOAD_SETUP_CNTL ) {
+      DMAOUTREG( MACH64_SETUP_CNTL, regs->setup_cntl );
+      sarea->dirty &= ~MACH64_UPLOAD_SETUP_CNTL;
+   }
+
+   if ( dirty & MACH64_UPLOAD_TEXTURE ) {
+      DMAOUTREG( MACH64_TEX_SIZE_PITCH, regs->tex_size_pitch );
+      DMAOUTREG( MACH64_TEX_CNTL, regs->tex_cntl );
+      DMAOUTREG( MACH64_SECONDARY_TEX_OFF, regs->secondary_tex_off );
+      DMAOUTREG( MACH64_TEX_0_OFF + offset, regs->tex_offset );
+      sarea->dirty &= ~MACH64_UPLOAD_TEXTURE;
+   }
+
+#if 0
+   if ( dirty & MACH64_UPLOAD_CLIPRECTS ) {
+      DMAOUTREG( MACH64_SC_LEFT_RIGHT, regs->sc_left_right );
+      DMAOUTREG( MACH64_SC_TOP_BOTTOM, regs->sc_top_bottom );
+      sarea->dirty &= ~MACH64_UPLOAD_CLIPRECTS;
+   }
+#endif
+   /* All dirty bits are cleared here, including any that were not handled above. */
+   sarea->dirty = 0;
+   /* Hand back the part of the reservation that was not written. */
+   DMAADVANCE();
+}
+/* Register mach64DDClear as the Clear callback of the driver function table. */
+void mach64InitIoctlFuncs( struct dd_function_table *functions )
+{
+    functions->Clear = mach64DDClear;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_ioctl.h b/src/mesa/drivers/dri/mach64/mach64_ioctl.h
new file mode 100644
index 0000000000..1ffda1932f
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_ioctl.h
@@ -0,0 +1,148 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_IOCTL_H__
+#define __MACH64_IOCTL_H__
+
+#include "mach64_dri.h"
+#include "mach64_reg.h"
+#include "mach64_lock.h"
+
+#define MACH64_BUFFER_MAX_DWORDS	(MACH64_BUFFER_SIZE / sizeof(CARD32))
+
+
+extern drmBufPtr mach64GetBufferLocked( mach64ContextPtr mmesa );
+extern void mach64FlushVerticesLocked( mach64ContextPtr mmesa );
+extern void mach64FlushDMALocked( mach64ContextPtr mmesa );
+extern void mach64UploadHwStateLocked( mach64ContextPtr mmesa );
+
+static INLINE void *mach64AllocDmaLow( mach64ContextPtr mmesa, int bytes )
+{
+   /* Hand out 'bytes' of the vertex buffer.  If the request does not fit,
+    * the buffered vertices are flushed first under the lock taken here. */
+   char *start;
+
+   if ( mmesa->vert_total < mmesa->vert_used + bytes ) {
+      LOCK_HARDWARE( mmesa );
+      mach64FlushVerticesLocked( mmesa );
+      UNLOCK_HARDWARE( mmesa );
+   }
+   start = (char *)mmesa->vert_buf + mmesa->vert_used;
+   mmesa->vert_used += bytes;
+   return start;
+}
+
+static INLINE void *mach64AllocDmaLocked( mach64ContextPtr mmesa, int bytes )
+{
+   /* Same reservation as the unlocked variant, but flushes without locking. */
+   char *start;
+
+   if ( mmesa->vert_total < mmesa->vert_used + bytes ) {
+      mach64FlushVerticesLocked( mmesa );
+   }
+
+   start = (char *)mmesa->vert_buf + mmesa->vert_used;
+   mmesa->vert_used += bytes;
+   return start;
+}
+
+extern void mach64FireBlitLocked( mach64ContextPtr mmesa, void *buffer,
+				  GLint offset, GLint pitch, GLint format,
+				  GLint x, GLint y, GLint width, GLint height );
+
+extern void mach64CopyBuffer( __DRIdrawable *dPriv );
+#if ENABLE_PERF_BOXES
+extern void mach64PerformanceCounters( mach64ContextPtr mmesa );
+extern void mach64PerformanceBoxesLocked( mach64ContextPtr mmesa );
+#endif
+extern void mach64WaitForIdleLocked( mach64ContextPtr mmesa );
+
+extern void mach64InitIoctlFuncs( struct dd_function_table *functions );
+
+/* ================================================================
+ * Helper macros:
+ */
+
+#define FLUSH_BATCH( mmesa )						\
+do {	/* flush buffered vertices, if any, taking the lock to do so */	\
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_IOCTL )				\
+      fprintf( stderr, "FLUSH_BATCH in %s\n", __FUNCTION__ );		\
+   if ( (mmesa)->vert_used ) {						\
+      mach64FlushVertices( mmesa );					\
+   }									\
+} while (0)
+
+/* According to a comment in ATIMach64Sync (atimach64.c) in the DDX:
+ *
+ * "For VTB's and later, the first CPU read of the framebuffer will return
+ * zeroes [...] This appears to be due to some kind of engine
+ * caching of framebuffer data I haven't found any way of disabling, or
+ * otherwise circumventing."
+ */
+#define FINISH_DMA_LOCKED( mmesa )					\
+do {	/* flush buffered vertices, wait for idle, then pre-read the FB */	\
+   CARD32 _tmp;								\
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_IOCTL )				\
+      fprintf( stderr, "FINISH_DMA_LOCKED in %s\n", __FUNCTION__ );	\
+   if ( (mmesa)->vert_used ) {						\
+      mach64FlushVerticesLocked( mmesa );				\
+   }									\
+   mach64WaitForIdleLocked( mmesa );					\
+   /* pre-read framebuffer to counter caching problem */		\
+   _tmp = *(volatile CARD32 *)(mmesa)->driScreen->pFB; (void) _tmp;	\
+} while (0)
+
+#define FLUSH_DMA_LOCKED( mmesa )					\
+do {	/* flush buffered vertices, if any, then flush the DMA */	\
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_IOCTL )				\
+      fprintf( stderr, "FLUSH_DMA_LOCKED in %s\n", __FUNCTION__ );	\
+   if ( (mmesa)->vert_used ) {						\
+      mach64FlushVerticesLocked( mmesa );				\
+   }									\
+   mach64FlushDMALocked( mmesa );					\
+} while (0)
+
+#define mach64FlushVertices( mmesa )					\
+do {	/* lock, flush the buffered vertices, unlock */			\
+   LOCK_HARDWARE( mmesa );						\
+   mach64FlushVerticesLocked( mmesa );					\
+   UNLOCK_HARDWARE( mmesa );						\
+} while (0)
+
+#define mach64WaitForIdle( mmesa )		\
+do {	/* lock, wait for idle, unlock */	\
+   LOCK_HARDWARE( mmesa );			\
+   mach64WaitForIdleLocked( mmesa );		\
+   UNLOCK_HARDWARE( mmesa );			\
+} while (0)
+
+
+#endif /* __MACH64_IOCTL_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_lock.c b/src/mesa/drivers/dri/mach64/mach64_lock.c
new file mode 100644
index 0000000000..1a95a8f619
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_lock.c
@@ -0,0 +1,95 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_state.h"
+#include "mach64_lock.h"
+#include "drirenderbuffer.h"
+
+#if DEBUG_LOCKING	/* storage for the lock-debugging macros in mach64_lock.h */
+char *prevLockFile = NULL;	/* file name stored by DEBUG_LOCK(), cleared by DEBUG_RESET() */
+int   prevLockLine = 0;		/* line number stored alongside prevLockFile */
+#endif
+
+
+/* Update the hardware state.  This is called if another context has
+ * grabbed the hardware lock, which includes the X server.  This
+ * function also updates the driver's window state after the X server
+ * moves, resizes or restacks a window -- the change will be reflected
+ * in the drawable position and clip rects.  Since the X server grabs
+ * the hardware lock when it changes the window state, this routine will
+ * automatically be called after such a change.
+ */
+void mach64GetLock( mach64ContextPtr mmesa, GLuint flags )	/* flags: handed unchanged to drmGetLock */
+{
+   __DRIdrawable *dPriv = mmesa->driDrawable;
+   __DRIscreen *sPriv = mmesa->driScreen;
+   drm_mach64_sarea_t *sarea = mmesa->sarea;
+   int i;
+
+   drmGetLock( mmesa->driFd, mmesa->hHWContext, flags );	/* result is not checked */
+
+   /* The window might have moved, so we might need to get new clip
+    * rects.
+    *
+    * NOTE: This releases and regrabs the hw lock to allow the X server
+    * to respond to the DRI protocol request for new drawable info.
+    * Since the hardware state depends on having the latest drawable
+    * clip rects, all state checking must be done _after_ this call.
+    */
+   DRI_VALIDATE_DRAWABLE_INFO( sPriv, dPriv ); 
+
+   if ( mmesa->lastStamp != dPriv->lastStamp ) {	/* drawable stamp differs from the one we cached */
+      mmesa->lastStamp = dPriv->lastStamp;
+      if (mmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT)
+         mach64SetCliprects( mmesa->glCtx, GL_BACK_LEFT );
+      else
+         mach64SetCliprects( mmesa->glCtx, GL_FRONT_LEFT );	/* anything other than back-left is treated as front */
+      driUpdateFramebufferSize( mmesa->glCtx, dPriv );
+      mach64CalcViewport( mmesa->glCtx );
+   }
+
+   mmesa->dirty |= (MACH64_UPLOAD_CONTEXT	/* always flagged on this path, stamp change or not */
+		    | MACH64_UPLOAD_MISC
+		    | MACH64_UPLOAD_CLIPRECTS);
+
+   /* EXA render acceleration uses the texture engine, so restore it */
+   mmesa->dirty |= (MACH64_UPLOAD_TEXTURE);
+
+   if ( sarea->ctx_owner != mmesa->hHWContext ) {	/* the shared area records a different owner */
+      sarea->ctx_owner = mmesa->hHWContext;
+      mmesa->dirty = MACH64_UPLOAD_ALL;	/* plain assignment: replaces the flags set above */
+   }
+
+   for ( i = mmesa->firstTexHeap ; i < mmesa->lastTexHeap ; i++ ) {	/* lastTexHeap is exclusive */
+      DRI_AGE_TEXTURES( mmesa->texture_heaps[i] );
+   }
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_lock.h b/src/mesa/drivers/dri/mach64/mach64_lock.h
new file mode 100644
index 0000000000..3130b183e3
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_lock.h
@@ -0,0 +1,104 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __MACH64_LOCK_H__
+#define __MACH64_LOCK_H__
+
+extern void mach64GetLock( mach64ContextPtr mmesa, GLuint flags );
+
+
+/* Turn DEBUG_LOCKING on to find locking conflicts.
+ */
+#define DEBUG_LOCKING	1
+
+#if DEBUG_LOCKING
+extern char *prevLockFile;	/* file recorded by the last DEBUG_LOCK(), 0 after DEBUG_RESET() */
+extern int   prevLockLine;	/* line recorded by the last DEBUG_LOCK() */
+
+#define DEBUG_LOCK()							\
+   do {	/* record the file and line of this expansion */		\
+      prevLockFile = (__FILE__);					\
+      prevLockLine = (__LINE__);					\
+   } while (0)
+
+#define DEBUG_RESET()							\
+   do {	/* forget the recorded location */				\
+      prevLockFile = 0;							\
+      prevLockLine = 0;							\
+   } while (0)
+
+#define DEBUG_CHECK_LOCK()						\
+   do {	/* a recorded location means a lock is still marked as held */	\
+      if ( prevLockFile ) {						\
+	 fprintf( stderr,						\
+		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
+		  prevLockFile, prevLockLine, __FILE__, __LINE__ );	\
+	 exit( 1 );	/* terminates the whole process */		\
+      }									\
+   } while (0)
+
+#else	/* !DEBUG_LOCKING: the three macros expand to nothing */
+
+#define DEBUG_LOCK()
+#define DEBUG_RESET()
+#define DEBUG_CHECK_LOCK()
+
+#endif
+
+/*
+ * !!! We may want to separate locks from locks with validation.  This
+ * could be used to improve performance for those commands that
+ * do not do any drawing !!!
+ */
+
+/* Lock the hardware and validate our state.
+ */
+#define LOCK_HARDWARE( mmesa )						\
+   do {									\
+      char __ret = 0;	/* nonzero after DRM_CAS => call mach64GetLock */	\
+      DEBUG_CHECK_LOCK();						\
+      DRM_CAS( (mmesa)->driHwLock, (mmesa)->hHWContext,			\
+	       (DRM_LOCK_HELD | (mmesa)->hHWContext), __ret );		\
+      if ( __ret )							\
+	 mach64GetLock( mmesa, 0 );					\
+      DEBUG_LOCK();							\
+   } while (0)
+
+/* Unlock the hardware.
+ */
+#define UNLOCK_HARDWARE( mmesa )					\
+   do {	/* release via DRM_UNLOCK, then clear the debug record */	\
+      DRM_UNLOCK( (mmesa)->driFd,					\
+		  (mmesa)->driHwLock,					\
+		  (mmesa)->hHWContext );				\
+      DEBUG_RESET();							\
+   } while (0)
+
+#endif /* __MACH64_LOCK_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_native_vb.c b/src/mesa/drivers/dri/mach64/mach64_native_vb.c
new file mode 100644
index 0000000000..816682ec5f
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_native_vb.c
@@ -0,0 +1,257 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Original authors:
+ *    Keith Whitwell <keithw@valinux.com>
+ *
+ * Adapted to Mach64 by:
+ *    José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "math/m_translate.h"
+
+#ifndef LOCALVARS
+#define LOCALVARS
+#endif
+
+void TAG(translate_vertex)(GLcontext *ctx,	/* supplies the point size */
+			   const VERTEX *src,	/* packed hardware vertex to decode */
+			   SWvertex *dst)	/* software vertex to fill in */
+{
+   LOCALVARS
+   GLuint format = GET_VERTEX_FORMAT();
+   UNVIEWPORT_VARS;
+   CARD32 *p = (CARD32 *)src + 10 - mmesa->vertex_size;	/* skip dwords absent from this vertex size */
+   /* Decode from the largest format down; each case falls into the next. */
+   dst->attrib[FRAG_ATTRIB_WPOS][3] = 1.0;	/* default w, replaced when TEX0 data is present */
+   
+   switch ( format ) {
+      case TEX1_VERTEX_FORMAT:
+#ifdef MACH64_PREMULT_TEXCOORDS
+	 {
+	    float rhw = 1.0 / LE32_IN_FLOAT( p + 2 );	/* reciprocal of the third (w) dword */
+	    
+	    dst->attrib[FRAG_ATTRIB_TEX1][0] = rhw*LE32_IN_FLOAT( p++ );
+	    dst->attrib[FRAG_ATTRIB_TEX1][1] = rhw*LE32_IN_FLOAT( p++ );
+	 }
+#else
+	 dst->attrib[FRAG_ATTRIB_TEX1][0] = LE32_IN_FLOAT( p++ );
+	 dst->attrib[FRAG_ATTRIB_TEX1][1] = LE32_IN_FLOAT( p++ );
+#endif
+	 dst->attrib[FRAG_ATTRIB_TEX1][3] = 1.0;
+	 p++;	/* step over the secondary w dword */
+	 /* fallthrough */
+      case TEX0_VERTEX_FORMAT:
+#ifdef MACH64_PREMULT_TEXCOORDS
+	 {
+	    float rhw = 1.0 / LE32_IN_FLOAT( p + 2 );	/* reciprocal of the third (w) dword */
+	    
+	    dst->attrib[FRAG_ATTRIB_TEX0][0] = rhw*LE32_IN_FLOAT( p++ );
+	    dst->attrib[FRAG_ATTRIB_TEX0][1] = rhw*LE32_IN_FLOAT( p++ );
+	 }
+#else
+	 dst->attrib[FRAG_ATTRIB_TEX0][0] = LE32_IN_FLOAT( p++ );
+	 dst->attrib[FRAG_ATTRIB_TEX0][1] = LE32_IN_FLOAT( p++ );
+#endif
+	 dst->attrib[FRAG_ATTRIB_TEX0][3] = 1.0;
+	 dst->attrib[FRAG_ATTRIB_WPOS][3] = LE32_IN_FLOAT( p++ );
+	 /* fallthrough */
+      case NOTEX_VERTEX_FORMAT:
+	 dst->attrib[FRAG_ATTRIB_COL1][2] = UBYTE_TO_FLOAT(((GLubyte *)p)[0]);
+	 dst->attrib[FRAG_ATTRIB_COL1][1] = UBYTE_TO_FLOAT(((GLubyte *)p)[1]);
+	 dst->attrib[FRAG_ATTRIB_COL1][0] = UBYTE_TO_FLOAT(((GLubyte *)p)[2]);
+	 dst->attrib[FRAG_ATTRIB_FOGC][0] = UBYTE_TO_FLOAT(((GLubyte *)p)[3]); /* byte -> float, same as COL1 */
+	 p++;
+	 /* fallthrough */
+      case TINY_VERTEX_FORMAT:
+	 dst->attrib[FRAG_ATTRIB_WPOS][2] = UNVIEWPORT_Z( LE32_IN( p++ ) );
+
+	 dst->color[2] = ((GLubyte *)p)[0];	/* bytes are read as B, G, R, A */
+	 dst->color[1] = ((GLubyte *)p)[1];
+	 dst->color[0] = ((GLubyte *)p)[2];
+	 dst->color[3] = ((GLubyte *)p)[3];
+	 p++;
+	 
+	 {
+	    GLuint xy = LE32_IN( p );	/* x in the high 16 bits, y in the low 16 */
+	    
+	    dst->attrib[FRAG_ATTRIB_WPOS][0] = UNVIEWPORT_X( (GLfloat)(GLshort)( xy >> 16 ) );
+	    dst->attrib[FRAG_ATTRIB_WPOS][1] = UNVIEWPORT_Y( (GLfloat)(GLshort)( xy & 0xffff ) );
+	 }
+   }
+
+   assert( p + 1 - (CARD32 *)src == 10 );	/* p must end on the tenth dword */
+	 
+   dst->pointSize = ctx->Point.Size;
+}
+
+
+
+void TAG(print_vertex)( GLcontext *ctx, const VERTEX *v )
+{
+   /* Dump the fields of one packed hardware vertex to stderr. */
+   LOCALVARS
+   GLuint format = GET_VERTEX_FORMAT();
+   CARD32 *p = (CARD32 *)v + 10 - mmesa->vertex_size;
+   switch ( format ) {
+      case TEX1_VERTEX_FORMAT:
+	 {
+	    GLfloat s1, t1, q1;
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    float inv_q = 1.0 / LE32_IN_FLOAT( p + 2 );
+
+	    s1 = inv_q*LE32_IN_FLOAT( p++ );
+	    t1 = inv_q*LE32_IN_FLOAT( p++ );
+#else
+	    s1 = LE32_IN_FLOAT( p++ );
+	    t1 = LE32_IN_FLOAT( p++ );
+#endif
+	    q1 = LE32_IN_FLOAT( p++ );
+	    fprintf( stderr, "u1 %f v1 %f w1 %f\n", s1, t1, q1 );
+	 }
+	 /* fallthrough */
+      case TEX0_VERTEX_FORMAT:
+	 {
+	    GLfloat s0, t0, q0;
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    float inv_q = 1.0 / LE32_IN_FLOAT( p + 2 );
+
+	    s0 = inv_q*LE32_IN_FLOAT( p++ );
+	    t0 = inv_q*LE32_IN_FLOAT( p++ );
+#else
+	    s0 = LE32_IN_FLOAT( p++ );
+	    t0 = LE32_IN_FLOAT( p++ );
+#endif
+	    q0 = LE32_IN_FLOAT( p++ );
+	    fprintf( stderr, "u0 %f v0 %f w0 %f\n", s0, t0, q0 );
+	 }
+	 /* fallthrough */
+      case NOTEX_VERTEX_FORMAT:
+	 {
+	    /* specular dword, bytes read as B, G, R, A */
+	    const GLubyte *c = (GLubyte *)p;
+	    const GLubyte b = c[0];
+	    const GLubyte g = c[1];
+	    const GLubyte r = c[2];
+	    const GLubyte a = c[3];
+	    p++;
+	    fprintf(stderr, "spec: r %d g %d b %d a %d\n", r, g, b, a);
+	 }
+	 /* fallthrough */
+      case TINY_VERTEX_FORMAT:
+	 {
+	    GLfloat x, y, z;
+	    GLuint packed_xy;
+	    const GLubyte *c;
+	    GLubyte r, g, b, a;
+
+	    z = LE32_IN( p++ ) / 65536.0;
+	    c = (GLubyte *)p;			/* color dword, bytes read as B, G, R, A */
+	    b = c[0];
+	    g = c[1];
+	    r = c[2];
+	    a = c[3];
+	    p++;
+	    packed_xy = LE32_IN( p );		/* x in the high half, y in the low half */
+	    x = (GLfloat)(GLshort)( packed_xy >> 16 ) / 4.0;
+	    y = (GLfloat)(GLshort)( packed_xy & 0xffff ) / 4.0;
+	    fprintf(stderr, "x %f y %f z %f\n", x, y, z);
+	    fprintf(stderr, "r %d g %d b %d a %d\n", r, g, b, a);
+	 }
+   }
+   
+   assert( p + 1 - (CARD32 *)v == 10 );	 
+
+   fprintf(stderr, "\n");
+}
+
+/* Interpolate the elements of the VB not included in typical hardware
+ * vertices.  
+ *
+ * NOTE: All these arrays are guaranteed by tnl to be writeable and
+ * have good stride.
+ */
+#ifndef INTERP_QUALIFIER 
+#define INTERP_QUALIFIER static
+#endif
+
+#define GET_COLOR(ptr, idx) ((ptr)->data[idx])
+
+
+INTERP_QUALIFIER void TAG(interp_extras)( GLcontext *ctx,	/* interpolate back-face colors and edge flag, then the vertex */
+					  GLfloat t,		/* interpolation factor handed to INTERP_* */
+					  GLuint dst, GLuint out, GLuint in,	/* vertex indices: result, and the two inputs */
+					  GLboolean force_boundary )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+
+   if (VB->BackfaceColorPtr) {	/* back-face colors are present in the VB */
+      assert(VB->BackfaceColorPtr->stride == 4 * sizeof(GLfloat));
+      
+      INTERP_4F( t,
+		 GET_COLOR(VB->BackfaceColorPtr, dst),
+		 GET_COLOR(VB->BackfaceColorPtr, out),
+		 GET_COLOR(VB->BackfaceColorPtr, in) );
+
+      if (VB->BackfaceSecondaryColorPtr) {	/* only examined when BackfaceColorPtr is set */
+	 INTERP_3F( t,
+		    GET_COLOR(VB->BackfaceSecondaryColorPtr, dst),
+		    GET_COLOR(VB->BackfaceSecondaryColorPtr, out),
+		    GET_COLOR(VB->BackfaceSecondaryColorPtr, in) );
+      }
+   }
+
+   if (VB->EdgeFlag) {
+      VB->EdgeFlag[dst] = VB->EdgeFlag[out] || force_boundary;	/* takes 'out's flag unless forced on */
+   }
+
+   INTERP_VERTEX(ctx, t, dst, out, in, force_boundary);	/* macro supplied by the including file */
+}
+
+INTERP_QUALIFIER void TAG(copy_pv_extras)( GLcontext *ctx, 	/* copy back-face colors src -> dst, then the vertex */
+					   GLuint dst, GLuint src )	/* vertex indices into the VB */
+{
+   LOCALVARS
+      struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+
+   if (VB->BackfaceColorPtr) {	/* back-face colors are present in the VB */
+      COPY_4FV( GET_COLOR(VB->BackfaceColorPtr, dst),
+		GET_COLOR(VB->BackfaceColorPtr, src) );
+
+      if (VB->BackfaceSecondaryColorPtr) {	/* only examined when BackfaceColorPtr is set */
+	 COPY_4FV( GET_COLOR(VB->BackfaceSecondaryColorPtr, dst),
+		   GET_COLOR(VB->BackfaceSecondaryColorPtr, src) );
+      }
+   }
+
+   COPY_PV_VERTEX(ctx, dst, src);	/* macro supplied by the including file */
+}
+
+
+#undef INTERP_QUALIFIER
+#undef GET_COLOR
+
+#undef IND
+#undef TAG
diff --git a/src/mesa/drivers/dri/mach64/mach64_native_vbtmp.h b/src/mesa/drivers/dri/mach64/mach64_native_vbtmp.h
new file mode 100644
index 0000000000..6e5fa3520e
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_native_vbtmp.h
@@ -0,0 +1,562 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Original authors:
+ *    Keith Whitwell <keithw@valinux.com>
+ *
+ * Adapted to Mach64 by:
+ *    José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+
+/* DO_XYZW:  Emit xyz and maybe w coordinates.
+ * DO_RGBA:  Emit color.
+ * DO_SPEC:  Emit specular color.
+ * DO_FOG:   Emit fog coordinate in specular alpha.
+ * DO_TEX0:  Emit tex0 u,v coordinates.
+ * DO_TEX1:  Emit tex1 u,v coordinates.
+ * DO_PTEX:  Emit tex0,1 q coordinates where possible.
+ *
+ * Additionally, this template assumes it is emitting *transformed*
+ * vertices; the modifications to emit untransformed vertices (ie. to
+ * t&l hardware) are probably too great to coexist with the code
+ * already in this file.
+ */
+
+#define VIEWPORT_X(x)  ((GLint) ((s[0]  * (x) + s[12]) * 4.0))
+#define VIEWPORT_Y(y)  ((GLint) ((s[5]  * (y) + s[13]) * 4.0))
+#define VIEWPORT_Z(z) (((GLuint) (s[10] * (z) + s[14])) << 15)
+
+#ifndef LOCALVARS
+#define LOCALVARS
+#endif
+
+static void TAG(emit)( GLcontext *ctx,	/* pack VB vertices start..end-1 into hardware vertices at 'dest' */
+		       GLuint start, GLuint end,	/* vertex range, 'end' exclusive */
+		       void *dest,			/* first output vertex */
+		       GLuint stride )			/* byte step between output vertices */
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+#if DO_TEX1
+   GLfloat (*tc1)[4];
+   GLuint tc1_stride;
+#if DO_PTEX
+   GLuint tc1_size;
+#endif
+#endif
+#if DO_TEX0
+   GLfloat (*tc0)[4];
+   GLuint tc0_stride;
+#if DO_PTEX
+   GLuint tc0_size;
+#endif
+#endif
+#if DO_SPEC
+   GLfloat (*spec)[4];
+   GLuint spec_stride;
+#endif
+#if DO_FOG
+   GLfloat (*fog)[4];
+   GLuint fog_stride;
+#endif
+#if DO_RGBA
+   GLfloat (*col)[4];
+   GLuint col_stride;
+#endif
+   GLfloat (*coord)[4];
+   GLuint coord_stride;
+   VERTEX *v = (VERTEX *)dest;	/* output cursor, advanced by 'stride' bytes per vertex */
+   const GLfloat *s = GET_VIEWPORT_MAT();	/* referenced by the VIEWPORT_X/Y/Z macros */
+#if DO_TEX1 || DO_TEX0 || DO_XYZW
+   const GLubyte *mask = VB->ClipMask;	/* per-vertex: 0 is treated as unclipped below */
+#endif
+   int i;
+
+#if !DO_XYZW
+   (void) s; /* Quiet compiler */
+#endif
+/*     fprintf(stderr, "%s(big) importable %d %d..%d\n",  */
+/*  	   __FUNCTION__, VB->importable_data, start, end); */
+
+#if DO_TEX1
+   {
+      const GLuint t1 = GET_TEXSOURCE(1);	/* attribute slot feeding the secondary texcoords */
+      tc1 = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->data;
+      tc1_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->stride;
+#if DO_PTEX
+      tc1_size = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->size;
+#endif
+   }
+#endif
+
+#if DO_TEX0
+   {
+      const GLuint t0 = GET_TEXSOURCE(0);	/* attribute slot feeding the primary texcoords */
+      tc0 = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->data;
+      tc0_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->stride;
+#if DO_PTEX
+      tc0_size = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->size;
+#endif
+   }
+#endif
+
+#if DO_SPEC
+   if (VB->AttribPtr[_TNL_ATTRIB_COLOR1]) {
+      spec = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->data;
+      spec_stride = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->stride;
+   } else {	/* no array: reuse the context's current value for every vertex */
+      spec = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR1];
+      spec_stride = 0;
+   }
+#endif
+
+#if DO_FOG
+   if (VB->AttribPtr[_TNL_ATTRIB_FOG]) {
+      fog = VB->AttribPtr[_TNL_ATTRIB_FOG]->data;
+      fog_stride = VB->AttribPtr[_TNL_ATTRIB_FOG]->stride;
+   } else {	/* no array: use a constant zero for every vertex */
+      static GLfloat tmp[4] = {0, 0, 0, 0};
+      fog = &tmp;
+      fog_stride = 0;
+   }
+#endif
+
+#if DO_RGBA
+   col = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data;
+   col_stride = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride;
+#endif
+
+   coord = VB->NdcPtr->data;
+   coord_stride = VB->NdcPtr->stride;
+
+   if (start) {	/* advance every input pointer to vertex 'start' */
+#if DO_TEX1
+         STRIDE_4F(tc1, start * tc1_stride);
+#endif
+#if DO_TEX0
+         STRIDE_4F(tc0, start * tc0_stride);
+#endif
+#if DO_SPEC
+	 STRIDE_4F(spec, start * spec_stride);
+#endif
+#if DO_FOG
+	 STRIDE_4F(fog, start * fog_stride);
+#endif
+#if DO_RGBA
+	 STRIDE_4F(col, start * col_stride);
+#endif
+	 STRIDE_4F(coord, start * coord_stride);
+   }
+
+   for (i=start; i < end; i++, v = (VERTEX *)((GLubyte *)v + stride)) {
+	 CARD32 *p = (CARD32 *)v;	/* dword cursor within this output vertex */
+#if DO_TEX1 || DO_TEX0
+	 GLfloat w;	/* value written to the texture W slots below */
+
+	 if (mask[i] == 0) {
+	    /* unclipped */
+	    w = coord[0][3];
+	 } else {
+	    /* clipped */
+	    w = 1.0;
+	 }
+#endif
+	 
+#if DO_TEX1
+#if DO_PTEX
+	 if (tc1_size == 4) {	/* texcoord carries a fourth (q) component */
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    LE32_OUT_FLOAT( p++, w*tc1[0][0] );		/* VERTEX_?_SECONDARY_S */
+	    LE32_OUT_FLOAT( p++, w*tc1[0][1] );		/* VERTEX_?_SECONDARY_T */
+	    LE32_OUT_FLOAT( p++, w*tc1[0][3] );		/* VERTEX_?_SECONDARY_W */
+#else /* !MACH64_PREMULT_TEXCOORDS */
+	    float rhw = 1.0 / tc1[0][3];
+	    LE32_OUT_FLOAT( p++, rhw*tc1[0][0] );	/* VERTEX_?_SECONDARY_S */
+	    LE32_OUT_FLOAT( p++, rhw*tc1[0][1] );	/* VERTEX_?_SECONDARY_T */
+	    LE32_OUT_FLOAT( p++, w*tc1[0][3] );		/* VERTEX_?_SECONDARY_W */	
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+	 } else {
+#endif /* DO_PTEX */
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    LE32_OUT_FLOAT( p++, w*tc1[0][0] );		/* VERTEX_?_SECONDARY_S */
+	    LE32_OUT_FLOAT( p++, w*tc1[0][1] );		/* VERTEX_?_SECONDARY_T */
+	    LE32_OUT_FLOAT( p++, w );			/* VERTEX_?_SECONDARY_W */
+#else /* !MACH64_PREMULT_TEXCOORDS */
+	    LE32_OUT_FLOAT( p++, tc1[0][0] );		/* VERTEX_?_SECONDARY_S */
+	    LE32_OUT_FLOAT( p++, tc1[0][1] );		/* VERTEX_?_SECONDARY_T */
+	    LE32_OUT_FLOAT( p++, w );			/* VERTEX_?_SECONDARY_W */
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+#if DO_PTEX
+	 }
+#endif /* DO_PTEX */
+	 STRIDE_4F(tc1, tc1_stride);
+#else /* !DO_TEX1 */
+	 p += 3;	/* leave the three secondary-texture dwords unwritten */
+#endif /* !DO_TEX1 */
+	    
+#if DO_TEX0
+#if DO_PTEX
+	 if (tc0_size == 4) {	/* texcoord carries a fourth (q) component */
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    LE32_OUT_FLOAT( p++, w*tc0[0][0] );			/* VERTEX_?_S */
+	    LE32_OUT_FLOAT( p++, w*tc0[0][1] );			/* VERTEX_?_T */
+	    LE32_OUT_FLOAT( p++, w*tc0[0][3] );			/* VERTEX_?_W */
+#else /* !MACH64_PREMULT_TEXCOORDS */
+	    float rhw = 1.0 / tc0[0][3];
+	    LE32_OUT_FLOAT( p++, rhw*tc0[0][0] );		/* VERTEX_?_S */
+	    LE32_OUT_FLOAT( p++, rhw*tc0[0][1] );		/* VERTEX_?_T */
+	    LE32_OUT_FLOAT( p++, w*tc0[0][3] );			/* VERTEX_?_W */	
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+	 } else {
+#endif /* DO_PTEX */
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    LE32_OUT_FLOAT( p++, w*tc0[0][0] );			/* VERTEX_?_S */
+	    LE32_OUT_FLOAT( p++, w*tc0[0][1] );			/* VERTEX_?_T */
+	    LE32_OUT_FLOAT( p++, w );				/* VERTEX_?_W */
+#else /* !MACH64_PREMULT_TEXCOORDS */
+	    LE32_OUT_FLOAT( p++, tc0[0][0] );			/* VERTEX_?_S */
+	    LE32_OUT_FLOAT( p++, tc0[0][1] );			/* VERTEX_?_T */
+	    LE32_OUT_FLOAT( p++, w );				/* VERTEX_?_W */
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+#if DO_PTEX
+	 }
+#endif /* DO_PTEX */
+	 STRIDE_4F(tc0, tc0_stride);
+#else /* !DO_TEX0 */
+	 p += 3;	/* leave the three primary-texture dwords unwritten */
+#endif /* !DO_TEX0 */
+
+#if DO_SPEC
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[0],  spec[0][2]); 	/* VERTEX_?_SPEC_B */
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[1],  spec[0][1]);	/* VERTEX_?_SPEC_G */
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[2],  spec[0][0]);	/* VERTEX_?_SPEC_R */
+
+	 STRIDE_4F(spec, spec_stride);
+#endif
+#if DO_FOG
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[3], fog[0][0]);  /* VERTEX_?_SPEC_A */
+	 /*	 ((GLubyte *)p)[3] = fog[0][0] * 255.0;	 */
+	 STRIDE_4F(fog, fog_stride);
+#endif
+	 p++;	/* past the specular/fog dword, written or not */
+	    
+#if DO_XYZW
+	 if (mask[i] == 0) {
+	    /* unclipped */
+	    LE32_OUT( p++, VIEWPORT_Z( coord[0][2] ) );	/* VERTEX_?_Z */
+	 } else {
+#endif
+	    p++;	/* Z dword left unwritten: vertex clipped, or XYZW not emitted */
+#if DO_XYZW
+	 }
+#endif
+
+#if DO_RGBA
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[0], col[0][2]);	/* bytes are stored B, G, R, A */
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[1], col[0][1]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[2], col[0][0]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(((GLubyte *)p)[3], col[0][3]);
+	 p++;
+	 STRIDE_4F(col, col_stride);
+#else
+	 p++;	/* color dword left unwritten */
+#endif
+
+#if DO_XYZW
+	 if (mask[i] == 0) {
+	    /* unclipped */
+	    LE32_OUT( p,
+		      (VIEWPORT_X( coord[0][0] ) << 16) |	/* VERTEX_?_X */
+		      (VIEWPORT_Y( coord[0][1] ) & 0xffff) );	/* VERTEX_?_Y */
+	    
+	    if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {	/* optional trace of what was just packed */
+	       fprintf( stderr, "%s: vert %d: %.2f %.2f %.2f %x\n",
+			__FUNCTION__,
+			i,
+			(LE32_IN( p ) >> 16)/4.0,
+			(LE32_IN( p ) & 0xffff)/4.0,
+			LE32_IN( p - 2 )/65536.0,
+			*(GLuint *)(p - 1) );
+	    }
+	 }
+#endif
+#if DO_TEX1 || DO_TEX0 || DO_XYZW
+	 STRIDE_4F(coord, coord_stride);
+#endif
+	 
+	 assert( p + 1 - (CARD32 *)v == 10 );	/* p must end on the tenth dword of the vertex */
+      }
+}
+
+#if DO_XYZW && DO_RGBA
+
+static GLboolean TAG(check_tex_sizes)( GLcontext *ctx )	/* GL_FALSE when a size-4 texcoord rules this variant out */
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+
+   /* Force 'missing' texcoords to something valid.
+    * NOTE(review): indexes TEX0/TEX1 directly, not via GET_TEXSOURCE -- confirm. */
+   if (DO_TEX1 && VB->AttribPtr[_TNL_ATTRIB_TEX0] == 0)
+      VB->AttribPtr[_TNL_ATTRIB_TEX0] = VB->AttribPtr[_TNL_ATTRIB_TEX1];
+
+   if (DO_PTEX)	/* this variant emits q itself, so any size is accepted */
+      return GL_TRUE;
+
+   /* No hardware support for projective texture.  Can fake it for
+    * TEX0 only.
+    */
+   if ((DO_TEX1 && VB->AttribPtr[_TNL_ATTRIB_TEX0 + GET_TEXSOURCE(1)]->size == 4)) {
+      PTEX_FALLBACK();
+      return GL_FALSE;
+   }
+
+   if (DO_TEX0 && VB->AttribPtr[_TNL_ATTRIB_TEX0 + GET_TEXSOURCE(0)]->size == 4) {
+      if (DO_TEX1) {	/* PTEX_FALLBACK is only invoked when TEX1 is also emitted */
+	 PTEX_FALLBACK();
+      }
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+
+/* Fill hardware vertex 'edst': texcoords and colours are interpolated between 'eout' and 'ein'; Z and X/Y come from VB->ClipPtr. */
+static void TAG(interp)( GLcontext *ctx,
+			 GLfloat t,				/* interpolation parameter handed to INTERP_F / INTERP_UB */
+			 GLuint edst, GLuint eout, GLuint ein,	/* vertex indices: destination and the two source vertices */
+			 GLboolean force_boundary )		/* not referenced in this function */
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLubyte *ddverts = GET_VERTEX_STORE();
+   GLuint size = GET_VERTEX_SIZE();			/* used below as a byte stride into ddverts */
+   const GLfloat *dstclip = VB->ClipPtr->data[edst];
+   GLfloat w;
+   const GLfloat *s = GET_VIEWPORT_MAT();
+   /* dst/in/out step through the three vertices one 32-bit slot at a time, in the order written below. */
+   CARD32 *dst = (CARD32 *)(ddverts + (edst * size));
+   CARD32 *in  = (CARD32 *)(ddverts + (ein  * size));
+   CARD32 *out = (CARD32 *)(ddverts + (eout * size));
+
+   (void)s;	/* presumably silences an unused-variable warning when VIEWPORT_* do not use s -- TODO confirm */
+
+   w = (dstclip[3] == 0.0F) ? 1.0 : (1.0 / dstclip[3]);	/* 1/clip.w, or 1.0 when clip.w is exactly zero */
+
+#if DO_TEX1	/* slots 0..2: secondary texture S, T, W */
+   {
+      GLfloat temp;
+#if DO_PTEX
+      GLfloat wout = VB->NdcPtr->data[eout][3];
+      GLfloat win = VB->NdcPtr->data[ein][3];
+      GLfloat qout = LE32_IN_FLOAT( out + 2 ) / wout;
+      GLfloat qin = LE32_IN_FLOAT( in + 2 ) / win;
+      GLfloat qdst, rqdst;
+
+      INTERP_F( t, qdst, qout, qin );
+      rqdst = 1.0 / qdst;
+      /* S and T are scaled by qout/qin before interpolation and by rqdst afterwards; the W slot gets w*rqdst. */
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp*rqdst );			/* VERTEX_?_SECONDARY_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp*rqdst );			/* VERTEX_?_SECONDARY_T */
+      dst++; out++; in++;
+      
+      LE32_OUT_FLOAT( dst, w*rqdst );				/* VERTEX_?_SECONDARY_W */
+      dst++; out++; in++;
+#else /* !DO_PTEX */
+#ifdef MACH64_PREMULT_TEXCOORDS
+      GLfloat qout = w / LE32_IN_FLOAT( out + 2 );
+      GLfloat qin = w / LE32_IN_FLOAT( in + 2 );
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_SECONDARY_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_SECONDARY_T */
+      dst++; out++; in++;
+#else /* !MACH64_PREMULT_TEXCOORDS */
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ), LE32_IN_FLOAT( in ) );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_SECONDARY_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ), LE32_IN_FLOAT( in ) );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_SECONDARY_T */
+      dst++; out++; in++;
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+      LE32_OUT_FLOAT( dst, w );					/* VERTEX_?_SECONDARY_W */
+      dst++; out++; in++;
+#endif /* !DO_PTEX */
+   }
+#else /* !DO_TEX1 */
+   dst += 3; out += 3; in += 3;	/* skip the three secondary-texture slots */
+#endif /* !DO_TEX1 */
+
+#if DO_TEX0	/* slots 3..5: primary texture S, T, W; same scheme as the DO_TEX1 section */
+   {
+      GLfloat temp;
+#if DO_PTEX
+      GLfloat wout = VB->NdcPtr->data[eout][3];
+      GLfloat win = VB->NdcPtr->data[ein][3];
+      GLfloat qout = LE32_IN_FLOAT( out + 2 ) / wout;
+      GLfloat qin = LE32_IN_FLOAT( in + 2 ) / win;
+      GLfloat qdst, rqdst;
+
+      INTERP_F( t, qdst, qout, qin );
+      rqdst = 1.0 / qdst;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp*rqdst );			/* VERTEX_?_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp*rqdst );			/* VERTEX_?_T */
+      dst++; out++; in++;
+      
+      LE32_OUT_FLOAT( dst, w*rqdst );				/* VERTEX_?_W */
+      dst++; out++; in++;
+#else /* !DO_PTEX */
+#ifdef MACH64_PREMULT_TEXCOORDS
+      GLfloat qout = w / LE32_IN_FLOAT( out + 2 );
+      GLfloat qin = w / LE32_IN_FLOAT( in + 2 );
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ) * qout, LE32_IN_FLOAT( in ) * qin );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_T */
+      dst++; out++; in++;
+#else /* !MACH64_PREMULT_TEXCOORDS */
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ), LE32_IN_FLOAT( in ) );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_S */
+      dst++; out++; in++;
+      
+      INTERP_F( t, temp, LE32_IN_FLOAT( out ), LE32_IN_FLOAT( in ) );
+      LE32_OUT_FLOAT( dst, temp );				/* VERTEX_?_T */
+      dst++; out++; in++;
+#endif /* !MACH64_PREMULT_TEXCOORDS */
+      LE32_OUT_FLOAT( dst, w );					/* VERTEX_?_W */
+      dst++; out++; in++;
+#endif /* !DO_PTEX */
+   }
+#else /* !DO_TEX0 */
+   dst += 3; out += 3; in += 3;	/* skip the three primary-texture slots */
+#endif /* !DO_TEX0 */
+   /* Slot 6: bytes 0..2 carry specular B,G,R (DO_SPEC); byte 3 is interpolated only when DO_FOG is set. */
+#if DO_SPEC
+   INTERP_UB( t, ((GLubyte *)dst)[0], ((GLubyte *)out)[0], ((GLubyte *)in)[0] );	/* VERTEX_?_SPEC_B */
+   INTERP_UB( t, ((GLubyte *)dst)[1], ((GLubyte *)out)[1], ((GLubyte *)in)[1] );	/* VERTEX_?_SPEC_G */
+   INTERP_UB( t, ((GLubyte *)dst)[2], ((GLubyte *)out)[2], ((GLubyte *)in)[2] );	/* VERTEX_?_SPEC_R */
+#endif
+   
+#if DO_FOG
+   INTERP_UB( t, ((GLubyte *)dst)[3], ((GLubyte *)out)[3], ((GLubyte *)in)[3] );	/* VERTEX_?_SPEC_A */
+#endif /* DO_FOG */
+
+   dst++; out++; in++;
+
+   LE32_OUT( dst, VIEWPORT_Z( dstclip[2] * w ) );		/* VERTEX_?_Z */
+   dst++; out++; in++;
+  
+   INTERP_UB( t, ((GLubyte *)dst)[0], ((GLubyte *)out)[0], ((GLubyte *)in)[0] );	/* VERTEX_?_B */
+   INTERP_UB( t, ((GLubyte *)dst)[1], ((GLubyte *)out)[1], ((GLubyte *)in)[1] );	/* VERTEX_?_G */
+   INTERP_UB( t, ((GLubyte *)dst)[2], ((GLubyte *)out)[2], ((GLubyte *)in)[2] );	/* VERTEX_?_R */
+   INTERP_UB( t, ((GLubyte *)dst)[3], ((GLubyte *)out)[3], ((GLubyte *)in)[3] );	/* VERTEX_?_A */
+   dst++; /*out++; in++;*/	/* in/out stay on the ARGB slot, hence the "+ 2" in the asserts below */
+   /* Last slot: X in the upper 16 bits, Y in the lower 16 bits. */
+   LE32_OUT( dst,
+	     (VIEWPORT_X( dstclip[0] * w ) << 16) |		/* VERTEX_?_X */
+	     (VIEWPORT_Y( dstclip[1] * w ) & 0xffff) );		/* VERTEX_?_Y */
+   /* Whatever the DO_* combination, exactly ten 32-bit slots per vertex must have been covered. */
+   assert( dst + 1 - (CARD32 *)(ddverts + (edst * size)) == 10 );
+   assert( in  + 2 - (CARD32 *)(ddverts + (ein  * size)) == 10 );
+   assert( out + 2 - (CARD32 *)(ddverts + (eout * size)) == 10 );
+   /* Debug dump prints X and Y divided by 4.0 and Z by 65536.0 (presumably fixed-point scaling -- confirm against VIEWPORT_*). */
+   if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+      fprintf( stderr, "%s: dst vert: %.2f %.2f %.2f %x\n",
+	       __FUNCTION__,
+	       (GLshort)(LE32_IN( dst ) >> 16)/4.0,
+	       (GLshort)(LE32_IN( dst ) & 0xffff)/4.0,
+	       LE32_IN( dst - 2 )/65536.0,
+	       *(GLuint *)(dst - 1) );
+   }
+}
+
+#endif /* DO_RGBA && DO_XYZW */
+
+
+/* Copy the packed colour slots of vertex esrc into vertex edst: 32-bit slot 6
+ * (specular/fog) and slot 8 (ARGB), depending on the DO_* build flags. */
+static void TAG(copy_pv)( GLcontext *ctx, GLuint edst, GLuint esrc )
+{
+#if DO_SPEC || DO_FOG || DO_RGBA
+   LOCALVARS
+   GLubyte *store = GET_VERTEX_STORE();
+   GLuint stride = GET_VERTEX_SIZE();
+   GLuint *to = (GLuint *)(store + (edst * stride));
+   GLuint *from = (GLuint *)(store + (esrc * stride));
+#  if DO_SPEC || DO_FOG
+   to[6] = from[6];			/* VERTEX_?_SPEC_ARGB */
+#  endif
+#  if DO_RGBA
+   to[8] = from[8];			/* VERTEX_?_ARGB */
+#  endif
+#endif
+}
+
+/* Fill in setup_tab[IND] for this variant: the vertex format and size chosen
+ * by the DO_* flags, then the per-vertex callbacks. */
+static void TAG(init)( void )
+{
+#if DO_TEX1
+   setup_tab[IND].vertex_size = 10;
+   setup_tab[IND].vertex_format = TEX1_VERTEX_FORMAT;
+#elif DO_TEX0
+   setup_tab[IND].vertex_size = 7;
+   setup_tab[IND].vertex_format = TEX0_VERTEX_FORMAT;
+#elif DO_SPEC || DO_FOG
+   setup_tab[IND].vertex_size = 4;
+   setup_tab[IND].vertex_format = NOTEX_VERTEX_FORMAT;
+#else
+   setup_tab[IND].vertex_size = 3;
+   setup_tab[IND].vertex_format = TINY_VERTEX_FORMAT;
+#endif
+
+   setup_tab[IND].emit = TAG(emit);
+   setup_tab[IND].copy_pv = TAG(copy_pv);
+#if DO_XYZW && DO_RGBA
+   /* Only variants built with both DO_XYZW and DO_RGBA register these two hooks. */
+   setup_tab[IND].interp = TAG(interp);
+   setup_tab[IND].check_tex_sizes = TAG(check_tex_sizes);
+#endif
+}
+
+
+#undef IND
+#undef TAG
diff --git a/src/mesa/drivers/dri/mach64/mach64_reg.h b/src/mesa/drivers/dri/mach64/mach64_reg.h
new file mode 100644
index 0000000000..cb944e1023
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_reg.h
@@ -0,0 +1,406 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_REG_H__
+#define __MACH64_REG_H__
+
+/*
+ * Not sure how this compares with the G200, but the Rage Pro has two
+ * banks of registers, with bank 0 at (aperture base + memmap offset - 1KB)
+ * and bank 1 at (aperture base + memmap offset - 2KB).  But, to send them
+ * via DMA, we need to encode them as memory map select rather than physical
+ * offsets.
+ */
+#define DWMREG0		0x0400
+#define DWMREG0_END	0x07ff
+#define DWMREG1		0x0000
+#define DWMREG1_END	0x03ff
+/* ADRINDEX: register byte offset -> dword index; offsets outside the DWMREG0 range get bit 8 (0x0100) set. */
+#define ISREG0(r)	( ( (r) >= DWMREG0 ) && ( (r) <= DWMREG0_END ) )
+#define ADRINDEX0(r)	( ((r) - DWMREG0) >> 2 )
+#define ADRINDEX1(r)	( ( ((r) - DWMREG1) >> 2 ) | 0x0100 )
+#define ADRINDEX(r)	( ISREG0(r) ? ADRINDEX0(r) : ADRINDEX1(r) )	/* note: evaluates r more than once */
+/* MMSELECT: the reverse mapping, from such an index back to a register byte offset. */
+#define MMREG0		0x0000
+#define MMREG0_END	0x00ff
+
+#define ISMMREG0(r)	( ( (r) >= MMREG0 ) && ( (r) <= MMREG0_END ) )
+#define MMSELECT0(r)	( ((r)<<2) + DWMREG0 )
+#define MMSELECT1(r)	( ( (((r) & 0xff)<<2) + DWMREG1 ) )
+#define MMSELECT(r)	( ISMMREG0(r) ? MMSELECT0(r) : MMSELECT1(r) )	/* note: evaluates r more than once */
+
+/* FIXME: If register reads are necessary, we should account for endianness here.
+ * These macros need 'mmesa' in scope; the base is kept as unsigned long so the mapped address is not cut to 32 bits. */
+#define MACH64_BASE(reg)	((unsigned long)(mmesa->mach64Screen->mmio.map))
+#define MACH64_ADDR(reg)	(MACH64_BASE(reg) + (reg))
+#define MACH64_DEREF(reg)	(*(__volatile__ CARD32 *)MACH64_ADDR(reg))
+#define MACH64_READ(reg)	MACH64_DEREF(reg)
+
+
+/* ================================================================
+ * Registers
+ */
+
+#define MACH64_ALPHA_TST_CNTL			0x0550
+#	define MACH64_ALPHA_TEST_EN			(1 << 0)
+#	define MACH64_ALPHA_TEST_MASK			(7 << 4)
+#	define MACH64_ALPHA_TEST_NEVER			(0 << 4)
+#	define MACH64_ALPHA_TEST_LESS			(1 << 4)
+#	define MACH64_ALPHA_TEST_LEQUAL			(2 << 4)
+#	define MACH64_ALPHA_TEST_EQUAL			(3 << 4)
+#	define MACH64_ALPHA_TEST_GEQUAL			(4 << 4)
+#	define MACH64_ALPHA_TEST_GREATER		(5 << 4)
+#	define MACH64_ALPHA_TEST_NOTEQUAL		(6 << 4)
+#	define MACH64_ALPHA_TEST_ALWAYS			(7 << 4)
+#	define MACH64_ALPHA_MOD_MSB			(1 << 7)
+#	define MACH64_ALPHA_DST_MASK			(7 << 8)
+#	define MACH64_ALPHA_DST_ZERO			(0 << 8)
+#	define MACH64_ALPHA_DST_ONE			(1 << 8)
+#	define MACH64_ALPHA_DST_SRCALPHA		(4 << 8)
+#	define MACH64_ALPHA_DST_INVSRCALPHA		(5 << 8)
+#	define MACH64_ALPHA_DST_DSTALPHA		(6 << 8)
+#	define MACH64_ALPHA_DST_INVDSTALPHA		(7 << 8)
+#	define MACH64_ALPHA_TST_SRC_TEXEL		(0 << 12)
+#	define MACH64_ALPHA_TST_SRC_SRCALPHA		(1 << 12)
+#	define MACH64_REF_ALPHA_MASK			(0xff << 16)
+#	define MACH64_REF_ALPHA_SHIFT			16
+#	define MACH64_COMPOSITE_SHADOW			(1 << 30)
+#	define MACH64_SPECULAR_LIGHT_EN			(1 << 31)
+
+#define MACH64_BUS_CNTL				0x04a0
+#	define MACH64_BUS_MSTR_RESET			(1 << 1)
+#	define MACH64_BUS_FLUSH_BUF			(1 << 2)
+#	define MACH64_BUS_MASTER_DIS			(1 << 6)
+#	define MACH64_BUS_EXT_REG_EN			(1 << 27)
+
+#define MACH64_COMPOSITE_SHADOW_ID		0x0798
+
+#define MACH64_CLR_CMP_CLR			0x0700
+#define MACH64_CLR_CMP_CNTL			0x0708
+#define MACH64_CLR_CMP_MASK			0x0704
+
+#define MACH64_DP_BKGD_CLR			0x06c0
+#define MACH64_DP_FOG_CLR			0x06c4	/* same offset as MACH64_DP_FRGD_CLR below */
+#define MACH64_DP_FGRD_BKGD_CLR			0x06e0
+#define MACH64_DP_FRGD_CLR			0x06c4	/* NOTE(review): shared with DP_FOG_CLR, presumably an intended alias -- confirm */
+#define MACH64_DP_FGRD_CLR_MIX			0x06dc
+
+#define MACH64_DP_MIX				0x06d4
+#	define BKGD_MIX_NOT_D				(0 << 0)
+#	define BKGD_MIX_ZERO				(1 << 0)
+#	define BKGD_MIX_ONE				(2 << 0)
+#	define MACH64_BKGD_MIX_D			(3 << 0)
+#	define BKGD_MIX_NOT_S				(4 << 0)
+#	define BKGD_MIX_D_XOR_S				(5 << 0)
+#	define BKGD_MIX_NOT_D_XOR_S			(6 << 0)
+#	define MACH64_BKGD_MIX_S			(7 << 0)
+#	define BKGD_MIX_NOT_D_OR_NOT_S			(8 << 0)
+#	define BKGD_MIX_D_OR_NOT_S			(9 << 0)
+#	define BKGD_MIX_NOT_D_OR_S			(10 << 0)
+#	define BKGD_MIX_D_OR_S				(11 << 0)
+#	define BKGD_MIX_D_AND_S				(12 << 0)
+#	define BKGD_MIX_NOT_D_AND_S			(13 << 0)
+#	define BKGD_MIX_D_AND_NOT_S			(14 << 0)
+#	define BKGD_MIX_NOT_D_AND_NOT_S			(15 << 0)
+#	define BKGD_MIX_D_PLUS_S_DIV2			(23 << 0)
+#	define FRGD_MIX_NOT_D				(0 << 16)
+#	define FRGD_MIX_ZERO				(1 << 16)
+#	define FRGD_MIX_ONE				(2 << 16)
+#	define FRGD_MIX_D				(3 << 16)
+#	define FRGD_MIX_NOT_S				(4 << 16)
+#	define FRGD_MIX_D_XOR_S				(5 << 16)
+#	define FRGD_MIX_NOT_D_XOR_S			(6 << 16)
+#	define MACH64_FRGD_MIX_S			(7 << 16)
+#	define FRGD_MIX_NOT_D_OR_NOT_S			(8 << 16)
+#	define FRGD_MIX_D_OR_NOT_S			(9 << 16)
+#	define FRGD_MIX_NOT_D_OR_S			(10 << 16)
+#	define FRGD_MIX_D_OR_S				(11 << 16)
+#	define FRGD_MIX_D_AND_S				(12 << 16)
+#	define FRGD_MIX_NOT_D_AND_S			(13 << 16)
+#	define FRGD_MIX_D_AND_NOT_S			(14 << 16)
+#	define FRGD_MIX_NOT_D_AND_NOT_S			(15 << 16)
+#	define FRGD_MIX_D_PLUS_S_DIV2			(23 << 16)
+
+#define MACH64_DP_PIX_WIDTH			0x06d0
+#	define MACH64_COMPOSITE_PIX_WIDTH_MASK		(0xf << 4)
+#	define MACH64_HOST_TRIPLE_ENABLE		(1 << 13)
+#	define MACH64_BYTE_ORDER_MSB_TO_LSB		(0 << 24)
+#	define MACH64_BYTE_ORDER_LSB_TO_MSB		(1 << 24)
+#	define MACH64_SCALE_PIX_WIDTH_MASK		(0xf << 28)
+
+#define MACH64_DP_SRC				0x06d8
+#	define MACH64_BKGD_SRC_BKGD_CLR			(0 << 0)
+#	define MACH64_BKGD_SRC_FRGD_CLR			(1 << 0)
+#	define MACH64_BKGD_SRC_HOST			(2 << 0)
+#	define MACH64_BKGD_SRC_BLIT			(3 << 0)
+#	define MACH64_BKGD_SRC_PATTERN			(4 << 0)
+#	define MACH64_BKGD_SRC_3D			(5 << 0)
+#	define MACH64_FRGD_SRC_BKGD_CLR			(0 << 8)
+#	define MACH64_FRGD_SRC_FRGD_CLR			(1 << 8)
+#	define MACH64_FRGD_SRC_HOST			(2 << 8)
+#	define MACH64_FRGD_SRC_BLIT			(3 << 8)
+#	define MACH64_FRGD_SRC_PATTERN			(4 << 8)
+#	define MACH64_FRGD_SRC_3D			(5 << 8)
+#	define MACH64_MONO_SRC_ONE			(0 << 16)
+#	define MACH64_MONO_SRC_PATTERN			(1 << 16)
+#	define MACH64_MONO_SRC_HOST			(2 << 16)
+#	define MACH64_MONO_SRC_BLIT			(3 << 16)
+
+#define MACH64_DP_WRITE_MASK			0x06c8
+
+#define MACH64_DST_CNTL				0x0530
+#	define MACH64_DST_X_RIGHT_TO_LEFT		(0 << 0)
+#	define MACH64_DST_X_LEFT_TO_RIGHT		(1 << 0)
+#	define MACH64_DST_Y_BOTTOM_TO_TOP		(0 << 1)
+#	define MACH64_DST_Y_TOP_TO_BOTTOM		(1 << 1)
+#	define MACH64_DST_X_MAJOR			(0 << 2)
+#	define MACH64_DST_Y_MAJOR			(1 << 2)
+#	define MACH64_DST_X_TILE			(1 << 3)
+#	define MACH64_DST_Y_TILE			(1 << 4)
+#	define MACH64_DST_LAST_PEL			(1 << 5)
+#	define MACH64_DST_POLYGON_ENABLE		(1 << 6)
+#	define MACH64_DST_24_ROTATION_ENABLE		(1 << 7)
+
+#define MACH64_DST_HEIGHT_WIDTH			0x0518
+#define MACH64_DST_OFF_PITCH			0x0500
+#define MACH64_DST_WIDTH_HEIGHT			0x06ec
+#define MACH64_DST_X_Y				0x06e8
+#define MACH64_DST_Y_X				0x050c
+
+#define MACH64_FIFO_STAT			0x0710
+#	define MACH64_FIFO_SLOT_MASK			0x0000ffff
+#	define MACH64_FIFO_ERR				(1 << 31)
+
+#define MACH64_GEN_TEST_CNTL			0x04d0
+#define MACH64_GUI_CMDFIFO_DEBUG		0x0170
+#define MACH64_GUI_CMDFIFO_DATA			0x0174
+#define MACH64_GUI_CNTL				0x0178
+#define MACH64_GUI_STAT				0x0738
+#	define MACH64_GUI_ACTIVE			(1 << 0)
+#define MACH64_GUI_TRAJ_CNTL			0x0730
+
+#define MACH64_HOST_CNTL			0x0640
+#define MACH64_HOST_DATA0			0x0600
+#define MACH64_HW_DEBUG				0x047c
+
+#define MACH64_ONE_OVER_AREA			0x029c
+#define MACH64_ONE_OVER_AREA_UC			0x0300
+
+#define MACH64_PAT_REG0				0x0680
+#define MACH64_PAT_REG1				0x0684
+
+#define MACH64_SC_LEFT_RIGHT			0x06a8
+#define MACH64_SC_TOP_BOTTOM			0x06b4
+#define MACH64_SCALE_3D_CNTL			0x05fc
+#	define MACH64_SCALE_PIX_EXPAND_ZERO_EXTEND	(0 << 0)
+#	define MACH64_SCALE_PIX_EXPAND_DYNAMIC_RANGE	(1 << 0)
+#	define MACH64_SCALE_DITHER_ERROR_DIFFUSE	(0 << 1)
+#	define MACH64_SCALE_DITHER_2D_TABLE		(1 << 1)
+#	define MACH64_DITHER_EN				(1 << 2)
+#	define MACH64_DITHER_INIT_CURRENT		(0 << 3)	/* bit 3 clear; cf. MACH64_DITHER_INIT_RESET */
+#	define MACH64_DITHER_INIT_RESET			(1 << 3)
+#	define MACH64_ROUND_EN				(1 << 4)
+#	define MACH64_TEX_CACHE_DIS			(1 << 5)
+#	define MACH64_SCALE_3D_FCN_MASK			(3 << 6)
+#	define MACH64_SCALE_3D_FCN_NOP			(0 << 6)
+#	define MACH64_SCALE_3D_FCN_SCALE		(1 << 6)
+#	define MACH64_SCALE_3D_FCN_TEXTURE		(2 << 6)
+#	define MACH64_SCALE_3D_FCN_SHADE		(3 << 6)
+#	define MACH64_TEXTURE_DISABLE			(1 << 6)
+#	define MACH64_EDGE_ANTI_ALIAS			(1 << 8)
+#	define MACH64_TEX_CACHE_SPLIT			(1 << 9)
+#	define MACH64_APPLE_YUV_MODE			(1 << 10)
+#	define MACH64_ALPHA_FOG_EN_MASK			(3 << 11)
+#	define MACH64_ALPHA_FOG_DIS			(0 << 11)
+#	define MACH64_ALPHA_FOG_EN_ALPHA		(1 << 11)
+#	define MACH64_ALPHA_FOG_EN_FOG			(2 << 11)
+#	define MACH64_ALPHA_BLEND_SAT			(1 << 13)
+#	define MACH64_RED_DITHER_MAX			(1 << 14)
+#	define MACH64_SIGNED_DST_CLAMP			(1 << 15)
+#	define MACH64_ALPHA_BLEND_SRC_MASK		(7 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_ZERO		(0 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_ONE		(1 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_DSTCOLOR		(2 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_INVDSTCOLOR	(3 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_SRCALPHA		(4 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_INVSRCALPHA	(5 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_DSTALPHA		(6 << 16)
+#	define MACH64_ALPHA_BLEND_SRC_INVDSTALPHA	(7 << 16)
+#	define MACH64_ALPHA_BLEND_DST_MASK		(7 << 19)
+#	define MACH64_ALPHA_BLEND_DST_ZERO		(0 << 19)
+#	define MACH64_ALPHA_BLEND_DST_ONE		(1 << 19)
+#	define MACH64_ALPHA_BLEND_DST_SRCCOLOR		(2 << 19)
+#	define MACH64_ALPHA_BLEND_DST_INVSRCCOLOR	(3 << 19)
+#	define MACH64_ALPHA_BLEND_DST_SRCALPHA		(4 << 19)
+#	define MACH64_ALPHA_BLEND_DST_INVSRCALPHA	(5 << 19)
+#	define MACH64_ALPHA_BLEND_DST_DSTALPHA		(6 << 19)
+#	define MACH64_ALPHA_BLEND_DST_INVDSTALPHA	(7 << 19)
+#	define MACH64_TEX_LIGHT_FCN_MASK		(3 << 22)
+#	define MACH64_TEX_LIGHT_FCN_REPLACE		(0 << 22)
+#	define MACH64_TEX_LIGHT_FCN_MODULATE		(1 << 22)
+#	define MACH64_TEX_LIGHT_FCN_ALPHA_DECAL		(2 << 22)
+#	define MACH64_MIP_MAP_DISABLE			(1 << 24)
+#	define MACH64_BILINEAR_TEX_EN			(1 << 25)
+#	define MACH64_TEX_BLEND_FCN_MASK		(3 << 26)
+#	define MACH64_TEX_BLEND_FCN_NEAREST		(0 << 26)
+#	define MACH64_TEX_BLEND_FCN_LINEAR		(2 << 26)
+#	define MACH64_TEX_BLEND_FCN_TRILINEAR		(3 << 26)
+#	define MACH64_TEX_AMASK_AEN			(1 << 28)
+#	define MACH64_TEX_AMASK_BLEND_EDGE		(1 << 29)
+#	define MACH64_TEX_MAP_AEN			(1 << 30)
+#	define MACH64_SRC_3D_HOST_FIFO			(1 << 31)
+#define MACH64_SCRATCH_REG0			0x0480
+#define MACH64_SCRATCH_REG1			0x0484
+#define MACH64_SECONDARY_TEX_OFF		0x0778
+#define MACH64_SETUP_CNTL			0x0304
+#	define MACH64_DONT_START_TRI			(1 << 0)
+#	define MACH64_DONT_START_ANY			(1 << 2)
+#	define MACH64_FLAT_SHADE_MASK			(3 << 3)
+#	define MACH64_FLAT_SHADE_OFF			(0 << 3)
+#	define MACH64_FLAT_SHADE_VERTEX_1		(1 << 3)
+#	define MACH64_FLAT_SHADE_VERTEX_2		(2 << 3)
+#	define MACH64_FLAT_SHADE_VERTEX_3		(3 << 3)
+#	define MACH64_SOLID_MODE_OFF			(0 << 5)
+#	define MACH64_SOLID_MODE_ON			(1 << 5)
+#	define MACH64_LOG_MAX_INC_ADJ			(1 << 6)
+#	define MACH64_SET_UP_CONTINUE			(1 << 31)
+#define MACH64_SRC_CNTL				0x05b4
+#define MACH64_SRC_HEIGHT1			0x0594
+#define MACH64_SRC_HEIGHT2			0x05ac
+#define MACH64_SRC_HEIGHT1_WIDTH1		0x0598
+#define MACH64_SRC_HEIGHT2_WIDTH2		0x05b0
+#define MACH64_SRC_OFF_PITCH			0x0580
+#define MACH64_SRC_WIDTH1			0x0590
+#define MACH64_SRC_Y_X				0x058c
+
+#define MACH64_TEX_0_OFF			0x05c0
+#define MACH64_TEX_CNTL				0x0774
+#	define MACH64_LOD_BIAS_SHIFT			0
+#	define MACH64_LOD_BIAS_MASK			(0xf << 0)
+#	define MACH64_COMP_FACTOR_SHIFT			4
+#	define MACH64_COMP_FACTOR_MASK			(0xf << 4)
+#	define MACH64_TEXTURE_COMPOSITE			(1 << 8)
+#	define MACH64_COMP_COMBINE_BLEND		(0 << 9)
+#	define MACH64_COMP_COMBINE_MODULATE		(1 << 9)
+#	define MACH64_COMP_BLEND_NEAREST		(0 << 11)
+#	define MACH64_COMP_BLEND_BILINEAR		(1 << 11)
+#	define MACH64_COMP_FILTER_NEAREST		(0 << 12)
+#	define MACH64_COMP_FILTER_BILINEAR		(1 << 12)
+#	define MACH64_COMP_ALPHA			(1 << 13)
+#	define MACH64_TEXTURE_TILING			(1 << 14)
+#	define MACH64_COMPOSITE_TEX_TILING		(1 << 15)
+#	define MACH64_TEX_COLLISION_DISABLE		(1 << 16)
+#	define MACH64_TEXTURE_CLAMP_S			(1 << 17)
+#	define MACH64_TEXTURE_CLAMP_T			(1 << 18)
+#	define MACH64_TEX_ST_MULT_W			(0 << 19)
+#	define MACH64_TEX_ST_DIRECT			(1 << 19)
+#	define MACH64_TEX_SRC_LOCAL			(0 << 20)
+#	define MACH64_TEX_SRC_AGP			(1 << 20)
+#	define MACH64_TEX_UNCOMPRESSED			(0 << 21)
+#	define MACH64_TEX_VQ_COMPRESSED			(1 << 21)
+#	define MACH64_COMP_TEX_UNCOMPRESSED		(0 << 22)
+#	define MACH64_COMP_TEX_VQ_COMPRESSED		(1 << 22)
+#	define MACH64_TEX_CACHE_FLUSH			(1 << 23)
+#	define MACH64_SEC_TEX_CLAMP_S			(1 << 24)
+#	define MACH64_SEC_TEX_CLAMP_T			(1 << 25)
+#	define MACH64_TEX_WRAP_S			(1 << 28)
+#	define MACH64_TEX_WRAP_T			(1 << 29)
+#	define MACH64_TEX_CACHE_SIZE_4K			(1 << 30)
+#	define MACH64_TEX_CACHE_SIZE_2K			(1 << 30)	/* NOTE(review): identical to _4K above; confirm both values against the register spec */
+#	define MACH64_SECONDARY_STW			(1 << 31)
+#define MACH64_TEX_PALETTE			0x077c
+#define MACH64_TEX_PALETTE_INDEX		0x0740
+#define MACH64_TEX_SIZE_PITCH			0x0770
+
+#define MACH64_VERTEX_1_ARGB			0x0254
+#define MACH64_VERTEX_1_S			0x0240
+#define MACH64_VERTEX_1_SECONDARY_S		0x0328
+#define MACH64_VERTEX_1_SECONDARY_T		0x032c
+#define MACH64_VERTEX_1_SECONDARY_W		0x0330
+#define MACH64_VERTEX_1_SPEC_ARGB		0x024c
+#define MACH64_VERTEX_1_T			0x0244
+#define MACH64_VERTEX_1_W			0x0248
+#define MACH64_VERTEX_1_X_Y			0x0258
+#define MACH64_VERTEX_1_Z			0x0250
+#define MACH64_VERTEX_2_ARGB			0x0274
+#define MACH64_VERTEX_2_S			0x0260
+#define MACH64_VERTEX_2_SECONDARY_S		0x0334
+#define MACH64_VERTEX_2_SECONDARY_T		0x0338
+#define MACH64_VERTEX_2_SECONDARY_W		0x033c
+#define MACH64_VERTEX_2_SPEC_ARGB		0x026c
+#define MACH64_VERTEX_2_T			0x0264
+#define MACH64_VERTEX_2_W			0x0268
+#define MACH64_VERTEX_2_X_Y			0x0278
+#define MACH64_VERTEX_2_Z			0x0270
+#define MACH64_VERTEX_3_ARGB			0x0294
+#define MACH64_VERTEX_3_S			0x0280
+#define MACH64_VERTEX_3_SECONDARY_S		0x02a0
+#define MACH64_VERTEX_3_SECONDARY_T		0x02a4
+#define MACH64_VERTEX_3_SECONDARY_W		0x02a8
+#define MACH64_VERTEX_3_SPEC_ARGB		0x028c
+#define MACH64_VERTEX_3_T			0x0284
+#define MACH64_VERTEX_3_W			0x0288
+#define MACH64_VERTEX_3_X_Y			0x0298
+#define MACH64_VERTEX_3_Z			0x0290
+
+#define MACH64_Z_CNTL				0x054c
+#	define MACH64_Z_EN				(1 << 0)
+#	define MACH64_Z_SRC_2D				(1 << 1)
+#	define MACH64_Z_TEST_MASK			(7 << 4)
+#	define MACH64_Z_TEST_NEVER			(0 << 4)
+#	define MACH64_Z_TEST_LESS			(1 << 4)
+#	define MACH64_Z_TEST_LEQUAL			(2 << 4)
+#	define MACH64_Z_TEST_EQUAL			(3 << 4)
+#	define MACH64_Z_TEST_GEQUAL			(4 << 4)
+#	define MACH64_Z_TEST_GREATER			(5 << 4)
+#	define MACH64_Z_TEST_NOTEQUAL			(6 << 4)
+#	define MACH64_Z_TEST_ALWAYS			(7 << 4)
+#	define MACH64_Z_MASK_EN				(1 << 8)
+#define MACH64_Z_OFF_PITCH			0x0548
+
+
+
+#define MACH64_DATATYPE_CI8				2
+#define MACH64_DATATYPE_ARGB1555			3
+#define MACH64_DATATYPE_RGB565				4
+#define MACH64_DATATYPE_ARGB8888			6
+#define MACH64_DATATYPE_RGB332				7
+#define MACH64_DATATYPE_Y8				8
+#define MACH64_DATATYPE_RGB8				9
+#define MACH64_DATATYPE_VYUY422				11
+#define MACH64_DATATYPE_YVYU422				12
+#define MACH64_DATATYPE_AYUV444				14
+#define MACH64_DATATYPE_ARGB4444			15
+
+#define MACH64_LAST_FRAME_REG			MACH64_PAT_REG0
+#define MACH64_LAST_DISPATCH_REG		MACH64_PAT_REG1
+
+#endif /* __MACH64_REG_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_screen.c b/src/mesa/drivers/dri/mach64/mach64_screen.c
new file mode 100644
index 0000000000..4bd6dee6c0
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_screen.c
@@ -0,0 +1,463 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_span.h"
+
+#include "main/context.h"
+#include "main/imports.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+
+#include "utils.h"
+#include "vblank.h"
+
+#include "GL/internal/dri_interface.h"
+
+/* Mach64 configuration
+ */
+#include "xmlpool.h"
+
+PUBLIC const char __driConfigOptions[] =	/* option description handed to driParseOptionInfo() in mach64CreateScreen() */
+DRI_CONF_BEGIN
+    DRI_CONF_SECTION_PERFORMANCE
+        DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_DEBUG
+        DRI_CONF_NO_RAST(false)
+#if ENABLE_PERF_BOXES
+        DRI_CONF_PERFORMANCE_BOXES(false)
+#endif
+    DRI_CONF_SECTION_END
+DRI_CONF_END;
+#if ENABLE_PERF_BOXES	/* the count below tracks the number of option entries listed above */
+static const GLuint __driNConfigOptions = 3;	/* vblank mode, no_rast, performance boxes */
+#else
+static const GLuint __driNConfigOptions = 2;	/* vblank mode, no_rast */
+#endif
+
+/**
+ * Build the framebuffer config list for this screen via driCreateConfigs().
+ *
+ * When depth_bits or stencil_bits is non-zero, two depth/stencil combinations
+ * are requested: one without stencil and one with stencil_bits (8 if that is
+ * zero) of stencil.  Configs whose non-zero stencil size differs from
+ * stencil_bits are rated GLX_SLOW_CONFIG.
+ * Returns the config array, or NULL if driCreateConfigs() fails.
+ */
+static const __DRIconfig **
+mach64FillInModes( __DRIscreen *psp,
+		   unsigned pixel_bits, unsigned depth_bits,
+		   unsigned stencil_bits, GLboolean have_back_buffer )
+{
+    /* GLX_SWAP_COPY_OML is left out for now.  Supporting it would be easy:
+     * a context created on such an fbconfig would simply never page-flip.
+     */
+    static const GLenum back_buffer_modes[] = {
+	GLX_NONE, GLX_SWAP_UNDEFINED_OML /*, GLX_SWAP_COPY_OML */
+    };
+
+    uint8_t msaa_samples_array[1];
+    uint8_t stencil_bits_array[2];
+    uint8_t depth_bits_array[2];
+    unsigned num_db_modes = 1;
+    unsigned num_depth_stencil = 1;
+    GLenum fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+    GLenum fb_format = GL_BGRA;
+    __DRIconfig **configs;
+    unsigned idx;
+
+    if (pixel_bits == 16) {
+       fb_format = GL_RGB;
+       fb_type = GL_UNSIGNED_SHORT_5_6_5;
+    }
+    if (have_back_buffer) {
+       num_db_modes = 2;
+    }
+    if ((depth_bits != 0) || (stencil_bits != 0)) {
+       num_depth_stencil = 2;
+    }
+
+    msaa_samples_array[0] = 0;
+    depth_bits_array[0] = depth_bits;
+    depth_bits_array[1] = depth_bits;
+
+    /* As with the accumulation buffer, always prepare a stencil entry.
+     * It is a software fallback, but some applications won't mind.
+     */
+    stencil_bits_array[0] = 0;
+    stencil_bits_array[1] = (stencil_bits != 0) ? stencil_bits : 8;
+
+    configs = driCreateConfigs(fb_format, fb_type,
+			       depth_bits_array, stencil_bits_array,
+			       num_depth_stencil, back_buffer_modes, num_db_modes,
+			       msaa_samples_array, 1, GL_TRUE);
+    if (configs == NULL) {
+       fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, __LINE__);
+       return NULL;
+    }
+
+    /* A stencil size other than the requested one is the "fake" stencil
+     * added above: flag those visuals as slow. */
+    for (idx = 0; configs[idx] != NULL; idx++) {
+       __GLcontextModes *mode = &configs[idx]->modes;
+       if ((mode->stencilBits != 0) && (mode->stencilBits != stencil_bits)) {
+	  mode->visualRating = GLX_SLOW_CONFIG;
+       }
+    }
+
+    return (const __DRIconfig **) configs;
+}
+
+
+/* Create the device specific screen private data struct.
+ * Returns NULL on failure, after releasing whatever was set up before the
+ * failing step (parsed option info, MMIO mapping, drmMapBufs() mapping, record).
+ */
+static mach64ScreenRec *
+mach64CreateScreen( __DRIscreen *sPriv )
+{
+   mach64ScreenPtr mach64Screen;
+   ATIDRIPtr serverInfo = (ATIDRIPtr)sPriv->pDevPriv;
+   int i;
+
+   if (sPriv->devPrivSize != sizeof(ATIDRIRec)) {
+      fprintf(stderr,"\nERROR!  sizeof(ATIDRIRec) does not match passed size from device driver\n");
+      return NULL;
+   }
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_DRI )
+      fprintf( stderr, "%s\n", __FUNCTION__ );
+
+   /* Allocate the private area */
+   mach64Screen = (mach64ScreenPtr) CALLOC( sizeof(*mach64Screen) );
+   if ( !mach64Screen ) return NULL;
+
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo (&mach64Screen->optionCache,
+		       __driConfigOptions, __driNConfigOptions);
+
+   mach64Screen->IsPCI = serverInfo->IsPCI;
+
+   {
+      drm_mach64_getparam_t gp;
+      int ret;
+
+      gp.param = MACH64_PARAM_IRQ_NR;
+      gp.value = (void *) &mach64Screen->irq;	/* the ioctl stores the IRQ number here */
+
+      ret = drmCommandWriteRead( sPriv->fd, DRM_MACH64_GETPARAM,
+				    &gp, sizeof(gp));
+      if (ret) {
+         fprintf(stderr, "DRM_MACH64_GETPARAM (MACH64_PARAM_IRQ_NR): %d\n", ret);
+         goto fail_free;
+      }
+   }
+
+   mach64Screen->mmio.handle = serverInfo->regs;
+   mach64Screen->mmio.size   = serverInfo->regsSize;
+   if ( drmMap( sPriv->fd,
+		mach64Screen->mmio.handle,
+		mach64Screen->mmio.size,
+		(drmAddressPtr)&mach64Screen->mmio.map ) != 0 ) {
+      goto fail_free;
+   }
+
+   mach64Screen->buffers = drmMapBufs( sPriv->fd );
+   if ( !mach64Screen->buffers ) goto fail_unmap_mmio;
+
+   if ( !mach64Screen->IsPCI ) {	/* AGP textures are only mapped on non-PCI cards */
+      mach64Screen->agpTextures.handle = serverInfo->agp;
+      mach64Screen->agpTextures.size   = serverInfo->agpSize;
+      if ( drmMap( sPriv->fd,
+		   mach64Screen->agpTextures.handle,
+		   mach64Screen->agpTextures.size,
+		   (drmAddressPtr)&mach64Screen->agpTextures.map ) ) {
+	 goto fail_unmap_bufs;
+      }
+   }
+
+   mach64Screen->AGPMode	= serverInfo->AGPMode;
+
+   mach64Screen->chipset	= serverInfo->chipset;
+   mach64Screen->width		= serverInfo->width;
+   mach64Screen->height		= serverInfo->height;
+   mach64Screen->mem		= serverInfo->mem;
+   mach64Screen->cpp		= serverInfo->cpp;
+
+   mach64Screen->frontOffset	= serverInfo->frontOffset;
+   mach64Screen->frontPitch	= serverInfo->frontPitch;
+   mach64Screen->backOffset	= serverInfo->backOffset;
+   mach64Screen->backPitch	= serverInfo->backPitch;
+   mach64Screen->depthOffset	= serverInfo->depthOffset;
+   mach64Screen->depthPitch	= serverInfo->depthPitch;
+
+   mach64Screen->texOffset[MACH64_CARD_HEAP] = serverInfo->textureOffset;
+   mach64Screen->texSize[MACH64_CARD_HEAP] = serverInfo->textureSize;
+   mach64Screen->logTexGranularity[MACH64_CARD_HEAP] = serverInfo->logTextureGranularity;
+
+   if ( mach64Screen->IsPCI ) {
+      mach64Screen->numTexHeaps = MACH64_NR_TEX_HEAPS - 1;
+      mach64Screen->firstTexHeap = MACH64_CARD_HEAP;
+      mach64Screen->texOffset[MACH64_AGP_HEAP] = 0;
+      mach64Screen->texSize[MACH64_AGP_HEAP] = 0;
+      mach64Screen->logTexGranularity[MACH64_AGP_HEAP] = 0;
+   } else {
+      if (serverInfo->textureSize > 0) {
+	 mach64Screen->numTexHeaps = MACH64_NR_TEX_HEAPS;
+	 mach64Screen->firstTexHeap = MACH64_CARD_HEAP;
+      } else {	/* no card texture memory reported: start at the AGP heap */
+	 mach64Screen->numTexHeaps = MACH64_NR_TEX_HEAPS - 1;
+	 mach64Screen->firstTexHeap = MACH64_AGP_HEAP;
+      }
+      mach64Screen->texOffset[MACH64_AGP_HEAP] = serverInfo->agpTextureOffset;
+      mach64Screen->texSize[MACH64_AGP_HEAP] = serverInfo->agpSize;
+      mach64Screen->logTexGranularity[MACH64_AGP_HEAP] = serverInfo->logAgpTextureGranularity;
+   }
+
+   mach64Screen->driScreen = sPriv;
+
+   i = 0;
+   mach64Screen->extensions[i++] = &driFrameTrackingExtension.base;
+   if ( mach64Screen->irq != 0 ) {	/* these two are only advertised when an IRQ number was reported */
+      mach64Screen->extensions[i++] = &driSwapControlExtension.base;
+      mach64Screen->extensions[i++] = &driMediaStreamCounterExtension.base;
+   }
+   mach64Screen->extensions[i++] = NULL;
+   sPriv->extensions = mach64Screen->extensions;
+
+   return mach64Screen;
+
+fail_unmap_bufs:	/* error unwinding: each label undoes one more step, in reverse order of setup */
+   drmUnmapBufs( mach64Screen->buffers );
+fail_unmap_mmio:
+   drmUnmap( (drmAddress)mach64Screen->mmio.map, mach64Screen->mmio.size );
+fail_free:
+   driDestroyOptionInfo( &mach64Screen->optionCache );
+   FREE( mach64Screen );
+   return NULL;
+}
+
+/* Destroy the device specific screen private data struct: undo its mappings,
+ * release the parsed option info and free the record. */
+static void
+mach64DestroyScreen( __DRIscreen *driScreen )
+{
+   mach64ScreenRec *mach64Screen = (mach64ScreenRec *) driScreen->private;
+
+   if ( !mach64Screen )
+      return;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_DRI )
+      fprintf( stderr, "%s\n", __FUNCTION__ );
+
+   if ( !mach64Screen->IsPCI ) {	/* the AGP texture mapping is only made when IsPCI is false */
+      drmUnmap( (drmAddress)mach64Screen->agpTextures.map, mach64Screen->agpTextures.size );
+   }
+   drmUnmapBufs( mach64Screen->buffers );
+   drmUnmap( (drmAddress)mach64Screen->mmio.map, mach64Screen->mmio.size );
+
+   /* optionCache was filled by driParseOptionInfo(); release it before the record goes away. */
+   driDestroyOptionInfo( &mach64Screen->optionCache );
+   FREE( mach64Screen );
+   driScreen->private = NULL;
+}
+
+
+/**
+ * Create the Mesa framebuffer for a window drawable and store it in
+ * driDrawPriv->driverPrivate.
+ *
+ * A front-left color renderbuffer is always attached; a back-left one is
+ * added for double-buffered visuals, and a depth renderbuffer for 16- or
+ * 24-bit depth visuals.  Stencil and accumulation buffers, when the
+ * visual asks for them, are provided as software renderbuffers.
+ *
+ * \param driScrnPriv  DRI screen; its private pointer is the mach64 screen
+ * \param driDrawPriv  DRI drawable that receives the framebuffer
+ * \param mesaVis      visual describing the requested buffers
+ * \param isPixmap     GL_TRUE for pixmap drawables (not implemented)
+ *
+ * \return GL_TRUE on success; GL_FALSE for pixmaps or when the
+ *         framebuffer cannot be allocated
+ */
+static GLboolean
+mach64CreateBuffer( __DRIscreen *driScrnPriv,
+		    __DRIdrawable *driDrawPriv,
+		    const __GLcontextModes *mesaVis,
+		    GLboolean isPixmap )
+{
+   mach64ScreenPtr screen = (mach64ScreenPtr) driScrnPriv->private;
+
+   if (isPixmap) {
+      return GL_FALSE; /* not implemented */
+   }
+   else {
+      struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
+
+      /* Bail out before fb is handed to _mesa_add_renderbuffer() below;
+       * previously a failed allocation was only detected after fb had
+       * already been used.
+       */
+      if (!fb) {
+         return GL_FALSE;
+      }
+
+      /* NOTE(review): the color renderbuffers are always created as
+       * MESA_FORMAT_ARGB8888 regardless of screen->cpp, so the RGB565
+       * branch of mach64SetSpanFunctions() is never selected from here
+       * -- confirm whether that is intended.
+       */
+      {
+         driRenderbuffer *frontRb
+            = driNewRenderbuffer(MESA_FORMAT_ARGB8888,
+                                 NULL,
+                                 screen->cpp,
+                                 screen->frontOffset, screen->frontPitch,
+                                 driDrawPriv);
+         mach64SetSpanFunctions(frontRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
+      }
+
+      if (mesaVis->doubleBufferMode) {
+         driRenderbuffer *backRb
+            = driNewRenderbuffer(MESA_FORMAT_ARGB8888,
+                                 NULL,
+                                 screen->cpp,
+                                 screen->backOffset, screen->backPitch,
+                                 driDrawPriv);
+         mach64SetSpanFunctions(backRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
+      }
+
+      if (mesaVis->depthBits == 16) {
+         driRenderbuffer *depthRb
+            = driNewRenderbuffer(MESA_FORMAT_Z16,
+                                 NULL, screen->cpp,
+                                 screen->depthOffset, screen->depthPitch,
+                                 driDrawPriv);
+         mach64SetSpanFunctions(depthRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+      }
+      else if (mesaVis->depthBits == 24) {
+         /* XXX I don't think 24-bit Z is supported - so this isn't used */
+         driRenderbuffer *depthRb
+            = driNewRenderbuffer(MESA_FORMAT_Z24_S8,
+                                 NULL,
+                                 screen->cpp,
+                                 screen->depthOffset, screen->depthPitch,
+                                 driDrawPriv);
+         mach64SetSpanFunctions(depthRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+      }
+
+      _mesa_add_soft_renderbuffers(fb,
+                                   GL_FALSE, /* color */
+                                   GL_FALSE, /* depth */
+                                   mesaVis->stencilBits > 0,
+                                   mesaVis->accumRedBits > 0,
+                                   GL_FALSE, /* alpha */
+                                   GL_FALSE /* aux */);
+      driDrawPriv->driverPrivate = (void *) fb;
+
+      /* fb is known to be non-NULL at this point. */
+      return GL_TRUE;
+   }
+}
+
+
+/**
+ * Drop the drawable's reference to its Mesa framebuffer by re-pointing
+ * driDrawPriv->driverPrivate at NULL through
+ * _mesa_reference_framebuffer().
+ */
+static void
+mach64DestroyBuffer(__DRIdrawable *driDrawPriv)
+{
+   GLframebuffer **fbRef = (GLframebuffer **)(&(driDrawPriv->driverPrivate));
+
+   _mesa_reference_framebuffer(fbRef, NULL);
+}
+
+
+/**
+ * Copy the back color buffer to the front color buffer.
+ *
+ * Nothing is copied for single-buffered visuals.  A drawable without a
+ * bound context (or whose context has no driver-private data) is
+ * reported through _mesa_problem().
+ */
+static void
+mach64SwapBuffers(__DRIdrawable *dPriv)
+{
+   mach64ContextPtr mmesa;
+   GLcontext *ctx;
+
+   if (!dPriv->driContextPriv || !dPriv->driContextPriv->driverPrivate) {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
+      return;
+   }
+
+   mmesa = (mach64ContextPtr) dPriv->driContextPriv->driverPrivate;
+   ctx = mmesa->glCtx;
+
+   if (!ctx->Visual.doubleBufferMode) {
+      return;
+   }
+
+   _mesa_notifySwapBuffers( ctx );  /* flush pending rendering commands */
+   mach64CopyBuffer( dPriv );
+}
+
+
+/**
+ * Create the driver-private screen record and attach it to
+ * driScreen->private.
+ *
+ * \return GL_TRUE if mach64CreateScreen() produced a record, GL_FALSE
+ *         otherwise
+ */
+static GLboolean
+mach64InitDriver( __DRIscreen *driScreen )
+{
+   driScreen->private = (void *) mach64CreateScreen( driScreen );
+
+   if ( driScreen->private != NULL ) {
+      return GL_TRUE;
+   }
+
+   /* With a NULL private pointer mach64DestroyScreen() returns at once;
+    * the call is kept for symmetry with the success path.
+    */
+   mach64DestroyScreen( driScreen );
+   return GL_FALSE;
+}
+
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ *
+ * Checks the DRI, DDX and DRM versions recorded in \p psp against the
+ * versions this driver expects, creates the driver-private screen via
+ * mach64InitDriver(), and returns the result of mach64FillInModes().
+ *
+ * \todo maybe fold this into mach64InitDriver
+ *
+ * \return the __DRIconfig list produced by mach64FillInModes(), or NULL
+ *         if the version check or the screen initialization fails
+ */
+static const __DRIconfig **
+mach64InitScreen(__DRIscreen *psp)
+{
+   static const __DRIversion ddx_expected = { 6, 4, 0 };
+   static const __DRIversion dri_expected = { 4, 0, 0 };
+   static const __DRIversion drm_expected = { 2, 0, 0 };
+   ATIDRIPtr dri_priv = (ATIDRIPtr) psp->pDevPriv;
+   GLboolean versions_ok;
+
+   versions_ok = driCheckDriDdxDrmVersions2( "Mach64",
+                                             &psp->dri_version, &dri_expected,
+                                             &psp->ddx_version, &ddx_expected,
+                                             &psp->drm_version, &drm_expected );
+
+   /* Short-circuit keeps the original order: the driver is only
+    * initialized once the version check has passed.
+    */
+   if ( !versions_ok || !mach64InitDriver(psp) ) {
+      return NULL;
+   }
+
+   /* The pixel size in bits is derived from the DDX-reported cpp.
+    * NOTE(review): the trailing 16, 0, 1 presumably select depth bits,
+    * stencil bits and back-buffer support -- confirm against
+    * mach64FillInModes().
+    */
+   return mach64FillInModes( psp, dri_priv->cpp * 8, 16, 0, 1);
+}
+
+/* Driver entry-point table for the mach64 driver.  The GetSwapInfo,
+ * WaitForSBC and SwapBuffersMSC hooks are left NULL; the MSC hooks use
+ * the shared driDrawableGetMSC32 / driWaitForMSC32 helpers.
+ */
+const struct __DriverAPIRec driDriverAPI = {
+   .InitScreen      = mach64InitScreen,
+   .DestroyScreen   = mach64DestroyScreen,
+   .CreateContext   = mach64CreateContext,
+   .DestroyContext  = mach64DestroyContext,
+   .CreateBuffer    = mach64CreateBuffer,
+   .DestroyBuffer   = mach64DestroyBuffer,
+   .SwapBuffers     = mach64SwapBuffers,
+   .MakeCurrent     = mach64MakeCurrent,
+   .UnbindContext   = mach64UnbindContext,
+   .GetSwapInfo     = NULL,
+   .GetDrawableMSC  = driDrawableGetMSC32,
+   .WaitForMSC      = driWaitForMSC32,
+   .WaitForSBC      = NULL,
+   .SwapBuffersMSC  = NULL
+};
+
+/* This is the table of extensions that the loader will dlsym() for.
+ * It exposes the core and legacy DRI extensions and is NULL-terminated.
+ */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+    &driCoreExtension.base,
+    &driLegacyExtension.base,
+    NULL
+};
diff --git a/src/mesa/drivers/dri/mach64/mach64_screen.h b/src/mesa/drivers/dri/mach64/mach64_screen.h
new file mode 100644
index 0000000000..1966809c03
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_screen.h
@@ -0,0 +1,80 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_SCREEN_H__
+#define __MACH64_SCREEN_H__
+
+#include "xmlconfig.h"
+
+/* Describes one DRM-mapped region: its handle, size and mapping.
+ * The screen record below holds one of these for MMIO and one for the
+ * AGP textures.
+ */
+typedef struct {
+   drm_handle_t handle;			/* Handle to the DRM region */
+   drmSize size;			/* Size of the DRM region */
+   drmAddress *map;			/* Mapping of the DRM region */
+} mach64RegionRec, *mach64RegionPtr;
+
+/* Driver-private, per-screen state for the mach64 DRI driver.  It is
+ * stored in __DRIscreen::private and released by mach64DestroyScreen().
+ */
+typedef struct {
+   int chipset;
+   /* width/height are used as the screen bounds when the driver clamps
+    * its scissor rectangle.
+    */
+   int width;
+   int height;
+   int mem;
+   /* NOTE(review): presumably bytes per pixel -- the driver multiplies
+    * a cpp value by 8 to obtain pixel bits; confirm.
+    */
+   int cpp;
+
+   /* Offset and pitch of the front and back color buffers, as handed to
+    * driNewRenderbuffer().
+    */
+   unsigned int	frontOffset;
+   unsigned int frontPitch;
+   unsigned int	backOffset;
+   unsigned int backPitch;
+
+   /* Offset and pitch of the depth buffer. */
+   unsigned int	depthOffset;
+   unsigned int depthPitch;
+
+   /* Non-zero for PCI cards; the AGP texture region is only unmapped
+    * at teardown when this is zero.
+    */
+   int IsPCI;
+   int AGPMode;
+   unsigned int irq;			/* IRQ number (0 means none) */
+
+   /* Shared Texture data */
+   int firstTexHeap, numTexHeaps;
+   int texOffset[MACH64_NR_TEX_HEAPS];
+   int texSize[MACH64_NR_TEX_HEAPS];
+   int logTexGranularity[MACH64_NR_TEX_HEAPS];
+
+   mach64RegionRec mmio;
+   mach64RegionRec agpTextures;
+
+   drmBufMapPtr buffers;
+
+   __DRIscreen *driScreen;
+
+   driOptionCache optionCache;
+
+   /* Screen extension list: up to three extensions plus the NULL
+    * terminator.
+    */
+   const __DRIextension *extensions[4];
+} mach64ScreenRec, *mach64ScreenPtr;
+
+#endif /* __MACH64_SCREEN_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_span.c b/src/mesa/drivers/dri/mach64/mach64_span.c
new file mode 100644
index 0000000000..0c52c0c88c
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_span.c
@@ -0,0 +1,168 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_span.h"
+
+#include "swrast/swrast.h"
+
+#define DBG 0
+
+/* Locals declared at the top of each color span function generated by
+ * the span template.  Expects `ctx` and `rb` to be in scope.  `p` is
+ * declared and immediately voided so it never triggers an
+ * unused-variable warning.
+ */
+#define LOCAL_VARS							\
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);			\
+   __DRIscreen *sPriv = mmesa->driScreen;			\
+   __DRIdrawable *dPriv = mmesa->driDrawable;			\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;			\
+   GLuint height = dPriv->h;						\
+   GLushort p;								\
+   (void) p;
+
+/* Locals for the depth span functions.  `buf` points at the drawable's
+ * origin inside the renderbuffer, computed with 2 bytes per element.
+ */
+#define LOCAL_DEPTH_VARS						\
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);			\
+   __DRIdrawable *dPriv = mmesa->driDrawable;			\
+   __DRIscreen *driScreen = mmesa->driScreen;			\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;			\
+   GLuint height = dPriv->h;						\
+   char *buf = (char *)(driScreen->pFB + drb->offset +			\
+			(dPriv->x + dPriv->y * drb->pitch) * 2)
+
+#define LOCAL_STENCIL_VARS	LOCAL_DEPTH_VARS
+
+/* Flip a row index within the drawable using the `height` local.
+ * Note that _y is not parenthesized, so pass a simple expression.
+ */
+#define Y_FLIP( _y )	(height - _y - 1)
+
+/* Expands to nothing in this driver.
+ * NOTE(review): presumably because mach64SpanRenderStart() already
+ * takes the hardware lock around span rendering -- confirm.
+ */
+#define HW_LOCK()
+
+/* Opens a loop over mmesa->pClipRects, walking from the last rectangle
+ * to the first, and defines minx/miny/maxx/maxy relative to
+ * mmesa->drawX / mmesa->drawY.  Must be closed with HW_ENDCLIPLOOP().
+ */
+/* FIXME could/should we use dPriv->numClipRects like the other drivers? */
+#define HW_CLIPLOOP()							\
+   do {									\
+      int _nc = mmesa->numClipRects;					\
+									\
+      while ( _nc-- ) {							\
+	 int minx = mmesa->pClipRects[_nc].x1 - mmesa->drawX;		\
+	 int miny = mmesa->pClipRects[_nc].y1 - mmesa->drawY;		\
+	 int maxx = mmesa->pClipRects[_nc].x2 - mmesa->drawX;		\
+	 int maxy = mmesa->pClipRects[_nc].y2 - mmesa->drawY;
+
+/* Closes the loop opened by HW_CLIPLOOP(). */
+#define HW_ENDCLIPLOOP()						\
+      }									\
+   } while (0)
+
+/* Expands to nothing; counterpart of HW_LOCK(). */
+#define HW_UNLOCK()
+
+
+
+/* ================================================================
+ * Color buffer
+ */
+
+/* 16 bit, RGB565 color spanline and pixel functions
+ *
+ * The template header is configured through the macros defined just
+ * before each #include.  GET_PTR addresses pixel (X,Y) of the drawable
+ * inside the framebuffer mapping, using the renderbuffer's offset,
+ * pitch and cpp.
+ * NOTE(review): the macros are redefined below without an #undef, so
+ * spantmp2.h presumably #undefs them itself -- confirm.
+ */
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x)    mach64##x##_RGB565
+#define TAG2(x,y) mach64##x##_RGB565##y
+#define GET_PTR(X,Y) (sPriv->pFB + drb->offset		\
+     + ((dPriv->y + (Y)) * drb->pitch + (dPriv->x + (X))) * drb->cpp)
+#include "spantmp2.h"
+
+
+/* 32 bit, ARGB8888 color spanline and pixel functions
+ */
+/* FIXME the old code always read back alpha as 0xff, i.e. fully opaque.
+   Was there a reason to do so?  If so, that won't work with this template... */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    mach64##x##_ARGB8888
+#define TAG2(x,y) mach64##x##_ARGB8888##y
+#define GET_PTR(X,Y) (sPriv->pFB + drb->offset		\
+     + ((dPriv->y + (Y)) * drb->pitch + (dPriv->x + (X))) * drb->cpp)
+#include "spantmp2.h"
+
+
+/* ================================================================
+ * Depth buffer
+ */
+
+/* 16 bit depthbuffer functions.
+ *
+ * WRITE_DEPTH / READ_DEPTH access one GLushort at (_x,_y) relative to
+ * the `buf` pointer set up by LOCAL_DEPTH_VARS.  Both macros end in a
+ * semicolon of their own.
+ */
+#define VALUE_TYPE GLushort
+
+#define WRITE_DEPTH( _x, _y, d )					\
+   *(GLushort *)(buf + ((_x) + (_y) * drb->pitch) * 2) = d;
+
+#define READ_DEPTH( d, _x, _y )						\
+   d = *(GLushort *)(buf + ((_x) + (_y) * drb->pitch) * 2);
+
+#define TAG(x) mach64##x##_z16
+#include "depthtmp.h"
+
+
+/* swrast SpanRenderStart hook: takes the hardware lock and then runs
+ * FINISH_DMA_LOCKED.  The lock is released again in
+ * mach64SpanRenderFinish().
+ * NOTE(review): FINISH_DMA_LOCKED presumably waits for outstanding DMA
+ * so the CPU can touch the framebuffer -- confirm against
+ * mach64_ioctl.h.
+ */
+static void mach64SpanRenderStart( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   LOCK_HARDWARE( mmesa );
+   FINISH_DMA_LOCKED( mmesa );
+}
+
+/* swrast SpanRenderFinish hook: flushes swrast, then drops the
+ * hardware lock taken in mach64SpanRenderStart().
+ */
+static void mach64SpanRenderFinish( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   _swrast_flush( ctx );
+   UNLOCK_HARDWARE( mmesa );
+}
+
+/**
+ * Install this driver's SpanRenderStart / SpanRenderFinish hooks in the
+ * context's swrast device-driver table.
+ */
+void mach64DDInitSpanFuncs( GLcontext *ctx )
+{
+   struct swrast_device_driver *hooks;
+
+   hooks = _swrast_GetDeviceDriverReference(ctx);
+
+   hooks->SpanRenderStart = mach64SpanRenderStart;
+   hooks->SpanRenderFinish = mach64SpanRenderFinish;
+}
+
+
+/**
+ * Plug in the Get/Put routines for the given driRenderbuffer.
+ *
+ * The routines are chosen from the renderbuffer's format: RGB565,
+ * ARGB8888 or Z16.  Any other format leaves the renderbuffer untouched.
+ * The \p vis argument is not consulted.
+ */
+void
+mach64SetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
+{
+   switch (drb->Base.Format) {
+   case MESA_FORMAT_RGB565:
+      mach64InitPointers_RGB565(&drb->Base);
+      break;
+   case MESA_FORMAT_ARGB8888:
+      mach64InitPointers_ARGB8888(&drb->Base);
+      break;
+   case MESA_FORMAT_Z16:
+      mach64InitDepthPointers_z16(&drb->Base);
+      break;
+   default:
+      /* no span functions for other formats */
+      break;
+   }
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_span.h b/src/mesa/drivers/dri/mach64/mach64_span.h
new file mode 100644
index 0000000000..65141d05c3
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_span.h
@@ -0,0 +1,41 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __MACH64_SPAN_H__
+#define __MACH64_SPAN_H__
+
+#include "drirenderbuffer.h"
+
+extern void mach64DDInitSpanFuncs( GLcontext *ctx );
+
+extern void
+mach64SetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
+
+#endif
diff --git a/src/mesa/drivers/dri/mach64/mach64_state.c b/src/mesa/drivers/dri/mach64/mach64_state.c
new file mode 100644
index 0000000000..69a5aea02c
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_state.c
@@ -0,0 +1,1186 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_state.h"
+#include "mach64_ioctl.h"
+#include "mach64_tris.h"
+#include "mach64_vb.h"
+#include "mach64_tex.h"
+
+#include "main/enums.h"
+#include "main/colormac.h"
+#include "swrast/swrast.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "swrast_setup/swrast_setup.h"
+
+
+/* =============================================================
+ * Alpha blending
+ */
+
+/**
+ * Recompute the alpha-test and blend portions of the register shadows
+ * (alpha_tst_cntl, scale_3d_cntl, dp_write_mask) from ctx->Color and
+ * mark the corresponding upload bits dirty for every shadow that
+ * changed.
+ *
+ * The blend-function fallback is cleared first and raised again if the
+ * source or destination RGB factor is not one of the cases handled
+ * below.
+ */
+static void mach64UpdateAlphaMode( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint alpha_cntl = mmesa->setup.alpha_tst_cntl;
+   GLuint scale_cntl = mmesa->setup.scale_3d_cntl;
+   GLuint write_mask = mmesa->setup.dp_write_mask;
+
+   if ( !ctx->Color.AlphaEnabled ) {
+      alpha_cntl &= ~MACH64_ALPHA_TEST_EN;
+   } else {
+      GLubyte ref;
+      GLuint test_bits = 0;
+
+      CLAMPED_FLOAT_TO_UBYTE(ref, ctx->Color.AlphaRef);
+
+      alpha_cntl &= ~(MACH64_ALPHA_TEST_MASK | MACH64_REF_ALPHA_MASK);
+
+      switch ( ctx->Color.AlphaFunc ) {
+      case GL_NEVER:    test_bits = MACH64_ALPHA_TEST_NEVER;    break;
+      case GL_LESS:     test_bits = MACH64_ALPHA_TEST_LESS;     break;
+      case GL_LEQUAL:   test_bits = MACH64_ALPHA_TEST_LEQUAL;   break;
+      case GL_EQUAL:    test_bits = MACH64_ALPHA_TEST_EQUAL;    break;
+      case GL_GEQUAL:   test_bits = MACH64_ALPHA_TEST_GEQUAL;   break;
+      case GL_GREATER:  test_bits = MACH64_ALPHA_TEST_GREATER;  break;
+      case GL_NOTEQUAL: test_bits = MACH64_ALPHA_TEST_NOTEQUAL; break;
+      case GL_ALWAYS:   test_bits = MACH64_ALPHA_TEST_ALWAYS;   break;
+      default:
+         /* unknown function: no test bits are set */
+         break;
+      }
+
+      alpha_cntl |= test_bits;
+      alpha_cntl |= (ref << MACH64_REF_ALPHA_SHIFT);
+      alpha_cntl |= MACH64_ALPHA_TEST_EN;
+   }
+
+   FALLBACK( mmesa, MACH64_FALLBACK_BLEND_FUNC, GL_FALSE );
+
+   if ( !ctx->Color.BlendEnabled ) {
+      scale_cntl &= ~MACH64_ALPHA_FOG_EN_ALPHA;
+   } else {
+      GLuint src_bits = 0;
+      GLuint dst_bits = 0;
+
+      scale_cntl &= ~(MACH64_ALPHA_BLEND_SRC_MASK |
+                      MACH64_ALPHA_BLEND_DST_MASK |
+                      MACH64_ALPHA_BLEND_SAT);
+
+      switch ( ctx->Color.BlendSrcRGB ) {
+      case GL_ZERO:
+         src_bits = MACH64_ALPHA_BLEND_SRC_ZERO;
+         break;
+      case GL_ONE:
+         src_bits = MACH64_ALPHA_BLEND_SRC_ONE;
+         break;
+      case GL_DST_COLOR:
+         src_bits = MACH64_ALPHA_BLEND_SRC_DSTCOLOR;
+         break;
+      case GL_ONE_MINUS_DST_COLOR:
+         src_bits = MACH64_ALPHA_BLEND_SRC_INVDSTCOLOR;
+         break;
+      case GL_SRC_ALPHA:
+         src_bits = MACH64_ALPHA_BLEND_SRC_SRCALPHA;
+         break;
+      case GL_ONE_MINUS_SRC_ALPHA:
+         src_bits = MACH64_ALPHA_BLEND_SRC_INVSRCALPHA;
+         break;
+      case GL_DST_ALPHA:
+         src_bits = MACH64_ALPHA_BLEND_SRC_DSTALPHA;
+         break;
+      case GL_ONE_MINUS_DST_ALPHA:
+         src_bits = MACH64_ALPHA_BLEND_SRC_INVDSTALPHA;
+         break;
+      case GL_SRC_ALPHA_SATURATE:
+         src_bits = (MACH64_ALPHA_BLEND_SRC_SRCALPHA |
+                     MACH64_ALPHA_BLEND_SAT);
+         break;
+      default:
+         FALLBACK( mmesa, MACH64_FALLBACK_BLEND_FUNC, GL_TRUE );
+         break;
+      }
+      scale_cntl |= src_bits;
+
+      switch ( ctx->Color.BlendDstRGB ) {
+      case GL_ZERO:
+         dst_bits = MACH64_ALPHA_BLEND_DST_ZERO;
+         break;
+      case GL_ONE:
+         dst_bits = MACH64_ALPHA_BLEND_DST_ONE;
+         break;
+      case GL_SRC_COLOR:
+         dst_bits = MACH64_ALPHA_BLEND_DST_SRCCOLOR;
+         break;
+      case GL_ONE_MINUS_SRC_COLOR:
+         dst_bits = MACH64_ALPHA_BLEND_DST_INVSRCCOLOR;
+         break;
+      case GL_SRC_ALPHA:
+         dst_bits = MACH64_ALPHA_BLEND_DST_SRCALPHA;
+         break;
+      case GL_ONE_MINUS_SRC_ALPHA:
+         dst_bits = MACH64_ALPHA_BLEND_DST_INVSRCALPHA;
+         break;
+      case GL_DST_ALPHA:
+         dst_bits = MACH64_ALPHA_BLEND_DST_DSTALPHA;
+         break;
+      case GL_ONE_MINUS_DST_ALPHA:
+         dst_bits = MACH64_ALPHA_BLEND_DST_INVDSTALPHA;
+         break;
+      default:
+         FALLBACK( mmesa, MACH64_FALLBACK_BLEND_FUNC, GL_TRUE );
+         break;
+      }
+      scale_cntl |= dst_bits;
+
+      write_mask = 0xffffffff; /* Can't color mask and blend at the same time */
+      scale_cntl &= ~MACH64_ALPHA_FOG_EN_FOG; /* Can't fog and blend at the same time */
+      scale_cntl |=  MACH64_ALPHA_FOG_EN_ALPHA;
+   }
+
+   /* Only flag an upload for shadows whose value actually changed. */
+   if ( mmesa->setup.alpha_tst_cntl != alpha_cntl ) {
+      mmesa->setup.alpha_tst_cntl = alpha_cntl;
+      mmesa->dirty |= MACH64_UPLOAD_Z_ALPHA_CNTL;
+   }
+   if ( mmesa->setup.scale_3d_cntl != scale_cntl ) {
+      mmesa->setup.scale_3d_cntl = scale_cntl;
+      mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL;
+   }
+   if ( mmesa->setup.dp_write_mask != write_mask ) {
+      mmesa->setup.dp_write_mask = write_mask;
+      mmesa->dirty |= MACH64_UPLOAD_DP_WRITE_MASK;
+   }
+}
+
+/* Alpha-function state callback.  The func and ref arguments are not
+ * read here: the function only invokes FLUSH_BATCH and sets
+ * MACH64_NEW_ALPHA in new_state.
+ * NOTE(review): presumably the values are later picked up from
+ * ctx->Color by mach64UpdateAlphaMode() -- confirm against the state
+ * validation code.
+ */
+static void mach64DDAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_ALPHA;
+}
+
+/**
+ * Blend-equation state callback.
+ *
+ * Asserts that the RGB and alpha equations are identical, then updates
+ * two software fallbacks: the logic-op fallback (raised when a color
+ * logic op other than GL_COPY is enabled) and the blend-equation
+ * fallback (raised for anything but GL_FUNC_ADD).  Finally flags the
+ * alpha state as needing revalidation.
+ */
+static void mach64DDBlendEquationSeparate( GLcontext *ctx,
+                                           GLenum modeRGB, GLenum modeA )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLboolean logicop_fallback;
+   GLboolean blend_eq_fallback;
+
+   assert( modeRGB == modeA );
+   FLUSH_BATCH( mmesa );
+
+   /* BlendEquation affects ColorLogicOpEnabled
+    */
+   logicop_fallback = (ctx->Color.ColorLogicOpEnabled &&
+                       ctx->Color.LogicOp != GL_COPY);
+   FALLBACK( MACH64_CONTEXT(ctx), MACH64_FALLBACK_LOGICOP, logicop_fallback );
+
+   /* Can only do blend addition, not min, max, subtract, etc. */
+   blend_eq_fallback = (modeRGB != GL_FUNC_ADD);
+   FALLBACK( MACH64_CONTEXT(ctx), MACH64_FALLBACK_BLEND_EQ, blend_eq_fallback );
+
+   mmesa->new_state |= MACH64_NEW_ALPHA;
+}
+
+/* Blend-function state callback.  None of the factor arguments is read
+ * here: the function only invokes FLUSH_BATCH and sets MACH64_NEW_ALPHA
+ * in new_state.
+ */
+static void mach64DDBlendFuncSeparate( GLcontext *ctx,
+				       GLenum sfactorRGB, GLenum dfactorRGB,
+				       GLenum sfactorA, GLenum dfactorA )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_ALPHA;
+}
+
+
+/* =============================================================
+ * Depth testing
+ */
+
+/**
+ * Recompute the z_cntl register shadow from ctx->Depth (test enable,
+ * compare function and write mask) and mark MACH64_UPLOAD_Z_ALPHA_CNTL
+ * dirty if the value changed.
+ */
+static void mach64UpdateZMode( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint z_cntl = mmesa->setup.z_cntl;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG ) {
+      fprintf( stderr, "%s:\n", __FUNCTION__ );
+   }
+
+   if ( !ctx->Depth.Test ) {
+      z_cntl &= ~MACH64_Z_EN;
+   } else {
+      GLuint func_bits = 0;
+
+      z_cntl &= ~MACH64_Z_TEST_MASK;
+
+      switch ( ctx->Depth.Func ) {
+      case GL_NEVER:    func_bits = MACH64_Z_TEST_NEVER;    break;
+      case GL_ALWAYS:   func_bits = MACH64_Z_TEST_ALWAYS;   break;
+      case GL_LESS:     func_bits = MACH64_Z_TEST_LESS;     break;
+      case GL_LEQUAL:   func_bits = MACH64_Z_TEST_LEQUAL;   break;
+      case GL_EQUAL:    func_bits = MACH64_Z_TEST_EQUAL;    break;
+      case GL_GEQUAL:   func_bits = MACH64_Z_TEST_GEQUAL;   break;
+      case GL_GREATER:  func_bits = MACH64_Z_TEST_GREATER;  break;
+      case GL_NOTEQUAL: func_bits = MACH64_Z_TEST_NOTEQUAL; break;
+      default:
+         /* unknown function: no test bits are set */
+         break;
+      }
+
+      z_cntl |= func_bits;
+      z_cntl |= MACH64_Z_EN;
+   }
+
+   if ( !ctx->Depth.Mask ) {
+      z_cntl &= ~MACH64_Z_MASK_EN;
+   } else {
+      z_cntl |= MACH64_Z_MASK_EN;
+   }
+
+   if ( mmesa->setup.z_cntl != z_cntl ) {
+      mmesa->setup.z_cntl = z_cntl;
+      mmesa->dirty |= MACH64_UPLOAD_Z_ALPHA_CNTL;
+   }
+}
+
+/* Depth-function state callback.  The func argument is not read here:
+ * the function only invokes FLUSH_BATCH and sets MACH64_NEW_DEPTH in
+ * new_state.
+ */
+static void mach64DDDepthFunc( GLcontext *ctx, GLenum func )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_DEPTH;
+}
+
+/* Depth-mask state callback.  The flag argument is not read here: the
+ * function only invokes FLUSH_BATCH and sets MACH64_NEW_DEPTH in
+ * new_state.
+ */
+static void mach64DDDepthMask( GLcontext *ctx, GLboolean flag )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_DEPTH;
+}
+
+/* Clear-depth state callback: stores the requested clear depth in the
+ * context, scaled by 0xffff.
+ */
+static void mach64DDClearDepth( GLcontext *ctx, GLclampd d )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   /* Always have a 16-bit depth buffer.
+    */
+   mmesa->ClearDepth = d * 0xffff;
+}
+
+
+/* =============================================================
+ * Fog
+ */
+
+/**
+ * Recompute the fog-related bits of scale_3d_cntl and the packed fog
+ * color from ctx->Fog, flagging the matching upload bits when either
+ * shadow changes.
+ *
+ * Nothing is touched while blending is enabled.
+ */
+static void mach64UpdateFogAttrib( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   CARD32 scale_cntl = mmesa->setup.scale_3d_cntl;
+   GLubyte rgba[4];
+   CARD32 fog_clr;
+   int i;
+
+   /* Can't fog if blending is on */
+   if ( ctx->Color.BlendEnabled ) {
+      return;
+   }
+
+   if ( !ctx->Fog.Enabled ) {
+      scale_cntl &= ~(MACH64_ALPHA_BLEND_SRC_MASK |
+                      MACH64_ALPHA_BLEND_DST_MASK |
+                      MACH64_ALPHA_BLEND_SAT);
+      scale_cntl |= (MACH64_ALPHA_BLEND_SRC_ONE |
+                     MACH64_ALPHA_BLEND_DST_ZERO);
+      scale_cntl &= ~MACH64_ALPHA_FOG_EN_FOG;
+   } else {
+      scale_cntl |= MACH64_ALPHA_FOG_EN_FOG;
+      scale_cntl &= ~(MACH64_ALPHA_BLEND_SRC_MASK |
+                      MACH64_ALPHA_BLEND_DST_MASK |
+                      MACH64_ALPHA_BLEND_SAT);
+      /* From Utah-glx: "fog color is now dest and fog factor is alpha, so
+       * use GL_SRC_ALPHA GL_ONE_MINUS_SRC_ALPHA"
+       */
+      scale_cntl |= (MACH64_ALPHA_BLEND_SRC_SRCALPHA |
+                     MACH64_ALPHA_BLEND_DST_INVSRCALPHA);
+      /* From Utah-glx: "can't use texture alpha when fogging" */
+      scale_cntl &= ~MACH64_TEX_MAP_AEN;
+   }
+
+   for ( i = 0 ; i < 4 ; i++ ) {
+      rgba[i] = FLOAT_TO_UBYTE( ctx->Fog.Color[i] );
+   }
+
+   /* The fog color is always packed with a cpp of 4. */
+   fog_clr = mach64PackColor( 4, rgba[0], rgba[1], rgba[2], rgba[3] );
+
+   if ( mmesa->setup.dp_fog_clr != fog_clr ) {
+      mmesa->setup.dp_fog_clr = fog_clr;
+      mmesa->dirty |= MACH64_UPLOAD_DP_FOG_CLR;
+   }
+   if ( mmesa->setup.scale_3d_cntl != scale_cntl ) {
+      mmesa->setup.scale_3d_cntl = scale_cntl;
+      mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL;
+   }
+}
+
+/* Fog-parameter state callback.  Neither pname nor param is read here:
+ * the function only invokes FLUSH_BATCH and sets MACH64_NEW_FOG in
+ * new_state.
+ */
+static void mach64DDFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_FOG;
+}
+
+
+/* =============================================================
+ * Clipping
+ */
+
+/**
+ * Recompute the scissor register shadows (sc_top_bottom, sc_left_right)
+ * for the current drawable.
+ *
+ * The rectangle starts as the whole drawable, is shrunk to the GL
+ * scissor box when scissoring is enabled (the GL box is flipped
+ * vertically using the drawable height), is translated by the drawable
+ * position and finally clamped against the screen.  Does nothing when
+ * no drawable is bound.
+ */
+static void mach64UpdateClipping( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   mach64ScreenPtr mach64Screen = mmesa->mach64Screen;
+   __DRIdrawable *drawable;
+   int left, top, right, bottom;
+
+   if ( !mmesa->driDrawable ) {
+      return;
+   }
+
+   drawable = mmesa->driDrawable;
+
+   left = 0;
+   top = 0;
+   right = drawable->w - 1;
+   bottom = drawable->h - 1;
+
+   if ( ctx->Scissor.Enabled ) {
+      const int sc_left = ctx->Scissor.X;
+      const int sc_top = drawable->h - ctx->Scissor.Y - ctx->Scissor.Height;
+      const int sc_right = ctx->Scissor.X + ctx->Scissor.Width - 1;
+      const int sc_bottom = drawable->h - ctx->Scissor.Y - 1;
+
+      /* Only ever shrink the drawable rectangle. */
+      if ( sc_left > left ) {
+         left = sc_left;
+      }
+      if ( sc_top > top ) {
+         top = sc_top;
+      }
+      if ( sc_right < right ) {
+         right = sc_right;
+      }
+      if ( sc_bottom < bottom ) {
+         bottom = sc_bottom;
+      }
+   }
+
+   /* Translate from drawable to screen coordinates. */
+   left += drawable->x;
+   top += drawable->y;
+   right += drawable->x;
+   bottom += drawable->y;
+
+   /* clamp to screen borders */
+   if ( left < 0 ) {
+      left = 0;
+   }
+   if ( top < 0 ) {
+      top = 0;
+   }
+   if ( right < 0 ) {
+      right = 0;
+   }
+   if ( bottom < 0 ) {
+      bottom = 0;
+   }
+   if ( right > mach64Screen->width-1 ) {
+      right = mach64Screen->width-1;
+   }
+   if ( bottom > mach64Screen->height-1 ) {
+      bottom = mach64Screen->height-1;
+   }
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG ) {
+      fprintf( stderr, "%s: drawable %3d %3d %3d %3d\n",
+               __FUNCTION__,
+               drawable->x,
+               drawable->y,
+               drawable->w,
+               drawable->h );
+      fprintf( stderr, "%s:  scissor %3d %3d %3d %3d\n",
+               __FUNCTION__,
+               ctx->Scissor.X,
+               ctx->Scissor.Y,
+               ctx->Scissor.Width,
+               ctx->Scissor.Height );
+      fprintf( stderr, "%s:    final %3d %3d %3d %3d\n",
+               __FUNCTION__, left, top, right, bottom );
+      fprintf( stderr, "\n" );
+   }
+
+   /* Low half holds the top/left edge, high half the bottom/right edge. */
+   mmesa->setup.sc_top_bottom = ((top << 0) |
+                                 (bottom << 16));
+
+   mmesa->setup.sc_left_right = ((left << 0) |
+                                 (right << 16));
+
+   /* UPLOAD_MISC reduces the dirty state, we just need to
+    * emit the scissor to the SAREA.  We need to dirty cliprects
+    * since the scissor and cliprects are intersected to update the
+    * single hardware scissor
+    */
+   mmesa->dirty |= MACH64_UPLOAD_MISC | MACH64_UPLOAD_CLIPRECTS;
+}
+
+/* Scissor state callback.  The box arguments are not read here: the
+ * function only invokes FLUSH_BATCH and sets MACH64_NEW_CLIP in
+ * new_state.
+ */
+static void mach64DDScissor( GLcontext *ctx,
+			     GLint x, GLint y, GLsizei w, GLsizei h )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_CLIP;
+}
+
+
+/* =============================================================
+ * Culling
+ */
+
+/**
+ * Derive mmesa->backface_sign from the polygon culling state.
+ *
+ * The sign is 0 when culling is disabled or the cull mode is anything
+ * other than GL_BACK / GL_FRONT.  Otherwise it is -1 when culling back
+ * faces with a CCW front face, or front faces with a non-CCW front
+ * face, and +1 in the remaining cases.
+ */
+static void mach64UpdateCull( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLboolean front_is_ccw = ( ctx->Polygon.FrontFace == GL_CCW );
+   GLfloat sign = 0;
+
+   if ( ctx->Polygon.CullFlag /*&& ctx->PB->primitive == GL_POLYGON*/ ) {
+      switch ( ctx->Polygon.CullFaceMode ) {
+      case GL_BACK:
+         sign = front_is_ccw ? -1 : 1;
+         break;
+      case GL_FRONT:
+         sign = front_is_ccw ? 1 : -1;
+         break;
+      case GL_FRONT_AND_BACK:
+      default:
+         sign = 0;
+         break;
+      }
+   }
+
+   mmesa->backface_sign = sign;
+}
+
+/* Cull-face state callback.  The mode argument is not read here: the
+ * function only invokes FLUSH_BATCH and sets MACH64_NEW_CULL in
+ * new_state.
+ */
+static void mach64DDCullFace( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_CULL;
+}
+
+/* Front-face state callback.  The mode argument is not read here: the
+ * function only invokes FLUSH_BATCH and sets MACH64_NEW_CULL in
+ * new_state.
+ */
+static void mach64DDFrontFace( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_CULL;
+}
+
+
+/* =============================================================
+ * Masks
+ */
+
+/**
+ * Recompute the dp_write_mask register shadow from the color mask of
+ * draw buffer 0 and flag MACH64_UPLOAD_DP_WRITE_MASK if it changed.
+ *
+ * With blending enabled the mask is forced to all ones.
+ */
+static void mach64UpdateMasks( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint write_mask;
+
+   /* mach64 can't color mask with alpha blending enabled */
+   if ( ctx->Color.BlendEnabled ) {
+      write_mask = 0xffffffff;
+   } else {
+      write_mask = mach64PackColor( mmesa->mach64Screen->cpp,
+                                    ctx->Color.ColorMask[0][RCOMP],
+                                    ctx->Color.ColorMask[0][GCOMP],
+                                    ctx->Color.ColorMask[0][BCOMP],
+                                    ctx->Color.ColorMask[0][ACOMP] );
+   }
+
+   if ( mmesa->setup.dp_write_mask != write_mask ) {
+      mmesa->setup.dp_write_mask = write_mask;
+      mmesa->dirty |= MACH64_UPLOAD_DP_WRITE_MASK;
+   }
+}
+
+/* Color-mask state callback.  The r/g/b/a arguments are not read here:
+ * the function only invokes FLUSH_BATCH and sets MACH64_NEW_MASKS in
+ * new_state.
+ */
+static void mach64DDColorMask( GLcontext *ctx,
+			       GLboolean r, GLboolean g,
+			       GLboolean b, GLboolean a )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   FLUSH_BATCH( mmesa );
+   mmesa->new_state |= MACH64_NEW_MASKS;
+}
+
+
+/* =============================================================
+ * Rendering attributes
+ *
+ * We really don't want to recalculate all this every time we bind a
+ * texture.  These things shouldn't change all that often, so it makes
+ * sense to break them out of the core texture state update routines.
+ */
+
+/**
+ * Enable or disable MACH64_SPECULAR_LIGHT_EN in the alpha_tst_cntl
+ * shadow.  The bit is set only when lighting is enabled and the light
+ * model uses GL_SEPARATE_SPECULAR_COLOR.  When the shadow changes, the
+ * Z/alpha upload bit and MACH64_NEW_CONTEXT are flagged.
+ */
+static void mach64UpdateSpecularLighting( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint alpha_cntl = mmesa->setup.alpha_tst_cntl;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG ) {
+      fprintf( stderr, "%s:\n", __FUNCTION__ );
+   }
+
+   if ( ctx->Light.Model.ColorControl != GL_SEPARATE_SPECULAR_COLOR ||
+        !ctx->Light.Enabled ) {
+      alpha_cntl &= ~MACH64_SPECULAR_LIGHT_EN;
+   } else {
+      alpha_cntl |= MACH64_SPECULAR_LIGHT_EN;
+   }
+
+   if ( mmesa->setup.alpha_tst_cntl == alpha_cntl ) {
+      return;
+   }
+
+   mmesa->setup.alpha_tst_cntl = alpha_cntl;
+   mmesa->dirty |= MACH64_UPLOAD_Z_ALPHA_CNTL;
+   mmesa->new_state |= MACH64_NEW_CONTEXT;
+}
+
+/**
+ * Light-model state callback.  Only GL_LIGHT_MODEL_COLOR_CONTROL is
+ * acted upon: the batch is flushed and the specular-lighting bit is
+ * recomputed immediately.  All other parameter names are ignored.
+ */
+static void mach64DDLightModelfv( GLcontext *ctx, GLenum pname,
+                                  const GLfloat *param )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   if ( pname != GL_LIGHT_MODEL_COLOR_CONTROL ) {
+      return;
+   }
+
+   FLUSH_BATCH( mmesa );
+   mach64UpdateSpecularLighting(ctx);
+}
+
+/* ShadeModel hook: select flat or smooth shading in setup_cntl. */
+static void mach64DDShadeModel( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint shadeBits;
+   GLuint cntl;
+
+   if ( mode == GL_FLAT ) {
+      shadeBits = MACH64_FLAT_SHADE_VERTEX_3;
+   } else if ( mode == GL_SMOOTH ) {
+      shadeBits = MACH64_FLAT_SHADE_OFF;
+   } else {
+      return;   /* unrecognised mode: leave the register image untouched */
+   }
+
+   /* Replace only the flat-shade field, keeping every other bit. */
+   cntl = ( mmesa->setup.setup_cntl & ~MACH64_FLAT_SHADE_MASK ) | shadeBits;
+   if ( cntl == mmesa->setup.setup_cntl )
+      return;
+
+   FLUSH_BATCH( mmesa );
+   mmesa->setup.setup_cntl = cntl;
+
+   mmesa->dirty |= MACH64_UPLOAD_SETUP_CNTL;
+}
+
+
+/* =============================================================
+ * Viewport
+ */
+
+
+void mach64CalcViewport( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;   /* source: Mesa's window-map matrix */
+   GLfloat *m = mmesa->hw_viewport;   /* destination: the driver's copy */
+
+   /* Rebuild hw_viewport from the window map: Y scale/translate are negated
+    * and offset by the drawable height.  See also mach64_translate_vertex. */
+   m[MAT_SX] =   v[MAT_SX];
+   m[MAT_TX] =   v[MAT_TX] + (GLfloat)mmesa->drawX + SUBPIXEL_X;
+   m[MAT_SY] = - v[MAT_SY];
+   m[MAT_TY] = - v[MAT_TY] + mmesa->driDrawable->h + (GLfloat)mmesa->drawY + SUBPIXEL_Y;
+   m[MAT_SZ] =   v[MAT_SZ] * mmesa->depth_scale;
+   m[MAT_TZ] =   v[MAT_TZ] * mmesa->depth_scale;
+
+   mmesa->SetupNewInputs = ~0;   /* set every bit of SetupNewInputs */
+}
+
+static void mach64Viewport( GLcontext *ctx,   /* Viewport driver hook */
+			  GLint x, GLint y,   /* not read here */
+			  GLsizei width, GLsizei height )   /* not read here */
+{
+   mach64CalcViewport( ctx );   /* rewrites mmesa->hw_viewport from ctx */
+}
+
+static void mach64DepthRange( GLcontext *ctx,   /* DepthRange driver hook */
+			    GLclampd nearval, GLclampd farval )   /* not read here */
+{
+   mach64CalcViewport( ctx );   /* rewrites mmesa->hw_viewport from ctx */
+}
+
+
+/* =============================================================
+ * Miscellaneous
+ */
+
+/* ClearColor hook: convert the float RGBA to bytes and cache the packed value. */
+static void mach64DDClearColor( GLcontext *ctx, const GLfloat color[4] )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLubyte rgba[4];
+   GLint i;
+
+   for ( i = 0 ; i < 4 ; i++ ) {
+      CLAMPED_FLOAT_TO_UBYTE(rgba[i], color[i]);
+   }
+   /* Pack according to the screen's cpp. */
+   mmesa->ClearColor = mach64PackColor( mmesa->mach64Screen->cpp,
+					rgba[0], rgba[1], rgba[2], rgba[3] );
+}
+
+/* LogicOp hook: an enabled opcode other than GL_COPY turns on the logic-op fallback. */
+static void mach64DDLogicOpCode( GLcontext *ctx, GLenum opcode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   if ( !ctx->Color.ColorLogicOpEnabled )
+      return;   /* logic ops are disabled: nothing to do here */
+   FLUSH_BATCH( mmesa );
+   FALLBACK( mmesa, MACH64_FALLBACK_LOGICOP, opcode != GL_COPY);
+}
+
+/* Point the context's cliprect list and draw origin at the drawable's
+ * front or back set, then refresh clipping.  Other modes are ignored. */
+void mach64SetCliprects( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   __DRIdrawable *dPriv = mmesa->driDrawable;
+   GLboolean useBack;
+
+   if ( mode == GL_FRONT_LEFT ) {
+      useBack = GL_FALSE;
+   } else if ( mode == GL_BACK_LEFT ) {
+      /* With no back cliprects, the front ones are used instead. */
+      useBack = ( dPriv->numBackClipRects != 0 );
+   } else {
+      return;
+   }
+
+   if ( useBack ) {
+      mmesa->numClipRects = dPriv->numBackClipRects;
+      mmesa->pClipRects = dPriv->pBackClipRects;
+      mmesa->drawX = dPriv->backX;
+      mmesa->drawY = dPriv->backY;
+   } else {
+      mmesa->numClipRects = dPriv->numClipRects;
+      mmesa->pClipRects = dPriv->pClipRects;
+      mmesa->drawX = dPriv->x;
+      mmesa->drawY = dPriv->y;
+   }
+
+   mach64UpdateClipping( ctx );
+
+   mmesa->dirty |= MACH64_UPLOAD_CLIPRECTS;
+}
+
+/* DrawBuffer hook: select front- or back-buffer cliprects, or enable the
+ * draw-buffer fallback for any other selection; then refresh dst_off_pitch. */
+static void mach64DDDrawBuffer( GLcontext *ctx, GLenum mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   FLUSH_BATCH( mmesa );
+
+   if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
+      /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */
+      FALLBACK( mmesa, MACH64_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   if ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT ) {
+      FALLBACK( mmesa, MACH64_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      mach64SetCliprects( ctx, GL_FRONT_LEFT );
+      if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+	 fprintf(stderr,"%s: BUFFER_BIT_FRONT_LEFT\n", __FUNCTION__);
+   }
+   else if ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT ) {
+      FALLBACK( mmesa, MACH64_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      mach64SetCliprects( ctx, GL_BACK_LEFT );
+      if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+	 fprintf(stderr,"%s: BUFFER_BIT_BACK_LEFT\n", __FUNCTION__);
+   }
+   else {
+      FALLBACK( mmesa, MACH64_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      if (MACH64_DEBUG & DEBUG_VERBOSE_MSG)
+	 fprintf(stderr,"%s: fallback (mode=%d)\n", __FUNCTION__, mode);
+   }
+
+   /* drawPitch/8 goes in bits 22 and up, drawOffset/8 in the bits below. */
+   mmesa->setup.dst_off_pitch = (((mmesa->drawPitch/8) << 22) |
+				 (mmesa->drawOffset >> 3));
+   mmesa->dirty |= MACH64_UPLOAD_DST_OFF_PITCH;
+}
+
+static void mach64DDReadBuffer( GLcontext *ctx, GLenum mode )
+{
+   /* Intentionally empty until we implement h/w glRead/CopyPixels or CopyTexImage. */
+}
+
+/* =============================================================
+ * State enable/disable
+ */
+
+/* Enable/Disable hook: turn a GL capability toggle into dirty flags, register bits or fallbacks. */
+static void mach64DDEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__,
+	       _mesa_lookup_enum_by_nr( cap ), state ? "GL_TRUE" : "GL_FALSE" );
+   }
+
+   switch ( cap ) {
+   case GL_ALPHA_TEST:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_ALPHA;
+      break;
+
+   case GL_BLEND:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_ALPHA;
+
+      /* enable(GL_BLEND) affects ColorLogicOpEnabled, so re-evaluate the fallback. */
+      FALLBACK( mmesa, MACH64_FALLBACK_LOGICOP,
+		(ctx->Color.ColorLogicOpEnabled &&
+		 ctx->Color.LogicOp != GL_COPY));
+      break;
+
+   case GL_CULL_FACE:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_CULL;
+      break;
+
+   case GL_DEPTH_TEST:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_DEPTH;
+      break;
+
+   case GL_DITHER:
+      do {
+	 GLuint s = mmesa->setup.scale_3d_cntl;
+	 FLUSH_BATCH( mmesa );
+
+	 if ( ctx->Color.DitherFlag ) {
+	    /* Dithering causes problems w/ 24bpp depth */
+	    if ( mmesa->mach64Screen->cpp == 4 )
+	       s |=  MACH64_ROUND_EN;
+	    else
+	       s |=  MACH64_DITHER_EN;
+	 } else {
+	    s &= ~MACH64_DITHER_EN;
+	    s &= ~MACH64_ROUND_EN;
+	 }
+
+	 if ( mmesa->setup.scale_3d_cntl != s ) {
+	    mmesa->setup.scale_3d_cntl = s;
+	    mmesa->dirty |= ( MACH64_UPLOAD_SCALE_3D_CNTL );
+	 }
+      } while (0);
+      break;
+
+   case GL_FOG:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_FOG;
+      break;
+
+   case GL_INDEX_LOGIC_OP:
+   case GL_COLOR_LOGIC_OP:
+      FLUSH_BATCH( mmesa );
+      FALLBACK( mmesa, MACH64_FALLBACK_LOGICOP,
+		state && ctx->Color.LogicOp != GL_COPY );
+      break;
+
+   case GL_LIGHTING:
+      FLUSH_BATCH( mmesa );   /* as in mach64DDLightModelfv: flush before alpha_tst_cntl may change */
+      mach64UpdateSpecularLighting(ctx);
+      break;
+
+   case GL_SCISSOR_TEST:
+      FLUSH_BATCH( mmesa );
+      mmesa->scissor = state;
+      mmesa->new_state |= MACH64_NEW_CLIP;
+      break;
+
+   case GL_STENCIL_TEST:
+      FLUSH_BATCH( mmesa );
+      FALLBACK( mmesa, MACH64_FALLBACK_STENCIL, state );
+      break;
+
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_TEXTURE;
+      break;
+
+   default:
+      return;
+   }
+}
+
+/* =============================================================
+ * Render mode
+ */
+
+static void mach64DDRenderMode( GLcontext *ctx, GLenum mode )   /* RenderMode driver hook */
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   FALLBACK( mmesa, MACH64_FALLBACK_RENDER_MODE, (mode != GL_RENDER) );   /* fallback is requested for every mode except GL_RENDER */
+}
+
+/* =============================================================
+ * State initialization, management
+ */
+
+static void mach64DDPrintDirty( const char *msg, GLuint state )
+{
+   fprintf( stderr,
+	    "%s: (0x%x) %s%s%s%s%s%s%s%s%s%s%s%s\n",
+	    msg,
+	    state,
+	    (state & MACH64_UPLOAD_DST_OFF_PITCH) ? "dst_off_pitch, " : "",
+	    (state & MACH64_UPLOAD_Z_ALPHA_CNTL)  ? "z_alpha_cntl, " : "",
+	    (state & MACH64_UPLOAD_SCALE_3D_CNTL) ? "scale_3d_cntl, " : "",
+	    (state & MACH64_UPLOAD_DP_FOG_CLR)    ? "dp_fog_clr, " : "",
+	    (state & MACH64_UPLOAD_DP_WRITE_MASK) ? "dp_write_mask, " : "",
+	    (state & MACH64_UPLOAD_DP_PIX_WIDTH)  ? "dp_pix_width, " : "",
+	    (state & MACH64_UPLOAD_SETUP_CNTL)    ? "setup_cntl, " : "",
+	    (state & MACH64_UPLOAD_MISC)          ? "misc, " : "",
+	    (state & MACH64_UPLOAD_TEXTURE)       ? "texture, " : "",
+	    (state & MACH64_UPLOAD_TEX0IMAGE)     ? "tex0 image, " : "",
+	    (state & MACH64_UPLOAD_TEX1IMAGE)     ? "tex1 image, " : "",
+	    (state & MACH64_UPLOAD_CLIPRECTS)     ? "cliprects, " : "" );
+}
+
+/*
+ * Load the current context's state into the hardware.
+ *
+ * NOTE: Be VERY careful about ensuring the context state is marked for
+ * upload, the only place it shouldn't be uploaded is when the setup
+ * state has changed in ReducedPrimitiveChange as this comes right after
+ * a state update.
+ *
+ * Blits of any type should always upload the context and masks after
+ * they are done.
+ */
+void mach64EmitHwStateLocked( mach64ContextPtr mmesa )
+{
+   drm_mach64_sarea_t *sarea = mmesa->sarea;
+   drm_mach64_context_regs_t *regs = &(mmesa->setup);   /* driver-side register image */
+   mach64TexObjPtr t0 = mmesa->CurrentTexObj[0];   /* may be NULL; checked before use below */
+   mach64TexObjPtr t1 = mmesa->CurrentTexObj[1];   /* may be NULL; checked before use below */
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG ) {
+      mach64DDPrintDirty( __FUNCTION__, mmesa->dirty );
+   }
+
+   if ( t0 && t1 && mmesa->mach64Screen->numTexHeaps > 1 ) {   /* both units in use, several heaps */
+      if (t0->heap != t1->heap || 
+	     (mmesa->dirty & MACH64_UPLOAD_TEX0IMAGE) ||
+	     (mmesa->dirty & MACH64_UPLOAD_TEX1IMAGE))
+	 mach64UploadMultiTexImages( mmesa, t0, t1 );
+   } else {   /* otherwise upload each dirty image on its own */
+      if ( mmesa->dirty & MACH64_UPLOAD_TEX0IMAGE ) {
+	 if ( t0 ) mach64UploadTexImages( mmesa, t0 );
+      }
+      if ( mmesa->dirty & MACH64_UPLOAD_TEX1IMAGE ) {
+	 if ( t1 ) mach64UploadTexImages( mmesa, t1 );
+      }
+   }
+
+   if ( mmesa->dirty & (MACH64_UPLOAD_CONTEXT | MACH64_UPLOAD_MISC) ) {
+      memcpy( &sarea->context_state, regs,   /* copy the register image into the SAREA */
+	      MACH64_NR_CONTEXT_REGS * sizeof(GLuint) );
+   }
+
+   if ( mmesa->dirty & MACH64_UPLOAD_TEXTURE ) {
+      mach64EmitTexStateLocked( mmesa, t0, t1 );
+   }
+
+   sarea->vertsize = mmesa->vertex_size;
+
+   /* Turn off the texture cache flushing.  This happens after the copy
+    * above, so only the driver-side image is changed here. */
+   mmesa->setup.tex_cntl &= ~MACH64_TEX_CACHE_FLUSH;
+
+   sarea->dirty |= mmesa->dirty;   /* hand the pending flags to the SAREA */
+
+   mmesa->dirty &= MACH64_UPLOAD_CLIPRECTS;   /* clear every flag except CLIPRECTS */
+}
+
+static void mach64DDPrintState( const char *msg, GLuint flags )
+{
+   fprintf( stderr,
+	    "%s: (0x%x) %s%s%s%s%s%s%s%s%s\n",
+	    msg,
+	    flags,
+	    (flags & MACH64_NEW_CONTEXT)	? "context, " : "",
+	    (flags & MACH64_NEW_ALPHA)		? "alpha, " : "",
+	    (flags & MACH64_NEW_DEPTH)		? "depth, " : "",
+	    (flags & MACH64_NEW_FOG)		? "fog, " : "",
+	    (flags & MACH64_NEW_CLIP)		? "clip, " : "",
+	    (flags & MACH64_NEW_TEXTURE)	? "texture, " : "",
+	    (flags & MACH64_NEW_CULL)		? "cull, " : "",
+	    (flags & MACH64_NEW_MASKS)		? "masks, " : "",
+	    (flags & MACH64_NEW_WINDOW)		? "window, " : "" );
+}
+
+/* Update the hardware state */
+void mach64DDUpdateHWState( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const int pending = mmesa->new_state;   /* snapshot; the field is reset below */
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG ) {
+      fprintf( stderr, "%s:\n", __FUNCTION__ );
+   }
+
+   if ( !pending )
+      return;   /* no driver state was flagged */
+
+   FLUSH_BATCH( mmesa );
+
+   mmesa->new_state = 0;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_MSG )
+      mach64DDPrintState( __FUNCTION__, pending );
+
+   /* Run one updater per flagged category of the context's state,
+    * always in this fixed order. */
+   if ( pending & MACH64_NEW_ALPHA )
+      mach64UpdateAlphaMode( ctx );
+
+   if ( pending & MACH64_NEW_DEPTH )
+      mach64UpdateZMode( ctx );
+
+   if ( pending & MACH64_NEW_FOG )
+      mach64UpdateFogAttrib( ctx );
+
+   if ( pending & MACH64_NEW_CLIP )
+      mach64UpdateClipping( ctx );
+
+   if ( pending & MACH64_NEW_WINDOW )
+      mach64CalcViewport( ctx );
+
+   if ( pending & MACH64_NEW_CULL )
+      mach64UpdateCull( ctx );
+
+   if ( pending & MACH64_NEW_MASKS )
+      mach64UpdateMasks( ctx );
+
+   if ( pending & MACH64_NEW_TEXTURE )
+      mach64UpdateTextureState( ctx );
+}
+
+
+static void mach64DDInvalidateState( GLcontext *ctx, GLuint new_state )   /* installed as ctx->Driver.UpdateState */
+{
+   _swrast_InvalidateState( ctx, new_state );   /* pass the flags on to each Mesa module in turn */
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   MACH64_CONTEXT(ctx)->NewGLState |= new_state;   /* accumulate the flags in the driver context */
+}
+
+
+/* Initialize the context's hardware state */
+void mach64DDInitState( mach64ContextPtr mmesa )
+{
+   GLuint format;   /* pixel datatype code replicated into dp_pix_width below */
+
+   switch ( mmesa->mach64Screen->cpp ) {
+   case 2:
+      format = MACH64_DATATYPE_RGB565;
+      break;
+   case 4:
+      format = MACH64_DATATYPE_ARGB8888;
+      break;
+   default:
+      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
+      exit( -1 );   /* terminates the process; format is never used uninitialized */
+   }
+
+   /* Always have a 16-bit depth buffer
+    * but Z coordinates are specified in 16.1 format to the setup engine.
+    */
+   mmesa->depth_scale = 2.0;
+
+   mmesa->ClearColor = 0x00000000;
+   mmesa->ClearDepth = 0x0000ffff;
+
+   mmesa->Fallback = 0;
+
+   if ( mmesa->glCtx->Visual.doubleBufferMode ) {   /* double-buffered: start on the back buffer */
+      mmesa->drawOffset = mmesa->readOffset = mmesa->mach64Screen->backOffset;
+      mmesa->drawPitch  = mmesa->readPitch  = mmesa->mach64Screen->backPitch;
+   } else {   /* single-buffered: start on the front buffer */
+      mmesa->drawOffset = mmesa->readOffset = mmesa->mach64Screen->frontOffset;
+      mmesa->drawPitch  = mmesa->readPitch  = mmesa->mach64Screen->frontPitch;
+   }
+
+   /* Hardware state: initial values of the register image in mmesa->setup.
+    */
+   mmesa->setup.dst_off_pitch = (((mmesa->drawPitch/8) << 22) |
+				 (mmesa->drawOffset >> 3));
+
+   mmesa->setup.z_off_pitch = (((mmesa->mach64Screen->depthPitch/8) << 22) |
+			       (mmesa->mach64Screen->depthOffset >> 3));
+
+   mmesa->setup.z_cntl = (MACH64_Z_TEST_LESS |
+			  MACH64_Z_MASK_EN);
+
+   mmesa->setup.alpha_tst_cntl = (MACH64_ALPHA_TEST_ALWAYS |
+				  MACH64_ALPHA_DST_SRCALPHA |
+				  MACH64_ALPHA_TST_SRC_TEXEL |
+				  (0 << MACH64_REF_ALPHA_SHIFT));
+
+   mmesa->setup.scale_3d_cntl = (MACH64_SCALE_PIX_EXPAND_DYNAMIC_RANGE |
+				 /*  MACH64_SCALE_DITHER_ERROR_DIFFUSE | */
+				 MACH64_SCALE_DITHER_2D_TABLE |
+				 /*  MACH64_DITHER_INIT_CURRENT | */
+				 MACH64_DITHER_INIT_RESET |
+				 MACH64_SCALE_3D_FCN_SHADE |
+				 MACH64_ALPHA_FOG_DIS |
+				 MACH64_ALPHA_BLEND_SRC_ONE |
+				 MACH64_ALPHA_BLEND_DST_ZERO |
+				 MACH64_TEX_LIGHT_FCN_MODULATE |
+				 MACH64_MIP_MAP_DISABLE |
+				 MACH64_BILINEAR_TEX_EN |
+				 MACH64_TEX_BLEND_FCN_LINEAR);
+
+   /* GL spec says dithering initially enabled, but dithering causes
+    * problems w/ 24bpp depth
+    */
+   if ( mmesa->mach64Screen->cpp == 4 )
+      mmesa->setup.scale_3d_cntl |= MACH64_ROUND_EN;
+   else
+      mmesa->setup.scale_3d_cntl |= MACH64_DITHER_EN;
+
+   mmesa->setup.sc_left_right = 0x1fff0000;
+   mmesa->setup.sc_top_bottom = 0x3fff0000;
+
+   mmesa->setup.dp_fog_clr    = 0x00ffffff;
+   mmesa->setup.dp_write_mask = 0xffffffff;
+
+   mmesa->setup.dp_pix_width = ((format << 0) |
+				(format << 4) |
+				(format << 8) |
+				(format << 16) |
+				(format << 28));
+
+   mmesa->setup.dp_mix = (MACH64_BKGD_MIX_S |
+			  MACH64_FRGD_MIX_S);
+   mmesa->setup.dp_src = (MACH64_BKGD_SRC_3D |
+			  MACH64_FRGD_SRC_3D |
+			  MACH64_MONO_SRC_ONE);
+
+   mmesa->setup.clr_cmp_cntl  = 0x00000000;
+   mmesa->setup.gui_traj_cntl = (MACH64_DST_X_LEFT_TO_RIGHT |
+				 MACH64_DST_Y_TOP_TO_BOTTOM);
+
+   mmesa->setup.setup_cntl = (MACH64_FLAT_SHADE_OFF |
+			      MACH64_SOLID_MODE_OFF |
+			      MACH64_LOG_MAX_INC_ADJ);
+   mmesa->setup.setup_cntl = 0;   /* NOTE(review): overrides the value stored just above - confirm which is intended */
+
+   mmesa->setup.tex_size_pitch = 0x00000000;
+
+   mmesa->setup.tex_cntl = ((0 << MACH64_LOD_BIAS_SHIFT) |
+			    (0 << MACH64_COMP_FACTOR_SHIFT) |
+			    MACH64_COMP_COMBINE_MODULATE |
+			    MACH64_COMP_BLEND_NEAREST |
+			    MACH64_COMP_FILTER_NEAREST |
+			    /* MACH64_TEXTURE_TILING | */
+#ifdef MACH64_PREMULT_TEXCOORDS
+			    MACH64_TEX_ST_DIRECT | 
+#endif
+			    MACH64_TEX_SRC_LOCAL |
+			    MACH64_TEX_UNCOMPRESSED |
+			    MACH64_TEX_CACHE_FLUSH |
+			    MACH64_TEX_CACHE_SIZE_4K);
+
+   mmesa->setup.secondary_tex_off = 0x00000000;
+   mmesa->setup.tex_offset = 0x00000000;
+
+   mmesa->new_state = MACH64_NEW_ALL;   /* flag every state category as new */
+}
+
+/* Initialize the driver's state functions.
+  */
+void mach64DDInitStateFuncs( GLcontext *ctx )
+{
+   /* State invalidation */
+   ctx->Driver.UpdateState = mach64DDInvalidateState;
+   /* Buffer selection and clear values */
+   ctx->Driver.ClearColor = mach64DDClearColor;
+   ctx->Driver.ClearDepth = mach64DDClearDepth;
+   ctx->Driver.DrawBuffer = mach64DDDrawBuffer;
+   ctx->Driver.ReadBuffer = mach64DDReadBuffer;
+   /* Fragment, raster and lighting state */
+   ctx->Driver.ColorMask = mach64DDColorMask;
+   ctx->Driver.AlphaFunc = mach64DDAlphaFunc;
+   ctx->Driver.BlendEquationSeparate = mach64DDBlendEquationSeparate;
+   ctx->Driver.BlendFuncSeparate = mach64DDBlendFuncSeparate;
+   ctx->Driver.CullFace = mach64DDCullFace;
+   ctx->Driver.FrontFace = mach64DDFrontFace;
+   ctx->Driver.DepthFunc = mach64DDDepthFunc;
+   ctx->Driver.DepthMask = mach64DDDepthMask;
+   ctx->Driver.Enable = mach64DDEnable;
+   ctx->Driver.Fogfv = mach64DDFogfv;
+   ctx->Driver.LightModelfv = mach64DDLightModelfv;
+   ctx->Driver.LogicOpcode = mach64DDLogicOpCode;
+   ctx->Driver.RenderMode = mach64DDRenderMode;
+   ctx->Driver.Scissor = mach64DDScissor;
+   ctx->Driver.ShadeModel = mach64DDShadeModel;
+   /* Viewport transform */
+   ctx->Driver.DepthRange = mach64DepthRange;
+   ctx->Driver.Viewport = mach64Viewport;
+   /* Hooks this driver leaves unset */
+   ctx->Driver.Hint = NULL;         ctx->Driver.Lightfv = NULL;
+   ctx->Driver.PolygonMode = NULL;  ctx->Driver.PolygonStipple = NULL;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_state.h b/src/mesa/drivers/dri/mach64/mach64_state.h
new file mode 100644
index 0000000000..23081cb2fe
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_state.h
@@ -0,0 +1,47 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ */
+
+#ifndef __MACH64_STATE_H__
+#define __MACH64_STATE_H__
+
+#include "mach64_context.h"
+
+extern void mach64DDInitState( mach64ContextPtr mmesa );   /* fills mmesa->setup with initial register values */
+extern void mach64DDInitStateFuncs( GLcontext *ctx );   /* installs the ctx->Driver state hooks */
+
+extern void mach64SetCliprects( GLcontext *ctx, GLenum mode );   /* mode: GL_FRONT_LEFT or GL_BACK_LEFT */
+extern void mach64CalcViewport( GLcontext *ctx );   /* rebuilds mmesa->hw_viewport */
+
+extern void mach64DDUpdateState( GLcontext *ctx );   /* NOTE(review): confirm a definition still exists in mach64_state.c */
+extern void mach64DDUpdateHWState( GLcontext *ctx );   /* runs the updaters for flagged new_state bits */
+
+extern void mach64EmitHwStateLocked( mach64ContextPtr mmesa );   /* name suggests the hardware lock is held - confirm with callers */
+
+#endif
diff --git a/src/mesa/drivers/dri/mach64/mach64_tex.c b/src/mesa/drivers/dri/mach64/mach64_tex.c
new file mode 100644
index 0000000000..1bce967d58
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_tex.c
@@ -0,0 +1,565 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_tex.h"
+
+#include "main/simple_list.h"
+#include "main/enums.h"
+#include "main/texstore.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/imports.h"
+
+
+/* Record per-axis clamping on the driver texture object.  GL_CLAMP,
+ * GL_CLAMP_TO_EDGE and GL_CLAMP_TO_BORDER all set the flag, GL_REPEAT
+ * clears it, and any other wrap mode leaves the flag as it was.
+ */
+static void mach64SetTexWrap( mach64TexObjPtr t,
+			      GLenum swrap, GLenum twrap )
+{
+   /* S axis */
+   if ( swrap == GL_CLAMP ||
+        swrap == GL_CLAMP_TO_EDGE ||
+        swrap == GL_CLAMP_TO_BORDER ) {
+      t->ClampS = GL_TRUE;
+   } else if ( swrap == GL_REPEAT ) {
+      t->ClampS = GL_FALSE;
+   }
+
+   /* T axis */
+   if ( twrap == GL_CLAMP ||
+        twrap == GL_CLAMP_TO_EDGE ||
+        twrap == GL_CLAMP_TO_BORDER ) {
+      t->ClampT = GL_TRUE;
+   } else if ( twrap == GL_REPEAT ) {
+      t->ClampT = GL_FALSE;
+   }
+}
+
+/* Record whether min/mag filtering is bilinear.  Mipmapped min filters
+ * are classified by their NEAREST/LINEAR prefix; an unrecognised value
+ * leaves the corresponding flag as it was.
+ */
+static void mach64SetTexFilter( mach64TexObjPtr t,
+				GLenum minf, GLenum magf )
+{
+   /* Minification */
+   if ( minf == GL_NEAREST ||
+        minf == GL_NEAREST_MIPMAP_NEAREST ||
+        minf == GL_NEAREST_MIPMAP_LINEAR ) {
+      t->BilinearMin = GL_FALSE;
+   } else if ( minf == GL_LINEAR ||
+               minf == GL_LINEAR_MIPMAP_NEAREST ||
+               minf == GL_LINEAR_MIPMAP_LINEAR ) {
+      t->BilinearMin = GL_TRUE;
+   }
+
+   /* Magnification */
+   if ( magf == GL_NEAREST ) {
+      t->BilinearMag = GL_FALSE;
+   } else if ( magf == GL_LINEAR ) {
+      t->BilinearMag = GL_TRUE;
+   }
+}
+
+static void mach64SetTexBorderColor( mach64TexObjPtr t, const GLfloat c[4] )   /* currently a no-op: the body is compiled out */
+{
+#if 0
+   GLuint border = mach64PackColor( 4, c[0], c[1], c[2], c[3] );
+#endif
+}
+
+
+/* Allocate the driver-private texture object for texObj, store it in
+ * texObj->DriverData, and seed its wrap/filter/border state from texObj.
+ * Returns NULL (and leaves DriverData NULL) if the allocation fails. */
+static mach64TexObjPtr
+mach64AllocTexObj( struct gl_texture_object *texObj )
+{
+   mach64TexObjPtr t;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API )
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, (void *) texObj );   /* %p requires void * */
+
+   t = (mach64TexObjPtr) CALLOC_STRUCT( mach64_texture_object );
+   texObj->DriverData = t;
+   if ( !t )
+      return NULL;
+
+   /* Initialize non-image-dependent parts of the state:
+    */
+   t->base.tObj = texObj;
+   t->base.dirty_images[0] = (1 << 0);
+   t->bufAddr = 0;
+   make_empty_list( (driTextureObject *) t );
+
+   mach64SetTexWrap( t, texObj->WrapS, texObj->WrapT );
+   mach64SetTexFilter( t, texObj->MinFilter, texObj->MagFilter );
+   mach64SetTexBorderColor( t, texObj->BorderColor.f );
+   return t;
+}
+
+
+/* Called by the _mesa_store_teximage[123]d() functions. */
+static gl_format
+mach64ChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+			   GLenum format, GLenum type )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   (void) format;
+   (void) type;
+
+   switch ( internalFormat ) {
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+   case 4:
+   case GL_RGBA:
+   case GL_RGBA2:
+   case GL_COMPRESSED_RGBA:
+      if (mmesa->mach64Screen->cpp == 4)
+         return MESA_FORMAT_ARGB8888;
+      else
+         return MESA_FORMAT_ARGB4444;
+
+   case GL_RGB5_A1:
+      if (mmesa->mach64Screen->cpp == 4)
+         return MESA_FORMAT_ARGB8888;
+      else
+         return MESA_FORMAT_ARGB1555;
+
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+   case GL_RGBA4:
+      if (mmesa->mach64Screen->cpp == 4)
+         return MESA_FORMAT_ARGB8888;
+      else
+         return MESA_FORMAT_ARGB4444;
+
+   case 3:
+   case GL_RGB:
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+   case GL_COMPRESSED_RGB:
+      if (mmesa->mach64Screen->cpp == 4)
+         return MESA_FORMAT_ARGB8888;
+      else
+         return MESA_FORMAT_RGB565;
+
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+      if (mmesa->mach64Screen->cpp == 4)
+         return MESA_FORMAT_ARGB8888; /* inefficient but accurate */
+      else
+         return MESA_FORMAT_ARGB1555;
+
+   case GL_INTENSITY4:
+   case GL_INTENSITY:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      if (mmesa->mach64Screen->cpp == 4)
+         return MESA_FORMAT_ARGB8888; /* inefficient but accurate */
+      else
+         return MESA_FORMAT_ARGB4444;
+
+   case GL_COLOR_INDEX:
+   case GL_COLOR_INDEX1_EXT:
+   case GL_COLOR_INDEX2_EXT:
+   case GL_COLOR_INDEX4_EXT:
+   case GL_COLOR_INDEX8_EXT:
+   case GL_COLOR_INDEX12_EXT:
+   case GL_COLOR_INDEX16_EXT:
+      return MESA_FORMAT_CI8;
+
+   case GL_YCBCR_MESA:
+      if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+          type == GL_UNSIGNED_BYTE)
+         return MESA_FORMAT_YCBCR;
+      else
+         return MESA_FORMAT_YCBCR_REV;
+
+   default:
+      _mesa_problem( ctx, "unexpected format in %s", __FUNCTION__ );
+      return MESA_FORMAT_NONE;
+   }
+}
+
+/* TexImage1D hook: make sure a driver texture object exists (calling
+ * driSwapOutTextureObject on an existing one), then store via Mesa. */
+static void mach64TexImage1D( GLcontext *ctx, GLenum target, GLint level,
+			    GLint internalFormat, GLint width, GLint border,
+			    GLenum format, GLenum type, const GLvoid *pixels,
+			    const struct gl_pixelstore_attrib *packing,
+			    struct gl_texture_object *texObj,
+			    struct gl_texture_image *texImage )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   if ( t == NULL ) {
+      t = (driTextureObject *) mach64AllocTexObj(texObj);
+      if ( t == NULL ) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+         return;
+      }
+   } else {
+      driSwapOutTextureObject( t );
+   }
+
+   /* Note, this will call mach64ChooseTextureFormat */
+   _mesa_store_teximage1d( ctx, target, level, internalFormat,
+			   width, border, format, type,
+			   pixels, packing, texObj, texImage );
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;
+}
+
+/* TexSubImage1D hook: a driver texture object is expected to exist already;
+ * it gets driSwapOutTextureObject called on it (or is allocated if missing),
+ * then the sub-image is stored via Mesa and texture state is flagged new. */
+static void mach64TexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
+				 GLint xoffset, GLsizei width,
+				 GLenum format, GLenum type,
+				 const GLvoid *pixels,
+				 const struct gl_pixelstore_attrib *packing,
+				 struct gl_texture_object *texObj,
+				 struct gl_texture_image *texImage )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   assert( t ); /* this _should_ be true */
+   if ( t == NULL ) {
+      t = (driTextureObject *) mach64AllocTexObj(texObj);
+      if ( t == NULL ) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+         return;
+      }
+   } else {
+      driSwapOutTextureObject( t );
+   }
+
+   /* Mesa's generic store routine updates the image data. */
+   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+			     format, type, pixels, packing, texObj,
+			     texImage);
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;
+}
+
+/* TexImage2D hook: make sure a driver texture object exists (calling
+ * driSwapOutTextureObject on an existing one), then store via Mesa. */
+static void mach64TexImage2D( GLcontext *ctx, GLenum target, GLint level,
+			      GLint internalFormat, GLint width, GLint height, GLint border,
+			      GLenum format, GLenum type, const GLvoid *pixels,
+			      const struct gl_pixelstore_attrib *packing,
+			      struct gl_texture_object *texObj,
+			      struct gl_texture_image *texImage )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   if ( t ) {
+      driSwapOutTextureObject( t );
+   } else {
+      t = (driTextureObject *) mach64AllocTexObj(texObj);
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+         return;
+      }
+   }
+
+   /* Note, this will call mach64ChooseTextureFormat.  Pass the caller's
+    * packing (as the 1D and sub-image paths do) rather than ctx->Unpack. */
+   _mesa_store_teximage2d( ctx, target, level, internalFormat,
+			   width, height, border, format, type, pixels,
+			   packing, texObj, texImage );
+   mmesa->new_state |= MACH64_NEW_TEXTURE;
+}
+
+/* TexSubImage2D hook: a driver texture object is expected to exist already;
+ * it gets driSwapOutTextureObject called on it (or is allocated if missing),
+ * then the sub-image is stored via Mesa and texture state is flagged new. */
+static void mach64TexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+				 GLint xoffset, GLint yoffset,
+				 GLsizei width, GLsizei height,
+				 GLenum format, GLenum type,
+				 const GLvoid *pixels,
+				 const struct gl_pixelstore_attrib *packing,
+				 struct gl_texture_object *texObj,
+				 struct gl_texture_image *texImage )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   assert( t ); /* this _should_ be true */
+   if ( t == NULL ) {
+      t = (driTextureObject *) mach64AllocTexObj(texObj);
+      if ( t == NULL ) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+         return;
+      }
+   } else {
+      driSwapOutTextureObject( t );
+   }
+
+   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+			     height, format, type, pixels, packing, texObj,
+			     texImage);
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;
+}
+
+/* ================================================================
+ * Device Driver API texture functions
+ */
+
+static void mach64DDTexEnv( GLcontext *ctx, GLenum target,   /* TexEnv driver hook */
+			    GLenum pname, const GLfloat *param )   /* only pname is examined */
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+#if 0
+   struct gl_texture_unit *texUnit;
+   GLubyte c[4];
+#endif
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %s )\n",
+	       __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
+   }
+
+   switch ( pname ) {
+   case GL_TEXTURE_ENV_MODE:   /* the only parameter currently acted upon */
+      FLUSH_BATCH( mmesa );
+      mmesa->new_state |= MACH64_NEW_TEXTURE | MACH64_NEW_ALPHA;   /* both texture and alpha state are flagged */
+      break;
+
+#if 0
+   case GL_TEXTURE_ENV_COLOR:
+      texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+      CLAMPED_FLOAT_TO_UBYTE( c[0], texUnit->EnvColor[0] );
+      CLAMPED_FLOAT_TO_UBYTE( c[1], texUnit->EnvColor[1] );
+      CLAMPED_FLOAT_TO_UBYTE( c[2], texUnit->EnvColor[2] );
+      CLAMPED_FLOAT_TO_UBYTE( c[3], texUnit->EnvColor[3] );
+      mmesa->env_color = mach64PackColor( 32, c[0], c[1], c[2], c[3] );
+      if ( mmesa->setup.constant_color_c != mmesa->env_color ) {
+	 FLUSH_BATCH( mmesa );
+	 mmesa->setup.constant_color_c = mmesa->env_color;
+
+	 mmesa->new_state |= MACH64_NEW_TEXTURE;
+
+	 /* More complex multitexture/multipass fallbacks for GL_BLEND
+	  * can be done later, but this allows a single pass GL_BLEND
+	  * in some cases (ie. Performer town demo).
+	  */
+	 mmesa->blend_flags &= ~MACH64_BLEND_ENV_COLOR;
+	 if ( mmesa->env_color != 0x00000000 &&
+	      mmesa->env_color != 0xff000000 &&
+	      mmesa->env_color != 0x00ffffff &&
+	      mmesa->env_color != 0xffffffff )) {	
+	    mmesa->blend_flags |= MACH64_BLEND_ENV_COLOR;
+	 }
+      }
+      break;
+#endif
+
+   default:   /* every other pname is ignored */
+      return;
+   }
+}
+
+static void mach64DDTexParameter( GLcontext *ctx, GLenum target,
+				  struct gl_texture_object *tObj,
+				  GLenum pname, const GLfloat *params )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   mach64TexObjPtr texData = (mach64TexObjPtr)tObj->DriverData;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %s )\n", __FUNCTION__,
+	       _mesa_lookup_enum_by_nr( pname ) );
+   }
+
+   /* Anything other than a 1D or 2D target is ignored. */
+   switch ( target ) {
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+      break;
+   default:
+      return;
+   }
+
+   /* Attach driver-side data on first use. */
+   if ( texData == NULL ) {
+      texData = mach64AllocTexObj(tObj);
+      if ( texData == NULL ) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexParameter");
+         return;
+      }
+   }
+
+   /* Each handled pname flushes first if the texture is bound to a unit. */
+   switch ( pname ) {
+   case GL_TEXTURE_MIN_FILTER:
+   case GL_TEXTURE_MAG_FILTER:
+      if ( texData->base.bound ) { FLUSH_BATCH( mmesa ); }
+      mach64SetTexFilter( texData, tObj->MinFilter, tObj->MagFilter );
+      break;
+
+   case GL_TEXTURE_WRAP_S:
+   case GL_TEXTURE_WRAP_T:
+      if ( texData->base.bound ) { FLUSH_BATCH( mmesa ); }
+      mach64SetTexWrap( texData, tObj->WrapS, tObj->WrapT );
+      break;
+
+   case GL_TEXTURE_BORDER_COLOR:
+      if ( texData->base.bound ) { FLUSH_BATCH( mmesa ); }
+      mach64SetTexBorderColor( texData, tObj->BorderColor.f );
+      break;
+
+   case GL_TEXTURE_BASE_LEVEL:
+      /* Approach borrowed from Radeon/Rage128: with no LOD clamping, the
+       * clamp is simulated by choosing which levels get loaded.  mach64
+       * only uploads the base level, so only that level matters here. */
+      if ( texData->base.bound ) { FLUSH_BATCH( mmesa ); }
+      driSwapOutTextureObject( (driTextureObject *) texData );
+      break;
+
+   default:
+      return;
+   }
+
+   mmesa->new_state |= MACH64_NEW_TEXTURE;
+}
+
+static void mach64DDBindTexture( GLcontext *ctx, GLenum target,
+				 struct gl_texture_object *tObj )
+{
+   /* glBindTexture hook: flush, forget the object cached for the current
+    * unit (clearing its bound bit) and flag texture state as changed. */
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLint unit = ctx->Texture.CurrentUnit;
+   mach64TexObjPtr *slot = &mmesa->CurrentTexObj[unit];
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, tObj, unit );
+   }
+
+   FLUSH_BATCH( mmesa );
+   if ( *slot != NULL ) {
+      (*slot)->base.bound &= ~(1 << unit);
+      *slot = NULL;
+   }
+   mmesa->new_state |= MACH64_NEW_TEXTURE;
+}
+
+static void mach64DDDeleteTexture( GLcontext *ctx,
+				   struct gl_texture_object *tObj )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   driTextureObject * t = (driTextureObject *) tObj->DriverData;
+
+   /* Release the driver-side data, if any was ever attached. */
+   if ( t ) {
+      if ( t->bound && mmesa ) {
+	 FLUSH_BATCH( mmesa );
+	 mmesa->new_state |= MACH64_NEW_TEXTURE;
+      }
+      driDestroyTextureObject( t );
+   }
+
+   /* Always free the mipmap images and the core texture object itself,
+    * even when no driver data exists; otherwise the object would leak. */
+   _mesa_delete_texture_object(ctx, tObj);
+}
+
+/**
+ * Allocate a new texture object.  Called via ctx->Driver.NewTextureObject.
+ * Driver data is attached only if core Mesa returned an object (NULL-safe).
+ * Note: the driver object is not 'derived' from gl_texture_object by containment.
+ */
+static struct gl_texture_object *
+mach64NewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
+{
+   struct gl_texture_object *obj = _mesa_new_texture_object(ctx, name, target);
+   if ( obj ) {
+      mach64AllocTexObj( obj );
+   }
+   return obj;
+}
+
+void mach64InitTextureFuncs( struct dd_function_table *functions )
+{
+   /* Object lifetime, binding and residency */
+   functions->NewTextureObject		= mach64NewTextureObject;
+   functions->DeleteTexture		= mach64DDDeleteTexture;
+   functions->BindTexture		= mach64DDBindTexture;
+   functions->IsTextureResident		= driIsTextureResident;
+   /* Image specification and per-texture / environment state */
+   functions->ChooseTextureFormat	= mach64ChooseTextureFormat;
+   functions->TexImage1D		= mach64TexImage1D;
+   functions->TexImage2D		= mach64TexImage2D;
+   functions->TexSubImage1D		= mach64TexSubImage1D;
+   functions->TexSubImage2D		= mach64TexSubImage2D;
+   functions->TexEnv			= mach64DDTexEnv;
+   functions->TexParameter		= mach64DDTexParameter;
+   functions->UpdateTexturePalette	= NULL;   /* no palette hook */
+   driInitTextureFormats();
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_tex.h b/src/mesa/drivers/dri/mach64/mach64_tex.h
new file mode 100644
index 0000000000..8e0b23ed15
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_tex.h
@@ -0,0 +1,89 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_TEX_H__
+#define __MACH64_TEX_H__
+
+extern void mach64UpdateTextureState( GLcontext *ctx );
+/* Place `t' in a texture heap if needed and upload its dirty base image. */
+extern void mach64UploadTexImages( mach64ContextPtr mach64ctx,
+				   mach64TexObjPtr t );
+/* Same for a texture pair, which is placed in one common heap. */
+extern void mach64UploadMultiTexImages( mach64ContextPtr mach64ctx,
+					mach64TexObjPtr t0, mach64TexObjPtr t1 );
+/* Clear every CurrentTexObj slot that still refers to `t'. */
+extern void mach64DestroyTexObj( mach64ContextPtr mach64ctx,
+				 mach64TexObjPtr t );
+
+extern void mach64EmitTexStateLocked( mach64ContextPtr mmesa,
+				      mach64TexObjPtr t0,
+				      mach64TexObjPtr t1 );
+/* Install the texture hooks into the driver function table. */
+extern void mach64InitTextureFuncs( struct dd_function_table *functions );
+
+/* ================================================================
+ * Color conversion macros (written for 8-bit channel values, see masks):
+ */
+
+#define MACH64PACKCOLOR332(r, g, b)					\
+   (((r) & 0xe0) | (((g) & 0xe0) >> 3) | (((b) & 0xc0) >> 6))
+
+#define MACH64PACKCOLOR1555(r, g, b, a)					\
+   ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) |	\
+    ((a) ? 0x8000 : 0))
+
+#define MACH64PACKCOLOR565(r, g, b)					\
+   ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
+
+#define MACH64PACKCOLOR888(r, g, b)					\
+   (((r) << 16) | ((g) << 8) | (b))
+/* Cast to GLuint: (a) << 24 overflows a signed int once a >= 0x80. */
+#define MACH64PACKCOLOR8888(r, g, b, a)					\
+   (((GLuint)(a) << 24) | ((GLuint)(r) << 16) | ((GLuint)(g) << 8) | (GLuint)(b))
+
+#define MACH64PACKCOLOR4444(r, g, b, a)					\
+   ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
+
+static INLINE GLuint mach64PackColor( GLuint cpp,
+                                      GLubyte r, GLubyte g,
+                                      GLubyte b, GLubyte a )
+{
+   /* cpp selects the packing: 2 -> 565, 4 -> 8888, anything else -> 0. */
+   if ( cpp == 2 ) {
+      return MACH64PACKCOLOR565( r, g, b );
+   }
+   if ( cpp == 4 ) {
+      return MACH64PACKCOLOR8888( r, g, b, a );
+   }
+   return 0;
+}
+
+#endif
diff --git a/src/mesa/drivers/dri/mach64/mach64_texmem.c b/src/mesa/drivers/dri/mach64/mach64_texmem.c
new file mode 100644
index 0000000000..b09954ce23
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_texmem.c
@@ -0,0 +1,511 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+ *                                                Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Leif Delgass <ldelgass@retinalburn.net>
+ *   Jose Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_tex.h"
+
+
+/* Drop texture `t' from every unit slot that caches it as current. */
+void mach64DestroyTexObj( mach64ContextPtr mmesa, mach64TexObjPtr t )
+{
+   unsigned unit;
+
+   /* Nothing to detach from without a driver context. */
+   if ( mmesa == NULL ) {
+      return;
+   }
+
+   /* Clear each per-unit slot that still points at this texture. */
+   for ( unit = 0 ; unit < mmesa->glCtx->Const.MaxTextureUnits ; unit++ ) {
+      if ( mmesa->CurrentTexObj[ unit ] != t ) {
+         continue;
+      }
+      assert( t->base.bound & (1 << unit) );
+      mmesa->CurrentTexObj[ unit ] = NULL;
+   }
+}
+
+/* Copy the whole image at mipmap level `level' of texture `t' into the
+ * texture's block of the AGP texture heap.  The x/y/width/height arguments
+ * are currently overridden with the full image (see FIXME below).
+ */
+static void mach64UploadAGPSubImage( mach64ContextPtr mmesa,
+				     mach64TexObjPtr t, int level,
+				     int x, int y, int width, int height )
+{
+   mach64ScreenRec *mach64Screen = mmesa->mach64Screen;
+   struct gl_texture_image *image;
+   int dwords;
+   GLuint texelBytes;
+
+   /* Ensure we have a valid texture to upload.  Levels are counted from 0,
+    * so `level == MaxTextureLevels' is already out of range.
+    */
+   if ( ( level < 0 ) || ( level >= mmesa->glCtx->Const.MaxTextureLevels ) )
+     return;
+
+   image = t->base.tObj->Image[0][level];
+   if ( !image )
+      return;
+
+   texelBytes = _mesa_get_format_bytes(image->TexFormat);
+
+#if 1
+   /* FIXME: The subimage index calcs are wrong... */
+   x = 0;
+   y = 0;
+   width = image->Width;
+   height = image->Height;
+#endif
+
+   /* Size in dwords, derived from the byte count so that a texel size
+    * other than 1, 2 or 4 bytes cannot cause a division by zero.  Only
+    * used for the performance counter and the debug output below.
+    */
+   dwords = (int) ((width * height * texelBytes) / 4);
+
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   mmesa->c_agpTextureBytes += (dwords << 2);
+#endif
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "mach64UploadSubImage: %d,%d of %d,%d at %d,%d\n",
+	       width, height, image->Width, image->Height, x, y );
+      fprintf( stderr, "            blit ofs: 0x%07x pitch: 0x%x dwords: %d\n",
+	       (GLuint)t->bufAddr, (GLint)width, dwords );
+   }
+
+   assert(image->Data);
+
+   {
+      CARD32 *dst = (CARD32 *)((char *)mach64Screen->agpTextures.map + t->base.memBlock->ofs);
+      const GLubyte *src = (const GLubyte *) image->Data +
+	 (y * image->Width + x) * texelBytes;
+      const GLuint bytes = width * height * texelBytes;
+      memcpy(dst, src, bytes);
+   }
+
+}
+
+/* Upload the image at mipmap level `level' of texture `t' to the address
+ * t->bufAddr by firing one or more blits (the area args are overridden).
+ */
+static void mach64UploadLocalSubImage( mach64ContextPtr mmesa,
+				  mach64TexObjPtr t, int level,
+				  int x, int y, int width, int height )
+{
+   struct gl_texture_image *image;
+   int texelsPerDword = 0;
+   int imageWidth, imageHeight;
+   int remaining, rows;
+   int format, dwords;
+   const int maxdwords = (MACH64_BUFFER_MAX_DWORDS - (MACH64_HOSTDATA_BLIT_OFFSET / 4));
+   CARD32 pitch, offset;
+   GLuint texelBytes;
+
+   /* Ensure we have a valid texture to upload (levels are counted from 0) */
+   if ( ( level < 0 ) || ( level >= mmesa->glCtx->Const.MaxTextureLevels ) )
+      return;
+
+   image = t->base.tObj->Image[0][level];
+   if ( !image )
+      return;
+
+   texelBytes = _mesa_get_format_bytes(image->TexFormat);
+
+   switch ( texelBytes ) {
+   case 1: texelsPerDword = 4; break;
+   case 2: texelsPerDword = 2; break;
+   case 4: texelsPerDword = 1; break;
+   default: return;   /* other texel sizes would divide by zero below */
+   }
+
+#if 1
+   /* FIXME: The subimage index calcs are wrong... */
+   x = 0;
+   y = 0;
+   width = image->Width;
+   height = image->Height;
+#endif
+
+   imageWidth  = image->Width;
+   imageHeight = image->Height;
+
+   format = t->textureFormat;
+
+   /* The texel upload routines have a minimum width, so force the size
+    * if needed.
+    */
+   if ( imageWidth < texelsPerDword ) {
+      int factor;
+
+      factor = texelsPerDword / imageWidth;
+      imageWidth = texelsPerDword;
+      imageHeight /= factor;
+      if ( imageHeight == 0 ) {
+	 /* In this case, the texel converter will actually walk a
+	  * texel or two off the end of the image, but normal malloc
+	  * alignment should prevent it from ever causing a fault.
+	  */
+	 imageHeight = 1;
+      }
+   }
+
+   /* We can't upload to a pitch less than 64 texels so we will need to
+    * linearly upload all modified rows for textures smaller than this.
+    * This makes the x/y/width/height different for the blitter and the
+    * texture walker.
+    */
+   if ( imageWidth >= 64 ) {
+      /* The texture walker and the blitter look identical */
+      pitch = imageWidth >> 3;
+   } else {
+      int factor;
+      int y2;
+      int start, end;
+
+      start = (y * imageWidth) & ~63;
+      end = (y + height) * imageWidth;
+
+      if ( end - start < 64 ) {
+	 /* Handle the case where the total number of texels
+	  * uploaded is < 64.
+	  */
+	 x = 0;
+	 y = start / 64;
+	 width = end - start;
+	 height = 1;
+      } else {
+	 /* Upload some number of full 64 texel blit rows */
+	 factor = 64 / imageWidth;
+
+	 y2 = y + height - 1;
+	 y /= factor;
+	 y2 /= factor;
+
+	 x = 0;
+	 width = 64;
+	 height = y2 - y + 1;
+      }
+
+      /* Fixed pitch of 64 */
+      pitch = 8;
+   }
+
+   dwords = width * height / texelsPerDword;
+   offset = t->bufAddr;
+
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   mmesa->c_textureBytes += (dwords << 2);
+#endif
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "mach64UploadSubImage: %d,%d of %d,%d at %d,%d\n",
+	       width, height, image->Width, image->Height, x, y );
+      fprintf( stderr, "            blit ofs: 0x%07x pitch: 0x%x dwords: %d\n",
+	       (GLuint)offset, (GLint)width, dwords );
+   }
+
+   /* Subdivide the texture if required (account for the registers added by the drm) */
+   if ( dwords <= maxdwords ) {
+      rows = height;
+   } else {
+      rows = (maxdwords * texelsPerDword) / (2 * width);
+   }
+
+   for ( remaining = height ;
+	 remaining > 0 ;
+	 remaining -= rows, y += rows )
+   {
+       height = MIN2(remaining, rows);
+
+       assert(image->Data);
+
+       {
+          const GLubyte *src = (const GLubyte *) image->Data +
+             (y * image->Width + x) * texelBytes;
+
+          mach64FireBlitLocked( mmesa, (void *)src, offset, pitch, format,
+				x, y, width, height );
+       }
+
+   }
+
+   mmesa->new_state |= MACH64_NEW_CONTEXT;
+   mmesa->dirty |= MACH64_UPLOAD_CONTEXT | MACH64_UPLOAD_MISC;
+}
+
+
+/* Make texture `t' resident and current: allocate heap space for it when
+ * it has none (which might require removing other texture objects), refresh
+ * its LRU position and upload its base-level image if flagged dirty.
+ */
+void mach64UploadTexImages( mach64ContextPtr mmesa, mach64TexObjPtr t )
+{
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, mmesa->glCtx, t );
+   }
+
+   assert(t);
+   assert(t->base.tObj);
+
+   if ( t->base.memBlock == NULL ) {
+      /* No memory block yet: try every heap (NULL heaps are skipped). */
+      const int chosen = driAllocateTexture( mmesa->texture_heaps,
+					     MACH64_NR_TEX_HEAPS,
+					     (driTextureObject *) t );
+
+      if ( chosen == -1 ) {
+	 fprintf( stderr, "%s: upload texture failure, sz=%d\n", __FUNCTION__,
+		  t->base.totalSize );
+	 exit(-1);
+	 return;
+      }
+
+      /* Record the heap and the base offset of the texture image. */
+      t->heap = chosen;
+      assert(t->base.memBlock);
+      t->bufAddr = mmesa->mach64Screen->texOffset[chosen]
+		   + t->base.memBlock->ofs;
+
+      /* Force loading the new state into the hardware */
+      mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL | MACH64_UPLOAD_TEXTURE;
+   }
+
+   /* Mark the texture as recently used. */
+   driUpdateTextureLRU( (driTextureObject *) t );
+
+   if ( t->base.dirty_images[0] != 0 ) {
+      const GLint level = t->base.tObj->BaseLevel;
+
+      if ( t->heap != MACH64_AGP_HEAP ) {
+	 /* Not in the AGP heap: use the local upload path. */
+	 mach64UploadLocalSubImage( mmesa, t, level, 0, 0,
+				    t->base.tObj->Image[0][level]->Width,
+				    t->base.tObj->Image[0][level]->Height );
+      } else {
+	 /* Need to make sure any vertex buffers in the queue complete */
+	 mach64WaitForIdleLocked( mmesa );
+	 mach64UploadAGPSubImage( mmesa, t, level, 0, 0,
+				  t->base.tObj->Image[0][level]->Width,
+				  t->base.tObj->Image[0][level]->Height );
+      }
+
+      /* Set the texture cache flush bit and clear the dirty marker. */
+      mmesa->setup.tex_cntl |= MACH64_TEX_CACHE_FLUSH;
+      t->base.dirty_images[0] = 0;
+   }
+
+   /* The texture upload flag is raised whether or not anything changed. */
+   mmesa->dirty |= MACH64_UPLOAD_TEXTURE;
+}
+
+
+/* Move texture `u' into texture heap `heap'.  A texture that already owns
+ * a memory block is swapped out first (it is asserted to live in another
+ * heap).  Allocation is then attempted from that single heap only.  The
+ * driAllocateTexture() result is stored in u->heap and returned; callers
+ * treat -1 as failure.
+ */
+static int mach64MoveTexToHeap( mach64ContextPtr mmesa, mach64TexObjPtr u,
+				int heap )
+{
+   if ( u->base.memBlock ) {
+      assert( u->heap != heap );
+      driSwapOutTextureObject( (driTextureObject *) u );
+   }
+
+   u->heap = driAllocateTexture( &mmesa->texture_heaps[heap], 1,
+				 (driTextureObject *) u );
+   return u->heap;
+}
+
+/* Allocate memory from the same texture heap `heap' for both textures
+ * `u0' and `u1'.  `u0' is only (re)allocated when `alloc_u0' is set;
+ * otherwise it is expected to be resident already.  Returns `heap' on
+ * success and -1 as soon as one allocation fails.
+ */
+static int mach64AllocateMultiTex( mach64ContextPtr mmesa,
+				   mach64TexObjPtr u0,
+				   mach64TexObjPtr u1,
+				   int heap, GLboolean alloc_u0 )
+{
+   /* Both objects should be bound */
+   assert( u0->base.bound && u1->base.bound );
+
+   /* u0 first (when requested), then u1 into the same heap. */
+   if ( alloc_u0 && mach64MoveTexToHeap( mmesa, u0, heap ) == -1 ) {
+      return -1;
+   }
+
+   if ( mach64MoveTexToHeap( mmesa, u1, heap ) == -1 ) {
+      return -1;
+   }
+
+   /* Bound objects are not evicted */
+   assert( u0->base.memBlock && u1->base.memBlock );
+   assert( u0->heap == u1->heap );
+
+   return heap;
+}
+
+/* Upload the base-level image of `t' when it is flagged dirty, using the
+ * AGP copy path or the local blit path according to the heap `t' lives in,
+ * then set the texture cache flush bit and clear the dirty flag.
+ */
+static void mach64UploadDirtyBaseImage( mach64ContextPtr mmesa,
+					mach64TexObjPtr t )
+{
+   GLint level;
+
+   if ( !t->base.dirty_images[0] ) {
+      return;
+   }
+
+   level = t->base.tObj->BaseLevel;
+   if ( t->heap == MACH64_AGP_HEAP ) {
+      /* Need to make sure any vertex buffers in the queue complete */
+      mach64WaitForIdleLocked( mmesa );
+      mach64UploadAGPSubImage( mmesa, t, level, 0, 0,
+			       t->base.tObj->Image[0][level]->Width,
+			       t->base.tObj->Image[0][level]->Height );
+   } else {
+      mach64UploadLocalSubImage( mmesa, t, level, 0, 0,
+				 t->base.tObj->Image[0][level]->Width,
+				 t->base.tObj->Image[0][level]->Height );
+   }
+
+   mmesa->setup.tex_cntl |= MACH64_TEX_CACHE_FLUSH;
+   t->base.dirty_images[0] = 0;
+}
+
+/* The mach64 needs to have both primary and secondary textures in either
+ * local or AGP memory, so we need a "buddy system" to make sure that allocation
+ * succeeds or fails for both textures.
+ *
+ * Placement policy, in order:
+ *   - keep an already-resident texture in place when the pair fits in the
+ *     heap it occupies, and bring the other texture to that heap;
+ *   - otherwise put both in the card heap, or in AGP when the pair is
+ *     larger than the card heap;
+ *   - retry once in AGP if a card-heap attempt failed.
+ * The process exits if no placement succeeds.
+ */
+void mach64UploadMultiTexImages( mach64ContextPtr mmesa,
+				 mach64TexObjPtr t0,
+				 mach64TexObjPtr t1 )
+{
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %p %p )\n", __FUNCTION__, mmesa->glCtx, t0, t1 );
+   }
+
+   assert(t0 && t1);
+   assert(t0->base.tObj && t1->base.tObj);
+
+   if ( !( t0->base.memBlock && t1->base.memBlock && t0->heap == t1->heap ) ) {
+      mach64TexObjPtr first = NULL;
+      mach64TexObjPtr second = NULL;
+      unsigned combined = t0->base.totalSize + t1->base.totalSize;
+      int heap, status;
+
+      /* Prefer leaving a resident texture in place when the pair fits in
+       * the heap it already occupies.
+       */
+      if ( t0->base.memBlock && combined <= t0->base.heap->size ) {
+	 first = t0;
+	 second = t1;
+      } else if ( t1->base.memBlock && combined <= t1->base.heap->size ) {
+	 first = t1;
+	 second = t0;
+      }
+
+      if ( first != NULL ) {
+	 /* Only the other texture has to move. */
+	 heap = first->heap;
+	 status = mach64AllocateMultiTex( mmesa, first, second, heap, GL_FALSE );
+      } else {
+	 /* Both textures are swapped out or collocation is impossible */
+	 first = t0;
+	 second = t1;
+
+	 /* Card memory unless the pair is larger than that heap. */
+	 heap = ( combined > mmesa->texture_heaps[MACH64_CARD_HEAP]->size )
+	    ? MACH64_AGP_HEAP : MACH64_CARD_HEAP;
+	 status = mach64AllocateMultiTex( mmesa, first, second, heap, GL_TRUE );
+      }
+
+      if ( status == -1 && heap == MACH64_CARD_HEAP ) {
+	 /* Try AGP if local memory failed */
+	 heap = MACH64_AGP_HEAP;
+	 status = mach64AllocateMultiTex( mmesa, first, second, heap, GL_TRUE );
+      }
+
+      if ( status == -1 ) {
+	 /* FIXME:
+	  * Swap out all textures from the AGP heap and re-run allocation, this
+	  * should succeed in all cases.
+	  */
+	 fprintf( stderr, "%s: upload multi-texture failure, sz0=%d sz1=%d\n",
+		  __FUNCTION__, t0->base.totalSize, t1->base.totalSize );
+	 exit(-1);
+      }
+
+      /* Set the base offset of both texture images */
+      assert(t0->base.memBlock);
+      t0->bufAddr = mmesa->mach64Screen->texOffset[heap] + t0->base.memBlock->ofs;
+      assert(t1->base.memBlock);
+      t1->bufAddr = mmesa->mach64Screen->texOffset[heap] + t1->base.memBlock->ofs;
+
+      /* Force loading the new state into the hardware */
+      mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL | MACH64_UPLOAD_TEXTURE;
+   }
+
+   /* Let the world know we've used this memory recently */
+   driUpdateTextureLRU( (driTextureObject *) t0 );
+   driUpdateTextureLRU( (driTextureObject *) t1 );
+
+   /* Upload any images that are new */
+   mach64UploadDirtyBaseImage( mmesa, t0 );
+   mach64UploadDirtyBaseImage( mmesa, t1 );
+
+   /* The texture upload flag is raised whether or not anything changed. */
+   mmesa->dirty |= MACH64_UPLOAD_TEXTURE;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_texstate.c b/src/mesa/drivers/dri/mach64/mach64_texstate.c
new file mode 100644
index 0000000000..adf774ec19
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_texstate.c
@@ -0,0 +1,522 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/macros.h"
+
+#include "mach64_context.h"
+#include "mach64_ioctl.h"
+#include "mach64_tris.h"
+#include "mach64_tex.h"
+
+/* Derive the driver-side texture parameters from the base-level image of
+ * `tObj': hardware texel format, total size, first/last level, alpha flag
+ * and log2 dimensions.  Both the driver data and the base image must
+ * already exist (asserted).  An unexpected texel format is reported via
+ * _mesa_problem().
+ */
+static void mach64SetTexImages( mach64ContextPtr mmesa,
+                              const struct gl_texture_object *tObj )
+{
+   mach64TexObjPtr t = (mach64TexObjPtr) tObj->DriverData;
+   struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+   int totalSize;
+
+   assert(t);
+   assert(baseImage);
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, tObj );
+   }
+
+   /* Translate the Mesa texel format to the hardware datatype. */
+   switch (baseImage->TexFormat) {
+   case MESA_FORMAT_ARGB8888:  t->textureFormat = MACH64_DATATYPE_ARGB8888; break;
+   case MESA_FORMAT_ARGB4444:  t->textureFormat = MACH64_DATATYPE_ARGB4444; break;
+   case MESA_FORMAT_RGB565:    t->textureFormat = MACH64_DATATYPE_RGB565;   break;
+   case MESA_FORMAT_ARGB1555:  t->textureFormat = MACH64_DATATYPE_ARGB1555; break;
+   case MESA_FORMAT_RGB332:    t->textureFormat = MACH64_DATATYPE_RGB332;   break;
+   case MESA_FORMAT_RGB888:    t->textureFormat = MACH64_DATATYPE_RGB8;     break;
+   case MESA_FORMAT_CI8:       t->textureFormat = MACH64_DATATYPE_CI8;      break;
+   case MESA_FORMAT_YCBCR:     t->textureFormat = MACH64_DATATYPE_YVYU422;  break;
+   case MESA_FORMAT_YCBCR_REV: t->textureFormat = MACH64_DATATYPE_VYUY422;  break;
+   default:
+      /* t->textureFormat is left untouched here; the rest of the setup
+       * still runs. */
+      _mesa_problem(mmesa->glCtx, "Bad texture format in %s", __FUNCTION__);
+      break;
+   }
+
+   /* Only the base level is accounted for (first and last level are both
+    * BaseLevel); the byte size is rounded up to a multiple of 32.
+    */
+   totalSize = ( baseImage->Height *
+		 baseImage->Width *
+		 _mesa_get_format_bytes(baseImage->TexFormat) );
+
+   totalSize = (totalSize + 31) & ~31;
+
+   t->base.totalSize = totalSize;
+   t->base.firstLevel = tObj->BaseLevel;
+   t->base.lastLevel = tObj->BaseLevel;
+
+   t->widthLog2 = baseImage->WidthLog2;
+   t->heightLog2 = baseImage->HeightLog2;
+   t->maxLog2 = baseImage->MaxLog2;
+
+   /* hasAlpha is set for the RGBA, ALPHA and LUMINANCE_ALPHA base formats
+    * and cleared for everything else. */
+   switch ( baseImage->_BaseFormat ) {
+   case GL_RGBA:
+   case GL_ALPHA:
+   case GL_LUMINANCE_ALPHA:
+      t->hasAlpha = 1;
+      break;
+   default:
+      t->hasAlpha = 0;
+      break;
+   }
+}
+
+static void mach64UpdateTextureEnv( GLcontext *ctx, int unit )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLint source = mmesa->tmu_source[unit];
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[source];
+   const struct gl_texture_object *tObj = texUnit->_Current;
+   const GLenum format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+   GLuint s = mmesa->setup.scale_3d_cntl;
+   /* Maps texUnit->EnvMode (GL unit tmu_source[unit]) onto hardware state. */
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %d )\n",
+	       __FUNCTION__, ctx, unit );
+   }
+
+/*                 REPLACE  MODULATE   DECAL              GL_BLEND
+ *
+ * ALPHA           C = Cf   C = Cf     undef              C = Cf
+ *                 A = At   A = AfAt                      A = AfAt
+ *
+ * LUMINANCE       C = Ct   C = CfCt   undef              C = Cf(1-Ct)+CcCt 
+ *                 A = Af   A = Af                        A = Af
+ *
+ * LUMINANCE_ALPHA C = Ct   C = CfCt   undef              C = Cf(1-Ct)+CcCt
+ *                 A = At   A = AfAt                      A = AfAt
+ *
+ * INTENSITY       C = Ct   C = CfCt   undef              C = Cf(1-Ct)+CcCt
+ *                 A = At   A = AfAt                      A = Af(1-At)+AcAt
+ *
+ * RGB             C = Ct   C = CfCt   C = Ct             C = Cf(1-Ct)+CcCt
+ *                 A = Af   A = Af     A = Af             A = Af
+ *
+ * RGBA            C = Ct   C = CfCt   C = Cf(1-At)+CtAt  C = Cf(1-Ct)+CcCt
+ *                 A = At   A = AfAt   A = Af             A = AfAt 
+ */
+   /* Unit 0 selects the light function in a working copy `s' of
+    * scale_3d_cntl; any other unit sets the composite bits in tex_cntl. */
+   if ( unit == 0 ) {
+      s &= ~MACH64_TEX_LIGHT_FCN_MASK;
+
+      /* Set the texture environment state 
+       * Need to verify these are working correctly, but the
+       * texenv Mesa demo seems to work.
+       */
+      switch ( texUnit->EnvMode ) {
+      case GL_REPLACE:
+	 switch ( format ) {
+	 case GL_ALPHA:
+	 case GL_LUMINANCE_ALPHA:
+	 case GL_INTENSITY:
+	    /* Not compliant - can't get At */
+	    FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 default:
+	    s |= MACH64_TEX_LIGHT_FCN_REPLACE;
+	 }
+	 break;
+      case GL_MODULATE:
+	 switch ( format ) {
+	 case GL_ALPHA:
+	    FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 case GL_RGB:
+	 case GL_LUMINANCE:
+	    /* These should be compliant */
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 case GL_LUMINANCE_ALPHA:
+	 case GL_INTENSITY:
+	    FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 case GL_RGBA:
+	    /* Should fallback when blending enabled for complete compliance */
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 default:
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	 }
+	 break;
+      case GL_DECAL:
+	 switch ( format ) {
+	 case GL_RGBA: 
+	    s |= MACH64_TEX_LIGHT_FCN_ALPHA_DECAL;
+	    break;
+	 case GL_RGB:
+	    s |= MACH64_TEX_LIGHT_FCN_REPLACE;
+	    break;
+	 case GL_ALPHA:
+	 case GL_LUMINANCE_ALPHA:
+	    /* undefined - disable texturing, pass fragment unmodified  */
+	    /* Also, pass fragment alpha instead of texture alpha */
+	    s &= ~MACH64_TEX_MAP_AEN;
+	    s |= MACH64_TEXTURE_DISABLE;
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 case GL_LUMINANCE:
+	 case GL_INTENSITY:
+	    /* undefined - disable texturing, pass fragment unmodified  */
+	    s |= MACH64_TEXTURE_DISABLE;
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	    break;
+	 default:
+	    s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	 }
+	 break;
+      case GL_BLEND:
+	 /* GL_BLEND not supported by RagePRO, use software */
+	 FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	 s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	 break;
+      case GL_ADD:
+      case GL_COMBINE:
+	 FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	 s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+	 break;
+      default:
+	 s |= MACH64_TEX_LIGHT_FCN_MODULATE;
+      }
+
+      if ( mmesa->setup.scale_3d_cntl != s ) {
+	 mmesa->setup.scale_3d_cntl = s;
+	 mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL;
+      }
+
+   } else {
+      /* blend = 0, modulate = 1 - initialize to blend */
+      mmesa->setup.tex_cntl &= ~MACH64_COMP_COMBINE_MODULATE;
+      /* Set the texture composite function for multitexturing*/
+      switch ( texUnit->EnvMode ) {
+      case GL_BLEND:
+	 /* GL_BLEND not supported by RagePRO, use software */
+	 FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	 mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+	 break;
+      case GL_MODULATE:
+	 /* Should fallback when blending enabled for complete compliance */
+	 mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+	 break;
+      case GL_REPLACE:
+	 switch ( format ) {
+	 case GL_ALPHA:
+	    mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+	    break;
+	 default: /* not supported by RagePRO */
+	    FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	    mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+	 }
+	 break;
+      case GL_DECAL:
+	 switch ( format ) {
+	 case GL_ALPHA:
+	 case GL_LUMINANCE:
+	 case GL_LUMINANCE_ALPHA:
+	 case GL_INTENSITY:
+	    /* undefined, disable compositing and pass fragment unmodified */
+	    mmesa->setup.tex_cntl &= ~MACH64_TEXTURE_COMPOSITE;
+	    break;
+	 default: /* not supported by RagePRO */
+	    FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	    mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+	 }
+	 break;
+      case GL_ADD:
+      case GL_COMBINE:
+	 FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+	 mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+	 break;
+      default:
+	 mmesa->setup.tex_cntl |= MACH64_COMP_COMBINE_MODULATE;
+      }
+   }
+}
+
+
+/**
+ * Set up the cached hardware state for one texture unit.
+ *
+ * \param ctx   the GL context.
+ * \param unit  hardware texture unit to program; must be 0 or 1.
+ *
+ * The GL texture unit that feeds hardware unit \p unit is taken from
+ * mmesa->tmu_source[unit].  When that GL unit has a 1D or 2D texture
+ * really enabled, the texture images are uploaded if they are dirty, the
+ * texture object is bound to \p unit, and its format, filter, alpha and
+ * clamp settings are merged into the cached dp_pix_width, scale_3d_cntl,
+ * tex_cntl and tex_size_pitch values.  A bordered texture, or any other
+ * enabled texture target, raises MACH64_FALLBACK_TEXTURE instead.  A
+ * disabled unit is left untouched.
+ *
+ * tex_size_pitch is only OR-ed into here; mach64UpdateTextureState()
+ * zeroes it before calling this function.
+ */
+static void mach64UpdateTextureUnit( GLcontext *ctx, int unit )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const struct gl_texture_unit *texUnit;
+   const struct gl_texture_object *tObj;
+   mach64TexObjPtr t;
+   GLuint d = mmesa->setup.dp_pix_width;
+   GLuint s = mmesa->setup.scale_3d_cntl;
+   int source;
+
+   /* Validate the unit number first: it indexes tmu_source[] just below. */
+   assert(unit == 0 || unit == 1);  /* only two tex units */
+
+   source = mmesa->tmu_source[unit];
+   texUnit = &ctx->Texture.Unit[source];
+   tObj = ctx->Texture.Unit[source]._Current;
+   t = tObj->DriverData;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %d ) enabled=0x%x 0x%x\n",
+	       __FUNCTION__, (void *) ctx, unit,
+	       ctx->Texture.Unit[0]._ReallyEnabled,
+	       ctx->Texture.Unit[1]._ReallyEnabled);
+   }
+
+   if ( !(texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) ) {
+      if ( texUnit->_ReallyEnabled ) {
+         /* 3D or cube map texture enabled - fallback */
+         FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+      }
+      /* otherwise the texture unit is disabled: nothing to program */
+      return;
+   }
+
+   assert(t);  /* should have driver tex data by now */
+
+   /* Fallback if there's a texture border */
+   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
+      FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_TRUE );
+      return;
+   }
+
+   /* Upload teximages */
+   if (t->base.dirty_images[0]) {
+      mach64SetTexImages( mmesa, tObj );
+      mmesa->dirty |= (MACH64_UPLOAD_TEX0IMAGE << unit);
+   }
+
+   /* Bind to the given texture unit */
+   mmesa->CurrentTexObj[unit] = t;
+   t->base.bound |= (1 << unit);
+
+   if ( t->base.memBlock )
+      driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
+
+   /* register setup */
+   if ( unit == 0 ) {
+      /* Texture format goes into dp_pix_width starting at bit 28. */
+      d &= ~MACH64_SCALE_PIX_WIDTH_MASK;
+      d |= (t->textureFormat << 28);
+
+      /* Clearing MACH64_TEXTURE_DISABLE here is what turns texturing on. */
+      s &= ~(MACH64_TEXTURE_DISABLE |
+	     MACH64_TEX_CACHE_SPLIT |
+	     MACH64_TEX_BLEND_FCN_MASK |
+	     MACH64_TEX_MAP_AEN);
+
+      /* Minification: multitexturing always selects the trilinear blend
+       * function together with the split texture cache.
+       */
+      if ( mmesa->multitex ) {
+	 s |= MACH64_TEX_BLEND_FCN_TRILINEAR | MACH64_TEX_CACHE_SPLIT;
+      } else if ( t->BilinearMin ) {
+	 s |= MACH64_TEX_BLEND_FCN_LINEAR;
+      } else {
+	 s |= MACH64_TEX_BLEND_FCN_NEAREST;
+      }
+
+      /* Magnification */
+      if ( t->BilinearMag ) {
+	 s |=  MACH64_BILINEAR_TEX_EN;
+      } else {
+	 s &= ~MACH64_BILINEAR_TEX_EN;
+      }
+
+      if ( t->hasAlpha ) {
+	 s |= MACH64_TEX_MAP_AEN;
+      }
+
+      mmesa->setup.tex_cntl &= ~(MACH64_TEXTURE_CLAMP_S |
+				 MACH64_TEXTURE_CLAMP_T |
+				 MACH64_SECONDARY_STW);
+
+      if ( t->ClampS ) {
+	 mmesa->setup.tex_cntl |= MACH64_TEXTURE_CLAMP_S;
+      }
+      if ( t->ClampT ) {
+	 mmesa->setup.tex_cntl |= MACH64_TEXTURE_CLAMP_T;
+      }
+
+      /* Primary texture sizes occupy the fields at bits 0, 4 and 8. */
+      mmesa->setup.tex_size_pitch |= ((t->widthLog2  << 0) |
+				      (t->maxLog2    << 4) |
+				      (t->heightLog2 << 8));
+   } else {
+      /* Enable texture mapping mode */
+      s &= ~MACH64_TEXTURE_DISABLE;
+
+      /* Secondary texture format goes into dp_pix_width at bit 4. */
+      d &= ~MACH64_COMPOSITE_PIX_WIDTH_MASK;
+      d |= (t->textureFormat << 4);
+
+      mmesa->setup.tex_cntl &= ~(MACH64_COMP_ALPHA |
+				 MACH64_SEC_TEX_CLAMP_S |
+				 MACH64_SEC_TEX_CLAMP_T);
+      mmesa->setup.tex_cntl |= (MACH64_TEXTURE_COMPOSITE |
+				MACH64_SECONDARY_STW);
+
+      if ( t->BilinearMin ) {
+	 mmesa->setup.tex_cntl |= MACH64_COMP_BLEND_BILINEAR;
+      } else {
+	 mmesa->setup.tex_cntl &= ~MACH64_COMP_BLEND_BILINEAR;
+      }
+      if ( t->BilinearMag ) {
+	 mmesa->setup.tex_cntl |=  MACH64_COMP_FILTER_BILINEAR;
+      } else {
+	 mmesa->setup.tex_cntl &= ~MACH64_COMP_FILTER_BILINEAR;
+      }
+
+      if ( t->hasAlpha ) {
+	 mmesa->setup.tex_cntl |= MACH64_COMP_ALPHA;
+      }
+      if ( t->ClampS ) {
+	 mmesa->setup.tex_cntl |= MACH64_SEC_TEX_CLAMP_S;
+      }
+      if ( t->ClampT ) {
+	 mmesa->setup.tex_cntl |= MACH64_SEC_TEX_CLAMP_T;
+      }
+
+      /* Secondary texture sizes occupy the fields at bits 16, 20 and 24. */
+      mmesa->setup.tex_size_pitch |= ((t->widthLog2  << 16) |
+				      (t->maxLog2    << 20) |
+				      (t->heightLog2 << 24));
+   }
+
+   /* Only flag the registers for upload when their value really changed. */
+   if ( mmesa->setup.scale_3d_cntl != s ) {
+      mmesa->setup.scale_3d_cntl = s;
+      mmesa->dirty |= MACH64_UPLOAD_SCALE_3D_CNTL;
+   }
+
+   if ( mmesa->setup.dp_pix_width != d ) {
+      mmesa->setup.dp_pix_width = d;
+      mmesa->dirty |= MACH64_UPLOAD_DP_PIX_WIDTH;
+   }
+}
+
+
+/**
+ * Update the hardware texture state.
+ *
+ * Clears any texture fallback, unbinds the driver's current texture
+ * objects, marks texturing as disabled in the cached registers and then
+ * re-enables it through mach64UpdateTextureUnit() and
+ * mach64UpdateTextureEnv() for the units set in
+ * ctx->Texture._EnabledUnits.  When only GL unit 1 is enabled it is
+ * routed to hardware unit 0 by swapping the tmu_source[] entries.
+ */
+void mach64UpdateTextureState( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   int i;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p ) en=0x%x 0x%x\n",
+	       __FUNCTION__, ctx, ctx->Texture.Unit[0]._ReallyEnabled,
+	       ctx->Texture.Unit[1]._ReallyEnabled);
+   }
+
+   /* Start without a texturing fallback; the per-unit code sets it again
+    * if it is needed.
+    */
+   FALLBACK( mmesa, MACH64_FALLBACK_TEXTURE, GL_FALSE );
+
+   /* Drop the bindings left over from the previous update. */
+   for ( i = 0 ; i < 2 ; i++ ) {
+      if ( mmesa->CurrentTexObj[i] ) {
+	 mmesa->CurrentTexObj[i]->base.bound = 0;
+      }
+   }
+   for ( i = 0 ; i < 2 ; i++ ) {
+      mmesa->CurrentTexObj[i] = NULL;
+   }
+
+   /* Texturing stays off until a unit is known to be good. */
+   mmesa->setup.scale_3d_cntl =
+      (mmesa->setup.scale_3d_cntl | MACH64_TEXTURE_DISABLE) &
+      ~MACH64_TEX_MAP_AEN;
+   mmesa->setup.tex_cntl &= ~MACH64_TEXTURE_COMPOSITE;
+   mmesa->setup.tex_size_pitch = 0x00000000;
+
+   /* Default routing: GL unit N feeds hardware unit N, single texture. */
+   mmesa->multitex = 0;
+   mmesa->tmu_source[0] = 0;
+   mmesa->tmu_source[1] = 1;
+
+   switch ( ctx->Texture._EnabledUnits & 0x3 ) {
+   case 0x3:
+      /* units 0 and 1 enabled */
+      mmesa->multitex = 1;
+      mach64UpdateTextureUnit( ctx, 0 );
+      mach64UpdateTextureEnv( ctx, 0 );
+      mach64UpdateTextureUnit( ctx, 1 );
+      mach64UpdateTextureEnv( ctx, 1 );
+      break;
+   case 0x2:
+      /* only unit 1 enabled: feed it to hardware unit 0 */
+      mmesa->tmu_source[0] = 1;
+      mmesa->tmu_source[1] = 0;
+      mach64UpdateTextureUnit( ctx, 0 );
+      mach64UpdateTextureEnv( ctx, 0 );
+      break;
+   case 0x1:
+      /* only unit 0 enabled */
+      mach64UpdateTextureUnit( ctx, 0 );
+      mach64UpdateTextureEnv( ctx, 0 );
+      break;
+   default:
+      /* neither unit enabled */
+      break;
+   }
+
+   mmesa->dirty |= (MACH64_UPLOAD_SCALE_3D_CNTL |
+		    MACH64_UPLOAD_TEXTURE);
+}
+
+
+/**
+ * Finish the texture registers and copy them into the SAREA.
+ *
+ * Because of the way texture state has to be programmed into the Rage
+ * Pro, these values are only filled in at the very last moment.
+ *
+ * \param mmesa  driver context whose cached setup registers are updated.
+ * \param t0     texture for the primary unit, or NULL.
+ * \param t1     texture for the secondary unit, or NULL.
+ *
+ * For \p t0 the AGP source bit in tex_cntl is set or cleared according
+ * to the heap the texture lives in, and its buffer address becomes
+ * tex_offset.  For \p t1 only secondary_tex_off is set.  Finally
+ * MACH64_NR_TEXTURE_REGS words starting at tex_size_pitch are copied
+ * from the cached setup into the SAREA context state.
+ *
+ * NOTE(review): the name suggests the hardware lock must be held by the
+ * caller -- confirm at the call sites.
+ */
+void mach64EmitTexStateLocked( mach64ContextPtr mmesa,
+			       mach64TexObjPtr t0,
+			       mach64TexObjPtr t1 )
+{
+   drm_mach64_context_regs_t *setup = &(mmesa->setup);
+   drm_mach64_sarea_t *shared = mmesa->sarea;
+
+   /* for multitex, both textures must be local or AGP */
+   assert( !t0 || !t1 || t0->heap == t1->heap );
+
+   if ( t0 ) {
+      if ( t0->heap != MACH64_CARD_HEAP ) {
+#if ENABLE_PERF_BOXES
+	 mmesa->c_texsrc_agp++;
+#endif
+	 setup->tex_cntl |= MACH64_TEX_SRC_AGP;
+      } else {
+#if ENABLE_PERF_BOXES
+	 mmesa->c_texsrc_card++;
+#endif
+	 setup->tex_cntl &= ~MACH64_TEX_SRC_AGP;
+      }
+      setup->tex_offset = t0->bufAddr;
+   }
+
+   if ( t1 ) {
+      setup->secondary_tex_off = t1->bufAddr;
+   }
+
+   memcpy( &shared->context_state.tex_size_pitch, &setup->tex_size_pitch,
+	   MACH64_NR_TEXTURE_REGS * sizeof(GLuint) );
+}
+
diff --git a/src/mesa/drivers/dri/mach64/mach64_tris.c b/src/mesa/drivers/dri/mach64/mach64_tris.c
new file mode 100644
index 0000000000..a81d21afff
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_tris.c
@@ -0,0 +1,1927 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/macros.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "mach64_tris.h"
+#include "mach64_state.h"
+#include "mach64_context.h"
+#include "mach64_vb.h"
+#include "mach64_ioctl.h"
+
+/* Table of MACH64_PRIM_* values with one slot for each index from 0 to
+ * GL_POLYGON.  The entries are listed in the order points, lines, line
+ * loop, line strip, triangles, triangle strip, triangle fan, quads, quad
+ * strip, polygon; presumably the table is indexed by the GL primitive
+ * enum -- confirm at the use sites.
+ */
+static const GLuint hw_prim[GL_POLYGON+1] = {
+   MACH64_PRIM_POINTS,
+   MACH64_PRIM_LINES,
+   MACH64_PRIM_LINE_LOOP,
+   MACH64_PRIM_LINE_STRIP,
+   MACH64_PRIM_TRIANGLES,
+   MACH64_PRIM_TRIANGLE_STRIP,
+   MACH64_PRIM_TRIANGLE_FAN,
+   MACH64_PRIM_QUADS,
+   MACH64_PRIM_QUAD_STRIP,
+   MACH64_PRIM_POLYGON,
+};
+
+/* Forward declarations.  Both functions are static, so their definitions
+ * have to appear in this translation unit.
+ */
+static void mach64RasterPrimitive( GLcontext *ctx, GLuint hwprim );
+static void mach64RenderPrimitive( GLcontext *ctx, GLenum prim );
+
+
+/* FIXME: Remove this when native template is finished. */
+/* When non-zero, the "#if MACH64_PRINT_BUFFER" sections of the drawing
+ * functions below are compiled in; they print every word of the vertex
+ * buffer that was just emitted to stderr.
+ */
+#define MACH64_PRINT_BUFFER 0
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+
+/*
+ * DO_COPY_VERTEX( vb, vertsize, v, n, m )
+ *
+ * Append vertex `v' to the buffer at `vb' as register writes addressed to
+ * hardware vertex slot `n' (the callers pass 1, 2 or 3), leaving `vb'
+ * pointing just past the data that was written.
+ *
+ * The source starts at 32-bit word (10 - vertsize) of the vertex.  When
+ * vertsize > 7 the first three words are sent as a run of their own that
+ * starts at MACH64_VERTEX_n_SECONDARY_S; the remaining words form a second
+ * run that is addressed so that its last vertex word lands on
+ * MACH64_VERTEX_n_X_Y.  Each run is preceded by a header word holding
+ * (count - 1) in the upper 16 bits and the ADRINDEX of the first register
+ * in the lower bits.
+ *
+ * `m' is added to the count of the second run.  Passing 1 extends the run
+ * by one register, so the caller can append one more word (the callers
+ * write the one-over-area value there) without emitting another header.
+ *
+ * Both variants copy the vertex words unmodified.  The generic variant
+ * writes the header words through LE32_OUT; the x86 variant stores them
+ * directly and copies with movsl, keeping the source pointer in esi, the
+ * destination in edi and the word count in ecx.
+ */
+#if defined(USE_X86_ASM)
+#define DO_COPY_VERTEX( vb, vertsize, v, n, m )					\
+do {										\
+   register const CARD32 *__p __asm__( "esi" ) = (CARD32 *)v + 10 - vertsize;	\
+   register int __s __asm__( "ecx" ) = vertsize;				\
+   if ( vertsize > 7 ) {							\
+      *vb++ = (2 << 16) | ADRINDEX( MACH64_VERTEX_##n##_SECONDARY_S );		\
+      __asm__ __volatile__( "movsl ; movsl ; movsl"				\
+			    : "=D" (vb), "=S" (__p)				\
+			    : "0" (vb), "1" (__p) );				\
+      __s -= 3;									\
+   }										\
+   *vb++ = ((__s - 1 + m) << 16) |						\
+   	   (ADRINDEX( MACH64_VERTEX_##n##_X_Y ) - (__s - 1) );			\
+   __asm__ __volatile__( "rep ; movsl"						\
+			 : "=%c" (__s), "=D" (vb), "=S" (__p)			\
+			 : "0" (__s), "1" (vb), "2" (__p) );			\
+} while (0)
+#else
+#define DO_COPY_VERTEX( vb, vertsize, v, n, m )				\
+do {									\
+   CARD32 *__p = (CARD32 *)v + 10 - vertsize;				\
+   int __s = vertsize;							\
+   if ( vertsize > 7 ) {						\
+      LE32_OUT( vb++, (2 << 16) |					\
+	    	      ADRINDEX( MACH64_VERTEX_##n##_SECONDARY_S ) );	\
+      *vb++ = *__p++;							\
+      *vb++ = *__p++;							\
+      *vb++ = *__p++;							\
+      __s -= 3;								\
+   }									\
+   LE32_OUT( vb++, ((__s - 1 + m) << 16) |				\
+	           (ADRINDEX( MACH64_VERTEX_##n##_X_Y ) - (__s - 1)) );	\
+   while ( __s-- ) {							\
+      *vb++ = *__p++;							\
+   }									\
+} while (0)
+#endif
+
+/* COPY_VERTEX emits just the vertex; COPY_VERTEX_OOA reserves one extra
+ * register in the final run for a word the caller appends afterwards.
+ */
+#define COPY_VERTEX( vb, vertsize, v, n )	DO_COPY_VERTEX( vb, vertsize, v, n, 0 )
+#define COPY_VERTEX_OOA( vb, vertsize, v, n )	DO_COPY_VERTEX( vb, vertsize, v, n, 1 )
+
+
+/*
+ * Emit a quad into the DMA buffer as two triangles that share the v1-v3
+ * edge.
+ *
+ * The first triangle is (v0, v1, v3): the vertices are written to the
+ * hardware vertex slots 1, 2 and 3, followed by the triangle's
+ * one-over-area value.  The second triangle only rewrites slot 1 with v2
+ * and then writes a new one-over-area value, so it reuses v1 and v3 as
+ * they were left in slots 2 and 3.
+ *
+ * Culling is decided from the first triangle only: the quad is dropped
+ * when mmesa->backface_sign is non-zero and the signed area of
+ * (v0, v1, v3) has the opposite sign.
+ *
+ * With MACH64_NATIVE_VTXFMT the vertices are copied with COPY_VERTEX and
+ * x/y are read from the packed word ui[9]; otherwise every register value
+ * is assembled by hand from the vertex fields.
+ *
+ * NOTE(review): the area is not tested for zero before it is inverted, so
+ * a degenerate triangle produces a non-finite one-over-area value --
+ * confirm that this is acceptable for the hardware.
+ */
+static INLINE void mach64_draw_quad( mach64ContextPtr mmesa,
+				       mach64VertexPtr v0,
+				       mach64VertexPtr v1,
+				       mach64VertexPtr v2,
+				       mach64VertexPtr v3 )
+{
+#if MACH64_NATIVE_VTXFMT
+   GLcontext *ctx = mmesa->glCtx;
+   const GLuint vertsize = mmesa->vertex_size;
+   GLint a;
+   GLfloat ooa;
+   GLuint xy;
+   const GLuint xyoffset = 9;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   /* Four vertices, each vertsize words plus one header (two headers when
+    * vertsize > 7), and one one-over-area word per triangle.
+    */
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2;
+   CARD32 *vb, *vbchk;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1:\n");
+      mach64_print_vertex( ctx, v0 );
+      fprintf(stderr,"Vertex 2:\n");
+      mach64_print_vertex( ctx, v1 );
+      fprintf(stderr,"Vertex 3:\n");
+      mach64_print_vertex( ctx, v2 );
+      fprintf(stderr,"Vertex 4:\n");
+      mach64_print_vertex( ctx, v3 );
+   }
+   
+   /* The packed word holds x in the upper and y in the lower 16 bits,
+    * both read as signed shorts.
+    */
+   xy = LE32_IN( &v0->ui[xyoffset] );
+   xx[0] = (GLshort)( xy >> 16 );
+   yy[0] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v1->ui[xyoffset] );
+   xx[1] = (GLshort)( xy >> 16 );
+   yy[1] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v3->ui[xyoffset] );
+   xx[2] = (GLshort)( xy >> 16 );
+   yy[2] = (GLshort)( xy & 0xffff );
+	   
+   /* Signed area term of the first triangle (v0, v1, v3). */
+   a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+       (yy[0] - yy[2]) * (xx[1] - xx[2]);
+
+   if ( (mmesa->backface_sign &&
+	((a < 0 && !signbit( mmesa->backface_sign )) || 
+	(a > 0 && signbit( mmesa->backface_sign )))) ) {
+      /* cull quad */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Quad culled\n");
+      return;
+   }
+   
+   /* 16 = 4 * 4 compensates for the two fractional bits carried by each
+    * of the two coordinate factors in a.
+    */
+   ooa = 16.0 / a;
+   
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   COPY_VERTEX( vb, vertsize, v1, 2 );
+   COPY_VERTEX_OOA( vb, vertsize, v3, 3 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   /* Second triangle: v2 replaces v0 in slot 1; xx[1]/yy[1] (v1) and
+    * xx[2]/yy[2] (v3) are kept.
+    */
+   xy = LE32_IN( &v2->ui[xyoffset] );
+   xx[0] = (GLshort)( xy >> 16 );
+   yy[0] = (GLshort)( xy & 0xffff );
+	   
+   a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+       (yy[0] - yy[2]) * (xx[1] - xx[2]);
+   
+   ooa = 16.0 / a;
+   
+   COPY_VERTEX_OOA( vb, vertsize, v2, 1 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   /* Exactly the precomputed number of words must have been written. */
+   assert( vb == vbchk );
+   
+#if MACH64_PRINT_BUFFER
+   {
+      int i;
+      fprintf(stderr, "quad:\n");
+      for (i = 0; i < vbsiz; i++)
+	 fprintf(stderr, "  %08lx\n", *(vb - vbsiz + i));
+      fprintf(stderr, "\n");
+   }
+#endif
+#else
+   GLuint vertsize = mmesa->vertex_size;
+   GLint coloridx;
+   GLfloat ooa;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   /* Per vertex: one header, optional S/T (vertsize > 6), optional
+    * W/SPEC_ARGB (vertsize > 4), Z/ARGB/X_Y, and for multitexturing a
+    * header plus three secondary words.  Four vertices are emitted, plus
+    * a header and value for each of the two one-over-area writes.
+    */
+   unsigned vbsiz = 
+	 ((
+	    1 +
+	    (vertsize > 6 ? 2 : 0) +
+	    (vertsize > 4 ? 2 : 0) +
+	    3 +
+	    (mmesa->multitex ? 4 : 0)
+	 ) * 4 + 4);
+   CARD32 *vb;
+   unsigned vbidx = 0;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v0->v.x, v0->v.y, v0->v.z, v0->v.w, v0->v.u0, v0->v.v0, v0->v.u1, v0->v.v1);
+      fprintf(stderr,"Vertex 2: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v1->v.x, v1->v.y, v1->v.z, v1->v.w, v1->v.u0, v1->v.v0, v1->v.u1, v1->v.v1);
+      fprintf(stderr,"Vertex 3: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v2->v.x, v2->v.y, v2->v.z, v2->v.w, v2->v.u0, v2->v.v0, v2->v.u1, v2->v.v1);
+      fprintf(stderr,"Vertex 4: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v3->v.x, v3->v.y, v3->v.z, v3->v.w, v3->v.u0, v3->v.v0, v3->v.u1, v3->v.v1);
+   }
+
+#if MACH64_CLIENT_STATE_EMITS
+   /* Enable for interleaved client-side state emits */
+   LOCK_HARDWARE( mmesa );
+   if ( mmesa->dirty ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   if ( mmesa->sarea->dirty ) {
+      mach64UploadHwStateLocked( mmesa );
+   }
+   UNLOCK_HARDWARE( mmesa );
+#endif
+
+   /* Multiplying by 4 gives the coordinates two fractional bits. */
+   xx[0] = (GLint)(v0->v.x * 4);
+   yy[0] = (GLint)(v0->v.y * 4);
+
+   xx[1] = (GLint)(v1->v.x * 4);
+   yy[1] = (GLint)(v1->v.y * 4);
+
+   xx[2] = (GLint)(v3->v.x * 4);
+   yy[2] = (GLint)(v3->v.y * 4);
+
+   /* At this point ooa holds the area term of (v0, v1, v3) itself; the
+    * two 0.25 factors undo the scaling by 4 of both coordinate factors.
+    * It is inverted further down, after the cull test.
+    */
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+   
+   if ( ooa * mmesa->backface_sign < 0 ) {
+      /* cull quad */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Quad culled\n");
+      return;
+   }
+   
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
+   
+   ooa = 1.0 / ooa;
+
+   /* The diffuse colour is read from ui[4] when vertsize > 4, else ui[3]. */
+   coloridx = (vertsize > 4) ? 4: 3;
+
+   /* ---- first triangle, hardware vertex 1 <- v0 ---- */
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                            /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
+   }
+
+   /* ---- first triangle, hardware vertex 2 <- v1 ---- */
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */
+      LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) );         /* MACH64_VERTEX_2_Z */
+   vb[vbidx++] = v1->ui[coloridx];                            /* MACH64_VERTEX_2_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v1->ui[8] ); /* MACH64_VERTEX_2_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[9] ); /* MACH64_VERTEX_2_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_SECONDARY_W */
+   }
+
+   /* ---- first triangle, hardware vertex 3 <- v3 ---- */
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v3->ui[6] ); /* MACH64_VERTEX_3_S */
+      LE32_OUT( &vb[vbidx++], v3->ui[7] ); /* MACH64_VERTEX_3_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v3->ui[3] ); /* MACH64_VERTEX_3_W */
+      LE32_OUT( &vb[vbidx++], v3->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v3->v.z) << 15) );         /* MACH64_VERTEX_3_Z */
+   vb[vbidx++] = v3->ui[coloridx];                             /* MACH64_VERTEX_3_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v3->ui[8] ); /* MACH64_VERTEX_3_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v3->ui[9] ); /* MACH64_VERTEX_3_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v3->ui[3] ); /* MACH64_VERTEX_3_SECONDARY_W */
+   }
+
+   /* One-over-area of the first triangle (single register write). */
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   /* ---- second triangle: v2 replaces v0, v1 and v3 are kept ---- */
+   xx[0] = (GLint)(v2->v.x * 4);
+   yy[0] = (GLint)(v2->v.y * 4);
+
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+   ooa = 1.0 / ooa;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v2->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v2->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v2->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v2->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v2->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v2->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v2->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
+   }
+
+   /* One-over-area of the second triangle. */
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   /* Exactly the precomputed number of words must have been written. */
+   assert(vbsiz == vbidx);
+
+#if MACH64_PRINT_BUFFER
+   {
+      int i;
+      fprintf(stderr, "quad:\n");
+      for (i = 0; i < vbsiz; i++)
+	 fprintf(stderr, "  %08lx\n", *(vb + i));
+      fprintf(stderr, "\n");
+   }
+#endif
+#endif
+}
+
+/*
+ * Emit one triangle (v0, v1, v2) into the DMA buffer: the vertices are
+ * written to the hardware vertex slots 1, 2 and 3 and are followed by the
+ * triangle's one-over-area value.
+ *
+ * The triangle is dropped when mmesa->backface_sign is non-zero and the
+ * signed area has the opposite sign.
+ *
+ * With MACH64_NATIVE_VTXFMT the vertices are copied with COPY_VERTEX and
+ * x/y are read from the packed word ui[9]; otherwise every register value
+ * is assembled by hand from the vertex fields.
+ *
+ * NOTE(review): the area is not tested for zero before it is inverted, so
+ * a degenerate triangle produces a non-finite one-over-area value --
+ * confirm that this is acceptable for the hardware.
+ */
+static INLINE void mach64_draw_triangle( mach64ContextPtr mmesa,
+					   mach64VertexPtr v0,
+					   mach64VertexPtr v1,
+					   mach64VertexPtr v2 )
+{
+#if MACH64_NATIVE_VTXFMT
+   GLcontext *ctx = mmesa->glCtx;
+   GLuint vertsize = mmesa->vertex_size;
+   GLint a;
+   GLfloat ooa;
+   GLuint xy;
+   const GLuint xyoffset = 9;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   /* Three vertices, each vertsize words plus one header (two headers
+    * when vertsize > 7), and one one-over-area word.
+    */
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 3 + 1;
+   CARD32 *vb, *vbchk;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1:\n");
+      mach64_print_vertex( ctx, v0 );
+      fprintf(stderr,"Vertex 2:\n");
+      mach64_print_vertex( ctx, v1 );
+      fprintf(stderr,"Vertex 3:\n");
+      mach64_print_vertex( ctx, v2 );
+   }
+   
+   /* The packed word holds x in the upper and y in the lower 16 bits,
+    * both read as signed shorts.
+    */
+   xy = LE32_IN( &v0->ui[xyoffset] );
+   xx[0] = (GLshort)( xy >> 16 );
+   yy[0] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v1->ui[xyoffset] );
+   xx[1] = (GLshort)( xy >> 16 );
+   yy[1] = (GLshort)( xy & 0xffff );
+   
+   xy = LE32_IN( &v2->ui[xyoffset] );
+   xx[2] = (GLshort)( xy >> 16 );
+   yy[2] = (GLshort)( xy & 0xffff );
+	   
+   /* Signed area term of the triangle. */
+   a = (xx[0] - xx[2]) * (yy[1] - yy[2]) -
+       (yy[0] - yy[2]) * (xx[1] - xx[2]);
+   
+   if ( mmesa->backface_sign &&
+	((a < 0 && !signbit( mmesa->backface_sign )) || 
+	(a > 0 && signbit( mmesa->backface_sign ))) ) {
+      /* cull triangle */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Triangle culled\n");
+      return;
+   }
+   
+   /* 16 = 4 * 4 compensates for the two fractional bits carried by each
+    * of the two coordinate factors in a.
+    */
+   ooa = 16.0 / a;
+   
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   COPY_VERTEX( vb, vertsize, v1, 2 );
+   COPY_VERTEX_OOA( vb, vertsize, v2, 3 );
+   LE32_OUT( vb++, *(CARD32 *)&ooa );
+
+   /* Exactly the precomputed number of words must have been written. */
+   assert( vb == vbchk );
+
+#if MACH64_PRINT_BUFFER
+   {
+      int i;
+      fprintf(stderr, "tri:\n");
+      for (i = 0; i < vbsiz; i++)
+	 fprintf(stderr, "  %08lx\n", *(vb - vbsiz + i));
+      fprintf(stderr, "\n");
+   }
+#endif
+#else
+   GLuint vertsize = mmesa->vertex_size;
+   GLint coloridx;
+   GLfloat ooa;
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   /* Per vertex: one header, optional S/T (vertsize > 6), optional
+    * W/SPEC_ARGB (vertsize > 4), Z/ARGB/X_Y, and for multitexturing a
+    * header plus three secondary words.  Three vertices are emitted, plus
+    * a header and value for the one-over-area write.
+    */
+   unsigned vbsiz = 
+	 ((
+	    1 +
+	    (vertsize > 6 ? 2 : 0) +
+	    (vertsize > 4 ? 2 : 0) +
+	    3 +
+	    (mmesa->multitex ? 4 : 0)
+	 ) * 3 + 2);
+   CARD32 *vb;
+   unsigned vbidx = 0;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v0->v.x, v0->v.y, v0->v.z, v0->v.w, v0->v.u0, v0->v.v0, v0->v.u1, v0->v.v1);
+      fprintf(stderr,"Vertex 2: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v1->v.x, v1->v.y, v1->v.z, v1->v.w, v1->v.u0, v1->v.v0, v1->v.u1, v1->v.v1);
+      fprintf(stderr,"Vertex 3: x: %.2f, y: %.2f, z: %.2f, w: %f\n\ts0: %f, t0: %f\n\ts1: %f, t1: %f\n", 
+	      v2->v.x, v2->v.y, v2->v.z, v2->v.w, v2->v.u0, v2->v.v0, v2->v.u1, v2->v.v1);
+   }
+
+#if MACH64_CLIENT_STATE_EMITS
+   /* Enable for interleaved client-side state emits */
+   LOCK_HARDWARE( mmesa );
+   if ( mmesa->dirty ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   if ( mmesa->sarea->dirty ) {
+      mach64UploadHwStateLocked( mmesa );
+   }
+   UNLOCK_HARDWARE( mmesa );
+#endif
+
+   /* Multiplying by 4 gives the coordinates two fractional bits. */
+   xx[0] = (GLint)(v0->v.x * 4);
+   yy[0] = (GLint)(v0->v.y * 4);
+
+   xx[1] = (GLint)(v1->v.x * 4);
+   yy[1] = (GLint)(v1->v.y * 4);
+
+   xx[2] = (GLint)(v2->v.x * 4);
+   yy[2] = (GLint)(v2->v.y * 4);
+
+   /* At this point ooa holds the area term itself; the two 0.25 factors
+    * undo the scaling by 4 of both coordinate factors.  It is inverted
+    * further down, after the cull test.
+    */
+   ooa = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			(yy[0] - yy[2]) * (xx[1] - xx[2]));
+
+   if ( ooa * mmesa->backface_sign < 0 ) {
+      /* cull triangle */
+       if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Triangle culled\n");
+      return;
+   }
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
+   
+   ooa = 1.0 / ooa;
+
+   /* The diffuse colour is read from ui[4] when vertsize > 4, else ui[3]. */
+   coloridx = (vertsize > 4) ? 4: 3;
+
+   /* ---- hardware vertex 1 <- v0 ---- */
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v0->ui[8] ); /* MACH64_VERTEX_1_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[9] ); /* MACH64_VERTEX_1_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_SECONDARY_W */
+   }
+
+   /* ---- hardware vertex 2 <- v1 ---- */
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */
+      LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) );         /* MACH64_VERTEX_2_Z */
+   vb[vbidx++] = v1->ui[coloridx];                             /* MACH64_VERTEX_2_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v1->ui[8] ); /* MACH64_VERTEX_2_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[9] ); /* MACH64_VERTEX_2_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_SECONDARY_W */
+   }
+
+   /* ---- hardware vertex 3 <- v2 ---- */
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v2->ui[6] ); /* MACH64_VERTEX_3_S */
+      LE32_OUT( &vb[vbidx++], v2->ui[7] ); /* MACH64_VERTEX_3_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_3_W */
+      LE32_OUT( &vb[vbidx++], v2->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v2->v.z) << 15) );         /* MACH64_VERTEX_3_Z */
+   vb[vbidx++] = v2->ui[coloridx];                             /* MACH64_VERTEX_3_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
+
+   if (mmesa->multitex) {
+      /* setup for 3 sequential reg writes */
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_SECONDARY_S) );
+      LE32_OUT( &vb[vbidx++], v2->ui[8] ); /* MACH64_VERTEX_3_SECONDARY_S */
+      LE32_OUT( &vb[vbidx++], v2->ui[9] ); /* MACH64_VERTEX_3_SECONDARY_T */
+      LE32_OUT( &vb[vbidx++], v2->ui[3] ); /* MACH64_VERTEX_3_SECONDARY_W */
+   }
+
+   /* One-over-area of the triangle (single register write). */
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], *(GLuint *)&ooa );
+
+   /* Exactly the precomputed number of words must have been written. */
+   assert(vbsiz == vbidx);
+
+#if MACH64_PRINT_BUFFER
+   {
+      int i;
+      fprintf(stderr, "tri:\n");
+      for (i = 0; i < vbsiz; ++i)
+	 fprintf(stderr, "  %08lx\n", *(vb + i));
+      fprintf(stderr, "\n");
+   }
+#endif
+#endif
+}
+
+/* Draw a line as a quad: two hardware triangles that share two vertices. */
+static INLINE void mach64_draw_line( mach64ContextPtr mmesa,
+				     mach64VertexPtr v0, mach64VertexPtr v1 )
+{
+#if MACH64_NATIVE_VTXFMT
+   GLcontext *ctx = mmesa->glCtx;
+   const GLuint vertsize = mmesa->vertex_size;
+   /* 2 fractional bits for hardware: */
+   const int width = (int) (2.0 * CLAMP(mmesa->glCtx->Line.Width,
+                                        mmesa->glCtx->Const.MinLineWidth,
+                                        mmesa->glCtx->Const.MaxLineWidth));
+   union { GLfloat f; CARD32 u; } ooa; /* 1/area as float (.f) and as the raw dword written out (.u) */
+   GLuint *pxy0, *pxy1;
+   GLuint xy0old, xy0, xy1old, xy1;
+   const GLuint xyoffset = 9;
+   GLint x0, y0, x1, y1;
+   GLint dx, dy, ix, iy;
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2;
+   CARD32 *vb, *vbchk;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1:\n");
+      mach64_print_vertex( ctx, v0 );
+      fprintf(stderr,"Vertex 2:\n");
+      mach64_print_vertex( ctx, v1 );
+   }
+   /* unpack both endpoints: x is the high, y the low 16 bits of the X_Y word */
+   pxy0 = &v0->ui[xyoffset];
+   xy0old = *pxy0;
+   xy0 = LE32_IN( &xy0old );
+   x0 = (GLshort)( xy0 >> 16 );
+   y0 = (GLshort)( xy0 & 0xffff );
+
+   pxy1 = &v1->ui[xyoffset];
+   xy1old = *pxy1;
+   xy1 = LE32_IN( &xy1old );
+   x1 = (GLshort)( xy1 >> 16 );
+   y1 = (GLshort)( xy1 & 0xffff );
+
+   if ( (dx = x1 - x0) < 0 ) {
+      dx = -dx;
+   }
+   if ( (dy = y1 - y0) < 0 ) {
+      dy = -dy;
+   }
+
+   /* widen across the major axis: x-major lines are offset in y, all others in x */
+   if ( dx > dy ) {
+      ix = 0;
+      iy = width;
+      ooa.f = 8.0 / ((x1 - x0) * width);
+   } else {
+      ix = width;
+      iy = 0;
+      ooa.f = 8.0 / ((y0 - y1) * width);
+   }
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+   /* the X_Y word of v0/v1 is rewritten in place for each corner and restored at the end */
+   LE32_OUT( pxy0, (( x0 - ix ) << 16) | (( y0 - iy ) & 0xffff) );
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   LE32_OUT( pxy1, (( x1 - ix ) << 16) | (( y1 - iy ) & 0xffff) );
+   COPY_VERTEX( vb, vertsize, v1, 2 );
+   LE32_OUT( pxy0, (( x0 + ix ) << 16) | (( y0 + iy ) & 0xffff) );
+   COPY_VERTEX_OOA( vb, vertsize, v0, 3 );
+   LE32_OUT( vb++, ooa.u );
+
+   ooa.f = -ooa.f; /* second triangle replaces vertex 1 and uses the negated 1/area */
+   LE32_OUT( pxy1, (( x1 + ix ) << 16) | (( y1 + iy ) & 0xffff) );
+   COPY_VERTEX_OOA( vb, vertsize, v1, 1 );
+   LE32_OUT( vb++, ooa.u );
+
+   *pxy0 = xy0old;
+   *pxy1 = xy1old;
+   assert( vb == vbchk ); /* everything reserved must have been written */
+#else /* !MACH64_NATIVE_VTXFMT */
+   GLuint vertsize = mmesa->vertex_size;
+   GLint coloridx;
+   float width = 1.0; /* Only support 1 pix lines now */
+   union { GLfloat f; GLuint u; } ooa; /* area, then 1/area; .u is the raw dword written out */
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   /* Dwords written below: each of the 4 vertices takes 1 header, optional
+    * S/T, optional W/SPEC_ARGB and Z/ARGB/X_Y; each of the 2 triangles adds
+    * a ONE_OVER_AREA register index and value.  This function writes no
+    * secondary texture words, so none may be counted: vbsiz has to equal
+    * vbidx exactly (see the assert at the end).
+    * NOTE(review): multitextured lines thus carry no SECONDARY_S/T/W. */
+   unsigned vbsiz =
+	 (1 + (vertsize > 6 ? 2 : 0) + (vertsize > 4 ? 2 : 0) + 3) * 4 + 4;
+   CARD32 *vb;
+   unsigned vbidx = 0;
+
+   GLfloat hw, dx, dy, ix, iy;
+   GLfloat x0 = v0->v.x;
+   GLfloat y0 = v0->v.y;
+   GLfloat x1 = v1->v.x;
+   GLfloat y1 = v1->v.y;
+
+#if MACH64_CLIENT_STATE_EMITS
+   /* Enable for interleaved client-side state emits */
+   LOCK_HARDWARE( mmesa );
+   if ( mmesa->dirty ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   if ( mmesa->sarea->dirty ) {
+      mach64UploadHwStateLocked( mmesa );
+   }
+   UNLOCK_HARDWARE( mmesa );
+#endif
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n", 
+	      v0->v.x, v0->v.y, v0->v.z, v0->v.w);
+      fprintf(stderr,"Vertex 2: x: %.2f, y: %.2f, z: %.2f, w: %f\n", 
+	      v1->v.x, v1->v.y, v1->v.z, v1->v.w);
+   }
+
+   hw = 0.5F * width;
+   if (hw > 0.1F && hw < 0.5F) {
+      hw = 0.5F;
+   }
+
+   /* adjust vertices depending on line direction */
+   dx = v0->v.x - v1->v.x;
+   dy = v0->v.y - v1->v.y;
+   if (dx * dx > dy * dy) {
+      /* X-major line */
+      ix = 0.0F;
+      iy = hw;
+      if (x1 < x0) {
+         x0 += 0.5F;
+         x1 += 0.5F;
+      }
+      y0 -= 0.5F;
+      y1 -= 0.5F;
+   }
+   else {
+      /* Y-major line */
+      ix = hw;
+      iy = 0.0F;
+      if (y1 > y0) {
+         y0 -= 0.5F;
+         y1 -= 0.5F;
+      }
+      x0 += 0.5F;
+      x1 += 0.5F;
+   }
+
+   xx[0] = (GLint)((x0 - ix) * 4);
+   yy[0] = (GLint)((y0 - iy) * 4);
+
+   xx[1] = (GLint)((x1 - ix) * 4);
+   yy[1] = (GLint)((y1 - iy) * 4);
+
+   xx[2] = (GLint)((x0 + ix) * 4);
+   yy[2] = (GLint)((y0 + iy) * 4);
+
+   ooa.f = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			  (yy[0] - yy[2]) * (xx[1] - xx[2]));
+
+   if ( ooa.f * mmesa->backface_sign < 0 ) {
+      /* cull line */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Line culled\n");
+      return;
+   }
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
+
+   ooa.f = 1.0 / ooa.f;
+
+   coloridx = (vertsize > 4) ? 4: 3;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_2_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_2_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_2_W */
+      LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) );         /* MACH64_VERTEX_2_Z */
+   vb[vbidx++] = v1->ui[coloridx];                             /* MACH64_VERTEX_2_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_3_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_3_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_3_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_3_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_3_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], ooa.u );
+
+   xx[0] = (GLint)((x1 + ix) * 4);
+   yy[0] = (GLint)((y1 + iy) * 4);
+
+   ooa.f = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			  (yy[0] - yy[2]) * (xx[1] - xx[2]));
+   ooa.f = 1.0 / ooa.f;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v1->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v1->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v1->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v1->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v1->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v1->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], ooa.u );
+
+   assert(vbsiz == vbidx);
+#endif
+}
+
+/* Draw a point as a screen-aligned square built from two hardware triangles. */
+static INLINE void mach64_draw_point( mach64ContextPtr mmesa, mach64VertexPtr v0 )
+{
+#if MACH64_NATIVE_VTXFMT
+   GLcontext *ctx = mmesa->glCtx;
+   const GLuint vertsize = mmesa->vertex_size;
+   /* 2 fractional bits for hardware: */
+   GLint sz = (GLint) (2.0 * CLAMP(mmesa->glCtx->Point.Size,
+                                   ctx->Const.MinPointSize,
+                                   ctx->Const.MaxPointSize));
+   union { GLfloat f; CARD32 u; } ooa; /* 1/area as float (.f) and as the raw dword written out (.u) */
+   GLuint *pxy;
+   GLuint xyold, xy;
+   const GLuint xyoffset = 9;
+   GLint x, y;
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * 4 + 2;
+   CARD32 *vb, *vbchk;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1:\n");
+      mach64_print_vertex( ctx, v0 );
+   }
+
+   if( !sz )
+      sz = 1;	/* round to the nearest supported size */
+
+   pxy = &v0->ui[xyoffset];
+   xyold = *pxy;
+   xy = LE32_IN( &xyold );
+   x = (GLshort)( xy >> 16 );
+   y = (GLshort)( xy & 0xffff );
+
+   ooa.f = 4.0 / (sz * sz);
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+   /* each corner is produced by rewriting v0's X_Y word in place; restored at the end */
+   LE32_OUT( pxy, (( x - sz ) << 16) | (( y - sz ) & 0xffff) );
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   LE32_OUT( pxy, (( x + sz ) << 16) | (( y - sz ) & 0xffff) );
+   COPY_VERTEX( vb, vertsize, v0, 2 );
+   LE32_OUT( pxy, (( x - sz ) << 16) | (( y + sz ) & 0xffff) );
+   COPY_VERTEX_OOA( vb, vertsize, v0, 3 );
+   LE32_OUT( vb++, ooa.u );
+
+   ooa.f = -ooa.f; /* second triangle replaces vertex 1 and uses the negated 1/area */
+   LE32_OUT( pxy, (( x + sz ) << 16) | (( y + sz ) & 0xffff) );
+   COPY_VERTEX_OOA( vb, vertsize, v0, 1 );
+   LE32_OUT( vb++, ooa.u );
+
+   *pxy = xyold;
+   assert( vb == vbchk ); /* everything reserved must have been written */
+#else /* !MACH64_NATIVE_VTXFMT */
+   GLuint vertsize = mmesa->vertex_size; 
+   GLint coloridx;
+   float sz = 1.0; /* Only support 1 pix points now */
+   union { GLfloat f; GLuint u; } ooa; /* area, then 1/area; .u is the raw dword written out */
+   GLint xx[3], yy[3]; /* 2 fractional bits for hardware */
+   /* Dwords written below: each of the 4 vertices takes 1 header, optional
+    * S/T, optional W/SPEC_ARGB and Z/ARGB/X_Y; each of the 2 triangles adds
+    * a ONE_OVER_AREA register index and value.  This function writes no
+    * secondary texture words, so none may be counted: vbsiz has to equal
+    * vbidx exactly (see the assert at the end).
+    * NOTE(review): multitextured points thus carry no SECONDARY_S/T/W. */
+   unsigned vbsiz =
+	 (1 + (vertsize > 6 ? 2 : 0) + (vertsize > 4 ? 2 : 0) + 3) * 4 + 4;
+   CARD32 *vb;
+   unsigned vbidx = 0;
+
+   if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS ) {
+      fprintf(stderr, "%s:\n", __FUNCTION__);
+      fprintf(stderr,"Vertex 1: x: %.2f, y: %.2f, z: %.2f, w: %f\n", 
+	      v0->v.x, v0->v.y, v0->v.z, v0->v.w);
+   }
+
+#if MACH64_CLIENT_STATE_EMITS
+   /* Enable for interleaved client-side state emits */
+   LOCK_HARDWARE( mmesa );
+   if ( mmesa->dirty ) {
+      mach64EmitHwStateLocked( mmesa );
+   }
+   if ( mmesa->sarea->dirty ) {
+      mach64UploadHwStateLocked( mmesa );
+   }
+   UNLOCK_HARDWARE( mmesa );
+#endif
+
+   xx[0] = (GLint)((v0->v.x - sz) * 4);
+   yy[0] = (GLint)((v0->v.y - sz) * 4);
+
+   xx[1] = (GLint)((v0->v.x + sz) * 4);
+   yy[1] = (GLint)((v0->v.y - sz) * 4);
+
+   xx[2] = (GLint)((v0->v.x - sz) * 4);
+   yy[2] = (GLint)((v0->v.y + sz) * 4);
+
+   ooa.f = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			  (yy[0] - yy[2]) * (xx[1] - xx[2]));
+
+   if ( ooa.f * mmesa->backface_sign < 0 ) {
+      /* cull quad */
+      if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+	 fprintf(stderr,"Point culled\n");
+      return;
+   }
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * 4 );
+
+   ooa.f = 1.0 / ooa.f;
+
+   coloridx = (vertsize > 4) ? 4: 3;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_2_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_2_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_2_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_2_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_2_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_2_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_2_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_2_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_2_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[1] << 16) | (yy[1] & 0xffff) ); /* MACH64_VERTEX_2_X_Y */
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_3_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_3_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_3_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_3_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_3_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_3_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_3_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_3_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_3_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[2] << 16) | (yy[2] & 0xffff) ); /* MACH64_VERTEX_3_X_Y */
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], ooa.u );
+
+   xx[0] = (GLint)((v0->v.x + sz) * 4);
+   yy[0] = (GLint)((v0->v.y + sz) * 4);
+
+   ooa.f = 0.25 * 0.25 * ((xx[0] - xx[2]) * (yy[1] - yy[2]) -
+			  (yy[0] - yy[2]) * (xx[1] - xx[2]));
+   ooa.f = 1.0 / ooa.f;
+
+   /* setup for 3,5, or 7 sequential reg writes based on vertex format */
+   switch (vertsize) {
+   case 6:
+      LE32_OUT( &vb[vbidx++], (4 << 16) | ADRINDEX(MACH64_VERTEX_1_W) );
+      break;
+   case 4:
+      LE32_OUT( &vb[vbidx++], (2 << 16) | ADRINDEX(MACH64_VERTEX_1_Z) );
+      break;
+   default: /* vertsize >= 8 */
+      LE32_OUT( &vb[vbidx++], (6 << 16) | ADRINDEX(MACH64_VERTEX_1_S) );
+      break;
+   }
+   if (vertsize > 6) {
+      LE32_OUT( &vb[vbidx++], v0->ui[6] ); /* MACH64_VERTEX_1_S */
+      LE32_OUT( &vb[vbidx++], v0->ui[7] ); /* MACH64_VERTEX_1_T */
+   }
+   if (vertsize > 4) {
+      LE32_OUT( &vb[vbidx++], v0->ui[3] ); /* MACH64_VERTEX_1_W */
+      LE32_OUT( &vb[vbidx++], v0->ui[5] ); /* MACH64_VERTEX_1_SPEC_ARGB */
+   }
+   LE32_OUT( &vb[vbidx++], ((GLint)(v0->v.z) << 15) );         /* MACH64_VERTEX_1_Z */
+   vb[vbidx++] = v0->ui[coloridx];                             /* MACH64_VERTEX_1_ARGB */
+   LE32_OUT( &vb[vbidx++], (xx[0] << 16) | (yy[0] & 0xffff) ); /* MACH64_VERTEX_1_X_Y */
+
+   LE32_OUT( &vb[vbidx++], ADRINDEX(MACH64_ONE_OVER_AREA_UC) );
+   LE32_OUT( &vb[vbidx++], ooa.u );
+
+   assert(vbsiz == vbidx);
+#endif
+}
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+#define TRI( a, b, c )				\
+do {						\
+   if (DO_FALLBACK)				\
+      mmesa->draw_tri( mmesa, a, b, c );	\
+   else						\
+      mach64_draw_triangle( mmesa, a, b, c );	\
+} while (0)
+/* QUAD: the fallback path draws the quad as triangles (a,b,d) and (b,c,d) through draw_tri. */
+#define QUAD( a, b, c, d )			\
+do {						\
+   if (DO_FALLBACK) {				\
+      mmesa->draw_tri( mmesa, a, b, d );	\
+      mmesa->draw_tri( mmesa, b, c, d );	\
+   } else 					\
+      mach64_draw_quad( mmesa, a, b, c, d );	\
+} while (0)
+/* LINE and POINT follow TRI: the context's draw_* hook when DO_FALLBACK, else the mach64_draw_* path. */
+#define LINE( v0, v1 )				\
+do {						\
+   if (DO_FALLBACK)				\
+      mmesa->draw_line( mmesa, v0, v1 );	\
+   else 					\
+      mach64_draw_line( mmesa, v0, v1 );	\
+} while (0)
+
+#define POINT( v0 )				\
+do {						\
+   if (DO_FALLBACK)				\
+      mmesa->draw_point( mmesa, v0 );		\
+   else 					\
+      mach64_draw_point( mmesa, v0 );		\
+} while (0)
+
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+#define MACH64_OFFSET_BIT	0x01 /* tested by DO_OFFSET */
+#define MACH64_TWOSIDE_BIT	0x02 /* tested by DO_TWOSIDE */
+#define MACH64_UNFILLED_BIT	0x04 /* tested by DO_UNFILLED */
+#define MACH64_FALLBACK_BIT	0x08 /* tested by DO_FALLBACK */
+#define MACH64_MAX_TRIFUNC	0x10 /* number of combinations of the four bits; size of rast_tab */
+/* Render function table; presumably indexed by IND and filled by the generated init*() functions. */
+static struct {
+   tnl_points_func	        points;
+   tnl_line_func		line;
+   tnl_triangle_func	triangle;
+   tnl_quad_func		quad;
+} rast_tab[MACH64_MAX_TRIFUNC];
+
+/* Switches read by the dd templates; the DO_* bit tests use the IND defined before each inclusion. */
+#define DO_FALLBACK (IND & MACH64_FALLBACK_BIT)
+#define DO_OFFSET   (IND & MACH64_OFFSET_BIT)
+#define DO_UNFILLED (IND & MACH64_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & MACH64_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_SPEC   1
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define VERTEX mach64Vertex
+#define TAB rast_tab
+
+#if MACH64_NATIVE_VTXFMT
+/* NOTE(review): VERT_X/VERT_Y read the low/high halves of X_Y, the reverse of mach64_draw_line(); AREA_IS_CCW is (a) < 0 here vs (a > 0) in the #else branch - confirm the pair is intentional. */
+/* #define DEPTH_SCALE 65536.0 */
+#define DEPTH_SCALE 1
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) ((GLfloat)(GLshort)(LE32_IN( &(_v)->ui[xyoffset] ) & 0xffff) / 4.0)
+#define VERT_Y(_v) ((GLfloat)(GLshort)(LE32_IN( &(_v)->ui[xyoffset] ) >> 16) / 4.0)
+#define VERT_Z(_v) ((GLfloat) LE32_IN( &(_v)->ui[zoffset] ))
+#define INSANE_VERTICES
+#define VERT_SET_Z(_v,val) LE32_OUT( &(_v)->ui[zoffset], (GLuint)(val) )
+#define VERT_Z_ADD(_v,val) LE32_OUT( &(_v)->ui[zoffset], LE32_IN( &(_v)->ui[zoffset] ) + (GLuint)(val) )
+#define AREA_IS_CCW( a ) ((a) < 0)
+#define GET_VERTEX(e) (mmesa->verts + ((e) * mmesa->vertex_size * sizeof(int)))
+/* Colors are stored with the first and third channels swapped: dst[0..2] take src[2], src[1], src[0]. */
+#define MACH64_COLOR( dst, src )                \
+do {						\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]);				\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]);				\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[3], src[3]);				\
+} while (0)
+
+#define MACH64_SPEC( dst, src )			\
+do {						\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]);	\
+} while (0)
+
+#define VERT_SET_RGBA( v, c )    MACH64_COLOR( v->ub4[coloroffset], c )
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+
+#define VERT_SET_SPEC( v, c )    if (havespec) MACH64_SPEC( v->ub4[specoffset], c )
+#define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V( v0->ub4[specoffset], v1->ub4[specoffset] )
+#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[specoffset] = spec[idx]
+/* The *offset locals below are indices into the vertex's ui[]/ub4[] arrays used by the macros above. */
+#define LOCAL_VARS(n)						\
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);		\
+   GLuint color[n] = { 0 };					\
+   GLuint spec[n] = { 0 };					\
+   GLuint vertex_size = mmesa->vertex_size;			\
+   const GLuint xyoffset = 9;					\
+   const GLuint coloroffset = 8;				\
+   const GLuint zoffset = 7;					\
+   const GLuint specoffset = 6;					\
+   GLboolean havespec = vertex_size >= 4 ? 1 : 0;		\
+   (void) color; (void) spec; (void) vertex_size; 		\
+   (void) xyoffset; (void) coloroffset; (void) zoffset;		\
+   (void) specoffset; (void) havespec;
+
+#else
+/* Non-native format: positions are read from the v.x / v.y / v.z fields of the vertex. */
+#define DEPTH_SCALE 1.0
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a > 0)
+#define GET_VERTEX(e) (mmesa->verts + ((e) * mmesa->vertex_size * sizeof(int)))
+
+#define MACH64_COLOR( dst, src )                \
+do {						\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]);				\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]);				\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[3], src[3]);				\
+} while (0)
+
+#define MACH64_SPEC( dst, src )			\
+do {						\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[0], src[2]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[1], src[1]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(dst[2], src[0]);	\
+} while (0)
+
+#define VERT_SET_RGBA( v, c )    MACH64_COLOR( v->ub4[coloroffset], c )
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+
+#define VERT_SET_SPEC( v, c )    if (havespec) MACH64_SPEC( v->ub4[5], c )
+#define VERT_COPY_SPEC( v0, v1 ) if (havespec) COPY_3V(v0->ub4[5], v1->ub4[5])
+#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = v[idx]->ui[5]
+#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[5] = spec[idx]
+/* Color sits in word 3 of a 4-word vertex and in word 4 otherwise; only the latter has specular (word 5). */
+#define LOCAL_VARS(n)						\
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);		\
+   GLuint color[n], spec[n];					\
+   GLuint coloroffset = (mmesa->vertex_size == 4 ? 3 : 4);	\
+   GLboolean havespec = (mmesa->vertex_size == 4 ? 0 : 1);	\
+   (void) color; (void) spec; (void) coloroffset; (void) havespec;
+
+#endif
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+#define RASTERIZE(x) if (mmesa->hw_primitive != hw_prim[x]) \
+                        mach64RasterPrimitive( ctx, hw_prim[x] )
+#define RENDER_PRIMITIVE mmesa->render_primitive
+#define IND MACH64_FALLBACK_BIT
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT)
+#define TAG(x) x##_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT)
+#define TAG(x) x##_twoside_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_FALLBACK_BIT)
+#define TAG(x) x##_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_OFFSET_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_twoside_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_UNFILLED_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_UNFILLED_BIT|MACH64_FALLBACK_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MACH64_TWOSIDE_BIT|MACH64_OFFSET_BIT|MACH64_UNFILLED_BIT| \
+	     MACH64_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+static void init_rast_tab( void )
+{  /* calls the init function of every t_dd_tritmp.h instantiation; IND bits noted per call */
+   init();                                   /* 0 */
+   init_offset();                            /* OFFSET */
+   init_twoside();                           /* TWOSIDE */
+   init_twoside_offset();                    /* TWOSIDE|OFFSET */
+   init_unfilled();                          /* UNFILLED */
+   init_offset_unfilled();                   /* OFFSET|UNFILLED */
+   init_twoside_unfilled();                  /* TWOSIDE|UNFILLED */
+   init_twoside_offset_unfilled();           /* TWOSIDE|OFFSET|UNFILLED */
+   init_fallback();                          /* FALLBACK */
+   init_offset_fallback();                   /* OFFSET|FALLBACK */
+   init_twoside_fallback();                  /* TWOSIDE|FALLBACK */
+   init_twoside_offset_fallback();           /* TWOSIDE|OFFSET|FALLBACK */
+   init_unfilled_fallback();                 /* UNFILLED|FALLBACK */
+   init_offset_unfilled_fallback();          /* OFFSET|UNFILLED|FALLBACK */
+   init_twoside_unfilled_fallback();         /* TWOSIDE|UNFILLED|FALLBACK */
+   init_twoside_offset_unfilled_fallback();  /* TWOSIDE|OFFSET|UNFILLED|FALLBACK */
+}
+
+
+/***********************************************************************
+ *                    Rasterization fallback helpers                   *
+ ***********************************************************************/
+
+
+/* This code is hit only when a mix of accelerated and unaccelerated
+ * primitives are being drawn, and only for the unaccelerated
+ * primitives.
+ */
+/* Pass each vertex through mach64_translate_vertex() and give the SWvertex results to swrast. */
+static void
+mach64_fallback_tri( mach64ContextPtr mmesa, mach64Vertex *v0,
+		     mach64Vertex *v1, mach64Vertex *v2 )
+{
+   GLcontext *ctx = mmesa->glCtx;
+   SWvertex sw0, sw1, sw2;
+
+   mach64_translate_vertex( ctx, v0, &sw0 );
+   mach64_translate_vertex( ctx, v1, &sw1 );
+   mach64_translate_vertex( ctx, v2, &sw2 );
+   _swrast_Triangle( ctx, &sw0, &sw1, &sw2 );
+}
+
+
+/* Line counterpart of mach64_fallback_tri(): translate both ends, then call _swrast_Line(). */
+static void
+mach64_fallback_line( mach64ContextPtr mmesa, mach64Vertex *v0, mach64Vertex *v1 )
+{
+   GLcontext *ctx = mmesa->glCtx;
+   SWvertex sw0, sw1;
+
+   mach64_translate_vertex( ctx, v0, &sw0 );
+   mach64_translate_vertex( ctx, v1, &sw1 );
+   _swrast_Line( ctx, &sw0, &sw1 );
+}
+
+
+/* Point counterpart: translate the single vertex, then call _swrast_Point(). */
+static void
+mach64_fallback_point( mach64ContextPtr mmesa, mach64Vertex *v0 )
+{
+   GLcontext *ctx = mmesa->glCtx;
+   SWvertex sw;
+   mach64_translate_vertex( ctx, v0, &sw );
+   _swrast_Point( ctx, &sw );
+}
+
+
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+
+#define VERT(x) (mach64Vertex *)(mach64verts + ((x) * vertsize * sizeof(int))) /* vertex x; stride is vertsize ints */
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		\
+      mach64_draw_point( mmesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+   mach64_draw_line( mmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 )  \
+   mach64_draw_triangle( mmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   mach64_draw_quad( mmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#define INIT(x) do {					\
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);	\
+   mach64RenderPrimitive( ctx, x );			\
+} while (0)
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+    mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);		\
+    const GLuint vertsize = mmesa->vertex_size;                 \
+    const char *mach64verts = (char *)mmesa->verts;		\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    (void) elt;
+#define RESET_STIPPLE /* defined empty */
+#define RESET_OCCLUSION /* defined empty */
+#define PRESERVE_VB_DEFS /* presumably keeps the macros above defined for the second inclusion - confirm in t_vb_rendertmp.h */
+#define ELT(x) (x) /* first inclusion: indices name vertices directly */
+#define TAG(x) mach64_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) mach64_##x##_elts
+#define ELT(x) elt[x] /* second inclusion: indices go through the element list */
+#include "tnl/t_vb_rendertmp.h"
+
+
+/**********************************************************************/
+/*                    Render clipped primitives                       */
+/**********************************************************************/
+
+/* Draw a clipped polygon by temporarily pointing the TNL vertex buffer's
+ * element list at elts and running the GL_POLYGON element render function.
+ */
+static void mach64RenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+				     GLuint n )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *vbuf = &tnl->vb;
+   const GLuint saved_prim = mmesa->render_primitive; /* read before rendering */
+   GLuint *saved_elts = vbuf->Elts;
+
+   /* Render the new vertices as an unclipped polygon. */
+   vbuf->Elts = (GLuint *)elts;
+   tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n, PRIM_BEGIN|PRIM_END );
+   vbuf->Elts = saved_elts;
+
+   /* Nothing to restore when the primitive was a polygon to begin with. */
+   if (saved_prim == GL_POLYGON)
+      return;
+
+   tnl->Driver.Render.PrimitiveNotify( ctx, saved_prim );
+}
+
+static void mach64RenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj )
+{
+   /* clipped lines are forwarded to the TNL driver's Render.Line hook */
+   TNL_CONTEXT(ctx)->Driver.Render.Line( ctx, ii, jj );
+}
+
+#if MACH64_NATIVE_VTXFMT
+/* Native-vertex fast path for a clipped polygon.  elts[0..n-1] is emitted
+ * as a triangle fan around elts[0]: three full vertices for the first
+ * triangle, then one replacement vertex plus a fresh 1/area word for each
+ * further triangle, all written into a buffer from mach64AllocDmaLow().
+ * Nothing is emitted when the first triangle fails the backface test.
+ */
+static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+					 GLuint n )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   const GLuint vertsize = mmesa->vertex_size;     /* name used by VERT() */
+   GLubyte *mach64verts = (GLubyte *)mmesa->verts; /* name used by VERT() */
+   const GLuint xyoffset = 9;
+   /* n vertices of vertsize dwords plus 1 or 2 headers each, and one
+    * 1/area dword for each of the n-2 triangles */
+   unsigned vbsiz = (vertsize + (vertsize > 7 ? 2 : 1)) * n + (n-2);
+   mach64VertexPtr v0 = (mach64VertexPtr)VERT(elts[1]);
+   mach64VertexPtr v1 = (mach64VertexPtr)VERT(elts[2]);
+   mach64VertexPtr v2 = (mach64VertexPtr)VERT(elts[0]);
+   GLint px[3], py[3]; /* 2 fractional bits for hardware */
+   union {
+      GLfloat f;
+      CARD32 u;
+   } ooa;
+   CARD32 *vb, *vbchk;
+   GLuint packed;
+   GLint area;
+   int i;
+
+   /* unpack the three starting corners (x: high 16 bits, y: low 16 bits) */
+   packed = LE32_IN( &v0->ui[xyoffset] );
+   px[0] = (GLshort)( packed >> 16 );
+   py[0] = (GLshort)( packed & 0xffff );
+
+   packed = LE32_IN( &v1->ui[xyoffset] );
+   px[1] = (GLshort)( packed >> 16 );
+   py[1] = (GLshort)( packed & 0xffff );
+
+   packed = LE32_IN( &v2->ui[xyoffset] );
+   px[2] = (GLshort)( packed >> 16 );
+   py[2] = (GLshort)( packed & 0xffff );
+
+   area = (px[0] - px[2]) * (py[1] - py[2]) -
+          (py[0] - py[2]) * (px[1] - px[2]);
+
+   /* the first triangle decides culling for the whole polygon; a zero
+    * backface_sign disables the test */
+   if ( mmesa->backface_sign ) {
+      const int sign_is_negative = signbit( mmesa->backface_sign ) != 0;
+
+      if ( (area < 0 && !sign_is_negative) ||
+           (area > 0 && sign_is_negative) ) {
+         /* cull polygon */
+         if ( MACH64_DEBUG & DEBUG_VERBOSE_PRIMS )
+            fprintf(stderr,"Polygon culled\n");
+         return;
+      }
+   }
+
+   ooa.f = 16.0 / area;
+
+   vb = (CARD32 *)mach64AllocDmaLow( mmesa, vbsiz * sizeof(CARD32) );
+   vbchk = vb + vbsiz;
+
+   COPY_VERTEX( vb, vertsize, v0, 1 );
+   COPY_VERTEX( vb, vertsize, v1, 2 );
+   COPY_VERTEX_OOA( vb, vertsize, v2, 3 );
+   LE32_OUT( vb++, ooa.u );
+
+   /* Remaining triangles keep vertex 3 (elts[0]) and alternately replace
+    * hardware vertex 1 (odd i) and vertex 2 (even i).  Each call keeps a
+    * literal vertex number, as the other COPY_VERTEX_OOA() uses here do. */
+   for ( i = 3 ; i < n ; i++ ) {
+      mach64VertexPtr vnew = (mach64VertexPtr)VERT(elts[i]);
+      const int slot = (i & 1) ? 0 : 1;
+
+      packed = LE32_IN( &vnew->ui[xyoffset] );
+      px[slot] = (GLshort)( packed >> 16 );
+      py[slot] = (GLshort)( packed & 0xffff );
+
+      area = (px[0] - px[2]) * (py[1] - py[2]) -
+             (py[0] - py[2]) * (px[1] - px[2]);
+      ooa.f = 16.0 / area;
+
+      if ( slot == 0 ) {
+         COPY_VERTEX_OOA( vb, vertsize, vnew, 1 );
+      } else {
+         COPY_VERTEX_OOA( vb, vertsize, vnew, 2 );
+      }
+      LE32_OUT( vb++, ooa.u );
+   }
+
+   assert( vb == vbchk );
+}
+#else
+static void mach64FastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+                                         GLuint n )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   /* vertsize/mach64verts keep these exact names because VERT(), defined
+    * elsewhere, presumably refers to them -- TODO confirm. */
+   const GLuint vertsize = mmesa->vertex_size;
+   GLubyte *mach64verts = (GLubyte *)mmesa->verts;
+   mach64VertexPtr pivot = (mach64VertexPtr)VERT(elts[0]);
+   int cur = 2;
+
+   /* Draw the polygon as triangles (elts[cur-1], elts[cur], elts[0]). */
+   while (cur < n) {
+      mach64_draw_triangle( mmesa, VERT(elts[cur-1]), VERT(elts[cur]), pivot );
+      cur++;
+   }
+}
+#endif /* MACH64_NATIVE_VTXFMT */
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+/* GL state bits for which mach64RunPipeline() calls mach64ChooseRenderState(). */
+#define _MACH64_NEW_RENDER_STATE (_DD_NEW_POINT_SMOOTH |        \
+                                  _DD_NEW_LINE_SMOOTH |         \
+                                  _DD_NEW_LINE_STIPPLE |        \
+                                  _DD_NEW_TRI_SMOOTH |          \
+                                  _DD_NEW_TRI_STIPPLE |         \
+                                  _NEW_POLYGONSTIPPLE |         \
+                                  _DD_NEW_TRI_UNFILLED | _DD_NEW_TRI_LIGHT_TWOSIDE | \
+                                  _DD_NEW_TRI_OFFSET)
+/* _TriangleCaps masks consumed by mach64ChooseRenderState(). */
+#define POINT_FALLBACK (DD_POINT_SMOOTH)
+#define LINE_FALLBACK (DD_LINE_SMOOTH|DD_LINE_STIPPLE)
+#define TRI_FALLBACK (DD_TRI_SMOOTH|DD_TRI_STIPPLE)
+#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK)
+#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED)
+
+
+static void mach64ChooseRenderState(GLcontext *ctx)
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint new_index = 0;
+
+   /* The draw_* hooks are only rewritten when a raster or fallback cap is
+    * set; with none set they keep whatever value they had.
+    */
+   if (caps & (ANY_RASTER_FLAGS|ANY_FALLBACK_FLAGS)) {
+      /* Start from the plain mach64_draw_* functions ... */
+      mmesa->draw_point = mach64_draw_point;
+      mmesa->draw_line = mach64_draw_line;
+      mmesa->draw_tri = mach64_draw_triangle;
+
+      /* ... gather the rast_tab variant bits ... */
+      if (caps & DD_TRI_LIGHT_TWOSIDE) new_index |= MACH64_TWOSIDE_BIT;
+      if (caps & DD_TRI_OFFSET)        new_index |= MACH64_OFFSET_BIT;
+      if (caps & DD_TRI_UNFILLED)      new_index |= MACH64_UNFILLED_BIT;
+
+      /* ... and hook in fallbacks for specific primitives. */
+      if (caps & POINT_FALLBACK)     mmesa->draw_point = mach64_fallback_point;
+      if (caps & LINE_FALLBACK)      mmesa->draw_line = mach64_fallback_line;
+      if (caps & TRI_FALLBACK)       mmesa->draw_tri = mach64_fallback_tri;
+      if (caps & ANY_FALLBACK_FLAGS) new_index |= MACH64_FALLBACK_BIT;
+   }
+
+   /* The TNL hooks below only change together with the render index. */
+   if (new_index == mmesa->RenderIndex)
+      return;
+   mmesa->RenderIndex = new_index;
+
+   tnl->Driver.Render.Points = rast_tab[new_index].points;
+   tnl->Driver.Render.Line = rast_tab[new_index].line;
+   tnl->Driver.Render.Triangle = rast_tab[new_index].triangle;
+   tnl->Driver.Render.Quad = rast_tab[new_index].quad;
+   if (new_index != 0) {
+      tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = mach64RenderClippedLine;
+      tnl->Driver.Render.ClippedPolygon = mach64RenderClippedPoly;
+   } else {
+      tnl->Driver.Render.PrimTabVerts = mach64_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = mach64_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = rast_tab[new_index].line;
+      tnl->Driver.Render.ClippedPolygon = mach64FastRenderClippedPoly;
+   }
+}
+
+/**********************************************************************/
+/*                 Validate state at pipeline start                   */
+/**********************************************************************/
+
+static void mach64RunPipeline( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   /* Let mach64DDUpdateHWState() process any pending new_state flags. */
+   if (mmesa->new_state)
+      mach64DDUpdateHWState( ctx );
+
+   /* While not in a fallback, re-pick the vertex and render setup for
+    * whatever GL state changed, then mark that state as consumed.
+    * NewGLState is deliberately re-read before each check.
+    */
+   if (mmesa->NewGLState != 0 && mmesa->Fallback == 0) {
+      if (mmesa->NewGLState & _MACH64_NEW_VERTEX_STATE) mach64ChooseVertexState( ctx );
+      if (mmesa->NewGLState & _MACH64_NEW_RENDER_STATE) mach64ChooseRenderState( ctx );
+      mmesa->NewGLState = 0;
+   }
+   _tnl_run_pipeline( ctx );
+}
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+/* This is called when Mesa switches between rendering triangle
+ * primitives (such as GL_POLYGON, GL_QUADS, GL_TRIANGLE_STRIP, etc),
+ * and lines, points and bitmaps.
+ */
+
+static void mach64RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+
+   /* The NEW_CONTEXT / UPLOAD_CONTEXT flags are raised unconditionally. */
+   mmesa->dirty |= MACH64_UPLOAD_CONTEXT;
+   mmesa->new_state |= MACH64_NEW_CONTEXT;
+   if (mmesa->hw_primitive == hwprim)
+      return;
+   FLUSH_BATCH( mmesa );   /* flush happens before hw_primitive changes */
+   mmesa->hw_primitive = hwprim;
+}
+
+static void mach64RenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   /* The GL primitive is recorded unconditionally. */
+   MACH64_CONTEXT(ctx)->render_primitive = prim;
+
+   /* Primitives at or above GL_TRIANGLES that are drawn unfilled leave
+    * the hardware primitive alone; everything else is forwarded to
+    * mach64RasterPrimitive() with its hw_prim[] translation.
+    */
+   if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED))
+      mach64RasterPrimitive( ctx, hw_prim[prim] );
+}
+
+
+static void mach64RenderStart( GLcontext *ctx )
+{
+   /* Render.Start hook.  Check for projective texturing: when the check
+    * fails, mach64CheckTexSizes() switches to the PTEX vertex setup.
+    */
+   mach64CheckTexSizes( ctx );
+}
+
+static void mach64RenderFinish( GLcontext *ctx )
+{
+   const GLuint index = MACH64_CONTEXT(ctx)->RenderIndex;
+   if (index & MACH64_FALLBACK_BIT) _swrast_flush( ctx ); /* fallback index in use */
+}
+
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+
+static const char * const fallbackStrings[] = {   /* indexed by bit position */
+   "Texture mode",
+   "glDrawBuffer(GL_FRONT_AND_BACK)",
+   "glReadBuffer",
+   "glEnable(GL_STENCIL) without hw stencil buffer",
+   "glRenderMode(selection or feedback)",
+   "glLogicOp (mode != GL_COPY)",
+   "GL_SEPARATE_SPECULAR_COLOR",
+   "glBlendEquation (mode != ADD)",
+   "glBlendFunc",
+   "Rasterization disable",
+};
+/* Translate one fallback bit into its description for debug output.
+ * Bits beyond the end of fallbackStrings[] yield "(unknown fallback)". */
+static const char *getFallbackString(GLuint bit)
+{
+   const GLuint count = sizeof(fallbackStrings) / sizeof(fallbackStrings[0]);
+   GLuint i = 0;
+   /* i becomes the position of the highest set bit (0 when bit <= 1). */
+   for ( ; bit > 1 ; bit >>= 1)
+      i++;
+   return (i < count) ? fallbackStrings[i] : "(unknown fallback)";
+}
+
+void mach64Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint previous = mmesa->Fallback;
+   if (!mode) {
+      /* Clear the bit; the mach64 render hooks are reinstalled only when
+       * it was the sole fallback bit that had been set. */
+      mmesa->Fallback = previous & ~bit;
+      if (previous != bit)
+         return;
+      _swrast_flush( ctx );
+      tnl->Driver.Render.Start = mach64RenderStart;
+      tnl->Driver.Render.PrimitiveNotify = mach64RenderPrimitive;
+      tnl->Driver.Render.Finish = mach64RenderFinish;
+      tnl->Driver.Render.BuildVertices = mach64BuildVertices;
+      mmesa->NewGLState |= _MACH64_NEW_RENDER_STATE | _MACH64_NEW_VERTEX_STATE;
+      if (MACH64_DEBUG & DEBUG_VERBOSE_FALLBACK)
+         fprintf(stderr, "Mach64 end rasterization fallback: 0x%x %s\n",
+                 bit, getFallbackString(bit));
+      return;
+   }
+
+   /* Set the bit; only the transition from "no fallback at all" flushes
+    * the batch, wakes swsetup and invalidates RenderIndex. */
+   mmesa->Fallback = previous | bit;
+   if (previous != 0)
+      return;
+   FLUSH_BATCH( mmesa );
+   _swsetup_Wakeup( ctx );
+   mmesa->RenderIndex = ~0;
+   if (MACH64_DEBUG & DEBUG_VERBOSE_FALLBACK)
+      fprintf(stderr, "Mach64 begin rasterization fallback: 0x%x %s\n",
+              bit, getFallbackString(bit));
+}
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+void mach64InitTriFuncs( GLcontext *ctx )
+{
+   static int rast_tab_ready = 0;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   /* init_rast_tab() runs once, on the first call only. */
+   if (!rast_tab_ready) {
+      init_rast_tab();
+      rast_tab_ready = 1;
+   }
+
+   tnl->Driver.RunPipeline = mach64RunPipeline;
+   tnl->Driver.Render.BuildVertices = mach64BuildVertices;
+   tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple;
+   tnl->Driver.Render.PrimitiveNotify = mach64RenderPrimitive;
+   tnl->Driver.Render.Start = mach64RenderStart;
+   tnl->Driver.Render.Finish = mach64RenderFinish;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_tris.h b/src/mesa/drivers/dri/mach64/mach64_tris.h
new file mode 100644
index 0000000000..042df42f5b
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_tris.h
@@ -0,0 +1,43 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_TRIS_H__
+#define __MACH64_TRIS_H__
+
+#include "main/mtypes.h"
+
+/* Installs the mach64 RunPipeline and Render.* hooks in the TNL context. */
+extern void mach64InitTriFuncs( GLcontext *ctx );
+/* Sets (mode != 0) or clears one fallback bit; FALLBACK() takes an mmesa. */
+extern void mach64Fallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+#define FALLBACK( mmesa, bit, mode ) mach64Fallback( (mmesa)->glCtx, bit, mode )
+
+
+#endif /* __MACH64_TRIS_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_vb.c b/src/mesa/drivers/dri/mach64/mach64_vb.c
new file mode 100644
index 0000000000..046aff28a8
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_vb.c
@@ -0,0 +1,641 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/colormac.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/t_context.h"
+
+#include "mach64_context.h"
+#include "mach64_vb.h"
+#include "mach64_ioctl.h"
+#include "mach64_tris.h"
+
+
+#define MACH64_TEX1_BIT       0x1
+#define MACH64_TEX0_BIT       0x2
+#define MACH64_RGBA_BIT       0x4
+#define MACH64_SPEC_BIT       0x8
+#define MACH64_FOG_BIT        0x10
+#define MACH64_XYZW_BIT       0x20
+#define MACH64_PTEX_BIT       0x40
+#define MACH64_MAX_SETUP      0x80   /* size of setup_tab[]: one past any OR of the bits above */
+/* Per-setup function table, indexed by an OR of the MACH64_*_BIT values. */
+static struct {
+   void                (*emit)( GLcontext *, GLuint, GLuint, void *, GLuint ); /* called as (ctx, start, count, dest, stride) */
+   tnl_interp_func		interp;
+   tnl_copy_pv_func	        copy_pv;
+   GLboolean           (*check_tex_sizes)( GLcontext *ctx ); /* a zero result makes mach64CheckTexSizes() add PTEX */
+   GLuint               vertex_size;   /* multiplied by sizeof(int) to get the byte stride */
+   GLuint               vertex_format;
+} setup_tab[MACH64_MAX_SETUP];
+
+#define TINY_VERTEX_FORMAT      1
+#define NOTEX_VERTEX_FORMAT     2
+#define TEX0_VERTEX_FORMAT      3
+#define TEX1_VERTEX_FORMAT      4
+#define PROJ_TEX1_VERTEX_FORMAT 0
+#define TEX2_VERTEX_FORMAT      0
+#define TEX3_VERTEX_FORMAT      0
+#define PROJ_TEX3_VERTEX_FORMAT 0
+
+#define DO_XYZW (IND & MACH64_XYZW_BIT)
+#define DO_RGBA (IND & MACH64_RGBA_BIT)
+#define DO_SPEC (IND & MACH64_SPEC_BIT)
+#define DO_FOG  (IND & MACH64_FOG_BIT)
+#define DO_TEX0 (IND & MACH64_TEX0_BIT)
+#define DO_TEX1 (IND & MACH64_TEX1_BIT)
+#define DO_TEX2 0
+#define DO_TEX3 0
+#define DO_PTEX (IND & MACH64_PTEX_BIT)
+
+#define VERTEX mach64Vertex
+#define LOCALVARS mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+#define GET_VIEWPORT_MAT() mmesa->hw_viewport
+#define GET_TEXSOURCE(n)  mmesa->tmu_source[n]
+#define GET_VERTEX_FORMAT() mmesa->vertex_format
+#define GET_VERTEX_STORE() mmesa->verts
+#define GET_VERTEX_SIZE() mmesa->vertex_size * sizeof(GLuint)
+
+#define HAVE_HW_VIEWPORT    0
+#define HAVE_HW_DIVIDE      0
+#define HAVE_RGBA_COLOR     0
+#define HAVE_TINY_VERTICES  1
+#define HAVE_NOTEX_VERTICES 1
+#define HAVE_TEX0_VERTICES  1
+#define HAVE_TEX1_VERTICES  1
+#define HAVE_TEX2_VERTICES  0
+#define HAVE_TEX3_VERTICES  0
+#define HAVE_PTEX_VERTICES  0
+
+#define UNVIEWPORT_VARS						\
+   const GLfloat dx = - (GLfloat)mmesa->drawX - SUBPIXEL_X;	\
+   const GLfloat dy = (mmesa->driDrawable->h +			\
+		       (GLfloat)mmesa->drawY  + SUBPIXEL_Y);	\
+   const GLfloat sz = 1.0 / mmesa->depth_scale
+
+#if MACH64_NATIVE_VTXFMT
+   
+#define UNVIEWPORT_X(x)    ((GLfloat)(x) / 4.0)  +  dx
+#define UNVIEWPORT_Y(y)  - ((GLfloat)(y) / 4.0)  +  dy
+#define UNVIEWPORT_Z(z)    (GLfloat)((z) >> 15)  *  sz
+
+#else
+
+#define UNVIEWPORT_X(x)    x  +  dx;
+#define UNVIEWPORT_Y(y)  - y  +  dy;
+#define UNVIEWPORT_Z(z)    z  *  sz;
+
+#endif
+
+#define PTEX_FALLBACK() FALLBACK(MACH64_CONTEXT(ctx), MACH64_FALLBACK_TEXTURE, 1)
+
+#define IMPORT_FLOAT_COLORS mach64_import_float_colors
+#define IMPORT_FLOAT_SPEC_COLORS mach64_import_float_spec_colors
+
+#define INTERP_VERTEX setup_tab[mmesa->SetupIndex].interp
+#define COPY_PV_VERTEX setup_tab[mmesa->SetupIndex].copy_pv
+
+/***********************************************************************
+ *         Generate  pv-copying and translation functions              *
+ ***********************************************************************/
+
+#if MACH64_NATIVE_VTXFMT
+
+#define TAG(x) mach64_##x
+#include "mach64_native_vb.c"
+
+#else
+
+#define TAG(x) mach64_##x
+#include "tnl_dd/t_dd_vb.c"
+
+#endif
+
+/***********************************************************************
+ *             Generate vertex emit and interp functions               *
+ ***********************************************************************/
+
+
+#if MACH64_NATIVE_VTXFMT
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT)
+#define TAG(x) x##_wg
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_wgs
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_wgt0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_PTEX_BIT)
+#define TAG(x) x##_wgpt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgst0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_wgst0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_PTEX_BIT)
+#define TAG(x) x##_wgspt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT)
+#define TAG(x) x##_wgf
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_wgfs
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgft0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_wgft0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\
+             MACH64_PTEX_BIT)
+#define TAG(x) x##_wgfpt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT)
+#define TAG(x) x##_wgfst0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_wgfst0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT|MACH64_PTEX_BIT)
+#define TAG(x) x##_wgfspt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_TEX0_BIT)
+#define TAG(x) x##_t0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_t0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT)
+#define TAG(x) x##_f
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_ft0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_ft0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT)
+#define TAG(x) x##_g
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_gs
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gt0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gt0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gst0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gst0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT)
+#define TAG(x) x##_gf
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_gfs
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gft0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gft0t1
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gfst0
+#include "mach64_native_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_gfst0t1
+#include "mach64_native_vbtmp.h"
+
+#else
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT)
+#define TAG(x) x##_wg
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_wgs
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_wgt0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_PTEX_BIT)
+#define TAG(x) x##_wgpt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgst0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_wgst0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_PTEX_BIT)
+#define TAG(x) x##_wgspt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT)
+#define TAG(x) x##_wgf
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_wgfs
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_wgft0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_wgft0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|\
+             MACH64_PTEX_BIT)
+#define TAG(x) x##_wgfpt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT)
+#define TAG(x) x##_wgfst0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_wgfst0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_XYZW_BIT|MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|\
+             MACH64_TEX0_BIT|MACH64_PTEX_BIT)
+#define TAG(x) x##_wgfspt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_TEX0_BIT)
+#define TAG(x) x##_t0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_t0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT)
+#define TAG(x) x##_f
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_ft0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_ft0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT)
+#define TAG(x) x##_g
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_gs
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gt0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gt0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gst0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gst0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT)
+#define TAG(x) x##_gf
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT)
+#define TAG(x) x##_gfs
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gft0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_TEX0_BIT|MACH64_TEX1_BIT)
+#define TAG(x) x##_gft0t1
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT)
+#define TAG(x) x##_gfst0
+#include "mach64_vbtmp.h"
+
+#define IND (MACH64_RGBA_BIT|MACH64_FOG_BIT|MACH64_SPEC_BIT|MACH64_TEX0_BIT|\
+             MACH64_TEX1_BIT)
+#define TAG(x) x##_gfst0t1
+#include "mach64_vbtmp.h"
+
+#endif
+
+static void init_setup_tab( void )
+{  /* Calls every init_<tag>() once; these are presumably emitted by the template includes above. */
+   init_wg();        /* tag letters follow the IND/TAG pairs above:  */
+   init_wgs();       /*   w = XYZW, g = RGBA, s = SPEC, f = FOG,     */
+   init_wgt0();      /*   t0/t1 = TEX0/TEX1, p = PTEX                */
+   init_wgt0t1();
+   init_wgpt0();
+   init_wgst0();
+   init_wgst0t1();
+   init_wgspt0();
+   init_wgf();
+   init_wgfs();
+   init_wgft0();
+   init_wgft0t1();
+   init_wgfpt0();
+   init_wgfst0();
+   init_wgfst0t1();
+   init_wgfspt0();
+   init_t0();        /* from here on: setups without the XYZW bit */
+   init_t0t1();
+   init_f();
+   init_ft0();
+   init_ft0t1();
+   init_g();
+   init_gs();
+   init_gt0();
+   init_gt0t1();
+   init_gst0();
+   init_gst0t1();
+   init_gf();
+   init_gfs();
+   init_gft0();
+   init_gft0t1();
+   init_gfst0();
+   init_gfst0t1();
+}
+
+
+
+void mach64PrintSetupFlags( char *msg, GLuint flags )
+{
+   /* Each setup bit that is set contributes one label to the stderr line. */
+   const char *xyzw = (flags & MACH64_XYZW_BIT) ? " xyzw," : "";
+   const char *rgba = (flags & MACH64_RGBA_BIT) ? " rgba," : "";
+   const char *spec = (flags & MACH64_SPEC_BIT) ? " spec," : "";
+   const char *fog  = (flags & MACH64_FOG_BIT)  ? " fog," : "";
+   const char *tex0 = (flags & MACH64_TEX0_BIT) ? " tex-0," : "";
+   const char *tex1 = (flags & MACH64_TEX1_BIT) ? " tex-1," : "";
+   const char *ptex = (flags & MACH64_PTEX_BIT) ? " ptex," : "";
+   fprintf( stderr, "%s: %d %s%s%s%s%s%s%s\n", msg, (int)flags,
+            xyzw, rgba, spec, fog, tex0, tex1, ptex );
+}
+
+
+
+
+void mach64CheckTexSizes( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   /* Nothing to do while the current setup's check succeeds. */
+   if (setup_tab[mmesa->SetupIndex].check_tex_sizes(ctx))
+      return;
+
+   /* Add PTEX to the setup index and invalidate the stored vertices by
+    * marking every input as new. */
+   mmesa->SetupIndex |= MACH64_PTEX_BIT;
+   mmesa->SetupNewInputs = ~0;
+   if (mmesa->Fallback ||
+       (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED)))
+      return;
+   tnl->Driver.Render.Interp = setup_tab[mmesa->SetupIndex].interp;
+   tnl->Driver.Render.CopyPV = setup_tab[mmesa->SetupIndex].copy_pv;
+}
+
+void mach64BuildVertices( GLcontext *ctx,
+                          GLuint start,
+                          GLuint count,
+                          GLuint newinputs )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   const GLuint stride = mmesa->vertex_size * sizeof(int);
+   GLubyte *dest = (GLubyte *)mmesa->verts + start * stride;
+   GLuint which;
+
+   /* Merge in, and consume, the inputs queued in SetupNewInputs. */
+   newinputs |= mmesa->SetupNewInputs;
+   mmesa->SetupNewInputs = 0;
+   if (newinputs == 0)
+      return;
+
+   /* New positions: re-emit with the complete current setup. */
+   if (newinputs & VERT_BIT_POS) {
+      setup_tab[mmesa->SetupIndex].emit( ctx, start, count, dest, stride );
+      return;
+   }
+
+   /* Otherwise emit only the changed components that the current setup
+    * contains.  With PTEX in the setup every component is selected.
+    */
+   if (mmesa->SetupIndex & MACH64_PTEX_BIT) {
+      which = ~0;
+   } else {
+      which = 0;
+      if (newinputs & VERT_BIT_COLOR0)
+         which |= MACH64_RGBA_BIT;
+      if (newinputs & VERT_BIT_COLOR1)
+         which |= MACH64_SPEC_BIT;
+      if (newinputs & VERT_BIT_TEX0)
+         which |= MACH64_TEX0_BIT;
+      if (newinputs & VERT_BIT_TEX1)
+         which |= MACH64_TEX1_BIT;
+      if (newinputs & VERT_BIT_FOG)
+         which |= MACH64_FOG_BIT;
+   }
+   which &= mmesa->SetupIndex;
+
+   if (which != 0)
+      setup_tab[which].emit( ctx, start, count, dest, stride );
+}
+
+void mach64ChooseVertexState( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint setup = MACH64_XYZW_BIT|MACH64_RGBA_BIT;
+
+   /* XYZW and RGBA are always part of the setup; the rest is optional. */
+   if (caps & DD_SEPARATE_SPECULAR)
+      setup |= MACH64_SPEC_BIT;
+   if (ctx->Fog.Enabled)
+      setup |= MACH64_FOG_BIT;
+   if (ctx->Texture._EnabledUnits)
+      setup |= MACH64_TEX0_BIT;
+   /* TEX1 additionally needs units 0 and 1 to both be really enabled. */
+   if (ctx->Texture._EnabledUnits &&
+       ctx->Texture.Unit[0]._ReallyEnabled &&
+       ctx->Texture.Unit[1]._ReallyEnabled)
+      setup |= MACH64_TEX1_BIT;
+   mmesa->SetupIndex = setup;
+
+   /* Two-sided lighting and unfilled triangles take the *_extras
+    * functions; otherwise the per-setup interp/copy_pv are installed. */
+   if (!(caps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
+      tnl->Driver.Render.Interp = setup_tab[setup].interp;
+      tnl->Driver.Render.CopyPV = setup_tab[setup].copy_pv;
+   } else {
+      tnl->Driver.Render.Interp = mach64_interp_extras;
+      tnl->Driver.Render.CopyPV = mach64_copy_pv_extras;
+   }
+#if 0
+   if (MACH64_DEBUG & DEBUG_VERBOSE_MSG) {
+      mach64PrintSetupFlags( __FUNCTION__, setup );
+  }
+#endif
+   /* On a vertex format change: flush, then adopt the new format/size. */
+   if (setup_tab[setup].vertex_format == mmesa->vertex_format)
+      return;
+   FLUSH_BATCH(mmesa);
+   mmesa->vertex_format = setup_tab[setup].vertex_format;
+   mmesa->vertex_size = setup_tab[setup].vertex_size;
+}
+
+
+#if 0
+void mach64_emit_contiguous_verts( GLcontext *ctx,
+				 GLuint start,
+				 GLuint count )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   GLuint vertex_size = mmesa->vertex_size * 4;
+   GLuint *dest = mach64AllocDmaLow( mmesa, (count-start) * vertex_size);
+   setup_tab[mmesa->SetupIndex].emit( ctx, start, count, dest, vertex_size );
+}
+#endif
+
+
+void mach64InitVB( GLcontext *ctx )
+{
+   static int setup_tab_ready = 0;
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   const GLuint nverts = TNL_CONTEXT(ctx)->vb.Size;
+
+   /* Vertex store of nverts * 4 * 16 bytes (presumably 16 dwords per
+    * vertex -- TODO confirm); the allocation result is not checked. */
+   mmesa->verts = (GLubyte *)_mesa_align_malloc(nverts * 4 * 16, 32);
+   /* init_setup_tab() runs once, on the first call only. */
+   if (!setup_tab_ready) {
+      init_setup_tab();
+      setup_tab_ready = 1;
+   }
+}
+
+
+void mach64FreeVB( GLcontext *ctx )
+{
+   mach64ContextPtr mmesa = MACH64_CONTEXT(ctx);
+   /* Release the vertex store, if there is one, and clear the pointer. */
+   if (!mmesa->verts) return;
+   _mesa_align_free(mmesa->verts);
+   mmesa->verts = NULL;
+}
diff --git a/src/mesa/drivers/dri/mach64/mach64_vb.h b/src/mesa/drivers/dri/mach64/mach64_vb.h
new file mode 100644
index 0000000000..e0b366916b
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_vb.h
@@ -0,0 +1,77 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Leif Delgass <ldelgass@retinalburn.net>
+ *	José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_VB_H__
+#define __MACH64_VB_H__
+
+#include "main/mtypes.h"
+#include "swrast/swrast.h"
+#include "mach64_context.h"
+
+/* premultiply texture coordinates by homogeneous coordinate */
+#define MACH64_PREMULT_TEXCOORDS
+/* State-change bits relevant to vertex setup. NOTE(review): presumably what triggers mach64ChooseVertexState() - confirm */
+#define _MACH64_NEW_VERTEX_STATE (_DD_NEW_SEPARATE_SPECULAR |          \
+                               _DD_NEW_TRI_LIGHT_TWOSIDE |             \
+                               _DD_NEW_TRI_UNFILLED |                  \
+                               _NEW_TEXTURE |                          \
+                               _NEW_FOG)
+
+/* Vertex setup selection; mach64ChooseVertexState() sets SetupIndex, vertex_format and vertex_size. */
+extern void mach64CheckTexSizes( GLcontext *ctx );
+extern void mach64ChooseVertexState( GLcontext *ctx );
+/* NOTE(review): presumably builds hardware vertices for [start, count); newinputs semantics not visible here */
+extern void mach64BuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+				   GLuint newinputs );
+/* Debug aid.  NOTE(review): the (disabled) call site passes __FUNCTION__ as msg, so const char * would fit better */
+extern void mach64PrintSetupFlags(char *msg, GLuint flags );
+/* Allocate / release the per-context vertex store (mmesa->verts). */
+extern void mach64InitVB( GLcontext *ctx );
+extern void mach64FreeVB( GLcontext *ctx );
+/* Compiled out (the contiguous variant's definition is under #if 0 as well): */
+#if 0
+extern void mach64_emit_contiguous_verts( GLcontext *ctx,
+					    GLuint start,
+					    GLuint count );
+
+extern void mach64_emit_indexed_verts( GLcontext *ctx,
+					 GLuint start,
+					 GLuint count );
+#endif
+/* NOTE(review): by its signature, converts a hardware vertex into an SWvertex - confirm intended use */
+extern void mach64_translate_vertex( GLcontext *ctx,
+				       const mach64Vertex *src,
+				       SWvertex *dst );
+/* NOTE(review): presumably a debug dump of one hardware vertex - confirm */
+extern void mach64_print_vertex( GLcontext *ctx, const mach64Vertex *v );
+
+
+#endif /* __MACH64_VB_H__ */
diff --git a/src/mesa/drivers/dri/mach64/mach64_vbtmp.h b/src/mesa/drivers/dri/mach64/mach64_vbtmp.h
new file mode 100644
index 0000000000..60bfab8f6d
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/mach64_vbtmp.h
@@ -0,0 +1,770 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keithw@valinux.com>
+ *
+ * Modified for mach64 by:
+ *    Leif Delgass <ldelgass@retinalburn.net>
+ *    José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+
+/* Unlike the other templates here, this assumes quite a bit about the
+ * underlying hardware.  Specifically it assumes a d3d-like vertex
+ * format, with a layout more or less constrained to look like the
+ * following:
+ *
+ * union {
+ *    struct {
+ *        float x, y, z, w;
+ *        struct { char r, g, b, a; } color;
+ *        struct { char r, g, b, fog; } spec;
+ *        float u0, v0;
+ *        float u1, v1;
+ *        float u2, v2;
+ *        float u3, v3;
+ *    } v;
+ *    struct {
+ *        float x, y, z, w;
+ *        struct { char r, g, b, a; } color;
+ *        struct { char r, g, b, fog; } spec;
+ *        float u0, v0, q0;
+ *        float u1, v1, q1;
+ *        float u2, v2, q2;
+ *        float u3, v3, q3;
+ *    } pv;
+ *    struct {
+ *        float x, y, z;
+ *        struct { char r, g, b, a; } color;
+ *    } tv;
+ *    float f[16];
+ *    unsigned int ui[16];
+ *    unsigned char ub4[16][4];
+ * }
+ *
+
+ * DO_XYZW:  Emit xyz and maybe w coordinates.
+ * DO_RGBA:  Emit color.
+ * DO_SPEC:  Emit specular color.
+ * DO_FOG:   Emit fog coordinate in specular alpha.
+ * DO_TEX0:  Emit tex0 u,v coordinates.
+ * DO_TEX1:  Emit tex1 u,v coordinates.
+ * DO_TEX2:  Emit tex2 u,v coordinates.
+ * DO_TEX3:  Emit tex3 u,v coordinates.
+ * DO_PTEX:  Emit tex0,1,2,3 q coordinates where possible.
+ *
+ * HAVE_RGBA_COLOR: Hardware takes color in rgba order (else bgra).
+ *
+ * HAVE_HW_VIEWPORT:  Hardware performs viewport transform.
+ * HAVE_HW_DIVIDE:  Hardware performs perspective divide.
+ *
+ * HAVE_TINY_VERTICES:  Hardware understands v.tv format.
+ * HAVE_PTEX_VERTICES:  Hardware understands v.pv format.
+ * HAVE_NOTEX_VERTICES:  Hardware understands v.v format with texcount 0.
+ *
+ * Additionally, this template assumes it is emitting *transformed*
+ * vertices; the modifications to emit untransformed vertices (ie. to
+ * t&l hardware) are probably too great to coexist with the code
+ * already in this file.
+ *
+ * NOTE: The PTEX vertex format always includes TEX0 and TEX1, even if
+ * only TEX0 is enabled, in order to maintain a vertex size which is
+ * an exact number of quadwords.
+ */
+
+#if (HAVE_HW_VIEWPORT)	/* hardware applies the viewport: store the value as is */
+#define VIEWPORT_X(dst,x) ((dst) = (x))
+#define VIEWPORT_Y(dst,y) ((dst) = (y))
+#define VIEWPORT_Z(dst,z) ((dst) = (z))
+#else	/* scale and offset from the caller's matrix 's'; args parenthesized so expressions bind as one value */
+#define VIEWPORT_X(dst,x) ((dst) = s[0]  * (x) + s[12])
+#define VIEWPORT_Y(dst,y) ((dst) = s[5]  * (y) + s[13])
+#define VIEWPORT_Z(dst,z) ((dst) = s[10] * (z) + s[14])
+#endif
+
+#if (HAVE_HW_DIVIDE && !HAVE_PTEX_VERTICES)	/* combination rejected at compile time */
+#error "can't cope with this combination" 
+#endif 
+
+#ifndef LOCALVARS	/* optional text placed first in every function body of this template */
+#define LOCALVARS
+#endif
+
+#ifndef CHECK_HW_DIVIDE	/* extra condition ANDed with HAVE_HW_DIVIDE below; defaults to 1 */
+#define CHECK_HW_DIVIDE 1
+#endif
+
+#if (HAVE_HW_DIVIDE || DO_SPEC || DO_TEX0 || DO_FOG || !HAVE_TINY_VERTICES)
+/* Big-vertex emit: convert TNL vertices [start, end) into hardware VERTEX records at dest. */
+static void TAG(emit)( GLcontext *ctx,
+		       GLuint start, GLuint end,	/* half-open vertex range */
+		       void *dest,			/* first output VERTEX */
+		       GLuint stride )			/* bytes between output vertices */
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLfloat (*tc0)[4], (*tc1)[4], (*fog)[4];
+   GLfloat (*tc2)[4], (*tc3)[4];
+   GLfloat (*spec)[4];
+   GLfloat (*col)[4];
+   GLuint col_stride;
+   GLuint tc0_stride, tc1_stride, spec_stride, fog_stride;
+   GLuint tc2_stride, tc3_stride;
+   GLuint tc0_size, tc1_size;
+   GLuint tc2_size, tc3_size;
+   GLfloat (*coord)[4];
+   GLuint coord_stride;
+   VERTEX *v = (VERTEX *)dest;
+   const GLfloat *s = GET_VIEWPORT_MAT();
+   const GLubyte *mask = VB->ClipMask;
+   int i;
+
+   /* Positions come from clip space when the hardware does both the
+    * viewport transform and the divide, and from NDC otherwise. */
+
+   if (HAVE_HW_VIEWPORT && HAVE_HW_DIVIDE && CHECK_HW_DIVIDE) {
+      (void) s;
+      coord = VB->ClipPtr->data;
+      coord_stride = VB->ClipPtr->stride;
+   }
+   else {
+      coord = VB->NdcPtr->data;
+      coord_stride = VB->NdcPtr->stride;
+   }
+
+   if (DO_TEX3) {
+      const GLuint t3 = GET_TEXSOURCE(3);
+      tc3 = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t3]->data;
+      tc3_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t3]->stride;
+      if (DO_PTEX)
+	 tc3_size = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t3]->size;
+   }
+
+   if (DO_TEX2) {
+      const GLuint t2 = GET_TEXSOURCE(2);
+      tc2 = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->data;
+      tc2_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->stride;
+      if (DO_PTEX)
+	 tc2_size = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->size;
+   }
+
+   if (DO_TEX1) {
+      const GLuint t1 = GET_TEXSOURCE(1);
+      tc1 = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->data;
+      tc1_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->stride;
+      if (DO_PTEX)
+	 tc1_size = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->size;
+   }
+
+   if (DO_TEX0) {
+      const GLuint t0 = GET_TEXSOURCE(0);
+      tc0_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->stride;
+      tc0 = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->data;
+      if (DO_PTEX) 
+	 tc0_size = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->size;
+   }
+
+   if (DO_RGBA) {
+      col = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data;
+      col_stride = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride;
+   }
+
+   if (DO_SPEC) {
+      spec = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->data;
+      spec_stride = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->stride;
+   } else {
+      spec = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR1];
+      spec_stride = 0;
+   }
+
+   if (DO_FOG) {
+      if (VB->AttribPtr[_TNL_ATTRIB_FOG]) {
+	 fog = VB->AttribPtr[_TNL_ATTRIB_FOG]->data;
+	 fog_stride = VB->AttribPtr[_TNL_ATTRIB_FOG]->stride;
+      } else {
+	 static GLfloat tmp[4] = {0, 0, 0, 0};
+	 fog = &tmp;
+	 fog_stride = 0;
+      }
+   }
+
+   /* Advance every source to vertex 'start' (strides are in bytes and
+    * may be nonstandard). */
+   if (start) {
+      coord =  (GLfloat (*)[4])((GLubyte *)coord + start * coord_stride);
+      if (DO_TEX0)
+	 tc0 =  (GLfloat (*)[4])((GLubyte *)tc0 + start * tc0_stride);
+      if (DO_TEX1) 
+	 tc1 =  (GLfloat (*)[4])((GLubyte *)tc1 + start * tc1_stride);
+      if (DO_TEX2) 
+	 tc2 =  (GLfloat (*)[4])((GLubyte *)tc2 + start * tc2_stride);
+      if (DO_TEX3) 
+	 tc3 =  (GLfloat (*)[4])((GLubyte *)tc3 + start * tc3_stride);
+      if (DO_RGBA) 
+	 STRIDE_4F(col, start * col_stride);
+      if (DO_SPEC)
+	 STRIDE_4F(spec, start * spec_stride);
+      if (DO_FOG)
+	 STRIDE_4F(fog, start * fog_stride);
+      /* (fog may be the static zero fallback with stride 0, in which
+       * case the step above does not move it.) */
+   }
+
+   for (i=start; i < end; i++, v = (VERTEX *)((GLubyte *)v + stride)) {
+      if (DO_XYZW) {
+	 if (HAVE_HW_VIEWPORT || mask[i] == 0) {
+	    /* unclipped */
+	    VIEWPORT_X(v->v.x, coord[0][0]);
+	    VIEWPORT_Y(v->v.y, coord[0][1]);
+	    VIEWPORT_Z(v->v.z, coord[0][2]);
+	    v->v.w = coord[0][3];
+	 } else {
+	    /* clipped: x, y, z are not written, only w gets a value */
+	    v->v.w = 1.0;
+	 }
+	 if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+	    fprintf(stderr, "%s: vert (importable) %d: %.2f %.2f %.2f %f\n", 
+		    __FUNCTION__, i, v->v.x, v->v.y, v->v.z, v->v.w);
+	 }
+	 coord =  (GLfloat (*)[4])((GLubyte *)coord +  coord_stride);
+      }
+      if (DO_RGBA) {
+	 /* col[] is GLfloat per channel, so every channel needs a
+	  * float-to-byte conversion.  Storing through the named fields
+	  * suits either channel order, so HAVE_RGBA_COLOR needs no raw
+	  * 32-bit copy (which only copied the bits of the red float).
+	  */
+	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.color.red,   col[0][0]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.color.green, col[0][1]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.color.blue,  col[0][2]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.color.alpha, col[0][3]);
+	 STRIDE_4F(col, col_stride);
+      }
+      if (DO_SPEC) {
+	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.red,   spec[0][0]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.green, spec[0][1]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.blue,  spec[0][2]);
+	 STRIDE_4F(spec, spec_stride);
+      }
+      if (DO_FOG) {
+	 UNCLAMPED_FLOAT_TO_UBYTE(v->v.specular.alpha, fog[0][0]);
+	 /* the fog value travels in the specular alpha byte */
+	 fog =  (GLfloat (*)[4])((GLubyte *)fog + fog_stride);
+      }
+      if (DO_TEX0) {
+	 v->v.u0 = tc0[0][0];
+	 v->v.v0 = tc0[0][1];
+	 if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+	    fprintf(stderr, "%s: vert (importable) %d: u0: %.2f, v0: %.2f, w: %f\n", 
+		    __FUNCTION__, i, v->v.u0, v->v.v0, v->v.w);
+	 }
+#ifdef MACH64_PREMULT_TEXCOORDS
+	 v->v.u0 *= v->v.w;
+	 v->v.v0 *= v->v.w;
+#endif
+	 if (DO_PTEX) {
+	    if (HAVE_PTEX_VERTICES) {
+	       if (tc0_size == 4) 
+		  v->pv.q0 = tc0[0][3];
+	       else
+		  v->pv.q0 = 1.0;
+	    } 
+	    else if (tc0_size == 4) {
+#ifdef MACH64_PREMULT_TEXCOORDS
+	       v->v.w *= tc0[0][3];
+#else
+	       float rhw = 1.0 / tc0[0][3];
+	       v->v.w *= tc0[0][3];
+	       v->v.u0 *= rhw;
+	       v->v.v0 *= rhw;
+#endif
+	    } 
+	 } 
+	 tc0 =  (GLfloat (*)[4])((GLubyte *)tc0 +  tc0_stride);
+      }
+      if (DO_TEX1) {
+	 if (DO_PTEX) {
+	    v->pv.u1 = tc1[0][0];
+	    v->pv.v1 = tc1[0][1];
+	    if (tc1_size == 4) 
+	       v->pv.q1 = tc1[0][3];
+	    else
+	       v->pv.q1 = 1.0;
+	 } 
+	 else {
+	    v->v.u1 = tc1[0][0];
+	    v->v.v1 = tc1[0][1];
+	 }
+#ifdef MACH64_PREMULT_TEXCOORDS
+	 v->v.u1 *= v->v.w;	/* NOTE(review): scales v.u1/v.v1 even when DO_PTEX stored into pv.u1/pv.v1 above */
+	 v->v.v1 *= v->v.w;
+#endif
+	 tc1 =  (GLfloat (*)[4])((GLubyte *)tc1 +  tc1_stride);
+      } 
+      else if (DO_PTEX) {
+	 *(GLuint *)&v->pv.q1 = 0;	/* avoid culling on radeon */
+      }
+      if (DO_TEX2) {
+	 if (DO_PTEX) {
+	    v->pv.u2 = tc2[0][0];
+	    v->pv.v2 = tc2[0][1];
+	    if (tc2_size == 4) 
+	       v->pv.q2 = tc2[0][3];
+	    else
+	       v->pv.q2 = 1.0;
+	 } 
+	 else {
+	    v->v.u2 = tc2[0][0];
+	    v->v.v2 = tc2[0][1];
+	 }
+	 tc2 =  (GLfloat (*)[4])((GLubyte *)tc2 +  tc2_stride);
+      } 
+      if (DO_TEX3) {
+	 if (DO_PTEX) {
+	    v->pv.u3 = tc3[0][0];
+	    v->pv.v3 = tc3[0][1];
+	    if (tc3_size == 4) 
+	       v->pv.q3 = tc3[0][3];
+	    else
+	       v->pv.q3 = 1.0;
+	    } 
+	 else {
+	    v->v.u3 = tc3[0][0];
+	    v->v.v3 = tc3[0][1];
+	 }
+	 tc3 =  (GLfloat (*)[4])((GLubyte *)tc3 +  tc3_stride);
+      } 
+   }
+}
+
+#else
+#if DO_XYZW
+
+#if HAVE_HW_DIVIDE
+#error "cannot use tiny vertices with hw perspective divide"
+#endif
+
+static void TAG(emit)( GLcontext *ctx, GLuint start, GLuint end,
+		       void *dest, GLuint stride )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLfloat (*rgba)[4];
+   GLuint rgba_step;
+   GLfloat (*ndc)[4] = VB->NdcPtr->data;
+   GLuint ndc_step = VB->NdcPtr->stride;
+   GLfloat *out = (GLfloat *)dest;
+   const GLubyte *clipmask = VB->ClipMask;
+   const GLfloat *s = GET_VIEWPORT_MAT();	/* must be called 's': VIEWPORT_*() use it */
+   int n;
+
+   (void) s;
+
+   ASSERT(stride == 4);
+
+   rgba = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data;
+   rgba_step = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride;
+
+   /* Tiny vertex: x, y, z and then the packed colour in the fourth dword;
+    * no 'w' is stored.  Start by moving both sources to vertex 'start'.
+    */
+   if (start != 0) {
+      ndc = (GLfloat (*)[4])((GLubyte *)ndc + start * ndc_step);
+      STRIDE_4F(rgba, start * rgba_step);
+   }
+
+   for (n = start; n < end; n++) {
+      if (HAVE_HW_VIEWPORT || clipmask[n] == 0) {
+	 VIEWPORT_X(out[0], ndc[0][0]);
+	 VIEWPORT_Y(out[1], ndc[0][1]);
+	 VIEWPORT_Z(out[2], ndc[0][2]);
+      }
+      ndc = (GLfloat (*)[4])((GLubyte *)ndc + ndc_step);
+      if (DO_RGBA) {
+	 if (!HAVE_RGBA_COLOR) {
+	    GLubyte *bgra = (GLubyte *)&out[3];
+	    UNCLAMPED_FLOAT_TO_UBYTE(bgra[0], rgba[0][2]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(bgra[1], rgba[0][1]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(bgra[2], rgba[0][0]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(bgra[3], rgba[0][3]);
+	 } else {
+	    *(GLuint *)&out[3] = *(GLuint *)rgba;
+	 }
+	 STRIDE_4F( rgba, rgba_step );
+      }
+      if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+	 fprintf(stderr, "vert (importable) %d: %.2f %.2f %.2f %x\n",
+		 n, out[0], out[1], out[2], *(int *)&out[3]);
+      }
+      out += 4;
+   }
+}
+#else
+static void TAG(emit)( GLcontext *ctx, GLuint start, GLuint end,
+		       void *dest, GLuint stride )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLfloat (*rgba)[4] = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data;
+   GLuint rgba_step = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride;
+   GLfloat *slot = (GLfloat *)dest;
+   int n;
+
+   /* Colour-only variant: nothing but the colour dword of each output
+    * vertex is written.
+    */
+   if (start != 0) {
+      STRIDE_4F(rgba, rgba_step * start);
+   }
+
+   /* The colour dword is the fourth one in a tiny vertex and the fifth
+    * one in every other format.
+    */
+   slot += (GET_VERTEX_FORMAT() == TINY_VERTEX_FORMAT) ? 3 : 4;
+
+   for (n = start; n < end; n++) {
+      if (!HAVE_RGBA_COLOR) {
+	 GLubyte *bgra = (GLubyte *)slot;
+	 UNCLAMPED_FLOAT_TO_UBYTE(bgra[0], rgba[0][2]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(bgra[1], rgba[0][1]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(bgra[2], rgba[0][0]);
+	 UNCLAMPED_FLOAT_TO_UBYTE(bgra[3], rgba[0][3]);
+      } else {
+	 *(GLuint *)slot = *(GLuint *)rgba[0];
+      }
+
+      STRIDE_4F( rgba, rgba_step );
+      STRIDE_F(slot, stride);
+   }
+}
+#endif /* emit */
+#endif /* emit */
+
+#if (DO_XYZW) && (DO_RGBA)
+
+
+#if (HAVE_PTEX_VERTICES)
+static GLboolean TAG(check_tex_sizes)( GLcontext *ctx )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+
+   /* When a unit is in use and the unit below it has no texcoord array,
+    * the lower unit borrows the upper unit's array. */
+   if (DO_TEX3 && !VB->AttribPtr[_TNL_ATTRIB_TEX2])
+      VB->AttribPtr[_TNL_ATTRIB_TEX2] = VB->AttribPtr[_TNL_ATTRIB_TEX3];
+   if (DO_TEX2 && !VB->AttribPtr[_TNL_ATTRIB_TEX1])
+      VB->AttribPtr[_TNL_ATTRIB_TEX1] = VB->AttribPtr[_TNL_ATTRIB_TEX2];
+   if (DO_TEX1 && !VB->AttribPtr[_TNL_ATTRIB_TEX0])
+      VB->AttribPtr[_TNL_ATTRIB_TEX0] = VB->AttribPtr[_TNL_ATTRIB_TEX1];
+
+   if (DO_PTEX)
+      return GL_TRUE;
+   /* Non-projective setup: a 4-component texcoord on any unit in use is a mismatch. */
+   if (DO_TEX3 && VB->AttribPtr[_TNL_ATTRIB_TEX0 + GET_TEXSOURCE(3)]->size == 4)
+      return GL_FALSE;
+   if (DO_TEX2 && VB->AttribPtr[_TNL_ATTRIB_TEX0 + GET_TEXSOURCE(2)]->size == 4)
+      return GL_FALSE;
+   if (DO_TEX1 && VB->AttribPtr[_TNL_ATTRIB_TEX0 + GET_TEXSOURCE(1)]->size == 4)
+      return GL_FALSE;
+   if (DO_TEX0 && VB->AttribPtr[_TNL_ATTRIB_TEX0 + GET_TEXSOURCE(0)]->size == 4)
+      return GL_FALSE;
+   return GL_TRUE;
+}
+#else
+static GLboolean TAG(check_tex_sizes)( GLcontext *ctx )
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+
+   /* When a unit is in use and the unit below it has no texcoord array,
+    * the lower unit borrows the upper unit's array. */
+   if (DO_TEX3 && !VB->AttribPtr[_TNL_ATTRIB_TEX2])
+      VB->AttribPtr[_TNL_ATTRIB_TEX2] = VB->AttribPtr[_TNL_ATTRIB_TEX3];
+
+   if (DO_TEX2 && !VB->AttribPtr[_TNL_ATTRIB_TEX1])
+      VB->AttribPtr[_TNL_ATTRIB_TEX1] = VB->AttribPtr[_TNL_ATTRIB_TEX2];
+
+   if (DO_TEX1 && !VB->AttribPtr[_TNL_ATTRIB_TEX0])
+      VB->AttribPtr[_TNL_ATTRIB_TEX0] = VB->AttribPtr[_TNL_ATTRIB_TEX1];
+
+   if (DO_PTEX)
+      return GL_TRUE;
+
+   /* No hardware projective texturing in this variant; it can be faked
+    * for unit 0 only, so a 4-component texcoord on units 1-3 is a fallback.
+    */
+   if ((DO_TEX3 && VB->AttribPtr[_TNL_ATTRIB_TEX0 + GET_TEXSOURCE(3)]->size == 4) ||
+       (DO_TEX2 && VB->AttribPtr[_TNL_ATTRIB_TEX0 + GET_TEXSOURCE(2)]->size == 4) ||
+       (DO_TEX1 && VB->AttribPtr[_TNL_ATTRIB_TEX0 + GET_TEXSOURCE(1)]->size == 4)) {
+      PTEX_FALLBACK();
+      return GL_FALSE;
+   }
+
+   /* Unit 0: 4 components fail the check; the fallback is raised only with other units in use. */
+   if (!DO_TEX0 || VB->AttribPtr[_TNL_ATTRIB_TEX0 + GET_TEXSOURCE(0)]->size != 4)
+      return GL_TRUE;
+
+   if (DO_TEX1 || DO_TEX2 || DO_TEX3) {
+      PTEX_FALLBACK();
+   }
+   return GL_FALSE;
+}
+#endif /* ptex */
+
+/* Clip-time interpolation: build vertex 'edst' of the vertex store from vertices 'eout' and 'ein'. */
+static void TAG(interp)( GLcontext *ctx,
+			 GLfloat t,	/* factor handed to INTERP_UB / INTERP_F */
+			 GLuint edst, GLuint eout, GLuint ein,	/* indices into the vertex store */
+			 GLboolean force_boundary )	/* not referenced in this body */
+{
+   LOCALVARS
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLubyte *ddverts = GET_VERTEX_STORE();
+   GLuint size = GET_VERTEX_SIZE();
+   const GLfloat *dstclip = VB->ClipPtr->data[edst];
+   GLfloat w;
+   const GLfloat *s = GET_VIEWPORT_MAT();
+
+   VERTEX *dst = (VERTEX *)(ddverts + (edst * size));
+   VERTEX *in  = (VERTEX *)(ddverts + (ein * size));
+   VERTEX *out = (VERTEX *)(ddverts + (eout * size));
+
+   (void)s;
+   /* The position is rebuilt from edst's clip-space coordinates rather than interpolated. */
+   if (HAVE_HW_DIVIDE && CHECK_HW_DIVIDE) {
+      VIEWPORT_X( dst->v.x, dstclip[0] );
+      VIEWPORT_Y( dst->v.y, dstclip[1] );
+      VIEWPORT_Z( dst->v.z, dstclip[2] );
+      w = dstclip[3];
+   }
+   else {
+      w = (dstclip[3] == 0.0F) ? 1.0 : (1.0 / dstclip[3]);
+      VIEWPORT_X( dst->v.x, dstclip[0] * w );
+      VIEWPORT_Y( dst->v.y, dstclip[1] * w );
+      VIEWPORT_Z( dst->v.z, dstclip[2] * w );
+   }
+
+   if (MACH64_DEBUG & DEBUG_VERBOSE_PRIMS) {
+      fprintf( stderr, "%s: dst vert: %.2f %.2f %.2f %f\n",
+	       __FUNCTION__,
+	       dst->v.x,
+	       dst->v.y,
+	       dst->v.z,
+	       w );
+   }
+   /* First arm: formats that store w (colour in dword 4).  Else arm: 4-dword tiny vertex. */
+   if ((HAVE_HW_DIVIDE && CHECK_HW_DIVIDE) || 
+       DO_FOG || DO_SPEC || DO_TEX0 || DO_TEX1 ||
+       DO_TEX2 || DO_TEX3 || !HAVE_TINY_VERTICES) {
+
+      dst->v.w = w;
+      /* colour: the four bytes of dword 4 */
+      INTERP_UB( t, dst->ub4[4][0], out->ub4[4][0], in->ub4[4][0] );
+      INTERP_UB( t, dst->ub4[4][1], out->ub4[4][1], in->ub4[4][1] );
+      INTERP_UB( t, dst->ub4[4][2], out->ub4[4][2], in->ub4[4][2] );
+      INTERP_UB( t, dst->ub4[4][3], out->ub4[4][3], in->ub4[4][3] );
+
+      if (DO_SPEC) {
+	 INTERP_UB( t, dst->ub4[5][0], out->ub4[5][0], in->ub4[5][0] );
+	 INTERP_UB( t, dst->ub4[5][1], out->ub4[5][1], in->ub4[5][1] );
+	 INTERP_UB( t, dst->ub4[5][2], out->ub4[5][2], in->ub4[5][2] );
+      }
+      if (DO_FOG) {
+	 INTERP_UB( t, dst->ub4[5][3], out->ub4[5][3], in->ub4[5][3] );
+      }
+      if (DO_TEX0) {
+	 if (DO_PTEX) {
+	    if (HAVE_PTEX_VERTICES) {
+	       INTERP_F( t, dst->pv.u0, out->pv.u0, in->pv.u0 );
+	       INTERP_F( t, dst->pv.v0, out->pv.v0, in->pv.v0 );
+	       INTERP_F( t, dst->pv.q0, out->pv.q0, in->pv.q0 );
+	    } else {
+	       GLfloat wout = VB->NdcPtr->data[eout][3];
+	       GLfloat win = VB->NdcPtr->data[ein][3];
+	       GLfloat qout = out->pv.w / wout;
+	       GLfloat qin = in->pv.w / win;
+	       GLfloat qdst, rqdst;
+
+	       ASSERT( !HAVE_HW_DIVIDE );
+
+	       INTERP_F( t, dst->v.u0, out->v.u0 * qout, in->v.u0 * qin );
+	       INTERP_F( t, dst->v.v0, out->v.v0 * qout, in->v.v0 * qin );
+	       INTERP_F( t, qdst, qout, qin );
+
+	       rqdst = 1.0 / qdst;
+	       dst->v.u0 *= rqdst;
+	       dst->v.v0 *= rqdst;
+	       dst->v.w *= rqdst;
+	    }
+	 }
+	 else {
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    GLfloat qout = 1 / out->v.w;
+	    GLfloat qin = 1 / in->v.w;
+	    /* divide each endpoint by its own w, interpolate, then multiply by the new w */
+	    INTERP_F( t, dst->v.u0, out->v.u0 * qout, in->v.u0 * qin);
+	    INTERP_F( t, dst->v.v0, out->v.v0 * qout, in->v.v0 * qin);
+
+	    dst->v.u0 *= w;
+	    dst->v.v0 *= w;
+#else
+	    INTERP_F( t, dst->v.u0, out->v.u0, in->v.u0 );
+	    INTERP_F( t, dst->v.v0, out->v.v0, in->v.v0 );
+#endif
+	 }
+      }
+      if (DO_TEX1) {
+	 if (DO_PTEX) {
+	    INTERP_F( t, dst->pv.u1, out->pv.u1, in->pv.u1 );
+	    INTERP_F( t, dst->pv.v1, out->pv.v1, in->pv.v1 );
+	    INTERP_F( t, dst->pv.q1, out->pv.q1, in->pv.q1 );
+	 } else {
+#ifdef MACH64_PREMULT_TEXCOORDS
+	    GLfloat qout = 1 / out->v.w;
+	    GLfloat qin = 1 / in->v.w;
+	    /* same scheme as for unit 0 */
+	    INTERP_F( t, dst->v.u1, out->v.u1 * qout, in->v.u1 * qin );
+	    INTERP_F( t, dst->v.v1, out->v.v1 * qout, in->v.v1 * qin );
+
+	    dst->v.u1 *= w;
+	    dst->v.v1 *= w;
+#else
+	    INTERP_F( t, dst->v.u1, out->v.u1, in->v.u1 );
+	    INTERP_F( t, dst->v.v1, out->v.v1, in->v.v1 );
+#endif
+	 }
+      }
+      else if (DO_PTEX) {
+	 dst->pv.q0 = 0.0;	/* NOTE(review): emit zeroes pv.q1 in this case; q0 here overwrites the value interpolated above - confirm */
+      }
+      if (DO_TEX2) {
+	 if (DO_PTEX) {
+	    INTERP_F( t, dst->pv.u2, out->pv.u2, in->pv.u2 );
+	    INTERP_F( t, dst->pv.v2, out->pv.v2, in->pv.v2 );
+	    INTERP_F( t, dst->pv.q2, out->pv.q2, in->pv.q2 );
+	 } else {
+	    INTERP_F( t, dst->v.u2, out->v.u2, in->v.u2 );
+	    INTERP_F( t, dst->v.v2, out->v.v2, in->v.v2 );
+	 }
+      }
+      if (DO_TEX3) {
+	 if (DO_PTEX) {
+	    INTERP_F( t, dst->pv.u3, out->pv.u3, in->pv.u3 );
+	    INTERP_F( t, dst->pv.v3, out->pv.v3, in->pv.v3 );
+	    INTERP_F( t, dst->pv.q3, out->pv.q3, in->pv.q3 );
+	 } else {
+	    INTERP_F( t, dst->v.u3, out->v.u3, in->v.u3 );
+	    INTERP_F( t, dst->v.v3, out->v.v3, in->v.v3 );
+	 }
+      }
+   } else {
+      /* 4-dword vertex.  Color is in v[3] and there is no oow coordinate.
+       */
+      INTERP_UB( t, dst->ub4[3][0], out->ub4[3][0], in->ub4[3][0] );
+      INTERP_UB( t, dst->ub4[3][1], out->ub4[3][1], in->ub4[3][1] );
+      INTERP_UB( t, dst->ub4[3][2], out->ub4[3][2], in->ub4[3][2] );
+      INTERP_UB( t, dst->ub4[3][3], out->ub4[3][3], in->ub4[3][3] );
+   }
+}
+
+#endif /* rgba && xyzw */
+
+/* Record this template instance's hooks and vertex layout in setup_tab[IND]. */
+static void TAG(init)( void )
+{
+   setup_tab[IND].emit = TAG(emit);
+
+#if (DO_XYZW && DO_RGBA)
+   /* these two are only defined for instances with position and colour */
+   setup_tab[IND].check_tex_sizes = TAG(check_tex_sizes);
+   setup_tab[IND].interp = TAG(interp);
+#endif
+
+   /* copy_pv hook: one of the rgba4_spec5 / rgba4 / rgba3 variants. */
+   if (DO_SPEC)
+      setup_tab[IND].copy_pv = copy_pv_rgba4_spec5;
+   else if (HAVE_HW_DIVIDE || DO_FOG || DO_TEX0 || DO_TEX1 ||
+	    DO_TEX2 || DO_TEX3 || !HAVE_TINY_VERTICES)
+      setup_tab[IND].copy_pv = copy_pv_rgba4;
+   else
+      setup_tab[IND].copy_pv = copy_pv_rgba3;
+
+   /* Vertex format and size (in dwords) follow the highest unit in use. */
+   if (DO_TEX3) {
+      if (!DO_PTEX) {
+	 setup_tab[IND].vertex_format = TEX3_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 14;
+      } else {
+	 ASSERT(HAVE_PTEX_VERTICES);
+	 setup_tab[IND].vertex_format = PROJ_TEX3_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 18;
+      }
+   }
+   else if (DO_TEX2) {
+      if (!DO_PTEX) {
+	 setup_tab[IND].vertex_format = TEX2_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 12;
+      } else {
+	 /* same format and size as the DO_TEX3 projective case */
+	 ASSERT(HAVE_PTEX_VERTICES);
+	 setup_tab[IND].vertex_format = PROJ_TEX3_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 18;
+      }
+   }
+   else if (DO_TEX1) {
+      if (!DO_PTEX) {
+	 setup_tab[IND].vertex_format = TEX1_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 10;
+      } else {
+	 ASSERT(HAVE_PTEX_VERTICES);
+	 setup_tab[IND].vertex_format = PROJ_TEX1_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 12;
+      }
+   }
+   else if (DO_TEX0) {
+      if (!(DO_PTEX && HAVE_PTEX_VERTICES)) {
+	 setup_tab[IND].vertex_format = TEX0_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 8;
+      } else {
+	 setup_tab[IND].vertex_format = PROJ_TEX1_VERTEX_FORMAT;
+	 setup_tab[IND].vertex_size = 12;
+      }
+   }
+   else if (HAVE_TINY_VERTICES && !HAVE_HW_DIVIDE && !DO_SPEC && !DO_FOG) {
+      setup_tab[IND].vertex_format = TINY_VERTEX_FORMAT;
+      setup_tab[IND].vertex_size = 4;
+   } else if (HAVE_NOTEX_VERTICES) {
+      setup_tab[IND].vertex_format = NOTEX_VERTEX_FORMAT;
+      setup_tab[IND].vertex_size = 6;
+   } else {
+      setup_tab[IND].vertex_format = TEX0_VERTEX_FORMAT;
+      setup_tab[IND].vertex_size = 8;
+   }
+}
+
+
+#undef IND
+#undef TAG
diff --git a/src/mesa/drivers/dri/mach64/server/mach64_dri.h b/src/mesa/drivers/dri/mach64/server/mach64_dri.h
new file mode 100644
index 0000000000..1477443f79
--- /dev/null
+++ b/src/mesa/drivers/dri/mach64/server/mach64_dri.h
@@ -0,0 +1,126 @@
+/* -*- mode: c; c-basic-offset: 3 -*- */
+/*
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Leif Delgass <ldelgass@retinalburn.net>
+ */
+
+#ifndef __MACH64_DRI_H__
+#define __MACH64_DRI_H__ 1
+
+#include "xf86drm.h"
+
+typedef struct {	/* NOTE(review): looks like a shared record - check users before reordering or resizing fields */
+   drm_handle_t fbHandle;
+   /* NOTE(review): presumably handle and size of the register map - confirm */
+   drm_handle_t regsHandle;
+   drmSize regsSize;
+
+   int IsPCI;
+   /* AGP allocation */
+   drm_handle_t agpHandle;            /* Handle from drmAgpAlloc */
+   unsigned long agpOffset;
+   drmSize agpSize;
+   int agpMode;
+
+   /* DMA descriptor ring */
+   unsigned long     ringStart;        /* Offset into AGP space */
+   drm_handle_t         ringHandle;       /* Handle from drmAddMap */
+   drmSize           ringMapSize;      /* Size of map */
+   int               ringSize;         /* Size of ring (in kB) */
+   drmAddress        ringMap;          /* Map */
+
+   /* vertex buffer data */
+   unsigned long     bufferStart;      /* Offset into AGP space */
+   drm_handle_t         bufferHandle;     /* Handle from drmAddMap */
+   drmSize           bufferMapSize;    /* Size of map */
+   int               bufferSize;       /* Size of buffers (in MB) */
+   drmAddress        bufferMap;        /* Map */
+
+   drmBufMapPtr      drmBuffers;       /* Buffer map */
+   int               numBuffers;       /* Number of buffers */
+
+   /* AGP Texture data */
+   unsigned long     agpTexStart;      /* Offset into AGP space */
+   drm_handle_t         agpTexHandle;     /* Handle from drmAddMap */
+   drmSize           agpTexMapSize;    /* Size of map */
+   int               agpTexSize;       /* Size of AGP tex space (in MB) */
+   drmAddress        agpTexMap;        /* Map */
+   int               log2AGPTexGran;
+   /* NOTE(review): presumably x/y origins of the front, back and depth buffers - confirm */
+   int fbX;
+   int fbY;
+   int backX;
+   int backY;
+   int depthX;
+   int depthY;
+   /* offset and pitch per buffer; units are not stated in this header */
+   int frontOffset;
+   int frontPitch;
+   int backOffset;
+   int backPitch;
+   int depthOffset;
+   int depthPitch;
+   /* texture region other than the "AGP Texture data" group above */
+   int textureOffset;
+   int textureSize;
+   int logTextureGranularity;
+} ATIDRIServerInfoRec, *ATIDRIServerInfoPtr;
+
+typedef struct {	/* NOTE(review): looks like a shared record - check users before reordering or resizing fields */
+   int chipset;
+   int width;
+   int height;
+   int mem;
+   int cpp;
+   /* bus type and AGP mode, going by the field names */
+   int IsPCI;
+   int AGPMode;
+   /* front, back and depth buffers: offset and pitch (units not stated here) */
+   unsigned int frontOffset;
+   unsigned int frontPitch;
+
+   unsigned int backOffset;
+   unsigned int backPitch;
+
+   unsigned int depthOffset;
+   unsigned int depthPitch;
+   /* texture region */
+   unsigned int textureOffset;
+   unsigned int textureSize;
+   int logTextureGranularity;
+   /* NOTE(review): presumably handle and size of the register map - confirm */
+   drm_handle_t regs;
+   drmSize regsSize;
+   /* AGP region and its texture sub-range, going by the field names */
+   drm_handle_t agp;
+   drmSize agpSize;
+   unsigned int agpTextureOffset;
+   unsigned int agpTextureSize;
+   int logAgpTextureGranularity;
+} ATIDRIRec, *ATIDRIPtr;
+
+#endif /* __MACH64_DRI_H__ */
diff --git a/src/mesa/drivers/dri/mga/Doxyfile b/src/mesa/drivers/dri/mga/Doxyfile
new file mode 100644
index 0000000000..0d0c134a72
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/Doxyfile
@@ -0,0 +1,234 @@
+# Doxyfile 1.3.3-Gideon
+
+#---------------------------------------------------------------------------
+# General configuration options
+#---------------------------------------------------------------------------
+PROJECT_NAME           = mga
+PROJECT_NUMBER         = $VERSION$
+OUTPUT_DIRECTORY       = 
+OUTPUT_LANGUAGE        = English
+USE_WINDOWS_ENCODING   = NO
+EXTRACT_ALL            = NO
+EXTRACT_PRIVATE        = NO
+EXTRACT_STATIC         = NO
+EXTRACT_LOCAL_CLASSES  = YES
+HIDE_UNDOC_MEMBERS     = NO
+HIDE_UNDOC_CLASSES     = NO
+HIDE_FRIEND_COMPOUNDS  = NO
+HIDE_IN_BODY_DOCS      = NO
+BRIEF_MEMBER_DESC      = YES
+REPEAT_BRIEF           = YES
+ALWAYS_DETAILED_SEC    = NO
+INLINE_INHERITED_MEMB  = NO
+FULL_PATH_NAMES        = NO
+STRIP_FROM_PATH        = 
+INTERNAL_DOCS          = NO
+CASE_SENSE_NAMES       = YES
+SHORT_NAMES            = NO
+HIDE_SCOPE_NAMES       = NO
+SHOW_INCLUDE_FILES     = YES
+JAVADOC_AUTOBRIEF      = NO
+MULTILINE_CPP_IS_BRIEF = NO
+DETAILS_AT_TOP         = NO
+INHERIT_DOCS           = YES
+INLINE_INFO            = YES
+SORT_MEMBER_DOCS       = YES
+DISTRIBUTE_GROUP_DOC   = NO
+TAB_SIZE               = 8
+GENERATE_TODOLIST      = YES
+GENERATE_TESTLIST      = YES
+GENERATE_BUGLIST       = YES
+GENERATE_DEPRECATEDLIST= YES
+ALIASES                = 
+ENABLED_SECTIONS       = 
+MAX_INITIALIZER_LINES  = 30
+OPTIMIZE_OUTPUT_FOR_C  = NO
+OPTIMIZE_OUTPUT_JAVA   = NO
+SHOW_USED_FILES        = YES
+SUBGROUPING            = YES
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+QUIET                  = NO
+WARNINGS               = YES
+WARN_IF_UNDOCUMENTED   = YES
+WARN_IF_DOC_ERROR      = YES
+WARN_FORMAT            = "$file:$line: $text"
+WARN_LOGFILE           = 
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+INPUT                  = /home/newtree/temp/src/mesa/drivers/dri/mga
+FILE_PATTERNS          = *.c \
+                         *.cc \
+                         *.cxx \
+                         *.cpp \
+                         *.c++ \
+                         *.java \
+                         *.ii \
+                         *.ixx \
+                         *.ipp \
+                         *.i++ \
+                         *.inl \
+                         *.h \
+                         *.hh \
+                         *.hxx \
+                         *.hpp \
+                         *.h++ \
+                         *.idl \
+                         *.odl \
+                         *.cs \
+                         *.C \
+                         *.H \
+                         *.tlh \
+                         *.diff \
+                         *.patch \
+                         *.moc \
+                         *.xpm \
+                         *.dox
+RECURSIVE              = yes
+EXCLUDE                = 
+EXCLUDE_SYMLINKS       = NO
+EXCLUDE_PATTERNS       = 
+EXAMPLE_PATH           = 
+EXAMPLE_PATTERNS       = *
+EXAMPLE_RECURSIVE      = NO
+IMAGE_PATH             = 
+INPUT_FILTER           = 
+FILTER_SOURCE_FILES    = NO
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+SOURCE_BROWSER         = NO
+INLINE_SOURCES         = NO
+STRIP_CODE_COMMENTS    = YES
+REFERENCED_BY_RELATION = YES
+REFERENCES_RELATION    = YES
+VERBATIM_HEADERS       = YES
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+ALPHABETICAL_INDEX     = NO
+COLS_IN_ALPHA_INDEX    = 5
+IGNORE_PREFIX          = 
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+GENERATE_HTML          = YES
+HTML_OUTPUT            = html
+HTML_FILE_EXTENSION    = .html
+HTML_HEADER            = 
+HTML_FOOTER            = 
+HTML_STYLESHEET        = 
+HTML_ALIGN_MEMBERS     = YES
+GENERATE_HTMLHELP      = NO
+CHM_FILE               = 
+HHC_LOCATION           = 
+GENERATE_CHI           = NO
+BINARY_TOC             = NO
+TOC_EXPAND             = NO
+DISABLE_INDEX          = NO
+ENUM_VALUES_PER_LINE   = 4
+GENERATE_TREEVIEW      = NO
+TREEVIEW_WIDTH         = 250
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+GENERATE_LATEX         = YES
+LATEX_OUTPUT           = latex
+LATEX_CMD_NAME         = latex
+MAKEINDEX_CMD_NAME     = makeindex
+COMPACT_LATEX          = NO
+PAPER_TYPE             = a4wide
+EXTRA_PACKAGES         = 
+LATEX_HEADER           = 
+PDF_HYPERLINKS         = NO
+USE_PDFLATEX           = NO
+LATEX_BATCHMODE        = NO
+LATEX_HIDE_INDICES     = NO
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+GENERATE_RTF           = NO
+RTF_OUTPUT             = rtf
+COMPACT_RTF            = NO
+RTF_HYPERLINKS         = NO
+RTF_STYLESHEET_FILE    = 
+RTF_EXTENSIONS_FILE    = 
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+GENERATE_MAN           = NO
+MAN_OUTPUT             = man
+MAN_EXTENSION          = .3
+MAN_LINKS              = NO
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+GENERATE_XML           = yes
+XML_OUTPUT             = xml
+XML_SCHEMA             = 
+XML_DTD                = 
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+GENERATE_AUTOGEN_DEF   = NO
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+GENERATE_PERLMOD       = NO
+PERLMOD_LATEX          = NO
+PERLMOD_PRETTY         = YES
+PERLMOD_MAKEVAR_PREFIX = 
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor   
+#---------------------------------------------------------------------------
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = NO
+EXPAND_ONLY_PREDEF     = NO
+SEARCH_INCLUDES        = YES
+INCLUDE_PATH           = 
+INCLUDE_FILE_PATTERNS  = 
+PREDEFINED             = 
+EXPAND_AS_DEFINED      = 
+SKIP_FUNCTION_MACROS   = YES
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+TAGFILES               = 
+GENERATE_TAGFILE       = 
+ALLEXTERNALS           = NO
+EXTERNAL_GROUPS        = YES
+PERL_PATH              = /usr/bin/perl
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool   
+#---------------------------------------------------------------------------
+CLASS_DIAGRAMS         = YES
+HIDE_UNDOC_RELATIONS   = YES
+HAVE_DOT               = NO
+CLASS_GRAPH            = YES
+COLLABORATION_GRAPH    = YES
+UML_LOOK               = NO
+TEMPLATE_RELATIONS     = NO
+INCLUDE_GRAPH          = YES
+INCLUDED_BY_GRAPH      = YES
+CALL_GRAPH             = NO
+GRAPHICAL_HIERARCHY    = YES
+DOT_IMAGE_FORMAT       = png
+DOT_PATH               = 
+DOTFILE_DIRS           = 
+MAX_DOT_GRAPH_WIDTH    = 1024
+MAX_DOT_GRAPH_HEIGHT   = 1024
+MAX_DOT_GRAPH_DEPTH    = 1000
+GENERATE_LEGEND        = YES
+DOT_CLEANUP            = YES
+#---------------------------------------------------------------------------
+# Configuration options related to the search engine
+#---------------------------------------------------------------------------
+SEARCHENGINE           = NO
+CGI_NAME               = search.cgi
+CGI_URL                = 
+DOC_URL                = 
+DOC_ABSPATH            = 
+BIN_ABSPATH            = /usr/local/bin/
+EXT_DOC_PATHS          = 
diff --git a/src/mesa/drivers/dri/mga/Makefile b/src/mesa/drivers/dri/mga/Makefile
new file mode 100644
index 0000000000..92533bccc2
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/Makefile
@@ -0,0 +1,31 @@
+# src/mesa/drivers/dri/mga/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = mga_dri.so
+
+DRIVER_SOURCES = \
+	mgadd.c \
+	mgaioctl.c \
+	mgarender.c \
+	mgastate.c \
+	mgatris.c \
+	mgapixel.c \
+	mgaspan.c \
+	mgatex.c \
+	mgatexmem.c \
+	mga_texstate.c \
+	mga_texcombine.c \
+	mgavb.c \
+	mga_xmesa.c
+
+C_SOURCES = \
+	$(COMMON_SOURCES) \
+	$(DRIVER_SOURCES) 
+
+
+ASM_SOURCES = 
+
+include ../Makefile.template
+
diff --git a/src/mesa/drivers/dri/mga/README b/src/mesa/drivers/dri/mga/README
new file mode 100644
index 0000000000..a7133fa66f
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/README
@@ -0,0 +1,26 @@
+MGA DRI driver ported from XF86DRI to FBDRI
+by Denis Oliver Kropp <dok@directfb.org>
+
+
+INFO
+
+This driver has been ported from the head branch of XFree86 to
+the embedded-1-branch of Mesa.
+
+
+STATUS
+
+Already working very well as far as I've tested it (16/32 bit).
+glxgears runs at 935 fps (G550 32MB AGP 4x, Athlon 1.33) vs 744 fps with XFree.
+Other demos (terrain, fire, etc.) have been successfully tested as well.
+
+
+TODO
+
+- mgaEngineShutdown
+- mgaEngineRestore
+- SGRAM detection
+- remove some unused bits from server/*
+- subset driver support
+- mgaWaitForVBlank
+- deinitialization (from MGADRICloseScreen) a la radeonDestroyScreen
diff --git a/src/mesa/drivers/dri/mga/mga_texcombine.c b/src/mesa/drivers/dri/mga/mga_texcombine.c
new file mode 100644
index 0000000000..24083d9651
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mga_texcombine.c
@@ -0,0 +1,672 @@
+/*
+ * Copyright (c) 2003 Ville Syrjala
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ville Syrjala <syrjala@sci.fi>
+ */
+
+#include "main/glheader.h"
+
+#include "mgacontext.h"
+#include "mgatex.h"
+#include "mgaregs.h"
+
+/*
+ * GL_ARB_texture_env_combine
+ * GL_EXT_texture_env_combine
+ * GL_ARB_texture_env_crossbar
+ * GL_ATI_texture_env_combine3
+ */
+
+#define ARG_DISABLE 0xffffffff   /* all bits set: OR-ed into a slot's bits to mark "this slot cannot carry that source" */
+#define MGA_ARG1  0   /* args[] value: GL argument is routed through the hardware arg1 slot */
+#define MGA_ARG2  1   /* args[] value: GL argument is routed through the hardware arg2 slot */
+#define MGA_ALPHA 2   /* args[] value: GL argument is routed through the hardware alpha slot */
+/* Build the combine setup word for stage 'unit' from ctx's combine state; GL_FALSE when no usable setup is found. */
+GLboolean mgaUpdateTextureEnvCombine( GLcontext *ctx, int unit )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const int source = mmesa->tmu_source[unit];   /* index into ctx->Texture.Unit[] for this stage */
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[source];
+   GLuint *reg = ((GLuint *)&mmesa->setup.tdualstage0 + unit);   /* word being built; assumes the stage words follow tdualstage0 contiguously */
+   GLuint numColorArgs = 0, numAlphaArgs = 0;
+   GLuint arg1[3], arg2[3], alpha[3];   /* per GL argument: bits needed to use it in that slot, or ARG_DISABLE */
+   int args[3];   /* slot finally chosen (MGA_ARG1/MGA_ARG2/MGA_ALPHA) for each GL argument */
+   int i;
+
+   switch (texUnit->Combine.ModeRGB) {   /* number of RGB arguments the mode consumes */
+   case GL_REPLACE:
+      numColorArgs = 1;
+      break;
+   case GL_MODULATE:
+   case GL_ADD:
+   case GL_ADD_SIGNED:
+   case GL_SUBTRACT:
+      numColorArgs = 2;
+      break;
+   case GL_INTERPOLATE:
+   case GL_MODULATE_ADD_ATI:
+   case GL_MODULATE_SIGNED_ADD_ATI:
+   case GL_MODULATE_SUBTRACT_ATI:
+      numColorArgs = 3;
+      break;
+   default:
+      return GL_FALSE;
+   }
+
+   switch (texUnit->Combine.ModeA) {   /* number of alpha arguments; 3-argument alpha modes are rejected */
+   case GL_REPLACE:
+      numAlphaArgs = 1;
+      break;
+   case GL_MODULATE:
+   case GL_ADD:
+   case GL_ADD_SIGNED:
+   case GL_SUBTRACT:
+      numAlphaArgs = 2;
+      break;
+   default:
+      return GL_FALSE;
+   }
+
+   /* Start fresh :) */
+   *reg = 0;
+
+   /* COLOR: for every GL argument, collect the bits each slot would need */
+   for (i = 0; i < 3; i++) {
+      arg1[i] = 0;
+      arg2[i] = 0;
+      alpha[i] = 0;
+   }
+
+   for (i = 0;i < numColorArgs; i++) {
+      switch (texUnit->Combine.SourceRGB[i]) {
+      case GL_TEXTURE:
+         arg1[i] |= 0;
+         arg2[i] |= ARG_DISABLE;
+         alpha[i] |= TD0_color_alpha_currtex;
+         break;
+      case GL_TEXTURE0:
+         if (source == 0) {
+            arg1[i] |= 0;
+            arg2[i] |= ARG_DISABLE;
+            alpha[i] |= TD0_color_alpha_currtex;
+         } else {
+            if (ctx->Texture._EnabledUnits != 0x03) {
+               /* disable texturing */
+               mmesa->setup.dwgctl &= DC_opcod_MASK;
+               mmesa->setup.dwgctl |= DC_opcod_trap;
+               mmesa->hw.alpha_sel = AC_alphasel_diffused;
+               /* return GL_TRUE since we don't need a fallback */
+               return GL_TRUE;
+            }
+            arg1[i] |= ARG_DISABLE;
+            arg2[i] |= ARG_DISABLE;
+            alpha[i] |= TD0_color_alpha_prevtex;
+         }
+         break;
+      case GL_TEXTURE1:
+         if (source == 0) {
+            if (ctx->Texture._EnabledUnits != 0x03) {
+               /* disable texturing */
+               mmesa->setup.dwgctl &= DC_opcod_MASK;
+               mmesa->setup.dwgctl |= DC_opcod_trap;
+               mmesa->hw.alpha_sel = AC_alphasel_diffused;
+               /* return GL_TRUE since we don't need a fallback */
+               return GL_TRUE;
+            }
+            arg1[i] |= ARG_DISABLE;
+            /* G400 specs (TDUALSTAGE0) */
+            arg2[i] |= TD0_color_arg2_prevstage;
+            alpha[i] |= TD0_color_alpha_prevstage;
+         } else {
+            arg1[i] |= 0;
+            arg2[i] |= ARG_DISABLE;
+            alpha[i] |= TD0_color_alpha_currtex;
+         }
+         break;
+      case GL_CONSTANT:
+         if (mmesa->fcol_used &&
+             mmesa->envcolor[source] != mmesa->envcolor[!source])
+            return GL_FALSE;   /* fcol already claimed and the two env colors differ */
+
+         arg1[i] |= ARG_DISABLE;
+         arg2[i] |= TD0_color_arg2_fcol;
+         alpha[i] |= TD0_color_alpha_fcol;
+
+         mmesa->setup.fcol = mmesa->envcolor[source];
+         mmesa->fcol_used = GL_TRUE;
+         break;
+      case GL_PRIMARY_COLOR:
+         arg1[i] |= ARG_DISABLE;
+         /* G400 specs (TDUALSTAGE1). NOTE(review): sel_mul & sel_add keeps only the bits common to both -- confirm intended */
+         if (unit == 0 || (mmesa->setup.tdualstage0 &
+                           ((TD0_color_sel_mul & TD0_color_sel_add) |
+                            (TD0_alpha_sel_mul & TD0_alpha_sel_add)))) {
+            arg2[i] |= TD0_color_arg2_diffuse;
+            alpha[i] |= TD0_color_alpha_diffuse;
+         } else {
+            arg2[i] |= ARG_DISABLE;
+            alpha[i] |= ARG_DISABLE;
+         }
+         break;
+      case GL_PREVIOUS:
+         arg1[i] |= ARG_DISABLE;
+         if (unit == 0) {
+            arg2[i] |= TD0_color_arg2_diffuse;
+            alpha[i] |= TD0_color_alpha_diffuse;
+         } else {
+            arg2[i] |= TD0_color_arg2_prevstage;
+            alpha[i] |= TD0_color_alpha_prevstage;
+         }
+         break;
+      default:
+         return GL_FALSE;
+      }
+
+      switch (texUnit->Combine.OperandRGB[i]) {   /* add the operand modifier bits for each slot */
+      case GL_SRC_COLOR:
+         arg1[i] |= 0;
+         arg2[i] |= 0;
+         if (texUnit->Combine.SourceRGB[i] == GL_CONSTANT &&
+             RGBA_EQUAL( mmesa->envcolor[source] )) {   /* RGBA_EQUAL is defined elsewhere; presumably "all channels equal" -- confirm */
+            alpha[i] |= 0;
+         } else {
+            alpha[i] |= ARG_DISABLE;
+         }
+         break;
+      case GL_ONE_MINUS_SRC_COLOR:
+         arg1[i] |= TD0_color_arg1_inv_enable;
+         arg2[i] |= TD0_color_arg2_inv_enable;
+         if (texUnit->Combine.SourceRGB[i] == GL_CONSTANT &&
+             RGBA_EQUAL( mmesa->envcolor[source] )) {
+            alpha[i] |= (TD0_color_alpha1inv_enable |
+                         TD0_color_alpha2inv_enable);
+         } else {
+            alpha[i] |= ARG_DISABLE;
+         }
+         break;
+      case GL_SRC_ALPHA:
+         arg1[i] |= TD0_color_arg1_replicatealpha_enable;
+         arg2[i] |= TD0_color_arg2_replicatealpha_enable;
+         alpha[i] |= 0;
+         break;
+      case GL_ONE_MINUS_SRC_ALPHA:
+         arg1[i] |= (TD0_color_arg1_replicatealpha_enable |
+                     TD0_color_arg1_inv_enable);
+         arg2[i] |= (TD0_color_arg2_replicatealpha_enable |
+                     TD0_color_arg2_inv_enable);
+         alpha[i] |= (TD0_color_alpha1inv_enable |
+                      TD0_color_alpha2inv_enable);
+         break;
+      }
+   }
+
+   switch (texUnit->Combine.ModeRGB) {   /* pick a slot per argument; each successful branch fills args[0..2] */
+   case GL_MODULATE_ADD_ATI:
+   case GL_MODULATE_SIGNED_ADD_ATI:
+      /* Special handling for ATI_texture_env_combine3.
+       * If Arg1 == Arg0 or Arg1 == Arg2 we can use arg1 or arg2 as input for
+       * both multiplier and adder.
+       */
+      /* Arg1 == arg1 */
+      if (arg1[1] == arg1[0]) {
+         if ((arg1[1] | arg2[2]) != ARG_DISABLE) {   /* any ARG_DISABLE operand forces the OR to all-ones */
+            *reg |= arg1[1] | arg2[2];
+            args[0] = MGA_ARG1; args[1] = MGA_ARG1; args[2] = MGA_ARG2;
+            break;
+         } else
+         if ((arg1[1] | alpha[2]) != ARG_DISABLE) {
+            *reg |= arg1[1] | alpha[2];
+            args[0] = MGA_ARG1; args[1] = MGA_ARG1; args[2] = MGA_ALPHA;
+            break;
+         }
+      }
+      if (arg1[1] == arg1[2]) {
+         if ((arg1[1] | arg2[0]) != ARG_DISABLE) {
+            *reg |= arg1[1] | arg2[0];
+            args[0] = MGA_ARG2; args[1] = MGA_ARG1; args[2] = MGA_ARG1;
+            break;
+         } else
+         if ((arg1[1] | alpha[0]) != ARG_DISABLE) {
+            *reg |= arg1[1] | alpha[0];
+            args[0] = MGA_ALPHA; args[1] = MGA_ARG1; args[2] = MGA_ARG1;
+            break;
+         }
+      }
+      /* fallthrough */
+   case GL_MODULATE_SUBTRACT_ATI:
+      /* Arg1 == arg2 */
+      if (arg2[1] == arg2[0]) {
+         if ((arg2[1] | arg1[2]) != ARG_DISABLE) {
+            *reg |= arg2[1] | arg1[2];
+            args[0] = MGA_ARG2; args[1] = MGA_ARG2; args[2] = MGA_ARG1;
+            break;
+         } else
+         if ((arg2[1] | alpha[2]) != ARG_DISABLE) {
+            *reg |= arg2[1] | alpha[2];
+            args[0] = MGA_ARG2; args[1] = MGA_ARG2; args[2] = MGA_ALPHA;
+            break;
+         }
+      }
+      if (arg2[1] == arg2[2]) {
+         if ((arg2[1] | arg1[0]) != ARG_DISABLE) {
+            *reg |= arg2[1] | arg1[0];
+            args[0] = MGA_ARG1; args[1] = MGA_ARG2; args[2] = MGA_ARG2;
+            break;
+         } else
+         if ((arg2[1] | alpha[0]) != ARG_DISABLE) {
+            *reg |= arg2[1] | alpha[0];
+            args[0] = MGA_ALPHA; args[1] = MGA_ARG2; args[2] = MGA_ARG2;
+            break;
+         }
+      }
+      /* fallthrough */
+   default:
+      /* Find working combo of arg1, arg2 and alpha.
+       *
+       * Keep the Arg0 != alpha cases first since there's
+       * no way to get alpha out by itself (GL_REPLACE).
+       *
+       * Keep the Arg2 == alpha cases first because only alpha has the
+       * capabilities to function as Arg2 (GL_INTERPOLATE). Also good for
+       * GL_ADD, GL_ADD_SIGNED, GL_SUBTRACT since we can't get alpha to the
+       * adder.
+       *
+       * Keep the Arg1 == alpha cases last for GL_MODULATE_ADD_ATI,
+       * GL_MODULATE_SIGNED_ADD_ATI. Again because we can't get alpha to the
+       * adder.
+       *
+       * GL_MODULATE_SUBTRACT_ATI needs special treatment since it requires
+       * that Arg1 == arg2. This requirement clashes with those of other modes.
+       */
+      if ((arg1[0] | arg2[1] | alpha[2]) != ARG_DISABLE) {
+         *reg |= arg1[0] | arg2[1] | alpha[2];
+         args[0] = MGA_ARG1; args[1] = MGA_ARG2; args[2] = MGA_ALPHA;
+      } else
+      if ((arg1[1] | arg2[0] | alpha[2]) != ARG_DISABLE &&
+          texUnit->Combine.ModeRGB != GL_MODULATE_SUBTRACT_ATI) {
+         *reg |= arg1[1] | arg2[0] | alpha[2];
+         args[0] = MGA_ARG2; args[1] = MGA_ARG1; args[2] = MGA_ALPHA;
+      } else
+      if ((arg1[1] | arg2[2] | alpha[0]) != ARG_DISABLE &&
+          texUnit->Combine.ModeRGB != GL_MODULATE_SUBTRACT_ATI) {
+         *reg |= arg1[1] | arg2[2] | alpha[0];
+         args[0] = MGA_ALPHA; args[1] = MGA_ARG1; args[2] = MGA_ARG2;
+      } else
+      if ((arg1[2] | arg2[1] | alpha[0]) != ARG_DISABLE) {
+         *reg |= arg1[2] | arg2[1] | alpha[0];
+         args[0] = MGA_ALPHA; args[1] = MGA_ARG2; args[2] = MGA_ARG1;
+      } else
+      if ((arg1[0] | arg2[2] | alpha[1]) != ARG_DISABLE) {
+         *reg |= arg1[0] | arg2[2] | alpha[1];
+         args[0] = MGA_ARG1; args[1] = MGA_ALPHA; args[2] = MGA_ARG2;
+      } else
+      if ((arg1[2] | arg2[0] | alpha[1]) != ARG_DISABLE) {
+         *reg |= arg1[2] | arg2[0] | alpha[1];
+         args[0] = MGA_ARG2; args[1] = MGA_ALPHA; args[2] = MGA_ARG1;
+      } else {
+         /* nothing suitable */
+         return GL_FALSE;
+      }
+   }
+
+   switch (texUnit->Combine.ModeRGB) {   /* add the RGB operation bits, adjusted for the slots chosen above */
+   case GL_REPLACE:
+      if (texUnit->Combine.ScaleShiftRGB) {
+         return GL_FALSE;
+      }
+
+      if (args[0] == MGA_ARG1) {
+         *reg |= TD0_color_sel_arg1;
+      } else if (args[0] == MGA_ARG2) {
+         *reg |= TD0_color_sel_arg2;
+      } else if (args[0] == MGA_ALPHA) {
+         /* Can't get alpha out by itself */
+         return GL_FALSE;
+      }
+      break;
+   case GL_MODULATE:
+      if (texUnit->Combine.ScaleShiftRGB == 1) {
+         *reg |= TD0_color_modbright_2x;
+      } else if (texUnit->Combine.ScaleShiftRGB == 2) {
+         *reg |= TD0_color_modbright_4x;
+      }
+
+      *reg |= TD0_color_sel_mul;
+
+      if (args[0] == MGA_ALPHA || args[1] == MGA_ALPHA) {
+         if (args[0] == MGA_ARG1 || args[1] == MGA_ARG1) {
+            *reg |= TD0_color_arg2mul_alpha2;
+         } else if (args[0] == MGA_ARG2 || args[1] == MGA_ARG2) {
+            *reg |= TD0_color_arg1mul_alpha1;
+         }
+      }
+      break;
+   case GL_ADD_SIGNED:
+      *reg |= TD0_color_addbias_enable;
+      /* fallthrough */
+   case GL_ADD:
+      if (args[0] == MGA_ALPHA || args[1] == MGA_ALPHA) {
+         /* Can't get alpha to the adder */
+         return GL_FALSE;
+      }
+      if (texUnit->Combine.ScaleShiftRGB == 1) {
+         *reg |= TD0_color_add2x_enable;
+      } else if (texUnit->Combine.ScaleShiftRGB == 2) {
+         return GL_FALSE;
+      }
+
+      *reg |= (TD0_color_add_add |
+               TD0_color_sel_add);
+      break;
+   case GL_INTERPOLATE:
+      if (args[2] != MGA_ALPHA) {
+         /* Only alpha can function as Arg2 */
+         return GL_FALSE;
+      }
+      if (texUnit->Combine.ScaleShiftRGB == 1) {
+         *reg |= TD0_color_add2x_enable;
+      } else if (texUnit->Combine.ScaleShiftRGB == 2) {
+         return GL_FALSE;
+      }
+
+      *reg |= (TD0_color_arg1mul_alpha1 |
+               TD0_color_blend_enable |
+               TD0_color_arg1add_mulout |
+               TD0_color_arg2add_mulout |
+               TD0_color_add_add |
+               TD0_color_sel_add);
+
+      /* Have to do this with xor since GL_ONE_MINUS_SRC_ALPHA may have
+       * already touched this bit.
+       */
+      *reg ^= TD0_color_alpha1inv_enable;
+
+      if (args[0] == MGA_ARG2) {
+         /* Swap arguments */
+         *reg ^= (TD0_color_arg1mul_alpha1 |
+                  TD0_color_arg2mul_alpha2 |
+                  TD0_color_alpha1inv_enable |
+                  TD0_color_alpha2inv_enable);
+      }
+
+      if (ctx->Texture._EnabledUnits != 0x03) {
+         /* Linear blending mode needs dualtex enabled. NOTE(review): writes the word after *reg; assumes unit == 0 here -- confirm */
+         *(reg+1) = (TD0_color_arg2_prevstage |
+                     TD0_color_sel_arg2 |
+                     TD0_alpha_arg2_prevstage |
+                     TD0_alpha_sel_arg2);
+         mmesa->force_dualtex = GL_TRUE;
+      }
+      break;
+   case GL_SUBTRACT:
+      if (args[0] == MGA_ALPHA || args[1] == MGA_ALPHA) {
+         /* Can't get alpha to the adder */
+         return GL_FALSE;
+      }
+      if (texUnit->Combine.ScaleShiftRGB == 1) {
+         *reg |= TD0_color_add2x_enable;
+      } else if (texUnit->Combine.ScaleShiftRGB == 2) {
+         return GL_FALSE;
+      }
+
+      *reg |= (TD0_color_add_sub |
+               TD0_color_sel_add);
+
+      if (args[0] == MGA_ARG2) {
+         /* Swap arguments */
+         *reg ^= (TD0_color_arg1_inv_enable |
+                  TD0_color_arg2_inv_enable);
+      }
+      break;
+   case GL_MODULATE_SIGNED_ADD_ATI:
+      *reg |= TD0_color_addbias_enable;
+      /* fallthrough */
+   case GL_MODULATE_ADD_ATI:
+      if (args[1] == MGA_ALPHA) {
+         /* Can't get alpha to the adder */
+         return GL_FALSE;
+      }
+      if (texUnit->Combine.ScaleShiftRGB == 1) {
+         *reg |= TD0_color_add2x_enable;
+      } else if (texUnit->Combine.ScaleShiftRGB == 2) {
+         return GL_FALSE;
+      }
+
+      *reg |= (TD0_color_add_add |
+               TD0_color_sel_add);
+
+      if (args[1] == args[0] || args[1] == args[2]) {   /* Arg1 shares a slot with Arg0 or Arg2 (the special cases above) */
+         *reg |= TD0_color_arg1add_mulout;
+         if (args[0] == MGA_ALPHA || args[2] == MGA_ALPHA)
+            *reg |= TD0_color_arg1mul_alpha1;
+
+         if (args[1] == MGA_ARG1) {
+            /* Swap adder arguments */
+            *reg ^= (TD0_color_arg1add_mulout |
+                     TD0_color_arg2add_mulout);
+            if (args[0] == MGA_ALPHA || args[2] == MGA_ALPHA) {
+               /* Swap multiplier arguments */
+               *reg ^= (TD0_color_arg1mul_alpha1 |
+                        TD0_color_arg2mul_alpha2);
+            }
+         }
+      } else {
+         *reg |= (TD0_color_arg2mul_alpha2 |
+                  TD0_color_arg1add_mulout);
+
+         if (args[1] == MGA_ARG1) {
+            /* Swap arguments */
+            *reg ^= (TD0_color_arg1mul_alpha1 |
+                     TD0_color_arg2mul_alpha2 |
+                     TD0_color_arg1add_mulout |
+                     TD0_color_arg2add_mulout);
+         }
+      }
+      break;
+   case GL_MODULATE_SUBTRACT_ATI:
+      if (args[1] != MGA_ARG2) {
+         /* Can't swap arguments */
+         return GL_FALSE;
+      }
+      if (texUnit->Combine.ScaleShiftRGB == 1) {
+         *reg |= TD0_color_add2x_enable;
+      } else if (texUnit->Combine.ScaleShiftRGB == 2) {
+         return GL_FALSE;
+      }
+
+      *reg |= (TD0_color_add_sub |
+               TD0_color_sel_add);
+
+      if (args[1] == args[0] || args[1] == args[2]) {
+         *reg |= TD0_color_arg1add_mulout;
+         if (args[0] == MGA_ALPHA || args[2] == MGA_ALPHA)
+            *reg |= TD0_color_arg1mul_alpha1;
+      } else {
+         *reg |= (TD0_color_arg2mul_alpha2 |
+                  TD0_color_arg1add_mulout);
+      }
+      break;
+   }
+
+
+   /* ALPHA: same scheme with only the arg1/arg2 slots; arg1[], arg2[] and args[] are reused */
+   for (i = 0; i < 2; i++) {
+      arg1[i] = 0;
+      arg2[i] = 0;
+   }
+
+   for (i = 0; i < numAlphaArgs; i++) {
+      switch (texUnit->Combine.SourceA[i]) {
+      case GL_TEXTURE:
+         arg1[i] |= 0;
+         arg2[i] |= ARG_DISABLE;
+         break;
+      case GL_TEXTURE0:
+         if (source == 0) {
+            arg1[i] |= 0;
+            arg2[i] |= ARG_DISABLE;
+         } else {
+            if (ctx->Texture._EnabledUnits != 0x03) {
+               /* disable texturing */
+               mmesa->setup.dwgctl &= DC_opcod_MASK;
+               mmesa->setup.dwgctl |= DC_opcod_trap;
+               mmesa->hw.alpha_sel = AC_alphasel_diffused;
+               /* return GL_TRUE since we don't need a fallback */
+               return GL_TRUE;
+            }
+            arg1[i] |= ARG_DISABLE;
+            arg2[i] |= TD0_alpha_arg2_prevtex;
+         }
+         break;
+      case GL_TEXTURE1:
+         if (source == 0) {
+            if (ctx->Texture._EnabledUnits != 0x03) {
+               /* disable texturing */
+               mmesa->setup.dwgctl &= DC_opcod_MASK;
+               mmesa->setup.dwgctl |= DC_opcod_trap;
+               mmesa->hw.alpha_sel = AC_alphasel_diffused;
+               /* return GL_TRUE since we don't need a fallback */
+               return GL_TRUE;
+            }
+            arg1[i] |= ARG_DISABLE;
+            /* G400 specs (TDUALSTAGE0) */
+            arg2[i] |= TD0_alpha_arg2_prevstage;
+         } else {
+            arg1[i] |= 0;
+            arg2[i] |= ARG_DISABLE;
+         }
+         break;
+      case GL_CONSTANT:
+         if (mmesa->fcol_used &&
+             mmesa->envcolor[source] != mmesa->envcolor[!source])
+            return GL_FALSE;   /* fcol already claimed and the two env colors differ */
+
+         arg1[i] |= ARG_DISABLE;
+         arg2[i] |= TD0_alpha_arg2_fcol;
+
+         mmesa->setup.fcol = mmesa->envcolor[source];
+         mmesa->fcol_used = GL_TRUE;
+         break;
+      case GL_PRIMARY_COLOR:
+         arg1[i] |= ARG_DISABLE;
+         /* G400 specs (TDUALSTAGE1) */
+         if (unit == 0 || (mmesa->setup.tdualstage0 &
+                           ((TD0_color_sel_mul & TD0_color_sel_add) |
+                            (TD0_alpha_sel_mul & TD0_alpha_sel_add)))) {
+            arg2[i] |= TD0_alpha_arg2_diffuse;
+         } else {
+            arg2[i] |= ARG_DISABLE;
+         }
+         break;
+      case GL_PREVIOUS:
+         arg1[i] |= ARG_DISABLE;
+         if (unit == 0) {
+            arg2[i] |= TD0_alpha_arg2_diffuse;
+         } else {
+            arg2[i] |= TD0_alpha_arg2_prevstage;
+         }
+         break;
+      default:
+         return GL_FALSE;
+      }
+
+      switch (texUnit->Combine.OperandA[i]) {
+      case GL_SRC_ALPHA:
+         arg1[i] |= 0;
+         arg2[i] |= 0;
+         break;
+      case GL_ONE_MINUS_SRC_ALPHA:
+         arg1[i] |= TD0_alpha_arg1_inv_enable;
+         arg2[i] |= TD0_alpha_arg2_inv_enable;
+         break;
+      }
+   }
+
+   /* Find a working combo of arg1 and arg2 */
+   if ((arg1[0] | arg2[1]) != ARG_DISABLE) {
+      *reg |= arg1[0] | arg2[1];
+      args[0] = MGA_ARG1; args[1] = MGA_ARG2;
+   } else
+   if ((arg1[1] | arg2[0]) != ARG_DISABLE) {
+      *reg |= arg1[1] | arg2[0];
+      args[0] = MGA_ARG2; args[1] = MGA_ARG1;
+   } else {
+      /* nothing suitable */
+      return GL_FALSE;
+   }
+
+   switch (texUnit->Combine.ModeA) {   /* add the alpha operation bits */
+   case GL_REPLACE:
+      if (texUnit->Combine.ScaleShiftA) {
+         return GL_FALSE;
+      }
+
+      if (args[0] == MGA_ARG1) {
+         *reg |= TD0_alpha_sel_arg1;
+      } else if (args[0] == MGA_ARG2) {
+         *reg |= TD0_alpha_sel_arg2;
+      }
+      break;
+   case GL_MODULATE:
+      if (texUnit->Combine.ScaleShiftA == 1) {
+         *reg |= TD0_alpha_modbright_2x;
+      } else if (texUnit->Combine.ScaleShiftA == 2) {
+         *reg |= TD0_alpha_modbright_4x;
+      }
+
+      *reg |= TD0_alpha_sel_mul;
+      break;
+   case GL_ADD_SIGNED:
+      *reg |= TD0_alpha_addbias_enable;
+      /* fallthrough */
+   case GL_ADD:
+      if (texUnit->Combine.ScaleShiftA == 1) {
+         *reg |= TD0_alpha_add2x_enable;
+      } else if (texUnit->Combine.ScaleShiftA == 2) {
+         return GL_FALSE;
+      }
+
+      *reg |= (TD0_alpha_add_enable |
+               TD0_alpha_sel_add);
+      break;
+   case GL_SUBTRACT:
+      if (texUnit->Combine.ScaleShiftA == 1) {
+         *reg |= TD0_alpha_add2x_enable;
+      } else if (texUnit->Combine.ScaleShiftA == 2) {
+         return GL_FALSE;
+      }
+
+      *reg |= (TD0_alpha_add_disable |
+               TD0_alpha_sel_add);
+
+      if (args[0] == MGA_ARG2) {
+         /* Swap arguments */
+         *reg ^= (TD0_alpha_arg1_inv_enable |
+                  TD0_alpha_arg2_inv_enable);
+      }
+      break;
+   }
+
+   return GL_TRUE;
+}
+   
+   
diff --git a/src/mesa/drivers/dri/mga/mga_texstate.c b/src/mesa/drivers/dri/mga/mga_texstate.c
new file mode 100644
index 0000000000..54eda62a96
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mga_texstate.c
@@ -0,0 +1,898 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * (c) Copyright IBM Corporation 2002
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS, IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ian Romanick <idr@us.ibm.com>
+ *    Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/mm.h"
+#include "main/imports.h"
+#include "main/simple_list.h"
+
+#include "mgacontext.h"
+#include "mgatex.h"
+#include "mgaregs.h"
+#include "mgatris.h"
+#include "mgaioctl.h"
+
+#define MGA_USE_TABLE_FOR_FORMAT /* selects the table lookup over the switch in mgaSetTexImages() */
+#ifdef MGA_USE_TABLE_FOR_FORMAT
+#define TMC_nr_tformat (MESA_FORMAT_YCBCR_REV + 1) /* table length; NOTE(review): presumes YCBCR_REV is the largest index used below -- confirm */
+static const unsigned TMC_tformat[ TMC_nr_tformat ] = /* Mesa format -> hardware tformat value; slots not listed stay 0 */
+{
+    [MESA_FORMAT_ARGB8888] = TMC_tformat_tw32,
+    [MESA_FORMAT_RGB565]   = TMC_tformat_tw16,
+    [MESA_FORMAT_ARGB4444] = TMC_tformat_tw12,
+    [MESA_FORMAT_ARGB1555] = TMC_tformat_tw15,
+    [MESA_FORMAT_AL88]     = TMC_tformat_tw8al,
+    [MESA_FORMAT_I8]       = TMC_tformat_tw8a,
+    [MESA_FORMAT_CI8]      = TMC_tformat_tw8 ,
+    [MESA_FORMAT_YCBCR]     = TMC_tformat_tw422uyvy,
+    [MESA_FORMAT_YCBCR_REV] = TMC_tformat_tw422,
+};
+#endif /* MGA_USE_TABLE_FOR_FORMAT */
+/* Pick the hardware texel format for tObj, lay out its mipmap levels, fill in t->setup and upload the images. */
+static void
+mgaSetTexImages( mgaContextPtr mmesa,
+		 const struct gl_texture_object * tObj )
+{
+    mgaTextureObjectPtr t = (mgaTextureObjectPtr) tObj->DriverData;
+    struct gl_texture_image *baseImage = tObj->Image[0][ tObj->BaseLevel ];
+    GLint totalSize;
+    GLint width, height;
+    GLint i;
+    GLint numLevels;
+    GLint log2Width, log2Height;
+    GLuint txformat = 0;
+    GLint ofs;
+
+    /* Set the hardware texture format.  A format with no hardware
+     * equivalent is reported via _mesa_problem() and nothing is set up. */
+#ifndef MGA_USE_TABLE_FOR_FORMAT
+    switch (baseImage->TexFormat->MesaFormat) {
+
+	case MESA_FORMAT_ARGB8888: txformat = TMC_tformat_tw32;	break;
+	case MESA_FORMAT_RGB565:   txformat = TMC_tformat_tw16; break;
+	case MESA_FORMAT_ARGB4444: txformat = TMC_tformat_tw12;	break;
+	case MESA_FORMAT_ARGB1555: txformat = TMC_tformat_tw15; break;
+	case MESA_FORMAT_AL88:     txformat = TMC_tformat_tw8al; break;
+	case MESA_FORMAT_I8:       txformat = TMC_tformat_tw8a; break;
+	case MESA_FORMAT_CI8:      txformat = TMC_tformat_tw8;  break;
+        case MESA_FORMAT_YCBCR:    txformat  = TMC_tformat_tw422uyvy; break;
+        case MESA_FORMAT_YCBCR_REV: txformat = TMC_tformat_tw422; break;
+
+	default:
+	_mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
+	return;
+    }
+#else
+    if ( (baseImage->TexFormat >= TMC_nr_tformat)
+	 || (TMC_tformat[ baseImage->TexFormat ] == 0) )
+    {
+	_mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__);
+	return;
+    }
+
+    txformat = TMC_tformat[ baseImage->TexFormat ];
+
+#endif /* MGA_USE_TABLE_FOR_FORMAT */
+   /* The level range is computed first; all sizes below are read from t->base.firstLevel. */
+   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
+   if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) {
+      log2Width = 0;
+      log2Height = 0;
+   } else {
+      log2Width  = tObj->Image[0][t->base.firstLevel]->WidthLog2;
+      log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2;
+   }
+
+   width = tObj->Image[0][t->base.firstLevel]->Width;
+   height = tObj->Image[0][t->base.firstLevel]->Height;
+
+   numLevels = MIN2( t->base.lastLevel - t->base.firstLevel + 1,
+                     MGA_IS_G200(mmesa) ? G200_TEX_MAXLEVELS : G400_TEX_MAXLEVELS);
+
+   /* Give every level a 32-byte aligned offset, mark it dirty and accumulate the total size. */
+   totalSize = 0;
+   for ( i = 0 ; i < numLevels ; i++ ) {
+      const struct gl_texture_image * const texImage = 
+	  tObj->Image[0][ i + t->base.firstLevel ];
+      int size;
+
+      if (texImage == NULL)
+	 break;
+
+      size = texImage->Width * texImage->Height *
+         _mesa_get_format_bytes(baseImage->TexFormat);
+
+      t->offsets[i] = totalSize;
+      t->base.dirty_images[0] |= (1<<i);
+
+      /* All mipmaps must be 32-byte aligned */
+      totalSize += (size + 31) & ~31;
+
+      /* Since G400 calculates the offsets in hardware
+       * it can't handle more than one < 32 byte mipmap.
+       * Further testing has indicated that it can't
+       * handle any < 32 byte mipmaps.  NOTE(review): the level that
+       * trips the test below is still counted (i++ precedes the break).
+       */
+      if (MGA_IS_G400( mmesa ) && size <= 32) {
+         i++;
+         break;
+      }
+   }
+
+   /* save these values: i is the number of levels actually laid out */
+   numLevels = i;
+   t->base.lastLevel = t->base.firstLevel + numLevels - 1;
+   t->base.totalSize = totalSize;
+
+   /* setup hardware register values: AND with the tformat/tpitch/tpitchext masks, then OR in the format */
+   t->setup.texctl &= (TMC_tformat_MASK & TMC_tpitch_MASK 
+		       & TMC_tpitchext_MASK);
+   t->setup.texctl |= txformat;
+
+
+   /* Set the texture width.  In order to support non-power of 2 textures and
+    * textures larger than 1024 texels wide, "linear" pitch must be used.  For
+    * the linear pitch, if the width is 2048, a value of zero is used
+    * (the & (2048 - 1) below maps 2048 to 0). */
+
+   t->setup.texctl |= TMC_tpitchlin_enable;
+   t->setup.texctl |= MGA_FIELD( TMC_tpitchext, width & (2048 - 1) );
+
+
+   /* G400 specifies the number of mip levels in a strange way.  Since there
+    * are up to 11 levels, it requires 4 bits.  Three of the bits are at the
+    * high end of TEXFILTER.  The other bit is in the middle.  Weird.
+    * The value stored is the level count minus one. */
+   numLevels--;
+   t->setup.texfilter &= TF_mapnb_MASK & TF_mapnbhigh_MASK & TF_reserved_MASK;
+   t->setup.texfilter |= MGA_FIELD( TF_mapnb, numLevels & 0x7 );
+   t->setup.texfilter |= MGA_FIELD( TF_mapnbhigh, (numLevels >> 3) & 0x1 );
+
+   /* warp texture registers; the log2 offset differs between G200 and G400 */
+   ofs = MGA_IS_G200(mmesa) ? 28 : 11;
+
+   t->setup.texwidth = (MGA_FIELD(TW_twmask, width - 1) |
+			MGA_FIELD(TW_rfw, (10 - log2Width - 8) & 63 ) |
+			MGA_FIELD(TW_tw, (log2Width + ofs ) | 0x40 ));
+
+   t->setup.texheight = (MGA_FIELD(TH_thmask, height - 1) |
+			 MGA_FIELD(TH_rfh, (10 - log2Height - 8) & 63 ) |
+			 MGA_FIELD(TH_th, (log2Height + ofs ) | 0x40 ));
+   /* Hand the prepared object to the upload path. */
+   mgaUploadTexImages( mmesa, t );
+}
+
+
+/* ================================================================
+ * Texture unit state management
+ */
+/* G200 texture environment: turn unit 0's EnvMode into TEXCTL/TEXCTL2 bits and the alpha select. */
+static void mgaUpdateTextureEnvG200( GLcontext *ctx, GLuint unit )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const struct gl_texture_unit *unit0 = &ctx->Texture.Unit[0];
+   struct gl_texture_object *tObj = unit0->_Current;
+   mgaTextureObjectPtr t = (mgaTextureObjectPtr) tObj->DriverData;
+   const GLenum format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+   const GLboolean noTexAlpha = (format == GL_RGB || format == GL_LUMINANCE);
+   const GLboolean alphaOnly = (format == GL_ALPHA);
+
+   /* Only the 2D and rectangle targets are handled here. */
+   if (tObj != unit0->CurrentTex[TEXTURE_2D_INDEX] &&
+       tObj != unit0->CurrentTex[TEXTURE_RECT_INDEX]) {
+      return;
+   }
+
+   /* Drop every env-mode dependent bit before setting the new ones. */
+   t->setup.texctl &= ~TMC_tmodulate_enable;
+   t->setup.texctl2 &= ~(TMC_decalblend_enable |
+                         TMC_idecal_enable |
+                         TMC_decaldis_enable);
+
+   switch (unit0->EnvMode) {
+   case GL_REPLACE:
+      if (alphaOnly) {
+         t->setup.texctl2 |= TMC_idecal_enable;
+      }
+      mmesa->hw.alpha_sel = noTexAlpha ? AC_alphasel_diffused
+                                       : AC_alphasel_fromtex;
+      break;
+
+   case GL_MODULATE:
+      t->setup.texctl |= TMC_tmodulate_enable;
+      if (alphaOnly) {
+         t->setup.texctl2 |= (TMC_idecal_enable |
+                              TMC_decaldis_enable);
+      }
+      mmesa->hw.alpha_sel = noTexAlpha ? AC_alphasel_diffused
+                                       : AC_alphasel_modulated;
+      break;
+
+   case GL_DECAL:
+      /* RGB/RGBA take decalblend; every other format takes idecal. */
+      t->setup.texctl2 |= (format == GL_RGB || format == GL_RGBA)
+                             ? TMC_decalblend_enable
+                             : TMC_idecal_enable;
+      mmesa->hw.alpha_sel = AC_alphasel_diffused;
+      break;
+
+   case GL_BLEND:
+      if (!alphaOnly) {
+         /* Only GL_ALPHA textures are programmed for GL_BLEND here. */
+         t->texenv_fallback = GL_TRUE;
+         break;
+      }
+      t->setup.texctl2 |= TMC_idecal_enable;
+      mmesa->hw.alpha_sel = AC_alphasel_modulated;
+      break;
+
+   default:
+      break;
+   }
+}
+
+
+#define MGA_REPLACE		0	/* column for GL_REPLACE */
+#define MGA_MODULATE		1	/* column for GL_MODULATE */
+#define MGA_DECAL		2	/* column for GL_DECAL */
+#define MGA_ADD			3	/* column for GL_ADD */
+#define MGA_MAX_COMBFUNC	4	/* number of columns per stage */
+/* Stage words used when the base format is GL_RGB or GL_LUMINANCE; indexed [hardware stage][MGA_* column]. */
+static const GLuint g400_color_combine[][MGA_MAX_COMBFUNC] =
+{
+   /* Unit 0: arg2 is the diffuse input.  In the formulas below Cv/Av is the
+    * stage result, Cs/As the texture and Cf/Af the incoming fragment. */
+   {
+      /* GL_REPLACE
+       * Cv = Cs
+       * Av = Af
+       */
+      (TD0_color_sel_arg1 |
+       TD0_alpha_arg2_diffuse |
+       TD0_alpha_sel_arg2),
+      
+      /* GL_MODULATE
+       * Cv = Cf Cs
+       * Av = Af
+       */
+      (TD0_color_arg2_diffuse |
+       TD0_color_sel_mul |
+       TD0_alpha_arg2_diffuse |
+       TD0_alpha_sel_arg2),
+      
+      /* GL_DECAL
+       * Cv = Cs
+       * Av = Af
+       */
+      (TD0_color_sel_arg1 |
+       TD0_alpha_arg2_diffuse |
+       TD0_alpha_sel_arg2),
+      
+      /* GL_ADD
+       * Cv = Cf + Cs
+       * Av = Af
+       */
+      (TD0_color_arg2_diffuse |
+       TD0_color_add_add |
+       TD0_color_sel_add |
+       TD0_alpha_arg2_diffuse |
+       TD0_alpha_sel_arg2),
+   },
+   
+   /* Unit 1: arg2 is the previous stage, written Cp/Ap in the formulas
+    * below. */
+   {
+      /* GL_REPLACE
+       * Cv = Cs
+       * Av = Ap
+       */
+      (TD0_color_sel_arg1 |
+       TD0_alpha_arg2_prevstage |
+       TD0_alpha_sel_arg2),
+      
+      /* GL_MODULATE
+       * Cv = Cp Cs
+       * Av = Ap
+       */
+      (TD0_color_arg2_prevstage |
+       TD0_color_sel_mul |
+       TD0_alpha_arg2_prevstage |
+       TD0_alpha_sel_arg2),
+
+      /* GL_DECAL
+       * Cv = Cs
+       * Av = Ap
+       */
+      (TD0_color_sel_arg1 |
+       TD0_alpha_arg2_prevstage |
+       TD0_alpha_sel_arg2),
+      
+      /* GL_ADD
+       * Cv = Cp + Cs
+       * Av = Ap
+       */
+      (TD0_color_arg2_prevstage |
+       TD0_color_add_add |
+       TD0_color_sel_add |
+       TD0_alpha_arg2_prevstage |
+       TD0_alpha_sel_arg2),
+   },
+};
+/* Stage words for base formats carrying both colour and alpha (e.g. GL_RGBA, GL_LUMINANCE_ALPHA); indexed [hardware stage][MGA_* column]. */
+static const GLuint g400_color_alpha_combine[][MGA_MAX_COMBFUNC] =
+{
+   /* Unit 0: arg2 is the diffuse input.  In the formulas below Cv/Av is the
+    * stage result, Cs/As the texture and Cf/Af the incoming fragment. */
+   {
+      /* GL_REPLACE
+       * Cv = Cs
+       * Av = As
+       */
+      (TD0_color_sel_arg1 |
+       TD0_alpha_sel_arg1),
+      
+      /* GL_MODULATE
+       * Cv = Cf Cs
+       * Av = Af As
+       */
+      (TD0_color_arg2_diffuse |
+       TD0_color_sel_mul |
+       TD0_alpha_arg2_diffuse |
+       TD0_alpha_sel_mul),
+      
+      /* GL_DECAL
+       * tmp = Cf ( 1 - As )
+       * Cv = tmp + Cs As
+       * Av = Af
+       */
+      (TD0_color_arg2_diffuse |
+       TD0_color_alpha_currtex |
+       TD0_color_alpha1inv_enable |
+       TD0_color_arg1mul_alpha1 |
+       TD0_color_blend_enable |
+       TD0_color_arg1add_mulout |
+       TD0_color_arg2add_mulout |
+       TD0_color_add_add |
+       TD0_color_sel_add |
+       TD0_alpha_arg2_diffuse |
+       TD0_alpha_sel_arg2),
+
+      /* GL_ADD
+       * Cv = Cf + Cs
+       * Av = Af As
+       */
+      (TD0_color_arg2_diffuse |
+       TD0_color_add_add |
+       TD0_color_sel_add |
+       TD0_alpha_arg2_diffuse |
+       TD0_alpha_sel_mul),
+   },
+   
+   /* Unit 1: arg2 is the previous stage, written Cp/Ap in the formulas
+    * below. */
+   {
+      /* GL_REPLACE
+       * Cv = Cs
+       * Av = As
+       */
+      (TD0_color_sel_arg1 |
+       TD0_alpha_sel_arg1),
+      
+      /* GL_MODULATE
+       * Cv = Cp Cs
+       * Av = Ap As
+       */
+      (TD0_color_arg2_prevstage |
+       TD0_color_sel_mul |
+       TD0_alpha_arg2_prevstage |
+       TD0_alpha_sel_mul),
+
+      /* GL_DECAL
+       * tmp = Cp ( 1 - As )
+       * Cv = tmp + Cs As
+       * Av = Ap
+       */
+      (TD0_color_arg2_prevstage |
+       TD0_color_alpha_currtex |
+       TD0_color_alpha1inv_enable |
+       TD0_color_arg1mul_alpha1 |
+       TD0_color_blend_enable |
+       TD0_color_arg1add_mulout |
+       TD0_color_arg2add_mulout |
+       TD0_color_add_add |
+       TD0_color_sel_add |
+       TD0_alpha_arg2_prevstage |
+       TD0_alpha_sel_arg2),
+      
+      /* GL_ADD
+       * Cv = Cp + Cs
+       * Av = Ap As
+       */
+      (TD0_color_arg2_prevstage |
+       TD0_color_add_add |
+       TD0_color_sel_add |
+       TD0_alpha_arg2_prevstage |
+       TD0_alpha_sel_mul),
+   },
+};
+/* Stage words for GL_ALPHA textures (the GL_DECAL column also serves the "undefined" DECAL formats); indexed [hardware stage][MGA_* column]. */
+static const GLuint g400_alpha_combine[][MGA_MAX_COMBFUNC] =
+{
+   /* Unit 0: arg2 is the diffuse input.  In the formulas below Cv/Av is the
+    * stage result, As the texture alpha and Cf/Af the incoming fragment. */
+   {
+      /* GL_REPLACE
+       * Cv = Cf
+       * Av = As
+       */
+      (TD0_color_arg2_diffuse |
+       TD0_color_sel_arg2 |
+       TD0_alpha_sel_arg1),
+      
+      /* GL_MODULATE
+       * Cv = Cf
+       * Av = Af As
+       */
+      (TD0_color_arg2_diffuse |
+       TD0_color_sel_arg2 |
+       TD0_alpha_arg2_diffuse |
+       TD0_alpha_sel_mul),
+
+      /* GL_DECAL (undefined)
+       * Cv = Cf
+       * Av = Af
+       */
+      (TD0_color_arg2_diffuse |
+       TD0_color_sel_arg2 |
+       TD0_alpha_arg2_diffuse |
+       TD0_alpha_sel_arg2),
+
+      /* GL_ADD
+       * Cv = Cf
+       * Av = Af As
+       */
+      (TD0_color_arg2_diffuse |
+       TD0_color_sel_arg2 |
+       TD0_alpha_arg2_diffuse |
+       TD0_alpha_sel_mul),
+   },
+
+   /* Unit 1: arg2 is the previous stage, written Cp/Ap in the formulas
+    * below. */
+   {
+      /* GL_REPLACE
+       * Cv = Cp
+       * Av = As
+       */
+      (TD0_color_arg2_prevstage |
+       TD0_color_sel_arg2 |
+       TD0_alpha_sel_arg1),
+      
+      /* GL_MODULATE
+       * Cv = Cp
+       * Av = Ap As
+       */
+      (TD0_color_arg2_prevstage |
+       TD0_color_sel_arg2 |
+       TD0_alpha_arg2_prevstage |
+       TD0_alpha_sel_mul),
+
+      /* GL_DECAL (undefined)
+       * Cv = Cp
+       * Av = Ap
+       */
+      (TD0_color_arg2_prevstage |
+       TD0_color_sel_arg2 |
+       TD0_alpha_arg2_prevstage |
+       TD0_alpha_sel_arg2),
+
+      /* GL_ADD
+       * Cv = Cp
+       * Av = Ap As
+       */
+      (TD0_color_arg2_prevstage |
+       TD0_color_sel_arg2 |
+       TD0_alpha_arg2_prevstage |
+       TD0_alpha_sel_mul),
+   },
+};
+/* GL_BLEND texture environment for the G400; returns GL_FALSE when the stage registers cannot express it. */
+static GLboolean mgaUpdateTextureEnvBlend( GLcontext *ctx, int unit )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const int source = mmesa->tmu_source[unit];
+   const struct gl_texture_object *tObj = ctx->Texture.Unit[source]._Current;
+   const GLenum format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+   GLuint *stage0 = ((GLuint *)&mmesa->setup.tdualstage0 + unit);
+   GLuint *stage1 = &mmesa->setup.tdualstage1;
+   GLuint bits;
+   GLboolean alphaPass;
+
+   if (format == GL_ALPHA) {
+      /* Cv = Cf, Av = Af As */
+      *stage0 = (TD0_color_arg2_diffuse |
+                 TD0_color_sel_arg2 |
+                 TD0_alpha_arg2_diffuse |
+                 TD0_alpha_sel_mul);
+      return GL_TRUE;
+   }
+
+   /* C1 = Cf ( 1 - Cs ) */
+   bits = (TD0_color_arg1_inv_enable |
+           TD0_color_arg2_diffuse |
+           TD0_color_sel_mul);
+
+   switch (format) {
+   case GL_RGB:
+   case GL_LUMINANCE:
+      /* A1 = Af */
+      bits |= (TD0_alpha_arg2_diffuse | TD0_alpha_sel_arg2);
+      break;
+   case GL_RGBA:
+   case GL_LUMINANCE_ALPHA:
+      /* A1 = Af As */
+      bits |= (TD0_alpha_arg2_diffuse | TD0_alpha_sel_mul);
+      break;
+   case GL_INTENSITY:
+      /* A1 = Af ( 1 - As ) */
+      bits |= (TD0_alpha_arg1_inv_enable |
+               TD0_alpha_arg2_diffuse |
+               TD0_alpha_sel_mul);
+      break;
+   default:
+      break;
+   }
+   *stage0 = bits;
+
+   /* The first stage alone is enough when the env colour adds nothing. */
+   alphaPass = (format != GL_INTENSITY || ALPHA_ZERO(mmesa->envcolor[source]));
+   if (RGB_ZERO(mmesa->envcolor[source]) && alphaPass)
+      return GL_TRUE; /* all done */
+
+   /* The remaining term goes into the second stage, which must be free. */
+   if (ctx->Texture._EnabledUnits == 0x03)
+      return GL_FALSE; /* need both units */
+
+   mmesa->force_dualtex = GL_TRUE;
+   if (RGB_ZERO(mmesa->envcolor[source])) {
+      /* Cv = C1 */
+      bits = (TD0_color_arg2_prevstage | TD0_color_sel_arg2);
+   } else if (RGB_ONE(mmesa->envcolor[source])) {
+      /* Cv = C1 + Cs */
+      bits = (TD0_color_arg2_prevstage |
+              TD0_color_add_add |
+              TD0_color_sel_add);
+   } else if (RGBA_EQUAL(mmesa->envcolor[source])) {
+      /* Cv = C1 + Cc Cs */
+      bits = (TD0_color_arg2_prevstage |
+              TD0_color_alpha_fcol |
+              TD0_color_arg2mul_alpha2 |
+              TD0_color_arg1add_mulout |
+              TD0_color_add_add |
+              TD0_color_sel_add);
+      mmesa->setup.fcol = mmesa->envcolor[source];
+   } else {
+      *stage1 = 0;
+      return GL_FALSE;
+   }
+
+   if (alphaPass) {
+      /* Av = A1 */
+      bits |= (TD0_alpha_arg2_prevstage | TD0_alpha_sel_arg2);
+   } else if (ALPHA_ONE(mmesa->envcolor[source])) {
+      /* Av = A1 + As */
+      bits |= (TD0_alpha_arg2_prevstage |
+               TD0_alpha_add_enable |
+               TD0_alpha_sel_add);
+   } else {
+      *stage1 = bits;
+      return GL_FALSE;
+   }
+
+   *stage1 = bits;
+   return GL_TRUE;
+}
+/* G400 texture environment: load the stage register for hardware stage `unit`, or flag a texenv fallback. */
+static void mgaUpdateTextureEnvG400( GLcontext *ctx, GLuint unit )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+   const int source = mmesa->tmu_source[unit];
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[source];
+   const struct gl_texture_object *tObj = texUnit->_Current;
+   mgaTextureObjectPtr t = (mgaTextureObjectPtr) tObj->DriverData;
+   const GLenum format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+   const GLboolean colorOnly = (format == GL_RGB || format == GL_LUMINANCE);
+   GLuint *reg = ((GLuint *)&mmesa->setup.tdualstage0 + unit);
+
+   /* Only the 2D and rectangle targets are programmed here. */
+   if (tObj != texUnit->CurrentTex[TEXTURE_2D_INDEX] &&
+       tObj != texUnit->CurrentTex[TEXTURE_RECT_INDEX]) {
+      return;
+   }
+
+   switch (texUnit->EnvMode) {
+   case GL_REPLACE:
+   case GL_MODULATE: {
+      /* Both modes pick from the same three tables; only the column differs. */
+      const int func = (texUnit->EnvMode == GL_REPLACE) ? MGA_REPLACE : MGA_MODULATE;
+      if (format == GL_ALPHA) {
+         *reg = g400_alpha_combine[unit][func];
+      } else if (colorOnly) {
+         *reg = g400_color_combine[unit][func];
+      } else {
+         *reg = g400_color_alpha_combine[unit][func];
+      }
+      break;
+   }
+
+   case GL_DECAL:
+      if (format == GL_RGB) {
+         *reg = g400_color_combine[unit][MGA_DECAL];
+         break;
+      }
+      if (format != GL_RGBA) {
+         /* Undefined */
+         *reg = g400_alpha_combine[unit][MGA_DECAL];
+         break;
+      }
+      *reg = g400_color_alpha_combine[unit][MGA_DECAL];
+      if (ctx->Texture._EnabledUnits != 0x03) {
+         /* Linear blending mode needs dual texturing enabled */
+         reg[1] = (TD0_color_arg2_prevstage |
+                   TD0_color_sel_arg2 |
+                   TD0_alpha_arg2_prevstage |
+                   TD0_alpha_sel_arg2);
+         mmesa->force_dualtex = GL_TRUE;
+      }
+      break;
+
+   case GL_ADD:
+      switch (format) {
+      case GL_ALPHA:
+         *reg = g400_alpha_combine[unit][MGA_ADD];
+         break;
+      case GL_RGB:
+      case GL_LUMINANCE:
+         *reg = g400_color_combine[unit][MGA_ADD];
+         break;
+      case GL_RGBA:
+      case GL_LUMINANCE_ALPHA:
+         *reg = g400_color_alpha_combine[unit][MGA_ADD];
+         break;
+      case GL_INTENSITY:
+         /* Cv = Cf + Cs, Av = Af + As; stage 1 takes the previous stage as arg2 */
+         *reg = (TD0_color_add_add |
+                 TD0_color_sel_add |
+                 TD0_alpha_add_enable |
+                 TD0_alpha_sel_add);
+         if (unit == 0) {
+            *reg |= (TD0_color_arg2_diffuse | TD0_alpha_arg2_diffuse);
+         } else {
+            *reg |= (TD0_color_arg2_prevstage | TD0_alpha_arg2_prevstage);
+         }
+         break;
+      default:
+         break;
+      }
+      break;
+
+   case GL_BLEND:
+      if (!mgaUpdateTextureEnvBlend(ctx, unit))
+         t->texenv_fallback = GL_TRUE;
+      break;
+
+   case GL_COMBINE:
+      if (!mgaUpdateTextureEnvCombine(ctx, unit))
+         t->texenv_fallback = GL_TRUE;
+      break;
+   default:
+      break;
+   }
+}
+/* Release hardware texture stage `unit`; with no unit left enabled, switch dwgctl to DC_opcod_trap. */
+static void disable_tex( GLcontext *ctx, int unit )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+   mgaTextureObjectPtr bound = mmesa->CurrentTexObj[unit];
+
+   /* Texture unit disabled: the previously bound object, if any, no
+    * longer belongs to this unit, so clear its bound bit.
+    */
+   if ( bound != NULL ) {
+      bound->base.bound &= ~(1UL << unit);
+      mmesa->CurrentTexObj[unit] = NULL;
+   }
+
+   /* Copy stage 0 into stage 1 unless dual texturing was forced. */
+   if ( !(unit == 0 || mmesa->force_dualtex) ) {
+      mmesa->setup.tdualstage1 = mmesa->setup.tdualstage0;
+   }
+
+   /* No unit enabled at all: change the opcode and take alpha from diffuse. */
+   if ( ctx->Texture._EnabledUnits == 0 ) {
+      mmesa->setup.dwgctl = ((mmesa->setup.dwgctl & DC_opcod_MASK) |
+                             DC_opcod_trap);
+      mmesa->hw.alpha_sel = AC_alphasel_diffused;
+   }
+
+   mmesa->dirty |= MGA_UPLOAD_CONTEXT | (MGA_UPLOAD_TEX0 << unit);
+}
+/* Upload the texture feeding hardware stage `unit` if it has dirty images; GL_FALSE if it then has no memory block. */
+static GLboolean enable_tex( GLcontext *ctx, int unit )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const int source = mmesa->tmu_source[unit];
+   const struct gl_texture_object *tObj = ctx->Texture.Unit[source]._Current;
+   mgaTextureObjectPtr t = (mgaTextureObjectPtr) tObj->DriverData;
+
+   /* Nothing dirty means nothing to upload. */
+   if (!t->base.dirty_images[0]) {
+      return GL_TRUE;
+   }
+
+   /* Upload teximages (not pipelined)
+    */
+   FLUSH_BATCH( mmesa );
+   mgaSetTexImages( mmesa, tObj );
+
+   /* Report failure when no memory block is attached after the upload. */
+   return (t->base.memBlock != NULL) ? GL_TRUE : GL_FALSE;
+}
+/* Bind the current texture to hardware stage `unit` and program its texenv; GL_FALSE requests a fallback. */
+static GLboolean update_tex_common( GLcontext *ctx, int unit )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const int source = mmesa->tmu_source[unit];
+   struct gl_texture_object *tObj = ctx->Texture.Unit[source]._Current;
+   mgaTextureObjectPtr t = (mgaTextureObjectPtr) tObj->DriverData;
+   mgaTextureObjectPtr prev = mmesa->CurrentTexObj[unit];
+
+   /* Fallback if there's a texture border */
+   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
+      return GL_FALSE;
+   }
+
+   /* Update state if this is a different texture object to last
+    * time.
+    */
+   if ( prev != t ) {
+      if ( prev != NULL ) {
+         /* The old texture is no longer bound to this texture unit.
+          * Mark it as such. */
+         prev->base.bound &= ~(1UL << unit);
+      }
+
+      mmesa->CurrentTexObj[unit] = t;
+      t->base.bound |= (1UL << unit);
+
+      driUpdateTextureLRU( (driTextureObject *) t ); /* done too often */
+   }
+
+   /* register setup */
+   if ( unit == 1 ) {
+      mmesa->setup.tdualstage1 = mmesa->setup.tdualstage0;
+   }
+
+   t->texenv_fallback = GL_FALSE;
+
+   /* Set this before mgaUpdateTextureEnvG400() since
+    * GL_ARB_texture_env_crossbar may have to disable texturing.
+    */
+   mmesa->setup.dwgctl = ((mmesa->setup.dwgctl & DC_opcod_MASK) |
+                          DC_opcod_texture_trap);
+
+   /* FIXME: The Radeon has some cached state so that it can avoid calling
+    * FIXME: UpdateTextureEnv in some cases.  Is that possible here?
+    */
+   if (!MGA_IS_G400(mmesa)) {
+      mgaUpdateTextureEnvG200( ctx, unit );
+   } else {
+      /* G400: Regardless of texture env mode, we use the alpha from the
+       * texture unit (AC_alphasel_fromtex) since it will have already
+       * been modulated by the incoming fragment color, if needed.
+       * We don't want (AC_alphasel_modulate) since that'll effectively
+       * do the modulation twice.
+       */
+      mmesa->hw.alpha_sel = AC_alphasel_fromtex;
+      mgaUpdateTextureEnvG400( ctx, unit );
+   }
+
+   /* Checked only now: the texenv update above may have set force_dualtex. */
+   if (ctx->Texture._EnabledUnits == 0x03 || mmesa->force_dualtex) {
+      t->setup.texctl2 = ((t->setup.texctl2 & TMC_dualtex_MASK) |
+                          TMC_dualtex_enable);
+   } else {
+      t->setup.texctl2 &= TMC_dualtex_MASK;
+   }
+
+   /* Flag the context and this stage's texture state for upload. */
+   mmesa->dirty |= MGA_UPLOAD_CONTEXT | (MGA_UPLOAD_TEX0 << unit);
+
+   FALLBACK( ctx, MGA_FALLBACK_BORDER_MODE, t->border_fallback );
+   return !(t->border_fallback || t->texenv_fallback);
+}
+/* Bring hardware stage `unit` in line with the GL unit feeding it; GL_FALSE means it cannot be handled here. */
+static GLboolean updateTextureUnit( GLcontext *ctx, int unit )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+   const int source = mmesa->tmu_source[unit];
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[source];
+
+   switch ( texUnit->_ReallyEnabled ) {
+   case 0:
+      /* Nothing enabled on this unit. */
+      disable_tex( ctx, unit );
+      return GL_TRUE;
+   case TEXTURE_2D_BIT:
+   case TEXTURE_RECT_BIT:
+      if ( !enable_tex( ctx, unit ) )
+         return GL_FALSE;
+      return update_tex_common( ctx, unit );
+   default:
+      /* Any other enabled target (or combination) is rejected. */
+      return GL_FALSE;
+   }
+}
+
+/* Recompute the hardware texture state for every texture unit.
+ *
+ * The G400 is now programmed quite differently wrt texture environment;
+ * the per-chip work happens further down the call chain.  On exit the
+ * MGA_FALLBACK_TEXTURE fallback reflects whether every unit succeeded.
+ */
+void mgaUpdateTextureState( GLcontext *ctx )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+   const GLboolean onlyUnit1 = ((ctx->Texture._EnabledUnits & 0x03) == 0x02);
+   GLboolean ok = GL_TRUE;
+   unsigned i;
+
+   mmesa->force_dualtex = GL_FALSE;
+   mmesa->fcol_used = GL_FALSE;
+
+   /* This works around a quirk with the MGA hardware.  The hardware TEXTURE1
+    * can ONLY be used when hardware TEXTURE0 is also used, so when only
+    * OpenGL TEXTURE1 is enabled the two sources are swapped and it is
+    * routed through hardware TEXTURE0. */
+   mmesa->tmu_source[0] = onlyUnit1 ? 1 : 0;
+   mmesa->tmu_source[1] = onlyUnit1 ? 0 : 1;
+
+   /* Walk the units in order, stopping at the first one that fails. */
+   i = 0;
+   while ( (i < ctx->Const.MaxTextureUnits) && ok ) {
+      ok = updateTextureUnit( ctx, i );
+      i++;
+   }
+
+   /* Raise or clear the texture fallback according to the result. */
+   FALLBACK( ctx, MGA_FALLBACK_TEXTURE, !ok );
+}
diff --git a/src/mesa/drivers/dri/mga/mga_xmesa.c b/src/mesa/drivers/dri/mga/mga_xmesa.c
new file mode 100644
index 0000000000..31007ccb1d
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mga_xmesa.c
@@ -0,0 +1,1006 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file mga_xmesa.c
+ * MGA screen and context initialization / creation code.
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include "drm.h"
+#include "mga_drm.h"
+#include "mga_xmesa.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "vbo/vbo.h"
+
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "mgadd.h"
+#include "mgastate.h"
+#include "mgatex.h"
+#include "mgaspan.h"
+#include "mgaioctl.h"
+#include "mgatris.h"
+#include "mgavb.h"
+#include "mgapixel.h"
+#include "mga_xmesa.h"
+#include "mga_dri.h"
+
+#include "utils.h"
+#include "vblank.h"
+
+#include "drirenderbuffer.h"
+
+#include "GL/internal/dri_interface.h"
+
+#define need_GL_ARB_vertex_program
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_gpu_program_parameters
+#define need_GL_EXT_secondary_color
+#if 0
+#define need_GL_EXT_paletted_texture
+#endif
+#define need_GL_APPLE_vertex_array_object
+#define need_GL_NV_vertex_program
+#include "main/remap_helper.h"
+
+/* MGA configuration
+ */
+#include "xmlpool.h"
+
+/* Driver configuration option description, assembled from the DRI_CONF_*
+ * macros.  It is handed to driParseOptionInfo() in mgaInitDriver().
+ *
+ * Options declared below: vblank_mode, texture_depth, color_reduction,
+ * arb_vertex_program, nv_vertex_program and no_rast.
+ */
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+    DRI_CONF_SECTION_PERFORMANCE
+        DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_QUALITY
+        DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
+        DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_SOFTWARE
+        DRI_CONF_ARB_VERTEX_PROGRAM(true)
+        DRI_CONF_NV_VERTEX_PROGRAM(true)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_DEBUG
+        DRI_CONF_NO_RAST(false)
+    DRI_CONF_SECTION_END
+DRI_CONF_END;
+/* Number of options declared above (six entries).  Passed together with the
+ * string to driParseOptionInfo(); presumably the two must be kept in sync
+ * when options are added or removed -- TODO confirm against xmlconfig.
+ */
+static const GLuint __driNConfigOptions = 6;
+
+/* Debug flag word tested against the DEBUG_VERBOSE_* bits in this file.
+ * It is only defined as a variable when MGA_DEBUG is not already a
+ * preprocessor macro; mgaCreateContext() fills it from the MGA_DEBUG
+ * environment variable when DO_DEBUG is set.
+ */
+#ifndef MGA_DEBUG
+int MGA_DEBUG = 0;
+#endif
+
+/**
+ * Build the NULL-terminated list of framebuffer configs for a screen.
+ *
+ * \param psp               screen (not referenced by this function)
+ * \param pixel_bits        colour buffer depth; 16 selects RGB565, anything
+ *                          else selects BGR / 8_8_8_8_REV
+ * \param depth_bits        hardware depth buffer size
+ * \param stencil_bits      hardware stencil size (0 if none)
+ * \param have_back_buffer  whether double-buffered configs are offered
+ *
+ * Three depth/stencil combinations are described: none, depth only, and
+ * depth plus stencil.  When the hardware has no stencil, the third
+ * combination uses an 8-bit stencil, and every resulting config whose
+ * stencil size differs from \p stencil_bits is rated GLX_SLOW_CONFIG.
+ *
+ * \return the config list, or NULL if driCreateConfigs() failed.
+ */
+static const __DRIconfig **
+mgaFillInModes( __DRIscreen *psp,
+		unsigned pixel_bits, unsigned depth_bits,
+		unsigned stencil_bits, GLboolean have_back_buffer )
+{
+    /* GLX_SWAP_COPY_OML is only supported because the MGA driver doesn't
+     * support pageflipping at all.
+     */
+    static const GLenum back_buffer_modes[] = {
+	GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
+    };
+
+    const GLboolean is16bpp = (pixel_bits == 16);
+    const GLenum fb_format = is16bpp ? GL_RGB : GL_BGR;
+    const GLenum fb_type = is16bpp ? GL_UNSIGNED_SHORT_5_6_5
+                                   : GL_UNSIGNED_INT_8_8_8_8_REV;
+
+    /* With neither depth nor stencil only the first (0/0) entry is used. */
+    const unsigned depth_buffer_factor =
+	((depth_bits == 0) && (stencil_bits == 0)) ? 1 : 3;
+    const unsigned back_buffer_factor = have_back_buffer ? 2 : 1;
+
+    uint8_t depth_bits_array[3];
+    uint8_t stencil_bits_array[3];
+    uint8_t msaa_samples_array[1];
+    __DRIconfig **configs;
+    __DRIconfig **cfg;
+
+    (void) psp;
+
+    depth_bits_array[0] = 0;
+    stencil_bits_array[0] = 0;
+
+    depth_bits_array[1] = depth_bits;
+    stencil_bits_array[1] = 0;
+
+    /* Just like with the accumulation buffer, always provide some modes
+     * with a stencil buffer.  It will be a sw fallback, but some apps won't
+     * care about that.
+     */
+    depth_bits_array[2] = depth_bits;
+    stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits;
+
+    msaa_samples_array[0] = 0;
+
+    configs = driCreateConfigs(fb_format, fb_type,
+			       depth_bits_array, stencil_bits_array,
+			       depth_buffer_factor,
+			       back_buffer_modes, back_buffer_factor,
+			       msaa_samples_array, 1, GL_TRUE);
+    if (configs == NULL) {
+	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
+		 __func__, __LINE__ );
+	return NULL;
+    }
+
+    /* Mark the visual as slow if there are "fake" stencil bits. */
+    for (cfg = configs; *cfg; cfg++) {
+	__GLcontextModes *modes = &(*cfg)->modes;
+
+	if ((modes->stencilBits != 0) && (modes->stencilBits != stencil_bits)) {
+	    modes->visualRating = GLX_SLOW_CONFIG;
+	}
+    }
+
+    return (const __DRIconfig **) configs;
+}
+
+/* NULL-terminated list of screen-level DRI extensions.  mgaInitDriver()
+ * stores this table in sPriv->extensions.
+ */
+const __DRIextension *mgaScreenExtensions[] = {
+    &driReadDrawableExtension,
+    &driSwapControlExtension.base,
+    &driFrameTrackingExtension.base,
+    &driMediaStreamCounterExtension.base,
+    NULL
+};
+
+/**
+ * Create and populate the per-screen private data (mgaScreenPrivate) from
+ * the MGADRIRec supplied by the DDX in sPriv->pDevPriv.
+ *
+ * On success sPriv->private points at the new screen structure, the DMA
+ * buffers are mapped, and the driver option descriptions have been parsed.
+ * On failure every mapping made so far is released, the structure is freed
+ * and sPriv->private is reset to NULL.
+ *
+ * The structure is allocated zero-filled: fields that are only assigned
+ * conditionally (irq, mmio, primary, texVirtual[MGA_AGP_HEAP]) are read
+ * unconditionally elsewhere in this file and must not hold garbage.
+ *
+ * \param sPriv  DRI screen being initialised.
+ * \return GL_TRUE on success, GL_FALSE on any failure.
+ */
+static GLboolean
+mgaInitDriver(__DRIscreen *sPriv)
+{
+   mgaScreenPrivate *mgaScreen;
+   MGADRIPtr         serverInfo = (MGADRIPtr)sPriv->pDevPriv;
+
+   if (sPriv->devPrivSize != sizeof(MGADRIRec)) {
+      fprintf(stderr,"\nERROR!  sizeof(MGADRIRec) does not match passed size from device driver\n");
+      return GL_FALSE;
+   }
+
+   /* Allocate the private area, zero-filled (see function comment). */
+   mgaScreen = (mgaScreenPrivate *)CALLOC(sizeof(mgaScreenPrivate));
+   if (!mgaScreen) {
+      __driUtilMessage("Couldn't malloc screen struct");
+      return GL_FALSE;
+   }
+
+   mgaScreen->sPriv = sPriv;
+   sPriv->private = (void *)mgaScreen;
+
+   if (sPriv->drm_version.minor >= 1) {
+      int ret;
+      drm_mga_getparam_t gp;
+
+      /* Ask the kernel for the IRQ number; irq stays 0 ("none") otherwise. */
+      gp.param = MGA_PARAM_IRQ_NR;
+      gp.value = &mgaScreen->irq;
+      mgaScreen->irq = 0;
+
+      ret = drmCommandWriteRead( sPriv->fd, DRM_MGA_GETPARAM,
+				    &gp, sizeof(gp));
+      if (ret) {
+	    fprintf(stderr, "drmMgaGetParam (MGA_PARAM_IRQ_NR): %d\n", ret);
+	    goto fail_free;
+      }
+   }
+
+   sPriv->extensions = mgaScreenExtensions;
+
+   if (serverInfo->chipset != MGA_CARD_TYPE_G200 &&
+       serverInfo->chipset != MGA_CARD_TYPE_G400) {
+      __driUtilMessage("Unrecognized chipset");
+      goto fail_free;
+   }
+
+
+   mgaScreen->chipset = serverInfo->chipset;
+   mgaScreen->cpp = serverInfo->cpp;
+
+   mgaScreen->agpMode = serverInfo->agpMode;
+
+   mgaScreen->frontPitch = serverInfo->frontPitch;
+   mgaScreen->frontOffset = serverInfo->frontOffset;
+   mgaScreen->backOffset = serverInfo->backOffset;
+   mgaScreen->backPitch  =  serverInfo->backPitch;
+   mgaScreen->depthOffset = serverInfo->depthOffset;
+   mgaScreen->depthPitch  =  serverInfo->depthPitch;
+
+
+   /* The only reason that the MMIO region needs to be accessible and the
+    * primary DMA region base address needs to be known is so that the driver
+    * can busy wait for certain DMA operations to complete (see
+    * mgaWaitForFrameCompletion in mgaioctl.c).
+    *
+    * Starting with MGA DRM version 3.2, these are completely unneeded as
+    * there is a new, in-kernel mechanism for handling the wait.  In that
+    * case mmio and primary simply keep their zero-filled state.
+    */
+
+   if (mgaScreen->sPriv->drm_version.minor < 2) {
+      mgaScreen->mmio.handle = serverInfo->registers.handle;
+      mgaScreen->mmio.size = serverInfo->registers.size;
+      if ( drmMap( sPriv->fd,
+		   mgaScreen->mmio.handle, mgaScreen->mmio.size,
+		   &mgaScreen->mmio.map ) < 0 ) {
+	 __driUtilMessage( "Couldn't map MMIO registers" );
+	 goto fail_free;
+      }
+
+      mgaScreen->primary.handle = serverInfo->primary.handle;
+      mgaScreen->primary.size = serverInfo->primary.size;
+   }
+
+   mgaScreen->textureOffset[MGA_CARD_HEAP] = serverInfo->textureOffset;
+   mgaScreen->textureOffset[MGA_AGP_HEAP] = (serverInfo->agpTextureOffset |
+					     PDEA_pagpxfer_enable | 1);
+
+   mgaScreen->textureSize[MGA_CARD_HEAP] = serverInfo->textureSize;
+   mgaScreen->textureSize[MGA_AGP_HEAP] = serverInfo->agpTextureSize;
+
+
+   /* The texVirtual array stores the base addresses in the CPU's address
+    * space of the texture memory pools.  The base address of the on-card
+    * memory pool is calculated as an offset of the base of video memory.  The
+    * AGP texture pool has to be mapped into the process's address space by
+    * the DRM.
+    */
+
+   mgaScreen->texVirtual[MGA_CARD_HEAP] = (char *)(mgaScreen->sPriv->pFB +
+					   serverInfo->textureOffset);
+
+   if ( serverInfo->agpTextureSize > 0 ) {
+      if (drmMap(sPriv->fd, serverInfo->agpTextureOffset,
+		 serverInfo->agpTextureSize,
+		 (drmAddress *)&mgaScreen->texVirtual[MGA_AGP_HEAP]) != 0) {
+	 __driUtilMessage("Couldn't map agptexture region");
+	 goto fail_unmap_mmio;
+      }
+   }
+
+
+   /* For calculating setupdma addresses.
+    */
+
+   mgaScreen->bufs = drmMapBufs(sPriv->fd);
+   if (!mgaScreen->bufs) {
+      __driUtilMessage("Couldn't map dma buffers");
+      goto fail_unmap_agp;
+   }
+   mgaScreen->sarea_priv_offset = serverInfo->sarea_priv_offset;
+
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo (&mgaScreen->optionCache,
+		       __driConfigOptions, __driNConfigOptions);
+
+   return GL_TRUE;
+
+   /* Error unwinding: release resources in reverse order of acquisition.
+    * Each label is only reached after the mapping it undoes (if it was
+    * requested at all) has succeeded.
+    */
+fail_unmap_agp:
+   if ( serverInfo->agpTextureSize > 0 ) {
+      drmUnmap( mgaScreen->texVirtual[MGA_AGP_HEAP],
+		serverInfo->agpTextureSize );
+   }
+fail_unmap_mmio:
+   if ( sPriv->drm_version.minor < 2 ) {
+      drmUnmap( mgaScreen->mmio.map, mgaScreen->mmio.size );
+   }
+fail_free:
+   FREE( mgaScreen );
+   sPriv->private = NULL;
+   return GL_FALSE;
+}
+
+
+/**
+ * Tear down the per-screen private data created by mgaInitDriver().
+ *
+ * Releases the DMA buffer mapping, the AGP texture mapping (made only when
+ * the AGP texture heap has a non-zero size), the MMIO register mapping
+ * (made only for DRM minor versions below 2; mmio is zeroed otherwise), the
+ * parsed option information and finally the structure itself.
+ *
+ * \param sPriv  DRI screen whose private data is destroyed; sPriv->private
+ *               is NULL afterwards.
+ */
+static void
+mgaDestroyScreen(__DRIscreen *sPriv)
+{
+   mgaScreenPrivate *mgaScreen = (mgaScreenPrivate *) sPriv->private;
+
+   if (MGA_DEBUG&DEBUG_VERBOSE_DRI)
+      fprintf(stderr, "mgaDestroyScreen\n");
+
+   /* Nothing to do if initialisation never completed. */
+   if (!mgaScreen)
+      return;
+
+   drmUnmapBufs(mgaScreen->bufs);
+
+   /* The AGP pool is mapped exactly when its size is non-zero. */
+   if (mgaScreen->textureSize[MGA_AGP_HEAP] > 0) {
+      drmUnmap(mgaScreen->texVirtual[MGA_AGP_HEAP],
+	       mgaScreen->textureSize[MGA_AGP_HEAP]);
+      mgaScreen->texVirtual[MGA_AGP_HEAP] = NULL;
+   }
+
+   /* mmio.map is either a live mapping or zero (never mapped). */
+   if (mgaScreen->mmio.map) {
+      drmUnmap(mgaScreen->mmio.map, mgaScreen->mmio.size);
+      mgaScreen->mmio.map = NULL;
+   }
+
+   /* free all option information */
+   driDestroyOptionInfo (&mgaScreen->optionCache);
+
+   FREE(mgaScreen);
+   sPriv->private = NULL;
+}
+
+
+/* Driver-specific render stage; only referenced from the disabled (#if 0)
+ * entry in the table below.
+ */
+extern const struct tnl_pipeline_stage _mga_render_stage;
+
+/* TNL pipeline installed by mgaCreateContext() through
+ * _tnl_install_pipeline().  The list is terminated by a null pointer.
+ */
+static const struct tnl_pipeline_stage *mga_pipeline[] = {
+   &_tnl_vertex_transform_stage, 
+   &_tnl_normal_transform_stage, 
+   &_tnl_lighting_stage,	
+   &_tnl_fog_coordinate_stage,
+   &_tnl_texgen_stage, 
+   &_tnl_texture_transform_stage, 
+   &_tnl_vertex_program_stage,
+
+				/* REMOVE: point attenuation stage */
+#if 0
+   &_mga_render_stage,		/* ADD: unclipped rastersetup-to-dma */
+                                /* Need new ioctl for wacceptseq */
+#endif
+   &_tnl_render_stage,		
+   0,                           /* end-of-list marker */
+};
+
+
+/* Extensions enabled by mgaCreateContext() only when MGA_IS_G400() is true
+ * for the context.  Terminated by a { NULL, NULL } entry.
+ */
+static const struct dri_extension g400_extensions[] =
+{
+   { "GL_ARB_multitexture",           NULL },
+   { "GL_ARB_texture_env_add",        NULL },
+   { "GL_ARB_texture_env_combine",    NULL },
+   { "GL_ARB_texture_env_crossbar",   NULL },
+   { "GL_EXT_texture_env_combine",    NULL },
+   { "GL_EXT_texture_edge_clamp",     NULL },
+   { "GL_ATI_texture_env_combine3",   NULL },
+   { NULL,                            NULL }
+};
+
+/* Extensions enabled unconditionally by mgaCreateContext().  Terminated by
+ * a { NULL, NULL } entry.
+ */
+static const struct dri_extension card_extensions[] =
+{
+   { "GL_ARB_texture_rectangle",      NULL },
+   { "GL_EXT_blend_logic_op",         NULL },
+   { "GL_EXT_fog_coord",              GL_EXT_fog_coord_functions },
+   /* paletted_textures currently doesn't work, but we could fix them later.
+    * need_GL_EXT_paletted_texture is only defined inside an "#if 0" near
+    * the top of this file, so these two entries are currently compiled out.
+    */
+#if defined( need_GL_EXT_paletted_texture )
+   { "GL_EXT_shared_texture_palette", NULL },
+   { "GL_EXT_paletted_texture",       GL_EXT_paletted_texture_functions },
+#endif
+   { "GL_EXT_secondary_color",        GL_EXT_secondary_color_functions },
+   { "GL_EXT_stencil_wrap",           NULL },
+   { "GL_APPLE_vertex_array_object",  GL_APPLE_vertex_array_object_functions },
+   { "GL_MESA_ycbcr_texture",         NULL },
+   { "GL_SGIS_generate_mipmap",       NULL },
+   { NULL,                            NULL }
+};
+
+/* Extensions enabled by mgaCreateContext() when the "arb_vertex_program"
+ * driver option is true.
+ */
+static const struct dri_extension ARB_vp_extensions[] = {
+   { "GL_ARB_vertex_program",         GL_ARB_vertex_program_functions },
+   { "GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions },
+   { NULL,                            NULL }
+};
+
+/* Extensions enabled by mgaCreateContext() when the "nv_vertex_program"
+ * driver option is true.
+ */
+static const struct dri_extension NV_vp_extensions[] = {
+   { "GL_NV_vertex_program",          GL_NV_vertex_program_functions },
+   { "GL_NV_vertex_program1_1",       NULL },
+   { NULL,                            NULL }
+};
+
+/* Keyword -> DEBUG_VERBOSE_* flag table handed to driParseDebugString()
+ * together with the value of the MGA_DEBUG environment variable (see
+ * mgaCreateContext(), DO_DEBUG builds only).
+ */
+static const struct dri_debug_control debug_control[] =
+{
+    { "fall",  DEBUG_VERBOSE_FALLBACK },
+    { "tex",   DEBUG_VERBOSE_TEXTURE },
+    { "ioctl", DEBUG_VERBOSE_IOCTL },
+    { "verb",  DEBUG_VERBOSE_MSG },
+    { "dri",   DEBUG_VERBOSE_DRI },
+    { NULL,    0 }
+};
+
+
+/**
+ * Create the driver-private context (mgaContext) and the Mesa context that
+ * goes with it.
+ *
+ * \param api                   requested API; not referenced in this function
+ * \param mesaVis               visual the context is created for
+ * \param driContextPriv        DRI context; its driverPrivate is set to the
+ *                              new mgaContext
+ * \param sharedContextPrivate  mgaContext to share objects with, or NULL
+ *
+ * \return GL_TRUE on success; GL_FALSE if the mgaContext or the Mesa
+ *         context could not be allocated.
+ */
+static GLboolean
+mgaCreateContext( gl_api api,
+		  const __GLcontextModes *mesaVis,
+                  __DRIcontext *driContextPriv,
+                  void *sharedContextPrivate )
+{
+   int i;
+   unsigned   maxlevels;
+   GLcontext *ctx, *shareCtx;
+   mgaContextPtr mmesa;
+   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+   mgaScreenPrivate *mgaScreen = (mgaScreenPrivate *)sPriv->private;
+   /* The driver's private SAREA lives at sarea_priv_offset bytes into the
+    * shared area.
+    */
+   drm_mga_sarea_t *saPriv = (drm_mga_sarea_t *)(((char*)sPriv->pSAREA)+
+					      mgaScreen->sarea_priv_offset);
+   struct dd_function_table functions;
+
+   if (MGA_DEBUG&DEBUG_VERBOSE_DRI)
+      fprintf(stderr, "mgaCreateContext\n");
+
+   /* allocate mga context */
+   mmesa = (mgaContextPtr) CALLOC(sizeof(mgaContext));
+   if (!mmesa) {
+      return GL_FALSE;
+   }
+
+   /* Init default driver functions then plug in our MGA-specific functions
+    * (the texture functions are especially important)
+    */
+   _mesa_init_driver_functions( &functions );
+   mgaInitDriverFuncs( &functions );
+   mgaInitTextureFuncs( &functions );
+   mgaInitIoctlFuncs( &functions );
+
+   /* Allocate the Mesa context */
+   if (sharedContextPrivate)
+      shareCtx = ((mgaContextPtr) sharedContextPrivate)->glCtx;
+   else 
+      shareCtx = NULL;
+   mmesa->glCtx = _mesa_create_context(mesaVis, shareCtx,
+                                       &functions, (void *) mmesa);
+   if (!mmesa->glCtx) {
+      FREE(mmesa);
+      return GL_FALSE;
+   }
+   driContextPriv->driverPrivate = mmesa;
+
+   /* Init mga state */
+   mmesa->hHWContext = driContextPriv->hHWContext;
+   mmesa->driFd = sPriv->fd;
+   mmesa->driHwLock = &sPriv->pSAREA->lock;
+
+   mmesa->mgaScreen = mgaScreen;
+   mmesa->driScreen = sPriv;
+   mmesa->sarea = (void *)saPriv;
+
+   /* Parse configuration files */
+   driParseConfigFiles (&mmesa->optionCache, &mgaScreen->optionCache,
+                        sPriv->myNum, "mga");
+
+   (void) memset( mmesa->texture_heaps, 0, sizeof( mmesa->texture_heaps ) );
+   make_empty_list( & mmesa->swapped );
+
+   /* Two heaps (card + AGP) when an AGP texture region is mapped, else one.
+    * NOTE(review): this relies on texVirtual[MGA_AGP_HEAP] being NULL when
+    * no AGP region was mapped -- confirm the screen struct is zero-filled.
+    */
+   mmesa->nr_heaps = mgaScreen->texVirtual[MGA_AGP_HEAP] ? 2 : 1;
+   for ( i = 0 ; i < mmesa->nr_heaps ; i++ ) {
+      /* NOTE(review): the returned heap pointer is not checked here. */
+      mmesa->texture_heaps[i] = driCreateTextureHeap( i, mmesa,
+	    mgaScreen->textureSize[i],
+	    6,
+	    MGA_NR_TEX_REGIONS,
+	    (drmTextureRegionPtr)mmesa->sarea->texList[i],
+	    &mmesa->sarea->texAge[i],
+	    &mmesa->swapped,
+	    sizeof( mgaTextureObject_t ),
+	    (destroy_texture_object_t *) mgaDestroyTexObj );
+   }
+
+   /* Set the maximum texture size small enough that we can guarantee
+    * that both texture units can bind a maximal texture and have them
+    * on the card at once.
+    */
+   ctx = mmesa->glCtx;
+   if ( mgaScreen->chipset == MGA_CARD_TYPE_G200 ) {
+      /* G200: a single texture unit. */
+      ctx->Const.MaxTextureUnits = 1;
+      ctx->Const.MaxTextureImageUnits = 1;
+      ctx->Const.MaxTextureCoordUnits = 1;
+      maxlevels = G200_TEX_MAXLEVELS;
+
+   }
+   else {
+      /* Any other accepted chipset: two texture units. */
+      ctx->Const.MaxTextureUnits = 2;
+      ctx->Const.MaxTextureImageUnits = 2;
+      ctx->Const.MaxTextureCoordUnits = 2;
+      maxlevels = G400_TEX_MAXLEVELS;
+   }
+
+   driCalculateMaxTextureLevels( mmesa->texture_heaps,
+				 mmesa->nr_heaps,
+				 & ctx->Const,
+				 4,
+				 11, /* max 2D texture size is 2048x2048 */
+				 0,  /* 3D textures unsupported. */
+				 0,  /* cube textures unsupported. */
+				 11, /* max texture rect size is 2048x2048 */
+				 maxlevels,
+				 GL_FALSE,
+				 0 );
+
+   ctx->Const.MinLineWidth = 1.0;
+   ctx->Const.MinLineWidthAA = 1.0;
+   ctx->Const.MaxLineWidth = 10.0;
+   ctx->Const.MaxLineWidthAA = 10.0;
+   ctx->Const.LineWidthGranularity = 1.0;
+
+   ctx->Const.MaxDrawBuffers = 1;
+
+   /* "texture_depth" option; the FB setting follows the visual: 32-bit
+    * textures for visuals with at least 24 colour bits, 16-bit otherwise.
+    */
+   mmesa->texture_depth = driQueryOptioni (&mmesa->optionCache,
+					   "texture_depth");
+   if (mmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
+      mmesa->texture_depth = ( mesaVis->rgbBits >= 24 ) ?
+	 DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
+   /* Hardware stencil is only used together with a 24-bit depth buffer. */
+   mmesa->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
+
+   /* Depth scale and clear values per depth size.  There is no default
+    * case: for any other depth size these fields keep the values left by
+    * the CALLOC above (presumably zero).
+    */
+   switch (mesaVis->depthBits) {
+   case 16: 
+      mmesa->depth_scale = 1.0/(GLdouble)0xffff; 
+      mmesa->depth_clear_mask = ~0;
+      mmesa->ClearDepth = 0xffff;
+      break;
+   case 24:
+      mmesa->depth_scale = 1.0/(GLdouble)0xffffff;
+      if (mmesa->hw_stencil) {
+	 /* Depth in the upper 24 bits, stencil in the low 8 bits. */
+	 mmesa->depth_clear_mask = 0xffffff00;
+	 mmesa->stencil_clear_mask = 0x000000ff;
+      } else
+	 mmesa->depth_clear_mask = ~0;
+      mmesa->ClearDepth = 0xffffff00;
+      break;
+   case 32:
+      mmesa->depth_scale = 1.0/(GLdouble)0xffffffff;
+      mmesa->depth_clear_mask = ~0;
+      mmesa->ClearDepth = 0xffffffff;
+      break;
+   };
+
+   mmesa->haveHwStipple = GL_FALSE;
+   mmesa->RenderIndex = -1;		/* impossible value */
+   mmesa->dirty = ~0;
+   mmesa->vertex_format = 0;   
+   mmesa->CurrentTexObj[0] = 0;
+   mmesa->CurrentTexObj[1] = 0;
+   /* Identity routing of GL texture units onto hardware units. */
+   mmesa->tmu_source[0] = 0;
+   mmesa->tmu_source[1] = 1;
+
+   mmesa->texAge[0] = 0;
+   mmesa->texAge[1] = 0;
+   
+   /* Initialize the software rasterizer and helper modules.
+    */
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   
+   _swsetup_CreateContext( ctx );
+
+   /* Install the customized pipeline:
+    */
+   _tnl_destroy_pipeline( ctx );
+   _tnl_install_pipeline( ctx, mga_pipeline );
+
+   /* Configure swrast and T&L to match hardware characteristics:
+    */
+   _swrast_allow_pixel_fog( ctx, GL_FALSE );
+   _swrast_allow_vertex_fog( ctx, GL_TRUE );
+   _tnl_allow_pixel_fog( ctx, GL_FALSE );
+   _tnl_allow_vertex_fog( ctx, GL_TRUE );
+
+   mmesa->primary_offset = mmesa->mgaScreen->primary.handle;
+
+   /* Both already hold these values from earlier in this function. */
+   ctx->DriverCtx = (void *) mmesa;
+   mmesa->glCtx = ctx;
+
+   driInitExtensions( ctx, card_extensions, GL_FALSE );
+
+   if (MGA_IS_G400(MGA_CONTEXT(ctx))) {
+      driInitExtensions( ctx, g400_extensions, GL_FALSE );
+   }
+
+   if ( driQueryOptionb( &mmesa->optionCache, "arb_vertex_program" ) ) {
+      driInitExtensions(ctx, ARB_vp_extensions, GL_FALSE);
+   }
+   
+   if ( driQueryOptionb( &mmesa->optionCache, "nv_vertex_program" ) ) {
+      driInitExtensions( ctx, NV_vp_extensions, GL_FALSE );
+   }
+
+	
+   /* XXX these should really go right after _mesa_init_driver_functions() */
+   mgaDDInitStateFuncs( ctx );
+   mgaDDInitSpanFuncs( ctx );
+   mgaDDInitPixelFuncs( ctx );
+   mgaDDInitTriFuncs( ctx );
+
+   mgaInitVB( ctx );
+   mgaInitState( mmesa );
+
+   /* Same value as assigned right after context creation. */
+   driContextPriv->driverPrivate = (void *) mmesa;
+
+#if DO_DEBUG
+   MGA_DEBUG = driParseDebugString( getenv( "MGA_DEBUG" ),
+				    debug_control );
+#endif
+
+   /* Record the current UST as the initial swap timestamp. */
+   (*sPriv->systemTime->getUST)( & mmesa->swap_ust );
+
+   /* The "no_rast" option forces the MGA_FALLBACK_DISABLE fallback. */
+   if (driQueryOptionb(&mmesa->optionCache, "no_rast")) {
+      fprintf(stderr, "disabling 3D acceleration\n");
+      FALLBACK(mmesa->glCtx, MGA_FALLBACK_DISABLE, 1);
+   }
+
+   return GL_TRUE;
+}
+
+/**
+ * Destroy the driver-private context attached to \p driContextPriv.
+ *
+ * Tears down the swsetup, tnl, vbo and swrast helper contexts, the vertex
+ * buffer state and the Mesa context itself.  If this context was the last
+ * user of its share group (shared state RefCount of 1 before destruction),
+ * the texture heaps are destroyed as well.  Finally the option cache and
+ * the mgaContext are freed.
+ */
+static void
+mgaDestroyContext(__DRIcontext *driContextPriv)
+{
+   mgaContextPtr mmesa = (mgaContextPtr) driContextPriv->driverPrivate;
+
+   if (MGA_DEBUG&DEBUG_VERBOSE_DRI)
+      fprintf( stderr, "[%s:%d] mgaDestroyContext start\n",
+	       __FILE__, __LINE__ );
+
+   assert(mmesa); /* should never be null */
+   if (mmesa) {
+      GLcontext *ctx = mmesa->glCtx;
+
+      /* Must be sampled before the Mesa context (and with it possibly the
+       * shared state) is destroyed.
+       */
+      const GLboolean lastInShareGroup = (ctx->Shared->RefCount == 1);
+
+      _swsetup_DestroyContext( ctx );
+      _tnl_DestroyContext( ctx );
+      _vbo_DestroyContext( ctx );
+      _swrast_DestroyContext( ctx );
+
+      mgaFreeVB( ctx );
+
+      /* free the Mesa context */
+      ctx->DriverCtx = NULL;
+      _mesa_destroy_context( ctx );
+
+      if ( lastInShareGroup ) {
+         /* This share group is about to go away, free our private
+          * texture object data.
+          */
+         int heap;
+
+         for ( heap = 0 ; heap < mmesa->nr_heaps ; heap++ ) {
+            driDestroyTextureHeap( mmesa->texture_heaps[ heap ] );
+            mmesa->texture_heaps[ heap ] = NULL;
+         }
+
+         assert( is_empty_list( & mmesa->swapped ) );
+      }
+
+      /* free the option cache */
+      driDestroyOptionCache (&mmesa->optionCache);
+
+      FREE(mmesa);
+   }
+
+   if (MGA_DEBUG&DEBUG_VERBOSE_DRI)
+      fprintf( stderr, "[%s:%d] mgaDestroyContext done\n",
+	       __FILE__, __LINE__ );
+}
+
+
+/**
+ * Create one hardware renderbuffer and attach it to \p fb.
+ *
+ * \param fb           framebuffer receiving the renderbuffer
+ * \param index        attachment point (BUFFER_*)
+ * \param format       Mesa format of the renderbuffer
+ * \param offset       buffer offset, passed through to driNewRenderbuffer()
+ * \param pitch        buffer pitch, passed through to driNewRenderbuffer()
+ * \param screen       screen private supplying the bytes-per-pixel value
+ * \param driDrawPriv  drawable the renderbuffer belongs to
+ * \param mesaVis      visual used to select the span functions
+ *
+ * \return GL_TRUE on success, GL_FALSE if the renderbuffer could not be
+ *         allocated (nothing is attached in that case).
+ */
+static GLboolean
+mgaAttachRenderbuffer( struct gl_framebuffer *fb, GLuint index,
+                       gl_format format, GLint offset, GLint pitch,
+                       const mgaScreenPrivate *screen,
+                       __DRIdrawable *driDrawPriv,
+                       const __GLcontextModes *mesaVis )
+{
+   driRenderbuffer *rb = driNewRenderbuffer( format,
+                                             NULL,
+                                             screen->cpp,
+                                             offset, pitch,
+                                             driDrawPriv );
+   if (!rb) {
+      return GL_FALSE;
+   }
+
+   mgaSetSpanFunctions( rb, mesaVis );
+   _mesa_add_renderbuffer( fb, index, &rb->Base );
+   return GL_TRUE;
+}
+
+
+/**
+ * Create the Mesa framebuffer for a window drawable and store it in
+ * driDrawPriv->driverPrivate.
+ *
+ * Hardware renderbuffers are attached for the front buffer, the back buffer
+ * (double-buffered visuals), the depth buffer (16, 24 or 32 bits) and the
+ * stencil buffer (only with a 24-bit depth buffer).  A stencil buffer
+ * requested with any other depth size, and the accumulation buffer, are
+ * provided as software renderbuffers.
+ *
+ * \param driScrnPriv  screen the drawable lives on
+ * \param driDrawPriv  drawable to create the framebuffer for
+ * \param mesaVis      visual describing the requested buffers
+ * \param isPixmap     pixmaps are not implemented and always fail
+ *
+ * \return GL_TRUE on success; GL_FALSE for pixmaps or when the framebuffer
+ *         or one of its renderbuffers cannot be allocated.  On failure
+ *         driDrawPriv->driverPrivate is left untouched.
+ */
+static GLboolean
+mgaCreateBuffer( __DRIscreen *driScrnPriv,
+                 __DRIdrawable *driDrawPriv,
+                 const __GLcontextModes *mesaVis,
+                 GLboolean isPixmap )
+{
+   mgaScreenPrivate *screen = (mgaScreenPrivate *) driScrnPriv->private;
+   struct gl_framebuffer *fb;
+   GLboolean swStencil;
+   GLboolean ok;
+
+   if (isPixmap) {
+      return GL_FALSE; /* not implemented */
+   }
+
+   /* Stencil is only available in hardware next to a 24-bit depth buffer. */
+   swStencil = (mesaVis->stencilBits > 0 &&
+                mesaVis->depthBits != 24);
+
+   fb = _mesa_create_framebuffer(mesaVis);
+   if (!fb) {
+      return GL_FALSE;
+   }
+
+   ok = mgaAttachRenderbuffer(fb, BUFFER_FRONT_LEFT, MESA_FORMAT_ARGB8888,
+                              screen->frontOffset, screen->frontPitch,
+                              screen, driDrawPriv, mesaVis);
+
+   if (ok && mesaVis->doubleBufferMode) {
+      ok = mgaAttachRenderbuffer(fb, BUFFER_BACK_LEFT, MESA_FORMAT_ARGB8888,
+                                 screen->backOffset, screen->backPitch,
+                                 screen, driDrawPriv, mesaVis);
+   }
+
+   if (ok) {
+      switch (mesaVis->depthBits) {
+      case 16:
+         ok = mgaAttachRenderbuffer(fb, BUFFER_DEPTH, MESA_FORMAT_Z16,
+                                    screen->depthOffset, screen->depthPitch,
+                                    screen, driDrawPriv, mesaVis);
+         break;
+      case 24:
+         /* XXX is this right? */
+         ok = mgaAttachRenderbuffer(fb, BUFFER_DEPTH,
+                                    mesaVis->stencilBits ? MESA_FORMAT_Z24_S8
+                                                         : MESA_FORMAT_Z32,
+                                    screen->depthOffset, screen->depthPitch,
+                                    screen, driDrawPriv, mesaVis);
+         break;
+      case 32:
+         ok = mgaAttachRenderbuffer(fb, BUFFER_DEPTH, MESA_FORMAT_Z32,
+                                    screen->depthOffset, screen->depthPitch,
+                                    screen, driDrawPriv, mesaVis);
+         break;
+      default:
+         /* No hardware depth buffer for other depth sizes. */
+         break;
+      }
+   }
+
+   if (ok && mesaVis->stencilBits > 0 && !swStencil) {
+      /* The hardware stencil is described with the depth buffer's offset
+       * and pitch.
+       */
+      ok = mgaAttachRenderbuffer(fb, BUFFER_STENCIL, MESA_FORMAT_S8,
+                                 screen->depthOffset, screen->depthPitch,
+                                 screen, driDrawPriv, mesaVis);
+   }
+
+   if (!ok) {
+      /* Drop our reference; this also releases what was attached so far. */
+      _mesa_reference_framebuffer(&fb, NULL);
+      return GL_FALSE;
+   }
+
+   _mesa_add_soft_renderbuffers(fb,
+                                GL_FALSE, /* color */
+                                GL_FALSE, /* depth */
+                                swStencil,
+                                mesaVis->accumRedBits > 0,
+                                GL_FALSE, /* alpha */
+                                GL_FALSE /* aux */);
+   driDrawPriv->driverPrivate = (void *) fb;
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Release the framebuffer stored in driDrawPriv->driverPrivate by
+ * re-pointing that reference at NULL.
+ */
+static void
+mgaDestroyBuffer(__DRIdrawable *driDrawPriv)
+{
+   GLframebuffer **fbSlot = (GLframebuffer **)(&(driDrawPriv->driverPrivate));
+
+   _mesa_reference_framebuffer(fbSlot, NULL);
+}
+
+/**
+ * Swap the buffers of a drawable.
+ *
+ * The drawable must have a context with driver-private data bound to it;
+ * otherwise a Mesa problem is reported and nothing happens.  For a
+ * single-buffered visual this is a no-op.
+ */
+static void
+mgaSwapBuffers(__DRIdrawable *dPriv)
+{
+   mgaContextPtr mmesa;
+   GLcontext *ctx;
+
+   if (!dPriv->driContextPriv || !dPriv->driContextPriv->driverPrivate) {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      _mesa_problem(NULL, "%s: drawable has no context!\n", __FUNCTION__);
+      return;
+   }
+
+   mmesa = (mgaContextPtr) dPriv->driContextPriv->driverPrivate;
+   ctx = mmesa->glCtx;
+
+   if (!ctx->Visual.doubleBufferMode) {
+      return;
+   }
+
+   _mesa_notifySwapBuffers( ctx );
+   mgaCopyBuffer( dPriv );
+}
+
+/**
+ * Called when a context is unbound.  Marks all of the context's state
+ * dirty (when the context has driver-private data).
+ *
+ * \return always GL_TRUE.
+ */
+static GLboolean
+mgaUnbindContext(__DRIcontext *driContextPriv)
+{
+   mgaContextPtr mmesa = (mgaContextPtr) driContextPriv->driverPrivate;
+
+   if (mmesa != NULL) {
+      mmesa->dirty = ~0;
+   }
+
+   return GL_TRUE;
+}
+
+/**
+ * Make a context current with the given draw and read drawables, or
+ * release the current context when \p driContextPriv is NULL.
+ *
+ * When the draw drawable differs from the one the context last used, the
+ * drawable's vblank state is initialised if its swap interval is still
+ * unset ((unsigned)-1), and all context state and both cliprect sets are
+ * marked dirty.
+ *
+ * \return always GL_TRUE.
+ */
+static GLboolean
+mgaMakeCurrent(__DRIcontext *driContextPriv,
+               __DRIdrawable *driDrawPriv,
+               __DRIdrawable *driReadPriv)
+{
+   mgaContextPtr mmesa;
+
+   if (!driContextPriv) {
+      _mesa_make_current(NULL, NULL, NULL);
+      return GL_TRUE;
+   }
+
+   mmesa = (mgaContextPtr) driContextPriv->driverPrivate;
+
+   if (mmesa->driDrawable != driDrawPriv) {
+      if (driDrawPriv->swap_interval == (unsigned)-1) {
+         /* Without an IRQ, vblank waits are flagged as unavailable;
+          * otherwise use the configured default.
+          */
+         if (mmesa->mgaScreen->irq == 0) {
+            driDrawPriv->vblFlags = VBLANK_FLAG_NO_IRQ;
+         }
+         else {
+            driDrawPriv->vblFlags =
+               driGetDefaultVBlankFlags(&mmesa->optionCache);
+         }
+
+         driDrawableInitVBlank( driDrawPriv );
+      }
+
+      mmesa->driDrawable = driDrawPriv;
+      mmesa->dirty = ~0;
+      mmesa->dirty_cliprects = (MGA_FRONT|MGA_BACK);
+   }
+
+   mmesa->driReadable = driReadPriv;
+
+   _mesa_make_current(mmesa->glCtx,
+                      (GLframebuffer *) driDrawPriv->driverPrivate,
+                      (GLframebuffer *) driReadPriv->driverPrivate);
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Take the DRM hardware lock for \p mmesa and resynchronise the context
+ * with the state shared through the SAREA.
+ *
+ * \param mmesa  context acquiring the lock
+ * \param flags  passed unchanged to drmGetLock()
+ */
+void mgaGetLock( mgaContextPtr mmesa, GLuint flags )
+{
+   __DRIdrawable *dPriv = mmesa->driDrawable;
+   drm_mga_sarea_t *sarea = mmesa->sarea;
+   int me = mmesa->hHWContext;
+   int i;
+
+   drmGetLock(mmesa->driFd, mmesa->hHWContext, flags);
+
+   /* If the drawable's stamp changed since we last looked, refresh
+    * everything that depends on the drawable: cliprects and framebuffer
+    * size.
+    */
+   DRI_VALIDATE_DRAWABLE_INFO( mmesa->driScreen, dPriv );
+   if (*(dPriv->pStamp) != mmesa->lastStamp) {
+      mmesa->lastStamp = *(dPriv->pStamp);
+      mmesa->SetupNewInputs |= VERT_BIT_POS;
+      mmesa->dirty_cliprects = (MGA_FRONT|MGA_BACK);
+      mgaUpdateRects( mmesa, (MGA_FRONT|MGA_BACK) );
+      driUpdateFramebufferSize(mmesa->glCtx, dPriv);
+   }
+
+   mmesa->dirty |= MGA_UPLOAD_CONTEXT | MGA_UPLOAD_CLIPRECTS;
+
+   mmesa->sarea->dirty |= MGA_UPLOAD_CONTEXT;
+
+   /* Another context owned the hardware last: schedule a full re-upload of
+    * context, texture and pipe state, and record ourselves as the owner.
+    */
+   if (sarea->ctxOwner != me) {
+      mmesa->dirty |= (MGA_UPLOAD_CONTEXT | MGA_UPLOAD_TEX0 |
+		       MGA_UPLOAD_TEX1 | MGA_UPLOAD_PIPE);
+      sarea->ctxOwner=me;
+   }
+
+   /* Run the texture ageing check on every heap of this context. */
+   for ( i = 0 ; i < mmesa->nr_heaps ; i++ ) {
+      DRI_AGE_TEXTURES( mmesa->texture_heaps[ i ] );
+   }
+}
+
+
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ *
+ * Checks the DRI, DDX and DRM versions against the minimum versions this
+ * driver expects, creates the per-screen private data and builds the list
+ * of framebuffer configs.  The depth/stencil sizes offered follow the
+ * colour depth: 16-bit depth and no stencil for 2 bytes per pixel, 24-bit
+ * depth and 8-bit stencil otherwise.  A back buffer is offered only when
+ * the back and depth buffer offsets differ.
+ *
+ * \todo maybe fold this into mgaInitDriver
+ *
+ * \return the NULL-terminated list of configs supported by this driver, or
+ *         NULL on failure.  On failure no screen private data is left
+ *         allocated.
+ */
+static const __DRIconfig **mgaInitScreen(__DRIscreen *psp)
+{
+   static const __DRIversion ddx_expected = { 1, 2, 0 };
+   static const __DRIversion dri_expected = { 4, 0, 0 };
+   static const __DRIversion drm_expected = { 3, 0, 0 };
+   MGADRIPtr dri_priv = (MGADRIPtr) psp->pDevPriv;
+   const __DRIconfig **configs;
+
+   if ( ! driCheckDriDdxDrmVersions2( "MGA",
+				      &psp->dri_version, & dri_expected,
+				      &psp->ddx_version, & ddx_expected,
+				      &psp->drm_version, & drm_expected ) )
+      return NULL;
+
+
+   if (!mgaInitDriver(psp))
+       return NULL;
+
+   configs = mgaFillInModes( psp,
+			     dri_priv->cpp * 8,
+			     (dri_priv->cpp == 2) ? 16 : 24,
+			     (dri_priv->cpp == 2) ? 0  : 8,
+			     (dri_priv->backOffset != dri_priv->depthOffset) );
+   if (configs == NULL) {
+      /* The screen private was fully set up by mgaInitDriver(); release it
+       * here so a failed config creation does not leak it.
+       */
+      mgaDestroyScreen( psp );
+   }
+
+   return configs;
+}
+
+
+/**
+ * Get information about previous buffer swaps.
+ *
+ * Copies the swap counters of the context bound to \p dPriv into
+ * \p sInfo.  The missed-swap usage is only computed when at least one swap
+ * was missed; otherwise it is reported as 0.0.
+ *
+ * \return 0 on success, -1 if \p dPriv, its context, the context's driver
+ *         data or \p sInfo is NULL.
+ */
+static int
+getSwapInfo( __DRIdrawable *dPriv, __DRIswapInfo * sInfo )
+{
+   mgaContextPtr  mmesa;
+
+   if ( dPriv == NULL || sInfo == NULL ) {
+      return -1;
+   }
+   if ( dPriv->driContextPriv == NULL
+	|| dPriv->driContextPriv->driverPrivate == NULL ) {
+      return -1;
+   }
+
+   mmesa = (mgaContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   sInfo->swap_count = mmesa->swap_count;
+   sInfo->swap_ust = mmesa->swap_ust;
+   sInfo->swap_missed_count = mmesa->swap_missed_count;
+
+   if ( sInfo->swap_missed_count != 0 ) {
+      sInfo->swap_missed_usage =
+	 driCalculateSwapUsage( dPriv, 0, mmesa->swap_missed_ust );
+   }
+   else {
+      sInfo->swap_missed_usage = 0.0;
+   }
+
+   return 0;
+}
+
+/* Driver entry-point table for the MGA driver.  Screen, context, buffer
+ * and swap-info hooks are implemented in this file; the MSC hooks use the
+ * common driDrawableGetMSC32 / driWaitForMSC32 helpers.  WaitForSBC and
+ * SwapBuffersMSC are not provided.
+ */
+const struct __DriverAPIRec driDriverAPI = {
+   .InitScreen      = mgaInitScreen,
+   .DestroyScreen   = mgaDestroyScreen,
+   .CreateContext   = mgaCreateContext,
+   .DestroyContext  = mgaDestroyContext,
+   .CreateBuffer    = mgaCreateBuffer,
+   .DestroyBuffer   = mgaDestroyBuffer,
+   .SwapBuffers     = mgaSwapBuffers,
+   .MakeCurrent     = mgaMakeCurrent,
+   .UnbindContext   = mgaUnbindContext,
+   .GetSwapInfo     = getSwapInfo,
+   .GetDrawableMSC  = driDrawableGetMSC32,
+   .WaitForMSC      = driWaitForMSC32,
+   .WaitForSBC      = NULL,
+   .SwapBuffersMSC  = NULL
+};
+
+/* This is the table of extensions that the loader will dlsym() for.
+ * It exposes the core and legacy DRI extensions and is NULL-terminated.
+ */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+    &driCoreExtension.base,
+    &driLegacyExtension.base,
+    NULL
+};
diff --git a/src/mesa/drivers/dri/mga/mga_xmesa.h b/src/mesa/drivers/dri/mga/mga_xmesa.h
new file mode 100644
index 0000000000..aee146090c
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mga_xmesa.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef _MGA_INIT_H_
+#define _MGA_INIT_H_
+
+#include <sys/time.h>
+#include "dri_util.h"
+#include "mga_drm.h"
+#include "main/mtypes.h"
+#include "mgaregs.h"
+#include "xmlconfig.h"
+
+typedef struct mga_screen_private_s {
+   /**
+    * Chipset "family" of this card.  Currently only \c MGA_CARD_TYPE_G200 and
+    * \c MGA_CARD_TYPE_G400 are possible.
+    */
+   int chipset;
+
+
+   /**
+    * Characters (bytes) per-pixel for both the front and back buffers.
+    * 
+    * \note
+    * This is also implicitly the bytes per-pixel for the depth-buffer.
+    */
+   int cpp;
+
+   GLint agpMode;		/**< handed to driGetRendererString() by mgaGetString() */
+   unsigned int irq;		/**< IRQ number (0 means none) */
+
+   unsigned int frontOffset;
+   unsigned int frontPitch;
+   unsigned int backOffset;
+   unsigned int backPitch;
+
+   unsigned int depthOffset;
+   unsigned int depthPitch;
+
+   unsigned int textureOffset[MGA_NR_TEX_HEAPS];
+   unsigned int textureSize[MGA_NR_TEX_HEAPS];
+   char *texVirtual[MGA_NR_TEX_HEAPS];
+
+
+   __DRIscreen *sPriv;
+   drmBufMapPtr  bufs;		/**< bufs->list[] is indexed by mga_get_buffer_ioctl() */
+
+   drmRegion mmio;		/**< mmio.map is the base address used by MGA_BASE() */
+   drmRegion primary;
+   unsigned int sarea_priv_offset;
+
+   /** Configuration cache with default values for all contexts */
+   driOptionCache optionCache;
+} mgaScreenPrivate;
+
+
+/**
+ * mgaRenderbuffer, derived from Mesa's gl_renderbuffer (Base is the first member)
+ */
+typedef struct {
+   struct gl_renderbuffer Base;
+   /* XXX per-window info should go here */
+   int foo, bar;   /* NOTE(review): look like placeholders; no use visible here */
+} mgaRenderbuffer;
+
+
+
+#include "mgacontext.h"
+
+extern void mgaGetLock( mgaContextPtr mmesa, GLuint flags );
+extern void mgaEmitHwStateLocked( mgaContextPtr mmesa );
+extern void mgaEmitScissorValues( mgaContextPtr mmesa, int box_nr, int emit );
+
+#define GET_DISPATCH_AGE( mmesa ) ((mmesa)->sarea->last_dispatch) /* reads sarea->last_dispatch */
+
+
+
+/* Lock the hardware and validate our state: DRM_CAS is tried first and,
+ * if it leaves __ret non-zero, mgaGetLock( mmesa, 0 ) is called. */
+#define LOCK_HARDWARE( mmesa )					\
+  do {								\
+    char __ret=0;						\
+    DRM_CAS(mmesa->driHwLock, mmesa->hHWContext,		\
+	    (DRM_LOCK_HELD|mmesa->hHWContext), __ret);		\
+    if (__ret)							\
+        mgaGetLock( mmesa, 0 );					\
+  } while (0)
+
+
+/* Take the hardware lock, then UPDATE_LOCK with QUIESCENT | FLUSH.
+ */
+#define LOCK_HARDWARE_QUIESCENT( mmesa ) do {	                        \
+	LOCK_HARDWARE( mmesa );			                        \
+	UPDATE_LOCK( mmesa, DRM_LOCK_QUIESCENT | DRM_LOCK_FLUSH );	\
+} while (0)
+
+
+/* Unlock the hardware for the given context (DRM_UNLOCK on its driHwLock).
+ * NOTE(review): the expansion already ends in ';' - unsafe before 'else'. */
+#define UNLOCK_HARDWARE(mmesa) 				\
+    DRM_UNLOCK(mmesa->driFd, mmesa->driHwLock, mmesa->hHWContext);
+
+
+/* Freshen our snapshot of the drawable origin: with the hardware lock
+ * held, copy drawX/drawY into lastX/lastY. */
+#define REFRESH_DRAWABLE_INFO( mmesa )		\
+do {						\
+   LOCK_HARDWARE( mmesa );			\
+   mmesa->lastX = mmesa->drawX; 		\
+   mmesa->lastY = mmesa->drawY; 		\
+   UNLOCK_HARDWARE( mmesa );			\
+} while (0)
+
+
+#define GET_DRAWABLE_LOCK( mmesa ) while(0)	/* empty loop: takes no lock */
+#define RELEASE_DRAWABLE_LOCK( mmesa ) while(0)	/* empty loop: nothing to release */
+
+
+/* The 2D driver macros are busted -- we can't use them here as they
+ * rely on the 2D driver data structures rather than taking an explicit
+ * base address.  These use a variable named mmesa from the caller's scope;
+ * MGA_BASE ignores its argument.  Expansions are fully parenthesized. */
+#define MGA_BASE( reg )		((unsigned long)(mmesa->mgaScreen->mmio.map))
+#define MGA_ADDR( reg )		(MGA_BASE(reg) + (reg))
+
+#define MGA_DEREF( reg )	(*(volatile uint32_t *)MGA_ADDR( reg ))
+#define MGA_READ( reg )		MGA_DEREF( reg )
+
+#endif
diff --git a/src/mesa/drivers/dri/mga/mgacontext.h b/src/mesa/drivers/dri/mga/mgacontext.h
new file mode 100644
index 0000000000..4141565931
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgacontext.h
@@ -0,0 +1,359 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef MGALIB_INC
+#define MGALIB_INC
+
+#include <stdint.h>
+#include "drm.h"
+#include "mga_drm.h"
+#include "dri_util.h"
+#include "xf86drm.h"
+#include "main/mtypes.h"
+#include "main/mm.h"
+#include "main/colormac.h"
+#include "main/macros.h"
+#include "texmem.h"
+#include "xmlconfig.h"
+
+#define MGA_SET_FIELD(reg,mask,val)  ((reg) = ((reg) & (mask)) | ((val) & ~(mask)))
+#define MGA_FIELD(field,val) (((val) << (field ## _SHIFT)) & ~(field ## _MASK))
+#define MGA_GET_FIELD(field, val) (((val) & ~(field ## _MASK)) >> (field ## _SHIFT))
+/* Masks above are inverted: a field's bits are the CLEAR bits of its mask. */
+#define MGA_IS_G200(mmesa) ((mmesa)->mgaScreen->chipset == MGA_CARD_TYPE_G200)
+#define MGA_IS_G400(mmesa) ((mmesa)->mgaScreen->chipset == MGA_CARD_TYPE_G400)
+
+
+/* Software fallback reasons, one bit per flag below.  Known causes include:
+ *    - texture env GL_BLEND -- can be fixed
+ *    - 1D and 3D textures
+ *    - incomplete textures
+ *    - GL_DEPTH_FUNC == GL_NEVER not in h/w
+ */
+#define MGA_FALLBACK_TEXTURE        0x1
+#define MGA_FALLBACK_DRAW_BUFFER    0x2
+#define MGA_FALLBACK_READ_BUFFER    0x4
+#define MGA_FALLBACK_BLEND          0x8
+#define MGA_FALLBACK_RENDERMODE     0x10
+#define MGA_FALLBACK_STENCIL        0x20
+#define MGA_FALLBACK_DEPTH          0x40
+#define MGA_FALLBACK_BORDER_MODE    0x80
+#define MGA_FALLBACK_DISABLE        0x100
+
+
+/* Use the templated vertex formats:
+ */
+#define TAG(x) mga##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+typedef struct mga_context_t mgaContext;
+typedef struct mga_context_t *mgaContextPtr;
+
+typedef void (*mga_tri_func)( mgaContextPtr, mgaVertex *, mgaVertex *,
+			       mgaVertex * );	/* type of mga_context_t::draw_tri */
+typedef void (*mga_line_func)( mgaContextPtr, mgaVertex *, mgaVertex * );	/* draw_line */
+typedef void (*mga_point_func)( mgaContextPtr, mgaVertex * );	/* draw_point */
+
+
+
+/* Texture environment color tests on a packed 32-bit value: RGB is the low
+ * 24 bits, alpha is (c >> 24); RGBA_EQUAL compares c with its low byte x4. */
+#define RGB_ZERO(c)   (((c) & 0xffffff) == 0x000000)
+#define RGB_ONE(c)    (((c) & 0xffffff) == 0xffffff)
+#define ALPHA_ZERO(c) (((c) >> 24) == 0x00)
+#define ALPHA_ONE(c)  (((c) >> 24) == 0xff)
+#define RGBA_EQUAL(c) ((c) == PACK_COLOR_8888( (c) & 0xff, (c) & 0xff, \
+                                               (c) & 0xff, (c) & 0xff ))
+
+struct mga_texture_object_s;
+struct mga_screen_private_s;
+
+#define G200_TEX_MAXLEVELS 5
+#define G400_TEX_MAXLEVELS 11
+
+typedef struct mga_texture_object_s
+{
+   driTextureObject   base;
+
+   /* The G200 only has the ability to use 5 mipmap levels (including the
+    * base level).  The G400 does not have this restriction, but it still
+    * only has 5 offset pointers in the hardware.  The trick on the G400 is
+    * that up to the first 4 offset pointers point to mipmap levels.  The last
+    * offset pointer tells how large the preceding mipmap is.  This value is
+    * then used to determine where the remaining mipmaps are.
+    * 
+    * For example, if the first offsets[0] through offsets[2] are used as
+    * pointers, then offset[3] will be the size of the mipmap pointed to by
+    * offsets[2].  So mipmap level 3 will be at (offsets[2]+offsets[3]).  For
+    * each successive mipmap level, offsets[3] is divided by 4 and added to
+    * the previous address.  So mipmap level 4 will be at 
+    * (offsets[2]+offsets[3]+(offsets[3] / 4)).
+    * 
+    * The last pointer is selected by setting TO_texorgoffsetsel in its
+    * pointer.  In the previous example, offset[2] would have
+    * TO_texorgoffsetsel or'ed in before writing it to the hardware.
+    * 
+    * In the current driver all of the mipmaps are packed together linearly
+    * with mipmap level 0.  Therefore offsets[0] points to the base of the
+    * texture (and has TO_texorgoffsetsel or'ed in), and offsets[1] is the
+    * size of the base texture.
+    *
+    * There is a possible optimization available here.  At times the driver
+    * may not be able to allocate a single block of memory for the complete
+    * texture without ejecting some other textures from memory.  It may be
+    * possible to put some of the lower mipmap levels (i.e., the larger
+    * mipmaps) in memory separate from the higher levels.
+    *
+    * The implementation should be fairly obvious, but getting it "right" would
+    * likely be non-trivial.  A first allocation for the entire texture would
+    * be attempted with a flag that says "don't eject other textures."  If
+    * that failed, an additional allocation would be attempted for just the
+    * base map.  The process would repeat with the block of lower maps.  The
+    * tricky parts would be in detecting when some of the levels had been
+    * ejected from texture memory by other textures and preventing the
+    * 4th allocation (for all the smallest mipmap levels) from kicking out
+    * any of the first three.
+    * 
+    * This array holds G400_TEX_MAXLEVELS pointers to remove an if-statement
+    * in a loop in mgaSetTexImages.  Values past G200_TEX_MAXLEVELS are not
+    * used.
+    */
+   GLuint             offsets[G400_TEX_MAXLEVELS];
+
+   int                texelBytes;
+   GLuint             age;
+
+   drm_mga_texture_regs_t setup;
+
+   /* If one texture dimension wraps with GL_CLAMP and the other with
+    * GL_CLAMP_TO_EDGE, we have to fallback to software.  We would also have
+    * to fallback for GL_CLAMP_TO_BORDER.
+    */
+   GLboolean          border_fallback;
+   /* Depending on multitexturing and environment color
+    * GL_BLEND may have to be a software fallback.
+    */
+   GLboolean texenv_fallback;
+} mgaTextureObject_t;
+
+struct mga_hw_state {   /* embedded in struct mga_context_t as member 'hw' */
+   GLuint   specen;
+   GLuint   cull;
+   GLuint   cull_dualtex;
+   GLuint   stencil;
+   GLuint   stencilctl;
+   GLuint   stencil_enable;
+   GLuint   zmode;
+   GLuint   rop;
+   GLuint   alpha_func;
+   GLuint   alpha_func_enable;
+   GLuint   blend_func;
+   GLuint   blend_func_enable;
+   GLuint   alpha_sel;
+};
+
+struct mga_context_t {
+
+   GLcontext *glCtx;
+   unsigned int lastStamp;		/* fullscreen breaks dpriv->laststamp,
+					 * need to shadow it here. */
+
+   /* Hardware state management
+    */
+   struct mga_hw_state hw;
+
+   /* Bookkeeping for texturing
+    */
+   unsigned           nr_heaps;
+   driTexHeap       * texture_heaps[ MGA_NR_TEX_HEAPS ];
+   driTextureObject   swapped;
+
+   struct mga_texture_object_s *CurrentTexObj[2];
+
+
+   /* Map GL texture units onto hardware.
+    */
+   GLuint tmu_source[2];
+   
+   int texture_depth;
+
+   /* Manage fallbacks
+    */
+   GLuint Fallback;  
+
+   /* Texture environment color.
+    */
+   unsigned int envcolor[2];
+   GLboolean fcol_used;
+   GLboolean force_dualtex;
+
+   /* Rasterization state 
+    */
+   GLuint SetupNewInputs;
+   GLuint SetupIndex;
+   GLuint RenderIndex;
+   
+   GLuint hw_primitive;
+   GLenum raster_primitive;
+   GLenum render_primitive;
+
+   GLubyte *verts;
+   GLint vertex_stride_shift;
+   GLuint vertex_format;		
+   GLuint vertex_size;
+
+   /* Fallback rasterization functions 
+    */
+   mga_point_func draw_point;
+   mga_line_func draw_line;
+   mga_tri_func draw_tri;
+
+
+   /* Manage driver and hardware state
+    */
+   GLuint        NewGLState; 
+   GLuint        dirty;
+
+   drm_mga_context_regs_t setup;
+
+   GLuint        ClearColor;
+   GLuint        ClearDepth;
+   GLuint        poly_stipple;
+   GLfloat       depth_scale;
+
+   GLuint        depth_clear_mask;
+   GLuint        stencil_clear_mask;
+   GLuint        hw_stencil;
+   GLuint        haveHwStipple;
+   GLfloat       hw_viewport[16];
+
+   /* Dma buffers
+    */
+   drmBufPtr  vertex_dma_buffer;
+   drmBufPtr  iload_buffer;	/* submitted, then reset to 0, by mga_iload_dma_ioctl() */
+
+   int64_t swap_ust;		/* UST sampled at the end of mgaCopyBuffer() */
+   int64_t swap_missed_ust;	/* UST sampled when mgaCopyBuffer() misses its vblank target */
+
+   GLuint swap_count;		/* incremented once per mgaCopyBuffer() call */
+   GLuint swap_missed_count;	/* incremented when the vblank target was missed */
+
+   uint32_t last_frame_fence;	/* set in mgaCopyBuffer(), waited on before the next swap */
+
+   /* Drawable, cliprect and scissor information
+    */
+   int dirty_cliprects;		/* MGA_FRONT/MGA_BACK bits: sets needing mgaUpdateRects() */
+   int draw_buffer;		/* which buffer are we rendering to */
+   unsigned int drawOffset;		/* draw buffer address in  space */
+   int readOffset;
+   int drawX, drawY;		/* origin of drawable in draw buffer */
+   int lastX, lastY;		/* detect DSTORG bug */
+   GLuint numClipRects;		/* cliprects for the draw buffer */
+   drm_clip_rect_t *pClipRects;
+   drm_clip_rect_t draw_rect;
+   drm_clip_rect_t scissor_rect;
+   int scissor;
+
+   drm_clip_rect_t tmp_boxes[2][MGA_NR_SAREA_CLIPRECTS];
+
+
+   /* Texture aging and DMA based aging.
+    */
+   unsigned int texAge[MGA_NR_TEX_HEAPS];/* texture LRU age  */
+   unsigned int dirtyAge;		/* buffer age for synchronization */
+
+   GLuint primary_offset;	/* subtracted from MGAREG_PRIMADDRESS reads */
+
+   /* Mirrors of some DRI state.
+    */
+   drm_context_t hHWContext;
+   drm_hw_lock_t *driHwLock;
+   int driFd;
+   __DRIdrawable *driDrawable;
+   __DRIdrawable *driReadable;
+
+   __DRIscreen *driScreen;
+   struct mga_screen_private_s *mgaScreen;
+   drm_mga_sarea_t *sarea;
+
+   /* Configuration cache
+    */
+   driOptionCache optionCache;
+};
+
+#define MGA_CONTEXT(ctx) ((mgaContextPtr)((ctx)->DriverCtx)) /* GLcontext -> driver context */
+
+
+
+
+/* ================================================================
+ * Debugging: MGA_DEBUG is a bitmask tested against DEBUG_VERBOSE_* below.
+ */
+#define DO_DEBUG		1
+
+#if DO_DEBUG
+extern int MGA_DEBUG;
+#else
+#define MGA_DEBUG		0
+#endif
+
+#define DEBUG_VERBOSE_MSG	0x01
+#define DEBUG_VERBOSE_DRI	0x02
+#define DEBUG_VERBOSE_IOCTL	0x04
+#define DEBUG_VERBOSE_TEXTURE   0x08
+#define DEBUG_VERBOSE_FALLBACK	0x10
+
+static INLINE GLuint mgaPackColor(GLuint cpp,
+                                  GLubyte r, GLubyte g,
+                                  GLubyte b, GLubyte a)
+{
+   /* 2 bytes/pixel packs as 565; 4 bytes/pixel packs as 8888 with alpha
+    * passed as the first component; every other pixel size yields 0.
+    */
+   if (cpp == 2)
+      return PACK_COLOR_565( r, g, b );
+   if (cpp == 4)
+      return PACK_COLOR_8888( a, r, g, b );
+   return 0;
+}
+
+
+/*
+ * Subpixel offsets for window coordinates (SUBPIXEL_Y evaluates to -0.375):
+ */
+#define SUBPIXEL_X (-0.5F)
+#define SUBPIXEL_Y (-0.5F + 0.125)
+
+
+#define MGA_WA_TRIANGLES     0x18000000
+#define MGA_WA_TRISTRIP_T0   0x02010200
+#define MGA_WA_TRIFAN_T0     0x01000408
+#define MGA_WA_TRISTRIP_T0T1 0x02010400
+#define MGA_WA_TRIFAN_T0T1   0x01000810
+
+#endif
diff --git a/src/mesa/drivers/dri/mga/mgadd.c b/src/mesa/drivers/dri/mga/mgadd.c
new file mode 100644
index 0000000000..2f23c0e514
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgadd.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "main/mtypes.h"
+#include "main/framebuffer.h"
+#include "main/mm.h"
+
+#include "mgacontext.h"
+#include "mgadd.h"
+#include "mga_xmesa.h"
+#include "utils.h"
+
+#define DRIVER_DATE	"20071017"
+
+
+/***************************************
+ * Mesa's Driver Functions
+ ***************************************/
+
+
+/* Answer GL_VENDOR and GL_RENDERER queries; any other name returns NULL. */
+static const GLubyte *mgaGetString( GLcontext *ctx, GLenum name )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+   static char buffer[128];	/* static: the returned pointer outlives the call */
+
+   switch ( name ) {
+   case GL_VENDOR:
+      return (const GLubyte *) "VA Linux Systems Inc.";
+
+   case GL_RENDERER:
+      /* driGetRendererString()'s return value is not needed here. */
+      (void) driGetRendererString( buffer,
+				   MGA_IS_G400(mmesa) ? "G400" :
+				   MGA_IS_G200(mmesa) ? "G200" : "MGA",
+				   DRIVER_DATE,
+				   mmesa->mgaScreen->agpMode );
+      return (const GLubyte *)buffer;
+
+   default:
+      return NULL;
+   }
+}
+
+
+void mgaInitDriverFuncs( struct dd_function_table *functions )
+{
+   functions->GetString = mgaGetString;   /* the only hook installed by this function */
+}
diff --git a/src/mesa/drivers/dri/mga/mgadd.h b/src/mesa/drivers/dri/mga/mgadd.h
new file mode 100644
index 0000000000..f92591df45
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgadd.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef MGADD_INC
+#define MGADD_INC
+
+#include "main/context.h"
+
+extern void mgaInitDriverFuncs( struct dd_function_table *functions );
+
+#endif
diff --git a/src/mesa/drivers/dri/mga/mgaioctl.c b/src/mesa/drivers/dri/mga/mgaioctl.c
new file mode 100644
index 0000000000..259358eaa3
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgaioctl.c
@@ -0,0 +1,747 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file mgaioctl.c
+ * MGA IOCTL related wrapper functions.
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Gareth Hughes <gareth@valinux.com>
+ */
+
+#include <errno.h>
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/dd.h"
+#include "swrast/swrast.h"
+
+#include "main/mm.h"
+#include "drm.h"
+#include "mga_drm.h"
+#include "mgacontext.h"
+#include "mgadd.h"
+#include "mgastate.h"
+#include "mgaioctl.h"
+
+#include "vblank.h"
+
+
+static int
+mgaSetFence( mgaContextPtr mmesa, uint32_t * fence )
+{
+    int status;
+
+    /* The fence ioctl is only issued when the DRM minor version is >= 2. */
+    if ( mmesa->driScreen->drm_version.minor < 2 )
+	return ENOSYS;
+    status = drmCommandWriteRead( mmesa->driScreen->fd, DRM_MGA_SET_FENCE,
+				  fence, sizeof( uint32_t ));
+    if ( status != 0 ) {
+	fprintf(stderr, "drmMgaSetFence: %d\n", status);
+	exit(1);
+    }
+    return status;
+}
+
+
+static int
+mgaWaitFence( mgaContextPtr mmesa, uint32_t fence, uint32_t * curr_fence )
+{
+    int ret = ENOSYS;	/* returned unchanged when the DRM minor version is < 2 */
+
+    if ( mmesa->driScreen->drm_version.minor >= 2 ) {
+	uint32_t temp = fence;
+	/* Write/read ioctl: temp carries the fence in and is updated in place. */
+	ret = drmCommandWriteRead( mmesa->driScreen->fd,
+				   DRM_MGA_WAIT_FENCE,
+				   & temp, sizeof( uint32_t ));
+	if (ret) {
+	    fprintf(stderr, "drmMgaWaitFence: %d\n", ret);
+	    exit(1);
+	}
+	/* Hand back whatever the ioctl left in temp, if the caller asked. */
+	if ( curr_fence ) {
+	    *curr_fence = temp;
+	}
+    }
+
+    return ret;
+}
+
+
+static void mga_iload_dma_ioctl(mgaContextPtr mmesa,
+				unsigned long dest,
+				int length)
+{
+   /* Hand the context's pending ILOAD buffer to the kernel, retrying while
+    * the ioctl reports -EBUSY, then clear mmesa->iload_buffer. */
+   drmBufPtr dma_buf = mmesa->iload_buffer;
+   drm_mga_iload_t request;
+   int status;
+   int attempt;
+
+   if (MGA_DEBUG&DEBUG_VERBOSE_IOCTL)
+      fprintf(stderr, "DRM_IOCTL_MGA_ILOAD idx %d dst %x length %d\n",
+	      dma_buf->idx, (int) dest, length);
+
+   if ( (length & MGA_ILOAD_MASK) != 0 ) {
+      UNLOCK_HARDWARE( mmesa );
+      fprintf( stderr, "%s: Invalid ILOAD datasize (%d), must be "
+	       "multiple of %u.\n", __FUNCTION__, length, MGA_ILOAD_ALIGN );
+      exit( 1 );
+   }
+
+   request.length = length;
+   request.dstorg = dest;
+   request.idx = dma_buf->idx;
+
+   for ( attempt = 0 ; ; attempt++ ) {
+      status = drmCommandWrite( mmesa->driFd, DRM_MGA_ILOAD,
+				&request, sizeof(request) );
+      if ( status != -EBUSY || attempt >= DRM_MGA_IDLE_RETRY )
+	 break;
+   }
+   if ( status < 0 ) {
+      printf("send iload retcode = %d\n", status);
+      exit(1);
+   }
+   mmesa->iload_buffer = 0;
+   if (MGA_DEBUG&DEBUG_VERBOSE_IOCTL)
+      fprintf(stderr, "finished iload dma put\n");
+}
+
+drmBufPtr mga_get_buffer_ioctl( mgaContextPtr mmesa )
+{
+   /* Request one MGA_BUFFER_SIZE DMA buffer from the kernel.  The request is
+    * repeated, with a flush + quiescent lock update between attempts, until
+    * drmDMA() returns 0 and grants a buffer of non-zero size.
+    */
+   int granted_idx = 0;
+   int granted_size = 0;
+   drmDMAReq req;
+   drmBufPtr result;
+   int status;
+
+   if (MGA_DEBUG&DEBUG_VERBOSE_IOCTL)
+      fprintf(stderr,  "Getting dma buffer\n");
+
+   /* Nothing is sent; exactly one buffer is asked for. */
+   req.send_count = 0;
+   req.send_list = NULL;
+   req.send_sizes = NULL;
+   req.request_count = 1;
+   req.request_size = MGA_BUFFER_SIZE;
+   req.request_list = &granted_idx;
+   req.request_sizes = &granted_size;
+   req.granted_count = 0;
+   req.flags = 0;
+   req.context = mmesa->hHWContext;
+
+   if (MGA_DEBUG&DEBUG_VERBOSE_IOCTL)
+      fprintf(stderr, "drmDMA (get) ctx %d count %d size 0x%x\n",
+	   req.context, req.request_count,
+	   req.request_size);
+
+   for (;;) {
+      status = drmDMA(mmesa->driFd, &req);
+
+      if (MGA_DEBUG&DEBUG_VERBOSE_IOCTL)
+	 fprintf(stderr, "retcode %d sz %d idx %d count %d\n",
+		 status,
+		 req.request_sizes[0],
+		 req.request_list[0],
+		 req.granted_count);
+
+      if (status == 0 && req.request_sizes[0] && req.granted_count)
+	 break;
+
+      if (MGA_DEBUG&DEBUG_VERBOSE_IOCTL)
+	 fprintf(stderr, "\n\nflush");
+
+      UPDATE_LOCK( mmesa, DRM_LOCK_FLUSH | DRM_LOCK_QUIESCENT );
+   }
+
+   result = &(mmesa->mgaScreen->bufs->list[granted_idx]);
+   result->used = 0;
+
+   if (MGA_DEBUG&DEBUG_VERBOSE_IOCTL) {
+      fprintf(stderr,
+	   "drmDMA (get) returns size[0] 0x%x idx[0] %d\n"
+	   "dma_buffer now: buf idx: %d size: %d used: %d addr %p\n",
+	   req.request_sizes[0], req.request_list[0],
+	   result->idx, result->total,
+	   result->used, result->address);
+      fprintf(stderr, "finished getbuffer\n");
+   }
+   return result;
+}
+
+
+
+
+static void
+mgaClear( GLcontext *ctx, GLbitfield mask )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   __DRIdrawable *dPriv = mmesa->driDrawable;
+   GLuint flags = 0;
+   GLuint clear_color = mmesa->ClearColor;
+   GLuint clear_depth = 0;
+   GLuint color_mask = 0;
+   GLuint depth_mask = 0;
+   int ret;
+   int i;
+   static int nrclears;
+   drm_mga_clear_t clear;
+   GLint cx, cy, cw, ch;
+   /* Buffers cleared via DRM_MGA_CLEAR are removed from mask; the rest go to swrast. */
+   FLUSH_BATCH( mmesa );
+
+   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
+      flags |= MGA_FRONT;
+      color_mask = mmesa->setup.plnwt;
+      mask &= ~BUFFER_BIT_FRONT_LEFT;
+   }
+
+   if ( mask & BUFFER_BIT_BACK_LEFT ) {
+      flags |= MGA_BACK;
+      color_mask = mmesa->setup.plnwt;
+      mask &= ~BUFFER_BIT_BACK_LEFT;
+   }
+
+   if ( (mask & BUFFER_BIT_DEPTH) && ctx->Depth.Mask ) {
+      flags |= MGA_DEPTH;
+      clear_depth = (mmesa->ClearDepth & mmesa->depth_clear_mask);
+      depth_mask |= mmesa->depth_clear_mask;
+      mask &= ~BUFFER_BIT_DEPTH;
+   }
+
+   if ( (mask & BUFFER_BIT_STENCIL) && mmesa->hw_stencil ) {
+      flags |= MGA_DEPTH;	/* stencil bits are merged into the depth clear value */
+      clear_depth |= (ctx->Stencil.Clear & mmesa->stencil_clear_mask);
+      depth_mask |= mmesa->stencil_clear_mask;
+      mask &= ~BUFFER_BIT_STENCIL;
+   }
+
+   if ( flags ) {
+      LOCK_HARDWARE( mmesa );
+
+      /* compute region after locking: */
+      cx = ctx->DrawBuffer->_Xmin;
+      cy = ctx->DrawBuffer->_Ymin;
+      cw = ctx->DrawBuffer->_Xmax - cx;
+      ch = ctx->DrawBuffer->_Ymax - cy;
+
+      if ( mmesa->dirty_cliprects )
+	 mgaUpdateRects( mmesa, (MGA_FRONT | MGA_BACK) );
+
+      /* flip top to bottom */
+      cy = dPriv->h-cy-ch;
+      cx += mmesa->drawX;
+      cy += mmesa->drawY;
+
+      if ( MGA_DEBUG & DEBUG_VERBOSE_IOCTL )
+	 fprintf( stderr, "Clear, bufs %x nbox %d\n",
+		  (int)flags, (int)mmesa->numClipRects );
+
+      /* Cliprects are sent in batches of at most MGA_NR_SAREA_CLIPRECTS. */
+      for (i = 0 ; i < mmesa->numClipRects ; )
+      {
+	 int nr = MIN2(i + MGA_NR_SAREA_CLIPRECTS, mmesa->numClipRects);
+	 drm_clip_rect_t *box = mmesa->pClipRects;
+	 drm_clip_rect_t *b = mmesa->sarea->boxes;
+	 int n = 0;
+
+	 if (cw != dPriv->w || ch != dPriv->h) {
+            /* clear subregion */
+	    for ( ; i < nr ; i++) {
+	       GLint x = box[i].x1;
+	       GLint y = box[i].y1;
+	       GLint w = box[i].x2 - x;
+	       GLint h = box[i].y2 - y;
+
+	       if (x < cx) w -= cx - x, x = cx;
+	       if (y < cy) h -= cy - y, y = cy;
+	       if (x + w > cx + cw) w = cx + cw - x;
+	       if (y + h > cy + ch) h = cy + ch - y;
+	       if (w <= 0) continue;
+	       if (h <= 0) continue;
+
+	       b->x1 = x;
+	       b->y1 = y;
+	       b->x2 = x + w;
+	       b->y2 = y + h;
+	       b++;
+	       n++;
+	    }
+	 } else {
+            /* clear whole window */
+	    for ( ; i < nr ; i++) {
+	       *b++ = box[i];
+	       n++;
+	    }
+	 }
+
+	 /* Publish the batch size before tracing so the log shows this batch. */
+	 mmesa->sarea->nbox = n;
+
+	 if ( MGA_DEBUG & DEBUG_VERBOSE_IOCTL )
+	    fprintf( stderr,
+		     "DRM_IOCTL_MGA_CLEAR flag 0x%x color %x depth %x nbox %d\n",
+		     flags, clear_color, clear_depth, mmesa->sarea->nbox );
+
+         clear.flags = flags;
+         clear.clear_color = clear_color;
+         clear.clear_depth = clear_depth;
+         clear.color_mask = color_mask;
+         clear.depth_mask = depth_mask;
+         ret = drmCommandWrite( mmesa->driFd, DRM_MGA_CLEAR,
+                                 &clear, sizeof(clear));
+	 if ( ret ) {
+	    fprintf( stderr, "send clear retcode = %d\n", ret );
+	    exit( 1 );
+	 }
+	 if ( MGA_DEBUG & DEBUG_VERBOSE_IOCTL )
+	    fprintf( stderr, "finished clear %d\n", ++nrclears );
+      }
+
+      UNLOCK_HARDWARE( mmesa );
+      mmesa->dirty |= MGA_UPLOAD_CLIPRECTS|MGA_UPLOAD_CONTEXT;
+   }
+
+   if (mask) 
+      _swrast_Clear( ctx, mask );
+}
+
+
+/**
+ * Wait until the previous frame of rendering has completed.
+ * 
+ * \param mmesa  Hardware context pointer.
+ *
+ * \bug
+ * The loop in this function should have some sort of a timeout mechanism.
+ *
+ * \warning
+ * This routine used to assume that the hardware lock was held on entry.  It
+ * now assumes that the lock is \b not held on entry.
+ */
+
+static void mgaWaitForFrameCompletion( mgaContextPtr mmesa )
+{
+    if ( mgaWaitFence( mmesa, mmesa->last_frame_fence, NULL ) == ENOSYS ) {
+	unsigned wait = 0;	/* only modified inside the disabled 'if ( 0 )' block */
+	GLuint last_frame;
+	GLuint last_wrap;
+
+
+	LOCK_HARDWARE( mmesa );
+	last_frame = mmesa->sarea->last_frame.head;
+	last_wrap = mmesa->sarea->last_frame.wrap;
+
+	/* The DMA routines in the kernel track a couple values in the SAREA
+	 * that we use here.  The number of times that the primary DMA buffer
+	 * has "wrapped" around is tracked in last_wrap.  In addition, the
+	 * wrap count and the buffer position at the end of the last frame are
+	 * stored in last_frame.wrap and last_frame.head.
+	 * 
+	 * By comparing the wrap counts and the current DMA pointer value
+	 * (read directly from the hardware) to last_frame.head, we can
+	 * determine when the graphics processor has processed all of the
+	 * commands for the last frame.
+	 * 
+	 * In this case "last frame" means the frame of the *previous* swap-
+	 * buffers call.  This is done to prevent queuing a second buffer swap
+	 * before the previous swap is executed.
+	 */
+	while ( 1 ) {
+	    if ( last_wrap < mmesa->sarea->last_wrap ||
+		 ( last_wrap == mmesa->sarea->last_wrap &&
+		   last_frame <= (MGA_READ( MGAREG_PRIMADDRESS ) -
+				  mmesa->primary_offset) ) ) {
+		break;
+	    }
+	    if ( 0 ) {	/* disabled debug trace */
+		wait++;
+		fprintf( stderr, "   last: head=0x%06x wrap=%d\n",
+			 last_frame, last_wrap );
+		fprintf( stderr, "   head: head=0x%06lx wrap=%d\n",
+			 (long)(MGA_READ( MGAREG_PRIMADDRESS ) - mmesa->primary_offset),
+			 mmesa->sarea->last_wrap );
+	    }
+	    UPDATE_LOCK( mmesa, DRM_LOCK_FLUSH );
+
+	    UNLOCK_HARDWARE( mmesa );	/* lock is dropped around the sleep */
+	    DO_USLEEP( 1 );
+	    LOCK_HARDWARE( mmesa );
+	}
+	if ( wait )
+	  fprintf( stderr, "\n" );
+
+	UNLOCK_HARDWARE( mmesa );
+    }
+}
+
+
+/*
+ * Copy the back buffer to the front buffer (DRM_MGA_SWAP per cliprect batch).
+ */
+void mgaCopyBuffer( __DRIdrawable *dPriv )
+{
+   mgaContextPtr mmesa;
+   drm_clip_rect_t *pbox;
+   GLint nbox;
+   GLint ret;
+   GLint i;
+   GLboolean   missed_target;
+   __DRIscreen *psp;
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   psp = dPriv->driScreenPriv;	/* dereference dPriv only after the asserts */
+   mmesa = (mgaContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   FLUSH_BATCH( mmesa );
+
+   mgaWaitForFrameCompletion( mmesa );
+   driWaitForVBlank( dPriv, & missed_target );
+   if ( missed_target ) {
+      mmesa->swap_missed_count++;
+      (void) (*psp->systemTime->getUST)( & mmesa->swap_missed_ust );
+   }
+   LOCK_HARDWARE( mmesa );
+
+   /* Use the frontbuffer cliprects
+    */
+   if (mmesa->dirty_cliprects & MGA_FRONT)
+      mgaUpdateRects( mmesa, MGA_FRONT );
+
+   pbox = dPriv->pClipRects;
+   nbox = dPriv->numClipRects;
+
+   for (i = 0 ; i < nbox ; )
+   {
+      int nr = MIN2(i + MGA_NR_SAREA_CLIPRECTS, dPriv->numClipRects);
+      drm_clip_rect_t *b = mmesa->sarea->boxes;
+
+      mmesa->sarea->nbox = nr - i;
+
+      for ( ; i < nr ; i++)
+	 *b++ = pbox[i];
+
+      if (0)
+	 fprintf(stderr, "DRM_IOCTL_MGA_SWAP\n");
+
+      ret = drmCommandNone( mmesa->driFd, DRM_MGA_SWAP );
+      if ( ret ) {
+	 printf("send swap retcode = %d\n", ret);
+	 exit(1);
+      }
+   }
+
+   (void) mgaSetFence( mmesa, & mmesa->last_frame_fence );
+   UNLOCK_HARDWARE( mmesa );
+
+   mmesa->dirty |= MGA_UPLOAD_CLIPRECTS;
+   mmesa->swap_count++;
+   (void) (*psp->systemTime->getUST)( & mmesa->swap_ust );
+}
+
+
+/**
+ * Implement the hardware-specific portion of \c glFinish.
+ *
+ * Flushes all pending commands to the hardware and wait for them to finish.
+ * 
+ * \param ctx  Context where the \c glFinish command was issued.
+ *
+ * \sa glFinish, mgaFlush, mgaFlushDMA
+ */
+static void mgaFinish( GLcontext *ctx  )
+{
+    mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+    uint32_t  fence;
+    int have_fence;
+
+    LOCK_HARDWARE( mmesa );
+    if ( mmesa->vertex_dma_buffer != NULL ) {
+	mgaFlushVerticesLocked( mmesa );
+    }
+
+    /* Without a fence to wait on, fall back to a quiescent flush. */
+    have_fence = ( mgaSetFence( mmesa, & fence ) == 0 );
+    if ( !have_fence ) {
+	if (MGA_DEBUG&DEBUG_VERBOSE_IOCTL) {
+	    fprintf(stderr, "mgaRegetLockQuiescent\n");
+	}
+	UPDATE_LOCK( mmesa, DRM_LOCK_QUIESCENT | DRM_LOCK_FLUSH );
+    }
+    UNLOCK_HARDWARE( mmesa );
+    if ( have_fence ) {
+	(void) mgaWaitFence( mmesa, fence, NULL );
+    }
+}
+
+
+/**
+ * Flush all commands up to at least a certain point to the hardware.
+ *
+ * \note
+ * The term "wait" in the name of this function is misleading.  It doesn't
+ * actually wait for anything.  It just makes sure that the commands have
+ * been flushed to the hardware.
+ *
+ * \warning
+ * As the name implies, this function assumes that the hardware lock is
+ * held on entry.
+ */
+void mgaWaitAgeLocked( mgaContextPtr mmesa, int age  )
+{
+   if (GET_DISPATCH_AGE(mmesa) >= age)
+      return;  /* dispatch age has already reached 'age': nothing to flush */
+   UPDATE_LOCK( mmesa, DRM_LOCK_FLUSH );
+}
+
+
+static GLboolean intersect_rect( drm_clip_rect_t *out,
+				 const drm_clip_rect_t *a,
+				 const drm_clip_rect_t *b )
+{
+   /* Start from a, clamp each edge against b; non-empty result => true. */
+   *out = *a;
+   out->x1 = (b->x1 > out->x1) ? b->x1 : out->x1;
+   out->y1 = (b->y1 > out->y1) ? b->y1 : out->y1;
+   out->x2 = (b->x2 < out->x2) ? b->x2 : out->x2;
+   out->y2 = (b->y2 < out->y2) ? b->y2 : out->y2;
+   return ((out->x1 < out->x2) && (out->y1 < out->y2));
+}
+
+
+/* Stamp the texture objects bound to units 0 and 1 (if any) with 'age'. */
+static void age_mmesa( mgaContextPtr mmesa, int age )
+{
+   int unit;
+   for (unit = 0 ; unit < 2 ; unit++)
+      if (mmesa->CurrentTexObj[unit]) mmesa->CurrentTexObj[unit]->age = age;
+}
+
+
+/**
+ * Submit the pending vertex DMA buffer to the kernel.
+ *
+ * The buffer is sent with DRM_MGA_VERTEX once per batch of at most
+ * MGA_NR_SAREA_CLIPRECTS cliprects (each intersected with the scissor
+ * rectangle when scissoring is enabled).  Only the last submission sets
+ * the discard flag.  When no cliprect is visible the buffer is still
+ * submitted once, empty and with the discard flag set, so that the kernel
+ * is always told to discard it.
+ *
+ * \param mmesa  Context whose \c vertex_dma_buffer is flushed and cleared.
+ *
+ * \warning
+ * As the name implies, this function assumes that the hardware lock is
+ * held on entry.
+ */
+void mgaFlushVerticesLocked( mgaContextPtr mmesa )
+{
+   drm_clip_rect_t *pbox;
+   int nbox;
+   drmBufPtr buffer = mmesa->vertex_dma_buffer;
+   drm_mga_vertex_t vertex;
+   int i;
+
+   mmesa->vertex_dma_buffer = 0;
+
+   if (!buffer)
+      return;
+
+   if (mmesa->dirty_cliprects & mmesa->draw_buffer)
+      mgaUpdateRects( mmesa, mmesa->draw_buffer );
+
+   if (mmesa->dirty & ~MGA_UPLOAD_CLIPRECTS)
+      mgaEmitHwStateLocked( mmesa );
+
+   /* Fetch the cliprects only after mgaUpdateRects() had its chance to run,
+    * in case it refreshes mmesa->pClipRects / mmesa->numClipRects. */
+   pbox = mmesa->pClipRects;
+   nbox = mmesa->numClipRects;
+
+   /* FIXME: Workaround bug in kernel module.
+    */
+   mmesa->sarea->dirty |= MGA_UPLOAD_CONTEXT;
+
+   if (!nbox)
+      buffer->used = 0;
+
+   if (nbox >= MGA_NR_SAREA_CLIPRECTS)
+      mmesa->dirty |= MGA_UPLOAD_CLIPRECTS;
+
+   /* do/while, not for: with nbox == 0 one pass is still made, which
+    * submits the emptied buffer with discard set. */
+   i = 0;
+   do {
+      int nr = MIN2(i + MGA_NR_SAREA_CLIPRECTS, nbox);
+      drm_clip_rect_t *b = mmesa->sarea->boxes;
+      int discard = 0;
+
+      if (mmesa->scissor) {
+	 mmesa->sarea->nbox = 0;
+
+	 for ( ; i < nr ; i++) {
+	    *b = pbox[i];
+	    if (intersect_rect(b, b, &mmesa->scissor_rect)) {
+	       mmesa->sarea->nbox++;
+	       b++;
+	    }
+	 }
+
+	 /* Culled?
+	  */
+	 if (!mmesa->sarea->nbox) {
+	    if (nr < nbox) continue;
+	    buffer->used = 0;
+	 }
+      } else {
+	 mmesa->sarea->nbox = nr - i;
+	 for ( ; i < nr ; i++)
+	    *b++ = pbox[i];
+      }
+
+      /* Finished with the buffer?
+       */
+      if (nr == nbox)
+	 discard = 1;
+
+      mmesa->sarea->dirty |= MGA_UPLOAD_CLIPRECTS;
+
+      vertex.idx = buffer->idx;
+      vertex.used = buffer->used;
+      vertex.discard = discard;
+      drmCommandWrite( mmesa->driFd, DRM_MGA_VERTEX,
+                       &vertex, sizeof(vertex) );
+
+      age_mmesa(mmesa, mmesa->sarea->last_enqueue);
+   } while (i < nbox);
+
+   mmesa->dirty &= ~MGA_UPLOAD_CLIPRECTS;
+}
+/* Locking wrapper: run mgaFlushVerticesLocked() with the hardware lock held. */
+void mgaFlushVertices( mgaContextPtr mmesa )
+{
+   LOCK_HARDWARE( mmesa );
+   mgaFlushVerticesLocked( mmesa );
+   UNLOCK_HARDWARE( mmesa );
+}
+
+
+void mgaFireILoadLocked( mgaContextPtr mmesa,
+			 GLuint offset, GLuint length )
+{
+   /* Nothing can be fired unless an ILOAD buffer is currently held. */
+   if (mmesa->iload_buffer == NULL) {
+      fprintf(stderr, "mgaFireILoad: no buffer\n");
+      return;
+   }
+   if (MGA_DEBUG&DEBUG_VERBOSE_IOCTL) {
+      fprintf(stderr, "mgaFireILoad idx %d ofs 0x%x length %d\n",
+	      mmesa->iload_buffer->idx, (int)offset, (int)length );
+   }
+   mga_iload_dma_ioctl( mmesa, offset, length );
+}
+
+void mgaGetILoadBufferLocked( mgaContextPtr mmesa )
+{
+   if (MGA_DEBUG&DEBUG_VERBOSE_IOCTL)
+      fprintf(stderr, "mgaGetIloadBuffer (buffer now %p)\n",
+              (void *) mmesa->iload_buffer);
+   /* The trace above shows the old pointer; the new buffer is fetched here. */
+   mmesa->iload_buffer = mga_get_buffer_ioctl( mmesa );
+}
+
+
+/**
+ * Implement the hardware-specific portion of \c glFlush.
+ *
+ * \param ctx  Context to be flushed.
+ *
+ * \sa glFlush, mgaFinish, mgaFlushDMA
+ */
+static void mgaFlush( GLcontext *ctx )
+{
+    mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+
+    /* First push out any vertices still sitting in the current DMA buffer. */
+    LOCK_HARDWARE( mmesa );
+    if ( mmesa->vertex_dma_buffer != NULL ) {
+	mgaFlushVerticesLocked( mmesa );
+    }
+    /* Then request a flush through UPDATE_LOCK (which calls mgaFlushDMA). */
+    UPDATE_LOCK( mmesa, DRM_LOCK_FLUSH );
+    UNLOCK_HARDWARE( mmesa );
+}
+
+
+/**
+ * Issue DRM_MGA_FLUSH, retrying while the kernel reports EBUSY.
+ *
+ * \param fd     DRM file descriptor the ioctl is sent to.
+ * \param flags  Only the QUIESCENT, FLUSH and FLUSH_ALL lock bits are used.
+ * \return Zero on success, otherwise a negated \c errno value.
+ */
+int mgaFlushDMA( int fd, drmLockFlags flags )
+{
+   drm_lock_t lock;
+   int ret, i = 0;
+
+   memset( &lock, 0, sizeof(lock) );
+   lock.flags = flags & (DRM_LOCK_QUIESCENT | DRM_LOCK_FLUSH
+			 | DRM_LOCK_FLUSH_ALL);
+
+   do {
+      ret = drmCommandWrite( fd, DRM_MGA_FLUSH, &lock, sizeof(lock) );
+   } while ( ret && errno == EBUSY && i++ < DRM_MGA_IDLE_RETRY );
+   if ( ret == 0 )
+      return 0;
+   if ( errno != EBUSY )
+      return -errno;
+
+   if ( lock.flags & DRM_LOCK_QUIESCENT ) {
+      /* Only keep trying if we need quiescence.  Restart the retry count,
+       * which the loop above has used up by the time we get here. */
+      lock.flags &= ~(DRM_LOCK_FLUSH | DRM_LOCK_FLUSH_ALL);
+      i = 0;
+      do {
+         ret = drmCommandWrite( fd, DRM_MGA_FLUSH, &lock, sizeof(lock) );
+      } while ( ret && errno == EBUSY && i++ < DRM_MGA_IDLE_RETRY );
+   }
+   return ( ret == 0 ) ? 0 : -errno;
+}
+/* Hook mgaClear, mgaFlush and mgaFinish into the driver function table. */
+void mgaInitIoctlFuncs( struct dd_function_table *functions )
+{
+   functions->Clear = mgaClear;
+   functions->Flush = mgaFlush;
+   functions->Finish = mgaFinish;
+}
diff --git a/src/mesa/drivers/dri/mga/mgaioctl.h b/src/mesa/drivers/dri/mga/mgaioctl.h
new file mode 100644
index 0000000000..7a8660d203
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgaioctl.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ *    Gareth Hughes <gareth@valinux.com>
+ */
+
+#ifndef MGA_IOCTL_H
+#define MGA_IOCTL_H
+
+#include "mgacontext.h"
+#include "mga_xmesa.h"
+
+void mgaCopyBuffer( __DRIdrawable *dPriv );
+void mgaWaitForVBlank( mgaContextPtr mmesa );
+
+void mgaGetILoadBufferLocked( mgaContextPtr mmesa );
+void mgaFireILoadLocked( mgaContextPtr mmesa,
+			 GLuint offset, GLuint length );
+
+void mgaWaitAgeLocked( mgaContextPtr mmesa, int age );
+
+void mgaFlushVertices( mgaContextPtr mmesa );
+void mgaFlushVerticesLocked( mgaContextPtr mmesa );
+int mgaFlushDMA( int fd, drmLockFlags flags );
+
+void mgaInitIoctlFuncs( struct dd_function_table *functions );
+/* Flush batched vertices, if any.  Note: mmesa is evaluated more than once. */
+#define FLUSH_BATCH(mmesa) do {						\
+        if (MGA_DEBUG&DEBUG_VERBOSE_IOCTL)  				\
+              fprintf(stderr, "FLUSH_BATCH in %s\n", __FUNCTION__);	\
+	if ((mmesa)->vertex_dma_buffer) mgaFlushVertices(mmesa);	\
+} while (0)
+/* Flush batched vertices, then OR 'flag' into the context's dirty bits. */
+#define MGA_STATECHANGE(mmesa, flag) do {	\
+   FLUSH_BATCH(mmesa);				\
+   (mmesa)->dirty |= (flag);			\
+} while (0)
+
+
+extern drmBufPtr mga_get_buffer_ioctl( mgaContextPtr mmesa );
+
+/**
+ * Reserve \p bytes in the current vertex DMA buffer.
+ *
+ * If there is no buffer, or the current one lacks room, the current buffer
+ * (if any) is flushed and a new one is fetched, all under the hardware lock.
+ *
+ * \return Pointer to the start of the reserved space.
+ */
+static INLINE
+GLuint *mgaAllocDmaLow( mgaContextPtr mmesa, int bytes )
+{
+   drmBufPtr buf = mmesa->vertex_dma_buffer;
+   char *start;
+
+   if ( buf == NULL || (buf->used + bytes) > buf->total ) {
+      LOCK_HARDWARE( mmesa );
+      /* Re-read the pointer after taking the lock; a too-full buffer must
+       * go out to the hardware before it is replaced.
+       */
+      if ( mmesa->vertex_dma_buffer != NULL )
+	 mgaFlushVerticesLocked( mmesa );
+
+      mmesa->vertex_dma_buffer = mga_get_buffer_ioctl( mmesa );
+      UNLOCK_HARDWARE( mmesa );
+      buf = mmesa->vertex_dma_buffer;
+   }
+
+   start = (char *) buf->address + buf->used;
+   buf->used += bytes;
+   return (GLuint *) start;
+}
+
+/* Flush via mgaFlushDMA(); on failure issue DRM_MGA_RESET, unlock and exit. */
+#define UPDATE_LOCK( mmesa, flags )					\
+do {									\
+   GLint ret = mgaFlushDMA( (mmesa)->driFd, (flags) );			\
+   if ( ret < 0 ) {							\
+      drmCommandNone( (mmesa)->driFd, DRM_MGA_RESET );			\
+      UNLOCK_HARDWARE( mmesa );						\
+      fprintf( stderr, "%s: flush return = %s (%d), flags = 0x%08x\n",	\
+	       __FUNCTION__, strerror( -ret ), -ret,			\
+	       (unsigned)(flags) );					\
+      exit( 1 );							\
+   }									\
+} while (0)
+
+#endif
diff --git a/src/mesa/drivers/dri/mga/mgapixel.c b/src/mesa/drivers/dri/mga/mgapixel.c
new file mode 100644
index 0000000000..664f7c77c2
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgapixel.c
@@ -0,0 +1,655 @@
+/*
+ * Copyright 2000 Compaq Computer Inc. and VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file mgapixel.c
+ * Implement framebuffer pixel operations for MGA.
+ *
+ * \todo
+ * Someday the accelerated \c glReadPixels and \c glDrawPixels paths need to
+ * be resurrected.  They are currently ifdef'ed out because they don't seem
+ * to work and they only get activated in some very rare circumstances.
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Gareth Hughes <gareth@valinux.com>
+ */
+
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "mgadd.h"
+#include "mgacontext.h"
+#include "mgaioctl.h"
+#include "mgapixel.h"
+#include "mgastate.h"
+
+#include "swrast/swrast.h"
+#include "main/imports.h"
+
+#if 0
+/* IS_AGP_MEM: is p inside the mapped buffers area?  AGP_OFFSET: p's byte offset from its start. */
+#define IS_AGP_MEM( mmesa, p )						  \
+   ((unsigned long)(mmesa)->mgaScreen->buffers.map <= (unsigned long)(p) && \
+    (unsigned long)(mmesa)->mgaScreen->buffers.map +			  \
+    (unsigned long)(mmesa)->mgaScreen->buffers.size > (unsigned long)(p))
+#define AGP_OFFSET( mmesa, p )						  \
+     ((unsigned long)(p) - (unsigned long)(mmesa)->mgaScreen->buffers.map)
+/* Checks whether a GL_UNSIGNED_INT_24_8_MESA image passes every condition listed below. */
+#if defined(MESA_packed_depth_stencil)
+static GLboolean
+check_depth_stencil_24_8( const GLcontext *ctx, GLenum type,
+			  const struct gl_pixelstore_attrib *packing,
+			  const void *pixels, GLint sz,
+			  GLint pitch )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+
+   return ( type == GL_UNSIGNED_INT_24_8_MESA &&
+	    ctx->Visual->DepthBits == 24 &&	/* NOTE(review): other code in this file spells this ctx->Visual.depthBits -- confirm */
+	    ctx->Visual->StencilBits == 8 &&
+	    mmesa->mgaScreen->cpp == 4 &&
+	    mmesa->hw_stencil &&
+	    !ctx->Pixel.IndexShift &&
+	    !ctx->Pixel.IndexOffset &&
+	    !ctx->Pixel.MapStencilFlag &&
+	    ctx->Pixel.DepthBias == 0.0 &&
+	    ctx->Pixel.DepthScale == 1.0 &&
+	    !packing->SwapBytes &&
+	    pitch % 32 == 0 &&		/* pitch must be a multiple of 32 ... */
+	    pitch < 4096 );		/* ... and below 4096 */
+}
+#endif
+
+/* Decide whether a depth image passes the checks for the accelerated path. */
+static GLboolean
+check_depth( const GLcontext *ctx, GLenum type,
+	     const struct gl_pixelstore_attrib *packing,
+	     const void *pixels, GLint sz, GLint pitch )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const GLuint cpp = mmesa->mgaScreen->cpp;
+
+   if ( IS_AGP_MEM( mmesa, pixels ) ) {
+      const GLboolean uint_ok = ( type == GL_UNSIGNED_INT && cpp == 4 );
+      const GLboolean ushort_ok = ( type == GL_UNSIGNED_SHORT && cpp == 2 );
+      if ( !uint_ok && !ushort_ok )
+	 return GL_FALSE;
+   }
+   if ( ctx->Pixel.DepthBias != 0.0 || ctx->Pixel.DepthScale != 1.0 )
+      return GL_FALSE;
+   return ( !packing->SwapBytes && pitch % 32 == 0 && pitch < 4096 );
+}
+
+/* Decide whether a colour image passes the checks for the accelerated path. */
+static GLboolean
+check_color( const GLcontext *ctx, GLenum type, GLenum format,
+	     const struct gl_pixelstore_attrib *packing,
+	     const void *pixels, GLint sz, GLint pitch )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   GLuint cpp = mmesa->mgaScreen->cpp;
+
+   /* Can't do conversions on agp reads/draws.
+    */
+   if ( IS_AGP_MEM( mmesa, pixels ) ) {
+      GLboolean native;
+      if ( cpp == 4 )
+	 native = ( format == GL_BGRA && ( type == GL_UNSIGNED_BYTE ||
+					   type == GL_UNSIGNED_INT_8_8_8_8 ) );
+      else if ( cpp == 2 )
+	 native = ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5_REV );
+      else
+	 native = GL_FALSE;
+      if ( !native || pitch % 32 != 0 || pitch >= 4096 )
+	 return GL_FALSE;
+   }
+   return (!ctx->_ImageTransferState && !packing->SwapBytes && !packing->LsbFirst);
+}
+
+static GLboolean
+check_color_per_fragment_ops( const GLcontext *ctx )
+{
+   /* Any of these enabled states rules the fast path out. */
+   if ( ctx->Color.AlphaEnabled || ctx->Depth.Test || ctx->Fog.Enabled ||
+	ctx->Scissor.Enabled || ctx->Stencil._Enabled ||
+	ctx->Color.ColorLogicOpEnabled || ctx->Texture._EnabledUnits )
+      return GL_FALSE;
+
+   /* All four entries of ColorMask[0] must be set. */
+   if ( !ctx->Color.ColorMask[0][0] || !ctx->Color.ColorMask[0][1] ||
+	!ctx->Color.ColorMask[0][2] || !ctx->Color.ColorMask[0][3] )
+      return GL_FALSE;
+
+   /* Valid raster position, ZoomX of 1 and ZoomY of +1 or -1. */
+   return ( ctx->Current.RasterPosValid &&
+	    ctx->Pixel.ZoomX == 1.0F &&
+	    (ctx->Pixel.ZoomY == 1.0F || ctx->Pixel.ZoomY == -1.0F) );
+}
+
+static GLboolean
+check_depth_per_fragment_ops( const GLcontext *ctx )
+{
+   /* All four colour channels of ColorMask[0] must be masked off. */
+   if ( ctx->Color.ColorMask[0][RCOMP] != 0 || ctx->Color.ColorMask[0][BCOMP] != 0 ||
+	ctx->Color.ColorMask[0][GCOMP] != 0 || ctx->Color.ColorMask[0][ACOMP] != 0 )
+      return GL_FALSE;
+   if ( !ctx->Current.RasterPosValid || ctx->Pixel.ZoomX != 1.0F )
+      return GL_FALSE;
+   return ( ctx->Pixel.ZoomY == 1.0F || ctx->Pixel.ZoomY == -1.0F );
+}
+
+/* In addition to the requirements for depth: stencil transfers also need
+ * ctx->Pixel.IndexShift and ctx->Pixel.IndexOffset to both be zero. */
+#if defined(MESA_packed_depth_stencil)
+static GLboolean
+check_stencil_per_fragment_ops( const GLcontext *ctx )
+{
+   return ( !ctx->Pixel.IndexShift &&
+	    !ctx->Pixel.IndexOffset );
+}
+#endif
+
+/* Clip (*x, *y, *width, *height) against the bounds of buffer; returns GL_FALSE if nothing is left. */
+static GLboolean
+clip_pixelrect( const GLcontext *ctx,
+		const GLframebuffer *buffer,
+		GLint *x, GLint *y,
+		GLsizei *width, GLsizei *height,
+		GLint *skipPixels, GLint *skipRows,
+		GLint *size )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+
+   *width = MIN2(*width, MAX_WIDTH); /* redundant? */
+
+   /* left clipping: the columns cut off are added to *skipPixels */
+   if (*x < buffer->_Xmin) {
+      *skipPixels += (buffer->_Xmin - *x);
+      *width -= (buffer->_Xmin - *x);
+      *x = buffer->_Xmin;
+   }
+
+   /* right clipping; NOTE(review): the "- 1" leaves x + width == _Xmax + 1 -- confirm _Xmax is inclusive */
+   if (*x + *width > buffer->_Xmax)
+      *width -= (*x + *width - buffer->_Xmax - 1);
+
+   if (*width <= 0)
+      return GL_FALSE;
+
+   /* bottom clipping: the rows cut off are added to *skipRows */
+   if (*y < buffer->_Ymin) {
+      *skipRows += (buffer->_Ymin - *y);
+      *height -= (buffer->_Ymin - *y);
+      *y = buffer->_Ymin;
+   }
+
+   /* top clipping; same "- 1" convention as the right edge above */
+   if (*y + *height > buffer->_Ymax)
+      *height -= (*y + *height - buffer->_Ymax - 1);
+
+   if (*height <= 0)
+      return GL_FALSE;
+   /* Offset of the last clipped pixel: frontPitch per row plus cpp per pixel. */
+   *size = ((*y + *height - 1) * mmesa->mgaScreen->frontPitch +
+	    (*x + *width - 1) * mmesa->mgaScreen->cpp);
+
+   return GL_TRUE;
+}
+/* Try to service a ReadPixels request with the hardware; GL_FALSE makes mgaDDReadPixels fall back to swrast. */
+static GLboolean
+mgaTryReadPixels( GLcontext *ctx,
+		  GLint x, GLint y, GLsizei width, GLsizei height,
+		  GLenum format, GLenum type,
+		  const struct gl_pixelstore_attrib *pack,
+		  GLvoid *pixels )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   GLint size, skipPixels, skipRows; /* NOTE(review): skipPixels/skipRows are never initialised, yet clip_pixelrect() does "+=" on them */
+   GLint pitch = pack->RowLength ? pack->RowLength : width;
+   GLboolean ok;
+
+   GLuint planemask;
+   GLuint source;
+#if 0
+   drmMGABlit blit;
+   GLuint dest;
+   GLint source_pitch, dest_pitch;
+   GLint delta_sx, delta_sy;
+   GLint delta_dx, delta_dy;
+   GLint blit_height, ydir;
+#endif
+
+   if (!clip_pixelrect(ctx, ctx->ReadBuffer,
+		       &x, &y, &width, &height,
+		       &skipPixels, &skipRows, &size)) {
+      return GL_TRUE; /* nothing left after clipping: report the request as handled */
+   }
+
+   /* Only accelerate reading to agp buffers.
+    */
+   if ( !IS_AGP_MEM(mmesa, (char *)pixels) ||
+	!IS_AGP_MEM(mmesa, (char *)pixels + size) )
+      return GL_FALSE;
+
+   switch (format) {
+#if defined(MESA_packed_depth_stencil)
+   case GL_DEPTH_STENCIL_MESA:
+      ok = check_depth_stencil_24_8(ctx, type, pack, pixels, size, pitch);
+      planemask = ~0;
+      source = mmesa->mgaScreen->depthOffset;
+      break;
+#endif
+
+   case GL_DEPTH_COMPONENT:
+      ok = check_depth(ctx, type, pack, pixels, size, pitch);
+
+      /* Can't accelerate at this depth -- planemask does the wrong
+       * thing; it doesn't clear the low order bits in the
+       * destination, instead it leaves them untouched.
+       *
+       * Could get the accelerator to solid fill the destination with
+       * zeros first...  Or get the cpu to do it...
+       */
+      if (ctx->Visual.depthBits == 24)
+	 return GL_FALSE;
+
+      planemask = ~0;
+      source = mmesa->mgaScreen->depthOffset;
+      break;
+
+   case GL_RGB:
+   case GL_BGRA:
+      ok = check_color(ctx, type, format, pack, pixels, size, pitch);
+      planemask = ~0;
+      source = (mmesa->draw_buffer == MGA_FRONT ?
+		mmesa->mgaScreen->frontOffset :
+		mmesa->mgaScreen->backOffset);
+      break;
+
+   default:
+      return GL_FALSE;
+   }
+
+   if (!ok) {
+      return GL_FALSE;
+   }
+
+   /* NOTE(review): with the blit below compiled out, this locks, unlocks and returns GL_TRUE without writing to pixels. */
+   LOCK_HARDWARE( mmesa );
+
+#if 0
+   {
+      __DRIdrawable *dPriv = mmesa->driDrawable;
+      int nbox, retcode, i;
+
+      UPDATE_LOCK( mmesa, DRM_LOCK_FLUSH | DRM_LOCK_QUIESCENT );
+
+      if (mmesa->dirty_cliprects & MGA_FRONT)
+	 mgaUpdateRects( mmesa, MGA_FRONT );
+
+      nbox = dPriv->numClipRects;
+
+      y = dPriv->h - y - height;
+      x += mmesa->drawX;
+      y += mmesa->drawY;
+
+      dest = ((mmesa->mgaScreen->agp.handle + AGP_OFFSET(mmesa, pixels)) |
+	      DO_dstmap_sys | DO_dstacc_agp);
+      source_pitch = mmesa->mgaScreen->frontPitch / mmesa->mgaScreen->cpp;
+      dest_pitch = pitch;
+      delta_sx = 0;
+      delta_sy = 0;
+      delta_dx = -x;
+      delta_dy = -y;
+      blit_height = 2*y + height;
+      ydir = -1;
+
+      if (0) fprintf(stderr, "XX doing readpixel blit src_pitch %d dst_pitch %d\n",
+		     source_pitch, dest_pitch);
+
+      /* NOTE(review): blit is handed to the ioctl below, but nothing visible here fills it in. */
+
+      for (i = 0 ; i < nbox ; )
+      {
+	 int nr = MIN2(i + MGA_NR_SAREA_CLIPRECTS, dPriv->numClipRects);
+	 drm_clip_rect_t *box = dPriv->pClipRects;
+	 drm_clip_rect_t *b = mmesa->sarea->boxes;
+	 int n = 0;
+
+	 for ( ; i < nr ; i++) {
+	    GLint bx = box[i].x1;
+	    GLint by = box[i].y1;
+	    GLint bw = box[i].x2 - bx;
+	    GLint bh = box[i].y2 - by;
+
+	    if (bx < x) bw -= x - bx, bx = x;
+	    if (by < y) bh -= y - by, by = y;
+	    if (bx + bw > x + width) bw = x + width - bx;
+	    if (by + bh > y + height) bh = y + height - by;
+	    if (bw <= 0) continue;
+	    if (bh <= 0) continue;
+
+	    b->x1 = bx;
+	    b->y1 = by;
+	    b->x2 = bx + bw;
+	    b->y2 = by + bh;
+	    b++;
+	    n++;
+	 }
+
+	 mmesa->sarea->nbox = n;
+
+	 if (n && (retcode = drmCommandWrite( mmesa->driFd, DRM_MGA_BLIT,
+                                              &blit, sizeof(drmMGABlit)))) {
+	    fprintf(stderr, "blit ioctl failed, retcode = %d\n", retcode);
+	    UNLOCK_HARDWARE( mmesa );
+	    exit(1);
+	 }
+      }
+
+      UPDATE_LOCK( mmesa, DRM_LOCK_FLUSH | DRM_LOCK_QUIESCENT );
+   }
+#endif
+
+   UNLOCK_HARDWARE( mmesa );
+
+   return GL_TRUE;
+}
+/* ReadPixels hook: try mgaTryReadPixels() first and fall back to _swrast_ReadPixels(). */
+static void
+mgaDDReadPixels( GLcontext *ctx,
+		 GLint x, GLint y, GLsizei width, GLsizei height,
+		 GLenum format, GLenum type,
+		 const struct gl_pixelstore_attrib *pack,
+		 GLvoid *pixels )
+{
+   if (!mgaTryReadPixels( ctx, x, y, width, height, format, type, pack, pixels))
+      _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack, pixels);
+}
+
+
+
+/* Blit an image to dest one batch of cliprects at a time.  The whole body is compiled out, so this is a no-op. */
+static void do_draw_pix( GLcontext *ctx,
+			 GLint x, GLint y, GLsizei width, GLsizei height,
+			 GLint pitch,
+			 const void *pixels,
+			 GLuint dest, GLuint planemask)
+{
+#if 0
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   drmMGABlit blit;
+   __DRIdrawable *dPriv = mmesa->driDrawable;
+   drm_clip_rect_t pbox = dPriv->pClipRects; /* NOTE(review): pClipRects is used as a pointer elsewhere in this file; pbox is also unused */
+   int nbox = dPriv->numClipRects;
+   int retcode, i;
+
+   y = dPriv->h - y - height;
+   x += mmesa->drawX;
+   y += mmesa->drawY;
+
+   blit.dest = dest;
+   blit.planemask = planemask;
+   blit.source = ((mmesa->mgaScreen->agp.handle + AGP_OFFSET(mmesa, pixels))
+		  | SO_srcmap_sys | SO_srcacc_agp);
+   blit.dest_pitch = mmesa->mgaScreen->frontPitch / mmesa->mgaScreen->cpp;
+   blit.source_pitch = pitch;
+   blit.delta_sx = -x;
+   blit.delta_sy = -y;
+   blit.delta_dx = 0;
+   blit.delta_dy = 0;
+   if (ctx->Pixel.ZoomY == -1) { /* both branches use the same height; only ydir differs */
+      blit.height = height;
+      blit.ydir = 1;
+   } else {
+      blit.height = height;
+      blit.ydir = -1;
+   }
+
+   if (0) fprintf(stderr,
+		  "doing drawpixel blit src_pitch %d dst_pitch %d\n",
+		  blit.source_pitch, blit.dest_pitch);
+
+   for (i = 0 ; i < nbox ; )
+   {
+      int nr = MIN2(i + MGA_NR_SAREA_CLIPRECTS, dPriv->numClipRects);
+      drm_clip_rect_t *box = mmesa->pClipRects;
+      drm_clip_rect_t *b = mmesa->sarea->boxes;
+      int n = 0;
+
+      for ( ; i < nr ; i++) {
+	 GLint bx = box[i].x1;
+	 GLint by = box[i].y1;
+	 GLint bw = box[i].x2 - bx;
+	 GLint bh = box[i].y2 - by;
+
+	 if (bx < x) bw -= x - bx, bx = x;
+	 if (by < y) bh -= y - by, by = y;
+	 if (bx + bw > x + width) bw = x + width - bx;
+	 if (by + bh > y + height) bh = y + height - by;
+	 if (bw <= 0) continue;
+	 if (bh <= 0) continue;
+
+	 b->x1 = bx;
+	 b->y1 = by;
+	 b->x2 = bx + bw;
+	 b->y2 = by + bh;
+	 b++;
+	 n++;
+      }
+
+      mmesa->sarea->nbox = n;
+
+      if (n && (retcode = drmCommandWrite( mmesa->driFd, DRM_MGA_BLIT,
+                                              &blit, sizeof(drmMGABlit)))) {
+	 fprintf(stderr, "blit ioctl failed, retcode = %d\n", retcode);
+	 UNLOCK_HARDWARE( mmesa );
+	 exit(1);
+      }
+   }
+#endif
+}
+
+
+
+/* Try to service a DrawPixels request with the hardware; GL_FALSE makes mgaDDDrawPixels fall back to swrast. */
+static GLboolean
+mgaTryDrawPixels( GLcontext *ctx,
+		  GLint x, GLint y, GLsizei width, GLsizei height,
+		  GLenum format, GLenum type,
+		  const struct gl_pixelstore_attrib *unpack,
+		  const GLvoid *pixels )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   GLint size, skipPixels, skipRows; /* NOTE(review): skipPixels/skipRows are never initialised, yet clip_pixelrect() does "+=" on them */
+   GLint pitch = unpack->RowLength ? unpack->RowLength : width;
+   GLuint dest, planemask;
+   GLuint cpp = mmesa->mgaScreen->cpp;
+
+   if (!clip_pixelrect(ctx, ctx->DrawBuffer,
+		       &x, &y, &width, &height,
+		       &skipPixels, &skipRows, &size)) {
+      return GL_TRUE; /* nothing left after clipping: report the request as handled */
+   }
+
+
+   switch (format) {
+#if defined(MESA_packed_depth_stencil)
+   case GL_DEPTH_STENCIL_MESA:
+      dest = mmesa->mgaScreen->depthOffset;
+      planemask = ~0;
+      if (!check_depth_stencil_24_8(ctx, type, unpack, pixels, size, pitch) ||
+	  !check_depth_per_fragment_ops(ctx) ||
+	  !check_stencil_per_fragment_ops(ctx))
+	 return GL_FALSE;
+      break;
+#endif
+
+   case GL_DEPTH_COMPONENT:
+      dest = mmesa->mgaScreen->depthOffset;
+
+      if (ctx->Visual.depthBits == 24)
+	 planemask = ~0xff;
+      else
+	 planemask = ~0;
+
+      if (!check_depth(ctx, type, unpack, pixels, size, pitch) ||
+	  !check_depth_per_fragment_ops(ctx))
+	 return GL_FALSE;
+      break;
+
+   case GL_RGB:
+   case GL_BGRA:
+      dest = (mmesa->draw_buffer == MGA_FRONT ?
+	      mmesa->mgaScreen->frontOffset :
+	      mmesa->mgaScreen->backOffset);
+
+      planemask = mgaPackColor(cpp,
+			       ctx->Color.ColorMask[0][RCOMP],
+			       ctx->Color.ColorMask[0][GCOMP],
+			       ctx->Color.ColorMask[0][BCOMP],
+			       ctx->Color.ColorMask[0][ACOMP]);
+
+      if (cpp == 2)
+	 planemask |= planemask << 16;
+
+      if (!check_color(ctx, type, format, unpack, pixels, size, pitch)) {
+	 return GL_FALSE;
+      }
+      if (!check_color_per_fragment_ops(ctx)) {
+	 return GL_FALSE;
+      }
+      break;
+
+   default:
+      return GL_FALSE;
+   }
+
+   LOCK_HARDWARE_QUIESCENT( mmesa );
+
+   if (mmesa->dirty_cliprects & MGA_FRONT)
+      mgaUpdateRects( mmesa, MGA_FRONT );
+
+   if ( IS_AGP_MEM(mmesa, (char *)pixels) &&
+	IS_AGP_MEM(mmesa, (char *)pixels + size) )
+   {
+      do_draw_pix( ctx, x, y, width, height, pitch, pixels,
+		   dest, planemask );
+      UPDATE_LOCK( mmesa, DRM_LOCK_FLUSH | DRM_LOCK_QUIESCENT );
+   }
+   else
+   {
+      /* Pixels is in regular memory -- get dma buffers and perform
+       * upload through them.
+       */
+/*        drmBufPtr buf = mgaGetBufferLocked(mmesa); */
+      GLuint bufferpitch = (width*cpp+31)&~31;
+
+      char *address = 0; /*  mmesa->mgaScreen->agp.map;  NOTE(review): address is NULL here, so the memcpy below would fault */
+
+      do {
+/*  	 GLuint rows = MIN2( height, MGA_DMA_BUF_SZ / bufferpitch ); */
+	 GLuint rows = height;
+
+
+	 if (0) fprintf(stderr, "trying to upload %d rows (pitch %d)\n",
+			rows, bufferpitch);
+
+	 /* The texture conversion code is so slow that there is only
+	  * negligible speedup when the buffers/images don't exactly
+	  * match:
+	  */
+#if 0
+	 if (cpp == 2) {
+	    if (!_mesa_convert_texsubimage2d( MESA_FORMAT_RGB565,
+					      0, 0, width, rows,
+					      bufferpitch, format, type,
+					      unpack, pixels, address )) {
+/*  	       mgaReleaseBufLocked( mmesa, buf ); */
+	       UNLOCK_HARDWARE(mmesa);
+	       return GL_FALSE;
+	    }
+	 } else {
+	    if (!_mesa_convert_texsubimage2d( MESA_FORMAT_ARGB8888,
+					      0, 0, width, rows,
+					      bufferpitch, format, type,
+					      unpack, pixels, address )) {
+/*  	       mgaReleaseBufLocked( mmesa, buf ); */
+	       UNLOCK_HARDWARE(mmesa);
+	       return GL_FALSE;
+	    }
+	 }
+#else
+	 memcpy( address, pixels, rows*bufferpitch );
+#endif
+
+	 do_draw_pix( ctx, x, y, width, rows,
+		      bufferpitch/cpp, address, dest, planemask );
+
+	 /* Fix me -- use multiple buffers to avoid flush.
+	  */
+	 UPDATE_LOCK( mmesa, DRM_LOCK_FLUSH | DRM_LOCK_QUIESCENT );
+
+	 pixels = (void *)((char *) pixels + rows * pitch);
+	 height -= rows;
+	 y += rows;
+      } while (height);
+
+/*        mgaReleaseBufLocked( mmesa, buf ); */
+   }
+
+   UNLOCK_HARDWARE( mmesa );
+   mmesa->dirty |= MGA_UPLOAD_CLIPRECTS;
+
+   return GL_TRUE;
+}
+/* DrawPixels hook: try mgaTryDrawPixels() first and fall back to _swrast_DrawPixels(). */
+static void
+mgaDDDrawPixels( GLcontext *ctx,
+		 GLint x, GLint y, GLsizei width, GLsizei height,
+		 GLenum format, GLenum type,
+		 const struct gl_pixelstore_attrib *unpack,
+		 const GLvoid *pixels )
+{
+   if (!mgaTryDrawPixels( ctx, x, y, width, height, format, type,
+			  unpack, pixels ))
+      _swrast_DrawPixels( ctx, x, y, width, height, format, type,
+			  unpack, pixels );
+}
+#endif
+
+
+/* Install the pixel-path driver hooks; with the body compiled out nothing is
+ * installed.  NOTE(review): the comment formerly here described a stub AGP
+ * allocator ("always returns pointer to the same block of agp space") and
+ * did not match this function. */
+void mgaDDInitPixelFuncs( GLcontext *ctx )
+{
+#if 0
+   /* evidently, these functions don't always work */
+   if (getenv("MGA_BLIT_PIXELS")) {
+      ctx->Driver.ReadPixels = mgaDDReadPixels; /* requires agp dest */
+      ctx->Driver.DrawPixels = mgaDDDrawPixels; /* works with agp/normal mem */
+   }
+#endif
+}
diff --git a/src/mesa/drivers/dri/mga/mgapixel.h b/src/mesa/drivers/dri/mga/mgapixel.h
new file mode 100644
index 0000000000..f5f300db56
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgapixel.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2000-2001 Compaq Computer Inc. VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef MGA_PIXELS_H
+#define MGA_PIXELS_H
+
+#include "main/mtypes.h"
+
+extern void mgaDDInitPixelFuncs( GLcontext *ctx );
+
+#endif
diff --git a/src/mesa/drivers/dri/mga/mgaregs.h b/src/mesa/drivers/dri/mga/mgaregs.h
new file mode 100644
index 0000000000..1ef1e6d24c
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgaregs.h
@@ -0,0 +1,1394 @@
+/* author: stephen crowley, crow@debian.org */
+
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * STEPHEN CROWLEY, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _MGAREGS_H_
+#define _MGAREGS_H_
+
+/*************** (START) AUTOMATICALLY GENERATED REGISTER FILE *****************/
+/*
+ * Generated on Wed Jan 26 13:44:46 MST 2000
+ */
+
+
+
+/*
+ * Power Graphic Mode Memory Space Registers
+ */
+
+#define MGAREG_MGA_EXEC 			0x0100
+#define MGAREG_AGP_PLL 			0x1e4c	/* NOTE(review): same offset as MGAREG_CFG_OR below -- confirm intended */
+/* Field macros: each *_MASK has the field's own bits cleared and every other bit set. */
+#    define AGP_PLL_agp2xpllen_MASK 	0xfffffffe 	/* bit 0 */
+#    define AGP_PLL_agp2xpllen_disable 	0x0 		/* bit 0 clear */
+#    define AGP_PLL_agp2xpllen_enable 	0x1 		/* bit 0 set */
+
+#define MGAREG_CFG_OR 				0x1e4c
+
+#    define CFG_OR_comp_or_MASK 	0xfffffff7 	/* bit 3 */
+#    define CFG_OR_comp_or_disable 	0x0 		
+#    define CFG_OR_comp_or_enable 	0x8 		
+#    define CFG_OR_compfreq_MASK 	0xffffff0f 	/* bits 4-7 */
+#    define CFG_OR_compfreq_SHIFT 	4 		
+#    define CFG_OR_comporup_MASK 	0xfffff0ff 	/* bits 8-11 */
+#    define CFG_OR_comporup_SHIFT 	8 		
+#    define CFG_OR_compordn_MASK 	0xffff0fff 	/* bits 12-15 */
+#    define CFG_OR_compordn_SHIFT 	12 		
+#    define CFG_OR_e2pq_MASK 		0xfffeffff 	/* bit 16 */
+#    define CFG_OR_e2pq_disable 	0x0 		
+#    define CFG_OR_e2pq_enable 		0x10000 	
+#    define CFG_OR_e2pqbypcsn_MASK 	0xfffdffff 	/* bit 17 */
+#    define CFG_OR_e2pqbypcsn_disable 	0x0 		
+#    define CFG_OR_e2pqbypcsn_enable 	0x20000 	
+#    define CFG_OR_e2pqbypd_MASK 	0xfffbffff 	/* bit 18 */
+#    define CFG_OR_e2pqbypd_disable 	0x0 		
+#    define CFG_OR_e2pqbypd_enable 	0x40000 	
+#    define CFG_OR_e2pbypclk_MASK 	0xfff7ffff 	/* bit 19 */
+#    define CFG_OR_e2pbypclk_disable 	0x0 		
+#    define CFG_OR_e2pbypclk_enable 	0x80000 	
+#    define CFG_OR_e2pbyp_MASK 		0xffefffff 	/* bit 20 */
+#    define CFG_OR_e2pbyp_disable 	0x0 		
+#    define CFG_OR_e2pbyp_enable 	0x100000 	
+#    define CFG_OR_rate_cap_or_MASK 	0xff1fffff 	/* bits 21-23 */
+#    define CFG_OR_rate_cap_or_SHIFT 	21 		
+#    define CFG_OR_rq_or_MASK 		0xe0ffffff 	/* bits 24-28 */
+#    define CFG_OR_rq_or_SHIFT 		24 		
+
+#define MGAREG_ALPHACTRL 			0x2c7c
+
+#    define AC_src_MASK 		0xfffffff0 	/* bits 0-3 */
+#    define AC_src_zero 		0x0 		/* val 0, shift 0 */
+#    define AC_src_one 			0x1 		/* val 1, shift 0 */
+#    define AC_src_dst_color 		0x2 		/* val 2, shift 0 */
+#    define AC_src_om_dst_color 	0x3 		/* val 3, shift 0 */
+#    define AC_src_src_alpha 		0x4 		/* val 4, shift 0 */
+#    define AC_src_om_src_alpha 	0x5 		/* val 5, shift 0 */
+#    define AC_src_dst_alpha 		0x6 		/* val 6, shift 0 */
+#    define AC_src_om_dst_alpha 	0x7 		/* val 7, shift 0 */
+#    define AC_src_src_alpha_sat 	0x8 		/* val 8, shift 0 */
+#    define AC_dst_MASK 		0xffffff0f 	/* bits 4-7 */
+#    define AC_dst_zero 		0x0 		/* val 0, shift 4 */
+#    define AC_dst_one 			0x10 		/* val 1, shift 4 */
+#    define AC_dst_src_color 		0x20 		/* val 2, shift 4 */
+#    define AC_dst_om_src_color 	0x30 		/* val 3, shift 4 */
+#    define AC_dst_src_alpha 		0x40 		/* val 4, shift 4 */
+#    define AC_dst_om_src_alpha 	0x50 		/* val 5, shift 4 */
+#    define AC_dst_dst_alpha 		0x60 		/* val 6, shift 4 */
+#    define AC_dst_om_dst_alpha 	0x70 		/* val 7, shift 4 */
+#    define AC_amode_MASK 		0xfffffcff 	/* bits 8-9 */
+#    define AC_amode_FCOL 		0x0 		/* val 0, shift 8 */
+#    define AC_amode_alpha_channel 	0x100 		/* val 1, shift 8 */
+#    define AC_amode_video_alpha 	0x200 		/* val 2, shift 8 */
+#    define AC_amode_RSVD 		0x300 		/* val 3, shift 8 */
+#    define AC_astipple_MASK 		0xfffff7ff 	/* bit 11 */
+#    define AC_astipple_disable 	0x0 		
+#    define AC_astipple_enable 		0x800 		
+#    define AC_aten_MASK 		0xffffefff 	/* bit 12 */
+#    define AC_aten_disable 		0x0 		
+#    define AC_aten_enable 		0x1000 		
+#    define AC_atmode_MASK 		0xffff1fff 	/* bits 13-15 */
+#    define AC_atmode_noacmp 		0x0 		/* val 0, shift 13 */
+#    define AC_atmode_ae 		0x4000 		/* val 2, shift 13 */
+#    define AC_atmode_ane 		0x6000 		/* val 3, shift 13 */
+#    define AC_atmode_alt 		0x8000 		/* val 4, shift 13 */
+#    define AC_atmode_alte 		0xa000 		/* val 5, shift 13 */
+#    define AC_atmode_agt 		0xc000 		/* val 6, shift 13 */
+#    define AC_atmode_agte 		0xe000 		/* val 7, shift 13 */
+#    define AC_atref_MASK 		0xff00ffff 	/* bits 16-23 */
+#    define AC_atref_SHIFT 		16 		
+#    define AC_alphasel_MASK 		0xfcffffff 	/* bits 24-25 */
+#    define AC_alphasel_fromtex 	0x0 		/* val 0, shift 24 */
+#    define AC_alphasel_diffused 	0x1000000 	/* val 1, shift 24 */
+#    define AC_alphasel_modulated 	0x2000000 	/* val 2, shift 24 */
+#    define AC_alphasel_trans 		0x3000000 	/* val 3, shift 24 */
+
+#define MGAREG_ALPHASTART 			0x2c70
+#define MGAREG_ALPHAXINC 			0x2c74
+#define MGAREG_ALPHAYINC 			0x2c78
+#define MGAREG_AR0 				0x1c60
+
+#    define AR0_ar0_MASK 		0xfffc0000 	/* bits 0-17 */
+#    define AR0_ar0_SHIFT 		0 		
+
+#define MGAREG_AR1 				0x1c64
+
+#    define AR1_ar1_MASK 		0xff000000 	/* bits 0-23 */
+#    define AR1_ar1_SHIFT 		0 		
+
+#define MGAREG_AR2 				0x1c68
+
+#    define AR2_ar2_MASK 		0xfffc0000 	/* bits 0-17 */
+#    define AR2_ar2_SHIFT 		0 		
+
+#define MGAREG_AR3 				0x1c6c
+
+#    define AR3_ar3_MASK 		0xff000000 	/* bits 0-23 */
+#    define AR3_ar3_SHIFT 		0 		
+#    define AR3_spage_MASK 		0xf8ffffff 	/* bits 24-26 */
+#    define AR3_spage_SHIFT 		24 		
+
+#define MGAREG_AR4 				0x1c70
+
+#    define AR4_ar4_MASK 		0xfffc0000 	/* bits 0-17 */
+#    define AR4_ar4_SHIFT 		0 		
+
+#define MGAREG_AR5 				0x1c74
+
+#    define AR5_ar5_MASK 		0xfffc0000 	/* bits 0-17 */
+#    define AR5_ar5_SHIFT 		0 		
+
+#define MGAREG_AR6 				0x1c78
+
+#    define AR6_ar6_MASK 		0xfffc0000 	/* bits 0-17 */
+#    define AR6_ar6_SHIFT 		0 		
+
+#define MGAREG_BCOL 				0x1c20
+#define MGAREG_BESA1CORG 			0x3d10
+#define MGAREG_BESA1ORG 			0x3d00
+#define MGAREG_BESA2CORG 			0x3d14
+#define MGAREG_BESA2ORG 			0x3d04
+#define MGAREG_BESB1CORG 			0x3d18
+#define MGAREG_BESB1ORG 			0x3d08
+#define MGAREG_BESB2CORG 			0x3d1c
+#define MGAREG_BESB2ORG 			0x3d0c
+#define MGAREG_BESCTL 				0x3d20
+
+#    define BC_besen_MASK 		0xfffffffe 	/* bit 0 */
+#    define BC_besen_disable 		0x0 		
+#    define BC_besen_enable 		0x1 		
+#    define BC_besv1srcstp_MASK 	0xffffffbf 	/* bit 6 */
+#    define BC_besv1srcstp_even 	0x0 		
+#    define BC_besv1srcstp_odd 		0x40 		
+#    define BC_besv2srcstp_MASK 	0xfffffeff 	/* bit 8 */
+#    define BC_besv2srcstp_disable 	0x0 		
+#    define BC_besv2srcstp_enable 	0x100 		
+#    define BC_beshfen_MASK 		0xfffffbff 	/* bit 10 */
+#    define BC_beshfen_disable 		0x0 		
+#    define BC_beshfen_enable 		0x400 		
+#    define BC_besvfen_MASK 		0xfffff7ff 	/* bit 11 */
+#    define BC_besvfen_disable 		0x0 		
+#    define BC_besvfen_enable 		0x800 		
+#    define BC_beshfixc_MASK 		0xffffefff 	/* bit 12 */
+#    define BC_beshfixc_weight 		0x0 		
+#    define BC_beshfixc_coeff 		0x1000 		
+#    define BC_bescups_MASK 		0xfffeffff 	/* bit 16 */
+#    define BC_bescups_disable 		0x0 		
+#    define BC_bescups_enable 		0x10000 	
+#    define BC_bes420pl_MASK 		0xfffdffff 	/* bit 17 */
+#    define BC_bes420pl_422 		0x0 		
+#    define BC_bes420pl_420 		0x20000 	
+#    define BC_besdith_MASK 		0xfffbffff 	/* bit 18 */
+#    define BC_besdith_disable 		0x0 		
+#    define BC_besdith_enable 		0x40000 	
+#    define BC_beshmir_MASK 		0xfff7ffff 	/* bit 19 */
+#    define BC_beshmir_disable 		0x0 		
+#    define BC_beshmir_enable 		0x80000 	
+#    define BC_besbwen_MASK 		0xffefffff 	/* bit 20 */
+#    define BC_besbwen_color 		0x0 		
+#    define BC_besbwen_bw 		0x100000 	
+#    define BC_besblank_MASK 		0xffdfffff 	/* bit 21 */
+#    define BC_besblank_disable 	0x0 		
+#    define BC_besblank_enable 		0x200000 	
+#    define BC_besfselm_MASK 		0xfeffffff 	/* bit 24 */
+#    define BC_besfselm_soft 		0x0 		
+#    define BC_besfselm_hard 		0x1000000 	
+#    define BC_besfsel_MASK 		0xf9ffffff 	/* bits 25-26 */
+#    define BC_besfsel_a1 		0x0 		/* val 0, shift 25 */
+#    define BC_besfsel_a2 		0x2000000 	/* val 1, shift 25 */
+#    define BC_besfsel_b1 		0x4000000 	/* val 2, shift 25 */
+#    define BC_besfsel_b2 		0x6000000 	/* val 3, shift 25 */
+
+#define MGAREG_BESGLOBCTL 			0x3dc0
+
+#    define BGC_beshzoom_MASK 		0xfffffffe 	/* bit 0 */
+#    define BGC_beshzoom_disable 	0x0 		
+#    define BGC_beshzoom_enable 	0x1 		
+#    define BGC_beshzoomf_MASK 		0xfffffffd 	/* bit 1 */
+#    define BGC_beshzoomf_disable 	0x0 		
+#    define BGC_beshzoomf_enable 	0x2 		
+#    define BGC_bescorder_MASK 		0xfffffff7 	/* bit 3 */
+#    define BGC_bescorder_even 		0x0 		
+#    define BGC_bescorder_odd 		0x8 		
+#    define BGC_besreghup_MASK 		0xffffffef 	/* bit 4 */
+#    define BGC_besreghup_disable 	0x0 		
+#    define BGC_besreghup_enable 	0x10 		
+#    define BGC_besvcnt_MASK 		0xf000ffff 	/* bits 16-27 */
+#    define BGC_besvcnt_SHIFT 		16 		
+
+#define MGAREG_BESHCOORD 			0x3d28
+
+#    define BHC_besright_MASK 		0xfffff800 	/* bits 0-10 */
+#    define BHC_besright_SHIFT 		0 		
+#    define BHC_besleft_MASK 		0xf800ffff 	/* bits 16-26 */
+#    define BHC_besleft_SHIFT 		16 		
+
+#define MGAREG_BESHISCAL 			0x3d30
+
+#    define BHISF_beshiscal_MASK 	0xffe00003 	/* bits 2-20 */
+#    define BHISF_beshiscal_SHIFT 	2 		
+
+#define MGAREG_BESHSRCEND 			0x3d3c
+
+#    define BHSE_beshsrcend_MASK 	0xfc000003 	/* bits 2-25 */
+#    define BHSE_beshsrcend_SHIFT 	2 		
+
+#define MGAREG_BESHSRCLST 			0x3d50
+
+#    define BHSL_beshsrclst_MASK 	0xfc00ffff 	/* bits 16-25 */
+#    define BHSL_beshsrclst_SHIFT 	16 		
+
+#define MGAREG_BESHSRCST 			0x3d38
+
+#    define BHSS_beshsrcst_MASK 	0xfc000003 	/* bits 2-25 */
+#    define BHSS_beshsrcst_SHIFT 	2 		
+
+#define MGAREG_BESPITCH 			0x3d24
+
+#    define BP_bespitch_MASK 		0xfffff000 	/* bits 0-11 */
+#    define BP_bespitch_SHIFT 		0 		
+
+#define MGAREG_BESSTATUS 			0x3dc4
+
+#    define BS_besstat_MASK 		0xfffffffc 	/* bits 0-1 */
+#    define BS_besstat_a1 		0x0 		/* val 0, shift 0 */
+#    define BS_besstat_a2 		0x1 		/* val 1, shift 0 */
+#    define BS_besstat_b1 		0x2 		/* val 2, shift 0 */
+#    define BS_besstat_b2 		0x3 		/* val 3, shift 0 */
+
+#define MGAREG_BESV1SRCLST 			0x3d54
+
+#    define BSF_besv1srclast_MASK 	0xfffffc00 	/* bits 0-9 */
+#    define BSF_besv1srclast_SHIFT 	0 		
+
+#define MGAREG_BESV2SRCLST 			0x3d58
+
+#    define BSF_besv2srclst_MASK 	0xfffffc00 	/* bits 0-9 */
+#    define BSF_besv2srclst_SHIFT 	0 		
+
+#define MGAREG_BESV1WGHT 			0x3d48
+
+#    define BSF_besv1wght_MASK 		0xffff0003 	/* bits 2-15 */
+#    define BSF_besv1wght_SHIFT 	2 		
+#    define BSF_besv1wghts_MASK 	0xfffeffff 	/* bit 16 */
+#    define BSF_besv1wghts_disable 	0x0 		
+#    define BSF_besv1wghts_enable 	0x10000 	
+
+#define MGAREG_BESV2WGHT 			0x3d4c
+
+#    define BSF_besv2wght_MASK 		0xffff0003 	/* bits 2-15 */
+#    define BSF_besv2wght_SHIFT 	2 		
+#    define BSF_besv2wghts_MASK 	0xfffeffff 	/* bit 16 */
+#    define BSF_besv2wghts_disable 	0x0 		
+#    define BSF_besv2wghts_enable 	0x10000 	
+
+#define MGAREG_BESVCOORD 			0x3d2c
+
+#    define BVC_besbot_MASK 		0xfffff800 	/* bits 0-10 */
+#    define BVC_besbot_SHIFT 		0 		
+#    define BVC_bestop_MASK 		0xf800ffff 	/* bits 16-26 */
+#    define BVC_bestop_SHIFT 		16 		
+
+#define MGAREG_BESVISCAL 			0x3d34
+
+#    define BVISF_besviscal_MASK 	0xffe00003 	/* bits 2-20 */
+#    define BVISF_besviscal_SHIFT 	2 		
+
+#define MGAREG_CODECADDR 			0x3e44
+#define MGAREG_CODECCTL 			0x3e40
+#define MGAREG_CODECHARDPTR 			0x3e4c
+#define MGAREG_CODECHOSTPTR 			0x3e48
+#define MGAREG_CODECLCODE 			0x3e50
+#define MGAREG_CXBNDRY 			0x1c80
+
+#    define CXB_cxleft_MASK 		0xfffff000 	/* bits 0-11 */
+#    define CXB_cxleft_SHIFT 		0 		
+#    define CXB_cxright_MASK 		0xf000ffff 	/* bits 16-27 */
+#    define CXB_cxright_SHIFT 		16 		
+
+#define MGAREG_CXLEFT 				0x1ca0
+#define MGAREG_CXRIGHT 			0x1ca4
+#define MGAREG_DMAMAP30 			0x1e30
+#define MGAREG_DMAMAP74 			0x1e34
+#define MGAREG_DMAMAPB8 			0x1e38
+#define MGAREG_DMAMAPFC 			0x1e3c
+#define MGAREG_DMAPAD 				0x1c54
+#define MGAREG_DR0_Z32LSB 			0x2c50
+#define MGAREG_DR0_Z32MSB 			0x2c54
+#define MGAREG_DR2_Z32LSB 			0x2c60
+#define MGAREG_DR2_Z32MSB 			0x2c64
+#define MGAREG_DR3_Z32LSB 			0x2c68
+#define MGAREG_DR3_Z32MSB 			0x2c6c
+#define MGAREG_DR0 				0x1cc0
+#define MGAREG_DR2 				0x1cc8
+#define MGAREG_DR3 				0x1ccc
+#define MGAREG_DR4 				0x1cd0
+#define MGAREG_DR6 				0x1cd8
+#define MGAREG_DR7 				0x1cdc
+#define MGAREG_DR8 				0x1ce0
+#define MGAREG_DR10 				0x1ce8
+#define MGAREG_DR11 				0x1cec
+#define MGAREG_DR12 				0x1cf0
+#define MGAREG_DR14 				0x1cf8
+#define MGAREG_DR15 				0x1cfc
+#define MGAREG_DSTORG 				0x2cb8
+/* DSTORG fields: bit 0 = dstmap, bit 1 = dstacc, bits 3-31 = dstorg; no field is defined here for bit 2. */
+#    define DO_dstmap_MASK 		0xfffffffe 	/* bit 0 */
+#    define DO_dstmap_fb 		0x0 		
+#    define DO_dstmap_sys 		0x1 		
+#    define DO_dstacc_MASK 		0xfffffffd 	/* bit 1 */
+#    define DO_dstacc_pci 		0x0 		
+#    define DO_dstacc_agp 		0x2 		
+#    define DO_dstorg_MASK 		0x7 		/* bits 3-31 (mask keeps only bits 0-2) */
+#    define DO_dstorg_SHIFT 		3 		
+
+#define MGAREG_DWG_INDIR_WT 			0x1e80
+#define MGAREG_DWGCTL 				0x1c00
+/* DWGCTL fields. Each *_MASK has the field's bits cleared, all others set. No field is defined here for bits 15 or 24. */
+#    define DC_opcod_MASK 		0xfffffff0 	/* bits 0-3 */
+#    define DC_opcod_line_open 		0x0 		/* val 0, shift 0 */
+#    define DC_opcod_autoline_open 	0x1 		/* val 1, shift 0 */
+#    define DC_opcod_line_close 	0x2 		/* val 2, shift 0 */
+#    define DC_opcod_autoline_close 	0x3 		/* val 3, shift 0 */
+#    define DC_opcod_trap 		0x4 		/* val 4, shift 0 */
+#    define DC_opcod_texture_trap 	0x6 		/* val 6, shift 0 */
+#    define DC_opcod_bitblt 		0x8 		/* val 8, shift 0 */
+#    define DC_opcod_iload 		0x9 		/* val 9, shift 0 */
+#    define DC_atype_MASK 		0xffffff8f 	/* bits 4-6 */
+#    define DC_atype_rpl 		0x0 		/* val 0, shift 4 */
+#    define DC_atype_rstr 		0x10 		/* val 1, shift 4 */
+#    define DC_atype_zi 		0x30 		/* val 3, shift 4 */
+#    define DC_atype_blk 		0x40 		/* val 4, shift 4 */
+#    define DC_atype_i 			0x70 		/* val 7, shift 4 */
+#    define DC_linear_MASK 		0xffffff7f 	/* bit 7 */
+#    define DC_linear_xy 		0x0 		
+#    define DC_linear_linear 		0x80 		
+#    define DC_zmode_MASK 		0xfffff8ff 	/* bits 8-10 */
+#    define DC_zmode_nozcmp 		0x0 		/* val 0, shift 8 */
+#    define DC_zmode_ze 		0x200 		/* val 2, shift 8 */
+#    define DC_zmode_zne 		0x300 		/* val 3, shift 8 */
+#    define DC_zmode_zlt 		0x400 		/* val 4, shift 8 */
+#    define DC_zmode_zlte 		0x500 		/* val 5, shift 8 */
+#    define DC_zmode_zgt 		0x600 		/* val 6, shift 8 */
+#    define DC_zmode_zgte 		0x700 		/* val 7, shift 8 */
+#    define DC_solid_MASK 		0xfffff7ff 	/* bit 11 */
+#    define DC_solid_disable 		0x0 		
+#    define DC_solid_enable 		0x800 		
+#    define DC_arzero_MASK 		0xffffefff 	/* bit 12 */
+#    define DC_arzero_disable 		0x0 		
+#    define DC_arzero_enable 		0x1000 		
+#    define DC_sgnzero_MASK 		0xffffdfff 	/* bit 13 */
+#    define DC_sgnzero_disable 		0x0 		
+#    define DC_sgnzero_enable 		0x2000 		
+#    define DC_shftzero_MASK 		0xffffbfff 	/* bit 14 */
+#    define DC_shftzero_disable 	0x0 		
+#    define DC_shftzero_enable 		0x4000 		
+#    define DC_bop_MASK 		0xfff0ffff 	/* bits 16-19 */
+#    define DC_bop_SHIFT 		16 		
+#    define DC_trans_MASK 		0xff0fffff 	/* bits 20-23 */
+#    define DC_trans_SHIFT 		20 		
+#    define DC_bltmod_MASK 		0xe1ffffff 	/* bits 25-28 */
+#    define DC_bltmod_bmonolef 		0x0 		/* val 0, shift 25 */
+#    define DC_bltmod_bmonowf 		0x8000000 	/* val 4, shift 25 (listed out of numeric order) */
+#    define DC_bltmod_bplan 		0x2000000 	/* val 1, shift 25 */
+#    define DC_bltmod_bfcol 		0x4000000 	/* val 2, shift 25 */
+#    define DC_bltmod_bu32bgr 		0x6000000 	/* val 3, shift 25 */
+#    define DC_bltmod_bu32rgb 		0xe000000 	/* val 7, shift 25 */
+#    define DC_bltmod_bu24bgr 		0x16000000 	/* val 11, shift 25 */
+#    define DC_bltmod_bu24rgb 		0x1e000000 	/* val 15, shift 25 */
+#    define DC_pattern_MASK 		0xdfffffff 	/* bit 29 */
+#    define DC_pattern_disable 		0x0 		
+#    define DC_pattern_enable 		0x20000000 	
+#    define DC_transc_MASK 		0xbfffffff 	/* bit 30 */
+#    define DC_transc_disable 		0x0 		
+#    define DC_transc_enable 		0x40000000 	
+#    define DC_clipdis_MASK 		0x7fffffff 	/* bit 31 */
+#    define DC_clipdis_disable 		0x0 		
+#    define DC_clipdis_enable 		0x80000000 	
+
+#define MGAREG_DWGSYNC 			0x2c4c
+
+#    define DS_dwgsyncaddr_MASK 	0x3 		/* bits 2-31 */
+#    define DS_dwgsyncaddr_SHIFT 	2 		
+
+#define MGAREG_FCOL 				0x1c24
+#define MGAREG_FIFOSTATUS 			0x1e10
+
+#    define FS_fifocount_MASK 		0xffffff80 	/* bits 0-6 */
+#    define FS_fifocount_SHIFT 		0 		
+#    define FS_bfull_MASK 		0xfffffeff 	/* bit 8 */
+#    define FS_bfull_disable 		0x0 		
+#    define FS_bfull_enable 		0x100 		
+#    define FS_bempty_MASK 		0xfffffdff 	/* bit 9 */
+#    define FS_bempty_disable 		0x0 		
+#    define FS_bempty_enable 		0x200 		
+
+#define MGAREG_FOGCOL 				0x1cf4
+#define MGAREG_FOGSTART 			0x1cc4
+#define MGAREG_FOGXINC 			0x1cd4
+#define MGAREG_FOGYINC 			0x1ce4
+#define MGAREG_FXBNDRY 			0x1c84
+/* FXBNDRY packs two 16-bit fields: fxleft in bits 0-15 and fxright in bits 16-31. */
+#    define XA_fxleft_MASK 		0xffff0000 	/* bits 0-15 */
+#    define XA_fxleft_SHIFT 		0 		
+#    define XA_fxright_MASK 		0xffff 		/* bits 16-31 */
+#    define XA_fxright_SHIFT 		16 		
+
+#define MGAREG_FXLEFT 				0x1ca8
+#define MGAREG_FXRIGHT 			0x1cac
+#define MGAREG_ICLEAR 				0x1e18
+
+#    define IC_softrapiclr_MASK 	0xfffffffe 	/* bit 0 */
+#    define IC_softrapiclr_disable 	0x0 		
+#    define IC_softrapiclr_enable 	0x1 		
+#    define IC_pickiclr_MASK 		0xfffffffb 	/* bit 2 */
+#    define IC_pickiclr_disable 	0x0 		
+#    define IC_pickiclr_enable 		0x4 		
+#    define IC_vlineiclr_MASK 		0xffffffdf 	/* bit 5 */
+#    define IC_vlineiclr_disable 	0x0 		
+#    define IC_vlineiclr_enable 	0x20 		
+#    define IC_wiclr_MASK 		0xffffff7f 	/* bit 7 */
+#    define IC_wiclr_disable 		0x0 		
+#    define IC_wiclr_enable 		0x80 		
+#    define IC_wciclr_MASK 		0xfffffeff 	/* bit 8 */
+#    define IC_wciclr_disable 		0x0 		
+#    define IC_wciclr_enable 		0x100 		
+
+#define MGAREG_IEN 				0x1e1c
+
+#    define IE_softrapien_MASK 		0xfffffffe 	/* bit 0 */
+#    define IE_softrapien_disable 	0x0 		
+#    define IE_softrapien_enable 	0x1 		
+#    define IE_pickien_MASK 		0xfffffffb 	/* bit 2 */
+#    define IE_pickien_disable 		0x0 		
+#    define IE_pickien_enable 		0x4 		
+#    define IE_vlineien_MASK 		0xffffffdf 	/* bit 5 */
+#    define IE_vlineien_disable 	0x0 		
+#    define IE_vlineien_enable 		0x20 		
+#    define IE_extien_MASK 		0xffffffbf 	/* bit 6 */
+#    define IE_extien_disable 		0x0 		
+#    define IE_extien_enable 		0x40 		
+#    define IE_wien_MASK 		0xffffff7f 	/* bit 7 */
+#    define IE_wien_disable 		0x0 		
+#    define IE_wien_enable 		0x80 		
+#    define IE_wcien_MASK 		0xfffffeff 	/* bit 8 */
+#    define IE_wcien_disable 		0x0 		
+#    define IE_wcien_enable 		0x100 		
+
+#define MGAREG_LEN 				0x1c5c
+#define MGAREG_MACCESS 			0x1c04
+
+#    define MA_pwidth_MASK 		0xfffffffc 	/* bits 0-1 */
+#    define MA_pwidth_8 		0x0 		/* val 0, shift 0 */
+#    define MA_pwidth_16 		0x1 		/* val 1, shift 0 */
+#    define MA_pwidth_32 		0x2 		/* val 2, shift 0 */
+#    define MA_pwidth_24 		0x3 		/* val 3, shift 0 */
+#    define MA_zwidth_MASK 		0xffffffe7 	/* bits 3-4 */
+#    define MA_zwidth_16 		0x0 		/* val 0, shift 3 */
+#    define MA_zwidth_32 		0x8 		/* val 1, shift 3 */
+#    define MA_zwidth_15 		0x10 		/* val 2, shift 3 */
+#    define MA_zwidth_24 		0x18 		/* val 3, shift 3 */
+#    define MA_memreset_MASK 		0xffff7fff 	/* bit 15 */
+#    define MA_memreset_disable 	0x0 		
+#    define MA_memreset_enable 		0x8000 		
+#    define MA_fogen_MASK 		0xfbffffff 	/* bit 26 */
+#    define MA_fogen_disable 		0x0 		
+#    define MA_fogen_enable 		0x4000000 	
+#    define MA_tlutload_MASK 		0xdfffffff 	/* bit 29 */
+#    define MA_tlutload_disable 	0x0 		
+#    define MA_tlutload_enable 		0x20000000 	
+#    define MA_nodither_MASK 		0xbfffffff 	/* bit 30 */
+#    define MA_nodither_disable 	0x0 		
+#    define MA_nodither_enable 		0x40000000 	
+#    define MA_dit555_MASK 		0x7fffffff 	/* bit 31 */
+#    define MA_dit555_disable 		0x0 		
+#    define MA_dit555_enable 		0x80000000 	
+
+#define MGAREG_MCTLWTST 			0x1c08
+/* MCTLWTST fields. Only casltncy, rasmin and bpldelay get a *_SHIFT in this group; several multi-bit fields have a *_MASK only. */
+#    define MCWS_casltncy_MASK 		0xfffffff8 	/* bits 0-2 */
+#    define MCWS_casltncy_SHIFT 	0 		
+#    define MCWS_rrddelay_MASK 		0xffffffcf 	/* bits 4-5 */
+#    define MCWS_rcddelay_MASK 		0xfffffe7f 	/* bits 7-8 */
+#    define MCWS_rasmin_MASK 		0xffffe3ff 	/* bits 10-12 */
+#    define MCWS_rasmin_SHIFT 		10 		
+#    define MCWS_rpdelay_MASK 		0xffff3fff 	/* bits 14-15 */
+#    define MCWS_wrdelay_MASK 		0xfff3ffff 	/* bits 18-19 */
+#    define MCWS_rddelay_MASK 		0xffdfffff 	/* bit 21 */
+#    define MCWS_rddelay_disable 	0x0 		
+#    define MCWS_rddelay_enable 	0x200000 	
+#    define MCWS_smrdelay_MASK 		0xfe7fffff 	/* bits 23-24 */
+#    define MCWS_bwcdelay_MASK 		0xf3ffffff 	/* bits 26-27 */
+#    define MCWS_bpldelay_MASK 		0x1fffffff 	/* bits 29-31 */
+#    define MCWS_bpldelay_SHIFT 	29 		
+
+#define MGAREG_MEMRDBK 			0x1e44
+
+#    define MRB_mclkbrd0_MASK 		0xfffffff0 	/* bits 0-3 */
+#    define MRB_mclkbrd0_SHIFT 		0 		
+#    define MRB_mclkbrd1_MASK 		0xfffffe1f 	/* bits 5-8 */
+#    define MRB_mclkbrd1_SHIFT 		5 		
+#    define MRB_strmfctl_MASK 		0xff3fffff 	/* bits 22-23 */
+#    define MRB_mrsopcod_MASK 		0xe1ffffff 	/* bits 25-28 */
+#    define MRB_mrsopcod_SHIFT 		25 		
+
+#define MGAREG_OPMODE 				0x1e54
+
+#    define OM_dmamod_MASK 		0xfffffff3 	/* bits 2-3 */
+#    define OM_dmamod_general 		0x0 		/* val 0, shift 2 */
+#    define OM_dmamod_blit 		0x4 		/* val 1, shift 2 */
+#    define OM_dmamod_vector 		0x8 		/* val 2, shift 2 */
+#    define OM_dmamod_vertex 		0xc 		/* val 3, shift 2 */
+#    define OM_dmadatasiz_MASK 		0xfffffcff 	/* bits 8-9 */
+#    define OM_dmadatasiz_8 		0x0 		/* val 0, shift 8 */
+#    define OM_dmadatasiz_16 		0x100 		/* val 1, shift 8 */
+#    define OM_dmadatasiz_32 		0x200 		/* val 2, shift 8 */
+#    define OM_dirdatasiz_MASK 		0xfffcffff 	/* bits 16-17 */
+#    define OM_dirdatasiz_8 		0x0 		/* val 0, shift 16 */
+#    define OM_dirdatasiz_16 		0x10000 	/* val 1, shift 16 */
+#    define OM_dirdatasiz_32 		0x20000 	/* val 2, shift 16 */
+
+#define MGAREG_PAT0 				0x1c10
+#define MGAREG_PAT1 				0x1c14
+#define MGAREG_PITCH 				0x1c8c
+
+#    define P_iy_MASK 			0xffffe000 	/* bits 0-12 */
+#    define P_iy_SHIFT 			0 		
+#    define P_ylin_MASK 		0xffff7fff 	/* bit 15 */
+#    define P_ylin_disable 		0x0 		
+#    define P_ylin_enable 		0x8000 		
+
+#define MGAREG_PLNWT 				0x1c1c
+#define MGAREG_PRIMADDRESS 			0x1e58
+
+#    define PDCA_primod_MASK 		0xfffffffc 	/* bits 0-1 */
+#    define PDCA_primod_general 	0x0 		/* val 0, shift 0 */
+#    define PDCA_primod_blit 		0x1 		/* val 1, shift 0 */
+#    define PDCA_primod_vector 		0x2 		/* val 2, shift 0 */
+#    define PDCA_primod_vertex 		0x3 		/* val 3, shift 0 */
+#    define PDCA_primaddress_MASK 	0x3 		/* bits 2-31 */
+#    define PDCA_primaddress_SHIFT 	2 		
+
+#define MGAREG_PRIMEND 			0x1e5c
+
+#    define PDEA_primnostart_MASK 	0xfffffffe 	/* bit 0 */
+#    define PDEA_primnostart_disable 	0x0 		
+#    define PDEA_primnostart_enable 	0x1 		
+#    define PDEA_pagpxfer_MASK 		0xfffffffd 	/* bit 1 */
+#    define PDEA_pagpxfer_disable 	0x0 		
+#    define PDEA_pagpxfer_enable 	0x2 		
+#    define PDEA_primend_MASK 		0x3 		/* bits 2-31 */
+#    define PDEA_primend_SHIFT 		2 		
+
+#define MGAREG_PRIMPTR 			0x1e50
+
+#    define PLS_primptren0_MASK 	0xfffffffe 	/* bit 0 */
+#    define PLS_primptren0_disable 	0x0 		
+#    define PLS_primptren0_enable 	0x1 		
+#    define PLS_primptren1_MASK 	0xfffffffd 	/* bit 1 */
+#    define PLS_primptren1_disable 	0x0 		
+#    define PLS_primptren1_enable 	0x2 		
+#    define PLS_primptr_MASK 		0x7 		/* bits 3-31 */
+#    define PLS_primptr_SHIFT 		3 		
+
+#define MGAREG_RST 				0x1e40
+
+#    define R_softreset_MASK 		0xfffffffe 	/* bit 0 */
+#    define R_softreset_disable 	0x0 		
+#    define R_softreset_enable 		0x1 		
+#    define R_softextrst_MASK 		0xfffffffd 	/* bit 1 */
+#    define R_softextrst_disable 	0x0 		
+#    define R_softextrst_enable 	0x2 		
+
+#define MGAREG_SECADDRESS 			0x2c40
+
+#    define SDCA_secmod_MASK 		0xfffffffc 	/* bits 0-1 */
+#    define SDCA_secmod_general 	0x0 		/* val 0, shift 0 */
+#    define SDCA_secmod_blit 		0x1 		/* val 1, shift 0 */
+#    define SDCA_secmod_vector 		0x2 		/* val 2, shift 0 */
+#    define SDCA_secmod_vertex 		0x3 		/* val 3, shift 0 */
+#    define SDCA_secaddress_MASK 	0x3 		/* bits 2-31 */
+#    define SDCA_secaddress_SHIFT 	2 		
+
+#define MGAREG_SECEND 				0x2c44
+
+#    define SDEA_sagpxfer_MASK 		0xfffffffd 	/* bit 1 */
+#    define SDEA_sagpxfer_disable 	0x0 		
+#    define SDEA_sagpxfer_enable 	0x2 		
+#    define SDEA_secend_MASK 		0x3 		/* bits 2-31 */
+#    define SDEA_secend_SHIFT 		2 		
+
+#define MGAREG_SETUPADDRESS 			0x2cd0
+
+#    define SETADD_mode_MASK 		0xfffffffc 	/* bits 0-1 */
+#    define SETADD_mode_vertlist 	0x0 		/* val 0, shift 0 */
+#    define SETADD_address_MASK 	0x3 		/* bits 2-31 */
+#    define SETADD_address_SHIFT 	2 		
+
+#define MGAREG_SETUPEND 			0x2cd4
+
+#    define SETEND_agpxfer_MASK 	0xfffffffd 	/* bit 1 */
+#    define SETEND_agpxfer_disable 	0x0 		
+#    define SETEND_agpxfer_enable 	0x2 		
+#    define SETEND_address_MASK 	0x3 		/* bits 2-31 */
+#    define SETEND_address_SHIFT 	2 		
+
+#define MGAREG_SGN 				0x1c58
+/* SGN fields. S_sdydxl and S_scanleft both name bit 0; NOTE(review): presumably mode-dependent aliases -- confirm. */
+#    define S_sdydxl_MASK 		0xfffffffe 	/* bit 0 */
+#    define S_sdydxl_y 			0x0 		
+#    define S_sdydxl_x 			0x1 		
+#    define S_scanleft_MASK 		0xfffffffe 	/* bit 0 (same bit as S_sdydxl) */
+#    define S_scanleft_disable 		0x0 		
+#    define S_scanleft_enable 		0x1 		
+#    define S_sdxl_MASK 		0xfffffffd 	/* bit 1 */
+#    define S_sdxl_pos 			0x0 		
+#    define S_sdxl_neg 			0x2 		
+#    define S_sdy_MASK 			0xfffffffb 	/* bit 2 */
+#    define S_sdy_pos 			0x0 		
+#    define S_sdy_neg 			0x4 		
+#    define S_sdxr_MASK 		0xffffffdf 	/* bit 5 */
+#    define S_sdxr_pos 			0x0 		
+#    define S_sdxr_neg 			0x20 		
+#    define S_brkleft_MASK 		0xfffffeff 	/* bit 8 */
+#    define S_brkleft_disable 		0x0 		
+#    define S_brkleft_enable 		0x100 		
+#    define S_errorinit_MASK 		0x7fffffff 	/* bit 31 */
+#    define S_errorinit_disable 	0x0 		
+#    define S_errorinit_enable 		0x80000000 	
+
+#define MGAREG_SHIFT 				0x1c50
+/* SHIFT fields overlap: funcnt (0-6) covers x_off (0-3) and y_off (4-6); funoff and stylelen are both bits 16-21. NOTE(review): presumably alternative layouts -- confirm. */
+#    define FSC_x_off_MASK 		0xfffffff0 	/* bits 0-3 */
+#    define FSC_x_off_SHIFT 		0 		
+#    define FSC_funcnt_MASK 		0xffffff80 	/* bits 0-6 */
+#    define FSC_funcnt_SHIFT 		0 		
+#    define FSC_y_off_MASK 		0xffffff8f 	/* bits 4-6 */
+#    define FSC_y_off_SHIFT 		4 		
+#    define FSC_funoff_MASK 		0xffc0ffff 	/* bits 16-21 */
+#    define FSC_funoff_SHIFT 		16 		
+#    define FSC_stylelen_MASK 		0xffc0ffff 	/* bits 16-21 (same bits as FSC_funoff) */
+#    define FSC_stylelen_SHIFT 		16 		
+
+#define MGAREG_SOFTRAP 			0x2c48
+
+#    define STH_softraphand_MASK 	0x3 		/* bits 2-31 */
+#    define STH_softraphand_SHIFT 	2 		
+
+#define MGAREG_SPECBSTART 			0x2c98
+#define MGAREG_SPECBXINC 			0x2c9c
+#define MGAREG_SPECBYINC 			0x2ca0
+#define MGAREG_SPECGSTART 			0x2c8c
+#define MGAREG_SPECGXINC 			0x2c90
+#define MGAREG_SPECGYINC 			0x2c94
+#define MGAREG_SPECRSTART 			0x2c80
+#define MGAREG_SPECRXINC 			0x2c84
+#define MGAREG_SPECRYINC 			0x2c88
+#define MGAREG_SRC0 				0x1c30
+#define MGAREG_SRC1 				0x1c34
+#define MGAREG_SRC2 				0x1c38
+#define MGAREG_SRC3 				0x1c3c
+#define MGAREG_SRCORG 				0x2cb4
+
+#    define SO_srcmap_MASK 		0xfffffffe 	/* bit 0 */
+#    define SO_srcmap_fb 		0x0 		
+#    define SO_srcmap_sys 		0x1 		
+#    define SO_srcacc_MASK 		0xfffffffd 	/* bit 1 */
+#    define SO_srcacc_pci 		0x0 		
+#    define SO_srcacc_agp 		0x2 		
+#    define SO_srcorg_MASK 		0x7 		/* bits 3-31 */
+#    define SO_srcorg_SHIFT 		3 		
+
+#define MGAREG_STATUS 				0x1e14
+
+#    define STAT_softrapen_MASK 	0xfffffffe 	/* bit 0 */
+#    define STAT_softrapen_disable 	0x0 		
+#    define STAT_softrapen_enable 	0x1 		
+#    define STAT_pickpen_MASK 		0xfffffffb 	/* bit 2 */
+#    define STAT_pickpen_disable 	0x0 		
+#    define STAT_pickpen_enable 	0x4 		
+#    define STAT_vsyncsts_MASK 		0xfffffff7 	/* bit 3 */
+#    define STAT_vsyncsts_disable 	0x0 		
+#    define STAT_vsyncsts_enable 	0x8 		
+#    define STAT_vsyncpen_MASK 		0xffffffef 	/* bit 4 */
+#    define STAT_vsyncpen_disable 	0x0 		
+#    define STAT_vsyncpen_enable 	0x10 		
+#    define STAT_vlinepen_MASK 		0xffffffdf 	/* bit 5 */
+#    define STAT_vlinepen_disable 	0x0 		
+#    define STAT_vlinepen_enable 	0x20 		
+#    define STAT_extpen_MASK 		0xffffffbf 	/* bit 6 */
+#    define STAT_extpen_disable 	0x0 		
+#    define STAT_extpen_enable 		0x40 		
+#    define STAT_wpen_MASK 		0xffffff7f 	/* bit 7 */
+#    define STAT_wpen_disable 		0x0 		
+#    define STAT_wpen_enable 		0x80 		
+#    define STAT_wcpen_MASK 		0xfffffeff 	/* bit 8 */
+#    define STAT_wcpen_disable 		0x0 		
+#    define STAT_wcpen_enable 		0x100 		
+#    define STAT_dwgengsts_MASK 	0xfffeffff 	/* bit 16 */
+#    define STAT_dwgengsts_disable 	0x0 		
+#    define STAT_dwgengsts_enable 	0x10000 	
+#    define STAT_endprdmasts_MASK 	0xfffdffff 	/* bit 17 */
+#    define STAT_endprdmasts_disable 	0x0 		
+#    define STAT_endprdmasts_enable 	0x20000 	
+#    define STAT_wbusy_MASK 		0xfffbffff 	/* bit 18 */
+#    define STAT_wbusy_disable 		0x0 		
+#    define STAT_wbusy_enable 		0x40000 	
+#    define STAT_swflag_MASK 		0xfffffff 	/* bits 28-31 */
+#    define STAT_swflag_SHIFT 		28 		
+
+#define MGAREG_STENCIL 			0x2cc8
+/* STENCIL packs three 8-bit fields in bits 0-23. These macros share the S_ prefix with the MGAREG_SGN field macros. */
+#    define S_sref_MASK 		0xffffff00 	/* bits 0-7 */
+#    define S_sref_SHIFT 		0 		
+#    define S_smsk_MASK 		0xffff00ff 	/* bits 8-15 */
+#    define S_smsk_SHIFT 		8 		
+#    define S_swtmsk_MASK 		0xff00ffff 	/* bits 16-23 */
+#    define S_swtmsk_SHIFT 		16 		
+
+#define MGAREG_STENCILCTL 			0x2ccc
+
+#    define SC_smode_MASK 		0xfffffff8 	/* bits 0-2 */
+#    define SC_smode_salways 		0x0 		/* val 0, shift 0 */
+#    define SC_smode_snever 		0x1 		/* val 1, shift 0 */
+#    define SC_smode_se 		0x2 		/* val 2, shift 0 */
+#    define SC_smode_sne 		0x3 		/* val 3, shift 0 */
+#    define SC_smode_slt 		0x4 		/* val 4, shift 0 */
+#    define SC_smode_slte 		0x5 		/* val 5, shift 0 */
+#    define SC_smode_sgt 		0x6 		/* val 6, shift 0 */
+#    define SC_smode_sgte 		0x7 		/* val 7, shift 0 */
+#    define SC_sfailop_MASK 		0xffffffc7 	/* bits 3-5 */
+#    define SC_sfailop_keep 		0x0 		/* val 0, shift 3 */
+#    define SC_sfailop_zero 		0x8 		/* val 1, shift 3 */
+#    define SC_sfailop_replace 		0x10 		/* val 2, shift 3 */
+#    define SC_sfailop_incrsat 		0x18 		/* val 3, shift 3 */
+#    define SC_sfailop_decrsat 		0x20 		/* val 4, shift 3 */
+#    define SC_sfailop_invert 		0x28 		/* val 5, shift 3 */
+#    define SC_sfailop_incr 		0x30 		/* val 6, shift 3 */
+#    define SC_sfailop_decr 		0x38 		/* val 7, shift 3 */
+#    define SC_szfailop_MASK 		0xfffffe3f 	/* bits 6-8 */
+#    define SC_szfailop_keep 		0x0 		/* val 0, shift 6 */
+#    define SC_szfailop_zero 		0x40 		/* val 1, shift 6 */
+#    define SC_szfailop_replace 	0x80 		/* val 2, shift 6 */
+#    define SC_szfailop_incrsat 	0xc0 		/* val 3, shift 6 */
+#    define SC_szfailop_decrsat 	0x100 		/* val 4, shift 6 */
+#    define SC_szfailop_invert 		0x140 		/* val 5, shift 6 */
+#    define SC_szfailop_incr 		0x180 		/* val 6, shift 6 */
+#    define SC_szfailop_decr 		0x1c0 		/* val 7, shift 6 */
+#    define SC_szpassop_MASK 		0xfffff1ff 	/* bits 9-11 */
+#    define SC_szpassop_keep 		0x0 		/* val 0, shift 9 */
+#    define SC_szpassop_zero 		0x200 		/* val 1, shift 9 */
+#    define SC_szpassop_replace 	0x400 		/* val 2, shift 9 */
+#    define SC_szpassop_incrsat 	0x600 		/* val 3, shift 9 */
+#    define SC_szpassop_decrsat 	0x800 		/* val 4, shift 9 */
+#    define SC_szpassop_invert 		0xa00 		/* val 5, shift 9 */
+#    define SC_szpassop_incr 		0xc00 		/* val 6, shift 9 */
+#    define SC_szpassop_decr 		0xe00 		/* val 7, shift 9 */
+
+#define MGAREG_TDUALSTAGE0 			0x2cf8
+
+#    define TD0_color_arg2_MASK 	0xfffffffc 	/* bits 0-1 */
+#    define TD0_color_arg2_diffuse 	0x0 		/* val 0, shift 0 */
+#    define TD0_color_arg2_specular 	0x1 		/* val 1, shift 0 */
+#    define TD0_color_arg2_fcol 	0x2 		/* val 2, shift 0 */
+#    define TD0_color_arg2_prevstage 	0x3 		/* val 3, shift 0 */
+#    define TD0_color_alpha_MASK 	0xffffffe3 	/* bits 2-4 */
+#    define TD0_color_alpha_diffuse 	0x0 		/* val 0, shift 2 */
+#    define TD0_color_alpha_fcol 	0x4 		/* val 1, shift 2 */
+#    define TD0_color_alpha_currtex 	0x8 		/* val 2, shift 2 */
+#    define TD0_color_alpha_prevtex 	0xc 		/* val 3, shift 2 */
+#    define TD0_color_alpha_prevstage 	0x10 		/* val 4, shift 2 */
+#    define TD0_color_arg1_replicatealpha_MASK 0xffffffdf 	/* bit 5 */
+#    define TD0_color_arg1_replicatealpha_disable 0x0 		
+#    define TD0_color_arg1_replicatealpha_enable 0x20 		
+#    define TD0_color_arg1_inv_MASK 	0xffffffbf 	/* bit 6 */
+#    define TD0_color_arg1_inv_disable 	0x0 		
+#    define TD0_color_arg1_inv_enable 	0x40 		
+#    define TD0_color_arg2_replicatealpha_MASK 0xffffff7f 	/* bit 7 */
+#    define TD0_color_arg2_replicatealpha_disable 0x0 		
+#    define TD0_color_arg2_replicatealpha_enable 0x80 		
+#    define TD0_color_arg2_inv_MASK 	0xfffffeff 	/* bit 8 */
+#    define TD0_color_arg2_inv_disable 	0x0 		
+#    define TD0_color_arg2_inv_enable 	0x100 		
+#    define TD0_color_alpha1inv_MASK 	0xfffffdff 	/* bit 9 */
+#    define TD0_color_alpha1inv_disable 0x0 		
+#    define TD0_color_alpha1inv_enable 	0x200 		
+#    define TD0_color_alpha2inv_MASK 	0xfffffbff 	/* bit 10 */
+#    define TD0_color_alpha2inv_disable 0x0 		
+#    define TD0_color_alpha2inv_enable 	0x400 		
+#    define TD0_color_arg1mul_MASK 	0xfffff7ff 	/* bit 11 */
+#    define TD0_color_arg1mul_disable 	0x0 		/* val 0, shift 11 */
+#    define TD0_color_arg1mul_alpha1 	0x800 		/* val 1, shift 11 */
+#    define TD0_color_arg2mul_MASK 	0xffffefff 	/* bit 12 */
+#    define TD0_color_arg2mul_disable 	0x0 		/* val 0, shift 12 */
+#    define TD0_color_arg2mul_alpha2 	0x1000 		/* val 1, shift 12 */
+#    define TD0_color_arg1add_MASK 	0xffffdfff 	/* bit 13 */
+#    define TD0_color_arg1add_disable 	0x0 		/* val 0, shift 13 */
+#    define TD0_color_arg1add_mulout 	0x2000 		/* val 1, shift 13 */
+#    define TD0_color_arg2add_MASK 	0xffffbfff 	/* bit 14 */
+#    define TD0_color_arg2add_disable 	0x0 		/* val 0, shift 14 */
+#    define TD0_color_arg2add_mulout 	0x4000 		/* val 1, shift 14 */
+#    define TD0_color_modbright_MASK 	0xfffe7fff 	/* bits 15-16 */
+#    define TD0_color_modbright_disable 0x0 		/* val 0, shift 15 */
+#    define TD0_color_modbright_2x 	0x8000 		/* val 1, shift 15 */
+#    define TD0_color_modbright_4x 	0x10000 	/* val 2, shift 15 */
+#    define TD0_color_add_MASK 		0xfffdffff 	/* bit 17 */
+#    define TD0_color_add_sub 		0x0 		/* val 0, shift 17 */
+#    define TD0_color_add_add 		0x20000 	/* val 1, shift 17 */
+#    define TD0_color_add2x_MASK 	0xfffbffff 	/* bit 18 */
+#    define TD0_color_add2x_disable 	0x0 		
+#    define TD0_color_add2x_enable 	0x40000 	
+#    define TD0_color_addbias_MASK 	0xfff7ffff 	/* bit 19 */
+#    define TD0_color_addbias_disable 	0x0 		
+#    define TD0_color_addbias_enable 	0x80000 	
+#    define TD0_color_blend_MASK 	0xffefffff 	/* bit 20 */
+#    define TD0_color_blend_disable 	0x0 		
+#    define TD0_color_blend_enable 	0x100000 	
+#    define TD0_color_sel_MASK 		0xff9fffff 	/* bits 21-22 */
+#    define TD0_color_sel_arg1 		0x0 		/* val 0, shift 21 */
+#    define TD0_color_sel_arg2 		0x200000 	/* val 1, shift 21 */
+#    define TD0_color_sel_add 		0x400000 	/* val 2, shift 21 */
+#    define TD0_color_sel_mul 		0x600000 	/* val 3, shift 21 */
+#    define TD0_alpha_arg1_inv_MASK 	0xff7fffff 	/* bit 23 */
+#    define TD0_alpha_arg1_inv_disable 	0x0 		
+#    define TD0_alpha_arg1_inv_enable 	0x800000 	
+#    define TD0_alpha_arg2_MASK 	0xfcffffff 	/* bits 24-25 */
+#    define TD0_alpha_arg2_diffuse 	0x0 		/* val 0, shift 24 */
+#    define TD0_alpha_arg2_fcol 	0x1000000 	/* val 1, shift 24 */
+#    define TD0_alpha_arg2_prevtex 	0x2000000 	/* val 2, shift 24 */
+#    define TD0_alpha_arg2_prevstage 	0x3000000 	/* val 3, shift 24 */
+#    define TD0_alpha_arg2_inv_MASK 	0xfbffffff 	/* bit 26 */
+#    define TD0_alpha_arg2_inv_disable 	0x0 		
+#    define TD0_alpha_arg2_inv_enable 	0x4000000 	
+#    define TD0_alpha_add_MASK 		0xf7ffffff 	/* bit 27 */
+#    define TD0_alpha_add_disable 	0x0 		
+#    define TD0_alpha_add_enable 	0x8000000 	
+#    define TD0_alpha_addbias_MASK 	0xefffffff 	/* bit 28 */
+#    define TD0_alpha_addbias_disable 	0x0 		
+#    define TD0_alpha_addbias_enable 	0x10000000 	
+#    define TD0_alpha_add2x_MASK 	0xdfffffff 	/* bit 29 */
+#    define TD0_alpha_add2x_disable 	0x0 		
+#    define TD0_alpha_add2x_enable 	0x20000000 	
+#    define TD0_alpha_modbright_MASK 	0xcfffffff 	/* bits 28-29; same bits as alpha_addbias (28) and alpha_add2x (29) */
+#    define TD0_alpha_modbright_disable 0x0 		/* val 0, shift 28 */
+#    define TD0_alpha_modbright_2x 	0x10000000 	/* val 1, shift 28; == TD0_alpha_addbias_enable */
+#    define TD0_alpha_modbright_4x 	0x20000000 	/* val 2, shift 28; == TD0_alpha_add2x_enable */
+#    define TD0_alpha_sel_MASK 		0x3fffffff 	/* bits 30-31 */
+#    define TD0_alpha_sel_arg1 		0x0 		/* val 0, shift 30 */
+#    define TD0_alpha_sel_arg2 		0x40000000 	/* val 1, shift 30 */
+#    define TD0_alpha_sel_add 		0x80000000 	/* val 2, shift 30 */
+#    define TD0_alpha_sel_mul 		0xc0000000 	/* val 3, shift 30 */
+
+#define MGAREG_TDUALSTAGE1 			0x2cfc
+
+#    define TD1_color_arg2_MASK 	0xfffffffc 	/* bits 0-1 */
+#    define TD1_color_arg2_diffuse 	0x0 		/* val 0, shift 0 */
+#    define TD1_color_arg2_specular 	0x1 		/* val 1, shift 0 */
+#    define TD1_color_arg2_fcol 	0x2 		/* val 2, shift 0 */
+#    define TD1_color_arg2_prevstage 	0x3 		/* val 3, shift 0 */
+#    define TD1_color_alpha_MASK 	0xffffffe3 	/* bits 2-4 */
+#    define TD1_color_alpha_diffuse 	0x0 		/* val 0, shift 2 */
+#    define TD1_color_alpha_fcol 	0x4 		/* val 1, shift 2 */
+#    define TD1_color_alpha_tex0 	0x8 		/* val 2, shift 2 */
+#    define TD1_color_alpha_prevtex 	0xc 		/* val 3, shift 2 */
+#    define TD1_color_alpha_prevstage 	0x10 		/* val 4, shift 2 */
+#    define TD1_color_arg1_replicatealpha_MASK 0xffffffdf 	/* bit 5 */
+#    define TD1_color_arg1_replicatealpha_disable 0x0 		
+#    define TD1_color_arg1_replicatealpha_enable 0x20 		
+#    define TD1_color_arg1_inv_MASK 	0xffffffbf 	/* bit 6 */
+#    define TD1_color_arg1_inv_disable 	0x0 		
+#    define TD1_color_arg1_inv_enable 	0x40 		
+#    define TD1_color_arg2_replicatealpha_MASK 0xffffff7f 	/* bit 7 */
+#    define TD1_color_arg2_replicatealpha_disable 0x0 		
+#    define TD1_color_arg2_replicatealpha_enable 0x80 		
+#    define TD1_color_arg2_inv_MASK 	0xfffffeff 	/* bit 8 */
+#    define TD1_color_arg2_inv_disable 	0x0 		
+#    define TD1_color_arg2_inv_enable 	0x100 		
+#    define TD1_color_alpha1inv_MASK 	0xfffffdff 	/* bit 9 */
+#    define TD1_color_alpha1inv_disable 0x0 		
+#    define TD1_color_alpha1inv_enable 	0x200 		
+#    define TD1_color_alpha2inv_MASK 	0xfffffbff 	/* bit 10 */
+#    define TD1_color_alpha2inv_disable 0x0 		
+#    define TD1_color_alpha2inv_enable 	0x400 		
+#    define TD1_color_arg1mul_MASK 	0xfffff7ff 	/* bit 11 */
+#    define TD1_color_arg1mul_disable 	0x0 		/* val 0, shift 11 */
+#    define TD1_color_arg1mul_alpha1 	0x800 		/* val 1, shift 11 */
+#    define TD1_color_arg2mul_MASK 	0xffffefff 	/* bit 12 */
+#    define TD1_color_arg2mul_disable 	0x0 		/* val 0, shift 12 */
+#    define TD1_color_arg2mul_alpha2 	0x1000 		/* val 1, shift 12 */
+#    define TD1_color_arg1add_MASK 	0xffffdfff 	/* bit 13 */
+#    define TD1_color_arg1add_disable 	0x0 		/* val 0, shift 13 */
+#    define TD1_color_arg1add_mulout 	0x2000 		/* val 1, shift 13 */
+#    define TD1_color_arg2add_MASK 	0xffffbfff 	/* bit 14 */
+#    define TD1_color_arg2add_disable 	0x0 		/* val 0, shift 14 */
+#    define TD1_color_arg2add_mulout 	0x4000 		/* val 1, shift 14 */
+#    define TD1_color_modbright_MASK 	0xfffe7fff 	/* bits 15-16 */
+#    define TD1_color_modbright_disable 0x0 		/* val 0, shift 15 */
+#    define TD1_color_modbright_2x 	0x8000 		/* val 1, shift 15 */
+#    define TD1_color_modbright_4x 	0x10000 	/* val 2, shift 15 */
+#    define TD1_color_add_MASK 		0xfffdffff 	/* bit 17 */
+#    define TD1_color_add_sub 		0x0 		/* val 0, shift 17 */
+#    define TD1_color_add_add 		0x20000 	/* val 1, shift 17 */
+#    define TD1_color_add2x_MASK 	0xfffbffff 	/* bit 18 */
+#    define TD1_color_add2x_disable 	0x0 		
+#    define TD1_color_add2x_enable 	0x40000 	
+#    define TD1_color_addbias_MASK 	0xfff7ffff 	/* bit 19 */
+#    define TD1_color_addbias_disable 	0x0 		
+#    define TD1_color_addbias_enable 	0x80000 	
+#    define TD1_color_blend_MASK 	0xffefffff 	/* bit 20 */
+#    define TD1_color_blend_disable 	0x0 		
+#    define TD1_color_blend_enable 	0x100000 	
+#    define TD1_color_sel_MASK 		0xff9fffff 	/* bits 21-22 */
+#    define TD1_color_sel_arg1 		0x0 		/* val 0, shift 21 */
+#    define TD1_color_sel_arg2 		0x200000 	/* val 1, shift 21 */
+#    define TD1_color_sel_add 		0x400000 	/* val 2, shift 21 */
+#    define TD1_color_sel_mul 		0x600000 	/* val 3, shift 21 */
+#    define TD1_alpha_arg1_inv_MASK 	0xff7fffff 	/* bit 23 */
+#    define TD1_alpha_arg1_inv_disable 	0x0 		
+#    define TD1_alpha_arg1_inv_enable 	0x800000 	
+#    define TD1_alpha_arg2_MASK 	0xfcffffff 	/* bits 24-25 */
+#    define TD1_alpha_arg2_diffuse 	0x0 		/* val 0, shift 24 */
+#    define TD1_alpha_arg2_fcol 	0x1000000 	/* val 1, shift 24 */
+#    define TD1_alpha_arg2_prevtex 	0x2000000 	/* val 2, shift 24 */
+#    define TD1_alpha_arg2_prevstage 	0x3000000 	/* val 3, shift 24 */
+#    define TD1_alpha_arg2_inv_MASK 	0xfbffffff 	/* bit 26 */
+#    define TD1_alpha_arg2_inv_disable 	0x0 		
+#    define TD1_alpha_arg2_inv_enable 	0x4000000 	
+#    define TD1_alpha_add_MASK 		0xf7ffffff 	/* bit 27 */
+#    define TD1_alpha_add_disable 	0x0 		
+#    define TD1_alpha_add_enable 	0x8000000 	
+#    define TD1_alpha_addbias_MASK 	0xefffffff 	/* bit 28 */
+#    define TD1_alpha_addbias_disable 	0x0 		
+#    define TD1_alpha_addbias_enable 	0x10000000 	
+#    define TD1_alpha_add2x_MASK 	0xdfffffff 	/* bit 29 */
+#    define TD1_alpha_add2x_disable 	0x0 		
+#    define TD1_alpha_add2x_enable 	0x20000000 	
+#    define TD1_alpha_modbright_MASK 	0xcfffffff 	/* bits 28-29; same bits as alpha_addbias (28) and alpha_add2x (29) */
+#    define TD1_alpha_modbright_disable 0x0 		/* val 0, shift 28 */
+#    define TD1_alpha_modbright_2x 	0x10000000 	/* val 1, shift 28; == TD1_alpha_addbias_enable */
+#    define TD1_alpha_modbright_4x 	0x20000000 	/* val 2, shift 28; == TD1_alpha_add2x_enable */
+#    define TD1_alpha_sel_MASK 		0x3fffffff 	/* bits 30-31 */
+#    define TD1_alpha_sel_arg1 		0x0 		/* val 0, shift 30 */
+#    define TD1_alpha_sel_arg2 		0x40000000 	/* val 1, shift 30 */
+#    define TD1_alpha_sel_add 		0x80000000 	/* val 2, shift 30 */
+#    define TD1_alpha_sel_mul 		0xc0000000 	/* val 3, shift 30 */
+
+#define MGAREG_TEST0 				0x1e48
+
+#    define TST_ramtsten_MASK 		0xfffffffe 	/* bit 0 */
+#    define TST_ramtsten_disable 	0x0 		
+#    define TST_ramtsten_enable 	0x1 		
+#    define TST_ramtstdone_MASK 	0xfffffffd 	/* bit 1 */
+#    define TST_ramtstdone_disable 	0x0 		
+#    define TST_ramtstdone_enable 	0x2 		
+#    define TST_wramtstpass_MASK 	0xfffffffb 	/* bit 2 */
+#    define TST_wramtstpass_disable 	0x0 		
+#    define TST_wramtstpass_enable 	0x4 		
+#    define TST_tcachetstpass_MASK 	0xfffffff7 	/* bit 3 */
+#    define TST_tcachetstpass_disable 	0x0 		
+#    define TST_tcachetstpass_enable 	0x8 		
+#    define TST_tluttstpass_MASK 	0xffffffef 	/* bit 4 */
+#    define TST_tluttstpass_disable 	0x0 		
+#    define TST_tluttstpass_enable 	0x10 		
+#    define TST_luttstpass_MASK 	0xffffffdf 	/* bit 5 */
+#    define TST_luttstpass_disable 	0x0 		
+#    define TST_luttstpass_enable 	0x20 		
+#    define TST_besramtstpass_MASK 	0xffffffbf 	/* bit 6 */
+#    define TST_besramtstpass_disable 	0x0 		
+#    define TST_besramtstpass_enable 	0x40 		
+#    define TST_ringen_MASK 		0xfffffeff 	/* bit 8 */
+#    define TST_ringen_disable 		0x0 		
+#    define TST_ringen_enable 		0x100 		
+#    define TST_apllbyp_MASK 		0xfffffdff 	/* bit 9 */
+#    define TST_apllbyp_disable 	0x0 		
+#    define TST_apllbyp_enable 		0x200 		
+#    define TST_hiten_MASK 		0xfffffbff 	/* bit 10 */
+#    define TST_hiten_disable 		0x0 		
+#    define TST_hiten_enable 		0x400 		
+#    define TST_tmode_MASK 		0xffffc7ff 	/* bits 11-13 */
+#    define TST_tmode_SHIFT 		11 		
+#    define TST_tclksel_MASK 		0xfffe3fff 	/* bits 14-16 */
+#    define TST_tclksel_SHIFT 		14 		
+#    define TST_ringcnten_MASK 		0xfffdffff 	/* bit 17 */
+#    define TST_ringcnten_disable 	0x0 		
+#    define TST_ringcnten_enable 	0x20000 	
+#    define TST_ringcnt_MASK 		0xc003ffff 	/* bits 18-29 */
+#    define TST_ringcnt_SHIFT 		18 		
+#    define TST_ringcntclksl_MASK 	0xbfffffff 	/* bit 30 */
+#    define TST_ringcntclksl_disable 	0x0 		
+#    define TST_ringcntclksl_enable 	0x40000000 	
+#    define TST_biosboot_MASK 		0x7fffffff 	/* bit 31 */
+#    define TST_biosboot_disable 	0x0 		
+#    define TST_biosboot_enable 	0x80000000 	
+
+#define MGAREG_TEXBORDERCOL 			0x2c5c
+#define MGAREG_TEXCTL 				0x2c30
+
+#    define TMC_tformat_MASK 		0xfffffff0 	/* bits 0-3 */
+#    define TMC_tformat_tw4 		0x0 		/* val 0, shift 0 */
+#    define TMC_tformat_tw8 		0x1 		/* val 1, shift 0 */
+#    define TMC_tformat_tw15 		0x2 		/* val 2, shift 0 */
+#    define TMC_tformat_tw16 		0x3 		/* val 3, shift 0 */
+#    define TMC_tformat_tw12 		0x4 		/* val 4, shift 0 */
+#    define TMC_tformat_tw32 		0x6 		/* val 6, shift 0 */
+#    define TMC_tformat_tw8a 		0x7 		/* val 7, shift 0 */
+#    define TMC_tformat_tw8al 		0x8 		/* val 8, shift 0 */
+#    define TMC_tformat_tw422 		0xa 		/* val 10, shift 0 */
+#    define TMC_tformat_tw422uyvy	0xb 		/* val 11, shift 0 */
+#    define TMC_tpitchlin_MASK 		0xfffffeff 	/* bit 8 */
+#    define TMC_tpitchlin_disable 	0x0 		
+#    define TMC_tpitchlin_enable 	0x100 		
+#    define TMC_tpitchext_MASK 		0xfff001ff 	/* bits 9-19; range includes tpitch's bits 16-18 */
+#    define TMC_tpitchext_SHIFT 	9 		
+#    define TMC_tpitch_MASK 		0xfff8ffff 	/* bits 16-18; inside tpitchext's bits 9-19 */
+#    define TMC_tpitch_SHIFT 		16 		
+#    define TMC_owalpha_MASK 		0xffbfffff 	/* bit 22 */
+#    define TMC_owalpha_disable 	0x0 		
+#    define TMC_owalpha_enable 		0x400000 	
+#    define TMC_azeroextend_MASK 	0xff7fffff 	/* bit 23 */
+#    define TMC_azeroextend_disable 	0x0 		
+#    define TMC_azeroextend_enable 	0x800000 	
+#    define TMC_decalckey_MASK 		0xfeffffff 	/* bit 24 */
+#    define TMC_decalckey_disable 	0x0 		
+#    define TMC_decalckey_enable 	0x1000000 	
+#    define TMC_takey_MASK 		0xfdffffff 	/* bit 25 */
+#    define TMC_takey_0 		0x0 		
+#    define TMC_takey_1 		0x2000000 	
+#    define TMC_tamask_MASK 		0xfbffffff 	/* bit 26 */
+#    define TMC_tamask_0 		0x0 		
+#    define TMC_tamask_1 		0x4000000 	
+#    define TMC_clampv_MASK 		0xf7ffffff 	/* bit 27 */
+#    define TMC_clampv_disable 		0x0 		
+#    define TMC_clampv_enable 		0x8000000 	
+#    define TMC_clampu_MASK 		0xefffffff 	/* bit 28 */
+#    define TMC_clampu_disable 		0x0 		
+#    define TMC_clampu_enable 		0x10000000 	
+#    define TMC_tmodulate_MASK 		0xdfffffff 	/* bit 29 */
+#    define TMC_tmodulate_disable 	0x0 		
+#    define TMC_tmodulate_enable 	0x20000000 	
+#    define TMC_strans_MASK 		0xbfffffff 	/* bit 30 */
+#    define TMC_strans_disable 		0x0 		
+#    define TMC_strans_enable 		0x40000000 	
+#    define TMC_itrans_MASK 		0x7fffffff 	/* bit 31 */
+#    define TMC_itrans_disable 		0x0 		
+#    define TMC_itrans_enable 		0x80000000 	
+
+#define MGAREG_TEXCTL2 			0x2c3c
+/* NOTE: the TEXCTL2 fields below use the same TMC_ prefix as the TEXCTL fields. */
+#    define TMC_decalblend_MASK 	0xfffffffe 	/* bit 0 */
+#    define TMC_decalblend_disable 	0x0 		
+#    define TMC_decalblend_enable 	0x1 		
+#    define TMC_idecal_MASK 		0xfffffffd 	/* bit 1 */
+#    define TMC_idecal_disable 		0x0 		
+#    define TMC_idecal_enable 		0x2 		
+#    define TMC_decaldis_MASK 		0xfffffffb 	/* bit 2 */
+#    define TMC_decaldis_disable 	0x0 		
+#    define TMC_decaldis_enable 	0x4 		
+#    define TMC_ckstransdis_MASK 	0xffffffef 	/* bit 4 */
+#    define TMC_ckstransdis_disable 	0x0 		
+#    define TMC_ckstransdis_enable 	0x10 		
+#    define TMC_borderen_MASK 		0xffffffdf 	/* bit 5 */
+#    define TMC_borderen_disable 	0x0 		
+#    define TMC_borderen_enable 	0x20 		
+#    define TMC_specen_MASK 		0xffffffbf 	/* bit 6 */
+#    define TMC_specen_disable 		0x0 		
+#    define TMC_specen_enable 		0x40 		
+#    define TMC_dualtex_MASK 		0xffffff7f 	/* bit 7 */
+#    define TMC_dualtex_disable 	0x0 		
+#    define TMC_dualtex_enable 		0x80 		
+#    define TMC_tablefog_MASK 		0xfffffeff 	/* bit 8 */
+#    define TMC_tablefog_disable 	0x0 		
+#    define TMC_tablefog_enable 	0x100 		
+#    define TMC_bumpmap_MASK 		0xfffffdff 	/* bit 9 */
+#    define TMC_bumpmap_disable 	0x0 		
+#    define TMC_bumpmap_enable 		0x200 		
+#    define TMC_map1_MASK 		0x7fffffff 	/* bit 31 */
+#    define TMC_map1_disable 		0x0 		
+#    define TMC_map1_enable 		0x80000000 	
+
+#define MGAREG_TEXFILTER 			0x2c58
+
+#    define TF_minfilter_MASK 		0xfffffff0 	/* bits 0-3 */
+#    define TF_minfilter_nrst 		0x0 		/* val 0, shift 0 */
+#    define TF_minfilter_bilin 		0x2 		/* val 2, shift 0 */
+#    define TF_minfilter_cnst 		0x3 		/* val 3, shift 0 */
+#    define TF_minfilter_mm1s 		0x8 		/* val 8, shift 0 */
+#    define TF_minfilter_mm2s 		0x9 		/* val 9, shift 0 */
+#    define TF_minfilter_mm4s 		0xa 		/* val 10, shift 0 */
+#    define TF_minfilter_mm8s 		0xc 		/* val 12, shift 0 */
+#    define TF_magfilter_MASK 		0xffffff0f 	/* bits 4-7 */
+#    define TF_magfilter_nrst 		0x0 		/* val 0, shift 4 */
+#    define TF_magfilter_bilin 		0x20 		/* val 2, shift 4 */
+#    define TF_magfilter_cnst 		0x30 		/* val 3, shift 4 */
+#    define TF_uvoffset_SHIFT		17	/* single-bit field at bit 17 */
+#    define TF_uvoffset_OGL		(0U << TF_uvoffset_SHIFT)	/* field value 0 */
+#    define TF_uvoffset_D3D		(1U << TF_uvoffset_SHIFT)	/* field value 1 (0x20000) */
+#    define TF_uvoffset_MASK		(~(1U << TF_uvoffset_SHIFT))	/* complement of bit 17 */
+#    define TF_reserved_MASK		(~0x1ff00)	/* bits 8-16; plain int literal, unlike the 1U-based masks here */
+#    define TF_mapnbhigh_SHIFT 		18	/* single-bit field at bit 18 */
+#    define TF_mapnbhigh_MASK 		(~(1U << TF_mapnbhigh_SHIFT))	/* complement of bit 18 */
+#    define TF_avgstride_MASK 		0xfff7ffff 	/* bit 19 */
+#    define TF_avgstride_disable 	0x0 		
+#    define TF_avgstride_enable 	0x80000 	
+#    define TF_filteralpha_MASK 	0xffefffff 	/* bit 20 */
+#    define TF_filteralpha_disable 	0x0 		
+#    define TF_filteralpha_enable 	0x100000 	
+#    define TF_fthres_MASK 		0xe01fffff 	/* bits 21-28 */
+#    define TF_fthres_SHIFT 		21 		
+#    define TF_mapnb_MASK 		0x1fffffff 	/* bits 29-31 */
+#    define TF_mapnb_SHIFT 		29 		
+
+#define MGAREG_TEXHEIGHT 			0x2c2c
+
+#    define TH_th_MASK 			0xffffffc0 	/* bits 0-5 */
+#    define TH_th_SHIFT 		0 		
+#    define TH_rfh_MASK 		0xffff81ff 	/* bits 9-14 */
+#    define TH_rfh_SHIFT 		9 		
+#    define TH_thmask_MASK 		0xe003ffff 	/* bits 18-28 */
+#    define TH_thmask_SHIFT 		18 		
+
+#define MGAREG_TEXORG 				0x2c24
+
+#    define TO_texorgmap_MASK 		0xfffffffe 	/* bit 0 */
+#    define TO_texorgmap_fb 		0x0 		
+#    define TO_texorgmap_sys 		0x1 		
+#    define TO_texorgacc_MASK 		0xfffffffd 	/* bit 1 */
+#    define TO_texorgacc_pci 		0x0 		
+#    define TO_texorgacc_agp 		0x2 		
+#    define TO_texorgoffsetsel 		0x4 		/* bit 2; no _MASK macro is defined for it */
+#    define TO_texorg_MASK 		0x1f 		/* bits 5-31 */
+#    define TO_texorg_SHIFT 		5 		
+
+#define MGAREG_TEXORG1 			0x2ca4
+#define MGAREG_TEXORG2 			0x2ca8
+#define MGAREG_TEXORG3 			0x2cac
+#define MGAREG_TEXORG4 			0x2cb0
+#define MGAREG_TEXTRANS 			0x2c34
+
+#    define TT_tckey_MASK 		0xffff0000 	/* bits 0-15 */
+#    define TT_tckey_SHIFT 		0 		
+#    define TT_tkmask_MASK 		0xffff 		/* bits 16-31 */
+#    define TT_tkmask_SHIFT 		16 		
+
+#define MGAREG_TEXTRANSHIGH 			0x2c38
+
+#    define TT_tckeyh_MASK 		0xffff0000 	/* bits 0-15 */
+#    define TT_tckeyh_SHIFT 		0 		
+#    define TT_tkmaskh_MASK 		0xffff 		/* bits 16-31 */
+#    define TT_tkmaskh_SHIFT 		16 		
+
+#define MGAREG_TEXWIDTH 			0x2c28
+
+#    define TW_tw_MASK 			0xffffffc0 	/* bits 0-5 */
+#    define TW_tw_SHIFT 		0 		
+#    define TW_rfw_MASK 		0xffff81ff 	/* bits 9-14 */
+#    define TW_rfw_SHIFT 		9 		
+#    define TW_twmask_MASK 		0xe003ffff 	/* bits 18-28 */
+#    define TW_twmask_SHIFT 		18 		
+
+#define MGAREG_TMR0 				0x2c00
+#define MGAREG_TMR1 				0x2c04
+#define MGAREG_TMR2 				0x2c08
+#define MGAREG_TMR3 				0x2c0c
+#define MGAREG_TMR4 				0x2c10
+#define MGAREG_TMR5 				0x2c14
+#define MGAREG_TMR6 				0x2c18
+#define MGAREG_TMR7 				0x2c1c
+#define MGAREG_TMR8 				0x2c20
+#define MGAREG_VBIADDR0 			0x3e08
+#define MGAREG_VBIADDR1 			0x3e0c
+#define MGAREG_VCOUNT 				0x1e20
+#define MGAREG_WACCEPTSEQ 			0x1dd4
+
+#    define WAS_seqdst0_MASK 		0xffffffc0 	/* bits 0-5 */
+#    define WAS_seqdst0_SHIFT 		0 		
+#    define WAS_seqdst1_MASK 		0xfffff03f 	/* bits 6-11 */
+#    define WAS_seqdst1_SHIFT 		6 		
+#    define WAS_seqdst2_MASK 		0xfffc0fff 	/* bits 12-17 */
+#    define WAS_seqdst2_SHIFT 		12 		
+#    define WAS_seqdst3_MASK 		0xff03ffff 	/* bits 18-23 */
+#    define WAS_seqdst3_SHIFT 		18 		
+#    define WAS_seqlen_MASK 		0xfcffffff 	/* bits 24-25; no _SHIFT or value macros accompany it */
+#    define WAS_wfirsttag_MASK 		0xfbffffff 	/* bit 26 */
+#    define WAS_wfirsttag_disable 	0x0 		
+#    define WAS_wfirsttag_enable 	0x4000000 	
+#    define WAS_wsametag_MASK 		0xf7ffffff 	/* bit 27 */
+#    define WAS_wsametag_disable 	0x0 		
+#    define WAS_wsametag_enable 	0x8000000 	
+#    define WAS_seqoff_MASK 		0xefffffff 	/* bit 28 */
+#    define WAS_seqoff_disable 		0x0 		
+#    define WAS_seqoff_enable 		0x10000000 	
+
+#define MGAREG_WCODEADDR 			0x1e6c
+
+#    define WMA_wcodeaddr_MASK 		0xff 		/* bits 8-31 */
+#    define WMA_wcodeaddr_SHIFT 	8 		
+
+#define MGAREG_WFLAG 				0x1dc4
+
+#    define WF_walustsflag_MASK 	0xffffff00 	/* bits 0-7 */
+#    define WF_walustsflag_SHIFT 	0 		
+#    define WF_walucfgflag_MASK 	0xffff00ff 	/* bits 8-15 */
+#    define WF_walucfgflag_SHIFT 	8 		
+#    define WF_wprgflag_MASK 		0xffff 		/* bits 16-31 */
+#    define WF_wprgflag_SHIFT 		16 		
+
+#define MGAREG_WFLAG1 				0x1de0
+
+#    define WF1_walustsflag1_MASK 	0xffffff00 	/* bits 0-7 */
+#    define WF1_walustsflag1_SHIFT 	0 		
+#    define WF1_walucfgflag1_MASK 	0xffff00ff 	/* bits 8-15 */
+#    define WF1_walucfgflag1_SHIFT 	8 		
+#    define WF1_wprgflag1_MASK 		0xffff 		/* bits 16-31 */
+#    define WF1_wprgflag1_SHIFT 	16 		
+
+#define MGAREG_WFLAGNB 			0x1e64
+#define MGAREG_WFLAGNB1 			0x1e08
+#define MGAREG_WGETMSB 			0x1dc8
+
+#    define WGV_wgetmsbmin_MASK 	0xffffffe0 	/* bits 0-4 */
+#    define WGV_wgetmsbmin_SHIFT 	0 		
+#    define WGV_wgetmsbmax_MASK 	0xffffe0ff 	/* bits 8-12 */
+#    define WGV_wgetmsbmax_SHIFT 	8 		
+#    define WGV_wbrklefttop_MASK 	0xfffeffff 	/* bit 16 */
+#    define WGV_wbrklefttop_disable 	0x0 		
+#    define WGV_wbrklefttop_enable 	0x10000 	
+#    define WGV_wfastcrop_MASK 		0xfffdffff 	/* bit 17 */
+#    define WGV_wfastcrop_disable 	0x0 		
+#    define WGV_wfastcrop_enable 	0x20000 	
+#    define WGV_wcentersnap_MASK 	0xfffbffff 	/* bit 18 */
+#    define WGV_wcentersnap_disable 	0x0 		
+#    define WGV_wcentersnap_enable 	0x40000 	
+#    define WGV_wbrkrighttop_MASK 	0xfff7ffff 	/* bit 19 */
+#    define WGV_wbrkrighttop_disable 	0x0 		
+#    define WGV_wbrkrighttop_enable 	0x80000 	
+
+#define MGAREG_WIADDR 				0x1dc0
+
+#    define WIA_wmode_MASK 		0xfffffffc 	/* bits 0-1 */
+#    define WIA_wmode_suspend 		0x0 		/* val 0, shift 0 */
+#    define WIA_wmode_resume 		0x1 		/* val 1, shift 0 */
+#    define WIA_wmode_jump 		0x2 		/* val 2, shift 0 */
+#    define WIA_wmode_start 		0x3 		/* val 3, shift 0 */
+#    define WIA_wagp_MASK 		0xfffffffb 	/* bit 2 */
+#    define WIA_wagp_pci 		0x0 		
+#    define WIA_wagp_agp 		0x4 		
+#    define WIA_wiaddr_MASK 		0x7 		/* bits 3-31 */
+#    define WIA_wiaddr_SHIFT 		3 		
+
+#define MGAREG_WIADDR2 			0x1dd8
+
+#    define WIA2_wmode_MASK 		0xfffffffc 	/* bits 0-1 */
+#    define WIA2_wmode_suspend 		0x0 		/* val 0, shift 0 */
+#    define WIA2_wmode_resume 		0x1 		/* val 1, shift 0 */
+#    define WIA2_wmode_jump 		0x2 		/* val 2, shift 0 */
+#    define WIA2_wmode_start 		0x3 		/* val 3, shift 0 */
+#    define WIA2_wagp_MASK 		0xfffffffb 	/* bit 2 */
+#    define WIA2_wagp_pci 		0x0 		
+#    define WIA2_wagp_agp 		0x4 		
+#    define WIA2_wiaddr_MASK 		0x7 		/* bits 3-31 */
+#    define WIA2_wiaddr_SHIFT 		3 		
+
+#define MGAREG_WIADDRNB 			0x1e60
+#define MGAREG_WIADDRNB1 			0x1e04
+#define MGAREG_WIADDRNB2 			0x1e00
+#define MGAREG_WIMEMADDR 			0x1e68
+
+#    define WIMA_wimemaddr_MASK 	0xffffff00 	/* bits 0-7 */
+#    define WIMA_wimemaddr_SHIFT 	0 		
+
+#define MGAREG_WIMEMDATA 			0x2000
+#define MGAREG_WIMEMDATA1 			0x2100
+#define MGAREG_WMISC 				0x1e70
+
+#    define WM_wucodecache_MASK 	0xfffffffe 	/* bit 0 */
+#    define WM_wucodecache_disable 	0x0 		
+#    define WM_wucodecache_enable 	0x1 		
+#    define WM_wmaster_MASK 		0xfffffffd 	/* bit 1 */
+#    define WM_wmaster_disable 		0x0 		
+#    define WM_wmaster_enable 		0x2 		
+#    define WM_wcacheflush_MASK 	0xfffffff7 	/* bit 3 */
+#    define WM_wcacheflush_disable 	0x0 		
+#    define WM_wcacheflush_enable 	0x8 		
+
+#define MGAREG_WR 				0x2d00
+#define MGAREG_WVRTXSZ 			0x1dcc
+
+#    define WVS_wvrtxsz_MASK 		0xffffffc0 	/* bits 0-5 */
+#    define WVS_wvrtxsz_SHIFT 		0 		
+#    define WVS_primsz_MASK 		0xffffc0ff 	/* bits 8-13 */
+#    define WVS_primsz_SHIFT 		8 		
+
+#define MGAREG_XDST 				0x1cb0
+#define MGAREG_XYEND 				0x1c44
+
+#    define XYEA_x_end_MASK 		0xffff0000 	/* bits 0-15 */
+#    define XYEA_x_end_SHIFT 		0 		
+#    define XYEA_y_end_MASK 		0xffff 		/* bits 16-31 */
+#    define XYEA_y_end_SHIFT 		16 		
+
+#define MGAREG_XYSTRT 				0x1c40
+
+#    define XYSA_x_start_MASK 		0xffff0000 	/* bits 0-15 */
+#    define XYSA_x_start_SHIFT 		0 		
+#    define XYSA_y_start_MASK 		0xffff 		/* bits 16-31 */
+#    define XYSA_y_start_SHIFT 		16 		
+
+#define MGAREG_YBOT 				0x1c9c
+#define MGAREG_YDST 				0x1c90
+
+#    define YA_ydst_MASK 		0xff800000 	/* bits 0-22 */
+#    define YA_ydst_SHIFT 		0 		
+#    define YA_sellin_MASK 		0x1fffffff 	/* bits 29-31 */
+#    define YA_sellin_SHIFT 		29 		
+
+#define MGAREG_YDSTLEN 			0x1c88
+
+#    define YDL_length_MASK 		0xffff0000 	/* bits 0-15 */
+#    define YDL_length_SHIFT 		0 		
+#    define YDL_yval_MASK 		0xffff 		/* bits 16-31 */
+#    define YDL_yval_SHIFT 		16 		
+
+#define MGAREG_YDSTORG 			0x1c94
+#define MGAREG_YTOP 				0x1c98
+#define MGAREG_ZORG 				0x1c0c
+
+#    define ZO_zorgmap_MASK 		0xfffffffe 	/* bit 0 */
+#    define ZO_zorgmap_fb 		0x0 		
+#    define ZO_zorgmap_sys 		0x1 		
+#    define ZO_zorgacc_MASK 		0xfffffffd 	/* bit 1 */
+#    define ZO_zorgacc_pci 		0x0 		
+#    define ZO_zorgacc_agp 		0x2 		
+#    define ZO_zorg_MASK 		0x3 		/* bits 2-31 */
+#    define ZO_zorg_SHIFT 		2 		
+
+
+
+
+/**************** (END) AUTOMATICLY GENERATED REGISTER FILE ******************/
+
+/* Copied from mga_drv.h kernel file.  MGA_ILOAD_MASK is ALIGN - 1 (63, 0x3f):
+ * the low-bit mask that goes with the power-of-two alignment value 64. */
+
+#define MGA_ILOAD_ALIGN		64
+#define MGA_ILOAD_MASK		(MGA_ILOAD_ALIGN - 1)
+
+#endif 	/* _MGAREGS_H_ */
+
diff --git a/src/mesa/drivers/dri/mga/mgarender.c b/src/mesa/drivers/dri/mga/mgarender.c
new file mode 100644
index 0000000000..8b8fc485d3
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgarender.c
@@ -0,0 +1,170 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Simulate missing primitives with indexed vertices.
+ */
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+
+#include "tnl/t_context.h"
+
+#include "mgacontext.h"
+#include "mgatris.h"
+#include "mgaioctl.h"
+#include "mgavb.h"
+
+#define HAVE_POINTS      0	/* HAVE_*: presumably read by tnl_dd/t_dd_dmatmp.h, included below - confirm */
+#define HAVE_LINES       0
+#define HAVE_LINE_STRIPS 0
+#define HAVE_TRIANGLES   1	/* the three flags set to 1 match the cases in mgaDmaPrimitive() */
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_POLYGONS    0
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+
+#define HAVE_ELTS        0	/* for now */
+
+/* Map a GL triangle primitive onto the matching MGA_WA_* hardware primitive
+ * and pass it to mgaRasterPrimitive(); other GL primitives are ignored. */
+static void mgaDmaPrimitive( GLcontext *ctx, GLenum prim )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   GLuint mga_prim;
+
+   /* Strips and fans pick the _T0 variant when vertex_size is 8 and the
+    * _T0T1 variant otherwise; plain triangles have a single code. */
+   if (prim == GL_TRIANGLES) {
+      mga_prim = MGA_WA_TRIANGLES;
+   }
+   else if (prim == GL_TRIANGLE_STRIP) {
+      mga_prim = (mmesa->vertex_size == 8)
+	 ? MGA_WA_TRISTRIP_T0 : MGA_WA_TRISTRIP_T0T1;
+   }
+   else if (prim == GL_TRIANGLE_FAN) {
+      mga_prim = (mmesa->vertex_size == 8)
+	 ? MGA_WA_TRIFAN_T0 : MGA_WA_TRIFAN_T0T1;
+   }
+   else {
+      return;		/* unhandled primitive: do nothing */
+   }
+
+   /* GL_TRIANGLES is passed as the GL primitive in all three cases. */
+   mgaRasterPrimitive( ctx, GL_TRIANGLES, mga_prim );
+}
+
+
+#define LOCAL_VARS mgaContextPtr mmesa = MGA_CONTEXT(ctx) /* declaration without ';'; mmesa is used by the macros below */
+#define INIT( prim ) do {			\
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);	\
+   FLUSH_BATCH(mmesa);				\
+   mgaDmaPrimitive( ctx, prim );		\
+} while (0)	/* INIT: FLUSH_BATCH first, then mgaDmaPrimitive for prim; the fprintf is disabled by if (0) */
+#define FLUSH()  FLUSH_BATCH( mmesa )	/* needs mmesa (see LOCAL_VARS) in scope */
+/* Vertex-count limits, presumably read by t_dd_dmatmp.h (included below).
+ * The quotient is fully parenthesized so it expands as a single operand. */
+#define GET_CURRENT_VB_MAX_VERTS()    0 /* fix me */
+#define GET_SUBSEQUENT_VB_MAX_VERTS() (MGA_BUFFER_SIZE / (mmesa->vertex_size * 4))
+
+
+#define ALLOC_VERTS( nr ) /* asks mgaAllocDmaLow for nr * vertex_size * 4; needs mmesa in scope */ \
+  mgaAllocDmaLow( mmesa, (nr) * mmesa->vertex_size * 4)
+#define EMIT_VERTS( ctx, j, nr, buf ) /* turns (start j, count nr) into start j, end j+nr */ \
+   mga_emit_contiguous_verts(ctx, j, (j)+(nr), buf)
+
+ 
+#define TAG(x) mga_##x	/* pastes the mga_ prefix onto x */
+#include "tnl_dd/t_dd_dmatmp.h"
+
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+/* Run hook of _mga_render_stage: dispatches every non-empty primitive of
+ * tnl->vb through mga_render_tab_verts.  Returns GL_TRUE without drawing when
+ * RenderIndex is nonzero or mga_validate_render() rejects the buffer. */
+static GLboolean mga_run_render( GLcontext *ctx,
+				  struct tnl_pipeline_stage *stage )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint n;
+
+   /* Don't handle clipping, indexed vertices or vertex manipulations. */
+   if (mmesa->RenderIndex != 0)
+      return GL_TRUE;
+   if (!mga_validate_render( ctx, VB ))
+      return GL_TRUE;
+
+   tnl->Driver.Render.Start( ctx );
+   mmesa->SetupNewInputs = ~0;
+
+   for (n = 0 ; n < VB->PrimitiveCount ; n++) {
+      const GLuint mode = _tnl_translate_prim(&VB->Primitive[n]);
+      const GLuint first = VB->Primitive[n].start;
+      const GLuint count = VB->Primitive[n].count;
+
+      /* Empty primitives are skipped; the table is indexed by the mode bits. */
+      if (count != 0)
+	 mga_render_tab_verts[mode & PRIM_MODE_MASK]( ctx, first, first + count,
+						      mode );
+   }
+
+   tnl->Driver.Render.Finish( ctx );
+
+   return GL_FALSE;		/* finished the pipe */
+}
+
+
+const struct tnl_pipeline_stage _mga_render_stage = /* stage descriptor: a name string, four NULL slots, then the run hook */
+{ 
+   "mga render",
+   NULL, 
+   NULL,
+   NULL,
+   NULL,
+   mga_run_render		/* run */
+};
diff --git a/src/mesa/drivers/dri/mga/mgaspan.c b/src/mesa/drivers/dri/mga/mgaspan.c
new file mode 100644
index 0000000000..10606c152c
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgaspan.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/mtypes.h"
+#include "mgadd.h"
+#include "mgacontext.h"
+#include "mgaspan.h"
+#include "mgaioctl.h"
+#include "swrast/swrast.h"
+
+#define DBG 0
+/* Span locals: buf = pFB + renderbuffer offset + drawable x/y; p is a scratch word. */
+#define LOCAL_VARS					\
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);		\
+   __DRIscreen *sPriv = mmesa->driScreen;	\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
+   const __DRIdrawable *dPriv = drb->dPriv;	\
+   GLuint pitch = drb->pitch;				\
+   GLuint height = dPriv->h;				\
+   char *buf = (char *)(sPriv->pFB +			\
+			drb->offset +			\
+			dPriv->x * drb->cpp +		\
+			dPriv->y * pitch);		\
+   GLuint p;						\
+   (void) buf; (void) p
+
+
+/* Same locals as LOCAL_VARS minus the scratch word p; also reused for stencil. */
+#define LOCAL_DEPTH_VARS						\
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);				\
+   __DRIscreen *sPriv = mmesa->driScreen;			\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;			\
+   const __DRIdrawable *dPriv = drb->dPriv;			\
+   GLuint pitch = drb->pitch;						\
+   GLuint height = dPriv->h;						\
+   char *buf = (char *)(sPriv->pFB +					\
+			drb->offset +					\
+			dPriv->x * drb->cpp +				\
+			dPriv->y * pitch)
+
+#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS 
+
+#define HW_LOCK()
+/* Walk mmesa's clip rects (last to first), exposing min/max x,y relative to drawX/drawY. */
+/* FIXME could/should we use dPriv->numClipRects like the other drivers? */
+#define HW_CLIPLOOP()						\
+  do {								\
+    int _nc = mmesa->numClipRects;				\
+    while (_nc--) {						\
+       int minx = mmesa->pClipRects[_nc].x1 - mmesa->drawX;	\
+       int miny = mmesa->pClipRects[_nc].y1 - mmesa->drawY;	\
+       int maxx = mmesa->pClipRects[_nc].x2 - mmesa->drawX;	\
+       int maxy = mmesa->pClipRects[_nc].y2 - mmesa->drawY;
+
+#define HW_ENDCLIPLOOP()			\
+    }						\
+  } while (0)
+
+#define HW_UNLOCK()
+
+
+/* Mirror a row index within `height`; the argument is parenthesised for expression use. */
+#define Y_FLIP(_y) (height - (_y) - 1)
+
+/* 16 bit, RGB565 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x)    mga##x##_565
+#define TAG2(x,y) mga##x##_565##y
+#include "spantmp2.h"
+
+/* 32 bit, ARGB8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    mga##x##_8888
+#define TAG2(x,y) mga##x##_8888##y
+#include "spantmp2.h"
+
+
+/* 16 bit depthbuffer functions.
+ */
+#define VALUE_TYPE GLushort
+
+#define WRITE_DEPTH( _x, _y, d )	\
+   *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = d;
+
+#define READ_DEPTH( d, _x, _y )		\
+   d = *(GLushort *)(buf + (_x)*2 + (_y)*pitch);
+
+#define TAG(x) mga##x##_z16
+#include "depthtmp.h"
+
+
+
+
+/* 32 bit depthbuffer functions.
+ */
+#define VALUE_TYPE GLuint
+
+#define WRITE_DEPTH( _x, _y, d )	\
+   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = d;
+
+#define READ_DEPTH( d, _x, _y )		\
+   d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);
+
+#define TAG(x) mga##x##_z32
+#include "depthtmp.h"
+
+
+
+/* 24/8 bit interleaved depth/stencil functions
+ */
+#define VALUE_TYPE GLuint
+
+#define WRITE_DEPTH( _x, _y, d ) {			\
+   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
+   tmp &= 0xff;						\
+   tmp |= (d) << 8;					\
+   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp;		\
+}
+
+#define READ_DEPTH( d, _x, _y )	{				\
+   d = (*(GLuint *)(buf + (_x)*4 + (_y)*pitch) & ~0xff) >> 8;	\
+}
+
+#define TAG(x) mga##x##_z24_s8
+#include "depthtmp.h"
+/* Stencil is the low byte of each 32-bit word; macro arguments are parenthesised. */
+#define WRITE_STENCIL( _x, _y, d ) {			\
+   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*pitch);	\
+   tmp &= 0xffffff00;					\
+   tmp |= (d) & 0xff;					\
+   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = tmp;		\
+}
+
+#define READ_STENCIL( d, _x, _y )		\
+   d = *(GLuint *)(buf + (_x)*4 + (_y)*pitch) & 0xff;
+
+#define TAG(x) mga##x##_z24_s8
+#include "stenciltmp.h"
+
+/* SpanRenderStart hook: flush the batch, then take the hardware lock (quiescent). */
+static void
+mgaSpanRenderStart( GLcontext *ctx )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   FLUSH_BATCH( mmesa );
+   LOCK_HARDWARE_QUIESCENT( mmesa );
+}
+/* SpanRenderFinish hook: flush swrast, then release the hardware lock. */
+static void
+mgaSpanRenderFinish( GLcontext *ctx )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   _swrast_flush( ctx );
+   UNLOCK_HARDWARE( mmesa );
+}
+
+/**
+ * Install the span render start / finish hooks on the swrast device driver.
+ *
+ * \bug
+ * To really support RGB888 and RGBA8888 visuals, we need separate read and
+ * write routines for 888 and 8888.  We also need to determine whether or not
+ * the visual has destination alpha.
+ */
+void mgaDDInitSpanFuncs( GLcontext *ctx )
+{
+   struct swrast_device_driver *hooks = _swrast_GetDeviceDriverReference(ctx);
+   hooks->SpanRenderFinish = mgaSpanRenderFinish;
+   hooks->SpanRenderStart = mgaSpanRenderStart;
+}
+
+
+/** Plug in the Get/Put routines that match the driRenderbuffer's format. */
+void mgaSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
+{
+   switch (drb->Base.Format) {
+   case MESA_FORMAT_RGB565:
+      mgaInitPointers_565(&drb->Base);
+      break;
+   case MESA_FORMAT_ARGB8888:
+      mgaInitPointers_8888(&drb->Base);
+      break;
+   case MESA_FORMAT_Z16:
+      mgaInitDepthPointers_z16(&drb->Base);
+      break;
+   case MESA_FORMAT_Z24_S8:
+      mgaInitDepthPointers_z24_s8(&drb->Base);
+      break;
+   case MESA_FORMAT_Z32:
+      mgaInitDepthPointers_z32(&drb->Base);
+      break;
+   case MESA_FORMAT_S8:
+      mgaInitStencilPointers_z24_s8(&drb->Base);
+      break;
+   default: break;		/* any other format: nothing is plugged in */
+   }
+}
diff --git a/src/mesa/drivers/dri/mga/mgaspan.h b/src/mesa/drivers/dri/mga/mgaspan.h
new file mode 100644
index 0000000000..f5e2e49b8a
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgaspan.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef _MGA_SPAN_H
+#define _MGA_SPAN_H
+
+#include "drirenderbuffer.h"
+
+extern void mgaDDInitSpanFuncs( GLcontext *ctx );
+
+extern void
+mgaSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
+
+
+#endif
diff --git a/src/mesa/drivers/dri/mga/mgastate.c b/src/mesa/drivers/dri/mga/mgastate.c
new file mode 100644
index 0000000000..745d5e9852
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgastate.c
@@ -0,0 +1,1197 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/dd.h"
+#include "main/mm.h"
+
+#include "mgacontext.h"
+#include "mgadd.h"
+#include "mgastate.h"
+#include "mgatex.h"
+#include "mgavb.h"
+#include "mgatris.h"
+#include "mgaioctl.h"
+#include "mgaregs.h"
+
+#include "swrast/swrast.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "xmlpool.h"
+#include "drirenderbuffer.h"
+
+
+static void updateSpecularLighting( GLcontext *ctx );
+/* Raster-op words; mgaDDLogicOp indexes this with the low four bits of the opcode. */
+static const GLuint mgarop_NoBLK[16] = {
+   DC_atype_rpl  | 0x00000000, DC_atype_rstr | 0x00080000,
+   DC_atype_rstr | 0x00040000, DC_atype_rpl  | 0x000c0000,
+   DC_atype_rstr | 0x00020000, DC_atype_rstr | 0x000a0000,
+   DC_atype_rstr | 0x00060000, DC_atype_rstr | 0x000e0000,
+   DC_atype_rstr | 0x00010000, DC_atype_rstr | 0x00090000,
+   DC_atype_rstr | 0x00050000, DC_atype_rstr | 0x000d0000,
+   DC_atype_rpl  | 0x00030000, DC_atype_rstr | 0x000b0000,
+   DC_atype_rstr | 0x00070000, DC_atype_rpl  | 0x000f0000
+};
+
+/* =============================================================
+ * Alpha blending
+ */
+/* Program the alpha-test comparison mode and the 8-bit reference value. */
+static void mgaDDAlphaFunc(GLcontext *ctx, GLenum func, GLfloat ref)
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   GLubyte refByte;
+   GLuint  a;
+
+   CLAMPED_FLOAT_TO_UBYTE(refByte, ref);
+
+   switch ( func ) {
+   case GL_NEVER:
+      a = AC_atmode_alt;	/* same mode as GL_LESS ... */
+      refByte = 0;		/* ... but against a zero reference */
+      break;
+   case GL_LESS:
+      a = AC_atmode_alt;
+      break;
+   case GL_GEQUAL:
+      a = AC_atmode_agte;
+      break;
+   case GL_LEQUAL:
+      a = AC_atmode_alte;
+      break;
+   case GL_GREATER:
+      a = AC_atmode_agt;
+      break;
+   case GL_NOTEQUAL:
+      a = AC_atmode_ane;
+      break;
+   case GL_EQUAL:
+      a = AC_atmode_ae;
+      break;
+   case GL_ALWAYS:
+      a = AC_atmode_noacmp;
+      break;
+   default:
+      a = 0;			/* unrecognised func: no mode bits set */
+      break;
+   }
+
+   MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+   mmesa->hw.alpha_func = a | MGA_FIELD( AC_atref, refByte );
+}
+/* Recompute the h/w blend enable and the MGA_FALLBACK_BLEND condition. */
+static void updateBlendLogicOp(GLcontext *ctx)
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   GLboolean logicOp = RGBA_LOGICOP_ENABLED(ctx);
+
+   MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+
+   mmesa->hw.blend_func_enable =
+      (ctx->Color.BlendEnabled && !logicOp) ? ~0 : 0;
+   /* Fallback is requested while blending is live with the alpha-sat/zero pair. */
+   FALLBACK( ctx, MGA_FALLBACK_BLEND,
+             ctx->Color.BlendEnabled && !logicOp &&
+             mmesa->hw.blend_func == (AC_src_src_alpha_sat | AC_dst_zero) );
+}
+/* Blend-equation hook: asserts modeRGB == modeA, then refreshes the blend state. */
+static void mgaDDBlendEquationSeparate(GLcontext *ctx, 
+				       GLenum modeRGB, GLenum modeA)
+{
+   assert( modeRGB == modeA );
+   updateBlendLogicOp( ctx );
+}
+/* Translate the RGB blend factors in ctx->Color (arguments are not read) to h/w bits. */
+static void mgaDDBlendFuncSeparate( GLcontext *ctx, GLenum sfactorRGB,
+				    GLenum dfactorRGB, GLenum sfactorA,
+				    GLenum dfactorA )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   GLuint   src;
+   GLuint   dst;
+   /* Destination-alpha factors are substituted when the visual has no alpha bits. */
+   switch (ctx->Color.BlendSrcRGB) {
+   case GL_ZERO:
+      src = AC_src_zero; break;
+   case GL_SRC_ALPHA:
+      src = AC_src_src_alpha; break;
+   case GL_ONE:
+   default:		/* never happens */
+      src = AC_src_one; break;
+   case GL_DST_COLOR:
+      src = AC_src_dst_color; break;
+   case GL_ONE_MINUS_DST_COLOR:
+      src = AC_src_om_dst_color; break;
+   case GL_ONE_MINUS_SRC_ALPHA:
+      src = AC_src_om_src_alpha; break;
+   case GL_DST_ALPHA:
+      src = (ctx->Visual.alphaBits > 0)
+	  ? AC_src_dst_alpha : AC_src_one;
+      break;
+   case GL_ONE_MINUS_DST_ALPHA:
+      src = (ctx->Visual.alphaBits > 0)
+	  ? AC_src_om_dst_alpha : AC_src_zero;
+      break;
+   case GL_SRC_ALPHA_SATURATE:
+      src = (ctx->Visual.alphaBits > 0)
+	  ? AC_src_src_alpha_sat : AC_src_zero;
+      break;
+   }
+
+   switch (ctx->Color.BlendDstRGB) {
+   case GL_SRC_ALPHA:
+      dst = AC_dst_src_alpha; break;
+   case GL_ONE_MINUS_SRC_ALPHA:
+      dst = AC_dst_om_src_alpha; break;
+   default:		/* never happens */
+   case GL_ZERO:
+      dst = AC_dst_zero; break;
+   case GL_ONE:
+      dst = AC_dst_one; break;
+   case GL_SRC_COLOR:
+      dst = AC_dst_src_color; break;
+   case GL_ONE_MINUS_SRC_COLOR:
+      dst = AC_dst_om_src_color; break;
+   case GL_DST_ALPHA:
+      dst = (ctx->Visual.alphaBits > 0)
+	  ? AC_dst_dst_alpha : AC_dst_one;
+      break;
+   case GL_ONE_MINUS_DST_ALPHA:
+      dst = (ctx->Visual.alphaBits > 0)
+	  ? AC_dst_om_dst_alpha : AC_dst_zero;
+      break;
+   }
+
+   MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+   mmesa->hw.blend_func = (src | dst);
+   /* Same fallback condition as updateBlendLogicOp. */
+   FALLBACK( ctx, MGA_FALLBACK_BLEND,
+             ctx->Color.BlendEnabled && !RGBA_LOGICOP_ENABLED(ctx) &&
+             mmesa->hw.blend_func == (AC_src_src_alpha_sat | AC_dst_zero) );
+}
+
+/* =============================================================
+ * Depth testing
+ */
+/* Map the GL depth function to hardware zmode bits.  GL_NEVER has no h/w
+ * equivalent, so it needs the s/w fallback while depth testing is on; any
+ * other function clears that fallback again. */
+static void mgaDDDepthFunc(GLcontext *ctx, GLenum func)
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+   int zmode;
+
+   FALLBACK (ctx, MGA_FALLBACK_DEPTH, func == GL_NEVER && ctx->Depth.Test);
+
+   switch (func) {
+   case GL_NEVER:		/* FALLTHROUGH */
+   case GL_ALWAYS:
+      zmode = DC_zmode_nozcmp; break;
+   case GL_LESS:
+      zmode = DC_zmode_zlt; break;
+   case GL_LEQUAL:
+      zmode = DC_zmode_zlte; break;
+   case GL_EQUAL:
+      zmode = DC_zmode_ze; break;
+   case GL_GREATER:
+      zmode = DC_zmode_zgt; break;
+   case GL_GEQUAL:
+      zmode = DC_zmode_zgte; break;
+   case GL_NOTEQUAL:
+      zmode = DC_zmode_zne; break;
+   default:
+      zmode = 0; break;
+   }
+
+   MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+   mmesa->hw.zmode &= DC_zmode_MASK;
+   mmesa->hw.zmode |= zmode;
+}
+/* Depth-mask hook: selects atype zi when flag is set, atype i otherwise. */
+static void mgaDDDepthMask(GLcontext *ctx, GLboolean flag)
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+
+
+   MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+   mmesa->hw.zmode &= DC_atype_MASK;
+   mmesa->hw.zmode |= (flag) ? DC_atype_zi : DC_atype_i;
+}
+
+/* Scale clear value d by a constant chosen from the Z width and store it in ClearDepth. */
+static void mgaDDClearDepth(GLcontext *ctx, GLclampd d)
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+
+   /* Select the Z depth.  The ~ is used because the _MASK values in the
+    * MGA driver are used to mask OFF the selected bits.  In this case,
+    * we want to mask off everything except the MA_zwidth bits.
+    */
+   switch (mmesa->setup.maccess & ~MA_zwidth_MASK) {
+   case MA_zwidth_16: mmesa->ClearDepth = d * 0x0000ffff; break;
+   case MA_zwidth_24: mmesa->ClearDepth = d * 0xffffff00; break;
+   case MA_zwidth_32: mmesa->ClearDepth = d * 0xffffffff; break;
+   default: return;
+   }
+}
+
+
+/* =============================================================
+ * Fog
+ */
+
+/* Fog hook: only GL_FOG_COLOR is handled; packs ctx->Fog.Color RGB into setup.fogcolor. */
+static void mgaDDFogfv(GLcontext *ctx, GLenum pname, const GLfloat *param)
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+
+   if (pname == GL_FOG_COLOR) {
+      GLuint color = PACK_COLOR_888((GLubyte)(ctx->Fog.Color[0]*255.0F), 
+				    (GLubyte)(ctx->Fog.Color[1]*255.0F), 
+				    (GLubyte)(ctx->Fog.Color[2]*255.0F));
+
+      MGA_STATECHANGE(mmesa, MGA_UPLOAD_CONTEXT);   
+      mmesa->setup.fogcolor = color;
+   }
+}
+
+
+/* =============================================================
+ * Scissoring
+ */
+
+
+void mgaUpdateClipping(const GLcontext *ctx)
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   int left, top, right, bottom;
+
+   /* Nothing to do while no drawable is attached. */
+   if (!mmesa->driDrawable)
+      return;
+
+   /* The scissor box is offset by the drawable position with y flipped
+    * against the drawable height; right/bottom derive from the unclamped
+    * left/top. */
+   left = mmesa->driDrawable->x + ctx->Scissor.X;
+   top = mmesa->driDrawable->y + mmesa->driDrawable->h
+      - (ctx->Scissor.Y + ctx->Scissor.Height);
+   right = left + ctx->Scissor.Width;
+   bottom = top + ctx->Scissor.Height;
+
+   mmesa->scissor_rect.x1 = (left < 0) ? 0 : left;
+   mmesa->scissor_rect.y1 = (top < 0) ? 0 : top;
+   mmesa->scissor_rect.x2 = (right < 0) ? 0 : right;
+   mmesa->scissor_rect.y2 = (bottom < 0) ? 0 : bottom;
+
+   mmesa->dirty |= MGA_UPLOAD_CLIPRECTS;
+}
+
+/* Scissor hook: acts only while scissoring is enabled; x, y, w, h are not read here. */
+static void mgaDDScissor( GLcontext *ctx, GLint x, GLint y,
+			  GLsizei w, GLsizei h )
+{
+   if ( ctx->Scissor.Enabled ) {
+      FLUSH_BATCH( MGA_CONTEXT(ctx) );	/* don't pipeline cliprect changes */
+      mgaUpdateClipping( ctx );
+   }
+}
+
+
+/* =============================================================
+ * Culling
+ */
+
+
+#define _CULL_DISABLE 0
+#define _CULL_NEGATIVE ((1<<11)|(1<<5)|(1<<16))
+#define _CULL_POSITIVE (1<<11)
+/* Recompute hw.cull / hw.cull_dualtex from CullFlag, CullFaceMode and FrontFace. */
+static void mgaDDCullFaceFrontFace(GLcontext *ctx, GLenum unused)
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+
+   MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+   if (ctx->Polygon.CullFlag && 
+       ctx->Polygon.CullFaceMode != GL_FRONT_AND_BACK) 
+   {
+      mmesa->hw.cull = _CULL_NEGATIVE;
+
+      if (ctx->Polygon.CullFaceMode == GL_FRONT)	/* each XOR swaps NEGATIVE <-> POSITIVE */
+	 mmesa->hw.cull ^= (_CULL_POSITIVE ^ _CULL_NEGATIVE);
+
+      if (ctx->Polygon.FrontFace != GL_CCW)
+	 mmesa->hw.cull ^= (_CULL_POSITIVE ^ _CULL_NEGATIVE);
+
+      mmesa->hw.cull_dualtex = mmesa->hw.cull ^
+	  (_CULL_POSITIVE ^ _CULL_NEGATIVE); /* warp bug? */
+   }
+   else {
+      mmesa->hw.cull = _CULL_DISABLE;
+      mmesa->hw.cull_dualtex = _CULL_DISABLE;
+   }
+}
+
+
+/* =============================================================
+ * Masks
+ */
+/* Colour-mask hook: packs ctx->Color.ColorMask[0] (r, g, b, a are not read) into setup.plnwt. */
+static void mgaDDColorMask(GLcontext *ctx, 
+			   GLboolean r, GLboolean g, 
+			   GLboolean b, GLboolean a )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+   mgaScreenPrivate *mgaScreen = mmesa->mgaScreen;
+   GLuint mask = mgaPackColor(mgaScreen->cpp,
+			      ctx->Color.ColorMask[0][RCOMP],
+			      ctx->Color.ColorMask[0][GCOMP],
+			      ctx->Color.ColorMask[0][BCOMP],
+			      ctx->Color.ColorMask[0][ACOMP]);
+
+   if (mgaScreen->cpp == 2)	/* 16-bit mask is replicated into both halves */
+      mask = mask | (mask << 16);
+
+   if (mmesa->setup.plnwt != mask) {
+      MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+      mmesa->setup.plnwt = mask;      
+   }
+}
+
+
+/* =============================================================
+ * Polygon state
+ */
+
+static int mgaStipples[16] = {
+   0xffff,
+   0xa5a5,
+   0x5a5a,
+   0xa0a0,
+   0x5050,
+   0x0a0a,
+   0x0505,
+   0x8020,
+   0x0401,
+   0x1040,
+   0x0208,
+   0x0802,
+   0x4010,
+   0x0104,
+   0x2080,
+   0x0000
+};
+
+/**
+ * The MGA supports a subset of possible 4x4 stipples natively, GL
+ * wants 32x32.  Fortunately stipple is usually a repeating pattern.
+ * haveHwStipple is set only when the mask is a 4x4 tile found in mgaStipples.
+ *
+ * \param ctx GL rendering context to be affected
+ * \param mask Pointer to the 32x32 stipple mask
+ */
+static void mgaDDPolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const GLubyte *m = mask;
+   GLubyte p[4];
+   int i,j,k;
+   int active = (ctx->Polygon.StippleFlag && 
+		 mmesa->raster_primitive == GL_TRIANGLES);
+   GLuint stipple;
+
+   FLUSH_BATCH(mmesa);
+   mmesa->haveHwStipple = 0;
+
+   if (active) {
+      mmesa->dirty |= MGA_UPLOAD_CONTEXT;
+      mmesa->setup.dwgctl &= ~(0xf<<20);
+   }
+   /* Low nibble of the first byte of rows 0..3, replicated into both nibbles. */
+   p[0] = mask[0] & 0xf; p[0] |= p[0] << 4;
+   p[1] = mask[4] & 0xf; p[1] |= p[1] << 4;
+   p[2] = mask[8] & 0xf; p[2] |= p[2] << 4;
+   p[3] = mask[12] & 0xf; p[3] |= p[3] << 4;
+   /* All 128 mask bytes must match p[row % 4], else give up (haveHwStipple stays 0). */
+   for (k = 0 ; k < 8 ; k++)
+      for (j = 0 ; j < 4; j++)
+	 for (i = 0 ; i < 4 ; i++)
+	    if (*m++ != p[j]) {
+	       return;
+	    }
+   /* Pack the four row nibbles into a 16-bit key for the mgaStipples lookup. */
+   stipple = ( ((p[0] & 0xf) << 0) |
+	       ((p[1] & 0xf) << 4) |
+	       ((p[2] & 0xf) << 8) |
+	       ((p[3] & 0xf) << 12) );
+
+   for (i = 0 ; i < 16 ; i++)
+      if (mgaStipples[i] == stipple) {
+	 mmesa->poly_stipple = i<<20;
+	 mmesa->haveHwStipple = 1;
+	 break;
+      }
+   /* NOTE(review): with no table match the previous poly_stipple is still applied - confirm. */
+   if (active) {
+      mmesa->setup.dwgctl &= ~(0xf<<20);
+      mmesa->setup.dwgctl |= mmesa->poly_stipple;
+   }
+}
+
+
+/* =============================================================
+ * Rendering attributes
+ *
+ * We really don't want to recalculate all this every time we bind a
+ * texture.  These things shouldn't change all that often, so it makes
+ * sense to break them out of the core texture state update routines.
+ */
+/* Sync hw.specen with NEED_SECONDARY_COLOR and flag both texture units on change. */
+static void updateSpecularLighting( GLcontext *ctx )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   unsigned int wanted = 0;
+
+   if (NEED_SECONDARY_COLOR(ctx))
+      wanted = TMC_specen_enable;
+   if (wanted == mmesa->hw.specen)
+      return;			/* unchanged: nothing to re-upload */
+   mmesa->hw.specen = wanted;
+   mmesa->dirty |= MGA_UPLOAD_TEX0 | MGA_UPLOAD_TEX1;
+}
+
+
+/* =============================================================
+ * Materials
+ */
+
+/* Light-model hook: only GL_LIGHT_MODEL_COLOR_CONTROL triggers a specular re-check. */
+static void mgaDDLightModelfv(GLcontext *ctx, GLenum pname,
+			      const GLfloat *param)
+{
+   if (pname == GL_LIGHT_MODEL_COLOR_CONTROL) {
+      FLUSH_BATCH( MGA_CONTEXT(ctx) );
+      updateSpecularLighting( ctx );
+   }
+}
+
+
+/* =============================================================
+ * Stencil
+ */
+
+/* Set the stencil reference, compare mask and compare mode; face is not read. */
+static void
+mgaDDStencilFuncSeparate(GLcontext *ctx, GLenum face, GLenum func, GLint ref,
+                         GLuint mask)
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   GLuint  stencil;
+   GLuint  stencilctl;
+
+   stencil = MGA_FIELD( S_sref, ref ) | MGA_FIELD( S_smsk, mask );
+   switch (func)
+   {
+   case GL_NEVER:
+      stencilctl = SC_smode_snever;
+      break;
+   case GL_LESS:
+      stencilctl = SC_smode_slt;
+      break;
+   case GL_LEQUAL:
+      stencilctl = SC_smode_slte;
+      break;
+   case GL_GREATER:
+      stencilctl = SC_smode_sgt;
+      break;
+   case GL_GEQUAL:
+      stencilctl = SC_smode_sgte;
+      break;
+   case GL_NOTEQUAL:
+      stencilctl = SC_smode_sne;
+      break;
+   case GL_EQUAL:
+      stencilctl = SC_smode_se;
+      break;
+   case GL_ALWAYS:
+   default:			/* unknown functions are treated as GL_ALWAYS */
+      stencilctl = SC_smode_salways;
+      break;
+   }
+
+   MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+   mmesa->hw.stencil &= (S_sref_MASK & S_smsk_MASK);
+   mmesa->hw.stencil |= stencil;
+   mmesa->hw.stencilctl &= SC_smode_MASK;
+   mmesa->hw.stencilctl |= stencilctl;
+}
+/* Replace the stencil write-mask field of hw.stencil; face is not read. */
+static void
+mgaDDStencilMaskSeparate(GLcontext *ctx, GLenum face, GLuint mask)
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+
+   MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+   mmesa->hw.stencil &= S_swtmsk_MASK;
+   mmesa->hw.stencil |= MGA_FIELD( S_swtmsk, mask );
+}
+/* Rebuild the fail/zfail/zpass op bits from ctx->Stencil.*Func[0]; arguments are not read. */
+static void
+mgaDDStencilOpSeparate(GLcontext *ctx, GLenum face, GLenum fail, GLenum zfail,
+                       GLenum zpass)
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   GLuint  stencilctl;
+
+   stencilctl = 0;
+   switch (ctx->Stencil.FailFunc[0])
+   {
+   case GL_KEEP:
+      stencilctl |= SC_sfailop_keep;
+      break;
+   case GL_ZERO:
+      stencilctl |= SC_sfailop_zero;
+      break;
+   case GL_REPLACE:
+      stencilctl |= SC_sfailop_replace;
+      break;
+   case GL_INCR:
+      stencilctl |= SC_sfailop_incrsat;	/* plain INCR/DECR use the saturating ops */
+      break;
+   case GL_DECR:
+      stencilctl |= SC_sfailop_decrsat;
+      break;
+   case GL_INCR_WRAP:
+      stencilctl |= SC_sfailop_incr;
+      break;
+   case GL_DECR_WRAP:
+      stencilctl |= SC_sfailop_decr;
+      break;
+   case GL_INVERT:
+      stencilctl |= SC_sfailop_invert;
+      break;
+   default:
+      break;
+   }
+
+   switch (ctx->Stencil.ZFailFunc[0])
+   {
+   case GL_KEEP:
+      stencilctl |= SC_szfailop_keep;
+      break;
+   case GL_ZERO:
+      stencilctl |= SC_szfailop_zero;
+      break;
+   case GL_REPLACE:
+      stencilctl |= SC_szfailop_replace;
+      break;
+   case GL_INCR:
+      stencilctl |= SC_szfailop_incrsat;
+      break;
+   case GL_DECR:
+      stencilctl |= SC_szfailop_decrsat;
+      break;
+   case GL_INCR_WRAP:
+      stencilctl |= SC_szfailop_incr;
+      break;
+   case GL_DECR_WRAP:
+      stencilctl |= SC_szfailop_decr;
+      break;
+   case GL_INVERT:
+      stencilctl |= SC_szfailop_invert;
+      break;
+   default:
+      break;
+   }
+
+   switch (ctx->Stencil.ZPassFunc[0])
+   {
+   case GL_KEEP:
+      stencilctl |= SC_szpassop_keep;
+      break;
+   case GL_ZERO:
+      stencilctl |= SC_szpassop_zero;
+      break;
+   case GL_REPLACE:
+      stencilctl |= SC_szpassop_replace;
+      break;
+   case GL_INCR:
+      stencilctl |= SC_szpassop_incrsat;
+      break;
+   case GL_DECR:
+      stencilctl |= SC_szpassop_decrsat;
+      break;
+   case GL_INCR_WRAP:
+      stencilctl |= SC_szpassop_incr;
+      break;
+   case GL_DECR_WRAP:
+      stencilctl |= SC_szpassop_decr;
+      break;
+   case GL_INVERT:
+      stencilctl |= SC_szpassop_invert;
+      break;
+   default:
+      break;
+   }
+
+   MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+   mmesa->hw.stencilctl &= (SC_sfailop_MASK & SC_szfailop_MASK 
+			    & SC_szpassop_MASK);
+   mmesa->hw.stencilctl |= stencilctl;
+}
+
+
+/* =============================================================
+ * Window position and viewport transformation
+ */
+/* Derive hw_viewport from the GL window map: y negated, draw origin added, z scaled. */
+void mgaCalcViewport( GLcontext *ctx )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   GLfloat *m = mmesa->hw_viewport;
+
+   /* See also mga_translate_vertex.
+    */
+   m[MAT_SX] =   v[MAT_SX];
+   m[MAT_TX] =   v[MAT_TX] + mmesa->drawX + SUBPIXEL_X;
+   m[MAT_SY] = - v[MAT_SY];
+   m[MAT_TY] = - v[MAT_TY] + mmesa->driDrawable->h + mmesa->drawY + SUBPIXEL_Y;
+   m[MAT_SZ] =   v[MAT_SZ] * mmesa->depth_scale;
+   m[MAT_TZ] =   v[MAT_TZ] * mmesa->depth_scale;
+
+   mmesa->SetupNewInputs = ~0;
+}
+/* Viewport hook: arguments are not read; simply recomputes hw_viewport. */
+static void mgaViewport( GLcontext *ctx, 
+			  GLint x, GLint y, 
+			  GLsizei width, GLsizei height )
+{
+   mgaCalcViewport( ctx );
+}
+/* Depth-range hook: arguments are not read; simply recomputes hw_viewport. */
+static void mgaDepthRange( GLcontext *ctx, 
+			    GLclampd nearval, GLclampd farval )
+{
+   mgaCalcViewport( ctx );
+}
+
+
+/* =============================================================
+ * Miscellaneous
+ */
+/* Clamp the float clear colour to bytes and pack it for the screen's cpp into ClearColor. */
+static void mgaDDClearColor(GLcontext *ctx, 
+			    const GLfloat color[4] )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   GLubyte c[4];
+   CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
+   CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
+   CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
+   CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
+
+   mmesa->ClearColor = mgaPackColor( mmesa->mgaScreen->cpp,
+				     c[0], c[1], c[2], c[3]);
+}
+
+
+/* Fallback to swrast for select and feedback.
+ */
+static void mgaRenderMode( GLcontext *ctx, GLenum mode )
+{
+   FALLBACK( ctx, MGA_FALLBACK_RENDERMODE, (mode != GL_RENDER) );
+}
+
+/* Logic-op hook: looks up hw.rop in mgarop_NoBLK using the low four bits of opcode. */
+static void mgaDDLogicOp( GLcontext *ctx, GLenum opcode )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+
+   MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+   mmesa->hw.rop = mgarop_NoBLK[ opcode & 0x0f ];
+}
+
+/* Pick the cliprect list and draw origin for the current draw buffer; sets dstorg. */
+static void mga_set_cliprects(mgaContextPtr mmesa)
+{
+   __DRIdrawable *driDrawable = mmesa->driDrawable;
+   /* NOTE(review): back cliprects are chosen only when draw_buffer == MGA_FRONT - confirm. */
+   if ((mmesa->draw_buffer != MGA_FRONT)
+       || (driDrawable->numBackClipRects == 0)) {
+      if (driDrawable->numClipRects == 0) {
+	  static drm_clip_rect_t zeroareacliprect = {0,0,0,0};	/* stand-in for an empty list */
+	  mmesa->numClipRects = 1;
+	  mmesa->pClipRects = &zeroareacliprect;
+      } else {
+	  mmesa->numClipRects = driDrawable->numClipRects;
+	  mmesa->pClipRects = driDrawable->pClipRects;
+      }
+      mmesa->drawX = driDrawable->x;
+      mmesa->drawY = driDrawable->y;
+   } else {
+      mmesa->numClipRects = driDrawable->numBackClipRects;
+      mmesa->pClipRects = driDrawable->pBackClipRects;
+      mmesa->drawX = driDrawable->backX;
+      mmesa->drawY = driDrawable->backY;
+   }
+
+   mmesa->setup.dstorg = mmesa->drawOffset;
+   mmesa->dirty |= MGA_UPLOAD_CONTEXT | MGA_UPLOAD_CLIPRECTS;
+}
+
+/* Refresh framebuffer sizes, cliprects, scissor and viewport; `buffers` is not read. */
+void mgaUpdateRects( mgaContextPtr mmesa, GLuint buffers )
+{
+   __DRIdrawable *const driDrawable = mmesa->driDrawable;
+   __DRIdrawable *const driReadable = mmesa->driReadable;
+
+   mmesa->dirty_cliprects = 0;	
+
+   driUpdateFramebufferSize(mmesa->glCtx, driDrawable);
+   if (driDrawable != driReadable) {
+      driUpdateFramebufferSize(mmesa->glCtx, driReadable);
+   }
+
+   mga_set_cliprects(mmesa);
+
+   mgaUpdateClipping( mmesa->glCtx );
+   mgaCalcViewport( mmesa->glCtx );
+}
+
+/* Draw-buffer hook: front-left / back-left are done in h/w, anything else falls back. */
+static void mgaDDDrawBuffer(GLcontext *ctx, GLenum mode )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+
+   FLUSH_BATCH( mmesa );
+
+   if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
+      /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */
+      FALLBACK( ctx, MGA_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) {
+   case BUFFER_FRONT_LEFT:
+      mmesa->setup.dstorg = mmesa->mgaScreen->frontOffset;
+      mmesa->draw_buffer = MGA_FRONT;
+      break;
+   case BUFFER_BACK_LEFT:
+      mmesa->setup.dstorg = mmesa->mgaScreen->backOffset;
+      mmesa->draw_buffer = MGA_BACK;
+      break;
+   default:
+      FALLBACK( ctx, MGA_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+   /* NOTE(review): mga_set_cliprects() overwrites setup.dstorg with drawOffset - confirm. */
+   mmesa->dirty |= MGA_UPLOAD_CONTEXT;
+   mga_set_cliprects(mmesa);
+   FALLBACK(ctx, MGA_FALLBACK_DRAW_BUFFER, GL_FALSE);
+}
+
+
+static void mgaDDReadBuffer(GLcontext *ctx, GLenum mode )
+{
+   /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
+}
+
+
+/* =============================================================
+ * State enable/disable
+ */
+
+/* glEnable/glDisable hook: mirror the changed capability into the h/w state words. */
+static void mgaDDEnable(GLcontext *ctx, GLenum cap, GLboolean state)
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+
+   switch(cap) {
+   case GL_DITHER:
+      MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+      if (!ctx->Color.DitherFlag)
+	 mmesa->setup.maccess |= MA_nodither_enable;
+      else
+	 mmesa->setup.maccess &= ~MA_nodither_enable;
+      break;
+   case GL_LIGHTING:
+   case GL_COLOR_SUM_EXT:
+      FLUSH_BATCH( mmesa );
+      updateSpecularLighting( ctx );
+      break;
+   case GL_ALPHA_TEST:
+      MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+      mmesa->hw.alpha_func_enable = (state) ? ~0 : 0;
+      break;
+   case GL_DEPTH_TEST:
+      MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+      FALLBACK (ctx, MGA_FALLBACK_DEPTH,
+		ctx->Depth.Func == GL_NEVER && ctx->Depth.Test);
+      break;
+   case GL_SCISSOR_TEST:
+      FLUSH_BATCH( mmesa );
+      mmesa->scissor = state;
+      mgaUpdateClipping( ctx );
+      break;
+
+   case GL_FOG:
+      MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+      if (ctx->Fog.Enabled) 
+	 mmesa->setup.maccess |= MA_fogen_enable;
+      else
+	 mmesa->setup.maccess &= ~MA_fogen_enable;
+      break;
+   case GL_CULL_FACE:
+      mgaDDCullFaceFrontFace( ctx, 0 );
+      break;
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+      break;
+   case GL_POLYGON_STIPPLE:
+      if (mmesa->haveHwStipple && mmesa->raster_primitive == GL_TRIANGLES) {
+	 MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+	 mmesa->setup.dwgctl &= ~(0xf<<20);
+	 if (state)
+	    mmesa->setup.dwgctl |= mmesa->poly_stipple;
+      }
+      break;
+
+   case GL_BLEND:
+   case GL_COLOR_LOGIC_OP:
+      updateBlendLogicOp( ctx );
+      break;
+
+   case GL_STENCIL_TEST:
+      MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT );
+      if (mmesa->hw_stencil) {
+	 mmesa->hw.stencil_enable = ( state ) ? ~0 : 0;
+      }
+      else {
+	 FALLBACK( ctx, MGA_FALLBACK_STENCIL, state );
+      }
+      break;			/* explicit: do not fall into default */
+   default:
+      break;
+   }
+}
+
+
+/* =============================================================
+ */
+
+static void mgaDDPrintDirty( const char *msg, GLuint state )  /* debug: name each dirty bit set in 'state' on stderr */
+{
+   fprintf(stderr, "%s (0x%03x): %s%s%s%s%s%s%s\n",
+	   msg,
+	   (unsigned int) state,
+	   (state & MGA_WAIT_AGE)          ? "wait-age " : "",
+	   (state & MGA_UPLOAD_TEX0IMAGE)  ? "upload-tex0-img " : "",
+	   (state & MGA_UPLOAD_TEX1IMAGE)  ? "upload-tex1-img " : "",
+	   (state & MGA_UPLOAD_CONTEXT)    ? "upload-ctx " : "",
+	   (state & MGA_UPLOAD_TEX0)       ? "upload-tex0 " : "",
+	   (state & MGA_UPLOAD_TEX1)       ? "upload-tex1 " : "",
+	   (state & MGA_UPLOAD_PIPE)       ? "upload-pipe " : ""
+      );
+}
+
+/* Push the state into the sarea and/or texture memory.
+ * Only the state groups flagged in mmesa->dirty are rebuilt and copied. */
+void mgaEmitHwStateLocked( mgaContextPtr mmesa )
+{
+   drm_mga_sarea_t *sarea = mmesa->sarea;
+   GLcontext * ctx = mmesa->glCtx;
+
+   if (MGA_DEBUG & DEBUG_VERBOSE_MSG)
+      mgaDDPrintDirty( __FUNCTION__, mmesa->dirty );
+
+   if (mmesa->dirty & MGA_UPLOAD_CONTEXT) {
+      mmesa->setup.wflag = _CULL_DISABLE;   /* default; replaced below when rasterizing triangles */
+      if (mmesa->raster_primitive == GL_TRIANGLES) {
+	 if ((ctx->Texture.Unit[0]._ReallyEnabled == TEXTURE_2D_BIT &&
+	      ctx->Texture.Unit[1]._ReallyEnabled == TEXTURE_2D_BIT)) {
+	    mmesa->setup.wflag = mmesa->hw.cull_dualtex;   /* both units have a 2D texture enabled */
+	 }
+	 else {
+	    mmesa->setup.wflag = mmesa->hw.cull;
+	 }
+      }
+
+      mmesa->setup.stencil = mmesa->hw.stencil 
+	  & mmesa->hw.stencil_enable;   /* stencil_enable is 0 or ~0, so this passes or zeroes the value */
+      mmesa->setup.stencilctl = mmesa->hw.stencilctl
+	  & mmesa->hw.stencil_enable;
+
+      /* If depth testing is not enabled, then use the no Z-compare / no
+       * Z-write mode.  Otherwise, use whatever is set in hw.zmode.
+       */
+      mmesa->setup.dwgctl &= (DC_zmode_MASK & DC_atype_MASK);   /* MGA *_MASK defines have 0s in the named field: this clears it */
+      mmesa->setup.dwgctl |= (ctx->Depth.Test)
+	  ? mmesa->hw.zmode : (DC_zmode_nozcmp | DC_atype_i);
+
+      mmesa->setup.dwgctl &= DC_bop_MASK;
+      mmesa->setup.dwgctl |= RGBA_LOGICOP_ENABLED(ctx)
+	  ? mmesa->hw.rop : mgarop_NoBLK[ GL_COPY & 0x0f ];   /* without logic op, use the GL_COPY entry */
+
+      mmesa->setup.alphactrl &= AC_src_MASK & AC_dst_MASK & AC_atmode_MASK
+	 & AC_atref_MASK & AC_alphasel_MASK;
+      mmesa->setup.alphactrl |= 
+	 (mmesa->hw.alpha_func & mmesa->hw.alpha_func_enable) |
+	 (mmesa->hw.blend_func & mmesa->hw.blend_func_enable) |
+	 ((AC_src_one | AC_dst_zero) & ~mmesa->hw.blend_func_enable) |   /* src=one, dst=zero is used when the enable mask is 0 */
+	 mmesa->hw.alpha_sel;
+
+      memcpy( &sarea->context_state, &mmesa->setup, sizeof(mmesa->setup));
+   }
+
+   if ((mmesa->dirty & MGA_UPLOAD_TEX0) && mmesa->CurrentTexObj[0]) {
+      memcpy(&sarea->tex_state[0],
+	     &mmesa->CurrentTexObj[0]->setup,
+	     sizeof(sarea->tex_state[0]));
+   }
+
+   if ((mmesa->dirty & MGA_UPLOAD_TEX1) && mmesa->CurrentTexObj[1]) {
+      memcpy(&sarea->tex_state[1],
+	     &mmesa->CurrentTexObj[1]->setup,
+	     sizeof(sarea->tex_state[1]));
+   }
+
+   if (mmesa->dirty & (MGA_UPLOAD_TEX0 | MGA_UPLOAD_TEX1)) {   /* re-apply hw.specen to both units' texctl2 */
+      sarea->tex_state[0].texctl2 &= ~TMC_specen_enable;
+      sarea->tex_state[1].texctl2 &= ~TMC_specen_enable;
+      sarea->tex_state[0].texctl2 |= mmesa->hw.specen;
+      sarea->tex_state[1].texctl2 |= mmesa->hw.specen;
+   }
+
+   if (mmesa->dirty & MGA_UPLOAD_PIPE) {
+/*        mmesa->sarea->wacceptseq = mmesa->hw_primitive; */
+      mmesa->sarea->warp_pipe = mmesa->vertex_format;
+      mmesa->sarea->vertsize = mmesa->vertex_size;
+   }
+
+   mmesa->sarea->dirty |= mmesa->dirty;   /* publish the dirty bits through the sarea */
+   mmesa->dirty &= MGA_UPLOAD_CLIPRECTS;   /* locally, only the cliprect bit (if set) stays pending */
+}
+
+/* =============================================================
+ */
+
+
+static void mgaDDValidateState( GLcontext *ctx )  /* consume mmesa->NewGLState; called from mgaRunPipeline */
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+
+   FLUSH_BATCH( mmesa );   /* presumably flushes primitives queued under the old state -- confirm */
+
+   if (mmesa->NewGLState & _NEW_TEXTURE) {
+      mgaUpdateTextureState(ctx);
+   }
+
+   if (!mmesa->Fallback) {   /* vertex/render state selection is skipped while any fallback bit is set */
+      if (mmesa->NewGLState & _MGA_NEW_RASTERSETUP) {
+         mgaChooseVertexState( ctx );
+      }
+
+      if (mmesa->NewGLState & _MGA_NEW_RENDERSTATE) {
+         mgaChooseRenderState( ctx );
+      }
+   }
+
+   mmesa->NewGLState = 0;   /* all accumulated state flags have been handled */
+}
+
+
+static void mgaDDInvalidateState( GLcontext *ctx, GLuint new_state )  /* installed as ctx->Driver.UpdateState */
+{
+   _swrast_InvalidateState( ctx, new_state );   /* forward the change to each helper module */
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   MGA_CONTEXT(ctx)->NewGLState |= new_state;   /* accumulated until mgaDDValidateState() resets it */
+}
+
+
+static void mgaRunPipeline( GLcontext *ctx )  /* installed as TNL_CONTEXT(ctx)->Driver.RunPipeline */
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+
+   if (mmesa->NewGLState) {   /* pending GL state changes: validate first */
+      mgaDDValidateState( ctx );
+   }
+
+   if (mmesa->dirty) {   /* hardware state groups waiting to be copied to the sarea */
+       mgaEmitHwStateLocked( mmesa );
+   }
+
+   _tnl_run_pipeline( ctx );
+}
+
+
+void mgaInitState( mgaContextPtr mmesa )
+{  /* Load default hardware register state for the context and flag it for upload. */
+   mgaScreenPrivate *mgaScreen = mmesa->mgaScreen;
+   GLcontext *ctx = mmesa->glCtx;
+
+   if (ctx->Visual.doubleBufferMode) {
+      /* use back buffer by default */
+      mmesa->draw_buffer = MGA_BACK;
+      mmesa->drawOffset = mmesa->mgaScreen->backOffset;
+      mmesa->readOffset = mmesa->mgaScreen->backOffset;
+      mmesa->setup.dstorg = mgaScreen->backOffset;
+   } else {
+      /* use front buffer by default */
+      mmesa->draw_buffer = MGA_FRONT;
+      mmesa->drawOffset = mmesa->mgaScreen->frontOffset;
+      mmesa->readOffset = mmesa->mgaScreen->frontOffset;
+      mmesa->setup.dstorg = mgaScreen->frontOffset;
+   }
+
+   mmesa->setup.maccess = (MA_memreset_disable |
+			   MA_fogen_disable |
+			   MA_tlutload_disable |
+			   MA_nodither_disable |
+			   MA_dit555_disable);
+   if (driQueryOptioni (&mmesa->optionCache, "color_reduction") !=
+       DRI_CONF_COLOR_REDUCTION_DITHER)
+      mmesa->setup.maccess |= MA_nodither_enable;   /* dithering is disabled unless the option selects it */
+
+   switch (mmesa->mgaScreen->cpp) {   /* cpp: presumably bytes per pixel of the screen -- confirm */
+   case 2:
+      mmesa->setup.maccess |= MA_pwidth_16;
+      break;
+   case 4:
+      mmesa->setup.maccess |= MA_pwidth_32;
+      break;
+   default:
+      fprintf( stderr, "Error: unknown cpp %d, exiting...\n",
+	       mmesa->mgaScreen->cpp );
+      exit( 1 );
+   }
+
+   switch (mmesa->glCtx->Visual.depthBits) {   /* any other depth ORs in no zwidth bits */
+   case 16:
+      mmesa->setup.maccess |= MA_zwidth_16;
+      break;
+   case 24:
+      mmesa->setup.maccess |= MA_zwidth_24;
+      break;
+   case 32:
+      mmesa->setup.maccess |= MA_zwidth_32;
+      break;
+   }
+
+   mmesa->hw.blend_func = AC_src_one | AC_dst_zero;
+   mmesa->hw.blend_func_enable = 0;   /* the *_enable members are 0 / ~0 masks applied in mgaEmitHwStateLocked */
+   mmesa->hw.alpha_func = AC_atmode_noacmp | MGA_FIELD( AC_atref, 0x00 );
+   mmesa->hw.alpha_func_enable = 0;
+   mmesa->hw.rop = mgarop_NoBLK[ GL_COPY & 0x0f ];
+   mmesa->hw.zmode = DC_zmode_zlt | DC_atype_zi;
+   mmesa->hw.stencil = MGA_FIELD( S_sref, 0x00) | MGA_FIELD( S_smsk, 0xff ) |
+      MGA_FIELD( S_swtmsk, 0xff );
+   mmesa->hw.stencilctl = SC_smode_salways | SC_sfailop_keep 
+      | SC_szfailop_keep | SC_szpassop_keep;
+   mmesa->hw.stencil_enable = 0;
+   mmesa->hw.cull = _CULL_DISABLE;
+   mmesa->hw.cull_dualtex = _CULL_DISABLE;
+   mmesa->hw.specen = 0;
+   mmesa->hw.alpha_sel = AC_alphasel_diffused;
+
+   mmesa->setup.dwgctl = (DC_opcod_trap |
+			  DC_linear_xy |
+			  DC_solid_disable |
+			  DC_arzero_disable |
+			  DC_sgnzero_disable |
+			  DC_shftzero_enable |
+			  MGA_FIELD( DC_bop, 0xC ) |
+			  MGA_FIELD( DC_trans, 0x0 ) |
+			  DC_bltmod_bmonolef |
+			  DC_pattern_disable |
+			  DC_transc_disable |
+			  DC_clipdis_disable);
+
+   mmesa->setup.plnwt = ~0;
+   mmesa->setup.alphactrl = (AC_amode_alpha_channel |
+			     AC_astipple_disable |
+			     AC_aten_disable);
+
+   mmesa->setup.fogcolor = PACK_COLOR_888((GLubyte)(ctx->Fog.Color[0]*255.0F),
+					  (GLubyte)(ctx->Fog.Color[1]*255.0F),
+					  (GLubyte)(ctx->Fog.Color[2]*255.0F));
+
+   mmesa->setup.wflag = 0;
+   mmesa->setup.tdualstage0 = 0;
+   mmesa->setup.tdualstage1 = 0;
+   mmesa->setup.fcol = 0;
+   mmesa->dirty |= MGA_UPLOAD_CONTEXT;   /* make sure the defaults above get emitted */
+
+   mmesa->envcolor[0] = 0;   /* cached packed texture environment colours, one per unit */
+   mmesa->envcolor[1] = 0;
+}
+
+
+void mgaDDInitStateFuncs( GLcontext *ctx )
+{  /* Install the MGA state-change callbacks into ctx->Driver and the TNL context. */
+   ctx->Driver.UpdateState = mgaDDInvalidateState;
+   ctx->Driver.Enable = mgaDDEnable;
+   ctx->Driver.LightModelfv = mgaDDLightModelfv;
+   ctx->Driver.AlphaFunc = mgaDDAlphaFunc;
+   ctx->Driver.BlendEquationSeparate = mgaDDBlendEquationSeparate;
+   ctx->Driver.BlendFuncSeparate = mgaDDBlendFuncSeparate;
+   ctx->Driver.DepthFunc = mgaDDDepthFunc;
+   ctx->Driver.DepthMask = mgaDDDepthMask;
+   ctx->Driver.Fogfv = mgaDDFogfv;
+   ctx->Driver.Scissor = mgaDDScissor;
+   ctx->Driver.CullFace = mgaDDCullFaceFrontFace;   /* one handler serves both CullFace and FrontFace */
+   ctx->Driver.FrontFace = mgaDDCullFaceFrontFace;
+   ctx->Driver.ColorMask = mgaDDColorMask;
+
+   ctx->Driver.DrawBuffer = mgaDDDrawBuffer;
+   ctx->Driver.ReadBuffer = mgaDDReadBuffer;
+   ctx->Driver.ClearColor = mgaDDClearColor;
+   ctx->Driver.ClearDepth = mgaDDClearDepth;
+   ctx->Driver.LogicOpcode = mgaDDLogicOp;
+
+   ctx->Driver.PolygonStipple = mgaDDPolygonStipple;
+
+   ctx->Driver.StencilFuncSeparate = mgaDDStencilFuncSeparate;
+   ctx->Driver.StencilMaskSeparate = mgaDDStencilMaskSeparate;
+   ctx->Driver.StencilOpSeparate = mgaDDStencilOpSeparate;
+
+   ctx->Driver.DepthRange = mgaDepthRange;
+   ctx->Driver.Viewport = mgaViewport;
+   ctx->Driver.RenderMode = mgaRenderMode;
+
+   TNL_CONTEXT(ctx)->Driver.RunPipeline = mgaRunPipeline;   /* validates and emits state before running tnl */
+}
diff --git a/src/mesa/drivers/dri/mga/mgastate.h b/src/mesa/drivers/dri/mga/mgastate.h
new file mode 100644
index 0000000000..ec65d4e6cd
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgastate.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef _MGA_STATE_H
+#define _MGA_STATE_H
+
+extern void mgaInitState( mgaContextPtr mmesa );   /* load default hw register state for a context */
+extern void mgaDDInitStateFuncs(GLcontext *ctx);   /* install the state callbacks into ctx->Driver */
+extern void mgaUpdateClipping(const GLcontext *ctx);
+extern void mgaUpdateCull( GLcontext *ctx );
+extern void mgaCalcViewport( GLcontext *ctx );
+extern void mgaUpdateRects( mgaContextPtr mmesa, GLuint buffers );
+
+#endif
diff --git a/src/mesa/drivers/dri/mga/mgatex.c b/src/mesa/drivers/dri/mga/mgatex.c
new file mode 100644
index 0000000000..ca3dd4b013
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgatex.c
@@ -0,0 +1,540 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mm.h"
+#include "main/colormac.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/texstore.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+
+#include "mgacontext.h"
+#include "mgatex.h"
+#include "mgaregs.h"
+#include "mgaioctl.h"
+
+#include "xmlpool.h"
+
+/**
+ * Set the texture wrap modes.
+ * Currently \c GL_REPEAT, \c GL_CLAMP and \c GL_CLAMP_TO_EDGE are supported.
+ * 
+ * \param t Texture object whose wrap modes are to be set
+ * \param swrap Wrap mode for the \a s texture coordinate
+ * \param twrap Wrap mode for the \a t texture coordinate
+ */
+
+static void 
+mgaSetTexWrapping( mgaTextureObjectPtr t, GLenum swrap, GLenum twrap )
+{
+   GLboolean  is_clamp = GL_FALSE;   /* GL_CLAMP requested on at least one axis */
+   GLboolean  is_clamp_to_edge = GL_FALSE;   /* GL_CLAMP_TO_EDGE requested on at least one axis */
+
+   t->setup.texctl &= (TMC_clampu_MASK & TMC_clampv_MASK);   /* MGA *_MASK defines have 0s in the named field: clears it */
+   t->setup.texctl2 &= (TMC_borderen_MASK);
+
+   switch( swrap ) {
+   case GL_REPEAT:
+      break;
+   case GL_CLAMP:
+      t->setup.texctl |= TMC_clampu_enable;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_CLAMP_TO_EDGE:
+      t->setup.texctl |= TMC_clampu_enable;
+      is_clamp_to_edge = GL_TRUE;
+      break;
+   default:
+      _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);   /* clamp bit stays cleared for unknown modes */
+   }
+
+   switch( twrap ) {
+   case GL_REPEAT:
+      break;
+   case GL_CLAMP:
+      t->setup.texctl |= TMC_clampv_enable;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_CLAMP_TO_EDGE:
+      t->setup.texctl |= TMC_clampv_enable;
+      is_clamp_to_edge = GL_TRUE;
+      break;
+   default:
+      _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__);
+   }
+
+   if ( is_clamp ) {
+      t->setup.texctl2 |= TMC_borderen_enable;   /* only GL_CLAMP turns the border enable on */
+   }
+
+   t->border_fallback = (is_clamp && is_clamp_to_edge);   /* GL_CLAMP mixed with GL_CLAMP_TO_EDGE flags a fallback */
+}
+
+
+/**
+ * Set the texture magnification and minification modes.
+ * 
+ * \param t Texture whose filter modes are to be set
+ * \param minf Texture minification mode
+ * \param magf Texture magnification mode
+ */
+
+static void
+mgaSetTexFilter( mgaTextureObjectPtr t, GLenum minf, GLenum magf )
+{
+   GLuint val = 0;   /* accumulates the minfilter, magfilter and fthres field values */
+
+   switch (minf) {
+   case GL_NEAREST: val = TF_minfilter_nrst; break;
+   case GL_LINEAR: val = TF_minfilter_bilin; break;
+   case GL_NEAREST_MIPMAP_NEAREST: val = TF_minfilter_mm1s; break;
+   case GL_LINEAR_MIPMAP_NEAREST: val = TF_minfilter_mm4s; break;
+   case GL_NEAREST_MIPMAP_LINEAR: val = TF_minfilter_mm2s; break;
+   case GL_LINEAR_MIPMAP_LINEAR: val = TF_minfilter_mm8s; break;
+   default: val = TF_minfilter_nrst; break;   /* unknown modes are treated as nearest */
+   }
+
+   switch (magf) {
+   case GL_NEAREST: val |= TF_magfilter_nrst; break;
+   case GL_LINEAR: val |= TF_magfilter_bilin; break;
+   default: val |= TF_magfilter_nrst; break;   /* unknown modes are treated as nearest */
+   }
+
+   /* See OpenGL 1.2 specification */
+   if (magf == GL_LINEAR && (minf == GL_NEAREST_MIPMAP_NEAREST ||
+			     minf == GL_NEAREST_MIPMAP_LINEAR)) {
+      val |= MGA_FIELD( TF_fthres, 0x20 ); /* c = 0.5 */
+   } else {
+      val |= MGA_FIELD( TF_fthres, 0x10 ); /* c = 0 */
+   }
+
+
+   /* Mask off the bits for the fields we are setting.  Remember, the MGA mask
+    * defines have 0s for the bits in the named fields.  This is the opposite
+    * of most of the other drivers.
+    */
+
+   t->setup.texfilter &= (TF_minfilter_MASK &
+			  TF_magfilter_MASK &
+			  TF_fthres_MASK);
+   t->setup.texfilter |= val;   /* other texfilter bits are left untouched */
+}
+
+static void mgaSetTexBorderColor(mgaTextureObjectPtr t, const GLfloat color[4])
+{
+   GLubyte red, green, blue, alpha;   /* the four channels converted to bytes */
+   CLAMPED_FLOAT_TO_UBYTE(red, color[0]);
+   CLAMPED_FLOAT_TO_UBYTE(green, color[1]);
+   CLAMPED_FLOAT_TO_UBYTE(blue, color[2]);
+   CLAMPED_FLOAT_TO_UBYTE(alpha, color[3]);
+   t->setup.texbordercol = PACK_COLOR_8888(alpha, red, green, blue );   /* packed in A,R,G,B order */
+}
+
+
+static gl_format
+mgaChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+		        GLenum format, GLenum type )
+{  /* Installed as functions->ChooseTextureFormat: maps internalFormat (and type) to a Mesa texture format. */
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const GLboolean do32bpt =
+       ( mmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );   /* used for the unsized RGB/RGBA formats */
+   const GLboolean force16bpt =
+       ( mmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );   /* used for the sized 8-bit-or-more formats */
+   (void) format;
+
+   switch ( internalFormat ) {
+   case 4:
+   case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
+      switch ( type ) {   /* packed source types select a matching 16-bit layout */
+      case GL_UNSIGNED_INT_10_10_10_2:
+      case GL_UNSIGNED_INT_2_10_10_10_REV:
+	 return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB1555;
+      case GL_UNSIGNED_SHORT_4_4_4_4:
+      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+	 return MESA_FORMAT_ARGB4444;
+      case GL_UNSIGNED_SHORT_5_5_5_1:
+      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+	 return MESA_FORMAT_ARGB1555;
+      default:
+         return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+      }
+
+   case 3:
+   case GL_RGB:
+   case GL_COMPRESSED_RGB:
+      switch ( type ) {
+      case GL_UNSIGNED_SHORT_4_4_4_4:
+      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+	 return MESA_FORMAT_ARGB4444;
+      case GL_UNSIGNED_SHORT_5_5_5_1:
+      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+	 return MESA_FORMAT_ARGB1555;
+      case GL_UNSIGNED_SHORT_5_6_5:
+      case GL_UNSIGNED_SHORT_5_6_5_REV:
+	 return MESA_FORMAT_RGB565;
+      default:
+         return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_RGB565;
+      }
+
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return !force16bpt ?
+	  MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+
+   case GL_RGBA4:
+   case GL_RGBA2:
+      return MESA_FORMAT_ARGB4444;
+
+   case GL_RGB5_A1:
+      return MESA_FORMAT_ARGB1555;
+
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return !force16bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_RGB565;
+
+   case GL_RGB5:
+   case GL_RGB4:
+   case GL_R3_G3_B2:
+      return MESA_FORMAT_RGB565;
+
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+      /* FIXME: This will report incorrect component sizes... */
+      return MGA_IS_G400(mmesa) ? MESA_FORMAT_AL88 : MESA_FORMAT_ARGB4444;
+
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+      /* FIXME: This will report incorrect component sizes... */
+      return MGA_IS_G400(mmesa) ? MESA_FORMAT_AL88 : MESA_FORMAT_RGB565;
+
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      /* FIXME: This will report incorrect component sizes... */
+      return MGA_IS_G400(mmesa) ? MESA_FORMAT_AL88 : MESA_FORMAT_ARGB4444;
+
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      /* FIXME: This will report incorrect component sizes... */
+      return MGA_IS_G400(mmesa) ? MESA_FORMAT_I8 : MESA_FORMAT_ARGB4444;
+
+   case GL_YCBCR_MESA:
+      if (MGA_IS_G400(mmesa) &&
+          (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+           type == GL_UNSIGNED_BYTE))
+         return MESA_FORMAT_YCBCR;
+      else
+         return MESA_FORMAT_YCBCR_REV;   /* NOTE(review): also taken on non-G400 parts for any type -- confirm intended */
+
+   case GL_COLOR_INDEX:
+   case GL_COLOR_INDEX1_EXT:
+   case GL_COLOR_INDEX2_EXT:
+   case GL_COLOR_INDEX4_EXT:
+   case GL_COLOR_INDEX8_EXT:
+   case GL_COLOR_INDEX12_EXT:
+   case GL_COLOR_INDEX16_EXT:
+      return MESA_FORMAT_CI8;
+
+   default:
+      _mesa_problem( ctx, "unexpected texture format in %s", __FUNCTION__ );
+      return MESA_FORMAT_NONE;
+   }
+
+   return MESA_FORMAT_NONE; /* never get here */
+}
+
+
+
+
+/**
+ * Allocate and initialise the driver-private texture object for \a tObj.
+ * The result (NULL if allocation failed) is stored in tObj->DriverData and
+ * returned; wrap, filter and border colour are seeded from \a tObj.
+ */
+
+static mgaTextureObjectPtr
+mgaAllocTexObj( struct gl_texture_object *tObj )
+{
+   mgaTextureObjectPtr t;
+
+
+   t = CALLOC( sizeof( *t ) );
+   tObj->DriverData = t;   /* stored even when t is NULL */
+   if ( t != NULL ) {
+      /* Initialize non-image-dependent parts of the state:
+       */
+      t->base.tObj = tObj;   /* back-pointer to the core Mesa object */
+
+      t->setup.texctl = TMC_takey_1 | TMC_tamask_0;
+      t->setup.texctl2 = TMC_ckstransdis_enable;
+      t->setup.texfilter = TF_filteralpha_enable | TF_uvoffset_OGL;
+
+      t->border_fallback = GL_FALSE;
+      t->texenv_fallback = GL_FALSE;
+
+      make_empty_list( & t->base );
+
+      mgaSetTexWrapping( t, tObj->WrapS, tObj->WrapT );
+      mgaSetTexFilter( t, tObj->MinFilter, tObj->MagFilter );
+      mgaSetTexBorderColor( t, tObj->BorderColor.f );
+   }
+
+   return( t );
+}
+
+
+static void mgaTexEnv( GLcontext *ctx, GLenum target,
+			 GLenum pname, const GLfloat *param )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   GLuint unit = ctx->Texture.CurrentUnit;
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLubyte rgba[4];
+
+   /* Only the environment colour is cached here; other pnames are ignored. */
+   if ( pname != GL_TEXTURE_ENV_COLOR )
+      return;
+
+   /* Convert the current unit's colour to bytes and store it packed as A,R,G,B. */
+   UNCLAMPED_FLOAT_TO_RGBA_CHAN( rgba, texUnit->EnvColor );
+   mmesa->envcolor[unit] = PACK_COLOR_8888( rgba[3], rgba[0],
+                                            rgba[1], rgba[2] );
+}
+
+
+static void mgaTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+			    GLint internalFormat,
+			    GLint width, GLint height, GLint border,
+			    GLenum format, GLenum type, const GLvoid *pixels,
+			    const struct gl_pixelstore_attrib *packing,
+			    struct gl_texture_object *texObj,
+			    struct gl_texture_image *texImage )
+{  /* Installed as functions->TexImage2D: store the image via core Mesa and flag the level dirty. */
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   if ( t != NULL ) {
+      driSwapOutTextureObject( t );   /* presumably drops the stale hardware copy -- confirm */
+   } 
+   else {
+      t = (driTextureObject *) mgaAllocTexObj( texObj );   /* no driver object yet: create one */
+      if ( t == NULL ) {
+	 _mesa_error( ctx, GL_OUT_OF_MEMORY, "glTexImage2D" );
+	 return;
+      }
+   }
+
+   _mesa_store_teximage2d( ctx, target, level, internalFormat,
+			   width, height, border, format, type,
+			   pixels, packing, texObj, texImage );
+   level -= t->firstLevel;   /* dirty bits are indexed relative to firstLevel */
+   if (level >= 0)   /* levels below firstLevel are not flagged */
+      t->dirty_images[0] |= (1UL << level);
+}
+
+static void mgaTexSubImage2D( GLcontext *ctx,
+			       GLenum target,
+			       GLint level,
+			       GLint xoffset, GLint yoffset,
+			       GLsizei width, GLsizei height,
+			       GLenum format, GLenum type,
+			       const GLvoid *pixels,
+			       const struct gl_pixelstore_attrib *packing,
+			       struct gl_texture_object *texObj,
+			       struct gl_texture_image *texImage )
+{  /* Installed as functions->TexSubImage2D: store the sub-image via core Mesa and flag the level dirty. */
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   assert( t ); /* this _should_ be true */
+   if ( t != NULL ) {
+      driSwapOutTextureObject( t );   /* presumably drops the stale hardware copy -- confirm */
+   }
+   else {
+      t = (driTextureObject *) mgaAllocTexObj( texObj );   /* non-debug builds recover by allocating */
+      if ( t == NULL ) {
+	 _mesa_error( ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D" );   /* report the entry point actually called */
+	 return;
+      }
+   }
+
+   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+			     height, format, type, pixels, packing, texObj,
+			     texImage);
+   level -= t->firstLevel;   /* dirty bits are indexed relative to firstLevel */
+   if (level >= 0)   /* levels below firstLevel are not flagged */
+      t->dirty_images[0] |= (1UL << level);
+}
+
+
+/**
+ * Changes variables and flags for a state update, which will happen at the
+ * next UpdateTextureState
+ */
+
+static void
+mgaTexParameter( GLcontext *ctx, GLenum target,
+		   struct gl_texture_object *tObj,
+		   GLenum pname, const GLfloat *params )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+   mgaTextureObjectPtr t = (mgaTextureObjectPtr) tObj->DriverData;
+
+   /* If we don't have a hardware texture, it will be automatically
+    * created with current state before it is used, so we don't have
+    * to do anything now 
+    */
+   if ( (t == NULL) ||
+        (target != GL_TEXTURE_2D &&
+         target != GL_TEXTURE_RECTANGLE_NV) ) {
+      return;   /* only 2D and rectangle targets are handled */
+   }
+
+   switch (pname) {   /* values are re-read from tObj; 'params' itself is not used */
+   case GL_TEXTURE_MIN_FILTER:
+      driSwapOutTextureObject( (driTextureObject *) t );   /* NOTE(review): presumably because the min filter decides which mipmap levels are loaded -- confirm */
+      /* FALLTHROUGH */
+   case GL_TEXTURE_MAG_FILTER:
+      FLUSH_BATCH(mmesa);
+      mgaSetTexFilter( t, tObj->MinFilter, tObj->MagFilter );
+      break;
+
+   case GL_TEXTURE_WRAP_S:
+   case GL_TEXTURE_WRAP_T:
+      FLUSH_BATCH(mmesa);
+      mgaSetTexWrapping(t,tObj->WrapS,tObj->WrapT);
+      break;
+
+   case GL_TEXTURE_BORDER_COLOR:
+      FLUSH_BATCH(mmesa);
+      mgaSetTexBorderColor(t, tObj->BorderColor.f);
+      break;
+
+   case GL_TEXTURE_BASE_LEVEL:
+   case GL_TEXTURE_MAX_LEVEL:
+   case GL_TEXTURE_MIN_LOD:
+   case GL_TEXTURE_MAX_LOD:
+      /* This isn't the most efficient solution but there doesn't appear to
+       * be a nice alternative.  Since there's no LOD clamping,
+       * we just have to rely on loading the right subset of mipmap levels
+       * to simulate a clamped LOD.
+       */
+      driSwapOutTextureObject( (driTextureObject *) t );
+      break;
+
+   default:
+      return;   /* all other parameters need no driver action */
+   }
+}
+
+
+static void
+mgaBindTexture( GLcontext *ctx, GLenum target,
+		  struct gl_texture_object *tObj )
+{  /* Installed as functions->BindTexture: performs no work beyond the sanity check below. */
+   assert( (target != GL_TEXTURE_2D && target != GL_TEXTURE_RECTANGLE_NV) ||
+           (tObj->DriverData != NULL) );   /* 2D and rectangle textures must already have driver data */
+}
+
+
+static void
+mgaDeleteTexture( GLcontext *ctx, struct gl_texture_object *tObj )
+{  /* Installed as functions->DeleteTexture: release the driver data, then the core Mesa object. */
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+   driTextureObject * t = (driTextureObject *) tObj->DriverData;
+
+   if ( t ) {
+      if ( mmesa ) {
+	 FLUSH_BATCH( mmesa );   /* presumably so queued primitives no longer reference the texture -- confirm */
+      }
+
+      driDestroyTextureObject( t );
+   }
+
+   /* Free mipmap images and the texture object itself */
+   _mesa_delete_texture_object(ctx, tObj);
+}
+
+
+/**
+ * Allocate a new texture object.
+ * Called via ctx->Driver.NewTextureObject.
+ * Note: this function will be called during context creation to
+ * allocate the default texture objects.
+ * Note: we could use containment here to 'derive' the driver-specific
+ * texture object from the core mesa gl_texture_object.  Not done at this time.
+ */
+static struct gl_texture_object *
+mgaNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
+{
+   struct gl_texture_object *obj;
+   obj = _mesa_new_texture_object(ctx, name, target);   /* NOTE(review): obj is not NULL-checked before mgaAllocTexObj dereferences it */
+   mgaAllocTexObj( obj );   /* result ignored: on failure obj->DriverData is left NULL */
+   return obj;
+}
+
+
+void
+mgaInitTextureFuncs( struct dd_function_table *functions )
+{  /* Fill in the texture-related driver hooks; all but IsTextureResident are defined in this file. */
+   functions->ChooseTextureFormat	= mgaChooseTextureFormat;
+   functions->TexImage2D		= mgaTexImage2D;
+   functions->TexSubImage2D		= mgaTexSubImage2D;
+   functions->BindTexture		= mgaBindTexture;
+   functions->NewTextureObject		= mgaNewTextureObject;
+   functions->DeleteTexture		= mgaDeleteTexture;
+   functions->IsTextureResident		= driIsTextureResident;
+   functions->TexEnv			= mgaTexEnv;
+   functions->TexParameter		= mgaTexParameter;
+}
diff --git a/src/mesa/drivers/dri/mga/mgatex.h b/src/mesa/drivers/dri/mga/mgatex.h
new file mode 100644
index 0000000000..789034964a
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgatex.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef MGATEX_INC
+#define MGATEX_INC
+
+#include "mgacontext.h"
+
+typedef struct mga_texture_object_s *mgaTextureObjectPtr;   /* handle to the driver-private texture object */
+
+
+/* Called before a primitive is rendered to make sure the texture
+ * state is properly set up.  Texture residence is checked later
+ * when we grab the lock.
+ */
+void mgaUpdateTextureState( GLcontext *ctx );
+
+int mgaUploadTexImages( mgaContextPtr mmesa, mgaTextureObjectPtr t );
+
+void mgaDestroyTexObj( mgaContextPtr mmesa, mgaTextureObjectPtr t );   /* destroys device-dependent texture state (mgatexmem.c) */
+
+void mgaInitTextureFuncs( struct dd_function_table *functions );   /* installs the texture hooks from mgatex.c */
+
+GLboolean mgaUpdateTextureEnvCombine( GLcontext *ctx, int unit );
+
+#endif
diff --git a/src/mesa/drivers/dri/mga/mgatexmem.c b/src/mesa/drivers/dri/mga/mgatexmem.c
new file mode 100644
index 0000000000..58afbbb279
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgatexmem.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+
+#include "main/mm.h"
+#include "mgacontext.h"
+#include "mgatex.h"
+#include "mgaregs.h"
+#include "mgaioctl.h"
+#include "mga_xmesa.h"
+
+#include "main/imports.h"
+#include "main/simple_list.h"
+
+/**
+ * Destroy any device-dependent state associated with the texture.  This may
+ * include NULLing out hardware state that points to the texture.
+ */
+void
+mgaDestroyTexObj( mgaContextPtr mmesa, mgaTextureObjectPtr t )
+{
+    unsigned   unit;
+
+    /* Nothing to unhook when no driver context is supplied. */
+    if ( mmesa == NULL )
+	return;
+
+    /* Carry the texture's age over to the context's dirtyAge when it
+     * is the newer of the two.
+     */
+    if ( mmesa->dirtyAge < t->age )
+	mmesa->dirtyAge = t->age;
+
+    /* Drop every per-unit reference that still points at this texture. */
+    for ( unit = 0 ; unit < mmesa->glCtx->Const.MaxTextureUnits ; unit++ )
+    {
+	if ( mmesa->CurrentTexObj[ unit ] == t )
+	    mmesa->CurrentTexObj[ unit ] = NULL;
+    }
+}
+
+
+/**
+ * Upload a texture image from system memory to either on-card or AGP
+ * memory.  Uploads to on-card memory are performed using an ILOAD operation.
+ * This is used for both initial loading of the entire image, and texSubImage
+ * updates.
+ *
+ * Performed with the hardware lock held.
+ * 
+ * Even though this function is named "upload subimage," the entire image
+ * is uploaded.
+ * 
+ * \param mmesa  Driver context.
+ * \param t      Texture to be uploaded.
+ * \param hwlevel  Mipmap level of the texture to be uploaded.
+ * 
+ * \bug As mentioned above, this function actually copies the entire mipmap
+ *      level.  There should be a version of this function that performs
+ *      sub-rectangle uploads.  This will perform quite a bit better if only
+ *      a small portion of a larger texture has been updated.  Care would
+ *      need to be taken with such an implementation once glCopyTexImage has
+ *      been hardware accelerated.
+ */
+static void mgaUploadSubImage( mgaContextPtr mmesa,
+			       mgaTextureObjectPtr t, GLint hwlevel )
+{
+   struct gl_texture_image * texImage;
+   unsigned     offset;
+   unsigned     texelBytes;
+   unsigned     length;
+   const int level = hwlevel + t->base.firstLevel;
+
+   /* Reject hardware levels outside the range supported by the chip. */
+   if ( (hwlevel < 0) 
+	|| (hwlevel >= (MGA_IS_G200(mmesa) 
+		      ? G200_TEX_MAXLEVELS : G400_TEX_MAXLEVELS)) ) {
+      fprintf( stderr, "[%s:%d] level = %d\n", __FILE__, __LINE__, level );
+      return;
+   }
+
+   texImage = t->base.tObj->Image[0][level];
+   if ( texImage == NULL ) {
+      fprintf( stderr, "[%s:%d] Image[%d] = NULL\n", __FILE__, __LINE__,
+	       level );
+      return;
+   }
+
+   if (texImage->Data == NULL) {
+      fprintf(stderr, "null texture image data tObj %p level %d\n",
+	      (void *) t->base.tObj, level);
+      return;
+   }
+
+   /* find the proper destination offset for this level */
+   if ( MGA_IS_G200(mmesa) ) {
+      offset = (t->base.memBlock->ofs + t->offsets[hwlevel]);
+   }
+   else {
+      unsigned  i;
+
+      offset = t->base.memBlock->ofs;
+      for ( i = 0 ; i < hwlevel ; i++ ) {
+	 offset += (t->offsets[1] >> (i * 2));
+      }
+   }
+
+   /* Copy the texture from system memory to a memory space that can be
+    * directly used by the hardware for texturing.
+    */
+   texelBytes = _mesa_get_format_bytes(texImage->TexFormat);
+   length = texImage->Width * texImage->Height * texelBytes;
+   if ( t->base.heap->heapId == MGA_CARD_HEAP ) {
+      unsigned  tex_offset = 0;
+      unsigned  to_copy;
+      unsigned  src_left = length;	/* image bytes not yet copied */
+      unsigned  src_bytes;		/* image bytes in this chunk */
+
+      /* We may not be able to upload the entire texture in one batch due to
+       * register limits or dma buffer limits.  Split the copy up into maximum
+       * sized chunks.
+       */
+      offset += mmesa->mgaScreen->textureOffset[ t->base.heap->heapId ];
+      while ( length != 0 ) {
+	 mgaGetILoadBufferLocked( mmesa );
+	 /* The kernel ILOAD ioctl requires that the length be an even multiple
+	  * of MGA_ILOAD_ALIGN.  Rounding up may make the transfer larger than
+	  * what is left of the image: read only the bytes the image really
+	  * holds and zero the padding instead of reading past the end of Data.
+	  */
+	 length = ((length) + MGA_ILOAD_MASK) & ~MGA_ILOAD_MASK;
+	 to_copy = MIN2( length, MGA_BUFFER_SIZE );
+	 src_bytes = MIN2( to_copy, src_left );
+	 (void) memcpy( mmesa->iload_buffer->address,
+			(GLubyte *) texImage->Data + tex_offset, src_bytes );
+	 if ( src_bytes < to_copy )
+	    (void) memset( (GLubyte *) mmesa->iload_buffer->address + src_bytes,
+			   0, to_copy - src_bytes );
+	 src_left -= src_bytes;
+	 if ( MGA_DEBUG & DEBUG_VERBOSE_TEXTURE )
+	     fprintf(stderr, "[%s:%d] address/size = 0x%08lx/%u\n",
+		     __FILE__, __LINE__,
+		     (long) (offset + tex_offset),
+		     to_copy );
+
+	 mgaFireILoadLocked( mmesa, offset + tex_offset, to_copy );
+	 tex_offset += to_copy;
+	 length -= to_copy;
+      }
+   } else {
+      /* FIXME: the sync for direct copy reduces speed.. */
+      /* This works, is slower for uploads to card space and needs
+       * additional synchronization with the dma stream.
+       */
+
+      UPDATE_LOCK(mmesa, DRM_LOCK_FLUSH | DRM_LOCK_QUIESCENT);
+
+      memcpy( mmesa->mgaScreen->texVirtual[t->base.heap->heapId] + offset,
+	      texImage->Data, length );
+
+      if ( MGA_DEBUG & DEBUG_VERBOSE_TEXTURE )
+	 fprintf(stderr, "[%s:%d] address/size = 0x%08lx/%u\n",
+		 __FILE__, __LINE__,
+		 (long) (mmesa->mgaScreen->texVirtual[t->base.heap->heapId] 
+			 + offset),
+		 length);
+   }
+}
+
+
+/**
+ * Upload the texture images associated with texture \a t.  This might
+ * require the allocation of texture memory.
+ * 
+ * \param mmesa Context pointer
+ * \param t Texture to be uploaded
+ */
+
+int mgaUploadTexImages( mgaContextPtr mmesa, mgaTextureObjectPtr t )
+{
+   int lvl;
+
+   /* A missing or empty texture needs no upload. */
+   if ( (t == NULL) || (t->base.totalSize == 0) )
+      return 0;
+
+   /* From here on every exit path must drop the hardware lock again. */
+   LOCK_HARDWARE( mmesa );
+
+   if ( t->base.memBlock == NULL ) {
+      int ofs;
+      const int heap = driAllocateTexture( mmesa->texture_heaps,
+					   mmesa->nr_heaps,
+					   (driTextureObject *) t );
+      if ( heap == -1 ) {
+	 UNLOCK_HARDWARE( mmesa );
+	 return -1;
+      }
+
+      assert(t->base.memBlock);
+      ofs = mmesa->mgaScreen->textureOffset[ heap ]
+	   + t->base.memBlock->ofs;
+
+      if ( MGA_IS_G200(mmesa) ) {
+	 t->setup.texorg  = ofs;
+	 t->setup.texorg1 = ofs + t->offsets[1];
+	 t->setup.texorg2 = ofs + t->offsets[2];
+	 t->setup.texorg3 = ofs + t->offsets[3];
+	 t->setup.texorg4 = ofs + t->offsets[4];
+      }
+      else {
+	 t->setup.texorg  = ofs | TO_texorgoffsetsel;
+	 t->setup.texorg1 = t->offsets[1];
+	 t->setup.texorg2 = 0;
+	 t->setup.texorg3 = 0;
+	 t->setup.texorg4 = 0;
+      }
+
+      mmesa->dirty |= MGA_UPLOAD_CONTEXT;
+   }
+
+   /* Refresh the texture's position in the LRU bookkeeping now that it
+    * is about to be used. */
+   driUpdateTextureLRU( (driTextureObject *) t );
+
+   if ( MGA_DEBUG & DEBUG_VERBOSE_TEXTURE )
+      fprintf(stderr, "[%s:%d] dispatch age: %d age freed memory: %d\n",
+	      __FILE__, __LINE__,
+	      GET_DISPATCH_AGE(mmesa), mmesa->dirtyAge);
+
+   if ( GET_DISPATCH_AGE(mmesa) <= mmesa->dirtyAge )
+      mgaWaitAgeLocked( mmesa, mmesa->dirtyAge );
+
+   if ( t->base.dirty_images[0] != 0 ) {
+      const int levelCount = t->base.lastLevel - t->base.firstLevel + 1;
+
+      if ( MGA_DEBUG & DEBUG_VERBOSE_TEXTURE )
+	 fprintf(stderr, "[%s:%d] dirty_images[0] = 0x%04x\n",
+		 __FILE__, __LINE__, t->base.dirty_images[0] );
+
+      for ( lvl = 0 ; lvl < levelCount ; lvl++ ) {
+	 if ( t->base.dirty_images[0] & (1U << lvl) ) {
+	    mgaUploadSubImage( mmesa, t, lvl );
+	 }
+      }
+      t->base.dirty_images[0] = 0;
+   }
+
+   /* All dirty levels are uploaded; release the lock taken above. */
+   UNLOCK_HARDWARE( mmesa );
+
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/mga/mgatris.c b/src/mesa/drivers/dri/mga/mgatris.c
new file mode 100644
index 0000000000..07cf682f6e
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgatris.c
@@ -0,0 +1,914 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/colormac.h"
+#include "main/mm.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "mgacontext.h"
+#include "mgaioctl.h"
+#include "mgatris.h"
+#include "mgavb.h"
+
+
+static void mgaRenderPrimitive( GLcontext *ctx, GLenum prim );
+
+/***********************************************************************
+ *                 Functions to draw basic primitives                  *
+ ***********************************************************************/
+
+/* EMIT_VERT: copy vertex_size 32-bit words from v to vb, advance vb; j is scratch. */
+#if defined (USE_X86_ASM)
+#define EMIT_VERT( j, vb, vertex_size, v )		\
+do {	int __tmp;					\
+	__asm__ __volatile__( "rep ; movsl"		\
+			 : "=%c" (j), "=D" (vb), "=S" (__tmp)		\
+			 : "0" (vertex_size), 		\
+			   "D" ((long)vb), 		\
+			   "S" ((long)v));		\
+} while (0)
+#else	/* portable C version of the same copy */
+#define EMIT_VERT( j, vb, vertex_size, v )	\
+do {						\
+   for ( j = 0 ; j < vertex_size ; j++ )	\
+      vb[j] = (v)->ui[j];			\
+   vb += vertex_size;				\
+} while (0)
+#endif	/* USE_X86_ASM */
+
+static void INLINE mga_draw_triangle( mgaContextPtr mmesa,
+					   mgaVertexPtr v0,
+					   mgaVertexPtr v1,
+					   mgaVertexPtr v2 )
+{
+   const GLuint dwords = mmesa->vertex_size;
+   GLuint *dst = mgaAllocDmaLow( mmesa, 3 * 4 * dwords );
+   int n;
+   /* Emit the three corners in order; EMIT_VERT advances dst each time. */
+   EMIT_VERT( n, dst, dwords, v0 );
+   EMIT_VERT( n, dst, dwords, v1 );
+   EMIT_VERT( n, dst, dwords, v2 );
+}
+
+
+static void INLINE mga_draw_quad( mgaContextPtr mmesa,
+				       mgaVertexPtr v0,
+				       mgaVertexPtr v1,
+				       mgaVertexPtr v2,
+				       mgaVertexPtr v3 )
+{
+   const GLuint dwords = mmesa->vertex_size;
+   GLuint *dst = mgaAllocDmaLow( mmesa, 6 * 4 * dwords );
+   int n;
+   /* first triangle: v0, v1, v3 -- second triangle: v1, v2, v3 */
+   EMIT_VERT( n, dst, dwords, v0 );
+   EMIT_VERT( n, dst, dwords, v1 );
+   EMIT_VERT( n, dst, dwords, v3 );
+   EMIT_VERT( n, dst, dwords, v1 );
+   EMIT_VERT( n, dst, dwords, v2 );
+   EMIT_VERT( n, dst, dwords, v3 );
+}
+
+
+static INLINE void mga_draw_point( mgaContextPtr mmesa,
+					mgaVertexPtr tmp )
+{
+   /* Half the edge length of the emitted square: the point size, clamped
+    * to the context's MinPointSize..MaxPointSize range, times 0.5.
+    */
+   const GLfloat half = 0.5 * CLAMP(mmesa->glCtx->Point.Size,
+                                    mmesa->glCtx->Const.MinPointSize,
+                                    mmesa->glCtx->Const.MaxPointSize);
+   const int dwords = mmesa->vertex_size;
+
+   /* Reserve 6 * 4 * dwords from the DMA buffer: six vertices' worth
+    * (assuming mgaAllocDmaLow() counts bytes -- TODO confirm). */
+   GLuint *out = mgaAllocDmaLow( mmesa, 6 * 4 * dwords );
+
+   /* Corners of the two triangles in emission order.  A nonzero entry
+    * selects "coordinate + half", a zero entry "coordinate - half".
+    */
+   static const GLubyte plus_x[6] = { 0, 1, 1, 1, 0, 0 };
+   static const GLubyte plus_y[6] = { 0, 0, 1, 1, 1, 0 };
+   int corner;
+   int j;
+
+   /* disabled coordinate bias, kept as it was */
+#if 0
+   v0->v.x += PNT_X_OFFSET - TRI_X_OFFSET;
+   v0->v.y += PNT_Y_OFFSET - TRI_Y_OFFSET;
+#endif
+
+   /* Draw a point as two triangles.  All six vertices are copies of the
+    * source vertex that differ only in their first two words.
+    */
+   for (corner = 0 ; corner < 6 ; corner++) {
+      /* word 0: x as a float */
+      if (plus_x[corner])
+	 *(float *)&out[0] = tmp->v.x + half;
+      else
+	 *(float *)&out[0] = tmp->v.x - half;
+
+      /* word 1: y as a float */
+      if (plus_y[corner])
+	 *(float *)&out[1] = tmp->v.y + half;
+      else
+	 *(float *)&out[1] = tmp->v.y - half;
+
+      /* words 2 and up are taken over unchanged */
+      for (j = 2 ; j < dwords ; j++) 
+	 out[j] = tmp->ui[j];
+
+      out += dwords;
+   }
+
+#if 0
+   v0->v.x -= PNT_X_OFFSET - TRI_X_OFFSET;
+   v0->v.y -= PNT_Y_OFFSET - TRI_Y_OFFSET;
+#endif
+}
+
+
+static INLINE void mga_draw_line( mgaContextPtr mmesa,
+				      mgaVertexPtr v0,
+				      mgaVertexPtr v1 )
+{
+   /* A line is emitted as two triangles (six vertices) forming a quad:
+    * both end points are displaced by half the clamped line width to
+    * either side, along y when |dx| > |dy| and along x otherwise.
+    */
+   GLuint dwords = mmesa->vertex_size;
+   GLuint *out = mgaAllocDmaLow( mmesa, 6 * 4 * dwords );
+   const GLfloat width = CLAMP(mmesa->glCtx->Line.Width,
+                               mmesa->glCtx->Const.MinLineWidth,
+                               mmesa->glCtx->Const.MaxLineWidth);
+   const GLfloat half = width * .5;
+   GLfloat dx, dy, ix, iy;
+
+   /* Emission order: which end point each vertex is taken from, and
+    * whether the displacement is added (nonzero) or subtracted (zero).
+    */
+   static const GLubyte from_v1[6] = { 0, 1, 0, 0, 1, 1 };
+   static const GLubyte add_ofs[6] = { 0, 1, 1, 0, 0, 1 };
+   GLint j;
+   int k;
+
+   /* disabled coordinate bias, kept as it was */
+#if 0
+   v0->v.x += LINE_X_OFFSET - TRI_X_OFFSET;
+   v0->v.y += LINE_Y_OFFSET - TRI_Y_OFFSET;
+   v1->v.x += LINE_X_OFFSET - TRI_X_OFFSET;
+   v1->v.y += LINE_Y_OFFSET - TRI_Y_OFFSET;
+#endif
+
+   dx = v0->v.x - v1->v.x;
+   dy = v0->v.y - v1->v.y;
+
+   /* choose the displacement axis */
+   if (dx * dx > dy * dy) {
+      ix = 0;
+      iy = half;
+   } else {
+      ix = half;
+      iy = 0;
+   }
+
+   for (k = 0 ; k < 6 ; k++) {
+      const mgaVertexPtr v = from_v1[k] ? v1 : v0;
+
+      /* words 0 and 1: displaced x and y as floats */
+      if (add_ofs[k]) {
+	 *(float *)&out[0] = v->v.x + ix;
+	 *(float *)&out[1] = v->v.y + iy;
+      } else {
+	 *(float *)&out[0] = v->v.x - ix;
+	 *(float *)&out[1] = v->v.y - iy;
+      }
+
+      /* words 2 and up are taken over unchanged */
+      for (j = 2 ; j < dwords ; j++) 
+	 out[j] = v->ui[j];
+
+      out += dwords;
+   }
+
+#if 0
+   v0->v.x -= LINE_X_OFFSET - TRI_X_OFFSET;
+   v0->v.y -= LINE_Y_OFFSET - TRI_Y_OFFSET;
+   v1->v.x -= LINE_X_OFFSET - TRI_X_OFFSET;
+   v1->v.y -= LINE_Y_OFFSET - TRI_Y_OFFSET;
+#endif
+}
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+/* With DO_FALLBACK set these go through the mmesa->draw_* hooks, else straight to the emit functions. */
+#define TRI( a, b, c )				\
+do {						\
+   if (DO_FALLBACK)				\
+      mmesa->draw_tri( mmesa, a, b, c );	\
+   else						\
+      mga_draw_triangle( mmesa, a, b, c );	\
+} while (0)
+/* In the fallback case a quad is drawn as the triangles (a,b,d) and (b,c,d). */
+#define QUAD( a, b, c, d )			\
+do {						\
+   if (DO_FALLBACK) {				\
+      mmesa->draw_tri( mmesa, a, b, d );	\
+      mmesa->draw_tri( mmesa, b, c, d );	\
+   } else {					\
+      mga_draw_quad( mmesa, a, b, c, d );	\
+   }						\
+} while (0)
+
+#define LINE( v0, v1 )				\
+do {						\
+   if (DO_FALLBACK)				\
+      mmesa->draw_line( mmesa, v0, v1 );	\
+   else {					\
+      mga_draw_line( mmesa, v0, v1 );		\
+   }						\
+} while (0)
+
+#define POINT( v0 )				\
+do {						\
+   if (DO_FALLBACK)				\
+      mmesa->draw_point( mmesa, v0 );		\
+   else {					\
+      mga_draw_point( mmesa, v0 );		\
+   }						\
+} while (0)
+
+
+/***********************************************************************
+ *              Fallback to swrast for basic primitives                *
+ ***********************************************************************/
+
+/* This code is hit only when a mix of accelerated and unaccelerated
+ * primitives are being drawn, and only for the unaccelerated
+ * primitives.  
+ */
+
+static void 
+mga_fallback_tri( mgaContextPtr mmesa, 
+		   mgaVertex *v0, 
+		   mgaVertex *v1, 
+		   mgaVertex *v2 )
+{
+   /* Run each vertex through mga_translate_vertex(), then hand to swrast. */
+   SWvertex sw0, sw1, sw2;
+   mga_translate_vertex( mmesa->glCtx, v0, &sw0 );
+   mga_translate_vertex( mmesa->glCtx, v1, &sw1 );
+   mga_translate_vertex( mmesa->glCtx, v2, &sw2 );
+   _swrast_Triangle( mmesa->glCtx, &sw0, &sw1, &sw2 );
+}
+
+
+static void 
+mga_fallback_line( mgaContextPtr mmesa,
+		    mgaVertex *v0,
+		    mgaVertex *v1 )
+{
+   /* Run both end points through mga_translate_vertex(), then hand to swrast. */
+   SWvertex sw0, sw1;
+   mga_translate_vertex( mmesa->glCtx, v0, &sw0 );
+   mga_translate_vertex( mmesa->glCtx, v1, &sw1 );
+   _swrast_Line( mmesa->glCtx, &sw0, &sw1 );
+}
+
+
+static void 
+mga_fallback_point( mgaContextPtr mmesa, 
+		     mgaVertex *v0 )
+{
+   /* Run the vertex through mga_translate_vertex(), then hand to swrast. */
+   SWvertex sw;
+   mga_translate_vertex( mmesa->glCtx, v0, &sw );
+   _swrast_Point( mmesa->glCtx, &sw );
+}
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+/* Bits OR-ed together by mgaChooseRenderState() to form the rast_tab index. */
+#define MGA_UNFILLED_BIT    0x1
+#define MGA_OFFSET_BIT	    0x2
+#define MGA_TWOSIDE_BIT     0x4
+#define MGA_FLAT_BIT        0x8	/* mga can't flatshade? */
+#define MGA_FALLBACK_BIT    0x10
+#define MGA_MAX_TRIFUNC     0x20
+/* One set of point/line/triangle/quad functions per index value. */
+static struct {
+   tnl_points_func	        points;
+   tnl_line_func		line;
+   tnl_triangle_func	triangle;
+   tnl_quad_func		quad;
+} rast_tab[MGA_MAX_TRIFUNC];
+
+#define DO_FALLBACK (IND & MGA_FALLBACK_BIT)
+#define DO_OFFSET   (IND & MGA_OFFSET_BIT)
+#define DO_UNFILLED (IND & MGA_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & MGA_TWOSIDE_BIT)
+#define DO_FLAT     (IND & MGA_FLAT_BIT)
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_BACK_COLORS  0
+#define HAVE_SPEC         1
+#define HAVE_HW_FLATSHADE 0
+#define VERTEX mgaVertex
+#define TAB rast_tab
+
+
+#define DEPTH_SCALE mmesa->depth_scale
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a > 0)
+#define GET_VERTEX(e) (mmesa->verts + (e * mmesa->vertex_size * sizeof(int)))
+
+#define VERT_SET_RGBA( v, c )  					\
+do {								\
+   mga_color_t *color = (mga_color_t *)&((v)->ui[4]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]);		\
+} while (0)
+
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[4] = v1->ui[4]
+
+#define VERT_SET_SPEC( v0, c )					\
+do {								\
+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]);	\
+} while (0)
+
+#define VERT_COPY_SPEC( v0, v1 )		\
+do {						\
+   v0->v.specular.red   = v1->v.specular.red;	\
+   v0->v.specular.green = v1->v.specular.green;	\
+   v0->v.specular.blue  = v1->v.specular.blue;	\
+} while (0)
+
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[4]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[4] = color[idx]
+#define VERT_SAVE_SPEC( idx )    spec[idx] = v[idx]->ui[5]
+#define VERT_RESTORE_SPEC( idx ) v[idx]->ui[5] = spec[idx]
+
+#define LOCAL_VARS(n)					\
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);		\
+   GLuint color[n] = { 0 };				\
+   GLuint spec[n] = { 0 };				\
+   (void) color; (void) spec;
+
+
+
+/***********************************************************************
+ *            Functions to draw basic unfilled primitives              *
+ ***********************************************************************/
+
+#define RASTERIZE(x) if (mmesa->raster_primitive != x) \
+                        mgaRasterPrimitive( ctx, x, MGA_WA_TRIANGLES )
+#define RENDER_PRIMITIVE mmesa->render_primitive
+#define IND MGA_FALLBACK_BIT
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+/***********************************************************************
+ *                 Functions to draw GL primitives                     *
+ ***********************************************************************/
+
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_OFFSET_BIT|MGA_UNFILLED_BIT)
+#define TAG(x) x##_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_OFFSET_BIT|MGA_UNFILLED_BIT)
+#define TAG(x) x##_twoside_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_FALLBACK_BIT)
+#define TAG(x) x##_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_OFFSET_BIT|MGA_FALLBACK_BIT)
+#define TAG(x) x##_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_FALLBACK_BIT)
+#define TAG(x) x##_twoside_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_OFFSET_BIT|MGA_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_UNFILLED_BIT|MGA_FALLBACK_BIT)
+#define TAG(x) x##_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_OFFSET_BIT|MGA_UNFILLED_BIT|MGA_FALLBACK_BIT)
+#define TAG(x) x##_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_UNFILLED_BIT|MGA_FALLBACK_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_OFFSET_BIT|MGA_UNFILLED_BIT| \
+	     MGA_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+/* Mga doesn't support provoking-vertex flat-shading?
+ */
+#define IND (MGA_FLAT_BIT)
+#define TAG(x) x##_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_OFFSET_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_offset_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_twoside_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_OFFSET_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_twoside_offset_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_UNFILLED_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_unfilled_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_OFFSET_BIT|MGA_UNFILLED_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_offset_unfilled_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_UNFILLED_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_twoside_unfilled_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_OFFSET_BIT|MGA_UNFILLED_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_FALLBACK_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_OFFSET_BIT|MGA_FALLBACK_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_offset_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_FALLBACK_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_twoside_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_OFFSET_BIT|MGA_FALLBACK_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_twoside_offset_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_UNFILLED_BIT|MGA_FALLBACK_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_unfilled_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_OFFSET_BIT|MGA_UNFILLED_BIT|MGA_FALLBACK_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_offset_unfilled_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_UNFILLED_BIT|MGA_FALLBACK_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (MGA_TWOSIDE_BIT|MGA_OFFSET_BIT|MGA_UNFILLED_BIT| \
+	     MGA_FALLBACK_BIT|MGA_FLAT_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+/* Calls the init<suffix>() of every TAG variant set up above; presumably each fills one rast_tab slot -- confirm in t_dd_tritmp.h. */
+static void init_rast_tab( void )
+{
+   init();
+   init_offset();
+   init_twoside();
+   init_twoside_offset();
+   init_unfilled();
+   init_offset_unfilled();
+   init_twoside_unfilled();
+   init_twoside_offset_unfilled();
+   init_fallback();
+   init_offset_fallback();
+   init_twoside_fallback();
+   init_twoside_offset_fallback();
+   init_unfilled_fallback();
+   init_offset_unfilled_fallback();
+   init_twoside_unfilled_fallback();
+   init_twoside_offset_unfilled_fallback();
+   /* the same sixteen combinations again, this time with MGA_FLAT_BIT */
+   init_flat();
+   init_offset_flat();
+   init_twoside_flat();
+   init_twoside_offset_flat();
+   init_unfilled_flat();
+   init_offset_unfilled_flat();
+   init_twoside_unfilled_flat();
+   init_twoside_offset_unfilled_flat();
+   init_fallback_flat();
+   init_offset_fallback_flat();
+   init_twoside_fallback_flat();
+   init_twoside_offset_fallback_flat();
+   init_unfilled_fallback_flat();
+   init_offset_unfilled_fallback_flat();
+   init_twoside_unfilled_fallback_flat();
+   init_twoside_offset_unfilled_fallback_flat();
+}
+
+/**********************************************************************/
+/*                 Render whole begin/end objects                     */
+/**********************************************************************/
+
+/* Parameter macros for tnl/t_vb_rendertmp.h, which is included twice below. */
+#define VERT(x) (mgaVertex *)(vertptr + ((x)*vertex_size*sizeof(int)))
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		\
+      mga_draw_point( mmesa, VERT(ELT(start)) );
+#define RENDER_LINE( v0, v1 ) \
+   mga_draw_line( mmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 )  \
+   mga_draw_triangle( mmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   mga_draw_quad( mmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#define INIT(x) mgaRenderPrimitive( ctx, x )
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+    mgaContextPtr mmesa = MGA_CONTEXT(ctx);			\
+    GLubyte *vertptr = (GLubyte *)mmesa->verts;			\
+    const GLuint vertex_size = mmesa->vertex_size;       	\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    (void) elt;
+#define RESET_STIPPLE 
+#define RESET_OCCLUSION 
+#define PRESERVE_VB_DEFS
+#define ELT(x) x
+#define TAG(x) mga_##x##_verts
+#include "tnl/t_vb_rendertmp.h"	/* first pass: names tagged mga_*_verts, vertices indexed directly */
+#undef ELT
+#undef TAG
+#define TAG(x) mga_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"	/* second pass: names tagged mga_*_elts, indices looked up in elt[] */
+
+
+/**********************************************************************/
+/*                   Render clipped primitives                        */
+/**********************************************************************/
+
+
+
+static void mgaRenderClippedPoly( GLcontext *ctx, const GLuint *elts, GLuint n )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const GLuint saved_prim = mmesa->render_primitive;
+   GLuint * const saved_elts = tnl->vb.Elts;
+
+   /* Render the new vertices as an unclipped polygon: temporarily point
+    * the vertex buffer's element list at elts, run the GL_POLYGON entry
+    * of the element render table, then put the old list back.
+    */
+   tnl->vb.Elts = (GLuint *)elts;
+   tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n, PRIM_BEGIN|PRIM_END );
+   tnl->vb.Elts = saved_elts;
+
+   /* Restore the render primitive that was current on entry, unless it
+    * was GL_POLYGON anyway.
+    */
+   if (saved_prim != GL_POLYGON)
+      tnl->Driver.Render.PrimitiveNotify( ctx, saved_prim );
+}
+
+static void mgaRenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj )
+{
+   /* forward to the line function currently installed in the tnl module */
+   TNL_CONTEXT(ctx)->Driver.Render.Line( ctx, ii, jj );
+}
+
+static void mgaFastRenderClippedPoly( GLcontext *ctx, const GLuint *elts, 
+				       GLuint n )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+   GLuint vertex_size = mmesa->vertex_size;	/* name used by VERT() */
+   GLuint *out = mgaAllocDmaLow( mmesa, (n-2) * 3 * 4 * vertex_size );
+   GLubyte *vertptr = (GLubyte *)mmesa->verts;	/* name used by VERT() */
+   const mgaVertexPtr hub = (mgaVertexPtr) VERT(elts[0]);
+   GLuint cur;
+   int j;
+   for (cur = 2 ; cur < n ; cur++) {	/* fan: (previous, current, elts[0]) */
+      EMIT_VERT( j, out, vertex_size, (mgaVertexPtr) VERT(elts[cur-1]) );
+      EMIT_VERT( j, out, vertex_size, (mgaVertexPtr) VERT(elts[cur]) );
+      EMIT_VERT( j, out, vertex_size, hub );
+   }
+}
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+
+#define POINT_FALLBACK (DD_POINT_SMOOTH)
+#define LINE_FALLBACK (DD_LINE_SMOOTH | DD_LINE_STIPPLE)
+#define TRI_FALLBACK (DD_TRI_SMOOTH | DD_TRI_UNFILLED)
+#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK)
+#define ANY_RASTER_FLAGS (DD_FLATSHADE|DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET| \
+                          DD_TRI_UNFILLED)
+
+void mgaChooseRenderState(GLcontext *ctx)
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint index = 0;
+
+   /* Each of these capabilities maps onto one bit of the rast_tab index. */
+   if (caps & DD_TRI_LIGHT_TWOSIDE)   index |= MGA_TWOSIDE_BIT;
+   if (caps & DD_TRI_OFFSET)          index |= MGA_OFFSET_BIT;
+   if (caps & DD_TRI_UNFILLED)        index |= MGA_UNFILLED_BIT;
+   if (caps & DD_FLATSHADE)           index |= MGA_FLAT_BIT;
+
+   if (caps & (ANY_FALLBACK_FLAGS|ANY_RASTER_FLAGS|DD_TRI_STIPPLE)) {
+      /* Polygon stipple needs the software path when the hardware
+       * stipple is not available. */
+      const int swStipple = (caps & DD_TRI_STIPPLE) && !mmesa->haveHwStipple;
+
+      /* Pick, per primitive type, the hardware or the swrast function. */
+      if (caps & POINT_FALLBACK)
+	 mmesa->draw_point = mga_fallback_point;
+      else
+	 mmesa->draw_point = mga_draw_point;
+
+      if (caps & LINE_FALLBACK)
+	 mmesa->draw_line = mga_fallback_line;
+      else
+	 mmesa->draw_line = mga_draw_line;
+
+      if ((caps & TRI_FALLBACK) || swStipple)
+	 mmesa->draw_tri = mga_fallback_tri;
+      else
+	 mmesa->draw_tri = mga_draw_triangle;
+
+      if ((caps & ANY_FALLBACK_FLAGS) || swStipple)
+	 index |= MGA_FALLBACK_BIT;
+   }
+
+   /* Nothing to re-hook when the index did not change. */
+   if (mmesa->RenderIndex == index)
+      return;
+
+   mmesa->RenderIndex = index;
+
+   tnl->Driver.Render.Points = rast_tab[index].points;
+   tnl->Driver.Render.Line = rast_tab[index].line;
+   tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+   tnl->Driver.Render.Quad = rast_tab[index].quad;
+
+   if (index != 0) {
+      tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = mgaRenderClippedLine;
+      tnl->Driver.Render.ClippedPolygon = mgaRenderClippedPoly;
+   } else {
+      /* index 0: the driver's own render tables and clip functions */
+      tnl->Driver.Render.PrimTabVerts = mga_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = mga_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = line; /* from tritmp.h */
+      tnl->Driver.Render.ClippedPolygon = mgaFastRenderClippedPoly;
+   }
+}
+
+/**********************************************************************/
+/*                Runtime render state and callbacks                  */
+/**********************************************************************/
+
+
+static GLenum reduced_prim[GL_POLYGON+1] = {   /* indexed by GL primitive enum value */
+   GL_POINTS,      /* 0: GL_POINTS */
+   GL_LINES,       /* 1: GL_LINES */
+   GL_LINES,       /* 2: GL_LINE_LOOP */
+   GL_LINES,       /* 3: GL_LINE_STRIP */
+   GL_TRIANGLES,   /* 4: GL_TRIANGLES */
+   GL_TRIANGLES,   /* 5: GL_TRIANGLE_STRIP */
+   GL_TRIANGLES,   /* 6: GL_TRIANGLE_FAN */
+   GL_TRIANGLES,   /* 7: GL_QUADS */
+   GL_TRIANGLES,   /* 8: GL_QUAD_STRIP */
+   GL_TRIANGLES    /* 9: GL_POLYGON */
+};
+
+
+
+/* Always called between RenderStart and RenderFinish --> We already
+ * hold the lock.
+ */
+void mgaRasterPrimitive( GLcontext *ctx, GLenum prim, GLuint hwprim )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+
+   FLUSH_BATCH( mmesa );
+
+   /* Update culling: a new reduced primitive dirties the context state. */
+   if (prim != mmesa->raster_primitive)
+      mmesa->dirty |= MGA_UPLOAD_CONTEXT;
+   mmesa->raster_primitive = prim;
+
+   /* hwprim is ignored while mgarender.c is disabled. */
+   mmesa->hw_primitive = MGA_WA_TRIANGLES;
+
+   if (!ctx->Polygon.StippleFlag || !mmesa->haveHwStipple)
+      return;
+   /* Hardware stipple: clear dwgctl bits 20-23; re-add pattern for triangles. */
+   mmesa->dirty |= MGA_UPLOAD_CONTEXT;
+   mmesa->setup.dwgctl &= ~(0xf<<20);
+   if (mmesa->raster_primitive == GL_TRIANGLES)
+      mmesa->setup.dwgctl |= mmesa->poly_stipple;
+}
+
+
+
+/* Determine the rasterized primitive when not drawing unfilled 
+ * polygons.
+ *
+ * Used only for the default render stage which always decomposes
+ * primitives to triangles/lines/points.  For the accelerated stage,
+ * which renders strips as strips, the equivalent calculations are
+ * performed in mgarender.c.
+ */
+static void mgaRenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const GLuint reduced = reduced_prim[prim];
+   const GLboolean unfilled_tri =
+      (reduced == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED);
+
+   mmesa->render_primitive = prim;
+
+   /* Unfilled triangles are left alone here; otherwise switch the raster
+    * primitive whenever the reduced type differs from the current one. */
+   if (!unfilled_tri && mmesa->raster_primitive != reduced)
+      mgaRasterPrimitive( ctx, reduced, MGA_WA_TRIANGLES );
+}
+
+static void mgaRenderFinish( GLcontext *ctx )
+{
+   /* Render.Finish hook: call _swrast_flush() only if RenderIndex has the fallback bit. */
+   if ((MGA_CONTEXT(ctx)->RenderIndex & MGA_FALLBACK_BIT) != 0) _swrast_flush( ctx );
+}
+
+
+
+/**********************************************************************/
+/*               Manage total rasterization fallbacks                 */
+/**********************************************************************/
+
+static const char * const fallbackStrings[] = {   /* entry i is returned by getFallbackString for bit (1 << i) */
+   "Texture mode",                                  /* 0x001 */
+   "glDrawBuffer(GL_FRONT_AND_BACK)",               /* 0x002 */
+   "read buffer",                                   /* 0x004 */
+   "glBlendFunc(GL_SRC_ALPHA_SATURATE, GL_ZERO)",   /* 0x008 */
+   "glRenderMode(selection or feedback)",           /* 0x010 */
+   "No hardware stencil",                           /* 0x020 */
+   "glDepthFunc( GL_NEVER )",                       /* 0x040 */
+   "Mixing GL_CLAMP_TO_EDGE and GL_CLAMP",          /* 0x080 */
+   "rasterization fallback option"                  /* 0x100 */
+};
+
+/* Map a single fallback bit (1 << i) to fallbackStrings[i]; a bit beyond
+ * the table yields a placeholder instead of reading past the array. */
+static const char *getFallbackString(GLuint bit)
+{
+   const GLuint count = sizeof(fallbackStrings) / sizeof(fallbackStrings[0]);
+   GLuint i = 0;
+   while (bit > 1) { i++; bit >>= 1; }   /* i = position of the highest set bit */
+   return (i < count) ? fallbackStrings[i] : "unknown fallback";
+}
+
+
+/* Set (mode != 0) or clear one rasterization fallback bit.  Setting a bit
+ * while none is active calls _swsetup_Wakeup(); clearing the only active
+ * bit reinstalls the mga render hooks and flags state for revalidation. */
+void mgaFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const GLuint previous = mmesa->Fallback;
+
+   if (!mode) {
+      mmesa->Fallback = previous & ~bit;
+      if (previous != bit)
+         return;               /* bit was not the sole active fallback */
+      _swrast_flush( ctx );
+      tnl->Driver.Render.Start = mgaCheckTexSizes;
+      tnl->Driver.Render.PrimitiveNotify = mgaRenderPrimitive;
+      tnl->Driver.Render.Finish = mgaRenderFinish;
+      tnl->Driver.Render.BuildVertices = mgaBuildVertices;
+      mmesa->NewGLState |= (_MGA_NEW_RENDERSTATE | _MGA_NEW_RASTERSETUP);
+      if (MGA_DEBUG & DEBUG_VERBOSE_FALLBACK)
+         fprintf(stderr, "MGA end rasterization fallback: 0x%x %s\n",
+                 bit, getFallbackString(bit));
+      return;
+   }
+
+   mmesa->Fallback = previous | bit;
+   if (previous != 0)
+      return;                  /* a fallback was already active */
+   FLUSH_BATCH(mmesa);
+   _swsetup_Wakeup( ctx );
+   mmesa->RenderIndex = ~0;
+   if (MGA_DEBUG & DEBUG_VERBOSE_FALLBACK)
+      fprintf(stderr, "MGA begin rasterization fallback: 0x%x %s\n",
+              bit, getFallbackString(bit));
+}
+
+
+/* Install the driver's TNL render callbacks.  rast_tab is built on the first
+ * call only; RenderIndex is set to ~0 so that it differs from any index
+ * mgaChooseRenderState computes next. */
+void mgaDDInitTriFuncs( GLcontext *ctx )
+{
+   static int tables_built = 0;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+
+   if (!tables_built) { init_rast_tab(); tables_built = 1; }
+   mmesa->RenderIndex = ~0;
+
+   tnl->Driver.Render.Start            = mgaCheckTexSizes;
+   tnl->Driver.Render.PrimitiveNotify  = mgaRenderPrimitive;
+   tnl->Driver.Render.Finish           = mgaRenderFinish;
+   tnl->Driver.Render.BuildVertices    = mgaBuildVertices;
+   tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple;
+   tnl->Driver.Render.Multipass        = NULL;
+}
diff --git a/src/mesa/drivers/dri/mga/mgatris.h b/src/mesa/drivers/dri/mga/mgatris.h
new file mode 100644
index 0000000000..43612b80a1
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgatris.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef MGATRIS_INC
+#define MGATRIS_INC
+
+#include "main/mtypes.h"
+/* Render-state selection and raster-primitive entry points. */
+extern void mgaDDInitTriFuncs( GLcontext *ctx );
+extern void mgaChooseRenderState( GLcontext *ctx );
+extern void mgaRasterPrimitive( GLcontext *ctx, GLenum prim, GLuint hwprim );
+/* mgaFallback sets (mode != 0) or clears one fallback bit; FALLBACK() forwards to it. */
+extern void mgaFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+#define FALLBACK( ctx, bit, mode ) mgaFallback( ctx, bit, mode )
+/* Mask of state-change flags; mgaFallback ORs it into mmesa->NewGLState. */
+#define _MGA_NEW_RENDERSTATE (_DD_NEW_POINT_SMOOTH |		\
+			      _DD_NEW_LINE_SMOOTH |		\
+			      _DD_NEW_LINE_STIPPLE |		\
+			      _DD_NEW_TRI_SMOOTH |		\
+			      _DD_NEW_FLATSHADE |		\
+			      _DD_NEW_TRI_LIGHT_TWOSIDE |	\
+			      _DD_NEW_TRI_OFFSET |		\
+			      _DD_NEW_TRI_UNFILLED |		\
+			      _DD_NEW_TRI_STIPPLE |		\
+			      _NEW_POLYGONSTIPPLE)
+
+#endif /* MGATRIS_INC */
diff --git a/src/mesa/drivers/dri/mga/mgavb.c b/src/mesa/drivers/dri/mga/mgavb.c
new file mode 100644
index 0000000000..71bbf33f23
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgavb.c
@@ -0,0 +1,478 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <stdlib.h>
+#include "mgacontext.h"
+#include "mgavb.h"
+#include "mgatris.h"
+#include "mgaioctl.h"
+#include "mga_xmesa.h"
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/colormac.h"
+
+#include "tnl/t_context.h"
+#include "swrast/swrast.h"
+
+
+#define MGA_TEX1_BIT       0x1	/* set by mgaChooseVertexState when units 0 and 1 are both enabled */
+#define MGA_TEX0_BIT       0x2	/* set when unit 0 or unit 1 is enabled */
+#define MGA_RGBA_BIT       0x4	/* always set by mgaChooseVertexState */
+#define MGA_SPEC_BIT       0x8	/* set for DD_SEPARATE_SPECULAR */
+#define MGA_FOG_BIT	   0x10	/* set when ctx->Fog.Enabled */
+#define MGA_XYZW_BIT       0x20	/* always set by mgaChooseVertexState */
+#define MGA_PTEX_BIT       0x40	/* added by mgaCheckTexSizes when check_tex_sizes() fails */
+#define MGA_MAX_SETUP      0x80	/* number of setup_tab entries: one per bit combination */
+
+static struct {	/* per-combination vertex setup functions and format */
+   void                (*emit)( GLcontext *, GLuint, GLuint, void *, GLuint );	/* called as (ctx, start, count, dest, stride) */
+   tnl_interp_func		interp;	/* installed as tnl->Driver.Render.Interp */
+   tnl_copy_pv_func	        copy_pv;	/* installed as tnl->Driver.Render.CopyPV */
+   GLboolean           (*check_tex_sizes)( GLcontext *ctx );	/* zero result makes mgaCheckTexSizes add MGA_PTEX_BIT */
+   GLuint               vertex_size;	/* scaled by sizeof(int) to obtain the byte stride */
+   GLuint               vertex_format;	/* copied into mmesa->vertex_format */
+} setup_tab[MGA_MAX_SETUP];	/* indexed by an OR of the MGA_*_BIT flags */
+
+
+#define TINY_VERTEX_FORMAT      0
+#define NOTEX_VERTEX_FORMAT     0
+#define TEX0_VERTEX_FORMAT      (MGA_A|MGA_S|MGA_F)
+#define TEX1_VERTEX_FORMAT      (MGA_A|MGA_S|MGA_F|MGA_T2)
+#define PROJ_TEX1_VERTEX_FORMAT 0
+#define TEX2_VERTEX_FORMAT      0
+#define TEX3_VERTEX_FORMAT      0
+#define PROJ_TEX3_VERTEX_FORMAT 0
+/* DO_*: components selected by the IND mask of each instantiation below. */
+#define DO_XYZW (IND & MGA_XYZW_BIT)
+#define DO_RGBA (IND & MGA_RGBA_BIT)
+#define DO_SPEC (IND & MGA_SPEC_BIT)
+#define DO_FOG  (IND & MGA_FOG_BIT)
+#define DO_TEX0 (IND & MGA_TEX0_BIT)
+#define DO_TEX1 (IND & MGA_TEX1_BIT)
+#define DO_TEX2 0
+#define DO_TEX3 0
+#define DO_PTEX (IND & MGA_PTEX_BIT)
+
+/* Names presumably expanded by the tnl_dd templates included below -- TODO confirm. */
+#define VERTEX mgaVertex
+#define VERTEX_COLOR mga_color_t
+#define LOCALVARS mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+#define GET_VIEWPORT_MAT() mmesa->hw_viewport
+#define GET_TEXSOURCE(n)  mmesa->tmu_source[n]
+#define GET_VERTEX_FORMAT() mmesa->vertex_format
+#define GET_VERTEX_STORE() mmesa->verts
+#define GET_VERTEX_SIZE() (mmesa->vertex_size * sizeof(GLuint))	/* parenthesised: safe inside larger expressions */
+/* HAVE_*: only the TEX0 and TEX1 vertex layouts are enabled. */
+#define HAVE_HW_VIEWPORT    0
+#define HAVE_HW_DIVIDE      0
+#define HAVE_RGBA_COLOR     0
+#define HAVE_TINY_VERTICES  0
+#define HAVE_NOTEX_VERTICES 0
+#define HAVE_TEX0_VERTICES  1
+#define HAVE_TEX1_VERTICES  1
+#define HAVE_TEX2_VERTICES  0
+#define HAVE_TEX3_VERTICES  0
+#define HAVE_PTEX_VERTICES  0
+/* Locals used by UNVIEWPORT_*: window offsets and the inverse depth scale. */
+#define UNVIEWPORT_VARS					\
+   const GLfloat dx = - mmesa->drawX - SUBPIXEL_X;	\
+   const GLfloat dy = (mmesa->driDrawable->h + 		\
+		       mmesa->drawY + SUBPIXEL_Y);	\
+   const GLfloat sz = 1.0 / mmesa->depth_scale
+/* Arguments and results are parenthesised; the trailing ';' is kept as it was. */
+#define UNVIEWPORT_X(x)    ((x) + dx);
+#define UNVIEWPORT_Y(y)    (-(y) + dy);
+#define UNVIEWPORT_Z(z)    ((z) * sz);
+
+#define PTEX_FALLBACK() FALLBACK(ctx, MGA_FALLBACK_TEXTURE, 1)
+
+#define INTERP_VERTEX setup_tab[mmesa->SetupIndex].interp
+#define COPY_PV_VERTEX setup_tab[mmesa->SetupIndex].copy_pv
+
+
+/***********************************************************************
+ *         Generate  pv-copying and translation functions              *
+ ***********************************************************************/
+
+#define TAG(x) mga_##x
+#include "tnl_dd/t_dd_vb.c"
+
+/***********************************************************************
+ *             Generate vertex emit and interp functions               *
+ ***********************************************************************/
+/* TAG suffixes spell out IND: w=XYZW g=RGBA s=SPEC f=FOG t0/t1=TEX0/TEX1 p=PTEX */
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT)
+#define TAG(x) x##_wg
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_SPEC_BIT)
+#define TAG(x) x##_wgs
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_TEX0_BIT)
+#define TAG(x) x##_wgt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_TEX0_BIT|MGA_TEX1_BIT)
+#define TAG(x) x##_wgt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_TEX0_BIT|MGA_PTEX_BIT)
+#define TAG(x) x##_wgpt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_SPEC_BIT|MGA_TEX0_BIT)
+#define TAG(x) x##_wgst0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_SPEC_BIT|MGA_TEX0_BIT|MGA_TEX1_BIT)
+#define TAG(x) x##_wgst0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_SPEC_BIT|MGA_TEX0_BIT|MGA_PTEX_BIT)
+#define TAG(x) x##_wgspt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_FOG_BIT)
+#define TAG(x) x##_wgf
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_FOG_BIT|MGA_SPEC_BIT)
+#define TAG(x) x##_wgfs
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_FOG_BIT|MGA_TEX0_BIT)
+#define TAG(x) x##_wgft0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_FOG_BIT|MGA_TEX0_BIT|MGA_TEX1_BIT)
+#define TAG(x) x##_wgft0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_FOG_BIT|MGA_TEX0_BIT|MGA_PTEX_BIT)
+#define TAG(x) x##_wgfpt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_FOG_BIT|MGA_SPEC_BIT|MGA_TEX0_BIT)
+#define TAG(x) x##_wgfst0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_FOG_BIT|MGA_SPEC_BIT|MGA_TEX0_BIT|MGA_TEX1_BIT)
+#define TAG(x) x##_wgfst0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_XYZW_BIT|MGA_RGBA_BIT|MGA_FOG_BIT|MGA_SPEC_BIT|MGA_TEX0_BIT|MGA_PTEX_BIT)
+#define TAG(x) x##_wgfspt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_TEX0_BIT)
+#define TAG(x) x##_t0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_TEX0_BIT|MGA_TEX1_BIT)
+#define TAG(x) x##_t0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_FOG_BIT)
+#define TAG(x) x##_f
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_FOG_BIT|MGA_TEX0_BIT)
+#define TAG(x) x##_ft0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_FOG_BIT|MGA_TEX0_BIT|MGA_TEX1_BIT)
+#define TAG(x) x##_ft0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_RGBA_BIT)
+#define TAG(x) x##_g
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_RGBA_BIT|MGA_SPEC_BIT)
+#define TAG(x) x##_gs
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_RGBA_BIT|MGA_TEX0_BIT)
+#define TAG(x) x##_gt0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_RGBA_BIT|MGA_TEX0_BIT|MGA_TEX1_BIT)
+#define TAG(x) x##_gt0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_RGBA_BIT|MGA_SPEC_BIT|MGA_TEX0_BIT)
+#define TAG(x) x##_gst0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_RGBA_BIT|MGA_SPEC_BIT|MGA_TEX0_BIT|MGA_TEX1_BIT)
+#define TAG(x) x##_gst0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_RGBA_BIT|MGA_FOG_BIT)
+#define TAG(x) x##_gf
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_RGBA_BIT|MGA_FOG_BIT|MGA_SPEC_BIT)
+#define TAG(x) x##_gfs
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_RGBA_BIT|MGA_FOG_BIT|MGA_TEX0_BIT)
+#define TAG(x) x##_gft0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_RGBA_BIT|MGA_FOG_BIT|MGA_TEX0_BIT|MGA_TEX1_BIT)
+#define TAG(x) x##_gft0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_RGBA_BIT|MGA_FOG_BIT|MGA_SPEC_BIT|MGA_TEX0_BIT)
+#define TAG(x) x##_gfst0
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (MGA_RGBA_BIT|MGA_FOG_BIT|MGA_SPEC_BIT|MGA_TEX0_BIT|MGA_TEX1_BIT)
+#define TAG(x) x##_gfst0t1
+#include "tnl_dd/t_dd_vbtmp.h"
+
+
+static void init_setup_tab( void )
+{  /* Calls one init_<tag>() per template instantiation above, in the same order. */
+   init_wg();   /* init_<tag> is presumably generated as TAG(init) by t_dd_vbtmp.h -- TODO confirm */
+   init_wgs();
+   init_wgt0();
+   init_wgt0t1();
+   init_wgpt0();
+   init_wgst0();
+   init_wgst0t1();
+   init_wgspt0();
+   init_wgf();
+   init_wgfs();
+   init_wgft0();
+   init_wgft0t1();
+   init_wgfpt0();
+   init_wgfst0();
+   init_wgfst0t1();
+   init_wgfspt0();
+   init_t0();   /* from here on the instantiations have no MGA_XYZW_BIT in IND */
+   init_t0t1();
+   init_f();
+   init_ft0();
+   init_ft0t1();
+   init_g();
+   init_gs();
+   init_gt0();
+   init_gt0t1();
+   init_gst0();
+   init_gst0t1();
+   init_gf();
+   init_gfs();
+   init_gft0();
+   init_gft0t1();
+   init_gfst0();
+   init_gfst0t1();
+}
+
+
+
+
+void mgaPrintSetupFlags(char *msg, GLuint flags )
+{
+   /* Each component contributes its label only when its bit is set. */
+   const char *xyzw = (flags & MGA_XYZW_BIT) ? " xyzw," : "";
+   const char *rgba = (flags & MGA_RGBA_BIT) ? " rgba," : "";
+   const char *spec = (flags & MGA_SPEC_BIT) ? " spec," : "";
+   const char *fog  = (flags & MGA_FOG_BIT)  ? " fog," : "";
+   const char *tex0 = (flags & MGA_TEX0_BIT) ? " tex-0," : "";
+   const char *tex1 = (flags & MGA_TEX1_BIT) ? " tex-1," : "";
+   fprintf(stderr, "%s: %d %s%s%s%s%s%s\n", msg, (int)flags,
+           xyzw, rgba, spec, fog, tex0, tex1);
+}
+
+
+/* When the current setup entry's check_tex_sizes() returns zero, add
+ * MGA_PTEX_BIT to SetupIndex and mark every vertex input as new; then call
+ * Render.Start if a fallback is active, else install interp/copy_pv. */
+void mgaCheckTexSizes( GLcontext *ctx )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (setup_tab[mmesa->SetupIndex].check_tex_sizes(ctx))
+      return;
+
+   mmesa->SetupIndex |= MGA_PTEX_BIT;
+   mmesa->SetupNewInputs = ~0;
+
+   if (mmesa->Fallback) {
+      tnl->Driver.Render.Start(ctx);
+   } else if (!(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
+      tnl->Driver.Render.Interp = setup_tab[mmesa->SetupIndex].interp;
+      tnl->Driver.Render.CopyPV = setup_tab[mmesa->SetupIndex].copy_pv;
+   }
+}
+
+
+/* Re-emit hardware vertices into mmesa->verts for the start/count range
+ * handed on to emit().  A position change rebuilds every component of
+ * SetupIndex; otherwise only the components named in newinputs (plus any
+ * pending in SetupNewInputs) go through the matching setup_tab entry.
+ */
+void mgaBuildVertices( GLcontext *ctx, 
+		       GLuint start, 
+		       GLuint count,
+		       GLuint newinputs )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+   const GLuint stride = mmesa->vertex_size * sizeof(int);
+   GLubyte *dest = (GLubyte *)mmesa->verts + start * stride;
+   GLuint ind;
+
+   /* Fold in inputs invalidated since the last build, then clear them. */
+   newinputs |= mmesa->SetupNewInputs;
+   mmesa->SetupNewInputs = 0;
+   if (newinputs == 0)
+      return;
+
+   if (newinputs & VERT_BIT_POS) {
+      setup_tab[mmesa->SetupIndex].emit( ctx, start, count, dest, stride );
+      return;
+   }
+
+   if (mmesa->SetupIndex & MGA_PTEX_BIT) {
+      /* With PTEX set, every component of SetupIndex is re-emitted. */
+      ind = mmesa->SetupIndex;
+   } else {
+      ind = 0;
+      if (newinputs & VERT_BIT_COLOR0) ind |= MGA_RGBA_BIT;
+      if (newinputs & VERT_BIT_COLOR1) ind |= MGA_SPEC_BIT;
+      if (newinputs & VERT_BIT_TEX0)   ind |= MGA_TEX0_BIT;
+      if (newinputs & VERT_BIT_TEX1)   ind |= MGA_TEX0_BIT|MGA_TEX1_BIT;
+      if (newinputs & VERT_BIT_FOG)    ind |= MGA_FOG_BIT;
+      ind &= mmesa->SetupIndex;
+   }
+
+   /* NOTE(review): ind may name a combination with no init_*() call in
+    * init_setup_tab (e.g. MGA_SPEC_BIT alone) -- confirm emit is non-NULL.
+    */
+   if (ind != 0)
+      setup_tab[ind].emit( ctx, start, count, dest, stride );
+}
+
+
+/* Derive mmesa->SetupIndex from separate-specular, fog and texture-unit
+ * state, install the matching interp/copy_pv hooks, and flag a pipe upload
+ * when the selected entry's vertex format differs from the current one.
+ */
+void mgaChooseVertexState( GLcontext *ctx )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint units = ctx->Texture._EnabledUnits & 0x3;
+   GLuint ind = MGA_XYZW_BIT|MGA_RGBA_BIT;
+
+   if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR)
+      ind |= MGA_SPEC_BIT;
+
+   if (ctx->Fog.Enabled)
+      ind |= MGA_FOG_BIT;
+
+   /* A single enabled unit (0 or 1) is emitted as TEX0; TEX1 is added only
+    * when both units are enabled.
+    */
+   if (units != 0)
+      ind |= MGA_TEX0_BIT;
+   if (units == 0x3)
+      ind |= MGA_TEX1_BIT;
+
+   mmesa->SetupIndex = ind;
+
+   /* Two-sided lighting and unfilled polygons use the "extras" variants. */
+   if (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED)) {
+      tnl->Driver.Render.Interp = mga_interp_extras;
+      tnl->Driver.Render.CopyPV = mga_copy_pv_extras;
+   } else {
+      tnl->Driver.Render.Interp = setup_tab[ind].interp;
+      tnl->Driver.Render.CopyPV = setup_tab[ind].copy_pv;
+   }
+
+   if (mmesa->vertex_format == setup_tab[ind].vertex_format)
+      return;
+
+   FLUSH_BATCH(mmesa);
+   mmesa->dirty |= MGA_UPLOAD_PIPE;
+   mmesa->vertex_format = setup_tab[ind].vertex_format;
+   mmesa->vertex_size = setup_tab[ind].vertex_size;
+}
+
+
+
+/* Run the current setup entry's emit() for start..count into dest and
+ * return the address (count - start) strides past dest. */
+void *mga_emit_contiguous_verts( GLcontext *ctx, GLuint start, GLuint count,
+				 void *dest)
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const GLuint stride = mmesa->vertex_size * 4;   /* byte stride: vertex_size scaled by 4 */
+   setup_tab[mmesa->SetupIndex].emit( ctx, start, count, dest, stride );
+   return (char *)dest + stride * (count - start);
+}
+				   
+
+
+/* Allocate the vertex store (vb.Size mgaVertex entries, _mesa_align_malloc
+ * called with 32), build setup_tab on the first call, and seed the vertex
+ * format and size from setup_tab[0].
+ * NOTE(review): the allocation result is not checked here. */
+void mgaInitVB( GLcontext *ctx )
+{
+   static int setup_tab_pending = 1;
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   const GLuint nverts = TNL_CONTEXT(ctx)->vb.Size;
+
+   mmesa->verts = (GLubyte *)_mesa_align_malloc(nverts * sizeof(mgaVertex), 32);
+   if (setup_tab_pending) {
+      init_setup_tab();
+      setup_tab_pending = 0;
+   }
+   mmesa->vertex_size = setup_tab[0].vertex_size;
+   mmesa->vertex_format = setup_tab[0].vertex_format;
+   mmesa->dirty |= MGA_UPLOAD_PIPE;
+}
+
+
+void mgaFreeVB( GLcontext *ctx )
+{
+   mgaContextPtr mmesa = MGA_CONTEXT(ctx);
+   /* Nothing to release when no vertex store is attached. */
+   if (!mmesa->verts) return;
+   _mesa_align_free(mmesa->verts);
+   mmesa->verts = NULL;
+}
+
diff --git a/src/mesa/drivers/dri/mga/mgavb.h b/src/mesa/drivers/dri/mga/mgavb.h
new file mode 100644
index 0000000000..8d24ab7b5f
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/mgavb.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2000-2001 VA Linux Systems, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef MGAVB_INC
+#define MGAVB_INC
+
+#include "main/mtypes.h"
+#include "mgacontext.h"
+#include "swrast/swrast.h"
+/* Mask of state-change flags; mgaFallback ORs it into mmesa->NewGLState. */
+#define _MGA_NEW_RASTERSETUP (_NEW_TEXTURE |			\
+			      _DD_NEW_SEPARATE_SPECULAR |	\
+			      _DD_NEW_TRI_UNFILLED |		\
+			      _DD_NEW_TRI_LIGHT_TWOSIDE |	\
+			      _NEW_FOG)
+
+/* Vertex-setup selection and the Render.Start / Render.BuildVertices hooks. */
+extern void mgaChooseVertexState( GLcontext *ctx );
+extern void mgaCheckTexSizes( GLcontext *ctx );
+extern void mgaBuildVertices( GLcontext *ctx, 
+			      GLuint start, 
+			      GLuint count,
+			      GLuint newinputs );
+/* Debug helper: prints msg and the decoded MGA_*_BIT flags to stderr. */
+extern void mgaPrintSetupFlags(char *msg, GLuint flags );
+/* Allocate / release the vertex store held in mmesa->verts. */
+extern void mgaInitVB( GLcontext *ctx );
+extern void mgaFreeVB( GLcontext *ctx );
+/* Emits through the current setup entry into dest; returns a pointer past the data. */
+extern void *mga_emit_contiguous_verts( GLcontext *ctx,
+					GLuint start,
+					GLuint count,
+					void *dest );
+/* The two below are presumably generated by tnl_dd/t_dd_vb.c via TAG(x) mga_##x -- TODO confirm. */
+extern void mga_translate_vertex(GLcontext *ctx, 
+				 const mgaVertex *src, 
+				 SWvertex *dst);
+
+extern void mga_print_vertex( GLcontext *ctx, const mgaVertex *v );
+
+#endif /* MGAVB_INC */
diff --git a/src/mesa/drivers/dri/mga/server/mga.h b/src/mesa/drivers/dri/mga/server/mga.h
new file mode 100644
index 0000000000..d7790e4779
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/server/mga.h
@@ -0,0 +1,114 @@
+/*
+ * MGA Millennium (MGA2064W) functions
+ *
+ * Copyright 1996 The XFree86 Project, Inc.
+ *
+ * Authors
+ *		Dirk Hohndel
+ *			hohndel@XFree86.Org
+ *		David Dawes
+ *			dawes@XFree86.Org
+ */
+
+#ifndef MGA_H
+#define MGA_H
+
+
+#include "xf86drm.h"
+#include "linux/types.h"
+
+
+#define PCI_CHIP_MGA2085		0x0518	/* PCI_CHIP_*: presumably PCI device IDs of the MGA chips -- TODO confirm */
+#define PCI_CHIP_MGA2064		0x0519
+#define PCI_CHIP_MGA1064		0x051A
+#define PCI_CHIP_MGA2164		0x051B
+#define PCI_CHIP_MGA2164_AGP		0x051F
+#define PCI_CHIP_MGAG200_PCI		0x0520
+#define PCI_CHIP_MGAG200		0x0521
+#define PCI_CHIP_MGAG400		0x0525
+#define PCI_CHIP_MGAG550		0x2527
+#define PCI_CHIP_MGAG100_PCI		0x1000
+#define PCI_CHIP_MGAG100		0x1001
+
+
+#  define MMIO_IN8(base, offset) \
+	(*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)))	/* 8-bit volatile read at base + offset bytes */
+#  define MMIO_IN16(base, offset) \
+	(*(volatile unsigned short *)(void *)(((unsigned char*)(base)) + (offset)))	/* 16-bit volatile read */
+#  define MMIO_IN32(base, offset) \
+	(*(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)))	/* 32-bit volatile read */
+#  define MMIO_OUT8(base, offset, val) \
+	(*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val))	/* 8-bit volatile write; whole assignment parenthesised */
+#  define MMIO_OUT16(base, offset, val) \
+	(*(volatile unsigned short *)(void *)(((unsigned char*)(base)) + (offset)) = (val))	/* 16-bit volatile write */
+#  define MMIO_OUT32(base, offset, val) \
+	(*(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = (val))	/* 32-bit volatile write */
+
+#define INREG8(addr) MMIO_IN8(pMga->IOBase, addr)	/* all six expect a `pMga` with an IOBase member in scope */
+#define INREG16(addr) MMIO_IN16(pMga->IOBase, addr)
+#define INREG(addr) MMIO_IN32(pMga->IOBase, addr)	/* 32-bit read */
+#define OUTREG8(addr, val) MMIO_OUT8(pMga->IOBase, addr, val)
+#define OUTREG16(addr, val) MMIO_OUT16(pMga->IOBase, addr, val)
+#define OUTREG(addr, val) MMIO_OUT32(pMga->IOBase, addr, val)	/* 32-bit write */
+
+#define MGAIOMAPSIZE		0x00004000	/* presumably the size of the register mapping -- TODO confirm */
+
+
+typedef struct {	/* device record: buffer layout, AGP settings, DRM regions, register base */
+  int               Chipset;          /**< \brief Chipset number */
+
+  int               irq;              /**< \brief IRQ number */
+
+
+  int               frontOffset;      /**< \brief Front color buffer offset */
+  int               frontPitch;       /**< \brief Front color buffer pitch */
+  int               backOffset;       /**< \brief Back color buffer offset */
+  int               backPitch;        /**< \brief Back color buffer pitch */
+  int               depthOffset;      /**< \brief Depth buffer offset */
+  int               depthPitch;       /**< \brief Depth buffer pitch */
+  int               textureOffset;    /**< \brief Texture area offset */
+  int               textureSize;      /**< \brief Texture area size */
+  int               logTextureGranularity; /**< presumably log2 of the texture granularity -- TODO confirm */
+
+  /**
+   * \name AGP
+   */
+  /*@{*/
+  drmSize           agpSize;          /**< \brief AGP map size */
+  int               agpMode;          /**< \brief AGP mode */
+  /*@}*/
+
+  drmRegion         agp;
+
+  /* PCI mappings */
+  drmRegion         registers;
+  drmRegion         status;
+
+  /* AGP mappings */
+  drmRegion         warp;
+  drmRegion         primary;
+  drmRegion         buffers;
+  drmRegion         agpTextures;
+
+  drmBufMapPtr      drmBuffers;
+
+  unsigned long     IOAddress;
+  unsigned char    *IOBase;           /**< base pointer dereferenced by the INREG and OUTREG macros */
+  int		    HasSDRAM;
+
+  __u32             reg_ien;
+} MGARec, *MGAPtr;
+
+
+
+#define MGA_FRONT	0x1	/* MGA_FRONT/BACK/DEPTH are distinct single-bit flags */
+#define MGA_BACK	0x2
+#define MGA_DEPTH	0x4
+
+#define MGA_AGP_1X_MODE		0x01
+#define MGA_AGP_2X_MODE		0x02
+#define MGA_AGP_4X_MODE		0x04
+#define MGA_AGP_MODE_MASK	0x07	/* equals the OR of the 1X, 2X and 4X mode bits */
+
+
+#endif
diff --git a/src/mesa/drivers/dri/mga/server/mga_bios.h b/src/mesa/drivers/dri/mga/server/mga_bios.h
new file mode 100644
index 0000000000..5dcfc1614d
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/server/mga_bios.h
@@ -0,0 +1,141 @@
+#ifndef MGA_BIOS_H
+#define MGA_BIOS_H
+
+
+/*
+ * MGABiosInfo - This struct describes the video BIOS info block.
+ *
+ * DESCRIPTION
+ *   Do not mess with this, unless you know what you are doing.
+ *   The data lengths and types are critical.
+ *
+ * HISTORY
+ *   October 7, 1996 - [aem] Andrew E. Mileski
+ *   This struct was shamelessly stolen from the MGA DDK.
+ *   It has been reformatted, and the data types changed.
+ */
+typedef struct {
+	/* Length of this structure in bytes */
+	__u16 StructLen;
+
+	/*
+	 * Unique number identifying the product type
+	 * 0 : MGA-S1P20 (2MB base with 175MHz Ramdac)
+	 * 1 : MGA-S1P21 (2MB base with 220MHz Ramdac)
+	 * 2 : Reserved
+	 * 3 : Reserved
+	 * 4 : MGA-S1P40 (4MB base with 175MHz Ramdac)
+	 * 5 : MGA-S1P41 (4MB base with 220MHz Ramdac)
+	 */
+	__u16 ProductID;
+
+	/* Serial number of the board */
+	__u8 SerNo[ 10 ];
+
+	/*
+	 * Manufacturing date of the board (at product test)
+	 * Format: yyyy yyym mmmd dddd
+	 */
+	__u16 ManufDate;	/* per the format above: 7-bit year, 4-bit month, 5-bit day */
+
+	/* Identification of manufacturing site */
+	__u16 ManufId;
+
+	/*
+	 * Number and revision level of the PCB
+	 * Format: nnnn nnnn nnnr rrrr
+	 *         n = PCB number ex:576 (from 0->2047)
+	 *         r = PCB revision      (from 0->31)
+	 */
+	__u16 PCBInfo;	/* per the format above: 11-bit number, 5-bit revision */
+
+	/* Identification of any PMBs */
+	__u16 PMBInfo;
+
+	/*
+	 * Bit  0-7  : Ramdac speed (0=175MHz, 1=220MHz)
+	 * Bit  8-15 : Ramdac type  (0=TVP3026, 1=TVP3027)
+	 */
+	__u16 RamdacType;
+
+	/* Maximum PCLK of the ramdac */
+	__u16 PclkMax;
+
+	/* Maximum LDCLK supported by the WRAM memory */
+	__u16 LclkMax;
+
+	/* Maximum MCLK of base board */
+	__u16 ClkBase;
+
+	/* Maximum MCLK of 4Mb board */
+	__u16 Clk4MB;
+
+	/* Maximum MCLK of 8Mb board */
+	__u16 Clk8MB;
+
+	/* Maximum MCLK of board with multimedia module */
+	__u16 ClkMod;
+
+	/* Diagnostic test pass frequency */
+	__u16 TestClk;
+
+	/* Default VGA mode1 pixel frequency */
+	__u16 VGAFreq1;
+
+	/* Default VGA mode2 pixel frequency */
+	__u16 VGAFreq2;
+
+	/* Date of last BIOS programming/update */
+	__u16 ProgramDate;
+
+	/* Number of times BIOS has been programmed */
+	__u16 ProgramCnt;
+
+	/* Support for up to 32 hardware/software options */
+	__u32 Options;	/* NOTE(review): the fields above total 46 bytes, so this is not 4-byte aligned unless the compiler pads -- verify before overlaying raw BIOS data */
+
+	/* Support for up to 32 hardware/software features */
+	__u32 FeatFlag;
+
+	/* Definition of VGA mode MCLK */
+	__u16 VGAClk;
+
+	/* Indicate the revision level of this header struct */
+	__u16 StructRev;
+
+	__u16 Reserved[ 3 ];
+} MGABiosInfo;
+
+/* from the PINS structure, refer pins info from MGA */
+typedef struct tagParamMGA {	/* trailing comments give each field's byte offset */
+	__u16 	PinID;		/* 0 */
+	__u8	StructLen;	/* 2 */
+	__u8	Rsvd1;		/* 3 */
+	__u16	StructRev;	/* 4 */
+	__u16	ProgramDate;	/* 6 */
+	__u16	ProgramCnt;	/* 8 */
+	__u16	ProductID;	/* 10 */
+	__u8	SerNo[16];	/* 12 */
+	__u8	PLInfo[6];	/* 28 */
+	__u16	PCBInfo;	/* 34 */
+	__u32	FeatFlag;	/* 36 */
+	__u8	RamdacType;	/* 40 */
+	__u8	RamdacSpeed;	/* 41 */
+	__u8	PclkMax;	/* 42 */
+	__u8	ClkGE;		/* 43 */
+	__u8   ClkMem;		/* 44 */
+	__u8	Clk4MB;		/* 45 */
+	__u8	Clk8MB;		/* 46 */
+	__u8	ClkMod;		/* 47 */
+	__u8	TestClk;	/* 48 */
+	__u8	VGAFreq1;	/* 49 */
+	__u8	VGAFreq2;	/* 50 */
+	__u8	MCTLWTST;	/* 51 */
+	__u8	VidCtrl;	/* 52 */
+	__u8	Clk12MB;	/* 53 */
+	__u8	Clk16MB;	/* 54 */
+	__u8	Reserved[8];	/* 55-62 */
+	__u8	PinCheck;	/* 63 */
+}	MGABios2Info;	/* offsets 0..63: 64 bytes in total */
+
+#endif
diff --git a/src/mesa/drivers/dri/mga/server/mga_dri.h b/src/mesa/drivers/dri/mga/server/mga_dri.h
new file mode 100644
index 0000000000..1ce07028f1
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/server/mga_dri.h
@@ -0,0 +1,112 @@
+
+/*
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES
+ * OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ */
+
+#ifndef __MGA_DRI_H__
+#define __MGA_DRI_H__
+
+#include "xf86drm.h"
+#include "drm.h"
+#include "mga_drm.h"
+
+#define MGA_DEFAULT_AGP_SIZE     64
+#define MGA_DEFAULT_AGP_MODE     4
+#define MGA_MAX_AGP_MODE         4
+
+/* Buffers are aligned on 4096-byte boundaries; MGA_BUFFER_ALIGN is the low-bit mask.
+ */
+#define MGA_BUFFER_ALIGN	0x00000fff
+
+#ifdef __GNUC__
+# define DEPRECATED  __attribute__ ((deprecated))
+#else
+# define DEPRECATED
+#endif
+
+#if 1
+typedef struct _mgaDrmRegion {	/* local stand-in used instead of drmRegion (see the #else branch) */
+    drm_handle_t     handle;	/* DRM handle of the region */
+    unsigned int  offset;	/* presumably the region's start offset - TODO confirm what it is relative to */
+    drmSize       size;	/* region size, as a drmSize */
+} mgaDrmRegion, *mgaDrmRegionPtr;
+#else
+#define mgaDrmRegion drmRegion
+#endif
+
+typedef struct {	/* MGA DRI record; members tagged DEPRECATED warn under GCC when referenced */
+   int chipset;
+   int width DEPRECATED;
+   int height DEPRECATED;
+   int mem DEPRECATED;
+   int cpp;	/* presumably bytes per pixel - TODO confirm */
+
+   int agpMode;	/* presumably bounded by MGA_MAX_AGP_MODE - TODO confirm */
+
+   unsigned int frontOffset;
+   unsigned int frontPitch;
+
+   unsigned int backOffset;
+   unsigned int backPitch;
+
+   unsigned int depthOffset;
+   unsigned int depthPitch;
+
+   unsigned int textureOffset;
+   unsigned int textureSize;
+   int logTextureGranularity DEPRECATED;
+
+   /* Allow calculation of setup dma addresses.
+    */
+   unsigned int agpBufferOffset DEPRECATED;
+
+   unsigned int agpTextureOffset;
+   unsigned int agpTextureSize;
+   int logAgpTextureGranularity DEPRECATED;
+
+   unsigned int mAccess DEPRECATED;
+
+   /**
+    * \name DRM memory regions.
+    *
+    * \todo
+    * Several of these fields are no longer used (and will never be used
+    * again) on the client-side.  None of them, except \c registers, are used
+    * on the server-side.  At some point when it is safe to do so (probably
+    * for the X.org 6.9 / 7.0 release), these fields should be removed.
+    */
+   /*@{*/
+   mgaDrmRegion registers;            /**< MMIO registers. */
+   mgaDrmRegion status DEPRECATED;    /**< No longer used on the client-side. */
+   mgaDrmRegion primary;              /**< Primary DMA region. */
+   mgaDrmRegion buffers DEPRECATED;   /**< No longer used on the client-side. */
+   /*@}*/
+
+   unsigned int sarea_priv_offset;	/* presumably the offset of the driver-private SAREA data - TODO confirm */
+} MGADRIRec, *MGADRIPtr;
+
+#endif
diff --git a/src/mesa/drivers/dri/mga/server/mga_macros.h b/src/mesa/drivers/dri/mga/server/mga_macros.h
new file mode 100644
index 0000000000..189e1415d0
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/server/mga_macros.h
@@ -0,0 +1,117 @@
+
+#ifndef _MGA_MACROS_H_
+#define _MGA_MACROS_H_
+
+#ifndef PSZ
+#define PSZ 8
+#endif
+
+#if PSZ == 8
+#define REPLICATE(r) r &= 0xFF; r |= r << 8; r |= r << 16
+#elif PSZ == 16
+#define REPLICATE(r) r &= 0xFFFF; r |= r << 16
+#elif PSZ == 24
+#define REPLICATE(r) r &= 0xFFFFFF; r |= r << 24
+#else
+#define REPLICATE(r) /* */
+#endif
+
+#define RGBEQUAL(c) (!((((c) >> 8) ^ (c)) & 0xffff))	/* nonzero when the three low bytes of c are all equal */
+
+#ifdef XF86DRI
+#define MGA_SYNC_XTAG                 0x275f4200
+
+#define MGABUSYWAIT() do { /* write the sync tag, then spin until it reads back; caller supplies the ';' */ \
+OUTREG(MGAREG_DWGSYNC, MGA_SYNC_XTAG); \
+while(INREG(MGAREG_DWGSYNC) != MGA_SYNC_XTAG) ; \
+}while(0)
+
+#endif
+
+#define MGAISBUSY() (INREG8(MGAREG_Status + 2) & 0x01)
+
+#define WAITFIFO(cnt) /* skipped when UsePCIRetry is set; NOTE(review): expands to a bare if, so never place it before an else */ \
+   if(!pMga->UsePCIRetry) {\
+	register int n = cnt; \
+	if(n > pMga->FifoSize) n = pMga->FifoSize; /* clamp the request to FifoSize */ \
+	while(pMga->fifoCount < (n)) /* poll FIFOSTATUS until the cached count covers n */ \
+	    pMga->fifoCount = INREG8(MGAREG_FIFOSTATUS);\
+	pMga->fifoCount -= n; /* deduct the slots the caller is about to use */ \
+   }
+
+#define XYADDRESS(x,y) \
+    ((y) * pMga->CurrentLayout.displayWidth + (x) + pMga->YDstOrg)
+
+#define MAKEDMAINDEX(index)  ((((index) >> 2) & 0x7f) | (((index) >> 6) & 0x80))	/* one byte: index bits 2-8 in bits 0-6, index bit 13 in bit 7 */
+
+#define DMAINDICES(one,two,three,four)	/* packs four MAKEDMAINDEX bytes into 32 bits, 'one' in the lowest byte */ \
+	( MAKEDMAINDEX(one) | \
+	 (MAKEDMAINDEX(two) << 8) | \
+	 (MAKEDMAINDEX(three) << 16) | \
+ 	 (MAKEDMAINDEX(four) << 24) )
+
+#if PSZ == 24
+#define SET_PLANEMASK(p) /**/
+#else
+#define SET_PLANEMASK(p) /* writes PLNWT only when p differs from the cached PlaneMask and MGA_NO_PLANEMASK is clear */ \
+	if(!(pMga->AccelFlags & MGA_NO_PLANEMASK) && ((p) != pMga->PlaneMask)) { \
+	   pMga->PlaneMask = (p); /* cache the value as passed in, before replication */ \
+	   REPLICATE((p)); /* modifies p in place, so p must be a modifiable lvalue */ \
+	   OUTREG(MGAREG_PLNWT,(p)); \
+	}
+#endif
+
+#define SET_FOREGROUND(c) /* writes FCOL only when c differs from the cached FgColor */ \
+	if((c) != pMga->FgColor) { \
+	   pMga->FgColor = (c); /* cache the value as passed in, before replication */ \
+	   REPLICATE((c)); /* modifies c in place, so c must be a modifiable lvalue */ \
+	   OUTREG(MGAREG_FCOL,(c)); \
+	}
+
+#define SET_BACKGROUND(c) /* writes BCOL only when c differs from the cached BgColor */ \
+	if((c) != pMga->BgColor) { \
+	   pMga->BgColor = (c); /* cache the value as passed in, before replication */ \
+	   REPLICATE((c)); /* modifies c in place, so c must be a modifiable lvalue */ \
+	   OUTREG(MGAREG_BCOL,(c)); \
+	}
+
+#define DISABLE_CLIP() { /* clears CLIPPER_ON, waits for one FIFO slot, then writes 0xFFFF0000 to CXBNDRY */ \
+	pMga->AccelFlags &= ~CLIPPER_ON; \
+	WAITFIFO(1); \
+	OUTREG(MGAREG_CXBNDRY, 0xFFFF0000); }
+
+#ifdef XF86DRI
+#define CHECK_DMA_QUIESCENT(pMGA, pScrn) {	/* calls GetQuiescence(pScrn) unless haveQuiescense is already set */ \
+   if (!pMGA->haveQuiescense) {			\
+      pMGA->GetQuiescence( pScrn );		\
+   }						\
+}
+#else
+#define CHECK_DMA_QUIESCENT(pMGA, pScrn)
+#endif
+
+#ifdef USEMGAHAL
+#define HAL_CHIPSETS ((pMga->Chipset == PCI_CHIP_MGAG200_PCI) || \
+		  (pMga->Chipset == PCI_CHIP_MGAG200) || \
+		  (pMga->Chipset == PCI_CHIP_MGAG400) || \
+		  (pMga->Chipset == PCI_CHIP_MGAG550))
+    
+#define MGA_HAL(x) { /* runs x only when the HAL is loaded and the chipset is in HAL_CHIPSETS; needs pScrn in scope */ \
+	MGAPtr pMga = MGAPTR(pScrn); \
+	if (pMga->HALLoaded && HAL_CHIPSETS) { x; } \
+}
+#define MGA_NOT_HAL(x) { /* exact complement of MGA_HAL: runs x when the HAL is absent or the chipset is not listed */ \
+	MGAPtr pMga = MGAPTR(pScrn); \
+	if (!pMga->HALLoaded || !HAL_CHIPSETS) { x; } \
+}
+#else
+#define MGA_NOT_HAL(x) { x; }
+#endif
+
+#define MGAISGx50(x) ( (((x)->Chipset == PCI_CHIP_MGAG400) && ((x)->ChipRev >= 0x80)) || \
+		       ((x)->Chipset == PCI_CHIP_MGAG550) )
+
+#define MGA_DH_NEEDS_HAL(x) (((x)->Chipset == PCI_CHIP_MGAG400) && \
+			     ((x)->ChipRev < 0x80))
+
+#endif /* _MGA_MACROS_H_ */
diff --git a/src/mesa/drivers/dri/mga/server/mga_reg.h b/src/mesa/drivers/dri/mga/server/mga_reg.h
new file mode 100644
index 0000000000..d51366d44e
--- /dev/null
+++ b/src/mesa/drivers/dri/mga/server/mga_reg.h
@@ -0,0 +1,482 @@
+
+
+
+
+
+
+/*
+ * MGA Millennium (MGA2064W) functions
+ * MGA Mystique (MGA1064SG) functions
+ *
+ * Copyright 1996 The XFree86 Project, Inc.
+ *
+ * Authors
+ *		Dirk Hohndel
+ *			hohndel@XFree86.Org
+ *		David Dawes
+ *			dawes@XFree86.Org
+ * Contributors:
+ *		Guy DESBIEF, Aix-en-provence, France
+ *			g.desbief@aix.pacwan.net
+ *		MGA1064SG Mystique register file
+ */
+
+
+#ifndef _MGA_REG_H_
+#define _MGA_REG_H_
+
+#define	MGAREG_DWGCTL		0x1c00
+#define	MGAREG_MACCESS		0x1c04
+#define	MGA_MACCESS_PW16        0x00000001
+#define	MGA_MACCESS_PW32        0x00000002
+/* the following is a mystique only register */
+#define MGAREG_MCTLWTST		0x1c08
+#define	MGAREG_ZORG		0x1c0c
+
+#define	MGAREG_PAT0		0x1c10
+#define	MGAREG_PAT1		0x1c14
+#define	MGAREG_PLNWT		0x1c1c
+
+#define	MGAREG_BCOL		0x1c20
+#define	MGAREG_FCOL		0x1c24
+
+#define	MGAREG_SRC0		0x1c30
+#define	MGAREG_SRC1		0x1c34
+#define	MGAREG_SRC2		0x1c38
+#define	MGAREG_SRC3		0x1c3c
+
+#define	MGAREG_XYSTRT		0x1c40
+#define	MGAREG_XYEND		0x1c44
+
+#define	MGAREG_SHIFT		0x1c50
+/* the following is a mystique only register */
+#define MGAREG_DMAPAD		0x1c54
+#define	MGAREG_SGN		0x1c58
+#define	MGAREG_LEN		0x1c5c
+
+#define	MGAREG_AR0		0x1c60
+#define	MGAREG_AR1		0x1c64
+#define	MGAREG_AR2		0x1c68
+#define	MGAREG_AR3		0x1c6c
+#define	MGAREG_AR4		0x1c70
+#define	MGAREG_AR5		0x1c74
+#define	MGAREG_AR6		0x1c78
+
+#define	MGAREG_CXBNDRY		0x1c80
+#define	MGAREG_FXBNDRY		0x1c84
+#define	MGAREG_YDSTLEN		0x1c88
+#define	MGAREG_PITCH		0x1c8c
+
+#define	MGAREG_YDST		0x1c90
+#define	MGAREG_YDSTORG		0x1c94
+#define	MGAREG_YTOP		0x1c98
+#define	MGAREG_YBOT		0x1c9c
+
+#define	MGAREG_CXLEFT		0x1ca0
+#define	MGAREG_CXRIGHT		0x1ca4
+#define	MGAREG_FXLEFT		0x1ca8
+#define	MGAREG_FXRIGHT		0x1cac
+
+#define	MGAREG_XDST		0x1cb0
+
+#define	MGAREG_DR0		0x1cc0
+#define	MGAREG_DR1		0x1cc4
+#define	MGAREG_DR2		0x1cc8
+#define	MGAREG_DR3		0x1ccc
+
+#define	MGAREG_DR4		0x1cd0
+#define	MGAREG_DR5		0x1cd4
+#define	MGAREG_DR6		0x1cd8
+#define	MGAREG_DR7		0x1cdc
+
+#define	MGAREG_DR8		0x1ce0
+#define	MGAREG_DR9		0x1ce4
+#define	MGAREG_DR10		0x1ce8
+#define	MGAREG_DR11		0x1cec
+
+#define	MGAREG_DR12		0x1cf0
+#define	MGAREG_DR13		0x1cf4
+#define	MGAREG_DR14		0x1cf8
+#define	MGAREG_DR15		0x1cfc
+
+#define MGAREG_SRCORG		0x2cb4
+#define MGAREG_DSTORG		0x2cb8
+
+/* add or OR this to one of the previous "power registers" to start
+   the drawing engine */
+
+#define MGAREG_EXEC		0x0100
+
+#define	MGAREG_FIFOSTATUS	0x1e10
+#define	MGAREG_Status		0x1e14
+#define	MGAREG_ICLEAR		0x1e18
+#define	MGAREG_IEN		0x1e1c
+
+#define	MGAREG_VCOUNT		0x1e20
+
+#define	MGAREG_Reset		0x1e40
+
+#define	MGAREG_OPMODE		0x1e54
+
+/* Warp Registers */
+#define MGAREG_WIADDR           0x1dc0
+#define MGAREG_WIADDR2          0x1dd8
+#define MGAREG_WGETMSB          0x1dc8
+#define MGAREG_WVRTXSZ          0x1dcc
+#define MGAREG_WACCEPTSEQ       0x1dd4
+#define MGAREG_WMISC            0x1e70
+
+/* OPMODE register additives */
+
+#define MGAOPM_DMA_GENERAL	(0x00 << 2)
+#define MGAOPM_DMA_BLIT		(0x01 << 2)
+#define MGAOPM_DMA_VECTOR	(0x10 << 2)	/* NOTE(review): 0x10 may be binary "10" mistyped as hex (0x02 would continue the sequence) - verify against the Matrox OPMODE spec */
+
+/* DWGCTL register additives */
+
+/* Lines */
+
+#define MGADWG_LINE_OPEN	0x00
+#define MGADWG_AUTOLINE_OPEN	0x01
+#define MGADWG_LINE_CLOSE	0x02
+#define MGADWG_AUTOLINE_CLOSE	0x03
+
+/* Trapezoids */
+#define MGADWG_TRAP		0x04
+#define MGADWG_TEXTURE_TRAP	0x05
+
+/* BitBlts */
+
+#define MGADWG_BITBLT		0x08
+#define MGADWG_FBITBLT		0x0c
+#define MGADWG_ILOAD		0x09
+#define MGADWG_ILOAD_SCALE	0x0d
+#define MGADWG_ILOAD_FILTER	0x0f
+#define MGADWG_ILOAD_HIQH	0x07
+#define MGADWG_ILOAD_HIQHV	0x0e
+#define MGADWG_IDUMP		0x0a
+
+/* atype access to WRAM */
+
+#define MGADWG_RPL		( 0x00 << 4 )
+#define MGADWG_RSTR		( 0x01 << 4 )
+#define MGADWG_ZI		( 0x03 << 4 )
+#define MGADWG_BLK 		( 0x04 << 4 )
+#define MGADWG_I		( 0x07 << 4 )
+
+/* specifies whether bit blits are linear or xy */
+#define MGADWG_LINEAR		( 0x01 << 7 )
+
+/* z drawing mode. use MGADWG_NOZCMP for always */
+
+#define MGADWG_NOZCMP		( 0x00 << 8 )
+#define MGADWG_ZE		( 0x02 << 8 )
+#define MGADWG_ZNE		( 0x03 << 8 )
+#define MGADWG_ZLT		( 0x04 << 8 )
+#define MGADWG_ZLTE		( 0x05 << 8 )
+#define MGADWG_GT		( 0x06 << 8 )
+#define MGADWG_GTE		( 0x07 << 8 )
+
+/* use this to force colour expansion circuitry to do its stuff */
+
+#define MGADWG_SOLID		( 0x01 << 11 )
+
+/* ar register at zero */
+
+#define MGADWG_ARZERO		( 0x01 << 12 )
+
+#define MGADWG_SGNZERO		( 0x01 << 13 )
+
+#define MGADWG_SHIFTZERO	( 0x01 << 14 )
+
+/* See table on 4-43 for bop ALU operations */
+
+/* See table on 4-44 for translucidity masks */
+
+#define MGADWG_BMONOLEF		( 0x00 << 25 )
+#define MGADWG_BMONOWF		( 0x04 << 25 )
+#define MGADWG_BPLAN		( 0x01 << 25 )
+
+/* note that if bfcol is specified and you're doing a bitblt, it causes
+   a fbitblt to be performed, so check that you obey the fbitblt rules */
+
+#define MGADWG_BFCOL   		( 0x02 << 25 )
+#define MGADWG_BUYUV		( 0x0e << 25 )
+#define MGADWG_BU32BGR		( 0x03 << 25 )
+#define MGADWG_BU32RGB		( 0x07 << 25 )
+#define MGADWG_BU24BGR		( 0x0b << 25 )
+#define MGADWG_BU24RGB		( 0x0f << 25 )
+
+#define MGADWG_PATTERN		( 0x01 << 29 )
+#define MGADWG_TRANSC		( 0x01 << 30 )
+#define MGAREG_MISC_WRITE	0x3c2
+#define MGAREG_MISC_READ	0x3cc
+#define MGAREG_MISC_IOADSEL	(0x1 << 0)
+#define MGAREG_MISC_RAMMAPEN	(0x1 << 1)
+#define MGAREG_MISC_CLK_SEL_VGA25	(0x0 << 2)
+#define MGAREG_MISC_CLK_SEL_VGA28	(0x1 << 2)
+#define MGAREG_MISC_CLK_SEL_MGA_PIX	(0x2 << 2)
+#define MGAREG_MISC_CLK_SEL_MGA_MSK	(0x3 << 2)
+#define MGAREG_MISC_VIDEO_DIS	(0x1 << 4)
+#define MGAREG_MISC_HIGH_PG_SEL	(0x1 << 5)
+
+/* MMIO VGA registers */
+#define MGAREG_SEQ_INDEX	0x1fc4
+#define MGAREG_SEQ_DATA		0x1fc5
+#define MGAREG_CRTC_INDEX	0x1fd4
+#define MGAREG_CRTC_DATA	0x1fd5
+#define MGAREG_CRTCEXT_INDEX	0x1fde
+#define MGAREG_CRTCEXT_DATA	0x1fdf
+
+
+
+/* MGA bits for registers PCI_OPTION_REG */
+#define MGA1064_OPT_SYS_CLK_PCI   		( 0x00 << 0 )
+#define MGA1064_OPT_SYS_CLK_PLL   		( 0x01 << 0 )
+#define MGA1064_OPT_SYS_CLK_EXT   		( 0x02 << 0 )
+#define MGA1064_OPT_SYS_CLK_MSK   		( 0x03 << 0 )
+
+#define MGA1064_OPT_SYS_CLK_DIS   		( 0x01 << 2 )
+#define MGA1064_OPT_G_CLK_DIV_1   		( 0x01 << 3 )
+#define MGA1064_OPT_M_CLK_DIV_1   		( 0x01 << 4 )
+
+#define MGA1064_OPT_SYS_PLL_PDN   		( 0x01 << 5 )
+#define MGA1064_OPT_VGA_ION   		( 0x01 << 8 )
+
+/* MGA registers in PCI config space */
+#define PCI_MGA_INDEX		0x44
+#define PCI_MGA_DATA		0x48
+#define PCI_MGA_OPTION2		0x50
+#define PCI_MGA_OPTION3		0x54
+
+#define RAMDAC_OFFSET		0x3c00
+
+/* TVP3026 direct registers */
+
+#define TVP3026_INDEX		0x00
+#define TVP3026_WADR_PAL	0x00
+#define TVP3026_COL_PAL		0x01
+#define TVP3026_PIX_RD_MSK	0x02
+#define TVP3026_RADR_PAL	0x03
+#define TVP3026_CUR_COL_ADDR	0x04
+#define TVP3026_CUR_COL_DATA	0x05
+#define TVP3026_DATA		0x0a
+#define TVP3026_CUR_RAM		0x0b
+#define TVP3026_CUR_XLOW	0x0c
+#define TVP3026_CUR_XHI		0x0d
+#define TVP3026_CUR_YLOW	0x0e
+#define TVP3026_CUR_YHI		0x0f
+
+/* TVP3026 indirect registers */
+
+#define TVP3026_SILICON_REV	0x01
+#define TVP3026_CURSOR_CTL	0x06
+#define TVP3026_LATCH_CTL	0x0f
+#define TVP3026_TRUE_COLOR_CTL	0x18
+#define TVP3026_MUX_CTL		0x19
+#define TVP3026_CLK_SEL		0x1a
+#define TVP3026_PAL_PAGE	0x1c
+#define TVP3026_GEN_CTL		0x1d
+#define TVP3026_MISC_CTL	0x1e
+#define TVP3026_GEN_IO_CTL	0x2a
+#define TVP3026_GEN_IO_DATA	0x2b
+#define TVP3026_PLL_ADDR	0x2c
+#define TVP3026_PIX_CLK_DATA	0x2d
+#define TVP3026_MEM_CLK_DATA	0x2e
+#define TVP3026_LOAD_CLK_DATA	0x2f
+#define TVP3026_KEY_RED_LOW	0x32
+#define TVP3026_KEY_RED_HI	0x33
+#define TVP3026_KEY_GREEN_LOW	0x34
+#define TVP3026_KEY_GREEN_HI	0x35
+#define TVP3026_KEY_BLUE_LOW	0x36
+#define TVP3026_KEY_BLUE_HI	0x37
+#define TVP3026_KEY_CTL		0x38
+#define TVP3026_MCLK_CTL	0x39
+#define TVP3026_SENSE_TEST	0x3a
+#define TVP3026_TEST_DATA	0x3b
+#define TVP3026_CRC_LSB		0x3c
+#define TVP3026_CRC_MSB		0x3d
+#define TVP3026_CRC_CTL		0x3e
+#define TVP3026_ID		0x3f
+#define TVP3026_RESET		0xff
+
+
+/* MGA1064 DAC Register file */
+/* MGA1064 direct registers */
+
+#define MGA1064_INDEX		0x00
+#define MGA1064_WADR_PAL	0x00
+#define MGA1064_COL_PAL		0x01
+#define MGA1064_PIX_RD_MSK	0x02
+#define MGA1064_RADR_PAL	0x03
+#define MGA1064_DATA		0x0a
+
+#define MGA1064_CUR_XLOW	0x0c
+#define MGA1064_CUR_XHI		0x0d
+#define MGA1064_CUR_YLOW	0x0e
+#define MGA1064_CUR_YHI		0x0f
+
+/* MGA1064 indirect registers */
+#define MGA1064_DVI_PIPE_CTL    0x03
+#define MGA1064_CURSOR_BASE_ADR_LOW	0x04
+#define MGA1064_CURSOR_BASE_ADR_HI	0x05
+#define MGA1064_CURSOR_CTL	0x06
+#define MGA1064_CURSOR_COL0_RED	0x08
+#define MGA1064_CURSOR_COL0_GREEN	0x09
+#define MGA1064_CURSOR_COL0_BLUE	0x0a
+
+#define MGA1064_CURSOR_COL1_RED	0x0c
+#define MGA1064_CURSOR_COL1_GREEN	0x0d
+#define MGA1064_CURSOR_COL1_BLUE	0x0e
+
+#define MGA1064_CURSOR_COL2_RED	0x010
+#define MGA1064_CURSOR_COL2_GREEN	0x011
+#define MGA1064_CURSOR_COL2_BLUE	0x012
+
+#define MGA1064_VREF_CTL	0x018
+
+#define MGA1064_MUL_CTL		0x19
+#define MGA1064_MUL_CTL_8bits		0x0
+#define MGA1064_MUL_CTL_15bits		0x01
+#define MGA1064_MUL_CTL_16bits		0x02
+#define MGA1064_MUL_CTL_24bits		0x03
+#define MGA1064_MUL_CTL_32bits		0x04
+#define MGA1064_MUL_CTL_2G8V16bits		0x05
+#define MGA1064_MUL_CTL_G16V16bits		0x06
+#define MGA1064_MUL_CTL_32_24bits		0x07
+
+#define MGAGDAC_XVREFCTRL		0x18
+#define MGA1064_PIX_CLK_CTL		0x1a
+#define MGA1064_PIX_CLK_CTL_CLK_DIS   		( 0x01 << 2 )
+#define MGA1064_PIX_CLK_CTL_CLK_POW_DOWN   	( 0x01 << 3 )
+#define MGA1064_PIX_CLK_CTL_SEL_PCI   		( 0x00 << 0 )
+#define MGA1064_PIX_CLK_CTL_SEL_PLL   		( 0x01 << 0 )
+#define MGA1064_PIX_CLK_CTL_SEL_EXT   		( 0x02 << 0 )
+#define MGA1064_PIX_CLK_CTL_SEL_MSK   		( 0x03 << 0 )
+
+#define MGA1064_GEN_CTL		0x1d
+#define MGA1064_MISC_CTL	0x1e
+#define MGA1064_MISC_CTL_DAC_POW_DN   		( 0x01 << 0 )
+#define MGA1064_MISC_CTL_VGA   		( 0x01 << 1 )
+#define MGA1064_MISC_CTL_DIS_CON   		( 0x03 << 1 )
+#define MGA1064_MISC_CTL_MAFC   		( 0x02 << 1 )
+#define MGA1064_MISC_CTL_VGA8   		( 0x01 << 3 )
+#define MGA1064_MISC_CTL_DAC_RAM_CS   		( 0x01 << 4 )
+
+#define MGA1064_GEN_IO_CTL	0x2a
+#define MGA1064_GEN_IO_DATA	0x2b
+#define MGA1064_SYS_PLL_M	0x2c
+#define MGA1064_SYS_PLL_N	0x2d
+#define MGA1064_SYS_PLL_P	0x2e
+#define MGA1064_SYS_PLL_STAT	0x2f
+#define MGA1064_ZOOM_CTL	0x38
+#define MGA1064_SENSE_TST	0x3a
+
+#define MGA1064_CRC_LSB		0x3c
+#define MGA1064_CRC_MSB		0x3d
+#define MGA1064_CRC_CTL		0x3e
+#define MGA1064_COL_KEY_MSK_LSB		0x40
+#define MGA1064_COL_KEY_MSK_MSB		0x41
+#define MGA1064_COL_KEY_LSB		0x42
+#define MGA1064_COL_KEY_MSB		0x43
+#define MGA1064_PIX_PLLA_M	0x44
+#define MGA1064_PIX_PLLA_N	0x45
+#define MGA1064_PIX_PLLA_P	0x46
+#define MGA1064_PIX_PLLB_M	0x48
+#define MGA1064_PIX_PLLB_N	0x49
+#define MGA1064_PIX_PLLB_P	0x4a
+#define MGA1064_PIX_PLLC_M	0x4c
+#define MGA1064_PIX_PLLC_N	0x4d
+#define MGA1064_PIX_PLLC_P	0x4e
+
+#define MGA1064_PIX_PLL_STAT	0x4f
+
+/*Added for G450 dual head*/
+/* Supported PLL*/
+#define __PIXEL_PLL                 1
+#define __SYSTEM_PLL                2
+#define __VIDEO_PLL                 3
+
+#define MGA1064_VID_PLL_P       0x8D
+#define MGA1064_VID_PLL_M       0x8E
+#define MGA1064_VID_PLL_N       0x8F
+
+#define MGA1064_DISP_CTL        0x8a
+#define MGA1064_SYNC_CTL        0x8b
+#define MGA1064_PWR_CTL         0xa0
+#define MGA1064_PAN_CTL         0xa2
+
+/* Using crtc2 */
+#define MGAREG2_C2CTL            0x10
+#define MGAREG2_C2HPARAM         0x14
+#define MGAREG2_C2HSYNC          0x18
+#define MGAREG2_C2VPARAM         0x1c
+#define MGAREG2_C2VSYNC          0x20
+#define MGAREG2_C2STARTADD0      0x28
+
+#define MGAREG2_C2OFFSET         0x40
+#define MGAREG2_C2DATACTL        0x4c
+
+#define MGAREG_C2CTL            0x3c10
+#define MGAREG_C2HPARAM         0x3c14
+#define MGAREG_C2HSYNC          0x3c18
+#define MGAREG_C2VPARAM         0x3c1c
+#define MGAREG_C2VSYNC          0x3c20
+#define MGAREG_C2STARTADD0      0x3c28
+
+#define MGAREG_C2OFFSET         0x3c40
+#define MGAREG_C2DATACTL        0x3c4c
+
+#define MGA1064_DISP_CTL        0x8a	/* repeats the identical definition earlier in this header */
+#define MGA1064_SYNC_CTL        0x8b	/* repeats the identical definition earlier in this header */
+#define MGA1064_PWR_CTL         0xa0	/* repeats the identical definition earlier in this header */
+
+/* video register */
+
+#define MGAREG_BESA1C3ORG	0x3d60
+#define MGAREG_BESA1CORG	0x3d10
+#define MGAREG_BESA1ORG		0x3d00
+#define MGAREG_BESCTL		0x3d20
+#define MGAREG_BESGLOBCTL	0x3dc0
+#define MGAREG_BESHCOORD	0x3d28
+#define MGAREG_BESHISCAL	0x3d30
+#define MGAREG_BESHSRCEND	0x3d3c
+#define MGAREG_BESHSRCLST	0x3d50
+#define MGAREG_BESHSRCST	0x3d38
+#define MGAREG_BESLUMACTL	0x3d40
+#define MGAREG_BESPITCH		0x3d24
+#define MGAREG_BESV1SRCLST	0x3d54
+#define MGAREG_BESV1WGHT	0x3d48
+#define MGAREG_BESVCOORD	0x3d2c
+#define MGAREG_BESVISCAL	0x3d34
+
+/* texture engine registers */
+
+#define MGAREG_TMR0		0x2c00
+#define MGAREG_TMR1		0x2c04
+#define MGAREG_TMR2		0x2c08
+#define MGAREG_TMR3		0x2c0c
+#define MGAREG_TMR4		0x2c10
+#define MGAREG_TMR5		0x2c14
+#define MGAREG_TMR6		0x2c18
+#define MGAREG_TMR7		0x2c1c
+#define MGAREG_TMR8		0x2c20
+#define MGAREG_TEXORG		0x2c24
+#define MGAREG_TEXWIDTH		0x2c28
+#define MGAREG_TEXHEIGHT	0x2c2c
+#define MGAREG_TEXCTL		0x2c30
+#define MGAREG_TEXCTL2		0x2c3c
+#define MGAREG_TEXTRANS		0x2c34
+#define MGAREG_TEXTRANSHIGH	0x2c38
+#define MGAREG_TEXFILTER	0x2c58
+#define MGAREG_ALPHASTART	0x2c70
+#define MGAREG_ALPHAXINC	0x2c74
+#define MGAREG_ALPHAYINC	0x2c78
+#define MGAREG_ALPHACTRL	0x2c7c
+#define MGAREG_DWGSYNC		0x2c4c
+
+#define MGAREG_AGP_PLL		0x1e4c
+#define MGA_AGP2XPLL_ENABLE		0x1
+#define MGA_AGP2XPLL_DISABLE		0x0
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/Makefile b/src/mesa/drivers/dri/nouveau/Makefile
new file mode 100644
index 0000000000..7be19b26fd
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/Makefile
@@ -0,0 +1,55 @@
+# src/mesa/drivers/dri/nouveau/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+CFLAGS += $(shell pkg-config libdrm libdrm_nouveau --cflags)
+DRI_LIB_DEPS += $(shell pkg-config libdrm_nouveau --libs)
+
+LIBNAME = nouveau_vieux_dri.so
+
+DRIVER_SOURCES = \
+	nouveau_screen.c \
+	nouveau_context.c \
+	nouveau_fbo.c \
+	nouveau_driver.c \
+	nouveau_state.c \
+	nouveau_bufferobj.c \
+	nouveau_span.c \
+	nouveau_bo_state.c \
+	nouveau_texture.c \
+	nouveau_surface.c \
+	nv04_context.c \
+	nv04_render.c \
+	nv04_state_fb.c \
+	nv04_state_raster.c \
+	nv04_state_tex.c \
+	nv04_state_frag.c \
+	nv04_surface.c \
+	nv10_context.c \
+	nv10_render.c \
+	nv10_state_fb.c \
+	nv10_state_polygon.c \
+	nv10_state_raster.c \
+	nv10_state_tex.c \
+	nv10_state_frag.c \
+	nv10_state_tnl.c \
+	nv20_context.c \
+	nv20_render.c \
+	nv20_state_fb.c \
+	nv20_state_polygon.c \
+	nv20_state_raster.c \
+	nv20_state_tex.c \
+	nv20_state_frag.c \
+	nv20_state_tnl.c
+
+C_SOURCES = \
+	$(COMMON_SOURCES) \
+	$(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+
+include ../Makefile.template
+
+symlinks:
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bo_state.c b/src/mesa/drivers/dri/nouveau/nouveau_bo_state.c
new file mode 100644
index 0000000000..fc5f77b46a
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bo_state.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+
+static GLboolean
+nouveau_bo_marker_emit(GLcontext *ctx, struct nouveau_bo_marker *m,
+		       uint32_t flags)
+{	/* Pushes marker m as a header word plus a relocated data word; GL_FALSE if MARK_RING or a reloc reports failure. */
+	struct nouveau_channel *chan = context_chan(ctx);
+	uint32_t packet;
+
+	if (m->gr->bound == NOUVEAU_GROBJ_UNBOUND)
+		nouveau_grobj_autobind(m->gr);
+
+	if (MARK_RING(chan, 2, 2))	/* a nonzero result is treated as failure */
+		return GL_FALSE;
+
+	packet = (m->gr->subc << 13) | (1 << 18) | m->mthd;	/* header: subchannel at bit 13, 1 at bit 18, method in the low bits */
+
+	if (flags) {	/* extra flags given: the header word goes through a reloc on m->bo as well */
+		if (nouveau_pushbuf_emit_reloc(chan, chan->cur++, m->bo,
+					       packet, 0, flags |
+					       (m->flags & (NOUVEAU_BO_VRAM |
+							    NOUVEAU_BO_GART |
+							    NOUVEAU_BO_RDWR)),
+					       0, 0))
+			goto fail;
+	} else {
+		*(chan->cur++) = packet;	/* no extra flags: header is stored directly */
+	}
+
+	if (nouveau_pushbuf_emit_reloc(chan, chan->cur++, m->bo, m->data,
+				       m->data2, flags | m->flags,
+				       m->vor, m->tor))
+		goto fail;
+
+	return GL_TRUE;
+
+fail:
+	MARK_UNDO(chan);	/* presumably rolls back to the MARK_RING point - confirm against libdrm */
+	return GL_FALSE;
+}
+
+static GLboolean
+nouveau_bo_context_grow(struct nouveau_bo_context *bctx)
+{
+	/* Extend the marker array by exactly one slot; bctx is untouched on failure. */
+	const int capacity = bctx->allocated + 1;
+	struct nouveau_bo_marker *grown =
+		realloc(bctx->marker, capacity * sizeof(*grown));
+
+	if (grown == NULL)
+		return GL_FALSE;
+
+	bctx->allocated = capacity;
+	bctx->marker = grown;
+	return GL_TRUE;
+}
+
+GLboolean
+nouveau_bo_mark(struct nouveau_bo_context *bctx, struct nouveau_grobj *gr,
+		uint32_t mthd, struct nouveau_bo *bo,
+		uint32_t data, uint32_t data2, uint32_t vor, uint32_t tor,
+		uint32_t flags)
+{	/* Records a marker for bo in bctx and emits it; on any failure bctx is reset and GL_FALSE returned. */
+	struct nouveau_bo_state *s = &to_nouveau_context(bctx->ctx)->bo;
+	struct nouveau_bo_marker *m;
+
+	if (bctx->count == bctx->allocated) {	/* array is full: make room for one more entry */
+		if (!nouveau_bo_context_grow(bctx))
+			goto fail;
+	}
+
+	m = &bctx->marker[bctx->count];
+
+	*m = (struct nouveau_bo_marker) {	/* .bo is not listed, so it starts out NULL */
+		.gr = gr,
+		.mthd = mthd,
+		.data = data,
+		.data2 = data2,
+		.vor = vor,
+		.tor = tor,
+		.flags = flags,
+	};
+	nouveau_bo_ref(bo, &m->bo);	/* stores bo into m->bo through the reference helper */
+
+	s->count++;	/* running total across all contexts */
+	bctx->count++;
+
+	if (!nouveau_bo_marker_emit(bctx->ctx, m, 0))	/* flags == 0: header word is written directly */
+		goto fail;
+
+	return GL_TRUE;
+
+fail:
+	nouveau_bo_context_reset(bctx);	/* drops every marker held by bctx, not only the new one */
+	return GL_FALSE;
+}
+
+void
+nouveau_bo_context_reset(struct nouveau_bo_context *bctx)
+{
+	/* Unreference every marked buffer, then remove them from both counters. */
+	struct nouveau_bo_state *state = &to_nouveau_context(bctx->ctx)->bo;
+	int idx = 0;
+
+	while (idx < bctx->count)
+		nouveau_bo_ref(NULL, &bctx->marker[idx++].bo);
+	state->count -= bctx->count;
+	bctx->count = 0;
+}
+
+GLboolean
+nouveau_bo_state_emit(GLcontext *ctx)
+{	/* Emits every recorded marker of every context with NOUVEAU_BO_DUMMY; stops at the first failure. */
+	struct nouveau_bo_state *s = &to_nouveau_context(ctx)->bo;
+	int i, j;
+
+	for (i = 0; i < NUM_NOUVEAU_BO_CONTEXT; i++) {
+		struct nouveau_bo_context *bctx = &s->context[i];
+
+		for (j = 0; j < bctx->count; j++) {
+			if (!nouveau_bo_marker_emit(ctx, &bctx->marker[j],
+						    NOUVEAU_BO_DUMMY))
+				return GL_FALSE;	/* markers already emitted are not undone here */
+		}
+	}
+
+	return GL_TRUE;
+}
+
+void
+nouveau_bo_state_init(GLcontext *ctx)
+{	/* Points every BO context back at ctx; no other field is written here. */
+	struct nouveau_bo_state *s = &to_nouveau_context(ctx)->bo;
+	int i;
+
+	for (i = 0; i < NUM_NOUVEAU_BO_CONTEXT; i++)
+		s->context[i].ctx = ctx;
+}
+
+void
+nouveau_bo_state_destroy(GLcontext *ctx)
+{
+	struct nouveau_bo_state *s = &to_nouveau_context(ctx)->bo;
+	int i, j;
+	/* Drop all BO references and free the arrays, leaving each context empty so a repeat call is harmless. */
+	for (i = 0; i < NUM_NOUVEAU_BO_CONTEXT; i++) {
+		struct nouveau_bo_context *bctx = &s->context[i];
+		for (j = 0; j < bctx->count; j++)
+			nouveau_bo_ref(NULL, &bctx->marker[j].bo);
+		s->count -= bctx->count;	/* keep the global total consistent */
+		free(bctx->marker);		/* free(NULL) is a no-op, no guard needed */
+		bctx->marker = NULL;		/* no dangling pointer is left behind */
+		bctx->allocated = bctx->count = 0;
+	}
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bo_state.h b/src/mesa/drivers/dri/nouveau/nouveau_bo_state.h
new file mode 100644
index 0000000000..da0a3a5c6f
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bo_state.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NOUVEAU_BO_STATE_H__
+#define __NOUVEAU_BO_STATE_H__
+
+enum {	/* indices into nouveau_bo_state.context[] */
+	NOUVEAU_BO_CONTEXT_FRAMEBUFFER = 0,
+	NOUVEAU_BO_CONTEXT_LMA_DEPTH,
+	NOUVEAU_BO_CONTEXT_SURFACE,
+	NOUVEAU_BO_CONTEXT_TEXTURE0,	/* TEXTURE0..3 must stay consecutive: context_bctx_i() adds an index to TEXTURE0 */
+	NOUVEAU_BO_CONTEXT_TEXTURE1,
+	NOUVEAU_BO_CONTEXT_TEXTURE2,
+	NOUVEAU_BO_CONTEXT_TEXTURE3,
+	NOUVEAU_BO_CONTEXT_VERTEX,
+	NUM_NOUVEAU_BO_CONTEXT	/* number of contexts; sizes the context[] array */
+};
+
+struct nouveau_bo_marker {	/* one recorded buffer reference, filled in by nouveau_bo_mark() */
+	struct nouveau_grobj *gr;	/* object the method is sent to */
+	uint32_t mthd;	/* method value stored by nouveau_bo_mark() */
+
+	struct nouveau_bo *bo;	/* buffer referenced for as long as the marker is recorded */
+	uint32_t data;	/* data/data2/vor/tor: stored verbatim from the nouveau_bo_mark() arguments */
+	uint32_t data2;
+	uint32_t vor;
+	uint32_t tor;
+	uint32_t flags;	/* flags argument given to nouveau_bo_mark() */
+};
+
+struct nouveau_bo_context {	/* one group of markers, reset and refilled as a unit */
+	GLcontext *ctx;	/* GL context this group belongs to */
+
+	struct nouveau_bo_marker *marker;	/* heap array of markers */
+	int allocated;	/* capacity of marker[], in entries */
+	int count;	/* entries of marker[] currently in use */
+};
+
+struct nouveau_bo_state {	/* all marker groups of one GL context */
+	struct nouveau_bo_context context[NUM_NOUVEAU_BO_CONTEXT];	/* indexed by the NOUVEAU_BO_CONTEXT_* enum */
+	int count;	/* presumably the total number of markers over all contexts - confirm against nouveau_bo_state.c */
+};
+
+GLboolean
+nouveau_bo_mark(struct nouveau_bo_context *bctx, struct nouveau_grobj *gr,
+		uint32_t mthd, struct nouveau_bo *bo,
+		uint32_t data, uint32_t data2, uint32_t vor, uint32_t tor,
+		uint32_t flags);
+
+#define nouveau_bo_markl(bctx, gr, mthd, bo, data, flags) /* nouveau_bo_mark() with NOUVEAU_BO_LOW added and data2/vor/tor = 0; usable as an expression */ \
+	nouveau_bo_mark(bctx, gr, mthd, bo, data, 0, 0, 0,		\
+			(flags) | NOUVEAU_BO_LOW)
+
+#define nouveau_bo_marko(bctx, gr, mthd, bo, flags) /* nouveau_bo_mark() with NOUVEAU_BO_OR and the channel's vram/gart handles; needs a variable named ctx in the caller's scope */ \
+	nouveau_bo_mark(bctx, gr, mthd, bo, 0, 0,			\
+			context_chan(ctx)->vram->handle,		\
+			context_chan(ctx)->gart->handle,		\
+			(flags) | NOUVEAU_BO_OR)
+
+void
+nouveau_bo_context_reset(struct nouveau_bo_context *bctx);
+
+GLboolean
+nouveau_bo_state_emit(GLcontext *ctx);
+
+void
+nouveau_bo_state_init(GLcontext *ctx);
+
+void
+nouveau_bo_state_destroy(GLcontext *ctx);
+
+#define __context_bctx(ctx, i) /* statement expression: yields &context[i] after RESETTING it, so each use drops the old markers */ \
+	({								\
+		struct nouveau_context *nctx = to_nouveau_context(ctx); \
+		struct nouveau_bo_context *bctx = &nctx->bo.context[i];	\
+		nouveau_bo_context_reset(bctx);				\
+		bctx;							\
+	})
+#define context_bctx(ctx, s) /* s is the enum suffix, e.g. FRAMEBUFFER */ \
+	__context_bctx(ctx, NOUVEAU_BO_CONTEXT_##s)
+#define context_bctx_i(ctx, s, i) /* indexed form: pastes s and 0, then adds i (e.g. TEXTURE0 + i) */ \
+	__context_bctx(ctx, NOUVEAU_BO_CONTEXT_##s##0 + (i))
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
new file mode 100644
index 0000000000..5906ad6d39
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_bufferobj.h"
+#include "nouveau_context.h"
+
+#include "main/bufferobj.h"
+
+/*
+ * NewBufferObject hook: allocate a zeroed nouveau_bufferobj and let core
+ * Mesa initialize the embedded gl_buffer_object.  No hardware storage is
+ * attached here.  Returns NULL if the allocation fails.
+ */
+static struct gl_buffer_object *
+nouveau_bufferobj_new(GLcontext *ctx, GLuint buffer, GLenum target)
+{
+	struct nouveau_bufferobj *wrapper = CALLOC_STRUCT(nouveau_bufferobj);
+
+	if (wrapper) {
+		_mesa_initialize_buffer_object(&wrapper->base, buffer,
+					       target);
+		return &wrapper->base;
+	}
+
+	return NULL;
+}
+
+/*
+ * DeleteBuffer hook: drop the reference to the backing nouveau_bo and
+ * free the wrapper structure.
+ */
+static void
+nouveau_bufferobj_del(GLcontext *ctx, struct gl_buffer_object *obj)
+{
+	struct nouveau_bufferobj *wrapper = to_nouveau_bufferobj(obj);
+
+	/* Passing NULL as the new reference releases the current one. */
+	nouveau_bo_ref(NULL, &wrapper->bo);
+
+	FREE(wrapper);
+}
+
+/*
+ * BufferData hook: replace the storage of `obj' with a freshly allocated
+ * mappable GART buffer of `size' bytes and, if `data' is non-NULL, copy
+ * `size' bytes from it into the new buffer.
+ *
+ * Returns GL_TRUE on success.  If the buffer cannot be allocated or
+ * mapped, the object is left without storage (size 0) and GL_FALSE is
+ * returned so that the caller can report the failure instead of the
+ * process being aborted by an assertion.
+ */
+static GLboolean
+nouveau_bufferobj_data(GLcontext *ctx, GLenum target, GLsizeiptrARB size,
+		       const GLvoid *data, GLenum usage,
+		       struct gl_buffer_object *obj)
+{
+	struct nouveau_bufferobj *nbo = to_nouveau_bufferobj(obj);
+	int ret;
+
+	/* The old storage goes away unconditionally; until the new
+	 * allocation has succeeded the object is empty. */
+	nouveau_bo_ref(NULL, &nbo->bo);
+	obj->Size = 0;
+	obj->Usage = usage;
+
+	ret = nouveau_bo_new(context_dev(ctx),
+			     NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
+			     size, &nbo->bo);
+	if (ret)
+		return GL_FALSE;
+
+	if (data) {
+		ret = nouveau_bo_map(nbo->bo, NOUVEAU_BO_WR);
+		if (ret) {
+			/* Don't keep storage we could not fill. */
+			nouveau_bo_ref(NULL, &nbo->bo);
+			return GL_FALSE;
+		}
+
+		memcpy(nbo->bo->map, data, size);
+		nouveau_bo_unmap(nbo->bo);
+	}
+
+	obj->Size = size;
+
+	return GL_TRUE;
+}
+
+/*
+ * BufferSubData hook: map the backing buffer for writing, copy `size'
+ * bytes from `data' to byte offset `offset' of the mapping, and unmap.
+ */
+static void
+nouveau_bufferobj_subdata(GLcontext *ctx, GLenum target, GLintptrARB offset,
+			  GLsizeiptrARB size, const GLvoid *data,
+			  struct gl_buffer_object *obj)
+{
+	struct nouveau_bo *bo = to_nouveau_bufferobj(obj)->bo;
+
+	nouveau_bo_map(bo, NOUVEAU_BO_WR);
+	/* bo->map is only read after the map call above. */
+	memcpy(bo->map + offset, data, size);
+	nouveau_bo_unmap(bo);
+}
+
+/*
+ * GetBufferSubData hook: map the backing buffer for reading, copy `size'
+ * bytes starting at byte offset `offset' of the mapping into `data', and
+ * unmap.
+ */
+static void
+nouveau_bufferobj_get_subdata(GLcontext *ctx, GLenum target, GLintptrARB offset,
+			   GLsizeiptrARB size, GLvoid *data,
+			   struct gl_buffer_object *obj)
+{
+	struct nouveau_bo *bo = to_nouveau_bufferobj(obj)->bo;
+
+	nouveau_bo_map(bo, NOUVEAU_BO_RD);
+	/* bo->map is only read after the map call above. */
+	memcpy(data, bo->map + offset, size);
+	nouveau_bo_unmap(bo);
+}
+
+/*
+ * MapBuffer hook: map the whole buffer by forwarding to the context's
+ * MapBufferRange hook with offset 0 and the object's full size.  The
+ * `access' enum is passed through unchanged.
+ */
+static void *
+nouveau_bufferobj_map(GLcontext *ctx, GLenum target, GLenum access,
+		   struct gl_buffer_object *obj)
+{
+	const GLsizeiptr whole_size = obj->Size;
+
+	return ctx->Driver.MapBufferRange(ctx, target, 0, whole_size,
+					  access, obj);
+}
+
+/*
+ * MapBufferRange hook: map `length' bytes of the backing buffer starting
+ * at `offset' and record the mapping in `obj'.
+ *
+ * `access' is interpreted as one of GL_READ_ONLY_ARB, GL_WRITE_ONLY_ARB
+ * or GL_READ_WRITE_ARB (this is what nouveau_bufferobj_map() passes).
+ * NOTE(review): any other value, e.g. a GL_MAP_*_BIT bitfield, yields
+ * neither the read nor the write flag -- confirm no caller passes one.
+ *
+ * Returns the mapped pointer, or NULL if the object has no storage or
+ * the mapping fails; in both cases the mapping fields of `obj' are left
+ * untouched.
+ */
+static void *
+nouveau_bufferobj_map_range(GLcontext *ctx, GLenum target, GLintptr offset,
+			    GLsizeiptr length, GLenum access,
+			    struct gl_buffer_object *obj)
+{
+	struct nouveau_bufferobj *nbo = to_nouveau_bufferobj(obj);
+	uint32_t flags = 0;
+
+	/* The object must not be mapped already. */
+	assert(!obj->Pointer);
+
+	if (!nbo->bo)
+		return NULL;
+
+	if (access == GL_READ_ONLY_ARB ||
+	    access == GL_READ_WRITE_ARB)
+		flags |= NOUVEAU_BO_RD;
+	if (access == GL_WRITE_ONLY_ARB ||
+	    access == GL_READ_WRITE_ARB)
+		flags |= NOUVEAU_BO_WR;
+
+	/* Don't publish a mapping that was never established. */
+	if (nouveau_bo_map_range(nbo->bo, offset, length, flags))
+		return NULL;
+
+	obj->Pointer = nbo->bo->map;
+	obj->Offset = offset;
+	obj->Length = length;
+	obj->AccessFlags = access;
+
+	return obj->Pointer;
+}
+
+/*
+ * UnmapBuffer hook: unmap the backing buffer and clear the mapping
+ * fields of `obj'.  The object must currently be mapped.  Always
+ * returns GL_TRUE.
+ */
+static GLboolean
+nouveau_bufferobj_unmap(GLcontext *ctx, GLenum target, struct gl_buffer_object *obj)
+{
+	assert(obj->Pointer);
+
+	nouveau_bo_unmap(to_nouveau_bufferobj(obj)->bo);
+
+	/* Back to the "not mapped" state. */
+	obj->AccessFlags = 0;
+	obj->Length = 0;
+	obj->Offset = 0;
+	obj->Pointer = NULL;
+
+	return GL_TRUE;
+}
+
+/*
+ * Install the buffer-object hooks implemented in this file into the
+ * driver function table.
+ */
+void
+nouveau_bufferobj_functions_init(struct dd_function_table *functions)
+{
+	/* Object lifetime. */
+	functions->NewBufferObject = nouveau_bufferobj_new;
+	functions->DeleteBuffer = nouveau_bufferobj_del;
+
+	/* Mapping. */
+	functions->MapBuffer = nouveau_bufferobj_map;
+	functions->MapBufferRange = nouveau_bufferobj_map_range;
+	functions->UnmapBuffer = nouveau_bufferobj_unmap;
+
+	/* Data upload and readback. */
+	functions->BufferData = nouveau_bufferobj_data;
+	functions->BufferSubData = nouveau_bufferobj_subdata;
+	functions->GetBufferSubData = nouveau_bufferobj_get_subdata;
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h
new file mode 100644
index 0000000000..acfc4cb9a9
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_bufferobj.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NOUVEAU_BUFFEROBJ_H__
+#define __NOUVEAU_BUFFEROBJ_H__
+
+/*
+ * Driver-side buffer object: the core Mesa object followed by the
+ * nouveau_bo that backs it.
+ */
+struct nouveau_bufferobj {
+	/* Must stay the first member: to_nouveau_bufferobj() is a plain
+	 * pointer cast from the gl_buffer_object. */
+	struct gl_buffer_object base;
+	/* Backing storage; NULL until BufferData has allocated it. */
+	struct nouveau_bo *bo;
+};
+#define to_nouveau_bufferobj(x) ((struct nouveau_bufferobj *)(x))
+
+/* Install the buffer-object driver hooks into `functions'. */
+void
+nouveau_bufferobj_functions_init(struct dd_function_table *functions);
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_class.h b/src/mesa/drivers/dri/nouveau/nouveau_class.h
new file mode 120000
index 0000000000..5eff4af4d5
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_class.h
@@ -0,0 +1 @@
+../../../../gallium/drivers/nouveau/nouveau_class.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c
new file mode 100644
index 0000000000..f481161d46
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_bufferobj.h"
+#include "nouveau_fbo.h"
+
+#include "main/dd.h"
+#include "main/framebuffer.h"
+#include "main/light.h"
+#include "main/state.h"
+#include "drivers/common/meta.h"
+#include "drivers/common/driverfuncs.h"
+#include "swrast/swrast.h"
+#include "swrast/s_context.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+#define need_GL_EXT_framebuffer_object
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_secondary_color
+
+#include "main/remap_helper.h"
+
+/*
+ * Extensions enabled for every nouveau context by driInitExtensions() in
+ * nouveau_context_init().  Each entry pairs an extension name with its
+ * function table (NULL where the entry has none); the table is
+ * terminated by a { NULL, NULL } entry.
+ */
+static const struct dri_extension nouveau_extensions[] = {
+	{ "GL_ARB_multitexture",	NULL },
+	{ "GL_ARB_texture_env_add",	NULL },
+	{ "GL_ARB_texture_env_combine",	NULL },
+	{ "GL_ARB_texture_env_dot3",	NULL },
+	{ "GL_ARB_texture_mirrored_repeat", NULL },
+	{ "GL_EXT_fog_coord",		GL_EXT_fog_coord_functions },
+	{ "GL_EXT_framebuffer_blit",	NULL },
+	{ "GL_EXT_framebuffer_object",	GL_EXT_framebuffer_object_functions },
+	{ "GL_EXT_secondary_color",	GL_EXT_secondary_color_functions },
+	{ "GL_EXT_stencil_wrap",	NULL },
+	{ "GL_EXT_texture_lod_bias",	NULL },
+	{ "GL_NV_blend_square",         NULL },
+	{ "GL_SGIS_generate_mipmap",	NULL },
+	{ NULL,				NULL }
+};
+
+/*
+ * Channel flush callback (installed in nouveau_context_init()): emit the
+ * recorded buffer-object state, unless the context has fallen back to
+ * software rasterization.
+ */
+static void
+nouveau_channel_flush_notify(struct nouveau_channel *chan)
+{
+	struct nouveau_context *nctx = chan->user_private;
+
+	if (nctx->fallback >= SWRAST)
+		return;
+
+	nouveau_bo_state_emit(&nctx->base);
+}
+
+/*
+ * DRI entry point for context creation: let the screen's chipset driver
+ * build the GL context, then link it with the DRI context in both
+ * directions.  `api' is not used.  Returns GL_FALSE if the chipset driver
+ * fails to create a context.
+ */
+GLboolean
+nouveau_context_create(gl_api api,
+		       const __GLcontextModes *visual, __DRIcontext *dri_ctx,
+		       void *share_ctx)
+{
+	struct nouveau_screen *screen = dri_ctx->driScreenPriv->private;
+	GLcontext *ctx = screen->driver->context_create(screen, visual,
+							share_ctx);
+
+	if (ctx == NULL)
+		return GL_FALSE;
+
+	to_nouveau_context(ctx)->dri_context = dri_ctx;
+	dri_ctx->driverPrivate = ctx;
+
+	return GL_TRUE;
+}
+
+/*
+ * Chipset-independent part of context set-up: fill in the driver
+ * function table, initialize the core Mesa context and its helper
+ * modules (meta, swrast, vbo, tnl), allocate the hardware channel and
+ * enable the supported extensions.
+ *
+ * Returns GL_TRUE on success.  Returns GL_FALSE if core Mesa fails to
+ * initialize the context or if no hardware channel can be allocated;
+ * nothing set up so far is released here.
+ */
+GLboolean
+nouveau_context_init(GLcontext *ctx, struct nouveau_screen *screen,
+		     const GLvisual *visual, GLcontext *share_ctx)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct dd_function_table functions;
+	int ret;
+
+	nctx->screen = screen;
+	nctx->fallback = HWTNL;
+
+	/* Initialize the function pointers. */
+	_mesa_init_driver_functions(&functions);
+	nouveau_driver_functions_init(&functions);
+	nouveau_bufferobj_functions_init(&functions);
+	nouveau_texture_functions_init(&functions);
+	nouveau_fbo_functions_init(&functions);
+
+	/* Initialize the mesa context.  Everything below operates on it,
+	 * so give up right away if this fails. */
+	if (!_mesa_initialize_context(ctx, visual, share_ctx, &functions,
+				      NULL)) {
+		nouveau_error("Error initializing the mesa context.\n");
+		return GL_FALSE;
+	}
+
+	nouveau_state_init(ctx);
+	nouveau_bo_state_init(ctx);
+	_mesa_meta_init(ctx);
+	_swrast_CreateContext(ctx);
+	_vbo_CreateContext(ctx);
+	_tnl_CreateContext(ctx);
+	nouveau_span_functions_init(ctx);
+	_mesa_allow_light_in_model(ctx, GL_FALSE);
+
+	/* Allocate a hardware channel. */
+	ret = nouveau_channel_alloc(context_dev(ctx), 0xbeef0201, 0xbeef0202,
+				    &nctx->hw.chan);
+	if (ret) {
+		nouveau_error("Error initializing the FIFO.\n");
+		return GL_FALSE;
+	}
+
+	/* Have the channel call back into this context whenever it is
+	 * flushed. */
+	nctx->hw.chan->flush_notify = nouveau_channel_flush_notify;
+	nctx->hw.chan->user_private = nctx;
+
+	/* Enable any supported extensions. */
+	driInitExtensions(ctx, nouveau_extensions, GL_TRUE);
+
+	return GL_TRUE;
+}
+
+/*
+ * Undo nouveau_context_init().  Each helper module and the hardware
+ * channel is only torn down if it is present, so this copes with a
+ * context that was only partially initialized.
+ */
+void
+nouveau_context_deinit(GLcontext *ctx)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+
+	if (TNL_CONTEXT(ctx))
+		_tnl_DestroyContext(ctx);
+
+	if (vbo_context(ctx))
+		_vbo_DestroyContext(ctx);
+
+	if (SWRAST_CONTEXT(ctx))
+		_swrast_DestroyContext(ctx);
+
+	if (ctx->Meta)
+		_mesa_meta_free(ctx);
+
+	if (nctx->hw.chan)
+		nouveau_channel_free(&nctx->hw.chan);
+
+	/* Unlike the modules above, these two run unconditionally. */
+	nouveau_bo_state_destroy(ctx);
+	_mesa_free_context_data(ctx);
+}
+
+/*
+ * DRI entry point for context destruction: hand the GL context attached
+ * to `dri_ctx' to the chipset driver's context_destroy hook.
+ */
+void
+nouveau_context_destroy(__DRIcontext *dri_ctx)
+{
+	struct nouveau_context *owner = dri_ctx->driverPrivate;
+	GLcontext *gl_ctx = &owner->base;
+
+	context_drv(gl_ctx)->context_destroy(gl_ctx);
+}
+
+/*
+ * Refresh the window-system renderbuffers of `draw' from the DRI2 loader.
+ *
+ * Does nothing if the drawable's stamp has not changed since the last
+ * call.  Otherwise asks the loader for the front buffer plus, depending
+ * on the framebuffer's visual, the back and depth/stencil buffers, copies
+ * the returned geometry into the matching nouveau surfaces, re-opens the
+ * buffer object of every surface whose name changed, and finally resizes
+ * the framebuffer to the drawable's dimensions.
+ */
+void
+nouveau_update_renderbuffers(__DRIcontext *dri_ctx, __DRIdrawable *draw)
+{
+	GLcontext *ctx = dri_ctx->driverPrivate;
+	__DRIscreen *screen = dri_ctx->driScreenPriv;
+	struct gl_framebuffer *fb = draw->driverPrivate;
+	unsigned int attachments[10];
+	__DRIbuffer *buffers = NULL;
+	int i = 0, count, ret;
+
+	/* NOTE(review): the stamp is recorded before the loader is asked,
+	 * so if getBuffers fails below the update is not retried until the
+	 * stamp changes again. */
+	if (draw->lastStamp == *draw->pStamp)
+		return;
+	draw->lastStamp = *draw->pStamp;
+
+	/* Build the attachment request; at most three entries are used. */
+	attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
+	if (fb->Visual.doubleBufferMode)
+		attachments[i++] = __DRI_BUFFER_BACK_LEFT;
+	if (fb->Visual.haveDepthBuffer && fb->Visual.haveStencilBuffer)
+		attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL;
+	else if (fb->Visual.haveDepthBuffer)
+		attachments[i++] = __DRI_BUFFER_DEPTH;
+	else if (fb->Visual.haveStencilBuffer)
+		attachments[i++] = __DRI_BUFFER_STENCIL;
+
+	/* The loader also updates draw->w and draw->h through the pointers
+	 * passed here. */
+	buffers = (*screen->dri2.loader->getBuffers)(draw, &draw->w, &draw->h,
+						     attachments, i, &count,
+						     draw->loaderPrivate);
+	if (buffers == NULL)
+		return;
+
+	for (i = 0; i < count; i++) {
+		struct gl_renderbuffer *rb;
+		struct nouveau_surface *s;
+		uint32_t old_handle;
+		int index;
+
+		/* Translate the DRI2 attachment into a Mesa buffer index. */
+		switch (buffers[i].attachment) {
+		case __DRI_BUFFER_FRONT_LEFT:
+		case __DRI_BUFFER_FAKE_FRONT_LEFT:
+			index = BUFFER_FRONT_LEFT;
+			break;
+		case __DRI_BUFFER_BACK_LEFT:
+			index = BUFFER_BACK_LEFT;
+			break;
+		case __DRI_BUFFER_DEPTH:
+		case __DRI_BUFFER_DEPTH_STENCIL:
+			/* A combined depth/stencil buffer only updates the
+			 * depth attachment's surface. */
+			index = BUFFER_DEPTH;
+			break;
+		case __DRI_BUFFER_STENCIL:
+			index = BUFFER_STENCIL;
+			break;
+		default:
+			/* Unknown attachment: `index' has no value here, so
+			 * this relies on the assertion being compiled in
+			 * (nouveau_driver.h undefines NDEBUG). */
+			assert(0);
+		}
+
+		rb = fb->Attachment[index].Renderbuffer;
+		s = &to_nouveau_renderbuffer(rb)->surface;
+
+		s->width = draw->w;
+		s->height = draw->h;
+		s->pitch = buffers[i].pitch;
+		s->cpp = buffers[i].cpp;
+
+		/* Don't bother to reopen the bo if it happens to be
+		 * the same. */
+		if (s->bo) {
+			ret = nouveau_bo_handle_get(s->bo, &old_handle);
+			assert(!ret);
+		}
+
+		/* old_handle is only read when s->bo was set above. */
+		if (!s->bo || old_handle != buffers[i].name) {
+			nouveau_bo_ref(NULL, &s->bo);
+			ret = nouveau_bo_handle_ref(context_dev(ctx),
+						    buffers[i].name, &s->bo);
+			assert(!ret);
+		}
+	}
+
+	/* No GL context is passed for this resize. */
+	_mesa_resize_framebuffer(NULL, fb, draw->w, draw->h);
+}
+
+/*
+ * Bring the framebuffer of `draw' up to date: store the drawable's
+ * current stamp in `*stamp', refresh its renderbuffers, resize the
+ * framebuffer to the drawable's size and flag the FRAMEBUFFER state as
+ * dirty.
+ */
+static void
+update_framebuffer(__DRIcontext *dri_ctx, __DRIdrawable *draw,
+		   int *stamp)
+{
+	struct gl_framebuffer *framebuffer = draw->driverPrivate;
+	GLcontext *gl_ctx = dri_ctx->driverPrivate;
+
+	/* The stamp is saved before the renderbuffers are refreshed. */
+	*stamp = *draw->pStamp;
+	nouveau_update_renderbuffers(dri_ctx, draw);
+
+	_mesa_resize_framebuffer(gl_ctx, framebuffer, draw->w, draw->h);
+	context_dirty(gl_ctx, FRAMEBUFFER);
+}
+
+/*
+ * DRI entry point for MakeCurrent.  With a NULL `dri_ctx' the current
+ * context is simply released.  Otherwise the framebuffers of any
+ * drawable that is not already bound to the context are refreshed, the
+ * binding is passed on to core Mesa, derived state is updated and the
+ * channel is flushed.  Always returns GL_TRUE.
+ */
+GLboolean
+nouveau_context_make_current(__DRIcontext *dri_ctx, __DRIdrawable *dri_draw,
+			     __DRIdrawable *dri_read)
+{
+	struct nouveau_context *nctx;
+	GLcontext *ctx;
+
+	if (!dri_ctx) {
+		_mesa_make_current(NULL, NULL, NULL);
+		return GL_TRUE;
+	}
+
+	nctx = dri_ctx->driverPrivate;
+	ctx = &nctx->base;
+
+	/* Ask the X server for new renderbuffers. */
+	if (dri_draw->driverPrivate != ctx->WinSysDrawBuffer)
+		update_framebuffer(dri_ctx, dri_draw,
+				   &dri_ctx->dri2.draw_stamp);
+
+	/* The read drawable only needs its own update when it is a
+	 * different drawable. */
+	if (dri_draw != dri_read &&
+	    dri_read->driverPrivate != ctx->WinSysReadBuffer)
+		update_framebuffer(dri_ctx, dri_read,
+				   &dri_ctx->dri2.read_stamp);
+
+	/* Pass it down to mesa. */
+	_mesa_make_current(ctx, dri_draw->driverPrivate,
+			   dri_read->driverPrivate);
+	_mesa_update_state(ctx);
+
+	FIRE_RING(context_chan(ctx));
+
+	return GL_TRUE;
+}
+
+/*
+ * DRI entry point for unbinding a context.  Nothing needs to be done;
+ * always reports success.
+ */
+GLboolean
+nouveau_context_unbind(__DRIcontext *dri_ctx)
+{
+	return GL_TRUE;
+}
+
+/*
+ * Switch the context to fallback level `mode' (never below HWTNL).
+ * For levels below SWRAST the dirty hardware state is emitted; for
+ * SWRAST the channel is flushed instead.
+ */
+void
+nouveau_fallback(GLcontext *ctx, enum nouveau_fallback mode)
+{
+	to_nouveau_context(ctx)->fallback = MAX2(HWTNL, mode);
+
+	if (mode >= SWRAST) {
+		FIRE_RING(context_chan(ctx));
+		return;
+	}
+
+	nouveau_state_emit(ctx);
+}
+
+/*
+ * Make sure the window-system framebuffers bound to `ctx' are current
+ * before they are used.  A framebuffer is refreshed when it is the
+ * window-system one (Name == 0) and its drawable's stamp differs from
+ * the one recorded in the DRI context.  If that left any state dirty,
+ * the state is emitted and the channel flushed.
+ */
+void
+nouveau_validate_framebuffer(GLcontext *ctx)
+{
+	__DRIcontext *dri_ctx = to_nouveau_context(ctx)->dri_context;
+	__DRIdrawable *dri_draw = dri_ctx->driDrawablePriv;
+	__DRIdrawable *dri_read = dri_ctx->driReadablePriv;
+
+	if (ctx->DrawBuffer->Name == 0) {
+		if (dri_ctx->dri2.draw_stamp != *dri_draw->pStamp)
+			update_framebuffer(dri_ctx, dri_draw,
+					   &dri_ctx->dri2.draw_stamp);
+	}
+
+	/* A read drawable identical to the draw drawable was handled
+	 * above. */
+	if (ctx->ReadBuffer->Name == 0 && dri_draw != dri_read) {
+		if (dri_ctx->dri2.read_stamp != *dri_read->pStamp)
+			update_framebuffer(dri_ctx, dri_read,
+					   &dri_ctx->dri2.read_stamp);
+	}
+
+	if (nouveau_next_dirty_state(ctx) < 0)
+		return;
+
+	nouveau_state_emit(ctx);
+	FIRE_RING(context_chan(ctx));
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.h b/src/mesa/drivers/dri/nouveau/nouveau_context.h
new file mode 100644
index 0000000000..3dbe72900a
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_context.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NOUVEAU_CONTEXT_H__
+#define __NOUVEAU_CONTEXT_H__
+
+#include "nouveau_screen.h"
+#include "nouveau_state.h"
+#include "nouveau_bo_state.h"
+#include "nouveau_render.h"
+
+#include "main/bitset.h"
+
+/*
+ * Fallback level of a context, in increasing order.  The code compares
+ * levels with `<' (e.g. `fallback < SWRAST'), so the order of the
+ * enumerators is significant.
+ */
+enum nouveau_fallback {
+	HWTNL = 0,	/* initial level set by nouveau_context_init() */
+	SWTNL,
+	SWRAST,
+};
+
+/*
+ * Hardware objects owned by a context.
+ */
+struct nouveau_hw_state {
+	/* Command channel; allocated in nouveau_context_init() and freed
+	 * in nouveau_context_deinit(). */
+	struct nouveau_channel *chan;
+
+	/* NOTE(review): the objects below are not set up by the common
+	 * context code; presumably the chipset-specific drivers create
+	 * the ones they need -- confirm there. */
+	struct nouveau_notifier *ntfy;
+	struct nouveau_grobj *eng3d;
+	struct nouveau_grobj *eng3dm;
+	struct nouveau_grobj *surf3d;
+	struct nouveau_grobj *m2mf;
+	struct nouveau_grobj *surf2d;
+	struct nouveau_grobj *rop;
+	struct nouveau_grobj *patt;
+	struct nouveau_grobj *rect;
+	struct nouveau_grobj *swzsurf;
+	struct nouveau_grobj *sifm;
+};
+
+/*
+ * Driver context: the core Mesa context followed by nouveau's own state.
+ */
+struct nouveau_context {
+	/* Must stay the first member: to_nouveau_context() is a plain
+	 * pointer cast from the GLcontext. */
+	GLcontext base;
+	/* DRI context this GL context belongs to; set in
+	 * nouveau_context_create(). */
+	__DRIcontext *dri_context;
+	struct nouveau_screen *screen;
+
+	/* One bit per NOUVEAU_STATE_*; set through context_dirty(). */
+	BITSET_DECLARE(dirty, MAX_NOUVEAU_STATE);
+	/* Current fallback level, see nouveau_fallback(). */
+	enum nouveau_fallback fallback;
+
+	struct nouveau_hw_state hw;
+	struct nouveau_bo_state bo;
+	struct nouveau_render_state render;
+};
+
+/* Downcast a GLcontext pointer to the nouveau_context embedding it. */
+#define to_nouveau_context(ctx)	((struct nouveau_context *)(ctx))
+
+/*
+ * Accessors for frequently used per-context objects: the device, its
+ * chipset id, the command channel, the 3D engine object and the chipset
+ * driver vtable.
+ */
+#define context_dev(ctx) \
+	(to_nouveau_context(ctx)->screen->device)
+#define context_chipset(ctx) \
+	(context_dev(ctx)->chipset)
+#define context_chan(ctx) \
+	(to_nouveau_context(ctx)->hw.chan)
+#define context_eng3d(ctx) \
+	(to_nouveau_context(ctx)->hw.eng3d)
+#define context_drv(ctx) \
+	(to_nouveau_context(ctx)->screen->driver)
+
+/*
+ * context_dirty(ctx, s) flags NOUVEAU_STATE_<s> as dirty;
+ * context_dirty_i(ctx, s, i) does the same for the numbered state
+ * NOUVEAU_STATE_<s>0 + i.  `i' is parenthesized so that an expression
+ * argument (e.g. `a | b') cannot be regrouped by the addition.
+ */
+#define context_dirty(ctx, s) \
+	BITSET_SET(to_nouveau_context(ctx)->dirty, NOUVEAU_STATE_##s)
+#define context_dirty_i(ctx, s, i) \
+	BITSET_SET(to_nouveau_context(ctx)->dirty, NOUVEAU_STATE_##s##0 + (i))
+
+/* Call the chipset driver's emit function for NOUVEAU_STATE_<s>. */
+#define context_emit(ctx, s) \
+	context_drv(ctx)->emit[NOUVEAU_STATE_##s](ctx, NOUVEAU_STATE_##s)
+
+/* DRI entry point: create a GL context through the screen's chipset
+ * driver and attach it to `dri_ctx'.  Returns GL_FALSE on failure. */
+GLboolean
+nouveau_context_create(gl_api api,
+		       const __GLcontextModes *visual, __DRIcontext *dri_ctx,
+		       void *share_ctx);
+
+/* Chipset-independent context set-up.  Returns GL_FALSE on failure. */
+GLboolean
+nouveau_context_init(GLcontext *ctx, struct nouveau_screen *screen,
+		     const GLvisual *visual, GLcontext *share_ctx);
+
+/* Counterpart of nouveau_context_init(). */
+void
+nouveau_context_deinit(GLcontext *ctx);
+
+/* DRI entry point: destroy the GL context attached to `dri_ctx' through
+ * the chipset driver's context_destroy hook. */
+void
+nouveau_context_destroy(__DRIcontext *dri_ctx);
+
+/* Re-fetch the buffers of `draw' from the DRI2 loader if the drawable's
+ * stamp changed. */
+void
+nouveau_update_renderbuffers(__DRIcontext *dri_ctx, __DRIdrawable *draw);
+
+/* DRI entry point: bind `dri_ctx' to the given draw and read drawables,
+ * or release the current context when `dri_ctx' is NULL. */
+GLboolean
+nouveau_context_make_current(__DRIcontext *dri_ctx, __DRIdrawable *ddraw,
+			     __DRIdrawable *rdraw);
+
+/* DRI entry point: unbind a context; always returns GL_TRUE. */
+GLboolean
+nouveau_context_unbind(__DRIcontext *dri_ctx);
+
+/* Switch the context to the given fallback level. */
+void
+nouveau_fallback(GLcontext *ctx, enum nouveau_fallback mode);
+
+/* Refresh out-of-date window-system framebuffers and emit any state
+ * that became dirty as a result. */
+void
+nouveau_validate_framebuffer(GLcontext *ctx);
+
+#endif
+
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c
new file mode 100644
index 0000000000..4ec864c181
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_fbo.h"
+#include "nouveau_util.h"
+
+#include "drivers/common/meta.h"
+
+/*
+ * GetString hook.  Answers GL_VENDOR with a constant string and
+ * GL_RENDERER with a string built from the chipset id and DRIVER_DATE;
+ * returns NULL for every other name.
+ *
+ * The renderer string lives in a static buffer that is rewritten on
+ * every GL_RENDERER query.
+ */
+static const GLubyte *
+nouveau_get_string(GLcontext *ctx, GLenum name)
+{
+	static char buffer[128];
+	char hardware_name[32];
+
+	if (name == GL_VENDOR)
+		return (GLubyte *)"Nouveau";
+
+	if (name != GL_RENDERER)
+		return NULL;
+
+	sprintf(hardware_name, "nv%02X", context_chipset(ctx));
+	driGetRendererString(buffer, hardware_name, DRIVER_DATE, 0);
+
+	return (GLubyte *)buffer;
+}
+
+/*
+ * Flush hook: submit the pending commands of the channel and, when
+ * rendering goes to the front buffer of the window-system framebuffer,
+ * ask the DRI2 loader to flush the front buffer.
+ */
+static void
+nouveau_flush(GLcontext *ctx)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct gl_framebuffer *fb;
+	__DRIdrawable *drawable;
+
+	FIRE_RING(context_chan(ctx));
+
+	fb = ctx->DrawBuffer;
+	if (fb->Name != 0 ||
+	    fb->_ColorDrawBufferIndexes[0] != BUFFER_FRONT_LEFT)
+		return;
+
+	drawable = nctx->dri_context->driDrawablePriv;
+	nctx->screen->dri_screen->dri2.loader->flushFrontBuffer(
+		drawable, drawable->loaderPrivate);
+}
+
+/*
+ * Finish hook: currently identical to nouveau_flush().
+ * NOTE(review): nothing here waits for the submitted commands to
+ * complete -- confirm whether that is sufficient for glFinish().
+ */
+static void
+nouveau_finish(GLcontext *ctx)
+{
+	nouveau_flush(ctx);
+}
+
+/*
+ * Clear hook.  `buffers' is a mask of BUFFER_BIT_* values.
+ *
+ * Color, depth and stencil buffers are cleared with the chipset driver's
+ * surface_fill hook, restricted to the rectangle returned by
+ * get_scissors() and honouring the color, depth and stencil write
+ * masks.  Every buffer handled this way is removed from `buffers';
+ * whatever remains is passed to _mesa_meta_Clear().
+ */
+void
+nouveau_clear(GLcontext *ctx, GLbitfield buffers)
+{
+	struct gl_framebuffer *fb = ctx->DrawBuffer;
+	int x, y, w, h;
+	int i, buf;
+
+	nouveau_validate_framebuffer(ctx);
+	get_scissors(fb, &x, &y, &w, &h);
+
+	for (i = 0; i < BUFFER_COUNT; i++) {
+		struct nouveau_surface *s;
+		unsigned mask, value;
+
+		/* Skip attachments that were not requested (or that an
+		 * earlier iteration already took care of). */
+		buf = buffers & (1 << i);
+		if (!buf)
+			continue;
+
+		s = &to_nouveau_renderbuffer(
+			fb->Attachment[i].Renderbuffer->Wrapped)->surface;
+
+		if (buf & BUFFER_BITS_COLOR) {
+			mask = pack_rgba_i(s->format, ctx->Color.ColorMask[0]);
+			value = pack_rgba_f(s->format, ctx->Color.ClearColor);
+
+			/* A zero mask means nothing would be written. */
+			if (mask)
+				context_drv(ctx)->surface_fill(
+					ctx, s, mask, value, x, y, w, h);
+
+			buffers &= ~buf;
+
+		} else if (buf & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) {
+			/* Depth and stencil are cleared with a single fill
+			 * of this surface: the depth part of the mask is
+			 * enabled only if depth was requested and depth
+			 * writes are on, the stencil part only if stencil
+			 * was requested. */
+			mask = pack_zs_i(s->format,
+					 (buffers & BUFFER_BIT_DEPTH &&
+					  ctx->Depth.Mask) ? ~0 : 0,
+					 (buffers & BUFFER_BIT_STENCIL ?
+					  ctx->Stencil.WriteMask[0] : 0));
+			value = pack_zs_f(s->format,
+					  ctx->Depth.Clear,
+					  ctx->Stencil.Clear);
+
+			if (mask)
+				context_drv(ctx)->surface_fill(
+					ctx, s, mask, value, x, y, w, h);
+
+			/* Both bits are dropped, so the other attachment is
+			 * skipped when the loop reaches it. */
+			buffers &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
+		}
+	}
+
+	/* Anything not handled above goes to the meta implementation. */
+	if (buffers)
+		_mesa_meta_Clear(ctx, buffers);
+}
+
+/*
+ * Install the generic driver hooks: the functions implemented in this
+ * file plus the meta implementations used for the pixel paths.
+ */
+void
+nouveau_driver_functions_init(struct dd_function_table *functions)
+{
+	/* Implemented by nouveau itself. */
+	functions->GetString = nouveau_get_string;
+	functions->Clear = nouveau_clear;
+	functions->Flush = nouveau_flush;
+	functions->Finish = nouveau_finish;
+
+	/* Delegated to the common meta code. */
+	functions->Bitmap = _mesa_meta_Bitmap;
+	functions->DrawPixels = _mesa_meta_DrawPixels;
+	functions->CopyPixels = _mesa_meta_CopyPixels;
+	functions->BlitFramebuffer = _mesa_meta_BlitFramebuffer;
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.h b/src/mesa/drivers/dri/nouveau/nouveau_driver.h
new file mode 100644
index 0000000000..283f6eac2c
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NOUVEAU_DRIVER_H__
+#define __NOUVEAU_DRIVER_H__
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/formats.h"
+#include "utils.h"
+#include "dri_util.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+#include "nouveau_device.h"
+#include "nouveau_pushbuf.h"
+#include "nouveau_grobj.h"
+#include "nouveau_channel.h"
+#include "nouveau_bo.h"
+#include "nouveau_notifier.h"
+#include "nouveau_screen.h"
+#include "nouveau_state.h"
+#include "nouveau_surface.h"
+
+#define DRIVER_DATE	"20091015"
+#define DRIVER_AUTHOR	"Nouveau"
+
+/*
+ * Chipset-specific driver vtable, reached through the screen
+ * (context_drv() in nouveau_context.h).
+ */
+struct nouveau_driver {
+	/* Create / destroy a GL context for this chipset family.
+	 * context_create returns NULL on failure. */
+	GLcontext *(*context_create)(struct nouveau_screen *screen,
+				     const GLvisual *visual,
+				     GLcontext *share_ctx);
+	void (*context_destroy)(GLcontext *ctx);
+
+	/* Copy a w x h rectangle from (sx, sy) in `src' to (dx, dy) in
+	 * `dst'. */
+	void (*surface_copy)(GLcontext *ctx,
+			     struct nouveau_surface *dst,
+			     struct nouveau_surface *src,
+			     int dx, int dy, int sx, int sy, int w, int h);
+	/* Fill a w x h rectangle at (dx, dy) in `dst' with `value',
+	 * subject to the write mask `mask'. */
+	void (*surface_fill)(GLcontext *ctx,
+			     struct nouveau_surface *dst,
+			     unsigned mask, unsigned value,
+			     int dx, int dy, int w, int h);
+
+	/* State emission functions indexed by NOUVEAU_STATE_* (see
+	 * context_emit()); presumably num_emit is the number of entries
+	 * -- confirm in the chipset drivers. */
+	nouveau_state_func *emit;
+	int num_emit;
+};
+
+/*
+ * Print a printf-style message to stderr, prefixed with the name of the
+ * calling function.  `format' must be a string literal because it is
+ * concatenated with the prefix.
+ */
+#define nouveau_error(format, ...) \
+	fprintf(stderr, "%s: " format, __func__, ## __VA_ARGS__)
+
+/* Clear hook, see nouveau_driver.c. */
+void
+nouveau_clear(GLcontext *ctx, GLbitfield buffers);
+
+/* Set up the span functions of `ctx'; called from
+ * nouveau_context_init(). */
+void
+nouveau_span_functions_init(GLcontext *ctx);
+
+/* Install the generic driver hooks into `functions'. */
+void
+nouveau_driver_functions_init(struct dd_function_table *functions);
+
+/* Install the texture hooks into `functions'. */
+void
+nouveau_texture_functions_init(struct dd_function_table *functions);
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
new file mode 100644
index 0000000000..8be7edb150
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_fbo.h"
+#include "nouveau_context.h"
+#include "nouveau_texture.h"
+
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/fbobject.h"
+/* Set the renderbuffer's format fields and its surface's cpp/format for <internalFormat>; returns GL_FALSE if the format is not handled. */
+static GLboolean
+set_renderbuffer_format(struct gl_renderbuffer *rb, GLenum internalFormat)
+{
+	struct nouveau_surface *s = &to_nouveau_renderbuffer(rb)->surface;
+	/* Stored before validation: InternalFormat is updated even when GL_FALSE is returned below. */
+	rb->InternalFormat  = internalFormat;
+	/* cpp is 4 for the 32-bit formats and 2 for the 16-bit ones (presumably bytes per pixel). */
+	switch (internalFormat) {
+	case GL_RGB:
+	case GL_RGB8:
+		rb->_BaseFormat  = GL_RGB;
+		rb->Format = MESA_FORMAT_XRGB8888;
+		rb->DataType = GL_UNSIGNED_BYTE;
+		s->cpp = 4;
+		break;
+	case GL_RGBA:
+	case GL_RGBA8:
+		rb->_BaseFormat  = GL_RGBA;
+		rb->Format = MESA_FORMAT_ARGB8888;
+		rb->DataType = GL_UNSIGNED_BYTE;
+		s->cpp = 4;
+		break;
+	case GL_RGB5:
+		rb->_BaseFormat  = GL_RGB;
+		rb->Format = MESA_FORMAT_RGB565;
+		rb->DataType = GL_UNSIGNED_BYTE;
+		s->cpp = 2;
+		break;
+	case GL_DEPTH_COMPONENT16:
+		rb->_BaseFormat  = GL_DEPTH_COMPONENT;
+		rb->Format = MESA_FORMAT_Z16;
+		rb->DataType = GL_UNSIGNED_SHORT;
+		s->cpp = 2;
+		break;
+	case GL_DEPTH_COMPONENT24:	/* depth-only and stencil-only requests share the packed Z24_S8 layout */
+	case GL_STENCIL_INDEX8_EXT:
+	case GL_DEPTH24_STENCIL8_EXT:
+		rb->_BaseFormat  = GL_DEPTH_STENCIL;
+		rb->Format = MESA_FORMAT_Z24_S8;
+		rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
+		s->cpp = 4;
+		break;
+	default:
+		return GL_FALSE;
+	}
+	/* Mirror the chosen format into the surface. */
+	s->format = rb->Format;
+
+	return GL_TRUE;
+}
+/* AllocStorage hook for driver-owned renderbuffers: validates the format, records the size and allocates a tiled, mappable VRAM surface. */
+static GLboolean
+nouveau_renderbuffer_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
+			     GLenum internalFormat,
+			     GLuint width, GLuint height)
+{
+	struct nouveau_renderbuffer *nrb = to_nouveau_renderbuffer(rb);
+	const GLboolean supported = set_renderbuffer_format(rb, internalFormat);
+	/* Nothing else is touched when the format is rejected. */
+	if (supported) {
+		rb->Width = width;
+		rb->Height = height;
+		/* The surface uses the Mesa format picked by set_renderbuffer_format(). */
+		nouveau_surface_alloc(ctx, &nrb->surface, TILED,
+				      NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP,
+				      rb->Format, width, height);
+		context_dirty(ctx, FRAMEBUFFER);
+	}
+	return supported;
+}
+/* Delete hook: detaches the renderbuffer's surface, then frees the renderbuffer itself. */
+static void
+nouveau_renderbuffer_del(struct gl_renderbuffer *rb)
+{
+	struct nouveau_surface *s = &to_nouveau_renderbuffer(rb)->surface;
+	/* Referencing NULL into the surface presumably drops its hold on the backing storage -- confirm in nouveau_surface_ref(). */
+	nouveau_surface_ref(NULL, s);
+	FREE(rb);
+}
+/* NewRenderbuffer hook: allocates a zeroed nouveau_renderbuffer named <name> and installs the storage/delete hooks; NULL on allocation failure. */
+static struct gl_renderbuffer *
+nouveau_renderbuffer_new(GLcontext *ctx, GLuint name)
+{
+	struct nouveau_renderbuffer *nrb = CALLOC_STRUCT(nouveau_renderbuffer);
+	struct gl_renderbuffer *base;
+
+	if (nrb == NULL)
+		return NULL;
+
+	/* The Mesa renderbuffer is the wrapper's first member; that is what callers get. */
+	base = &nrb->base;
+	_mesa_init_renderbuffer(base, name);
+	base->AllocStorage = nouveau_renderbuffer_storage;
+	base->Delete = nouveau_renderbuffer_del;
+
+	return base;
+}
+/* AllocStorage hook installed by nouveau_renderbuffer_dri_new(): records format and size only; no surface is allocated in this function. */
+static GLboolean
+nouveau_renderbuffer_dri_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
+				 GLenum internalFormat,
+				 GLuint width, GLuint height)
+{
+	if (!set_renderbuffer_format(rb, internalFormat))
+		return GL_FALSE;
+
+	rb->Width = width;
+	rb->Height = height;
+
+	return GL_TRUE;
+}
+/* Create a renderbuffer of internal format <format> for a DRI drawable (<drawable> itself is not used here); NULL on failure. */
+struct gl_renderbuffer *
+nouveau_renderbuffer_dri_new(GLenum format, __DRIdrawable *drawable)
+{
+	struct gl_renderbuffer *rb = nouveau_renderbuffer_new(NULL, 0);
+
+	if (rb != NULL) {
+		/* Swap the default storage hook for the DRI one. */
+		rb->AllocStorage = nouveau_renderbuffer_dri_storage;
+
+		if (set_renderbuffer_format(rb, format))
+			return rb;
+
+		/* Unsupported format: release the half-initialized object. */
+		nouveau_renderbuffer_del(rb);
+	}
+
+	return NULL;
+}
+/* NewFramebuffer hook: allocates a zeroed nouveau_framebuffer and initializes its base as a user framebuffer named <name>; NULL on allocation failure. */
+static struct gl_framebuffer *
+nouveau_framebuffer_new(GLcontext *ctx, GLuint name)
+{
+	struct nouveau_framebuffer *wrapper = CALLOC_STRUCT(nouveau_framebuffer);
+	struct gl_framebuffer *fb = NULL;
+
+	if (wrapper != NULL) {
+		fb = &wrapper->base;
+		_mesa_initialize_user_framebuffer(fb, name);
+	}
+
+	return fb;
+}
+/* Allocate a zeroed nouveau_framebuffer and initialize its base as a window framebuffer for <visual>; NULL on allocation failure. */
+struct gl_framebuffer *
+nouveau_framebuffer_dri_new(const GLvisual *visual)
+{
+	struct nouveau_framebuffer *wrapper = CALLOC_STRUCT(nouveau_framebuffer);
+	struct gl_framebuffer *fb = NULL;
+
+	if (wrapper != NULL) {
+		fb = &wrapper->base;
+		_mesa_initialize_window_framebuffer(fb, visual);
+	}
+
+	return fb;
+}
+/* BindFramebuffer hook: only flags the FRAMEBUFFER state as dirty; <target>, <dfb> and <rfb> are unused. */
+static void
+nouveau_bind_framebuffer(GLcontext *ctx, GLenum target,
+			 struct gl_framebuffer *dfb,
+			 struct gl_framebuffer *rfb)
+{
+	context_dirty(ctx, FRAMEBUFFER);
+}
+/* FramebufferRenderbuffer hook: delegates the attachment to Mesa's generic implementation, then flags the FRAMEBUFFER state as dirty. */
+static void
+nouveau_framebuffer_renderbuffer(GLcontext *ctx, struct gl_framebuffer *fb,
+				 GLenum attachment, struct gl_renderbuffer *rb)
+{
+	_mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
+
+	context_dirty(ctx, FRAMEBUFFER);
+}
+/* Map a texture image's Mesa format to the internal format understood by set_renderbuffer_format(). */
+static GLenum
+get_tex_format(struct gl_texture_image *ti)
+{
+	switch (ti->TexFormat) {
+	case MESA_FORMAT_ARGB8888:
+		return GL_RGBA8;
+	case MESA_FORMAT_XRGB8888:
+		return GL_RGB8;
+	case MESA_FORMAT_RGB565:
+		return GL_RGB5;
+	default:	/* NOTE(review): no return on this path -- relies on assert() staying active (nouveau_driver.h undefines NDEBUG) */
+		assert(0);
+	}
+}
+/* RenderTexture hook: makes the attachment's renderbuffer describe the attached texture image and share its surface. */
+static void
+nouveau_render_texture(GLcontext *ctx, struct gl_framebuffer *fb,
+		       struct gl_renderbuffer_attachment *att)
+{
+	struct gl_renderbuffer *rb = att->Renderbuffer;
+	struct gl_texture_image *ti =
+		att->Texture->Image[att->CubeMapFace][att->TextureLevel];
+	int ret;	/* only consumed by the assert() below */
+
+	/* Allocate a renderbuffer object for the texture if we
+	 * haven't already done so. */
+	if (!rb) {
+		rb = nouveau_renderbuffer_new(ctx, ~0);
+		assert(rb);
+		/* No AllocStorage hook for this wrapper: its surface is taken from the texture image below. */
+		rb->AllocStorage = NULL;
+		_mesa_reference_renderbuffer(&att->Renderbuffer, rb);
+	}
+
+	/* Update the renderbuffer fields from the texture. */
+	ret = set_renderbuffer_format(rb, get_tex_format(ti));
+	assert(ret);
+	/* Size comes from the image; the renderbuffer surface is then made to reference the image's surface. */
+	rb->Width = ti->Width;
+	rb->Height = ti->Height;
+	nouveau_surface_ref(&to_nouveau_teximage(ti)->surface,
+			    &to_nouveau_renderbuffer(rb)->surface);
+
+	context_dirty(ctx, FRAMEBUFFER);
+}
+/* FinishRenderTexture hook: marks the attached texture dirty via texture_dirty() (presumably so its state is re-validated before sampling -- confirm). */
+static void
+nouveau_finish_render_texture(GLcontext *ctx,
+			      struct gl_renderbuffer_attachment *att)
+{
+	texture_dirty(att->Texture);
+}
+/* Install this file's framebuffer-object hooks into the device-driver function table <functions>. */
+void
+nouveau_fbo_functions_init(struct dd_function_table *functions)
+{
+	functions->NewFramebuffer = nouveau_framebuffer_new;
+	functions->NewRenderbuffer = nouveau_renderbuffer_new;
+	functions->BindFramebuffer = nouveau_bind_framebuffer;
+	functions->FramebufferRenderbuffer = nouveau_framebuffer_renderbuffer;
+	functions->RenderTexture = nouveau_render_texture;
+	functions->FinishRenderTexture = nouveau_finish_render_texture;
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.h b/src/mesa/drivers/dri/nouveau/nouveau_fbo.h
new file mode 100644
index 0000000000..5ae984bbff
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NOUVEAU_FBO_H__
+#define __NOUVEAU_FBO_H__
+/* Driver framebuffer: the Mesa framebuffer must stay the first member, because to_nouveau_framebuffer() is a plain pointer cast. */
+struct nouveau_framebuffer {
+	struct gl_framebuffer base;
+	struct nouveau_bo *lma_bo;	/* NOTE(review): purpose not visible in this header */
+};
+#define to_nouveau_framebuffer(x) ((struct nouveau_framebuffer *)(x))
+/* Driver renderbuffer: the Mesa renderbuffer must stay the first member, because to_nouveau_renderbuffer() is a plain pointer cast. */
+struct nouveau_renderbuffer {
+	struct gl_renderbuffer base;
+	struct nouveau_surface surface;	/* backing surface description (cpp and format are filled in by nouveau_fbo.c) */
+};
+#define to_nouveau_renderbuffer(x) ((struct nouveau_renderbuffer *)(x))
+/* Window-system framebuffer for <visual>; returns NULL on allocation failure. */
+struct gl_framebuffer *
+nouveau_framebuffer_dri_new(const GLvisual *visual);
+/* Renderbuffer of internal format <format> for a DRI drawable; returns NULL on failure or unsupported format. */
+struct gl_renderbuffer *
+nouveau_renderbuffer_dri_new(GLenum format, __DRIdrawable *drawable);
+/* Installs the framebuffer-object hooks into <functions>. */
+void
+nouveau_fbo_functions_init(struct dd_function_table *functions);
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_gldefs.h b/src/mesa/drivers/dri/nouveau/nouveau_gldefs.h
new file mode 100644
index 0000000000..fbeed3baea
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_gldefs.h
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2007-2010 The Nouveau Project.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NOUVEAU_GLDEFS_H__
+#define __NOUVEAU_GLDEFS_H__
+/* Translate a GL blend factor into the numeric code handed to the hardware setup code; unknown values hit assert(0), which has no return after it. */
+static inline unsigned
+nvgl_blend_func(unsigned func)
+{
+	switch (func) {
+	case GL_ZERO:
+		return 0x0000;
+	case GL_ONE:
+		return 0x0001;
+	case GL_SRC_COLOR:
+		return 0x0300;
+	case GL_ONE_MINUS_SRC_COLOR:
+		return 0x0301;
+	case GL_SRC_ALPHA:
+		return 0x0302;
+	case GL_ONE_MINUS_SRC_ALPHA:
+		return 0x0303;
+	case GL_DST_ALPHA:
+		return 0x0304;
+	case GL_ONE_MINUS_DST_ALPHA:
+		return 0x0305;
+	case GL_DST_COLOR:
+		return 0x0306;
+	case GL_ONE_MINUS_DST_COLOR:
+		return 0x0307;
+	case GL_SRC_ALPHA_SATURATE:
+		return 0x0308;
+	case GL_CONSTANT_COLOR:
+		return 0x8001;
+	case GL_ONE_MINUS_CONSTANT_COLOR:
+		return 0x8002;
+	case GL_CONSTANT_ALPHA:
+		return 0x8003;
+	case GL_ONE_MINUS_CONSTANT_ALPHA:
+		return 0x8004;
+	default:
+		assert(0);
+	}
+}
+/* Translate a GL blend equation into its numeric hardware code; unknown values hit assert(0), which has no return after it. */
+static inline unsigned
+nvgl_blend_eqn(unsigned eqn)
+{
+	switch (eqn) {
+	case GL_FUNC_ADD:
+		return 0x8006;
+	case GL_MIN:
+		return 0x8007;
+	case GL_MAX:
+		return 0x8008;
+	case GL_FUNC_SUBTRACT:
+		return 0x800a;
+	case GL_FUNC_REVERSE_SUBTRACT:
+		return 0x800b;
+	default:
+		assert(0);
+	}
+}
+/* Translate a GL logic op into its numeric hardware code (0x1500..0x150f); unknown values hit assert(0), which has no return after it. */
+static inline unsigned
+nvgl_logicop_func(unsigned func)
+{
+	switch (func) {
+	case GL_CLEAR:
+		return 0x1500;
+	case GL_NOR:
+		return 0x1508;
+	case GL_AND_INVERTED:
+		return 0x1504;
+	case GL_COPY_INVERTED:
+		return 0x150c;
+	case GL_AND_REVERSE:
+		return 0x1502;
+	case GL_INVERT:
+		return 0x150a;
+	case GL_XOR:
+		return 0x1506;
+	case GL_NAND:
+		return 0x150e;
+	case GL_AND:
+		return 0x1501;
+	case GL_EQUIV:
+		return 0x1509;
+	case GL_NOOP:
+		return 0x1505;
+	case GL_OR_INVERTED:
+		return 0x150d;
+	case GL_COPY:
+		return 0x1503;
+	case GL_OR_REVERSE:
+		return 0x150b;
+	case GL_OR:
+		return 0x1507;
+	case GL_SET:
+		return 0x150f;
+	default:
+		assert(0);
+	}
+}
+/* Translate a GL comparison function into its numeric hardware code (0x0200..0x0207); unknown values hit assert(0), which has no return after it. */
+static inline unsigned
+nvgl_comparison_op(unsigned op)
+{
+	switch (op) {
+	case GL_NEVER:
+		return 0x0200;
+	case GL_LESS:
+		return 0x0201;
+	case GL_EQUAL:
+		return 0x0202;
+	case GL_LEQUAL:
+		return 0x0203;
+	case GL_GREATER:
+		return 0x0204;
+	case GL_NOTEQUAL:
+		return 0x0205;
+	case GL_GEQUAL:
+		return 0x0206;
+	case GL_ALWAYS:
+		return 0x0207;
+	default:
+		assert(0);
+	}
+}
+/* Translate a GL polygon mode into its numeric hardware code; unknown values hit assert(0), which has no return after it. */
+static inline unsigned
+nvgl_polygon_mode(unsigned mode)
+{
+	switch (mode) {
+	case GL_POINT:
+		return 0x1b00;
+	case GL_LINE:
+		return 0x1b01;
+	case GL_FILL:
+		return 0x1b02;
+	default:
+		assert(0);
+	}
+}
+/* Translate a GL stencil operation into its numeric hardware code; unknown values hit assert(0), which has no return after it. */
+static inline unsigned
+nvgl_stencil_op(unsigned op)
+{
+	switch (op) {
+	case GL_ZERO:
+		return 0x0000;
+	case GL_INVERT:
+		return 0x150a;
+	case GL_KEEP:
+		return 0x1e00;
+	case GL_REPLACE:
+		return 0x1e01;
+	case GL_INCR:
+		return 0x1e02;
+	case GL_DECR:
+		return 0x1e03;
+	case GL_INCR_WRAP_EXT:
+		return 0x8507;
+	case GL_DECR_WRAP_EXT:
+		return 0x8508;
+	default:
+		assert(0);
+	}
+}
+/* Translate a GL primitive type into its numeric hardware code (1..10); unknown values hit assert(0), which has no return after it. */
+static inline unsigned
+nvgl_primitive(unsigned prim)
+{
+	switch (prim) {
+	case GL_POINTS:
+		return 0x0001;
+	case GL_LINES:
+		return 0x0002;
+	case GL_LINE_LOOP:
+		return 0x0003;
+	case GL_LINE_STRIP:
+		return 0x0004;
+	case GL_TRIANGLES:
+		return 0x0005;
+	case GL_TRIANGLE_STRIP:
+		return 0x0006;
+	case GL_TRIANGLE_FAN:
+		return 0x0007;
+	case GL_QUADS:
+		return 0x0008;
+	case GL_QUAD_STRIP:
+		return 0x0009;
+	case GL_POLYGON:
+		return 0x000a;
+	default:
+		assert(0);
+	}
+}
+/* Translate a GL texture wrap mode into its numeric hardware code (1..5); unknown values hit assert(0), which has no return after it. */
+static inline unsigned
+nvgl_wrap_mode(unsigned wrap)
+{
+	switch (wrap) {
+	case GL_REPEAT:
+		return 0x1;
+	case GL_MIRRORED_REPEAT:
+		return 0x2;
+	case GL_CLAMP_TO_EDGE:
+		return 0x3;
+	case GL_CLAMP_TO_BORDER:
+		return 0x4;
+	case GL_CLAMP:
+		return 0x5;
+	default:
+		assert(0);
+	}
+}
+/* Translate a GL texture filter into its numeric hardware code (1..6); unknown values hit assert(0), which has no return after it. */
+static inline unsigned
+nvgl_filter_mode(unsigned filter)
+{
+	switch (filter) {
+	case GL_NEAREST:
+		return 0x1;
+	case GL_LINEAR:
+		return 0x2;
+	case GL_NEAREST_MIPMAP_NEAREST:
+		return 0x3;
+	case GL_LINEAR_MIPMAP_NEAREST:
+		return 0x4;
+	case GL_NEAREST_MIPMAP_LINEAR:
+		return 0x5;
+	case GL_LINEAR_MIPMAP_LINEAR:
+		return 0x6;
+	default:
+		assert(0);
+	}
+}
+/* Translate a GL texture coordinate generation mode into its numeric hardware code; unknown values hit assert(0), which has no return after it. */
+static inline unsigned
+nvgl_texgen_mode(unsigned mode)
+{
+	switch (mode) {
+	case GL_EYE_LINEAR:
+		return 0x2400;
+	case GL_OBJECT_LINEAR:
+		return 0x2401;
+	case GL_SPHERE_MAP:
+		return 0x2402;
+	case GL_NORMAL_MAP:
+		return 0x8511;
+	case GL_REFLECTION_MAP:
+		return 0x8512;
+	default:
+		assert(0);
+	}
+}
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_render.h b/src/mesa/drivers/dri/nouveau/nouveau_render.h
new file mode 100644
index 0000000000..923b79b2cf
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_render.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NOUVEAU_RENDER_H__
+#define __NOUVEAU_RENDER_H__
+
+#include "vbo/vbo_context.h"
+/* Forward declaration: the extractor typedefs below take a pointer to it. */
+struct nouveau_array_state;
+/* dispatch_t is called as (ctx, start, delta, n) by the render code; extract_u_t / extract_f_t fetch component j of element i from an array as unsigned / float. */
+typedef void (*dispatch_t)(GLcontext *, unsigned int, int, unsigned int);
+typedef unsigned (*extract_u_t)(struct nouveau_array_state *, int, int);
+typedef float (*extract_f_t)(struct nouveau_array_state *, int, int);
+/* Per-attribute submission description, looked up by attribute index in the TAG(vertex_attrs) table (see EMIT_IMM). */
+struct nouveau_attr_info {
+	int vbo_index;	/* NOTE(review): presumably the hardware VBO slot -- not used in the visible code */
+	int imm_method;	/* passed to IMM_PACKET together with imm_fields */
+	int imm_fields;	/* number of components emitted per element in immediate mode */
+	/* Optional hook: when set, EMIT_IMM calls it with a pointer to the element instead of emitting the components itself. */
+	void (*emit)(GLcontext *, struct nouveau_array_state *, const void *);
+};
+/* State of one vertex attribute array (also used for the index buffer, see nouveau_render_state.ib). */
+struct nouveau_array_state {
+	int attr;	/* attribute index; EMIT_IMM uses it to index TAG(vertex_attrs) */
+	int stride, fields, type;	/* byte stride between elements, components per element, GL data type */
+	/* NOTE(review): bo/offset presumably locate the data in a buffer object, while buf is the CPU pointer the extractors read from -- confirm. */
+	struct nouveau_bo *bo;
+	unsigned offset;
+	const void *buf;
+	/* Component accessors chosen by get_array_extract() according to <type>. */
+	extract_u_t extract_u;
+	extract_f_t extract_f;
+};
+/* Scratch ring: RENDER_SCRATCH_COUNT buffer objects of RENDER_SCRATCH_SIZE bytes each; the size is parenthesized so it expands safely inside any expression. */
+#define RENDER_SCRATCH_COUNT 32
+#define RENDER_SCRATCH_SIZE (64 * 1024)
+/* Ring of scratch buffer objects that get_scratch_vbo() hands out memory from. */
+struct nouveau_scratch_state {
+	struct nouveau_bo *bo[RENDER_SCRATCH_COUNT];
+	/* index: buffer currently being filled; offset: first unused byte in it; buf: its CPU address (NULL until first use). */
+	int index;
+	int offset;
+	void *buf;
+};
+/* State of the software TnL path. NOTE(review): the field meanings below are inferred from their names only -- confirm in nouveau_swtnl_t.c. */
+struct nouveau_swtnl_state {
+	struct nouveau_bo *vbo;	/* presumably the buffer object receiving the vertices */
+	void *buf;	/* presumably its CPU mapping */
+	unsigned vertex_count;
+	GLenum primitive;
+};
+/* Per-context vertex submission state, reached through to_render_state(ctx). */
+struct nouveau_render_state {
+	enum {
+		VBO,
+		IMM
+	} mode;
+	/* ib: index buffer (fields == 0 means none, see get_array_dispatch); attrs: one entry per vertex attribute. */
+	struct nouveau_array_state ib;
+	struct nouveau_array_state attrs[VERT_ATTRIB_MAX];
+
+	/* Maps a HW VBO index or IMM emission order to an index in
+	 * the attrs array above (or -1 if unused). */
+	int map[VERT_ATTRIB_MAX];
+	/* Both are used by get_max_vertices() to size immediate-mode vertices (vertex_size is divided by 4 there). */
+	int attr_count;
+	int vertex_size;
+
+	struct nouveau_scratch_state scratch;
+	struct nouveau_swtnl_state swtnl;
+};
+/* Pointer to the nouveau_render_state embedded in the nouveau context behind <ctx>. */
+#define to_render_state(ctx) (&to_nouveau_context(ctx)->render)
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_render_t.c b/src/mesa/drivers/dri/nouveau/nouveau_render_t.c
new file mode 100644
index 0000000000..7ccd7e6416
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_render_t.c
@@ -0,0 +1,361 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Vertex submission helper definitions shared among the software and
+ * hardware TnL paths.
+ */
+
+#include "nouveau_gldefs.h"
+
+#include "main/light.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+/* Index emission helpers for EMIT_VBO (r: render state, i: first element, d: delta, n: count): L passes i + d and n, I16 two extracted indices, I32 one; arguments are not parenthesized, so pass simple expressions. */
+#define OUT_INDICES_L(r, i, d, n)		\
+	BATCH_OUT_L(i + d, n);			\
+	(void)r
+#define OUT_INDICES_I16(r, i, d, n)				\
+	BATCH_OUT_I16(r->ib.extract_u(&r->ib, 0, i) + d,	\
+		      r->ib.extract_u(&r->ib, 0, i + 1) + d)
+#define OUT_INDICES_I32(r, i, d, n)			\
+	BATCH_OUT_I32(r->ib.extract_u(&r->ib, 0, i) + d)
+
+/*
+ * Emit <n> vertices using BATCH_OUT_<out>, MAX_OUT_<out> at a time,
+ * grouping them in packets of at most MAX_PACKET outputs each.
+ *
+ * out:   hardware index data type (L, I16 or I32).
+ * ctx:   GL context.
+ * start: lvalue naming the first element to emit; it is advanced past the
+ *        emitted elements, so consecutive invocations continue in sequence.
+ * delta: integer correction added to each index. n: number of elements.
+ */
+#define EMIT_VBO(out, ctx, start, delta, n) do {			\
+		struct nouveau_render_state *render = to_render_state(ctx); \
+		int npush = n;						\
+									\
+		while (npush) {						\
+			int npack = MIN2(npush, MAX_PACKET * MAX_OUT_##out); \
+			npush -= npack;					\
+									\
+			BATCH_PACKET_##out((npack + MAX_OUT_##out - 1)	\
+					   / MAX_OUT_##out);		\
+			while (npack) {					\
+				int nout = MIN2(npack, MAX_OUT_##out);	\
+				npack -= nout;				\
+									\
+				OUT_INDICES_##out(render, start, delta, \
+						  nout);		\
+				start += nout;				\
+			}						\
+		}							\
+	} while (0)
+
+/* Emit the <n>-th element of the array <a>: through IMM_PACKET/IMM_OUT, padding missing
+ * components from (0, 0, 0, 1), unless the attribute provides its own emit() hook.
+ * <a> and <n> are parenthesized on every use, so expressions are safe as arguments. */
+#define EMIT_IMM(ctx, a, n) do {					\
+		struct nouveau_attr_info *info =			\
+			&TAG(vertex_attrs)[(a)->attr];			\
+		int m;							\
+									\
+		if (!info->emit) {					\
+			IMM_PACKET(info->imm_method, info->imm_fields);	\
+									\
+			for (m = 0; m < (a)->fields; m++)		\
+				IMM_OUT((a)->extract_f((a), (n), m));	\
+									\
+			for (m = (a)->fields; m < info->imm_fields; m++) \
+				IMM_OUT(((float []){0, 0, 0, 1})[m]);	\
+									\
+		} else {						\
+			info->emit(ctx, (a), (a)->buf + (n) * (a)->stride); \
+		}							\
+	} while (0)
+
+/* Select a dispatch function for index buffer <a>: no index data (fields == 0), 32-bit
+ * indices (GL_UNSIGNED_INT), or anything else through the 16-bit path.  The callbacks are
+ * GNU C nested functions; they appear not to use get_array_dispatch()'s own locals. */
+static void
+get_array_dispatch(struct nouveau_array_state *a, dispatch_t *dispatch)
+{
+	if (!a->fields) {	/* no index buffer: emit consecutive vertices */
+		auto void f(GLcontext *, unsigned int, int, unsigned int);
+
+		void f(GLcontext *ctx, unsigned int start, int delta,
+		       unsigned int n) {
+			struct nouveau_channel *chan = context_chan(ctx);
+			RENDER_LOCALS(ctx);
+
+			EMIT_VBO(L, ctx, start, delta, n);
+		};
+
+		*dispatch = f;
+
+	} else if (a->type == GL_UNSIGNED_INT) {
+		auto void f(GLcontext *, unsigned int, int, unsigned int);
+
+		void f(GLcontext *ctx, unsigned int start, int delta,
+		       unsigned int n) {
+			struct nouveau_channel *chan = context_chan(ctx);
+			RENDER_LOCALS(ctx);
+
+			EMIT_VBO(I32, ctx, start, delta, n);
+		};
+
+		*dispatch = f;
+
+	} else {
+		auto void f(GLcontext *, unsigned int, int, unsigned int);
+
+		void f(GLcontext *ctx, unsigned int start, int delta,
+		       unsigned int n) {
+			struct nouveau_channel *chan = context_chan(ctx);
+			RENDER_LOCALS(ctx);
+			/* EMIT_VBO advances <start>: an odd leading element goes through the I32 path, the even remainder as I16 pairs. */
+			EMIT_VBO(I32, ctx, start, delta, n & 1);
+			EMIT_VBO(I16, ctx, start, delta, n & ~1);
+		};
+
+		*dispatch = f;
+	}
+}
+
+/*
+ * Select element extraction functions for array <a> according to a->type.
+ * GL_BYTE is read as "signed char" so negative values survive on platforms
+ * where plain char is unsigned; float results are divided by the type's maximum.
+ */
+static void
+get_array_extract(struct nouveau_array_state *a,
+		  extract_u_t *extract_u, extract_f_t *extract_f)
+{
+#define EXTRACT(in_t, out_t, k)						\
+	({								\
+		auto out_t f(struct nouveau_array_state *, int, int);	\
+		out_t f(struct nouveau_array_state *a, int i, int j) {	\
+			in_t x = ((in_t *)(a->buf + i * a->stride))[j];	\
+									\
+			return (out_t)x / (k);				\
+		};							\
+		f;							\
+	})
+	/* Each EXTRACT() yields a GNU C nested function reading component j of element i; the cast to out_t happens before the division. */
+	switch (a->type) {
+	case GL_BYTE:
+		*extract_u = EXTRACT(signed char, unsigned, 1);
+		*extract_f = EXTRACT(signed char, float, SCHAR_MAX);
+		break;
+	case GL_UNSIGNED_BYTE:
+		*extract_u = EXTRACT(unsigned char, unsigned, 1);
+		*extract_f = EXTRACT(unsigned char, float, UCHAR_MAX);
+		break;
+	case GL_SHORT:
+		*extract_u = EXTRACT(short, unsigned, 1);
+		*extract_f = EXTRACT(short, float, SHRT_MAX);
+		break;
+	case GL_UNSIGNED_SHORT:
+		*extract_u = EXTRACT(unsigned short, unsigned, 1);
+		*extract_f = EXTRACT(unsigned short, float, USHRT_MAX);
+		break;
+	case GL_INT:
+		*extract_u = EXTRACT(int, unsigned, 1);
+		*extract_f = EXTRACT(int, float, INT_MAX);
+		break;
+	case GL_UNSIGNED_INT:
+		*extract_u = EXTRACT(unsigned int, unsigned, 1);
+		*extract_f = EXTRACT(unsigned int, float, UINT_MAX);
+		break;
+	case GL_FLOAT:
+		*extract_u = EXTRACT(float, unsigned, 1.0 / UINT_MAX);
+		*extract_f = EXTRACT(float, float, 1);
+		break;
+
+	default:
+		assert(0);
+	}
+}
+
+/*
+ * Returns a pointer to a chunk of <size> bytes long GART memory. <bo>
+ * will be updated with the buffer object the memory is located in.
+ *
+ * If <offset> is provided, it will be updated with the offset within
+ * <bo> of the allocated memory. Otherwise the returned memory will
+ * always be located right at the beginning of <bo>.
+ */
+static inline void *
+get_scratch_vbo(GLcontext *ctx, unsigned size, struct nouveau_bo **bo,
+		unsigned *offset)
+{
+	struct nouveau_scratch_state *scratch = &to_render_state(ctx)->scratch;
+	void *buf;
+	/* Fast path: carve the request out of the current scratch buffer (only possible when the caller accepts an offset). */
+	if (scratch->buf && offset &&
+	    size <= RENDER_SCRATCH_SIZE - scratch->offset) {
+		nouveau_bo_ref(scratch->bo[scratch->index], bo);
+
+		buf = scratch->buf + scratch->offset;
+		*offset = scratch->offset;
+		scratch->offset += size;
+	/* Otherwise advance round-robin to the next scratch buffer and start at its beginning. */
+	} else if (size <= RENDER_SCRATCH_SIZE) {
+		scratch->index = (scratch->index + 1) % RENDER_SCRATCH_COUNT;
+		nouveau_bo_ref(scratch->bo[scratch->index], bo);
+		/* NOTE(review): the pointer is kept after nouveau_bo_unmap() -- assumes the CPU mapping stays usable; confirm against libdrm_nouveau. */
+		nouveau_bo_map(*bo, NOUVEAU_BO_WR);
+		buf = scratch->buf = (*bo)->map;
+		nouveau_bo_unmap(*bo);
+
+		if (offset)
+			*offset = 0;
+		scratch->offset = size;
+	/* Requests larger than a scratch buffer get a dedicated buffer object; NOTE(review): the nouveau_bo_new()/nouveau_bo_map() results are not checked. */
+	} else {
+		nouveau_bo_new(context_dev(ctx),
+			       NOUVEAU_BO_MAP | NOUVEAU_BO_GART, 0, size, bo);
+
+		nouveau_bo_map(*bo, NOUVEAU_BO_WR);
+		buf = (*bo)->map;
+		nouveau_bo_unmap(*bo);
+
+		if (offset)
+			*offset = 0;
+	}
+
+	return buf;
+}
+
+/* Returns how many vertices you can draw using <n> pushbuf dwords, given the current
+ * render mode and the index type of <ib> (NULL for non-indexed draws).  Index types other
+ * than GL_UNSIGNED_INT use the 16-bit limit, matching get_array_dispatch(). */
+static inline unsigned
+get_max_vertices(GLcontext *ctx, const struct _mesa_index_buffer *ib,
+		 int n)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+
+	if (render->mode == IMM) {
+		return MAX2(0, n - 4) / (render->vertex_size / 4 +
+					 render->attr_count);
+	} else {
+		unsigned max_out;
+
+		if (ib) {
+			switch (ib->type) {
+			case GL_UNSIGNED_INT:
+				max_out = MAX_OUT_I32;
+				break;
+
+			case GL_UNSIGNED_SHORT:
+			case GL_UNSIGNED_BYTE:
+			default:
+				/* Everything that is not 32-bit goes through the I16 path, as in
+				 * get_array_dispatch(); the default keeps max_out initialized. */
+				max_out = MAX_OUT_I16;
+				break;
+			}
+		} else {
+			max_out = MAX_OUT_L;
+		}
+
+		return MAX2(0, n - 7) * max_out * MAX_PACKET / (1 + MAX_PACKET);
+	}
+}
+
+#include "nouveau_vbo_t.c"
+#include "nouveau_swtnl_t.c"
+/* emit() hook for material attributes passed as generic vertex attributes: copies the 4 floats at <v> into the context's material, lets Mesa update derived state and re-emits the matching hardware state. */
+static void
+TAG(emit_material)(GLcontext *ctx, struct nouveau_array_state *a,
+		   const void *v)
+{
+	const int states[] = {
+		NOUVEAU_STATE_MATERIAL_FRONT_AMBIENT,
+		NOUVEAU_STATE_MATERIAL_BACK_AMBIENT,
+		NOUVEAU_STATE_MATERIAL_FRONT_DIFFUSE,
+		NOUVEAU_STATE_MATERIAL_BACK_DIFFUSE,
+		NOUVEAU_STATE_MATERIAL_FRONT_SPECULAR,
+		NOUVEAU_STATE_MATERIAL_BACK_SPECULAR,
+		NOUVEAU_STATE_MATERIAL_FRONT_AMBIENT,
+		NOUVEAU_STATE_MATERIAL_BACK_AMBIENT,
+		NOUVEAU_STATE_MATERIAL_FRONT_SHININESS,
+		NOUVEAU_STATE_MATERIAL_BACK_SHININESS
+	};	/* indexed by material attribute; slots 6 and 7 repeat the ambient states */
+	const int mat = a->attr - VERT_ATTRIB_GENERIC0;
+	const int state = states[mat];
+	float *params = (float *)v;
+	COPY_4V(ctx->Light.Material.Attrib[mat], params);
+	_mesa_update_material(ctx, 1 << mat);
+	context_drv(ctx)->emit[state](ctx, state);
+}
+/* Draw entry point registered with the vbo module by TAG(render_init): validates the framebuffer, then draws through the hardware or software TnL path. */
+static void
+TAG(render_prims)(GLcontext *ctx, const struct gl_client_array **arrays,
+		  const struct _mesa_prim *prims, GLuint nr_prims,
+		  const struct _mesa_index_buffer *ib,
+		  GLboolean index_bounds_valid,
+		  GLuint min_index, GLuint max_index)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+
+	nouveau_validate_framebuffer(ctx);
+
+	if (nctx->fallback == HWTNL)
+		TAG(vbo_render_prims)(ctx, arrays, prims, nr_prims, ib,
+				      index_bounds_valid, min_index, max_index);
+	/* Not an "else": fallback is re-read, so the software path also runs if the call above switched it to SWTNL -- presumably intentional; confirm. */
+	if (nctx->fallback == SWTNL)
+		_tnl_vbo_draw_prims(ctx, arrays, prims, nr_prims, ib,
+				    index_bounds_valid, min_index, max_index);
+}
+/* Set up vertex submission for <ctx>: allocate the scratch buffer ring, clear the attribute map, start software TnL and register the draw function. */
+void
+TAG(render_init)(GLcontext *ctx)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	struct nouveau_bo **bos = render->scratch.bo;
+	int slot, err;
+
+	for (slot = 0; slot < RENDER_SCRATCH_COUNT; slot++) {
+		err = nouveau_bo_new(context_dev(ctx),
+				     NOUVEAU_BO_MAP | NOUVEAU_BO_GART,
+				     0, RENDER_SCRATCH_SIZE, &bos[slot]);
+		assert(!err);
+	}
+	/* -1 marks a map slot as unused. */
+	for (slot = 0; slot < VERT_ATTRIB_MAX; slot++)
+		render->map[slot] = -1;
+
+	TAG(swtnl_init)(ctx);
+	vbo_set_draw_func(ctx, TAG(render_prims));
+}
+/* Tear down vertex submission state for <ctx>; only the software TnL state is destroyed here. */
+void
+TAG(render_destroy)(GLcontext *ctx)
+{
+	TAG(swtnl_destroy)(ctx);	/* NOTE(review): the scratch buffers allocated by TAG(render_init) are not released here -- check that they are freed elsewhere */
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_screen.c b/src/mesa/drivers/dri/nouveau/nouveau_screen.c
new file mode 100644
index 0000000000..78987f633c
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_screen.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_fbo.h"
+#include "nouveau_texture.h"
+#include "nouveau_drmif.h"
+#include "nv04_driver.h"
+#include "nv10_driver.h"
+#include "nv20_driver.h"
+
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+
+static const __DRIextension *nouveau_screen_extensions[];
+
+static void
+nouveau_destroy_screen(__DRIscreen *dri_screen);
+
+/* Build the config list returned by nouveau_init_screen2(): driCreateConfigs()
+ * is called once per colour format in fb_formats with the depth/stencil, back
+ * buffer and MSAA tables below, and the per-format results are concatenated. */
+static const __DRIconfig **
+nouveau_get_configs(void)
+{
+	/* Parallel arrays: both are passed with the single depth_bits count. */
+	const uint8_t depth_bits[]   = { 0, 16, 24, 24 };
+	const uint8_t stencil_bits[] = { 0,  0,  0,  8 };
+	const uint8_t msaa_samples[] = { 0 };
+	const GLenum back_buffer_modes[] = { GLX_NONE, GLX_SWAP_UNDEFINED_OML };
+	const struct {
+		GLenum format;
+		GLenum type;
+	} fb_formats[] = {
+		{ GL_RGB , GL_UNSIGNED_SHORT_5_6_5     },
+		{ GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV },
+		{ GL_BGR , GL_UNSIGNED_INT_8_8_8_8_REV },
+	};
+	__DRIconfig **all = NULL;
+	int f;
+
+	for (f = 0; f < Elements(fb_formats); f++) {
+		__DRIconfig **batch =
+			driCreateConfigs(fb_formats[f].format,
+					 fb_formats[f].type,
+					 depth_bits, stencil_bits,
+					 Elements(depth_bits),
+					 back_buffer_modes,
+					 Elements(back_buffer_modes),
+					 msaa_samples, Elements(msaa_samples),
+					 GL_TRUE);
+		assert(batch);
+
+		/* The first batch seeds the list; later ones are appended. */
+		if (all)
+			all = driConcatConfigs(all, batch);
+		else
+			all = batch;
+	}
+
+	return (const __DRIconfig **)all;
+}
+
+static const __DRIconfig **
+nouveau_init_screen2(__DRIscreen *dri_screen)
+{
+	const __DRIconfig **configs;
+	struct nouveau_screen *screen;
+	int ret;
+
+	/* Allocate the screen. */
+	screen = CALLOC_STRUCT(nouveau_screen);
+	if (!screen)
+		return NULL;
+
+	dri_screen->private = screen;
+	dri_screen->extensions = nouveau_screen_extensions;
+	screen->dri_screen = dri_screen;
+
+	/* Open the DRM device. */
+	ret = nouveau_device_open_existing(&screen->device, 0, dri_screen->fd,
+					   0);
+	if (ret) {
+		nouveau_error("Error opening the DRM device.\n");
+		goto fail;
+	}
+
+	/* Choose the card specific function pointers. */
+	switch (screen->device->chipset & 0xf0) {
+	case 0x00:
+		screen->driver = &nv04_driver;
+		break;
+	case 0x10:
+		screen->driver = &nv10_driver;
+		break;
+	case 0x20:
+		screen->driver = &nv20_driver;
+		break;
+	default:
+		assert(0);
+	}
+
+	configs = nouveau_get_configs();
+	if (!configs)
+		goto fail;
+
+	return configs;
+fail:
+	nouveau_destroy_screen(dri_screen);
+	return NULL;
+
+}
+
+/* Release what nouveau_init_screen2() attached to dri_screen; a screen with
+ * no private data, or with no open device, is handled without error. */
+static void
+nouveau_destroy_screen(__DRIscreen *dri_screen)
+{
+	struct nouveau_screen *priv = dri_screen->private;
+
+	if (priv) {
+		if (priv->device)
+			nouveau_device_close(&priv->device);
+		FREE(priv);
+		dri_screen->private = NULL;
+	}
+}
+
+/* CreateBuffer hook: build the gl_framebuffer backing a window drawable and
+ * attach the colour and depth/stencil renderbuffers the visual asks for.
+ * Returns GL_FALSE for pixmaps or if the framebuffer cannot be created. */
+static GLboolean
+nouveau_create_buffer(__DRIscreen *dri_screen,
+		      __DRIdrawable *drawable,
+		      const __GLcontextModes *visual,
+		      GLboolean is_pixmap)
+{
+	struct gl_framebuffer *fb;
+	struct gl_renderbuffer *rb;
+	GLenum color_format, depth_format = GL_NONE;
+
+	if (is_pixmap)
+		return GL_FALSE; /* not implemented */
+
+	/* RGB5 for 5-bit red, otherwise RGB8 or RGBA8 depending on alpha. */
+	color_format = (visual->redBits == 5 ? GL_RGB5 :
+			visual->alphaBits == 0 ? GL_RGB8 : GL_RGBA8);
+
+	/* Only 16- and 24-bit depth visuals get a depth renderbuffer here. */
+	if (visual->depthBits == 24)
+		depth_format = (visual->stencilBits == 8 ?
+				GL_DEPTH24_STENCIL8_EXT : GL_DEPTH_COMPONENT24);
+	else if (visual->depthBits == 16)
+		depth_format = GL_DEPTH_COMPONENT16;
+
+	fb = nouveau_framebuffer_dri_new(visual);
+	if (!fb)
+		return GL_FALSE;
+
+	/* Front buffer. */
+	rb = nouveau_renderbuffer_dri_new(color_format, drawable);
+	_mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, rb);
+
+	/* Back buffer */
+	if (visual->doubleBufferMode) {
+		rb = nouveau_renderbuffer_dri_new(color_format, drawable);
+		_mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, rb);
+	}
+
+	/* Depth/stencil buffer; the packed Z24S8 buffer fills both slots. */
+	if (depth_format != GL_NONE) {
+		rb = nouveau_renderbuffer_dri_new(depth_format, drawable);
+		_mesa_add_renderbuffer(fb, BUFFER_DEPTH, rb);
+		if (depth_format == GL_DEPTH24_STENCIL8_EXT)
+			_mesa_add_renderbuffer(fb, BUFFER_STENCIL, rb);
+	}
+
+	/* Software renderbuffers. */
+	_mesa_add_soft_renderbuffers(fb, GL_FALSE, GL_FALSE, GL_FALSE,
+				     visual->accumRedBits > 0,
+				     GL_FALSE, GL_FALSE);
+
+	drawable->driverPrivate = fb;
+
+	return GL_TRUE;
+}
+
+static void
+nouveau_destroy_buffer(__DRIdrawable *drawable)
+{
+	_mesa_reference_framebuffer(
+		(struct gl_framebuffer **)&drawable->driverPrivate, NULL);
+}
+
+static void
+nouveau_drawable_flush(__DRIdrawable *draw)
+{
+}
+
+static const struct __DRI2flushExtensionRec nouveau_flush_extension = {
+    { __DRI2_FLUSH, __DRI2_FLUSH_VERSION },
+    nouveau_drawable_flush,
+    dri2InvalidateDrawable,
+};
+
+static const struct __DRItexBufferExtensionRec nouveau_texbuffer_extension = {
+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+    NULL,
+    nouveau_set_texbuffer,
+};
+
+static const __DRIextension *nouveau_screen_extensions[] = {
+    &nouveau_flush_extension.base,
+    &nouveau_texbuffer_extension.base,
+    &dri2ConfigQueryExtension.base,
+    NULL
+};
+
+const struct __DriverAPIRec driDriverAPI = {
+	.InitScreen2     = nouveau_init_screen2,
+	.DestroyScreen   = nouveau_destroy_screen,
+	.CreateBuffer    = nouveau_create_buffer,
+	.DestroyBuffer   = nouveau_destroy_buffer,
+	.CreateContext   = nouveau_context_create,
+	.DestroyContext  = nouveau_context_destroy,
+	.MakeCurrent     = nouveau_context_make_current,
+	.UnbindContext   = nouveau_context_unbind,
+};
+
+/* This is the table of extensions that the loader will dlsym() for. */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+	&driCoreExtension.base,
+	&driDRI2Extension.base,
+	NULL
+};
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_screen.h b/src/mesa/drivers/dri/nouveau/nouveau_screen.h
new file mode 100644
index 0000000000..bcf57e21f2
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_screen.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NOUVEAU_SCREEN_H__
+#define __NOUVEAU_SCREEN_H__
+
+struct nouveau_context;
+
+struct nouveau_screen {
+	__DRIscreen *dri_screen;
+	struct nouveau_device *device;
+	const struct nouveau_driver *driver;
+};
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_span.c b/src/mesa/drivers/dri/nouveau/nouveau_span.c
new file mode 100644
index 0000000000..1bfdecc6a2
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_span.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_fbo.h"
+#include "nouveau_context.h"
+#include "nouveau_bo.h"
+
+#include "swrast/swrast.h"
+
+#define LOCAL_VARS							\
+	struct nouveau_surface *s = &to_nouveau_renderbuffer(rb)->surface; \
+	GLuint p;							\
+	(void)p;
+
+#define LOCAL_DEPTH_VARS LOCAL_VARS
+/* No locking is done around span access: both hooks expand to nothing. */
+#define HW_LOCK()
+#define HW_UNLOCK()
+/* Opens a block whose min/max bounds span the whole renderbuffer. */
+#define HW_CLIPLOOP() {							\
+	int minx = 0;							\
+	int miny = 0;							\
+	int maxx = rb->Width;						\
+	int maxy = rb->Height;
+
+#define HW_ENDCLIPLOOP() }
+/* Rows are mirrored vertically when rb->Name is 0, left as-is otherwise. */
+#define Y_FLIP(y) (rb->Name ? (y) : rb->Height - 1 - (y))
+
+/* RGB565 span functions */
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+#define TAG(x) nouveau_##x##_rgb565
+#define TAG2(x, y) nouveau_##x##_rgb565##y
+#define GET_PTR(x, y) (s->bo->map + (y)*s->pitch + (x)*s->cpp)
+
+#include "spantmp2.h"
+
+/* RGB888 span functions */
+#define SPANTMP_PIXEL_FMT GL_BGR
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+#define TAG(x) nouveau_##x##_rgb888
+#define TAG2(x, y) nouveau_##x##_rgb888##y
+#define GET_PTR(x, y) (s->bo->map + (y)*s->pitch + (x)*s->cpp)
+
+#include "spantmp2.h"
+
+/* ARGB8888 span functions */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+#define TAG(x) nouveau_##x##_argb8888
+#define TAG2(x, y) nouveau_##x##_argb8888##y
+#define GET_PTR(x, y) (s->bo->map + (y)*s->pitch + (x)*s->cpp)
+
+#include "spantmp2.h"
+
+/* Z16 span functions */
+#define VALUE_TYPE uint16_t
+#define READ_DEPTH(v, x, y)						\
+	v = *(uint16_t *)(s->bo->map + (y)*s->pitch + (x)*s->cpp);
+#define WRITE_DEPTH(x, y, v)						\
+	*(uint16_t *)(s->bo->map + (y)*s->pitch + (x)*s->cpp) = v
+#define TAG(x) nouveau_##x##_z16
+
+#include "depthtmp.h"
+
+/* Z24S8 span functions */
+#define VALUE_TYPE uint32_t
+#define READ_DEPTH(v, x, y)						\
+	v = *(uint32_t *)(s->bo->map + (y)*s->pitch + (x)*s->cpp);
+#define WRITE_DEPTH(x, y, v)						\
+	*(uint32_t *)(s->bo->map + (y)*s->pitch + (x)*s->cpp) = v
+#define TAG(x) nouveau_##x##_z24s8
+
+#include "depthtmp.h"
+
+/* Map or unmap rb's buffer object; on map, first set its span accessors. */
+static void
+renderbuffer_map_unmap(struct gl_renderbuffer *rb, GLboolean map)
+{
+	struct nouveau_surface *surf = &to_nouveau_renderbuffer(rb)->surface;
+
+	if (!map) {
+		nouveau_bo_unmap(surf->bo);
+		return;
+	}
+	switch (rb->Format) {
+	case MESA_FORMAT_RGB565:
+		nouveau_InitPointers_rgb565(rb);
+		break;
+	case MESA_FORMAT_XRGB8888:
+		nouveau_InitPointers_rgb888(rb);
+		break;
+	case MESA_FORMAT_ARGB8888:
+		nouveau_InitPointers_argb8888(rb);
+		break;
+	case MESA_FORMAT_Z16:
+		nouveau_InitDepthPointers_z16(rb);
+		break;
+	case MESA_FORMAT_Z24_S8:
+		nouveau_InitDepthPointers_z24s8(rb);
+		break;
+	default:
+		assert(0);
+	}
+	nouveau_bo_map(surf->bo, NOUVEAU_BO_RDWR);
+}
+
+/* Map or unmap unit u's current texture; units not _ReallyEnabled are skipped. */
+static void
+texture_unit_map_unmap(GLcontext *ctx, struct gl_texture_unit *u, GLboolean map)
+{
+	if (u->_ReallyEnabled) {
+		if (map)
+			ctx->Driver.MapTexture(ctx, u->_Current);
+		else
+			ctx->Driver.UnmapTexture(ctx, u->_Current);
+	}
+}
+
+/* Map or unmap fb's colour draw buffers, read buffer and depth buffer. */
+static void
+framebuffer_map_unmap(struct gl_framebuffer *fb, GLboolean map)
+{
+	struct gl_renderbuffer *depth = fb->_DepthBuffer;
+	int n;
+
+	for (n = 0; n < fb->_NumColorDrawBuffers; n++)
+		renderbuffer_map_unmap(fb->_ColorDrawBuffers[n], map);
+	renderbuffer_map_unmap(fb->_ColorReadBuffer, map);
+	if (depth)
+		renderbuffer_map_unmap(depth->Wrapped, map);
+}
+
+/* Map or unmap the draw/read framebuffers and every texture unit. */
+static void
+span_map_unmap(GLcontext *ctx, GLboolean map)
+{
+	int unit;
+
+	framebuffer_map_unmap(ctx->DrawBuffer, map);
+	if (ctx->ReadBuffer != ctx->DrawBuffer)
+		framebuffer_map_unmap(ctx->ReadBuffer, map);
+
+	for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++)
+		texture_unit_map_unmap(ctx, &ctx->Texture.Unit[unit], map);
+}
+
+static void
+nouveau_span_start(GLcontext *ctx)
+{
+	nouveau_fallback(ctx, SWRAST);
+	span_map_unmap(ctx, GL_TRUE);
+}
+
+static void
+nouveau_span_finish(GLcontext *ctx)
+{
+	span_map_unmap(ctx, GL_FALSE);
+	nouveau_fallback(ctx, HWTNL);
+}
+
+void
+nouveau_span_functions_init(GLcontext *ctx)
+{
+	struct swrast_device_driver *swdd =
+		_swrast_GetDeviceDriverReference(ctx);
+	/* The start hook maps, the finish hook unmaps, the span buffers. */
+	swdd->SpanRenderStart = nouveau_span_start;
+	swdd->SpanRenderFinish = nouveau_span_finish;
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_state.c b/src/mesa/drivers/dri/nouveau/nouveau_state.c
new file mode 100644
index 0000000000..a57df2d9dc
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_state.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_texture.h"
+#include "nouveau_util.h"
+
+#include "swrast/swrast.h"
+#include "tnl/tnl.h"
+
+static void
+nouveau_alpha_func(GLcontext *ctx, GLenum func, GLfloat ref)
+{
+	context_dirty(ctx, ALPHA_FUNC);
+}
+
+static void
+nouveau_blend_color(GLcontext *ctx, const GLfloat color[4])
+{
+	context_dirty(ctx, BLEND_COLOR);
+}
+
+static void
+nouveau_blend_equation_separate(GLcontext *ctx, GLenum modeRGB, GLenum modeA)
+{
+	context_dirty(ctx, BLEND_EQUATION);
+}
+
+static void
+nouveau_blend_func_separate(GLcontext *ctx, GLenum sfactorRGB,
+			    GLenum dfactorRGB, GLenum sfactorA, GLenum dfactorA)
+{
+	context_dirty(ctx, BLEND_FUNC);
+}
+
+static void
+nouveau_clip_plane(GLcontext *ctx, GLenum plane, const GLfloat *equation)
+{
+	context_dirty_i(ctx, CLIP_PLANE, plane - GL_CLIP_PLANE0);
+}
+
+static void
+nouveau_color_mask(GLcontext *ctx, GLboolean rmask, GLboolean gmask,
+		   GLboolean bmask, GLboolean amask)
+{
+	context_dirty(ctx, COLOR_MASK);
+}
+
+static void
+nouveau_color_material(GLcontext *ctx, GLenum face, GLenum mode)
+{
+	context_dirty(ctx, COLOR_MATERIAL);
+	context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
+	context_dirty(ctx, MATERIAL_BACK_AMBIENT);
+	context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
+	context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
+	context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
+	context_dirty(ctx, MATERIAL_BACK_SPECULAR);
+}
+
+static void
+nouveau_cull_face(GLcontext *ctx, GLenum mode)
+{
+	context_dirty(ctx, CULL_FACE);
+}
+
+static void
+nouveau_front_face(GLcontext *ctx, GLenum mode)
+{
+	context_dirty(ctx, FRONT_FACE);
+}
+
+static void
+nouveau_depth_func(GLcontext *ctx, GLenum func)
+{
+	context_dirty(ctx, DEPTH);
+}
+
+static void
+nouveau_depth_mask(GLcontext *ctx, GLboolean flag)
+{
+	context_dirty(ctx, DEPTH);
+}
+
+static void
+nouveau_depth_range(GLcontext *ctx, GLclampd nearval, GLclampd farval)
+{
+	context_dirty(ctx, VIEWPORT);
+}
+
+static void
+nouveau_draw_buffer(GLcontext *ctx, GLenum buffer)
+{
+	context_dirty(ctx, FRAMEBUFFER);
+}
+
+static void
+nouveau_draw_buffers(GLcontext *ctx, GLsizei n, const GLenum *buffers)
+{
+	context_dirty(ctx, FRAMEBUFFER);
+}
+
+/* Flag the front/back ambient, diffuse and specular material states as
+ * dirty, and the two shininess states as well if with_shininess is set. */
+static void
+dirty_materials(GLcontext *ctx, GLboolean with_shininess)
+{
+	context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
+	context_dirty(ctx, MATERIAL_BACK_AMBIENT);
+	context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
+	context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
+	context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
+	context_dirty(ctx, MATERIAL_BACK_SPECULAR);
+	if (with_shininess) {
+		context_dirty(ctx, MATERIAL_FRONT_SHININESS);
+		context_dirty(ctx, MATERIAL_BACK_SHININESS);
+	}
+}
+
+/* Enable hook: flag every driver state that depends on the capability being
+ * toggled; the new value itself (state) is not looked at here. */
+static void
+nouveau_enable(GLcontext *ctx, GLenum cap, GLboolean state)
+{
+	int light;
+
+	switch (cap) {
+	case GL_ALPHA_TEST:
+		context_dirty(ctx, ALPHA_FUNC);
+		break;
+	case GL_BLEND:
+		context_dirty(ctx, BLEND_EQUATION);
+		break;
+	case GL_COLOR_LOGIC_OP:
+		context_dirty(ctx, LOGIC_OPCODE);
+		break;
+	case GL_COLOR_MATERIAL:
+		context_dirty(ctx, COLOR_MATERIAL);
+		dirty_materials(ctx, GL_FALSE);
+		break;
+	case GL_COLOR_SUM_EXT:
+		context_dirty(ctx, FRAG);
+		context_dirty(ctx, LIGHT_MODEL);
+		break;
+	case GL_CULL_FACE:
+		context_dirty(ctx, CULL_FACE);
+		break;
+	case GL_DEPTH_TEST:
+		context_dirty(ctx, DEPTH);
+		break;
+	case GL_DITHER:
+		context_dirty(ctx, DITHER);
+		break;
+	case GL_FOG:
+		context_dirty(ctx, FOG);
+		context_dirty(ctx, FRAG);
+		context_dirty(ctx, MODELVIEW);
+		break;
+	case GL_LIGHT0:
+	case GL_LIGHT1:
+	case GL_LIGHT2:
+	case GL_LIGHT3:
+	case GL_LIGHT4:
+	case GL_LIGHT5:
+	case GL_LIGHT6:
+	case GL_LIGHT7:
+		context_dirty(ctx, MODELVIEW);
+		context_dirty(ctx, LIGHT_ENABLE);
+		context_dirty_i(ctx, LIGHT_SOURCE, cap - GL_LIGHT0);
+		dirty_materials(ctx, GL_TRUE);
+		break;
+	case GL_LIGHTING:
+		context_dirty(ctx, FRAG);
+		context_dirty(ctx, MODELVIEW);
+		context_dirty(ctx, LIGHT_MODEL);
+		context_dirty(ctx, LIGHT_ENABLE);
+
+		for (light = 0; light < MAX_LIGHTS; light++) {
+			if (ctx->Light.Light[light].Enabled)
+				context_dirty_i(ctx, LIGHT_SOURCE, light);
+		}
+
+		dirty_materials(ctx, GL_TRUE);
+		break;
+	case GL_LINE_SMOOTH:
+		context_dirty(ctx, LINE_MODE);
+		break;
+	case GL_NORMALIZE:
+		context_dirty(ctx, LIGHT_ENABLE);
+		break;
+	case GL_POINT_SMOOTH:
+		context_dirty(ctx, POINT_MODE);
+		break;
+	case GL_POLYGON_OFFSET_POINT:
+	case GL_POLYGON_OFFSET_LINE:
+	case GL_POLYGON_OFFSET_FILL:
+		context_dirty(ctx, POLYGON_OFFSET);
+		break;
+	case GL_POLYGON_SMOOTH:
+		context_dirty(ctx, POLYGON_MODE);
+		break;
+	case GL_SCISSOR_TEST:
+		context_dirty(ctx, SCISSOR);
+		break;
+	case GL_STENCIL_TEST:
+		context_dirty(ctx, STENCIL_FUNC);
+		break;
+	case GL_TEXTURE_1D:
+	case GL_TEXTURE_2D:
+	case GL_TEXTURE_3D:
+	case GL_TEXTURE_RECTANGLE:
+		context_dirty_i(ctx, TEX_ENV, ctx->Texture.CurrentUnit);
+		context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
+		break;
+	case GL_TEXTURE_GEN_S:
+	case GL_TEXTURE_GEN_T:
+	case GL_TEXTURE_GEN_R:
+	case GL_TEXTURE_GEN_Q:
+		context_dirty_i(ctx, TEX_GEN, ctx->Texture.CurrentUnit);
+		context_dirty(ctx, MODELVIEW);
+		break;
+	}
+}
+
+static void
+nouveau_fog(GLcontext *ctx, GLenum pname, const GLfloat *params)
+{
+	context_dirty(ctx, FOG);
+}
+
+/* Lightfv hook: flag the states affected by the changed light parameter. */
+static void
+nouveau_light(GLcontext *ctx, GLenum light, GLenum pname, const GLfloat *params)
+{
+	switch (pname) {
+	case GL_AMBIENT:
+		context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
+		context_dirty(ctx, MATERIAL_BACK_AMBIENT);
+		break;
+	case GL_DIFFUSE:
+		context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
+		context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
+		break;
+	case GL_SPECULAR:
+		context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
+		context_dirty(ctx, MATERIAL_BACK_SPECULAR);
+		break;
+	case GL_SPOT_CUTOFF:
+	case GL_POSITION:
+		context_dirty(ctx, MODELVIEW);
+		context_dirty(ctx, LIGHT_ENABLE);
+		/* fall through */
+	default:
+		context_dirty_i(ctx, LIGHT_SOURCE, light - GL_LIGHT0);
+		break;
+	}
+}
+
+static void
+nouveau_light_model(GLcontext *ctx, GLenum pname, const GLfloat *params)
+{
+	context_dirty(ctx, LIGHT_MODEL);
+	context_dirty(ctx, MODELVIEW);
+}
+
+static void
+nouveau_line_stipple(GLcontext *ctx, GLint factor, GLushort pattern )
+{
+	context_dirty(ctx, LINE_STIPPLE);
+}
+
+static void
+nouveau_line_width(GLcontext *ctx, GLfloat width)
+{
+	context_dirty(ctx, LINE_MODE);
+}
+
+static void
+nouveau_logic_opcode(GLcontext *ctx, GLenum opcode)
+{
+	context_dirty(ctx, LOGIC_OPCODE);
+}
+
+static void
+nouveau_point_parameter(GLcontext *ctx, GLenum pname, const GLfloat *params)
+{
+	context_dirty(ctx, POINT_PARAMETER);
+}
+
+static void
+nouveau_point_size(GLcontext *ctx, GLfloat size)
+{
+	context_dirty(ctx, POINT_MODE);
+}
+
+static void
+nouveau_polygon_mode(GLcontext *ctx, GLenum face, GLenum mode)
+{
+	context_dirty(ctx, POLYGON_MODE);
+}
+
+static void
+nouveau_polygon_offset(GLcontext *ctx, GLfloat factor, GLfloat units)
+{
+	context_dirty(ctx, POLYGON_OFFSET);
+}
+
+static void
+nouveau_polygon_stipple(GLcontext *ctx, const GLubyte *mask)
+{
+	context_dirty(ctx, POLYGON_STIPPLE);
+}
+
+static void
+nouveau_render_mode(GLcontext *ctx, GLenum mode)
+{
+	context_dirty(ctx, RENDER_MODE);
+}
+
+static void
+nouveau_scissor(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+	context_dirty(ctx, SCISSOR);
+}
+
+static void
+nouveau_shade_model(GLcontext *ctx, GLenum mode)
+{
+	context_dirty(ctx, SHADE_MODEL);
+}
+
+static void
+nouveau_stencil_func_separate(GLcontext *ctx, GLenum face, GLenum func,
+			      GLint ref, GLuint mask)
+{
+	context_dirty(ctx, STENCIL_FUNC);
+}
+
+static void
+nouveau_stencil_mask_separate(GLcontext *ctx, GLenum face, GLuint mask)
+{
+	context_dirty(ctx, STENCIL_MASK);
+}
+
+static void
+nouveau_stencil_op_separate(GLcontext *ctx, GLenum face, GLenum fail,
+			    GLenum zfail, GLenum zpass)
+{
+	context_dirty(ctx, STENCIL_OP);
+}
+
+/* TexGen hook: flag the texgen state of the current texture unit, and
+ * MODELVIEW as well when the generation mode is what changed.  coord and
+ * params are not looked at. */
+static void
+nouveau_tex_gen(GLcontext *ctx, GLenum coord, GLenum pname,
+		const GLfloat *params)
+{
+	/* Every parameter affects the texgen state of the current unit. */
+	context_dirty_i(ctx, TEX_GEN, ctx->Texture.CurrentUnit);
+
+	/* Only a mode change also dirties MODELVIEW. */
+	if (pname == GL_TEXTURE_GEN_MODE)
+		context_dirty(ctx, MODELVIEW);
+}
+
+/* TexEnv hook: flag either the TEX_OBJ or the TEX_ENV state of the current
+ * texture unit, depending on target.  pname and param are not looked at. */
+static void
+nouveau_tex_env(GLcontext *ctx, GLenum target, GLenum pname,
+		const GLfloat *param)
+{
+	/* GL_TEXTURE_FILTER_CONTROL_EXT parameters are tracked with the
+	 * texture object state, every other target with the environment. */
+	if (target == GL_TEXTURE_FILTER_CONTROL_EXT)
+		context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
+	else
+		context_dirty_i(ctx, TEX_ENV, ctx->Texture.CurrentUnit);
+}
+
+/* TexParameter hook: flag the current unit's TEX_OBJ state for the listed
+ * parameters; min filter and base/max level first reallocate the texture. */
+static void
+nouveau_tex_parameter(GLcontext *ctx, GLenum target,
+		      struct gl_texture_object *t, GLenum pname,
+		      const GLfloat *params)
+{
+	switch (pname) {
+	case GL_TEXTURE_MIN_FILTER:
+	case GL_TEXTURE_BASE_LEVEL:
+	case GL_TEXTURE_MAX_LEVEL:
+		nouveau_texture_reallocate(ctx, t);
+		/* fall through */
+	case GL_TEXTURE_MAG_FILTER:
+	case GL_TEXTURE_WRAP_S:
+	case GL_TEXTURE_WRAP_T:
+	case GL_TEXTURE_WRAP_R:
+	case GL_TEXTURE_MIN_LOD:
+	case GL_TEXTURE_MAX_LOD:
+	case GL_TEXTURE_MAX_ANISOTROPY_EXT:
+	case GL_TEXTURE_LOD_BIAS:
+		context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
+		break;
+	}
+}
+
+static void
+nouveau_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+	context_dirty(ctx, VIEWPORT);
+}
+
+void
+nouveau_emit_nothing(GLcontext *ctx, int emit)
+{
+}
+
+/* Return the index found by BITSET_FFS() in the dirty set if it is below
+ * the driver's num_emit, or -1 if no bit is set or the index is too high. */
+int
+nouveau_next_dirty_state(GLcontext *ctx)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	const int first = BITSET_FFS(nctx->dirty) - 1;
+
+	return (first >= 0 && first < context_drv(ctx)->num_emit) ?
+		first : -1;
+}
+
+/* Call the emit function of each dirty state in turn, re-reading the dirty
+ * set every time, then clear the set and call nouveau_bo_state_emit(). */
+void
+nouveau_state_emit(GLcontext *ctx)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	const struct nouveau_driver *drv = context_drv(ctx);
+	int state = nouveau_next_dirty_state(ctx);
+
+	for (; state >= 0; state = nouveau_next_dirty_state(ctx)) {
+		BITSET_CLEAR(nctx->dirty, state);
+		drv->emit[state](ctx, state);
+	}
+	BITSET_ZERO(nctx->dirty);
+	nouveau_bo_state_emit(ctx);
+}
+
+/* UpdateState hook: turn Mesa's _NEW_* flags into dirty driver states, pass
+ * the flags on to swrast and tnl, then emit whatever is dirty. */
+static void
+nouveau_update_state(GLcontext *ctx, GLbitfield new_state)
+{
+	int unit;
+
+	if (new_state & (_NEW_PROJECTION | _NEW_MODELVIEW))
+		context_dirty(ctx, PROJECTION);
+	if (new_state & _NEW_MODELVIEW)
+		context_dirty(ctx, MODELVIEW);
+
+	if (new_state & _NEW_TEXTURE_MATRIX) {
+		for (unit = 0; unit < ctx->Const.MaxTextureCoordUnits; unit++)
+			context_dirty_i(ctx, TEX_MAT, unit);
+	}
+
+	/* Materials are flagged only when both flags are raised together. */
+	if ((new_state & _NEW_CURRENT_ATTRIB) && (new_state & _NEW_LIGHT)) {
+		context_dirty(ctx, MATERIAL_FRONT_AMBIENT);
+		context_dirty(ctx, MATERIAL_BACK_AMBIENT);
+		context_dirty(ctx, MATERIAL_FRONT_DIFFUSE);
+		context_dirty(ctx, MATERIAL_BACK_DIFFUSE);
+		context_dirty(ctx, MATERIAL_FRONT_SPECULAR);
+		context_dirty(ctx, MATERIAL_BACK_SPECULAR);
+		context_dirty(ctx, MATERIAL_FRONT_SHININESS);
+		context_dirty(ctx, MATERIAL_BACK_SHININESS);
+	}
+
+	_swrast_InvalidateState(ctx, new_state);
+	_tnl_InvalidateState(ctx, new_state);
+	nouveau_state_emit(ctx);
+}
+
+void
+nouveau_state_init(GLcontext *ctx)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	/* Install the state-change callbacks defined earlier in this file. */
+	ctx->Driver.AlphaFunc = nouveau_alpha_func;
+	ctx->Driver.BlendColor = nouveau_blend_color;
+	ctx->Driver.BlendEquationSeparate = nouveau_blend_equation_separate;
+	ctx->Driver.BlendFuncSeparate = nouveau_blend_func_separate;
+	ctx->Driver.ClipPlane = nouveau_clip_plane;
+	ctx->Driver.ColorMask = nouveau_color_mask;
+	ctx->Driver.ColorMaterial = nouveau_color_material;
+	ctx->Driver.CullFace = nouveau_cull_face;
+	ctx->Driver.FrontFace = nouveau_front_face;
+	ctx->Driver.DepthFunc = nouveau_depth_func;
+	ctx->Driver.DepthMask = nouveau_depth_mask;
+	ctx->Driver.DepthRange = nouveau_depth_range;
+	ctx->Driver.DrawBuffer = nouveau_draw_buffer;
+	ctx->Driver.DrawBuffers = nouveau_draw_buffers;
+	ctx->Driver.Enable = nouveau_enable;
+	ctx->Driver.Fogfv = nouveau_fog;
+	ctx->Driver.Lightfv = nouveau_light;
+	ctx->Driver.LightModelfv = nouveau_light_model;
+	ctx->Driver.LineStipple = nouveau_line_stipple;
+	ctx->Driver.LineWidth = nouveau_line_width;
+	ctx->Driver.LogicOpcode = nouveau_logic_opcode;
+	ctx->Driver.PointParameterfv = nouveau_point_parameter;
+	ctx->Driver.PointSize = nouveau_point_size;
+	ctx->Driver.PolygonMode = nouveau_polygon_mode;
+	ctx->Driver.PolygonOffset = nouveau_polygon_offset;
+	ctx->Driver.PolygonStipple = nouveau_polygon_stipple;
+	ctx->Driver.RenderMode = nouveau_render_mode;
+	ctx->Driver.Scissor = nouveau_scissor;
+	ctx->Driver.ShadeModel = nouveau_shade_model;
+	ctx->Driver.StencilFuncSeparate = nouveau_stencil_func_separate;
+	ctx->Driver.StencilMaskSeparate = nouveau_stencil_mask_separate;
+	ctx->Driver.StencilOpSeparate = nouveau_stencil_op_separate;
+	ctx->Driver.TexGen = nouveau_tex_gen;
+	ctx->Driver.TexEnv = nouveau_tex_env;
+	ctx->Driver.TexParameter = nouveau_tex_parameter;
+	ctx->Driver.Viewport = nouveau_viewport;
+
+	ctx->Driver.UpdateState = nouveau_update_state;
+	/* Start with every bit of the dirty set raised. */
+	BITSET_ONES(nctx->dirty);
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_state.h b/src/mesa/drivers/dri/nouveau/nouveau_state.h
new file mode 100644
index 0000000000..38ac9753c8
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_state.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NOUVEAU_STATE_H__
+#define __NOUVEAU_STATE_H__
+
+enum {
+	NOUVEAU_STATE_ALPHA_FUNC,
+	NOUVEAU_STATE_BLEND_COLOR,
+	NOUVEAU_STATE_BLEND_EQUATION,
+	NOUVEAU_STATE_BLEND_FUNC,
+	NOUVEAU_STATE_CLIP_PLANE0,
+	NOUVEAU_STATE_CLIP_PLANE1,
+	NOUVEAU_STATE_CLIP_PLANE2,
+	NOUVEAU_STATE_CLIP_PLANE3,
+	NOUVEAU_STATE_CLIP_PLANE4,
+	NOUVEAU_STATE_CLIP_PLANE5,
+	NOUVEAU_STATE_COLOR_MASK,
+	NOUVEAU_STATE_COLOR_MATERIAL,
+	NOUVEAU_STATE_CULL_FACE,
+	NOUVEAU_STATE_FRONT_FACE,
+	NOUVEAU_STATE_DEPTH,
+	NOUVEAU_STATE_DITHER,
+	NOUVEAU_STATE_FRAG,
+	NOUVEAU_STATE_FRAMEBUFFER,
+	NOUVEAU_STATE_FOG,
+	NOUVEAU_STATE_LIGHT_ENABLE,
+	NOUVEAU_STATE_LIGHT_MODEL,
+	NOUVEAU_STATE_LIGHT_SOURCE0,
+	NOUVEAU_STATE_LIGHT_SOURCE1,
+	NOUVEAU_STATE_LIGHT_SOURCE2,
+	NOUVEAU_STATE_LIGHT_SOURCE3,
+	NOUVEAU_STATE_LIGHT_SOURCE4,
+	NOUVEAU_STATE_LIGHT_SOURCE5,
+	NOUVEAU_STATE_LIGHT_SOURCE6,
+	NOUVEAU_STATE_LIGHT_SOURCE7,
+	NOUVEAU_STATE_LINE_STIPPLE,
+	NOUVEAU_STATE_LINE_MODE,
+	NOUVEAU_STATE_LOGIC_OPCODE,
+	NOUVEAU_STATE_MATERIAL_FRONT_AMBIENT,
+	NOUVEAU_STATE_MATERIAL_BACK_AMBIENT,
+	NOUVEAU_STATE_MATERIAL_FRONT_DIFFUSE,
+	NOUVEAU_STATE_MATERIAL_BACK_DIFFUSE,
+	NOUVEAU_STATE_MATERIAL_FRONT_SPECULAR,
+	NOUVEAU_STATE_MATERIAL_BACK_SPECULAR,
+	NOUVEAU_STATE_MATERIAL_FRONT_SHININESS,
+	NOUVEAU_STATE_MATERIAL_BACK_SHININESS,
+	NOUVEAU_STATE_MODELVIEW,
+	NOUVEAU_STATE_POINT_MODE,
+	NOUVEAU_STATE_POINT_PARAMETER,
+	NOUVEAU_STATE_POLYGON_MODE,
+	NOUVEAU_STATE_POLYGON_OFFSET,
+	NOUVEAU_STATE_POLYGON_STIPPLE,
+	NOUVEAU_STATE_PROJECTION,
+	NOUVEAU_STATE_RENDER_MODE,
+	NOUVEAU_STATE_SCISSOR,
+	NOUVEAU_STATE_SHADE_MODEL,
+	NOUVEAU_STATE_STENCIL_FUNC,
+	NOUVEAU_STATE_STENCIL_MASK,
+	NOUVEAU_STATE_STENCIL_OP,
+	NOUVEAU_STATE_TEX_ENV0,
+	NOUVEAU_STATE_TEX_ENV1,
+	NOUVEAU_STATE_TEX_ENV2,
+	NOUVEAU_STATE_TEX_ENV3,
+	NOUVEAU_STATE_TEX_GEN0,
+	NOUVEAU_STATE_TEX_GEN1,
+	NOUVEAU_STATE_TEX_GEN2,
+	NOUVEAU_STATE_TEX_GEN3,
+	NOUVEAU_STATE_TEX_MAT0,
+	NOUVEAU_STATE_TEX_MAT1,
+	NOUVEAU_STATE_TEX_MAT2,
+	NOUVEAU_STATE_TEX_MAT3,
+	NOUVEAU_STATE_TEX_OBJ0,
+	NOUVEAU_STATE_TEX_OBJ1,
+	NOUVEAU_STATE_TEX_OBJ2,
+	NOUVEAU_STATE_TEX_OBJ3,
+	NOUVEAU_STATE_VIEWPORT,
+	NUM_NOUVEAU_STATE,
+
+	/* Room for card-specific states. */
+
+	MAX_NOUVEAU_STATE = NUM_NOUVEAU_STATE + 16,
+};
+
+typedef void (*nouveau_state_func)(GLcontext *ctx, int emit);
+
+void
+nouveau_state_init(GLcontext *ctx);
+
+void
+nouveau_emit_nothing(GLcontext *ctx, int emit);
+
+int
+nouveau_next_dirty_state(GLcontext *ctx);
+
+void
+nouveau_state_emit(GLcontext *ctx);
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_surface.c b/src/mesa/drivers/dri/nouveau/nouveau_surface.c
new file mode 100644
index 0000000000..33393970a0
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_surface.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_util.h"
+/* Replace the storage of s with a new bo sized for a width x height image. */
+void
+nouveau_surface_alloc(GLcontext *ctx, struct nouveau_surface *s,
+		      enum nouveau_surface_layout layout,
+		      unsigned flags, unsigned format,
+		      unsigned width, unsigned height)
+{
+	unsigned tile_mode, cpp = _mesa_get_format_bytes(format);
+	int ret;
+	/* Drop the bo currently held by s, if any. */
+	nouveau_bo_ref(NULL, &s->bo);
+	/* Reset every field; the ones not listed (bo, offset) become zero. */
+	*s = (struct nouveau_surface) {
+		.layout = layout,
+		.format = format,
+		.width = width,
+		.height = height,
+		.cpp = cpp,
+		.pitch = width * cpp,
+	};
+	/* The pitch is aligned to 256 when tiled and to 64 otherwise. */
+	if (layout == TILED) {
+		s->pitch = align(s->pitch, 256);
+		tile_mode = s->pitch;
+	} else {
+		s->pitch = align(s->pitch, 64);
+		tile_mode = 0;
+	}
+	/* NOTE(review): a failed allocation is only caught by the assert. */
+	ret = nouveau_bo_new_tile(context_dev(ctx), flags, 0, s->pitch * height,
+				  tile_mode, 0, &s->bo);
+	assert(!ret);
+}
+/* Make dst refer to the same storage as src, or to nothing if src is NULL. */
+void
+nouveau_surface_ref(struct nouveau_surface *src,
+		    struct nouveau_surface *dst)
+{
+	if (!src) {
+		/* No source: just drop the bo held by dst. */
+		nouveau_bo_ref(NULL, &dst->bo);
+		return;
+	}
+	dst->offset = src->offset;
+	dst->layout = src->layout;
+	dst->format = src->format;
+	dst->width = src->width;
+	dst->height = src->height;
+	dst->cpp = src->cpp;
+	dst->pitch = src->pitch;
+	nouveau_bo_ref(src->bo, &dst->bo);
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_surface.h b/src/mesa/drivers/dri/nouveau/nouveau_surface.h
new file mode 100644
index 0000000000..ebdc89afb4
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_surface.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NOUVEAU_SURFACE_H__
+#define __NOUVEAU_SURFACE_H__
+/* Memory layouts a nouveau_surface can have. */
+enum nouveau_surface_layout {
+	LINEAR = 0,
+	TILED, /* nouveau_surface_alloc() aligns the pitch to 256 for these. */
+	SWIZZLED, /* Used for the miptree levels laid out by relayout_texture(). */
+};
+/* A 2D image stored in (part of) a buffer object. */
+struct nouveau_surface {
+	struct nouveau_bo *bo;
+	unsigned offset; /* Byte offset of the image inside bo. */
+	/* LINEAR, TILED or SWIZZLED. */
+	enum nouveau_surface_layout layout;
+	/* Texel format, bytes per texel and bytes per row. */
+	gl_format format;
+	unsigned cpp, pitch;
+	/* Dimensions in texels. */
+	unsigned width, height;
+};
+
+void
+nouveau_surface_alloc(GLcontext *ctx, struct nouveau_surface *s,
+		      enum nouveau_surface_layout layout,
+		      unsigned flags, unsigned format,
+		      unsigned width, unsigned height);
+
+void
+nouveau_surface_ref(struct nouveau_surface *src,
+		    struct nouveau_surface *dst);
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c b/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c
new file mode 100644
index 0000000000..a1609a0dd5
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_swtnl_t.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_vertex.h"
+/* Translate a GL type and component count into a tnl vertex emit format. */
+static enum tnl_attr_format
+swtnl_get_format(int type, int fields) {
+	switch (type) {
+	case GL_FLOAT:
+		switch (fields){
+		case 1:
+			return EMIT_1F;
+		case 2:
+			return EMIT_2F;
+		case 3:
+			return EMIT_3F;
+		case 4:
+			return EMIT_4F;
+		default:
+			assert(0); /* NOTE(review): under NDEBUG this falls through. */
+		}
+	case GL_UNSIGNED_BYTE:
+		switch (fields) {
+		case 4:
+			return EMIT_4UB_4F_RGBA;
+		default:
+			assert(0); /* NOTE(review): under NDEBUG this falls through. */
+		}
+	default:
+		assert(0); /* NOTE(review): under NDEBUG nothing is returned. */
+	}
+}
+/* Per-attribute layout; fields: 0 = unsupported, -1 = taken from the vb. */
+static struct swtnl_attr_info {
+	int type;
+	int fields;
+} swtnl_attrs[VERT_ATTRIB_MAX] = {
+	[VERT_ATTRIB_POS] = {
+		.type = GL_FLOAT,
+		.fields = 4,
+	},
+	[VERT_ATTRIB_NORMAL] = {
+		.type = GL_FLOAT,
+		.fields = -1, /* Resolved in swtnl_choose_attrs(). */
+	},
+	[VERT_ATTRIB_COLOR0] = {
+		.type = GL_UNSIGNED_BYTE,
+		.fields = 4,
+	},
+	[VERT_ATTRIB_COLOR1] = {
+		.type = GL_UNSIGNED_BYTE,
+		.fields = 4,
+	},
+	[VERT_ATTRIB_FOG] = {
+		.type = GL_FLOAT,
+		.fields = 1,
+	},
+	[VERT_ATTRIB_TEX0] = {
+		.type = GL_FLOAT,
+		.fields = -1,
+	},
+	[VERT_ATTRIB_TEX1] = {
+		.type = GL_FLOAT,
+		.fields = -1,
+	},
+	[VERT_ATTRIB_TEX2] = {
+		.type = GL_FLOAT,
+		.fields = -1,
+	},
+	[VERT_ATTRIB_TEX3] = {
+		.type = GL_FLOAT,
+		.fields = -1,
+	},
+};
+/* Build the tnl vertex layout from the currently enabled render inputs. */
+static void
+swtnl_choose_attrs(GLcontext *ctx)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	TNLcontext *tnl = TNL_CONTEXT(ctx);
+	struct tnl_clipspace *vtx = &tnl->clipspace;
+	static struct tnl_attr_map map[NUM_VERTEX_ATTRS];
+	int fields, i, n = 0;
+
+	render->mode = VBO;
+	render->attr_count = NUM_VERTEX_ATTRS;
+
+	/* We always want non Ndc coords format */
+	tnl->vb.AttribPtr[VERT_ATTRIB_POS] = tnl->vb.ClipPtr;
+	/* Add a map entry for every supported attribute that is enabled. */
+	for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+		struct nouveau_attr_info *ha = &TAG(vertex_attrs)[i];
+		struct swtnl_attr_info *sa = &swtnl_attrs[i];
+		struct nouveau_array_state *a = &render->attrs[i];
+
+		if (!sa->fields)
+			continue; /* Unsupported attribute. */
+		/* fields > 0: fixed size; fields < 0: size taken from the vb. */
+		if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, i)) {
+			if (sa->fields > 0)
+				fields = sa->fields;
+			else
+				fields = tnl->vb.AttribPtr[i]->size;
+
+			map[n++] = (struct tnl_attr_map) {
+				.attrib = i,
+				.format = swtnl_get_format(sa->type, fields),
+			};
+			/* Record which attribute feeds this hardware vbo slot. */
+			render->map[ha->vbo_index] = i;
+			a->attr = i;
+			a->fields = fields;
+			a->type = sa->type;
+		}
+	}
+	/* Hand the map to tnl, then copy the resulting stride/offsets back. */
+	_tnl_install_attrs(ctx, map, n, NULL, 0);
+
+	for (i = 0; i < vtx->attr_count; i++) {
+		struct tnl_clipspace_attr *ta = &vtx->attr[i];
+		struct nouveau_array_state *a = &render->attrs[ta->attrib];
+
+		a->stride = vtx->vertex_size;
+		a->offset = ta->vertoffset;
+	}
+
+	TAG(render_set_format)(ctx);
+}
+/* Release the current vertex bo, get a new scratch one, reset the count. */
+static void
+swtnl_alloc_vertices(GLcontext *ctx)
+{
+	struct nouveau_swtnl_state *swtnl = &to_render_state(ctx)->swtnl;
+
+	nouveau_bo_ref(NULL, &swtnl->vbo);
+	swtnl->buf = get_scratch_vbo(ctx, RENDER_SCRATCH_SIZE,
+				     &swtnl->vbo, NULL);
+	swtnl->vertex_count = 0;
+}
+/* Point every mapped vertex attribute at the swtnl vertex bo. */
+static void
+swtnl_bind_vertices(GLcontext *ctx)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	struct nouveau_bo *vbo = render->swtnl.vbo;
+	int slot;
+
+	for (slot = 0; slot < render->attr_count; slot++) {
+		const int attr = render->map[slot];
+
+		if (attr < 0)
+			continue; /* Nothing mapped to this slot. */
+		nouveau_bo_ref(vbo, &render->attrs[attr].bo);
+	}
+
+	TAG(render_bind_vertices)(ctx);
+}
+/* Release the bo of every mapped attribute and empty the slot map. */
+static void
+swtnl_unbind_vertices(GLcontext *ctx)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	int slot;
+
+	for (slot = 0; slot < render->attr_count; slot++) {
+		const int attr = render->map[slot];
+
+		if (attr < 0)
+			continue; /* Nothing mapped to this slot. */
+		nouveau_bo_ref(NULL, &render->attrs[attr].bo);
+		render->map[slot] = -1;
+	}
+
+	render->attr_count = 0;
+}
+/* Submit all queued vertices to the hardware and start a new scratch vbo. */
+static void
+swtnl_flush_vertices(GLcontext *ctx)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_swtnl_state *swtnl = &to_render_state(ctx)->swtnl;
+	unsigned push, start = 0, count = swtnl->vertex_count;
+	RENDER_LOCALS(ctx);
+
+	swtnl_bind_vertices(ctx);
+	/* Emit in batches no larger than what the ring can currently take. */
+	while (count) {
+		push = get_max_vertices(ctx, NULL, AVAIL_RING(chan));
+		push = MIN2(push / 12 * 12, count);
+		count -= push;
+
+		if (!push) {
+			FIRE_RING(chan);
+			continue;
+		}
+		/* Emit the next `push` vertices, then step past them. */
+		BATCH_BEGIN(nvgl_primitive(swtnl->primitive));
+		EMIT_VBO(L, ctx, start, 0, push);
+		BATCH_END();
+		start += push;
+		FIRE_RING(chan);
+	}
+
+	swtnl_alloc_vertices(ctx);
+}
+
+/* TnL renderer entry points */
+/* Render.Start hook: choose the vertex layout for the coming primitives. */
+static void
+swtnl_start(GLcontext *ctx)
+{
+	swtnl_choose_attrs(ctx);
+}
+/* Render.Finish hook: submit what is queued and unbind the attributes. */
+static void
+swtnl_finish(GLcontext *ctx)
+{
+	swtnl_flush_vertices(ctx);
+	swtnl_unbind_vertices(ctx);
+}
+/* Render.PrimitiveNotify hook: intentionally does nothing. */
+static void
+swtnl_primitive(GLcontext *ctx, GLenum mode)
+{
+}
+/* Render.ResetLineStipple hook: intentionally does nothing. */
+static void
+swtnl_reset_stipple(GLcontext *ctx)
+{
+}
+
+/* Primitive rendering */
+/* Declares swtnl/vertex_len; flushes if n more vertices of type p won't fit. */
+#define BEGIN_PRIMITIVE(p, n)						\
+	struct nouveau_swtnl_state *swtnl = &to_render_state(ctx)->swtnl; \
+	int vertex_len = TNL_CONTEXT(ctx)->clipspace.vertex_size;	\
+									\
+	if (swtnl->vertex_count + (n) > swtnl->vbo->size/vertex_len	\
+	    || (swtnl->vertex_count && swtnl->primitive != p))		\
+		swtnl_flush_vertices(ctx);				\
+									\
+	swtnl->primitive = p;
+/* Append tnl vertex i to the buffer; needs BEGIN_PRIMITIVE's locals. */
+#define OUT_VERTEX(i) do {						\
+		memcpy(swtnl->buf + swtnl->vertex_count * vertex_len,	\
+		       _tnl_get_vertex(ctx, (i)), vertex_len);		\
+		swtnl->vertex_count++;					\
+	} while (0)
+/* Render.Points hook: queue the vertices in [first, last) as GL_POINTS. */
+static void
+swtnl_points(GLcontext *ctx, GLuint first, GLuint last)
+{
+	int i, count;
+
+	while (first < last) {
+		BEGIN_PRIMITIVE(GL_POINTS, last - first);
+		/* Copy at most one vbo's worth of vertices per pass. */
+		count = MIN2(swtnl->vbo->size / vertex_len, last - first);
+		for (i = 0; i < count; i++)
+			OUT_VERTEX(first + i);
+
+		first += count;
+	}
+}
+/* Render.Line hook: queue one line segment (two vertices). */
+static void
+swtnl_line(GLcontext *ctx, GLuint v1, GLuint v2)
+{
+	BEGIN_PRIMITIVE(GL_LINES, 2);
+	OUT_VERTEX(v1);
+	OUT_VERTEX(v2);
+}
+/* Render.Triangle hook: queue one triangle (three vertices). */
+static void
+swtnl_triangle(GLcontext *ctx, GLuint v1, GLuint v2, GLuint v3)
+{
+	BEGIN_PRIMITIVE(GL_TRIANGLES, 3);
+	OUT_VERTEX(v1);
+	OUT_VERTEX(v2);
+	OUT_VERTEX(v3);
+}
+/* Render.Quad hook: queue one quad (four vertices). */
+static void
+swtnl_quad(GLcontext *ctx, GLuint v1, GLuint v2, GLuint v3, GLuint v4)
+{
+	BEGIN_PRIMITIVE(GL_QUADS, 4);
+	OUT_VERTEX(v1);
+	OUT_VERTEX(v2);
+	OUT_VERTEX(v3);
+	OUT_VERTEX(v4);
+}
+
+/* TnL initialization: install the render hooks and get a vertex buffer. */
+static void
+TAG(swtnl_init)(GLcontext *ctx)
+{
+	TNLcontext *tnl = TNL_CONTEXT(ctx);
+	/* Stock tnl implementations. */
+	tnl->Driver.RunPipeline = _tnl_run_pipeline;
+	tnl->Driver.Render.Interp = _tnl_interp;
+	tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+	tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+	tnl->Driver.Render.ClippedLine = _tnl_RenderClippedLine;
+	tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+	/* Hooks implemented in this file. */
+	tnl->Driver.Render.Start = swtnl_start;
+	tnl->Driver.Render.Finish = swtnl_finish;
+	tnl->Driver.Render.PrimitiveNotify = swtnl_primitive;
+	tnl->Driver.Render.ResetLineStipple = swtnl_reset_stipple;
+
+	tnl->Driver.Render.Points = swtnl_points;
+	tnl->Driver.Render.Line = swtnl_line;
+	tnl->Driver.Render.Triangle = swtnl_triangle;
+	tnl->Driver.Render.Quad = swtnl_quad;
+	/* Room for NUM_VERTEX_ATTRS attributes of four floats per vertex. */
+	_tnl_init_vertices(ctx, tnl->vb.Size,
+			   NUM_VERTEX_ATTRS * 4 * sizeof(GLfloat));
+	_tnl_need_projected_coords(ctx, GL_FALSE);
+	_tnl_allow_vertex_fog(ctx, GL_FALSE);
+	_tnl_wakeup(ctx);
+
+	swtnl_alloc_vertices(ctx);
+}
+/* Release the swtnl vertex bo. */
+static void
+TAG(swtnl_destroy)(GLcontext *ctx)
+{
+	nouveau_bo_ref(NULL, &to_render_state(ctx)->swtnl.vbo);
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
new file mode 100644
index 0000000000..dbf9a5cc61
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
@@ -0,0 +1,610 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_texture.h"
+#include "nouveau_fbo.h"
+#include "nouveau_util.h"
+
+#include "main/texobj.h"
+#include "main/texstore.h"
+#include "main/texformat.h"
+#include "main/texcompress.h"
+#include "main/texgetimage.h"
+#include "main/mipmap.h"
+#include "main/texfetch.h"
+#include "main/teximage.h"
+/* NewTextureObject hook; returns NULL if the allocation fails. */
+static struct gl_texture_object *
+nouveau_texture_new(GLcontext *ctx, GLuint name, GLenum target)
+{
+	struct nouveau_texture *nt = CALLOC_STRUCT(nouveau_texture);
+
+	if (nt) /* Never initialize through a NULL pointer on OOM. */
+		_mesa_initialize_texture_object(&nt->base, name, target);
+	return nt ? &nt->base : NULL;
+}
+/* DeleteTexture hook: release every miptree level, then the object itself. */
+static void
+nouveau_texture_free(GLcontext *ctx, struct gl_texture_object *t)
+{
+	struct nouveau_texture *nt = to_nouveau_texture(t);
+	int i;
+
+	for (i = 0; i < MAX_TEXTURE_LEVELS; i++)
+		nouveau_surface_ref(NULL, &nt->surfaces[i]);
+
+	_mesa_delete_texture_object(ctx, t);
+}
+/* NewTextureImage hook; returns NULL if the allocation fails. */
+static struct gl_texture_image *
+nouveau_teximage_new(GLcontext *ctx)
+{
+	struct nouveau_teximage *nti = CALLOC_STRUCT(nouveau_teximage);
+
+	return nti ? &nti->base : NULL;
+}
+/* FreeTexImageData hook: release the storage backing the image. */
+static void
+nouveau_teximage_free(GLcontext *ctx, struct gl_texture_image *ti)
+{
+	struct nouveau_teximage *nti = to_nouveau_teximage(ti);
+
+	nouveau_surface_ref(NULL, &nti->surface);
+}
+/* Map the image's bo for CPU read/write access and expose it as ti->Data. */
+static void
+nouveau_teximage_map(GLcontext *ctx, struct gl_texture_image *ti)
+{
+	struct nouveau_surface *s = &to_nouveau_teximage(ti)->surface;
+	int ret;
+	/* Images without a bo are left untouched. */
+	if (s->bo) {
+		ret = nouveau_bo_map(s->bo, NOUVEAU_BO_RDWR);
+		assert(!ret);
+
+		ti->Data = s->bo->map;
+	}
+}
+/* Undo nouveau_teximage_map(): unmap the bo (if any) and clear ti->Data. */
+static void
+nouveau_teximage_unmap(GLcontext *ctx, struct gl_texture_image *ti)
+{
+	struct nouveau_surface *s = &to_nouveau_teximage(ti)->surface;
+
+	if (s->bo)
+		nouveau_bo_unmap(s->bo);
+	ti->Data = NULL;
+}
+/* ChooseTextureFormat hook; srcFormat and srcType are not looked at. */
+static gl_format
+nouveau_choose_tex_format(GLcontext *ctx, GLint internalFormat,
+			  GLenum srcFormat, GLenum srcType)
+{
+	switch (internalFormat) {
+	case 4:
+	case GL_RGBA:
+	case GL_RGBA2:
+	case GL_RGBA4:
+	case GL_RGBA8:
+	case GL_RGBA12:
+	case GL_RGBA16:
+	case GL_RGB10_A2:
+		return MESA_FORMAT_ARGB8888;
+	case GL_RGB5_A1:
+		return MESA_FORMAT_ARGB1555;
+
+	case GL_RGB:
+	case GL_RGB8:
+	case GL_RGB10:
+	case GL_RGB12:
+	case GL_RGB16:
+		return MESA_FORMAT_XRGB8888;
+	case 3:
+	case GL_R3_G3_B2:
+	case GL_RGB4:
+	case GL_RGB5:
+		return MESA_FORMAT_RGB565;
+	/* Luminance-alpha is stored as a full ARGB8888 texel. */
+	case 2:
+	case GL_LUMINANCE_ALPHA:
+	case GL_LUMINANCE4_ALPHA4:
+	case GL_LUMINANCE6_ALPHA2:
+	case GL_LUMINANCE12_ALPHA4:
+	case GL_LUMINANCE12_ALPHA12:
+	case GL_LUMINANCE16_ALPHA16:
+	case GL_LUMINANCE8_ALPHA8:
+		return MESA_FORMAT_ARGB8888;
+
+	case 1:
+	case GL_LUMINANCE:
+	case GL_LUMINANCE4:
+	case GL_LUMINANCE12:
+	case GL_LUMINANCE16:
+	case GL_LUMINANCE8:
+		return MESA_FORMAT_L8;
+
+	case GL_ALPHA:
+	case GL_ALPHA4:
+	case GL_ALPHA12:
+	case GL_ALPHA16:
+	case GL_ALPHA8:
+		return MESA_FORMAT_A8;
+
+	case GL_INTENSITY:
+	case GL_INTENSITY4:
+	case GL_INTENSITY12:
+	case GL_INTENSITY16:
+	case GL_INTENSITY8:
+		return MESA_FORMAT_I8;
+
+	case GL_COLOR_INDEX:
+	case GL_COLOR_INDEX1_EXT:
+	case GL_COLOR_INDEX2_EXT:
+	case GL_COLOR_INDEX4_EXT:
+	case GL_COLOR_INDEX12_EXT:
+	case GL_COLOR_INDEX16_EXT:
+	case GL_COLOR_INDEX8_EXT:
+		return MESA_FORMAT_CI8;
+	/* NOTE(review): other formats only assert; no value under NDEBUG. */
+	default:
+		assert(0);
+	}
+}
+/* Does the teximage of this level match the miptree storage of the level? */
+static GLboolean
+teximage_fits(struct gl_texture_object *t, int level)
+{
+	struct gl_texture_image *ti = t->Image[0][level];
+	struct nouveau_surface *s = &to_nouveau_texture(t)->surfaces[level];
+
+	if (!ti || t->Target == GL_TEXTURE_RECTANGLE)
+		return ti != NULL; /* Existing rectangle images always fit. */
+	return s->bo && s->width == ti->Width &&
+	       s->height == ti->Height && s->format == ti->TexFormat;
+}
+/* Push a region of the level's teximage to the miptree; GL_FALSE if no fit. */
+static GLboolean
+validate_teximage(GLcontext *ctx, struct gl_texture_object *t,
+		  int level, int x, int y, int z,
+		  int width, int height, int depth)
+{
+	struct gl_texture_image *ti = t->Image[0][level];
+	/* z and depth are not used below. */
+	if (teximage_fits(t, level)) {
+		struct nouveau_surface *ss = to_nouveau_texture(t)->surfaces;
+		struct nouveau_surface *s = &to_nouveau_teximage(ti)->surface;
+		/* Rectangle textures share the image's bo; others get a copy. */
+		if (t->Target == GL_TEXTURE_RECTANGLE)
+			nouveau_surface_ref(s, &ss[level]);
+		else
+			context_drv(ctx)->surface_copy(ctx, &ss[level], s,
+						       x, y, x, y,
+						       width, height);
+
+		return GL_TRUE;
+	}
+
+	return GL_FALSE;
+}
+/* Last mipmap level in use given the min filter and the base image. */
+static int
+get_last_level(struct gl_texture_object *t)
+{
+	struct gl_texture_image *base = t->Image[0][t->BaseLevel];
+	const GLboolean mipmapped = (t->MinFilter != GL_NEAREST &&
+				     t->MinFilter != GL_LINEAR);
+
+	if (!mipmapped || !base)
+		return t->BaseLevel;
+	return MIN2(t->BaseLevel + base->MaxLog2, t->MaxLevel);
+}
+/* Rebuild the miptree storage from the base image; skips rectangle textures. */
+static void
+relayout_texture(GLcontext *ctx, struct gl_texture_object *t)
+{
+	struct gl_texture_image *base = t->Image[0][t->BaseLevel];
+
+	if (base && t->Target != GL_TEXTURE_RECTANGLE) {
+		struct nouveau_surface *ss = to_nouveau_texture(t)->surfaces;
+		struct nouveau_surface *s = &to_nouveau_teximage(base)->surface;
+		int i, ret, last = get_last_level(t);
+		unsigned size, offset = 0,
+			width = s->width,
+			height = s->height;
+
+		/* Deallocate the old storage. */
+		for (i = 0; i < MAX_TEXTURE_LEVELS; i++)
+			nouveau_bo_ref(NULL, &ss[i].bo);
+
+		/* Relayout the mipmap tree. */
+		for (i = t->BaseLevel; i <= last; i++) {
+			size = width * height * s->cpp;
+
+			/* Images larger than 16B have to be aligned. */
+			if (size > 16)
+				offset = align(offset, 64);
+
+			ss[i] = (struct nouveau_surface) {
+				.offset = offset,
+				.layout = SWIZZLED,
+				.format = s->format,
+				.width = width,
+				.height = height,
+				.cpp = s->cpp,
+				.pitch = width * s->cpp,
+			};
+			/* The next level is half the size, but at least 1x1. */
+			offset += size;
+			width = MAX2(1, width / 2);
+			height = MAX2(1, height / 2);
+		}
+
+		/* Get new storage. */
+		size = align(offset, 64);
+
+		ret = nouveau_bo_new(context_dev(ctx), NOUVEAU_BO_MAP |
+				     NOUVEAU_BO_GART | NOUVEAU_BO_VRAM,
+				     0, size, &ss[last].bo);
+		assert(!ret);
+		/* All the levels share the single bo allocated above. */
+		for (i = t->BaseLevel; i < last; i++)
+			nouveau_bo_ref(ss[last].bo, &ss[i].bo);
+	}
+}
+/* Bring the miptree up to date; GL_FALSE if base or last level don't fit. */
+GLboolean
+nouveau_texture_validate(GLcontext *ctx, struct gl_texture_object *t)
+{
+	struct nouveau_texture *nt = to_nouveau_texture(t);
+	int i, last = get_last_level(t);
+
+	if (!teximage_fits(t, t->BaseLevel) ||
+	    !teximage_fits(t, last))
+		return GL_FALSE;
+
+	if (nt->dirty) {
+		nt->dirty = GL_FALSE;
+
+		/* Copy the teximages to the actual miptree. */
+		for (i = t->BaseLevel; i <= last; i++) {
+			struct nouveau_surface *s = &nt->surfaces[i];
+			/* Levels that don't fit are silently skipped. */
+			validate_teximage(ctx, t, i, 0, 0, 0,
+					  s->width, s->height, 1);
+		}
+
+		FIRE_RING(context_chan(ctx));
+	}
+
+	return GL_TRUE;
+}
+/* Rebuild and re-upload the miptree unless base and last level still fit. */
+void
+nouveau_texture_reallocate(GLcontext *ctx, struct gl_texture_object *t)
+{
+	if (teximage_fits(t, t->BaseLevel) &&
+	    teximage_fits(t, get_last_level(t)))
+		return; /* The current storage is still good. */
+	texture_dirty(t);
+	relayout_texture(ctx, t);
+	nouveau_texture_validate(ctx, t);
+}
+/* bo placement flags for a teximage, chosen from its texel format. */
+static unsigned
+get_teximage_placement(struct gl_texture_image *ti)
+{
+	switch (ti->TexFormat) {
+	case MESA_FORMAT_A8:
+	case MESA_FORMAT_L8:
+	case MESA_FORMAT_I8:
+		return NOUVEAU_BO_MAP; /* CPU-swizzled: keep in system RAM. */
+	default:
+		return NOUVEAU_BO_GART | NOUVEAU_BO_MAP;
+	}
+}
+/* Common glTexImage path: allocate the image, store the pixels, validate. */
+static void
+nouveau_teximage(GLcontext *ctx, GLint dims, GLenum target, GLint level,
+		 GLint internalFormat,
+		 GLint width, GLint height, GLint depth, GLint border,
+		 GLenum format, GLenum type, const GLvoid *pixels,
+		 const struct gl_pixelstore_attrib *packing,
+		 struct gl_texture_object *t,
+		 struct gl_texture_image *ti)
+{
+	struct nouveau_surface *s = &to_nouveau_teximage(ti)->surface;
+	int ret;
+
+	/* Allocate a new bo for the image. */
+	nouveau_surface_alloc(ctx, s, LINEAR, get_teximage_placement(ti),
+			      ti->TexFormat, width, height);
+	ti->RowStride = s->pitch / s->cpp;
+	/* Get the source pixels (presumably resolving a bound unpack PBO). */
+	pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth,
+					     format, type, pixels, packing,
+					     "glTexImage");
+	if (pixels) {
+		/* Store the pixel data. */
+		nouveau_teximage_map(ctx, ti);
+
+		ret = _mesa_texstore(ctx, dims, ti->_BaseFormat,
+				     ti->TexFormat, ti->Data,
+				     0, 0, 0, s->pitch,
+				     ti->ImageOffsets,
+				     width, height, depth,
+				     format, type, pixels, packing);
+		assert(ret);
+
+		nouveau_teximage_unmap(ctx, ti);
+		_mesa_unmap_teximage_pbo(ctx, packing);
+
+		if (!validate_teximage(ctx, t, level, 0, 0, 0,
+				       width, height, depth))
+			/* It doesn't fit, mark it as dirty. */
+			texture_dirty(t);
+	}
+	/* A new base level may require a different miptree layout. */
+	if (level == t->BaseLevel) {
+		if (!teximage_fits(t, level))
+			relayout_texture(ctx, t);
+		nouveau_texture_validate(ctx, t);
+	}
+	/* Mark the texture state of the current unit as dirty. */
+	context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
+	context_dirty_i(ctx, TEX_ENV, ctx->Texture.CurrentUnit);
+}
+/* TexImage1D hook: forwards to nouveau_teximage() with height = depth = 1. */
+static void
+nouveau_teximage_1d(GLcontext *ctx, GLenum target, GLint level,
+		    GLint internalFormat,
+		    GLint width, GLint border,
+		    GLenum format, GLenum type, const GLvoid *pixels,
+		    const struct gl_pixelstore_attrib *packing,
+		    struct gl_texture_object *t,
+		    struct gl_texture_image *ti)
+{
+	nouveau_teximage(ctx, 1, target, level, internalFormat,
+			 width, 1, 1, border, format, type, pixels,
+			 packing, t, ti);
+}
+/* TexImage2D hook: forwards to nouveau_teximage() with depth = 1. */
+static void
+nouveau_teximage_2d(GLcontext *ctx, GLenum target, GLint level,
+		    GLint internalFormat,
+		    GLint width, GLint height, GLint border,
+		    GLenum format, GLenum type, const GLvoid *pixels,
+		    const struct gl_pixelstore_attrib *packing,
+		    struct gl_texture_object *t,
+		    struct gl_texture_image *ti)
+{
+	nouveau_teximage(ctx, 2, target, level, internalFormat,
+			 width, height, 1, border, format, type, pixels,
+			 packing, t, ti);
+}
+/* TexImage3D hook: forwards all its arguments to nouveau_teximage(). */
+static void
+nouveau_teximage_3d(GLcontext *ctx, GLenum target, GLint level,
+		    GLint internalFormat,
+		    GLint width, GLint height, GLint depth, GLint border,
+		    GLenum format, GLenum type, const GLvoid *pixels,
+		    const struct gl_pixelstore_attrib *packing,
+		    struct gl_texture_object *t,
+		    struct gl_texture_image *ti)
+{
+	nouveau_teximage(ctx, 3, target, level, internalFormat,
+			 width, height, depth, border, format, type, pixels,
+			 packing, t, ti);
+}
+/* Common glTexSubImage path: store a sub-region, then update the miptree. */
+static void
+nouveau_texsubimage(GLcontext *ctx, GLint dims, GLenum target, GLint level,
+		    GLint xoffset, GLint yoffset, GLint zoffset,
+		    GLint width, GLint height, GLint depth,
+		    GLenum format, GLenum type, const void *pixels,
+		    const struct gl_pixelstore_attrib *packing,
+		    struct gl_texture_object *t,
+		    struct gl_texture_image *ti)
+{
+	struct nouveau_surface *s = &to_nouveau_teximage(ti)->surface;
+	int ret;
+
+	pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth,
+					     format, type, pixels, packing,
+					     "glTexSubImage");
+	if (pixels) {
+		nouveau_teximage_map(ctx, ti);
+		/* Pass the real dimensionality, as nouveau_teximage() does. */
+		ret = _mesa_texstore(ctx, dims, ti->_BaseFormat, ti->TexFormat,
+				     ti->Data, xoffset, yoffset, zoffset,
+				     s->pitch, ti->ImageOffsets,
+				     width, height, depth, format, type,
+				     pixels, packing);
+		assert(ret);
+
+		nouveau_teximage_unmap(ctx, ti);
+		_mesa_unmap_teximage_pbo(ctx, packing);
+	}
+	/* A dirty texture is re-uploaded whole by nouveau_texture_validate(). */
+	if (!to_nouveau_texture(t)->dirty)
+		validate_teximage(ctx, t, level, xoffset, yoffset, zoffset,
+				  width, height, depth);
+}
+/* TexSubImage3D hook: forwards all its arguments to nouveau_texsubimage(). */
+static void
+nouveau_texsubimage_3d(GLcontext *ctx, GLenum target, GLint level,
+		       GLint xoffset, GLint yoffset, GLint zoffset,
+		       GLint width, GLint height, GLint depth,
+		       GLenum format, GLenum type, const void *pixels,
+		       const struct gl_pixelstore_attrib *packing,
+		       struct gl_texture_object *t,
+		       struct gl_texture_image *ti)
+{
+	nouveau_texsubimage(ctx, 3, target, level, xoffset, yoffset, zoffset,
+			    width, height, depth, format, type, pixels,
+			    packing, t, ti);
+}
+/* TexSubImage2D hook: nouveau_texsubimage() with zoffset = 0, depth = 1. */
+static void
+nouveau_texsubimage_2d(GLcontext *ctx, GLenum target, GLint level,
+		       GLint xoffset, GLint yoffset,
+		       GLint width, GLint height,
+		       GLenum format, GLenum type, const void *pixels,
+		       const struct gl_pixelstore_attrib *packing,
+		       struct gl_texture_object *t,
+		       struct gl_texture_image *ti)
+{
+	nouveau_texsubimage(ctx, 2, target, level, xoffset, yoffset, 0,
+			    width, height, 1, format, type, pixels,
+			    packing, t, ti);
+}
+/* TexSubImage1D hook: nouveau_texsubimage() with a single row and slice. */
+static void
+nouveau_texsubimage_1d(GLcontext *ctx, GLenum target, GLint level,
+		       GLint xoffset, GLint width,
+		       GLenum format, GLenum type, const void *pixels,
+		       const struct gl_pixelstore_attrib *packing,
+		       struct gl_texture_object *t,
+		       struct gl_texture_image *ti)
+{
+	nouveau_texsubimage(ctx, 1, target, level, xoffset, 0, 0,
+			    width, 1, 1, format, type, pixels,
+			    packing, t, ti);
+}
+/* GetTexImage hook: map the image around the core _mesa_get_teximage(). */
+static void
+nouveau_get_teximage(GLcontext *ctx, GLenum target, GLint level,
+		     GLenum format, GLenum type, GLvoid *pixels,
+		     struct gl_texture_object *t,
+		     struct gl_texture_image *ti)
+{
+	nouveau_teximage_map(ctx, ti);
+	_mesa_get_teximage(ctx, target, level, format, type, pixels,
+			   t, ti);
+	nouveau_teximage_unmap(ctx, ti);
+}
+/* BindTexture hook: mark the texture state of the current unit as dirty. */
+static void
+nouveau_bind_texture(GLcontext *ctx, GLenum target,
+		     struct gl_texture_object *t)
+{
+	context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
+	context_dirty_i(ctx, TEX_ENV, ctx->Texture.CurrentUnit);
+}
+/* Texel format to use when texturing from renderbuffer rb. */
+static gl_format
+get_texbuffer_format(struct gl_renderbuffer *rb, GLint format)
+{
+	struct nouveau_surface *surf = &to_nouveau_renderbuffer(rb)->surface;
+
+	/* Surfaces with less than 4 bytes per pixel keep their own format. */
+	if (surf->cpp < 4)
+		return surf->format;
+
+	return (format == __DRI_TEXTURE_FORMAT_RGBA ?
+		MESA_FORMAT_ARGB8888 : MESA_FORMAT_XRGB8888);
+}
+/* Use the front-left buffer of draw as level 0 of the current texture. */
+void
+nouveau_set_texbuffer(__DRIcontext *dri_ctx,
+		      GLint target, GLint format,
+		      __DRIdrawable *draw)
+{
+	struct nouveau_context *nctx = dri_ctx->driverPrivate;
+	GLcontext *ctx = &nctx->base;
+	struct gl_framebuffer *fb = draw->driverPrivate;
+	struct gl_renderbuffer *rb =
+		fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+	struct gl_texture_object *t = _mesa_get_current_tex_object(ctx, target);
+	struct gl_texture_image *ti;
+	struct nouveau_surface *s;
+
+	_mesa_lock_texture(ctx, t);
+	ti = _mesa_get_tex_image(ctx, t, target, 0);
+	s = &to_nouveau_teximage(ti)->surface;
+
+	/* Update the texture surface with the given drawable. */
+	nouveau_update_renderbuffers(dri_ctx, draw);
+	nouveau_surface_ref(&to_nouveau_renderbuffer(rb)->surface, s);
+
+	/* Update the image fields. */
+	_mesa_init_teximage_fields(ctx, target, ti, s->width, s->height,
+				   1, 0, s->cpp);
+	ti->RowStride = s->pitch / s->cpp;
+	ti->TexFormat = s->format = get_texbuffer_format(rb, format);
+
+	/* Try to validate it. */
+	if (!validate_teximage(ctx, t, 0, 0, 0, 0, s->width, s->height, 1))
+		nouveau_texture_reallocate(ctx, t);
+	/* Mark the texture state of the current unit as dirty. */
+	context_dirty_i(ctx, TEX_OBJ, ctx->Texture.CurrentUnit);
+	context_dirty_i(ctx, TEX_ENV, ctx->Texture.CurrentUnit);
+
+	_mesa_unlock_texture(ctx, t);
+}
+/* MapTexture hook: map the existing images from BaseLevel up to _MaxLevel. */
+static void
+nouveau_texture_map(GLcontext *ctx, struct gl_texture_object *t)
+{
+	int i;
+	/* NOTE(review): level _MaxLevel itself is skipped; confirm intended. */
+	for (i = t->BaseLevel; i < t->_MaxLevel; i++) {
+		if (t->Image[0][i])
+			nouveau_teximage_map(ctx, t->Image[0][i]);
+	}
+}
+/* UnmapTexture hook: unmap the images from BaseLevel up to _MaxLevel. */
+static void
+nouveau_texture_unmap(GLcontext *ctx, struct gl_texture_object *t)
+{
+	int i;
+	/* NOTE(review): level _MaxLevel itself is skipped; confirm intended. */
+	for (i = t->BaseLevel; i < t->_MaxLevel; i++) {
+		if (t->Image[0][i])
+			nouveau_teximage_unmap(ctx, t->Image[0][i]);
+	}
+}
+/* Plug the texture hooks of this file into the given function table. */
+void
+nouveau_texture_functions_init(struct dd_function_table *functions)
+{
+	functions->NewTextureObject = nouveau_texture_new;
+	functions->DeleteTexture = nouveau_texture_free;
+	functions->NewTextureImage = nouveau_teximage_new;
+	functions->FreeTexImageData = nouveau_teximage_free;
+	functions->ChooseTextureFormat = nouveau_choose_tex_format;
+	functions->TexImage1D = nouveau_teximage_1d;
+	functions->TexImage2D = nouveau_teximage_2d;
+	functions->TexImage3D = nouveau_teximage_3d;
+	functions->TexSubImage1D = nouveau_texsubimage_1d;
+	functions->TexSubImage2D = nouveau_texsubimage_2d;
+	functions->TexSubImage3D = nouveau_texsubimage_3d;
+	functions->GetTexImage = nouveau_get_teximage;
+	functions->BindTexture = nouveau_bind_texture;
+	functions->MapTexture = nouveau_texture_map;
+	functions->UnmapTexture = nouveau_texture_unmap;
+}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.h b/src/mesa/drivers/dri/nouveau/nouveau_texture.h
new file mode 100644
index 0000000000..251f537bba
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NOUVEAU_TEXTURE_H__
+#define __NOUVEAU_TEXTURE_H__
+
+struct nouveau_teximage {
+	struct gl_texture_image base; /* NOTE(review): presumably kept first so a gl_texture_image pointer can be cast - confirm */
+	struct nouveau_surface surface; /* driver surface stored alongside the image */
+};
+#define to_nouveau_teximage(x) ((struct nouveau_teximage *)(x)) /* plain, unchecked pointer cast */
+
+struct nouveau_texture {
+	struct gl_texture_object base; /* NOTE(review): presumably kept first so a gl_texture_object pointer can be cast - confirm */
+	struct nouveau_surface surfaces[MAX_TEXTURE_LEVELS]; /* sized for MAX_TEXTURE_LEVELS entries; presumably indexed by mipmap level */
+	GLboolean dirty; /* set to GL_TRUE by texture_dirty() */
+};
+#define to_nouveau_texture(x) ((struct nouveau_texture *)(x)) /* plain, unchecked pointer cast */
+
+#define texture_dirty(t) \
+	(to_nouveau_texture(t)->dirty = GL_TRUE) /* sets the dirty flag of t; parenthesized so it stays one expression in any context */
+
+void
+nouveau_set_texbuffer(__DRIcontext *dri_ctx,
+		      GLint target, GLint format,
+		      __DRIdrawable *draw);
+
+GLboolean
+nouveau_texture_validate(GLcontext *ctx, struct gl_texture_object *t);
+
+void
+nouveau_texture_reallocate(GLcontext *ctx, struct gl_texture_object *t);
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_util.h b/src/mesa/drivers/dri/nouveau/nouveau_util.h
new file mode 100644
index 0000000000..584cb80ef6
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_util.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NOUVEAU_UTIL_H__
+#define __NOUVEAU_UTIL_H__
+
+#include "main/formats.h"
+#include "main/colormac.h"
+
+static inline unsigned pack_rgba_i(gl_format f, uint8_t c[]) /* packs the 8-bit channels of c[] into one pixel of format f */
+{
+	switch (f) {
+	case MESA_FORMAT_ARGB8888:
+		return PACK_COLOR_8888(c[ACOMP], c[RCOMP], c[GCOMP], c[BCOMP]);
+	case MESA_FORMAT_ARGB8888_REV:
+		return PACK_COLOR_8888(c[BCOMP], c[GCOMP], c[RCOMP], c[ACOMP]);
+	case MESA_FORMAT_XRGB8888:
+		return PACK_COLOR_8888(0, c[RCOMP], c[GCOMP], c[BCOMP]);
+	case MESA_FORMAT_XRGB8888_REV:
+		return PACK_COLOR_8888(c[BCOMP], c[GCOMP], c[RCOMP], 0);
+	case MESA_FORMAT_RGBA8888:
+		return PACK_COLOR_8888(c[RCOMP], c[GCOMP], c[BCOMP], c[ACOMP]);
+	case MESA_FORMAT_RGBA8888_REV:
+		return PACK_COLOR_8888(c[ACOMP], c[BCOMP], c[GCOMP], c[RCOMP]);
+	case MESA_FORMAT_RGB565:
+		return PACK_COLOR_565(c[RCOMP], c[GCOMP], c[BCOMP]);
+	default:
+		assert(0); /* format not handled by this helper */
+		return 0; /* keeps the result defined when assert() is compiled out */
+	}
+}
+
+static inline unsigned pack_zs_i(gl_format f, uint32_t z, uint8_t s) /* packs depth z and stencil s into one value of format f */
+{
+	switch (f) {
+	case MESA_FORMAT_Z24_S8:
+		return (z & 0xffffff00) | (s & 0xff);
+	case MESA_FORMAT_Z24_X8:
+		return (z & 0xffffff00);
+	case MESA_FORMAT_Z16:
+		return (z & 0xffff0000) >> 16;
+	default:
+		assert(0); /* format not handled by this helper */
+		return 0; /* keeps the result defined when assert() is compiled out */
+	}
+}
+
+static inline unsigned
+pack_rgba_f(gl_format f, float c[])
+{
+	uint8_t bytes[] = { /* float channels converted to bytes, listed in R, G, B, A order */
+		FLOAT_TO_UBYTE(c[RCOMP]), FLOAT_TO_UBYTE(c[GCOMP]),
+		FLOAT_TO_UBYTE(c[BCOMP]), FLOAT_TO_UBYTE(c[ACOMP]),
+	};
+	return pack_rgba_i(f, bytes);
+}
+
+static inline unsigned pack_zs_f(gl_format f, float z, uint8_t s)
+{
+	uint32_t depth = FLOAT_TO_UINT(z); /* float depth converted to an integer by FLOAT_TO_UINT */
+	return pack_zs_i(f, depth, s);
+}
+
+/*
+ * Integer base-2 logarithm, rounded towards zero.  Each step tests one
+ * group of upper bits and, when any of them is set, shifts the value
+ * down and adds the shift to the result.  Inputs 0 and 1 both give 0.
+ */
+static inline unsigned
+log2i(unsigned i)
+{
+	static const struct {
+		unsigned mask;
+		unsigned shift;
+	} steps[] = {
+		{ 0xffff0000, 16 },
+		{ 0x0000ff00, 8 },
+		{ 0x000000f0, 4 },
+		{ 0x0000000c, 2 },
+		{ 0x00000002, 1 },
+	};
+	unsigned r = 0, n;
+	for (n = 0; n < sizeof(steps) / sizeof(steps[0]); n++) {
+		if (i & steps[n].mask) {
+			i >>= steps[n].shift;
+			r += steps[n].shift;
+		}
+	}
+	return r;
+}
+
+static inline unsigned align(unsigned x, unsigned m)
+{
+	unsigned low = m - 1; /* the bits below the alignment when m is a power of two */
+	return (x + low) & ~low; /* rounds x up; yields a multiple of m only for power-of-two m */
+}
+
+static inline void get_scissors(struct gl_framebuffer *fb, int *x, int *y, int *w, int *h)
+{
+	*w = fb->_Xmax - fb->_Xmin;
+	*h = fb->_Ymax - fb->_Ymin;
+	*x = fb->_Xmin;
+	if (fb->Name) /* not the window system FBO: Y is taken as-is */
+		*y = fb->_Ymin;
+	else /* window system FBO: flip the Y coordinate */
+		*y = fb->Height - fb->_Ymax;
+}
+
+static inline void
+get_viewport_scale(GLcontext *ctx, float a[16])
+{
+	const struct gl_viewport_attrib *view = &ctx->Viewport;
+	const struct gl_framebuffer *draw = ctx->DrawBuffer;
+	/* Half the viewport extents; the Y scale is negated for the window
+	 * system FBO (Name == 0) so that the Y coordinate gets flipped. */
+	const float half_w = (float)view->Width / 2;
+	const float half_h = (float)view->Height / 2;
+
+	a[MAT_SX] = half_w;
+	a[MAT_SY] = draw->Name ? half_h : -half_h;
+
+	/* Z scale: _DepthMaxF times half of (Far - Near). */
+	a[MAT_SZ] = draw->_DepthMaxF * (view->Far - view->Near) / 2;
+}
+
+static inline void
+get_viewport_translate(GLcontext *ctx, float a[4])
+{
+	const struct gl_viewport_attrib *view = &ctx->Viewport;
+	const struct gl_framebuffer *draw = ctx->DrawBuffer;
+
+	/* X: half the viewport width plus its X origin. */
+	a[0] = (float)view->Width / 2 + view->X;
+
+	/* Y: same for the height, but measured down from the framebuffer
+	 * height for the window system FBO (Name == 0) to flip it. */
+	a[1] = draw->Name ? (float)view->Height / 2 + view->Y :
+		draw->Height - (float)view->Height / 2 - view->Y;
+	/* Z: _DepthMaxF times half of (Far + Near). */
+	a[2] = draw->_DepthMaxF * (view->Far + view->Near) / 2;
+}
+
+static inline void
+OUT_RINGm(struct nouveau_channel *chan, float m[16])
+{
+	int k;
+	/* Push all 16 floats of m in transposed order:
+	 * m[0], m[4], m[8], m[12], m[1], m[5], ... */
+	for (k = 0; k < 16; k++)
+		OUT_RINGf(chan, m[4 * (k % 4) + k / 4]);
+}
+
+static inline GLboolean is_color_operand(int op)
+{
+	/* True iff op is GL_SRC_COLOR or GL_ONE_MINUS_SRC_COLOR. */
+	return !(op != GL_SRC_COLOR && op != GL_ONE_MINUS_SRC_COLOR);
+}
+
+static inline GLboolean is_negative_operand(int op)
+{
+	/* True iff op is one of the two GL_ONE_MINUS_SRC_* operands. */
+	return !(op != GL_ONE_MINUS_SRC_COLOR && op != GL_ONE_MINUS_SRC_ALPHA);
+}
+
+static inline GLboolean is_texture_source(int s)
+{
+	const GLboolean is_unit = (GL_TEXTURE0 <= s && s <= GL_TEXTURE31);
+	return is_unit || s == GL_TEXTURE; /* a numbered GL_TEXTUREn or GL_TEXTURE itself */
+}
+
+static inline struct gl_texgen *
+get_texgen_coord(struct gl_texture_unit *u, int i)
+{
+	struct gl_texgen *coords[] = { &u->GenS, &u->GenT, &u->GenR, &u->GenQ };
+	return coords[i]; /* i = 0..3 picks GenS, GenT, GenR, GenQ; i is not range-checked */
+}
+
+static inline float *
+get_texgen_coeff(struct gl_texgen *c)
+{
+	/* Plane selected by the generation mode, or NULL for any mode
+	 * other than GL_OBJECT_LINEAR / GL_EYE_LINEAR. */
+	return (c->Mode == GL_OBJECT_LINEAR) ? c->ObjectPlane :
+	       (c->Mode == GL_EYE_LINEAR) ? c->EyePlane :
+	       NULL;
+}
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
new file mode 100644
index 0000000000..e5858f8268
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nouveau_vbo_t.c
@@ -0,0 +1,463 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_bufferobj.h"
+#include "nouveau_util.h"
+
+#include "main/bufferobj.h"
+#include "main/image.h"
+
+/* Arbitrary pushbuf length we can assume we can get with a single
+ * WAIT_RING. */
+#define PUSHBUF_DWORDS 2048
+
+/* Functions to set up struct nouveau_array_state from something like
+ * a GL array or index buffer. */
+
+static void
+vbo_init_array(struct nouveau_array_state *a, int attr, int stride,
+	       int fields, int type, struct gl_buffer_object *obj,
+	       const void *ptr, GLboolean map)
+{
+	/*
+	 * Fill in *a from the given array description.  a->buf is only
+	 * set to a CPU pointer when 'map' is true; in that case the
+	 * element extractors for the array are looked up as well.
+	 * Otherwise a->buf is NULL.
+	 */
+	a->attr = attr;
+	a->stride = stride;
+	a->fields = fields;
+	a->type = type;
+
+	if (!_mesa_is_bufferobj(obj)) {
+		/* Not a buffer object: 'ptr' is used as the data pointer. */
+		nouveau_bo_ref(NULL, &a->bo);
+		a->offset = 0;
+		a->buf = map ? ptr : NULL;
+	} else {
+		/* Buffer object: 'ptr' is stored as an offset into its bo. */
+		nouveau_bo_ref(to_nouveau_bufferobj(obj)->bo, &a->bo);
+		a->offset = (intptr_t)ptr;
+
+		if (map)
+			nouveau_bo_map(a->bo, NOUVEAU_BO_RD);
+		a->buf = map ? a->bo->map + a->offset : NULL;
+	}
+
+	if (a->buf)
+		get_array_extract(a, &a->extract_u, &a->extract_f);
+}
+
+static void
+vbo_deinit_array(struct nouveau_array_state *a)
+{
+	/* Unmap the bo if it still has a mapping, then reset a->bo through
+	 * nouveau_bo_ref(NULL, ...). */
+	if (a->bo && a->bo->map)
+		nouveau_bo_unmap(a->bo);
+	if (a->bo)
+		nouveau_bo_ref(NULL, &a->bo);
+	a->fields = 0;
+	a->buf = NULL;
+}
+
+static int
+get_array_stride(GLcontext *ctx, const struct gl_client_array *a)
+{
+	/* Arrays in buffer objects, and every array outside the VBO path,
+	 * keep the stride stored in the client array. */
+	if (to_render_state(ctx)->mode != VBO ||
+	    _mesa_is_bufferobj(a->BufferObj))
+		return a->StrideB;
+	/* Pack client buffers: element size rounded up to a multiple of 4. */
+	return align(_mesa_sizeof_type(a->Type) * a->Size, 4);
+}
+
+static void
+vbo_init_arrays(GLcontext *ctx, const struct _mesa_index_buffer *ib,
+		const struct gl_client_array **arrays)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	const GLboolean map_attrs = (render->mode == IMM);
+	int slot;
+
+	/* The index buffer is always set up with map = GL_TRUE. */
+	if (ib)
+		vbo_init_array(&render->ib, 0, 0, ib->count, ib->type,
+			       ib->obj, ib->ptr, GL_TRUE);
+
+	/* Vertex attributes are only mapped in immediate mode. */
+	for (slot = 0; slot < render->attr_count; slot++) {
+		const int attr = render->map[slot];
+		if (attr < 0)
+			continue;
+		vbo_init_array(&render->attrs[attr], attr,
+			       get_array_stride(ctx, arrays[attr]),
+			       arrays[attr]->Size, arrays[attr]->Type,
+			       arrays[attr]->BufferObj, arrays[attr]->Ptr,
+			       map_attrs);
+	}
+}
+
+static void
+vbo_deinit_arrays(GLcontext *ctx, const struct _mesa_index_buffer *ib,
+		const struct gl_client_array **arrays)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	int slot;
+
+	if (ib)
+		vbo_deinit_array(&render->ib);
+
+	/* Tear down every attribute in use and mark its map slot as free. */
+	for (slot = 0; slot < render->attr_count; slot++) {
+		const int attr = render->map[slot];
+		if (attr < 0)
+			continue;
+		vbo_deinit_array(&render->attrs[attr]);
+		render->map[slot] = -1;
+	}
+
+	render->attr_count = 0;
+}
+
+/* Make some rendering decisions from the GL context. */
+
+static void
+vbo_choose_render_mode(GLcontext *ctx, const struct gl_client_array **arrays)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	int i;
+
+	/*
+	 * Default to the VBO path.  With lighting enabled, switch to
+	 * immediate mode as soon as one of the material attribute arrays
+	 * has a non-zero stride.
+	 */
+	render->mode = VBO;
+
+	for (i = 0; ctx->Light.Enabled && i < MAT_ATTRIB_MAX; i++) {
+		if (arrays[VERT_ATTRIB_GENERIC0 + i]->StrideB) {
+			render->mode = IMM;
+			break;
+		}
+	}
+
+	render->attr_count = (render->mode == VBO) ? NUM_VERTEX_ATTRS : 0;
+}
+
+static void
+vbo_emit_attr(GLcontext *ctx, const struct gl_client_array **arrays, int attr)
+{
+	struct nouveau_channel *chan = context_chan(ctx); /* not used directly; presumably referenced by the render macros - confirm */
+	struct nouveau_render_state *render = to_render_state(ctx);
+	const struct gl_client_array *array = arrays[attr];
+	struct nouveau_array_state *a = &render->attrs[attr];
+	RENDER_LOCALS(ctx);
+
+	if (!array->StrideB) { /* zero stride: the attribute has one value for all vertices */
+		if (attr >= VERT_ATTRIB_GENERIC0)
+			/* nouveau_update_state takes care of materials. */
+			return;
+
+		/* Constant attribute: map it, emit element 0 right away, unmap it. */
+		vbo_init_array(a, attr, array->StrideB, array->Size,
+			       array->Type, array->BufferObj, array->Ptr,
+			       GL_TRUE);
+		EMIT_IMM(ctx, a, 0);
+		vbo_deinit_array(a);
+
+	} else {
+		/* Varying attribute: only record it in render->map and grow the vertex size. */
+		struct nouveau_attr_info *info = &TAG(vertex_attrs)[attr];
+
+		if (render->mode == VBO) { /* VBO path: the slot is fixed by the attribute info table */
+			render->map[info->vbo_index] = attr;
+			render->vertex_size += array->_ElementSize;
+		} else { /* immediate path: slots are handed out in emission order */
+			render->map[render->attr_count++] = attr;
+			render->vertex_size += 4 * info->imm_fields;
+		}
+	}
+}
+
+#define MAT(a) (VERT_ATTRIB_GENERIC0 + MAT_ATTRIB_##a) /* vertex attribute index used for material attribute a */
+
+static void
+vbo_choose_attrs(GLcontext *ctx, const struct gl_client_array **arrays)
+{
+	struct nouveau_render_state *render = to_render_state(ctx); /* accumulates the chosen attributes */
+	int i;
+
+	/* Reset the vertex size. */
+	render->vertex_size = 0;
+
+	vbo_emit_attr(ctx, arrays, VERT_ATTRIB_COLOR0); /* always emitted */
+	if (ctx->Fog.ColorSumEnabled && !ctx->Light.Enabled) /* secondary colour only with colour sum and no lighting */
+		vbo_emit_attr(ctx, arrays, VERT_ATTRIB_COLOR1);
+
+	for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) { /* one texcoord set per enabled coord unit */
+		if (ctx->Texture._EnabledCoordUnits & (1 << i))
+			vbo_emit_attr(ctx, arrays, VERT_ATTRIB_TEX0 + i);
+	}
+
+	if (ctx->Fog.Enabled && ctx->Fog.FogCoordinateSource == GL_FOG_COORD)
+		vbo_emit_attr(ctx, arrays, VERT_ATTRIB_FOG);
+
+	if (ctx->Light.Enabled || /* normals feed lighting and normal-based texgen */
+	    (ctx->Texture._GenFlags & TEXGEN_NEED_NORMALS))
+		vbo_emit_attr(ctx, arrays, VERT_ATTRIB_NORMAL);
+
+	if (ctx->Light.Enabled) { /* material attributes; back-face ones only with two-sided lighting */
+		vbo_emit_attr(ctx, arrays, MAT(FRONT_AMBIENT));
+		vbo_emit_attr(ctx, arrays, MAT(FRONT_DIFFUSE));
+		vbo_emit_attr(ctx, arrays, MAT(FRONT_SPECULAR));
+		vbo_emit_attr(ctx, arrays, MAT(FRONT_SHININESS));
+
+		if (ctx->Light.Model.TwoSide) {
+			vbo_emit_attr(ctx, arrays, MAT(BACK_AMBIENT));
+			vbo_emit_attr(ctx, arrays, MAT(BACK_DIFFUSE));
+			vbo_emit_attr(ctx, arrays, MAT(BACK_SPECULAR));
+			vbo_emit_attr(ctx, arrays, MAT(BACK_SHININESS));
+		}
+	}
+
+	vbo_emit_attr(ctx, arrays, VERT_ATTRIB_POS); /* position is always emitted, and always last */
+}
+
+static int
+get_max_client_stride(GLcontext *ctx, const struct gl_client_array **arrays)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	int slot, widest = 0;
+
+	/* Largest stride among the mapped attributes that are not backed by
+	 * a buffer object; 0 when there is no such attribute. */
+	for (slot = 0; slot < render->attr_count; slot++) {
+		const int attr = render->map[slot];
+
+		if (attr < 0 || _mesa_is_bufferobj(arrays[attr]->BufferObj))
+			continue;
+
+		widest = MAX2(widest, get_array_stride(ctx, arrays[attr]));
+	}
+
+	return widest;
+}
+
+static void
+TAG(vbo_render_prims)(GLcontext *ctx, const struct gl_client_array **arrays,
+		      const struct _mesa_prim *prims, GLuint nr_prims,
+		      const struct _mesa_index_buffer *ib,
+		      GLboolean index_bounds_valid,
+		      GLuint min_index, GLuint max_index);
+
+static GLboolean
+vbo_maybe_split(GLcontext *ctx, const struct gl_client_array **arrays,
+	    const struct _mesa_prim *prims, GLuint nr_prims,
+	    const struct _mesa_index_buffer *ib,
+	    GLuint min_index, GLuint max_index)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct nouveau_render_state *render = to_render_state(ctx);
+	unsigned pushbuf_avail = PUSHBUF_DWORDS - 2 * (nctx->bo.count +
+						       render->attr_count), /* PUSHBUF_DWORDS minus 2 dwords per nctx->bo entry and per attribute */
+		vert_avail = get_max_vertices(ctx, NULL, pushbuf_avail), /* vertex limit computed without an index buffer */
+		idx_avail = get_max_vertices(ctx, ib, pushbuf_avail); /* limit computed with the index buffer */
+	int stride;
+
+	/* Try to keep client buffers smaller than the scratch BOs. */
+	if (render->mode == VBO &&
+	    (stride = get_max_client_stride(ctx, arrays)))
+		    vert_avail = MIN2(vert_avail,
+				      RENDER_SCRATCH_SIZE / stride);
+	/* NOTE(review): the range holds max_index - min_index + 1 vertices, so '>' admits vert_avail + 1 of them - confirm intended. */
+	if (max_index - min_index > vert_avail ||
+	    (ib && ib->count > idx_avail)) {
+		struct split_limits limits = {
+			.max_verts = vert_avail,
+			.max_indices = idx_avail,
+			.max_vb_size = ~0,
+		};
+
+		vbo_split_prims(ctx, arrays, prims, nr_prims, ib, min_index,
+				max_index, TAG(vbo_render_prims), &limits); /* TAG(vbo_render_prims) is handed over as the draw callback */
+		return GL_TRUE; /* the draw was handed to vbo_split_prims(); the caller must not draw it again */
+	}
+
+	return GL_FALSE; /* within the limits: the caller draws directly */
+}
+
+/* VBO rendering path. */
+
+static void
+vbo_bind_vertices(GLcontext *ctx, const struct gl_client_array **arrays,
+		  GLint basevertex, GLuint min_index, GLuint max_index)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	int i;
+
+	for (i = 0; i < NUM_VERTEX_ATTRS; i++) { /* walks every VBO slot, not just attr_count of them */
+		int attr = render->map[i];
+
+		if (attr >= 0) { /* negative entries are unused slots */
+			const struct gl_client_array *array = arrays[attr];
+			struct nouveau_array_state *a = &render->attrs[attr];
+			unsigned delta = (basevertex + min_index)
+				* array->StrideB; /* byte offset of the first referenced vertex, using the source stride */
+
+			if (a->bo) {
+				/* Array in a buffer obj. */
+				a->offset = (intptr_t)array->Ptr + delta;
+			} else {
+				int j, n = max_index - min_index + 1; /* number of vertices in [min_index, max_index] */
+				char *sp = (char *)array->Ptr + delta;
+				char *dp = get_scratch_vbo(ctx, n * a->stride,
+							   &a->bo, &a->offset); /* also fills in a->bo and a->offset */
+
+				/* Array in client memory, move it to
+				 * a scratch buffer obj. */
+				for (j = 0; j < n; j++) /* repack: read at the source stride, write at a->stride */
+					memcpy(dp + j * a->stride,
+					       sp + j * array->StrideB,
+					       a->stride);
+			}
+		}
+	}
+
+	TAG(render_bind_vertices)(ctx); /* chipset-specific hook, called once all offsets are set */
+}
+
+static void
+vbo_draw_vbo(GLcontext *ctx, const struct gl_client_array **arrays,
+	     const struct _mesa_prim *prims, GLuint nr_prims,
+	     const struct _mesa_index_buffer *ib, GLuint min_index,
+	     GLuint max_index)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	dispatch_t dispatch;
+	int delta = -min_index, basevertex = 0, i; /* delta is passed to dispatch(); presumably rebases indices onto the bound range - confirm */
+	RENDER_LOCALS(ctx);
+
+	get_array_dispatch(&to_render_state(ctx)->ib, &dispatch); /* dispatch function is chosen from the index buffer state */
+
+	TAG(render_set_format)(ctx);
+
+	for (i = 0; i < nr_prims; i++) {
+		unsigned start = prims[i].start,
+			count = prims[i].count;
+
+		if (i == 0 || basevertex != prims[i].basevertex) { /* (re)bind on the first prim and whenever basevertex changes */
+			basevertex = prims[i].basevertex;
+			vbo_bind_vertices(ctx, arrays, basevertex,
+					  min_index, max_index);
+		}
+
+		if (count > get_max_vertices(ctx, ib, AVAIL_RING(chan))) /* not enough room left in the ring for this prim */
+			WAIT_RING(chan, PUSHBUF_DWORDS);
+
+		BATCH_BEGIN(nvgl_primitive(prims[i].mode));
+		dispatch(ctx, start, delta, count);
+		BATCH_END();
+	}
+}
+
+/* Immediate rendering path. */
+
+static unsigned
+extract_id(struct nouveau_array_state *a, int i, int j)
+{
+	return (unsigned)j; /* identity extractor: element j is simply index j; a and i are ignored */
+}
+
+static void
+vbo_draw_imm(GLcontext *ctx, const struct gl_client_array **arrays,
+	     const struct _mesa_prim *prims, GLuint nr_prims,
+	     const struct _mesa_index_buffer *ib, GLuint min_index,
+	     GLuint max_index)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	struct nouveau_channel *chan = context_chan(ctx);
+	extract_u_t extract = ib ? render->ib.extract_u : extract_id; /* without an index buffer, element n is just n */
+	int i, j, k;
+	RENDER_LOCALS(ctx);
+
+	for (i = 0; i < nr_prims; i++) {
+		unsigned start = prims[i].start,
+			end = start + prims[i].count;
+
+		if (prims[i].count > get_max_vertices(ctx, ib,
+						      AVAIL_RING(chan))) /* not enough room left in the ring for this prim */
+			WAIT_RING(chan, PUSHBUF_DWORDS);
+
+		BATCH_BEGIN(nvgl_primitive(prims[i].mode));
+
+		for (; start < end; start++) {
+			j = prims[i].basevertex +
+				extract(&render->ib, 0, start); /* vertex number: basevertex plus the (possibly indexed) element */
+
+			for (k = 0; k < render->attr_count; k++) /* emit every mapped attribute of vertex j, in map order */
+				EMIT_IMM(ctx, &render->attrs[render->map[k]],
+					 j);
+		}
+
+		BATCH_END();
+	}
+}
+
+/* draw_prims entry point when we're doing hw-tnl. */
+
+static void
+TAG(vbo_render_prims)(GLcontext *ctx, const struct gl_client_array **arrays,
+		      const struct _mesa_prim *prims, GLuint nr_prims,
+		      const struct _mesa_index_buffer *ib,
+		      GLboolean index_bounds_valid,
+		      GLuint min_index, GLuint max_index)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+
+	if (!index_bounds_valid) /* caller did not supply bounds: compute them */
+		vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
+
+	vbo_choose_render_mode(ctx, arrays); /* picks VBO or IMM; must run before the attributes are chosen */
+	vbo_choose_attrs(ctx, arrays);
+
+	if (vbo_maybe_split(ctx, arrays, prims, nr_prims, ib, min_index,
+			    max_index))
+		return; /* the draw was handed to the splitter, which was given this function as callback */
+
+	vbo_init_arrays(ctx, ib, arrays);
+
+	if (render->mode == VBO)
+		vbo_draw_vbo(ctx, arrays, prims, nr_prims, ib, min_index,
+			     max_index);
+	else
+		vbo_draw_imm(ctx, arrays, prims, nr_prims, ib, min_index,
+			     max_index);
+
+	vbo_deinit_arrays(ctx, ib, arrays); /* releases the arrays and resets render->map / attr_count */
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv04_context.c b/src/mesa/drivers/dri/nouveau/nv04_context.c
new file mode 100644
index 0000000000..6834f7cd3d
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv04_context.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_fbo.h"
+#include "nouveau_util.h"
+#include "nouveau_class.h"
+#include "nv04_driver.h"
+
+struct nouveau_grobj *
+nv04_context_engine(GLcontext *ctx)
+{
+	struct nv04_context *nv04 = to_nv04_context(ctx);
+	struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
+	/* The multitexture object (eng3dm) is selected whenever unit 0 uses
+	 * the COMBINE, BLEND or ADD env modes, unit 1 is enabled, or
+	 * stencil is enabled; otherwise eng3d is used. */
+	const GLboolean need_mtex =
+		ctx->Texture.Unit[0].EnvMode == GL_COMBINE ||
+		ctx->Texture.Unit[0].EnvMode == GL_BLEND ||
+		ctx->Texture.Unit[0].EnvMode == GL_ADD ||
+		ctx->Texture.Unit[1]._ReallyEnabled ||
+		ctx->Stencil.Enabled;
+	struct nouveau_grobj *engine = need_mtex ? hw->eng3dm : hw->eng3d;
+
+	if (engine == nv04->eng3d)
+		return engine;
+
+	/* Engine switch: remember it and flag the dependent state dirty. */
+	nv04->eng3d = engine;
+
+	if (nv04_mtex_engine(engine)) {
+		context_dirty_i(ctx, TEX_ENV, 0);
+		context_dirty_i(ctx, TEX_ENV, 1);
+		context_dirty_i(ctx, TEX_OBJ, 0);
+		context_dirty_i(ctx, TEX_OBJ, 1);
+	} else {
+		context_bctx_i(ctx, TEXTURE, 1);
+		context_dirty_i(ctx, TEX_ENV, 0);
+		context_dirty_i(ctx, TEX_OBJ, 0);
+	}
+
+	context_dirty(ctx, CONTROL);
+	context_dirty(ctx, BLEND);
+	return engine;
+}
+
+static void
+nv04_channel_flush_notify(struct nouveau_channel *chan)
+{
+	struct nouveau_context *nctx = chan->user_private;
+	GLcontext *ctx = &nctx->base;
+
+	/* Nothing is re-emitted once the fallback level has reached SWRAST. */
+	if (nctx->fallback >= SWRAST)
+		return;
+	/* Flushing seems to clobber the engine context. */
+	context_emit(ctx, TEX_OBJ0);
+	context_emit(ctx, TEX_OBJ1);
+	context_emit(ctx, TEX_ENV0);
+	context_emit(ctx, TEX_ENV1);
+	context_emit(ctx, CONTROL);
+	context_emit(ctx, BLEND);
+	nouveau_bo_state_emit(ctx);
+}
+
+static void
+nv04_hwctx_init(GLcontext *ctx)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
+	struct nouveau_grobj *surf3d = hw->surf3d;
+	struct nouveau_grobj *eng3d = hw->eng3d;
+	struct nouveau_grobj *eng3dm = hw->eng3dm;
+
+	BIND_RING(chan, surf3d, 7); /* NOTE(review): third argument is presumably the subchannel - confirm */
+	BEGIN_RING(chan, surf3d, NV04_CONTEXT_SURFACES_3D_DMA_NOTIFY, 3); /* three values follow: notifier and two VRAM handles */
+	OUT_RING(chan, hw->ntfy->handle);
+	OUT_RING(chan, chan->vram->handle);
+	OUT_RING(chan, chan->vram->handle);
+
+	BEGIN_RING(chan, eng3d, NV04_TEXTURED_TRIANGLE_DMA_NOTIFY, 4); /* four values: notifier, VRAM, GART, surfaces object */
+	OUT_RING(chan, hw->ntfy->handle);
+	OUT_RING(chan, chan->vram->handle);
+	OUT_RING(chan, chan->gart->handle);
+	OUT_RING(chan, surf3d->handle);
+
+	BEGIN_RING(chan, eng3dm, NV04_MULTITEX_TRIANGLE_DMA_NOTIFY, 4); /* same four handles for the multitexture object */
+	OUT_RING(chan, hw->ntfy->handle);
+	OUT_RING(chan, chan->vram->handle);
+	OUT_RING(chan, chan->gart->handle);
+	OUT_RING(chan, surf3d->handle);
+
+	FIRE_RING(chan); /* presumably submits the commands queued above - confirm */
+}
+
+static void
+init_dummy_texture(GLcontext *ctx)
+{
+	struct nouveau_surface *dummy = &to_nv04_context(ctx)->dummy_texture;
+
+	/* A 1x1 swizzled ARGB8888 surface whose only texel is 0xffffffff. */
+	nouveau_surface_alloc(ctx, dummy, SWIZZLED,
+			      NOUVEAU_BO_MAP | NOUVEAU_BO_VRAM,
+			      MESA_FORMAT_ARGB8888, 1, 1);
+	nouveau_bo_map(dummy->bo, NOUVEAU_BO_WR);
+	((uint32_t *)dummy->bo->map)[0] = 0xffffffff;
+	nouveau_bo_unmap(dummy->bo);
+}
+
+static void
+nv04_context_destroy(GLcontext *ctx)
+{
+	struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
+	struct nouveau_surface *dummy = &to_nv04_context(ctx)->dummy_texture;
+	/* Order: surface and render state, dummy texture, hw objects, then the context. */
+	nv04_surface_takedown(ctx);
+	nv04_render_destroy(ctx);
+	nouveau_surface_ref(NULL, dummy);
+	nouveau_grobj_free(&hw->eng3d);
+	nouveau_grobj_free(&hw->eng3dm);
+	nouveau_grobj_free(&hw->surf3d);
+
+	nouveau_context_deinit(ctx);
+	FREE(ctx); /* NOTE(review): assumes ctx is the start of the nv04_context allocation - confirm */
+}
+
+static GLcontext *
+nv04_context_create(struct nouveau_screen *screen, const GLvisual *visual,
+		    GLcontext *share_ctx)
+{
+	struct nv04_context *nctx;
+	struct nouveau_hw_state *hw;
+	GLcontext *ctx;
+	int ret;
+
+	nctx = CALLOC_STRUCT(nv04_context);
+	if (!nctx)
+		return NULL; /* allocation failed: nothing to clean up */
+
+	ctx = &nctx->base.base; /* the GLcontext embedded in the nouveau context */
+	hw = &nctx->base.hw;
+
+	if (!nouveau_context_init(ctx, screen, visual, share_ctx))
+		goto fail;
+
+	hw->chan->flush_notify = nv04_channel_flush_notify; /* re-emit state after each channel flush */
+
+	/* GL constants. */
+	ctx->Const.MaxTextureCoordUnits = NV04_TEXTURE_UNITS;
+	ctx->Const.MaxTextureImageUnits = NV04_TEXTURE_UNITS;
+	ctx->Const.MaxTextureUnits = NV04_TEXTURE_UNITS;
+	ctx->Const.MaxTextureMaxAnisotropy = 2;
+	ctx->Const.MaxTextureLodBias = 15;
+
+	/* 2D engine. */
+	ret = nv04_surface_init(ctx);
+	if (!ret) /* zero means failure here, unlike nouveau_grobj_alloc() below */
+		goto fail;
+
+	/* 3D engine. */
+	ret = nouveau_grobj_alloc(context_chan(ctx), 0xbeef0001,
+				  NV04_TEXTURED_TRIANGLE, &hw->eng3d);
+	if (ret) /* non-zero means failure */
+		goto fail;
+
+	ret = nouveau_grobj_alloc(context_chan(ctx), 0xbeef0002,
+				  NV04_MULTITEX_TRIANGLE, &hw->eng3dm);
+	if (ret)
+		goto fail;
+
+	ret = nouveau_grobj_alloc(context_chan(ctx), 0xbeef0003,
+				  NV04_CONTEXT_SURFACES_3D, &hw->surf3d);
+	if (ret)
+		goto fail;
+
+	init_dummy_texture(ctx);
+	nv04_hwctx_init(ctx);
+	nv04_render_init(ctx);
+
+	return ctx;
+
+fail: /* NOTE(review): runs the full destroy path on a partly initialised context - confirm every teardown step tolerates that */
+	nv04_context_destroy(ctx);
+	return NULL;
+}
+
+const struct nouveau_driver nv04_driver = { /* NV04 entry points plus the state-emission table */
+	.context_create = nv04_context_create,
+	.context_destroy = nv04_context_destroy,
+	.surface_copy = nv04_surface_copy,
+	.surface_fill = nv04_surface_fill,
+	.emit = (nouveau_state_func[]) { /* positional table; NOTE(review): presumably indexed by NOUVEAU_STATE_* value, so order matters - confirm */
+		nv04_defer_control,
+		nouveau_emit_nothing,
+		nv04_defer_blend,
+		nv04_defer_blend,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nv04_defer_control,
+		nouveau_emit_nothing,
+		nv04_defer_control,
+		nouveau_emit_nothing,
+		nv04_defer_control,
+		nv04_defer_control,
+		nouveau_emit_nothing,
+		nv04_emit_framebuffer,
+		nv04_defer_blend,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nv04_emit_scissor,
+		nv04_defer_blend,
+		nv04_defer_control,
+		nv04_defer_control,
+		nv04_defer_control,
+		nv04_emit_tex_env,
+		nv04_emit_tex_env,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nv04_emit_tex_obj,
+		nv04_emit_tex_obj,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nv04_emit_blend, /* presumably the NOUVEAU_STATE_BLEND slot - confirm against the enum */
+		nv04_emit_control, /* presumably the NOUVEAU_STATE_CONTROL slot - confirm against the enum */
+	},
+	.num_emit = NUM_NV04_STATE, /* NOTE(review): the table above lists 72 handlers; confirm it equals NUM_NV04_STATE */
+};
diff --git a/src/mesa/drivers/dri/nouveau/nv04_context.h b/src/mesa/drivers/dri/nouveau/nv04_context.h
new file mode 100644
index 0000000000..ccd3b61e26
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv04_context.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NV04_CONTEXT_H__
+#define __NV04_CONTEXT_H__
+
+#include "nouveau_context.h"
+
+struct nv04_context {
+	struct nouveau_context base; /* NOTE(review): presumably kept first so a GLcontext pointer can be cast - confirm */
+	struct nouveau_grobj *eng3d; /* 3D object currently selected by nv04_context_engine() */
+	struct nouveau_surface dummy_texture; /* surface set up by init_dummy_texture() in nv04_context.c */
+	float viewport[16]; /* not referenced in nv04_context.c; use not visible from here */
+};
+#define to_nv04_context(ctx) ((struct nv04_context *)(ctx)) /* plain, unchecked pointer cast */
+
+#define nv04_mtex_engine(obj) ((obj)->grclass == NV04_MULTITEX_TRIANGLE) /* true when obj is of the multitexture triangle class */
+
+struct nouveau_grobj *
+nv04_context_engine(GLcontext *ctx);
+
+extern const struct nouveau_driver nv04_driver;
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nv04_driver.h b/src/mesa/drivers/dri/nouveau/nv04_driver.h
new file mode 100644
index 0000000000..4d599e683a
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv04_driver.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NV04_DRIVER_H__
+#define __NV04_DRIVER_H__
+
+#include "nv04_context.h"
+
+enum {
+	NOUVEAU_STATE_BLEND = NUM_NOUVEAU_STATE,	/* NV04 states are numbered after the common ones */
+	NOUVEAU_STATE_CONTROL,
+	NUM_NV04_STATE	/* total: the common states plus the two NV04-specific ones */
+};
+
+#define NV04_TEXTURE_UNITS 2
+
+/* nv04_render.c: software TnL renderer setup and teardown. */
+void
+nv04_render_init(GLcontext *ctx);
+
+void
+nv04_render_destroy(GLcontext *ctx);
+
+/* nv04_surface.c */
+GLboolean
+nv04_surface_init(GLcontext *ctx);
+
+void
+nv04_surface_takedown(GLcontext *ctx);
+
+void
+nv04_surface_copy(GLcontext *ctx,
+		  struct nouveau_surface *dst, struct nouveau_surface *src,
+		  int dx, int dy, int sx, int sy, int w, int h);
+
+void
+nv04_surface_fill(GLcontext *ctx,
+		  struct nouveau_surface *dst,
+		  unsigned mask, unsigned value,
+		  int dx, int dy, int w, int h);
+
+/* nv04_state_fb.c: render target format/pitch and clip rectangle. */
+void
+nv04_emit_framebuffer(GLcontext *ctx, int emit);
+
+void
+nv04_emit_scissor(GLcontext *ctx, int emit);
+
+/* nv04_state_raster.c: CONTROL and BLEND state; defer_* only mark it dirty. */
+void
+nv04_defer_control(GLcontext *ctx, int emit);
+
+void
+nv04_emit_control(GLcontext *ctx, int emit);
+
+void
+nv04_defer_blend(GLcontext *ctx, int emit);
+
+void
+nv04_emit_blend(GLcontext *ctx, int emit);
+
+/* nv04_state_frag.c: texture environment combiner state. */
+void
+nv04_emit_tex_env(GLcontext *ctx, int emit);
+
+/* nv04_state_tex.c */
+void
+nv04_emit_tex_obj(GLcontext *ctx, int emit);
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nv04_render.c b/src/mesa/drivers/dri/nouveau/nv04_render.c
new file mode 100644
index 0000000000..b5943d9987
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv04_render.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_util.h"
+#include "nouveau_class.h"
+#include "nv04_driver.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "tnl/t_vertex.h"
+
+#define NUM_VERTEX_ATTRS 6	/* position, two colors, fog, two texcoord sets */
+
+static void
+swtnl_update_viewport(GLcontext *ctx)
+{
+	struct nv04_context *nctx = to_nv04_context(ctx);
+	const struct gl_framebuffer *draw_fb = ctx->DrawBuffer;
+
+	get_viewport_scale(ctx, nctx->viewport);
+	get_viewport_translate(ctx, &nctx->viewport[MAT_TX]);
+
+	/* Z must be normalized: divide its scale and offset by the depth maximum. */
+	nctx->viewport[MAT_SZ] /= draw_fb->_DepthMaxF;
+	nctx->viewport[MAT_TZ] /= draw_fb->_DepthMaxF;
+}
+
+static void
+swtnl_emit_attr(GLcontext *ctx, struct tnl_attr_map *m, int attr, int emit)
+{
+	TNLcontext *tnl = TNL_CONTEXT(ctx);
+	const struct tnl_attr_map live = {	/* attribute is among the render inputs */
+		.attrib = attr,
+		.format = emit,
+	};
+	const struct tnl_attr_map hole = {	/* absent: pad by the attrsize of <emit> */
+		.format = EMIT_PAD,
+		.offset = _tnl_format_info[emit].attrsize,
+	};
+
+	*m = RENDERINPUTS_TEST(tnl->render_inputs_bitset, attr) ?
+		live : hole;
+}
+
+static void
+swtnl_choose_attrs(GLcontext *ctx)
+{
+	TNLcontext *tnl = TNL_CONTEXT(ctx);
+	struct nouveau_grobj *fahrenheit = nv04_context_engine(ctx);
+	struct nv04_context *nctx = to_nv04_context(ctx);
+	static struct tnl_attr_map map[NUM_VERTEX_ATTRS];	/* static storage, refilled on every call */
+	int n = 0;
+
+	tnl->vb.AttribPtr[VERT_ATTRIB_POS] = tnl->vb.NdcPtr;	/* positions come from the NDC array */
+
+	swtnl_emit_attr(ctx, &map[n++], _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT);
+	swtnl_emit_attr(ctx, &map[n++], _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA);
+	swtnl_emit_attr(ctx, &map[n++], _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR);
+	swtnl_emit_attr(ctx, &map[n++], _TNL_ATTRIB_FOG, EMIT_1UB_1F);
+	swtnl_emit_attr(ctx, &map[n++], _TNL_ATTRIB_TEX0, EMIT_2F);
+	if (nv04_mtex_engine(fahrenheit))	/* second texcoord set only for the multitexture class */
+		swtnl_emit_attr(ctx, &map[n++], _TNL_ATTRIB_TEX1, EMIT_2F);
+
+	swtnl_update_viewport(ctx);	/* refresh nctx->viewport before it is installed below */
+
+	_tnl_install_attrs(ctx, map, n, nctx->viewport, 0);
+}
+
+/* TnL renderer entry points; the primitive and stipple hooks are no-ops. */
+
+static void
+swtnl_start(GLcontext *ctx)
+{
+	swtnl_choose_attrs(ctx);	/* (re)build the vertex layout and viewport matrix */
+}
+
+static void
+swtnl_finish(GLcontext *ctx)
+{
+	FIRE_RING(context_chan(ctx));	/* NOTE(review): presumably submits the queued commands — confirm */
+}
+
+static void
+swtnl_primitive(GLcontext *ctx, GLenum mode)
+{
+}
+
+static void
+swtnl_reset_stipple(GLcontext *ctx)
+{
+}
+
+/* Primitive rendering: these macros expect a GLcontext *ctx in scope. */
+
+#define BEGIN_PRIMITIVE(n)						\
+	struct nouveau_channel *chan = context_chan(ctx);		\
+	struct nouveau_grobj *fahrenheit = nv04_context_engine(ctx);	\
+	int vertex_len = TNL_CONTEXT(ctx)->clipspace.vertex_size / 4;	\
+	/* Open a run of (n) vertices on the bound class's vertex method. */ \
+	if (nv04_mtex_engine(fahrenheit))				\
+		BEGIN_RING(chan, fahrenheit,				\
+			   NV04_MULTITEX_TRIANGLE_TLMTVERTEX_SX(0),	\
+			   (n) * vertex_len);				\
+	else								\
+		BEGIN_RING(chan, fahrenheit,				\
+			   NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0),	\
+			   (n) * vertex_len)
+
+#define OUT_VERTEX(i)	/* push vertex <i>; needs BEGIN_PRIMITIVE's locals */ \
+	OUT_RINGp(chan, _tnl_get_vertex(ctx, (i)), vertex_len)
+
+#define END_PRIMITIVE(draw)	/* send <draw> to the class's DRAWPRIMITIVE */ \
+	if (nv04_mtex_engine(fahrenheit)) {				\
+		BEGIN_RING(chan, fahrenheit,				\
+			   NV04_MULTITEX_TRIANGLE_DRAWPRIMITIVE(0), 1); \
+		OUT_RING(chan, (draw));					\
+	} else {							\
+		BEGIN_RING(chan, fahrenheit,				\
+			   NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), 1); \
+		OUT_RING(chan, (draw));					\
+	}
+
+static void
+swtnl_points(GLcontext *ctx, GLuint first, GLuint last)
+{	/* stub: nothing is emitted for points */
+}
+
+static void
+swtnl_line(GLcontext *ctx, GLuint v1, GLuint v2)
+{	/* stub: nothing is emitted for lines */
+}
+
+static void
+swtnl_triangle(GLcontext *ctx, GLuint v1, GLuint v2, GLuint v3)
+{
+	BEGIN_PRIMITIVE(3);	/* declares chan, fahrenheit and vertex_len */
+	OUT_VERTEX(v1);
+	OUT_VERTEX(v2);
+	OUT_VERTEX(v3);
+	END_PRIMITIVE(0x210);	/* NOTE(review): looks like vertex slots 0,1,2, one per nibble — confirm */
+}
+
+static void
+swtnl_quad(GLcontext *ctx, GLuint v1, GLuint v2, GLuint v3, GLuint v4)
+{
+	BEGIN_PRIMITIVE(4);	/* declares chan, fahrenheit and vertex_len */
+	OUT_VERTEX(v1);
+	OUT_VERTEX(v2);
+	OUT_VERTEX(v3);
+	OUT_VERTEX(v4);
+	END_PRIMITIVE(0x320210);	/* NOTE(review): presumably triangles (0,1,2) and (0,2,3) — confirm */
+}
+
+/* TnL initialization: point the tnl module's render hooks at this file. */
+void
+nv04_render_init(GLcontext *ctx)
+{
+	TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+	tnl->Driver.RunPipeline = _tnl_run_pipeline;
+	tnl->Driver.Render.Interp = _tnl_interp;
+	tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+	tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+	tnl->Driver.Render.ClippedLine = _tnl_RenderClippedLine;
+	tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+
+	tnl->Driver.Render.Start = swtnl_start;
+	tnl->Driver.Render.Finish = swtnl_finish;
+	tnl->Driver.Render.PrimitiveNotify = swtnl_primitive;	/* empty hook */
+	tnl->Driver.Render.ResetLineStipple = swtnl_reset_stipple;	/* empty hook */
+
+	tnl->Driver.Render.Points = swtnl_points;	/* empty stub */
+	tnl->Driver.Render.Line = swtnl_line;	/* empty stub */
+	tnl->Driver.Render.Triangle = swtnl_triangle;
+	tnl->Driver.Render.Quad = swtnl_quad;
+
+	_tnl_need_projected_coords(ctx, GL_TRUE);
+	_tnl_init_vertices(ctx, tnl->vb.Size,
+			   NUM_VERTEX_ATTRS * 4 * sizeof(GLfloat));	/* four floats per attribute slot */
+	_tnl_allow_pixel_fog(ctx, GL_FALSE);
+	_tnl_wakeup(ctx);
+}
+
+void
+nv04_render_destroy(GLcontext *ctx)
+{	/* empty: no teardown work is performed here */
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv04_state_fb.c b/src/mesa/drivers/dri/nouveau/nv04_state_fb.c
new file mode 100644
index 0000000000..5e5e0c5874
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv04_state_fb.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_fbo.h"
+#include "nouveau_util.h"
+#include "nouveau_class.h"
+#include "nv04_driver.h"
+
+static inline unsigned
+get_rt_format(gl_format format)
+{
+	/* Translate a Mesa format into the code ORed into the surface FORMAT word. */
+	if (format == MESA_FORMAT_XRGB8888)
+		return 0x05;
+	if (format == MESA_FORMAT_ARGB8888)
+		return 0x08;
+	if (format == MESA_FORMAT_RGB565)
+		return 0x03;
+
+	assert(0);	/* unsupported render target format */
+	return 0;	/* keeps the result defined when NDEBUG removes the assert */
+}
+
+void
+nv04_emit_framebuffer(GLcontext *ctx, int emit)
+{	/* Emit the surface format, pitches and buffer offsets of the draw buffer. */
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
+	struct nouveau_grobj *surf3d = hw->surf3d;
+	struct nouveau_bo_context *bctx = context_bctx(ctx, FRAMEBUFFER);
+	struct gl_framebuffer *fb = ctx->DrawBuffer;
+	struct nouveau_surface *s;
+	uint32_t rt_format = NV04_CONTEXT_SURFACES_3D_FORMAT_TYPE_PITCH;
+	uint32_t rt_pitch = 0, zeta_pitch = 0;
+	unsigned bo_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+
+	if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT)
+		return;	/* nothing is emitted for an incomplete framebuffer */
+
+	/* Render target: color buffer 0 supplies the format and color pitch. */
+	if (fb->_ColorDrawBuffers[0]) {
+		s = &to_nouveau_renderbuffer(
+			fb->_ColorDrawBuffers[0])->surface;
+
+		rt_format |= get_rt_format(s->format);
+		zeta_pitch = rt_pitch = s->pitch;	/* depth pitch defaults to the color pitch */
+
+		nouveau_bo_markl(bctx, surf3d,
+				 NV04_CONTEXT_SURFACES_3D_OFFSET_COLOR,
+				 s->bo, 0, bo_flags);
+	}
+
+	/* Depth/stencil: overrides the default depth pitch when present. */
+	if (fb->_DepthBuffer) {
+		s = &to_nouveau_renderbuffer(
+			fb->_DepthBuffer->Wrapped)->surface;
+
+		zeta_pitch = s->pitch;
+
+		nouveau_bo_markl(bctx, surf3d,
+				 NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA,
+				 s->bo, 0, bo_flags);
+	}
+
+	BEGIN_RING(chan, surf3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1);
+	OUT_RING(chan, rt_format);
+	BEGIN_RING(chan, surf3d, NV04_CONTEXT_SURFACES_3D_PITCH, 1);
+	OUT_RING(chan, zeta_pitch << 16 | rt_pitch);	/* depth pitch high, color pitch low */
+
+	/* Recompute the scissor state. */
+	context_dirty(ctx, SCISSOR);
+}
+
+void
+nv04_emit_scissor(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *surf3d = to_nouveau_context(ctx)->hw.surf3d;
+	int sx, sy, sw, sh;
+
+	/* Clip rectangle: extent in the high half-word, origin in the low one. */
+	get_scissors(ctx->DrawBuffer, &sx, &sy, &sw, &sh);
+
+	BEGIN_RING(chan, surf3d, NV04_CONTEXT_SURFACES_3D_CLIP_HORIZONTAL, 2);
+	OUT_RING(chan, sw << 16 | sx);
+	OUT_RING(chan, sh << 16 | sy);
+
+	/* Touching surf3d invalidates engine state: re-emit CONTROL and BLEND. */
+	context_dirty(ctx, CONTROL);
+	context_dirty(ctx, BLEND);
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv04_state_frag.c b/src/mesa/drivers/dri/nouveau/nv04_state_frag.c
new file mode 100644
index 0000000000..d7c86d4178
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv04_state_frag.c
@@ -0,0 +1,277 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_util.h"
+#include "nouveau_class.h"
+#include "nv04_driver.h"
+
+#define COMBINER_SHIFT(in)	/* shift of argument <in> relative to argument 0 */ \
+	(NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT##in##_SHIFT	\
+	 - NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT0_SHIFT)
+#define COMBINER_SOURCE(reg)	/* source selector <reg>, as encoded for argument 0 */ \
+	NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ARGUMENT0_##reg
+#define COMBINER_INVERT		/* inversion flag, as encoded for argument 0 */ \
+	NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_INVERSE0
+#define COMBINER_ALPHA		/* alpha flag, as encoded for argument 0 */ \
+	NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_ALPHA0
+
+struct combiner_state {
+	GLcontext *ctx;
+	int unit;		/* texture unit index this state was built for */
+	GLboolean alpha;	/* GL_TRUE for the alpha combiner, GL_FALSE for RGB */
+
+	/* GL state */
+	GLenum mode;		/* ModeRGB or ModeA of the unit's current combine state */
+	GLenum *source;		/* SourceRGB or SourceA array */
+	GLenum *operand;	/* OperandRGB or OperandA array */
+	GLuint logscale;	/* ScaleShiftRGB or ScaleShiftA */
+
+	/* Derived HW state */
+	uint32_t hw;		/* register word accumulated by the INPUT_* and *_OP macros */
+};
+
+#define __INIT_COMBINER_ALPHA_A GL_TRUE	/* rc->alpha for chan == A */
+#define __INIT_COMBINER_ALPHA_RGB GL_FALSE	/* rc->alpha for chan == RGB */
+
+/* Fill combiner_state <rc> from unit <i>'s _CurrentCombine; <chan> is
+ * A or RGB and selects both the GL fields read and rc->alpha. */
+#define INIT_COMBINER(chan, ctx, rc, i) do {			\
+		struct gl_tex_env_combine_state *c =		\
+			ctx->Texture.Unit[i]._CurrentCombine;	\
+		(rc)->ctx = ctx;				\
+		(rc)->unit = i;					\
+		(rc)->alpha = __INIT_COMBINER_ALPHA_##chan;	\
+		(rc)->mode = c->Mode##chan;			\
+		(rc)->source = c->Source##chan;			\
+		(rc)->operand = c->Operand##chan;		\
+		(rc)->logscale = c->ScaleShift##chan;		\
+		(rc)->hw = 0;					\
+	} while (0)
+
+/* Map an EXT_texture_env_combine source to the combiner source selector
+ * for rc's unit. An unknown source asserts and yields 0. */
+static uint32_t
+get_input_source(struct combiner_state *rc, int source)
+{
+	switch (source) {
+	case GL_TEXTURE:
+		return rc->unit ? COMBINER_SOURCE(TEXTURE1) :
+			COMBINER_SOURCE(TEXTURE0);
+
+	case GL_TEXTURE0:
+		return COMBINER_SOURCE(TEXTURE0);
+
+	case GL_TEXTURE1:
+		return COMBINER_SOURCE(TEXTURE1);
+
+	case GL_CONSTANT:
+		return COMBINER_SOURCE(CONSTANT);
+
+	case GL_PRIMARY_COLOR:
+		return COMBINER_SOURCE(PRIMARY_COLOR);
+
+	case GL_PREVIOUS:	/* unit 0 has no previous stage: use the primary color */
+		return rc->unit ? COMBINER_SOURCE(PREVIOUS) :
+			COMBINER_SOURCE(PRIMARY_COLOR);
+	}
+
+	assert(0);	/* source not handled above */
+	return 0;	/* keeps the result defined when NDEBUG removes the assert */
+}
+
+/* Translate an EXT_texture_env_combine operand into combiner mapping
+ * bits; passing INVERT in <flags> flips the resulting inversion. */
+#define INVERT 0x1
+
+static uint32_t
+get_input_mapping(struct combiner_state *rc, int operand, int flags)
+{
+	/* A non-color operand feeding the RGB combiner selects the alpha flag. */
+	const int from_alpha =
+		!is_color_operand(operand) && !rc->alpha;
+	/* Inversion is requested when this comparison holds. */
+	const int inverted =
+		is_negative_operand(operand) == !(flags & INVERT);
+
+	return (from_alpha ? COMBINER_ALPHA : 0) |
+		(inverted ? COMBINER_INVERT : 0);
+}
+
+static uint32_t
+get_input_arg(struct combiner_state *rc, int arg, int flags)
+{	/* Source selector plus mapping bits for combine argument <arg>. */
+	int source = rc->source[arg];
+	int operand = rc->operand[arg];
+
+	/* Fake several unsupported texture formats. */
+	if (is_texture_source(source)) {
+		int i = (source == GL_TEXTURE ?
+			 rc->unit : source - GL_TEXTURE0);	/* unit the source refers to */
+		struct gl_texture_object *t = rc->ctx->Texture.Unit[i]._Current;
+		gl_format format = t->Image[0][t->BaseLevel]->TexFormat;
+
+		if (format == MESA_FORMAT_A8) {
+			/* Emulated using I8: color operands read ZERO instead. */
+			if (is_color_operand(operand))
+				return COMBINER_SOURCE(ZERO) |
+					get_input_mapping(rc, operand, flags);
+
+		} else if (format == MESA_FORMAT_L8) {
+			/* Emulated using I8: alpha operands read ZERO with the inversion toggled. */
+			if (!is_color_operand(operand))
+				return COMBINER_SOURCE(ZERO) |
+					get_input_mapping(rc, operand,
+							  flags ^ INVERT);
+		}
+	}
+
+	return get_input_source(rc, source) |
+		get_input_mapping(rc, operand, flags);
+}
+
+/* Bind the combiner input <in> to the combiner source <src>, inverted
+ * when <flags> contains INVERT. The bits are ORed into (rc)->hw. */
+#define INPUT_SRC(rc, in, src, flags)					\
+	((rc)->hw |= (((flags) & INVERT ? COMBINER_INVERT : 0) |	\
+		      COMBINER_SOURCE(src)) << COMBINER_SHIFT(in))
+
+/* Bind the combiner input <in> to the EXT_texture_env_combine
+ * argument <arg>, possibly inverted. The bits are ORed into (rc)->hw. */
+#define INPUT_ARG(rc, in, arg, flags)					\
+	((rc)->hw |= get_input_arg((rc), (arg), (flags)) << COMBINER_SHIFT(in))
+
+#define UNSIGNED_OP(rc)	/* plain output map; any nonzero logscale picks SCALE2 */ \
+	((rc)->hw |= ((rc)->logscale ?					\
+		      NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP_SCALE2 :	\
+		      NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP_IDENTITY))
+#define SIGNED_OP(rc)	/* biased output map; any nonzero logscale picks BIAS_SCALE2 */ \
+	((rc)->hw |= ((rc)->logscale ?					\
+		      NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP_BIAS_SCALE2 : \
+		      NV04_MULTITEX_TRIANGLE_COMBINE_COLOR_MAP_BIAS))
+
+static void
+setup_combiner(struct combiner_state *rc)
+{
+	switch (rc->mode) {	/* NOTE(review): inputs look like out = in0*in1 + in2*in3 — confirm */
+	case GL_REPLACE:	/* in0 = arg0, in1 = inverted ZERO, in2 = in3 = ZERO */
+		INPUT_ARG(rc, 0, 0, 0);
+		INPUT_SRC(rc, 1, ZERO, INVERT);
+		INPUT_SRC(rc, 2, ZERO, 0);
+		INPUT_SRC(rc, 3, ZERO, 0);
+		UNSIGNED_OP(rc);
+		break;
+
+	case GL_MODULATE:	/* in0 = arg0, in1 = arg1, in2 = in3 = ZERO */
+		INPUT_ARG(rc, 0, 0, 0);
+		INPUT_ARG(rc, 1, 1, 0);
+		INPUT_SRC(rc, 2, ZERO, 0);
+		INPUT_SRC(rc, 3, ZERO, 0);
+		UNSIGNED_OP(rc);
+		break;
+
+	case GL_ADD:	/* arg0 and arg1, each paired with inverted ZERO */
+		INPUT_ARG(rc, 0, 0, 0);
+		INPUT_SRC(rc, 1, ZERO, INVERT);
+		INPUT_ARG(rc, 2, 1, 0);
+		INPUT_SRC(rc, 3, ZERO, INVERT);
+		UNSIGNED_OP(rc);
+		break;
+
+	case GL_INTERPOLATE:	/* arg0 paired with arg2, arg1 with inverted arg2 */
+		INPUT_ARG(rc, 0, 0, 0);
+		INPUT_ARG(rc, 1, 2, 0);
+		INPUT_ARG(rc, 2, 1, 0);
+		INPUT_ARG(rc, 3, 2, INVERT);
+		UNSIGNED_OP(rc);
+		break;
+
+	case GL_ADD_SIGNED:	/* same inputs as GL_ADD, but with the biased output map */
+		INPUT_ARG(rc, 0, 0, 0);
+		INPUT_SRC(rc, 1, ZERO, INVERT);
+		INPUT_ARG(rc, 2, 1, 0);
+		INPUT_SRC(rc, 3, ZERO, INVERT);
+		SIGNED_OP(rc);
+		break;
+
+	default:
+		assert(0);	/* combine mode not handled here */
+	}
+}
+
+void
+nv04_emit_tex_env(GLcontext *ctx, int emit)
+{	/* Emit the alpha and color combiner words for one texture unit. */
+	const int i = emit - NOUVEAU_STATE_TEX_ENV0;	/* unit index from the state id */
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *fahrenheit = nv04_context_engine(ctx);
+	struct combiner_state rc_a = {0}, rc_c = {0};	/* {0}: portable zero-init */
+
+	if (!nv04_mtex_engine(fahrenheit)) {
+		context_dirty(ctx, BLEND);	/* single-texture class: texenv lives in BLEND */
+		return;
+	}
+
+	/* Compute the new combiner state. */
+	if (ctx->Texture.Unit[i]._ReallyEnabled) {
+		INIT_COMBINER(A, ctx, &rc_a, i);
+		setup_combiner(&rc_a);
+
+		INIT_COMBINER(RGB, ctx, &rc_c, i);
+		setup_combiner(&rc_c);
+
+	} else {	/* unit disabled: forward the incoming color unchanged */
+		if (i == 0) {
+			INPUT_SRC(&rc_a, 0, PRIMARY_COLOR, 0);
+			INPUT_SRC(&rc_c, 0, PRIMARY_COLOR, 0);
+		} else {
+			INPUT_SRC(&rc_a, 0, PREVIOUS, 0);
+			INPUT_SRC(&rc_c, 0, PREVIOUS, 0);
+		}
+
+		INPUT_SRC(&rc_a, 1, ZERO, INVERT);
+		INPUT_SRC(&rc_c, 1, ZERO, INVERT);
+		INPUT_SRC(&rc_a, 2, ZERO, 0);
+		INPUT_SRC(&rc_c, 2, ZERO, 0);
+		INPUT_SRC(&rc_a, 3, ZERO, 0);
+		INPUT_SRC(&rc_c, 3, ZERO, 0);
+
+		UNSIGNED_OP(&rc_a);
+		UNSIGNED_OP(&rc_c);
+	}
+
+	/* Write the register combiner state out to the hardware. */
+	BEGIN_RING(chan, fahrenheit,
+		   NV04_MULTITEX_TRIANGLE_COMBINE_ALPHA(i), 2);
+	OUT_RING(chan, rc_a.hw);
+	OUT_RING(chan, rc_c.hw);
+
+	BEGIN_RING(chan, fahrenheit,
+		   NV04_MULTITEX_TRIANGLE_COMBINE_FACTOR, 1);	/* not indexed by unit */
+	OUT_RING(chan, pack_rgba_f(MESA_FORMAT_ARGB8888,
+				   ctx->Texture.Unit[0].EnvColor));	/* always unit 0's color */
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv04_state_raster.c b/src/mesa/drivers/dri/nouveau/nv04_state_raster.c
new file mode 100644
index 0000000000..c191571a5f
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv04_state_raster.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_util.h"
+#include "nouveau_class.h"
+#include "nv04_driver.h"
+
+static unsigned
+get_comparison_op(unsigned op)
+{
+	switch (op) {	/* GL comparison function -> hardware code 0x1..0x8 */
+	case GL_NEVER:
+		return 0x1;
+	case GL_LESS:
+		return 0x2;
+	case GL_EQUAL:
+		return 0x3;
+	case GL_LEQUAL:
+		return 0x4;
+	case GL_GREATER:
+		return 0x5;
+	case GL_NOTEQUAL:
+		return 0x6;
+	case GL_GEQUAL:
+		return 0x7;
+	case GL_ALWAYS:
+		return 0x8;
+	}
+	assert(0);	/* not a comparison function handled above */
+	return 0;	/* keeps the result defined when NDEBUG removes the assert */
+}
+
+static unsigned
+get_stencil_op(unsigned op)
+{
+	switch (op) {	/* GL stencil operation -> hardware code 0x1..0x8 */
+	case GL_KEEP:
+		return 0x1;
+	case GL_ZERO:
+		return 0x2;
+	case GL_REPLACE:
+		return 0x3;
+	case GL_INCR:
+		return 0x4;
+	case GL_DECR:
+		return 0x5;
+	case GL_INVERT:
+		return 0x6;
+	case GL_INCR_WRAP:
+		return 0x7;
+	case GL_DECR_WRAP:
+		return 0x8;
+	}
+	assert(0);	/* not a stencil operation handled above */
+	return 0;	/* keeps the result defined when NDEBUG removes the assert */
+}
+
+static unsigned
+get_texenv_mode(unsigned mode)
+{
+	switch (mode) {	/* GL texture env mode -> hardware code; only three are handled */
+	case GL_REPLACE:
+		return 0x1;
+	case GL_DECAL:
+		return 0x3;
+	case GL_MODULATE:
+		return 0x4;
+	}
+	assert(0);	/* env mode not handled above */
+	return 0;	/* keeps the result defined when NDEBUG removes the assert */
+}
+
+static unsigned
+get_blend_func(unsigned func)
+{
+	switch (func) {	/* GL blend factor -> hardware code 0x1..0xb */
+	case GL_ZERO:
+		return 0x1;
+	case GL_ONE:
+		return 0x2;
+	case GL_SRC_COLOR:
+		return 0x3;
+	case GL_ONE_MINUS_SRC_COLOR:
+		return 0x4;
+	case GL_SRC_ALPHA:
+		return 0x5;
+	case GL_ONE_MINUS_SRC_ALPHA:
+		return 0x6;
+	case GL_DST_ALPHA:
+		return 0x7;
+	case GL_ONE_MINUS_DST_ALPHA:
+		return 0x8;
+	case GL_DST_COLOR:
+		return 0x9;
+	case GL_ONE_MINUS_DST_COLOR:
+		return 0xa;
+	case GL_SRC_ALPHA_SATURATE:
+		return 0xb;
+	}
+	assert(0);	/* blend factor not handled above */
+	return 0;	/* keeps the result defined when NDEBUG removes the assert */
+}
+
+void
+nv04_defer_control(GLcontext *ctx, int emit)
+{
+	context_dirty(ctx, CONTROL);	/* emits nothing itself; only marks CONTROL dirty */
+}
+
+void
+nv04_emit_control(GLcontext *ctx, int emit)
+{	/* Emit the CONTROL word(s) of whichever triangle class is bound. */
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *fahrenheit = nv04_context_engine(ctx);
+
+	if (nv04_mtex_engine(fahrenheit)) {
+		int cull_mode = ctx->Polygon.CullFaceMode;
+		int front_face = ctx->Polygon.FrontFace;
+		uint32_t ctrl0 = 1 << 30 |	/* NOTE(review): meaning of bit 30 is not visible here */
+			NV04_MULTITEX_TRIANGLE_CONTROL0_ORIGIN;
+		uint32_t ctrl1 = 0, ctrl2 = 0;
+
+		/* Color mask. */
+		if (ctx->Color.ColorMask[0][RCOMP])
+			ctrl0 |= NV04_MULTITEX_TRIANGLE_CONTROL0_RED_WRITE;
+		if (ctx->Color.ColorMask[0][GCOMP])
+			ctrl0 |= NV04_MULTITEX_TRIANGLE_CONTROL0_GREEN_WRITE;
+		if (ctx->Color.ColorMask[0][BCOMP])
+			ctrl0 |= NV04_MULTITEX_TRIANGLE_CONTROL0_BLUE_WRITE;
+		if (ctx->Color.ColorMask[0][ACOMP])
+			ctrl0 |= NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_WRITE;
+
+		/* Dithering. */
+		if (ctx->Color.DitherFlag)
+			ctrl0 |= NV04_MULTITEX_TRIANGLE_CONTROL0_DITHER_ENABLE;
+
+		/* Cull mode: CW/CCW depends on the culled face and the front-face winding. */
+		if (!ctx->Polygon.CullFlag)
+			ctrl0 |= NV04_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_NONE;
+		else if (cull_mode == GL_FRONT_AND_BACK)
+			ctrl0 |= NV04_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_BOTH;
+		else
+			ctrl0 |= (cull_mode == GL_FRONT) ^ (front_face == GL_CCW) ?
+				NV04_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_CW :
+				NV04_MULTITEX_TRIANGLE_CONTROL0_CULL_MODE_CCW;
+
+		/* Depth test. */
+		if (ctx->Depth.Test)
+			ctrl0 |= NV04_MULTITEX_TRIANGLE_CONTROL0_Z_ENABLE;
+
+		if (ctx->Depth.Mask)
+			ctrl0 |= NV04_MULTITEX_TRIANGLE_CONTROL0_Z_WRITE;
+
+		ctrl0 |= get_comparison_op(ctx->Depth.Func) << 16;
+
+		/* Alpha test: function at bit 8, reference in the low byte. */
+		if (ctx->Color.AlphaEnabled)
+			ctrl0 |= NV04_MULTITEX_TRIANGLE_CONTROL0_ALPHA_ENABLE;
+
+		ctrl0 |= get_comparison_op(ctx->Color.AlphaFunc) << 8 |
+			FLOAT_TO_UBYTE(ctx->Color.AlphaRef);
+
+		/* Stencil test: fields sit 8 bits apart, so clamp each to 8 bits. */
+		if (ctx->Stencil.WriteMask[0])
+			ctrl0 |= NV04_MULTITEX_TRIANGLE_CONTROL0_STENCIL_WRITE;
+
+		if (ctx->Stencil.Enabled)
+			ctrl1 |= NV04_MULTITEX_TRIANGLE_CONTROL1_STENCIL_ENABLE;
+
+		ctrl1 |= get_comparison_op(ctx->Stencil.Function[0]) << 4 |
+			(ctx->Stencil.Ref[0] & 0xff) << 8 |
+			(ctx->Stencil.ValueMask[0] & 0xff) << 16 |	/* else spills into the write mask */
+			(ctx->Stencil.WriteMask[0] & 0xff) << 24;
+
+		ctrl2 |= get_stencil_op(ctx->Stencil.ZPassFunc[0]) << 8 |
+			get_stencil_op(ctx->Stencil.ZFailFunc[0]) << 4 |
+			get_stencil_op(ctx->Stencil.FailFunc[0]);
+
+		BEGIN_RING(chan, fahrenheit, NV04_MULTITEX_TRIANGLE_CONTROL0, 3);
+		OUT_RING(chan, ctrl0);
+		OUT_RING(chan, ctrl1);
+		OUT_RING(chan, ctrl2);
+
+	} else {	/* single-texture class: one word, no color-mask or stencil state */
+		int cull_mode = ctx->Polygon.CullFaceMode;
+		int front_face = ctx->Polygon.FrontFace;
+		uint32_t ctrl = 1 << 30 |
+			NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN;
+
+		/* Dithering. */
+		if (ctx->Color.DitherFlag)
+			ctrl |= NV04_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE;
+
+		/* Cull mode. */
+		if (!ctx->Polygon.CullFlag)
+			ctrl |= NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_NONE;
+		else if (cull_mode == GL_FRONT_AND_BACK)
+			ctrl |= NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_BOTH;
+		else
+			ctrl |= (cull_mode == GL_FRONT) ^ (front_face == GL_CCW) ?
+				NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_CW :
+				NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_CCW;
+
+		/* Depth test. */
+		if (ctx->Depth.Test)
+			ctrl |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE;
+		if (ctx->Depth.Mask)
+			ctrl |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE;
+
+		ctrl |= get_comparison_op(ctx->Depth.Func) << 16;
+
+		/* Alpha test. */
+		if (ctx->Color.AlphaEnabled)
+			ctrl |= NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_ENABLE;
+
+		ctrl |= get_comparison_op(ctx->Color.AlphaFunc) << 8 |
+			FLOAT_TO_UBYTE(ctx->Color.AlphaRef);
+
+		BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
+		OUT_RING(chan, ctrl);
+	}
+}
+
+void
+nv04_defer_blend(GLcontext *ctx, int emit)
+{
+	context_dirty(ctx, BLEND);	/* emits nothing itself; only marks BLEND dirty */
+}
+
+void
+nv04_emit_blend(GLcontext *ctx, int emit)
+{	/* Emit the BLEND word and fog color of whichever triangle class is bound. */
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *fahrenheit = nv04_context_engine(ctx);
+
+	if (nv04_mtex_engine(fahrenheit)) {
+		uint32_t blend = 0x2 << 4 |	/* NOTE(review): meaning of 0x2 << 4 is not visible here */
+			NV04_MULTITEX_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE;
+
+		/* Alpha blending: only the RGB blend factors are consulted. */
+		blend |= get_blend_func(ctx->Color.BlendDstRGB) << 28 |
+			get_blend_func(ctx->Color.BlendSrcRGB) << 24;
+
+		if (ctx->Color.BlendEnabled)
+			blend |= NV04_MULTITEX_TRIANGLE_BLEND_BLEND_ENABLE;
+
+		/* Shade model. */
+		if (ctx->Light.ShadeModel == GL_SMOOTH)
+			blend |= NV04_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
+		else
+			blend |= NV04_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_FLAT;
+
+		/* Secondary color */
+		if (NEED_SECONDARY_COLOR(ctx))
+			blend |= NV04_MULTITEX_TRIANGLE_BLEND_SPECULAR_ENABLE;
+
+		/* Fog. */
+		if (ctx->Fog.Enabled)
+			blend |= NV04_MULTITEX_TRIANGLE_BLEND_FOG_ENABLE;
+
+		BEGIN_RING(chan, fahrenheit, NV04_MULTITEX_TRIANGLE_BLEND, 1);
+		OUT_RING(chan, blend);
+
+		BEGIN_RING(chan, fahrenheit, NV04_MULTITEX_TRIANGLE_FOGCOLOR, 1);
+		OUT_RING(chan, pack_rgba_f(MESA_FORMAT_ARGB8888,
+					   ctx->Fog.Color));
+
+	} else {	/* single-texture class: BLEND also carries the texture env mode */
+		uint32_t blend = 0x2 << 4 |
+			NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE;
+
+		/* Alpha blending: only the RGB blend factors are consulted. */
+		blend |= get_blend_func(ctx->Color.BlendDstRGB) << 28 |
+			get_blend_func(ctx->Color.BlendSrcRGB) << 24;
+
+		if (ctx->Color.BlendEnabled)
+			blend |= NV04_TEXTURED_TRIANGLE_BLEND_BLEND_ENABLE;
+
+		/* Shade model. */
+		if (ctx->Light.ShadeModel == GL_SMOOTH)
+			blend |= NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
+		else
+			blend |= NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT;
+
+		/* Texture environment: unit 0's mode, or modulate when no unit is enabled. */
+		if (ctx->Texture._EnabledUnits)
+			blend |= get_texenv_mode(ctx->Texture.Unit[0].EnvMode);
+		else
+			blend |= get_texenv_mode(GL_MODULATE);
+
+		/* Secondary color */
+		if (NEED_SECONDARY_COLOR(ctx))
+			blend |= NV04_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE;
+
+		/* Fog. */
+		if (ctx->Fog.Enabled)
+			blend |= NV04_TEXTURED_TRIANGLE_BLEND_FOG_ENABLE;
+
+		BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1);
+		OUT_RING(chan, blend);
+
+		BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1);
+		OUT_RING(chan, pack_rgba_f(MESA_FORMAT_ARGB8888,
+					   ctx->Fog.Color));
+	}
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv04_state_tex.c b/src/mesa/drivers/dri/nouveau/nv04_state_tex.c
new file mode 100644
index 0000000000..6d8762b7d1
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv04_state_tex.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_texture.h"
+#include "nouveau_util.h"
+#include "nouveau_gldefs.h"
+#include "nouveau_class.h"
+#include "nv04_driver.h"
+
+/* Map a Mesa texel format to the NV04 textured-triangle COLOR format field. */
+static uint32_t
+get_tex_format(struct gl_texture_image *ti)
+{
+	switch (ti->TexFormat) {
+	case MESA_FORMAT_A8: case MESA_FORMAT_L8: case MESA_FORMAT_I8:
+		return NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_Y8;
+	case MESA_FORMAT_ARGB1555:
+		return NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_A1R5G5B5;
+	case MESA_FORMAT_ARGB4444:
+		return NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_A4R4G4B4;
+	case MESA_FORMAT_RGB565:
+		return NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_R5G6B5;
+	case MESA_FORMAT_ARGB8888:
+		return NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_A8R8G8B8;
+	case MESA_FORMAT_XRGB8888:
+		return NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_X8R8G8B8;
+	default:
+		assert(0);
+		return 0; /* keep the result defined when NDEBUG drops the assert */
+	}
+}
+
+/* GL wrap enum -> address code for the texture FORMAT word; unknown enums assert, then act as GL_REPEAT. */
+static inline unsigned
+get_wrap_mode(unsigned wrap)
+{
+	switch (wrap) {
+	case GL_MIRRORED_REPEAT:
+		return 0x2;
+	case GL_CLAMP: case GL_CLAMP_TO_EDGE:
+		return 0x3;
+	case GL_CLAMP_TO_BORDER:
+		return 0x4;
+	default:
+		assert(0); /* fall through */
+	case GL_REPEAT:
+		return 0x1;
+	}
+}
+
+/* Emit the OFFSET/FORMAT/FILTER state of texture unit
+ * (emit - NOUVEAU_STATE_TEX_OBJ0); a disabled unit is pointed at the
+ * context's dummy texture instead. */
+void
+nv04_emit_tex_obj(GLcontext *ctx, int emit)
+{
+	const int i = emit - NOUVEAU_STATE_TEX_OBJ0;
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *fahrenheit = nv04_context_engine(ctx);
+	struct nouveau_bo_context *bctx = context_bctx_i(ctx, TEXTURE, i);
+	const int bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART | NOUVEAU_BO_VRAM;
+	struct nouveau_surface *s;
+	uint32_t format = 0xa0, filter = 0x1010;
+
+	if (i && !nv04_mtex_engine(fahrenheit)) /* units > 0 need the multitexture engine */
+		return;
+
+	if (ctx->Texture.Unit[i]._ReallyEnabled) {
+		struct gl_texture_object *t = ctx->Texture.Unit[i]._Current;
+		struct gl_texture_image *ti = t->Image[0][t->BaseLevel];
+		int lod_max = 1, lod_bias = 0;
+
+		if (!nouveau_texture_validate(ctx, t)) /* nothing is emitted for a texture that fails validation */
+			return;
+
+		s = &to_nouveau_texture(t)->surfaces[t->BaseLevel];
+
+		if (t->MinFilter != GL_NEAREST &&
+		    t->MinFilter != GL_LINEAR) { /* i.e. a mipmapping min filter */
+			lod_max = CLAMP(MIN2(t->MaxLod, t->_MaxLambda),
+					0, 15) + 1;
+			lod_bias = CLAMP(ctx->Texture.Unit[i].LodBias +
+					 t->LodBias, 0, 15);
+		}
+
+		format |= get_wrap_mode(t->WrapT) << 28 |
+			get_wrap_mode(t->WrapS) << 24 |
+			ti->HeightLog2 << 20 |
+			ti->WidthLog2 << 16 |
+			lod_max << 12 |
+			get_tex_format(ti);
+		/* Cast before shifting: 1 << 31 would overflow a signed int. */
+		filter |= (uint32_t)log2i(t->MaxAnisotropy) << 31 |
+			nvgl_filter_mode(t->MagFilter) << 28 |
+			(uint32_t)log2i(t->MaxAnisotropy) << 27 |
+			nvgl_filter_mode(t->MinFilter) << 24 |
+			lod_bias << 16;
+	} else {
+		s = &to_nv04_context(ctx)->dummy_texture;
+
+		format |= NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT |
+			NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_REPEAT |
+			1 << 12 |
+			NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_A8R8G8B8;
+
+		filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST |
+			NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST;
+	}
+
+	if (nv04_mtex_engine(fahrenheit)) { /* per-unit methods of the multitexture object */
+		nouveau_bo_markl(bctx, fahrenheit,
+				 NV04_MULTITEX_TRIANGLE_OFFSET(i),
+				 s->bo, 0, bo_flags);
+
+		nouveau_bo_mark(bctx, fahrenheit,
+				NV04_MULTITEX_TRIANGLE_FORMAT(i),
+				s->bo, format, 0,
+				NV04_MULTITEX_TRIANGLE_FORMAT_DMA_A,
+				NV04_MULTITEX_TRIANGLE_FORMAT_DMA_B,
+				bo_flags | NOUVEAU_BO_OR);
+
+		BEGIN_RING(chan, fahrenheit, NV04_MULTITEX_TRIANGLE_FILTER(i), 1);
+		OUT_RING(chan, filter);
+	} else { /* single-texture object: one unit, plus a zeroed colour key */
+		nouveau_bo_markl(bctx, fahrenheit,
+				 NV04_TEXTURED_TRIANGLE_OFFSET,
+				 s->bo, 0, bo_flags);
+
+		nouveau_bo_mark(bctx, fahrenheit,
+				NV04_TEXTURED_TRIANGLE_FORMAT,
+				s->bo, format, 0,
+				NV04_TEXTURED_TRIANGLE_FORMAT_DMA_A,
+				NV04_TEXTURED_TRIANGLE_FORMAT_DMA_B,
+				bo_flags | NOUVEAU_BO_OR);
+
+		BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1);
+		OUT_RING(chan, 0);
+
+		BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1);
+		OUT_RING(chan, filter);
+	}
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv04_surface.c b/src/mesa/drivers/dri/nouveau/nv04_surface.c
new file mode 100644
index 0000000000..e3febf7d2f
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv04_surface.c
@@ -0,0 +1,597 @@
+/*
+ * Copyright (C) 2007-2010 The Nouveau Project.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_class.h"
+#include "nouveau_context.h"
+#include "nouveau_util.h"
+#include "nv04_driver.h"
+
+/* Swizzled-surface colour format for a Mesa format, chosen per group below
+ * (apparently by texel size); unlisted formats assert and return 0. */
+static inline int
+swzsurf_format(gl_format format)
+{
+	switch (format) {
+	case MESA_FORMAT_A8:
+	case MESA_FORMAT_L8:
+	case MESA_FORMAT_I8:
+	case MESA_FORMAT_RGB332:
+	case MESA_FORMAT_CI8:
+		return NV04_SWIZZLED_SURFACE_FORMAT_COLOR_Y8;
+	case MESA_FORMAT_RGB565:
+	case MESA_FORMAT_RGB565_REV:
+	case MESA_FORMAT_ARGB4444:
+	case MESA_FORMAT_ARGB4444_REV:
+	case MESA_FORMAT_ARGB1555:
+	case MESA_FORMAT_RGBA5551:
+	case MESA_FORMAT_ARGB1555_REV:
+	case MESA_FORMAT_AL88:
+	case MESA_FORMAT_AL88_REV:
+	case MESA_FORMAT_YCBCR:
+	case MESA_FORMAT_YCBCR_REV:
+	case MESA_FORMAT_Z16:
+		return NV04_SWIZZLED_SURFACE_FORMAT_COLOR_R5G6B5;
+	case MESA_FORMAT_RGBA8888:
+	case MESA_FORMAT_RGBA8888_REV:
+	case MESA_FORMAT_XRGB8888:
+	case MESA_FORMAT_ARGB8888:
+	case MESA_FORMAT_ARGB8888_REV:
+	case MESA_FORMAT_S8_Z24:
+	case MESA_FORMAT_Z24_S8:
+	case MESA_FORMAT_Z32:
+		return NV04_SWIZZLED_SURFACE_FORMAT_COLOR_A8R8G8B8;
+	default:
+		assert(0);
+		return 0; /* keep the result defined when NDEBUG drops the assert */
+	}
+}
+
+/* CONTEXT_SURFACES_2D format for a Mesa format, chosen per group below
+ * (apparently by texel size); unlisted formats assert and return 0. */
+static inline int
+surf2d_format(gl_format format)
+{
+	switch (format) {
+	case MESA_FORMAT_A8:
+	case MESA_FORMAT_L8:
+	case MESA_FORMAT_I8:
+	case MESA_FORMAT_RGB332:
+	case MESA_FORMAT_CI8:
+		return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
+	case MESA_FORMAT_RGB565:
+	case MESA_FORMAT_RGB565_REV:
+	case MESA_FORMAT_ARGB4444:
+	case MESA_FORMAT_ARGB4444_REV:
+	case MESA_FORMAT_ARGB1555:
+	case MESA_FORMAT_RGBA5551:
+	case MESA_FORMAT_ARGB1555_REV:
+	case MESA_FORMAT_AL88:
+	case MESA_FORMAT_AL88_REV:
+	case MESA_FORMAT_YCBCR:
+	case MESA_FORMAT_YCBCR_REV:
+	case MESA_FORMAT_Z16:
+		return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5;
+	case MESA_FORMAT_RGBA8888:
+	case MESA_FORMAT_RGBA8888_REV:
+	case MESA_FORMAT_XRGB8888:
+	case MESA_FORMAT_ARGB8888:
+	case MESA_FORMAT_ARGB8888_REV:
+	case MESA_FORMAT_S8_Z24:
+	case MESA_FORMAT_Z24_S8:
+	case MESA_FORMAT_Z32:
+		return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32;
+	default:
+		assert(0);
+		return 0; /* keep the result defined when NDEBUG drops the assert */
+	}
+}
+
+/* GDI-rectangle colour format for a Mesa format; the first and last groups
+ * both use A8R8G8B8.  Unlisted formats assert and return 0. */
+static inline int
+rect_format(gl_format format)
+{
+	switch (format) {
+	case MESA_FORMAT_A8:
+	case MESA_FORMAT_L8:
+	case MESA_FORMAT_I8:
+	case MESA_FORMAT_RGB332:
+	case MESA_FORMAT_CI8:
+		return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
+	case MESA_FORMAT_RGB565:
+	case MESA_FORMAT_RGB565_REV:
+	case MESA_FORMAT_ARGB4444:
+	case MESA_FORMAT_ARGB4444_REV:
+	case MESA_FORMAT_ARGB1555:
+	case MESA_FORMAT_RGBA5551:
+	case MESA_FORMAT_ARGB1555_REV:
+	case MESA_FORMAT_AL88:
+	case MESA_FORMAT_AL88_REV:
+	case MESA_FORMAT_YCBCR:
+	case MESA_FORMAT_YCBCR_REV:
+	case MESA_FORMAT_Z16:
+		return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5;
+	case MESA_FORMAT_RGBA8888:
+	case MESA_FORMAT_RGBA8888_REV:
+	case MESA_FORMAT_XRGB8888:
+	case MESA_FORMAT_ARGB8888:
+	case MESA_FORMAT_ARGB8888_REV:
+	case MESA_FORMAT_S8_Z24:
+	case MESA_FORMAT_Z24_S8:
+	case MESA_FORMAT_Z32:
+		return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
+	default:
+		assert(0);
+		return 0; /* keep the result defined when NDEBUG drops the assert */
+	}
+}
+
+/* SCALED_IMAGE_FROM_MEMORY colour format for a Mesa format, chosen per group
+ * below (apparently by texel size); unlisted formats assert and return 0. */
+static inline int
+sifm_format(gl_format format)
+{
+	switch (format) {
+	case MESA_FORMAT_A8:
+	case MESA_FORMAT_L8:
+	case MESA_FORMAT_I8:
+	case MESA_FORMAT_RGB332:
+	case MESA_FORMAT_CI8:
+		return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_AY8;
+	case MESA_FORMAT_RGB565:
+	case MESA_FORMAT_RGB565_REV:
+	case MESA_FORMAT_ARGB4444:
+	case MESA_FORMAT_ARGB4444_REV:
+	case MESA_FORMAT_ARGB1555:
+	case MESA_FORMAT_RGBA5551:
+	case MESA_FORMAT_ARGB1555_REV:
+	case MESA_FORMAT_AL88:
+	case MESA_FORMAT_AL88_REV:
+	case MESA_FORMAT_YCBCR:
+	case MESA_FORMAT_YCBCR_REV:
+	case MESA_FORMAT_Z16:
+		return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5;
+	case MESA_FORMAT_RGBA8888:
+	case MESA_FORMAT_RGBA8888_REV:
+	case MESA_FORMAT_XRGB8888:
+	case MESA_FORMAT_ARGB8888:
+	case MESA_FORMAT_ARGB8888_REV:
+	case MESA_FORMAT_S8_Z24:
+	case MESA_FORMAT_Z24_S8:
+	case MESA_FORMAT_Z32:
+		return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8;
+	default:
+		assert(0);
+		return 0; /* keep the result defined when NDEBUG drops the assert */
+	}
+}
+
+static void /* GPU copy of a w x h rectangle from src into the swizzled surface dst, via sifm + swzsurf */
+nv04_surface_copy_swizzle(GLcontext *ctx,
+			  struct nouveau_surface *dst,
+			  struct nouveau_surface *src,
+			  int dx, int dy, int sx, int sy,
+			  int w, int h)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
+	struct nouveau_grobj *swzsurf = hw->swzsurf;
+	struct nouveau_grobj *sifm = hw->sifm;
+	struct nouveau_bo_context *bctx = context_bctx(ctx, SURFACE);
+	const unsigned bo_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART;
+	/* Per-submission chunk limits; may differ between chips but must be powers of two. */
+	const unsigned max_w = 1024;
+	const unsigned max_h = 1024;
+	unsigned sub_w = w > max_w ? max_w : w;
+	unsigned sub_h = h > max_h ? max_h : h;
+	unsigned x, y;
+
+	/* A swizzled destination must have power-of-two dimensions. */
+	assert(_mesa_is_pow_two(dst->width) &&
+	       _mesa_is_pow_two(dst->height));
+
+	/* An area too large for one submission is copied in power-of-two
+	 * chunks to meet alignment requirements, so w/h must then be POT too. */
+	assert(sub_w == w || _mesa_is_pow_two(w));
+	assert(sub_h == h || _mesa_is_pow_two(h));
+
+	nouveau_bo_marko(bctx, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE,
+			 src->bo, bo_flags | NOUVEAU_BO_RD);
+	nouveau_bo_marko(bctx, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE,
+			 dst->bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	nouveau_bo_markl(bctx, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET,
+			 dst->bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+	BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1);
+	OUT_RING  (chan, swzsurf_format(dst->format) |
+		   log2i(dst->width) << 16 |
+		   log2i(dst->height) << 24); /* format word also carries log2 of the surface size */
+
+	BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1);
+	OUT_RING  (chan, swzsurf->handle); /* sifm writes into the swizzled surface object */
+
+	for (y = 0; y < h; y += sub_h) {
+		sub_h = MIN2(sub_h, h - y);
+
+		for (x = 0; x < w; x += sub_w) {
+			sub_w = MIN2(sub_w, w - x); /* NOTE(review): not reset per row; harmless while the asserts above hold */
+
+			MARK_RING(chan, 15, 1);
+
+			BEGIN_RING(chan, sifm,
+				   NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT, 8);
+			OUT_RING(chan, sifm_format(src->format));
+			OUT_RING(chan, NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
+			OUT_RING(chan, (y + dy) << 16 | (x + dx)); /* destination position, y in the high half */
+			OUT_RING(chan, sub_h << 16 | sub_w); /* chunk size, height in the high half */
+			OUT_RING(chan, (y + dy) << 16 | (x + dx));
+			OUT_RING(chan, sub_h << 16 | sub_w);
+			OUT_RING(chan, 1 << 20); /* presumably a 1:1 scale factor in fixed point - confirm */
+			OUT_RING(chan, 1 << 20);
+
+			BEGIN_RING(chan, sifm,
+				   NV03_SCALED_IMAGE_FROM_MEMORY_SIZE, 4);
+			OUT_RING(chan, sub_h << 16 | sub_w);
+			OUT_RING(chan, src->pitch  |
+				 NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
+				 NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
+			OUT_RELOCl(chan, src->bo, src->offset +
+				   (y + sy) * src->pitch +
+				   (x + sx) * src->cpp,
+				   bo_flags | NOUVEAU_BO_RD); /* byte address of this chunk's first source texel */
+			OUT_RING(chan, 0);
+		}
+	}
+
+	nouveau_bo_context_reset(bctx);
+
+	if (context_chipset(ctx) < 0x10) /* chipsets below 0x10 fire the ring right away */
+		FIRE_RING(chan);
+}
+
+/* Copy a w x h rectangle between two surfaces with the M2MF object, in
+ * slices of at most 2047 rows. */
+static void
+nv04_surface_copy_m2mf(GLcontext *ctx,
+		       struct nouveau_surface *dst,
+		       struct nouveau_surface *src,
+		       int dx, int dy, int sx, int sy,
+		       int w, int h)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *m2mf = to_nouveau_context(ctx)->hw.m2mf;
+	struct nouveau_bo_context *bctx = context_bctx(ctx, SURFACE);
+	const unsigned rd_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
+	const unsigned wr_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR;
+	unsigned from = src->offset + sy * src->pitch + sx * src->cpp;
+	unsigned to = dst->offset + dy * dst->pitch + dx * dst->cpp;
+	int rows;
+
+	nouveau_bo_marko(bctx, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN,
+			 src->bo, rd_flags);
+	nouveau_bo_marko(bctx, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_OUT,
+			 dst->bo, wr_flags);
+
+	for (; h; h -= rows) {
+		rows = (h < 2047) ? h : 2047;
+
+		MARK_RING(chan, 9, 2);
+
+		BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+		OUT_RELOCl(chan, src->bo, from, rd_flags);
+		OUT_RELOCl(chan, dst->bo, to, wr_flags);
+		OUT_RING  (chan, src->pitch);
+		OUT_RING  (chan, dst->pitch);
+		OUT_RING  (chan, w * src->cpp);	/* bytes per row */
+		OUT_RING  (chan, rows);
+		OUT_RING  (chan, 0x0101);
+		OUT_RING  (chan, 0);
+
+		from += src->pitch * rows;
+		to += dst->pitch * rows;
+	}
+
+	nouveau_bo_context_reset(bctx);
+
+	if (context_chipset(ctx) < 0x10)
+		FIRE_RING(chan);
+}
+
+typedef unsigned (*get_offset_t)(struct nouveau_surface *s, /* byte offset of texel (x, y) within s */
+				 unsigned x, unsigned y);
+
+static unsigned
+get_linear_offset(struct nouveau_surface *s, unsigned x, unsigned y)
+{
+	return y * s->pitch + x * s->cpp; /* start of row y plus x texels of cpp bytes */
+}
+
+/*
+ * Byte offset of texel (x, y) inside a swizzled surface.
+ *
+ * With k = log2i() of the smaller surface dimension, the low bits of x and
+ * y are interleaved bit by bit (x on even positions, y on odd ones) and
+ * only the low 2k bits of that pattern are kept.  Coordinate bits at or
+ * above k are shifted up by k so they land above the interleaved block.
+ * The resulting texel index is multiplied by the texel size s->cpp.
+ *
+ * Only the low 12 bits of each coordinate take part in the interleave.
+ * The masks start from ~0u rather than ~0 because left-shifting a
+ * negative int is undefined behaviour in C.
+ *
+ * Used by nv04_surface_copy_cpu() through get_offset_t.
+ */
+static unsigned
+get_swizzled_offset(struct nouveau_surface *s, unsigned x, unsigned y)
+{
+	const unsigned k = log2i(MIN2(s->width, s->height));
+	const unsigned high_bits = ~0u << k;
+	const unsigned swizzled_bits = ~(~0u << 2 * k);
+	unsigned u = 0, v = 0;
+	unsigned n;
+
+	for (n = 0; n < 12; n++) {
+		const unsigned bit = 1u << n;
+
+		u |= (x & bit) << n;		/* bit n of x -> bit 2n */
+		v |= (y & bit) << (n + 1);	/* bit n of y -> bit 2n + 1 */
+	}
+
+	return s->cpp * (((u | v) & swizzled_bits) |
+			 (x & high_bits) << k |
+			 (y & high_bits) << k);
+}
+
+/* Texel-by-texel CPU copy through the buffer mappings; each side is
+ * addressed with the offset function matching its layout. */
+static void
+nv04_surface_copy_cpu(GLcontext *ctx,
+		      struct nouveau_surface *dst,
+		      struct nouveau_surface *src,
+		      int dx, int dy, int sx, int sy,
+		      int w, int h)
+{
+	get_offset_t get_dst = (dst->layout == SWIZZLED ?
+				get_swizzled_offset : get_linear_offset);
+	get_offset_t get_src = (src->layout == SWIZZLED ?
+				get_swizzled_offset : get_linear_offset);
+	unsigned char *dp, *sp; /* byte pointers: arithmetic on void * is a GNU extension */
+	int x, y;
+
+	nouveau_bo_map(dst->bo, NOUVEAU_BO_WR); /* NOTE(review): map results are not checked */
+	nouveau_bo_map(src->bo, NOUVEAU_BO_RD);
+	dp = (unsigned char *)dst->bo->map + dst->offset;
+	sp = (unsigned char *)src->bo->map + src->offset;
+
+	for (y = 0; y < h; y++) {
+		for (x = 0; x < w; x++) {
+			memcpy(dp + get_dst(dst, dx + x, dy + y),
+			       sp + get_src(src, sx + x, sy + y), dst->cpp);
+		}
+	}
+	nouveau_bo_unmap(src->bo);
+	nouveau_bo_unmap(dst->bo);
+}
+
+/* Copy a rectangle between surfaces, choosing among the M2MF path, the
+ * sifm+swzsurf swizzling path and a CPU fallback. */
+void
+nv04_surface_copy(GLcontext *ctx,
+		  struct nouveau_surface *dst,
+		  struct nouveau_surface *src,
+		  int dx, int dy, int sx, int sy,
+		  int w, int h)
+{
+	const int both_linear = src->layout == LINEAR && dst->layout == LINEAR;
+	const int tiny_dst = dst->width <= 2 || dst->height <= 1;
+
+	if (both_linear || tiny_dst) {
+		/* Plain memory-to-memory transfer. */
+		nv04_surface_copy_m2mf(ctx, dst, src, dx, dy, sx, sy, w, h);
+	} else if (src->layout == LINEAR && dst->layout == SWIZZLED &&
+		   dst->cpp != 1 && !(dst->offset & 63)) {
+		/* Hardware swizzle through sifm+swzsurf. */
+		nv04_surface_copy_swizzle(ctx, dst, src, dx, dy, sx, sy, w, h);
+	} else {
+		/* Everything else is copied on the CPU. */
+		nv04_surface_copy_cpu(ctx, dst, src, dx, dy, sx, sy, w, h);
+	}
+}
+
+/* Fill the (dx, dy, w, h) rectangle of dst with `value` through the GDI
+ * rectangle object; `mask` is loaded into the image-pattern colour. */
+void
+nv04_surface_fill(GLcontext *ctx,
+		  struct nouveau_surface *dst,
+		  unsigned mask, unsigned value,
+		  int dx, int dy, int w, int h)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
+	struct nouveau_grobj *surf2d = hw->surf2d;
+	struct nouveau_grobj *patt = hw->patt;
+	struct nouveau_grobj *rect = hw->rect;
+	unsigned bo_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART;
+
+	MARK_RING (chan, 19, 4);
+	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
+	OUT_RELOCo(chan, dst->bo, bo_flags | NOUVEAU_BO_WR);
+	OUT_RELOCo(chan, dst->bo, bo_flags | NOUVEAU_BO_WR);
+	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
+	OUT_RING  (chan, surf2d_format(dst->format));
+	OUT_RING  (chan, (dst->pitch << 16) | dst->pitch);
+	OUT_RELOCl(chan, dst->bo, dst->offset, bo_flags | NOUVEAU_BO_WR);
+	OUT_RELOCl(chan, dst->bo, dst->offset, bo_flags | NOUVEAU_BO_WR);
+
+	BEGIN_RING(chan, patt, NV04_IMAGE_PATTERN_COLOR_FORMAT, 1);
+	OUT_RING  (chan, rect_format(dst->format));
+	BEGIN_RING(chan, patt, NV04_IMAGE_PATTERN_MONOCHROME_COLOR1, 1);
+	OUT_RING  (chan, mask | ~0ull << (8 * dst->cpp)); /* bits above the texel width forced to 1; unsigned so the shift is defined */
+	BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1);
+	OUT_RING  (chan, rect_format(dst->format));
+	BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1);
+	OUT_RING  (chan, value);
+	BEGIN_RING(chan, rect,
+		   NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2);
+	OUT_RING  (chan, (dx << 16) | dy);
+	OUT_RING  (chan, ( w << 16) |  h);
+
+	if (context_chipset(ctx) < 0x10)
+		FIRE_RING(chan);
+}
+
+/* Free every 2D engine object and the notifier created by nv04_surface_init(). */
+void
+nv04_surface_takedown(GLcontext *ctx)
+{
+	struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
+	struct nouveau_grobj **objs[] = {
+		&hw->swzsurf, &hw->sifm, &hw->rect, &hw->rop,
+		&hw->patt, &hw->surf2d, &hw->m2mf,
+	};
+	unsigned i;
+	for (i = 0; i < sizeof(objs) / sizeof(objs[0]); i++)
+		nouveau_grobj_free(objs[i]);
+	nouveau_notifier_free(&hw->ntfy);
+}
+
+GLboolean /* Create the notifier and the 2D engine objects in ctx's hw state; GL_TRUE on success, GL_FALSE on failure */
+nv04_surface_init(GLcontext *ctx)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
+	unsigned handle = 0x88000000, class; /* object handles are handed out sequentially from here */
+	int ret;
+
+	/* Notifier object. */
+	ret = nouveau_notifier_alloc(chan, handle++, 1, &hw->ntfy);
+	if (ret)
+		goto fail;
+
+	/* Memory to memory format. */
+	ret = nouveau_grobj_alloc(chan, handle++, NV04_MEMORY_TO_MEMORY_FORMAT,
+				  &hw->m2mf);
+	if (ret)
+		goto fail;
+
+	BEGIN_RING(chan, hw->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
+	OUT_RING  (chan, hw->ntfy->handle);
+
+	/* Context surfaces 2D. */
+	if (context_chipset(ctx) < 0x10) /* object class depends on the chipset generation */
+		class = NV04_CONTEXT_SURFACES_2D;
+	else
+		class = NV10_CONTEXT_SURFACES_2D;
+
+	ret = nouveau_grobj_alloc(chan, handle++, class, &hw->surf2d);
+	if (ret)
+		goto fail;
+
+	/* Raster op. */
+	ret = nouveau_grobj_alloc(chan, handle++, NV03_CONTEXT_ROP, &hw->rop);
+	if (ret)
+		goto fail;
+
+	BEGIN_RING(chan, hw->rop, NV03_CONTEXT_ROP_DMA_NOTIFY, 1);
+	OUT_RING  (chan, hw->ntfy->handle);
+
+	BEGIN_RING(chan, hw->rop, NV03_CONTEXT_ROP_ROP, 1);
+	OUT_RING  (chan, 0xca); /* DPSDxax in the GDI speech. */
+
+	/* Image pattern. */
+	ret = nouveau_grobj_alloc(chan, handle++, NV04_IMAGE_PATTERN,
+				  &hw->patt);
+	if (ret)
+		goto fail;
+
+	BEGIN_RING(chan, hw->patt, NV04_IMAGE_PATTERN_DMA_NOTIFY, 1);
+	OUT_RING  (chan, hw->ntfy->handle);
+
+	BEGIN_RING(chan, hw->patt, NV04_IMAGE_PATTERN_MONOCHROME_FORMAT, 3);
+	OUT_RING  (chan, NV04_IMAGE_PATTERN_MONOCHROME_FORMAT_LE);
+	OUT_RING  (chan, NV04_IMAGE_PATTERN_MONOCHROME_SHAPE_8X8);
+	OUT_RING  (chan, NV04_IMAGE_PATTERN_PATTERN_SELECT_MONO);
+
+	BEGIN_RING(chan, hw->patt, NV04_IMAGE_PATTERN_MONOCHROME_COLOR0, 4); /* four words starting at COLOR0; nv04_surface_fill() later rewrites COLOR1 */
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, ~0);
+	OUT_RING  (chan, ~0);
+
+	/* GDI rectangle text. */
+	ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT,
+				  &hw->rect);
+	if (ret)
+		goto fail;
+
+	BEGIN_RING(chan, hw->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1);
+	OUT_RING  (chan, hw->ntfy->handle);
+	BEGIN_RING(chan, hw->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1); /* bind the surface, rop and pattern objects created above */
+	OUT_RING  (chan, hw->surf2d->handle);
+	BEGIN_RING(chan, hw->rect, NV04_GDI_RECTANGLE_TEXT_ROP, 1);
+	OUT_RING  (chan, hw->rop->handle);
+	BEGIN_RING(chan, hw->rect, NV04_GDI_RECTANGLE_TEXT_PATTERN, 1);
+	OUT_RING  (chan, hw->patt->handle);
+
+	BEGIN_RING(chan, hw->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1);
+	OUT_RING  (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_ROP_AND);
+	BEGIN_RING(chan, hw->rect,
+		   NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1);
+	OUT_RING  (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE);
+
+	/* Swizzled surface. */
+	if (context_chipset(ctx) < 0x20)
+		class = NV04_SWIZZLED_SURFACE;
+	else
+		class = NV20_SWIZZLED_SURFACE;
+
+	ret = nouveau_grobj_alloc(chan, handle++, class, &hw->swzsurf);
+	if (ret)
+		goto fail;
+
+	/* Scaled image from memory. */
+	if  (context_chipset(ctx) < 0x10)
+		class = NV04_SCALED_IMAGE_FROM_MEMORY;
+	else
+		class = NV10_SCALED_IMAGE_FROM_MEMORY;
+
+	ret = nouveau_grobj_alloc(chan, handle++, class, &hw->sifm);
+	if (ret)
+		goto fail;
+
+	if (context_chipset(ctx) >= 0x10) { /* colour conversion mode is only programmed on chipsets >= 0x10 */
+		BEGIN_RING(chan, hw->sifm,
+			   NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 1);
+		OUT_RING(chan, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
+	}
+
+	return GL_TRUE;
+
+fail: /* any failed allocation lands here */
+	nv04_surface_takedown(ctx); /* frees every hw object field; assumes the free helpers tolerate unallocated entries - confirm */
+	return GL_FALSE;
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv10_context.c b/src/mesa/drivers/dri/nouveau/nv10_context.c
new file mode 100644
index 0000000000..b6d10361de
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv10_context.c
@@ -0,0 +1,426 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_fbo.h"
+#include "nouveau_util.h"
+#include "nouveau_class.h"
+#include "nv04_driver.h"
+#include "nv10_driver.h"
+
+static const struct dri_extension nv10_extensions[] = { /* table passed to driInitExtensions() in nv10_context_create() */
+	{ "GL_EXT_texture_rectangle",	NULL },
+	{ NULL,				NULL } /* terminating entry */
+};
+
+/* Driver Clear hook: clear the LMA depth buffer when one exists and depth is
+ * being cleared, then hand over to the common nouveau_clear(). */
+static void
+nv10_clear(GLcontext *ctx, GLbitfield buffers)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *celsius = context_eng3d(ctx);
+	struct nouveau_framebuffer *nfb =
+		to_nouveau_framebuffer(ctx->DrawBuffer);
+
+	nouveau_validate_framebuffer(ctx);
+
+	if (nfb->lma_bo && ctx->Depth.Mask && (buffers & BUFFER_BIT_DEPTH)) {
+		struct nouveau_surface *zs = &to_nouveau_renderbuffer(
+			nfb->base._DepthBuffer->Wrapped)->surface;
+
+		BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_FILL_VALUE, 1);
+		OUT_RING(chan, pack_zs_f(zs->format, ctx->Depth.Clear, 0));
+		BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_BUFFER_CLEAR, 1);
+		OUT_RING(chan, 1);
+	}
+
+	nouveau_clear(ctx, buffers);
+}
+
+static void /* Program the initial state of the celsius 3D object, then fire the ring */
+nv10_hwctx_init(GLcontext *ctx)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *celsius = context_eng3d(ctx);
+	struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
+	int i;
+
+	BEGIN_RING(chan, celsius, NV10TCL_DMA_NOTIFY, 1);
+	OUT_RING(chan, hw->ntfy->handle);
+
+	BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY0, 3); /* DMA object handles: vram, then gart twice */
+	OUT_RING(chan, chan->vram->handle);
+	OUT_RING(chan, chan->gart->handle);
+	OUT_RING(chan, chan->gart->handle);
+	BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY2, 2);
+	OUT_RING(chan, chan->vram->handle);
+	OUT_RING(chan, chan->vram->handle);
+
+	BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+	OUT_RING(chan, 0);
+
+	BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 2);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0);
+
+	BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); /* clip rectangle 0 gets a non-zero range, 1..7 are zeroed below */
+	OUT_RING(chan, 0x7ff << 16 | 0x800);
+	BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
+	OUT_RING(chan, 0x7ff << 16 | 0x800);
+
+	for (i = 1; i < 8; i++) {
+		BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+		OUT_RING(chan, 0);
+		BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
+		OUT_RING(chan, 0);
+	}
+
+	BEGIN_RING(chan, celsius, 0x290, 1); /* raw method offsets: no symbolic name is used here */
+	OUT_RING(chan, 0x10 << 16 | 1);
+	BEGIN_RING(chan, celsius, 0x3f4, 1);
+	OUT_RING(chan, 0);
+
+	BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+	OUT_RING(chan, 0);
+
+	if (context_chipset(ctx) >= 0x17) { /* extra setup only done on chipsets >= 0x17 */
+		BEGIN_RING(chan, celsius, NV17TCL_DMA_IN_MEMORY4, 2);
+		OUT_RING(chan, chan->vram->handle);
+		OUT_RING(chan, chan->vram->handle);
+
+		BEGIN_RING(chan, celsius, 0xd84, 1);
+		OUT_RING(chan, 0x3);
+
+		BEGIN_RING(chan, celsius, NV17TCL_COLOR_MASK_ENABLE, 1);
+		OUT_RING(chan, 1);
+	}
+
+	if (context_chipset(ctx) >= 0x11) { /* extra setup only done on chipsets >= 0x11 */
+		BEGIN_RING(chan, celsius, 0x120, 3);
+		OUT_RING(chan, 0);
+		OUT_RING(chan, 1);
+		OUT_RING(chan, 2);
+
+		BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+		OUT_RING(chan, 0);
+	}
+
+	BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+	OUT_RING(chan, 0);
+
+	/* Set state */
+	BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 1);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 2);
+	OUT_RING(chan, 0x207);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(0), 2);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0);
+
+	BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 1);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 2);
+	OUT_RING(chan, 1);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_SRC, 4);
+	OUT_RING(chan, 1);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0x8006);
+	BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 8);
+	OUT_RING(chan, 0xff);
+	OUT_RING(chan, 0x207);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0xff);
+	OUT_RING(chan, 0x1e00);
+	OUT_RING(chan, 0x1e00);
+	OUT_RING(chan, 0x1e00);
+	OUT_RING(chan, 0x1d01);
+	BEGIN_RING(chan, celsius, NV10TCL_NORMALIZE_ENABLE, 1);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 2); /* NOTE(review): FOG_ENABLE was already written above; this 2-word write covers it again */
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_LIGHT_MODEL, 1);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_SEPARATE_SPECULAR_ENABLE, 1);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_ENABLED_LIGHTS, 1);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1);
+	OUT_RING(chan, 0x201);
+	BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1);
+	OUT_RING(chan, 8);
+	BEGIN_RING(chan, celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_LINE_WIDTH, 1);
+	OUT_RING(chan, 8);
+	BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); /* NOTE(review): second write of LINE_SMOOTH_ENABLE in this function */
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
+	OUT_RING(chan, 0x1b02);
+	OUT_RING(chan, 0x1b02);
+	BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2);
+	OUT_RING(chan, 0x405);
+	OUT_RING(chan, 0x901);
+	BEGIN_RING(chan, celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_TX_GEN_MODE_S(0), 8);
+	for (i = 0; i < 8; i++)
+		OUT_RING(chan, 0);
+
+	BEGIN_RING(chan, celsius, NV10TCL_TX_MATRIX_ENABLE(0), 2);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3);
+	OUT_RING(chan, 0x3fc00000);	/* NOTE(review): this bit pattern is +1.5 as an IEEE-754 float; the note here used to read -1.50 */
+	OUT_RING(chan, 0xbdb8aa0a);	/* -0.09 */
+	OUT_RING(chan, 0);		/*  0.00 */
+
+	BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+	OUT_RING(chan, 0);
+
+	BEGIN_RING(chan, celsius, NV10TCL_FOG_MODE, 2);
+	OUT_RING(chan, 0x802);
+	OUT_RING(chan, 2);
+	/* For some reason VIEW_MATRIX_ENABLE needs to be 6 instead of 4 when
+	 * using texturing, except when using the texture matrix.
+	 */
+	BEGIN_RING(chan, celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1);
+	OUT_RING(chan, 6);
+	BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1);
+	OUT_RING(chan, 0x01010101);
+
+	/* Set the initial vertex attribute values. */
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL_4F_R, 4);
+	OUT_RINGf(chan, 1.0);
+	OUT_RINGf(chan, 0.0);
+	OUT_RINGf(chan, 0.0);
+	OUT_RINGf(chan, 1.0);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL2_3F_R, 3);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_NOR_3F_X, 3);
+	OUT_RING(chan, 0);
+	OUT_RING(chan, 0);
+	OUT_RINGf(chan, 1.0);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX0_4F_S, 4);
+	OUT_RINGf(chan, 0.0);
+	OUT_RINGf(chan, 0.0);
+	OUT_RINGf(chan, 0.0);
+	OUT_RINGf(chan, 1.0);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX1_4F_S, 4);
+	OUT_RINGf(chan, 0.0);
+	OUT_RINGf(chan, 0.0);
+	OUT_RINGf(chan, 0.0);
+	OUT_RINGf(chan, 1.0);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_FOG_1F, 1);
+	OUT_RINGf(chan, 0.0);
+	BEGIN_RING(chan, celsius, NV10TCL_EDGEFLAG_ENABLE, 1);
+	OUT_RING(chan, 1);
+
+	BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
+	OUT_RING(chan, 0.0); /* NOTE(review): OUT_RING with a double literal where the next word uses OUT_RINGf; presumably the same word for 0.0 - confirm */
+	OUT_RINGf(chan, 16777216.0);
+
+	FIRE_RING(chan);
+}
+/* Destroy an NV10 context; also used as the failure path of nv10_context_create(). */
+static void
+nv10_context_destroy(GLcontext *ctx)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+
+	nv04_surface_takedown(ctx);	/* counterpart of nv04_surface_init() */
+	nv10_render_destroy(ctx);	/* counterpart of nv10_render_init() */
+
+	nouveau_grobj_free(&nctx->hw.eng3d);	/* the 3D engine object */
+
+	nouveau_context_deinit(ctx);
+	FREE(ctx);	/* NOTE(review): ctx is &nctx->base; assumes base is the first member — confirm */
+}
+/* Create an NV10-family context; returns its GLcontext, or NULL on failure. */
+static GLcontext *
+nv10_context_create(struct nouveau_screen *screen, const GLvisual *visual,
+		    GLcontext *share_ctx)
+{
+	struct nouveau_context *nctx;
+	GLcontext *ctx;
+	unsigned celsius_class;
+	int ret;
+
+	nctx = CALLOC_STRUCT(nouveau_context);
+	if (!nctx)
+		return NULL;
+
+	ctx = &nctx->base;	/* the GLcontext handed out is a member of nctx */
+
+	if (!nouveau_context_init(ctx, screen, visual, share_ctx))
+		goto fail;
+
+	driInitExtensions(ctx, nv10_extensions, GL_FALSE);
+
+	/* GL constants. */
+	ctx->Const.MaxTextureLevels = 12;
+	ctx->Const.MaxTextureCoordUnits = NV10_TEXTURE_UNITS;
+	ctx->Const.MaxTextureImageUnits = NV10_TEXTURE_UNITS;
+	ctx->Const.MaxTextureUnits = NV10_TEXTURE_UNITS;
+	ctx->Const.MaxTextureMaxAnisotropy = 2;
+	ctx->Const.MaxTextureLodBias = 15;
+	ctx->Driver.Clear = nv10_clear;
+
+	/* 2D engine. */
+	ret = nv04_surface_init(ctx);
+	if (!ret)	/* zero is the failure value here ... */
+		goto fail;
+
+	/* 3D engine: the object class depends on the chipset. */
+	if (context_chipset(ctx) >= 0x17)
+		celsius_class = NV17TCL;
+	else if (context_chipset(ctx) >= 0x11)
+		celsius_class = NV11TCL;
+	else
+		celsius_class = NV10TCL;
+
+	ret = nouveau_grobj_alloc(context_chan(ctx), 0xbeef0001, celsius_class,
+				  &nctx->hw.eng3d);
+	if (ret)	/* ... while nonzero is the failure value here */
+		goto fail;
+
+	nv10_hwctx_init(ctx);
+	nv10_render_init(ctx);
+
+	return ctx;
+
+fail:
+	nv10_context_destroy(ctx);	/* NOTE(review): full teardown on a partly built context — confirm every step tolerates that */
+	return NULL;
+}
+/* NV10 driver description: context hooks, NV04 surface helpers and the state emit table. */
+const struct nouveau_driver nv10_driver = {
+	.context_create = nv10_context_create,
+	.context_destroy = nv10_context_destroy,
+	.surface_copy = nv04_surface_copy,
+	.surface_fill = nv04_surface_fill,
+	.emit = (nouveau_state_func[]) {	/* NOTE(review): presumably indexed by NOUVEAU_STATE_* id, so entry order matters — confirm */
+		nv10_emit_alpha_func,
+		nv10_emit_blend_color,
+		nv10_emit_blend_equation,
+		nv10_emit_blend_func,
+		nv10_emit_clip_plane,	/* six consecutive clip-plane slots */
+		nv10_emit_clip_plane,
+		nv10_emit_clip_plane,
+		nv10_emit_clip_plane,
+		nv10_emit_clip_plane,
+		nv10_emit_clip_plane,
+		nv10_emit_color_mask,
+		nv10_emit_color_material,
+		nv10_emit_cull_face,
+		nv10_emit_front_face,
+		nv10_emit_depth,
+		nv10_emit_dither,
+		nv10_emit_frag,
+		nv10_emit_framebuffer,
+		nv10_emit_fog,
+		nv10_emit_light_enable,
+		nv10_emit_light_model,
+		nv10_emit_light_source,	/* eight consecutive light-source slots */
+		nv10_emit_light_source,
+		nv10_emit_light_source,
+		nv10_emit_light_source,
+		nv10_emit_light_source,
+		nv10_emit_light_source,
+		nv10_emit_light_source,
+		nv10_emit_light_source,
+		nv10_emit_line_stipple,
+		nv10_emit_line_mode,
+		nv10_emit_logic_opcode,
+		nv10_emit_material_ambient,	/* each material handler is followed by a no-op slot */
+		nouveau_emit_nothing,
+		nv10_emit_material_diffuse,
+		nouveau_emit_nothing,
+		nv10_emit_material_specular,
+		nouveau_emit_nothing,
+		nv10_emit_material_shininess,
+		nouveau_emit_nothing,
+		nv10_emit_modelview,
+		nv10_emit_point_mode,
+		nv10_emit_point_parameter,
+		nv10_emit_polygon_mode,
+		nv10_emit_polygon_offset,
+		nv10_emit_polygon_stipple,
+		nv10_emit_projection,
+		nv10_emit_render_mode,
+		nv10_emit_scissor,
+		nv10_emit_shade_model,
+		nv10_emit_stencil_func,
+		nv10_emit_stencil_mask,
+		nv10_emit_stencil_op,
+		nv10_emit_tex_env,	/* texture states: two handlers (NV10_TEXTURE_UNITS is 2) ... */
+		nv10_emit_tex_env,
+		nouveau_emit_nothing,	/* ... then two no-op slots, repeated for each texture state */
+		nouveau_emit_nothing,
+		nv10_emit_tex_gen,
+		nv10_emit_tex_gen,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nv10_emit_tex_mat,
+		nv10_emit_tex_mat,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nv10_emit_tex_obj,
+		nv10_emit_tex_obj,
+		nouveau_emit_nothing,
+		nouveau_emit_nothing,
+		nv10_emit_viewport
+	},
+	.num_emit = NUM_NOUVEAU_STATE,	/* declared length of the emit table above */
+};
diff --git a/src/mesa/drivers/dri/nouveau/nv10_driver.h b/src/mesa/drivers/dri/nouveau/nv10_driver.h
new file mode 100644
index 0000000000..cefd6c6fba
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv10_driver.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NV10_DRIVER_H__
+#define __NV10_DRIVER_H__
+
+#define NV10_TEXTURE_UNITS 2
+
+/* nv10_context.c */
+extern const struct nouveau_driver nv10_driver;
+
+/* nv10_render.c */
+void
+nv10_render_init(GLcontext *ctx);
+
+void
+nv10_render_destroy(GLcontext *ctx);
+
+/* nv10_state_fb.c */
+void
+nv10_emit_framebuffer(GLcontext *ctx, int emit);
+
+void
+nv10_emit_render_mode(GLcontext *ctx, int emit);
+
+void
+nv10_emit_scissor(GLcontext *ctx, int emit);
+
+void
+nv10_emit_viewport(GLcontext *ctx, int emit);
+
+/* nv10_state_polygon.c */
+void
+nv10_emit_cull_face(GLcontext *ctx, int emit);
+
+void
+nv10_emit_front_face(GLcontext *ctx, int emit);
+
+void
+nv10_emit_line_mode(GLcontext *ctx, int emit);
+
+void
+nv10_emit_line_stipple(GLcontext *ctx, int emit);
+
+void
+nv10_emit_point_mode(GLcontext *ctx, int emit);
+
+void
+nv10_emit_polygon_mode(GLcontext *ctx, int emit);
+
+void
+nv10_emit_polygon_offset(GLcontext *ctx, int emit);
+
+void
+nv10_emit_polygon_stipple(GLcontext *ctx, int emit);
+
+/* nv10_state_raster.c */
+void
+nv10_emit_alpha_func(GLcontext *ctx, int emit);
+
+void
+nv10_emit_blend_color(GLcontext *ctx, int emit);
+
+void
+nv10_emit_blend_equation(GLcontext *ctx, int emit);
+
+void
+nv10_emit_blend_func(GLcontext *ctx, int emit);
+
+void
+nv10_emit_color_mask(GLcontext *ctx, int emit);
+
+void
+nv10_emit_depth(GLcontext *ctx, int emit);
+
+void
+nv10_emit_dither(GLcontext *ctx, int emit);
+
+void
+nv10_emit_logic_opcode(GLcontext *ctx, int emit);
+
+void
+nv10_emit_shade_model(GLcontext *ctx, int emit);
+
+void
+nv10_emit_stencil_func(GLcontext *ctx, int emit);
+
+void
+nv10_emit_stencil_mask(GLcontext *ctx, int emit);
+
+void
+nv10_emit_stencil_op(GLcontext *ctx, int emit);
+
+/* nv10_state_frag.c */
+void
+nv10_get_general_combiner(GLcontext *ctx, int i,
+			  uint32_t *a_in, uint32_t *a_out,
+			  uint32_t *c_in, uint32_t *c_out, uint32_t *k);
+
+void
+nv10_get_final_combiner(GLcontext *ctx, uint64_t *in, int *n);
+
+void
+nv10_emit_tex_env(GLcontext *ctx, int emit);
+
+void
+nv10_emit_frag(GLcontext *ctx, int emit);
+
+/* nv10_state_tex.c */
+void
+nv10_emit_tex_gen(GLcontext *ctx, int emit);
+
+void
+nv10_emit_tex_mat(GLcontext *ctx, int emit);
+
+void
+nv10_emit_tex_obj(GLcontext *ctx, int emit);
+
+/* nv10_state_tnl.c */
+void
+nv10_get_fog_coeff(GLcontext *ctx, float k[3]);
+
+void
+nv10_get_spot_coeff(struct gl_light *l, float k[7]);
+
+void
+nv10_get_shininess_coeff(float s, float k[6]);
+
+void
+nv10_emit_clip_plane(GLcontext *ctx, int emit);
+
+void
+nv10_emit_color_material(GLcontext *ctx, int emit);
+
+void
+nv10_emit_fog(GLcontext *ctx, int emit);
+
+void
+nv10_emit_light_enable(GLcontext *ctx, int emit);
+
+void
+nv10_emit_light_model(GLcontext *ctx, int emit);
+
+void
+nv10_emit_light_source(GLcontext *ctx, int emit);
+
+void
+nv10_emit_material_ambient(GLcontext *ctx, int emit);
+
+void
+nv10_emit_material_diffuse(GLcontext *ctx, int emit);
+
+void
+nv10_emit_material_specular(GLcontext *ctx, int emit);
+
+void
+nv10_emit_material_shininess(GLcontext *ctx, int emit);
+
+void
+nv10_emit_modelview(GLcontext *ctx, int emit);
+
+void
+nv10_emit_point_parameter(GLcontext *ctx, int emit);
+
+void
+nv10_emit_projection(GLcontext *ctx, int emit);
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nv10_render.c b/src/mesa/drivers/dri/nouveau/nv10_render.c
new file mode 100644
index 0000000000..54245ea6ba
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv10_render.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_class.h"
+#include "nv10_driver.h"
+
+#define NUM_VERTEX_ATTRS 8	/* slots walked by the VTXFMT / VTXBUF_ADDRESS loops in this file */
+/* NOTE(review): no definition is visible in this file; presumably supplied by nouveau_render_t.c — confirm. */
+static void
+nv10_emit_material(GLcontext *ctx, struct nouveau_array_state *a,
+		   const void *v);
+
+/* Vertex attribute format: vertex-buffer slot and immediate-mode method per GL attribute. */
+static struct nouveau_attr_info nv10_vertex_attrs[VERT_ATTRIB_MAX] = {
+	[VERT_ATTRIB_POS] = {
+		.vbo_index = 0,
+		.imm_method = NV10TCL_VERTEX_POS_4F_X,
+		.imm_fields = 4,
+	},
+	[VERT_ATTRIB_COLOR0] = {
+		.vbo_index = 1,
+		.imm_method = NV10TCL_VERTEX_COL_4F_R,
+		.imm_fields = 4,
+	},
+	[VERT_ATTRIB_COLOR1] = {
+		.vbo_index = 2,
+		.imm_method = NV10TCL_VERTEX_COL2_3F_R,
+		.imm_fields = 3,
+	},
+	[VERT_ATTRIB_TEX0] = {
+		.vbo_index = 3,
+		.imm_method = NV10TCL_VERTEX_TX0_4F_S,
+		.imm_fields = 4,
+	},
+	[VERT_ATTRIB_TEX1] = {
+		.vbo_index = 4,
+		.imm_method = NV10TCL_VERTEX_TX1_4F_S,
+		.imm_fields = 4,
+	},
+	[VERT_ATTRIB_NORMAL] = {
+		.vbo_index = 5,
+		.imm_method = NV10TCL_VERTEX_NOR_3F_X,
+		.imm_fields = 3,
+	},
+	[VERT_ATTRIB_FOG] = {
+		.vbo_index = 7,	/* index 6 is not assigned anywhere in this table */
+		.imm_method = NV10TCL_VERTEX_FOG_1F,
+		.imm_fields = 1,
+	},
+	[VERT_ATTRIB_GENERIC0] = {	/* even generic attributes only get an emit callback */
+		.emit = nv10_emit_material,
+	},
+	[VERT_ATTRIB_GENERIC2] = {
+		.emit = nv10_emit_material,
+	},
+	[VERT_ATTRIB_GENERIC4] = {
+		.emit = nv10_emit_material,
+	},
+	[VERT_ATTRIB_GENERIC6] = {
+		.emit = nv10_emit_material,
+	},
+	[VERT_ATTRIB_GENERIC8] = {
+		.emit = nv10_emit_material,
+	},
+};	/* attributes not listed above are left zero-initialised */
+
+/* Translate a GL vertex component type into its NV10TCL_VTXFMT_TYPE_* value.
+ * Unsupported types trip the assert and yield FLOAT when asserts are off. */
+static int
+get_hw_format(int type)
+{
+	switch (type) {
+	case GL_FLOAT:		return NV10TCL_VTXFMT_TYPE_FLOAT;
+	case GL_SHORT:
+	case GL_UNSIGNED_SHORT:	return NV10TCL_VTXFMT_TYPE_SHORT;
+	case GL_UNSIGNED_BYTE:	return NV10TCL_VTXFMT_TYPE_BYTE_RGBA;
+	default:
+		assert(0);
+		return NV10TCL_VTXFMT_TYPE_FLOAT;
+	}
+}
+
+/* Emit one NV10TCL_VTXFMT word per hardware vertex slot, as selected by render->map. */
+static void
+nv10_render_set_format(GLcontext *ctx)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *celsius = context_eng3d(ctx);
+	int slot;
+
+	for (slot = 0; slot < NUM_VERTEX_ATTRS; slot++) {
+		const int attr = render->map[slot];
+		/* A slot with no attribute bound is programmed as plain float. */
+		int fmt = NV10TCL_VTXFMT_TYPE_FLOAT;
+
+		if (attr >= 0) {
+			const struct nouveau_array_state *arr =
+				&render->attrs[attr];
+
+			fmt = arr->stride << 8 | arr->fields << 4 |
+				get_hw_format(arr->type);
+
+			if (attr == VERT_ATTRIB_POS && arr->fields == 4)
+				fmt |= NV10TCL_VTXFMT_POS_HOMOGENEOUS;
+		}
+
+		BEGIN_RING(chan, celsius, NV10TCL_VTXFMT(slot), 1);
+		OUT_RING(chan, fmt);
+	}
+}
+
+/* Attach each mapped attribute's buffer to its slot, then emit VERTEX_ARRAY_VALIDATE. */
+static void
+nv10_render_bind_vertices(GLcontext *ctx)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	struct nouveau_bo_context *bctx = context_bctx(ctx, VERTEX);
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *celsius = context_eng3d(ctx);
+	int slot;
+
+	for (slot = 0; slot < NUM_VERTEX_ATTRS; slot++) {
+		const struct nouveau_array_state *arr;
+		const int attr = render->map[slot];
+
+		if (attr < 0)	/* nothing is bound to this slot */
+			continue;
+
+		arr = &render->attrs[attr];
+		nouveau_bo_markl(bctx, celsius, NV10TCL_VTXBUF_ADDRESS(slot),
+				 arr->bo, arr->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+	}
+
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_ARRAY_VALIDATE, 1);
+	OUT_RING(chan, 0);
+}
+
+/* Vertex array rendering defs; presumably consumed by nouveau_render_t.c, included below. */
+#define RENDER_LOCALS(ctx)					\
+	struct nouveau_grobj *celsius = context_eng3d(ctx)
+/* The macros below use `chan` and `celsius` from the expansion site and already end in ';'. */
+#define BATCH_BEGIN(prim)						\
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);	\
+	OUT_RING(chan, prim);
+#define BATCH_END()							\
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);	\
+	OUT_RING(chan, 0);
+
+#define MAX_PACKET 0x400
+/* Sequential draw: each word is ((n) - 1) << 24 | (i). */
+#define MAX_OUT_L 0x100
+#define BATCH_PACKET_L(n)						\
+	BEGIN_RING_NI(chan, celsius, NV10TCL_VERTEX_BUFFER_DRAW_ARRAYS, n);
+#define BATCH_OUT_L(i, n)			\
+	OUT_RING(chan, ((n) - 1) << 24 | (i));
+/* 16-bit elements: two per word, i0 in the low half and i1 in the high half. */
+#define MAX_OUT_I16 0x2
+#define BATCH_PACKET_I16(n)						\
+	BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U16, n);
+#define BATCH_OUT_I16(i0, i1)			\
+	OUT_RING(chan, (i1) << 16 | (i0));
+/* 32-bit elements: one per word. */
+#define MAX_OUT_I32 0x1
+#define BATCH_PACKET_I32(n)						\
+	BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U32, n);
+#define BATCH_OUT_I32(i)			\
+	OUT_RING(chan, i);
+/* Immediate mode: method m followed by n values written with OUT_RINGf. */
+#define IMM_PACKET(m, n)			\
+	BEGIN_RING(chan, celsius, m, n);
+#define IMM_OUT(x)				\
+	OUT_RINGf(chan, x);
+/* TAG(x) expands to nv10_x. */
+#define TAG(x) nv10_##x
+#include "nouveau_render_t.c"
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_fb.c b/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
new file mode 100644
index 0000000000..a2fcb6b695
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_fbo.h"
+#include "nouveau_class.h"
+#include "nouveau_util.h"
+#include "nv10_driver.h"
+
+/* Translate a Mesa surface format into the bits OR-ed into the
+ * NV10TCL_RT_FORMAT word.  Unsupported formats trip the assert; 0 is
+ * returned so the result stays defined when asserts are compiled out. */
+static inline unsigned
+get_rt_format(gl_format format)
+{
+	switch (format) {
+	case MESA_FORMAT_XRGB8888:	return 0x05;
+	case MESA_FORMAT_ARGB8888:	return 0x08;
+	case MESA_FORMAT_RGB565:	return 0x03;
+	case MESA_FORMAT_Z16:		return 0x10;
+	case MESA_FORMAT_Z24_S8:	return 0x0;
+	default:
+		/* Not a format this function knows how to program. */
+		assert(0);
+		return 0;
+	}
+}
+/* (Re)allocate the framebuffer's LMA depth buffer and emit the NV17TCL_LMA_DEPTH_* state. */
+static void
+setup_lma_buffer(GLcontext *ctx)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *celsius = context_eng3d(ctx);
+	struct nouveau_bo_context *bctx = context_bctx(ctx, LMA_DEPTH);
+	struct gl_framebuffer *fb = ctx->DrawBuffer;
+	struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb);
+	unsigned pitch = align(fb->Width, 128),	/* presumably rounds up to a multiple of 128 */
+		height = align(fb->Height, 2),
+		size = pitch * height;
+
+	if (!nfb->lma_bo || nfb->lma_bo->size != size) {	/* no buffer yet, or its size changed */
+		nouveau_bo_ref(NULL, &nfb->lma_bo);
+		nouveau_bo_new(context_dev(ctx), NOUVEAU_BO_VRAM, 0, size,
+			       &nfb->lma_bo);	/* NOTE(review): result unchecked — confirm a failed allocation cannot reach the markl below */
+	}
+
+	nouveau_bo_markl(bctx, celsius, NV17TCL_LMA_DEPTH_BUFFER_OFFSET,
+			 nfb->lma_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR);
+
+	WAIT_RING(chan, 9);	/* presumably 3 method headers + 6 data words emitted below */
+	BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_WINDOW_X, 4);
+	OUT_RINGf(chan, - 1792);
+	OUT_RINGf(chan, - 2304 + fb->Height);
+	OUT_RINGf(chan, fb->_DepthMaxF / 2);
+	OUT_RINGf(chan, 0);
+
+	BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_BUFFER_PITCH, 1);
+	OUT_RING(chan, pitch);
+
+	BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_ENABLE, 1);
+	OUT_RING(chan, 1);
+}
+/* Emit render-target state (colour/zeta buffers, format and pitches) for ctx->DrawBuffer. */
+void
+nv10_emit_framebuffer(GLcontext *ctx, int emit)	/* emit is not used by this handler */
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *celsius = context_eng3d(ctx);
+	struct nouveau_bo_context *bctx = context_bctx(ctx, FRAMEBUFFER);
+	struct gl_framebuffer *fb = ctx->DrawBuffer;
+	struct nouveau_surface *s;
+	unsigned rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR;
+	unsigned rt_pitch = 0, zeta_pitch = 0;
+	unsigned bo_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+
+	if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT)	/* nothing is emitted for an incomplete framebuffer */
+		return;
+
+	/* At least nv11 seems to get sad if we don't do this before
+	 * swapping RTs: six NOPs on chipsets older than 0x17. */
+	if (context_chipset(ctx) < 0x17) {
+		int i;
+
+		for (i = 0; i < 6; i++) {
+			BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+			OUT_RING(chan, 0);
+		}
+	}
+
+	/* Render target */
+	if (fb->_ColorDrawBuffers[0]) {
+		s = &to_nouveau_renderbuffer(
+			fb->_ColorDrawBuffers[0])->surface;
+
+		rt_format |= get_rt_format(s->format);
+		zeta_pitch = rt_pitch = s->pitch;	/* zeta pitch defaults to the colour pitch */
+
+		nouveau_bo_markl(bctx, celsius, NV10TCL_COLOR_OFFSET,
+				 s->bo, 0, bo_flags);
+	}
+
+	/* depth/stencil */
+	if (fb->_DepthBuffer) {
+		s = &to_nouveau_renderbuffer(
+			fb->_DepthBuffer->Wrapped)->surface;
+
+		rt_format |= get_rt_format(s->format);
+		zeta_pitch = s->pitch;	/* replaces the default taken from the colour buffer */
+
+		nouveau_bo_markl(bctx, celsius, NV10TCL_ZETA_OFFSET,
+				 s->bo, 0, bo_flags);
+
+		if (context_chipset(ctx) >= 0x17)	/* LMA depth state uses NV17TCL methods */
+			setup_lma_buffer(ctx);
+	}
+
+	BEGIN_RING(chan, celsius, NV10TCL_RT_FORMAT, 2);
+	OUT_RING(chan, rt_format);
+	OUT_RING(chan, zeta_pitch << 16 | rt_pitch);	/* zeta pitch in the high half, colour pitch in the low half */
+
+	context_dirty(ctx, VIEWPORT);	/* viewport and scissor are marked dirty as well */
+	context_dirty(ctx, SCISSOR);
+}
+/* Render-mode state handler; the body is empty, so nothing is emitted. */
+void
+nv10_emit_render_mode(GLcontext *ctx, int emit)
+{
+}
+
+/* Emit the draw buffer's scissor box: (width << 16 | x), then (height << 16 | y). */
+void
+nv10_emit_scissor(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *celsius = context_eng3d(ctx);
+	int left, top, width, height;
+
+	get_scissors(ctx->DrawBuffer, &left, &top, &width, &height);
+	BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 2);
+	OUT_RING(chan, width << 16 | left);
+	OUT_RING(chan, height << 16 | top);
+}
+
+/* Emit the viewport translation and the clip rectangle of the draw buffer. */
+void
+nv10_emit_viewport(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *celsius = context_eng3d(ctx);
+	struct gl_framebuffer *fb = ctx->DrawBuffer;
+	float translate[4] = { 0 };
+
+	get_viewport_translate(ctx, translate);
+	translate[0] -= 2048;	/* X and Y are biased by -2048 (0x800) */
+	translate[1] -= 2048;
+
+	BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4);
+	OUT_RINGp(chan, translate, 4);
+	BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+	OUT_RING(chan, (fb->Width - 1) << 16 | 0x08000800);
+	BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
+	OUT_RING(chan, (fb->Height - 1) << 16 | 0x08000800);
+
+	context_dirty(ctx, PROJECTION);
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_frag.c b/src/mesa/drivers/dri/nouveau/nv10_state_frag.c
new file mode 100644
index 0000000000..76b95fdd51
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_frag.c
@@ -0,0 +1,408 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_gldefs.h"
+#include "nouveau_class.h"
+#include "nouveau_util.h"
+#include "nv10_driver.h"
+#include "nv20_driver.h"
+
+#define RC_IN_SHIFT_A	24	/* bit offsets of the input fields inside the 64-bit combiner_state.in word */
+#define RC_IN_SHIFT_B	16
+#define RC_IN_SHIFT_C	8
+#define RC_IN_SHIFT_D	0
+#define RC_IN_SHIFT_E	56	/* E, F and G sit in the upper 32 bits */
+#define RC_IN_SHIFT_F	48
+#define RC_IN_SHIFT_G	40
+/* Field encodings, all taken from the RC_IN_RGB "D" input definitions and widened to 64 bits. */
+#define RC_IN_SOURCE(source)				\
+	((uint64_t)NV10TCL_RC_IN_RGB_D_INPUT_##source)
+#define RC_IN_USAGE(usage)					\
+	((uint64_t)NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_##usage)
+#define RC_IN_MAPPING(mapping)					\
+	((uint64_t)NV10TCL_RC_IN_RGB_D_MAPPING_##mapping)
+/* Output word flags, all taken from the RC_OUT_RGB definitions. */
+#define RC_OUT_BIAS	NV10TCL_RC_OUT_RGB_BIAS_BIAS_BY_NEGATIVE_ONE_HALF
+#define RC_OUT_SCALE_1	NV10TCL_RC_OUT_RGB_SCALE_NONE
+#define RC_OUT_SCALE_2	NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_TWO
+#define RC_OUT_SCALE_4	NV10TCL_RC_OUT_RGB_SCALE_SCALE_BY_FOUR
+
+/* Make the combiner do: spare0_i = A_i * B_i */
+#define RC_OUT_AB	NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0
+/* spare0_i = dot3(A, B) */
+#define RC_OUT_DOT_AB	(NV10TCL_RC_OUT_RGB_AB_OUTPUT_SPARE0 |	\
+			 NV10TCL_RC_OUT_RGB_AB_DOT_PRODUCT)
+/* spare0_i = A_i * B_i + C_i * D_i */
+#define RC_OUT_SUM	NV10TCL_RC_OUT_RGB_SUM_OUTPUT_SPARE0
+/* Working state for translating one texture-env combiner (RGB or alpha) into RC words. */
+struct combiner_state {
+	GLcontext *ctx;
+	int unit;	/* index of the texture unit being translated */
+
+	/* GL state */
+	GLenum mode;
+	GLenum *source;	/* points at the unit's Source<chan> array */
+	GLenum *operand;	/* points at the unit's Operand<chan> array */
+	GLuint logscale;	/* copied from ScaleShift<chan> */
+
+	/* Derived HW state */
+	uint64_t in;	/* input bindings, accumulated by the INPUT_* macros */
+	uint32_t out;	/* RC_OUT_* flags, set by setup_combiner() */
+};
+
+/* Initialize a combiner_state struct from the texture unit context;
+ * <chan> (RGB or A) selects the Mode/Source/Operand/ScaleShift fields. */
+#define INIT_COMBINER(chan, ctx, rc, i) do {			\
+		struct gl_tex_env_combine_state *c =		\
+			ctx->Texture.Unit[i]._CurrentCombine;	\
+		(rc)->ctx = ctx;				\
+		(rc)->unit = i;					\
+		(rc)->mode = c->Mode##chan;			\
+		(rc)->source = c->Source##chan;			\
+		(rc)->operand = c->Operand##chan;		\
+		(rc)->logscale = c->ScaleShift##chan;		\
+		(rc)->in = (rc)->out = 0;			\
+	} while (0)
+
+/* Get the RC input source for the specified EXT_texture_env_combine
+ * source.  An unknown source trips the assert; when asserts are compiled
+ * out the ZERO input is returned so the result is always defined. */
+static uint32_t
+get_input_source(struct combiner_state *rc, int source)
+{
+	switch (source) {
+	case GL_TEXTURE:
+		return RC_IN_SOURCE(TEXTURE0) + rc->unit;
+
+	case GL_TEXTURE0:
+		return RC_IN_SOURCE(TEXTURE0);
+	case GL_TEXTURE1:
+		return RC_IN_SOURCE(TEXTURE1);
+	case GL_TEXTURE2:
+		return RC_IN_SOURCE(TEXTURE2);
+	case GL_TEXTURE3:
+		return RC_IN_SOURCE(TEXTURE3);
+
+	case GL_CONSTANT:
+		return context_chipset(rc->ctx) >= 0x20 ?
+			RC_IN_SOURCE(CONSTANT_COLOR0) :
+			RC_IN_SOURCE(CONSTANT_COLOR0) + rc->unit;
+
+	case GL_PRIMARY_COLOR:
+		return RC_IN_SOURCE(PRIMARY_COLOR);
+
+	case GL_PREVIOUS:
+		/* Unit 0 has no previous stage, so it reads the primary colour. */
+		return rc->unit ? RC_IN_SOURCE(SPARE0)
+			: RC_IN_SOURCE(PRIMARY_COLOR);
+
+	default:
+		assert(0);
+		return RC_IN_SOURCE(ZERO);
+	}
+}
+
+/* Flags accepted by get_input_mapping() and get_input_arg(): request the
+ * inverted and/or the half-biased variant of an operand. */
+#define INVERT 0x1
+#define HALF_BIAS 0x2
+
+/* Get the RC input mapping (component usage plus value mapping) for the
+ * specified texture_env_combine operand. */
+static uint32_t
+get_input_mapping(struct combiner_state *rc, int operand, int flags)
+{
+	/* Which component of the source is read. */
+	int map = is_color_operand(operand) ? RC_IN_USAGE(RGB) :
+		RC_IN_USAGE(ALPHA);
+	/* A negative operand and the INVERT flag cancel each other out. */
+	const int negate = is_negative_operand(operand) == !(flags & INVERT);
+
+	/* How the value is mapped before it enters the combiner. */
+	if (flags & HALF_BIAS)
+		map |= negate ? RC_IN_MAPPING(HALF_BIAS_NEGATE) :
+			RC_IN_MAPPING(HALF_BIAS_NORMAL);
+	else
+		map |= negate ? RC_IN_MAPPING(UNSIGNED_INVERT) :
+			RC_IN_MAPPING(UNSIGNED_IDENTITY);
+
+	return map;
+}
+
+/* Build the RC input field (source | mapping) for combiner argument <arg>,
+ * substituting constants for channels the emulated formats do not provide. */
+static uint32_t
+get_input_arg(struct combiner_state *rc, int arg, int flags)
+{
+	const int source = rc->source[arg];
+	const int operand = rc->operand[arg];
+
+	if (is_texture_source(source)) {
+		const int unit = source == GL_TEXTURE ?
+			rc->unit : source - GL_TEXTURE0;
+		struct gl_texture_object *tex =
+			rc->ctx->Texture.Unit[unit]._Current;
+
+		switch (tex->Image[0][tex->BaseLevel]->TexFormat) {
+		case MESA_FORMAT_A8:
+			/* Emulated using I8: colour operands read as zero. */
+			if (is_color_operand(operand))
+				return RC_IN_SOURCE(ZERO) |
+					get_input_mapping(rc, operand, flags);
+			break;
+
+		case MESA_FORMAT_L8:		/* sometimes emulated using I8 */
+		case MESA_FORMAT_XRGB8888:	/* sometimes emulated using ARGB8888 */
+			/* Alpha operands read the inverted ZERO input instead. */
+			if (!is_color_operand(operand))
+				return RC_IN_SOURCE(ZERO) |
+					get_input_mapping(rc, operand,
+							  flags ^ INVERT);
+			break;
+		default:
+			break;
+		}
+	}
+
+	return get_input_source(rc, source) |
+		get_input_mapping(rc, operand, flags);
+}
+
+/* Bind the RC input variable <var> to the EXT_texture_env_combine argument
+ * <arg>, possibly inverted or biased (widened so shifts >= 32 are safe). */
+#define INPUT_ARG(rc, var, arg, flags)					\
+	(rc)->in |= (uint64_t)get_input_arg(rc, arg, flags) << RC_IN_SHIFT_##var
+
+/* Bind the RC input variable <var> to the RC source <src>. */
+#define INPUT_SRC(rc, var, src, chan)					\
+	(rc)->in |= (RC_IN_SOURCE(src) |				\
+		     RC_IN_USAGE(chan)) << RC_IN_SHIFT_##var
+
+/* Bind the RC input variable <var> to a constant +/-1 */
+#define INPUT_ONE(rc, var, flags)					\
+	(rc)->in |= (RC_IN_SOURCE(ZERO) |				\
+		     ((flags) & INVERT ? RC_IN_MAPPING(EXPAND_NORMAL) :	\
+		      RC_IN_MAPPING(UNSIGNED_INVERT))) << RC_IN_SHIFT_##var
+/* Derive rc->in and rc->out from the GL combine mode and scale stored in *rc. */
+static void
+setup_combiner(struct combiner_state *rc)
+{
+	switch (rc->mode) {
+	case GL_REPLACE:	/* arg0 times the INPUT_ONE constant */
+		INPUT_ARG(rc, A, 0, 0);
+		INPUT_ONE(rc, B, 0);
+
+		rc->out = RC_OUT_AB;
+		break;
+
+	case GL_MODULATE:	/* arg0 * arg1 */
+		INPUT_ARG(rc, A, 0, 0);
+		INPUT_ARG(rc, B, 1, 0);
+
+		rc->out = RC_OUT_AB;
+		break;
+
+	case GL_ADD:	/* arg0 and arg1, each times the constant, summed */
+		INPUT_ARG(rc, A, 0, 0);
+		INPUT_ONE(rc, B, 0);
+		INPUT_ARG(rc, C, 1, 0);
+		INPUT_ONE(rc, D, 0);
+
+		rc->out = RC_OUT_SUM;
+		break;
+
+	case GL_ADD_SIGNED:	/* same inputs as GL_ADD ... */
+		INPUT_ARG(rc, A, 0, 0);
+		INPUT_ONE(rc, B, 0);
+		INPUT_ARG(rc, C, 1, 0);
+		INPUT_ONE(rc, D, 0);
+
+		rc->out = RC_OUT_SUM | RC_OUT_BIAS;	/* ... plus the -1/2 output bias */
+		break;
+
+	case GL_INTERPOLATE:	/* arg0 * arg2 + arg1 * inverted arg2 */
+		INPUT_ARG(rc, A, 0, 0);
+		INPUT_ARG(rc, B, 2, 0);
+		INPUT_ARG(rc, C, 1, 0);
+		INPUT_ARG(rc, D, 2, INVERT);
+
+		rc->out = RC_OUT_SUM;
+		break;
+
+	case GL_SUBTRACT:	/* like GL_ADD, but D is the inverted constant */
+		INPUT_ARG(rc, A, 0, 0);
+		INPUT_ONE(rc, B, 0);
+		INPUT_ARG(rc, C, 1, 0);
+		INPUT_ONE(rc, D, INVERT);
+
+		rc->out = RC_OUT_SUM;
+		break;
+
+	case GL_DOT3_RGB:
+	case GL_DOT3_RGBA:	/* dot product of the half-biased arguments, scaled by four */
+		INPUT_ARG(rc, A, 0, HALF_BIAS);
+		INPUT_ARG(rc, B, 1, HALF_BIAS);
+
+		rc->out = RC_OUT_DOT_AB | RC_OUT_SCALE_4;
+
+		assert(!rc->logscale);	/* so the switch below only ORs RC_OUT_SCALE_1 on top */
+		break;
+
+	default:
+		assert(0);
+	}
+
+	switch (rc->logscale) {	/* 0, 1, 2 select scale by 1, 2, 4 */
+	case 0:
+		rc->out |= RC_OUT_SCALE_1;
+		break;
+	case 1:
+		rc->out |= RC_OUT_SCALE_2;
+		break;
+	case 2:
+		rc->out |= RC_OUT_SCALE_4;
+		break;
+	default:
+		assert(0);
+	}
+}
+/* Compute the alpha/colour combiner input and output words and the constant colour of unit i. */
+void
+nv10_get_general_combiner(GLcontext *ctx, int i,
+			  uint32_t *a_in, uint32_t *a_out,
+			  uint32_t *c_in, uint32_t *c_out, uint32_t *k)
+{
+	struct combiner_state rc_a, rc_c;	/* alpha and colour (RGB) combiner state */
+
+	if (ctx->Texture.Unit[i]._ReallyEnabled) {
+		INIT_COMBINER(RGB, ctx, &rc_c, i);
+
+		if (rc_c.mode == GL_DOT3_RGBA)	/* DOT3_RGBA: alpha reuses the RGB setup */
+			rc_a = rc_c;
+		else
+			INIT_COMBINER(A, ctx, &rc_a, i);
+
+		setup_combiner(&rc_c);
+		setup_combiner(&rc_a);
+
+	} else {	/* disabled unit: all four words are zero */
+		rc_a.in = rc_a.out = rc_c.in = rc_c.out = 0;
+	}
+
+	*k = pack_rgba_f(MESA_FORMAT_ARGB8888,
+			 ctx->Texture.Unit[i].EnvColor);	/* packed even when the unit is disabled */
+	*a_in = rc_a.in;	/* the 64-bit in words are narrowed to 32 bits here */
+	*a_out = rc_a.out;
+	*c_in = rc_c.in;
+	*c_out = rc_c.out;
+}
+/* Build the 64-bit final-combiner input word in *in; *n is derived from the enabled units. */
+void
+nv10_get_final_combiner(GLcontext *ctx, uint64_t *in, int *n)
+{
+	struct combiner_state rc = {};	/* only rc.in is filled in below */
+
+	/*
+	 * The final fragment value equation is something like:
+	 *	x_i = A_i * B_i + (1 - A_i) * C_i + D_i
+	 *	x_alpha = G_alpha
+	 * where D_i = E_i * F_i, i one of {red, green, blue}.
+	 */
+	if (ctx->Fog.ColorSumEnabled || ctx->Light.Enabled) {	/* D = E * F with F the secondary colour */
+		INPUT_SRC(&rc, D, E_TIMES_F, RGB);
+		INPUT_SRC(&rc, F, SECONDARY_COLOR, RGB);
+	}
+
+	if (ctx->Fog.Enabled) {
+		INPUT_SRC(&rc, A, FOG, ALPHA);
+		INPUT_SRC(&rc, C, FOG, RGB);
+		INPUT_SRC(&rc, E, FOG, ALPHA);
+	} else {	/* no fog: A, C and E are bound to the INPUT_ONE constant */
+		INPUT_ONE(&rc, A, 0);
+		INPUT_ONE(&rc, C, 0);
+		INPUT_ONE(&rc, E, 0);
+	}
+
+	if (ctx->Texture._EnabledUnits) {	/* textured: B and G read SPARE0 */
+		INPUT_SRC(&rc, B, SPARE0, RGB);
+		INPUT_SRC(&rc, G, SPARE0, ALPHA);
+	} else {	/* untextured: B and G read the primary colour */
+		INPUT_SRC(&rc, B, PRIMARY_COLOR, RGB);
+		INPUT_SRC(&rc, G, PRIMARY_COLOR, ALPHA);
+	}
+
+	*in = rc.in;
+	*n = log2i(ctx->Texture._EnabledUnits) + 1;	/* NOTE(review): presumably the number of combiner stages in use — confirm */
+}
+
+/* Emit the general combiner words and the constant colour of one texture unit. */
+void
+nv10_emit_tex_env(GLcontext *ctx, int emit)
+{
+	const int unit = emit - NOUVEAU_STATE_TEX_ENV0;
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *celsius = context_eng3d(ctx);
+	uint32_t alpha_in, alpha_out, rgb_in, rgb_out, constant;
+
+	nv10_get_general_combiner(ctx, unit, &alpha_in, &alpha_out,
+				  &rgb_in, &rgb_out, &constant);
+
+	/* Enable the combiners we're going to need: the value placed at
+	 * bit 27 of unit 1's RGB output word depends on whether that unit
+	 * produced any output word at all. */
+	if (unit == 1)
+		rgb_out |= (rgb_out || alpha_out ? 0x5 : 0x3) << 27;
+
+	BEGIN_RING(chan, celsius, NV10TCL_RC_IN_ALPHA(unit), 1);
+	OUT_RING(chan, alpha_in);
+	BEGIN_RING(chan, celsius, NV10TCL_RC_IN_RGB(unit), 1);
+	OUT_RING(chan, rgb_in);
+	BEGIN_RING(chan, celsius, NV10TCL_RC_COLOR(unit), 1);
+	OUT_RING(chan, constant);
+	BEGIN_RING(chan, celsius, NV10TCL_RC_OUT_ALPHA(unit), 1);
+	OUT_RING(chan, alpha_out);
+	BEGIN_RING(chan, celsius, NV10TCL_RC_OUT_RGB(unit), 1);
+	OUT_RING(chan, rgb_out);
+
+	context_dirty(ctx, FRAG);
+}
+
+/* Emit the final-combiner input as two words: low 32 bits first, then the high 32 bits. */
+void
+nv10_emit_frag(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *celsius = context_eng3d(ctx);
+	uint64_t final_in;
+	int stages;	/* filled in by the helper, not used in this function */
+
+	nv10_get_final_combiner(ctx, &final_in, &stages);
+	BEGIN_RING(chan, celsius, NV10TCL_RC_FINAL0, 2);
+	OUT_RING(chan, final_in);
+	OUT_RING(chan, final_in >> 32);
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_polygon.c b/src/mesa/drivers/dri/nouveau/nv10_state_polygon.c
new file mode 100644
index 0000000000..deddca1011
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_polygon.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_gldefs.h"
+#include "nouveau_class.h"
+#include "nv10_driver.h"
+
+/*
+ * Emit the face culling enable flag and the face to be culled.
+ *
+ * Any CullFaceMode other than GL_FRONT or GL_BACK is emitted as
+ * NV10TCL_CULL_FACE_FRONT_AND_BACK.
+ */
+void
+nv10_emit_cull_face(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	uint32_t face;
+
+	switch (ctx->Polygon.CullFaceMode) {
+	case GL_FRONT:
+		face = NV10TCL_CULL_FACE_FRONT;
+		break;
+	case GL_BACK:
+		face = NV10TCL_CULL_FACE_BACK;
+		break;
+	default:
+		face = NV10TCL_CULL_FACE_FRONT_AND_BACK;
+		break;
+	}
+
+	BEGIN_RING(ch, eng3d, NV10TCL_CULL_FACE_ENABLE, 1);
+	OUT_RING(ch, ctx->Polygon.CullFlag ? 1 : 0);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_CULL_FACE, 1);
+	OUT_RING(ch, face);
+}
+
+/*
+ * Emit the front-face winding: clockwise when FrontFace is GL_CW,
+ * counter-clockwise for any other value.
+ */
+void
+nv10_emit_front_face(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	uint32_t winding;
+
+	if (ctx->Polygon.FrontFace == GL_CW)
+		winding = NV10TCL_FRONT_FACE_CW;
+	else
+		winding = NV10TCL_FRONT_FACE_CCW;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_FRONT_FACE, 1);
+	OUT_RING(ch, winding);
+}
+
+/*
+ * Emit the line width and the line smoothing enable flag.
+ *
+ * Smoothing is only turned on when GL_LINE_SMOOTH is enabled and the
+ * line smooth hint is GL_NICEST.  The width is emitted multiplied by
+ * 8, with a lower bound of 1 for non-smooth lines and 0 for smooth
+ * ones.
+ * NOTE(review): the factor of 8 presumably reflects a fixed-point
+ * hardware encoding -- confirm.
+ */
+void
+nv10_emit_line_mode(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	const GLboolean antialias = ctx->Line.SmoothFlag &&
+		ctx->Hint.LineSmooth == GL_NICEST;
+	const int min_width = antialias ? 0 : 1;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_LINE_WIDTH, 1);
+	OUT_RING(ch, MAX2(min_width, ctx->Line.Width) * 8);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+	OUT_RING(ch, antialias ? 1 : 0);
+}
+
+/* Line stipple state: currently a no-op, no commands are emitted. */
+void
+nv10_emit_line_stipple(GLcontext *ctx, int emit)
+{
+}
+
+/*
+ * Emit the point size (scaled by 8 and truncated to an integer) and
+ * the point smoothing enable flag.
+ */
+void
+nv10_emit_point_mode(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	const uint32_t size = (uint32_t)(ctx->Point.Size * 8);
+	const uint32_t smooth = ctx->Point.SmoothFlag ? 1 : 0;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_POINT_SIZE, 1);
+	OUT_RING(ch, size);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_POINT_SMOOTH_ENABLE, 1);
+	OUT_RING(ch, smooth);
+}
+
+/*
+ * Emit the front and back polygon rasterization modes (two
+ * consecutive words starting at NV10TCL_POLYGON_MODE_FRONT) and the
+ * polygon smoothing enable flag.
+ */
+void
+nv10_emit_polygon_mode(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	const uint32_t smooth = ctx->Polygon.SmoothFlag ? 1 : 0;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_POLYGON_MODE_FRONT, 2);
+	OUT_RING(ch, nvgl_polygon_mode(ctx->Polygon.FrontMode));
+	OUT_RING(ch, nvgl_polygon_mode(ctx->Polygon.BackMode));
+
+	BEGIN_RING(ch, eng3d, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
+	OUT_RING(ch, smooth);
+}
+
+/*
+ * Emit the polygon offset state: the point/line/fill enable flags
+ * (three consecutive words) followed by the offset factor and units
+ * as floats.
+ */
+void
+nv10_emit_polygon_offset(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	const uint32_t point_en = ctx->Polygon.OffsetPoint ? 1 : 0;
+	const uint32_t line_en = ctx->Polygon.OffsetLine ? 1 : 0;
+	const uint32_t fill_en = ctx->Polygon.OffsetFill ? 1 : 0;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+	OUT_RING(ch, point_en);
+	OUT_RING(ch, line_en);
+	OUT_RING(ch, fill_en);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
+	OUT_RINGf(ch, ctx->Polygon.OffsetFactor);
+	OUT_RINGf(ch, ctx->Polygon.OffsetUnits);
+}
+
+/* Polygon stipple state: currently a no-op, no commands are emitted. */
+void
+nv10_emit_polygon_stipple(GLcontext *ctx, int emit)
+{
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_raster.c b/src/mesa/drivers/dri/nouveau/nv10_state_raster.c
new file mode 100644
index 0000000000..a62cd807a9
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_raster.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_gldefs.h"
+#include "nouveau_class.h"
+#include "nv10_driver.h"
+
+/*
+ * Emit the alpha test state: the enable flag, then the comparison
+ * function and the reference value converted with FLOAT_TO_UBYTE().
+ */
+void
+nv10_emit_alpha_func(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	const uint32_t enabled = ctx->Color.AlphaEnabled ? 1 : 0;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+	OUT_RING(ch, enabled);
+
+	/* Function and reference are adjacent methods. */
+	BEGIN_RING(ch, eng3d, NV10TCL_ALPHA_FUNC_FUNC, 2);
+	OUT_RING(ch, nvgl_comparison_op(ctx->Color.AlphaFunc));
+	OUT_RING(ch, FLOAT_TO_UBYTE(ctx->Color.AlphaRef));
+}
+
+/*
+ * Emit the constant blend colour as one packed word: alpha in bits
+ * 31..24, red in 23..16, green in 15..8 and blue in 7..0.
+ */
+void
+nv10_emit_blend_color(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *celsius = context_eng3d(ctx);
+
+	/* Widen every component to uint32_t before shifting.
+	 * FLOAT_TO_UBYTE() presumably yields a narrow integer that would
+	 * be promoted to (signed) int, and shifting a value >= 0x80 left
+	 * by 24 bits overflows int, which is undefined behaviour. */
+	const uint32_t a = (uint32_t)FLOAT_TO_UBYTE(ctx->Color.BlendColor[3]);
+	const uint32_t r = (uint32_t)FLOAT_TO_UBYTE(ctx->Color.BlendColor[0]);
+	const uint32_t g = (uint32_t)FLOAT_TO_UBYTE(ctx->Color.BlendColor[1]);
+	const uint32_t b = (uint32_t)FLOAT_TO_UBYTE(ctx->Color.BlendColor[2]);
+
+	BEGIN_RING(chan, celsius, NV10TCL_BLEND_COLOR, 1);
+	OUT_RING(chan, a << 24 | r << 16 | g << 8 | b << 0);
+}
+
+/*
+ * Emit the blending enable flag and the blend equation.  Only the RGB
+ * blend equation is used.
+ */
+void
+nv10_emit_blend_equation(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	const uint32_t enabled = ctx->Color.BlendEnabled ? 1 : 0;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_BLEND_FUNC_ENABLE, 1);
+	OUT_RING(ch, enabled);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_BLEND_EQUATION, 1);
+	OUT_RING(ch, nvgl_blend_eqn(ctx->Color.BlendEquationRGB));
+}
+
+/*
+ * Emit the source and destination blend factors (two consecutive
+ * words starting at NV10TCL_BLEND_FUNC_SRC).  Only the RGB factors
+ * are used.
+ */
+void
+nv10_emit_blend_func(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_BLEND_FUNC_SRC, 2);
+	OUT_RING(ch, nvgl_blend_func(ctx->Color.BlendSrcRGB));	/* src */
+	OUT_RING(ch, nvgl_blend_func(ctx->Color.BlendDstRGB));	/* dst */
+}
+
+/*
+ * Emit the colour write mask of the first colour buffer as one packed
+ * word: alpha is bit 24, red bit 16, green bit 8 and blue bit 0.
+ */
+void
+nv10_emit_color_mask(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	uint32_t mask = 0;
+
+	if (ctx->Color.ColorMask[0][3])
+		mask |= 1 << 24;
+	if (ctx->Color.ColorMask[0][0])
+		mask |= 1 << 16;
+	if (ctx->Color.ColorMask[0][1])
+		mask |= 1 << 8;
+	if (ctx->Color.ColorMask[0][2])
+		mask |= 1 << 0;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_COLOR_MASK, 1);
+	OUT_RING(ch, mask);
+}
+
+/*
+ * Emit the depth buffer state: test enable, write enable and the
+ * comparison function.
+ */
+void
+nv10_emit_depth(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	const uint32_t test_en = ctx->Depth.Test ? 1 : 0;
+	const uint32_t write_en = ctx->Depth.Mask ? 1 : 0;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_DEPTH_TEST_ENABLE, 1);
+	OUT_RING(ch, test_en);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+	OUT_RING(ch, write_en);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_DEPTH_FUNC, 1);
+	OUT_RING(ch, nvgl_comparison_op(ctx->Depth.Func));
+}
+
+/* Emit the dithering enable flag. */
+void
+nv10_emit_dither(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	const uint32_t enabled = ctx->Color.DitherFlag ? 1 : 0;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_DITHER_ENABLE, 1);
+	OUT_RING(ch, enabled);
+}
+
+/*
+ * Emit the colour logic op enable flag and opcode.
+ *
+ * Asserts that the logic op is only enabled on chipsets >= 0x11; the
+ * NV11TCL method itself is emitted unconditionally.
+ */
+void
+nv10_emit_logic_opcode(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+
+	assert(!ctx->Color.ColorLogicOpEnabled
+	       || context_chipset(ctx) >= 0x11);
+
+	/* Enable flag and opcode are adjacent methods. */
+	BEGIN_RING(ch, eng3d, NV11TCL_COLOR_LOGIC_OP_ENABLE, 2);
+	OUT_RING(ch, ctx->Color.ColorLogicOpEnabled ? 1 : 0);
+	OUT_RING(ch, nvgl_logicop_func(ctx->Color.LogicOp));
+}
+
+/*
+ * Emit the shade model: smooth when ShadeModel is GL_SMOOTH, flat for
+ * any other value.
+ */
+void
+nv10_emit_shade_model(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	uint32_t model;
+
+	if (ctx->Light.ShadeModel == GL_SMOOTH)
+		model = NV10TCL_SHADE_MODEL_SMOOTH;
+	else
+		model = NV10TCL_SHADE_MODEL_FLAT;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_SHADE_MODEL, 1);
+	OUT_RING(ch, model);
+}
+
+/*
+ * Emit the stencil test enable flag followed by the comparison
+ * function, reference value and value mask.  Only the first set of
+ * stencil state (index 0) is used.
+ */
+void
+nv10_emit_stencil_func(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	const uint32_t enabled = ctx->Stencil.Enabled ? 1 : 0;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_STENCIL_ENABLE, 1);
+	OUT_RING(ch, enabled);
+
+	/* Function, reference and mask are adjacent methods. */
+	BEGIN_RING(ch, eng3d, NV10TCL_STENCIL_FUNC_FUNC, 3);
+	OUT_RING(ch, nvgl_comparison_op(ctx->Stencil.Function[0]));
+	OUT_RING(ch, ctx->Stencil.Ref[0]);
+	OUT_RING(ch, ctx->Stencil.ValueMask[0]);
+}
+
+/* Emit the stencil write mask (first set of stencil state only). */
+void
+nv10_emit_stencil_mask(GLcontext *ctx, int emit)
+{
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	struct nouveau_channel *ch = context_chan(ctx);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_STENCIL_MASK, 1);
+	OUT_RING(ch, ctx->Stencil.WriteMask[0]);
+}
+
+/*
+ * Emit the three stencil operations (stencil fail, depth fail, depth
+ * pass) as consecutive words starting at NV10TCL_STENCIL_OP_FAIL.
+ * Only the first set of stencil state (index 0) is used.
+ */
+void
+nv10_emit_stencil_op(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_STENCIL_OP_FAIL, 3);
+	OUT_RING(ch, nvgl_stencil_op(ctx->Stencil.FailFunc[0]));  /* sfail */
+	OUT_RING(ch, nvgl_stencil_op(ctx->Stencil.ZFailFunc[0])); /* zfail */
+	OUT_RING(ch, nvgl_stencil_op(ctx->Stencil.ZPassFunc[0])); /* zpass */
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_tex.c b/src/mesa/drivers/dri/nouveau/nv10_state_tex.c
new file mode 100644
index 0000000000..6dedb18c72
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_tex.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_gldefs.h"
+#include "nouveau_texture.h"
+#include "nouveau_class.h"
+#include "nouveau_util.h"
+#include "nv10_driver.h"
+
+/*
+ * Method offsets derived from the per-unit base methods.
+ *
+ * TX_GEN_MODE(i, j):  texgen mode of coordinate j of unit i; successive
+ *                     coordinates are 4 apart from the S method.
+ * TX_GEN_COEFF(i, j): texgen coefficients of coordinate j of unit i;
+ *                     successive coordinates are 16 apart.
+ * TX_MATRIX(i):       texture matrix of unit i; successive units are 64
+ *                     apart from the unit 0 matrix.
+ */
+#define TX_GEN_MODE(i, j) (NV10TCL_TX_GEN_MODE_S(i) + 4 * (j))
+#define TX_GEN_COEFF(i, j) (NV10TCL_TX_GEN_COEFF_S_A(i) + 16 * (j))
+#define TX_MATRIX(i) (NV10TCL_TX0_MATRIX(0) + 64 * (i))
+
+/*
+ * Emit the texture coordinate generation state of one texture unit.
+ *
+ * For each of the four coordinates: when hardware TnL is in use and
+ * texgen is enabled for that coordinate, the plane coefficients (if
+ * the mode has any) and the generation mode are emitted; otherwise the
+ * mode is written as 0.  The texture matrix state of the same unit is
+ * marked dirty afterwards.
+ */
+void
+nv10_emit_tex_gen(GLcontext *ctx, int emit)
+{
+	const int unit_idx = emit - NOUVEAU_STATE_TEX_GEN0;
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	struct gl_texture_unit *unit = &ctx->Texture.Unit[unit_idx];
+	int c;
+
+	for (c = 0; c < 4; c++) {
+		struct gl_texgen *gen;
+		float *coeff;
+
+		/* Generation off for this coordinate. */
+		if (nctx->fallback != HWTNL ||
+		    !(unit->TexGenEnabled & 1 << c)) {
+			BEGIN_RING(ch, eng3d, TX_GEN_MODE(unit_idx, c), 1);
+			OUT_RING(ch, 0);
+			continue;
+		}
+
+		gen = get_texgen_coord(unit, c);
+		coeff = get_texgen_coeff(gen);
+
+		if (coeff) {
+			BEGIN_RING(ch, eng3d,
+				   TX_GEN_COEFF(unit_idx, c), 4);
+			OUT_RINGp(ch, coeff, 4);
+		}
+
+		BEGIN_RING(ch, eng3d, TX_GEN_MODE(unit_idx, c), 1);
+		OUT_RING(ch, nvgl_texgen_mode(gen->Mode));
+	}
+
+	context_dirty_i(ctx, TEX_MAT, unit_idx);
+}
+
+/*
+ * Emit the texture matrix state of one texture unit.
+ *
+ * The matrix is enabled and uploaded when hardware TnL is in use and
+ * either the unit's texture matrix is enabled or the unit has texgen
+ * flags set; otherwise only the enable flag is written, as 0.
+ */
+void
+nv10_emit_tex_mat(GLcontext *ctx, int emit)
+{
+	const int unit = emit - NOUVEAU_STATE_TEX_MAT0;
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	const int use_matrix = nctx->fallback == HWTNL &&
+		((ctx->Texture._TexMatEnabled & 1 << unit) ||
+		 ctx->Texture.Unit[unit]._GenFlags);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_TX_MATRIX_ENABLE(unit), 1);
+	OUT_RING(ch, use_matrix ? 1 : 0);
+
+	if (use_matrix) {
+		BEGIN_RING(ch, eng3d, TX_MATRIX(unit), 16);
+		OUT_RINGm(ch, ctx->TextureMatrixStack[unit].Top->m);
+	}
+}
+
+/*
+ * Translate the Mesa format of a texture image into the hardware
+ * format bits used for non-rectangle textures.
+ *
+ * MESA_FORMAT_A8 and MESA_FORMAT_I8 both map to the A8 hardware
+ * format.  An unsupported format trips an assertion; in builds where
+ * assertions are compiled out the A8R8G8B8 format is returned, so the
+ * function never falls off its end without a return value.
+ */
+static uint32_t
+get_tex_format_pot(struct gl_texture_image *ti)
+{
+	switch (ti->TexFormat) {
+	case MESA_FORMAT_ARGB8888:
+		return NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8;
+
+	case MESA_FORMAT_XRGB8888:
+		return NV10TCL_TX_FORMAT_FORMAT_X8R8G8B8;
+
+	case MESA_FORMAT_ARGB1555:
+		return NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5;
+
+	case MESA_FORMAT_ARGB4444:
+		return NV10TCL_TX_FORMAT_FORMAT_A4R4G4B4;
+
+	case MESA_FORMAT_RGB565:
+		return NV10TCL_TX_FORMAT_FORMAT_R5G6B5;
+
+	case MESA_FORMAT_A8:
+	case MESA_FORMAT_I8:
+		return NV10TCL_TX_FORMAT_FORMAT_A8;
+
+	case MESA_FORMAT_L8:
+		return NV10TCL_TX_FORMAT_FORMAT_L8;
+
+	case MESA_FORMAT_CI8:
+		return NV10TCL_TX_FORMAT_FORMAT_INDEX8;
+
+	default:
+		assert(0);
+		/* Only reached when assertions are disabled: return a
+		 * well-formed format instead of an indeterminate value. */
+		return NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8;
+	}
+}
+
+/*
+ * Translate the Mesa format of a texture image into the hardware
+ * format bits used for rectangle textures.
+ *
+ * ARGB8888 and XRGB8888 share the A8R8G8B8_RECT format, and A8, L8 and
+ * I8 share A8_RECT.  An unsupported format trips an assertion; in
+ * builds where assertions are compiled out the A8R8G8B8_RECT format is
+ * returned, so the function never falls off its end without a return
+ * value.
+ */
+static uint32_t
+get_tex_format_rect(struct gl_texture_image *ti)
+{
+	switch (ti->TexFormat) {
+	case MESA_FORMAT_ARGB1555:
+		return NV10TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT;
+
+	case MESA_FORMAT_RGB565:
+		return NV10TCL_TX_FORMAT_FORMAT_R5G6B5_RECT;
+
+	case MESA_FORMAT_ARGB8888:
+	case MESA_FORMAT_XRGB8888:
+		return NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT;
+
+	case MESA_FORMAT_A8:
+	case MESA_FORMAT_L8:
+	case MESA_FORMAT_I8:
+		return NV10TCL_TX_FORMAT_FORMAT_A8_RECT;
+
+	default:
+		assert(0);
+		/* Only reached when assertions are disabled: return a
+		 * well-formed format instead of an indeterminate value. */
+		return NV10TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT;
+	}
+}
+
+/*
+ * Emit the texture object state of one texture unit.
+ *
+ * The unit index is "emit" relative to NOUVEAU_STATE_TEX_OBJ0.  A unit
+ * that is not really enabled only gets its TX_ENABLE method written as
+ * 0.  Otherwise the format, filter and enable words are rebuilt from
+ * the current texture object and its base level, and written together
+ * with the buffer object holding the base level surface.  Nothing is
+ * emitted if nouveau_texture_validate() fails.
+ */
+void
+nv10_emit_tex_obj(GLcontext *ctx, int emit)
+{
+	const int i = emit - NOUVEAU_STATE_TEX_OBJ0;
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *celsius = context_eng3d(ctx);
+	struct nouveau_bo_context *bctx = context_bctx_i(ctx, TEXTURE, i);
+	const int bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART | NOUVEAU_BO_VRAM;
+	struct gl_texture_object *t;
+	struct nouveau_surface *s;
+	struct gl_texture_image *ti;
+	uint32_t tx_format, tx_filter, tx_enable;
+
+	/* Disabled unit: just turn the texture off. */
+	if (!ctx->Texture.Unit[i]._ReallyEnabled) {
+		BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(i), 1);
+		OUT_RING(chan, 0);
+		return;
+	}
+
+	/* Everything below is derived from the base mipmap level. */
+	t = ctx->Texture.Unit[i]._Current;
+	s = &to_nouveau_texture(t)->surfaces[t->BaseLevel];
+	ti = t->Image[0][t->BaseLevel];
+
+	if (!nouveau_texture_validate(ctx, t))
+		return;
+
+	/* Recompute the texturing registers. */
+	/* Format word: wrap T at bit 28, wrap S at bit 24, log2 height at
+	 * bit 20, log2 width at bit 16.
+	 * NOTE(review): the meaning of the constant 5 << 4 and 1 << 12
+	 * fields is not evident from this file -- confirm. */
+	tx_format = nvgl_wrap_mode(t->WrapT) << 28
+		| nvgl_wrap_mode(t->WrapS) << 24
+		| ti->HeightLog2 << 20
+		| ti->WidthLog2 << 16
+		| 5 << 4 | 1 << 12;
+
+	/* Filter word: magnification at bit 28, minification at bit 24. */
+	tx_filter = nvgl_filter_mode(t->MagFilter) << 28
+		| nvgl_filter_mode(t->MinFilter) << 24;
+
+	/* Enable word: enable bit plus log2 of the max anisotropy at
+	 * bit 4. */
+	tx_enable = NV10TCL_TX_ENABLE_ENABLE
+		| log2i(t->MaxAnisotropy) << 4;
+
+	/* Rectangle textures additionally need their pitch and size, and
+	 * use a separate set of format codes. */
+	if (t->Target == GL_TEXTURE_RECTANGLE) {
+		BEGIN_RING(chan, celsius, NV10TCL_TX_NPOT_PITCH(i), 1);
+		OUT_RING(chan, s->pitch << 16);
+		BEGIN_RING(chan, celsius, NV10TCL_TX_NPOT_SIZE(i), 1);
+		OUT_RING(chan, align(s->width, 2) << 16 | s->height);
+
+		tx_format |= get_tex_format_rect(ti);
+	} else {
+		tx_format |= get_tex_format_pot(ti);
+	}
+
+	/* Any minification filter other than GL_NEAREST/GL_LINEAR uses
+	 * mipmaps: set up the LOD range and bias.  The values are stored
+	 * in ints (dropping any fractional part) and clamped to 0..15. */
+	if (t->MinFilter != GL_NEAREST &&
+	    t->MinFilter != GL_LINEAR) {
+		int lod_min = t->MinLod;
+		int lod_max = MIN2(t->MaxLod, t->_MaxLambda);
+		int lod_bias = t->LodBias
+			+ ctx->Texture.Unit[i].LodBias;
+
+		lod_max = CLAMP(lod_max, 0, 15);
+		lod_min = CLAMP(lod_min, 0, 15);
+		lod_bias = CLAMP(lod_bias, 0, 15);
+
+		tx_format |= NV10TCL_TX_FORMAT_MIPMAP;
+		tx_filter |= lod_bias << 8;
+		tx_enable |= lod_min << 26
+			| lod_max << 14;
+	}
+
+	/* Write it to the hardware. */
+	/* The format and offset methods go through the buffer context so
+	 * that they reference the surface's buffer object. */
+	nouveau_bo_mark(bctx, celsius, NV10TCL_TX_FORMAT(i),
+			s->bo, tx_format, 0,
+			NV10TCL_TX_FORMAT_DMA0,
+			NV10TCL_TX_FORMAT_DMA1,
+			bo_flags | NOUVEAU_BO_OR);
+
+	nouveau_bo_markl(bctx, celsius, NV10TCL_TX_OFFSET(i),
+			 s->bo, 0, bo_flags);
+
+	BEGIN_RING(chan, celsius, NV10TCL_TX_FILTER(i), 1);
+	OUT_RING(chan, tx_filter);
+
+	BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(i), 1);
+	OUT_RING(chan, tx_enable);
+}
+
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
new file mode 100644
index 0000000000..0e592a1629
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
@@ -0,0 +1,489 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_gldefs.h"
+#include "nouveau_util.h"
+#include "nouveau_class.h"
+#include "nv10_driver.h"
+
+/* Clip plane state: currently a no-op, no commands are emitted. */
+void
+nv10_emit_clip_plane(GLcontext *ctx, int emit)
+{
+}
+
+/*
+ * Convert a Mesa material attribute bitmask into the corresponding
+ * NV10TCL_COLOR_MATERIAL_* bits.  Only the front-face emission,
+ * ambient, diffuse and specular bits are translated.
+ */
+static inline unsigned
+get_material_bitmask(unsigned m)
+{
+	return ((m & MAT_BIT_FRONT_EMISSION) ?
+		NV10TCL_COLOR_MATERIAL_EMISSION : 0) |
+		((m & MAT_BIT_FRONT_AMBIENT) ?
+		 NV10TCL_COLOR_MATERIAL_AMBIENT : 0) |
+		((m & MAT_BIT_FRONT_DIFFUSE) ?
+		 NV10TCL_COLOR_MATERIAL_DIFFUSE : 0) |
+		((m & MAT_BIT_FRONT_SPECULAR) ?
+		 NV10TCL_COLOR_MATERIAL_SPECULAR : 0);
+}
+
+/*
+ * Emit the colour material mask: the translated ColorMaterialBitmask
+ * when colour material is enabled, 0 otherwise.
+ */
+void
+nv10_emit_color_material(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	unsigned tracked = get_material_bitmask(ctx->Light.ColorMaterialBitmask);
+
+	if (!ctx->Light.ColorMaterialEnabled)
+		tracked = 0;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_COLOR_MATERIAL, 1);
+	OUT_RING(ch, tracked);
+}
+
+/*
+ * Translate a GL fog mode (GL_LINEAR, GL_EXP or GL_EXP2) into the
+ * matching NV10TCL_FOG_MODE_* value.
+ *
+ * Any other mode trips an assertion; in builds where assertions are
+ * compiled out NV10TCL_FOG_MODE_EXP is returned, so the function never
+ * falls off its end without a return value.
+ */
+static unsigned
+get_fog_mode(unsigned mode)
+{
+	switch (mode) {
+	case GL_LINEAR:
+		return NV10TCL_FOG_MODE_LINEAR;
+	case GL_EXP:
+		return NV10TCL_FOG_MODE_EXP;
+	case GL_EXP2:
+		return NV10TCL_FOG_MODE_EXP2;
+	default:
+		assert(0);
+		/* Only reached when assertions are disabled. */
+		return NV10TCL_FOG_MODE_EXP;
+	}
+}
+
+/*
+ * Translate a GL fog coordinate source into the matching
+ * NV10TCL_FOG_COORD_* value.
+ *
+ * Any source other than GL_FOG_COORDINATE_EXT or GL_FRAGMENT_DEPTH_EXT
+ * trips an assertion; in builds where assertions are compiled out the
+ * fragment-depth value is returned, so the function never falls off
+ * its end without a return value.
+ */
+static unsigned
+get_fog_source(unsigned source)
+{
+	switch (source) {
+	case GL_FOG_COORDINATE_EXT:
+		return NV10TCL_FOG_COORD_FOG;
+	case GL_FRAGMENT_DEPTH_EXT:
+		return NV10TCL_FOG_COORD_DIST_ORTHOGONAL_ABS;
+	default:
+		assert(0);
+		/* Only reached when assertions are disabled. */
+		return NV10TCL_FOG_COORD_DIST_ORTHOGONAL_ABS;
+	}
+}
+
+/*
+ * Compute the three fog equation coefficients for the current fog
+ * mode.
+ *
+ * k[0] and k[1] depend on the mode (and on Start/End for GL_LINEAR or
+ * Density for GL_EXP/GL_EXP2); k[2] is always 0.  An unknown mode
+ * trips an assertion; in builds where assertions are compiled out
+ * k[0] and k[1] are set to 0 so the caller never reads uninitialised
+ * values.
+ * NOTE(review): for GL_LINEAR with End == Start the divisions produce
+ * non-finite values; this is left unchanged.
+ */
+void
+nv10_get_fog_coeff(GLcontext *ctx, float k[3])
+{
+	struct gl_fog_attrib *f = &ctx->Fog;
+
+	switch (f->Mode) {
+	case GL_LINEAR:
+		k[0] = 2 + f->Start / (f->End - f->Start);
+		k[1] = -1 / (f->End - f->Start);
+		break;
+
+	case GL_EXP:
+		k[0] = 1.5;
+		k[1] = -0.09 * f->Density;
+		break;
+
+	case GL_EXP2:
+		k[0] = 1.5;
+		k[1] = -0.21 * f->Density;
+		break;
+
+	default:
+		assert(0);
+		/* Only reached when assertions are disabled. */
+		k[0] = 0;
+		k[1] = 0;
+		break;
+	}
+
+	k[2] = 0;
+}
+
+/*
+ * Emit the fog state: mode, coordinate source, enable flag and colour
+ * (four consecutive words starting at NV10TCL_FOG_MODE), followed by
+ * the three fog equation coefficients.
+ *
+ * When hardware TnL is not in use the coordinate source is forced to
+ * GL_FOG_COORDINATE_EXT.  The FRAG state is marked dirty afterwards.
+ */
+void
+nv10_emit_fog(GLcontext *ctx, int emit)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	struct gl_fog_attrib *fog = &ctx->Fog;
+	unsigned coord_src;
+	float coeff[3];
+
+	if (nctx->fallback == HWTNL)
+		coord_src = fog->FogCoordinateSource;
+	else
+		coord_src = GL_FOG_COORDINATE_EXT;
+
+	nv10_get_fog_coeff(ctx, coeff);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_FOG_MODE, 4);
+	OUT_RING(ch, get_fog_mode(fog->Mode));
+	OUT_RING(ch, get_fog_source(coord_src));
+	OUT_RING(ch, fog->Enabled ? 1 : 0);
+	OUT_RING(ch, pack_rgba_f(MESA_FORMAT_RGBA8888_REV, fog->Color));
+
+	BEGIN_RING(ch, eng3d, NV10TCL_FOG_EQUATION_CONSTANT, 3);
+	OUT_RINGp(ch, coeff, 3);
+
+	context_dirty(ctx, FRAG);
+}
+
+/*
+ * Return the NV10TCL_ENABLED_LIGHTS_0_* mode for a light: DISABLED
+ * when the light is off, otherwise DIRECTIONAL for spot lights,
+ * POSITIONAL for positional lights and NONPOSITIONAL for the rest.
+ * The spot flag takes precedence over the positional flag.
+ */
+static inline unsigned
+get_light_mode(struct gl_light *l)
+{
+	if (!l->Enabled)
+		return NV10TCL_ENABLED_LIGHTS_0_DISABLED;
+
+	if (l->_Flags & LIGHT_SPOT)
+		return NV10TCL_ENABLED_LIGHTS_0_DIRECTIONAL;
+
+	if (l->_Flags & LIGHT_POSITIONAL)
+		return NV10TCL_ENABLED_LIGHTS_0_POSITIONAL;
+
+	return NV10TCL_ENABLED_LIGHTS_0_NONPOSITIONAL;
+}
+
+/*
+ * Emit the lighting enable state.
+ *
+ * Without hardware TnL, lighting is simply switched off.  Otherwise
+ * the mode of every light is packed into one word (two bits per
+ * light, light 0 in the lowest bits) and emitted together with the
+ * lighting and normalization enable flags.
+ */
+void
+nv10_emit_light_enable(GLcontext *ctx, int emit)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	uint32_t modes = 0;
+	int n;
+
+	if (nctx->fallback != HWTNL) {
+		BEGIN_RING(ch, eng3d, NV10TCL_LIGHTING_ENABLE, 1);
+		OUT_RING(ch, 0);
+		return;
+	}
+
+	for (n = 0; n < MAX_LIGHTS; n++) {
+		const unsigned mode = get_light_mode(&ctx->Light.Light[n]);
+
+		modes |= mode << (2 * n);
+	}
+
+	BEGIN_RING(ch, eng3d, NV10TCL_ENABLED_LIGHTS, 1);
+	OUT_RING(ch, modes);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_LIGHTING_ENABLE, 1);
+	OUT_RING(ch, ctx->Light.Enabled ? 1 : 0);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_NORMALIZE_ENABLE, 1);
+	OUT_RING(ch, ctx->Transform.Normalize ? 1 : 0);
+}
+
+/*
+ * Emit the light model state: the separate specular enable flag and
+ * the light model word.
+ *
+ * The light model word combines LOCAL_VIEWER (local viewer enabled),
+ * SEPARATE_SPECULAR (a secondary colour is needed) and VERTEX_SPECULAR
+ * (lighting disabled while colour sum is enabled).
+ */
+void
+nv10_emit_light_model(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	struct gl_lightmodel *model = &ctx->Light.Model;
+	const uint32_t sep_specular =
+		model->ColorControl == GL_SEPARATE_SPECULAR_COLOR ? 1 : 0;
+	uint32_t bits = 0;
+
+	if (model->LocalViewer)
+		bits |= NV10TCL_LIGHT_MODEL_LOCAL_VIEWER;
+	if (NEED_SECONDARY_COLOR(ctx))
+		bits |= NV10TCL_LIGHT_MODEL_SEPARATE_SPECULAR;
+	if (!ctx->Light.Enabled && ctx->Fog.ColorSumEnabled)
+		bits |= NV10TCL_LIGHT_MODEL_VERTEX_SPECULAR;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_SEPARATE_SPECULAR_ENABLE, 1);
+	OUT_RING(ch, sep_specular);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_LIGHT_MODEL, 1);
+	OUT_RING(ch, bits);
+}
+
+/*
+ * Evaluate a tabulated curve at x.
+ *
+ * p[0] is a scale factor and p[1..15] are 15 samples.  x is mapped to
+ * a position in sample space such that x == 1024 corresponds to the
+ * last sample; the result is the linear interpolation between the two
+ * neighbouring samples.  x == 0 returns the first sample, and
+ * positions at or past the last interval return the last sample.
+ */
+static float
+get_shine(const float p[], float x)
+{
+	const int count = 15;
+	const float *samples = &p[1];
+	float pos = (count - 1) * (1 - 1 / (1 + p[0] * x))
+		/ (1 - 1 / (1 + p[0] * 1024));
+	int idx = pos;
+
+	if (x == 0)
+		return samples[0];
+
+	if (idx > count - 2)
+		return samples[count - 1];
+
+	/* Linear interpolation in sample space (faster and somewhat
+	 * more accurate than doing it in x-space). */
+	return samples[idx] + (samples[idx + 1] - samples[idx]) * (pos - idx);
+}
+
+/*
+ * Curve tables evaluated with get_shine() at the spot exponent by
+ * nv10_get_spot_coeff().  In each row element 0 is the scale factor
+ * and elements 1..15 are the 15 samples.  Row 0 yields the k[1]
+ * coefficient, row 1 contributes to k[2].
+ */
+static const float nv10_spot_params[2][16] = {
+	{ 0.02, -3.80e-05, -1.77, -2.41, -2.71, -2.88, -2.98, -3.06,
+	  -3.11, -3.17, -3.23, -3.28, -3.37, -3.47, -3.83, -5.11 },
+	{ 0.02, -0.01, 1.77, 2.39, 2.70, 2.87, 2.98, 3.06,
+	  3.10, 3.16, 3.23, 3.27, 3.37, 3.47, 3.83, 5.11 },
+};
+
+/*
+ * Compute the seven spot light coefficients of a light.
+ *
+ * k[0..2] are derived from the spot exponent (and, when the spot
+ * cutoff is positive, from the cosine of the cutoff angle); k[3..5]
+ * are the negated normalized spot direction, scaled by the cutoff
+ * term when there is one; k[6] is 1 minus the cutoff term, or -1
+ * without a cutoff.
+ */
+void
+nv10_get_spot_coeff(struct gl_light *l, float k[7])
+{
+	float e = l->SpotExponent;
+	float a0, b0, a1, a2, b2, a3;
+
+	/* Terms that only depend on the spot exponent. */
+	a0 = e > 0 ? -1 - 5.36e-3 / sqrt(e) : -1;
+	b0 = 1 / (1 + 0.273 * e);
+
+	a1 = get_shine(nv10_spot_params[0], e);
+
+	a2 = get_shine(nv10_spot_params[1], e);
+	b2 = 1 / (1 + 0.273 * e);
+
+	a3 = 0.9 + 0.278 * e;
+
+	if (!(l->SpotCutoff > 0)) {
+		/* No cutoff term. */
+		k[0] = b0;
+		k[1] = a1;
+		k[2] = a2 + b2;
+		k[3] = - l->_NormSpotDirection[0];
+		k[4] = - l->_NormSpotDirection[1];
+		k[5] = - l->_NormSpotDirection[2];
+		k[6] = -1;
+
+	} else {
+		float cutoff = MAX2(a3, 1 / (1 - l->_CosCutoff));
+
+		k[0] = MAX2(0, a0 + b0 * cutoff);
+		k[1] = a1;
+		k[2] = a2 + b2 * cutoff;
+		k[3] = - cutoff * l->_NormSpotDirection[0];
+		k[4] = - cutoff * l->_NormSpotDirection[1];
+		k[5] = - cutoff * l->_NormSpotDirection[2];
+		k[6] = 1 - cutoff;
+	}
+}
+
+/*
+ * Emit the per-light source parameters of one light.
+ *
+ * Positional lights get their position and attenuation factors;
+ * other lights get their direction and half vector.  Spot lights
+ * additionally get the seven coefficients computed by
+ * nv10_get_spot_coeff().
+ */
+void
+nv10_emit_light_source(GLcontext *ctx, int emit)
+{
+	const int idx = emit - NOUVEAU_STATE_LIGHT_SOURCE0;
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	struct gl_light *light = &ctx->Light.Light[idx];
+
+	if (!(light->_Flags & LIGHT_POSITIONAL)) {
+		BEGIN_RING(ch, eng3d, NV10TCL_LIGHT_DIRECTION_X(idx), 3);
+		OUT_RINGp(ch, light->_VP_inf_norm, 3);
+
+		BEGIN_RING(ch, eng3d, NV10TCL_LIGHT_HALF_VECTOR_X(idx), 3);
+		OUT_RINGp(ch, light->_h_inf_norm, 3);
+
+	} else {
+		BEGIN_RING(ch, eng3d, NV10TCL_LIGHT_POSITION_X(idx), 3);
+		OUT_RINGp(ch, light->_Position, 3);
+
+		BEGIN_RING(ch, eng3d,
+			   NV10TCL_LIGHT_ATTENUATION_CONSTANT(idx), 3);
+		OUT_RINGf(ch, light->ConstantAttenuation);
+		OUT_RINGf(ch, light->LinearAttenuation);
+		OUT_RINGf(ch, light->QuadraticAttenuation);
+	}
+
+	if (light->_Flags & LIGHT_SPOT) {
+		float spot[7];
+
+		nv10_get_spot_coeff(light, spot);
+
+		BEGIN_RING(ch, eng3d, NV10TCL_LIGHT_SPOT_CUTOFF_A(idx), 7);
+		OUT_RINGp(ch, spot, 7);
+	}
+}
+
+/*
+ * True when colour material is enabled and tracks the given front
+ * material attribute (e.g. AMBIENT, DIFFUSE).  Expects a variable
+ * named "ctx" to be in scope.
+ */
+#define USE_COLOR_MATERIAL(attr)					\
+	(ctx->Light.ColorMaterialEnabled &&				\
+	 (ctx->Light.ColorMaterialBitmask &				\
+	  (1 << MAT_ATTRIB_FRONT_##attr)))
+
+/*
+ * Emit the ambient part of the material state: the scene ambient
+ * colour, the material factor (only when colour material is enabled)
+ * and the ambient colour of every enabled light.
+ *
+ * Which values are used depends on whether colour material tracks the
+ * ambient or the emission attribute.
+ */
+void
+nv10_emit_material_ambient(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	float (*mat)[4] = ctx->Light.Material.Attrib;
+	const int track_ambient = USE_COLOR_MATERIAL(AMBIENT);
+	float scene[3], factor[3];
+	struct gl_light *light;
+
+	if (track_ambient) {
+		COPY_3V(scene, ctx->Light.Model.Ambient);
+		COPY_3V(factor, mat[MAT_ATTRIB_FRONT_EMISSION]);
+	} else {
+		if (USE_COLOR_MATERIAL(EMISSION)) {
+			SCALE_3V(scene, mat[MAT_ATTRIB_FRONT_AMBIENT],
+				 ctx->Light.Model.Ambient);
+		} else {
+			COPY_3V(scene, ctx->Light._BaseColor[0]);
+		}
+		ZERO_3V(factor);
+	}
+
+	BEGIN_RING(ch, eng3d, NV10TCL_LIGHT_MODEL_AMBIENT_R, 3);
+	OUT_RINGp(ch, scene, 3);
+
+	if (ctx->Light.ColorMaterialEnabled) {
+		BEGIN_RING(ch, eng3d, NV10TCL_MATERIAL_FACTOR_R, 3);
+		OUT_RINGp(ch, factor, 3);
+	}
+
+	foreach(light, &ctx->Light.EnabledList) {
+		const int idx = light - ctx->Light.Light;
+		float *color;
+
+		/* With ambient tracking the raw light colour is used,
+		 * otherwise the one pre-multiplied with the material. */
+		if (track_ambient)
+			color = light->Ambient;
+		else
+			color = light->_MatAmbient[0];
+
+		BEGIN_RING(ch, eng3d, NV10TCL_LIGHT_AMBIENT_R(idx), 3);
+		OUT_RINGp(ch, color, 3);
+	}
+}
+
+/*
+ * Emit the diffuse part of the material state: the alpha of the front
+ * diffuse material and the diffuse colour of every enabled light.
+ */
+void
+nv10_emit_material_diffuse(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	GLfloat (*mat)[4] = ctx->Light.Material.Attrib;
+	const int track_diffuse = USE_COLOR_MATERIAL(DIFFUSE);
+	struct gl_light *light;
+
+	BEGIN_RING(ch, eng3d, NV10TCL_MATERIAL_FACTOR_A, 1);
+	OUT_RINGf(ch, mat[MAT_ATTRIB_FRONT_DIFFUSE][3]);
+
+	foreach(light, &ctx->Light.EnabledList) {
+		const int idx = light - ctx->Light.Light;
+		float *color;
+
+		/* With diffuse tracking the raw light colour is used,
+		 * otherwise the one pre-multiplied with the material. */
+		if (track_diffuse)
+			color = light->Diffuse;
+		else
+			color = light->_MatDiffuse[0];
+
+		BEGIN_RING(ch, eng3d, NV10TCL_LIGHT_DIFFUSE_R(idx), 3);
+		OUT_RINGp(ch, color, 3);
+	}
+}
+
+/*
+ * Emit the specular colour of every enabled light: the raw light
+ * colour when colour material tracks the specular attribute, the
+ * colour pre-multiplied with the material otherwise.
+ */
+void
+nv10_emit_material_specular(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	const int track_specular = USE_COLOR_MATERIAL(SPECULAR);
+	struct gl_light *light;
+
+	foreach(light, &ctx->Light.EnabledList) {
+		const int idx = light - ctx->Light.Light;
+		float *color;
+
+		if (track_specular)
+			color = light->Specular;
+		else
+			color = light->_MatSpecular[0];
+
+		BEGIN_RING(ch, eng3d, NV10TCL_LIGHT_SPECULAR_R(idx), 3);
+		OUT_RINGp(ch, color, 3);
+	}
+}
+
+/*
+ * Curve tables evaluated with get_shine() by
+ * nv10_get_shininess_coeff(): row i yields shininess coefficient k[i].
+ * In each row element 0 is the scale factor and elements 1..15 are the
+ * 15 samples.
+ */
+static const float nv10_shininess_param[6][16] = {
+	{ 0.70, 0.00, 0.06, 0.06, 0.05, 0.04, 0.02, 0.00,
+	  -0.06, -0.13, -0.24, -0.36, -0.51, -0.66, -0.82, -1.00 },
+	{ 0.01, 1.00, -2.29, -2.77, -2.96, -3.06, -3.12, -3.18,
+	  -3.24, -3.29, -3.36, -3.43, -3.51, -3.75, -4.33, -5.11 },
+	{ 0.02, 0.00, 2.28, 2.75, 2.94, 3.04, 3.1, 3.15,
+	  3.18, 3.22, 3.27, 3.32, 3.39, 3.48, 3.84, 5.11 },
+	{ 0.70, 0.00, 0.05, 0.06, 0.06, 0.06, 0.05, 0.04,
+	  0.02, 0.01, -0.03, -0.12, -0.25, -0.43, -0.68, -0.99 },
+	{ 0.01, 1.00, -1.61, -2.35, -2.67, -2.84, -2.96, -3.05,
+	  -3.08, -3.14, -3.2, -3.26, -3.32, -3.42, -3.54, -4.21 },
+	{ 0.01, 0.00, 2.25, 2.73, 2.92, 3.03, 3.09, 3.15,
+	  3.16, 3.21, 3.25, 3.29, 3.35, 3.43, 3.56, 4.22 },
+};
+
+/*
+ * Compute the six shininess coefficients for shininess s by
+ * evaluating each row of nv10_shininess_param at s.
+ */
+void
+nv10_get_shininess_coeff(float s, float k[6])
+{
+	int row = 0;
+
+	while (row < 6) {
+		k[row] = get_shine(nv10_shininess_param[row], s);
+		row++;
+	}
+}
+
+/*
+ * Emit the six shininess coefficients derived from the front material
+ * shininess, which is clamped to the range 0..1024 first.
+ */
+void
+nv10_emit_material_shininess(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	float (*mat)[4] = ctx->Light.Material.Attrib;
+	const float shininess =
+		CLAMP(mat[MAT_ATTRIB_FRONT_SHININESS][0], 0, 1024);
+	float coeff[6];
+
+	nv10_get_shininess_coeff(shininess, coeff);
+
+	BEGIN_RING(ch, eng3d, NV10TCL_MATERIAL_SHININESS(0), 6);
+	OUT_RINGp(ch, coeff, 6);
+}
+
+/*
+ * Emit the modelview matrix and its inverse.  Nothing is emitted
+ * unless hardware TnL is in use.
+ *
+ * The modelview matrix is uploaded when lighting needs eye
+ * coordinates, fog is enabled or texgen needs eye coordinates.  The
+ * first 12 elements of the inverse are uploaded when lighting is
+ * enabled or texgen needs eye coordinates.
+ */
+void
+nv10_emit_modelview(GLcontext *ctx, int emit)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	GLmatrix *mv = ctx->ModelviewMatrixStack.Top;
+	const int texgen_eye =
+		(ctx->Texture._GenFlags & TEXGEN_NEED_EYE_COORD) != 0;
+
+	if (nctx->fallback != HWTNL)
+		return;
+
+	if (ctx->Light._NeedEyeCoords || ctx->Fog.Enabled || texgen_eye) {
+		BEGIN_RING(ch, eng3d, NV10TCL_MODELVIEW0_MATRIX(0), 16);
+		OUT_RINGm(ch, mv->m);
+	}
+
+	if (ctx->Light.Enabled || texgen_eye) {
+		int idx;
+
+		BEGIN_RING(ch, eng3d,
+			   NV10TCL_INVERSE_MODELVIEW0_MATRIX(0), 12);
+		for (idx = 0; idx < 12; idx++)
+			OUT_RINGf(ch, mv->inv[idx]);
+	}
+}
+
+/* Point parameter state: currently a no-op, no commands are emitted. */
+void
+nv10_emit_point_parameter(GLcontext *ctx, int emit)
+{
+}
+
+/*
+ * Emit the projection matrix.
+ *
+ * The matrix starts out as the viewport scale; with hardware TnL it
+ * is additionally multiplied by the combined modelview-projection
+ * matrix.  A temporary GLmatrix is constructed for the computation
+ * and destroyed again before returning.
+ */
+void
+nv10_emit_projection(GLcontext *ctx, int emit)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct nouveau_channel *ch = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	GLmatrix proj;
+
+	_math_matrix_ctr(&proj);
+	get_viewport_scale(ctx, proj.m);
+
+	if (nctx->fallback == HWTNL) {
+		_math_matrix_mul_matrix(&proj, &proj,
+					&ctx->_ModelProjectMatrix);
+	}
+
+	BEGIN_RING(ch, eng3d, NV10TCL_PROJECTION_MATRIX(0), 16);
+	OUT_RINGm(ch, proj.m);
+
+	_math_matrix_dtr(&proj);
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv20_context.c b/src/mesa/drivers/dri/nouveau/nv20_context.c
new file mode 100644
index 0000000000..789dcaa6b4
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv20_context.c
@@ -0,0 +1,516 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_class.h"
+#include "nv04_driver.h"
+#include "nv10_driver.h"
+#include "nv20_driver.h"
+
+static const struct dri_extension nv20_extensions[] = {
+	{ "GL_EXT_texture_rectangle",	NULL },
+	{ NULL,				NULL }	/* all-NULL last entry; presumably the list terminator */
+};
+
+static void
+nv20_hwctx_init(GLcontext *ctx)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *kelvin = context_eng3d(ctx);
+	struct nouveau_hw_state *hw = &to_nouveau_context(ctx)->hw;
+	int i;
+	/* Queue the initial state of the kelvin 3D object, then FIRE_RING. */
+	BEGIN_RING(chan, kelvin, NV20TCL_DMA_NOTIFY, 1);
+	OUT_RING  (chan, hw->ntfy->handle);
+	BEGIN_RING(chan, kelvin, NV20TCL_DMA_TEXTURE0, 2);
+	OUT_RING  (chan, chan->vram->handle);
+	OUT_RING  (chan, chan->gart->handle);
+	BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 2);
+	OUT_RING  (chan, chan->vram->handle);
+	OUT_RING  (chan, chan->vram->handle);
+	BEGIN_RING(chan, kelvin, NV20TCL_DMA_VTXBUF0, 2);
+	OUT_RING(chan, chan->vram->handle);
+	OUT_RING(chan, chan->gart->handle);
+
+	BEGIN_RING(chan, kelvin, NV20TCL_DMA_QUERY, 1);
+	OUT_RING  (chan, 0);
+
+	BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	/* Viewport clip rectangles: entry 0 here, the others zeroed below. */
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+	OUT_RING  (chan, 0xfff << 16 | 0x0);
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1);
+	OUT_RING  (chan, 0xfff << 16 | 0x0);
+
+	for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) {
+		BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+		OUT_RING  (chan, 0);
+		BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1);
+		OUT_RING  (chan, 0);
+	}
+
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1);
+	OUT_RING  (chan, 0);
+
+	BEGIN_RING(chan, kelvin, 0x17e0, 3);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 1.0);
+
+	if (context_chipset(ctx) >= 0x25) {
+		BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1);
+		OUT_RING  (chan, NV20TCL_TX_RCOMP_LEQUAL | 0xdb0);
+	} else {
+		BEGIN_RING(chan, kelvin, 0x1e68, 1);
+		OUT_RING  (chan, 0x4b800000); /* 16777216.000000 */
+		BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1);
+		OUT_RING  (chan, NV20TCL_TX_RCOMP_LEQUAL);
+	}
+
+	BEGIN_RING(chan, kelvin, 0x290, 1);
+	OUT_RING  (chan, 0x10 << 16 | 1);
+	BEGIN_RING(chan, kelvin, 0x9fc, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, 0x1d80, 1);
+	OUT_RING  (chan, 1);
+	BEGIN_RING(chan, kelvin, 0x9f8, 1);
+	OUT_RING  (chan, 4);
+	BEGIN_RING(chan, kelvin, 0x17ec, 3);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 1.0);
+	OUT_RINGf (chan, 0.0);
+
+	if (context_chipset(ctx) >= 0x25) {
+		BEGIN_RING(chan, kelvin, 0x1d88, 1);
+		OUT_RING  (chan, 3);
+
+		BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY9, 1);
+		OUT_RING  (chan, chan->vram->handle);
+		BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY8, 1);
+		OUT_RING  (chan, chan->vram->handle);
+	}
+
+	BEGIN_RING(chan, kelvin, NV20TCL_DMA_FENCE, 1);
+	OUT_RING  (chan, 0);
+
+	BEGIN_RING(chan, kelvin, 0x1e98, 1);
+	OUT_RING  (chan, 0);
+
+	BEGIN_RING(chan, kelvin, NV20TCL_NOTIFY, 1);
+	OUT_RING  (chan, 0);
+
+	BEGIN_RING(chan, kelvin, 0x120, 3);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 1);
+	OUT_RING  (chan, 2);
+
+	if (context_chipset(ctx) >= 0x25) {
+		BEGIN_RING(chan, kelvin, 0x022c, 2);
+		OUT_RING  (chan, 0x280);
+		OUT_RING  (chan, 0x07d28000);
+
+		BEGIN_RING(chan, kelvin, 0x1da4, 1);
+		OUT_RING  (chan, 0);
+	}
+
+	BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2);
+	OUT_RING  (chan, 0 << 16 | 0);
+	OUT_RING  (chan, 0 << 16 | 0);
+	/* Alpha test off. */
+	BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2);
+	OUT_RING  (chan, NV20TCL_ALPHA_FUNC_FUNC_ALWAYS);
+	OUT_RING  (chan, 0);
+
+	for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; i++) {
+		BEGIN_RING(chan, kelvin, NV20TCL_TX_ENABLE(i), 1);
+		OUT_RING  (chan, 0);
+	}
+
+	BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_OP, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1);
+	OUT_RING  (chan, 0);
+	/* Register combiner (RC_*) defaults. */
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_ALPHA(0), 4);
+	OUT_RING  (chan, 0x30d410d0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_RGB(0), 4);
+	OUT_RING  (chan, 0x00000c00);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_ENABLE, 1);
+	OUT_RING  (chan, 0x00011101);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_FINAL0, 2);
+	OUT_RING  (chan, 0x130e0300);
+	OUT_RING  (chan, 0x0c091c80);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_ALPHA(0), 4);
+	OUT_RING  (chan, 0x00000c00);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_RGB(0), 4);
+	OUT_RING  (chan, 0x20c400c0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_COLOR0, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4);
+	OUT_RING  (chan, 0x035125a0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0x40002000);
+	OUT_RING  (chan, 0);
+	/* Blend, dither and stencil test off. */
+	BEGIN_RING(chan, kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1);
+	OUT_RING  (chan, 0xffff0000);
+	BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 4);
+	OUT_RING  (chan, NV20TCL_BLEND_FUNC_SRC_ONE);
+	OUT_RING  (chan, NV20TCL_BLEND_FUNC_DST_ZERO);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, NV20TCL_BLEND_EQUATION_FUNC_ADD);
+	BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7);
+	OUT_RING  (chan, 0xff);
+	OUT_RING  (chan, NV20TCL_STENCIL_FUNC_FUNC_ALWAYS);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0xff);
+	OUT_RING  (chan, NV20TCL_STENCIL_OP_FAIL_KEEP);
+	OUT_RING  (chan, NV20TCL_STENCIL_OP_ZFAIL_KEEP);
+	OUT_RING  (chan, NV20TCL_STENCIL_OP_ZPASS_KEEP);
+
+	BEGIN_RING(chan, kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, NV20TCL_COLOR_LOGIC_OP_OP_COPY);
+	BEGIN_RING(chan, kelvin, 0x17cc, 1);
+	OUT_RING  (chan, 0);
+	if (context_chipset(ctx) >= 0x25) {
+		BEGIN_RING(chan, kelvin, 0x1d84, 1);
+		OUT_RING  (chan, 1);
+	}
+	BEGIN_RING(chan, kelvin, NV20TCL_LIGHTING_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_MODEL, 1);
+	OUT_RING  (chan, NV20TCL_LIGHT_MODEL_VIEWER_NONLOCAL);
+	BEGIN_RING(chan, kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_ENABLED_LIGHTS, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_NORMALIZE_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0),
+		   NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE);
+	for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; i++) {
+		OUT_RING(chan, 0xffffffff);
+	}
+
+	BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1);
+	OUT_RING  (chan, NV20TCL_DEPTH_FUNC_LESS);
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 0.0);
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1);
+	OUT_RING  (chan, 1);
+	if (context_chipset(ctx) < 0x25) {
+		BEGIN_RING(chan, kelvin, 0x1d84, 1);
+		OUT_RING  (chan, 3);
+	}
+	BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1);
+	if (context_chipset(ctx) >= 0x25)
+		OUT_RINGf (chan, 1.0);
+	else
+		OUT_RING  (chan, 8);
+
+	if (context_chipset(ctx) >= 0x25) {
+		BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1);
+		OUT_RING  (chan, 0);
+		BEGIN_RING(chan, kelvin, 0x0a1c, 1);
+		OUT_RING  (chan, 0x800);
+	} else {
+		BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2);
+		OUT_RING  (chan, 0);
+		OUT_RING  (chan, 0);
+	}
+
+	BEGIN_RING(chan, kelvin, NV20TCL_LINE_WIDTH, 1);
+	OUT_RING  (chan, 8);
+	BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
+	OUT_RING  (chan, NV20TCL_POLYGON_MODE_FRONT_FILL);
+	OUT_RING  (chan, NV20TCL_POLYGON_MODE_BACK_FILL);
+	BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2);
+	OUT_RING  (chan, NV20TCL_CULL_FACE_BACK);
+	OUT_RING  (chan, NV20TCL_FRONT_FACE_CCW);
+	BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 1);
+	OUT_RING  (chan, NV20TCL_SHADE_MODEL_SMOOTH);
+	BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	/* Zero 4 * TX_GEN_MODE_S__SIZE words starting at TX_GEN_MODE_S(0). */
+	BEGIN_RING(chan, kelvin, NV20TCL_TX_GEN_MODE_S(0),
+		   4 * NV20TCL_TX_GEN_MODE_S__SIZE);
+	for (i=0; i < 4 * NV20TCL_TX_GEN_MODE_S__SIZE; i++)
+		OUT_RING(chan, 0);
+	/* Fog parameters; FOG_ENABLE itself is written as 0. */
+	BEGIN_RING(chan, kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3);
+	OUT_RINGf (chan, 1.5);
+	OUT_RINGf (chan, -0.090168);
+	OUT_RINGf (chan, 0.0);
+	BEGIN_RING(chan, kelvin, NV20TCL_FOG_MODE, 2);
+	OUT_RING  (chan, NV20TCL_FOG_MODE_EXP_SIGNED);
+	OUT_RING  (chan, NV20TCL_FOG_COORD_FOG);
+	BEGIN_RING(chan, kelvin, NV20TCL_FOG_ENABLE, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+
+	BEGIN_RING(chan, kelvin, NV20TCL_ENGINE, 1);
+	OUT_RING  (chan, NV20TCL_ENGINE_FIXED);
+
+	for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; i++) {
+		BEGIN_RING(chan, kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1);
+		OUT_RING  (chan, 0);
+	}
+	/* Initial values for attributes 1..15, four floats each. */
+	BEGIN_RING(chan, kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15);
+	OUT_RINGf(chan, 1.0);
+	OUT_RINGf(chan, 0.0);
+	OUT_RINGf(chan, 0.0);
+	OUT_RINGf(chan, 1.0);
+	OUT_RINGf(chan, 0.0);
+	OUT_RINGf(chan, 0.0);
+	OUT_RINGf(chan, 1.0);
+	OUT_RINGf(chan, 1.0);
+	OUT_RINGf(chan, 1.0);
+	OUT_RINGf(chan, 1.0);
+	OUT_RINGf(chan, 1.0);
+	OUT_RINGf(chan, 1.0);
+	for (i = 0; i < 12; i++) {
+		OUT_RINGf(chan, 0.0);
+		OUT_RINGf(chan, 0.0);
+		OUT_RINGf(chan, 0.0);
+		OUT_RINGf(chan, 1.0);
+	}
+
+	BEGIN_RING(chan, kelvin, NV20TCL_EDGEFLAG_ENABLE, 1);
+	OUT_RING  (chan, 1);
+	BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1);
+	OUT_RING (chan, 0x00010101);
+	BEGIN_RING(chan, kelvin, NV20TCL_CLEAR_VALUE, 1);
+	OUT_RING (chan, 0);
+
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 16777216.0);
+
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 16777215.0);
+
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_SCALE_X, 4);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 16777215.0 * 0.5);
+	OUT_RINGf (chan, 65535.0);
+
+	FIRE_RING(chan);
+}
+
+static void
+nv20_context_destroy(GLcontext *ctx)
+{
+	/* Releases what nv20_context_create() set up; it is also that */
+	/* function's error exit, so ctx may be only partly initialised. */
+	nv04_surface_takedown(ctx);
+	nv20_render_destroy(ctx);
+
+	nouveau_grobj_free(&to_nouveau_context(ctx)->hw.eng3d);
+
+	nouveau_context_deinit(ctx);
+	FREE(ctx);
+}
+
+/*
+ * Create an nv20-family context.  Any failure after the allocation is
+ * cleaned up through nv20_context_destroy() and reported as NULL.
+ */
+static GLcontext *
+nv20_context_create(struct nouveau_screen *screen, const GLvisual *visual,
+		    GLcontext *share_ctx)
+{
+	struct nouveau_context *nctx = CALLOC_STRUCT(nouveau_context);
+	GLcontext *ctx;
+	unsigned kelvin_class;
+
+	if (!nctx)
+		return NULL;
+
+	/* NOTE(review): the error path FREEs ctx, not nctx; that presumably */
+	/* relies on base being the first member of nouveau_context. */
+	ctx = &nctx->base;
+
+	if (!nouveau_context_init(ctx, screen, visual, share_ctx))
+		goto fail;
+
+	driInitExtensions(ctx, nv20_extensions, GL_FALSE);
+
+	/* GL constants. */
+	ctx->Const.MaxTextureCoordUnits = NV20_TEXTURE_UNITS;
+	ctx->Const.MaxTextureImageUnits = NV20_TEXTURE_UNITS;
+	ctx->Const.MaxTextureUnits = NV20_TEXTURE_UNITS;
+	ctx->Const.MaxTextureMaxAnisotropy = 8;
+	ctx->Const.MaxTextureLodBias = 15;
+
+	/* 2D engine: a zero return from nv04_surface_init() means failure. */
+	if (!nv04_surface_init(ctx))
+		goto fail;
+
+	/* 3D engine: chipsets 0x25 and up use the NV25 class. */
+	kelvin_class = context_chipset(ctx) >= 0x25 ? NV25TCL : NV20TCL;
+
+	/* Here, by contrast, a non-zero return is the failure case. */
+	if (nouveau_grobj_alloc(context_chan(ctx), 0xbeef0001, kelvin_class,
+				&nctx->hw.eng3d))
+		goto fail;
+
+	nv20_hwctx_init(ctx);
+	nv20_render_init(ctx);
+
+	return ctx;
+
+fail:
+	nv20_context_destroy(ctx);
+	return NULL;
+}
+
+const struct nouveau_driver nv20_driver = {
+	.context_create = nv20_context_create,
+	.context_destroy = nv20_context_destroy,
+	.surface_copy = nv04_surface_copy,
+	.surface_fill = nv04_surface_fill,
+	.emit = (nouveau_state_func[]) {	/* presumably indexed by NOUVEAU_STATE_* */
+		nv10_emit_alpha_func,
+		nv10_emit_blend_color,
+		nv10_emit_blend_equation,
+		nv10_emit_blend_func,
+		nv20_emit_clip_plane,	/* six consecutive clip plane slots */
+		nv20_emit_clip_plane,
+		nv20_emit_clip_plane,
+		nv20_emit_clip_plane,
+		nv20_emit_clip_plane,
+		nv20_emit_clip_plane,
+		nv10_emit_color_mask,
+		nv20_emit_color_material,
+		nv10_emit_cull_face,
+		nv10_emit_front_face,
+		nv10_emit_depth,
+		nv10_emit_dither,
+		nv20_emit_frag,
+		nv20_emit_framebuffer,
+		nv20_emit_fog,
+		nv10_emit_light_enable,
+		nv20_emit_light_model,
+		nv20_emit_light_source,	/* eight consecutive light source slots */
+		nv20_emit_light_source,
+		nv20_emit_light_source,
+		nv20_emit_light_source,
+		nv20_emit_light_source,
+		nv20_emit_light_source,
+		nv20_emit_light_source,
+		nv20_emit_light_source,
+		nv10_emit_line_stipple,
+		nv10_emit_line_mode,
+		nv20_emit_logic_opcode,
+		nv20_emit_material_ambient,
+		nv20_emit_material_ambient,
+		nv20_emit_material_diffuse,
+		nv20_emit_material_diffuse,
+		nv20_emit_material_specular,
+		nv20_emit_material_specular,
+		nv20_emit_material_shininess,
+		nv20_emit_material_shininess,
+		nv20_emit_modelview,
+		nv20_emit_point_mode,
+		nv10_emit_point_parameter,
+		nv10_emit_polygon_mode,
+		nv10_emit_polygon_offset,
+		nv10_emit_polygon_stipple,
+		nv20_emit_projection,
+		nv10_emit_render_mode,
+		nv10_emit_scissor,
+		nv10_emit_shade_model,
+		nv10_emit_stencil_func,
+		nv10_emit_stencil_mask,
+		nv10_emit_stencil_op,
+		nv20_emit_tex_env,	/* handler derives the unit as emit - NOUVEAU_STATE_TEX_ENV0 */
+		nv20_emit_tex_env,
+		nv20_emit_tex_env,
+		nv20_emit_tex_env,
+		nv20_emit_tex_gen,
+		nv20_emit_tex_gen,
+		nv20_emit_tex_gen,
+		nv20_emit_tex_gen,
+		nv20_emit_tex_mat,
+		nv20_emit_tex_mat,
+		nv20_emit_tex_mat,
+		nv20_emit_tex_mat,
+		nv20_emit_tex_obj,
+		nv20_emit_tex_obj,
+		nv20_emit_tex_obj,
+		nv20_emit_tex_obj,
+		nv20_emit_viewport,
+		nv20_emit_tex_shader	/* presumably the NOUVEAU_STATE_TEX_SHADER slot */
+	},
+	.num_emit = NUM_NV20_STATE,
+};
diff --git a/src/mesa/drivers/dri/nouveau/nv20_driver.h b/src/mesa/drivers/dri/nouveau/nv20_driver.h
new file mode 100644
index 0000000000..8adecef2c4
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv20_driver.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NV20_DRIVER_H__
+#define __NV20_DRIVER_H__
+
+enum {
+	NOUVEAU_STATE_TEX_SHADER = NUM_NOUVEAU_STATE,	/* numbering continues at NUM_NOUVEAU_STATE */
+	NUM_NV20_STATE	/* used as nv20_driver.num_emit */
+};
+
+#define NV20_TEXTURE_UNITS 4
+
+/* nv20_context.c */
+extern const struct nouveau_driver nv20_driver;
+
+/* nv20_render.c */
+void
+nv20_render_init(GLcontext *ctx);
+
+void
+nv20_render_destroy(GLcontext *ctx);
+
+/* nv20_state_fb.c */
+void
+nv20_emit_framebuffer(GLcontext *ctx, int emit);
+
+void
+nv20_emit_viewport(GLcontext *ctx, int emit);
+
+/* nv20_state_polygon.c */
+void
+nv20_emit_point_mode(GLcontext *ctx, int emit);
+
+/* nv20_state_raster.c */
+void
+nv20_emit_logic_opcode(GLcontext *ctx, int emit);
+
+/* nv20_state_frag.c */
+void
+nv20_emit_tex_env(GLcontext *ctx, int emit);
+
+void
+nv20_emit_frag(GLcontext *ctx, int emit);
+
+/* nv20_state_tex.c */
+void
+nv20_emit_tex_gen(GLcontext *ctx, int emit);
+
+void
+nv20_emit_tex_mat(GLcontext *ctx, int emit);
+
+void
+nv20_emit_tex_obj(GLcontext *ctx, int emit);
+
+void
+nv20_emit_tex_shader(GLcontext *ctx, int emit);
+
+/* nv20_state_tnl.c */
+void
+nv20_emit_clip_plane(GLcontext *ctx, int emit);
+
+void
+nv20_emit_color_material(GLcontext *ctx, int emit);
+
+void
+nv20_emit_fog(GLcontext *ctx, int emit);
+
+void
+nv20_emit_light_model(GLcontext *ctx, int emit);
+
+void
+nv20_emit_light_source(GLcontext *ctx, int emit);
+
+void
+nv20_emit_material_ambient(GLcontext *ctx, int emit);
+
+void
+nv20_emit_material_diffuse(GLcontext *ctx, int emit);
+
+void
+nv20_emit_material_specular(GLcontext *ctx, int emit);
+
+void
+nv20_emit_material_shininess(GLcontext *ctx, int emit);
+
+void
+nv20_emit_modelview(GLcontext *ctx, int emit);
+
+void
+nv20_emit_projection(GLcontext *ctx, int emit);
+
+#endif
diff --git a/src/mesa/drivers/dri/nouveau/nv20_render.c b/src/mesa/drivers/dri/nouveau/nv20_render.c
new file mode 100644
index 0000000000..a696ac107f
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv20_render.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_class.h"
+#include "nv20_driver.h"
+
+#define NUM_VERTEX_ATTRS 16
+
+static void
+nv20_emit_material(GLcontext *ctx, struct nouveau_array_state *a,
+		   const void *v);
+
+/* Vertex attribute format: VBO index and immediate-mode method per attribute. */
+static struct nouveau_attr_info nv20_vertex_attrs[VERT_ATTRIB_MAX] = {
+	[VERT_ATTRIB_POS] = {
+		.vbo_index = 0,
+		.imm_method = NV20TCL_VERTEX_POS_4F_X,
+		.imm_fields = 4,
+	},
+	[VERT_ATTRIB_NORMAL] = {
+		.vbo_index = 2,
+		.imm_method = NV20TCL_VERTEX_NOR_3F_X,
+		.imm_fields = 3,
+	},
+	[VERT_ATTRIB_COLOR0] = {
+		.vbo_index = 3,
+		.imm_method = NV20TCL_VERTEX_COL_4F_X,
+		.imm_fields = 4,
+	},
+	[VERT_ATTRIB_COLOR1] = {
+		.vbo_index = 4,
+		.imm_method = NV20TCL_VERTEX_COL2_3F_X,
+		.imm_fields = 3,
+	},
+	[VERT_ATTRIB_FOG] = {
+		.vbo_index = 5,
+		.imm_method = NV20TCL_VERTEX_FOG_1F,
+		.imm_fields = 1,
+	},
+	[VERT_ATTRIB_TEX0] = {
+		.vbo_index = 9,
+		.imm_method = NV20TCL_VERTEX_TX0_4F_S,
+		.imm_fields = 4,
+	},
+	[VERT_ATTRIB_TEX1] = {
+		.vbo_index = 10,
+		.imm_method = NV20TCL_VERTEX_TX1_4F_S,
+		.imm_fields = 4,
+	},
+	[VERT_ATTRIB_TEX2] = {
+		.vbo_index = 11,
+		.imm_method = NV20TCL_VERTEX_TX2_4F_S,
+		.imm_fields = 4,
+	},
+	[VERT_ATTRIB_TEX3] = {
+		.vbo_index = 12,
+		.imm_method = NV20TCL_VERTEX_TX3_4F_S,
+		.imm_fields = 4,
+	},
+	[VERT_ATTRIB_GENERIC0] = {	/* GENERIC0-9 only set .emit */
+		.emit = nv20_emit_material,
+	},
+	[VERT_ATTRIB_GENERIC1] = {
+		.emit = nv20_emit_material,
+	},
+	[VERT_ATTRIB_GENERIC2] = {
+		.emit = nv20_emit_material,
+	},
+	[VERT_ATTRIB_GENERIC3] = {
+		.emit = nv20_emit_material,
+	},
+	[VERT_ATTRIB_GENERIC4] = {
+		.emit = nv20_emit_material,
+	},
+	[VERT_ATTRIB_GENERIC5] = {
+		.emit = nv20_emit_material,
+	},
+	[VERT_ATTRIB_GENERIC6] = {
+		.emit = nv20_emit_material,
+	},
+	[VERT_ATTRIB_GENERIC7] = {
+		.emit = nv20_emit_material,
+	},
+	[VERT_ATTRIB_GENERIC8] = {
+		.emit = nv20_emit_material,
+	},
+	[VERT_ATTRIB_GENERIC9] = {
+		.emit = nv20_emit_material,
+	},
+};
+
+/* Map a GL array type to the matching NV20TCL_VTXFMT_TYPE_* value. */
+static int
+get_hw_format(int type)
+{
+	switch (type) {
+	case GL_FLOAT:		return NV20TCL_VTXFMT_TYPE_FLOAT;
+	case GL_UNSIGNED_SHORT:	return NV20TCL_VTXFMT_TYPE_USHORT;
+	case GL_UNSIGNED_BYTE:	return NV20TCL_VTXFMT_TYPE_UBYTE;
+	default:
+		/* Still return something defined when assert() is off. */
+		assert(0);
+		return NV20TCL_VTXFMT_TYPE_FLOAT;
+	}
+}
+
+/* Emit one NV20TCL_VTXFMT word for each of the NUM_VERTEX_ATTRS slots. */
+static void
+nv20_render_set_format(GLcontext *ctx)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *kelvin = context_eng3d(ctx);
+	int i, hw_format;
+
+	for (i = 0; i < NUM_VERTEX_ATTRS; i++) {
+		int attr = render->map[i];
+
+		if (attr >= 0) {
+			struct nouveau_array_state *a = &render->attrs[attr];
+			/* Stride from bit 8 up, field count from bit 4, type below. */
+			hw_format = a->stride << 8 |
+				a->fields << 4 |
+				get_hw_format(a->type);
+		} else {
+			/* Unused attribute. */
+			hw_format = NV20TCL_VTXFMT_TYPE_FLOAT;
+		}
+
+		BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(i), 1);
+		OUT_RING(chan, hw_format);
+	}
+}
+
+static void
+nv20_render_bind_vertices(GLcontext *ctx)
+{
+	struct nouveau_render_state *render = to_render_state(ctx);
+	struct nouveau_bo_context *bctx = context_bctx(ctx, VERTEX);
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *kelvin = context_eng3d(ctx);
+	int slot;
+
+	for (slot = 0; slot < NUM_VERTEX_ATTRS; slot++) {
+		const int attr = render->map[slot];
+		struct nouveau_array_state *a;
+
+		/* A negative map entry means the slot has no attribute. */
+		if (attr < 0)
+			continue;
+		a = &render->attrs[attr];
+		nouveau_bo_mark(bctx, kelvin, NV20TCL_VTXBUF_ADDRESS(slot),
+				a->bo, a->offset, 0,
+				0, NV20TCL_VTXBUF_ADDRESS_DMA1,
+				NOUVEAU_BO_LOW | NOUVEAU_BO_OR |
+				NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+	}
+
+	BEGIN_RING(chan, kelvin, NV20TCL_VTX_CACHE_INVALIDATE, 1);
+	OUT_RING(chan, 0);
+}
+
+/* Vertex array rendering defs, presumably used by the template included below. */
+#define RENDER_LOCALS(ctx)					\
+	struct nouveau_grobj *kelvin = context_eng3d(ctx)
+
+#define BATCH_BEGIN(prim)					\
+	BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1);	\
+	OUT_RING(chan, prim);
+#define BATCH_END()						\
+	BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1);	\
+	OUT_RING(chan, 0);
+
+#define MAX_PACKET 0x400
+/* BATCH_OUT_L packs (n) - 1 from bit 24 up and i below it. */
+#define MAX_OUT_L 0x100
+#define BATCH_PACKET_L(n)						\
+	BEGIN_RING_NI(chan, kelvin, NV20TCL_VB_VERTEX_BATCH, n);
+#define BATCH_OUT_L(i, n)			\
+	OUT_RING(chan, ((n) - 1) << 24 | (i));
+/* Two 16-bit values per word: i0 in the low half, i1 in the high half. */
+#define MAX_OUT_I16 0x2
+#define BATCH_PACKET_I16(n)					\
+	BEGIN_RING_NI(chan, kelvin, NV20TCL_VB_ELEMENT_U16, n);
+#define BATCH_OUT_I16(i0, i1)			\
+	OUT_RING(chan, (i1) << 16 | (i0));
+/* One value per word. */
+#define MAX_OUT_I32 0x1
+#define BATCH_PACKET_I32(n)					\
+	BEGIN_RING_NI(chan, kelvin, NV20TCL_VB_ELEMENT_U32, n);
+#define BATCH_OUT_I32(i)			\
+	OUT_RING(chan, i);
+/* Immediate mode: method m with n words, each value written as a float. */
+#define IMM_PACKET(m, n)			\
+	BEGIN_RING(chan, kelvin, m, n);
+#define IMM_OUT(x)				\
+	OUT_RINGf(chan, x);
+
+#define TAG(x) nv20_##x
+#include "nouveau_render_t.c"
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
new file mode 100644
index 0000000000..21da4f7af1
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_fbo.h"
+#include "nouveau_gldefs.h"
+#include "nouveau_util.h"
+#include "nouveau_class.h"
+#include "nv20_driver.h"
+
+/*
+ * RT_FORMAT bits for a Mesa surface format.  Unsupported formats assert;
+ * with assertions compiled out they contribute no format bits.
+ */
+static inline unsigned
+get_rt_format(gl_format format)
+{
+	switch (format) {
+	case MESA_FORMAT_XRGB8888:	return 0x05;
+	case MESA_FORMAT_ARGB8888:	return 0x08;
+	case MESA_FORMAT_RGB565:	return 0x03;
+	case MESA_FORMAT_Z16:		return 0x10;
+	case MESA_FORMAT_Z24_S8:	return 0x20;
+	default:
+		assert(0);
+		return 0;
+	}
+}
+
+void
+nv20_emit_framebuffer(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *kelvin = context_eng3d(ctx);
+	struct nouveau_bo_context *bctx = context_bctx(ctx, FRAMEBUFFER);
+	struct gl_framebuffer *fb = ctx->DrawBuffer;
+	struct nouveau_surface *surf;
+	const unsigned bo_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+	unsigned rt_format = NV20TCL_RT_FORMAT_TYPE_LINEAR;
+	unsigned rt_pitch = 0;
+	unsigned zeta_pitch = 0;
+
+	/* Emit nothing at all for an incomplete framebuffer. */
+	if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT)
+		return;
+
+	/* Color buffer; its pitch is also the zeta pitch until overridden. */
+	if (fb->_ColorDrawBuffers[0]) {
+		surf = &to_nouveau_renderbuffer(
+			fb->_ColorDrawBuffers[0])->surface;
+		rt_format |= get_rt_format(surf->format);
+		rt_pitch = surf->pitch;
+		zeta_pitch = rt_pitch;
+		nouveau_bo_markl(bctx, kelvin, NV20TCL_COLOR_OFFSET,
+				 surf->bo, 0, bo_flags);
+	}
+
+	/* Depth/stencil buffer, taken from the wrapped renderbuffer. */
+	if (fb->_DepthBuffer) {
+		surf = &to_nouveau_renderbuffer(
+			fb->_DepthBuffer->Wrapped)->surface;
+		rt_format |= get_rt_format(surf->format);
+		zeta_pitch = surf->pitch;
+		nouveau_bo_markl(bctx, kelvin, NV20TCL_ZETA_OFFSET,
+				 surf->bo, 0, bo_flags);
+	}
+
+	/* Format word, then zeta pitch (upper half) and rt pitch (lower). */
+	BEGIN_RING(chan, kelvin, NV20TCL_RT_FORMAT, 2);
+	OUT_RING(chan, rt_format);
+	OUT_RING(chan, zeta_pitch << 16 | rt_pitch);
+
+	/* Recompute the viewport/scissor state. */
+	context_dirty(ctx, VIEWPORT);
+	context_dirty(ctx, SCISSOR);
+}
+
+void
+nv20_emit_viewport(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *kelvin = context_eng3d(ctx);
+	struct gl_framebuffer *fb = ctx->DrawBuffer;
+	float translate[4] = {};
+
+	get_viewport_translate(ctx, translate);
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
+	OUT_RINGp(chan, translate, 4);
+
+	/* Clip rectangle 0: (size - 1) goes in the upper 16 bits. */
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+	OUT_RING(chan, (fb->Width - 1) << 16);
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1);
+	OUT_RING(chan, (fb->Height - 1) << 16);
+
+	context_dirty(ctx, PROJECTION);
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_frag.c b/src/mesa/drivers/dri/nouveau/nv20_state_frag.c
new file mode 100644
index 0000000000..74803d2ae8
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_frag.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_class.h"
+#include "nv10_driver.h"
+#include "nv20_driver.h"
+
+void
+nv20_emit_tex_env(GLcontext *ctx, int emit)
+{
+	const int unit = emit - NOUVEAU_STATE_TEX_ENV0;
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *kelvin = context_eng3d(ctx);
+	uint32_t alpha_in, alpha_out, rgb_in, rgb_out, konst;
+
+	nv10_get_general_combiner(ctx, unit, &alpha_in, &alpha_out,
+				  &rgb_in, &rgb_out, &konst);
+	/* One single-word write per combiner register of this stage. */
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_ALPHA(unit), 1);
+	OUT_RING(chan, alpha_in);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_ALPHA(unit), 1);
+	OUT_RING(chan, alpha_out);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_RGB(unit), 1);
+	OUT_RING(chan, rgb_in);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_RGB(unit), 1);
+	OUT_RING(chan, rgb_out);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_CONSTANT_COLOR0(unit), 1);
+	OUT_RING(chan, konst);
+	context_dirty(ctx, FRAG);
+}
+
+void
+nv20_emit_frag(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *kelvin = context_eng3d(ctx);
+	uint64_t final_in;
+	int rc_enable;
+
+	nv10_get_final_combiner(ctx, &final_in, &rc_enable);
+
+	/* 64-bit value goes out as two words (presumably low half first). */
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_FINAL0, 2);
+	OUT_RING(chan, final_in);
+	OUT_RING(chan, final_in >> 32);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_ENABLE, 1);
+	OUT_RING(chan, rc_enable);
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_polygon.c b/src/mesa/drivers/dri/nouveau/nv20_state_polygon.c
new file mode 100644
index 0000000000..3a320e2dac
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_polygon.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_gldefs.h"
+#include "nouveau_class.h"
+#include "nv20_driver.h"
+
+/* Emit POINT_SIZE: a float on chipsets >= 0x25, else size * 8 as integer. */
+void
+nv20_emit_point_mode(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	BEGIN_RING(ring, eng3d, NV20TCL_POINT_SIZE, 1);
+	if (context_chipset(ctx) < 0x25)
+		OUT_RING(ring, (uint32_t)(ctx->Point.Size * 8));
+	else
+		OUT_RINGf(ring, ctx->Point.Size);
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_raster.c b/src/mesa/drivers/dri/nouveau/nv20_state_raster.c
new file mode 100644
index 0000000000..b43b29bb23
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_raster.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_gldefs.h"
+#include "nouveau_class.h"
+#include "nv20_driver.h"
+
+/* Emit the color logic-op enable flag followed by the translated opcode. */
+void
+nv20_emit_logic_opcode(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	BEGIN_RING(ring, eng3d, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2);
+	OUT_RING(ring, ctx->Color.ColorLogicOpEnabled != 0);
+	OUT_RING(ring, nvgl_logicop_func(ctx->Color.LogicOp));
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
new file mode 100644
index 0000000000..e46118e4fc
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2009 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_gldefs.h"
+#include "nouveau_texture.h"
+#include "nouveau_class.h"
+#include "nouveau_util.h"
+#include "nv20_driver.h"
+/* Per-unit (i) / per-coordinate (j) texgen and texture matrix methods. */
+#define TX_GEN_MODE(i, j) (NV20TCL_TX_GEN_MODE_S(i) + 4 * (j))
+#define TX_GEN_COEFF(i, j) (NV20TCL_TX_GEN_COEFF_S_A(i) + 16 * (j))
+#define TX_MATRIX(i) (NV20TCL_TX0_MATRIX(0) + 64 * (i))
+
+/* Emit per-coordinate texgen modes; disabled coordinates get mode 0. */
+void
+nv20_emit_tex_gen(GLcontext *ctx, int emit)
+{
+	const int tex = emit - NOUVEAU_STATE_TEX_GEN0;
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	struct gl_texture_unit *unit = &ctx->Texture.Unit[tex];
+	int c;
+
+	for (c = 0; c < 4; c++) {
+		if (nctx->fallback != HWTNL ||
+		    !(unit->TexGenEnabled & (1 << c))) {
+			BEGIN_RING(ring, eng3d, TX_GEN_MODE(tex, c), 1);
+			OUT_RING(ring, 0);
+		} else {
+			struct gl_texgen *gen = get_texgen_coord(unit, c);
+			float *coeff = get_texgen_coeff(gen);
+
+			if (coeff) {
+				BEGIN_RING(ring, eng3d, TX_GEN_COEFF(tex, c), 4);
+				OUT_RINGp(ring, coeff, 4);
+			}
+			BEGIN_RING(ring, eng3d, TX_GEN_MODE(tex, c), 1);
+			OUT_RING(ring, nvgl_texgen_mode(gen->Mode));
+		}
+	}
+}
+
+/* Emit the texture matrix enable flag and, when enabled, the matrix. */
+void
+nv20_emit_tex_mat(GLcontext *ctx, int emit)
+{
+	const int tex = emit - NOUVEAU_STATE_TEX_MAT0;
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	const int enabled = (nctx->fallback == HWTNL &&
+			     (ctx->Texture._TexMatEnabled & (1 << tex)));
+
+	BEGIN_RING(ring, eng3d, NV20TCL_TX_MATRIX_ENABLE(tex), 1);
+	OUT_RING(ring, enabled);
+
+	if (!enabled)
+		return;
+
+	/* The matrix itself is only uploaded while it is enabled. */
+	BEGIN_RING(ring, eng3d, TX_MATRIX(tex), 16);
+	OUT_RINGm(ring, ctx->TextureMatrixStack[tex].Top->m);
+}
+
+/*
+ * Translate a Mesa texture format into the NV20TCL_TX_FORMAT_FORMAT_*
+ * value used for non-rectangle textures.  Unsupported formats trip an
+ * assertion; 0 is returned if assertions are compiled out.
+ */
+static uint32_t
+get_tex_format_pot(struct gl_texture_image *ti)
+{
+	switch (ti->TexFormat) {
+	case MESA_FORMAT_ARGB8888:
+		return NV20TCL_TX_FORMAT_FORMAT_A8R8G8B8;
+	case MESA_FORMAT_ARGB1555:
+		return NV20TCL_TX_FORMAT_FORMAT_A1R5G5B5;
+	case MESA_FORMAT_ARGB4444:
+		return NV20TCL_TX_FORMAT_FORMAT_A4R4G4B4;
+	case MESA_FORMAT_XRGB8888:
+		return NV20TCL_TX_FORMAT_FORMAT_X8R8G8B8;
+	case MESA_FORMAT_RGB565:
+		return NV20TCL_TX_FORMAT_FORMAT_R5G6B5;
+	case MESA_FORMAT_A8:
+	case MESA_FORMAT_I8:
+		return NV20TCL_TX_FORMAT_FORMAT_A8;
+	case MESA_FORMAT_L8:
+		return NV20TCL_TX_FORMAT_FORMAT_L8;
+	case MESA_FORMAT_CI8:
+		return NV20TCL_TX_FORMAT_FORMAT_INDEX8;
+
+	default:
+		assert(0);
+		/* Avoid falling off the end when NDEBUG removes the assert. */
+		return 0;
+	}
+}
+
+/*
+ * Translate a Mesa texture format into the NV20TCL_TX_FORMAT_FORMAT_*_RECT
+ * value used for GL_TEXTURE_RECTANGLE textures.  Unsupported formats trip
+ * an assertion; 0 is returned if assertions are compiled out.
+ */
+static uint32_t
+get_tex_format_rect(struct gl_texture_image *ti)
+{
+	switch (ti->TexFormat) {
+	case MESA_FORMAT_ARGB8888:
+		return NV20TCL_TX_FORMAT_FORMAT_A8R8G8B8_RECT;
+	case MESA_FORMAT_ARGB1555:
+		return NV20TCL_TX_FORMAT_FORMAT_A1R5G5B5_RECT;
+	case MESA_FORMAT_ARGB4444:
+		return NV20TCL_TX_FORMAT_FORMAT_A4R4G4B4_RECT;
+	case MESA_FORMAT_XRGB8888:
+		return NV20TCL_TX_FORMAT_FORMAT_R8G8B8_RECT;
+	case MESA_FORMAT_RGB565:
+		return NV20TCL_TX_FORMAT_FORMAT_R5G6B5_RECT;
+	case MESA_FORMAT_L8:
+		return NV20TCL_TX_FORMAT_FORMAT_L8_RECT;
+	case MESA_FORMAT_A8:
+	case MESA_FORMAT_I8:
+		return NV20TCL_TX_FORMAT_FORMAT_A8_RECT;
+	default:
+		assert(0);
+		/* Avoid falling off the end when NDEBUG removes the assert. */
+		return 0;
+	}
+}
+
+/* Emit one texture unit's format, offset, wrap, filter and enable state. */
+void
+nv20_emit_tex_obj(GLcontext *ctx, int emit)
+{
+	const int i = emit - NOUVEAU_STATE_TEX_OBJ0;
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *kelvin = context_eng3d(ctx);
+	struct nouveau_bo_context *bctx = context_bctx_i(ctx, TEXTURE, i);
+	const int bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART | NOUVEAU_BO_VRAM;
+	struct gl_texture_object *t;
+	struct nouveau_surface *s;
+	struct gl_texture_image *ti;
+	uint32_t tx_format, tx_filter, tx_wrap, tx_enable;
+
+	if (!ctx->Texture.Unit[i]._ReallyEnabled) {
+		BEGIN_RING(chan, kelvin, NV20TCL_TX_ENABLE(i), 1);
+		OUT_RING(chan, 0);
+
+		context_dirty(ctx, TEX_SHADER);
+		return;
+	}
+
+	t = ctx->Texture.Unit[i]._Current;
+	s = &to_nouveau_texture(t)->surfaces[t->BaseLevel];
+	ti = t->Image[0][t->BaseLevel];
+
+	if (!nouveau_texture_validate(ctx, t))
+		return;
+
+	/* Recompute the texturing registers. */
+	tx_format = ti->DepthLog2 << 28
+		| ti->HeightLog2 << 24
+		| ti->WidthLog2 << 20
+		| NV20TCL_TX_FORMAT_DIMS_2D
+		| NV20TCL_TX_FORMAT_NO_BORDER
+		| 1 << 16;
+
+	tx_wrap = nvgl_wrap_mode(t->WrapR) << 16
+		| nvgl_wrap_mode(t->WrapT) << 8
+		| nvgl_wrap_mode(t->WrapS) << 0;
+
+	tx_filter = nvgl_filter_mode(t->MagFilter) << 24
+		| nvgl_filter_mode(t->MinFilter) << 16;
+
+	tx_enable = NV20TCL_TX_ENABLE_ENABLE
+		| log2i(t->MaxAnisotropy) << 4;
+	/* Rectangle textures also get their pitch and size programmed. */
+	if (t->Target == GL_TEXTURE_RECTANGLE) {
+		BEGIN_RING(chan, kelvin, NV20TCL_TX_NPOT_PITCH(i), 1);
+		OUT_RING(chan, s->pitch << 16);
+		BEGIN_RING(chan, kelvin, NV20TCL_TX_NPOT_SIZE(i), 1);
+		OUT_RING(chan, s->width << 16 | s->height);
+
+		tx_format |= get_tex_format_rect(ti);
+	} else {
+		tx_format |= get_tex_format_pot(ti);
+	}
+	/* Any min filter other than NEAREST/LINEAR turns on mipmapping. */
+	if (t->MinFilter != GL_NEAREST &&
+	    t->MinFilter != GL_LINEAR) {
+		int lod_min = t->MinLod;
+		int lod_max = MIN2(t->MaxLod, t->_MaxLambda);
+		int lod_bias = t->LodBias
+			+ ctx->Texture.Unit[i].LodBias;
+		/* NOTE(review): values are ints clamped to [0, 15], so a negative bias becomes 0 - confirm. */
+		lod_max = CLAMP(lod_max, 0, 15);
+		lod_min = CLAMP(lod_min, 0, 15);
+		lod_bias = CLAMP(lod_bias, 0, 15);
+
+		tx_format |= NV20TCL_TX_FORMAT_MIPMAP;
+		tx_filter |= lod_bias << 8;
+		tx_enable |= lod_min << 26
+			| lod_max << 14;
+	}
+
+	/* Write it to the hardware. */
+	nouveau_bo_mark(bctx, kelvin, NV20TCL_TX_FORMAT(i),
+			s->bo, tx_format, 0,
+			NV20TCL_TX_FORMAT_DMA0,
+			NV20TCL_TX_FORMAT_DMA1,
+			bo_flags | NOUVEAU_BO_OR);
+
+	nouveau_bo_markl(bctx, kelvin, NV20TCL_TX_OFFSET(i),
+			 s->bo, 0, bo_flags);
+
+	BEGIN_RING(chan, kelvin, NV20TCL_TX_WRAP(i), 1);
+	OUT_RING(chan, tx_wrap);
+	BEGIN_RING(chan, kelvin, NV20TCL_TX_FILTER(i), 1);
+	OUT_RING(chan, tx_filter);
+
+	BEGIN_RING(chan, kelvin, NV20TCL_TX_ENABLE(i), 1);
+	OUT_RING(chan, tx_enable);
+
+	context_dirty(ctx, TEX_SHADER);
+}
+
+/* Build and emit TX_SHADER_OP: TEXTURE_2D for every enabled unit. */
+void
+nv20_emit_tex_shader(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	uint32_t shader_op = 0;
+	int unit;
+
+	/* The per-unit value is shifted left by 5 bits for each unit index. */
+	for (unit = 0; unit < NV20_TEXTURE_UNITS; unit++) {
+		if (ctx->Texture.Unit[unit]._ReallyEnabled)
+			shader_op |= NV20TCL_TX_SHADER_OP_TX0_TEXTURE_2D
+				<< (5 * unit);
+	}
+	BEGIN_RING(ring, eng3d, NV20TCL_TX_SHADER_OP, 1);
+	OUT_RING(ring, shader_op);
+}
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c b/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c
new file mode 100644
index 0000000000..62efe80fe4
--- /dev/null
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2009-2010 Francisco Jerez.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "nouveau_driver.h"
+#include "nouveau_context.h"
+#include "nouveau_gldefs.h"
+#include "nouveau_util.h"
+#include "nouveau_class.h"
+#include "nv10_driver.h"
+#include "nv20_driver.h"
+
+void
+nv20_emit_clip_plane(GLcontext *ctx, int emit)
+{ /* Stub: emits no clip plane state. */
+}
+
+/* Map Mesa MAT_BIT_* flags onto NV20TCL_COLOR_MATERIAL_*_COL1 bits. */
+static inline unsigned
+get_material_bitmask(unsigned m)
+{
+	unsigned hw = 0;
+
+	hw |= (m & MAT_BIT_FRONT_EMISSION) ?
+		NV20TCL_COLOR_MATERIAL_FRONT_EMISSION_COL1 : 0;
+	hw |= (m & MAT_BIT_FRONT_AMBIENT) ?
+		NV20TCL_COLOR_MATERIAL_FRONT_AMBIENT_COL1 : 0;
+	hw |= (m & MAT_BIT_FRONT_DIFFUSE) ?
+		NV20TCL_COLOR_MATERIAL_FRONT_DIFFUSE_COL1 : 0;
+	hw |= (m & MAT_BIT_FRONT_SPECULAR) ?
+		NV20TCL_COLOR_MATERIAL_FRONT_SPECULAR_COL1 : 0;
+	hw |= (m & MAT_BIT_BACK_EMISSION) ?
+		NV20TCL_COLOR_MATERIAL_BACK_EMISSION_COL1 : 0;
+	hw |= (m & MAT_BIT_BACK_AMBIENT) ?
+		NV20TCL_COLOR_MATERIAL_BACK_AMBIENT_COL1 : 0;
+	hw |= (m & MAT_BIT_BACK_DIFFUSE) ?
+		NV20TCL_COLOR_MATERIAL_BACK_DIFFUSE_COL1 : 0;
+	hw |= (m & MAT_BIT_BACK_SPECULAR) ?
+		NV20TCL_COLOR_MATERIAL_BACK_SPECULAR_COL1 : 0;
+
+	return hw;
+}
+
+void
+nv20_emit_color_material(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	/* Emit the tracked-attribute mask, or 0 while color material is off. */
+	BEGIN_RING(ring, eng3d, NV20TCL_COLOR_MATERIAL, 1);
+	OUT_RING(ring, !ctx->Light.ColorMaterialEnabled ? 0 :
+		 get_material_bitmask(ctx->Light.ColorMaterialBitmask));
+}
+
+/* Map a GL fog mode to its NV20TCL_FOG_MODE_*_SIGNED value. */
+static unsigned
+get_fog_mode_signed(unsigned mode)
+{
+	if (mode == GL_LINEAR)
+		return NV20TCL_FOG_MODE_LINEAR_SIGNED;
+	if (mode == GL_EXP)
+		return NV20TCL_FOG_MODE_EXP_SIGNED;
+	if (mode == GL_EXP2)
+		return NV20TCL_FOG_MODE_EXP2_SIGNED;
+	/* Unknown mode: assert, but still return a value under NDEBUG. */
+	assert(0);
+	return 0;
+}
+
+/* Map a GL fog mode to its NV20TCL_FOG_MODE_*_UNSIGNED value. */
+static unsigned
+get_fog_mode_unsigned(unsigned mode)
+{
+	if (mode == GL_LINEAR)
+		return NV20TCL_FOG_MODE_LINEAR_UNSIGNED;
+	if (mode == GL_EXP)
+		return NV20TCL_FOG_MODE_EXP_UNSIGNED;
+	if (mode == GL_EXP2)
+		return NV20TCL_FOG_MODE_EXP2_UNSIGNED;
+	/* Unknown mode: assert, but still return a value under NDEBUG. */
+	assert(0);
+	return 0;
+}
+
+/* Map a GL fog coordinate source to its NV20TCL_FOG_COORD_* value. */
+static unsigned
+get_fog_source(unsigned source)
+{
+	if (source == GL_FOG_COORDINATE_EXT)
+		return NV20TCL_FOG_COORD_FOG;
+	if (source == GL_FRAGMENT_DEPTH_EXT)
+		return NV20TCL_FOG_COORD_DIST_ORTHOGONAL_ABS;
+	/* Unknown source: assert, but still return a value under NDEBUG. */
+	assert(0);
+	return 0;
+}
+
+/* Emit fog mode, coordinate source, enable, color and equation constants. */
+void
+nv20_emit_fog(GLcontext *ctx, int emit)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	struct gl_fog_attrib *fog = &ctx->Fog;
+	unsigned source = GL_FOG_COORDINATE_EXT;
+	float coeff[3];
+
+	if (nctx->fallback == HWTNL)
+		source = fog->FogCoordinateSource;
+	nv10_get_fog_coeff(ctx, coeff);
+	BEGIN_RING(ring, eng3d, NV20TCL_FOG_MODE, 4);
+	OUT_RING(ring, source != GL_FOG_COORDINATE_EXT ?
+		 get_fog_mode_unsigned(fog->Mode) :
+		 get_fog_mode_signed(fog->Mode));
+	OUT_RING(ring, get_fog_source(source));
+	OUT_RING(ring, fog->Enabled ? 1 : 0);
+	OUT_RING(ring, pack_rgba_f(MESA_FORMAT_RGBA8888_REV, fog->Color));
+	BEGIN_RING(ring, eng3d, NV20TCL_FOG_EQUATION_CONSTANT, 3);
+	OUT_RINGp(ring, coeff, 3);
+}
+
+/* Emit separate-specular, viewer mode and two-sided lighting state. */
+void
+nv20_emit_light_model(GLcontext *ctx, int emit)
+{
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	struct gl_lightmodel *model = &ctx->Light.Model;
+	uint32_t light_model = NV20TCL_LIGHT_MODEL_VIEWER_NONLOCAL;
+
+	if (model->LocalViewer)
+		light_model = NV20TCL_LIGHT_MODEL_VIEWER_LOCAL;
+	if (NEED_SECONDARY_COLOR(ctx))
+		light_model |= NV20TCL_LIGHT_MODEL_SEPARATE_SPECULAR;
+
+	BEGIN_RING(ring, eng3d, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1);
+	OUT_RING(ring, model->ColorControl == GL_SEPARATE_SPECULAR_COLOR ? 1 : 0);
+	BEGIN_RING(ring, eng3d, NV20TCL_LIGHT_MODEL, 1);
+	OUT_RING(ring, light_model);
+	BEGIN_RING(ring, eng3d, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1);
+	OUT_RING(ring, model->TwoSide ? 1 : 0);
+}
+
+/* Emit the geometric parameters (and spot coefficients) of one light. */
+void
+nv20_emit_light_source(GLcontext *ctx, int emit)
+{
+	const int light = emit - NOUVEAU_STATE_LIGHT_SOURCE0;
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	struct gl_light *src = &ctx->Light.Light[light];
+
+	if (!(src->_Flags & LIGHT_POSITIONAL)) {
+		/* Non-positional light: direction and half vector. */
+		BEGIN_RING(ring, eng3d, NV20TCL_LIGHT_DIRECTION_X(light), 3);
+		OUT_RINGp(ring, src->_VP_inf_norm, 3);
+		BEGIN_RING(ring, eng3d, NV20TCL_LIGHT_HALF_VECTOR_X(light), 3);
+		OUT_RINGp(ring, src->_h_inf_norm, 3);
+	} else {
+		/* Positional light: position and attenuation factors. */
+		BEGIN_RING(ring, eng3d, NV20TCL_LIGHT_POSITION_X(light), 3);
+		OUT_RINGp(ring, src->_Position, 3);
+
+		BEGIN_RING(ring, eng3d, NV20TCL_LIGHT_ATTENUATION_CONSTANT(light), 3);
+		OUT_RINGf(ring, src->ConstantAttenuation);
+		OUT_RINGf(ring, src->LinearAttenuation);
+		OUT_RINGf(ring, src->QuadraticAttenuation);
+	}
+
+	if (src->_Flags & LIGHT_SPOT) {
+		float spot[7];
+
+		nv10_get_spot_coeff(src, spot);
+		BEGIN_RING(ring, eng3d, NV20TCL_LIGHT_SPOT_CUTOFF_A(light), 7);
+		OUT_RINGp(ring, spot, 7);
+	}
+}
+
+/* Nonzero if color material is on and tracks attr(side); uses caller's ctx. */
+#define USE_COLOR_MATERIAL(attr, side)					\
+	(ctx->Light.ColorMaterialEnabled &&				\
+	 ctx->Light.ColorMaterialBitmask & (1 << MAT_ATTRIB_##attr(side)))
+
+/* Emit scene color, material factor and per-light ambient for one side. */
+void
+nv20_emit_material_ambient(GLcontext *ctx, int emit)
+{
+	const int side = emit - NOUVEAU_STATE_MATERIAL_FRONT_AMBIENT;
+	struct nouveau_channel *chan = context_chan(ctx);
+	struct nouveau_grobj *kelvin = context_eng3d(ctx);
+	float (*mat)[4] = ctx->Light.Material.Attrib;
+	uint32_t m_scene[] = { NV20TCL_LIGHT_MODEL_FRONT_AMBIENT_R,
+			       NV20TCL_LIGHT_MODEL_BACK_AMBIENT_R };
+	uint32_t m_factor[] = { NV20TCL_MATERIAL_FACTOR_FRONT_R,
+			      NV20TCL_MATERIAL_FACTOR_BACK_R };
+	float c_scene[3], c_factor[3];
+	struct gl_light *l;
+
+	if (USE_COLOR_MATERIAL(AMBIENT, side)) {
+		COPY_3V(c_scene, mat[MAT_ATTRIB_EMISSION(side)]);
+		COPY_3V(c_factor, ctx->Light.Model.Ambient);
+	} else if (USE_COLOR_MATERIAL(EMISSION, side)) {
+		SCALE_3V(c_scene, mat[MAT_ATTRIB_AMBIENT(side)],
+			 ctx->Light.Model.Ambient);
+		ASSIGN_3V(c_factor, 1, 1, 1);
+	} else {
+		COPY_3V(c_scene, ctx->Light._BaseColor[side]);
+		ZERO_3V(c_factor);
+	}
+
+	BEGIN_RING(chan, kelvin, m_scene[side], 3);
+	OUT_RINGp(chan, c_scene, 3);
+	/* The factor registers are only written while color material is on. */
+	if (ctx->Light.ColorMaterialEnabled) {
+		BEGIN_RING(chan, kelvin, m_factor[side], 3);
+		OUT_RINGp(chan, c_factor, 3);
+	}
+
+	foreach(l, &ctx->Light.EnabledList) {
+		const int i = l - ctx->Light.Light;
+		uint32_t m_light[] = { NV20TCL_LIGHT_FRONT_AMBIENT_R(i),
+				      NV20TCL_LIGHT_BACK_AMBIENT_R(i) };
+		float *c_light = (USE_COLOR_MATERIAL(AMBIENT, side) ?
+				  l->Ambient :
+				  l->_MatAmbient[side]);
+
+		BEGIN_RING(chan, kelvin, m_light[side], 3);
+		OUT_RINGp(chan, c_light, 3);
+	}
+}
+
+/* Emit the diffuse alpha factor and the per-light diffuse colors. */
+void
+nv20_emit_material_diffuse(GLcontext *ctx, int emit)
+{
+	const int side = emit - NOUVEAU_STATE_MATERIAL_FRONT_DIFFUSE;
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	GLfloat (*attrib)[4] = ctx->Light.Material.Attrib;
+	const uint32_t factor_mthd[] = { NV20TCL_MATERIAL_FACTOR_FRONT_A,
+					 NV20TCL_MATERIAL_FACTOR_BACK_A };
+	const int tracked = USE_COLOR_MATERIAL(DIFFUSE, side);
+	struct gl_light *light;
+
+	BEGIN_RING(ring, eng3d, factor_mthd[side], 1);
+	OUT_RINGf(ring, attrib[MAT_ATTRIB_DIFFUSE(side)][3]);
+
+	foreach(light, &ctx->Light.EnabledList) {
+		const int idx = light - ctx->Light.Light;
+		const uint32_t mthd[] = { NV20TCL_LIGHT_FRONT_DIFFUSE_R(idx),
+					  NV20TCL_LIGHT_BACK_DIFFUSE_R(idx) };
+		float *color = tracked ? light->Diffuse :
+					 light->_MatDiffuse[side];
+		BEGIN_RING(ring, eng3d, mthd[side], 3);
+		OUT_RINGp(ring, color, 3);
+	}
+}
+
+/* Emit the specular color of every enabled light for one material side. */
+void
+nv20_emit_material_specular(GLcontext *ctx, int emit)
+{
+	const int side = emit - NOUVEAU_STATE_MATERIAL_FRONT_SPECULAR;
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	const int tracked = USE_COLOR_MATERIAL(SPECULAR, side);
+	struct gl_light *light;
+
+	foreach(light, &ctx->Light.EnabledList) {
+		const int idx = light - ctx->Light.Light;
+		const uint32_t mthd[] = { NV20TCL_LIGHT_FRONT_SPECULAR_R(idx),
+					  NV20TCL_LIGHT_BACK_SPECULAR_R(idx) };
+		float *color = tracked ? light->Specular :
+					 light->_MatSpecular[side];
+		BEGIN_RING(ring, eng3d, mthd[side], 3);
+		OUT_RINGp(ring, color, 3);
+	}
+}
+
+/* Emit the six shininess coefficients for one material side. */
+void
+nv20_emit_material_shininess(GLcontext *ctx, int emit)
+{
+	const int side = emit - NOUVEAU_STATE_MATERIAL_FRONT_SHININESS;
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	float (*attrib)[4] = ctx->Light.Material.Attrib;
+	const uint32_t shininess_mthd[] = { NV20TCL_FRONT_MATERIAL_SHININESS(0),
+					    NV20TCL_BACK_MATERIAL_SHININESS(0) };
+	const float exponent =
+		CLAMP(attrib[MAT_ATTRIB_SHININESS(side)][0], 0, 1024);
+	float coeff[6];
+
+	nv10_get_shininess_coeff(exponent, coeff);
+	BEGIN_RING(ring, eng3d, shininess_mthd[side], 6);
+	OUT_RINGp(ring, coeff, 6);
+}
+
+/* Hardware TnL only: emit modelview and inverse modelview when needed. */
+void
+nv20_emit_modelview(GLcontext *ctx, int emit)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	GLmatrix *mv = ctx->ModelviewMatrixStack.Top;
+	const int eye_texgen =
+		(ctx->Texture._GenFlags & TEXGEN_NEED_EYE_COORD) != 0;
+
+	if (nctx->fallback != HWTNL)
+		return;
+
+	if (ctx->Light._NeedEyeCoords || ctx->Fog.Enabled || eye_texgen) {
+		BEGIN_RING(ring, eng3d, NV20TCL_MODELVIEW0_MATRIX(0), 16);
+		OUT_RINGm(ring, mv->m);
+	}
+
+	if (ctx->Light.Enabled || eye_texgen) {
+		int n;
+		/* Only the first 12 elements of the inverse are sent. */
+		BEGIN_RING(ring, eng3d,
+			   NV20TCL_INVERSE_MODELVIEW0_MATRIX(0), 12);
+		for (n = 0; n < 12; n++)
+			OUT_RINGf(ring, mv->inv[n]);
+	}
+}
+
+/* Emit the projection matrix, starting from the viewport scale. */
+void
+nv20_emit_projection(GLcontext *ctx, int emit)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct nouveau_channel *ring = context_chan(ctx);
+	struct nouveau_grobj *eng3d = context_eng3d(ctx);
+	GLmatrix proj;
+
+	_math_matrix_ctr(&proj);
+	get_viewport_scale(ctx, proj.m);
+	/* With hardware TnL the model-projection matrix is multiplied in. */
+	if (nctx->fallback == HWTNL)
+		_math_matrix_mul_matrix(&proj, &proj, &ctx->_ModelProjectMatrix);
+
+	BEGIN_RING(ring, eng3d, NV20TCL_PROJECTION_MATRIX(0), 16);
+	OUT_RINGm(ring, proj.m);
+	_math_matrix_dtr(&proj);
+}
diff --git a/src/mesa/drivers/dri/r128/Makefile b/src/mesa/drivers/dri/r128/Makefile
new file mode 100644
index 0000000000..8144c9b43f
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/Makefile
@@ -0,0 +1,29 @@
+# src/mesa/drivers/dri/r128/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = r128_dri.so
+
+DRIVER_SOURCES = \
+	r128_context.c \
+	r128_lock.c \
+	r128_state.c \
+	r128_texstate.c \
+	r128_dd.c \
+	r128_screen.c \
+	r128_tex.c \
+	r128_tris.c \
+	r128_ioctl.c \
+	r128_span.c \
+	r128_texmem.c
+
+C_SOURCES = \
+	$(COMMON_SOURCES) \
+	$(DRIVER_SOURCES) 
+
+ASM_SOURCES = 
+
+# The remaining definitions come from ../Makefile.template.
+include ../Makefile.template
+
diff --git a/src/mesa/drivers/dri/r128/r128_context.c b/src/mesa/drivers/dri/r128/r128_context.c
new file mode 100644
index 0000000000..7860708383
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_context.c
@@ -0,0 +1,370 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+#include "main/extensions.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "r128_context.h"
+#include "r128_ioctl.h"
+#include "r128_dd.h"
+#include "r128_state.h"
+#include "r128_span.h"
+#include "r128_tex.h"
+#include "r128_tris.h"
+
+#include "vblank.h"
+#include "utils.h"
+#include "texmem.h"
+#include "xmlpool.h" /* for symbolic values of enum-type options */
+
+#ifndef R128_DEBUG
+int R128_DEBUG = 0;	/* set from the R128_DEBUG env var when DO_DEBUG is on */
+#endif
+
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_secondary_color
+#include "main/remap_helper.h"
+/* Extensions passed to driInitExtensions() when a context is created. */
+static const struct dri_extension card_extensions[] =
+{
+    { "GL_ARB_multitexture",               NULL },
+    { "GL_ARB_texture_env_add",            NULL },
+    { "GL_ARB_texture_mirrored_repeat",    NULL },
+    { "GL_EXT_blend_subtract",             GL_EXT_blend_minmax_functions },
+    { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
+    { "GL_EXT_texture_edge_clamp",         NULL },
+    { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
+    { "GL_EXT_stencil_wrap",               NULL },
+    { "GL_MESA_ycbcr_texture",             NULL },
+    { "GL_NV_blend_square",                NULL },
+    { "GL_SGIS_generate_mipmap",           NULL },
+    { NULL,                                NULL }
+};
+/* Keywords parsed from the R128_DEBUG environment variable (DO_DEBUG). */
+static const struct dri_debug_control debug_control[] =
+{
+    { "ioctl", DEBUG_VERBOSE_IOCTL },
+    { "verb",  DEBUG_VERBOSE_MSG },
+    { "dri",   DEBUG_VERBOSE_DRI },
+    { "2d",    DEBUG_VERBOSE_2D },
+    { "sync",  DEBUG_ALWAYS_SYNC },
+    { "api",   DEBUG_VERBOSE_API },
+    { "fall",  DEBUG_VERBOSE_FALL },
+    { NULL,    0 }
+};
+
+/* Create the device specific context: allocate the r128 and Mesa contexts
+ * (sharing state with sharedContextPrivate when given), then set up texture
+ * heaps, GL limits and helper modules.  GL_FALSE on allocation failure.
+ */
+GLboolean r128CreateContext( gl_api api,
+			     const __GLcontextModes *glVisual,
+			     __DRIcontext *driContextPriv,
+                             void *sharedContextPrivate )
+{
+   GLcontext *ctx, *shareCtx;
+   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+   struct dd_function_table functions;
+   r128ContextPtr rmesa;
+   r128ScreenPtr r128scrn;
+   int i;
+
+   /* Allocate the r128 context */
+   rmesa = (r128ContextPtr) CALLOC( sizeof(*rmesa) );
+   if ( !rmesa )
+      return GL_FALSE;
+
+   /* Init default driver functions then plug in our r128-specific functions
+    * (the texture functions are especially important)
+    */
+   _mesa_init_driver_functions( &functions );
+   r128InitDriverFuncs( &functions );
+   r128InitIoctlFuncs( &functions );
+   r128InitTextureFuncs( &functions );
+
+   /* Allocate the Mesa context */
+   if (sharedContextPrivate)
+      shareCtx = ((r128ContextPtr) sharedContextPrivate)->glCtx;
+   else 
+      shareCtx = NULL;
+   rmesa->glCtx = _mesa_create_context(glVisual, shareCtx,
+                                       &functions, (void *) rmesa);
+   if (!rmesa->glCtx) {
+      FREE(rmesa);
+      return GL_FALSE;
+   }
+   driContextPriv->driverPrivate = rmesa;
+   ctx = rmesa->glCtx;
+
+   rmesa->driContext = driContextPriv;
+   rmesa->driScreen = sPriv;
+   rmesa->driDrawable = NULL;
+   rmesa->hHWContext = driContextPriv->hHWContext;
+   rmesa->driHwLock = &sPriv->pSAREA->lock;
+   rmesa->driFd = sPriv->fd;
+
+   r128scrn = rmesa->r128Screen = (r128ScreenPtr)(sPriv->private);
+
+   /* Parse configuration files */
+   driParseConfigFiles (&rmesa->optionCache, &r128scrn->optionCache,
+                        r128scrn->driScreen->myNum, "r128");
+
+   rmesa->sarea = (drm_r128_sarea_t *)((char *)sPriv->pSAREA +
+				     r128scrn->sarea_priv_offset);
+
+   rmesa->CurrentTexObj[0] = NULL;
+   rmesa->CurrentTexObj[1] = NULL;
+
+   (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) );
+   make_empty_list( & rmesa->swapped );
+
+   rmesa->nr_heaps = r128scrn->numTexHeaps;
+   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+      rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa,
+	    r128scrn->texSize[i],
+	    12,
+	    R128_NR_TEX_REGIONS,
+	    (drmTextureRegionPtr)rmesa->sarea->tex_list[i],
+	    &rmesa->sarea->tex_age[i],
+	    &rmesa->swapped,
+	    sizeof( r128TexObj ),
+	    (destroy_texture_object_t *) r128DestroyTexObj );
+
+      driSetTextureSwapCounterLocation( rmesa->texture_heaps[i],
+					& rmesa->c_textureSwaps );
+   }
+   rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache,
+					   "texture_depth");
+   if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
+      rmesa->texture_depth = ( r128scrn->cpp == 4 ) ?
+	 DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
+
+   rmesa->RenderIndex = -1;		/* Impossible value */
+   rmesa->vert_buf = NULL;
+   rmesa->num_verts = 0;
+   RENDERINPUTS_ONES( rmesa->tnl_state_bitset );
+
+   /* Set the maximum texture size small enough that we can guarantee that
+    * all texture units can bind a maximal texture and have them both in
+    * texturable memory at once.
+    */
+   ctx->Const.MaxTextureUnits = 2;
+   ctx->Const.MaxTextureImageUnits = 2;
+   ctx->Const.MaxTextureCoordUnits = 2;
+
+   driCalculateMaxTextureLevels( rmesa->texture_heaps,
+				 rmesa->nr_heaps,
+				 & ctx->Const,
+				 4,
+				 10, /* max 2D texture size is 1024x1024 */
+				 0,  /* 3D textures unsupported. */
+				 0,  /* cube textures unsupported. */
+				 0,  /* texture rectangles unsupported. */
+				 11,
+				 GL_FALSE,
+				 0 );
+
+   /* No wide points.
+    */
+   ctx->Const.MinPointSize = 1.0;
+   ctx->Const.MinPointSizeAA = 1.0;
+   ctx->Const.MaxPointSize = 1.0;
+   ctx->Const.MaxPointSizeAA = 1.0;
+
+   /* No wide lines.
+    */
+   ctx->Const.MinLineWidth = 1.0;
+   ctx->Const.MinLineWidthAA = 1.0;
+   ctx->Const.MaxLineWidth = 1.0;
+   ctx->Const.MaxLineWidthAA = 1.0;
+   ctx->Const.LineWidthGranularity = 1.0;
+
+   ctx->Const.MaxDrawBuffers = 1;
+
+#if ENABLE_PERF_BOXES
+   rmesa->boxes = driQueryOptionb(&rmesa->optionCache, "performance_boxes");
+#endif
+
+   /* Initialize the software rasterizer and helper modules.
+    */
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+
+   /* Install the customized pipeline:
+    */
+/*     _tnl_destroy_pipeline( ctx ); */
+/*     _tnl_install_pipeline( ctx, r128_pipeline ); */
+
+   /* Configure swrast and T&L to match hardware characteristics:
+    */
+   _swrast_allow_pixel_fog( ctx, GL_FALSE );
+   _swrast_allow_vertex_fog( ctx, GL_TRUE );
+   _tnl_allow_pixel_fog( ctx, GL_FALSE );
+   _tnl_allow_vertex_fog( ctx, GL_TRUE );
+
+   driInitExtensions( ctx, card_extensions, GL_TRUE );
+   if (sPriv->drm_version.minor >= 4)
+      _mesa_enable_extension( ctx, "GL_MESA_ycbcr_texture" );
+
+   r128InitTriFuncs( ctx );
+   r128DDInitStateFuncs( ctx );
+   r128DDInitSpanFuncs( ctx );
+   r128DDInitState( rmesa );
+   /* NOTE(review): driverPrivate was already set right after context creation. */
+   driContextPriv->driverPrivate = (void *)rmesa;
+
+#if DO_DEBUG
+   R128_DEBUG = driParseDebugString( getenv( "R128_DEBUG" ),
+				     debug_control );
+#endif
+
+   if (driQueryOptionb(&rmesa->optionCache, "no_rast")) {
+      fprintf(stderr, "disabling 3D acceleration\n");
+      FALLBACK(rmesa, R128_FALLBACK_DISABLE, 1);
+   }
+
+   return GL_TRUE;
+}
+
+/* Destroy the device specific context and free what it owns.
+ */
+void r128DestroyContext( __DRIcontext *driContextPriv  )
+{
+   r128ContextPtr rmesa = (r128ContextPtr) driContextPriv->driverPrivate;
+
+   assert(rmesa);  /* should never be null */
+   if ( rmesa ) {
+      GLboolean   release_texture_heaps;
+
+      /* Heaps are freed only when the share group has a single reference. */
+      release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1);
+
+      _swsetup_DestroyContext( rmesa->glCtx );
+      _tnl_DestroyContext( rmesa->glCtx );
+      _vbo_DestroyContext( rmesa->glCtx );
+      _swrast_DestroyContext( rmesa->glCtx );
+
+      if ( release_texture_heaps ) {
+         /* This share group is about to go away, free our private
+          * texture object data.
+          */
+         int i;
+
+         for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+	    driDestroyTextureHeap( rmesa->texture_heaps[ i ] );
+	    rmesa->texture_heaps[ i ] = NULL;
+         }
+
+	 assert( is_empty_list( & rmesa->swapped ) );
+      }
+
+      /* free the Mesa context */
+      rmesa->glCtx->DriverCtx = NULL;
+      _mesa_destroy_context(rmesa->glCtx);
+
+      /* free the option cache */
+      driDestroyOptionCache (&rmesa->optionCache);
+
+      FREE( rmesa );
+   }
+
+#if 0
+   /* Use this to force shared object profiling. */
+   glx_fini_prof();
+#endif
+}
+
+
+/* Make driContextPriv the current context and bind it to the given draw and
+ * read drawables; a NULL context unbinds the current Mesa context instead.
+ */
+GLboolean
+r128MakeCurrent( __DRIcontext *driContextPriv,
+                 __DRIdrawable *driDrawPriv,
+                 __DRIdrawable *driReadPriv )
+{
+   if ( driContextPriv ) {
+      GET_CURRENT_CONTEXT(ctx);
+      r128ContextPtr oldR128Ctx = ctx ? R128_CONTEXT(ctx) : NULL;
+      r128ContextPtr newR128Ctx = (r128ContextPtr) driContextPriv->driverPrivate;
+      /* A different context becomes current: flag all state for upload. */
+      if ( newR128Ctx != oldR128Ctx ) {
+	 newR128Ctx->new_state |= R128_NEW_CONTEXT;
+	 newR128Ctx->dirty = R128_UPLOAD_ALL;
+      }
+      /* swap_interval of -1: presumably vblank not set up yet (confirm). */
+      if (driDrawPriv->swap_interval == (unsigned)-1) {
+	 driDrawPriv->vblFlags = (newR128Ctx->r128Screen->irq != 0)
+	    ? driGetDefaultVBlankFlags(&newR128Ctx->optionCache)
+	    : VBLANK_FLAG_NO_IRQ;
+
+	 driDrawableInitVBlank( driDrawPriv );
+      }
+      newR128Ctx->driDrawable = driDrawPriv;
+
+      _mesa_make_current( newR128Ctx->glCtx,
+                          (GLframebuffer *) driDrawPriv->driverPrivate,
+                          (GLframebuffer *) driReadPriv->driverPrivate );
+
+      newR128Ctx->new_state |= R128_NEW_WINDOW | R128_NEW_CLIP;
+   } else {
+      _mesa_make_current( NULL, NULL, NULL );
+   }
+
+   return GL_TRUE;
+}
+
+
+/* Unbind hook: performs no driver work and always returns GL_TRUE.
+ */
+GLboolean
+r128UnbindContext( __DRIcontext *driContextPriv )
+{
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r128/r128_context.h b/src/mesa/drivers/dri/r128/r128_context.h
new file mode 100644
index 0000000000..65ddb3bd23
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_context.h
@@ -0,0 +1,261 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __R128_CONTEXT_H__
+#define __R128_CONTEXT_H__
+
+#include "dri_util.h"
+#include "drm.h"
+#include "r128_drm.h"
+
+#include "main/mtypes.h"
+#include "tnl/t_vertex.h"
+
+#include "r128_reg.h"
+
+#include "texmem.h"
+
+struct r128_context;
+typedef struct r128_context r128ContextRec;
+typedef struct r128_context *r128ContextPtr;
+
+#include "r128_lock.h"
+#include "r128_texobj.h"
+#include "r128_screen.h"
+
+/* Flags for what context state needs to be updated:
+ */
+#define R128_NEW_ALPHA		0x0001
+#define R128_NEW_DEPTH		0x0002
+#define R128_NEW_FOG		0x0004
+#define R128_NEW_CLIP		0x0008
+#define R128_NEW_CULL		0x0010
+#define R128_NEW_MASKS		0x0020
+#define R128_NEW_RENDER_NOT	0x0040
+#define R128_NEW_WINDOW		0x0080
+#define R128_NEW_CONTEXT	0x0100
+#define R128_NEW_ALL		0x01ff
+
+/* Flags for software fallback cases:
+ */
+#define R128_FALLBACK_TEXTURE		0x0001
+#define R128_FALLBACK_DRAW_BUFFER	0x0002
+#define R128_FALLBACK_READ_BUFFER	0x0004
+#define R128_FALLBACK_STENCIL		0x0008
+#define R128_FALLBACK_RENDER_MODE	0x0010
+#define R128_FALLBACK_LOGICOP		0x0020
+#define R128_FALLBACK_SEP_SPECULAR	0x0040
+#define R128_FALLBACK_BLEND_EQ		0x0080
+#define R128_FALLBACK_BLEND_FUNC	0x0100
+#define R128_FALLBACK_PROJTEX		0x0200
+#define R128_FALLBACK_DISABLE		0x0400
+
+
+/* Use the templated vertex format:
+ */
+#define TAG(x) r128##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+/* Reasons why the GL_BLEND fallback mightn't work:
+ */
+#define R128_BLEND_ENV_COLOR	0x1
+#define R128_BLEND_MULTITEX	0x2
+
+/* Subpixel offsets for window coordinates (triangles):
+ */
+#define SUBPIXEL_X  (0.0F)
+#define SUBPIXEL_Y  (0.125F)
+
+
+/* Triangle rasterizer hook: driver context plus three vertices. */
+typedef void (*r128_tri_func)( r128ContextPtr rmesa, r128Vertex *v0,
+			       r128Vertex *v1, r128Vertex *v2 );
+
+/* Line rasterizer hook: driver context plus two vertices. */
+typedef void (*r128_line_func)( r128ContextPtr rmesa,
+				r128Vertex *v0, r128Vertex *v1 );
+
+/* Point rasterizer hook: driver context plus one vertex. */
+typedef void (*r128_point_func)( r128ContextPtr rmesa,
+				 r128Vertex *v0 );
+
+
+struct r128_context {	/* per-context driver state; reached from GLcontext via R128_CONTEXT() */
+   GLcontext *glCtx;			/* Mesa context */
+
+   /* Driver and hardware state management
+    */
+   GLuint new_state;			/* pending R128_NEW_* flags */
+   GLuint dirty;			/* Hardware state to be updated */
+   drm_r128_context_regs_t setup;	/* register values, e.g. plane_3d_mask_c used by clears */
+
+   /* Vertex state */
+   GLuint vertex_size;
+   GLuint vertex_format;
+   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+   GLuint vertex_attr_count;
+   char *verts;			/* points to tnl->clipspace.vertex_buf */
+   GLuint num_verts;			/* vertex count submitted with vert_buf */
+   int coloroffset, specoffset;
+   DECLARE_RENDERINPUTS(tnl_state_bitset);	/* tnl->render_inputs for this _tnl_install_attrs */
+
+   GLuint NewGLState;
+   GLuint Fallback;			/* presumably R128_FALLBACK_* bits -- TODO confirm */
+   GLuint RenderIndex;
+   GLfloat hw_viewport[16];
+   GLfloat depth_scale;
+
+   uint32_t ClearColor;			/* Color used to clear color buffer */
+   uint32_t ClearDepth;			/* Value used to clear depth buffer */
+   uint32_t ClearStencil;		/* Value used to clear stencil */
+
+   /* Map GL texture units onto hardware
+    */
+   GLint multitex;
+   GLint tmu_source[2];
+   GLuint tex_combine[2];
+   GLuint blend_flags;			/* presumably R128_BLEND_* bits -- TODO confirm */
+   GLuint env_color;
+
+   /* Texture object bookkeeping
+    */
+   unsigned              nr_heaps;
+   driTexHeap          * texture_heaps[ R128_NR_TEX_HEAPS ];
+   driTextureObject      swapped;	/* list head; asserted empty when the context is destroyed */
+
+   r128TexObjPtr CurrentTexObj[2];
+
+   int texture_depth;
+ 
+   /* Fallback rasterization functions
+    */
+   r128_point_func draw_point;
+   r128_line_func draw_line;
+   r128_tri_func draw_tri;
+
+   /* Vertex buffers
+    */
+   drmBufPtr vert_buf;			/* current vertex buffer; NULL when none is held */
+
+   GLuint hw_primitive;			/* prim value sent with DRM_R128_VERTEX */
+   GLenum render_primitive;
+
+   /* Page flipping
+    */
+   GLuint doPageFlip;
+
+   /* Cliprect and scissor information
+    */
+   GLuint numClipRects;			/* Cliprects for the draw buffer */
+   drm_clip_rect_t *pClipRects;
+
+   GLuint scissor;
+   drm_clip_rect_t ScissorRect;	/* Current software scissor */
+
+   /* Mirrors of some DRI state
+    */
+   __DRIcontext	*driContext;	/* DRI context */
+   __DRIscreen	*driScreen;	/* DRI screen */
+   __DRIdrawable	*driDrawable;	/* DRI drawable bound to this ctx */
+
+   unsigned int lastStamp;	        /* mirror driDrawable->lastStamp */
+
+   drm_context_t hHWContext;
+   drm_hw_lock_t *driHwLock;
+   int driFd;				/* fd used for the drmCommand*() ioctls */
+
+   r128ScreenPtr r128Screen;		/* Screen private DRI data */
+   drm_r128_sarea_t *sarea;		/* Private SAREA data */
+
+   /* Performance counters
+    */
+   GLuint boxes;			/* Draw performance boxes */
+   GLuint hardwareWentIdle;		/* 1 when the last swap/flip needed no frame wait */
+   GLuint c_clears;
+   GLuint c_drawWaits;			/* bumped by r128Finish when ENABLE_PERF_BOXES */
+   GLuint c_textureSwaps;
+   GLuint c_textureBytes;
+   GLuint c_vertexBuffers;		/* bumped per buffer from r128GetBufferLocked */
+
+   /* Configuration cache
+    */
+   driOptionCache optionCache;
+};
+
+#define R128_CONTEXT(ctx)	((r128ContextPtr)((ctx)->DriverCtx))	/* GLcontext -> driver context */
+/* Chipset predicates; macro arguments are parenthesised so any expression is safe. */
+#define R128_IS_PLAIN( rmesa ) \
+		((rmesa)->r128Screen->chipset == R128_CARD_TYPE_R128)
+#define R128_IS_PRO( rmesa ) \
+		((rmesa)->r128Screen->chipset == R128_CARD_TYPE_R128_PRO)
+#define R128_IS_MOBILITY( rmesa ) \
+		((rmesa)->r128Screen->chipset == R128_CARD_TYPE_R128_MOBILITY)
+
+
+extern GLboolean r128CreateContext( gl_api api,
+				    const __GLcontextModes *glVisual,
+				    __DRIcontext *driContextPriv,
+                                    void *sharedContextPrivate );
+
+extern void r128DestroyContext( __DRIcontext * );
+
+extern GLboolean r128MakeCurrent( __DRIcontext *driContextPriv,
+                                  __DRIdrawable *driDrawPriv,
+                                  __DRIdrawable *driReadPriv );
+
+extern GLboolean r128UnbindContext( __DRIcontext *driContextPriv );
+
+/* ================================================================
+ * Debugging:
+ */
+#define DO_DEBUG		1
+#define ENABLE_PERF_BOXES	0
+
+#if DO_DEBUG
+extern int R128_DEBUG;
+#else
+#define R128_DEBUG		0
+#endif
+
+#define DEBUG_ALWAYS_SYNC	0x01
+#define DEBUG_VERBOSE_API	0x02
+#define DEBUG_VERBOSE_MSG	0x04
+#define DEBUG_VERBOSE_LRU	0x08
+#define DEBUG_VERBOSE_DRI	0x10
+#define DEBUG_VERBOSE_IOCTL	0x20
+#define DEBUG_VERBOSE_2D	0x40
+#define DEBUG_VERBOSE_FALL	0x80
+
+#endif /* __R128_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/r128/r128_dd.c b/src/mesa/drivers/dri/r128/r128_dd.c
new file mode 100644
index 0000000000..64dec70cdd
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_dd.c
@@ -0,0 +1,143 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *
+ */
+
+#include "r128_context.h"
+#include "r128_ioctl.h"
+#include "r128_dd.h"
+
+#include "main/context.h"
+
+#include "utils.h"
+
+#define DRIVER_DATE	"20051027"
+
+
+/* Report the size of the drawable bound to the current context (`buffer' is unused). */
+static void r128GetBufferSize( GLframebuffer *buffer,
+				 GLuint *width, GLuint *height )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   __DRIdrawable *drawable;
+   LOCK_HARDWARE( rmesa );
+   drawable = rmesa->driDrawable;	/* read while the hardware lock is held */
+   *width  = drawable->w;
+   *height = drawable->h;
+   UNLOCK_HARDWARE( rmesa );
+}
+
+/* Return various strings for glGetString(); names other than GL_VENDOR and
+ * GL_RENDERER yield NULL.  GL_RENDERER is formatted into a static buffer, so
+ * the result is overwritten by the next call. */
+static const GLubyte *r128GetString( GLcontext *ctx, GLenum name )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   static char buffer[128];
+   const char * card_name = "Rage 128";
+   GLuint agp_mode = rmesa->r128Screen->IsPCI ? 0 :
+      rmesa->r128Screen->AGPMode;
+
+   switch ( name ) {
+   case GL_VENDOR:
+      return (const GLubyte *)"VA Linux Systems, Inc.";
+
+   case GL_RENDERER:
+      /* Select the specific chipset.
+       */
+      if ( R128_IS_PRO( rmesa ) ) {
+	 card_name = "Rage 128 Pro";
+      }
+      else if ( R128_IS_MOBILITY( rmesa ) ) {
+	 card_name = "Rage 128 Mobility";
+      }
+
+      /* The return value is not needed here. */
+      (void) driGetRendererString( buffer, card_name, DRIVER_DATE, agp_mode );
+
+      return (const GLubyte *)buffer;
+
+   default:
+      return NULL;
+   }
+}
+
+/* Send all commands to the hardware.  If vertex buffers or indirect
+ * buffers are in use, then we need to make sure they are sent to the
+ * hardware.  All commands that are normally sent to the ring are
+ * already considered `flushed'.  The flush itself is FLUSH_BATCH().
+ */
+static void r128Flush( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+
+   FLUSH_BATCH( rmesa );
+
+#if ENABLE_PERF_BOXES	/* defined as 0 in r128_context.h, so compiled out by default */
+   if ( rmesa->boxes ) {
+      LOCK_HARDWARE( rmesa );
+      r128PerformanceBoxesLocked( rmesa );
+      UNLOCK_HARDWARE( rmesa );
+   }
+
+   /* Log the performance counters if necessary */
+   r128PerformanceCounters( rmesa );
+#endif
+}
+
+/* Make sure all commands have been sent to the hardware and have
+ * completed processing: r128Flush() followed by r128WaitForIdle().
+ */
+static void r128Finish( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+
+#if ENABLE_PERF_BOXES	/* defined as 0 in r128_context.h, so compiled out by default */
+   /* Bump the performance counter */
+   rmesa->c_drawWaits++;
+#endif
+
+   r128Flush( ctx );		/* submit anything still queued */
+   r128WaitForIdle( rmesa );	/* then wait for the engine to go idle */
+}
+
+
+/* Plug this file's hooks (flush/finish, strings, buffer size) into the
+ * device-driver function table. */
+void r128InitDriverFuncs( struct dd_function_table *functions )
+{
+   functions->Flush = r128Flush;
+   functions->Finish = r128Finish;
+   functions->GetString = r128GetString;
+   functions->GetBufferSize = r128GetBufferSize;
+}
diff --git a/src/mesa/drivers/dri/r128/r128_dd.h b/src/mesa/drivers/dri/r128/r128_dd.h
new file mode 100644
index 0000000000..ce038853c4
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_dd.h
@@ -0,0 +1,40 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *
+ */
+
+#ifndef __R128_DD_H__
+#define __R128_DD_H__
+
+extern void r128InitDriverFuncs( struct dd_function_table *functions );
+
+#endif
diff --git a/src/mesa/drivers/dri/r128/r128_ioctl.c b/src/mesa/drivers/dri/r128/r128_ioctl.c
new file mode 100644
index 0000000000..56758d971c
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_ioctl.c
@@ -0,0 +1,820 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+#include <errno.h>
+
+#define STANDALONE_MMIO
+#include "r128_context.h"
+#include "r128_state.h"
+#include "r128_ioctl.h"
+#include "main/imports.h"
+#include "main/macros.h"
+
+#include "swrast/swrast.h"
+
+#include "vblank.h"
+#include "mmio.h"
+#include "drirenderbuffer.h"
+
+#define R128_TIMEOUT        2048
+#define R128_IDLE_RETRY       32
+
+
+/* =============================================================
+ * Hardware vertex buffer handling
+ */
+
+/* Get a new VB from the pool of vertex buffers in AGP space.  Retries the DMA
+ * request up to R128_TIMEOUT times; if none is granted the CCE is reset, the
+ * hardware lock is dropped and the process exits.
+ */
+drmBufPtr r128GetBufferLocked( r128ContextPtr rmesa )
+{
+   const int fd = rmesa->r128Screen->driScreen->fd;
+   int granted_index = 0;
+   int granted_size = 0;
+   int attempt;
+   drmDMAReq req;
+
+   /* Request exactly one buffer of R128_BUFFER_SIZE bytes; send nothing. */
+   req.context = rmesa->hHWContext;
+   req.flags = 0;
+   req.send_count = 0;
+   req.send_list = NULL;
+   req.send_sizes = NULL;
+   req.request_count = 1;
+   req.request_size = R128_BUFFER_SIZE;
+   req.request_list = &granted_index;
+   req.request_sizes = &granted_size;
+   req.granted_count = 0;
+
+   for ( attempt = 0 ; attempt < R128_TIMEOUT ; attempt++ ) {
+      drmBufPtr buf;
+
+      if ( drmDMA( fd, &req ) != 0 )
+	 continue;
+
+      /* Granted: hand the buffer back with nothing used yet. */
+      buf = &rmesa->r128Screen->buffers->list[granted_index];
+      buf->used = 0;
+#if ENABLE_PERF_BOXES
+      /* Bump the performance counter */
+      rmesa->c_vertexBuffers++;
+#endif
+      return buf;
+   }
+
+   drmCommandNone( fd, DRM_R128_CCE_RESET);
+   UNLOCK_HARDWARE( rmesa );
+   fprintf( stderr, "Error: Could not get new VB... exiting\n" );
+   exit( -1 );
+   return NULL;	/* not reached */
+}
+/* Submit rmesa->vert_buf to the kernel with DRM_R128_VERTEX and detach it from the context. */
+void r128FlushVerticesLocked( r128ContextPtr rmesa )
+{
+   drm_clip_rect_t *pbox = rmesa->pClipRects;
+   int nbox = rmesa->numClipRects;
+   drmBufPtr buffer = rmesa->vert_buf;
+   int count = rmesa->num_verts;
+   int prim = rmesa->hw_primitive;
+   int fd = rmesa->driScreen->fd;
+   drm_r128_vertex_t vertex;
+   int i;
+
+   rmesa->num_verts = 0;	/* the context gives up the buffer before anything else */
+   rmesa->vert_buf = NULL;
+
+   if ( !buffer )
+      return;
+
+   if ( rmesa->dirty & ~R128_UPLOAD_CLIPRECTS )	/* any non-cliprect state pending? */
+      r128EmitHwStateLocked( rmesa );
+
+   if ( !nbox )		/* no cliprects: submit zero vertices */
+      count = 0;
+
+   if ( nbox >= R128_NR_SAREA_CLIPRECTS )	/* forces the batched path below */
+      rmesa->dirty |= R128_UPLOAD_CLIPRECTS;
+
+   if ( !count || !(rmesa->dirty & R128_UPLOAD_CLIPRECTS) )
+   {
+      if ( nbox < 3 ) {	/* NOTE(review): reason for the 3-box threshold is not visible here */
+	 rmesa->sarea->nbox = 0;
+      } else {
+	 rmesa->sarea->nbox = nbox;
+      }
+
+      vertex.prim = prim;
+      vertex.idx = buffer->idx;
+      vertex.count = count;
+      vertex.discard = 1;	/* single submission: discard flag set */
+      drmCommandWrite( fd, DRM_R128_VERTEX, &vertex, sizeof(vertex) );
+   }
+   else
+   {
+      for ( i = 0 ; i < nbox ; ) {	/* i is advanced by the copy loop below */
+	 int nr = MIN2( i + R128_NR_SAREA_CLIPRECTS, nbox );
+	 drm_clip_rect_t *b = rmesa->sarea->boxes;
+	 int discard = 0;
+
+	 rmesa->sarea->nbox = nr - i;
+	 for ( ; i < nr ; i++ ) {
+	    *b++ = pbox[i];
+	 }
+
+	 /* Finished with the buffer?
+	  */
+	 if ( nr == nbox ) {
+	    discard = 1;
+	 }
+
+	 rmesa->sarea->dirty |= R128_UPLOAD_CLIPRECTS;
+
+         vertex.prim = prim;	/* same buffer is resubmitted for every batch of cliprects */
+         vertex.idx = buffer->idx;
+         vertex.count = count;
+         vertex.discard = discard;
+         drmCommandWrite( fd, DRM_R128_VERTEX, &vertex, sizeof(vertex) );
+      }
+   }
+
+   rmesa->dirty &= ~R128_UPLOAD_CLIPRECTS;
+}
+
+
+
+
+
+/* ================================================================
+ * Texture uploads
+ */
+
+/* Submit a DRM_R128_BLIT request for `buffer'.  If the ioctl fails, the
+ * hardware lock is dropped, the error is printed and the process exits. */
+void r128FireBlitLocked( r128ContextPtr rmesa, drmBufPtr buffer,
+			 GLint offset, GLint pitch, GLint format,
+			 GLint x, GLint y, GLint width, GLint height )
+{
+   drm_r128_blit_t req;
+   GLint status;
+
+   req.idx = buffer->idx;
+   req.format = format;
+   req.pitch = pitch;
+   req.offset = offset;
+   req.x = x;
+   req.y = y;
+   req.width = width;
+   req.height = height;
+
+   status = drmCommandWrite( rmesa->driFd, DRM_R128_BLIT, &req, sizeof(req) );
+   if ( status == 0 )
+      return;
+   UNLOCK_HARDWARE( rmesa );
+   fprintf( stderr, "DRM_R128_BLIT: return = %d\n", status );
+   exit( 1 );
+}
+
+
+/* ================================================================
+ * SwapBuffers with client-side throttling
+ */
+
+static void delay( void ) {
+/* Meant to keep a spin loop from being optimized out; NOTE(review): an empty static function may be inlined away */
+}
+
+#define R128_MAX_OUTSTANDING	2
+
+
+/* Throttle the frame rate: spin until sarea->last_frame is no more than
+ * R128_MAX_OUTSTANDING ahead of the counter read from R128_LAST_FRAME_REG.
+ * Returns the number of spin rounds taken (0 if no waiting was required).
+ * GH: We probably don't want a timeout here, as we can wait as
+ * long as we want for a frame to complete.  If it never does, then
+ * the card has locked.
+ */
+static int r128WaitForFrameCompletion( r128ContextPtr rmesa )
+{
+   unsigned char *mmio = rmesa->r128Screen->mmio.map;
+   int rounds;
+
+   for ( rounds = 0 ; ; rounds++ ) {
+      const uint32_t hw_frame = read_MMIO_LE32( mmio, R128_LAST_FRAME_REG );
+      int spin;
+
+      if ( rmesa->sarea->last_frame - hw_frame <= R128_MAX_OUTSTANDING ) {
+	 break;
+      }
+
+      /* Spin in place a bit so we aren't hammering the register */
+      for ( spin = 1024 ; spin > 0 ; spin-- ) {
+	 delay();
+      }
+   }
+
+   return rounds;
+}
+
+/* Copy the back color buffer to the front color buffer: one DRM_R128_SWAP
+ * per batch of dPriv's cliprects.  A failed ioctl exits the process. */
+void r128CopyBuffer( __DRIdrawable *dPriv )
+{
+   r128ContextPtr rmesa;
+   GLint nbox, i, ret;
+   GLboolean missed_target;
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   rmesa = (r128ContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   if ( R128_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "\n********************************\n" );
+      fprintf( stderr, "\n%s( %p )\n\n",
+	       __FUNCTION__, (void *)rmesa->glCtx );
+      fflush( stderr );
+   }
+
+   FLUSH_BATCH( rmesa );
+
+   LOCK_HARDWARE( rmesa );
+
+   /* Throttle the frame rate -- wait until at most R128_MAX_OUTSTANDING
+    * frames are pending.  A zero return means no spinning was needed.
+    */
+   if ( !r128WaitForFrameCompletion( rmesa ) ) {
+      rmesa->hardwareWentIdle = 1;
+   } else {
+      rmesa->hardwareWentIdle = 0;
+   }
+
+   UNLOCK_HARDWARE( rmesa );	/* the lock is not held across the vblank wait */
+   driWaitForVBlank( dPriv, &missed_target );
+   LOCK_HARDWARE( rmesa );
+
+   nbox = dPriv->numClipRects;	/* must be in locked region */
+
+   for ( i = 0 ; i < nbox ; ) {	/* i is advanced by the copy loop below */
+      GLint nr = MIN2( i + R128_NR_SAREA_CLIPRECTS , nbox );
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t *b = rmesa->sarea->boxes;
+      GLint n = 0;
+
+      for ( ; i < nr ; i++ ) {	/* copy this batch of cliprects into the SAREA */
+	 *b++ = box[i];
+	 n++;
+      }
+      rmesa->sarea->nbox = n;
+
+      ret = drmCommandNone( rmesa->driFd, DRM_R128_SWAP );
+
+      if ( ret ) {
+	 UNLOCK_HARDWARE( rmesa );
+	 fprintf( stderr, "DRM_R128_SWAP: return = %d\n", ret );
+	 exit( 1 );
+      }
+   }
+
+   if ( R128_DEBUG & DEBUG_ALWAYS_SYNC ) {	/* debug aid: wait for the CCE to go idle */
+      i = 0;
+      do {
+         ret = drmCommandNone(rmesa->driFd, DRM_R128_CCE_IDLE);
+      } while ( ret && errno == EBUSY && i++ < R128_IDLE_RETRY );
+   }
+
+   UNLOCK_HARDWARE( rmesa );
+
+   rmesa->new_state |= R128_NEW_CONTEXT;
+   rmesa->dirty |= (R128_UPLOAD_CONTEXT |
+		    R128_UPLOAD_MASKS |
+		    R128_UPLOAD_CLIPRECTS);
+
+#if ENABLE_PERF_BOXES
+   /* Log the performance counters if necessary */
+   r128PerformanceCounters( rmesa );
+#endif
+}
+/* Issue DRM_R128_FLIP for dPriv's context, then point the renderbuffers at the new page. */
+void r128PageFlip( __DRIdrawable *dPriv )
+{
+   r128ContextPtr rmesa;
+   GLint ret;
+   GLboolean missed_target;
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   rmesa = (r128ContextPtr) dPriv->driContextPriv->driverPrivate;
+
+   if ( R128_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "\n%s( %p ): page=%d\n\n",
+	       __FUNCTION__, (void *)rmesa->glCtx, rmesa->sarea->pfCurrentPage );
+   }
+
+   FLUSH_BATCH( rmesa );
+
+   LOCK_HARDWARE( rmesa );
+
+   /* Throttle the frame rate -- wait until at most R128_MAX_OUTSTANDING
+    * frames are pending.  A zero return means no spinning was needed.
+    */
+   if ( !r128WaitForFrameCompletion( rmesa ) ) {
+      rmesa->hardwareWentIdle = 1;
+   } else {
+      rmesa->hardwareWentIdle = 0;
+   }
+
+   UNLOCK_HARDWARE( rmesa );	/* the lock is not held across the vblank wait */
+   driWaitForVBlank( dPriv, &missed_target );
+   LOCK_HARDWARE( rmesa );
+
+   /* The kernel will have been initialized to perform page flipping
+    * on a swapbuffers ioctl.
+    */
+   ret = drmCommandNone( rmesa->driFd, DRM_R128_FLIP );
+
+   UNLOCK_HARDWARE( rmesa );
+
+   if ( ret ) {	/* unlike the swap path, the lock is already released here */
+      fprintf( stderr, "DRM_R128_FLIP: return = %d\n", ret );
+      exit( 1 );
+   }
+
+   /* Get ready for drawing next frame.  Update the renderbuffers'
+    * flippedOffset/Pitch fields so we draw into the right place.
+    */
+   driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer,
+                        rmesa->sarea->pfCurrentPage);
+
+   rmesa->new_state |= R128_NEW_WINDOW;
+
+   /* FIXME: Do we need this anymore? */
+   rmesa->new_state |= R128_NEW_CONTEXT;
+   rmesa->dirty |= (R128_UPLOAD_CONTEXT |
+		    R128_UPLOAD_MASKS |
+		    R128_UPLOAD_CLIPRECTS);
+
+#if ENABLE_PERF_BOXES
+   /* Log the performance counters if necessary */
+   r128PerformanceCounters( rmesa );
+#endif
+}
+
+
+/* ================================================================
+ * Buffer clear
+ */
+/* Clear the buffers named in `mask': hardware-capable ones via DRM_R128_CLEAR, the rest via swrast. */
+static void r128Clear( GLcontext *ctx, GLbitfield mask )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   __DRIdrawable *dPriv = rmesa->driDrawable;
+   drm_r128_clear_t clear;
+   GLuint flags = 0;		/* R128_FRONT/BACK/DEPTH bits for the ioctl */
+   GLint i;
+   GLint ret;
+   GLuint depthmask = 0;	/* low 24 bits: depth, high 8 bits: stencil write mask */
+   GLint cx, cy, cw, ch;
+
+   if ( R128_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s:\n", __FUNCTION__ );
+   }
+
+   FLUSH_BATCH( rmesa );
+
+   /* The only state change we care about here is the RGBA colormask
+    * We'll just update that state, if needed.  If we do more then
+    * there's some strange side-effects that the conformance tests find.
+    */
+   if ( rmesa->new_state & R128_NEW_MASKS) {
+      const GLuint save_state = rmesa->new_state;
+      rmesa->new_state = R128_NEW_MASKS;	/* expose only the mask flag to the update */
+      r128DDUpdateHWState( ctx );
+      rmesa->new_state = save_state & ~R128_NEW_MASKS;	/* restore the other pending flags */
+   }
+
+   if ( mask & BUFFER_BIT_FRONT_LEFT ) {	/* each bit handled here is removed from mask */
+      flags |= R128_FRONT;
+      mask &= ~BUFFER_BIT_FRONT_LEFT;
+   }
+
+   if ( mask & BUFFER_BIT_BACK_LEFT ) {
+      flags |= R128_BACK;
+      mask &= ~BUFFER_BIT_BACK_LEFT;
+   }
+
+   if ( ( mask & BUFFER_BIT_DEPTH ) && ctx->Depth.Mask ) {	/* only when depth writes are enabled */
+      flags |= R128_DEPTH;
+      /* if we're at 16 bits, extra plane mask won't hurt */
+      depthmask |= 0x00ffffff;
+      mask &= ~BUFFER_BIT_DEPTH;
+   }
+
+   if ( mask & BUFFER_BIT_STENCIL &&
+	(ctx->Visual.stencilBits > 0 && ctx->Visual.depthBits == 24) ) {
+      flags |= R128_DEPTH;	/* stencil is cleared through the depth plane mask */
+      depthmask |= ctx->Stencil.WriteMask[0] << 24;
+      mask &= ~BUFFER_BIT_STENCIL;
+   }
+
+   if ( flags ) {
+
+      LOCK_HARDWARE( rmesa );
+
+      /* compute region after locking: */
+      cx = ctx->DrawBuffer->_Xmin;
+      cy = ctx->DrawBuffer->_Ymin;
+      cw = ctx->DrawBuffer->_Xmax - cx;
+      ch = ctx->DrawBuffer->_Ymax - cy;
+
+      /* Flip top to bottom */
+      cx += dPriv->x;
+      cy  = dPriv->y + dPriv->h - cy - ch;
+
+      /* FIXME: Do we actually need this?
+       */
+      if ( rmesa->dirty & ~R128_UPLOAD_CLIPRECTS ) {
+	 r128EmitHwStateLocked( rmesa );
+      }
+
+      for ( i = 0 ; i < rmesa->numClipRects ; ) {	/* i is advanced by the inner loops */
+	 GLint nr = MIN2( i + R128_NR_SAREA_CLIPRECTS , rmesa->numClipRects );
+	 drm_clip_rect_t *box = rmesa->pClipRects;
+	 drm_clip_rect_t *b = rmesa->sarea->boxes;
+	 GLint n = 0;
+
+	 if (cw != dPriv->w || ch != dPriv->h) {
+            /* clear subregion */
+	    for ( ; i < nr ; i++ ) {
+	       GLint x = box[i].x1;
+	       GLint y = box[i].y1;
+	       GLint w = box[i].x2 - x;
+	       GLint h = box[i].y2 - y;
+
+	       if ( x < cx ) w -= cx - x, x = cx;	/* intersect the cliprect with the clear region */
+	       if ( y < cy ) h -= cy - y, y = cy;
+	       if ( x + w > cx + cw ) w = cx + cw - x;
+	       if ( y + h > cy + ch ) h = cy + ch - y;
+	       if ( w <= 0 ) continue;	/* empty intersection: skip this box */
+	       if ( h <= 0 ) continue;
+
+	       b->x1 = x;
+	       b->y1 = y;
+	       b->x2 = x + w;
+	       b->y2 = y + h;
+	       b++;
+	       n++;
+	    }
+	 } else {
+            /* clear whole window */
+	    for ( ; i < nr ; i++ ) {
+	       *b++ = box[i];
+	       n++;
+	    }
+	 }
+
+	 rmesa->sarea->nbox = n;
+
+	 if ( R128_DEBUG & DEBUG_VERBOSE_IOCTL ) {
+	    fprintf( stderr,
+		     "DRM_R128_CLEAR: flag 0x%x color %x depth %x nbox %d\n",
+		     flags,
+		     (GLuint)rmesa->ClearColor,
+		     (GLuint)rmesa->ClearDepth,
+		     rmesa->sarea->nbox );
+	 }
+
+         clear.flags = flags;
+         clear.clear_color = rmesa->ClearColor;
+         clear.clear_depth = rmesa->ClearDepth;
+         clear.color_mask = rmesa->setup.plane_3d_mask_c;
+         clear.depth_mask = depthmask;
+
+         ret = drmCommandWrite( rmesa->driFd, DRM_R128_CLEAR,
+                                &clear, sizeof(clear) );
+
+	 if ( ret ) {
+	    UNLOCK_HARDWARE( rmesa );
+	    fprintf( stderr, "DRM_R128_CLEAR: return = %d\n", ret );
+	    exit( 1 );
+	 }
+      }
+
+      UNLOCK_HARDWARE( rmesa );
+
+      rmesa->dirty |= R128_UPLOAD_CLIPRECTS;
+   }
+
+   if ( mask )	/* whatever was not handled above goes to the software rasterizer */
+      _swrast_Clear( ctx, mask );
+}
+
+
+/* ================================================================
+ * Depth spans, pixels
+ */
+
+/* Write n depth values for the horizontal span starting at (x, y) by way of
+ * the DRM_R128_DEPTH ioctl.  `depth' and `mask' are handed to the kernel
+ * unchanged.  Nothing is done when there are no cliprects or n is zero.
+ *
+ * If the cliprects are flagged for upload (or there are at least
+ * R128_NR_SAREA_CLIPRECTS of them) they are copied into the SAREA in batches
+ * and the ioctl is issued once per batch; otherwise it is issued once.
+ */
+void r128WriteDepthSpanLocked( r128ContextPtr rmesa,
+			       GLuint n, GLint x, GLint y,
+			       const GLuint depth[],
+			       const GLubyte mask[] )
+{
+   const drm_clip_rect_t *rects = rmesa->pClipRects;
+   const int nrects = rmesa->numClipRects;
+   const int fd = rmesa->driScreen->fd;
+   drm_r128_depth_t req;
+   int done = 0;
+
+   if ( !nrects || !n ) {
+      return;
+   }
+
+   /* The request is identical for every submission, so fill it in once.
+    * x and y are scalars here, hence the address-of.
+    */
+   req.func = R128_WRITE_SPAN;
+   req.n = n;
+   req.x = (int*)&x;
+   req.y = (int*)&y;
+   req.buffer = (unsigned int *)depth;
+   req.mask = (unsigned char *)mask;
+
+   if ( nrects >= R128_NR_SAREA_CLIPRECTS ) {
+      rmesa->dirty |= R128_UPLOAD_CLIPRECTS;
+   }
+
+   if ( rmesa->dirty & R128_UPLOAD_CLIPRECTS ) {
+      /* Batched path: one ioctl per SAREA-sized group of cliprects. */
+      while ( done < nrects ) {
+	 const int end = MIN2( done + R128_NR_SAREA_CLIPRECTS, nrects );
+	 drm_clip_rect_t *dst = rmesa->sarea->boxes;
+
+	 rmesa->sarea->nbox = end - done;
+	 while ( done < end ) {
+	    *dst++ = rects[done++];
+	 }
+
+	 rmesa->sarea->dirty |= R128_UPLOAD_CLIPRECTS;
+
+	 drmCommandWrite( fd, DRM_R128_DEPTH, &req, sizeof(req) );
+      }
+   } else {
+      /* Single submission; fewer than three boxes are reported as zero. */
+      rmesa->sarea->nbox = ( nrects < 3 ) ? 0 : nrects;
+
+      drmCommandWrite( fd, DRM_R128_DEPTH, &req, sizeof(req) );
+   }
+
+   /* The cliprect upload flag is always cleared on the way out. */
+   rmesa->dirty &= ~R128_UPLOAD_CLIPRECTS;
+}
+
+/* Write n depth values at the scattered positions (x[i], y[i]) by way of the
+ * DRM_R128_DEPTH ioctl.  `depth' and `mask' are handed to the kernel
+ * unchanged.  Nothing is done when there are no cliprects or n is zero.
+ *
+ * If the cliprects are flagged for upload (or there are at least
+ * R128_NR_SAREA_CLIPRECTS of them) they are copied into the SAREA in batches
+ * and the ioctl is issued once per batch; otherwise it is issued once.
+ */
+void r128WriteDepthPixelsLocked( r128ContextPtr rmesa, GLuint n,
+				 const GLint x[], const GLint y[],
+				 const GLuint depth[],
+				 const GLubyte mask[] )
+{
+   const drm_clip_rect_t *rects = rmesa->pClipRects;
+   const int nrects = rmesa->numClipRects;
+   const int fd = rmesa->driScreen->fd;
+   drm_r128_depth_t req;
+   int done = 0;
+
+   if ( !nrects || !n ) {
+      return;
+   }
+
+   /* x and y are arrays of n coordinates, so the array pointers themselves
+    * are passed.  Taking &x / &y would hand over the address of the local
+    * pointer variable instead of the coordinate data. */
+   req.func = R128_WRITE_PIXELS;
+   req.n = n;
+   req.x = (int *)x;
+   req.y = (int *)y;
+   req.buffer = (unsigned int *)depth;
+   req.mask = (unsigned char *)mask;
+
+   if ( nrects >= R128_NR_SAREA_CLIPRECTS ) {
+      rmesa->dirty |= R128_UPLOAD_CLIPRECTS;
+   }
+
+   if ( rmesa->dirty & R128_UPLOAD_CLIPRECTS ) {
+      /* Batched path: one ioctl per SAREA-sized group of cliprects. */
+      while ( done < nrects ) {
+	 const int end = MIN2( done + R128_NR_SAREA_CLIPRECTS, nrects );
+	 drm_clip_rect_t *dst = rmesa->sarea->boxes;
+
+	 rmesa->sarea->nbox = end - done;
+	 while ( done < end ) {
+	    *dst++ = rects[done++];
+	 }
+
+	 rmesa->sarea->dirty |= R128_UPLOAD_CLIPRECTS;
+	 drmCommandWrite( fd, DRM_R128_DEPTH, &req, sizeof(req) );
+      }
+   } else {
+      /* Single submission; fewer than three boxes are reported as zero. */
+      rmesa->sarea->nbox = ( nrects < 3 ) ? 0 : nrects;
+
+      drmCommandWrite( fd, DRM_R128_DEPTH, &req, sizeof(req) );
+   }
+
+   /* The cliprect upload flag is always cleared on the way out. */
+   rmesa->dirty &= ~R128_UPLOAD_CLIPRECTS;
+}
+
+/* Ask the kernel (DRM_R128_DEPTH, R128_READ_SPAN) to read n depth values for
+ * the horizontal span starting at (x, y).  No user buffer is passed (buffer
+ * and mask are NULL).  Nothing is done with no cliprects or when n is zero.
+ *
+ * If the cliprects are flagged for upload (or there are at least
+ * R128_NR_SAREA_CLIPRECTS of them) they are copied into the SAREA in batches
+ * and the ioctl is issued once per batch; otherwise it is issued once.
+ */
+void r128ReadDepthSpanLocked( r128ContextPtr rmesa,
+			      GLuint n, GLint x, GLint y )
+{
+   const drm_clip_rect_t *rects = rmesa->pClipRects;
+   const int nrects = rmesa->numClipRects;
+   const int fd = rmesa->driScreen->fd;
+   drm_r128_depth_t req;
+   int done = 0;
+
+   if ( !nrects || !n ) {
+      return;
+   }
+
+   /* The request is identical for every submission, so fill it in once.
+    * x and y are scalars here, hence the address-of.
+    */
+   req.func = R128_READ_SPAN;
+   req.n = n;
+   req.x = (int*)&x;
+   req.y = (int*)&y;
+   req.buffer = NULL;
+   req.mask = NULL;
+
+   if ( nrects >= R128_NR_SAREA_CLIPRECTS ) {
+      rmesa->dirty |= R128_UPLOAD_CLIPRECTS;
+   }
+
+   if ( rmesa->dirty & R128_UPLOAD_CLIPRECTS ) {
+      /* Batched path: one ioctl per SAREA-sized group of cliprects. */
+      while ( done < nrects ) {
+	 const int end = MIN2( done + R128_NR_SAREA_CLIPRECTS, nrects );
+	 drm_clip_rect_t *dst = rmesa->sarea->boxes;
+
+	 rmesa->sarea->nbox = end - done;
+	 while ( done < end ) {
+	    *dst++ = rects[done++];
+	 }
+
+	 rmesa->sarea->dirty |= R128_UPLOAD_CLIPRECTS;
+	 drmCommandWrite( fd, DRM_R128_DEPTH, &req, sizeof(req) );
+      }
+   } else {
+      /* Single submission; fewer than three boxes are reported as zero. */
+      rmesa->sarea->nbox = ( nrects < 3 ) ? 0 : nrects;
+
+      drmCommandWrite( fd, DRM_R128_DEPTH, &req, sizeof(req) );
+   }
+
+   /* The cliprect upload flag is always cleared on the way out. */
+   rmesa->dirty &= ~R128_UPLOAD_CLIPRECTS;
+}
+
+/* Ask the kernel (DRM_R128_DEPTH, R128_READ_PIXELS) to read n depth values at
+ * the scattered positions (x[i], y[i]).  No user buffer is passed (buffer
+ * and mask are NULL).  Nothing is done with no cliprects or when n is zero.
+ *
+ * If the cliprects are flagged for upload (or there are at least
+ * R128_NR_SAREA_CLIPRECTS of them) they are copied into the SAREA in batches
+ * and the ioctl is issued once per batch; otherwise it is issued once.
+ */
+void r128ReadDepthPixelsLocked( r128ContextPtr rmesa, GLuint n,
+				const GLint x[], const GLint y[] )
+{
+   const drm_clip_rect_t *rects = rmesa->pClipRects;
+   const int nrects = rmesa->numClipRects;
+   const int fd = rmesa->driScreen->fd;
+   drm_r128_depth_t req;
+   int done = 0;
+
+   if ( !nrects || !n ) {
+      return;
+   }
+
+   /* x and y are arrays of n coordinates, so the array pointers themselves
+    * are passed.  Taking &x / &y would hand over the address of the local
+    * pointer variable instead of the coordinate data. */
+   req.func = R128_READ_PIXELS;
+   req.n = n;
+   req.x = (int *)x;
+   req.y = (int *)y;
+   req.buffer = NULL;
+   req.mask = NULL;
+
+   if ( nrects >= R128_NR_SAREA_CLIPRECTS ) {
+      rmesa->dirty |= R128_UPLOAD_CLIPRECTS;
+   }
+
+   if ( rmesa->dirty & R128_UPLOAD_CLIPRECTS ) {
+      /* Batched path: one ioctl per SAREA-sized group of cliprects. */
+      while ( done < nrects ) {
+	 const int end = MIN2( done + R128_NR_SAREA_CLIPRECTS, nrects );
+	 drm_clip_rect_t *dst = rmesa->sarea->boxes;
+
+	 rmesa->sarea->nbox = end - done;
+	 while ( done < end ) {
+	    *dst++ = rects[done++];
+	 }
+
+	 rmesa->sarea->dirty |= R128_UPLOAD_CLIPRECTS;
+	 drmCommandWrite( fd, DRM_R128_DEPTH, &req, sizeof(req) );
+      }
+   } else {
+      /* Single submission; fewer than three boxes are reported as zero. */
+      rmesa->sarea->nbox = ( nrects < 3 ) ? 0 : nrects;
+
+      drmCommandWrite( fd, DRM_R128_DEPTH, &req, sizeof(req) );
+   }
+
+   /* The cliprect upload flag is always cleared on the way out. */
+   rmesa->dirty &= ~R128_UPLOAD_CLIPRECTS;
+}
+
+
+/* Poll DRM_R128_CCE_IDLE; on final failure reset the CCE, unlock and exit. */
+void r128WaitForIdleLocked( r128ContextPtr rmesa )
+{
+    const int fd = rmesa->r128Screen->driScreen->fd;
+    int rounds = 0;
+    int status, tries;
+
+    do {
+        tries = 0;
+        do {
+            status = drmCommandNone( fd, DRM_R128_CCE_IDLE);
+        } while ( status && errno == EBUSY && tries++ < R128_IDLE_RETRY );
+    } while ( ( status == -EBUSY ) && ( rounds++ < R128_TIMEOUT ) );
+    if ( status >= 0 )
+        return;
+    drmCommandNone( fd, DRM_R128_CCE_RESET);
+    UNLOCK_HARDWARE( rmesa );
+    fprintf( stderr, "Error: Rage 128 timed out... exiting\n" );
+    exit( -1 );
+}
+
+void r128InitIoctlFuncs( struct dd_function_table *functions )
+{
+    functions->Clear = r128Clear;	/* the only hook installed by this function */
+}
diff --git a/src/mesa/drivers/dri/r128/r128_ioctl.h b/src/mesa/drivers/dri/r128/r128_ioctl.h
new file mode 100644
index 0000000000..84ace900ee
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_ioctl.h
@@ -0,0 +1,143 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __R128_IOCTL_H__
+#define __R128_IOCTL_H__
+
+#include "r128_dri.h"
+#include "r128_reg.h"
+#include "r128_lock.h"
+
+#define R128_BUFFER_MAX_DWORDS	(R128_BUFFER_SIZE / sizeof(uint32_t))
+
+
+extern drmBufPtr r128GetBufferLocked( r128ContextPtr rmesa );
+extern void r128FlushVerticesLocked( r128ContextPtr rmesa );
+
+/* Reserve room for `count` vertices of `vert_size` bytes each in the
+ * vertex buffer and return its address.  A buffer is fetched when none is
+ * bound; one too full for the request is flushed and replaced first. */
+static INLINE void *r128AllocDmaLow( r128ContextPtr rmesa, int count,
+                                     int vert_size )
+{
+   const int bytes = count * vert_size;
+   const int full = rmesa->vert_buf &&
+      rmesa->vert_buf->used + bytes > rmesa->vert_buf->total;
+   void *head;
+
+   if ( !rmesa->vert_buf || full ) {
+      LOCK_HARDWARE( rmesa );
+      if ( full )
+         r128FlushVerticesLocked( rmesa );
+      rmesa->vert_buf = r128GetBufferLocked( rmesa );
+      UNLOCK_HARDWARE( rmesa );
+   }
+   head = (char *)rmesa->vert_buf->address + rmesa->vert_buf->used;
+   rmesa->vert_buf->used += bytes;
+   rmesa->num_verts += count;
+   return head;
+}
+
+extern void r128FireBlitLocked( r128ContextPtr rmesa, drmBufPtr buffer,
+				GLint offset, GLint pitch, GLint format,
+				GLint x, GLint y, GLint width, GLint height );
+
+extern void r128WriteDepthSpanLocked( r128ContextPtr rmesa,
+				      GLuint n, GLint x, GLint y,
+				      const GLuint depth[],
+				      const GLubyte mask[] );
+extern void r128WriteDepthPixelsLocked( r128ContextPtr rmesa, GLuint n,
+					const GLint x[], const GLint y[],
+					const GLuint depth[],
+					const GLubyte mask[] );
+extern void r128ReadDepthSpanLocked( r128ContextPtr rmesa,
+				     GLuint n, GLint x, GLint y );
+extern void r128ReadDepthPixelsLocked( r128ContextPtr rmesa, GLuint n,
+				       const GLint x[], const GLint y[] );
+
+extern void r128CopyBuffer( __DRIdrawable *dPriv );
+extern void r128PageFlip( __DRIdrawable *dPriv );
+void r128WaitForVBlank( r128ContextPtr rmesa );
+
+extern void r128WaitForIdleLocked( r128ContextPtr rmesa );
+
+
+extern void r128InitIoctlFuncs( struct dd_function_table *functions );
+
+
+/* ================================================================
+ * Helper macros:
+ */
+
+#define FLUSH_BATCH( rmesa )						\
+do {									\
+   if ( R128_DEBUG & DEBUG_VERBOSE_IOCTL )				\
+      fprintf( stderr, "FLUSH_BATCH in %s\n", __FUNCTION__ );		\
+   if ( (rmesa)->vert_buf ) {	/* flush only when a buffer is bound */	\
+      r128FlushVertices( rmesa );					\
+   }									\
+} while (0)
+
+/* 64-bit align the next element address (via uintptr_t, so the pointer is
+ * not truncated on 64-bit hosts), then make room for the next indexed prim
+ * packet header. */
+#define ALIGN_NEXT_ELT( rmesa )						\
+do {									\
+   (rmesa)->next_elt = (GLushort *)					\
+      (((uintptr_t)(rmesa)->next_elt + 7) & ~(uintptr_t)0x7);		\
+   (rmesa)->next_elt = (GLushort *)					\
+      ((GLubyte *)(rmesa)->next_elt + R128_INDEX_PRIM_OFFSET);		\
+} while (0)
+
+#define r128FlushVertices( rmesa )	/* flush vertices under the hw lock */	\
+do {									\
+   LOCK_HARDWARE( rmesa );						\
+   r128FlushVerticesLocked( rmesa );					\
+   UNLOCK_HARDWARE( rmesa );						\
+} while (0)
+
+#define r128FlushElts( rmesa )		/* flush elts under the hw lock */	\
+do {									\
+   LOCK_HARDWARE( rmesa );						\
+   r128FlushEltsLocked( rmesa );					\
+   UNLOCK_HARDWARE( rmesa );						\
+} while (0)
+
+#define r128WaitForIdle( rmesa )	/* wait for idle under the hw lock */	\
+   do {									\
+      LOCK_HARDWARE( rmesa );						\
+      r128WaitForIdleLocked( rmesa );					\
+      UNLOCK_HARDWARE( rmesa );						\
+   } while (0)
+
+#endif /* __R128_IOCTL_H__ */
diff --git a/src/mesa/drivers/dri/r128/r128_lock.c b/src/mesa/drivers/dri/r128/r128_lock.c
new file mode 100644
index 0000000000..c1fa068d1f
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_lock.c
@@ -0,0 +1,107 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#include "r128_context.h"
+#include "r128_lock.h"
+
+#include "drirenderbuffer.h"
+
+
+#if DEBUG_LOCKING
+char *prevLockFile = NULL;
+int prevLockLine = 0;
+#endif
+
+
+/* Copy the sarea's page-flip permission into the context, flip the window
+ * system framebuffer (if any) to the sarea's current page, and set
+ * R128_NEW_WINDOW in new_state. */
+static void r128UpdatePageFlipping( r128ContextPtr rmesa )
+{
+   GLframebuffer *drawFb = rmesa->glCtx->WinSysDrawBuffer;
+
+   rmesa->doPageFlip = rmesa->sarea->pfAllowPageFlip;
+   if (drawFb)
+      driFlipRenderbuffers(drawFb, rmesa->sarea->pfCurrentPage);
+   rmesa->new_state |= R128_NEW_WINDOW;
+}
+
+/* Update the hardware state.  This is called if another context has
+ * grabbed the hardware lock, which includes the X server.  This
+ * function also updates the driver's window state after the X server
+ * moves, resizes or restacks a window -- the change will be reflected
+ * in the drawable position and clip rects.  Since the X server grabs
+ * the hardware lock when it changes the window state, this routine will
+ * automatically be called after such a change.
+ */
+void r128GetLock( r128ContextPtr rmesa, GLuint flags )
+{
+   __DRIdrawable *dPriv = rmesa->driDrawable;
+   __DRIscreen *sPriv = rmesa->driScreen;
+   drm_r128_sarea_t *sarea = rmesa->sarea;
+   int i;
+
+   drmGetLock( rmesa->driFd, rmesa->hHWContext, flags );
+
+   /* The window might have moved, so we might need to get new clip
+    * rects.
+    *
+    * NOTE: This releases and regrabs the hw lock to allow the X server
+    * to respond to the DRI protocol request for new drawable info.
+    * Since the hardware state depends on having the latest drawable
+    * clip rects, all state checking must be done _after_ this call.
+    */
+   DRI_VALIDATE_DRAWABLE_INFO( sPriv, dPriv );
+
+   if ( rmesa->lastStamp != dPriv->lastStamp ) {
+      r128UpdatePageFlipping( rmesa );
+      driUpdateFramebufferSize(rmesa->glCtx, dPriv);
+      rmesa->lastStamp = dPriv->lastStamp;
+      rmesa->new_state |= R128_NEW_CLIP;
+      RENDERINPUTS_ONES( rmesa->tnl_state_bitset );
+   }
+   /* Context state and cliprects are always flagged for upload here. */
+   rmesa->dirty |= R128_UPLOAD_CONTEXT | R128_UPLOAD_CLIPRECTS;
+   /* Cache the drawable's current cliprect list in the context. */
+   rmesa->numClipRects = dPriv->numClipRects;
+   rmesa->pClipRects = dPriv->pClipRects;
+   /* Another context owned the hardware: take ownership, dirty everything. */
+   if ( sarea->ctx_owner != rmesa->hHWContext ) {
+      sarea->ctx_owner = rmesa->hHWContext;
+      rmesa->dirty = R128_UPLOAD_ALL;
+   }
+
+   for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
+      DRI_AGE_TEXTURES( rmesa->texture_heaps[i] );
+   }
+}
diff --git a/src/mesa/drivers/dri/r128/r128_lock.h b/src/mesa/drivers/dri/r128/r128_lock.h
new file mode 100644
index 0000000000..1fc8cbe29f
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_lock.h
@@ -0,0 +1,107 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __R128_LOCK_H__
+#define __R128_LOCK_H__
+
+extern void r128GetLock( r128ContextPtr rmesa, GLuint flags );
+
+/* Turn DEBUG_LOCKING on to find locking conflicts.
+ */
+#define DEBUG_LOCKING	0
+
+#if DEBUG_LOCKING
+extern char *prevLockFile;
+extern int prevLockLine;
+
+#define DEBUG_LOCK()							\
+   do {									\
+      prevLockFile = (__FILE__);					\
+      prevLockLine = (__LINE__);					\
+   } while (0)
+
+#define DEBUG_RESET()							\
+   do {									\
+      prevLockFile = 0;							\
+      prevLockLine = 0;							\
+   } while (0)
+
+#define DEBUG_CHECK_LOCK()						\
+   do {									\
+      if ( prevLockFile ) {						\
+	 fprintf( stderr,						\
+		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
+		  prevLockFile, prevLockLine, __FILE__, __LINE__ );	\
+	 exit( 1 );							\
+      }									\
+   } while (0)
+
+#else
+
+#define DEBUG_LOCK()
+#define DEBUG_RESET()
+#define DEBUG_CHECK_LOCK()
+
+#endif
+
+/*
+ * !!! We may want to separate locks from locks with validation.  This
+ * could be used to improve performance for those commands that
+ * do not do any drawing !!!
+ */
+
+/* Lock the hardware and validate our state.  DRM_CAS is tried first;
+ * r128GetLock() is called only when DRM_CAS sets __ret. */
+#define LOCK_HARDWARE( rmesa )						\
+   do {									\
+      char __ret = 0;							\
+      DEBUG_CHECK_LOCK();						\
+      DRM_CAS( (rmesa)->driHwLock, (rmesa)->hHWContext,			\
+	       (DRM_LOCK_HELD | (rmesa)->hHWContext), __ret );		\
+      if ( __ret )							\
+	 r128GetLock( (rmesa), 0 );					\
+      DEBUG_LOCK();							\
+   } while (0)
+
+/* Unlock the hardware.
+ */
+#define UNLOCK_HARDWARE( rmesa )					\
+   do {									\
+      DRM_UNLOCK( (rmesa)->driFd,					\
+		  (rmesa)->driHwLock,					\
+		  (rmesa)->hHWContext );				\
+      DEBUG_RESET();							\
+   } while (0)
+
+#endif /* __R128_LOCK_H__ */
diff --git a/src/mesa/drivers/dri/r128/r128_screen.c b/src/mesa/drivers/dri/r128/r128_screen.c
new file mode 100644
index 0000000000..2d91802823
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_screen.c
@@ -0,0 +1,525 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *
+ */
+
+#include "r128_dri.h"
+
+#include "r128_context.h"
+#include "r128_ioctl.h"
+#include "r128_span.h"
+
+#include "main/context.h"
+#include "main/imports.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+
+#include "utils.h"
+#include "vblank.h"
+
+#include "GL/internal/dri_interface.h"
+
+/* R128 configuration
+ */
+#include "xmlpool.h"
+
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+    DRI_CONF_SECTION_PERFORMANCE
+        DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_QUALITY
+        DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_DEBUG
+        DRI_CONF_NO_RAST(false)
+#if ENABLE_PERF_BOXES
+        DRI_CONF_PERFORMANCE_BOXES(false)
+#endif
+    DRI_CONF_SECTION_END
+DRI_CONF_END;
+#if ENABLE_PERF_BOXES
+static const GLuint __driNConfigOptions = 4;
+#else
+static const GLuint __driNConfigOptions = 3;
+#endif
+
+#if 1
+/* Including xf86PciInfo.h introduces a bunch of errors...
+ */
+#define PCI_CHIP_RAGE128LE	0x4C45
+#define PCI_CHIP_RAGE128LF	0x4C46
+#define PCI_CHIP_RAGE128PF	0x5046
+#define PCI_CHIP_RAGE128PR	0x5052
+#define PCI_CHIP_RAGE128RE	0x5245
+#define PCI_CHIP_RAGE128RF	0x5246
+#define PCI_CHIP_RAGE128RK	0x524B
+#define PCI_CHIP_RAGE128RL	0x524C
+#endif
+
+
+/* Create the device specific screen private data struct.
+ *
+ * Returns the new screen, or NULL on failure.  Failure paths unwind through
+ * the labels at the end so mappings, option cache and record are released.
+ */
+static r128ScreenPtr
+r128CreateScreen( __DRIscreen *sPriv )
+{
+   r128ScreenPtr r128Screen;
+   R128DRIPtr r128DRIPriv = (R128DRIPtr)sPriv->pDevPriv;
+   int i = 0;
+
+   if (sPriv->devPrivSize != sizeof(R128DRIRec)) {
+      fprintf(stderr,"\nERROR!  sizeof(R128DRIRec) does not match passed size from device driver\n");
+      return NULL;
+   }
+
+   /* Allocate the private area */
+   r128Screen = (r128ScreenPtr) CALLOC( sizeof(*r128Screen) );
+   if ( !r128Screen ) return NULL;
+
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo (&r128Screen->optionCache,
+		       __driConfigOptions, __driNConfigOptions);
+
+   /* This is first since which regions we map depends on whether or
+    * not we are using a PCI card.
+    */
+   r128Screen->IsPCI = r128DRIPriv->IsPCI;
+   r128Screen->sarea_priv_offset = r128DRIPriv->sarea_priv_offset;
+
+   if (sPriv->drm_version.minor >= 3) {
+      drm_r128_getparam_t gp;
+      int ret;
+
+      gp.param = R128_PARAM_IRQ_NR;
+      gp.value = &r128Screen->irq;
+
+      ret = drmCommandWriteRead( sPriv->fd, DRM_R128_GETPARAM, &gp, sizeof(gp));
+      if (ret) {
+         fprintf(stderr, "drmR128GetParam (R128_PARAM_IRQ_NR): %d\n", ret);
+         goto fail_free;
+      }
+   }
+
+   r128Screen->mmio.handle = r128DRIPriv->registerHandle;
+   r128Screen->mmio.size   = r128DRIPriv->registerSize;
+   if ( drmMap( sPriv->fd,
+		r128Screen->mmio.handle,
+		r128Screen->mmio.size,
+		(drmAddressPtr)&r128Screen->mmio.map ) ) {
+      goto fail_free;
+   }
+
+   r128Screen->buffers = drmMapBufs( sPriv->fd );
+   if ( !r128Screen->buffers )
+      goto fail_unmap_mmio;
+
+   if ( !r128Screen->IsPCI ) {
+      r128Screen->agpTextures.handle = r128DRIPriv->agpTexHandle;
+      r128Screen->agpTextures.size   = r128DRIPriv->agpTexMapSize;
+      if ( drmMap( sPriv->fd,
+		   r128Screen->agpTextures.handle,
+		   r128Screen->agpTextures.size,
+		   (drmAddressPtr)&r128Screen->agpTextures.map ) ) {
+	 goto fail_unmap_bufs;
+      }
+   }
+
+   switch ( r128DRIPriv->deviceID ) {
+   case PCI_CHIP_RAGE128PF:
+      r128Screen->chipset = R128_CARD_TYPE_R128_PRO;
+      break;
+   case PCI_CHIP_RAGE128LE:
+   case PCI_CHIP_RAGE128LF:
+      r128Screen->chipset = R128_CARD_TYPE_R128_MOBILITY;
+      break;
+   case PCI_CHIP_RAGE128RE:
+   case PCI_CHIP_RAGE128RF:
+   case PCI_CHIP_RAGE128RK:
+   case PCI_CHIP_RAGE128RL:
+   default:
+      r128Screen->chipset = R128_CARD_TYPE_R128;
+      break;
+   }
+
+   r128Screen->cpp = r128DRIPriv->bpp / 8;
+   r128Screen->AGPMode = r128DRIPriv->AGPMode;
+
+   r128Screen->frontOffset	= r128DRIPriv->frontOffset;
+   r128Screen->frontPitch	= r128DRIPriv->frontPitch;
+   r128Screen->backOffset	= r128DRIPriv->backOffset;
+   r128Screen->backPitch	= r128DRIPriv->backPitch;
+   r128Screen->depthOffset	= r128DRIPriv->depthOffset;
+   r128Screen->depthPitch	= r128DRIPriv->depthPitch;
+   r128Screen->spanOffset	= r128DRIPriv->spanOffset;
+
+   if ( r128DRIPriv->textureSize == 0 ) {
+      r128Screen->texOffset[R128_LOCAL_TEX_HEAP] =
+	 r128DRIPriv->agpTexOffset + R128_AGP_TEX_OFFSET;
+      r128Screen->texSize[R128_LOCAL_TEX_HEAP] = r128DRIPriv->agpTexMapSize;
+      r128Screen->logTexGranularity[R128_LOCAL_TEX_HEAP] =
+	 r128DRIPriv->log2AGPTexGran;
+   } else {
+      r128Screen->texOffset[R128_LOCAL_TEX_HEAP] = r128DRIPriv->textureOffset;
+      r128Screen->texSize[R128_LOCAL_TEX_HEAP] = r128DRIPriv->textureSize;
+      r128Screen->logTexGranularity[R128_LOCAL_TEX_HEAP] = r128DRIPriv->log2TexGran;
+   }
+
+   if ( !r128Screen->agpTextures.map || r128DRIPriv->textureSize == 0 ) {
+      r128Screen->numTexHeaps = R128_NR_TEX_HEAPS - 1;
+      r128Screen->texOffset[R128_AGP_TEX_HEAP] = 0;
+      r128Screen->texSize[R128_AGP_TEX_HEAP] = 0;
+      r128Screen->logTexGranularity[R128_AGP_TEX_HEAP] = 0;
+   } else {
+      r128Screen->numTexHeaps = R128_NR_TEX_HEAPS;
+      r128Screen->texOffset[R128_AGP_TEX_HEAP] =
+	 r128DRIPriv->agpTexOffset + R128_AGP_TEX_OFFSET;
+      r128Screen->texSize[R128_AGP_TEX_HEAP] = r128DRIPriv->agpTexMapSize;
+      r128Screen->logTexGranularity[R128_AGP_TEX_HEAP] =
+	 r128DRIPriv->log2AGPTexGran;
+   }
+
+   r128Screen->driScreen = sPriv;
+
+   r128Screen->extensions[i++] = &driFrameTrackingExtension.base;
+   if ( r128Screen->irq != 0 ) {
+       r128Screen->extensions[i++] = &driSwapControlExtension.base;
+       r128Screen->extensions[i++] = &driMediaStreamCounterExtension.base;
+   }
+   r128Screen->extensions[i++] = NULL;
+   sPriv->extensions = r128Screen->extensions;
+
+   return r128Screen;
+
+fail_unmap_bufs:
+   drmUnmapBufs( r128Screen->buffers );
+fail_unmap_mmio:
+   drmUnmap( (drmAddress)r128Screen->mmio.map, r128Screen->mmio.size );
+fail_free:
+   driDestroyOptionInfo( &r128Screen->optionCache );
+   FREE( r128Screen );
+   return NULL;
+}
+
+/* Tear down the screen private data: unmap the AGP texture region (non-PCI
+ * cards only), the DMA buffers and the MMIO registers, free the option
+ * cache and the record, and clear sPriv->private.  No-op without a screen.
+ */
+static void
+r128DestroyScreen( __DRIscreen *sPriv )
+{
+   r128ScreenPtr screen = (r128ScreenPtr)sPriv->private;
+
+   if ( screen == NULL )
+      return;
+
+   if ( !screen->IsPCI )
+      drmUnmap( (drmAddress)screen->agpTextures.map, screen->agpTextures.size );
+   drmUnmapBufs( screen->buffers );
+   drmUnmap( (drmAddress)screen->mmio.map, screen->mmio.size );
+
+   /* release the option cache before the record that embeds it */
+   driDestroyOptionInfo( &screen->optionCache );
+
+   FREE( screen );
+   sPriv->private = NULL;
+}
+
+
+/* Create the Mesa framebuffer for a window drawable and attach the
+ * renderbuffers its visual asks for.  Pixmaps are not implemented.
+ */
+static GLboolean
+r128CreateBuffer( __DRIscreen *driScrnPriv,
+                  __DRIdrawable *driDrawPriv,
+                  const __GLcontextModes *mesaVis,
+                  GLboolean isPixmap )
+{
+   r128ScreenPtr screen = (r128ScreenPtr) driScrnPriv->private;
+
+   if (isPixmap) {
+      return GL_FALSE; /* not implemented */
+   }
+   else {
+      const GLboolean swDepth = GL_FALSE;
+      const GLboolean swAlpha = GL_FALSE;
+      const GLboolean swAccum = mesaVis->accumRedBits > 0;
+      const GLboolean swStencil = mesaVis->stencilBits > 0 &&
+         mesaVis->depthBits != 24;
+      struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis);
+
+      if (!fb)
+         return GL_FALSE; /* no framebuffer to attach buffers to */
+
+      {
+         driRenderbuffer *frontRb
+            = driNewRenderbuffer(MESA_FORMAT_ARGB8888,
+                                 NULL,
+                                 screen->cpp,
+                                 screen->frontOffset, screen->frontPitch,
+                                 driDrawPriv);
+         r128SetSpanFunctions(frontRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
+      }
+
+      if (mesaVis->doubleBufferMode) {
+         driRenderbuffer *backRb
+            = driNewRenderbuffer(MESA_FORMAT_ARGB8888,
+                                 NULL,
+                                 screen->cpp,
+                                 screen->backOffset, screen->backPitch,
+                                 driDrawPriv);
+         r128SetSpanFunctions(backRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
+      }
+
+      if (mesaVis->depthBits == 16) {
+         driRenderbuffer *depthRb
+            = driNewRenderbuffer(MESA_FORMAT_Z16,
+                                 NULL,
+                                 screen->cpp,
+                                 screen->depthOffset, screen->depthPitch,
+                                 driDrawPriv);
+         r128SetSpanFunctions(depthRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+      }
+      else if (mesaVis->depthBits == 24) {
+         driRenderbuffer *depthRb
+            = driNewRenderbuffer(MESA_FORMAT_S8_Z24,
+                                 NULL,
+                                 screen->cpp,
+                                 screen->depthOffset, screen->depthPitch,
+                                 driDrawPriv);
+         r128SetSpanFunctions(depthRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+      }
+
+      if (mesaVis->stencilBits > 0 && !swStencil) {
+         driRenderbuffer *stencilRb
+            = driNewRenderbuffer(MESA_FORMAT_S8,
+                                 NULL,
+                                 screen->cpp,
+                                 screen->depthOffset, screen->depthPitch,
+                                 driDrawPriv);
+         r128SetSpanFunctions(stencilRb, mesaVis);
+         _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
+      }
+
+      _mesa_add_soft_renderbuffers(fb,
+                                   GL_FALSE, /* color */
+                                   swDepth, swStencil, swAccum, swAlpha,
+                                   GL_FALSE /* aux */);
+      driDrawPriv->driverPrivate = (void *) fb;
+
+      return GL_TRUE;
+   }
+}
+
+
+static void r128DestroyBuffer(__DRIdrawable *driDrawPriv)
+{
+   GLframebuffer **fbRef = (GLframebuffer **) &driDrawPriv->driverPrivate;
+   _mesa_reference_framebuffer(fbRef, NULL);   /* re-point the fb ref at NULL */
+}
+
+
+/* Present the back color buffer, by page flip or by copy. */
+static void
+r128SwapBuffers(__DRIdrawable *dPriv)
+{
+   r128ContextPtr rmesa;
+   GLcontext *ctx;
+
+   if (!dPriv->driContextPriv || !dPriv->driContextPriv->driverPrivate) {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
+      return;
+   }
+
+   rmesa = (r128ContextPtr) dPriv->driContextPriv->driverPrivate;
+   ctx = rmesa->glCtx;
+   if (!ctx->Visual.doubleBufferMode)
+      return;   /* single-buffered visual: nothing to swap */
+
+   _mesa_notifySwapBuffers( ctx );  /* flush pending rendering commands */
+   if ( rmesa->doPageFlip )
+      r128PageFlip( dPriv );
+   else
+      r128CopyBuffer( dPriv );
+}
+
+
+/* Build the screen private data and attach it to sPriv->private.
+ * Returns GL_TRUE on success, GL_FALSE if the screen could not be created.
+ */
+static GLboolean
+r128InitDriver( __DRIscreen *sPriv )
+{
+   r128ScreenPtr screen = r128CreateScreen( sPriv );
+
+   sPriv->private = (void *) screen;
+   if ( screen != NULL )
+      return GL_TRUE;
+   r128DestroyScreen( sPriv );
+   return GL_FALSE;
+}
+
+/* Build the list of framebuffer configs for this screen via
+ * driCreateConfigs().
+ *
+ * pixel_bits selects the color format; depth_bits, stencil_bits and
+ * have_back_buffer describe the buffers the caller wants advertised.
+ * Configs with a non-zero stencil size other than stencil_bits are rated
+ * GLX_SLOW_CONFIG.  Returns NULL if driCreateConfigs() fails.
+ */
+static const __DRIconfig **
+r128FillInModes( __DRIscreen *psp,
+		 unsigned pixel_bits, unsigned depth_bits,
+		 unsigned stencil_bits, GLboolean have_back_buffer )
+{
+    /* Right now GLX_SWAP_COPY_OML isn't supported, but it would be easy
+     * enough to add support.  Basically, if a context is created with an
+     * fbconfig where the swap method is GLX_SWAP_COPY_OML, pageflipping
+     * will never be used.
+     */
+    static const GLenum back_buffer_modes[] = {
+	GLX_NONE, GLX_SWAP_UNDEFINED_OML /*, GLX_SWAP_COPY_OML */
+    };
+
+    /* GL_RGB / 5_6_5 for 16 bpp, GL_BGR / 8_8_8_8_REV for anything else. */
+    const GLboolean is16bpp = ( pixel_bits == 16 );
+    const GLenum fb_format = is16bpp ? GL_RGB : GL_BGR;
+    const GLenum fb_type = is16bpp ? GL_UNSIGNED_SHORT_5_6_5
+                                   : GL_UNSIGNED_INT_8_8_8_8_REV;
+    const unsigned depth_buffer_factor =
+        ((depth_bits != 0) || (stencil_bits != 0)) ? 2 : 1;
+    const unsigned back_buffer_factor = (have_back_buffer) ? 2 : 1;
+    uint8_t depth_bits_array[2];
+    uint8_t stencil_bits_array[2];
+    uint8_t msaa_samples_array[1];
+    __DRIconfig **configs;
+    __DRIconfig **cfg;
+
+    /* Both depth entries are identical; only the stencil size varies. */
+    depth_bits_array[0] = depth_bits;
+    depth_bits_array[1] = depth_bits;
+
+    /* Just like with the accumulation buffer, always provide some modes
+     * with a stencil buffer.  It will be a sw fallback, but some apps won't
+     * care about that.
+     */
+    stencil_bits_array[0] = 0;
+    stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
+
+    /* a single entry: zero MSAA samples */
+    msaa_samples_array[0] = 0;
+
+    configs = driCreateConfigs(fb_format, fb_type,
+                               depth_bits_array, stencil_bits_array,
+                               depth_buffer_factor, back_buffer_modes,
+                               back_buffer_factor,
+                               msaa_samples_array, 1, GL_TRUE);
+    if (configs == NULL) {
+        fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
+                __LINE__);
+        return NULL;
+    }
+
+    /* Mark the visual as slow if there are "fake" stencil bits.
+     */
+    for (cfg = configs; *cfg; cfg++) {
+        __GLcontextModes *m = &(*cfg)->modes;
+        if ((m->stencilBits != 0) && (m->stencilBits != stencil_bits)) {
+            m->visualRating = GLX_SLOW_CONFIG;
+        }
+    }
+
+    return (const __DRIconfig **) configs;
+}
+
+
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ *
+ * \todo maybe fold this into r128InitDriver
+ *
+ * \return the configs built by r128FillInModes(), or NULL on failure
+ */
+static const __DRIconfig **
+r128InitScreen(__DRIscreen *psp)
+{
+   static const __DRIversion ddx_expected = { 4, 0, 0 };
+   static const __DRIversion dri_expected = { 4, 0, 0 };
+   static const __DRIversion drm_expected = { 2, 2, 0 };
+   R128DRIPtr dri_priv = (R128DRIPtr) psp->pDevPriv;
+   GLboolean is16bpp;
+
+   /* Bail out if the version check or the screen setup fails. */
+   if ( ! driCheckDriDdxDrmVersions2( "Rage128",
+				      &psp->dri_version, & dri_expected,
+				      &psp->ddx_version, & ddx_expected,
+				      &psp->drm_version, & drm_expected )
+        || !r128InitDriver(psp) )
+      return NULL;
+
+   /* 16 bpp: 16-bit depth, no stencil; otherwise 24-bit depth, 8 stencil. */
+   is16bpp = (dri_priv->bpp == 16);
+   return r128FillInModes( psp, dri_priv->bpp,
+			   is16bpp ? 16 : 24, is16bpp ? 0 : 8,
+			   (dri_priv->backOffset != dri_priv->depthOffset) );
+}
+
+const struct __DriverAPIRec driDriverAPI = {
+   .InitScreen      = r128InitScreen,
+   .DestroyScreen   = r128DestroyScreen,
+   .CreateContext   = r128CreateContext,
+   .DestroyContext  = r128DestroyContext,
+   .CreateBuffer    = r128CreateBuffer,
+   .DestroyBuffer   = r128DestroyBuffer,
+   .SwapBuffers     = r128SwapBuffers,
+   .MakeCurrent     = r128MakeCurrent,
+   .UnbindContext   = r128UnbindContext,
+   .GetSwapInfo     = NULL,
+   .GetDrawableMSC  = driDrawableGetMSC32,
+   .WaitForMSC      = driWaitForMSC32,
+   .WaitForSBC      = NULL,
+   .SwapBuffersMSC  = NULL
+};
+
+/* This is the table of extensions that the loader will dlsym() for. */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+    &driCoreExtension.base,
+    &driLegacyExtension.base,
+    NULL
+};
diff --git a/src/mesa/drivers/dri/r128/r128_screen.h b/src/mesa/drivers/dri/r128/r128_screen.h
new file mode 100644
index 0000000000..8d450adff3
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_screen.h
@@ -0,0 +1,85 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *
+ */
+
+#ifndef __R128_SCREEN_H__
+#define __R128_SCREEN_H__
+
+#include "xmlconfig.h"
+
+typedef struct {
+   drm_handle_t handle;			/* Handle to the DRM region */
+   drmSize size;			/* Size of the DRM region */
+   unsigned char *map;			/* Mapping of the DRM region */
+} r128RegionRec, *r128RegionPtr;
+
+typedef struct {
+   /* Per-screen driver state. */
+   GLint chipset;
+   GLint cpp;				/* presumably bytes per pixel - TODO confirm */
+   GLint IsPCI;				/* Current card is a PCI card */
+   GLint AGPMode;
+   unsigned int irq;			/* IRQ number (0 means none) */
+
+   GLuint frontOffset;
+   GLuint frontPitch;
+   GLuint backOffset;
+   GLuint backPitch;
+
+   GLuint depthOffset;
+   GLuint depthPitch;
+   GLuint spanOffset;			/* Added to pFB in r128_span.c to read spans back */
+
+   /* Shared texture data */
+   GLint numTexHeaps;
+   GLint texOffset[R128_NR_TEX_HEAPS];
+   GLint texSize[R128_NR_TEX_HEAPS];
+   GLint logTexGranularity[R128_NR_TEX_HEAPS];
+
+   r128RegionRec mmio;
+   r128RegionRec agpTextures;
+
+   drmBufMapPtr buffers;
+
+   __DRIscreen *driScreen;
+   unsigned int sarea_priv_offset;
+
+   /* Configuration cache with default values for all contexts */
+   driOptionCache optionCache;
+
+   const __DRIextension *extensions[4];
+
+} r128ScreenRec, *r128ScreenPtr;
+
+
+#endif /* __R128_SCREEN_H__ */
diff --git a/src/mesa/drivers/dri/r128/r128_span.c b/src/mesa/drivers/dri/r128/r128_span.c
new file mode 100644
index 0000000000..2fbe93c590
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_span.c
@@ -0,0 +1,449 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *
+ */
+
+#include "r128_context.h"
+#include "r128_ioctl.h"
+#include "r128_span.h"
+
+#include "swrast/swrast.h"
+
+#define DBG 0
+
+#define HAVE_HW_DEPTH_SPANS	1
+#define HAVE_HW_DEPTH_PIXELS	1
+#define HAVE_HW_STENCIL_SPANS	1
+#define HAVE_HW_STENCIL_PIXELS	1
+
+#define LOCAL_VARS							\
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);				\
+   __DRIscreen *sPriv = rmesa->driScreen;			\
+   __DRIdrawable *dPriv = rmesa->driDrawable;			\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;			\
+   GLuint height = dPriv->h;						\
+   GLuint p;								\
+   (void) p;
+
+#define LOCAL_DEPTH_VARS						\
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);				\
+   r128ScreenPtr r128scrn = rmesa->r128Screen;				\
+   __DRIscreen *sPriv = rmesa->driScreen;			\
+   __DRIdrawable *dPriv = rmesa->driDrawable;			\
+   GLuint height = dPriv->h;						\
+   (void) r128scrn; (void) sPriv; (void) height
+
+#define LOCAL_STENCIL_VARS	LOCAL_DEPTH_VARS
+
+#define Y_FLIP( _y )		(height - (_y) - 1)	/* mirrors a row index; needs 'height' in scope */
+
+#define HW_LOCK()
+
+#define HW_UNLOCK()
+
+
+
+/* ================================================================
+ * Color buffer
+ */
+
+/* 16 bit, RGB565 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x)    r128##x##_RGB565
+#define TAG2(x,y) r128##x##_RGB565##y
+#define GET_PTR(X,Y) (sPriv->pFB + drb->flippedOffset		\
+     + ((dPriv->y + (Y)) * drb->flippedPitch + (dPriv->x + (X))) * drb->cpp)
+#include "spantmp2.h"
+
+
+/* 32 bit, ARGB8888 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    r128##x##_ARGB8888
+#define TAG2(x,y) r128##x##_ARGB8888##y
+#define GET_PTR(X,Y) (sPriv->pFB + drb->flippedOffset		\
+     + ((dPriv->y + (Y)) * drb->flippedPitch + (dPriv->x + (X))) * drb->cpp)
+#include "spantmp2.h"
+
+/* Idling in the depth/stencil span functions:
+ * For writes, the kernel reads from the given user-space buffer at dispatch
+ * time, and then writes to the depth buffer asynchronously.
+ * For reads, the kernel reads from the depth buffer and writes to the span
+ * temporary asynchronously.
+ * So, if we're going to read from the span temporary, we need to idle before
+ * doing so.  But we don't need to idle after a write, because the CPU won't
+ * be accessing the destination, only the accelerator (through 3D rendering or
+ * depth span reads).
+ * However, due to pixel cache interactions between 2D (what we do with
+ * depth) and 3D (all other parts of the system), we idle at the beginning and
+ * end of a set of span operations, which should cover the pixel cache issue.
+ * Even so, we still have major issues, as shown by no_rast=true glxgears or
+ * stencilwrap.
+ */
+
+/* ================================================================
+ * Depth buffer
+ */
+
+/* These functions require locking */
+#undef HW_LOCK
+#undef HW_UNLOCK
+#define HW_LOCK()    LOCK_HARDWARE(R128_CONTEXT(ctx));
+#define HW_UNLOCK()  UNLOCK_HARDWARE(R128_CONTEXT(ctx));
+
+/* 16-bit depth buffer functions
+ */
+#define VALUE_TYPE GLushort
+
+#define WRITE_DEPTH_SPAN()						\
+do {									\
+   r128WriteDepthSpanLocked( rmesa, n,					\
+			     x + dPriv->x,				\
+			     y + dPriv->y,				\
+			     depth, mask );				\
+} while (0)
+
+#define WRITE_DEPTH_PIXELS()						\
+do {									\
+   GLint ox[MAX_WIDTH];							\
+   GLint oy[MAX_WIDTH];							\
+   for ( i = 0 ; i < n ; i++ ) {					\
+      ox[i] = x[i] + dPriv->x;						\
+      oy[i] = Y_FLIP( y[i] ) + dPriv->y;				\
+   }									\
+   r128WriteDepthPixelsLocked( rmesa, n, ox, oy, depth, mask );		\
+} while (0)
+
+#define READ_DEPTH_SPAN()						\
+do {									\
+   GLushort *buf = (GLushort *)((GLubyte *)sPriv->pFB +			\
+				r128scrn->spanOffset);			\
+   GLint i;								\
+									\
+   r128ReadDepthSpanLocked( rmesa, n,					\
+			    x + dPriv->x,				\
+			    y + dPriv->y );				\
+   r128WaitForIdleLocked( rmesa );					\
+									\
+   for ( i = 0 ; i < n ; i++ ) {					\
+      depth[i] = buf[i];						\
+   }									\
+} while (0)
+
+#define READ_DEPTH_PIXELS()						\
+do {									\
+   GLushort *buf = (GLushort *)((GLubyte *)sPriv->pFB +			\
+				r128scrn->spanOffset);			\
+   GLint i, remaining = n;						\
+									\
+   while ( remaining > 0 ) {						\
+      GLint ox[128];							\
+      GLint oy[128];							\
+      GLint count;							\
+									\
+      if ( remaining <= 128 ) {						\
+	 count = remaining;						\
+      } else {								\
+	 count = 128;							\
+      }									\
+      for ( i = 0 ; i < count ; i++ ) {					\
+	 ox[i] = x[i] + dPriv->x;					\
+	 oy[i] = Y_FLIP( y[i] ) + dPriv->y;				\
+      }									\
+									\
+      r128ReadDepthPixelsLocked( rmesa, count, ox, oy );		\
+      r128WaitForIdleLocked( rmesa );					\
+									\
+      for ( i = 0 ; i < count ; i++ ) {					\
+	 depth[i] = buf[i];						\
+      }									\
+      depth += count;							\
+      x += count;							\
+      y += count;							\
+      remaining -= count;						\
+   }									\
+} while (0)
+
+#define TAG(x) r128##x##_z16
+#include "depthtmp.h"
+
+
+/* 24-bit depth, 8-bit stencil buffer functions
+ */
+#define VALUE_TYPE GLuint
+
+#define WRITE_DEPTH_SPAN()						\
+do {									\
+   GLuint buf[n];							\
+   GLint i;								\
+   GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB +			\
+				r128scrn->spanOffset);			\
+   r128ReadDepthSpanLocked( rmesa, n,					\
+			    x + dPriv->x,				\
+			    y + dPriv->y );				\
+   r128WaitForIdleLocked( rmesa );					\
+   for ( i = 0 ; i < n ; i++ ) {					\
+      buf[i] = (readbuf[i] & 0xff000000) | (depth[i] & 0x00ffffff);	\
+   }									\
+   r128WriteDepthSpanLocked( rmesa, n,					\
+			     x + dPriv->x,				\
+			     y + dPriv->y,				\
+			     buf, mask );				\
+} while (0)
+
+#define WRITE_DEPTH_PIXELS()						\
+do {									\
+   GLuint buf[n];							\
+   GLint ox[MAX_WIDTH];							\
+   GLint oy[MAX_WIDTH];							\
+   GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB +			\
+				r128scrn->spanOffset);			\
+   for ( i = 0 ; i < n ; i++ ) {					\
+      ox[i] = x[i] + dPriv->x;						\
+      oy[i] = Y_FLIP( y[i] ) + dPriv->y;				\
+   }									\
+   r128ReadDepthPixelsLocked( rmesa, n, ox, oy );			\
+   r128WaitForIdleLocked( rmesa );					\
+   for ( i = 0 ; i < n ; i++ ) {					\
+      buf[i] = (readbuf[i] & 0xff000000) | (depth[i] & 0x00ffffff);	\
+   }									\
+   r128WriteDepthPixelsLocked( rmesa, n, ox, oy, buf, mask );		\
+} while (0)
+
+#define READ_DEPTH_SPAN()						\
+do {									\
+   GLuint *buf = (GLuint *)((GLubyte *)sPriv->pFB +			\
+			    r128scrn->spanOffset);			\
+   GLint i;								\
+									\
+   /*if (n >= 128) fprintf(stderr, "Large number of pixels: %d\n", n);*/	\
+   r128ReadDepthSpanLocked( rmesa, n,					\
+			    x + dPriv->x,				\
+			    y + dPriv->y );				\
+   r128WaitForIdleLocked( rmesa );					\
+									\
+   for ( i = 0 ; i < n ; i++ ) {					\
+      depth[i] = buf[i] & 0x00ffffff;					\
+   }									\
+} while (0)
+
+#define READ_DEPTH_PIXELS()						\
+do {									\
+   GLuint *buf = (GLuint *)((GLubyte *)sPriv->pFB +			\
+			    r128scrn->spanOffset);			\
+   GLint i, remaining = n;						\
+									\
+   while ( remaining > 0 ) {						\
+      GLint ox[128];							\
+      GLint oy[128];							\
+      GLint count;							\
+									\
+      if ( remaining <= 128 ) {						\
+	 count = remaining;						\
+      } else {								\
+	 count = 128;							\
+      }									\
+      for ( i = 0 ; i < count ; i++ ) {					\
+	 ox[i] = x[i] + dPriv->x;					\
+	 oy[i] = Y_FLIP( y[i] ) + dPriv->y;				\
+      }									\
+									\
+      r128ReadDepthPixelsLocked( rmesa, count, ox, oy );		\
+      r128WaitForIdleLocked( rmesa );					\
+									\
+      for ( i = 0 ; i < count ; i++ ) {					\
+	 depth[i] = buf[i] & 0x00ffffff;				\
+      }									\
+      depth += count;							\
+      x += count;							\
+      y += count;							\
+      remaining -= count;						\
+   }									\
+} while (0)
+
+#define TAG(x) r128##x##_z24_s8
+#include "depthtmp.h"
+
+
+
+/* ================================================================
+ * Stencil buffer
+ */
+
+/* 24 bit depth, 8 bit stencil depthbuffer functions.  Stencil writes read
+ * the packed words back and replace only the top byte (shift done as GLuint). */
+#define WRITE_STENCIL_SPAN()						\
+do {									\
+   GLuint buf[n];							\
+   GLint i;								\
+   GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB +			\
+				r128scrn->spanOffset);			\
+   r128ReadDepthSpanLocked( rmesa, n,					\
+			    x + dPriv->x,				\
+			    y + dPriv->y );				\
+   r128WaitForIdleLocked( rmesa );					\
+   for ( i = 0 ; i < n ; i++ ) {					\
+      buf[i] = (readbuf[i] & 0x00ffffff) | ((GLuint) stencil[i] << 24);	\
+   }									\
+   r128WriteDepthSpanLocked( rmesa, n,					\
+			     x + dPriv->x,				\
+			     y + dPriv->y,				\
+			     buf, mask );				\
+} while (0)
+
+#define WRITE_STENCIL_PIXELS()						\
+do {	/* read-modify-write: keep low 24 bits, replace top byte */	\
+   GLuint buf[n];							\
+   GLint ox[MAX_WIDTH];							\
+   GLint oy[MAX_WIDTH];							\
+   GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB +			\
+				r128scrn->spanOffset);			\
+   for ( i = 0 ; i < n ; i++ ) {					\
+      ox[i] = x[i] + dPriv->x;						\
+      oy[i] = Y_FLIP( y[i] ) + dPriv->y;				\
+   }									\
+   r128ReadDepthPixelsLocked( rmesa, n, ox, oy );			\
+   r128WaitForIdleLocked( rmesa );					\
+   for ( i = 0 ; i < n ; i++ ) {					\
+      /* shift as GLuint so values >= 0x80 cannot overflow an int */	\
+      buf[i] = (readbuf[i] & 0x00ffffff) | ((GLuint) stencil[i] << 24);	\
+   }									\
+   r128WriteDepthPixelsLocked( rmesa, n, ox, oy, buf, mask );		\
+} while (0)
+
+#define READ_STENCIL_SPAN()						\
+do {									\
+   GLuint *buf = (GLuint *)((GLubyte *)sPriv->pFB +			\
+			    r128scrn->spanOffset);			\
+   GLint i;								\
+									\
+   /*if (n >= 128) fprintf(stderr, "Large number of pixels: %d\n", n);*/	\
+   r128ReadDepthSpanLocked( rmesa, n,					\
+			    x + dPriv->x,				\
+			    y + dPriv->y );				\
+   r128WaitForIdleLocked( rmesa );					\
+									\
+   for ( i = 0 ; i < n ; i++ ) {					\
+      stencil[i] = (buf[i] & 0xff000000) >> 24;				\
+   }									\
+} while (0)
+
+#define READ_STENCIL_PIXELS()						\
+do {									\
+   GLuint *buf = (GLuint *)((GLubyte *)sPriv->pFB +			\
+			    r128scrn->spanOffset);			\
+   GLint i, remaining = n;						\
+									\
+   while ( remaining > 0 ) {						\
+      GLint ox[128];							\
+      GLint oy[128];							\
+      GLint count;							\
+									\
+      if ( remaining <= 128 ) {						\
+	 count = remaining;						\
+      } else {								\
+	 count = 128;							\
+      }									\
+      for ( i = 0 ; i < count ; i++ ) {					\
+	 ox[i] = x[i] + dPriv->x;					\
+	 oy[i] = Y_FLIP( y[i] ) + dPriv->y;				\
+      }									\
+									\
+      r128ReadDepthPixelsLocked( rmesa, count, ox, oy );		\
+      r128WaitForIdleLocked( rmesa );					\
+									\
+      for ( i = 0 ; i < count ; i++ ) {					\
+	 stencil[i] = (buf[i] & 0xff000000) >> 24;			\
+      }									\
+      stencil += count;							\
+      x += count;							\
+      y += count;							\
+      remaining -= count;						\
+   }									\
+} while (0)
+
+#define TAG(x) radeon##x##_z24_s8
+#include "stenciltmp.h"
+
+static void
+r128SpanRenderStart( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   FLUSH_BATCH(rmesa);
+   LOCK_HARDWARE(rmesa);		/* released by r128SpanRenderFinish */
+   r128WaitForIdleLocked( rmesa );	/* idle before the span functions run */
+}
+
+static void
+r128SpanRenderFinish( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   _swrast_flush( ctx );
+   r128WaitForIdleLocked( rmesa );	/* idle again at the end of the span set */
+   UNLOCK_HARDWARE( rmesa );		/* pairs with LOCK_HARDWARE in r128SpanRenderStart */
+}
+
+void r128DDInitSpanFuncs( GLcontext *ctx )
+{
+   struct swrast_device_driver *hooks = _swrast_GetDeviceDriverReference(ctx);
+   hooks->SpanRenderFinish	= r128SpanRenderFinish;	/* flush swrast, idle, unlock */
+   hooks->SpanRenderStart	= r128SpanRenderStart;	/* flush batch, lock, idle */
+}
+
+
+/**
+ * Plug in the Get/Put routines matching drb->Base.Format (vis is unused).
+ */
+void
+r128SetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
+{
+   if (drb->Base.Format == MESA_FORMAT_RGB565) {
+      r128InitPointers_RGB565(&drb->Base);
+   }
+   else if (drb->Base.Format == MESA_FORMAT_ARGB8888) {
+      r128InitPointers_ARGB8888(&drb->Base);
+   }
+   else if (drb->Base.Format == MESA_FORMAT_Z16) {
+      r128InitDepthPointers_z16(&drb->Base);
+   }
+   else if (drb->Base.Format == MESA_FORMAT_S8_Z24) {
+      r128InitDepthPointers_z24_s8(&drb->Base);
+   }
+   else if (drb->Base.Format == MESA_FORMAT_S8) {
+      radeonInitStencilPointers_z24_s8(&drb->Base); /* stencil template uses a "radeon" TAG in this file */
+   }
+}
diff --git a/src/mesa/drivers/dri/r128/r128_span.h b/src/mesa/drivers/dri/r128/r128_span.h
new file mode 100644
index 0000000000..9af4058129
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_span.h
@@ -0,0 +1,45 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *
+ */
+
+#ifndef __R128_SPAN_H__
+#define __R128_SPAN_H__
+
+#include "drirenderbuffer.h"
+
+extern void r128DDInitSpanFuncs( GLcontext *ctx );
+
+extern void
+r128SetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
+
+#endif
diff --git a/src/mesa/drivers/dri/r128/r128_state.c b/src/mesa/drivers/dri/r128/r128_state.c
new file mode 100644
index 0000000000..4d773feaaa
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_state.c
@@ -0,0 +1,1441 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "r128_context.h"
+#include "r128_state.h"
+#include "r128_ioctl.h"
+#include "r128_tris.h"
+#include "r128_tex.h"
+
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/colormac.h"
+#include "swrast/swrast.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "drirenderbuffer.h"
+
+
+/* =============================================================
+ * Alpha blending
+ */
+
+
+/**
+ * Translate a GL blend factor enum into the matching R128_ALPHA_BLEND_*
+ * value.  The one routine serves both the source and the destination
+ * factor; \p is_src only matters for the cases noted below.
+ *
+ * \param rmesa   driver context, used to raise a fallback.
+ * \param factor  GL blend factor to translate.
+ * \param is_src  GL_TRUE for a source factor, GL_FALSE for a destination.
+ *
+ * \returns
+ * The unshifted register field; the caller shifts it into the source or
+ * destination position.
+ *
+ * \todo
+ * The src/dst-specific cases look like error cases; check if removable.
+ */
+static int blend_factor( r128ContextPtr rmesa, GLenum factor, GLboolean is_src )
+{
+   switch ( factor ) {
+   /* Trivial factors */
+   case GL_ZERO:
+      return R128_ALPHA_BLEND_ZERO;
+   case GL_ONE:
+      return R128_ALPHA_BLEND_ONE;
+
+   /* GL_SRC_* factors */
+   case GL_SRC_COLOR:
+      return R128_ALPHA_BLEND_SRCCOLOR;
+   case GL_ONE_MINUS_SRC_COLOR:
+      return R128_ALPHA_BLEND_INVSRCCOLOR;
+   case GL_SRC_ALPHA:
+      return R128_ALPHA_BLEND_SRCALPHA;
+   case GL_ONE_MINUS_SRC_ALPHA:
+      return R128_ALPHA_BLEND_INVSRCALPHA;
+   case GL_SRC_ALPHA_SATURATE:
+      /* As a destination factor this is mapped to ZERO. */
+      if ( is_src )
+         return R128_ALPHA_BLEND_SAT;
+      return R128_ALPHA_BLEND_ZERO;
+
+   /* GL_DST_* factors */
+   case GL_DST_COLOR:
+      return R128_ALPHA_BLEND_DSTCOLOR;
+   case GL_ONE_MINUS_DST_COLOR:
+      return R128_ALPHA_BLEND_INVDSTCOLOR;
+   case GL_DST_ALPHA:
+      return R128_ALPHA_BLEND_DSTALPHA;
+   case GL_ONE_MINUS_DST_ALPHA:
+      return R128_ALPHA_BLEND_INVDSTALPHA;
+
+   /* GL_CONSTANT_* factors share the fallback path with unknown enums */
+   case GL_CONSTANT_COLOR:
+   case GL_ONE_MINUS_CONSTANT_COLOR:
+   case GL_CONSTANT_ALPHA:
+   case GL_ONE_MINUS_CONSTANT_ALPHA:
+   default:
+      break;
+   }
+
+   /* Not translated above: raise the blend-func fallback and hand back
+    * ONE for a source factor or ZERO for a destination factor.
+    */
+   FALLBACK( rmesa, R128_FALLBACK_BLEND_FUNC, GL_TRUE );
+   if ( is_src )
+      return R128_ALPHA_BLEND_ONE;
+   return R128_ALPHA_BLEND_ZERO;
+}
+
+
+static void r128UpdateAlphaMode( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   GLuint a = rmesa->setup.misc_3d_state_cntl_reg;
+   GLuint t = rmesa->setup.tex_cntl_c;
+   /* a and t are working copies; they are stored back only if changed. */
+   if ( ctx->Color.AlphaEnabled ) {
+      GLubyte ref;
+
+      CLAMPED_FLOAT_TO_UBYTE(ref, ctx->Color.AlphaRef);
+
+      a &= ~(R128_ALPHA_TEST_MASK | R128_REF_ALPHA_MASK);
+
+      switch ( ctx->Color.AlphaFunc ) {
+      case GL_NEVER:
+	 a |= R128_ALPHA_TEST_NEVER;
+	 break;
+      case GL_LESS:
+	 a |= R128_ALPHA_TEST_LESS;
+         break;
+      case GL_LEQUAL:
+	 a |= R128_ALPHA_TEST_LESSEQUAL;
+	 break;
+      case GL_EQUAL:
+	 a |= R128_ALPHA_TEST_EQUAL;
+	 break;
+      case GL_GEQUAL:
+	 a |= R128_ALPHA_TEST_GREATEREQUAL;
+	 break;
+      case GL_GREATER:
+	 a |= R128_ALPHA_TEST_GREATER;
+	 break;
+      case GL_NOTEQUAL:
+	 a |= R128_ALPHA_TEST_NEQUAL;
+	 break;
+      case GL_ALWAYS:
+	 a |= R128_ALPHA_TEST_ALWAYS;
+	 break;
+      }
+
+      a |= ref & R128_REF_ALPHA_MASK;
+      t |= R128_ALPHA_TEST_ENABLE;
+   } else {
+      t &= ~R128_ALPHA_TEST_ENABLE;
+   }
+
+   FALLBACK( rmesa, R128_FALLBACK_BLEND_FUNC, GL_FALSE ); /* cleared here; blend_factor() may raise it again */
+   /* Blend factors and equation are placed in a; the enable bit in t. */
+   if ( ctx->Color.BlendEnabled ) {
+      a &= ~((R128_ALPHA_BLEND_MASK << R128_ALPHA_BLEND_SRC_SHIFT) |
+	     (R128_ALPHA_BLEND_MASK << R128_ALPHA_BLEND_DST_SHIFT)
+	     | R128_ALPHA_COMB_FCN_MASK);
+
+      a |= blend_factor( rmesa, ctx->Color.BlendSrcRGB, GL_TRUE ) 
+	  << R128_ALPHA_BLEND_SRC_SHIFT;
+      a |= blend_factor( rmesa, ctx->Color.BlendDstRGB, GL_FALSE ) 
+	  << R128_ALPHA_BLEND_DST_SHIFT;
+
+      switch (ctx->Color.BlendEquationRGB) {
+      case GL_FUNC_ADD:
+	 a |= R128_ALPHA_COMB_ADD_CLAMP;
+	 break;
+      case GL_FUNC_SUBTRACT:
+	 a |= R128_ALPHA_COMB_SUB_SRC_DST_CLAMP;
+	 break;
+      default:
+	 FALLBACK( rmesa, R128_FALLBACK_BLEND_EQ, GL_TRUE ); /* any other equation */
+      }
+
+      t |=  R128_ALPHA_ENABLE;
+   } else {
+      t &= ~R128_ALPHA_ENABLE;
+   }
+
+   if ( rmesa->setup.misc_3d_state_cntl_reg != a ) {
+      rmesa->setup.misc_3d_state_cntl_reg = a;
+      rmesa->dirty |= R128_UPLOAD_CONTEXT | R128_UPLOAD_MASKS;
+   }
+   if ( rmesa->setup.tex_cntl_c != t ) {
+      rmesa->setup.tex_cntl_c = t;
+      rmesa->dirty |= R128_UPLOAD_CONTEXT | R128_UPLOAD_MASKS;
+   }
+}
+
+static void r128DDAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   /* func and ref are not used here; only the R128_NEW_ALPHA flag is set. */
+   FLUSH_BATCH( rmesa );
+   rmesa->new_state |= R128_NEW_ALPHA;
+}
+
+static void r128DDBlendEquationSeparate( GLcontext *ctx, 
+					 GLenum modeRGB, GLenum modeA )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+
+   assert( modeRGB == modeA );
+   FLUSH_BATCH( rmesa );
+
+   /* BlendEquation sets ColorLogicOpEnabled in an unexpected
+    * manner.
+    */
+   FALLBACK( R128_CONTEXT(ctx), R128_FALLBACK_LOGICOP,
+	     (ctx->Color.ColorLogicOpEnabled &&
+	      ctx->Color.LogicOp != GL_COPY));
+
+   /* Only add and subtract are accepted; min, max, etc. fall back. */
+   FALLBACK( R128_CONTEXT(ctx), R128_FALLBACK_BLEND_EQ,
+	     (modeRGB != GL_FUNC_ADD) && (modeRGB != GL_FUNC_SUBTRACT));
+
+   rmesa->new_state |= R128_NEW_ALPHA;
+}
+
+static void r128DDBlendFuncSeparate( GLcontext *ctx,
+				     GLenum sfactorRGB, GLenum dfactorRGB,
+				     GLenum sfactorA, GLenum dfactorA )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   /* The factor arguments are not used here; only R128_NEW_ALPHA is set. */
+   FLUSH_BATCH( rmesa );
+   rmesa->new_state |= R128_NEW_ALPHA;
+}
+
+/* =============================================================
+ * Stencil
+ */
+
+static void
+r128DDStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
+                           GLint ref, GLuint mask )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << 0) |
+		     ((ctx->Stencil.ValueMask[0] & 0xff) << 16) |
+		     ((ctx->Stencil.WriteMask[0] & 0xff) << 24)); 
+   GLuint z = rmesa->setup.z_sten_cntl_c;
+   /* face/func/ref/mask are ignored; state comes from ctx->Stencil, index 0. */
+   z &= ~R128_STENCIL_TEST_MASK;
+   switch ( ctx->Stencil.Function[0] ) {
+   case GL_NEVER:
+      z |= R128_STENCIL_TEST_NEVER;
+      break;
+   case GL_LESS:
+      z |= R128_STENCIL_TEST_LESS;
+      break;
+   case GL_EQUAL:
+      z |= R128_STENCIL_TEST_EQUAL;
+      break;
+   case GL_LEQUAL:
+      z |= R128_STENCIL_TEST_LESSEQUAL;
+      break;
+   case GL_GREATER:
+      z |= R128_STENCIL_TEST_GREATER;
+      break;
+   case GL_NOTEQUAL:
+      z |= R128_STENCIL_TEST_NEQUAL;
+      break;
+   case GL_GEQUAL:
+      z |= R128_STENCIL_TEST_GREATEREQUAL;
+      break;
+   case GL_ALWAYS:
+      z |= R128_STENCIL_TEST_ALWAYS;
+      break;
+   }
+   /* NOTE(review): no stencil-visual check here, unlike r128DDStencilOpSeparate - confirm. */
+   if ( rmesa->setup.sten_ref_mask_c != refmask ) {
+      rmesa->setup.sten_ref_mask_c = refmask;
+      rmesa->dirty |= R128_UPLOAD_MASKS;
+   }
+   if ( rmesa->setup.z_sten_cntl_c != z ) {
+      rmesa->setup.z_sten_cntl_c = z;
+      rmesa->dirty |= R128_UPLOAD_CONTEXT;
+   }
+}
+
+static void
+r128DDStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   /* Pack ref (bits 0-7), value mask (16-23) and write mask (24-31). */
+   const GLuint packed = (((ctx->Stencil.WriteMask[0] & 0xff) << 24) |
+			  ((ctx->Stencil.ValueMask[0] & 0xff) << 16) |
+			  ((ctx->Stencil.Ref[0] & 0xff) << 0));
+   if ( rmesa->setup.sten_ref_mask_c == packed )
+      return;			/* unchanged: nothing to flag for upload */
+   rmesa->setup.sten_ref_mask_c = packed;
+   rmesa->dirty |= R128_UPLOAD_MASKS;
+}
+
+static void r128DDStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
+                                     GLenum zfail, GLenum zpass )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   GLuint z = rmesa->setup.z_sten_cntl_c;
+   /* No-op unless the visual has stencil bits and a 24-bit depth buffer. */
+   if (!( ctx->Visual.stencilBits > 0 && ctx->Visual.depthBits == 24 ))
+      return;
+
+   z &= ~(R128_STENCIL_S_FAIL_MASK | R128_STENCIL_ZPASS_MASK |
+	  R128_STENCIL_ZFAIL_MASK);
+   /* fail/zfail/zpass arguments are ignored; ctx->Stencil.*Func[0] is used. */
+   switch ( ctx->Stencil.FailFunc[0] ) {
+   case GL_KEEP:
+      z |= R128_STENCIL_S_FAIL_KEEP;
+      break;
+   case GL_ZERO:
+      z |= R128_STENCIL_S_FAIL_ZERO;
+      break;
+   case GL_REPLACE:
+      z |= R128_STENCIL_S_FAIL_REPLACE;
+      break;
+   case GL_INCR:
+      z |= R128_STENCIL_S_FAIL_INC;
+      break;
+   case GL_DECR:
+      z |= R128_STENCIL_S_FAIL_DEC;
+      break;
+   case GL_INVERT:
+      z |= R128_STENCIL_S_FAIL_INV;
+      break;
+   case GL_INCR_WRAP:
+      z |= R128_STENCIL_S_FAIL_INC_WRAP;
+      break;
+   case GL_DECR_WRAP:
+      z |= R128_STENCIL_S_FAIL_DEC_WRAP;
+      break;
+   }
+
+   switch ( ctx->Stencil.ZFailFunc[0] ) {
+   case GL_KEEP:
+      z |= R128_STENCIL_ZFAIL_KEEP;
+      break;
+   case GL_ZERO:
+      z |= R128_STENCIL_ZFAIL_ZERO;
+      break;
+   case GL_REPLACE:
+      z |= R128_STENCIL_ZFAIL_REPLACE;
+      break;
+   case GL_INCR:
+      z |= R128_STENCIL_ZFAIL_INC;
+      break;
+   case GL_DECR:
+      z |= R128_STENCIL_ZFAIL_DEC;
+      break;
+   case GL_INVERT:
+      z |= R128_STENCIL_ZFAIL_INV;
+      break;
+   case GL_INCR_WRAP:
+      z |= R128_STENCIL_ZFAIL_INC_WRAP;
+      break;
+   case GL_DECR_WRAP:
+      z |= R128_STENCIL_ZFAIL_DEC_WRAP;
+      break;
+   }
+
+   switch ( ctx->Stencil.ZPassFunc[0] ) {
+   case GL_KEEP:
+      z |= R128_STENCIL_ZPASS_KEEP;
+      break;
+   case GL_ZERO:
+      z |= R128_STENCIL_ZPASS_ZERO;
+      break;
+   case GL_REPLACE:
+      z |= R128_STENCIL_ZPASS_REPLACE;
+      break;
+   case GL_INCR:
+      z |= R128_STENCIL_ZPASS_INC;
+      break;
+   case GL_DECR:
+      z |= R128_STENCIL_ZPASS_DEC;
+      break;
+   case GL_INVERT:
+      z |= R128_STENCIL_ZPASS_INV;
+      break;
+   case GL_INCR_WRAP:
+      z |= R128_STENCIL_ZPASS_INC_WRAP;
+      break;
+   case GL_DECR_WRAP:
+      z |= R128_STENCIL_ZPASS_DEC_WRAP;
+      break;
+   }
+   /* Store and flag for upload only when the register value changed. */
+   if ( rmesa->setup.z_sten_cntl_c != z ) {
+      rmesa->setup.z_sten_cntl_c = z;
+      rmesa->dirty |= R128_UPLOAD_CONTEXT;
+   }
+}
+
+static void r128DDClearStencil( GLcontext *ctx, GLint s )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   /* s is unused: ctx->Stencil.Clear goes in the top byte of ClearDepth. */
+   if (ctx->Visual.stencilBits > 0 && ctx->Visual.depthBits == 24) {
+      rmesa->ClearDepth &= 0x00ffffff;
+      rmesa->ClearDepth |= ((GLuint) ctx->Stencil.Clear & 0xff) << 24; /* unsigned shift */
+   }
+}
+
+/* =============================================================
+ * Depth testing
+ */
+
+static void r128UpdateZMode( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   GLuint z = rmesa->setup.z_sten_cntl_c;
+   GLuint t = rmesa->setup.tex_cntl_c;
+   /* z takes the depth compare function, t the enable and write-enable bits. */
+   if ( ctx->Depth.Test ) {
+      z &= ~R128_Z_TEST_MASK;
+
+      switch ( ctx->Depth.Func ) {
+      case GL_NEVER:
+	 z |= R128_Z_TEST_NEVER;
+	 break;
+      case GL_ALWAYS:
+	 z |= R128_Z_TEST_ALWAYS;
+	 break;
+      case GL_LESS:
+	 z |= R128_Z_TEST_LESS;
+	 break;
+      case GL_LEQUAL:
+	 z |= R128_Z_TEST_LESSEQUAL;
+	 break;
+      case GL_EQUAL:
+	 z |= R128_Z_TEST_EQUAL;
+	 break;
+      case GL_GEQUAL:
+	 z |= R128_Z_TEST_GREATEREQUAL;
+	 break;
+      case GL_GREATER:
+	 z |= R128_Z_TEST_GREATER;
+	 break;
+      case GL_NOTEQUAL:
+	 z |= R128_Z_TEST_NEQUAL;
+	 break;
+      }
+
+      t |=  R128_Z_ENABLE;
+   } else {
+      t &= ~R128_Z_ENABLE;
+   }
+
+   if ( ctx->Depth.Mask ) {
+      t |=  R128_Z_WRITE_ENABLE;
+   } else {
+      t &= ~R128_Z_WRITE_ENABLE;
+   }
+   /* Store back and flag for upload only the values that changed. */
+   if ( rmesa->setup.z_sten_cntl_c != z ) {
+      rmesa->setup.z_sten_cntl_c = z;
+      rmesa->dirty |= R128_UPLOAD_CONTEXT;
+   }
+   if ( rmesa->setup.tex_cntl_c != t ) {
+      rmesa->setup.tex_cntl_c = t;
+      rmesa->dirty |= R128_UPLOAD_CONTEXT;
+   }
+}
+
+static void r128DDDepthFunc( GLcontext *ctx, GLenum func )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   /* 'func' is unused: r128UpdateZMode() rereads ctx->Depth.Func later. */
+   FLUSH_BATCH( rmesa );
+   rmesa->new_state |= R128_NEW_DEPTH;   /* consumed by r128DDUpdateHWState() */
+}
+
+static void r128DDDepthMask( GLcontext *ctx, GLboolean flag )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   /* 'flag' is unused: r128UpdateZMode() rereads ctx->Depth.Mask later. */
+   FLUSH_BATCH( rmesa );
+   rmesa->new_state |= R128_NEW_DEPTH;   /* consumed by r128DDUpdateHWState() */
+}
+
+static void r128DDClearDepth( GLcontext *ctx, GLclampd d )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   const GLuint zWidth = rmesa->setup.z_sten_cntl_c & R128_Z_PIX_WIDTH_MASK;
+   /* Scale 'd' to the integer range of the programmed depth width; any
+    * other width leaves the stored clear value untouched.
+    */
+   if ( zWidth == R128_Z_PIX_WIDTH_16 ) {
+      rmesa->ClearDepth = d * 0x0000ffff;
+   } else if ( zWidth == R128_Z_PIX_WIDTH_24 ) {
+      /* The stencil clear value occupies the byte above the 24 depth bits. */
+      rmesa->ClearDepth = d * 0x00ffffff;
+      rmesa->ClearDepth |= ctx->Stencil.Clear << 24;
+   } else if ( zWidth == R128_Z_PIX_WIDTH_32 ) {
+      rmesa->ClearDepth = d * 0xffffffff;
+   }
+}
+
+
+/* =============================================================
+ * Fog
+ */
+
+static void r128UpdateFogAttrib( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   GLuint texcntl = rmesa->setup.tex_cntl_c;
+   GLubyte red, green, blue;
+   GLuint packed;
+
+   /* Fog colour: convert each float channel to a byte, then pack it as a
+    * 4-byte-per-pixel colour with the alpha argument fixed at zero.
+    */
+   red   = FLOAT_TO_UBYTE( ctx->Fog.Color[0] );
+   green = FLOAT_TO_UBYTE( ctx->Fog.Color[1] );
+   blue  = FLOAT_TO_UBYTE( ctx->Fog.Color[2] );
+   packed = r128PackColor( 4, red, green, blue, 0 );
+
+   if ( rmesa->setup.fog_color_c != packed ) {
+      rmesa->setup.fog_color_c = packed;
+      rmesa->dirty |= R128_UPLOAD_CONTEXT;
+   }
+
+   /* The fog enable bit mirrors ctx->Fog.Enabled. */
+   texcntl = ctx->Fog.Enabled ? (texcntl | R128_FOG_ENABLE)
+                              : (texcntl & ~R128_FOG_ENABLE);
+   if ( rmesa->setup.tex_cntl_c != texcntl ) {
+      rmesa->setup.tex_cntl_c = texcntl;
+      rmesa->dirty |= R128_UPLOAD_CONTEXT;
+   }
+}
+
+static void r128DDFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   /* pname/param are unused: r128UpdateFogAttrib() rereads ctx->Fog later. */
+   FLUSH_BATCH( rmesa );
+   rmesa->new_state |= R128_NEW_FOG;   /* consumed by r128DDUpdateHWState() */
+}
+
+
+/* =============================================================
+ * Clipping
+ */
+
+static void r128UpdateClipping( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   __DRIdrawable *draw = rmesa->driDrawable;
+   int left, top, right, bottom;
+   int maxX, maxY;
+
+   /* Without a drawable there is nothing to clip against. */
+   if ( !draw )
+      return;
+
+   /* Start from the whole drawable, in drawable-relative coordinates. */
+   left   = 0;
+   top    = 0;
+   right  = draw->w - 1;
+   bottom = draw->h - 1;
+
+   /* Shrink to the scissor box when enabled.  The scissor's Y extent is
+    * mirrored against the drawable height before it is compared.
+    */
+   if ( ctx->Scissor.Enabled ) {
+      const int sLeft   = ctx->Scissor.X;
+      const int sTop    = draw->h - ctx->Scissor.Y - ctx->Scissor.Height;
+      const int sRight  = ctx->Scissor.X + ctx->Scissor.Width - 1;
+      const int sBottom = draw->h - ctx->Scissor.Y - 1;
+
+      if ( sLeft   > left )   left   = sLeft;
+      if ( sTop    > top )    top    = sTop;
+      if ( sRight  < right )  right  = sRight;
+      if ( sBottom < bottom ) bottom = sBottom;
+   }
+
+   /* Move from drawable-relative to screen coordinates. */
+   left   += draw->x;
+   top    += draw->y;
+   right  += draw->x;
+   bottom += draw->y;
+
+   /* Clamp values to screen to avoid wrapping problems */
+   maxX = rmesa->driScreen->fbWidth - 1;
+   maxY = rmesa->driScreen->fbHeight - 1;
+
+   left   = ( left   < 0 ) ? 0 : ( left   > maxX ) ? maxX : left;
+   top    = ( top    < 0 ) ? 0 : ( top    > maxY ) ? maxY : top;
+   right  = ( right  < 0 ) ? 0 : ( right  > maxX ) ? maxX : right;
+   bottom = ( bottom < 0 ) ? 0 : ( bottom > maxY ) ? maxY : bottom;
+
+   /* Each corner: 14 bits of Y in the upper half, 14 bits of X below. */
+   rmesa->setup.sc_top_left_c =
+      (((top & 0x3FFF) << 16) | (left & 0x3FFF));
+   rmesa->setup.sc_bottom_right_c =
+      (((bottom & 0x3FFF) << 16) | (right & 0x3FFF));
+
+   rmesa->dirty |= R128_UPLOAD_CONTEXT;
+}
+
+static void r128DDScissor( GLcontext *ctx,
+			   GLint x, GLint y, GLsizei w, GLsizei h )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   /* x/y/w/h are unused: r128UpdateClipping() rereads ctx->Scissor later. */
+   FLUSH_BATCH( rmesa );
+   rmesa->new_state |= R128_NEW_CLIP;   /* consumed by r128DDUpdateHWState() */
+}
+
+
+/* =============================================================
+ * Culling
+ */
+
+static void r128UpdateCull( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   GLuint fpu = rmesa->setup.pm4_vc_fpu_setup & ~R128_FRONT_DIR_MASK;
+   GLuint culled = 0;
+
+   /* Winding order that counts as front-facing; any other enum leaves
+    * the direction field cleared.
+    */
+   if ( ctx->Polygon.FrontFace == GL_CW ) {
+      fpu |= R128_FRONT_DIR_CW;
+   } else if ( ctx->Polygon.FrontFace == GL_CCW ) {
+      fpu |= R128_FRONT_DIR_CCW;
+   }
+
+   /* Collect the face bits that culling removes, if it is enabled. */
+   if ( ctx->Polygon.CullFlag ) {
+      switch ( ctx->Polygon.CullFaceMode ) {
+      case GL_FRONT:
+         culled = R128_FRONTFACE_SOLID;
+         break;
+      case GL_BACK:
+         culled = R128_BACKFACE_SOLID;
+         break;
+      case GL_FRONT_AND_BACK:
+         culled = R128_BACKFACE_SOLID | R128_FRONTFACE_SOLID;
+         break;
+      }
+   }
+
+   /* Both faces solid, minus whatever is culled. */
+   fpu = (fpu | R128_BACKFACE_SOLID | R128_FRONTFACE_SOLID) & ~culled;
+
+   /* Stored and flagged on every call, whether or not the value
+    * actually changed. */
+   rmesa->setup.pm4_vc_fpu_setup = fpu;
+   rmesa->dirty |= R128_UPLOAD_CONTEXT | R128_UPLOAD_SETUP;
+}
+
+static void r128DDCullFace( GLcontext *ctx, GLenum mode )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   /* 'mode' is unused: r128UpdateCull() rereads ctx->Polygon later. */
+   FLUSH_BATCH( rmesa );
+   rmesa->new_state |= R128_NEW_CULL;   /* consumed by r128DDUpdateHWState() */
+}
+
+static void r128DDFrontFace( GLcontext *ctx, GLenum mode )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   /* 'mode' is unused: r128UpdateCull() rereads ctx->Polygon.FrontFace. */
+   FLUSH_BATCH( rmesa );
+   rmesa->new_state |= R128_NEW_CULL;   /* consumed by r128DDUpdateHWState() */
+}
+
+
+/* =============================================================
+ * Masks
+ */
+
+static void r128UpdateMasks( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   GLuint planeMask;
+   /* Pack draw buffer 0's per-channel write mask like a pixel value. */
+   planeMask = r128PackColor( rmesa->r128Screen->cpp,
+                              ctx->Color.ColorMask[0][RCOMP],
+                              ctx->Color.ColorMask[0][GCOMP],
+                              ctx->Color.ColorMask[0][BCOMP],
+                              ctx->Color.ColorMask[0][ACOMP] );
+   if ( rmesa->setup.plane_3d_mask_c == planeMask )
+      return;
+   rmesa->setup.plane_3d_mask_c = planeMask;
+   rmesa->dirty |= R128_UPLOAD_CONTEXT | R128_UPLOAD_MASKS;
+}
+
+static void r128DDColorMask( GLcontext *ctx,
+			     GLboolean r, GLboolean g,
+			     GLboolean b, GLboolean a )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   /* r/g/b/a are unused: r128UpdateMasks() rereads ctx->Color.ColorMask[0]. */
+   FLUSH_BATCH( rmesa );
+   rmesa->new_state |= R128_NEW_MASKS;   /* consumed by r128DDUpdateHWState() */
+}
+
+
+/* =============================================================
+ * Rendering attributes
+ *
+ * We really don't want to recalculate all this every time we bind a
+ * texture.  These things shouldn't change all that often, so it makes
+ * sense to break them out of the core texture state update routines.
+ */
+
+static void updateSpecularLighting( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   GLuint texcntl = rmesa->setup.tex_cntl_c;
+   GLboolean swFallback = GL_FALSE;
+
+   /* Start from "specular off, no fallback"; only a request for a
+    * secondary colour changes either one.
+    */
+   texcntl &= ~R128_SPEC_LIGHT_ENABLE;
+
+   if ( NEED_SECONDARY_COLOR( ctx ) ) {
+      if ( ctx->Light.ShadeModel != GL_FLAT ) {
+         texcntl |= R128_SPEC_LIGHT_ENABLE;
+      } else {
+         /* R128 can't do flat-shaded separate specular */
+         swFallback = GL_TRUE;
+      }
+   }
+   FALLBACK( rmesa, R128_FALLBACK_SEP_SPECULAR, swFallback );
+
+   if ( rmesa->setup.tex_cntl_c == texcntl )
+      return;
+
+   rmesa->setup.tex_cntl_c = texcntl;
+   rmesa->dirty |= R128_UPLOAD_CONTEXT | R128_UPLOAD_SETUP;
+   rmesa->new_state |= R128_NEW_CONTEXT;
+}
+
+
+static void r128DDLightModelfv( GLcontext *ctx, GLenum pname,
+                                const GLfloat *param )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   /* Only these two parameters are acted on; 'param' is never read. */
+   if ( pname != GL_LIGHT_MODEL_COLOR_CONTROL &&
+        pname != GL_LIGHT_MODEL_TWO_SIDE )
+      return;
+
+   FLUSH_BATCH( rmesa );
+   if ( pname == GL_LIGHT_MODEL_COLOR_CONTROL )
+      updateSpecularLighting(ctx);
+   else
+      r128ChooseRenderState( ctx );
+}
+
+static void r128DDShadeModel( GLcontext *ctx, GLenum mode )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   GLuint fpu = rmesa->setup.pm4_vc_fpu_setup & ~R128_FPU_COLOR_MASK;
+
+   if ( mode == GL_FLAT ) {
+      fpu |= R128_FPU_COLOR_FLAT;
+   } else if ( mode == GL_SMOOTH ) {
+      fpu |= R128_FPU_COLOR_GOURAUD;
+   } else {
+      /* Unrecognised shade model: leave everything untouched. */
+      return;
+   }
+
+   /* updateSpecularLighting() reads ctx->Light.ShadeModel, so it is
+    * rerun for every valid mode, before the register comparison below.
+    */
+   updateSpecularLighting(ctx);
+
+   if ( rmesa->setup.pm4_vc_fpu_setup == fpu )
+      return;
+
+   FLUSH_BATCH( rmesa );
+   rmesa->setup.pm4_vc_fpu_setup = fpu;
+
+   rmesa->new_state |= R128_NEW_CONTEXT;
+   rmesa->dirty |= R128_UPLOAD_SETUP;
+}
+
+
+/* =============================================================
+ * Window position
+ */
+
+static void r128UpdateWindow( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   int x = rmesa->driDrawable->x;
+   int y = rmesa->driDrawable->y;
+   struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
+   driRenderbuffer *drb = (driRenderbuffer *) rb;
+   /* Drawable origin, low 12 bits of each axis, packed into one register. */
+   rmesa->setup.window_xy_offset = (((y & 0xFFF) << R128_WINDOW_Y_SHIFT) |
+				    ((x & 0xFFF) << R128_WINDOW_X_SHIFT));
+   /* Colour buffer pitch/8 in bits 21 and up, offset>>5 in the bits below. */
+   rmesa->setup.dst_pitch_offset_c = (((drb->flippedPitch/8) << 21) |
+                                      (drb->flippedOffset >> 5));
+   /* NOTE(review): driDrawable and drb are dereferenced unchecked -- confirm. */
+
+   rmesa->dirty |= R128_UPLOAD_CONTEXT | R128_UPLOAD_WINDOW;
+}
+
+
+/* =============================================================
+ * Viewport
+ */
+
+static void r128CalcViewport( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   GLfloat *m = rmesa->hw_viewport;
+   /* Build hw_viewport from Mesa's window map: Y negated, Z rescaled. */
+   /* See also r128_translate_vertex.
+    */
+   m[MAT_SX] =   v[MAT_SX];
+   m[MAT_TX] =   v[MAT_TX] + SUBPIXEL_X;
+   m[MAT_SY] = - v[MAT_SY];                     /* Y scale is negated */
+   m[MAT_TY] = - v[MAT_TY] + rmesa->driDrawable->h + SUBPIXEL_Y;
+   m[MAT_SZ] =   v[MAT_SZ] * rmesa->depth_scale;
+   m[MAT_TZ] =   v[MAT_TZ] * rmesa->depth_scale;
+}
+
+static void r128Viewport( GLcontext *ctx,
+			  GLint x, GLint y,
+			  GLsizei width, GLsizei height )
+{
+   r128CalcViewport( ctx );   /* x/y/width/height unused; values come from ctx */
+}
+
+static void r128DepthRange( GLcontext *ctx,
+			    GLclampd nearval, GLclampd farval )
+{
+   r128CalcViewport( ctx );   /* nearval/farval unused; values come from ctx */
+}
+
+
+/* =============================================================
+ * Miscellaneous
+ */
+
+static void r128DDClearColor( GLcontext *ctx,
+                              const GLfloat color[4] )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   GLubyte rgba[4];
+   int i;
+
+   /* Convert each float channel to a byte, then pack for the screen cpp. */
+   for ( i = 0 ; i < 4 ; i++ ) {
+      CLAMPED_FLOAT_TO_UBYTE( rgba[i], color[i] );
+   }
+   rmesa->ClearColor = r128PackColor( rmesa->r128Screen->cpp,
+                                      rgba[0], rgba[1], rgba[2], rgba[3] );
+}
+
+static void r128DDLogicOpCode( GLcontext *ctx, GLenum opcode )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+
+   /* The opcode only matters while colour logic ops are enabled. */
+   if ( !ctx->Color.ColorLogicOpEnabled )
+      return;
+   FLUSH_BATCH( rmesa );
+   FALLBACK( rmesa, R128_FALLBACK_LOGICOP, opcode != GL_COPY );
+}
+
+static void r128DDDrawBuffer( GLcontext *ctx, GLenum mode )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   GLboolean swOnly;
+
+   FLUSH_BATCH( rmesa );
+
+   if ( ctx->DrawBuffer->_NumColorDrawBuffers != 1 ) {
+      /* GL_NONE or GL_FRONT_AND_BACK or stereo left&right, etc */
+      FALLBACK( rmesa, R128_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   /* Exactly one colour buffer: the fallback is lifted for front-left
+    * and back-left, and raised for every other buffer index.
+    */
+   swOnly =
+      (ctx->DrawBuffer->_ColorDrawBufferIndexes[0] != BUFFER_FRONT_LEFT &&
+       ctx->DrawBuffer->_ColorDrawBufferIndexes[0] != BUFFER_BACK_LEFT);
+   FALLBACK( rmesa, R128_FALLBACK_DRAW_BUFFER, swOnly );
+
+   /* r128UpdateWindow() reads the bound colour buffer, so schedule a
+    * window update.
+    */
+   rmesa->new_state |= R128_NEW_WINDOW;
+}
+
+static void r128DDReadBuffer( GLcontext *ctx, GLenum mode )
+{
+   /* Intentionally empty until h/w glRead/CopyPixels or CopyTexImage exist. */
+}
+
+
+/* =============================================================
+ * Polygon stipple
+ */
+
+static void r128DDPolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   GLuint stipple[32], i;
+   drm_r128_stipple_t stippleRec;
+   /* Rows are packed MSB-first and stored in reverse order.  Cast before
+    * shifting: a promoted GLubyte >= 0x80 shifted by 24 overflows int. */
+   for (i = 0; i < 32; i++) {
+      stipple[31 - i] = (((GLuint) mask[i*4+0] << 24) |
+                         ((GLuint) mask[i*4+1] << 16) |
+                         ((GLuint) mask[i*4+2] << 8)  |
+                         ((GLuint) mask[i*4+3]));
+   }
+
+   FLUSH_BATCH( rmesa );
+   LOCK_HARDWARE( rmesa );
+
+   stippleRec.mask = stipple;
+   drmCommandWrite( rmesa->driFd, DRM_R128_STIPPLE, 
+                    &stippleRec, sizeof(stippleRec) );
+   UNLOCK_HARDWARE( rmesa );
+
+   rmesa->new_state |= R128_NEW_CONTEXT;
+   rmesa->dirty |= R128_UPLOAD_CONTEXT;
+}
+
+
+/* =============================================================
+ * Render mode
+ */
+
+static void r128DDRenderMode( GLcontext *ctx, GLenum mode )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);   /* any mode but GL_RENDER => fallback */
+   FALLBACK( rmesa, R128_FALLBACK_RENDER_MODE, (mode != GL_RENDER) );
+}
+
+
+
+/* =============================================================
+ * State enable/disable
+ */
+
+static void r128DDEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   /* Dispatch on the capability being toggled; unhandled ones hit 'default'. */
+   if ( R128_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %s = %s )\n",
+	       __FUNCTION__, _mesa_lookup_enum_by_nr( cap ),
+	       state ? "GL_TRUE" : "GL_FALSE" );
+   }
+
+   switch ( cap ) {
+   case GL_ALPHA_TEST:
+      FLUSH_BATCH( rmesa );
+      rmesa->new_state |= R128_NEW_ALPHA;
+      break;
+
+   case GL_BLEND:
+      FLUSH_BATCH( rmesa );
+      rmesa->new_state |= R128_NEW_ALPHA;
+
+      /* For some reason enable(GL_BLEND) affects ColorLogicOpEnabled.
+       */
+      FALLBACK( rmesa, R128_FALLBACK_LOGICOP,
+		(ctx->Color.ColorLogicOpEnabled &&
+		 ctx->Color.LogicOp != GL_COPY));
+      break;
+
+   case GL_CULL_FACE:
+      FLUSH_BATCH( rmesa );
+      rmesa->new_state |= R128_NEW_CULL;
+      break;
+
+   case GL_DEPTH_TEST:
+      FLUSH_BATCH( rmesa );
+      rmesa->new_state |= R128_NEW_DEPTH;
+      break;
+
+   case GL_DITHER:
+      do {
+	 GLuint t = rmesa->setup.tex_cntl_c;
+	 FLUSH_BATCH( rmesa );
+
+	 if ( ctx->Color.DitherFlag ) {
+	    t |=  R128_DITHER_ENABLE;
+	 } else {
+	    t &= ~R128_DITHER_ENABLE;
+	 }
+
+	 if ( rmesa->setup.tex_cntl_c != t ) {
+	    rmesa->setup.tex_cntl_c = t;
+	    rmesa->dirty |= R128_UPLOAD_CONTEXT;
+	 }
+      } while (0);
+      break;
+
+   case GL_FOG:
+      FLUSH_BATCH( rmesa );
+      rmesa->new_state |= R128_NEW_FOG;
+      break;
+
+   case GL_COLOR_LOGIC_OP:
+      FLUSH_BATCH( rmesa );
+      FALLBACK( rmesa, R128_FALLBACK_LOGICOP,
+		state && ctx->Color.LogicOp != GL_COPY );
+      break;
+   /* updateSpecularLighting() rereads the GL state; 'state' is not consulted. */
+   case GL_LIGHTING:
+   case GL_COLOR_SUM_EXT:
+      updateSpecularLighting(ctx);
+      break;
+
+   case GL_SCISSOR_TEST:
+      FLUSH_BATCH( rmesa );
+      rmesa->scissor = state;
+      rmesa->new_state |= R128_NEW_CLIP;
+      break;
+
+   case GL_STENCIL_TEST:
+      FLUSH_BATCH( rmesa );
+      if ( ctx->Visual.stencilBits > 0 && ctx->Visual.depthBits == 24 ) {
+	 if ( state ) {
+	    rmesa->setup.tex_cntl_c |=  R128_STENCIL_ENABLE;
+	    /* Reset the fallback (if any) for bad stencil funcs */
+	    r128DDStencilOpSeparate( ctx, 0, ctx->Stencil.FailFunc[0],
+				     ctx->Stencil.ZFailFunc[0],
+				     ctx->Stencil.ZPassFunc[0] );
+	 } else {
+	    rmesa->setup.tex_cntl_c &= ~R128_STENCIL_ENABLE;
+	    FALLBACK( rmesa, R128_FALLBACK_STENCIL, GL_FALSE );
+	 }
+	 rmesa->dirty |= R128_UPLOAD_CONTEXT;
+      } else {
+	 FALLBACK( rmesa, R128_FALLBACK_STENCIL, state );
+      }
+      break;
+   /* Texture enables only flush; no new_state flag is raised in this function. */
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+      FLUSH_BATCH( rmesa );
+      break;
+   /* Stipple switches the GMC brush field, only while drawing GL_TRIANGLES. */
+   case GL_POLYGON_STIPPLE:
+      if ( rmesa->render_primitive == GL_TRIANGLES ) {
+	 FLUSH_BATCH( rmesa );
+	 rmesa->setup.dp_gui_master_cntl_c &= ~R128_GMC_BRUSH_NONE;
+	 if ( state ) {
+	    rmesa->setup.dp_gui_master_cntl_c |=
+	       R128_GMC_BRUSH_32x32_MONO_FG_LA;
+	 } else {
+	    rmesa->setup.dp_gui_master_cntl_c |=
+	       R128_GMC_BRUSH_SOLID_COLOR;
+	 }
+	 rmesa->new_state |= R128_NEW_CONTEXT;
+	 rmesa->dirty |= R128_UPLOAD_CONTEXT;
+      }
+      break;
+   /* Every other capability is ignored by this hook. */
+   default:
+      return;
+   }
+}
+
+
+/* =============================================================
+ * State initialization, management
+ */
+
+static void r128DDPrintDirty( const char *msg, GLuint state )
+{
+   fprintf( stderr,   /* one line: msg, raw mask in hex, a label per set flag */
+	    "%s: (0x%x) %s%s%s%s%s%s%s%s%s\n",
+	    msg,
+	    state,
+	    (state & R128_UPLOAD_CORE)		? "core, " : "",
+	    (state & R128_UPLOAD_CONTEXT)	? "context, " : "",
+	    (state & R128_UPLOAD_SETUP)		? "setup, " : "",
+	    (state & R128_UPLOAD_TEX0)		? "tex0, " : "",
+	    (state & R128_UPLOAD_TEX1)		? "tex1, " : "",
+	    (state & R128_UPLOAD_MASKS)		? "masks, " : "",
+	    (state & R128_UPLOAD_WINDOW)	? "window, " : "",
+	    (state & R128_UPLOAD_CLIPRECTS)	? "cliprects, " : "",
+	    (state & R128_REQUIRE_QUIESCENCE)	? "quiescence, " : "" );
+}
+
+/*
+ * Load the current context's state into the hardware.
+ *
+ * NOTE: Be VERY careful about ensuring the context state is marked for
+ * upload, the only place it shouldn't be uploaded is when the setup
+ * state has changed in ReducedPrimitiveChange as this comes right after
+ * a state update.
+ *
+ * Blits of any type should always upload the context and masks after
+ * they are done.
+ */
+void r128EmitHwStateLocked( r128ContextPtr rmesa )
+{
+   drm_r128_sarea_t *sarea = rmesa->sarea;
+   drm_r128_context_regs_t *regs = &(rmesa->setup);
+   const r128TexObjPtr t0 = rmesa->CurrentTexObj[0];
+   const r128TexObjPtr t1 = rmesa->CurrentTexObj[1];
+   /* Stage dirty state in the SAREA; presumably called with the lock held. */
+   if ( R128_DEBUG & DEBUG_VERBOSE_MSG ) {
+      r128DDPrintDirty( "r128EmitHwStateLocked", rmesa->dirty );
+   }
+   /* Any one of these flags causes the whole register block to be copied. */
+   if ( rmesa->dirty & (R128_UPLOAD_CONTEXT |
+			R128_UPLOAD_SETUP |
+			R128_UPLOAD_MASKS |
+			R128_UPLOAD_WINDOW |
+			R128_UPLOAD_CORE) ) {
+      memcpy( &sarea->context_state, regs, sizeof(sarea->context_state) );
+      
+      if( rmesa->dirty & R128_UPLOAD_CONTEXT )
+      {
+         /* One possible side-effect of uploading a new context is the
+          * setting of the R128_GMC_AUX_CLIP_DIS bit, which causes all
+          * auxiliary cliprects to be disabled. So the next command must
+          * upload them again. */
+         rmesa->dirty |= R128_UPLOAD_CLIPRECTS;
+      }
+   }
+   /* A texture unit is copied only when flagged and a texture is current. */
+   if ( (rmesa->dirty & R128_UPLOAD_TEX0) && t0 ) {
+      drm_r128_texture_regs_t *tex = &sarea->tex_state[0];
+
+      tex->tex_cntl		= t0->setup.tex_cntl;
+      tex->tex_combine_cntl	= rmesa->tex_combine[0];
+      tex->tex_size_pitch	= t0->setup.tex_size_pitch;
+      memcpy( &tex->tex_offset[0], &t0->setup.tex_offset[0],
+	      sizeof(tex->tex_offset ) );
+      tex->tex_border_color	= t0->setup.tex_border_color;
+   }
+
+   if ( (rmesa->dirty & R128_UPLOAD_TEX1) && t1 ) {
+      drm_r128_texture_regs_t *tex = &sarea->tex_state[1];
+
+      tex->tex_cntl		= t1->setup.tex_cntl;
+      tex->tex_combine_cntl	= rmesa->tex_combine[1];
+      tex->tex_size_pitch	= t1->setup.tex_size_pitch;
+      memcpy( &tex->tex_offset[0], &t1->setup.tex_offset[0],
+	      sizeof(tex->tex_offset ) );
+      tex->tex_border_color	= t1->setup.tex_border_color;
+   }
+   /* Vertex size and format are published on every call. */
+   sarea->vertsize = rmesa->vertex_size;
+   sarea->vc_format = rmesa->vertex_format;
+
+   /* Turn off the texture cache flushing */
+   rmesa->setup.tex_cntl_c &= ~R128_TEX_CACHE_FLUSH;
+   /* Hand the flags to the SAREA; locally only CLIPRECTS can stay pending. */
+   sarea->dirty |= rmesa->dirty;
+   rmesa->dirty &= R128_UPLOAD_CLIPRECTS;
+}
+
+static void r128DDPrintState( const char *msg, GLuint flags )
+{
+   fprintf( stderr,   /* one line: msg, raw mask in hex, a label per set flag */
+	    "%s: (0x%x) %s%s%s%s%s%s%s%s\n",
+	    msg,
+	    flags,
+	    (flags & R128_NEW_CONTEXT)	? "context, " : "",
+	    (flags & R128_NEW_ALPHA)	? "alpha, " : "",
+	    (flags & R128_NEW_DEPTH)	? "depth, " : "",
+	    (flags & R128_NEW_FOG)	? "fog, " : "",
+	    (flags & R128_NEW_CLIP)	? "clip, " : "",
+	    (flags & R128_NEW_CULL)	? "cull, " : "",
+	    (flags & R128_NEW_MASKS)	? "masks, " : "",
+	    (flags & R128_NEW_WINDOW)	? "window, " : "" );
+}
+
+void r128DDUpdateHWState( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   int pending = rmesa->new_state;
+
+   /* Nothing flagged by the driver hooks and no core texture change. */
+   if ( !pending && !(rmesa->NewGLState & _NEW_TEXTURE) )
+      return;
+
+   FLUSH_BATCH( rmesa );
+
+   /* Reset the driver flags; the value captured above drives this pass. */
+   rmesa->new_state = 0;
+
+   if ( R128_DEBUG & DEBUG_VERBOSE_MSG ) {
+      r128DDPrintState( "r128UpdateHwState", pending );
+   }
+
+   /* Update the various parts of the context's state.
+    */
+   if ( pending & R128_NEW_ALPHA )
+      r128UpdateAlphaMode( ctx );
+   if ( pending & R128_NEW_DEPTH )
+      r128UpdateZMode( ctx );
+   if ( pending & R128_NEW_FOG )
+      r128UpdateFogAttrib( ctx );
+   if ( pending & R128_NEW_CLIP )
+      r128UpdateClipping( ctx );
+   if ( pending & R128_NEW_CULL )
+      r128UpdateCull( ctx );
+   if ( pending & R128_NEW_MASKS )
+      r128UpdateMasks( ctx );
+
+   /* r128CalcViewport() uses the drawable height, so it is redone
+    * together with the window registers. */
+   if ( pending & R128_NEW_WINDOW ) {
+      r128UpdateWindow( ctx );
+      r128CalcViewport( ctx );
+   }
+
+   /* _NEW_TEXTURE is accumulated in NewGLState by
+    * r128DDInvalidateState(), not in new_state. */
+   if ( rmesa->NewGLState & _NEW_TEXTURE )
+      r128UpdateTextureState( ctx );
+}
+
+
+static void r128DDInvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   _swrast_InvalidateState( ctx, new_state );   /* forward to each Mesa module */
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   R128_CONTEXT(ctx)->NewGLState |= new_state;   /* accumulated for the driver */
+}
+
+
+
+/* Initialize the context's hardware state.
+ */
+void r128DDInitState( r128ContextPtr rmesa )
+{
+   int dst_bpp, depth_bpp;
+   /* Destination pixel format follows the screen's bytes per pixel. */
+   switch ( rmesa->r128Screen->cpp ) {
+   case 2:
+      dst_bpp = R128_GMC_DST_16BPP;
+      break;
+   case 4:
+      dst_bpp = R128_GMC_DST_32BPP;
+      break;
+   default:
+      fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" );
+      exit( -1 );   /* terminates the whole process */
+   }
+
+   rmesa->ClearColor = 0x00000000;
+   /* Depth width, initial depth clear value and depth scale from the visual. */
+   switch ( rmesa->glCtx->Visual.depthBits ) {
+   case 16:
+      rmesa->ClearDepth = 0x0000ffff;
+      depth_bpp = R128_Z_PIX_WIDTH_16;
+      rmesa->depth_scale = 1.0 / (GLfloat)0xffff;
+      break;
+   case 24:
+      rmesa->ClearDepth = 0x00ffffff;
+      depth_bpp = R128_Z_PIX_WIDTH_24;
+      rmesa->depth_scale = 1.0 / (GLfloat)0xffffff;
+      break;
+   default:
+      fprintf( stderr, "Error: Unsupported depth %d... exiting\n",
+	       rmesa->glCtx->Visual.depthBits );
+      exit( -1 );   /* terminates the whole process */
+   }
+
+   rmesa->Fallback = 0;
+
+   /* Hardware state:
+    */
+   rmesa->setup.dp_gui_master_cntl_c = (R128_GMC_DST_PITCH_OFFSET_CNTL |
+					R128_GMC_DST_CLIPPING |
+					R128_GMC_BRUSH_SOLID_COLOR |
+					dst_bpp |
+					R128_GMC_SRC_DATATYPE_COLOR |
+					R128_GMC_BYTE_MSB_TO_LSB |
+					R128_GMC_CONVERSION_TEMP_6500 |
+					R128_ROP3_S |
+					R128_DP_SRC_SOURCE_MEMORY |
+					R128_GMC_3D_FCN_EN |
+					R128_GMC_CLR_CMP_CNTL_DIS |
+					R128_GMC_AUX_CLIP_DIS |
+					R128_GMC_WR_MSK_DIS);
+   /* Initial scissor rectangle: (0,0) to (0x1fff,0x1fff). */
+   rmesa->setup.sc_top_left_c     = 0x00000000;
+   rmesa->setup.sc_bottom_right_c = 0x1fff1fff;
+
+   rmesa->setup.z_offset_c = rmesa->r128Screen->depthOffset;
+   rmesa->setup.z_pitch_c = ((rmesa->r128Screen->depthPitch >> 3) |
+			     R128_Z_TILE);
+
+   rmesa->setup.z_sten_cntl_c = (depth_bpp |
+				 R128_Z_TEST_LESS |
+				 R128_STENCIL_TEST_ALWAYS |
+				 R128_STENCIL_S_FAIL_KEEP |
+				 R128_STENCIL_ZPASS_KEEP |
+				 R128_STENCIL_ZFAIL_KEEP);
+
+   rmesa->setup.tex_cntl_c = (R128_Z_WRITE_ENABLE |
+			      R128_SHADE_ENABLE |
+			      R128_DITHER_ENABLE |
+			      R128_ALPHA_IN_TEX_COMPLETE_A |
+			      R128_LIGHT_DIS |
+			      R128_ALPHA_LIGHT_DIS |
+			      R128_TEX_CACHE_FLUSH |
+			      (0x3f << R128_LOD_BIAS_SHIFT));
+
+   rmesa->setup.misc_3d_state_cntl_reg = (R128_MISC_SCALE_3D_TEXMAP_SHADE |
+					  R128_MISC_SCALE_PIX_REPLICATE |
+					  R128_ALPHA_COMB_ADD_CLAMP |
+					  R128_FOG_VERTEX |
+					  (R128_ALPHA_BLEND_ONE << R128_ALPHA_BLEND_SRC_SHIFT) |
+					  (R128_ALPHA_BLEND_ZERO << R128_ALPHA_BLEND_DST_SHIFT) |
+					  R128_ALPHA_TEST_ALWAYS);
+
+   rmesa->setup.texture_clr_cmp_clr_c = 0x00000000;
+   rmesa->setup.texture_clr_cmp_msk_c = 0xffffffff;
+
+   rmesa->setup.fog_color_c = 0x00000000;
+
+   rmesa->setup.pm4_vc_fpu_setup = (R128_FRONT_DIR_CCW |
+				    R128_BACKFACE_SOLID |
+				    R128_FRONTFACE_SOLID |
+				    R128_FPU_COLOR_GOURAUD |
+				    R128_FPU_SUB_PIX_4BITS |
+				    R128_FPU_MODE_3D |
+				    R128_TRAP_BITS_DISABLE |
+				    R128_XFACTOR_2 |
+				    R128_YFACTOR_2 |
+				    R128_FLAT_SHADE_VERTEX_OGL |
+				    R128_FPU_ROUND_TRUNCATE |
+				    R128_WM_SEL_8DW);
+
+   rmesa->setup.setup_cntl = (R128_COLOR_GOURAUD |
+			      R128_PRIM_TYPE_TRI |
+			      R128_TEXTURE_ST_MULT_W |
+			      R128_STARTING_VERTEX_1 |
+			      R128_ENDING_VERTEX_3 |
+			      R128_SU_POLY_LINE_NOT_LAST |
+			      R128_SUB_PIX_4BITS);
+
+   rmesa->setup.tex_size_pitch_c = 0x00000000;
+   rmesa->setup.constant_color_c = 0x00ffffff;
+
+   rmesa->setup.dp_write_mask   = 0xffffffff;
+   rmesa->setup.sten_ref_mask_c = 0xffff0000;
+   rmesa->setup.plane_3d_mask_c = 0xffffffff;
+
+   rmesa->setup.window_xy_offset = 0x00000000;
+
+   rmesa->setup.scale_3d_cntl = (R128_SCALE_DITHER_TABLE |
+				 R128_TEX_CACHE_SIZE_FULL |
+				 R128_DITHER_INIT_RESET |
+				 R128_SCALE_3D_TEXMAP_SHADE |
+				 R128_SCALE_PIX_REPLICATE |
+				 R128_ALPHA_COMB_ADD_CLAMP |
+				 R128_FOG_VERTEX |
+				 (R128_ALPHA_BLEND_ONE << R128_ALPHA_BLEND_SRC_SHIFT) |
+				 (R128_ALPHA_BLEND_ZERO << R128_ALPHA_BLEND_DST_SHIFT) |
+				 R128_ALPHA_TEST_ALWAYS |
+				 R128_COMPOSITE_SHADOW_CMP_EQUAL |
+				 R128_TEX_MAP_ALPHA_IN_TEXTURE |
+				 R128_TEX_CACHE_LINE_SIZE_4QW);
+   /* R128_NEW_ALL: presumably every R128_NEW_* bit, forcing a full first update. */
+   rmesa->new_state = R128_NEW_ALL;
+}
+
+/* Initialize the driver's state functions.
+ */
+void r128DDInitStateFuncs( GLcontext *ctx )
+{
+   ctx->Driver.UpdateState		= r128DDInvalidateState;
+   /* Clear values and buffer selection. */
+   ctx->Driver.ClearColor		= r128DDClearColor;
+   ctx->Driver.ClearStencil		= r128DDClearStencil;
+   ctx->Driver.DrawBuffer		= r128DDDrawBuffer;
+   ctx->Driver.ReadBuffer		= r128DDReadBuffer;
+   /* Per-state hooks; entries assigned NULL install no driver callback. */
+   ctx->Driver.ColorMask		= r128DDColorMask;
+   ctx->Driver.AlphaFunc		= r128DDAlphaFunc;
+   ctx->Driver.BlendEquationSeparate	= r128DDBlendEquationSeparate;
+   ctx->Driver.BlendFuncSeparate	= r128DDBlendFuncSeparate;
+   ctx->Driver.ClearDepth		= r128DDClearDepth;
+   ctx->Driver.CullFace			= r128DDCullFace;
+   ctx->Driver.FrontFace		= r128DDFrontFace;
+   ctx->Driver.DepthFunc		= r128DDDepthFunc;
+   ctx->Driver.DepthMask		= r128DDDepthMask;
+   ctx->Driver.Enable			= r128DDEnable;
+   ctx->Driver.Fogfv			= r128DDFogfv;
+   ctx->Driver.Hint			= NULL;
+   ctx->Driver.Lightfv			= NULL;
+   ctx->Driver.LightModelfv		= r128DDLightModelfv;
+   ctx->Driver.LogicOpcode		= r128DDLogicOpCode;
+   ctx->Driver.PolygonMode		= NULL;
+   ctx->Driver.PolygonStipple		= r128DDPolygonStipple;
+   ctx->Driver.RenderMode		= r128DDRenderMode;
+   ctx->Driver.Scissor			= r128DDScissor;
+   ctx->Driver.ShadeModel		= r128DDShadeModel;
+   ctx->Driver.StencilFuncSeparate	= r128DDStencilFuncSeparate;
+   ctx->Driver.StencilMaskSeparate	= r128DDStencilMaskSeparate;
+   ctx->Driver.StencilOpSeparate	= r128DDStencilOpSeparate;
+   /* Both of these hooks simply call r128CalcViewport(). */
+   ctx->Driver.DepthRange               = r128DepthRange;
+   ctx->Driver.Viewport                 = r128Viewport;
+}
diff --git a/src/mesa/drivers/dri/r128/r128_state.h b/src/mesa/drivers/dri/r128/r128_state.h
new file mode 100644
index 0000000000..a44327dfb3
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_state.h
@@ -0,0 +1,48 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *
+ */
+
+#ifndef __R128_STATE_H__
+#define __R128_STATE_H__
+
+#include "r128_context.h"
+/* Initial register values and installation of the ctx->Driver state hooks. */
+extern void r128DDInitState( r128ContextPtr rmesa );
+extern void r128DDInitStateFuncs( GLcontext *ctx );
+/* NOTE(review): confirm r128DDUpdateState is still defined in r128_state.c. */
+extern void r128DDUpdateState( GLcontext *ctx );
+extern void r128DDUpdateHWState( GLcontext *ctx );
+/* Copies dirty driver state into the SAREA. */
+extern void r128EmitHwStateLocked( r128ContextPtr rmesa );
+
+#endif /* __R128_STATE_H__ */
diff --git a/src/mesa/drivers/dri/r128/r128_tex.c b/src/mesa/drivers/dri/r128/r128_tex.c
new file mode 100644
index 0000000000..4ec4be9a47
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_tex.c
@@ -0,0 +1,615 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Brian Paul <brianp@valinux.com>
+ */
+
+#include "r128_context.h"
+#include "r128_ioctl.h"
+#include "r128_tex.h"
+#include "r128_texobj.h"
+
+#include "main/simple_list.h"
+#include "main/enums.h"
+#include "main/texstore.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/imports.h"
+#include "main/texobj.h"
+
+#include "xmlpool.h"
+
+#define TEX_0	1
+#define TEX_1	2
+
+
+/**
+ * Program the S and T clamp fields of a texture's \c tex_cntl word.
+ * \c GL_CLAMP, \c GL_CLAMP_TO_EDGE, \c GL_REPEAT and \c GL_MIRRORED_REPEAT
+ * are translated; any other mode leaves that coordinate's field cleared.
+ * \param t Texture object whose wrap modes are to be set
+ * \param swrap Wrap mode for the \a s texture coordinate
+ * \param twrap Wrap mode for the \a t texture coordinate
+ */
+static void r128SetTexWrap( r128TexObjPtr t, GLenum swrap, GLenum twrap )
+{
+   /* Both clamp fields are wiped first, so only the bits chosen below
+    * survive in the register image. */
+   t->setup.tex_cntl &= ~(R128_TEX_CLAMP_S_MASK | R128_TEX_CLAMP_T_MASK);
+
+   /* S coordinate */
+   if ( swrap == GL_CLAMP ) {
+      t->setup.tex_cntl |= R128_TEX_CLAMP_S_BORDER_COLOR;
+   }
+   else if ( swrap == GL_CLAMP_TO_EDGE ) {
+      t->setup.tex_cntl |= R128_TEX_CLAMP_S_CLAMP;
+   }
+   else if ( swrap == GL_REPEAT ) {
+      t->setup.tex_cntl |= R128_TEX_CLAMP_S_WRAP;
+   }
+   else if ( swrap == GL_MIRRORED_REPEAT ) {
+      t->setup.tex_cntl |= R128_TEX_CLAMP_S_MIRROR;
+   }
+
+   /* T coordinate */
+   if ( twrap == GL_CLAMP ) {
+      t->setup.tex_cntl |= R128_TEX_CLAMP_T_BORDER_COLOR;
+   }
+   else if ( twrap == GL_CLAMP_TO_EDGE ) {
+      t->setup.tex_cntl |= R128_TEX_CLAMP_T_CLAMP;
+   }
+   else if ( twrap == GL_REPEAT ) {
+      t->setup.tex_cntl |= R128_TEX_CLAMP_T_WRAP;
+   }
+   else if ( twrap == GL_MIRRORED_REPEAT ) {
+      t->setup.tex_cntl |= R128_TEX_CLAMP_T_MIRROR;
+   }
+}
+
+/* Encode the GL min/mag filters into the blend fields of tex_cntl. */
+static void r128SetTexFilter( r128TexObjPtr t, GLenum minf, GLenum magf )
+{
+   /* Unrecognized filter enums leave their (cleared) field at zero. */
+   t->setup.tex_cntl &= ~(R128_MIN_BLEND_MASK | R128_MAG_BLEND_MASK);
+
+   /* Minification */
+   if ( minf == GL_NEAREST ) {
+      t->setup.tex_cntl |= R128_MIN_BLEND_NEAREST;
+   }
+   else if ( minf == GL_LINEAR ) {
+      t->setup.tex_cntl |= R128_MIN_BLEND_LINEAR;
+   }
+   else if ( minf == GL_NEAREST_MIPMAP_NEAREST ) {
+      t->setup.tex_cntl |= R128_MIN_BLEND_MIPNEAREST;
+   }
+   else if ( minf == GL_LINEAR_MIPMAP_NEAREST ) {
+      t->setup.tex_cntl |= R128_MIN_BLEND_MIPLINEAR;
+   }
+   else if ( minf == GL_NEAREST_MIPMAP_LINEAR ) {
+      t->setup.tex_cntl |= R128_MIN_BLEND_LINEARMIPNEAREST;
+   }
+   else if ( minf == GL_LINEAR_MIPMAP_LINEAR ) {
+      t->setup.tex_cntl |= R128_MIN_BLEND_LINEARMIPLINEAR;
+   }
+
+   /* Magnification */
+   if ( magf == GL_NEAREST ) {
+      t->setup.tex_cntl |= R128_MAG_BLEND_NEAREST;
+   }
+   else if ( magf == GL_LINEAR ) {
+      t->setup.tex_cntl |= R128_MAG_BLEND_LINEAR;
+   }
+}
+
+static void r128SetTexBorderColor( r128TexObjPtr t, const GLfloat color[4] )
+{
+   GLubyte ub[4];
+   int i;
+   for ( i = 0 ; i < 4 ; i++ ) {   /* R, G, B, A in that order */
+      CLAMPED_FLOAT_TO_UBYTE( ub[i], color[i] );
+   }
+   t->setup.tex_border_color = r128PackColor( 4, ub[0], ub[1], ub[2], ub[3] );
+}
+
+
+static r128TexObjPtr r128AllocTexObj( struct gl_texture_object *texObj )
+{
+   r128TexObjPtr newObj;
+
+   if ( R128_DEBUG & DEBUG_VERBOSE_API )
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, (void *) texObj );
+
+   newObj = (r128TexObjPtr) CALLOC_STRUCT( r128_tex_obj );
+   /* The GL object records the result even when allocation failed,
+    * in which case DriverData ends up NULL. */
+   texObj->DriverData = newObj;
+   if ( newObj == NULL )
+      return NULL;
+
+   /* Initialize non-image-dependent parts of the state. */
+   newObj->base.tObj = texObj;
+
+   /* FIXME Something here to set initial values for other parts of
+    * FIXME newObj->setup?
+    */
+   make_empty_list( (driTextureObject *) newObj );
+
+   /* Seed the hardware words from the GL object's current parameters. */
+   r128SetTexWrap( newObj, texObj->WrapS, texObj->WrapT );
+   r128SetTexFilter( newObj, texObj->MinFilter, texObj->MagFilter );
+   r128SetTexBorderColor( newObj, texObj->BorderColor.f );
+
+   return newObj;
+}
+
+
+/* Texel-format chooser used by the _mesa_store_teximage[123]d() functions. */
+static gl_format
+r128ChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+                         GLenum format, GLenum type )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   const GLboolean do32bpt =   /* texture_depth option asks for 32 bpt */
+       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );
+   const GLboolean force16bpt =   /* texture_depth option forces 16 bpt */
+       ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );
+   (void) format;
+   (void) type;   /* NOTE(review): type is in fact read by GL_YCBCR_MESA below */
+
+   switch ( internalFormat ) {
+   /* non-sized formats with alpha: 8888 only when 32 bpt was requested */
+   case GL_INTENSITY:
+   case GL_COMPRESSED_INTENSITY:
+   case GL_ALPHA:
+   case GL_COMPRESSED_ALPHA:
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+   case 4:
+   case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
+      if (do32bpt)
+         return _dri_texformat_argb8888;
+      else
+         return _dri_texformat_argb4444;
+
+   /* 16-bit formats with alpha: always 4444 */
+   case GL_INTENSITY4:
+   case GL_ALPHA4:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_RGBA2:
+   case GL_RGBA4:
+      return _dri_texformat_argb4444;
+
+   /* 32-bit formats with alpha: 8888 unless 16 bpt is forced */
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_RGB5_A1:
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      if (!force16bpt)
+         return _dri_texformat_argb8888;
+      else
+         return _dri_texformat_argb4444;
+
+   /* non-sized formats without alpha: 8888 if 32 bpt requested, else 565 */
+   case 1:
+   case GL_LUMINANCE:
+   case GL_COMPRESSED_LUMINANCE:
+   case 3:
+   case GL_RGB:
+   case GL_COMPRESSED_RGB:
+      if (do32bpt)
+         return _dri_texformat_argb8888;
+      else
+         return _dri_texformat_rgb565;
+
+   /* 16-bit formats without alpha: always 565 */
+   case GL_LUMINANCE4:
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+      return _dri_texformat_rgb565;
+
+   /* 32-bit formats without alpha: 8888 unless 16 bpt is forced */
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      if (!force16bpt)
+         return _dri_texformat_argb8888;
+      else
+         return _dri_texformat_rgb565;
+
+   /* color-indexed formats: always ci8 */
+   case GL_COLOR_INDEX:
+   case GL_COLOR_INDEX1_EXT:
+   case GL_COLOR_INDEX2_EXT:
+   case GL_COLOR_INDEX4_EXT:
+   case GL_COLOR_INDEX8_EXT:
+   case GL_COLOR_INDEX12_EXT:
+   case GL_COLOR_INDEX16_EXT:
+      return _dri_texformat_ci8;
+
+   case GL_YCBCR_MESA:   /* YCBCR vs. YCBCR_REV is picked from the pixel type */
+      if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+          type == GL_UNSIGNED_BYTE)
+         return MESA_FORMAT_YCBCR;
+      else
+         return MESA_FORMAT_YCBCR_REV;
+
+   default:   /* anything not listed above is reported and rejected */
+      _mesa_problem( ctx, "unexpected format in %s", __FUNCTION__ );
+      return MESA_FORMAT_NONE;
+   }
+}
+
+
+/* TexImage1D hook: make sure a driver texture object is attached, let core
+ * Mesa store the image, then flag the level in dirty_images[0]. */
+static void r128TexImage1D( GLcontext *ctx, GLenum target, GLint level,
+                            GLint internalFormat, GLint width, GLint border,
+                            GLenum format, GLenum type, const GLvoid *pixels,
+                            const struct gl_pixelstore_attrib *packing,
+                            struct gl_texture_object *texObj,
+                            struct gl_texture_image *texImage )
+{
+   driTextureObject *drvTex = (driTextureObject *) texObj->DriverData;
+
+   if ( drvTex == NULL ) {
+      drvTex = (driTextureObject *) r128AllocTexObj( texObj );
+      if ( drvTex == NULL ) {
+         _mesa_error( ctx, GL_OUT_OF_MEMORY, "glTexImage1D" );
+         return;
+      }
+   } else {
+      driSwapOutTextureObject( drvTex );
+   }
+
+   /* Note, this will call r128ChooseTextureFormat */
+   _mesa_store_teximage1d( ctx, target, level, internalFormat,
+                           width, border, format, type,
+                           pixels, packing, texObj, texImage );
+
+   drvTex->dirty_images[0] |= (1 << level);
+}
+
+
+/* TexSubImage1D hook: let core Mesa update part of a 1D image and flag the
+ * level in dirty_images[0].  A driver texture object is expected to exist
+ * already; one is allocated on demand if it does not. */
+static void r128TexSubImage1D( GLcontext *ctx, GLenum target, GLint level,
+                               GLint xoffset, GLsizei width,
+                               GLenum format, GLenum type,
+                               const GLvoid *pixels,
+                               const struct gl_pixelstore_attrib *packing,
+                               struct gl_texture_object *texObj,
+                               struct gl_texture_image *texImage )
+{
+   driTextureObject *drvTex = (driTextureObject *) texObj->DriverData;
+
+   assert( drvTex ); /* this _should_ be true */
+   /* Defensive fallback for builds where the assert is compiled out. */
+   if ( drvTex == NULL ) {
+      drvTex = (driTextureObject *) r128AllocTexObj( texObj );
+      if ( drvTex == NULL ) {
+         _mesa_error( ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D" );
+         return;
+      }
+   } else {
+      driSwapOutTextureObject( drvTex );
+   }
+
+   _mesa_store_texsubimage1d( ctx, target, level, xoffset, width,
+                              format, type, pixels, packing, texObj,
+                              texImage );
+
+   drvTex->dirty_images[0] |= (1 << level);
+}
+
+
+/* TexImage2D hook.  Stores the image through core Mesa with the caller's
+ * \a packing (as the 1D and sub-image hooks do) and marks the level dirty. */
+static void r128TexImage2D( GLcontext *ctx, GLenum target, GLint level,
+                            GLint internalFormat,
+                            GLint width, GLint height, GLint border,
+                            GLenum format, GLenum type, const GLvoid *pixels,
+                            const struct gl_pixelstore_attrib *packing,
+                            struct gl_texture_object *texObj,
+                            struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   if ( t ) {
+      driSwapOutTextureObject( t );
+   } else {
+      t = (driTextureObject *) r128AllocTexObj(texObj);
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+         return;
+      }
+   }
+
+   /* Note, this will call r128ChooseTextureFormat */
+   _mesa_store_teximage2d(ctx, target, level, internalFormat,
+                          width, height, border, format, type, pixels,
+                          packing, texObj, texImage);
+   t->dirty_images[0] |= (1 << level);
+}
+
+
+/* TexSubImage2D hook.  Updates part of a 2D image via core Mesa and marks
+ * the level dirty; allocation failure is reported as glTexSubImage2D. */
+static void r128TexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+                               GLint xoffset, GLint yoffset,
+                               GLsizei width, GLsizei height,
+                               GLenum format, GLenum type,
+                               const GLvoid *pixels,
+                               const struct gl_pixelstore_attrib *packing,
+                               struct gl_texture_object *texObj,
+                               struct gl_texture_image *texImage )
+{
+   driTextureObject * t = (driTextureObject *) texObj->DriverData;
+
+   assert( t ); /* this _should_ be true */
+   if ( t ) {
+      driSwapOutTextureObject( t );
+   } else {
+      t = (driTextureObject *) r128AllocTexObj(texObj);
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+         return;
+      }
+   }
+
+   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+                             height, format, type, pixels, packing, texObj,
+                             texImage);
+
+   t->dirty_images[0] |= (1 << level);
+}
+
+
+static void r128TexEnv( GLcontext *ctx, GLenum target,
+			  GLenum pname, const GLfloat *param )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   struct gl_texture_unit *texUnit;
+   GLubyte c[4];   /* env color converted to bytes */
+   /* TexEnv hook: reacts to env mode, env color and LOD bias changes. */
+   if ( R128_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %s )\n",
+	       __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
+   }
+
+   switch ( pname ) {
+   case GL_TEXTURE_ENV_MODE:
+      FLUSH_BATCH( rmesa );
+      rmesa->new_state |= R128_NEW_ALPHA;
+      break;
+
+   case GL_TEXTURE_ENV_COLOR:
+      texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+      CLAMPED_FLOAT_TO_UBYTE( c[0], texUnit->EnvColor[0] );
+      CLAMPED_FLOAT_TO_UBYTE( c[1], texUnit->EnvColor[1] );
+      CLAMPED_FLOAT_TO_UBYTE( c[2], texUnit->EnvColor[2] );
+      CLAMPED_FLOAT_TO_UBYTE( c[3], texUnit->EnvColor[3] );
+      rmesa->env_color = r128PackColor( 4, c[0], c[1], c[2], c[3] );
+      if ( rmesa->setup.constant_color_c != rmesa->env_color ) {   /* only on a real change */
+	 FLUSH_BATCH( rmesa );
+	 rmesa->setup.constant_color_c = rmesa->env_color;
+
+	 /* More complex multitexture/multipass fallbacks for GL_BLEND
+	  * can be done later, but this allows a single pass GL_BLEND
+	  * in some cases (ie. Performer town demo).  This is only
+	  * applicable to the regular Rage 128, as the Pro and M3 can
+	  * handle true single-pass GL_BLEND texturing.
+	  */
+	 rmesa->blend_flags &= ~R128_BLEND_ENV_COLOR;
+	 if ( R128_IS_PLAIN( rmesa ) &&
+	      rmesa->env_color != 0x00000000 &&
+	      rmesa->env_color != 0xff000000 &&
+	      rmesa->env_color != 0x00ffffff &&
+	      rmesa->env_color != 0xffffffff ) {
+	    rmesa->blend_flags |= R128_BLEND_ENV_COLOR;
+	 }
+      }
+      break;
+
+   case GL_TEXTURE_LOD_BIAS:
+      {
+	 uint32_t t = rmesa->setup.tex_cntl_c;
+	 GLint bias;
+	 uint32_t b;
+
+	 /* GTH: This isn't exactly correct, but gives good results up to a
+	  * certain point.  It is better than completely ignoring the LOD
+	  * bias.  Unfortunately there isn't much range in the bias, the
+	  * spec mentions strides that vary between 0.5 and 2.0 but these
+	  * numbers don't seem to relate to the GL LOD bias value at all.
+	  */
+	 if ( param[0] >= 1.0 ) {
+	    bias = -128;
+	 } else if ( param[0] >= 0.5 ) {
+	    bias = -64;
+	 } else if ( param[0] >= 0.25 ) {
+	    bias = 0;
+	 } else if ( param[0] >= 0.0 ) {
+	    bias = 63;
+	 } else {
+	    bias = 127;
+	 }
+
+	 b = (uint32_t)bias & 0xff;   /* keep only the low 8 bits of bias */
+	 t &= ~R128_LOD_BIAS_MASK;
+	 t |= (b << R128_LOD_BIAS_SHIFT);
+
+	 if ( rmesa->setup.tex_cntl_c != t ) {   /* skip the flush when unchanged */
+	    FLUSH_BATCH( rmesa );
+	    rmesa->setup.tex_cntl_c = t;
+	    rmesa->dirty |= R128_UPLOAD_CONTEXT;
+	 }
+      }
+      break;
+
+   default:   /* every other pname is ignored here */
+      return;
+   }
+}
+
+
+/* TexParameter hook: push changed sampler state into the driver object. */
+static void r128TexParameter( GLcontext *ctx, GLenum target,
+                              struct gl_texture_object *tObj,
+                              GLenum pname, const GLfloat *params )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   r128TexObjPtr t = (r128TexObjPtr)tObj->DriverData;
+
+   if ( R128_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %s )\n",
+               __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
+   }
+
+   /* Nothing to do for other targets, or while no driver object exists
+    * (r128AllocTexObj failed); the TexImage hooks re-create it from tObj. */
+   if ( t == NULL || ( target != GL_TEXTURE_2D && target != GL_TEXTURE_1D ) )
+      return;
+
+   switch ( pname ) {
+   case GL_TEXTURE_MIN_FILTER:
+   case GL_TEXTURE_MAG_FILTER:
+      if ( t->base.bound ) FLUSH_BATCH( rmesa );
+      r128SetTexFilter( t, tObj->MinFilter, tObj->MagFilter );
+      break;
+
+   case GL_TEXTURE_WRAP_S:
+   case GL_TEXTURE_WRAP_T:
+      if ( t->base.bound ) FLUSH_BATCH( rmesa );
+      r128SetTexWrap( t, tObj->WrapS, tObj->WrapT );
+      break;
+
+   case GL_TEXTURE_BORDER_COLOR:
+      if ( t->base.bound ) FLUSH_BATCH( rmesa );
+      r128SetTexBorderColor( t, tObj->BorderColor.f );
+      break;
+
+   case GL_TEXTURE_BASE_LEVEL:
+   case GL_TEXTURE_MAX_LEVEL:
+   case GL_TEXTURE_MIN_LOD:
+   case GL_TEXTURE_MAX_LOD:
+      /* Not the most efficient solution, but R128 has no LOD clamping:
+       * a clamped LOD is simulated by loading the right subset of mipmap
+       * levels, so the texture is swapped out here. */
+      if ( t->base.bound ) FLUSH_BATCH( rmesa );
+      driSwapOutTextureObject( (driTextureObject *) t );
+      break;
+   default:
+      return;
+   }
+}
+
+static void r128BindTexture( GLcontext *ctx, GLenum target,
+                             struct gl_texture_object *tObj )
+{
+   if ( R128_DEBUG & DEBUG_VERBOSE_API )
+      fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *) tObj,
+               ctx->Texture.CurrentUnit );
+
+   /* Every 1D/2D object is expected to carry driver data already. */
+   assert( (tObj->DriverData != NULL) ||
+           (target != GL_TEXTURE_2D && target != GL_TEXTURE_1D) );
+}
+
+
+static void r128DeleteTexture( GLcontext *ctx,
+                               struct gl_texture_object *tObj )
+{
+   driTextureObject *drvTex = (driTextureObject *) tObj->DriverData;
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+
+   if ( drvTex != NULL ) {
+      /* Flush queued work first when the object's bound mask is non-zero. */
+      if ( rmesa && drvTex->bound )
+         FLUSH_BATCH( rmesa );
+      driDestroyTextureObject( drvTex );
+   }
+
+   /* Free mipmap images and the texture object itself */
+   _mesa_delete_texture_object(ctx, tObj);
+}
+
+/**
+ * Allocate a new texture object.  Called via ctx->Driver.NewTextureObject.
+ * Driver data is attached only if core Mesa actually returned an object.
+ * Note: we could use containment here to 'derive' the driver-specific
+ * texture object from the core mesa gl_texture_object.  Not done at this time.
+ */
+static struct gl_texture_object *
+r128NewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
+{
+   struct gl_texture_object *obj = _mesa_new_texture_object(ctx, name, target);
+   if ( obj != NULL )
+      r128AllocTexObj( obj );
+   return obj;
+}
+
+void r128InitTextureFuncs( struct dd_function_table *functions )
+{
+   /* Plug the r128 texture entry points into the driver function table. */
+   functions->NewTextureObject    = r128NewTextureObject;
+   functions->DeleteTexture       = r128DeleteTexture;
+   functions->BindTexture         = r128BindTexture;
+   functions->IsTextureResident   = driIsTextureResident;
+   functions->ChooseTextureFormat = r128ChooseTextureFormat;
+   functions->TexImage1D          = r128TexImage1D;
+   functions->TexSubImage1D       = r128TexSubImage1D;
+   functions->TexImage2D          = r128TexImage2D;
+   functions->TexSubImage2D       = r128TexSubImage2D;
+   functions->TexParameter        = r128TexParameter;
+   functions->TexEnv              = r128TexEnv;
+   driInitTextureFormats();
+}
+
diff --git a/src/mesa/drivers/dri/r128/r128_tex.h b/src/mesa/drivers/dri/r128/r128_tex.h
new file mode 100644
index 0000000000..7df8decf76
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_tex.h
@@ -0,0 +1,84 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *
+ */
+
+#ifndef __R128_TEX_H__
+#define __R128_TEX_H__
+
+extern void r128UpdateTextureState( GLcontext *ctx );
+
+extern void r128UploadTexImages( r128ContextPtr rmesa, r128TexObjPtr t );
+
+extern void r128DestroyTexObj( r128ContextPtr rmesa, r128TexObjPtr t );
+
+extern void r128InitTextureFuncs( struct dd_function_table *functions );
+
+
+/* ================================================================
+ * Color conversion macros.  8888 shifts alpha as uint32_t: an int-promoted
+ * byte >= 0x80 shifted left by 24 would overflow a signed int.
+ */
+#define R128PACKCOLOR332( r, g, b )                                    \
+   (((r) & 0xe0) | (((g) & 0xe0) >> 3) | (((b) & 0xc0) >> 6))
+
+#define R128PACKCOLOR1555( r, g, b, a )                                \
+   ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) |  \
+    ((a) ? 0x8000 : 0))
+
+#define R128PACKCOLOR565( r, g, b )                                    \
+   ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
+
+#define R128PACKCOLOR888( r, g, b )                                    \
+   (((r) << 16) | ((g) << 8) | (b))
+
+#define R128PACKCOLOR8888( r, g, b, a )                                \
+   (((uint32_t)(a) << 24) | ((r) << 16) | ((g) << 8) | (b))
+
+#define R128PACKCOLOR4444( r, g, b, a )                                \
+   ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))
+
+static INLINE uint32_t r128PackColor( GLuint cpp,
+                                       GLubyte r, GLubyte g,
+                                       GLubyte b, GLubyte a )
+{
+    /* cpp selects the layout: 2 -> 565, 4 -> 8888, anything else -> 0. */
+    if ( cpp == 2 ) {
+       return R128PACKCOLOR565( r, g, b );
+    }
+    if ( cpp == 4 ) {
+       return R128PACKCOLOR8888( r, g, b, a );
+    }
+    return 0;
+}
+
+#endif /* __R128_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r128/r128_texmem.c b/src/mesa/drivers/dri/r128/r128_texmem.c
new file mode 100644
index 0000000000..412f6d4255
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_texmem.c
@@ -0,0 +1,299 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Brian Paul <brianp@valinux.com>
+ */
+
+#include "r128_context.h"
+#include "r128_ioctl.h"
+#include "r128_tex.h"
+
+#include "main/macros.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+
+#define TEX_0	1
+#define TEX_1	2
+
+
+/* Destroy hardware state associated with texture `t'.
+ * Only the context's CurrentTexObj[] references to `t' are dropped here.
+ */
+void r128DestroyTexObj( r128ContextPtr rmesa, r128TexObjPtr t )
+{
+    unsigned unit;
+
+    /* Without a context there is nothing to unhook. */
+    if ( rmesa == NULL )
+        return;
+
+    /* See if it was the driver's current object on any texture unit. */
+    for ( unit = 0 ; unit < rmesa->glCtx->Const.MaxTextureUnits ; unit++ ) {
+        if ( rmesa->CurrentTexObj[ unit ] != t )
+            continue;
+
+        /* A current object must have this unit's bit in its bound mask. */
+        assert( t->base.bound & (1 << unit) );
+        rmesa->CurrentTexObj[ unit ] = NULL;
+    }
+}
+
+
+/**
+ * Upload the image of texture \a t at mipmap \a level to its slot in
+ * texture memory (t->bufAddr plus that level's recorded offset).  Bad
+ * levels, missing/zero-width images and odd texel sizes are skipped.
+ */
+static void uploadSubImage( r128ContextPtr rmesa, r128TexObjPtr t, GLint level,
+                            GLint x, GLint y, GLint width, GLint height )
+{
+   struct gl_texture_image *image;
+   int texelsPerDword = 0;
+   int imageWidth, imageHeight;
+   int remaining, rows;
+   int format, dwords;
+   uint32_t pitch, offset;
+
+   /* Ensure we have a valid texture to upload; valid levels stop at MAX-1 */
+   if ( ( level < 0 ) || ( level >= R128_MAX_TEXTURE_LEVELS ) )
+      return;
+
+   image = t->base.tObj->Image[0][level];
+   if ( !image || image->Width == 0 )
+      return;
+
+   switch ( _mesa_get_format_bytes(image->TexFormat) ) {
+   case 1: texelsPerDword = 4; break;
+   case 2: texelsPerDword = 2; break;
+   case 4: texelsPerDword = 1; break;
+   default:
+      /* texelsPerDword is a divisor below; refuse unexpected texel sizes */
+      return;
+   }
+
+#if 1
+   /* FIXME: The subimage index calcs are wrong... */
+   x = 0;
+   y = 0;
+   width = image->Width;
+   height = image->Height;
+#endif
+
+   imageWidth  = image->Width;
+   imageHeight = image->Height;
+
+   format = t->textureFormat >> 16;
+
+   /* The texel upload routines have a minimum width, so force the size
+    * if needed.
+    */
+   if ( imageWidth < texelsPerDword ) {
+      int factor;
+
+      factor = texelsPerDword / imageWidth;
+      imageWidth = texelsPerDword;
+      imageHeight /= factor;
+      if ( imageHeight == 0 ) {
+         /* In this case, the texel converter will actually walk a
+          * texel or two off the end of the image, but normal malloc
+          * alignment should prevent it from ever causing a fault.
+          */
+         imageHeight = 1;
+      }
+   }
+
+   /* We can't upload to a pitch less than 8 texels so we will need to
+    * linearly upload all modified rows for textures smaller than this.
+    * This makes the x/y/width/height different for the blitter and the
+    * texture walker.
+    */
+   if ( imageWidth >= 8 ) {
+      /* The texture walker and the blitter look identical */
+      pitch = imageWidth >> 3;
+   } else {
+      int factor;
+      int y2;
+      int start, end;
+
+      start = (y * imageWidth) & ~7;
+      end = (y + height) * imageWidth;
+
+      if ( end - start < 8 ) {
+         /* Handle the case where the total number of texels
+          * uploaded is < 8.
+          */
+         x = 0;
+         y = start / 8;
+         width = end - start;
+         height = 1;
+      } else {
+         /* Upload some number of full 8 texel blit rows */
+         factor = 8 / imageWidth;
+
+         y2 = y + height - 1;
+         y /= factor;
+         y2 /= factor;
+
+         x = 0;
+         width = 8;
+         height = y2 - y + 1;
+      }
+
+      /* Fixed pitch of 8 */
+      pitch = 1;
+   }
+
+   dwords = width * height / texelsPerDword;
+   offset = t->bufAddr + t->image[level - t->base.firstLevel].offset;
+
+#if ENABLE_PERF_BOXES
+   /* Bump the performance counter */
+   rmesa->c_textureBytes += (dwords << 2);
+#endif
+
+   if ( R128_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "r128UploadSubImage: %d,%d of %d,%d at %d,%d\n",
+               width, height, image->Width, image->Height, x, y );
+      fprintf( stderr, "          blit ofs: 0x%07x pitch: 0x%x dwords: %d "
+               "level: %d format: %x\n",
+               (GLuint)offset, (GLuint)pitch, dwords, level, format );
+   }
+
+   /* Subdivide the texture if required */
+   if ( dwords <= R128_BUFFER_MAX_DWORDS / 2 ) {
+      rows = height;
+   } else {
+      rows = (R128_BUFFER_MAX_DWORDS * texelsPerDword) / (2 * width);
+   }
+
+   for ( remaining = height ; remaining > 0 ; remaining -= rows, y += rows )
+   {
+      uint32_t *dst;
+      drmBufPtr buffer;
+
+      assert(image->Data);
+
+      height = MIN2(remaining, rows);
+
+      /* Grab the indirect buffer for the texture blit */
+      LOCK_HARDWARE( rmesa );
+      buffer = r128GetBufferLocked( rmesa );
+
+      dst = (uint32_t *)((char *)buffer->address + R128_HOSTDATA_BLIT_OFFSET);
+
+      /* Copy the next chunk of the texture image into the blit buffer */
+      {
+         const GLuint texelBytes =
+            _mesa_get_format_bytes(image->TexFormat);
+         const GLubyte *src = (const GLubyte *) image->Data +
+            (y * image->Width + x) * texelBytes;
+         const GLuint bytes = width * height * texelBytes;
+         memcpy(dst, src, bytes);
+      }
+
+      r128FireBlitLocked( rmesa, buffer,
+                          offset, pitch, format,
+                          x, y, width, height );
+      UNLOCK_HARDWARE( rmesa );
+   }
+
+   rmesa->new_state |= R128_NEW_CONTEXT;
+   rmesa->dirty |= R128_UPLOAD_CONTEXT | R128_UPLOAD_MASKS;
+}
+
+
+/* Upload the texture images associated with texture `t'.  This might
+ * require removing our own and/or other client's texture objects to
+ * make room for these images.  Returns early, uploading nothing, when
+ * driAllocateTexture() reports failure (-1). */
+void r128UploadTexImages( r128ContextPtr rmesa, r128TexObjPtr t )
+{
+   const GLint numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+   GLint i;
+
+   if ( R128_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %p )\n",
+	       __FUNCTION__, (void *) rmesa->glCtx, (void *) t );
+   }
+
+   assert(t);   /* NOTE(review): t was already dereferenced for numLevels above */
+
+   LOCK_HARDWARE( rmesa );
+
+   if ( !t->base.memBlock ) {
+      int heap;
+      /* No memory block attached yet: ask driAllocateTexture for one. */
+
+      heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps,
+				 (driTextureObject *) t );
+      if ( heap == -1 ) {
+	 UNLOCK_HARDWARE( rmesa );
+	 return;
+      }
+
+      /* Set the base offset of the texture image */
+      assert(t->base.memBlock);
+      t->bufAddr = rmesa->r128Screen->texOffset[heap] 
+	   + t->base.memBlock->ofs;
+
+      /* Set texture offsets for each mipmap level */
+      if ( t->setup.tex_cntl & R128_MIP_MAP_DISABLE ) {
+	 for ( i = 0 ; i < R128_MAX_TEXTURE_LEVELS ; i++ ) {
+	    t->setup.tex_offset[i] = t->bufAddr;   /* every slot gets the base address */
+	 }
+      } else {
+         for ( i = 0; i < numLevels; i++ ) {
+            const int j = numLevels - i - 1;   /* tex_offset[] is filled in reverse order */
+            t->setup.tex_offset[j] = t->bufAddr + t->image[i].offset;
+         }
+      }
+   }
+
+   /* Let the world know we've used this memory recently.
+    */
+   driUpdateTextureLRU( (driTextureObject *) t );
+   UNLOCK_HARDWARE( rmesa );
+
+   /* Upload any images that are new */
+   if ( t->base.dirty_images[0] ) {
+      for ( i = 0 ; i < numLevels; i++ ) {
+         const GLint j = t->base.firstLevel + i;  /* the texObj's level */
+	 if ( t->base.dirty_images[0] & (1 << j) ) {
+	    uploadSubImage( rmesa, t, j, 0, 0,
+			    t->image[i].width, t->image[i].height );
+	 }
+      }
+
+      rmesa->setup.tex_cntl_c |= R128_TEX_CACHE_FLUSH;
+      rmesa->dirty |= R128_UPLOAD_CONTEXT;
+      t->base.dirty_images[0] = 0;   /* all dirty bits cleared after the loop */
+   }
+}
diff --git a/src/mesa/drivers/dri/r128/r128_texobj.h b/src/mesa/drivers/dri/r128/r128_texobj.h
new file mode 100644
index 0000000000..efbbb2df78
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_texobj.h
@@ -0,0 +1,67 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef _R128_TEXOBJ_H_
+#define _R128_TEXOBJ_H_
+
+#include "main/mm.h"
+
+/* Individual texture image information: one entry per mipmap level, filled
+ * in by r128SetTexImages() in r128_texstate.c. */
+typedef struct {
+    GLuint offset;			/* Relative to local texture space */
+    GLuint width;			/* Copied from the GL image's Width */
+    GLuint height;			/* Copied from the GL image's Height */
+} r128TexImage;
+
+typedef struct r128_tex_obj r128TexObj, *r128TexObjPtr;
+
+/* Texture object in locally shared texture space.  `base` has to stay the
+ * first member: the driver casts r128TexObjPtr to driTextureObject *. */
+struct r128_tex_obj {
+   driTextureObject   base;		/* memBlock, dirty_images, first/lastLevel, ... */
+
+   uint32_t bufAddr;			/* Offset to start of locally
+					   shared texture block */
+
+   GLuint age;				/* NOTE(review): not referenced in the code shown here */
+   r128TexImage image[R128_MAX_TEXTURE_LEVELS]; /* Image data for all
+						   mipmap levels */
+
+   uint32_t textureFormat;		/* Actual hardware format */
+
+   drm_r128_texture_regs_t setup;		/* Setup regs for texture */
+};
+
+#endif /* _R128_TEXOBJ_H_ */
diff --git a/src/mesa/drivers/dri/r128/r128_texstate.c b/src/mesa/drivers/dri/r128/r128_texstate.c
new file mode 100644
index 0000000000..2505b5cd65
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_texstate.c
@@ -0,0 +1,645 @@
+/**************************************************************************
+
+Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
+                                               Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Brian Paul <brianp@valinux.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/macros.h"
+
+#include "r128_context.h"
+#include "r128_ioctl.h"
+#include "r128_tris.h"
+#include "r128_tex.h"
+
+
+/* Derive the hardware texture state for tObj from its GL images: pick the
+ * R128 data type, choose the mipmap levels to send, lay them out back to
+ * back (32-byte aligned) and fill in t->setup.  Each level laid out is
+ * flagged dirty; the actual upload is left to r128UploadTexImages(). */
+static void r128SetTexImages( r128ContextPtr rmesa,
+                              const struct gl_texture_object *tObj )
+{
+   r128TexObjPtr t = (r128TexObjPtr) tObj->DriverData;
+   struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel];
+   const struct gl_texture_image *topImage;
+   int log2Pitch, log2Height, log2Size, log2MinSize;
+   int bytesUsed = 0;
+   int level;
+   GLint firstLevel, lastLevel;
+
+   assert(t);
+   assert(baseImage);
+
+   if ( R128_DEBUG & DEBUG_VERBOSE_API )
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, (void *) tObj );
+
+   switch (baseImage->TexFormat) {
+   case MESA_FORMAT_ARGB8888:
+   case MESA_FORMAT_ARGB8888_REV:
+      t->textureFormat = R128_DATATYPE_ARGB8888;
+      break;
+   case MESA_FORMAT_ARGB4444:
+   case MESA_FORMAT_ARGB4444_REV:
+      t->textureFormat = R128_DATATYPE_ARGB4444;
+      break;
+   case MESA_FORMAT_RGB565:
+   case MESA_FORMAT_RGB565_REV:
+      t->textureFormat = R128_DATATYPE_RGB565;
+      break;
+   case MESA_FORMAT_RGB332:
+      t->textureFormat = R128_DATATYPE_RGB8;
+      break;
+   case MESA_FORMAT_CI8:
+      t->textureFormat = R128_DATATYPE_CI8;
+      break;
+   case MESA_FORMAT_YCBCR:
+      t->textureFormat = R128_DATATYPE_YVYU422;
+      break;
+   case MESA_FORMAT_YCBCR_REV:
+      t->textureFormat = R128_DATATYPE_VYUY422;
+      break;
+   default:
+      _mesa_problem(rmesa->glCtx, "Bad texture format in %s", __FUNCTION__);
+   }
+
+   /* Compute which mipmap levels we really want to send to the hardware. */
+   driCalculateTextureFirstLastLevel( (driTextureObject *) t );
+   firstLevel = t->base.firstLevel;
+   lastLevel  = t->base.lastLevel;
+
+   topImage = tObj->Image[0][firstLevel];
+   log2Pitch = topImage->WidthLog2;
+   log2Height = topImage->HeightLog2;
+   log2Size = MAX2(log2Pitch, log2Height);
+   log2MinSize = log2Size;
+
+   t->base.dirty_images[0] = 0;
+   for ( level = firstLevel; level <= lastLevel; level++ ) {
+      const struct gl_texture_image *texImage = tObj->Image[0][level];
+      r128TexImage *slot;
+
+      /* Stop at the first level that has no image data. */
+      if ( !texImage || !texImage->Data ) {
+         lastLevel = level - 1;
+         break;
+      }
+      slot = &t->image[level - firstLevel];
+      slot->offset = bytesUsed;
+      slot->width  = texImage->Width;
+      slot->height = texImage->Height;
+      log2MinSize = texImage->MaxLog2;
+      t->base.dirty_images[0] |= (1 << level);
+
+      /* Offsets must be 32-byte aligned for host data blits and tiling */
+      bytesUsed += (texImage->Height * texImage->Width *
+                    _mesa_get_format_bytes(texImage->TexFormat));
+      bytesUsed = (bytesUsed + 31) & ~31;
+   }
+
+   t->base.totalSize = bytesUsed;
+   t->base.firstLevel = firstLevel;
+   t->base.lastLevel = lastLevel;
+
+   /* Set the texture format */
+   t->setup.tex_cntl &= ~(0xf << 16);
+   t->setup.tex_cntl |= t->textureFormat;
+
+   t->setup.tex_combine_cntl = 0x00000000;  /* XXX is this right? */
+
+   t->setup.tex_size_pitch = ((log2Pitch   << R128_TEX_PITCH_SHIFT) |
+			      (log2Size    << R128_TEX_SIZE_SHIFT) |
+			      (log2Height  << R128_TEX_HEIGHT_SHIFT) |
+			      (log2MinSize << R128_TEX_MIN_SIZE_SHIFT));
+
+   /* Start with all per-level offsets cleared. */
+   for ( level = 0 ; level < R128_MAX_TEXTURE_LEVELS ; level++ )
+      t->setup.tex_offset[level] = 0x00000000;
+
+   if (firstLevel != lastLevel)
+      t->setup.tex_cntl &= ~R128_MIP_MAP_DISABLE;
+   else
+      t->setup.tex_cntl |= R128_MIP_MAP_DISABLE;
+
+   /* FYI: r128UploadTexImages( rmesa, t ); used to be called here */
+}
+
+
+/* ================================================================
+ * Texture combine functions: each macro pairs a combine function with a
+ * factor; r128UpdateTextureEnv() ORs them together into one combine word. */
+/* Color channel: combine function | color factor. */
+#define COLOR_COMB_DISABLE		(R128_COMB_DIS |		\
+					 R128_COLOR_FACTOR_TEX)
+#define COLOR_COMB_COPY_INPUT		(R128_COMB_COPY_INP |		\
+					 R128_COLOR_FACTOR_TEX)
+#define COLOR_COMB_MODULATE		(R128_COMB_MODULATE |		\
+					 R128_COLOR_FACTOR_TEX)
+#define COLOR_COMB_MODULATE_NTEX	(R128_COMB_MODULATE |		\
+					 R128_COLOR_FACTOR_NTEX)
+#define COLOR_COMB_ADD			(R128_COMB_ADD |		\
+					 R128_COLOR_FACTOR_TEX)
+#define COLOR_COMB_BLEND_TEX		(R128_COMB_BLEND_TEXTURE |	\
+					 R128_COLOR_FACTOR_TEX)
+/* Rage 128 Pro/M3 only! */
+#define COLOR_COMB_BLEND_COLOR		(R128_COMB_MODULATE2X |		\
+					 R128_COMB_FCN_MSB |		\
+					 R128_COLOR_FACTOR_CONST_COLOR)
+/* Alpha channel: combine function | alpha factor. */
+#define ALPHA_COMB_DISABLE		(R128_COMB_ALPHA_DIS |		\
+					 R128_ALPHA_FACTOR_TEX_ALPHA)
+#define ALPHA_COMB_COPY_INPUT		(R128_COMB_ALPHA_COPY_INP |	\
+					 R128_ALPHA_FACTOR_TEX_ALPHA)
+#define ALPHA_COMB_MODULATE		(R128_COMB_ALPHA_MODULATE |	\
+					 R128_ALPHA_FACTOR_TEX_ALPHA)
+#define ALPHA_COMB_MODULATE_NTEX	(R128_COMB_ALPHA_MODULATE |	\
+					 R128_ALPHA_FACTOR_NTEX_ALPHA)
+#define ALPHA_COMB_ADD			(R128_COMB_ALPHA_ADD |		\
+					 R128_ALPHA_FACTOR_TEX_ALPHA)
+/* Input selection: r128UpdateTextureEnv() uses INTERP on unit 0, PREVIOUS otherwise. */
+#define INPUT_INTERP			(R128_INPUT_FACTOR_INT_COLOR |	\
+					 R128_INP_FACTOR_A_INT_ALPHA)
+#define INPUT_PREVIOUS			(R128_INPUT_FACTOR_PREV_COLOR |	\
+					 R128_INP_FACTOR_A_PREV_ALPHA)
+
+/* Translate the texture environment of the GL unit feeding hardware unit
+ * `unit` into an R128 combine word and cache it in rmesa->tex_combine[unit].
+ * Returns GL_FALSE when the mode/format pair cannot be set up here. */
+static GLboolean r128UpdateTextureEnv( GLcontext *ctx, int unit )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   GLint source = rmesa->tmu_source[unit];
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[source];
+   const struct gl_texture_object *tObj = texUnit->_Current;
+   const GLenum format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+   GLuint combine;
+
+   if ( R128_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s( %p, %d )\n",
+	       __FUNCTION__, (void *) ctx, unit );
+   }
+
+   if ( unit == 0 ) {
+      combine = INPUT_INTERP;
+   } else {
+      combine = INPUT_PREVIOUS;
+   }
+
+   /* Set the texture environment state */
+   switch ( texUnit->EnvMode ) {
+   case GL_REPLACE:
+      switch ( format ) {
+      case GL_RGBA:
+      case GL_LUMINANCE_ALPHA:
+      case GL_INTENSITY:
+	 combine |= (COLOR_COMB_DISABLE |		/* C = Ct            */
+		     ALPHA_COMB_DISABLE);		/* A = At            */
+	 break;
+      case GL_RGB:
+      case GL_LUMINANCE:
+	 combine |= (COLOR_COMB_DISABLE |		/* C = Ct            */
+		     ALPHA_COMB_COPY_INPUT);		/* A = Af            */
+	 break;
+      case GL_ALPHA:
+	 combine |= (COLOR_COMB_COPY_INPUT |		/* C = Cf            */
+		     ALPHA_COMB_DISABLE);		/* A = At            */
+	 break;
+      case GL_COLOR_INDEX:
+      default:
+	 return GL_FALSE;
+      }
+      break;
+
+   case GL_MODULATE:
+      switch ( format ) {
+      case GL_RGBA:
+      case GL_LUMINANCE_ALPHA:
+      case GL_INTENSITY:
+	 combine |= (COLOR_COMB_MODULATE |		/* C = CfCt          */
+		     ALPHA_COMB_MODULATE);		/* A = AfAt          */
+	 break;
+      case GL_RGB:
+      case GL_LUMINANCE:
+	 combine |= (COLOR_COMB_MODULATE |		/* C = CfCt          */
+		     ALPHA_COMB_COPY_INPUT);		/* A = Af            */
+	 break;
+      case GL_ALPHA:
+	 combine |= (COLOR_COMB_COPY_INPUT |		/* C = Cf            */
+		     ALPHA_COMB_MODULATE);		/* A = AfAt          */
+	 break;
+      case GL_COLOR_INDEX:
+      default:
+	 return GL_FALSE;
+      }
+      break;
+
+   case GL_DECAL:
+      switch ( format ) {
+      case GL_RGBA:
+	 combine |= (COLOR_COMB_BLEND_TEX |		/* C = Cf(1-At)+CtAt */
+		     ALPHA_COMB_COPY_INPUT);		/* A = Af            */
+	 break;
+      case GL_RGB:
+	 combine |= (COLOR_COMB_DISABLE |		/* C = Ct            */
+		     ALPHA_COMB_COPY_INPUT);		/* A = Af            */
+	 break;
+      case GL_ALPHA:
+      case GL_LUMINANCE:
+      case GL_LUMINANCE_ALPHA:
+      case GL_INTENSITY:
+	 /* Undefined behaviour - just copy the incoming fragment */
+	 combine |= (COLOR_COMB_COPY_INPUT |		/* C = undefined     */
+		     ALPHA_COMB_COPY_INPUT);		/* A = undefined     */
+	 break;
+      case GL_COLOR_INDEX:
+      default:
+	 return GL_FALSE;
+      }
+      break;
+
+   case GL_BLEND:
+      /* Rage 128 Pro and M3 can handle GL_BLEND texturing. */
+      if ( !R128_IS_PLAIN( rmesa ) ) {
+         /* XXX this hasn't been fully tested, I don't have a Pro card. -BP */
+	 switch ( format ) {
+	 case GL_RGBA:
+	 case GL_LUMINANCE_ALPHA:
+	    combine |= (COLOR_COMB_BLEND_COLOR |	/* C = Cf(1-Ct)+CcCt */
+			ALPHA_COMB_MODULATE);		/* A = AfAt          */
+	    break;
+
+	 case GL_RGB:
+	 case GL_LUMINANCE:
+	    combine |= (COLOR_COMB_BLEND_COLOR |	/* C = Cf(1-Ct)+CcCt */
+			ALPHA_COMB_COPY_INPUT);		/* A = Af            */
+	    break;
+
+	 case GL_ALPHA:
+	    combine |= (COLOR_COMB_COPY_INPUT |		/* C = Cf            */
+			ALPHA_COMB_MODULATE);		/* A = AfAt          */
+	    break;
+
+	 case GL_INTENSITY:
+	    /* GH: We could be smarter about this... */
+	    switch ( rmesa->env_color & 0xff000000 ) {
+	    case 0x00000000:
+	       combine |= (COLOR_COMB_BLEND_COLOR |	/* C = Cf(1-It)+CcIt */
+			   ALPHA_COMB_MODULATE_NTEX);	/* A = Af(1-It)      */
+	       break;	/* was missing: the supported case fell into the fallback */
+	    default:
+	       return GL_FALSE;				/* nonzero env alpha: fallback */
+	    }
+	    break;
+
+	 case GL_COLOR_INDEX:
+	 default:
+	    return GL_FALSE;
+	 }
+	 break;
+      }
+
+      /* Rage 128 has to fake some cases of GL_BLEND, otherwise fallback
+       * to software rendering. */
+      if ( rmesa->blend_flags ) {
+	 return GL_FALSE;
+      }
+      switch ( format ) {
+      case GL_RGBA:
+      case GL_LUMINANCE_ALPHA:
+	 switch ( rmesa->env_color & 0x00ffffff ) {
+	 case 0x00000000:
+	    combine |= (COLOR_COMB_MODULATE_NTEX |	/* C = Cf(1-Ct)      */
+			ALPHA_COMB_MODULATE);		/* A = AfAt          */
+	    break;
+#if 0
+         /* This isn't right - BP */
+	 case 0x00ffffff:
+	    if ( unit == 0 ) {
+	       combine |= (COLOR_COMB_MODULATE_NTEX |	/* C = Cf(1-Ct)      */
+			   ALPHA_COMB_MODULATE);	/* A = AfAt          */
+	    } else {
+	       combine |= (COLOR_COMB_ADD |		/* C = Cf+Ct         */
+			   ALPHA_COMB_COPY_INPUT);	/* A = Af            */
+	    }
+	    break;
+#endif
+	 default:
+	    combine |= (COLOR_COMB_MODULATE |		/* C = fallback      */
+			ALPHA_COMB_MODULATE);		/* A = fallback      */
+	    return GL_FALSE;
+	 }
+	 break;
+      case GL_RGB:
+      case GL_LUMINANCE:
+	 switch ( rmesa->env_color & 0x00ffffff ) {
+	 case 0x00000000:
+	    combine |= (COLOR_COMB_MODULATE_NTEX |	/* C = Cf(1-Ct)      */
+			ALPHA_COMB_COPY_INPUT);		/* A = Af            */
+	    break;
+#if 0
+         /* This isn't right - BP */
+	 case 0x00ffffff:
+	    if ( unit == 0 ) {
+	       combine |= (COLOR_COMB_MODULATE_NTEX |	/* C = Cf(1-Ct)      */
+			   ALPHA_COMB_COPY_INPUT);	/* A = Af            */
+	    } else {
+	       combine |= (COLOR_COMB_ADD |		/* C = Cf+Ct         */
+			   ALPHA_COMB_COPY_INPUT);	/* A = Af            */
+	    }
+	    break;
+#endif
+	 default:
+	    combine |= (COLOR_COMB_MODULATE |		/* C = fallback      */
+			ALPHA_COMB_COPY_INPUT);		/* A = fallback      */
+	    return GL_FALSE;
+	 }
+	 break;
+      case GL_ALPHA:
+	 if ( unit == 0 ) {
+	    combine |= (COLOR_COMB_COPY_INPUT |		/* C = Cf            */
+			ALPHA_COMB_MODULATE);		/* A = AfAt          */
+	 } else {
+	    combine |= (COLOR_COMB_COPY_INPUT |		/* C = Cf            */
+			ALPHA_COMB_COPY_INPUT);		/* A = Af            */
+	 }
+	 break;
+      case GL_INTENSITY:
+	 switch ( rmesa->env_color & 0x00ffffff ) {
+	 case 0x00000000:
+	    combine |= COLOR_COMB_MODULATE_NTEX;	/* C = Cf(1-It)      */
+	    break;
+#if 0
+         /* This isn't right - BP */
+	 case 0x00ffffff:
+	    if ( unit == 0 ) {
+	       combine |= COLOR_COMB_MODULATE_NTEX;	/* C = Cf(1-It)      */
+	    } else {
+	       combine |= COLOR_COMB_ADD;		/* C = Cf+It         */
+	    }
+	    break;
+#endif
+	 default:
+	    combine |= (COLOR_COMB_MODULATE |		/* C = fallback      */
+			ALPHA_COMB_MODULATE);		/* A = fallback      */
+	    return GL_FALSE;
+	 }
+	 switch ( rmesa->env_color & 0xff000000 ) {
+	 case 0x00000000:
+	    combine |= ALPHA_COMB_MODULATE_NTEX;	/* A = Af(1-It)      */
+	    break;
+#if 0
+         /* This isn't right - BP */
+	 case 0xff000000:
+	    if ( unit == 0 ) {
+	       combine |= ALPHA_COMB_MODULATE_NTEX;	/* A = Af(1-It)      */
+	    } else {
+	       combine |= ALPHA_COMB_ADD;		/* A = Af+It         */
+	    }
+	    break;
+#endif
+	 default:
+	    combine |= (COLOR_COMB_MODULATE |		/* C = fallback      */
+			ALPHA_COMB_MODULATE);		/* A = fallback      */
+	    return GL_FALSE;
+	 }
+	 break;
+      case GL_COLOR_INDEX:
+      default:
+	 return GL_FALSE;
+      }
+      break;
+
+   case GL_ADD:
+      switch ( format ) {
+      case GL_RGBA:
+      case GL_LUMINANCE_ALPHA:
+	 combine |= (COLOR_COMB_ADD |			/* C = Cf+Ct         */
+		     ALPHA_COMB_MODULATE);		/* A = AfAt          */
+	 break;
+      case GL_RGB:
+      case GL_LUMINANCE:
+	 combine |= (COLOR_COMB_ADD |			/* C = Cf+Ct         */
+		     ALPHA_COMB_COPY_INPUT);		/* A = Af            */
+	 break;
+      case GL_ALPHA:
+	 combine |= (COLOR_COMB_COPY_INPUT |		/* C = Cf            */
+		     ALPHA_COMB_MODULATE);		/* A = AfAt          */
+	 break;
+      case GL_INTENSITY:
+	 combine |= (COLOR_COMB_ADD |			/* C = Cf+Ct         */
+		     ALPHA_COMB_ADD);			/* A = Af+At         */
+	 break;
+      case GL_COLOR_INDEX:
+      default:
+	 return GL_FALSE;
+      }
+      break;
+
+   default:
+      return GL_FALSE;
+   }
+
+   if ( rmesa->tex_combine[unit] != combine ) {
+     rmesa->tex_combine[unit] = combine;
+     rmesa->dirty |= R128_UPLOAD_TEX0 << unit;
+   }
+   return GL_TRUE;
+}
+
+/* Switch hardware unit `unit` off: FLUSH_BATCH, detach its bound texture
+ * object, and clear the unit's enable and size/pitch context bits. */
+static void disable_tex( GLcontext *ctx, int unit )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   r128TexObjPtr bound;
+
+   FLUSH_BATCH( rmesa );
+   bound = rmesa->CurrentTexObj[unit];
+   if ( bound != NULL ) {
+      bound->base.bound &= ~(1 << unit);
+      rmesa->CurrentTexObj[unit] = NULL;
+   }
+
+   rmesa->dirty |= R128_UPLOAD_CONTEXT;
+   rmesa->setup.tex_cntl_c &= ~(R128_TEXMAP_ENABLE << unit);
+   rmesa->setup.tex_size_pitch_c &= ~(R128_TEX_SIZE_PITCH_MASK <<
+				      (R128_SEC_TEX_SIZE_PITCH_SHIFT * unit));
+
+   /* With either texture unit disabled, multitexturing is not happening. */
+   rmesa->blend_flags &= ~R128_BLEND_MULTITEX;
+}
+
+/* Make sure the texture feeding hardware unit `unit` is resident.  If any
+ * of its images are flagged dirty, rebuild its layout and upload it.
+ * Returns GL_FALSE when the texture ends up without a memory block. */
+static GLboolean enable_tex_2d( GLcontext *ctx, int unit )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   const struct gl_texture_object *tObj =
+      ctx->Texture.Unit[rmesa->tmu_source[unit]]._Current;
+   r128TexObjPtr t = (r128TexObjPtr) tObj->DriverData;
+
+   /* Nothing to load when no image of this texture is dirty. */
+   if ( !t->base.dirty_images[0] )
+      return GL_TRUE;
+
+   /* FIXME: For Radeon, RADEON_FIREVERTICES is called here.  Should
+    * FIXME: something similar be done for R128?
+    */
+   /*  FLUSH_BATCH( rmesa ); */
+
+   r128SetTexImages( rmesa, tObj );
+   r128UploadTexImages( rmesa, t );
+
+   return t->base.memBlock ? GL_TRUE : GL_FALSE;
+}
+
+/* Bind the texture feeding hardware unit `unit` and program that unit's
+ * share of the context registers.  Returns GL_FALSE for bordered
+ * textures, otherwise whatever r128UpdateTextureEnv() returns; the caller
+ * turns GL_FALSE into a texture fallback.
+ */
+static GLboolean update_tex_common( GLcontext *ctx, int unit )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   const int source = rmesa->tmu_source[unit];
+   const struct gl_texture_object *tObj = ctx->Texture.Unit[source]._Current;
+   r128TexObjPtr t = (r128TexObjPtr) tObj->DriverData;
+   r128TexObjPtr previous = rmesa->CurrentTexObj[unit];
+
+   /* Fallback if there's a texture border */
+   if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) {
+      return GL_FALSE;
+   }
+
+   /* Update state if this is a different texture object to last time. */
+   if ( previous != t ) {
+      /* The old texture is no longer bound to this texture unit.
+       * Mark it as such.
+       */
+      if ( previous != NULL ) {
+         previous->base.bound &= ~(1UL << unit);
+      }
+
+      rmesa->CurrentTexObj[unit] = t;
+      t->base.bound |= (1UL << unit);
+      rmesa->dirty |= R128_UPLOAD_TEX0 << unit;
+
+      driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */
+   }
+
+   /* FIXME: We need to update the texture unit if any texture parameters have
+    * changed, but this texture was already bound.  This could be changed to
+    * work like the Radeon driver where the texture object has its own
+    * dirty state flags
+    */
+   rmesa->dirty |= R128_UPLOAD_TEX0 << unit;
+
+   /* register setup: clear this unit's size/pitch field, then refill it */
+   rmesa->setup.tex_size_pitch_c &= ~(R128_TEX_SIZE_PITCH_MASK <<
+				      (R128_SEC_TEX_SIZE_PITCH_SHIFT * unit));
+
+   /* Unit 0 uses the primary enable bit and an unshifted size/pitch value;
+    * any other unit uses the SEC bit and the value shifted left by 16. */
+   if ( unit != 0 ) {
+      rmesa->setup.tex_cntl_c       |= R128_SEC_TEXMAP_ENABLE;
+      rmesa->setup.tex_size_pitch_c |= t->setup.tex_size_pitch << 16;
+      rmesa->setup.scale_3d_cntl    |= R128_TEX_CACHE_SPLIT;
+      t->setup.tex_cntl             |=  R128_SEC_SELECT_SEC_ST;
+
+      /* If the second TMU is enabled, then multitexturing is happening.
+       */
+      if ( R128_IS_PLAIN( rmesa ) )
+         rmesa->blend_flags |= R128_BLEND_MULTITEX;
+   }
+   else {
+      rmesa->setup.tex_cntl_c       |= R128_TEXMAP_ENABLE;
+      rmesa->setup.tex_size_pitch_c |= t->setup.tex_size_pitch << 0;
+      rmesa->setup.scale_3d_cntl    &= ~R128_TEX_CACHE_SPLIT;
+      t->setup.tex_cntl             &= ~R128_SEC_SELECT_SEC_ST;
+   }
+
+   rmesa->dirty |= R128_UPLOAD_CONTEXT;
+
+   /* FIXME: The Radeon has some cached state so that it can avoid calling
+    * FIXME: UpdateTextureEnv in some cases.  Is that possible here?
+    */
+   return r128UpdateTextureEnv( ctx, unit );
+}
+
+/* Bring hardware unit `unit` in line with the GL unit that feeds it:
+ * disable it when that GL unit is off, set it up for 1D/2D targets, and
+ * report GL_FALSE for any other enabled target. */
+static GLboolean updateTextureUnit( GLcontext *ctx, int unit )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   const int source = rmesa->tmu_source[unit];
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[source];
+
+   if ( !texUnit->_ReallyEnabled ) {
+      disable_tex( ctx, unit );
+      return GL_TRUE;
+   }
+
+   if ( !(texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) )
+      return GL_FALSE;
+
+   return (enable_tex_2d( ctx, unit ) && update_tex_common( ctx, unit ));
+}
+
+
+/* Revalidate both hardware texture units against the current GL state and
+ * hand the outcome to FALLBACK() for R128_FALLBACK_TEXTURE. */
+void r128UpdateTextureState( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   const GLboolean onlyUnit1 = ((ctx->Texture._EnabledUnits & 0x03) == 0x02);
+   GLboolean ok;
+
+   /* This works around a quirk with the R128 hardware.  If only OpenGL
+    * TEXTURE1 is enabled, then the hardware TEXTURE0 must be used.  The
+    * hardware TEXTURE1 can ONLY be used when hardware TEXTURE0 is also used.
+    * So the two source units are swapped when only GL unit 1 is enabled.
+    */
+   rmesa->tmu_source[0] = onlyUnit1 ? 1 : 0;
+   rmesa->tmu_source[1] = onlyUnit1 ? 0 : 1;
+
+   /* Unit 1 is only visited when unit 0 succeeded. */
+   ok = GL_FALSE;
+   if ( updateTextureUnit( ctx, 0 ) ) {
+      if ( updateTextureUnit( ctx, 1 ) )
+         ok = GL_TRUE;
+   }
+
+   FALLBACK( rmesa, R128_FALLBACK_TEXTURE, !ok );
+}
diff --git a/src/mesa/drivers/dri/r128/r128_tris.c b/src/mesa/drivers/dri/r128/r128_tris.c
new file mode 100644
index 0000000000..9ea2a9d162
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_tris.c
@@ -0,0 +1,797 @@
+/* -*- c-basic-offset: 3 -*- */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/macros.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "r128_tris.h"
+#include "r128_state.h"
+#include "r128_tex.h"
+#include "r128_ioctl.h"
+
+static const GLuint hw_prim[GL_POLYGON+1] = {	/* indexed by GL primitive enum */
+   R128_CCE_VC_CNTL_PRIM_TYPE_POINT,		/* GL_POINTS */
+   R128_CCE_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINES */
+   R128_CCE_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINE_LOOP */
+   R128_CCE_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINE_STRIP */
+   R128_CCE_VC_CNTL_PRIM_TYPE_TRI_LIST,		/* GL_TRIANGLES */
+   R128_CCE_VC_CNTL_PRIM_TYPE_TRI_LIST,		/* GL_TRIANGLE_STRIP */
+   R128_CCE_VC_CNTL_PRIM_TYPE_TRI_LIST,		/* GL_TRIANGLE_FAN */
+   R128_CCE_VC_CNTL_PRIM_TYPE_TRI_LIST,		/* GL_QUADS */
+   R128_CCE_VC_CNTL_PRIM_TYPE_TRI_LIST,		/* GL_QUAD_STRIP */
+   R128_CCE_VC_CNTL_PRIM_TYPE_TRI_LIST,		/* GL_POLYGON */
+};
+
+static void r128RasterPrimitive( GLcontext *ctx, GLuint hwprim );	/* forward decl: called by RASTERIZE() */
+static void r128RenderPrimitive( GLcontext *ctx, GLenum prim );	/* forward decl */
+
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+/* Settings for the tnl_dd/t_dd_triemit.h include below; TAG() prefixes generated names with r128_. */
+#define HAVE_QUADS 0
+#define HAVE_LINES 1
+#define HAVE_POINTS 1
+#define HAVE_LE32_VERTS 1
+#define CTX_ARG r128ContextPtr rmesa
+#define GET_VERTEX_DWORDS() rmesa->vertex_size
+#define ALLOC_VERTS( n, size ) r128AllocDmaLow( rmesa, (n), (size) * 4 )	/* presumably size is in dwords, hence * 4 */
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);			\
+   const char *vertptr = rmesa->verts;
+#define VERT(x) (r128Vertex *)(vertptr + ((x) * vertsize * 4))	/* vertsize is not defined here; presumably a local of the template */
+#define VERTEX r128Vertex
+#undef TAG
+#define TAG(x) r128_##x
+#include "tnl_dd/t_dd_triemit.h"	/* presumably defines r128_triangle/quad/line/point used below */
+#undef TAG
+#undef LOCAL_VARS	/* redefined with a parameter further down */
+
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+/* Each macro uses the rmesa->draw_* hook when DO_FALLBACK is set for the instantiation, else the r128_* emitter. */
+#define TRI( a, b, c )				\
+do {						\
+   if (DO_FALLBACK)				\
+      rmesa->draw_tri( rmesa, a, b, c );	\
+   else						\
+      r128_triangle( rmesa, a, b, c );		\
+} while (0)
+/* The fallback path draws a quad as the two triangles (a,b,d) and (b,c,d). */
+#define QUAD( a, b, c, d )			\
+do {						\
+   if (DO_FALLBACK) {				\
+      rmesa->draw_tri( rmesa, a, b, d );	\
+      rmesa->draw_tri( rmesa, b, c, d );	\
+   } else 					\
+      r128_quad( rmesa, a, b, c, d );		\
+} while (0)
+
+#define LINE( v0, v1 )				\
+do {						\
+   if (DO_FALLBACK)				\
+      rmesa->draw_line( rmesa, v0, v1 );	\
+   else 					\
+      r128_line( rmesa, v0, v1 );		\
+} while (0)
+
+#define POINT( v0 )				\
+do {						\
+   if (DO_FALLBACK)				\
+      rmesa->draw_point( rmesa, v0 );		\
+   else 					\
+      r128_point( rmesa, v0 );			\
+} while (0)
+
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+/* Bits OR-ed together to form IND, the index of one t_dd_tritmp.h instantiation in rast_tab. */
+#define R128_OFFSET_BIT	0x01
+#define R128_TWOSIDE_BIT	0x02
+#define R128_UNFILLED_BIT	0x04
+#define R128_FALLBACK_BIT	0x08
+#define R128_MAX_TRIFUNC	0x10	/* table size: every combination of the four bits */
+
+/* Render functions per IND value; presumably filled in by the generated init*() functions. */
+static struct {
+   tnl_points_func	        points;
+   tnl_line_func		line;
+   tnl_triangle_func	triangle;
+   tnl_quad_func		quad;
+} rast_tab[R128_MAX_TRIFUNC];
+
+
+#define DO_FALLBACK (IND & R128_FALLBACK_BIT)	/* IND is defined anew before each t_dd_tritmp.h include */
+#define DO_OFFSET   (IND & R128_OFFSET_BIT)
+#define DO_UNFILLED (IND & R128_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & R128_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_SPEC   1
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define VERTEX r128Vertex
+#define TAB rast_tab
+
+#define DEPTH_SCALE rmesa->depth_scale
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a > 0)
+#define GET_VERTEX(e) (rmesa->verts + (e * rmesa->vertex_size * sizeof(int)))	/* offset e * vertex_size * sizeof(int) into verts */
+/* Convert the four components of c to unsigned bytes in the vertex word at coloroffset. */
+#define VERT_SET_RGBA( v, c )  					\
+do {								\
+   r128_color_t *color = (r128_color_t *)&((v)->ui[coloroffset]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]);		\
+} while (0)
+
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+/* The specular macros touch only red/green/blue and do nothing when havespec is false. */
+#define VERT_SET_SPEC( v0, c )					\
+do {								\
+   if (havespec) {						\
+      r128_color_t *spec = (r128_color_t *)&((v0)->ui[specoffset]); \
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]);		\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]);		\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]);		\
+   }								\
+} while (0)
+#define VERT_COPY_SPEC( v0, v1 )			\
+do {							\
+   if (havespec) {					\
+      r128_color_t *spec0 = (r128_color_t *)&((v0)->ui[specoffset]); \
+      r128_color_t *spec1 = (r128_color_t *)&((v1)->ui[specoffset]); \
+      spec0->red   = spec1->red;			\
+      spec0->green = spec1->green;			\
+      spec0->blue  = spec1->blue; 			\
+   }							\
+} while (0)
+
+/* These don't need LE32_TO_CPU() as they are used to save and restore
+ * colors which are already in the correct format.
+ */
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+#define VERT_SAVE_SPEC( idx )    if (havespec) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx ) if (havespec) v[idx]->ui[specoffset] = spec[idx]
+
+/* Locals used by the macros above; n sizes the color[]/spec[] save arrays. */
+#define LOCAL_VARS(n)						\
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);			\
+   GLuint color[n] = { 0 };					\
+   GLuint spec[n] = { 0 };					\
+   GLuint coloroffset = rmesa->coloroffset;			\
+   GLuint specoffset = rmesa->specoffset;			\
+   GLboolean havespec = (rmesa->specoffset != 0);		\
+   (void) color; (void) spec; (void) specoffset;		\
+   (void) coloroffset; (void) havespec;
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+/* RASTERIZE(x) calls r128RasterPrimitive() only when hw_prim[x] differs from rmesa->hw_primitive. */
+#define RASTERIZE(x) if (rmesa->hw_primitive != hw_prim[x]) \
+                        r128RasterPrimitive( ctx, hw_prim[x] )
+#define RENDER_PRIMITIVE rmesa->render_primitive
+#define IND R128_FALLBACK_BIT	/* makes DO_FALLBACK true inside t_dd_unfilled.h */
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"	/* presumably defines unfilled_tri/unfilled_quad named above */
+#undef IND
+
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+/* One t_dd_tritmp.h include per IND value; TAG() adds the name suffix (e.g. init_offset). Presumably the template #undefs IND and TAG itself. */
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_TWOSIDE_BIT|R128_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_OFFSET_BIT|R128_UNFILLED_BIT)
+#define TAG(x) x##_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_TWOSIDE_BIT|R128_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_TWOSIDE_BIT|R128_OFFSET_BIT|R128_UNFILLED_BIT)
+#define TAG(x) x##_twoside_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_FALLBACK_BIT)
+#define TAG(x) x##_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_OFFSET_BIT|R128_FALLBACK_BIT)
+#define TAG(x) x##_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_TWOSIDE_BIT|R128_FALLBACK_BIT)
+#define TAG(x) x##_twoside_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_TWOSIDE_BIT|R128_OFFSET_BIT|R128_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_UNFILLED_BIT|R128_FALLBACK_BIT)
+#define TAG(x) x##_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_OFFSET_BIT|R128_UNFILLED_BIT|R128_FALLBACK_BIT)
+#define TAG(x) x##_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_TWOSIDE_BIT|R128_UNFILLED_BIT|R128_FALLBACK_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R128_TWOSIDE_BIT|R128_OFFSET_BIT|R128_UNFILLED_BIT| \
+	     R128_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+static void init_rast_tab( void )
+{
+   /* These look like the TAG(init) routines produced by the t_dd_tritmp.h
+    * inclusions above; call every one of them in the original order.
+    */
+   static void (* const setup[])( void ) = {
+      init,                  init_offset,
+      init_twoside,          init_twoside_offset,
+      init_unfilled,         init_offset_unfilled,
+      init_twoside_unfilled, init_twoside_offset_unfilled,
+      init_fallback,         init_offset_fallback,
+      init_twoside_fallback, init_twoside_offset_fallback,
+      init_unfilled_fallback,         init_offset_unfilled_fallback,
+      init_twoside_unfilled_fallback, init_twoside_offset_unfilled_fallback,
+   };
+   GLuint i;
+   for (i = 0; i < sizeof(setup) / sizeof(setup[0]); i++)
+      setup[i]();
+}
+
+
+
+/***********************************************************************
+ *                    Rasterization fallback helpers                   *
+ ***********************************************************************/
+
+
+/* Fallback helpers.  This code is hit only when a mix of accelerated
+ * and unaccelerated primitives are being drawn, and only for the
+ * unaccelerated primitives.  For a triangle, each vertex is passed
+ * through _swsetup_Translate() and then given to _swrast_Triangle().
+ */
+static void
+r128_fallback_tri( r128ContextPtr rmesa,
+		   r128Vertex *v0, r128Vertex *v1, r128Vertex *v2 )
+{
+   GLcontext *ctx = rmesa->glCtx;
+   SWvertex a, b, c;
+
+   _swsetup_Translate( ctx, v0, &a );
+   _swsetup_Translate( ctx, v1, &b );
+   _swsetup_Translate( ctx, v2, &c );
+   _swrast_Triangle( ctx, &a, &b, &c );
+}
+
+
+/* Line fallback: both end points go through _swsetup_Translate(), then
+ * _swrast_Line() is called on the results. */
+static void
+r128_fallback_line( r128ContextPtr rmesa, r128Vertex *v0, r128Vertex *v1 )
+{
+   GLcontext *ctx = rmesa->glCtx;
+   SWvertex from, to;
+   _swsetup_Translate( ctx, v0, &from );
+   _swsetup_Translate( ctx, v1, &to );
+   _swrast_Line( ctx, &from, &to );
+}
+
+
+/* Point fallback: _swsetup_Translate() the vertex, then _swrast_Point(). */
+static void
+r128_fallback_point( r128ContextPtr rmesa, r128Vertex *v0 )
+{
+   GLcontext *ctx = rmesa->glCtx;
+   SWvertex sw;
+   _swsetup_Translate( ctx, v0, &sw );
+   _swrast_Point( ctx, &sw );
+}
+
+
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		\
+      r128_point( rmesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+   r128_line( rmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 )  \
+   r128_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   r128_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#define INIT(x) do {					\
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);	\
+   r128RenderPrimitive( ctx, x );			\
+} while (0)
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+    r128ContextPtr rmesa = R128_CONTEXT(ctx);		\
+    const GLuint vertsize = rmesa->vertex_size;		\
+    const char *vertptr = (char *)rmesa->verts;		\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    (void) elt;
+#define RESET_STIPPLE
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS
+#define ELT(x) (x)
+#define TAG(x) r128_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) r128_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+#define POINT_FALLBACK (DD_POINT_SMOOTH)
+#define LINE_FALLBACK (DD_LINE_STIPPLE)
+#define TRI_FALLBACK (DD_TRI_SMOOTH)
+#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK)
+#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED)
+#define _R128_NEW_RENDER_STATE (ANY_FALLBACK_FLAGS | ANY_RASTER_FLAGS)
+
+void r128ChooseRenderState(GLcontext *ctx)
+{
+   /* Derive the rast_tab index and draw hooks from ctx->_TriangleCaps;
+    * touch the TNL callbacks only if the index differs from RenderIndex.
+    */
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint index = 0;
+   TNLcontext *tnl;
+
+   /* The draw hooks are left untouched when no relevant cap is set. */
+   if (caps & (ANY_RASTER_FLAGS|ANY_FALLBACK_FLAGS)) {
+      rmesa->draw_point = (caps & POINT_FALLBACK) ? r128_fallback_point
+                                                  : r128_point;
+      rmesa->draw_line = (caps & LINE_FALLBACK) ? r128_fallback_line
+                                                : r128_line;
+      rmesa->draw_tri = (caps & TRI_FALLBACK) ? r128_fallback_tri
+                                              : r128_triangle;
+      if (caps & DD_TRI_LIGHT_TWOSIDE)
+         index |= R128_TWOSIDE_BIT;
+      if (caps & DD_TRI_OFFSET)
+         index |= R128_OFFSET_BIT;
+      if (caps & DD_TRI_UNFILLED)
+         index |= R128_UNFILLED_BIT;
+      if (caps & ANY_FALLBACK_FLAGS)
+         index |= R128_FALLBACK_BIT;
+   }
+
+   if (index == rmesa->RenderIndex)
+      return;
+
+   tnl = TNL_CONTEXT(ctx);
+   tnl->Driver.Render.Points = rast_tab[index].points;
+   tnl->Driver.Render.Line = rast_tab[index].line;
+   tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+   tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+   tnl->Driver.Render.Quad = rast_tab[index].quad;
+   if (index != 0) {
+      tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+      tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+   } else {
+      tnl->Driver.Render.PrimTabVerts = r128_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = r128_render_tab_elts;
+      tnl->Driver.Render.ClippedPolygon = r128_fast_clipped_poly;
+   }
+   rmesa->RenderIndex = index;
+}
+
+/**********************************************************************/
+/*                 Validate state at pipeline start                   */
+/**********************************************************************/
+
+static void r128RunPipeline( GLcontext *ctx )
+{
+   /* Refresh hardware and render state as needed, then run the pipeline. */
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   const GLboolean hwStale =
+      rmesa->new_state || (rmesa->NewGLState & _NEW_TEXTURE);
+
+   if (hwStale)
+      r128DDUpdateHWState( ctx );
+   if (rmesa->NewGLState != 0 && rmesa->Fallback == 0) {
+      if (rmesa->NewGLState & _R128_NEW_RENDER_STATE)
+         r128ChooseRenderState( ctx );
+      rmesa->NewGLState = 0;
+   }
+   _tnl_run_pipeline( ctx );
+}
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+/* This is called when Mesa switches between rendering triangle
+ * primitives (such as GL_POLYGON, GL_QUADS, GL_TRIANGLE_STRIP, etc),
+ * and lines, points and bitmaps.
+ *
+ * As the r128 uses triangles to render lines and points, it is
+ * necessary to turn off hardware culling when rendering these
+ * primitives.
+ */
+
+static void r128RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+{
+   /* Select the brush (the 32x32 mono one when polygon stipple is on and
+    * hwprim == GL_TRIANGLES, solid color otherwise), flag the context for
+    * upload, and flush the batch if the hardware primitive changes.
+    */
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   const GLboolean stippled =
+      ctx->Polygon.StippleFlag && hwprim == GL_TRIANGLES;
+
+   rmesa->setup.dp_gui_master_cntl_c &= ~R128_GMC_BRUSH_NONE;
+   rmesa->setup.dp_gui_master_cntl_c |= stippled
+      ? R128_GMC_BRUSH_32x32_MONO_FG_LA : R128_GMC_BRUSH_SOLID_COLOR;
+   rmesa->new_state |= R128_NEW_CONTEXT;
+   rmesa->dirty |= R128_UPLOAD_CONTEXT;
+
+   if (rmesa->hw_primitive == hwprim)
+      return;
+   FLUSH_BATCH( rmesa );
+   rmesa->hw_primitive = hwprim;
+}
+
+static void r128SetupAntialias( GLcontext *ctx, GLenum prim )
+{
+   /* Make the R128_EDGE_ANTIALIAS setup bit agree with the smooth flag
+    * of the primitive class: polygon for prim >= GL_TRIANGLES, line for
+    * prim >= GL_LINES, and always off below that.
+    */
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   const GLuint currAA =
+      (rmesa->setup.pm4_vc_fpu_setup & R128_EDGE_ANTIALIAS) != 0;
+   const GLuint wantAA =
+      ( prim >= GL_TRIANGLES ) ? ctx->Polygon.SmoothFlag :
+      ( prim >= GL_LINES )     ? ctx->Line.SmoothFlag : 0;
+
+   if( wantAA == currAA )
+      return;
+
+   /* They differ, so the bit is toggled (assumes the flags are 0 or 1). */
+   FLUSH_BATCH( rmesa );
+   rmesa->setup.pm4_vc_fpu_setup ^= R128_EDGE_ANTIALIAS;
+   rmesa->dirty |= R128_UPLOAD_SETUP;
+}
+
+static void r128RenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   /* Record the GL primitive and sync antialiasing; the hardware primitive
+    * is switched unless prim >= GL_TRIANGLES is being drawn unfilled. */
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   const GLuint hw = hw_prim[prim];
+
+   rmesa->render_primitive = prim;
+   r128SetupAntialias( ctx, prim );
+   if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED))
+      r128RasterPrimitive( ctx, hw );
+}
+
+#define EMIT_ATTR( ATTR, STYLE, VF, SIZE )				\
+do {									\
+   rmesa->vertex_attrs[rmesa->vertex_attr_count].attrib = (ATTR);	\
+   rmesa->vertex_attrs[rmesa->vertex_attr_count].format = (STYLE);	\
+   rmesa->vertex_attr_count++;						\
+   vc_frmt |= (VF);							\
+   offset += (SIZE);							\
+} while (0)
+
+#define EMIT_PAD( SIZE )						\
+do {									\
+   rmesa->vertex_attrs[rmesa->vertex_attr_count].attrib = 0;		\
+   rmesa->vertex_attrs[rmesa->vertex_attr_count].format = EMIT_PAD;	\
+   rmesa->vertex_attrs[rmesa->vertex_attr_count].offset = (SIZE);	\
+   rmesa->vertex_attr_count++;						\
+   offset += (SIZE);							\
+} while (0)
+
+static void r128RenderStart( GLcontext *ctx )
+{
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   DECLARE_RENDERINPUTS(index_bitset);
+   GLuint vc_frmt = 0;
+   GLboolean fallback_projtex = GL_FALSE;
+   GLuint offset = 0;
+   /* Build the vertex attribute list from a snapshot of the TNL inputs. */
+   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+
+   /* Important: positions come from VB->NdcPtr; the attribute list restarts. */
+   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+   rmesa->vertex_attr_count = 0;
+   rmesa->specoffset = 0;
+
+   /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+    * build up a hardware vertex.
+    */
+   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX ))
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, R128_CCE_VC_FRMT_RHW, 4 );
+   else
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, 0, 3 );
+   /* Diffuse color starts at the running offset (BGRA on little endian). */
+   rmesa->coloroffset = offset;
+#if MESA_LITTLE_ENDIAN 
+   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA,
+      R128_CCE_VC_FRMT_DIFFUSE_ARGB, 4 );
+#else
+   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ARGB,
+      R128_CCE_VC_FRMT_DIFFUSE_ARGB, 4 );
+#endif
+   /* Specular/fog: each is emitted or padded; big endian swaps the order. */
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) ||
+       RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
+#if MESA_LITTLE_ENDIAN
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+	 rmesa->specoffset = offset;
+	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR,
+	    R128_CCE_VC_FRMT_SPEC_FRGB, 3 );
+      } else 
+	 EMIT_PAD( 3 );
+
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG ))
+	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, R128_CCE_VC_FRMT_SPEC_FRGB,
+		    1 );
+      else
+	 EMIT_PAD( 1 );
+#else
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG ))
+	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, R128_CCE_VC_FRMT_SPEC_FRGB,
+		    1 );
+      else
+	 EMIT_PAD( 1 );
+
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+	 rmesa->specoffset = offset;
+	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB,
+	    R128_CCE_VC_FRMT_SPEC_FRGB, 3 );
+      } else 
+	 EMIT_PAD( 3 );
+#endif
+   }
+   /* NOTE(review): tested on tmu_source[n], emitted as TEXn -- confirm intent. */
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(rmesa->tmu_source[0]) )) {
+      if ( VB->AttribPtr[_TNL_ATTRIB_TEX0 + rmesa->tmu_source[0]]->size > 2 )
+	 fallback_projtex = GL_TRUE;
+      EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_2F, R128_CCE_VC_FRMT_S_T, 8 );
+   }
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(rmesa->tmu_source[1]) )) {
+      if ( VB->AttribPtr[_TNL_ATTRIB_TEX0 + rmesa->tmu_source[1]]->size > 2 )
+	 fallback_projtex = GL_TRUE;
+      EMIT_ATTR( _TNL_ATTRIB_TEX1, EMIT_2F, R128_CCE_VC_FRMT_S2_T2, 8 );
+   }
+
+   /* projective textures are not supported by the hardware */
+   FALLBACK( rmesa, R128_FALLBACK_PROJTEX, fallback_projtex );
+
+   /* Only need to change the vertex emit code if there has been a
+    * statechange to a TNL index.
+    */
+   if (!RENDERINPUTS_EQUAL( index_bitset, rmesa->tnl_state_bitset )) {
+      FLUSH_BATCH( rmesa );
+      rmesa->dirty |= R128_UPLOAD_CONTEXT;
+
+      rmesa->vertex_size = 
+	 _tnl_install_attrs( ctx, 
+			     rmesa->vertex_attrs, 
+			     rmesa->vertex_attr_count,
+			     rmesa->hw_viewport, 0 );
+      rmesa->vertex_size >>= 2;   /* presumably bytes -> dwords; confirm */
+
+      rmesa->vertex_format = vc_frmt;
+   }
+}
+
+/* Render.Finish hook: flush swrast if RenderIndex has the fallback bit. */
+static void r128RenderFinish( GLcontext *ctx )
+{
+   if (R128_CONTEXT(ctx)->RenderIndex & R128_FALLBACK_BIT) _swrast_flush( ctx );
+}
+
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+
+static const char * const fallbackStrings[] = {
+   "Texture mode",
+   "glDrawBuffer(GL_FRONT_AND_BACK)",
+   "glReadBuffer",
+   "glEnable(GL_STENCIL) without hw stencil buffer",
+   "glRenderMode(selection or feedback)",
+   "glLogicOp (mode != GL_COPY)",
+   "GL_SEPARATE_SPECULAR_COLOR",
+   "glBlendEquation(mode != ADD)",
+   "glBlendFunc",
+   "Projective texture",
+   "Rasterization disable",
+};
+
+
+static const char *getFallbackString(GLuint bit)
+{
+   /* Highest set bit -> table slot; a bit with no string gets a placeholder. */
+   GLuint i = 0;
+   for ( ; bit > 1; bit >>= 1)
+      i++;
+   return (i < sizeof(fallbackStrings) / sizeof(fallbackStrings[0]))
+      ? fallbackStrings[i] : "(unknown fallback)";
+}
+
+void r128Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   /* Set (mode != 0) or clear one fallback bit.  Extra work is done only
+    * on the edges: when the mask was previously empty, and when the bit
+    * being cleared was exactly the previous mask.
+    */
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   const GLuint before = rmesa->Fallback;
+
+   if (mode) {
+      rmesa->Fallback |= bit;
+      if (before != 0)
+         return;
+      /* First fallback bit raised. */
+      FLUSH_BATCH( rmesa );
+      _swsetup_Wakeup( ctx );
+      rmesa->RenderIndex = ~0;
+      if ( R128_DEBUG & DEBUG_VERBOSE_FALL )
+         fprintf(stderr, "R128 begin rasterization fallback: 0x%x %s\n",
+                 bit, getFallbackString(bit));
+      return;
+   }
+
+   rmesa->Fallback &= ~bit;
+   if (before != bit)
+      return;
+   /* Last fallback bit cleared: put the r128 render hooks back. */
+   _swrast_flush( ctx );
+   tnl->Driver.Render.Start = r128RenderStart;
+   tnl->Driver.Render.PrimitiveNotify = r128RenderPrimitive;
+   tnl->Driver.Render.Finish = r128RenderFinish;
+   tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+   tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+   tnl->Driver.Render.Interp = _tnl_interp;
+
+   _tnl_invalidate_vertex_state( ctx, ~0 );
+   _tnl_invalidate_vertices( ctx, ~0 );
+   _tnl_install_attrs( ctx, rmesa->vertex_attrs, rmesa->vertex_attr_count,
+                       rmesa->hw_viewport, 0 );
+   rmesa->NewGLState |= _R128_NEW_RENDER_STATE;
+   if ( R128_DEBUG & DEBUG_VERBOSE_FALL )
+      fprintf(stderr, "R128 end rasterization fallback: 0x%x %s\n",
+              bit, getFallbackString(bit));
+}
+
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+void r128InitTriFuncs( GLcontext *ctx )
+{
+   /* Hook the r128 callbacks into TNL, call _tnl_init_vertices() and
+    * request a render-state choice.  init_rast_tab() runs on the first
+    * call only. */
+   static int tab_ready = 0;
+   r128ContextPtr rmesa = R128_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   if (!tab_ready) {
+      init_rast_tab();
+      tab_ready = 1;
+   }
+   tnl->Driver.RunPipeline = r128RunPipeline;
+   tnl->Driver.Render.Start = r128RenderStart;
+   tnl->Driver.Render.PrimitiveNotify = r128RenderPrimitive;
+   tnl->Driver.Render.Finish = r128RenderFinish;
+   tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple;
+   tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+   tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+   tnl->Driver.Render.Interp = _tnl_interp;
+   _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
+                       (6 + 2 * ctx->Const.MaxTextureUnits) * sizeof(GLfloat) );
+   rmesa->verts = (char *)tnl->clipspace.vertex_buf;
+   RENDERINPUTS_ONES( rmesa->tnl_state_bitset );
+   rmesa->NewGLState |= _R128_NEW_RENDER_STATE;
+}
diff --git a/src/mesa/drivers/dri/r128/r128_tris.h b/src/mesa/drivers/dri/r128/r128_tris.h
new file mode 100644
index 0000000000..c0667edb61
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/r128_tris.h
@@ -0,0 +1,47 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#ifndef __R128_TRIS_H__
+#define __R128_TRIS_H__
+
+#include "main/mtypes.h"
+
+extern void r128InitTriFuncs( GLcontext *ctx );
+extern void r128ChooseRenderState( GLcontext *ctx );
+
+/* FALLBACK(): r128Fallback() reached through a driver context pointer. */
+extern void r128Fallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+#define FALLBACK( rmesa, bit, mode ) r128Fallback( (rmesa)->glCtx, bit, mode )
+
+#endif /* __R128_TRIS_H__ */
diff --git a/src/mesa/drivers/dri/r128/server/pci_ids.h b/src/mesa/drivers/dri/r128/server/pci_ids.h
new file mode 100644
index 0000000000..fe8b087874
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/server/pci_ids.h
@@ -0,0 +1,57 @@
+/* Rage128 M3 */
+#define PCI_DEVICE_ID_ATI_RAGE128_LE	0x4c45
+#define PCI_DEVICE_ID_ATI_RAGE128_LF	0x4c46
+/* Rage128 M4 */
+#define PCI_DEVICE_ID_ATI_RAGE128_MF	0x4d46
+#define PCI_DEVICE_ID_ATI_RAGE128_ML	0x4d4c
+/* Rage128 Pro GL */
+#define PCI_DEVICE_ID_ATI_RAGE128_PA	0x5041
+#define PCI_DEVICE_ID_ATI_RAGE128_PB	0x5042
+#define PCI_DEVICE_ID_ATI_RAGE128_PC	0x5043
+#define PCI_DEVICE_ID_ATI_RAGE128_PD	0x5044
+#define PCI_DEVICE_ID_ATI_RAGE128_PE	0x5045
+#define PCI_DEVICE_ID_ATI_RAGE128_PF	0x5046
+/* Rage128 Pro VR */
+#define PCI_DEVICE_ID_ATI_RAGE128_PG	0x5047
+#define PCI_DEVICE_ID_ATI_RAGE128_PH	0x5048
+#define PCI_DEVICE_ID_ATI_RAGE128_PI	0x5049
+#define PCI_DEVICE_ID_ATI_RAGE128_PJ	0x504A
+#define PCI_DEVICE_ID_ATI_RAGE128_PK	0x504B
+#define PCI_DEVICE_ID_ATI_RAGE128_PL	0x504C
+#define PCI_DEVICE_ID_ATI_RAGE128_PM	0x504D
+#define PCI_DEVICE_ID_ATI_RAGE128_PN	0x504E
+#define PCI_DEVICE_ID_ATI_RAGE128_PO	0x504F
+#define PCI_DEVICE_ID_ATI_RAGE128_PP	0x5050
+#define PCI_DEVICE_ID_ATI_RAGE128_PQ	0x5051
+#define PCI_DEVICE_ID_ATI_RAGE128_PR	0x5052
+#define PCI_DEVICE_ID_ATI_RAGE128_TR	0x5452
+#define PCI_DEVICE_ID_ATI_RAGE128_PS	0x5053
+#define PCI_DEVICE_ID_ATI_RAGE128_PT	0x5054
+#define PCI_DEVICE_ID_ATI_RAGE128_PU	0x5055
+#define PCI_DEVICE_ID_ATI_RAGE128_PV	0x5056
+#define PCI_DEVICE_ID_ATI_RAGE128_PW	0x5057
+#define PCI_DEVICE_ID_ATI_RAGE128_PX	0x5058
+/* Rage128 GL */
+#define PCI_DEVICE_ID_ATI_RAGE128_RE	0x5245
+#define PCI_DEVICE_ID_ATI_RAGE128_RF	0x5246
+/* Rage128 VR */
+#define PCI_DEVICE_ID_ATI_RAGE128_RE	0x5245	/* repeats the "Rage128 GL" entry above, same value */
+#define PCI_DEVICE_ID_ATI_RAGE128_RF	0x5246	/* repeats the "Rage128 GL" entry above, same value */
+#define PCI_DEVICE_ID_ATI_RAGE128_RG	0x5247
+#define PCI_DEVICE_ID_ATI_RAGE128_RK	0x524b
+#define PCI_DEVICE_ID_ATI_RAGE128_RL	0x524c
+#define PCI_DEVICE_ID_ATI_RAGE128_SE	0x5345
+#define PCI_DEVICE_ID_ATI_RAGE128_SF	0x5346
+#define PCI_DEVICE_ID_ATI_RAGE128_SG	0x5347
+#define PCI_DEVICE_ID_ATI_RAGE128_SH	0x5348
+#define PCI_DEVICE_ID_ATI_RAGE128_SK	0x534b
+#define PCI_DEVICE_ID_ATI_RAGE128_SL	0x534c
+#define PCI_DEVICE_ID_ATI_RAGE128_SM	0x534d
+#define PCI_DEVICE_ID_ATI_RAGE128_SN	0x534e
+/* Rage128 Pro Ultra */
+#define PCI_DEVICE_ID_ATI_RAGE128_TF	0x5446
+#define PCI_DEVICE_ID_ATI_RAGE128_TL	0x544C
+#define PCI_DEVICE_ID_ATI_RAGE128_TR	0x5452	/* also defined earlier in this list (after ..._PR), same value */
+#define PCI_DEVICE_ID_ATI_RAGE128_TS	0x5453
+#define PCI_DEVICE_ID_ATI_RAGE128_TT	0x5454
+#define PCI_DEVICE_ID_ATI_RAGE128_TU	0x5455
diff --git a/src/mesa/drivers/dri/r128/server/r128.h b/src/mesa/drivers/dri/r128/server/r128.h
new file mode 100644
index 0000000000..413c7c6690
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/server/r128.h
@@ -0,0 +1,464 @@
+/*
+ * Copyright 1999, 2000 ATI Technologies Inc., Markham, Ontario,
+ *                      Precision Insight, Inc., Cedar Park, Texas, and
+ *                      VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation on the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, PRECISION INSIGHT, VA LINUX
+ * SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Rickard E. Faith <faith@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *
+ */
+
+#ifndef _R128_H_
+#define _R128_H_
+
+#include "dri_util.h"
+
+#define R128_DEBUG          0   /* Turn off debugging output               */
+#define R128_IDLE_RETRY    32   /* Fall out of idle loops after this count */
+#define R128_TIMEOUT  2000000   /* Fall out of wait loops after this count */
+#define R128_MMIOSIZE  0x4000
+
+#define R128_VBIOS_SIZE 0x00010000
+
+#if R128_DEBUG  /* R128TRACE((fmt, ...)): x is a parenthesized ErrorF argument list */
+#define R128TRACE(x)                                          \
+    do {                                                      \
+	ErrorF("(**) %s(%d): ", R128_NAME, pScrn->scrnIndex); \
+	ErrorF x;                                             \
+    } while (0)  /* no trailing ';' so "if (c) R128TRACE(x); else" parses */
+#else
+#define R128TRACE(x)
+#endif
+
+
+/* Other macros (arguments are parenthesized wherever they are used) */
+#define R128_ARRAY_SIZE(x)  (sizeof(x)/sizeof((x)[0]))
+#define R128_ALIGN(x,bytes) (((x) + ((bytes) - 1)) & ~((bytes) - 1))
+#define R128PTR(pScrn) ((R128InfoPtr)(pScrn)->driverPrivate)
+    
+/**
+ * \brief Chip families.
+ */
+typedef enum {
+    CHIP_FAMILY_UNKNOWN,
+    CHIP_FAMILY_R128_PCI,
+    CHIP_FAMILY_R128_AGP,
+} R128ChipFamily;
+
+typedef struct {        /* All values in XCLKS    */
+    int  ML;            /* Memory Read Latency    */
+    int  MB;            /* Memory Burst Length    */
+    int  Trcd;          /* RAS to CAS delay       */
+    int  Trp;           /* RAS precharge          */
+    int  Twr;           /* Write Recovery         */
+    int  CL;            /* CAS Latency            */
+    int  Tr2w;          /* Read to Write Delay    */
+    int  Rloop;         /* Loop Latency           */
+    int  Rloop_fudge;   /* Add to ML to get Rloop */
+    char *name;
+} R128RAMRec, *R128RAMPtr;
+
+typedef struct {
+				/* Common registers */
+    uint32_t     ovr_clr;
+    uint32_t     ovr_wid_left_right;
+    uint32_t     ovr_wid_top_bottom;
+    uint32_t     ov0_scale_cntl;
+    uint32_t     mpp_tb_config;
+    uint32_t     mpp_gp_config;
+    uint32_t     subpic_cntl;
+    uint32_t     viph_control;
+    uint32_t     i2c_cntl_1;
+    uint32_t     gen_int_cntl;
+    uint32_t     cap0_trig_cntl;
+    uint32_t     cap1_trig_cntl;
+    uint32_t     bus_cntl;
+    uint32_t     config_cntl;
+
+				/* Other registers to save for VT switches */
+    uint32_t     dp_datatype;
+    uint32_t     gen_reset_cntl;
+    uint32_t     clock_cntl_index;
+    uint32_t     amcgpio_en_reg;
+    uint32_t     amcgpio_mask;
+
+				/* CRTC registers */
+    uint32_t     crtc_gen_cntl;
+    uint32_t     crtc_ext_cntl;
+    uint32_t     dac_cntl;
+    uint32_t     crtc_h_total_disp;
+    uint32_t     crtc_h_sync_strt_wid;
+    uint32_t     crtc_v_total_disp;
+    uint32_t     crtc_v_sync_strt_wid;
+    uint32_t     crtc_offset;
+    uint32_t     crtc_offset_cntl;
+    uint32_t     crtc_pitch;
+
+				/* CRTC2 registers */
+    uint32_t     crtc2_gen_cntl;
+
+				/* Flat panel registers */
+    uint32_t     fp_crtc_h_total_disp;
+    uint32_t     fp_crtc_v_total_disp;
+    uint32_t     fp_gen_cntl;
+    uint32_t     fp_h_sync_strt_wid;
+    uint32_t     fp_horz_stretch;
+    uint32_t     fp_panel_cntl;
+    uint32_t     fp_v_sync_strt_wid;
+    uint32_t     fp_vert_stretch;
+    uint32_t     lvds_gen_cntl;
+    uint32_t     tmds_crc;
+    uint32_t     tmds_transmitter_cntl;
+
+				/* Computed values for PLL */
+    uint32_t     dot_clock_freq;
+    uint32_t     pll_output_freq;
+    int        feedback_div;
+    int        post_div;
+
+				/* PLL registers */
+    uint32_t     ppll_ref_div;
+    uint32_t     ppll_div_3;
+    uint32_t     htotal_cntl;
+
+				/* DDA register */
+    uint32_t     dda_config;
+    uint32_t     dda_on_off;
+
+				/* Palette */
+    GLboolean  palette_valid;
+    uint32_t     palette[256];
+} R128SaveRec, *R128SavePtr;
+
+typedef struct {
+    int               Chipset;
+    GLboolean              Primary;
+
+    GLboolean              FBDev;
+
+    unsigned long     LinearAddr;   /* Frame buffer physical address         */
+    unsigned long     BIOSAddr;     /* BIOS physical address                 */
+
+    unsigned char     *MMIO;        /* Map of MMIO region                    */
+    unsigned char     *FB;          /* Map of frame buffer                   */
+
+    uint32_t            MemCntl;
+    uint32_t            BusCntl;
+    unsigned long     FbMapSize;    /* Size of frame buffer, in bytes        */
+    int               Flags;        /* Saved copy of mode flags              */
+
+				/* Computed values for FPs */
+    int               PanelXRes;
+    int               PanelYRes;
+    int               HOverPlus;
+    int               HSyncWidth;
+    int               HBlank;
+    int               VOverPlus;
+    int               VSyncWidth;
+    int               VBlank;
+    int               PanelPwrDly;
+    
+    unsigned long     cursor_start;
+    unsigned long     cursor_end;
+
+    /*
+     * XAAForceTransBlit is used to change the behavior of the XAA
+     * SetupForScreenToScreenCopy function, to make it DGA-friendly.
+     */
+    GLboolean              XAAForceTransBlit;
+
+    int               fifo_slots;   /* Free slots in the FIFO (64 max)       */
+    int               pix24bpp;     /* Depth of pixmap for 24bpp framebuffer */
+    GLboolean              dac6bits;     /* Use 6 bit DAC?                        */
+
+				/* Computed values for Rage 128 */
+    int               pitch;
+    int               datatype;
+    uint32_t            dp_gui_master_cntl;
+
+				/* Saved values for ScreenToScreenCopy */
+    int               xdir;
+    int               ydir;
+
+				/* ScanlineScreenToScreenColorExpand support */
+    unsigned char     *scratch_buffer[1];
+    unsigned char     *scratch_save;
+    int               scanline_x;
+    int               scanline_y;
+    int               scanline_w;
+    int               scanline_h;
+
+    int               scanline_hpass;
+    int               scanline_x1clip;
+    int               scanline_x2clip;
+    int               scanline_rop;
+    int               scanline_fg;
+    int               scanline_bg;
+
+    int               scanline_words;
+    int               scanline_direct;
+    int               scanline_bpp; /* Only used for ImageWrite */
+
+    drm_context_t        drmCtx;
+
+    drmSize           registerSize;
+    drm_handle_t         registerHandle;
+
+    GLboolean         IsPCI;            /* Current card is a PCI card */
+    drmSize           pciSize;
+    drm_handle_t         pciMemHandle;
+    unsigned char     *PCI;             /* Map */
+
+    GLboolean         allowPageFlip;    /* Enable 3d page flipping */
+    GLboolean         have3DWindows;    /* Are there any 3d clients? */
+    int               drmMinor;
+
+    drmSize           agpSize;
+    drm_handle_t         agpMemHandle;     /* Handle from drmAgpAlloc */
+    unsigned long     agpOffset;
+    unsigned char     *AGP;             /* Map */
+    int               agpMode;
+
+    GLboolean         CCEInUse;         /* CCE is currently active */
+    int               CCEMode;          /* CCE mode that server/clients use */
+    int               CCEFifoSize;      /* Size of the CCE command FIFO */
+    GLboolean         CCESecure;        /* CCE security enabled */
+    int               CCEusecTimeout;   /* CCE timeout in usecs */
+
+				/* CCE ring buffer data */
+    unsigned long     ringStart;        /* Offset into AGP space */
+    drm_handle_t         ringHandle;       /* Handle from drmAddMap */
+    drmSize           ringMapSize;      /* Size of map */
+    int               ringSize;         /* Size of ring (in MB) */
+    unsigned char     *ring;            /* Map */
+    int               ringSizeLog2QW;
+
+    unsigned long     ringReadOffset;   /* Offset into AGP space */
+    drm_handle_t         ringReadPtrHandle; /* Handle from drmAddMap */
+    drmSize           ringReadMapSize;  /* Size of map */
+    unsigned char     *ringReadPtr;     /* Map */
+
+				/* CCE vertex/indirect buffer data */
+    unsigned long     bufStart;        /* Offset into AGP space */
+    drm_handle_t         bufHandle;       /* Handle from drmAddMap */
+    drmSize           bufMapSize;      /* Size of map */
+    int               bufSize;         /* Size of buffers (in MB) */
+    unsigned char     *buf;            /* Map */
+    int               bufNumBufs;      /* Number of buffers */
+    drmBufMapPtr      buffers;         /* Buffer map */
+
+				/* CCE AGP Texture data */
+    unsigned long     agpTexStart;      /* Offset into AGP space */
+    drm_handle_t         agpTexHandle;     /* Handle from drmAddMap */
+    drmSize           agpTexMapSize;    /* Size of map */
+    int               agpTexSize;       /* Size of AGP tex space (in MB) */
+    unsigned char     *agpTex;          /* Map */
+    int               log2AGPTexGran;
+
+				/* CCE 2D acceleration */
+    drmBufPtr         indirectBuffer;
+    int               indirectStart;
+
+				/* DRI screen private data */
+    int               fbX;
+    int               fbY;
+    int               backX;
+    int               backY;
+    int               depthX;
+    int               depthY;
+
+    int               frontOffset;
+    int               frontPitch;
+    int               backOffset;
+    int               backPitch;
+    int               depthOffset;
+    int               depthPitch;
+    int               spanOffset;
+    int               textureOffset;
+    int               textureSize;
+    int               log2TexGran;
+
+				/* Saved scissor values */
+    uint32_t            sc_left;
+    uint32_t            sc_right;
+    uint32_t            sc_top;
+    uint32_t            sc_bottom;
+
+    uint32_t            re_top_left;
+    uint32_t            re_width_height;
+
+    uint32_t            aux_sc_cntl;
+
+    int               irq;
+    uint32_t            gen_int_cntl;
+
+    GLboolean              DMAForXv;
+
+} R128InfoRec, *R128InfoPtr;
+
+#define R128WaitForFifo(pScrn, entries) /* consume FIFO slots; uses the caller's `info` */ \
+do {    /* waits through r128WaitForFifoFunction(), the name this header declares */ \
+    if (info->fifo_slots < (entries)) r128WaitForFifoFunction(pScrn, (entries)); \
+    info->fifo_slots -= (entries);  /* account for the slots about to be used */ \
+} while (0)
+
+extern void      r128WaitForFifoFunction(const DRIDriverContext *ctx, int entries);
+extern void      r128WaitForIdle(const DRIDriverContext *ctx);
+
+extern void      r128WaitForVerticalSync(const DRIDriverContext *ctx);
+/* Init entry points: GLboolean where a result is reported, void otherwise. */
+extern GLboolean r128AccelInit(const DRIDriverContext *ctx);
+extern void      r128EngineInit(const DRIDriverContext *ctx);
+extern GLboolean r128CursorInit(const DRIDriverContext *ctx);
+extern GLboolean r128DGAInit(const DRIDriverContext *ctx);
+
+extern void      r128InitVideo(const DRIDriverContext *ctx);
+
+extern GLboolean r128DRIScreenInit(const DRIDriverContext *ctx);
+extern void      r128DRICloseScreen(const DRIDriverContext *ctx);
+extern GLboolean r128DRIFinishScreenInit(const DRIDriverContext *ctx);
+
+#define R128CCE_START(ctx, info) /* issue DRM_R128_CCE_START on ctx's fd; `info` is unused */ \
+do {									\
+    int _ret = drmCommandNone((ctx)->drmFD, DRM_R128_CCE_START);	\
+    if (_ret) {		/* a non-zero result is only logged */		\
+	   fprintf(stderr,				\
+		   "%s: CCE start %d\n", __FUNCTION__, _ret);		\
+    }									\
+} while (0)
+
+#define R128CCE_STOP(ctx, info) /* call R128CCEStop(ctx) and log a non-zero result */ \
+do {									\
+    const int _ret = R128CCEStop(ctx);					\
+    if (_ret != 0) {							\
+	fprintf(stderr, "%s: CCE stop %d\n",				\
+		__FUNCTION__, _ret);					\
+    }									\
+} while (0)
+
+#define R128CCE_RESET(ctx, info) /* reset only if DRI is enabled and CCEMode uses the ring */ \
+do {	/* NOTE(review): fd is read from info here, from ctx in R128CCE_START -- confirm */ \
+    if ((info)->directRenderingEnabled					\
+	&& R128CCE_USE_RING_BUFFER((info)->CCEMode)) {			\
+	int _ret = drmCommandNone((info)->drmFD, DRM_R128_CCE_RESET);	\
+	if (_ret) {		/* a non-zero result is only logged */	\
+	       fprintf(stderr,			\
+		       "%s: CCE reset %d\n", __FUNCTION__, _ret);	\
+	}								\
+    }									\
+} while (0)
+
+    
+/* R128_CCE_PACKET0 tag | n at bit 16 | dword index of reg (reg >> 2). */
+#define CCE_PACKET0( reg, n )     (R128_CCE_PACKET0 | ((n) << 16) | ((reg) >> 2))
+/* R128_CCE_PACKET1 tag | dword index of reg1 at bit 11 | dword index of reg0. */
+#define CCE_PACKET1( reg0, reg1 ) (R128_CCE_PACKET1 | (((reg1) >> 2) << 11) | ((reg0) >> 2))
+/* R128_CCE_PACKET2 tag only; takes no arguments. */
+#define CCE_PACKET2()             (R128_CCE_PACKET2)
+/* R128_CCE_PACKET3 tag | pkt bits as given | n at bit 16. */
+#define CCE_PACKET3( pkt, n )     (R128_CCE_PACKET3 | (pkt) | ((n) << 16))
+
+
+#define R128_VERBOSE	0	/* non-zero turns on the fprintf tracing inside the ring macros below */
+
+#define RING_LOCALS	uint32_t *__head; int __count;	/* locals that BEGIN_RING/OUT_RING/ADVANCE_RING read and write */
+
+#define R128CCE_REFRESH(pScrn, info) /* if CCE is not marked in use: idle, re-emit 3 saved regs, mark in use */ \
+do {	/* expands BEGIN_RING/OUT_RING, so RING_LOCALS must be declared in the caller */ \
+   if ( R128_VERBOSE ) {						\
+         fprintf(stderr, "REFRESH( %d ) in %s\n",	\
+		  !info->CCEInUse , __FUNCTION__ );			\
+   }									\
+   if ( !info->CCEInUse ) {						\
+      R128CCEWaitForIdle(pScrn);       					\
+      BEGIN_RING( 6 );	/* 3 x OUT_RING_REG, two dwords each */		\
+      OUT_RING_REG( R128_RE_TOP_LEFT,     info->re_top_left );		\
+      OUT_RING_REG( R128_RE_WIDTH_HEIGHT, info->re_width_height );	\
+      OUT_RING_REG( R128_AUX_SC_CNTL,     info->aux_sc_cntl );		\
+      ADVANCE_RING();							\
+      info->CCEInUse = TRUE;	/* skip this block on later calls */	\
+   }									\
+} while (0)
+
+#define BEGIN_RING( n ) do {	/* point __head at room for n dwords; uses caller's info/pScrn */ \
+   if ( R128_VERBOSE ) {						\
+         fprintf(stderr,				\
+		  "BEGIN_RING( %d ) in %s\n", (n), __FUNCTION__ );	\
+   }									\
+   if ( !info->indirectBuffer ) {	/* no buffer held yet */	\
+      info->indirectBuffer = R128CCEGetBuffer( pScrn );			\
+      info->indirectStart = 0;						\
+   } else if ( (info->indirectBuffer->used + 4*(n)) >			\
+                info->indirectBuffer->total ) {	/* n dwords = 4*n bytes */ \
+      R128CCEFlushIndirect( pScrn, 1 );					\
+   }									\
+   __head = (uint32_t *)((char *)info->indirectBuffer->address +	\
+		       info->indirectBuffer->used); /* cast to __head's own type */ \
+   __count = 0;								\
+} while (0)
+
+#define ADVANCE_RING() do {	/* add the __count dwords written since BEGIN_RING to ->used */ \
+   if ( R128_VERBOSE ) {	/* (int) casts keep every argument matching %d */ \
+         fprintf(stderr,				\
+		  "ADVANCE_RING() used: %d+%d=%d/%d\n",			\
+		  info->indirectBuffer->used - info->indirectStart,	\
+		  __count * (int)sizeof(uint32_t),			\
+		  info->indirectBuffer->used - info->indirectStart +	\
+		  __count * (int)sizeof(uint32_t),			\
+		  info->indirectBuffer->total - info->indirectStart );	\
+   }									\
+   info->indirectBuffer->used += __count * (int)sizeof(uint32_t);		\
+} while (0)
+
+#define OUT_RING( x ) do {	/* store x at __head[__count] and advance __count */ \
+   if ( R128_VERBOSE ) {	/* note: x is evaluated a second time when tracing */ \
+         fprintf(stderr,				\
+		  "   OUT_RING( 0x%08x )\n", (unsigned int)(x) );	\
+   }									\
+   MMIO_OUT32(&__head[__count++], 0, (x));				\
+} while (0)
+
+#define OUT_RING_REG( reg, val ) /* two dwords: CCE_PACKET0(reg, 0) header, then val */ \
+do {									\
+   OUT_RING( CCE_PACKET0( reg, 0 ) );					\
+   OUT_RING( val );							\
+} while (0)
+
+#define FLUSH_RING() /* call R128CCEFlushIndirect(pScrn, 0) when an indirect buffer is held */ \
+do {									\
+   if ( R128_VERBOSE ) {						\
+      fprintf(stderr, "FLUSH_RING in %s\n", __FUNCTION__ );		\
+   }									\
+   if ( info->indirectBuffer ) {					\
+      R128CCEFlushIndirect( pScrn, 0 );					\
+   }									\
+} while (0)
+
+    
+#endif
diff --git a/src/mesa/drivers/dri/r128/server/r128_dri.h b/src/mesa/drivers/dri/r128/server/r128_dri.h
new file mode 100644
index 0000000000..430e5f580b
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/server/r128_dri.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright 1999, 2000 ATI Technologies Inc., Markham, Ontario,
+ *                      Precision Insight, Inc., Cedar Park, Texas, and
+ *                      VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation on the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, PRECISION INSIGHT, VA LINUX
+ * SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Rickard E. Faith <faith@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef _R128_DRI_
+#define _R128_DRI_
+
+#include "xf86drm.h"
+#include "drm.h"
+
+/* DRI Driver defaults */
+#define R128_DEFAULT_CCE_PIO_MODE R128_PM4_64PIO_64VCBM_64INDBM /* not among the modes R128CCE_USE_RING_BUFFER() lists */
+#define R128_DEFAULT_CCE_BM_MODE  R128_PM4_64BM_64VCBM_64INDBM /* one of the modes R128CCE_USE_RING_BUFFER() lists */
+#define R128_DEFAULT_AGP_MODE     1
+#define R128_DEFAULT_AGP_SIZE     8 /* MB (must be a power of 2 and > 4MB) */
+#define R128_DEFAULT_RING_SIZE    1 /* MB (must be page aligned) */
+#define R128_DEFAULT_BUFFER_SIZE  2 /* MB (must be page aligned) */
+#define R128_DEFAULT_AGP_TEX_SIZE 1 /* MB (must be page aligned) */
+
+#define R128_DEFAULT_CCE_TIMEOUT  10000  /* usecs */
+
+#define R128_AGP_MAX_MODE         4 /* NOTE(review): presumably the highest accepted AGP mode -- confirm */
+#define R128_BUFFER_ALIGN         0x00000fff /* low 12 bits set (4096 - 1); presumably an alignment mask -- confirm */
+
+#define R128_CARD_TYPE_R128          1
+#define R128_CARD_TYPE_R128_PRO      2
+#define R128_CARD_TYPE_R128_MOBILITY 3
+
+#define R128CCE_USE_RING_BUFFER(m) /* non-zero when m equals one of the four modes below */ \
+(   (m) == R128_PM4_192BM                                                 \
+ || (m) == R128_PM4_128BM_64INDBM                                         \
+ || (m) == R128_PM4_64BM_128INDBM                                         \
+ || (m) == R128_PM4_64BM_64VCBM_64INDBM )
+
+typedef struct {  /* per-screen description record; R128DRIPtr is a pointer to one */
+    /* DRI screen private data */
+    int           deviceID;     /* PCI device ID */
+    int           width;        /* Width in pixels of display */
+    int           height;       /* Height in scanlines of display */
+    int           depth;        /* Depth of display (8, 15, 16, 24) */
+    int           bpp;          /* Bit depth of display (8, 16, 24, 32) */
+
+    int           IsPCI;        /* Current card is a PCI card */
+    int           AGPMode;      /* NOTE(review): presumably the AGP mode in use (cf. R128_DEFAULT_AGP_MODE) -- confirm */
+
+    int           frontOffset;  /* Start of front buffer */
+    int           frontPitch;   /* NOTE(review): units (pixels vs bytes) not visible here -- confirm */
+    int           backOffset;   /* Start of shared back buffer */
+    int           backPitch;
+    int           depthOffset;  /* Start of shared depth buffer */
+    int           depthPitch;
+    int           spanOffset;   /* Start of scratch spanline */
+    int           textureOffset;/* Start of texture data in frame buffer */
+    int           textureSize;
+    int           log2TexGran;  /* NOTE(review): presumably log2 of the texture granularity -- confirm */
+
+    /* MMIO register data */
+    drm_handle_t     registerHandle;
+    drmSize       registerSize;
+
+    /* CCE AGP Texture data */
+    drm_handle_t     agpTexHandle;
+    drmSize       agpTexMapSize;
+    int           log2AGPTexGran;
+    int           agpTexOffset;
+    unsigned int  sarea_priv_offset; /* NOTE(review): presumably offset of the driver-private part of the SAREA -- confirm */
+} R128DRIRec, *R128DRIPtr;
+
+#endif
diff --git a/src/mesa/drivers/dri/r128/server/r128_macros.h b/src/mesa/drivers/dri/r128/server/r128_macros.h
new file mode 100644
index 0000000000..f7b945da93
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/server/r128_macros.h
@@ -0,0 +1,134 @@
+/**
+ * \file server/r128_macros.h
+ * \brief Macros for R128 MMIO operation.
+ *
+ * \authors Kevin E. Martin <martin@xfree86.org>
+ * \authors Rickard E. Faith <faith@valinux.com>
+ * \authors Alan Hourihane <alanh@fairlite.demon.co.uk>
+ */
+
+/*
+ * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
+ *                VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation on the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
+ * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _R128_MACROS_H_
+#define _R128_MACROS_H_
+
+
+
+#  define MMIO_IN8(base, offset) /* volatile 8-bit load from base + offset bytes */ \
+	(*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)))
+#  define MMIO_IN16(base, offset) /* volatile 16-bit load */ \
+	(*(volatile unsigned short *)(void *)(((unsigned char*)(base)) + (offset)))
+#  define MMIO_IN32(base, offset) /* volatile 32-bit load */ \
+	(*(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)))
+#  define MMIO_OUT8(base, offset, val) /* volatile 8-bit store; whole expansion parenthesized */ \
+	(*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val))
+#  define MMIO_OUT16(base, offset, val) /* volatile 16-bit store */ \
+	(*(volatile unsigned short *)(void *)(((unsigned char*)(base)) + (offset)) = (val))
+#  define MMIO_OUT32(base, offset, val) /* volatile 32-bit store */ \
+	(*(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = (val))
+
+
+				/* Memory mapped register access macros; R128MMIO must be visible where they expand */
+#define INREG8(addr)        MMIO_IN8(R128MMIO, addr)
+#define INREG16(addr)       MMIO_IN16(R128MMIO, addr)
+#define INREG(addr)         MMIO_IN32(R128MMIO, addr)
+#define OUTREG8(addr, val)  MMIO_OUT8(R128MMIO, addr, val)
+#define OUTREG16(addr, val) MMIO_OUT16(R128MMIO, addr, val)
+#define OUTREG(addr, val)   MMIO_OUT32(R128MMIO, addr, val)
+
+#define ADDRREG(addr)       ((volatile GLuint *)(pointer)(R128MMIO + (addr))) /* yields the register's address, not its value */
+
+
+#define OUTREGP(addr, val, mask) /* read-modify-write: keep the bits in mask, then OR in val */ \
+do {									\
+    GLuint tmp = INREG(addr);	/* one 32-bit read of the register */	\
+    tmp &= (mask);							\
+    tmp |= (val);							\
+    OUTREG(addr, tmp);		/* one 32-bit write back */		\
+} while (0)
+
+#define INPLL(dpy, addr) r128INPLL(dpy, addr)	/* PLL reads are delegated to r128INPLL() */
+
+#define OUTPLL(addr, val) /* byte-write (addr & 0x3f) | R128_PLL_WR_EN to the index, then 32-bit data */ \
+do {									\
+    OUTREG8(R128_CLOCK_CNTL_INDEX, (((addr) & 0x3f) |			\
+				      R128_PLL_WR_EN));		\
+    OUTREG(R128_CLOCK_CNTL_DATA, val);				\
+} while (0)
+
+#define OUTPLLP(dpy, addr, val, mask) /* PLL read-modify-write: keep the bits in mask, OR in val */ \
+do {									\
+    GLuint tmp = INPLL(dpy, addr);	/* dpy is only used for the read */ \
+    tmp &= (mask);							\
+    tmp |= (val);							\
+    OUTPLL(addr, tmp);							\
+} while (0)
+
+#define OUTPAL_START(idx) /* byte-write idx to R128_PALETTE_INDEX */	\
+do {									\
+    OUTREG8(R128_PALETTE_INDEX, (idx));				\
+} while (0)
+
+#define OUTPAL_NEXT(r, g, b) /* 32-bit write of r<<16 | g<<8 | b to R128_PALETTE_DATA */ \
+do {									\
+    OUTREG(R128_PALETTE_DATA, ((r) << 16) | ((g) << 8) | (b));	\
+} while (0)
+
+#define OUTPAL_NEXT_CARD32(v) /* write the low 24 bits of v to R128_PALETTE_DATA */ \
+do {	/* v is parenthesized so an expression argument is masked as a whole */ \
+    OUTREG(R128_PALETTE_DATA, ((v) & 0x00ffffff));			\
+} while (0)
+
+#define OUTPAL(idx, r, g, b) /* OUTPAL_START(idx) followed by one OUTPAL_NEXT(r, g, b) */ \
+do {									\
+    OUTPAL_START((idx));						\
+    OUTPAL_NEXT((r), (g), (b));						\
+} while (0)
+
+#define INPAL_START(idx) /* 32-bit write of idx << 16 to R128_PALETTE_INDEX (OUTPAL_START writes a byte) */ \
+do {									\
+    OUTREG(R128_PALETTE_INDEX, (idx) << 16);				\
+} while (0)
+
+#define INPAL_NEXT() INREG(R128_PALETTE_DATA)	/* 32-bit read of R128_PALETTE_DATA */
+
+#define PAL_SELECT(idx) /* idx == 0 clears R128_DAC2_PALETTE_ACC_CTL in R128_DAC_CNTL2, else sets it */ \
+do {									\
+    if (!(idx)) {	/* parenthesized so expression arguments negate as a whole */ \
+	OUTREG(R128_DAC_CNTL2, INREG(R128_DAC_CNTL2) &		\
+	       (GLuint)~R128_DAC2_PALETTE_ACC_CTL);			\
+    } else {								\
+	OUTREG(R128_DAC_CNTL2, INREG(R128_DAC_CNTL2) |		\
+	       R128_DAC2_PALETTE_ACC_CTL);				\
+    }									\
+} while (0)
+
+
+#endif
diff --git a/src/mesa/drivers/dri/r128/server/r128_reg.h b/src/mesa/drivers/dri/r128/server/r128_reg.h
new file mode 100644
index 0000000000..50033540b9
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/server/r128_reg.h
@@ -0,0 +1,1403 @@
+/*
+ * Copyright 1999, 2000 ATI Technologies Inc., Markham, Ontario,
+ *                      Precision Insight, Inc., Cedar Park, Texas, and
+ *                      VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation on the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, PRECISION INSIGHT, VA LINUX
+ * SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Rickard E. Faith <faith@valinux.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ * References:
+ *
+ *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
+ *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
+ *   1999.
+ *
+ *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
+ *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
+ *
+ */
+
+#ifndef _R128_REG_H_
+#define _R128_REG_H_
+
+#define R128_ADAPTER_ID                   0x0f2c /* PCI */
+#define R128_AGP_APER_OFFSET              0x0178
+#define R128_AGP_BASE                     0x0170
+#define R128_AGP_CNTL                     0x0174
+#       define R128_AGP_APER_SIZE_256MB   (0x00 << 0)
+#       define R128_AGP_APER_SIZE_128MB   (0x20 << 0)
+#       define R128_AGP_APER_SIZE_64MB    (0x30 << 0)
+#       define R128_AGP_APER_SIZE_32MB    (0x38 << 0)
+#       define R128_AGP_APER_SIZE_16MB    (0x3c << 0)
+#       define R128_AGP_APER_SIZE_8MB     (0x3e << 0)
+#       define R128_AGP_APER_SIZE_4MB     (0x3f << 0)
+#       define R128_AGP_APER_SIZE_MASK    (0x3f << 0)
+#define R128_AGP_CNTL_B                   0x0b44
+#define R128_AGP_COMMAND                  0x0f58 /* PCI */
+#define R128_AGP_PLL_CNTL                 0x0010 /* PLL */
+#define R128_AGP_STATUS                   0x0f54 /* PCI */
+#       define R128_AGP_1X_MODE           0x01
+#       define R128_AGP_2X_MODE           0x02
+#       define R128_AGP_4X_MODE           0x04
+#       define R128_AGP_MODE_MASK         0x07
+#define R128_AMCGPIO_A_REG                0x01a0
+#define R128_AMCGPIO_EN_REG               0x01a8
+#define R128_AMCGPIO_MASK                 0x0194
+#define R128_AMCGPIO_Y_REG                0x01a4
+#define R128_ATTRDR                       0x03c1 /* VGA */
+#define R128_ATTRDW                       0x03c0 /* VGA */
+#define R128_ATTRX                        0x03c0 /* VGA */
+#define R128_AUX_SC_CNTL                  0x1660
+#       define R128_AUX1_SC_EN            (1 << 0)
+#       define R128_AUX1_SC_MODE_OR       (0 << 1)
+#       define R128_AUX1_SC_MODE_NAND     (1 << 1)
+#       define R128_AUX2_SC_EN            (1 << 2)
+#       define R128_AUX2_SC_MODE_OR       (0 << 3)
+#       define R128_AUX2_SC_MODE_NAND     (1 << 3)
+#       define R128_AUX3_SC_EN            (1 << 4)
+#       define R128_AUX3_SC_MODE_OR       (0 << 5)
+#       define R128_AUX3_SC_MODE_NAND     (1 << 5)
+#define R128_AUX1_SC_BOTTOM               0x1670
+#define R128_AUX1_SC_LEFT                 0x1664
+#define R128_AUX1_SC_RIGHT                0x1668
+#define R128_AUX1_SC_TOP                  0x166c
+#define R128_AUX2_SC_BOTTOM               0x1680
+#define R128_AUX2_SC_LEFT                 0x1674
+#define R128_AUX2_SC_RIGHT                0x1678
+#define R128_AUX2_SC_TOP                  0x167c
+#define R128_AUX3_SC_BOTTOM               0x1690
+#define R128_AUX3_SC_LEFT                 0x1684
+#define R128_AUX3_SC_RIGHT                0x1688
+#define R128_AUX3_SC_TOP                  0x168c
+#define R128_AUX_WINDOW_HORZ_CNTL         0x02d8
+#define R128_AUX_WINDOW_VERT_CNTL         0x02dc
+
+#define R128_BASE_CODE                    0x0f0b
+#define R128_BIOS_0_SCRATCH               0x0010
+#define R128_BIOS_1_SCRATCH               0x0014
+#define R128_BIOS_2_SCRATCH               0x0018
+#define R128_BIOS_3_SCRATCH               0x001c
+#define R128_BIOS_4_SCRATCH               0x0020
+#define R128_BIOS_5_SCRATCH               0x0024
+#       define R128_BIOS_DISPLAY_FP       (1 << 0)
+#       define R128_BIOS_DISPLAY_CRT      (2 << 0)
+#       define R128_BIOS_DISPLAY_FP_CRT   (3 << 0)
+#define R128_BIOS_6_SCRATCH               0x0028
+#define R128_BIOS_7_SCRATCH               0x002c
+#define R128_BIOS_ROM                     0x0f30 /* PCI */
+#define R128_BIST                         0x0f0f /* PCI */
+#define R128_BM_CHUNK_0_VAL               0x0a18
+#       define R128_BM_PTR_FORCE_TO_PCI    (1 << 21)
+#       define R128_BM_PM4_RD_FORCE_TO_PCI (1 << 22)
+#       define R128_BM_GLOBAL_FORCE_TO_PCI (1 << 23)
+#define R128_BRUSH_DATA0                  0x1480
+#define R128_BRUSH_DATA1                  0x1484
+#define R128_BRUSH_DATA10                 0x14a8
+#define R128_BRUSH_DATA11                 0x14ac
+#define R128_BRUSH_DATA12                 0x14b0
+#define R128_BRUSH_DATA13                 0x14b4
+#define R128_BRUSH_DATA14                 0x14b8
+#define R128_BRUSH_DATA15                 0x14bc
+#define R128_BRUSH_DATA16                 0x14c0
+#define R128_BRUSH_DATA17                 0x14c4
+#define R128_BRUSH_DATA18                 0x14c8
+#define R128_BRUSH_DATA19                 0x14cc
+#define R128_BRUSH_DATA2                  0x1488
+#define R128_BRUSH_DATA20                 0x14d0
+#define R128_BRUSH_DATA21                 0x14d4
+#define R128_BRUSH_DATA22                 0x14d8
+#define R128_BRUSH_DATA23                 0x14dc
+#define R128_BRUSH_DATA24                 0x14e0
+#define R128_BRUSH_DATA25                 0x14e4
+#define R128_BRUSH_DATA26                 0x14e8
+#define R128_BRUSH_DATA27                 0x14ec
+#define R128_BRUSH_DATA28                 0x14f0
+#define R128_BRUSH_DATA29                 0x14f4
+#define R128_BRUSH_DATA3                  0x148c
+#define R128_BRUSH_DATA30                 0x14f8
+#define R128_BRUSH_DATA31                 0x14fc
+#define R128_BRUSH_DATA32                 0x1500
+#define R128_BRUSH_DATA33                 0x1504
+#define R128_BRUSH_DATA34                 0x1508
+#define R128_BRUSH_DATA35                 0x150c
+#define R128_BRUSH_DATA36                 0x1510
+#define R128_BRUSH_DATA37                 0x1514
+#define R128_BRUSH_DATA38                 0x1518
+#define R128_BRUSH_DATA39                 0x151c
+#define R128_BRUSH_DATA4                  0x1490
+#define R128_BRUSH_DATA40                 0x1520
+#define R128_BRUSH_DATA41                 0x1524
+#define R128_BRUSH_DATA42                 0x1528
+#define R128_BRUSH_DATA43                 0x152c
+#define R128_BRUSH_DATA44                 0x1530
+#define R128_BRUSH_DATA45                 0x1534
+#define R128_BRUSH_DATA46                 0x1538
+#define R128_BRUSH_DATA47                 0x153c
+#define R128_BRUSH_DATA48                 0x1540
+#define R128_BRUSH_DATA49                 0x1544
+#define R128_BRUSH_DATA5                  0x1494
+#define R128_BRUSH_DATA50                 0x1548
+#define R128_BRUSH_DATA51                 0x154c
+#define R128_BRUSH_DATA52                 0x1550
+#define R128_BRUSH_DATA53                 0x1554
+#define R128_BRUSH_DATA54                 0x1558
+#define R128_BRUSH_DATA55                 0x155c
+#define R128_BRUSH_DATA56                 0x1560
+#define R128_BRUSH_DATA57                 0x1564
+#define R128_BRUSH_DATA58                 0x1568
+#define R128_BRUSH_DATA59                 0x156c
+#define R128_BRUSH_DATA6                  0x1498
+#define R128_BRUSH_DATA60                 0x1570
+#define R128_BRUSH_DATA61                 0x1574
+#define R128_BRUSH_DATA62                 0x1578
+#define R128_BRUSH_DATA63                 0x157c
+#define R128_BRUSH_DATA7                  0x149c
+#define R128_BRUSH_DATA8                  0x14a0
+#define R128_BRUSH_DATA9                  0x14a4
+#define R128_BRUSH_SCALE                  0x1470
+#define R128_BRUSH_Y_X                    0x1474
+#define R128_BUS_CNTL                     0x0030
+#       define R128_BUS_MASTER_DIS         (1 << 6)
+#       define R128_BUS_RD_DISCARD_EN      (1 << 24)
+#       define R128_BUS_RD_ABORT_EN        (1 << 25)
+#       define R128_BUS_MSTR_DISCONNECT_EN (1 << 28)
+#       define R128_BUS_WRT_BURST          (1 << 29)
+#       define R128_BUS_READ_BURST         (1 << 30)
+#define R128_BUS_CNTL1                    0x0034
+#       define R128_BUS_WAIT_ON_LOCK_EN    (1 << 4)
+
+#define R128_CACHE_CNTL                   0x1724
+#define R128_CACHE_LINE                   0x0f0c /* PCI */
+#define R128_CAP0_TRIG_CNTL               0x0950 /* ? */
+#define R128_CAP1_TRIG_CNTL               0x09c0 /* ? */
+#define R128_CAPABILITIES_ID              0x0f50 /* PCI */
+#define R128_CAPABILITIES_PTR             0x0f34 /* PCI */
+#define R128_CLK_PIN_CNTL                 0x0001 /* PLL */
+#define R128_CLOCK_CNTL_DATA              0x000c
+#define R128_CLOCK_CNTL_INDEX             0x0008
+#       define R128_PLL_WR_EN             (1 << 7)
+#       define R128_PLL_DIV_SEL           (3 << 8)
+#define R128_CLR_CMP_CLR_3D               0x1a24
+#define R128_CLR_CMP_CLR_DST              0x15c8
+#define R128_CLR_CMP_CLR_SRC              0x15c4
+#define R128_CLR_CMP_CNTL                 0x15c0
+#       define R128_SRC_CMP_EQ_COLOR      (4 <<  0)
+#       define R128_SRC_CMP_NEQ_COLOR     (5 <<  0)
+#       define R128_CLR_CMP_SRC_SOURCE    (1 << 24)
+#define R128_CLR_CMP_MASK                 0x15cc
+#       define R128_CLR_CMP_MSK           0xffffffff
+#define R128_CLR_CMP_MASK_3D              0x1A28
+#define R128_COMMAND                      0x0f04 /* PCI */
+#define R128_COMPOSITE_SHADOW_ID          0x1a0c
+#define R128_CONFIG_APER_0_BASE           0x0100
+#define R128_CONFIG_APER_1_BASE           0x0104
+#define R128_CONFIG_APER_SIZE             0x0108
+#define R128_CONFIG_BONDS                 0x00e8
+#define R128_CONFIG_CNTL                  0x00e0
+#       define APER_0_BIG_ENDIAN_16BPP_SWAP (1 << 0)
+#       define APER_0_BIG_ENDIAN_32BPP_SWAP (2 << 0)
+#define R128_CONFIG_MEMSIZE               0x00f8
+#define R128_CONFIG_MEMSIZE_EMBEDDED      0x0114
+#define R128_CONFIG_REG_1_BASE            0x010c
+#define R128_CONFIG_REG_APER_SIZE         0x0110
+#define R128_CONFIG_XSTRAP                0x00e4
+#define R128_CONSTANT_COLOR_C             0x1d34
+#       define R128_CONSTANT_COLOR_MASK   0x00ffffff
+#       define R128_CONSTANT_COLOR_ONE    0x00ffffff
+#       define R128_CONSTANT_COLOR_ZERO   0x00000000
+#define R128_CRC_CMDFIFO_ADDR             0x0740
+#define R128_CRC_CMDFIFO_DOUT             0x0744
+#define R128_CRTC_CRNT_FRAME              0x0214
+#define R128_CRTC_DEBUG                   0x021c
+#define R128_CRTC_EXT_CNTL                0x0054
+#       define R128_CRTC_VGA_XOVERSCAN    (1 <<  0)
+#       define R128_VGA_ATI_LINEAR        (1 <<  3)
+#       define R128_XCRT_CNT_EN           (1 <<  6)
+#       define R128_CRTC_HSYNC_DIS        (1 <<  8)
+#       define R128_CRTC_VSYNC_DIS        (1 <<  9)
+#       define R128_CRTC_DISPLAY_DIS      (1 << 10)
+#       define R128_CRTC_CRT_ON           (1 << 15)
+#       define R128_FP_OUT_EN             (1 << 22)
+#       define R128_FP_ACTIVE             (1 << 23)
+#define R128_CRTC_EXT_CNTL_DPMS_BYTE      0x0055
+#       define R128_CRTC_HSYNC_DIS_BYTE   (1 <<  0)
+#       define R128_CRTC_VSYNC_DIS_BYTE   (1 <<  1)
+#       define R128_CRTC_DISPLAY_DIS_BYTE (1 <<  2)
+#define R128_CRTC_GEN_CNTL                0x0050
+#       define R128_CRTC_DBL_SCAN_EN      (1 <<  0)
+#       define R128_CRTC_INTERLACE_EN     (1 <<  1)
+#       define R128_CRTC_CSYNC_EN         (1 <<  4)
+#       define R128_CRTC_CUR_EN           (1 << 16)
+#       define R128_CRTC_CUR_MODE_MASK    (7 << 17)
+#       define R128_CRTC_ICON_EN          (1 << 20)
+#       define R128_CRTC_EXT_DISP_EN      (1 << 24)
+#       define R128_CRTC_EN               (1 << 25)
+#       define R128_CRTC_DISP_REQ_EN_B    (1 << 26)
+#define R128_CRTC_GUI_TRIG_VLINE          0x0218
+#define R128_CRTC_H_SYNC_STRT_WID         0x0204
+#       define R128_CRTC_H_SYNC_STRT_PIX        (0x07  <<  0)
+#       define R128_CRTC_H_SYNC_STRT_CHAR       (0x1ff <<  3)
+#       define R128_CRTC_H_SYNC_STRT_CHAR_SHIFT 3
+#       define R128_CRTC_H_SYNC_WID             (0x3f  << 16)
+#       define R128_CRTC_H_SYNC_WID_SHIFT       16
+#       define R128_CRTC_H_SYNC_POL             (1     << 23)
+#define R128_CRTC_H_TOTAL_DISP            0x0200
+#       define R128_CRTC_H_TOTAL          (0x01ff << 0)
+#       define R128_CRTC_H_TOTAL_SHIFT    0
+#       define R128_CRTC_H_DISP           (0x00ff << 16)
+#       define R128_CRTC_H_DISP_SHIFT     16
+#define R128_CRTC_OFFSET                  0x0224
+#define R128_CRTC_OFFSET_CNTL             0x0228
+#define R128_CRTC_PITCH                   0x022c
+#define R128_CRTC_STATUS                  0x005c
+#       define R128_CRTC_VBLANK_SAVE      (1 <<  1)
+#define R128_CRTC_V_SYNC_STRT_WID         0x020c
+#       define R128_CRTC_V_SYNC_STRT       (0x7ff <<  0)
+#       define R128_CRTC_V_SYNC_STRT_SHIFT 0
+#       define R128_CRTC_V_SYNC_WID        (0x1f  << 16)
+#       define R128_CRTC_V_SYNC_WID_SHIFT  16
+#       define R128_CRTC_V_SYNC_POL        (1     << 23)
+#define R128_CRTC_V_TOTAL_DISP            0x0208
+#       define R128_CRTC_V_TOTAL          (0x07ff << 0)
+#       define R128_CRTC_V_TOTAL_SHIFT    0
+#       define R128_CRTC_V_DISP           (0x07ff << 16)
+#       define R128_CRTC_V_DISP_SHIFT     16
+#define R128_CRTC_VLINE_CRNT_VLINE        0x0210
+#       define R128_CRTC_CRNT_VLINE_MASK  (0x7ff << 16)
+#define R128_CRTC2_CRNT_FRAME             0x0314
+#define R128_CRTC2_DEBUG                  0x031c
+#define R128_CRTC2_GEN_CNTL               0x03f8
+#define R128_CRTC2_GUI_TRIG_VLINE         0x0318
+#define R128_CRTC2_H_SYNC_STRT_WID        0x0304
+#define R128_CRTC2_H_TOTAL_DISP           0x0300
+#define R128_CRTC2_OFFSET                 0x0324
+#define R128_CRTC2_OFFSET_CNTL            0x0328
+#define R128_CRTC2_PITCH                  0x032c
+#define R128_CRTC2_STATUS                 0x03fc
+#define R128_CRTC2_V_SYNC_STRT_WID        0x030c
+#define R128_CRTC2_V_TOTAL_DISP           0x0308
+#define R128_CRTC2_VLINE_CRNT_VLINE       0x0310
+#define R128_CRTC8_DATA                   0x03d5 /* VGA, 0x3b5 */
+#define R128_CRTC8_IDX                    0x03d4 /* VGA, 0x3b4 */
+#define R128_CUR_CLR0                     0x026c
+#define R128_CUR_CLR1                     0x0270
+#define R128_CUR_HORZ_VERT_OFF            0x0268
+#define R128_CUR_HORZ_VERT_POSN           0x0264
+#define R128_CUR_OFFSET                   0x0260
+#       define R128_CUR_LOCK              (1U << 31) /* unsigned: 1 << 31 overflows a 32-bit int */
+
+#define R128_DAC_CNTL                     0x0058
+#       define R128_DAC_RANGE_CNTL        (3 <<  0)
+#       define R128_DAC_BLANKING          (1 <<  2)
+#       define R128_DAC_CRT_SEL_CRTC2     (1 <<  4)
+#       define R128_DAC_PALETTE_ACC_CTL   (1 <<  5)
+#       define R128_DAC_8BIT_EN           (1 <<  8)
+#       define R128_DAC_VGA_ADR_EN        (1 << 13)
+#       define R128_DAC_MASK_ALL          (0xffU << 24) /* unsigned: 0xff << 24 overflows a 32-bit int */
+#define R128_DAC_CRC_SIG                  0x02cc
+#define R128_DAC_DATA                     0x03c9 /* VGA */
+#define R128_DAC_MASK                     0x03c6 /* VGA */
+#define R128_DAC_R_INDEX                  0x03c7 /* VGA */
+#define R128_DAC_W_INDEX                  0x03c8 /* VGA */
+#define R128_DDA_CONFIG                   0x02e0
+#define R128_DDA_ON_OFF                   0x02e4
+#define R128_DEFAULT_OFFSET               0x16e0
+#define R128_DEFAULT_PITCH                0x16e4
+#define R128_DEFAULT_SC_BOTTOM_RIGHT      0x16e8
+#       define R128_DEFAULT_SC_RIGHT_MAX  (0x1fff <<  0)
+#       define R128_DEFAULT_SC_BOTTOM_MAX (0x1fff << 16)
+#define R128_DESTINATION_3D_CLR_CMP_VAL   0x1820
+#define R128_DESTINATION_3D_CLR_CMP_MSK   0x1824
+#define R128_DEVICE_ID                    0x0f02 /* PCI */
+#define R128_DP_BRUSH_BKGD_CLR            0x1478
+#define R128_DP_BRUSH_FRGD_CLR            0x147c
+#define R128_DP_CNTL                      0x16c0
+#       define R128_DST_X_LEFT_TO_RIGHT   (1 <<  0)
+#       define R128_DST_Y_TOP_TO_BOTTOM   (1 <<  1)
+#define R128_DP_CNTL_XDIR_YDIR_YMAJOR     0x16d0
+#       define R128_DST_Y_MAJOR             (1 <<  2)
+#       define R128_DST_Y_DIR_TOP_TO_BOTTOM (1 << 15)
+#       define R128_DST_X_DIR_LEFT_TO_RIGHT (1U << 31) /* unsigned: 1 << 31 overflows int */
+#define R128_DP_DATATYPE                  0x16c4
+#       define R128_HOST_BIG_ENDIAN_EN    (1 << 29)
+#define R128_DP_GUI_MASTER_CNTL           0x146c
+#       define R128_GMC_SRC_PITCH_OFFSET_CNTL (1    <<  0)
+#       define R128_GMC_DST_PITCH_OFFSET_CNTL (1    <<  1)
+#       define R128_GMC_SRC_CLIPPING          (1    <<  2)
+#       define R128_GMC_DST_CLIPPING          (1    <<  3)
+#       define R128_GMC_BRUSH_DATATYPE_MASK   (0x0f <<  4)
+#       define R128_GMC_BRUSH_8X8_MONO_FG_BG  (0    <<  4)
+#       define R128_GMC_BRUSH_8X8_MONO_FG_LA  (1    <<  4)
+#       define R128_GMC_BRUSH_1X8_MONO_FG_BG  (4    <<  4)
+#       define R128_GMC_BRUSH_1X8_MONO_FG_LA  (5    <<  4)
+#       define R128_GMC_BRUSH_32x1_MONO_FG_BG (6    <<  4)
+#       define R128_GMC_BRUSH_32x1_MONO_FG_LA (7    <<  4)
+#       define R128_GMC_BRUSH_32x32_MONO_FG_BG (8    <<  4)
+#       define R128_GMC_BRUSH_32x32_MONO_FG_LA (9    <<  4)
+#       define R128_GMC_BRUSH_8x8_COLOR       (10   <<  4)
+#       define R128_GMC_BRUSH_1X8_COLOR       (12   <<  4)
+#       define R128_GMC_BRUSH_SOLID_COLOR     (13   <<  4)
+#       define R128_GMC_BRUSH_NONE            (15   <<  4)
+#       define R128_GMC_DST_8BPP_CI           (2    <<  8)
+#       define R128_GMC_DST_15BPP             (3    <<  8)
+#       define R128_GMC_DST_16BPP             (4    <<  8)
+#       define R128_GMC_DST_24BPP             (5    <<  8)
+#       define R128_GMC_DST_32BPP             (6    <<  8)
+#       define R128_GMC_DST_8BPP_RGB          (7    <<  8)
+#       define R128_GMC_DST_Y8                (8    <<  8)
+#       define R128_GMC_DST_RGB8              (9    <<  8)
+#       define R128_GMC_DST_VYUY              (11   <<  8)
+#       define R128_GMC_DST_YVYU              (12   <<  8)
+#       define R128_GMC_DST_AYUV444           (14   <<  8)
+#       define R128_GMC_DST_ARGB4444          (15   <<  8)
+#       define R128_GMC_DST_DATATYPE_MASK     (0x0f <<  8)
+#       define R128_GMC_DST_DATATYPE_SHIFT    8
+#       define R128_GMC_SRC_DATATYPE_MASK       (3    << 12)
+#       define R128_GMC_SRC_DATATYPE_MONO_FG_BG (0    << 12)
+#       define R128_GMC_SRC_DATATYPE_MONO_FG_LA (1    << 12)
+#       define R128_GMC_SRC_DATATYPE_COLOR      (3    << 12)
+#       define R128_GMC_BYTE_PIX_ORDER        (1    << 14)
+#       define R128_GMC_BYTE_MSB_TO_LSB       (0    << 14)
+#       define R128_GMC_BYTE_LSB_TO_MSB       (1    << 14)
+#       define R128_GMC_CONVERSION_TEMP       (1    << 15)
+#       define R128_GMC_CONVERSION_TEMP_6500  (0    << 15)
+#       define R128_GMC_CONVERSION_TEMP_9300  (1    << 15)
+#       define R128_GMC_ROP3_MASK             (0xff << 16)
+#       define R128_DP_SRC_SOURCE_MASK        (7    << 24)
+#       define R128_DP_SRC_SOURCE_MEMORY      (2    << 24)
+#       define R128_DP_SRC_SOURCE_HOST_DATA   (3    << 24)
+#       define R128_GMC_3D_FCN_EN             (1    << 27)
+#       define R128_GMC_CLR_CMP_CNTL_DIS      (1    << 28)
+#       define R128_GMC_AUX_CLIP_DIS          (1    << 29)
+#       define R128_GMC_WR_MSK_DIS            (1    << 30)
+#       define R128_GMC_LD_BRUSH_Y_X          (1U   << 31) /* unsigned: 1 << 31 overflows int */
+#       define R128_ROP3_ZERO             0x00000000 /* ROP3 codes below sit in the R128_GMC_ROP3_MASK bits (23:16) */
+#       define R128_ROP3_DSa              0x00880000
+#       define R128_ROP3_SDna             0x00440000
+#       define R128_ROP3_S                0x00cc0000
+#       define R128_ROP3_DSna             0x00220000
+#       define R128_ROP3_D                0x00aa0000
+#       define R128_ROP3_DSx              0x00660000
+#       define R128_ROP3_DSo              0x00ee0000
+#       define R128_ROP3_DSon             0x00110000
+#       define R128_ROP3_DSxn             0x00990000
+#       define R128_ROP3_Dn               0x00550000
+#       define R128_ROP3_SDno             0x00dd0000
+#       define R128_ROP3_Sn               0x00330000
+#       define R128_ROP3_DSno             0x00bb0000
+#       define R128_ROP3_DSan             0x00770000
+#       define R128_ROP3_ONE              0x00ff0000
+#       define R128_ROP3_DPa              0x00a00000
+#       define R128_ROP3_PDna             0x00500000
+#       define R128_ROP3_P                0x00f00000
+#       define R128_ROP3_DPna             0x000a0000
+        /* R128_ROP3_D (0x00aa0000) is already defined earlier in this list; not repeated here */
+#       define R128_ROP3_DPx              0x005a0000
+#       define R128_ROP3_DPo              0x00fa0000
+#       define R128_ROP3_DPon             0x00050000
+#       define R128_ROP3_PDxn             0x00a50000
+#       define R128_ROP3_PDno             0x00f50000
+#       define R128_ROP3_Pn               0x000f0000
+#       define R128_ROP3_DPno             0x00af0000
+#       define R128_ROP3_DPan             0x005f0000
+
+
+#define R128_DP_GUI_MASTER_CNTL_C         0x1c84
+#define R128_DP_MIX                       0x16c8
+#define R128_DP_SRC_BKGD_CLR              0x15dc
+#define R128_DP_SRC_FRGD_CLR              0x15d8
+#define R128_DP_WRITE_MASK                0x16cc
+#define R128_DST_BRES_DEC                 0x1630
+#define R128_DST_BRES_ERR                 0x1628
+#define R128_DST_BRES_INC                 0x162c
+#define R128_DST_BRES_LNTH                0x1634
+#define R128_DST_BRES_LNTH_SUB            0x1638
+#define R128_DST_HEIGHT                   0x1410
+#define R128_DST_HEIGHT_WIDTH             0x143c
+#define R128_DST_HEIGHT_WIDTH_8           0x158c
+#define R128_DST_HEIGHT_WIDTH_BW          0x15b4
+#define R128_DST_HEIGHT_Y                 0x15a0
+#define R128_DST_OFFSET                   0x1404
+#define R128_DST_PITCH                    0x1408
+#define R128_DST_PITCH_OFFSET             0x142c
+#define R128_DST_PITCH_OFFSET_C           0x1c80
+#       define R128_PITCH_SHIFT               21
+#       define R128_DST_TILE                 (1U << 31) /* unsigned: 1 << 31 overflows int */
+#define R128_DST_WIDTH                    0x140c
+#define R128_DST_WIDTH_HEIGHT             0x1598
+#define R128_DST_WIDTH_X                  0x1588
+#define R128_DST_WIDTH_X_INCY             0x159c
+#define R128_DST_X                        0x141c
+#define R128_DST_X_SUB                    0x15a4
+#define R128_DST_X_Y                      0x1594
+#define R128_DST_Y                        0x1420
+#define R128_DST_Y_SUB                    0x15a8
+#define R128_DST_Y_X                      0x1438
+
+#define R128_EXT_MEM_CNTL                 0x0144
+
+#define R128_FCP_CNTL                     0x0012 /* PLL */
+#define R128_FLUSH_1                      0x1704
+#define R128_FLUSH_2                      0x1708
+#define R128_FLUSH_3                      0x170c
+#define R128_FLUSH_4                      0x1710
+#define R128_FLUSH_5                      0x1714
+#define R128_FLUSH_6                      0x1718
+#define R128_FLUSH_7                      0x171c
+#define R128_FOG_3D_TABLE_START           0x1810
+#define R128_FOG_3D_TABLE_END             0x1814
+#define R128_FOG_3D_TABLE_DENSITY         0x181c
+#define R128_FOG_TABLE_INDEX              0x1a14
+#define R128_FOG_TABLE_DATA               0x1a18
+#define R128_FP_CRTC_H_TOTAL_DISP         0x0250
+#define R128_FP_CRTC_V_TOTAL_DISP         0x0254
+#define R128_FP_GEN_CNTL                  0x0284
+#       define R128_FP_FPON                  (1 << 0)
+#       define R128_FP_BLANK_DIS             (1 << 1)
+#       define R128_FP_TDMS_EN               (1 <<  2)
+#       define R128_FP_DETECT_SENSE          (1 <<  8)
+#       define R128_FP_SEL_CRTC2             (1 << 13)
+#       define R128_FP_CRTC_DONT_SHADOW_VPAR (1 << 16)
+#       define R128_FP_CRTC_DONT_SHADOW_HEND (1 << 17)
+#       define R128_FP_CRTC_USE_SHADOW_VEND  (1 << 18)
+#       define R128_FP_CRTC_USE_SHADOW_ROWCUR (1 << 19)
+#       define R128_FP_CRTC_HORZ_DIV2_EN     (1 << 20)
+#       define R128_FP_CRTC_HOR_CRT_DIV2_DIS (1 << 21)
+#       define R128_FP_CRT_SYNC_SEL          (1 << 23)
+#       define R128_FP_USE_SHADOW_EN         (1 << 24)
+#define R128_FP_H_SYNC_STRT_WID           0x02c4
+#define R128_FP_HORZ_STRETCH              0x028c
+#       define R128_HORZ_STRETCH_RATIO_MASK  0xffff
+#       define R128_HORZ_STRETCH_RATIO_SHIFT 0
+#       define R128_HORZ_STRETCH_RATIO_MAX   4096
+#       define R128_HORZ_PANEL_SIZE          (0xff   << 16)
+#       define R128_HORZ_PANEL_SHIFT         16
+#       define R128_AUTO_HORZ_RATIO          (0      << 24)
+#       define R128_HORZ_STRETCH_PIXREP      (0      << 25)
+#       define R128_HORZ_STRETCH_BLEND       (1      << 25)
+#       define R128_HORZ_STRETCH_ENABLE      (1      << 26)
+#       define R128_HORZ_FP_LOOP_STRETCH     (0x7    << 27)
+#       define R128_HORZ_STRETCH_RESERVED    (1      << 30)
+#       define R128_HORZ_AUTO_RATIO_FIX_EN   (1U     << 31) /* unsigned: 1 << 31 overflows int */
+
+#define R128_FP_PANEL_CNTL                0x0288
+#       define R128_FP_DIGON              (1 << 0)
+#       define R128_FP_BLON               (1 << 1)
+#define R128_FP_V_SYNC_STRT_WID           0x02c8
+#define R128_FP_VERT_STRETCH              0x0290
+#       define R128_VERT_PANEL_SIZE          (0x7ff <<  0)
+#       define R128_VERT_PANEL_SHIFT         0
+#       define R128_VERT_STRETCH_RATIO_MASK  0x3ff /* unshifted; overlaps PANEL_SIZE bits as written - presumably applied together with RATIO_SHIFT, confirm at call sites */
+#       define R128_VERT_STRETCH_RATIO_SHIFT 11
+#       define R128_VERT_STRETCH_RATIO_MAX   1024
+#       define R128_VERT_STRETCH_ENABLE      (1     << 24)
+#       define R128_VERT_STRETCH_LINEREP     (0     << 25)
+#       define R128_VERT_STRETCH_BLEND       (1     << 25)
+#       define R128_VERT_AUTO_RATIO_EN       (1     << 26)
+#       define R128_VERT_STRETCH_RESERVED    0xf8e00000
+
+#define R128_GEN_INT_CNTL                 0x0040
+#define R128_GEN_INT_STATUS               0x0044
+#       define R128_VSYNC_INT_AK          (1 <<  2) /* same bit as R128_VSYNC_INT */
+#       define R128_VSYNC_INT             (1 <<  2) /* same bit as R128_VSYNC_INT_AK */
+#define R128_GEN_RESET_CNTL               0x00f0
+#       define R128_SOFT_RESET_GUI          (1 <<  0)
+#       define R128_SOFT_RESET_VCLK         (1 <<  8)
+#       define R128_SOFT_RESET_PCLK         (1 <<  9)
+#       define R128_SOFT_RESET_DISPENG_XCLK (1 << 11)
+#       define R128_SOFT_RESET_MEMCTLR_XCLK (1 << 12)
+#define R128_GENENB                       0x03c3 /* VGA */
+#define R128_GENFC_RD                     0x03ca /* VGA */
+#define R128_GENFC_WT                     0x03da /* VGA, 0x03ba */
+#define R128_GENMO_RD                     0x03cc /* VGA */
+#define R128_GENMO_WT                     0x03c2 /* VGA */
+#define R128_GENS0                        0x03c2 /* VGA */
+#define R128_GENS1                        0x03da /* VGA, 0x03ba */
+#define R128_GPIO_MONID                   0x0068
+#       define R128_GPIO_MONID_A_0        (1 <<  0)
+#       define R128_GPIO_MONID_A_1        (1 <<  1)
+#       define R128_GPIO_MONID_A_2        (1 <<  2)
+#       define R128_GPIO_MONID_A_3        (1 <<  3)
+#       define R128_GPIO_MONID_Y_0        (1 <<  8)
+#       define R128_GPIO_MONID_Y_1        (1 <<  9)
+#       define R128_GPIO_MONID_Y_2        (1 << 10)
+#       define R128_GPIO_MONID_Y_3        (1 << 11)
+#       define R128_GPIO_MONID_EN_0       (1 << 16)
+#       define R128_GPIO_MONID_EN_1       (1 << 17)
+#       define R128_GPIO_MONID_EN_2       (1 << 18)
+#       define R128_GPIO_MONID_EN_3       (1 << 19)
+#       define R128_GPIO_MONID_MASK_0     (1 << 24)
+#       define R128_GPIO_MONID_MASK_1     (1 << 25)
+#       define R128_GPIO_MONID_MASK_2     (1 << 26)
+#       define R128_GPIO_MONID_MASK_3     (1 << 27)
+#define R128_GPIO_MONIDB                  0x006c
+#define R128_GRPH8_DATA                   0x03cf /* VGA */
+#define R128_GRPH8_IDX                    0x03ce /* VGA */
+#define R128_GUI_DEBUG0                   0x16a0
+#define R128_GUI_DEBUG1                   0x16a4
+#define R128_GUI_DEBUG2                   0x16a8
+#define R128_GUI_DEBUG3                   0x16ac
+#define R128_GUI_DEBUG4                   0x16b0
+#define R128_GUI_DEBUG5                   0x16b4
+#define R128_GUI_DEBUG6                   0x16b8
+#define R128_GUI_PROBE                    0x16bc
+#define R128_GUI_SCRATCH_REG0             0x15e0
+#define R128_GUI_SCRATCH_REG1             0x15e4
+#define R128_GUI_SCRATCH_REG2             0x15e8
+#define R128_GUI_SCRATCH_REG3             0x15ec
+#define R128_GUI_SCRATCH_REG4             0x15f0
+#define R128_GUI_SCRATCH_REG5             0x15f4
+#define R128_GUI_STAT                     0x1740
+#       define R128_GUI_FIFOCNT_MASK      0x0fff
+#       define R128_GUI_ACTIVE            (1U << 31) /* unsigned: 1 << 31 overflows int */
+
+#define R128_HEADER                       0x0f0e /* PCI */
+#define R128_HOST_DATA0                   0x17c0
+#define R128_HOST_DATA1                   0x17c4
+#define R128_HOST_DATA2                   0x17c8
+#define R128_HOST_DATA3                   0x17cc
+#define R128_HOST_DATA4                   0x17d0
+#define R128_HOST_DATA5                   0x17d4
+#define R128_HOST_DATA6                   0x17d8
+#define R128_HOST_DATA7                   0x17dc
+#define R128_HOST_DATA_LAST               0x17e0
+#define R128_HOST_PATH_CNTL               0x0130
+#define R128_HTOTAL_CNTL                  0x0009 /* PLL */
+#define R128_HW_DEBUG                     0x0128
+#define R128_HW_DEBUG2                    0x011c
+
+#define R128_I2C_CNTL_1                   0x0094 /* ? */
+#define R128_INTERRUPT_LINE               0x0f3c /* PCI */
+#define R128_INTERRUPT_PIN                0x0f3d /* PCI */
+#define R128_IO_BASE                      0x0f14 /* PCI */
+
+#define R128_LATENCY                      0x0f0d /* PCI */
+#define R128_LEAD_BRES_DEC                0x1608
+#define R128_LEAD_BRES_ERR                0x1600
+#define R128_LEAD_BRES_INC                0x1604
+#define R128_LEAD_BRES_LNTH               0x161c
+#define R128_LEAD_BRES_LNTH_SUB           0x1624
+#define R128_LVDS_GEN_CNTL                0x02d0
+#       define R128_LVDS_ON               (1   <<  0)
+#       define R128_LVDS_DISPLAY_DIS      (1   <<  1)
+#       define R128_LVDS_EN               (1   <<  7)
+#       define R128_LVDS_DIGON            (1   << 18)
+#       define R128_LVDS_BLON             (1   << 19)
+#       define R128_LVDS_SEL_CRTC2        (1   << 23)
+#       define R128_HSYNC_DELAY_SHIFT     28
+#       define R128_HSYNC_DELAY_MASK      (0xfU << 28) /* unsigned: 0xf << 28 overflows int */
+
+#define R128_MAX_LATENCY                  0x0f3f /* PCI */
+#define R128_MCLK_CNTL                    0x000f /* PLL */
+#       define R128_FORCE_GCP             (1 << 16)
+#       define R128_FORCE_PIPE3D_CP       (1 << 17)
+#       define R128_FORCE_RCP             (1 << 18)
+#define R128_MDGPIO_A_REG                 0x01ac
+#define R128_MDGPIO_EN_REG                0x01b0
+#define R128_MDGPIO_MASK                  0x0198
+#define R128_MDGPIO_Y_REG                 0x01b4
+#define R128_MEM_ADDR_CONFIG              0x0148
+#define R128_MEM_BASE                     0x0f10 /* PCI */
+#define R128_MEM_CNTL                     0x0140
+#define R128_MEM_INIT_LAT_TIMER           0x0154
+#define R128_MEM_INTF_CNTL                0x014c
+#define R128_MEM_SDRAM_MODE_REG           0x0158
+#define R128_MEM_STR_CNTL                 0x0150
+#define R128_MEM_VGA_RP_SEL               0x003c
+#define R128_MEM_VGA_WP_SEL               0x0038
+#define R128_MIN_GRANT                    0x0f3e /* PCI */
+#define R128_MM_DATA                      0x0004
+#define R128_MM_INDEX                     0x0000
+#define R128_MPLL_CNTL                    0x000e /* PLL */
+#define R128_MPP_TB_CONFIG                0x01c0 /* ? */
+#define R128_MPP_GP_CONFIG                0x01c8 /* ? */
+
+#define R128_N_VIF_COUNT                  0x0248
+
+#define R128_OVR_CLR                      0x0230
+#define R128_OVR_WID_LEFT_RIGHT           0x0234
+#define R128_OVR_WID_TOP_BOTTOM           0x0238
+
+/* first overlay unit (there is only one) */
+
+#define R128_OV0_Y_X_START                0x0400
+#define R128_OV0_Y_X_END                  0x0404
+#define R128_OV0_EXCLUSIVE_HORZ           0x0408
+#       define  R128_EXCL_HORZ_START_MASK        0x000000ff
+#       define  R128_EXCL_HORZ_END_MASK          0x0000ff00
+#       define  R128_EXCL_HORZ_BACK_PORCH_MASK   0x00ff0000
+#       define  R128_EXCL_HORZ_EXCLUSIVE_EN      0x80000000
+#define R128_OV0_EXCLUSIVE_VERT           0x040C
+#       define  R128_EXCL_VERT_START_MASK        0x000003ff
+#       define  R128_EXCL_VERT_END_MASK          0x03ff0000
+#define R128_OV0_REG_LOAD_CNTL            0x0410
+#       define  R128_REG_LD_CTL_LOCK                 0x00000001L
+#       define  R128_REG_LD_CTL_VBLANK_DURING_LOCK   0x00000002L
+#       define  R128_REG_LD_CTL_STALL_GUI_UNTIL_FLIP 0x00000004L
+#       define  R128_REG_LD_CTL_LOCK_READBACK        0x00000008L
+#define R128_OV0_SCALE_CNTL               0x0420
+#       define  R128_SCALER_PIX_EXPAND           0x00000001L
+#       define  R128_SCALER_Y2R_TEMP             0x00000002L
+#       define  R128_SCALER_HORZ_PICK_NEAREST    0x00000003L /* NOTE(review): equals PIX_EXPAND | Y2R_TEMP, not a single bit - confirm against hardware docs */
+#       define  R128_SCALER_VERT_PICK_NEAREST    0x00000004L
+#       define  R128_SCALER_SIGNED_UV            0x00000010L
+#       define  R128_SCALER_GAMMA_SEL_MASK       0x00000060L
+#       define  R128_SCALER_GAMMA_SEL_BRIGHT     0x00000000L
+#       define  R128_SCALER_GAMMA_SEL_G22        0x00000020L
+#       define  R128_SCALER_GAMMA_SEL_G18        0x00000040L
+#       define  R128_SCALER_GAMMA_SEL_G14        0x00000060L
+#       define  R128_SCALER_COMCORE_SHIFT_UP_ONE 0x00000080L
+#       define  R128_SCALER_SURFAC_FORMAT        0x00000f00L
+#       define  R128_SCALER_SOURCE_15BPP         0x00000300L
+#       define  R128_SCALER_SOURCE_16BPP         0x00000400L
+#       define  R128_SCALER_SOURCE_32BPP         0x00000600L
+#       define  R128_SCALER_SOURCE_YUV9          0x00000900L
+#       define  R128_SCALER_SOURCE_YUV12         0x00000A00L
+#       define  R128_SCALER_SOURCE_VYUY422       0x00000B00L
+#       define  R128_SCALER_SOURCE_YVYU422       0x00000C00L
+#       define  R128_SCALER_SMART_SWITCH         0x00008000L
+#       define  R128_SCALER_BURST_PER_PLANE      0x00ff0000L
+#       define  R128_SCALER_DOUBLE_BUFFER        0x01000000L
+#       define  R128_SCALER_DIS_LIMIT            0x08000000L
+#       define  R128_SCALER_PRG_LOAD_START       0x10000000L
+#       define  R128_SCALER_INT_EMU              0x20000000L
+#       define  R128_SCALER_ENABLE               0x40000000L
+#       define  R128_SCALER_SOFT_RESET           0x80000000L
+#define R128_OV0_V_INC                    0x0424
+#define R128_OV0_P1_V_ACCUM_INIT          0x0428
+#       define  R128_OV0_P1_MAX_LN_IN_PER_LN_OUT        0x00000003L
+#       define  R128_OV0_P1_V_ACCUM_INIT_MASK           0x01ff8000L
+#define R128_OV0_P23_V_ACCUM_INIT         0x042C
+#define R128_OV0_P1_BLANK_LINES_AT_TOP    0x0430
+#       define  R128_P1_BLNK_LN_AT_TOP_M1_MASK   0x00000fffL
+#       define  R128_P1_ACTIVE_LINES_M1          0x0fff0000L
+#define R128_OV0_P23_BLANK_LINES_AT_TOP   0x0434
+#       define  R128_P23_BLNK_LN_AT_TOP_M1_MASK  0x000007ffL
+#       define  R128_P23_ACTIVE_LINES_M1         0x07ff0000L
+#define R128_OV0_VID_BUF0_BASE_ADRS       0x0440
+#       define  R128_VIF_BUF0_PITCH_SEL          0x00000001L
+#       define  R128_VIF_BUF0_TILE_ADRS          0x00000002L
+#       define  R128_VIF_BUF0_BASE_ADRS_MASK     0x03fffff0L
+#       define  R128_VIF_BUF0_1ST_LINE_LSBS_MASK 0x48000000L
+#define R128_OV0_VID_BUF1_BASE_ADRS       0x0444
+#       define  R128_VIF_BUF1_PITCH_SEL          0x00000001L
+#       define  R128_VIF_BUF1_TILE_ADRS          0x00000002L
+#       define  R128_VIF_BUF1_BASE_ADRS_MASK     0x03fffff0L
+#       define  R128_VIF_BUF1_1ST_LINE_LSBS_MASK 0x48000000L
+#define R128_OV0_VID_BUF2_BASE_ADRS       0x0448
+#       define  R128_VIF_BUF2_PITCH_SEL          0x00000001L
+#       define  R128_VIF_BUF2_TILE_ADRS          0x00000002L
+#       define  R128_VIF_BUF2_BASE_ADRS_MASK     0x03fffff0L
+#       define  R128_VIF_BUF2_1ST_LINE_LSBS_MASK 0x48000000L
+#define R128_OV0_VID_BUF3_BASE_ADRS       0x044C
+#define R128_OV0_VID_BUF4_BASE_ADRS       0x0450
+#define R128_OV0_VID_BUF5_BASE_ADRS       0x0454
+#define R128_OV0_VID_BUF_PITCH0_VALUE     0x0460
+#define R128_OV0_VID_BUF_PITCH1_VALUE     0x0464
+#define R128_OV0_AUTO_FLIP_CNTL           0x0470
+#define R128_OV0_DEINTERLACE_PATTERN      0x0474
+#define R128_OV0_H_INC                    0x0480
+#define R128_OV0_STEP_BY                  0x0484
+#define R128_OV0_P1_H_ACCUM_INIT          0x0488
+#define R128_OV0_P23_H_ACCUM_INIT         0x048C
+#define R128_OV0_P1_X_START_END           0x0494
+#define R128_OV0_P2_X_START_END           0x0498
+#define R128_OV0_P3_X_START_END           0x049C
+#define R128_OV0_FILTER_CNTL              0x04A0
+#define R128_OV0_FOUR_TAP_COEF_0          0x04B0
+#define R128_OV0_FOUR_TAP_COEF_1          0x04B4
+#define R128_OV0_FOUR_TAP_COEF_2          0x04B8
+#define R128_OV0_FOUR_TAP_COEF_3          0x04BC
+#define R128_OV0_FOUR_TAP_COEF_4          0x04C0
+#define R128_OV0_COLOUR_CNTL              0x04E0
+#define R128_OV0_VIDEO_KEY_CLR            0x04E4
+#define R128_OV0_VIDEO_KEY_MSK            0x04E8
+#define R128_OV0_GRAPHICS_KEY_CLR         0x04EC
+#define R128_OV0_GRAPHICS_KEY_MSK         0x04F0
+#define R128_OV0_KEY_CNTL                 0x04F4
+#       define  R128_VIDEO_KEY_FN_MASK           0x00000007L
+#       define  R128_VIDEO_KEY_FN_FALSE          0x00000000L
+#       define  R128_VIDEO_KEY_FN_TRUE           0x00000001L
+#       define  R128_VIDEO_KEY_FN_EQ             0x00000004L
+#       define  R128_VIDEO_KEY_FN_NE             0x00000005L
+#       define  R128_GRAPHIC_KEY_FN_MASK         0x00000070L
+#       define  R128_GRAPHIC_KEY_FN_FALSE        0x00000000L
+#       define  R128_GRAPHIC_KEY_FN_TRUE         0x00000010L
+#       define  R128_GRAPHIC_KEY_FN_EQ           0x00000040L
+#       define  R128_GRAPHIC_KEY_FN_NE           0x00000050L
+#       define  R128_CMP_MIX_MASK                0x00000100L
+#       define  R128_CMP_MIX_OR                  0x00000000L
+#       define  R128_CMP_MIX_AND                 0x00000100L
+#define R128_OV0_TEST                     0x04F8
+
+
+#define R128_PALETTE_DATA                 0x00b4
+#define R128_PALETTE_INDEX                0x00b0
+#define R128_PC_DEBUG_MODE                0x1760
+#define R128_PC_GUI_CTLSTAT               0x1748
+#define R128_PC_GUI_MODE                  0x1744
+#       define R128_PC_IGNORE_UNIFY       (1 << 5)
+#define R128_PC_MISC_CNTL                 0x0188
+#define R128_PC_NGUI_CTLSTAT              0x0184
+#       define R128_PC_FLUSH_GUI          (3 << 0)
+#       define R128_PC_RI_GUI             (1 << 2)
+#       define R128_PC_FLUSH_ALL          0x00ff
+#       define R128_PC_BUSY               (1U << 31) /* unsigned: 1 << 31 overflows int */
+#define R128_PC_NGUI_MODE                 0x0180
+#define R128_PCI_GART_PAGE                0x017c
+#define R128_PLANE_3D_MASK_C              0x1d44
+#define R128_PLL_TEST_CNTL                0x0013 /* PLL */
+#define R128_PMI_CAP_ID                   0x0f5c /* PCI */
+#define R128_PMI_DATA                     0x0f63 /* PCI */
+#define R128_PMI_NXT_CAP_PTR              0x0f5d /* PCI */
+#define R128_PMI_PMC_REG                  0x0f5e /* PCI */
+#define R128_PMI_PMCSR_REG                0x0f60 /* PCI */
+#define R128_PMI_REGISTER                 0x0f5c /* PCI */
+#define R128_PPLL_CNTL                    0x0002 /* PLL */
+#       define R128_PPLL_RESET                (1 <<  0)
+#       define R128_PPLL_SLEEP                (1 <<  1)
+#       define R128_PPLL_ATOMIC_UPDATE_EN     (1 << 16)
+#       define R128_PPLL_VGA_ATOMIC_UPDATE_EN (1 << 17)
+#define R128_PPLL_DIV_0                   0x0004 /* PLL */
+#define R128_PPLL_DIV_1                   0x0005 /* PLL */
+#define R128_PPLL_DIV_2                   0x0006 /* PLL */
+#define R128_PPLL_DIV_3                   0x0007 /* PLL */
+#       define R128_PPLL_FB3_DIV_MASK     0x07ff
+#       define R128_PPLL_POST3_DIV_MASK   0x00070000
+#define R128_PPLL_REF_DIV                 0x0003 /* PLL */
+#       define R128_PPLL_REF_DIV_MASK     0x03ff
+#       define R128_PPLL_ATOMIC_UPDATE_R  (1 << 15) /* same as _W */
+#       define R128_PPLL_ATOMIC_UPDATE_W  (1 << 15) /* same as _R */
+#define R128_PWR_MNGMT_CNTL_STATUS        0x0f60 /* PCI */
+#define R128_REG_BASE                     0x0f18 /* PCI */
+#define R128_REGPROG_INF                  0x0f09 /* PCI */
+#define R128_REVISION_ID                  0x0f08 /* PCI */
+
+#define R128_SC_BOTTOM                    0x164c
+#define R128_SC_BOTTOM_RIGHT              0x16f0
+#define R128_SC_BOTTOM_RIGHT_C            0x1c8c
+#define R128_SC_LEFT                      0x1640
+#define R128_SC_RIGHT                     0x1644
+#define R128_SC_TOP                       0x1648
+#define R128_SC_TOP_LEFT                  0x16ec
+#define R128_SC_TOP_LEFT_C                0x1c88
+#define R128_SEQ8_DATA                    0x03c5 /* VGA */
+#define R128_SEQ8_IDX                     0x03c4 /* VGA */
+#define R128_SNAPSHOT_F_COUNT             0x0244
+#define R128_SNAPSHOT_VH_COUNTS           0x0240
+#define R128_SNAPSHOT_VIF_COUNT           0x024c
+#define R128_SRC_OFFSET                   0x15ac
+#define R128_SRC_PITCH                    0x15b0
+#define R128_SRC_PITCH_OFFSET             0x1428
+#define R128_SRC_SC_BOTTOM                0x165c
+#define R128_SRC_SC_BOTTOM_RIGHT          0x16f4
+#define R128_SRC_SC_RIGHT                 0x1654
+#define R128_SRC_X                        0x1414
+#define R128_SRC_X_Y                      0x1590
+#define R128_SRC_Y                        0x1418
+#define R128_SRC_Y_X                      0x1434
+#define R128_STATUS                       0x0f06 /* PCI */
+#define R128_SUBPIC_CNTL                  0x0540 /* ? */
+#define R128_SUB_CLASS                    0x0f0a /* PCI */
+#define R128_SURFACE_DELAY                0x0b00
+#define R128_SURFACE0_INFO                0x0b0c
+#define R128_SURFACE0_LOWER_BOUND         0x0b04
+#define R128_SURFACE0_UPPER_BOUND         0x0b08
+#define R128_SURFACE1_INFO                0x0b1c
+#define R128_SURFACE1_LOWER_BOUND         0x0b14
+#define R128_SURFACE1_UPPER_BOUND         0x0b18
+#define R128_SURFACE2_INFO                0x0b2c
+#define R128_SURFACE2_LOWER_BOUND         0x0b24
+#define R128_SURFACE2_UPPER_BOUND         0x0b28
+#define R128_SURFACE3_INFO                0x0b3c
+#define R128_SURFACE3_LOWER_BOUND         0x0b34
+#define R128_SURFACE3_UPPER_BOUND         0x0b38
+#define R128_SW_SEMAPHORE                 0x013c
+
+#define R128_TEST_DEBUG_CNTL              0x0120
+#define R128_TEST_DEBUG_MUX               0x0124
+#define R128_TEST_DEBUG_OUT               0x012c
+#define R128_TMDS_CRC                     0x02a0
+#define R128_TMDS_TRANSMITTER_CNTL        0x02a4
+#       define R128_TMDS_PLLEN            (1 << 0)
+#       define R128_TMDS_PLLRST           (1 << 1)
+#define R128_TRAIL_BRES_DEC               0x1614
+#define R128_TRAIL_BRES_ERR               0x160c
+#define R128_TRAIL_BRES_INC               0x1610
+#define R128_TRAIL_X                      0x1618
+#define R128_TRAIL_X_SUB                  0x1620
+
+#define R128_VCLK_ECP_CNTL                0x0008 /* PLL */
+#define R128_VENDOR_ID                    0x0f00 /* PCI */
+#define R128_VGA_DDA_CONFIG               0x02e8
+#define R128_VGA_DDA_ON_OFF               0x02ec
+#define R128_VID_BUFFER_CONTROL           0x0900
+#define R128_VIDEOMUX_CNTL                0x0190
+#define R128_VIPH_CONTROL                 0x01D0 /* ? */
+
+#define R128_WAIT_UNTIL                   0x1720
+
+#define R128_X_MPLL_REF_FB_DIV            0x000a /* PLL */
+#define R128_XCLK_CNTL                    0x000d /* PLL */
+#define R128_XDLL_CNTL                    0x000c /* PLL */
+#define R128_XPLL_CNTL                    0x000b /* PLL */
+
+				/* Registers for CCE and Microcode Engine */
+#define R128_PM4_MICROCODE_ADDR           0x07d4
+#define R128_PM4_MICROCODE_RADDR          0x07d8
+#define R128_PM4_MICROCODE_DATAH          0x07dc
+#define R128_PM4_MICROCODE_DATAL          0x07e0
+
+#define R128_PM4_BUFFER_OFFSET            0x0700
+#define R128_PM4_BUFFER_CNTL              0x0704 /* mode codes below are unsigned: 8 << 28 and 15 << 28 overflow int */
+#       define R128_PM4_NONPM4                 (0U  << 28)
+#       define R128_PM4_192PIO                 (1U  << 28)
+#       define R128_PM4_192BM                  (2U  << 28)
+#       define R128_PM4_128PIO_64INDBM         (3U  << 28)
+#       define R128_PM4_128BM_64INDBM          (4U  << 28)
+#       define R128_PM4_64PIO_128INDBM         (5U  << 28)
+#       define R128_PM4_64BM_128INDBM          (6U  << 28)
+#       define R128_PM4_64PIO_64VCBM_64INDBM   (7U  << 28)
+#       define R128_PM4_64BM_64VCBM_64INDBM    (8U  << 28)
+#       define R128_PM4_64PIO_64VCPIO_64INDPIO (15U << 28)
+#define R128_PM4_BUFFER_WM_CNTL           0x0708
+#       define R128_WMA_SHIFT                  0
+#       define R128_WMB_SHIFT                  8
+#       define R128_WMC_SHIFT                 16
+#       define R128_WB_WM_SHIFT               24
+#define R128_PM4_BUFFER_DL_RPTR_ADDR      0x070c
+#define R128_PM4_BUFFER_DL_RPTR           0x0710
+#define R128_PM4_BUFFER_DL_WPTR           0x0714
+#       define R128_PM4_BUFFER_DL_DONE    (1U << 31) /* unsigned: 1 << 31 overflows int */
+#define R128_PM4_BUFFER_DL_WPTR_DELAY     0x0718
+#       define R128_PRE_WRITE_TIMER_SHIFT      0
+#       define R128_PRE_WRITE_LIMIT_SHIFT     23
+#define R128_PM4_VC_FPU_SETUP             0x071c
+#       define R128_FRONT_DIR_CW          (0 <<  0)
+#       define R128_FRONT_DIR_CCW         (1 <<  0)
+#       define R128_FRONT_DIR_MASK        (1 <<  0)
+#       define R128_BACKFACE_CULL         (0 <<  1)
+#       define R128_BACKFACE_POINTS       (1 <<  1)
+#       define R128_BACKFACE_LINES        (2 <<  1)
+#       define R128_BACKFACE_SOLID        (3 <<  1)
+#       define R128_BACKFACE_MASK         (3 <<  1)
+#       define R128_FRONTFACE_CULL        (0 <<  3)
+#       define R128_FRONTFACE_POINTS      (1 <<  3)
+#       define R128_FRONTFACE_LINES       (2 <<  3)
+#       define R128_FRONTFACE_SOLID       (3 <<  3)
+#       define R128_FRONTFACE_MASK        (3 <<  3)
+#       define R128_FPU_COLOR_SOLID       (0 <<  5)
+#       define R128_FPU_COLOR_FLAT        (1 <<  5)
+#       define R128_FPU_COLOR_GOURAUD     (2 <<  5)
+#       define R128_FPU_COLOR_GOURAUD2    (3 <<  5)
+#       define R128_FPU_COLOR_MASK        (3 <<  5)
+#       define R128_FPU_SUB_PIX_2BITS     (0 <<  7)
+#       define R128_FPU_SUB_PIX_4BITS     (1 <<  7)
+#       define R128_FPU_MODE_2D           (0 <<  8)
+#       define R128_FPU_MODE_3D           (1 <<  8)
+#       define R128_TRAP_BITS_DISABLE     (1 <<  9)
+#       define R128_EDGE_ANTIALIAS        (1 << 10)
+#       define R128_SUPERSAMPLE           (1 << 11)
+#       define R128_XFACTOR_2             (0 << 12)
+#       define R128_XFACTOR_4             (1 << 12)
+#       define R128_YFACTOR_2             (0 << 13)
+#       define R128_YFACTOR_4             (1 << 13)
+#       define R128_FLAT_SHADE_VERTEX_D3D (0 << 14)
+#       define R128_FLAT_SHADE_VERTEX_OGL (1 << 14)
+#       define R128_FPU_ROUND_TRUNCATE    (0 << 15)
+#       define R128_FPU_ROUND_NEAREST     (1 << 15)
+#       define R128_WM_SEL_8DW            (0 << 16)
+#       define R128_WM_SEL_16DW           (1 << 16)
+#       define R128_WM_SEL_32DW           (2 << 16)
+#define R128_PM4_VC_DEBUG_CONFIG          0x07a4
+#define R128_PM4_VC_STAT                  0x07a8
+#define R128_PM4_VC_TIMESTAMP0            0x07b0
+#define R128_PM4_VC_TIMESTAMP1            0x07b4
+#define R128_PM4_STAT                     0x07b8
+#       define R128_PM4_FIFOCNT_MASK      0x0fff
+#       define R128_PM4_BUSY              (1 << 16)
+#       define R128_PM4_GUI_ACTIVE        (1U << 31) /* unsigned: bit 31 */
+#define R128_PM4_BUFFER_ADDR              0x07f0
+#define R128_PM4_MICRO_CNTL               0x07fc
+#       define R128_PM4_MICRO_FREERUN     (1 << 30)
+#define R128_PM4_FIFO_DATA_EVEN           0x1000
+#define R128_PM4_FIFO_DATA_ODD            0x1004
+
+#define R128_SCALE_3D_CNTL                0x1a00
+#       define R128_SCALE_DITHER_ERR_DIFF         (0  <<  1)
+#       define R128_SCALE_DITHER_TABLE            (1  <<  1)
+#       define R128_TEX_CACHE_SIZE_FULL           (0  <<  2)
+#       define R128_TEX_CACHE_SIZE_HALF           (1  <<  2)
+#       define R128_DITHER_INIT_CURR              (0  <<  3)
+#       define R128_DITHER_INIT_RESET             (1  <<  3)
+#       define R128_ROUND_24BIT                   (1  <<  4)
+#       define R128_TEX_CACHE_DISABLE             (1  <<  5)
+#       define R128_SCALE_3D_NOOP                 (0  <<  6)
+#       define R128_SCALE_3D_SCALE                (1  <<  6)
+#       define R128_SCALE_3D_TEXMAP_SHADE         (2  <<  6)
+#       define R128_SCALE_PIX_BLEND               (0  <<  8)
+#       define R128_SCALE_PIX_REPLICATE           (1  <<  8)
+#       define R128_TEX_CACHE_SPLIT               (1  <<  9)
+#       define R128_APPLE_YUV_MODE                (1  << 10)
+#       define R128_TEX_CACHE_PALLETE_MODE        (1  << 11)
+#       define R128_ALPHA_COMB_ADD_CLAMP          (0  << 12)
+#       define R128_ALPHA_COMB_ADD_NCLAMP         (1  << 12)
+#       define R128_ALPHA_COMB_SUB_SRC_DST_CLAMP  (2  << 12)
+#       define R128_ALPHA_COMB_SUB_SRC_DST_NCLAMP (3  << 12)
+#       define R128_ALPHA_COMB_FCN_MASK           (3  << 12)
+#       define R128_FOG_VERTEX                    (0  << 14)
+#       define R128_FOG_TABLE                     (1  << 14)
+#       define R128_SIGNED_DST_CLAMP              (1  << 15)
+
+#       define R128_ALPHA_BLEND_ZERO              (0 )
+#       define R128_ALPHA_BLEND_ONE               (1 )
+#       define R128_ALPHA_BLEND_SRCCOLOR          (2 )
+#       define R128_ALPHA_BLEND_INVSRCCOLOR       (3 )
+#       define R128_ALPHA_BLEND_SRCALPHA          (4 )
+#       define R128_ALPHA_BLEND_INVSRCALPHA       (5 )
+#       define R128_ALPHA_BLEND_DSTALPHA          (6 )
+#       define R128_ALPHA_BLEND_INVDSTALPHA       (7 )
+#       define R128_ALPHA_BLEND_DSTCOLOR          (8 )
+#       define R128_ALPHA_BLEND_INVDSTCOLOR       (9 )
+#       define R128_ALPHA_BLEND_SAT               (10) /* aka SRCALPHASAT */
+#       define R128_ALPHA_BLEND_BLEND             (11) /* aka BOTHSRCALPHA */
+#       define R128_ALPHA_BLEND_INVBLEND          (12) /* aka BOTHINVSRCALPHA */
+#       define R128_ALPHA_BLEND_MASK              (15)
+
+#       define R128_ALPHA_BLEND_SRC_SHIFT         (16)
+#       define R128_ALPHA_BLEND_DST_SHIFT         (20)
+
+#       define R128_ALPHA_TEST_NEVER              (0  << 24)
+#       define R128_ALPHA_TEST_LESS               (1  << 24)
+#       define R128_ALPHA_TEST_LESSEQUAL          (2  << 24)
+#       define R128_ALPHA_TEST_EQUAL              (3  << 24)
+#       define R128_ALPHA_TEST_GREATEREQUAL       (4  << 24)
+#       define R128_ALPHA_TEST_GREATER            (5  << 24)
+#       define R128_ALPHA_TEST_NEQUAL             (6  << 24)
+#       define R128_ALPHA_TEST_ALWAYS             (7  << 24)
+#       define R128_ALPHA_TEST_MASK               (7  << 24)
+#       define R128_COMPOSITE_SHADOW_CMP_EQUAL    (0  << 28)
+#       define R128_COMPOSITE_SHADOW_CMP_NEQUAL   (1  << 28)
+#       define R128_COMPOSITE_SHADOW              (1  << 29)
+#       define R128_TEX_MAP_ALPHA_IN_TEXTURE      (1  << 30)
+#       define R128_TEX_CACHE_LINE_SIZE_8QW       (0  << 31)
+#       define R128_TEX_CACHE_LINE_SIZE_4QW       (1U << 31) /* unsigned: bit 31 */
+#define R128_SCALE_3D_DATATYPE            0x1a20
+
+#define R128_SETUP_CNTL                   0x1bc4
+#       define R128_DONT_START_TRIANGLE   (1 <<  0)
+#       define R128_Z_BIAS                (0 <<  1)
+#       define R128_DONT_START_ANY_ON     (1 <<  2)
+#       define R128_COLOR_SOLID_COLOR     (0 <<  3)
+#       define R128_COLOR_FLAT_VERT_1     (1 <<  3)
+#       define R128_COLOR_FLAT_VERT_2     (2 <<  3)
+#       define R128_COLOR_FLAT_VERT_3     (3 <<  3)
+#       define R128_COLOR_GOURAUD         (4 <<  3)
+#       define R128_PRIM_TYPE_TRI         (0 <<  7)
+#       define R128_PRIM_TYPE_LINE        (1 <<  7)
+#       define R128_PRIM_TYPE_POINT       (2 <<  7)
+#       define R128_PRIM_TYPE_POLY_EDGE   (3 <<  7)
+#       define R128_TEXTURE_ST_MULT_W     (0 <<  9)
+#       define R128_TEXTURE_ST_DIRECT     (1 <<  9)
+#       define R128_STARTING_VERTEX_1     (1 << 14)
+#       define R128_STARTING_VERTEX_2     (2 << 14)
+#       define R128_STARTING_VERTEX_3     (3 << 14)
+#       define R128_ENDING_VERTEX_1       (1 << 16)
+#       define R128_ENDING_VERTEX_2       (2 << 16)
+#       define R128_ENDING_VERTEX_3       (3 << 16)
+#       define R128_SU_POLY_LINE_LAST     (0 << 18)
+#       define R128_SU_POLY_LINE_NOT_LAST (1 << 18)
+#       define R128_SUB_PIX_2BITS         (0 << 19)
+#       define R128_SUB_PIX_4BITS         (1 << 19)
+#       define R128_SET_UP_CONTINUE       (1U << 31) /* unsigned: bit 31 */
+
+#define R128_WINDOW_XY_OFFSET             0x1bcc
+#       define R128_WINDOW_Y_SHIFT        4
+#       define R128_WINDOW_X_SHIFT        20
+
+#define R128_Z_OFFSET_C                   0x1c90
+#define R128_Z_PITCH_C                    0x1c94
+#       define R128_Z_TILE                    (1 << 16)
+#define R128_Z_STEN_CNTL_C                0x1c98
+#       define R128_Z_PIX_WIDTH_16            (0 <<  1)
+#       define R128_Z_PIX_WIDTH_24            (1 <<  1)
+#       define R128_Z_PIX_WIDTH_32            (2 <<  1)
+#       define R128_Z_PIX_WIDTH_MASK          (3 <<  1)
+#       define R128_Z_TEST_NEVER              (0 <<  4)
+#       define R128_Z_TEST_LESS               (1 <<  4)
+#       define R128_Z_TEST_LESSEQUAL          (2 <<  4)
+#       define R128_Z_TEST_EQUAL              (3 <<  4)
+#       define R128_Z_TEST_GREATEREQUAL       (4 <<  4)
+#       define R128_Z_TEST_GREATER            (5 <<  4)
+#       define R128_Z_TEST_NEQUAL             (6 <<  4)
+#       define R128_Z_TEST_ALWAYS             (7 <<  4)
+#       define R128_Z_TEST_MASK               (7 <<  4)
+#       define R128_STENCIL_TEST_NEVER        (0 << 12)
+#       define R128_STENCIL_TEST_LESS         (1 << 12)
+#       define R128_STENCIL_TEST_LESSEQUAL    (2 << 12)
+#       define R128_STENCIL_TEST_EQUAL        (3 << 12)
+#       define R128_STENCIL_TEST_GREATEREQUAL (4 << 12)
+#       define R128_STENCIL_TEST_GREATER      (5 << 12)
+#       define R128_STENCIL_TEST_NEQUAL       (6 << 12)
+#       define R128_STENCIL_TEST_ALWAYS       (7 << 12)
+#       define R128_STENCIL_TEST_MASK         (7 << 12)
+#       define R128_STENCIL_S_FAIL_KEEP       (0 << 16)
+#       define R128_STENCIL_S_FAIL_ZERO       (1 << 16)
+#       define R128_STENCIL_S_FAIL_REPLACE    (2 << 16)
+#       define R128_STENCIL_S_FAIL_INC        (3 << 16)
+#       define R128_STENCIL_S_FAIL_DEC        (4 << 16)
+#       define R128_STENCIL_S_FAIL_INV        (5 << 16)
+#       define R128_STENCIL_S_FAIL_INC_WRAP   (6 << 16)	/* GUESS */
+#       define R128_STENCIL_S_FAIL_DEC_WRAP   (7 << 16)	/* GUESS */
+#       define R128_STENCIL_S_FAIL_MASK       (7 << 16)
+#       define R128_STENCIL_ZPASS_KEEP        (0 << 20)
+#       define R128_STENCIL_ZPASS_ZERO        (1 << 20)
+#       define R128_STENCIL_ZPASS_REPLACE     (2 << 20)
+#       define R128_STENCIL_ZPASS_INC         (3 << 20)
+#       define R128_STENCIL_ZPASS_DEC         (4 << 20)
+#       define R128_STENCIL_ZPASS_INV         (5 << 20)
+#       define R128_STENCIL_ZPASS_INC_WRAP    (6 << 20)	/* GUESS */
+#       define R128_STENCIL_ZPASS_DEC_WRAP    (7 << 20)	/* GUESS */
+#       define R128_STENCIL_ZPASS_MASK        (7 << 20)
+#       define R128_STENCIL_ZFAIL_KEEP        (0 << 24)
+#       define R128_STENCIL_ZFAIL_ZERO        (1 << 24)
+#       define R128_STENCIL_ZFAIL_REPLACE     (2 << 24)
+#       define R128_STENCIL_ZFAIL_INC         (3 << 24)
+#       define R128_STENCIL_ZFAIL_DEC         (4 << 24)
+#       define R128_STENCIL_ZFAIL_INV         (5 << 24)
+#       define R128_STENCIL_ZFAIL_INC_WRAP    (6 << 24)	/* GUESS */
+#       define R128_STENCIL_ZFAIL_DEC_WRAP    (7 << 24)	/* GUESS */
+#       define R128_STENCIL_ZFAIL_MASK        (7 << 24)
+#define R128_TEX_CNTL_C                   0x1c9c
+#       define R128_Z_ENABLE                   (1 <<  0)
+#       define R128_Z_WRITE_ENABLE             (1 <<  1)
+#       define R128_STENCIL_ENABLE             (1 <<  3)
+#       define R128_SHADE_ENABLE               (0 <<  4)
+#       define R128_TEXMAP_ENABLE              (1 <<  4)
+#       define R128_SEC_TEXMAP_ENABLE          (1 <<  5)
+#       define R128_FOG_ENABLE                 (1 <<  7)
+#       define R128_DITHER_ENABLE              (1 <<  8)
+#       define R128_ALPHA_ENABLE               (1 <<  9)
+#       define R128_ALPHA_TEST_ENABLE          (1 << 10)
+#       define R128_SPEC_LIGHT_ENABLE          (1 << 11)
+#       define R128_TEX_CHROMA_KEY_ENABLE      (1 << 12)
+#       define R128_ALPHA_IN_TEX_COMPLETE_A    (0 << 13)
+#       define R128_ALPHA_IN_TEX_LSB_A         (1 << 13)
+#       define R128_LIGHT_DIS                  (0 << 14)
+#       define R128_LIGHT_COPY                 (1 << 14)
+#       define R128_LIGHT_MODULATE             (2 << 14)
+#       define R128_LIGHT_ADD                  (3 << 14)
+#       define R128_LIGHT_BLEND_CONSTANT       (4 << 14)
+#       define R128_LIGHT_BLEND_TEXTURE        (5 << 14)
+#       define R128_LIGHT_BLEND_VERTEX         (6 << 14)
+#       define R128_LIGHT_BLEND_CONST_COLOR    (7 << 14)
+#       define R128_ALPHA_LIGHT_DIS            (0 << 18)
+#       define R128_ALPHA_LIGHT_COPY           (1 << 18)
+#       define R128_ALPHA_LIGHT_MODULATE       (2 << 18)
+#       define R128_ALPHA_LIGHT_ADD            (3 << 18)
+#       define R128_ANTI_ALIAS                 (1 << 21)
+#       define R128_TEX_CACHE_FLUSH            (1 << 23)
+#       define R128_LOD_BIAS_SHIFT             24
+#       define R128_LOD_BIAS_MASK              (0xffU << 24) /* unsigned: covers bit 31 */
+#define R128_MISC_3D_STATE_CNTL_REG       0x1ca0
+#       define R128_REF_ALPHA_MASK                  0xff
+#       define R128_MISC_SCALE_3D_NOOP              (0  <<  8)
+#       define R128_MISC_SCALE_3D_SCALE             (1  <<  8)
+#       define R128_MISC_SCALE_3D_TEXMAP_SHADE      (2  <<  8)
+#       define R128_MISC_SCALE_PIX_BLEND            (0  << 10)
+#       define R128_MISC_SCALE_PIX_REPLICATE        (1  << 10)
+/* Bits [14:12] are the same as R128_SCALE_3D_CNTL */
+/* Bit  [15]    is unknown */
+/* Bits [26:16] are the same as R128_SCALE_3D_CNTL */
+/* Bits [31:27] are unknown */
+
+#define R128_TEXTURE_CLR_CMP_CLR_C        0x1ca4
+#define R128_TEXTURE_CLR_CMP_MSK_C        0x1ca8
+#define R128_FOG_COLOR_C                  0x1cac
+#       define R128_FOG_BLUE_SHIFT             0
+#       define R128_FOG_GREEN_SHIFT            8
+#       define R128_FOG_RED_SHIFT             16
+#define R128_PRIM_TEX_CNTL_C              0x1cb0
+#       define R128_MIN_BLEND_NEAREST          (0  <<  1)
+#       define R128_MIN_BLEND_LINEAR           (1  <<  1)
+#       define R128_MIN_BLEND_MIPNEAREST       (2  <<  1)
+#       define R128_MIN_BLEND_MIPLINEAR        (3  <<  1)
+#       define R128_MIN_BLEND_LINEARMIPNEAREST (4  <<  1)
+#       define R128_MIN_BLEND_LINEARMIPLINEAR  (5  <<  1)
+#       define R128_MIN_BLEND_MASK             (7  <<  1)
+#       define R128_MAG_BLEND_NEAREST          (0  <<  4)
+#       define R128_MAG_BLEND_LINEAR           (1  <<  4)
+#       define R128_MAG_BLEND_MASK             (7  <<  4)
+#       define R128_MIP_MAP_DISABLE            (1  <<  7)
+#       define R128_TEX_CLAMP_S_WRAP           (0  <<  8)
+#       define R128_TEX_CLAMP_S_MIRROR         (1  <<  8)
+#       define R128_TEX_CLAMP_S_CLAMP          (2  <<  8)
+#       define R128_TEX_CLAMP_S_BORDER_COLOR   (3  <<  8)
+#       define R128_TEX_CLAMP_S_MASK           (3  <<  8)
+#       define R128_TEX_WRAP_S                 (1  << 10)
+#       define R128_TEX_CLAMP_T_WRAP           (0  << 11)
+#       define R128_TEX_CLAMP_T_MIRROR         (1  << 11)
+#       define R128_TEX_CLAMP_T_CLAMP          (2  << 11)
+#       define R128_TEX_CLAMP_T_BORDER_COLOR   (3  << 11)
+#       define R128_TEX_CLAMP_T_MASK           (3  << 11)
+#       define R128_TEX_WRAP_T                 (1  << 13)
+#       define R128_TEX_PERSPECTIVE_DISABLE    (1  << 14)
+#       define R128_DATATYPE_VQ                (0  << 16)
+#       define R128_DATATYPE_CI4               (1  << 16)
+#       define R128_DATATYPE_CI8               (2  << 16)
+#       define R128_DATATYPE_ARGB1555          (3  << 16)
+#       define R128_DATATYPE_RGB565            (4  << 16)
+#       define R128_DATATYPE_RGB888            (5  << 16)
+#       define R128_DATATYPE_ARGB8888          (6  << 16)
+#       define R128_DATATYPE_RGB332            (7  << 16)
+#       define R128_DATATYPE_Y8                (8  << 16)
+#       define R128_DATATYPE_RGB8              (9  << 16)
+#       define R128_DATATYPE_CI16              (10 << 16)
+#       define R128_DATATYPE_YVYU422           (11 << 16)
+#       define R128_DATATYPE_VYUY422           (12 << 16)
+#       define R128_DATATYPE_AYUV444           (14 << 16)
+#       define R128_DATATYPE_ARGB4444          (15 << 16)
+#       define R128_PALLETE_EITHER             (0  << 20)
+#       define R128_PALLETE_1                  (1  << 20)
+#       define R128_PALLETE_2                  (2  << 20)
+#       define R128_PSEUDOCOLOR_DT_RGB565      (0  << 24)
+#       define R128_PSEUDOCOLOR_DT_ARGB1555    (1  << 24)
+#       define R128_PSEUDOCOLOR_DT_ARGB4444    (2  << 24)
+#define R128_PRIM_TEXTURE_COMBINE_CNTL_C  0x1cb4
+#       define R128_COMB_DIS                   (0  <<  0)
+#       define R128_COMB_COPY                  (1  <<  0)
+#       define R128_COMB_COPY_INP              (2  <<  0)
+#       define R128_COMB_MODULATE              (3  <<  0)
+#       define R128_COMB_MODULATE2X            (4  <<  0)
+#       define R128_COMB_MODULATE4X            (5  <<  0)
+#       define R128_COMB_ADD                   (6  <<  0)
+#       define R128_COMB_ADD_SIGNED            (7  <<  0)
+#       define R128_COMB_BLEND_VERTEX          (8  <<  0)
+#       define R128_COMB_BLEND_TEXTURE         (9  <<  0)
+#       define R128_COMB_BLEND_CONST           (10 <<  0)
+#       define R128_COMB_BLEND_PREMULT         (11 <<  0)
+#       define R128_COMB_BLEND_PREV            (12 <<  0)
+#       define R128_COMB_BLEND_PREMULT_INV     (13 <<  0)
+#       define R128_COMB_ADD_SIGNED2X          (14 <<  0)
+#       define R128_COMB_BLEND_CONST_COLOR     (15 <<  0)
+#       define R128_COMB_MASK                  (15 <<  0)
+#       define R128_COLOR_FACTOR_CONST_COLOR   (0  <<  4)
+#       define R128_COLOR_FACTOR_NCONST_COLOR  (1  <<  4)
+#       define R128_COLOR_FACTOR_TEX           (4  <<  4)
+#       define R128_COLOR_FACTOR_NTEX          (5  <<  4)
+#       define R128_COLOR_FACTOR_ALPHA         (6  <<  4)
+#       define R128_COLOR_FACTOR_NALPHA        (7  <<  4)
+#       define R128_COLOR_FACTOR_PREV_COLOR    (8  <<  4)
+#       define R128_COLOR_FACTOR_MASK          (15 <<  4)
+#       define R128_COMB_FCN_MSB               (1  <<  8)
+#       define R128_INPUT_FACTOR_CONST_COLOR   (2  << 10)
+#       define R128_INPUT_FACTOR_CONST_ALPHA   (3  << 10)
+#       define R128_INPUT_FACTOR_INT_COLOR     (4  << 10)
+#       define R128_INPUT_FACTOR_INT_ALPHA     (5  << 10)
+#       define R128_INPUT_FACTOR_MASK          (15 << 10)
+#       define R128_COMB_ALPHA_DIS             (0  << 14)
+#       define R128_COMB_ALPHA_COPY            (1  << 14)
+#       define R128_COMB_ALPHA_COPY_INP        (2  << 14)
+#       define R128_COMB_ALPHA_MODULATE        (3  << 14)
+#       define R128_COMB_ALPHA_MODULATE2X      (4  << 14)
+#       define R128_COMB_ALPHA_MODULATE4X      (5  << 14)
+#       define R128_COMB_ALPHA_ADD             (6  << 14)
+#       define R128_COMB_ALPHA_ADD_SIGNED      (7  << 14)
+#       define R128_COMB_ALPHA_ADD_SIGNED2X    (14 << 14)
+#       define R128_COMB_ALPHA_MASK            (15 << 14)
+#       define R128_ALPHA_FACTOR_TEX_ALPHA     (6  << 18)
+#       define R128_ALPHA_FACTOR_NTEX_ALPHA    (7  << 18)
+#       define R128_ALPHA_FACTOR_MASK          (15 << 18)
+#       define R128_INP_FACTOR_A_CONST_ALPHA   (1  << 25)
+#       define R128_INP_FACTOR_A_INT_ALPHA     (2  << 25)
+#       define R128_INP_FACTOR_A_MASK          (7  << 25)
+#define R128_TEX_SIZE_PITCH_C             0x1cb8
+#       define R128_TEX_PITCH_SHIFT           0
+#       define R128_TEX_SIZE_SHIFT            4
+#       define R128_TEX_HEIGHT_SHIFT          8
+#       define R128_TEX_MIN_SIZE_SHIFT       12
+#       define R128_SEC_TEX_PITCH_SHIFT      16
+#       define R128_SEC_TEX_SIZE_SHIFT       20
+#       define R128_SEC_TEX_HEIGHT_SHIFT     24
+#       define R128_SEC_TEX_MIN_SIZE_SHIFT   28
+#       define R128_TEX_PITCH_MASK           (0x0f <<  0)
+#       define R128_TEX_SIZE_MASK            (0x0f <<  4)
+#       define R128_TEX_HEIGHT_MASK          (0x0f <<  8)
+#       define R128_TEX_MIN_SIZE_MASK        (0x0f << 12)
+#       define R128_SEC_TEX_PITCH_MASK       (0x0f << 16)
+#       define R128_SEC_TEX_SIZE_MASK        (0x0f << 20)
+#       define R128_SEC_TEX_HEIGHT_MASK      (0x0f << 24)
+#       define R128_SEC_TEX_MIN_SIZE_MASK    (0x0fU << 28) /* unsigned: covers bit 31 */
+#       define R128_TEX_SIZE_PITCH_SHIFT      0
+#       define R128_SEC_TEX_SIZE_PITCH_SHIFT 16
+#       define R128_TEX_SIZE_PITCH_MASK      (0xffff <<  0)
+#       define R128_SEC_TEX_SIZE_PITCH_MASK  (0xffffU << 16) /* unsigned: covers bit 31 */
+#define R128_PRIM_TEX_0_OFFSET_C          0x1cbc
+#define R128_PRIM_TEX_1_OFFSET_C          0x1cc0
+#define R128_PRIM_TEX_2_OFFSET_C          0x1cc4
+#define R128_PRIM_TEX_3_OFFSET_C          0x1cc8
+#define R128_PRIM_TEX_4_OFFSET_C          0x1ccc
+#define R128_PRIM_TEX_5_OFFSET_C          0x1cd0
+#define R128_PRIM_TEX_6_OFFSET_C          0x1cd4
+#define R128_PRIM_TEX_7_OFFSET_C          0x1cd8
+#define R128_PRIM_TEX_8_OFFSET_C          0x1cdc
+#define R128_PRIM_TEX_9_OFFSET_C          0x1ce0
+#define R128_PRIM_TEX_10_OFFSET_C         0x1ce4
+#       define R128_TEX_NO_TILE           (0 << 30)
+#       define R128_TEX_TILED_BY_HOST     (1 << 30)
+#       define R128_TEX_TILED_BY_STORAGE  (2U << 30) /* unsigned: sets bit 31 */
+#       define R128_TEX_TILED_BY_STORAGE2 (3U << 30) /* unsigned: sets bit 31 */
+
+#define R128_SEC_TEX_CNTL_C               0x1d00
+#       define R128_SEC_SELECT_PRIM_ST    (0  <<  0)
+#       define R128_SEC_SELECT_SEC_ST     (1  <<  0)
+#define R128_SEC_TEX_COMBINE_CNTL_C       0x1d04
+#       define R128_INPUT_FACTOR_PREV_COLOR (8  << 10)
+#       define R128_INPUT_FACTOR_PREV_ALPHA (9  << 10)
+#       define R128_INP_FACTOR_A_PREV_ALPHA (4  << 25)
+#define R128_SEC_TEX_0_OFFSET_C           0x1d08
+#define R128_SEC_TEX_1_OFFSET_C           0x1d0c
+#define R128_SEC_TEX_2_OFFSET_C           0x1d10
+#define R128_SEC_TEX_3_OFFSET_C           0x1d14
+#define R128_SEC_TEX_4_OFFSET_C           0x1d18
+#define R128_SEC_TEX_5_OFFSET_C           0x1d1c
+#define R128_SEC_TEX_6_OFFSET_C           0x1d20
+#define R128_SEC_TEX_7_OFFSET_C           0x1d24
+#define R128_SEC_TEX_8_OFFSET_C           0x1d28
+#define R128_SEC_TEX_9_OFFSET_C           0x1d2c
+#define R128_SEC_TEX_10_OFFSET_C          0x1d30
+#define R128_CONSTANT_COLOR_C             0x1d34
+#       define R128_CONSTANT_BLUE_SHIFT        0
+#       define R128_CONSTANT_GREEN_SHIFT       8
+#       define R128_CONSTANT_RED_SHIFT        16
+#       define R128_CONSTANT_ALPHA_SHIFT      24
+#define R128_PRIM_TEXTURE_BORDER_COLOR_C  0x1d38
+#       define R128_PRIM_TEX_BORDER_BLUE_SHIFT   0
+#       define R128_PRIM_TEX_BORDER_GREEN_SHIFT  8
+#       define R128_PRIM_TEX_BORDER_RED_SHIFT   16
+#       define R128_PRIM_TEX_BORDER_ALPHA_SHIFT 24
+#define R128_SEC_TEXTURE_BORDER_COLOR_C   0x1d3c
+#       define R128_SEC_TEX_BORDER_BLUE_SHIFT   0
+#       define R128_SEC_TEX_BORDER_GREEN_SHIFT  8
+#       define R128_SEC_TEX_BORDER_RED_SHIFT   16
+#       define R128_SEC_TEX_BORDER_ALPHA_SHIFT 24
+#define R128_STEN_REF_MASK_C              0x1d40
+#       define R128_STEN_REFERENCE_SHIFT       0
+#       define R128_STEN_MASK_SHIFT           16
+#       define R128_STEN_WRITE_MASK_SHIFT     24
+#define R128_PLANE_3D_MASK_C              0x1d44
+#define R128_TEX_CACHE_STAT_COUNT         0x1974
+
+
+				/* Constants */
+#define R128_AGP_TEX_OFFSET               0x02000000
+
+#define R128_LAST_FRAME_REG               R128_GUI_SCRATCH_REG0
+
+				/* CCE packet types */
+#define R128_CCE_PACKET0                         0x00000000
+#define R128_CCE_PACKET0_ONE_REG_WR              0x00008000
+#define R128_CCE_PACKET1                         0x40000000
+#define R128_CCE_PACKET2                         0x80000000
+#define R128_CCE_PACKET3                         0xC0000000
+#define R128_CCE_PACKET3_NOP                     0xC0001000
+#define R128_CCE_PACKET3_PAINT                   0xC0001100
+#define R128_CCE_PACKET3_BITBLT                  0xC0001200
+#define R128_CCE_PACKET3_SMALLTEXT               0xC0001300
+#define R128_CCE_PACKET3_HOSTDATA_BLT            0xC0001400
+#define R128_CCE_PACKET3_POLYLINE                0xC0001500
+#define R128_CCE_PACKET3_SCALING                 0xC0001600
+#define R128_CCE_PACKET3_TRANS_SCALING           0xC0001700
+#define R128_CCE_PACKET3_POLYSCANLINES           0xC0001800
+#define R128_CCE_PACKET3_NEXT_CHAR               0xC0001900
+#define R128_CCE_PACKET3_PAINT_MULTI             0xC0001A00
+#define R128_CCE_PACKET3_BITBLT_MULTI            0xC0001B00
+#define R128_CCE_PACKET3_PLY_NEXTSCAN            0xC0001D00
+#define R128_CCE_PACKET3_SET_SCISSORS            0xC0001E00
+#define R128_CCE_PACKET3_SET_MODE24BPP           0xC0001F00
+#define R128_CCE_PACKET3_CNTL_PAINT              0xC0009100
+#define R128_CCE_PACKET3_CNTL_BITBLT             0xC0009200
+#define R128_CCE_PACKET3_CNTL_SMALLTEXT          0xC0009300
+#define R128_CCE_PACKET3_CNTL_HOSTDATA_BLT       0xC0009400
+#define R128_CCE_PACKET3_CNTL_POLYLINE           0xC0009500
+#define R128_CCE_PACKET3_CNTL_SCALING            0xC0009600
+#define R128_CCE_PACKET3_CNTL_TRANS_SCALING      0xC0009700
+#define R128_CCE_PACKET3_CNTL_POLYSCANLINES      0xC0009800
+#define R128_CCE_PACKET3_CNTL_NEXT_CHAR          0xC0009900
+#define R128_CCE_PACKET3_CNTL_PAINT_MULTI        0xC0009A00
+#define R128_CCE_PACKET3_CNTL_BITBLT_MULTI       0xC0009B00
+#define R128_CCE_PACKET3_CNTL_TRANS_BITBLT       0xC0009C00
+#define R128_CCE_PACKET3_3D_SAVE_CONTEXT         0xC0002000
+#define R128_CCE_PACKET3_3D_PLAY_CONTEXT         0xC0002100
+#define R128_CCE_PACKET3_3D_RNDR_GEN_INDX_PRIM   0xC0002300
+#define R128_CCE_PACKET3_3D_RNDR_GEN_PRIM        0xC0002500
+#define R128_CCE_PACKET3_LOAD_PALETTE            0xC0002C00
+#define R128_CCE_PACKET3_PURGE                   0xC0002D00
+#define R128_CCE_PACKET3_NEXT_VERTEX_BUNDLE      0xC0002E00
+#       define R128_CCE_PACKET_MASK              0xC0000000
+#       define R128_CCE_PACKET_COUNT_MASK        0x3fff0000
+#       define R128_CCE_PACKET_MAX_DWORDS        (1 << 12)
+#       define R128_CCE_PACKET0_REG_MASK         0x000007ff
+#       define R128_CCE_PACKET1_REG0_MASK        0x000007ff
+#       define R128_CCE_PACKET1_REG1_MASK        0x003ff800
+
+#define R128_CCE_VC_FRMT_RHW                     0x00000001
+#define R128_CCE_VC_FRMT_DIFFUSE_BGR             0x00000002
+#define R128_CCE_VC_FRMT_DIFFUSE_A               0x00000004
+#define R128_CCE_VC_FRMT_DIFFUSE_ARGB            0x00000008
+#define R128_CCE_VC_FRMT_SPEC_BGR                0x00000010
+#define R128_CCE_VC_FRMT_SPEC_F                  0x00000020
+#define R128_CCE_VC_FRMT_SPEC_FRGB               0x00000040
+#define R128_CCE_VC_FRMT_S_T                     0x00000080
+#define R128_CCE_VC_FRMT_S2_T2                   0x00000100
+#define R128_CCE_VC_FRMT_RHW2                    0x00000200
+
+#define R128_CCE_VC_CNTL_PRIM_TYPE_NONE          0x00000000
+#define R128_CCE_VC_CNTL_PRIM_TYPE_POINT         0x00000001
+#define R128_CCE_VC_CNTL_PRIM_TYPE_LINE          0x00000002
+#define R128_CCE_VC_CNTL_PRIM_TYPE_POLY_LINE     0x00000003
+#define R128_CCE_VC_CNTL_PRIM_TYPE_TRI_LIST      0x00000004
+#define R128_CCE_VC_CNTL_PRIM_TYPE_TRI_FAN       0x00000005
+#define R128_CCE_VC_CNTL_PRIM_TYPE_TRI_STRIP     0x00000006
+#define R128_CCE_VC_CNTL_PRIM_TYPE_TRI_TYPE2     0x00000007
+#define R128_CCE_VC_CNTL_PRIM_WALK_IND           0x00000010
+#define R128_CCE_VC_CNTL_PRIM_WALK_LIST          0x00000020
+#define R128_CCE_VC_CNTL_PRIM_WALK_RING          0x00000030
+#define R128_CCE_VC_CNTL_NUM_SHIFT               16
+
+/* hmm copied blindly (no specs) from radeon.h ... */
+#define R128_RE_TOP_LEFT                  0x26c0
+#       define R128_RE_LEFT_SHIFT         0
+#       define R128_RE_TOP_SHIFT          16
+#define R128_RE_WIDTH_HEIGHT              0x1c44
+#       define R128_RE_WIDTH_SHIFT        0
+#       define R128_RE_HEIGHT_SHIFT       16
+
+#endif
diff --git a/src/mesa/drivers/dri/r128/server/r128_version.h b/src/mesa/drivers/dri/r128/server/r128_version.h
new file mode 100644
index 0000000000..783711ef97
--- /dev/null
+++ b/src/mesa/drivers/dri/r128/server/r128_version.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2000 through 2003 by Marc Aurele La France (TSI @ UQV), tsi@xfree86.org
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of Marc Aurele La France not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  Marc Aurele La France makes no representations
+ * about the suitability of this software for any purpose.  It is provided
+ * "as-is" without express or implied warranty.
+ *
+ * MARC AURELE LA FRANCE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.  IN NO
+ * EVENT SHALL MARC AURELE LA FRANCE BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _R128_VERSION_H_
+#define _R128_VERSION_H_ 1
+
+#undef  R128_NAME
+#undef  R128_DRIVER_NAME
+#undef  R128_VERSION_MAJOR
+#undef  R128_VERSION_MINOR
+#undef  R128_VERSION_PATCH
+#undef  R128_VERSION_CURRENT
+#undef  R128_VERSION_EVALUATE
+#undef  R128_VERSION_STRINGIFY
+#undef  R128_VERSION_NAME
+
+#define R128_NAME          "R128"
+#define R128_DRIVER_NAME   "r128"
+
+#define R128_VERSION_MAJOR 4
+#define R128_VERSION_MINOR 0
+#define R128_VERSION_PATCH 1
+
+#ifndef R128_VERSION_EXTRA
+#define R128_VERSION_EXTRA ""
+#endif
+
+#define R128_VERSION_CURRENT \
+    ((R128_VERSION_MAJOR << 20) | \
+     (R128_VERSION_MINOR << 10) | \
+     (R128_VERSION_PATCH))
+
+#define R128_VERSION_EVALUATE(__x) #__x
+#define R128_VERSION_STRINGIFY(_x) R128_VERSION_EVALUATE(_x)
+#define R128_VERSION_NAME /* "MAJOR.MINOR.PATCH" followed by EXTRA */ \
+    R128_VERSION_STRINGIFY(R128_VERSION_MAJOR) "."                \
+    R128_VERSION_STRINGIFY(R128_VERSION_MINOR) "."                \
+    R128_VERSION_STRINGIFY(R128_VERSION_PATCH) R128_VERSION_EXTRA
+
+#endif /* _R128_VERSION_H_ */
diff --git a/src/mesa/drivers/dri/r200/Doxyfile b/src/mesa/drivers/dri/r200/Doxyfile
new file mode 100644
index 0000000000..27b3d0371e
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/Doxyfile
@@ -0,0 +1,232 @@
+# Doxyfile 1.3.2-Gideon
+
+#---------------------------------------------------------------------------
+# General configuration options
+#---------------------------------------------------------------------------
+PROJECT_NAME           = r200
+PROJECT_NUMBER         = $VERSION$
+OUTPUT_DIRECTORY       = 
+OUTPUT_LANGUAGE        = English
+USE_WINDOWS_ENCODING   = NO
+EXTRACT_ALL            = NO
+EXTRACT_PRIVATE        = NO
+EXTRACT_STATIC         = NO
+EXTRACT_LOCAL_CLASSES  = YES
+HIDE_UNDOC_MEMBERS     = NO
+HIDE_UNDOC_CLASSES     = NO
+HIDE_FRIEND_COMPOUNDS  = NO
+HIDE_IN_BODY_DOCS      = NO
+BRIEF_MEMBER_DESC      = YES
+REPEAT_BRIEF           = YES
+ALWAYS_DETAILED_SEC    = NO
+INLINE_INHERITED_MEMB  = NO
+FULL_PATH_NAMES        = NO
+STRIP_FROM_PATH        = 
+INTERNAL_DOCS          = NO
+CASE_SENSE_NAMES       = YES
+SHORT_NAMES            = NO
+HIDE_SCOPE_NAMES       = NO
+SHOW_INCLUDE_FILES     = YES
+JAVADOC_AUTOBRIEF      = NO
+MULTILINE_CPP_IS_BRIEF = NO
+DETAILS_AT_TOP         = NO
+INHERIT_DOCS           = YES
+INLINE_INFO            = YES
+SORT_MEMBER_DOCS       = YES
+DISTRIBUTE_GROUP_DOC   = NO
+TAB_SIZE               = 8
+GENERATE_TODOLIST      = YES
+GENERATE_TESTLIST      = YES
+GENERATE_BUGLIST       = YES
+GENERATE_DEPRECATEDLIST= YES
+ALIASES                = 
+ENABLED_SECTIONS       = 
+MAX_INITIALIZER_LINES  = 30
+OPTIMIZE_OUTPUT_FOR_C  = NO
+OPTIMIZE_OUTPUT_JAVA   = NO
+SHOW_USED_FILES        = YES
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+QUIET                  = NO
+WARNINGS               = YES
+WARN_IF_UNDOCUMENTED   = YES
+WARN_IF_DOC_ERROR      = YES
+WARN_FORMAT            = "$file:$line: $text"
+WARN_LOGFILE           = 
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+INPUT                  = /home/temp/Mesa/src/drv/r200
+FILE_PATTERNS          = *.c \
+                         *.cc \
+                         *.cxx \
+                         *.cpp \
+                         *.c++ \
+                         *.java \
+                         *.ii \
+                         *.ixx \
+                         *.ipp \
+                         *.i++ \
+                         *.inl \
+                         *.h \
+                         *.hh \
+                         *.hxx \
+                         *.hpp \
+                         *.h++ \
+                         *.idl \
+                         *.odl \
+                         *.cs \
+                         *.C \
+                         *.H \
+                         *.tlh \
+                         *.diff \
+                         *.patch \
+                         *.moc \
+                         *.xpm
+RECURSIVE              = yes
+EXCLUDE                = 
+EXCLUDE_SYMLINKS       = NO
+EXCLUDE_PATTERNS       = 
+EXAMPLE_PATH           = 
+EXAMPLE_PATTERNS       = *
+EXAMPLE_RECURSIVE      = NO
+IMAGE_PATH             = 
+INPUT_FILTER           = 
+FILTER_SOURCE_FILES    = NO
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+SOURCE_BROWSER         = NO
+INLINE_SOURCES         = NO
+STRIP_CODE_COMMENTS    = YES
+REFERENCED_BY_RELATION = YES
+REFERENCES_RELATION    = YES
+VERBATIM_HEADERS       = YES
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+ALPHABETICAL_INDEX     = NO
+COLS_IN_ALPHA_INDEX    = 5
+IGNORE_PREFIX          = 
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+GENERATE_HTML          = YES
+HTML_OUTPUT            = html
+HTML_FILE_EXTENSION    = .html
+HTML_HEADER            = 
+HTML_FOOTER            = 
+HTML_STYLESHEET        = 
+HTML_ALIGN_MEMBERS     = YES
+GENERATE_HTMLHELP      = NO
+CHM_FILE               = 
+HHC_LOCATION           = 
+GENERATE_CHI           = NO
+BINARY_TOC             = NO
+TOC_EXPAND             = NO
+DISABLE_INDEX          = NO
+ENUM_VALUES_PER_LINE   = 4
+GENERATE_TREEVIEW      = NO
+TREEVIEW_WIDTH         = 250
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+GENERATE_LATEX         = YES
+LATEX_OUTPUT           = latex
+LATEX_CMD_NAME         = latex
+MAKEINDEX_CMD_NAME     = makeindex
+COMPACT_LATEX          = NO
+PAPER_TYPE             = a4wide
+EXTRA_PACKAGES         = 
+LATEX_HEADER           = 
+PDF_HYPERLINKS         = NO
+USE_PDFLATEX           = NO
+LATEX_BATCHMODE        = NO
+LATEX_HIDE_INDICES     = NO
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+GENERATE_RTF           = NO
+RTF_OUTPUT             = rtf
+COMPACT_RTF            = NO
+RTF_HYPERLINKS         = NO
+RTF_STYLESHEET_FILE    = 
+RTF_EXTENSIONS_FILE    = 
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+GENERATE_MAN           = NO
+MAN_OUTPUT             = man
+MAN_EXTENSION          = .3
+MAN_LINKS              = NO
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+GENERATE_XML           = yes
+XML_OUTPUT             = xml
+XML_SCHEMA             = 
+XML_DTD                = 
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+GENERATE_AUTOGEN_DEF   = NO
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+GENERATE_PERLMOD       = NO
+PERLMOD_LATEX          = NO
+PERLMOD_PRETTY         = YES
+PERLMOD_MAKEVAR_PREFIX = 
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor   
+#---------------------------------------------------------------------------
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = NO
+EXPAND_ONLY_PREDEF     = NO
+SEARCH_INCLUDES        = YES
+INCLUDE_PATH           = 
+INCLUDE_FILE_PATTERNS  = 
+PREDEFINED             = 
+EXPAND_AS_DEFINED      = 
+SKIP_FUNCTION_MACROS   = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+TAGFILES               = 
+GENERATE_TAGFILE       = 
+ALLEXTERNALS           = NO
+EXTERNAL_GROUPS        = YES
+PERL_PATH              = /usr/bin/perl
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool   
+#---------------------------------------------------------------------------
+CLASS_DIAGRAMS         = YES
+HIDE_UNDOC_RELATIONS   = YES
+HAVE_DOT               = NO
+CLASS_GRAPH            = YES
+COLLABORATION_GRAPH    = YES
+UML_LOOK               = NO
+TEMPLATE_RELATIONS     = NO
+INCLUDE_GRAPH          = YES
+INCLUDED_BY_GRAPH      = YES
+CALL_GRAPH             = NO
+GRAPHICAL_HIERARCHY    = YES
+DOT_IMAGE_FORMAT       = png
+DOT_PATH               = 
+DOTFILE_DIRS           = 
+MAX_DOT_GRAPH_WIDTH    = 1024
+MAX_DOT_GRAPH_HEIGHT   = 1024
+MAX_DOT_GRAPH_DEPTH    = 1000
+GENERATE_LEGEND        = YES
+DOT_CLEANUP            = YES
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine
+#---------------------------------------------------------------------------
+SEARCHENGINE           = NO
+CGI_NAME               = search.cgi
+CGI_URL                = 
+DOC_URL                = 
+DOC_ABSPATH            = 
+BIN_ABSPATH            = /usr/local/bin/
+EXT_DOC_PATHS          = 
diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile
new file mode 100644
index 0000000000..c9c1346c3a
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/Makefile
@@ -0,0 +1,65 @@
+# src/mesa/drivers/dri/r200/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+CFLAGS += $(RADEON_CFLAGS)
+
+LIBNAME = r200_dri.so
+# When RADEON_LDFLAGS is empty, the radeon CS/BO sources below are compiled into the driver.
+ifeq ($(RADEON_LDFLAGS),)
+CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c
+endif
+# radeon_* sources compiled into this driver (pulled into DRIVER_SOURCES below).
+RADEON_COMMON_SOURCES = \
+	radeon_bo_legacy.c \
+	radeon_common_context.c \
+	radeon_common.c \
+	radeon_cs_legacy.c \
+	radeon_dma.c \
+	radeon_debug.c \
+	radeon_fbo.c \
+	radeon_lock.c \
+	radeon_mipmap_tree.c \
+	radeon_pixel_read.c \
+	radeon_queryobj.c \
+	radeon_span.c \
+	radeon_texture.c \
+	radeon_tex_copy.c \
+	radeon_tex_getimage.c \
+	radeon_tile.c
+# r200 sources plus radeon_screen.c, $(EGL_SOURCES) and the two lists defined above.
+DRIVER_SOURCES = r200_context.c \
+		 r200_ioctl.c \
+		 r200_state.c \
+		 r200_state_init.c \
+		 r200_cmdbuf.c \
+		 r200_tex.c \
+		 r200_texstate.c \
+		 r200_tcl.c \
+		 r200_swtcl.c \
+		 r200_maos.c \
+		 r200_sanity.c \
+		 r200_fragshader.c \
+		 r200_vertprog.c \
+		 r200_blit.c \
+		 radeon_screen.c \
+		 $(EGL_SOURCES) \
+		 $(RADEON_COMMON_SOURCES) \
+		 $(CS_SOURCES)
+# COMMON_SOURCES is not assigned in this file; presumably it comes from an included makefile - confirm.
+C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
+
+X86_SOURCES = 
+
+DRIVER_DEFINES = -DRADEON_R200
+# Extra link dependencies, taken from RADEON_LDFLAGS when it is set.
+DRI_LIB_DEPS += $(RADEON_LDFLAGS)
+
+##### TARGETS #####
+
+# The targets themselves come from the template in the parent directory.
+include ../Makefile.template
+
+#INCLUDES += -I../radeon/server
+
diff --git a/src/mesa/drivers/dri/r200/r200_blit.c b/src/mesa/drivers/dri/r200/r200_blit.c
new file mode 100644
index 0000000000..e187fc0f61
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_blit.c
@@ -0,0 +1,541 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "r200_context.h"
+#include "r200_blit.h"
+
+/* Returns CP_PACKET0(reg, count - 1), or CP_PACKET2 when count is 0; rscrn is not used. */
+static inline uint32_t cmdpacket0(struct radeon_screen *rscrn, int reg, int count)
+{
+    if (count == 0)
+	    return CP_PACKET2;
+    return CP_PACKET0(reg, count - 1);
+}
+
+/* Returns 1 for the common formats supported as both textures and render targets, else 0. */
+unsigned r200_check_blit(gl_format mesa_format)
+{
+    unsigned supported = 0;
+
+    /* XXX others?  BE/LE? */
+    switch (mesa_format) {
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+    case MESA_FORMAT_RGB565:
+    case MESA_FORMAT_ARGB4444:
+    case MESA_FORMAT_ARGB1555:
+    case MESA_FORMAT_A8:
+    case MESA_FORMAT_L8:
+    case MESA_FORMAT_I8:
+    /* swizzled */
+    case MESA_FORMAT_RGBA8888:
+    case MESA_FORMAT_RGBA8888_REV:
+	    /* ??? - a listed format is still rejected when it reports depth bits */
+	    supported = !(_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0);
+	    break;
+    default:
+	    break;
+    }
+
+    return supported;
+}
+
+static inline void emit_vtx_state(struct r200_context *r200)
+{
+    BATCH_LOCALS(&r200->radeon);
+    /* Vertex-stage setup for the blit draw: seven register writes in a 14-entry batch. */
+    BEGIN_BATCH(14);
+    if (r200->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+	    OUT_BATCH_REGVAL(R200_SE_VAP_CNTL_STATUS, 0);
+    } else {
+	    OUT_BATCH_REGVAL(R200_SE_VAP_CNTL_STATUS, RADEON_TCL_BYPASS); /* chip without the TCL flag: set the bypass bit */
+    }
+    OUT_BATCH_REGVAL(R200_SE_VAP_CNTL, (R200_VAP_FORCE_W_TO_ONE |
+					(9 << R200_VAP_VF_MAX_VTX_NUM__SHIFT)));
+    OUT_BATCH_REGVAL(R200_SE_VTX_STATE_CNTL, 0);
+    OUT_BATCH_REGVAL(R200_SE_VTE_CNTL, 0);
+    OUT_BATCH_REGVAL(R200_SE_VTX_FMT_0, R200_VTX_XY);
+    OUT_BATCH_REGVAL(R200_SE_VTX_FMT_1, (2 << R200_VTX_TEX0_COMP_CNT_SHIFT));
+    OUT_BATCH_REGVAL(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD |
+				      RADEON_BFACE_SOLID |
+				      RADEON_FFACE_SOLID |
+				      RADEON_VTX_PIX_CENTER_OGL |
+				      RADEON_ROUND_MODE_ROUND |
+				      RADEON_ROUND_PREC_4TH_PIX));
+    END_BATCH();
+}
+
+static void inline emit_tx_setup(struct r200_context *r200,
+				 gl_format src_mesa_format,
+				 gl_format dst_mesa_format,
+				 struct radeon_bo *bo,
+				 intptr_t offset,
+				 unsigned width,
+				 unsigned height,
+				 unsigned pitch)
+{
+    uint32_t txformat = R200_TXFORMAT_NON_POWER2;
+    BATCH_LOCALS(&r200->radeon);
+    /* Sets up texture unit 0 to read bo as src_mesa_format and the texture blend stages for dst_mesa_format. */
+    assert(width <= 2047);
+    assert(height <= 2047);
+    assert(offset % 32 == 0);
+
+    /* Map the source format to R200_TXFORMAT_* bits.  XXX others?  BE/LE? */
+    switch (src_mesa_format) {
+    case MESA_FORMAT_ARGB8888:
+	    txformat |= R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP;
+	    break;
+    case MESA_FORMAT_RGBA8888:
+	    txformat |= R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP;
+	    break;
+    case MESA_FORMAT_RGBA8888_REV:
+	    txformat |= R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP;
+	    break;
+    case MESA_FORMAT_XRGB8888:
+	    txformat |= R200_TXFORMAT_ARGB8888;
+	    break;
+    case MESA_FORMAT_RGB565:
+	    txformat |= R200_TXFORMAT_RGB565;
+	    break;
+    case MESA_FORMAT_ARGB4444:
+	    txformat |= R200_TXFORMAT_ARGB4444 | R200_TXFORMAT_ALPHA_IN_MAP;
+	    break;
+    case MESA_FORMAT_ARGB1555:
+	    txformat |= R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP;
+	    break;
+    case MESA_FORMAT_A8:
+    case MESA_FORMAT_I8:
+	    txformat |= R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP;
+	    break;
+    case MESA_FORMAT_L8:
+	    txformat |= R200_TXFORMAT_I8;
+	    break;
+    case MESA_FORMAT_AL88:
+	    txformat |= R200_TXFORMAT_AI88 | R200_TXFORMAT_ALPHA_IN_MAP;
+	    break;
+    default: /* unlisted formats keep only R200_TXFORMAT_NON_POWER2 */
+	    break;
+    }
+    /* The blend-stage programming is selected by the destination format. */
+    switch (dst_mesa_format) {
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+    case MESA_FORMAT_RGB565:
+    case MESA_FORMAT_ARGB4444:
+    case MESA_FORMAT_ARGB1555:
+    case MESA_FORMAT_A8:
+    case MESA_FORMAT_L8:
+    case MESA_FORMAT_I8:
+    default:
+	    /* no swizzle required (this group also catches every format not listed below) */
+	    BEGIN_BATCH(10);
+	    OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
+					      RADEON_TEX_BLEND_0_ENABLE));
+	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
+						  R200_TXC_ARG_B_ZERO |
+						  R200_TXC_ARG_C_R0_COLOR |
+						  R200_TXC_OP_MADD));
+	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
+						   R200_TXC_OUTPUT_REG_R0));
+	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
+						  R200_TXA_ARG_B_ZERO |
+						  R200_TXA_ARG_C_R0_ALPHA |
+						  R200_TXA_OP_MADD));
+	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, (R200_TXA_CLAMP_0_1 |
+						   R200_TXA_OUTPUT_REG_R0));
+	    END_BATCH();
+	    break;
+    case MESA_FORMAT_RGBA8888: /* one blend stage, with an output rotate on colour and a red replicate on alpha */
+	    BEGIN_BATCH(10);
+	    OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
+					      RADEON_TEX_BLEND_0_ENABLE));
+	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
+						  R200_TXC_ARG_B_ZERO |
+						  R200_TXC_ARG_C_R0_COLOR |
+						  R200_TXC_OP_MADD));
+	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
+						   R200_TXC_OUTPUT_ROTATE_GBA |
+						   R200_TXC_OUTPUT_REG_R0));
+	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
+						  R200_TXA_ARG_B_ZERO |
+						  R200_TXA_ARG_C_R0_ALPHA |
+						  R200_TXA_OP_MADD));
+	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, (R200_TXA_CLAMP_0_1 |
+						   (R200_TXA_REPL_RED << R200_TXA_REPL_ARG_C_SHIFT) |
+						   R200_TXA_OUTPUT_REG_R0));
+	    END_BATCH();
+	    break;
+    case MESA_FORMAT_RGBA8888_REV: /* four blend stages: build the swapped colour in r1, then copy r1 back to r0 */
+	    BEGIN_BATCH(34);
+	    OUT_BATCH_REGVAL(RADEON_PP_CNTL, (RADEON_TEX_0_ENABLE |
+					      RADEON_TEX_BLEND_0_ENABLE |
+					      RADEON_TEX_BLEND_1_ENABLE |
+					      RADEON_TEX_BLEND_2_ENABLE |
+					      RADEON_TEX_BLEND_3_ENABLE));
+	    /* r1.r = r0.b */
+	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_0, (R200_TXC_ARG_A_ZERO |
+						  R200_TXC_ARG_B_ZERO |
+						  R200_TXC_ARG_C_R0_COLOR |
+						  R200_TXC_OP_MADD));
+	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_0, (R200_TXC_CLAMP_0_1 |
+						   R200_TXC_OUTPUT_MASK_R |
+						   (R200_TXC_REPL_BLUE << R200_TXC_REPL_ARG_C_SHIFT) |
+						   R200_TXC_OUTPUT_REG_R1));
+	    /* r1.a = r0.a */
+	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_0, (R200_TXA_ARG_A_ZERO |
+						  R200_TXA_ARG_B_ZERO |
+						  R200_TXA_ARG_C_R0_ALPHA |
+						  R200_TXA_OP_MADD));
+	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_0, (R200_TXA_CLAMP_0_1 |
+						   R200_TXA_OUTPUT_REG_R1));
+	    /* r1.g = r0.g */
+	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_1, (R200_TXC_ARG_A_ZERO |
+						  R200_TXC_ARG_B_ZERO |
+						  R200_TXC_ARG_C_R0_COLOR |
+						  R200_TXC_OP_MADD));
+	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_1, (R200_TXC_CLAMP_0_1 |
+						   R200_TXC_OUTPUT_MASK_G |
+						   (R200_TXC_REPL_GREEN << R200_TXC_REPL_ARG_C_SHIFT) |
+						   R200_TXC_OUTPUT_REG_R1));
+	    /* r1.a = r0.a */
+	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_1, (R200_TXA_ARG_A_ZERO |
+						  R200_TXA_ARG_B_ZERO |
+						  R200_TXA_ARG_C_R0_ALPHA |
+						  R200_TXA_OP_MADD));
+	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_1, (R200_TXA_CLAMP_0_1 |
+						   R200_TXA_OUTPUT_REG_R1));
+	    /* r1.b = r0.r */
+	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_2, (R200_TXC_ARG_A_ZERO |
+						  R200_TXC_ARG_B_ZERO |
+						  R200_TXC_ARG_C_R0_COLOR |
+						  R200_TXC_OP_MADD));
+	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_2, (R200_TXC_CLAMP_0_1 |
+						   R200_TXC_OUTPUT_MASK_B |
+						   (R200_TXC_REPL_RED << R200_TXC_REPL_ARG_C_SHIFT) |
+						   R200_TXC_OUTPUT_REG_R1));
+	    /* r1.a = r0.a */
+	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_2, (R200_TXA_ARG_A_ZERO |
+						  R200_TXA_ARG_B_ZERO |
+						  R200_TXA_ARG_C_R0_ALPHA |
+						  R200_TXA_OP_MADD));
+	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_2, (R200_TXA_CLAMP_0_1 |
+						   R200_TXA_OUTPUT_REG_R1));
+	    /* r0.rgb = r1.rgb */
+	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND_3, (R200_TXC_ARG_A_ZERO |
+						  R200_TXC_ARG_B_ZERO |
+						  R200_TXC_ARG_C_R1_COLOR |
+						  R200_TXC_OP_MADD));
+	    OUT_BATCH_REGVAL(R200_PP_TXCBLEND2_3, (R200_TXC_CLAMP_0_1 |
+						   R200_TXC_OUTPUT_REG_R0));
+	    /* r0.a = r1.a */
+	    OUT_BATCH_REGVAL(R200_PP_TXABLEND_3, (R200_TXA_ARG_A_ZERO |
+						  R200_TXA_ARG_B_ZERO |
+						  R200_TXA_ARG_C_R1_ALPHA |
+						  R200_TXA_OP_MADD));
+	    OUT_BATCH_REGVAL(R200_PP_TXABLEND2_3, (R200_TXA_CLAMP_0_1 |
+						   R200_TXA_OUTPUT_REG_R0));
+	    END_BATCH();
+	    break;
+    }
+    /* Texture unit 0 registers, emitted for every destination format (18 entries; 34 + 18 is the worst case). */
+    BEGIN_BATCH(18);
+    OUT_BATCH_REGVAL(R200_PP_CNTL_X, 0);
+    OUT_BATCH_REGVAL(R200_PP_TXMULTI_CTL_0, 0);
+    OUT_BATCH_REGVAL(R200_PP_TXFILTER_0, (R200_CLAMP_S_CLAMP_LAST |
+					  R200_CLAMP_T_CLAMP_LAST |
+					  R200_MAG_FILTER_NEAREST |
+					  R200_MIN_FILTER_NEAREST));
+    OUT_BATCH_REGVAL(R200_PP_TXFORMAT_0, txformat);
+    OUT_BATCH_REGVAL(R200_PP_TXFORMAT_X_0, 0);
+    OUT_BATCH_REGVAL(R200_PP_TXSIZE_0, ((width - 1) |
+					((height - 1) << RADEON_TEX_VSIZE_SHIFT)));
+    OUT_BATCH_REGVAL(R200_PP_TXPITCH_0, pitch * _mesa_get_format_bytes(src_mesa_format) - 32);
+    /* NOTE(review): 'offset' is only asserted above; the relocation below is emitted with 0 - confirm intended. */
+    OUT_BATCH_REGSEQ(R200_PP_TXOFFSET_0, 1);
+    OUT_BATCH_RELOC(0, bo, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+
+    END_BATCH();
+}
+
+static inline void emit_cb_setup(struct r200_context *r200,
+				 struct radeon_bo *bo,
+				 intptr_t offset,
+				 gl_format mesa_format,
+				 unsigned pitch,
+				 unsigned width,
+				 unsigned height)
+{
+    uint32_t dst_pitch = pitch;
+    uint32_t dst_format = 0;
+    BATCH_LOCALS(&r200->radeon);
+    /* Colour-buffer setup for the blit target bo; the 'offset' argument is not referenced in this function. */
+    /* Map the Mesa format to a RADEON_COLOR_FORMAT_* value.  XXX others?  BE/LE? */
+    switch (mesa_format) {
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+    case MESA_FORMAT_RGBA8888:
+    case MESA_FORMAT_RGBA8888_REV:
+	    dst_format = RADEON_COLOR_FORMAT_ARGB8888;
+	    break;
+    case MESA_FORMAT_RGB565:
+	    dst_format = RADEON_COLOR_FORMAT_RGB565;
+	    break;
+    case MESA_FORMAT_ARGB4444:
+	    dst_format = RADEON_COLOR_FORMAT_ARGB4444;
+	    break;
+    case MESA_FORMAT_ARGB1555:
+	    dst_format = RADEON_COLOR_FORMAT_ARGB1555;
+	    break;
+    case MESA_FORMAT_A8:
+    case MESA_FORMAT_L8:
+    case MESA_FORMAT_I8:
+	    dst_format = RADEON_COLOR_FORMAT_RGB8;
+	    break;
+    default:
+	    break;
+    }
+    /* Formats not listed above leave dst_format at 0. */
+    BEGIN_BATCH_NO_AUTOSTATE(22);
+    OUT_BATCH_REGVAL(R200_RE_AUX_SCISSOR_CNTL, 0);
+    OUT_BATCH_REGVAL(R200_RE_CNTL, 0);
+    OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0);
+    OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, ((width << RADEON_RE_WIDTH_SHIFT) |
+					      (height << RADEON_RE_HEIGHT_SHIFT)));
+    OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff);
+    OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+    OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format);
+    /* The colour buffer offset and pitch registers are written through relocations against bo. */
+    OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1);
+    OUT_BATCH_RELOC(0, bo, 0, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
+    OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1);
+    OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
+
+    END_BATCH();
+}
+
+/* Reset the bo list of the command stream, then run the space check for
+ * src_bo and dst_bo; GL_TRUE only when both checks return 0. */
+static GLboolean validate_buffers(struct r200_context *r200,
+                                  struct radeon_bo *src_bo,
+                                  struct radeon_bo *dst_bo)
+{
+    radeon_cs_space_reset_bos(r200->radeon.cmdbuf.cs);
+
+    /* Source buffer: VRAM|GTT passed as the first domain argument. */
+    if (radeon_cs_space_check_with_bo(r200->radeon.cmdbuf.cs, src_bo,
+                                      RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0) != 0)
+        return GL_FALSE;
+
+    /* Destination buffer: VRAM|GTT passed as the second domain argument. */
+    if (radeon_cs_space_check_with_bo(r200->radeon.cmdbuf.cs, dst_bo,
+                                      0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT) != 0)
+        return GL_FALSE;
+
+    return GL_TRUE;
+}
+
+/**
+ * Calculate normalized texcoords for the given image region; @a buf receives
+ * [minx, maxx, miny, maxy], with both y values replaced by (1 - y) if @a flip_y.
+ */
+static inline void calc_tex_coords(float img_width, float img_height,
+				   float x, float y,
+				   float reg_width, float reg_height,
+				   unsigned flip_y, float *buf)
+{
+    buf[0] = x / img_width;
+    buf[1] = buf[0] + reg_width / img_width;
+    buf[2] = y / img_height;
+    buf[3] = buf[2] + reg_height / img_height;
+    if (flip_y)
+    {
+        buf[2] = 1.0 - buf[2];
+        buf[3] = 1.0 - buf[3];
+    }
+}
+
+static inline void emit_draw_packet(struct r200_context *r200,
+				    unsigned src_width, unsigned src_height,
+				    unsigned src_x_offset, unsigned src_y_offset,
+				    unsigned dst_x_offset, unsigned dst_y_offset,
+				    unsigned reg_width, unsigned reg_height,
+				    unsigned flip_y)
+{
+    float texcoords[4];
+    float verts[12];
+    BATCH_LOCALS(&r200->radeon);
+    /* Builds three vertices of (x, y, s, t) and emits them inline as one RECT_LIST draw. */
+    calc_tex_coords(src_width, src_height,
+                    src_x_offset, src_y_offset,
+                    reg_width, reg_height,
+                    flip_y, texcoords);
+    /* Vertex 0: destination (x, y + reg_height), texcoord (texcoords[0], texcoords[3]). */
+    verts[0] = dst_x_offset;
+    verts[1] = dst_y_offset + reg_height;
+    verts[2] = texcoords[0];
+    verts[3] = texcoords[3];
+    /* Vertex 1: destination (x + reg_width, y + reg_height), texcoord (texcoords[1], texcoords[3]). */
+    verts[4] = dst_x_offset + reg_width;
+    verts[5] = dst_y_offset + reg_height;
+    verts[6] = texcoords[1];
+    verts[7] = texcoords[3];
+    /* Vertex 2: destination (x + reg_width, y), texcoord (texcoords[1], texcoords[2]). */
+    verts[8] = dst_x_offset + reg_width;
+    verts[9] = dst_y_offset;
+    verts[10] = texcoords[1];
+    verts[11] = texcoords[2];
+    /* 14 entries: two header words followed by the 12 vertex floats. */
+    BEGIN_BATCH(14);
+    OUT_BATCH(R200_CP_CMD_3D_DRAW_IMMD_2 | (12 << 16));
+    OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+	      RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+              (3 << 16));
+    OUT_BATCH_TABLE(verts, 12);
+    END_BATCH();
+}
+
+/**
+ * Copy a region of [@a reg_width x @a reg_height] pixels from the source
+ * buffer to the destination buffer; returns GL_TRUE if the blit was emitted.
+ * @param[in] ctx GL context (converted to the r200 context)
+ * @param[in] src_bo source radeon buffer object
+ * @param[in] src_offset offset of the source image in the @a src_bo
+ * @param[in] src_mesaformat source image format
+ * @param[in] src_pitch aligned source image width
+ * @param[in] src_width source image width
+ * @param[in] src_height source image height
+ * @param[in] src_x_offset x offset in the source image
+ * @param[in] src_y_offset y offset in the source image
+ * @param[in] dst_bo destination radeon buffer object
+ * @param[in] dst_offset offset of the destination image in the @a dst_bo
+ * @param[in] dst_mesaformat destination image format
+ * @param[in] dst_pitch aligned destination image width
+ * @param[in] dst_width destination image width
+ * @param[in] dst_height destination image height
+ * @param[in] dst_x_offset x offset in the destination image
+ * @param[in] dst_y_offset y offset in the destination image
+ * @param[in] reg_width region width
+ * @param[in] reg_height region height
+ * @param[in] flip_y set if y coords of the source image need to be flipped
+ */
+unsigned r200_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x_offset,
+                   unsigned src_y_offset,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x_offset,
+                   unsigned dst_y_offset,
+                   unsigned reg_width,
+                   unsigned reg_height,
+                   unsigned flip_y)
+{
+    struct r200_context *r200 = R200_CONTEXT(ctx);
+
+    if (!r200_check_blit(dst_mesaformat))
+        return GL_FALSE;
+
+    /* Make sure that colorbuffer has even width - hw limitation */
+    if (dst_pitch % 2 > 0)
+        ++dst_pitch;
+
+    /* Rendering to small buffer doesn't work.
+     * Looks like a hw limitation.
+     */
+    if (dst_pitch < 32)
+        return GL_FALSE;
+
+    /* Need to clamp the region size to make sure
+     * we don't read outside of the source buffer
+     * or write outside of the destination buffer.
+     */
+    if (reg_width + src_x_offset > src_width)
+        reg_width = src_width - src_x_offset;
+    if (reg_height + src_y_offset > src_height)
+        reg_height = src_height - src_y_offset;
+    if (reg_width + dst_x_offset > dst_width)
+        reg_width = dst_width - dst_x_offset;
+    if (reg_height + dst_y_offset > dst_height)
+        reg_height = dst_height - dst_y_offset;
+    /* NOTE(review): these are unsigned subtractions; an offset larger than the image size would wrap - confirm callers prevent it. */
+    if (src_bo == dst_bo) { /* source and destination in the same bo are rejected */
+        return GL_FALSE;
+    }
+
+    if (src_offset % 32 || dst_offset % 32) { /* both offsets must be multiples of 32 */
+        return GL_FALSE;
+    }
+
+    if (0) { /* debug dump, disabled */
+        fprintf(stderr, "src: size [%d x %d], pitch %d, "
+                "offset [%d x %d], format %s, bo %p\n",
+                src_width, src_height, src_pitch,
+                src_x_offset, src_y_offset,
+                _mesa_get_format_name(src_mesaformat),
+                src_bo);
+        fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n",
+                dst_pitch, dst_x_offset, dst_y_offset,
+                _mesa_get_format_name(dst_mesaformat), dst_bo);
+        fprintf(stderr, "region: %d x %d\n", reg_width, reg_height);
+    }
+
+    /* Flush is needed to make sure that source buffer has correct data */
+    radeonFlush(r200->radeon.glCtx);
+    /* 102 = 14 + 52 + 22 + 14, the per-call sizes noted on the emit calls below. */
+    rcommonEnsureCmdBufSpace(&r200->radeon, 102, __FUNCTION__);
+
+    if (!validate_buffers(r200, src_bo, dst_bo))
+        return GL_FALSE;
+
+    /* 14 */
+    emit_vtx_state(r200);
+    /* 52 */
+    emit_tx_setup(r200, src_mesaformat, dst_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
+    /* 22 */
+    emit_cb_setup(r200, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
+    /* 14 */
+    emit_draw_packet(r200, src_width, src_height,
+                     src_x_offset, src_y_offset,
+                     dst_x_offset, dst_y_offset,
+                     reg_width, reg_height,
+                     flip_y);
+
+    radeonFlush(ctx);
+
+    return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r200/r200_blit.h b/src/mesa/drivers/dri/r200/r200_blit.h
new file mode 100644
index 0000000000..53206f0b47
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_blit.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef R200_BLIT_H
+#define R200_BLIT_H
+/* NOTE(review): r200_blit.c contains no definition of r200_blit_init() - confirm it is provided elsewhere. */
+void r200_blit_init(struct r200_context *r200);
+/* Returns non-zero when mesa_format is accepted as a blit destination format, 0 otherwise. */
+unsigned r200_check_blit(gl_format mesa_format);
+/* Copies a reg_width x reg_height region from src_bo to dst_bo; returns GL_TRUE on success, GL_FALSE if rejected. */
+unsigned r200_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x_offset,
+                   unsigned src_y_offset,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x_offset,
+                   unsigned dst_y_offset,
+                   unsigned reg_width,
+                   unsigned reg_height,
+                   unsigned flip_y);
+
+#endif /* R200_BLIT_H */
diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
new file mode 100644
index 0000000000..ad43a8ca92
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c
@@ -0,0 +1,353 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+
+#include "radeon_common.h"
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "radeon_reg.h"
+
+/* Append an atom to the tail of atom_list, but only when the atom's
+ * `check` member is non-zero; other atoms are left out of the list.
+ * The atom argument is evaluated exactly once.  State atoms are emitted
+ * in list order, so the sequence of insertions matters.
+ */
+#define insert_at_tail_if(atom_list, atom)			\
+   do {								\
+      struct radeon_state_atom *const candidate_ = (atom);	\
+      if (candidate_->check) {					\
+	 insert_at_tail((atom_list), candidate_);		\
+      }								\
+   } while (0)
+
+/* Rebuild rmesa->radeon.hw.atomlist from scratch.
+ *
+ * The list is emptied, named "atom-list", and then every r200 state atom
+ * is offered to insert_at_tail_if() in a fixed order.  Per-unit atoms
+ * (tex, cube) and the matrix atoms are bounded by the context's
+ * MaxTextureUnits; the remaining arrays use fixed counts.
+ */
+void r200SetUpAtomList( r200ContextPtr rmesa )
+{
+   struct radeon_state_atom *const atoms = &rmesa->radeon.hw.atomlist;
+   int mtu;
+   int n;
+
+   mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
+
+   make_empty_list(atoms);
+   atoms->name = "atom-list";
+
+   /* Fixed-function / raster state. */
+   insert_at_tail_if( atoms, &rmesa->hw.ctx );
+   insert_at_tail_if( atoms, &rmesa->hw.set );
+   insert_at_tail_if( atoms, &rmesa->hw.lin );
+   insert_at_tail_if( atoms, &rmesa->hw.msk );
+   insert_at_tail_if( atoms, &rmesa->hw.vpt );
+   insert_at_tail_if( atoms, &rmesa->hw.vtx );
+   insert_at_tail_if( atoms, &rmesa->hw.vap );
+   insert_at_tail_if( atoms, &rmesa->hw.vte );
+   insert_at_tail_if( atoms, &rmesa->hw.msc );
+   insert_at_tail_if( atoms, &rmesa->hw.cst );
+   insert_at_tail_if( atoms, &rmesa->hw.zbs );
+   insert_at_tail_if( atoms, &rmesa->hw.tcl );
+   insert_at_tail_if( atoms, &rmesa->hw.msl );
+   insert_at_tail_if( atoms, &rmesa->hw.tcg );
+   insert_at_tail_if( atoms, &rmesa->hw.grd );
+   insert_at_tail_if( atoms, &rmesa->hw.fog );
+   insert_at_tail_if( atoms, &rmesa->hw.tam );
+   insert_at_tail_if( atoms, &rmesa->hw.tf );
+   insert_at_tail_if( atoms, &rmesa->hw.atf );
+
+   /* Per-texture-unit atoms. */
+   for (n = 0; n < mtu; n++) {
+      insert_at_tail_if( atoms, &rmesa->hw.tex[n] );
+   }
+   for (n = 0; n < mtu; n++) {
+      insert_at_tail_if( atoms, &rmesa->hw.cube[n] );
+   }
+
+   for (n = 0; n < 6; n++) {
+      insert_at_tail_if( atoms, &rmesa->hw.pix[n] );
+   }
+   insert_at_tail_if( atoms, &rmesa->hw.afs[0] );
+   insert_at_tail_if( atoms, &rmesa->hw.afs[1] );
+   insert_at_tail_if( atoms, &rmesa->hw.stp );
+
+   for (n = 0; n < 8; n++) {
+      insert_at_tail_if( atoms, &rmesa->hw.lit[n] );
+   }
+   for (n = 0; n < 3 + mtu; n++) {
+      insert_at_tail_if( atoms, &rmesa->hw.mat[n] );
+   }
+   insert_at_tail_if( atoms, &rmesa->hw.eye );
+   insert_at_tail_if( atoms, &rmesa->hw.glt );
+
+   for (n = 0; n < 2; n++) {
+      insert_at_tail_if( atoms, &rmesa->hw.mtl[n] );
+   }
+   for (n = 0; n < 6; n++) {
+      insert_at_tail_if( atoms, &rmesa->hw.ucp[n] );
+   }
+
+   insert_at_tail_if( atoms, &rmesa->hw.spr );
+   insert_at_tail_if( atoms, &rmesa->hw.ptp );
+   insert_at_tail_if( atoms, &rmesa->hw.prf );
+   insert_at_tail_if( atoms, &rmesa->hw.pvs );
+   insert_at_tail_if( atoms, &rmesa->hw.vpp[0] );
+   insert_at_tail_if( atoms, &rmesa->hw.vpp[1] );
+   insert_at_tail_if( atoms, &rmesa->hw.vpi[0] );
+   insert_at_tail_if( atoms, &rmesa->hw.vpi[1] );
+   insert_at_tail_if( atoms, &rmesa->hw.sci );
+}
+
+/* Fire a section of the retained vertex buffer as a regular, non-indexed
+ * primitive.
+ *
+ * Pending state is emitted first, followed by a 3D_DRAW_VBUF_2 packet
+ * whose control word combines the primitive type, list walking, RGBA
+ * colour order and the vertex count.  The caller must not set
+ * R200_VF_PRIM_WALK_IND in primitive (asserted).
+ */
+void r200EmitVbufPrim( r200ContextPtr rmesa,
+                       GLuint primitive,
+                       GLuint vertex_nr )
+{
+   BATCH_LOCALS(&rmesa->radeon);
+   const GLuint draw_cntl = primitive |
+                            R200_VF_PRIM_WALK_LIST |
+                            R200_VF_COLOR_ORDER_RGBA |
+                            (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT);
+
+   assert((primitive & R200_VF_PRIM_WALK_IND) == 0);
+
+   radeonEmitState(&rmesa->radeon);
+
+   radeon_print(RADEON_RENDER|RADEON_SWRENDER, RADEON_VERBOSE,
+                "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__,
+                rmesa->store.cmd_used/4, primitive, vertex_nr);
+
+   BEGIN_BATCH(3);
+   OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0);
+   OUT_BATCH(draw_cntl);
+   END_BATCH();
+}
+
+/* Emit an indexed draw of vertex_count elements taken from the current
+ * element buffer (rmesa->radeon.tcl.elt_dma_bo at elt_dma_offset).
+ *
+ * Nothing is emitted when vertex_count is not positive.  The index-buffer
+ * packet starts the same way on both paths; only the way the buffer
+ * address and its relocation are written differs between the kernel
+ * memory-manager path and the legacy path.
+ */
+static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type)
+{
+	BATCH_LOCALS(&rmesa->radeon);
+
+	if (vertex_count <= 0)
+		return;
+
+	BEGIN_BATCH(8+2);
+
+	/* Draw command: indexed walk, RGBA colour order, count in bits 16+. */
+	OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_INDX_2, 0);
+	OUT_BATCH(R200_VF_PRIM_WALK_IND |
+		  R200_VF_COLOR_ORDER_RGBA |
+		  (vertex_count << 16) |
+		  type);
+
+	/* Index buffer packet header, common to both paths. */
+	OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2);
+	OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810);
+
+	if (rmesa->radeon.radeonScreen->kernel_mm) {
+		/* Address and size first, relocation appended afterwards. */
+		OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
+		OUT_BATCH((vertex_count + 1)/2);
+		radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+				      rmesa->radeon.tcl.elt_dma_bo,
+				      RADEON_GEM_DOMAIN_GTT, 0, 0);
+	} else {
+		/* Legacy path: relocation is emitted in place of the address. */
+		OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
+				rmesa->radeon.tcl.elt_dma_bo,
+				rmesa->radeon.tcl.elt_dma_offset,
+				RADEON_GEM_DOMAIN_GTT, 0, 0);
+		OUT_BATCH((vertex_count + 1)/2);
+	}
+
+	END_BATCH();
+}
+
+/* DMA flush callback for an open element buffer.
+ *
+ * Must only run while it is the registered rmesa->radeon.dma.flush hook
+ * (asserted); it unregisters itself, unmaps the element buffer, fires the
+ * accumulated elements (elt_used / 2 of them) as an indexed primitive,
+ * drops the buffer reference and returns the unused tail of the
+ * R200_ELT_BUF_SZ region.  With RADEON_SYNC debugging enabled on the
+ * non-kernel_mm path it additionally calls radeonFinish().
+ */
+void r200FlushElts(GLcontext *ctx)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   radeonContextPtr radeon = &rmesa->radeon;
+   const int elt_used = rmesa->tcl.elt_used;
+
+   radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used);
+
+   assert( radeon->dma.flush == r200FlushElts );
+   radeon->dma.flush = NULL;
+
+   radeon_bo_unmap(radeon->tcl.elt_dma_bo);
+
+   r200FireEB(rmesa, elt_used / 2, rmesa->tcl.hw_primitive);
+
+   radeon_bo_unref(radeon->tcl.elt_dma_bo);
+   radeon->tcl.elt_dma_bo = NULL;
+
+   /* Give back whatever part of the reserved region was not used. */
+   if (elt_used < R200_ELT_BUF_SZ) {
+      radeonReturnDmaRegion(radeon, R200_ELT_BUF_SZ - elt_used);
+   }
+
+   if (radeon_is_debug_enabled(RADEON_SYNC, RADEON_CRITICAL)) {
+      if (!radeon->radeonScreen->kernel_mm) {
+         radeon_print(RADEON_SYNC, RADEON_NORMAL, "%s: Syncing\n", __FUNCTION__);
+         radeonFinish( radeon->glCtx );
+      }
+   }
+}
+
+
+/* Open a new element (index) buffer for an indexed primitive.
+ *
+ * Emits pending state, allocates an R200_ELT_BUF_SZ DMA region for the
+ * elements, records min_nr * 2 in rmesa->tcl.elt_used, maps the buffer
+ * and registers r200FlushElts as the DMA flush hook (no other flush hook
+ * may be pending; asserted).
+ *
+ * primitive must have R200_VF_PRIM_WALK_IND set (asserted).
+ * Returns a pointer to the mapped element storage at the start of the
+ * allocated region.
+ */
+GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
+				    GLuint primitive,
+				    GLuint min_nr )
+{
+   GLushort *retval;
+
+   radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
+
+   assert((primitive & R200_VF_PRIM_WALK_IND));
+   
+   radeonEmitState(&rmesa->radeon);
+
+   radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
+			&rmesa->radeon.tcl.elt_dma_offset, R200_ELT_BUF_SZ, 4);
+   rmesa->tcl.elt_used = min_nr * 2;
+
+   radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
+   /* Apply the byte offset through a char pointer: arithmetic directly on
+    * a void pointer is a GNU extension rather than standard C.
+    */
+   retval = (GLushort *)((char *)rmesa->radeon.tcl.elt_dma_bo->ptr +
+			 rmesa->radeon.tcl.elt_dma_offset);
+   
+   assert(!rmesa->radeon.dma.flush);
+   rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+   rmesa->radeon.dma.flush = r200FlushElts;
+
+   return retval;
+}
+
+/* Write count to the R200_SE_VF_MAX_VTX_INDX register.  Only done on the
+ * kernel_mm path; on the legacy path this function emits nothing.
+ */
+void r200EmitMaxVtxIndex(r200ContextPtr rmesa, int count)
+{
+   BATCH_LOCALS(&rmesa->radeon);
+
+   if (!rmesa->radeon.radeonScreen->kernel_mm)
+      return;
+
+   BEGIN_BATCH_NO_AUTOSTATE(2);
+   OUT_BATCH(CP_PACKET0(R200_SE_VF_MAX_VTX_INDX, 0));
+   OUT_BATCH(count);
+   END_BATCH();
+}
+
+/* Emit a 3D_LOAD_VBPNTR packet that describes a single vertex array
+ * located at `offset` inside `bo`.  vertex_size is written twice into the
+ * format word (bits 0.. and bits 8..), and the array address is emitted
+ * as a GTT-domain relocation.
+ */
+void r200EmitVertexAOS( r200ContextPtr rmesa,
+			GLuint vertex_size,
+ 			struct radeon_bo *bo,
+			GLuint offset )
+{
+   BATCH_LOCALS(&rmesa->radeon);
+   const GLuint array_fmt = vertex_size | (vertex_size << 8);
+
+   radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s:  vertex_size 0x%x offset 0x%x \n",
+                __FUNCTION__, vertex_size, offset);
+
+   BEGIN_BATCH(7);
+   OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2);
+   OUT_BATCH(1);			/* one array */
+   OUT_BATCH(array_fmt);
+   OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+   END_BATCH();
+}
+
+/* Emit a 3D_LOAD_VBPNTR packet describing the first `nr` entries of
+ * rmesa->radeon.tcl.aos[].
+ *
+ * Arrays are emitted in pairs: one format word carrying components and
+ * stride of both arrays, followed by the two array addresses.  A trailing
+ * odd array gets a format word with only its own fields and one address.
+ * Each address is aos[k].offset + offset * 4 * aos[k].stride.
+ * NOTE(review): this looks like `offset` is a starting vertex index and
+ * stride is counted in dwords -- confirm against the callers.
+ *
+ * On the legacy path each address is written with OUT_BATCH_RELOC.  On
+ * the kernel_mm path all packet dwords are written first and the
+ * relocations are appended afterwards, in the same array order.
+ */
+void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset)
+{
+   BATCH_LOCALS(&rmesa->radeon);
+   uint32_t voffset;
+   /* Packet payload: 1 dword for the array count, 3 dwords per array pair
+    * (format word + two addresses), 2 dwords for a trailing odd array.
+    */
+   int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+   int i;
+   
+   radeon_print(RADEON_RENDER, RADEON_VERBOSE,
+           "%s: nr=%d, ofs=0x%08x\n",
+           __FUNCTION__, nr, offset);
+
+   /* NOTE(review): the extra nr*2 dwords presumably reserve room for one
+    * relocation per array -- confirm against the batch macros.
+    */
+   BEGIN_BATCH(sz+2+ (nr*2));
+   OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1);
+   OUT_BATCH(nr);
+
+    
+   if (!rmesa->radeon.radeonScreen->kernel_mm) {
+      /* Legacy path: format word, then each address as an inline reloc. */
+      for (i = 0; i + 1 < nr; i += 2) {
+	 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+		   (rmesa->radeon.tcl.aos[i].stride << 8) |
+		   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+		   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+			
+	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+	 OUT_BATCH_RELOC(voffset,
+			 rmesa->radeon.tcl.aos[i].bo,
+			 voffset,
+			 RADEON_GEM_DOMAIN_GTT,
+			 0, 0);
+	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+	 OUT_BATCH_RELOC(voffset,
+			 rmesa->radeon.tcl.aos[i+1].bo,
+			 voffset,
+			 RADEON_GEM_DOMAIN_GTT,
+			 0, 0);
+      }
+      
+      /* Trailing unpaired array. */
+      if (nr & 1) {
+	 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+		   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+	 OUT_BATCH_RELOC(voffset,
+			 rmesa->radeon.tcl.aos[nr - 1].bo,
+			 voffset,
+			 RADEON_GEM_DOMAIN_GTT,
+			 0, 0);
+      }
+   } else {
+      /* kernel_mm path, pass 1: write format words and plain addresses. */
+      for (i = 0; i + 1 < nr; i += 2) {
+	 OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+		   (rmesa->radeon.tcl.aos[i].stride << 8) |
+		   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+		   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+	 
+	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+	 OUT_BATCH(voffset);
+	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+	 OUT_BATCH(voffset);
+      }
+      
+      if (nr & 1) {
+	 OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+		   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+	 OUT_BATCH(voffset);
+      }
+      /* kernel_mm path, pass 2: append one relocation per array, in the
+       * same order the addresses were written above.
+       * NOTE(review): voffset is recomputed in this pass but is not passed
+       * to radeon_cs_write_reloc(), so these assignments have no effect.
+       */
+      for (i = 0; i + 1 < nr; i += 2) {
+	 voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+			       rmesa->radeon.tcl.aos[i+0].bo,
+			       RADEON_GEM_DOMAIN_GTT,
+			       0, 0);
+	 voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+			       rmesa->radeon.tcl.aos[i+1].bo,
+			       RADEON_GEM_DOMAIN_GTT,
+			       0, 0);
+      }
+      if (nr & 1) {
+	 voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
+	    offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+	 radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+			       rmesa->radeon.tcl.aos[nr-1].bo,
+			       RADEON_GEM_DOMAIN_GTT,
+			       0, 0);
+      }
+   }
+   END_BATCH();
+}
diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c
new file mode 100644
index 0000000000..5896296021
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_context.c
@@ -0,0 +1,513 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/api_arrayelt.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+#include "main/extensions.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_state.h"
+#include "r200_tex.h"
+#include "r200_swtcl.h"
+#include "r200_tcl.h"
+#include "r200_vertprog.h"
+#include "radeon_queryobj.h"
+#include "r200_blit.h"
+
+#include "radeon_span.h"
+
+#define need_GL_ARB_occlusion_query
+#define need_GL_ARB_vertex_program
+#define need_GL_ATI_fragment_shader
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_secondary_color
+#define need_GL_EXT_blend_equation_separate
+#define need_GL_EXT_blend_func_separate
+#define need_GL_NV_vertex_program
+#define need_GL_ARB_point_parameters
+#define need_GL_EXT_framebuffer_object
+#include "main/remap_helper.h"
+
+#define DRIVER_DATE	"20060602"
+
+#include "utils.h"
+#include "xmlpool.h" /* for symbolic values of enum-type options */
+
+/* glGetString() driver hook.
+ *
+ * Handles GL_VENDOR and GL_RENDERER and returns NULL for every other
+ * name.  The renderer string is assembled in a function-static buffer:
+ * driGetRendererString() writes the base string and a " TCL" / " NO-TCL"
+ * suffix is appended depending on the TCL-disable fallback bit.  The AGP
+ * mode passed on is 0 for PCI cards.
+ */
+static const GLubyte *r200GetString( GLcontext *ctx, GLenum name )
+{
+   static char buffer[128];
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (name == GL_VENDOR)
+      return (GLubyte *)"Tungsten Graphics, Inc.";
+
+   if (name == GL_RENDERER) {
+      GLuint agp_mode = 0;
+      unsigned end;
+
+      if (rmesa->radeon.radeonScreen->card_type != RADEON_CARD_PCI)
+         agp_mode = rmesa->radeon.radeonScreen->AGPMode;
+
+      end = driGetRendererString( buffer, "R200", DRIVER_DATE, agp_mode );
+
+      sprintf( buffer + end, " %sTCL",
+               (rmesa->radeon.TclFallback & R200_TCL_FALLBACK_TCL_DISABLE)
+               ? "NO-" : "" );
+
+      return (GLubyte *)buffer;
+   }
+
+   return NULL;
+}
+
+
+/* Extension strings exported by the R200 driver.
+ *
+ * Each entry pairs an extension name with its function table (NULL when
+ * no table is given).  Tables handed to driInitExtensions() end with a
+ * { NULL, NULL } sentinel entry.
+ *
+ * card_extensions is the base set that r200CreateContext() passes to
+ * driInitExtensions() unconditionally.
+ */
+static const struct dri_extension card_extensions[] =
+{
+    { "GL_ARB_multitexture",               NULL },
+    { "GL_ARB_occlusion_query",		   GL_ARB_occlusion_query_functions},
+    { "GL_ARB_texture_border_clamp",       NULL },
+    { "GL_ARB_texture_env_add",            NULL },
+    { "GL_ARB_texture_env_combine",        NULL },
+    { "GL_ARB_texture_env_dot3",           NULL },
+    { "GL_ARB_texture_env_crossbar",       NULL },
+    { "GL_ARB_texture_mirrored_repeat",    NULL },
+    { "GL_EXT_blend_minmax",               GL_EXT_blend_minmax_functions },
+    { "GL_EXT_blend_subtract",             NULL },
+    { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
+    { "GL_EXT_packed_depth_stencil",	   NULL},
+    { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
+    { "GL_EXT_stencil_wrap",               NULL },
+    { "GL_EXT_texture_edge_clamp",         NULL },
+    { "GL_EXT_texture_env_combine",        NULL },
+    { "GL_EXT_texture_env_dot3",           NULL },
+    { "GL_EXT_texture_filter_anisotropic", NULL },
+    { "GL_EXT_texture_lod_bias",           NULL },
+    { "GL_EXT_texture_mirror_clamp",       NULL },
+    { "GL_EXT_texture_rectangle",          NULL },
+    { "GL_ATI_texture_env_combine3",       NULL },
+    { "GL_ATI_texture_mirror_once",        NULL },
+    { "GL_MESA_pack_invert",               NULL },
+    { "GL_NV_blend_square",                NULL },
+    { "GL_SGIS_generate_mipmap",           NULL },
+    { NULL,                                NULL }
+};
+
+/* Enabled by r200CreateContext() when the screen reports
+ * drmSupportsBlendColor.
+ */
+static const struct dri_extension blend_extensions[] = {
+    { "GL_EXT_blend_equation_separate",    GL_EXT_blend_equation_separate_functions },
+    { "GL_EXT_blend_func_separate",        GL_EXT_blend_func_separate_functions },
+    { NULL,                                NULL }
+};
+
+/* The next three tables hold a single entry and no sentinel; they are
+ * passed to driInitSingleExtension() rather than driInitExtensions().
+ */
+
+/* Enabled when the screen reports drmSupportsVertexProgram. */
+static const struct dri_extension ARB_vp_extension[] = {
+    { "GL_ARB_vertex_program",             GL_ARB_vertex_program_functions }
+};
+
+/* Enabled when the "nv_vertex_program" driconf option is set. */
+static const struct dri_extension NV_vp_extension[] = {
+    { "GL_NV_vertex_program",              GL_NV_vertex_program_functions }
+};
+
+/* Enabled when MaxTextureUnits is 6 and the screen reports
+ * drmSupportsFragShader.
+ */
+static const struct dri_extension ATI_fs_extension[] = {
+    { "GL_ATI_fragment_shader",            GL_ATI_fragment_shader_functions }
+};
+
+/* Enabled when the screen reports drmSupportsPointSprites. */
+static const struct dri_extension point_extensions[] = {
+    { "GL_ARB_point_sprite",               NULL },
+    { "GL_ARB_point_parameters",           GL_ARB_point_parameters_functions },
+    { NULL,                                NULL }
+};
+
+/* Enabled only when the screen uses the kernel memory manager
+ * (kernel_mm).
+ */
+static const struct dri_extension mm_extensions[] = {
+  { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
+  { NULL, NULL }
+};
+
+/* r200-specific TNL pipeline stages, defined in other files of the
+ * driver.
+ */
+extern const struct tnl_pipeline_stage _r200_render_stage;
+extern const struct tnl_pipeline_stage _r200_tcl_stage;
+
+/* NULL-terminated TNL pipeline that r200CreateContext() installs with
+ * _tnl_install_pipeline() in place of the default one.  The hardware TCL
+ * stage comes first; the generic software stages follow as the fallback
+ * path, ending in the generic render stage.
+ */
+static const struct tnl_pipeline_stage *r200_pipeline[] = {
+
+   /* Try and go straight to t&l
+    */
+   &_r200_tcl_stage,  
+
+   /* Catch any t&l fallbacks
+    */
+   &_tnl_vertex_transform_stage,
+   &_tnl_normal_transform_stage,
+   &_tnl_lighting_stage,
+   &_tnl_fog_coordinate_stage,
+   &_tnl_texgen_stage,
+   &_tnl_texture_transform_stage,
+   &_tnl_point_attenuation_stage,
+   &_tnl_vertex_program_stage,
+   /* Try again to go to tcl? 
+    *     - no good for asymmetric-twoside (do with multipass)
+    *     - no good for asymmetric-unfilled (do with multipass)
+    *     - good for material
+    *     - good for texgen
+    *     - need to manipulate a bit of state
+    *
+    * - worth it/not worth it?
+    */
+			
+   /* Else do them here.
+    */
+   /* The driver's own render stage is left disabled (see FIXME), so
+    * rendering always ends in the generic _tnl_render_stage.
+    */
+/*    &_r200_render_stage,  */ /* FIXME: bugs with ut2003 */
+   &_tnl_render_stage,		/* FALLBACK:  */
+   NULL,
+};
+
+
+
+/* Plug the driver's miscellaneous hooks into the function table:
+ * GetString is served by r200GetString, and the obsolete GetBufferSize
+ * hook is cleared.
+ */
+static void r200InitDriverFuncs( struct dd_function_table *functions )
+{
+   functions->GetString = r200GetString;
+   functions->GetBufferSize = NULL;	/* OBSOLETE */
+}
+
+
+/* vtbl get_lock hook.
+ *
+ * Marks the ctx state atom as changed and mirrors the SAREA's
+ * tiling_enabled flag into the colour-pitch word.  If another context
+ * owned the hardware, ownership is taken over in the SAREA and, on the
+ * non-kernel_mm path, radeon_bo_legacy_texture_age() is called.
+ */
+static void r200_get_lock(radeonContextPtr radeon)
+{
+   r200ContextPtr rmesa = (r200ContextPtr)radeon;
+   drm_radeon_sarea_t *sarea = radeon->sarea;
+
+   R200_STATECHANGE( rmesa, ctx );
+   if (!rmesa->radeon.sarea->tiling_enabled) {
+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE;
+   } else {
+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE;
+   }
+
+   /* Nothing more to do when this context already owns the hardware. */
+   if ( sarea->ctx_owner == rmesa->radeon.dri.hwContext )
+      return;
+
+   sarea->ctx_owner = rmesa->radeon.dri.hwContext;
+   if (!radeon->radeonScreen->kernel_mm) {
+      radeon_bo_legacy_texture_age(radeon->radeonScreen->bom);
+   }
+}
+
+/* vtbl emit_cs_header hook.  Intentionally empty: this driver writes no
+ * header into the command stream; the function exists so the vtbl entry
+ * can be filled in by r200_init_vtbl().
+ */
+static void r200_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
+{
+}
+
+/* vtbl emit_query_finish hook.
+ *
+ * Emits a write to RADEON_RB3D_ZPASS_ADDR with a relocation that targets
+ * the current query's buffer object at its current offset, then advances
+ * that offset by one 32-bit word (it must stay below
+ * RADEON_QUERY_PAGE_SIZE; asserted) and clears the emitted_begin flag.
+ */
+static void r200_emit_query_finish(radeonContextPtr radeon)
+{
+   BATCH_LOCALS(radeon);
+   struct radeon_query_object *const q = radeon->query.current;
+
+   BEGIN_BATCH_NO_AUTOSTATE(4);
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZPASS_ADDR, 0));
+   OUT_BATCH_RELOC(0, q->bo, q->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+   END_BATCH();
+
+   q->curr_offset += sizeof(uint32_t);
+   assert(q->curr_offset < RADEON_QUERY_PAGE_SIZE);
+
+   q->emitted_begin = GL_FALSE;
+}
+
+/* Fill in the r200 entries of the shared radeon context's vtbl. */
+static void r200_init_vtbl(radeonContextPtr radeon)
+{
+   /* Locking and command-stream hooks. */
+   radeon->vtbl.get_lock = r200_get_lock;
+   radeon->vtbl.emit_cs_header = r200_vtbl_emit_cs_header;
+   radeon->vtbl.emit_query_finish = r200_emit_query_finish;
+
+   /* State and fallback hooks. */
+   radeon->vtbl.update_viewport_offset = r200UpdateViewportOffset;
+   radeon->vtbl.update_scissor = r200_vtbl_update_scissor;
+   radeon->vtbl.swtcl_flush = r200_swtcl_flush;
+   radeon->vtbl.fallback = r200Fallback;
+
+   /* Blit and format hooks. */
+   radeon->vtbl.check_blit = r200_check_blit;
+   radeon->vtbl.blit = r200_blit;
+   radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
+}
+
+
+/* Create the device specific rendering context.
+ *
+ * Allocates the r200 context, reads the driconf options, builds the
+ * driver function table, initialises the shared radeon context through
+ * radeonInitContext(), sets the implementation limits, creates the
+ * swrast/vbo/tnl/swsetup helper contexts, installs the r200 TNL pipeline,
+ * enables extensions and finally applies the no_rast / TCL fallbacks.
+ *
+ * glVisual, driContextPriv and sharedContextPrivate are forwarded to
+ * radeonInitContext(); the api argument is not referenced in this
+ * function.
+ *
+ * Returns GL_TRUE on success, GL_FALSE when the context allocation or
+ * radeonInitContext() fails.
+ */
+GLboolean r200CreateContext( gl_api api,
+			     const __GLcontextModes *glVisual,
+			     __DRIcontext *driContextPriv,
+			     void *sharedContextPrivate)
+{
+   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+   radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
+   struct dd_function_table functions;
+   r200ContextPtr rmesa;
+   GLcontext *ctx;
+   int i;
+   int tcl_mode;
+
+   assert(glVisual);
+   assert(driContextPriv);
+   assert(screen);
+
+   /* Allocate the R200 context */
+   rmesa = (r200ContextPtr) CALLOC( sizeof(*rmesa) );
+   if ( !rmesa )
+      return GL_FALSE;
+
+   rmesa->radeon.radeonScreen = screen;
+   r200_init_vtbl(&rmesa->radeon);
+   /* init exp fog table data */
+   r200InitStaticFogData();
+
+   /* Parse configuration files.
+    * Do this here so that initialMaxAnisotropy is set before we create
+    * the default textures.
+    */
+   driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
+			screen->driScreen->myNum, "r200");
+   rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
+							"def_max_anisotropy");
+
+   /* HyperZ is only considered for DRM major version 1 with the "hyperz"
+    * option set, and additionally needs minor version 13 or newer.
+    */
+   if ( sPriv->drm_version.major == 1
+       && driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
+      if ( sPriv->drm_version.minor < 13 )
+	 fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
+			  "disabling.\n", sPriv->drm_version.minor );
+      else
+	 rmesa->using_hyperz = GL_TRUE;
+   }
+ 
+   /* NOTE(review): unlike the HyperZ test above, this checks only the
+    * minor version and not the major version -- confirm this is intended.
+    */
+   if ( sPriv->drm_version.minor >= 15 )
+      rmesa->texmicrotile = GL_TRUE;
+
+   /* Init default driver functions then plug in our R200-specific functions
+    * (the texture functions are especially important)
+    */
+   _mesa_init_driver_functions(&functions);
+   r200InitDriverFuncs(&functions);
+   r200InitIoctlFuncs(&functions);
+   r200InitStateFuncs(&rmesa->radeon, &functions);
+   r200InitTextureFuncs(&rmesa->radeon, &functions);
+   r200InitShaderFuncs(&functions);
+   radeonInitQueryObjFunctions(&functions);
+
+   if (!radeonInitContext(&rmesa->radeon, &functions,
+			  glVisual, driContextPriv,
+			  sharedContextPrivate)) {
+     FREE(rmesa);
+     return GL_FALSE;
+   }
+
+   rmesa->radeon.swtcl.RenderIndex = ~0;
+   rmesa->radeon.hw.all_dirty = 1;
+
+   /* Set the maximum texture size small enough that we can guarantee that
+    * all texture units can bind a maximal texture and have all of them in
+    * texturable memory at once. Depending on the allow_large_textures driconf
+    * setting allow larger textures.
+    */
+
+   ctx = rmesa->radeon.glCtx;
+   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
+						 "texture_units");
+   ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
+   ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
+
+   ctx->Const.MaxCombinedTextureImageUnits = ctx->Const.MaxTextureUnits;
+
+   /* NOTE(review): the option value stored in i is not read before i is
+    * reused as a loop counter further down; the limits below are fixed.
+    */
+   i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures");
+
+   /* FIXME: When no memory manager is available we should set this 
+    * to some reasonable value based on texture memory pool size */
+   ctx->Const.MaxTextureLevels = 12;
+   ctx->Const.Max3DTextureLevels = 9;
+   ctx->Const.MaxCubeTextureLevels = 12;
+   ctx->Const.MaxTextureRectSize = 2048;
+   ctx->Const.MaxRenderbufferSize = 2048;
+
+   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+
+   /* No wide AA points.
+    */
+   ctx->Const.MinPointSize = 1.0;
+   ctx->Const.MinPointSizeAA = 1.0;
+   ctx->Const.MaxPointSizeAA = 1.0;
+   ctx->Const.PointSizeGranularity = 0.0625;
+   /* Large points are only advertised when the DRM supports point
+    * sprites.
+    */
+   if (rmesa->radeon.radeonScreen->drmSupportsPointSprites)
+      ctx->Const.MaxPointSize = 2047.0;
+   else
+      ctx->Const.MaxPointSize = 1.0;
+
+   /* mesa initialization problem - _mesa_init_point was already called */
+   ctx->Point.MaxSize = ctx->Const.MaxPointSize;
+
+   ctx->Const.MinLineWidth = 1.0;
+   ctx->Const.MinLineWidthAA = 1.0;
+   ctx->Const.MaxLineWidth = 10.0;
+   ctx->Const.MaxLineWidthAA = 10.0;
+   ctx->Const.LineWidthGranularity = 0.0625;
+
+   ctx->Const.VertexProgram.MaxNativeInstructions = R200_VSF_MAX_INST;
+   ctx->Const.VertexProgram.MaxNativeAttribs = 12;
+   ctx->Const.VertexProgram.MaxNativeTemps = R200_VSF_MAX_TEMPS;
+   ctx->Const.VertexProgram.MaxNativeParameters = R200_VSF_MAX_PARAM;
+   ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
+
+   ctx->Const.MaxDrawBuffers = 1;
+   ctx->Const.MaxColorAttachments = 1;
+
+   _mesa_set_mvp_with_dp4( ctx, GL_TRUE );
+
+   /* Initialize the software rasterizer and helper modules.
+    */
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+   _ae_create_context( ctx );
+
+   /* Install the customized pipeline:
+    */
+   _tnl_destroy_pipeline( ctx );
+   _tnl_install_pipeline( ctx, r200_pipeline );
+
+   /* Try and keep materials and vertices separate:
+    */
+/*    _tnl_isolate_materials( ctx, GL_TRUE ); */
+
+
+   /* Configure swrast and TNL to match hardware characteristics:
+    */
+   _swrast_allow_pixel_fog( ctx, GL_FALSE );
+   _swrast_allow_vertex_fog( ctx, GL_TRUE );
+   _tnl_allow_pixel_fog( ctx, GL_FALSE );
+   _tnl_allow_vertex_fog( ctx, GL_TRUE );
+
+
+   /* Construct the per-unit texgen matrices and the scratch matrix and
+    * reset each of them to identity.
+    */
+   for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS ; i++ ) {
+      _math_matrix_ctr( &rmesa->TexGenMatrix[i] );
+      _math_matrix_set_identity( &rmesa->TexGenMatrix[i] );
+   }
+   _math_matrix_ctr( &rmesa->tmpmat );
+   _math_matrix_set_identity( &rmesa->tmpmat );
+
+   /* Base extension set first, then the extensions that depend on screen
+    * capabilities or driconf options.
+    */
+   driInitExtensions( ctx, card_extensions, GL_TRUE );
+
+   if (rmesa->radeon.radeonScreen->kernel_mm)
+     driInitExtensions(ctx, mm_extensions, GL_FALSE);
+   if (!(rmesa->radeon.radeonScreen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) {
+     /* yuv textures don't work with some chips - R200 / rv280 okay so far
+	others get the bit ordering right but don't actually do YUV-RGB conversion */
+      _mesa_enable_extension( ctx, "GL_MESA_ycbcr_texture" );
+   }
+   if (rmesa->radeon.glCtx->Mesa_DXTn) {
+      _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+      _mesa_enable_extension( ctx, "GL_S3_s3tc" );
+   }
+   else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
+      _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+   }
+
+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200)
+      _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
+       driInitExtensions( ctx, blend_extensions, GL_FALSE );
+   }
+   if(rmesa->radeon.radeonScreen->drmSupportsVertexProgram)
+      driInitSingleExtension( ctx, ARB_vp_extension );
+   if(driQueryOptionb(&rmesa->radeon.optionCache, "nv_vertex_program"))
+      driInitSingleExtension( ctx, NV_vp_extension );
+
+   if ((ctx->Const.MaxTextureUnits == 6) && rmesa->radeon.radeonScreen->drmSupportsFragShader)
+      driInitSingleExtension( ctx, ATI_fs_extension );
+   if (rmesa->radeon.radeonScreen->drmSupportsPointSprites)
+      driInitExtensions( ctx, point_extensions, GL_FALSE );
+
+   /* GL_ARB_occlusion_query is part of card_extensions, so it is turned
+    * off again here when the kernel memory manager is not in use.
+    */
+   if (!rmesa->radeon.radeonScreen->kernel_mm)
+      _mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
+#if 0
+   r200InitDriverFuncs( ctx );
+   r200InitIoctlFuncs( ctx );
+   r200InitStateFuncs( ctx );
+   r200InitTextureFuncs( ctx );
+#endif
+   /* plug in a few more device driver functions */
+   /* XXX these should really go right after _mesa_init_driver_functions() */
+   radeon_fbo_init(&rmesa->radeon);
+   radeonInitSpanFuncs( ctx );
+   r200InitTnlFuncs( ctx );
+   r200InitState( rmesa );
+   r200InitSwtcl( ctx );
+
+   rmesa->prefer_gart_client_texturing = 
+      (getenv("R200_GART_CLIENT_TEXTURES") != 0);
+
+   /* Fallback selection: "no_rast" disables 3D acceleration entirely;
+    * otherwise hardware TCL is turned off when requested through driconf
+    * or the R200_NO_TCL environment variable, or when the chip lacks TCL.
+    * Note that clearing RADEON_CHIPSET_TCL modifies the shared screen's
+    * chip_flags, not just this context.
+    */
+   tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
+   if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
+      fprintf(stderr, "disabling 3D acceleration\n");
+      FALLBACK(rmesa, R200_FALLBACK_DISABLE, 1);
+   }
+   else if (tcl_mode == DRI_CONF_TCL_SW || getenv("R200_NO_TCL") ||
+	    !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+	 rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL;
+	 fprintf(stderr, "Disabling HW TCL support\n");
+      }
+      TCL_FALLBACK(rmesa->radeon.glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1);
+   }
+
+   return GL_TRUE;
+}
+
+
+/* Destroy the r200-specific parts of a context, then hand the rest of the
+ * teardown to radeonDestroyContext().
+ *
+ * Every matrix constructed with _math_matrix_ctr() in r200CreateContext()
+ * is destroyed here: the per-unit TexGenMatrix[] entries and the tmpmat
+ * scratch matrix.  A NULL driverPrivate is tolerated.
+ */
+void r200DestroyContext( __DRIcontext *driContextPriv )
+{
+	int i;
+	r200ContextPtr rmesa = (r200ContextPtr)driContextPriv->driverPrivate;
+	if (rmesa)
+	{
+		for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS ; i++ ) {
+			_math_matrix_dtr( &rmesa->TexGenMatrix[i] );
+		}
+		/* tmpmat is constructed alongside the texgen matrices and
+		 * must be released too, otherwise its storage leaks.
+		 */
+		_math_matrix_dtr( &rmesa->tmpmat );
+	}
+	radeonDestroyContext(driContextPriv);
+}
diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h
new file mode 100644
index 0000000000..305958f5d7
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_context.h
@@ -0,0 +1,659 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_CONTEXT_H__
+#define __R200_CONTEXT_H__
+
+#include "tnl/t_vertex.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "dri_util.h"
+#include "texmem.h"
+
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "r200_reg.h"
+#include "r200_vertprog.h"
+
+#define ENABLE_HW_3D_TEXTURE 1  /* XXX this is temporary! */
+
+#ifndef R200_EMIT_VAP_PVS_CNTL
+#error This driver requires a newer libdrm to compile
+#endif
+
+#include "radeon_screen.h"
+#include "radeon_common.h"
+
+#include "radeon_lock.h"
+
+struct r200_context;
+typedef struct r200_context r200ContextRec;
+typedef struct r200_context *r200ContextPtr;
+
+#include "main/mm.h"
+
+struct r200_vertex_program { /* driver-side record that embeds a Mesa gl_vertex_program */
+        struct gl_vertex_program mesa_program; /* Must be first */
+        int translated;
+        /* need excess instr (the "+ 6" below): 1 for late loop checking, 2 for
+           additional instr due to instr/attr, 3 for fog */
+        VERTEX_SHADER_INSTRUCTION instr[R200_VSF_MAX_INST + 6];
+        int pos_end;
+        int inputs[VERT_ATTRIB_MAX];
+        GLubyte inputmap_rev[16];
+        int native;
+        int fogpidx;
+        int fogmode;
+};
+
+#define R200_TEX_ALL 0x3f
+
+
+struct r200_texture_env_state {
+   radeonTexObjPtr texobj;
+   GLuint outputreg;
+   GLuint unitneeded;
+};
+
+#define R200_MAX_TEXTURE_UNITS 6
+
+struct r200_texture_state {
+   struct r200_texture_env_state unit[R200_MAX_TEXTURE_UNITS];
+};
+
+
+/* Trying to keep these relatively short as the variables are becoming
+ * extravagantly long.  Drop the driver name prefix off the front of
+ * everything - I think we know which driver we're in by now, and keep the
+ * prefix to 3 letters unless absolutely impossible.
+ */
+
+#define CTX_CMD_0             0
+#define CTX_PP_MISC           1
+#define CTX_PP_FOG_COLOR      2
+#define CTX_RE_SOLID_COLOR    3
+#define CTX_RB3D_BLENDCNTL    4
+#define CTX_RB3D_DEPTHOFFSET  5
+#define CTX_RB3D_DEPTHPITCH   6
+#define CTX_RB3D_ZSTENCILCNTL 7
+#define CTX_CMD_1             8
+#define CTX_PP_CNTL           9
+#define CTX_RB3D_CNTL         10
+#define CTX_RB3D_COLOROFFSET  11
+#define CTX_CMD_2             12 /* why */
+#define CTX_RB3D_COLORPITCH   13 /* why */
+#define CTX_STATE_SIZE_OLDDRM 14
+#define CTX_CMD_3             14
+#define CTX_RB3D_BLENDCOLOR   15
+#define CTX_RB3D_ABLENDCNTL   16
+#define CTX_RB3D_CBLENDCNTL   17
+#define CTX_STATE_SIZE_NEWDRM 18
+
+#define SET_CMD_0               0
+#define SET_SE_CNTL             1
+#define SET_RE_CNTL             2 /* replace se_coord_fmt */
+#define SET_STATE_SIZE          3
+
+#define VTE_CMD_0               0
+#define VTE_SE_VTE_CNTL         1
+#define VTE_STATE_SIZE          2
+
+#define LIN_CMD_0               0
+#define LIN_RE_LINE_PATTERN     1
+#define LIN_RE_LINE_STATE       2
+#define LIN_CMD_1               3
+#define LIN_SE_LINE_WIDTH       4
+#define LIN_STATE_SIZE          5
+
+#define MSK_CMD_0               0
+#define MSK_RB3D_STENCILREFMASK 1
+#define MSK_RB3D_ROPCNTL        2
+#define MSK_RB3D_PLANEMASK      3
+#define MSK_STATE_SIZE          4
+
+#define VPT_CMD_0           0
+#define VPT_SE_VPORT_XSCALE          1
+#define VPT_SE_VPORT_XOFFSET         2
+#define VPT_SE_VPORT_YSCALE          3
+#define VPT_SE_VPORT_YOFFSET         4
+#define VPT_SE_VPORT_ZSCALE          5
+#define VPT_SE_VPORT_ZOFFSET         6
+#define VPT_STATE_SIZE      7
+
+#define ZBS_CMD_0               0
+#define ZBS_SE_ZBIAS_FACTOR     1
+#define ZBS_SE_ZBIAS_CONSTANT   2
+#define ZBS_STATE_SIZE          3
+
+#define MSC_CMD_0               0
+#define MSC_RE_MISC             1
+#define MSC_STATE_SIZE          2
+
+#define TAM_CMD_0               0
+#define TAM_DEBUG3              1
+#define TAM_STATE_SIZE          2
+
+#define TEX_CMD_0                   0
+#define TEX_PP_TXFILTER             1  /*2c00*/
+#define TEX_PP_TXFORMAT             2  /*2c04*/
+#define TEX_PP_TXFORMAT_X           3  /*2c08*/
+#define TEX_PP_TXSIZE               4  /*2c0c*/
+#define TEX_PP_TXPITCH              5  /*2c10*/
+#define TEX_PP_BORDER_COLOR         6  /*2c14*/
+#define TEX_CMD_1_OLDDRM            7
+#define TEX_PP_TXOFFSET_OLDDRM      8  /*2d00 */
+#define TEX_STATE_SIZE_OLDDRM       9
+#define TEX_PP_CUBIC_FACES          7
+#define TEX_PP_TXMULTI_CTL          8
+#define TEX_CMD_1_NEWDRM            9
+#define TEX_PP_TXOFFSET_NEWDRM     10
+#define TEX_STATE_SIZE_NEWDRM      11
+
+#define CUBE_CMD_0                  0  /* 1 register follows */ /* this command unnecessary */
+#define CUBE_PP_CUBIC_FACES         1  /* 0x2c18 */             /* with new enough drm */
+#define CUBE_CMD_1                  2  /* 5 registers follow */
+#define CUBE_PP_CUBIC_OFFSET_F1     3  /* 0x2d04 */
+#define CUBE_PP_CUBIC_OFFSET_F2     4  /* 0x2d08 */
+#define CUBE_PP_CUBIC_OFFSET_F3     5  /* 0x2d0c */
+#define CUBE_PP_CUBIC_OFFSET_F4     6  /* 0x2d10 */
+#define CUBE_PP_CUBIC_OFFSET_F5     7  /* 0x2d14 */
+#define CUBE_STATE_SIZE             8
+
+#define PIX_CMD_0                   0
+#define PIX_PP_TXCBLEND             1
+#define PIX_PP_TXCBLEND2            2
+#define PIX_PP_TXABLEND             3
+#define PIX_PP_TXABLEND2            4
+#define PIX_STATE_SIZE              5
+
+#define TF_CMD_0                    0
+#define TF_TFACTOR_0                1
+#define TF_TFACTOR_1                2
+#define TF_TFACTOR_2                3
+#define TF_TFACTOR_3                4
+#define TF_TFACTOR_4                5
+#define TF_TFACTOR_5                6
+#define TF_STATE_SIZE               7
+
+#define ATF_CMD_0                   0
+#define ATF_TFACTOR_0               1
+#define ATF_TFACTOR_1               2
+#define ATF_TFACTOR_2               3
+#define ATF_TFACTOR_3               4
+#define ATF_TFACTOR_4               5
+#define ATF_TFACTOR_5               6
+#define ATF_TFACTOR_6               7
+#define ATF_TFACTOR_7               8
+#define ATF_STATE_SIZE              9
+
+/* ATI_FRAGMENT_SHADER */
+#define AFS_CMD_0                 0
+#define AFS_IC0                   1 /* 2f00 */
+#define AFS_IC1                   2 /* 2f04 */
+#define AFS_IA0                   3 /* 2f08 */
+#define AFS_IA1                   4 /* 2f0c */
+#define AFS_STATE_SIZE           33
+
+#define PVS_CMD_0                 0
+#define PVS_CNTL_1                1
+#define PVS_CNTL_2                2
+#define PVS_STATE_SIZE            3
+
+/* those are quite big... */
+#define VPI_CMD_0                 0
+#define VPI_OPDST_0               1
+#define VPI_SRC0_0                2
+#define VPI_SRC1_0                3
+#define VPI_SRC2_0                4
+#define VPI_OPDST_63              253
+#define VPI_SRC0_63               254
+#define VPI_SRC1_63               255
+#define VPI_SRC2_63               256
+#define VPI_STATE_SIZE            257
+
+#define VPP_CMD_0                0
+#define VPP_PARAM0_0             1
+#define VPP_PARAM1_0             2
+#define VPP_PARAM2_0             3
+#define VPP_PARAM3_0             4
+#define VPP_PARAM0_95            381
+#define VPP_PARAM1_95            382
+#define VPP_PARAM2_95            383
+#define VPP_PARAM3_95            384
+#define VPP_STATE_SIZE           385
+
+#define TCL_CMD_0                 0
+#define TCL_LIGHT_MODEL_CTL_0     1
+#define TCL_LIGHT_MODEL_CTL_1     2
+#define TCL_PER_LIGHT_CTL_0       3
+#define TCL_PER_LIGHT_CTL_1       4
+#define TCL_PER_LIGHT_CTL_2       5
+#define TCL_PER_LIGHT_CTL_3       6
+#define TCL_CMD_1                 7
+#define TCL_UCP_VERT_BLEND_CTL    8
+#define TCL_STATE_SIZE            9
+
+#define MSL_CMD_0                     0
+#define MSL_MATRIX_SELECT_0           1
+#define MSL_MATRIX_SELECT_1           2
+#define MSL_MATRIX_SELECT_2           3
+#define MSL_MATRIX_SELECT_3           4
+#define MSL_MATRIX_SELECT_4           5
+#define MSL_STATE_SIZE                6
+
+#define TCG_CMD_0                 0
+#define TCG_TEX_PROC_CTL_2            1
+#define TCG_TEX_PROC_CTL_3            2
+#define TCG_TEX_PROC_CTL_0            3
+#define TCG_TEX_PROC_CTL_1            4
+#define TCG_TEX_CYL_WRAP_CTL      5
+#define TCG_STATE_SIZE            6
+
+#define MTL_CMD_0            0	
+#define MTL_EMMISSIVE_RED    1	
+#define MTL_EMMISSIVE_GREEN  2	
+#define MTL_EMMISSIVE_BLUE   3	
+#define MTL_EMMISSIVE_ALPHA  4	
+#define MTL_AMBIENT_RED      5
+#define MTL_AMBIENT_GREEN    6
+#define MTL_AMBIENT_BLUE     7
+#define MTL_AMBIENT_ALPHA    8
+#define MTL_DIFFUSE_RED      9
+#define MTL_DIFFUSE_GREEN    10
+#define MTL_DIFFUSE_BLUE     11
+#define MTL_DIFFUSE_ALPHA    12
+#define MTL_SPECULAR_RED     13
+#define MTL_SPECULAR_GREEN   14
+#define MTL_SPECULAR_BLUE    15
+#define MTL_SPECULAR_ALPHA   16
+#define MTL_CMD_1            17
+#define MTL_SHININESS        18
+#define MTL_STATE_SIZE       19
+
+#define VAP_CMD_0                   0
+#define VAP_SE_VAP_CNTL             1
+#define VAP_STATE_SIZE              2
+
+/* Replaces a lot of packet info from radeon
+ */
+#define VTX_CMD_0                   0
+#define VTX_VTXFMT_0            1
+#define VTX_VTXFMT_1            2
+#define VTX_TCL_OUTPUT_VTXFMT_0 3
+#define VTX_TCL_OUTPUT_VTXFMT_1 4
+#define VTX_CMD_1               5
+#define VTX_TCL_OUTPUT_COMPSEL  6
+#define VTX_CMD_2               7
+#define VTX_STATE_CNTL          8
+#define VTX_STATE_SIZE          9
+
+/* SPR - point sprite state
+ */
+#define SPR_CMD_0              0
+#define SPR_POINT_SPRITE_CNTL  1
+#define SPR_STATE_SIZE         2
+
+#define PTP_CMD_0              0
+#define PTP_VPORT_SCALE_0      1
+#define PTP_VPORT_SCALE_1      2
+#define PTP_VPORT_SCALE_PTSIZE 3
+#define PTP_VPORT_SCALE_3      4
+#define PTP_CMD_1              5
+#define PTP_ATT_CONST_QUAD     6
+#define PTP_ATT_CONST_LIN      7
+#define PTP_ATT_CONST_CON      8
+#define PTP_ATT_CONST_3        9
+#define PTP_EYE_X             10
+#define PTP_EYE_Y             11
+#define PTP_EYE_Z             12
+#define PTP_EYE_3             13
+#define PTP_CLAMP_MIN         14
+#define PTP_CLAMP_MAX         15
+#define PTP_CLAMP_2           16
+#define PTP_CLAMP_3           17
+#define PTP_STATE_SIZE        18
+
+#define VTX_COLOR(v,n)   (((v)>>(R200_VTX_COLOR_0_SHIFT+(n)*2))&\
+                         R200_VTX_COLOR_MASK)
+
+/**
+ * Given the \c R200_SE_VTX_FMT_1 for the current vertex state, determine how
+ * many components (a 3-bit field per coordinate) texture coordinate \c n has.
+ */
+#define VTX_TEXn_COUNT(v,n)   (((v) >> (3 * (n))) & 0x07)
+
+#define MAT_CMD_0              0
+#define MAT_ELT_0              1
+#define MAT_STATE_SIZE         17
+
+#define GRD_CMD_0                  0
+#define GRD_VERT_GUARD_CLIP_ADJ    1
+#define GRD_VERT_GUARD_DISCARD_ADJ 2
+#define GRD_HORZ_GUARD_CLIP_ADJ    3
+#define GRD_HORZ_GUARD_DISCARD_ADJ 4
+#define GRD_STATE_SIZE             5
+
+/* position changes frequently when lighting in modelpos - separate
+ * out to new state item?  
+ */
+#define LIT_CMD_0                  0
+#define LIT_AMBIENT_RED            1
+#define LIT_AMBIENT_GREEN          2
+#define LIT_AMBIENT_BLUE           3
+#define LIT_AMBIENT_ALPHA          4
+#define LIT_DIFFUSE_RED            5
+#define LIT_DIFFUSE_GREEN          6
+#define LIT_DIFFUSE_BLUE           7
+#define LIT_DIFFUSE_ALPHA          8
+#define LIT_SPECULAR_RED           9
+#define LIT_SPECULAR_GREEN         10
+#define LIT_SPECULAR_BLUE          11
+#define LIT_SPECULAR_ALPHA         12
+#define LIT_POSITION_X             13
+#define LIT_POSITION_Y             14
+#define LIT_POSITION_Z             15
+#define LIT_POSITION_W             16
+#define LIT_DIRECTION_X            17
+#define LIT_DIRECTION_Y            18
+#define LIT_DIRECTION_Z            19
+#define LIT_DIRECTION_W            20
+#define LIT_ATTEN_QUADRATIC        21
+#define LIT_ATTEN_LINEAR           22
+#define LIT_ATTEN_CONST            23
+#define LIT_ATTEN_XXX              24
+#define LIT_CMD_1                  25
+#define LIT_SPOT_DCD               26
+#define LIT_SPOT_DCM               27
+#define LIT_SPOT_EXPONENT          28
+#define LIT_SPOT_CUTOFF            29
+#define LIT_SPECULAR_THRESH        30
+#define LIT_RANGE_CUTOFF           31 /* ? */
+#define LIT_ATTEN_CONST_INV        32
+#define LIT_STATE_SIZE             33
+
+/* Fog
+ */
+#define FOG_CMD_0      0
+#define FOG_R          1
+#define FOG_C          2
+#define FOG_D          3
+#define FOG_PAD        4
+#define FOG_STATE_SIZE 5
+
+/* UCP
+ */
+#define UCP_CMD_0      0
+#define UCP_X          1
+#define UCP_Y          2
+#define UCP_Z          3
+#define UCP_W          4
+#define UCP_STATE_SIZE 5
+
+/* GLT - Global ambient
+ */
+#define GLT_CMD_0      0
+#define GLT_RED        1
+#define GLT_GREEN      2
+#define GLT_BLUE       3
+#define GLT_ALPHA      4
+#define GLT_STATE_SIZE 5
+
+/* EYE
+ */
+#define EYE_CMD_0          0
+#define EYE_X              1
+#define EYE_Y              2
+#define EYE_Z              3
+#define EYE_RESCALE_FACTOR 4
+#define EYE_STATE_SIZE     5
+
+/* CST - constant state
+ */
+#define CST_CMD_0                             0
+#define CST_PP_CNTL_X                         1
+#define CST_CMD_1                             2
+#define CST_RB3D_DEPTHXY_OFFSET               3
+#define CST_CMD_2                             4
+#define CST_RE_AUX_SCISSOR_CNTL               5
+#define CST_CMD_3                             6
+#define CST_RE_SCISSOR_TL_0                   7
+#define CST_RE_SCISSOR_BR_0                   8
+#define CST_CMD_4                             9
+#define CST_SE_VAP_CNTL_STATUS                10
+#define CST_CMD_5                             11
+#define CST_RE_POINTSIZE                      12
+#define CST_CMD_6                             13
+#define CST_SE_TCL_INPUT_VTX_0                14
+#define CST_SE_TCL_INPUT_VTX_1                15
+#define CST_SE_TCL_INPUT_VTX_2                16
+#define CST_SE_TCL_INPUT_VTX_3                17
+#define CST_STATE_SIZE                        18
+
+#define PRF_CMD_0         0
+#define PRF_PP_TRI_PERF   1
+#define PRF_PP_PERF_CNTL  2
+#define PRF_STATE_SIZE    3
+
+
+#define SCI_CMD_0         0
+#define SCI_RE_AUX        1
+#define SCI_CMD_1         2
+#define SCI_XY_1          3
+#define SCI_CMD_2         4
+#define SCI_XY_2          5
+#define SCI_STATE_SIZE    6
+
+#define R200_QUERYOBJ_CMD_0  0
+#define R200_QUERYOBJ_DATA_0 1
+#define R200_QUERYOBJ_CMDSIZE  2
+
+#define STP_CMD_0 0
+#define STP_DATA_0 1
+#define STP_CMD_1 2
+#define STP_STATE_SIZE 35
+
+struct r200_hw_state {
+   /* Hardware state, stored as cmdbuf commands:  
+    *   -- Need to doublebuffer for
+    *           - reviving state after loss of context
+    *           - eliding noop statechange loops? (except line stipple count)
+    */
+   struct radeon_state_atom ctx; /* cmd[] is indexed by the CTX_* offsets, e.g. cmd[CTX_PP_CNTL] */
+   struct radeon_state_atom set;
+   struct radeon_state_atom sci;
+   struct radeon_state_atom vte;
+   struct radeon_state_atom lin;
+   struct radeon_state_atom msk;
+   struct radeon_state_atom vpt;
+   struct radeon_state_atom vap;
+   struct radeon_state_atom vtx;
+   struct radeon_state_atom tcl;
+   struct radeon_state_atom msl;
+   struct radeon_state_atom tcg;
+   struct radeon_state_atom msc;
+   struct radeon_state_atom cst; /* cmd[] is indexed by the CST_* offsets, e.g. cmd[CST_PP_CNTL_X] */
+   struct radeon_state_atom tam;
+   struct radeon_state_atom tf;
+   struct radeon_state_atom tex[R200_MAX_TEXTURE_UNITS]; /* one atom per texture unit */
+   struct radeon_state_atom cube[6]; /* NOTE(review): presumably also one per texture unit - confirm */
+   struct radeon_state_atom zbs;
+   struct radeon_state_atom mtl[2];
+   struct radeon_state_atom mat[9];
+   struct radeon_state_atom lit[8]; /* includes vec, scl commands */
+   struct radeon_state_atom ucp[6];
+   struct radeon_state_atom pix[6]; /* pixshader stages */
+   struct radeon_state_atom eye; /* eye pos */
+   struct radeon_state_atom grd; /* guard band clipping */
+   struct radeon_state_atom fog;
+   struct radeon_state_atom glt;
+   struct radeon_state_atom prf;
+   struct radeon_state_atom afs[2]; /* ATI fragment shader arithmetic instructions */
+   struct radeon_state_atom pvs;
+   struct radeon_state_atom vpi[2];
+   struct radeon_state_atom vpp[2];
+   struct radeon_state_atom atf;
+   struct radeon_state_atom spr;
+   struct radeon_state_atom ptp;
+   struct radeon_state_atom stp;
+};
+
+struct r200_state {
+   /* Derived state for internal purposes:
+    */
+   struct r200_texture_state texture;
+   GLuint envneeded;
+};
+
+#define R200_CMD_BUF_SZ  (16*1024) 
+
+#define R200_ELT_BUF_SZ  (16*1024) 
+/* r200_tcl.c
+ */
+struct r200_tcl_info {
+   GLuint hw_primitive;
+
+   int elt_used;
+
+};
+
+
+/* r200_swtcl.c
+ */
+struct r200_swtcl_info {
+
+
+   radeon_point_func draw_point;
+   radeon_line_func draw_line;
+   radeon_tri_func draw_tri;
+
+   /**
+    * Offset of the 4UB color data within a hardware (swtcl) vertex.
+    */
+   GLuint coloroffset;
+
+   /**
+    * Offset of the 3UB specular color data within a hardware (swtcl) vertex.
+    */
+   GLuint specoffset;
+
+   /**
+    * Should Mesa project vertex data or will the hardware do it?
+    */
+   GLboolean needproj;
+};
+
+
+
+
+   /* A maximum total of 29 elements per vertex:  3 floats for position, 3
+    * floats for normal, 4 floats for color, 4 bytes for secondary color,
+    * 3 floats for each texture unit (18 floats total).
+    * 
+    * We may need 4 additional elements to prevent a segfault if someone
+    * specifies GL_TEXTURE6/GL_TEXTURE7 (esp. for the codegen path) (FIXME)
+    * 
+    * The position data is never actually stored here, so 3 elements could be
+    * trimmed out of the buffer.
+    */
+
+#define R200_MAX_VERTEX_SIZE ((3*6)+11)
+
+struct r200_context {
+   struct radeon_context radeon; /* common radeon state: radeonScreen, optionCache, glCtx, ... */
+
+   /* Driver and hardware state management
+    */
+   struct r200_hw_state hw;
+   struct r200_state state;
+   struct r200_vertex_program *curr_vp_hw;
+
+   /* Vertex buffers
+    */
+   struct radeon_ioctl ioctl;
+   struct radeon_store store;
+
+   /* Clientdata textures; nonzero when R200_GART_CLIENT_TEXTURES is set in the environment
+    */
+   GLuint prefer_gart_client_texturing;
+
+   /* TCL stuff (the TexGenMatrix entries are released in r200DestroyContext)
+    */
+   GLmatrix TexGenMatrix[R200_MAX_TEXTURE_UNITS];
+   GLboolean recheck_texgen[R200_MAX_TEXTURE_UNITS];
+   GLboolean TexGenNeedNormals[R200_MAX_TEXTURE_UNITS];
+   GLuint TexMatEnabled;
+   GLuint TexMatCompSel;
+   GLuint TexGenEnabled;
+   GLuint TexGenCompSel;
+   GLmatrix tmpmat;
+
+   /* r200_tcl.c
+    */
+   struct r200_tcl_info tcl;
+
+   /* r200_swtcl.c
+    */
+   struct r200_swtcl_info swtcl;
+
+   GLboolean using_hyperz;
+   GLboolean texmicrotile;
+
+  struct ati_fragment_shader *afs_loaded; /* shader last translated by r200UpdateFSArith */
+};
+
+#define R200_CONTEXT(ctx)		((r200ContextPtr)((ctx)->DriverCtx)) /* ctx: any GLcontext pointer expression */
+
+
+extern void r200DestroyContext( __DRIcontext *driContextPriv );
+extern GLboolean r200CreateContext( gl_api api,
+				    const __GLcontextModes *glVisual,
+				    __DRIcontext *driContextPriv,
+				    void *sharedContextPrivate);
+extern GLboolean r200MakeCurrent( __DRIcontext *driContextPriv,
+				  __DRIdrawable *driDrawPriv,
+				  __DRIdrawable *driReadPriv );
+extern GLboolean r200UnbindContext( __DRIcontext *driContextPriv );
+
+extern void r200_init_texcopy_functions(struct dd_function_table *table);
+
+/* ================================================================
+ * Debugging:
+ */
+
+#define R200_DEBUG RADEON_DEBUG
+
+
+
+#endif /* __R200_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/r200/r200_fragshader.c b/src/mesa/drivers/dri/r200/r200_fragshader.c
new file mode 100644
index 0000000000..85c1b7bdd1
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_fragshader.c
@@ -0,0 +1,548 @@
+/**************************************************************************
+ *
+ * Copyright 2004 David Airlie
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL DAVID AIRLIE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "tnl/t_context.h"
+#include "shader/atifragshader.h"
+#include "shader/program.h"
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_tex.h"
+
+#define SET_INST(inst, type) afs_cmd[(((inst)<<2) + ((type)<<1) + 1)]   /* 1st word of the pair; needs afs_cmd in scope */
+#define SET_INST_2(inst, type) afs_cmd[(((inst)<<2) + ((type)<<1) + 2)] /* 2nd word of the same pair */
+
+/* Fold one ATI_fragment_shader source operand into the instruction word pair
+ * SET_INST/SET_INST_2(opnum, optype) of afs_cmd.  argPos selects the argument
+ * slot; *tfactor tracks the constant bound to the TFACTOR select (0 = none). */
+static void r200SetFragShaderArg( GLuint *afs_cmd, GLuint opnum, GLuint optype,
+				const struct atifragshader_src_register srcReg,
+				GLuint argPos, GLuint *tfactor )
+{
+   const GLuint src = srcReg.Index;
+   const GLuint mods = srcReg.argMod;
+   const GLuint rep = srcReg.argRep;
+   const GLuint selShift = 5*argPos;	/* shift of this arg's source select */
+   const GLuint modShift = 4*argPos;	/* shift of this arg's modifier flags */
+   const GLuint replShift = R200_TXC_REPL_ARG_A_SHIFT + (2*argPos);
+   GLuint word0 = 0;
+   GLuint word1 = 0;
+   GLuint odd = 0;
+
+   /* Component replication; "odd" bumps the source select by one. */
+   if (rep == GL_RED || rep == GL_GREEN) {
+      word1 |= (rep == GL_RED ? R200_TXC_REPL_RED : R200_TXC_REPL_GREEN) << replShift;
+      odd = optype ? 1 : 0;
+   }
+   else if (rep == GL_BLUE) {
+      if (optype)
+	 odd = 1;
+      else
+	 word1 |= R200_TXC_REPL_BLUE << replShift;
+   }
+   else if (rep == GL_ALPHA) {
+      odd = optype ? 0 : 1;
+   }
+
+   /* Source select. */
+   if (src >= GL_REG_0_ATI && src <= GL_REG_5_ATI) {
+      word0 |= (((src - GL_REG_0_ATI)*2) + 10 + odd) << selShift;
+   }
+   else if (src >= GL_CON_0_ATI && src <= GL_CON_7_ATI) {
+      const GLuint con = src - GL_CON_0_ATI;
+
+      if (*tfactor != 0 && src != *tfactor) {
+	 /* a different constant already owns TFACTOR: use TFACTOR1 */
+	 word0 |= (R200_TXC_ARG_A_TFACTOR1_COLOR + odd) << selShift;
+	 word1 |= con << R200_TXC_TFACTOR1_SEL_SHIFT;
+      }
+      else {
+	 word0 |= (R200_TXC_ARG_A_TFACTOR_COLOR + odd) << selShift;
+	 word1 |= con << R200_TXC_TFACTOR_SEL_SHIFT;
+	 *tfactor = src;
+      }
+   }
+   else if (src == GL_PRIMARY_COLOR_EXT)
+      word0 |= (R200_TXC_ARG_A_DIFFUSE_COLOR + odd) << selShift;
+   else if (src == GL_SECONDARY_INTERPOLATOR_ATI)
+      word0 |= (R200_TXC_ARG_A_SPECULAR_COLOR + odd) << selShift;
+   else if (src == GL_ONE)
+      word0 |= R200_TXC_COMP_ARG_A << modShift;	/* GL_ZERO is a noop, for GL_ONE we set the complement */
+
+   /* Source modifiers: COMP and NEGATE toggle their bit, BIAS and 2X set it. */
+   if (mods & GL_COMP_BIT_ATI)
+      word0 ^= R200_TXC_COMP_ARG_A << modShift;
+   if (mods & GL_BIAS_BIT_ATI)
+      word0 |= R200_TXC_BIAS_ARG_A << modShift;
+   if (mods & GL_2X_BIT_ATI)
+      word0 |= R200_TXC_SCALE_ARG_A << modShift;
+   if (mods & GL_NEGATE_BIT_ATI)
+      word0 ^= R200_TXC_NEG_ARG_A << modShift;
+
+   SET_INST(opnum, optype) |= word0;
+   SET_INST_2(opnum, optype) |= word1;
+}
+
+static const GLuint dstmask_table[8] =	/* read-only: output mask bits, indexed by DstReg dstMask */
+{
+   R200_TXC_OUTPUT_MASK_RGB,	/* index 0 selects all of RGB, same as index 7 */
+   R200_TXC_OUTPUT_MASK_R,
+   R200_TXC_OUTPUT_MASK_G,
+   R200_TXC_OUTPUT_MASK_RG,
+   R200_TXC_OUTPUT_MASK_B,
+   R200_TXC_OUTPUT_MASK_RB,
+   R200_TXC_OUTPUT_MASK_GB,
+   R200_TXC_OUTPUT_MASK_RGB
+};
+
+static void r200UpdateFSArith( GLcontext *ctx ) /* encode the current ATI fragment shader's arithmetic instructions into hw.afs[] */
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint *afs_cmd;
+   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
+   GLuint pass;
+
+   R200_STATECHANGE( rmesa, afs[0] );
+   R200_STATECHANGE( rmesa, afs[1] );
+
+   if (shader->NumPasses < 2) { /* fewer than two passes: everything is written to afs[1] */
+      afs_cmd = (GLuint *) rmesa->hw.afs[1].cmd;
+   }
+   else { /* two passes: pass 0 is written to afs[0], pass 1 to afs[1] (switched at the end of the pass loop) */
+      afs_cmd = (GLuint *) rmesa->hw.afs[0].cmd;
+   }
+   for (pass = 0; pass < shader->NumPasses; pass++) {
+      GLuint opnum = 0;
+      GLuint pc;
+      for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
+         GLuint optype;
+	 struct atifs_instruction *inst = &shader->Instructions[pass][pc];
+
+	 SET_INST(opnum, 0) = 0; /* clear both word pairs (optype 0 and 1) of this slot before OR-ing bits in */
+	 SET_INST_2(opnum, 0) = 0;
+	 SET_INST(opnum, 1) = 0;
+	 SET_INST_2(opnum, 1) = 0;
+
+	 for (optype = 0; optype < 2; optype++) {
+	    GLuint tfactor = 0; /* no constant assigned to the TFACTOR select yet for this op */
+
+	    if (inst->Opcode[optype]) {
+	       switch (inst->Opcode[optype]) {
+	       /* these are all MADD in disguise
+		  MADD is A * B + C
+		  so for GL_ADD use arg B/C and make A complement 0
+		  for GL_SUB use arg B/C, negate C and make A complement 0
+		  for GL_MOV use arg C
+		  for GL_MUL use arg A
+		  for GL_MAD all good */
+	       case GL_SUB_ATI:
+		  /* negate C; NOTE(review): a NEGATE modifier on SrcReg[..][1] is OR-ed in below and presumably does not cancel this bit - confirm */
+		  SET_INST(opnum, optype) |= R200_TXC_NEG_ARG_C;
+		  /* fallthrough */
+	       case GL_ADD_ATI:
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][0], 1, &tfactor);
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][1], 2, &tfactor);
+		  /* A = complement 0 */
+		  SET_INST(opnum, optype) |= R200_TXC_COMP_ARG_A;
+		  SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
+		  break;
+	       case GL_MOV_ATI:
+		  /* put arg0 in C */
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][0], 2, &tfactor);
+		  SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
+		  break;
+	       case GL_MAD_ATI:
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][2], 2, &tfactor);
+		  /* fallthrough */
+	       case GL_MUL_ATI:
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][0], 0, &tfactor);
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][1], 1, &tfactor);
+		  SET_INST(opnum, optype) |= R200_TXC_OP_MADD;
+		  break;
+	       case GL_LERP_ATI:
+		  /* arg order is not native chip order, swap A and C */
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][0], 2, &tfactor);
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][1], 1, &tfactor);
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][2], 0, &tfactor);
+		  SET_INST(opnum, optype) |= R200_TXC_OP_LERP;
+		  break;
+	       case GL_CND_ATI:
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][0], 0, &tfactor);
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][1], 1, &tfactor);
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][2], 2, &tfactor);
+		  SET_INST(opnum, optype) |= R200_TXC_OP_CONDITIONAL;
+		  break;
+	       case GL_CND0_ATI:
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][0], 0, &tfactor);
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][1], 1, &tfactor);
+		  r200SetFragShaderArg(afs_cmd, opnum, optype,
+					inst->SrcReg[optype][2], 2, &tfactor);
+		  SET_INST(opnum, optype) |= R200_TXC_OP_CND0;
+		  break;
+		  /* cannot specify dot ops as alpha ops directly */
+	       case GL_DOT2_ADD_ATI:
+		  if (optype)
+		     SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
+		  else {
+		     r200SetFragShaderArg(afs_cmd, opnum, 0,
+					inst->SrcReg[0][0], 0, &tfactor);
+		     r200SetFragShaderArg(afs_cmd, opnum, 0,
+					inst->SrcReg[0][1], 1, &tfactor);
+		     r200SetFragShaderArg(afs_cmd, opnum, 0,
+					inst->SrcReg[0][2], 2, &tfactor);
+		     SET_INST(opnum, 0) |= R200_TXC_OP_DOT2_ADD;
+		  }
+		  break;
+	       case GL_DOT3_ATI:
+		  if (optype)
+		     SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
+		  else {
+		     r200SetFragShaderArg(afs_cmd, opnum, 0,
+					inst->SrcReg[0][0], 0, &tfactor);
+		     r200SetFragShaderArg(afs_cmd, opnum, 0,
+					inst->SrcReg[0][1], 1, &tfactor);
+		     SET_INST(opnum, 0) |= R200_TXC_OP_DOT3;
+		  }
+		  break;
+	       case GL_DOT4_ATI:
+	       /* experimental verification: for dot4 setup of alpha args is needed
+		  (dstmod is ignored, though, so dot2/dot3 should be safe)
+		  the hardware apparently does R1*R2 + G1*G2 + B1*B2 + A3*A4
+		  but the API doesn't allow it */
+		  if (optype)
+		     SET_INST_2(opnum, 1) |= R200_TXA_DOT_ALPHA;
+		  else {
+		     r200SetFragShaderArg(afs_cmd, opnum, 0,
+					inst->SrcReg[0][0], 0, &tfactor);
+		     r200SetFragShaderArg(afs_cmd, opnum, 0,
+					inst->SrcReg[0][1], 1, &tfactor);
+		     r200SetFragShaderArg(afs_cmd, opnum, 1,
+					inst->SrcReg[0][0], 0, &tfactor);
+		     r200SetFragShaderArg(afs_cmd, opnum, 1,
+					inst->SrcReg[0][1], 1, &tfactor);
+		     SET_INST(opnum, optype) |= R200_TXC_OP_DOT4;
+		  }
+		  break;
+	       }
+	    }
+
+	    /* destination */
+	    if (inst->DstReg[optype].Index) {
+	       GLuint dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
+	       GLuint dstmask = inst->DstReg[optype].dstMask;
+	       GLuint sat = inst->DstReg[optype].dstMod & GL_SATURATE_BIT_ATI;
+	       GLuint dstmod = inst->DstReg[optype].dstMod;
+
+	       dstmod &= ~GL_SATURATE_BIT_ATI; /* saturation is handled via "sat"; the rest selects the scale below */
+
+	       SET_INST_2(opnum, optype) |= (dstreg + 1) << R200_TXC_OUTPUT_REG_SHIFT;
+	       SET_INST_2(opnum, optype) |= dstmask_table[dstmask];
+
+		/* fglrx does clamp the last instructions to 0_1 it seems */
+		/* this won't necessarily catch the last instruction
+		   which writes to reg0 */
+	       if (sat || (pc == (shader->numArithInstr[pass] - 1) &&
+			((pass == 1) || (shader->NumPasses == 1))))
+		  SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_0_1;
+	       else
+		/*should we clamp or not? spec is vague, I would suppose yes but fglrx doesn't */
+		  SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_8_8;
+/*		  SET_INST_2(opnum, optype) |= R200_TXC_CLAMP_WRAP;*/
+	       switch(dstmod) {
+	       case GL_2X_BIT_ATI:
+		  SET_INST_2(opnum, optype) |= R200_TXC_SCALE_2X;
+		  break;
+	       case GL_4X_BIT_ATI:
+		  SET_INST_2(opnum, optype) |= R200_TXC_SCALE_4X;
+		  break;
+	       case GL_8X_BIT_ATI:
+		  SET_INST_2(opnum, optype) |= R200_TXC_SCALE_8X;
+		  break;
+	       case GL_HALF_BIT_ATI:
+		  SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV2;
+		  break;
+	       case GL_QUARTER_BIT_ATI:
+		  SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV4;
+		  break;
+	       case GL_EIGHTH_BIT_ATI:
+		  SET_INST_2(opnum, optype) |= R200_TXC_SCALE_INV8;
+		  break;
+	       default:
+		  break;
+	       }
+	    }
+	 }
+/*	 fprintf(stderr, "pass %d nr %d inst 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n",
+		pass, opnum, SET_INST(opnum, 0), SET_INST_2(opnum, 0),
+		SET_INST(opnum, 1), SET_INST_2(opnum, 1));*/
+         opnum++;
+      }
+      afs_cmd = (GLuint *) rmesa->hw.afs[1].cmd; /* any following pass is written to afs[1] */
+   }
+   rmesa->afs_loaded = ctx->ATIFragmentShader.Current; /* remember the shader that was just encoded */
+}
+
+static void r200UpdateFSRouting( GLcontext *ctx ) {
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
+   GLuint reg;
+   /* Rebuild PP_CNTL, PP_CNTL_X and per-unit TXFORMAT/TXFORMAT_X/TXMULTI_CTL from the current shader. */
+   R200_STATECHANGE( rmesa, ctx );
+   R200_STATECHANGE( rmesa, cst );
+
+   for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
+      if (shader->swizzlerq & (1 << (2 * reg)))
+	 /* r coord */
+	 set_re_cntl_d3d( ctx, reg, 1);
+	 /* q coord */
+      else set_re_cntl_d3d( ctx, reg, 0);
+   }
+   /* clear the enable/routing fields that are recomputed below */
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_MULTI_PASS_ENABLE |
+				       R200_TEX_BLEND_ENABLE_MASK |
+				       R200_TEX_ENABLE_MASK);
+   rmesa->hw.cst.cmd[CST_PP_CNTL_X] &= ~(R200_PPX_PFS_INST_ENABLE_MASK |
+					 R200_PPX_TEX_ENABLE_MASK |
+					 R200_PPX_OUTPUT_REG_MASK);
+
+   /* first pass registers use slots 8 - 15
+      but single pass shaders use slots 0 - 7 */
+   if (shader->NumPasses < 2) { /* (0xff >> (8 - n)) is a mask with the n low bits set */
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[0] == 8 ?
+	 0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) :
+	 (0xff >> (8 - shader->numArithInstr[0])) << R200_TEX_BLEND_0_ENABLE_SHIFT;
+   } else {
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_MULTI_PASS_ENABLE;
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= shader->numArithInstr[1] == 8 ?
+	 0xff << (R200_TEX_BLEND_0_ENABLE_SHIFT - 1) :
+	 (0xff >> (8 - shader->numArithInstr[1])) << R200_TEX_BLEND_0_ENABLE_SHIFT;
+      rmesa->hw.cst.cmd[CST_PP_CNTL_X] |=
+	 (0xff >> (8 - shader->numArithInstr[0])) << R200_PPX_FPS_INST0_ENABLE_SHIFT;
+   }
+
+   if (shader->NumPasses < 2) {
+      for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
+	 GLbitfield targetbit = ctx->Texture.Unit[reg]._ReallyEnabled;
+         R200_STATECHANGE( rmesa, tex[reg] );
+	 rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = 0;
+	 if (shader->SetupInst[0][reg].Opcode) {
+	    GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT]
+		& ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+	    GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK;
+	    txformat |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB)
+		<< R200_TXFORMAT_ST_ROUTE_SHIFT;
+	    /* fix up texcoords for proj/non-proj 2d (3d and cube are not defined when
+	       using projection so don't have to worry there).
+	       When passing coords, need R200_TEXCOORD_VOLUME, otherwise lose a coord */
+	    /* FIXME: someone might rely on default tex coords r/q, which we unfortunately
+	       don't provide (we have the same problem without shaders) */
+	    if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
+	       txformat |= R200_TXFORMAT_LOOKUP_DISABLE;
+	       if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+		  shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+		  txformat_x |= R200_TEXCOORD_VOLUME;
+	       }
+	       else {
+		  txformat_x |= R200_TEXCOORD_PROJ;
+	       }
+	       rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
+	    }
+	    else if (targetbit == TEXTURE_3D_BIT) {
+	       txformat_x |= R200_TEXCOORD_VOLUME;
+	    }
+	    else if (targetbit == TEXTURE_CUBE_BIT) {
+	       txformat_x |= R200_TEXCOORD_CUBIC_ENV;
+	    }
+	    else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+	       shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+	       txformat_x |= R200_TEXCOORD_NONPROJ;
+	    }
+	    else {
+	       txformat_x |= R200_TEXCOORD_PROJ;
+	    }
+	    rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat;
+	    rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x;
+	    /* enabling texturing when unit isn't correctly configured may not be safe */
+	    if (targetbit)
+	       rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
+	 }
+      }
+
+   } else {
+      /* setup 1st pass */
+      for (reg = 0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
+	 GLbitfield targetbit = ctx->Texture.Unit[reg]._ReallyEnabled;
+	 R200_STATECHANGE( rmesa, tex[reg] );
+	 GLuint txformat_multi = 0;
+	 if (shader->SetupInst[0][reg].Opcode) {
+	    txformat_multi |= (shader->SetupInst[0][reg].src - GL_TEXTURE0_ARB)
+		<< R200_PASS1_ST_ROUTE_SHIFT;
+	    if (shader->SetupInst[0][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
+	       txformat_multi |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE;
+	       if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+		  shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+		  txformat_multi |= R200_PASS1_TEXCOORD_VOLUME;
+	       }
+	       else {
+		  txformat_multi |= R200_PASS1_TEXCOORD_PROJ;
+	       }
+	       rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_0_ENABLE << reg;
+	    }
+	    else if (targetbit == TEXTURE_3D_BIT) {
+	       txformat_multi |= R200_PASS1_TEXCOORD_VOLUME;
+	    }
+	    else if (targetbit == TEXTURE_CUBE_BIT) {
+	       txformat_multi |= R200_PASS1_TEXCOORD_CUBIC_ENV;
+	    }
+	    else if (shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+		  shader->SetupInst[0][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+		  txformat_multi |= R200_PASS1_TEXCOORD_NONPROJ;
+	    }
+	    else {
+	       txformat_multi |= R200_PASS1_TEXCOORD_PROJ;
+	    }
+	    if (targetbit)
+	       rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_0_ENABLE << reg;
+	 }
+         rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi;
+      }
+
+      /* setup 2nd pass */
+      for (reg=0; reg < R200_MAX_TEXTURE_UNITS; reg++) {
+	 GLbitfield targetbit = ctx->Texture.Unit[reg]._ReallyEnabled;
+	 if (shader->SetupInst[1][reg].Opcode) {
+	    GLuint coord = shader->SetupInst[1][reg].src;
+	    GLuint txformat = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT]
+		& ~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+	    GLuint txformat_x = rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] & ~R200_TEXCOORD_MASK;
+	    R200_STATECHANGE( rmesa, tex[reg] );
+	    if (shader->SetupInst[1][reg].Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
+	       txformat |= R200_TXFORMAT_LOOKUP_DISABLE;
+	       txformat_x |= R200_TEXCOORD_VOLUME; /* NOTE(review): unconditional, unlike the single-pass and 1st-pass PASS_OP paths; it is OR'ed with PROJ below - confirm intended */
+	       if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+		  shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+	          txformat_x |= R200_TEXCOORD_VOLUME;
+	       }
+	       else {
+		  txformat_x |= R200_TEXCOORD_PROJ;
+	       }
+	       rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
+	    }
+	    else if (targetbit == TEXTURE_3D_BIT) {
+	       txformat_x |= R200_TEXCOORD_VOLUME;
+	    }
+	    else if (targetbit == TEXTURE_CUBE_BIT) {
+	       txformat_x |= R200_TEXCOORD_CUBIC_ENV;
+	    }
+	    else if (shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STR_ATI ||
+	       shader->SetupInst[1][reg].swizzle == GL_SWIZZLE_STQ_ATI) {
+	       txformat_x |= R200_TEXCOORD_NONPROJ;
+	    }
+	    else {
+	       txformat_x |= R200_TEXCOORD_PROJ;
+	    }
+	    if (coord >= GL_REG_0_ATI) { /* source is a GL_REG_n_ATI register (presumably written in pass 1) rather than a texcoord set */
+	       GLuint txformat_multi = rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL];
+	       txformat_multi |= (coord - GL_REG_0_ATI + 2) << R200_PASS2_COORDS_REG_SHIFT;
+	       rmesa->hw.tex[reg].cmd[TEX_PP_TXMULTI_CTL] = txformat_multi;
+	       rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= 1 <<
+		  (R200_PPX_OUTPUT_REG_0_SHIFT + coord - GL_REG_0_ATI);
+	    } else {
+	       txformat |= (coord - GL_TEXTURE0_ARB) << R200_TXFORMAT_ST_ROUTE_SHIFT;
+	    }
+	    rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT_X] = txformat_x;
+	    rmesa->hw.tex[reg].cmd[TEX_PP_TXFORMAT] = txformat;
+	    if (targetbit)
+	       rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << reg;
+	 }
+      }
+   }
+}
+
+static void r200UpdateFSConstants( GLcontext *ctx )
+{
+   /* Refresh the eight shader constants in the atf atom
+    * (ATF_TFACTOR_0 .. ATF_TFACTOR_0 + 7).  A slot whose bit is set in
+    * LocalConstDef takes the shader's own value, every other slot takes the
+    * context-wide GlobalConstants entry.
+    */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
+   GLuint slot;
+
+   R200_STATECHANGE(rmesa, atf);
+   for (slot = 0; slot < 8; slot++) {
+      const GLfloat *src = ((shader->LocalConstDef >> slot) & 1)
+	 ? shader->Constants[slot]
+	 : ctx->ATIFragmentShader.GlobalConstants[slot];
+      GLubyte rgba[4];
+      GLuint chan;
+
+      /* float channels -> bytes, then one radeonPackColor word per constant */
+      for (chan = 0; chan < 4; chan++) {
+	 CLAMPED_FLOAT_TO_UBYTE(rgba[chan], src[chan]);
+      }
+      rmesa->hw.atf.cmd[ATF_TFACTOR_0 + slot] =
+	 radeonPackColor( 4, rgba[0], rgba[1], rgba[2], rgba[3] );
+   }
+}
+
+/* Bring routing, constants and arithmetic in line with the current shader.
+ * Constants are always rewritten: globals can change and there is no separate
+ * notification for that.
+ * Routing is always rewritten too: non-shader code overwrites that state, and
+ * part of it depends on what sort of texture is bound.
+ * Both would otherwise need tracking across ati_fs being enabled/disabled.
+ * Arithmetic is only re-emitted when the current shader changed (the data
+ * should probably live in a DriverData object attached to the mesa atifs
+ * object, so that binding a shader would not force a "recompile").
+ */
+void r200UpdateFragmentShader( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   r200UpdateFSConstants( ctx );
+   r200UpdateFSRouting( ctx );
+   if (rmesa->afs_loaded == ctx->ATIFragmentShader.Current)
+      return;
+   r200UpdateFSArith( ctx );
+}
diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c
new file mode 100644
index 0000000000..b72f69b7f4
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_ioctl.c
@@ -0,0 +1,396 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <sched.h>
+#include <errno.h>
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "swrast/swrast.h"
+
+
+
+#include "radeon_common.h"
+#include "radeon_lock.h"
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "radeon_reg.h"
+
+#include "vblank.h"
+
+#define R200_TIMEOUT             512
+#define R200_IDLE_RETRY           16
+
+/* Clear buffers through the DRM_RADEON_CLEAR ioctl; r200Clear uses this when
+ * the screen is not kernel_mm.  Emits pending state, throttles against
+ * RADEON_PARAM_LAST_CLEAR, then issues one clear ioctl per batch of at most
+ * RADEON_NR_SAREA_CLIPRECTS cliprects, clipped to the draw buffer bounds.
+ * An ioctl failure is fatal: the hardware lock is released and we exit.
+ */
+static void r200KernelClear(GLcontext *ctx, GLuint flags)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
+   GLint cx, cy, cw, ch, ret;
+   GLuint i;
+
+   radeonEmitState(&rmesa->radeon);
+
+   LOCK_HARDWARE( &rmesa->radeon );
+
+   /* Throttle the number of clear ioctls we do. */
+   while ( 1 ) {
+      drm_radeon_getparam_t gp;
+      int clear;
+
+      gp.param = RADEON_PARAM_LAST_CLEAR;
+      gp.value = (int *)&clear;
+      ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
+		      DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
+
+      if ( ret ) {
+	 UNLOCK_HARDWARE( &rmesa->radeon );	/* don't exit holding the lock */
+	 fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret );
+	 exit(1);
+      }
+
+      /* Clear throttling needs more thought. */
+      if ( rmesa->radeon.sarea->last_clear - clear <= 25 ) {
+	 break;
+      }
+
+      if (rmesa->radeon.do_usleeps) {
+	 UNLOCK_HARDWARE( &rmesa->radeon );
+	 DO_USLEEP( 1 );
+	 LOCK_HARDWARE( &rmesa->radeon );
+      }
+   }
+
+   /* Send current state to the hardware */
+   rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
+
+   /* compute region after locking: */
+   cx = ctx->DrawBuffer->_Xmin;
+   cy = ctx->DrawBuffer->_Ymin;
+   cw = ctx->DrawBuffer->_Xmax - cx;
+   ch = ctx->DrawBuffer->_Ymax - cy;
+
+   /* Flip top to bottom */
+   cx += dPriv->x;
+   cy  = dPriv->y + dPriv->h - cy - ch;
+   for ( i = 0 ; i < dPriv->numClipRects ; ) {
+      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t *b = rmesa->radeon.sarea->boxes;
+      drm_radeon_clear_t clear;
+      drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
+      GLint n = 0;
+
+      if (cw != dPriv->w || ch != dPriv->h) {
+         /* clear subregion */
+	 for ( ; i < nr ; i++ ) {
+	    GLint x = box[i].x1;
+	    GLint y = box[i].y1;
+	    GLint w = box[i].x2 - x;
+	    GLint h = box[i].y2 - y;
+
+	    if ( x < cx ) w -= cx - x, x = cx;
+	    if ( y < cy ) h -= cy - y, y = cy;
+	    if ( x + w > cx + cw ) w = cx + cw - x;
+	    if ( y + h > cy + ch ) h = cy + ch - y;
+	    if ( w <= 0 || h <= 0 ) continue;
+
+	    b->x1 = x;
+	    b->y1 = y;
+	    b->x2 = x + w;
+	    b->y2 = y + h;
+	    b++;
+	    n++;
+	 }
+      } else {
+         /* clear whole window */
+	 for ( ; i < nr ; i++ ) {
+	    *b++ = box[i];
+	    n++;
+	 }
+      }
+
+      rmesa->radeon.sarea->nbox = n;
+
+      clear.flags       = flags;
+      clear.clear_color = rmesa->radeon.state.color.clear;
+      clear.clear_depth = rmesa->radeon.state.depth.clear;	/* needed for hyperz */
+      clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+      clear.depth_mask  = rmesa->radeon.state.stencil.clear;
+      clear.depth_boxes = depth_boxes;
+
+      n--;
+      b = rmesa->radeon.sarea->boxes;
+      for ( ; n >= 0 ; n-- ) {
+	 depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
+	 depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
+	 depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2;
+	 depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2;
+	 depth_boxes[n].f[CLEAR_DEPTH] = ctx->Depth.Clear;
+      }
+
+      ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR,
+			     &clear, sizeof(clear));
+      if ( ret ) {
+	 UNLOCK_HARDWARE( &rmesa->radeon );
+	 fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
+	 exit( 1 );
+      }
+   }
+   UNLOCK_HARDWARE( &rmesa->radeon );
+}
+/* ================================================================
+ * Buffer clear (installed as the driver's Clear hook).  Front, back, depth
+ * and stencil bits become RADEON_* flags and are cleared by radeonUserClear
+ * (kernel_mm) or r200KernelClear; other bits go to _swrast_Clear.
+ */
+static void r200Clear( GLcontext *ctx, GLbitfield mask )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
+   GLuint flags = 0;
+   GLuint orig_mask = mask;
+
+   if ( R200_DEBUG & RADEON_IOCTL ) {
+	   if (rmesa->radeon.sarea)
+	       fprintf( stderr, "r200Clear %x %d\n", mask, rmesa->radeon.sarea->pfCurrentPage);
+	   else
+	       fprintf( stderr, "r200Clear %x radeon->sarea is NULL\n", mask);
+   }
+
+   /* NOTE(review): the lock/unlock pair presumably refreshes dPriv's cliprects before the test - confirm */
+   {
+      LOCK_HARDWARE( &rmesa->radeon );
+      UNLOCK_HARDWARE( &rmesa->radeon );
+      if ( dPriv->numClipRects == 0 )
+	 return;
+   }
+
+   radeonFlush( ctx );
+
+   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
+      flags |= RADEON_FRONT;
+      mask &= ~BUFFER_BIT_FRONT_LEFT;
+   }
+
+   if ( mask & BUFFER_BIT_BACK_LEFT ) {
+      flags |= RADEON_BACK;
+      mask &= ~BUFFER_BIT_BACK_LEFT;
+   }
+
+   if ( mask & BUFFER_BIT_DEPTH ) {
+      flags |= RADEON_DEPTH;
+      mask &= ~BUFFER_BIT_DEPTH;
+   }
+
+   if ( (mask & BUFFER_BIT_STENCIL) ) {
+      flags |= RADEON_STENCIL;
+      mask &= ~BUFFER_BIT_STENCIL;
+   }
+
+   if ( mask ) {
+      if (R200_DEBUG & RADEON_FALLBACKS)
+	 fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
+      _swrast_Clear( ctx, mask );
+   }
+
+   if ( !flags )
+      return;
+
+   if (rmesa->using_hyperz) {
+      flags |= RADEON_USE_COMP_ZBUF;
+/*      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
+	 flags |= RADEON_USE_HIERZ; */
+      if (!((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
+	    ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) {
+	  flags |= RADEON_CLEAR_FASTZ;
+      }
+   }
+
+   if (rmesa->radeon.radeonScreen->kernel_mm)
+      radeonUserClear(ctx, orig_mask);
+   else {
+      r200KernelClear(ctx, flags);
+      rmesa->radeon.hw.all_dirty = GL_TRUE;
+   }
+}
+
+/* This version of AllocateMemoryMESA allocates only GART memory, and
+ * only does so after the point at which the driver has been
+ * initialized.
+ *
+ * Theoretically a valid context isn't required.  However, in this
+ * implementation, it is, as I'm using the hardware lock to protect
+ * the kernel data structures, and the current context to get the
+ * device fd.
+ */
+void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size,
+			     GLfloat readfreq, GLfloat writefreq,
+			     GLfloat priority)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa;
+   drm_radeon_mem_alloc_t request;
+   int gart_offset, err;
+
+   if (R200_DEBUG & RADEON_IOCTL)
+      fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq,
+	      writefreq, priority);
+
+   /* A current context with a mapped GART texture region is required. */
+   if (!ctx)
+      return NULL;
+   rmesa = R200_CONTEXT(ctx);
+   if (!rmesa || !rmesa->radeon.radeonScreen->gartTextures.map)
+      return NULL;
+
+   /* Setting R200_NO_ALLOC in the environment disables allocation. */
+   if (getenv("R200_NO_ALLOC"))
+      return NULL;
+
+   request.region = RADEON_MEM_REGION_GART;
+   request.alignment = 0;
+   request.size = size;
+   request.region_offset = &gart_offset;
+
+   err = drmCommandWriteRead( rmesa->radeon.radeonScreen->driScreen->fd,
+			      DRM_RADEON_ALLOC, &request, sizeof(request));
+   if (err) {
+      fprintf(stderr, "%s: DRM_RADEON_ALLOC ret %d\n", __FUNCTION__, err);
+      return NULL;
+   }
+
+   /* gart_offset was filled in through request.region_offset; rebase it on the mapping */
+   return (void *)((char *)rmesa->radeon.radeonScreen->gartTextures.map + gart_offset);
+}
+
+
+/* Called via glXFreeMemoryMESA(): release the GART memory at `pointer` through
+ * DRM_RADEON_FREE.  Needs a current context; a pointer outside the mapped GART
+ * texture region is reported on stderr and ignored. */
+void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa;
+   ptrdiff_t region_offset;
+   drm_radeon_mem_free_t memfree;
+   int ret;
+
+   if (R200_DEBUG & RADEON_IOCTL)
+      fprintf(stderr, "%s %p\n", __FUNCTION__, pointer);
+
+   if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map) {
+      fprintf(stderr, "%s: no context\n", __FUNCTION__);
+      return;
+   }
+
+   region_offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+   if (region_offset < 0 ||
+       region_offset > rmesa->radeon.radeonScreen->gartTextures.size) {
+      /* region_offset is a ptrdiff_t: passing it for "%d" is undefined, print it as long */
+      fprintf(stderr, "offset %ld outside range 0..%d\n", (long)region_offset,
+	      rmesa->radeon.radeonScreen->gartTextures.size);
+      return;
+   }
+
+   memfree.region = RADEON_MEM_REGION_GART;
+   memfree.region_offset = region_offset;
+
+   ret = drmCommandWrite( rmesa->radeon.radeonScreen->driScreen->fd,
+			  DRM_RADEON_FREE, &memfree, sizeof(memfree));
+   if (ret)
+      fprintf(stderr, "%s: DRM_RADEON_FREE ret %d\n", __FUNCTION__, ret);
+}
+
+/* Called via glXGetMemoryOffsetMESA().
+ * Yields the card offset of `pointer` minus gart_base; ~0 when there is no
+ * current context or r200IsGartMemory rejects the pointer. */
+GLuint r200GetMemoryOffsetMESA(__DRIscreen *screen, const GLvoid *pointer)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   r200ContextPtr rmesa = ctx ? R200_CONTEXT(ctx) : NULL;
+
+   if (!rmesa) {
+      fprintf(stderr, "%s: no context\n", __FUNCTION__);
+      return ~0;
+   }
+
+   if (!r200IsGartMemory( rmesa, pointer, 0 ))
+      return ~0;
+
+   return r200GartOffsetFromVirtual( rmesa, pointer )
+      - rmesa->radeon.radeonScreen->gart_base;
+}
+
+GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
+			   GLint size )
+{
+   /* Accepted when the range starts at or after the mapping base and ends
+    * strictly before base + gartTextures.size; a negative size never is. */
+   const char *base = (const char *)rmesa->radeon.radeonScreen->gartTextures.map;
+   ptrdiff_t start = (const char *)pointer - base;
+   int valid = !(size < 0 || start < 0 ||
+		 start + size >= rmesa->radeon.radeonScreen->gartTextures.size);
+   if (R200_DEBUG & RADEON_IOCTL)
+      fprintf(stderr, "r200IsGartMemory( %p ) : %d\n", pointer, valid );
+   return valid;
+}
+
+
+GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer )
+{
+   /* distance of pointer from the start of the GART texture mapping */
+   ptrdiff_t delta = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map;
+
+   if (delta >= 0 && delta <= rmesa->radeon.radeonScreen->gartTextures.size)
+      return rmesa->radeon.radeonScreen->gart_texture_offset + delta;
+   return ~0;	/* pointer lies outside the mapping */
+}
+
+
+
+void r200InitIoctlFuncs( struct dd_function_table *functions )
+{  /* install the Clear/Finish/Flush entries of the driver function table */
+    functions->Flush = radeonFlush;
+    functions->Finish = radeonFinish;
+    functions->Clear = r200Clear;	/* defined in this file */
+}
+
diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h
new file mode 100644
index 0000000000..8d51aefa04
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_ioctl.h
@@ -0,0 +1,178 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_IOCTL_H__
+#define __R200_IOCTL_H__
+
+#include "main/simple_list.h"
+#include "radeon_dri.h"
+
+#include "radeon_bocs_wrapper.h"
+
+#include "xf86drm.h"
+#include "drm.h"
+#include "radeon_drm.h"
+
+extern void r200EmitMaxVtxIndex(r200ContextPtr rmesa, int count);
+extern void r200EmitVertexAOS( r200ContextPtr rmesa,
+			       GLuint vertex_size,
+			       struct radeon_bo *bo,
+			       GLuint offset );
+
+extern void r200EmitVbufPrim( r200ContextPtr rmesa,
+				GLuint primitive,
+				GLuint vertex_nr );
+
+extern void r200FlushElts(GLcontext *ctx);
+
+extern GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa,
+					   GLuint primitive,
+					   GLuint min_nr );
+
+extern void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset);
+
+extern void r200InitIoctlFuncs( struct dd_function_table *functions );
+
+extern void *r200AllocateMemoryMESA( __DRIscreen *screen, GLsizei size, GLfloat readfreq,
+				   GLfloat writefreq, GLfloat priority );
+extern void r200FreeMemoryMESA( __DRIscreen *screen, GLvoid *pointer );
+extern GLuint r200GetMemoryOffsetMESA( __DRIscreen *screen, const GLvoid *pointer );
+
+extern GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer,
+				   GLint size );
+
+extern GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, 
+					 const GLvoid *pointer );
+
+void r200SetUpAtomList( r200ContextPtr rmesa );
+
+/* ================================================================
+ * Helper macros:
+ */
+
+/* Close off the last primitive, if it exists: call the dma.flush hook when one
+ * is set.  `rmesa` is parenthesized so any pointer expression may be passed. */
+#define R200_NEWPRIM( rmesa )			\
+do {						\
+   if ( (rmesa)->radeon.dma.flush )			\
+      (rmesa)->radeon.dma.flush( (rmesa)->radeon.glCtx );	\
+} while (0)
+
+/* Can accommodate several state changes and primitive changes without
+ * actually firing the buffer: flushes the open primitive, then marks ATOM and
+ * the overall hw state dirty.  `rmesa` may be any pointer expression. */
+#define R200_STATECHANGE( rmesa, ATOM )			\
+do {								\
+   R200_NEWPRIM( (rmesa) );					\
+   (rmesa)->hw.ATOM.dirty = GL_TRUE;				\
+   (rmesa)->radeon.hw.is_dirty = GL_TRUE;				\
+} while (0)
+
+#define R200_SET_STATE( rmesa, ATOM, index, newvalue ) 	\
+  do {	/* store newvalue in ATOM.cmd[index]; nothing happens when the value is unchanged */ \
+    uint32_t __index = (index); /* index and newvalue are each evaluated once */ \
+    uint32_t __dword = (newvalue); \
+    if (__dword != (rmesa)->hw.ATOM.cmd[__index]) { \
+      R200_STATECHANGE( (rmesa), ATOM ); /* only a real change marks the atom dirty */ \
+      (rmesa)->hw.ATOM.cmd[__index] = __dword; \
+    } \
+  } while(0)
+
+#define R200_DB_STATE( ATOM )			/* copy ATOM.cmd into ATOM.lastcmd; uses `rmesa` from the caller's scope */ \
+   memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd,	\
+	   rmesa->hw.ATOM.cmd_size * 4) /* cmd_size presumably counts 4-byte words - confirm */
+
+static INLINE int R200_DB_STATECHANGE(
+   r200ContextPtr rmesa,
+   struct radeon_state_atom *atom )
+{
+   /* When cmd and lastcmd differ: flush the open primitive, mark the atom and
+    * the hw state dirty, exchange the two pointers and return 1.  Else 0. */
+   GLuint *prev;
+   if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4) == 0)
+      return 0;
+   R200_NEWPRIM( rmesa );
+   atom->dirty = GL_TRUE;
+   rmesa->radeon.hw.is_dirty = GL_TRUE;
+   prev = atom->lastcmd;
+   atom->lastcmd = atom->cmd;
+   atom->cmd = prev;
+   return 1;
+}
+
+
+/* Command lengths.  Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
+ * are available, you will also be adding an rmesa->state.max_state_size because
+ * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts.
+ * The nr argument is parenthesized, so expressions such as (a + b) are safe. */
+#define AOS_BUFSZ(nr)	((3 + (((nr) / 2) * 3) + (((nr) & 1) * 2) + (nr)*2))
+#define VERT_AOS_BUFSZ	(5)
+#define ELTS_BUFSZ(nr)	(12 + (nr) * 2)
+#define VBUF_BUFSZ	(3)
+#define SCISSOR_BUFSZ	(8)
+#define INDEX_BUFSZ	(8+2)
+
+static inline uint32_t cmdpacket3(int cmd_type)
+{
+  /* Build a drm command header that is cleared through its .i member and
+   * carries only cmd_type, and return it as a raw 32-bit word. */
+  drm_radeon_cmd_header_t hdr;
+
+  hdr.i = 0;
+  hdr.header.cmd_type = cmd_type;
+  return (uint32_t)hdr.i;
+}
+
+/* Emit a two-dword PACKET3 header; uses `b_l_rmesa` from the caller's scope. */
+#define OUT_BATCH_PACKET3(packet, num_extra) do {	      \
+    if (b_l_rmesa->radeonScreen->kernel_mm) {		      \
+      OUT_BATCH(CP_PACKET2);				      \
+    } else {						      \
+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3));	      \
+    }							      \
+    OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
+  } while(0)
+
+/* As OUT_BATCH_PACKET3, but the non-kernel_mm header is RADEON_CMD_PACKET3_CLIP. */
+#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do {	      \
+    if (b_l_rmesa->radeonScreen->kernel_mm) {		      \
+      OUT_BATCH(CP_PACKET2);				      \
+    } else {						      \
+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP));	      \
+    }							      \
+    OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
+  } while(0)
+
+
+#endif /* __R200_IOCTL_H__ */
diff --git a/src/mesa/drivers/dri/r200/r200_maos.c b/src/mesa/drivers/dri/r200/r200_maos.c
new file mode 100644
index 0000000000..23e1c962a1
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_maos.c
@@ -0,0 +1,15 @@
+
+
+/* Currently, can only use arrays, verts are not implemented, though
+ * verts are suspected to be faster.
+ * To get an idea how the verts path works, look at the radeon implementation.
+ */
+#include <string.h>
+ 
+#include "r200_context.h"
+#define R200_MAOS_VERTS 0
+#if (R200_MAOS_VERTS)
+#include "r200_maos_verts.c"
+#else
+#include "r200_maos_arrays.c"
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_maos.h b/src/mesa/drivers/dri/r200/r200_maos.h
new file mode 100644
index 0000000000..16a70475e1
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_maos.h
@@ -0,0 +1,42 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_MAOS_H__
+#define __R200_MAOS_H__
+
+#include "r200_context.h"
+
+extern void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev );
+
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
new file mode 100644
index 0000000000..aecba7f894
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
@@ -0,0 +1,227 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/imports.h"
+#include "main/macros.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_state.h"
+#include "r200_swtcl.h"
+#include "r200_maos.h"
+#include "r200_tcl.h"
+
+/*
+ * COPY_DWORDS( dst, src, nr ): copy nr dwords from src to dst and leave dst
+ * pointing just past the data that was written.
+ *
+ * Two implementations are selected at compile time:
+ *  - USE_X86_ASM: one "rep ; movsl" with nr as the count, dst as the
+ *    destination and src as the source.  dst is an output operand, so it
+ *    receives the advanced destination pointer; src is not written back.
+ *  - otherwise: a plain loop copying int-sized elements, followed by
+ *    dst += nr.
+ *
+ * Usage constraints that follow from the expansions below:
+ *  - dst must be a modifiable lvalue (both variants assign to it).
+ *  - The C fallback expands its arguments unparenthesized and evaluates nr
+ *    on every iteration, so pass simple, side-effect-free expressions.
+ *  - The C fallback declares a local named j; an argument that mentions a
+ *    caller variable called j would be captured by it.
+ *
+ * NOTE(review): the asm statement lists no "memory" clobber even though it
+ * writes through dst -- confirm callers do not rely on the compiler seeing
+ * those stores.
+ * NOTE(review): no use of this macro appears in the rest of this file.
+ */
+#if defined(USE_X86_ASM)
+#define COPY_DWORDS( dst, src, nr )					\
+do {									\
+	int __tmp;							\
+	__asm__ __volatile__( "rep ; movsl"				\
+			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
+			      : "0" (nr),				\
+			        "D" ((long)dst),			\
+			        "S" ((long)src) );			\
+} while (0)
+#else
+#define COPY_DWORDS( dst, src, nr )		\
+do {						\
+   int j;					\
+   for ( j = 0 ; j < nr ; j++ )			\
+      dst[j] = ((int *)src)[j];			\
+   dst += nr;					\
+} while (0)
+#endif
+
+/*
+ * Upload a fog-coordinate array as fog blend factors.
+ *
+ * Every source value is passed through r200ComputeFogBlendFactor() and the
+ * result is stored in a newly allocated DMA region that is described by aos
+ * on return (bo, offset, stride, components and count are all filled in).
+ *
+ *   ctx     GL context.
+ *   aos     array descriptor to fill in.
+ *   data    first source fog coordinate; each element is read as a GLfloat.
+ *   stride  distance in bytes between consecutive source values.  A stride
+ *           of 0 means one constant value: a single element is emitted and
+ *           aos->stride is set to 0.
+ *   count   number of elements to convert when stride is non-zero.
+ */
+static void r200_emit_vecfog(GLcontext *ctx, struct radeon_aos *aos,
+			     GLvoid *data, int stride, int count)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	/* Byte pointer so stepping by stride does not depend on the GNU
+	 * extension that allows arithmetic on void pointers. */
+	const char *src = (const char *)data;
+	GLfloat *out;
+	int i;
+	int size = 1;	/* one component per element */
+
+	if (stride == 0) {
+		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
+		count = 1;
+		aos->stride = 0;
+	} else {
+		/* The loop below writes count elements, so the region has to
+		 * hold all of them, not just one. */
+		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
+		aos->stride = size;
+	}
+
+	aos->components = size;
+	aos->count = count;
+
+	radeon_bo_map(aos->bo, 1);
+	out = (GLfloat*)((char*)aos->bo->ptr + aos->offset);
+	for (i = 0; i < count; i++) {
+		out[i] = r200ComputeFogBlendFactor( ctx, *(const GLfloat *)src );
+		src += stride;
+	}
+	radeon_bo_unmap(aos->bo);
+}
+
+/*
+ * Emit any changed arrays to new GART memory and update the vertex format
+ * state to describe them.
+ *
+ * The 15 hardware vertex input slots are visited in order.  For each slot
+ * in use (vimap_rev[slot] != 255) the mapped VB attribute is uploaded into
+ * the next entry of rmesa->radeon.tcl.aos[] -- unless that entry already has
+ * a buffer object -- and the matching bits are accumulated into the
+ * VTXFMT_0 / VTXFMT_1 words.  The vtx state atom is flagged as changed only
+ * when the accumulated words differ from the stored ones.  On return
+ * rmesa->radeon.tcl.aos_count holds the number of slots that were in use.
+ *
+ *   ctx        GL context.
+ *   vimap_rev  per-slot VB attribute index, 255 for an unused slot.
+ */
+void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
+   GLuint nr = 0;
+   GLuint vfmt0 = 0, vfmt1 = 0;
+   GLuint count = VB->Count;
+   GLuint i, emitsize;
+
+   for ( i = 0; i < 15; i++ ) {
+      GLubyte attrib = vimap_rev[i];
+      if (attrib != 255) {
+	 /* Validate the aos index before aos[nr] is read or written. */
+	 assert(nr < 12);
+	 switch (i) {
+	 case 0:
+	    /* Position: xy is always present, z and w are optional. */
+	    emitsize = (VB->AttribPtr[attrib]->size);
+	    switch (emitsize) {
+	    case 4:
+	       vfmt0 |= R200_VTX_W0;
+	       /* fallthrough */
+	    case 3:
+	       vfmt0 |= R200_VTX_Z0;
+	       break;
+	    case 2:
+	       break;
+	    default: assert(0);
+	    }
+	    break;
+	 case 1:
+	    assert(attrib == VERT_ATTRIB_WEIGHT);
+	    emitsize = (VB->AttribPtr[attrib]->size);
+	    vfmt0 |= emitsize << R200_VTX_WEIGHT_COUNT_SHIFT;
+	    break;
+	 case 2:
+	    assert(attrib == VERT_ATTRIB_NORMAL);
+	    emitsize = 3;
+	    vfmt0 |= R200_VTX_N0;
+	    break;
+	 case 3:
+	    /* special handling to fix up fog. Will get us into trouble with vbos...*/
+	    assert(attrib == VERT_ATTRIB_FOG);
+	    /* Test the entry that is actually filled (aos[nr]), matching the
+	     * generic path below; the slot number i is not an aos index. */
+	    if (!rmesa->radeon.tcl.aos[nr].bo) {
+	       if (ctx->VertexProgram._Enabled)
+		  rcommon_emit_vector( ctx,
+				       &(rmesa->radeon.tcl.aos[nr]),
+				       (char *)VB->AttribPtr[attrib]->data,
+				       1,
+				       VB->AttribPtr[attrib]->stride,
+				       count);
+	       else
+		 r200_emit_vecfog( ctx,
+				   &(rmesa->radeon.tcl.aos[nr]),
+				   (char *)VB->AttribPtr[attrib]->data,
+				   VB->AttribPtr[attrib]->stride,
+				   count);
+	    }
+	    vfmt0 |= R200_VTX_DISCRETE_FOG;
+	    /* Fog has been uploaded above; skip the generic emit. */
+	    goto after_emit;
+	 case 4:
+	 case 5:
+	 case 6:
+	 case 7:
+	    /* Colors: emit alpha only for a 4-component array that is either
+	     * strided or whose constant alpha is not 1.0. */
+	    if (VB->AttribPtr[attrib]->size == 4 &&
+	       (VB->AttribPtr[attrib]->stride != 0 ||
+		VB->AttribPtr[attrib]->data[0][3] != 1.0)) emitsize = 4;
+	    else emitsize = 3;
+	    if (emitsize == 4)
+	       vfmt0 |= R200_VTX_FP_RGBA << (R200_VTX_COLOR_0_SHIFT + (i - 4) * 2);
+	    else {
+	       vfmt0 |= R200_VTX_FP_RGB << (R200_VTX_COLOR_0_SHIFT + (i - 4) * 2);
+	    }
+	    break;
+	 case 8:
+	 case 9:
+	 case 10:
+	 case 11:
+	 case 12:
+	 case 13:
+	    /* Texture coordinates: component count goes into VTXFMT_1. */
+	    emitsize = VB->AttribPtr[attrib]->size;
+	    vfmt1 |= emitsize << (R200_VTX_TEX0_COMP_CNT_SHIFT + (i - 8) * 3);
+	    break;
+	 case 14:
+	    emitsize = VB->AttribPtr[attrib]->size >= 2 ? VB->AttribPtr[attrib]->size : 2;
+	    /* NOTE(review): with this fallthrough chain a smaller emitsize
+	     * sets more format bits (2 -> XY1|Z1|W1, 4 -> W1 only), the
+	     * opposite of slot 0 -- kept as is, confirm against the
+	     * register documentation. */
+	    switch (emitsize) {
+	    case 2:
+	       vfmt0 |= R200_VTX_XY1;
+	       /* fallthrough */
+	    case 3:
+	       vfmt0 |= R200_VTX_Z1;
+	       /* fallthrough */
+	    case 4:
+	       vfmt0 |= R200_VTX_W1;
+	       break;
+	    }
+	    /* Leave the outer switch here; without this break slot 14 ran
+	     * into the default case and tripped assert(0). */
+	    break;
+	 default:
+	    assert(0);
+	    emitsize = 0;	/* keeps emitsize defined when asserts are compiled out */
+	 }
+	 if (!rmesa->radeon.tcl.aos[nr].bo) {
+	   rcommon_emit_vector( ctx,
+				&(rmesa->radeon.tcl.aos[nr]),
+				(char *)VB->AttribPtr[attrib]->data,
+				emitsize,
+				VB->AttribPtr[attrib]->stride,
+				count );
+	 }
+after_emit:
+	 nr++;
+      }
+   }
+
+   /* Only dirty the vtx atom when the vertex format really changed. */
+   if (vfmt0 != rmesa->hw.vtx.cmd[VTX_VTXFMT_0] ||
+       vfmt1 != rmesa->hw.vtx.cmd[VTX_VTXFMT_1]) {
+      R200_STATECHANGE( rmesa, vtx );
+      rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = vfmt0;
+      rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = vfmt1;
+   }
+
+   rmesa->radeon.tcl.aos_count = nr;
+}
+
diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h
new file mode 100644
index 0000000000..e331be223b
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_reg.h
@@ -0,0 +1,1597 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef _R200_REG_H_
+#define _R200_REG_H_
+
+#define R200_PP_MISC                      0x1c14 
+#define     R200_REF_ALPHA_MASK        0x000000ff
+#define     R200_ALPHA_TEST_FAIL       (0 << 8)
+#define     R200_ALPHA_TEST_LESS       (1 << 8)
+#define     R200_ALPHA_TEST_LEQUAL     (2 << 8)
+#define     R200_ALPHA_TEST_EQUAL      (3 << 8)
+#define     R200_ALPHA_TEST_GEQUAL     (4 << 8)
+#define     R200_ALPHA_TEST_GREATER    (5 << 8)
+#define     R200_ALPHA_TEST_NEQUAL     (6 << 8)
+#define     R200_ALPHA_TEST_PASS       (7 << 8)
+#define     R200_ALPHA_TEST_OP_MASK    (7 << 8)
+#define     R200_CHROMA_FUNC_FAIL      (0 << 16)
+#define     R200_CHROMA_FUNC_PASS      (1 << 16)
+#define     R200_CHROMA_FUNC_NEQUAL    (2 << 16)
+#define     R200_CHROMA_FUNC_EQUAL     (3 << 16)
+#define     R200_CHROMA_KEY_NEAREST    (0 << 18)
+#define     R200_CHROMA_KEY_ZERO       (1 << 18)
+#define     R200_RIGHT_HAND_CUBE_D3D   (0 << 24)
+#define     R200_RIGHT_HAND_CUBE_OGL   (1 << 24)
+#define R200_PP_FOG_COLOR                 0x1c18 
+#define     R200_FOG_COLOR_MASK        0x00ffffff
+#define     R200_FOG_VERTEX            (0 << 24)
+#define     R200_FOG_TABLE             (1 << 24)
+#define     R200_FOG_USE_DEPTH         (0 << 25)
+#define     R200_FOG_USE_W             (1 << 25)
+#define     R200_FOG_USE_DIFFUSE_ALPHA (2 << 25)
+#define     R200_FOG_USE_SPEC_ALPHA    (3 << 25)
+#define     R200_FOG_USE_VTX_FOG       (4 << 25)
+#define     R200_FOG_USE_MASK          (7 << 25)
+#define R200_RE_SOLID_COLOR               0x1c1c 
+#define R200_RB3D_BLENDCNTL               0x1c20
+#define     R200_COMB_FCN_MASK                    (7  << 12)
+#define     R200_COMB_FCN_ADD_CLAMP               (0  << 12)
+#define     R200_COMB_FCN_ADD_NOCLAMP             (1  << 12)
+#define     R200_COMB_FCN_SUB_CLAMP               (2  << 12)
+#define     R200_COMB_FCN_SUB_NOCLAMP             (3  << 12)
+#define     R200_COMB_FCN_MIN                     (4  << 12)
+#define     R200_COMB_FCN_MAX                     (5  << 12)
+#define     R200_COMB_FCN_RSUB_CLAMP              (6  << 12)
+#define     R200_COMB_FCN_RSUB_NOCLAMP            (7  << 12)
+#define       R200_BLEND_GL_ZERO                  (32)
+#define       R200_BLEND_GL_ONE                   (33)
+#define       R200_BLEND_GL_SRC_COLOR             (34)
+#define       R200_BLEND_GL_ONE_MINUS_SRC_COLOR   (35)
+#define       R200_BLEND_GL_DST_COLOR             (36)
+#define       R200_BLEND_GL_ONE_MINUS_DST_COLOR   (37)
+#define       R200_BLEND_GL_SRC_ALPHA             (38)
+#define       R200_BLEND_GL_ONE_MINUS_SRC_ALPHA   (39)
+#define       R200_BLEND_GL_DST_ALPHA             (40)
+#define       R200_BLEND_GL_ONE_MINUS_DST_ALPHA   (41)
+#define       R200_BLEND_GL_SRC_ALPHA_SATURATE    (42) /* src factor only */
+#define       R200_BLEND_GL_CONST_COLOR           (43)
+#define       R200_BLEND_GL_ONE_MINUS_CONST_COLOR (44)
+#define       R200_BLEND_GL_CONST_ALPHA           (45)
+#define       R200_BLEND_GL_ONE_MINUS_CONST_ALPHA (46)
+#define       R200_BLEND_MASK                     (63)
+#define     R200_SRC_BLEND_SHIFT                  (16)
+#define     R200_DST_BLEND_SHIFT                  (24)
+#define R200_RB3D_DEPTHOFFSET             0x1c24
+#define R200_RB3D_DEPTHPITCH              0x1c28
+#define     R200_DEPTHPITCH_MASK         0x00001ff8
+#define     R200_DEPTH_HYPERZ            (3 << 16)
+#define     R200_DEPTH_ENDIAN_NO_SWAP    (0 << 18)
+#define     R200_DEPTH_ENDIAN_WORD_SWAP  (1 << 18)
+#define     R200_DEPTH_ENDIAN_DWORD_SWAP (2 << 18)
+#define R200_RB3D_ZSTENCILCNTL            0x1c2c 
+#define     R200_DEPTH_FORMAT_MASK          (0xf << 0)
+#define     R200_DEPTH_FORMAT_16BIT_INT_Z   (0  <<  0)
+#define     R200_DEPTH_FORMAT_24BIT_INT_Z   (2  <<  0)
+#define     R200_DEPTH_FORMAT_24BIT_FLOAT_Z (3  <<  0)
+#define     R200_DEPTH_FORMAT_32BIT_INT_Z   (4  <<  0)
+#define     R200_DEPTH_FORMAT_32BIT_FLOAT_Z (5  <<  0)
+#define     R200_DEPTH_FORMAT_24BIT_FLOAT_W (9  <<  0)
+#define     R200_DEPTH_FORMAT_32BIT_FLOAT_W (11 <<  0)
+#define     R200_Z_TEST_NEVER               (0  <<  4)
+#define     R200_Z_TEST_LESS                (1  <<  4)
+#define     R200_Z_TEST_LEQUAL              (2  <<  4)
+#define     R200_Z_TEST_EQUAL               (3  <<  4)
+#define     R200_Z_TEST_GEQUAL              (4  <<  4)
+#define     R200_Z_TEST_GREATER             (5  <<  4)
+#define     R200_Z_TEST_NEQUAL              (6  <<  4)
+#define     R200_Z_TEST_ALWAYS              (7  <<  4)
+#define     R200_Z_TEST_MASK                (7  <<  4)
+#define     R200_Z_HIERARCHY_ENABLE         (1  <<  8)
+#define     R200_STENCIL_TEST_NEVER         (0  << 12)
+#define     R200_STENCIL_TEST_LESS          (1  << 12)
+#define     R200_STENCIL_TEST_LEQUAL        (2  << 12)
+#define     R200_STENCIL_TEST_EQUAL         (3  << 12)
+#define     R200_STENCIL_TEST_GEQUAL        (4  << 12)
+#define     R200_STENCIL_TEST_GREATER       (5  << 12)
+#define     R200_STENCIL_TEST_NEQUAL        (6  << 12)
+#define     R200_STENCIL_TEST_ALWAYS        (7  << 12)
+#define     R200_STENCIL_TEST_MASK          (0x7 << 12)
+#define     R200_STENCIL_FAIL_KEEP          (0  << 16)
+#define     R200_STENCIL_FAIL_ZERO          (1  << 16)
+#define     R200_STENCIL_FAIL_REPLACE       (2  << 16)
+#define     R200_STENCIL_FAIL_INC           (3  << 16)
+#define     R200_STENCIL_FAIL_DEC           (4  << 16)
+#define     R200_STENCIL_FAIL_INVERT        (5  << 16)
+#define     R200_STENCIL_FAIL_INC_WRAP      (6  << 16)
+#define     R200_STENCIL_FAIL_DEC_WRAP      (7  << 16)
+#define     R200_STENCIL_FAIL_MASK          (0x7 << 16)
+#define     R200_STENCIL_ZPASS_KEEP         (0  << 20)
+#define     R200_STENCIL_ZPASS_ZERO         (1  << 20)
+#define     R200_STENCIL_ZPASS_REPLACE      (2  << 20)
+#define     R200_STENCIL_ZPASS_INC          (3  << 20)
+#define     R200_STENCIL_ZPASS_DEC          (4  << 20)
+#define     R200_STENCIL_ZPASS_INVERT       (5  << 20)
+#define     R200_STENCIL_ZPASS_INC_WRAP     (6  << 20)
+#define     R200_STENCIL_ZPASS_DEC_WRAP     (7  << 20)
+#define     R200_STENCIL_ZPASS_MASK         (0x7 << 20)
+#define     R200_STENCIL_ZFAIL_KEEP         (0  << 24)
+#define     R200_STENCIL_ZFAIL_ZERO         (1  << 24)
+#define     R200_STENCIL_ZFAIL_REPLACE      (2  << 24)
+#define     R200_STENCIL_ZFAIL_INC          (3  << 24)
+#define     R200_STENCIL_ZFAIL_DEC          (4  << 24)
+#define     R200_STENCIL_ZFAIL_INVERT       (5  << 24)
+#define     R200_STENCIL_ZFAIL_INC_WRAP     (6  << 24)
+#define     R200_STENCIL_ZFAIL_DEC_WRAP     (7  << 24)
+#define     R200_STENCIL_ZFAIL_MASK         (0x7 << 24)
+#define     R200_Z_COMPRESSION_ENABLE       (1  << 28)
+#define     R200_FORCE_Z_DIRTY              (1  << 29)
+#define     R200_Z_WRITE_ENABLE             (1  << 30)
+#define     R200_Z_DECOMPRESSION_ENABLE     (1  << 31)
+/*gap*/
+#define R200_PP_CNTL                      0x1c38 
+#define     R200_TEX_0_ENABLE                         0x00000010
+#define     R200_TEX_1_ENABLE                         0x00000020
+#define     R200_TEX_2_ENABLE                         0x00000040
+#define     R200_TEX_3_ENABLE                         0x00000080
+#define     R200_TEX_4_ENABLE                         0x00000100
+#define     R200_TEX_5_ENABLE                         0x00000200
+#define     R200_TEX_ENABLE_MASK                      0x000003f0
+#define     R200_FILTER_ROUND_MODE_MASK               0x00000400
+#define     R200_TEX_BLEND_7_ENABLE                   0x00000800
+#define     R200_TEX_BLEND_0_ENABLE                   0x00001000
+#define     R200_TEX_BLEND_1_ENABLE                   0x00002000
+#define     R200_TEX_BLEND_2_ENABLE                   0x00004000
+#define     R200_TEX_BLEND_3_ENABLE                   0x00008000
+#define     R200_TEX_BLEND_4_ENABLE                   0x00010000
+#define     R200_TEX_BLEND_5_ENABLE                   0x00020000
+#define     R200_TEX_BLEND_6_ENABLE                   0x00040000
+#define     R200_TEX_BLEND_ENABLE_MASK                0x0007f800
+#define     R200_TEX_BLEND_0_ENABLE_SHIFT             (12)
+#define     R200_MULTI_PASS_ENABLE                    0x00080000
+#define     R200_SPECULAR_ENABLE                      0x00200000
+#define     R200_FOG_ENABLE                           0x00400000
+#define     R200_ALPHA_TEST_ENABLE                    0x00800000
+#define     R200_ANTI_ALIAS_NONE                       0x00000000
+#define     R200_ANTI_ALIAS_LINE                       0x01000000
+#define     R200_ANTI_ALIAS_POLY                       0x02000000
+#define     R200_ANTI_ALIAS_MASK                       0x03000000
+/*
+ * R200_RB3D_CNTL (0x1c3c) and its bit fields.
+ * Bits 0-9 are single-bit enables; the R200_COLOR_FORMAT_* values are
+ * encoded in a field starting at bit 10 (largest value defined here is 15).
+ */
+#define R200_RB3D_CNTL                    0x1c3c 
+#define     R200_ALPHA_BLEND_ENABLE       (1  <<  0)
+#define     R200_PLANE_MASK_ENABLE        (1  <<  1)
+#define     R200_DITHER_ENABLE            (1  <<  2)
+#define     R200_ROUND_ENABLE             (1  <<  3)
+#define     R200_SCALE_DITHER_ENABLE      (1  <<  4)
+#define     R200_DITHER_INIT              (1  <<  5)
+#define     R200_ROP_ENABLE               (1  <<  6)
+#define     R200_STENCIL_ENABLE           (1  <<  7)
+#define     R200_Z_ENABLE                 (1  <<  8)
+/* NOTE(review): "OFFEST" looks like a misspelling of "OFFSET"; the name is
+ * part of the header's interface, so it is left unchanged here. */
+#define     R200_DEPTH_XZ_OFFEST_ENABLE   (1  <<  9)
+#define     R200_COLOR_FORMAT_ARGB1555    (3  << 10)
+#define     R200_COLOR_FORMAT_RGB565      (4  << 10)
+#define     R200_COLOR_FORMAT_ARGB8888    (6  << 10)
+#define     R200_COLOR_FORMAT_RGB332      (7  << 10)
+#define     R200_COLOR_FORMAT_Y8          (8  << 10)
+#define     R200_COLOR_FORMAT_RGB8        (9  << 10)
+#define     R200_COLOR_FORMAT_YUV422_VYUY (11 << 10)
+#define     R200_COLOR_FORMAT_YUV422_YVYU (12 << 10)
+#define     R200_COLOR_FORMAT_aYUV444     (14 << 10)
+#define     R200_COLOR_FORMAT_ARGB4444    (15 << 10)
+#define     R200_CLRCMP_FLIP_ENABLE       (1  << 14)
+#define     R200_SEPARATE_ALPHA_ENABLE    (1  << 16)
+#define R200_RB3D_COLOROFFSET             0x1c40 
+#define     R200_COLOROFFSET_MASK      0xfffffff0
+#define R200_RE_WIDTH_HEIGHT              0x1c44 
+#define     R200_RE_WIDTH_SHIFT        0
+#define     R200_RE_HEIGHT_SHIFT       16
+/*
+ * R200_RB3D_COLORPITCH (0x1c48) and its bit fields.
+ * The endian-swap selector is a field starting at bit 18 (values 0-2).
+ */
+#define R200_RB3D_COLORPITCH              0x1c48 
+/* NOTE(review): this literal has nine hex digits; its value is 0x1ff8, the
+ * same as R200_DEPTHPITCH_MASK, so the extra leading zero is harmless. */
+#define     R200_COLORPITCH_MASK         0x000001ff8
+#define     R200_COLOR_TILE_ENABLE       (1 << 16)
+#define     R200_COLOR_MICROTILE_ENABLE  (1 << 17)
+#define     R200_COLOR_ENDIAN_NO_SWAP    (0 << 18)
+#define     R200_COLOR_ENDIAN_WORD_SWAP  (1 << 18)
+#define     R200_COLOR_ENDIAN_DWORD_SWAP (2 << 18)
+#define R200_SE_CNTL                      0x1c4c 
+#define     R200_FFACE_CULL_CW          (0 <<  0)
+#define     R200_FFACE_CULL_CCW         (1 <<  0)
+#define     R200_FFACE_CULL_DIR_MASK    (1 <<  0)
+#define     R200_BFACE_CULL             (0 <<  1)
+#define     R200_BFACE_SOLID            (3 <<  1)
+#define     R200_FFACE_CULL             (0 <<  3)
+#define     R200_FFACE_SOLID            (3 <<  3)
+#define     R200_FFACE_CULL_MASK        (3 <<  3)
+#define     R200_FLAT_SHADE_VTX_0       (0 <<  6)
+#define     R200_FLAT_SHADE_VTX_1       (1 <<  6)
+#define     R200_FLAT_SHADE_VTX_2       (2 <<  6)
+#define     R200_FLAT_SHADE_VTX_LAST    (3 <<  6)
+#define     R200_DIFFUSE_SHADE_SOLID    (0 <<  8)
+#define     R200_DIFFUSE_SHADE_FLAT     (1 <<  8)
+#define     R200_DIFFUSE_SHADE_GOURAUD  (2 <<  8)
+#define     R200_DIFFUSE_SHADE_MASK     (3 <<  8)
+#define     R200_ALPHA_SHADE_SOLID      (0 << 10)
+#define     R200_ALPHA_SHADE_FLAT       (1 << 10)
+#define     R200_ALPHA_SHADE_GOURAUD    (2 << 10)
+#define     R200_ALPHA_SHADE_MASK       (3 << 10)
+#define     R200_SPECULAR_SHADE_SOLID   (0 << 12)
+#define     R200_SPECULAR_SHADE_FLAT    (1 << 12)
+#define     R200_SPECULAR_SHADE_GOURAUD (2 << 12)
+#define     R200_SPECULAR_SHADE_MASK    (3 << 12)
+#define     R200_FOG_SHADE_SOLID        (0 << 14)
+#define     R200_FOG_SHADE_FLAT         (1 << 14)
+#define     R200_FOG_SHADE_GOURAUD      (2 << 14)
+#define     R200_FOG_SHADE_MASK         (3 << 14)
+#define     R200_ZBIAS_ENABLE_POINT     (1 << 16)
+#define     R200_ZBIAS_ENABLE_LINE      (1 << 17)
+#define     R200_ZBIAS_ENABLE_TRI       (1 << 18)
+#define     R200_WIDELINE_ENABLE        (1 << 20)
+#define     R200_DISC_FOG_SHADE_SOLID   (0 << 24)
+#define     R200_DISC_FOG_SHADE_FLAT    (1 << 24)
+#define     R200_DISC_FOG_SHADE_GOURAUD (2 << 24)
+#define     R200_DISC_FOG_SHADE_MASK    (3 << 24)
+#define     R200_VTX_PIX_CENTER_D3D     (0 << 27)
+#define     R200_VTX_PIX_CENTER_OGL     (1 << 27)
+#define     R200_ROUND_MODE_TRUNC       (0 << 28)
+#define     R200_ROUND_MODE_ROUND       (1 << 28)
+#define     R200_ROUND_MODE_ROUND_EVEN  (2 << 28)
+#define     R200_ROUND_MODE_ROUND_ODD   (3 << 28)
+#define     R200_ROUND_PREC_16TH_PIX    (0 << 30)
+#define     R200_ROUND_PREC_8TH_PIX     (1 << 30)
+#define     R200_ROUND_PREC_4TH_PIX     (2 << 30)
+#define     R200_ROUND_PREC_HALF_PIX    (3 << 30)
+#define R200_RE_CNTL                      0x1c50 
+#define     R200_STIPPLE_ENABLE                     0x1
+#define     R200_SCISSOR_ENABLE                     0x2
+#define     R200_PATTERN_ENABLE                     0x4
+#define     R200_PERSPECTIVE_ENABLE                 0x8
+#define     R200_POINT_SMOOTH                       0x20
+#define     R200_VTX_STQ0_D3D                       0x00010000
+#define     R200_VTX_STQ1_D3D                       0x00040000
+#define     R200_VTX_STQ2_D3D                       0x00100000
+#define     R200_VTX_STQ3_D3D                       0x00400000
+#define     R200_VTX_STQ4_D3D                       0x01000000
+#define     R200_VTX_STQ5_D3D                       0x04000000
+/* gap */
+#define R200_RE_STIPPLE_ADDR              0x1cc8
+#define R200_RE_STIPPLE_DATA              0x1ccc
+#define R200_RE_LINE_PATTERN              0x1cd0 
+#define     R200_LINE_PATTERN_MASK             0x0000ffff
+#define     R200_LINE_REPEAT_COUNT_SHIFT       16
+#define     R200_LINE_PATTERN_START_SHIFT      24
+#define     R200_LINE_PATTERN_LITTLE_BIT_ORDER (0 << 28)
+#define     R200_LINE_PATTERN_BIG_BIT_ORDER    (1 << 28)
+#define     R200_LINE_PATTERN_AUTO_RESET       (1 << 29)
+#define R200_RE_LINE_STATE                0x1cd4 
+#define     R200_LINE_CURRENT_PTR_SHIFT       0
+#define     R200_LINE_CURRENT_COUNT_SHIFT     8
+#define R200_RE_SCISSOR_TL_0              0x1cd8
+#define R200_RE_SCISSOR_BR_0              0x1cdc
+#define R200_RE_SCISSOR_TL_1              0x1ce0
+#define R200_RE_SCISSOR_BR_1              0x1ce4
+#define R200_RE_SCISSOR_TL_2              0x1ce8
+#define R200_RE_SCISSOR_BR_2              0x1cec
+/* gap */
+#define R200_RB3D_DEPTHXY_OFFSET          0x1d60 
+#define     R200_DEPTHX_SHIFT  0
+#define     R200_DEPTHY_SHIFT  16
+/* gap */
+#define R200_RB3D_STENCILREFMASK          0x1d7c 
+#define     R200_STENCIL_REF_SHIFT           0
+#define     R200_STENCIL_REF_MASK            (0xff << 0)
+#define     R200_STENCIL_MASK_SHIFT          16
+#define     R200_STENCIL_VALUE_MASK          (0xff << 16)
+#define     R200_STENCIL_WRITEMASK_SHIFT     24
+#define     R200_STENCIL_WRITE_MASK          (0xff << 24)
+#define R200_RB3D_ROPCNTL                 0x1d80 
+#define     R200_ROP_MASK                    (15 << 8)
+#define     R200_ROP_CLEAR                   (0  << 8)
+#define     R200_ROP_NOR                     (1  << 8)
+#define     R200_ROP_AND_INVERTED            (2  << 8)
+#define     R200_ROP_COPY_INVERTED           (3  << 8)
+#define     R200_ROP_AND_REVERSE             (4  << 8)
+#define     R200_ROP_INVERT                  (5  << 8)
+#define     R200_ROP_XOR                     (6  << 8)
+#define     R200_ROP_NAND                    (7  << 8)
+#define     R200_ROP_AND                     (8  << 8)
+#define     R200_ROP_EQUIV                   (9  << 8)
+#define     R200_ROP_NOOP                    (10 << 8)
+#define     R200_ROP_OR_INVERTED             (11 << 8)
+#define     R200_ROP_COPY                    (12 << 8)
+#define     R200_ROP_OR_REVERSE              (13 << 8)
+#define     R200_ROP_OR                      (14 << 8)
+#define     R200_ROP_SET                     (15 << 8)
+#define R200_RB3D_PLANEMASK               0x1d84 
+/* gap */
+#define R200_SE_VPORT_XSCALE              0x1d98 
+#define R200_SE_VPORT_XOFFSET             0x1d9c 
+#define R200_SE_VPORT_YSCALE              0x1da0 
+#define R200_SE_VPORT_YOFFSET             0x1da4 
+#define R200_SE_VPORT_ZSCALE              0x1da8 
+#define R200_SE_VPORT_ZOFFSET             0x1dac 
+#define R200_SE_ZBIAS_FACTOR              0x1db0 
+#define R200_SE_ZBIAS_CONSTANT            0x1db4 
+#define R200_SE_LINE_WIDTH                0x1db8 
+#define	    R200_LINE_WIDTH_SHIFT                   0x00000000
+#define	    R200_MINPOINTSIZE_SHIFT                 0x00000010
+/* gap */
+#define R200_SE_VAP_CNTL                           0x2080
+#define     R200_VAP_TCL_ENABLE                       0x00000001
+#define     R200_VAP_PROG_VTX_SHADER_ENABLE           0x00000004
+#define     R200_VAP_SINGLE_BUF_STATE_ENABLE          0x00000010
+#define     R200_VAP_FORCE_W_TO_ONE                   0x00010000
+#define     R200_VAP_D3D_TEX_DEFAULT                  0x00020000
+#define     R200_VAP_VF_MAX_VTX_NUM__SHIFT            18
+#define     R200_VAP_DX_CLIP_SPACE_DEF                0x00400000
+#define R200_SE_VF_CNTL                           0x2084
+#define     R200_VF_PRIM_NONE                         0x00000000
+#define     R200_VF_PRIM_POINTS                       0x00000001
+#define     R200_VF_PRIM_LINES                        0x00000002
+#define     R200_VF_PRIM_LINE_STRIP                   0x00000003
+#define     R200_VF_PRIM_TRIANGLES                    0x00000004
+#define     R200_VF_PRIM_TRIANGLE_FAN                 0x00000005
+#define     R200_VF_PRIM_TRIANGLE_STRIP               0x00000006
+#define     R200_VF_PRIM_RECT_LIST                    0x00000008
+#define     R200_VF_PRIM_3VRT_POINTS                  0x00000009
+#define     R200_VF_PRIM_3VRT_LINES                   0x0000000a
+#define     R200_VF_PRIM_POINT_SPRITES                0x0000000b
+#define     R200_VF_PRIM_LINE_LOOP                    0x0000000c
+#define     R200_VF_PRIM_QUADS                        0x0000000d
+#define     R200_VF_PRIM_QUAD_STRIP                   0x0000000e
+#define     R200_VF_PRIM_POLYGON                      0x0000000f
+#define     R200_VF_PRIM_MASK                         0x0000000f
+#define     R200_VF_PRIM_WALK_IND                     0x00000010
+#define     R200_VF_PRIM_WALK_LIST                    0x00000020
+#define     R200_VF_PRIM_WALK_RING                    0x00000030
+#define     R200_VF_PRIM_WALK_MASK                    0x00000030
+#define     R200_VF_COLOR_ORDER_RGBA                  0x00000040
+#define     R200_VF_TCL_OUTPUT_VTX_ENABLE             0x00000200
+#define     R200_VF_INDEX_SZ_4                        0x00000800
+#define     R200_VF_VERTEX_NUMBER_MASK                0xffff0000
+#define     R200_VF_VERTEX_NUMBER_SHIFT               16
+/*
+ * R200_SE_VTX_FMT_0 (0x2088): first vertex format word.
+ * r200EmitArrays() in r200_maos_arrays.c builds this word by OR-ing the
+ * flags below and stores it in hw.vtx.cmd[VTX_VTXFMT_0].
+ */
+#define R200_SE_VTX_FMT_0                 0x2088
+#define     R200_VTX_XY                     0 /* always have xy */
+#define     R200_VTX_Z0                     (1<<0)
+#define     R200_VTX_W0                     (1<<1)
+/* Weight count is a multi-bit field starting at bit 2 (the next flag
+ * defined here is at bit 5). */
+#define     R200_VTX_WEIGHT_COUNT_SHIFT     (2)
+#define     R200_VTX_PV_MATRIX_SEL          (1<<5)
+#define     R200_VTX_N0                     (1<<6)
+#define     R200_VTX_POINT_SIZE             (1<<7)
+#define     R200_VTX_DISCRETE_FOG           (1<<8)
+#define     R200_VTX_SHININESS_0            (1<<9)
+#define     R200_VTX_SHININESS_1            (1<<10)
+/* Per-color format codes: a 2-bit value (R200_VTX_COLOR_MASK) that is
+ * shifted into place with the matching R200_VTX_COLOR_n_SHIFT. */
+#define       R200_VTX_COLOR_NOT_PRESENT      0
+#define       R200_VTX_PK_RGBA          1
+#define       R200_VTX_FP_RGB           2
+#define       R200_VTX_FP_RGBA          3
+#define       R200_VTX_COLOR_MASK             3
+/* Color n occupies bits 11 + 2*n and 12 + 2*n. */
+#define     R200_VTX_COLOR_0_SHIFT          11
+#define     R200_VTX_COLOR_1_SHIFT          13
+#define     R200_VTX_COLOR_2_SHIFT          15
+#define     R200_VTX_COLOR_3_SHIFT          17
+#define     R200_VTX_COLOR_4_SHIFT          19
+#define     R200_VTX_COLOR_5_SHIFT          21
+#define     R200_VTX_COLOR_6_SHIFT          23
+#define     R200_VTX_COLOR_7_SHIFT          25
+#define     R200_VTX_XY1                    (1<<28)
+#define     R200_VTX_Z1                     (1<<29)
+#define     R200_VTX_W1                     (1<<30)
+/* NOTE(review): 1<<31 shifts into the sign bit of a 32-bit int, which ISO C
+ * leaves undefined; an unsigned constant (1u<<31) would avoid that. */
+#define     R200_VTX_N1                     (1<<31)
+/*
+ * R200_SE_VTX_FMT_1 (0x208c): second vertex format word.
+ * Holds one component-count field per texture unit; the field for unit n
+ * starts at bit 3*n (units 0-5 are defined here).
+ */
+#define R200_SE_VTX_FMT_1                 0x208c
+#define     R200_VTX_TEX0_COMP_CNT_SHIFT        0
+#define     R200_VTX_TEX1_COMP_CNT_SHIFT        3
+#define     R200_VTX_TEX2_COMP_CNT_SHIFT        6
+#define     R200_VTX_TEX3_COMP_CNT_SHIFT        9
+#define     R200_VTX_TEX4_COMP_CNT_SHIFT        12
+#define     R200_VTX_TEX5_COMP_CNT_SHIFT        15
+#define R200_SE_TCL_OUTPUT_VTX_FMT_0      0x2090 
+#define R200_SE_TCL_OUTPUT_VTX_FMT_1      0x2094 
+/* gap */
+#define R200_SE_VTE_CNTL                  0x20b0
+#define     R200_VPORT_X_SCALE_ENA                0x00000001
+#define     R200_VPORT_X_OFFSET_ENA               0x00000002
+#define     R200_VPORT_Y_SCALE_ENA                0x00000004
+#define     R200_VPORT_Y_OFFSET_ENA               0x00000008
+#define     R200_VPORT_Z_SCALE_ENA                0x00000010
+#define     R200_VPORT_Z_OFFSET_ENA               0x00000020
+#define     R200_VTX_XY_FMT                       0x00000100
+#define     R200_VTX_Z_FMT                        0x00000200
+#define     R200_VTX_W0_FMT                       0x00000400
+#define     R200_VTX_W0_NORMALIZE                 0x00000800
+#define     R200_VTX_ST_DENORMALIZED              0x00001000
+/* gap */
+/*
+ * Vertex array-of-structures (AOS) registers, 0x20c0 - 0x2108.
+ * After R200_SE_VTX_NUM_ARRAYS the registers repeat in groups of three
+ * consecutive dwords: one ATTR register named after a pair of arrays,
+ * followed by the ADDR registers of that pair.  Twelve address registers
+ * (ADDR0 - ADDR11) are defined.
+ */
+#define R200_SE_VTX_NUM_ARRAYS            0x20c0
+#define R200_SE_VTX_AOS_ATTR01            0x20c4
+#define R200_SE_VTX_AOS_ADDR0             0x20c8
+#define R200_SE_VTX_AOS_ADDR1             0x20cc
+#define R200_SE_VTX_AOS_ATTR23            0x20d0
+#define R200_SE_VTX_AOS_ADDR2             0x20d4
+#define R200_SE_VTX_AOS_ADDR3             0x20d8
+#define R200_SE_VTX_AOS_ATTR45            0x20dc
+#define R200_SE_VTX_AOS_ADDR4             0x20e0
+#define R200_SE_VTX_AOS_ADDR5             0x20e4
+#define R200_SE_VTX_AOS_ATTR67            0x20e8
+#define R200_SE_VTX_AOS_ADDR6             0x20ec
+#define R200_SE_VTX_AOS_ADDR7             0x20f0
+#define R200_SE_VTX_AOS_ATTR89            0x20f4
+#define R200_SE_VTX_AOS_ADDR8             0x20f8
+#define R200_SE_VTX_AOS_ADDR9             0x20fc
+#define R200_SE_VTX_AOS_ATTR1011          0x2100
+#define R200_SE_VTX_AOS_ADDR10            0x2104
+#define R200_SE_VTX_AOS_ADDR11            0x2108
+#define R200_SE_VF_MAX_VTX_INDX           0x210c
+#define R200_SE_VF_MIN_VTX_INDX           0x2110
+/* gap */
+#define R200_SE_VAP_CNTL_STATUS           0x2140
+#define     R200_VC_NO_SWAP                  (0 << 0)
+#define     R200_VC_16BIT_SWAP               (1 << 0)
+#define     R200_VC_32BIT_SWAP               (2 << 0)
+/* gap */
+#define R200_SE_VTX_STATE_CNTL                     0x2180
+#define     R200_VSC_COLOR_0_ASSEMBLY_CNTL_SHIFT    0x00000000
+#define     R200_VSC_COLOR_1_ASSEMBLY_CNTL_SHIFT    0x00000002
+#define     R200_VSC_COLOR_2_ASSEMBLY_CNTL_SHIFT    0x00000004
+#define     R200_VSC_COLOR_3_ASSEMBLY_CNTL_SHIFT    0x00000006
+#define     R200_VSC_COLOR_4_ASSEMBLY_CNTL_SHIFT    0x00000008
+#define     R200_VSC_COLOR_5_ASSEMBLY_CNTL_SHIFT    0x0000000a
+#define     R200_VSC_COLOR_6_ASSEMBLY_CNTL_SHIFT    0x0000000c
+#define     R200_VSC_COLOR_7_ASSEMBLY_CNTL_SHIFT    0x0000000e
+#define     R200_VSC_UPDATE_USER_COLOR_0_ENABLE    0x00010000
+#define     R200_VSC_UPDATE_USER_COLOR_1_ENABLE    0x00020000
+/* gap */
+/*
+ * TCL vector / scalar state access: an index register followed by a data
+ * register for each of the two kinds.
+ * NOTE(review): the field shifts below use the RADEON_ prefix while the
+ * rest of this header uses R200_; check whether another included header
+ * defines the same names before renaming or duplicating them.
+ */
+#define R200_SE_TCL_VECTOR_INDX_REG                0x2200
+#       define RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT  16
+#       define RADEON_VEC_INDX_DWORD_COUNT_SHIFT     28
+#define R200_SE_TCL_VECTOR_DATA_REG                0x2204
+#define R200_SE_TCL_SCALAR_INDX_REG                0x2208
+#       define RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT  16
+#define R200_SE_TCL_SCALAR_DATA_REG                0x220c
+/* gap: nothing is listed here between 0x220c and 0x2230 */
+#define R200_SE_TCL_MATRIX_SEL_0                   0x2230 /* MATRIX_SEL_n: *_SHIFT values step by 8 */
+#define     R200_MODELVIEW_0_SHIFT           (0) 
+#define     R200_MODELVIEW_1_SHIFT           (8) 
+#define     R200_MODELVIEW_2_SHIFT           (16) 
+#define     R200_MODELVIEW_3_SHIFT           (24) 
+#define R200_SE_TCL_MATRIX_SEL_1                   0x2234
+#define     R200_IT_MODELVIEW_0_SHIFT        (0)
+#define     R200_IT_MODELVIEW_1_SHIFT        (8) 
+#define     R200_IT_MODELVIEW_2_SHIFT        (16)
+#define     R200_IT_MODELVIEW_3_SHIFT        (24)
+#define R200_SE_TCL_MATRIX_SEL_2                   0x2238
+#define     R200_MODELPROJECT_0_SHIFT         (0) 
+#define     R200_MODELPROJECT_1_SHIFT         (8) 
+#define     R200_MODELPROJECT_2_SHIFT         (16) 
+#define     R200_MODELPROJECT_3_SHIFT         (24) 
+#define R200_SE_TCL_MATRIX_SEL_3                   0x223c
+#define     R200_TEXMAT_0_SHIFT    0
+#define     R200_TEXMAT_1_SHIFT    8
+#define     R200_TEXMAT_2_SHIFT    16
+#define     R200_TEXMAT_3_SHIFT    24
+#define R200_SE_TCL_MATRIX_SEL_4                   0x2240 /* only TEXMAT_4 and TEXMAT_5 shifts are listed */
+#define     R200_TEXMAT_4_SHIFT    0
+#define     R200_TEXMAT_5_SHIFT    8
+/* gap: nothing is listed here between 0x2240 and 0x2250 */
+#define R200_SE_TCL_OUTPUT_VTX_COMP_SEL     0x2250 /* single-bit flags below, plus one 6-bit mask */
+#define     R200_OUTPUT_XYZW                    (1<<0)
+#define     R200_OUTPUT_COLOR_0                 (1<<8)
+#define     R200_OUTPUT_COLOR_1                 (1<<9)
+#define     R200_OUTPUT_TEX_0                   (1<<16)
+#define     R200_OUTPUT_TEX_1                   (1<<17)
+#define     R200_OUTPUT_TEX_2                   (1<<18)
+#define     R200_OUTPUT_TEX_3                   (1<<19)
+#define     R200_OUTPUT_TEX_4                   (1<<20)
+#define     R200_OUTPUT_TEX_5                   (1<<21)
+#define     R200_OUTPUT_TEX_MASK                (0x3f<<16) /* OR of R200_OUTPUT_TEX_0..5 */
+#define     R200_OUTPUT_DISCRETE_FOG            (1<<24)
+#define     R200_OUTPUT_PT_SIZE                 (1<<25)
+#define     R200_FORCE_INORDER_PROC             (1u<<31) /* unsigned: 1<<31 overflows a signed int */
+#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0  0x2254 /* ADDR_n: *_ADDR__SHIFT values step by 8 */
+#define	    R200_VERTEX_POSITION_ADDR__SHIFT     0x00000000
+#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_1  0x2258
+#define	    R200_VTX_COLOR_0_ADDR__SHIFT         0x00000000
+#define	    R200_VTX_COLOR_1_ADDR__SHIFT         0x00000008
+#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_2  0x225c
+#define	    R200_VTX_TEX_0_ADDR__SHIFT           0x00000000
+#define	    R200_VTX_TEX_1_ADDR__SHIFT           0x00000008
+#define	    R200_VTX_TEX_2_ADDR__SHIFT           0x00000010
+#define	    R200_VTX_TEX_3_ADDR__SHIFT           0x00000018
+#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_3  0x2260
+#define	    R200_VTX_TEX_4_ADDR__SHIFT           0x00000000
+#define	    R200_VTX_TEX_5_ADDR__SHIFT           0x00000008
+
+/* gap: nothing is listed here between 0x2260 and 0x2268 */
+#define R200_SE_TCL_LIGHT_MODEL_CTL_0       0x2268 
+#define     R200_LIGHTING_ENABLE                (1<<0)
+#define     R200_LIGHT_IN_MODELSPACE            (1<<1)
+#define     R200_LOCAL_VIEWER                   (1<<2)
+#define     R200_NORMALIZE_NORMALS              (1<<3)
+#define     R200_RESCALE_NORMALS                (1<<4)
+#define     R200_SPECULAR_LIGHTS                (1<<5)
+#define     R200_DIFFUSE_SPECULAR_COMBINE       (1<<6)
+#define     R200_LIGHT_ALPHA                    (1<<7)
+#define     R200_LOCAL_LIGHT_VEC_GL             (1<<8)
+#define     R200_LIGHT_NO_NORMAL_AMBIENT_ONLY   (1<<9)
+#define     R200_LIGHT_TWOSIDE                  (1<<10)
+#define     R200_FRONT_SHININESS_SOURCE_SHIFT       (0xb) /* 11 */
+#define     R200_BACK_SHININESS_SOURCE_SHIFT        (0xd) /* 13 */
+#define       R200_LM0_SOURCE_MATERIAL_0           (0) /* LM0_SOURCE_*: unshifted values 0-3 */
+#define       R200_LM0_SOURCE_MATERIAL_1           (1)
+#define       R200_LM0_SOURCE_VERTEX_SHININESS_0   (2)
+#define       R200_LM0_SOURCE_VERTEX_SHININESS_1   (3)
+#define R200_SE_TCL_LIGHT_MODEL_CTL_1       0x226c 
+#define       R200_LM1_SOURCE_LIGHT_PREMULT        (0) /* LM1_SOURCE_*: unshifted values 0-9 and 0xf */
+#define       R200_LM1_SOURCE_MATERIAL_0           (1)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_0       (2)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_1       (3)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_2       (4)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_3       (5)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_4       (6)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_5       (7)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_6       (8)
+#define       R200_LM1_SOURCE_VERTEX_COLOR_7       (9)
+#define       R200_LM1_SOURCE_MATERIAL_1           (0xf)
+#define     R200_FRONT_EMISSIVE_SOURCE_SHIFT        (0) /* *_SOURCE_SHIFT values step by 4 */
+#define     R200_FRONT_AMBIENT_SOURCE_SHIFT         (4)
+#define     R200_FRONT_DIFFUSE_SOURCE_SHIFT         (8)
+#define     R200_FRONT_SPECULAR_SOURCE_SHIFT        (12)
+#define     R200_BACK_EMISSIVE_SOURCE_SHIFT         (16)
+#define     R200_BACK_AMBIENT_SOURCE_SHIFT          (20)
+#define     R200_BACK_DIFFUSE_SOURCE_SHIFT          (24)
+#define     R200_BACK_SPECULAR_SOURCE_SHIFT         (28) /* NOTE(review): an int value >= 8 shifted by 28 overflows; confirm callers use unsigned */
+#define R200_SE_TCL_PER_LIGHT_CTL_0       0x2270 
+#define     R200_LIGHT_0_ENABLE                    (1<<0)
+#define     R200_LIGHT_0_ENABLE_AMBIENT            (1<<1)
+#define     R200_LIGHT_0_ENABLE_SPECULAR           (1<<2)
+#define     R200_LIGHT_0_IS_LOCAL                  (1<<3)
+#define     R200_LIGHT_0_IS_SPOT                   (1<<4)
+#define     R200_LIGHT_0_DUAL_CONE                 (1<<5)
+#define     R200_LIGHT_0_ENABLE_RANGE_ATTEN        (1<<6)
+#define     R200_LIGHT_0_CONSTANT_RANGE_ATTEN      (1<<7)
+#define     R200_LIGHT_1_ENABLE                    (1<<16) /* LIGHT_1_* == LIGHT_0_* << 16 */
+#define     R200_LIGHT_1_ENABLE_AMBIENT            (1<<17)
+#define     R200_LIGHT_1_ENABLE_SPECULAR           (1<<18)
+#define     R200_LIGHT_1_IS_LOCAL                  (1<<19)
+#define     R200_LIGHT_1_IS_SPOT                   (1<<20)
+#define     R200_LIGHT_1_DUAL_CONE                 (1<<21)
+#define     R200_LIGHT_1_ENABLE_RANGE_ATTEN        (1<<22)
+#define     R200_LIGHT_1_CONSTANT_RANGE_ATTEN      (1<<23)
+#define     R200_LIGHT_0_SHIFT                   (0)
+#define     R200_LIGHT_1_SHIFT                   (16)
+#define R200_SE_TCL_PER_LIGHT_CTL_1       0x2274 
+#define     R200_LIGHT_2_SHIFT                   (0) /* CTL_1..3 list only the per-light shifts (0 and 16) */
+#define     R200_LIGHT_3_SHIFT                   (16)
+#define R200_SE_TCL_PER_LIGHT_CTL_2       0x2278 
+#define     R200_LIGHT_4_SHIFT                   (0)
+#define     R200_LIGHT_5_SHIFT                   (16)
+#define R200_SE_TCL_PER_LIGHT_CTL_3       0x227c 
+#define     R200_LIGHT_6_SHIFT                   (0)
+#define     R200_LIGHT_7_SHIFT                   (16)
+/* gap: nothing is listed here between 0x227c and 0x22a8; CTL_2, CTL_3, CTL_0, CTL_1 follow in address order */
+#define R200_SE_TCL_TEX_PROC_CTL_2        0x22a8 
+#define     R200_TEXGEN_COMP_MASK                (0xf) /* == COMP_S | COMP_T | COMP_R | COMP_Q */
+#define     R200_TEXGEN_COMP_S                   (0x1)
+#define     R200_TEXGEN_COMP_T                   (0x2)
+#define     R200_TEXGEN_COMP_R                   (0x4)
+#define     R200_TEXGEN_COMP_Q                   (0x8)
+#define     R200_TEXGEN_0_COMP_MASK_SHIFT        (0) /* per-unit shifts step by 4 */
+#define     R200_TEXGEN_1_COMP_MASK_SHIFT        (4)
+#define     R200_TEXGEN_2_COMP_MASK_SHIFT        (8)
+#define     R200_TEXGEN_3_COMP_MASK_SHIFT        (12)
+#define     R200_TEXGEN_4_COMP_MASK_SHIFT        (16)
+#define     R200_TEXGEN_5_COMP_MASK_SHIFT        (20)
+#define R200_SE_TCL_TEX_PROC_CTL_3        0x22ac 
+#define     R200_TEXGEN_0_INPUT_TEX_SHIFT        (0) /* per-unit shifts step by 4 */
+#define     R200_TEXGEN_1_INPUT_TEX_SHIFT        (4)
+#define     R200_TEXGEN_2_INPUT_TEX_SHIFT        (8)
+#define     R200_TEXGEN_3_INPUT_TEX_SHIFT        (12)
+#define     R200_TEXGEN_4_INPUT_TEX_SHIFT        (16)
+#define     R200_TEXGEN_5_INPUT_TEX_SHIFT        (20)
+#define R200_SE_TCL_TEX_PROC_CTL_0        0x22b0 
+#define     R200_TEXGEN_TEXMAT_0_ENABLE         (1<<0) /* TEXGEN_TEXMAT_n_ENABLE is bit n */
+#define     R200_TEXGEN_TEXMAT_1_ENABLE         (1<<1)
+#define     R200_TEXGEN_TEXMAT_2_ENABLE         (1<<2)
+#define     R200_TEXGEN_TEXMAT_3_ENABLE         (1<<3)
+#define     R200_TEXGEN_TEXMAT_4_ENABLE         (1<<4)
+#define     R200_TEXGEN_TEXMAT_5_ENABLE         (1<<5)
+#define     R200_TEXMAT_0_ENABLE                (1<<8) /* TEXMAT_n_ENABLE is bit 8+n */
+#define     R200_TEXMAT_1_ENABLE                (1<<9)
+#define     R200_TEXMAT_2_ENABLE                (1<<10)
+#define     R200_TEXMAT_3_ENABLE                (1<<11)
+#define     R200_TEXMAT_4_ENABLE                (1<<12)
+#define     R200_TEXMAT_5_ENABLE                (1<<13)
+#define     R200_TEXGEN_FORCE_W_TO_ONE          (1<<16)
+#define R200_SE_TCL_TEX_PROC_CTL_1        0x22b4 
+#define       R200_TEXGEN_INPUT_MASK           (0xf) /* unshifted 4-bit mask; INPUT_* values below are unshifted too */
+#define       R200_TEXGEN_INPUT_TEXCOORD_0     (0)
+#define       R200_TEXGEN_INPUT_TEXCOORD_1     (1)
+#define       R200_TEXGEN_INPUT_TEXCOORD_2     (2)
+#define       R200_TEXGEN_INPUT_TEXCOORD_3     (3)
+#define       R200_TEXGEN_INPUT_TEXCOORD_4     (4)
+#define       R200_TEXGEN_INPUT_TEXCOORD_5     (5)
+#define       R200_TEXGEN_INPUT_OBJ            (8)
+#define       R200_TEXGEN_INPUT_EYE            (9)
+#define       R200_TEXGEN_INPUT_EYE_NORMAL     (0xa)
+#define       R200_TEXGEN_INPUT_EYE_REFLECT    (0xb)
+#define       R200_TEXGEN_INPUT_SPHERE         (0xd)
+#define     R200_TEXGEN_0_INPUT_SHIFT        (0) /* per-unit shifts step by 4 */
+#define     R200_TEXGEN_1_INPUT_SHIFT        (4)
+#define     R200_TEXGEN_2_INPUT_SHIFT        (8)
+#define     R200_TEXGEN_3_INPUT_SHIFT        (12)
+#define     R200_TEXGEN_4_INPUT_SHIFT        (16)
+#define     R200_TEXGEN_5_INPUT_SHIFT        (20)
+#define R200_SE_TC_TEX_CYL_WRAP_CTL       0x22b8 /* note: name uses SE_TC_, not SE_TCL_ */
+/* gap: nothing is listed here between 0x22b8 and 0x22c0 */
+#define R200_SE_TCL_UCP_VERT_BLEND_CTL    0x22c0 
+#define     R200_UCP_IN_CLIP_SPACE              (1<<0)
+#define     R200_UCP_IN_MODEL_SPACE             (1<<1)
+#define     R200_UCP_ENABLE_0                   (1<<2) /* UCP_ENABLE_n is bit n+2 */
+#define     R200_UCP_ENABLE_1                   (1<<3)
+#define     R200_UCP_ENABLE_2                   (1<<4)
+#define     R200_UCP_ENABLE_3                   (1<<5)
+#define     R200_UCP_ENABLE_4                   (1<<6)
+#define     R200_UCP_ENABLE_5                   (1<<7)
+#define     R200_TCL_FOG_MASK                   (3<<8) /* covers the four TCL_FOG_* values below */
+#define     R200_TCL_FOG_DISABLE                (0<<8)
+#define     R200_TCL_FOG_EXP                    (1<<8)
+#define     R200_TCL_FOG_EXP2                   (2<<8)
+#define     R200_TCL_FOG_LINEAR                 (3<<8)
+#define     R200_RNG_BASED_FOG                  (1<<10)
+#define     R200_CLIP_DISABLE                   (1<<11)
+#define     R200_CULL_FRONT_IS_CW               (0<<28) /* CW/CCW are the two values of bit 28 */
+#define     R200_CULL_FRONT_IS_CCW              (1<<28)
+#define     R200_CULL_FRONT                     (1<<29)
+#define     R200_CULL_BACK                      (1<<30)
+#define R200_SE_TCL_POINT_SPRITE_CNTL     0x22c4
+#define     R200_PS_MULT_PVATTENCONST           (0<<0) /* PS_MULT_*: values 0-4 under PS_MULT_MASK */
+#define     R200_PS_MULT_PVATTEN                (1<<0)
+#define     R200_PS_MULT_ATTENCONST             (2<<0)
+#define     R200_PS_MULT_PVCONST                (3<<0)
+#define     R200_PS_MULT_CONST                  (4<<0)
+#define     R200_PS_MULT_MASK                   (7<<0)
+#define     R200_PS_LIN_ATT_ZERO                (1<<3)
+#define     R200_PS_USE_MODEL_EYE_VEC           (1<<4)
+#define     R200_PS_ATT_ALPHA                   (1<<5)
+#define     R200_PS_UCP_MODE_MASK               (3<<6)
+#define     R200_PS_GEN_TEX_0                   (1<<8) /* PS_GEN_TEX_n is bit 8+n */
+#define     R200_PS_GEN_TEX_1                   (1<<9)
+#define     R200_PS_GEN_TEX_2                   (1<<10)
+#define     R200_PS_GEN_TEX_3                   (1<<11)
+#define     R200_PS_GEN_TEX_4                   (1<<12)
+#define     R200_PS_GEN_TEX_5                   (1<<13)
+#define     R200_PS_GEN_TEX_0_SHIFT             (8)
+#define     R200_PS_GEN_TEX_MASK                (0x3f<<8) /* OR of PS_GEN_TEX_0..5 */
+#define     R200_PS_SE_SEL_STATE                (1<<16)
+/* gap: nothing is listed here between 0x22c4 and 0x22d0 */
+/* taken from r300, see comments there */
+#define R200_VAP_PVS_CNTL_1                 0x22d0
+#       define R200_PVS_CNTL_1_PROGRAM_START_SHIFT   0  /* CNTL_1 shifts step by 10 */
+#       define R200_PVS_CNTL_1_POS_END_SHIFT         10
+#       define R200_PVS_CNTL_1_PROGRAM_END_SHIFT     20
+/* Addresses are relative to the vertex program parameters area. */
+#define R200_VAP_PVS_CNTL_2                 0x22d4
+#       define R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT 0
+#       define R200_PVS_CNTL_2_PARAM_COUNT_SHIFT  16
+/* gap: nothing is listed here between 0x22d4 and 0x2300 */
+
+#define R200_SE_VTX_ST_POS_0_X_4                   0x2300 /* SE_VTX_ST_* offsets advance by 4 from here */
+#define R200_SE_VTX_ST_POS_0_Y_4                   0x2304
+#define R200_SE_VTX_ST_POS_0_Z_4                   0x2308
+#define R200_SE_VTX_ST_POS_0_W_4                   0x230c
+#define R200_SE_VTX_ST_NORM_0_X                    0x2310
+#define R200_SE_VTX_ST_NORM_0_Y                    0x2314
+#define R200_SE_VTX_ST_NORM_0_Z                    0x2318
+#define R200_SE_VTX_ST_PVMS                        0x231c
+#define R200_SE_VTX_ST_CLR_0_R                     0x2320
+#define R200_SE_VTX_ST_CLR_0_G                     0x2324
+#define R200_SE_VTX_ST_CLR_0_B                     0x2328
+#define R200_SE_VTX_ST_CLR_0_A                     0x232c
+#define R200_SE_VTX_ST_CLR_1_R                     0x2330
+#define R200_SE_VTX_ST_CLR_1_G                     0x2334
+#define R200_SE_VTX_ST_CLR_1_B                     0x2338
+#define R200_SE_VTX_ST_CLR_1_A                     0x233c
+#define R200_SE_VTX_ST_CLR_2_R                     0x2340
+#define R200_SE_VTX_ST_CLR_2_G                     0x2344
+#define R200_SE_VTX_ST_CLR_2_B                     0x2348
+#define R200_SE_VTX_ST_CLR_2_A                     0x234c
+#define R200_SE_VTX_ST_CLR_3_R                     0x2350
+#define R200_SE_VTX_ST_CLR_3_G                     0x2354
+#define R200_SE_VTX_ST_CLR_3_B                     0x2358
+#define R200_SE_VTX_ST_CLR_3_A                     0x235c
+#define R200_SE_VTX_ST_CLR_4_R                     0x2360
+#define R200_SE_VTX_ST_CLR_4_G                     0x2364
+#define R200_SE_VTX_ST_CLR_4_B                     0x2368
+#define R200_SE_VTX_ST_CLR_4_A                     0x236c
+#define R200_SE_VTX_ST_CLR_5_R                     0x2370
+#define R200_SE_VTX_ST_CLR_5_G                     0x2374
+#define R200_SE_VTX_ST_CLR_5_B                     0x2378
+#define R200_SE_VTX_ST_CLR_5_A                     0x237c
+#define R200_SE_VTX_ST_CLR_6_R                     0x2380
+#define R200_SE_VTX_ST_CLR_6_G                     0x2384
+#define R200_SE_VTX_ST_CLR_6_B                     0x2388
+#define R200_SE_VTX_ST_CLR_6_A                     0x238c
+#define R200_SE_VTX_ST_CLR_7_R                     0x2390
+#define R200_SE_VTX_ST_CLR_7_G                     0x2394
+#define R200_SE_VTX_ST_CLR_7_B                     0x2398
+#define R200_SE_VTX_ST_CLR_7_A                     0x239c
+#define R200_SE_VTX_ST_TEX_0_S                     0x23a0
+#define R200_SE_VTX_ST_TEX_0_T                     0x23a4
+#define R200_SE_VTX_ST_TEX_0_R                     0x23a8
+#define R200_SE_VTX_ST_TEX_0_Q                     0x23ac
+#define R200_SE_VTX_ST_TEX_1_S                     0x23b0
+#define R200_SE_VTX_ST_TEX_1_T                     0x23b4
+#define R200_SE_VTX_ST_TEX_1_R                     0x23b8
+#define R200_SE_VTX_ST_TEX_1_Q                     0x23bc
+#define R200_SE_VTX_ST_TEX_2_S                     0x23c0
+#define R200_SE_VTX_ST_TEX_2_T                     0x23c4
+#define R200_SE_VTX_ST_TEX_2_R                     0x23c8
+#define R200_SE_VTX_ST_TEX_2_Q                     0x23cc
+#define R200_SE_VTX_ST_TEX_3_S                     0x23d0
+#define R200_SE_VTX_ST_TEX_3_T                     0x23d4
+#define R200_SE_VTX_ST_TEX_3_R                     0x23d8
+#define R200_SE_VTX_ST_TEX_3_Q                     0x23dc
+#define R200_SE_VTX_ST_TEX_4_S                     0x23e0
+#define R200_SE_VTX_ST_TEX_4_T                     0x23e4
+#define R200_SE_VTX_ST_TEX_4_R                     0x23e8
+#define R200_SE_VTX_ST_TEX_4_Q                     0x23ec
+#define R200_SE_VTX_ST_TEX_5_S                     0x23f0
+#define R200_SE_VTX_ST_TEX_5_T                     0x23f4
+#define R200_SE_VTX_ST_TEX_5_R                     0x23f8
+#define R200_SE_VTX_ST_TEX_5_Q                     0x23fc
+#define R200_SE_VTX_ST_PNT_SPRT_SZ                 0x2400
+#define R200_SE_VTX_ST_DISC_FOG                    0x2404
+#define R200_SE_VTX_ST_SHININESS_0                 0x2408
+#define R200_SE_VTX_ST_SHININESS_1                 0x240c
+#define R200_SE_VTX_ST_BLND_WT_0                   0x2410
+#define R200_SE_VTX_ST_BLND_WT_1                   0x2414
+#define R200_SE_VTX_ST_BLND_WT_2                   0x2418
+#define R200_SE_VTX_ST_BLND_WT_3                   0x241c
+#define R200_SE_VTX_ST_POS_1_X                     0x2420
+#define R200_SE_VTX_ST_POS_1_Y                     0x2424
+#define R200_SE_VTX_ST_POS_1_Z                     0x2428
+#define R200_SE_VTX_ST_POS_1_W                     0x242c
+#define R200_SE_VTX_ST_NORM_1_X                    0x2430
+#define R200_SE_VTX_ST_NORM_1_Y                    0x2434
+#define R200_SE_VTX_ST_NORM_1_Z                    0x2438 /* 0x243c is not listed */
+#define R200_SE_VTX_ST_USR_CLR_0_R                 0x2440
+#define R200_SE_VTX_ST_USR_CLR_0_G                 0x2444
+#define R200_SE_VTX_ST_USR_CLR_0_B                 0x2448
+#define R200_SE_VTX_ST_USR_CLR_0_A                 0x244c
+#define R200_SE_VTX_ST_USR_CLR_1_R                 0x2450
+#define R200_SE_VTX_ST_USR_CLR_1_G                 0x2454
+#define R200_SE_VTX_ST_USR_CLR_1_B                 0x2458
+#define R200_SE_VTX_ST_USR_CLR_1_A                 0x245c
+#define R200_SE_VTX_ST_CLR_0_PKD                   0x2460
+#define R200_SE_VTX_ST_CLR_1_PKD                   0x2464
+#define R200_SE_VTX_ST_CLR_2_PKD                   0x2468
+#define R200_SE_VTX_ST_CLR_3_PKD                   0x246c
+#define R200_SE_VTX_ST_CLR_4_PKD                   0x2470
+#define R200_SE_VTX_ST_CLR_5_PKD                   0x2474
+#define R200_SE_VTX_ST_CLR_6_PKD                   0x2478
+#define R200_SE_VTX_ST_CLR_7_PKD                   0x247c
+#define R200_SE_VTX_ST_POS_0_X_2                   0x2480
+#define R200_SE_VTX_ST_POS_0_Y_2                   0x2484
+#define R200_SE_VTX_ST_PAR_CLR_LD                  0x2488
+#define R200_SE_VTX_ST_USR_CLR_PKD                 0x248c
+#define R200_SE_VTX_ST_POS_0_X_3                   0x2490
+#define R200_SE_VTX_ST_POS_0_Y_3                   0x2494
+#define R200_SE_VTX_ST_POS_0_Z_3                   0x2498
+#define R200_SE_VTX_ST_END_OF_PKT                  0x249c
+/* gap: nothing is listed here between 0x249c and 0x2648 */
+#define R200_RE_POINTSIZE                          0x2648
+#define     R200_POINTSIZE_SHIFT                       0
+#define     R200_MAXPOINTSIZE_SHIFT                    16
+/* gap: nothing is listed here between 0x2648 and 0x26c0 */
+#define R200_RE_TOP_LEFT                  0x26c0 
+#define     R200_RE_LEFT_SHIFT         0
+#define     R200_RE_TOP_SHIFT          16
+#define R200_RE_MISC                      0x26c4 
+#define     R200_STIPPLE_COORD_MASK           0x1f /* unshifted form of the X/Y offset masks below */
+#define     R200_STIPPLE_X_OFFSET_SHIFT       0
+#define     R200_STIPPLE_X_OFFSET_MASK        (0x1f << 0)
+#define     R200_STIPPLE_Y_OFFSET_SHIFT       8
+#define     R200_STIPPLE_Y_OFFSET_MASK        (0x1f << 8)
+#define     R200_STIPPLE_LITTLE_BIT_ORDER     (0 << 16) /* LITTLE/BIG are the two values of bit 16 */
+#define     R200_STIPPLE_BIG_BIT_ORDER        (1 << 16)
+/* gap: nothing is listed here between 0x26c4 and 0x26f0 */
+#define R200_RE_AUX_SCISSOR_CNTL                   0x26f0
+#define     R200_EXCLUSIVE_SCISSOR_0      0x01000000 /* EXCLUSIVE_SCISSOR_n is bit 24+n */
+#define     R200_EXCLUSIVE_SCISSOR_1      0x02000000
+#define     R200_EXCLUSIVE_SCISSOR_2      0x04000000
+#define     R200_SCISSOR_ENABLE_0         0x10000000 /* SCISSOR_ENABLE_n is bit 28+n */
+#define     R200_SCISSOR_ENABLE_1         0x20000000
+#define     R200_SCISSOR_ENABLE_2         0x40000000
+/* gap: nothing is listed here between 0x26f0 and 0x2c00 */
+#define R200_PP_TXFILTER_0                0x2c00 
+#define     R200_MAG_FILTER_NEAREST                   (0  <<  0)
+#define     R200_MAG_FILTER_LINEAR                    (1  <<  0)
+#define     R200_MAG_FILTER_MASK                      (1  <<  0)
+#define     R200_MIN_FILTER_NEAREST                   (0  <<  1) /* MIN_FILTER_*: 4-bit codes at bit 1; 4 and 5 are not listed */
+#define     R200_MIN_FILTER_LINEAR                    (1  <<  1)
+#define     R200_MIN_FILTER_NEAREST_MIP_NEAREST       (2  <<  1)
+#define     R200_MIN_FILTER_NEAREST_MIP_LINEAR        (3  <<  1)
+#define     R200_MIN_FILTER_LINEAR_MIP_NEAREST        (6  <<  1)
+#define     R200_MIN_FILTER_LINEAR_MIP_LINEAR         (7  <<  1)
+#define     R200_MIN_FILTER_ANISO_NEAREST             (8  <<  1)
+#define     R200_MIN_FILTER_ANISO_LINEAR              (9  <<  1)
+#define     R200_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (10 <<  1)
+#define     R200_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR  (11 <<  1)
+#define     R200_MIN_FILTER_MASK                      (15 <<  1)
+#define     R200_MAX_ANISO_1_TO_1                     (0  <<  5)
+#define     R200_MAX_ANISO_2_TO_1                     (1  <<  5)
+#define     R200_MAX_ANISO_4_TO_1                     (2  <<  5)
+#define     R200_MAX_ANISO_8_TO_1                     (3  <<  5)
+#define     R200_MAX_ANISO_16_TO_1                    (4  <<  5)
+#define     R200_MAX_ANISO_MASK                       (7  <<  5)
+#define     R200_MAX_MIP_LEVEL_MASK                   (0x0f << 16)
+#define     R200_MAX_MIP_LEVEL_SHIFT                  16
+#define     R200_YUV_TO_RGB                           (1  << 20)
+#define     R200_YUV_TEMPERATURE_COOL                 (0  << 21)
+#define     R200_YUV_TEMPERATURE_HOT                  (1  << 21)
+#define     R200_YUV_TEMPERATURE_MASK                 (1  << 21)
+#define     R200_WRAPEN_S                             (1  << 22)
+#define     R200_CLAMP_S_WRAP                         (0  << 23) /* CLAMP_S_*: 3-bit codes at bit 23 */
+#define     R200_CLAMP_S_MIRROR                       (1  << 23)
+#define     R200_CLAMP_S_CLAMP_LAST                   (2  << 23)
+#define     R200_CLAMP_S_MIRROR_CLAMP_LAST            (3  << 23)
+#define     R200_CLAMP_S_CLAMP_BORDER                 (4  << 23)
+#define     R200_CLAMP_S_MIRROR_CLAMP_BORDER          (5  << 23)
+#define     R200_CLAMP_S_CLAMP_GL                     (6  << 23)
+#define     R200_CLAMP_S_MIRROR_CLAMP_GL              (7  << 23)
+#define     R200_CLAMP_S_MASK                         (7  << 23)
+#define     R200_WRAPEN_T                             (1  << 26)
+#define     R200_CLAMP_T_WRAP                         (0  << 27) /* CLAMP_T_*: same codes as CLAMP_S_*, at bit 27 */
+#define     R200_CLAMP_T_MIRROR                       (1  << 27)
+#define     R200_CLAMP_T_CLAMP_LAST                   (2  << 27)
+#define     R200_CLAMP_T_MIRROR_CLAMP_LAST            (3  << 27)
+#define     R200_CLAMP_T_CLAMP_BORDER                 (4  << 27)
+#define     R200_CLAMP_T_MIRROR_CLAMP_BORDER          (5  << 27)
+#define     R200_CLAMP_T_CLAMP_GL                     (6  << 27)
+#define     R200_CLAMP_T_MIRROR_CLAMP_GL              (7  << 27)
+#define     R200_CLAMP_T_MASK                         (7  << 27)
+#define     R200_KILL_LT_ZERO                         (1  << 30)
+#define     R200_BORDER_MODE_OGL                      (0  << 31)
+#define     R200_BORDER_MODE_D3D                      (1u << 31) /* unsigned: 1 << 31 overflows a signed int */
+#define R200_PP_TXFORMAT_0                0x2c04
+#define     R200_TXFORMAT_I8                 (0  <<  0) /* format codes under FORMAT_MASK; 13, 16 and 17 are not listed */
+#define     R200_TXFORMAT_AI88               (1  <<  0)
+#define     R200_TXFORMAT_RGB332             (2  <<  0)
+#define     R200_TXFORMAT_ARGB1555           (3  <<  0)
+#define     R200_TXFORMAT_RGB565             (4  <<  0)
+#define     R200_TXFORMAT_ARGB4444           (5  <<  0)
+#define     R200_TXFORMAT_ARGB8888           (6  <<  0)
+#define     R200_TXFORMAT_RGBA8888           (7  <<  0)
+#define     R200_TXFORMAT_Y8                 (8  <<  0)
+#define     R200_TXFORMAT_AVYU4444           (9  <<  0)
+#define     R200_TXFORMAT_VYUY422            (10  <<  0)
+#define     R200_TXFORMAT_YVYU422            (11  <<  0)
+#define     R200_TXFORMAT_DXT1               (12  <<  0)
+#define     R200_TXFORMAT_DXT23              (14  <<  0)
+#define     R200_TXFORMAT_DXT45              (15  <<  0)
+#define     R200_TXFORMAT_DVDU88             (18  <<  0)
+#define     R200_TXFORMAT_LDVDU655           (19  <<  0)
+#define     R200_TXFORMAT_LDVDU8888          (20  <<  0)
+#define     R200_TXFORMAT_GR1616             (21  <<  0)
+#define     R200_TXFORMAT_ABGR8888           (22  <<  0)
+#define     R200_TXFORMAT_BGR111110          (23  <<  0)
+#define     R200_TXFORMAT_FORMAT_MASK        (31 <<  0)
+#define     R200_TXFORMAT_FORMAT_SHIFT       0
+#define     R200_TXFORMAT_APPLE_YUV          (1  <<  5)
+#define     R200_TXFORMAT_ALPHA_IN_MAP       (1  <<  6)
+#define     R200_TXFORMAT_NON_POWER2         (1  <<  7)
+#define     R200_TXFORMAT_WIDTH_MASK         (15 <<  8)
+#define     R200_TXFORMAT_WIDTH_SHIFT        8
+#define     R200_TXFORMAT_HEIGHT_MASK        (15 << 12)
+#define     R200_TXFORMAT_HEIGHT_SHIFT       12
+#define     R200_TXFORMAT_F5_WIDTH_MASK      (15 << 16)	/* cube face 5 */
+#define     R200_TXFORMAT_F5_WIDTH_SHIFT     16
+#define     R200_TXFORMAT_F5_HEIGHT_MASK     (15 << 20)
+#define     R200_TXFORMAT_F5_HEIGHT_SHIFT    20
+#define     R200_TXFORMAT_ST_ROUTE_STQ0      (0  << 24) /* ST_ROUTE_STQn is n << 24 */
+#define     R200_TXFORMAT_ST_ROUTE_STQ1      (1  << 24)
+#define     R200_TXFORMAT_ST_ROUTE_STQ2      (2  << 24)
+#define     R200_TXFORMAT_ST_ROUTE_STQ3      (3  << 24)
+#define     R200_TXFORMAT_ST_ROUTE_STQ4      (4  << 24)
+#define     R200_TXFORMAT_ST_ROUTE_STQ5      (5  << 24)
+#define     R200_TXFORMAT_ST_ROUTE_MASK      (7  << 24)
+#define     R200_TXFORMAT_ST_ROUTE_SHIFT     24
+#define     R200_TXFORMAT_LOOKUP_DISABLE     (1  << 27)
+#define     R200_TXFORMAT_ALPHA_MASK_ENABLE  (1  << 28)
+#define     R200_TXFORMAT_CHROMA_KEY_ENABLE  (1  << 29)
+#define     R200_TXFORMAT_CUBIC_MAP_ENABLE   (1  << 30)
+#define R200_PP_TXFORMAT_X_0              0x2c08
+#define     R200_DEPTH_LOG2_MASK                      (0xf << 0)
+#define     R200_DEPTH_LOG2_SHIFT                     0
+#define     R200_VOLUME_FILTER_SHIFT                  4
+#define     R200_VOLUME_FILTER_MASK                   (1 << 4)
+#define     R200_VOLUME_FILTER_NEAREST                (0 << 4)
+#define     R200_VOLUME_FILTER_LINEAR                 (1 << 4)
+#define     R200_WRAPEN_Q                             (1  << 8)
+#define     R200_CLAMP_Q_WRAP                         (0  << 9) /* CLAMP_Q_*: 3-bit codes at bit 9 */
+#define     R200_CLAMP_Q_MIRROR                       (1  << 9)
+#define     R200_CLAMP_Q_CLAMP_LAST                   (2  << 9)
+#define     R200_CLAMP_Q_MIRROR_CLAMP_LAST            (3  << 9)
+#define     R200_CLAMP_Q_CLAMP_BORDER                 (4  << 9)
+#define     R200_CLAMP_Q_MIRROR_CLAMP_BORDER          (5  << 9)
+#define     R200_CLAMP_Q_CLAMP_GL                     (6  << 9)
+#define     R200_CLAMP_Q_MIRROR_CLAMP_GL              (7  << 9)
+#define     R200_CLAMP_Q_MASK                         (7  << 9)
+#define     R200_MIN_MIP_LEVEL_MASK                   (0x0f << 12)
+#define     R200_MIN_MIP_LEVEL_SHIFT                  12
+#define     R200_TEXCOORD_NONPROJ                     (0  << 16) /* TEXCOORD_*: 3-bit codes at bit 16 */
+#define     R200_TEXCOORD_CUBIC_ENV                   (1  << 16)
+#define     R200_TEXCOORD_VOLUME                      (2  << 16)
+#define     R200_TEXCOORD_PROJ                        (3  << 16)
+#define     R200_TEXCOORD_DEPTH                       (4  << 16)
+#define     R200_TEXCOORD_1D_PROJ                     (5  << 16)
+#define     R200_TEXCOORD_1D                          (6  << 16)
+#define     R200_TEXCOORD_ZERO                        (7  << 16)
+#define     R200_TEXCOORD_MASK                        (7  << 16)
+#define     R200_LOD_BIAS_MASK                        (0xfff80000) /* bits 19-31, matching LOD_BIAS_SHIFT */
+#define     R200_LOD_BIAS_FIXED_ONE                   (0x08000000) /* 1 << 27 */
+#define     R200_LOD_BIAS_CORRECTION                  (0x00600000) /* bits 21 and 22 */
+#define     R200_LOD_BIAS_SHIFT                       19
+#define R200_PP_TXSIZE_0                  0x2c0c /* NPOT only */
+#define R200_PP_TX_WIDTHMASK_SHIFT 0 /* presumably a TXSIZE field shift (listed under it); confirm */
+#define R200_PP_TX_HEIGHTMASK_SHIFT 16 /* presumably a TXSIZE field shift (listed under it); confirm */
+
+#define R200_PP_TXPITCH_0                 0x2c10 /* NPOT only */
+#define R200_PP_BORDER_COLOR_0            0x2c14
+#define R200_PP_CUBIC_FACES_0             0x2c18 /* faces 1-4: 4-bit width/height fields, shifts step by 4 */
+#define     R200_FACE_WIDTH_1_SHIFT                   0
+#define     R200_FACE_HEIGHT_1_SHIFT                  4
+#define     R200_FACE_WIDTH_1_MASK                   (0xf << 0)
+#define     R200_FACE_HEIGHT_1_MASK                  (0xf << 4)
+#define     R200_FACE_WIDTH_2_SHIFT                   8
+#define     R200_FACE_HEIGHT_2_SHIFT                 12
+#define     R200_FACE_WIDTH_2_MASK                   (0xf << 8)
+#define     R200_FACE_HEIGHT_2_MASK                  (0xf << 12)
+#define     R200_FACE_WIDTH_3_SHIFT                  16
+#define     R200_FACE_HEIGHT_3_SHIFT                 20
+#define     R200_FACE_WIDTH_3_MASK                   (0xf << 16)
+#define     R200_FACE_HEIGHT_3_MASK                  (0xf << 20)
+#define     R200_FACE_WIDTH_4_SHIFT                  24
+#define     R200_FACE_HEIGHT_4_SHIFT                 28
+#define     R200_FACE_WIDTH_4_MASK                   (0xf << 24)
+#define     R200_FACE_HEIGHT_4_MASK                  (0xfu << 28) /* unsigned: 0xf << 28 overflows a signed int */
+#define R200_PP_TXMULTI_CTL_0                  0x2c1c /* name from ddx, rest RE... */
+#define     R200_PASS1_TXFORMAT_LOOKUP_DISABLE (1 << 0)
+#define     R200_PASS1_TEXCOORD_NONPROJ        (0 << 1)
+#define     R200_PASS1_TEXCOORD_CUBIC_ENV      (1 << 1)
+#define     R200_PASS1_TEXCOORD_VOLUME         (2 << 1)
+#define     R200_PASS1_TEXCOORD_PROJ           (3 << 1)
+#define     R200_PASS1_TEXCOORD_DEPTH          (4 << 1)
+#define     R200_PASS1_TEXCOORD_1D_PROJ        (5 << 1)
+#define     R200_PASS1_TEXCOORD_1D             (6 << 1) /* pass1 texcoords only */
+#define     R200_PASS1_TEXCOORD_ZERO           (7 << 1) /* verified for 2d targets! */
+#define     R200_PASS1_TEXCOORD_MASK           (7 << 1) /* assumed same values as for pass2 */
+#define     R200_PASS1_ST_ROUTE_STQ0           (0 << 4)
+#define     R200_PASS1_ST_ROUTE_STQ1           (1 << 4)
+#define     R200_PASS1_ST_ROUTE_STQ2           (2 << 4)
+#define     R200_PASS1_ST_ROUTE_STQ3           (3 << 4)
+#define     R200_PASS1_ST_ROUTE_STQ4           (4 << 4)
+#define     R200_PASS1_ST_ROUTE_STQ5           (5 << 4)
+#define     R200_PASS1_ST_ROUTE_MASK           (7 << 4)
+#define     R200_PASS1_ST_ROUTE_SHIFT          (4)
+#define     R200_PASS2_COORDS_REG_0            (2 << 24) /* COORDS_REG_n is encoded as (n + 2) << 24 */
+#define     R200_PASS2_COORDS_REG_1            (3 << 24)
+#define     R200_PASS2_COORDS_REG_2            (4 << 24)
+#define     R200_PASS2_COORDS_REG_3            (5 << 24)
+#define     R200_PASS2_COORDS_REG_4            (6 << 24)
+#define     R200_PASS2_COORDS_REG_5            (7 << 24)
+#define     R200_PASS2_COORDS_REG_MASK         (0x7 << 24)
+#define     R200_PASS2_COORDS_REG_SHIFT        (24)
+#define R200_PP_TXFILTER_1                0x2c20 /* units 1-5 repeat the unit-0 offsets at a 0x20 stride */
+#define R200_PP_TXFORMAT_1                0x2c24
+#define R200_PP_TXFORMAT_X_1              0x2c28
+#define R200_PP_TXSIZE_1                  0x2c2c
+#define R200_PP_TXPITCH_1                 0x2c30
+#define R200_PP_BORDER_COLOR_1            0x2c34
+#define R200_PP_CUBIC_FACES_1             0x2c38
+#define R200_PP_TXMULTI_CTL_1             0x2c3c
+#define R200_PP_TXFILTER_2                0x2c40
+#define R200_PP_TXFORMAT_2                0x2c44
+#define R200_PP_TXSIZE_2                  0x2c4c /* units 2-5 list TXSIZE before TXFORMAT_X; offsets are unaffected */
+#define R200_PP_TXFORMAT_X_2              0x2c48
+#define R200_PP_TXPITCH_2                 0x2c50
+#define R200_PP_BORDER_COLOR_2            0x2c54
+#define R200_PP_CUBIC_FACES_2             0x2c58
+#define R200_PP_TXMULTI_CTL_2             0x2c5c
+#define R200_PP_TXFILTER_3                0x2c60
+#define R200_PP_TXFORMAT_3                0x2c64
+#define R200_PP_TXSIZE_3                  0x2c6c
+#define R200_PP_TXFORMAT_X_3              0x2c68
+#define R200_PP_TXPITCH_3                 0x2c70
+#define R200_PP_BORDER_COLOR_3            0x2c74
+#define R200_PP_CUBIC_FACES_3             0x2c78
+#define R200_PP_TXMULTI_CTL_3             0x2c7c
+#define R200_PP_TXFILTER_4                0x2c80
+#define R200_PP_TXFORMAT_4                0x2c84
+#define R200_PP_TXSIZE_4                  0x2c8c
+#define R200_PP_TXFORMAT_X_4              0x2c88
+#define R200_PP_TXPITCH_4                 0x2c90
+#define R200_PP_BORDER_COLOR_4            0x2c94
+#define R200_PP_CUBIC_FACES_4             0x2c98
+#define R200_PP_TXMULTI_CTL_4             0x2c9c
+#define R200_PP_TXFILTER_5                0x2ca0
+#define R200_PP_TXFORMAT_5                0x2ca4
+#define R200_PP_TXSIZE_5                  0x2cac
+#define R200_PP_TXFORMAT_X_5              0x2ca8
+#define R200_PP_TXPITCH_5                 0x2cb0
+#define R200_PP_BORDER_COLOR_5            0x2cb4
+#define R200_PP_CUBIC_FACES_5             0x2cb8
+#define R200_PP_TXMULTI_CTL_5             0x2cbc
+/* gap */
+#define R200_PP_CNTL_X             0x2cc4  /* Reverse engineered from fglrx */
+#define     R200_PPX_TEX_0_ENABLE      (1 <<  0)
+#define     R200_PPX_TEX_1_ENABLE      (1 <<  1)
+#define     R200_PPX_TEX_2_ENABLE      (1 <<  2)
+#define     R200_PPX_TEX_3_ENABLE      (1 <<  3)
+#define     R200_PPX_TEX_4_ENABLE      (1 <<  4)
+#define     R200_PPX_TEX_5_ENABLE      (1 <<  5)
+#define     R200_PPX_TEX_ENABLE_MASK   (0x3f << 0)
+#define     R200_PPX_OUTPUT_REG_0      (1 <<  6)
+#define     R200_PPX_OUTPUT_REG_1      (1 <<  7)
+#define     R200_PPX_OUTPUT_REG_2      (1 <<  8)
+#define     R200_PPX_OUTPUT_REG_3      (1 <<  9)
+#define     R200_PPX_OUTPUT_REG_4      (1 << 10)
+#define     R200_PPX_OUTPUT_REG_5      (1 << 11)
+#define     R200_PPX_OUTPUT_REG_MASK   (0x3f << 6)
+#define     R200_PPX_OUTPUT_REG_0_SHIFT (6)
+#define     R200_PPX_PFS_INST0_ENABLE  (1 << 12)
+#define     R200_PPX_PFS_INST1_ENABLE  (1 << 13)
+#define     R200_PPX_PFS_INST2_ENABLE  (1 << 14)
+#define     R200_PPX_PFS_INST3_ENABLE  (1 << 15)
+#define     R200_PPX_PFS_INST4_ENABLE  (1 << 16)
+#define     R200_PPX_PFS_INST5_ENABLE  (1 << 17)
+#define     R200_PPX_PFS_INST6_ENABLE  (1 << 18)
+#define     R200_PPX_PFS_INST7_ENABLE  (1 << 19)
+#define     R200_PPX_PFS_INST_ENABLE_MASK (0xff << 12)
+#define     R200_PPX_FPS_INST0_ENABLE_SHIFT (12) /* NOTE(review): "FPS" is presumably a transposition of "PFS" (cf. R200_PPX_PFS_INST_ENABLE_MASK); name kept for existing users */
+/* gap */
+#define R200_PP_TRI_PERF                  0x2cf8
+#define     R200_TRI_CUTOFF_MASK            (0x1f << 0)
+#define R200_PP_PERF_CNTL                 0x2cfc
+#define R200_PP_TXOFFSET_0                0x2d00
+#define     R200_TXO_ENDIAN_NO_SWAP     (0 << 0)
+#define     R200_TXO_ENDIAN_BYTE_SWAP   (1 << 0)
+#define     R200_TXO_ENDIAN_WORD_SWAP   (2 << 0)
+#define     R200_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
+#define     R200_TXO_MACRO_TILE         (1 << 2)
+#define     R200_TXO_MICRO_TILE         (1 << 3)
+#define     R200_TXO_OFFSET_MASK        0xffffffe0
+#define     R200_TXO_OFFSET_SHIFT       5
+#define R200_PP_CUBIC_OFFSET_F1_0         0x2d04
+#define R200_PP_CUBIC_OFFSET_F2_0         0x2d08
+#define R200_PP_CUBIC_OFFSET_F3_0         0x2d0c
+#define R200_PP_CUBIC_OFFSET_F4_0         0x2d10
+#define R200_PP_CUBIC_OFFSET_F5_0         0x2d14
+#define R200_PP_TXOFFSET_1                0x2d18
+#define R200_PP_CUBIC_OFFSET_F1_1         0x2d1c
+#define R200_PP_CUBIC_OFFSET_F2_1         0x2d20
+#define R200_PP_CUBIC_OFFSET_F3_1         0x2d24
+#define R200_PP_CUBIC_OFFSET_F4_1         0x2d28
+#define R200_PP_CUBIC_OFFSET_F5_1         0x2d2c
+#define R200_PP_TXOFFSET_2                0x2d30
+#define R200_PP_CUBIC_OFFSET_F1_2         0x2d34
+#define R200_PP_CUBIC_OFFSET_F2_2         0x2d38
+#define R200_PP_CUBIC_OFFSET_F3_2         0x2d3c
+#define R200_PP_CUBIC_OFFSET_F4_2         0x2d40
+#define R200_PP_CUBIC_OFFSET_F5_2         0x2d44
+#define R200_PP_TXOFFSET_3                0x2d48
+#define R200_PP_CUBIC_OFFSET_F1_3         0x2d4c
+#define R200_PP_CUBIC_OFFSET_F2_3         0x2d50
+#define R200_PP_CUBIC_OFFSET_F3_3         0x2d54
+#define R200_PP_CUBIC_OFFSET_F4_3         0x2d58
+#define R200_PP_CUBIC_OFFSET_F5_3         0x2d5c
+#define R200_PP_TXOFFSET_4                0x2d60
+#define R200_PP_CUBIC_OFFSET_F1_4         0x2d64
+#define R200_PP_CUBIC_OFFSET_F2_4         0x2d68
+#define R200_PP_CUBIC_OFFSET_F3_4         0x2d6c
+#define R200_PP_CUBIC_OFFSET_F4_4         0x2d70
+#define R200_PP_CUBIC_OFFSET_F5_4         0x2d74
+#define R200_PP_TXOFFSET_5                0x2d78
+#define R200_PP_CUBIC_OFFSET_F1_5         0x2d7c
+#define R200_PP_CUBIC_OFFSET_F2_5         0x2d80
+#define R200_PP_CUBIC_OFFSET_F3_5         0x2d84
+#define R200_PP_CUBIC_OFFSET_F4_5         0x2d88
+#define R200_PP_CUBIC_OFFSET_F5_5         0x2d8c
+/* gap */
+#define R200_PP_TAM_DEBUG3                0x2d9c
+/* gap */
+#define R200_PP_TFACTOR_0                 0x2ee0
+#define R200_PP_TFACTOR_1                 0x2ee4
+#define R200_PP_TFACTOR_2                 0x2ee8
+#define R200_PP_TFACTOR_3                 0x2eec
+#define R200_PP_TFACTOR_4                 0x2ef0
+#define R200_PP_TFACTOR_5                 0x2ef4
+#define R200_PP_TFACTOR_6                 0x2ef8
+#define R200_PP_TFACTOR_7                 0x2efc
+#define R200_PP_TXCBLEND_0                0x2f00
+#define     R200_TXC_ARG_A_ZERO                (0)
+#define     R200_TXC_ARG_A_CURRENT_COLOR       (2)
+#define     R200_TXC_ARG_A_CURRENT_ALPHA       (3)
+#define     R200_TXC_ARG_A_DIFFUSE_COLOR       (4)
+#define     R200_TXC_ARG_A_DIFFUSE_ALPHA       (5)
+#define     R200_TXC_ARG_A_SPECULAR_COLOR      (6)
+#define     R200_TXC_ARG_A_SPECULAR_ALPHA      (7)
+#define     R200_TXC_ARG_A_TFACTOR_COLOR       (8)
+#define     R200_TXC_ARG_A_TFACTOR_ALPHA       (9)
+#define     R200_TXC_ARG_A_R0_COLOR            (10)
+#define     R200_TXC_ARG_A_R0_ALPHA            (11)
+#define     R200_TXC_ARG_A_R1_COLOR            (12)
+#define     R200_TXC_ARG_A_R1_ALPHA            (13)
+#define     R200_TXC_ARG_A_R2_COLOR            (14)
+#define     R200_TXC_ARG_A_R2_ALPHA            (15)
+#define     R200_TXC_ARG_A_R3_COLOR            (16)
+#define     R200_TXC_ARG_A_R3_ALPHA            (17)
+#define     R200_TXC_ARG_A_R4_COLOR            (18)
+#define     R200_TXC_ARG_A_R4_ALPHA            (19)
+#define     R200_TXC_ARG_A_R5_COLOR            (20)
+#define     R200_TXC_ARG_A_R5_ALPHA            (21)
+#define     R200_TXC_ARG_A_TFACTOR1_COLOR      (26)
+#define     R200_TXC_ARG_A_TFACTOR1_ALPHA      (27)
+#define     R200_TXC_ARG_A_MASK			(31 << 0)
+#define     R200_TXC_ARG_A_SHIFT			0
+#define     R200_TXC_ARG_B_ZERO                (0<<5)
+#define     R200_TXC_ARG_B_CURRENT_COLOR       (2<<5)
+#define     R200_TXC_ARG_B_CURRENT_ALPHA       (3<<5)
+#define     R200_TXC_ARG_B_DIFFUSE_COLOR       (4<<5)
+#define     R200_TXC_ARG_B_DIFFUSE_ALPHA       (5<<5)
+#define     R200_TXC_ARG_B_SPECULAR_COLOR      (6<<5)
+#define     R200_TXC_ARG_B_SPECULAR_ALPHA      (7<<5)
+#define     R200_TXC_ARG_B_TFACTOR_COLOR       (8<<5)
+#define     R200_TXC_ARG_B_TFACTOR_ALPHA       (9<<5)
+#define     R200_TXC_ARG_B_R0_COLOR            (10<<5)
+#define     R200_TXC_ARG_B_R0_ALPHA            (11<<5)
+#define     R200_TXC_ARG_B_R1_COLOR            (12<<5)
+#define     R200_TXC_ARG_B_R1_ALPHA            (13<<5)
+#define     R200_TXC_ARG_B_R2_COLOR            (14<<5)
+#define     R200_TXC_ARG_B_R2_ALPHA            (15<<5)
+#define     R200_TXC_ARG_B_R3_COLOR            (16<<5)
+#define     R200_TXC_ARG_B_R3_ALPHA            (17<<5)
+#define     R200_TXC_ARG_B_R4_COLOR            (18<<5)
+#define     R200_TXC_ARG_B_R4_ALPHA            (19<<5)
+#define     R200_TXC_ARG_B_R5_COLOR            (20<<5)
+#define     R200_TXC_ARG_B_R5_ALPHA            (21<<5)
+#define     R200_TXC_ARG_B_TFACTOR1_COLOR      (26<<5)
+#define     R200_TXC_ARG_B_TFACTOR1_ALPHA      (27<<5)
+#define     R200_TXC_ARG_B_MASK			(31 << 5)
+#define     R200_TXC_ARG_B_SHIFT			5
+#define     R200_TXC_ARG_C_ZERO                (0<<10)
+#define     R200_TXC_ARG_C_CURRENT_COLOR       (2<<10)
+#define     R200_TXC_ARG_C_CURRENT_ALPHA       (3<<10)
+#define     R200_TXC_ARG_C_DIFFUSE_COLOR       (4<<10)
+#define     R200_TXC_ARG_C_DIFFUSE_ALPHA       (5<<10)
+#define     R200_TXC_ARG_C_SPECULAR_COLOR      (6<<10)
+#define     R200_TXC_ARG_C_SPECULAR_ALPHA      (7<<10)
+#define     R200_TXC_ARG_C_TFACTOR_COLOR       (8<<10)
+#define     R200_TXC_ARG_C_TFACTOR_ALPHA       (9<<10)
+#define     R200_TXC_ARG_C_R0_COLOR            (10<<10)
+#define     R200_TXC_ARG_C_R0_ALPHA            (11<<10)
+#define     R200_TXC_ARG_C_R1_COLOR            (12<<10)
+#define     R200_TXC_ARG_C_R1_ALPHA            (13<<10)
+#define     R200_TXC_ARG_C_R2_COLOR            (14<<10)
+#define     R200_TXC_ARG_C_R2_ALPHA            (15<<10)
+#define     R200_TXC_ARG_C_R3_COLOR            (16<<10)
+#define     R200_TXC_ARG_C_R3_ALPHA            (17<<10)
+#define     R200_TXC_ARG_C_R4_COLOR            (18<<10)
+#define     R200_TXC_ARG_C_R4_ALPHA            (19<<10)
+#define     R200_TXC_ARG_C_R5_COLOR            (20<<10)
+#define     R200_TXC_ARG_C_R5_ALPHA            (21<<10)
+#define     R200_TXC_ARG_C_TFACTOR1_COLOR      (26<<10)
+#define     R200_TXC_ARG_C_TFACTOR1_ALPHA      (27<<10)
+#define     R200_TXC_ARG_C_MASK			(31 << 10)
+#define     R200_TXC_ARG_C_SHIFT			10
+#define     R200_TXC_COMP_ARG_A                    (1 << 16)
+#define     R200_TXC_COMP_ARG_A_SHIFT              (16)
+#define     R200_TXC_BIAS_ARG_A                    (1 << 17)
+#define     R200_TXC_SCALE_ARG_A                   (1 << 18)
+#define     R200_TXC_NEG_ARG_A                     (1 << 19)
+#define     R200_TXC_COMP_ARG_B                    (1 << 20)
+#define     R200_TXC_COMP_ARG_B_SHIFT              (20)
+#define     R200_TXC_BIAS_ARG_B                    (1 << 21)
+#define     R200_TXC_SCALE_ARG_B                   (1 << 22)
+#define     R200_TXC_NEG_ARG_B                     (1 << 23)
+#define     R200_TXC_COMP_ARG_C                    (1 << 24)
+#define     R200_TXC_COMP_ARG_C_SHIFT              (24)
+#define     R200_TXC_BIAS_ARG_C                    (1 << 25)
+#define     R200_TXC_SCALE_ARG_C                   (1 << 26)
+#define     R200_TXC_NEG_ARG_C                     (1 << 27)
+#define     R200_TXC_OP_MADD                        (0 << 28)
+#define     R200_TXC_OP_CND0                       (2 << 28)
+#define     R200_TXC_OP_LERP                       (3 << 28)
+#define     R200_TXC_OP_DOT3                       (4 << 28)
+#define     R200_TXC_OP_DOT4                       (5 << 28)
+#define     R200_TXC_OP_CONDITIONAL                (6 << 28)
+#define     R200_TXC_OP_DOT2_ADD                   (7 << 28)
+#define     R200_TXC_OP_MASK                       (7 << 28)
+#define R200_PP_TXCBLEND2_0                0x2f04
+#define     R200_TXC_TFACTOR_SEL_SHIFT             0
+#define     R200_TXC_TFACTOR_SEL_MASK              0x7
+#define     R200_TXC_TFACTOR1_SEL_SHIFT            4
+#define     R200_TXC_TFACTOR1_SEL_MASK             (0x7 << 4)
+#define     R200_TXC_SCALE_SHIFT                   8
+#define     R200_TXC_SCALE_MASK                    (7 << 8)
+#define     R200_TXC_SCALE_1X                      (0 << 8)
+#define     R200_TXC_SCALE_2X                      (1 << 8)
+#define     R200_TXC_SCALE_4X                      (2 << 8)
+#define     R200_TXC_SCALE_8X                      (3 << 8)
+#define     R200_TXC_SCALE_INV2                    (5 << 8)
+#define     R200_TXC_SCALE_INV4                    (6 << 8)
+#define     R200_TXC_SCALE_INV8                    (7 << 8)
+#define     R200_TXC_CLAMP_SHIFT                   12
+#define     R200_TXC_CLAMP_MASK                    (3 << 12)
+#define     R200_TXC_CLAMP_WRAP                    (0 << 12)
+#define     R200_TXC_CLAMP_0_1                     (1 << 12)
+#define     R200_TXC_CLAMP_8_8                     (2 << 12)
+#define     R200_TXC_OUTPUT_REG_SHIFT              16
+#define     R200_TXC_OUTPUT_REG_MASK               (7 << 16)
+#define     R200_TXC_OUTPUT_REG_NONE               (0 << 16)
+#define     R200_TXC_OUTPUT_REG_R0                 (1 << 16)
+#define     R200_TXC_OUTPUT_REG_R1                 (2 << 16)
+#define     R200_TXC_OUTPUT_REG_R2                 (3 << 16)
+#define     R200_TXC_OUTPUT_REG_R3                 (4 << 16)
+#define     R200_TXC_OUTPUT_REG_R4                 (5 << 16)
+#define     R200_TXC_OUTPUT_REG_R5                 (6 << 16)
+#define     R200_TXC_OUTPUT_MASK_MASK              (7 << 20)
+#define     R200_TXC_OUTPUT_MASK_RGB               (0 << 20)
+#define     R200_TXC_OUTPUT_MASK_RG                (1 << 20)
+#define     R200_TXC_OUTPUT_MASK_RB                (2 << 20)
+#define     R200_TXC_OUTPUT_MASK_R                 (3 << 20)
+#define     R200_TXC_OUTPUT_MASK_GB                (4 << 20)
+#define     R200_TXC_OUTPUT_MASK_G                 (5 << 20)
+#define     R200_TXC_OUTPUT_MASK_B                 (6 << 20)
+#define     R200_TXC_OUTPUT_MASK_NONE              (7 << 20)
+#define     R200_TXC_OUTPUT_ROTATE_RGB             (0 << 24)
+#define     R200_TXC_OUTPUT_ROTATE_ARG             (1 << 24)
+#define     R200_TXC_OUTPUT_ROTATE_GBA             (2 << 24)
+#define     R200_TXC_OUTPUT_ROTATE_RGA             (3 << 24)
+#define     R200_TXC_REPL_NORMAL                   0
+#define     R200_TXC_REPL_RED                      1
+#define     R200_TXC_REPL_GREEN                    2
+#define     R200_TXC_REPL_BLUE                     3
+#define     R200_TXC_REPL_ARG_A_SHIFT              26
+#define     R200_TXC_REPL_ARG_A_MASK               (3 << 26)
+#define     R200_TXC_REPL_ARG_B_SHIFT              28
+#define     R200_TXC_REPL_ARG_B_MASK               (3 << 28)
+#define     R200_TXC_REPL_ARG_C_SHIFT              30
+#define     R200_TXC_REPL_ARG_C_MASK               (3u << 30) /* unsigned: 3 << 30 is not representable in a 32-bit signed int (undefined behaviour) */
+#define R200_PP_TXABLEND_0                0x2f08
+#define     R200_TXA_ARG_A_ZERO              (0)
+#define     R200_TXA_ARG_A_CURRENT_ALPHA     (2) /* guess */
+#define     R200_TXA_ARG_A_CURRENT_BLUE      (3) /* guess */
+#define     R200_TXA_ARG_A_DIFFUSE_ALPHA     (4)
+#define     R200_TXA_ARG_A_DIFFUSE_BLUE      (5)
+#define     R200_TXA_ARG_A_SPECULAR_ALPHA    (6)
+#define     R200_TXA_ARG_A_SPECULAR_BLUE     (7)
+#define     R200_TXA_ARG_A_TFACTOR_ALPHA     (8)
+#define     R200_TXA_ARG_A_TFACTOR_BLUE      (9)
+#define     R200_TXA_ARG_A_R0_ALPHA          (10)
+#define     R200_TXA_ARG_A_R0_BLUE           (11)
+#define     R200_TXA_ARG_A_R1_ALPHA          (12)
+#define     R200_TXA_ARG_A_R1_BLUE           (13)
+#define     R200_TXA_ARG_A_R2_ALPHA          (14)
+#define     R200_TXA_ARG_A_R2_BLUE           (15)
+#define     R200_TXA_ARG_A_R3_ALPHA          (16)
+#define     R200_TXA_ARG_A_R3_BLUE           (17)
+#define     R200_TXA_ARG_A_R4_ALPHA          (18)
+#define     R200_TXA_ARG_A_R4_BLUE           (19)
+#define     R200_TXA_ARG_A_R5_ALPHA          (20)
+#define     R200_TXA_ARG_A_R5_BLUE           (21)
+#define     R200_TXA_ARG_A_TFACTOR1_ALPHA    (26)
+#define     R200_TXA_ARG_A_TFACTOR1_BLUE     (27)
+#define     R200_TXA_ARG_A_MASK			(31 << 0)
+#define     R200_TXA_ARG_A_SHIFT			0
+#define     R200_TXA_ARG_B_ZERO              (0<<5)
+#define     R200_TXA_ARG_B_CURRENT_ALPHA     (2<<5) /* guess */
+#define     R200_TXA_ARG_B_CURRENT_BLUE      (3<<5) /* guess */
+#define     R200_TXA_ARG_B_DIFFUSE_ALPHA     (4<<5)
+#define     R200_TXA_ARG_B_DIFFUSE_BLUE      (5<<5)
+#define     R200_TXA_ARG_B_SPECULAR_ALPHA    (6<<5)
+#define     R200_TXA_ARG_B_SPECULAR_BLUE     (7<<5)
+#define     R200_TXA_ARG_B_TFACTOR_ALPHA     (8<<5)
+#define     R200_TXA_ARG_B_TFACTOR_BLUE      (9<<5)
+#define     R200_TXA_ARG_B_R0_ALPHA          (10<<5)
+#define     R200_TXA_ARG_B_R0_BLUE           (11<<5)
+#define     R200_TXA_ARG_B_R1_ALPHA          (12<<5)
+#define     R200_TXA_ARG_B_R1_BLUE           (13<<5)
+#define     R200_TXA_ARG_B_R2_ALPHA          (14<<5)
+#define     R200_TXA_ARG_B_R2_BLUE           (15<<5)
+#define     R200_TXA_ARG_B_R3_ALPHA          (16<<5)
+#define     R200_TXA_ARG_B_R3_BLUE           (17<<5)
+#define     R200_TXA_ARG_B_R4_ALPHA          (18<<5)
+#define     R200_TXA_ARG_B_R4_BLUE           (19<<5)
+#define     R200_TXA_ARG_B_R5_ALPHA          (20<<5)
+#define     R200_TXA_ARG_B_R5_BLUE           (21<<5)
+#define     R200_TXA_ARG_B_TFACTOR1_ALPHA    (26<<5)
+#define     R200_TXA_ARG_B_TFACTOR1_BLUE     (27<<5)
+#define     R200_TXA_ARG_B_MASK			(31 << 5)
+#define     R200_TXA_ARG_B_SHIFT			5
+#define     R200_TXA_ARG_C_ZERO              (0<<10)
+#define     R200_TXA_ARG_C_CURRENT_ALPHA     (2<<10) /* guess */
+#define     R200_TXA_ARG_C_CURRENT_BLUE      (3<<10) /* guess */
+#define     R200_TXA_ARG_C_DIFFUSE_ALPHA     (4<<10)
+#define     R200_TXA_ARG_C_DIFFUSE_BLUE      (5<<10)
+#define     R200_TXA_ARG_C_SPECULAR_ALPHA    (6<<10)
+#define     R200_TXA_ARG_C_SPECULAR_BLUE     (7<<10)
+#define     R200_TXA_ARG_C_TFACTOR_ALPHA     (8<<10)
+#define     R200_TXA_ARG_C_TFACTOR_BLUE      (9<<10)
+#define     R200_TXA_ARG_C_R0_ALPHA          (10<<10)
+#define     R200_TXA_ARG_C_R0_BLUE           (11<<10)
+#define     R200_TXA_ARG_C_R1_ALPHA          (12<<10)
+#define     R200_TXA_ARG_C_R1_BLUE           (13<<10)
+#define     R200_TXA_ARG_C_R2_ALPHA          (14<<10)
+#define     R200_TXA_ARG_C_R2_BLUE           (15<<10)
+#define     R200_TXA_ARG_C_R3_ALPHA          (16<<10)
+#define     R200_TXA_ARG_C_R3_BLUE           (17<<10)
+#define     R200_TXA_ARG_C_R4_ALPHA          (18<<10)
+#define     R200_TXA_ARG_C_R4_BLUE           (19<<10)
+#define     R200_TXA_ARG_C_R5_ALPHA          (20<<10)
+#define     R200_TXA_ARG_C_R5_BLUE           (21<<10)
+#define     R200_TXA_ARG_C_TFACTOR1_ALPHA    (26<<10)
+#define     R200_TXA_ARG_C_TFACTOR1_BLUE     (27<<10)
+#define     R200_TXA_ARG_C_MASK			(31 << 10)
+#define     R200_TXA_ARG_C_SHIFT			10
+#define     R200_TXA_COMP_ARG_A                    (1 << 16)
+#define     R200_TXA_COMP_ARG_A_SHIFT              (16)
+#define     R200_TXA_BIAS_ARG_A                    (1 << 17)
+#define     R200_TXA_SCALE_ARG_A                   (1 << 18)
+#define     R200_TXA_NEG_ARG_A                     (1 << 19)
+#define     R200_TXA_COMP_ARG_B                    (1 << 20)
+#define     R200_TXA_COMP_ARG_B_SHIFT              (20)
+#define     R200_TXA_BIAS_ARG_B                    (1 << 21)
+#define     R200_TXA_SCALE_ARG_B                   (1 << 22)
+#define     R200_TXA_NEG_ARG_B                     (1 << 23)
+#define     R200_TXA_COMP_ARG_C                    (1 << 24)
+#define     R200_TXA_COMP_ARG_C_SHIFT              (24)
+#define     R200_TXA_BIAS_ARG_C                    (1 << 25)
+#define     R200_TXA_SCALE_ARG_C                   (1 << 26)
+#define     R200_TXA_NEG_ARG_C                     (1 << 27)
+#define     R200_TXA_OP_MADD                       (0 << 28)
+#define     R200_TXA_OP_CND0                       (2 << 28)
+#define     R200_TXA_OP_LERP                       (3 << 28)
+#define     R200_TXA_OP_CONDITIONAL                (6 << 28)
+#define     R200_TXA_OP_MASK                       (7 << 28)
+#define R200_PP_TXABLEND2_0                0x2f0c
+#define     R200_TXA_TFACTOR_SEL_SHIFT             0
+#define     R200_TXA_TFACTOR_SEL_MASK              0x7
+#define     R200_TXA_TFACTOR1_SEL_SHIFT            4
+#define     R200_TXA_TFACTOR1_SEL_MASK             (0x7 << 4)
+#define     R200_TXA_SCALE_SHIFT                   8
+#define     R200_TXA_SCALE_MASK                    (7 << 8)
+#define     R200_TXA_SCALE_1X                      (0 << 8)
+#define     R200_TXA_SCALE_2X                      (1 << 8)
+#define     R200_TXA_SCALE_4X                      (2 << 8)
+#define     R200_TXA_SCALE_8X                      (3 << 8)
+#define     R200_TXA_SCALE_INV2                    (5 << 8)
+#define     R200_TXA_SCALE_INV4                    (6 << 8)
+#define     R200_TXA_SCALE_INV8                    (7 << 8)
+#define     R200_TXA_CLAMP_SHIFT                   12
+#define     R200_TXA_CLAMP_MASK                    (3 << 12)
+#define     R200_TXA_CLAMP_WRAP                    (0 << 12)
+#define     R200_TXA_CLAMP_0_1                     (1 << 12)
+#define     R200_TXA_CLAMP_8_8                     (2 << 12)
+#define     R200_TXA_OUTPUT_REG_SHIFT              16
+#define     R200_TXA_OUTPUT_REG_MASK               (7 << 16)
+#define     R200_TXA_OUTPUT_REG_NONE               (0 << 16)
+#define     R200_TXA_OUTPUT_REG_R0                 (1 << 16)
+#define     R200_TXA_OUTPUT_REG_R1                 (2 << 16)
+#define     R200_TXA_OUTPUT_REG_R2                 (3 << 16)
+#define     R200_TXA_OUTPUT_REG_R3                 (4 << 16)
+#define     R200_TXA_OUTPUT_REG_R4                 (5 << 16)
+#define     R200_TXA_OUTPUT_REG_R5                 (6 << 16)
+#define     R200_TXA_DOT_ALPHA                     (1 << 20)
+#define     R200_TXA_REPL_NORMAL                   0
+#define     R200_TXA_REPL_RED                      1
+#define     R200_TXA_REPL_GREEN                    2
+#define     R200_TXA_REPL_ARG_A_SHIFT              26
+#define     R200_TXA_REPL_ARG_A_MASK               (3 << 26)
+#define     R200_TXA_REPL_ARG_B_SHIFT              28
+#define     R200_TXA_REPL_ARG_B_MASK               (3 << 28)
+#define     R200_TXA_REPL_ARG_C_SHIFT              30
+#define     R200_TXA_REPL_ARG_C_MASK               (3u << 30) /* unsigned: 3 << 30 is not representable in a 32-bit signed int (undefined behaviour) */
+#define R200_PP_TXCBLEND_1                0x2f10
+#define R200_PP_TXCBLEND2_1               0x2f14
+#define R200_PP_TXABLEND_1                0x2f18
+#define R200_PP_TXABLEND2_1               0x2f1c
+#define R200_PP_TXCBLEND_2                0x2f20
+#define R200_PP_TXCBLEND2_2               0x2f24
+#define R200_PP_TXABLEND_2                0x2f28
+#define R200_PP_TXABLEND2_2               0x2f2c
+#define R200_PP_TXCBLEND_3                0x2f30
+#define R200_PP_TXCBLEND2_3               0x2f34
+#define R200_PP_TXABLEND_3                0x2f38
+#define R200_PP_TXABLEND2_3               0x2f3c
+#define R200_PP_TXCBLEND_4                0x2f40
+#define R200_PP_TXCBLEND2_4               0x2f44
+#define R200_PP_TXABLEND_4                0x2f48
+#define R200_PP_TXABLEND2_4               0x2f4c
+#define R200_PP_TXCBLEND_5                0x2f50
+#define R200_PP_TXCBLEND2_5               0x2f54
+#define R200_PP_TXABLEND_5                0x2f58
+#define R200_PP_TXABLEND2_5               0x2f5c
+#define R200_PP_TXCBLEND_6                0x2f60
+#define R200_PP_TXCBLEND2_6               0x2f64
+#define R200_PP_TXABLEND_6                0x2f68
+#define R200_PP_TXABLEND2_6               0x2f6c
+#define R200_PP_TXCBLEND_7                0x2f70
+#define R200_PP_TXCBLEND2_7               0x2f74
+#define R200_PP_TXABLEND_7                0x2f78
+#define R200_PP_TXABLEND2_7               0x2f7c
+#define R200_PP_TXCBLEND_8                0x2f80
+#define R200_PP_TXCBLEND2_8               0x2f84
+#define R200_PP_TXABLEND_8                0x2f88
+#define R200_PP_TXABLEND2_8               0x2f8c
+#define R200_PP_TXCBLEND_9                0x2f90
+#define R200_PP_TXCBLEND2_9               0x2f94
+#define R200_PP_TXABLEND_9                0x2f98
+#define R200_PP_TXABLEND2_9               0x2f9c
+#define R200_PP_TXCBLEND_10               0x2fa0
+#define R200_PP_TXCBLEND2_10              0x2fa4
+#define R200_PP_TXABLEND_10               0x2fa8
+#define R200_PP_TXABLEND2_10              0x2fac
+#define R200_PP_TXCBLEND_11               0x2fb0
+#define R200_PP_TXCBLEND2_11              0x2fb4
+#define R200_PP_TXABLEND_11               0x2fb8
+#define R200_PP_TXABLEND2_11              0x2fbc
+#define R200_PP_TXCBLEND_12               0x2fc0
+#define R200_PP_TXCBLEND2_12              0x2fc4
+#define R200_PP_TXABLEND_12               0x2fc8
+#define R200_PP_TXABLEND2_12              0x2fcc
+#define R200_PP_TXCBLEND_13               0x2fd0
+#define R200_PP_TXCBLEND2_13              0x2fd4
+#define R200_PP_TXABLEND_13               0x2fd8
+#define R200_PP_TXABLEND2_13              0x2fdc
+#define R200_PP_TXCBLEND_14               0x2fe0
+#define R200_PP_TXCBLEND2_14              0x2fe4
+#define R200_PP_TXABLEND_14               0x2fe8
+#define R200_PP_TXABLEND2_14              0x2fec
+#define R200_PP_TXCBLEND_15               0x2ff0
+#define R200_PP_TXCBLEND2_15              0x2ff4
+#define R200_PP_TXABLEND_15               0x2ff8
+#define R200_PP_TXABLEND2_15              0x2ffc
+/* gap */
+#define R200_RB3D_BLENDCOLOR               0x3218 /* ARGB 8888 */
+#define R200_RB3D_ABLENDCNTL               0x321C /* see BLENDCTL */
+#define R200_RB3D_CBLENDCNTL               0x3220 /* see BLENDCTL */
+
+
+/*
+ * Offsets in TCL vector state.  NOTE: Hardwiring matrix positions.
+ * Multiple contexts could collaborate to eliminate state bouncing.
+ */
+#define R200_VS_LIGHT_AMBIENT_ADDR          0x00000028
+#define R200_VS_LIGHT_DIFFUSE_ADDR          0x00000030
+#define R200_VS_LIGHT_SPECULAR_ADDR         0x00000038
+#define R200_VS_LIGHT_DIRPOS_ADDR           0x00000040
+#define R200_VS_LIGHT_HWVSPOT_ADDR          0x00000048
+#define R200_VS_LIGHT_ATTENUATION_ADDR      0x00000050
+#define R200_VS_SPOT_DUAL_CONE              0x00000058
+#define R200_VS_GLOBAL_AMBIENT_ADDR         0x0000005C
+#define R200_VS_FOG_PARAM_ADDR              0x0000005D
+#define R200_VS_EYE_VECTOR_ADDR             0x0000005E
+#define R200_VS_UCP_ADDR                    0x00000060
+#define R200_VS_PNT_SPRITE_VPORT_SCALE      0x00000068
+#define R200_VS_MATRIX_0_MV                 0x00000080
+#define R200_VS_MATRIX_1_INV_MV        	    0x00000084
+#define R200_VS_MATRIX_2_MVP        	    0x00000088
+#define R200_VS_MATRIX_3_TEX0        	    0x0000008C
+#define R200_VS_MATRIX_4_TEX1        	    0x00000090
+#define R200_VS_MATRIX_5_TEX2        	    0x00000094
+#define R200_VS_MATRIX_6_TEX3        	    0x00000098
+#define R200_VS_MATRIX_7_TEX4        	    0x0000009C
+#define R200_VS_MATRIX_8_TEX5        	    0x000000A0
+#define R200_VS_MAT_0_EMISS                 0x000000B0
+#define R200_VS_MAT_0_AMB                   0x000000B1
+#define R200_VS_MAT_0_DIF                   0x000000B2
+#define R200_VS_MAT_0_SPEC                  0x000000B3
+#define R200_VS_MAT_1_EMISS                 0x000000B4
+#define R200_VS_MAT_1_AMB                   0x000000B5
+#define R200_VS_MAT_1_DIF                   0x000000B6
+#define R200_VS_MAT_1_SPEC                  0x000000B7
+#define R200_VS_EYE2CLIP_MTX                0x000000B8
+#define R200_VS_PNT_SPRITE_ATT_CONST        0x000000BC
+#define R200_VS_PNT_SPRITE_EYE_IN_MODEL     0x000000BD
+#define R200_VS_PNT_SPRITE_CLAMP            0x000000BE
+#define R200_VS_MAX                         0x000001C0
+
+#define R200_PVS_PROG0                      0x00000080
+#define R200_PVS_PROG1                      0x00000180
+#define R200_PVS_PARAM0                     0x00000000
+#define R200_PVS_PARAM1                     0x00000100
+
+/*
+ * Offsets in TCL scalar state
+ */
+#define R200_SS_LIGHT_DCD_ADDR              0x00000000
+#define R200_SS_LIGHT_DCM_ADDR              0x00000008
+#define R200_SS_LIGHT_SPOT_EXPONENT_ADDR    0x00000010
+#define R200_SS_LIGHT_SPOT_CUTOFF_ADDR      0x00000018
+#define R200_SS_LIGHT_SPECULAR_THRESH_ADDR  0x00000020
+#define R200_SS_LIGHT_RANGE_CUTOFF_SQRD     0x00000028
+#define R200_SS_LIGHT_RANGE_ATT_CONST       0x00000030
+#define R200_SS_VERT_GUARD_CLIP_ADJ_ADDR    0x00000080
+#define R200_SS_VERT_GUARD_DISCARD_ADJ_ADDR 0x00000081
+#define R200_SS_HORZ_GUARD_CLIP_ADJ_ADDR    0x00000082
+#define R200_SS_HORZ_GUARD_DISCARD_ADJ_ADDR 0x00000083
+#define R200_SS_MAT_0_SHININESS             0x00000100
+#define R200_SS_MAT_1_SHININESS             0x00000101
+
+
+/*
+ * Matrix indices
+ */
+#define R200_MTX_MV                        0
+#define R200_MTX_IMV                       1
+#define R200_MTX_MVP                       2
+#define R200_MTX_TEX0                      3
+#define R200_MTX_TEX1                      4
+#define R200_MTX_TEX2                      5
+#define R200_MTX_TEX3                      6
+#define R200_MTX_TEX4                      7
+#define R200_MTX_TEX5                      8
+
+/* Color formats for 2d packets
+ */
+#define R200_CP_COLOR_FORMAT_CI8	2
+#define R200_CP_COLOR_FORMAT_ARGB1555	3
+#define R200_CP_COLOR_FORMAT_RGB565	4
+#define R200_CP_COLOR_FORMAT_ARGB8888	6
+#define R200_CP_COLOR_FORMAT_RGB332	7
+#define R200_CP_COLOR_FORMAT_RGB8	9
+#define R200_CP_COLOR_FORMAT_ARGB4444	15
+
+
+/*
+ * CP type-3 packets
+ */
+#define R200_CP_CMD_NOP                 0xC0001000
+#define R200_CP_CMD_NEXT_CHAR           0xC0001900
+#define R200_CP_CMD_PLY_NEXTSCAN        0xC0001D00
+#define R200_CP_CMD_SET_SCISSORS        0xC0001E00
+#define R200_CP_CMD_LOAD_MICROCODE      0xC0002400
+#define R200_CP_CMD_WAIT_FOR_IDLE       0xC0002600
+#define R200_CP_CMD_3D_DRAW_VBUF        0xC0002800
+#define R200_CP_CMD_3D_DRAW_IMMD        0xC0002900
+#define R200_CP_CMD_3D_DRAW_INDX        0xC0002A00
+#define R200_CP_CMD_LOAD_PALETTE        0xC0002C00
+#define R200_CP_CMD_3D_LOAD_VBPNTR      0xC0002F00
+#define R200_CP_CMD_INDX_BUFFER         0xC0003300
+#define R200_CP_CMD_3D_DRAW_VBUF_2      0xC0003400
+#define R200_CP_CMD_3D_DRAW_IMMD_2      0xC0003500
+#define R200_CP_CMD_3D_DRAW_INDX_2      0xC0003600
+#define R200_CP_CMD_PAINT		0xC0009100
+#define R200_CP_CMD_BITBLT		0xC0009200
+#define R200_CP_CMD_SMALLTEXT		0xC0009300
+#define R200_CP_CMD_HOSTDATA_BLT	0xC0009400
+#define R200_CP_CMD_POLYLINE		0xC0009500
+#define R200_CP_CMD_POLYSCANLINES	0xC0009800
+#define R200_CP_CMD_PAINT_MULTI		0xC0009A00
+#define R200_CP_CMD_BITBLT_MULTI	0xC0009B00
+#define R200_CP_CMD_TRANS_BITBLT	0xC0009C00
+
+#endif
+
diff --git a/src/mesa/drivers/dri/r200/r200_sanity.c b/src/mesa/drivers/dri/r200/r200_sanity.c
new file mode 100644
index 0000000000..a439fd84ed
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_sanity.c
@@ -0,0 +1,1456 @@
+/**************************************************************************
+
+Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc, Cedar Park, TX.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+ 
+#include <errno.h> 
+
+#include "main/glheader.h"
+#include "main/imports.h"
+
+#include "r200_context.h"
+#include "r200_sanity.h"
+#include "radeon_reg.h"
+#include "r200_reg.h"
+
+/* Set this to 1 for more verbiage: NORMAL is then always on and VERBOSE
+ * tracks RADEON_VERBOSE; with 0, VERBOSE is off and NORMAL tracks it. */
+#define MORE_VERBOSE 1
+
+#if MORE_VERBOSE
+#define VERBOSE (R200_DEBUG & RADEON_VERBOSE)
+#define NORMAL  (1)
+#else
+#define VERBOSE 0
+#define NORMAL  (R200_DEBUG & RADEON_VERBOSE)
+#endif
+
+
+/* New (1.3) state mechanism: 1.3 cmdbuffers carry 3 commands (packet,
+ * scalar, vector) that can update all previous state as well as the tcl
+ * scalar and vector areas.  One packet[] entry per state packet; order
+ * matters (note the index comments) -- presumably indexed by packet id. */
+static struct {
+   int start;          /* register the packet starts at */
+   int len;            /* presumably the number of consecutive dwords -- confirm */
+   const char *name;   /* printable label, normally the start register's name */
+} packet[RADEON_MAX_STATE_PACKETS] = {
+   { RADEON_PP_MISC,7,"RADEON_PP_MISC" },
+   { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
+   { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
+   { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
+   { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
+   { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
+   { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
+   { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
+   { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
+   { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
+   { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
+   { RADEON_RE_MISC,1,"RADEON_RE_MISC" },
+   { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
+   { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
+   { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
+   { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
+   { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
+   { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
+   { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
+   { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
+   { R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" },
+   { R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
+   { R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
+   { R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
+   { R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
+   { R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
+   { R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
+   { R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
+   { R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
+   { R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
+   { R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
+   { R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
+   { R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
+   { R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
+   { R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
+   { R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
+   { R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
+   { R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
+   { R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
+   { R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
+   { R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
+   { R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
+   { R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
+   { R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
+   { R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
+   { R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
+   { R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
+   { R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
+   { R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
+   { R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
+   { R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" },
+   { R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" },
+   { R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" },
+   { R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" },
+   { R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" },
+   { R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" },
+   { R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" },
+   { R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" },
+   { R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" },
+   { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
+   { R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
+   { R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
+   { R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" },
+   { R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
+   { R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" },
+   { R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
+   { R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" },
+   { R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
+   { R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" },
+   { R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
+   { R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" },
+   { R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
+   { RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
+   { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
+   { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
+   { R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
+   { R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
+   { RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0" },
+   { RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0" },
+   { RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1" },
+   { RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0" },
+   { RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2" },
+   { RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0" },
+   { R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF" },
+   { R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"},   /* 85 */
+   { R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
+   { R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
+   { R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
+   { R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
+   { R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
+   { R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
+   { R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
+   { R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
+   { R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
+};
+
+struct reg_names {
+   int idx;            /* register offset, or scalar/vector index, being named */
+   const char *name;   /* printable name for idx */
+};
+
+static struct reg_names reg_names[] = {   /* register -> name; init_regs copies this 1:1 into regs[] */
+   { R200_PP_MISC, "R200_PP_MISC" },
+   { R200_PP_FOG_COLOR, "R200_PP_FOG_COLOR" },
+   { R200_RE_SOLID_COLOR, "R200_RE_SOLID_COLOR" },
+   { R200_RB3D_BLENDCNTL, "R200_RB3D_BLENDCNTL" },
+   { R200_RB3D_DEPTHOFFSET, "R200_RB3D_DEPTHOFFSET" },
+   { R200_RB3D_DEPTHPITCH, "R200_RB3D_DEPTHPITCH" },
+   { R200_RB3D_ZSTENCILCNTL, "R200_RB3D_ZSTENCILCNTL" },
+   { R200_PP_CNTL, "R200_PP_CNTL" },
+   { R200_RB3D_CNTL, "R200_RB3D_CNTL" },
+   { R200_RB3D_COLOROFFSET, "R200_RB3D_COLOROFFSET" },
+   { R200_RE_WIDTH_HEIGHT, "R200_RE_WIDTH_HEIGHT" },
+   { R200_RB3D_COLORPITCH, "R200_RB3D_COLORPITCH" },
+   { R200_SE_CNTL, "R200_SE_CNTL" },
+   { R200_RE_CNTL, "R200_RE_CNTL" },
+   { R200_RE_MISC, "R200_RE_MISC" },
+   { R200_RE_STIPPLE_ADDR, "R200_RE_STIPPLE_ADDR" },
+   { R200_RE_STIPPLE_DATA, "R200_RE_STIPPLE_DATA" },
+   { R200_RE_LINE_PATTERN, "R200_RE_LINE_PATTERN" },
+   { R200_RE_LINE_STATE, "R200_RE_LINE_STATE" },
+   { R200_RE_SCISSOR_TL_0, "R200_RE_SCISSOR_TL_0" },
+   { R200_RE_SCISSOR_BR_0, "R200_RE_SCISSOR_BR_0" },
+   { R200_RE_SCISSOR_TL_1, "R200_RE_SCISSOR_TL_1" },
+   { R200_RE_SCISSOR_BR_1, "R200_RE_SCISSOR_BR_1" },
+   { R200_RE_SCISSOR_TL_2, "R200_RE_SCISSOR_TL_2" },
+   { R200_RE_SCISSOR_BR_2, "R200_RE_SCISSOR_BR_2" },
+   { R200_RB3D_DEPTHXY_OFFSET, "R200_RB3D_DEPTHXY_OFFSET" },
+   { R200_RB3D_STENCILREFMASK, "R200_RB3D_STENCILREFMASK" },
+   { R200_RB3D_ROPCNTL, "R200_RB3D_ROPCNTL" },
+   { R200_RB3D_PLANEMASK, "R200_RB3D_PLANEMASK" },
+   { R200_SE_VPORT_XSCALE, "R200_SE_VPORT_XSCALE" },
+   { R200_SE_VPORT_XOFFSET, "R200_SE_VPORT_XOFFSET" },
+   { R200_SE_VPORT_YSCALE, "R200_SE_VPORT_YSCALE" },
+   { R200_SE_VPORT_YOFFSET, "R200_SE_VPORT_YOFFSET" },
+   { R200_SE_VPORT_ZSCALE, "R200_SE_VPORT_ZSCALE" },
+   { R200_SE_VPORT_ZOFFSET, "R200_SE_VPORT_ZOFFSET" },
+   { R200_SE_ZBIAS_FACTOR, "R200_SE_ZBIAS_FACTOR" },
+   { R200_SE_ZBIAS_CONSTANT, "R200_SE_ZBIAS_CONSTANT" },
+   { R200_SE_LINE_WIDTH, "R200_SE_LINE_WIDTH" },
+   { R200_SE_VAP_CNTL, "R200_SE_VAP_CNTL" },
+   { R200_SE_VF_CNTL, "R200_SE_VF_CNTL" },
+   { R200_SE_VTX_FMT_0, "R200_SE_VTX_FMT_0" },
+   { R200_SE_VTX_FMT_1, "R200_SE_VTX_FMT_1" },
+   { R200_SE_TCL_OUTPUT_VTX_FMT_0, "R200_SE_TCL_OUTPUT_VTX_FMT_0" },
+   { R200_SE_TCL_OUTPUT_VTX_FMT_1, "R200_SE_TCL_OUTPUT_VTX_FMT_1" },
+   { R200_SE_VTE_CNTL, "R200_SE_VTE_CNTL" },
+   { R200_SE_VTX_NUM_ARRAYS, "R200_SE_VTX_NUM_ARRAYS" },
+   { R200_SE_VTX_AOS_ATTR01, "R200_SE_VTX_AOS_ATTR01" },
+   { R200_SE_VTX_AOS_ADDR0, "R200_SE_VTX_AOS_ADDR0" },
+   { R200_SE_VTX_AOS_ADDR1, "R200_SE_VTX_AOS_ADDR1" },
+   { R200_SE_VTX_AOS_ATTR23, "R200_SE_VTX_AOS_ATTR23" },
+   { R200_SE_VTX_AOS_ADDR2, "R200_SE_VTX_AOS_ADDR2" },
+   { R200_SE_VTX_AOS_ADDR3, "R200_SE_VTX_AOS_ADDR3" },
+   { R200_SE_VTX_AOS_ATTR45, "R200_SE_VTX_AOS_ATTR45" },
+   { R200_SE_VTX_AOS_ADDR4, "R200_SE_VTX_AOS_ADDR4" },
+   { R200_SE_VTX_AOS_ADDR5, "R200_SE_VTX_AOS_ADDR5" },
+   { R200_SE_VTX_AOS_ATTR67, "R200_SE_VTX_AOS_ATTR67" },
+   { R200_SE_VTX_AOS_ADDR6, "R200_SE_VTX_AOS_ADDR6" },
+   { R200_SE_VTX_AOS_ADDR7, "R200_SE_VTX_AOS_ADDR7" },
+   { R200_SE_VTX_AOS_ATTR89, "R200_SE_VTX_AOS_ATTR89" },
+   { R200_SE_VTX_AOS_ADDR8, "R200_SE_VTX_AOS_ADDR8" },
+   { R200_SE_VTX_AOS_ADDR9, "R200_SE_VTX_AOS_ADDR9" },
+   { R200_SE_VTX_AOS_ATTR1011, "R200_SE_VTX_AOS_ATTR1011" },
+   { R200_SE_VTX_AOS_ADDR10, "R200_SE_VTX_AOS_ADDR10" },
+   { R200_SE_VTX_AOS_ADDR11, "R200_SE_VTX_AOS_ADDR11" },
+   { R200_SE_VF_MAX_VTX_INDX, "R200_SE_VF_MAX_VTX_INDX" },
+   { R200_SE_VF_MIN_VTX_INDX, "R200_SE_VF_MIN_VTX_INDX" },
+   { R200_SE_VTX_STATE_CNTL, "R200_SE_VTX_STATE_CNTL" },
+   { R200_SE_TCL_VECTOR_INDX_REG, "R200_SE_TCL_VECTOR_INDX_REG" },
+   { R200_SE_TCL_VECTOR_DATA_REG, "R200_SE_TCL_VECTOR_DATA_REG" },
+   { R200_SE_TCL_SCALAR_INDX_REG, "R200_SE_TCL_SCALAR_INDX_REG" },
+   { R200_SE_TCL_SCALAR_DATA_REG, "R200_SE_TCL_SCALAR_DATA_REG" },
+   { R200_SE_TCL_MATRIX_SEL_0, "R200_SE_TCL_MATRIX_SEL_0" },
+   { R200_SE_TCL_MATRIX_SEL_1, "R200_SE_TCL_MATRIX_SEL_1" },
+   { R200_SE_TCL_MATRIX_SEL_2, "R200_SE_TCL_MATRIX_SEL_2" },
+   { R200_SE_TCL_MATRIX_SEL_3, "R200_SE_TCL_MATRIX_SEL_3" },
+   { R200_SE_TCL_MATRIX_SEL_4, "R200_SE_TCL_MATRIX_SEL_4" },
+   { R200_SE_TCL_LIGHT_MODEL_CTL_0, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
+   { R200_SE_TCL_LIGHT_MODEL_CTL_1, "R200_SE_TCL_LIGHT_MODEL_CTL_1" },
+   { R200_SE_TCL_PER_LIGHT_CTL_0, "R200_SE_TCL_PER_LIGHT_CTL_0" },
+   { R200_SE_TCL_PER_LIGHT_CTL_1, "R200_SE_TCL_PER_LIGHT_CTL_1" },
+   { R200_SE_TCL_PER_LIGHT_CTL_2, "R200_SE_TCL_PER_LIGHT_CTL_2" },
+   { R200_SE_TCL_PER_LIGHT_CTL_3, "R200_SE_TCL_PER_LIGHT_CTL_3" },
+   { R200_SE_TCL_TEX_PROC_CTL_2, "R200_SE_TCL_TEX_PROC_CTL_2" },
+   { R200_SE_TCL_TEX_PROC_CTL_3, "R200_SE_TCL_TEX_PROC_CTL_3" },
+   { R200_SE_TCL_TEX_PROC_CTL_0, "R200_SE_TCL_TEX_PROC_CTL_0" },
+   { R200_SE_TCL_TEX_PROC_CTL_1, "R200_SE_TCL_TEX_PROC_CTL_1" },
+   { R200_SE_TC_TEX_CYL_WRAP_CTL, "R200_SE_TC_TEX_CYL_WRAP_CTL" },
+   { R200_SE_TCL_UCP_VERT_BLEND_CTL, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
+   { R200_SE_TCL_POINT_SPRITE_CNTL, "R200_SE_TCL_POINT_SPRITE_CNTL" },
+   { R200_SE_VTX_ST_POS_0_X_4, "R200_SE_VTX_ST_POS_0_X_4" },
+   { R200_SE_VTX_ST_POS_0_Y_4, "R200_SE_VTX_ST_POS_0_Y_4" },
+   { R200_SE_VTX_ST_POS_0_Z_4, "R200_SE_VTX_ST_POS_0_Z_4" },
+   { R200_SE_VTX_ST_POS_0_W_4, "R200_SE_VTX_ST_POS_0_W_4" },
+   { R200_SE_VTX_ST_NORM_0_X, "R200_SE_VTX_ST_NORM_0_X" },
+   { R200_SE_VTX_ST_NORM_0_Y, "R200_SE_VTX_ST_NORM_0_Y" },
+   { R200_SE_VTX_ST_NORM_0_Z, "R200_SE_VTX_ST_NORM_0_Z" },
+   { R200_SE_VTX_ST_PVMS, "R200_SE_VTX_ST_PVMS" },
+   { R200_SE_VTX_ST_CLR_0_R, "R200_SE_VTX_ST_CLR_0_R" },
+   { R200_SE_VTX_ST_CLR_0_G, "R200_SE_VTX_ST_CLR_0_G" },
+   { R200_SE_VTX_ST_CLR_0_B, "R200_SE_VTX_ST_CLR_0_B" },
+   { R200_SE_VTX_ST_CLR_0_A, "R200_SE_VTX_ST_CLR_0_A" },
+   { R200_SE_VTX_ST_CLR_1_R, "R200_SE_VTX_ST_CLR_1_R" },
+   { R200_SE_VTX_ST_CLR_1_G, "R200_SE_VTX_ST_CLR_1_G" },
+   { R200_SE_VTX_ST_CLR_1_B, "R200_SE_VTX_ST_CLR_1_B" },
+   { R200_SE_VTX_ST_CLR_1_A, "R200_SE_VTX_ST_CLR_1_A" },
+   { R200_SE_VTX_ST_CLR_2_R, "R200_SE_VTX_ST_CLR_2_R" },
+   { R200_SE_VTX_ST_CLR_2_G, "R200_SE_VTX_ST_CLR_2_G" },
+   { R200_SE_VTX_ST_CLR_2_B, "R200_SE_VTX_ST_CLR_2_B" },
+   { R200_SE_VTX_ST_CLR_2_A, "R200_SE_VTX_ST_CLR_2_A" },
+   { R200_SE_VTX_ST_CLR_3_R, "R200_SE_VTX_ST_CLR_3_R" },
+   { R200_SE_VTX_ST_CLR_3_G, "R200_SE_VTX_ST_CLR_3_G" },
+   { R200_SE_VTX_ST_CLR_3_B, "R200_SE_VTX_ST_CLR_3_B" },
+   { R200_SE_VTX_ST_CLR_3_A, "R200_SE_VTX_ST_CLR_3_A" },
+   { R200_SE_VTX_ST_CLR_4_R, "R200_SE_VTX_ST_CLR_4_R" },
+   { R200_SE_VTX_ST_CLR_4_G, "R200_SE_VTX_ST_CLR_4_G" },
+   { R200_SE_VTX_ST_CLR_4_B, "R200_SE_VTX_ST_CLR_4_B" },
+   { R200_SE_VTX_ST_CLR_4_A, "R200_SE_VTX_ST_CLR_4_A" },
+   { R200_SE_VTX_ST_CLR_5_R, "R200_SE_VTX_ST_CLR_5_R" },
+   { R200_SE_VTX_ST_CLR_5_G, "R200_SE_VTX_ST_CLR_5_G" },
+   { R200_SE_VTX_ST_CLR_5_B, "R200_SE_VTX_ST_CLR_5_B" },
+   { R200_SE_VTX_ST_CLR_5_A, "R200_SE_VTX_ST_CLR_5_A" },
+   { R200_SE_VTX_ST_CLR_6_R, "R200_SE_VTX_ST_CLR_6_R" },
+   { R200_SE_VTX_ST_CLR_6_G, "R200_SE_VTX_ST_CLR_6_G" },
+   { R200_SE_VTX_ST_CLR_6_B, "R200_SE_VTX_ST_CLR_6_B" },
+   { R200_SE_VTX_ST_CLR_6_A, "R200_SE_VTX_ST_CLR_6_A" },
+   { R200_SE_VTX_ST_CLR_7_R, "R200_SE_VTX_ST_CLR_7_R" },
+   { R200_SE_VTX_ST_CLR_7_G, "R200_SE_VTX_ST_CLR_7_G" },
+   { R200_SE_VTX_ST_CLR_7_B, "R200_SE_VTX_ST_CLR_7_B" },
+   { R200_SE_VTX_ST_CLR_7_A, "R200_SE_VTX_ST_CLR_7_A" },
+   { R200_SE_VTX_ST_TEX_0_S, "R200_SE_VTX_ST_TEX_0_S" },
+   { R200_SE_VTX_ST_TEX_0_T, "R200_SE_VTX_ST_TEX_0_T" },
+   { R200_SE_VTX_ST_TEX_0_R, "R200_SE_VTX_ST_TEX_0_R" },
+   { R200_SE_VTX_ST_TEX_0_Q, "R200_SE_VTX_ST_TEX_0_Q" },
+   { R200_SE_VTX_ST_TEX_1_S, "R200_SE_VTX_ST_TEX_1_S" },
+   { R200_SE_VTX_ST_TEX_1_T, "R200_SE_VTX_ST_TEX_1_T" },
+   { R200_SE_VTX_ST_TEX_1_R, "R200_SE_VTX_ST_TEX_1_R" },
+   { R200_SE_VTX_ST_TEX_1_Q, "R200_SE_VTX_ST_TEX_1_Q" },
+   { R200_SE_VTX_ST_TEX_2_S, "R200_SE_VTX_ST_TEX_2_S" },
+   { R200_SE_VTX_ST_TEX_2_T, "R200_SE_VTX_ST_TEX_2_T" },
+   { R200_SE_VTX_ST_TEX_2_R, "R200_SE_VTX_ST_TEX_2_R" },
+   { R200_SE_VTX_ST_TEX_2_Q, "R200_SE_VTX_ST_TEX_2_Q" },
+   { R200_SE_VTX_ST_TEX_3_S, "R200_SE_VTX_ST_TEX_3_S" },
+   { R200_SE_VTX_ST_TEX_3_T, "R200_SE_VTX_ST_TEX_3_T" },
+   { R200_SE_VTX_ST_TEX_3_R, "R200_SE_VTX_ST_TEX_3_R" },
+   { R200_SE_VTX_ST_TEX_3_Q, "R200_SE_VTX_ST_TEX_3_Q" },
+   { R200_SE_VTX_ST_TEX_4_S, "R200_SE_VTX_ST_TEX_4_S" },
+   { R200_SE_VTX_ST_TEX_4_T, "R200_SE_VTX_ST_TEX_4_T" },
+   { R200_SE_VTX_ST_TEX_4_R, "R200_SE_VTX_ST_TEX_4_R" },
+   { R200_SE_VTX_ST_TEX_4_Q, "R200_SE_VTX_ST_TEX_4_Q" },
+   { R200_SE_VTX_ST_TEX_5_S, "R200_SE_VTX_ST_TEX_5_S" },
+   { R200_SE_VTX_ST_TEX_5_T, "R200_SE_VTX_ST_TEX_5_T" },
+   { R200_SE_VTX_ST_TEX_5_R, "R200_SE_VTX_ST_TEX_5_R" },
+   { R200_SE_VTX_ST_TEX_5_Q, "R200_SE_VTX_ST_TEX_5_Q" },
+   { R200_SE_VTX_ST_PNT_SPRT_SZ, "R200_SE_VTX_ST_PNT_SPRT_SZ" },
+   { R200_SE_VTX_ST_DISC_FOG, "R200_SE_VTX_ST_DISC_FOG" },
+   { R200_SE_VTX_ST_SHININESS_0, "R200_SE_VTX_ST_SHININESS_0" },
+   { R200_SE_VTX_ST_SHININESS_1, "R200_SE_VTX_ST_SHININESS_1" },
+   { R200_SE_VTX_ST_BLND_WT_0, "R200_SE_VTX_ST_BLND_WT_0" },
+   { R200_SE_VTX_ST_BLND_WT_1, "R200_SE_VTX_ST_BLND_WT_1" },
+   { R200_SE_VTX_ST_BLND_WT_2, "R200_SE_VTX_ST_BLND_WT_2" },
+   { R200_SE_VTX_ST_BLND_WT_3, "R200_SE_VTX_ST_BLND_WT_3" },
+   { R200_SE_VTX_ST_POS_1_X, "R200_SE_VTX_ST_POS_1_X" },
+   { R200_SE_VTX_ST_POS_1_Y, "R200_SE_VTX_ST_POS_1_Y" },
+   { R200_SE_VTX_ST_POS_1_Z, "R200_SE_VTX_ST_POS_1_Z" },
+   { R200_SE_VTX_ST_POS_1_W, "R200_SE_VTX_ST_POS_1_W" },
+   { R200_SE_VTX_ST_NORM_1_X, "R200_SE_VTX_ST_NORM_1_X" },
+   { R200_SE_VTX_ST_NORM_1_Y, "R200_SE_VTX_ST_NORM_1_Y" },
+   { R200_SE_VTX_ST_NORM_1_Z, "R200_SE_VTX_ST_NORM_1_Z" },
+   { R200_SE_VTX_ST_USR_CLR_0_R, "R200_SE_VTX_ST_USR_CLR_0_R" },
+   { R200_SE_VTX_ST_USR_CLR_0_G, "R200_SE_VTX_ST_USR_CLR_0_G" },
+   { R200_SE_VTX_ST_USR_CLR_0_B, "R200_SE_VTX_ST_USR_CLR_0_B" },
+   { R200_SE_VTX_ST_USR_CLR_0_A, "R200_SE_VTX_ST_USR_CLR_0_A" },
+   { R200_SE_VTX_ST_USR_CLR_1_R, "R200_SE_VTX_ST_USR_CLR_1_R" },
+   { R200_SE_VTX_ST_USR_CLR_1_G, "R200_SE_VTX_ST_USR_CLR_1_G" },
+   { R200_SE_VTX_ST_USR_CLR_1_B, "R200_SE_VTX_ST_USR_CLR_1_B" },
+   { R200_SE_VTX_ST_USR_CLR_1_A, "R200_SE_VTX_ST_USR_CLR_1_A" },
+   { R200_SE_VTX_ST_CLR_0_PKD, "R200_SE_VTX_ST_CLR_0_PKD" },
+   { R200_SE_VTX_ST_CLR_1_PKD, "R200_SE_VTX_ST_CLR_1_PKD" },
+   { R200_SE_VTX_ST_CLR_2_PKD, "R200_SE_VTX_ST_CLR_2_PKD" },
+   { R200_SE_VTX_ST_CLR_3_PKD, "R200_SE_VTX_ST_CLR_3_PKD" },
+   { R200_SE_VTX_ST_CLR_4_PKD, "R200_SE_VTX_ST_CLR_4_PKD" },
+   { R200_SE_VTX_ST_CLR_5_PKD, "R200_SE_VTX_ST_CLR_5_PKD" },
+   { R200_SE_VTX_ST_CLR_6_PKD, "R200_SE_VTX_ST_CLR_6_PKD" },
+   { R200_SE_VTX_ST_CLR_7_PKD, "R200_SE_VTX_ST_CLR_7_PKD" },
+   { R200_SE_VTX_ST_POS_0_X_2, "R200_SE_VTX_ST_POS_0_X_2" },
+   { R200_SE_VTX_ST_POS_0_Y_2, "R200_SE_VTX_ST_POS_0_Y_2" },
+   { R200_SE_VTX_ST_PAR_CLR_LD, "R200_SE_VTX_ST_PAR_CLR_LD" },
+   { R200_SE_VTX_ST_USR_CLR_PKD, "R200_SE_VTX_ST_USR_CLR_PKD" },
+   { R200_SE_VTX_ST_POS_0_X_3, "R200_SE_VTX_ST_POS_0_X_3" },
+   { R200_SE_VTX_ST_POS_0_Y_3, "R200_SE_VTX_ST_POS_0_Y_3" },
+   { R200_SE_VTX_ST_POS_0_Z_3, "R200_SE_VTX_ST_POS_0_Z_3" },
+   { R200_SE_VTX_ST_END_OF_PKT, "R200_SE_VTX_ST_END_OF_PKT" },
+   { R200_RE_POINTSIZE, "R200_RE_POINTSIZE" },
+   { R200_RE_TOP_LEFT, "R200_RE_TOP_LEFT" },
+   { R200_RE_AUX_SCISSOR_CNTL, "R200_RE_AUX_SCISSOR_CNTL" },
+   { R200_PP_TXFILTER_0, "R200_PP_TXFILTER_0" },
+   { R200_PP_TXFORMAT_0, "R200_PP_TXFORMAT_0" },
+   { R200_PP_TXSIZE_0, "R200_PP_TXSIZE_0" },
+   { R200_PP_TXFORMAT_X_0, "R200_PP_TXFORMAT_X_0" },
+   { R200_PP_TXPITCH_0, "R200_PP_TXPITCH_0" },
+   { R200_PP_BORDER_COLOR_0, "R200_PP_BORDER_COLOR_0" },
+   { R200_PP_CUBIC_FACES_0, "R200_PP_CUBIC_FACES_0" },
+   { R200_PP_TXMULTI_CTL_0, "R200_PP_TXMULTI_CTL_0" },
+   { R200_PP_TXFILTER_1, "R200_PP_TXFILTER_1" },
+   { R200_PP_TXFORMAT_1, "R200_PP_TXFORMAT_1" },
+   { R200_PP_TXSIZE_1, "R200_PP_TXSIZE_1" },
+   { R200_PP_TXFORMAT_X_1, "R200_PP_TXFORMAT_X_1" },
+   { R200_PP_TXPITCH_1, "R200_PP_TXPITCH_1" },
+   { R200_PP_BORDER_COLOR_1, "R200_PP_BORDER_COLOR_1" },
+   { R200_PP_CUBIC_FACES_1, "R200_PP_CUBIC_FACES_1" },
+   { R200_PP_TXMULTI_CTL_1, "R200_PP_TXMULTI_CTL_1" },
+   { R200_PP_TXFILTER_2, "R200_PP_TXFILTER_2" },
+   { R200_PP_TXFORMAT_2, "R200_PP_TXFORMAT_2" },
+   { R200_PP_TXSIZE_2, "R200_PP_TXSIZE_2" },
+   { R200_PP_TXFORMAT_X_2, "R200_PP_TXFORMAT_X_2" },
+   { R200_PP_TXPITCH_2, "R200_PP_TXPITCH_2" },
+   { R200_PP_BORDER_COLOR_2, "R200_PP_BORDER_COLOR_2" },
+   { R200_PP_CUBIC_FACES_2, "R200_PP_CUBIC_FACES_2" },
+   { R200_PP_TXMULTI_CTL_2, "R200_PP_TXMULTI_CTL_2" },
+   { R200_PP_TXFILTER_3, "R200_PP_TXFILTER_3" },
+   { R200_PP_TXFORMAT_3, "R200_PP_TXFORMAT_3" },
+   { R200_PP_TXSIZE_3, "R200_PP_TXSIZE_3" },
+   { R200_PP_TXFORMAT_X_3, "R200_PP_TXFORMAT_X_3" },
+   { R200_PP_TXPITCH_3, "R200_PP_TXPITCH_3" },
+   { R200_PP_BORDER_COLOR_3, "R200_PP_BORDER_COLOR_3" },
+   { R200_PP_CUBIC_FACES_3, "R200_PP_CUBIC_FACES_3" },
+   { R200_PP_TXMULTI_CTL_3, "R200_PP_TXMULTI_CTL_3" },
+   { R200_PP_TXFILTER_4, "R200_PP_TXFILTER_4" },
+   { R200_PP_TXFORMAT_4, "R200_PP_TXFORMAT_4" },
+   { R200_PP_TXSIZE_4, "R200_PP_TXSIZE_4" },
+   { R200_PP_TXFORMAT_X_4, "R200_PP_TXFORMAT_X_4" },
+   { R200_PP_TXPITCH_4, "R200_PP_TXPITCH_4" },
+   { R200_PP_BORDER_COLOR_4, "R200_PP_BORDER_COLOR_4" },
+   { R200_PP_CUBIC_FACES_4, "R200_PP_CUBIC_FACES_4" },
+   { R200_PP_TXMULTI_CTL_4, "R200_PP_TXMULTI_CTL_4" },
+   { R200_PP_TXFILTER_5, "R200_PP_TXFILTER_5" },
+   { R200_PP_TXFORMAT_5, "R200_PP_TXFORMAT_5" },
+   { R200_PP_TXSIZE_5, "R200_PP_TXSIZE_5" },
+   { R200_PP_TXFORMAT_X_5, "R200_PP_TXFORMAT_X_5" },
+   { R200_PP_TXPITCH_5, "R200_PP_TXPITCH_5" },
+   { R200_PP_BORDER_COLOR_5, "R200_PP_BORDER_COLOR_5" },
+   { R200_PP_CUBIC_FACES_5, "R200_PP_CUBIC_FACES_5" },
+   { R200_PP_TXMULTI_CTL_5, "R200_PP_TXMULTI_CTL_5" },
+   { R200_PP_TXOFFSET_0, "R200_PP_TXOFFSET_0" },
+   { R200_PP_CUBIC_OFFSET_F1_0, "R200_PP_CUBIC_OFFSET_F1_0" },
+   { R200_PP_CUBIC_OFFSET_F2_0, "R200_PP_CUBIC_OFFSET_F2_0" },
+   { R200_PP_CUBIC_OFFSET_F3_0, "R200_PP_CUBIC_OFFSET_F3_0" },
+   { R200_PP_CUBIC_OFFSET_F4_0, "R200_PP_CUBIC_OFFSET_F4_0" },
+   { R200_PP_CUBIC_OFFSET_F5_0, "R200_PP_CUBIC_OFFSET_F5_0" },
+   { R200_PP_TXOFFSET_1, "R200_PP_TXOFFSET_1" },
+   { R200_PP_CUBIC_OFFSET_F1_1, "R200_PP_CUBIC_OFFSET_F1_1" },
+   { R200_PP_CUBIC_OFFSET_F2_1, "R200_PP_CUBIC_OFFSET_F2_1" },
+   { R200_PP_CUBIC_OFFSET_F3_1, "R200_PP_CUBIC_OFFSET_F3_1" },
+   { R200_PP_CUBIC_OFFSET_F4_1, "R200_PP_CUBIC_OFFSET_F4_1" },
+   { R200_PP_CUBIC_OFFSET_F5_1, "R200_PP_CUBIC_OFFSET_F5_1" },
+   { R200_PP_TXOFFSET_2, "R200_PP_TXOFFSET_2" },
+   { R200_PP_CUBIC_OFFSET_F1_2, "R200_PP_CUBIC_OFFSET_F1_2" },
+   { R200_PP_CUBIC_OFFSET_F2_2, "R200_PP_CUBIC_OFFSET_F2_2" },
+   { R200_PP_CUBIC_OFFSET_F3_2, "R200_PP_CUBIC_OFFSET_F3_2" },
+   { R200_PP_CUBIC_OFFSET_F4_2, "R200_PP_CUBIC_OFFSET_F4_2" },
+   { R200_PP_CUBIC_OFFSET_F5_2, "R200_PP_CUBIC_OFFSET_F5_2" },
+   { R200_PP_TXOFFSET_3, "R200_PP_TXOFFSET_3" },
+   { R200_PP_CUBIC_OFFSET_F1_3, "R200_PP_CUBIC_OFFSET_F1_3" },
+   { R200_PP_CUBIC_OFFSET_F2_3, "R200_PP_CUBIC_OFFSET_F2_3" },
+   { R200_PP_CUBIC_OFFSET_F3_3, "R200_PP_CUBIC_OFFSET_F3_3" },
+   { R200_PP_CUBIC_OFFSET_F4_3, "R200_PP_CUBIC_OFFSET_F4_3" },
+   { R200_PP_CUBIC_OFFSET_F5_3, "R200_PP_CUBIC_OFFSET_F5_3" },
+   { R200_PP_TXOFFSET_4, "R200_PP_TXOFFSET_4" },
+   { R200_PP_CUBIC_OFFSET_F1_4, "R200_PP_CUBIC_OFFSET_F1_4" },
+   { R200_PP_CUBIC_OFFSET_F2_4, "R200_PP_CUBIC_OFFSET_F2_4" },
+   { R200_PP_CUBIC_OFFSET_F3_4, "R200_PP_CUBIC_OFFSET_F3_4" },
+   { R200_PP_CUBIC_OFFSET_F4_4, "R200_PP_CUBIC_OFFSET_F4_4" },
+   { R200_PP_CUBIC_OFFSET_F5_4, "R200_PP_CUBIC_OFFSET_F5_4" },
+   { R200_PP_TXOFFSET_5, "R200_PP_TXOFFSET_5" },
+   { R200_PP_CUBIC_OFFSET_F1_5, "R200_PP_CUBIC_OFFSET_F1_5" },
+   { R200_PP_CUBIC_OFFSET_F2_5, "R200_PP_CUBIC_OFFSET_F2_5" },
+   { R200_PP_CUBIC_OFFSET_F3_5, "R200_PP_CUBIC_OFFSET_F3_5" },
+   { R200_PP_CUBIC_OFFSET_F4_5, "R200_PP_CUBIC_OFFSET_F4_5" },
+   { R200_PP_CUBIC_OFFSET_F5_5, "R200_PP_CUBIC_OFFSET_F5_5" },
+   { R200_PP_TAM_DEBUG3, "R200_PP_TAM_DEBUG3" },
+   { R200_PP_TFACTOR_0, "R200_PP_TFACTOR_0" },
+   { R200_PP_TFACTOR_1, "R200_PP_TFACTOR_1" },
+   { R200_PP_TFACTOR_2, "R200_PP_TFACTOR_2" },
+   { R200_PP_TFACTOR_3, "R200_PP_TFACTOR_3" },
+   { R200_PP_TFACTOR_4, "R200_PP_TFACTOR_4" },
+   { R200_PP_TFACTOR_5, "R200_PP_TFACTOR_5" },
+   { R200_PP_TFACTOR_6, "R200_PP_TFACTOR_6" },
+   { R200_PP_TFACTOR_7, "R200_PP_TFACTOR_7" },
+   { R200_PP_TXCBLEND_0, "R200_PP_TXCBLEND_0" },
+   { R200_PP_TXCBLEND2_0, "R200_PP_TXCBLEND2_0" },
+   { R200_PP_TXABLEND_0, "R200_PP_TXABLEND_0" },
+   { R200_PP_TXABLEND2_0, "R200_PP_TXABLEND2_0" },
+   { R200_PP_TXCBLEND_1, "R200_PP_TXCBLEND_1" },
+   { R200_PP_TXCBLEND2_1, "R200_PP_TXCBLEND2_1" },
+   { R200_PP_TXABLEND_1, "R200_PP_TXABLEND_1" },
+   { R200_PP_TXABLEND2_1, "R200_PP_TXABLEND2_1" },
+   { R200_PP_TXCBLEND_2, "R200_PP_TXCBLEND_2" },
+   { R200_PP_TXCBLEND2_2, "R200_PP_TXCBLEND2_2" },
+   { R200_PP_TXABLEND_2, "R200_PP_TXABLEND_2" },
+   { R200_PP_TXABLEND2_2, "R200_PP_TXABLEND2_2" },
+   { R200_PP_TXCBLEND_3, "R200_PP_TXCBLEND_3" },
+   { R200_PP_TXCBLEND2_3, "R200_PP_TXCBLEND2_3" },
+   { R200_PP_TXABLEND_3, "R200_PP_TXABLEND_3" },
+   { R200_PP_TXABLEND2_3, "R200_PP_TXABLEND2_3" },
+   { R200_PP_TXCBLEND_4, "R200_PP_TXCBLEND_4" },
+   { R200_PP_TXCBLEND2_4, "R200_PP_TXCBLEND2_4" },
+   { R200_PP_TXABLEND_4, "R200_PP_TXABLEND_4" },
+   { R200_PP_TXABLEND2_4, "R200_PP_TXABLEND2_4" },
+   { R200_PP_TXCBLEND_5, "R200_PP_TXCBLEND_5" },
+   { R200_PP_TXCBLEND2_5, "R200_PP_TXCBLEND2_5" },
+   { R200_PP_TXABLEND_5, "R200_PP_TXABLEND_5" },
+   { R200_PP_TXABLEND2_5, "R200_PP_TXABLEND2_5" },
+   { R200_PP_TXCBLEND_6, "R200_PP_TXCBLEND_6" },
+   { R200_PP_TXCBLEND2_6, "R200_PP_TXCBLEND2_6" },
+   { R200_PP_TXABLEND_6, "R200_PP_TXABLEND_6" },
+   { R200_PP_TXABLEND2_6, "R200_PP_TXABLEND2_6" },
+   { R200_PP_TXCBLEND_7, "R200_PP_TXCBLEND_7" },
+   { R200_PP_TXCBLEND2_7, "R200_PP_TXCBLEND2_7" },
+   { R200_PP_TXABLEND_7, "R200_PP_TXABLEND_7" },
+   { R200_PP_TXABLEND2_7, "R200_PP_TXABLEND2_7" },
+   { R200_RB3D_BLENDCOLOR, "R200_RB3D_BLENDCOLOR" },
+   { R200_RB3D_ABLENDCNTL, "R200_RB3D_ABLENDCNTL" },
+   { R200_RB3D_CBLENDCNTL, "R200_RB3D_CBLENDCNTL" },
+   { R200_SE_TCL_OUTPUT_VTX_COMP_SEL, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
+   { R200_PP_CNTL_X, "R200_PP_CNTL_X" },
+   { R200_SE_VAP_CNTL_STATUS, "R200_SE_VAP_CNTL_STATUS" },
+   { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
+   { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_1, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_1" },
+   { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_2, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_2" },
+   { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_3, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_3" },
+   { R200_PP_TRI_PERF, "R200_PP_TRI_PERF" },
+   { R200_PP_PERF_CNTL, "R200_PP_PERF_CNTL" },
+   { R200_PP_TXCBLEND_8, "R200_PP_TXCBLEND_8" },
+   { R200_PP_TXCBLEND2_8, "R200_PP_TXCBLEND2_8" },
+   { R200_PP_TXABLEND_8, "R200_PP_TXABLEND_8" },
+   { R200_PP_TXABLEND2_8, "R200_PP_TXABLEND2_8" },
+   { R200_PP_TXCBLEND_9, "R200_PP_TXCBLEND_9" },
+   { R200_PP_TXCBLEND2_9, "R200_PP_TXCBLEND2_9" },
+   { R200_PP_TXABLEND_9, "R200_PP_TXABLEND_9" },
+   { R200_PP_TXABLEND2_9, "R200_PP_TXABLEND2_9" },
+   { R200_PP_TXCBLEND_10, "R200_PP_TXCBLEND_10" },
+   { R200_PP_TXCBLEND2_10, "R200_PP_TXCBLEND2_10" },
+   { R200_PP_TXABLEND_10, "R200_PP_TXABLEND_10" },
+   { R200_PP_TXABLEND2_10, "R200_PP_TXABLEND2_10" },
+   { R200_PP_TXCBLEND_11, "R200_PP_TXCBLEND_11" },
+   { R200_PP_TXCBLEND2_11, "R200_PP_TXCBLEND2_11" },
+   { R200_PP_TXABLEND_11, "R200_PP_TXABLEND_11" },
+   { R200_PP_TXABLEND2_11, "R200_PP_TXABLEND2_11" },
+   { R200_PP_TXCBLEND_12, "R200_PP_TXCBLEND_12" },
+   { R200_PP_TXCBLEND2_12, "R200_PP_TXCBLEND2_12" },
+   { R200_PP_TXABLEND_12, "R200_PP_TXABLEND_12" },
+   { R200_PP_TXABLEND2_12, "R200_PP_TXABLEND2_12" },
+   { R200_PP_TXCBLEND_13, "R200_PP_TXCBLEND_13" },
+   { R200_PP_TXCBLEND2_13, "R200_PP_TXCBLEND2_13" },
+   { R200_PP_TXABLEND_13, "R200_PP_TXABLEND_13" },
+   { R200_PP_TXABLEND2_13, "R200_PP_TXABLEND2_13" },
+   { R200_PP_TXCBLEND_14, "R200_PP_TXCBLEND_14" },
+   { R200_PP_TXCBLEND2_14, "R200_PP_TXCBLEND2_14" },
+   { R200_PP_TXABLEND_14, "R200_PP_TXABLEND_14" },
+   { R200_PP_TXABLEND2_14, "R200_PP_TXABLEND2_14" },
+   { R200_PP_TXCBLEND_15, "R200_PP_TXCBLEND_15" },
+   { R200_PP_TXCBLEND2_15, "R200_PP_TXCBLEND2_15" },
+   { R200_PP_TXABLEND_15, "R200_PP_TXABLEND_15" },
+   { R200_PP_TXABLEND2_15, "R200_PP_TXABLEND2_15" },
+   { R200_VAP_PVS_CNTL_1, "R200_VAP_PVS_CNTL_1" },
+   { R200_VAP_PVS_CNTL_2, "R200_VAP_PVS_CNTL_2" },
+};
+
+static struct reg_names scalar_names[] = {   /* names for slots of scalars[], walked in order by init_regs */
+   { R200_SS_LIGHT_DCD_ADDR, "R200_SS_LIGHT_DCD_ADDR" },
+   { R200_SS_LIGHT_DCM_ADDR, "R200_SS_LIGHT_DCM_ADDR" },
+   { R200_SS_LIGHT_SPOT_EXPONENT_ADDR, "R200_SS_LIGHT_SPOT_EXPONENT_ADDR" },
+   { R200_SS_LIGHT_SPOT_CUTOFF_ADDR, "R200_SS_LIGHT_SPOT_CUTOFF_ADDR" },
+   { R200_SS_LIGHT_SPECULAR_THRESH_ADDR, "R200_SS_LIGHT_SPECULAR_THRESH_ADDR" },
+   { R200_SS_LIGHT_RANGE_CUTOFF_SQRD, "R200_SS_LIGHT_RANGE_CUTOFF_SQRD" },
+   { R200_SS_LIGHT_RANGE_ATT_CONST, "R200_SS_LIGHT_RANGE_ATT_CONST" },
+   { R200_SS_VERT_GUARD_CLIP_ADJ_ADDR, "R200_SS_VERT_GUARD_CLIP_ADJ_ADDR" },
+   { R200_SS_VERT_GUARD_DISCARD_ADJ_ADDR, "R200_SS_VERT_GUARD_DISCARD_ADJ_ADDR" },
+   { R200_SS_HORZ_GUARD_CLIP_ADJ_ADDR, "R200_SS_HORZ_GUARD_CLIP_ADJ_ADDR" },
+   { R200_SS_HORZ_GUARD_DISCARD_ADJ_ADDR, "R200_SS_HORZ_GUARD_DISCARD_ADJ_ADDR" },
+   { R200_SS_MAT_0_SHININESS, "R200_SS_MAT_0_SHININESS" },
+   { R200_SS_MAT_1_SHININESS, "R200_SS_MAT_1_SHININESS" },
+   { 1000, "" },   /* terminator: init_regs peeks at tmp[1]; 1000 is beyond the 513 slots of scalars[] */
+};
+
+/* Vector names.  init_regs multiplies each idx by 4 to puff these out so
+ * that the vector slots look like normal (dword) registers. */
+static struct reg_names vector_names[] = {
+   { 0, "start" },   /* name used for slots below the first named vector */
+   { R200_VS_LIGHT_AMBIENT_ADDR, "R200_VS_LIGHT_AMBIENT_ADDR" },
+   { R200_VS_LIGHT_DIFFUSE_ADDR, "R200_VS_LIGHT_DIFFUSE_ADDR" },
+   { R200_VS_LIGHT_SPECULAR_ADDR, "R200_VS_LIGHT_SPECULAR_ADDR" },
+   { R200_VS_LIGHT_DIRPOS_ADDR, "R200_VS_LIGHT_DIRPOS_ADDR" },
+   { R200_VS_LIGHT_HWVSPOT_ADDR, "R200_VS_LIGHT_HWVSPOT_ADDR" },
+   { R200_VS_LIGHT_ATTENUATION_ADDR, "R200_VS_LIGHT_ATTENUATION_ADDR" },
+   { R200_VS_SPOT_DUAL_CONE, "R200_VS_SPOT_DUAL_CONE" },
+   { R200_VS_GLOBAL_AMBIENT_ADDR, "R200_VS_GLOBAL_AMBIENT_ADDR" },
+   { R200_VS_FOG_PARAM_ADDR, "R200_VS_FOG_PARAM_ADDR" },
+   { R200_VS_EYE_VECTOR_ADDR, "R200_VS_EYE_VECTOR_ADDR" },
+   { R200_VS_UCP_ADDR, "R200_VS_UCP_ADDR" },
+   { R200_VS_PNT_SPRITE_VPORT_SCALE, "R200_VS_PNT_SPRITE_VPORT_SCALE" },
+   { R200_VS_MATRIX_0_MV, "R200_VS_MATRIX_0_MV" },
+   { R200_VS_MATRIX_1_INV_MV, "R200_VS_MATRIX_1_INV_MV" },
+   { R200_VS_MATRIX_2_MVP, "R200_VS_MATRIX_2_MVP" },
+   { R200_VS_MATRIX_3_TEX0, "R200_VS_MATRIX_3_TEX0" },
+   { R200_VS_MATRIX_4_TEX1, "R200_VS_MATRIX_4_TEX1" },
+   { R200_VS_MATRIX_5_TEX2, "R200_VS_MATRIX_5_TEX2" },
+   { R200_VS_MATRIX_6_TEX3, "R200_VS_MATRIX_6_TEX3" },
+   { R200_VS_MATRIX_7_TEX4, "R200_VS_MATRIX_7_TEX4" },
+   { R200_VS_MATRIX_8_TEX5, "R200_VS_MATRIX_8_TEX5" },
+   { R200_VS_MAT_0_EMISS, "R200_VS_MAT_0_EMISS" },
+   { R200_VS_MAT_0_AMB, "R200_VS_MAT_0_AMB" },
+   { R200_VS_MAT_0_DIF, "R200_VS_MAT_0_DIF" },
+   { R200_VS_MAT_0_SPEC, "R200_VS_MAT_0_SPEC" },
+   { R200_VS_MAT_1_EMISS, "R200_VS_MAT_1_EMISS" },
+   { R200_VS_MAT_1_AMB, "R200_VS_MAT_1_AMB" },
+   { R200_VS_MAT_1_DIF, "R200_VS_MAT_1_DIF" },
+   { R200_VS_MAT_1_SPEC, "R200_VS_MAT_1_SPEC" },
+   { R200_VS_EYE2CLIP_MTX, "R200_VS_EYE2CLIP_MTX" },
+   { R200_VS_PNT_SPRITE_ATT_CONST, "R200_VS_PNT_SPRITE_ATT_CONST" },
+   { R200_VS_PNT_SPRITE_EYE_IN_MODEL, "R200_VS_PNT_SPRITE_EYE_IN_MODEL" },
+   { R200_VS_PNT_SPRITE_CLAMP, "R200_VS_PNT_SPRITE_CLAMP" },
+   { R200_VS_MAX, "R200_VS_MAX" },
+   { 1000, "" },   /* terminator: 1000*4 is beyond the 2049 slots of vectors[] */
+};
+
+union fi { float f; int i; };   /* one recorded value, readable as float or as int */
+
+#define ISVEC   1   /* struct reg flag: entry of vectors[] (set by init_regs) */
+#define ISFLOAT 2   /* struct reg flag: set by init_regs on scalars[] and vectors[] */
+#define TOUCHED 4   /* struct reg flag: not set in this view -- presumably "was written"; confirm */
+
+struct reg {
+   int idx;                    /* register offset or slot number; -1 marks the table terminator */
+   struct reg_names *closest;  /* name entry this slot is labelled from (see get_reg_name) */
+   int flags;                  /* ISVEC | ISFLOAT | TOUCHED bits */
+   union fi current;           /* presumably the most recent value -- usage not visible here */
+   union fi *values;           /* distinct values recorded by find_or_add_value (heap) */
+   int nvalues;                /* entries of values[] in use */
+   int nalloc;                 /* entries of values[] allocated */
+   float vmin, vmax;           /* presumably the observed float range -- usage not visible here */
+};
+
+
+/* Tracking tables.  Each one has an extra trailing entry whose idx is set
+ * to -1 by init_regs(); lookup_reg() stops its search at that entry.
+ */
+static struct reg regs[Elements(reg_names)+1];
+static struct reg scalars[512+1];
+static struct reg vectors[512*4+1];
+
+/* Running counters: values emitted, values counted as changed, and
+ * RADEON_CMD_DMA_DISCARD commands seen by r200SanityCmdBuffer().
+ */
+static int total, total_changed, bufs;
+
+/**
+ * Fill in the regs[], scalars[] and vectors[] tracking tables.
+ *
+ * regs[] gets one entry per reg_names[] entry.  scalars[] and vectors[]
+ * get one entry per index; each entry's "closest" pointer is advanced
+ * through scalar_names[] / vector_names[] whenever the next name's index
+ * is reached, so that get_reg_name() can print "NAME+offset".
+ * The last entry of every table is marked with idx == -1, which is the
+ * terminator lookup_reg() searches for.
+ */
+static void init_regs( void )
+{
+   struct reg_names *tmp;
+   int i;
+
+   /* regs[] is one entry longer than reg_names[] (room for the
+    * terminator), so only walk the named entries here: using
+    * Elements(regs) as the bound would read reg_names[] one element
+    * past its end.
+    */
+   for (i = 0 ; i < Elements(reg_names) ; i++) {
+      regs[i].idx = reg_names[i].idx;
+      regs[i].closest = &reg_names[i];
+      regs[i].flags = 0;
+   }
+
+   for (i = 0, tmp = scalar_names ; i < Elements(scalars) ; i++) {
+      if (tmp[1].idx == i) tmp++;
+      scalars[i].idx = i;
+      scalars[i].closest = tmp;
+      scalars[i].flags = ISFLOAT;
+   }
+
+   /* vector_names[] is indexed per vector, vectors[] per component,
+    * hence the factor of four.
+    */
+   for (i = 0, tmp = vector_names ; i < Elements(vectors) ; i++) {
+      if (tmp[1].idx*4 == i) tmp++;
+      vectors[i].idx = i;
+      vectors[i].closest = tmp;
+      vectors[i].flags = ISFLOAT|ISVEC;
+   }
+
+   /* Terminators for lookup_reg(). */
+   regs[Elements(regs)-1].idx = -1;
+   scalars[Elements(scalars)-1].idx = -1;
+   vectors[Elements(vectors)-1].idx = -1;
+}
+
+/**
+ * Record that \p val has been written to \p reg.
+ *
+ * \return 1 if the value was already in reg->values, 0 if it was not
+ * (in which case it is appended, growing the list as needed).
+ *
+ * If the list cannot be grown the old list is kept untouched, a message
+ * is printed and 0 is returned without storing the value.
+ */
+static int find_or_add_value( struct reg *reg, int val )
+{
+   int j;
+
+   for ( j = 0 ; j < reg->nvalues ; j++)
+      if ( val == reg->values[j].i )
+         return 1;
+
+   if (reg->nvalues == reg->nalloc) {
+      int nalloc = (reg->nalloc + 5) * 2;
+      /* Go through a temporary: on failure realloc() leaves the old block
+       * valid, and assigning NULL straight to reg->values would leak it.
+       */
+      union fi *grown = (union fi *) realloc( reg->values,
+                                              nalloc * sizeof(union fi) );
+      if (!grown) {
+         fprintf(stderr, "*** out of memory recording value for reg 0x%x\n",
+                 reg->idx);
+         return 0;
+      }
+      reg->values = grown;
+      reg->nalloc = nalloc;
+   }
+
+   reg->values[reg->nvalues++].i = val;
+   return 0;
+}
+
+/**
+ * Linear search of a tracking table for the entry whose idx equals \p reg.
+ *
+ * The table must end with an entry whose idx is -1.
+ * \return the matching entry, or NULL (after printing a message) if the
+ * terminator is reached first.
+ */
+static struct reg *lookup_reg( struct reg *tab, int reg )
+{
+   struct reg *entry = tab;
+
+   while (entry->idx != -1) {
+      if (entry->idx == reg)
+         return entry;
+      entry++;
+   }
+
+   fprintf(stderr, "*** unknown reg 0x%x\n", reg);
+   return NULL;
+}
+
+
+/**
+ * Produce a printable name for \p reg.
+ *
+ * When reg->idx equals the index of its closest name-table entry the
+ * table's own string is returned.  Otherwise the name is formatted into
+ * a static buffer as "NAME+offset" (scalars) or "NAME[comp]" /
+ * "NAME+offset[comp]" (vector components), so the result is only valid
+ * until the next call.
+ */
+static const char *get_reg_name( struct reg *reg )
+{
+   static char tmp[80];
+   const struct reg_names *base = reg->closest;
+
+   if (reg->idx == base->idx)
+      return base->name;
+
+   /* snprintf() so that a long table name can never overrun tmp[]. */
+   if (reg->flags & ISVEC) {
+      int vec = reg->idx / 4;
+      int comp = reg->idx % 4;
+
+      if (vec != base->idx)
+         snprintf(tmp, sizeof(tmp), "%s+%d[%d]",
+                  base->name, vec - base->idx, comp);
+      else
+         snprintf(tmp, sizeof(tmp), "%s[%d]", base->name, comp);
+   }
+   else {
+      /* idx == base->idx already returned above, so an offset is always
+       * needed here.
+       */
+      snprintf(tmp, sizeof(tmp), "%s+%d", base->name, reg->idx - base->idx);
+   }
+
+   return tmp;
+}
+
+/**
+ * Log an integer assignment to \p reg and remember the new value.
+ *
+ * A line is printed when VERBOSE is set, or when NORMAL is set and the
+ * value either differs from the current one or has not been recorded
+ * for this register before.
+ *
+ * \return non-zero if \p data differs from the previously current value.
+ */
+static int print_int_reg_assignment( struct reg *reg, int data )
+{
+   const int differs = (data != reg->current.i);
+   const int known = find_or_add_value( reg, data );
+   const int noteworthy = differs || !known;
+
+   if (VERBOSE || (NORMAL && noteworthy)) {
+      fprintf(stderr, "   %s <-- 0x%x", get_reg_name(reg), data);
+   }
+
+   if (NORMAL) {
+      if (!known) {
+         fprintf(stderr, " *** BRAND NEW VALUE");
+      }
+      else if (differs) {
+         fprintf(stderr, " *** CHANGED");
+      }
+   }
+
+   reg->current.i = data;
+
+   if (VERBOSE || (NORMAL && noteworthy)) {
+      fprintf(stderr, "\n");
+   }
+
+   return differs;
+}
+
+
+/**
+ * Log a float assignment to \p reg and remember the new value.
+ *
+ * A line is printed when VERBOSE is set, or when NORMAL is set and the
+ * value differs from the current one or falls outside the recorded
+ * [vmin, vmax] range.  The range itself is only widened when NORMAL is
+ * set.
+ *
+ * \return non-zero if \p data differs from the previously current value.
+ */
+static int print_float_reg_assignment( struct reg *reg, float data )
+{
+   const int differs = (reg->current.f != data);
+   const int below = (data < reg->vmin);
+   const int above = (data > reg->vmax);
+   const int noteworthy = below || above || differs;
+
+   if (VERBOSE || (NORMAL && noteworthy)) {
+      fprintf(stderr, "   %s <-- %.3f", get_reg_name(reg), data);
+   }
+
+   if (NORMAL) {
+      if (below) {
+         fprintf(stderr, " *** NEW MIN (prev %.3f)", reg->vmin);
+         reg->vmin = data;
+      }
+      else if (above) {
+         fprintf(stderr, " *** NEW MAX (prev %.3f)", reg->vmax);
+         reg->vmax = data;
+      }
+      else if (differs) {
+         fprintf(stderr, " *** CHANGED");
+      }
+   }
+
+   reg->current.f = data;
+
+   if (VERBOSE || (NORMAL && noteworthy)) {
+      fprintf(stderr, "\n");
+   }
+
+   return differs;
+}
+
+/**
+ * Mark \p reg as TOUCHED and hand the raw dword \p data to the float or
+ * integer logger, depending on the register's ISFLOAT flag.
+ *
+ * \return the logger's result (non-zero if the value changed).
+ */
+static int print_reg_assignment( struct reg *reg, int data )
+{
+   reg->flags |= TOUCHED;
+
+   if (!(reg->flags & ISFLOAT)) {
+      return print_int_reg_assignment( reg, data );
+   }
+   else {
+      /* Reinterpret the dword's bits as a float. */
+      float_ui32_type bits;
+      bits.ui32 = data;
+      return print_float_reg_assignment( reg, bits.f );
+   }
+}
+
+/**
+ * Print the current value of \p reg, as a float or as hex according to
+ * ISFLOAT.  Registers that were never assigned (no TOUCHED flag) are
+ * skipped.
+ */
+static void print_reg( struct reg *reg )
+{
+   if (!(reg->flags & TOUCHED))
+      return;
+
+   if (reg->flags & ISFLOAT)
+      fprintf(stderr, "   %s == %f\n", get_reg_name(reg), reg->current.f);
+   else
+      fprintf(stderr, "   %s == 0x%x\n", get_reg_name(reg), reg->current.i);
+}
+
+
+/**
+ * Print every touched entry of regs[], scalars[] and vectors[], in that
+ * order.
+ */
+static void dump_state( void )
+{
+   struct reg *r;
+
+   for (r = regs ; r < regs + Elements(regs) ; r++) {
+      print_reg( r );
+   }
+
+   for (r = scalars ; r < scalars + Elements(scalars) ; r++) {
+      print_reg( r );
+   }
+
+   for (r = vectors ; r < vectors + Elements(vectors) ; r++) {
+      print_reg( r );
+   }
+}
+
+
+
+/**
+ * Check and log a state packet: packet[id].len dwords written to
+ * consecutive registers starting at packet[id].start.
+ *
+ * On success the consumed dwords are removed from the front of
+ * \p cmdbuf and 0 is returned.  -EINVAL is returned if the payload does
+ * not fit in the buffer, the packet id has no name in the packet table,
+ * or a target register is not in regs[].
+ *
+ * NOTE(review): id is used to index packet[] without a range check
+ * here; whether every possible packet_id is inside that table cannot be
+ * confirmed from this function - verify against the table definition.
+ */
+static int radeon_emit_packets( 
+   drm_radeon_cmd_header_t header,
+   drm_radeon_cmd_buffer_t *cmdbuf )
+{
+   int id = (int)header.packet.packet_id;
+   int sz = packet[id].len;
+   int *data = (int *)cmdbuf->buf;
+   int nbytes = sz * (int)sizeof(int);
+   int i;
+
+   /* Compare as signed ints: mixing bufsz with sizeof() would turn a
+    * negative remaining size into a huge unsigned one and let the check
+    * pass.
+    */
+   if (sz < 0 || cmdbuf->bufsz < 0 || nbytes > cmdbuf->bufsz) {
+      fprintf(stderr, "Packet overflows cmdbuf\n");
+      return -EINVAL;
+   }
+
+   if (!packet[id].name) {
+      fprintf(stderr, "*** Unknown packet 0 nr %d\n", id );
+      return -EINVAL;
+   }
+
+   if (VERBOSE)
+      fprintf(stderr, "Packet 0 reg %s nr %d\n", packet[id].name, sz );
+
+   for ( i = 0 ; i < sz ; i++) {
+      struct reg *reg = lookup_reg( regs, packet[id].start + i*4 );
+      /* lookup_reg() has already reported the unknown register. */
+      if (!reg)
+         return -EINVAL;
+      if (print_reg_assignment( reg, data[i] ))
+         total_changed++;
+      total++;
+   }
+
+   cmdbuf->buf += nbytes;
+   cmdbuf->bufsz -= nbytes;
+   return 0;
+}
+
+
+/**
+ * Check and log a scalar upload: header.scalars.count dwords written to
+ * scalars[] starting at header.scalars.offset, stepping by
+ * header.scalars.stride.
+ *
+ * On success the consumed dwords are removed from the front of
+ * \p cmdbuf and 0 is returned.  -EINVAL is returned if the payload does
+ * not fit in the buffer or a target index is not in scalars[].
+ */
+static int radeon_emit_scalars( 
+   drm_radeon_cmd_header_t header,
+   drm_radeon_cmd_buffer_t *cmdbuf )
+{
+   int sz = header.scalars.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.scalars.offset;
+   int stride = header.scalars.stride;
+   int nbytes = sz * (int)sizeof(int);
+   int i;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit scalars, start %d stride %d nr %d (end %d)\n",
+              start, stride, sz, start + stride * sz);
+
+   /* data[] is read for sz entries below; make sure they are there. */
+   if (nbytes > cmdbuf->bufsz) {
+      fprintf(stderr, "emit scalars overflows cmdbuf\n");
+      return -EINVAL;
+   }
+
+   for (i = 0 ; i < sz ; i++, start += stride) {
+      struct reg *reg = lookup_reg( scalars, start );
+      /* lookup_reg() has already reported the unknown index. */
+      if (!reg)
+         return -EINVAL;
+      if (print_reg_assignment( reg, data[i] ))
+         total_changed++;
+      total++;
+   }
+
+   cmdbuf->buf += nbytes;
+   cmdbuf->bufsz -= nbytes;
+   return 0;
+}
+
+
+/**
+ * Same as radeon_emit_scalars(), but the start index is
+ * header.scalars.offset + 0x100 and the upload is rejected when
+ * start + stride * count exceeds 258.
+ *
+ * On success the consumed dwords are removed from the front of
+ * \p cmdbuf and 0 is returned; any failure returns a non-zero value.
+ */
+static int radeon_emit_scalars2( 
+   drm_radeon_cmd_header_t header,
+   drm_radeon_cmd_buffer_t *cmdbuf )
+{
+   int sz = header.scalars.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.scalars.offset + 0x100;
+   int stride = header.scalars.stride;
+   int nbytes = sz * (int)sizeof(int);
+   int i;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit scalars2, start %d stride %d nr %d (end %d)\n",
+              start, stride, sz, start + stride * sz);
+
+   if (start + stride * sz > 258) {
+      fprintf(stderr, "emit scalars OVERFLOW %d/%d/%d\n", start, stride, sz);
+      return -1;
+   }
+
+   /* data[] is read for sz entries below; make sure they are there. */
+   if (nbytes > cmdbuf->bufsz) {
+      fprintf(stderr, "emit scalars2 overflows cmdbuf\n");
+      return -EINVAL;
+   }
+
+   for (i = 0 ; i < sz ; i++, start += stride) {
+      struct reg *reg = lookup_reg( scalars, start );
+      /* lookup_reg() has already reported the unknown index. */
+      if (!reg)
+         return -EINVAL;
+      if (print_reg_assignment( reg, data[i] ))
+         total_changed++;
+      total++;
+   }
+
+   cmdbuf->buf += nbytes;
+   cmdbuf->bufsz -= nbytes;
+   return 0;
+}
+
+/**
+ * Check and log a vector upload: header.vectors.count dwords, consumed
+ * four at a time, written to vectors[] starting at vector
+ * header.vectors.offset and stepping by header.vectors.stride vectors.
+ *
+ * total / total_changed are advanced by four per vector.
+ *
+ * On success the consumed dwords are removed from the front of
+ * \p cmdbuf and 0 is returned.  -EINVAL is returned if the payload does
+ * not fit in the buffer or a component is not in vectors[].
+ *
+ * Still to check: inf/nan/extreme-size values; table start, end, nr, etc.
+ */
+static int radeon_emit_vectors( 
+   drm_radeon_cmd_header_t header,
+   drm_radeon_cmd_buffer_t *cmdbuf )
+{
+   int sz = header.vectors.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.vectors.offset;
+   int stride = header.vectors.stride;
+   int nbytes = sz * (int)sizeof(int);
+   int i,j;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit vectors, start %d stride %d nr %d (end %d) (0x%x)\n",
+              start, stride, sz, start + stride * sz, header.i);
+
+   /* data[] is read for sz entries below; make sure they are there. */
+   if (nbytes > cmdbuf->bufsz) {
+      fprintf(stderr, "emit vectors overflows cmdbuf\n");
+      return -EINVAL;
+   }
+
+   for (i = 0 ; i < sz ;  start += stride) {
+      int changed = 0;
+      /* "i < sz" keeps a count that is not a multiple of four from
+       * reading dwords beyond the payload.
+       */
+      for (j = 0 ; j < 4 && i < sz ; i++,j++) {
+         struct reg *reg = lookup_reg( vectors, start*4+j );
+         /* lookup_reg() has already reported the unknown component. */
+         if (!reg)
+            return -EINVAL;
+         if (print_reg_assignment( reg, data[i] ))
+            changed = 1;
+      }
+      if (changed)
+         total_changed += 4;
+      total += 4;
+   }
+
+   cmdbuf->buf += nbytes;
+   cmdbuf->bufsz -= nbytes;
+   return 0;
+}
+
+/**
+ * Log a linear vector upload of header.veclinear.count vectors (four
+ * dwords each) starting at address addr_lo | (addr_hi << 8).
+ *
+ * Address ranges and what is printed for them:
+ *   [0x000, 0x060)  R200_VS_PARAM n,  n counted from the start address
+ *   [0x100, 0x160)  R200_VS_PARAM n,  n counted from 0x60
+ *   [0x080, 0x0c0)  R200_VS_PROG n,   n counted from 0
+ *   [0x180, 0x1c0)  R200_VS_PROG n,   n counted from 0x40
+ * Any other start address only produces a "write to unknown vector
+ * area" message.
+ *
+ * The consumed dwords are removed from the front of \p cmdbuf.
+ * \return 0, or -EINVAL if the payload does not fit in the buffer.
+ */
+static int radeon_emit_veclinear( 
+   drm_radeon_cmd_header_t header,
+   drm_radeon_cmd_buffer_t *cmdbuf )
+{
+   int sz = header.veclinear.count * 4;
+   int *data = (int *)cmdbuf->buf;
+   float *fdata =(float *)cmdbuf->buf;
+   int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
+   int nbytes = sz * (int)sizeof(int);
+   int first = 0;     /* number printed for the first vector */
+   int is_prog = 0;   /* program words (hex) rather than parameters (float) */
+   int known = 1;
+   int i;
+
+   /* NOTE(review): the "1||" makes this print unconditional; it is kept
+    * because removing it would change the tool's output.
+    */
+   if (1||VERBOSE)
+      fprintf(stderr, "emit vectors linear, start %d nr %d (end %d) (0x%x)\n",
+              start, sz >> 2, start + (sz >> 2), header.i);
+
+   /* data[] / fdata[] are read for sz entries below. */
+   if (nbytes > cmdbuf->bufsz) {
+      fprintf(stderr, "emit vectors linear overflows cmdbuf\n");
+      return -EINVAL;
+   }
+
+   if (start < 0x60) {
+      first = start;
+   }
+   else if ((start >= 0x100) && (start < 0x160)) {
+      first = start - 0x100 + 0x60;
+   }
+   else if ((start >= 0x80) && (start < 0xc0)) {
+      first = start - 0x80;
+      is_prog = 1;
+   }
+   else if ((start >= 0x180) && (start < 0x1c0)) {
+      first = start - 0x180 + 0x40;
+      is_prog = 1;
+   }
+   else {
+      known = 0;
+      fprintf(stderr, "write to unknown vector area\n");
+   }
+
+   for (i = 0 ; known && i < sz ;  i += 4) {
+      int n = (i >> 2) + first;
+
+      if (is_prog) {
+         fprintf(stderr, "R200_VS_PROG %d OPDST %08x\n", n, data[i]);
+         fprintf(stderr, "R200_VS_PROG %d SRC1  %08x\n", n, data[i+1]);
+         fprintf(stderr, "R200_VS_PROG %d SRC2  %08x\n", n, data[i+2]);
+         fprintf(stderr, "R200_VS_PROG %d SRC3  %08x\n", n, data[i+3]);
+      }
+      else {
+         fprintf(stderr, "R200_VS_PARAM %d 0 %f\n", n, fdata[i]);
+         fprintf(stderr, "R200_VS_PARAM %d 1 %f\n", n, fdata[i+1]);
+         fprintf(stderr, "R200_VS_PARAM %d 2 %f\n", n, fdata[i+2]);
+         fprintf(stderr, "R200_VS_PARAM %d 3 %f\n", n, fdata[i+3]);
+      }
+   }
+
+   cmdbuf->buf += nbytes;
+   cmdbuf->bufsz -= nbytes;
+   return 0;
+}
+
+/* Disabled: decodes a vertex-format word into a comma-separated list of
+ * the R200_VTX_* components it enables, and flags values not seen
+ * before.  It refers to others[] / V_VTXFMT, which are not defined in
+ * the visible part of this file, and its call sites in
+ * radeon_emit_packet3() are commented out.
+ */
+#if 0
+static int print_vertex_format( int vfmt )
+{
+   if (NORMAL) {
+      fprintf(stderr, "   %s(%x): %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+	      "vertex format",
+	      vfmt,
+	      "xy,",
+	      (vfmt & R200_VTX_Z0) ? "z," : "",
+	      (vfmt & R200_VTX_W0) ? "w0," : "",
+	      (vfmt & R200_VTX_FPCOLOR) ? "fpcolor," : "",
+	      (vfmt & R200_VTX_FPALPHA) ? "fpalpha," : "",
+	      (vfmt & R200_VTX_PKCOLOR) ? "pkcolor," : "",
+	      (vfmt & R200_VTX_FPSPEC) ? "fpspec," : "",
+	      (vfmt & R200_VTX_FPFOG) ? "fpfog," : "",
+	      (vfmt & R200_VTX_PKSPEC) ? "pkspec," : "",
+	      (vfmt & R200_VTX_ST0) ? "st0," : "",
+	      (vfmt & R200_VTX_ST1) ? "st1," : "",
+	      (vfmt & R200_VTX_Q1) ? "q1," : "",
+	      (vfmt & R200_VTX_ST2) ? "st2," : "",
+	      (vfmt & R200_VTX_Q2) ? "q2," : "",
+	      (vfmt & R200_VTX_ST3) ? "st3," : "",
+	      (vfmt & R200_VTX_Q3) ? "q3," : "",
+	      (vfmt & R200_VTX_Q0) ? "q0," : "",
+	      (vfmt & R200_VTX_N0) ? "n0," : "",
+	      (vfmt & R200_VTX_XY1) ? "xy1," : "",
+	      (vfmt & R200_VTX_Z1) ? "z1," : "",
+	      (vfmt & R200_VTX_W1) ? "w1," : "",
+	      (vfmt & R200_VTX_N1) ? "n1," : "");
+
+   
+      if (!find_or_add_value( &others[V_VTXFMT], vfmt ))
+	 fprintf(stderr, " *** NEW VALUE");
+
+      fprintf(stderr, "\n");
+   }
+
+   return 0;
+}
+#endif
+
+/* Printable primitive names, indexed by the low four bits of the prim
+ * word (see print_prim_and_flags()).  Entry 8 has no name and is NULL.
+ */
+static char *primname[0x10] = {
+   "NONE",
+   "POINTS",
+   "LINES",
+   "LINE_STRIP",
+   "TRIANGLES",
+   "TRIANGLE_FAN",
+   "TRIANGLE_STRIP",
+   "RECT_LIST",
+   NULL,
+   "3VRT_POINTS",
+   "3VRT_LINES",
+   "POINT_SPRITES",
+   "LINE_LOOP",
+   "QUADS",
+   "QUAD_STRIP",
+   "POLYGON",
+};
+
+/**
+ * Decode and validate a primitive word.
+ *
+ * The low four bits select the primitive type, bits 4-5 the vertex walk
+ * mode, and the upper 16 bits hold the vertex count.  When NORMAL is
+ * set the flags, primitive name and vertex count are printed.
+ *
+ * \return 0 if the vertex count is legal for the primitive type, -1
+ * (after printing a message) if it is not or the type is unknown.
+ */
+static int print_prim_and_flags( int prim )
+{
+   /* primname[] has a NULL hole; never hand NULL to "%s". */
+   const char *name = primname[prim&0xf] ? primname[prim&0xf] : "(unnamed)";
+   /* Shift as unsigned: right-shifting a negative int is
+    * implementation-defined and would yield a negative count whenever
+    * the top bit of the word is set.
+    */
+   int numverts = (int)(((unsigned int)prim) >> 16);
+
+   if (NORMAL)
+      fprintf(stderr, "   %s(%x): %s%s%s%s%s%s\n",
+              "prim flags",
+              prim,
+              ((prim & 0x30) == R200_VF_PRIM_WALK_IND) ? "IND," : "",
+              ((prim & 0x30) == R200_VF_PRIM_WALK_LIST) ? "LIST," : "",
+              ((prim & 0x30) == R200_VF_PRIM_WALK_RING) ? "RING," : "",
+              (prim & R200_VF_COLOR_ORDER_RGBA) ? "RGBA," : "BGRA, ",
+              (prim & R200_VF_INDEX_SZ_4) ? "INDX-32," : "",
+              (prim & R200_VF_TCL_OUTPUT_VTX_ENABLE) ? "TCL_OUT_VTX," : "");
+
+   if (NORMAL)
+      fprintf(stderr, "   prim: %s numverts %d\n", name, numverts);
+
+   switch (prim & 0xf) {
+   case R200_VF_PRIM_NONE:
+   case R200_VF_PRIM_POINTS:
+      if (numverts < 1) {
+         fprintf(stderr, "Bad nr verts for point %d\n", numverts);
+         return -1;
+      }
+      break;
+   case R200_VF_PRIM_LINES:
+   case R200_VF_PRIM_POINT_SPRITES:
+      if ((numverts & 1) || numverts == 0) {
+         fprintf(stderr, "Bad nr verts for line %d\n", numverts);
+         return -1;
+      }
+      break;
+   case R200_VF_PRIM_LINE_STRIP:
+   case R200_VF_PRIM_LINE_LOOP:
+      if (numverts < 2) {
+         fprintf(stderr, "Bad nr verts for line_strip %d\n", numverts);
+         return -1;
+      }
+      break;
+   case R200_VF_PRIM_TRIANGLES:
+   case R200_VF_PRIM_3VRT_POINTS:
+   case R200_VF_PRIM_3VRT_LINES:
+   case R200_VF_PRIM_RECT_LIST:
+      if (numverts % 3 || numverts == 0) {
+         fprintf(stderr, "Bad nr verts for tri %d\n", numverts);
+         return -1;
+      }
+      break;
+   case R200_VF_PRIM_TRIANGLE_FAN:
+   case R200_VF_PRIM_TRIANGLE_STRIP:
+   case R200_VF_PRIM_POLYGON:
+      if (numverts < 3) {
+         fprintf(stderr, "Bad nr verts for strip/fan %d\n", numverts);
+         return -1;
+      }
+      break;
+   case R200_VF_PRIM_QUADS:
+      if (numverts % 4 || numverts == 0) {
+         fprintf(stderr, "Bad nr verts for quad %d\n", numverts);
+         return -1;
+      }
+      break;
+   case R200_VF_PRIM_QUAD_STRIP:
+      if (numverts % 2 || numverts < 4) {
+         fprintf(stderr, "Bad nr verts for quadstrip %d\n", numverts);
+         return -1;
+      }
+      break;
+   default:
+      fprintf(stderr, "Bad primitive\n");
+      return -1;
+   }
+   return 0;
+}
+
+/* Label printed for a PACKET3 opcode (packet header with the count
+ * field masked off), or NULL if the opcode is not known to this checker.
+ */
+static const char *packet3_label( int op )
+{
+   switch (op) {
+   case R200_CP_CMD_NOP:             return "PACKET3_NOP";
+   case R200_CP_CMD_NEXT_CHAR:       return "PACKET3_NEXT_CHAR";
+   case R200_CP_CMD_PLY_NEXTSCAN:    return "PACKET3_PLY_NEXTSCAN";
+   case R200_CP_CMD_SET_SCISSORS:    return "PACKET3_SET_SCISSORS";
+   case R200_CP_CMD_LOAD_MICROCODE:  return "PACKET3_LOAD_MICROCODE";
+   case R200_CP_CMD_WAIT_FOR_IDLE:   return "PACKET3_WAIT_FOR_IDLE";
+   case R200_CP_CMD_3D_DRAW_VBUF:    return "PACKET3_3D_DRAW_VBUF";
+   case R200_CP_CMD_3D_DRAW_IMMD:    return "PACKET3_3D_DRAW_IMMD";
+   case R200_CP_CMD_3D_DRAW_INDX:    return "PACKET3_3D_DRAW_INDX";
+   case R200_CP_CMD_LOAD_PALETTE:    return "PACKET3_LOAD_PALETTE";
+   case R200_CP_CMD_3D_LOAD_VBPNTR:  return "PACKET3_3D_LOAD_VBPNTR";
+   case R200_CP_CMD_PAINT:           return "PACKET3_CNTL_PAINT";
+   case R200_CP_CMD_BITBLT:          return "PACKET3_CNTL_BITBLT";
+   case R200_CP_CMD_SMALLTEXT:       return "PACKET3_CNTL_SMALLTEXT";
+   case R200_CP_CMD_HOSTDATA_BLT:    return "PACKET3_CNTL_HOSTDATA_BLT";
+   case R200_CP_CMD_POLYLINE:        return "PACKET3_CNTL_POLYLINE";
+   case R200_CP_CMD_POLYSCANLINES:   return "PACKET3_CNTL_POLYSCANLINES";
+   case R200_CP_CMD_PAINT_MULTI:     return "PACKET3_CNTL_PAINT_MULTI";
+   case R200_CP_CMD_BITBLT_MULTI:    return "PACKET3_CNTL_BITBLT_MULTI";
+   case R200_CP_CMD_TRANS_BITBLT:    return "PACKET3_CNTL_TRANS_BITBLT";
+   case R200_CP_CMD_3D_DRAW_VBUF_2:  return "R200_CP_CMD_3D_DRAW_VBUF_2";
+   case R200_CP_CMD_3D_DRAW_IMMD_2:  return "R200_CP_CMD_3D_DRAW_IMMD_2";
+   case R200_CP_CMD_3D_DRAW_INDX_2:  return "R200_CP_CMD_3D_DRAW_INDX_2";
+   default:                          return NULL;
+   }
+}
+
+/**
+ * Check and log one PACKET3 command at the front of \p cmdbuf, using
+ * built-in knowledge about each packet type.
+ *
+ * The packet length is 2 + the header's count field.  The opcode label
+ * is printed when NORMAL is set (unknown opcodes are always reported);
+ * draw packets additionally have their primitive word validated and
+ * LOAD_VBPNTR has its array descriptors decoded.
+ *
+ * \return 0 after removing the packet from the front of \p cmdbuf, or
+ * -EINVAL for a malformed packet.
+ */
+static int radeon_emit_packet3( drm_radeon_cmd_buffer_t *cmdbuf )
+{
+   int cmdsz, op;
+   int *cmd = (int *)cmdbuf->buf;
+   int *tmp;
+   int i, stride, size, start;
+   const char *label;
+
+   /* cmd[0] is read next, so at least one dword must remain. */
+   if (cmdbuf->bufsz < (int)sizeof(int)) {
+      fprintf(stderr, "Bad packet\n");
+      return -EINVAL;
+   }
+
+   cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
+
+   if ((cmd[0] & RADEON_CP_PACKET_MASK) != RADEON_CP_PACKET3 ||
+       cmdsz * 4 > cmdbuf->bufsz ||
+       cmdsz > RADEON_CP_PACKET_MAX_DWORDS) {
+      fprintf(stderr, "Bad packet\n");
+      return -EINVAL;
+   }
+
+   op = cmd[0] & ~RADEON_CP_PACKET_COUNT_MASK;
+   label = packet3_label( op );
+
+   if (!label)
+      fprintf(stderr, "UNKNOWN PACKET, %d dwords\n", cmdsz);
+   else if (NORMAL)
+      fprintf(stderr, "%s, %d dwords\n", label, cmdsz);
+
+   switch (op) {
+   case R200_CP_CMD_3D_DRAW_VBUF:
+      /* The prim word is cmd[2]; a 2-dword packet does not have one. */
+      if (cmdsz < 3) {
+         fprintf(stderr, "Packet too short for prim word\n");
+         return -EINVAL;
+      }
+/*       print_vertex_format(cmd[1]); */
+      if (print_prim_and_flags(cmd[2]))
+         return -EINVAL;
+      break;
+
+   case R200_CP_CMD_3D_DRAW_INDX: {
+      int neltdwords;
+      if (cmdsz < 3) {
+         fprintf(stderr, "Packet too short for prim word\n");
+         return -EINVAL;
+      }
+/*       print_vertex_format(cmd[1]); */
+      if (print_prim_and_flags(cmd[2]))
+         return -EINVAL;
+      /* Two indices are counted per dword, rounding up. */
+      neltdwords = cmd[2]>>16;
+      neltdwords += neltdwords & 1;
+      neltdwords /= 2;
+      if (neltdwords + 3 != cmdsz)
+         fprintf(stderr, "Mismatch in DRAW_INDX, %d vs cmdsz %d\n",
+                 neltdwords, cmdsz);
+      break;
+   }
+
+   case R200_CP_CMD_3D_LOAD_VBPNTR:
+      if (NORMAL)
+         fprintf(stderr, "   nr arrays: %d\n", cmd[1]);
+
+      /* Every pair of arrays takes 3 dwords, a trailing single one 2. */
+      if (((cmd[1]/2)*3) + ((cmd[1]%2)*2) != cmdsz - 2) {
+         fprintf(stderr, "  ****** MISMATCH %d/%d *******\n",
+                 ((cmd[1]/2)*3) + ((cmd[1]%2)*2) + 2, cmdsz);
+         return -EINVAL;
+      }
+
+      if (NORMAL) {
+         tmp = cmd+2;
+         for (i = 0 ; i < cmd[1] ; i++) {
+            if (i & 1) {
+               /* Odd array: upper half of the shared dword, address in
+                * the third dword of the group.
+                */
+               stride = (tmp[0]>>24) & 0xff;
+               size = (tmp[0]>>16) & 0xff;
+               start = tmp[2];
+               tmp += 3;
+            }
+            else {
+               /* Even array: lower half, address in the second dword. */
+               stride = (tmp[0]>>8) & 0xff;
+               size = (tmp[0]) & 0xff;
+               start = tmp[1];
+            }
+            fprintf(stderr, "   array %d: start 0x%x vsize %d vstride %d\n",
+                    i, start, size, stride );
+         }
+      }
+      break;
+
+   case R200_CP_CMD_3D_DRAW_VBUF_2:
+   case R200_CP_CMD_3D_DRAW_IMMD_2:
+   case R200_CP_CMD_3D_DRAW_INDX_2:
+      if (print_prim_and_flags(cmd[1]))
+         return -EINVAL;
+      break;
+
+   default:
+      /* Nothing beyond the label to check. */
+      break;
+   }
+
+   cmdbuf->buf += cmdsz * 4;
+   cmdbuf->bufsz -= cmdsz * 4;
+   return 0;
+}
+
+
+/**
+ * PACKET3 with cliprects: dump the tracked state if anything changed
+ * (VERBOSE only), list the clip boxes (NORMAL only), then hand the
+ * packet to radeon_emit_packet3().
+ *
+ * A box count of exactly one is reset to zero before the packet is
+ * processed.
+ */
+static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf )
+{
+   drm_clip_rect_t *rects = (drm_clip_rect_t *)cmdbuf->boxes;
+   int n;
+
+   if (VERBOSE && total_changed) {
+      dump_state();
+      total_changed = 0;
+   }
+
+   if (NORMAL) {
+      for (n = 0 ; n < cmdbuf->nbox ; n++) {
+         fprintf(stderr, "Emit box %d/%d %d,%d %d,%d\n",
+                 n, cmdbuf->nbox,
+                 rects[n].x1, rects[n].y1, rects[n].x2, rects[n].y2);
+      }
+   }
+
+   if (cmdbuf->nbox == 1) {
+      cmdbuf->nbox = 0;
+   }
+
+   return radeon_emit_packet3( cmdbuf );
+}
+
+
+/**
+ * Walk the command buffer stored in rmesa->store, checking and logging
+ * every command in it.
+ *
+ * The tracking tables are initialised on the first call.  Each command
+ * starts with a header whose cmd_type selects the handler; the handler
+ * consumes the command's payload from the local cmdbuf copy.
+ *
+ * \param rmesa  context whose store.cmd_buf / store.cmd_used are read
+ * \param nbox   number of entries in \p boxes
+ * \param boxes  clip rectangles listed for RADEON_CMD_PACKET3_CLIP
+ * \return 0 if the whole buffer was accepted, -EINVAL on the first
+ * command that fails its check or has an unknown type.
+ */
+int r200SanityCmdBuffer( r200ContextPtr rmesa,
+			   int nbox,
+			   drm_clip_rect_t *boxes )
+{
+   int idx;
+   drm_radeon_cmd_buffer_t cmdbuf;
+   drm_radeon_cmd_header_t header;
+   static int inited = 0;
+
+   if (!inited) {
+      init_regs();
+      inited = 1;
+   }
+
+   cmdbuf.buf = rmesa->store.cmd_buf;
+   cmdbuf.bufsz = rmesa->store.cmd_used;
+   cmdbuf.boxes = (drm_clip_rect_t *)boxes;
+   cmdbuf.nbox = nbox;
+
+   /* Compare as signed ints: against a bare sizeof() a negative bufsz
+    * would be converted to a huge unsigned value and keep the loop
+    * running past the end of the buffer.
+    */
+   while ( cmdbuf.bufsz >= (int)sizeof(header) ) {
+
+      header.i = *(int *)cmdbuf.buf;
+      cmdbuf.buf += sizeof(header);
+      cmdbuf.bufsz -= sizeof(header);
+
+      switch (header.header.cmd_type) {
+      case RADEON_CMD_PACKET:
+         if (radeon_emit_packets( header, &cmdbuf )) {
+            fprintf(stderr,"radeon_emit_packets failed\n");
+            return -EINVAL;
+         }
+         break;
+
+      case RADEON_CMD_SCALARS:
+         if (radeon_emit_scalars( header, &cmdbuf )) {
+            fprintf(stderr,"radeon_emit_scalars failed\n");
+            return -EINVAL;
+         }
+         break;
+
+      case RADEON_CMD_SCALARS2:
+         if (radeon_emit_scalars2( header, &cmdbuf )) {
+            fprintf(stderr,"radeon_emit_scalars2 failed\n");
+            return -EINVAL;
+         }
+         break;
+
+      case RADEON_CMD_VECTORS:
+         if (radeon_emit_vectors( header, &cmdbuf )) {
+            fprintf(stderr,"radeon_emit_vectors failed\n");
+            return -EINVAL;
+         }
+         break;
+
+      case RADEON_CMD_DMA_DISCARD:
+         idx = header.dma.buf_idx;
+         if (NORMAL)
+            fprintf(stderr, "RADEON_CMD_DMA_DISCARD buf %d\n", idx);
+         bufs++;
+         break;
+
+      case RADEON_CMD_PACKET3:
+         if (radeon_emit_packet3( &cmdbuf )) {
+            fprintf(stderr,"radeon_emit_packet3 failed\n");
+            return -EINVAL;
+         }
+         break;
+
+      case RADEON_CMD_PACKET3_CLIP:
+         if (radeon_emit_packet3_cliprect( &cmdbuf )) {
+            fprintf(stderr,"radeon_emit_packet3_clip failed\n");
+            return -EINVAL;
+         }
+         break;
+
+      case RADEON_CMD_WAIT:
+         /* Header only; nothing to check. */
+         break;
+
+      case RADEON_CMD_VECLINEAR:
+         if (radeon_emit_veclinear( header, &cmdbuf )) {
+            fprintf(stderr,"radeon_emit_veclinear failed\n");
+            return -EINVAL;
+         }
+         break;
+
+      default:
+         /* "%p" requires a void pointer argument. */
+         fprintf(stderr,"bad cmd_type %d at %p\n",
+                 header.header.cmd_type,
+                 (void *)(cmdbuf.buf - sizeof(header)));
+         return -EINVAL;
+      }
+   }
+
+   /* Disabled statistics: every tenth call, report and reset the
+    * emitted / changed counters.
+    */
+   if (0)
+   {
+      static int n = 0;
+      n++;
+      if (n == 10) {
+         fprintf(stderr, "Bufs %d Total emitted %d real changes %d (%.2f%%)\n",
+                 bufs,
+                 total, total_changed,
+                 ((float)total_changed/(float)total*100.0));
+         fprintf(stderr, "Total emitted per buf: %.2f\n",
+                 (float)total/(float)bufs);
+         fprintf(stderr, "Real changes per buf: %.2f\n",
+                 (float)total_changed/(float)bufs);
+
+         bufs = n = total = total_changed = 0;
+      }
+   }
+
+   fprintf(stderr, "leaving %s\n\n\n", __FUNCTION__);
+
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/r200/r200_sanity.h b/src/mesa/drivers/dri/r200/r200_sanity.h
new file mode 100644
index 0000000000..f4c110dba6
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_sanity.h
@@ -0,0 +1,8 @@
+#ifndef R200_SANITY_H
+#define R200_SANITY_H
+
+/* Debug check of the command buffer held in rmesa->store: every command
+ * is decoded and logged to stderr.  Returns 0 when the whole buffer is
+ * accepted and -EINVAL when a command fails its check.  nbox / boxes are
+ * the clip rectangles listed for cliprect packets.
+ */
+extern int r200SanityCmdBuffer( r200ContextPtr rmesa,
+				int nbox,
+				drm_clip_rect_t *boxes );
+
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c
new file mode 100644
index 0000000000..29d7bed8b6
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_state.c
@@ -0,0 +1,2545 @@
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/api_arrayelt.h"
+#include "main/enums.h"
+#include "main/colormac.h"
+#include "main/light.h"
+#include "main/framebuffer.h"
+
+#include "swrast/swrast.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "swrast_setup/swrast_setup.h"
+#include "drivers/common/meta.h"
+
+#include "radeon_common.h"
+#include "radeon_mipmap_tree.h"
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_state.h"
+#include "r200_tcl.h"
+#include "r200_tex.h"
+#include "r200_swtcl.h"
+#include "r200_vertprog.h"
+
+
+/* =============================================================
+ * Alpha blending
+ */
+
+/**
+ * Program the alpha test: store the comparison selected by \p func and
+ * the reference value \p ref (converted to an unsigned byte) in the
+ * PP_MISC word of the context state.
+ *
+ * A \p func value that matches none of the cases leaves the comparison
+ * bits cleared.
+ */
+static void r200AlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   int misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
+   int test_op = 0;
+   GLubyte ref8;
+
+   CLAMPED_FLOAT_TO_UBYTE(ref8, ref);
+
+   R200_STATECHANGE( rmesa, ctx );
+
+   switch ( func ) {
+   case GL_NEVER:    test_op = R200_ALPHA_TEST_FAIL;    break;
+   case GL_LESS:     test_op = R200_ALPHA_TEST_LESS;    break;
+   case GL_EQUAL:    test_op = R200_ALPHA_TEST_EQUAL;   break;
+   case GL_LEQUAL:   test_op = R200_ALPHA_TEST_LEQUAL;  break;
+   case GL_GREATER:  test_op = R200_ALPHA_TEST_GREATER; break;
+   case GL_NOTEQUAL: test_op = R200_ALPHA_TEST_NEQUAL;  break;
+   case GL_GEQUAL:   test_op = R200_ALPHA_TEST_GEQUAL;  break;
+   case GL_ALWAYS:   test_op = R200_ALPHA_TEST_PASS;    break;
+   }
+
+   /* Replace the old comparison and reference fields in one go. */
+   misc &= ~(R200_ALPHA_TEST_OP_MASK | R200_REF_ALPHA_MASK);
+   misc |= (ref8 & R200_REF_ALPHA_MASK);
+   misc |= test_op;
+
+   rmesa->hw.ctx.cmd[CTX_PP_MISC] = misc;
+}
+
+/**
+ * Store the constant blend colour \p cf, converted to four unsigned
+ * bytes and packed with radeonPackColor(), in the RB3D_BLENDCOLOR word.
+ * The word is only written when the screen reports
+ * drmSupportsBlendColor.
+ */
+static void r200BlendColor( GLcontext *ctx, const GLfloat cf[4] )
+{
+   GLubyte rgba[4];
+   int c;
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   R200_STATECHANGE( rmesa, ctx );
+
+   for (c = 0 ; c < 4 ; c++) {
+      CLAMPED_FLOAT_TO_UBYTE(rgba[c], cf[c]);
+   }
+
+   if (!rmesa->radeon.radeonScreen->drmSupportsBlendColor)
+      return;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] =
+      radeonPackColor( 4, rgba[0], rgba[1], rgba[2], rgba[3] );
+}
+
+/**
+ * Translate a GL blend factor into the hardware blend factor value.
+ * The same function serves source and destination, RGB and alpha.
+ *
+ * \param factor  GL blend factor enum
+ * \param is_src  GL_TRUE when translating a source factor
+ *
+ * \returns
+ * The unshifted hardware value; the caller shifts it into the source or
+ * destination field.
+ *
+ * \todo
+ * Source and destination only differ for GL_SRC_ALPHA_SATURATE used as
+ * a destination factor and for unrecognised factors.  Both are
+ * essentially error cases that should never happen; determine whether
+ * they can be removed.
+ */
+static int blend_factor( GLenum factor, GLboolean is_src )
+{
+   switch ( factor ) {
+   case GL_ZERO:                     return R200_BLEND_GL_ZERO;
+   case GL_ONE:                      return R200_BLEND_GL_ONE;
+   case GL_DST_COLOR:                return R200_BLEND_GL_DST_COLOR;
+   case GL_ONE_MINUS_DST_COLOR:      return R200_BLEND_GL_ONE_MINUS_DST_COLOR;
+   case GL_SRC_COLOR:                return R200_BLEND_GL_SRC_COLOR;
+   case GL_ONE_MINUS_SRC_COLOR:      return R200_BLEND_GL_ONE_MINUS_SRC_COLOR;
+   case GL_SRC_ALPHA:                return R200_BLEND_GL_SRC_ALPHA;
+   case GL_ONE_MINUS_SRC_ALPHA:      return R200_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+   case GL_DST_ALPHA:                return R200_BLEND_GL_DST_ALPHA;
+   case GL_ONE_MINUS_DST_ALPHA:      return R200_BLEND_GL_ONE_MINUS_DST_ALPHA;
+   case GL_CONSTANT_COLOR:           return R200_BLEND_GL_CONST_COLOR;
+   case GL_ONE_MINUS_CONSTANT_COLOR: return R200_BLEND_GL_ONE_MINUS_CONST_COLOR;
+   case GL_CONSTANT_ALPHA:           return R200_BLEND_GL_CONST_ALPHA;
+   case GL_ONE_MINUS_CONSTANT_ALPHA: return R200_BLEND_GL_ONE_MINUS_CONST_ALPHA;
+
+   case GL_SRC_ALPHA_SATURATE:
+      if (is_src)
+         return R200_BLEND_GL_SRC_ALPHA_SATURATE;
+      return R200_BLEND_GL_ZERO;
+
+   default:
+      /* Unknown factor: behave like the GL defaults (ONE, ZERO). */
+      if (is_src)
+         return R200_BLEND_GL_ONE;
+      return R200_BLEND_GL_ZERO;
+   }
+}
+
+/**
+ * Sets both the blend equation and the blend function.
+ * This is done in a single
+ * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX)
+ * change the interpretation of the blend function.
+ * Also, make sure that blend function and blend equation are set to their default
+ * value if color blending is not enabled, since at least blend equations GL_MIN
+ * and GL_FUNC_REVERSE_SUBTRACT will cause wrong results otherwise for
+ * unknown reasons.
+ */
+static void r200_set_blend_state( GLcontext * ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint cntl = rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &
+      ~(R200_ROP_ENABLE | R200_ALPHA_BLEND_ENABLE | R200_SEPARATE_ALPHA_ENABLE);
+   /* Pass-through defaults for RGB (func/eqn) and alpha (funcA/eqnA): source factor ONE, dest factor ZERO, ADD. */
+   int func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+      (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT);
+   int eqn = R200_COMB_FCN_ADD_CLAMP;
+   int funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+      (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT);
+   int eqnA = R200_COMB_FCN_ADD_CLAMP;
+   /* cntl is the current RB3D_CNTL with the ROP/blend/separate-alpha enables stripped; each branch adds back what it needs. */
+   R200_STATECHANGE( rmesa, ctx );
+   /* Logic op and "blending disabled" both program the defaults above and return; only BlendEnabled falls through. */
+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {   /* separate ABLENDCNTL/CBLENDCNTL registers */
+      if (ctx->Color.ColorLogicOpEnabled) {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ROP_ENABLE;
+         rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
+         rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func;
+         return;
+      } else if (ctx->Color.BlendEnabled) {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ALPHA_BLEND_ENABLE | R200_SEPARATE_ALPHA_ENABLE;
+      }
+      else {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl;
+         rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func;
+         rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func;
+         return;
+      }
+   }
+   else {   /* single BLENDCNTL register */
+      if (ctx->Color.ColorLogicOpEnabled) {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ROP_ENABLE;
+         rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
+         return;
+      } else if (ctx->Color.BlendEnabled) {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] =  cntl | R200_ALPHA_BLEND_ENABLE;
+      }
+      else {
+         rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl;
+         rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
+         return;
+      }
+   }
+   /* Only reached with blending enabled: translate the GL RGB factors and equation. */
+   func = (blend_factor( ctx->Color.BlendSrcRGB, GL_TRUE ) << R200_SRC_BLEND_SHIFT) |
+      (blend_factor( ctx->Color.BlendDstRGB, GL_FALSE ) << R200_DST_BLEND_SHIFT);
+   /* GL_MIN and GL_MAX replace the translated factors with ONE/ONE. */
+   switch(ctx->Color.BlendEquationRGB) {
+   case GL_FUNC_ADD:
+      eqn = R200_COMB_FCN_ADD_CLAMP;
+      break;
+
+   case GL_FUNC_SUBTRACT:
+      eqn = R200_COMB_FCN_SUB_CLAMP;
+      break;
+
+   case GL_FUNC_REVERSE_SUBTRACT:
+      eqn = R200_COMB_FCN_RSUB_CLAMP;
+      break;
+
+   case GL_MIN:
+      eqn = R200_COMB_FCN_MIN;
+      func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+         (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
+      break;
+
+   case GL_MAX:
+      eqn = R200_COMB_FCN_MAX;
+      func = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+         (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
+      break;
+
+   default:
+      fprintf( stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
+         __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB );
+      return;   /* RB3D_CNTL was already rewritten above; the blend registers keep their old values */
+   }
+   /* Without separate alpha support the RGB setup goes into the single BLENDCNTL register. */
+   if (!rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func;
+      return;
+   }
+   /* Alpha factors and equation, translated the same way as the RGB ones above. */
+   funcA = (blend_factor( ctx->Color.BlendSrcA, GL_TRUE ) << R200_SRC_BLEND_SHIFT) |
+      (blend_factor( ctx->Color.BlendDstA, GL_FALSE ) << R200_DST_BLEND_SHIFT);
+
+   switch(ctx->Color.BlendEquationA) {
+   case GL_FUNC_ADD:
+      eqnA = R200_COMB_FCN_ADD_CLAMP;
+      break;
+
+   case GL_FUNC_SUBTRACT:
+      eqnA = R200_COMB_FCN_SUB_CLAMP;
+      break;
+
+   case GL_FUNC_REVERSE_SUBTRACT:
+      eqnA = R200_COMB_FCN_RSUB_CLAMP;
+      break;
+
+   case GL_MIN:
+      eqnA = R200_COMB_FCN_MIN;
+      funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+         (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
+      break;
+
+   case GL_MAX:
+      eqnA = R200_COMB_FCN_MAX;
+      funcA = (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+         (R200_BLEND_GL_ONE << R200_DST_BLEND_SHIFT);
+      break;
+
+   default:
+      fprintf( stderr, "[%s:%u] Invalid A blend equation (0x%04x).\n",
+         __FUNCTION__, __LINE__, ctx->Color.BlendEquationA );
+      return;   /* neither ABLENDCNTL nor CBLENDCNTL is written in this case */
+   }
+   /* Both registers are written together once both halves translated successfully. */
+   rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqnA | funcA;
+   rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = eqn | func;
+
+}
+
+static void r200BlendEquationSeparate( GLcontext *ctx, GLenum modeRGB, GLenum modeA )
+{
+   /* modeRGB/modeA are not read here; r200_set_blend_state() uses ctx->Color. */
+   r200_set_blend_state( ctx );
+}
+
+static void r200BlendFuncSeparate( GLcontext *ctx, GLenum sfactorRGB, GLenum dfactorRGB,
+				     GLenum sfactorA, GLenum dfactorA )
+{
+   /* The factor arguments are not read here; r200_set_blend_state() uses ctx->Color. */
+   r200_set_blend_state( ctx );
+}
+
+
+/* =============================================================
+ * Depth testing
+ */
+
+static void r200DepthFunc( GLcontext *ctx, GLenum func )
+{
+   /* Select the hardware Z comparison.
+    *
+    * ctx:  GL context; the comparison is read from ctx->Depth.Func.
+    * func: not read by this function.
+    *
+    * Only the Z test field of RB3D_ZSTENCILCNTL is replaced.  A
+    * Depth.Func value outside the eight comparison enums leaves that
+    * field cleared.
+    */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint ztest = 0;
+   GLuint zsc;
+
+   /* Pick the field value first; nothing is written yet. */
+   switch ( ctx->Depth.Func ) {
+   case GL_NEVER:    ztest = R200_Z_TEST_NEVER;   break;
+   case GL_LESS:     ztest = R200_Z_TEST_LESS;    break;
+   case GL_EQUAL:    ztest = R200_Z_TEST_EQUAL;   break;
+   case GL_LEQUAL:   ztest = R200_Z_TEST_LEQUAL;  break;
+   case GL_GREATER:  ztest = R200_Z_TEST_GREATER; break;
+   case GL_NOTEQUAL: ztest = R200_Z_TEST_NEQUAL;  break;
+   case GL_GEQUAL:   ztest = R200_Z_TEST_GEQUAL;  break;
+   case GL_ALWAYS:   ztest = R200_Z_TEST_ALWAYS;  break;
+   default:
+      break;
+   }
+
+   /* R200_STATECHANGE comes before the register is touched. */
+   R200_STATECHANGE( rmesa, ctx );
+   zsc = rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] & ~R200_Z_TEST_MASK;
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = zsc | ztest;
+}
+
+static void r200ClearDepth( GLcontext *ctx, GLclampd d )
+{
+   /* Scale the clear depth d to the integer range of the current depth
+    * format and store it; any other format leaves the stored value alone.
+    */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint zfmt =
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] & R200_DEPTH_FORMAT_MASK;
+
+   if ( zfmt == R200_DEPTH_FORMAT_16BIT_INT_Z ) {
+      rmesa->radeon.state.depth.clear = d * 0x0000ffff;
+   } else if ( zfmt == R200_DEPTH_FORMAT_24BIT_INT_Z ) {
+      rmesa->radeon.state.depth.clear = d * 0x00ffffff;
+   }
+}
+
+static void r200DepthMask( GLcontext *ctx, GLboolean flag )
+{
+   /* Mirror ctx->Depth.Mask into the Z write enable bit ('flag' is not read). */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint zsc;
+
+   R200_STATECHANGE( rmesa, ctx );
+   zsc = rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] & ~R200_Z_WRITE_ENABLE;
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] =
+      ctx->Depth.Mask ? (zsc | R200_Z_WRITE_ENABLE) : zsc;
+}
+
+
+/* =============================================================
+ * Fog
+ */
+
+
+static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   union { int i; float f; } c, d;   /* int/float views of the FOG_C and FOG_D command words */
+   GLchan col[4];
+   GLuint i;
+   /* Start from the constants currently in the fog atom so that cases which do not touch them compare equal below. */
+   c.i = rmesa->hw.fog.cmd[FOG_C];
+   d.i = rmesa->hw.fog.cmd[FOG_D];
+   /* Every case reads its value from ctx->Fog; 'param' itself is not read. */
+   switch (pname) {
+   case GL_FOG_MODE:
+      if (!ctx->Fog.Enabled)   /* nothing is programmed while fog is disabled */
+	 return;
+      R200_STATECHANGE(rmesa, tcl);
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
+      switch (ctx->Fog.Mode) {
+      case GL_LINEAR:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_LINEAR;
+	 if (ctx->Fog.Start == ctx->Fog.End) {   /* avoids dividing by zero in the else branch */
+	    c.f = 1.0F;
+	    d.f = 1.0F;
+	 }
+	 else {
+	    c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+	    d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start);
+	 }
+	 break;
+      case GL_EXP:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_EXP;
+	 c.f = 0.0;
+	 d.f = -ctx->Fog.Density;
+	 break;
+      case GL_EXP2:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_TCL_FOG_EXP2;
+	 c.f = 0.0;
+	 d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+	 break;
+      default:
+	 return;   /* the TCL fog mode bits were already cleared above; the fog constants are not updated */
+      }
+      break;
+   case GL_FOG_DENSITY:   /* same constants as the GL_FOG_MODE case, without touching the TCL mode bits */
+      switch (ctx->Fog.Mode) {
+      case GL_EXP:
+	 c.f = 0.0;
+	 d.f = -ctx->Fog.Density;
+	 break;
+      case GL_EXP2:
+	 c.f = 0.0;
+	 d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+	 break;
+      default:
+	 break;
+      }
+      break;
+   case GL_FOG_START:
+   case GL_FOG_END:
+      if (ctx->Fog.Mode == GL_LINEAR) {   /* start/end only affect the constants in linear mode */
+	 if (ctx->Fog.Start == ctx->Fog.End) {
+	    c.f = 1.0F;
+	    d.f = 1.0F;
+	 } else {
+	    c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+	    d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start);
+	 }
+      }
+      break;
+   case GL_FOG_COLOR:
+      R200_STATECHANGE( rmesa, ctx );
+      UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
+      i = radeonPackColor( 4, col[0], col[1], col[2], 0 );   /* packed with alpha argument 0 */
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK;
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i;
+      break;
+   case GL_FOG_COORD_SRC: {
+      GLuint out_0 = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0];
+      GLuint fog   = rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR];
+      /* Use the vertex fog value for GL_FOG_COORD or when a vertex program is enabled, else specular alpha. */
+      fog &= ~R200_FOG_USE_MASK;
+      if ( ctx->Fog.FogCoordinateSource == GL_FOG_COORD || ctx->VertexProgram.Enabled) {
+	 fog   |= R200_FOG_USE_VTX_FOG;
+	 out_0 |= R200_VTX_DISCRETE_FOG;
+      }
+      else {
+	 fog   |=  R200_FOG_USE_SPEC_ALPHA;
+	 out_0 &= ~R200_VTX_DISCRETE_FOG;
+      }
+      /* Each atom is only flagged and rewritten when its word actually changed. */
+      if ( fog != rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] ) {
+	 R200_STATECHANGE( rmesa, ctx );
+	 rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = fog;
+      }
+
+      if (out_0 != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0]) {
+	 R200_STATECHANGE( rmesa, vtx );
+	 rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = out_0;
+      }
+
+      break;
+   }
+   default:
+      return;
+   }
+   /* Shared tail: write the fog constants back only if one of the cases above changed them. */
+   if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) {
+      R200_STATECHANGE( rmesa, fog );
+      rmesa->hw.fog.cmd[FOG_C] = c.i;
+      rmesa->hw.fog.cmd[FOG_D] = d.i;
+   }
+}
+
+/* =============================================================
+ * Culling
+ */
+
+static void r200CullFace( GLcontext *ctx, GLenum unused )
+{
+   /* Derive culling state from ctx->Polygon for SE_CNTL (solid-face bits)
+    * and the TCL cull bits; an atom is flagged only if its value changes.
+    */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
+   GLuint tcl_ctl = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];
+   int cull_front = 0;
+   int cull_back = 0;
+
+   if ( ctx->Polygon.CullFlag ) {
+      const GLenum which = ctx->Polygon.CullFaceMode;
+      cull_front = (which == GL_FRONT || which == GL_FRONT_AND_BACK);
+      cull_back  = (which == GL_BACK  || which == GL_FRONT_AND_BACK);
+   }
+   se_cntl |= R200_FFACE_SOLID | R200_BFACE_SOLID;
+   tcl_ctl &= ~(R200_CULL_FRONT | R200_CULL_BACK);
+   if ( cull_front ) {
+      se_cntl &= ~R200_FFACE_SOLID;
+      tcl_ctl |= R200_CULL_FRONT;
+   }
+   if ( cull_back ) {
+      se_cntl &= ~R200_BFACE_SOLID;
+      tcl_ctl |= R200_CULL_BACK;
+   }
+
+   if ( se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL] ) {
+      R200_STATECHANGE(rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
+   }
+   if ( tcl_ctl != rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] ) {
+      R200_STATECHANGE(rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = tcl_ctl;
+   }
+}
+
+static void r200FrontFace( GLcontext *ctx, GLenum mode )
+{
+   /* Program the front-face winding into SE_CNTL and the TCL control word.
+    * CW/CCW are swapped when the draw buffer has a non-zero Name (winding
+    * is inverted when rendering to an FBO).
+    */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLenum winding = mode;
+
+   R200_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_FFACE_CULL_DIR_MASK;
+   R200_STATECHANGE( rmesa, tcl );
+   rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_CULL_FRONT_IS_CCW;
+
+   if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+      winding = (mode == GL_CW) ? GL_CCW : GL_CW;
+
+   if ( winding == GL_CW ) {
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_FFACE_CULL_CW;
+   } else if ( winding == GL_CCW ) {
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= R200_FFACE_CULL_CCW;
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= R200_CULL_FRONT_IS_CCW;
+   }
+}
+
+/* =============================================================
+ * Point state
+ */
+static void r200PointSize( GLcontext *ctx, GLfloat size )
+{
+   /* Store ctx->Point.Size both as the low 16 bits of RE_POINTSIZE (value
+    * times 16, truncated) and as the float input of the point size
+    * calculation.  'size' is only used for the trace message.
+    */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLfloat *ptsize_cmd = (GLfloat *)rmesa->hw.ptp.cmd;
+   const GLuint fixed = (GLuint)(ctx->Point.Size * 16.0);
+
+   radeon_print(RADEON_STATE, RADEON_TRACE,
+       "%s(%p) size: %f, fixed point result: %d.%d (%d/16)\n",
+       __func__, ctx, size, fixed / 16, (fixed & 15) * 100 / 16, fixed & 15);
+
+   R200_STATECHANGE( rmesa, cst );
+   R200_STATECHANGE( rmesa, ptp );
+   rmesa->hw.cst.cmd[CST_RE_POINTSIZE] &= ~0xffff;
+   rmesa->hw.cst.cmd[CST_RE_POINTSIZE] |= fixed;
+   ptsize_cmd[PTP_VPORT_SCALE_PTSIZE] = ctx->Point.Size; /* size param of the calculation; the register value is not used while it is active */
+}
+
+static void r200PointParameter( GLcontext *ctx, GLenum pname, const GLfloat *params)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLfloat *fcmd = (GLfloat *)rmesa->hw.ptp.cmd;
+   /* Values are read from ctx->Point ('params' is not read).  An unhandled pname is reported on stderr. */
+   switch (pname) {
+   case GL_POINT_SIZE_MIN:
+   /* Can clamp both in tcl and setup - just set both (as does fglrx).
+    * The setup-side minimum goes into the upper 16 bits of SE_LINE_WIDTH. */
+      R200_STATECHANGE( rmesa, lin );
+      R200_STATECHANGE( rmesa, ptp );
+      rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] &= 0xffff;
+      rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= (GLuint)(ctx->Point.MinSize * 16.0) << 16;
+      fcmd[PTP_CLAMP_MIN] = ctx->Point.MinSize;
+      break;
+   case GL_POINT_SIZE_MAX:   /* setup-side maximum: upper 16 bits of RE_POINTSIZE */
+      R200_STATECHANGE( rmesa, cst );
+      R200_STATECHANGE( rmesa, ptp );
+      rmesa->hw.cst.cmd[CST_RE_POINTSIZE] &= 0xffff;
+      rmesa->hw.cst.cmd[CST_RE_POINTSIZE] |= (GLuint)(ctx->Point.MaxSize * 16.0) << 16;
+      fcmd[PTP_CLAMP_MAX] = ctx->Point.MaxSize;
+      break;
+   case GL_POINT_DISTANCE_ATTENUATION:
+      R200_STATECHANGE( rmesa, vtx );
+      R200_STATECHANGE( rmesa, spr );
+      R200_STATECHANGE( rmesa, ptp );
+      /* fcmd, declared at function scope, already aliases the ptp atom; no local copy is needed here. */
+      rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] &=
+	 ~(R200_PS_MULT_MASK | R200_PS_LIN_ATT_ZERO | R200_PS_SE_SEL_STATE);
+      /* can't rely on ctx->Point._Attenuated here and test for NEW_POINT in
+	 r200ValidateState looks like overkill */
+      if (ctx->Point.Params[0] != 1.0 ||
+	  ctx->Point.Params[1] != 0.0 ||
+	  ctx->Point.Params[2] != 0.0 ||
+	  (ctx->VertexProgram.Enabled && ctx->VertexProgram.PointSizeEnabled)) {
+	 /* all we care for vp would be the ps_se_sel_state setting */
+	 fcmd[PTP_ATT_CONST_QUAD] = ctx->Point.Params[2];
+	 fcmd[PTP_ATT_CONST_LIN] = ctx->Point.Params[1];
+	 fcmd[PTP_ATT_CONST_CON] = ctx->Point.Params[0];
+	 rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= R200_PS_MULT_ATTENCONST;
+	 if (ctx->Point.Params[1] == 0.0)
+	    rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= R200_PS_LIN_ATT_ZERO;
+/* FIXME: setting this here doesn't look quite ok - we only want to do
+          that if we're actually drawing points probably */
+	 rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_PT_SIZE;
+	 rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= R200_VTX_POINT_SIZE;
+      }
+      else {   /* default attenuation (1, 0, 0) and no vertex program point size */
+	 rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |=
+	    R200_PS_SE_SEL_STATE | R200_PS_MULT_CONST;
+	 rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_PT_SIZE;
+	 rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~R200_VTX_POINT_SIZE;
+      }
+      break;
+   case GL_POINT_FADE_THRESHOLD_SIZE:
+      /* don't support multisampling, so doesn't matter. */
+      break;
+   /* can't do these but don't need them.
+   case GL_POINT_SPRITE_R_MODE_NV:
+   case GL_POINT_SPRITE_COORD_ORIGIN: */
+   default:
+      fprintf(stderr, "bad pname parameter in r200PointParameter\n");
+      return;
+   }
+}
+
+/* =============================================================
+ * Line state
+ */
+static void r200LineWidth( GLcontext *ctx, GLfloat widthf )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint width_fixed;
+
+   /* Line width is stored in U6.4 format, clamped to the context limits.
+    * Same min/max limits for AA, non-AA lines.
+    */
+   width_fixed = (GLuint)
+      (CLAMP(widthf, ctx->Const.MinLineWidth, ctx->Const.MaxLineWidth) * 16.0);
+   R200_STATECHANGE( rmesa, lin );
+   R200_STATECHANGE( rmesa, set );
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] &= ~0xffff;
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] |= width_fixed;
+   /* Wide-line enable is decided on the unclamped width. */
+   if ( widthf > 1.0 )
+      rmesa->hw.set.cmd[SET_SE_CNTL] |=  R200_WIDELINE_ENABLE;
+   else
+      rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_WIDELINE_ENABLE;
+}
+
+static void r200LineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
+{
+   /* Pack the low 8 bits of factor into bits 16..23, above the 16-bit pattern. */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint repeat = ((GLuint)factor & 0xff) << 16;
+   R200_STATECHANGE( rmesa, lin );
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = repeat | (GLuint)pattern;
+}
+
+
+/* =============================================================
+ * Masks
+ */
+static void r200ColorMask( GLcontext *ctx,
+			   GLboolean r, GLboolean g,
+			   GLboolean b, GLboolean a )
+{
+   /* Update the plane-mask enable bit (from r/g/b/a) and the packed plane
+    * mask (from ctx->Color.ColorMask[0]).  Does nothing when no colour
+    * buffer is available.  Atoms are only touched when a value changes.
+    */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint rb3d_cntl = rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] & ~R200_PLANE_MASK_ENABLE;
+   struct radeon_renderbuffer *color_rb = radeon_get_colorbuffer(&rmesa->radeon);
+   GLuint planemask;
+
+   if (!color_rb)
+      return;
+
+   planemask = radeonPackColor( color_rb->cpp,
+				ctx->Color.ColorMask[0][RCOMP],
+				ctx->Color.ColorMask[0][GCOMP],
+				ctx->Color.ColorMask[0][BCOMP],
+				ctx->Color.ColorMask[0][ACOMP] );
+   if (!r || !g || !b || !a)
+      rb3d_cntl |= R200_PLANE_MASK_ENABLE;
+   if ( rb3d_cntl != rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] ) {
+      R200_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = rb3d_cntl;
+   }
+   if ( planemask != rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] ) {
+      R200_STATECHANGE( rmesa, msk );
+      rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = planemask;
+   }
+}
+
+
+/* =============================================================
+ * Polygon state
+ */
+
+static void r200PolygonOffset( GLcontext *ctx,
+			       GLfloat factor, GLfloat units )
+{
+   /* Program the Z bias registers.  The factor is passed through as is;
+    * the units are scaled by 1 / ctx->DrawBuffer->_DepthMaxF.  Both values
+    * are stored through the .ui32 member of float_ui32_type (presumably a
+    * float/uint32 union -- confirm against its declaration).
+    */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLfloat inv_depth_max = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   float_ui32_type bias_constant = { units * inv_depth_max };
+   float_ui32_type bias_factor = { factor };
+
+   R200_STATECHANGE( rmesa, zbs );
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR]   = bias_factor.ui32;
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = bias_constant.ui32;
+}
+
+static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   /* Unfilled triangles can't generally be done via tcl (some good special
+    * cases work), so the UNFILLED fallback bit follows DD_TRI_UNFILLED in
+    * ctx->_TriangleCaps.  'face' and 'mode' are not read.
+    */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLboolean unfilled = (ctx->_TriangleCaps & DD_TRI_UNFILLED) ? 1 : 0;
+   TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, unfilled );
+   if (!rmesa->radeon.TclFallback)
+      return;
+   r200ChooseRenderState( ctx );
+   r200ChooseVertexState( ctx );
+}
+
+
+/* =============================================================
+ * Rendering attributes
+ *
+ * We really don't want to recalculate all this every time we bind a
+ * texture.  These things shouldn't change all that often, so it makes
+ * sense to break them out of the core texture state update routines.
+ */
+
+/* Examine lighting and texture state to determine if separate specular
+ * should be enabled.
+ */
+static void r200UpdateSpecular( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];   /* working copy; written back at the end only if it changed */
+   /* The tcl and vtx atoms are flagged unconditionally; the ctx atom only if PP_CNTL changes (see below). */
+   R200_STATECHANGE( rmesa, tcl );
+   R200_STATECHANGE( rmesa, vtx );
+   /* Reset both colour outputs, the lighting enable and the specular enable, then rebuild them from GL state. */
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~(3<<R200_VTX_COLOR_0_SHIFT);
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] &= ~(3<<R200_VTX_COLOR_1_SHIFT);
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_COLOR_0;
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] &= ~R200_OUTPUT_COLOR_1;
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LIGHTING_ENABLE;
+
+   p &= ~R200_SPECULAR_ENABLE;
+   /* Default is diffuse/specular combine; only the separate-specular case below clears it again. */
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_DIFFUSE_SPECULAR_COMBINE;
+
+   /* Four cases: lighting with separate specular, plain lighting, colour sum without lighting, none of these. */
+   if (ctx->Light.Enabled &&
+       ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) {
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
+	 ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) |
+	  (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0;
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE;
+      p |=  R200_SPECULAR_ENABLE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &=
+	 ~R200_DIFFUSE_SPECULAR_COMBINE;
+   }
+   else if (ctx->Light.Enabled) {   /* lighting without separate specular: colour 0 only */
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
+	 ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT));
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE;
+   } else if (ctx->Fog.ColorSumEnabled ) {   /* no lighting: both colour formats set, COMPSEL outputs left cleared */
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
+	 ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) |
+	  (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));
+      p |=  R200_SPECULAR_ENABLE;
+   } else {
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
+	 ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT));
+   }
+   /* Independently of the cases above, fog turns the second colour format and output on. */
+   if (ctx->Fog.Enabled) {
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |=
+	 ((R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT));
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1;
+   }
+   /* PP_CNTL lives in the ctx atom, which is only flagged when the specular bit really changed. */
+   if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) {
+      R200_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p;
+   }
+
+   /* Update vertex/render formats
+    */
+   if (rmesa->radeon.TclFallback) {
+      r200ChooseRenderState( ctx );
+      r200ChooseVertexState( ctx );
+   }
+}
+
+
+/* =============================================================
+ * Materials
+ */
+
+
+/* Update on colormaterial, material emissive/ambient,
+ * lightmodel.globalambient
+ */
+static void update_global_ambient( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   float *ambient = (float *)R200_DB_STATE( glt );
+   const GLuint source_bits =
+      (3 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
+      (3 << R200_FRONT_AMBIENT_SOURCE_SHIFT);
+
+   /* If any front emissive/ambient source bit is set, only the light
+    * model ambient is stored.  Otherwise the material emission is copied
+    * and the light model ambient / material ambient pair is folded in
+    * with ACC_SCALE_3V.  Per the original note, that second case is
+    * believed unnecessary with source_material and never happens now.
+    */
+   if (rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] & source_bits) {
+      COPY_3V( &ambient[GLT_RED], ctx->Light.Model.Ambient );
+   } else {
+      COPY_3V( &ambient[GLT_RED],
+	       ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]);
+      ACC_SCALE_3V( &ambient[GLT_RED],
+		   ctx->Light.Model.Ambient,
+		   ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]);
+   }
+
+   R200_DB_STATECHANGE(rmesa, &rmesa->hw.glt);
+}
+
+/* Update on change to
+ *    - light[p].colors
+ *    - light[p].enabled
+ */
+static void update_light_colors( GLcontext *ctx, GLuint p )
+{
+   /* Copy the ambient, diffuse and specular colours of light p into its
+    * "lit" state atom.  A disabled light is left untouched.
+    */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct gl_light *light = &ctx->Light.Light[p];
+   float *dst;
+
+   if (!light->Enabled)
+      return;
+   dst = (float *)R200_DB_STATE( lit[p] );
+   COPY_4V( &dst[LIT_AMBIENT_RED], light->Ambient );
+   COPY_4V( &dst[LIT_DIFFUSE_RED], light->Diffuse );
+   COPY_4V( &dst[LIT_SPECULAR_RED], light->Specular );
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+}
+
+static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
+{
+      r200ContextPtr rmesa = R200_CONTEXT(ctx);
+      GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1];   /* working copy, written back only if changed */
+      light_model_ctl1 &= ~((0xf << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
+			   (0xf << R200_FRONT_AMBIENT_SOURCE_SHIFT) |
+			   (0xf << R200_FRONT_DIFFUSE_SOURCE_SHIFT) |
+		   (0xf << R200_FRONT_SPECULAR_SOURCE_SHIFT) |
+		   (0xf << R200_BACK_EMISSIVE_SOURCE_SHIFT) |
+		   (0xf << R200_BACK_AMBIENT_SOURCE_SHIFT) |
+		   (0xf << R200_BACK_DIFFUSE_SOURCE_SHIFT) |
+		   (0xf << R200_BACK_SPECULAR_SOURCE_SHIFT));
+   /* All eight source fields were cleared above and are rebuilt below; 'face' and 'mode' are not read. */
+   if (ctx->Light.ColorMaterialEnabled) {
+      GLuint mask = ctx->Light.ColorMaterialBitmask;
+      /* Attributes in the bitmask take vertex colour 0; the rest take MATERIAL_0 (front) or MATERIAL_1 (back). */
+      if (mask & MAT_BIT_FRONT_EMISSION) {
+	 light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+			     R200_FRONT_EMISSIVE_SOURCE_SHIFT);
+      }
+      else
+	 light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
+			     R200_FRONT_EMISSIVE_SOURCE_SHIFT);
+      /* front ambient */
+      if (mask & MAT_BIT_FRONT_AMBIENT) {
+	 light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+			     R200_FRONT_AMBIENT_SOURCE_SHIFT);
+      }
+      else
+         light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
+			     R200_FRONT_AMBIENT_SOURCE_SHIFT);
+      /* front diffuse */
+      if (mask & MAT_BIT_FRONT_DIFFUSE) {
+	 light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+			     R200_FRONT_DIFFUSE_SOURCE_SHIFT);
+      }
+      else
+         light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
+			     R200_FRONT_DIFFUSE_SOURCE_SHIFT);
+      /* front specular */
+      if (mask & MAT_BIT_FRONT_SPECULAR) {
+	 light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+			     R200_FRONT_SPECULAR_SOURCE_SHIFT);
+      }
+      else {
+         light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 <<
+			     R200_FRONT_SPECULAR_SOURCE_SHIFT);
+      }
+      /* back emissive */
+      if (mask & MAT_BIT_BACK_EMISSION) {
+	 light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+			     R200_BACK_EMISSIVE_SOURCE_SHIFT);
+      }
+
+      else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 <<
+			     R200_BACK_EMISSIVE_SOURCE_SHIFT);
+      /* back ambient */
+      if (mask & MAT_BIT_BACK_AMBIENT) {
+	 light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+			     R200_BACK_AMBIENT_SOURCE_SHIFT);
+      }
+      else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 <<
+			     R200_BACK_AMBIENT_SOURCE_SHIFT);
+      /* back diffuse */
+      if (mask & MAT_BIT_BACK_DIFFUSE) {
+	 light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+			     R200_BACK_DIFFUSE_SOURCE_SHIFT);
+   }
+      else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 <<
+			     R200_BACK_DIFFUSE_SOURCE_SHIFT);
+      /* back specular */
+      if (mask & MAT_BIT_BACK_SPECULAR) {
+	 light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 <<
+			     R200_BACK_SPECULAR_SOURCE_SHIFT);
+      }
+      else {
+         light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_1 <<
+			     R200_BACK_SPECULAR_SOURCE_SHIFT);
+      }
+      }
+   else {
+       /* Default to SOURCE_MATERIAL:
+        */
+     light_model_ctl1 |=
+        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_AMBIENT_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_DIFFUSE_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_SPECULAR_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_EMISSIVE_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_AMBIENT_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_DIFFUSE_SOURCE_SHIFT) |
+        (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_SPECULAR_SOURCE_SHIFT);
+   }
+   /* The tcl atom is only flagged and rewritten when the register value really changed. */
+   if (light_model_ctl1 != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1]) {
+      R200_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] = light_model_ctl1;
+   }
+
+
+}
+
+void r200UpdateMaterial( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLfloat (*mat)[4] = ctx->Light.Material.Attrib;   /* source: current material attributes */
+   GLfloat *fcmd = (GLfloat *)R200_DB_STATE( mtl[0] );   /* destination for the FRONT attributes */
+   GLfloat *fcmd2 = (GLfloat *)R200_DB_STATE( mtl[1] );   /* destination for the BACK attributes */
+   GLuint mask = ~0;   /* MAT_BIT_* set of attributes to copy; starts as "all" */
+   /* Copies the selected material attributes into the mtl[0]/mtl[1] atoms, then runs R200_DB_STATECHANGE on both. */
+   /* Might be possible and faster to update everything unconditionally? */
+   if (ctx->Light.ColorMaterialEnabled)
+      mask &= ~ctx->Light.ColorMaterialBitmask;   /* attributes tracked by color material are not copied */
+
+   if (R200_DEBUG & RADEON_STATE)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   /* Front-face attributes. */
+   if (mask & MAT_BIT_FRONT_EMISSION) {
+      fcmd[MTL_EMMISSIVE_RED]   = mat[MAT_ATTRIB_FRONT_EMISSION][0];
+      fcmd[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_FRONT_EMISSION][1];
+      fcmd[MTL_EMMISSIVE_BLUE]  = mat[MAT_ATTRIB_FRONT_EMISSION][2];
+      fcmd[MTL_EMMISSIVE_ALPHA] = mat[MAT_ATTRIB_FRONT_EMISSION][3];
+   }
+   if (mask & MAT_BIT_FRONT_AMBIENT) {
+      fcmd[MTL_AMBIENT_RED]     = mat[MAT_ATTRIB_FRONT_AMBIENT][0];
+      fcmd[MTL_AMBIENT_GREEN]   = mat[MAT_ATTRIB_FRONT_AMBIENT][1];
+      fcmd[MTL_AMBIENT_BLUE]    = mat[MAT_ATTRIB_FRONT_AMBIENT][2];
+      fcmd[MTL_AMBIENT_ALPHA]   = mat[MAT_ATTRIB_FRONT_AMBIENT][3];
+   }
+   if (mask & MAT_BIT_FRONT_DIFFUSE) {
+      fcmd[MTL_DIFFUSE_RED]     = mat[MAT_ATTRIB_FRONT_DIFFUSE][0];
+      fcmd[MTL_DIFFUSE_GREEN]   = mat[MAT_ATTRIB_FRONT_DIFFUSE][1];
+      fcmd[MTL_DIFFUSE_BLUE]    = mat[MAT_ATTRIB_FRONT_DIFFUSE][2];
+      fcmd[MTL_DIFFUSE_ALPHA]   = mat[MAT_ATTRIB_FRONT_DIFFUSE][3];
+   }
+   if (mask & MAT_BIT_FRONT_SPECULAR) {
+      fcmd[MTL_SPECULAR_RED]    = mat[MAT_ATTRIB_FRONT_SPECULAR][0];
+      fcmd[MTL_SPECULAR_GREEN]  = mat[MAT_ATTRIB_FRONT_SPECULAR][1];
+      fcmd[MTL_SPECULAR_BLUE]   = mat[MAT_ATTRIB_FRONT_SPECULAR][2];
+      fcmd[MTL_SPECULAR_ALPHA]  = mat[MAT_ATTRIB_FRONT_SPECULAR][3];
+   }
+   if (mask & MAT_BIT_FRONT_SHININESS) {
+      fcmd[MTL_SHININESS]       = mat[MAT_ATTRIB_FRONT_SHININESS][0];   /* only component 0 is used */
+   }
+   /* Back-face attributes, same layout in the second atom. */
+   if (mask & MAT_BIT_BACK_EMISSION) {
+      fcmd2[MTL_EMMISSIVE_RED]   = mat[MAT_ATTRIB_BACK_EMISSION][0];
+      fcmd2[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_BACK_EMISSION][1];
+      fcmd2[MTL_EMMISSIVE_BLUE]  = mat[MAT_ATTRIB_BACK_EMISSION][2];
+      fcmd2[MTL_EMMISSIVE_ALPHA] = mat[MAT_ATTRIB_BACK_EMISSION][3];
+   }
+   if (mask & MAT_BIT_BACK_AMBIENT) {
+      fcmd2[MTL_AMBIENT_RED]     = mat[MAT_ATTRIB_BACK_AMBIENT][0];
+      fcmd2[MTL_AMBIENT_GREEN]   = mat[MAT_ATTRIB_BACK_AMBIENT][1];
+      fcmd2[MTL_AMBIENT_BLUE]    = mat[MAT_ATTRIB_BACK_AMBIENT][2];
+      fcmd2[MTL_AMBIENT_ALPHA]   = mat[MAT_ATTRIB_BACK_AMBIENT][3];
+   }
+   if (mask & MAT_BIT_BACK_DIFFUSE) {
+      fcmd2[MTL_DIFFUSE_RED]     = mat[MAT_ATTRIB_BACK_DIFFUSE][0];
+      fcmd2[MTL_DIFFUSE_GREEN]   = mat[MAT_ATTRIB_BACK_DIFFUSE][1];
+      fcmd2[MTL_DIFFUSE_BLUE]    = mat[MAT_ATTRIB_BACK_DIFFUSE][2];
+      fcmd2[MTL_DIFFUSE_ALPHA]   = mat[MAT_ATTRIB_BACK_DIFFUSE][3];
+   }
+   if (mask & MAT_BIT_BACK_SPECULAR) {
+      fcmd2[MTL_SPECULAR_RED]    = mat[MAT_ATTRIB_BACK_SPECULAR][0];
+      fcmd2[MTL_SPECULAR_GREEN]  = mat[MAT_ATTRIB_BACK_SPECULAR][1];
+      fcmd2[MTL_SPECULAR_BLUE]   = mat[MAT_ATTRIB_BACK_SPECULAR][2];
+      fcmd2[MTL_SPECULAR_ALPHA]  = mat[MAT_ATTRIB_BACK_SPECULAR][3];
+   }
+   if (mask & MAT_BIT_BACK_SHININESS) {
+      fcmd2[MTL_SHININESS]       = mat[MAT_ATTRIB_BACK_SHININESS][0];
+   }
+   /* Each R200_DB_STATE() above is paired with one R200_DB_STATECHANGE() here. */
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.mtl[0] );
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.mtl[1] );
+
+   /* currently material changes cannot trigger a global ambient change, I believe this is correct
+    update_global_ambient( ctx ); */
+}
+
+/* _NEW_LIGHT
+ * _NEW_MODELVIEW
+ * _MESA_NEW_NEED_EYE_COORDS
+ *
+ * Uses derived state from mesa:
+ *       _VP_inf_norm
+ *       _h_inf_norm
+ *       _Position
+ *       _NormSpotDirection
+ *       _ModelViewInvScale
+ *       _NeedEyeCoords
+ *       _EyeZDir
+ *
+ * which are calculated in light.c and are correct for the current
+ * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW
+ * and _MESA_NEW_NEED_EYE_COORDS.
+ */
+static void update_light( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* Three steps: lighting-space flag, eye vector atom, then position/direction of every enabled light. */
+   /* Have to check these, or have an automatic shortcircuit mechanism
+    * to remove noop statechanges. (Or just do a better job on the
+    * front end).
+    */
+   {
+      GLuint tmp = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0];
+      /* Light in model space unless Mesa reports that eye coordinates are needed. */
+      if (ctx->_NeedEyeCoords)
+	 tmp &= ~R200_LIGHT_IN_MODELSPACE;
+      else
+	 tmp |= R200_LIGHT_IN_MODELSPACE;
+      /* Only flag the tcl atom when the bit really changed. */
+      if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0])
+      {
+	 R200_STATECHANGE( rmesa, tcl );
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] = tmp;
+      }
+   }
+   /* Eye atom: _EyeZDir with its Z component negated, plus the modelview inverse scale. */
+   {
+      GLfloat *fcmd = (GLfloat *)R200_DB_STATE( eye );
+      fcmd[EYE_X] = ctx->_EyeZDir[0];
+      fcmd[EYE_Y] = ctx->_EyeZDir[1];
+      fcmd[EYE_Z] = - ctx->_EyeZDir[2];
+      fcmd[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale;
+      R200_DB_STATECHANGE( rmesa, &rmesa->hw.eye );
+   }
+
+   /* Per-light data is only refreshed while lighting is enabled, and only for enabled lights. */
+
+   if (ctx->Light.Enabled) {
+      GLint p;
+      for (p = 0 ; p < MAX_LIGHTS; p++) {
+	 if (ctx->Light.Light[p].Enabled) {
+	    struct gl_light *l = &ctx->Light.Light[p];
+	    GLfloat *fcmd = (GLfloat *)R200_DB_STATE( lit[p] );
+	    /* EyePosition w == 0: use the _VP_inf_norm / _h_inf_norm vectors with W forced to 0. */
+	    if (l->EyePosition[3] == 0.0) {
+	       COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm );
+	       COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm );
+	       fcmd[LIT_POSITION_W] = 0;
+	       fcmd[LIT_DIRECTION_W] = 0;
+	    } else {   /* otherwise: full _Position and the negated _NormSpotDirection */
+	       COPY_4V( &fcmd[LIT_POSITION_X], l->_Position );
+	       fcmd[LIT_DIRECTION_X] = -l->_NormSpotDirection[0];
+	       fcmd[LIT_DIRECTION_Y] = -l->_NormSpotDirection[1];
+	       fcmd[LIT_DIRECTION_Z] = -l->_NormSpotDirection[2];
+	       fcmd[LIT_DIRECTION_W] = 0;
+	    }
+
+	    R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+	 }
+      }
+   }
+}
+
+static void r200Lightfv( GLcontext *ctx, GLenum light,
+			   GLenum pname, const GLfloat *params )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLint p = light - GL_LIGHT0;   /* zero-based light index */
+   struct gl_light *l = &ctx->Light.Light[p];
+   GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;   /* float view of this light's "lit" atom */
+   /* First switch: per-parameter update.  Second switch: recompute the range attenuation bits where relevant. */
+   /* Colours, position and cutoff are read from the gl_light; spot exponent and attenuation terms from params[0]. */
+   switch (pname) {
+   case GL_AMBIENT:
+   case GL_DIFFUSE:
+   case GL_SPECULAR:
+      update_light_colors( ctx, p );
+      break;
+
+   case GL_SPOT_DIRECTION:
+      /* picked up in update_light */
+      break;
+
+   case GL_POSITION: {
+      /* positions picked up in update_light, but can do flag here */
+      GLuint flag = (p&1)? R200_LIGHT_1_IS_LOCAL : R200_LIGHT_0_IS_LOCAL;
+      GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
+      /* Two lights share each PER_LIGHT_CTL word: odd p uses the LIGHT_1 bits, even p the LIGHT_0 bits. */
+      R200_STATECHANGE(rmesa, tcl);
+      if (l->EyePosition[3] != 0.0F)   /* non-zero w: flag the light as local */
+	 rmesa->hw.tcl.cmd[idx] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[idx] &= ~flag;
+      break;
+   }
+
+   case GL_SPOT_EXPONENT:
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_EXPONENT] = params[0];
+      break;
+
+   case GL_SPOT_CUTOFF: {
+      GLuint flag = (p&1) ? R200_LIGHT_1_IS_SPOT : R200_LIGHT_0_IS_SPOT;
+      GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
+      /* The hardware receives the cosine of the cutoff (l->_CosCutoff), not the angle. */
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_CUTOFF] = l->_CosCutoff;
+      /* A cutoff of exactly 180 degrees clears the spot flag. */
+      R200_STATECHANGE(rmesa, tcl);
+      if (l->SpotCutoff != 180.0F)
+	 rmesa->hw.tcl.cmd[idx] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[idx] &= ~flag;
+
+      break;
+   }
+
+   case GL_CONSTANT_ATTENUATION:
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_CONST] = params[0];
+      if ( params[0] == 0.0 )   /* avoid dividing by zero: store FLT_MAX as the inverse */
+	 fcmd[LIT_ATTEN_CONST_INV] = FLT_MAX;
+      else
+	 fcmd[LIT_ATTEN_CONST_INV] = 1.0 / params[0];
+      break;
+   case GL_LINEAR_ATTENUATION:
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_LINEAR] = params[0];
+      break;
+   case GL_QUADRATIC_ATTENUATION:
+      R200_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_QUADRATIC] = params[0];
+      break;
+   default:
+      return;
+   }
+   /* Position and the three attenuation terms all feed the range attenuation decision below. */
+   /* Set RANGE_ATTEN only when needed */
+   switch (pname) {
+   case GL_POSITION:
+   case GL_CONSTANT_ATTENUATION:
+   case GL_LINEAR_ATTENUATION:
+   case GL_QUADRATIC_ATTENUATION: {
+      GLuint *icmd = (GLuint *)R200_DB_STATE( tcl );
+      GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
+      GLuint atten_flag = ( p&1 ) ? R200_LIGHT_1_ENABLE_RANGE_ATTEN
+				  : R200_LIGHT_0_ENABLE_RANGE_ATTEN;
+      GLuint atten_const_flag = ( p&1 ) ? R200_LIGHT_1_CONSTANT_RANGE_ATTEN
+				  : R200_LIGHT_0_CONSTANT_RANGE_ATTEN;
+      /* Off when w == 0, or when the constant term is 0 or 1 and both other terms are 0. */
+      if ( l->EyePosition[3] == 0.0F ||
+	   ( ( fcmd[LIT_ATTEN_CONST] == 0.0 || fcmd[LIT_ATTEN_CONST] == 1.0 ) &&
+	     fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) ) {
+	 /* Disable attenuation */
+	 icmd[idx] &= ~atten_flag;
+      } else {
+	 if ( fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) {
+	    /* Enable only constant portion of attenuation calculation */
+	    icmd[idx] |= ( atten_flag | atten_const_flag );
+	 } else {
+	    /* Enable full attenuation calculation */
+	    icmd[idx] &= ~atten_const_flag;
+	    icmd[idx] |= atten_flag;
+	 }
+      }
+      /* Pairs with the R200_DB_STATE( tcl ) that produced icmd above. */
+      R200_DB_STATECHANGE( rmesa, &rmesa->hw.tcl );
+      break;
+   }
+   default:
+     break;
+   }
+}
+
+static void r200UpdateLocalViewer ( GLcontext *ctx )
+{
+   /* Besides GL_LIGHT_MODEL_LOCAL_VIEWER, the GL_SPHERE_MAP, GL_NORMAL_MAP and
+    * GL_REFLECTION_MAP texgen modes also appear to need R200_LOCAL_VIEWER
+    * (fglrx sets it for exactly those modes).  Consequently specular
+    * highlights may be wrong when lighting is on but the GL local-viewer flag
+    * is off; this seems rare and subtle.  A full fix may need a TCL fallback,
+    * but it is unclear how to detect the affected cases or what fglrx does. */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLboolean want_local_viewer;
+
+   R200_STATECHANGE( rmesa, tcl );
+   want_local_viewer = ctx->Light.Model.LocalViewer ||
+                       (ctx->Texture._GenFlags & TEXGEN_NEED_NORMALS) != 0;
+   if (!want_local_viewer)
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_LOCAL_VIEWER;
+   else
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LOCAL_VIEWER;
+}
+
+static void r200LightModelfv( GLcontext *ctx, GLenum pname,
+				const GLfloat *param )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* param is not read: each case re-derives hardware state from values already in ctx. */
+   switch (pname) {
+      case GL_LIGHT_MODEL_AMBIENT:
+	 update_global_ambient( ctx );
+	 break;
+
+      case GL_LIGHT_MODEL_LOCAL_VIEWER:
+	 r200UpdateLocalViewer( ctx );
+         break;
+
+      case GL_LIGHT_MODEL_TWO_SIDE:
+	 R200_STATECHANGE( rmesa, tcl );
+	 if (ctx->Light.Model.TwoSide)
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHT_TWOSIDE;
+	 else
+	    rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~(R200_LIGHT_TWOSIDE);
+	 if (rmesa->radeon.TclFallback) {
+	    r200ChooseRenderState( ctx );
+	    r200ChooseVertexState( ctx );
+	 }
+         break;
+
+      case GL_LIGHT_MODEL_COLOR_CONTROL:
+	 r200UpdateSpecular(ctx);
+         break;
+
+      default:
+         break;
+   }
+}
+
+static void r200ShadeModel( GLcontext *ctx, GLenum mode )
+{
+   /* Switch the five SE_CNTL shade fields between flat and Gouraud; other
+    * modes are ignored and R200_STATECHANGE only runs if the value differs. */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint previous = rmesa->hw.set.cmd[SET_SE_CNTL];
+   GLuint shading;
+   GLuint updated;
+
+   if ( mode == GL_FLAT ) {
+      shading = R200_DIFFUSE_SHADE_FLAT |
+                R200_ALPHA_SHADE_FLAT |
+                R200_SPECULAR_SHADE_FLAT |
+                R200_FOG_SHADE_FLAT |
+                R200_DISC_FOG_SHADE_FLAT;
+   } else if ( mode == GL_SMOOTH ) {
+      shading = R200_DIFFUSE_SHADE_GOURAUD |
+                R200_ALPHA_SHADE_GOURAUD |
+                R200_SPECULAR_SHADE_GOURAUD |
+                R200_FOG_SHADE_GOURAUD |
+                R200_DISC_FOG_SHADE_GOURAUD;
+   } else {
+      return;
+   }
+   updated = previous & ~(R200_DIFFUSE_SHADE_MASK |
+                          R200_ALPHA_SHADE_MASK |
+                          R200_SPECULAR_SHADE_MASK |
+                          R200_FOG_SHADE_MASK |
+                          R200_DISC_FOG_SHADE_MASK);
+   updated |= shading;
+   if ( updated == previous )
+      return;
+   R200_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_SE_CNTL] = updated;
+}
+
+
+/* =============================================================
+ * User clip planes
+ */
+
+static void r200ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+{
+   /* eq is not read (r200Enable passes NULL); the plane comes from ctx. */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLint n = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+   const GLint *bits = (const GLint *)ctx->Transform._ClipUserPlane[n];
+   R200_STATECHANGE( rmesa, ucp[n] );
+   rmesa->hw.ucp[n].cmd[UCP_X] = bits[0];
+   rmesa->hw.ucp[n].cmd[UCP_Y] = bits[1];
+   rmesa->hw.ucp[n].cmd[UCP_Z] = bits[2];
+   rmesa->hw.ucp[n].cmd[UCP_W] = bits[3];
+}
+
+static void r200UpdateClipPlanes( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint n;
+
+   /* Re-upload the float bit patterns of every enabled user clip plane. */
+   for (n = 0; n < ctx->Const.MaxClipPlanes; n++) {
+      const GLint *bits = (const GLint *)ctx->Transform._ClipUserPlane[n];
+      if (!(ctx->Transform.ClipPlanesEnabled & (1 << n)))
+         continue;
+      R200_STATECHANGE( rmesa, ucp[n] );
+      rmesa->hw.ucp[n].cmd[UCP_X] = bits[0];
+      rmesa->hw.ucp[n].cmd[UCP_Y] = bits[1];
+      rmesa->hw.ucp[n].cmd[UCP_Z] = bits[2];
+      rmesa->hw.ucp[n].cmd[UCP_W] = bits[3];
+   }
+}
+
+
+/* =============================================================
+ * Stencil
+ */
+
+static void
+r200StencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
+                         GLint ref, GLuint mask )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << R200_STENCIL_REF_SHIFT) |
+		     ((ctx->Stencil.ValueMask[0] & 0xff) << R200_STENCIL_MASK_SHIFT));
+   /* face, func, ref and mask are not read: everything comes from element [0] of ctx->Stencil. */
+   R200_STATECHANGE( rmesa, ctx );
+   R200_STATECHANGE( rmesa, msk );
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~R200_STENCIL_TEST_MASK;
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~(R200_STENCIL_REF_MASK|
+						   R200_STENCIL_VALUE_MASK);
+
+   switch ( ctx->Stencil.Function[0] ) {
+   case GL_NEVER:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_NEVER;
+      break;
+   case GL_LESS:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_LESS;
+      break;
+   case GL_EQUAL:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_EQUAL;
+      break;
+   case GL_LEQUAL:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_LEQUAL;
+      break;
+   case GL_GREATER:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_GREATER;
+      break;
+   case GL_NOTEQUAL:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_NEQUAL;
+      break;
+   case GL_GEQUAL:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_GEQUAL;
+      break;
+   case GL_ALWAYS:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_TEST_ALWAYS;
+      break;
+   }
+   /* Any other Function[0] value leaves the test field cleared. */
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= refmask;
+}
+
+static void
+r200StencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
+{
+   /* face and mask are not read; the write mask is taken from ctx->Stencil. */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   R200_STATECHANGE( rmesa, msk );
+   GLuint refmask = rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] & ~R200_STENCIL_WRITE_MASK;
+   refmask |= ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT);
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] = refmask;
+}
+
+static void
+r200StencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
+                       GLenum zfail, GLenum zpass )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* face, fail, zfail and zpass are not read: the ops are taken from element [0] of ctx->Stencil. */
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(R200_STENCIL_FAIL_MASK |
+					       R200_STENCIL_ZFAIL_MASK |
+					       R200_STENCIL_ZPASS_MASK);
+
+   switch ( ctx->Stencil.FailFunc[0] ) {
+   case GL_KEEP:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_KEEP;
+      break;
+   case GL_ZERO:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_ZERO;
+      break;
+   case GL_REPLACE:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_REPLACE;
+      break;
+   case GL_INCR:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_INC;
+      break;
+   case GL_DECR:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_DEC;
+      break;
+   case GL_INCR_WRAP_EXT:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_INC_WRAP;
+      break;
+   case GL_DECR_WRAP_EXT:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_DEC_WRAP;
+      break;
+   case GL_INVERT:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_FAIL_INVERT;
+      break;
+   }
+
+   switch ( ctx->Stencil.ZFailFunc[0] ) {
+   case GL_KEEP:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_KEEP;
+      break;
+   case GL_ZERO:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_ZERO;
+      break;
+   case GL_REPLACE:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_REPLACE;
+      break;
+   case GL_INCR:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_INC;
+      break;
+   case GL_DECR:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_DEC;
+      break;
+   case GL_INCR_WRAP_EXT:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_INC_WRAP;
+      break;
+   case GL_DECR_WRAP_EXT:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_DEC_WRAP;
+      break;
+   case GL_INVERT:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZFAIL_INVERT;
+      break;
+   }
+
+   switch ( ctx->Stencil.ZPassFunc[0] ) {
+   case GL_KEEP:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_KEEP;
+      break;
+   case GL_ZERO:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_ZERO;
+      break;
+   case GL_REPLACE:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_REPLACE;
+      break;
+   case GL_INCR:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_INC;
+      break;
+   case GL_DECR:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_DEC;
+      break;
+   case GL_INCR_WRAP_EXT:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_INC_WRAP;
+      break;
+   case GL_DECR_WRAP_EXT:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_DEC_WRAP;
+      break;
+   case GL_INVERT:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_STENCIL_ZPASS_INVERT;
+      break;
+   }
+}
+
+static void r200ClearStencil( GLcontext *ctx, GLint s )
+{
+   /* s is not read; the packed clear word is built from ctx->Stencil. */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint clear_value = (GLuint) (ctx->Stencil.Clear & 0xff);
+   const GLuint value_mask = (0xff << R200_STENCIL_MASK_SHIFT);
+   const GLuint write_mask = ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT);
+   rmesa->radeon.state.stencil.clear = clear_value | value_mask | write_mask;
+}
+
+
+/* =============================================================
+ * Window position and viewport transformation
+ */
+
+/**
+ * Called when window size or position changes or viewport or depth range
+ * state is changed.  We update the hardware viewport state here.
+ */
+void r200UpdateWindow( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
+   GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
+   GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   const GLboolean render_to_fbo = (ctx->DrawBuffer ? (ctx->DrawBuffer->Name != 0) : 0);
+   const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   GLfloat y_scale, y_bias;
+   /* NOTE(review): render_to_fbo tolerates a NULL ctx->DrawBuffer, but depthScale dereferences it unconditionally -- confirm it is never NULL here. */
+   if (render_to_fbo) {
+      y_scale = 1.0;
+      y_bias = 0;
+   } else {
+      y_scale = -1.0;
+      y_bias = yoffset;
+   }
+   /* Without render_to_fbo, Y is negated and biased by the drawable's y + h; X is only offset by the drawable's x. */
+   float_ui32_type sx = { v[MAT_SX] };
+   float_ui32_type tx = { v[MAT_TX] + xoffset };
+   float_ui32_type sy = { v[MAT_SY] * y_scale };
+   float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias };
+   float_ui32_type sz = { v[MAT_SZ] * depthScale };
+   float_ui32_type tz = { v[MAT_TZ] * depthScale };
+
+   R200_STATECHANGE( rmesa, vpt );
+
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = sy.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = sz.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = tz.ui32;
+}
+
+void r200_vtbl_update_scissor( GLcontext *ctx )
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   unsigned x1, y1, x2, y2;
+   struct radeon_renderbuffer *rrb;
+   /* The scissor bit in RE_CNTL is always set; with the GL scissor off the rectangle spans the whole color buffer. */
+   R200_SET_STATE(r200, set, SET_RE_CNTL, R200_SCISSOR_ENABLE | r200->hw.set.cmd[SET_RE_CNTL]);
+
+   if (r200->radeon.state.scissor.enabled) {
+      x1 = r200->radeon.state.scissor.rect.x1;
+      y1 = r200->radeon.state.scissor.rect.y1;
+      x2 = r200->radeon.state.scissor.rect.x2;
+      y2 = r200->radeon.state.scissor.rect.y2;
+   } else {
+      rrb = radeon_get_colorbuffer(&r200->radeon);
+      if (!rrb)
+         return;   /* no color buffer (r200ClearColor guards the same call): keep the old rectangle */
+      x1 = y1 = 0;
+      x2 = rrb->base.Width - 1;
+      y2 = rrb->base.Height - 1;
+   }
+   R200_SET_STATE(r200, sci, SCI_XY_1, x1 | (y1 << 16));
+   R200_SET_STATE(r200, sci, SCI_XY_2, x2 | (y2 << 16));
+}
+
+
+static void r200Viewport( GLcontext *ctx, GLint x, GLint y,
+			    GLsizei width, GLsizei height )
+{
+   /* Don't pipeline viewport changes: they conflict with the window
+    * offset setting.  Could apply deltas to rescue pipelined viewport
+    * values, or keep the originals hanging around.  Note that the hardware
+    * viewport is refreshed before radeon_viewport() is called. */
+   r200UpdateWindow( ctx );
+
+   radeon_viewport(ctx, x, y, width, height);
+}
+
+static void r200DepthRange( GLcontext *ctx, GLclampd nearval,
+			      GLclampd farval )
+{
+   r200UpdateWindow( ctx );   /* nearval/farval are not read; r200UpdateWindow uses ctx->Viewport._WindowMap */
+}
+
+void r200UpdateViewportOffset( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
+   GLfloat xoffset = (GLfloat)dPriv->x;
+   GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   /* NOTE(review): dPriv is dereferenced without the NULL check r200UpdateWindow applies -- confirm it cannot be NULL here. */
+   float_ui32_type tx;
+   float_ui32_type ty;
+   /* NOTE(review): Y is always negated here; r200UpdateWindow skips the flip for render_to_fbo -- confirm this path is window-only. */
+   tx.f = v[MAT_TX] + xoffset;
+   ty.f = (- v[MAT_TY]) + yoffset;
+
+   if ( rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui32 ||
+	rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui32 )
+   {
+      /* Note: this should also modify whatever data the context reset
+       * code uses...
+       */
+      R200_STATECHANGE( rmesa, vpt );
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
+
+      /* update polygon stipple x/y screen offset */
+      {
+         GLuint stx, sty;
+         GLuint m = rmesa->hw.msc.cmd[MSC_RE_MISC];
+
+         m &= ~(R200_STIPPLE_X_OFFSET_MASK |
+                R200_STIPPLE_Y_OFFSET_MASK);
+
+         /* add magic offsets, then invert */
+         stx = 31 - ((dPriv->x - 1) & R200_STIPPLE_COORD_MASK);
+         sty = 31 - ((dPriv->y + dPriv->h - 1)
+                     & R200_STIPPLE_COORD_MASK);
+
+         m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) |
+               (sty << R200_STIPPLE_Y_OFFSET_SHIFT));
+
+         if ( rmesa->hw.msc.cmd[MSC_RE_MISC] != m ) {
+            R200_STATECHANGE( rmesa, msc );
+	    rmesa->hw.msc.cmd[MSC_RE_MISC] = m;
+         }
+      }
+   }
+
+   radeonUpdateScissor( ctx );
+}
+
+
+
+/* =============================================================
+ * Miscellaneous
+ */
+
+static void r200ClearColor( GLcontext *ctx, const GLfloat c[4] )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&rmesa->radeon);
+   GLubyte rgba[4];
+   int chan;
+
+   /* Without a color buffer there is no cpp to pack for; keep the old value. */
+   if (rrb == NULL)
+      return;
+
+   for (chan = 0; chan < 4; chan++) {
+      CLAMPED_FLOAT_TO_UBYTE(rgba[chan], c[chan]);
+   }
+   rmesa->radeon.state.color.clear =
+      radeonPackColor( rrb->cpp, rgba[0], rgba[1], rgba[2], rgba[3] );
+}
+
+
+static void r200RenderMode( GLcontext *ctx, GLenum mode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   FALLBACK( rmesa, R200_FALLBACK_RENDER_MODE, (mode != GL_RENDER) ); /* condition is true for every mode except GL_RENDER */
+}
+
+
+static GLuint r200_rop_tab[] = {   /* 16 entries, indexed by (opcode - GL_CLEAR) in r200LogicOpCode */
+   R200_ROP_CLEAR,
+   R200_ROP_AND,
+   R200_ROP_AND_REVERSE,
+   R200_ROP_COPY,
+   R200_ROP_AND_INVERTED,
+   R200_ROP_NOOP,
+   R200_ROP_XOR,
+   R200_ROP_OR,
+   R200_ROP_NOR,
+   R200_ROP_EQUIV,
+   R200_ROP_INVERT,
+   R200_ROP_OR_REVERSE,
+   R200_ROP_COPY_INVERTED,
+   R200_ROP_OR_INVERTED,
+   R200_ROP_NAND,
+   R200_ROP_SET,
+};
+
+static void r200LogicOpCode( GLcontext *ctx, GLenum opcode )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* Table slot is the opcode's distance from GL_CLEAR. */
+   const GLuint slot = (GLuint)opcode - GL_CLEAR;
+
+   ASSERT( slot < 16 );
+   R200_STATECHANGE( rmesa, msk );
+   rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = r200_rop_tab[slot];
+}
+
+/* =============================================================
+ * State enable/disable
+ */
+
+static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint p, flag;
+   /* Mirror the enable/disable of cap (state is the new value) into hardware state; unhandled caps are ignored. */
+   if ( R200_DEBUG & RADEON_STATE )
+      fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__,
+	       _mesa_lookup_enum_by_nr( cap ),
+	       state ? "GL_TRUE" : "GL_FALSE" );
+
+   switch ( cap ) {
+      /* Fast track this one...
+       */
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+      break;
+
+   case GL_ALPHA_TEST:
+      R200_STATECHANGE( rmesa, ctx );
+      if (state) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_ALPHA_TEST_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ALPHA_TEST_ENABLE;
+      }
+      break;
+
+   case GL_BLEND:
+   case GL_COLOR_LOGIC_OP:
+      r200_set_blend_state( ctx );
+      break;
+
+   case GL_CLIP_PLANE0:
+   case GL_CLIP_PLANE1:
+   case GL_CLIP_PLANE2:
+   case GL_CLIP_PLANE3:
+   case GL_CLIP_PLANE4:
+   case GL_CLIP_PLANE5:
+      p = cap-GL_CLIP_PLANE0;
+      R200_STATECHANGE( rmesa, tcl );
+      if (state) {
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (R200_UCP_ENABLE_0<<p);
+	 r200ClipPlane( ctx, cap, NULL );
+      }
+      else {
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(R200_UCP_ENABLE_0<<p);
+      }
+      break;
+
+   case GL_COLOR_MATERIAL:
+      r200ColorMaterial( ctx, 0, 0 );
+      r200UpdateMaterial( ctx );
+      break;
+
+   case GL_CULL_FACE:
+      r200CullFace( ctx, 0 );
+      break;
+
+   case GL_DEPTH_TEST:
+      R200_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_Z_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_Z_ENABLE;
+      }
+      break;
+
+   case GL_DITHER:
+      R200_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_DITHER_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_DITHER_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->radeon.state.color.roundEnable;
+      }
+      break;
+
+   case GL_FOG:
+      R200_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_FOG_ENABLE;
+	 r200Fogfv( ctx, GL_FOG_MODE, NULL );
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_FOG_ENABLE;
+	 R200_STATECHANGE(rmesa, tcl);
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK;
+      }
+      r200UpdateSpecular( ctx ); /* for PK_SPEC */
+      if (rmesa->radeon.TclFallback)
+	 r200ChooseVertexState( ctx );
+      _mesa_allow_light_in_model( ctx, !state );
+      break;
+
+   case GL_LIGHT0:
+   case GL_LIGHT1:
+   case GL_LIGHT2:
+   case GL_LIGHT3:
+   case GL_LIGHT4:
+   case GL_LIGHT5:
+   case GL_LIGHT6:
+   case GL_LIGHT7:
+      R200_STATECHANGE(rmesa, tcl);
+      p = cap - GL_LIGHT0;
+      if (p&1)
+	 flag = (R200_LIGHT_1_ENABLE |
+		 R200_LIGHT_1_ENABLE_AMBIENT |
+		 R200_LIGHT_1_ENABLE_SPECULAR);
+      else
+	 flag = (R200_LIGHT_0_ENABLE |
+		 R200_LIGHT_0_ENABLE_AMBIENT |
+		 R200_LIGHT_0_ENABLE_SPECULAR);
+
+      if (state)
+	 rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag;
+
+      /* The enable bits are set; update_light_colors() handles the rest for light p.
+       */
+      update_light_colors( ctx, p );
+      break;
+
+   case GL_LIGHTING:
+      r200UpdateSpecular(ctx);
+      /* for reflection map fixup - might set recheck_texgen for all units too */
+      rmesa->radeon.NewGLState |= _NEW_TEXTURE;
+      break;
+
+   case GL_LINE_SMOOTH:
+      R200_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  R200_ANTI_ALIAS_LINE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ANTI_ALIAS_LINE;
+      }
+      break;
+
+   case GL_LINE_STIPPLE:
+      R200_STATECHANGE( rmesa, set );
+      if ( state ) {
+	 rmesa->hw.set.cmd[SET_RE_CNTL] |=  R200_PATTERN_ENABLE;
+      } else {
+	 rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_PATTERN_ENABLE;
+      }
+      break;
+
+   case GL_NORMALIZE:
+      R200_STATECHANGE( rmesa, tcl );
+      if ( state ) {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |=  R200_NORMALIZE_NORMALS;
+      } else {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_NORMALIZE_NORMALS;
+      }
+      break;
+
+      /* Pointsize registers on r200 only work for point sprites, and point smooth
+       * doesn't work for point sprites (and isn't needed for 1.0 sized aa points).
+       * In any case, setting pointmin == pointsizemax == 1.0 for aa points
+       * is enough to satisfy conform.
+       */
+   case GL_POINT_SMOOTH:
+      break;
+
+      /* These don't really do anything, as we don't use the 3vtx
+       * primitives yet.
+       */
+#if 0
+   case GL_POLYGON_OFFSET_POINT:
+      R200_STATECHANGE( rmesa, set );
+      if ( state ) {
+	 rmesa->hw.set.cmd[SET_SE_CNTL] |=  R200_ZBIAS_ENABLE_POINT;
+      } else {
+	 rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_POINT;
+      }
+      break;
+
+   case GL_POLYGON_OFFSET_LINE:
+      R200_STATECHANGE( rmesa, set );
+      if ( state ) {
+	 rmesa->hw.set.cmd[SET_SE_CNTL] |=  R200_ZBIAS_ENABLE_LINE;
+      } else {
+	 rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_LINE;
+      }
+      break;
+#endif
+
+   case GL_POINT_SPRITE_ARB:
+      R200_STATECHANGE( rmesa, spr );
+      if ( state ) {
+	 int i;
+	 for (i = 0; i < 6; i++) {
+	    rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |=
+		ctx->Point.CoordReplace[i] << (R200_PS_GEN_TEX_0_SHIFT + i);
+	 }
+      } else {
+	 rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] &= ~R200_PS_GEN_TEX_MASK;
+      }
+      break;
+
+   case GL_POLYGON_OFFSET_FILL:
+      R200_STATECHANGE( rmesa, set );
+      if ( state ) {
+	 rmesa->hw.set.cmd[SET_SE_CNTL] |=  R200_ZBIAS_ENABLE_TRI;
+      } else {
+	 rmesa->hw.set.cmd[SET_SE_CNTL] &= ~R200_ZBIAS_ENABLE_TRI;
+      }
+      break;
+
+   case GL_POLYGON_SMOOTH:
+      R200_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  R200_ANTI_ALIAS_POLY;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~R200_ANTI_ALIAS_POLY;
+      }
+      break;
+
+   case GL_POLYGON_STIPPLE:
+      R200_STATECHANGE(rmesa, set );
+      if ( state ) {
+	 rmesa->hw.set.cmd[SET_RE_CNTL] |=  R200_STIPPLE_ENABLE;
+      } else {
+	 rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_STIPPLE_ENABLE;
+      }
+      break;
+
+   case GL_RESCALE_NORMAL_EXT: {
+      GLboolean tmp = ctx->_NeedEyeCoords ? state : !state;
+      R200_STATECHANGE( rmesa, tcl );
+      if ( tmp ) {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |=  R200_RESCALE_NORMALS;
+      } else {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS;
+      }
+      break;
+   }
+
+   case GL_SCISSOR_TEST:
+      radeon_firevertices(&rmesa->radeon);
+      rmesa->radeon.state.scissor.enabled = state;
+      radeonUpdateScissor( ctx );
+      break;
+
+   case GL_STENCIL_TEST:
+      {
+	 GLboolean hw_stencil = GL_FALSE;
+	 if (ctx->DrawBuffer) {
+	    struct radeon_renderbuffer *rrbStencil
+	       = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+	    hw_stencil = (rrbStencil && rrbStencil->bo);
+	 }
+
+	 if (hw_stencil) {
+	    R200_STATECHANGE( rmesa, ctx );
+	    if ( state ) {
+	       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  R200_STENCIL_ENABLE;
+	    } else {
+	       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_STENCIL_ENABLE;
+	    }
+	 } else {
+	    FALLBACK( rmesa, R200_FALLBACK_STENCIL, state );
+	 }
+      }
+      break;
+
+   case GL_TEXTURE_GEN_Q:
+   case GL_TEXTURE_GEN_R:
+   case GL_TEXTURE_GEN_S:
+   case GL_TEXTURE_GEN_T:
+      /* Picked up in r200UpdateTextureState.
+       */
+      rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE;
+      break;
+
+   case GL_COLOR_SUM_EXT:
+      r200UpdateSpecular ( ctx );
+      break;
+
+   case GL_VERTEX_PROGRAM_ARB:
+      if (!state) {
+	 GLuint i;
+	 rmesa->curr_vp_hw = NULL;
+	 R200_STATECHANGE( rmesa, vap );
+	 rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_PROG_VTX_SHADER_ENABLE;
+	 /* mark all tcl atoms (tcl vector state got overwritten) dirty
+	    not sure about tcl scalar state - we need at least grd
+	    with vert progs too.
+	    ucp looks like it doesn't get overwritten (may even work
+	    with vp for pos-invariant progs if we're lucky) */
+	 R200_STATECHANGE( rmesa, mtl[0] );
+	 R200_STATECHANGE( rmesa, mtl[1] );
+	 R200_STATECHANGE( rmesa, fog );
+	 R200_STATECHANGE( rmesa, glt );
+	 R200_STATECHANGE( rmesa, eye );
+	 for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++) {
+	    R200_STATECHANGE( rmesa, mat[i] );
+	 }
+	 for (i = 0 ; i < 8; i++) {
+	    R200_STATECHANGE( rmesa, lit[i] );
+	 }
+	 R200_STATECHANGE( rmesa, tcl );
+	 for (i = 0; i < ctx->Const.MaxClipPlanes; i++) {	/* valid planes are 0 .. MaxClipPlanes-1 */
+	    if (ctx->Transform.ClipPlanesEnabled & (1 << i)) {
+	       rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (R200_UCP_ENABLE_0 << i);
+	    }
+/*	    else {
+	       rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(R200_UCP_ENABLE_0 << i);
+	    }*/
+	 }
+	 /* ugly. Need to call everything which might change compsel. */
+	 r200UpdateSpecular( ctx );
+#if 0
+	/* shouldn't be necessary, as it's picked up anyway in r200ValidateState (_NEW_PROGRAM),
+	   but without it doom3 locks up at always the same places. Why? */
+	/* FIXME: This can (and should) be replaced by a call to the TCL_STATE_FLUSH reg before
+	   accessing VAP_SE_VAP_CNTL. Requires drm changes (done). Remove after some time... */
+	 r200UpdateTextureState( ctx );
+	 /* if we call r200UpdateTextureState we need the code below because we are calling it with
+	    non-current derived enabled values which may revert the state atoms for frag progs even when
+	    they already got disabled... ugh
+	    Should really figure out why we need to call r200UpdateTextureState in the first place */
+	 GLuint unit;
+	 for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) {
+	    R200_STATECHANGE( rmesa, pix[unit] );
+	    R200_STATECHANGE( rmesa, tex[unit] );
+	    rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &=
+		~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+	    rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
+	    /* need to guard this with drmSupportsFragmentShader? Should never get here if
+	       we don't announce ATI_fs, right? */
+	    rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0;
+         }
+	 R200_STATECHANGE( rmesa, cst );
+	 R200_STATECHANGE( rmesa, tf );
+	 rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
+#endif
+      }
+      else {
+	 /* picked up later */
+      }
+      /* call functions which change hw state based on ARB_vp enabled or not. */
+      r200PointParameter( ctx, GL_POINT_DISTANCE_ATTENUATION, NULL );
+      r200Fogfv( ctx, GL_FOG_COORD_SRC, NULL );
+      break;
+
+   case GL_VERTEX_PROGRAM_POINT_SIZE_ARB:
+      r200PointParameter( ctx, GL_POINT_DISTANCE_ATTENUATION, NULL );
+      break;
+
+   case GL_FRAGMENT_SHADER_ATI:
+      if ( !state ) {
+	 /* restore normal tex env colors and make sure tex env combine will get updated
+	    mark env atoms dirty (as their data was overwritten by afs even
+	    if they didn't change) and restore tex coord routing */
+	 GLuint unit;
+	 for (unit = 0; unit < R200_MAX_TEXTURE_UNITS; unit++) {
+	    R200_STATECHANGE( rmesa, pix[unit] );
+	    R200_STATECHANGE( rmesa, tex[unit] );
+	    rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &=
+		~(R200_TXFORMAT_ST_ROUTE_MASK | R200_TXFORMAT_LOOKUP_DISABLE);
+	    rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
+	    /* need to guard this with drmSupportsFragmentShader? Should never get here if
+	       we don't announce ATI_fs, right? */
+	    rmesa->hw.tex[unit].cmd[TEX_PP_TXMULTI_CTL] = 0;
+         }
+	 R200_STATECHANGE( rmesa, cst );
+	 R200_STATECHANGE( rmesa, tf );
+	 rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
+      }
+      else {
+	 /* need to mark this dirty as pix/tf atoms have overwritten the data
+	    even if the data in the atoms didn't change */
+	 R200_STATECHANGE( rmesa, atf );
+	 R200_STATECHANGE( rmesa, afs[1] );
+	 /* everything else picked up in r200UpdateTextureState hopefully */
+      }
+      break;
+   default:
+      return;
+   }
+}
+
+
+void r200LightingSpaceChange( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLboolean rescale;
+
+   if (R200_DEBUG & RADEON_STATE)
+      fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
+	      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]);
+
+   /* The hardware rescale bit follows ctx->Transform.RescaleNormals when
+    * ctx->_NeedEyeCoords is set and is its inverse otherwise.
+    */
+   rescale = ctx->_NeedEyeCoords ? ctx->Transform.RescaleNormals
+                                 : !ctx->Transform.RescaleNormals;
+
+   R200_STATECHANGE( rmesa, tcl );
+   if ( !rescale )
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS;
+   else
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |=  R200_RESCALE_NORMALS;
+
+   if (R200_DEBUG & RADEON_STATE)
+      fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
+	      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]);
+}
+
+/* =============================================================
+ * Deferred state management - matrices, textures, other?
+ */
+
+
+
+
+static void upload_matrix( r200ContextPtr rmesa, GLfloat *src, int idx )
+{
+   float *dest = ((float *)R200_DB_STATE( mat[idx] ))+MAT_ELT_0;
+   int row;
+
+   /* Store the transpose: output row `row` gathers src[row], src[row+4],
+    * src[row+8] and src[row+12].
+    */
+   for (row = 0 ; row < 4 ; row++) {
+      int col;
+      for (col = 0 ; col < 4 ; col++)
+         dest[4*row + col] = src[row + 4*col];
+   }
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+}
+
+static void upload_matrix_t( r200ContextPtr rmesa, const GLfloat *src, int idx )
+{
+   float *dest = ((float *)R200_DB_STATE( mat[idx] ))+MAT_ELT_0;
+   memcpy(dest, src, 16*sizeof(float));   /* 16 floats copied in source order (upload_matrix reorders them instead) */
+   R200_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+}
+
+
+static void update_texturematrix( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   GLuint tpc = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0];
+   GLuint compsel = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL];
+   int unit;
+
+   if (R200_DEBUG & RADEON_STATE)
+      fprintf(stderr, "%s before COMPSEL: %x\n", __FUNCTION__,
+	      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]);
+   /* Rebuild TexMatEnabled/TexMatCompSel from the enabled units and upload their matrices. */
+   rmesa->TexMatEnabled = 0;
+   rmesa->TexMatCompSel = 0;
+
+   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+	 continue;
+
+      if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) {
+	 rmesa->TexMatEnabled |= (R200_TEXGEN_TEXMAT_0_ENABLE|
+				  R200_TEXMAT_0_ENABLE) << unit;
+
+	 rmesa->TexMatCompSel |= R200_OUTPUT_TEX_0 << unit;
+
+	 if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) {
+	    /* Need to preconcatenate any active texgen
+	     * obj/eyeplane matrices:
+	     */
+	    _math_matrix_mul_matrix( &rmesa->tmpmat,
+				     ctx->TextureMatrixStack[unit].Top,
+				     &rmesa->TexGenMatrix[unit] );
+	    upload_matrix( rmesa, rmesa->tmpmat.m, R200_MTX_TEX0+unit );
+	 }
+	 else {
+	    upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m,
+			   R200_MTX_TEX0+unit );
+	 }
+      }
+      else if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) {
+	 upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m,
+			R200_MTX_TEX0+unit );
+      }
+   }
+   /* The tcg and vtx atoms are only touched when the recomputed words differ from the current ones. */
+   tpc = (rmesa->TexMatEnabled | rmesa->TexGenEnabled);
+   if (tpc != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0]) {
+      R200_STATECHANGE(rmesa, tcg);
+      rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] = tpc;
+   }
+
+   compsel &= ~R200_OUTPUT_TEX_MASK;
+   compsel |= rmesa->TexMatCompSel | rmesa->TexGenCompSel;
+   if (compsel != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]) {
+      R200_STATECHANGE(rmesa, vtx);
+      rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = compsel;
+   }
+}
+
+static GLboolean r200ValidateBuffers(GLcontext *ctx)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct radeon_renderbuffer *rrb;
+   struct radeon_dma_bo *dma_bo;
+   int i, ret;
+   /* Re-register every buffer object in use with the command stream, then run the space check. */
+	if (RADEON_DEBUG & RADEON_IOCTL)
+		fprintf(stderr, "%s\n", __FUNCTION__);
+   radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
+
+   rrb = radeon_get_colorbuffer(&rmesa->radeon);
+   /* color buffer */
+   if (rrb && rrb->bo) {
+     radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
+				       0, RADEON_GEM_DOMAIN_VRAM);
+   }
+
+   /* depth buffer */
+   rrb = radeon_get_depthbuffer(&rmesa->radeon);
+   /* added only when it exists and has a buffer object */
+   if (rrb && rrb->bo) {
+     radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
+				       0, RADEON_GEM_DOMAIN_VRAM);
+   }
+
+   for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
+      radeonTexObj *t;
+
+      if (!ctx->Texture.Unit[i]._ReallyEnabled)
+	 continue;
+      /* NOTE(review): t->mt is dereferenced below without a NULL check - confirm it is always set here. */
+      t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
+      if (t->image_override && t->bo)
+	radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->bo,
+			   RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+      else if (t->mt->bo)
+	radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->mt->bo,
+			   RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+   }
+   /* The check is made together with the first reserved DMA buffer; non-zero ret means failure. */
+   dma_bo = first_elem(&rmesa->radeon.dma.reserved);
+   {
+       ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, dma_bo->bo, RADEON_GEM_DOMAIN_GTT, 0);
+       if (ret)
+	   return GL_FALSE;
+   }
+   return GL_TRUE;
+}
+
+GLboolean r200ValidateState( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint new_state = rmesa->radeon.NewGLState;
+   /* Bring derived driver state in line with the accumulated NewGLState bits; GL_FALSE if buffers fail validation. */
+   if (new_state & _NEW_BUFFERS) {
+      _mesa_update_framebuffer(ctx);
+      /* this updates the DrawBuffer's Width/Height if it's a FBO */
+      _mesa_update_draw_buffer_bounds(ctx);
+
+      R200_STATECHANGE(rmesa, ctx);
+   }
+
+   if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) {
+      r200UpdateTextureState( ctx );
+      new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
+      r200UpdateLocalViewer( ctx );
+   }
+
+   /* space check; on failure we return before NewGLState is cleared */
+   if (!r200ValidateBuffers(ctx))
+     return GL_FALSE;
+
+/* FIXME: don't really need most of these when vertex progs are enabled */
+
+   /* Need an event driven matrix update?
+    */
+   if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
+      upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, R200_MTX_MVP );
+
+   /* Need these for lighting (shouldn't upload otherwise)
+    */
+   if (new_state & (_NEW_MODELVIEW)) {
+      upload_matrix( rmesa, ctx->ModelviewMatrixStack.Top->m, R200_MTX_MV );
+      upload_matrix_t( rmesa, ctx->ModelviewMatrixStack.Top->inv, R200_MTX_IMV );
+   }
+
+   /* Does this need to be triggered on eg. modelview for
+    * texgen-derived objplane/eyeplane matrices?
+    */
+   if (new_state & (_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) {
+      update_texturematrix( ctx );
+   }
+
+   if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
+      update_light( ctx );
+   }
+
+   /* emit all active clip planes if projection matrix changes.
+    */
+   if (new_state & (_NEW_PROJECTION)) {
+      if (ctx->Transform.ClipPlanesEnabled)
+	 r200UpdateClipPlanes( ctx );
+   }
+
+   if (new_state & (_NEW_PROGRAM|
+                    _NEW_PROGRAM_CONSTANTS |
+   /* need to test for pretty much anything due to possible parameter bindings */
+	_NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM|
+	_NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX|
+	_NEW_FOG|_NEW_POINT|_NEW_TRACK_MATRIX)) {
+      if (ctx->VertexProgram._Enabled) {
+	 r200SetupVertexProg( ctx );
+      }
+      else TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, 0);   /* no vertex program: clear that fallback bit */
+   }
+
+   rmesa->radeon.NewGLState = 0;   /* everything pending has been consumed */
+   return GL_TRUE;
+}
+
+
+static void r200InvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   _swrast_InvalidateState( ctx, new_state );   /* forward the dirty bits to each helper module ... */
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   _ae_invalidate_state( ctx, new_state );
+   R200_CONTEXT(ctx)->radeon.NewGLState |= new_state;   /* ... and accumulate them for r200ValidateState() */
+}
+
+/* A hack.  The r200 can actually cope just fine with materials
+ * between begin/ends, so fix this.
+ * Should map to inputs just like the generic vertex arrays for vertex progs.
+ * In theory there could still be too many and we'd still need a fallback.
+ */
+static GLboolean check_material( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLint attr;
+
+   /* Report GL_TRUE as soon as one material attribute array has a non-zero stride. */
+   for (attr = _TNL_ATTRIB_MAT_FRONT_AMBIENT; attr < _TNL_ATTRIB_MAT_BACK_INDEXES; attr++) {
+      if (tnl->vb.AttribPtr[attr] && tnl->vb.AttribPtr[attr]->stride) {
+	 return GL_TRUE;
+      }
+   }
+
+   return GL_FALSE;
+}
+
+static void r200WrapRunPipeline( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLboolean has_material;
+   /* Wrapper around _tnl_run_pipeline(): validate state first, and bracket the run with the material fallback. */
+   if (0)
+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
+
+   /* Validate pending state; a failed validation raises RADEON_FALLBACK_TEXTURE.
+    */
+   if (rmesa->radeon.NewGLState)
+      if (!r200ValidateState( ctx ))
+	 FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE);
+
+   has_material = !ctx->VertexProgram._Enabled && ctx->Light.Enabled && check_material( ctx );
+
+   if (has_material) {
+      TCL_FALLBACK( ctx, R200_TCL_FALLBACK_MATERIAL, GL_TRUE );
+   }
+
+   /* Run the pipeline.
+    */
+   _tnl_run_pipeline( ctx );
+
+   if (has_material) {
+      TCL_FALLBACK( ctx, R200_TCL_FALLBACK_MATERIAL, GL_FALSE );   /* undo the temporary fallback */
+   }
+}
+
+
+static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   GLint i;
+   /* Store a new polygon stipple pattern (read as 32 GLuint words from mask) in the stp atom. */
+   radeon_firevertices(&r200->radeon);
+
+   radeon_print(RADEON_STATE, RADEON_TRACE,
+		   "%s(%p) first 32 bits are %x.\n",
+		   __func__,
+		   ctx,
+		   *(uint32_t*)mask);
+
+   R200_STATECHANGE(r200, stp);
+
+   /* Copy the 32 pattern words into stp.cmd[3..34].  NOTE(review): word i lands in
+    * slot 3+i, so no vertical flip happens here although the old comment asked for one. */
+   for ( i = 31 ; i >= 0; i--) {
+     r200->hw.stp.cmd[3 + i] = ((GLuint *) mask)[i];
+   }
+}
+/* Initialize the driver's state functions.
+ */
+void r200InitStateFuncs( radeonContextPtr radeon, struct dd_function_table *functions )
+{
+   functions->UpdateState		= r200InvalidateState;
+   functions->LightingSpaceChange	= r200LightingSpaceChange;
+
+   functions->DrawBuffer		= radeonDrawBuffer;
+   functions->ReadBuffer		= radeonReadBuffer;
+
+   functions->CopyPixels                = _mesa_meta_CopyPixels;
+   functions->DrawPixels                = _mesa_meta_DrawPixels;
+   if (radeon->radeonScreen->kernel_mm)   /* ReadPixels is only overridden when kernel_mm is set */
+	   functions->ReadPixels                = radeonReadPixels;
+
+   functions->AlphaFunc			= r200AlphaFunc;
+   functions->BlendColor		= r200BlendColor;
+   functions->BlendEquationSeparate	= r200BlendEquationSeparate;
+   functions->BlendFuncSeparate		= r200BlendFuncSeparate;
+   functions->ClearColor		= r200ClearColor;
+   functions->ClearDepth		= r200ClearDepth;
+   functions->ClearStencil		= r200ClearStencil;
+   functions->ClipPlane			= r200ClipPlane;
+   functions->ColorMask			= r200ColorMask;
+   functions->CullFace			= r200CullFace;
+   functions->DepthFunc			= r200DepthFunc;
+   functions->DepthMask			= r200DepthMask;
+   functions->DepthRange		= r200DepthRange;
+   functions->Enable			= r200Enable;
+   functions->Fogfv			= r200Fogfv;
+   functions->FrontFace			= r200FrontFace;
+   functions->Hint			= NULL;   /* explicitly no driver hook for Hint */
+   functions->LightModelfv		= r200LightModelfv;
+   functions->Lightfv			= r200Lightfv;
+   functions->LineStipple		= r200LineStipple;
+   functions->LineWidth			= r200LineWidth;
+   functions->LogicOpcode		= r200LogicOpCode;
+   functions->PolygonMode		= r200PolygonMode;
+   functions->PolygonOffset		= r200PolygonOffset;
+   functions->PolygonStipple		= r200PolygonStipple;
+   functions->PointParameterfv		= r200PointParameter;
+   functions->PointSize			= r200PointSize;
+   functions->RenderMode		= r200RenderMode;
+   functions->Scissor			= radeonScissor;
+   functions->ShadeModel		= r200ShadeModel;
+   functions->StencilFuncSeparate	= r200StencilFuncSeparate;
+   functions->StencilMaskSeparate	= r200StencilMaskSeparate;
+   functions->StencilOpSeparate		= r200StencilOpSeparate;
+   functions->Viewport			= r200Viewport;
+}
+
+
+void r200InitTnlFuncs( GLcontext *ctx )
+{
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange = r200UpdateMaterial;   /* install the TNL material-change hook */
+   TNL_CONTEXT(ctx)->Driver.RunPipeline = r200WrapRunPipeline;   /* route pipeline runs through the wrapper */
+}
diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h
new file mode 100644
index 0000000000..327ba837e2
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_state.h
@@ -0,0 +1,63 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_STATE_H__
+#define __R200_STATE_H__
+
+#include "r200_context.h"
+
+extern void r200InitState( r200ContextPtr rmesa );
+extern void r200InitStateFuncs( radeonContextPtr radeon, struct dd_function_table *functions );
+extern void r200InitTnlFuncs( GLcontext *ctx );
+
+extern void r200UpdateMaterial( GLcontext *ctx );
+
+extern void r200UpdateViewportOffset( GLcontext *ctx );
+extern void r200UpdateWindow( GLcontext *ctx );
+extern void r200UpdateDrawBuffer(GLcontext *ctx);
+
+extern GLboolean r200ValidateState( GLcontext *ctx );
+
+extern void r200_vtbl_update_scissor( GLcontext *ctx );
+
+extern void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+#define FALLBACK( rmesa, bit, mode ) do {				\
+   if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n",		\
+		     __FUNCTION__, (bit), (mode) );			\
+   r200Fallback( (rmesa)->radeon.glCtx, (bit), (mode) );		\
+} while (0)   /* calls r200Fallback(); arguments are parenthesized so any expression may be passed */
+
+extern void r200LightingSpaceChange( GLcontext *ctx );
+
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c
new file mode 100644
index 0000000000..1606553009
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_state_init.c
@@ -0,0 +1,1626 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/colormac.h"
+#include "main/api_arrayelt.h"
+
+#include "swrast/swrast.h"
+#include "vbo/vbo.h"
+#include "tnl/t_pipeline.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "radeon_common.h"
+#include "radeon_mipmap_tree.h"
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_state.h"
+#include "radeon_queryobj.h"
+
+#include "xmlpool.h"
+
+/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in 1.3
+ * cmdbuffers allow all previous state to be updated as well as the tcl scalar
+ * and vector areas.  packet[] is indexed by packet id; cmdpkt() reads .start/.len.
+ */
+static struct {
+	int start;
+	int len;
+	const char *name;
+} packet[RADEON_MAX_STATE_PACKETS] = {
+	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
+	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
+	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
+	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
+	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
+	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
+	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
+	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
+	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
+	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
+	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
+	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
+	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
+	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
+	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
+	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
+	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
+	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
+	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
+	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
+	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
+		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
+	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
+	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
+	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
+	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
+	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
+	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
+	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
+	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
+	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
+	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
+	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
+	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
+	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
+	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
+	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
+	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
+	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
+	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
+	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
+	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
+	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
+	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
+	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
+	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
+	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
+	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
+	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
+	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
+	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
+	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
+	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
+	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
+	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
+	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
+	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
+	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
+	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
+	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
+	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
+	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
+	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
+		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
+	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
+	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
+	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
+	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
+	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
+	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
+	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
+	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
+	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
+	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
+	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
+	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
+	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
+	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
+	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
+	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
+	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
+	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
+	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
+	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
+	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
+	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
+	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
+	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
+	{R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"},     /* 85 */
+	{R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
+	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
+	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
+	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
+	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
+	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
+	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
+	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
+	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
+};
+
+/* =============================================================
+ * State initialization
+ */
+static int cmdpkt( r200ContextPtr rmesa, int id )
+{
+   drm_radeon_cmd_header_t hdr;
+
+   /* With kernel_mm set, the header is a CP type-0 packet built from packet[id]. */
+   if (rmesa->radeon.radeonScreen->kernel_mm)
+      return CP_PACKET0(packet[id].start, packet[id].len - 1);
+   /* Otherwise build a RADEON_CMD_PACKET header that carries the packet id. */
+   hdr.i = 0;
+   hdr.packet.cmd_type = RADEON_CMD_PACKET;
+   hdr.packet.packet_id = id;
+   return hdr.i;
+}
+
+static int cmdvec( int offset, int stride, int count )
+{
+   drm_radeon_cmd_header_t hdr;
+   hdr.i = 0;   /* start from an all-zero header word */
+   hdr.vectors.cmd_type = RADEON_CMD_VECTORS;
+   hdr.vectors.offset = offset;
+   hdr.vectors.stride = stride;
+   hdr.vectors.count = count;
+   return hdr.i;   /* the packed header, returned as one int */
+}
+
+/* Build a RADEON_CMD_VECLINEAR header.  Warning: count here is in units of 4
+   compared to other cmds (so it fits the char field); the address is split lo/hi. */
+static int cmdveclinear( int offset, int count )
+{
+   drm_radeon_cmd_header_t hdr;
+   hdr.i = 0;
+   hdr.veclinear.cmd_type = RADEON_CMD_VECLINEAR;
+   hdr.veclinear.addr_lo = offset & 0xff;             /* bits 0-7 of the offset */
+   hdr.veclinear.addr_hi = (offset & 0xff00) >> 8;    /* bits 8-15 of the offset */
+   hdr.veclinear.count = count;
+   return hdr.i;
+}
+
+static int cmdscl( int offset, int stride, int count )
+{
+   drm_radeon_cmd_header_t hdr;
+   hdr.i = 0;   /* start from an all-zero header word */
+   hdr.scalars.cmd_type = RADEON_CMD_SCALARS;
+   hdr.scalars.offset = offset;
+   hdr.scalars.stride = stride;
+   hdr.scalars.count = count;
+   return hdr.i;   /* the packed header, returned as one int */
+}
+
+static int cmdscl2( int offset, int stride, int count )
+{
+   drm_radeon_cmd_header_t hdr;
+   hdr.i = 0;
+   hdr.scalars.cmd_type = RADEON_CMD_SCALARS2;
+   hdr.scalars.offset = offset - 0x100;   /* stored biased by 0x100; OUT_SCL2 adds it back */
+   hdr.scalars.stride = stride;
+   hdr.scalars.count = count;
+   return hdr.i;
+}
+
+/**
+ * Generators for the atoms' check_<NM>() callbacks.  CHECK: the generated function
+ * returns atom->cmd_size + ADD (the maximum emit size) when FLAG holds, else 0.
+ */
+#define CHECK( NM, FLAG, ADD )				\
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
+{							\
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
+   (void) rmesa;					\
+   return (FLAG) ? atom->cmd_size + (ADD) : 0;			\
+}
+/* TCL_CHECK: as CHECK, but additionally requires no TCL fallback and vertex programs disabled. */
+#define TCL_CHECK( NM, FLAG, ADD )				\
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \
+{									\
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
+   return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
+}
+/* TCL_OR_VP_CHECK: as CHECK, but only requires no TCL fallback (vertex program state is ignored). */
+#define TCL_OR_VP_CHECK( NM, FLAG, ADD )			\
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
+{							\
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
+   return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size + (ADD) : 0;	\
+}
+/* VP_CHECK: as CHECK, but requires no TCL fallback and vertex programs enabled. */
+#define VP_CHECK( NM, FLAG, ADD )				\
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \
+{									\
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);				\
+   (void) atom;								\
+   return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size + (ADD) : 0; \
+}
+
+CHECK( always, GL_TRUE, 0 )   /* each line below defines one static check_<name>() function */
+CHECK( always_add4, GL_TRUE, 4 )
+CHECK( never, GL_FALSE, 0 )
+CHECK( tex_any, ctx->Texture._EnabledUnits, 0 )
+CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled), 0 )   /* no ';': a definition needs none */
+CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled, 0 )
+CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled), 0 )
+CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)), 0 )
+CHECK( afs, ctx->ATIFragmentShader._Enabled, 0 )
+CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( tex_cube_cs, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT, 2 + 4*5 - CUBE_STATE_SIZE )
+TCL_CHECK( tcl_fog, ctx->Fog.Enabled, 0 )
+TCL_CHECK( tcl_fog_add4, ctx->Fog.Enabled, 4 )
+TCL_CHECK( tcl, GL_TRUE, 0 )
+TCL_CHECK( tcl_add8, GL_TRUE, 8 )
+TCL_CHECK( tcl_add4, GL_TRUE, 4 )
+TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded, 0 )
+TCL_CHECK( tcl_lighting, ctx->Light.Enabled, 0 )
+TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled, 0 )
+TCL_CHECK( tcl_tex_add4, rmesa->state.texture.unit[atom->idx].unitneeded, 4 )
+TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 )
+TCL_CHECK( tcl_lighting_add6, ctx->Light.Enabled, 6 )
+TCL_CHECK( tcl_light_add8, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled, 8 )
+TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))), 0 )
+TCL_OR_VP_CHECK( tcl_ucp_add4, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))), 4 )
+TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE, 0 )
+TCL_OR_VP_CHECK( tcl_or_vp_add2, GL_TRUE, 2 )
+VP_CHECK( tcl_vp, GL_TRUE, 0 )
+VP_CHECK( tcl_vp_add4, GL_TRUE, 4 )
+VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64, 0 )
+VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96, 0 )
+VP_CHECK( tcl_vp_size_add4, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64, 4 )
+VP_CHECK( tcl_vpp_size_add4, ctx->VertexProgram.Current->Base.NumNativeParameters > 96, 4 )
+/* OUT_VEC: decode a cmdvec()-style header and emit flush, index and data packets for h.vectors.count dwords. */
+#define OUT_VEC(hdr, data) do {			\
+    drm_radeon_cmd_header_t h;					\
+    h.i = hdr;								\
+    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
+    OUT_BATCH(0);							\
+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
+    OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1));	\
+    OUT_BATCH_TABLE((data), h.vectors.count);				\
+  } while(0)
+/* OUT_VECLINEAR: for cmdveclinear() headers (count * 4 dwords); opens its own batch, uses the caller's r200 and dwords. */
+#define OUT_VECLINEAR(hdr, data) do {					\
+    drm_radeon_cmd_header_t h;						\
+    uint32_t _start, _sz;						\
+    h.i = hdr;								\
+    _start = h.veclinear.addr_lo | (h.veclinear.addr_hi << 8);		\
+    _sz = h.veclinear.count * 4;					\
+    if (r200->radeon.radeonScreen->kernel_mm && _sz) { \
+    BEGIN_BATCH_NO_AUTOSTATE(dwords); \
+    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
+    OUT_BATCH(0);							\
+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
+    OUT_BATCH(_start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));	\
+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, _sz - 1));	\
+    OUT_BATCH_TABLE((data), _sz);					\
+    END_BATCH(); \
+    } \
+  } while(0)
+/* OUT_SCL: decode a cmdscl()-style header and emit index and data packets for h.scalars.count dwords. */
+#define OUT_SCL(hdr, data) do {					\
+    drm_radeon_cmd_header_t h;						\
+    h.i = hdr;								\
+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
+    OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1));	\
+    OUT_BATCH_TABLE((data), h.scalars.count);				\
+  } while(0)
+/* OUT_SCL2: as OUT_SCL, but adds back the 0x100 that cmdscl2() subtracted from the offset. */
+#define OUT_SCL2(hdr, data) do {					\
+    drm_radeon_cmd_header_t h;						\
+    h.i = hdr;								\
+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
+    OUT_BATCH((h.scalars.offset + 0x100) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1));	\
+    OUT_BATCH_TABLE((data), h.scalars.count);				\
+  } while(0)
+static int check_rrb(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   struct radeon_renderbuffer *color_rb = radeon_get_colorbuffer(&r200->radeon);
+   /* The atom is emitted only when a colour buffer with a buffer object exists. */
+   if (color_rb && color_rb->bo)
+      return atom->cmd_size;
+   return 0;
+}
+
+static int check_polygon_stipple(GLcontext *ctx,
+		struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   /* Active (cmd_size) only while R200_STIPPLE_ENABLE is set in the SET_RE_CNTL word. */
+   const int stipple_on = (r200->hw.set.cmd[SET_RE_CNTL] & R200_STIPPLE_ENABLE) != 0;
+   return stipple_on ? atom->cmd_size : 0;
+}
+
+static void mtl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+   /* Batch size comes from the atom's check callback; emit one OUT_VEC (data at cmd+1) and one OUT_SCL2 (data at cmd+18). */
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1));
+   OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18));
+   END_BATCH();
+}
+
+static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+   /* Two vector uploads: each header word (LIT_CMD_0, LIT_CMD_1) is followed in cmd[] by its data. */
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
+   OUT_VEC(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
+   END_BATCH();
+}
+
+static void ptp_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+   /* Two vector uploads: each header word (PTP_CMD_0, PTP_CMD_1) is followed in cmd[] by its data. */
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1);
+   OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1);
+   END_BATCH();
+}
+
+static void veclinear_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+   /* No BEGIN/END_BATCH here: OUT_VECLINEAR opens the batch itself and reads the locals r200 and dwords. */
+   OUT_VECLINEAR(atom->cmd[0], atom->cmd+1);
+}
+
+static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+   /* One scalar upload: cmd[0] is the header word, the data follows at cmd+1. */
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_SCL(atom->cmd[0], atom->cmd+1);
+   END_BATCH();
+}
+
+
+static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+   /* One vector upload: cmd[0] is the header word, the data follows at cmd+1. */
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_VEC(atom->cmd[0], atom->cmd+1);
+   END_BATCH();
+}
+
+static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   struct radeon_renderbuffer *rrb;
+   uint32_t cbpitch;
+   uint32_t zbpitch, depth_fmt;
+   uint32_t dwords = atom->check(ctx, atom);
+
+   /* emit the first 5 dwords of the atom unchanged */
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_BATCH_TABLE(atom->cmd, 5);
+   /* Depth buffer: relocation + pitch.  NOTE(review): only rrb is tested, not rrb->bo as on the colour path - confirm. */
+   rrb = radeon_get_depthbuffer(&r200->radeon);
+   if (!rrb) {
+     OUT_BATCH(0);
+     OUT_BATCH(0);
+   } else {
+     zbpitch = (rrb->pitch / rrb->cpp);
+     if (r200->using_hyperz)
+       zbpitch |= RADEON_DEPTH_HYPERZ;
+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+     OUT_BATCH(zbpitch);
+     if (rrb->cpp == 4) 
+       depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; 
+     else 
+       depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; 
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK; 
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; 
+   }
+     
+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
+   OUT_BATCH(atom->cmd[CTX_CMD_1]);
+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
+   /* Colour buffer: without a buffer object the stored words go out as-is; otherwise format bits and a relocation. */
+   rrb = radeon_get_colorbuffer(&r200->radeon);
+   if (!rrb || !rrb->bo) {
+     OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+     OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
+   } else {
+     atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); 
+     if (rrb->cpp == 4) 
+       atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; 
+     else 
+       atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; 
+ 
+     OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); 
+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+   }
+
+   OUT_BATCH(atom->cmd[CTX_CMD_2]);
+
+   if (!rrb || !rrb->bo) {
+     OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
+   } else {
+     cbpitch = (rrb->pitch / rrb->cpp);
+     if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+       cbpitch |= R200_COLOR_TILE_ENABLE;
+     OUT_BATCH(cbpitch);
+   }
+   /* The larger (NEWDRM) atom layout carries 4 more dwords starting at cmd[14]. */
+   if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
+     OUT_BATCH_TABLE((atom->cmd + 14), 4);
+
+   END_BATCH();
+}
+
+static int check_always_ctx( GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   struct radeon_renderbuffer *color_rb = radeon_get_colorbuffer(&r200->radeon);
+   struct radeon_renderbuffer *depth_rb;
+   uint32_t dwords;
+
+   /* Nothing is emitted without a colour buffer that has a buffer object. */
+   if (!color_rb || !color_rb->bo)
+      return 0;
+
+   depth_rb = radeon_get_depthbuffer(&r200->radeon);
+
+   /* 10 dwords are always counted; the 8 for the colour buffer are
+    * unconditional here because the guard above proved it exists. */
+   dwords = 10 + 8;
+   if (depth_rb)
+      dwords += 6;
+   if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM)
+      dwords += 4;
+
+   /* Size in dwords of what the ctx atom will emit. */
+   return dwords;
+}
+
+static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   struct radeon_renderbuffer *rrb, *drb;
+   uint32_t cbpitch = 0;
+   uint32_t zbpitch = 0;
+   uint32_t dwords = atom->check(ctx, atom);
+   uint32_t depth_fmt;
+   /* Context atom emit used when kernel_mm is set: separate PACKET0 writes. */
+   rrb = radeon_get_colorbuffer(&r200->radeon);
+   if (!rrb || !rrb->bo) {
+      return;
+   }
+   /* Rebuild the colour format in RB3D_CNTL: clear bits 10-13, then set it. */
+   atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
+   if (rrb->cpp == 4)
+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
+   else switch (rrb->base.Format) {
+   case MESA_FORMAT_RGB565:
+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
+	break;
+   case MESA_FORMAT_ARGB4444:
+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
+	break;
+   case MESA_FORMAT_ARGB1555:
+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
+	break;
+   default:
+	_mesa_problem(ctx, "Unexpected format in ctx_emit_cs");
+   }
+   /* Colour pitch is pitch / cpp, flagged when the bo is macro-tiled. */
+   cbpitch = (rrb->pitch / rrb->cpp);
+   if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+       cbpitch |= R200_COLOR_TILE_ENABLE;
+   /* A depth buffer is optional; when present refresh the Z format bits. */
+   drb = radeon_get_depthbuffer(&r200->radeon);
+   if (drb) {
+     zbpitch = (drb->pitch / drb->cpp);
+     if (drb->cpp == 4)
+        depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+     else
+        depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
+   }
+   /* NOTE(review): unlike ctx_emit, zbpitch never gets RADEON_DEPTH_HYPERZ. */
+   /* Reserve the dword count computed by the check hook for this atom. */
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+
+   /* In the CS case we need to split this up: cmd[1..4] go out first. */
+   OUT_BATCH(CP_PACKET0(packet[0].start, 3));
+   OUT_BATCH_TABLE((atom->cmd + 1), 4);
+   /* Depth offset (as a relocation against drb->bo), then depth pitch. */
+   if (drb) {
+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
+     OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
+     OUT_BATCH(zbpitch);
+   }
+   /* Z/stencil control, then PP_CNTL and RB3D_CNTL behind a single header. */
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
+   OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
+   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+
+   /* rrb was validated at the top, so this branch is always taken. */
+   if (rrb) {
+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
+     OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+   }
+   /* Only the NEWDRM-sized atom carries the four extra dwords at cmd + 14. */
+   if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) {
+     OUT_BATCH_TABLE((atom->cmd + 14), 4);
+   }
+
+   END_BATCH();
+}
+
+static int get_tex_size(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+   /* Atom size in dwords, plus 2 when tex_emit will emit a relocation. */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   radeonTexObj *tex = rmesa->state.texture.unit[atom->idx].texobj;
+   uint32_t size = atom->cmd_size;
+
+   if (tex && tex->mt && !tex->image_override)
+      size += 2;
+   return size;
+}
+
+static int check_tex_pair(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* idx ^ 1 flips the lowest bit and so names the other unit of the pair. */
+   const int self = atom->idx, mate = atom->idx ^ 1;
+   if (rmesa->state.texture.unit[self].unitneeded | rmesa->state.texture.unit[mate].unitneeded)
+      return get_tex_size(ctx, atom);
+   return 0;
+}
+
+static int check_tex(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+   /* A unit that is not needed contributes no dwords to the batch. */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   return rmesa->state.texture.unit[atom->idx].unitneeded
+      ? get_tex_size(ctx, atom) : 0;
+}
+
+
+static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+   int i = atom->idx;
+   radeonTexObj *t = r200->state.texture.unit[i].texobj;
+   /* Texture atom emit for the non-kernel_mm case: cmd[0..9], then an offset. */
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   /* is this ok even with drm older than 1.18? */
+   OUT_BATCH_TABLE(atom->cmd, 10);
+   /* Miptree-backed textures are relocated; other cases write a raw value. */
+   if (t && t->mt && !t->image_override) {
+     OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, get_base_teximage_offset(t),
+		  RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+   } else if (!t) {
+     /* workaround for old CS mechanism */
+     OUT_BATCH(r200->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
+   } else {
+     OUT_BATCH(t->override_offset);
+   }
+
+   END_BATCH();
+}
+
+static int get_tex_mm_size(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+   /*
+    * Dword count for tex_emit_mm.  With backing storage (a miptree or a
+    * buffer object) the result is cmd_size + 2, which tex_emit_mm reads as
+    * "emit the texture offset as well"; otherwise it is cmd_size - 2.
+    */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   radeonTexObj *tex = rmesa->state.texture.unit[atom->idx].texobj;
+   uint32_t size = atom->cmd_size + 2;
+
+   /* Has storage: keep the full size. */
+   if (tex && (tex->mt || tex->bo))
+      return size;
+
+   /* No storage: the offset part is not emitted, so it is not counted. */
+   return size - 4;
+}
+
+static int check_tex_pair_mm(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* idx ^ 1 flips the lowest bit and so names the other unit of the pair. */
+   const int self = atom->idx, mate = atom->idx ^ 1;
+   if (rmesa->state.texture.unit[self].unitneeded | rmesa->state.texture.unit[mate].unitneeded)
+      return get_tex_mm_size(ctx, atom);
+   return 0;
+}
+
+static int check_tex_mm(GLcontext* ctx, struct radeon_state_atom *atom)
+{
+   /* A unit that is not needed contributes no dwords to the batch. */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   return rmesa->state.texture.unit[atom->idx].unitneeded
+      ? get_tex_mm_size(ctx, atom) : 0;
+}
+
+
+static void tex_emit_mm(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+   int i = atom->idx;
+   radeonTexObj *t = r200->state.texture.unit[i].texobj;
+   /* A pair-emitted unit that is not needed itself loses its offset packet. */
+   if (!r200->state.texture.unit[i].unitneeded && !(dwords <= atom->cmd_size))
+        dwords -= 4;
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   /* Header at R200_PP_TXFILTER_0 + 32 * i, then the 8 dwords from cmd + 1. */
+   OUT_BATCH(CP_PACKET0(R200_PP_TXFILTER_0 + (32 * i), 7));
+   OUT_BATCH_TABLE((atom->cmd + 1), 8);
+   /* Larger than cmd_size means get_tex_mm_size saw a miptree or a bo on t. */
+   if (dwords > atom->cmd_size) {
+     OUT_BATCH(CP_PACKET0(R200_PP_TXOFFSET_0 + (24 * i), 0));
+     if (t->mt && !t->image_override) {
+        OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+		  RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+      } else {
+	if (t->bo) /* NOTE(review): if NULL here the header above gets no payload - confirm unreachable */
+            OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
+                            RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+      }
+   }
+   END_BATCH();
+}
+
+
+static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+   int i = atom->idx, j;
+   radeonTexObj *t = r200->state.texture.unit[i].texobj;
+   radeon_mipmap_level *lvl;
+   /* Cube atom emit without kernel_mm; with no usable texture only cmd[0..1]. */
+   if (!(t && !t->image_override))
+     dwords = 2;
+
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   /* XXX that size won't really match with image_override... */
+   OUT_BATCH_TABLE(atom->cmd, 2);
+   /* Then cmd[2] and faces 1..5 of level 0, relocated against the miptree bo. */
+   if (t && !t->image_override) {
+     lvl = &t->mt->levels[0]; /* assumes t->mt is non-NULL here - TODO confirm */
+     OUT_BATCH_TABLE((atom->cmd + 2), 1);
+     for (j = 1; j <= 5; j++) {
+       OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
+			RADEON_GEM_DOMAIN_VRAM, 0, 0);
+     }
+   }
+   END_BATCH();
+}
+
+static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+   BATCH_LOCALS(&r200->radeon);
+   uint32_t dwords = atom->check(ctx, atom);
+   int i = atom->idx, j;
+   radeonTexObj *t = r200->state.texture.unit[i].texobj;
+   radeon_mipmap_level *lvl;
+   if (!(t && !t->image_override))
+     dwords = 2;
+   /* Cube atom emit for kernel_mm; with no usable texture only cmd[0..1]. */
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_BATCH_TABLE(atom->cmd, 2);
+   /* Faces 1..5 of level 0: one single-register PACKET0 plus relocation each. */
+   if (t && !t->image_override) {
+     lvl = &t->mt->levels[0]; /* assumes t->mt is non-NULL here - TODO confirm */
+     for (j = 1; j <= 5; j++) {
+       OUT_BATCH(CP_PACKET0(R200_PP_CUBIC_OFFSET_F1_0 + (24*i) + (4 * (j-1)), 0));
+       OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset,
+			RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+     }
+   }
+   END_BATCH();
+}
+
+/* Initialize the context's hardware state.
+ */
+void r200InitState( r200ContextPtr rmesa )
+{
+   GLcontext *ctx = rmesa->radeon.glCtx;
+   GLuint i;
+
+   rmesa->radeon.state.color.clear = 0x00000000;
+
+   switch ( ctx->Visual.depthBits ) {
+   case 16:
+      rmesa->radeon.state.depth.clear = 0x0000ffff;
+      rmesa->radeon.state.stencil.clear = 0x00000000;
+      break;
+   case 24:
+   default:
+      rmesa->radeon.state.depth.clear = 0x00ffffff;
+      rmesa->radeon.state.stencil.clear = 0xffff0000;
+      break;
+   }
+
+   rmesa->radeon.Fallback = 0;
+
+   rmesa->radeon.hw.max_state_size = 0;
+
+#define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX )				\
+   do {								\
+      rmesa->hw.ATOM.cmd_size = SZ;				\
+      rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
+      rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
+      rmesa->hw.ATOM.name = NM;					\
+      rmesa->hw.ATOM.idx = IDX;					\
+      if (check_##CHK != check_never) {				\
+         rmesa->hw.ATOM.check = check_##CHK;			\
+         rmesa->radeon.hw.max_state_size += SZ * sizeof(int);	\
+      } else {							\
+         rmesa->hw.ATOM.check = NULL;				\
+      }								\
+      rmesa->hw.ATOM.dirty = GL_FALSE;				\
+   } while (0)
+
+
+   /* Allocate state buffers:
+    */
+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
+      ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 );
+   else
+      ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 );
+
+   if (rmesa->radeon.radeonScreen->kernel_mm)
+   {
+     rmesa->hw.ctx.emit = ctx_emit_cs;
+     rmesa->hw.ctx.check = check_always_ctx;
+   }
+   else
+   {
+     rmesa->hw.ctx.emit = ctx_emit;
+   }
+   ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
+   ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
+   ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
+   ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
+   ALLOC_STATE( vtx, always, VTX_STATE_SIZE, "VTX/vertex", 0 );
+   ALLOC_STATE( vap, always, VAP_STATE_SIZE, "VAP/vap", 0 );
+   ALLOC_STATE( vte, always, VTE_STATE_SIZE, "VTE/vte", 0 );
+   ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 );
+   ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 );
+   ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
+   ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 );
+   {
+      int state_size = TEX_STATE_SIZE_NEWDRM;
+      if (!rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+         state_size = TEX_STATE_SIZE_OLDDRM;
+      }
+      if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+         if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
+            /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */
+            ALLOC_STATE( tex[0], tex_pair_mm, state_size, "TEX/tex-0", 0 );
+            ALLOC_STATE( tex[1], tex_pair_mm, state_size, "TEX/tex-1", 1 );
+            ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 );
+         }
+         else {
+            ALLOC_STATE( tex[0], tex_mm, state_size, "TEX/tex-0", 0 );
+            ALLOC_STATE( tex[1], tex_mm, state_size, "TEX/tex-1", 1 );
+            ALLOC_STATE( tam, never, TAM_STATE_SIZE, "TAM/tam", 0 );
+         }
+         ALLOC_STATE( tex[2], tex_mm, state_size, "TEX/tex-2", 2 );
+         ALLOC_STATE( tex[3], tex_mm, state_size, "TEX/tex-3", 3 );
+         ALLOC_STATE( tex[4], tex_mm, state_size, "TEX/tex-4", 4 );
+         ALLOC_STATE( tex[5], tex_mm, state_size, "TEX/tex-5", 5 );
+         if (!rmesa->radeon.radeonScreen->kernel_mm)
+         {
+            if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
+               rmesa->hw.tex[0].check = check_tex_pair;
+               rmesa->hw.tex[1].check = check_tex_pair;
+            } else {
+               rmesa->hw.tex[0].check = check_tex;
+               rmesa->hw.tex[1].check = check_tex;
+            }
+            rmesa->hw.tex[2].check = check_tex;
+            rmesa->hw.tex[3].check = check_tex;
+            rmesa->hw.tex[4].check = check_tex;
+            rmesa->hw.tex[5].check = check_tex;
+         }
+         if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+            ALLOC_STATE( atf, afs, ATF_STATE_SIZE, "ATF/tfactor", 0 );
+            ALLOC_STATE( afs[0], afs_pass1, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
+            ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
+         } else {
+            ALLOC_STATE( atf, never, ATF_STATE_SIZE, "ATF/tfactor", 0 );
+            ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 );
+            ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 );
+         }
+      }
+   }
+
+   if (rmesa->radeon.radeonScreen->kernel_mm)
+	   ALLOC_STATE( stp, polygon_stipple, STP_STATE_SIZE, "STP/stp", 0 );
+   else
+	   ALLOC_STATE( stp, never, STP_STATE_SIZE, "STP/stp", 0 );
+
+   for (i = 0; i < 6; i++)
+      if (rmesa->radeon.radeonScreen->kernel_mm)
+          rmesa->hw.tex[i].emit = tex_emit_mm;
+      else
+          rmesa->hw.tex[i].emit = tex_emit;
+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200) {
+      ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
+      ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
+      ALLOC_STATE( cube[2], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-2", 2 );
+      ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 );
+      ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
+      ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
+      for (i = 0; i < 6; i++)
+          if (rmesa->radeon.radeonScreen->kernel_mm) {
+              rmesa->hw.cube[i].emit = cube_emit_cs;
+              rmesa->hw.cube[i].check = check_tex_cube_cs;
+          } else
+              rmesa->hw.cube[i].emit = cube_emit;
+   }
+   else {
+      ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/tex-0", 0 );
+      ALLOC_STATE( cube[1], never, CUBE_STATE_SIZE, "CUBE/tex-1", 1 );
+      ALLOC_STATE( cube[2], never, CUBE_STATE_SIZE, "CUBE/tex-2", 2 );
+      ALLOC_STATE( cube[3], never, CUBE_STATE_SIZE, "CUBE/tex-3", 3 );
+      ALLOC_STATE( cube[4], never, CUBE_STATE_SIZE, "CUBE/tex-4", 4 );
+      ALLOC_STATE( cube[5], never, CUBE_STATE_SIZE, "CUBE/tex-5", 5 );
+   }
+
+   if (rmesa->radeon.radeonScreen->drmSupportsVertexProgram) {
+      ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 );
+      if (rmesa->radeon.radeonScreen->kernel_mm) {
+         ALLOC_STATE( vpi[0], tcl_vp_add4, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
+         ALLOC_STATE( vpi[1], tcl_vp_size_add4, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
+         ALLOC_STATE( vpp[0], tcl_vp_add4, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 );
+         ALLOC_STATE( vpp[1], tcl_vpp_size_add4, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 );
+      } else {
+         ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
+         ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
+         ALLOC_STATE( vpp[0], tcl_vp, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 );
+         ALLOC_STATE( vpp[1], tcl_vpp_size, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 );
+      }
+   }
+   else {
+      ALLOC_STATE( pvs, never, PVS_STATE_SIZE, "PVS/pvscntl", 0 );
+      ALLOC_STATE( vpi[0], never, VPI_STATE_SIZE, "VP/vertexprog-0", 0 );
+      ALLOC_STATE( vpi[1], never, VPI_STATE_SIZE, "VP/vertexprog-1", 1 );
+      ALLOC_STATE( vpp[0], never, VPP_STATE_SIZE, "VPP/vertexparam-0", 0 );
+      ALLOC_STATE( vpp[1], never, VPP_STATE_SIZE, "VPP/vertexparam-1", 1 );
+   }
+   /* FIXME: this atom has two commands, we need only one (ucp_vert_blend) for vp */
+   ALLOC_STATE( tcl, tcl_or_vp, TCL_STATE_SIZE, "TCL/tcl", 0 );
+   ALLOC_STATE( msl, tcl, MSL_STATE_SIZE, "MSL/matrix-select", 0 );
+   ALLOC_STATE( tcg, tcl, TCG_STATE_SIZE, "TCG/texcoordgen", 0 );
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+      ALLOC_STATE( mtl[0], tcl_lighting_add6, MTL_STATE_SIZE, "MTL0/material0", 0 );
+      ALLOC_STATE( mtl[1], tcl_lighting_add6, MTL_STATE_SIZE, "MTL1/material1", 1 );
+      ALLOC_STATE( grd, tcl_or_vp_add2, GRD_STATE_SIZE, "GRD/guard-band", 0 );
+      ALLOC_STATE( fog, tcl_fog_add4, FOG_STATE_SIZE, "FOG/fog", 0 );
+      ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 0 );
+      ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 0 );
+      ALLOC_STATE( mat[R200_MTX_MV], tcl_add4, MAT_STATE_SIZE, "MAT/modelview", 0 );
+      ALLOC_STATE( mat[R200_MTX_IMV], tcl_add4, MAT_STATE_SIZE, "MAT/it-modelview", 0 );
+      ALLOC_STATE( mat[R200_MTX_MVP], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 0 );
+      ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat0", 0 );
+      ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+      ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat2", 2 );
+      ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat3", 3 );
+      ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat4", 4 );
+      ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex_add4, MAT_STATE_SIZE, "MAT/texmat5", 5 );
+      ALLOC_STATE( ucp[0], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-0", 0 );
+      ALLOC_STATE( ucp[1], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+      ALLOC_STATE( ucp[2], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-2", 2 );
+      ALLOC_STATE( ucp[3], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-3", 3 );
+      ALLOC_STATE( ucp[4], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-4", 4 );
+      ALLOC_STATE( ucp[5], tcl_ucp_add4, UCP_STATE_SIZE, "UCP/userclip-5", 5 );
+      ALLOC_STATE( lit[0], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-0", 0 );
+      ALLOC_STATE( lit[1], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-1", 1 );
+      ALLOC_STATE( lit[2], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-2", 2 );
+      ALLOC_STATE( lit[3], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-3", 3 );
+      ALLOC_STATE( lit[4], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-4", 4 );
+      ALLOC_STATE( lit[5], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-5", 5 );
+      ALLOC_STATE( lit[6], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-6", 6 );
+      ALLOC_STATE( lit[7], tcl_light_add8, LIT_STATE_SIZE, "LIT/light-7", 7 );
+      ALLOC_STATE( sci, rrb, SCI_STATE_SIZE, "SCI/scissor", 0 );
+   } else {
+      ALLOC_STATE( mtl[0], tcl_lighting, MTL_STATE_SIZE, "MTL0/material0", 0 );
+      ALLOC_STATE( mtl[1], tcl_lighting, MTL_STATE_SIZE, "MTL1/material1", 1 );
+      ALLOC_STATE( grd, tcl_or_vp, GRD_STATE_SIZE, "GRD/guard-band", 0 );
+      ALLOC_STATE( fog, tcl_fog, FOG_STATE_SIZE, "FOG/fog", 0 );
+      ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 0 );
+      ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 0 );
+      ALLOC_STATE( mat[R200_MTX_MV], tcl, MAT_STATE_SIZE, "MAT/modelview", 0 );
+      ALLOC_STATE( mat[R200_MTX_IMV], tcl, MAT_STATE_SIZE, "MAT/it-modelview", 0 );
+      ALLOC_STATE( mat[R200_MTX_MVP], tcl, MAT_STATE_SIZE, "MAT/modelproject", 0 );
+      ALLOC_STATE( mat[R200_MTX_TEX0], tcl_tex, MAT_STATE_SIZE, "MAT/texmat0", 0 );
+      ALLOC_STATE( mat[R200_MTX_TEX1], tcl_tex, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+      ALLOC_STATE( mat[R200_MTX_TEX2], tcl_tex, MAT_STATE_SIZE, "MAT/texmat2", 2 );
+      ALLOC_STATE( mat[R200_MTX_TEX3], tcl_tex, MAT_STATE_SIZE, "MAT/texmat3", 3 );
+      ALLOC_STATE( mat[R200_MTX_TEX4], tcl_tex, MAT_STATE_SIZE, "MAT/texmat4", 4 );
+      ALLOC_STATE( mat[R200_MTX_TEX5], tcl_tex, MAT_STATE_SIZE, "MAT/texmat5", 5 );
+      ALLOC_STATE( ucp[0], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-0", 0 );
+      ALLOC_STATE( ucp[1], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+      ALLOC_STATE( ucp[2], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-2", 2 );
+      ALLOC_STATE( ucp[3], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-3", 3 );
+      ALLOC_STATE( ucp[4], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-4", 4 );
+      ALLOC_STATE( ucp[5], tcl_ucp, UCP_STATE_SIZE, "UCP/userclip-5", 5 );
+      ALLOC_STATE( lit[0], tcl_light, LIT_STATE_SIZE, "LIT/light-0", 0 );
+      ALLOC_STATE( lit[1], tcl_light, LIT_STATE_SIZE, "LIT/light-1", 1 );
+      ALLOC_STATE( lit[2], tcl_light, LIT_STATE_SIZE, "LIT/light-2", 2 );
+      ALLOC_STATE( lit[3], tcl_light, LIT_STATE_SIZE, "LIT/light-3", 3 );
+      ALLOC_STATE( lit[4], tcl_light, LIT_STATE_SIZE, "LIT/light-4", 4 );
+      ALLOC_STATE( lit[5], tcl_light, LIT_STATE_SIZE, "LIT/light-5", 5 );
+      ALLOC_STATE( lit[6], tcl_light, LIT_STATE_SIZE, "LIT/light-6", 6 );
+      ALLOC_STATE( lit[7], tcl_light, LIT_STATE_SIZE, "LIT/light-7", 7 );
+      ALLOC_STATE( sci, never, SCI_STATE_SIZE, "SCI/scissor", 0 );
+   }
+   ALLOC_STATE( pix[0], pix_zero, PIX_STATE_SIZE, "PIX/pixstage-0", 0 );
+   ALLOC_STATE( pix[1], texenv, PIX_STATE_SIZE, "PIX/pixstage-1", 1 );
+   ALLOC_STATE( pix[2], texenv, PIX_STATE_SIZE, "PIX/pixstage-2", 2 );
+   ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 );
+   ALLOC_STATE( pix[4], texenv, PIX_STATE_SIZE, "PIX/pixstage-4", 4 );
+   ALLOC_STATE( pix[5], texenv, PIX_STATE_SIZE, "PIX/pixstage-5", 5 );
+   if (rmesa->radeon.radeonScreen->drmSupportsTriPerf) {
+      ALLOC_STATE( prf, always, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
+   }
+   else {
+      ALLOC_STATE( prf, never, PRF_STATE_SIZE, "PRF/performance-tri", 0 );
+   }
+   if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) {
+      ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 );
+      if (rmesa->radeon.radeonScreen->kernel_mm)
+         ALLOC_STATE( ptp, tcl_add8, PTP_STATE_SIZE, "PTP/pointparams", 0 );
+      else
+         ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 );
+   }
+   else {
+      ALLOC_STATE (spr, never, SPR_STATE_SIZE, "SPR/pointsprite", 0 );
+      ALLOC_STATE (ptp, never, PTP_STATE_SIZE, "PTP/pointparams", 0 );
+   }
+
+   r200SetUpAtomList( rmesa );
+
+   /* Fill in the packet headers:
+    */
+   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
+   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
+   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor)
+      rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(rmesa, R200_EMIT_RB3D_BLENDCOLOR);
+   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
+   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
+   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
+   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
+   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
+   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
+   rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CNTL_X);
+   rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(rmesa, R200_EMIT_RB3D_DEPTHXY_OFFSET);
+   rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(rmesa, R200_EMIT_RE_AUX_SCISSOR_CNTL);
+   rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(rmesa, R200_EMIT_RE_SCISSOR_TL_0);
+   rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(rmesa, R200_EMIT_SE_VAP_CNTL_STATUS);
+   rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(rmesa, R200_EMIT_RE_POINTSIZE);
+   rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(rmesa, R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0);
+   rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TAM_DEBUG3);
+   rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(rmesa, R200_EMIT_TFACTOR_0);
+   if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+      rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(rmesa, R200_EMIT_ATF_TFACTOR);
+      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_0);
+      rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
+      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_1);
+      rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
+      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_2);
+      rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
+      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_3);
+      rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
+      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_4);
+      rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
+      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_5);
+      rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
+   } else {
+      rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_0);
+      rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0);
+      rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_1);
+      rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1);
+      rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_2);
+      rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2);
+      rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_3);
+      rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3);
+      rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_4);
+      rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4);
+      rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_5);
+      rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5);
+   }
+   rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_0);
+   rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_1);
+   rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_PVS_CNTL);
+   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_0);
+   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_0);
+   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_1);
+   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_1);
+   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_2);
+   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_2);
+   rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_3);
+   rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_3);
+   rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_4);
+   rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_4);
+   rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_5);
+   rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_5);
+   rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_0);
+   rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_1);
+   rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_2);
+   rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_3);
+   rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_4);
+   rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_5);
+   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
+   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_LIGHT_MODEL_CTL_0);
+   rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(rmesa, R200_EMIT_TCL_UCP_VERT_BLEND_CTL);
+   rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(rmesa, R200_EMIT_TEX_PROC_CTL_2);
+   rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(rmesa, R200_EMIT_MATRIX_SELECT_0);
+   rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_CTL);
+   rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTX_FMT_0);
+   rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(rmesa, R200_EMIT_OUTPUT_VTX_COMP_SEL);
+   rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(rmesa, R200_EMIT_SE_VTX_STATE_CNTL);
+   rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTE_CNTL);
+   rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL);
+   rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL);
+
+   rmesa->hw.sci.cmd[SCI_CMD_0] = CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0);
+   rmesa->hw.sci.cmd[SCI_CMD_1] = CP_PACKET0(R200_RE_TOP_LEFT, 0);
+   rmesa->hw.sci.cmd[SCI_CMD_2] = CP_PACKET0(R200_RE_WIDTH_HEIGHT, 0);
+
+   rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0);
+   rmesa->hw.stp.cmd[STP_DATA_0] = 0;
+   rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31);
+
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+        rmesa->hw.mtl[0].emit = mtl_emit;
+        rmesa->hw.mtl[1].emit = mtl_emit;
+
+        rmesa->hw.vpi[0].emit = veclinear_emit;
+        rmesa->hw.vpi[1].emit = veclinear_emit;
+        rmesa->hw.vpp[0].emit = veclinear_emit;
+        rmesa->hw.vpp[1].emit = veclinear_emit;
+
+        rmesa->hw.grd.emit = scl_emit;
+        rmesa->hw.fog.emit = vec_emit;
+        rmesa->hw.glt.emit = vec_emit;
+        rmesa->hw.eye.emit = vec_emit;
+
+	for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++)
+	  rmesa->hw.mat[i].emit = vec_emit;
+
+	for (i = 0; i < 8; i++)
+	  rmesa->hw.lit[i].emit = lit_emit;
+
+	for (i = 0; i < 6; i++)
+	  rmesa->hw.ucp[i].emit = vec_emit;
+
+	rmesa->hw.ptp.emit = ptp_emit;
+   }
+
+
+   
+   rmesa->hw.mtl[0].cmd[MTL_CMD_0] = 
+      cmdvec( R200_VS_MAT_0_EMISS, 1, 16 );
+   rmesa->hw.mtl[0].cmd[MTL_CMD_1] = 
+      cmdscl2( R200_SS_MAT_0_SHININESS, 1, 1 );
+   rmesa->hw.mtl[1].cmd[MTL_CMD_0] =
+      cmdvec( R200_VS_MAT_1_EMISS, 1, 16 );
+   rmesa->hw.mtl[1].cmd[MTL_CMD_1] =
+      cmdscl2( R200_SS_MAT_1_SHININESS, 1, 1 );
+
+   rmesa->hw.vpi[0].cmd[VPI_CMD_0] =
+      cmdveclinear( R200_PVS_PROG0, 64 );
+   rmesa->hw.vpi[1].cmd[VPI_CMD_0] =
+      cmdveclinear( R200_PVS_PROG1, 64 );
+   rmesa->hw.vpp[0].cmd[VPP_CMD_0] =
+      cmdveclinear( R200_PVS_PARAM0, 96 );
+   rmesa->hw.vpp[1].cmd[VPP_CMD_0] =
+      cmdveclinear( R200_PVS_PARAM1, 96 );
+
+   rmesa->hw.grd.cmd[GRD_CMD_0] = 
+      cmdscl( R200_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
+   rmesa->hw.fog.cmd[FOG_CMD_0] = 
+      cmdvec( R200_VS_FOG_PARAM_ADDR, 1, 4 );
+   rmesa->hw.glt.cmd[GLT_CMD_0] = 
+      cmdvec( R200_VS_GLOBAL_AMBIENT_ADDR, 1, 4 );
+   rmesa->hw.eye.cmd[EYE_CMD_0] = 
+      cmdvec( R200_VS_EYE_VECTOR_ADDR, 1, 4 );
+
+   rmesa->hw.mat[R200_MTX_MV].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_0_MV, 1, 16);
+   rmesa->hw.mat[R200_MTX_IMV].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_1_INV_MV, 1, 16);
+   rmesa->hw.mat[R200_MTX_MVP].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_2_MVP, 1, 16);
+   rmesa->hw.mat[R200_MTX_TEX0].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_3_TEX0, 1, 16);
+   rmesa->hw.mat[R200_MTX_TEX1].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_4_TEX1, 1, 16);
+   rmesa->hw.mat[R200_MTX_TEX2].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_5_TEX2, 1, 16);
+   rmesa->hw.mat[R200_MTX_TEX3].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_6_TEX3, 1, 16);
+   rmesa->hw.mat[R200_MTX_TEX4].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_7_TEX4, 1, 16);
+   rmesa->hw.mat[R200_MTX_TEX5].cmd[MAT_CMD_0] = 
+      cmdvec( R200_VS_MATRIX_8_TEX5, 1, 16);
+
+   for (i = 0 ; i < 8; i++) {
+      rmesa->hw.lit[i].cmd[LIT_CMD_0] = 
+	 cmdvec( R200_VS_LIGHT_AMBIENT_ADDR + i, 8, 24 );
+      rmesa->hw.lit[i].cmd[LIT_CMD_1] = 
+	 cmdscl( R200_SS_LIGHT_DCD_ADDR + i, 8, 7 );
+   }
+
+   for (i = 0 ; i < 6; i++) {
+      rmesa->hw.ucp[i].cmd[UCP_CMD_0] = 
+	 cmdvec( R200_VS_UCP_ADDR + i, 1, 4 );
+   }
+
+   rmesa->hw.ptp.cmd[PTP_CMD_0] =
+      cmdvec( R200_VS_PNT_SPRITE_VPORT_SCALE, 1, 4 );
+   rmesa->hw.ptp.cmd[PTP_CMD_1] =
+      cmdvec( R200_VS_PNT_SPRITE_ATT_CONST, 1, 12 );
+
+   /* Initial Hardware state:
+    */
+   rmesa->hw.ctx.cmd[CTX_PP_MISC] = (R200_ALPHA_TEST_PASS
+				     /* | R200_RIGHT_HAND_CUBE_OGL*/);
+
+   rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = (R200_FOG_VERTEX |
+					  R200_FOG_USE_SPEC_ALPHA);
+
+   rmesa->hw.ctx.cmd[CTX_RE_SOLID_COLOR] = 0x00000000;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
+				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+				(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
+
+   if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) {
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = 0x00000000;
+      rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
+				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+				(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
+      rmesa->hw.ctx.cmd[CTX_RB3D_CBLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP |
+				(R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) |
+				(R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT));
+   }
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] =
+      rmesa->radeon.radeonScreen->depthOffset + rmesa->radeon.radeonScreen->fbLocation;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = 
+      ((rmesa->radeon.radeonScreen->depthPitch &
+	R200_DEPTHPITCH_MASK) |
+       R200_DEPTH_ENDIAN_NO_SWAP);
+   
+   if (rmesa->using_hyperz)
+      rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= R200_DEPTH_HYPERZ;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (R200_Z_TEST_LESS |
+					       R200_STENCIL_TEST_ALWAYS |
+					       R200_STENCIL_FAIL_KEEP |
+					       R200_STENCIL_ZPASS_KEEP |
+					       R200_STENCIL_ZFAIL_KEEP |
+					       R200_Z_WRITE_ENABLE);
+
+   if (rmesa->using_hyperz) {
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE |
+						  R200_Z_DECOMPRESSION_ENABLE;
+/*      if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200)
+	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
+   }
+
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE 
+ 				     | R200_TEX_BLEND_0_ENABLE);
+
+   switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
+   case DRI_CONF_DITHER_XERRORDIFFRESET:
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_INIT;
+      break;
+   case DRI_CONF_DITHER_ORDERED:
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_SCALE_DITHER_ENABLE;
+      break;
+   }
+   if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
+	DRI_CONF_ROUND_ROUND )
+      rmesa->radeon.state.color.roundEnable = R200_ROUND_ENABLE;
+   else
+      rmesa->radeon.state.color.roundEnable = 0;
+   if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
+	DRI_CONF_COLOR_REDUCTION_DITHER )
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE;
+   else
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
+
+   rmesa->hw.prf.cmd[PRF_PP_TRI_PERF] = R200_TRI_CUTOFF_MASK - R200_TRI_CUTOFF_MASK * 
+			driQueryOptionf (&rmesa->radeon.optionCache,"texture_blend_quality");
+   rmesa->hw.prf.cmd[PRF_PP_PERF_CNTL] = 0;
+
+   rmesa->hw.set.cmd[SET_SE_CNTL] = (R200_FFACE_CULL_CCW |
+				     R200_BFACE_SOLID |
+				     R200_FFACE_SOLID |
+				     R200_FLAT_SHADE_VTX_LAST |
+				     R200_DIFFUSE_SHADE_GOURAUD |
+				     R200_ALPHA_SHADE_GOURAUD |
+				     R200_SPECULAR_SHADE_GOURAUD |
+				     R200_FOG_SHADE_GOURAUD |
+				     R200_DISC_FOG_SHADE_GOURAUD |
+				     R200_VTX_PIX_CENTER_OGL |
+				     R200_ROUND_MODE_TRUNC |
+				     R200_ROUND_PREC_8TH_PIX);
+
+   rmesa->hw.set.cmd[SET_RE_CNTL] = (R200_PERSPECTIVE_ENABLE |
+				     R200_SCISSOR_ENABLE);
+
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((1 << 16) | 0xffff);
+
+   rmesa->hw.lin.cmd[LIN_RE_LINE_STATE] = 
+      ((0 << R200_LINE_CURRENT_PTR_SHIFT) |
+       (1 << R200_LINE_CURRENT_COUNT_SHIFT));
+
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (1 << 4);
+
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] = 
+      ((0x00 << R200_STENCIL_REF_SHIFT) |
+       (0xff << R200_STENCIL_MASK_SHIFT) |
+       (0xff << R200_STENCIL_WRITEMASK_SHIFT));
+
+   rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = R200_ROP_COPY;
+   rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = 0xffffffff;
+
+   rmesa->hw.tam.cmd[TAM_DEBUG3] = 0;
+
+   rmesa->hw.msc.cmd[MSC_RE_MISC] = 
+      ((0 << R200_STIPPLE_X_OFFSET_SHIFT) |
+       (0 << R200_STIPPLE_Y_OFFSET_SHIFT) |
+       R200_STIPPLE_BIG_BIT_ORDER);
+
+
+   rmesa->hw.cst.cmd[CST_PP_CNTL_X] = 0;
+   rmesa->hw.cst.cmd[CST_RB3D_DEPTHXY_OFFSET] = 0;
+   rmesa->hw.cst.cmd[CST_RE_AUX_SCISSOR_CNTL] = 0x0;
+   rmesa->hw.cst.cmd[CST_RE_SCISSOR_TL_0] = 0;
+   rmesa->hw.cst.cmd[CST_RE_SCISSOR_BR_0] = 0;
+   rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] =
+#ifdef MESA_BIG_ENDIAN
+						R200_VC_32BIT_SWAP;
+#else
+						R200_VC_NO_SWAP;
+#endif
+
+   if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+      /* Bypass TCL */
+      rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] |= (1<<8);
+   }
+
+   rmesa->hw.cst.cmd[CST_RE_POINTSIZE] =
+      (((GLuint)(ctx->Const.MaxPointSize * 16.0)) << R200_MAXPOINTSIZE_SHIFT) | 0x10;
+   rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_0] =
+      (0x0 << R200_VERTEX_POSITION_ADDR__SHIFT);
+   rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_1] =
+      (0x02 << R200_VTX_COLOR_0_ADDR__SHIFT) |
+      (0x03 << R200_VTX_COLOR_1_ADDR__SHIFT);
+   rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_2] =
+      (0x06 << R200_VTX_TEX_0_ADDR__SHIFT) |
+      (0x07 << R200_VTX_TEX_1_ADDR__SHIFT) |
+      (0x08 << R200_VTX_TEX_2_ADDR__SHIFT) |
+      (0x09 << R200_VTX_TEX_3_ADDR__SHIFT);
+   rmesa->hw.cst.cmd[CST_SE_TCL_INPUT_VTX_3] =
+      (0x0A << R200_VTX_TEX_4_ADDR__SHIFT) |
+      (0x0B << R200_VTX_TEX_5_ADDR__SHIFT);
+  
+
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = 0x00000000;
+
+   for ( i = 0 ; i < ctx->Const.MaxTextureUnits ; i++ ) {
+      rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] = R200_BORDER_MODE_OGL;
+      rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT] = 
+         ((i << R200_TXFORMAT_ST_ROUTE_SHIFT) |  /* <-- note i */
+          (2 << R200_TXFORMAT_WIDTH_SHIFT) |
+          (2 << R200_TXFORMAT_HEIGHT_SHIFT));
+      rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
+      rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] =
+         (/* R200_TEXCOORD_PROJ | */
+          R200_LOD_BIAS_CORRECTION);	/* Small default bias */
+      if (rmesa->radeon.radeonScreen->drmSupportsFragShader) {
+	 rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] =
+	     rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+	 rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0;
+	 rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0;
+      }
+      else {
+	  rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_OLDDRM] =
+	     rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+     }
+
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] =
+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F2] =
+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F3] =
+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F4] =
+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F5] =
+         rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+
+      rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND] =
+         (R200_TXC_ARG_A_ZERO |
+          R200_TXC_ARG_B_ZERO |
+          R200_TXC_ARG_C_DIFFUSE_COLOR |
+          R200_TXC_OP_MADD);
+
+      rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND2] =
+         ((i << R200_TXC_TFACTOR_SEL_SHIFT) |
+          R200_TXC_SCALE_1X |
+          R200_TXC_CLAMP_0_1 |
+          R200_TXC_OUTPUT_REG_R0);
+
+      rmesa->hw.pix[i].cmd[PIX_PP_TXABLEND] =
+         (R200_TXA_ARG_A_ZERO |
+          R200_TXA_ARG_B_ZERO |
+          R200_TXA_ARG_C_DIFFUSE_ALPHA |
+          R200_TXA_OP_MADD);
+
+      rmesa->hw.pix[i].cmd[PIX_PP_TXABLEND2] =
+         ((i << R200_TXA_TFACTOR_SEL_SHIFT) |
+          R200_TXA_SCALE_1X |
+          R200_TXA_CLAMP_0_1 |
+          R200_TXA_OUTPUT_REG_R0);
+   }
+
+   rmesa->hw.tf.cmd[TF_TFACTOR_0] = 0;
+   rmesa->hw.tf.cmd[TF_TFACTOR_1] = 0;
+   rmesa->hw.tf.cmd[TF_TFACTOR_2] = 0;
+   rmesa->hw.tf.cmd[TF_TFACTOR_3] = 0;
+   rmesa->hw.tf.cmd[TF_TFACTOR_4] = 0;
+   rmesa->hw.tf.cmd[TF_TFACTOR_5] = 0;
+
+   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] = 
+      (R200_VAP_TCL_ENABLE | 
+       (0x9 << R200_VAP_VF_MAX_VTX_NUM__SHIFT));
+
+   rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = 
+      (R200_VPORT_X_SCALE_ENA |
+       R200_VPORT_Y_SCALE_ENA |
+       R200_VPORT_Z_SCALE_ENA |
+       R200_VPORT_X_OFFSET_ENA |
+       R200_VPORT_Y_OFFSET_ENA |
+       R200_VPORT_Z_OFFSET_ENA |
+/* FIXME: Turn on for tex rect only */
+       R200_VTX_ST_DENORMALIZED |  
+       R200_VTX_W0_FMT); 
+
+
+   rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = 0;
+   rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = 0;
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = 
+      ((R200_VTX_Z0 | R200_VTX_W0 |
+       (R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)));	
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] = 0;
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = (R200_OUTPUT_XYZW);
+   rmesa->hw.vtx.cmd[VTX_STATE_CNTL] = R200_VSC_UPDATE_USER_COLOR_0_ENABLE;
+						   
+
+   /* Matrix selection */
+   rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_0] = 
+      (R200_MTX_MV << R200_MODELVIEW_0_SHIFT);
+   
+   rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_1] = 
+       (R200_MTX_IMV << R200_IT_MODELVIEW_0_SHIFT);
+
+   rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_2] = 
+      (R200_MTX_MVP << R200_MODELPROJECT_0_SHIFT);
+
+   rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_3] = 
+      ((R200_MTX_TEX0 << R200_TEXMAT_0_SHIFT) |
+       (R200_MTX_TEX1 << R200_TEXMAT_1_SHIFT) |
+       (R200_MTX_TEX2 << R200_TEXMAT_2_SHIFT) |
+       (R200_MTX_TEX3 << R200_TEXMAT_3_SHIFT));
+
+   rmesa->hw.msl.cmd[MSL_MATRIX_SELECT_4] = 
+      ((R200_MTX_TEX4 << R200_TEXMAT_4_SHIFT) |
+       (R200_MTX_TEX5 << R200_TEXMAT_5_SHIFT));
+
+
+   /* General TCL state */
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] = 
+      (R200_SPECULAR_LIGHTS |
+       R200_DIFFUSE_SPECULAR_COMBINE |
+       R200_LOCAL_LIGHT_VEC_GL |
+       R200_LM0_SOURCE_MATERIAL_0 << R200_FRONT_SHININESS_SOURCE_SHIFT |
+       R200_LM0_SOURCE_MATERIAL_1 << R200_BACK_SHININESS_SOURCE_SHIFT);
+
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] = 
+      ((R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_AMBIENT_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_DIFFUSE_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_SPECULAR_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_EMISSIVE_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_AMBIENT_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_DIFFUSE_SOURCE_SHIFT) |
+       (R200_LM1_SOURCE_MATERIAL_1 << R200_BACK_SPECULAR_SOURCE_SHIFT)); 
+
+   rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_0] = 0; /* filled in via callbacks */
+   rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_1] = 0;
+   rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_2] = 0;
+   rmesa->hw.tcl.cmd[TCL_PER_LIGHT_CTL_3] = 0;
+   
+   rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = 
+      (R200_UCP_IN_CLIP_SPACE |
+       R200_CULL_FRONT_IS_CCW);
+
+   /* Texgen/Texmat state */
+   rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] = 0x00ffffff;
+   rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_3] = 
+      ((0 << R200_TEXGEN_0_INPUT_TEX_SHIFT) |
+       (1 << R200_TEXGEN_1_INPUT_TEX_SHIFT) |
+       (2 << R200_TEXGEN_2_INPUT_TEX_SHIFT) |
+       (3 << R200_TEXGEN_3_INPUT_TEX_SHIFT) |
+       (4 << R200_TEXGEN_4_INPUT_TEX_SHIFT) |
+       (5 << R200_TEXGEN_5_INPUT_TEX_SHIFT)); 
+   rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_0] = 0; 
+   rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] =  
+      ((0 << R200_TEXGEN_0_INPUT_SHIFT) |
+       (1 << R200_TEXGEN_1_INPUT_SHIFT) |
+       (2 << R200_TEXGEN_2_INPUT_SHIFT) |
+       (3 << R200_TEXGEN_3_INPUT_SHIFT) |
+       (4 << R200_TEXGEN_4_INPUT_SHIFT) |
+       (5 << R200_TEXGEN_5_INPUT_SHIFT)); 
+   rmesa->hw.tcg.cmd[TCG_TEX_CYL_WRAP_CTL] = 0;
+
+
+   for (i = 0 ; i < 8; i++) {
+      struct gl_light *l = &ctx->Light.Light[i];
+      GLenum p = GL_LIGHT0 + i;
+      *(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX;
+
+      ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient );
+      ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse );
+      ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular );
+      ctx->Driver.Lightfv( ctx, p, GL_POSITION, NULL );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, NULL );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff );
+      ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION,
+			   &l->ConstantAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION, 
+			   &l->LinearAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION, 
+			   &l->QuadraticAttenuation );
+      *(float *)&(rmesa->hw.lit[i].cmd[LIT_ATTEN_XXX]) = 0.0;
+   }
+
+   ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT, 
+			     ctx->Light.Model.Ambient );
+
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx );
+
+   for (i = 0 ; i < 6; i++) {
+      ctx->Driver.ClipPlane( ctx, GL_CLIP_PLANE0 + i, NULL );
+   }
+
+   ctx->Driver.Fogfv( ctx, GL_FOG_MODE, NULL );
+   ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
+   ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
+   ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL );
+   
+   rmesa->hw.grd.cmd[GRD_VERT_GUARD_CLIP_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_VERT_GUARD_DISCARD_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_HORZ_GUARD_CLIP_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_HORZ_GUARD_DISCARD_ADJ] = IEEE_ONE;
+
+   rmesa->hw.eye.cmd[EYE_X] = 0;
+   rmesa->hw.eye.cmd[EYE_Y] = 0;
+   rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
+   rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
+
+   rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] =
+      R200_PS_SE_SEL_STATE | R200_PS_MULT_CONST;
+
+   /* ptp_eye is presumably used to calculate the attenuation wrt a different
+      location? In any case, since point attenuation triggers _needeyecoords,
+      it is constant. Probably ignored as long as R200_PS_USE_MODEL_EYE_VEC
+      isn't set */
+   rmesa->hw.ptp.cmd[PTP_EYE_X] = 0;
+   rmesa->hw.ptp.cmd[PTP_EYE_Y] = 0;
+   rmesa->hw.ptp.cmd[PTP_EYE_Z] = IEEE_ONE | 0x80000000; /* -1.0 */
+   rmesa->hw.ptp.cmd[PTP_EYE_3] = 0;
+   /* no idea what the ptp_vport_scale values are good for, except the
+      PTSIZE one - hopefully doesn't matter */
+   rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_0] = IEEE_ONE;
+   rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_1] = IEEE_ONE;
+   rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_PTSIZE] = IEEE_ONE;
+   rmesa->hw.ptp.cmd[PTP_VPORT_SCALE_3] = IEEE_ONE;
+   rmesa->hw.ptp.cmd[PTP_ATT_CONST_QUAD] = 0;
+   rmesa->hw.ptp.cmd[PTP_ATT_CONST_LIN] = 0;
+   rmesa->hw.ptp.cmd[PTP_ATT_CONST_CON] = IEEE_ONE;
+   rmesa->hw.ptp.cmd[PTP_ATT_CONST_3] = 0;
+   rmesa->hw.ptp.cmd[PTP_CLAMP_MIN] = IEEE_ONE;
+   rmesa->hw.ptp.cmd[PTP_CLAMP_MAX] = 0x44ffe000; /* 2047 */
+   rmesa->hw.ptp.cmd[PTP_CLAMP_2] = 0;
+   rmesa->hw.ptp.cmd[PTP_CLAMP_3] = 0;
+
+   r200LightingSpaceChange( ctx );
+
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+      radeon_init_query_stateobj(&rmesa->radeon, R200_QUERYOBJ_CMDSIZE);
+      rmesa->radeon.query.queryobj.cmd[R200_QUERYOBJ_CMD_0] = CP_PACKET0(RADEON_RB3D_ZPASS_DATA, 0);
+      rmesa->radeon.query.queryobj.cmd[R200_QUERYOBJ_DATA_0] = 0;
+   }
+
+   rmesa->radeon.hw.all_dirty = GL_TRUE;
+
+   rcommonInitCmdBuf(&rmesa->radeon);
+}
diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c
new file mode 100644
index 0000000000..262fe3cdde
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_swtcl.c
@@ -0,0 +1,947 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/simple_list.h"
+
+#include "swrast/s_context.h"
+#include "swrast/s_fog.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_state.h"
+#include "r200_swtcl.h"
+#include "r200_tcl.h"
+
+
+/***********************************************************************
+ *                         Initialization
+ ***********************************************************************/
+
+#define EMIT_ATTR( ATTR, STYLE, F0 )					\
+do { /* append one attribute; expects rmesa and fmt_0 in the caller's scope */	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
+   rmesa->radeon.swtcl.vertex_attr_count++;					\
+   fmt_0 |= (F0);							\
+} while (0)
+
+#define EMIT_PAD( N )							\
+do { /* bare EMIT_PAD below is not followed by '(' so it is not re-expanded; it names the format constant declared elsewhere */	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
+   rmesa->radeon.swtcl.vertex_attr_count++;					\
+} while (0)
+
+static void r200SetVertexFormat( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   DECLARE_RENDERINPUTS(index_bitset);
+   int fmt_0 = 0;
+   int fmt_1 = 0;
+   int offset = 0;
+   /* Build the swtcl vertex layout and the VTXFMT words from tnl's render inputs. */
+   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+
+   /* Important: positions are taken from NdcPtr when it is set, else ClipPtr.
+    */
+   if ( VB->NdcPtr != NULL ) {
+      VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+   }
+   else {
+      VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
+   }
+
+   assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
+   rmesa->radeon.swtcl.vertex_attr_count = 0;
+
+   /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+    * build up a hardware vertex.
+    */
+   if ( !rmesa->swtcl.needproj ||
+       RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { /* need w coord for projected textures */
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F, R200_VTX_XY | R200_VTX_Z0 | R200_VTX_W0 );
+      offset = 4;
+   }
+   else {
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F, R200_VTX_XY | R200_VTX_Z0 );
+      offset = 3;
+   }
+
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
+      EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F, R200_VTX_POINT_SIZE );
+      offset += 1;
+   }
+   /* offset so far is the index of the color0 word (later used as ui[coloroffset]). */
+   rmesa->swtcl.coloroffset = offset;
+#if MESA_LITTLE_ENDIAN
+   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) );
+#else
+   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR, (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) );
+#endif
+   offset += 1;
+   /* specoffset 0 means "no specular"; it is set only if COLOR1 is a render input. */
+   rmesa->swtcl.specoffset = 0;
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) ||
+       RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
+
+#if MESA_LITTLE_ENDIAN
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+	 rmesa->swtcl.specoffset = offset;
+	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) );
+      }
+      else {
+	 EMIT_PAD( 3 );
+      }
+
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
+	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) );
+      }
+      else {
+	 EMIT_PAD( 1 );
+      }
+#else
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
+	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) );
+      }
+      else {
+	 EMIT_PAD( 1 );
+      }
+
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+	 rmesa->swtcl.specoffset = offset;
+	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) );
+      }
+      else {
+	 EMIT_PAD( 3 );
+      }
+#endif
+   }
+
+   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+      int i;
+
+      for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+	 if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
+	    GLuint sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
+
+	    fmt_1 |= sz << (3 * i);
+	    EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1, 0 );
+	 }
+      }
+   }
+   /* Make sure the fog-use field of PP_FOG_COLOR selects R200_FOG_USE_SPEC_ALPHA. */
+   if ( (rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] & R200_FOG_USE_MASK)
+      != R200_FOG_USE_SPEC_ALPHA ) {
+      R200_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_USE_MASK;
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_SPEC_ALPHA;
+   }
+   /* Re-install the attributes only if the inputs or either VTXFMT word changed. */
+   if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) ||
+	(rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0) ||
+	(rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
+      R200_NEWPRIM(rmesa);
+      R200_STATECHANGE( rmesa, vtx );
+      rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0;
+      rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = fmt_1;
+
+      rmesa->radeon.swtcl.vertex_size =
+	  _tnl_install_attrs( ctx,
+			      rmesa->radeon.swtcl.vertex_attrs,
+			      rmesa->radeon.swtcl.vertex_attr_count,
+			      NULL, 0 );
+      rmesa->radeon.swtcl.vertex_size /= 4;
+      RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
+   }
+}
+
+static void r200_predict_emit_size( r200ContextPtr rmesa )
+{
+   /* Fixed allowances added on top of the state size (vertex array + primitive). */
+   const int aos_sz = 7;
+   const int prim_sz = 3;
+   int state_sz;
+
+   if (RADEON_DEBUG & RADEON_VERTS)
+      fprintf(stderr, "%s\n", __func__);
+   if (rmesa->radeon.swtcl.emit_prediction)
+      return;			/* a prediction is already pending */
+   state_sz = radeonCountStateEmitSize(&rmesa->radeon);
+   /* Non-zero from rcommonEnsureCmdBufSpace(): recount the state size. */
+   if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
+	       state_sz + aos_sz + prim_sz, __FUNCTION__))
+      state_sz = radeonCountStateEmitSize(&rmesa->radeon);
+   rmesa->radeon.swtcl.emit_prediction =
+      state_sz + aos_sz + prim_sz + rmesa->radeon.cmdbuf.cs->cdw;
+}
+
+/* Refresh the swtcl vertex format, then trace the call when RADEON_VERTS debugging is on. */
+static void r200RenderStart( GLcontext *ctx )
+{
+   r200SetVertexFormat( ctx );
+   if (RADEON_DEBUG & RADEON_VERTS)
+      fprintf(stderr, "%s\n", __func__);
+}
+
+
+/**
+ * Choose the vertex state used by SW TCL.  Decides up front whether the
+ * projection divide is done by Mesa (needproj) or left to the hardware,
+ * and updates the VTE / VAP control words to match.
+ */
+void r200ChooseVertexState( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint new_vte, new_vap;
+   GLboolean have_tex;
+
+   /* _tnl_need_projected_coords must not be called during a rasterization
+    * fallback; this function runs again when the fallback ends, so bail out.
+    */
+   if (rmesa->radeon.Fallback != 0)
+      return;
+
+   new_vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL];
+   new_vap = rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL];
+   have_tex = RENDERINPUTS_TEST_RANGE( tnl->render_inputs_bitset,
+				       _TNL_FIRST_TEX, _TNL_LAST_TEX ) ? GL_TRUE : GL_FALSE;
+
+   /* HW perspective divide is a win, but tiny vertex formats are a
+    * bigger one: without textures (or for two-sided / unfilled
+    * triangles) Mesa does the projection.
+    */
+   if (!have_tex ||
+       (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
+      rmesa->swtcl.needproj = GL_TRUE;
+      new_vte |= R200_VTX_XY_FMT | R200_VTX_Z_FMT;
+      new_vte &= ~R200_VTX_W0_FMT;
+   }
+   else {
+      rmesa->swtcl.needproj = GL_FALSE;
+      new_vte &= ~(R200_VTX_XY_FMT | R200_VTX_Z_FMT);
+      new_vte |= R200_VTX_W0_FMT;
+   }
+
+   /* W is forced to one exactly when no texture coordinates are in use. */
+   if (have_tex)
+      new_vap &= ~R200_VAP_FORCE_W_TO_ONE;
+   else
+      new_vap |= R200_VAP_FORCE_W_TO_ONE;
+
+   _tnl_need_projected_coords( ctx, rmesa->swtcl.needproj );
+
+   if (rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] != new_vte) {
+      R200_STATECHANGE( rmesa, vte );
+      rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = new_vte;
+   }
+   if (rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] != new_vap) {
+      R200_STATECHANGE( rmesa, vap );
+      rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] = new_vap;
+   }
+}
+
+void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   if (RADEON_DEBUG & RADEON_VERTS)
+      fprintf(stderr, "%s\n", __func__);
+
+   /* Emit state, then the vertex array at current_offset, then the primitive. */
+   radeonEmitState(&rmesa->radeon);
+   r200EmitVertexAOS( rmesa,
+		      rmesa->radeon.swtcl.vertex_size,
+		      rmesa->radeon.swtcl.bo,
+		      current_offset);
+
+
+   r200EmitVbufPrim( rmesa,
+		     rmesa->radeon.swtcl.hw_primitive,
+		     rmesa->radeon.swtcl.numverts);
+   if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
+      WARN_ONCE("Rendering was %d commands larger than predicted size."
+	    " We might overflow  command buffer.\n",
+	    rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
+   /* Zero means no prediction is pending; r200_predict_emit_size() tests for this. */
+   rmesa->radeon.swtcl.emit_prediction = 0;
+
+}
+
+/**************************************************************************/
+
+
+/* Map a GL primitive to the reduced hardware primitive class: points (or
+ * point sprites), lines, or triangles for everything else.
+ */
+static INLINE GLuint reduced_hw_prim( GLcontext *ctx, GLuint prim)
+{
+   if (prim == GL_POINTS) {
+      const int sized = (ctx->_TriangleCaps & (DD_POINT_SIZE | DD_POINT_ATTEN)) != 0;
+      const int smooth = (ctx->_TriangleCaps & (DD_POINT_SMOOTH)) != 0;
+
+      /* Sprites for point-sprite mode, or for sized/attenuated non-smooth points. */
+      if (ctx->Point.PointSprite || (sized && !smooth))
+	 return R200_VF_PRIM_POINT_SPRITES;
+      return R200_VF_PRIM_POINTS;
+   }
+
+   if (prim == GL_LINES || prim == GL_LINE_LOOP || prim == GL_LINE_STRIP)
+      return R200_VF_PRIM_LINES;
+   return R200_VF_PRIM_TRIANGLES;	/* all others reduced to triangles */
+}
+
+
+static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim );
+static void r200RenderPrimitive( GLcontext *ctx, GLenum prim );
+static void r200ResetLineStipple( GLcontext *ctx );
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+#define HAVE_POLYGONS    1
+#define HAVE_ELTS        0
+
+static void* r200_alloc_verts( r200ContextPtr rmesa, GLuint n, GLuint size)
+{
+   void *verts = NULL;
+   while (!verts) {		/* retry until the allocator returns storage */
+      r200_predict_emit_size( rmesa );	/* refresh the prediction before each try */
+      verts = rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 );
+   }
+   return verts;
+}
+
+#undef LOCAL_VARS
+#undef ALLOC_VERTS
+#define CTX_ARG r200ContextPtr rmesa
+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) r200_alloc_verts(rmesa, n, size)
+#define LOCAL_VARS						\
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
+   const char *r200verts = (char *)rmesa->radeon.swtcl.verts;
+#define VERT(x) (radeonVertex *)(r200verts + ((x) * vertsize * sizeof(int)))
+#define VERTEX radeonVertex
+#define DO_DEBUG_VERTS (1 && (R200_DEBUG & RADEON_VERTS))
+/* TAG() adds an r200_ prefix; presumably this yields the r200_triangle etc. used by the macros below. */
+#undef TAG
+#define TAG(x) r200_##x
+#include "tnl_dd/t_dd_triemit.h"
+
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+#define QUAD( a, b, c, d ) r200_quad( rmesa, a, b, c, d )
+#define TRI( a, b, c )     r200_triangle( rmesa, a, b, c )
+#define LINE( a, b )       r200_line( rmesa, a, b )
+#define POINT( a )         r200_point( rmesa, a )
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+#define R200_TWOSIDE_BIT	0x01
+#define R200_UNFILLED_BIT	0x02
+#define R200_MAX_TRIFUNC	0x04
+
+/* One set of render entry points per TWOSIDE/UNFILLED bit combination. */
+static struct {
+   tnl_points_func	        points;
+   tnl_line_func		line;
+   tnl_triangle_func	triangle;
+   tnl_quad_func		quad;
+} rast_tab[R200_MAX_TRIFUNC];
+
+
+#define DO_FALLBACK  0
+#define DO_UNFILLED (IND & R200_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & R200_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_OFFSET     0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_SPEC   1
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define TAB rast_tab
+
+#define DEPTH_SCALE 1.0
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) (_v)->v.x	/* arguments parenthesized so any expression is safe */
+#define VERT_Y(_v) (_v)->v.y
+#define VERT_Z(_v) (_v)->v.z
+#define AREA_IS_CCW( a ) ((a) < 0)	/* a negative area is treated as CCW here */
+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + ((e)*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
+
+#define VERT_SET_RGBA( v, c )  					\
+do {	/* convert four float channels into the color word at coloroffset */	\
+   radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]);		\
+} while (0)
+
+#define VERT_COPY_RGBA( v0, v1 ) (v0)->ui[coloroffset] = (v1)->ui[coloroffset]
+
+#define VERT_SET_SPEC( v, c )					\
+do {	/* does nothing when specoffset is 0 */			\
+   if (specoffset) {						\
+      radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]);	\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]);	\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]);	\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]);	\
+   }								\
+} while (0)
+#define VERT_COPY_SPEC( v0, v1 )			\
+do {	/* copies red/green/blue only; does nothing when specoffset is 0 */	\
+   if (specoffset) {					\
+      radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]);	\
+      radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]);	\
+      spec0->red   = spec1->red;	\
+      spec0->green = spec1->green;	\
+      spec0->blue  = spec1->blue; 	\
+   }							\
+} while (0)
+
+/* These don't need LE32_TO_CPU() as they are used to save and restore
+ * colors which are already in the correct format.
+ */
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+#define VERT_SAVE_SPEC( idx )    if (specoffset) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
+
+#undef LOCAL_VARS
+#undef TAG
+#undef INIT
+
+#define LOCAL_VARS(n)							\
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);			\
+   GLuint color[n] = {0}, spec[n] = {0};						\
+   GLuint coloroffset = rmesa->swtcl.coloroffset;	\
+   GLuint specoffset = rmesa->swtcl.specoffset;			\
+   (void) color; (void) spec; (void) coloroffset; (void) specoffset;
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+#define RASTERIZE(x) r200RasterPrimitive( ctx, reduced_hw_prim(ctx, x) )
+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
+#undef TAG
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R200_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R200_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R200_TWOSIDE_BIT|R200_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+static void init_rast_tab( void )   /* runs the four TAG(init) functions; presumably each fills rast_tab[IND] -- confirm in t_dd_tritmp.h */
+{
+   init();                     /* variant included with IND == 0 */
+   init_twoside();             /* variant included with IND == R200_TWOSIDE_BIT */
+   init_unfilled();            /* variant included with IND == R200_UNFILLED_BIT */
+   init_twoside_unfilled();    /* variant included with IND == R200_TWOSIDE_BIT|R200_UNFILLED_BIT */
+}
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		\
+      r200_point( rmesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+   r200_line( rmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 )  \
+   r200_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   r200_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#define INIT(x) do {					\
+   r200RenderPrimitive( ctx, x );			\
+} while (0)
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);		\
+   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
+   const char *r200verts = (char *)rmesa->radeon.swtcl.verts;		\
+   const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+   const GLboolean stipple = ctx->Line.StippleFlag;		\
+   (void) elt; (void) stipple;
+#define RESET_STIPPLE	if ( stipple ) r200ResetLineStipple( ctx );
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS
+#define ELT(x) (x)
+#define TAG(x) r200_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) r200_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+void r200ChooseRenderState( GLcontext *ctx )   /* pick the swtcl render callbacks matching the current triangle caps */
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint index = 0;   /* combination of R200_TWOSIDE_BIT / R200_UNFILLED_BIT, used to index rast_tab[] */
+   GLuint flags = ctx->_TriangleCaps;
+
+   if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback)   /* proceed only with TclFallback set and Fallback clear */
+      return;
+
+   if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R200_TWOSIDE_BIT;
+   if (flags & DD_TRI_UNFILLED)      index |= R200_UNFILLED_BIT;
+
+   if (index != rmesa->radeon.swtcl.RenderIndex) {   /* reinstall callbacks only when the index changed */
+      tnl->Driver.Render.Points = rast_tab[index].points;
+      tnl->Driver.Render.Line = rast_tab[index].line;
+      tnl->Driver.Render.ClippedLine = rast_tab[index].line;   /* clipped lines share the unclipped line function */
+      tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+      tnl->Driver.Render.Quad = rast_tab[index].quad;
+
+      if (index == 0) {   /* neither bit set: use the driver's own r200_* tables */
+	 tnl->Driver.Render.PrimTabVerts = r200_render_tab_verts;
+	 tnl->Driver.Render.PrimTabElts = r200_render_tab_elts;
+	 tnl->Driver.Render.ClippedPolygon = r200_fast_clipped_poly;
+      } else {   /* twoside and/or unfilled: use the generic _tnl_* tables */
+	 tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+	 tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+	 tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+      }
+
+      rmesa->radeon.swtcl.RenderIndex = index;
+   }
+}
+
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+
+static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )   /* make hwprim the current swtcl hardware primitive */
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (rmesa->radeon.swtcl.hw_primitive != hwprim) {   /* nothing to do when the primitive is unchanged */
+      /* need to disable perspective-correct texturing for point sprites */
+      if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) {
+	 if (rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE) {
+	    R200_STATECHANGE( rmesa, set );
+	    rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_PERSPECTIVE_ENABLE;
+	 }
+      }
+      else if (!(rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE)) {   /* every other case: set the bit again if it was cleared */
+	 R200_STATECHANGE( rmesa, set );
+	 rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PERSPECTIVE_ENABLE;
+      }
+      R200_NEWPRIM( rmesa );
+      rmesa->radeon.swtcl.hw_primitive = hwprim;
+   }
+}
+
+static void r200RenderPrimitive( GLcontext *ctx, GLenum prim )   /* record the GL primitive and, where possible, select the hw primitive now */
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   rmesa->radeon.swtcl.render_primitive = prim;
+   if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED))   /* skipped for unfilled triangle-class prims; presumably the unfilled helpers call RASTERIZE() themselves -- confirm */
+      r200RasterPrimitive( ctx, reduced_hw_prim(ctx, prim) );
+}
+
+static void r200RenderFinish( GLcontext *ctx )   /* installed as tnl->Driver.Render.Finish; does no work */
+{
+}
+
+static void r200ResetLineStipple( GLcontext *ctx )   /* installed as tnl->Driver.Render.ResetLineStipple */
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   R200_STATECHANGE( rmesa, lin );   /* only flags the 'lin' state atom; presumably its re-emission resets the stipple -- confirm */
+}
+
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+
+static const char * const fallbackStrings[] = {
+   "Texture mode",
+   "glDrawBuffer(GL_FRONT_AND_BACK)",
+   "glEnable(GL_STENCIL) without hw stencil buffer",
+   "glRenderMode(selection or feedback)",
+   "R200_NO_RAST",
+   "Mixing GL_CLAMP_TO_BORDER and GL_CLAMP (or GL_MIRROR_CLAMP_ATI)"
+};
+
+/* Describe fallback 'bit' (table index = position of its highest set bit); bits without an entry get a placeholder instead of an out-of-bounds read. */
+static const char *getFallbackString(GLuint bit)
+{
+   const GLuint nstrings = sizeof(fallbackStrings) / sizeof(fallbackStrings[0]);
+   GLuint i = 0;
+   for ( ; bit > 1 ; bit >>= 1)
+      i++;
+
+   return i < nstrings ? fallbackStrings[i] : "Unknown fallback bit";
+}
+
+
+void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode )   /* set (mode true) or clear (mode false) one rasterization fallback bit */
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint oldfallback = rmesa->radeon.Fallback;   /* value before this call, used to detect the 0 <-> non-zero transitions */
+
+   if (mode) {
+      rmesa->radeon.Fallback |= bit;
+      if (oldfallback == 0) {   /* first fallback bit being set: enter the fallback */
+	 radeon_firevertices(&rmesa->radeon);
+	 TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_TRUE );
+	 _swsetup_Wakeup( ctx );
+	 rmesa->radeon.swtcl.RenderIndex = ~0;   /* no valid index, so r200ChooseRenderState() reinstalls callbacks next time */
+         if (R200_DEBUG & RADEON_FALLBACKS) {
+            fprintf(stderr, "R200 begin rasterization fallback: 0x%x %s\n",
+                    bit, getFallbackString(bit));
+         }
+      }
+   }
+   else {
+      rmesa->radeon.Fallback &= ~bit;
+      if (oldfallback == bit) {   /* 'bit' was the only fallback bit set: leave the fallback */
+
+	 _swrast_flush( ctx );
+	 tnl->Driver.Render.Start = r200RenderStart;
+	 tnl->Driver.Render.PrimitiveNotify = r200RenderPrimitive;
+	 tnl->Driver.Render.Finish = r200RenderFinish;
+
+	 tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+	 tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+	 tnl->Driver.Render.Interp = _tnl_interp;
+
+	 tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple;
+	 TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_FALSE );
+	 if (rmesa->radeon.TclFallback) {
+	    /* These are already done if rmesa->radeon.TclFallback goes to
+	     * zero above. But not if it doesn't (R200_NO_TCL for
+	     * example?)
+	     */
+	    _tnl_invalidate_vertex_state( ctx, ~0 );
+	    _tnl_invalidate_vertices( ctx, ~0 );
+	    RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset );
+	    r200ChooseVertexState( ctx );
+	    r200ChooseRenderState( ctx );
+	 }
+         if (R200_DEBUG & RADEON_FALLBACKS) {
+            fprintf(stderr, "R200 end rasterization fallback: 0x%x %s\n",
+                    bit, getFallbackString(bit));
+         }
+      }
+   }
+}
+
+
+
+
+/**
+ * Cope with depth operations by drawing individual pixels as points.
+ *
+ * \todo
+ * The way the vertex state is set in this routine is hokey.  It seems to
+ * work, but it's very hackish.  This whole routine is pretty hackish.  If
+ * the bitmap is small enough, it seems like it would be faster to copy it
+ * to AGP memory and use it as a non-power-of-two texture (i.e.,
+ * NV_texture_rectangle).
+ */
+void
+r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+		  GLsizei width, GLsizei height,
+		  const struct gl_pixelstore_attrib *unpack,
+		  const GLubyte *bitmap )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLfloat *rc = ctx->Current.RasterColor;
+   GLint row, col;
+   radeonVertex vert;   /* only tv.x, tv.y, tv.z and tv.color are written below; NOTE(review): fmt_0 also requests W0 -- confirm w may stay unset */
+   GLuint orig_vte;
+   GLuint h;
+
+
+   /* Turn off tcl.
+    */
+   TCL_FALLBACK( ctx, R200_TCL_FALLBACK_BITMAP, 1 );
+
+   /* Choose tiny vertex format
+    */
+   {
+      const GLuint fmt_0 = R200_VTX_XY | R200_VTX_Z0 | R200_VTX_W0
+	  | (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT);
+      const GLuint fmt_1 = 0;
+      GLuint vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL];
+      GLuint vap = rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL];
+
+      vte &= ~(R200_VTX_XY_FMT | R200_VTX_Z_FMT);
+      vte |= R200_VTX_W0_FMT;
+      vap &= ~R200_VAP_FORCE_W_TO_ONE;
+
+      rmesa->radeon.swtcl.vertex_size = 5;   /* presumably x, y, z, w + packed colour in dwords, matching fmt_0 -- TODO confirm */
+
+      if ( (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0)
+	   || (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) {
+	 R200_NEWPRIM(rmesa);
+	 R200_STATECHANGE( rmesa, vtx );
+	 rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0;
+	 rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = fmt_1;
+      }
+
+      if (vte != rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL]) {
+	 R200_STATECHANGE( rmesa, vte );
+	 rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = vte;
+      }
+
+      if (vap != rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL]) {
+	 R200_STATECHANGE( rmesa, vap );
+	 rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] = vap;
+      }
+   }
+
+   /* Ready for point primitives:
+    */
+   r200RenderPrimitive( ctx, GL_POINTS );
+
+   /* Turn off the hw viewport transformation:
+    */
+   R200_STATECHANGE( rmesa, vte );
+   orig_vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL];   /* saved after the format change above; restored once the points are emitted */
+   rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] &= ~(R200_VPORT_X_SCALE_ENA |
+					   R200_VPORT_Y_SCALE_ENA |
+					   R200_VPORT_Z_SCALE_ENA |
+					   R200_VPORT_X_OFFSET_ENA |
+					   R200_VPORT_Y_OFFSET_ENA |
+					   R200_VPORT_Z_OFFSET_ENA);
+
+   /* Turn off other stuff:  Stipple?, texture?, blending?, etc.
+    */
+
+
+   /* Populate the vertex
+    *
+    * Incorporate FOG into RGBA
+    */
+   if (ctx->Fog.Enabled) {
+      const GLfloat *fc = ctx->Fog.Color;
+      GLfloat color[4];
+      GLfloat f;
+
+      if (ctx->Fog.FogCoordinateSource == GL_FOG_COORDINATE_EXT)
+         f = _swrast_z_to_fogfactor(ctx, ctx->Current.Attrib[VERT_ATTRIB_FOG][0]);
+      else
+         f = _swrast_z_to_fogfactor(ctx, ctx->Current.RasterDistance);
+
+      color[0] = f * rc[0] + (1.F - f) * fc[0];
+      color[1] = f * rc[1] + (1.F - f) * fc[1];
+      color[2] = f * rc[2] + (1.F - f) * fc[2];
+      color[3] = rc[3];   /* alpha is taken from the raster colour unblended */
+
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.red,   color[0]);
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.green, color[1]);
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.blue,  color[2]);
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.alpha, color[3]);
+   }
+   else {
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.red,   rc[0]);
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.green, rc[1]);
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.blue,  rc[2]);
+      UNCLAMPED_FLOAT_TO_CHAN(vert.tv.color.alpha, rc[3]);
+   }
+
+
+   vert.tv.z = ctx->Current.RasterPos[2];
+
+
+   /* Update window height
+    */
+   LOCK_HARDWARE( &rmesa->radeon );
+   UNLOCK_HARDWARE( &rmesa->radeon );
+   h = radeon_get_drawable(&rmesa->radeon)->h + radeon_get_drawable(&rmesa->radeon)->y;
+   px += radeon_get_drawable(&rmesa->radeon)->x;
+
+   /* Clipping handled by existing mechanisms in r200_ioctl.c?
+    */
+   for (row=0; row<height; row++) {
+      const GLubyte *src = (const GLubyte *)
+	 _mesa_image_address2d(unpack, bitmap, width, height,
+                               GL_COLOR_INDEX, GL_BITMAP, row, 0 );
+
+      if (unpack->LsbFirst) {
+         /* Lsb first */
+         GLubyte mask = 1U << (unpack->SkipPixels & 0x7);
+         for (col=0; col<width; col++) {
+            if (*src & mask) {
+	       vert.tv.x = px+col;
+	       vert.tv.y = h - (py+row) - 1;
+	       r200_point( rmesa, &vert );
+            }
+	    src += (mask >> 7);   /* advance one byte after testing bit 7 */
+	    mask = ((mask << 1) & 0xff) | (mask >> 7);   /* rotate the mask left by one bit */
+         }
+
+         /* get ready for next row (NOTE: src is re-derived at the top of each row, so this increment has no effect) */
+         if (mask != 1)
+            src++;
+      }
+      else {
+         /* Msb first */
+         GLubyte mask = 128U >> (unpack->SkipPixels & 0x7);
+         for (col=0; col<width; col++) {
+            if (*src & mask) {
+	       vert.tv.x = px+col;
+	       vert.tv.y = h - (py+row) - 1;
+	       r200_point( rmesa, &vert );
+            }
+	    src += mask & 1;   /* advance one byte after testing bit 0 */
+	    mask = ((mask << 7) & 0xff) | (mask >> 1);   /* rotate the mask right by one bit */
+         }
+         /* get ready for next row (NOTE: src is re-derived at the top of each row, so this increment has no effect) */
+         if (mask != 128)
+            src++;
+      }
+   }
+
+   /* Fire outstanding vertices, restore state
+    */
+   R200_STATECHANGE( rmesa, vte );
+   rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] = orig_vte;
+
+   /* Unfallback
+    */
+   TCL_FALLBACK( ctx, R200_TCL_FALLBACK_BITMAP, 0 );
+
+   /* Need to restore vertexformat?
+    */
+   if (rmesa->radeon.TclFallback)
+      r200ChooseVertexState( ctx );
+}
+
+
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+void r200InitSwtcl( GLcontext *ctx )   /* install the swtcl render hooks and reset the swtcl state of this context */
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   static int firsttime = 1;   /* static: init_rast_tab() runs on the first call only, whatever the context */
+
+   if (firsttime) {
+      init_rast_tab();
+      firsttime = 0;
+   }
+   rmesa->radeon.swtcl.emit_prediction = 0;
+
+   tnl->Driver.Render.Start = r200RenderStart;
+   tnl->Driver.Render.Finish = r200RenderFinish;
+   tnl->Driver.Render.PrimitiveNotify = r200RenderPrimitive;
+   tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple;
+   tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+   tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+   tnl->Driver.Render.Interp = _tnl_interp;
+
+   /* FIXME: what are these numbers? */
+   _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
+		       36 * sizeof(GLfloat) );
+
+   rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;   /* GET_VERTEX() indexes into this buffer */
+   rmesa->radeon.swtcl.RenderIndex = ~0;   /* matches no rast_tab index, so the first r200ChooseRenderState() installs callbacks */
+   rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
+   rmesa->radeon.swtcl.hw_primitive = 0;
+}
+
diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.h b/src/mesa/drivers/dri/r200/r200_swtcl.h
new file mode 100644
index 0000000000..b0905879d7
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_swtcl.h
@@ -0,0 +1,69 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_SWTCL_H__
+#define __R200_SWTCL_H__
+
+#include "main/mtypes.h"
+#include "swrast/swrast.h"
+#include "r200_context.h"
+
+extern void r200InitSwtcl( GLcontext *ctx );
+
+extern void r200ChooseRenderState( GLcontext *ctx );
+extern void r200ChooseVertexState( GLcontext *ctx );
+
+extern void r200CheckTexSizes( GLcontext *ctx );
+
+extern void r200BuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+				 GLuint newinputs );
+
+extern void r200PrintSetupFlags(char *msg, GLuint flags );
+
+
+extern void r200_translate_vertex( GLcontext *ctx, 
+				     const radeonVertex *src, 
+				     SWvertex *dst );
+
+extern void r200_print_vertex( GLcontext *ctx, const radeonVertex *v );
+
+extern void r200_import_float_colors( GLcontext *ctx );
+extern void r200_import_float_spec_colors( GLcontext *ctx );
+
+extern void r200PointsBitmap( GLcontext *ctx, GLint px, GLint py,
+			      GLsizei width, GLsizei height,
+			      const struct gl_pixelstore_attrib *unpack,
+			      const GLubyte *bitmap );
+
+void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
new file mode 100644
index 0000000000..d43e14581e
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_tcl.c
@@ -0,0 +1,721 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "main/colormac.h"
+#include "main/light.h"
+
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "r200_context.h"
+#include "r200_state.h"
+#include "r200_ioctl.h"
+#include "r200_tcl.h"
+#include "r200_swtcl.h"
+#include "r200_maos.h"
+
+#include "radeon_common_context.h"
+
+
+
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_LOOP   0
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_QUADS       1
+#define HAVE_QUAD_STRIPS 1
+#define HAVE_POLYGONS    1
+#define HAVE_ELTS        1
+
+
+#define HW_POINTS           ((ctx->Point.PointSprite || \
+				((ctx->_TriangleCaps & (DD_POINT_SIZE | DD_POINT_ATTEN)) && \
+	 			!(ctx->_TriangleCaps & (DD_POINT_SMOOTH)))) ? \
+				R200_VF_PRIM_POINT_SPRITES : R200_VF_PRIM_POINTS)
+#define HW_LINES            R200_VF_PRIM_LINES
+#define HW_LINE_LOOP        0
+#define HW_LINE_STRIP       R200_VF_PRIM_LINE_STRIP
+#define HW_TRIANGLES        R200_VF_PRIM_TRIANGLES
+#define HW_TRIANGLE_STRIP_0 R200_VF_PRIM_TRIANGLE_STRIP
+#define HW_TRIANGLE_STRIP_1 0
+#define HW_TRIANGLE_FAN     R200_VF_PRIM_TRIANGLE_FAN
+#define HW_QUADS            R200_VF_PRIM_QUADS
+#define HW_QUAD_STRIP       R200_VF_PRIM_QUAD_STRIP
+#define HW_POLYGON          R200_VF_PRIM_POLYGON
+
+
+static GLboolean discrete_prim[0x10] = {
+   0,				/* 0 none */
+   1,				/* 1 points */
+   1,				/* 2 lines */
+   0,				/* 3 line_strip */
+   1,				/* 4 tri_list */
+   0,				/* 5 tri_fan */
+   0,				/* 6 tri_strip */
+   0,				/* 7 tri_w_flags */
+   1,				/* 8 rect list (unused) */
+   1,				/* 9 3vert point */
+   1,				/* a 3vert line */
+   0,				/* b point sprite */
+   0,				/* c line loop */
+   1,				/* d quads */
+   0,				/* e quad strip */
+   0,				/* f polygon */
+};
+   
+
+#define LOCAL_VARS r200ContextPtr rmesa = R200_CONTEXT(ctx)
+#define ELT_TYPE  GLushort
+
+#define ELT_INIT(prim, hw_prim) \
+   r200TclPrimitive( ctx, prim, hw_prim | R200_VF_PRIM_WALK_IND )
+
+#define GET_MESA_ELTS() TNL_CONTEXT(ctx)->vb.Elts
+
+
+/* Don't really know how many elts will fit in what's left of cmdbuf,
+ * as there is state to emit, etc:
+ */
+
+/* Testing on isosurf shows a maximum around here.  Don't know if it's
+ * the card or driver or kernel module that is causing the behaviour.
+ */
+#define GET_MAX_HW_ELTS() 300
+
+#define RESET_STIPPLE() do {			\
+   R200_STATECHANGE( rmesa, lin );		\
+   radeonEmitState(&rmesa->radeon);			\
+} while (0)
+
+#define AUTO_STIPPLE( mode )  do {		\
+   R200_STATECHANGE( rmesa, lin );		\
+   if (mode)					\
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |=	\
+	 R200_LINE_PATTERN_AUTO_RESET;	\
+   else						\
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
+	 ~R200_LINE_PATTERN_AUTO_RESET;	\
+   radeonEmitState(&rmesa->radeon);			\
+} while (0)
+
+
+#define ALLOC_ELTS(nr)	r200AllocElts( rmesa, nr )
+
+static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr ) /* return storage for nr GLushort elements */
+{
+   if (rmesa->radeon.dma.flush == r200FlushElts &&
+       rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) {   /* r200FlushElts is the pending flush and nr*2 more (presumably bytes) still fit */
+
+      GLushort *dest = (GLushort *)(rmesa->radeon.tcl.elt_dma_bo->ptr +
+				    rmesa->radeon.tcl.elt_dma_offset + rmesa->tcl.elt_used);
+
+      rmesa->tcl.elt_used += nr*2;
+
+      return dest;
+   }
+   else {   /* otherwise run any pending flush, re-emit the arrays and start a new open-ended allocation */
+      if (rmesa->radeon.dma.flush)
+	 rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+
+      r200EmitAOS( rmesa,
+		   rmesa->radeon.tcl.aos_count, 0 );
+
+      r200EmitMaxVtxIndex(rmesa, rmesa->radeon.tcl.aos[0].count);
+      return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr );
+   }
+}
+
+
+#define CLOSE_ELTS() 				\
+do {						\
+   if (0) R200_NEWPRIM( rmesa );		\
+}						\
+while (0)
+
+
+/* TODO: Try to extend existing primitive if both are identical,
+ * discrete and there are no intervening state changes.  (Somewhat
+ * duplicates changes to DrawArrays code)
+ */
+static void r200EmitPrim( GLcontext *ctx, 
+		          GLenum prim, 
+		          GLuint hwprim, 
+		          GLuint start, 
+		          GLuint count)	
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   r200TclPrimitive( ctx, prim, hwprim );   /* may update rmesa->tcl.hw_primitive, which is used below */
+   
+   //   fprintf(stderr,"Emit prim %d\n", rmesa->radeon.tcl.aos_count);
+
+   r200EmitAOS( rmesa,
+		rmesa->radeon.tcl.aos_count,
+		start );
+   
+   /* Why couldn't this packet have taken an offset param?
+    */
+   r200EmitVbufPrim( rmesa,
+		     rmesa->tcl.hw_primitive,
+		     count - start );   /* 'count' acts as an end index here: count - start vertices are emitted */
+}
+
+#define EMIT_PRIM(ctx, prim, hwprim, start, count) do {         \
+   r200EmitPrim( ctx, prim, hwprim, start, count );             \
+   (void) rmesa; } while (0)
+
+#define MAX_CONVERSION_SIZE 40
+/* Try & join small primitives
+ */
+#if 0
+#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
+#else
+#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM )			\
+  ((NR) < 20 ||							\
+   ((NR) < 40 &&						\
+    rmesa->tcl.hw_primitive == (PRIM|				\
+			    R200_VF_TCL_OUTPUT_VTX_ENABLE|	\
+			        R200_VF_PRIM_WALK_IND)))
+#endif
+
+#ifdef MESA_BIG_ENDIAN /* store x with the two 16-bit halves of each aligned 32-bit word swapped; pointer math goes through uintptr_t so 64-bit pointers are not truncated */
+/* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
+#define EMIT_ELT(dest, offset, x) do {                          \
+        int off = (offset) + (int)( ( (uintptr_t)(dest) & 0x2 ) >> 1 ); \
+        GLushort *des = (GLushort *)( (uintptr_t)(dest) & ~(uintptr_t)0x2 ); \
+        (des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x);	\
+	(void)rmesa; } while (0)
+#else /* little endian: elements are stored in order */
+#define EMIT_ELT(dest, offset, x) do {				\
+	(dest)[offset] = (GLushort) (x);			\
+	(void)rmesa; } while (0)
+#endif
+
+#define EMIT_TWO_ELTS(dest, offset, x, y)  *(GLuint *)((dest)+offset) = ((y)<<16)|(x);
+
+
+
+#define TAG(x) tcl_##x
+#include "tnl_dd/t_dd_dmatmp2.h"
+
+/**********************************************************************/
+/*                          External entrypoints                     */
+/**********************************************************************/
+
+void r200EmitPrimitive( GLcontext *ctx, 
+			  GLuint first,
+			  GLuint last,
+			  GLuint flags )
+{
+   tcl_render_tab_verts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );   /* dispatch on the primitive-mode bits of flags; table presumably comes from t_dd_dmatmp2.h via TAG(tcl_) */
+}
+
+void r200EmitEltPrimitive( GLcontext *ctx, 
+			     GLuint first,
+			     GLuint last,
+			     GLuint flags )
+{
+   tcl_render_tab_elts[flags&PRIM_MODE_MASK]( ctx, first, last, flags );   /* indexed (elts) counterpart of r200EmitPrimitive(): same dispatch, elts table */
+}
+
+void r200TclPrimitive( GLcontext *ctx, 
+			 GLenum prim,
+			 int hw_prim )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint newprim = hw_prim | R200_VF_TCL_OUTPUT_VTX_ENABLE;   /* value stored in tcl.hw_primitive always carries this flag */
+
+   if (newprim != rmesa->tcl.hw_primitive ||
+       !discrete_prim[hw_prim&0xf]) {   /* an unchanged primitive is left alone only if discrete_prim[] marks it as discrete */
+      /* need to disable perspective-correct texturing for point sprites */
+      if ((prim & PRIM_MODE_MASK) == GL_POINTS && ctx->Point.PointSprite) {
+	 if (rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE) {
+	    R200_STATECHANGE( rmesa, set );
+	    rmesa->hw.set.cmd[SET_RE_CNTL] &= ~R200_PERSPECTIVE_ENABLE;
+	 }
+      }
+      else if (!(rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE)) {   /* every other case: set the bit again if it was cleared */
+	 R200_STATECHANGE( rmesa, set );
+	 rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PERSPECTIVE_ENABLE;
+      }
+      R200_NEWPRIM( rmesa );
+      rmesa->tcl.hw_primitive = newprim;
+   }
+}
+
+
+/**********************************************************************/
+/*             Fog blend factor computation for hw tcl                */
+/*             same calculation used as in t_vb_fog.c                 */
+/**********************************************************************/
+
+#define FOG_EXP_TABLE_SIZE 256
+#define FOG_MAX (10.0)
+#define EXP_FOG_MAX .0006595
+#define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE)
+static GLfloat exp_table[FOG_EXP_TABLE_SIZE];
+
+#if 1 /* approximate exp(-narg) by linear interpolation in exp_table[]; arguments are parenthesized so compound expressions expand correctly */
+#define NEG_EXP( result, narg )						\
+do {									\
+   GLfloat f = (GLfloat) ((narg) * (1.0/FOG_INCR));			\
+   GLint k = (GLint) f;							\
+   if (k > FOG_EXP_TABLE_SIZE-2) 					\
+      (result) = (GLfloat) EXP_FOG_MAX;					\
+   else									\
+      (result) = exp_table[k] + (f-k)*(exp_table[k+1]-exp_table[k]);	\
+} while (0)
+#else /* direct evaluation with exp() */
+#define NEG_EXP( result, narg )					\
+do {								\
+   (result) = exp(-(narg));					\
+} while (0)
+#endif
+
+
+/**
+ * Initialize the exp_table[] lookup table for approximating exp().
+ */
+void
+r200InitStaticFogData( void )
+{
+   GLfloat f = 0.0F;   /* table argument, stepped by FOG_INCR per entry */
+   GLint i = 0;
+   for ( ; i < FOG_EXP_TABLE_SIZE ; i++, f += FOG_INCR) {
+      exp_table[i] = (GLfloat) exp(-f);   /* entry i holds exp() of minus the accumulated i steps of FOG_INCR */
+   }
+}
+
+
+/**
+ * Compute per-vertex fog blend factors from fog coordinates by
+ * evaluating the GL_LINEAR, GL_EXP or GL_EXP2 fog function.
+ * Fog coordinates are distances from the eye (typically between the
+ * near and far clip plane distances).
+ * Note the fog (eye Z) coords may be negative so we use ABS(z) below.
+ * Fog blend factors are in the range [0,1].
+ */
+float
+r200ComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
+{
+   GLfloat end  = ctx->Fog.End;
+   GLfloat d, temp;
+   const GLfloat z = FABSF(fogcoord);
+
+   switch (ctx->Fog.Mode) {
+   case GL_LINEAR:
+      if (ctx->Fog.Start == ctx->Fog.End)   /* avoid dividing by zero below */
+         d = 1.0F;
+      else
+         d = 1.0F / (ctx->Fog.End - ctx->Fog.Start);
+      temp = (end - z) * d;
+      return CLAMP(temp, 0.0F, 1.0F);
+      break;   /* not reached: follows a return */
+   case GL_EXP:
+      d = ctx->Fog.Density;
+      NEG_EXP( temp, d * z );   /* temp ~= exp(-(density * z)) */
+      return temp;
+      break;   /* not reached: follows a return */
+   case GL_EXP2:
+      d = ctx->Fog.Density*ctx->Fog.Density;
+      NEG_EXP( temp, d * z * z );   /* temp ~= exp(-((density * z) squared)) */
+      return temp;
+      break;   /* not reached: follows a return */
+   default:
+      _mesa_problem(ctx, "Bad fog mode in make_fog_coord");
+      return 0;   /* unknown mode: report it and return a factor of 0 */
+   }
+}
+
+/**
+ * Predict the total emit size of the next rendering operation so that no flush happens in the middle of rendering.
+ * The prediction should be as tight as possible while never being smaller than the worst case.
+ */
+static GLuint r200EnsureEmitSize( GLcontext * ctx , GLubyte* vimap_rev )
+{
+  r200ContextPtr rmesa = R200_CONTEXT(ctx);
+  TNLcontext *tnl = TNL_CONTEXT(ctx);
+  struct vertex_buffer *VB = &tnl->vb;
+  GLuint space_required;
+  GLuint state_size;
+  GLuint nr_aos = 0;
+  int i;
+  /* predict number of aos to emit */
+  for (i = 0; i < 15; ++i)   /* 15 map slots are scanned; a value of 255 marks an unused slot */
+  {
+    if (vimap_rev[i] != 255)
+    {
+      ++nr_aos;
+    }
+  }
+
+  {
+    /* count the prediction for state size */
+    space_required = 0;
+    state_size = radeonCountStateEmitSize( &rmesa->radeon );
+    /* vtx may be changed in r200EmitArrays so account for it if not dirty */
+    if (!rmesa->hw.vtx.dirty)
+      state_size += rmesa->hw.vtx.check(rmesa->radeon.glCtx, &rmesa->hw.vtx);
+    /* predict size for elements */
+    for (i = 0; i < VB->PrimitiveCount; ++i)
+    {
+      if (!VB->Primitive[i].count)
+	continue;
+      /* If primitive.count is less than MAX_CONVERSION_SIZE the
+         rendering code may decide to convert to elts.
+	 In that case we have to make a pessimistic prediction
+	 and use the larger of the 2 paths. */
+      const GLuint elt_count =(VB->Primitive[i].count/GET_MAX_HW_ELTS() + 1);
+      const GLuint elts = ELTS_BUFSZ(nr_aos) * elt_count;
+      const GLuint index = INDEX_BUFSZ * elt_count;
+      const GLuint vbuf = VBUF_BUFSZ;
+      if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
+	  || vbuf > index + elts)
+	space_required += vbuf;
+      else
+	space_required += index + elts;
+      space_required += AOS_BUFSZ(nr_aos);   /* added once per non-empty primitive, whichever path was chosen */
+    }
+  }
+
+  radeon_print(RADEON_RENDER,RADEON_VERBOSE,
+      "%s space %u, aos %d\n",
+      __func__, space_required, AOS_BUFSZ(nr_aos) );
+  /* flush the buffer in case we need more than is left. */
+  if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required + state_size, __FUNCTION__))
+    return space_required + radeonCountStateEmitSize( &rmesa->radeon );   /* state size is recounted; presumably a non-zero result means a flush happened -- confirm */
+  else
+    return space_required + state_size;
+}
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+/* TCL render stage: emit the vertex buffer through hardware TCL.  Returns GL_TRUE
+ * to hand the VB on to the software pipeline, GL_FALSE when rendering is done. */
+static GLboolean r200_run_tcl_render( GLcontext *ctx,
+				      struct tnl_pipeline_stage *stage )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint i;
+   GLubyte *vimap_rev;
+/* use hw fixed order for simplicity, pos 0, weight 1, normal 2, fog 3,
+   color0 - color3 4-7, texcoord0 - texcoord5 8-13, pos 1 14. Must not use
+   more than 12 of those at the same time. 255 marks an unused slot. */
+   GLubyte map_rev_fixed[15] = {255, 255, 255, 255, 255, 255, 255, 255,
+			    255, 255, 255, 255, 255, 255, 255};
+
+
+   /* TODO: separate this from the swtnl pipeline 
+    */
+   if (rmesa->radeon.TclFallback)
+      return GL_TRUE;	/* fallback to software t&l */
+
+   radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s\n", __FUNCTION__);
+
+   if (VB->Count == 0)
+      return GL_FALSE;	/* nothing to draw: finished */
+
+   /* Validate state:
+    */
+   if (rmesa->radeon.NewGLState)
+      if (!r200ValidateState( ctx ))
+         return GL_TRUE; /* fallback to sw t&l */
+
+   if (!ctx->VertexProgram._Enabled) {
+   /* NOTE: inputs != tnl->render_inputs - these are the untransformed
+    * inputs.
+    */
+      map_rev_fixed[0] = VERT_ATTRIB_POS;
+      /* technically there is no reason we always need VA_COLOR0. In theory
+         could disable it depending on lighting, color materials, texturing... */
+      map_rev_fixed[4] = VERT_ATTRIB_COLOR0;
+
+      if (ctx->Light.Enabled) {
+	 map_rev_fixed[2] = VERT_ATTRIB_NORMAL;
+      }
+
+      /* this also enables VA_COLOR1 when using separate specular
+         lighting model, which is unnecessary.
+         FIXME: OTOH, we're missing the case where a ATI_fragment_shader accesses
+         the secondary color (if lighting is disabled). The chip seems
+         misconfigured for that though elsewhere (tcl output, might lock up) */
+      if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
+	 map_rev_fixed[5] = VERT_ATTRIB_COLOR1;
+      }
+
+      if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) {
+	 map_rev_fixed[3] = VERT_ATTRIB_FOG;
+      }
+
+      for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
+	 if (ctx->Texture.Unit[i]._ReallyEnabled) {
+	    if (rmesa->TexGenNeedNormals[i]) {
+	       map_rev_fixed[2] = VERT_ATTRIB_NORMAL;
+	    }
+	    map_rev_fixed[8 + i] = VERT_ATTRIB_TEX0 + i;
+	 }
+      }
+      vimap_rev = &map_rev_fixed[0];	/* fixed-function map built above */
+   }
+   else {
+      /* vtx_tcl_output_vtxfmt_0/1 need to match configuration of "fragment
+	 part", since using some vertex interpolator later which is not in
+	 out_vtxfmt0/1 will lock up. It seems to be ok to write in vertex
+	 prog to a not enabled output however, so just don't mess with it.
+	 We only need to change compsel. */
+      GLuint out_compsel = 0;
+      const GLbitfield64 vp_out =
+	 rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten;
+
+      vimap_rev = &rmesa->curr_vp_hw->inputmap_rev[0];	/* map kept with the current vertex program */
+      assert(vp_out & BITFIELD64_BIT(VERT_RESULT_HPOS));
+      out_compsel = R200_OUTPUT_XYZW;
+      if (vp_out & BITFIELD64_BIT(VERT_RESULT_COL0)) {
+	 out_compsel |= R200_OUTPUT_COLOR_0;
+      }
+      if (vp_out & BITFIELD64_BIT(VERT_RESULT_COL1)) {
+	 out_compsel |= R200_OUTPUT_COLOR_1;
+      }
+      if (vp_out & BITFIELD64_BIT(VERT_RESULT_FOGC)) {
+         out_compsel |= R200_OUTPUT_DISCRETE_FOG;
+      }
+      if (vp_out & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+	 out_compsel |= R200_OUTPUT_PT_SIZE;
+      }
+      for (i = VERT_RESULT_TEX0; i < VERT_RESULT_TEX6; i++) {
+	 if (vp_out & BITFIELD64_BIT(i)) {
+	    out_compsel |= R200_OUTPUT_TEX_0 << (i - VERT_RESULT_TEX0);
+	 }
+      }
+      if (rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] != out_compsel) {
+	 R200_STATECHANGE( rmesa, vtx );
+	 rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] = out_compsel;
+      }
+   }
+
+   /* Do the actual work:
+    */
+   radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ );
+   GLuint emit_end = r200EnsureEmitSize( ctx, vimap_rev )
+     + rmesa->radeon.cmdbuf.cs->cdw;	/* predicted cs->cdw once everything below is emitted */
+   r200EmitArrays( ctx, vimap_rev );
+
+   for (i = 0 ; i < VB->PrimitiveCount ; i++)
+   {
+      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
+      GLuint start = VB->Primitive[i].start;
+      GLuint length = VB->Primitive[i].count;
+
+      if (!length)
+	 continue;
+
+      if (VB->Elts)
+	 r200EmitEltPrimitive( ctx, start, start+length, prim );
+      else
+	 r200EmitPrimitive( ctx, start, start+length, prim );
+   }
+   if ( emit_end < rmesa->radeon.cmdbuf.cs->cdw )	/* emitted more than predicted */
+     WARN_ONCE("Rendering was %d commands larger than predicted size."
+	 " We might overflow  command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
+
+   return GL_FALSE;		/* finished the pipe */
+}
+
+
+
+/* Pipeline stage descriptor for hardware TCL rendering: only the label
+ * string and the run hook are filled in, every other slot is NULL. */
+const struct tnl_pipeline_stage _r200_tcl_stage =
+{
+   "r200 render",
+   NULL,			/*  private */
+   NULL,
+   NULL,
+   NULL,
+   r200_run_tcl_render	/* run */
+};
+
+
+
+/**********************************************************************/
+/*                 Validate state at pipeline start                   */
+/**********************************************************************/
+
+
+/*-----------------------------------------------------------------------
+ * Manage TCL fallbacks
+ */
+
+/* Leave hardware TCL: re-pick vertex/render state, release arrays and clear the TCL enables. */
+static void transition_to_swtnl( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   R200_NEWPRIM( rmesa );
+
+   r200ChooseVertexState( ctx );
+   r200ChooseRenderState( ctx );
+
+   _mesa_validate_all_lighting_tables( ctx ); 
+
+   tnl->Driver.NotifyMaterialChange = 	/* material changes now revalidate the sw lighting tables */
+      _mesa_validate_all_lighting_tables;
+
+   radeonReleaseArrays( ctx, ~0 );
+
+   /* Still using the D3D based hardware-rasterizer from the radeon;
+    * need to put the card into D3D mode to make it work:
+    */
+   R200_STATECHANGE( rmesa, vap );
+   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~(R200_VAP_TCL_ENABLE|R200_VAP_PROG_VTX_SHADER_ENABLE);
+}
+/* Return to hardware TCL: hook hw material updates, flush pending dma, re-enable TCL in VAP_CNTL. */
+static void transition_to_hwtnl( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   _tnl_need_projected_coords( ctx, GL_FALSE );
+
+   r200UpdateMaterial( ctx );
+
+   tnl->Driver.NotifyMaterialChange = r200UpdateMaterial;
+
+   if ( rmesa->radeon.dma.flush )			
+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	
+
+   rmesa->radeon.dma.flush = NULL;
+   
+   R200_STATECHANGE( rmesa, vap );
+   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_TCL_ENABLE;
+   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_FORCE_W_TO_ONE;
+
+   if (ctx->VertexProgram._Enabled) {
+      rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE;
+   }
+
+   /* Fog taken from spec alpha while the GL source is the fog coord: switch the hw to vertex fog. */
+   if ( ((rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] & R200_FOG_USE_MASK)
+      == R200_FOG_USE_SPEC_ALPHA) &&
+      (ctx->Fog.FogCoordinateSource == GL_FOG_COORD )) {
+      R200_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_USE_MASK;
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_VTX_FOG;
+   }
+
+   R200_STATECHANGE( rmesa, vte );
+   rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] &= ~(R200_VTX_XY_FMT|R200_VTX_Z_FMT);
+   rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL] |= R200_VTX_W0_FMT;
+
+   if (R200_DEBUG & RADEON_FALLBACKS)
+      fprintf(stderr, "R200 end tcl fallback\n");
+}
+
+/* Debug names of the TCL fallback bits; entry N describes bit (1 << N), see getFallbackString(). */
+static char *fallbackStrings[] = {
+   "Rasterization fallback",
+   "Unfilled triangles",
+   "Twosided lighting, differing materials",
+   "Materials in VB (maybe between begin/end)",
+   "Texgen unit 0",
+   "Texgen unit 1",
+   "Texgen unit 2",
+   "Texgen unit 3",
+   "Texgen unit 4",
+   "Texgen unit 5",
+   "User disable",
+   "Bitmap as points",
+   "Vertex program"
+};
+
+/* Debug name for a fallback bit (indexed by its highest set bit); bits past the table give "Unknown fallback". */
+static char *getFallbackString(GLuint bit)
+{
+   const GLuint count = sizeof(fallbackStrings) / sizeof(fallbackStrings[0]);
+   GLuint i = 0;
+   /* i = position of the highest set bit (0 when bit is 0 or 1) */
+   for ( ; bit > 1 ; bit >>= 1)
+      i++;
+   return (i < count) ? fallbackStrings[i] : "Unknown fallback";
+}
+
+
+
+void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+	r200ContextPtr rmesa = R200_CONTEXT(ctx);
+	const GLuint active = rmesa->radeon.TclFallback;
+	/* A pipeline switch happens when the first fallback bit gets set, or
+	 * when the mask being cleared from is exactly the bit passed in. */
+	const GLboolean switching = mode ? (active == 0) : (active == bit);
+
+	if (switching) {
+		/* We have to flush before transition */
+		if ( rmesa->radeon.dma.flush )
+			rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+		if (R200_DEBUG & RADEON_FALLBACKS) {
+			if (mode)
+				fprintf(stderr, "R200 begin tcl fallback %s\n",
+						getFallbackString( bit ));
+			else
+				fprintf(stderr, "R200 end tcl fallback %s\n",
+						getFallbackString( bit ));
+		}
+	}
+
+	/* Record the new fallback mask before running the transition. */
+	if (mode)
+		rmesa->radeon.TclFallback |= bit;
+	else
+		rmesa->radeon.TclFallback &= ~bit;
+
+	if (switching && mode)
+		transition_to_swtnl( ctx );
+	else if (switching)
+		transition_to_hwtnl( ctx );
+}
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.h b/src/mesa/drivers/dri/r200/r200_tcl.h
new file mode 100644
index 0000000000..f191ddc7eb
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_tcl.h
@@ -0,0 +1,68 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_TCL_H__
+#define __R200_TCL_H__
+
+#include "r200_context.h"
+
+extern void r200TclPrimitive( GLcontext *ctx, GLenum prim, int hw_prim );
+extern void r200EmitEltPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+				    GLuint flags );
+extern void r200EmitPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+				 GLuint flags );
+/* Set (mode != 0) or clear one R200_TCL_FALLBACK_* bit; defined in r200_tcl.c. */
+extern void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+
+extern void r200InitStaticFogData( void );
+
+extern float r200ComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord );
+/* TCL fallback reasons, one bit each; bit order matches fallbackStrings[] in r200_tcl.c. */
+#define R200_TCL_FALLBACK_RASTER            0x1 /* rasterization */
+#define R200_TCL_FALLBACK_UNFILLED          0x2 /* unfilled tris */
+#define R200_TCL_FALLBACK_LIGHT_TWOSIDE     0x4 /* twoside tris */
+#define R200_TCL_FALLBACK_MATERIAL          0x8 /* material in vb */
+#define R200_TCL_FALLBACK_TEXGEN_0          0x10 /* texgen, unit 0 */
+#define R200_TCL_FALLBACK_TEXGEN_1          0x20 /* texgen, unit 1 */
+#define R200_TCL_FALLBACK_TEXGEN_2          0x40 /* texgen, unit 2 */
+#define R200_TCL_FALLBACK_TEXGEN_3          0x80 /* texgen, unit 3 */
+#define R200_TCL_FALLBACK_TEXGEN_4          0x100 /* texgen, unit 4 */
+#define R200_TCL_FALLBACK_TEXGEN_5          0x200 /* texgen, unit 5 */
+#define R200_TCL_FALLBACK_TCL_DISABLE       0x400 /* user disable */
+#define R200_TCL_FALLBACK_BITMAP            0x800 /* draw bitmap with points */
+#define R200_TCL_FALLBACK_VERTEX_PROGRAM    0x1000/* vertex program active */
+
+#define TCL_FALLBACK( ctx, bit, mode )	r200TclFallback( ctx, bit, mode )
+
+#endif
diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c
new file mode 100644
index 0000000000..6723b12bf4
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_tex.c
@@ -0,0 +1,542 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/colormac.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/simple_list.h"
+#include "main/texstore.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+
+#include "radeon_mipmap_tree.h"
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_tex.h"
+
+#include "xmlpool.h"
+
+
+
+/**
+ * Set the texture wrap modes.
+ *
+ * \param t Texture object whose wrap modes are to be set
+ * \param swrap Wrap mode for the \a s texture coordinate
+ * \param twrap Wrap mode for the \a t texture coordinate (skipped for 1D targets)
+ * \param rwrap Wrap mode for the \a r texture coordinate
+ */
+static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap )
+{
+   GLboolean  is_clamp = GL_FALSE;
+   GLboolean  is_clamp_to_border = GL_FALSE;
+   struct gl_texture_object *tObj = &t->base;
+
+   radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s(tex %p) sw %s, tw %s, rw %s\n",
+		__func__, t,
+		_mesa_lookup_enum_by_nr(swrap),
+		_mesa_lookup_enum_by_nr(twrap),
+		_mesa_lookup_enum_by_nr(rwrap));
+
+   t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D);
+
+   switch ( swrap ) {
+   case GL_REPEAT:
+      t->pp_txfilter |= R200_CLAMP_S_WRAP;
+      break;
+   case GL_CLAMP:
+      t->pp_txfilter |= R200_CLAMP_S_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_CLAMP_TO_EDGE:
+      t->pp_txfilter |= R200_CLAMP_S_CLAMP_LAST;
+      break;
+   case GL_CLAMP_TO_BORDER:
+      t->pp_txfilter |= R200_CLAMP_S_CLAMP_GL;
+      is_clamp_to_border = GL_TRUE;
+      break;
+   case GL_MIRRORED_REPEAT:
+      t->pp_txfilter |= R200_CLAMP_S_MIRROR;
+      break;
+   case GL_MIRROR_CLAMP_EXT:
+      t->pp_txfilter |= R200_CLAMP_S_MIRROR_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_MIRROR_CLAMP_TO_EDGE_EXT:
+      t->pp_txfilter |= R200_CLAMP_S_MIRROR_CLAMP_LAST;
+      break;
+   case GL_MIRROR_CLAMP_TO_BORDER_EXT:
+      t->pp_txfilter |= R200_CLAMP_S_MIRROR_CLAMP_GL;
+      is_clamp_to_border = GL_TRUE;
+      break;
+   default:
+      _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
+   }
+
+   if (tObj->Target != GL_TEXTURE_1D) {
+      switch ( twrap ) {
+      case GL_REPEAT:
+         t->pp_txfilter |= R200_CLAMP_T_WRAP;
+         break;
+      case GL_CLAMP:
+         t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL;
+         is_clamp = GL_TRUE;
+         break;
+      case GL_CLAMP_TO_EDGE:
+         t->pp_txfilter |= R200_CLAMP_T_CLAMP_LAST;
+         break;
+      case GL_CLAMP_TO_BORDER:
+         t->pp_txfilter |= R200_CLAMP_T_CLAMP_GL;
+         is_clamp_to_border = GL_TRUE;
+         break;
+      case GL_MIRRORED_REPEAT:
+         t->pp_txfilter |= R200_CLAMP_T_MIRROR;
+         break;
+      case GL_MIRROR_CLAMP_EXT:
+         t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL;
+         is_clamp = GL_TRUE;
+         break;
+      case GL_MIRROR_CLAMP_TO_EDGE_EXT:
+         t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_LAST;
+         break;
+      case GL_MIRROR_CLAMP_TO_BORDER_EXT:
+         t->pp_txfilter |= R200_CLAMP_T_MIRROR_CLAMP_GL;
+         is_clamp_to_border = GL_TRUE;
+         break;
+      default:
+         _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__);
+      }
+   }
+   /* The R wrap mode is kept in pp_txformat_x (the CLAMP_Q field), not in pp_txfilter. */
+   t->pp_txformat_x &= ~R200_CLAMP_Q_MASK;
+
+   switch ( rwrap ) {
+   case GL_REPEAT:
+      t->pp_txformat_x |= R200_CLAMP_Q_WRAP;
+      break;
+   case GL_CLAMP:
+      t->pp_txformat_x |= R200_CLAMP_Q_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_CLAMP_TO_EDGE:
+      t->pp_txformat_x |= R200_CLAMP_Q_CLAMP_LAST;
+      break;
+   case GL_CLAMP_TO_BORDER:
+      t->pp_txformat_x |= R200_CLAMP_Q_CLAMP_GL;
+      is_clamp_to_border = GL_TRUE;
+      break;
+   case GL_MIRRORED_REPEAT:
+      t->pp_txformat_x |= R200_CLAMP_Q_MIRROR;
+      break;
+   case GL_MIRROR_CLAMP_EXT:
+      t->pp_txformat_x |= R200_CLAMP_Q_MIRROR_CLAMP_GL;
+      is_clamp = GL_TRUE;
+      break;
+   case GL_MIRROR_CLAMP_TO_EDGE_EXT:
+      t->pp_txformat_x |= R200_CLAMP_Q_MIRROR_CLAMP_LAST;
+      break;
+   case GL_MIRROR_CLAMP_TO_BORDER_EXT:
+      t->pp_txformat_x |= R200_CLAMP_Q_MIRROR_CLAMP_GL;
+      is_clamp_to_border = GL_TRUE;
+      break;
+   default:
+      _mesa_problem(NULL, "bad R wrap mode in %s", __FUNCTION__);
+   }
+
+   if ( is_clamp_to_border ) {
+      t->pp_txfilter |= R200_BORDER_MODE_D3D;
+   }
+   /* Plain clamp and clamp-to-border share one hw encoding; mixing both is flagged as a fallback. */
+   t->border_fallback = (is_clamp && is_clamp_to_border);
+}
+/* Select the R200 max-anisotropy ratio in pp_txfilter for a GL anisotropy limit. */
+static void r200SetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max )
+{
+   GLuint ratio;
+
+   t->pp_txfilter &= ~R200_MAX_ANISO_MASK;
+   radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+	"%s(tex %p) max %f.\n",
+	__func__, t, max);
+
+   /* Round the request up to the next ratio that has an encoding;
+    * anything that fails all four comparisons ends up as 16:1. */
+   ratio = ( max <= 1.0 ) ? R200_MAX_ANISO_1_TO_1
+         : ( max <= 2.0 ) ? R200_MAX_ANISO_2_TO_1
+         : ( max <= 4.0 ) ? R200_MAX_ANISO_4_TO_1
+         : ( max <= 8.0 ) ? R200_MAX_ANISO_8_TO_1
+         :                  R200_MAX_ANISO_16_TO_1;
+
+   t->pp_txfilter |= ratio;
+}
+
+/**
+ * Set the texture magnification and minification modes.
+ * With max anisotropy above 1:1 the ANISO min-filter encodings are used.
+ *
+ * \param t Texture whose filter modes are to be set
+ * \param minf Texture minification mode
+ * \param magf Texture magnification mode
+ */
+static void r200SetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
+{
+   GLuint anisotropy = (t->pp_txfilter & R200_MAX_ANISO_MASK);
+
+   /* Force revalidation to account for switches from/to mipmapping. */
+   t->validated = GL_FALSE;
+
+   t->pp_txfilter &= ~(R200_MIN_FILTER_MASK | R200_MAG_FILTER_MASK);
+   t->pp_txformat_x &= ~R200_VOLUME_FILTER_MASK;
+
+   radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+	"%s(tex %p) minf %s, magf %s, anisotropy %u.\n",
+	__func__, t,
+	_mesa_lookup_enum_by_nr(minf),
+	_mesa_lookup_enum_by_nr(magf),
+	anisotropy);
+
+   if ( anisotropy == R200_MAX_ANISO_1_TO_1 ) {
+      switch ( minf ) {
+      case GL_NEAREST:
+	 t->pp_txfilter |= R200_MIN_FILTER_NEAREST;
+	 break;
+      case GL_LINEAR:
+	 t->pp_txfilter |= R200_MIN_FILTER_LINEAR;
+	 break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+	 t->pp_txfilter |= R200_MIN_FILTER_NEAREST_MIP_NEAREST;
+	 break;
+      case GL_NEAREST_MIPMAP_LINEAR:	/* NOTE(review): hw name order looks swapped vs GL - confirm in r200_reg.h */
+	 t->pp_txfilter |= R200_MIN_FILTER_LINEAR_MIP_NEAREST;
+	 break;
+      case GL_LINEAR_MIPMAP_NEAREST:	/* NOTE(review): same as above */
+	 t->pp_txfilter |= R200_MIN_FILTER_NEAREST_MIP_LINEAR;
+	 break;
+      case GL_LINEAR_MIPMAP_LINEAR:
+	 t->pp_txfilter |= R200_MIN_FILTER_LINEAR_MIP_LINEAR;
+	 break;
+      }
+   } else {
+      /* Anisotropic encodings: only the mip selection part of minf is kept. */
+      switch ( minf ) {
+      case GL_NEAREST:
+	 t->pp_txfilter |= R200_MIN_FILTER_ANISO_NEAREST;
+	 break;
+      case GL_LINEAR:
+	 t->pp_txfilter |= R200_MIN_FILTER_ANISO_LINEAR;
+	 break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+      case GL_LINEAR_MIPMAP_NEAREST:
+	 t->pp_txfilter |= R200_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST;
+	 break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+      case GL_LINEAR_MIPMAP_LINEAR:
+	 t->pp_txfilter |= R200_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR;
+	 break;
+      }
+   }
+   /* Note we don't have 3D mipmaps so only use the mag filter setting
+    * to set the 3D texture filter mode.
+    */
+   switch ( magf ) {
+   case GL_NEAREST:
+      t->pp_txfilter |= R200_MAG_FILTER_NEAREST;
+      t->pp_txformat_x |= R200_VOLUME_FILTER_NEAREST;
+      break;
+   case GL_LINEAR:
+      t->pp_txfilter |= R200_MAG_FILTER_LINEAR;
+      t->pp_txformat_x |= R200_VOLUME_FILTER_LINEAR;
+      break;
+   }
+}
+/* Convert the float border color to four ubytes and store it packed in pp_border_color. */
+static void r200SetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] )
+{
+   GLubyte rgba[4];
+   int comp;
+   for (comp = 0; comp < 4; comp++) {
+      CLAMPED_FLOAT_TO_UBYTE(rgba[comp], color[comp]);
+   }
+   t->pp_border_color = radeonPackColor( 4, rgba[0], rgba[1], rgba[2], rgba[3] );
+}
+/* TexEnv hook: mirrors env color, LOD bias and point-sprite coord replace of the current unit into hw state. */
+static void r200TexEnv( GLcontext *ctx, GLenum target,
+			  GLenum pname, const GLfloat *param )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLuint unit = ctx->Texture.CurrentUnit;
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+   radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE, "%s( %s )\n",
+	       __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
+
+   /* This is incorrect: Need to maintain this data for each of
+    * GL_TEXTURE_{123}D, GL_TEXTURE_RECTANGLE_NV, etc, and switch
+    * between them according to _ReallyEnabled.
+    */
+   switch ( pname ) {
+   case GL_TEXTURE_ENV_COLOR: {
+      GLubyte c[4];
+      GLuint envColor;
+      UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor );
+      envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
+      if ( rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] != envColor ) {	/* only dirty state on a real change */
+	 R200_STATECHANGE( rmesa, tf );
+	 rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] = envColor;
+      }
+      break;
+   }
+
+   case GL_TEXTURE_LOD_BIAS_EXT: {
+      GLfloat bias, min;
+      GLuint b;
+      const int fixed_one = R200_LOD_BIAS_FIXED_ONE;
+
+      /* The R200's LOD bias is a signed 2's complement value with a
+       * range of -16.0 <= bias < 16.0.
+       * NOTE(review): the clamp below admits 16.0, outside that range - confirm.
+       * NOTE: Add a small bias to the bias for conform mipsel.c test.
+       */
+      bias = *param;
+      min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ?
+	  0.0 : -16.0;
+      bias = CLAMP( bias, min, 16.0 );
+      b = ((int)(bias * fixed_one)
+		+ R200_LOD_BIAS_CORRECTION) & R200_LOD_BIAS_MASK;
+      
+      if ( (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT_X] & R200_LOD_BIAS_MASK) != b ) {
+	 R200_STATECHANGE( rmesa, tex[unit] );
+	 rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT_X] &= ~R200_LOD_BIAS_MASK;
+	 rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT_X] |= b;
+      }
+      break;
+   }
+   case GL_COORD_REPLACE_ARB:
+      if (ctx->Point.PointSprite) {	/* ignored while point sprites are disabled */
+	 R200_STATECHANGE( rmesa, spr );
+	 if ((GLenum)param[0]) {
+	    rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] |= R200_PS_GEN_TEX_0 << unit;
+	 } else {
+	    rmesa->hw.spr.cmd[SPR_POINT_SPRITE_CNTL] &= ~(R200_PS_GEN_TEX_0 << unit);
+	 }
+      }
+      break;
+   default:
+      return;
+   }
+}
+
+
+/**
+ * Note a texture parameter change in the per-texture hardware words and
+ * flags.  Nothing is emitted here; the state update itself happens at the
+ * next UpdateTextureState.  Values are re-read from \p texObj, \p params
+ * is not used, and parameters not listed below are ignored.
+ */
+
+static void r200TexParameter( GLcontext *ctx, GLenum target,
+				struct gl_texture_object *texObj,
+				GLenum pname, const GLfloat *params )
+{
+   radeonTexObj* rtex = radeon_tex_obj(texObj);
+
+   radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_VERBOSE,
+		"%s(%p, tex %p)  target %s, pname %s\n",
+		__FUNCTION__, ctx, texObj,
+		_mesa_lookup_enum_by_nr( target ),
+	       _mesa_lookup_enum_by_nr( pname ) );
+
+   if ( pname == GL_TEXTURE_MIN_FILTER ||
+	pname == GL_TEXTURE_MAG_FILTER ||
+	pname == GL_TEXTURE_MAX_ANISOTROPY_EXT ) {
+      /* Filtering: anisotropy goes first because the min filter
+       * encoding chosen by r200SetTexFilter depends on it. */
+      r200SetTexMaxAnisotropy( rtex, texObj->MaxAnisotropy );
+      r200SetTexFilter( rtex, texObj->MinFilter, texObj->MagFilter );
+   }
+   else if ( pname == GL_TEXTURE_WRAP_S ||
+	     pname == GL_TEXTURE_WRAP_T ||
+	     pname == GL_TEXTURE_WRAP_R ) {
+      /* Wrapping: all three modes are always programmed together. */
+      r200SetTexWrap( rtex, texObj->WrapS, texObj->WrapT, texObj->WrapR );
+   }
+   else if ( pname == GL_TEXTURE_BORDER_COLOR ) {
+      r200SetTexBorderColor( rtex, texObj->BorderColor.f );
+   }
+   else if ( pname == GL_TEXTURE_BASE_LEVEL ||
+	     pname == GL_TEXTURE_MAX_LEVEL ||
+	     pname == GL_TEXTURE_MIN_LOD ||
+	     pname == GL_TEXTURE_MAX_LOD ) {
+      /* Level / LOD range changed: only mark the texture as needing
+       * validation again. */
+      rtex->validated = GL_FALSE;
+   }
+   /* any other pname: nothing to do */
+}
+
+/* DeleteTexture hook: unbind the object from all driver texture units, drop its miptree, free it. */
+static void r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   radeonTexObj* t = radeon_tex_obj(texObj);
+
+   radeon_print(RADEON_TEXTURE | RADEON_STATE, RADEON_NORMAL,
+           "%s( %p (target = %s) )\n", __FUNCTION__,
+	   (void *)texObj,
+	   _mesa_lookup_enum_by_nr(texObj->Target));
+
+   if (rmesa) {
+      int i;
+      radeon_firevertices(&rmesa->radeon);	/* fire queued vertices before the texture goes away */
+      for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) {
+	 if ( t == rmesa->state.texture.unit[i].texobj ) {
+	    rmesa->state.texture.unit[i].texobj = NULL;
+	    rmesa->hw.tex[i].dirty = GL_FALSE;	/* presumably so no pending emit references the dead texture */
+	    rmesa->hw.cube[i].dirty = GL_FALSE;
+	 }
+      }      
+   }
+
+   radeon_miptree_unreference(&t->mt);
+
+   _mesa_delete_texture_object(ctx, texObj);
+}
+
+/* Texgen state change hook.  Hardware texgen needs:
+ *  - Same GEN_MODE for all active bits
+ *  - Same EyePlane/ObjPlane for all active bits when using Eye/Obj
+ *  - STRQ presumably all supported (matrix means incoming R values
+ *    can end up in STQ, this has implications for vertex support,
+ *    presumably ok if maos is used, though?)
+ *
+ * Checking that on the fly is basically impossible, so this only marks
+ * the current unit for a recheck; the checks themselves are done from
+ * ValidateState().  coord, pname and params are not looked at.
+ */
+static void r200TexGen( GLcontext *ctx,
+			  GLenum coord,
+			  GLenum pname,
+			  const GLfloat *params )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE;
+}
+
+
+/**
+ * Allocate a new texture object.
+ * Called via ctx->Driver.NewTextureObject.
+ * Note: this function will be called during context creation to
+ * allocate the default texture objects.
+ * Fixup MaxAnisotropy according to user preference.
+ * \return the new object, or NULL if the allocation failed.
+ */
+static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx,
+						      GLuint name,
+						      GLenum target)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
+   radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL,
+           "%s(%p) target %s, new texture %p.\n",
+	   __FUNCTION__, ctx,
+	   _mesa_lookup_enum_by_nr(target), t);
+   if (!t)		/* out of memory: hand NULL back instead of dereferencing it */
+      return NULL;
+
+   _mesa_initialize_texture_object(&t->base, name, target);
+   t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
+
+   /* Initialize hardware state */
+   r200SetTexWrap( t, t->base.WrapS, t->base.WrapT, t->base.WrapR );
+   r200SetTexMaxAnisotropy( t, t->base.MaxAnisotropy );
+   r200SetTexFilter(t, t->base.MinFilter, t->base.MagFilter);
+   r200SetTexBorderColor(t, t->base.BorderColor.f);
+   return &t->base;
+}
+
+
+/* Plug the texture-related driver hooks implemented for r200 into \p functions. */
+void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions )
+{
+   /* Note: we only plug in the functions we implement in the driver
+    * since _mesa_init_driver_functions() was already called.
+    */
+   functions->ChooseTextureFormat	= radeonChooseTextureFormat_mesa;
+   functions->TexImage1D		= radeonTexImage1D;
+   functions->TexImage2D		= radeonTexImage2D;
+#if ENABLE_HW_3D_TEXTURE
+   functions->TexImage3D		= radeonTexImage3D;
+#else
+   functions->TexImage3D		= _mesa_store_teximage3d;
+#endif
+   functions->TexSubImage1D		= radeonTexSubImage1D;
+   functions->TexSubImage2D		= radeonTexSubImage2D;
+#if ENABLE_HW_3D_TEXTURE
+   functions->TexSubImage3D		= radeonTexSubImage3D;
+#else
+   functions->TexSubImage3D		= _mesa_store_texsubimage3d;
+#endif
+   functions->GetTexImage               = radeonGetTexImage;
+   functions->GetCompressedTexImage     = radeonGetCompressedTexImage;
+   functions->NewTextureObject		= r200NewTextureObject;
+   //   functions->BindTexture		= r200BindTexture;
+   functions->DeleteTexture		= r200DeleteTexture;
+   functions->IsTextureResident		= driIsTextureResident;
+
+   functions->TexEnv			= r200TexEnv;
+   functions->TexParameter		= r200TexParameter;
+   functions->TexGen			= r200TexGen;
+
+   functions->CompressedTexImage2D	= radeonCompressedTexImage2D;
+   functions->CompressedTexSubImage2D	= radeonCompressedTexSubImage2D;
+
+   if (radeon->radeonScreen->kernel_mm) {	/* CopyTex* hooks are only set when kernel_mm is non-zero */
+      functions->CopyTexImage2D = radeonCopyTexImage2D;
+      functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
+   }
+
+   functions->GenerateMipmap = radeonGenerateMipmap;
+
+   functions->NewTextureImage = radeonNewTextureImage;
+   functions->FreeTexImageData = radeonFreeTexImageData;
+   functions->MapTexture = radeonMapTexture;
+   functions->UnmapTexture = radeonUnmapTexture;
+
+   driInitTextureFormats();
+
+}
diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h
new file mode 100644
index 0000000000..1a1e7038df
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_tex.h
@@ -0,0 +1,57 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __R200_TEX_H__
+#define __R200_TEX_H__
+
+extern void r200SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv);
+extern void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
+			      __DRIdrawable *dPriv);
+extern void r200SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+			     unsigned long long offset, GLint depth,
+			     GLuint pitch);
+
+extern void r200UpdateTextureState( GLcontext *ctx );
+
+extern int r200UploadTexImages( r200ContextPtr rmesa, radeonTexObjPtr t, GLuint face );
+
+extern void r200DestroyTexObj( r200ContextPtr rmesa, radeonTexObjPtr t );
+/* Defined in r200_tex.c: fills in the texture hooks of a dd_function_table. */
+extern void r200InitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions );
+
+extern void r200UpdateFragmentShader( GLcontext *ctx );
+
+extern void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d );
+
+#endif /* __R200_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c
new file mode 100644
index 0000000000..9ccf30c3ac
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -0,0 +1,1736 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/enums.h"
+
+#include "radeon_common.h"
+#include "radeon_mipmap_tree.h"
+#include "r200_context.h"
+#include "r200_state.h"
+#include "r200_ioctl.h"
+#include "r200_swtcl.h"
+#include "r200_tex.h"
+#include "r200_tcl.h"
+
+
+#define R200_TXFORMAT_A8        R200_TXFORMAT_I8
+#define R200_TXFORMAT_L8        R200_TXFORMAT_I8
+#define R200_TXFORMAT_AL88      R200_TXFORMAT_AI88
+#define R200_TXFORMAT_YCBCR     R200_TXFORMAT_YVYU422
+#define R200_TXFORMAT_YCBCR_REV R200_TXFORMAT_VYUY422
+#define R200_TXFORMAT_RGB_DXT1  R200_TXFORMAT_DXT1
+#define R200_TXFORMAT_RGBA_DXT1 R200_TXFORMAT_DXT1
+#define R200_TXFORMAT_RGBA_DXT3 R200_TXFORMAT_DXT23
+#define R200_TXFORMAT_RGBA_DXT5 R200_TXFORMAT_DXT45
+
+#define _COLOR(f) /* same-named hw format, no alpha flag, no filter bits */ \
+    [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, 0 }
+#define _COLOR_REV(f) /* as _COLOR, but fills the MESA_FORMAT_<f>_REV slot */ \
+    [ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f, 0 }
+#define _ALPHA(f) /* same-named hw format with R200_TXFORMAT_ALPHA_IN_MAP set */ \
+    [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f | R200_TXFORMAT_ALPHA_IN_MAP, 0 }
+#define _ALPHA_REV(f) /* as _ALPHA, but fills the MESA_FORMAT_<f>_REV slot */ \
+    [ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f | R200_TXFORMAT_ALPHA_IN_MAP, 0 }
+#define _YUV(f) /* same-named hw format plus R200_YUV_TO_RGB in the filter word */ \
+    [ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, R200_YUV_TO_RGB }
+#define _INVALID(f) /* format word 0xffffffff marks the entry as unsupported */ \
+    [ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 }
+#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
+			     && (tx_table_be[f].format != 0xffffffff) ) /* NOTE(review): always consults tx_table_be, never tx_table_le - confirm intended */
+
+struct tx_table {   /* hardware texture settings for one MESA_FORMAT_* value */
+   GLuint format, filter;   /* format is stored to pp_txformat, filter is OR'ed into pp_txfilter */
+};
+
+static const struct tx_table tx_table_be[] =	/* indexed by MESA_FORMAT_*; NOTE(review): "_be" presumably means big-endian hosts - confirm */
+{
+   [ MESA_FORMAT_RGBA8888 ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },	/* RGBA8888 and its _REV twin are swapped relative to tx_table_le */
+   _ALPHA_REV(RGBA8888),
+   _ALPHA(ARGB8888),
+   _ALPHA_REV(ARGB8888),
+   _INVALID(RGB888),	/* unsupported here, although tx_table_le maps it */
+   _COLOR(RGB565),
+   _COLOR_REV(RGB565),
+   _ALPHA(ARGB4444),
+   _ALPHA_REV(ARGB4444),
+   _ALPHA(ARGB1555),
+   _ALPHA_REV(ARGB1555),
+   _ALPHA(AL88),
+   _ALPHA_REV(AL88),
+   _ALPHA(A8),	/* R200_TXFORMAT_A8 is an alias of R200_TXFORMAT_I8 */
+   _COLOR(L8),	/* R200_TXFORMAT_L8 is an alias of R200_TXFORMAT_I8 */
+   _ALPHA(I8),
+   _INVALID(CI8),
+   _YUV(YCBCR),
+   _YUV(YCBCR_REV),
+   _INVALID(RGB_FXT1),
+   _INVALID(RGBA_FXT1),
+   _COLOR(RGB_DXT1),
+   _ALPHA(RGBA_DXT1),
+   _ALPHA(RGBA_DXT3),
+   _ALPHA(RGBA_DXT5),	/* last index accepted by VALID_FORMAT() */
+};
+
+static const struct tx_table tx_table_le[] =	/* indexed by MESA_FORMAT_*; NOTE(review): "_le" presumably means little-endian hosts - confirm */
+{
+   _ALPHA(RGBA8888),
+   [ MESA_FORMAT_RGBA8888_REV ] = { R200_TXFORMAT_ABGR8888 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },	/* swapped relative to tx_table_be */
+   _ALPHA(ARGB8888),
+   _ALPHA_REV(ARGB8888),
+   [ MESA_FORMAT_RGB888 ] = { R200_TXFORMAT_ARGB8888, 0 },	/* ARGB8888 hw format without R200_TXFORMAT_ALPHA_IN_MAP */
+   _COLOR(RGB565),
+   _COLOR_REV(RGB565),
+   _ALPHA(ARGB4444),
+   _ALPHA_REV(ARGB4444),
+   _ALPHA(ARGB1555),
+   _ALPHA_REV(ARGB1555),
+   _ALPHA(AL88),
+   _ALPHA_REV(AL88),
+   _ALPHA(A8),	/* R200_TXFORMAT_A8 is an alias of R200_TXFORMAT_I8 */
+   _COLOR(L8),	/* R200_TXFORMAT_L8 is an alias of R200_TXFORMAT_I8 */
+   _ALPHA(I8),
+   _INVALID(CI8),
+   _YUV(YCBCR),
+   _YUV(YCBCR_REV),
+   _INVALID(RGB_FXT1),
+   _INVALID(RGBA_FXT1),
+   _COLOR(RGB_DXT1),
+   _ALPHA(RGBA_DXT1),
+   _ALPHA(RGBA_DXT3),
+   _ALPHA(RGBA_DXT5),
+};
+
+#undef _COLOR
+#undef _ALPHA
+#undef _INVALID
+
+/* ================================================================
+ * Texture combine functions
+ */
+
+/* GL_ARB_texture_env_combine support
+ */
+
+/* The color tables have combine functions for GL_SRC_COLOR,
+ * GL_ONE_MINUS_SRC_COLOR, GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
+ */
+static const GLuint r200_register_color[][R200_MAX_TEXTURE_UNITS] =	/* read-only: [OperandRGB - GL_SRC_COLOR][register], slot-A encoding */
+{
+   {	/* op 0: GL_SRC_COLOR */
+      R200_TXC_ARG_A_R0_COLOR,
+      R200_TXC_ARG_A_R1_COLOR,
+      R200_TXC_ARG_A_R2_COLOR,
+      R200_TXC_ARG_A_R3_COLOR,
+      R200_TXC_ARG_A_R4_COLOR,
+      R200_TXC_ARG_A_R5_COLOR
+   },
+   {	/* op 1: GL_ONE_MINUS_SRC_COLOR (complement bit set) */
+      R200_TXC_ARG_A_R0_COLOR | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R1_COLOR | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R2_COLOR | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R3_COLOR | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R4_COLOR | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R5_COLOR | R200_TXC_COMP_ARG_A
+   },
+   {	/* op 2: GL_SRC_ALPHA */
+      R200_TXC_ARG_A_R0_ALPHA,
+      R200_TXC_ARG_A_R1_ALPHA,
+      R200_TXC_ARG_A_R2_ALPHA,
+      R200_TXC_ARG_A_R3_ALPHA,
+      R200_TXC_ARG_A_R4_ALPHA,
+      R200_TXC_ARG_A_R5_ALPHA
+   },
+   {	/* op 3: GL_ONE_MINUS_SRC_ALPHA (complement bit set) */
+      R200_TXC_ARG_A_R0_ALPHA | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R1_ALPHA | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R2_ALPHA | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R3_ALPHA | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R4_ALPHA | R200_TXC_COMP_ARG_A,
+      R200_TXC_ARG_A_R5_ALPHA | R200_TXC_COMP_ARG_A
+   },
+};
+
+static const GLuint r200_tfactor_color[] =	/* read-only: GL_CONSTANT source, indexed by OperandRGB - GL_SRC_COLOR */
+{
+   R200_TXC_ARG_A_TFACTOR_COLOR,
+   R200_TXC_ARG_A_TFACTOR_COLOR | R200_TXC_COMP_ARG_A,
+   R200_TXC_ARG_A_TFACTOR_ALPHA,
+   R200_TXC_ARG_A_TFACTOR_ALPHA | R200_TXC_COMP_ARG_A
+};
+
+static const GLuint r200_tfactor1_color[] =	/* read-only: GL_CONSTANT of a replaced unit (TFACTOR1), same op indexing */
+{
+   R200_TXC_ARG_A_TFACTOR1_COLOR,
+   R200_TXC_ARG_A_TFACTOR1_COLOR | R200_TXC_COMP_ARG_A,
+   R200_TXC_ARG_A_TFACTOR1_ALPHA,
+   R200_TXC_ARG_A_TFACTOR1_ALPHA | R200_TXC_COMP_ARG_A
+};
+
+static const GLuint r200_primary_color[] =	/* read-only: GL_PRIMARY_COLOR source (diffuse), indexed by OperandRGB - GL_SRC_COLOR */
+{
+   R200_TXC_ARG_A_DIFFUSE_COLOR,
+   R200_TXC_ARG_A_DIFFUSE_COLOR | R200_TXC_COMP_ARG_A,
+   R200_TXC_ARG_A_DIFFUSE_ALPHA,
+   R200_TXC_ARG_A_DIFFUSE_ALPHA | R200_TXC_COMP_ARG_A
+};
+
+/* GL_ZERO table - indices 0-3
+ * GL_ONE  table - indices 1-4
+ */
+static const GLuint r200_zero_color[] =	/* read-only: GL_ZERO reads [op], GL_ONE reads [op + 1] */
+{
+   R200_TXC_ARG_A_ZERO,
+   R200_TXC_ARG_A_ZERO | R200_TXC_COMP_ARG_A,
+   R200_TXC_ARG_A_ZERO,
+   R200_TXC_ARG_A_ZERO | R200_TXC_COMP_ARG_A,
+   R200_TXC_ARG_A_ZERO	/* fifth entry exists only for GL_ONE with op == 3 */
+};
+
+/* The alpha tables only have GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
+ */
+static const GLuint r200_register_alpha[][R200_MAX_TEXTURE_UNITS] =	/* read-only: [OperandA - GL_SRC_ALPHA][register], slot-A encoding */
+{
+   {	/* op 0: GL_SRC_ALPHA */
+      R200_TXA_ARG_A_R0_ALPHA,
+      R200_TXA_ARG_A_R1_ALPHA,
+      R200_TXA_ARG_A_R2_ALPHA,
+      R200_TXA_ARG_A_R3_ALPHA,
+      R200_TXA_ARG_A_R4_ALPHA,
+      R200_TXA_ARG_A_R5_ALPHA
+   },
+   {	/* op 1: GL_ONE_MINUS_SRC_ALPHA (complement bit set) */
+      R200_TXA_ARG_A_R0_ALPHA | R200_TXA_COMP_ARG_A,
+      R200_TXA_ARG_A_R1_ALPHA | R200_TXA_COMP_ARG_A,
+      R200_TXA_ARG_A_R2_ALPHA | R200_TXA_COMP_ARG_A,
+      R200_TXA_ARG_A_R3_ALPHA | R200_TXA_COMP_ARG_A,
+      R200_TXA_ARG_A_R4_ALPHA | R200_TXA_COMP_ARG_A,
+      R200_TXA_ARG_A_R5_ALPHA | R200_TXA_COMP_ARG_A
+   },
+};
+
+static const GLuint r200_tfactor_alpha[] =	/* read-only: GL_CONSTANT source, indexed by OperandA - GL_SRC_ALPHA */
+{
+   R200_TXA_ARG_A_TFACTOR_ALPHA,
+   R200_TXA_ARG_A_TFACTOR_ALPHA | R200_TXA_COMP_ARG_A
+};
+
+static const GLuint r200_tfactor1_alpha[] =	/* read-only: GL_CONSTANT of a replaced unit (TFACTOR1), same op indexing */
+{
+   R200_TXA_ARG_A_TFACTOR1_ALPHA,
+   R200_TXA_ARG_A_TFACTOR1_ALPHA | R200_TXA_COMP_ARG_A
+};
+
+static const GLuint r200_primary_alpha[] =	/* read-only: GL_PRIMARY_COLOR source (diffuse alpha), indexed by OperandA - GL_SRC_ALPHA */
+{
+   R200_TXA_ARG_A_DIFFUSE_ALPHA,
+   R200_TXA_ARG_A_DIFFUSE_ALPHA | R200_TXA_COMP_ARG_A
+};
+
+/* GL_ZERO table - indices 0-1
+ * GL_ONE  table - indices 1-2
+ */
+static const GLuint r200_zero_alpha[] =	/* read-only: GL_ZERO reads [op], GL_ONE reads [op + 1] */
+{
+   R200_TXA_ARG_A_ZERO,
+   R200_TXA_ARG_A_ZERO | R200_TXA_COMP_ARG_A,
+   R200_TXA_ARG_A_ZERO,	/* third entry exists only for GL_ONE with op == 1 */
+};
+
+
+/* R200_COLOR_ARG / R200_ALPHA_ARG( n, arg ): take color_arg[n] / alpha_arg[n], which is encoded for
+ * slot A, and OR it into the caller's local color_combine / alpha_combine at slot `arg` (A, B or C),
+ * carrying the complement bit over to that slot as well.  Both locals must exist at the use site. */
+#define R200_COLOR_ARG( n, arg )			\
+do {							\
+   color_combine |=					\
+      ((color_arg[n] & R200_TXC_ARG_A_MASK)		\
+       << R200_TXC_ARG_##arg##_SHIFT);			\
+   color_combine |=					\
+      ((color_arg[n] >> R200_TXC_COMP_ARG_A_SHIFT)	\
+       << R200_TXC_COMP_ARG_##arg##_SHIFT);		\
+} while (0)
+/* Alpha counterpart: identical shape, using the R200_TXA_* fields. */
+#define R200_ALPHA_ARG( n, arg )			\
+do {							\
+   alpha_combine |=					\
+      ((alpha_arg[n] & R200_TXA_ARG_A_MASK)		\
+       << R200_TXA_ARG_##arg##_SHIFT);			\
+   alpha_combine |=					\
+      ((alpha_arg[n] >> R200_TXA_COMP_ARG_A_SHIFT)	\
+       << R200_TXA_COMP_ARG_##arg##_SHIFT);		\
+} while (0)
+
+
+/* ================================================================
+ * Texture unit state management
+ */
+/* Build the TXCBLEND/TXABLEND and TXCBLEND2/TXABLEND2 words for combiner stage `slot` from texture unit `unit`'s combine state. */
+static GLboolean r200UpdateTextureEnv( GLcontext *ctx, int unit, int slot, GLuint replaceargs )
+{  /* replaceargs: when != unit, GL_PREVIOUS is taken from argument 0 of that unit; returns GL_FALSE on an unhandled source or mode */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLuint color_combine, alpha_combine;
+   GLuint color_scale = rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] &
+      ~(R200_TXC_SCALE_MASK | R200_TXC_OUTPUT_REG_MASK | R200_TXC_TFACTOR_SEL_MASK |
+	R200_TXC_TFACTOR1_SEL_MASK);
+   GLuint alpha_scale = rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] &
+      ~(R200_TXA_DOT_ALPHA | R200_TXA_SCALE_MASK | R200_TXA_OUTPUT_REG_MASK |
+	R200_TXA_TFACTOR_SEL_MASK | R200_TXA_TFACTOR1_SEL_MASK);
+
+   /* texUnit->_Current can be NULL if and only if the texture unit is
+    * not actually enabled.
+    */
+   assert( (texUnit->_ReallyEnabled == 0)
+	   || (texUnit->_Current != NULL) );
+
+   if ( R200_DEBUG & RADEON_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit );
+   }
+
+   /* Set the texture environment state.  Isn't this nice and clean?
+    * The chip will automagically set the texture alpha to 0xff when
+    * the texture format does not include an alpha component.  This
+    * reduces the amount of special-casing we have to do, alpha-only
+    * textures being a notable exception.
+    */
+
+   color_scale |= ((rmesa->state.texture.unit[unit].outputreg + 1) << R200_TXC_OUTPUT_REG_SHIFT) |
+			(unit << R200_TXC_TFACTOR_SEL_SHIFT) |
+			(replaceargs << R200_TXC_TFACTOR1_SEL_SHIFT);
+   alpha_scale |= ((rmesa->state.texture.unit[unit].outputreg + 1) << R200_TXA_OUTPUT_REG_SHIFT) |
+			(unit << R200_TXA_TFACTOR_SEL_SHIFT) |
+			(replaceargs << R200_TXA_TFACTOR1_SEL_SHIFT);
+   /* A disabled unit (asserted to be unit 0) gets MADD with A = B = zero and C = diffuse color/alpha. */
+   if ( !texUnit->_ReallyEnabled ) {
+      assert( unit == 0);
+      color_combine = R200_TXC_ARG_A_ZERO | R200_TXC_ARG_B_ZERO
+	  | R200_TXC_ARG_C_DIFFUSE_COLOR | R200_TXC_OP_MADD;
+      alpha_combine = R200_TXA_ARG_A_ZERO | R200_TXA_ARG_B_ZERO
+	  | R200_TXA_ARG_C_DIFFUSE_ALPHA | R200_TXA_OP_MADD;
+   }
+   else {
+      GLuint color_arg[3], alpha_arg[3];
+      GLuint i;
+      const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
+      const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
+      GLuint RGBshift = texUnit->_CurrentCombine->ScaleShiftRGB;
+      GLuint Ashift = texUnit->_CurrentCombine->ScaleShiftA;
+
+      /* Operand 0 of unit `replaceargs` as table indices; XOR'ed into op where GL_PREVIOUS is redirected to that unit. */
+      const GLint replaceoprgb =
+	 ctx->Texture.Unit[replaceargs]._CurrentCombine->OperandRGB[0] - GL_SRC_COLOR;
+      const GLint replaceopa =
+	 ctx->Texture.Unit[replaceargs]._CurrentCombine->OperandA[0] - GL_SRC_ALPHA;
+
+      /* Step 1:
+       * Extract the color and alpha combine function arguments.
+       */
+      for ( i = 0 ; i < numColorArgs ; i++ ) {
+	 GLint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR;
+	 const GLint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
+	 assert(op >= 0);
+	 assert(op <= 3);
+	 switch ( srcRGBi ) {
+	 case GL_TEXTURE:
+	    color_arg[i] = r200_register_color[op][unit];
+	    break;
+	 case GL_CONSTANT:
+	    color_arg[i] = r200_tfactor_color[op];
+	    break;
+	 case GL_PRIMARY_COLOR:
+	    color_arg[i] = r200_primary_color[op];
+	    break;
+	 case GL_PREVIOUS:
+	    if (replaceargs != unit) { /* GL_PREVIOUS is redirected to argument 0 of unit replaceargs */
+	       const GLint srcRGBreplace =
+		  ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceRGB[0];
+	       if (op >= 2) { /* op 2/3 select alpha, so combine with the replaced unit's alpha operand */
+		  op = op ^ replaceopa;
+	       }
+	       else { /* op 0/1 select color, so combine with the replaced unit's RGB operand */
+		  op = op ^ replaceoprgb;
+	       }
+	       switch (srcRGBreplace) {
+	       case GL_TEXTURE:
+		  color_arg[i] = r200_register_color[op][replaceargs];
+		  break;
+	       case GL_CONSTANT:
+		  color_arg[i] = r200_tfactor1_color[op]; /* TFACTOR1 was pointed at replaceargs above */
+		  break;
+	       case GL_PRIMARY_COLOR:
+		  color_arg[i] = r200_primary_color[op];
+		  break;
+	       case GL_PREVIOUS:
+		  if (slot == 0)
+		     color_arg[i] = r200_primary_color[op];
+		  else
+		     color_arg[i] = r200_register_color[op]
+			[rmesa->state.texture.unit[replaceargs - 1].outputreg];
+		  break;
+	       case GL_ZERO:
+		  color_arg[i] = r200_zero_color[op];
+		  break;
+	       case GL_ONE:
+		  color_arg[i] = r200_zero_color[op+1];
+		  break;
+	       case GL_TEXTURE0:
+	       case GL_TEXTURE1:
+	       case GL_TEXTURE2:
+	       case GL_TEXTURE3:
+	       case GL_TEXTURE4:
+	       case GL_TEXTURE5:
+		  color_arg[i] = r200_register_color[op][srcRGBreplace - GL_TEXTURE0];
+		  break;
+	       default:
+	       return GL_FALSE;
+	       }
+	    }
+	    else {
+	       if (slot == 0) /* the first emitted stage has no previous result: use the diffuse color */
+		  color_arg[i] = r200_primary_color[op];
+	       else
+		  color_arg[i] = r200_register_color[op]
+		     [rmesa->state.texture.unit[unit - 1].outputreg];
+            }
+	    break;
+	 case GL_ZERO:
+	    color_arg[i] = r200_zero_color[op];
+	    break;
+	 case GL_ONE:
+	    color_arg[i] = r200_zero_color[op+1];
+	    break;
+	 case GL_TEXTURE0:
+	 case GL_TEXTURE1:
+	 case GL_TEXTURE2:
+	 case GL_TEXTURE3:
+	 case GL_TEXTURE4:
+	 case GL_TEXTURE5:
+	    color_arg[i] = r200_register_color[op][srcRGBi - GL_TEXTURE0];
+	    break;
+	 default:
+	    return GL_FALSE;
+	 }
+      }
+      /* Same walk for the alpha arguments, using the two-entry alpha tables. */
+      for ( i = 0 ; i < numAlphaArgs ; i++ ) {
+	 GLint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
+	 const GLint srcAi = texUnit->_CurrentCombine->SourceA[i];
+	 assert(op >= 0);
+	 assert(op <= 1);
+	 switch ( srcAi ) {
+	 case GL_TEXTURE:
+	    alpha_arg[i] = r200_register_alpha[op][unit];
+	    break;
+	 case GL_CONSTANT:
+	    alpha_arg[i] = r200_tfactor_alpha[op];
+	    break;
+	 case GL_PRIMARY_COLOR:
+	    alpha_arg[i] = r200_primary_alpha[op];
+	    break;
+	 case GL_PREVIOUS:
+	    if (replaceargs != unit) {
+	       const GLint srcAreplace =
+		  ctx->Texture.Unit[replaceargs]._CurrentCombine->SourceA[0];
+	       op = op ^ replaceopa;
+	       switch (srcAreplace) {
+	       case GL_TEXTURE:
+		  alpha_arg[i] = r200_register_alpha[op][replaceargs];
+		  break;
+	       case GL_CONSTANT:
+		  alpha_arg[i] = r200_tfactor1_alpha[op];
+		  break;
+	       case GL_PRIMARY_COLOR:
+		  alpha_arg[i] = r200_primary_alpha[op];
+		  break;
+	       case GL_PREVIOUS:
+		  if (slot == 0)
+		     alpha_arg[i] = r200_primary_alpha[op];
+		  else
+		     alpha_arg[i] = r200_register_alpha[op]
+			[rmesa->state.texture.unit[replaceargs - 1].outputreg];
+		  break;
+	       case GL_ZERO:
+		  alpha_arg[i] = r200_zero_alpha[op];
+		  break;
+	       case GL_ONE:
+		  alpha_arg[i] = r200_zero_alpha[op+1];
+		  break;
+	       case GL_TEXTURE0:
+	       case GL_TEXTURE1:
+	       case GL_TEXTURE2:
+	       case GL_TEXTURE3:
+	       case GL_TEXTURE4:
+	       case GL_TEXTURE5:
+		  alpha_arg[i] = r200_register_alpha[op][srcAreplace - GL_TEXTURE0];
+		  break;
+	       default:
+	       return GL_FALSE;
+	       }
+	    }
+	    else {
+	       if (slot == 0)
+		  alpha_arg[i] = r200_primary_alpha[op];
+	       else
+		  alpha_arg[i] = r200_register_alpha[op]
+		    [rmesa->state.texture.unit[unit - 1].outputreg];
+            }
+	    break;
+	 case GL_ZERO:
+	    alpha_arg[i] = r200_zero_alpha[op];
+	    break;
+	 case GL_ONE:
+	    alpha_arg[i] = r200_zero_alpha[op+1];
+	    break;
+	 case GL_TEXTURE0:
+	 case GL_TEXTURE1:
+	 case GL_TEXTURE2:
+	 case GL_TEXTURE3:
+	 case GL_TEXTURE4:
+	 case GL_TEXTURE5:
+	    alpha_arg[i] = r200_register_alpha[op][srcAi - GL_TEXTURE0];
+	    break;
+	 default:
+	    return GL_FALSE;
+	 }
+      }
+
+      /* Step 2:
+       * Build up the color and alpha combine functions.
+       */
+      switch ( texUnit->_CurrentCombine->ModeRGB ) {
+      case GL_REPLACE:
+	 color_combine = (R200_TXC_ARG_A_ZERO |
+			  R200_TXC_ARG_B_ZERO |
+			  R200_TXC_OP_MADD);
+	 R200_COLOR_ARG( 0, C );
+	 break;
+      case GL_MODULATE:
+	 color_combine = (R200_TXC_ARG_C_ZERO |
+			  R200_TXC_OP_MADD);
+	 R200_COLOR_ARG( 0, A );
+	 R200_COLOR_ARG( 1, B );
+	 break;
+      case GL_ADD:
+	 color_combine = (R200_TXC_ARG_B_ZERO |
+			  R200_TXC_COMP_ARG_B | 
+			  R200_TXC_OP_MADD);
+	 R200_COLOR_ARG( 0, A );
+	 R200_COLOR_ARG( 1, C );
+	 break;
+      case GL_ADD_SIGNED:
+	 color_combine = (R200_TXC_ARG_B_ZERO |
+			  R200_TXC_COMP_ARG_B |
+			  R200_TXC_BIAS_ARG_C |	/* new */
+			  R200_TXC_OP_MADD); /* was ADDSIGNED */
+	 R200_COLOR_ARG( 0, A );
+	 R200_COLOR_ARG( 1, C );
+	 break;
+      case GL_SUBTRACT:
+	 color_combine = (R200_TXC_ARG_B_ZERO |
+			  R200_TXC_COMP_ARG_B | 
+			  R200_TXC_NEG_ARG_C |
+			  R200_TXC_OP_MADD);
+	 R200_COLOR_ARG( 0, A );
+	 R200_COLOR_ARG( 1, C );
+	 break;
+      case GL_INTERPOLATE:
+	 color_combine = (R200_TXC_OP_LERP);
+	 R200_COLOR_ARG( 0, B );
+	 R200_COLOR_ARG( 1, A );
+	 R200_COLOR_ARG( 2, C );
+	 break;
+
+      case GL_DOT3_RGB_EXT:
+      case GL_DOT3_RGBA_EXT:
+	 /* The EXT version of the DOT3 extension does not support the
+	  * scale factor, but the ARB version (and the version in OpenGL
+	  * 1.3) does.
+	  */
+	 RGBshift = 0;
+	 /* FALLTHROUGH */
+
+      case GL_DOT3_RGB:
+      case GL_DOT3_RGBA:
+	 /* DOT3 works differently on R200 than on R100.  On R100, just
+	  * setting the DOT3 mode did everything for you.  On R200, the
+	  * driver has to enable the biasing and scale in the inputs to
+	  * put them in the proper [-1,1] range.  This is what the 4x and
+	  * the -0.5 in the DOT3 spec do.  The post-scale is then set
+	  * normally.
+	  */
+
+	 color_combine = (R200_TXC_ARG_C_ZERO |
+			  R200_TXC_OP_DOT3 |
+			  R200_TXC_BIAS_ARG_A |
+			  R200_TXC_BIAS_ARG_B |
+			  R200_TXC_SCALE_ARG_A |
+			  R200_TXC_SCALE_ARG_B);
+	 R200_COLOR_ARG( 0, A );
+	 R200_COLOR_ARG( 1, B );
+	 break;
+
+      case GL_MODULATE_ADD_ATI:
+	 color_combine = (R200_TXC_OP_MADD);
+	 R200_COLOR_ARG( 0, A );
+	 R200_COLOR_ARG( 1, C );
+	 R200_COLOR_ARG( 2, B );
+	 break;
+      case GL_MODULATE_SIGNED_ADD_ATI:
+	 color_combine = (R200_TXC_BIAS_ARG_C |	/* new */
+			  R200_TXC_OP_MADD); /* was ADDSIGNED */
+	 R200_COLOR_ARG( 0, A );
+	 R200_COLOR_ARG( 1, C );
+	 R200_COLOR_ARG( 2, B );
+	 break;
+      case GL_MODULATE_SUBTRACT_ATI:
+	 color_combine = (R200_TXC_NEG_ARG_C |
+			  R200_TXC_OP_MADD);
+	 R200_COLOR_ARG( 0, A );
+	 R200_COLOR_ARG( 1, C );
+	 R200_COLOR_ARG( 2, B );
+	 break;
+      default:
+	 return GL_FALSE;
+      }
+
+      switch ( texUnit->_CurrentCombine->ModeA ) {
+      case GL_REPLACE:
+	 alpha_combine = (R200_TXA_ARG_A_ZERO |
+			  R200_TXA_ARG_B_ZERO |
+			  R200_TXA_OP_MADD);
+	 R200_ALPHA_ARG( 0, C );
+	 break;
+      case GL_MODULATE:
+	 alpha_combine = (R200_TXA_ARG_C_ZERO |
+			  R200_TXA_OP_MADD);
+	 R200_ALPHA_ARG( 0, A );
+	 R200_ALPHA_ARG( 1, B );
+	 break;
+      case GL_ADD:
+	 alpha_combine = (R200_TXA_ARG_B_ZERO |
+			  R200_TXA_COMP_ARG_B |
+			  R200_TXA_OP_MADD);
+	 R200_ALPHA_ARG( 0, A );
+	 R200_ALPHA_ARG( 1, C );
+	 break;
+      case GL_ADD_SIGNED:
+	 alpha_combine = (R200_TXA_ARG_B_ZERO |
+			  R200_TXA_COMP_ARG_B |
+			  R200_TXA_BIAS_ARG_C |	/* new */
+			  R200_TXA_OP_MADD); /* was ADDSIGNED */
+	 R200_ALPHA_ARG( 0, A );
+	 R200_ALPHA_ARG( 1, C );
+	 break;
+      case GL_SUBTRACT:
+	 alpha_combine = (R200_TXA_ARG_B_ZERO |
+			  R200_TXA_COMP_ARG_B |
+			  R200_TXA_NEG_ARG_C |
+			  R200_TXA_OP_MADD);
+	 R200_ALPHA_ARG( 0, A );
+	 R200_ALPHA_ARG( 1, C );
+	 break;
+      case GL_INTERPOLATE:
+	 alpha_combine = (R200_TXA_OP_LERP);
+	 R200_ALPHA_ARG( 0, B );
+	 R200_ALPHA_ARG( 1, A );
+	 R200_ALPHA_ARG( 2, C );
+	 break;
+
+      case GL_MODULATE_ADD_ATI:
+	 alpha_combine = (R200_TXA_OP_MADD);
+	 R200_ALPHA_ARG( 0, A );
+	 R200_ALPHA_ARG( 1, C );
+	 R200_ALPHA_ARG( 2, B );
+	 break;
+      case GL_MODULATE_SIGNED_ADD_ATI:
+	 alpha_combine = (R200_TXA_BIAS_ARG_C |	/* new */
+			  R200_TXA_OP_MADD); /* was ADDSIGNED */
+	 R200_ALPHA_ARG( 0, A );
+	 R200_ALPHA_ARG( 1, C );
+	 R200_ALPHA_ARG( 2, B );
+	 break;
+      case GL_MODULATE_SUBTRACT_ATI:
+	 alpha_combine = (R200_TXA_NEG_ARG_C |
+			  R200_TXA_OP_MADD);
+	 R200_ALPHA_ARG( 0, A );
+	 R200_ALPHA_ARG( 1, C );
+	 R200_ALPHA_ARG( 2, B );
+	 break;
+      default:
+	 return GL_FALSE;
+      }
+      /* The DOT3_RGBA modes set R200_TXA_DOT_ALPHA and reuse the RGB scale shift for alpha. */
+      if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT)
+	   || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ) {
+	 alpha_scale |= R200_TXA_DOT_ALPHA;
+	 Ashift = RGBshift;
+      }
+
+      /* Step 3:
+       * Apply the scale factor.
+       */
+      color_scale |= (RGBshift << R200_TXC_SCALE_SHIFT);
+      alpha_scale |= (Ashift   << R200_TXA_SCALE_SHIFT);
+
+      /* All done!
+       */
+   }
+   /* Flag the pix[slot] state atom only when at least one of the four words actually changed. */
+   if ( rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND] != color_combine ||
+	rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND] != alpha_combine ||
+	rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] != color_scale ||
+	rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] != alpha_scale) {
+      R200_STATECHANGE( rmesa, pix[slot] );
+      rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND] = color_combine;
+      rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND] = alpha_combine;
+      rmesa->hw.pix[slot].cmd[PIX_PP_TXCBLEND2] = color_scale;
+      rmesa->hw.pix[slot].cmd[PIX_PP_TXABLEND2] = alpha_scale;
+   }
+
+   return GL_TRUE;
+}
+
+void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+		      unsigned long long offset, GLint depth, GLuint pitch)
+{
+	/* Point texture `texname` at a raw offset instead of a buffer object.
+	 * offset == 0 only flags the override; an unknown depth is treated as 24.
+	 */
+	r200ContextPtr rmesa = pDRICtx->driverPrivate;
+	struct gl_texture_object *tObj =
+	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+	radeonTexObjPtr t;
+	int fmt;	/* MESA_FORMAT_* index into tx_table_le */
+
+	if (!tObj)
+		return;
+
+	/* Convert only once the lookup is known to have succeeded. */
+	t = radeon_tex_obj(tObj);
+	t->image_override = GL_TRUE;
+
+	if (!offset)
+		return;
+
+	t->bo = NULL;
+	t->override_offset = offset;
+	t->pp_txpitch = pitch - 32;
+
+	switch (depth) {
+	case 32: fmt = MESA_FORMAT_ARGB8888; break;
+	case 16: fmt = MESA_FORMAT_RGB565; break;
+	case 24:
+	default: fmt = MESA_FORMAT_RGB888; break;
+	}
+	/* The format word is replaced outright; filter bits are only added. */
+	t->pp_txformat = tx_table_le[fmt].format;
+	t->pp_txfilter |= tx_table_le[fmt].filter;
+}
+
+void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format,
+		       __DRIdrawable *dPriv)
+{
+	/* Make the texture object currently selected for `target` sample
+	 * straight from dPriv's first color renderbuffer: the texture object
+	 * and its level-0 image release their own buffer objects and miptrees
+	 * and take references on the renderbuffer's buffer object instead.
+	 * texture_format chooses between the RGB888 and ARGB8888 hardware
+	 * formats when the renderbuffer uses 4 bytes per pixel.
+	 */
+	struct gl_texture_unit *texUnit;
+	struct gl_texture_object *texObj;
+	struct gl_texture_image *texImage;
+	struct radeon_renderbuffer *rb;
+	radeon_texture_image *rImage;
+	radeonContextPtr radeon = pDRICtx->driverPrivate;
+	struct radeon_framebuffer *rfb = dPriv->driverPrivate;
+	radeonTexObjPtr t;
+	uint32_t pitch_val;
+
+	texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
+	texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
+	if (texObj == NULL)
+		return;
+
+	/* The image is dereferenced below, so a failed lookup must bail out. */
+	texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
+	if (texImage == NULL)
+		return;
+
+	rImage = get_radeon_texture_image(texImage);
+	t = radeon_tex_obj(texObj);
+	if (t == NULL || rImage == NULL)
+		return;
+
+	radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE);
+	rb = rfb->color_rb[0];
+	if (rb->bo == NULL) {
+		/* The color renderbuffer has no buffer object to texture from. */
+		return;
+	}
+
+	_mesa_lock_texture(radeon->glCtx, texObj);
+	/* Drop whatever storage the texture and its image used before. */
+	if (t->bo) {
+		radeon_bo_unref(t->bo);
+		t->bo = NULL;
+	}
+	if (rImage->bo) {
+		radeon_bo_unref(rImage->bo);
+		rImage->bo = NULL;
+	}
+	radeon_miptree_unreference(&t->mt);
+	radeon_miptree_unreference(&rImage->mt);
+
+	_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
+				   rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
+	texImage->RowStride = rb->pitch / rb->cpp;
+
+	/* The image and the texture object each take their own reference. */
+	rImage->bo = rb->bo;
+	radeon_bo_ref(rImage->bo);
+	t->bo = rb->bo;
+	radeon_bo_ref(t->bo);
+	t->tile_bits = 0;
+	t->image_override = GL_TRUE;
+	t->override_offset = 0;
+	pitch_val = rb->pitch;
+
+	switch (rb->cpp) {
+	case 4:
+		if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
+			t->pp_txformat = tx_table_le[MESA_FORMAT_RGB888].format;
+		else
+			t->pp_txformat = tx_table_le[MESA_FORMAT_ARGB8888].format;
+		t->pp_txfilter |= tx_table_le[MESA_FORMAT_ARGB8888].filter;
+		break;
+	case 2:
+		t->pp_txformat = tx_table_le[MESA_FORMAT_RGB565].format;
+		t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB565].filter;
+		break;
+	case 3:
+	default:
+		t->pp_txformat = tx_table_le[MESA_FORMAT_RGB888].format;
+		t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB888].filter;
+		break;
+	}
+	t->pp_txsize = ((rb->base.Width - 1) << RADEON_TEX_USIZE_SHIFT)
+		   | ((rb->base.Height - 1) << RADEON_TEX_VSIZE_SHIFT);
+	t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
+	t->pp_txpitch = pitch_val - 32;
+	t->validated = GL_TRUE;
+	_mesa_unlock_texture(radeon->glCtx, texObj);
+}
+
+
+void r200SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+{	/* Thin wrapper: same as r200SetTexBuffer2() with the texture format fixed to __DRI_TEXTURE_FORMAT_RGBA. */
+        r200SetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
+}
+
+
+#define REF_COLOR 1	/* stageref bit: a later stage reads this stage's color result */
+#define REF_ALPHA 2	/* stageref bit: a later stage reads this stage's alpha result */
+/* Work out which units' results are used, give each an output register, and emit one combiner stage per needed unit. */
+static GLboolean r200UpdateAllTexEnv( GLcontext *ctx )
+{  /* returns GL_FALSE when no output register is free or a stage cannot be programmed */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLint i, j, currslot;
+   GLint maxunitused = -1;
+   GLboolean texregfree[6] = {GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE};
+   GLubyte stageref[7] = {0, 0, 0, 0, 0, 0, 0};	/* stageref[j + 1] describes the result of unit j */
+   GLint nextunit[R200_MAX_TEXTURE_UNITS] = {0, 0, 0, 0, 0, 0};
+   GLint currentnext = -1;
+   GLboolean ok;
+
+   /* find highest used unit */
+   for ( j = 0; j < R200_MAX_TEXTURE_UNITS; j++) {
+      if (ctx->Texture.Unit[j]._ReallyEnabled) {
+	 maxunitused = j;
+      }
+   }
+   stageref[maxunitused + 1] = REF_COLOR | REF_ALPHA;
+   /* Walk the units from last to first, recording which results are referenced and which registers are read as textures. */
+   for ( j = maxunitused; j >= 0; j-- ) {
+      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[j];
+
+      rmesa->state.texture.unit[j].outputreg = -1;
+
+      if (stageref[j + 1]) {
+
+	 /* use the lowest available reg. That gets us automatically reg0 for the last stage.
+	    need this even for disabled units, as it may get referenced due to the replace
+	    optimization */
+	 for ( i = 0 ; i < R200_MAX_TEXTURE_UNITS; i++ ) {
+	    if (texregfree[i]) {
+	       rmesa->state.texture.unit[j].outputreg = i;
+	       break;
+	    }
+	 }
+	 if (rmesa->state.texture.unit[j].outputreg == -1) {
+	    /* no more free regs we can use. Need a fallback :-( */
+	    return GL_FALSE;
+         }
+
+         nextunit[j] = currentnext;
+
+         if (!texUnit->_ReallyEnabled) {
+	 /* the not enabled stages are referenced "indirectly",
+            must not cut off the lower stages */
+	    stageref[j] = REF_COLOR | REF_ALPHA;
+	    continue;
+         }
+	 currentnext = j;
+ 
+	 const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
+	 const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
+	 const GLboolean isdot3rgba = (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ||
+				      (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT);
+
+
+	 /* check if we need the color part, special case for dot3_rgba
+	    as if only the alpha part is referenced later on it still is using the color part */
+	 if ((stageref[j + 1] & REF_COLOR) || isdot3rgba) {
+	    for ( i = 0 ; i < numColorArgs ; i++ ) {
+	       const GLuint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
+	       const GLuint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR; /* 0..3; the raw enum only worked via GLubyte truncation */
+	       switch ( srcRGBi ) {
+	       case GL_PREVIOUS:
+		  /* op 0/1 are referencing color, op 2/3 alpha */
+		  stageref[j] |= (op >> 1) + 1;
+	          break;
+	       case GL_TEXTURE:
+		  texregfree[j] = GL_FALSE;
+		  break;
+	       case GL_TEXTURE0:
+	       case GL_TEXTURE1:
+	       case GL_TEXTURE2:
+	       case GL_TEXTURE3:
+	       case GL_TEXTURE4:
+	       case GL_TEXTURE5:
+		  texregfree[srcRGBi - GL_TEXTURE0] = GL_FALSE;
+	          break;
+	       default: /* don't care about other sources here */
+		  break;
+	       }
+	    }
+	 }
+
+	 /* alpha args are ignored for dot3_rgba */
+	 if ((stageref[j + 1] & REF_ALPHA) && !isdot3rgba) {
+
+	    for ( i = 0 ; i < numAlphaArgs ; i++ ) {
+	       const GLuint srcAi = texUnit->_CurrentCombine->SourceA[i];
+	       switch ( srcAi ) {
+	       case GL_PREVIOUS:
+		  stageref[j] |= REF_ALPHA;
+		  break;
+	       case GL_TEXTURE:
+		  texregfree[j] = GL_FALSE;
+		  break;
+	       case GL_TEXTURE0:
+	       case GL_TEXTURE1:
+	       case GL_TEXTURE2:
+	       case GL_TEXTURE3:
+	       case GL_TEXTURE4:
+	       case GL_TEXTURE5:
+		  texregfree[srcAi - GL_TEXTURE0] = GL_FALSE;
+		  break;
+	       default: /* don't care about other sources here */
+		  break;
+	       }
+	    }
+	 }
+      }
+   }
+
+   /* don't enable texture sampling for units if the result is not used */
+   for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) {
+      if (ctx->Texture.Unit[i]._ReallyEnabled && !texregfree[i])
+	 rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled;
+      else rmesa->state.texture.unit[i].unitneeded = 0;
+   }
+   /* Second pass: emit the referenced units in ascending order into consecutive combiner slots. */
+   ok = GL_TRUE;
+   currslot = 0;
+   rmesa->state.envneeded = 1;
+
+   i = 0;
+   while ((i <= maxunitused) && (i >= 0)) {
+      /* only output instruction if the results are referenced */
+      if (ctx->Texture.Unit[i]._ReallyEnabled && stageref[i+1]) {
+         GLuint replaceunit = i;
+	 /* try to optimize GL_REPLACE away (only one level deep though) */
+	 if (	(ctx->Texture.Unit[i]._CurrentCombine->ModeRGB == GL_REPLACE) &&
+		(ctx->Texture.Unit[i]._CurrentCombine->ModeA == GL_REPLACE) &&
+		(ctx->Texture.Unit[i]._CurrentCombine->ScaleShiftRGB == 0) &&
+		(ctx->Texture.Unit[i]._CurrentCombine->ScaleShiftA == 0) &&
+		(nextunit[i] > 0) ) {
+	    /* yippie! can optimize it away! */
+	    replaceunit = i;
+	    i = nextunit[i];
+	 }
+
+	 /* need env instruction slot */
+	 rmesa->state.envneeded |= 1 << currslot;
+	 ok = r200UpdateTextureEnv( ctx, i, currslot, replaceunit );
+	 if (!ok) return GL_FALSE;
+	 currslot++;
+      }
+      i = i + 1;
+   }
+
+   if (currslot == 0) {
+      /* need one stage at least */
+      rmesa->state.texture.unit[0].outputreg = 0;
+      ok = r200UpdateTextureEnv( ctx, 0, 0, 0 );
+   }
+   /* Enable exactly the blend stages recorded in envneeded. */
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_BLEND_ENABLE_MASK | R200_MULTI_PASS_ENABLE);
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= rmesa->state.envneeded << R200_TEX_BLEND_0_ENABLE_SHIFT;
+
+   return ok;
+}
+
+#undef REF_COLOR
+#undef REF_ALPHA
+
+/* Bits of cmd[TEX_PP_TXFILTER] that import_tex_obj_state() replaces with the texture object's pp_txfilter. */
+#define TEXOBJ_TXFILTER_MASK (R200_MAX_MIP_LEVEL_MASK |		\
+			      R200_MIN_FILTER_MASK | 		\
+			      R200_MAG_FILTER_MASK |		\
+			      R200_MAX_ANISO_MASK |		\
+			      R200_YUV_TO_RGB |			\
+			      R200_YUV_TEMPERATURE_MASK |	\
+			      R200_CLAMP_S_MASK | 		\
+			      R200_CLAMP_T_MASK | 		\
+			      R200_BORDER_MODE_D3D )
+/* Bits of cmd[TEX_PP_TXFORMAT] that import_tex_obj_state() replaces with the texture object's pp_txformat. */
+#define TEXOBJ_TXFORMAT_MASK (R200_TXFORMAT_WIDTH_MASK |	\
+			      R200_TXFORMAT_HEIGHT_MASK |	\
+			      R200_TXFORMAT_FORMAT_MASK |	\
+			      R200_TXFORMAT_F5_WIDTH_MASK |	\
+			      R200_TXFORMAT_F5_HEIGHT_MASK |	\
+			      R200_TXFORMAT_ALPHA_IN_MAP |	\
+			      R200_TXFORMAT_CUBIC_MAP_ENABLE |	\
+			      R200_TXFORMAT_NON_POWER2)
+/* Bits of cmd[TEX_PP_TXFORMAT_X] that import_tex_obj_state() replaces with the texture object's pp_txformat_x. */
+#define TEXOBJ_TXFORMAT_X_MASK (R200_DEPTH_LOG2_MASK |		\
+                                R200_TEXCOORD_MASK |		\
+                                R200_MIN_MIP_LEVEL_MASK |	\
+                                R200_CLAMP_Q_MASK | 		\
+                                R200_VOLUME_FILTER_MASK)
+
+
+/* Disable texture unit `unit` in the cached hardware state and reset the
+ * texgen / texture-matrix bookkeeping kept for it. */
+static void disable_tex_obj_state( r200ContextPtr rmesa, 
+				   int unit )
+{
+   const GLuint tcl_fallback_bit = R200_TCL_FALLBACK_TEXGEN_0 << unit;
+   GLuint texgen_before;
+
+   R200_STATECHANGE( rmesa, vtx );
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3));
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_0_ENABLE << unit);
+
+   /* drop the unit's texgen TCL fallback if it is currently raised */
+   if (rmesa->radeon.TclFallback & tcl_fallback_bit) {
+      TCL_FALLBACK( rmesa->radeon.glCtx, tcl_fallback_bit, GL_FALSE );
+   }
+
+   /* Actually want to keep all units less than max active texture
+    * enabled, right?  Fix this for >2 texunits.
+    */
+   texgen_before = rmesa->TexGenEnabled;
+   rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE << unit);
+   rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE << unit);
+   rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
+   rmesa->TexGenNeedNormals[unit] = GL_FALSE;
+   if (texgen_before != rmesa->TexGenEnabled) {
+      rmesa->recheck_texgen[unit] = GL_TRUE;
+      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+   }
+}
+/* Copy the register values cached in `texobj` into the state atom of
+ * texture unit `unit`; masked registers keep their non-TEXOBJ_* bits. */
+static void import_tex_obj_state( r200ContextPtr rmesa,
+				  int unit,
+				  radeonTexObjPtr texobj )
+{
+/* do not use RADEON_DB_STATE to avoid stale texture caches */
+   GLuint *tex_cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
+   GLuint *cube_cmd;
+
+   R200_STATECHANGE( rmesa, tex[unit] );
+   tex_cmd[TEX_PP_TXFILTER] = (tex_cmd[TEX_PP_TXFILTER] & ~TEXOBJ_TXFILTER_MASK)
+      | (texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK);
+   tex_cmd[TEX_PP_TXFORMAT] = (tex_cmd[TEX_PP_TXFORMAT] & ~TEXOBJ_TXFORMAT_MASK)
+      | (texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK);
+   tex_cmd[TEX_PP_TXFORMAT_X] = (tex_cmd[TEX_PP_TXFORMAT_X] & ~TEXOBJ_TXFORMAT_X_MASK)
+      | (texobj->pp_txformat_x & TEXOBJ_TXFORMAT_X_MASK);
+   tex_cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */
+   tex_cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */
+   tex_cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
+
+   if (texobj->base.Target != GL_TEXTURE_CUBE_MAP)
+      return;
+
+   cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
+   R200_STATECHANGE( rmesa, cube[unit] );
+   cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+   /* that value is submitted twice. could change cube atom
+      to not include that command when new drm is used */
+   if (rmesa->radeon.radeonScreen->drmSupportsFragShader)
+      tex_cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+}
+
+/*
+ * Load the texgen matrix of texture unit `unit` from four plane equations.
+ *
+ * Coefficient k of the S, T, R and Q plane is stored at m[4*k + 0],
+ * m[4*k + 1], m[4*k + 2] and m[4*k + 3] respectively.  The array is passed
+ * to _math_matrix_loadf(), the matrix is analysed, and the unit's
+ * R200_TEXMAT_0_ENABLE bit is set in rmesa->TexGenEnabled.
+ */
+static void set_texgen_matrix( r200ContextPtr rmesa, 
+			       GLuint unit,
+			       const GLfloat *s_plane,
+			       const GLfloat *t_plane,
+			       const GLfloat *r_plane,
+			       const GLfloat *q_plane )
+{
+   GLfloat m[16];
+   GLuint k;
+
+   /* each plane contributes one entry to every group of four */
+   for (k = 0; k < 4; k++) {
+      GLfloat *group = m + 4 * k;
+
+      group[0] = s_plane[k];
+      group[1] = t_plane[k];
+      group[2] = r_plane[k];
+      group[3] = q_plane[k];
+   }
+
+   _math_matrix_loadf( &rmesa->TexGenMatrix[unit], m );
+   _math_matrix_analyse( &rmesa->TexGenMatrix[unit] );
+
+   rmesa->TexGenEnabled |= R200_TEXMAT_0_ENABLE << unit;
+}
+
+
+/*
+ * Report texgen coordinates that are disabled although an enabled
+ * coordinate's plane has a non-zero coefficient for them.
+ *
+ * Coordinate c (S=0, T=1, R=2, Q=3) gets its bit set in the result when
+ * c is not in texGenEnabled and plane[c] != 0 for some enabled coordinate.
+ */
+static GLuint r200_need_dis_texgen(const GLbitfield texGenEnabled,
+				   const GLfloat *planeS,
+				   const GLfloat *planeT,
+				   const GLfloat *planeR,
+				   const GLfloat *planeQ)
+{
+   static const GLbitfield coord_bit[4] = { S_BIT, T_BIT, R_BIT, Q_BIT };
+   const GLfloat *plane[4];
+   GLuint result = 0;
+   int c, other;
+
+   plane[0] = planeS;
+   plane[1] = planeT;
+   plane[2] = planeR;
+   plane[3] = planeQ;
+
+   for (c = 0; c < 4; c++) {
+      if (texGenEnabled & coord_bit[c])
+         continue;
+
+      for (other = 0; other < 4; other++) {
+         if (other == c || !(texGenEnabled & coord_bit[other]))
+            continue;
+         if (plane[other][c] != 0.0) {
+            result |= coord_bit[c];
+            break;
+         }
+      }
+   }
+
+   return result;
+}
+
+
+/* Derive the texgen input / component-select state of texture unit `unit`
+ * from the GL texgen state.  Returns GL_FALSE if fallback required.
+ */
+static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit )
+{  
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLuint inputshift = R200_TEXGEN_0_INPUT_SHIFT + unit*4;
+   GLuint tgi, tgcm;
+   GLuint mode = 0;
+   GLboolean mixed_fallback = GL_FALSE;
+   static const GLfloat I[16] = {
+      1,  0,  0,  0,
+      0,  1,  0,  0,
+      0,  0,  1,  0,
+      0,  0,  0,  1 };
+   static const GLfloat reflect[16] = {
+      -1,  0,  0,  0,
+       0, -1,  0,  0,
+       0,  0,  -1, 0,
+       0,  0,  0,  1 };
+   /* reset this unit's texgen bookkeeping; tgi/tgcm start with its fields masked out */
+   rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit);
+   rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit);
+   rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit);
+   rmesa->TexGenNeedNormals[unit] = GL_FALSE;
+   tgi = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] & ~(R200_TEXGEN_INPUT_MASK <<
+						   inputshift);
+   tgcm = rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] & ~(R200_TEXGEN_COMP_MASK <<
+						    (unit * 4));
+
+   if (0) 
+      fprintf(stderr, "%s unit %d\n", __FUNCTION__, unit);
+   /* S picks the mode; every disabled coordinate gets its COMP bit set in tgcm */
+   if (texUnit->TexGenEnabled & S_BIT) {
+      mode = texUnit->GenS.Mode;
+   } else {
+      tgcm |= R200_TEXGEN_COMP_S << (unit * 4);
+   }
+
+   if (texUnit->TexGenEnabled & T_BIT) {
+      if (texUnit->GenT.Mode != mode)
+	 mixed_fallback = GL_TRUE;
+   } else {
+      tgcm |= R200_TEXGEN_COMP_T << (unit * 4);
+   }
+   if (texUnit->TexGenEnabled & R_BIT) {
+      if (texUnit->GenR.Mode != mode)
+	 mixed_fallback = GL_TRUE;
+   } else {
+      tgcm |= R200_TEXGEN_COMP_R << (unit * 4);
+   }
+
+   if (texUnit->TexGenEnabled & Q_BIT) {
+      if (texUnit->GenQ.Mode != mode)
+	 mixed_fallback = GL_TRUE;
+   } else {
+      tgcm |= R200_TEXGEN_COMP_Q << (unit * 4);
+   }
+
+   if (mixed_fallback) {
+      if (R200_DEBUG & RADEON_FALLBACKS)
+	 fprintf(stderr, "fallback mixed texgen, 0x%x (0x%x 0x%x 0x%x 0x%x)\n",
+		 texUnit->TexGenEnabled, texUnit->GenS.Mode, texUnit->GenT.Mode,
+		 texUnit->GenR.Mode, texUnit->GenQ.Mode);
+      return GL_FALSE;
+   }
+
+/* we CANNOT do mixed mode if the texgen mode requires a plane where the input
+   is not enabled for texgen, since the planes are concatenated into texmat,
+   and thus the input will come from texcoord rather than tex gen equation!
+   Either fallback or just hope that those texcoords aren't really needed...
+   Assuming the former will cause lots of unnecessary fallbacks, the latter will
+   generate bogus results sometimes - it's pretty much impossible to really know
+   when a fallback is needed, depends on texmat and what sort of texture is bound
+   etc, - for now fallback if we're missing either S or T bits, there's a high
+   probability we need the texcoords in that case.
+   That's a lot of work for some obscure texgen mixed mode fixup - why oh why
+   doesn't the chip just directly accept the plane parameters :-(. */
+   switch (mode) {
+   case GL_OBJECT_LINEAR: {
+      GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled,
+                                                  texUnit->GenS.ObjectPlane,
+                                                  texUnit->GenT.ObjectPlane,
+                                                  texUnit->GenR.ObjectPlane,
+                                                  texUnit->GenQ.ObjectPlane );
+      if (needtgenable & (S_BIT | T_BIT)) {
+	 if (R200_DEBUG & RADEON_FALLBACKS)
+	 fprintf(stderr, "fallback mixed texgen / obj plane, 0x%x\n",
+		 texUnit->TexGenEnabled);
+	 return GL_FALSE;
+      }
+      if (needtgenable & (R_BIT)) {
+	 tgcm &= ~(R200_TEXGEN_COMP_R << (unit * 4));
+      }
+      if (needtgenable & (Q_BIT)) {
+	 tgcm &= ~(R200_TEXGEN_COMP_Q << (unit * 4));
+      }
+
+      tgi |= R200_TEXGEN_INPUT_OBJ << inputshift;
+      set_texgen_matrix( rmesa, unit, 
+	 (texUnit->TexGenEnabled & S_BIT) ? texUnit->GenS.ObjectPlane : I,
+	 (texUnit->TexGenEnabled & T_BIT) ? texUnit->GenT.ObjectPlane : I + 4,
+	 (texUnit->TexGenEnabled & R_BIT) ? texUnit->GenR.ObjectPlane : I + 8,
+	 (texUnit->TexGenEnabled & Q_BIT) ? texUnit->GenQ.ObjectPlane : I + 12);
+      }
+      break;
+
+   case GL_EYE_LINEAR: {
+      GLuint needtgenable = r200_need_dis_texgen( texUnit->TexGenEnabled,
+                                                  texUnit->GenS.EyePlane,
+                                                  texUnit->GenT.EyePlane,
+                                                  texUnit->GenR.EyePlane,
+                                                  texUnit->GenQ.EyePlane );
+      if (needtgenable & (S_BIT | T_BIT)) {
+	 if (R200_DEBUG & RADEON_FALLBACKS)
+	 fprintf(stderr, "fallback mixed texgen / eye plane, 0x%x\n",
+		 texUnit->TexGenEnabled);
+	 return GL_FALSE;
+      }
+      if (needtgenable & (R_BIT)) {
+	 tgcm &= ~(R200_TEXGEN_COMP_R << (unit * 4));
+      }
+      if (needtgenable & (Q_BIT)) {
+	 tgcm &= ~(R200_TEXGEN_COMP_Q << (unit * 4));
+      }
+      tgi |= R200_TEXGEN_INPUT_EYE << inputshift;
+      set_texgen_matrix( rmesa, unit,
+	 (texUnit->TexGenEnabled & S_BIT) ? texUnit->GenS.EyePlane : I,
+	 (texUnit->TexGenEnabled & T_BIT) ? texUnit->GenT.EyePlane : I + 4,
+	 (texUnit->TexGenEnabled & R_BIT) ? texUnit->GenR.EyePlane : I + 8,
+	 (texUnit->TexGenEnabled & Q_BIT) ? texUnit->GenQ.EyePlane : I + 12);
+      }
+      break;
+
+   case GL_REFLECTION_MAP_NV:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      tgi |= R200_TEXGEN_INPUT_EYE_REFLECT << inputshift;
+      /* pretty weird, must only negate when lighting is enabled? */
+      if (ctx->Light.Enabled)
+	 set_texgen_matrix( rmesa, unit, 
+	    (texUnit->TexGenEnabled & S_BIT) ? reflect : I,
+	    (texUnit->TexGenEnabled & T_BIT) ? reflect + 4 : I + 4,
+	    (texUnit->TexGenEnabled & R_BIT) ? reflect + 8 : I + 8,
+	    I + 12);
+      break;
+
+   case GL_NORMAL_MAP_NV:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      tgi |= R200_TEXGEN_INPUT_EYE_NORMAL<<inputshift;
+      break;
+
+   case GL_SPHERE_MAP:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      tgi |= R200_TEXGEN_INPUT_SPHERE<<inputshift;
+      break;
+
+   case 0:
+      /* No texgen coordinate is enabled on this unit, so just pass coords through. */
+      tgi |= unit << inputshift;
+      break;
+
+   default:
+      /* Unsupported mode, fallback:
+       */
+      if (R200_DEBUG & RADEON_FALLBACKS)
+	 fprintf(stderr, "fallback unsupported texgen, %d\n",
+		 texUnit->GenS.Mode);
+      return GL_FALSE;
+   }
+   /* every non-fallback path ends here: flag the unit and write tgi/tgcm back if changed */
+   rmesa->TexGenEnabled |= R200_TEXGEN_TEXMAT_0_ENABLE << unit;
+   rmesa->TexGenCompSel |= R200_OUTPUT_TEX_0 << unit;
+
+   if (tgi != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] || 
+       tgcm != rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2])
+   {
+      R200_STATECHANGE(rmesa, tcg);
+      rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_1] = tgi;
+      rmesa->hw.tcg.cmd[TCG_TEX_PROC_CTL_2] = tgcm;
+   }
+
+   return GL_TRUE;
+}
+
+/* Set or clear the R200_VTX_STQ0_D3D bit of texture unit `unit` in the
+ * cached SET_RE_CNTL register, touching the atom only on a real change. */
+void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   const GLuint d3d_bit = R200_VTX_STQ0_D3D << (2 * unit);
+   const GLuint current = rmesa->hw.set.cmd[SET_RE_CNTL];
+   const GLuint wanted = use_d3d ? (current | d3d_bit) : (current & ~d3d_bit);
+
+   if ( wanted == current )
+      return;
+
+   R200_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_RE_CNTL] = wanted;
+}
+
+/**
+ * Compute the cached hardware register values for the given texture object.
+ * Returns early, changing nothing, if the object already has a BO (t->bo).
+ * \param rmesa Context pointer
+ * \param t the radeon texture object
+ */
+static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t)
+{
+   const struct gl_texture_image *firstImage = t->base.Image[0][t->minLod];
+   GLint log2Width, log2Height, log2Depth, texelBytes;
+   uint extra_size = 0;
+
+   if ( t->bo ) {
+       return;
+   }
+
+   log2Width  = firstImage->WidthLog2;
+   log2Height = firstImage->HeightLog2;
+   log2Depth  = firstImage->DepthLog2;
+   texelBytes = _mesa_get_format_bytes(firstImage->TexFormat);
+
+   radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+	"%s(%p, tex %p) log2(w %d, h %d, d %d), texelBytes %d. format %d\n",
+	__func__, rmesa, t, log2Width, log2Height,
+	log2Depth, texelBytes, firstImage->TexFormat);
+
+   if (!t->image_override) {
+      if (VALID_FORMAT(firstImage->TexFormat)) {
+	 const struct tx_table *table = _mesa_little_endian() ? tx_table_le :
+	    tx_table_be;
+	 
+	 t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK |
+			     R200_TXFORMAT_ALPHA_IN_MAP);
+	 t->pp_txfilter &= ~R200_YUV_TO_RGB;
+	 
+	 t->pp_txformat |= table[ firstImage->TexFormat ].format;
+	 t->pp_txfilter |= table[ firstImage->TexFormat ].filter;
+
+
+      } else {
+	 _mesa_problem(NULL, "unexpected texture format in %s",
+		       __FUNCTION__);
+	 return;
+      }
+   }
+
+   t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK;
+   t->pp_txfilter |= ((t->maxLod) << R200_MAX_MIP_LEVEL_SHIFT)
+	   & R200_MAX_MIP_LEVEL_MASK;
+   /* if any of these min-filter bits is set, minLod is added to the log2 sizes below */
+   if ( t->pp_txfilter &
+		(R200_MIN_FILTER_NEAREST_MIP_NEAREST
+		 | R200_MIN_FILTER_NEAREST_MIP_LINEAR
+		 | R200_MIN_FILTER_LINEAR_MIP_NEAREST
+		 | R200_MIN_FILTER_LINEAR_MIP_LINEAR
+		 | R200_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST
+		 | R200_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR))
+		 extra_size = t->minLod;
+
+   t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK |
+		       R200_TXFORMAT_HEIGHT_MASK |
+		       R200_TXFORMAT_CUBIC_MAP_ENABLE |
+		       R200_TXFORMAT_F5_WIDTH_MASK |
+		       R200_TXFORMAT_F5_HEIGHT_MASK);
+   t->pp_txformat |= (((log2Width + extra_size) << R200_TXFORMAT_WIDTH_SHIFT) |
+		      ((log2Height + extra_size)<< R200_TXFORMAT_HEIGHT_SHIFT));
+   
+   t->tile_bits = 0;
+   
+   t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK
+		   | R200_MIN_MIP_LEVEL_MASK);
+
+   t->pp_txformat_x |= (t->minLod << R200_MIN_MIP_LEVEL_SHIFT)
+	   & R200_MIN_MIP_LEVEL_MASK;
+
+   if (t->base.Target == GL_TEXTURE_3D) {
+      t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT);
+      t->pp_txformat_x |= R200_TEXCOORD_VOLUME;
+
+   }
+   else if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
+      ASSERT(log2Width == log2Height);
+      t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) |
+			 (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) |
+			 /* don't think we need this bit, if it exists at all - fglrx does not set it */
+			 (R200_TXFORMAT_CUBIC_MAP_ENABLE));
+      t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV;
+      t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) |
+                           (log2Height << R200_FACE_HEIGHT_1_SHIFT) |
+                           (log2Width << R200_FACE_WIDTH_2_SHIFT) |
+                           (log2Height << R200_FACE_HEIGHT_2_SHIFT) |
+                           (log2Width << R200_FACE_WIDTH_3_SHIFT) |
+                           (log2Height << R200_FACE_HEIGHT_3_SHIFT) |
+                           (log2Width << R200_FACE_WIDTH_4_SHIFT) |
+                           (log2Height << R200_FACE_HEIGHT_4_SHIFT));
+   }
+   else {
+      /* If we don't in fact send enough texture coordinates, q will be 1,
+       * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?)
+       */
+      t->pp_txformat_x |= R200_TEXCOORD_PROJ;
+   }
+   /* FIXME: NPOT sizes, is this really correct? */
+   t->pp_txsize = (((firstImage->Width - 1) << R200_PP_TX_WIDTHMASK_SHIFT)
+		   | ((firstImage->Height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT));
+
+   if ( !t->image_override ) {
+      if (_mesa_is_format_compressed(firstImage->TexFormat))
+         t->pp_txpitch = (firstImage->Width + 63) & ~(63);
+      else
+         t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
+      t->pp_txpitch -= 32;
+   }
+
+   if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
+      t->pp_txformat |= R200_TXFORMAT_NON_POWER2;
+   }
+
+}
+
+/* Validate texObj for texture unit `unit` and import its state into the
+ * cached hardware registers.  Returns GL_FALSE if the miptree cannot be
+ * validated or the texture needs the border-mode fallback. */
+static GLboolean r200_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   radeonTexObj *t = radeon_tex_obj(texObj);
+   GLboolean use_d3d;
+
+   if (!radeon_validate_texture_miptree(ctx, texObj))
+      return GL_FALSE;
+
+   r200_validate_texgen(ctx, unit);
+   /* Configure the hardware registers (more precisely, the cached version
+    * of the hardware registers). */
+   setup_hardware_state(rmesa, t);
+
+   use_d3d = !(texObj->Target == GL_TEXTURE_1D ||
+               texObj->Target == GL_TEXTURE_2D ||
+               texObj->Target == GL_TEXTURE_RECTANGLE_NV);
+   set_re_cntl_d3d( ctx, unit, use_d3d );
+
+   R200_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit;
+   R200_STATECHANGE( rmesa, vtx );
+   rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] =
+      (rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] & ~(7 << (unit * 3)))
+      | (4 << (unit * 3));
+
+   rmesa->recheck_texgen[unit] = GL_TRUE;
+   import_tex_obj_state( rmesa, unit, t );
+   if (rmesa->recheck_texgen[unit]) {
+      GLboolean need_fallback = !r200_validate_texgen( ctx, unit );
+      TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), need_fallback );
+      rmesa->recheck_texgen[unit] = 0;
+      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+   }
+
+   t->validated = GL_TRUE;
+   FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
+   return !t->border_fallback;
+}
+
+/* Update one texture unit: disable it when it is not needed, otherwise
+ * validate the currently bound texture and record it as the unit's texobj.
+ * Returns GL_FALSE only if that validation fails. */
+static GLboolean r200UpdateTextureUnit(GLcontext *ctx, int unit)
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+
+   if (rmesa->state.texture.unit[unit].unitneeded == 0) {
+      disable_tex_obj_state(rmesa, unit);
+      return GL_TRUE;
+   }
+
+   if (r200_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
+      rmesa->state.texture.unit[unit].texobj =
+         radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
+      return GL_TRUE;
+   }
+
+   _mesa_warning(ctx, "failed to validate texture for unit %d.\n", unit);
+   rmesa->state.texture.unit[unit].texobj = NULL;
+   return GL_FALSE;
+}
+
+/* Update texenv / fragment shader and all six texture units, then apply the CHIP_FAMILY_R200-only hang workarounds. */
+void r200UpdateTextureState( GLcontext *ctx )
+{
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   GLboolean ok;
+   GLuint dbg;
+
+   /* NOTE: must not manipulate rmesa->state.texture.unit[].unitneeded or
+      rmesa->state.envneeded before a R200_STATECHANGE (or R200_NEWPRIM) since
+      we use these to determine if we want to emit the corresponding state
+      atoms. */
+   R200_NEWPRIM( rmesa );
+
+   if (ctx->ATIFragmentShader._Enabled) {
+      GLuint i;
+      for (i = 0; i < R200_MAX_TEXTURE_UNITS; i++) {
+	 rmesa->state.texture.unit[i].unitneeded = ctx->Texture.Unit[i]._ReallyEnabled;
+      }
+      ok = GL_TRUE;
+   }
+   else {
+      ok = r200UpdateAllTexEnv( ctx );
+   }
+   if (ok) {
+      ok = (r200UpdateTextureUnit( ctx, 0 ) &&
+	 r200UpdateTextureUnit( ctx, 1 ) &&
+	 r200UpdateTextureUnit( ctx, 2 ) &&
+	 r200UpdateTextureUnit( ctx, 3 ) &&
+	 r200UpdateTextureUnit( ctx, 4 ) &&
+	 r200UpdateTextureUnit( ctx, 5 ));
+   }
+
+   if (ok && ctx->ATIFragmentShader._Enabled) {
+      r200UpdateFragmentShader(ctx);
+   }
+
+   FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok );
+
+   if (rmesa->radeon.TclFallback)
+      r200ChooseVertexState( ctx );
+
+
+   if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) {
+
+      /*
+       * T0 hang workaround -------------
+       * not needed for r200 derivatives
+        */
+      if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_ENABLE_MASK) == R200_TEX_0_ENABLE &&
+	 (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
+
+	 R200_STATECHANGE(rmesa, ctx);
+	 R200_STATECHANGE(rmesa, tex[1]);
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE;
+	 if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE))
+	   rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+	 rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE;
+      }
+      else if (!ctx->ATIFragmentShader._Enabled) {
+	 if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE) &&
+	    (rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] & R200_TXFORMAT_LOOKUP_DISABLE)) {
+	    R200_STATECHANGE(rmesa, tex[1]);
+	    rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~R200_TXFORMAT_LOOKUP_DISABLE;
+         }
+      }
+      /* do the same workaround for the first pass of a fragment shader.
+       * completely unknown if necessary / sufficient.
+       */
+      if ((rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_ENABLE_MASK) == R200_PPX_TEX_0_ENABLE &&
+	 (rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) > R200_MIN_FILTER_LINEAR) {
+
+	 R200_STATECHANGE(rmesa, cst);
+	 R200_STATECHANGE(rmesa, tex[1]);
+	 rmesa->hw.cst.cmd[CST_PP_CNTL_X] |= R200_PPX_TEX_1_ENABLE;
+	 if (!(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_1_ENABLE))
+	    rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+	 rmesa->hw.tex[1].cmd[TEX_PP_TXMULTI_CTL] |= R200_PASS1_TXFORMAT_LOOKUP_DISABLE;
+      }
+
+      /* maybe needs to be done pairwise due to 2 parallel (physical) tex units ?
+         looks like that's not the case, if 8500/9100 owners don't complain remove this...
+      for ( i = 0; i < ctx->Const.MaxTextureUnits; i += 2) {
+         if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & ((R200_TEX_0_ENABLE |
+            R200_TEX_1_ENABLE ) << i)) == (R200_TEX_0_ENABLE << i)) &&
+            ((rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK) >
+            R200_MIN_FILTER_LINEAR)) {
+            R200_STATECHANGE(rmesa, ctx);
+            R200_STATECHANGE(rmesa, tex[i+1]);
+            rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= (R200_TEX_1_ENABLE << i);
+            rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+            rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] |= 0x08000000;
+         }
+         else {
+            if ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_1_ENABLE << i)) &&
+               (rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] & 0x08000000)) {
+               R200_STATECHANGE(rmesa, tex[i+1]);
+               rmesa->hw.tex[i+1].cmd[TEX_PP_TXFORMAT] &= ~0x08000000;
+            }
+         }
+      } */
+
+      /*
+       * Texture cache LRU hang workaround -------------
+       * not needed for r200 derivatives
+       * hopefully this covers first pass of a shader as well
+       */
+
+      /* While the cases below attempt to only enable the workaround in the
+       * specific cases necessary, they were insufficient.  See bugzilla #1519,
+       * #729, #814.  Tests with quake3 showed no impact on performance.
+       */
+      dbg = 0x6;
+
+      /*
+      if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE )) &&
+         ((((rmesa->hw.tex[0].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
+         0x04) == 0)) ||
+         ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_2_ENABLE) &&
+         ((((rmesa->hw.tex[2].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
+         0x04) == 0)) ||
+         ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_4_ENABLE) &&
+         ((((rmesa->hw.tex[4].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
+         0x04) == 0)))
+      {
+         dbg |= 0x02;
+      }
+
+      if (((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_1_ENABLE )) &&
+         ((((rmesa->hw.tex[1].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
+         0x04) == 0)) ||
+         ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_3_ENABLE) &&
+         ((((rmesa->hw.tex[3].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
+         0x04) == 0)) ||
+         ((rmesa->hw.ctx.cmd[CTX_PP_CNTL] & R200_TEX_5_ENABLE) &&
+         ((((rmesa->hw.tex[5].cmd[TEX_PP_TXFILTER] & R200_MIN_FILTER_MASK)) &
+         0x04) == 0)))
+      {
+         dbg |= 0x04;
+      }*/
+
+      if (dbg != rmesa->hw.tam.cmd[TAM_DEBUG3]) {
+         R200_STATECHANGE( rmesa, tam );
+         rmesa->hw.tam.cmd[TAM_DEBUG3] = dbg;
+         if (0) printf("TEXCACHE LRU HANG WORKAROUND %x\n", dbg);
+      }
+   }
+}
diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
new file mode 100644
index 0000000000..12f869d96f
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
@@ -0,0 +1,1272 @@
+/**************************************************************************
+
+Copyright (C) 2005 Aapo Tahkola.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Aapo Tahkola <aet@rasterburn.org>
+ *   Roland Scheidegger <rscheidegger_lists@hispeed.ch>
+ */
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "shader/programopt.h"
+#include "tnl/tnl.h"
+
+#include "r200_context.h"
+#include "r200_vertprog.h"
+#include "r200_ioctl.h"
+#include "r200_tcl.h"
+
+#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
+    SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
+    SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
+    SWIZZLE_W != VSF_IN_COMPONENT_W || \
+    SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
+    SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
+    WRITEMASK_X != VSF_FLAG_X || \
+    WRITEMASK_Y != VSF_FLAG_Y || \
+    WRITEMASK_Z != VSF_FLAG_Z || \
+    WRITEMASK_W != VSF_FLAG_W
+#error Cannot change these!
+#endif /* t_swizzle() and t_dst_mask() pass Mesa's values through, relying on these being equal */
+
+#define SCALAR_FLAG (1u<<31)  /* flag bit kept in op_names[].ip; unsigned because 1<<31 overflows int */
+#define FLAG_MASK (1u<<31)    /* all flag bits of op_names[].ip */
+#define OP_MASK (0xf)  /* we are unlikely to have more than 15 */
+#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
+
+static struct{ /* table of opcodes, one OPN() entry each */
+   char *name; /* opcode mnemonic, stringified by OPN() */
+   int opcode; /* the matching OPCODE_* value */
+   unsigned long ip; /* number of input operands and flags */
+}op_names[]={
+   OPN(ABS, 1),
+   OPN(ADD, 2),
+   OPN(ARL, 1|SCALAR_FLAG),
+   OPN(DP3, 2),
+   OPN(DP4, 2),
+   OPN(DPH, 2),
+   OPN(DST, 2),
+   OPN(EX2, 1|SCALAR_FLAG),
+   OPN(EXP, 1|SCALAR_FLAG),
+   OPN(FLR, 1),
+   OPN(FRC, 1),
+   OPN(LG2, 1|SCALAR_FLAG),
+   OPN(LIT, 1),
+   OPN(LOG, 1|SCALAR_FLAG),
+   OPN(MAD, 3),
+   OPN(MAX, 2),
+   OPN(MIN, 2),
+   OPN(MOV, 1),
+   OPN(MUL, 2),
+   OPN(POW, 2|SCALAR_FLAG),
+   OPN(RCP, 1|SCALAR_FLAG),
+   OPN(RSQ, 1|SCALAR_FLAG),
+   OPN(SGE, 2),
+   OPN(SLT, 2),
+   OPN(SUB, 2),
+   OPN(SWZ, 1),
+   OPN(XPD, 2),
+   OPN(PRINT, 0),
+   OPN(END, 0),
+};
+#undef OPN
+
+/* Copy the program's parameter values into the vpp[0]/vpp[1] state atoms
+ * (the first 96 go to vpp[0]) and trim the atoms' cmd_size to what is used.
+ * Returns GL_FALSE if there are more than R200_VSF_MAX_PARAM parameters. */
+static GLboolean r200VertexProgUpdateParams(GLcontext *ctx, struct r200_vertex_program *vp)
+{
+   r200ContextPtr rmesa = R200_CONTEXT( ctx );
+   GLfloat *out = (GLfloat *)&rmesa->hw.vpp[0].cmd[VPP_CMD_0 + 1];
+   struct gl_vertex_program *mesa_vp = &vp->mesa_program;
+   struct gl_program_parameter_list *params;
+   drm_radeon_cmd_header_t header;
+   GLuint total, in_first;
+   int pi, k;
+
+   R200_STATECHANGE( rmesa, vpp[0] );
+   R200_STATECHANGE( rmesa, vpp[1] );
+   assert(mesa_vp->Base.Parameters);
+   _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
+   params = mesa_vp->Base.Parameters;
+   total = params->NumParameters;
+
+   if(total > R200_VSF_MAX_PARAM){
+      fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   for(pi = 0; pi < total; pi++) {
+      switch(params->Parameters[pi].Type) {
+      case PROGRAM_STATE_VAR:
+      case PROGRAM_NAMED_PARAM:
+      case PROGRAM_CONSTANT:
+         for (k = 0; k < 4; k++) *out++ = params->ParameterValues[pi][k];
+         break;
+      default:
+         /* NOTE(review): `out` is not advanced here, so later values land a slot early - confirm */
+         _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
+         break;
+      }
+      if (pi == 95)   /* parameter 96 onwards is written to the second atom */
+         out = (GLfloat *)&rmesa->hw.vpp[1].cmd[VPP_CMD_0 + 1];
+   }
+   /* hack up the cmd_size so not the whole state atom is emitted always. */
+   in_first = (total > 96) ? 96 : total;
+   rmesa->hw.vpp[0].cmd_size = 1 + 4 * in_first;
+   header.i = rmesa->hw.vpp[0].cmd[VPP_CMD_0];
+   header.veclinear.count = in_first;
+   rmesa->hw.vpp[0].cmd[VPP_CMD_0] = header.i;
+   if (total > 96) {
+      rmesa->hw.vpp[1].cmd_size = 1 + 4 * (total - 96);
+      header.i = rmesa->hw.vpp[1].cmd[VPP_CMD_0];
+      header.veclinear.count = total - 96;
+      rmesa->hw.vpp[1].cmd[VPP_CMD_0] = header.i;
+   }
+   return GL_TRUE;
+}
+
+static INLINE unsigned long t_dst_mask(GLuint mask)
+{
+   const GLuint hw_mask = VSF_FLAG_ALL & mask; /* WRITEMASK_* == VSF_FLAG_* */
+   return hw_mask;
+}
+
+/* Translate a Mesa destination register into the R200 vertex-program
+ * output class (and register index where one applies).  An unknown
+ * register file or output is fatal: the process exits with status 1. */
+static unsigned long t_dst(struct prog_dst_register *dst)
+{
+   switch(dst->File) {
+   case PROGRAM_TEMPORARY:
+      return ((dst->Index << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP);
+   case PROGRAM_OUTPUT:
+      switch (dst->Index) {
+      case VERT_RESULT_HPOS:
+	 return R200_VSF_OUT_CLASS_RESULT_POS;
+      case VERT_RESULT_COL0:
+	 return R200_VSF_OUT_CLASS_RESULT_COLOR;
+      case VERT_RESULT_COL1:
+	 return ((1 << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_RESULT_COLOR);
+      case VERT_RESULT_FOGC:
+	 return R200_VSF_OUT_CLASS_RESULT_FOGC;
+      case VERT_RESULT_TEX0:
+      case VERT_RESULT_TEX1:
+      case VERT_RESULT_TEX2:
+      case VERT_RESULT_TEX3:
+      case VERT_RESULT_TEX4:
+      case VERT_RESULT_TEX5:
+	 return (((dst->Index - VERT_RESULT_TEX0) << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_RESULT_TEXC);
+      case VERT_RESULT_PSIZ:
+	 return R200_VSF_OUT_CLASS_RESULT_POINTSIZE;
+      default:
+	 fprintf(stderr, "problem in %s, unknown dst output reg %d\n", __FUNCTION__, dst->Index);
+	 exit(1);   /* fatal: report failure, not success, to the parent process */
+	 return 0;
+      }
+   case PROGRAM_ADDRESS:
+      assert (dst->Index == 0);
+      return R200_VSF_OUT_CLASS_ADDR;
+   default:
+      fprintf(stderr, "problem in %s, unknown register type %d\n", __FUNCTION__, dst->File);
+      exit(1);   /* fatal: report failure, not success, to the parent process */
+      return 0;
+   }
+}
+
+/* Map a Mesa register file to the VSF source class.  Only temporaries,
+ * inputs and the parameter/constant files are handled; any other file is
+ * fatal and exits with status 1. */
+static unsigned long t_src_class(gl_register_file file)
+{
+   switch(file){
+   case PROGRAM_TEMPORARY:
+      return VSF_IN_CLASS_TMP;
+
+   case PROGRAM_INPUT:
+      return VSF_IN_CLASS_ATTR;
+
+   case PROGRAM_LOCAL_PARAM:
+   case PROGRAM_ENV_PARAM:
+   case PROGRAM_NAMED_PARAM:
+   case PROGRAM_CONSTANT:
+   case PROGRAM_STATE_VAR:
+      return VSF_IN_CLASS_PARAM;
+
+   /* PROGRAM_OUTPUT, PROGRAM_WRITE_ONLY, PROGRAM_ADDRESS and anything else */
+   default:
+      fprintf(stderr, "problem in %s, unknown register file %d\n", __FUNCTION__, (int) file);
+      exit(1);   /* fatal: report failure, not success, to the parent process */
+      return 0;
+   }
+}
+
+/**
+ * Convert a single Mesa swizzle selector into the hardware component
+ * selector.
+ *
+ * The Mesa SWIZZLE_* values are all identical to VSF_IN_COMPONENT_*, so
+ * the value is passed through unchanged and only widened.
+ */
+static INLINE unsigned long t_swizzle(GLubyte swizzle)
+{
+   return (unsigned long) swizzle;
+}
+
+#if 0
+/* Debug helper, currently compiled out.
+   Prints every entry of vp->inputs (VERT_ATTRIB_MAX values) on one line to
+   stderr, prefixed with the caller's name.  A NULL vp is reported and
+   otherwise ignored. */
+static void vp_dump_inputs(struct r200_vertex_program *vp, char *caller)
+{
+   int i;
+
+   if(vp == NULL){
+      fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
+      return ;
+   }
+
+   fprintf(stderr, "%s:<", caller);
+   /* only the fprintf directly below belongs to the loop body */
+   for(i=0; i < VERT_ATTRIB_MAX; i++)
+   fprintf(stderr, "%d ", vp->inputs[i]);
+   fprintf(stderr, ">\n");
+
+}
+#endif
+
+/**
+ * Determine the hardware register index of a source operand.
+ *
+ * Vertex inputs are looked up in vp->inputs, which must already hold a
+ * slot for the attribute (checked with an assert).  All other register
+ * files use the Mesa index directly; a negative index only produces a
+ * warning and is replaced by 0.
+ *
+ * \param vp   program whose input mapping is consulted.
+ * \param src  source operand to translate.
+ * \return  register index to encode into the instruction.
+ */
+static unsigned long t_src_index(struct r200_vertex_program *vp, struct prog_src_register *src)
+{
+   if (src->File != PROGRAM_INPUT) {
+      if (src->Index >= 0)
+	 return src->Index;
+
+      fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
+      return 0;
+   }
+
+   /* the input slots are assigned before any instruction is translated */
+   assert(vp->inputs[src->Index] != -1);
+   return vp->inputs[src->Index];
+}
+
+/**
+ * Encode a vector source operand: register index, the four swizzle
+ * selectors, source class and the Mesa negate value, with the relative
+ * addressing flag or'ed in at bit 4.
+ *
+ * \param vp   program whose input mapping is used for vertex inputs.
+ * \param src  source operand to translate.
+ * \return  encoded hardware source word.
+ */
+static unsigned long t_src(struct r200_vertex_program *vp, struct prog_src_register *src)
+{
+   const unsigned long reg_index = t_src_index(vp, src);
+   const unsigned long swz_x = t_swizzle(GET_SWZ(src->Swizzle, 0));
+   const unsigned long swz_y = t_swizzle(GET_SWZ(src->Swizzle, 1));
+   const unsigned long swz_z = t_swizzle(GET_SWZ(src->Swizzle, 2));
+   const unsigned long swz_w = t_swizzle(GET_SWZ(src->Swizzle, 3));
+   const unsigned long reg_class = t_src_class(src->File);
+
+   return MAKE_VSF_SOURCE(reg_index,
+			swz_x, swz_y, swz_z, swz_w,
+			reg_class,
+			src->Negate) | (src->RelAddr << 4);
+}
+
+/**
+ * Encode a scalar source operand.
+ *
+ * Same layout as t_src(), but the first swizzle selector is replicated to
+ * all four components and any negation is applied to all of them.
+ *
+ * \param vp   program whose input mapping is used for vertex inputs.
+ * \param src  source operand to translate.
+ * \return  encoded hardware source word.
+ */
+static unsigned long t_src_scalar(struct r200_vertex_program *vp, struct prog_src_register *src)
+{
+   const unsigned long reg_index = t_src_index(vp, src);
+   const unsigned long swz = t_swizzle(GET_SWZ(src->Swizzle, 0));
+   const unsigned long reg_class = t_src_class(src->File);
+
+   return MAKE_VSF_SOURCE(reg_index,
+			swz, swz, swz, swz,
+			reg_class,
+			src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
+}
+
+/**
+ * Map a Mesa opcode onto the R200 vertex shader opcode that implements it
+ * one to one.
+ *
+ * Opcodes without an entry here must not reach this function; if one does,
+ * a diagnostic is printed and the process exits.
+ *
+ * \param opcode  Mesa instruction opcode.
+ * \return  the matching R200_VPI_OUT_OP_* value.
+ */
+static unsigned long t_opcode(enum prog_opcode opcode)
+{
+   switch (opcode) {
+   case OPCODE_ADD:
+      return R200_VPI_OUT_OP_ADD;
+   case OPCODE_ARL:
+      /* FIXME: ARL works fine, but negative offsets won't work - fglrx just
+       * seems to ignore neg offsets which isn't quite correct...
+       */
+      return R200_VPI_OUT_OP_ARL;
+   case OPCODE_DP4:
+      return R200_VPI_OUT_OP_DOT;
+   case OPCODE_DST:
+      return R200_VPI_OUT_OP_DST;
+   case OPCODE_EX2:
+      return R200_VPI_OUT_OP_EX2;
+   case OPCODE_EXP:
+      return R200_VPI_OUT_OP_EXP;
+   case OPCODE_FRC:
+      return R200_VPI_OUT_OP_FRC;
+   case OPCODE_LG2:
+      return R200_VPI_OUT_OP_LG2;
+   case OPCODE_LIT:
+      return R200_VPI_OUT_OP_LIT;
+   case OPCODE_LOG:
+      return R200_VPI_OUT_OP_LOG;
+   case OPCODE_MAX:
+      return R200_VPI_OUT_OP_MAX;
+   case OPCODE_MIN:
+      return R200_VPI_OUT_OP_MIN;
+   case OPCODE_MUL:
+      return R200_VPI_OUT_OP_MUL;
+   case OPCODE_RCP:
+      return R200_VPI_OUT_OP_RCP;
+   case OPCODE_RSQ:
+      return R200_VPI_OUT_OP_RSQ;
+   case OPCODE_SGE:
+      return R200_VPI_OUT_OP_SGE;
+   case OPCODE_SLT:
+      return R200_VPI_OUT_OP_SLT;
+   default:
+      break;
+   }
+
+   /* only reached for opcodes that need special handling by the caller */
+   fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
+   exit(-1);
+   return 0;
+}
+
+/**
+ * Look up the operand descriptor of a Mesa opcode in the op_names table.
+ *
+ * The table is searched linearly instead of being indexed by opcode, so it
+ * does not rely on the ordering of Mesa's opcode values.  An opcode that is
+ * missing from the table prints a diagnostic and terminates the process.
+ *
+ * \param opcode  Mesa instruction opcode.
+ * \return  the "ip" field of the matching op_names entry.
+ */
+static unsigned long op_operands(enum prog_opcode opcode)
+{
+   const size_t num_ops = sizeof(op_names) / sizeof(*op_names);
+   size_t idx = 0;
+
+   while (idx < num_ops) {
+      if (op_names[idx].opcode == opcode)
+	 return op_names[idx].ip;
+      idx++;
+   }
+
+   fprintf(stderr, "op %d not found in op_names\n", opcode);
+   exit(-1);
+   return 0;
+}
+
+/* TODO: Get rid of t_src_class call */
+/* Non-zero when sources a and b address different registers (RelAddr or
+   Index differ) while both are of class PARAM or both are of class ATTR.
+   Both arguments are evaluated several times, so only pass expressions
+   without side effects. */
+#define CMP_SRCS(a, b) ((((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index)) && \
+		       ((t_src_class((a).File) == VSF_IN_CLASS_PARAM && \
+			 t_src_class((b).File) == VSF_IN_CLASS_PARAM) || \
+			(t_src_class((a).File) == VSF_IN_CLASS_ATTR && \
+			 t_src_class((b).File) == VSF_IN_CLASS_ATTR)))
+
+/* fglrx on rv250 encodes unused sources as follows:
+   unused but necessary sources are the same as the previous source, zeroed out.
+   unnecessary sources are the same as the previous source but with
+   VSF_IN_CLASS_NONE set.
+   i.e. an add (2 args) has its 2nd arg (if you use it as mov) zeroed out, and
+   its 3rd arg set to VSF_IN_CLASS_NONE. Not sure if this is strictly necessary. */
+
+/* Use these simpler definitions. They read o_inst->srcN, so they obviously must
+   not be used before that source word has been set up.
+   They are NOT semantically equivalent to the r300 ones; sharing them would
+   require code changes. */
+
+/* ZERO_SRC_n: source word n of the current instruction with the 12 bits
+   starting at R200_VPI_IN_X_SHIFT (mask 0xfff) cleared and all four
+   component selects (X, Y, Z, W) replaced by R200_VPI_IN_SELECT_ZERO. */
+#define ZERO_SRC_0 (((o_inst->src0 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
+				   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
+				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
+				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
+				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))
+
+#define ZERO_SRC_1 (((o_inst->src1 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
+				   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
+				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
+				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
+				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))
+
+#define ZERO_SRC_2 (((o_inst->src2 & ~(0xfff << R200_VPI_IN_X_SHIFT)) \
+				   | ((R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_X_SHIFT) \
+				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Y_SHIFT) \
+				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_Z_SHIFT) \
+				   | (R200_VPI_IN_SELECT_ZERO << R200_VPI_IN_W_SHIFT))))
+
+/* UNUSED_SRC_n: source word n of the current instruction with its low four
+   bits replaced by the value 9.
+   NOTE(review): presumably 9 is the VSF_IN_CLASS_NONE encoding mentioned
+   above - confirm against the register headers. */
+#define UNUSED_SRC_0 ((o_inst->src0 & ~15) | 9)
+
+#define UNUSED_SRC_1 ((o_inst->src1 & ~15) | 9)
+
+#define UNUSED_SRC_2 ((o_inst->src2 & ~15) | 9)
+
+
+/**
+ * Generate an R200 vertex program from Mesa's internal representation.
+ *
+ * The translation runs in three stages:
+ *  - reject programs the hardware path cannot handle (unsupported outputs,
+ *    NV programs, missing position input/output, too many attributes),
+ *  - assign a hardware input slot to every vertex attribute the program
+ *    reads (vp->inputs) and record the reverse mapping (vp->inputmap_rev),
+ *  - walk the Mesa instructions up to OPCODE_END and emit native
+ *    instructions into vp->instr, inserting helper instructions and
+ *    scratch temporaries where one Mesa instruction needs several native
+ *    ones.
+ *
+ * vp->translated is set as soon as the function is entered; vp->native is
+ * only set when the whole program could be translated.  The native
+ * instruction/parameter/temporary counts of the Mesa program are updated
+ * along the way, and vp->pos_end records the last instruction that writes
+ * the position output.
+ *
+ * \param ctx  GL context; the current fog mode is stored in vp->fogmode and
+ *             selects the code appended to fog coordinate writes.
+ * \param vp   vertex program to translate.
+ * \return  GL_TRUE for success, GL_FALSE for failure.
+ */
+static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_vertex_program *vp)
+{
+   struct gl_vertex_program *mesa_vp = &vp->mesa_program;
+   struct prog_instruction *vpi;
+   int i;
+   VERTEX_SHADER_INSTRUCTION *o_inst;
+   unsigned long operands;
+   int are_srcs_scalar;
+   unsigned long hw_op;
+   int dofogfix = 0;
+   int fog_temp_i = 0;
+   int free_inputs;
+   int array_count = 0;
+   int u_temp_used;
+
+   vp->native = GL_FALSE;
+   vp->translated = GL_TRUE;
+   vp->fogmode = ctx->Fog.Mode;
+
+   if (mesa_vp->Base.NumInstructions == 0)
+      return GL_FALSE;
+
+#if 0
+   if ((mesa_vp->Base.InputsRead &
+      ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
+      VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
+      VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) != 0) {
+      if (R200_DEBUG & RADEON_FALLBACKS) {
+	 fprintf(stderr, "can't handle vert prog inputs 0x%x\n",
+	    mesa_vp->Base.InputsRead);
+      }
+      return GL_FALSE;
+   }
+#endif
+
+   /* only the outputs listed here have a hardware output class (see t_dst) */
+   if ((mesa_vp->Base.OutputsWritten &
+      ~((1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_COL0) | (1 << VERT_RESULT_COL1) |
+      (1 << VERT_RESULT_FOGC) | (1 << VERT_RESULT_TEX0) | (1 << VERT_RESULT_TEX1) |
+      (1 << VERT_RESULT_TEX2) | (1 << VERT_RESULT_TEX3) | (1 << VERT_RESULT_TEX4) |
+      (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) {
+      if (R200_DEBUG & RADEON_FALLBACKS) {
+	 fprintf(stderr, "can't handle vert prog outputs 0x%llx\n",
+	    mesa_vp->Base.OutputsWritten);
+      }
+      return GL_FALSE;
+   }
+
+   if (mesa_vp->IsNVProgram) {
+   /* subtle differences in spec like guaranteed initialized regs could cause
+      headaches. Might want to remove the driconf option to enable it completely */
+      return GL_FALSE;
+   }
+   /* Initial value should be last tmp reg that hw supports.
+      Strangely enough r300 doesnt mind even though these would be out of range.
+      Smart enough to realize that it doesnt need it? */
+   int u_temp_i = R200_VSF_MAX_TEMPS - 1;
+   struct prog_src_register src[3];
+   struct prog_dst_register dst;
+
+/* FIXME: is changing the prog safe to do here? */
+   if (mesa_vp->IsPositionInvariant &&
+      /* make sure we only do this once */
+       !(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
+	 _mesa_insert_mvp_code(ctx, mesa_vp);
+      }
+
+   /* for fogc, can't change mesa_vp, as it would hose swtnl, and exp with
+      base e isn't directly available neither. */
+   if ((mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_FOGC)) && !vp->fogpidx) {
+      struct gl_program_parameter_list *paramList;
+      gl_state_index tokens[STATE_LENGTH] = { STATE_FOG_PARAMS, 0, 0, 0, 0 };
+      paramList = mesa_vp->Base.Parameters;
+      vp->fogpidx = _mesa_add_state_reference(paramList, tokens);
+   }
+
+   vp->pos_end = 0;
+   mesa_vp->Base.NumNativeInstructions = 0;
+   if (mesa_vp->Base.Parameters)
+      mesa_vp->Base.NumNativeParameters = mesa_vp->Base.Parameters->NumParameters;
+   else
+      mesa_vp->Base.NumNativeParameters = 0;
+
+   /* start with no attribute mapped; -1 / 255 mark unused entries */
+   for(i = 0; i < VERT_ATTRIB_MAX; i++)
+      vp->inputs[i] = -1;
+   for(i = 0; i < 15; i++)
+      vp->inputmap_rev[i] = 255;
+   /* bit n set: hw attrib n is still available for generic attribs
+      (0x2ffd = attribs 0, 2-11 and 13, see below) */
+   free_inputs = 0x2ffd;
+
+/* fglrx uses fixed inputs as follows for conventional attribs.
+   generic attribs use non-fixed assignment, fglrx will always use the
+   lowest attrib values available. We'll just do the same.
+   There are 12 generic attribs possible, corresponding to attrib 0, 2-11
+   and 13 in a hw vertex prog.
+   attr 1 and 12 aren't used for generic attribs as those cannot be made vec4
+   (correspond to vertex normal/weight - maybe weight actually could be made vec4).
+   Additionally, not more than 12 arrays in total are possible I think.
+   attr 0 is pos, R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0
+   attr 2-5 use colors 0-3 (R200_VTX_FP_RGBA << R200_VTX_COLOR_0/1/2/3_SHIFT in R200_SE_VTX_FMT_0)
+   attr 6-11 use tex 0-5 (4 << R200_VTX_TEX0/1/2/3/4/5_COMP_CNT_SHIFT in R200_SE_VTX_FMT_1)
+   attr 13 uses vtx1 pos (R200_VTX_XY1|R200_VTX_Z1|R200_VTX_W1 in R200_SE_VTX_FMT_0)
+*/
+
+/* attr 4,5 and 13 are only used with generic attribs.
+   Haven't seen attr 14 used, maybe that's for the hw pointsize vec1 (which is
+   not possible to use with vertex progs as it is lacking in vert prog specification) */
+/* may look different when using idx buf / input_route instead of se_vtx_fmt? */
+   if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
+      vp->inputs[VERT_ATTRIB_POS] = 0;
+      vp->inputmap_rev[0] = VERT_ATTRIB_POS;
+      free_inputs &= ~(1 << 0);
+      array_count++;
+   }
+   if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
+      vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
+      vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
+      array_count++;
+   }
+   if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
+      vp->inputs[VERT_ATTRIB_NORMAL] = 1;
+      vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
+      array_count++;
+   }
+   if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
+      vp->inputs[VERT_ATTRIB_COLOR0] = 2;
+      vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
+      free_inputs &= ~(1 << 2);
+      array_count++;
+   }
+   if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
+      vp->inputs[VERT_ATTRIB_COLOR1] = 3;
+      vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
+      free_inputs &= ~(1 << 3);
+      array_count++;
+   }
+   if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
+      vp->inputs[VERT_ATTRIB_FOG] = 15;
+      vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
+      /* the fog coordinate is a single array, count it exactly once so the
+	 12 array limit below is not hit one attribute too early */
+      array_count++;
+   }
+   for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX5; i++) {
+      if (mesa_vp->Base.InputsRead & (1 << i)) {
+	 vp->inputs[i] = i - VERT_ATTRIB_TEX0 + 6;
+	 vp->inputmap_rev[8 + i - VERT_ATTRIB_TEX0] = i;
+	 free_inputs &= ~(1 << (i - VERT_ATTRIB_TEX0 + 6));
+	 array_count++;
+      }
+   }
+   /* using VERT_ATTRIB_TEX6/7 would be illegal */
+   /* completely ignore aliasing? */
+   for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
+      int j;
+   /* completely ignore aliasing? */
+      if (mesa_vp->Base.InputsRead & (1 << i)) {
+	 array_count++;
+	 if (array_count > 12) {
+	    if (R200_DEBUG & RADEON_FALLBACKS) {
+	       fprintf(stderr, "more than 12 attribs used in vert prog\n");
+	    }
+	    return GL_FALSE;
+	 }
+	 /* hand out the lowest hw attrib that is still free */
+	 for (j = 0; j < 14; j++) {
+	    /* will always find one due to limited array_count */
+	    if (free_inputs & (1 << j)) {
+	       free_inputs &= ~(1 << j);
+	       vp->inputs[i] = j;
+	       if (j == 0) vp->inputmap_rev[j] = i; /* mapped to pos */
+	       else if (j < 12) vp->inputmap_rev[j + 2] = i; /* mapped to col/tex */
+	       else vp->inputmap_rev[j + 1] = i; /* mapped to pos1 */
+	       break;
+	    }
+	 }
+      }
+   }
+
+   if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
+      if (R200_DEBUG & RADEON_FALLBACKS) {
+	 fprintf(stderr, "can't handle vert prog without position output\n");
+      }
+      return GL_FALSE;
+   }
+   /* hw attrib 0 still free means nothing was mapped to the position slot */
+   if (free_inputs & 1) {
+      if (R200_DEBUG & RADEON_FALLBACKS) {
+	 fprintf(stderr, "can't handle vert prog without position input\n");
+      }
+      return GL_FALSE;
+   }
+
+   o_inst = vp->instr;
+   for (vpi = mesa_vp->Base.Instructions; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
+      operands = op_operands(vpi->Opcode);
+      are_srcs_scalar = operands & SCALAR_FLAG;
+      operands &= OP_MASK;
+
+      /* work on copies of the sources, they may get rewritten below */
+      for(i = 0; i < operands; i++) {
+	 src[i] = vpi->SrcReg[i];
+	 /* hack up default attrib values as per spec as swizzling.
+	    normal, fog, secondary color. Crazy?
+	    May need more if we don't submit vec4 elements? */
+	 if (src[i].File == PROGRAM_INPUT) {
+	    if (src[i].Index == VERT_ATTRIB_NORMAL) {
+	       int j;
+	       for (j = 0; j < 4; j++) {
+		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
+		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
+		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
+		  }
+	       }
+	    }
+	    else if (src[i].Index == VERT_ATTRIB_COLOR1) {
+	       int j;
+	       for (j = 0; j < 4; j++) {
+		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
+		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
+		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
+		  }
+	       }
+	    }
+	    else if (src[i].Index == VERT_ATTRIB_FOG) {
+	       int j;
+	       for (j = 0; j < 4; j++) {
+		  if (GET_SWZ(src[i].Swizzle, j) == SWIZZLE_W) {
+		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
+		     src[i].Swizzle |= SWIZZLE_ONE << (j*3);
+		  }
+		  else if ((GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Y) ||
+			    GET_SWZ(src[i].Swizzle, j) == SWIZZLE_Z) {
+		     src[i].Swizzle &= ~(SWIZZLE_W << (j*3));
+		     src[i].Swizzle |= SWIZZLE_ZERO << (j*3);
+		  }
+	       }
+	    }
+	 }
+      }
+
+      /* If the third source conflicts with one of the others (see CMP_SRCS),
+	 copy it into a scratch temp first (ADD tmp, src2, 0) and read the
+	 temp instead. */
+      if(operands == 3){
+	 if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
+	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
+		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
+		VSF_FLAG_ALL);
+
+	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
+		  SWIZZLE_X, SWIZZLE_Y,
+		  SWIZZLE_Z, SWIZZLE_W,
+		  t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
+
+	    o_inst->src1 = ZERO_SRC_0;
+	    o_inst->src2 = UNUSED_SRC_1;
+	    o_inst++;
+
+	    src[2].File = PROGRAM_TEMPORARY;
+	    src[2].Index = u_temp_i;
+	    src[2].RelAddr = 0;
+	    u_temp_i--;
+	 }
+      }
+
+      /* same treatment for a conflict between the first two sources */
+      if(operands >= 2){
+	 if( CMP_SRCS(src[1], src[0]) ){
+	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
+		(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
+		VSF_FLAG_ALL);
+
+	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+		  SWIZZLE_X, SWIZZLE_Y,
+		  SWIZZLE_Z, SWIZZLE_W,
+		  t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
+
+	    o_inst->src1 = ZERO_SRC_0;
+	    o_inst->src2 = UNUSED_SRC_1;
+	    o_inst++;
+
+	    src[0].File = PROGRAM_TEMPORARY;
+	    src[0].Index = u_temp_i;
+	    src[0].RelAddr = 0;
+	    u_temp_i--;
+	 }
+      }
+
+      /* A write to fogc.x is redirected into a scratch temp; the fog
+	 factor computation is appended after the instruction (dofogfix). */
+      dst = vpi->DstReg;
+      if (dst.File == PROGRAM_OUTPUT &&
+	  dst.Index == VERT_RESULT_FOGC &&
+	  dst.WriteMask & WRITEMASK_X) {
+	  fog_temp_i = u_temp_i;
+	  dst.File = PROGRAM_TEMPORARY;
+	  dst.Index = fog_temp_i;
+	  dofogfix = 1;
+	  u_temp_i--;
+      }
+
+      /* These ops need special handling. */
+      switch(vpi->Opcode){
+      case OPCODE_POW:
+/* pow takes only one argument, first scalar is in slot x, 2nd in slot z (other slots don't matter).
+   So may need to insert additional instruction */
+	 if ((src[0].File == src[1].File) &&
+	     (src[0].Index == src[1].Index)) {
+	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
+		   t_dst_mask(dst.WriteMask));
+	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+		   SWIZZLE_ZERO,
+		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+		   SWIZZLE_ZERO,
+		   t_src_class(src[0].File),
+		   src[0].Negate) | (src[0].RelAddr << 4);
+	    o_inst->src1 = UNUSED_SRC_0;
+	    o_inst->src2 = UNUSED_SRC_0;
+	 }
+	 else {
+	    /* combine both scalars into one temp: tmp = (src0.x, 0, 0, 0) + (0, 0, src1.x, 0) */
+	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
+		   (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
+		   VSF_FLAG_ALL);
+	    o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+		   t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+		   SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO,
+		   t_src_class(src[0].File),
+		   src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
+	    o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+		   SWIZZLE_ZERO, SWIZZLE_ZERO,
+		   t_swizzle(GET_SWZ(src[1].Swizzle, 0)), SWIZZLE_ZERO,
+		   t_src_class(src[1].File),
+		   src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
+	    o_inst->src2 = UNUSED_SRC_1;
+	    o_inst++;
+
+	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_POW, t_dst(&dst),
+		   t_dst_mask(dst.WriteMask));
+	    o_inst->src0 = MAKE_VSF_SOURCE(u_temp_i,
+		   VSF_IN_COMPONENT_X,
+		   VSF_IN_COMPONENT_Y,
+		   VSF_IN_COMPONENT_Z,
+		   VSF_IN_COMPONENT_W,
+		   VSF_IN_CLASS_TMP,
+		   VSF_FLAG_NONE);
+	    o_inst->src1 = UNUSED_SRC_0;
+	    o_inst->src2 = UNUSED_SRC_0;
+	    u_temp_i--;
+	 }
+	 goto next;
+
+      case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
+      case OPCODE_SWZ:
+	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
+		t_dst_mask(dst.WriteMask));
+	 o_inst->src0 = t_src(vp, &src[0]);
+	 o_inst->src1 = ZERO_SRC_0;
+	 o_inst->src2 = UNUSED_SRC_1;
+	 goto next;
+
+      case OPCODE_MAD:
+	 /* only 2 read ports into temp memory thus may need the macro op MAD_2
+	    instead (requiring 2 clocks) if all inputs are in temp memory
+	    (and, only if they actually reference 3 distinct temps) */
+	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
+	    src[1].File == PROGRAM_TEMPORARY &&
+	    src[2].File == PROGRAM_TEMPORARY &&
+	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
+	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
+	    (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
+	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
+
+	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
+	    t_dst_mask(dst.WriteMask));
+	 o_inst->src0 = t_src(vp, &src[0]);
+#if 0
+if ((o_inst - vp->instr) == 31) {
+/* fix up the broken vertex program of quake4 demo... */
+o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+			SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X,
+			t_src_class(src[1].File),
+			src[1].Negate) | (src[1].RelAddr << 4);
+o_inst->src2 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+			SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y,
+			t_src_class(src[1].File),
+			src[1].Negate) | (src[1].RelAddr << 4);
+}
+else {
+	 o_inst->src1 = t_src(vp, &src[1]);
+	 o_inst->src2 = t_src(vp, &src[2]);
+}
+#else
+	 o_inst->src1 = t_src(vp, &src[1]);
+	 o_inst->src2 = t_src(vp, &src[2]);
+#endif
+	 goto next;
+
+      case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
+	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
+		t_dst_mask(dst.WriteMask));
+
+	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+		SWIZZLE_ZERO,
+		t_src_class(src[0].File),
+		src[0].Negate) | (src[0].RelAddr << 4);
+
+	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+		SWIZZLE_ZERO,
+		t_src_class(src[1].File),
+		src[1].Negate) | (src[1].RelAddr << 4);
+
+	 o_inst->src2 = UNUSED_SRC_1;
+	 goto next;
+
+      case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
+	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_DOT, t_dst(&dst),
+		t_dst_mask(dst.WriteMask));
+
+	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+		VSF_IN_COMPONENT_ONE,
+		t_src_class(src[0].File),
+		src[0].Negate) | (src[0].RelAddr << 4);
+	 o_inst->src1 = t_src(vp, &src[1]);
+	 o_inst->src2 = UNUSED_SRC_1;
+	 goto next;
+
+      case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
+		t_dst_mask(dst.WriteMask));
+
+	 o_inst->src0 = t_src(vp, &src[0]);
+	 /* second operand with inverted negation: a - b == a + (-b) */
+	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+		t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+		t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+		t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+		t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
+		t_src_class(src[1].File),
+		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
+	 o_inst->src2 = UNUSED_SRC_1;
+	 goto next;
+
+      case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+	 o_inst->op=MAKE_VSF_OP(R200_VPI_OUT_OP_MAX, t_dst(&dst),
+		t_dst_mask(dst.WriteMask));
+
+	 o_inst->src0=t_src(vp, &src[0]);
+	 /* same operand with inverted negation: abs(a) == max(a, -a) */
+	 o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+		t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+		t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+		t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+		t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
+		t_src_class(src[0].File),
+		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
+	 o_inst->src2 = UNUSED_SRC_1;
+	 goto next;
+
+      case OPCODE_FLR:
+      /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
+         ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
+
+	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_FRC,
+	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
+	    t_dst_mask(dst.WriteMask));
+
+	 o_inst->src0 = t_src(vp, &src[0]);
+	 o_inst->src1 = UNUSED_SRC_0;
+	 o_inst->src2 = UNUSED_SRC_1;
+	 o_inst++;
+
+	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD, t_dst(&dst),
+		t_dst_mask(dst.WriteMask));
+
+	 o_inst->src0 = t_src(vp, &src[0]);
+	 o_inst->src1 = MAKE_VSF_SOURCE(u_temp_i,
+		VSF_IN_COMPONENT_X,
+		VSF_IN_COMPONENT_Y,
+		VSF_IN_COMPONENT_Z,
+		VSF_IN_COMPONENT_W,
+		VSF_IN_CLASS_TMP,
+		/* Not 100% sure about this */
+		(!src[0].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
+
+	 o_inst->src2 = UNUSED_SRC_0;
+	 u_temp_i--;
+	 goto next;
+
+      case OPCODE_XPD:
+	 /* mul r0, r1.yzxw, r2.zxyw
+	    mad r0, -r2.yzxw, r1.zxyw, r0
+	  */
+	 hw_op=(src[0].File == PROGRAM_TEMPORARY &&
+	    src[1].File == PROGRAM_TEMPORARY &&
+	    (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
+	    R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
+
+	 o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
+	    (u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
+	    t_dst_mask(dst.WriteMask));
+
+	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
+		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
+		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
+		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
+		t_src_class(src[0].File),
+		src[0].Negate) | (src[0].RelAddr << 4);
+
+	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
+		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
+		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
+		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
+		t_src_class(src[1].File),
+		src[1].Negate) | (src[1].RelAddr << 4);
+
+	 o_inst->src2 = UNUSED_SRC_1;
+	 o_inst++;
+	 /* the temp written above is u_temp_i + 1 from here on */
+	 u_temp_i--;
+
+	 o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
+		t_dst_mask(dst.WriteMask));
+
+	 o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+		t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
+		t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
+		t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
+		t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
+		t_src_class(src[1].File),
+		(!src[1].Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
+
+	 o_inst->src1 = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+		t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
+		t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
+		t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
+		t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
+		t_src_class(src[0].File),
+		src[0].Negate) | (src[0].RelAddr << 4);
+
+	 o_inst->src2 = MAKE_VSF_SOURCE(u_temp_i+1,
+		VSF_IN_COMPONENT_X,
+		VSF_IN_COMPONENT_Y,
+		VSF_IN_COMPONENT_Z,
+		VSF_IN_COMPONENT_W,
+		VSF_IN_CLASS_TMP,
+		VSF_FLAG_NONE);
+	 goto next;
+
+      case OPCODE_END:
+	 assert(0);
+      default:
+	 break;
+      }
+
+      /* everything else maps one to one onto a hardware opcode */
+      o_inst->op = MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst(&dst),
+	    t_dst_mask(dst.WriteMask));
+
+      if(are_srcs_scalar){
+	 switch(operands){
+	    case 1:
+		o_inst->src0 = t_src_scalar(vp, &src[0]);
+		o_inst->src1 = UNUSED_SRC_0;
+		o_inst->src2 = UNUSED_SRC_1;
+	    break;
+
+	    case 2:
+		o_inst->src0 = t_src_scalar(vp, &src[0]);
+		o_inst->src1 = t_src_scalar(vp, &src[1]);
+		o_inst->src2 = UNUSED_SRC_1;
+	    break;
+
+	    case 3:
+		o_inst->src0 = t_src_scalar(vp, &src[0]);
+		o_inst->src1 = t_src_scalar(vp, &src[1]);
+		o_inst->src2 = t_src_scalar(vp, &src[2]);
+	    break;
+
+	    default:
+		fprintf(stderr, "illegal number of operands %lu\n", operands);
+		exit(-1);
+	    break;
+	 }
+      } else {
+	 switch(operands){
+	    case 1:
+		o_inst->src0 = t_src(vp, &src[0]);
+		o_inst->src1 = UNUSED_SRC_0;
+		o_inst->src2 = UNUSED_SRC_1;
+	    break;
+
+	    case 2:
+		o_inst->src0 = t_src(vp, &src[0]);
+		o_inst->src1 = t_src(vp, &src[1]);
+		o_inst->src2 = UNUSED_SRC_1;
+	    break;
+
+	    case 3:
+		o_inst->src0 = t_src(vp, &src[0]);
+		o_inst->src1 = t_src(vp, &src[1]);
+		o_inst->src2 = t_src(vp, &src[2]);
+	    break;
+
+	    default:
+		fprintf(stderr, "illegal number of operands %lu\n", operands);
+		exit(-1);
+	    break;
+	 }
+      }
+      next:
+
+      /* append the fog factor computation on the value that was redirected
+	 into fog_temp_i; the final instruction writes the real fogc output */
+      if (dofogfix) {
+	 o_inst++;
+	 if (vp->fogmode == GL_EXP) {
+	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
+		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
+		VSF_FLAG_X);
+	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
+	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
+	    o_inst->src2 = UNUSED_SRC_1;
+	    o_inst++;
+	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
+		R200_VSF_OUT_CLASS_RESULT_FOGC,
+		VSF_FLAG_X);
+	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
+	    o_inst->src1 = UNUSED_SRC_0;
+	    o_inst->src2 = UNUSED_SRC_1;
+	 }
+	 else if (vp->fogmode == GL_EXP2) {
+	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
+		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
+		VSF_FLAG_X);
+	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
+	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, X, X, X, X, PARAM, NONE);
+	    o_inst->src2 = UNUSED_SRC_1;
+	    o_inst++;
+	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
+		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
+		VSF_FLAG_X);
+	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
+	    o_inst->src1 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
+	    o_inst->src2 = UNUSED_SRC_1;
+	    o_inst++;
+	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_EXP_E,
+		R200_VSF_OUT_CLASS_RESULT_FOGC,
+		VSF_FLAG_X);
+	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
+	    o_inst->src1 = UNUSED_SRC_0;
+	    o_inst->src2 = UNUSED_SRC_1;
+	 }
+	 else { /* fogmode == GL_LINEAR */
+		/* could do that with single op (dot) if using params like
+		   with fixed function pipeline fog */
+	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_ADD,
+		(fog_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
+		VSF_FLAG_X);
+	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, ALL);
+	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, Z, Z, Z, Z, PARAM, NONE);
+	    o_inst->src2 = UNUSED_SRC_1;
+	    o_inst++;
+	    o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
+		R200_VSF_OUT_CLASS_RESULT_FOGC,
+		VSF_FLAG_X);
+	    o_inst->src0 = EASY_VSF_SOURCE(fog_temp_i, X, X, X, X, TMP, NONE);
+	    o_inst->src1 = EASY_VSF_SOURCE(vp->fogpidx, W, W, W, W, PARAM, NONE);
+	    o_inst->src2 = UNUSED_SRC_1;
+
+	 }
+         dofogfix = 0;
+      }
+
+      /* scratch temps are handed out from the top of the temp file and are
+	 only live within one Mesa instruction; track the high water mark */
+      u_temp_used = (R200_VSF_MAX_TEMPS - 1) - u_temp_i;
+      if (mesa_vp->Base.NumNativeTemporaries <
+	 (mesa_vp->Base.NumTemporaries + u_temp_used)) {
+	 mesa_vp->Base.NumNativeTemporaries =
+	    mesa_vp->Base.NumTemporaries + u_temp_used;
+      }
+      if ((mesa_vp->Base.NumTemporaries + u_temp_used) > R200_VSF_MAX_TEMPS) {
+	 if (R200_DEBUG & RADEON_FALLBACKS) {
+	    fprintf(stderr, "Ran out of temps, num temps %d, us %d\n", mesa_vp->Base.NumTemporaries, u_temp_used);
+	 }
+	 return GL_FALSE;
+      }
+      u_temp_i = R200_VSF_MAX_TEMPS - 1;
+      if(o_inst - vp->instr >= R200_VSF_MAX_INST) {
+	 mesa_vp->Base.NumNativeInstructions = 129;
+	 if (R200_DEBUG & RADEON_FALLBACKS) {
+	    fprintf(stderr, "more than 128 native instructions\n");
+	 }
+	 return GL_FALSE;
+      }
+      /* remember the last instruction that writes the position output */
+      if ((o_inst->op & R200_VSF_OUT_CLASS_MASK) == R200_VSF_OUT_CLASS_RESULT_POS) {
+	 vp->pos_end = (o_inst - vp->instr);
+      }
+   }
+
+   vp->native = GL_TRUE;
+   mesa_vp->Base.NumNativeInstructions = (o_inst - vp->instr);
+#if 0
+   fprintf(stderr, "hw program:\n");
+   for(i=0; i < vp->program.length; i++)
+      fprintf(stderr, "%08x\n", vp->instr[i]);
+#endif
+   return GL_TRUE;
+}
+
+void r200SetupVertexProg( GLcontext *ctx ) { /* program the hw state atoms for ctx's current vertex program, or flag a TCL fallback */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   struct r200_vertex_program *vp = (struct r200_vertex_program *)ctx->VertexProgram.Current;
+   GLboolean fallback;
+   GLint i;
+   /* (re)translate if never translated, or if fog is enabled with a mode other than the one recorded in vp->fogmode */
+   if (!vp->translated || (ctx->Fog.Enabled && ctx->Fog.Mode != vp->fogmode)) {
+      rmesa->curr_vp_hw = NULL;
+      r200_translate_vertex_program(ctx, vp);
+   }
+   /* could optimize setting up vertex progs away for non-tcl hw */
+   fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) &&
+      rmesa->radeon.radeonScreen->drmSupportsVertexProgram); /* fall back unless native, params updated and DRM support all hold */
+   TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback);
+   if (rmesa->radeon.TclFallback) return; /* a TCL fallback is flagged: skip the hw setup below */
+
+   R200_STATECHANGE( rmesa, vap );
+   /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it?
+             maybe only when using more than 64 inst / 96 param? */
+   rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_PROG_VTX_SHADER_ENABLE /*| R200_VAP_SINGLE_BUF_STATE_ENABLE*/;
+
+   R200_STATECHANGE( rmesa, pvs );
+   /* program spans instructions 0 .. NumNativeInstructions - 1; parameters start at offset 0 */
+   rmesa->hw.pvs.cmd[PVS_CNTL_1] = (0 << R200_PVS_CNTL_1_PROGRAM_START_SHIFT) |
+      ((vp->mesa_program.Base.NumNativeInstructions - 1) << R200_PVS_CNTL_1_PROGRAM_END_SHIFT) |
+      (vp->pos_end << R200_PVS_CNTL_1_POS_END_SHIFT);
+   rmesa->hw.pvs.cmd[PVS_CNTL_2] = (0 << R200_PVS_CNTL_2_PARAM_OFFSET_SHIFT) |
+      (vp->mesa_program.Base.NumNativeParameters << R200_PVS_CNTL_2_PARAM_COUNT_SHIFT);
+
+   /* maybe user clip planes just work with vertex progs... untested */
+   if (ctx->Transform.ClipPlanesEnabled) {
+      R200_STATECHANGE( rmesa, tcl );
+      if (vp->mesa_program.IsPositionInvariant) {
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (ctx->Transform.ClipPlanesEnabled << 2);
+      }
+      else {
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(0xfc); /* clears bits 2..7, the field the enable mask is shifted into above */
+      }
+   }
+
+   if (vp != rmesa->curr_vp_hw) { /* instruction atoms do not hold this program (or were invalidated) */
+      GLuint count = vp->mesa_program.Base.NumNativeInstructions;
+      drm_radeon_cmd_header_t tmp;
+
+      R200_STATECHANGE( rmesa, vpi[0] );
+      R200_STATECHANGE( rmesa, vpi[1] );
+
+      /* vpi[0] takes instructions 0..63, four words each. FIXME: what about using a memcopy... */
+      for (i = 0; (i < 64) && i < count; i++) {
+	 rmesa->hw.vpi[0].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i].op;
+	 rmesa->hw.vpi[0].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i].src0;
+	 rmesa->hw.vpi[0].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i].src1;
+	 rmesa->hw.vpi[0].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i].src2;
+      }
+      /* hack up the cmd_size so not the whole state atom is emitted always.
+         This may require some more thought, we may emit half progs on lost state, but
+         hopefully it won't matter?
+         WARNING: must not use R200_DB_STATECHANGE, this will produce bogus (and rejected)
+         packet emits (due to the mismatched cmd_size and count in cmd/last_cmd) */
+      rmesa->hw.vpi[0].cmd_size = 1 + 4 * ((count > 64) ? 64 : count);
+      tmp.i = rmesa->hw.vpi[0].cmd[VPI_CMD_0];
+      tmp.veclinear.count = (count > 64) ? 64 : count;
+      rmesa->hw.vpi[0].cmd[VPI_CMD_0] = tmp.i;
+      if (count > 64) { /* instructions 64.. spill into the second atom, vpi[1] */
+	 for (i = 0; i < (count - 64); i++) {
+	    rmesa->hw.vpi[1].cmd[VPI_OPDST_0 + 4 * i] = vp->instr[i + 64].op;
+	    rmesa->hw.vpi[1].cmd[VPI_SRC0_0 + 4 * i] = vp->instr[i + 64].src0;
+	    rmesa->hw.vpi[1].cmd[VPI_SRC1_0 + 4 * i] = vp->instr[i + 64].src1;
+	    rmesa->hw.vpi[1].cmd[VPI_SRC2_0 + 4 * i] = vp->instr[i + 64].src2;
+	 }
+	 rmesa->hw.vpi[1].cmd_size = 1 + 4 * (count - 64);
+	 tmp.i = rmesa->hw.vpi[1].cmd[VPI_CMD_0];
+	 tmp.veclinear.count = count - 64;
+	 rmesa->hw.vpi[1].cmd[VPI_CMD_0] = tmp.i;
+      }
+      rmesa->curr_vp_hw = vp; /* remember which program the atoms now hold */
+   }
+}
+
+
+static void
+r200BindProgram(GLcontext *ctx, GLenum target, struct gl_program *prog)
+{
+   /* BindProgram driver hook; the program being bound (prog) is not inspected. */
+   r200ContextPtr r200 = R200_CONTEXT(ctx);
+
+   if (target == GL_VERTEX_PROGRAM_ARB) {
+      /* Forget which vertex program the hw instruction atoms hold, so that
+       * r200SetupVertexProg copies the instructions again. */
+      r200->curr_vp_hw = NULL;
+   } else {
+      _mesa_problem(ctx, "Target not supported yet!");
+   }
+}
+
+static struct gl_program *
+r200NewProgram(GLcontext *ctx, GLenum target, GLuint id)
+{
+   /* NewProgram driver hook: allocate a zeroed program object for target and
+    * let core Mesa initialise it.  Vertex programs are allocated as the
+    * driver's r200_vertex_program; any other target reports a problem. */
+   if (target == GL_VERTEX_PROGRAM_ARB) {
+      struct r200_vertex_program *vprog = CALLOC_STRUCT(r200_vertex_program);
+      return _mesa_init_vertex_program(ctx, &vprog->mesa_program, target, id);
+   }
+   if (target == GL_FRAGMENT_PROGRAM_ARB || target == GL_FRAGMENT_PROGRAM_NV) {
+      struct gl_fragment_program *fprog = CALLOC_STRUCT(gl_fragment_program);
+      return _mesa_init_fragment_program( ctx, fprog, target, id );
+   }
+   _mesa_problem(ctx, "Bad target in r200NewProgram");
+   return NULL;
+}
+
+
+static void
+r200DeleteProgram(GLcontext *ctx, struct gl_program *prog)
+{
+   _mesa_delete_program(ctx, prog); /* DeleteProgram hook: no driver-side work here, deletion is delegated to core Mesa */
+}
+
+static GLboolean
+r200ProgramStringNotify(GLcontext *ctx, GLenum target, struct gl_program *prog)
+{
+   struct r200_vertex_program *vp = (void *)prog; /* only dereferenced in the GL_VERTEX_PROGRAM_ARB case */
+   r200ContextPtr rmesa = R200_CONTEXT(ctx);
+   /* ProgramStringNotify hook: invalidate cached translation / loaded-shader state for the changed program */
+   switch(target) {
+   case GL_VERTEX_PROGRAM_ARB:
+      vp->translated = GL_FALSE;
+      vp->fogpidx = 0;
+/*      memset(&vp->translated, 0, sizeof(struct r200_vertex_program) - sizeof(struct gl_vertex_program));*/
+      r200_translate_vertex_program(ctx, vp); /* retranslate right away; the return value is not used here */
+      rmesa->curr_vp_hw = NULL; /* makes r200SetupVertexProg copy the instructions again */
+      break;
+   case GL_FRAGMENT_SHADER_ATI:
+      rmesa->afs_loaded = NULL;
+      break;
+   }
+   /* need this for tcl fallbacks; the result is explicitly discarded */
+   (void) _tnl_program_string(ctx, target, prog);
+
+   /* XXX check if program is legal, within limits; until then every program is reported as accepted */
+   return GL_TRUE;
+}
+
+static GLboolean
+r200IsProgramNative(GLcontext *ctx, GLenum target, struct gl_program *prog)
+{
+   struct r200_vertex_program *vp = (void *)prog; /* only dereferenced for the vertex targets below */
+   /* IsProgramNative hook: tells whether a vertex program translated to native code; 0 for other targets. */
+   switch(target){
+   case GL_VERTEX_STATE_PROGRAM_NV:
+   case GL_VERTEX_PROGRAM_ARB:
+      if (!vp->translated) {
+	 r200_translate_vertex_program(ctx, vp); /* translate on demand so vp->native is meaningful */
+      }
+      /* does not take parameters etc. into account */
+      return vp->native;
+   default:
+      _mesa_problem(ctx, "Bad target in r200IsProgramNative"); /* message used to name r200NewProgram */
+   }
+   return 0;
+}
+
+void r200InitShaderFuncs(struct dd_function_table *functions)
+{
+   functions->IsProgramNative = r200IsProgramNative;         /* query: did translation succeed? */
+   functions->ProgramStringNotify = r200ProgramStringNotify; /* program text changed */
+   functions->DeleteProgram = r200DeleteProgram;             /* object lifetime: destroy */
+   functions->BindProgram = r200BindProgram;                 /* program bound to a target */
+   functions->NewProgram = r200NewProgram;                   /* object lifetime: create */
+}
diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.h b/src/mesa/drivers/dri/r200/r200_vertprog.h
new file mode 100644
index 0000000000..938237680c
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/r200_vertprog.h
@@ -0,0 +1,163 @@
+#ifndef __VERTEX_SHADER_H__
+#define __VERTEX_SHADER_H__
+
+#include "r200_reg.h"
+
+typedef struct { /* one vertex shader instruction: four 32-bit words */
+   uint32_t op; /* presumably the opcode/output word built by MAKE_VSF_OP -- TODO confirm */
+   uint32_t src0; /* src0..src2: presumably source operand words built by MAKE_VSF_SOURCE -- TODO confirm */
+   uint32_t src1;
+   uint32_t src2;
+} VERTEX_SHADER_INSTRUCTION;
+
+extern void r200InitShaderFuncs(struct dd_function_table *functions);
+extern void r200SetupVertexProg( GLcontext *ctx );
+
+#define VSF_FLAG_X	1
+#define VSF_FLAG_Y	2
+#define VSF_FLAG_Z	4
+#define VSF_FLAG_W	8
+#define VSF_FLAG_XYZ	(VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z)
+#define VSF_FLAG_ALL	0xf
+#define VSF_FLAG_NONE	0
+
+#define R200_VSF_MAX_INST	128
+#define R200_VSF_MAX_PARAM	192
+#define R200_VSF_MAX_TEMPS	12
+
+#define R200_VPI_OUT_REG_INDEX_SHIFT            13
+#define R200_VPI_OUT_REG_INDEX_MASK             (31 << 13) /* GUESS based on fglrx native limits */
+
+#define R200_VPI_OUT_WRITE_X                    (1 << 20)
+#define R200_VPI_OUT_WRITE_Y                    (1 << 21)
+#define R200_VPI_OUT_WRITE_Z                    (1 << 22)
+#define R200_VPI_OUT_WRITE_W                    (1 << 23)
+
+#define R200_VPI_IN_REG_CLASS_TEMPORARY         (0 << 0)
+#define R200_VPI_IN_REG_CLASS_ATTRIBUTE         (1 << 0)
+#define R200_VPI_IN_REG_CLASS_PARAMETER         (2 << 0)
+#define R200_VPI_IN_REG_CLASS_NONE              (9 << 0)
+#define R200_VPI_IN_REG_CLASS_MASK              (31 << 0) /* GUESS */
+
+#define R200_VPI_IN_REG_INDEX_SHIFT             5
+#define R200_VPI_IN_REG_INDEX_MASK              (255 << 5) /* GUESS based on fglrx native limits */
+
+/* The R200 can select components from the input register arbitrarily.
+ * Use the following constants, shifted by the shift of the component you
+ * want to select (R200_VPI_IN_{X,Y,Z,W}_SHIFT). */
+#define R200_VPI_IN_SELECT_X    0
+#define R200_VPI_IN_SELECT_Y    1
+#define R200_VPI_IN_SELECT_Z    2
+#define R200_VPI_IN_SELECT_W    3
+#define R200_VPI_IN_SELECT_ZERO 4
+#define R200_VPI_IN_SELECT_ONE  5
+#define R200_VPI_IN_SELECT_MASK 7
+
+#define R200_VPI_IN_X_SHIFT                     13
+#define R200_VPI_IN_Y_SHIFT                     16
+#define R200_VPI_IN_Z_SHIFT                     19
+#define R200_VPI_IN_W_SHIFT                     22
+
+#define R200_VPI_IN_NEG_X                       (1 << 25)
+#define R200_VPI_IN_NEG_Y                       (1 << 26)
+#define R200_VPI_IN_NEG_Z                       (1 << 27)
+#define R200_VPI_IN_NEG_W                       (1 << 28)
+
+#define R200_VSF_OUT_CLASS_TMP			(0 << 8)
+#define R200_VSF_OUT_CLASS_ADDR			(3 << 8)
+#define R200_VSF_OUT_CLASS_RESULT_POS		(4 << 8)
+#define R200_VSF_OUT_CLASS_RESULT_COLOR		(5 << 8)
+#define R200_VSF_OUT_CLASS_RESULT_TEXC		(6 << 8)
+#define R200_VSF_OUT_CLASS_RESULT_FOGC		(7 << 8)
+#define R200_VSF_OUT_CLASS_RESULT_POINTSIZE	(8 << 8)
+#define R200_VSF_OUT_CLASS_MASK			(31 << 8)
+
+/* opcodes - they all are the same as on r300 it seems, however
+   LIT and POW require different setup */
+#define R200_VPI_OUT_OP_DOT                     (1 << 0)
+#define R200_VPI_OUT_OP_MUL                     (2 << 0)
+#define R200_VPI_OUT_OP_ADD                     (3 << 0)
+#define R200_VPI_OUT_OP_MAD                     (4 << 0)
+#define R200_VPI_OUT_OP_DST                     (5 << 0)
+#define R200_VPI_OUT_OP_FRC                     (6 << 0)
+#define R200_VPI_OUT_OP_MAX                     (7 << 0)
+#define R200_VPI_OUT_OP_MIN                     (8 << 0)
+#define R200_VPI_OUT_OP_SGE                     (9 << 0)
+#define R200_VPI_OUT_OP_SLT                     (10 << 0)
+
+#define R200_VPI_OUT_OP_ARL                     (13 << 0)
+
+#define R200_VPI_OUT_OP_EXP                     (65 << 0)
+#define R200_VPI_OUT_OP_LOG                     (66 << 0)
+/* base e exp. Useful for fog. */
+#define R200_VPI_OUT_OP_EXP_E                   (67 << 0)
+
+#define R200_VPI_OUT_OP_LIT                     (68 << 0)
+#define R200_VPI_OUT_OP_POW                     (69 << 0)
+#define R200_VPI_OUT_OP_RCP                     (70 << 0)
+#define R200_VPI_OUT_OP_RSQ                     (72 << 0)
+
+#define R200_VPI_OUT_OP_EX2                     (75 << 0)
+#define R200_VPI_OUT_OP_LG2                     (76 << 0)
+
+#define R200_VPI_OUT_OP_MAD_2                   (128 << 0)
+
+/* first CARD32 of an instruction: opcode | output register bits | write mask */
+
+/* possible operations:
+    DOT, MUL, ADD, MAD, FRC, MAX, MIN, SGE, SLT, EXP, LOG, LIT, POW, RCP, RSQ, EX2,
+    LG2, MAD_2, ARL */
+/* op and out_reg are ORed in unshifted; out_reg_fields (e.g. a VSF_FLAG_* mask) lands at bit 20, where R200_VPI_OUT_WRITE_X sits */
+#define MAKE_VSF_OP(op, out_reg, out_reg_fields) \
+   ((op) | (out_reg) | ((out_reg_fields) << 20) )
+
+#define VSF_IN_CLASS_TMP	0
+#define VSF_IN_CLASS_ATTR	1
+#define VSF_IN_CLASS_PARAM	2
+#define VSF_IN_CLASS_NONE	9
+
+#define VSF_IN_COMPONENT_X	0
+#define VSF_IN_COMPONENT_Y	1
+#define VSF_IN_COMPONENT_Z	2
+#define VSF_IN_COMPONENT_W	3
+#define VSF_IN_COMPONENT_ZERO	4
+#define VSF_IN_COMPONENT_ONE	5
+/* Pack a source operand word: register index, one select per component (3 bits apart), negate mask from bit 25 (R200_VPI_IN_NEG_X), class ORed in unshifted. */
+#define MAKE_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \
+	( ((in_reg_index)<<R200_VPI_IN_REG_INDEX_SHIFT) \
+	   | ((comp_x)<<R200_VPI_IN_X_SHIFT) \
+	   | ((comp_y)<<R200_VPI_IN_Y_SHIFT) \
+	   | ((comp_z)<<R200_VPI_IN_Z_SHIFT) \
+	   | ((comp_w)<<R200_VPI_IN_W_SHIFT) \
+	   | ((negate)<<25) | ((class)))
+/* Wrapper around MAKE_VSF_SOURCE taking bare suffixes, pasted onto VSF_IN_COMPONENT_ (components), VSF_IN_CLASS_ (class) and VSF_FLAG_ (negate). */
+#define EASY_VSF_SOURCE(in_reg_index, comp_x, comp_y, comp_z, comp_w, class, negate) \
+	MAKE_VSF_SOURCE(in_reg_index, \
+		VSF_IN_COMPONENT_##comp_x, \
+		VSF_IN_COMPONENT_##comp_y, \
+		VSF_IN_COMPONENT_##comp_z, \
+		VSF_IN_COMPONENT_##comp_w, \
+		VSF_IN_CLASS_##class, VSF_FLAG_##negate)
+
+/* special sources: */
+
+/* (1.0,1.0,1.0,1.0) vector: every component selects ONE (ATTR class, or class NONE for the plain variant) */
+#define VSF_ATTR_UNITY(reg) 	EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, ATTR, NONE)
+#define VSF_UNITY(reg) 	EASY_VSF_SOURCE(reg, ONE, ONE, ONE, ONE, NONE, NONE)
+
+/* contents of unmodified attribute register (ATTR class, X/Y/Z/W selected in order, no negation) */
+#define VSF_REG(reg) 	EASY_VSF_SOURCE(reg, X, Y, Z, W, ATTR, NONE)
+
+/* contents of unmodified parameter */
+#define VSF_PARAM(reg) 	EASY_VSF_SOURCE(reg, X, Y, Z, W, PARAM, NONE)
+
+/* contents of unmodified temporary register */
+#define VSF_TMP(reg) 	EASY_VSF_SOURCE(reg, X, Y, Z, W, TMP, NONE)
+
+/* components of ATTR register */
+#define VSF_ATTR_X(reg) EASY_VSF_SOURCE(reg, X, X, X, X, ATTR, NONE)
+#define VSF_ATTR_Y(reg) EASY_VSF_SOURCE(reg, Y, Y, Y, Y, ATTR, NONE)
+#define VSF_ATTR_Z(reg) EASY_VSF_SOURCE(reg, Z, Z, Z, Z, ATTR, NONE)
+#define VSF_ATTR_W(reg) EASY_VSF_SOURCE(reg, W, W, W, W, ATTR, NONE)
+
+#endif
diff --git a/src/mesa/drivers/dri/r200/radeon_bo.c b/src/mesa/drivers/dri/r200/radeon_bo.c
new file mode 120000
index 0000000000..9448ffee54
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_bo.c
@@ -0,0 +1 @@
+../radeon/radeon_bo.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_bo_int_drm.h b/src/mesa/drivers/dri/r200/radeon_bo_int_drm.h
new file mode 120000
index 0000000000..029450928b
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_bo_int_drm.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_int_drm.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_bo_legacy.c b/src/mesa/drivers/dri/r200/radeon_bo_legacy.c
new file mode 120000
index 0000000000..79ad050e6b
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_bo_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_bo_legacy.h b/src/mesa/drivers/dri/r200/radeon_bo_legacy.h
new file mode 120000
index 0000000000..83b0f7ffab
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_bo_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/r200/radeon_bocs_wrapper.h
new file mode 120000
index 0000000000..ca894b2443
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_bocs_wrapper.h
@@ -0,0 +1 @@
+../radeon/radeon_bocs_wrapper.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_chipset.h b/src/mesa/drivers/dri/r200/radeon_chipset.h
new file mode 120000
index 0000000000..eba99001ff
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_chipset.h
@@ -0,0 +1 @@
+../radeon/radeon_chipset.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cmdbuf.h b/src/mesa/drivers/dri/r200/radeon_cmdbuf.h
new file mode 120000
index 0000000000..a799e1dc6d
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cmdbuf.h
@@ -0,0 +1 @@
+../radeon/radeon_cmdbuf.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_common.c b/src/mesa/drivers/dri/r200/radeon_common.c
new file mode 120000
index 0000000000..67b19ba940
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_common.c
@@ -0,0 +1 @@
+../radeon/radeon_common.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_common.h b/src/mesa/drivers/dri/r200/radeon_common.h
new file mode 120000
index 0000000000..5bcb696a9f
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_common.h
@@ -0,0 +1 @@
+../radeon/radeon_common.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_common_context.c b/src/mesa/drivers/dri/r200/radeon_common_context.c
new file mode 120000
index 0000000000..86800f3819
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_common_context.c
@@ -0,0 +1 @@
+../radeon/radeon_common_context.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_common_context.h b/src/mesa/drivers/dri/r200/radeon_common_context.h
new file mode 120000
index 0000000000..4d66312550
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_common_context.h
@@ -0,0 +1 @@
+../radeon/radeon_common_context.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cs.c b/src/mesa/drivers/dri/r200/radeon_cs.c
new file mode 120000
index 0000000000..66b7ad1eb0
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cs.c
@@ -0,0 +1 @@
+../radeon/radeon_cs.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cs_int_drm.h b/src/mesa/drivers/dri/r200/radeon_cs_int_drm.h
new file mode 120000
index 0000000000..462f5245d0
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cs_int_drm.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_int_drm.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cs_legacy.c b/src/mesa/drivers/dri/r200/radeon_cs_legacy.c
new file mode 120000
index 0000000000..006720f8a4
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cs_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cs_legacy.h b/src/mesa/drivers/dri/r200/radeon_cs_legacy.h
new file mode 120000
index 0000000000..a5f95e0a3d
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cs_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_cs_space_drm.c b/src/mesa/drivers/dri/r200/radeon_cs_space_drm.c
new file mode 120000
index 0000000000..c248ea7d1a
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_cs_space_drm.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_space_drm.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_debug.c b/src/mesa/drivers/dri/r200/radeon_debug.c
new file mode 120000
index 0000000000..c98c2e074c
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_debug.c
@@ -0,0 +1 @@
+../radeon/radeon_debug.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_debug.h b/src/mesa/drivers/dri/r200/radeon_debug.h
new file mode 120000
index 0000000000..bd8aa28e89
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_debug.h
@@ -0,0 +1 @@
+../radeon/radeon_debug.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_dma.c b/src/mesa/drivers/dri/r200/radeon_dma.c
new file mode 120000
index 0000000000..43be000625
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_dma.c
@@ -0,0 +1 @@
+../radeon/radeon_dma.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_dma.h b/src/mesa/drivers/dri/r200/radeon_dma.h
new file mode 120000
index 0000000000..82e50634e3
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_dma.h
@@ -0,0 +1 @@
+../radeon/radeon_dma.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_fbo.c b/src/mesa/drivers/dri/r200/radeon_fbo.c
new file mode 120000
index 0000000000..0d738d8d78
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_fbo.c
@@ -0,0 +1 @@
+../radeon/radeon_fbo.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_lock.c b/src/mesa/drivers/dri/r200/radeon_lock.c
new file mode 120000
index 0000000000..af4108a8e3
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_lock.c
@@ -0,0 +1 @@
+../radeon/radeon_lock.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_lock.h b/src/mesa/drivers/dri/r200/radeon_lock.h
new file mode 120000
index 0000000000..64bdf94ee7
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_lock.h
@@ -0,0 +1 @@
+../radeon/radeon_lock.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c b/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c
new file mode 120000
index 0000000000..31c0cfbe94
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_mipmap_tree.c
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_mipmap_tree.h b/src/mesa/drivers/dri/r200/radeon_mipmap_tree.h
new file mode 120000
index 0000000000..254d50cf8c
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_mipmap_tree.h
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_pixel_read.c b/src/mesa/drivers/dri/r200/radeon_pixel_read.c
new file mode 120000
index 0000000000..3b03803126
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_pixel_read.c
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_queryobj.c b/src/mesa/drivers/dri/r200/radeon_queryobj.c
new file mode 120000
index 0000000000..1d6ebc1c48
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_queryobj.c
@@ -0,0 +1 @@
+../radeon/radeon_queryobj.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_queryobj.h b/src/mesa/drivers/dri/r200/radeon_queryobj.h
new file mode 120000
index 0000000000..8f6f842b0a
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_queryobj.h
@@ -0,0 +1 @@
+../radeon/radeon_queryobj.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_screen.c b/src/mesa/drivers/dri/r200/radeon_screen.c
new file mode 120000
index 0000000000..86161118dd
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_screen.c
@@ -0,0 +1 @@
+../radeon/radeon_screen.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_screen.h b/src/mesa/drivers/dri/r200/radeon_screen.h
new file mode 120000
index 0000000000..23bb6bd459
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_screen.h
@@ -0,0 +1 @@
+../radeon/radeon_screen.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_span.c b/src/mesa/drivers/dri/r200/radeon_span.c
new file mode 120000
index 0000000000..232868c4c9
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_span.c
@@ -0,0 +1 @@
+../radeon/radeon_span.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_span.h b/src/mesa/drivers/dri/r200/radeon_span.h
new file mode 120000
index 0000000000..f9d634508c
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_span.h
@@ -0,0 +1 @@
+../radeon/radeon_span.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_tex_copy.c b/src/mesa/drivers/dri/r200/radeon_tex_copy.c
new file mode 120000
index 0000000000..dfa5ba34e6
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_tex_copy.c
@@ -0,0 +1 @@
+../radeon/radeon_tex_copy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_tex_getimage.c b/src/mesa/drivers/dri/r200/radeon_tex_getimage.c
new file mode 120000
index 0000000000..d9836d7326
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_tex_getimage.c
@@ -0,0 +1 @@
+../radeon/radeon_tex_getimage.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_texture.c b/src/mesa/drivers/dri/r200/radeon_texture.c
new file mode 120000
index 0000000000..a822710915
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_texture.c
@@ -0,0 +1 @@
+../radeon/radeon_texture.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_texture.h b/src/mesa/drivers/dri/r200/radeon_texture.h
new file mode 120000
index 0000000000..17fac3d5ea
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_texture.h
@@ -0,0 +1 @@
+../radeon/radeon_texture.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_tile.c b/src/mesa/drivers/dri/r200/radeon_tile.c
new file mode 120000
index 0000000000..d4bfe27da6
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_tile.c
@@ -0,0 +1 @@
+../radeon/radeon_tile.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/radeon_tile.h b/src/mesa/drivers/dri/r200/radeon_tile.h
new file mode 120000
index 0000000000..31074c581e
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/radeon_tile.h
@@ -0,0 +1 @@
+../radeon/radeon_tile.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon.h b/src/mesa/drivers/dri/r200/server/radeon.h
new file mode 120000
index 0000000000..81274a54f1
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon.h
@@ -0,0 +1 @@
+../../radeon/server/radeon.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon_dri.h b/src/mesa/drivers/dri/r200/server/radeon_dri.h
new file mode 120000
index 0000000000..27c591d3c9
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon_dri.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_dri.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon_macros.h b/src/mesa/drivers/dri/r200/server/radeon_macros.h
new file mode 120000
index 0000000000..c56cd735b8
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon_macros.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_macros.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r200/server/radeon_reg.h b/src/mesa/drivers/dri/r200/server/radeon_reg.h
new file mode 120000
index 0000000000..e2349dcb68
--- /dev/null
+++ b/src/mesa/drivers/dri/r200/server/radeon_reg.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_reg.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/Lindent b/src/mesa/drivers/dri/r300/Lindent
new file mode 100755
index 0000000000..7d8d8896e3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/Lindent
@@ -0,0 +1,2 @@
+#!/bin/sh
+indent -npro -kr -i8 -ts8 -sob -l80 -ss -ncs "$@" # K&R style, 8-column indents and tabs, 80-column lines; -npro skips any .indent.pro profile
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
new file mode 100644
index 0000000000..2245998c95
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -0,0 +1,77 @@
+# src/mesa/drivers/dri/r300/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+CFLAGS += $(RADEON_CFLAGS)
+
+LIBNAME = r300_dri.so
+
+ifeq ($(RADEON_LDFLAGS),)
+CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c
+endif
+
+COMMON_SOURCES = \
+	../../common/driverfuncs.c \
+	../common/mm.c \
+	../common/utils.c \
+	../common/texmem.c \
+	../common/vblank.c \
+	../common/xmlconfig.c \
+	../common/dri_util.c
+
+RADEON_COMMON_SOURCES = \
+	radeon_bo_legacy.c \
+	radeon_buffer_objects.c \
+	radeon_common_context.c \
+	radeon_common.c \
+	radeon_cs_legacy.c \
+	radeon_dma.c \
+	radeon_debug.c \
+	radeon_fbo.c \
+	radeon_lock.c \
+	radeon_mipmap_tree.c \
+	radeon_pixel_read.c \
+	radeon_queryobj.c \
+	radeon_span.c \
+	radeon_texture.c \
+	radeon_tex_copy.c \
+	radeon_tex_getimage.c \
+	radeon_tile.c
+
+DRIVER_SOURCES = \
+		 radeon_screen.c \
+		 r300_blit.c \
+		 r300_context.c \
+		 r300_draw.c \
+		 r300_cmdbuf.c \
+		 r300_state.c \
+		 r300_render.c \
+		 r300_tex.c \
+		 r300_texstate.c \
+		 r300_vertprog.c \
+		 r300_fragprog_common.c \
+		 r300_shader.c \
+		 radeon_mesa_to_rc.c \
+		 r300_emit.c \
+		 r300_swtcl.c \
+		 $(RADEON_COMMON_SOURCES) \
+		 $(EGL_SOURCES) \
+		 $(CS_SOURCES)
+
+C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
+
+DRIVER_DEFINES = -DRADEON_R300
+#	-DRADEON_BO_TRACK \
+
+DRI_LIB_DEPS += $(RADEON_LDFLAGS)
+
+SUBDIRS = compiler
+
+EXTRA_MODULES = compiler/libr300compiler.a
+
+
+##### TARGETS #####
+
+include ../Makefile.template
+
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
new file mode 100644
index 0000000000..ff3801dc67
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -0,0 +1,86 @@
+# src/mesa/drivers/dri/r300/compiler/Makefile
+
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = r300compiler
+
+C_SOURCES = \
+		radeon_code.c \
+		radeon_compiler.c \
+		radeon_emulate_branches.c \
+		radeon_emulate_loops.c \
+		radeon_program.c \
+		radeon_program_print.c \
+		radeon_opcodes.c \
+		radeon_program_alu.c \
+		radeon_program_pair.c \
+		radeon_program_tex.c \
+		radeon_pair_translate.c \
+		radeon_pair_schedule.c \
+		radeon_pair_regalloc.c \
+		radeon_dataflow.c \
+		radeon_dataflow_deadcode.c \
+		radeon_dataflow_swizzles.c \
+		radeon_optimize.c \
+		r3xx_fragprog.c \
+		r300_fragprog.c \
+		r300_fragprog_swizzle.c \
+		r300_fragprog_emit.c \
+		r500_fragprog.c \
+		r500_fragprog_emit.c \
+		r3xx_vertprog.c \
+		r3xx_vertprog_dump.c \
+		\
+		memory_pool.c
+
+
+### Basic defines ###
+
+OBJECTS = $(C_SOURCES:.c=.o) \
+	$(CPP_SOURCES:.cpp=.o) \
+	$(ASM_SOURCES:.S=.o)
+
+INCLUDES = \
+	-I. \
+	-I$(TOP)/include \
+	-I$(TOP)/src/mesa \
+
+
+##### TARGETS #####
+
+default: depend lib$(LIBNAME).a
+
+lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/configs/current
+	$(MKLIB) -o $(LIBNAME) -static $(OBJECTS)
+
+depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS)
+	rm -f depend
+	touch depend
+	$(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null
+
+# Emacs tags
+tags:
+	etags `find . -name \*.[ch]` `find ../include`
+
+# Remove object files, the static library and the generated dependency files
+clean:
+	rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak
+
+# Dummy target: nothing from this directory is installed, the rule only has to succeed
+install:
+	@echo -n ""
+
+##### RULES #####
+
+.c.o:
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+.cpp.o:
+	$(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+.S.o:
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES)  $< -o $@
+
+
+sinclude depend
diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript
new file mode 100755
index 0000000000..50d9cdb7f2
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/SConscript
@@ -0,0 +1,41 @@
+Import('*')
+
+env = env.Clone()
+env.Append(CPPPATH = '#/include')
+env.Append(CPPPATH = '#/src/mesa')
+
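+# temporary fix: strip -Werror=declaration-after-statement from CFLAGS for this library (presumably its sources mix declarations and statements)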
+env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '')
+
+r300compiler = env.ConvenienceLibrary(
+    target = 'r300compiler',
+    source = [
+        'radeon_code.c',
+        'radeon_compiler.c',
+        'radeon_program.c',
+        'radeon_program_print.c',
+        'radeon_opcodes.c',
+        'radeon_program_alu.c',
+        'radeon_program_pair.c',
+        'radeon_program_tex.c',
+        'radeon_pair_translate.c',
+        'radeon_pair_schedule.c',
+        'radeon_pair_regalloc.c',
+        'radeon_optimize.c',
+        'radeon_emulate_branches.c',
+        'radeon_emulate_loops.c',
+        'radeon_dataflow.c',
+        'radeon_dataflow_deadcode.c',
+        'radeon_dataflow_swizzles.c',
+        'r3xx_fragprog.c',
+        'r300_fragprog.c',
+        'r300_fragprog_swizzle.c',
+        'r300_fragprog_emit.c',
+        'r500_fragprog.c',
+        'r500_fragprog_emit.c',
+        'r3xx_vertprog.c',
+        'r3xx_vertprog_dump.c',
+        'memory_pool.c',
+    ])
+
+Return('r300compiler')
diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.c b/src/mesa/drivers/dri/r300/compiler/memory_pool.c
new file mode 100644
index 0000000000..76c7c60d8f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "memory_pool.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#define POOL_LARGE_ALLOC 4096 /* requests of this many bytes or more get a dedicated block */
+#define POOL_ALIGN 8 /* was 4, which left structs holding pointers misaligned on 64-bit targets */
+
+
+struct memory_block { /* header placed at the start of every malloc'ed block; usable memory starts right after it */
+	struct memory_block * next; /* next block in the pool's singly linked list */
+};
+
+void memory_pool_init(struct memory_pool * pool)
+{
+	memset(pool, 0, sizeof(*pool)); /* every field zeroed: no blocks, no current block, nothing counted */
+}
+
+void memory_pool_destroy(struct memory_pool * pool)
+{
+	struct memory_block * block, * next;
+	for (block = pool->blocks; block; block = next) {
+		next = block->next; /* read the link before the block is released */
+		free(block);
+	}
+	memset(pool, 0, sizeof(*pool)); /* leave an empty pool: head/end no longer point into freed memory */
+}
+
+/* Chain a fresh block onto the pool and aim head/end at its payload; each block is as large
+ * as all earlier refills combined (8 KiB at first).  The pool is left untouched if malloc fails. */
+static void refill_pool(struct memory_pool * pool)
+{
+	unsigned int blocksize = pool->total_allocated;
+	struct memory_block * newblock;
+
+	if (!blocksize)
+		blocksize = 2*POOL_LARGE_ALLOC;
+	newblock = (struct memory_block*)malloc(blocksize);
+	if (!newblock)
+		return;
+	newblock->next = pool->blocks;
+	pool->blocks = newblock;
+	pool->head = (unsigned char*)(newblock + 1);
+	pool->end = ((unsigned char*)newblock) + blocksize;
+	pool->total_allocated += blocksize;
+}
+
+/* Return `bytes` bytes owned by the pool (released only by memory_pool_destroy), or NULL if
+ * the system allocator fails.  Requests of POOL_LARGE_ALLOC or more get a block of their own. */
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes)
+{
+	if (bytes < POOL_LARGE_ALLOC) {
+		void * ptr;
+		if (!pool->head || pool->head + bytes > pool->end)
+			refill_pool(pool);
+		if (!pool->head || pool->head + bytes > pool->end)
+			return NULL; /* refill failed: out of memory */
+		ptr = pool->head;
+		/* Advance, then pad to POOL_ALIGN using only the low address bits; the pointer is never rebuilt from an integer. */
+		pool->head += bytes;
+		pool->head += (POOL_ALIGN - ((size_t)pool->head & (POOL_ALIGN - 1))) & (POOL_ALIGN - 1);
+		return ptr;
+	} else {
+		struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block));
+		if (!block)
+			return NULL;
+		block->next = pool->blocks;
+		pool->blocks = block;
+		return (block + 1);
+	}
+}
+
+
diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h
new file mode 100644
index 0000000000..42344d0e3b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef MEMORY_POOL_H
+#define MEMORY_POOL_H
+
+struct memory_block;
+
+/**
+ * Provides a pool of memory that can quickly be allocated from, at the
+ * cost of being unable to explicitly free one of the allocated blocks.
+ * Instead, the entire pool can be freed at once.
+ *
+ * The idea is to allow one to quickly allocate a flexible amount of
+ * memory during operations like shader compilation while avoiding
+ * reference counting headaches.
+ */
+struct memory_pool {
+	unsigned char * head; /* next free byte in the current block */
+	unsigned char * end; /* one past the last byte of the current block */
+	unsigned int total_allocated; /* bytes obtained by refills; oversized allocations are not counted */
+	struct memory_block * blocks; /* list of every malloc'ed block, walked by memory_pool_destroy */
+};
+
+/* Pool API: init zeroes the pool, malloc allocates from it, destroy frees every block. */
+void memory_pool_init(struct memory_pool * pool);
+void memory_pool_destroy(struct memory_pool * pool);
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
+
+
+/**
+ * Generic helper for growing an array that has separate size/count
+ * and reserved counters to accommodate up to num new elements.
+ *
+ *  type * Array;
+ *  unsigned int Size;
+ *  unsigned int Reserved;
+ *
+ * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k);
+ * assert(Size + k <= Reserved);
+ *
+ * \note Size is not changed by this macro.
+ *
+ * \warning Array, Size, Reserved have to be lvalues and may be evaluated
+ * several times. The old array stays in the pool until the pool is destroyed.
+ */
+#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \
+	unsigned int _num = (num); \
+	if ((size) + _num > (reserved)) { \
+		unsigned int newreserve = (reserved) * 2; \
+		type * newarray; \
+		if (newreserve < (size) + _num) /* doubling is not enough */ \
+			newreserve = (size) + 4 * _num; /* arbitrary heuristic */ \
+		newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \
+		if ((size) > 0) /* array may still be NULL while empty */ \
+			memcpy(newarray, (array), (size) * sizeof(type)); \
+		(array) = newarray; \
+		(reserved) = newreserve; \
+	} \
+} while(0)
+
+#endif /* MEMORY_POOL_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
new file mode 100644
index 0000000000..794db8335a
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r300_fragprog.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+/* Debug helper: print the R300 hardware fragment program in c to stderr, node by node. */
+void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
+{
+	struct r300_fragment_program_code *code = &c->code.r300;
+	int n, i, j;
+	static int pc = 0; /* counts calls; only shown in the banner line */
+
+	fprintf(stderr, "pc=%d*************************************\n", pc++);
+
+	fprintf(stderr, "Hardware program\n");
+	fprintf(stderr, "----------------\n");
+
+	for (n = 0; n <= (code->config & 3); n++) { /* low two config bits hold the last node index */
+		uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; /* used node words sit in the last slots */
+		int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT;
+		int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT;
+		int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
+		int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
+
+		fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
+			"alu_end: %d, tex_end: %d  (code_addr: %08x)\n", n,
+			alu_offset, tex_offset, alu_end, tex_end, code_addr);
+
+		if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
+			fprintf(stderr, "  TEX:\n");
+			for (i = tex_offset;
+			     i <= tex_offset + tex_end;
+			     ++i) {
+				const char *instr;
+
+				switch ((code->tex.
+					 inst[i] >> R300_TEX_INST_SHIFT) &
+					15) {
+				case R300_TEX_OP_LD:
+					instr = "TEX";
+					break;
+				case R300_TEX_OP_KIL:
+					instr = "KIL";
+					break;
+				case R300_TEX_OP_TXP:
+					instr = "TXP";
+					break;
+				case R300_TEX_OP_TXB:
+					instr = "TXB";
+					break;
+				default:
+					instr = "UNKNOWN";
+				}
+
+				fprintf(stderr,
+					"    %s t%i, %c%i, texture[%i]   (%08x)\n",
+					instr,
+					(code->tex.
+					 inst[i] >> R300_DST_ADDR_SHIFT) & 31,
+					't',
+					(code->tex.
+					 inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
+					(code->tex.
+					 inst[i] & R300_TEX_ID_MASK) >>
+					R300_TEX_ID_SHIFT,
+					code->tex.inst[i]);
+			}
+		}
+
+		for (i = alu_offset;
+		     i <= alu_offset + alu_end; ++i) {
+			char srcc[3][10], dstc[20];
+			char srca[3][10], dsta[20];
+			char argc[3][20];
+			char arga[3][20];
+			char flags[5], tmp[10];
+
+			for (j = 0; j < 3; ++j) { /* three source addresses, 6 bits each */
+				int regc = code->alu.inst[i].rgb_addr >> (j * 6);
+				int rega = code->alu.inst[i].alpha_addr >> (j * 6);
+
+				sprintf(srcc[j], "%c%i",
+					(regc & 32) ? 'c' : 't', regc & 31);
+				sprintf(srca[j], "%c%i",
+					(rega & 32) ? 'c' : 't', rega & 31);
+			}
+
+			dstc[0] = 0;
+			sprintf(flags, "%s%s%s",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
+			if (flags[0] != 0) {
+				sprintf(dstc, "t%i.%s ",
+					(code->alu.inst[i].
+					 rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
+					flags);
+			}
+			sprintf(flags, "%s%s%s",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
+				(code->alu.inst[i].
+				 rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
+			if (flags[0] != 0) {
+				sprintf(tmp, "o%i.%s",
+					(code->alu.inst[i].
+					 rgb_addr >> 29) & 3,
+					flags);
+				strcat(dstc, tmp);
+			}
+
+			dsta[0] = 0;
+			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
+				sprintf(dsta, "t%i.w ",
+					(code->alu.inst[i].
+					 alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
+			}
+			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
+				sprintf(tmp, "o%i.w ",
+					(code->alu.inst[i].
+					 alpha_addr >> 25) & 3);
+				strcat(dsta, tmp);
+			}
+			if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
+				strcat(dsta, "Z");
+			}
+
+			fprintf(stderr,
+				"%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
+				"       w: %3s %3s %3s -> %-20s (%08x)\n", i,
+				srcc[0], srcc[1], srcc[2], dstc,
+				code->alu.inst[i].rgb_addr, srca[0], srca[1],
+				srca[2], dsta, code->alu.inst[i].alpha_addr);
+
+			for (j = 0; j < 3; ++j) { /* three argument selectors, 7 bits each */
+				int regc = code->alu.inst[i].rgb_inst >> (j * 7);
+				int rega = code->alu.inst[i].alpha_inst >> (j * 7);
+				int d;
+				char buf[20];
+
+				d = regc & 31;
+				if (d < 12) {
+					switch (d % 4) {
+					case R300_ALU_ARGC_SRC0C_XYZ:
+						sprintf(buf, "%s.xyz",
+							srcc[d / 4]);
+						break;
+					case R300_ALU_ARGC_SRC0C_XXX:
+						sprintf(buf, "%s.xxx",
+							srcc[d / 4]);
+						break;
+					case R300_ALU_ARGC_SRC0C_YYY:
+						sprintf(buf, "%s.yyy",
+							srcc[d / 4]);
+						break;
+					case R300_ALU_ARGC_SRC0C_ZZZ:
+						sprintf(buf, "%s.zzz",
+							srcc[d / 4]);
+						break;
+					}
+				} else if (d < 15) {
+					sprintf(buf, "%s.www", srca[d - 12]);
+				} else if (d == 20) {
+					sprintf(buf, "0.0");
+				} else if (d == 21) {
+					sprintf(buf, "1.0");
+				} else if (d == 22) {
+					sprintf(buf, "0.5");
+				} else if (d >= 23 && d < 32) {
+					d -= 23;
+					switch (d / 3) {
+					case 0:
+						sprintf(buf, "%s.yzx",
+							srcc[d % 3]);
+						break;
+					case 1:
+						sprintf(buf, "%s.zxy",
+							srcc[d % 3]);
+						break;
+					case 2:
+						sprintf(buf, "%s.Wzy",
+							srcc[d % 3]);
+						break;
+					}
+				} else {
+					sprintf(buf, "%i", d);
+				}
+
+				sprintf(argc[j], "%s%s%s%s",
+					(regc & 32) ? "-" : "",
+					(regc & 64) ? "|" : "",
+					buf, (regc & 64) ? "|" : "");
+
+				d = rega & 31;
+				if (d < 9) {
+					sprintf(buf, "%s.%c", srcc[d / 3],
+						'x' + (char)(d % 3));
+				} else if (d < 12) {
+					sprintf(buf, "%s.w", srca[d - 9]);
+				} else if (d == 16) {
+					sprintf(buf, "0.0");
+				} else if (d == 17) {
+					sprintf(buf, "1.0");
+				} else if (d == 18) {
+					sprintf(buf, "0.5");
+				} else {
+					sprintf(buf, "%i", d);
+				}
+
+				sprintf(arga[j], "%s%s%s%s",
+					(rega & 32) ? "-" : "",
+					(rega & 64) ? "|" : "",
+					buf, (rega & 64) ? "|" : "");
+			}
+
+			fprintf(stderr, "     xyz: %8s %8s %8s    op: %08x\n"
+				"       w: %8s %8s %8s    op: %08x\n",
+				argc[0], argc[1], argc[2],
+				code->alu.inst[i].rgb_inst, arga[0], arga[1],
+				arga[2], code->alu.inst[i].alpha_inst);
+		}
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
new file mode 100644
index 0000000000..8b755703be
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ *   Ben Skeggs <darktama@iinet.net.au>
+ *   Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R300_FRAGPROG_H_
+#define __R300_FRAGPROG_H_
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+/* Translate the compiler's program into R300 hardware code (r300_fragprog_emit.c). */
+extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
+/* Print the hardware code in c to stderr (r300_fragprog.c). */
+extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
new file mode 100644
index 0000000000..b27a683c39
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * Emit the r300_fragment_program_code that can be understood by the hardware.
+ * Input is a pre-transformed radeon_program.
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ *
+ * \todo FogOption
+ */
+
+#include "r300_fragprog.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_program_pair.h"
+#include "r300_fragprog_swizzle.h"
+
+
+struct r300_emit_state {
+	struct r300_fragment_program_compiler * compiler;
+
+	unsigned current_node : 2; /* index of the node being emitted */
+	unsigned node_first_tex : 8; /* code->tex.length when this node began */
+	unsigned node_first_alu : 8; /* code->alu.length when this node began */
+	uint32_t node_flags; /* flag bits OR'ed into this node's code_addr word */
+};
+
+#define PROG_CODE \
+	struct r300_fragment_program_compiler *c = emit->compiler; \
+	struct r300_fragment_program_code *code = &c->code->code.r300
+/* PROG_CODE declares the locals c and code; error() reports via rc_error and needs c. */
+#define error(fmt, args...) do {			\
+		rc_error(&c->Base, "%s::%s(): " fmt "\n",	\
+			__FILE__, __FUNCTION__, ##args);	\
+	} while(0)
+
+
+/**
+ * Note the use of temporary \p index: pixsize tracks the highest index seen.
+ */
+static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
+{
+	/* pixsize never shrinks */
+	code->pixsize = (index > code->pixsize) ? index : code->pixsize;
+}
+
+/* Encode a source as a hardware register address, noting temporaries as used. */
+static unsigned int use_source(struct r300_fragment_program_code* code, struct radeon_pair_instruction_source src)
+{
+	if (src.File == RC_FILE_TEMPORARY) {
+		use_temporary(code, src.Index);
+		return src.Index;
+	}
+
+	/* Constants set bit 5; any other register file encodes as 0. */
+	return (src.File == RC_FILE_CONSTANT) ? (src.Index | (1 << 5)) : 0;
+}
+
+
+/* Map an RGB-slot opcode to its R300_ALU_OUTC_* operation code. */
+static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
+	case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
+	case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
+	case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
+	case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
+	case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
+	case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
+	case RC_OPCODE_NOP: /* NOP is emitted as MAD */
+	case RC_OPCODE_MAD: break;
+	default:
+		error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
+	}
+	return R300_ALU_OUTC_MAD; /* also the fallback once an unknown opcode was reported */
+}
+
+static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
+{
+	/* Map an alpha-slot opcode to R300_ALU_OUTA_*; note DP3 uses the DP4 code. */
+	switch(opcode) {
+	case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
+	case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
+	case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
+	case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
+	case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
+	case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
+	default:
+		error("translate_alpha_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
+		/* fall through - unknown opcodes are reported, then emitted as MAD */
+	case RC_OPCODE_NOP:
+	case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
+	case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
+	case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
+	case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
+	case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
+	}
+}
+
+/**
+ * Emit one paired ALU instruction; returns 1 on success, 0 if the ALU array is full.
+ */
+static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
+{
+	PROG_CODE;
+
+	if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
+		error("Too many ALU instructions");
+		return 0;
+	}
+
+	int ip = code->alu.length++; /* slot this instruction is written to */
+	int j;
+
+	code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
+	code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
+
+	for(j = 0; j < 3; ++j) {
+		unsigned int src = use_source(code, inst->RGB.Src[j]);
+		code->alu.inst[ip].rgb_addr |= src << (6*j); /* only OR'ed into: relies on code having been zeroed */
+
+		src = use_source(code, inst->Alpha.Src[j]);
+		code->alu.inst[ip].alpha_addr |= src << (6*j);
+
+		unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
+		arg |= inst->RGB.Arg[j].Abs << 6;
+		arg |= inst->RGB.Arg[j].Negate << 5;
+		code->alu.inst[ip].rgb_inst |= arg << (7*j); /* 7 bits per argument: selector, negate, abs */
+
+		arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
+		arg |= inst->Alpha.Arg[j].Abs << 6;
+		arg |= inst->Alpha.Arg[j].Negate << 5;
+		code->alu.inst[ip].alpha_inst |= arg << (7*j);
+	}
+
+	if (inst->RGB.Saturate)
+		code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
+	if (inst->Alpha.Saturate)
+		code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
+
+	if (inst->RGB.WriteMask) {
+		use_temporary(code, inst->RGB.DestIndex);
+		code->alu.inst[ip].rgb_addr |=
+			(inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
+			(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
+	}
+	if (inst->RGB.OutputWriteMask) {
+		code->alu.inst[ip].rgb_addr |=
+            (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
+            R300_RGB_TARGET(inst->RGB.Target);
+		emit->node_flags |= R300_RGBA_OUT;
+	}
+
+	if (inst->Alpha.WriteMask) {
+		use_temporary(code, inst->Alpha.DestIndex);
+		code->alu.inst[ip].alpha_addr |=
+			(inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
+			R300_ALU_DSTA_REG;
+	}
+	if (inst->Alpha.OutputWriteMask) {
+		code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
+            R300_ALPHA_TARGET(inst->Alpha.Target);
+		emit->node_flags |= R300_RGBA_OUT;
+	}
+	if (inst->Alpha.DepthWriteMask) {
+		code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
+		emit->node_flags |= R300_W_OUT;
+		c->code->writes_depth = 1;
+	}
+
+	return 1;
+}
+
+
+/**
+ * Finish the current node without advancing to the next one; returns 0 on error.
+ */
+static int finish_node(struct r300_emit_state * emit)
+{
+	struct r300_fragment_program_compiler * c = emit->compiler;
+	struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
+
+	if (code->alu.length == emit->node_first_alu) {
+		/* Generate a single NOP for this node */
+		struct rc_pair_instruction inst;
+		memset(&inst, 0, sizeof(inst));
+		if (!emit_alu(emit, &inst))
+			return 0;
+	}
+
+	unsigned alu_offset = emit->node_first_alu;
+	unsigned alu_end = code->alu.length - alu_offset - 1; /* stored as instruction count minus one */
+	unsigned tex_offset = emit->node_first_tex;
+	unsigned tex_end = code->tex.length - tex_offset - 1; /* wraps when the node has no TEX; reset to 0 below */
+
+	if (code->tex.length == emit->node_first_tex) {
+		if (emit->current_node > 0) {
+			error("Node %i has no TEX instructions", emit->current_node);
+			return 0;
+		}
+
+		tex_end = 0;
+	} else {
+		if (emit->current_node == 0)
+			code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
+	}
+
+	/* Write the config register.
+	 * Note: The order in which the words for each node are written
+	 * is not correct here and needs to be fixed up once we're entirely
+	 * done
+	 *
+	 * Also note that the register specification from AMD is slightly
+	 * incorrect in its description of this register. */
+	code->code_addr[emit->current_node] =
+			(alu_offset << R300_ALU_START_SHIFT) |
+			(alu_end << R300_ALU_SIZE_SHIFT) |
+			(tex_offset << R300_TEX_START_SHIFT) |
+			(tex_end << R300_TEX_SIZE_SHIFT) |
+			emit->node_flags;
+
+	return 1;
+}
+
+
+/**
+ * Open a new texture block: close the current node and start the next one,
+ * unless the current node is still empty. Returns 1 on success, 0 on error.
+ */
+static int begin_tex(struct r300_emit_state * emit)
+{
+	PROG_CODE;
+	int node_is_empty = code->alu.length == emit->node_first_alu &&
+			    code->tex.length == emit->node_first_tex;
+
+	/* Nothing emitted yet: the current node can take the TEX block itself. */
+	if (node_is_empty)
+		return 1;
+
+	if (emit->current_node == 3) {
+		error("Too many texture indirections");
+		return 0;
+	}
+	if (!finish_node(emit))
+		return 0;
+
+	/* Advance to the next node and restart the per-node bookkeeping. */
+	emit->node_flags = 0;
+	emit->node_first_alu = code->alu.length;
+	emit->node_first_tex = code->tex.length;
+	emit->current_node++;
+	return 1;
+}
+
+/* Emit one texture instruction (TEX, TXB, TXP or KIL); returns 1 on success, 0 on error. */
+static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
+{
+	PROG_CODE;
+
+	if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
+		error("Too many TEX instructions");
+		return 0;
+	}
+
+	unsigned int unit = inst->U.I.TexSrcUnit;
+	unsigned int dest = inst->U.I.DstReg.Index;
+	unsigned int opcode;
+
+	switch(inst->U.I.Opcode) {
+	case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+	case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+	case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+	case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+	default:
+		error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
+		return 0;
+	}
+
+	if (inst->U.I.Opcode == RC_OPCODE_KIL) {
+		unit = 0; /* KIL is encoded with unit 0 and destination 0 */
+		dest = 0;
+	} else {
+		use_temporary(code, dest);
+	}
+
+	use_temporary(code, inst->U.I.SrcReg[0].Index);
+
+	code->tex.inst[code->tex.length++] =
+		(inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
+		(dest << R300_DST_ADDR_SHIFT) |
+		(unit << R300_TEX_ID_SHIFT) |
+		(opcode << R300_TEX_INST_SHIFT);
+	return 1;
+}
+
+
+/**
+ * Final compilation step: Turn the intermediate radeon_program into
+ * machine-readable instructions. Failures are reported through compiler->Base.Error.
+ */
+void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
+{
+	struct r300_emit_state emit;
+	struct r300_fragment_program_code *code = &compiler->code->code.r300;
+
+	memset(&emit, 0, sizeof(emit));
+	emit.compiler = compiler;
+
+	memset(code, 0, sizeof(struct r300_fragment_program_code));
+
+	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+	    inst = inst->Next) { /* stops at the list head or on the first reported error */
+		if (inst->Type == RC_INSTRUCTION_NORMAL) {
+			if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+				begin_tex(&emit);
+				continue;
+			}
+
+			emit_tex(&emit, inst);
+		} else {
+			emit_alu(&emit, &inst->U.P);
+		}
+	}
+
+	if (code->pixsize >= compiler->Base.max_temp_regs)
+		rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
+
+	if (compiler->Base.Error)
+		return;
+
+	/* Finish the program */
+	finish_node(&emit);
+
+	code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
+	code->code_offset =
+		(0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
+		((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
+		(0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
+		((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
+
+	if (emit.current_node < 3) { /* move the used node words to the end of code_addr, zero the rest */
+		int shift = 3 - emit.current_node;
+		int i;
+		for(i = emit.current_node; i >= 0; --i) /* back to front: source and destination overlap */
+			code->code_addr[shift + i] = code->code_addr[i];
+		for(i = 0; i < shift; ++i)
+			code->code_addr[i] = 0;
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
new file mode 100644
index 0000000000..5d5de2f1b2
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ * Utilities to deal with the somewhat odd restriction on R300 fragment
+ * program swizzles.
+ */
+
+#include "r300_fragprog_swizzle.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+#include "radeon_compiler.h"
+
+#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO)) /* W selector is always ZERO */
+
+struct swizzle_data {
+	unsigned int hash; /**< swizzle value this matches */
+	unsigned int base; /**< base value for hw swizzle */
+	unsigned int stride; /**< difference in base between arg0/1/2 */
+};
+
+static const struct swizzle_data native_swizzles[] = { /* searched in order; the first match wins */
+	{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4},
+	{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4},
+	{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4},
+	{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4},
+	{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1},
+	{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1},
+	{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1},
+	{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1},
+	{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0},
+	{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0},
+	{MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0}
+};
+
+static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]); /* entry count */
+
+
+/**
+ * Look up the first native_swizzles entry whose X, Y and Z selectors agree
+ * with \p swizzle; unused components match anything. Returns 0 if none does.
+ */
+static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
+{
+	const struct swizzle_data* entry = native_swizzles;
+	const struct swizzle_data* const last = native_swizzles + num_native_swizzles;
+
+	for(; entry != last; ++entry) {
+		int matches = 1;
+		int chan;
+		for(chan = 0; chan < 3 && matches; ++chan) {
+			unsigned int want = GET_SWZ(swizzle, chan);
+			matches = want == RC_SWIZZLE_UNUSED ||
+				  want == GET_SWZ(entry->hash, chan);
+		}
+		if (matches)
+			return entry;
+	}
+
+	return 0;
+}
+
+
+/**
+ * Check whether the given instruction supports the swizzle and negate
+ * combinations in the given source register. Returns 1 if so, 0 otherwise.
+ */
+static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+	if (reg.Abs)
+		reg.Negate = RC_MASK_NONE; /* negate bits are ignored when Abs is set */
+
+	if (opcode == RC_OPCODE_KIL || /* texture ops: no abs/negate, identity swizzle only */
+	    opcode == RC_OPCODE_TEX ||
+	    opcode == RC_OPCODE_TXB ||
+	    opcode == RC_OPCODE_TXP) {
+		int j;
+
+		if (reg.Abs || reg.Negate)
+			return 0;
+
+		for(j = 0; j < 4; ++j) {
+			unsigned int swz = GET_SWZ(reg.Swizzle, j);
+			if (swz == RC_SWIZZLE_UNUSED)
+				continue;
+			if (swz != j)
+				return 0;
+		}
+
+		return 1;
+	}
+
+	unsigned int relevant = 0; /* mask of the X/Y/Z components that are actually used */
+	int j;
+
+	for(j = 0; j < 3; ++j)
+		if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED)
+			relevant |= 1 << j;
+
+	if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant)) /* negate must cover all used components or none */
+		return 0;
+
+	if (!lookup_native_swizzle(reg.Swizzle))
+		return 0;
+
+	return 1;
+}
+
+
+/* Partition mask into phases, each a component mask matched by one native swizzle. */
+static void r300_swizzle_split(
+		struct rc_src_register src, unsigned int mask,
+		struct rc_swizzle_split * split)
+{
+	if (src.Abs)
+		src.Negate = RC_MASK_NONE;
+
+	split->NumPhases = 0;
+
+	while(mask) {
+		unsigned int best_matchcount = 0;
+		unsigned int best_matchmask = 0;
+		int i, comp;
+
+		for(i = 0; i < num_native_swizzles; ++i) {
+			const struct swizzle_data *sd = &native_swizzles[i];
+			unsigned int matchcount = 0;
+			unsigned int matchmask = 0;
+			for(comp = 0; comp < 3; ++comp) {
+				if (!GET_BIT(mask, comp))
+					continue;
+				unsigned int swz = GET_SWZ(src.Swizzle, comp);
+				if (swz == RC_SWIZZLE_UNUSED)
+					continue;
+				if (swz == GET_SWZ(sd->hash, comp)) {
+					/* check if the negate bit of current component
+					 * is the same for already matched components */
+					if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp))))
+						continue;
+					matchcount++;
+					matchmask |= 1 << comp;
+				}
+			}
+			if (matchcount > best_matchcount) {
+				best_matchcount = matchcount;
+				best_matchmask = matchmask;
+				if (matchmask == (mask & RC_MASK_XYZ))
+					break;
+			}
+		}
+		if (mask & RC_MASK_W)
+			best_matchmask |= RC_MASK_W;
+
+		/* Nothing matched (e.g. only unused components left): flush the rest so the loop ends. */
+		if (!best_matchmask)
+			best_matchmask = mask;
+		split->Phase[split->NumPhases++] = best_matchmask;
+		mask &= ~best_matchmask;
+	}
+}
+
+struct rc_swizzle_caps r300_swizzle_caps = { /* swizzle callbacks implemented in this file */
+	.IsNative = r300_swizzle_is_native,
+	.Split = r300_swizzle_split
+};
+
+
+/**
+ * Hardware RGB argument selector for \p swizzle applied to source slot
+ * \p src. Non-native swizzles are reported on stderr and yield 0.
+ */
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
+{
+	const struct swizzle_data* native = lookup_native_swizzle(swizzle);
+
+	if (native) {
+		/* Codes for the other sources lie stride apart from the base code. */
+		return native->base + native->stride * src;
+	}
+	fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
+	return 0;
+}
+
+
+/**
+ * Hardware alpha argument selector for \p swizzle on source slot \p src.
+ */
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
+{
+	/* Swizzle values below 3 map to three consecutive codes per source. */
+	if (swizzle < 3)
+		return 3*src + swizzle;
+	if (swizzle == RC_SWIZZLE_W)
+		return R300_ALU_ARGA_SRC0A + src;
+	if (swizzle == RC_SWIZZLE_ZERO)
+		return R300_ALU_ARGA_ZERO;
+	if (swizzle == RC_SWIZZLE_HALF)
+		return R300_ALU_ARGA_HALF;
+	/* RC_SWIZZLE_ONE and anything unrecognised */
+	return R300_ALU_ARGA_ONE;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
new file mode 100644
index 0000000000..118476af13
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_FRAGPROG_SWIZZLE_H_
+#define __R300_FRAGPROG_SWIZZLE_H_
+
+#include "radeon_swizzle.h"
+
+/* Swizzle capability table installed by the fragment program compiler on
+ * non-R500 chips. */
+extern struct rc_swizzle_caps r300_swizzle_caps;
+
+/* Translate an RGB (XYZ) resp. Alpha (W) swizzle into the hardware code for
+ * instruction source number src. */
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
+
+#endif /* __R300_FRAGPROG_SWIZZLE_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
new file mode 100644
index 0000000000..38312658d6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdio.h>
+
+#include "radeon_dataflow.h"
+#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
+#include "radeon_program_alu.h"
+#include "radeon_program_tex.h"
+#include "r300_fragprog.h"
+#include "r300_fragprog_swizzle.h"
+#include "r500_fragprog.h"
+
+
+/**
+ * Output-usage hook passed to rc_dataflow_deadcode(): reports the four color
+ * outputs as used in all channels and the depth output as used in W.
+ *
+ * \param userdata the r300_fragment_program_compiler being run
+ * \param data     opaque pointer handed back to \p callback unchanged
+ * \param callback invoked as callback(data, output index, channel mask)
+ */
+static void dataflow_outputs_mark_use(void * userdata, void * data,
+		void (*callback)(void *, unsigned int, unsigned int))
+{
+	struct r300_fragment_program_compiler * c = userdata;
+	unsigned int color;
+
+	for (color = 0; color < 4; ++color)
+		callback(data, c->OutputColor[color], RC_MASK_XYZW);
+
+	callback(data, c->OutputDepth, RC_MASK_W);
+}
+
+/**
+ * Redirect writes to the depth output from the Z channel to the W channel.
+ *
+ * For each instruction whose destination is output c->OutputDepth: when Z
+ * is in the write mask, the mask becomes W only; otherwise the mask is
+ * cleared and the instruction is left alone.  For the opcodes listed in the
+ * switch, every source is additionally reswizzled with ZZZZ so that the
+ * value formerly computed for Z is the one produced for W.
+ */
+static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
+{
+	struct rc_instruction * const head = &c->Base.Program.Instructions;
+	struct rc_instruction * cur;
+
+	for (cur = head->Next; cur != head; cur = cur->Next) {
+		struct rc_sub_instruction * sub = &cur->U.I;
+		unsigned int num_srcs;
+		unsigned int s;
+
+		if (sub->DstReg.File != RC_FILE_OUTPUT)
+			continue;
+		if (sub->DstReg.Index != c->OutputDepth)
+			continue;
+
+		if (!(sub->DstReg.WriteMask & RC_MASK_Z)) {
+			/* Z is not written: nothing of this write survives. */
+			sub->DstReg.WriteMask = 0;
+			continue;
+		}
+		sub->DstReg.WriteMask = RC_MASK_W;
+
+		/* Number of leading source operands that need the ZZZZ swizzle. */
+		switch (sub->Opcode) {
+		case RC_OPCODE_FRC:
+		case RC_OPCODE_MOV:
+			num_srcs = 1;
+			break;
+		case RC_OPCODE_ADD:
+		case RC_OPCODE_MAX:
+		case RC_OPCODE_MIN:
+		case RC_OPCODE_MUL:
+			num_srcs = 2;
+			break;
+		case RC_OPCODE_CMP:
+		case RC_OPCODE_MAD:
+			num_srcs = 3;
+			break;
+		default:
+			/* Scalar instructions needn't be reswizzled */
+			num_srcs = 0;
+			break;
+		}
+
+		for (s = 0; s < num_srcs; ++s)
+			sub->SrcReg[s] = lmul_swizzle(RC_SWIZZLE_ZZZZ, sub->SrcReg[s]);
+	}
+}
+
+/* If debugging is enabled on the compiler, write a banner naming the
+ * compilation stage \p where to stderr and then call rc_print_program()
+ * on the current program.  Does nothing otherwise. */
+static void debug_program_log(struct r300_fragment_program_compiler* c, const char * where)
+{
+	if (!c->Base.Debug)
+		return;
+
+	fprintf(stderr, "Fragment Program: %s\n", where);
+	rc_print_program(&c->Base.Program);
+}
+
+/**
+ * Compile the fragment program held in c->Base.Program into hardware code.
+ *
+ * Runs a fixed sequence of passes (depth-output rewrite, loop and branch
+ * emulation, chip-specific and common local transformations, dead-code
+ * elimination, optimization, swizzle legalization, pair translation,
+ * scheduling, register allocation) and finally builds the hardware code and
+ * copies the constants into c->code.  At the checkpoints below the function
+ * returns early when c->Base.Error is set, so callers must check that flag.
+ * With c->Base.Debug set, the program is logged after each stage.
+ */
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
+{
+	/* Must run first: moves depth writes from Z to W. */
+	rewrite_depth_out(c);
+
+	debug_program_log(c, "before compilation");
+
+	/* XXX Ideally this should be done only for r3xx, but since
+	 * we don't have branching support for r5xx, we use the emulation
+	 * on all chipsets. */
+	
+	/* The second argument differs per chip family; presumably the
+	 * instruction limit used to bound loop unrolling -- confirm against
+	 * rc_emulate_loops(). */
+	if(c->Base.is_r500){
+		rc_emulate_loops(&c->Base, R500_PFS_MAX_INST);
+	}
+	else{
+		rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST);
+	}
+	debug_program_log(c, "after emulate loops");
+	
+	rc_emulate_branches(&c->Base);
+
+	debug_program_log(c, "after emulate branches");
+
+	/* Chip-specific rewrites; this is also where the swizzle capability
+	 * table used by the later passes is selected. */
+	if (c->Base.is_r500) {
+		struct radeon_program_transformation transformations[] = {
+			{ &r500_transform_IF, 0 },
+			{ &radeonTransformALU, 0 },
+			{ &radeonTransformDeriv, 0 },
+			{ &radeonTransformTrigScale, 0 }
+		};
+		radeonLocalTransform(&c->Base, 4, transformations);
+
+		debug_program_log(c, "after native rewrite part 1");
+
+		c->Base.SwizzleCaps = &r500_swizzle_caps;
+	} else {
+		struct radeon_program_transformation transformations[] = {
+			{ &radeonTransformALU, 0 },
+			{ &radeonTransformTrigSimple, 0 }
+		};
+		radeonLocalTransform(&c->Base, 2, transformations);
+
+		debug_program_log(c, "after native rewrite part 1");
+
+		c->Base.SwizzleCaps = &r300_swizzle_caps;
+	}
+
+	/* Run the common transformations too.
+	 * Remember, lowering comes last! */
+	struct radeon_program_transformation common_transformations[] = {
+		{ &radeonTransformTEX, c },
+	};
+	radeonLocalTransform(&c->Base, 1, common_transformations);
+
+	/* Reuse the same one-entry table for a second ALU pass (userData is
+	 * left pointing at c). */
+	common_transformations[0].function = &radeonTransformALU;
+	radeonLocalTransform(&c->Base, 1, common_transformations);
+
+	/* First error checkpoint: covers everything run so far. */
+	if (c->Base.Error)
+		return;
+
+	debug_program_log(c, "after native rewrite part 2");
+
+	rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_use, c);
+	if (c->Base.Error)
+		return;
+
+	debug_program_log(c, "after deadcode");
+
+	rc_optimize(&c->Base);
+
+	debug_program_log(c, "after dataflow optimize");
+
+	rc_dataflow_swizzles(&c->Base);
+	if (c->Base.Error)
+		return;
+
+	debug_program_log(c, "after dataflow passes");
+
+	rc_pair_translate(c);
+	if (c->Base.Error)
+		return;
+
+	debug_program_log(c, "after pair translate");
+
+	rc_pair_schedule(c);
+	if (c->Base.Error)
+		return;
+
+	debug_program_log(c, "after pair scheduling");
+
+	rc_pair_regalloc(c, c->Base.max_temp_regs);
+
+	if (c->Base.Error)
+		return;
+
+	debug_program_log(c, "after register allocation");
+
+	/* Emit the final hardware encoding for the selected chip family. */
+	if (c->Base.is_r500) {
+		r500BuildFragmentProgramHwCode(c);
+	} else {
+		r300BuildFragmentProgramHwCode(c);
+	}
+
+	rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+
+	if (c->Base.Debug) {
+		if (c->Base.is_r500) {
+			r500FragmentProgramDump(c->code);
+		} else {
+			r300FragmentProgramDump(c->code);
+		}
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
new file mode 100644
index 0000000000..507b2e532f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -0,0 +1,651 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+#include "radeon_dataflow.h"
+#include "radeon_program_alu.h"
+#include "radeon_swizzle.h"
+#include "radeon_emulate_branches.h"
+
+/*
+ * Take an already-setup and valid source then swizzle it appropriately to
+ * obtain a constant ZERO or ONE source.
+ *
+ * x selects which source operand of the instruction supplies the register
+ * index, class and RelAddr bit; y is the swizzle applied to all four
+ * components.  No component is negated (RC_MASK_NONE).
+ *
+ * The expansion refers to `vp` and `vpi`, which are not macro parameters:
+ * both must be in scope wherever the macro is used.
+ */
+#define __CONST(x, y)	\
+	(PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]),	\
+			   t_swizzle(y),	\
+			   t_swizzle(y),	\
+			   t_swizzle(y),	\
+			   t_swizzle(y),	\
+			   t_src_class(vpi->SrcReg[x].File), \
+			   RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
+
+
+/* Convert an RC write mask into the hardware write mask by keeping only
+ * the four X/Y/Z/W bits. */
+static unsigned long t_dst_mask(unsigned int mask)
+{
+	/* RC_MASK_* is equivalent to VSF_FLAG_* */
+	const unsigned long hw_mask = mask & RC_MASK_XYZW;
+
+	return hw_mask;
+}
+
+/* Map a destination register file to the hardware destination class.
+ * Files other than TEMPORARY, OUTPUT and ADDRESS are reported on stderr and
+ * then treated as TEMPORARY. */
+static unsigned long t_dst_class(rc_register_file file)
+{
+	if (file == RC_FILE_OUTPUT)
+		return PVS_DST_REG_OUT;
+	if (file == RC_FILE_ADDRESS)
+		return PVS_DST_REG_A0;
+
+	if (file != RC_FILE_TEMPORARY)
+		fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+
+	return PVS_DST_REG_TEMPORARY;
+}
+
+/* Return the hardware index of a destination register: output registers
+ * are looked up in vp->outputs[], every other file uses the index as is. */
+static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
+				 struct rc_dst_register *dst)
+{
+	unsigned long hw_index;
+
+	if (dst->File != RC_FILE_OUTPUT) {
+		hw_index = dst->Index;
+		return hw_index;
+	}
+
+	hw_index = vp->outputs[dst->Index];
+	return hw_index;
+}
+
+/* Map a source register file to the hardware source class.  NONE and
+ * TEMPORARY both map to the temporary class; files other than NONE,
+ * TEMPORARY, INPUT and CONSTANT are reported on stderr and then treated
+ * as TEMPORARY. */
+static unsigned long t_src_class(rc_register_file file)
+{
+	if (file == RC_FILE_INPUT)
+		return PVS_SRC_REG_INPUT;
+	if (file == RC_FILE_CONSTANT)
+		return PVS_SRC_REG_CONSTANT;
+
+	if (file != RC_FILE_NONE && file != RC_FILE_TEMPORARY)
+		fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+
+	return PVS_SRC_REG_TEMPORARY;
+}
+
+/* Return 1 if source operands a and b cannot be read by one instruction,
+ * 0 otherwise.  Operands conflict when they share the same non-temporary
+ * hardware class and either one of them uses relative addressing or their
+ * indices differ. */
+static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
+{
+	const unsigned long class_a = t_src_class(a.File);
+	const unsigned long class_b = t_src_class(b.File);
+
+	/* Different classes, or temporaries, never conflict. */
+	if (class_a != class_b || class_a == PVS_SRC_REG_TEMPORARY)
+		return 0;
+
+	if (a.RelAddr || b.RelAddr || a.Index != b.Index)
+		return 1;
+
+	return 0;
+}
+
+/* Translate an RC swizzle selector into the hardware selector. */
+static inline unsigned long t_swizzle(unsigned int swizzle)
+{
+	/* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
+	const unsigned long hw_swizzle = swizzle;
+
+	return hw_swizzle;
+}
+
+/* Return the hardware index of a source register.  Inputs are looked up in
+ * vp->inputs[] (asserting that a slot was assigned); for every other file
+ * the index is used directly, except that a negative index is reported on
+ * stderr and replaced by 0. */
+static unsigned long t_src_index(struct r300_vertex_program_code *vp,
+				 struct rc_src_register *src)
+{
+	if (src->File == RC_FILE_INPUT) {
+		assert(vp->inputs[src->Index] != -1);
+		return vp->inputs[src->Index];
+	}
+
+	if (src->Index < 0) {
+		fprintf(stderr,
+			"negative offsets for indirect addressing do not work.\n");
+		return 0;
+	}
+
+	return src->Index;
+}
+
+/* these two functions should probably be merged... */
+
+/* Encode a full vector source operand: hardware index, the four
+ * per-component swizzle selectors, the register class and the per-component
+ * negate mask, with the RelAddr bit OR'ed in at bit position 4. */
+static unsigned long t_src(struct r300_vertex_program_code *vp,
+			   struct rc_src_register *src)
+{
+	/* src->Negate uses the RC_MASK_ flags from program_instruction.h,
+	 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+	 */
+	return PVS_SRC_OPERAND(t_src_index(vp, src),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 1)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 2)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 3)),
+			       t_src_class(src->File),
+			       src->Negate) | (src->RelAddr << 4);
+}
+
+/* Encode a scalar source operand: like t_src(), but the first swizzle
+ * selector of the source is replicated into all four components, and the
+ * operand is negated in all four components if any Negate bit is set. */
+static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
+				  struct rc_src_register *src)
+{
+	/* src->Negate uses the RC_MASK_ flags from program_instruction.h,
+	 * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+	 */
+	return PVS_SRC_OPERAND(t_src_index(vp, src),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_swizzle(GET_SWZ(src->Swizzle, 0)),
+			       t_src_class(src->File),
+			       src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+	    (src->RelAddr << 4);
+}
+
+/* Return 0 if dst is an output register that has no hardware slot
+ * (vp->outputs[] entry is -1), 1 otherwise.  For the address file the
+ * index is asserted to be 0. */
+static int valid_dst(struct r300_vertex_program_code *vp,
+			   struct rc_dst_register *dst)
+{
+	if (dst->File == RC_FILE_OUTPUT) {
+		if (vp->outputs[dst->Index] == -1)
+			return 0;
+		return 1;
+	}
+
+	if (dst->File == RC_FILE_ADDRESS)
+		assert(dst->Index == 0);
+
+	return 1;
+}
+
+/* Emit a four-dword instruction with one real source operand.
+ * inst[0] holds opcode and destination, inst[1] the source; the two unused
+ * source slots are filled with source 0 swizzled to constant zero.
+ * The second and third PVS_OP_DST_OPERAND arguments are both 0 here --
+ * presumably the "math" and "macro" instruction flags; confirm against the
+ * macro definition. */
+static void ei_vector1(struct r300_vertex_program_code *vp,
+				unsigned int hw_opcode,
+				struct rc_sub_instruction *vpi,
+				unsigned int * inst)
+{
+	inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+				     0,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	inst[1] = t_src(vp, &vpi->SrcReg[0]);
+	inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+	inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
+}
+
+/* Emit a four-dword instruction with two real source operands.
+ * inst[1] and inst[2] encode sources 0 and 1; the unused third slot is
+ * filled with source 1 swizzled to constant zero. */
+static void ei_vector2(struct r300_vertex_program_code *vp,
+				unsigned int hw_opcode,
+				struct rc_sub_instruction *vpi,
+				unsigned int * inst)
+{
+	inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+				     0,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	inst[1] = t_src(vp, &vpi->SrcReg[0]);
+	inst[2] = t_src(vp, &vpi->SrcReg[1]);
+	inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
+}
+
+/* Emit a four-dword instruction with one scalar source operand.
+ * Differs from ei_vector1() in that the second PVS_OP_DST_OPERAND argument
+ * is 1 (presumably the math-instruction flag -- confirm against the macro)
+ * and the source is encoded with t_src_scalar(). */
+static void ei_math1(struct r300_vertex_program_code *vp,
+				unsigned int hw_opcode,
+				struct rc_sub_instruction *vpi,
+				unsigned int * inst)
+{
+	inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+				     1,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+	inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+	inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
+}
+
+/* Emit the LIT instruction (ME_LIGHT_COEFF_DX).
+ * The single source operand is encoded three times with different component
+ * orders: {X, W, 0, Y}, {Y, W, 0, X} and {Y, X, 0, W}, where X/Y/W are the
+ * source's own swizzle selectors for those components and the third
+ * component is always forced to zero.  Each copy is negated in all four
+ * components if any Negate bit of the source is set. */
+static void ei_lit(struct r300_vertex_program_code *vp,
+				      struct rc_sub_instruction *vpi,
+				      unsigned int * inst)
+{
+	//LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
+
+	inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
+				     1,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	/* NOTE: Users swizzling might not work. */
+	inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),	// X
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),	// W
+				  PVS_SRC_SELECT_FORCE_0,	// Z
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),	// Y
+				  t_src_class(vpi->SrcReg[0].File),
+				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+	    (vpi->SrcReg[0].RelAddr << 4);
+	inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),	// Y
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),	// W
+				  PVS_SRC_SELECT_FORCE_0,	// Z
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),	// X
+				  t_src_class(vpi->SrcReg[0].File),
+				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+	    (vpi->SrcReg[0].RelAddr << 4);
+	inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)),	// Y
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)),	// X
+				  PVS_SRC_SELECT_FORCE_0,	// Z
+				  t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)),	// W
+				  t_src_class(vpi->SrcReg[0].File),
+				  vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+	    (vpi->SrcReg[0].RelAddr << 4);
+}
+
+/* Emit a multiply-add.  The macro form (PVS_MACRO_OP_2CLK_MADD) is chosen
+ * only when all three sources are temporaries with pairwise different
+ * indices; every other case uses the plain VE_MULTIPLY_ADD. */
+static void ei_mad(struct r300_vertex_program_code *vp,
+				      struct rc_sub_instruction *vpi,
+				      unsigned int * inst)
+{
+	/* Remarks about hardware limitations of MAD
+	 * (please preserve this comment, as this information is _NOT_
+	 * in the documentation provided by AMD).
+	 *
+	 * As described in the documentation, MAD with three unique temporary
+	 * source registers requires the use of the macro version.
+	 *
+	 * However (and this is not mentioned in the documentation), apparently
+	 * the macro version is _NOT_ a full superset of the normal version.
+	 * In particular, the macro version does not always work when relative
+	 * addressing is used in the source operands.
+	 *
+	 * This limitation caused incorrect rendering in Sauerbraten's OpenGL
+	 * assembly shader path when using medium quality animations
+	 * (i.e. animations with matrix blending instead of quaternion blending).
+	 *
+	 * Unfortunately, I (nha) have been unable to extract a Piglit regression
+	 * test for this issue - for some reason, it is possible to have vertex
+	 * programs whose prefix is *exactly* the same as the prefix of the
+	 * offending program in Sauerbraten up to the offending instruction
+	 * without causing any trouble.
+	 *
+	 * Bottom line: Use the macro version only when really necessary;
+	 * according to AMD docs, this should improve performance by one clock
+	 * as a nice side bonus.
+	 */
+	if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
+	    vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
+	    vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
+	    vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
+	    vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
+	    vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
+		inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
+				0,
+				1,
+				t_dst_index(vp, &vpi->DstReg),
+				t_dst_mask(vpi->DstReg.WriteMask),
+				t_dst_class(vpi->DstReg.File));
+	} else {
+		inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+				0,
+				0,
+				t_dst_index(vp, &vpi->DstReg),
+				t_dst_mask(vpi->DstReg.WriteMask),
+				t_dst_class(vpi->DstReg.File));
+	}
+	inst[1] = t_src(vp, &vpi->SrcReg[0]);
+	inst[2] = t_src(vp, &vpi->SrcReg[1]);
+	inst[3] = t_src(vp, &vpi->SrcReg[2]);
+}
+
+/* Emit POW (ME_POWER_FUNC_FF).  Source 0 goes into the first operand slot
+ * and source 1 into the third, both as scalars; the second slot is filled
+ * with source 0 swizzled to constant zero. */
+static void ei_pow(struct r300_vertex_program_code *vp,
+				      struct rc_sub_instruction *vpi,
+				      unsigned int * inst)
+{
+	inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
+				     1,
+				     0,
+				     t_dst_index(vp, &vpi->DstReg),
+				     t_dst_mask(vpi->DstReg.WriteMask),
+				     t_dst_class(vpi->DstReg.File));
+	inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+	inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+	inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
+}
+
+
+/**
+ * Emit hardware code for every instruction of the vertex program.
+ *
+ * Each translated instruction occupies four dwords in code->body.d and
+ * code->length counts dwords.  Instructions whose destination is rejected
+ * by valid_dst() are skipped.  Translation stops with rc_error() when the
+ * code buffer is full or an opcode has no translation, and also as soon as
+ * the compiler's Error flag is found set after an instruction.
+ */
+static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
+{
+	struct rc_instruction * const head = &compiler->Base.Program.Instructions;
+	struct r300_vertex_program_code * code;
+	struct rc_instruction * cur;
+
+	compiler->code->pos_end = 0;	/* Not supported yet */
+	compiler->code->length = 0;
+
+	compiler->SetHwInputOutput(compiler);
+
+	code = compiler->code;
+
+	for (cur = head->Next; cur != head; cur = cur->Next) {
+		struct rc_sub_instruction * vpi = &cur->U.I;
+		unsigned int * inst;
+
+		/* Skip instructions writing to non-existing destination */
+		if (!valid_dst(code, &vpi->DstReg))
+			continue;
+
+		if (code->length >= VSF_MAX_FRAGMENT_LENGTH) {
+			rc_error(&compiler->Base, "Vertex program has too many instructions\n");
+			return;
+		}
+
+		assert(compiler->Base.is_r500 ||
+		       (vpi->Opcode != RC_OPCODE_SEQ &&
+			vpi->Opcode != RC_OPCODE_SNE));
+
+		/* Next free four-dword slot in the code buffer. */
+		inst = code->body.d + code->length;
+
+		switch (vpi->Opcode) {
+		case RC_OPCODE_ADD: ei_vector2(code, VE_ADD, vpi, inst); break;
+		case RC_OPCODE_ARL: ei_vector1(code, VE_FLT2FIX_DX, vpi, inst); break;
+		case RC_OPCODE_COS: ei_math1(code, ME_COS, vpi, inst); break;
+		case RC_OPCODE_DP4: ei_vector2(code, VE_DOT_PRODUCT, vpi, inst); break;
+		case RC_OPCODE_DST: ei_vector2(code, VE_DISTANCE_VECTOR, vpi, inst); break;
+		case RC_OPCODE_EX2: ei_math1(code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
+		case RC_OPCODE_EXP: ei_math1(code, ME_EXP_BASE2_DX, vpi, inst); break;
+		case RC_OPCODE_FRC: ei_vector1(code, VE_FRACTION, vpi, inst); break;
+		case RC_OPCODE_LG2: ei_math1(code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
+		case RC_OPCODE_LIT: ei_lit(code, vpi, inst); break;
+		case RC_OPCODE_LOG: ei_math1(code, ME_LOG_BASE2_DX, vpi, inst); break;
+		case RC_OPCODE_MAD: ei_mad(code, vpi, inst); break;
+		case RC_OPCODE_MAX: ei_vector2(code, VE_MAXIMUM, vpi, inst); break;
+		case RC_OPCODE_MIN: ei_vector2(code, VE_MINIMUM, vpi, inst); break;
+		case RC_OPCODE_MOV: ei_vector1(code, VE_ADD, vpi, inst); break;
+		case RC_OPCODE_MUL: ei_vector2(code, VE_MULTIPLY, vpi, inst); break;
+		case RC_OPCODE_POW: ei_pow(code, vpi, inst); break;
+		case RC_OPCODE_RCP: ei_math1(code, ME_RECIP_DX, vpi, inst); break;
+		case RC_OPCODE_RSQ: ei_math1(code, ME_RECIP_SQRT_DX, vpi, inst); break;
+		case RC_OPCODE_SEQ: ei_vector2(code, VE_SET_EQUAL, vpi, inst); break;
+		case RC_OPCODE_SGE: ei_vector2(code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+		case RC_OPCODE_SIN: ei_math1(code, ME_SIN, vpi, inst); break;
+		case RC_OPCODE_SLT: ei_vector2(code, VE_SET_LESS_THAN, vpi, inst); break;
+		case RC_OPCODE_SNE: ei_vector2(code, VE_SET_NOT_EQUAL, vpi, inst); break;
+		default:
+			rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
+			return;
+		}
+
+		code->length += 4;
+
+		if (compiler->Base.Error)
+			return;
+	}
+}
+
+/* Per-original-temporary bookkeeping used by allocate_temporary_registers(). */
+struct temporary_allocation {
+	unsigned int Allocated:1;	/* set once a hardware temporary has been assigned */
+	unsigned int HwTemp:15;	/* index of the assigned hardware temporary */
+	struct rc_instruction * LastRead;	/* last instruction that reads this temporary */
+};
+
+/**
+ * Map the program's temporary registers onto hardware temporaries.
+ *
+ * Pass 1 determines how many original temporaries exist, pass 2 records the
+ * last instruction reading each of them, and pass 3 rewrites all temporary
+ * indices in place.  Within one instruction the sources are processed
+ * before the destination, so a hardware register whose last read is this
+ * instruction can be reused for the same instruction's result.
+ *
+ * On return compiler->code->num_temporaries holds the number of hardware
+ * temporaries in use.  If more than VSF_MAX_FRAGMENT_TEMPS would be needed
+ * at once, the failure is reported through rc_error() and allocation stops;
+ * the program is then only partially rewritten and must not be used.
+ */
+static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
+{
+	struct rc_instruction *inst;
+	unsigned int num_orig_temps = 0;
+	char hwtemps[VSF_MAX_FRAGMENT_TEMPS];	/* nonzero = hardware temp in use */
+	struct temporary_allocation * ta;
+	unsigned int i, j;
+
+	compiler->code->num_temporaries = 0;
+	memset(hwtemps, 0, sizeof(hwtemps));
+
+	/* Pass 1: Count original temporaries and allocate structures */
+	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		for (i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+				if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
+					num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
+			}
+		}
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+				if (inst->U.I.DstReg.Index >= num_orig_temps)
+					num_orig_temps = inst->U.I.DstReg.Index + 1;
+			}
+		}
+	}
+
+	ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
+			sizeof(struct temporary_allocation) * num_orig_temps);
+	memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
+
+	/* Pass 2: Determine original temporary lifetimes */
+	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		for (i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
+				ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
+		}
+	}
+
+	/* Pass 3: Register allocation */
+	for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		for (i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+				unsigned int orig = inst->U.I.SrcReg[i].Index;
+				inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
+
+				/* Release the hardware register at its last read. */
+				if (ta[orig].Allocated && inst == ta[orig].LastRead)
+					hwtemps[ta[orig].HwTemp] = 0;
+			}
+		}
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+				unsigned int orig = inst->U.I.DstReg.Index;
+
+				if (!ta[orig].Allocated) {
+					/* Pick the lowest-numbered free hardware temp. */
+					for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
+						if (!hwtemps[j])
+							break;
+					}
+					if (j >= VSF_MAX_FRAGMENT_TEMPS) {
+						/* Continuing would silently map this
+						 * register onto hardware temp 0 and
+						 * corrupt an unrelated value, so fail
+						 * the compilation instead. */
+						rc_error(&compiler->Base, "Out of hw temporaries\n");
+						return;
+					}
+
+					ta[orig].Allocated = 1;
+					ta[orig].HwTemp = j;
+					hwtemps[j] = 1;
+
+					if (j >= compiler->code->num_temporaries)
+						compiler->code->num_temporaries = j + 1;
+				}
+
+				inst->U.I.DstReg.Index = ta[orig].HwTemp;
+			}
+		}
+	}
+}
+
+
+/* Insert a MOV directly before inst that copies source operand `index` of
+ * inst into a newly found free temporary, then make inst read that
+ * temporary (with a reset source register) instead. */
+static void move_source_to_temporary(
+	struct radeon_compiler *c,
+	struct rc_instruction* inst,
+	unsigned int index)
+{
+	int tmpreg = rc_find_free_temporary(c);
+	struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+	struct rc_src_register * src = &inst->U.I.SrcReg[index];
+
+	inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+	inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mov->U.I.DstReg.Index = tmpreg;
+	inst_mov->U.I.SrcReg[0] = *src;
+
+	reset_srcreg(src);
+	src->File = RC_FILE_TEMPORARY;
+	src->Index = tmpreg;
+}
+
+/**
+ * Vertex engine cannot read two inputs or two constants at the same time.
+ * Introduce intermediate MOVs to temporary registers to account for this.
+ *
+ * For three-source instructions, source 2 is moved out first if it
+ * conflicts with source 0 or 1; afterwards source 1 is moved out if it
+ * conflicts with source 0.  Always returns 1.
+ */
+static int transform_source_conflicts(
+	struct radeon_compiler *c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
+
+	if (info->NumSrcRegs == 3 &&
+	    (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])
+	     || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])))
+		move_source_to_temporary(c, inst, 2);
+
+	if (info->NumSrcRegs >= 2 &&
+	    t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0]))
+		move_source_to_temporary(c, inst, 1);
+
+	return 1;
+}
+
+/**
+ * Make sure every required output is written by the program.
+ *
+ * For each bit set in compiler->RequiredOutputs but missing from the
+ * program's OutputsWritten mask, a "MOV output[i], constant[0]" writing all
+ * four channels is inserted after the current last instruction and the bit
+ * is added to OutputsWritten.
+ */
+static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
+{
+	int i;
+
+	for(i = 0; i < 32; ++i) {
+		/* Unsigned constant: 1 << 31 on a signed int is undefined. */
+		if ((compiler->RequiredOutputs & (1U << i)) &&
+		    !(compiler->Base.Program.OutputsWritten & (1U << i))) {
+			struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+
+			inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+			inst->U.I.DstReg.Index = i;
+			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+			inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
+			inst->U.I.SrcReg[0].Index = 0;
+			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+			compiler->Base.Program.OutputsWritten |= 1U << i;
+		}
+	}
+}
+
+/**
+ * Output-usage hook passed to rc_dataflow_deadcode(): reports every output
+ * whose bit is set in RequiredOutputs as used in all four channels.
+ *
+ * \param userdata the r300_vertex_program_compiler being run
+ * \param data     opaque pointer handed back to \p callback unchanged
+ * \param callback invoked as callback(data, output index, channel mask)
+ */
+static void dataflow_outputs_mark_used(void * userdata, void * data,
+		void (*callback)(void *, unsigned int, unsigned int))
+{
+	struct r300_vertex_program_compiler * c = userdata;
+	int i;
+
+	for(i = 0; i < 32; ++i) {
+		/* Unsigned constant: 1 << 31 on a signed int is undefined. */
+		if (c->RequiredOutputs & (1U << i))
+			callback(data, i, RC_MASK_XYZW);
+	}
+}
+
+/* IsNative hook for vertex programs: both arguments are ignored and every
+ * swizzle is reported as native. */
+static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+	const int is_native = 1;
+
+	/* Parameters exist only to match the hook signature. */
+	(void) reg;
+	(void) opcode;
+
+	return is_native;
+}
+
+/* If debugging is enabled on the compiler, write a banner naming the
+ * compilation stage \p where to stderr and then call rc_print_program()
+ * on the current program.  Does nothing otherwise. */
+static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where)
+{
+	if (!c->Base.Debug)
+		return;
+
+	fprintf(stderr, "Vertex Program: %s\n", where);
+	rc_print_program(&c->Base.Program);
+}
+
+
+/* Swizzle capabilities for vertex programs: swizzle_is_native() accepts
+ * everything, so no Split callback is provided. */
+static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
+	.IsNative = &swizzle_is_native,
+	.Split = 0 /* should never be called */
+};
+
+
+/**
+ * Compile the vertex program held in compiler->Base.Program into hardware
+ * code stored in compiler->code.
+ *
+ * Runs, in order: insertion of artificial writes for required outputs,
+ * branch emulation, native ALU rewriting, source-conflict resolution,
+ * dead-code elimination, swizzle legalization, temporary register
+ * allocation and translation to hardware instructions.  Afterwards the
+ * constants and the InputsRead / OutputsWritten masks are copied into the
+ * code object.  The function itself never inspects compiler->Base.Error
+ * between passes; callers have to check it afterwards.
+ */
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
+{
+	compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+
+	addArtificialOutputs(compiler);
+
+	debug_program_log(compiler, "before compilation");
+
+	/* XXX Ideally this should be done only for r3xx, but since
+	 * we don't have branching support for r5xx, we use the emulation
+	 * on all chipsets. */
+	rc_emulate_branches(&compiler->Base);
+
+	debug_program_log(compiler, "after emulate branches");
+
+	{
+		struct radeon_program_transformation transformations[] = {
+			{ &r300_transform_vertex_alu, 0 },
+			{ &r300_transform_trig_scale_vertex, 0 }
+		};
+		radeonLocalTransform(&compiler->Base, 2, transformations);
+	}
+
+	debug_program_log(compiler, "after native rewrite");
+
+	{
+		/* Note: This pass has to be done separately from ALU rewrite,
+		 * otherwise non-native ALU instructions with source conflicts
+		 * will not be treated properly.
+		 */
+		struct radeon_program_transformation transformations[] = {
+			{ &transform_source_conflicts, 0 },
+		};
+		radeonLocalTransform(&compiler->Base, 1, transformations);
+	}
+
+	debug_program_log(compiler, "after source conflict resolve");
+
+	rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler);
+
+	debug_program_log(compiler, "after deadcode");
+
+	rc_dataflow_swizzles(&compiler->Base);
+
+	/* Rewrites temporary indices in place to hardware temporaries. */
+	allocate_temporary_registers(compiler);
+
+	debug_program_log(compiler, "after dataflow");
+
+	translate_vertex_program(compiler);
+
+	rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
+
+	compiler->code->InputsRead = compiler->Base.Program.InputsRead;
+	compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
+
+	if (compiler->Base.Debug) {
+		fprintf(stderr, "Final vertex program code:\n");
+		r300_vertex_program_dump(compiler->code);
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
new file mode 100644
index 0000000000..5800f1a78e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_code.h"
+
+#include <stdio.h>
+
+static const char * const r300_vs_ve_ops[] = {
+	/* R300 vector ops: entries 0-14 of this 32-entry table (looked up with op & 0x1f) */
+	"                 VE_NO_OP",
+	"           VE_DOT_PRODUCT",
+	"              VE_MULTIPLY",
+	"                   VE_ADD",
+	"          VE_MULTIPLY_ADD",
+	"       VE_DISTANCE_FACTOR",
+	"              VE_FRACTION",
+	"               VE_MAXIMUM",
+	"               VE_MINIMUM",
+	"VE_SET_GREATER_THAN_EQUAL",
+	"         VE_SET_LESS_THAN",
+	"        VE_MULTIPLYX2_ADD",
+	"        VE_MULTIPLY_CLAMP",
+	"            VE_FLT2FIX_DX",
+	"        VE_FLT2FIX_DX_RND",
+	/* R500 vector ops: entries 15-28, followed by three reserved slots (29-31) */
+	"      VE_PRED_SET_EQ_PUSH",
+	"      VE_PRED_SET_GT_PUSH",
+	"     VE_PRED_SET_GTE_PUSH",
+	"     VE_PRED_SET_NEQ_PUSH",
+	"         VE_COND_WRITE_EQ",
+	"         VE_COND_WRITE_GT",
+	"        VE_COND_WRITE_GTE",
+	"        VE_COND_WRITE_NEQ",
+	"           VE_COND_MUX_EQ",
+	"           VE_COND_MUX_GT",
+	"          VE_COND_MUX_GTE",
+	"      VE_SET_GREATER_THAN",
+	"             VE_SET_EQUAL",
+	"         VE_SET_NOT_EQUAL",
+	"               (reserved)",
+	"               (reserved)",
+	"               (reserved)",
+};
+
+static const char * const r300_vs_me_ops[] = {
+	/* R300 math ops: entries 0-17 of this 32-entry table (looked up with op & 0x1f) */
+	"                 ME_NO_OP",
+	"          ME_EXP_BASE2_DX",
+	"          ME_LOG_BASE2_DX",
+	"          ME_EXP_BASEE_FF",
+	"        ME_LIGHT_COEFF_DX",
+	"         ME_POWER_FUNC_FF",
+	"              ME_RECIP_DX",
+	"              ME_RECIP_FF",
+	"         ME_RECIP_SQRT_DX",
+	"         ME_RECIP_SQRT_FF",
+	"              ME_MULTIPLY",
+	"     ME_EXP_BASE2_FULL_DX",
+	"     ME_LOG_BASE2_FULL_DX",
+	" ME_POWER_FUNC_FF_CLAMP_B",
+	"ME_POWER_FUNC_FF_CLAMP_B1",
+	"ME_POWER_FUNC_FF_CLAMP_01",
+	"                   ME_SIN",
+	"                   ME_COS",
+	/* R500 math ops: entries 18-28, followed by three reserved slots (29-31) */
+	"        ME_LOG_BASE2_IEEE",
+	"            ME_RECIP_IEEE",
+	"       ME_RECIP_SQRT_IEEE",
+	"           ME_PRED_SET_EQ",
+	"           ME_PRED_SET_GT",
+	"          ME_PRED_SET_GTE",
+	"          ME_PRED_SET_NEQ",
+	"          ME_PRED_SET_CLR",
+	"          ME_PRED_SET_INV",
+	"          ME_PRED_SET_POP",
+	"      ME_PRED_SET_RESTORE",
+	"               (reserved)",
+	"               (reserved)",
+	"               (reserved)",
+};
+
+/* XXX refactor to avoid clashing symbols */
+static const char * const r300_vs_src_debug[] = {	/* suffix chosen by source dword bits 1:0 */
+	"t",
+	"i",
+	"c",
+	"a",
+};
+
+static const char * const r300_vs_dst_debug[] = {	/* suffix chosen by opcode dword bits 10:8 */
+	"t",
+	"a0",
+	"o",
+	"ox",
+	"a",
+	"i",
+	"u",
+	"u",
+};
+
+static const char * const r300_vs_swiz_debug[] = {	/* name of a 3-bit swizzle selector */
+	"X",
+	"Y",
+	"Z",
+	"W",
+	"0",
+	"1",
+	"U",
+	"U",
+};
+
+
+/* Decode the opcode dword of one vertex instruction and print it to stderr. */
+static void r300_vs_op_dump(uint32_t op)
+{
+	const char *mnemonic;
+
+	if (op & 0x80)		/* macro op: bit 0 picks the variant */
+		mnemonic = (op & 0x1) ? "PVS_MACRO_OP_2CLK_M2X_ADD" : "   PVS_MACRO_OP_2CLK_MADD";
+	else if (op & 0x40)	/* name comes from the math op table */
+		mnemonic = r300_vs_me_ops[op & 0x1f];
+	else			/* name comes from the vector op table */
+		mnemonic = r300_vs_ve_ops[op & 0x1f];
+
+	fprintf(stderr, " dst: %d%s op: ",
+			(op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
+	fprintf(stderr, "%s\n", mnemonic);
+}
+
+/* Print register index, register class and per-component swizzle/negate of a source dword. */
+static void r300_vs_src_dump(uint32_t src)
+{
+	unsigned comp;
+
+	fprintf(stderr, " reg: %d%s swiz: ", (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3]);
+	for (comp = 0; comp < 4; ++comp) {
+		/* swizzle selectors are 3 bits wide starting at bit 13, negate flags are bits 25..28 */
+		fprintf(stderr, "%s%s%s", (src & (1u << (25 + comp))) ? "-" : " ",
+				r300_vs_swiz_debug[(src >> (13 + 3 * comp)) & 0x7],
+				comp < 3 ? "/" : "\n");
+	}
+}
+
+/* Dump every 4-dword instruction of a vertex program (opcode + three sources) to stderr. */
+void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
+{
+	unsigned count = vs->length / 4;
+	unsigned n, s;
+
+	for (n = 0; n < count; n++) {
+		unsigned base = 4 * n;	/* index of this instruction's opcode dword */
+
+		fprintf(stderr, "%d: op: 0x%08x", n, vs->body.d[base]);
+		r300_vs_op_dump(vs->body.d[base]);
+
+		for (s = 0; s < 3; ++s) {
+			fprintf(stderr, " src%i: 0x%08x", s, vs->body.d[base + 1 + s]);
+			r300_vs_src_dump(vs->body.d[base + 1 + s]);
+		}
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
new file mode 100644
index 0000000000..632f0bcf4f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+/**
+ * Rewrite IF instructions to use the ALU result special register.
+ */
+int r500_transform_IF(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void* data)
+{
+	struct rc_instruction * cmp_mov;
+	if (inst->U.I.Opcode != RC_OPCODE_IF)
+		return 0;	/* only IF instructions are rewritten */
+	/* Insert a MOV with an empty write mask before the IF; it only sets the ALU result (W, NOTEQUAL). */
+	cmp_mov = rc_insert_new_instruction(c, inst->Prev);
+	cmp_mov->U.I.Opcode = RC_OPCODE_MOV;
+	cmp_mov->U.I.DstReg.WriteMask = 0;
+	cmp_mov->U.I.WriteALUResult = RC_ALURESULT_W;
+	cmp_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
+	cmp_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+	cmp_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(cmp_mov->U.I.SrcReg[0].Swizzle,
+			RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X);
+	/* From here on the IF reads the ALU result special register instead of its old source. */
+	inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
+	inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
+	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+	inst->U.I.SrcReg[0].Negate = 0;
+	return 1;
+}
+
+/* Report whether `reg`'s swizzle and modifiers can be used as-is by `opcode` (1) or not (0). */
+static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+	unsigned int rgb_used = 0;
+	unsigned int sel;
+	int chan;
+
+	switch (opcode) {
+	case RC_OPCODE_TEX:
+	case RC_OPCODE_TXB:
+	case RC_OPCODE_TXP:
+	case RC_OPCODE_KIL:
+		/* These opcodes never accept an absolute-value modifier. */
+		if (reg.Abs)
+			return 0;
+		/* KIL additionally requires the .xyzw swizzle and no negation. */
+		if (opcode == RC_OPCODE_KIL &&
+		    (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))
+			return 0;
+
+		/* reg is a by-value copy, so its negate mask can be used as scratch:
+		 * a non-zero mask is inverted before the per-channel check below. */
+		if (reg.Negate)
+			reg.Negate ^= RC_MASK_XYZW;
+
+		for (chan = 0; chan < 4; ++chan) {
+			sel = GET_SWZ(reg.Swizzle, chan);
+			if (sel == RC_SWIZZLE_UNUSED)
+				reg.Negate &= ~(1 << chan);	/* unused channels are ignored */
+			else if (sel >= 4)
+				return 0;	/* selector values of 4 and above are rejected */
+		}
+
+		return reg.Negate ? 0 : 1;
+
+	case RC_OPCODE_DDX:
+	case RC_OPCODE_DDY:
+		/* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles,
+		 * so only an unmodified .xyzw source is accepted. */
+		return (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate) ? 1 : 0;
+
+	default:
+		/* ALU instructions support almost everything */
+		if (reg.Abs)
+			return 1;
+
+		/* Collect the RGB channels that are neither unused nor constant zero. */
+		for (chan = 0; chan < 3; ++chan) {
+			sel = GET_SWZ(reg.Swizzle, chan);
+			if (sel != RC_SWIZZLE_UNUSED && sel != RC_SWIZZLE_ZERO)
+				rgb_used |= 1 << chan;
+		}
+		/* Negation has to cover either none or all of those channels. */
+		return ((reg.Negate & rgb_used) && (reg.Negate & rgb_used) != rgb_used) ? 0 : 1;
+	}
+}
+
+/**
+ * Split source register access.
+ *
+ * The only thing we *cannot* do in an ALU instruction is per-component
+ * negation.
+ */
+static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
+		struct rc_swizzle_split * split)
+{
+	unsigned int plain = 0;		/* used channels without negation */
+	unsigned int negated = 0;	/* used channels with negation */
+	int chan;
+	for (chan = 0; chan < 4; ++chan) {
+		if (GET_SWZ(src.Swizzle, chan) == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, chan))
+			continue;
+		if (GET_BIT(src.Negate, chan))
+			negated |= 1 << chan;
+		else
+			plain |= 1 << chan;
+	}
+
+	/* One phase per non-empty group, the non-negated group first. */
+	split->NumPhases = 0;
+	if (plain)
+		split->Phase[split->NumPhases++] = plain;
+	if (negated)
+		split->Phase[split->NumPhases++] = negated;
+}
+
+struct rc_swizzle_caps r500_swizzle_caps = {	/* swizzle callbacks for R500 fragment programs */
+	.IsNative = r500_swizzle_is_native,	/* can a source swizzle be used directly? */
+	.Split = r500_swizzle_split	/* split a source access into one phase per negation state */
+};
+
+/* Name of a swizzle selector as printed by the dump code.
+ * Values 0-3 print as R/G/B/A, values 4-7 as "0", "H", "1" and "U".
+ * Returns NULL for any value outside 0..7. */
+static char *toswiz(int swiz_val) {
+  static char *const names[] = {
+    "R", "G", "B", "A",
+    "0", "H", "1", "U",
+  };
+
+  if (swiz_val < 0 || swiz_val > 7)
+    return NULL;
+  return names[swiz_val];
+}
+
+/*
+ * Mnemonic of the 4-bit opcode field the dump prints on the RGBA_INST line.
+ *
+ * Only values 0-12 are named. Other values used to yield NULL, which the
+ * caller passed straight to a "%s" conversion; a placeholder is returned
+ * instead so the dump never formats a null pointer.
+ */
+static char *toop(int op_val)
+{
+  static char *const names[] = {
+    "MAD", "DP3", "DP4", "D2A",
+    "MIN", "MAX", "Reserved", "CND",
+    "CMP", "FRC", "SOP", "MDH",
+    "MDV",
+  };
+
+  if (op_val < 0 || op_val >= (int)(sizeof(names) / sizeof(names[0])))
+    return "Unknown";
+  return names[op_val];
+}
+
+/* Mnemonic of a 4-bit alpha opcode (0-15) for the ALPHA_INST dump line; NULL for other values. */
+static char *to_alpha_op(int op_val)
+{
+  static char *const names[16] = {
+    "MAD",		/*  0 */
+    "DP",		/*  1 */
+    "MIN",		/*  2 */
+    "MAX",		/*  3 */
+    "Reserved",		/*  4 */
+    "CND",		/*  5 */
+    "CMP",		/*  6 */
+    "FRC",		/*  7 */
+    "EX2",		/*  8 */
+    "LN2",		/*  9 */
+    "RCP",		/* 10 */
+    "RSQ",		/* 11 */
+    "SIN",		/* 12 */
+    "COS",		/* 13 */
+    "MDH",		/* 14 */
+    "MDV",		/* 15 */
+  };
+  return (op_val >= 0 && op_val < 16) ? names[op_val] : NULL;
+}
+
+/* Text for a 4-bit write mask (bit 0 = R, 1 = G, 2 = B, 3 = A); NULL for values outside 0..15. */
+static char *to_mask(int val)
+{
+  static char *const names[16] = {
+    "NONE",		/*  0 */
+    "R",		/*  1 */
+    "G",		/*  2 */
+    "RG",		/*  3 */
+    "B",		/*  4 */
+    "RB",		/*  5 */
+    "GB",		/*  6 */
+    "RGB",		/*  7 */
+    "A",		/*  8 */
+    "AR",		/*  9 */
+    "AG",		/* 10 */
+    "ARG",		/* 11 */
+    "AB",		/* 12 */
+    "ARB",		/* 13 */
+    "AGB",		/* 14 */
+    "ARGB",		/* 15 */
+  };
+  return (val >= 0 && val < 16) ? names[val] : NULL;
+}
+
+/* Name of the 3-bit texture opcode printed on the TEX_INST dump line.
+ * Value 7 has no name here; it used to produce NULL for a "%s" conversion,
+ * so a placeholder string is returned for it and any other unnamed value. */
+static char *to_texop(int val)
+{
+  static char *const names[] = {
+    "NOP", "LD", "TEXKILL", "PROJ", "LODBIAS", "LOD", "DXDY",
+  };
+
+  if (val < 0 || val > 6)
+    return "Unknown";
+  return names[val];
+}
+
+void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
+{
+  struct r500_fragment_program_code *code = &c->code.r500;
+  fprintf(stderr, "R500 Fragment Program:\n--------\n");
+  /* Each instruction is listed to stderr as raw dwords followed by decoded bit fields. */
+  int n;
+  uint32_t inst;
+  uint32_t inst0;
+  char *str = NULL;
+  /* inst_end is the index of the last instruction, so inst_end+1 entries are printed */
+  for (n = 0; n < code->inst_end+1; n++) {
+    inst0 = inst = code->inst[n].inst0;
+    fprintf(stderr,"%d\t0:CMN_INST   0x%08x:", n, inst);
+    switch(inst & 0x3) { /* low two bits of dword 0 select the instruction type */
+    case R500_INST_TYPE_ALU: str = "ALU"; break;
+    case R500_INST_TYPE_OUT: str = "OUT"; break;
+    case R500_INST_TYPE_FC: str = "FC"; break;
+    case R500_INST_TYPE_TEX: str = "TEX"; break;
+    };
+    fprintf(stderr,"%s %s %s %s %s ", str,
+	    inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
+	    inst & R500_INST_LAST ? "LAST" : "",
+	    inst & R500_INST_NOP ? "NOP" : "",
+	    inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
+    fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
+	    to_mask((inst >> 15) & 0xf));
+    /* the remaining dwords are decoded according to the same 2-bit type field */
+    switch(inst0 & 0x3) {
+    case 0:
+    case 1: /* types 0 and 1 share the RGB/alpha layout printed below */
+      fprintf(stderr,"\t1:RGB_ADDR   0x%08x:", code->inst[n].inst1);
+      inst = code->inst[n].inst1;
+
+      fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+	      inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+	      (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+	      (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+	      (inst >> 30));
+
+      fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
+      inst = code->inst[n].inst2;
+      fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+	      inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+	      (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+	      (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+	      (inst >> 30));
+      fprintf(stderr,"\t3 RGB_INST:  0x%08x:", code->inst[n].inst3);
+      inst = code->inst[n].inst3;
+      fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",
+	      (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
+	      (inst >> 11) & 0x3,
+	      (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
+	      (inst >> 24) & 0x3, (inst >> 29) & 0x3);
+
+
+      fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
+      inst = code->inst[n].inst4;
+      fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf),
+	      (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+	      (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
+	      (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
+	      (inst >> 29) & 0x3,
+	      (inst >> 31) & 0x1);
+
+      fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
+      inst = code->inst[n].inst5;
+      fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
+	      (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+	      (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
+	      (inst >> 23) & 0x3,
+	      (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
+      break;
+    case 2: /* nothing beyond dword 0 is printed for type 2 */
+      break;
+    case 3: /* type 3: the TEX_INST / TEX_ADDR / TEX_DXDY dwords */
+      inst = code->inst[n].inst1;
+      fprintf(stderr,"\t1:TEX_INST:  0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
+	      to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
+	      (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
+      inst = code->inst[n].inst2; /* swizzle fields here are 2 bits wide, so only R/G/B/A can print */
+      fprintf(stderr,"\t2:TEX_ADDR:  0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
+	      inst & 127, inst & (1<<7) ? "(rel)" : "",
+	      toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
+	      toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
+	      (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
+	      toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
+	      toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
+
+      fprintf(stderr,"\t3:TEX_DXDY:  0x%08x\n", code->inst[n].inst3);
+      break;
+    }
+    fprintf(stderr,"\n");
+  }
+
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
new file mode 100644
index 0000000000..4efbae7ba6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ *   Ben Skeggs <darktama@iinet.net.au>
+ *   Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R500_FRAGPROG_H_
+#define __R500_FRAGPROG_H_
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);	/* fills compiler->code->code.r500 */
+/* Prints a decoded listing of the R500 code in c->code.r500 to stderr. */
+extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
+/* IsNative/Split swizzle callbacks for R500 fragment programs. */
+extern struct rc_swizzle_caps r500_swizzle_caps;
+/* Rewrites an IF to test the ALU result register; returns 1 if inst was an IF, 0 otherwise. */
+extern int r500_transform_IF(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void* data);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
new file mode 100644
index 0000000000..fb2d8b5a9c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ *
+ * \author Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_program_pair.h"
+
+
+#define PROG_CODE /* declares `code`; needs a compiler pointer named `c` in scope */ \
+	struct r500_fragment_program_code *code = &c->code->code.r500
+/* Reports an error via rc_error() on c->Base, prefixed with file and function name. */
+#define error(fmt, args...) do {			\
+		rc_error(&c->Base, "%s::%s(): " fmt "\n",	\
+			__FILE__, __FUNCTION__, ##args);	\
+	} while(0)
+
+
+struct branch_info {	/* instruction slots of one IF/ELSE/ENDIF construct */
+	int If;		/* slot of the IF instruction */
+	int Else;	/* slot of the ELSE instruction, -1 while none has been seen */
+	int Endif;	/* slot of the ENDIF instruction, -1 until it is emitted */
+};
+
+struct emit_state {	/* bookkeeping for flow-control emission */
+	struct radeon_compiler * C;	/* used for rc_error() and for its memory pool */
+	struct r500_fragment_program_code * Code;	/* hardware code being written */
+
+	struct branch_info * Branches;	/* stack of currently open branches */
+	unsigned int CurrentBranchDepth;	/* number of open branches (stack entries in use) */
+	unsigned int BranchesReserved;	/* passed to memory_pool_array_reserve(); presumably the allocated capacity */
+
+	unsigned int MaxBranchDepth;	/* largest CurrentBranchDepth reached so far */
+};
+
+static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
+	case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
+	case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
+	case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
+	case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
+	case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
+	default:
+		error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
+		/* fall through */
+	case RC_OPCODE_NOP:
+		/* fall through */
+	case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
+	case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
+	case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
+	case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
+	}
+}
+
+static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
+{
+	switch(opcode) {
+	case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
+	case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
+	case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
+	case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
+	case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
+	case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
+	case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
+	case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
+	case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
+	default:
+		error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
+		/* fall through */
+	case RC_OPCODE_NOP:
+		/* fall through */
+	case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
+	case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
+	case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
+	case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
+	case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
+	case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
+	}
+}
+
+/* Translate a compiler swizzle selector into the value written to the hardware:
+ * ZERO and UNUSED -> 4, HALF -> 5, ONE -> 6; every other value is passed
+ * through unchanged. */
+static unsigned int fix_hw_swizzle(unsigned int swz)
+{
+	if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_UNUSED)
+		return 4;
+
+	if (swz == RC_SWIZZLE_HALF)
+		return 5;
+
+	if (swz == RC_SWIZZLE_ONE)
+		return 6;
+
+	/* anything else is used as is */
+	return swz;
+}
+
+/* Pack RGB argument `arg`: source select in the low bits, the three channel
+ * swizzles at bits 2, 5 and 8, negate at bit 11 and abs at bit 12. */
+static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
+{
+	unsigned int bits = inst->RGB.Arg[arg].Source;
+	int chan;
+	for (chan = 0; chan < 3; ++chan)
+		bits |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan)) << (2 + 3*chan);
+	bits |= inst->RGB.Arg[arg].Negate << 11;
+	bits |= inst->RGB.Arg[arg].Abs << 12;
+	return bits;
+}
+
+/* Pack alpha argument `i`: source select, swizzle at bit 2, negate at bit 5, abs at bit 6. */
+static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
+{
+	return inst->Alpha.Arg[i].Source
+		| (fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2)
+		| (inst->Alpha.Arg[i].Negate << 5)
+		| (inst->Alpha.Arg[i].Abs << 6);
+}
+
+/* Hardware ALU-result compare op for `func`. Only EQUAL, LESS, GEQUAL and
+ * NOTEQUAL are supported; anything else reports an error and yields 0. */
+static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
+{
+	if (func == RC_COMPARE_FUNC_EQUAL) return R500_INST_ALU_RESULT_OP_EQ;
+	if (func == RC_COMPARE_FUNC_LESS) return R500_INST_ALU_RESULT_OP_LT;
+	if (func == RC_COMPARE_FUNC_GEQUAL) return R500_INST_ALU_RESULT_OP_GE;
+	if (func == RC_COMPARE_FUNC_NOTEQUAL) return R500_INST_ALU_RESULT_OP_NE;
+
+	rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
+	return 0;
+}
+
+/* Raise the program's highest-used temporary index to at least `index`. */
+static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
+{
+	if (code->max_temp_idx < index) { code->max_temp_idx = index; }
+}
+
+/* Hardware address of a source: constants get 0x100 or'ed in, temporaries are tracked, others give 0. */
+static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
+{
+	if (src.File == RC_FILE_CONSTANT)
+		return src.Index | 0x100;
+	if (src.File != RC_FILE_TEMPORARY)
+		return 0;
+
+	use_temporary(code, src.Index);
+	return src.Index;
+}
+
+/**
+ * NOP the specified instruction if it is not a texture lookup.
+ */
+static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
+{
+	struct r500_fragment_program_code *hw = &c->code->code.r500;
+
+	if ((hw->inst[ip].inst0 & 0x3) == R500_INST_TYPE_TEX)
+		return;	/* texture instructions are left alone */
+	hw->inst[ip].inst0 |= R500_INST_NOP;
+}
+
+/**
+ * Emit a paired ALU instruction.
+ */
+static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
+{
+	PROG_CODE;
+	/* Note: error() already prefixes every message with the file and function name. */
+	if (code->inst_end >= 511) {
+		error("emit_alu: Too many instructions");
+		return;
+	}
+
+	int ip = ++code->inst_end;
+
+	/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
+	if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
+		inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
+		if (ip > 0) {
+			alu_nop(c, ip - 1);
+		}
+	}
+
+	code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
+	code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
+	/* Writing any fragment output (colour or depth) makes this an OUT instruction. */
+	if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
+		code->inst[ip].inst0 = R500_INST_TYPE_OUT;
+		if (inst->WriteALUResult) {
+			error("cannot write output and ALU result at the same time");
+			return;
+		}
+	} else {
+		code->inst[ip].inst0 = R500_INST_TYPE_ALU;
+	}
+	code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
+	/* Temporary write masks go to bits 11 (RGB) and 14 (alpha), output masks to bits 15 and 18. */
+	code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
+	code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
+	if (inst->Alpha.DepthWriteMask) {
+		code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
+		c->code->writes_depth = 1;
+	}
+
+	code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
+	code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
+	use_temporary(code, inst->Alpha.DestIndex);
+	use_temporary(code, inst->RGB.DestIndex);
+
+	if (inst->RGB.Saturate)
+		code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
+	if (inst->Alpha.Saturate)
+		code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
+	/* Source addresses; use_source() also records which temporaries are read. */
+	code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
+	code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
+	code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
+
+	code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
+	code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
+	code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
+	/* Argument selects: A and B go to inst3 (RGB) / inst4 (alpha), C of both units to inst5. */
+	code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
+	code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
+	code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
+
+	code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
+	code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
+	code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
+
+	code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
+	code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
+
+	if (inst->WriteALUResult) {
+		code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
+
+		if (inst->WriteALUResult == RC_ALURESULT_X)
+			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
+		else
+			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
+
+		code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
+	}
+}
+
+/* Pack the low two bits of each of the four swizzle selectors, component i at bits 2i..2i+1. */
+static unsigned int translate_strq_swizzle(unsigned int swizzle)
+{
+	unsigned int packed = 0;
+	for (int chan = 3; chan >= 0; --chan)
+		packed = (packed << 2) | (GET_SWZ(swizzle, chan) & 0x3);
+	return packed;
+}
+
+/**
+ * Emit a single TEX instruction
+ */
+static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
+{
+	PROG_CODE;
+	int ip;
+
+	if (code->inst_end >= 511) {
+		error("emit_tex: Too many instructions");
+		return 0;
+	}
+
+	ip = ++code->inst_end;
+
+	/* dword 0: instruction type, destination write mask and the TEX_SEM_WAIT flag */
+	code->inst[ip].inst0 = R500_INST_TYPE_TEX
+		| (inst->DstReg.WriteMask << 11)
+		| R500_INST_TEX_SEM_WAIT;
+
+	/* dword 1: texture unit and flags; rectangle textures are marked unscaled */
+	code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
+		| R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
+	if (inst->TexSrcTarget == RC_TEXTURE_RECT)
+		code->inst[ip].inst1 |= R500_TEX_UNSCALED;
+
+	/* dword 1 also carries the texture operation selected by the opcode */
+	if (inst->Opcode == RC_OPCODE_KIL)
+		code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
+	else if (inst->Opcode == RC_OPCODE_TEX)
+		code->inst[ip].inst1 |= R500_TEX_INST_LD;
+	else if (inst->Opcode == RC_OPCODE_TXB)
+		code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
+	else if (inst->Opcode == RC_OPCODE_TXP)
+		code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
+	else
+		error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
+
+	/* the destination register is only counted as used when the opcode is not KIL */
+	use_temporary(code, inst->SrcReg[0].Index);
+	if (inst->Opcode != RC_OPCODE_KIL)
+		use_temporary(code, inst->DstReg.Index);
+
+	/* dword 2: source address and swizzle (from bit 8), destination address with R/G/B/A swizzle */
+	code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
+		| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
+		| R500_TEX_DST_ADDR(inst->DstReg.Index)
+		| R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
+		| R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
+
+	return 1;
+}
+
+/* Emit the flow-control slot for an IF, ELSE or ENDIF. The jump words of a
+ * whole construct are only written once its ENDIF is reached. */
+static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
+{
+	if (s->Code->inst_end >= 511) {
+		rc_error(s->C, "%s: Too many instructions", __FUNCTION__);
+		return;
+	}
+
+	unsigned int newip = ++s->Code->inst_end;
+	s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
+
+	if (inst->U.I.Opcode == RC_OPCODE_IF) {
+		if (s->CurrentBranchDepth >= 32) {
+			rc_error(s->C, "Branch depth exceeds hardware limit");
+			return;
+		}
+
+		memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+				s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
+
+		struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++];
+		branch->If = newip;
+		branch->Else = -1;
+		branch->Endif = -1;
+
+		if (s->CurrentBranchDepth > s->MaxBranchDepth)
+			s->MaxBranchDepth = s->CurrentBranchDepth;
+
+		/* actual instruction is filled in at ENDIF time */
+	} else if (inst->U.I.Opcode == RC_OPCODE_ELSE) {
+		if (!s->CurrentBranchDepth) {
+			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+			return;
+		}
+
+		struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+		branch->Else = newip;
+
+		/* actual instruction is filled in at ENDIF time */
+	} else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+		if (!s->CurrentBranchDepth) {
+			rc_error(s->C, "%s: got ENDIF outside a branch", __FUNCTION__);
+			return;
+		}
+
+		struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+		branch->Endif = newip;
+
+		s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
+			| R500_FC_A_OP_NONE /* no address stack */
+			| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
+			| R500_FC_B_OP0_INCR /* increment branch counter if stay */
+		;
+
+		if (branch->Else >= 0) {
+			/* increment branch counter also if jump */
+			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
+			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
+
+			s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
+				| R500_FC_A_OP_NONE /* no address stack */
+				| R500_FC_B_ELSE /* all active pixels want to jump */
+				| R500_FC_B_OP0_NONE /* no counter op if stay */
+				| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
+				| R500_FC_B_POP_CNT(1)
+			;
+			s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+		} else {
+			/* don't touch branch counter on jump */
+			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
+			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+		}
+
+		s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+			| R500_FC_A_OP_NONE /* no address stack */
+			| R500_FC_JUMP_ANY /* the docs say to set this, but it is unclear why */
+			| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+			| R500_FC_B_OP1_NONE /* no branch counter op if jump */
+			| R500_FC_B_POP_CNT(1)
+		;
+		s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+		s->CurrentBranchDepth--;
+	} else {
+		rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
+	}
+}
+
+/* Translate the compiler's instruction list into R500 hardware code in compiler->code->code.r500. */
+void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
+{
+	struct r500_fragment_program_code *code = &compiler->code->code.r500;
+	struct rc_instruction * inst;
+	struct emit_state s;
+
+	/* Start from an all-zero program with no instruction emitted yet. */
+	memset(code, 0, sizeof(*code));
+	code->max_temp_idx = 1;
+	code->inst_end = -1;
+
+	memset(&s, 0, sizeof(s));
+	s.C = &compiler->Base;
+	s.Code = code;
+
+	/* Emit instructions in program order; the walk stops once an error is flagged. */
+	for (inst = compiler->Base.Program.Instructions.Next;
+	     inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+	     inst = inst->Next) {
+		if (inst->Type != RC_INSTRUCTION_NORMAL) {
+			emit_paired(compiler, &inst->U.P);
+			continue;
+		}
+
+		if (rc_get_opcode_info(inst->U.I.Opcode)->IsFlowControl)
+			emit_flowcontrol(&s, inst);
+		else if (inst->U.I.Opcode != RC_OPCODE_BEGIN_TEX)
+			emit_tex(compiler, &inst->U.I);
+	}
+
+	if (code->max_temp_idx >= 128)
+		rc_error(&compiler->Base, "Too many hardware temporaries used");
+
+	if (compiler->Base.Error)
+		return;
+
+	if (code->inst_end == -1 ||
+	    (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
+		/* This may happen when dead-code elimination is disabled or
+		 * when most of the fragment program logic is leading to a KIL */
+		if (code->inst_end >= 511) {
+			rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
+			return;
+		}
+		int ip = ++code->inst_end;
+		code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
+	}
+
+	/* With a branch nesting depth of four or more, R500_FC_FULL_FC_EN is switched on. */
+	if (s.MaxBranchDepth >= 4) {
+		if (code->max_temp_idx < 1)
+			code->max_temp_idx = 1;
+
+		code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
new file mode 100644
index 0000000000..0eab18c344
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_code.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "radeon_program.h"
+
+void rc_constants_init(struct rc_constant_list * c)
+{
+	*c = (struct rc_constant_list){ 0 }; /* empty list: no storage, Count = _Reserved = 0 */
+}
+
+/** Copy src into the uninitialized dst; dst ends up empty if src is empty or malloc fails. */
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src)
+{
+	const unsigned count = src->Count;
+	struct rc_constant * copy = count ? malloc(sizeof(struct rc_constant) * count) : NULL;
+
+	if (copy)
+		memcpy(copy, src->Constants, sizeof(struct rc_constant) * count);
+	dst->Constants = copy;
+	dst->Count = dst->_Reserved = copy ? count : 0;
+}
+
+void rc_constants_destroy(struct rc_constant_list * c)
+{
+	free(c->Constants); /* release the array ... */
+	rc_constants_init(c); /* ... and zero the list so it can be reused */
+}
+
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant)
+{
+	/* Append a copy of *constant and return its index; capacity doubles, starting at 16. */
+	unsigned index = c->Count;
+
+	if (c->Count >= c->_Reserved) {
+		unsigned reserved = c->_Reserved * 2;
+		struct rc_constant * newlist;
+
+		if (!reserved)
+			reserved = 16;
+		/* realloc(NULL, n) acts like malloc(n): nothing is copied from a NULL list. */
+		newlist = realloc(c->Constants, sizeof(struct rc_constant) * reserved);
+		if (!newlist)
+			abort(); /* no error channel; the unchecked malloc crashed here too */
+		c->Constants = newlist;
+		c->_Reserved = reserved;
+	}
+
+	c->Constants[index] = *constant;
+	c->Count++;
+	return index;
+}
+
+
+/**
+ * Look up the state constant (state0, state1) and return its index,
+ * appending a new RC_CONSTANT_STATE entry when none matches.
+ */
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
+{
+	struct rc_constant fresh;
+	unsigned i = 0;
+
+	while (i < c->Count) {
+		const struct rc_constant * cur = &c->Constants[i];
+		if (cur->Type == RC_CONSTANT_STATE &&
+		    cur->u.State[0] == state0 && cur->u.State[1] == state1)
+			return i;
+		++i;
+	}
+
+	memset(&fresh, 0, sizeof(fresh));
+	fresh.u.State[1] = state1;
+	fresh.u.State[0] = state0;
+	fresh.Size = 4;
+	fresh.Type = RC_CONSTANT_STATE;
+	return rc_constants_add(c, &fresh);
+}
+
+
+/**
+ * Add an immediate vector, reusing a bytewise-identical entry if present.
+ * A reused entry gets Size = 4 so scalar packing cannot overwrite the vector.
+ */
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
+{
+	struct rc_constant constant;
+	unsigned index;
+
+	for(index = 0; index < c->Count; ++index) {
+		if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE &&
+		    !memcmp(c->Constants[index].u.Immediate, data, sizeof(float) * 4)) {
+			c->Constants[index].Size = 4; /* all four components are now in use */
+			return index;
+		}
+	}
+
+	memset(&constant, 0, sizeof(constant));
+	constant.Type = RC_CONSTANT_IMMEDIATE;
+	constant.Size = 4;
+	memcpy(constant.u.Immediate, data, sizeof(float) * 4);
+	return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Return the index of an immediate holding the scalar data and store the
+ * swizzle selecting it in *swizzle; packs into a spare component if needed.
+ */
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
+{
+	struct rc_constant * spare = NULL; /* last immediate seen with an unused component */
+	struct rc_constant fresh;
+	unsigned i, chan;
+
+	for (i = 0; i < c->Count; i++) {
+		struct rc_constant * cur = &c->Constants[i];
+
+		if (cur->Type != RC_CONSTANT_IMMEDIATE)
+			continue;
+		for (chan = 0; chan < cur->Size; chan++) {
+			if (cur->u.Immediate[chan] == data) {
+				*swizzle = RC_MAKE_SWIZZLE_SMEAR(chan);
+				return i;
+			}
+		}
+
+		if (cur->Size < 4)
+			spare = cur;
+	}
+
+	if (spare) {
+		chan = spare->Size++;
+		spare->u.Immediate[chan] = data;
+		*swizzle = RC_MAKE_SWIZZLE_SMEAR(chan);
+		return (unsigned)(spare - c->Constants);
+	}
+
+	memset(&fresh, 0, sizeof(fresh));
+	fresh.Type = RC_CONSTANT_IMMEDIATE;
+	fresh.Size = 1;
+	fresh.u.Immediate[0] = data;
+	*swizzle = RC_SWIZZLE_XXXX;
+	return rc_constants_add(c, &fresh);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
new file mode 100644
index 0000000000..1979e7e4e4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -0,0 +1,265 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_CODE_H
+#define RADEON_CODE_H
+
+#include <stdint.h>
+
+#define R300_PFS_MAX_ALU_INST     64
+#define R300_PFS_MAX_TEX_INST     32
+#define R300_PFS_MAX_TEX_INDIRECT 4
+#define R300_PFS_NUM_TEMP_REGS    32
+#define R300_PFS_NUM_CONST_REGS   32
+
+#define R500_PFS_MAX_INST         512
+#define R500_PFS_NUM_TEMP_REGS    128
+#define R500_PFS_NUM_CONST_REGS   256
+
+
+#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+
+enum {
+	/**
+	 * External constants are constants whose meaning is unknown to this
+	 * compiler. For example, a Mesa gl_program's constants are turned
+	 * into external constants.
+	 */
+	RC_CONSTANT_EXTERNAL = 0,
+
+	RC_CONSTANT_IMMEDIATE,
+
+	/**
+	 * Constant referring to state that is known by this compiler,
+	 * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
+	 */
+	RC_CONSTANT_STATE
+};
+
+enum {
+	RC_STATE_SHADOW_AMBIENT = 0,
+
+	RC_STATE_R300_WINDOW_DIMENSION,
+	RC_STATE_R300_TEXRECT_FACTOR,
+	RC_STATE_R300_VIEWPORT_SCALE,
+	RC_STATE_R300_VIEWPORT_OFFSET
+};
+
+struct rc_constant {
+	unsigned Type:2; /**< RC_CONSTANT_xxx */
+	unsigned Size:3; /**< components in use: 4 for state/vec4, 1..4 for packed scalars */
+
+	union {
+		unsigned External;
+		float Immediate[4]; /**< values of an RC_CONSTANT_IMMEDIATE */
+		unsigned State[2]; /**< state pair of an RC_CONSTANT_STATE */
+	} u;
+};
+
+struct rc_constant_list {
+	struct rc_constant * Constants; /**< malloc'ed array, released by rc_constants_destroy() */
+	unsigned Count; /**< number of entries in use */
+
+	unsigned _Reserved; /**< number of entries allocated; grown by rc_constants_add() */
+};
+
+void rc_constants_init(struct rc_constant_list * c); /* make c an empty list */
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src); /* dst is uninitialized */
+void rc_constants_destroy(struct rc_constant_list * c); /* free storage, leave c empty */
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant); /* returns new index */
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1);
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data); /* data[4] */
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
+
+/**
+ * Compare functions.
+ *
+ * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you
+ * the correct GL compare function.
+ */
+typedef enum {
+	RC_COMPARE_FUNC_NEVER = 0,
+	RC_COMPARE_FUNC_LESS,
+	RC_COMPARE_FUNC_EQUAL,
+	RC_COMPARE_FUNC_LEQUAL,
+	RC_COMPARE_FUNC_GREATER,
+	RC_COMPARE_FUNC_NOTEQUAL,
+	RC_COMPARE_FUNC_GEQUAL,
+	RC_COMPARE_FUNC_ALWAYS
+} rc_compare_func;
+
+/**
+ * Coordinate wrapping modes.
+ *
+ * These are not quite the same as their GL counterparts yet.
+ */
+typedef enum {
+	RC_WRAP_NONE = 0,
+	RC_WRAP_REPEAT,
+	RC_WRAP_MIRRORED_REPEAT,
+	RC_WRAP_MIRRORED_CLAMP
+} rc_wrap_mode;
+
+/**
+ * Stores state that influences the compilation of a fragment program.
+ */
+struct r300_fragment_program_external_state {
+	struct {
+		/**
+		 * If the sampler is used as a shadow sampler, this field
+		 * depends on the depth texture mode:
+		 *  0 - GL_LUMINANCE, 1 - GL_INTENSITY, 2 - GL_ALPHA.
+		 * NOTE(review): the field is 12 bits wide and named "swizzle",
+		 * so it may actually hold a packed swizzle rather than this
+		 * 0..2 code -- confirm against the code that fills it in.
+		 */
+		unsigned depth_texture_swizzle:12;
+
+		/**
+		 * If the sampler is used as a shadow sampler,
+		 * this field specifies the compare function.
+		 *
+		 * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
+		 * \sa rc_compare_func
+		 */
+		unsigned texture_compare_func : 3;
+
+		/**
+		 * No matter what the sampler type is,
+		 * this field turns it into a shadow sampler.
+		 */
+		unsigned compare_mode_enabled : 1;
+
+		/**
+		 * If the sampler needs to fake NPOT, this field is set.
+		 */
+		unsigned fake_npot : 1;
+
+		/**
+		 * If the sampler will receive non-normalized coords,
+		 * this field is set.
+		 */
+		unsigned non_normalized_coords : 1;
+
+		/**
+		 * This field specifies wrapping modes for the sampler.
+		 *
+		 * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
+		 * will be performed on the coordinates.
+		 */
+		unsigned wrap_mode : 2;
+	} unit[16];
+};
+
+
+
+struct r300_fragment_program_node {
+	int tex_offset; /**< first tex instruction */
+	int tex_end; /**< last tex instruction, relative to tex_offset */
+	int alu_offset; /**< first ALU instruction */
+	int alu_end; /**< last ALU instruction, relative to alu_offset */
+	int flags;
+};
+
+/**
+ * Stores an R300 fragment program in its compiled-to-hardware form.
+ */
+struct r300_fragment_program_code {
+	struct {
+		int length; /**< total # of texture instructions used */
+		uint32_t inst[R300_PFS_MAX_TEX_INST];
+	} tex;
+
+	struct {
+		int length; /**< total # of ALU instructions used */
+		struct {
+			uint32_t rgb_inst;
+			uint32_t rgb_addr;
+			uint32_t alpha_inst;
+			uint32_t alpha_addr;
+		} inst[R300_PFS_MAX_ALU_INST];
+	} alu;
+
+	uint32_t config; /* US_CONFIG */
+	uint32_t pixsize; /* US_PIXSIZE */
+	uint32_t code_offset; /* US_CODE_OFFSET */
+	uint32_t code_addr[4]; /* US_CODE_ADDR */
+};
+
+
+struct r500_fragment_program_code {
+	struct {
+		uint32_t inst0; /* masked with R500_INST_TYPE_MASK to read the instruction type */
+		uint32_t inst1;
+		uint32_t inst2;
+		uint32_t inst3;
+		uint32_t inst4;
+		uint32_t inst5;
+	} inst[R500_PFS_MAX_INST];
+
+	int inst_end; /* Number of instructions - 1 (-1 while empty); also, last instruction to be executed */
+
+	int max_temp_idx; /* r500BuildFragmentProgramHwCode reports an error if this is >= 128 */
+
+	uint32_t us_fc_ctrl; /* R500_FC_FULL_FC_EN is set here when the branch depth is >= 4 */
+};
+
+struct rX00_fragment_program_code {
+	union {
+		struct r300_fragment_program_code r300;
+		struct r500_fragment_program_code r500;
+	} code;
+
+	unsigned writes_depth:1;
+
+	struct rc_constant_list constants;
+};
+
+
+#define VSF_MAX_FRAGMENT_LENGTH (255*4)
+#define VSF_MAX_FRAGMENT_TEMPS (14)
+
+#define VSF_MAX_INPUTS 32
+#define VSF_MAX_OUTPUTS 32
+
+struct r300_vertex_program_code {
+	int length;
+	union {
+		uint32_t d[VSF_MAX_FRAGMENT_LENGTH];
+		float f[VSF_MAX_FRAGMENT_LENGTH];
+	} body;
+
+	int pos_end;
+	int num_temporaries;	/* Number of temp vars used by program */
+	int inputs[VSF_MAX_INPUTS];
+	int outputs[VSF_MAX_OUTPUTS];
+
+	struct rc_constant_list constants;
+
+	uint32_t InputsRead;
+	uint32_t OutputsWritten;
+};
+
+void r300_vertex_program_dump(struct r300_vertex_program_code * vs);
+
+#endif /* RADEON_CODE_H */
+
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
new file mode 100644
index 0000000000..1c8ba864a4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "radeon_program.h"
+
+
+void rc_init(struct radeon_compiler * c)
+{
+	struct rc_instruction * sentinel = &c->Program.Instructions;
+	/* Zero the compiler, set up its pool, and start with an empty instruction list. */
+	memset(c, 0, sizeof(*c));
+	memory_pool_init(&c->Pool);
+	sentinel->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+	sentinel->Next = sentinel->Prev = sentinel; /* list head points at itself */
+}
+
+void rc_destroy(struct radeon_compiler * c)
+{
+	free(c->ErrorMsg); /* message stored by rc_error(); may be NULL */
+	memory_pool_destroy(&c->Pool);
+	rc_constants_destroy(&c->Program.Constants);
+}
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
+{
+	/* printf-style debug output to stderr, emitted only when c->Debug is set. */
+	if (c->Debug) {
+		va_list args;
+
+		va_start(args, fmt);
+		vfprintf(stderr, fmt, args);
+		va_end(args);
+	}
+}
+
+void rc_error(struct radeon_compiler * c, const char * fmt, ...)
+{
+	/* Flag the compile as failed, keep the first message, echo every message when debugging. */
+	va_list ap;
+
+	c->Error = 1;
+	if (!c->ErrorMsg) {
+		/* Only remember the first error */
+		char buf[1024];
+		int written;
+
+		va_start(ap, fmt);
+		written = vsnprintf(buf, sizeof(buf), fmt, ap);
+		va_end(ap);
+
+		/* vsnprintf returns < 0 on error; compared as size_t that would look huge. */
+		if (written >= 0 && (size_t)written >= sizeof(buf))
+			c->ErrorMsg = malloc((size_t)written + 1);
+		if (c->ErrorMsg) {
+			va_start(ap, fmt);
+			vsnprintf(c->ErrorMsg, (size_t)written + 1, fmt, ap);
+			va_end(ap);
+		} else if (written >= 0) {
+			c->ErrorMsg = strdup(buf); /* message fits, or malloc failed: keep truncated text */
+		}
+	}
+
+	if (c->Debug) {
+		fprintf(stderr, "r300compiler error: ");
+		va_start(ap, fmt);
+		vfprintf(stderr, fmt, ap);
+		va_end(ap);
+	}
+}
+
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
+{
+	rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); /* flags c->Error */
+	return 1; /* always nonzero: rc_assert() relies on this to make its && expression true */
+}
+
+/**
+ * Recompute c->Program.InputsRead and c->Program.OutputsWritten
+ * based on which inputs and outputs are actually referenced
+ * in program instructions.
+ */
+void rc_calculate_inputs_outputs(struct radeon_compiler * c)
+{
+	struct rc_instruction *inst;
+
+	c->Program.InputsRead = 0;
+	c->Program.OutputsWritten = 0;
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
+	{
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		unsigned i;
+
+		for (i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) /* 1U: bit 31 must not overflow int */
+				c->Program.InputsRead |= 1U << inst->U.I.SrcReg[i].Index;
+		}
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
+				c->Program.OutputsWritten |= 1U << inst->U.I.DstReg.Index;
+		}
+	}
+}
+
+/**
+ * Rewrite the program such that everything that sources the given input
+ * register will source new_input instead (swizzle, negate and abs are merged).
+ */
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
+{
+	struct rc_instruction * inst;
+
+	c->Program.InputsRead &= ~(1U << input); /* unsigned shift: bit 31 is a valid mask bit */
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		unsigned i;
+
+		for(i = 0; i < opcode->NumSrcRegs; ++i) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
+				inst->U.I.SrcReg[i].File = new_input.File;
+				inst->U.I.SrcReg[i].Index = new_input.Index;
+				inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
+				if (!inst->U.I.SrcReg[i].Abs) { /* an existing Abs hides new_input's sign modifiers */
+					inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
+					inst->U.I.SrcReg[i].Abs = new_input.Abs;
+				}
+
+				c->Program.InputsRead |= 1U << new_input.Index;
+			}
+		}
+	}
+}
+
+
+/**
+ * Rewrite the program such that everything that writes into the given
+ * output register will instead write to new_output. The new_output
+ * writemask is honoured.
+ */
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
+{
+	struct rc_instruction * inst;
+
+	c->Program.OutputsWritten &= ~(1U << output); /* unsigned shift: bit 31 is a valid mask bit */
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+				inst->U.I.DstReg.Index = new_output;
+				inst->U.I.DstReg.WriteMask &= writemask;
+
+				c->Program.OutputsWritten |= 1U << new_output;
+			}
+		}
+	}
+}
+
+
+/**
+ * Rewrite the program such that a given output is duplicated: writes to
+ * output go to a temporary that is then MOVed to output and dup_output. */
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
+{
+	unsigned tempreg = rc_find_free_temporary(c);
+	struct rc_instruction * inst;
+
+	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+				inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+				inst->U.I.DstReg.Index = tempreg;
+			}
+		}
+	}
+
+	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+	inst->U.I.Opcode = RC_OPCODE_MOV;
+	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+	inst->U.I.DstReg.Index = output;
+
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = tempreg;
+	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+	inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+	inst->U.I.Opcode = RC_OPCODE_MOV;
+	inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+	inst->U.I.DstReg.Index = dup_output;
+
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = tempreg;
+	inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+	c->Program.OutputsWritten |= 1U << dup_output; /* unsigned shift: bit 31 must not overflow int */
+}
+
+
+/**
+ * Introduce standard code fragment to deal with fragment.position.
+ */
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
+                                int full_vtransform)
+{
+	unsigned tempregi = rc_find_free_temporary(c);
+	struct rc_instruction * inst_rcp;
+	struct rc_instruction * inst_mul;
+	struct rc_instruction * inst_mad;
+	struct rc_instruction * inst;
+
+	c->Program.InputsRead &= ~(1U << wpos); /* unsigned shifts: bit 31 is a valid mask bit */
+	c->Program.InputsRead |= 1U << new_input;
+
+	/* perspective divide: temp.w = RCP(new_input.w), then temp.xyz = new_input * temp.w */
+	inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
+	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+
+	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_rcp->U.I.DstReg.Index = tempregi;
+	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+
+	inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
+	inst_rcp->U.I.SrcReg[0].Index = new_input;
+	inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+
+	inst_mul = rc_insert_new_instruction(c, inst_rcp);
+	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+
+	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mul->U.I.DstReg.Index = tempregi;
+	inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+
+	inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
+	inst_mul->U.I.SrcReg[0].Index = new_input;
+
+	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+	inst_mul->U.I.SrcReg[1].Index = tempregi;
+	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+
+	/* viewport transformation: a MAD of temp.xyz with two state constants */
+	inst_mad = rc_insert_new_instruction(c, inst_mul);
+	inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+
+	inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mad->U.I.DstReg.Index = tempregi;
+	inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+
+	inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst_mad->U.I.SrcReg[0].Index = tempregi;
+	inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+	inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+	inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+
+	inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
+	inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
+
+	if (full_vtransform) {
+		inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
+		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
+	} else {
+		inst_mad->U.I.SrcReg[1].Index =
+		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
+	}
+
+	for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { /* redirect wpos reads */
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		unsigned i;
+
+		for(i = 0; i < opcode->NumSrcRegs; i++) {
+			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+			    inst->U.I.SrcReg[i].Index == wpos) {
+				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+				inst->U.I.SrcReg[i].Index = tempregi;
+			}
+		}
+	}
+}
+
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
new file mode 100644
index 0000000000..f15905d79d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_COMPILER_H
+#define RADEON_COMPILER_H
+
+#include "../../../../main/compiler.h"
+
+#include "memory_pool.h"
+#include "radeon_code.h"
+#include "radeon_program.h"
+
+struct rc_swizzle_caps;
+
+struct radeon_compiler {
+	struct memory_pool Pool;
+	struct rc_program Program;
+	unsigned Debug:1; /**< when set, rc_debug() and rc_error() print to stderr */
+	unsigned Error:1; /**< set by rc_error() */
+	char * ErrorMsg; /**< first message passed to rc_error(); freed by rc_destroy() */
+
+	/* Hardware specification. */
+	unsigned is_r500;
+	unsigned max_temp_regs;
+
+	/**
+	 * Variables used internally, not to be touched by callers
+	 * of the compiler
+	 */
+	/*@{*/
+	struct rc_swizzle_caps * SwizzleCaps;
+	/*@}*/
+};
+
+void rc_init(struct radeon_compiler * c);
+void rc_destroy(struct radeon_compiler * c);
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
+void rc_error(struct radeon_compiler * c, const char * fmt, ...);
+
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);
+
+/**
+ * This macro acts like an if-statement that can be used to implement
+ * non-aborting assertions in the compiler.
+ *
+ * It checks whether \p cond is true. If not, an internal compiler error is
+ * flagged and the if-clause is run.
+ *
+ * A typical use-case would be:
+ *
+ *  if (rc_assert(c, condition-that-must-be-true))
+ *  	return;
+ */
+#define rc_assert(c, cond) \
+	(!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
+
+void rc_calculate_inputs_outputs(struct radeon_compiler * c);
+
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input);
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
+                                int full_vtransform);
+
+struct r300_fragment_program_compiler {
+	struct radeon_compiler Base;
+	struct rX00_fragment_program_code *code;
+	/* Optional transformations and features. */
+	struct r300_fragment_program_external_state state;
+	unsigned enable_shadow_ambient;
+	/* Register corresponding to the depthbuffer. */
+	unsigned OutputDepth;
+	/* Registers corresponding to the four colorbuffers. */
+	unsigned OutputColor[4];
+
+	void * UserData;
+	void (*AllocateHwInputs)(
+		struct r300_fragment_program_compiler * c,
+		void (*allocate)(void * data, unsigned input, unsigned hwreg),
+		void * mydata);
+};
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
+
+
+struct r300_vertex_program_compiler {
+	struct radeon_compiler Base;
+	struct r300_vertex_program_code *code;
+	uint32_t RequiredOutputs;
+
+	void * UserData;
+	void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
+};
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+
+#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
new file mode 100644
index 0000000000..0e6c62541f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_program.h"
+
+
+/* Report every register read by a normal (non-paired) instruction to \p cb,
+ * one call per source operand with the mask of channels its swizzle selects. */
+static void reads_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+	struct rc_sub_instruction * inst = &fullinst->U.I;
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+		unsigned int refmask = 0;
+
+		/* An empty source slot ends the scan; later slots are not visited. */
+		if (inst->SrcReg[src].File == RC_FILE_NONE)
+			return;
+		for(unsigned int chan = 0; chan < 4; ++chan)
+			refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan);
+		refmask &= RC_MASK_XYZW; /* keep only the bits that name a real X/Y/Z/W channel */
+		if (!refmask)
+			continue;
+		cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask);
+		if (inst->SrcReg[src].RelAddr) /* relative addressing is reported as a read of the address register */
+			cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
+	}
+}
+
+static void reads_pair(struct rc_instruction * fullinst,  rc_read_write_mask_fn cb, void * userdata)
+{
+	struct rc_pair_instruction * inst = &fullinst->U.P;
+	unsigned int refmasks[3] = { 0, 0, 0 };
+	/* refmasks[n] accumulates the channels read from pair source slot n. */
+	if (inst->RGB.Opcode != RC_OPCODE_NOP) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+			for(unsigned int chan = 0; chan < 3; ++chan) {
+				unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
+				if (swz < 4) /* selectors >= 4 do not name a register channel and are skipped */
+					refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz;
+			}
+		}
+	}
+	/* The alpha argument holds a single selector, used directly rather than per channel. */
+	if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+			if (inst->Alpha.Arg[arg].Swizzle < 4)
+				refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle;
+		}
+	}
+	/* Report XYZ reads against the RGB source and W reads against the alpha source of each slot. */
+	for(unsigned int src = 0; src < 3; ++src) {
+		if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ))
+			cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index,
+			   refmasks[src] & RC_MASK_XYZ);
+
+		if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W))
+			cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W);
+	}
+}
+
+/**
+ * Invoke \p cb once per register read of \p inst, passing the mask of
+ * channels that are sourced.
+ *
+ * The result is conservative: a register referenced several times may be
+ * reported several times, and the instruction's writemask is ignored.
+ */
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
+{
+	if (inst->Type != RC_INSTRUCTION_NORMAL) {
+		reads_pair(inst, cb, userdata);
+		return;
+	}
+	reads_normal(inst, cb, userdata);
+}
+
+
+
+/* Report the destination-register write and the ALU-result write of a normal instruction. */
+static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+	struct rc_sub_instruction * sub = &fullinst->U.I;
+	const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+	const int writes_dst = info->HasDstReg && sub->DstReg.WriteMask;
+	if (writes_dst)
+		cb(userdata, fullinst, sub->DstReg.File, sub->DstReg.Index, sub->DstReg.WriteMask);
+	if (sub->WriteALUResult)
+		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
+}
+
+/* Report the RGB, alpha and ALU-result writes of a paired instruction.
+ * Destinations are always reported in RC_FILE_TEMPORARY. */
+static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+	struct rc_pair_instruction * pair = &fullinst->U.P;
+	const unsigned int rgb_mask = pair->RGB.WriteMask;
+	if (rgb_mask)
+		cb(userdata, fullinst, RC_FILE_TEMPORARY, pair->RGB.DestIndex, rgb_mask);
+	if (pair->Alpha.WriteMask)
+		cb(userdata, fullinst, RC_FILE_TEMPORARY, pair->Alpha.DestIndex, RC_MASK_W);
+	if (pair->WriteALUResult)
+		cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
+}
+
+/**
+ * Invoke \p cb once per register written by \p inst, passing the
+ * writemask of each write.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
+{
+	if (inst->Type != RC_INSTRUCTION_NORMAL) {
+		writes_pair(inst, cb, userdata);
+		return;
+	}
+	writes_normal(inst, cb, userdata);
+}
+
+
+struct mask_to_chan_data {
+	void * UserData; /* caller's userdata, forwarded unchanged to Fn */
+	rc_read_write_chan_fn Fn; /* per-channel callback invoked by mask_to_chan_cb */
+};
+
+/* Adapter: expand one per-mask report into one call of the wrapped Fn per set channel. */
+static void mask_to_chan_cb(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct mask_to_chan_data * adapter = data;
+	for(unsigned int bit = 0; bit < 4; ++bit)
+		if (GET_BIT(mask, bit))
+			adapter->Fn(adapter->UserData, inst, file, index, bit);
+}
+
+/**
+ * Invoke \p cb once for every register channel sourced by \p inst.
+ *
+ * Conservative: a channel may be reported more than once, and the
+ * instruction's writemask is not taken into account.
+ */
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+	/* Wrap the per-channel callback so the per-mask walker can drive it. */
+	struct mask_to_chan_data adapter = { .UserData = userdata, .Fn = cb };
+
+	rc_for_all_reads_mask(inst, mask_to_chan_cb, &adapter);
+}
+
+/**
+ * Invoke \p cb once for every register channel written by \p inst.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+	/* Wrap the per-channel callback so the per-mask walker can drive it. */
+	struct mask_to_chan_data adapter = { .UserData = userdata, .Fn = cb };
+
+	rc_for_all_writes_mask(inst, mask_to_chan_cb, &adapter);
+}
+
+static void remap_normal_instruction(struct rc_instruction * fullinst,
+		rc_remap_register_fn cb, void * userdata)
+{
+	struct rc_sub_instruction * inst = &fullinst->U.I;
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+	/* Each register is copied into locals, offered to cb, and written back afterwards. */
+	if (opcode->HasDstReg) {
+		rc_register_file file = inst->DstReg.File;
+		unsigned int index = inst->DstReg.Index;
+
+		cb(userdata, fullinst, &file, &index);
+
+		inst->DstReg.File = file;
+		inst->DstReg.Index = index;
+	}
+	/* Sources are remapped for every operand slot the opcode declares. */
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+		rc_register_file file = inst->SrcReg[src].File;
+		unsigned int index = inst->SrcReg[src].Index;
+
+		cb(userdata, fullinst, &file, &index);
+
+		inst->SrcReg[src].File = file;
+		inst->SrcReg[src].Index = index;
+	}
+}
+
+static void remap_pair_instruction(struct rc_instruction * fullinst,
+		rc_remap_register_fn cb, void * userdata)
+{
+	struct rc_pair_instruction * inst = &fullinst->U.P;
+	/* Destinations are offered to cb as temporaries; only the index is written back, a file change is dropped. */
+	if (inst->RGB.WriteMask) {
+		rc_register_file file = RC_FILE_TEMPORARY;
+		unsigned int index = inst->RGB.DestIndex;
+
+		cb(userdata, fullinst, &file, &index);
+
+		inst->RGB.DestIndex = index;
+	}
+
+	if (inst->Alpha.WriteMask) {
+		rc_register_file file = RC_FILE_TEMPORARY;
+		unsigned int index = inst->Alpha.DestIndex;
+
+		cb(userdata, fullinst, &file, &index);
+
+		inst->Alpha.DestIndex = index;
+	}
+	/* Each of the three source slots is remapped separately for RGB and alpha; unused slots are skipped. */
+	for(unsigned int src = 0; src < 3; ++src) {
+		if (inst->RGB.Src[src].Used) {
+			rc_register_file file = inst->RGB.Src[src].File;
+			unsigned int index = inst->RGB.Src[src].Index;
+
+			cb(userdata, fullinst, &file, &index);
+
+			inst->RGB.Src[src].File = file;
+			inst->RGB.Src[src].Index = index;
+		}
+
+		if (inst->Alpha.Src[src].Used) {
+			rc_register_file file = inst->Alpha.Src[src].File;
+			unsigned int index = inst->Alpha.Src[src].Index;
+
+			cb(userdata, fullinst, &file, &index);
+
+			inst->Alpha.Src[src].File = file;
+			inst->Alpha.Src[src].Index = index;
+		}
+	}
+}
+
+
+/**
+ * Remap all register accesses of \p inst: \p cb is called for every register
+ * it reads or writes, and whatever \p cb stores through its pfile and pindex
+ * arguments is written back into the instruction.
+ */
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata)
+{
+	if (inst->Type != RC_INSTRUCTION_NORMAL) {
+		remap_pair_instruction(inst, cb, userdata);
+		return;
+	}
+	remap_normal_instruction(inst, cb, userdata);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
new file mode 100644
index 0000000000..60a6e192a9
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_DATAFLOW_H
+#define RADEON_DATAFLOW_H
+
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_swizzle_caps;
+
+
+/**
+ * Helpers that walk or rewrite the register accesses of a single instruction.
+ */
+/*@{*/
+typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
+			rc_register_file file, unsigned int index, unsigned int chan);
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+/* Same shape as above, but the last argument is a channel mask instead of one channel. */
+typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
+			rc_register_file file, unsigned int index, unsigned int mask);
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+/* Remap callback: may overwrite *pfile and *pindex to redirect the access. */
+typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
+			rc_register_file * pfile, unsigned int * pindex);
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
+/*@}*/
+
+
+/**
+ * Compiler passes based on dataflow analysis.
+ */
+/*@{*/
+typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data,
+			void (*mark_fn)(void * data, unsigned int index, unsigned int mask));
+void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata);
+void rc_dataflow_swizzles(struct radeon_compiler * c);
+/*@}*/
+
+void rc_optimize(struct radeon_compiler * c);
+
+#endif /* RADEON_DATAFLOW_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
new file mode 100644
index 0000000000..e3c2c83c0c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+struct updatemask_state {
+	unsigned char Output[RC_REGISTER_MAX_INDEX]; /* per output register: channels marked used */
+	unsigned char Temporary[RC_REGISTER_MAX_INDEX]; /* per temporary register: channels marked used */
+	unsigned char Address; /* used mask of the address register */
+	unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; /* used masks of the special registers */
+};
+
+struct instruction_state {
+	unsigned char WriteMask:4; /* destination channels found to be used */
+	unsigned char WriteALUResult:1; /* set when the instruction's ALU-result write is used */
+	unsigned char SrcReg[3]; /* per source operand: source channels already recorded as needed */
+};
+
+struct branchinfo {
+	unsigned int HaveElse:1; /* set once the ELSE of this branch has been seen */
+	/* Copies of the used-masks saved when the ENDIF and the ELSE are reached. */
+	struct updatemask_state StoreEndif;
+	struct updatemask_state StoreElse;
+};
+
+struct deadcode_state {
+	struct radeon_compiler * C;
+	struct instruction_state * Instructions; /* one entry per instruction, indexed by IP */
+	/* Used-masks at the current point of the analysis. */
+	struct updatemask_state R;
+	/* Stack of open branches; grown through memory_pool_array_reserve. */
+	struct branchinfo * BranchStack;
+	unsigned int BranchStackSize;
+	unsigned int BranchStackReserved;
+};
+
+
+/* Store the element-wise OR of \p a and \p b in \p dst (dst may alias an input). */
+static void or_updatemasks(
+	struct updatemask_state * dst,
+	struct updatemask_state * a,
+	struct updatemask_state * b)
+{
+	unsigned int reg;
+	dst->Address = a->Address | b->Address;
+	for(reg = 0; reg < RC_NUM_SPECIAL_REGISTERS; ++reg)
+		dst->Special[reg] = a->Special[reg] | b->Special[reg];
+	for(reg = 0; reg < RC_REGISTER_MAX_INDEX; ++reg) {
+		dst->Output[reg] = a->Output[reg] | b->Output[reg];
+		dst->Temporary[reg] = a->Temporary[reg] | b->Temporary[reg];
+	}
+}
+
+static void push_branch(struct deadcode_state * s)
+{
+	memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
+			s->BranchStackSize, s->BranchStackReserved, 1);
+	/* New top-of-stack entry: no ELSE seen yet, current masks saved as the ENDIF state. */
+	struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++];
+	branch->HaveElse = 0;
+	memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
+}
+
+/* Return the used-mask slot for register (\p file, \p index), or NULL when the
+ * file is not tracked or the index is out of range (reported through rc_error). */
+static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
+{
+	switch(file) {
+	case RC_FILE_OUTPUT:
+	case RC_FILE_TEMPORARY:
+		if (index >= RC_REGISTER_MAX_INDEX) {
+			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
+			return 0;
+		}
+		return file == RC_FILE_OUTPUT ? &s->R.Output[index] : &s->R.Temporary[index];
+
+	case RC_FILE_ADDRESS:
+		return &s->R.Address;
+	case RC_FILE_SPECIAL:
+		if (index >= RC_NUM_SPECIAL_REGISTERS) {
+			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
+			return 0;
+		}
+		return &s->R.Special[index];
+	default:
+		return 0;
+	}
+}
+
+/* OR \p mask into the used-mask of the given register, if that register is tracked. */
+static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
+{
+	unsigned char * slot = get_used_ptr(s, file, index);
+	if (slot) *slot |= mask;
+}
+
+static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	struct instruction_state * insts = &s->Instructions[inst->IP];
+	unsigned int usedmask = 0;
+	/* Written channels that are currently marked used are what this instruction must produce; they are cleared from the used set. */
+	if (opcode->HasDstReg) {
+		unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
+		if (pused) {
+			usedmask = *pused & inst->U.I.DstReg.WriteMask;
+			*pused &= ~usedmask;
+		}
+	}
+
+	insts->WriteMask |= usedmask;
+	/* A pending use of the ALU result is consumed by an instruction that writes it. */
+	if (inst->U.I.WriteALUResult) {
+		unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
+		if (pused && *pused) {
+			if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+				usedmask |= RC_MASK_X;
+			else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+				usedmask |= RC_MASK_W;
+
+			*pused = 0;
+			insts->WriteALUResult = 1;
+		}
+	}
+	/* NOTE(review): rc_compute_sources_for_writemask presumably fills srcmasks with the source channels needed for usedmask -- confirm. */
+	unsigned int srcmasks[3];
+	rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
+	/* Only source channels not yet recorded in insts->SrcReg are propagated to the used set. */
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+		unsigned int refmask = 0;
+		unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
+		insts->SrcReg[src] |= newsrcmask;
+
+		for(unsigned int chan = 0; chan < 4; ++chan) {
+			if (GET_BIT(newsrcmask, chan))
+				refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+		}
+
+		/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
+		refmask &= RC_MASK_XYZW;
+
+		if (!refmask)
+			continue;
+
+		mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
+
+		if (inst->U.I.SrcReg[src].RelAddr)
+			mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
+	}
+}
+
+/* mark_fn handed to the dce callback: flags channels \p mask of output \p index as used. */
+static void mark_output_use(void * data, unsigned int index, unsigned int mask)
+{
+	struct deadcode_state * state = data;
+	mark_used(state, RC_FILE_OUTPUT, index, mask);
+}
+
+void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata)
+{
+	struct deadcode_state s;
+	unsigned int nr_instructions;
+
+	memset(&s, 0, sizeof(s));
+	s.C = c;
+	/* One zeroed instruction_state per instruction. NOTE(review): rc_recompute_ips presumably renumbers inst->IP and returns the count -- confirm. */
+	nr_instructions = rc_recompute_ips(c);
+	s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
+	memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
+	/* Let the caller seed the analysis by marking the outputs it needs through mark_output_use. */
+	dce(userdata, &s, &mark_output_use);
+	/* Backward pass: walk from the last instruction to the first, propagating uses. */
+	for(struct rc_instruction * inst = c->Program.Instructions.Prev;
+	    inst != &c->Program.Instructions;
+	    inst = inst->Prev) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		/* Walking backwards, a branch is entered at its ENDIF and left at its IF. */
+		if (opcode->IsFlowControl) {
+			if (opcode->Opcode == RC_OPCODE_ENDIF) {
+				push_branch(&s);
+			} else {
+				if (s.BranchStackSize) {
+					struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
+
+					if (opcode->Opcode == RC_OPCODE_IF) {
+						or_updatemasks(&s.R,
+								&s.R,
+								branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
+
+						s.BranchStackSize--;
+					} else if (opcode->Opcode == RC_OPCODE_ELSE) {
+						if (branch->HaveElse) {
+							rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
+						} else {
+							memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); /* keep the masks reached through the else side */
+							memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); /* restart from the ENDIF masks for the if side */
+							branch->HaveElse = 1;
+						}
+					} else {
+						rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
+					}
+				} else {
+					rc_error(c, "%s: Unexpected control flow instruction\n", __FUNCTION__);
+				}
+			}
+		}
+
+		update_instruction(&s, inst);
+	}
+	/* Forward pass: shrink writemasks, delete dead instructions, trim source swizzles. */
+	unsigned int ip = 0;
+	for(struct rc_instruction * inst = c->Program.Instructions.Next;
+	    inst != &c->Program.Instructions;
+	    inst = inst->Next, ++ip) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+		int dead = 1;
+
+		if (!opcode->HasDstReg) {
+			dead = 0;
+		} else {
+			inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
+			if (s.Instructions[ip].WriteMask)
+				dead = 0;
+
+			if (s.Instructions[ip].WriteALUResult)
+				dead = 0;
+			else
+				inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
+		}
+		/* Step back before unlinking so that the loop's inst->Next lands on the successor of the removed instruction. */
+		if (dead) {
+			struct rc_instruction * todelete = inst;
+			inst = inst->Prev;
+			rc_remove_instruction(todelete);
+			continue;
+		}
+
+		unsigned int srcmasks[3];
+		unsigned int usemask = s.Instructions[ip].WriteMask;
+
+		if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+			usemask |= RC_MASK_X;
+		else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+			usemask |= RC_MASK_W;
+
+		rc_compute_sources_for_writemask(inst, usemask, srcmasks);
+		/* Source channels missing from srcmasks get an UNUSED swizzle. */
+		for(unsigned int src = 0; src < 3; ++src) {
+			for(unsigned int chan = 0; chan < 4; ++chan) {
+				if (!GET_BIT(srcmasks[src], chan))
+					SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
+			}
+		}
+	}
+
+	rc_calculate_inputs_outputs(c);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
new file mode 100644
index 0000000000..33acbd30f4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+/**
+ * Replace source operand \p src of \p inst by a fresh temporary that is
+ * filled by one MOV per phase of the split reported by SwizzleCaps->Split.
+ */
+static void rewrite_source(struct radeon_compiler * c,
+		struct rc_instruction * inst, unsigned src)
+{
+	struct rc_swizzle_split split;
+	unsigned int tempreg = rc_find_free_temporary(c);
+	unsigned int usemask = 0;
+	/* Channels of the operand whose swizzle is not UNUSED. */
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
+			usemask |= 1 << chan;
+	}
+
+	c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);
+
+	for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
+		struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
+		unsigned int masked_negate;
+
+		mov->U.I.Opcode = RC_OPCODE_MOV;
+		mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		mov->U.I.DstReg.Index = tempreg;
+		mov->U.I.DstReg.WriteMask = split.Phase[phase];
+		mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
+
+		/* Channels outside this phase are marked UNUSED in the MOV's source. */
+		for(unsigned int chan = 0; chan < 4; ++chan) {
+			if (!GET_BIT(split.Phase[phase], chan))
+				SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
+		}
+
+		/* Collapse Negate to none/all when this phase's channels are uniformly (un)negated. */
+		masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
+		if (masked_negate == 0)
+			mov->U.I.SrcReg[0].Negate = 0;
+		else if (masked_negate == split.Phase[phase])
+			mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+	}
+
+	/* The instruction now reads the temporary channel-for-channel; the
+	 * original Negate/Abs modifiers were copied into the MOVs above. */
+	inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[src].Index = tempreg;
+	inst->U.I.SrcReg[src].Swizzle = 0;
+	inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
+	inst->U.I.SrcReg[src].Abs = 0;
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
+				GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
+	}
+}
+
+/* Walk the program and rewrite, via rewrite_source, every source operand
+ * that SwizzleCaps->IsNative rejects for its instruction's opcode. */
+void rc_dataflow_swizzles(struct radeon_compiler * c)
+{
+	struct rc_instruction * const head = &c->Program.Instructions;
+
+	for(struct rc_instruction * cur = head->Next; cur != head; cur = cur->Next) {
+		const struct rc_opcode_info * info = rc_get_opcode_info(cur->U.I.Opcode);
+
+		for(unsigned int src = 0; src < info->NumSrcRegs; ++src)
+			if (!c->SwizzleCaps->IsNative(cur->U.I.Opcode, cur->U.I.SrcReg[src]))
+				rewrite_source(c, cur, src);
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
new file mode 100644
index 0000000000..863654cf68
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_emulate_branches.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+struct proxy_info {
+	unsigned int Proxied:1; /* set once a proxy temporary has been assigned */
+	unsigned int Index:RC_REGISTER_INDEX_BITS; /* index of that proxy temporary */
+};
+
+struct register_proxies {
+	struct proxy_info Temporary[RC_REGISTER_MAX_INDEX]; /* indexed by the original temporary's index */
+};
+
+struct branch_info {
+	struct rc_instruction * If; /* the IF instruction that opened the branch */
+	struct rc_instruction * Else; /* the ELSE recorded by handle_else, or NULL if none was seen */
+};
+
+struct emulate_branch_state {
+	struct radeon_compiler * C;
+	/* Stack of currently open branches; grown through memory_pool_array_reserve. */
+	struct branch_info * Branches;
+	unsigned int BranchCount;
+	unsigned int BranchReserved;
+};
+
+
+static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+	memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+			s->Branches, s->BranchCount, s->BranchReserved, 1);
+
+	DBG("%s\n", __FUNCTION__);
+	/* Push a zeroed entry for this IF; Else stays NULL until handle_else records one. */
+	struct branch_info * branch = &s->Branches[s->BranchCount++];
+	memset(branch, 0, sizeof(struct branch_info));
+	branch->If = inst;
+
+	/* Make a safety copy of the decision register, because we will need
+	 * it at ENDIF time and it might be overwritten in both branches. */
+	struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, inst->Prev);
+	inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+	inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C);
+	inst_mov->U.I.DstReg.WriteMask = RC_MASK_X;
+	inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+	/* Point the IF at the copy, with swizzle and modifiers reset. NOTE(review): Swizzle 0 presumably selects X for every channel -- confirm. */
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
+	inst->U.I.SrcReg[0].Swizzle = 0;
+	inst->U.I.SrcReg[0].Abs = 0;
+	inst->U.I.SrcReg[0].Negate = 0;
+}
+
+/* Remember \p inst as the ELSE of the innermost open branch. */
+static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+	if (s->BranchCount == 0) {
+		rc_error(s->C, "Encountered ELSE outside of branches");
+		return;
+	}
+
+	DBG("%s\n", __FUNCTION__);
+
+	s->Branches[s->BranchCount - 1].Else = inst;
+}
+
+
+struct state_and_proxies {
+	struct emulate_branch_state * S; /* pass state; its compiler is used to allocate proxy temporaries */
+	struct register_proxies * Proxies; /* proxy table being filled and applied */
+};
+
+/* Return the proxy slot of temporary \p index, or NULL for any other register
+ * file or for an index that lies outside the proxy table. */
+static struct proxy_info * get_proxy_info(struct state_and_proxies * sap,
+			rc_register_file file, unsigned int index)
+{
+	if (file != RC_FILE_TEMPORARY || index >= RC_REGISTER_MAX_INDEX)
+		return 0;
+	return &sap->Proxies->Temporary[index];
+}
+
+/* Writes callback: assign a proxy temporary to each temporary on its first reported write. */
+static void scan_write(void * userdata, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int comp)
+{
+	struct state_and_proxies * ctx = userdata;
+	struct proxy_info * slot = get_proxy_info(ctx, file, index);
+	if (!slot || slot->Proxied)
+		return;
+	slot->Proxied = 1;
+	slot->Index = rc_find_free_temporary(ctx->S->C);
+}
+
+/* Remap callback: redirect a proxied temporary to its proxy register. */
+static void remap_proxy_function(void * userdata, struct rc_instruction * inst,
+		rc_register_file * pfile, unsigned int * pindex)
+{
+	struct state_and_proxies * ctx = userdata;
+	struct proxy_info * slot = get_proxy_info(ctx, *pfile, *pindex);
+	if (!slot || !slot->Proxied)
+		return;
+	*pfile = RC_FILE_TEMPORARY;
+	*pindex = slot->Index;
+}
+
+/**
+ * Redirect all writes in the instruction range [begin, end) to proxy
+ * temporary registers.
+ */
+static void allocate_and_insert_proxies(struct emulate_branch_state * s,
+		struct register_proxies * proxies,
+		struct rc_instruction * begin,
+		struct rc_instruction * end)
+{
+	struct state_and_proxies sap;
+
+	sap.S = s;
+	sap.Proxies = proxies;
+	/* Pass 1: per instruction, assign proxies to the temporaries it writes, then remap all its accesses of proxied temporaries. */
+	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
+		rc_for_all_writes_mask(inst, scan_write, &sap);
+		rc_remap_registers(inst, remap_proxy_function, &sap);
+	}
+	/* Pass 2: for every proxied temporary, insert a MOV (placed relative to begin->Prev) copying the original into its proxy. */
+	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+		if (proxies->Temporary[index].Proxied) {
+			struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev);
+			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index;
+			inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+			inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+			inst_mov->U.I.SrcReg[0].Index = index;
+		}
+	}
+}
+
+
+static void inject_cmp(struct emulate_branch_state * s,
+		struct rc_instruction * inst_if,
+		struct rc_instruction * inst_endif,
+		rc_register_file file, unsigned int index,
+		struct proxy_info ifproxy,
+		struct proxy_info elseproxy)
+{
+	struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif); /* placed relative to the ENDIF */
+	inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+	inst_cmp->U.I.DstReg.File = file;
+	inst_cmp->U.I.DstReg.Index = index;
+	inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0]; /* condition operand copied from the IF */
+	inst_cmp->U.I.SrcReg[0].Abs = 1; /* Abs plus full Negate: the condition is read as -|x| */
+	inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+	inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+	inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index; /* original register when the IF side has no proxy */
+	inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+	inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? elseproxy.Index : index; /* original register when the ELSE side has no proxy */
+}
+
+static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+	if (!s->BranchCount) {
+		rc_error(s->C, "Encountered ENDIF outside of branches");
+		return;
+	}
+
+	DBG("%s\n", __FUNCTION__);
+
+	struct branch_info * branch = &s->Branches[s->BranchCount - 1];
+	struct register_proxies IfProxies;
+	struct register_proxies ElseProxies;
+	/* Both proxy tables start empty; ElseProxies stays empty when there is no ELSE. */
+	memset(&IfProxies, 0, sizeof(IfProxies));
+	memset(&ElseProxies, 0, sizeof(ElseProxies));
+	/* The IF side runs up to the ELSE when there is one, otherwise up to this ENDIF. */
+	allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst);
+
+	if (branch->Else)
+		allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst);
+
+	/* Insert the CMP instructions at the end. */
+	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+		if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) {
+			inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index,
+					IfProxies.Temporary[index], ElseProxies.Temporary[index]);
+		}
+	}
+
+	/* Remove all traces of the branch instructions */
+	rc_remove_instruction(branch->If);
+	if (branch->Else)
+		rc_remove_instruction(branch->Else);
+	rc_remove_instruction(inst);
+	/* Pop the branch that was just resolved. */
+	s->BranchCount--;
+
+	if (VERBOSE) {
+		DBG("Program after ENDIF handling:\n");
+		rc_print_program(&s->C->Program);
+	}
+}
+
+
+struct remap_output_data {
+	unsigned int Output:RC_REGISTER_INDEX_BITS; /* index of the output register to redirect */
+	unsigned int Temporary:RC_REGISTER_INDEX_BITS; /* index of the temporary that replaces it */
+};
+
+/* Remap callback: redirect accesses of output data->Output to temporary data->Temporary. */
+static void remap_output_function(void * userdata, struct rc_instruction * inst,
+		rc_register_file * pfile, unsigned int * pindex)
+{
+	struct remap_output_data * map = userdata;
+	if (*pfile != RC_FILE_OUTPUT || *pindex != map->Output)
+		return;
+	*pfile = RC_FILE_TEMPORARY;
+	*pindex = map->Temporary;
+}
+
+
+/**
+ * Output registers cannot be read from and so cannot be dealt with like
+ * temporary registers.
+ *
+ * We do the simplest thing: If an output register is written within
+ * a branch, then *all* writes to this register are proxied to a
+ * temporary register, and a final MOV is appended to the end of
+ * the program.
+ */
+static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+	struct rc_instruction * const head = &s->C->Program.Instructions;
+	struct remap_output_data remap;
+	struct rc_instruction * final_mov;
+
+	/* Only writes to an output register that happen inside an open
+	 * branch need fixing. */
+	if (s->BranchCount == 0)
+		return;
+	if (!rc_get_opcode_info(inst->U.I.Opcode)->HasDstReg)
+		return;
+	if (inst->U.I.DstReg.File != RC_FILE_OUTPUT)
+		return;
+
+	remap.Output = inst->U.I.DstReg.Index;
+	remap.Temporary = rc_find_free_temporary(s->C);
+
+	/* Redirect every access of this output, program-wide, to the temporary. */
+	for(struct rc_instruction * cur = head->Next; cur != head; cur = cur->Next)
+		rc_remap_registers(cur, &remap_output_function, &remap);
+
+	/* Add the MOV that copies the temporary into the real output. */
+	final_mov = rc_insert_new_instruction(s->C, head->Prev);
+	final_mov->U.I.Opcode = RC_OPCODE_MOV;
+	final_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
+	final_mov->U.I.DstReg.Index = remap.Output;
+	final_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	final_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	final_mov->U.I.SrcReg[0].Index = remap.Temporary;
+}
+
+/**
+ * Remove branch instructions; instead, execute both branches
+ * on different register sets and choose between their results
+ * using CMP instructions in place of the original ENDIF.
+ */
+void rc_emulate_branches(struct radeon_compiler * c)
+{
+	struct emulate_branch_state s;
+	struct rc_instruction * next;
+	memset(&s, 0, sizeof(s));
+	s.C = c;
+
+	/* The successor is fetched before dispatching because the handlers
+	 * may remove the current instruction. */
+	for(struct rc_instruction * inst = c->Program.Instructions.Next;
+	    inst != &c->Program.Instructions;
+	    inst = next) {
+		next = inst->Next;
+		if (inst->Type != RC_INSTRUCTION_NORMAL) {
+			rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__);
+			continue;
+		}
+		switch(inst->U.I.Opcode) {
+		case RC_OPCODE_IF:
+			handle_if(&s, inst);
+			break;
+		case RC_OPCODE_ELSE:
+			handle_else(&s, inst);
+			break;
+		case RC_OPCODE_ENDIF:
+			handle_endif(&s, inst);
+			break;
+		default:
+			fix_output_writes(&s, inst);
+			break;
+		}
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h
new file mode 100644
index 0000000000..e07279f093
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_EMULATE_BRANCHES_H
+#define RADEON_EMULATE_BRANCHES_H
+
+struct radeon_compiler;
+
+void rc_emulate_branches(struct radeon_compiler * c);
+
+#endif /* RADEON_EMULATE_BRANCHES_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
new file mode 100644
index 0000000000..4c5d29f421
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -0,0 +1,474 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_emulate_loops.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+#define VERBOSE 0 /* compile-time switch: a non-zero value enables the DBG() output */
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) /* printf-style trace to stderr */
+
+struct emulate_loop_state { /* state shared by the functions of the loop emulation pass */
+	struct radeon_compiler * C;
+	struct loop_info * Loops; /* one entry per loop visited by transform_loop() */
+	unsigned int LoopCount; /* number of entries of Loops in use */
+	unsigned int LoopReserved; /* bookkeeping handed to memory_pool_array_reserve() */
+};
+
+struct loop_info { /* instructions located by transform_loop() for one loop */
+	struct rc_instruction * BeginLoop; /* the BGNLOOP */
+	struct rc_instruction * Cond; /* the SLT/SGE/SGT/SLE/SEQ/SNE directly before If */
+	struct rc_instruction * If; /* the IF directly before Brk */
+	struct rc_instruction * Brk; /* the BRK */
+	struct rc_instruction * EndIf; /* the ENDIF directly after Brk */
+	struct rc_instruction * EndLoop; /* the ENDLOOP; reset to NULL by transform_const_loop() after it unrolled the loop */
+};
+
+struct const_value { /* user data of the update_const_value() callback */
+	
+	struct radeon_compiler * C;
+	struct rc_src_register * Src; /* register (file, index, first swizzle entry) whose value is looked for */
+	float Value; /* immediate found for Src; only meaningful when HasValue is non-zero */
+	int HasValue; /* non-zero when Value holds an immediate that was MOVed to Src */
+};
+
+struct count_inst { /* user data of the get_incr_amount() callback */
+	struct radeon_compiler * C;
+	int Index; /* index of the temporary holding the loop counter */
+	rc_swizzle Swz; /* swizzle of the counter operand; its first entry names the component */
+	float Amount; /* sum of the immediates added to / subtracted from the counter */
+	int Unknown; /* set to 1 when a write to the counter could not be analysed */
+};
+
+static float get_constant_value(struct radeon_compiler * c,
+						struct rc_src_register * src,
+						int chan)
+{
+	/* Immediate component selected by the swizzle of channel "chan",
+	 * negated if that channel's negate bit is set.  Reports an error and
+	 * yields 0.0f if the swizzle is not X..W or the index is out of range. */
+	const int comp = GET_SWZ(src->Swizzle, chan);
+	float sign;
+	if(comp >= 4 || src->Index >= c->Program.Constants.Count ){
+		rc_error(c, "get_constant_value: Can't find a value.\n");
+		return 0.0f;
+	}
+	sign = GET_BIT(src->Negate, chan) ? -1.0f : 1.0f;
+	return sign * c->Program.Constants.Constants[src->Index].u.Immediate[comp];
+}
+
+static int src_reg_is_immediate(struct rc_src_register * src,
+						struct radeon_compiler * c)
+{	/* 1 if src reads a constant whose Type is RC_CONSTANT_IMMEDIATE, else 0. */
+	const int reads_constant = src->File == RC_FILE_CONSTANT;
+	return reads_constant && c->Program.Constants.Constants[src->Index].Type == RC_CONSTANT_IMMEDIATE;
+}
+
+static unsigned int loop_count_instructions(struct loop_info * loop)
+{
+	/* Number of instructions strictly between BeginLoop and EndLoop. */
+	struct rc_instruction * cur;
+	unsigned int n = 0;
+	for(cur = loop->BeginLoop->Next; cur != loop->EndLoop; cur = cur->Next){
+		n++;
+	}
+	return n;
+}
+
+static unsigned int loop_calc_iterations(struct loop_info * loop,
+		unsigned int loop_count, unsigned int max_instructions)
+{	/* Unroll count: max_instructions shared evenly between loop_count loops of this body size. */
+	const unsigned int divisor = loop_count * loop_count_instructions(loop);
+	return divisor ? max_instructions / divisor : 0; /* empty body: do not divide by zero */
+}
+
+static void loop_unroll(struct emulate_loop_state * s,
+			struct loop_info *loop, unsigned int iterations)
+{	/* Removes BeginLoop/EndLoop, then duplicates the body (iterations - 1) times. */
+	struct rc_instruction * const body_first = loop->BeginLoop->Next;
+	struct rc_instruction * const body_last = loop->EndLoop->Prev;
+	struct rc_instruction * tail = body_last;
+	struct rc_instruction * src;
+	unsigned int pass;
+	rc_remove_instruction(loop->BeginLoop);
+	rc_remove_instruction(loop->EndLoop);
+	for(pass = 1; pass < iterations; pass++){
+		for(src = body_first; src != body_last->Next; src = src->Next){ /* body_last->Next is re-read every step on purpose */
+			struct rc_instruction * copy = rc_alloc_instruction(s->C);
+			memcpy(copy, src, sizeof(struct rc_instruction));
+			rc_insert_instruction(tail, copy);
+			tail = copy;
+		}
+	}
+}
+
+
+static void update_const_value(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	/* Write callback: tracks the immediate last MOVed to the component of value->Src named by its first swizzle entry. */
+	struct const_value * value = data;
+	const unsigned int chan = GET_SWZ(value->Src->Swizzle, 0);
+	if(value->Src->File != file ||
+	   value->Src->Index != index ||
+	   !(1 << chan & mask)){
+		return;
+	}
+	/* Any other write makes the value unknown again; an earlier immediate MOV used to be reported regardless. */
+	value->HasValue = 0;
+	if(inst->U.I.Opcode == RC_OPCODE_MOV &&
+	   src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){
+		/* MOV is componentwise (dst.c = src0.c): read the source through channel "chan", not channel 0. */
+		value->Value = get_constant_value(value->C, &inst->U.I.SrcReg[0], chan);
+		value->HasValue = 1;
+	}
+}
+
+static void get_incr_amount(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	/* Write callback: adds to count_inst->Amount the immediate by which "inst"
+	 * increments or decrements the loop counter, or sets count_inst->Unknown
+	 * when the write is not of the form counter +/- immediate. */
+	struct count_inst * count_inst = data;
+	int amnt_src_index;
+	const struct rc_opcode_info * opcode;
+	float amount;
+
+	if(file != RC_FILE_TEMPORARY ||
+	   count_inst->Index != index ||
+	   (1 << GET_SWZ(count_inst->Swz,0) != mask)){
+		return;
+	}
+	/* Find the index of the counter register. */
+	opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	if(opcode->NumSrcRegs != 2){
+		count_inst->Unknown = 1;
+		return;
+	}
+	if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
+	   inst->U.I.SrcReg[0].Index == count_inst->Index &&
+	   inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
+		amnt_src_index = 1;
+	} else if(inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
+		  inst->U.I.SrcReg[1].Index == count_inst->Index &&
+		  inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
+		amnt_src_index = 0;
+	} else {
+		count_inst->Unknown = 1;
+		return;
+	}
+	if(!src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index],
+							count_inst->C)){
+		count_inst->Unknown = 1;
+		return;
+	}
+	amount = get_constant_value(count_inst->C,
+				&inst->U.I.SrcReg[amnt_src_index], 0);
+	switch(inst->U.I.Opcode){
+	case RC_OPCODE_ADD:
+		count_inst->Amount += amount;
+		break;
+	case RC_OPCODE_SUB:
+		if(amnt_src_index == 0){
+			/* immediate - counter is no constant step; this used to store 0 and so ignored the write. */
+			count_inst->Unknown = 1;
+			return;
+		}
+		count_inst->Amount -= amount;
+		break;
+	default:
+		count_inst->Unknown = 1;
+		return;
+	}
+}
+
+static int transform_const_loop(struct emulate_loop_state * s,
+						struct loop_info * loop,
+						struct rc_instruction * cond)
+{
+	/* Unrolls "loop" completely if "cond" compares a counter with an immediate
+	 * limit, the counter starts from an immediate and only changes by immediates.
+	 * Returns 1 if the loop was unrolled, 0 if no instruction was touched. */
+	int end_loops = 1;
+	int iterations;
+	struct count_inst count_inst;
+	float limit_value;
+	struct rc_src_register * counter;
+	struct rc_src_register * limit;
+	struct const_value counter_value;
+	struct rc_instruction * inst;
+	/* Find the counter and the upper limit */
+	if(src_reg_is_immediate(&cond->U.I.SrcReg[0], s->C)){
+		limit = &cond->U.I.SrcReg[0];
+		counter = &cond->U.I.SrcReg[1];
+	} else if(src_reg_is_immediate(&cond->U.I.SrcReg[1], s->C)){
+		limit = &cond->U.I.SrcReg[1];
+		counter = &cond->U.I.SrcReg[0];
+	} else {
+		DBG("No constant limit.\n");
+		return 0;
+	}
+	/* Find the initial value of the counter */
+	counter_value.Src = counter;
+	counter_value.Value = 0.0f;
+	counter_value.HasValue = 0;
+	counter_value.C = s->C;
+	for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
+							inst = inst->Next){
+		rc_for_all_writes_mask(inst, update_const_value, &counter_value);
+	}
+	if(!counter_value.HasValue){
+		DBG("Initial counter value cannot be determined.\n");
+		return 0;
+	}
+	DBG("Initial counter value is %f\n", counter_value.Value);
+	/* Determine how the counter is modified each loop */
+	count_inst.C = s->C;
+	count_inst.Index = counter->Index;
+	count_inst.Swz = counter->Swizzle;
+	count_inst.Amount = 0.0f;
+	count_inst.Unknown = 0;
+	for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
+		switch(inst->U.I.Opcode){
+		/* XXX In the future we might want to try to unroll nested
+		 * loops here.*/
+		case RC_OPCODE_BGNLOOP:
+			end_loops++;
+			break;
+		case RC_OPCODE_ENDLOOP:
+			loop->EndLoop = inst;
+			end_loops--;
+			break;
+		/* XXX Check if the counter is modified within an if statement.
+		 */
+		case RC_OPCODE_IF:
+			break;
+		default:
+			rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
+			if(count_inst.Unknown){
+				return 0;
+			}
+			break;
+		}
+	}
+	/* Infinite loop */
+	if(count_inst.Amount == 0.0f){
+		return 0;
+	}
+	DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);
+	/* Calculate the number of iterations of this loop.  Keeping this
+	 * simple, since we only support increment and decrement loops.
+	 */
+	limit_value = get_constant_value(s->C, limit, 0);
+	iterations = (int) ((limit_value - counter_value.Value) /
+							count_inst.Amount);
+	DBG("Loop will have %d iterations.\n", iterations);
+	/* loop_unroll() takes an unsigned count and always leaves one copy of the body, so a
+	 * negative count would explode and a zero count would still run the body once: give up. */
+	if(iterations < 1){
+		return 0;
+	}
+	/* Prepare loop for unrolling */
+	rc_remove_instruction(loop->Cond);
+	rc_remove_instruction(loop->If);
+	rc_remove_instruction(loop->Brk);
+	rc_remove_instruction(loop->EndIf);
+	loop_unroll(s, loop, iterations);
+	loop->EndLoop = NULL;
+	return 1;
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement.  Here is an outline of the conversion process:
+ * BGNLOOP;                         	-> BGNLOOP;
+ * <Additional conditional code>	-> <Additional conditional code>
+ * SGE/SLT temp[0], temp[1], temp[2];	-> SLT/SGE temp[0], temp[1], temp[2];
+ * IF temp[0];                      	-> IF temp[0];
+ * BRK;                             	->
+ * ENDIF;                           	-> <Loop Body>
+ * <Loop Body>                      	-> ENDIF;
+ * ENDLOOP;                         	-> ENDLOOP
+ *
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return If transform_const_loop() unrolled the loop right away, a pointer
+ * 		to the first instruction of the unrolled loop.
+ * 	   Otherwise, a pointer to the ENDLOOP instruction.
+ * 	   NULL if there is an error (already reported through rc_error()).
+ */
+static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+						struct rc_instruction * inst)
+{
+	struct loop_info *loop;
+	struct rc_instruction * ptr;
+	unsigned int loop_index;
+	memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+			s->Loops, s->LoopCount, s->LoopReserved, 1);
+	loop_index = s->LoopCount++;
+	loop = &s->Loops[loop_index];
+	memset(loop, 0, sizeof(struct loop_info));
+	if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
+		rc_error(s->C, "%s: expected BGNLOOP\n", __FUNCTION__);
+		return NULL;
+	}
+	loop->BeginLoop = inst;
+
+	for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){
+		switch(ptr->U.I.Opcode){
+		case RC_OPCODE_BGNLOOP:
+			/* Nested loop */
+			ptr = transform_loop(s, ptr);
+			if(!ptr){
+				return NULL;
+			}
+			loop = &s->Loops[loop_index]; /* the nested call reserves too and may have moved s->Loops */
+			break;
+		case RC_OPCODE_BRK:
+			loop->Brk = ptr;
+			if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){
+				rc_error(s->C, "%s: expected ENDIF\n",__FUNCTION__);
+				return NULL;
+			}
+			loop->EndIf = ptr->Next;
+			if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){
+				rc_error(s->C, "%s: expected IF\n", __FUNCTION__);
+				return NULL;
+			}
+			loop->If = ptr->Prev;
+			switch(loop->If->Prev->U.I.Opcode){
+			case RC_OPCODE_SLT:
+			case RC_OPCODE_SGE:
+			case RC_OPCODE_SGT:
+			case RC_OPCODE_SLE:
+			case RC_OPCODE_SEQ:
+			case RC_OPCODE_SNE:
+				break;
+			default:
+				rc_error(s->C, "%s expected conditional\n", __FUNCTION__);
+				return NULL;
+			}
+			loop->Cond = loop->If->Prev;
+			ptr = loop->EndIf;
+			break;
+		case RC_OPCODE_ENDLOOP:
+			loop->EndLoop = ptr;
+			break;
+		}
+	}
+	if(!loop->Cond){ /* no BRK was found, so there is no condition to reverse */
+		rc_error(s->C, "%s: expected BRK\n", __FUNCTION__);
+		return NULL;
+	}
+	/* Reverse the conditional instruction */
+	switch(loop->Cond->U.I.Opcode){
+	case RC_OPCODE_SGE:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SLT;
+		break;
+	case RC_OPCODE_SLT:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SGE;
+		break;
+	case RC_OPCODE_SLE:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SGT;
+		break;
+	case RC_OPCODE_SGT:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SLE;
+		break;
+	case RC_OPCODE_SEQ:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SNE;
+		break;
+	case RC_OPCODE_SNE:
+		loop->Cond->U.I.Opcode = RC_OPCODE_SEQ;
+		break;
+	default:
+		rc_error(s->C, "loop->Cond is not a conditional.\n");
+		return NULL;
+	}
+	/* Check if the number of loops is known at compile time; the comparison is loop->Cond, not ptr. */
+	if(transform_const_loop(s, loop, loop->Cond)){
+		return loop->BeginLoop->Next;
+	}
+	/* Prepare the loop to be unrolled */
+	rc_remove_instruction(loop->Brk);
+	rc_remove_instruction(loop->EndIf);
+	rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
+	return loop->EndLoop;
+}
+
+static void rc_transform_loops(struct emulate_loop_state * s)
+{
+	/* Hands every BGNLOOP to transform_loop() and resumes after what it returns; stops on NULL. */
+	struct rc_instruction * const end = &s->C->Program.Instructions;
+	struct rc_instruction * cur;
+	for(cur = end->Next; cur != end; cur = cur->Next) {
+		if(cur->Type != RC_INSTRUCTION_NORMAL ||
+					cur->U.I.Opcode != RC_OPCODE_BGNLOOP){
+			continue;
+		}
+		cur = transform_loop(s, cur);
+		if(!cur) return;
+	}
+}
+
+static void rc_unroll_loops(struct emulate_loop_state *s,
+						unsigned int max_instructions)
+{
+	/* Walk the recorded loops from last to first so that nested loops,
+	 * which are recorded after the loop containing them, are unrolled
+	 * first.  Entries whose EndLoop is NULL are skipped.
+	 */
+	unsigned int idx = s->LoopCount;
+	while(idx-- > 0){
+		struct loop_info * cur = &s->Loops[idx];
+		if(cur->EndLoop){
+			loop_unroll(s, cur, loop_calc_iterations(cur,
+						s->LoopCount, max_instructions));
+		}
+	}
+}
+
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions)
+{
+	/* Entry point of the pass: transform every loop of the program, then
+	 * unroll the transformed loops with max_instructions as the budget.
+	 *
+	 * We may need to move these two operations to r3xx_(vert|frag)prog.c
+	 * and run the optimization passes between them in order to increase
+	 * the number of unrolls we can do for each loop.
+	 */
+	struct emulate_loop_state state;
+	memset(&state, 0, sizeof(struct emulate_loop_state));
+	state.C = c;
+	rc_transform_loops(&state);
+	rc_unroll_loops(&state, max_instructions);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
new file mode 100644
index 0000000000..ddcf1c0fab
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -0,0 +1,12 @@
+
+
+#ifndef RADEON_EMULATE_LOOPS_H
+#define RADEON_EMULATE_LOOPS_H
+
+#define MAX_ITERATIONS 8
+
+struct radeon_compiler;
+
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions);
+
+#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
new file mode 100644
index 0000000000..1dc16855dc
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -0,0 +1,472 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_opcodes.h"
+#include "radeon_program.h"
+
+#include "radeon_program_constants.h"
+
+struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { /* indexed by rc_opcode: entries must stay in enum order, rc_get_opcode_info() asserts rc_opcodes[op].Opcode == op */
+	{
+		.Opcode = RC_OPCODE_NOP,
+		.Name = "NOP"
+	},
+	{
+		.Opcode = RC_OPCODE_ILLEGAL_OPCODE,
+		.Name = "ILLEGAL OPCODE"
+	},
+	{
+		.Opcode = RC_OPCODE_ABS,
+		.Name = "ABS",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_ADD,
+		.Name = "ADD",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_ARL,
+		.Name = "ARL",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_CEIL,
+		.Name = "CEIL",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_CMP,
+		.Name = "CMP",
+		.NumSrcRegs = 3,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_COS,
+		.Name = "COS",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DDX,
+		.Name = "DDX",
+		.NumSrcRegs = 2, /* NOTE(review): the opcode comment in radeon_opcodes.h only mentions src0 - confirm the second source */
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DDY,
+		.Name = "DDY",
+		.NumSrcRegs = 2, /* NOTE(review): same question as for DDX */
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DP3,
+		.Name = "DP3",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DP4,
+		.Name = "DP4",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DPH,
+		.Name = "DPH",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_DST,
+		.Name = "DST",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_EX2,
+		.Name = "EX2",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_EXP,
+		.Name = "EXP",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_FLR,
+		.Name = "FLR",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_FRC,
+		.Name = "FRC",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_KIL,
+		.Name = "KIL",
+		.NumSrcRegs = 1
+	},
+	{
+		.Opcode = RC_OPCODE_LG2,
+		.Name = "LG2",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_LIT,
+		.Name = "LIT",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_LOG,
+		.Name = "LOG",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_LRP,
+		.Name = "LRP",
+		.NumSrcRegs = 3,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MAD,
+		.Name = "MAD",
+		.NumSrcRegs = 3,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MAX,
+		.Name = "MAX",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MIN,
+		.Name = "MIN",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MOV,
+		.Name = "MOV",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_MUL,
+		.Name = "MUL",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_POW,
+		.Name = "POW",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_RCP,
+		.Name = "RCP",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_RSQ,
+		.Name = "RSQ",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SCS,
+		.Name = "SCS",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SEQ,
+		.Name = "SEQ",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SFL,
+		.Name = "SFL",
+		.NumSrcRegs = 0,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SGE,
+		.Name = "SGE",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SGT,
+		.Name = "SGT",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SIN,
+		.Name = "SIN",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsStandardScalar = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SLE,
+		.Name = "SLE",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SLT,
+		.Name = "SLT",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SNE,
+		.Name = "SNE",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SUB,
+		.Name = "SUB",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_SWZ,
+		.Name = "SWZ",
+		.NumSrcRegs = 1,
+		.HasDstReg = 1,
+		.IsComponentwise = 1
+	},
+	{
+		.Opcode = RC_OPCODE_XPD,
+		.Name = "XPD",
+		.NumSrcRegs = 2,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TEX,
+		.Name = "TEX",
+		.HasTexture = 1,
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TXB,
+		.Name = "TXB",
+		.HasTexture = 1,
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TXD,
+		.Name = "TXD",
+		.HasTexture = 1,
+		.NumSrcRegs = 3,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TXL,
+		.Name = "TXL",
+		.HasTexture = 1,
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_TXP,
+		.Name = "TXP",
+		.HasTexture = 1,
+		.NumSrcRegs = 1,
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_IF,
+		.Name = "IF",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 1
+	},
+	{
+		.Opcode = RC_OPCODE_ELSE,
+		.Name = "ELSE",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_ENDIF,
+		.Name = "ENDIF",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_BGNLOOP,
+		.Name = "BGNLOOP",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_BRK,
+		.Name = "BRK",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0
+	},
+	{
+		.Opcode = RC_OPCODE_ENDLOOP,
+		.Name = "ENDLOOP",
+		.IsFlowControl = 1,
+		.NumSrcRegs = 0,
+	},
+	{
+		.Opcode = RC_OPCODE_REPL_ALPHA,
+		.Name = "REPL_ALPHA",
+		.HasDstReg = 1
+	},
+	{
+		.Opcode = RC_OPCODE_BEGIN_TEX,
+		.Name = "BEGIN_TEX"
+	}
+};
+
+void rc_compute_sources_for_writemask(
+		const struct rc_instruction *inst,
+		unsigned int writemask, unsigned int *srcmasks)
+{
+	/* Fills srcmasks[0..2] with the components of each source operand that
+	 * "inst" reads in order to produce the destination components in "writemask". */
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+	srcmasks[0] = srcmasks[1] = srcmasks[2] = 0;
+
+	if (opcode->Opcode == RC_OPCODE_KIL) /* KIL and IF are marked as reading src0 even with an empty writemask */
+		srcmasks[0] |= RC_MASK_XYZW;
+	else if (opcode->Opcode == RC_OPCODE_IF)
+		srcmasks[0] |= RC_MASK_X;
+	if (!writemask)
+		return;
+	if (opcode->IsComponentwise) {
+		for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+			srcmasks[src] |= writemask;
+	} else if (opcode->IsStandardScalar) {
+		for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+			srcmasks[src] |= RC_MASK_X;
+	} else {
+		switch(opcode->Opcode) {
+		case RC_OPCODE_ARL:
+			srcmasks[0] |= RC_MASK_X;
+			break;
+		case RC_OPCODE_DPH: /* DP3 plus src1.w; DPH used to hit "default" and report no reads at all */
+			srcmasks[1] |= RC_MASK_W;
+			/* Fall through */
+		case RC_OPCODE_DP3:
+			srcmasks[0] |= RC_MASK_XYZ;
+			srcmasks[1] |= RC_MASK_XYZ;
+			break;
+		case RC_OPCODE_DP4:
+			srcmasks[0] |= RC_MASK_XYZW;
+			srcmasks[1] |= RC_MASK_XYZW;
+			break;
+		case RC_OPCODE_TXB:
+		case RC_OPCODE_TXP:
+			srcmasks[0] |= RC_MASK_W;
+			/* Fall through */
+		case RC_OPCODE_TEX:
+			switch (inst->U.I.TexSrcTarget) {
+				case RC_TEXTURE_1D:
+					srcmasks[0] |= RC_MASK_X;
+					break;
+				case RC_TEXTURE_2D:
+				case RC_TEXTURE_RECT:
+				case RC_TEXTURE_1D_ARRAY:
+					srcmasks[0] |= RC_MASK_XY;
+					break;
+				case RC_TEXTURE_3D:
+				case RC_TEXTURE_CUBE:
+				case RC_TEXTURE_2D_ARRAY:
+					srcmasks[0] |= RC_MASK_XYZ;
+					break;
+			}
+			break;
+		case RC_OPCODE_DST:
+			srcmasks[0] |= RC_MASK_Y | RC_MASK_Z;
+			srcmasks[1] |= RC_MASK_Y | RC_MASK_W;
+			break;
+		case RC_OPCODE_EXP:
+		case RC_OPCODE_LOG:
+			srcmasks[0] |= RC_MASK_XY;
+			break;
+		case RC_OPCODE_LIT:
+			srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W;
+			break;
+		default:
+			break;
+		}
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
new file mode 100644
index 0000000000..91c82ac089
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_OPCODES_H
+#define RADEON_OPCODES_H
+
+#include <assert.h>
+
+/**
+ * Opcodes understood by the Radeon compiler.
+ */
+typedef enum {
+	RC_OPCODE_NOP = 0,
+	RC_OPCODE_ILLEGAL_OPCODE,
+
+	/** vec4 instruction: dst.c = abs(src0.c); */
+	RC_OPCODE_ABS,
+
+	/** vec4 instruction: dst.c = src0.c + src1.c; */
+	RC_OPCODE_ADD,
+
+	/** special instruction: load address register
+	 * dst.x = floor(src.x), where dst must be an address register */
+	RC_OPCODE_ARL,
+
+	/** vec4 instruction: dst.c = ceil(src0.c) */
+	RC_OPCODE_CEIL,
+
+	/** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
+	RC_OPCODE_CMP,
+
+	/** scalar instruction: dst = cos(src0.x) */
+	RC_OPCODE_COS,
+
+	/** special instruction: take vec4 partial derivative in X direction
+	 * dst.c = d src0.c / dx */
+	RC_OPCODE_DDX,
+
+	/** special instruction: take vec4 partial derivative in Y direction
+	 * dst.c = d src0.c / dy */
+	RC_OPCODE_DDY,
+
+	/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
+	RC_OPCODE_DP3,
+
+	/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */
+	RC_OPCODE_DP4,
+
+	/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */
+	RC_OPCODE_DPH,
+
+	/** special instruction, see ARB_fragment_program */
+	RC_OPCODE_DST,
+
+	/** scalar instruction: dst = 2**src0.x */
+	RC_OPCODE_EX2,
+
+	/** special instruction, see ARB_vertex_program */
+	RC_OPCODE_EXP,
+
+	/** vec4 instruction: dst.c = floor(src0.c) */
+	RC_OPCODE_FLR,
+
+	/** vec4 instruction: dst.c = src0.c - floor(src0.c) */
+	RC_OPCODE_FRC,
+
+	/** special instruction: stop execution if any component of src0 is negative */
+	RC_OPCODE_KIL,
+
+	/** scalar instruction: dst = log_2(src0.x) */
+	RC_OPCODE_LG2,
+
+	/** special instruction, see ARB_vertex_program */
+	RC_OPCODE_LIT,
+
+	/** special instruction, see ARB_vertex_program */
+	RC_OPCODE_LOG,
+
+	/** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */
+	RC_OPCODE_LRP,
+
+	/** vec4 instruction: dst.c = src0.c*src1.c + src2.c */
+	RC_OPCODE_MAD,
+
+	/** vec4 instruction: dst.c = max(src0.c, src1.c) */
+	RC_OPCODE_MAX,
+
+	/** vec4 instruction: dst.c = min(src0.c, src1.c) */
+	RC_OPCODE_MIN,
+
+	/** vec4 instruction: dst.c = src0.c */
+	RC_OPCODE_MOV,
+
+	/** vec4 instruction: dst.c = src0.c*src1.c */
+	RC_OPCODE_MUL,
+
+	/** scalar instruction: dst = src0.x ** src1.x */
+	RC_OPCODE_POW,
+
+	/** scalar instruction: dst = 1 / src0.x */
+	RC_OPCODE_RCP,
+
+	/** scalar instruction: dst = 1 / sqrt(src0.x) */
+	RC_OPCODE_RSQ,
+
+	/** special instruction, see ARB_fragment_program */
+	RC_OPCODE_SCS,
+
+	/** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SEQ,
+
+	/** vec4 instruction: dst.c = 0.0 */
+	RC_OPCODE_SFL,
+
+	/** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SGE,
+
+	/** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SGT,
+
+	/** scalar instruction: dst = sin(src0.x) */
+	RC_OPCODE_SIN,
+
+	/** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SLE,
+
+	/** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SLT,
+
+	/** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
+	RC_OPCODE_SNE,
+
+	/** vec4 instruction: dst.c = src0.c - src1.c */
+	RC_OPCODE_SUB,
+
+	/** vec4 instruction: dst.c = src0.c */
+	RC_OPCODE_SWZ,
+
+	/** special instruction, see ARB_fragment_program */
+	RC_OPCODE_XPD,
+
+	RC_OPCODE_TEX, /* TEX..TXP: the opcodes whose rc_opcodes[] entry sets HasTexture */
+	RC_OPCODE_TXB,
+	RC_OPCODE_TXD,
+	RC_OPCODE_TXL,
+	RC_OPCODE_TXP,
+
+	/** branch instruction:
+	 * If src0.x != 0.0, continue with the next instruction;
+	 * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF.
+	 */
+	RC_OPCODE_IF,
+
+	/** branch instruction: jump to matching RC_OPCODE_ENDIF */
+	RC_OPCODE_ELSE,
+
+	/** branch instruction: has no effect */
+	RC_OPCODE_ENDIF,
+	
+	RC_OPCODE_BGNLOOP, /* flow control: opens a loop that is closed by RC_OPCODE_ENDLOOP */
+
+	RC_OPCODE_BRK, /* flow control; presumably leaves the innermost loop - TODO confirm */
+
+	RC_OPCODE_ENDLOOP, /* flow control: closes the loop opened by RC_OPCODE_BGNLOOP */
+
+	/** special instruction, used in R300-R500 fragment program pair instructions
+	 * indicates that the result of the alpha operation shall be replicated
+	 * across all other channels */
+	RC_OPCODE_REPL_ALPHA,
+
+	/** special instruction, used in R300-R500 fragment programs
+	 * to indicate the start of a block of texture instructions that
+	 * can run simultaneously. */
+	RC_OPCODE_BEGIN_TEX,
+
+	MAX_RC_OPCODE /* number of opcodes; dimension of rc_opcodes[] */
+} rc_opcode;
+
+
+struct rc_opcode_info { /* static description of one opcode; one entry per opcode lives in rc_opcodes[] */
+	rc_opcode Opcode; /* the opcode this entry describes; checked against the table index by rc_get_opcode_info() */
+	const char * Name; /* mnemonic, e.g. "ADD" */
+
+	/** true if the instruction reads from a texture.
+	 *
+	 * \note This is false for the KIL instruction, even though KIL is
+	 * a texture instruction from a hardware point of view. */
+	unsigned int HasTexture:1;
+
+	unsigned int NumSrcRegs:2; /* number of source operands; the 2-bit field holds 0..3 */
+	unsigned int HasDstReg:1; /* true if the instruction has a destination register */
+
+	/** true if this instruction affects control flow */
+	unsigned int IsFlowControl:1;
+
+	/** true if this is a vector instruction that operates on components in parallel
+	 * without any cross-component interaction */
+	unsigned int IsComponentwise:1;
+
+	/** true if this instruction sources only its operands' X components
+	 * to compute one result which is smeared across all output channels */
+	unsigned int IsStandardScalar:1;
+};
+
+extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE];
+
+static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode)
+{
+	/* Checked table lookup: opcode must be a valid index whose entry describes that same opcode. */
+	assert((unsigned int)opcode < MAX_RC_OPCODE);
+	assert(rc_opcodes[opcode].Opcode == opcode);
+	return rc_opcodes + opcode;
+}
+
+struct rc_instruction;
+
+void rc_compute_sources_for_writemask(
+		const struct rc_instruction *inst,
+		unsigned int writemask,
+		unsigned int *srcmasks);
+
+#endif /* RADEON_OPCODES_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
new file mode 100644
index 0000000000..21d7210888
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+/* Compose two source operands: the result reads inner's register as outer(inner) would. */
+static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
+{
+	struct rc_src_register chained;
+	unsigned int routed = 0;
+	if (!outer.Abs) {
+		/* Without an outer Abs, inner's negate bits survive, moved to the
+		 * channels that outer's swizzle sends them to; selectors >= 4 do
+		 * not pick one of inner's four negate bits and contribute nothing. */
+		for(unsigned int c = 0; c < 4; ++c) {
+			const unsigned int sel = GET_SWZ(outer.Swizzle, c);
+			if (sel < 4)
+				routed |= GET_BIT(inner.Negate, sel) << c;
+		}
+	}
+	chained.File = inner.File;
+	chained.Index = inner.Index;
+	chained.RelAddr = inner.RelAddr;
+	chained.Abs = outer.Abs ? 1 : inner.Abs;
+	chained.Negate = routed ^ outer.Negate;
+	chained.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
+	return chained;
+}
+
+struct peephole_state {
+	struct radeon_compiler * C; /**< compiler whose program is being scanned */
+	struct rc_instruction * Mov; /**< the MOV instruction that peephole() is trying to remove */
+	unsigned int Conflict:1; /**< set by the scan callbacks when a reader cannot be rewritten; peephole() then gives up */
+
+	/** Whether Mov's source has been clobbered */
+	unsigned int SourceClobbered:1;
+
+	/** Which components of Mov's destination register are still from that Mov? */
+	unsigned int MovMask:4;
+
+	/** Which components of Mov's destination register are clearly *not* from that Mov */
+	unsigned int DefinedMask:4;
+
+	/** Which components of Mov's source register are sourced */
+	unsigned int SourcedMask:4;
+
+	/** Branch depth beyond Mov; negative value indicates we left the Mov's block */
+	int BranchDepth;
+};
+
+static void peephole_scan_read(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct peephole_state * s = data;
+	/* Read callback of peephole()'s first pass: only reads of the MOV's destination temporary matter. */
+	if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
+		return;
+
+	/* These instructions cannot read from the constants file (see
+	 * radeonTransformTEX()), so the MOV is only folded into them when
+	 * its source is a temporary or an input register. */
+	if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
+			s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT &&
+				(inst->U.I.Opcode == RC_OPCODE_TEX ||
+				inst->U.I.Opcode == RC_OPCODE_TXB ||
+				inst->U.I.Opcode == RC_OPCODE_TXP ||
+				inst->U.I.Opcode == RC_OPCODE_KIL)){
+		s->Conflict = 1;
+		return;
+	}
+	if ((mask & s->MovMask) == mask) { /* every component read still comes from the MOV */
+		if (s->SourceClobbered) {
+			s->Conflict = 1; /* ...but the MOV's source has been overwritten since */
+		}
+	} else if ((mask & s->DefinedMask) == mask) {
+		/* read from something entirely written by other instruction: this is okay */
+	} else {
+		/* read from component combination that is not well-defined without
+		 * the MOV: cannot remove it */
+		s->Conflict = 1;
+	}
+}
+
+static void peephole_scan_write(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct peephole_state * s = data;
+	/* Write callback of peephole()'s first pass; ignored once the scan has left the MOV's block. */
+	if (s->BranchDepth < 0)
+		return;
+
+	if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
+		s->MovMask &= ~mask; /* these components no longer hold the MOV's value */
+		if (s->BranchDepth == 0)
+			s->DefinedMask |= mask; /* overwritten at the MOV's own nesting level */
+		else
+			s->DefinedMask &= ~mask; /* overwritten inside a nested IF: neither MOV's nor clearly defined */
+	}
+	if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
+		if (mask & s->SourcedMask) /* only components the MOV actually reads matter */
+			s->SourceClobbered = 1;
+	} else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
+		s->SourceClobbered = 1; /* a relatively addressed source changes when the address register does */
+	}
+}
+
+static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov)
+{
+	struct peephole_state s;
+	/* Try to delete inst_mov by making later readers of its destination read its source directly. */
+	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult)
+		return;
+
+	memset(&s, 0, sizeof(s));
+	s.C = c;
+	s.Mov = inst_mov;
+	s.MovMask = inst_mov->U.I.DstReg.WriteMask;
+	s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
+	/* Record which components of the source register the MOV's swizzle selects. */
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
+		s.SourcedMask |= (1 << swz) & RC_MASK_XYZW; /* selectors >= 4 fall outside the mask and add nothing */
+	}
+
+	/* 1st pass: Check whether all subsequent readers can be changed */
+	for(struct rc_instruction * inst = inst_mov->Next;
+	    inst != &c->Program.Instructions;
+	    inst = inst->Next) {
+		rc_for_all_reads_mask(inst, peephole_scan_read, &s);
+		rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+		if (s.Conflict)
+			return;
+
+		if (s.BranchDepth >= 0) {
+			if (inst->U.I.Opcode == RC_OPCODE_IF) {
+				s.BranchDepth++;
+			} else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+				s.BranchDepth--;
+				if (s.BranchDepth < 0) { /* left the MOV's block: from here on its value counts as neither MOV's nor defined */
+					s.DefinedMask &= ~s.MovMask;
+					s.MovMask = 0;
+				}
+			}
+		}
+	}
+
+	if (s.Conflict)
+		return;
+
+	/* 2nd pass: We can satisfy all readers, so switch them over all at once */
+	s.MovMask = inst_mov->U.I.DstReg.WriteMask;
+	s.BranchDepth = 0;
+
+	for(struct rc_instruction * inst = inst_mov->Next;
+	    inst != &c->Program.Instructions;
+	    inst = inst->Next) {
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+		for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+			if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
+			    inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
+				unsigned int refmask = 0; /* components of the MOV's destination this operand reads */
+
+				for(unsigned int chan = 0; chan < 4; ++chan) {
+					unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+					refmask |= (1 << swz) & RC_MASK_XYZW;
+				}
+
+				if ((refmask & s.MovMask) == refmask) /* reads only components still produced by the MOV */
+					inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
+			}
+		}
+
+		if (opcode->HasDstReg) {
+			if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
+			    inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
+				s.MovMask &= ~inst->U.I.DstReg.WriteMask; /* later readers of these components see this write, not the MOV */
+			}
+		}
+
+		if (s.BranchDepth >= 0) {
+			if (inst->U.I.Opcode == RC_OPCODE_IF) {
+				s.BranchDepth++;
+			} else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+				s.BranchDepth--;
+				if (s.BranchDepth < 0)
+					break; /* no more readers after this point */
+			}
+		}
+	}
+
+	/* Finally, remove the original MOV instruction */
+	rc_remove_instruction(inst_mov);
+}
+
+/**
+ * Check whether every used channel of src selects the same swizzle
+ * constant with the same sign; on success store them in *pswz, *pnegate.
+ */
+static int is_src_uniform_constant(struct rc_src_register src,
+		rc_swizzle * pswz, unsigned int * pnegate)
+{
+	int have_used = 0;
+
+	if (src.File != RC_FILE_NONE) {
+		*pswz = 0;
+		return 0;
+	}
+
+	for(unsigned int chan = 0; chan < 4; ++chan) {
+		unsigned int swz = GET_SWZ(src.Swizzle, chan);
+		if (swz < 4) { /* channel reads a register component, not a constant */
+			*pswz = 0;
+			return 0;
+		}
+		if (swz == RC_SWIZZLE_UNUSED)
+			continue;
+
+		if (!have_used) {
+			*pswz = swz;
+			*pnegate = GET_BIT(src.Negate, chan);
+			have_used = 1;
+		} else {
+			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
+				*pswz = 0;
+				return 0;
+			}
+		}
+	}
+	/* With all four channels unused nothing was stored in *pswz / *pnegate, so report failure. */
+	return have_used;
+}
+
+/* Simplify a MAD (src0 * src1 + src2) one of whose operands is the uniform constant 0 or 1. */
+static void constant_folding_mad(struct rc_instruction * inst)
+{
+	rc_swizzle swz;
+	unsigned int negate;
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MUL; /* a*b + 0: the first two operands stay in place */
+			return;
+		}
+	}
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ONE) {
+			inst->U.I.Opcode = RC_OPCODE_ADD; /* a*(+-1) + c becomes (+-a) + c */
+			if (negate)
+				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
+			return;
+		} else if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV; /* a*0 + c becomes c */
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+			return;
+		}
+	}
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ONE) {
+			inst->U.I.Opcode = RC_OPCODE_ADD; /* (+-1)*b + c becomes c + (+-b) */
+			if (negate)
+				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+			return;
+		} else if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV; /* 0*b + c becomes c */
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+			return;
+		}
+	}
+}
+
+static void constant_folding_mul(struct rc_instruction * inst)
+{
+	rc_swizzle swz;
+	unsigned int negate;
+	/* Turn a MUL with a uniform 1 or 0 operand into a MOV of the other operand or of zero. */
+	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ONE) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+			if (negate) /* the constant was -1: flip the sign of the surviving operand */
+				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+			return;
+		} else if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; /* result is zero on every channel */
+			return;
+		}
+	}
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ONE) {
+			inst->U.I.Opcode = RC_OPCODE_MOV; /* SrcReg[0] already is the surviving operand */
+			if (negate)
+				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+			return;
+		} else if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; /* every channel of SrcReg[0] now selects zero */
+			return;
+		}
+	}
+}
+
+static void constant_folding_add(struct rc_instruction * inst)
+{
+	rc_swizzle swz;
+	unsigned int negate;
+	/* Turn an ADD with a uniform zero operand into a MOV of the other operand. */
+	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+			return;
+		}
+	}
+
+	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+		if (swz == RC_SWIZZLE_ZERO) {
+			inst->U.I.Opcode = RC_OPCODE_MOV; /* SrcReg[0] already is the surviving operand */
+			return;
+		}
+	}
+}
+
+
+/**
+ * Replace 0.0, 1.0 and 0.5 immediate constants by their
+ * respective swizzles. Simplify instructions like ADD dst, src, 0;
+ */
+static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
+	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
+		    inst->U.I.SrcReg[src].RelAddr ||
+		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
+			continue;
+
+		struct rc_constant * constant =
+			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
+
+		if (constant->Type != RC_CONSTANT_IMMEDIATE)
+			continue;
+
+		struct rc_src_register newsrc = inst->U.I.SrcReg[src];
+		int have_real_reference = 0; /* set when some channel still needs the constant itself */
+		for(unsigned int chan = 0; chan < 4; ++chan) {
+			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
+			if (swz >= 4) /* channel does not read a component of the constant */
+				continue;
+
+			unsigned int newswz;
+			float imm = constant->u.Immediate[swz];
+			float baseimm = imm; /* magnitude of imm; the sign is moved into Negate below */
+			if (imm < 0.0)
+				baseimm = -baseimm;
+
+			if (baseimm == 0.0) {
+				newswz = RC_SWIZZLE_ZERO;
+			} else if (baseimm == 1.0) {
+				newswz = RC_SWIZZLE_ONE;
+			} else if (baseimm == 0.5) {
+				newswz = RC_SWIZZLE_HALF;
+			} else {
+				have_real_reference = 1;
+				continue;
+			}
+
+			SET_SWZ(newsrc.Swizzle, chan, newswz);
+			if (imm < 0.0 && !newsrc.Abs) /* with Abs set the sign of the immediate is not carried over */
+				newsrc.Negate ^= 1 << chan;
+		}
+
+		if (!have_real_reference) { /* no channel reads the constant any more: drop the register reference */
+			newsrc.File = RC_FILE_NONE;
+			newsrc.Index = 0;
+		}
+
+		/* don't make the swizzle worse */
+		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
+		    c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+			continue;
+
+		inst->U.I.SrcReg[src] = newsrc;
+	}
+
+	/* Simplify instructions based on constants */
+	if (inst->U.I.Opcode == RC_OPCODE_MAD)
+		constant_folding_mad(inst);
+
+	/* note: MAD can simplify to MUL or ADD */
+	if (inst->U.I.Opcode == RC_OPCODE_MUL)
+		constant_folding_mul(inst);
+	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
+		constant_folding_add(inst);
+}
+
+void rc_optimize(struct radeon_compiler * c)
+{
+	struct rc_instruction * const sentinel = &c->Program.Instructions;
+	struct rc_instruction * following;
+
+	/* Walk the instruction list once, folding constants and then
+	 * trying to eliminate each MOV. */
+	for(struct rc_instruction * cur = sentinel->Next; cur != sentinel; cur = following) {
+		/* fetch the successor first: peephole() may unlink cur */
+		following = cur->Next;
+		constant_folding(c, cur);
+		if (cur->U.I.Opcode == RC_OPCODE_MOV)
+			peephole(c, cur);
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
new file mode 100644
index 0000000000..8a912da461
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -0,0 +1,280 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+struct live_intervals {
+	int Start; /**< instruction IP at which the interval begins (scan_callback uses -1 for inputs) */
+	int End; /**< instruction IP at which the interval ends */
+	struct live_intervals * Next; /**< next interval of the list, or NULL */
+};
+
+struct register_info {
+	struct live_intervals Live; /**< span from the first to the last instruction touching the register */
+
+	unsigned int Used:1; /**< set by scan_callback once any instruction reads or writes the register */
+	unsigned int Allocated:1; /**< set when File/Index hold a valid replacement */
+	unsigned int File:3; /**< register file substituted by remap_register() */
+	unsigned int Index:RC_REGISTER_INDEX_BITS; /**< register index substituted by remap_register() */
+};
+
+struct hardware_register {
+	struct live_intervals * Used; /**< intervals already claimed on this hardware register; NULL while unclaimed */
+};
+
+struct regalloc_state {
+	struct radeon_compiler * C; /**< compiler whose program is being allocated; also supplies the memory pool */
+
+	struct register_info Input[RC_REGISTER_MAX_INDEX]; /**< bookkeeping per RC_FILE_INPUT index */
+	struct register_info Temporary[RC_REGISTER_MAX_INDEX]; /**< bookkeeping per RC_FILE_TEMPORARY index */
+
+	struct hardware_register * HwTemporary; /**< array of NumHwTemporaries entries */
+	unsigned int NumHwTemporaries;
+};
+
+static void print_live_intervals(struct live_intervals * src)
+{
+	const struct live_intervals * it;
+
+	/* an empty list is printed as a placeholder */
+	if (!src)
+		DBG("(null)");
+
+	/* one "(start,end)" pair per interval, in list order */
+	for(it = src; it; it = it->Next)
+		DBG("(%i,%i)", it->Start, it->End);
+}
+
+static void add_live_intervals(struct regalloc_state * s,
+		struct live_intervals ** dst, struct live_intervals * src)
+{
+	struct live_intervals ** dst_backup = dst;
+	/* Merge every interval of src into the list *dst; new nodes come from the compiler's memory pool. */
+	if (VERBOSE) {
+		DBG("add_live_intervals: ");
+		print_live_intervals(*dst);
+		DBG(" to ");
+		print_live_intervals(src);
+		DBG("\n");
+	}
+
+	while(src) {
+		if (*dst && (*dst)->End < src->Start) {
+			dst = &(*dst)->Next; /* current dst interval lies entirely before src: skip it */
+		} else if (!*dst || (*dst)->Start > src->End) {
+			struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li)); /* gap found: insert a copy of src here */
+			li->Start = src->Start;
+			li->End = src->End;
+			li->Next = *dst;
+			*dst = li;
+			src = src->Next;
+		} else {
+			if (src->End > (*dst)->End) /* the two touch or overlap: widen dst to cover both */
+				(*dst)->End = src->End;
+			if (src->Start < (*dst)->Start)
+				(*dst)->Start = src->Start;
+			src = src->Next;
+		}
+	}
+
+	if (VERBOSE) {
+		DBG("    result: ");
+		print_live_intervals(*dst_backup);
+		DBG("\n");
+	}
+}
+
+static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src)
+{
+	if (VERBOSE) {
+		DBG("overlap_live_intervals: ");
+		print_live_intervals(dst);
+		DBG(" to ");
+		print_live_intervals(src);
+		DBG("\n");
+	}
+	/* Walk both lists in step; returns 1 on the first overlap found, else 0. NOTE(review): relies on both lists being in ascending order - confirm. */
+	while(src && dst) {
+		if (dst->End <= src->Start) { /* dst ends no later than src begins: merely touching is not an overlap */
+			dst = dst->Next;
+		} else if (dst->End <= src->End) {
+			DBG("    overlap\n");
+			return 1;
+		} else if (dst->Start < src->End) {
+			DBG("    overlap\n");
+			return 1;
+		} else {
+			src = src->Next; /* dst begins no earlier than src ends: try the next src interval */
+		}
+	}
+
+	DBG("    no overlap\n");
+
+	return 0;
+}
+
+static int try_add_live_intervals(struct regalloc_state * s,
+		struct live_intervals ** dst, struct live_intervals * src)
+{
+	/* Merge src into *dst only if they do not collide; returns whether the merge happened. */
+	const int collides = overlap_live_intervals(*dst, src);
+	if (!collides)
+		add_live_intervals(s, dst, src);
+	return !collides;
+}
+
+static void scan_callback(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int mask)
+{
+	struct regalloc_state * s = data;
+	struct register_info * reg;
+	/* Track inputs and temporaries only; every other register file is ignored. */
+	if (file == RC_FILE_TEMPORARY)
+		reg = &s->Temporary[index];
+	else if (file == RC_FILE_INPUT)
+		reg = &s->Input[index];
+	else
+		return;
+	/* The first touch opens the interval; later touches only push its end forward. */
+	if (!reg->Used) {
+		reg->Used = 1;
+		if (file == RC_FILE_INPUT)
+			reg->Live.Start = -1; /* presumably earlier than any inst->IP, so inputs are live from the start - confirm */
+		else
+			reg->Live.Start = inst->IP;
+		reg->Live.End = inst->IP;
+	} else {
+		if (inst->IP > reg->Live.End)
+			reg->Live.End = inst->IP;
+	}
+}
+
+static void compute_live_intervals(struct regalloc_state * s)
+{
+	rc_recompute_ips(s->C); /* presumably renumbers inst->IP, which scan_callback uses as the time axis - confirm */
+	/* Feed every register read and write of every instruction to scan_callback(). */
+	for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
+	    inst != &s->C->Program.Instructions;
+	    inst = inst->Next) {
+		rc_for_all_reads_mask(inst, scan_callback, s);
+		rc_for_all_writes_mask(inst, scan_callback, s);
+	}
+}
+
+static void remap_register(void * data, struct rc_instruction * inst,
+		rc_register_file * file, unsigned int * index)
+{
+	struct regalloc_state * s = data;
+	const struct register_info * reg;
+	/* Callback for rc_remap_registers(): rewrites *file / *index in place using the allocation table. */
+	if (*file == RC_FILE_TEMPORARY)
+		reg = &s->Temporary[*index];
+	else if (*file == RC_FILE_INPUT)
+		reg = &s->Input[*index];
+	else
+		return;
+	/* Registers that never received an allocation are left untouched. */
+	if (reg->Allocated) {
+		*file = reg->File;
+		*index = reg->Index;
+	}
+}
+
+static void do_regalloc(struct regalloc_state * s)
+{
+	/* Simple and stupid greedy register allocation */
+	for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+		struct register_info * reg = &s->Temporary[index];
+
+		if (!reg->Used)
+			continue;
+		/* take the first hardware register whose claimed intervals do not overlap this one */
+		for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) {
+			if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live)) {
+				reg->Allocated = 1;
+				reg->File = RC_FILE_TEMPORARY;
+				reg->Index = hwreg;
+				goto success;
+			}
+		}
+
+		rc_error(s->C, "Ran out of hardware temporaries\n");
+		return; /* give up without rewriting any instruction */
+
+	success:;
+	}
+
+	/* Rewrite all instructions based on the translation table we built */
+	for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
+	    inst != &s->C->Program.Instructions;
+	    inst = inst->Next) {
+		rc_remap_registers(inst, &remap_register, s);
+	}
+}
+
+static void alloc_input(void * data, unsigned int input, unsigned int hwreg)
+{
+	struct regalloc_state * s = data;
+	/* Callback handed to AllocateHwInputs: pins program input `input` to hardware register `hwreg`. */
+	if (!s->Input[input].Used)
+		return;
+	/* Added without an overlap test, unlike the temporaries in do_regalloc(). */
+	add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live);
+
+	s->Input[input].Allocated = 1;
+	s->Input[input].File = RC_FILE_TEMPORARY;
+	s->Input[input].Index = hwreg;
+
+}
+
+void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps)
+{
+	struct regalloc_state s;
+	/* Entry point: map the program's inputs and temporaries onto at most maxtemps hardware temporaries. */
+	memset(&s, 0, sizeof(s));
+	s.C = &c->Base;
+	s.NumHwTemporaries = maxtemps;
+	s.HwTemporary = memory_pool_malloc(&s.C->Pool, maxtemps*sizeof(struct hardware_register));
+	memset(s.HwTemporary, 0, maxtemps*sizeof(struct hardware_register));
+
+	compute_live_intervals(&s);
+	/* Inputs claim their hardware registers first; temporaries are then fitted by do_regalloc(). */
+	c->AllocateHwInputs(c, &alloc_input, &s);
+
+	do_regalloc(&s);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
new file mode 100644
index 0000000000..a279549ff8
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+struct schedule_instruction {
+	struct rc_instruction * Instruction; /**< the program instruction this scheduling node wraps */
+
+	/** Next instruction in the linked list of ready instructions. */
+	struct schedule_instruction *NextReady;
+
+	/** Values that this instruction reads and writes */
+	struct reg_value * WriteValues[4];
+	struct reg_value * ReadValues[12];
+	unsigned int NumWriteValues:3; /**< used entries of WriteValues[]; scan_write() caps it at 4 */
+	unsigned int NumReadValues:4; /**< used entries of ReadValues[]; scan_read() caps it at 12 */
+
+	/**
+	 * Number of (read and write) dependencies that must be resolved before
+	 * this instruction can be scheduled.
+	 */
+	unsigned int NumDependencies:5;
+};
+
+
+/**
+ * Used to keep track of which instructions read a value.
+ */
+struct reg_value_reader {
+	struct schedule_instruction *Reader; /**< instruction that reads the value */
+	struct reg_value_reader *Next; /**< next node of the value's reader list, or NULL */
+};
+
+/**
+ * Used to keep track which values are stored in each component of a
+ * RC_FILE_TEMPORARY.
+ */
+struct reg_value {
+	struct schedule_instruction * Writer;
+
+	/**
+	 * Unordered linked list of instructions that read from this value.
+	 * When this value becomes available, we decrease all readers'
+	 * dependency count.
+	 */
+	struct reg_value_reader *Readers;
+
+	/**
+	 * Number of readers of this value. This is decremented each time
+	 * a reader of the value is committed.
+	 * When the reader count reaches zero, the dependency count
+	 * of the instruction writing \ref Next is decremented.
+	 */
+	unsigned int NumReaders;
+
+	struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
+};
+
+struct register_state {
+	struct reg_value * Values[4]; /**< per channel: the value most recently recorded by scan_write(), or NULL */
+};
+
+struct schedule_state {
+	struct radeon_compiler * C; /**< compiler being worked on; supplies the memory pool and the Error flag */
+	struct schedule_instruction * Current; /**< instruction currently being scanned by schedule_block() */
+
+	struct register_state Temporary[RC_REGISTER_MAX_INDEX]; /**< tracked values, indexed by temporary register index */
+
+	/**
+	 * Linked lists of instructions that can be scheduled right now,
+	 * based on which ALU/TEX resources they require.
+	 */
+	/*@{*/
+	struct schedule_instruction *ReadyFullALU;
+	struct schedule_instruction *ReadyRGB;
+	struct schedule_instruction *ReadyAlpha;
+	struct schedule_instruction *ReadyTEX;
+	/*@}*/
+};
+
+static struct reg_value ** get_reg_valuep(struct schedule_state * s,
+		rc_register_file file, unsigned int index, unsigned int chan)
+{	/* Locate the slot tracking the newest value of temporary[index].chan; returns 0 if untracked. */
+	if (file != RC_FILE_TEMPORARY)
+		return 0;
+	/* An out-of-range index is reported through rc_error(); index is unsigned, hence %u. */
+	if (index >= RC_REGISTER_MAX_INDEX) {
+		rc_error(s->C, "%s: index %u out of bounds\n", __FUNCTION__, index);
+		return 0;
+	}
+	/* Hand out the slot's address so callers can read or replace the current value. */
+	return &s->Temporary[index].Values[chan];
+}
+
+static struct reg_value * get_reg_value(struct schedule_state * s,
+		rc_register_file file, unsigned int index, unsigned int chan)
+{
+	/* A null slot means the register is not a tracked temporary (or the
+	 * index was rejected); otherwise hand back whatever the slot holds. */
+	struct reg_value ** slot = get_reg_valuep(s, file, index, chan);
+	return slot ? *slot : 0;
+}
+
+static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
+{
+	inst->NextReady = *list; /* push inst onto the front of the singly linked ready list */
+	*list = inst;
+}
+
+static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+	DBG("%i is now ready\n", sinst->Instruction->IP);
+	/* File the instruction under the ready list matching the resources it needs. */
+	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
+		add_inst_to_list(&s->ReadyTEX, sinst); /* every non-pair instruction is queued as TEX */
+	else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
+		add_inst_to_list(&s->ReadyRGB, sinst); /* alpha half is a NOP: RGB-only */
+	else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
+		add_inst_to_list(&s->ReadyAlpha, sinst); /* RGB half is a NOP: alpha-only */
+	else
+		add_inst_to_list(&s->ReadyFullALU, sinst);
+}
+
+static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+	assert(sinst->NumDependencies > 0);
+	sinst->NumDependencies--; /* one of sinst's dependencies has been resolved */
+	if (!sinst->NumDependencies)
+		instruction_ready(s, sinst); /* that was the last one: sinst can now be scheduled */
+}
+
+static void commit_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+	DBG("%i: commit\n", sinst->Instruction->IP);
+	/* sinst has been emitted: release everything that was waiting on its reads and its writes. */
+	for(unsigned int i = 0; i < sinst->NumReadValues; ++i) {
+		struct reg_value * v = sinst->ReadValues[i];
+		assert(v->NumReaders > 0);
+		v->NumReaders--;
+		if (!v->NumReaders) { /* last reader committed: the next writer of this component may proceed */
+			if (v->Next)
+				decrease_dependencies(s, v->Next->Writer);
+		}
+	}
+
+	for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) {
+		struct reg_value * v = sinst->WriteValues[i];
+		if (v->NumReaders) { /* the value is now available to each recorded reader */
+			for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
+				decrease_dependencies(s, r->Reader);
+			}
+		} else {
+			/* This happens in instruction sequences of the type
+			 *  OP r.x, ...;
+			 *  OP r.x, r.x, ...;
+			 * See also the subtlety in how instructions that both
+			 * read and write the same register are scanned.
+			 */
+			if (v->Next)
+				decrease_dependencies(s, v->Next->Writer);
+		}
+	}
+}
+
+/**
+ * Emit all ready texture instructions in a single block.
+ *
+ * Emit as a single block to (hopefully) sample many textures in parallel,
+ * and to avoid hardware indirections on R300.
+ */
+static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
+{
+	struct schedule_instruction *readytex;
+
+	assert(s->ReadyTEX);
+
+	/* Don't let the ready list change under us! */
+	readytex = s->ReadyTEX;
+	s->ReadyTEX = 0;
+
+	/* Node marker for R300 */
+	struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev);
+	inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
+
+	/* Link texture instructions back in */
+	while(readytex) {
+		struct schedule_instruction * tex = readytex;
+		readytex = readytex->NextReady;
+
+		rc_insert_instruction(before->Prev, tex->Instruction);
+		commit_instruction(s, tex); /* anything this makes ready lands on s->ReadyTEX, not in the block being emitted */
+	}
+}
+
+/* Copy alpha's Alpha half into rgb in place; returns 1 on success, 0 on failure (rgb may then be half-modified). */
+static int destructive_merge_instructions(
+		struct rc_pair_instruction * rgb,
+		struct rc_pair_instruction * alpha)
+{
+	assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
+	assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
+
+	/* Copy alpha args into rgb */
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
+
+	for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+		unsigned int srcrgb = 0;
+		unsigned int srcalpha = 0;
+		unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
+		rc_register_file file = 0;
+		unsigned int index = 0;
+
+		if (alpha->Alpha.Arg[arg].Swizzle < 3) { /* swizzle 0..2: the argument comes from an RGB source slot */
+			srcrgb = 1;
+			file = alpha->RGB.Src[oldsrc].File;
+			index = alpha->RGB.Src[oldsrc].Index;
+		} else if (alpha->Alpha.Arg[arg].Swizzle < 4) { /* swizzle 3: the argument comes from an alpha source slot */
+			srcalpha = 1;
+			file = alpha->Alpha.Src[oldsrc].File;
+			index = alpha->Alpha.Src[oldsrc].Index;
+		}
+
+		int source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
+		if (source < 0)
+			return 0;
+
+		rgb->Alpha.Arg[arg].Source = source;
+		rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
+		rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
+		rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
+	}
+
+	/* Copy alpha opcode into rgb */
+	rgb->Alpha.Opcode = alpha->Alpha.Opcode;
+	rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
+	rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
+	rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
+	rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
+	rgb->Alpha.Saturate = alpha->Alpha.Saturate;
+
+	/* Merge ALU result writing */
+	if (alpha->WriteALUResult) {
+		if (rgb->WriteALUResult) /* both halves want to write the ALU result: cannot merge */
+			return 0;
+
+		rgb->WriteALUResult = alpha->WriteALUResult;
+		rgb->ALUResultCompare = alpha->ALUResultCompare;
+	}
+
+	return 1;
+}
+
+/**
+ * Attempt to fold the alpha-only instruction into the RGB-only one.
+ *
+ * Returns 1 when rgb now carries both halves; returns 0 and leaves
+ * rgb exactly as it was when the two cannot be combined.
+ */
+static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
+{
+	const struct rc_pair_instruction saved = *rgb;
+
+	if (!destructive_merge_instructions(rgb, alpha)) {
+		/* the failed attempt may have half-written rgb: roll it back */
+		*rgb = saved;
+		return 0;
+	}
+
+	return 1;
+}
+
+
+/**
+ * Find a good ALU instruction or pair of ALU instruction and emit it.
+ *
+ * Prefer emitting full ALU instructions, so that when we reach a point
+ * where no full ALU instruction can be emitted, we have more candidates
+ * for RGB/Alpha pairing.
+ */
+static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
+{
+	struct schedule_instruction * sinst;
+
+	if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { /* nothing to pair: emit a single instruction */
+		if (s->ReadyFullALU) {
+			sinst = s->ReadyFullALU;
+			s->ReadyFullALU = s->ReadyFullALU->NextReady;
+		} else if (s->ReadyRGB) {
+			sinst = s->ReadyRGB;
+			s->ReadyRGB = s->ReadyRGB->NextReady;
+		} else {
+			sinst = s->ReadyAlpha;
+			s->ReadyAlpha = s->ReadyAlpha->NextReady;
+		}
+
+		rc_insert_instruction(before->Prev, sinst->Instruction);
+		commit_instruction(s, sinst);
+	} else {
+		struct schedule_instruction **prgb;
+		struct schedule_instruction **palpha;
+
+		/* Some pairings might fail because they require too
+		 * many source slots; try all possible pairings if necessary */
+		for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+			for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
+				struct schedule_instruction * psirgb = *prgb;
+				struct schedule_instruction * psialpha = *palpha;
+
+				if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P))
+					continue;
+
+				*prgb = (*prgb)->NextReady; /* unlink both halves from their ready lists */
+				*palpha = (*palpha)->NextReady;
+				rc_insert_instruction(before->Prev, psirgb->Instruction); /* only the merged RGB instruction is emitted */
+				commit_instruction(s, psirgb);
+				commit_instruction(s, psialpha);
+				goto success;
+			}
+		}
+
+		/* No success in pairing; just take the first RGB instruction */
+		sinst = s->ReadyRGB;
+		s->ReadyRGB = s->ReadyRGB->NextReady;
+
+		rc_insert_instruction(before->Prev, sinst->Instruction);
+		commit_instruction(s, sinst);
+	success: ;
+	}
+}
+
+static void scan_read(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int chan)
+{
+	struct schedule_state * s = data;
+	struct reg_value * v = get_reg_value(s, file, index, chan);
+	/* Per-channel read callback of schedule_block(): makes s->Current depend on the value it reads. */
+	if (!v) /* untracked register, or a component no scanned instruction has written */
+		return;
+
+	if (v->Writer == s->Current) {
+		/* The instruction reads and writes to a register component.
+		 * In this case, we only want to increment dependencies by one.
+		 */
+		return;
+	}
+
+	DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+	struct reg_value_reader * reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
+	reader->Reader = s->Current;
+	reader->Next = v->Readers;
+	v->Readers = reader;
+	v->NumReaders++;
+
+	s->Current->NumDependencies++; /* resolved again when v's writer is committed */
+
+	if (s->Current->NumReadValues >= 12) { /* 12 is the capacity of ReadValues[] */
+		rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
+	} else {
+		s->Current->ReadValues[s->Current->NumReadValues++] = v;
+	}
+}
+
+static void scan_write(void * data, struct rc_instruction * inst,
+		rc_register_file file, unsigned int index, unsigned int chan)
+{
+	struct schedule_state * s = data;
+	struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
+	/* Per-channel write callback of schedule_block(): records a new value written by s->Current. */
+	if (!pv)
+		return;
+
+	DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+	struct reg_value * newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
+	memset(newv, 0, sizeof(*newv));
+
+	newv->Writer = s->Current;
+
+	if (*pv) { /* an earlier value exists in this component: chain to it and wait for it to be released */
+		(*pv)->Next = newv;
+		s->Current->NumDependencies++;
+	}
+
+	*pv = newv; /* later reads of this component now refer to the new value */
+
+	if (s->Current->NumWriteValues >= 4) { /* 4 is the capacity of WriteValues[] */
+		rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
+	} else {
+		s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
+	}
+}
+
+static void schedule_block(struct r300_fragment_program_compiler * c,
+		struct rc_instruction * begin, struct rc_instruction * end)
+{
+	struct schedule_state s;
+	/* Reorder the instructions in [begin, end) according to their data dependencies. */
+	memset(&s, 0, sizeof(s));
+	s.C = &c->Base;
+
+	/* Scan instructions for data dependencies */
+	unsigned int ip = 0;
+	for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
+		s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
+		memset(s.Current, 0, sizeof(struct schedule_instruction));
+
+		s.Current->Instruction = inst;
+		inst->IP = ip++; /* block-local numbering, starting at 0 */
+
+		DBG("%i: Scanning\n", inst->IP);
+
+		/* The order of things here is subtle and maybe slightly
+		 * counter-intuitive, to account for the case where an
+		 * instruction writes to the same register as it reads
+		 * from. */
+		rc_for_all_writes_chan(inst, &scan_write, &s);
+		rc_for_all_reads_chan(inst, &scan_read, &s);
+
+		DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
+
+		if (!s.Current->NumDependencies)
+			instruction_ready(&s, s.Current);
+	}
+
+	/* Temporarily unlink all instructions */
+	begin->Prev->Next = end;
+	end->Prev = begin->Prev;
+
+	/* Schedule instructions back */
+	while(!s.C->Error &&
+	      (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
+		if (s.ReadyTEX)
+			emit_all_tex(&s, end);
+		/* then drain the ALU ready lists before starting the next texture block */
+		while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
+			emit_one_alu(&s, end);
+	}
+}
+
+static int is_controlflow(struct rc_instruction * inst)
+{
+	/* anything that is not a "normal" instruction is never treated as flow control */
+	if (inst->Type != RC_INSTRUCTION_NORMAL)
+		return 0;
+
+	return rc_get_opcode_info(inst->U.I.Opcode)->IsFlowControl;
+}
+
+void rc_pair_schedule(struct r300_fragment_program_compiler *c)
+{
+	struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+	while(inst != &c->Base.Program.Instructions) {
+		if (is_controlflow(inst)) { /* flow-control instructions stay in place and delimit the blocks */
+			inst = inst->Next;
+			continue;
+		}
+		/* inst starts a run of non-flow-control instructions; the whole run is scheduled as one block */
+		struct rc_instruction * first = inst;
+
+		while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
+			inst = inst->Next;
+
+		DBG("Schedule one block\n");
+		schedule_block(c, first, inst);
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
new file mode 100644
index 0000000000..407a0a55ee
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_compiler.h"
+
+
+/**
+ * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
+ * and reverse the order of arguments for CMP.
+ */
+static void final_rewrite(struct rc_sub_instruction *inst)
+{
+	switch(inst->Opcode) {
+	case RC_OPCODE_CMP: {
+		/* Exchange the first and the third source operand. */
+		const struct rc_src_register first = inst->SrcReg[0];
+		inst->SrcReg[0] = inst->SrcReg[2];
+		inst->SrcReg[2] = first;
+		break;
+	}
+	case RC_OPCODE_ADD:
+		/* ADD a, b  ==>  MAD a, 1, b */
+		inst->SrcReg[2] = inst->SrcReg[1];
+		inst->SrcReg[1].File = RC_FILE_NONE;
+		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+		inst->SrcReg[1].Negate = RC_MASK_NONE;
+		inst->Opcode = RC_OPCODE_MAD;
+		break;
+	case RC_OPCODE_MOV:
+		/* AMD say we should use CMP.
+		 * However, when we transform
+		 *  KIL -r0;
+		 * into
+		 *  CMP tmp, -r0, -r0, 0;
+		 *  KIL tmp;
+		 * we get incorrect behaviour on R500 when r0 == 0.0.
+		 * It appears that the R500 KIL hardware treats -0.0 as less
+		 * than zero.
+		 */
+		/* MOV a  ==>  MAD a, 1, 0: set the multiplier here, share the rest with MUL. */
+		inst->SrcReg[1].File = RC_FILE_NONE;
+		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+		/* fall through */
+	case RC_OPCODE_MUL:
+		/* MUL a, b  ==>  MAD a, b, 0 */
+		inst->SrcReg[2].File = RC_FILE_NONE;
+		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
+		inst->Opcode = RC_OPCODE_MAD;
+		break;
+	default:
+		/* nothing to do */
+		break;
+	}
+}
+
+
+/**
+ * Classify an instruction according to which ALUs etc. it needs
+ */
+static void classify_instruction(struct rc_sub_instruction * inst,
+	int * needrgb, int * needalpha, int * istranscendent)
+{
+	/* The three int outputs are always written; each is set to 0 or 1. */
+	const unsigned int writemask = inst->DstReg.WriteMask;
+
+	/* Baseline: a unit is needed when the write mask touches its channels. */
+	*needrgb = (writemask & RC_MASK_XYZ) != 0;
+	*needalpha = (writemask & RC_MASK_W) != 0;
+	*istranscendent = 0;
+
+	/* Writing the ALU result from X requires the RGB unit,
+	 * writing it from W requires the alpha unit. */
+	if (inst->WriteALUResult == RC_ALURESULT_X)
+		*needrgb = 1;
+	else if (inst->WriteALUResult == RC_ALURESULT_W)
+		*needalpha = 1;
+
+	switch(inst->Opcode) {
+	case RC_OPCODE_COS:
+	case RC_OPCODE_EX2:
+	case RC_OPCODE_LG2:
+	case RC_OPCODE_RCP:
+	case RC_OPCODE_RSQ:
+	case RC_OPCODE_SIN:
+		/* Transcendental opcodes always claim the alpha unit. */
+		*needalpha = 1;
+		*istranscendent = 1;
+		break;
+	case RC_OPCODE_DP4:
+		/* DP4 needs both units... */
+		*needalpha = 1;
+		/* fall through */
+	case RC_OPCODE_DP3:
+		/* ...and DP3 at least the RGB unit, whatever the write mask says. */
+		*needrgb = 1;
+		break;
+	default:
+		/* ADD, CMP, DDX, DDY, FRC, MAD, MAX, MIN, MOV, MUL and anything
+		 * else: the write mask and ALU-result flag alone decide. */
+		break;
+	}
+}
+
+
+/**
+ * Fill the given ALU instruction's opcodes and source operands into the given pair,
+ * if possible.
+ */
+static void set_pair_instruction(struct r300_fragment_program_compiler *c,
+	struct rc_pair_instruction * pair,
+	struct rc_sub_instruction * inst)
+{
+	memset(pair, 0, sizeof(struct rc_pair_instruction)); /* start from an all-zero pair */
+	/* Decide which of the RGB and alpha halves the instruction occupies. */
+	int needrgb, needalpha, istranscendent;
+	classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
+
+	if (needrgb) {
+		if (istranscendent) /* transcendental: RGB half gets REPL_ALPHA, not the opcode itself */
+			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
+		else
+			pair->RGB.Opcode = inst->Opcode;
+		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+			pair->RGB.Saturate = 1;
+	}
+	if (needalpha) {
+		pair->Alpha.Opcode = inst->Opcode;
+		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+			pair->Alpha.Saturate = 1;
+	}
+	/* Translate the source operands for every half that is in use. */
+	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+	int i;
+
+	for(i = 0; i < opcode->NumSrcRegs; ++i) {
+		int source;
+		if (needrgb && !istranscendent) { /* a transcendental RGB half gets no arguments */
+			unsigned int srcrgb = 0;
+			unsigned int srcalpha = 0;
+			int j;
+			for(j = 0; j < 3; ++j) { /* selectors of the x, y and z channels */
+				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
+				if (swz < 3) /* selector names x, y or z of the register */
+					srcrgb = 1;
+				else if (swz < 4) /* selector names w of the register */
+					srcalpha = 1;
+			}
+			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+							inst->SrcReg[i].File, inst->SrcReg[i].Index);
+			pair->RGB.Arg[i].Source = source;
+			pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; /* low 9 bits: x, y, z selectors */
+			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
+			pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
+		}
+		if (needalpha) {
+			unsigned int srcrgb = 0;
+			unsigned int srcalpha = 0;
+			unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3); /* X selector for transcendentals, else W */
+			if (swz < 3)
+				srcrgb = 1;
+			else if (swz < 4)
+				srcalpha = 1;
+			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+							inst->SrcReg[i].File, inst->SrcReg[i].Index);
+			pair->Alpha.Arg[i].Source = source;
+			pair->Alpha.Arg[i].Swizzle = swz;
+			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
+			pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
+		}
+	}
+
+	/* Destination handling: program outputs set output masks/targets, anything else a register write. */
+	if (inst->DstReg.File == RC_FILE_OUTPUT) {
+        if (inst->DstReg.Index == c->OutputDepth) { /* depth output: only the W write-mask bit is used */
+            pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+        } else {
+            for (i = 0; i < 4; i++) { /* look for the colour output being written; no match sets nothing */
+                if (inst->DstReg.Index == c->OutputColor[i]) {
+                    pair->RGB.Target = i;
+                    pair->Alpha.Target = i;
+                    pair->RGB.OutputWriteMask |=
+                        inst->DstReg.WriteMask & RC_MASK_XYZ;
+                    pair->Alpha.OutputWriteMask |=
+                        GET_BIT(inst->DstReg.WriteMask, 3);
+                    break;
+                }
+            }
+        }
+	} else {
+		if (needrgb) {
+			pair->RGB.DestIndex = inst->DstReg.Index;
+			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
+		}
+		if (needalpha) {
+			pair->Alpha.DestIndex = inst->DstReg.Index;
+			pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+		}
+	}
+
+	if (inst->WriteALUResult) { /* forward the ALU-result write and its comparison, if any */
+		pair->WriteALUResult = inst->WriteALUResult;
+		pair->ALUResultCompare = inst->ALUResultCompare;
+	}
+}
+
+
+/**
+ * Translate all ALU instructions into corresponding pair instructions,
+ * performing no other changes.
+ */
+void rc_pair_translate(struct r300_fragment_program_compiler *c)
+{
+	struct rc_instruction * const list = &c->Base.Program.Instructions;
+
+	for(struct rc_instruction * inst = list->Next; inst != list; inst = inst->Next) {
+		if (inst->Type != RC_INSTRUCTION_NORMAL)
+			continue;
+
+		/* Texture, flow-control and KIL instructions stay unpaired. */
+		const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
+		if (info->HasTexture || info->IsFlowControl || info->Opcode == RC_OPCODE_KIL)
+			continue;
+
+		/* U.I and U.P share storage: rewrite a copy, then build the pair from it. */
+		struct rc_sub_instruction copy = inst->U.I;
+		final_rewrite(&copy);
+		inst->Type = RC_INSTRUCTION_PAIR;
+		set_pair_instruction(c, &inst->U.P, &copy);
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
new file mode 100644
index 0000000000..a3c41d7bd4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+
+
+/**
+ * Transform the program in place in the following way:
+ *  1. Walk the instruction list from the first instruction to the last.
+ *  2. For every instruction, try the given transformations in order.
+ *  3. If one of the transformations returns non-zero, assume that it
+ *     has emitted the appropriate replacement instruction(s) and stop
+ *     trying further transformations; otherwise, the instruction is
+ *     left in the program unchanged.
+ *
+ * \note The transformation is currently not recursive; in other words,
+ * instructions emitted by transformations are not transformed.
+ *
+ * \note The transform is called 'local' because it can only look at
+ * one instruction at a time.
+ */
+void radeonLocalTransform(
+	struct radeon_compiler * c,
+	int num_transformations,
+	struct radeon_program_transformation* transformations)
+{
+	struct rc_instruction * const list = &c->Program.Instructions;
+	struct rc_instruction * cur;
+	struct rc_instruction * next;
+
+	for(cur = list->Next; cur != list; cur = next) {
+		int i;
+
+		/* Fetch the successor first: a transformation may unlink cur. */
+		next = cur->Next;
+		for(i = 0; i < num_transformations; ++i) {
+			struct radeon_program_transformation* t = &transformations[i];
+			if (t->function(c, cur, t->userData))
+				break; /* the first transformation to accept the instruction wins */
+		}
+	}
+}
+
+/**
+ * Left multiplication of a register with a swizzle
+ */
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
+{
+	struct rc_src_register result = srcreg;
+	int chan;
+	result.Swizzle = 0;
+	result.Negate = 0;
+	for(chan = 0; chan < 4; ++chan) {
+		rc_swizzle sel = GET_SWZ(swizzle, chan);
+		if (sel >= 4) { /* not X/Y/Z/W (e.g. ZERO, ONE): copied as-is, no negate bit */
+			result.Swizzle |= sel << (chan*3);
+			continue;
+		}
+		result.Swizzle |= GET_SWZ(srcreg.Swizzle, sel) << (chan*3);
+		result.Negate |= GET_BIT(srcreg.Negate, sel) << chan;
+	}
+	return result;
+}
+
+unsigned int rc_find_free_temporary(struct radeon_compiler * c)
+{
+	char used[RC_REGISTER_MAX_INDEX];
+	unsigned int i;
+	struct rc_instruction * rcinst;
+
+	memset(used, 0, sizeof(used));
+	/* NOTE(review): every instruction is read through U.I, so this presumably
+	 * has to run while all instructions are still RC_INSTRUCTION_NORMAL. */
+	for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) {
+		const struct rc_sub_instruction *inst = &rcinst->U.I;
+		const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode);
+		unsigned int k;
+
+		/* Index is a signed field (negative values may be used for relative
+		 * addressing), so range-check it before indexing used[]. */
+		for (k = 0; k < opcode->NumSrcRegs; k++) {
+			int idx = inst->SrcReg[k].Index;
+			if (inst->SrcReg[k].File == RC_FILE_TEMPORARY && idx >= 0 && idx < RC_REGISTER_MAX_INDEX)
+				used[idx] = 1;
+		}
+		if (opcode->HasDstReg && inst->DstReg.File == RC_FILE_TEMPORARY &&
+		    inst->DstReg.Index >= 0 && inst->DstReg.Index < RC_REGISTER_MAX_INDEX)
+			used[inst->DstReg.Index] = 1;
+	}
+
+	for (i = 0; i < RC_REGISTER_MAX_INDEX; i++)
+		if (!used[i])
+			return i; /* lowest index not referenced as a temporary */
+
+	rc_error(c, "Ran out of temporary registers\n");
+	return 0; /* failure is reported through rc_error; 0 is only a placeholder */
+}
+
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
+{
+	struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(*inst));
+	unsigned int i;
+
+	memset(inst, 0, sizeof(*inst));
+	/* Defaults: illegal opcode, full write mask, XYZW swizzle on every source. */
+	inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+	inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	for(i = 0; i < 3; ++i)
+		inst->U.I.SrcReg[i].Swizzle = RC_SWIZZLE_XYZW;
+
+	return inst;
+}
+
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst)
+{
+	inst->Prev = after;
+	inst->Next = after->Next;
+	/* inst now knows both neighbours; make each of them point back at it. */
+	inst->Prev->Next = inst;
+	inst->Next->Prev = inst;
+}
+
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+{
+	/* Allocate a default-initialised instruction and link it in right behind 'after'. */
+	struct rc_instruction * fresh = rc_alloc_instruction(c);
+
+	rc_insert_instruction(after, fresh);
+	return fresh;
+}
+
+void rc_remove_instruction(struct rc_instruction * inst)
+{
+	inst->Prev->Next = inst->Next; /* the neighbours are linked past inst... */
+	inst->Next->Prev = inst->Prev; /* ...while inst's own Prev/Next are left untouched */
+}
+
+/**
+ * Return the number of instructions in the program.
+ */
+unsigned int rc_recompute_ips(struct radeon_compiler * c)
+{
+	struct rc_instruction * const list = &c->Program.Instructions;
+	struct rc_instruction * inst = list->Next;
+	unsigned int count = 0;
+
+	/* Number the instructions 0, 1, 2, ... in list order. */
+	while(inst != list) {
+		inst->IP = count++;
+		inst = inst->Next;
+	}
+	/* The list head is not a real instruction; it gets an obviously bogus IP. */
+	list->IP = 0xcafedead;
+	return count;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
new file mode 100644
index 0000000000..e318867696
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_H_
+#define __RADEON_PROGRAM_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include "radeon_opcodes.h"
+#include "radeon_code.h"
+#include "radeon_program_constants.h"
+#include "radeon_program_pair.h"
+
+struct radeon_compiler;
+
+struct rc_src_register {
+	unsigned int File:3; /**< Register file the operand is read from (RC_FILE_*). */
+
+	/** Negative values may be used for relative addressing. */
+	signed int Index:(RC_REGISTER_INDEX_BITS+1);
+	unsigned int RelAddr:1;
+
+	unsigned int Swizzle:12; /**< Four 3-bit channel selectors; the one for X is in the lowest bits. */
+
+	/** Take the component-wise absolute value */
+	unsigned int Abs:1;
+
+	/** Post-Abs negation, one bit per channel (bit 0 is X). */
+	unsigned int Negate:4;
+};
+
+struct rc_dst_register {
+	unsigned int File:3; /**< Register file that is written (RC_FILE_*). */
+
+	/** Negative values may be used for relative addressing. */
+	signed int Index:(RC_REGISTER_INDEX_BITS+1);
+	unsigned int RelAddr:1;
+
+	unsigned int WriteMask:4; /**< One enable bit per channel (RC_MASK_*); bit 3 is W. */
+};
+
+/**
+ * Instructions are maintained by the compiler in a doubly linked list
+ * of these structures.
+ *
+ * This instruction format is intended to be expanded for hardware-specific
+ * trickery. At different stages of compilation, a different set of
+ * instruction types may be valid.
+ */
+struct rc_sub_instruction {
+	struct rc_src_register SrcReg[3]; /**< Only the first rc_opcode_info::NumSrcRegs entries are consulted. */
+	struct rc_dst_register DstReg; /**< Consulted when rc_opcode_info::HasDstReg is set. */
+
+	/**
+	 * Opcode of this instruction, according to \ref rc_opcode enums.
+	 */
+	unsigned int Opcode:8;
+
+	/**
+	 * Saturate each value of the result to the range [0,1] or [-1,1],
+	 * according to \ref rc_saturate_mode enums.
+	 */
+	unsigned int SaturateMode:2;
+
+	/**
+	 * Writing to the special register RC_SPECIAL_ALU_RESULT
+	 */
+	/*@{*/
+	unsigned int WriteALUResult:2; /**< Zero means no ALU-result write (see RC_ALURESULT_X / _W). */
+	unsigned int ALUResultCompare:3;
+	/*@}*/
+
+	/**
+	 * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
+	 */
+	/*@{*/
+	/** Source texture unit. */
+	unsigned int TexSrcUnit:5;
+
+	/** Source texture target, one of the \ref rc_texture_target enums */
+	unsigned int TexSrcTarget:3;
+
+	/** True if tex instruction should do shadow comparison */
+	unsigned int TexShadow:1;
+	/*@}*/
+};
+
+typedef enum {
+	RC_INSTRUCTION_NORMAL = 0, /**< payload is rc_instruction::U.I */
+	RC_INSTRUCTION_PAIR /**< payload is rc_instruction::U.P */
+} rc_instruction_type;
+
+struct rc_instruction {
+	struct rc_instruction * Prev; /**< previous node; the list is headed by rc_program::Instructions */
+	struct rc_instruction * Next; /**< next node; iteration stops when it reaches the list head again */
+
+	rc_instruction_type Type; /**< tells which member of U is valid */
+	union {
+		struct rc_sub_instruction I; /**< valid while Type == RC_INSTRUCTION_NORMAL */
+		struct rc_pair_instruction P; /**< valid once Type == RC_INSTRUCTION_PAIR */
+	} U;
+
+	/**
+	 * Warning: IPs are not stable. If you want to use them,
+	 * you need to recompute them at the beginning of each pass
+	 * using \ref rc_recompute_ips
+	 */
+	unsigned int IP;
+};
+
+struct rc_program {
+	/**
+	 * Instructions.Next points to the first instruction,
+	 * Instructions.Prev points to the last instruction.
+	 */
+	struct rc_instruction Instructions; /* list head only: loops over the program stop at its address */
+
+	/* Long term, we should probably remove InputsRead & OutputsWritten,
+	 * since updating dependent state can be fragile, and they aren't
+	 * actually used very often. */
+	uint32_t InputsRead;
+	uint32_t OutputsWritten;
+	uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
+
+	struct rc_constant_list Constants; /**< Constant list; immediates are added with rc_constants_add_immediate_scalar. */
+};
+
+enum { /* NOTE(review): radeon_pair_translate.c uses RC_OPCODE_REPL_ALPHA instead - confirm this constant is still needed */
+	OPCODE_REPL_ALPHA = MAX_RC_OPCODE /**< used in paired instructions */
+};
+
+
+static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
+{
+	/* Selectors with bit 2 set (values 4..7) do not pick a channel of swz;
+	 * they are returned unchanged. */
+	return (idx & 0x4) ? idx : GET_SWZ(swz, idx);
+}
+
+static inline unsigned int combine_swizzles4(unsigned int src,
+		rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
+{
+	/* Resolve each selector against src, then pack the results
+	 * 3 bits per channel with X in the lowest bits. */
+	const unsigned int x = get_swz(src, swz_x);
+	const unsigned int y = get_swz(src, swz_y);
+	const unsigned int z = get_swz(src, swz_z);
+	const unsigned int w = get_swz(src, swz_w);
+
+	return x | (y << 3) | (z << 6) | (w << 9);
+}
+
+static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz)
+{
+	/* Same as combine_swizzles4, with the four selectors first
+	 * unpacked from the packed swizzle swz. */
+	const rc_swizzle sel_x = GET_SWZ(swz, RC_SWIZZLE_X);
+	const rc_swizzle sel_y = GET_SWZ(swz, RC_SWIZZLE_Y);
+	const rc_swizzle sel_z = GET_SWZ(swz, RC_SWIZZLE_Z);
+	const rc_swizzle sel_w = GET_SWZ(swz, RC_SWIZZLE_W);
+
+	return combine_swizzles4(src, sel_x, sel_y, sel_z, sel_w);
+}
+
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
+
+static inline void reset_srcreg(struct rc_src_register* reg)
+{
+	memset(reg, 0, sizeof(*reg)); /* clears File, Index, RelAddr, Abs and Negate */
+	reg->Swizzle = RC_SWIZZLE_XYZW;
+}
+
+
+/**
+ * A transformation that can be passed to \ref radeonLocalTransform.
+ *
+ * The function will be called once for each instruction.
+ * It has to either emit the appropriate transformed code for the instruction
+ * and return true, or return false if it doesn't understand the
+ * instruction.
+ *
+ * The function gets passed the userData as last parameter.
+ */
+struct radeon_program_transformation {
+	int (*function)( /* non-zero return: instruction handled, later transformations are skipped */
+		struct radeon_compiler*,
+		struct rc_instruction*,
+		void*);
+	void *userData; /* handed to function unchanged as its last argument */
+};
+
+void radeonLocalTransform(
+	struct radeon_compiler *c,
+	int num_transformations,
+	struct radeon_program_transformation* transformations);
+
+unsigned int rc_find_free_temporary(struct radeon_compiler * c);
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst);
+void rc_remove_instruction(struct rc_instruction * inst);
+
+unsigned int rc_recompute_ips(struct radeon_compiler * c);
+
+void rc_print_program(const struct rc_program *prog);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
new file mode 100644
index 0000000000..c922d3d9a4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -0,0 +1,975 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * Shareable transformations that transform "special" ALU instructions
+ * into ALU instructions that are supported by hardware.
+ *
+ */
+
+#include "radeon_program_alu.h"
+
+#include "radeon_compiler.h"
+
+
+static struct rc_instruction *emit1(
+	struct radeon_compiler * c, struct rc_instruction * after,
+	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	struct rc_src_register SrcReg)
+{
+	struct rc_instruction *fpi = rc_insert_new_instruction(c, after); /* linked in right behind 'after' */
+	struct rc_sub_instruction *sub = &fpi->U.I;
+	sub->Opcode = Opcode;
+	sub->SaturateMode = Saturate;
+	sub->DstReg = DstReg;
+	sub->SrcReg[0] = SrcReg;
+	return fpi;
+}
+
+static struct rc_instruction *emit2(
+	struct radeon_compiler * c, struct rc_instruction * after,
+	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
+{
+	struct rc_instruction *fpi = rc_insert_new_instruction(c, after); /* linked in right behind 'after' */
+	struct rc_sub_instruction *sub = &fpi->U.I;
+	sub->Opcode = Opcode;
+	sub->SaturateMode = Saturate;
+	sub->DstReg = DstReg;
+	sub->SrcReg[0] = SrcReg0;
+	sub->SrcReg[1] = SrcReg1;
+	return fpi;
+}
+
+static struct rc_instruction *emit3(
+	struct radeon_compiler * c, struct rc_instruction * after,
+	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
+	struct rc_src_register SrcReg2)
+{
+	struct rc_instruction *fpi = rc_insert_new_instruction(c, after); /* linked in right behind 'after' */
+	struct rc_sub_instruction *sub = &fpi->U.I;
+	sub->Opcode = Opcode;
+	sub->SaturateMode = Saturate;
+	sub->DstReg = DstReg;
+	sub->SrcReg[0] = SrcReg0;
+	sub->SrcReg[1] = SrcReg1;
+	sub->SrcReg[2] = SrcReg2;
+	return fpi;
+}
+
+static struct rc_dst_register dstreg(int file, int index)
+{
+	/* All four channels enabled; RelAddr is left at zero by the initializer. */
+	struct rc_dst_register dst = {
+		.File = file, .Index = index,
+		.WriteMask = RC_MASK_XYZW
+	};
+	return dst;
+}
+
+static struct rc_dst_register dstregtmpmask(int index, int mask)
+{
+	/* Destination in the temporary file, restricted to the
+	 * channels enabled in 'mask'. */
+	struct rc_dst_register dst = dstreg(RC_FILE_TEMPORARY, index);
+
+	dst.WriteMask = mask;
+	return dst;
+}
+
+static const struct rc_src_register builtin_zero = { /* no register: every channel selects the constant 0 */
+	.File = RC_FILE_NONE,
+	.Index = 0,
+	.Swizzle = RC_SWIZZLE_0000
+};
+static const struct rc_src_register builtin_one = { /* no register: every channel selects the constant 1 */
+	.File = RC_FILE_NONE,
+	.Index = 0,
+	.Swizzle = RC_SWIZZLE_1111
+};
+static const struct rc_src_register srcreg_undefined = { /* template for srcreg()/srcregswz(): XYZW swizzle, rest zero */
+	.File = RC_FILE_NONE,
+	.Index = 0,
+	.Swizzle = RC_SWIZZLE_XYZW
+};
+
+static struct rc_src_register srcreg(int file, int index)
+{
+	struct rc_src_register src = srcreg_undefined; /* XYZW swizzle, no Abs/Negate/RelAddr */
+	src.File = file;
+	src.Index = index;
+	return src;
+}
+
+static struct rc_src_register srcregswz(int file, int index, int swz)
+{
+	/* Same as srcreg(), but with an explicit swizzle instead of XYZW. */
+	struct rc_src_register src = srcreg(file, index);
+
+	src.Swizzle = swz;
+	return src;
+}
+
+static struct rc_src_register absolute(struct rc_src_register reg)
+{
+	/* reg is a by-value copy, so it can be edited in place and returned. */
+	reg.Abs = 1;
+	reg.Negate = RC_MASK_NONE; /* Negate applies after Abs, so clear it to get a plain |x| */
+	return reg;
+}
+
+static struct rc_src_register negate(struct rc_src_register reg)
+{
+	/* reg is a by-value copy; toggle the negate bit of all four channels. */
+	reg.Negate ^= RC_MASK_XYZW;
+	return reg;
+}
+
+static struct rc_src_register swizzle(struct rc_src_register reg,
+		rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
+{
+	/* Apply the selectors on top of reg's existing swizzle; all else is kept. */
+	reg.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
+	return reg;
+}
+
+static struct rc_src_register swizzle_smear(struct rc_src_register reg,
+		rc_swizzle x)
+{
+	return swizzle(reg, x, x, x, x); /* the same selector for all four channels */
+}
+
+static struct rc_src_register swizzle_xxxx(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_X); /* replicate reg's X selection into every channel */
+}
+
+static struct rc_src_register swizzle_yyyy(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_Y); /* replicate reg's Y selection into every channel */
+}
+
+static struct rc_src_register swizzle_zzzz(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_Z); /* replicate reg's Z selection into every channel */
+}
+
+static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
+{
+	return swizzle_smear(reg, RC_SWIZZLE_W); /* replicate reg's W selection into every channel */
+}
+
+static void transform_ABS(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* ABS dst, src  ==>  MOV dst, |src| (same saturate mode), replacing inst. */
+	struct rc_src_register src = absolute(inst->U.I.SrcReg[0]);
+
+	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
+	rc_remove_instruction(inst);
+}
+
+static void transform_CEIL(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* Assuming:
+	 *     ceil(x) = -floor(-x)
+	 *
+	 * After inlining floor:
+	 *     ceil(x) = -(-x-frac(-x))
+	 *
+	 * After simplification:
+	 *     ceil(x) = x+frac(-x)
+	 */
+	/* Both new instructions are placed in front of inst, which is then unlinked. */
+	int tempreg = rc_find_free_temporary(c); /* scratch register holding frac(-x) */
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]));
+	emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg)); /* dst = x + frac(-x) */
+	rc_remove_instruction(inst);
+}
+
+static void transform_DP3(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register src[2] = { inst->U.I.SrcReg[0], inst->U.I.SrcReg[1] };
+	int i;
+
+	/* Force the W term of both operands to an un-negated zero, then emit a DP4. */
+	for(i = 0; i < 2; ++i) {
+		src[i].Negate &= ~RC_MASK_W;
+		src[i].Swizzle = (src[i].Swizzle & ~(7 << (3 * 3))) | (RC_SWIZZLE_ZERO << (3 * 3));
+	}
+	emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src[0], src[1]);
+	rc_remove_instruction(inst);
+}
+
+static void transform_DPH(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* DPH becomes a DP4 whose first operand has W replaced by an un-negated one. */
+	struct rc_src_register src0 = inst->U.I.SrcReg[0];
+	src0.Swizzle = (src0.Swizzle & ~(7 << (3 * 3))) | (RC_SWIZZLE_ONE << (3 * 3));
+	src0.Negate &= ~RC_MASK_W;
+	emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
+	rc_remove_instruction(inst);
+}
+
+/**
+ * [1, src0.y*src1.y, src0.z, src1.w]
+ * So basically MUL with lotsa swizzling.
+ */
+static void transform_DST(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	struct rc_src_register lhs = swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE); /* (1, y, z, 1) */
+	struct rc_src_register rhs = swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W); /* (1, y, 1, w) */
+	emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg, lhs, rhs);
+	rc_remove_instruction(inst);
+}
+
+static void transform_FLR(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	int tempreg = rc_find_free_temporary(c); /* scratch register holding frac(x) */
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]);
+	emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg))); /* dst = x - frac(x) */
+	rc_remove_instruction(inst);
+}
+
+/**
+ * Definition of LIT (from ARB_fragment_program):
+ *
+ *  tmp = VectorLoad(op0);
+ *  if (tmp.x < 0) tmp.x = 0;
+ *  if (tmp.y < 0) tmp.y = 0;
+ *  if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ *  else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ *  result.x = 1.0;
+ *  result.y = tmp.x;
+ *  result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ *  result.w = 1.0;
+ *
+ * The longest path of computation is the one leading to result.z,
+ * consisting of 5 operations. This implementation of LIT takes
+ * 5 slots, if the subsequent optimization passes are clever enough
+ * to pair instructions correctly.
+ */
+static void transform_LIT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	unsigned int constant;
+	unsigned int constant_swizzle;
+	unsigned int temp;
+	struct rc_src_register srctemp;
+
+	constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
+	/* The expansion writes all four channels of a temporary; redirect any other destination. */
+	if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
+		struct rc_instruction * inst_mov;
+
+		inst_mov = emit1(c, inst,
+			RC_OPCODE_MOV, 0, inst->U.I.DstReg,
+			srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c)));
+		/* The MOV sits after inst and copies the temporary into the original destination. */
+		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	}
+
+	temp = inst->U.I.DstReg.Index;
+	srctemp = srcreg(RC_FILE_TEMPORARY, temp);
+
+	/* tmp.x = max(0.0, Src.x); */
+	/* tmp.y = max(0.0, Src.y); */
+	/* tmp.z = clamp(Src.w, -128+eps, 128-eps), with the lower bound applied in tmp.w first; */
+	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+		dstregtmpmask(temp, RC_MASK_XYW),
+		inst->U.I.SrcReg[0],
+		swizzle(srcreg(RC_FILE_CONSTANT, constant),
+			RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3));
+	emit2(c, inst->Prev, RC_OPCODE_MIN, 0,
+		dstregtmpmask(temp, RC_MASK_Z),
+		swizzle_wwww(srctemp),
+		negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));
+
+	/* tmp.w = Pow(tmp.y, tmp.z), computed as EX2(LG2(tmp.y) * tmp.z) */
+	emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
+		dstregtmpmask(temp, RC_MASK_W),
+		swizzle_yyyy(srctemp));
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+		dstregtmpmask(temp, RC_MASK_W),
+		swizzle_wwww(srctemp),
+		swizzle_zzzz(srctemp));
+	emit1(c, inst->Prev, RC_OPCODE_EX2, 0,
+		dstregtmpmask(temp, RC_MASK_W),
+		swizzle_wwww(srctemp));
+
+	/* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
+		dstregtmpmask(temp, RC_MASK_Z),
+		negate(swizzle_xxxx(srctemp)),
+		swizzle_wwww(srctemp),
+		builtin_zero);
+
+	/* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
+	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
+		dstregtmpmask(temp, RC_MASK_XYW),
+		swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_LRP(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	int tempreg = rc_find_free_temporary(c);
+	/* tmp = src1 - src2 */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+		dstreg(RC_FILE_TEMPORARY, tempreg),
+		inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, /* dst = src0 * tmp + src2 */
+		inst->U.I.DstReg,
+		inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]);
+
+	rc_remove_instruction(inst);
+}
+
+static void transform_POW(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	int tempreg = rc_find_free_temporary(c);
+	struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg);
+	struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg);
+	tempdst.WriteMask = RC_MASK_W; /* the intermediate value lives in tmp.w only */
+	tempsrc.Swizzle = RC_SWIZZLE_WWWW;
+	/* tmp.w = LG2(src0.x); tmp.w = tmp.w * src1.x; dst = EX2(tmp.w) */
+	emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
+	emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
+
+	rc_remove_instruction(inst);
+}
+/* Keep RSQ but wrap its operand in absolute(); unlike the other lowerings nothing is emitted or removed. */
+static void transform_RSQ(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]);
+}
+/* Replace SEQ with ADD + CMP: tmp = src0 - src1, dst = (-|tmp| < 0) ? 0 : 1, so only tmp == 0 produces 1. */
+static void transform_SEQ(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	int tempreg = rc_find_free_temporary(c);
+	/* CMP yields its second operand where the first is negative, otherwise the third; it carries the SEQ's saturate mode. */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one);
+
+	rc_remove_instruction(inst);
+}
+/* Replace SFL with a MOV of builtin_zero into the same destination (same saturate mode), then remove the SFL. */
+static void transform_SFL(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero);
+	rc_remove_instruction(inst);
+}
+/* Replace SGE with ADD + CMP: tmp = src0 - src1, dst = (tmp < 0) ? 0 : 1, i.e. 1 where src0 >= src1. */
+static void transform_SGE(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	int tempreg = rc_find_free_temporary(c);
+	/* CMP yields its second operand where the first is negative, otherwise the third. */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+
+	rc_remove_instruction(inst);
+}
+/* Replace SGT with ADD + CMP: tmp = src1 - src0, dst = (tmp < 0) ? 1 : 0, i.e. 1 where src0 > src1. */
+static void transform_SGT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	int tempreg = rc_find_free_temporary(c);
+	/* Note the operand order: here src0 is the negated term of the ADD. */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+
+	rc_remove_instruction(inst);
+}
+/* Replace SLE with ADD + CMP: tmp = src1 - src0, dst = (tmp < 0) ? 0 : 1, i.e. 1 where src0 <= src1. */
+static void transform_SLE(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	int tempreg = rc_find_free_temporary(c);
+	/* Same ADD as SGT; only the two CMP result operands are swapped. */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+
+	rc_remove_instruction(inst);
+}
+/* Replace SLT with ADD + CMP: tmp = src0 - src1, dst = (tmp < 0) ? 1 : 0, i.e. 1 where src0 < src1. */
+static void transform_SLT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	int tempreg = rc_find_free_temporary(c);
+	/* Same ADD as SGE; only the two CMP result operands are swapped. */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+
+	rc_remove_instruction(inst);
+}
+/* Replace SNE with ADD + CMP: tmp = src0 - src1, dst = (-|tmp| < 0) ? 1 : 0, so any non-zero difference produces 1. */
+static void transform_SNE(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	int tempreg = rc_find_free_temporary(c);
+	/* Same sequence as SEQ; only the two CMP result operands are swapped. */
+	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero);
+
+	rc_remove_instruction(inst);
+}
+/* Rewrite SUB in place as ADD with the second operand negated; no instruction is added or removed. */
+static void transform_SUB(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	inst->U.I.Opcode = RC_OPCODE_ADD;
+	inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]);
+}
+/* Rewrite SWZ in place as MOV; only the opcode changes, the operands are left exactly as they were. */
+static void transform_SWZ(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	inst->U.I.Opcode = RC_OPCODE_MOV;
+}
+/* Replace XPD (cross product) with MUL + MAD: dst = src0.yzxw * src1.zxyw - src0.zxyw * src1.yzxw. */
+static void transform_XPD(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	int tempreg = rc_find_free_temporary(c);
+	/* tmp holds the subtracted products; in W both products are src0.w * src1.w, so they cancel. */
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg),
+		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
+		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+		negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
+
+	rc_remove_instruction(inst);
+}
+
+
+/**
+ * Transformation for @ref radeonClauseLocalTransform; user data is ignored.
+ *
+ * Lowers the following ALU instructions:
+ *  ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD
+ * into sequences of:
+ *  MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
+ *
+ * RSQ is kept but gets an explicit absolute value on its operand, which
+ * makes it match Radeon's native RSQ.
+ *
+ * @return 1 when the opcode was handled, 0 when it is not one of the above.
+ * @note should be applicable to R300 and R500 fragment programs.
+ */
+int radeonTransformALU(
+	struct radeon_compiler * c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	switch(inst->U.I.Opcode) {
+	case RC_OPCODE_ABS: transform_ABS(c, inst); break;
+	case RC_OPCODE_CEIL: transform_CEIL(c, inst); break;
+	case RC_OPCODE_DPH: transform_DPH(c, inst); break;
+	case RC_OPCODE_DST: transform_DST(c, inst); break;
+	case RC_OPCODE_FLR: transform_FLR(c, inst); break;
+	case RC_OPCODE_LIT: transform_LIT(c, inst); break;
+	case RC_OPCODE_LRP: transform_LRP(c, inst); break;
+	case RC_OPCODE_POW: transform_POW(c, inst); break;
+	case RC_OPCODE_RSQ: transform_RSQ(c, inst); break;
+	case RC_OPCODE_SEQ: transform_SEQ(c, inst); break;
+	case RC_OPCODE_SFL: transform_SFL(c, inst); break;
+	case RC_OPCODE_SGE: transform_SGE(c, inst); break;
+	case RC_OPCODE_SGT: transform_SGT(c, inst); break;
+	case RC_OPCODE_SLE: transform_SLE(c, inst); break;
+	case RC_OPCODE_SLT: transform_SLT(c, inst); break;
+	case RC_OPCODE_SNE: transform_SNE(c, inst); break;
+	case RC_OPCODE_SUB: transform_SUB(c, inst); break;
+	case RC_OPCODE_SWZ: transform_SWZ(c, inst); break;
+	case RC_OPCODE_XPD: transform_XPD(c, inst); break;
+	default: return 0;	/* not an opcode this pass rewrites */
+	}
+	return 1;	/* every case above rewrote the instruction */
+}
+
+/* Rewrite ABS in place as MAX of the operand and a sign-flipped copy of it; no instruction is added or removed. */
+static void transform_r300_vertex_ABS(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* Note: r500 can take absolute values, but r300 cannot. */
+	inst->U.I.Opcode = RC_OPCODE_MAX;
+	inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0];
+	inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;	/* toggle the negate bit of all four components of the copy */
+}
+/* Replace CMP with SLT followed by an already-lowered LRP; both are emitted with saturate mode 0, not the CMP's own mode. */
+static void transform_r300_vertex_CMP(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* There is no decent CMP available, so let's rig one up.
+	 * CMP is defined as dst = src0 < 0.0 ? src1 : src2
+	 * The following sequence consumes two temps and two extra slots
+	 * (the second temp and the second slot is consumed by transform_LRP),
+	 * but should be equivalent:
+	 *
+	 * SLT tmp0, src0, 0.0
+	 * LRP dst, tmp0, src1, src2
+	 *
+	 * Yes, I know, I'm a mad scientist. ~ C. & M. */
+	int tempreg0 = rc_find_free_temporary(c);
+
+	/* SLT tmp0, src0, 0.0 */
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+		dstreg(RC_FILE_TEMPORARY, tempreg0),
+		inst->U.I.SrcReg[0], builtin_zero);
+
+	/* LRP dst, tmp0, src1, src2 -- the LRP returned by emit3 is handed straight to transform_LRP, which expands and removes it */
+	transform_LRP(c,
+		emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
+		      inst->U.I.DstReg,
+		      srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1],  inst->U.I.SrcReg[2]));
+	/* The original CMP has been replaced, so unlink it. */
+	rc_remove_instruction(inst);
+}
+/* Keep LIT but make it read a copy of its operand whose Y component is raised to at least a tiny positive constant. */
+static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	int tempreg = rc_find_free_temporary(c);
+	unsigned constant_swizzle;
+	int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
+							 0.0000000000000000001,
+							 &constant_swizzle);
+
+	/* MOV tmp, src */
+	emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
+		dstreg(RC_FILE_TEMPORARY, tempreg),
+		inst->U.I.SrcReg[0]);
+
+	/* MAX tmp.y, tmp, 0.00...001 (the write mask limits the clamp to the Y component) */
+	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+		dstregtmpmask(tempreg, RC_MASK_Y),
+		srcreg(RC_FILE_TEMPORARY, tempreg),
+		srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+
+	inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg);
+}
+/* Lower SEQ to SGE(x, y) * SGE(y, x). NOTE(review): the MUL reads the destination register back and all three instructions use saturate mode 0 -- confirm both are fine for every caller. */
+static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
+	struct rc_instruction *inst)
+{
+	/* x = y  <==>  x >= y && y >= x */
+	int tmp = rc_find_free_temporary(c);
+
+	/* tmp = (x >= y), restricted to the components the SEQ writes */
+	emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+	      dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+	      inst->U.I.SrcReg[0],
+	      inst->U.I.SrcReg[1]);
+
+	/* dst = (y >= x) */
+	emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+	      inst->U.I.DstReg,
+	      inst->U.I.SrcReg[1],
+	      inst->U.I.SrcReg[0]);
+
+	/* x && y  =  x * y */
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+	      inst->U.I.DstReg,
+	      srcreg(RC_FILE_TEMPORARY, tmp),
+	      srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+	rc_remove_instruction(inst);
+}
+/* Lower SNE to MAX(SLT(x, y), SLT(y, x)). NOTE(review): the MAX reads the destination register back and all three instructions use saturate mode 0 -- confirm both are fine for every caller. */
+static void transform_r300_vertex_SNE(struct radeon_compiler *c,
+	struct rc_instruction *inst)
+{
+	/* x != y  <==>  x < y || y < x */
+	int tmp = rc_find_free_temporary(c);
+
+	/* tmp = (x < y), restricted to the components the SNE writes */
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+	      dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+	      inst->U.I.SrcReg[0],
+	      inst->U.I.SrcReg[1]);
+
+	/* dst = (y < x) */
+	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+	      inst->U.I.DstReg,
+	      inst->U.I.SrcReg[1],
+	      inst->U.I.SrcReg[0]);
+
+	/* x || y  =  max(x, y) */
+	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+	      inst->U.I.DstReg,
+	      srcreg(RC_FILE_TEMPORARY, tmp),
+	      srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+	rc_remove_instruction(inst);
+}
+/* Rewrite SGT in place as SLT with the negate mask of both operands toggled; no instruction is added or removed. */
+static void transform_r300_vertex_SGT(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* x > y  <==>  -x < -y */
+	inst->U.I.Opcode = RC_OPCODE_SLT;
+	inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+	inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+/* Rewrite SLE in place as SGE with the negate mask of both operands toggled; no instruction is added or removed. */
+static void transform_r300_vertex_SLE(struct radeon_compiler* c,
+	struct rc_instruction* inst)
+{
+	/* x <= y  <==>  -x >= -y */
+	inst->U.I.Opcode = RC_OPCODE_SGE;
+	inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+	inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+/**
+ * Transformation for radeonLocalTransform that rewrites ALU instructions
+ * which are not native to the r300 up to r500 vertex engine.
+ * Returns 1 when the instruction was handled here, 0 when it was left alone. */
+int r300_transform_vertex_alu(
+	struct radeon_compiler * c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	switch(inst->U.I.Opcode) {
+	case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); break;
+	case RC_OPCODE_CEIL: transform_CEIL(c, inst); break;
+	case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); break;
+	case RC_OPCODE_DP3: transform_DP3(c, inst); break;
+	case RC_OPCODE_DPH: transform_DPH(c, inst); break;
+	case RC_OPCODE_FLR: transform_FLR(c, inst); break;
+	case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); break;
+	case RC_OPCODE_LRP: transform_LRP(c, inst); break;
+	case RC_OPCODE_SEQ:
+		/* Only rewritten when the target is not an R500. */
+		if (c->is_r500)
+			return 0;
+		transform_r300_vertex_SEQ(c, inst);
+		break;
+	case RC_OPCODE_SFL: transform_SFL(c, inst); break;
+	case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); break;
+	case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); break;
+	case RC_OPCODE_SNE:
+		/* Only rewritten when the target is not an R500. */
+		if (c->is_r500)
+			return 0;
+		transform_r300_vertex_SNE(c, inst);
+		break;
+	case RC_OPCODE_SUB: transform_SUB(c, inst); break;
+	case RC_OPCODE_SWZ: transform_SWZ(c, inst); break;
+	case RC_OPCODE_XPD: transform_XPD(c, inst); break;
+	default: return 0;
+	}
+	return 1;
+}
+/* Add the two immediate vectors used by the arithmetic sin/cos lowering and store their constant indices in constants[0] and constants[1]. */
+static void sincos_constants(struct radeon_compiler* c, unsigned int *constants)
+{
+	static const float trig_consts[2][4] = {
+		{
+			1.273239545,		/* 4/PI */
+			-0.405284735,		/* -4/(PI*PI) */
+			3.141592654,		/* PI */
+			0.2225			/* weight */
+		},
+		{
+			0.75,			/* offset added before FRC for COS */
+			0.5,			/* offset added before FRC for SIN */
+			0.159154943,		/* 1/(2*PI) */
+			6.283185307		/* 2*PI */
+		}
+	};
+
+	/* Registered in table order, so constants[n] refers to trig_consts[n]. */
+	constants[0] = rc_constants_add_immediate_vec4(&c->Program.Constants, trig_consts[0]);
+	constants[1] = rc_constants_add_immediate_vec4(&c->Program.Constants, trig_consts[1]);
+}
+
+/**
+ * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
+ * NOTE(review): radeonTransformTrigSimple passes frac(..) * 2*PI - PI here; confirm the range stated above.
+ * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
+ * MAD tmp.x, tmp.y, |src|, tmp.x
+ * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
+ * MAD dest, tmp.y, weight, tmp.x
+ */
+static void sin_approx(
+	struct radeon_compiler* c, struct rc_instruction * inst,
+	struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants)
+{
+	unsigned int tempreg = rc_find_free_temporary(c);
+	/* Only constants[0] is read (.xy scale factors, .w weight). All four instructions use saturate mode 0; inst itself is not removed here. */
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+		swizzle_xxxx(src),
+		srcreg(RC_FILE_CONSTANT, constants[0]));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X),
+		swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+		absolute(swizzle_xxxx(src)),
+		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y),
+		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
+		absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))),
+		negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst,
+		swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+		swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])),
+		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
+}
+
+/**
+ * Translate the trigonometric functions COS, SIN, and SCS using only the
+ * basic instructions MOV, ADD, MUL, MAD, FRC: the operand is wrapped with
+ * frac(x/(2*PI) + offset) * 2*PI - PI and fed to sin_approx(). SaturateMode is not applied.
+ * @return 1 if inst was COS/SIN/SCS and has been replaced, 0 otherwise. */
+int radeonTransformTrigSimple(struct radeon_compiler* c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	if (inst->U.I.Opcode != RC_OPCODE_COS &&
+	    inst->U.I.Opcode != RC_OPCODE_SIN &&
+	    inst->U.I.Opcode != RC_OPCODE_SCS)
+		return 0;
+
+	unsigned int constants[2];
+	unsigned int tempreg = rc_find_free_temporary(c);
+
+	sincos_constants(c, constants);
+
+	if (inst->U.I.Opcode == RC_OPCODE_COS) {
+		/* MAD tmp.w, src.x, 1/(2*PI), 0.75 */
+		/* FRC tmp.w, tmp.w */
+		/* MAD tmp.w, tmp.w, 2*PI, -PI */
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_xxxx(inst->U.I.SrcReg[0]),
+			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+			swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1])));
+		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+		/* cos(x) = sin(x + PI/2): the 0.75 offset is the SIN offset 0.5 plus a quarter period */
+		sin_approx(c, inst, inst->U.I.DstReg,
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			constants);
+	} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_xxxx(inst->U.I.SrcReg[0]),
+			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+			swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1])));
+		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+		/* Same three steps as for COS, but with the 0.5 offset: tmp.w is src.x wrapped around */
+		sin_approx(c, inst, inst->U.I.DstReg,
+			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+			constants);
+	} else {
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+			swizzle_xxxx(inst->U.I.SrcReg[0]),
+			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+			swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W));
+		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+			srcreg(RC_FILE_TEMPORARY, tempreg));
+		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+			srcreg(RC_FILE_TEMPORARY, tempreg),
+			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+
+		/* tmp.x (0.75 offset) feeds the cosine in dst.x, tmp.y (0.5 offset) the sine in dst.y; unwritten components are skipped. */
+		struct rc_dst_register dst = inst->U.I.DstReg;
+		dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
+		if (dst.WriteMask)
+			sin_approx(c, inst, dst,
+				swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), constants);
+
+		dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y;
+		if (dst.WriteMask)
+			sin_approx(c, inst, dst,
+				swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), constants);
+	}
+
+	rc_remove_instruction(inst);
+
+	return 1;
+}
+/* Emit COS/SIN reading srctmp.w in place of inst (SCS: COS into .x and SIN into .y, each only if written), then remove inst. */
+static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	unsigned srctmp)
+{
+	struct rc_src_register angle = srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW);
+	struct rc_dst_register moddst = inst->U.I.DstReg;
+	switch (inst->U.I.Opcode) {
+	case RC_OPCODE_COS:
+		emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, angle);
+		break;
+	case RC_OPCODE_SIN:
+		emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, inst->U.I.DstReg, angle);
+		break;
+	case RC_OPCODE_SCS:
+		if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
+			moddst.WriteMask = RC_MASK_X;
+			emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, angle);
+		}
+		if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
+			moddst.WriteMask = RC_MASK_Y;
+			emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, angle);
+		}
+		break;
+	default: break;	/* any other opcode emits nothing but is still removed below */
+	}
+	rc_remove_instruction(inst);
+}
+
+
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * to include pre-scaling by 1/(2*PI) and taking the fractional
+ * part, so that the input to COS and SIN is always in the range [0,1).
+ * SCS is replaced by one COS and one SIN instruction.
+ * Returns 1 if inst was COS/SIN/SCS and has been rewritten, 0 otherwise; the third argument is unused.
+ * @warning This transformation implicitly changes the semantics of SIN and COS!
+ */
+int radeonTransformTrigScale(struct radeon_compiler* c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	if (inst->U.I.Opcode != RC_OPCODE_COS &&
+	    inst->U.I.Opcode != RC_OPCODE_SIN &&
+	    inst->U.I.Opcode != RC_OPCODE_SCS)
+		return 0;
+
+	static const float RCP_2PI = 0.15915494309189535;
+	unsigned int temp;
+	unsigned int constant;
+	unsigned int constant_swizzle;
+
+	temp = rc_find_free_temporary(c);
+	constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
+	/* temp.w = src.x * 1/(2*PI), then temp.w = FRC(temp.w); only the W channel is written */
+	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W),
+		swizzle_xxxx(inst->U.I.SrcReg[0]),
+		srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+		srcreg(RC_FILE_TEMPORARY, temp));
+	/* Re-emit the COS/SIN (or both, for SCS) reading temp.w; this call also removes inst. */
+	r300_transform_SIN_COS_SCS(c, inst, temp);
+	return 1;
+}
+
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * so that the input to COS and SIN is always in the range [-PI, PI].
+ * SCS is replaced by one COS and one SIN instruction. Returns 1 if inst was handled, 0 otherwise.
+ */
+int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	void *unused)
+{
+	if (inst->U.I.Opcode != RC_OPCODE_COS &&
+	    inst->U.I.Opcode != RC_OPCODE_SIN &&
+	    inst->U.I.Opcode != RC_OPCODE_SCS)
+		return 0;
+
+	/* Repeat x in the range [-PI, PI]:
+	 *
+	 *   repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
+	 */
+	/* cons = { 1/(2*PI), 0.5, 2*PI, -PI }, picked apart below with the XXXX/YYYY/ZZZZ/WWWW swizzles */
+	static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
+	unsigned int temp;
+	unsigned int constant;
+
+	temp = rc_find_free_temporary(c);
+	constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
+	/* temp.w = src.x * cons.x + cons.y;  temp.w = FRC(temp.w);  temp.w = temp.w * cons.z + cons.w */
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+		swizzle_xxxx(inst->U.I.SrcReg[0]),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY));
+	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+		srcreg(RC_FILE_TEMPORARY, temp));
+	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+		srcreg(RC_FILE_TEMPORARY, temp),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ),
+		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW));
+	/* Re-emit the COS/SIN (or both, for SCS) reading temp.w; this call also removes inst. */
+	r300_transform_SIN_COS_SCS(c, inst, temp);
+	return 1;
+}
+
+/**
+ * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
+ * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
+ * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
+ *
+ * @return 1 if inst is DDX or DDY (its second source is then overwritten), 0 otherwise.
+ * @warning This explicitly changes the form of DDX and DDY!
+ */
+int radeonTransformDeriv(struct radeon_compiler* c,
+	struct rc_instruction* inst,
+	void* unused)
+{
+	if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
+		return 0;
+	/* B = -1: src1 selects the constant one in every component and negates all four. */
+	inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+	inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
+
+	return 1;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
new file mode 100644
index 0000000000..77d444476f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_ALU_H_
+#define __RADEON_PROGRAM_ALU_H_
+
+#include "radeon_program.h"
+/* All transforms below return 1 if they handled inst and 0 otherwise, and ignore the void* argument. This one lowers non-native fragment-program ALU opcodes. */
+int radeonTransformALU(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+/* Lowers ALU opcodes that are not native to the r300 up to r500 vertex engine. */
+int r300_transform_vertex_alu(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+/* Expands COS, SIN and SCS into MUL/MAD/FRC arithmetic (range wrap plus a polynomial sine approximation). */
+int radeonTransformTrigSimple(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+/* Pre-scales the COS/SIN/SCS operand by 1/(2*PI) and takes its fractional part; SCS becomes separate COS and SIN. */
+int radeonTransformTrigScale(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+/* Wraps the COS/SIN/SCS operand into [-PI, PI]; SCS becomes separate COS and SIN. */
+int r300_transform_trig_scale_vertex(
+	struct radeon_compiler *c,
+	struct rc_instruction *inst,
+	void*);
+/* Forces the second source of DDX/DDY to the constant -1, as the r5xx derivative instructions expect. */
+int radeonTransformDeriv(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void*);
+
+#endif /* __RADEON_PROGRAM_ALU_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
new file mode 100644
index 0000000000..2ddf60b677
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_PROGRAM_CONSTANTS_H
+#define RADEON_PROGRAM_CONSTANTS_H
+/* Result saturation modes. NOTE(review): presumably ZERO_ONE clamps to [0,1] and MINUS_PLUS_ONE to [-1,1] -- confirm against the code emitters. */
+typedef enum {
+	RC_SATURATE_NONE = 0,
+	RC_SATURATE_ZERO_ONE,
+	RC_SATURATE_MINUS_PLUS_ONE
+} rc_saturate_mode;
+/* Texture target kinds; the enumerators are numbered consecutively starting with RC_TEXTURE_2D_ARRAY = 0. */
+typedef enum {
+	RC_TEXTURE_2D_ARRAY,
+	RC_TEXTURE_1D_ARRAY,
+	RC_TEXTURE_CUBE,
+	RC_TEXTURE_3D,
+	RC_TEXTURE_RECT,
+	RC_TEXTURE_2D,
+	RC_TEXTURE_1D
+} rc_texture_target;
+/* Register files that a source or destination register description can name. */
+typedef enum {
+	/**
+	 * Used to indicate unused register descriptions and
+	 * source registers that use a constant swizzle.
+	 */
+	RC_FILE_NONE = 0,
+	RC_FILE_TEMPORARY,
+
+	/**
+	 * Input register.
+	 *
+	 * \note The compiler attaches no implicit semantics to input registers.
+	 * Fragment/vertex program specific semantics must be defined explicitly
+	 * using the appropriate compiler interfaces.
+	 */
+	RC_FILE_INPUT,
+
+	/**
+	 * Output register.
+	 *
+	 * \note The compiler attaches no implicit semantics to output registers.
+	 * Fragment/vertex program specific semantics must be defined explicitly
+	 * using the appropriate compiler interfaces.
+	 */
+	RC_FILE_OUTPUT,
+	RC_FILE_ADDRESS,
+
+	/**
+	 * Indicates a constant from the \ref rc_constant_list .
+	 */
+	RC_FILE_CONSTANT,
+
+	/**
+	 * Indicates a special register, see RC_SPECIAL_xxx.
+	 */
+	RC_FILE_SPECIAL
+} rc_register_file;
+/* Indices of the special registers that are addressed through RC_FILE_SPECIAL. */
+enum {
+	/** R500 fragment program ALU result "register" */
+	RC_SPECIAL_ALU_RESULT = 0,
+
+	/** Must be last */
+	RC_NUM_SPECIAL_REGISTERS
+};
+/* Register indices are stored in RC_REGISTER_INDEX_BITS bits; RC_REGISTER_MAX_INDEX (1024) is thus one past the largest index that fits. */
+#define RC_REGISTER_INDEX_BITS 10
+#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
+/* Per-component swizzle selectors; the eight values 0..7 fit the 3-bit fields used by RC_MAKE_SWIZZLE and GET_SWZ. */
+typedef enum {
+	RC_SWIZZLE_X = 0,
+	RC_SWIZZLE_Y,
+	RC_SWIZZLE_Z,
+	RC_SWIZZLE_W,
+	RC_SWIZZLE_ZERO,
+	RC_SWIZZLE_ONE,
+	RC_SWIZZLE_HALF,
+	RC_SWIZZLE_UNUSED
+} rc_swizzle;
+/* A packed swizzle holds four 3-bit selectors, first component in the lowest bits. SET_SWZ assigns to swz, evaluates it twice and does not mask newv to 3 bits. */
+#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
+#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
+#define GET_SWZ(swz, idx)      (((swz) >> ((idx)*3)) & 0x7)
+#define GET_BIT(msk, idx)      (((msk) >> (idx)) & 0x1)
+#define SET_SWZ(swz, idx, newv) \
+	do { \
+		(swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \
+	} while(0)
+/* Ready-made packed swizzles: the identity, two variants of its last component, and one replicated ("smeared") selector each. */
+#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
+#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z)
+#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
+#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
+#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
+#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
+#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
+#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF)
+
+/**
+ * \name Bitmasks for components of vectors.
+ * Bit 0 is X, bit 1 is Y, bit 2 is Z and bit 3 is W; the multi-letter masks are ORs of these.
+ * Used for write masks, negation masks, etc.
+ */
+/*@{*/
+#define RC_MASK_NONE 0
+#define RC_MASK_X 1
+#define RC_MASK_Y 2
+#define RC_MASK_Z 4
+#define RC_MASK_W 8
+#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
+#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
+#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
+#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
+/*@}*/
+/* NOTE(review): presumably selects whether an instruction writes the ALU result register and from which component (X or W) -- inferred from the names only, confirm. */
+typedef enum {
+	RC_ALURESULT_NONE = 0,
+	RC_ALURESULT_X,
+	RC_ALURESULT_W
+} rc_write_aluresult;
+
+#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
new file mode 100644
index 0000000000..ee839596aa
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2008-2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+
+/**
+ * Reserve a source slot of \p pair for reading (file, index) in the RGB and/or alpha part.
+ * \return the slot used; 0 when nothing needs a slot; -1 if no slot was free anymore.
+ */
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+	unsigned int rgb, unsigned int alpha,
+	rc_register_file file, unsigned int index)
+{
+	int best = -1;
+	int best_score = -1;
+	int slot;
+
+	/* Neither part reads the operand, or it is not a register at all. */
+	if (file == RC_FILE_NONE || (!rgb && !alpha))
+		return 0;
+
+	for (slot = 0; slot < 3; slot++) {
+		/* Number of requested parts in which this slot already holds the register. */
+		int score = 0;
+
+		if (rgb && pair->RGB.Src[slot].Used) {
+			if (pair->RGB.Src[slot].File != file || pair->RGB.Src[slot].Index != index)
+				continue;	/* taken by a different register */
+			score++;
+		}
+		if (alpha && pair->Alpha.Src[slot].Used) {
+			if (pair->Alpha.Src[slot].File != file || pair->Alpha.Src[slot].Index != index)
+				continue;	/* taken by a different register */
+			score++;
+		}
+
+		/* Strictly greater, so on a tie the lowest-numbered slot wins. */
+		if (score > best_score) {
+			best_score = score;
+			best = slot;
+		}
+	}
+
+	if (best < 0)
+		return -1;
+
+	if (rgb) {
+		pair->RGB.Src[best].Used = 1;
+		pair->RGB.Src[best].File = file;
+		pair->RGB.Src[best].Index = index;
+	}
+	if (alpha) {
+		pair->Alpha.Src[best].Used = 1;
+		pair->Alpha.Src[best].File = file;
+		pair->Alpha.Src[best].Index = index;
+	}
+
+	return best;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
new file mode 100644
index 0000000000..511cc707a3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_PAIR_H_
+#define __RADEON_PROGRAM_PAIR_H_
+
+#include "radeon_code.h"
+#include "radeon_opcodes.h"
+#include "radeon_program_constants.h"
+
+struct r300_fragment_program_compiler;
+
+
+/**
+ * \file
+ * Represents a paired ALU instruction, as found in R300 and R500
+ * fragment programs.
+ *
+ * Note that this representation is taking some liberties as far
+ * as register files are concerned, to allow separate register
+ * allocation.
+ *
+ * Also note that there are some subtleties in that the semantics
+ * of certain opcodes are implicitly changed in this representation;
+ * see \ref rc_pair_translate
+ */
+
+
+struct radeon_pair_instruction_source { /* one source slot of a paired instruction */
+	unsigned int Used:1; /* set to 1 by rc_pair_alloc_source() once a register is installed */
+	unsigned int File:3; /* register file of the installed access (an rc_register_file value) */
+	unsigned int Index:RC_REGISTER_INDEX_BITS; /* register index within File */
+};
+
+struct radeon_pair_instruction_rgb { /* RGB (xyz) half of a paired instruction */
+	unsigned int Opcode:8; /* opcode number; RC_OPCODE_NOP means this half is not dumped */
+	unsigned int DestIndex:RC_REGISTER_INDEX_BITS; /* dumped as temp[DestIndex] when WriteMask != 0 */
+	unsigned int WriteMask:3; /* bit 0 = x, bit 1 = y, bit 2 = z */
+    unsigned int Target:2; /* dumped as color[Target] when OutputWriteMask != 0 */
+	unsigned int OutputWriteMask:3; /* same bit layout as WriteMask */
+	unsigned int Saturate:1; /* dumped as the "_SAT" opcode suffix */
+
+	struct radeon_pair_instruction_source Src[3]; /* slots filled by rc_pair_alloc_source() with rgb set */
+
+	struct { /* one entry per opcode operand */
+		unsigned int Source:2; /* number of the source slot read (dumped as src<N>) */
+		unsigned int Swizzle:9; /* packed selectors, read with GET_SWZ(Swizzle, 0..2) */
+		unsigned int Abs:1; /* dumped as |...| around the operand */
+		unsigned int Negate:1; /* dumped as a leading '-' */
+	} Arg[3];
+};
+
+struct radeon_pair_instruction_alpha { /* alpha (w) half of a paired instruction */
+	unsigned int Opcode:8; /* opcode number; RC_OPCODE_NOP means this half is not dumped */
+	unsigned int DestIndex:RC_REGISTER_INDEX_BITS; /* dumped as temp[DestIndex].w when WriteMask != 0 */
+	unsigned int WriteMask:1; /* single bit: only the w channel exists here */
+    unsigned int Target:2; /* dumped as color[Target].w when OutputWriteMask != 0 */
+	unsigned int OutputWriteMask:1; /* single bit, see Target */
+	unsigned int DepthWriteMask:1; /* dumped as " depth.w" */
+	unsigned int Saturate:1; /* dumped as the "_SAT" opcode suffix */
+
+	struct radeon_pair_instruction_source Src[3]; /* slots filled by rc_pair_alloc_source() with alpha set */
+
+	struct { /* one entry per opcode operand */
+		unsigned int Source:2; /* number of the source slot read (dumped as src<N>) */
+		unsigned int Swizzle:3; /* one selector, dumped through rc_swizzle_char() */
+		unsigned int Abs:1; /* dumped as |...| around the operand */
+		unsigned int Negate:1; /* dumped as a leading '-' */
+	} Arg[3];
+};
+
+struct rc_pair_instruction { /* both halves of one paired ALU instruction */
+	struct radeon_pair_instruction_rgb RGB;
+	struct radeon_pair_instruction_alpha Alpha;
+
+	unsigned int WriteALUResult:2; /* 0 = none; else compared with RC_ALURESULT_X / RC_ALURESULT_W */
+	unsigned int ALUResultCompare:3; /* dumped as an rc_compare_func of the result against 0 */
+};
+
+
+/**
+ * General helper functions for dealing with the paired instruction format.
+ */
+/*@{*/
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+	unsigned int rgb, unsigned int alpha,
+	rc_register_file file, unsigned int index);
+/*@}*/
+
+
+/**
+ * Compiler passes that operate with the paired format.
+ */
+/*@{*/
+struct radeon_pair_handler;
+
+void rc_pair_translate(struct r300_fragment_program_compiler *c);
+void rc_pair_schedule(struct r300_fragment_program_compiler *c);
+void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps);
+/*@}*/
+
+#endif /* __RADEON_PROGRAM_PAIR_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
new file mode 100644
index 0000000000..28fb9eae92
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+static const char * textarget_to_string(rc_texture_target target)
+{
+	switch(target) { /* printable name of each texture target */
+	case RC_TEXTURE_1D: return "1D";
+	case RC_TEXTURE_2D: return "2D";
+	case RC_TEXTURE_3D: return "3D";
+	case RC_TEXTURE_RECT: return "RECT";
+	case RC_TEXTURE_CUBE: return "CUBE";
+	case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY";
+	case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY";
+	default: return "BAD_TEXTURE_TARGET"; /* anything unrecognized */
+	}
+}
+
+static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
+{
+	const char * relation = "???"; /* printed for unrecognized functions */
+
+	switch(func) {
+	/* Constant outcomes are printed on their own, without operands. */
+	case RC_COMPARE_FUNC_NEVER: fprintf(f, "false"); return;
+	case RC_COMPARE_FUNC_ALWAYS: fprintf(f, "true"); return;
+	case RC_COMPARE_FUNC_LESS: relation = "<"; break;
+	case RC_COMPARE_FUNC_EQUAL: relation = "=="; break;
+	case RC_COMPARE_FUNC_LEQUAL: relation = "<="; break;
+	case RC_COMPARE_FUNC_GREATER: relation = ">"; break;
+	case RC_COMPARE_FUNC_NOTEQUAL: relation = "!="; break;
+	case RC_COMPARE_FUNC_GEQUAL: relation = ">="; break;
+	default: break;
+	}
+
+	/* Everything else is an infix relation: lhs op rhs. */
+	fprintf(f, "%s %s %s", lhs, relation, rhs);
+}
+
+static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
+{
+	const char * bank = "BAD FILE"; /* label for unrecognized register files */
+
+	switch(file) {
+	case RC_FILE_NONE: fprintf(f, "none"); return;
+	case RC_FILE_SPECIAL:
+		if (index == RC_SPECIAL_ALU_RESULT)
+			fprintf(f, "aluresult");
+		else
+			fprintf(f, "special[%i]", index);
+		return;
+	case RC_FILE_TEMPORARY: bank = "temp"; break;
+	case RC_FILE_INPUT: bank = "input"; break;
+	case RC_FILE_OUTPUT: bank = "output"; break;
+	case RC_FILE_ADDRESS: bank = "addr"; break;
+	case RC_FILE_CONSTANT: bank = "const"; break;
+	default: break;
+	}
+	/* Ordinary files print as name[index], plus the relative-address note. */
+	fprintf(f, "%s[%i%s]", bank, index, reladdr ? " + addr[0]" : "");
+}
+
+static void rc_print_mask(FILE * f, unsigned int mask)
+{
+	if ((mask & RC_MASK_X) != 0) fputs("x", f); /* one letter per set bit, */
+	if ((mask & RC_MASK_Y) != 0) fputs("y", f); /* always emitted in       */
+	if ((mask & RC_MASK_Z) != 0) fputs("z", f); /* x, y, z, w order        */
+	if ((mask & RC_MASK_W) != 0) fputs("w", f);
+}
+
+static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
+{
+	rc_print_register(f, dst.File, dst.Index, dst.RelAddr);
+	if (dst.WriteMask == RC_MASK_XYZW)
+		return; /* a full write mask is implied and not printed */
+	fprintf(f, ".");
+	rc_print_mask(f, dst.WriteMask);
+}
+
+static char rc_swizzle_char(unsigned int swz)
+{
+	switch(swz) { /* one display character per swizzle selector */
+	case RC_SWIZZLE_X: return 'x';
+	case RC_SWIZZLE_Y: return 'y';
+	case RC_SWIZZLE_Z: return 'z';
+	case RC_SWIZZLE_W: return 'w';
+	case RC_SWIZZLE_ZERO: return '0'; /* constant selectors */
+	case RC_SWIZZLE_ONE: return '1';
+	case RC_SWIZZLE_HALF: return 'H';
+	case RC_SWIZZLE_UNUSED: return '_';
+	default: return '?'; /* not a known selector */
+	}
+}
+
+static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
+{
+	unsigned int chan = 0; /* walks the four swizzle components in order */
+	while (chan < 4) {
+		if (GET_BIT(negate, chan))
+			fprintf(f, "-");
+		fprintf(f, "%c", rc_swizzle_char(GET_SWZ(swizzle, chan)));
+		++chan;
+	}
+}
+
+static void rc_print_src_register(FILE * f, struct rc_src_register src)
+{
+	const int per_channel = (src.Negate != RC_MASK_NONE && src.Negate != RC_MASK_XYZW);
+	if (src.Negate == RC_MASK_XYZW)
+		fprintf(f, "-"); /* whole-register negation is printed as a prefix */
+	if (src.Abs)
+		fprintf(f, "|");
+	rc_print_register(f, src.File, src.Index, src.RelAddr);
+	if (per_channel) { /* signs go inside the swizzle, after the closing bar */
+		if (src.Abs)
+			fprintf(f, "|");
+		fprintf(f, ".");
+		rc_print_swizzle(f, src.Swizzle, src.Negate);
+	} else {
+		if (src.Swizzle != RC_SWIZZLE_XYZW) {
+			fprintf(f, ".");
+			rc_print_swizzle(f, src.Swizzle, 0);
+		}
+		if (src.Abs)
+			fprintf(f, "|");
+	}
+}
+
+static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst)
+{
+	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
+	unsigned int n;
+
+	/* Mnemonic, immediately followed by the saturation suffix (if any). */
+	fprintf(f, "%s", info->Name);
+	if (inst->U.I.SaturateMode == RC_SATURATE_ZERO_ONE)
+		fprintf(f, "_SAT");
+	else if (inst->U.I.SaturateMode == RC_SATURATE_MINUS_PLUS_ONE)
+		fprintf(f, "_SAT2");
+	else if (inst->U.I.SaturateMode != RC_SATURATE_NONE)
+		fprintf(f, "_BAD_SAT");
+
+	/* Destination first, then the sources; operands are comma separated. */
+	if (info->HasDstReg) {
+		fprintf(f, " ");
+		rc_print_dst_register(f, inst->U.I.DstReg);
+	}
+	for(n = 0; n < info->NumSrcRegs; ++n) {
+		if (n > 0 || info->HasDstReg)
+			fprintf(f, ",");
+		fprintf(f, " ");
+		rc_print_src_register(f, inst->U.I.SrcReg[n]);
+	}
+
+	/* Texture opcodes also name the sampled target and unit. */
+	if (info->HasTexture) {
+		fprintf(f, ", %s%s[%u]",
+			textarget_to_string(inst->U.I.TexSrcTarget),
+			inst->U.I.TexShadow ? "SHADOW" : "",
+			inst->U.I.TexSrcUnit);
+	}
+
+	fprintf(f, ";");
+
+	/* Optional note: which channel feeds the ALU result comparison. */
+	if (inst->U.I.WriteALUResult) {
+		const char * channel = (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w";
+
+		fprintf(f, " [aluresult = (");
+		rc_print_comparefunc(f, channel, inst->U.I.ALUResultCompare, "0");
+		fprintf(f, ")]");
+	}
+
+	fprintf(f, "\n");
+}
+
+static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst)
+{
+	struct rc_pair_instruction * inst = &fullinst->U.P;
+	int printedsrc = 0;
+	/* Dump layout: source slot bindings, RGB op, alpha op, ALU result note. */
+	for(unsigned int src = 0; src < 3; ++src) {
+		if (inst->RGB.Src[src].Used) {
+			if (printedsrc)
+				fprintf(f, ", ");
+			fprintf(f, "src%u.xyz = ", src); /* src is unsigned: %u, not %i */
+			rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0);
+			printedsrc = 1;
+		}
+		if (inst->Alpha.Src[src].Used) {
+			if (printedsrc)
+				fprintf(f, ", ");
+			fprintf(f, "src%u.w = ", src);
+			rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0);
+			printedsrc = 1;
+		}
+	}
+	fprintf(f, "\n");
+
+	if (inst->RGB.Opcode != RC_OPCODE_NOP) { /* RGB half: opcode, destinations, operands */
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+		fprintf(f, "     %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : "");
+		if (inst->RGB.WriteMask)
+			fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex,
+				(inst->RGB.WriteMask & 1) ? "x" : "",
+				(inst->RGB.WriteMask & 2) ? "y" : "",
+				(inst->RGB.WriteMask & 4) ? "z" : "");
+		if (inst->RGB.OutputWriteMask)
+			fprintf(f, " color[%i].%s%s%s", inst->RGB.Target,
+				(inst->RGB.OutputWriteMask & 1) ? "x" : "",
+				(inst->RGB.OutputWriteMask & 2) ? "y" : "",
+				(inst->RGB.OutputWriteMask & 4) ? "z" : "");
+		if (inst->WriteALUResult == RC_ALURESULT_X)
+			fprintf(f, " aluresult");
+
+		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+			const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
+			const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
+			fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source,
+				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
+				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
+				rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
+				abs);
+		}
+		fprintf(f, "\n");
+	}
+
+	if (inst->Alpha.Opcode != RC_OPCODE_NOP) { /* alpha half, same layout with a single channel */
+		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+		fprintf(f, "     %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : "");
+		if (inst->Alpha.WriteMask)
+			fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
+		if (inst->Alpha.OutputWriteMask)
+			fprintf(f, " color[%i].w", inst->Alpha.Target);
+		if (inst->Alpha.DepthWriteMask)
+			fprintf(f, " depth.w");
+		if (inst->WriteALUResult == RC_ALURESULT_W)
+			fprintf(f, " aluresult");
+
+		for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+			const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
+			const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
+			fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source,
+				rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs);
+		}
+		fprintf(f, "\n");
+	}
+
+	if (inst->WriteALUResult) { /* how the written ALU result is compared against 0 */
+		fprintf(f, "      [aluresult = (");
+		rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
+		fprintf(f, ")]\n");
+	}
+}
+
+/**
+ * Dump every instruction of prog to stderr, one numbered entry each.
+ */
+void rc_print_program(const struct rc_program *prog)
+{
+	unsigned int linenum = 0;
+	struct rc_instruction *inst;
+
+	fprintf(stderr, "# Radeon Compiler Program\n");
+
+	for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+		fprintf(stderr, "%3u: ", linenum); /* linenum is unsigned: %u, not %d */
+
+		if (inst->Type == RC_INSTRUCTION_PAIR) /* paired and normal forms have separate printers */
+			rc_print_pair_instruction(stderr, inst);
+		else
+			rc_print_normal_instruction(stderr, inst);
+
+		linenum++;
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
new file mode 100644
index 0000000000..9c4b65f4c0
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_tex.h"
+
+/* Series of transformations to be done on textures. */
+
+static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler,
+											 int tmu)
+{
+	struct rc_src_register ambient = { 0, };
+
+	/* Default operand: RC_FILE_NONE with the all-zero swizzle. */
+	ambient.File = RC_FILE_NONE;
+	ambient.Swizzle = RC_SWIZZLE_0000;
+	if (!compiler->enable_shadow_ambient)
+		return ambient;
+	ambient.File = RC_FILE_CONSTANT; /* otherwise read .wwww of the per-unit state constant */
+	ambient.Index = rc_constants_add_state(&compiler->Base.Program.Constants,
+										   RC_STATE_SHADOW_AMBIENT, tmu);
+	ambient.Swizzle = RC_SWIZZLE_WWWW;
+	return ambient;
+}
+
+static void lower_texture_rect(struct r300_fragment_program_compiler *compiler,
+							   struct rc_instruction *inst)
+{
+	struct rc_instruction *inst_rect;
+	unsigned temp;
+
+	/* Only RECT targets and units with non-normalized coords are rewritten. */
+	if (inst->U.I.TexSrcTarget != RC_TEXTURE_RECT &&
+		!compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)
+		return;
+	temp = rc_find_free_temporary(&compiler->Base); /* looked up only when actually needed */
+	inst_rect = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+	/* MUL temp, coords, <R300 texrect factor state constant of this unit> */
+	inst_rect->U.I.Opcode = RC_OPCODE_MUL;
+	inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY;
+	inst_rect->U.I.DstReg.Index = temp;
+	inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+	inst_rect->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+	inst_rect->U.I.SrcReg[1].Index =
+			rc_constants_add_state(&compiler->Base.Program.Constants,
+								   RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit);
+	reset_srcreg(&inst->U.I.SrcReg[0]); /* inst now samples the scaled temp as a 2D texture */
+	inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+	inst->U.I.SrcReg[0].Index = temp;
+	inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
+ *  - implement texture compare (shadow extensions)
+ *  - extract non-native source / destination operands and operand swizzles
+ *  - premultiply texture coordinates for RECT, emulate NPOT wrap modes
+ *  - introduce a temporary register when write masks are needed
+ * data: the fragment compiler. Returns 0 for other opcodes, else 1.
+ */
+int radeonTransformTEX(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void* data)
+{
+	struct r300_fragment_program_compiler *compiler =
+		(struct r300_fragment_program_compiler*)data;
+
+	if (inst->U.I.Opcode != RC_OPCODE_TEX &&
+		inst->U.I.Opcode != RC_OPCODE_TXB &&
+		inst->U.I.Opcode != RC_OPCODE_TXP &&
+		inst->U.I.Opcode != RC_OPCODE_KIL)
+		return 0;
+
+	/* ARB_shadow & EXT_shadow_funcs */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+		((c->Program.ShadowSamplers & (1u << inst->U.I.TexSrcUnit)) ||
+		 (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
+		rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+
+		if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+			inst->U.I.Opcode = RC_OPCODE_MOV;
+
+			if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+				inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+				inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+			} else {
+				inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
+			}
+
+			return 1;
+		} else {
+			/* comparefunc was fetched above; EQUAL/NOTEQUAL get remapped below. */
+			struct rc_instruction * inst_rcp = NULL;
+			struct rc_instruction * inst_mad;
+			struct rc_instruction * inst_cmp;
+			unsigned tmp_texsample = rc_find_free_temporary(c);
+			unsigned tmp_sum = rc_find_free_temporary(c);
+			unsigned tmp_recip_w = 0;
+			int pass, fail, tex;
+
+			/* Save the output register. */
+			struct rc_dst_register output_reg = inst->U.I.DstReg;
+
+			/* Redirect TEX to a new temp. */
+			inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst->U.I.DstReg.Index = tmp_texsample;
+			inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+				tmp_recip_w = rc_find_free_temporary(c);
+
+				/* Compute 1/W. */
+				inst_rcp = rc_insert_new_instruction(c, inst);
+				inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+				inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+				inst_rcp->U.I.DstReg.Index = tmp_recip_w;
+				inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+				inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+				inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+			}
+
+			/* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */
+			inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
+			inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mad->U.I.DstReg.Index = tmp_sum;
+			inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+			inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
+			if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+				inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+				inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+				inst_mad->U.I.SrcReg[1].Index = tmp_recip_w;
+				inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+				tex = 2;
+			} else {
+				inst_mad->U.I.Opcode = RC_OPCODE_ADD;
+				tex = 1;
+			}
+			inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY;
+			inst_mad->U.I.SrcReg[tex].Index = tmp_texsample;
+			inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle;
+
+			/* Fake EQUAL/NOTEQUAL, it seems to pass some tests surprisingly. */
+			if (comparefunc == RC_COMPARE_FUNC_EQUAL) {
+				comparefunc = RC_COMPARE_FUNC_GEQUAL;
+			} else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
+				comparefunc = RC_COMPARE_FUNC_LESS;
+			}
+
+			/* Recall that SrcReg[0] is r, SrcReg[tex] is tex and:
+			 *   LESS:    r  < tex  <=>      -tex+r < 0
+			 *   GEQUAL:  r >= tex  <=> not (-tex+r < 0)
+			 *   GREATER: r  > tex  <=>       tex-r < 0
+			 *   LEQUAL:  r <= tex  <=> not ( tex-r < 0)
+			 *
+			 * This negates either r or tex: */
+			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
+				inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW;
+			else
+				inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
+
+			/* This negates the whole expression: */
+			if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
+				pass = 1;
+				fail = 2;
+			} else {
+				pass = 2;
+				fail = 1;
+			}
+
+			inst_cmp = rc_insert_new_instruction(c, inst_mad);
+			inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+			inst_cmp->U.I.DstReg = output_reg;
+			inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+			inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
+			inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
+			inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
+			inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
+		}
+	}
+
+	/* Texture wrap modes don't work on NPOT textures or texrects.
+	 *
+	 * The game plan is simple. We have two flags, fake_npot and
+	 * non_normalized_coords, as well as a tex target. The RECT tex target
+	 * will make the emitted code use non-scaled texcoords.
+	 *
+	 * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
+	 * mirroring are not. If we need to repeat, we do:
+	 *
+	 * MUL temp, texcoord, <scaling factor constant>
+	 * FRC temp, temp ; Discard integer portion of coords
+	 *
+	 * This gives us coords in [0, 1].
+	 *
+	 * Mirroring is trickier. We're going to start out like repeat:
+	 *
+	 * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
+	 * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
+	 *                            ; so scale to [0, 1]
+	 * FRC temp, temp ; Make the pattern repeat
+	 * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
+	 * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
+	 *				; The pattern is backwards, so reverse it (1-x).
+	 *
+	 * This gives us coords in [0, 1].
+	 *
+	 * ~ C & M. ;)
+	 */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+		(inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
+			compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot ||
+			compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)) {
+		rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
+
+		/* R300 cannot sample from rectangles. */
+		if (!c->is_r500) {
+			lower_texture_rect(compiler, inst);
+		}
+
+		if (compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot &&
+			wrapmode != RC_WRAP_NONE) {
+			struct rc_instruction *inst_mov;
+			unsigned temp = rc_find_free_temporary(c);
+
+			/* For NPOT fallback, we need normalized coordinates anyway. */
+			if (c->is_r500) {
+				lower_texture_rect(compiler, inst);
+			}
+
+			if (wrapmode == RC_WRAP_REPEAT) {
+				/* Both instructions will be paired up. */
+				struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+				inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+				inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+				inst_frc->U.I.DstReg.Index = temp;
+				inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+				inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+			} else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
+				/*
+				 * Function:
+				 *   f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
+				 *
+				 * Code:
+				 *   MUL temp, src0, 0.5
+				 *   FRC temp, temp
+				 *   MAD temp, temp, 2, -1
+				 *   ADD temp, 1, -abs(temp)
+				 */
+
+				struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
+				unsigned two, two_swizzle;
+
+				inst_mul = rc_insert_new_instruction(c, inst->Prev);
+
+				inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+				inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+				inst_mul->U.I.DstReg.Index = temp;
+				inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+				inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+				inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
+
+				inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+				inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+				inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+				inst_frc->U.I.DstReg.Index = temp;
+				inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+				inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+				inst_frc->U.I.SrcReg[0].Index = temp;
+				inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+				two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
+				inst_mad = rc_insert_new_instruction(c, inst->Prev);
+
+				inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+				inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+				inst_mad->U.I.DstReg.Index = temp;
+				inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+				inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+				inst_mad->U.I.SrcReg[0].Index = temp;
+				inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+				inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+				inst_mad->U.I.SrcReg[1].Index = two;
+				inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
+				inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
+				inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
+
+				inst_add = rc_insert_new_instruction(c, inst->Prev);
+
+				inst_add->U.I.Opcode = RC_OPCODE_ADD;
+				inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+				inst_add->U.I.DstReg.Index = temp;
+				inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+				inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+				inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+				inst_add->U.I.SrcReg[1].Index = temp;
+				inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+				inst_add->U.I.SrcReg[1].Abs = 1;
+				inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
+			} else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
+				/*
+				 * Mirrored clamp modes are bloody simple, we just use abs
+				 * to mirror [0, 1] into [-1, 0]. This works for
+				 * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
+				 * inst_mov is reassigned afterwards for the W-preserving MOV.
+				 */
+
+				inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+				inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+				inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+				inst_mov->U.I.DstReg.Index = temp;
+				inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+				inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+				inst_mov->U.I.SrcReg[0].Abs = 1;
+			}
+
+			/* Preserve W for TXP/TXB. */
+			inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+			inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+			inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+			inst_mov->U.I.DstReg.Index = temp;
+			inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
+			inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+			reset_srcreg(&inst->U.I.SrcReg[0]);
+			inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+			inst->U.I.SrcReg[0].Index = temp;
+		}
+	}
+
+	/* Cannot write texture to output registers (all chips) or with masks (non-r500) */
+	if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+		(inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
+		 (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
+		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
+
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.DstReg = inst->U.I.DstReg;
+		inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
+
+		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+	}
+
+	/* Cannot read texture coordinate from constants file */
+	if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
+		struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+		inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+		inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
+		inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+		reset_srcreg(&inst->U.I.SrcReg[0]);
+		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+		inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
+	}
+
+	return 1;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h
new file mode 100644
index 0000000000..a0105051ac
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_TEX_H_
+#define __RADEON_PROGRAM_TEX_H_
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+int radeonTransformTEX(
+	struct radeon_compiler * c,
+	struct rc_instruction * inst,
+	void* data);
+
+#endif /* __RADEON_PROGRAM_TEX_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h
new file mode 100644
index 0000000000..c81d5f7a5e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_SWIZZLE_H
+#define RADEON_SWIZZLE_H
+
+#include "radeon_program.h"
+
+struct rc_swizzle_split { /* passed by pointer to rc_swizzle_caps::Split */
+	unsigned char NumPhases; /* presumably the number of valid Phase[] entries -- TODO confirm */
+	unsigned char Phase[4]; /* presumably one entry per phase of the split access -- TODO confirm */
+};
+
+/**
+ * Describe the swizzling capability of target hardware (two callbacks).
+ */
+struct rc_swizzle_caps {
+	/**
+	 * Check whether the given swizzle, absolute and negate combination
+	 * can be implemented natively by the hardware for this opcode.
+	 * The combination is taken from the source register \p reg.
+	 * \return 1 if the swizzle is native for the given opcode
+	 */
+	int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);
+
+	/**
+	 * Determine how to split access to the masked channels of the given
+	 * source register to obtain ALU-native swizzles (presumably stored in \p split).
+	 */
+	void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split);
+};
+
+#endif /* RADEON_SWIZZLE_H */
diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c
new file mode 100644
index 0000000000..d2c25fb9cd
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_blit.c
@@ -0,0 +1,663 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "r300_context.h"
+
+#include "r300_blit.h"
+#include "r300_cmdbuf.h"
+#include "r300_emit.h"
+#include "r300_tex.h"
+#include "compiler/radeon_compiler.h"
+#include "compiler/radeon_opcodes.h"
+
+static void vp_ins_outs(struct r300_vertex_program_compiler *c)
+{
+    /* Blit vertex layout: position uses slot 0 and texcoord 0 uses slot 1,
+     * on the input side as well as on the output side. */
+    c->code->inputs[VERT_ATTRIB_POS] = c->code->outputs[VERT_RESULT_HPOS] = 0;
+    c->code->inputs[VERT_ATTRIB_TEX0] = c->code->outputs[VERT_RESULT_TEX0] = 1;
+}
+
+static void fp_allocate_hw_inputs(struct r300_fragment_program_compiler * c,
+    void (*allocate)(void * data, unsigned input, unsigned hwreg), void * mydata)
+{
+    /* Texcoord 0 is the only input handed out; it is placed in hw register 0. */
+    const unsigned hwreg = 0;
+    (*allocate)(mydata, FRAG_ATTRIB_TEX0, hwreg);
+}
+
+/* Build and compile the pass-through vertex program used for blits into
+ * r300->blit.vp_code: it copies the position and texcoord 0 inputs to the
+ * matching outputs with two MOV instructions. */
+static void create_vertex_program(struct r300_context *r300)
+{
+    static const unsigned passthrough[2][2] = {   /* { input, output } */
+        { VERT_ATTRIB_POS,  VERT_RESULT_HPOS },
+        { VERT_ATTRIB_TEX0, VERT_RESULT_TEX0 },
+    };
+    struct r300_vertex_program_compiler compiler;
+    struct rc_instruction *inst;
+    unsigned i;
+
+    /* Zero all members as create_fragment_program() does; rc_init() is only
+     * handed Base, so the rest would otherwise start out uninitialized. */
+    memset(&compiler, 0, sizeof(compiler));
+    rc_init(&compiler.Base);
+
+    for (i = 0; i < 2; i++) {
+        /* "MOV output, input", inserted after Program.Instructions.Prev. */
+        inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev);
+        inst->U.I.Opcode = RC_OPCODE_MOV;
+        inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+        inst->U.I.DstReg.Index = passthrough[i][1];
+        inst->U.I.DstReg.RelAddr = 0;
+        inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+        inst->U.I.SrcReg[0].Abs = 0;
+        inst->U.I.SrcReg[0].File = RC_FILE_INPUT;
+        inst->U.I.SrcReg[0].Index = passthrough[i][0];
+        inst->U.I.SrcReg[0].Negate = 0;
+        inst->U.I.SrcReg[0].RelAddr = 0;
+        inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+    }
+
+    compiler.Base.Program.InputsRead = (1 << VERT_ATTRIB_POS) | (1 << VERT_ATTRIB_TEX0);
+    compiler.RequiredOutputs = compiler.Base.Program.OutputsWritten = (1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_TEX0);
+    compiler.SetHwInputOutput = vp_ins_outs;
+    compiler.code = &r300->blit.vp_code;
+    r3xx_compile_vertex_program(&compiler);
+}
+
+/* Build and compile the blit fragment program into r300->blit.fp_code: one
+ * TEX instruction sampling 2D texture unit 0 at texcoord 0 into the color output. */
+static void create_fragment_program(struct r300_context *r300)
+{
+    struct r300_fragment_program_compiler compiler;
+    struct rc_instruction *tex;
+
+    memset(&compiler, 0, sizeof(compiler));
+    rc_init(&compiler.Base);
+    tex = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev);
+    tex->U.I.Opcode = RC_OPCODE_TEX;
+    tex->U.I.TexSrcUnit = 0;
+    tex->U.I.TexSrcTarget = RC_TEXTURE_2D;
+    tex->U.I.DstReg.File = RC_FILE_OUTPUT;
+    tex->U.I.DstReg.Index = FRAG_RESULT_COLOR;
+    tex->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+    tex->U.I.SrcReg[0].File = RC_FILE_INPUT;
+    tex->U.I.SrcReg[0].Index = FRAG_ATTRIB_TEX0;
+    tex->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+    tex->U.I.SrcReg[0].RelAddr = 0;
+    tex->U.I.SrcReg[0].Abs = 0;
+    tex->U.I.SrcReg[0].Negate = 0;
+
+    compiler.code = &r300->blit.fp_code;
+    compiler.AllocateHwInputs = fp_allocate_hw_inputs;
+    compiler.enable_shadow_ambient = GL_TRUE;
+    compiler.OutputColor[0] = FRAG_RESULT_COLOR;
+    compiler.OutputDepth = FRAG_RESULT_DEPTH;
+    compiler.Base.Program.InputsRead = (1 << FRAG_ATTRIB_TEX0);
+    compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515);
+    compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
+    r3xx_compile_fragment_program(&compiler);
+}
+
+/* Compile the blit programs; the vertex program only when hw TCL is enabled. */
+void r300_blit_init(struct r300_context *r300)
+{
+    if (r300->options.hw_tcl_enabled) { create_vertex_program(r300); }
+    create_fragment_program(r300);
+}
+
+static void r300_emit_tx_setup(struct r300_context *r300,
+                               gl_format mesa_format,
+                               struct radeon_bo *bo,
+                               intptr_t offset,
+                               unsigned width,
+                               unsigned height,
+                               unsigned pitch)
+{
+    int is_r500 = r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515;
+    BATCH_LOCALS(&r300->radeon);
+    /* Emits the *_0 texture registers for the image at bo + offset; the asserts bound width/height by 4096 (r500) or 2048. */
+    assert(is_r500 ? width  <= 4096 : width  <= 2048);
+    assert(is_r500 ? height <= 4096 : height <= 2048);
+    assert(r300TranslateTexFormat(mesa_format) >= 0);
+    assert(offset % 32 == 0);
+    /* Filter state: clamp-to-edge on S/T/R, nearest min/mag filter, MIP_NONE. */
+    BEGIN_BATCH(17);
+    OUT_BATCH_REGVAL(R300_TX_FILTER0_0,
+                     (R300_TX_CLAMP_TO_EDGE  << R300_TX_WRAP_S_SHIFT) |
+                     (R300_TX_CLAMP_TO_EDGE  << R300_TX_WRAP_T_SHIFT) |
+                     (R300_TX_CLAMP_TO_EDGE  << R300_TX_WRAP_R_SHIFT) |
+                     R300_TX_MIN_FILTER_MIP_NONE |
+                     R300_TX_MIN_FILTER_NEAREST |
+                     R300_TX_MAG_FILTER_NEAREST |
+                     (0 << 28));
+    OUT_BATCH_REGVAL(R300_TX_FILTER1_0, 0);
+    OUT_BATCH_REGVAL(R300_TX_SIZE_0,
+                     (((width  - 1) & 0x7ff) << R300_TX_WIDTHMASK_SHIFT) |
+                     (((height - 1) & 0x7ff) << R300_TX_HEIGHTMASK_SHIFT) |
+                     (0 << R300_TX_DEPTHMASK_SHIFT) |
+                     (0 << R300_TX_MAX_MIP_LEVEL_SHIFT) |
+                     R300_TX_SIZE_TXPITCH_EN);
+    /* Sizes are written minus one, masked to 11 bits; on r500 the BIT11 flags below are set for sizes above 2048. */
+    OUT_BATCH_REGVAL(R300_TX_FORMAT_0, r300TranslateTexFormat(mesa_format));
+    OUT_BATCH_REGVAL(R300_TX_FORMAT2_0,
+                     (pitch - 1) |
+                     (is_r500 && width  > 2048 ? R500_TXWIDTH_BIT11  : 0) |
+                     (is_r500 && height > 2048 ? R500_TXHEIGHT_BIT11 : 0));
+    OUT_BATCH_REGSEQ(R300_TX_OFFSET_0, 1);
+    OUT_BATCH_RELOC(0, bo, offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+    /* NOTE(review): the INVALTAGS write presumably invalidates the texture cache tags -- confirm. */
+    OUT_BATCH_REGSEQ(R300_TX_INVALTAGS, 2);
+    OUT_BATCH(0);
+    OUT_BATCH(1);
+
+    END_BATCH();
+}
+
+/* Compose a US output format word from the format FMT, the four channel
+ * selects C0..C3 and the R500_OUT_SIGN() argument SIGN. */
+#define EASY_US_FORMAT(FMT, C0, C1, C2, C3, SIGN) \
+    (FMT  | R500_C0_SEL_##C0 | R500_C1_SEL_##C1 | \
+    R500_C2_SEL_##C2 | R500_C3_SEL_##C3 | R500_OUT_SIGN(SIGN))
+
+/* Translate a Mesa format into the US_OUT_FMT value used when rendering to it. */
+static uint32_t mesa_format_to_us_format(gl_format mesa_format)
+{
+    switch (mesa_format) {
+    case MESA_FORMAT_RGBA8888: // x
+        return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, B, G, R, 0);
+    case MESA_FORMAT_RGB565: // x
+    case MESA_FORMAT_ARGB1555: // x
+    case MESA_FORMAT_RGBA8888_REV: // x
+        return EASY_US_FORMAT(R500_OUT_FMT_C4_8, R, G, B, A, 0);
+    case MESA_FORMAT_ARGB8888: // x
+        return EASY_US_FORMAT(R500_OUT_FMT_C4_8, B, G, R, A, 0);
+    case MESA_FORMAT_ARGB8888_REV:
+    case MESA_FORMAT_XRGB8888:
+        return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, R, G, B, 0);
+    case MESA_FORMAT_RGB332:
+        return EASY_US_FORMAT(R500_OUT_FMT_C_3_3_2, A, R, G, B, 0);
+
+    /* Floating point formats. */
+    case MESA_FORMAT_RGBA_FLOAT32:
+        return EASY_US_FORMAT(R500_OUT_FMT_C4_32_FP, R, G, B, A, 0);
+    case MESA_FORMAT_RGBA_FLOAT16:
+        return EASY_US_FORMAT(R500_OUT_FMT_C4_16_FP, R, G, B, A, 0);
+    case MESA_FORMAT_ALPHA_FLOAT32:
+        return EASY_US_FORMAT(R500_OUT_FMT_C_32_FP, A, A, A, A, 0);
+    case MESA_FORMAT_ALPHA_FLOAT16:
+        return EASY_US_FORMAT(R500_OUT_FMT_C_16_FP, A, A, A, A, 0);
+    /* Signed formats: R500_OUT_SIGN() gets 0xf instead of 0. */
+    case MESA_FORMAT_SIGNED_RGBA8888:
+        return EASY_US_FORMAT(R500_OUT_FMT_C4_8, R, G, B, A, 0xf);
+    case MESA_FORMAT_SIGNED_RGBA8888_REV:
+        return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, B, G, R, 0xf);
+    case MESA_FORMAT_SIGNED_RGBA_16:
+        return EASY_US_FORMAT(R500_OUT_FMT_C4_16, R, G, B, A, 0xf);
+    default:
+        fprintf(stderr, "Unsupported format %s for US output\n", _mesa_get_format_name(mesa_format));
+        assert(0);
+        return 0;
+    }
+}
+#undef EASY_US_FORMAT
+
+static void r500_emit_fp_setup(struct r300_context *r300,
+                               struct r500_fragment_program_code *fp,
+                               gl_format dst_format)
+{
+    BATCH_LOCALS(&r300->radeon);
+    /* Hand the instructions to r500_emit_fp() first, then emit the code range, config, output format and pixsize registers. */
+    r500_emit_fp(r300, (uint32_t *)fp->inst, (fp->inst_end + 1) * 6, 0, 0, 0);
+    BEGIN_BATCH(10);
+    OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3);
+    OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(fp->inst_end));
+    OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(fp->inst_end));
+    OUT_BATCH(0);
+    OUT_BATCH_REGVAL(R500_US_CONFIG, 0);
+    OUT_BATCH_REGVAL(R500_US_OUT_FMT_0, mesa_format_to_us_format(dst_format));
+    OUT_BATCH_REGVAL(R500_US_PIXSIZE, fp->max_temp_idx);
+    END_BATCH();
+}
+
+static void r500_emit_rs_setup(struct r300_context *r300)
+{
+    BATCH_LOCALS(&r300->radeon);
+    /* Emit the R500 rasterizer state: RS_COUNT (IT_COUNT field = 4, HIRES_EN), RS_INST_0 and RS_IP_0. */
+    BEGIN_BATCH(7);
+    OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
+    OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN);
+    OUT_BATCH(0);
+    OUT_BATCH_REGVAL(R500_RS_INST_0,
+                     (0 << R500_RS_INST_TEX_ID_SHIFT) |
+                     (0 << R500_RS_INST_TEX_ADDR_SHIFT) |
+                     R500_RS_INST_TEX_CN_WRITE |
+                     R500_RS_INST_COL_CN_NO_WRITE);
+    OUT_BATCH_REGVAL(R500_RS_IP_0, /* S, T, R, Q pointers are 0, 1, 2, 3 */
+                     (0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+                     (1 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+                     (2 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+                     (3 << R500_RS_IP_TEX_PTR_Q_SHIFT));
+    END_BATCH();
+}
+
+static void r300_emit_fp_setup(struct r300_context *r300,
+                               struct r300_fragment_program_code *code,
+                               gl_format dst_format)
+{
+    unsigned i;
+    BATCH_LOCALS(&r300->radeon);
+    /* Size: four ALU sequences of (1 + alu.length), the TEX sequence (1 + tex.length), 11 for the rest -- presumably in dwords. */
+    BEGIN_BATCH((code->alu.length + 1) * 4 + code->tex.length + 1 + 11);
+    /* The ALU instructions go out as four register arrays: RGB inst, RGB addr, alpha inst, alpha addr. */
+    OUT_BATCH_REGSEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
+    for (i = 0; i < code->alu.length; i++) {
+        OUT_BATCH(code->alu.inst[i].rgb_inst);
+    }
+    OUT_BATCH_REGSEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
+    for (i = 0; i < code->alu.length; i++) {
+        OUT_BATCH(code->alu.inst[i].rgb_addr);
+    }
+    OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
+    for (i = 0; i < code->alu.length; i++) {
+        OUT_BATCH(code->alu.inst[i].alpha_inst);
+    }
+    OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
+    for (i = 0; i < code->alu.length; i++) {
+        OUT_BATCH(code->alu.inst[i].alpha_addr);
+    }
+
+    OUT_BATCH_REGSEQ(R300_US_TEX_INST_0, code->tex.length);
+    OUT_BATCH_TABLE(code->tex.inst, code->tex.length);
+    /* NOTE(review): the R500_US_OUT_FMT_0 name is used below on this R300 path; presumably the same register -- confirm. */
+    OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
+    OUT_BATCH(R300_PFS_CNTL_FIRST_NODE_HAS_TEX);
+    OUT_BATCH(code->pixsize);
+    OUT_BATCH(code->code_offset);
+    OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
+    OUT_BATCH_TABLE(code->code_addr, 4);
+    OUT_BATCH_REGVAL(R500_US_OUT_FMT_0, mesa_format_to_us_format(dst_format));
+    END_BATCH();
+}
+
+static void r300_emit_rs_setup(struct r300_context *r300)
+{
+    BATCH_LOCALS(&r300->radeon);
+    /* Emit the R300 rasterizer state: RS_COUNT (IT_COUNT field = 4, HIRES_EN), RS_INST_0 and RS_IP_0. */
+    BEGIN_BATCH(7);
+    OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
+    OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN);
+    OUT_BATCH(0);
+    OUT_BATCH_REGVAL(R300_RS_INST_0,
+                     R300_RS_INST_TEX_ID(0) |
+                     R300_RS_INST_TEX_ADDR(0) |
+                     R300_RS_INST_TEX_CN_WRITE);
+    OUT_BATCH_REGVAL(R300_RS_IP_0, /* S and T select C0/C1, R and Q select K0/K1 */
+                     R300_RS_TEX_PTR(0) |
+                     R300_RS_SEL_S(R300_RS_SEL_C0) |
+                     R300_RS_SEL_T(R300_RS_SEL_C1) |
+                     R300_RS_SEL_R(R300_RS_SEL_K0) |
+                     R300_RS_SEL_Q(R300_RS_SEL_K1));
+    END_BATCH();
+}
+
+static void emit_pvs_setup(struct r300_context *r300,
+                           uint32_t *vp_code,
+                           unsigned vp_len)
+{
+    BATCH_LOCALS(&r300->radeon);
+    /* vp_len counts instructions; r300_emit_vpu() is passed vp_len * 4 (presumably dwords -- TODO confirm). */
+    r300_emit_vpu(r300, vp_code, vp_len * 4, R300_PVS_CODE_START);
+    /* Code control: first instruction 0; the XYZW-valid, last and last-vertex-source fields are all vp_len - 1. */
+    BEGIN_BATCH(4);
+    OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
+    OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) |
+              ((vp_len - 1)  << R300_PVS_XYZW_VALID_INST_SHIFT) |
+              ((vp_len - 1)<< R300_PVS_LAST_INST_SHIFT));
+    OUT_BATCH(0);
+    OUT_BATCH((vp_len - 1) << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+    END_BATCH();
+}
+
+/* Emit vertex fetch state for the blit quad: two FLOAT_2 streams, each
+ * swizzled to (x, y, 0, 1).  The first goes to vector 0, the second to
+ * vector 1 when hw TCL is enabled and to vector 6 otherwise. */
+static void emit_vap_setup(struct r300_context *r300)
+{
+    const int tex_offset = r300->options.hw_tcl_enabled ? 1 : 6;
+    BATCH_LOCALS(&r300->radeon);
+
+    BEGIN_BATCH(12);
+    OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2);
+    OUT_BATCH(R300_VTX_XY_FMT | R300_VTX_Z_FMT);
+    OUT_BATCH(4);
+
+    OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
+    OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_0,
+                     ((R300_DATA_TYPE_FLOAT_2 | (0 << R300_DST_VEC_LOC_SHIFT)) << 0) |
+                     (((tex_offset << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_2 | R300_LAST_VEC) << 16));
+    /* Same swizzle and write enable for both streams (low and high half). */
+    OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+                    ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
+                       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
+                       (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) |
+                       (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) |
+                       (0xf << R300_WRITE_ENA_SHIFT) ) << 0) |
+                     (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
+                       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
+                       (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) |
+                       (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) |
+                       (0xf << R300_WRITE_ENA_SHIFT) ) << 16) ) );
+
+    OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
+    OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT);
+    OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS);
+    END_BATCH();
+}
+
+/* Reset the command stream's buffer list and run the space check for src_bo
+ * and dst_bo.  Returns GL_TRUE when both checks succeed, GL_FALSE otherwise. */
+static GLboolean validate_buffers(struct r300_context *r300,
+                                  struct radeon_bo *src_bo,
+                                  struct radeon_bo *dst_bo)
+{
+    const uint32_t vram_or_gtt = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+
+    radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs);
+
+    /* Source: domains in the first slot (presumably the read domains). */
+    if (radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, src_bo, vram_or_gtt, 0) != 0)
+        return GL_FALSE;
+
+    /* Destination: domains in the second slot (presumably the write domain). */
+    if (radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, dst_bo, 0, vram_or_gtt) != 0)
+        return GL_FALSE;
+
+    return GL_TRUE;
+}
+
+/**
+ * Calculate normalized texcoords for the given region of an image.
+ * Output values are [minx, maxx, miny, maxy]; with flip_y set both y values become 1 - y.
+ */
+static void calc_tex_coords(float img_width, float img_height,
+                            float x, float y,
+                            float reg_width, float reg_height,
+                            unsigned flip_y, float *buf)
+{
+    const float min_x = x / img_width;
+    const float min_y = y / img_height;
+    const float max_x = min_x + reg_width / img_width;
+    const float max_y = min_y + reg_height / img_height;
+
+    buf[0] = min_x;
+    buf[1] = max_x;
+    buf[2] = flip_y ? 1.0 - min_y : min_y;
+    buf[3] = flip_y ? 1.0 - max_y : max_y;
+}
+
+/* Draw the blit as one immediate-mode quad.  Every vertex is four floats:
+ * destination x, y followed by the source texcoords from calc_tex_coords(). */
+static void emit_draw_packet(struct r300_context *r300,
+                             unsigned src_width, unsigned src_height,
+                             unsigned src_x_offset, unsigned src_y_offset,
+                             unsigned dst_x_offset, unsigned dst_y_offset,
+                             unsigned reg_width, unsigned reg_height,
+                             unsigned flip_y)
+{
+    const float xmin = dst_x_offset, xmax = dst_x_offset + reg_width;
+    const float ymin = dst_y_offset, ymax = dst_y_offset + reg_height;
+    float tc[4];    /* [minx, maxx, miny, maxy] */
+    float verts[16];
+    BATCH_LOCALS(&r300->radeon);
+
+    calc_tex_coords(src_width, src_height, src_x_offset, src_y_offset,
+                    reg_width, reg_height, flip_y, tc);
+
+    /* Corner order: (xmin,ymin), (xmin,ymax), (xmax,ymax), (xmax,ymin). */
+    verts[0]  = xmin; verts[1]  = ymin; verts[2]  = tc[0]; verts[3]  = tc[2];
+    verts[4]  = xmin; verts[5]  = ymax; verts[6]  = tc[0]; verts[7]  = tc[3];
+    verts[8]  = xmax; verts[9]  = ymax; verts[10] = tc[1]; verts[11] = tc[3];
+    verts[12] = xmax; verts[13] = ymin; verts[14] = tc[1]; verts[15] = tc[2];
+
+    /* Four embedded vertices drawn as a quad primitive. */
+    BEGIN_BATCH(19);
+    OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_IMMD_2, 16);
+    OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED |
+              (4 << 16) | R300_VAP_VF_CNTL__PRIM_QUADS);
+    OUT_BATCH_TABLE(verts, 16);
+    END_BATCH();
+}
+
+static void other_stuff(struct r300_context *r300)
+{
+    BATCH_LOCALS(&r300->radeon);
+    /* Remaining fixed state for a blit: zero goes to FG_FOG_BLEND, FG_ALPHA_FUNC, RB3D_CBLEND (and the register after it) and ZB_CNTL. */
+    BEGIN_BATCH(13);
+    OUT_BATCH_REGVAL(R300_GA_POLY_MODE,
+                     R300_GA_POLY_MODE_FRONT_PTYPE_TRI | R300_GA_POLY_MODE_BACK_PTYPE_TRI);
+    OUT_BATCH_REGVAL(R300_SU_CULL_MODE, R300_FRONT_FACE_CCW);
+    OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0);
+    OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0);
+    OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2);
+    OUT_BATCH(0x0);
+    OUT_BATCH(0x0);
+    OUT_BATCH_REGVAL(R300_ZB_CNTL, 0);
+    END_BATCH();
+    if (r300->options.hw_tcl_enabled) { /* VAP_CLIP_CNTL is only written with hw TCL */
+        BEGIN_BATCH(2);
+        OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
+        END_BATCH();
+    }
+}
+
+/* Emit the destination color buffer via r300_emit_cb_setup(), then a scissor
+ * rectangle covering (0, 0) .. (width - 1, height - 1) and RB3D_CCTL = 0. */
+static void emit_cb_setup(struct r300_context *r300,
+                          struct radeon_bo *bo,
+                          intptr_t offset,
+                          gl_format mesa_format,
+                          unsigned pitch,
+                          unsigned width,
+                          unsigned height)
+{
+    /* Chips older than RV515 get their scissor coordinates biased by
+     * R300_SCISSORS_OFFSET; RV515 and newer get them unbiased. */
+    const unsigned bias =
+        (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) ? R300_SCISSORS_OFFSET : 0;
+    const unsigned tl_x = bias;
+    const unsigned tl_y = bias;
+    const unsigned br_x = width - 1 + bias;
+    const unsigned br_y = height - 1 + bias;
+    BATCH_LOCALS(&r300->radeon);
+
+    r300_emit_cb_setup(r300, bo, offset, mesa_format,
+                       _mesa_get_format_bytes(mesa_format),
+                       _mesa_format_row_stride(mesa_format, pitch));
+
+    /* Top-left corner first, bottom-right corner second. */
+    BEGIN_BATCH_NO_AUTOSTATE(5);
+    OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
+    OUT_BATCH((tl_x << R300_SCISSORS_X_SHIFT)|(tl_y << R300_SCISSORS_Y_SHIFT));
+    OUT_BATCH((br_x << R300_SCISSORS_X_SHIFT)|(br_y << R300_SCISSORS_Y_SHIFT));
+    OUT_BATCH_REGVAL(R300_RB3D_CCTL, 0);
+    END_BATCH();
+
+    /* The scissor batch uses BEGIN_BATCH_NO_AUTOSTATE rather than BEGIN_BATCH. */
+}
+
+/* Report whether r300_blit() accepts dst_format as destination format.
+ * Returns 1 for the color formats listed in the switch, 0 for anything
+ * else and for any format that reports depth bits. */
+unsigned r300_check_blit(gl_format dst_format)
+{
+    switch (dst_format) {
+    case MESA_FORMAT_RGB565:
+    case MESA_FORMAT_ARGB1555:
+    case MESA_FORMAT_RGBA8888:
+    case MESA_FORMAT_RGBA8888_REV:
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_ARGB8888_REV:
+    case MESA_FORMAT_XRGB8888:
+        return _mesa_get_format_bits(dst_format, GL_DEPTH_BITS) <= 0;
+    default:
+        break;
+    }
+
+    return 0;
+}
+
+/**
+ * Copy a region of [@a reg_width x @a reg_height] pixels from source buffer
+ * to destination buffer. The region is clamped to fit inside both images;
+ * blits within a single buffer object are refused.
+ * @param[in] ctx GL context (must belong to an r300 context)
+ * @param[in] src_bo source radeon buffer object
+ * @param[in] src_offset offset of the source image in the @a src_bo
+ * @param[in] src_mesaformat source image format
+ * @param[in] src_pitch aligned source image width
+ * @param[in] src_width source image width
+ * @param[in] src_height source image height
+ * @param[in] src_x_offset x offset in the source image
+ * @param[in] src_y_offset y offset in the source image
+ * @param[in] dst_bo destination radeon buffer object
+ * @param[in] dst_offset offset of the destination image in the @a dst_bo
+ * @param[in] dst_mesaformat destination image format
+ * @param[in] dst_pitch aligned destination image width
+ * @param[in] dst_width destination image width
+ * @param[in] dst_height destination image height
+ * @param[in] dst_x_offset x offset in the destination image
+ * @param[in] dst_y_offset y offset in the destination image
+ * @param[in] reg_width region width
+ * @param[in] reg_height region height
+ * @param[in] flip_y set if y coords of the source image need to be flipped
+ * @return 1 if the blit was emitted, 0 if it was not performed (unsupported
+ *         format, same bo, misaligned or out-of-range offset, no space)
+ */
+unsigned r300_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x_offset,
+                   unsigned src_y_offset,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x_offset,
+                   unsigned dst_y_offset,
+                   unsigned reg_width,
+                   unsigned reg_height,
+                   unsigned flip_y)
+{
+    r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+    if (!r300_check_blit(dst_mesaformat))
+        return 0;
+
+    /* Blits within one buffer object and images whose offset is not
+     * 32-byte aligned are not handled here. */
+    if (src_bo == dst_bo || src_offset % 32 || dst_offset % 32)
+        return 0;
+
+    /* An offset beyond the image edge would make the unsigned clamping
+     * below wrap around to a huge region, so refuse such requests. */
+    if (src_x_offset > src_width || src_y_offset > src_height ||
+        dst_x_offset > dst_width || dst_y_offset > dst_height)
+        return 0;
+
+    /* Make sure that colorbuffer has even width - hw limitation */
+    if (dst_pitch % 2 > 0)
+        ++dst_pitch;
+
+    /* Clamp the region so that we neither read outside of the source
+     * image nor write outside of the destination image.  Comparing against
+     * the remaining size avoids overflow in "offset + size". */
+    if (reg_width > src_width - src_x_offset)
+        reg_width = src_width - src_x_offset;
+    if (reg_height > src_height - src_y_offset)
+        reg_height = src_height - src_y_offset;
+    if (reg_width > dst_width - dst_x_offset)
+        reg_width = dst_width - dst_x_offset;
+    if (reg_height > dst_height - dst_y_offset)
+        reg_height = dst_height - dst_y_offset;
+
+    if (0) {
+        fprintf(stderr, "src: size [%u x %u], pitch %u, "
+                "offset [%u x %u], format %s, bo %p\n",
+                src_width, src_height, src_pitch,
+                src_x_offset, src_y_offset,
+                _mesa_get_format_name(src_mesaformat),
+                (void *)src_bo);
+        fprintf(stderr, "dst: pitch %u, offset[%u x %u], format %s, bo %p\n",
+                dst_pitch, dst_x_offset, dst_y_offset,
+                _mesa_get_format_name(dst_mesaformat), (void *)dst_bo);
+        fprintf(stderr, "region: %u x %u\n", reg_width, reg_height);
+    }
+
+    /* Flush is needed to make sure that source buffer has correct data */
+    radeonFlush(r300->radeon.glCtx);
+
+    if (!validate_buffers(r300, src_bo, dst_bo))
+        return 0;
+
+    rcommonEnsureCmdBufSpace(&r300->radeon, 200, __FUNCTION__);
+    other_stuff(r300);
+    r300_emit_tx_setup(r300, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
+
+    if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+        r500_emit_fp_setup(r300, &r300->blit.fp_code.code.r500, dst_mesaformat);
+        r500_emit_rs_setup(r300);
+    } else {
+        r300_emit_fp_setup(r300, &r300->blit.fp_code.code.r300, dst_mesaformat);
+        r300_emit_rs_setup(r300);
+    }
+
+    if (r300->options.hw_tcl_enabled)
+        emit_pvs_setup(r300, r300->blit.vp_code.body.d, 2);
+    emit_vap_setup(r300);
+    emit_cb_setup(r300, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
+
+    emit_draw_packet(r300, src_width, src_height,
+                     src_x_offset, src_y_offset,
+                     dst_x_offset, dst_y_offset,
+                     reg_width, reg_height,
+                     flip_y);
+
+    r300EmitCacheFlush(r300);
+    radeonFlush(r300->radeon.glCtx);
+
+    return 1;
+}
diff --git a/src/mesa/drivers/dri/r300/r300_blit.h b/src/mesa/drivers/dri/r300/r300_blit.h
new file mode 100644
index 0000000000..39b157a57b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_blit.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef R300_BLIT_H
+#define R300_BLIT_H
+
+void r300_blit_init(struct r300_context *r300); /* compiles the blit vertex (hw TCL only) and fragment programs */
+/* Returns 1 when the given format can be used as a blit destination, 0 otherwise. */
+unsigned r300_check_blit(gl_format mesa_format);
+/* Copies a region between two buffer objects; returns 1 if the blit was emitted, 0 if not (see r300_blit.c). */
+unsigned r300_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x_offset,
+                   unsigned src_y_offset,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x_offset,
+                   unsigned dst_y_offset,
+                   unsigned reg_width,
+                   unsigned reg_height,
+                   unsigned flip_y);
+
+#endif // R300_BLIT_H
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
new file mode 100644
index 0000000000..c40802aec6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
@@ -0,0 +1,907 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#include "main/glheader.h"
+#include "main/state.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+
+#include "drm.h"
+#include "radeon_drm.h"
+
+#include "r300_context.h"
+#include "r300_reg.h"
+#include "r300_cmdbuf.h"
+#include "r300_emit.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_mipmap_tree.h"
+#include "radeon_queryobj.h"
+
+/** # of dwords reserved for additional instructions that may need to be written
+ * during flushing.
+ */
+#define SPACE_FOR_FLUSHING	4
+
+/* Count field of a packet0 header: bits 16..29 plus one under kernel_mm, else the DRM header's packet0.count. */
+static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt)
+{
+    const drm_r300_cmd_header_t *hdr = (const drm_r300_cmd_header_t *)pkt;
+
+    if (!r300->radeon.radeonScreen->kernel_mm)
+        return hdr->packet0.count;
+    return (((*pkt) >> 16) & 0x3FFF) + 1;
+}
+
+#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) /* vpu.count of the DRM command header at ptr */
+#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) /* r500fp.count of the DRM command header at ptr */
+
+/**
+ * Dword count needed to emit a VPU upload atom: four dwords per entry
+ * counted in the header, plus 3 extra dwords under kernel_mm (1 otherwise).
+ * Returns 0 when the header count is zero.
+ */
+static int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	const int entries = vpu_count(atom->cmd);
+	const int overhead = r300->radeon.radeonScreen->kernel_mm ? 3 : 1;
+
+	return entries == 0 ? 0 : (entries * 4) + overhead;
+}
+
+/**
+ * Dwords reserved for the vertex-program parameter atom: count * 4 plus
+ * 3 (kernel_mm) or 1 extra, or 0 when count is zero.  The count is the
+ * selected program's constant count * 4 under kernel_mm, else the header's.
+ * NOTE(review): the kernel_mm result is Count*16+3 while emit_vpp_state()
+ * writes Count*4+3 dwords -- looks like over-reservation; confirm.
+ */
+static int check_vpp(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+    r300ContextPtr r300 = R300_CONTEXT(ctx);
+    const int kms = r300->radeon.radeonScreen->kernel_mm != 0;
+    const int n = kms ? (int)(r300->selected_vp->code.constants.Count * 4)
+                      : (int)vpu_count(atom->cmd);
+
+    return n != 0 ? (n * 4) + (kms ? 3 : 1) : 0;
+}
+
+/* Write addr to R300_VAP_PVS_VECTOR_INDX_REG, then stream len dwords of data
+ * to R300_VAP_PVS_UPLOAD_DATA with one packet0 flagged RADEON_ONE_REG_WR. */
+void r300_emit_vpu(struct r300_context *r300, uint32_t *data, unsigned len, uint32_t addr)
+{
+    BATCH_LOCALS(&r300->radeon);
+    const uint32_t upload_hdr = CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, len - 1) | RADEON_ONE_REG_WR;
+
+    BEGIN_BATCH_NO_AUTOSTATE(len + 3);
+    OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr);
+    OUT_BATCH(upload_hdr);
+    OUT_BATCH_TABLE(data, len);
+    END_BATCH();
+}
+
+/* Emit a VPU atom: decode the upload address from the header in cmd[0] and
+ * send the payload after it (vpu.count * 4 dwords) via r300_emit_vpu(). */
+static void emit_vpu_state(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+    drm_r300_cmd_header_t hdr;
+    uint32_t dst;
+
+    hdr.u = atom->cmd[0];
+    dst = hdr.vpu.adrlo | (hdr.vpu.adrhi << 8);
+    r300_emit_vpu(R300_CONTEXT(ctx), atom->cmd + 1, vpu_count(atom->cmd) * 4, dst);
+}
+
+/* Emit the vertex-program parameter atom: address from the header in cmd[0],
+ * length from the selected vertex program's constant count (* 4 dwords). */
+static void emit_vpp_state(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+    r300ContextPtr r300 = R300_CONTEXT(ctx);
+    drm_r300_cmd_header_t hdr;
+
+    hdr.u = atom->cmd[0];
+    r300_emit_vpu(r300, atom->cmd + 1, r300->selected_vp->code.constants.Count * 4,
+                  hdr.vpu.adrlo | (hdr.vpu.adrhi << 8));
+}
+
+/**
+ * Upload len dwords of R500 fragment-program data.
+ *
+ * The value written to R500_GA_US_VECTOR_INDEX is addr with type placed
+ * at bit 16 and clamp at bit 17; the payload then goes to
+ * R500_GA_US_VECTOR_DATA through a packet0 flagged RADEON_ONE_REG_WR.
+ */
+void r500_emit_fp(struct r300_context *r300, uint32_t *data, unsigned len,
+                  uint32_t addr, unsigned type, unsigned clamp)
+{
+    BATCH_LOCALS(&r300->radeon);
+    const uint32_t index = addr | (type << 16) | (clamp << 17);
+
+    BEGIN_BATCH_NO_AUTOSTATE(3 + len);
+    OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0));
+    OUT_BATCH(index);
+    OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, len - 1) | RADEON_ONE_REG_WR);
+    OUT_BATCH_TABLE(data, len);
+    END_BATCH();
+}
+
+/**
+ * Emit an r500fp atom through r500_emit_fp().
+ *
+ * The header in cmd[0] supplies the address (bit 0 of adrhi_flags is
+ * address bit 8) and the constant-type / clamp flags.  Each counted entry
+ * is 4 dwords when the constant-type flag is set and 6 dwords otherwise.
+ */
+static void emit_r500fp_atom(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+    drm_r300_cmd_header_t hdr;
+    uint32_t dst, ndw;
+    int is_const, do_clamp;
+
+    hdr.u = atom->cmd[0];
+    is_const = (hdr.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE) != 0;
+    do_clamp = (hdr.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP) != 0;
+    dst = hdr.r500fp.adrlo | ((hdr.r500fp.adrhi_flags & 1) << 8);
+    ndw = r500fp_count(atom->cmd) * (is_const ? 4 : 6);
+    r500_emit_fp(R300_CONTEXT(ctx), atom->cmd + 1, ndw, dst, is_const, do_clamp);
+}
+
+/* Dword budget for the texture-offset atom (0 when cmd[0] is CP_PACKET2).
+ * Per unit: 4 for a relocated offset, 2 for a bo-less override without
+ * kernel_mm, 0 for a bo-less override with it or an unbound unit without it.
+ */
+static int check_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	const int units = packet0_count(r300, r300->hw.tex.offset.cmd);
+	const int kms = r300->radeon.radeonScreen->kernel_mm != 0;
+	int total = 0, unit;
+
+	for (unit = 0; atom->cmd[0] != CP_PACKET2 && unit < units; ++unit) {
+		const radeonTexObj *tex = r300->hw.textures[unit];
+		if (tex && tex->image_override && !tex->bo)
+			total += kms ? 0 : 2;
+		else if (tex || kms)
+			total += 4;
+	}
+	return total;
+}
+
+/**
+ * Emit R300_TX_OFFSET_n for each unit counted in the tex.offset header.
+ * Unbound unit: under kernel_mm the current color buffer's bo is written as
+ * a stand-in so KIL works (the CS checker complains otherwise), else nothing.
+ * Regular texture: relocation against the miptree bo at the base image offset.
+ * Image override: relocation against its bo, or the raw offset (non-KMS only).
+ */
+static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	BATCH_LOCALS(&r300->radeon);
+	const int units = packet0_count(r300, r300->hw.tex.offset.cmd);
+	int unit;
+
+	for (unit = 0; unit < units; ++unit) {
+		radeonTexObj *tex = r300->hw.textures[unit];
+		if (!tex) {
+			struct radeon_renderbuffer *rrb;
+			if (!r300->radeon.radeonScreen->kernel_mm)
+				continue;
+			rrb = radeon_get_colorbuffer(&r300->radeon);
+			if (rrb && rrb->bo) {
+				BEGIN_BATCH_NO_AUTOSTATE(4);
+				OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (unit * 4), 1);
+				OUT_BATCH_RELOC(0, rrb->bo, 0,
+						RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+				END_BATCH();
+			}
+		} else if (!tex->image_override) {
+			BEGIN_BATCH_NO_AUTOSTATE(4);
+			OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (unit * 4), 1);
+			OUT_BATCH_RELOC(tex->tile_bits, tex->mt->bo, get_base_teximage_offset(tex),
+					RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+			END_BATCH();
+		} else if (tex->bo) {
+			BEGIN_BATCH_NO_AUTOSTATE(4);
+			OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (unit * 4), 1);
+			OUT_BATCH_RELOC(tex->tile_bits, tex->bo, 0,
+					RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+			END_BATCH();
+		} else if (!r300->radeon.radeonScreen->kernel_mm) {
+			BEGIN_BATCH_NO_AUTOSTATE(2);
+			OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (unit * 4), 1);
+			OUT_BATCH(tex->override_offset);
+			END_BATCH();
+		}
+	}
+}
+
+/**
+ * Emit the scissor rectangle registers (DRI2 only; a no-op otherwise).
+ * Uses the GL scissor rectangle when scissoring is enabled, else the full
+ * color buffer, and biases every coordinate by R300_SCISSORS_OFFSET on
+ * chips before RV515.  Prints "no rrb" and bails without a usable buffer.
+ */
+void r300_emit_scissor(GLcontext *ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	BATCH_LOCALS(&r300->radeon);
+	struct radeon_renderbuffer *rrb;
+	unsigned left = 0, top = 0, right, bottom, bias = 0;
+
+	if (!r300->radeon.radeonScreen->driScreen->dri2.enabled)
+		return;
+	rrb = radeon_get_colorbuffer(&r300->radeon);
+	if (!rrb || !rrb->bo) {
+		fprintf(stderr, "no rrb\n");
+		return;
+	}
+	if (r300->radeon.state.scissor.enabled) {
+		left = r300->radeon.state.scissor.rect.x1;
+		top = r300->radeon.state.scissor.rect.y1;
+		right = r300->radeon.state.scissor.rect.x2;
+		bottom = r300->radeon.state.scissor.rect.y2;
+	} else {
+		right = rrb->base.Width - 1;
+		bottom = rrb->base.Height - 1;
+	}
+	if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515)
+		bias = R300_SCISSORS_OFFSET;
+
+	BEGIN_BATCH_NO_AUTOSTATE(3);
+	OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
+	OUT_BATCH(((left + bias) << R300_SCISSORS_X_SHIFT)|((top + bias) << R300_SCISSORS_Y_SHIFT));
+	OUT_BATCH(((right + bias) << R300_SCISSORS_X_SHIFT)|((bottom + bias) << R300_SCISSORS_Y_SHIFT));
+	END_BATCH();
+}
+/* Dwords for the color-buffer atom: 6 for offset/pitch (+2 under kernel_mm),
+ * plus 3 + 16 for the scissor/cliprect setup that is emitted only with DRI2. */
+static int check_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	uint32_t total = r300->radeon.radeonScreen->kernel_mm ? 6 + 2 : 6;
+
+	if (r300->radeon.radeonScreen->driScreen->dri2.enabled)
+		total += 3 + 16;
+	return total;
+}
+
+/**
+ * Emit a scissor setup covering a whole width x height surface.
+ *
+ * Two batches are written: the scissor rectangle (3 dwords), then all four
+ * clip rectangles followed by the clip rule (0xAAAA) and the screendoor
+ * mask (0xffffff) (16 dwords).
+ *
+ * Chips before RV515 get every coordinate biased by R300_SCISSORS_OFFSET;
+ * RV515 and later use the coordinates as they are.
+ */
+static void emit_scissor(struct r300_context *r300,
+                         unsigned width,
+                         unsigned height)
+{
+    BATCH_LOCALS(&r300->radeon);
+    const unsigned bias =
+        r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515 ? 0 : R300_SCISSORS_OFFSET;
+
+    /* top-left and bottom-right corners, bias included */
+    const unsigned x0 = bias;
+    const unsigned y0 = bias;
+    const unsigned x1 = width + bias - 1;
+    const unsigned y1 = height + bias - 1;
+    int rect;
+
+    /* scissor rectangle */
+    BEGIN_BATCH_NO_AUTOSTATE(3);
+    OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
+    OUT_BATCH((x0 << R300_SCISSORS_X_SHIFT) | (y0 << R300_SCISSORS_Y_SHIFT));
+    OUT_BATCH((x1 << R300_SCISSORS_X_SHIFT) | (y1 << R300_SCISSORS_Y_SHIFT));
+    END_BATCH();
+
+    /* 4 clip rects * 3 dwords + clip rule (2) + screendoor (2) = 16 */
+    BEGIN_BATCH_NO_AUTOSTATE(16);
+    for (rect = 0; rect < 4; rect++) {
+        OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (rect * 8), 2);
+        OUT_BATCH((x0 << R300_CLIPRECT_X_SHIFT) | (y0 << R300_CLIPRECT_Y_SHIFT));
+        OUT_BATCH((x1 << R300_CLIPRECT_X_SHIFT) | (y1 << R300_CLIPRECT_Y_SHIFT));
+    }
+
+    OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1);
+    OUT_BATCH(0xAAAA);
+    OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1);
+    OUT_BATCH(0xffffff);
+    END_BATCH();
+}
+
+/**
+ * Program color buffer 0: R300_RB3D_COLOROFFSET0 as a relocation against
+ * bo at offset (asserted to be a multiple of 32), and
+ * R300_RB3D_COLORPITCH0 as pitch / cpp combined with the hardware color
+ * format chosen for format and, for macro-tiled bos, the tile-enable bit.
+ * Under kernel_mm the pitch is emitted as a relocation too (2 more dwords).
+ */
+void r300_emit_cb_setup(struct r300_context *r300, struct radeon_bo *bo, uint32_t offset,
+                        GLuint format, unsigned cpp, unsigned pitch)
+{
+    BATCH_LOCALS(&r300->radeon);
+    const int kms = r300->radeon.radeonScreen->kernel_mm != 0;
+    const uint32_t dw = kms ? 6 + 2 : 6;
+    uint32_t pitch_reg = pitch / cpp;
+
+    assert(offset % 32 == 0);
+
+    switch (format) {
+    case MESA_FORMAT_SL8:
+    case MESA_FORMAT_A8:
+    case MESA_FORMAT_L8:
+    case MESA_FORMAT_I8:
+        pitch_reg |= R300_COLOR_FORMAT_I8;
+        break;
+    case MESA_FORMAT_RGB565:
+    case MESA_FORMAT_RGB565_REV:
+        pitch_reg |= R300_COLOR_FORMAT_RGB565;
+        break;
+    case MESA_FORMAT_ARGB4444:
+    case MESA_FORMAT_ARGB4444_REV:
+        pitch_reg |= R300_COLOR_FORMAT_ARGB4444;
+        break;
+    case MESA_FORMAT_RGBA5551:
+    case MESA_FORMAT_ARGB1555:
+    case MESA_FORMAT_ARGB1555_REV:
+        pitch_reg |= R300_COLOR_FORMAT_ARGB1555;
+        break;
+    case MESA_FORMAT_RGBA8888:
+    case MESA_FORMAT_RGBA8888_REV:
+    case MESA_FORMAT_XRGB8888:
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888_REV:
+    case MESA_FORMAT_ARGB8888_REV:
+    case MESA_FORMAT_SRGBA8:
+    case MESA_FORMAT_SARGB8:
+        pitch_reg |= R300_COLOR_FORMAT_ARGB8888;
+        break;
+    default:
+        _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()");
+        break;
+    }
+    if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+        pitch_reg |= R300_COLOR_TILE_ENABLE;
+
+    BEGIN_BATCH_NO_AUTOSTATE(dw);
+    OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
+    OUT_BATCH_RELOC(offset, bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+    OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1);
+    if (kms)
+        OUT_BATCH_RELOC(pitch_reg, bo, pitch_reg, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+    else
+        OUT_BATCH(pitch_reg);
+    END_BATCH();
+}
+
+/**
+ * Emit callback for the "cb" atom: program the current color buffer via
+ * r300_emit_cb_setup() and, with DRI2, a scissor covering the whole buffer.
+ * Prints "no rrb" and emits nothing when no color buffer with a bo exists.
+ */
+static void emit_cb_offset_atom(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+    r300ContextPtr r300 = R300_CONTEXT(ctx);
+    const uint32_t draw_off = r300->radeon.state.color.draw_offset;
+    struct radeon_renderbuffer *cb = radeon_get_colorbuffer(&r300->radeon);
+
+    if (!cb || !cb->bo) {
+        fprintf(stderr, "no rrb\n");
+        return;
+    }
+    if (RADEON_DEBUG & RADEON_STATE)
+        fprintf(stderr,"rrb is %p %d %dx%d\n", cb, draw_off, cb->base.Width, cb->base.Height);
+    r300_emit_cb_setup(r300, cb->bo, draw_off, cb->base.Format, cb->cpp, cb->pitch);
+    if (r300->radeon.radeonScreen->driScreen->dri2.enabled)
+        emit_scissor(r300, cb->base.Width, cb->base.Height);
+}
+
+/* Dwords reserved for the depth-buffer offset atom: 6, or 8 under kernel_mm. */
+static int check_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+	if (r300->radeon.radeonScreen->kernel_mm)
+		return 6 + 2;
+	return 6;
+}
+
+/* Emit the depth-buffer offset and pitch for the "zb" atom (nothing if there
+ * is no depth renderbuffer).  Without kernel_mm the bo's macro/micro tiling
+ * flags are folded into the pitch dword; with it the pitch is a relocation.
+ * NOTE(review): rrb->bo is used without a NULL check, unlike the color path. */
+static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	BATCH_LOCALS(&r300->radeon);
+	uint32_t dw = atom->check(ctx, atom);
+	struct radeon_renderbuffer *rrb = radeon_get_depthbuffer(&r300->radeon);
+	uint32_t zbpitch;
+
+	if (!rrb)
+		return;
+	zbpitch = (rrb->pitch / rrb->cpp);
+	if (!r300->radeon.radeonScreen->kernel_mm) {
+		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+			zbpitch |= R300_DEPTHMACROTILE_ENABLE;
+		if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE)
+			zbpitch |= R300_DEPTHMICROTILE_TILED;
+	}
+
+	BEGIN_BATCH_NO_AUTOSTATE(dw);
+	OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
+	OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+	OUT_BATCH_REGSEQ(R300_ZB_DEPTHPITCH, 1);
+	if (!r300->radeon.radeonScreen->kernel_mm)
+		OUT_BATCH(zbpitch);
+	else
+		OUT_BATCH_RELOC(zbpitch, rrb->bo, zbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+	END_BATCH();
+}
+
+/**
+ * Emit the ZB_FORMAT atom, patching the low nibble of cmd[1] with the depth
+ * format chosen from the depth renderbuffer's cpp: 2 -> 16-bit integer Z,
+ * 4 -> 24-bit integer Z with 8-bit stencil, anything else (or no depth
+ * buffer) -> 0.  The patched value is stored back into the atom.
+ */
+static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	BATCH_LOCALS(&r300->radeon);
+	struct radeon_renderbuffer *depth = radeon_get_depthbuffer(&r300->radeon);
+	uint32_t zfmt = 0;
+	int i;
+
+	if (depth && depth->cpp == 2)
+		zfmt = R300_DEPTHFORMAT_16BIT_INT_Z;
+	else if (depth && depth->cpp == 4)
+		zfmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+
+	BEGIN_BATCH_NO_AUTOSTATE(atom->cmd_size);
+	OUT_BATCH(atom->cmd[0]);
+	/* patch the format nibble in place before it is written out */
+	atom->cmd[1] = (atom->cmd[1] & ~0xf) | zfmt;
+	for (i = 1; i < 5; i++)
+		OUT_BATCH(atom->cmd[i]);
+	END_BATCH();
+}
+
+static int check_never(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   return 0; /* always reports zero dwords for the atom */
+}
+
+static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	return atom->cmd_size; /* the atom's whole cmd_size, unconditionally */
+}
+
+/* Dwords for a variable-length packet0 atom: the header's count + 1, or 0
+ * when that count is zero or cmd[0] is CP_PACKET2. */
+static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	int regs = 0;
+
+	if (atom->cmd[0] != CP_PACKET2)
+		regs = packet0_count(R300_CONTEXT(ctx), atom->cmd);
+	return regs != 0 ? regs + 1 : 0;
+}
+
+/* Dwords for the r500fp atom: 6 per counted entry plus 3 (kernel_mm)
+ * or 1 extra; 0 when the header count is zero. */
+static int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	const int entries = r500fp_count(atom->cmd);
+
+	if (entries == 0)
+		return 0;
+	return (entries * 6) + (r300->radeon.radeonScreen->kernel_mm ? 3 : 1);
+}
+
+/* Dwords for the r500fp constant atom: 4 per counted entry plus 3
+ * (kernel_mm) or 1 extra; 0 when the header count is zero. */
+static int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	const int entries = r500fp_count(atom->cmd);
+	const int overhead = r300->radeon.radeonScreen->kernel_mm ? 3 : 1;
+
+	if (entries == 0)
+		return 0;
+	return (entries * 4) + overhead;
+}
+
+#define ALLOC_STATE( ATOM, CHK, SZ, IDX )				\
+   do {	/* fill in r300->hw.ATOM; its .emit member is not touched here */	\
+      r300->hw.ATOM.cmd_size = (SZ);					\
+      r300->hw.ATOM.cmd = (uint32_t*)CALLOC((SZ) * sizeof(uint32_t));	/* NOTE(review): result is not checked */ \
+      r300->hw.ATOM.name = #ATOM;					\
+      r300->hw.ATOM.idx = (IDX);					\
+      r300->hw.ATOM.check = check_##CHK;	/* CHK picks one of the check_* functions */ \
+      r300->hw.ATOM.dirty = GL_FALSE;					\
+      r300->radeon.hw.max_state_size += (SZ);	/* accumulate into max_state_size */	\
+      insert_at_tail(&r300->radeon.hw.atomlist, &r300->hw.ATOM);	/* append to the atom list */ \
+   } while (0)
+/**
+ * Allocate and queue every state atom, then initialize the command buffer
+ * via rcommonInitCmdBuf(). The initial hardware state is set by r300InitState().
+ */
+void r300InitCmdBuf(r300ContextPtr r300)
+{
+	int mtu;
+	int has_tcl;
+	int is_r500 = 0;
+
+	has_tcl = r300->options.hw_tcl_enabled;
+
+	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+		is_r500 = 1;
+
+	r300->radeon.hw.max_state_size = 2 + 2;	/* reserve extra space for WAIT_IDLE and tex cache flush */
+
+	mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
+	if (RADEON_DEBUG & RADEON_TEXTURE) {
+		fprintf(stderr, "Using %d maximum texture units..\n", mtu);
+	}
+
+	/* Setup the atom linked list */
+	make_empty_list(&r300->radeon.hw.atomlist);
+	r300->radeon.hw.atomlist.name = "atom-list";
+
+	/* Initialize state atoms */
+	ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0);
+	r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VPORT_XSCALE, 6);
+	ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0);
+	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1);
+	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0;
+	r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1);
+	if (is_r500 && !r300->radeon.radeonScreen->kernel_mm) {
+	    ALLOC_STATE(vap_index_offset, always, 2, 0);
+	    r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1);
+	    r300->hw.vap_index_offset.cmd[1] = 0;
+	}
+	ALLOC_STATE(vte, always, 3, 0);
+	r300->hw.vte.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VTE_CNTL, 2);
+	ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0);
+	r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VF_MAX_VTX_INDX, 2);
+	ALLOC_STATE(vap_cntl_status, always, 2, 0);
+	r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL_STATUS, 1);
+	ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0);
+	r300->hw.vir[0].cmd[R300_VIR_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_0, 1);
+	ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1);
+	r300->hw.vir[1].cmd[R300_VIR_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
+	ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0);
+	r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VTX_STATE_CNTL, 2);
+	ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0);
+	r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PSC_SGN_NORM_CNTL, 1); /* 2-dword atom: header + one register */
+	/* the clip and PVS-timeout atoms are only allocated with hardware TCL */
+	if (has_tcl) {
+		ALLOC_STATE(vap_clip_cntl, always, 2, 0);
+		r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CLIP_CNTL, 1);
+		ALLOC_STATE(vap_clip, always, 5, 0);
+		r300->hw.vap_clip.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_GB_VERT_CLIP_ADJ, 4);
+		ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0);
+		r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1);
+	}
+
+	ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0);
+	r300->hw.vof.cmd[R300_VOF_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_OUTPUT_VTX_FMT_0, 2);
+
+	if (has_tcl) {
+		ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0);
+		r300->hw.pvs.cmd[R300_PVS_CMD_0] =
+		    cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_CODE_CNTL_0, 3);
+	}
+
+	ALLOC_STATE(gb_enable, always, 2, 0);
+	r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1);
+	if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+		ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0);
+	} else {
+		ALLOC_STATE(gb_misc, never, R300_GB_MISC_CMDSIZE, 0);
+	}
+	r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 3);
+	ALLOC_STATE(gb_misc2, always, R300_GB_MISC2_CMDSIZE, 0);
+	r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2);
+	ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0);
+	r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1);
+	ALLOC_STATE(ga_point_s0, always, 5, 0);
+	r300->hw.ga_point_s0.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_S0, 4);
+	ALLOC_STATE(ga_triangle_stipple, always, 2, 0);
+	r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_TRIANGLE_STIPPLE, 1);
+	ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0);
+	r300->hw.ps.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_SIZE, 1);
+	ALLOC_STATE(ga_point_minmax, always, 4, 0);
+	r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_MINMAX, 3);
+	ALLOC_STATE(lcntl, always, 2, 0);
+	r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1);
+	ALLOC_STATE(ga_line_stipple, always, 4, 0);
+	r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3);
+	if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+		ALLOC_STATE(shade, always, 2, 0);
+	} else {
+		ALLOC_STATE(shade, never, 2, 0);
+	}
+	r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 1);
+	ALLOC_STATE(shade2, always, 4, 0);
+	r300->hw.shade2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4278, 3);
+	ALLOC_STATE(polygon_mode, always, 4, 0);
+	r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3);
+	ALLOC_STATE(fogp, always, 3, 0);
+	r300->hw.fogp.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_FOG_SCALE, 2);
+	ALLOC_STATE(zbias_cntl, always, 2, 0);
+	r300->hw.zbias_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_TEX_WRAP, 1);
+	ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0);
+	r300->hw.zbs.cmd[R300_ZBS_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
+	ALLOC_STATE(occlusion_cntl, always, 2, 0);
+	r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_ENABLE, 1);
+	ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0);
+	r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_CULL_MODE, 1);
+	ALLOC_STATE(su_depth_scale, always, 3, 0);
+	r300->hw.su_depth_scale.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_DEPTH_SCALE, 2);
+	ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0);
+	r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2);
+	if (is_r500) {
+		ALLOC_STATE(ri, variable, R500_RI_CMDSIZE, 0);
+		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16);
+		ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
+		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1);
+	} else {
+		ALLOC_STATE(ri, variable, R300_RI_CMDSIZE, 0);
+		r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8);
+		ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
+		r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1);
+	}
+	ALLOC_STATE(sc_hyperz, always, 3, 0);
+	r300->hw.sc_hyperz.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_HYPERZ, 2);
+	ALLOC_STATE(sc_screendoor, always, 2, 0);
+	r300->hw.sc_screendoor.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
+	ALLOC_STATE(us_out_fmt, always, 6, 0);
+	r300->hw.us_out_fmt.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_OUT_FMT, 5);
+	/* fragment program: R500 uses the r500fp upload atoms, older chips packet0 tables */
+	if (is_r500) {
+		ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0);
+		r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CONFIG, 2);
+		r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO;
+		r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CODE_ADDR, 3);
+		r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(r300->radeon.radeonScreen, R500_US_FC_CTRL, 1);
+		r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */
+
+		ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0);
+		r300->hw.r500fp.cmd[R300_FPI_CMD_0] =
+			cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0);
+		if (r300->radeon.radeonScreen->kernel_mm)
+			r300->hw.r500fp.emit = emit_r500fp_atom;
+
+		ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0);
+		r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] =
+			cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0);
+		if (r300->radeon.radeonScreen->kernel_mm)
+			r300->hw.r500fp_const.emit = emit_r500fp_atom;
+	} else {
+		ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0);
+		r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3);
+		r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CODE_ADDR_0, 4);
+
+		ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0);
+		r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_TEX_INST_0, 0);
+
+		ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0);
+		r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, 1);
+		ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1);
+		r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, 1);
+		ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2);
+		r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, 1);
+		ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3);
+		r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, 1);
+		ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0);
+		r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_PFS_PARAM_0_X, 0);
+	}
+	ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0);
+	r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_BLEND, 1);
+	ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0);
+	r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_COLOR_R, 3);
+	ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0);
+	r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_ALPHA_FUNC, 2);
+	ALLOC_STATE(fg_depth_src, always, 2, 0);
+	r300->hw.fg_depth_src.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_DEPTH_SRC, 1);
+	ALLOC_STATE(rb3d_cctl, always, 2, 0);
+	r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CCTL, 1);
+	ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0);
+	r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CBLEND, 2);
+	ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0);
+	r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1);
+	if (is_r500) {
+		ALLOC_STATE(blend_color, always, 3, 0);
+		r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_CONSTANT_COLOR_AR, 2);
+	} else {
+		ALLOC_STATE(blend_color, always, 2, 0);
+		r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_BLEND_COLOR, 1);
+	}
+	ALLOC_STATE(rop, always, 2, 0);
+	r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1);
+	ALLOC_STATE(cb, cb_offset, R300_CB_CMDSIZE, 0);
+	r300->hw.cb.emit = &emit_cb_offset_atom;
+	ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
+	r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9);
+	ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
+	r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1);
+	if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350) {
+		ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
+	} else {
+		ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, never, 3, 0);
+	}
+	r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
+	ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
+	r300->hw.zs.cmd[R300_ZS_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3);
+	if (is_r500) {
+		if (r300->radeon.radeonScreen->kernel_mm)
+			ALLOC_STATE(zsb, always, R300_ZSB_CMDSIZE, 0);
+		else
+			ALLOC_STATE(zsb, never, R300_ZSB_CMDSIZE, 0);
+		r300->hw.zsb.cmd[R300_ZSB_CMD_0] =
+			cmdpacket0(r300->radeon.radeonScreen, R500_ZB_STENCILREFMASK_BF, 1);
+	}
+
+	ALLOC_STATE(zstencil_format, always, 5, 0);
+	r300->hw.zstencil_format.cmd[0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4);
+	r300->hw.zstencil_format.emit = emit_zstencil_format;
+
+	ALLOC_STATE(zb, zb_offset, R300_ZB_CMDSIZE, 0);
+	r300->hw.zb.emit = emit_zb_offset;
+	ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
+	r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1);
+	ALLOC_STATE(zb_zmask, always, 3, 0);
+	r300->hw.zb_zmask.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZMASK_OFFSET, 2);
+	ALLOC_STATE(zb_hiz_offset, always, 2, 0);
+	r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1);
+	ALLOC_STATE(zb_hiz_pitch, always, 2, 0);
+	r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_PITCH, 1);
+
+	/* VPU only on TCL */
+	if (has_tcl) {
+		int i;
+		ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0);
+		r300->hw.vpi.cmd[0] =
+			cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0);
+		if (r300->radeon.radeonScreen->kernel_mm)
+			r300->hw.vpi.emit = emit_vpu_state;
+
+		if (is_r500) {
+			ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0);
+			r300->hw.vpp.cmd[0] =
+				cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0);
+			if (r300->radeon.radeonScreen->kernel_mm)
+				r300->hw.vpp.emit = emit_vpp_state;
+
+			ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+			r300->hw.vps.cmd[0] =
+				cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1);
+			if (r300->radeon.radeonScreen->kernel_mm)
+				r300->hw.vps.emit = emit_vpu_state;
+
+			for (i = 0; i < 6; i++) {
+				ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
+				r300->hw.vpucp[i].cmd[0] =
+					cmdvpu(r300->radeon.radeonScreen,
+							R500_PVS_UCP_START + i, 1);
+				if (r300->radeon.radeonScreen->kernel_mm)
+					r300->hw.vpucp[i].emit = emit_vpu_state;
+			}
+		} else {
+			ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0);
+			r300->hw.vpp.cmd[0] =
+				cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0);
+			if (r300->radeon.radeonScreen->kernel_mm)
+				r300->hw.vpp.emit = emit_vpp_state;
+
+			ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+			r300->hw.vps.cmd[0] =
+				cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1);
+			if (r300->radeon.radeonScreen->kernel_mm)
+				r300->hw.vps.emit = emit_vpu_state;
+
+			for (i = 0; i < 6; i++) {
+				ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
+				r300->hw.vpucp[i].cmd[0] =
+					cmdvpu(r300->radeon.radeonScreen,
+							R300_PVS_UCP_START + i, 1);
+				if (r300->radeon.radeonScreen->kernel_mm)
+					r300->hw.vpucp[i].emit = emit_vpu_state;
+			}
+		}
+	}
+
+	/* Textures */
+	ALLOC_STATE(tex.filter, variable, mtu + 1, 0);
+	r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 0);
+
+	ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0);
+	r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, 0);
+
+	ALLOC_STATE(tex.size, variable, mtu + 1, 0);
+	r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, 0);
+
+	ALLOC_STATE(tex.format, variable, mtu + 1, 0);
+	r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, 0);
+
+	ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
+	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0);
+
+	ALLOC_STATE(tex.offset, tex_offsets, 1, 0);
+	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0);
+	r300->hw.tex.offset.emit = &emit_tex_offsets;
+
+	ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
+	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, 0);
+
+	ALLOC_STATE(tex.border_color, variable, mtu + 1, 0);
+	r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0);
+	/* query object atom: the pipe-select register differs on RV530 */
+	radeon_init_query_stateobj(&r300->radeon, R300_QUERYOBJ_CMDSIZE);
+	if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) {
+		r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RV530_FG_ZBREG_DEST, 1);
+		r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL;
+	} else {
+		r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_REG_DEST, 1);
+		r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = R300_RASTER_PIPE_SELECT_ALL;
+	}
+	r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZPASS_DATA, 1);
+	r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_1] = 0;
+	/* start with all state marked dirty */
+	r300->radeon.hw.is_dirty = GL_TRUE;
+	r300->radeon.hw.all_dirty = GL_TRUE;
+
+	rcommonInitCmdBuf(&r300->radeon);
+}
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
new file mode 100644
index 0000000000..0e68da928e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
@@ -0,0 +1,69 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R300_CMDBUF_H__
+#define __R300_CMDBUF_H__
+
+#include "r300_context.h"
+
+#define CACHE_FLUSH_BUFSZ      (4*2)
+#define PRE_EMIT_STATE_BUFSZ   (2+2)
+#define AOS_BUFSZ(nr)          (3+((nr) >>1)*3 + ((nr)&1)*2 + ((nr)*2))	/* nr is parenthesised so AOS_BUFSZ(a + b) expands correctly; it is still evaluated three times, so pass a side-effect-free expression */
+#define FIREAOS_BUFSZ          (3)
+#define SCISSORS_BUFSZ         (3)
+
+void r300InitCmdBuf(r300ContextPtr r300);
+void r300_emit_scissor(GLcontext *ctx);
+
+void r300_emit_vpu(struct r300_context *ctx,
+                   uint32_t *data,
+                   unsigned len,
+                   uint32_t addr);
+
+void r500_emit_fp(struct r300_context *r300,
+                  uint32_t *data,
+                  unsigned len,
+                  uint32_t addr,
+                  unsigned type,
+                  unsigned clamp);
+
+void r300_emit_cb_setup(struct r300_context *r300,
+                        struct radeon_bo *bo,
+                        uint32_t offset,
+                        GLuint format,
+                        unsigned cpp,
+                        unsigned pitch);
+
+#endif /* __R300_CMDBUF_H__ */
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
new file mode 100644
index 0000000000..6992ca59db
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -0,0 +1,570 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#include "main/glheader.h"
+#include "main/api_arrayelt.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+#include "main/extensions.h"
+#include "main/bufferobj.h"
+#include "main/texobj.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
+
+#include "r300_context.h"
+#include "radeon_span.h"
+#include "r300_blit.h"
+#include "r300_cmdbuf.h"
+#include "r300_state.h"
+#include "r300_tex.h"
+#include "r300_emit.h"
+#include "r300_render.h"
+#include "r300_swtcl.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_buffer_objects.h"
+#include "radeon_queryobj.h"
+
+#include "utils.h"
+#include "xmlpool.h"		/* for symbolic values of enum-type options */
+
+#define need_GL_VERSION_2_0
+#define need_GL_ARB_occlusion_query
+#define need_GL_ARB_point_parameters
+#define need_GL_ARB_vertex_program
+#define need_GL_EXT_blend_equation_separate
+#define need_GL_EXT_blend_func_separate
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_framebuffer_blit
+#define need_GL_EXT_framebuffer_object
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_gpu_program_parameters
+#define need_GL_EXT_provoking_vertex
+#define need_GL_EXT_secondary_color
+#define need_GL_EXT_stencil_two_side
+#define need_GL_ATI_separate_stencil
+#define need_GL_NV_vertex_program
+
+#include "main/remap_helper.h"
+
+static const struct dri_extension card_extensions[] = {	/* handed to driInitExtensions(ctx, card_extensions, GL_TRUE) by r300InitGLExtensions(), which may disable some entries again */
+  /* *INDENT-OFF* */
+  {"GL_ARB_depth_texture",		NULL},
+  {"GL_ARB_fragment_program",		NULL},
+  {"GL_ARB_occlusion_query",		GL_ARB_occlusion_query_functions},
+  {"GL_ARB_multitexture",		NULL},
+  {"GL_ARB_point_parameters",		GL_ARB_point_parameters_functions},
+  {"GL_ARB_shadow",			NULL},
+  {"GL_ARB_shadow_ambient",		NULL},
+  {"GL_ARB_texture_border_clamp",	NULL},
+  {"GL_ARB_texture_cube_map",		NULL},
+  {"GL_ARB_texture_env_add",		NULL},
+  {"GL_ARB_texture_env_combine",	NULL},
+  {"GL_ARB_texture_env_crossbar",	NULL},
+  {"GL_ARB_texture_env_dot3",		NULL},
+  {"GL_ARB_texture_mirrored_repeat",	NULL},
+  {"GL_ARB_vertex_program",		GL_ARB_vertex_program_functions},
+  {"GL_EXT_blend_equation_separate",	GL_EXT_blend_equation_separate_functions},
+  {"GL_EXT_blend_func_separate",	GL_EXT_blend_func_separate_functions},
+  {"GL_EXT_blend_minmax",		GL_EXT_blend_minmax_functions},
+  {"GL_EXT_blend_subtract",		NULL},
+  {"GL_EXT_fog_coord",			GL_EXT_fog_coord_functions },
+  {"GL_EXT_gpu_program_parameters",     GL_EXT_gpu_program_parameters_functions},
+  {"GL_EXT_provoking_vertex",           GL_EXT_provoking_vertex_functions },
+  {"GL_EXT_secondary_color", 		GL_EXT_secondary_color_functions},
+  {"GL_EXT_shadow_funcs",		NULL},
+  {"GL_EXT_stencil_two_side",		GL_EXT_stencil_two_side_functions},
+  {"GL_EXT_stencil_wrap",		NULL},
+  {"GL_EXT_texture_edge_clamp",		NULL},
+  {"GL_EXT_texture_env_combine", 	NULL},
+  {"GL_EXT_texture_env_dot3", 		NULL},
+  {"GL_EXT_texture_filter_anisotropic",	NULL},
+  {"GL_EXT_texture_lod_bias",		NULL},
+  {"GL_EXT_texture_mirror_clamp",	NULL},
+  {"GL_EXT_texture_rectangle",		NULL},
+  {"GL_EXT_texture_sRGB",		NULL},
+  {"GL_EXT_vertex_array_bgra",		NULL},
+  {"GL_ATI_separate_stencil",		GL_ATI_separate_stencil_functions},
+  {"GL_ATI_texture_env_combine3",	NULL},
+  {"GL_ATI_texture_mirror_once",	NULL},
+  {"GL_MESA_pack_invert",		NULL},
+  {"GL_MESA_ycbcr_texture",		NULL},
+  {"GL_MESAX_texture_float",		NULL},
+  {"GL_NV_blend_square",		NULL},
+  {"GL_NV_vertex_program",		GL_NV_vertex_program_functions},
+  {"GL_SGIS_generate_mipmap",		NULL},
+  {NULL,				NULL}	/* all-NULL last entry; presumably the end-of-table marker driInitExtensions() looks for -- confirm */
+  /* *INDENT-ON* */
+};
+
+
+static const struct dri_extension mm_extensions[] = {	/* registered by r300InitGLExtensions() only when radeonScreen->kernel_mm is set */
+  { "GL_EXT_framebuffer_blit",	GL_EXT_framebuffer_blit_functions },
+  { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
+  { NULL, NULL }
+};
+
+/**
+ * The GL 2.0 functions are needed to make display lists work with
+ * functions added by GL_ATI_separate_stencil.
+ */
+static const struct dri_extension gl_20_extension[] = {	/* NOTE(review): not referenced anywhere in this file */
+  {"GL_VERSION_2_0",			GL_VERSION_2_0_functions },	/* single entry; unlike the tables above there is no {NULL, NULL} entry after it */
+};
+
+static const struct tnl_pipeline_stage *r300_pipeline[] = {	/* installed by r300CreateContext() via _tnl_install_pipeline() */
+	/* Catch any t&l fallbacks
+	 */
+	&_tnl_vertex_transform_stage,
+	&_tnl_normal_transform_stage,
+	&_tnl_lighting_stage,
+	&_tnl_fog_coordinate_stage,
+	&_tnl_texgen_stage,
+	&_tnl_texture_transform_stage,
+	&_tnl_point_attenuation_stage,
+	&_tnl_vertex_program_stage,
+	&_tnl_render_stage,
+	0,	/* null pointer as last element; presumably the list terminator -- confirm against _tnl_install_pipeline() */
+};
+
+static void r300_get_lock(radeonContextPtr rmesa)
+{
+	drm_radeon_sarea_t *shared = rmesa->sarea;
+
+	if (shared->ctx_owner == rmesa->dri.hwContext)
+		return;	/* this context is already recorded as the owner */
+	shared->ctx_owner = rmesa->dri.hwContext;
+	if (!rmesa->radeonScreen->kernel_mm)
+		radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
+}
+
+static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
+{
+    /* Command-stream header: write a fixed set of registers, wait for the 3D engine, then flush the destination and Z caches so pending work is completed. */
+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+                                  R300_SC_SCREENDOOR, 1));
+    radeon_cs_write_dword(cs, 0x0);
+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+                                  R300_SC_SCREENDOOR, 1));
+    radeon_cs_write_dword(cs, 0x00FFFFFF);	/* second SC_SCREENDOOR write, directly after the zero write above */
+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+                                  R300_SC_HYPERZ, 1));
+    radeon_cs_write_dword(cs, 0x0);
+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+                                  R300_US_CONFIG, 1));
+    radeon_cs_write_dword(cs, 0x0);
+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+                                  R300_ZB_CNTL, 1));
+    radeon_cs_write_dword(cs, 0x0);
+    radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, R300_WAIT_3D));
+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+                                  R300_RB3D_DSTCACHE_CTLSTAT, 1));
+    radeon_cs_write_dword(cs, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);	/* destination cache flush */
+    radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+                                  R300_ZB_ZCACHE_CTLSTAT, 1));
+    radeon_cs_write_dword(cs, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE);	/* Z cache flush */
+    radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen,
+                               R300_WAIT_3D | R300_WAIT_3D_CLEAN));
+}
+
+static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon)
+{
+	BATCH_LOCALS(radeon);
+
+	cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);	/* wait first, then touch the texture tags */
+	BEGIN_BATCH_NO_AUTOSTATE(2);	/* room for the single register write below */
+	OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH);
+	END_BATCH();
+	end_3d(radeon);
+}
+
+static void r300_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
+{
+	r300ContextPtr rctx = R300_CONTEXT(ctx);
+
+	/* mode set: raise the bit(s) in the common fallback mask; otherwise clear them */
+	rctx->radeon.Fallback = mode ? (rctx->radeon.Fallback | bit)
+				     : (rctx->radeon.Fallback & ~bit);
+
+	r300SwitchFallback(ctx, R300_FALLBACK_RADEON_COMMON, mode);
+}
+
+static void r300_emit_query_finish(radeonContextPtr radeon)
+{
+	r300ContextPtr r300 = (r300ContextPtr)radeon;
+	struct radeon_query_object *query = radeon->query.current;
+	BATCH_LOCALS(radeon);
+
+	BEGIN_BATCH_NO_AUTOSTATE(3 * 2 *r300->radeon.radeonScreen->num_gb_pipes + 2);	/* 6 per pipe (REGVAL + REGSEQ + RELOC) plus 2 for the final REGVAL */
+	switch (r300->radeon.radeonScreen->num_gb_pipes) {	/* cases cascade: N pipes emit the blocks for pipes N-1 .. 0 */
+	case 4:
+		OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3);
+		OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+		OUT_BATCH_RELOC(0, query->bo, query->curr_offset+3*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);	/* fall through */
+	case 3:
+		OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_2);
+		OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+		OUT_BATCH_RELOC(0, query->bo, query->curr_offset+2*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);	/* fall through */
+	case 2:
+		if (r300->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV380) {	/* families up to RV380 select pipe 3 for the second result slot */
+			OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3);
+		} else {
+			OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_1);
+		}
+		OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+		OUT_BATCH_RELOC(0, query->bo, query->curr_offset+1*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);	/* fall through */
+	case 1:
+	default:
+		OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_0);
+		OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+		OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+		break;
+	}
+	OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL);	/* go back to addressing all pipes */
+	END_BATCH();
+	query->curr_offset += r300->radeon.radeonScreen->num_gb_pipes * sizeof(uint32_t);	/* one 32-bit result slot per pipe was addressed above */
+	assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);	/* checked only in builds where assert() is active */
+	query->emitted_begin = GL_FALSE;
+}
+
+static void rv530_emit_query_finish_single_z(radeonContextPtr radeon)
+{
+	BATCH_LOCALS(radeon);
+	struct radeon_query_object *query = radeon->query.current;
+
+	BEGIN_BATCH_NO_AUTOSTATE(8);	/* one pipe block (REGVAL + REGSEQ + RELOC) plus the final REGVAL */
+	OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
+	OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+	OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+	OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);	/* go back to addressing all pipes */
+	END_BATCH();
+
+	query->curr_offset += sizeof(uint32_t);	/* a single 32-bit result slot was addressed */
+	assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+	query->emitted_begin = GL_FALSE;
+}
+
+static void rv530_emit_query_finish_double_z(radeonContextPtr radeon)
+{
+	BATCH_LOCALS(radeon);
+	struct radeon_query_object *query = radeon->query.current;
+
+	BEGIN_BATCH_NO_AUTOSTATE(14);	/* two pipe blocks (REGVAL + REGSEQ + RELOC each) plus the final REGVAL */
+	OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
+	OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+	OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+	OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
+	OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+	OUT_BATCH_RELOC(0, query->bo, query->curr_offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);	/* second slot, right after pipe 0's */
+	OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);	/* go back to addressing all pipes */
+	END_BATCH();
+
+	query->curr_offset += 2 * sizeof(uint32_t);	/* two 32-bit result slots were addressed */
+	assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+	query->emitted_begin = GL_FALSE;
+}
+
+static void r300_init_vtbl(radeonContextPtr radeon)
+{
+	radeonScreenPtr screen = radeon->radeonScreen;
+
+	radeon->vtbl.get_lock = r300_get_lock;	/* hooks that do not depend on the chip */
+	radeon->vtbl.update_viewport_offset = r300UpdateViewportOffset;
+	radeon->vtbl.emit_cs_header = r300_vtbl_emit_cs_header;
+	radeon->vtbl.swtcl_flush = r300_swtcl_flush;
+	radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms;
+	radeon->vtbl.fallback = r300_fallback;
+	radeon->vtbl.check_blit = r300_check_blit;
+	radeon->vtbl.blit = r300_blit;
+
+	/* RV530 gets dedicated query-finish emitters, chosen by num_z_pipes. */
+	if (screen->chip_family != CHIP_FAMILY_RV530)
+		radeon->vtbl.emit_query_finish = r300_emit_query_finish;
+	else if (screen->num_z_pipes == 2)
+		radeon->vtbl.emit_query_finish = rv530_emit_query_finish_double_z;
+	else
+		radeon->vtbl.emit_query_finish = rv530_emit_query_finish_single_z;
+	if (screen->chip_family >= CHIP_FAMILY_RV515)
+		radeon->vtbl.is_format_renderable = r500IsFormatRenderable;
+	else
+		radeon->vtbl.is_format_renderable = r300IsFormatRenderable;
+}
+
+static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+	ctx->Const.MaxTextureImageUnits =
+	    driQueryOptioni(&r300->radeon.optionCache, "texture_image_units");	/* taken from the driver option cache */
+	ctx->Const.MaxTextureCoordUnits =
+	    driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units");
+	ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureImageUnits,
+		 ctx->Const.MaxTextureCoordUnits);
+	ctx->Const.MaxCombinedTextureImageUnits =
+		ctx->Const.MaxVertexTextureImageUnits +	/* not assigned in this function: uses whatever ctx already holds */
+		ctx->Const.MaxTextureImageUnits;
+
+
+	ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+	ctx->Const.MaxTextureLodBias = 16.0;
+
+	if (screen->chip_family >= CHIP_FAMILY_RV515) {	/* larger texture/renderbuffer limits from RV515 upwards */
+		ctx->Const.MaxTextureLevels = 13;	/* 2^(13-1) = 4096, same as the rect/renderbuffer limit */
+		ctx->Const.MaxCubeTextureLevels = 13;
+		ctx->Const.MaxTextureRectSize = 4096;
+		ctx->Const.MaxRenderbufferSize = 4096;
+	}
+	else {
+		ctx->Const.MaxTextureLevels = 12;	/* 2^(12-1) = 2048, same as the rect/renderbuffer limit */
+		ctx->Const.MaxCubeTextureLevels = 12;
+		ctx->Const.MaxTextureRectSize = 2048;
+		ctx->Const.MaxRenderbufferSize = 2048;
+	}
+
+	ctx->Const.MinPointSize = 1.0;
+	ctx->Const.MinPointSizeAA = 1.0;
+	ctx->Const.MaxPointSize = R300_POINTSIZE_MAX;
+	ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX;
+
+	ctx->Const.MinLineWidth = 1.0;
+	ctx->Const.MinLineWidthAA = 1.0;
+	ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
+	ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
+
+	ctx->Const.MaxDrawBuffers = 1;
+	ctx->Const.MaxColorAttachments = 1;
+
+	/* currently bogus data */
+	if (r300->options.hw_tcl_enabled) {	/* vertex-program limits are only set when hardware TCL is enabled */
+		ctx->Const.VertexProgram.MaxNativeInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
+		ctx->Const.VertexProgram.MaxNativeAluInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
+		ctx->Const.VertexProgram.MaxNativeAttribs = 16;	/* r420 */
+		ctx->Const.VertexProgram.MaxNativeTemps = 32;
+		ctx->Const.VertexProgram.MaxNativeParameters = 256;	/* r420 */
+		ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
+	}
+
+	if (screen->chip_family >= CHIP_FAMILY_RV515) {
+		ctx->Const.FragmentProgram.MaxNativeTemps = R500_PFS_NUM_TEMP_REGS;
+		ctx->Const.FragmentProgram.MaxNativeAttribs = 11;	/* copy i915... */
+
+		/* The hardware limits are higher than this,
+		 * but the non-KMS DRM interface artificially limits us
+		 * to this many instructions.
+		 *
+		 * We could of course work around it in the KMS path,
+		 * but it would be a mess, so it seems wiser
+		 * to leave it as is. Going forward, the Gallium driver
+		 * will not be subject to these limitations.
+		 */
+		ctx->Const.FragmentProgram.MaxNativeParameters = 255;
+		ctx->Const.FragmentProgram.MaxNativeAluInstructions = 255;
+		ctx->Const.FragmentProgram.MaxNativeTexInstructions = 255;
+		ctx->Const.FragmentProgram.MaxNativeInstructions = 255;
+		ctx->Const.FragmentProgram.MaxNativeTexIndirections = 255;
+		ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
+	} else {
+		ctx->Const.FragmentProgram.MaxNativeTemps = R300_PFS_NUM_TEMP_REGS;
+		ctx->Const.FragmentProgram.MaxNativeAttribs = 11;	/* copy i915... */
+		ctx->Const.FragmentProgram.MaxNativeParameters = R300_PFS_NUM_CONST_REGS;
+		ctx->Const.FragmentProgram.MaxNativeAluInstructions = R300_PFS_MAX_ALU_INST;
+		ctx->Const.FragmentProgram.MaxNativeTexInstructions = R300_PFS_MAX_TEX_INST;
+		ctx->Const.FragmentProgram.MaxNativeInstructions = R300_PFS_MAX_ALU_INST + R300_PFS_MAX_TEX_INST;	/* sum of the ALU and TEX limits */
+		ctx->Const.FragmentProgram.MaxNativeTexIndirections = R300_PFS_MAX_TEX_INDIRECT;
+		ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
+	}
+
+}
+
+static void r300ParseOptions(r300ContextPtr r300, radeonScreenPtr screen)
+{
+	struct r300_options opts = { 0 };	/* fields not set below stay zero */
+
+	driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache,
+			    screen->driScreen->myNum, "r300");
+
+	r300->radeon.initialMaxAnisotropy =
+		driQueryOptionf(&r300->radeon.optionCache, "def_max_anisotropy");
+
+	opts.stencil_two_side_disabled =
+		driQueryOptionb(&r300->radeon.optionCache, "disable_stencil_two_side");
+	opts.s3tc_force_enabled = driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable");
+	opts.s3tc_force_disabled = driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc");
+
+	/* Hardware TCL needs the RADEON_CHIPSET_TCL flag and a tcl_mode other than software. */
+	opts.hw_tcl_enabled = (screen->chip_flags & RADEON_CHIPSET_TCL) &&
+		driQueryOptioni(&r300->radeon.optionCache, "tcl_mode") != DRI_CONF_TCL_SW;
+	opts.conformance_mode =
+		!driQueryOptionb(&r300->radeon.optionCache, "disable_lowimpact_fallback");
+	r300->options = opts;
+}
+
+static void r300InitGLExtensions(GLcontext *ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	radeonScreenPtr screen = r300->radeon.radeonScreen;
+
+	driInitExtensions(ctx, card_extensions, GL_TRUE);
+	if (screen->kernel_mm)
+		driInitExtensions(ctx, mm_extensions, GL_FALSE);
+
+	if (r300->options.stencil_two_side_disabled)
+		_mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
+
+	if (r300->options.s3tc_force_disabled) {
+		_mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+	} else if (ctx->Mesa_DXTn || r300->options.s3tc_force_enabled) {
+		_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+		_mesa_enable_extension(ctx, "GL_S3_s3tc");
+	}
+
+	if (!screen->drmSupportsOcclusionQueries)
+		_mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
+	/* Extensions that depend on the chip family. */
+	if (screen->chip_family >= CHIP_FAMILY_RV350)
+		_mesa_enable_extension(ctx, "GL_ARB_half_float_vertex");
+	if (screen->chip_family >= CHIP_FAMILY_RV515)
+		_mesa_enable_extension(ctx, "GL_EXT_packed_depth_stencil");
+}
+
+static void r300InitIoctlFuncs(struct dd_function_table *functions)
+{
+	functions->Flush = radeonFlush;	/* install the Flush/Finish/Clear entry points */
+	functions->Finish = radeonFinish;
+	functions->Clear = _mesa_meta_Clear;
+}
+
+/* Create the device specific rendering context.  Returns GL_TRUE on success, GL_FALSE when the context allocation or radeonInitContext() fails.  The api argument is not referenced in the body.
+ */
+GLboolean r300CreateContext(gl_api api,
+			    const __GLcontextModes * glVisual,
+			    __DRIcontext * driContextPriv,
+			    void *sharedContextPrivate)
+{
+	__DRIscreen *sPriv = driContextPriv->driScreenPriv;	/* dereferenced before the assert(driContextPriv) below */
+	radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
+	struct dd_function_table functions;
+	r300ContextPtr r300;
+	GLcontext *ctx;
+
+	assert(glVisual);
+	assert(driContextPriv);
+	assert(screen);
+
+	r300 = (r300ContextPtr) CALLOC(sizeof(*r300));
+	if (!r300)
+		return GL_FALSE;
+
+	r300ParseOptions(r300, screen);	/* fills r300->options and the option cache; r300_init_vtbl and later steps read them */
+
+	r300->radeon.radeonScreen = screen;	/* must be set before r300_init_vtbl(), which reads radeonScreen */
+	r300_init_vtbl(&r300->radeon);
+
+	_mesa_init_driver_functions(&functions);	/* start from the defaults, then override per subsystem */
+	r300InitIoctlFuncs(&functions);
+	r300InitStateFuncs(&r300->radeon, &functions);
+	r300InitTextureFuncs(&r300->radeon, &functions);
+	r300InitShaderFuncs(&functions);
+	radeonInitQueryObjFunctions(&functions);
+	radeonInitBufferObjectFuncs(&functions);
+
+	if (!radeonInitContext(&r300->radeon, &functions,
+			       glVisual, driContextPriv,
+			       sharedContextPrivate)) {
+		FREE(r300);	/* NOTE(review): the option cache filled by r300ParseOptions() is not released on this path -- confirm whether it needs a destroy call */
+		return GL_FALSE;
+	}
+
+	ctx = r300->radeon.glCtx;
+
+	r300->fallback = 0;
+	if (r300->options.hw_tcl_enabled)
+		ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+
+	ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+
+	r300InitConstValues(ctx, screen);
+
+	_mesa_set_mvp_with_dp4( ctx, GL_TRUE );
+
+	/* Initialize the software rasterizer and helper modules.
+	 */
+	_swrast_CreateContext(ctx);
+	_vbo_CreateContext(ctx);
+	_tnl_CreateContext(ctx);
+	_swsetup_CreateContext(ctx);
+	_swsetup_Wakeup(ctx);
+
+	/* Install the customized pipeline:
+	 */
+	_tnl_destroy_pipeline(ctx);	/* drop the pipeline set up by _tnl_CreateContext() before installing r300_pipeline */
+	_tnl_install_pipeline(ctx, r300_pipeline);
+	TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+
+	/* Configure swrast and TNL to match hardware characteristics:
+	 */
+	_swrast_allow_pixel_fog(ctx, GL_FALSE);
+	_swrast_allow_vertex_fog(ctx, GL_TRUE);
+	_tnl_allow_pixel_fog(ctx, GL_FALSE);
+	_tnl_allow_vertex_fog(ctx, GL_TRUE);
+
+	if (r300->options.hw_tcl_enabled) {	/* hardware TCL draw path vs. software TCL path */
+		r300InitDraw(ctx);
+	} else {
+		r300InitSwtcl(ctx);
+	}
+
+	r300_blit_init(r300);
+	radeon_fbo_init(&r300->radeon);
+	radeonInitSpanFuncs( ctx );
+	r300InitCmdBuf(r300);
+	r300InitState(r300);
+	r300InitShaderFunctions(r300);
+
+	r300InitGLExtensions(ctx);	/* reads r300->options and radeonScreen, both set above */
+
+	return GL_TRUE;
+}
+
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
new file mode 100644
index 0000000000..fbb609b9f6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -0,0 +1,560 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R300_CONTEXT_H__
+#define __R300_CONTEXT_H__
+
+#include "drm.h"
+#include "radeon_drm.h"
+#include "dri_util.h"
+#include "radeon_common.h"
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+#include "compiler/radeon_code.h"
+
+struct r300_context;
+typedef struct r300_context r300ContextRec;
+typedef struct r300_context *r300ContextPtr;
+
+
+#include "r300_vertprog.h"
+
+
+/* The blit width for texture uploads
+ */
+#define R300_BLIT_WIDTH_BYTES 1024
+#define R300_MAX_TEXTURE_UNITS 8
+
+
+
+#define R300_VPT_CMD_0		0
+#define R300_VPT_XSCALE		1
+#define R300_VPT_XOFFSET	2
+#define R300_VPT_YSCALE		3
+#define R300_VPT_YOFFSET	4
+#define R300_VPT_ZSCALE		5
+#define R300_VPT_ZOFFSET	6
+#define R300_VPT_CMDSIZE	7
+
+#define R300_VIR_CMD_0		0	/* vir is variable size (at least 1) */
+#define R300_VIR_CNTL_0		1
+#define R300_VIR_CNTL_1		2
+#define R300_VIR_CNTL_2		3
+#define R300_VIR_CNTL_3		4
+#define R300_VIR_CNTL_4		5
+#define R300_VIR_CNTL_5		6
+#define R300_VIR_CNTL_6		7
+#define R300_VIR_CNTL_7		8
+#define R300_VIR_CMDSIZE	9
+
+#define R300_VIC_CMD_0		0
+#define R300_VIC_CNTL_0		1
+#define R300_VIC_CNTL_1		2
+#define R300_VIC_CMDSIZE	3
+
+#define R300_VOF_CMD_0		0
+#define R300_VOF_CNTL_0		1
+#define R300_VOF_CNTL_1		2
+#define R300_VOF_CMDSIZE	3
+
+#define R300_PVS_CMD_0		0
+#define R300_PVS_CNTL_1		1
+#define R300_PVS_CNTL_2		2
+#define R300_PVS_CNTL_3		3
+#define R300_PVS_CMDSIZE	4
+
+#define R300_GB_MISC_CMD_0		0
+#define R300_GB_MISC_MSPOS_0		1
+#define R300_GB_MISC_MSPOS_1		2
+#define R300_GB_MISC_TILE_CONFIG	3
+#define R300_GB_MISC_CMDSIZE		4
+#define R300_GB_MISC2_CMD_0		    0
+#define R300_GB_MISC2_SELECT		1
+#define R300_GB_MISC2_AA_CONFIG		2
+#define R300_GB_MISC2_CMDSIZE		3
+
+#define R300_TXE_CMD_0		0
+#define R300_TXE_ENABLE		1
+#define R300_TXE_CMDSIZE	2
+
+#define R300_PS_CMD_0		0
+#define R300_PS_POINTSIZE	1
+#define R300_PS_CMDSIZE		2
+
+#define R300_ZBS_CMD_0		0
+#define R300_ZBS_T_FACTOR	1
+#define R300_ZBS_T_CONSTANT	2
+#define R300_ZBS_W_FACTOR	3
+#define R300_ZBS_W_CONSTANT	4
+#define R300_ZBS_CMDSIZE	5
+
+#define R300_CUL_CMD_0		0
+#define R300_CUL_CULL		1
+#define R300_CUL_CMDSIZE	2
+
+#define R300_RC_CMD_0		0
+#define R300_RC_CNTL_0		1
+#define R300_RC_CNTL_1		2
+#define R300_RC_CMDSIZE		3
+
+#define R300_RI_CMD_0		0
+#define R300_RI_INTERP_0	1
+#define R300_RI_INTERP_1	2
+#define R300_RI_INTERP_2	3
+#define R300_RI_INTERP_3	4
+#define R300_RI_INTERP_4	5
+#define R300_RI_INTERP_5	6
+#define R300_RI_INTERP_6	7
+#define R300_RI_INTERP_7	8
+#define R300_RI_CMDSIZE		9
+
+#define R500_RI_CMDSIZE	       17
+
+#define R300_RR_CMD_0		0	/* rr is variable size (at least 1) */
+#define R300_RR_INST_0		1
+#define R300_RR_INST_1		2
+#define R300_RR_INST_2		3
+#define R300_RR_INST_3		4
+#define R300_RR_INST_4		5
+#define R300_RR_INST_5		6
+#define R300_RR_INST_6		7
+#define R300_RR_INST_7		8
+#define R300_RR_CMDSIZE		9
+
+#define R300_FP_CMD_0		0
+#define R300_FP_CNTL0		1
+#define R300_FP_CNTL1		2
+#define R300_FP_CNTL2		3
+#define R300_FP_CMD_1		4
+#define R300_FP_NODE0		5
+#define R300_FP_NODE1		6
+#define R300_FP_NODE2		7
+#define R300_FP_NODE3		8
+#define R300_FP_CMDSIZE		9
+
+#define R500_FP_CMD_0           0
+#define R500_FP_CNTL            1
+#define R500_FP_PIXSIZE         2
+#define R500_FP_CMD_1           3
+#define R500_FP_CODE_ADDR       4
+#define R500_FP_CODE_RANGE      5
+#define R500_FP_CODE_OFFSET     6
+#define R500_FP_CMD_2           7
+#define R500_FP_FC_CNTL         8
+#define R500_FP_CMDSIZE         9
+
+#define R300_FPT_CMD_0		0
+#define R300_FPT_INSTR_0	1
+#define R300_FPT_CMDSIZE	65
+
+#define R300_FPI_CMD_0		0
+#define R300_FPI_INSTR_0	1
+#define R300_FPI_CMDSIZE	65
+/* R500 has space for 512 instructions - 6 dwords per instruction */
+#define R500_FPI_CMDSIZE	(512*6+1)
+
+#define R300_FPP_CMD_0		0
+#define R300_FPP_PARAM_0	1
+#define R300_FPP_CMDSIZE	(32*4+1)
+/* R500 has space for 256 constants - 4 dwords per constant */
+#define R500_FPP_CMDSIZE	(256*4+1)
+
+#define R300_FOGS_CMD_0		0
+#define R300_FOGS_STATE		1
+#define R300_FOGS_CMDSIZE	2
+
+#define R300_FOGC_CMD_0		0
+#define R300_FOGC_R		1
+#define R300_FOGC_G		2
+#define R300_FOGC_B		3
+#define R300_FOGC_CMDSIZE	4
+
+#define R300_FOGP_CMD_0		0
+#define R300_FOGP_SCALE		1
+#define R300_FOGP_START		2
+#define R300_FOGP_CMDSIZE	3
+
+#define R300_AT_CMD_0		0
+#define R300_AT_ALPHA_TEST	1
+#define R300_AT_UNKNOWN		2
+#define R300_AT_CMDSIZE		3
+
+#define R300_BLD_CMD_0		0
+#define R300_BLD_CBLEND		1
+#define R300_BLD_ABLEND		2
+#define R300_BLD_CMDSIZE	3
+
+#define R300_CMK_CMD_0		0
+#define R300_CMK_COLORMASK	1
+#define R300_CMK_CMDSIZE	2
+
+#define R300_CB_CMD_0		0
+#define R300_CB_OFFSET		1
+#define R300_CB_CMD_1		2
+#define R300_CB_PITCH		3
+#define R300_CB_CMDSIZE		4
+
+#define R300_ZS_CMD_0		0
+#define R300_ZS_CNTL_0		1
+#define R300_ZS_CNTL_1		2
+#define R300_ZS_CNTL_2		3
+#define R300_ZS_CMDSIZE		4
+
+#define R300_ZSB_CMD_0		0
+#define R300_ZSB_CNTL_0		1
+#define R300_ZSB_CMDSIZE	2
+
+#define R300_ZB_CMD_0		0
+#define R300_ZB_OFFSET		1
+#define R300_ZB_PITCH		2
+#define R300_ZB_CMDSIZE		3
+
+#define R300_VAP_CNTL_FLUSH     0
+#define R300_VAP_CNTL_FLUSH_1   1
+#define R300_VAP_CNTL_CMD       2
+#define R300_VAP_CNTL_INSTR     3
+#define R300_VAP_CNTL_SIZE      4
+
+#define R300_VPI_CMD_0		0
+#define R300_VPI_INSTR_0	1
+#define R300_VPI_CMDSIZE	1025	/* 256 16 byte instructions */
+
+#define R300_VPP_CMD_0		0
+#define R300_VPP_PARAM_0	1
+#define R300_VPP_CMDSIZE	1025	/* 256 4-component parameters */
+
+#define R300_VPUCP_CMD_0		0
+#define R300_VPUCP_X            1
+#define R300_VPUCP_Y            2
+#define R300_VPUCP_Z            3
+#define R300_VPUCP_W            4
+#define R300_VPUCP_CMDSIZE	5	/* CMD_0 plus the X, Y, Z and W slots */
+
+#define R300_VPS_CMD_0		0
+#define R300_VPS_ZERO_0		1
+#define R300_VPS_ZERO_1		2
+#define R300_VPS_POINTSIZE	3
+#define R300_VPS_ZERO_3		4
+#define R300_VPS_CMDSIZE	5
+
+	/* the layout is common for all fields inside tex */
+#define R300_TEX_CMD_0		0
+#define R300_TEX_VALUE_0	1
+/* We don't really use this, instead specify mtu+1 dynamically
+#define R300_TEX_CMDSIZE	(MAX_TEXTURE_UNITS+1)
+*/
+
+#define R300_QUERYOBJ_CMD_0  0
+#define R300_QUERYOBJ_DATA_0 1
+#define R300_QUERYOBJ_CMD_1  2
+#define R300_QUERYOBJ_DATA_1  3
+#define R300_QUERYOBJ_CMDSIZE  4
+
+/**
+ * Cache for hardware register state.
+ *
+ * One radeon_state_atom per register group.  The hexadecimal numbers in the
+ * member comments are the register offsets as noted by the authors.
+ */
+struct r300_hw_state {
+	struct radeon_state_atom vpt;	/* viewport (1D98) */
+	struct radeon_state_atom vap_cntl;
+	struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */
+	struct radeon_state_atom vof;	/* VAP output format register 0x2090 */
+	struct radeon_state_atom vte;	/* (20B0) */
+	struct radeon_state_atom vap_vf_max_vtx_indx;	/* Maximum Vertex Indx Clamp (2134) */
+	struct radeon_state_atom vap_cntl_status;
+	struct radeon_state_atom vir[2];	/* vap input route (2150/21E0) */
+	struct radeon_state_atom vic;	/* vap input control (2180) */
+	struct radeon_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
+	struct radeon_state_atom vap_clip_cntl;
+	struct radeon_state_atom vap_clip;
+	struct radeon_state_atom vap_pvs_vtx_timeout_reg;	/* Vertex timeout register (2288) */
+	struct radeon_state_atom pvs;	/* pvs_cntl (22D0) */
+	struct radeon_state_atom gb_enable;	/* (4008) */
+	struct radeon_state_atom gb_misc;	/* Multisampling position shifts ? (4010) */
+	struct radeon_state_atom gb_misc2;	/* Multisampling position shifts ? (4010 repeated from gb_misc; NOTE(review): confirm the real offset) */
+	struct radeon_state_atom ga_point_s0;	/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
+	struct radeon_state_atom ga_triangle_stipple;	/* (4214) */
+	struct radeon_state_atom ps;	/* pointsize (421C) */
+	struct radeon_state_atom ga_point_minmax;	/* (4230) */
+	struct radeon_state_atom lcntl;	/* line control */
+	struct radeon_state_atom ga_line_stipple;	/* (4260) */
+	struct radeon_state_atom shade;
+	struct radeon_state_atom shade2;
+	struct radeon_state_atom polygon_mode;
+	struct radeon_state_atom fogp;	/* fog parameters (4294) */
+	struct radeon_state_atom ga_soft_reset;	/* (429C) */
+	struct radeon_state_atom zbias_cntl;
+	struct radeon_state_atom zbs;	/* zbias (42A4) */
+	struct radeon_state_atom occlusion_cntl;
+	struct radeon_state_atom cul;	/* cull cntl (42B8) */
+	struct radeon_state_atom su_depth_scale;	/* (42C0) */
+	struct radeon_state_atom rc;	/* rs control (4300) */
+	struct radeon_state_atom ri;	/* rs interpolators (4310) */
+	struct radeon_state_atom rr;	/* rs route (4330) */
+	struct radeon_state_atom sc_hyperz;	/* (43A4) */
+	struct radeon_state_atom sc_screendoor;	/* (43E8) */
+	struct radeon_state_atom fp;	/* fragment program cntl + nodes (4600) */
+	struct radeon_state_atom fpt;	/* texi - (4620) */
+	struct radeon_state_atom us_out_fmt;	/* (46A4) */
+	struct radeon_state_atom r500fp;	/* r500 fp instructions */
+	struct radeon_state_atom r500fp_const;	/* r500 fp constants */
+	struct radeon_state_atom fpi[4];	/* fp instructions (46C0/47C0/48C0/49C0) */
+	struct radeon_state_atom fogs;	/* fog state (4BC0) */
+	struct radeon_state_atom fogc;	/* fog color (4BC8) */
+	struct radeon_state_atom at;	/* alpha test (4BD4) */
+	struct radeon_state_atom fg_depth_src;	/* (4BD8) */
+	struct radeon_state_atom fpp;	/* 0x4C00 and following */
+	struct radeon_state_atom rb3d_cctl;	/* (4E00) */
+	struct radeon_state_atom bld;	/* blending (4E04) */
+	struct radeon_state_atom cmk;	/* colormask (4E0C) */
+	struct radeon_state_atom blend_color;	/* constant blend color */
+	struct radeon_state_atom rop;	/* ropcntl */
+	struct radeon_state_atom cb;	/* colorbuffer (4E28) */
+	struct radeon_state_atom rb3d_dither_ctl;	/* (4E50) */
+	struct radeon_state_atom rb3d_aaresolve_ctl;	/* (4E88) */
+	struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold;	/* (4E88 repeated from rb3d_aaresolve_ctl; NOTE(review): confirm the real offset) I saw it only written on RV350 hardware..  */
+	struct radeon_state_atom zs;	/* zstencil control (4F00) */
+	struct radeon_state_atom zsb;	/* zstencil bf */
+	struct radeon_state_atom zstencil_format;
+	struct radeon_state_atom zb;	/* z buffer (4F20) */
+	struct radeon_state_atom zb_depthclearvalue;	/* (4F28) */
+	struct radeon_state_atom zb_zmask;	/* (4F30) */
+	struct radeon_state_atom zb_hiz_offset;	/* (4F44) */
+	struct radeon_state_atom zb_hiz_pitch;	/* (4F54) */
+
+	struct radeon_state_atom vpi;	/* vp instructions */
+	struct radeon_state_atom vpp;	/* vp parameters */
+	struct radeon_state_atom vps;	/* vertex point size (?) */
+	struct radeon_state_atom vpucp[6];	/* vp user clip plane - 6 */
+	/* 8 texture units */
+	/* the state is grouped by function and not by
+	   texture unit. This makes single unit updates
+	   really awkward - we are much better off
+	   updating the whole thing at once */
+	struct {
+		struct radeon_state_atom filter;
+		struct radeon_state_atom filter_1;
+		struct radeon_state_atom size;
+		struct radeon_state_atom format;
+		struct radeon_state_atom pitch;
+		struct radeon_state_atom offset;
+		struct radeon_state_atom chroma_key;
+		struct radeon_state_atom border_color;
+	} tex;
+	struct radeon_state_atom txe;	/* tex enable (4104) */
+	radeonTexObj *textures[R300_MAX_TEXTURE_UNITS];	/* one texture object pointer per unit */
+};
+
+/**
+ * State cache
+ */
+
+/* Vertex shader state */
+
+#define COLOR_IS_RGBA
+#define TAG(x) r300##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+/*
+ * Key stored with every hw specific vertex program (the `key` member of
+ * struct r300_vertex_program).
+ * NOTE(review): presumably used to tell the variants derived from one Mesa
+ * program apart; the code that fills and compares it is not in this header.
+ */
+struct r300_vertex_program_key {
+	GLbitfield FpReads;
+	GLuint FogAttr;
+	GLuint WPosAttr;
+};
+
+/* One hw specific vertex program; instances are chained through `next`. */
+struct r300_vertex_program {
+	struct gl_vertex_program *Base;	/* NOTE(review): presumably the Mesa program this one was derived from */
+	struct r300_vertex_program *next;	/* next entry of the singly linked list */
+
+	struct r300_vertex_program_key key;
+	struct r300_vertex_program_code code;	/* InputsRead/OutputsWritten of this member are consumed when drawing */
+
+	GLboolean error;
+};
+
+struct r300_vertex_program_cont {
+	/* This is the unmodified vertex program mesa provided us with.
+	 * We need to keep it unchanged because we may need to create another
+	 * hw specific vertex program based on this.
+	 */
+	struct gl_vertex_program mesa_program;
+	/* This is the list of hw specific vertex programs derived from mesa_program */
+	struct r300_vertex_program *progs;
+};
+
+
+/**
+* Store everything about a fragment program that is needed
+* to render with that program.
+*/
+struct r300_fragment_program {
+	GLboolean error;
+	struct r300_fragment_program *next;
+	struct r300_fragment_program_external_state state;
+
+	struct rX00_fragment_program_code code;
+	GLbitfield InputsRead;
+
+	/* attribute that we are sending the WPOS in */
+	gl_frag_attrib wpos_attr;
+	/* attribute that we are sending the fog coordinate in */
+	gl_frag_attrib fog_attr;
+};
+
+struct r300_fragment_program_cont {
+	/* This is the unmodified fragment program mesa provided us with.
+	 * We need to keep it unchanged because we may need to create another
+	 * hw specific fragment program based on this.
+	 */
+	struct gl_fragment_program Base;
+	/* This is the list of hw specific fragment programs derived from Base */
+	struct r300_fragment_program *progs;
+};
+
+
+#define R300_MAX_AOS_ARRAYS		16
+
+
+/* r300_swtcl.c
+ */
+struct r300_swtcl_info {
+   /**
+    * Offset of the 4UB color data within a hardware (swtcl) vertex.
+    */
+   GLuint coloroffset;
+
+   /**
+    * Offset of the 3UB specular color data within a hardware (swtcl) vertex.
+    */
+   GLuint specoffset;
+};
+
+/*
+ * Function pointers for the RS unit, fragment shader texture and pixel
+ * shader setup steps.
+ * NOTE(review): presumably filled with chip-family specific implementations;
+ * the code that initialises the table is not in this header.
+ */
+struct r300_vtable {
+	void (* SetupRSUnit)(GLcontext *ctx);
+	void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings);
+	void (* SetupPixelShader)(GLcontext *ctx);
+};
+
+/*
+ * Description of the vertex arrays used by the current draw call:
+ * one vertex_attribute per enabled input, num_attribs entries in total.
+ */
+struct r300_vertex_buffer {
+	struct vertex_attribute {
+		/* generic */
+		GLubyte element;	/* index of the source array this entry was built from */
+		GLuint stride;	/* distance between elements in bytes, 0 for a constant attribute */
+		GLuint dwords;	/* size of one element in dwords */
+		GLubyte size; /* number of components */
+		GLboolean is_named_bo;	/* GL_TRUE when `bo` is the application's buffer object and must not be unreferenced by the draw code */
+		struct radeon_bo *bo;	/* buffer object holding the data */
+		GLint bo_offset;	/* offset of the data inside `bo` */
+
+		/* hw specific */
+		uint32_t data_type:4;	/* one of the R300_DATA_TYPE_* values */
+		uint32_t dst_loc:5;	/* position of this entry in attribs[] at creation time */
+		uint32_t _signed:1;
+		uint32_t normalize:1;
+		uint32_t swizzle:12;
+		uint32_t write_mask:4;
+	} attribs[VERT_ATTRIB_MAX];
+
+	GLubyte num_attribs;	/* number of valid entries in attribs[] */
+};
+
+/* Index data of the current draw call; bo is NULL for non-indexed draws. */
+struct r300_index_buffer {
+	struct radeon_bo *bo;	/* buffer object holding the indices */
+	int bo_offset;	/* offset of the indices inside `bo` */
+
+	GLboolean is_32bit;	/* GL_TRUE for 32-bit indices, GL_FALSE for 16-bit ones */
+	GLuint count;	/* number of indices */
+};
+
+
+/**
+ * \brief R300 context structure.
+ *
+ * Embeds the common radeon context as its first member and adds the
+ * r300 specific state on top of it.
+ */
+struct r300_context {
+	struct radeon_context radeon;	/* parent class, must be first */
+
+	struct r300_vtable vtbl;
+
+	struct r300_hw_state hw;	/* cached hardware register state */
+
+	struct r300_vertex_program *selected_vp;
+	struct r300_fragment_program *selected_fp;
+
+	/* Vertex buffers
+	 */
+	GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
+	GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
+
+	/* One-bit driver option flags. */
+	struct r300_options {
+		uint32_t conformance_mode:1;
+		uint32_t hw_tcl_enabled:1;
+		uint32_t s3tc_force_enabled:1;
+		uint32_t s3tc_force_disabled:1;
+		uint32_t stencil_two_side_disabled:1;
+	} options;
+
+	struct r300_swtcl_info swtcl;
+	struct r300_vertex_buffer vbuf;	/* vertex arrays of the current draw call */
+	struct r300_index_buffer ind_buf;	/* index data of the current draw call */
+
+	uint32_t fallback;	/* non-zero while the hardware draw path must not be used */
+
+	struct {
+		struct r300_vertex_program_code vp_code;
+		struct rX00_fragment_program_code fp_code;
+	} blit;
+
+	DECLARE_RENDERINPUTS(render_inputs_bitset);
+};
+
+#define R300_CONTEXT(ctx)		((r300ContextPtr)(ctx->DriverCtx))
+
+extern void r300DestroyContext(__DRIcontext * driContextPriv);
+extern GLboolean r300CreateContext(gl_api api,
+				   const __GLcontextModes * glVisual,
+				   __DRIcontext * driContextPriv,
+				   void *sharedContextPrivate);
+
+extern void r300InitShaderFuncs(struct dd_function_table *functions);
+
+extern void r300InitShaderFunctions(r300ContextPtr r300);
+
+extern void r300InitDraw(GLcontext *ctx);
+
+#define r300PackFloat32 radeonPackFloat32
+#define r300PackFloat24 radeonPackFloat24
+
+#endif				/* __R300_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
new file mode 100644
index 0000000000..282c0e18bc
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -0,0 +1,750 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Maciej Cencora
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHOR(S) AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdlib.h>
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/enums.h"
+#include "main/simple_list.h"
+
+#include "r300_reg.h"
+#include "r300_context.h"
+#include "r300_emit.h"
+#include "r300_render.h"
+#include "r300_state.h"
+#include "r300_tex.h"
+#include "r300_cmdbuf.h"
+
+#include "radeon_buffer_objects.h"
+#include "radeon_common_context.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_vp_build.h"
+#include "vbo/vbo_context.h"
+
+
+/**
+ * Size in bytes of a single component of the given GL data type.
+ *
+ * An unsupported type triggers an assertion; when assertions are compiled
+ * out the function returns 0.
+ */
+static int getTypeSize(GLenum type)
+{
+	int bytes = 0;
+
+	switch (type) {
+	case GL_BYTE:
+		bytes = sizeof(GLbyte);
+		break;
+	case GL_UNSIGNED_BYTE:
+		bytes = sizeof(GLubyte);
+		break;
+	case GL_SHORT:
+		bytes = sizeof(GLshort);
+		break;
+	case GL_UNSIGNED_SHORT:
+		bytes = sizeof(GLushort);
+		break;
+	case GL_INT:
+		bytes = sizeof(GLint);
+		break;
+	case GL_UNSIGNED_INT:
+		bytes = sizeof(GLuint);
+		break;
+	case GL_HALF_FLOAT:
+		bytes = sizeof(GLhalfARB);
+		break;
+	case GL_FLOAT:
+		bytes = sizeof(GLfloat);
+		break;
+	case GL_DOUBLE:
+		bytes = sizeof(GLdouble);
+		break;
+	default:
+		assert(0);
+		break;
+	}
+
+	return bytes;
+}
+
+/**
+ * Repack an index buffer into a newly allocated DMA region as 16-bit indices.
+ *
+ * GL_UNSIGNED_BYTE indices are widened to 16 bits.  On MESA_BIG_ENDIAN builds
+ * an additional branch reads any other type as GL_UNSIGNED_SHORT and repacks
+ * it the same way; on other builds only the GL_UNSIGNED_BYTE branch exists.
+ * Two indices are stored per 32-bit word, the first one in the low half.
+ *
+ * The result is described by r300->ind_buf (bo, bo_offset, count) and
+ * is_32bit is cleared.
+ */
+static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	GLvoid *src_ptr;
+	GLuint *out;
+	int i;
+	GLboolean mapped_named_bo = GL_FALSE;
+
+	/* A named buffer object that is not mapped yet has to be mapped for reading. */
+	if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
+		ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+		mapped_named_bo = GL_TRUE;
+		assert(mesa_ind_buf->obj->Pointer != NULL);
+	}
+	src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+	radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT,
+			"%s: Fixing index buffer format. type %d\n",
+			__func__, mesa_ind_buf->type);
+
+	if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) {
+		/* Index count rounded up to an even number, two bytes per index. */
+		GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+		GLubyte *in = (GLubyte *)src_ptr;
+
+		radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4);
+		radeon_bo_map(r300->ind_buf.bo, 1);
+		assert(r300->ind_buf.bo->ptr != NULL);
+		out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
+
+		/* Pack index pairs into one word each. */
+		for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) {
+			*out++ = in[i] | in[i + 1] << 16;
+		}
+
+		/* Odd count: the last index goes alone into the low half. */
+		if (i < mesa_ind_buf->count) {
+			*out++ = in[i];
+		}
+		radeon_bo_unmap(r300->ind_buf.bo);
+#if MESA_BIG_ENDIAN
+	} else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
+		GLushort *in = (GLushort *)src_ptr;
+		GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+
+		radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo,
+				     &r300->ind_buf.bo_offset, size, 4);
+
+		radeon_bo_map(r300->ind_buf.bo, 1);
+		assert(r300->ind_buf.bo->ptr != NULL);
+		out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
+
+		/* Same pairwise packing as above, applied to 16-bit source indices. */
+		for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) {
+			*out++ = in[i] | in[i + 1] << 16;
+		}
+
+		if (i < mesa_ind_buf->count) {
+			*out++ = in[i];
+		}
+		radeon_bo_unmap(r300->ind_buf.bo);
+#endif
+	}
+
+	r300->ind_buf.is_32bit = GL_FALSE;
+	r300->ind_buf.count = mesa_ind_buf->count;
+
+	/* Only undo a mapping that was established by this function. */
+	if (mapped_named_bo) {
+		ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+	}
+}
+
+
+/**
+ * Prepare r300->ind_buf for the current draw call.
+ *
+ * Without an index buffer ind_buf.bo is set to NULL.  Index types that can be
+ * used unchanged are copied verbatim into a new DMA region; every other type
+ * is handed to r300FixupIndexBuffer().  "Unchanged" means GL_UNSIGNED_INT on
+ * MESA_BIG_ENDIAN builds and everything except GL_UNSIGNED_BYTE otherwise.
+ */
+static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+	if (!mesa_ind_buf) {
+		r300->ind_buf.bo = NULL;
+		return;
+	}
+	radeon_print(RADEON_RENDER, RADEON_TRACE, "%s\n", __func__);
+
+	/* The two preprocessor branches only select the condition of the same if statement. */
+#if MESA_BIG_ENDIAN
+	if (mesa_ind_buf->type == GL_UNSIGNED_INT) {
+#else
+	if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) {
+#endif
+		const GLvoid *src_ptr;
+		GLvoid *dst_ptr;
+		GLboolean mapped_named_bo = GL_FALSE;
+
+		/* A named buffer object that is not mapped yet has to be mapped for reading. */
+		if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
+			ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+			assert(mesa_ind_buf->obj->Pointer != NULL);
+			mapped_named_bo = GL_TRUE;
+		}
+
+		src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+		const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);
+
+		radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4);
+
+		radeon_bo_map(r300->ind_buf.bo, 1);
+		assert(r300->ind_buf.bo->ptr != NULL);
+		dst_ptr = ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
+		memcpy(dst_ptr, src_ptr, size);
+
+		radeon_bo_unmap(r300->ind_buf.bo);
+		r300->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
+		r300->ind_buf.count = mesa_ind_buf->count;
+
+		/* Only undo a mapping that was established by this function. */
+		if (mapped_named_bo) {
+			ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+		}
+	} else {
+		r300FixupIndexBuffer(ctx, mesa_ind_buf);
+	}
+}
+
+/*
+ * Convert `count` elements of `input->Size` components each from TYPE to
+ * GLfloat.  Normalized arrays are converted with MACRO, all others with a
+ * plain cast.
+ *
+ * The macro takes the following names from the scope it is expanded in:
+ * `input`, `count`, `stride`, `src_ptr` (advanced by `stride` per element)
+ * and `dst_ptr` (advanced by one GLfloat per component).  Where `src_ptr`
+ * is a pointer to void, the `src_ptr += stride` step relies on the
+ * compiler allowing arithmetic on void pointers.
+ */
+#define CONVERT( TYPE, MACRO ) do {		\
+	GLuint i, j, sz;				\
+	sz = input->Size;				\
+	if (input->Normalized) {			\
+		for (i = 0; i < count; i++) {		\
+			const TYPE *in = (TYPE *)src_ptr;		\
+			for (j = 0; j < sz; j++) {		\
+				*dst_ptr++ = MACRO(*in);		\
+				in++;				\
+			}					\
+			src_ptr += stride;			\
+		}						\
+	} else {					\
+		for (i = 0; i < count; i++) {		\
+			const TYPE *in = (TYPE *)src_ptr;		\
+			for (j = 0; j < sz; j++) {		\
+				*dst_ptr++ = (GLfloat)(*in);		\
+				in++;				\
+			}					\
+			src_ptr += stride;			\
+		}						\
+	}						\
+} while (0)
+
+/**
+ * Convert attribute data type to float
+ *
+ * The converted data is written to a newly allocated DMA region that is
+ * stored in attr->bo / attr->bo_offset, whether the source lives in a named
+ * buffer object or in client memory.  An array with StrideB == 0 is treated
+ * as a constant: only its first element is converted.
+ *
+ * GL_HALF_FLOAT and GL_FLOAT have no conversion case and end up in the
+ * asserting default branch.
+ *
+ * \param ctx   GL context
+ * \param count number of elements to convert (ignored when StrideB == 0)
+ * \param input source array
+ * \param attr  attribute whose bo and bo_offset receive the converted data
+ */
+static void r300ConvertAttrib(GLcontext *ctx, int count, const struct gl_client_array *input, struct vertex_attribute *attr)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	const GLvoid *src_ptr;
+	GLboolean mapped_named_bo = GL_FALSE;
+	GLfloat *dst_ptr;
+	GLuint stride;
+
+	/* StrideB == 0: fall back to the packed element size. */
+	stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
+
+	/* Convert value for first element only */
+	if (input->StrideB == 0)
+		count = 1;
+
+	if (input->BufferObj->Name) {
+		/* Named buffer object: map it unless it is mapped already. */
+		if (!input->BufferObj->Pointer) {
+			ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+			mapped_named_bo = GL_TRUE;
+		}
+
+		src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+	} else {
+		src_ptr = input->Ptr;
+	}
+
+	radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32);
+	radeon_bo_map(attr->bo, 1);
+	dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+
+	radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT,
+			"%s: Converting vertex attributes, attribute data format %x,"
+			"stride %d, components %d\n"
+			, __FUNCTION__, input->Type
+			, stride, input->Size);
+
+	assert(src_ptr != NULL);
+
+	/* CONVERT reads input, count, stride, src_ptr and dst_ptr from this scope. */
+	switch (input->Type) {
+		case GL_DOUBLE:
+			CONVERT(GLdouble, (GLfloat));
+			break;
+		case GL_UNSIGNED_INT:
+			CONVERT(GLuint, UINT_TO_FLOAT);
+			break;
+		case GL_INT:
+			CONVERT(GLint, INT_TO_FLOAT);
+			break;
+		case GL_UNSIGNED_SHORT:
+			CONVERT(GLushort, USHORT_TO_FLOAT);
+			break;
+		case GL_SHORT:
+			CONVERT(GLshort, SHORT_TO_FLOAT);
+			break;
+		case GL_UNSIGNED_BYTE:
+			/* BGRA ordered bytes cannot be converted component by component. */
+			assert(input->Format != GL_BGRA);
+			CONVERT(GLubyte, UBYTE_TO_FLOAT);
+			break;
+		case GL_BYTE:
+			CONVERT(GLbyte, BYTE_TO_FLOAT);
+			break;
+		default:
+			assert(0);
+			break;
+	}
+
+	radeon_bo_unmap(attr->bo);
+	/* Only undo a mapping that was established by this function. */
+	if (mapped_named_bo) {
+		ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+	}
+}
+
+/**
+ * Copy an array stored in a buffer object into a new DMA region, rounding
+ * the distance between elements up to a multiple of four bytes.
+ *
+ * The DMA region is sized for `count` elements of the padded stride.  Sizing
+ * it from the packed element size instead would be too small, because each
+ * element is copied with its full source stride and the destination advances
+ * by the padded stride.
+ *
+ * \param ctx   GL context
+ * \param input source array; its BufferObj supplies the data
+ * \param count number of elements to copy
+ * \param attr  attribute whose bo, bo_offset and stride are updated
+ */
+static void r300AlignDataToDword(GLcontext *ctx, const struct gl_client_array *input, int count, struct vertex_attribute *attr)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	const int dst_stride = (input->StrideB + 3) & ~3;
+	/* Each destination element occupies dst_stride bytes. */
+	const int size = dst_stride * count;
+	GLboolean mapped_named_bo = GL_FALSE;
+
+	radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, size, 32);
+
+	radeon_bo_map(attr->bo, 1);
+	assert(attr->bo->ptr != NULL);
+
+	/* Map the source buffer object unless it is mapped already. */
+	if (!input->BufferObj->Pointer) {
+		ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+		mapped_named_bo = GL_TRUE;
+	}
+
+	radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, "%s. Vertex alignment doesn't match hw requirements.\n", __func__);
+
+	{
+		/* Byte pointers keep the stride arithmetic within standard C. */
+		const GLubyte *src_ptr = (const GLubyte *)ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+		GLubyte *dst_ptr = (GLubyte *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+		int i;
+
+		for (i = 0; i < count; ++i) {
+			memcpy(dst_ptr, src_ptr, input->StrideB);
+			src_ptr += input->StrideB;
+			dst_ptr += dst_stride;
+		}
+	}
+
+	/* Only undo a mapping that was established by this function. */
+	if (mapped_named_bo) {
+		ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+	}
+
+	radeon_bo_unmap(attr->bo);
+	attr->stride = dst_stride;
+}
+
+/**
+ * Describe one source array as a hardware vertex attribute and append the
+ * description to r300->vbuf.
+ *
+ * The function fills in element, size, dwords, dst_loc, the R300 data type,
+ * signedness, normalization, swizzle and write mask.  Arrays that match the
+ * conversion condition below (the same condition r300AllocDmaRegions() uses
+ * to call r300ConvertAttrib()) are described as GL_FLOAT data.
+ *
+ * \param ctx   GL context
+ * \param attr  index of the source array, stored as the entry's `element`
+ * \param count not used by this function
+ * \param input source array
+ */
+static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	struct r300_vertex_buffer *vbuf = &r300->vbuf;
+	struct vertex_attribute r300_attr = { 0 };
+	GLenum type;
+	GLuint stride;
+
+	radeon_print(RADEON_RENDER, RADEON_TRACE, "%s\n", __func__);
+	/* StrideB == 0: fall back to the packed element size. */
+	stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
+
+	/* Types and strides listed here are handled as float data. */
+	if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+	    getTypeSize(input->Type) != 4 ||
+#endif
+	    stride < 4) {
+
+		type = GL_FLOAT;
+
+		if (input->StrideB == 0) {
+			r300_attr.stride = 0;
+		} else {
+			r300_attr.stride = sizeof(GLfloat) * input->Size;
+		}
+		r300_attr.dwords = input->Size;
+		r300_attr.is_named_bo = GL_FALSE;
+	} else {
+		type = input->Type;
+		r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4;
+		/* For arrays in a named buffer object stride and is_named_bo keep
+		 * their zero initial values here. */
+		if (!input->BufferObj->Name) {
+
+			if (input->StrideB == 0) {
+				r300_attr.stride = 0;
+			} else {
+				r300_attr.stride = (getTypeSize(type) * input->Size + 3) & ~3;
+			}
+
+			r300_attr.is_named_bo = GL_FALSE;
+		}
+	}
+
+	r300_attr.size = input->Size;
+	r300_attr.element = attr;
+	r300_attr.dst_loc = vbuf->num_attribs;
+
+	/* Select the hardware data type, sign and normalization. */
+	switch (type) {
+		case GL_FLOAT:
+			switch (input->Size) {
+				case 1: r300_attr.data_type = R300_DATA_TYPE_FLOAT_1; break;
+				case 2: r300_attr.data_type = R300_DATA_TYPE_FLOAT_2; break;
+				case 3: r300_attr.data_type = R300_DATA_TYPE_FLOAT_3; break;
+				case 4: r300_attr.data_type = R300_DATA_TYPE_FLOAT_4; break;
+			}
+			r300_attr._signed = 0;
+			r300_attr.normalize = 0;
+			break;
+		case GL_HALF_FLOAT:
+			/* _signed and normalize keep their zero initial values. */
+			switch (input->Size) {
+				case 1:
+				case 2:
+					r300_attr.data_type = R300_DATA_TYPE_FLT16_2;
+					break;
+				case 3:
+				case 4:
+					r300_attr.data_type = R300_DATA_TYPE_FLT16_4;
+					break;
+			}
+			break;
+		case GL_SHORT:
+			r300_attr._signed = 1;
+			r300_attr.normalize = input->Normalized;
+			switch (input->Size) {
+				case 1:
+				case 2:
+					r300_attr.data_type = R300_DATA_TYPE_SHORT_2;
+					break;
+				case 3:
+				case 4:
+					r300_attr.data_type = R300_DATA_TYPE_SHORT_4;
+					break;
+			}
+			break;
+		case GL_BYTE:
+			r300_attr._signed = 1;
+			r300_attr.normalize = input->Normalized;
+			r300_attr.data_type = R300_DATA_TYPE_BYTE;
+			break;
+		case GL_UNSIGNED_SHORT:
+			r300_attr._signed = 0;
+			r300_attr.normalize = input->Normalized;
+			switch (input->Size) {
+				case 1:
+				case 2:
+					r300_attr.data_type = R300_DATA_TYPE_SHORT_2;
+					break;
+				case 3:
+				case 4:
+					r300_attr.data_type = R300_DATA_TYPE_SHORT_4;
+					break;
+			}
+			break;
+		case GL_UNSIGNED_BYTE:
+			r300_attr._signed = 0;
+			r300_attr.normalize = input->Normalized;
+			if (input->Format == GL_BGRA)
+				r300_attr.data_type = R300_DATA_TYPE_D3DCOLOR;
+			else
+				r300_attr.data_type = R300_DATA_TYPE_BYTE;
+			break;
+
+		/* These types were redirected to GL_FLOAT above and must not show up here. */
+		default:
+		case GL_DOUBLE:
+		case GL_INT:
+		case GL_UNSIGNED_INT:
+			assert(0);
+			break;
+	}
+
+	/* Components missing from the array read as 0 for Y and Z and as 1 for W. */
+	switch (input->Size) {
+		case 4:
+			r300_attr.swizzle = SWIZZLE_XYZW;
+			break;
+		case 3:
+			r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+			break;
+		case 2:
+			r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE);
+			break;
+		case 1:
+			r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+			break;
+	}
+
+	r300_attr.write_mask = MASK_XYZW;
+
+	vbuf->attribs[vbuf->num_attribs] = r300_attr;
+	++vbuf->num_attribs;
+}
+
+/**
+ * Rebuild r300->vbuf from the inputs read by the selected vertex program.
+ *
+ * Every set bit of code.InputsRead selects one entry of `arrays`, which is
+ * translated with r300TranslateAttrib().  Afterwards the
+ * R300_FALLBACK_AOS_LIMIT fallback is switched according to whether more
+ * than R300_MAX_AOS_ARRAYS attributes were collected.
+ *
+ * The bit mask is walked in an unsigned variable: with a signed one a mask
+ * that has bit 31 set becomes negative, and a sign-propagating right shift
+ * never reaches zero.
+ *
+ * \param ctx    GL context
+ * \param arrays source arrays, indexed by input bit number
+ * \param count  passed through to r300TranslateAttrib()
+ */
+static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	struct r300_vertex_buffer *vbuf = &r300->vbuf;
+	GLuint inputs;
+	GLuint attr;
+
+	radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s\n", __func__);
+
+	inputs = r300->selected_vp->code.InputsRead;
+	vbuf->num_attribs = 0;
+
+	for (attr = 0; inputs != 0; inputs >>= 1, ++attr) {
+		if (inputs & 1)
+			r300TranslateAttrib(ctx, attr, count, arrays[attr]);
+	}
+
+	r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS);
+}
+
+/**
+ * Provide a buffer object with the vertex data for every attribute in
+ * r300->vbuf and fill the matching r300->radeon.tcl.aos[] entry.
+ *
+ * Per attribute one of four paths is taken:
+ *  - data that needs conversion is converted to float by r300ConvertAttrib();
+ *  - a named buffer object whose stride is not a multiple of four bytes is
+ *    copied by r300AlignDataToDword();
+ *  - any other named buffer object is used in place and marked is_named_bo;
+ *  - client memory is copied into a new DMA region with radeonEmitVec*().
+ *
+ * Finally the command stream space check is run and its result is used to
+ * switch the R300_FALLBACK_INVALID_BUFFERS fallback.
+ *
+ * \param ctx   GL context
+ * \param input source arrays, indexed by the `element` of each attribute
+ * \param count number of vertices to upload per non-constant attribute
+ */
+static void r300AllocDmaRegions(GLcontext *ctx, const struct gl_client_array *input[], int count)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	struct r300_vertex_buffer *vbuf = &r300->vbuf;
+	GLuint stride;
+	int ret;
+	int i, index;
+	radeon_print(RADEON_RENDER, RADEON_VERBOSE,
+			"%s: count %d num_attribs %d\n",
+			__func__, count, vbuf->num_attribs);
+
+	for (index = 0; index < vbuf->num_attribs; index++) {
+		struct radeon_aos *aos = &r300->radeon.tcl.aos[index];
+		i = vbuf->attribs[index].element;
+
+		/* StrideB == 0: fall back to the packed element size. */
+		stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
+
+		/* Same condition as in r300TranslateAttrib(): this data is converted to float. */
+		if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+				getTypeSize(input[i]->Type) != 4 ||
+#endif
+				stride < 4) {
+
+			r300ConvertAttrib(ctx, count, input[i], &vbuf->attribs[index]);
+		} else {
+			if (input[i]->BufferObj->Name) {
+				if (stride % 4 != 0) {
+					/* Stride is not dword aligned: work on a padded copy. */
+					assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
+					r300AlignDataToDword(ctx, input[i], count, &vbuf->attribs[index]);
+					vbuf->attribs[index].is_named_bo = GL_FALSE;
+				} else {
+					/* Use the buffer object in place; Ptr is the offset into it. */
+					vbuf->attribs[index].stride = input[i]->StrideB;
+					vbuf->attribs[index].bo_offset = (intptr_t) input[i]->Ptr;
+					vbuf->attribs[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
+					vbuf->attribs[index].is_named_bo = GL_TRUE;
+				}
+			} else {
+
+				int size;
+				int local_count = count;
+				uint32_t *dst;
+
+				/* A zero stride means a constant attribute: upload one element only. */
+				if (input[i]->StrideB == 0) {
+					size = getTypeSize(input[i]->Type) * input[i]->Size;
+					local_count = 1;
+				} else {
+					size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
+				}
+
+				radeonAllocDmaRegion(&r300->radeon, &vbuf->attribs[index].bo, &vbuf->attribs[index].bo_offset, size, 32);
+				radeon_bo_map(vbuf->attribs[index].bo, 1);
+				assert(vbuf->attribs[index].bo->ptr != NULL);
+				dst = (uint32_t *)ADD_POINTERS(vbuf->attribs[index].bo->ptr, vbuf->attribs[index].bo_offset);
+				/* The copy routine is chosen by the element size in dwords. */
+				switch (vbuf->attribs[index].dwords) {
+					case 1: radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+					case 2: radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+					case 3: radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+					case 4: radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+					default: assert(0); break;
+				}
+				radeon_bo_unmap(vbuf->attribs[index].bo);
+
+			}
+		}
+
+		/* The attribute stride is in bytes, the aos stride in floats. */
+		aos->count = vbuf->attribs[index].stride == 0 ? 1 : count;
+		aos->stride = vbuf->attribs[index].stride / sizeof(float);
+		aos->components = vbuf->attribs[index].dwords;
+		aos->bo = vbuf->attribs[index].bo;
+		aos->offset = vbuf->attribs[index].bo_offset;
+
+		if (vbuf->attribs[index].is_named_bo) {
+			radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[index].bo, RADEON_GEM_DOMAIN_GTT, 0);
+		}
+	}
+
+	r300->radeon.tcl.aos_count = vbuf->num_attribs;
+	ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, first_elem(&r300->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
+	r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, ret);
+
+}
+
+/**
+ * Drop the buffer object references taken for the current draw call.
+ *
+ * Attribute buffers are unreferenced unless they are the application's own
+ * named buffer object, and the index buffer is unreferenced when present.
+ * Every released pointer is cleared afterwards, so calling the function
+ * again does not unreference the same buffer object twice.
+ *
+ * tcl.aos[n].bo is zeroed as well to prevent a double unref in
+ * radeonReleaseArrays called during context destroy.
+ */
+static void r300FreeData(GLcontext *ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	int i;
+
+	radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s\n", __func__);
+
+	for (i = 0; i < r300->vbuf.num_attribs; i++) {
+		struct vertex_attribute *attrib = &r300->vbuf.attribs[i];
+
+		/* Named buffer objects belong to the application. */
+		if (!attrib->is_named_bo && attrib->bo != NULL) {
+			radeon_bo_unref(attrib->bo);
+		}
+		attrib->bo = NULL;
+		r300->radeon.tcl.aos[i].bo = NULL;
+	}
+
+	if (r300->ind_buf.bo != NULL) {
+		radeon_bo_unref(r300->ind_buf.bo);
+		r300->ind_buf.bo = NULL;
+	}
+}
+
+/**
+ * Estimate the number of command buffer dwords a draw call will need and
+ * make sure that much space is available.
+ *
+ * The estimate covers two cache flushes, the pre-emit state and, per
+ * primitive, the AOS pointers, two scissor updates and the draw packet.
+ * Primitives are counted once more for every (65535 - 32) vertices they
+ * contain.  The state emit size is counted again if the command buffer was
+ * flushed while reserving space.
+ *
+ * \return the predicted size in dwords, state included
+ */
+static GLuint r300PredictTryDrawPrimsSize(GLcontext *ctx,
+		GLuint nr_prims, const struct _mesa_prim *prim)
+{
+	struct r300_context *r300 = R300_CONTEXT(ctx);
+	struct r300_vertex_buffer *vbuf = &r300->vbuf;
+	GLuint split_prims = 0;
+	GLuint total;
+	GLuint state_dwords;
+	GLboolean did_flush;
+	int p;
+
+	/* Check for primitive splitting. */
+	for (p = 0; p < nr_prims; ++p) {
+		const GLuint verts = r300NumVerts(r300, prim[p].count, prim[p].mode);
+
+		split_prims += verts/(65535 - 32);
+	}
+	nr_prims += split_prims;
+
+	total = 2*CACHE_FLUSH_BUFSZ;
+	total += PRE_EMIT_STATE_BUFSZ;
+	total += (AOS_BUFSZ(vbuf->num_attribs)
+		+ SCISSORS_BUFSZ*2
+		+ FIREAOS_BUFSZ )*nr_prims;
+
+	state_dwords = radeonCountStateEmitSize(&r300->radeon);
+	did_flush = rcommonEnsureCmdBufSpace(&r300->radeon,
+			total + state_dwords,
+			__FUNCTION__);
+	if (did_flush) {
+		/* The state size measured before the flush is no longer used. */
+		state_dwords = radeonCountStateEmitSize(&r300->radeon);
+	}
+	total += state_dwords;
+
+	radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, total);
+	return total;
+}
+
+/**
+ * Try to render the primitives with the hardware path.
+ *
+ * State and shaders are validated first; then the vertex format, the index
+ * buffer and the vertex data are set up, the state is emitted and every
+ * primitive is rendered.
+ *
+ * \return GL_TRUE when the primitives were emitted, GL_FALSE when a fallback
+ *         is active and the caller has to draw by other means.  When the
+ *         fallback is detected after the buffers for this draw were set up,
+ *         their references are released before returning.
+ */
+static GLboolean r300TryDrawPrims(GLcontext *ctx,
+					 const struct gl_client_array *arrays[],
+					 const struct _mesa_prim *prim,
+					 GLuint nr_prims,
+					 const struct _mesa_index_buffer *ib,
+					 GLuint min_index,
+					 GLuint max_index )
+{
+	struct r300_context *r300 = R300_CONTEXT(ctx);
+	GLuint i;
+
+	radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: %u (%d-%d) cs begin at %d\n",
+				__FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw );
+
+	if (ctx->NewState)
+		_mesa_update_state( ctx );
+
+	if (r300->options.hw_tcl_enabled)
+		_tnl_UpdateFixedFunctionProgram(ctx);
+
+	r300UpdateShaders(r300);
+
+	r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx));
+
+	r300SetVertexFormat(ctx, arrays, max_index + 1);
+
+	/* Nothing has been allocated for this draw yet, so a plain return is enough. */
+	if (r300->fallback)
+		return GL_FALSE;
+
+	r300SetupVAP(ctx, r300->selected_vp->code.InputsRead, r300->selected_vp->code.OutputsWritten);
+
+	r300UpdateShaderStates(r300);
+
+	/* ensure we have the cmd buf space in advance to cover
+	 * the state + DMA AOS pointers */
+	GLuint emit_end = r300PredictTryDrawPrimsSize(ctx, nr_prims, prim)
+		+ r300->radeon.cmdbuf.cs->cdw;
+
+	r300SetupIndexBuffer(ctx, ib);
+
+	r300AllocDmaRegions(ctx, arrays, max_index + 1);
+
+	if (r300->fallback) {
+		/* The index and vertex buffers set up above hold references
+		 * that would otherwise never be dropped. */
+		r300FreeData(ctx);
+		return GL_FALSE;
+	}
+
+	r300EmitCacheFlush(r300);
+	radeonEmitState(&r300->radeon);
+
+	for (i = 0; i < nr_prims; ++i) {
+		r300RunRenderPrimitive(ctx, prim[i].start, prim[i].start + prim[i].count, prim[i].mode);
+	}
+
+	r300EmitCacheFlush(r300);
+
+	r300FreeData(ctx);
+
+	radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: %u (%d-%d) cs ending at %d\n",
+			__FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw );
+
+	/* Report when more was emitted than r300PredictTryDrawPrimsSize() reserved. */
+	if (emit_end < r300->radeon.cmdbuf.cs->cdw)
+		WARN_ONCE("Rendering was %d commands larger than predicted size."
+				" We might overflow  command buffer.\n", r300->radeon.cmdbuf.cs->cdw - emit_end);
+
+	return GL_TRUE;
+}
+
+/**
+ * Draw entry point installed into the vbo module by r300InitDraw().
+ *
+ * Index bounds are computed when the caller did not supply valid ones.
+ * Draws with a non-zero minimum index are handed to vbo_rebase_prims(),
+ * which is given this function as its draw callback.  Everything else is
+ * tried on the hardware path first and passed to the tnl pipeline when that
+ * attempt fails.
+ */
+static void r300DrawPrims(GLcontext *ctx,
+			 const struct gl_client_array *arrays[],
+			 const struct _mesa_prim *prim,
+			 GLuint nr_prims,
+			 const struct _mesa_index_buffer *ib,
+			 GLboolean index_bounds_valid,
+			 GLuint min_index,
+			 GLuint max_index)
+{
+	/* This check should get folded into just the places that
+	 * min/max index are really needed.
+	 */
+	if (!index_bounds_valid)
+		vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+
+	if (min_index != 0) {
+		radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT,
+				"%s: Rebasing primitives. %p nr_prims %d min_index %u max_index %u\n",
+				__func__, prim, nr_prims, min_index, max_index);
+		vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims );
+		return;
+	}
+
+	/* Hardware attempt; done when it succeeds. */
+	if (r300TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index))
+		return;
+
+	/* Otherwise run the tnl pipeline - it should take care of fallbacks */
+	_tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+}
+
+/** Install r300DrawPrims() as the draw_prims hook of the context's vbo module. */
+void r300InitDraw(GLcontext *ctx)
+{
+	vbo_context(ctx)->draw_prims = r300DrawPrims;
+}
diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
new file mode 100644
index 0000000000..a24d431611
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_emit.c
@@ -0,0 +1,135 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Maciej Cencora <m.cencora@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/imports.h"
+#include "main/macros.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+#include "r300_context.h"
+#include "r300_emit.h"
+
+
+GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
+{
+	/* Both arguments are ignored: the result is always 0x5555. The meaning of
+	 * this value is unknown; other values have been seen in this register. */
+	return 0x5555;
+}
+
+/* Build an input control word: one R300_INPUT_CNTL_* flag for each of the
+ * position, normal, color0 and texcoord attributes set in InputsRead. */
+GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
+{
+	const GLuint num_units = ctx->Const.MaxTextureUnits;
+	GLuint unit;
+	GLuint cntl = 0;
+
+	cntl |= (InputsRead & (1 << VERT_ATTRIB_POS)) ? R300_INPUT_CNTL_POS : 0;
+	cntl |= (InputsRead & (1 << VERT_ATTRIB_NORMAL)) ? R300_INPUT_CNTL_NORMAL : 0;
+	cntl |= (InputsRead & (1 << VERT_ATTRIB_COLOR0)) ? R300_INPUT_CNTL_COLOR : 0;
+
+	/* The flag for texcoord N is R300_INPUT_CNTL_TC0 shifted left by N. */
+	for (unit = 0; unit < num_units; unit++) {
+		if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + unit)))
+			cntl |= R300_INPUT_CNTL_TC0 << unit;
+	}
+
+	return cntl;
+}
+
+/* Translate the vertex result mask vp_writes into the *_PRESENT flags of
+ * R300_VAP_OUTPUT_VTX_FMT_0 (position, colors and point size). */
+GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes)
+{
+	const GLuint back_colors = (1 << VERT_RESULT_BFC0) | (1 << VERT_RESULT_BFC1);
+	GLuint fmt = 0;
+
+	if (vp_writes & (1 << VERT_RESULT_HPOS))
+		fmt |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
+	if (vp_writes & (1 << VERT_RESULT_COL0))
+		fmt |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT;
+	if (vp_writes & (1 << VERT_RESULT_COL1))
+		fmt |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
+	if (vp_writes & (1 << VERT_RESULT_PSIZ))
+		fmt |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+
+	/* Two sided lighting works only if all 4 colors are written */
+	if (vp_writes & back_colors)
+		fmt |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT |
+			   R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
+
+	return fmt;
+}
+
+/* Build R300_VAP_OUTPUT_VTX_FMT_1: each texcoord set in vp_writes takes the next
+ * free 3-bit slot as 4 components; exits if more than 8 texcoords are written. */
+GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes)
+{
+	GLuint i, ret = 0, first_free_texcoord = 0;
+
+	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+		if (!(vp_writes & (1 << (VERT_RESULT_TEX0 + i))))
+			continue;
+		if (first_free_texcoord >= 8) {	/* check before shifting past the last slot */
+			fprintf(stderr, "\tout of free texcoords\n");
+			exit(-1);
+		}
+		ret |= R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS << (3 * first_free_texcoord);
+		++first_free_texcoord;
+	}
+	return ret;
+}
+
+void r300EmitCacheFlush(r300ContextPtr rmesa)
+{
+	BATCH_LOCALS(&rmesa->radeon);
+	/* Write the flush/free flags to the RB3D destination cache, then to the Z cache. */
+	BEGIN_BATCH_NO_AUTOSTATE(4);
+	OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT,
+		R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+		R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+	OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT,
+		R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+		R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+	END_BATCH();
+	COMMIT_BATCH();
+}
diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h
new file mode 100644
index 0000000000..a456d8867c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_emit.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2005 Vladimir Dergachev.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ *   Vladimir Dergachev <volodya@mindspring.com>
+ *   Nicolai Haehnle <prefect_@gmx.net>
+ *   Aapo Tahkola <aet@rasterburn.org>
+ *   Ben Skeggs <darktama@iinet.net.au>
+ *   Jerome Glisse <j.glisse@gmail.com>
+ */
+
+/* This file defines functions for accessing R300 hardware.
+ */
+#ifndef __R300_EMIT_H__
+#define __R300_EMIT_H__
+
+#include "main/glheader.h"
+#include "r300_context.h"
+#include "r300_cmdbuf.h"
+
+/* Register-write header: a drm_r300 PACKET0 command header without kernel_mm,
+ * otherwise CP_PACKET0(reg, count - 1), or CP_PACKET2 when count is zero. */
+static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn,
+				  int reg, int count)
+{
+	drm_r300_cmd_header_t hdr;
+
+	if (rscrn->kernel_mm) {
+		if (count)
+			return CP_PACKET0(reg, count - 1);
+		return CP_PACKET2;
+	}
+	hdr.u = 0;
+	hdr.packet0.cmd_type = R300_CMD_PACKET0;
+	hdr.packet0.count = count;
+	hdr.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
+	hdr.packet0.reglo = ((unsigned int)reg & 0x00FF);
+	return hdr.u;
+}
+
+/* Pack a VPU command header: count plus the low 16 bits of addr as two bytes. */
+static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count)
+{
+	drm_r300_cmd_header_t hdr;
+
+	hdr.u = 0;
+	hdr.vpu.cmd_type = R300_CMD_VPU;
+	hdr.vpu.count = count;
+	hdr.vpu.adrhi = ((unsigned int)addr >> 8) & 0xFF;
+	hdr.vpu.adrlo = (unsigned int)addr & 0xFF;
+	return hdr.u;
+}
+
+/* R500FP command header: adrhi_flags holds bit 8 of addr plus type/clamp flags. */
+static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn,
+				 int addr, int count, int type, int clamp)
+{
+	drm_r300_cmd_header_t hdr;
+
+	hdr.u = 0;
+	hdr.r500fp.cmd_type = R300_CMD_R500FP;
+	hdr.r500fp.count = count;
+	hdr.r500fp.adrlo = (unsigned int)addr & 0xFF;
+	hdr.r500fp.adrhi_flags = (((unsigned int)addr >> 8) & 0x1) |
+				 (type ? R500FP_CONSTANT_TYPE : 0) |
+				 (clamp ? R500FP_CONSTANT_CLAMP : 0);
+	return hdr.u;
+}
+
+/* Pack a PACKET3 command header carrying the given packet value. */
+static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet)
+{
+	drm_r300_cmd_header_t hdr;
+
+	hdr.u = 0;
+	hdr.packet3.packet = packet;
+	hdr.packet3.cmd_type = R300_CMD_PACKET3;
+	return hdr.u;
+}
+
+/* Pack a CP_DELAY command header with the given delay count. */
+static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn,
+				  unsigned short count)
+{
+	drm_r300_cmd_header_t hdr;
+
+	hdr.u = 0;
+	hdr.delay.count = count;
+	hdr.delay.cmd_type = R300_CMD_CP_DELAY;
+
+	return hdr.u;
+}
+
+/* Pack a WAIT command header carrying the given wait flags. */
+static INLINE uint32_t cmdwait(struct radeon_screen *rscrn,
+			       unsigned char flags)
+{
+	drm_r300_cmd_header_t hdr;
+
+	hdr.u = 0;
+	hdr.wait.flags = flags;
+	hdr.wait.cmd_type = R300_CMD_WAIT;
+	return hdr.u;
+}
+
+/* Pack an END3D command header (rscrn is unused). */
+static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn)
+{
+	drm_r300_cmd_header_t hdr;
+
+	hdr.u = 0;
+	hdr.header.cmd_type = R300_CMD_END3D;
+	return hdr.u;
+}
+
+/**
+ * Write the header of a packet3: 2 dwords without kernel_mm, else 1 dword plus
+ * a section_cdw bump. Expects (num_extra+1) additional dwords afterwards.
+ */
+#define OUT_BATCH_PACKET3(packet, num_extra) do {\
+    if (!b_l_rmesa->radeonScreen->kernel_mm) {		\
+    	OUT_BATCH(cmdpacket3(b_l_rmesa->radeonScreen,\
+                  R300_CMD_PACKET3_RAW)); \
+    } else b_l_rmesa->cmdbuf.cs->section_cdw++;\
+	OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+	} while(0)
+
+/**
+ * Must be sent to switch to 2d commands. Emits the END3D header only when the
+ * screen is not using kernel_mm; otherwise nothing is written.
+ */
+static INLINE void end_3d(radeonContextPtr radeon)
+{
+	BATCH_LOCALS(radeon);
+	if (!radeon->radeonScreen->kernel_mm) {
+		BEGIN_BATCH_NO_AUTOSTATE(1);
+		OUT_BATCH(cmdpacify(radeon->radeonScreen));
+		END_BATCH();
+	}
+}
+
+/* Emit a CP delay header for count; a no-op when the screen uses kernel_mm. */
+static INLINE void cp_delay(r300ContextPtr rmesa, unsigned short count)
+{
+	BATCH_LOCALS(&rmesa->radeon);
+	if (!rmesa->radeon.radeonScreen->kernel_mm) {
+		BEGIN_BATCH_NO_AUTOSTATE(1);
+		OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count));
+		END_BATCH();
+	}
+}
+
+/* Emit a wait command for flags: a drm_r300 WAIT header without kernel_mm,
+ * else a RADEON_WAIT_UNTIL register write. Unknown flags emit nothing there. */
+static INLINE void cp_wait(radeonContextPtr radeon, unsigned char flags)
+{
+	BATCH_LOCALS(radeon);
+	uint32_t wait_until;
+	if (!radeon->radeonScreen->kernel_mm) {
+		BEGIN_BATCH_NO_AUTOSTATE(1);
+		OUT_BATCH(cmdwait(radeon->radeonScreen, flags));
+		END_BATCH();
+	} else {
+		switch(flags) {
+		case R300_WAIT_2D:
+			wait_until = (1 << 14);
+			break;
+		case R300_WAIT_3D:
+			wait_until = (1 << 15);
+			break;
+		case R300_NEW_WAIT_2D_3D:
+			wait_until = (1 << 14) | (1 << 15);
+			break;
+		case R300_NEW_WAIT_2D_2D_CLEAN:
+			wait_until = (1 << 14) | (1 << 16) | (1 << 18);
+			break;
+		case R300_NEW_WAIT_3D_3D_CLEAN:
+			wait_until = (1 << 15) | (1 << 17) | (1 << 18);
+			break;
+		case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
+			wait_until = (1 << 14) | (1 << 15) | (1 << 16) | (1 << 17) | (1 << 18);
+			break;
+		default:
+			return;
+		}
+		BEGIN_BATCH_NO_AUTOSTATE(2);
+		OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+		OUT_BATCH(wait_until);
+		END_BATCH();
+	}
+}
+
+extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
+extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
+
+extern void r300EmitCacheFlush(r300ContextPtr rmesa);
+
+extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead);
+extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead);
+extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes);
+extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
new file mode 100644
index 0000000000..7be2f74b5b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * Fragment program compiler. Perform transformations on the intermediate
+ * representation until the program is in a form where we can translate
+ * it more or less directly into machine-readable form.
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ */
+
+#include "r300_fragprog_common.h"
+
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+
+#include "compiler/radeon_compiler.h"
+
+#include "radeon_mesa_to_rc.h"
+
+
+/* Swizzle for a texture depth mode; anything unrecognized maps like GL_LUMINANCE. */
+static GLuint build_dts(GLuint depthmode)
+{
+	if (depthmode == GL_INTENSITY)
+		return RC_SWIZZLE_XYZW;
+	if (depthmode == GL_ALPHA)
+		return RC_SWIZZLE_WWWW;
+	return RC_SWIZZLE_XYZZ;
+}
+
+static GLuint build_func(GLuint comparefunc)
+{
+	return comparefunc - GL_NEVER;	/* zero-based offset from GL_NEVER */
+}
+
+/**
+ * Collect all external state that is relevant for compiling the given
+ * fragment program: for every unit flagged in ShadowSamplers, the depth
+ * swizzle and compare function of that unit's _Current texture object.
+ */
+static void build_state(
+	r300ContextPtr r300,
+	struct gl_fragment_program *fp,
+	struct r300_fragment_program_external_state *state)
+{
+	int unit;
+
+	memset(state, 0, sizeof(*state));
+	for(unit = 0; unit < 16; ++unit) {
+		struct gl_texture_object *bound;
+		if (!(fp->Base.ShadowSamplers & (1 << unit)))
+			continue;
+		bound = r300->radeon.glCtx->Texture.Unit[unit]._Current;
+		state->unit[unit].depth_texture_swizzle = build_dts(bound->DepthMode);
+		state->unit[unit].texture_compare_func = build_func(bound->CompareFunc);
+	}
+}
+
+
+/**
+ * Transform the program to support fragment.position.
+ *
+ * Introduce a small fragment at the start of the program that will be
+ * the only code that directly reads the FRAG_ATTRIB_WPOS input.
+ * All other code pieces that reference that input will be rewritten
+ * to read from a newly allocated temporary.
+ *
+ * fp->wpos_attr receives the first texcoord attribute missing from InputsRead
+ * (FRAG_ATTRIB_MAX when WPOS is not read or no texcoord is free).
+ */
+static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp)
+{
+	const GLuint inputs = compiler->Base.Program.InputsRead;
+	int attr;
+
+	fp->wpos_attr = FRAG_ATTRIB_MAX;
+	if (!(inputs & FRAG_BIT_WPOS))
+		return;
+
+	for (attr = FRAG_ATTRIB_TEX0; attr <= FRAG_ATTRIB_TEX7; ++attr) {
+		if (!(inputs & (1 << attr))) {
+			fp->wpos_attr = attr;
+			break;
+		}
+	}
+
+	/* No free texcoord found, fall-back to software rendering */
+	if (fp->wpos_attr == FRAG_ATTRIB_MAX) {
+		compiler->Base.Error = 1;
+		return;
+	}
+
+	rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr, GL_FALSE);
+}
+
+/**
+ * Rewrite fragment.fogcoord to use a texture coordinate slot.
+ * Note that fogcoord is forced into an X001 pattern, and this enforcement
+ * is done here.
+ *
+ * See also the counterpart rewriting for vertex programs.
+ */
+static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp)
+{
+	const GLuint inputs = compiler->Base.Program.InputsRead;
+	struct rc_src_register fog_src;
+	int attr;
+
+	fp->fog_attr = FRAG_ATTRIB_MAX;
+	if (!(inputs & FRAG_BIT_FOGC))
+		return;
+
+	/* Claim the lowest texcoord attribute the program does not read. */
+	for (attr = FRAG_ATTRIB_TEX0; attr <= FRAG_ATTRIB_TEX7; ++attr) {
+		if (!(inputs & (1 << attr))) {
+			fp->fog_attr = attr;
+			break;
+		}
+	}
+
+	/* No free texcoord found, fall-back to software rendering */
+	if (fp->fog_attr == FRAG_ATTRIB_MAX) {
+		compiler->Base.Error = 1;
+		return;
+	}
+
+	/* Move FRAG_ATTRIB_FOGC onto the claimed input with an X001 swizzle. */
+	memset(&fog_src, 0, sizeof(fog_src));
+	fog_src.File = RC_FILE_INPUT;
+	fog_src.Index = fp->fog_attr;
+	fog_src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+	rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, fog_src);
+}
+
+
+/**
+ * Reserve hardware temporary registers for the program inputs.
+ *
+ * Calls allocate(mydata, input, hwreg) once per input read, numbering hwreg
+ * from 0 in the order colors, texcoords, fog coordinate, fragment position;
+ * any other input bit is reported through rc_error().
+ *
+ * @note This allocation is performed explicitly, because the order of inputs
+ * is determined by the RS hardware.
+ */
+static void allocate_hw_inputs(
+	struct r300_fragment_program_compiler * c,
+	void (*allocate)(void * data, unsigned input, unsigned hwreg),
+	void * mydata)
+{
+	GLuint pending = c->Base.Program.InputsRead;
+	GLuint next_hwreg = 0;
+	int tex;
+
+	/* Primary colour, then secondary color */
+	if (pending & FRAG_BIT_COL0)
+		allocate(mydata, FRAG_ATTRIB_COL0, next_hwreg++);
+	pending &= ~FRAG_BIT_COL0;
+	if (pending & FRAG_BIT_COL1)
+		allocate(mydata, FRAG_ATTRIB_COL1, next_hwreg++);
+	pending &= ~FRAG_BIT_COL1;
+
+	/* Texcoords */
+	for (tex = 0; tex < 8; tex++) {
+		if (pending & (FRAG_BIT_TEX0 << tex))
+			allocate(mydata, FRAG_ATTRIB_TEX0 + tex, next_hwreg++);
+	}
+	pending &= ~FRAG_BITS_TEX_ANY;
+
+	/* Fogcoords, then fragment position, both treated as a texcoord */
+	if (pending & FRAG_BIT_FOGC)
+		allocate(mydata, FRAG_ATTRIB_FOGC, next_hwreg++);
+	pending &= ~FRAG_BIT_FOGC;
+	if (pending & FRAG_BIT_WPOS)
+		allocate(mydata, FRAG_ATTRIB_WPOS, next_hwreg++);
+	pending &= ~FRAG_BIT_WPOS;
+
+	/* Anything else */
+	if (pending)
+		rc_error(&c->Base, "Don't know how to handle inputs 0x%x\n", pending);
+}
+
+/* Compile cont's program for the external state in fp->state; code, error flag, InputsRead and fog/wpos slots are stored in fp. */
+static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_program_cont *cont, struct r300_fragment_program *fp)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	struct r300_fragment_program_compiler compiler;
+
+	rc_init(&compiler.Base);
+	compiler.Base.Debug = (RADEON_DEBUG & RADEON_PIXEL) ? GL_TRUE : GL_FALSE;
+	/* Configure the compiler: output location, external state and chip-dependent limits. */
+	compiler.code = &fp->code;
+	compiler.state = fp->state;
+	compiler.enable_shadow_ambient = GL_TRUE;
+	compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
+	compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
+	compiler.OutputDepth = FRAG_RESULT_DEPTH;
+	memset(compiler.OutputColor, 0, 4 * sizeof(unsigned));
+	compiler.OutputColor[0] = FRAG_RESULT_COLOR;
+	compiler.AllocateHwInputs = &allocate_hw_inputs;
+	/* NOTE(review): the banner is printed to stdout while stderr is flushed around it - confirm this is intended. */
+	if (compiler.Base.Debug) {
+		fflush(stderr);
+		printf("Fragment Program: Initial program:\n");
+		_mesa_print_program(&cont->Base.Base);
+		fflush(stderr);
+	}
+
+	radeon_mesa_to_rc_program(&compiler.Base, &cont->Base.Base);
+
+	insert_WPOS_trailer(&compiler, fp);
+
+	rewriteFog(&compiler, fp);
+
+	r3xx_compile_fragment_program(&compiler);
+
+	if (compiler.Base.is_r500) {
+		/* We need to support the non-KMS DRM interface, which
+		 * artificially limits the number of instructions and
+		 * constants which are available to us.
+		 *
+		 * See also the comment in r300_context.c where we
+		 * set the MAX_NATIVE_xxx values.
+		 */
+		if (fp->code.code.r500.inst_end >= 255 || fp->code.constants.Count > 255)
+			rc_error(&compiler.Base, "Program is too big (upgrade to r300g to avoid this limitation).\n");
+	}
+
+	fp->error = compiler.Base.Error;
+
+	fp->InputsRead = compiler.Base.Program.InputsRead;
+
+	/* Clear the fog/wpos_attr if code accessing these
+	 * attributes has been removed during compilation
+	 */
+	if (fp->fog_attr != FRAG_ATTRIB_MAX) {
+		if (!(fp->InputsRead & (1 << fp->fog_attr)))
+			fp->fog_attr = FRAG_ATTRIB_MAX;
+	}
+
+	if (fp->wpos_attr != FRAG_ATTRIB_MAX) {
+		if (!(fp->InputsRead & (1 << fp->wpos_attr)))
+			fp->wpos_attr = FRAG_ATTRIB_MAX;
+	}
+
+	rc_destroy(&compiler.Base);
+}
+
+/* Return (and record in r300->selected_fp) the variant of the current fragment
+ * program compiled for the current external state, translating a new variant
+ * and prepending it to the program's list when no cached one matches. */
+struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	struct r300_fragment_program_cont *cont =
+		(struct r300_fragment_program_cont *)ctx->FragmentProgram._Current;
+	struct r300_fragment_program_external_state wanted;
+	struct r300_fragment_program *variant;
+
+	build_state(r300, ctx->FragmentProgram._Current, &wanted);
+
+	/* Reuse a variant that was already translated for identical state. */
+	for (variant = cont->progs; variant; variant = variant->next) {
+		if (memcmp(&variant->state, &wanted, sizeof(wanted)) == 0)
+			return r300->selected_fp = variant;
+	}
+
+	/* NOTE(review): the calloc() result is used without a NULL check. */
+	variant = calloc(1, sizeof(struct r300_fragment_program));
+	variant->state = wanted;
+	variant->next = cont->progs;
+	cont->progs = variant;
+
+	translate_fragment_program(ctx, cont, variant);
+
+	return r300->selected_fp = variant;
+}
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
new file mode 100644
index 0000000000..3d64c08cee
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_FRAGPROG_COMMON_H_
+#define __R300_FRAGPROG_COMMON_H_
+
+#include "main/mtypes.h"
+
+#include "r300_context.h"
+
+struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
new file mode 100644
index 0000000000..ac93563ed9
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -0,0 +1,3313 @@
+/**************************************************************************
+
+Copyright (C) 2004-2005 Nicolai Haehnle et al.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/* *INDENT-OFF* */
+
+#ifndef _R300_REG_H
+#define _R300_REG_H
+
+#define R300_MC_INIT_MISC_LAT_TIMER	0x180
+#	define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT	0
+#	define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT	4
+#	define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT	8
+#	define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT	12
+#	define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT	16
+#	define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT	20
+#	define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT	24
+#	define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT	28
+
+
+#define R300_MC_INIT_GFX_LAT_TIMER	0x154
+#	define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT	0
+#	define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT	4
+#	define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT	8
+#	define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT	12
+#	define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT	16
+#	define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT	20
+#	define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT	24
+#	define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT	28
+
+/*
+ * This file contains registers and constants for the R300. They have been
+ * found mostly by examining command buffers captured using glxtest, as well
+ * as by extrapolating some known registers and constants from the R200.
+ * I am fairly certain that they are correct unless stated otherwise
+ * in comments.
+ */
+
+#define R300_SE_VPORT_XSCALE                0x1D98
+#define R300_SE_VPORT_XOFFSET               0x1D9C
+#define R300_SE_VPORT_YSCALE                0x1DA0
+#define R300_SE_VPORT_YOFFSET               0x1DA4
+#define R300_SE_VPORT_ZSCALE                0x1DA8
+#define R300_SE_VPORT_ZOFFSET               0x1DAC
+
+#define R300_VAP_PORT_IDX0		    0x2040
+/*
+ * Vertex Array Processing (VAP) Control
+ */
+#define R300_VAP_CNTL	0x2080
+#       define R300_PVS_NUM_SLOTS_SHIFT                 0
+#       define R300_PVS_NUM_CNTLRS_SHIFT                4
+#       define R300_PVS_NUM_FPUS_SHIFT                  8
+#       define R300_VF_MAX_VTX_NUM_SHIFT                18
+#       define R300_GL_CLIP_SPACE_DEF                   (0 << 22)
+#       define R300_DX_CLIP_SPACE_DEF                   (1 << 22)
+#       define R500_TCL_STATE_OPTIMIZATION              (1 << 23)
+
+/* This register is written directly and also starts data section
+ * in many 3d CP_PACKET3's
+ */
+#define R300_VAP_VF_CNTL	0x2084
+#	define	R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT              0
+#	define  R300_VAP_VF_CNTL__PRIM_NONE                     (0<<0)
+#	define  R300_VAP_VF_CNTL__PRIM_POINTS                   (1<<0)
+#	define  R300_VAP_VF_CNTL__PRIM_LINES                    (2<<0)
+#	define  R300_VAP_VF_CNTL__PRIM_LINE_STRIP               (3<<0)
+#	define  R300_VAP_VF_CNTL__PRIM_TRIANGLES                (4<<0)
+#	define  R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN             (5<<0)
+#	define  R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP           (6<<0)
+#	define  R300_VAP_VF_CNTL__PRIM_LINE_LOOP                (12<<0)
+#	define  R300_VAP_VF_CNTL__PRIM_QUADS                    (13<<0)
+#	define  R300_VAP_VF_CNTL__PRIM_QUAD_STRIP               (14<<0)
+#	define  R300_VAP_VF_CNTL__PRIM_POLYGON                  (15<<0)
+
+#	define	R300_VAP_VF_CNTL__PRIM_WALK__SHIFT              4
+	/* State based - direct writes to registers trigger vertex
+           generation */
+#	define	R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED         (0<<4)
+#	define	R300_VAP_VF_CNTL__PRIM_WALK_INDICES             (1<<4)
+#	define	R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST         (2<<4)
+#	define	R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED     (3<<4)
+
+	/* I don't think I saw these three used.. */
+#	define	R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT            6
+#	define	R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT     9
+#	define	R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT        10
+
+	/* index size - when not set the indices are assumed to be 16 bit */
+#	define	R300_VAP_VF_CNTL__INDEX_SIZE_32bit              (1<<11)
+	/* number of vertices */
+#	define	R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT           16
+
+#define R500_VAP_INDEX_OFFSET		    0x208c
+
+#define R300_VAP_OUTPUT_VTX_FMT_0           0x2090
+#       define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT     (1<<0)
+#       define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1)
+#       define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2)
+#       define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3)
+#       define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4)
+#       define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16)
+
+#define R300_VAP_OUTPUT_VTX_FMT_1           0x2094
+	/* each of the following is 3 bits wide, specifies number
+	   of components */
+#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0
+#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3
+#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6
+#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9
+#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12
+#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15
+#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18
+#       define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21
+#	define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT  0
+#	define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT  1
+#	define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2
+#	define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3
+#	define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4
+
+#define R300_SE_VTE_CNTL                  0x20b0
+#	define     R300_VPORT_X_SCALE_ENA                (1 << 0)
+#	define     R300_VPORT_X_OFFSET_ENA               (1 << 1)
+#	define     R300_VPORT_Y_SCALE_ENA                (1 << 2)
+#	define     R300_VPORT_Y_OFFSET_ENA               (1 << 3)
+#	define     R300_VPORT_Z_SCALE_ENA                (1 << 4)
+#	define     R300_VPORT_Z_OFFSET_ENA               (1 << 5)
+#	define     R300_VTX_XY_FMT                       (1 << 8)
+#	define     R300_VTX_Z_FMT                        (1 << 9)
+#	define     R300_VTX_W0_FMT                       (1 << 10)
+#	define     R300_SERIAL_PROC_ENA                  (1 << 11)
+
+/* BEGIN: Vertex data assembly - lots of uncertainties */
+
+/* gap */
+
+/* Maximum Vertex Indx Clamp */
+#define R300_VAP_VF_MAX_VTX_INDX         0x2134
+/* Minimum Vertex Indx Clamp */
+#define R300_VAP_VF_MIN_VTX_INDX         0x2138
+
+/** Vertex assembler/processor control status */
+#define R300_VAP_CNTL_STATUS              0x2140
+/* No swap at all (default) */
+#	define R300_VC_NO_SWAP                  (0 << 0)
+/* 16-bit swap: 0xAABBCCDD becomes 0xBBAADDCC */
+#	define R300_VC_16BIT_SWAP               (1 << 0)
+/* 32-bit swap: 0xAABBCCDD becomes 0xDDCCBBAA */
+#	define R300_VC_32BIT_SWAP               (2 << 0)
+/* Half-dword swap: 0xAABBCCDD becomes 0xCCDDAABB */
+#	define R300_VC_HALF_DWORD_SWAP          (3 << 0)
+/* The TCL engine will not be used (as it is logically or even physically removed) */
+#	define R300_VAP_TCL_BYPASS		(1 << 8)
+/* Read only flag if TCL engine is busy. */
+#	define R300_VAP_PVS_BUSY                (1 << 11)
+/* TODO: gap for MAX_MPS */
+/* Read only flag if the vertex store is busy. */
+#	define R300_VAP_VS_BUSY                 (1 << 24)
+/* Read only flag if the reciprocal engine is busy. */
+#	define R300_VAP_RCP_BUSY                (1 << 25)
+/* Read only flag if the viewport transform engine is busy. */
+#	define R300_VAP_VTE_BUSY                (1 << 26)
+/* Read only flag if the memory interface unit is busy. */
+#	define R300_VAP_MUI_BUSY                (1 << 27)
+/* Read only flag if the vertex cache is busy. */
+#	define R300_VAP_VC_BUSY                 (1 << 28)
+/* Read only flag if the vertex fetcher is busy. */
+#	define R300_VAP_VF_BUSY                 (1 << 29)
+/* Read only flag if the register pipeline is busy. */
+#	define R300_VAP_REGPIPE_BUSY            (1 << 30)
+/* Read only flag if the VAP engine is busy. */
+#	define R300_VAP_VAP_BUSY                (1 << 31)
+
+/* gap */
+
+/* Where do we get our vertex data?
+ *
+ * Vertex data comes either from immediate mode registers or from
+ * vertex arrays.
+ * There appears to be no mixed mode (though we can force the pitch of
+ * vertex arrays to 0, effectively reusing the same element over and over
+ * again).
+ *
+ * Immediate mode is controlled by the INPUT_CNTL registers. I am not sure
+ * if these registers influence vertex array processing.
+ *
+ * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3.
+ *
+ * In both cases, vertex attributes are then passed through INPUT_ROUTE.
+ *
+ * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data
+ * into the vertex processor's input registers.
+ * The first word routes the first input, the second word the second, etc.
+ * The corresponding input is routed into the register with the given index.
+ * The list is ended by a word with INPUT_ROUTE_END set.
+ *
+ * Always set COMPONENTS_4 in immediate mode.
+ */
+
+#define R300_VAP_PROG_STREAM_CNTL_0                     0x2150
+#       define R300_DATA_TYPE_0_SHIFT                   0
+#       define R300_DATA_TYPE_FLOAT_1                   0
+#       define R300_DATA_TYPE_FLOAT_2                   1
+#       define R300_DATA_TYPE_FLOAT_3                   2
+#       define R300_DATA_TYPE_FLOAT_4                   3
+#       define R300_DATA_TYPE_BYTE                      4
+#       define R300_DATA_TYPE_D3DCOLOR                  5
+#       define R300_DATA_TYPE_SHORT_2                   6
+#       define R300_DATA_TYPE_SHORT_4                   7
+#       define R300_DATA_TYPE_VECTOR_3_TTT              8
+#       define R300_DATA_TYPE_VECTOR_3_EET              9
+#	define R300_DATA_TYPE_FLT16_2			11
+#	define R300_DATA_TYPE_FLT16_4			12
+
+#       define R300_SKIP_DWORDS_SHIFT                   4
+#       define R300_DST_VEC_LOC_SHIFT                   8
+#       define R300_LAST_VEC                            (1 << 13)
+#       define R300_SIGNED                              (1 << 14)
+#       define R300_NORMALIZE                           (1 << 15)
+#       define R300_DATA_TYPE_1_SHIFT                   16
+#define R300_VAP_PROG_STREAM_CNTL_1                     0x2154
+#define R300_VAP_PROG_STREAM_CNTL_2                     0x2158
+#define R300_VAP_PROG_STREAM_CNTL_3                     0x215C
+#define R300_VAP_PROG_STREAM_CNTL_4                     0x2160
+#define R300_VAP_PROG_STREAM_CNTL_5                     0x2164
+#define R300_VAP_PROG_STREAM_CNTL_6                     0x2168
+#define R300_VAP_PROG_STREAM_CNTL_7                     0x216C
+/* gap */
+
+/* Notes:
+ *  - always set up to produce at least two attributes:
+ *    if vertex program uses only position, fglrx will set normal, too
+ *  - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal.
+ */
+#define R300_VAP_VTX_STATE_CNTL               0x2180
+#       define R300_COLOR_0_ASSEMBLY_SHIFT    0
+#       define R300_SEL_COLOR                 0
+#       define R300_SEL_USER_COLOR_0          1
+#       define R300_SEL_USER_COLOR_1          2
+#       define R300_COLOR_1_ASSEMBLY_SHIFT    2
+#       define R300_COLOR_2_ASSEMBLY_SHIFT    4
+#       define R300_COLOR_3_ASSEMBLY_SHIFT    6
+#       define R300_COLOR_4_ASSEMBLY_SHIFT    8
+#       define R300_COLOR_5_ASSEMBLY_SHIFT    10
+#       define R300_COLOR_6_ASSEMBLY_SHIFT    12
+#       define R300_COLOR_7_ASSEMBLY_SHIFT    14
+#       define R300_UPDATE_USER_COLOR_0_ENA   (1 << 16)
+
+/*
+ * Each bit in this field applies to the corresponding vector in the VSM
+ * memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit
+ * is set, then the corresponding 4-Dword Vector is output into the Vertex Stream.
+ */
+#define R300_VAP_VSM_VTX_ASSM               0x2184
+#       define R300_INPUT_CNTL_POS               0x00000001
+#       define R300_INPUT_CNTL_NORMAL            0x00000002
+#       define R300_INPUT_CNTL_COLOR             0x00000004
+#       define R300_INPUT_CNTL_TC0               0x00000400
+#       define R300_INPUT_CNTL_TC1               0x00000800
+#       define R300_INPUT_CNTL_TC2               0x00001000 /* GUESS */
+#       define R300_INPUT_CNTL_TC3               0x00002000 /* GUESS */
+#       define R300_INPUT_CNTL_TC4               0x00004000 /* GUESS */
+#       define R300_INPUT_CNTL_TC5               0x00008000 /* GUESS */
+#       define R300_INPUT_CNTL_TC6               0x00010000 /* GUESS */
+#       define R300_INPUT_CNTL_TC7               0x00020000 /* GUESS */
+
+/* Programmable Stream Control Signed Normalize Control */
+#define R300_VAP_PSC_SGN_NORM_CNTL         0x21dc
+#	define SGN_NORM_ZERO                 0
+#	define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1
+#	define SGN_NORM_NO_ZERO              2
+
+/* gap */
+
+/* Words parallel to INPUT_ROUTE_0; all words that are active in INPUT_ROUTE_0
+ * are set to a swizzling bit pattern, other words are 0.
+ *
+ * In immediate mode, the pattern is always set to xyzw. In vertex array
+ * mode, the swizzling pattern is e.g. used to set zw components in texture
+ * coordinates with only two components.
+ */
+#define R300_VAP_PROG_STREAM_CNTL_EXT_0                 0x21e0
+#       define R300_SWIZZLE0_SHIFT                      0
+#       define R300_SWIZZLE_SELECT_X_SHIFT              0 /* per-component select shifts, 3 bits apart */
+#       define R300_SWIZZLE_SELECT_Y_SHIFT              3
+#       define R300_SWIZZLE_SELECT_Z_SHIFT              6
+#       define R300_SWIZZLE_SELECT_W_SHIFT              9
+
+#       define R300_SWIZZLE_SELECT_X                    0 /* values placed at the SELECT_*_SHIFT positions */
+#       define R300_SWIZZLE_SELECT_Y                    1
+#       define R300_SWIZZLE_SELECT_Z                    2
+#       define R300_SWIZZLE_SELECT_W                    3
+#       define R300_SWIZZLE_SELECT_FP_ZERO              4
+#       define R300_SWIZZLE_SELECT_FP_ONE               5
+/* alternate names for r300_emit.c; same values as R300_SWIZZLE_SELECT_* above */
+#       define R300_INPUT_ROUTE_SELECT_X    0
+#       define R300_INPUT_ROUTE_SELECT_Y    1
+#       define R300_INPUT_ROUTE_SELECT_Z    2
+#       define R300_INPUT_ROUTE_SELECT_W    3
+#       define R300_INPUT_ROUTE_SELECT_ZERO 4
+#       define R300_INPUT_ROUTE_SELECT_ONE  5
+
+#       define R300_WRITE_ENA_SHIFT                     12 /* one enable bit per component, X..W below */
+#       define R300_WRITE_ENA_X                         1
+#       define R300_WRITE_ENA_Y                         2
+#       define R300_WRITE_ENA_Z                         4
+#       define R300_WRITE_ENA_W                         8
+#       define R300_SWIZZLE1_SHIFT                      16 /* NOTE(review): presumably a second swizzle in the upper half-word -- confirm */
+#define R300_VAP_PROG_STREAM_CNTL_EXT_1                 0x21e4
+#define R300_VAP_PROG_STREAM_CNTL_EXT_2                 0x21e8
+#define R300_VAP_PROG_STREAM_CNTL_EXT_3                 0x21ec
+#define R300_VAP_PROG_STREAM_CNTL_EXT_4                 0x21f0
+#define R300_VAP_PROG_STREAM_CNTL_EXT_5                 0x21f4
+#define R300_VAP_PROG_STREAM_CNTL_EXT_6                 0x21f8
+#define R300_VAP_PROG_STREAM_CNTL_EXT_7                 0x21fc
+
+/* END: Vertex data assembly */
+
+/* gap */
+
+/* BEGIN: Upload vertex program and data */
+
+/*
+ * The programmable vertex shader unit has a memory bank of unknown size
+ * that can be written to in 16 byte units by writing the address into
+ * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs).
+ *
+ * Pointers into the memory bank are always in multiples of 16 bytes.
+ *
+ * The memory bank is divided into areas with fixed meaning.
+ *
+ * Starting at address UPLOAD_PROGRAM: Vertex program instructions.
+ * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB),
+ * whereas the difference between known addresses suggests size 512.
+ *
+ * Starting at address UPLOAD_PARAMETERS: Vertex program parameters.
+ * Native reported limits and the VPI layout suggest size 256, whereas
+ * difference between known addresses suggests size 512.
+ *
+ * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the
+ * floating point pointsize. The exact purpose of this state is uncertain,
+ * as there is also the R300_RE_POINTSIZE register.
+ *
+ * Multiple vertex programs and parameter sets can be loaded at once,
+ * which could explain the size discrepancy.
+ */
+#define R300_VAP_PVS_VECTOR_INDX_REG         0x2200
+#       define R300_PVS_CODE_START           0
+#       define R300_MAX_PVS_CODE_LINES       256
+#       define R500_MAX_PVS_CODE_LINES       1024
+#       define R300_PVS_CONST_START          512
+#       define R500_PVS_CONST_START          1024
+#       define R300_MAX_PVS_CONST_VECS       256
+#       define R500_MAX_PVS_CONST_VECS       1024
+#       define R300_PVS_UCP_START            1024
+#       define R500_PVS_UCP_START            1536
+#       define R300_POINT_VPORT_SCALE_OFFSET 1030
+#       define R500_POINT_VPORT_SCALE_OFFSET 1542
+#       define R300_POINT_GEN_TEX_OFFSET     1031
+#       define R500_POINT_GEN_TEX_OFFSET     1543
+
+/*
+ * These are obsolete defines from r300_context.h (compiled out by the #if 0
+ * below), but they might give some clues when investigating the addresses...
+ */
+#if 0
+#define VSF_DEST_PROGRAM        0x0
+#define VSF_DEST_MATRIX0        0x200
+#define VSF_DEST_MATRIX1        0x204
+#define VSF_DEST_MATRIX2        0x208
+#define VSF_DEST_VECTOR0        0x20c
+#define VSF_DEST_VECTOR1        0x20d
+#define VSF_DEST_UNKNOWN1       0x400
+#define VSF_DEST_UNKNOWN2       0x406
+#endif
+
+/* gap */
+
+#define R300_VAP_PVS_UPLOAD_DATA            0x2208
+
+/* END: Upload vertex program and data */
+
+/* gap */
+
+/* I do not know the purpose of this register. However, I do know that
+ * it is set to 221C_CLEAR for clear operations and to 221C_NORMAL
+ * for normal rendering.
+ *
+ * 2007-11-05: This register is the user clip plane control register, but there
+ * also seems to be a rendering mode control; the NORMAL/CLEAR defines.
+ *
+ * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view
+ */
+#define R300_VAP_CLIP_CNTL                       0x221C
+#       define R300_VAP_UCP_ENABLE_0             (1 << 0)
+#       define R300_VAP_UCP_ENABLE_1             (1 << 1)
+#       define R300_VAP_UCP_ENABLE_2             (1 << 2)
+#       define R300_VAP_UCP_ENABLE_3             (1 << 3)
+#       define R300_VAP_UCP_ENABLE_4             (1 << 4)
+#       define R300_VAP_UCP_ENABLE_5             (1 << 5)
+#       define R300_PS_UCP_MODE_DIST_COP         (0 << 14)
+#       define R300_PS_UCP_MODE_RADIUS_COP       (1 << 14)
+#       define R300_PS_UCP_MODE_RADIUS_COP_CLIP  (2 << 14)
+#       define R300_PS_UCP_MODE_CLIP_AS_TRIFAN   (3 << 14)
+#       define R300_CLIP_DISABLE                 (1 << 16)
+#       define R300_UCP_CULL_ONLY_ENABLE         (1 << 17)
+#       define R300_BOUNDARY_EDGE_FLAG_ENABLE    (1 << 18)
+#       define R500_COLOR2_IS_TEXTURE            (1 << 20)
+#       define R500_COLOR3_IS_TEXTURE            (1 << 21)
+
+/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first
+ * plane is per-pixel and the second plane is per-vertex.
+ *
+ * This was determined by experimentation alone but I believe it is correct.
+ *
+ * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest.
+ */
+#define R300_VAP_GB_VERT_CLIP_ADJ                   0x2220
+#define R300_VAP_GB_VERT_DISC_ADJ                   0x2224
+#define R300_VAP_GB_HORZ_CLIP_ADJ                   0x2228
+#define R300_VAP_GB_HORZ_DISC_ADJ                   0x222c
+
+/* gap */
+
+/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
+ * rendering commands and overwriting vertex program parameters.
+ * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and
+ * avoids bugs caused by still running shaders reading bad data from memory.
+ */
+#define R300_VAP_PVS_STATE_FLUSH_REG        0x2284
+
+/* This register is used to define the number of core clocks to wait for a
+ * vertex to be received by the VAP input controller (while the primitive
+ * path is backed up) before forcing any accumulated vertices to be submitted
+ * to the vertex processing path.
+ */
+#define VAP_PVS_VTX_TIMEOUT_REG             0x2288
+#       define R300_2288_R300                    0x00750000 /* -- nh */
+#       define R300_2288_RV350                   0x0000FFFF /* -- Vladimir */
+
+/* gap */
+
+/* Addresses are relative to the vertex program instruction area of the
+ * memory bank. PROGRAM_END points to the last instruction of the active
+ * program.
+ *
+ * The meaning of the two UNKNOWN fields is not known. However, experiments
+ * so far have shown that both *must* point to an instruction inside the
+ * vertex program, otherwise the GPU locks up.
+ *
+ * fglrx usually sets CNTL_3_UNKNOWN to the end of the program, and
+ * R300_PVS_CNTL_1_POS_END_SHIFT points to the instruction where the last
+ * write to position takes place. Most likely this is used to ignore the rest
+ * of the program when a group of verts isn't visible. For some reason this
+ * "section" sometimes also accepts other instructions that have no
+ * relationship with position calculations.
+ * NOTE(review): PROGRAM_END, *_UNKNOWN and *_POS_END_SHIFT are not defined
+ * in this block; presumably they are older names for the fields -- confirm.
+ */
+#define R300_VAP_PVS_CODE_CNTL_0            0x22D0
+#       define R300_PVS_FIRST_INST_SHIFT         0
+#       define R300_PVS_XYZW_VALID_INST_SHIFT    10
+#       define R300_PVS_LAST_INST_SHIFT          20
+/* Addresses are relative to the vertex program parameters area. */
+#define R300_VAP_PVS_CONST_CNTL             0x22D4
+#       define R300_PVS_CONST_BASE_OFFSET_SHIFT  0
+#       define R300_PVS_MAX_CONST_ADDR_SHIFT     16
+#define R300_VAP_PVS_CODE_CNTL_1	    0x22D8
+#       define R300_PVS_LAST_VTX_SRC_INST_SHIFT  0
+#define R300_VAP_PVS_FLOW_CNTL_OPC          0x22DC
+
+/* The entire range from 0x2300 to 0x24AC inclusive seems to be used for
+ * immediate vertices.
+ */
+#define R300_VAP_VTX_COLOR_R                0x2464
+#define R300_VAP_VTX_COLOR_G                0x2468
+#define R300_VAP_VTX_COLOR_B                0x246C
+#define R300_VAP_VTX_POS_0_X_1              0x2490 /* used for glVertex2*() */
+#define R300_VAP_VTX_POS_0_Y_1              0x2494
+#define R300_VAP_VTX_COLOR_PKD              0x249C /* RGBA */
+#define R300_VAP_VTX_POS_0_X_2              0x24A0 /* used for glVertex3*() */
+#define R300_VAP_VTX_POS_0_Y_2              0x24A4
+#define R300_VAP_VTX_POS_0_Z_2              0x24A8
+/* write 0 to indicate end of packet? */
+#define R300_VAP_VTX_END_OF_PKT             0x24AC
+
+/* gap */
+
+/* These are values from r300_reg/r300_reg.h - they are known to be correct
+ * and are here so we can use one register file instead of several
+ * - Vladimir
+ */
+#define R300_GB_VAP_RASTER_VTX_FMT_0	0x4000
+#	define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT	(1<<0)
+#	define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT	(1<<1)
+#	define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT	(1<<2)
+#	define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT	(1<<3)
+#	define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT	(1<<4)
+#	define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE	(0xf<<5)
+#	define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT	(0x1<<16)
+
+#define R300_GB_VAP_RASTER_VTX_FMT_1	0x4004
+	/* each of the following is 3 bits wide, specifies number
+	   of components */
+#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT	0
+#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT	3
+#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT	6
+#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT	9
+#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT	12
+#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT	15
+#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT	18
+#	define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT	21
+
+/* UNK30 seems to enable point to quad transformation on textures
+ * (or something closely related to that).
+ * This bit is rather fatal for the time being due to shortcomings on the
+ * pixel shader side.
+ * Specifies top of Raster pipe specific enable controls.
+ */
+#define R300_GB_ENABLE	0x4008
+#	define R300_GB_POINT_STUFF_DISABLE     (0 << 0)
+#	define R300_GB_POINT_STUFF_ENABLE      (1 << 0) /* Specifies if points will have stuffed texture coordinates. */
+#	define R300_GB_LINE_STUFF_DISABLE      (0 << 1)
+#	define R300_GB_LINE_STUFF_ENABLE       (1 << 1) /* Specifies if lines will have stuffed texture coordinates. */
+#	define R300_GB_TRIANGLE_STUFF_DISABLE  (0 << 2)
+#	define R300_GB_TRIANGLE_STUFF_ENABLE   (1 << 2) /* Specifies if triangles will have stuffed texture coordinates. */
+#	define R300_GB_STENCIL_AUTO_DISABLE    (0 << 4)
+#	define R300_GB_STENCIL_AUTO_ENABLE     (1 << 4) /* Enable stencil auto inc/dec based on triangle cw/ccw, force into dzy low bit. */
+#	define R300_GB_STENCIL_AUTO_FORCE      (2 << 4) /* Force 0 into dzy low bit. */
+
+	/* each TEXn_SOURCE field below is 2 bits wide and takes one of the R300_GB_TEX_* values */
+#define R300_GB_TEX_REPLICATE	0 /* Replicate VAP source texture coordinates (S,T,[R,Q]). */
+#define R300_GB_TEX_ST		1 /* Stuff with source texture coordinates (S,T). */
+#define R300_GB_TEX_STR		2 /* Stuff with source texture coordinates (S,T,R). */
+#	define R300_GB_TEX0_SOURCE_SHIFT	16
+#	define R300_GB_TEX1_SOURCE_SHIFT	18
+#	define R300_GB_TEX2_SOURCE_SHIFT	20
+#	define R300_GB_TEX3_SOURCE_SHIFT	22
+#	define R300_GB_TEX4_SOURCE_SHIFT	24
+#	define R300_GB_TEX5_SOURCE_SHIFT	26
+#	define R300_GB_TEX6_SOURCE_SHIFT	28
+#	define R300_GB_TEX7_SOURCE_SHIFT	30 /* R300_GB_TEX_STR << 30 overflows a 32-bit int; shift an unsigned value */
+
+/* MSPOS - positions for multisample antialiasing (?) */
+#define R300_GB_MSPOS0                           0x4010
+	/* shifts - each of the fields is 4 bits */
+#	define R300_GB_MSPOS0__MS_X0_SHIFT	0
+#	define R300_GB_MSPOS0__MS_Y0_SHIFT	4
+#	define R300_GB_MSPOS0__MS_X1_SHIFT	8
+#	define R300_GB_MSPOS0__MS_Y1_SHIFT	12
+#	define R300_GB_MSPOS0__MS_X2_SHIFT	16
+#	define R300_GB_MSPOS0__MS_Y2_SHIFT	20
+#	define R300_GB_MSPOS0__MSBD0_Y		24
+#	define R300_GB_MSPOS0__MSBD0_X		28
+
+#define R300_GB_MSPOS1                           0x4014
+#	define R300_GB_MSPOS1__MS_X3_SHIFT	0
+#	define R300_GB_MSPOS1__MS_Y3_SHIFT	4
+#	define R300_GB_MSPOS1__MS_X4_SHIFT	8
+#	define R300_GB_MSPOS1__MS_Y4_SHIFT	12
+#	define R300_GB_MSPOS1__MS_X5_SHIFT	16
+#	define R300_GB_MSPOS1__MS_Y5_SHIFT	20
+#	define R300_GB_MSPOS1__MSBD1		24
+
+/* Specifies the graphics pipeline configuration for rasterization. */
+#define R300_GB_TILE_CONFIG                      0x4018
+#	define R300_GB_TILE_DISABLE             (0 << 0)
+#	define R300_GB_TILE_ENABLE              (1 << 0)
+#	define R300_GB_TILE_PIPE_COUNT_RV300	(0 << 1) /* RV350 (1 pipe, 1 ctx) */
+#	define R300_GB_TILE_PIPE_COUNT_R300	(3 << 1) /* R300 (2 pipes, 1 ctx) */
+#	define R300_GB_TILE_PIPE_COUNT_R420_3P  (6 << 1) /* R420-3P (3 pipes, 1 ctx) */
+#	define R300_GB_TILE_PIPE_COUNT_R420	(7 << 1) /* R420 (4 pipes, 1 ctx) */
+#	define R300_GB_TILE_SIZE_8		(0 << 4)
+#	define R300_GB_TILE_SIZE_16		(1 << 4)
+#	define R300_GB_TILE_SIZE_32		(2 << 4)
+#	define R300_GB_SUPER_SIZE_1		(0 << 6)
+#	define R300_GB_SUPER_SIZE_2		(1 << 6)
+#	define R300_GB_SUPER_SIZE_4		(2 << 6)
+#	define R300_GB_SUPER_SIZE_8		(3 << 6)
+#	define R300_GB_SUPER_SIZE_16		(4 << 6)
+#	define R300_GB_SUPER_SIZE_32		(5 << 6)
+#	define R300_GB_SUPER_SIZE_64		(6 << 6)
+#	define R300_GB_SUPER_SIZE_128		(7 << 6)
+#	define R300_GB_SUPER_X_SHIFT		9	/* 3 bits wide */
+#	define R300_GB_SUPER_Y_SHIFT		12	/* 3 bits wide */
+#	define R300_GB_SUPER_TILE_A		(0 << 15)
+#	define R300_GB_SUPER_TILE_B		(1 << 15)
+#	define R300_GB_SUBPIXEL_1_12		(0 << 16)
+#	define R300_GB_SUBPIXEL_1_16		(1 << 16)
+#	define GB_TILE_CONFIG_QUADS_PER_RAS_4   (0 << 17)
+#	define GB_TILE_CONFIG_QUADS_PER_RAS_8   (1 << 17)
+#	define GB_TILE_CONFIG_QUADS_PER_RAS_16  (2 << 17)
+#	define GB_TILE_CONFIG_QUADS_PER_RAS_32  (3 << 17)
+#	define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19)
+#	define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19)
+#	define GB_TILE_CONFIG_ALT_SCAN_EN_LR    (0 << 20)
+#	define GB_TILE_CONFIG_ALT_SCAN_EN_LRL   (1 << 20)
+#	define GB_TILE_CONFIG_ALT_OFFSET        (0 << 21)
+#	define GB_TILE_CONFIG_SUBPRECISION      (0 << 22)
+#	define GB_TILE_CONFIG_ALT_TILING_DEF    (0 << 23)
+#	define GB_TILE_CONFIG_ALT_TILING_3_2    (1 << 23)
+#	define GB_TILE_CONFIG_Z_EXTENDED_24_1   (0 << 24)
+#	define GB_TILE_CONFIG_Z_EXTENDED_S25_1  (1 << 24)
+
+/* Specifies the sizes of the various FIFOs in the sc/rs/us. This register must be the first one written. */
+#define R300_GB_FIFO_SIZE	0x4024
+	/* each of the following size fields is 2 bits wide */
+#define R300_GB_FIFO_SIZE_32	0
+#define R300_GB_FIFO_SIZE_64	1
+#define R300_GB_FIFO_SIZE_128	2
+#define R300_GB_FIFO_SIZE_256	3
+#	define R300_SC_IFIFO_SIZE_SHIFT	0
+#	define R300_SC_TZFIFO_SIZE_SHIFT	2
+#	define R300_SC_BFIFO_SIZE_SHIFT	4
+
+#	define R300_US_OFIFO_SIZE_SHIFT	12
+#	define R300_US_WFIFO_SIZE_SHIFT	14
+	/* the following use the same constants as above, but the meaning
+	   is times 2 (i.e. instead of 32 words it means 64) */
+#	define R300_RS_TFIFO_SIZE_SHIFT	6
+#	define R300_RS_CFIFO_SIZE_SHIFT	8
+#	define R300_US_RAM_SIZE_SHIFT		10
+	/* watermarks, 3 bits wide unless noted otherwise */
+#	define R300_RS_HIGHWATER_COL_SHIFT	16
+#	define R300_RS_HIGHWATER_TEX_SHIFT	19
+#	define R300_OFIFO_HIGHWATER_SHIFT	22	/* two bits only */
+#	define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT	24
+
+#define GB_Z_PEQ_CONFIG                          0x4028
+#	define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4    (0 << 0)
+#	define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8    (1 << 0)
+
+/* Specifies various polygon specific selects (fog, depth, perspective). */
+#define R300_GB_SELECT                           0x401c
+#	define R300_GB_FOG_SELECT_C0A		(0 << 0)
+#	define R300_GB_FOG_SELECT_C1A           (1 << 0)
+#	define R300_GB_FOG_SELECT_C2A           (2 << 0)
+#	define R300_GB_FOG_SELECT_C3A           (3 << 0)
+#	define R300_GB_FOG_SELECT_1_1_W         (4 << 0)
+#	define R300_GB_FOG_SELECT_Z		(5 << 0)
+#	define R300_GB_DEPTH_SELECT_Z		(0 << 3)
+#	define R300_GB_DEPTH_SELECT_1_1_W	(1 << 3)
+#	define R300_GB_W_SELECT_1_W		(0 << 4)
+#	define R300_GB_W_SELECT_1		(1 << 4)
+#	define R300_GB_FOG_STUFF_DISABLE        (0 << 5)
+#	define R300_GB_FOG_STUFF_ENABLE         (1 << 5)
+#	define R300_GB_FOG_STUFF_TEX_SHIFT      6
+#	define R300_GB_FOG_STUFF_TEX_MASK       0x000003c0
+#	define R300_GB_FOG_STUFF_COMP_SHIFT     10
+#	define R300_GB_FOG_STUFF_COMP_MASK      0x00000c00
+
+/* Specifies the graphics pipeline configuration for antialiasing. */
+#define GB_AA_CONFIG   	                         0x4020
+#	define GB_AA_CONFIG_AA_DISABLE           (0 << 0)
+#	define GB_AA_CONFIG_AA_ENABLE            (1 << 0)
+#	define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2  (0 << 1)
+#	define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3  (1 << 1)
+#	define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4  (2 << 1)
+#	define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6  (3 << 1)
+
+/* Selects which of 4 pipes are active. */
+#define GB_PIPE_SELECT                           0x402c
+#	define GB_PIPE_SELECT_PIPE0_ID_SHIFT  0
+#	define GB_PIPE_SELECT_PIPE1_ID_SHIFT  2
+#	define GB_PIPE_SELECT_PIPE2_ID_SHIFT  4
+#	define GB_PIPE_SELECT_PIPE3_ID_SHIFT  6
+#	define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8
+#	define GB_PIPE_SELECT_MAX_PIPE        12
+#	define GB_PIPE_SELECT_BAD_PIPES       14
+#	define GB_PIPE_SELECT_CONFIG_PIPES    18
+
+
+/* Specifies the sizes of the various FIFOs in the sc/rs. */
+#define GB_FIFO_SIZE1                            0x4070
+/* High water mark for SC input fifo (bits 5:0) */
+#	define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0
+#	define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK  0x0000003f
+/* High water mark for SC input fifo (B) (bits 11:6) */
+#	define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6
+#	define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK  0x00000fc0
+/* High water mark for RS colors' fifo (bits 17:12); NOTE(review): macro says SC_, comment says RS -- confirm */
+#	define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT   12
+#	define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK    0x0003f000
+/* High water mark for RS textures' fifo (bits 23:18); NOTE(review): macro says SC_, comment says RS -- confirm */
+#	define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT   18
+#	define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK    0x00fc0000
+
+/* This table specifies the source location and format for up to 16 texture
+ * addresses (i[0]:i[15]) and four colors (c[0]:c[3]); entries are 4 bytes apart.
+ */
+#define R500_RS_IP_0					0x4074
+#define R500_RS_IP_1					0x4078
+#define R500_RS_IP_2					0x407C
+#define R500_RS_IP_3					0x4080
+#define R500_RS_IP_4					0x4084
+#define R500_RS_IP_5					0x4088
+#define R500_RS_IP_6					0x408C
+#define R500_RS_IP_7					0x4090
+#define R500_RS_IP_8					0x4094
+#define R500_RS_IP_9					0x4098
+#define R500_RS_IP_10					0x409C
+#define R500_RS_IP_11					0x40A0
+#define R500_RS_IP_12					0x40A4
+#define R500_RS_IP_13					0x40A8
+#define R500_RS_IP_14					0x40AC
+#define R500_RS_IP_15					0x40B0
+#define R500_RS_IP_PTR_K0                               62 /* NOTE(review): presumably special TEX_PTR values (6-bit fields) -- confirm */
+#define R500_RS_IP_PTR_K1                               63
+#define R500_RS_IP_TEX_PTR_S_SHIFT 			0
+#define R500_RS_IP_TEX_PTR_T_SHIFT 			6
+#define R500_RS_IP_TEX_PTR_R_SHIFT 			12
+#define R500_RS_IP_TEX_PTR_Q_SHIFT 			18
+#define R500_RS_IP_COL_PTR_SHIFT 			24
+#define R500_RS_IP_COL_FMT_SHIFT 			27
+#	define R500_RS_COL_PTR(x)		        ((x) << 24) /* same position as R500_RS_IP_COL_PTR_SHIFT */
+#       define R500_RS_COL_FMT(x)                       ((x) << 27) /* same position as R500_RS_IP_COL_FMT_SHIFT */
+/* gap */
+#define R500_RS_IP_OFFSET_DIS 				(0 << 31)
+#define R500_RS_IP_OFFSET_EN 				(1u << 31) /* unsigned: 1 << 31 overflows a 32-bit int (undefined behaviour) */
+
+/* gap */
+
+/* Zero to flush caches. */
+#define R300_TX_INVALTAGS                   0x4100
+#define R300_TX_FLUSH                       0x0
+
+/* The upper enable bits are guessed, based on fglrx reported limits. */
+#define R300_TX_ENABLE                      0x4104
+#       define R300_TX_ENABLE_0                  (1 << 0)
+#       define R300_TX_ENABLE_1                  (1 << 1)
+#       define R300_TX_ENABLE_2                  (1 << 2)
+#       define R300_TX_ENABLE_3                  (1 << 3)
+#       define R300_TX_ENABLE_4                  (1 << 4)
+#       define R300_TX_ENABLE_5                  (1 << 5)
+#       define R300_TX_ENABLE_6                  (1 << 6)
+#       define R300_TX_ENABLE_7                  (1 << 7)
+#       define R300_TX_ENABLE_8                  (1 << 8)
+#       define R300_TX_ENABLE_9                  (1 << 9)
+#       define R300_TX_ENABLE_10                 (1 << 10)
+#       define R300_TX_ENABLE_11                 (1 << 11)
+#       define R300_TX_ENABLE_12                 (1 << 12)
+#       define R300_TX_ENABLE_13                 (1 << 13)
+#       define R300_TX_ENABLE_14                 (1 << 14)
+#       define R300_TX_ENABLE_15                 (1 << 15)
+
+#define R500_TX_FILTER_4		    0x4110
+#	define R500_TX_WEIGHT_1_SHIFT            (0) /* note: WEIGHT_1 sits in the low bits, WEIGHT_0 above it */
+#	define R500_TX_WEIGHT_0_SHIFT            (11)
+#	define R500_TX_WEIGHT_PAIR               (1<<22)
+#	define R500_TX_PHASE_SHIFT               (23)
+#	define R500_TX_DIRECTION_HORIZONTAL	 (0<<27)
+#	define R500_TX_DIRECTION_VERITCAL	 (1<<27) /* sic: "VERITCAL" (vertical); spelling kept so existing users still compile */
+
+/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
+#define R300_GA_POINT_S0                              0x4200
+
+/* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
+#define R300_GA_POINT_T0                              0x4204
+
+/* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */
+#define R300_GA_POINT_S1                              0x4208
+
+/* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */
+#define R300_GA_POINT_T1                              0x420c
+
+/* Specifies amount to shift integer position of vertex (screen space) before
+ * converting to float for triangle stipple.
+ */
+#define R300_GA_TRIANGLE_STIPPLE            0x4214
+#	define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0
+#	define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK  0x0000000f
+#	define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16
+#	define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK  0x000f0000
+
+/* The pointsize is given in multiples of 6. The pointsize can be enormous:
+ * Clear() renders a single point that fills the entire framebuffer.
+ * 1/2 Height of point; fixed (16.0), subpixel format (1/12 or 1/16, even if in
+ * 8b precision).
+ */
+#define R300_GA_POINT_SIZE                   0x421C
+#       define R300_POINTSIZE_Y_SHIFT         0
+#       define R300_POINTSIZE_Y_MASK          0x0000ffff
+#       define R300_POINTSIZE_X_SHIFT         16
+#       define R300_POINTSIZE_X_MASK          0xffff0000
+#       define R300_POINTSIZE_MAX             (R300_POINTSIZE_Y_MASK / 6)
+
+/* Red fill color */
+#define R500_GA_FILL_R                                0x4220
+
+/* Green fill color */
+#define R500_GA_FILL_G                                0x4224
+
+/* Blue fill color */
+#define R500_GA_FILL_B                                0x4228
+
+/* Alpha fill color */
+#define R500_GA_FILL_A                                0x422c
+
+
+/* Specifies maximum and minimum point & sprite sizes for per vertex size
+ * specification. The lower part (15:0) is MIN and the upper part (31:16) is MAX.
+ */
+#define R300_GA_POINT_MINMAX                0x4230
+#       define R300_GA_POINT_MINMAX_MIN_SHIFT          0
+#       define R300_GA_POINT_MINMAX_MIN_MASK           (0xFFFF << 0)
+#       define R300_GA_POINT_MINMAX_MAX_SHIFT          16
+#       define R300_GA_POINT_MINMAX_MAX_MASK           (0xFFFFu << 16) /* unsigned: 0xFFFF << 16 overflows a 32-bit int */
+
+/* 1/2 width of line, in subpixels (1/12 or 1/16 only, even in 8b
+ * subprecision); (16.0) fixed format.
+ *
+ * The line width is given in multiples of 6.
+ * In default mode lines are classified as vertical lines.
+ * HO: horizontal
+ * VE: vertical or horizontal
+ * HO & VE: no classification
+ */
+#define R300_GA_LINE_CNTL                             0x4234
+#       define R300_GA_LINE_CNTL_WIDTH_SHIFT       0
+#       define R300_GA_LINE_CNTL_WIDTH_MASK        0x0000ffff
+#	define R300_GA_LINE_CNTL_END_TYPE_HOR      (0 << 16)
+#	define R300_GA_LINE_CNTL_END_TYPE_VER      (1 << 16)
+#	define R300_GA_LINE_CNTL_END_TYPE_SQR      (2 << 16) /* horizontal or vertical depending upon slope */
+#	define R300_GA_LINE_CNTL_END_TYPE_COMP     (3 << 16) /* Computed (perpendicular to slope) */
+#	define R500_GA_LINE_CNTL_SORT_NO           (0 << 18)
+#	define R500_GA_LINE_CNTL_SORT_MINX_MINY    (1 << 18)
+/** TODO: looks wrong */
+#       define R300_LINESIZE_MAX              (R300_GA_LINE_CNTL_WIDTH_MASK / 6)
+/** TODO: looks wrong */
+#       define R300_LINE_CNT_HO               (1 << 16)
+/** TODO: looks wrong */
+#       define R300_LINE_CNT_VE               (1 << 17)
+
+/* Line Stipple configuration information. */
+#define R300_GA_LINE_STIPPLE_CONFIG                   0x4238
+#	define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO     (0 << 0)
+#	define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE   (1 << 0)
+#	define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0)
+#	define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2
+#	define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK  0xfffffffc
+
+/* Used to load US instructions and constants */
+#define R500_GA_US_VECTOR_INDEX               0x4250
+#	define R500_GA_US_VECTOR_INDEX_SHIFT       0
+#	define R500_GA_US_VECTOR_INDEX_MASK        0x000000ff
+#	define R500_GA_US_VECTOR_INDEX_TYPE_INSTR  (0 << 16)
+#	define R500_GA_US_VECTOR_INDEX_TYPE_CONST  (1 << 16)
+#	define R500_GA_US_VECTOR_INDEX_CLAMP_NO    (0 << 17)
+#	define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17)
+
+/* Data register for loading US instructions and constants */
+#define R500_GA_US_VECTOR_DATA                0x4254
+
+/* Specifies color properties and mappings of textures: 2-bit shading per texture (bits 21:0), 4-bit COLOR0/COLOR1 overrides at bits 25:22 and 29:26. */
+#define R500_GA_COLOR_CONTROL_PS3                     0x4258
+#	define R500_TEX0_SHADING_PS3_SOLID       (0 << 0)
+#	define R500_TEX0_SHADING_PS3_FLAT        (1 << 0)
+#	define R500_TEX0_SHADING_PS3_GOURAUD     (2 << 0)
+#	define R500_TEX1_SHADING_PS3_SOLID       (0 << 2)
+#	define R500_TEX1_SHADING_PS3_FLAT        (1 << 2)
+#	define R500_TEX1_SHADING_PS3_GOURAUD     (2 << 2)
+#	define R500_TEX2_SHADING_PS3_SOLID       (0 << 4)
+#	define R500_TEX2_SHADING_PS3_FLAT        (1 << 4)
+#	define R500_TEX2_SHADING_PS3_GOURAUD     (2 << 4)
+#	define R500_TEX3_SHADING_PS3_SOLID       (0 << 6)
+#	define R500_TEX3_SHADING_PS3_FLAT        (1 << 6)
+#	define R500_TEX3_SHADING_PS3_GOURAUD     (2 << 6)
+#	define R500_TEX4_SHADING_PS3_SOLID       (0 << 8)
+#	define R500_TEX4_SHADING_PS3_FLAT        (1 << 8)
+#	define R500_TEX4_SHADING_PS3_GOURAUD     (2 << 8)
+#	define R500_TEX5_SHADING_PS3_SOLID       (0 << 10)
+#	define R500_TEX5_SHADING_PS3_FLAT        (1 << 10)
+#	define R500_TEX5_SHADING_PS3_GOURAUD     (2 << 10)
+#	define R500_TEX6_SHADING_PS3_SOLID       (0 << 12)
+#	define R500_TEX6_SHADING_PS3_FLAT        (1 << 12)
+#	define R500_TEX6_SHADING_PS3_GOURAUD     (2 << 12)
+#	define R500_TEX7_SHADING_PS3_SOLID       (0 << 14)
+#	define R500_TEX7_SHADING_PS3_FLAT        (1 << 14)
+#	define R500_TEX7_SHADING_PS3_GOURAUD     (2 << 14)
+#	define R500_TEX8_SHADING_PS3_SOLID       (0 << 16)
+#	define R500_TEX8_SHADING_PS3_FLAT        (1 << 16)
+#	define R500_TEX8_SHADING_PS3_GOURAUD     (2 << 16)
+#	define R500_TEX9_SHADING_PS3_SOLID       (0 << 18)
+#	define R500_TEX9_SHADING_PS3_FLAT        (1 << 18)
+#	define R500_TEX9_SHADING_PS3_GOURAUD     (2 << 18)
+#	define R500_TEX10_SHADING_PS3_SOLID      (0 << 20)
+#	define R500_TEX10_SHADING_PS3_FLAT       (1 << 20)
+#	define R500_TEX10_SHADING_PS3_GOURAUD    (2 << 20)
+#	define R500_COLOR0_TEX_OVERRIDE_NO       (0 << 22) /* COLOR0 override select: value 0 = none, 1..10 = TEX_0..TEX_9_C3 */
+#	define R500_COLOR0_TEX_OVERRIDE_TEX_0    (1 << 22)
+#	define R500_COLOR0_TEX_OVERRIDE_TEX_1    (2 << 22)
+#	define R500_COLOR0_TEX_OVERRIDE_TEX_2    (3 << 22)
+#	define R500_COLOR0_TEX_OVERRIDE_TEX_3    (4 << 22)
+#	define R500_COLOR0_TEX_OVERRIDE_TEX_4    (5 << 22)
+#	define R500_COLOR0_TEX_OVERRIDE_TEX_5    (6 << 22)
+#	define R500_COLOR0_TEX_OVERRIDE_TEX_6    (7 << 22)
+#	define R500_COLOR0_TEX_OVERRIDE_TEX_7    (8 << 22)
+#	define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22)
+#	define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22)
+#	define R500_COLOR1_TEX_OVERRIDE_NO       (0 << 26) /* COLOR1 override select: same encoding as COLOR0, shifted to bit 26 */
+#	define R500_COLOR1_TEX_OVERRIDE_TEX_0    (1 << 26)
+#	define R500_COLOR1_TEX_OVERRIDE_TEX_1    (2 << 26)
+#	define R500_COLOR1_TEX_OVERRIDE_TEX_2    (3 << 26)
+#	define R500_COLOR1_TEX_OVERRIDE_TEX_3    (4 << 26)
+#	define R500_COLOR1_TEX_OVERRIDE_TEX_4    (5 << 26)
+#	define R500_COLOR1_TEX_OVERRIDE_TEX_5    (6 << 26)
+#	define R500_COLOR1_TEX_OVERRIDE_TEX_6    (7 << 26)
+#	define R500_COLOR1_TEX_OVERRIDE_TEX_7    (8 << 26)
+#	define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26)
+#	define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26)
+
+/* Returns idle status of various G3D blocks, captured when GA_IDLE is written
+ * or when hard or soft reset is asserted.  One status bit per unit (bits 26:0).
+ */
+#define R500_GA_IDLE                                  0x425c
+#	define R500_GA_IDLE_PIPE3_Z_IDLE  (1 << 0)
+#	define R500_GA_IDLE_PIPE2_Z_IDLE  (1 << 1)
+#	define R500_GA_IDLE_PIPE3_CD_IDLE (1 << 2)
+#	define R500_GA_IDLE_PIPE2_CD_IDLE (1 << 3)
+#	define R500_GA_IDLE_PIPE3_FG_IDLE (1 << 4)
+#	define R500_GA_IDLE_PIPE2_FG_IDLE (1 << 5)
+#	define R500_GA_IDLE_PIPE3_US_IDLE (1 << 6)
+#	define R500_GA_IDLE_PIPE2_US_IDLE (1 << 7)
+#	define R500_GA_IDLE_PIPE3_SC_IDLE (1 << 8)
+#	define R500_GA_IDLE_PIPE2_SC_IDLE (1 << 9)
+#	define R500_GA_IDLE_PIPE3_RS_IDLE (1 << 10)
+#	define R500_GA_IDLE_PIPE2_RS_IDLE (1 << 11)
+#	define R500_GA_IDLE_PIPE1_Z_IDLE  (1 << 12)
+#	define R500_GA_IDLE_PIPE0_Z_IDLE  (1 << 13)
+#	define R500_GA_IDLE_PIPE1_CD_IDLE (1 << 14)
+#	define R500_GA_IDLE_PIPE0_CD_IDLE (1 << 15)
+#	define R500_GA_IDLE_PIPE1_FG_IDLE (1 << 16)
+#	define R500_GA_IDLE_PIPE0_FG_IDLE (1 << 17)
+#	define R500_GA_IDLE_PIPE1_US_IDLE (1 << 18)
+#	define R500_GA_IDLE_PIPE0_US_IDLE (1 << 19)
+#	define R500_GA_IDLE_PIPE1_SC_IDLE (1 << 20)
+#	define R500_GA_IDLE_PIPE0_SC_IDLE (1 << 21)
+#	define R500_GA_IDLE_PIPE1_RS_IDLE (1 << 22)
+#	define R500_GA_IDLE_PIPE0_RS_IDLE (1 << 23)
+#	define R500_GA_IDLE_SU_IDLE       (1 << 24)
+#	define R500_GA_IDLE_GA_IDLE       (1 << 25)
+#	define R500_GA_IDLE_GA_UNIT2_IDLE (1 << 26)
+
+/* Current value of stipple accumulator. */
+#define R300_GA_LINE_STIPPLE_VALUE            0x4260
+
+/* S Texture Coordinate Value for Vertex 0 of Line (stuff textures -- i.e. AA) */
+#define R300_GA_LINE_S0                               0x4264
+/* S Texture Coordinate Value for Vertex 1 of Lines (V2 of parallelogram -- stuff textures -- i.e. AA) */
+#define R300_GA_LINE_S1                               0x4268
+
+/* GA Input fifo high water marks */
+#define R500_GA_FIFO_CNTL                             0x4270
+#	define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK   0x00000007 /* bits 2:0 */
+#	define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT  0
+#	define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK  0x00000038 /* bits 5:3 */
+#	define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3
+#	define R500_GA_FIFO_CNTL_VERTEX_REG_MASK    0x00003fc0 /* bits 13:6 */
+#	define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT   6
+
+/* GA enhance/tweaks: four independent one-bit controls in bits 3:0; each has a NO_EFFECT (0) and an enabling (1) encoding. */
+#define R300_GA_ENHANCE                               0x4274
+#	define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT   (0 << 0)
+#	define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */
+#	define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT   (0 << 1)
+#	define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE      (1 << 1) /* Enables high-performance register/primitive switching. */
+#	define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT   (0 << 2) /* R520+ only */
+#	define R500_GA_ENHANCE_REG_READWRITE_ENABLE      (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */
+#	define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT     (0 << 3) /* NOTE(review): R500_ prefix but no "R520+ only" remark, unlike bit 2 -- confirm */
+#	define R500_GA_ENHANCE_REG_NOSTALL_ENABLE        (1 << 3) /* Enables GA support of no-stall reads for register read back. */
+
+#define R300_GA_COLOR_CONTROL                   0x4278 /* 2-bit shading mode per RGB/ALPHA channel (bits 15:0), provoking vertex in bits 17:16 */
+#	define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID      (0 << 0)
+#	define R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT       (1 << 0)
+#	define R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD    (2 << 0)
+#	define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_SOLID    (0 << 2)
+#	define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT     (1 << 2)
+#	define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD  (2 << 2)
+#	define R300_GA_COLOR_CONTROL_RGB1_SHADING_SOLID      (0 << 4)
+#	define R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT       (1 << 4)
+#	define R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD    (2 << 4)
+#	define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_SOLID    (0 << 6)
+#	define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT     (1 << 6)
+#	define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD  (2 << 6)
+#	define R300_GA_COLOR_CONTROL_RGB2_SHADING_SOLID      (0 << 8)
+#	define R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT       (1 << 8)
+#	define R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD    (2 << 8)
+#	define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_SOLID    (0 << 10)
+#	define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT     (1 << 10)
+#	define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD  (2 << 10)
+#	define R300_GA_COLOR_CONTROL_RGB3_SHADING_SOLID      (0 << 12)
+#	define R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT       (1 << 12)
+#	define R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD    (2 << 12)
+#	define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_SOLID    (0 << 14)
+#	define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT     (1 << 14)
+#	define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD  (2 << 14)
+#	define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST  (0 << 16)
+#	define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND (1 << 16)
+#	define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_THIRD  (2 << 16)
+#	define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST   (3 << 16)
+
+/** All eight RGB/ALPHA channels set to GOURAUD; no provoking-vertex bits included. TODO: might be candidate for removal */
+#	define R300_RE_SHADE_MODEL_SMOOTH     ( \
+	R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \
+	R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
+	R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \
+	R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD)
+/** TODO: might be candidate for removal. NOTE(review): ALPHA1 and ALPHA3 are GOURAUD here while every other channel is FLAT -- looks buggy, confirm before changing */
+#	define R300_RE_SHADE_MODEL_FLAT     ( \
+	R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \
+	R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
+	R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \
+	R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD)
+
+/* Specifies red & green components of fill color -- S312 format -- Backwards comp. */
+#define R300_GA_SOLID_RG                         0x427c
+#	define GA_SOLID_RG_COLOR_GREEN_SHIFT 0 /* note: these field macros carry no R300_ prefix */
+#	define GA_SOLID_RG_COLOR_GREEN_MASK  0x0000ffff /* green: bits 15:0 */
+#	define GA_SOLID_RG_COLOR_RED_SHIFT   16
+#	define GA_SOLID_RG_COLOR_RED_MASK    0xffff0000 /* red: bits 31:16 */
+/* Specifies blue & alpha components of fill color -- S312 format -- Backwards comp. */
+#define R300_GA_SOLID_BA                         0x4280
+#	define GA_SOLID_BA_COLOR_ALPHA_SHIFT 0
+#	define GA_SOLID_BA_COLOR_ALPHA_MASK  0x0000ffff /* alpha: bits 15:0 */
+#	define GA_SOLID_BA_COLOR_BLUE_SHIFT  16
+#	define GA_SOLID_BA_COLOR_BLUE_MASK   0xffff0000 /* blue: bits 31:16 */
+
+/* Polygon Mode: mode select at bit 0, front primitive type at bits 5:4, back primitive type at bits 8:7.
+ * Dangerous
+ */
+#define R300_GA_POLY_MODE                             0x4288
+#	define R300_GA_POLY_MODE_DISABLE           (0 << 0)
+#	define R300_GA_POLY_MODE_DUAL              (1 << 0) /* send 2 sets of 3 polys with specified poly type */
+/* reserved */
+#	define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4)
+#	define R300_GA_POLY_MODE_FRONT_PTYPE_LINE  (1 << 4)
+#	define R300_GA_POLY_MODE_FRONT_PTYPE_TRI   (2 << 4)
+/* reserved */
+#	define R300_GA_POLY_MODE_BACK_PTYPE_POINT  (0 << 7)
+#	define R300_GA_POLY_MODE_BACK_PTYPE_LINE   (1 << 7)
+#	define R300_GA_POLY_MODE_BACK_PTYPE_TRI    (2 << 7)
+/* reserved */
+
+/* Specifies the rounding mode for geometry & color SPFP to FP conversions. */
+#define R300_GA_ROUND_MODE                            0x428c
+#	define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC   (0 << 0)
+#	define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0)
+#	define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC      (0 << 2)
+#	define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST    (1 << 2)
+#	define R300_GA_ROUND_MODE_RGB_CLAMP_RGB          (0 << 4)
+#	define R300_GA_ROUND_MODE_RGB_CLAMP_FP20         (1 << 4)
+#	define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB        (0 << 5)
+#	define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20       (1 << 5)
+#	define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT    6
+#	define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK     0x000003c0 /* 4-bit field, bits 9:6 */
+
+/* Specifies x & y offsets for vertex data after conversion to FP.
+ * Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b
+ * subprecision).
+ */
+#define R300_GA_OFFSET                                0x4290
+#	define R300_GA_OFFSET_X_OFFSET_SHIFT 0
+#	define R300_GA_OFFSET_X_OFFSET_MASK  0x0000ffff /* X offset: bits 15:0 */
+#	define R300_GA_OFFSET_Y_OFFSET_SHIFT 16
+#	define R300_GA_OFFSET_Y_OFFSET_MASK  0xffff0000 /* Y offset: bits 31:16 */
+
+/* Specifies the scale to apply to fog. */
+#define R300_GA_FOG_SCALE                     0x4294
+/* Specifies the offset to apply to fog. */
+#define R300_GA_FOG_OFFSET                    0x4298
+/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */
+#define R300_GA_SOFT_RESET                    0x429c
+
+/* Not sure why there are duplicates of the factor and constant values.
+ * My best guess so far is that there are separate zbiases for test and write.
+ * Ordering might be wrong.
+ * Some of the tests indicate that fgl has a fallback implementation of zbias
+ * via pixel shaders.
+ */
+#define R300_SU_TEX_WRAP                      0x42A0
+#define R300_SU_POLY_OFFSET_FRONT_SCALE       0x42A4
+#define R300_SU_POLY_OFFSET_FRONT_OFFSET      0x42A8
+#define R300_SU_POLY_OFFSET_BACK_SCALE        0x42AC
+#define R300_SU_POLY_OFFSET_BACK_OFFSET       0x42B0
+
+/* This register needs to be set to (1<<1) for RV350 to correctly
+ * perform depth test (see --vb-triangles in r300_demo)
+ * Don't know about other chips. - Vladimir
+ * This is set to 3 when GL_POLYGON_OFFSET_FILL is on.
+ * My guess is that there are two bits for each zbias primitive
+ * (FILL, LINE, POINT).
+ *  One to enable depth test and one for depth write.
+ * Yet this doesn't explain why depth writes work ...
+ */
+#define R300_SU_POLY_OFFSET_ENABLE	       0x42B4
+#	define R300_FRONT_ENABLE	       (1 << 0) /* the value 3 mentioned above is FRONT_ENABLE | BACK_ENABLE */
+#	define R300_BACK_ENABLE 	       (1 << 1)
+#	define R300_PARA_ENABLE 	       (1 << 2)
+
+#define R300_SU_CULL_MODE                      0x42B8 /* cull enables in bits 1:0, front-face winding in bit 2 */
+#       define R300_CULL_FRONT                   (1 << 0)
+#       define R300_CULL_BACK                    (1 << 1)
+#       define R300_FRONT_FACE_CCW               (0 << 2)
+#       define R300_FRONT_FACE_CW                (1 << 2)
+
+/* SU Depth Scale value */
+#define R300_SU_DEPTH_SCALE                 0x42c0
+/* SU Depth Offset value */
+#define R300_SU_DEPTH_OFFSET                0x42c4
+
+#define R300_SU_REG_DEST		    0x42c8 /* one select bit per raster pipe, bits 3:0 */
+#	define R300_RASTER_PIPE_SELECT_0	(1 << 0)
+#	define R300_RASTER_PIPE_SELECT_1	(1 << 1)
+#	define R300_RASTER_PIPE_SELECT_2	(1 << 2)
+#	define R300_RASTER_PIPE_SELECT_3	(1 << 3)
+#	define R300_RASTER_PIPE_SELECT_ALL	0xf /* equals SELECT_0 | SELECT_1 | SELECT_2 | SELECT_3 */
+
+
+/* BEGIN: Rasterization / Interpolators - many guesses */
+
+/*
+ * TC_CNT (presumably the IT_COUNT field below -- confirm) is the number of
+ * incoming texture coordinate sets (i.e. it depends on the vertex program,
+ * *not* the fragment program)
+ */
+#define R300_RS_COUNT                      0x4300
+#       define R300_IT_COUNT_SHIFT               0
+#       define R300_IT_COUNT_MASK                0x0000007f /* bits 6:0 */
+#       define R300_IC_COUNT_SHIFT               7
+#       define R300_IC_COUNT_MASK                0x00000780 /* bits 10:7 */
+#       define R300_W_ADDR_SHIFT                 12
+#       define R300_W_ADDR_MASK                  0x0003f000 /* bits 17:12 */
+#       define R300_HIRES_DIS                    (0 << 18)
+#       define R300_HIRES_EN                     (1 << 18)
+
+#define R300_RS_INST_COUNT                       0x4304
+#       define R300_RS_INST_COUNT_SHIFT          0
+#       define R300_RS_INST_COUNT_MASK           0x0000000f /* bits 3:0 */
+#       define R300_RS_TX_OFFSET_SHIFT           5
+#	define R300_RS_TX_OFFSET_MASK            0x000000e0 /* bits 7:5 */
+
+/* gap */
+
+/* Only used for texture coordinates.
+ * Use the source field to route texture coordinate input from the
+ * vertex program to the desired interpolator. Note that the source
+ * field is relative to the outputs the vertex program *actually*
+ * writes. If a vertex program only writes texcoord[1], this will
+ * be source index 0.
+ * Set INTERP_USED on all interpolators that produce data used by
+ * the fragment program. INTERP_USED looks like a swizzling mask,
+ * but I haven't seen it used that way.
+ * NOTE(review): no INTERP_USED or _UNKNOWN macros are defined below; this text may be stale.
+ * Note: The _UNKNOWN constants are always set in their respective
+ * register. I don't know if this is necessary.
+ */
+#define R300_RS_IP_0				        0x4310
+#define R300_RS_IP_1				        0x4314
+#define R300_RS_IP_2				        0x4318
+#define R300_RS_IP_3				        0x431C
+#       define R300_RS_INTERP_SRC_SHIFT          2 /* TODO: check for removal */
+#       define R300_RS_INTERP_SRC_MASK           (7 << 2) /* TODO: check for removal */
+#	define R300_RS_TEX_PTR(x)		        ((x) << 0)
+#	define R300_RS_COL_PTR(x)		        ((x) << 6)
+#	define R300_RS_COL_FMT(x)		        ((x) << 9) /* x is one of the R300_RS_COL_FMT_* codes below */
+#	define R300_RS_COL_FMT_RGBA		        0
+#	define R300_RS_COL_FMT_RGB0		        1
+#	define R300_RS_COL_FMT_RGB1		        2
+#	define R300_RS_COL_FMT_000A		        4
+#	define R300_RS_COL_FMT_0000		        5
+#	define R300_RS_COL_FMT_0001		        6
+#	define R300_RS_COL_FMT_111A		        8
+#	define R300_RS_COL_FMT_1110		        9
+#	define R300_RS_COL_FMT_1111		        10
+#	define R300_RS_SEL_S(x)		                ((x) << 13) /* S/T/R/Q selects are spaced 3 bits apart */
+#	define R300_RS_SEL_T(x)		                ((x) << 16)
+#	define R300_RS_SEL_R(x)		                ((x) << 19)
+#	define R300_RS_SEL_Q(x)		                ((x) << 22)
+#	define R300_RS_SEL_C0		                0 /* presumably the argument values for R300_RS_SEL_{S,T,R,Q} -- confirm */
+#	define R300_RS_SEL_C1		                1
+#	define R300_RS_SEL_C2		                2
+#	define R300_RS_SEL_C3		                3
+#	define R300_RS_SEL_K0		                4
+#	define R300_RS_SEL_K1		                5
+
+
+/* R500 RS_INST registers: sixteen consecutive dwords (0x4320-0x435c), followed by their field encodings. */
+#define R500_RS_INST_0					0x4320
+#define R500_RS_INST_1					0x4324
+#define R500_RS_INST_2					0x4328
+#define R500_RS_INST_3					0x432c
+#define R500_RS_INST_4					0x4330
+#define R500_RS_INST_5					0x4334
+#define R500_RS_INST_6					0x4338
+#define R500_RS_INST_7					0x433c
+#define R500_RS_INST_8					0x4340
+#define R500_RS_INST_9					0x4344
+#define R500_RS_INST_10					0x4348
+#define R500_RS_INST_11					0x434c
+#define R500_RS_INST_12					0x4350
+#define R500_RS_INST_13					0x4354
+#define R500_RS_INST_14					0x4358
+#define R500_RS_INST_15					0x435c
+#define R500_RS_INST_TEX_ID_SHIFT			0
+#define R500_RS_INST_TEX_CN_WRITE			(1 << 4)
+#define R500_RS_INST_TEX_ADDR_SHIFT			5
+#define R500_RS_INST_COL_ID_SHIFT			12
+#define R500_RS_INST_COL_CN_NO_WRITE			(0 << 16) /* COL_CN is a 2-bit field at bits 17:16 */
+#define R500_RS_INST_COL_CN_WRITE			(1 << 16)
+#define R500_RS_INST_COL_CN_WRITE_FBUFFER		(2 << 16)
+#define R500_RS_INST_COL_CN_WRITE_BACKFACE		(3 << 16)
+#define R500_RS_INST_COL_ADDR_SHIFT			18
+#define R500_RS_INST_TEX_ADJ				(1 << 25)
+#define R500_RS_INST_W_CN				(1 << 26)
+#define R500_RS_INST_TEX_ID(x)				((x) << R500_RS_INST_TEX_ID_SHIFT) /* helpers: shift x into place, no masking */
+#define R500_RS_INST_TEX_ADDR(x)			((x) << R500_RS_INST_TEX_ADDR_SHIFT)
+#define R500_RS_INST_COL_ID(x)				((x) << R500_RS_INST_COL_ID_SHIFT)
+#define R500_RS_INST_COL_ADDR(x)			((x) << R500_RS_INST_COL_ADDR_SHIFT)
+
+/* These DWORDs control how vertex data is routed into fragment program
+ * registers, after interpolators.  Offsets overlap R500_RS_INST_4..11 above.
+ */
+#define R300_RS_INST_0                     0x4330
+#define R300_RS_INST_1                     0x4334
+#define R300_RS_INST_2                     0x4338
+#define R300_RS_INST_3                     0x433C /* GUESS */
+#define R300_RS_INST_4                     0x4340 /* GUESS */
+#define R300_RS_INST_5                     0x4344 /* GUESS */
+#define R300_RS_INST_6                     0x4348 /* GUESS */
+#define R300_RS_INST_7                     0x434C /* GUESS */
+#	define R300_RS_INST_TEX_ID(x)  		((x) << 0)
+#	define R300_RS_INST_TEX_CN_WRITE 	(1 << 3)
+#	define R300_RS_INST_TEX_ADDR_SHIFT 	6
+#	define R300_RS_INST_TEX_ADDR(x)		((x) << R300_RS_INST_TEX_ADDR_SHIFT)
+#	define R300_RS_INST_COL_ID(x)		((x) << 11)
+#	define R300_RS_INST_COL_CN_WRITE	(1 << 14)
+#	define R300_RS_INST_COL_ADDR_SHIFT	17
+#	define R300_RS_INST_COL_ADDR(x)		((x) << R300_RS_INST_COL_ADDR_SHIFT)
+#	define R300_RS_INST_TEX_ADJ		(1 << 22)
+#	define R300_RS_COL_BIAS_UNUSED_SHIFT    23
+
+/* END: Rasterization / Interpolators - many guesses */
+
+/* Hierarchical Z Enable: enable at bit 0, MIN/MAX at bit 1, 3-bit ADJ field at bits 4:2, Z0MIN at bit 5, Z0MAX at bit 6. */
+#define R300_SC_HYPERZ                   0x43a4
+#	define R300_SC_HYPERZ_DISABLE     (0 << 0)
+#	define R300_SC_HYPERZ_ENABLE      (1 << 0)
+#	define R300_SC_HYPERZ_MIN         (0 << 1)
+#	define R300_SC_HYPERZ_MAX         (1 << 1)
+#	define R300_SC_HYPERZ_ADJ_256     (0 << 2)
+#	define R300_SC_HYPERZ_ADJ_128     (1 << 2)
+#	define R300_SC_HYPERZ_ADJ_64      (2 << 2)
+#	define R300_SC_HYPERZ_ADJ_32      (3 << 2)
+#	define R300_SC_HYPERZ_ADJ_16      (4 << 2)
+#	define R300_SC_HYPERZ_ADJ_8       (5 << 2)
+#	define R300_SC_HYPERZ_ADJ_4       (6 << 2)
+#	define R300_SC_HYPERZ_ADJ_2       (7 << 2)
+#	define R300_SC_HYPERZ_HZ_Z0MIN_NO (0 << 5)
+#	define R300_SC_HYPERZ_HZ_Z0MIN    (1 << 5)
+#	define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6)
+#	define R300_SC_HYPERZ_HZ_Z0MAX    (1 << 6)
+
+#define R300_SC_EDGERULE                 0x43a8 /* no field encodings are defined for this register here */
+
+/* BEGIN: Scissors and cliprects */
+
+/* There are four clipping rectangles. Their corner coordinates are inclusive.
+ * Every pixel is assigned a number from 0 to 15 by setting bits 0-3 depending
+ * on whether the pixel is inside cliprects 0-3, respectively. For example,
+ * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned
+ * the number 3 (binary 0011).
+ * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL
+ * (presumably R300_SC_CLIP_RULE below -- confirm) is set, the pixel is rasterized.
+ *
+ * In addition to this, there is a scissors rectangle. Only pixels inside the
+ * scissors rectangle are drawn. (coordinates are inclusive)
+ *
+ * For some reason, the top-left corner of the framebuffer is at (1440, 1440)
+ * for the purpose of clipping and scissors.
+ */
+#define R300_SC_CLIPRECT_TL_0               0x43B0
+#define R300_SC_CLIPRECT_BR_0               0x43B4
+#define R300_SC_CLIPRECT_TL_1               0x43B8
+#define R300_SC_CLIPRECT_BR_1               0x43BC
+#define R300_SC_CLIPRECT_TL_2               0x43C0
+#define R300_SC_CLIPRECT_BR_2               0x43C4
+#define R300_SC_CLIPRECT_TL_3               0x43C8
+#define R300_SC_CLIPRECT_BR_3               0x43CC
+#       define R300_CLIPRECT_OFFSET              1440 /* the (1440, 1440) origin described above */
+#       define R300_CLIPRECT_MASK                0x1FFF /* 13-bit coordinate */
+#       define R300_CLIPRECT_X_SHIFT             0
+#       define R300_CLIPRECT_X_MASK              (0x1FFF << 0)
+#       define R300_CLIPRECT_Y_SHIFT             13
+#       define R300_CLIPRECT_Y_MASK              (0x1FFF << 13)
+#define R300_SC_CLIP_RULE                   0x43D0
+#       define R300_CLIP_OUT                     (1 << 0) /* bit N <-> pixel number N; digits in the names list the cliprects containing the pixel */
+#       define R300_CLIP_0                       (1 << 1)
+#       define R300_CLIP_1                       (1 << 2)
+#       define R300_CLIP_10                      (1 << 3)
+#       define R300_CLIP_2                       (1 << 4)
+#       define R300_CLIP_20                      (1 << 5)
+#       define R300_CLIP_21                      (1 << 6)
+#       define R300_CLIP_210                     (1 << 7)
+#       define R300_CLIP_3                       (1 << 8)
+#       define R300_CLIP_30                      (1 << 9)
+#       define R300_CLIP_31                      (1 << 10)
+#       define R300_CLIP_310                     (1 << 11)
+#       define R300_CLIP_32                      (1 << 12)
+#       define R300_CLIP_320                     (1 << 13)
+#       define R300_CLIP_321                     (1 << 14)
+#       define R300_CLIP_3210                    (1 << 15)
+
+/* gap */
+
+#define R300_SC_SCISSORS_TL                 0x43E0
+#define R300_SC_SCISSORS_BR                 0x43E4
+#       define R300_SCISSORS_OFFSET              1440 /* same value as R300_CLIPRECT_OFFSET */
+#       define R300_SCISSORS_X_SHIFT             0
+#       define R300_SCISSORS_X_MASK              (0x1FFF << 0)
+#       define R300_SCISSORS_Y_SHIFT             13
+#       define R300_SCISSORS_Y_MASK              (0x1FFF << 13)
+
+/* Screen door sample mask */
+#define R300_SC_SCREENDOOR                 0x43e8
+
+/* END: Scissors and cliprects */
+
+/* BEGIN: Texture specification */
+
+/*
+ * The texture specification dwords are grouped by meaning and not by texture
+ * unit. This means that e.g. the offset for texture image unit N is found in
+ * register TX_OFFSET_0 + (4*N)
+ */
+#define R300_TX_FILTER0_0                        0x4400
+#define R300_TX_FILTER0_1                        0x4404
+#define R300_TX_FILTER0_2                        0x4408
+#define R300_TX_FILTER0_3                        0x440c
+#define R300_TX_FILTER0_4                        0x4410
+#define R300_TX_FILTER0_5                        0x4414
+#define R300_TX_FILTER0_6                        0x4418
+#define R300_TX_FILTER0_7                        0x441c
+#define R300_TX_FILTER0_8                        0x4420
+#define R300_TX_FILTER0_9                        0x4424
+#define R300_TX_FILTER0_10                       0x4428
+#define R300_TX_FILTER0_11                       0x442c
+#define R300_TX_FILTER0_12                       0x4430
+#define R300_TX_FILTER0_13                       0x4434
+#define R300_TX_FILTER0_14                       0x4438
+#define R300_TX_FILTER0_15                       0x443c
+#       define R300_TX_REPEAT                    0 /* unshifted wrap modes 0..7; presumably placed via R300_TX_WRAP_{S,T,R}_SHIFT -- confirm */
+#       define R300_TX_MIRRORED                  1
+#       define R300_TX_CLAMP_TO_EDGE             2
+#	define R300_TX_MIRROR_ONCE_TO_EDGE       3
+#       define R300_TX_CLAMP                     4
+#	define R300_TX_MIRROR_ONCE               5
+#       define R300_TX_CLAMP_TO_BORDER           6
+#	define R300_TX_MIRROR_ONCE_TO_BORDER     7
+#       define R300_TX_WRAP_S_SHIFT              0
+#       define R300_TX_WRAP_S_MASK               (7 << 0)
+#       define R300_TX_WRAP_T_SHIFT              3
+#       define R300_TX_WRAP_T_MASK               (7 << 3)
+#       define R300_TX_WRAP_R_SHIFT              6
+#       define R300_TX_WRAP_R_MASK               (7 << 6)
+#	define R300_TX_MAG_FILTER_4              (0 << 9) /* mag filter: 2-bit field at bits 10:9 */
+#       define R300_TX_MAG_FILTER_NEAREST        (1 << 9)
+#       define R300_TX_MAG_FILTER_LINEAR         (2 << 9)
+#       define R300_TX_MAG_FILTER_ANISO          (3 << 9)
+#       define R300_TX_MAG_FILTER_MASK           (3 << 9)
+#       define R300_TX_MIN_FILTER_NEAREST        (1 << 11) /* min filter: 2-bit field at bits 12:11; no name is given for value 0 */
+#       define R300_TX_MIN_FILTER_LINEAR         (2 << 11)
+#	define R300_TX_MIN_FILTER_ANISO          (3 << 11)
+#	define R300_TX_MIN_FILTER_MASK           (3 << 11)
+#	define R300_TX_MIN_FILTER_MIP_NONE       (0 << 13) /* mip filter: 2-bit field at bits 14:13 */
+#	define R300_TX_MIN_FILTER_MIP_NEAREST    (1 << 13)
+#	define R300_TX_MIN_FILTER_MIP_LINEAR     (2 << 13)
+#	define R300_TX_MIN_FILTER_MIP_MASK       (3 << 13)
+#	define R300_TX_MAX_ANISO_1_TO_1          (0 << 21) /* max aniso: 3-bit field at bits 23:21 */
+#	define R300_TX_MAX_ANISO_2_TO_1          (1 << 21)
+#	define R300_TX_MAX_ANISO_4_TO_1          (2 << 21)
+#	define R300_TX_MAX_ANISO_8_TO_1          (3 << 21)
+#	define R300_TX_MAX_ANISO_16_TO_1         (4 << 21)
+#	define R300_TX_MAX_ANISO_MASK            (7 << 21)
+
+#define R300_TX_FILTER1_0                      0x4440 /* only unit 0 is named here */
+#	define R300_CHROMA_KEY_MODE_DISABLE    0 /* chroma key mode values are unshifted (field starts at bit 0) */
+#	define R300_CHROMA_KEY_FORCE	       1
+#	define R300_CHROMA_KEY_BLEND           2
+#	define R300_MC_ROUND_NORMAL            (0<<2)
+#	define R300_MC_ROUND_MPEG4             (1<<2)
+#	define R300_LOD_BIAS_SHIFT             3
+#	define R300_LOD_BIAS_MASK	       0x1ff8 /* bits 12:3 */
+#	define R300_EDGE_ANISO_EDGE_DIAG       (0<<13)
+#	define R300_EDGE_ANISO_EDGE_ONLY       (1<<13)
+#	define R300_MC_COORD_TRUNCATE_DISABLE  (0<<14)
+#	define R300_MC_COORD_TRUNCATE_MPEG     (1<<14)
+#	define R300_TX_TRI_PERF_0_8            (0<<15)
+#	define R300_TX_TRI_PERF_1_8            (1<<15)
+#	define R300_TX_TRI_PERF_1_4            (2<<15)
+#	define R300_TX_TRI_PERF_3_8            (3<<15)
+#	define R300_ANISO_THRESHOLD_MASK       (7<<17)
+
+#	define R500_MACRO_SWITCH               (1<<22)
+#	define R500_BORDER_FIX                 (1u << 31) /* unsigned: shifting a signed 1 into bit 31 is undefined behaviour */
+
+#define R300_TX_SIZE_0                      0x4480 /* only unit 0 is named here */
+#       define R300_TX_WIDTHMASK_SHIFT           0
+#       define R300_TX_WIDTHMASK_MASK            (2047 << 0) /* 11-bit field, bits 10:0 */
+#       define R300_TX_HEIGHTMASK_SHIFT          11
+#       define R300_TX_HEIGHTMASK_MASK           (2047 << 11) /* 11-bit field, bits 21:11 */
+#	define R300_TX_DEPTHMASK_SHIFT		 22
+#	define R300_TX_DEPTHMASK_MASK		 (0xf << 22) /* 4-bit field, bits 25:22 */
+#       define R300_TX_MAX_MIP_LEVEL_SHIFT       26
+#       define R300_TX_MAX_MIP_LEVEL_MASK        (0xf << 26) /* 4-bit field, bits 29:26 */
+#       define R300_TX_SIZE_PROJECTED            (1<<30)
+#       define R300_TX_SIZE_TXPITCH_EN           (1u << 31) /* unsigned: shifting a signed 1 into bit 31 is undefined behaviour */
+#define R300_TX_FORMAT_0                    0x44C0
+	/* The interpretation of the format word by Wladimir van der Laan */
+	/* The X, Y, Z and W refer to the layout of the components.
+	   They are given meanings as R, G, B and Alpha by the swizzle
+	   specification */
+#	define R300_TX_FORMAT_X8		    0x0
+#	define R500_TX_FORMAT_X1		    0x0 /* bit set in format 2 (presumably R500_TXFORMAT_MSB in R300_TX_FORMAT2_0 -- confirm) */
+#	define R300_TX_FORMAT_X16		    0x1
+#	define R500_TX_FORMAT_X1_REV		    0x0 /* bit set in format 2; NOTE(review): same value as R500_TX_FORMAT_X1 -- confirm whether 0x1 (pairing with X16) was intended */
+#	define R300_TX_FORMAT_Y4X4		    0x2
+#	define R300_TX_FORMAT_Y8X8		    0x3
+#	define R300_TX_FORMAT_Y16X16		    0x4
+#	define R300_TX_FORMAT_Z3Y3X2		    0x5
+#	define R300_TX_FORMAT_Z5Y6X5		    0x6
+#	define R300_TX_FORMAT_Z6Y5X5		    0x7
+#	define R300_TX_FORMAT_Z11Y11X10		    0x8
+#	define R300_TX_FORMAT_Z10Y11X11		    0x9
+#	define R300_TX_FORMAT_W4Z4Y4X4		    0xA
+#	define R300_TX_FORMAT_W1Z5Y5X5		    0xB
+#	define R300_TX_FORMAT_W8Z8Y8X8		    0xC
+#	define R300_TX_FORMAT_W2Z10Y10X10	    0xD
+#	define R300_TX_FORMAT_W16Z16Y16X16	    0xE
+#	define R300_TX_FORMAT_DXT1	    	    0xF
+#	define R300_TX_FORMAT_DXT3	    	    0x10
+#	define R300_TX_FORMAT_DXT5	    	    0x11
+#	define R300_TX_FORMAT_D3DMFT_CxV8U8	    0x12     /* no swizzle */
+#	define R300_TX_FORMAT_A8R8G8B8	    	    0x13     /* no swizzle */
+#	define R300_TX_FORMAT_B8G8_B8G8	    	    0x14     /* no swizzle */
+#	define R300_TX_FORMAT_G8R8_G8B8	    	    0x15     /* no swizzle */
+
+	/* These two values are wrong, but they're the only values that
+	 * produce any even vaguely correct results.  Can r300 only do 16-bit
+	 * depth textures?
+	 */
+#	define R300_TX_FORMAT_X24_Y8	    	    0x1e
+#	define R300_TX_FORMAT_X32	    	    0x1e
+
+	/* 0x16 - some 16 bit green format.. ?? */
+#	define R300_TX_FORMAT_3D		   (1 << 25)
+#	define R300_TX_FORMAT_CUBIC_MAP		   (2 << 25)
+
+#	define R300_TX_FORMAT_GAMMA			(1 << 21)
+
+	/* gap */
+	/* Floating point formats */
+	/* Note - hardware supports both 16 and 32 bit floating point */
+#	define R300_TX_FORMAT_FL_I16	    	    0x18
+#	define R300_TX_FORMAT_FL_I16A16	    	    0x19
+#	define R300_TX_FORMAT_FL_R16G16B16A16	    0x1A
+#	define R300_TX_FORMAT_FL_I32	    	    0x1B
+#	define R300_TX_FORMAT_FL_I32A32	    	    0x1C
+#	define R300_TX_FORMAT_FL_R32G32B32A32	    0x1D
+	/* alpha modes, convenience mostly */
+	/* if you have alpha, pick constant appropriate to the
+	   number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc.) */
+# 	define R300_TX_FORMAT_ALPHA_1CH		    0x000
+# 	define R300_TX_FORMAT_ALPHA_2CH		    0x200
+# 	define R300_TX_FORMAT_ALPHA_4CH		    0x600
+# 	define R300_TX_FORMAT_ALPHA_NONE	    0xA00
+	/* Swizzling */
+	/* constants */
+#	define R300_TX_FORMAT_X		0
+#	define R300_TX_FORMAT_Y		1
+#	define R300_TX_FORMAT_Z		2
+#	define R300_TX_FORMAT_W		3
+#	define R300_TX_FORMAT_ZERO	4
+#	define R300_TX_FORMAT_ONE	5
+	/* 2.0*Z, everything above 1.0 is set to 0.0 */
+#	define R300_TX_FORMAT_CUT_Z	6
+	/* 2.0*W, everything above 1.0 is set to 0.0 */
+#	define R300_TX_FORMAT_CUT_W	7
+
+#	define R300_TX_FORMAT_B_SHIFT	18
+#	define R300_TX_FORMAT_G_SHIFT	15
+#	define R300_TX_FORMAT_R_SHIFT	12
+#	define R300_TX_FORMAT_A_SHIFT	9
+	/* Convenience macro to take care of layout and swizzling: each argument is a suffix pasted onto R300_TX_FORMAT_ (e.g. X, ZERO, W8Z8Y8X8) */
+#	define R300_EASY_TX_FORMAT(B, G, R, A, FMT)	(		\
+		((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT)		\
+		| ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT)	\
+		| ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT)	\
+		| ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT)	\
+		| (R300_TX_FORMAT_##FMT)				\
+		)
+	/* These can be ORed with result of R300_EASY_TX_FORMAT()
+	   We don't really know what they do. Take values from a
+           constant color ? */
+#	define R300_TX_FORMAT_CONST_X		(1<<5)
+#	define R300_TX_FORMAT_CONST_Y		(2<<5)
+#	define R300_TX_FORMAT_CONST_Z		(4<<5)
+#	define R300_TX_FORMAT_CONST_W		(8<<5)
+
+#	define R300_TX_FORMAT_YUV_MODE		0x00800000
+
+#define R300_TX_FORMAT2_0		    0x4500 /* obvious missing in gap */
+#       define R300_TX_PITCHMASK_SHIFT           0
+#       define R300_TX_PITCHMASK_MASK            (2047 << 0) /* 11-bit field, bits 10:0 */
+#	define R500_TXFORMAT_MSB		 (1 << 14)
+#	define R500_TXWIDTH_BIT11	         (1 << 15)
+#	define R500_TXHEIGHT_BIT11	         (1 << 16)
+#	define R500_POW2FIX2FLT			 (1 << 17)
+#	define R500_SEL_FILTER4_TC0		 (0 << 18) /* FILTER4 select: 2-bit field at bits 19:18 */
+#	define R500_SEL_FILTER4_TC1		 (1 << 18)
+#	define R500_SEL_FILTER4_TC2		 (2 << 18)
+#	define R500_SEL_FILTER4_TC3		 (3 << 18)
+
+#define R300_TX_OFFSET_0                    0x4540
+#define R300_TX_OFFSET_1                    0x4544
+#define R300_TX_OFFSET_2                    0x4548
+#define R300_TX_OFFSET_3                    0x454C
+#define R300_TX_OFFSET_4                    0x4550
+#define R300_TX_OFFSET_5                    0x4554
+#define R300_TX_OFFSET_6                    0x4558
+#define R300_TX_OFFSET_7                    0x455C
+	/* BEGIN: Guess from R200 */
+#       define R300_TXO_ENDIAN_NO_SWAP           (0 << 0) /* endian mode: 2-bit field at bits 1:0 */
+#       define R300_TXO_ENDIAN_BYTE_SWAP         (1 << 0)
+#       define R300_TXO_ENDIAN_WORD_SWAP         (2 << 0)
+#       define R300_TXO_ENDIAN_HALFDW_SWAP       (3 << 0)
+#       define R300_TXO_MACRO_TILE               (1 << 2)
+#       define R300_TXO_MICRO_TILE_LINEAR        (0 << 3) /* micro tile mode: 2-bit field at bits 4:3 */
+#       define R300_TXO_MICRO_TILE               (1 << 3)
+#       define R300_TXO_MICRO_TILE_SQUARE        (2 << 3)
+#       define R300_TXO_OFFSET_MASK              0xffffffe0 /* bits 31:5 */
+#       define R300_TXO_OFFSET_SHIFT             5
+	/* END: Guess from R200 */
+
+/* 32 bit chroma key: sixteen consecutive dwords, 0x4580-0x45bc */
+#define R300_TX_CHROMA_KEY_0                      0x4580
+#define R300_TX_CHROMA_KEY_1                      0x4584
+#define R300_TX_CHROMA_KEY_2                      0x4588
+#define R300_TX_CHROMA_KEY_3                      0x458c
+#define R300_TX_CHROMA_KEY_4                      0x4590
+#define R300_TX_CHROMA_KEY_5                      0x4594
+#define R300_TX_CHROMA_KEY_6                      0x4598
+#define R300_TX_CHROMA_KEY_7                      0x459c
+#define R300_TX_CHROMA_KEY_8                      0x45a0
+#define R300_TX_CHROMA_KEY_9                      0x45a4
+#define R300_TX_CHROMA_KEY_10                     0x45a8
+#define R300_TX_CHROMA_KEY_11                     0x45ac
+#define R300_TX_CHROMA_KEY_12                     0x45b0
+#define R300_TX_CHROMA_KEY_13                     0x45b4
+#define R300_TX_CHROMA_KEY_14                     0x45b8
+#define R300_TX_CHROMA_KEY_15                     0x45bc
+/* Example value: ff00ff00 == { 0, 1.0, 0, 1.0 } */
+
+/* Border Color */
+#define R300_TX_BORDER_COLOR_0              0x45c0
+#define R300_TX_BORDER_COLOR_1              0x45c4
+#define R300_TX_BORDER_COLOR_2              0x45c8
+#define R300_TX_BORDER_COLOR_3              0x45cc
+#define R300_TX_BORDER_COLOR_4              0x45d0
+#define R300_TX_BORDER_COLOR_5              0x45d4
+#define R300_TX_BORDER_COLOR_6              0x45d8
+#define R300_TX_BORDER_COLOR_7              0x45dc
+#define R300_TX_BORDER_COLOR_8              0x45e0
+#define R300_TX_BORDER_COLOR_9              0x45e4
+#define R300_TX_BORDER_COLOR_10             0x45e8
+#define R300_TX_BORDER_COLOR_11             0x45ec
+#define R300_TX_BORDER_COLOR_12             0x45f0
+#define R300_TX_BORDER_COLOR_13             0x45f4
+#define R300_TX_BORDER_COLOR_14             0x45f8
+#define R300_TX_BORDER_COLOR_15             0x45fc
+
+
+/* END: Texture specification */
+
+/* BEGIN: Fragment program instruction set */
+
+/* Fragment programs are written directly into register space.
+ * There are separate instruction streams for texture instructions and ALU
+ * instructions.
+ * In order to synchronize these streams, the program is divided into up
+ * to 4 nodes. Each node begins with a number of TEX operations, followed
+ * by a number of ALU operations.
+ * The first node can have zero TEX ops, all subsequent nodes must have at
+ * least
+ * one TEX op.
+ * All nodes must have at least one ALU op.
+ *
+ * The index of the last node is stored in PFS_CNTL_0: A value of 0 means
+ * 1 node, a value of 3 means 4 nodes.
+ * The total amount of instructions is defined in PFS_CNTL_2. The offsets are
+ * offsets into the respective instruction streams, while *_END points to the
+ * last instruction relative to this offset.
+ */
+#define R300_US_CONFIG                      0x4600
+#       define R300_PFS_CNTL_LAST_NODES_SHIFT    0
+#       define R300_PFS_CNTL_LAST_NODES_MASK     (3 << 0)
+#       define R300_PFS_CNTL_FIRST_NODE_HAS_TEX  (1 << 3)
+#define R300_US_PIXSIZE                     0x4604
+/* There is an unshifted value here which has so far always been equal to the
+ * index of the highest used temporary register.
+ */
+#define R300_US_CODE_OFFSET                 0x4608
+#       define R300_PFS_CNTL_ALU_OFFSET_SHIFT    0
+#       define R300_PFS_CNTL_ALU_OFFSET_MASK     (63 << 0)
+#       define R300_PFS_CNTL_ALU_END_SHIFT       6
+#       define R300_PFS_CNTL_ALU_END_MASK        (63 << 6)
+#       define R300_PFS_CNTL_TEX_OFFSET_SHIFT    13
+#       define R300_PFS_CNTL_TEX_OFFSET_MASK     (31 << 13)
+#       define R300_PFS_CNTL_TEX_END_SHIFT       18
+#       define R300_PFS_CNTL_TEX_END_MASK        (31 << 18)
+
+/* gap */
+
+/* Nodes are stored backwards. The last active node is always stored in
+ * PFS_NODE_3.
+ * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The
+ * first node is stored in NODE_2, the second node is stored in NODE_3.
+ *
+ * Offsets are relative to the master offset from PFS_CNTL_2.
+ */
+#define R300_US_CODE_ADDR_0                 0x4610
+#define R300_US_CODE_ADDR_1                 0x4614
+#define R300_US_CODE_ADDR_2                 0x4618
+#define R300_US_CODE_ADDR_3                 0x461C
+#       define R300_ALU_START_SHIFT         0
+#       define R300_ALU_START_MASK          (63 << 0)
+#       define R300_ALU_SIZE_SHIFT          6
+#       define R300_ALU_SIZE_MASK           (63 << 6)
+#       define R300_TEX_START_SHIFT         12
+#       define R300_TEX_START_MASK          (31 << 12)
+#       define R300_TEX_SIZE_SHIFT          17
+#       define R300_TEX_SIZE_MASK           (31 << 17)
+#	define R300_RGBA_OUT                (1 << 22)
+#	define R300_W_OUT                   (1 << 23)
+
+/* TEX
+ * As far as I can tell, texture instructions cannot write into output
+ * registers directly. A subsequent ALU instruction is always necessary,
+ * even if it's just MAD o0, r0, 1, 0
+ */
+#define R300_US_TEX_INST_0                  0x4620
+#	define R300_SRC_ADDR_SHIFT          0
+#	define R300_SRC_ADDR_MASK           (31 << 0)
+#	define R300_DST_ADDR_SHIFT          6
+#	define R300_DST_ADDR_MASK           (31 << 6)
+#	define R300_TEX_ID_SHIFT            11
+#       define R300_TEX_ID_MASK             (15 << 11)
+#	define R300_TEX_INST_SHIFT		15
+#		define R300_TEX_OP_NOP	        0
+#		define R300_TEX_OP_LD	        1
+#		define R300_TEX_OP_KIL	        2
+#		define R300_TEX_OP_TXP	        3
+#		define R300_TEX_OP_TXB	        4
+#	define R300_TEX_INST_MASK               (7 << 15)
+
+/* Output format from the unified shader; format codes are unshifted (<< 0). */
+#define R300_US_OUT_FMT                     0x46A4
+#	define R300_US_OUT_FMT_C4_8         (0 << 0)
+#	define R300_US_OUT_FMT_C4_10        (1 << 0)
+#	define R300_US_OUT_FMT_C4_10_GAMMA  (2 << 0)
+#	define R300_US_OUT_FMT_C_16         (3 << 0)
+#	define R300_US_OUT_FMT_C2_16        (4 << 0)
+#	define R300_US_OUT_FMT_C4_16        (5 << 0)
+#	define R300_US_OUT_FMT_C_16_MPEG    (6 << 0)
+#	define R300_US_OUT_FMT_C2_16_MPEG   (7 << 0)
+#	define R300_US_OUT_FMT_C2_4         (8 << 0)
+#	define R300_US_OUT_FMT_C_3_3_2      (9 << 0)
+#	define R300_US_OUT_FMT_C_6_5_6      (10 << 0)
+#	define R300_US_OUT_FMT_C_11_11_10   (11 << 0)
+#	define R300_US_OUT_FMT_C_10_11_11   (12 << 0)
+#	define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0)
+/* 14 is reserved */
+#	define R300_US_OUT_FMT_UNUSED       (15 << 0)
+#	define R300_US_OUT_FMT_C_16_FP      (16 << 0)
+#	define R300_US_OUT_FMT_C2_16_FP     (17 << 0)
+#	define R300_US_OUT_FMT_C4_16_FP     (18 << 0)
+#	define R300_US_OUT_FMT_C_32_FP      (19 << 0)
+#	define R300_US_OUT_FMT_C2_32_FP     (20 << 0)
+#	define R300_US_OUT_FMT_C4_32_FP     (21 << 0) /* was 20, colliding with C2_32_FP */
+
+/* ALU
+ * The ALU instruction register blocks are enumerated according to the order in
+ * which fglrx writes them. I assume there is space for 64 instructions, since
+ * each block has space for a maximum of 64 DWORDs, and this matches reported
+ * native limits.
+ *
+ * The basic functional block seems to be one MAD for each color and alpha,
+ * and an adder that adds all components after the MUL.
+ *  - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands
+ *  - DP4: Use OUTC_DP4, OUTA_DP4
+ *  - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands
+ *  - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands
+ *  - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1
+ *  - CMP: If ARG2 < 0, return ARG1, else return ARG0
+ *  - FLR: use FRC+MAD
+ *  - XPD: use MAD+MAD
+ *  - SGE, SLT: use MAD+CMP
+ *  - RSQ: use ABS modifier for argument
+ *  - Use OUTC_REPL_ALPHA to write results of an alpha-only operation
+ *    (e.g. RCP) into color register
+ *  - apparently, there's no quick DST operation
+ *  - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2"
+ *  - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0"
+ *  - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1"
+ *
+ * Operand selection
+ * First stage selects three sources from the available registers and
+ * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha).
+ * fglrx sorts the three source fields: Registers before constants,
+ * lower indices before higher indices; I do not know whether this is
+ * necessary.
+ *
+ * fglrx fills unused sources with "read constant 0"
+ * According to specs, you cannot select more than two different constants.
+ *
+ * Second stage selects the operands from the sources. This is defined in
+ * INSTR0 (color) and INSTR2 (alpha). You can also select the special constants
+ * zero and one.
+ * Swizzling and negation happens in this stage, as well.
+ *
+ * Important: Color and alpha seem to be mostly separate, i.e. their sources
+ * selection appears to be fully independent (the register storage is probably
+ * physically split into a color and an alpha section).
+ * However (because of the apparent physical split), there is some interaction
+ * WRT swizzling. If, for example, you want to load an R component into an
+ * Alpha operand, this R component is taken from a *color* source, not from
+ * an alpha source. The corresponding register doesn't even have to appear in
+ * the alpha sources list. (I hope this all makes sense to you)
+ *
+ * Destination selection
+ * The destination register index is in FPI1 (color) and FPI3 (alpha)
+ * together with enable bits.
+ * There are separate enable bits for writing into temporary registers
+ * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_*
+ * /DSTA_OUTPUT). You can write to both at once, or not write at all (the
+ * same index must be used for both).
+ *
+ * Note: There is a special form for LRP
+ *  - Argument order is the same as in ARB_fragment_program.
+ *  - Operation is MAD
+ *  - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP
+ *  - Set FPI0/FPI2_SPECIAL_LRP
+ * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD
+ */
+#define R300_US_ALU_RGB_ADDR_0                   0x46C0
+#       define R300_ALU_SRC0C_SHIFT             0
+#       define R300_ALU_SRC0C_MASK              (31 << 0)
+#       define R300_ALU_SRC0C_CONST             (1 << 5)
+#       define R300_ALU_SRC1C_SHIFT             6
+#       define R300_ALU_SRC1C_MASK              (31 << 6)
+#       define R300_ALU_SRC1C_CONST             (1 << 11)
+#       define R300_ALU_SRC2C_SHIFT             12
+#       define R300_ALU_SRC2C_MASK              (31 << 12)
+#       define R300_ALU_SRC2C_CONST             (1 << 17)
+#       define R300_ALU_SRC_MASK                0x0003ffff
+#       define R300_ALU_DSTC_SHIFT              18
+#       define R300_ALU_DSTC_MASK               (31 << 18)
+#		define R300_ALU_DSTC_REG_MASK_SHIFT     23
+#       define R300_ALU_DSTC_REG_X              (1 << 23)
+#       define R300_ALU_DSTC_REG_Y              (1 << 24)
+#       define R300_ALU_DSTC_REG_Z              (1 << 25)
+#		define R300_ALU_DSTC_OUTPUT_MASK_SHIFT  26
+#       define R300_ALU_DSTC_OUTPUT_X           (1 << 26)
+#       define R300_ALU_DSTC_OUTPUT_Y           (1 << 27)
+#       define R300_ALU_DSTC_OUTPUT_Z           (1 << 28)
+#       define R300_RGB_TARGET(x)               ((x) << 29)
+
+#define R300_US_ALU_ALPHA_ADDR_0                 0x47C0
+#       define R300_ALU_SRC0A_SHIFT             0
+#       define R300_ALU_SRC0A_MASK              (31 << 0)
+#       define R300_ALU_SRC0A_CONST             (1 << 5)
+#       define R300_ALU_SRC1A_SHIFT             6
+#       define R300_ALU_SRC1A_MASK              (31 << 6)
+#       define R300_ALU_SRC1A_CONST             (1 << 11)
+#       define R300_ALU_SRC2A_SHIFT             12
+#       define R300_ALU_SRC2A_MASK              (31 << 12)
+#       define R300_ALU_SRC2A_CONST             (1 << 17)
+#       define R300_ALU_SRC_MASK                0x0003ffff
+#       define R300_ALU_DSTA_SHIFT              18
+#       define R300_ALU_DSTA_MASK               (31 << 18)
+#       define R300_ALU_DSTA_REG                (1 << 23)
+#       define R300_ALU_DSTA_OUTPUT             (1 << 24)
+#		define R300_ALU_DSTA_DEPTH              (1 << 27)
+#		define R300_ALPHA_TARGET(x)             ((x) << 25)
+
+#define R300_US_ALU_RGB_INST_0                   0x48C0
+#       define R300_ALU_ARGC_SRC0C_XYZ          0
+#       define R300_ALU_ARGC_SRC0C_XXX          1
+#       define R300_ALU_ARGC_SRC0C_YYY          2
+#       define R300_ALU_ARGC_SRC0C_ZZZ          3
+#       define R300_ALU_ARGC_SRC1C_XYZ          4
+#       define R300_ALU_ARGC_SRC1C_XXX          5
+#       define R300_ALU_ARGC_SRC1C_YYY          6
+#       define R300_ALU_ARGC_SRC1C_ZZZ          7
+#       define R300_ALU_ARGC_SRC2C_XYZ          8
+#       define R300_ALU_ARGC_SRC2C_XXX          9
+#       define R300_ALU_ARGC_SRC2C_YYY          10
+#       define R300_ALU_ARGC_SRC2C_ZZZ          11
+#       define R300_ALU_ARGC_SRC0A              12
+#       define R300_ALU_ARGC_SRC1A              13
+#       define R300_ALU_ARGC_SRC2A              14
+#       define R300_ALU_ARGC_SRCP_XYZ           15
+#       define R300_ALU_ARGC_SRCP_XXX           16
+#       define R300_ALU_ARGC_SRCP_YYY           17
+#       define R300_ALU_ARGC_SRCP_ZZZ           18
+#       define R300_ALU_ARGC_SRCP_WWW           19
+#       define R300_ALU_ARGC_ZERO               20
+#       define R300_ALU_ARGC_ONE                21
+#       define R300_ALU_ARGC_HALF               22
+#       define R300_ALU_ARGC_SRC0C_YZX          23
+#       define R300_ALU_ARGC_SRC1C_YZX          24
+#       define R300_ALU_ARGC_SRC2C_YZX          25
+#       define R300_ALU_ARGC_SRC0C_ZXY          26
+#       define R300_ALU_ARGC_SRC1C_ZXY          27
+#       define R300_ALU_ARGC_SRC2C_ZXY          28
+#       define R300_ALU_ARGC_SRC0CA_WZY         29
+#       define R300_ALU_ARGC_SRC1CA_WZY         30
+#       define R300_ALU_ARGC_SRC2CA_WZY         31
+
+#       define R300_ALU_ARG0C_SHIFT             0
+#       define R300_ALU_ARG0C_MASK              (31 << 0)
+#       define R300_ALU_ARG0C_NOP               (0 << 5)
+#       define R300_ALU_ARG0C_NEG               (1 << 5)
+#       define R300_ALU_ARG0C_ABS               (2 << 5)
+#       define R300_ALU_ARG0C_NAB               (3 << 5)
+#       define R300_ALU_ARG1C_SHIFT             7
+#       define R300_ALU_ARG1C_MASK              (31 << 7)
+#       define R300_ALU_ARG1C_NOP               (0 << 12)
+#       define R300_ALU_ARG1C_NEG               (1 << 12)
+#       define R300_ALU_ARG1C_ABS               (2 << 12)
+#       define R300_ALU_ARG1C_NAB               (3 << 12)
+#       define R300_ALU_ARG2C_SHIFT             14
+#       define R300_ALU_ARG2C_MASK              (31 << 14)
+#       define R300_ALU_ARG2C_NOP               (0 << 19)
+#       define R300_ALU_ARG2C_NEG               (1 << 19)
+#       define R300_ALU_ARG2C_ABS               (2 << 19)
+#       define R300_ALU_ARG2C_NAB               (3 << 19)
+#       define R300_ALU_SRCP_1_MINUS_2_SRC0     (0 << 21)
+#       define R300_ALU_SRCP_SRC1_MINUS_SRC0    (1 << 21)
+#       define R300_ALU_SRCP_SRC1_PLUS_SRC0     (2 << 21)
+#       define R300_ALU_SRCP_1_MINUS_SRC0       (3 << 21)
+
+#       define R300_ALU_OUTC_MAD                (0 << 23)
+#       define R300_ALU_OUTC_DP3                (1 << 23)
+#       define R300_ALU_OUTC_DP4                (2 << 23)
+#       define R300_ALU_OUTC_D2A                (3 << 23)
+#       define R300_ALU_OUTC_MIN                (4 << 23)
+#       define R300_ALU_OUTC_MAX                (5 << 23)
+#       define R300_ALU_OUTC_CMPH               (7 << 23)
+#       define R300_ALU_OUTC_CMP                (8 << 23)
+#       define R300_ALU_OUTC_FRC                (9 << 23)
+#       define R300_ALU_OUTC_REPL_ALPHA         (10 << 23)
+
+#       define R300_ALU_OUTC_MOD_NOP            (0 << 27)
+#       define R300_ALU_OUTC_MOD_MUL2           (1 << 27)
+#       define R300_ALU_OUTC_MOD_MUL4           (2 << 27)
+#       define R300_ALU_OUTC_MOD_MUL8           (3 << 27)
+#       define R300_ALU_OUTC_MOD_DIV2           (4 << 27)
+#       define R300_ALU_OUTC_MOD_DIV4           (5 << 27)
+#       define R300_ALU_OUTC_MOD_DIV8           (6 << 27)
+
+#       define R300_ALU_OUTC_CLAMP              (1 << 30)
+#       define R300_ALU_INSERT_NOP              (1u << 31) /* unsigned: 1 << 31 overflows int */
+
+#define R300_US_ALU_ALPHA_INST_0                 0x49C0
+#       define R300_ALU_ARGA_SRC0C_X            0
+#       define R300_ALU_ARGA_SRC0C_Y            1
+#       define R300_ALU_ARGA_SRC0C_Z            2
+#       define R300_ALU_ARGA_SRC1C_X            3
+#       define R300_ALU_ARGA_SRC1C_Y            4
+#       define R300_ALU_ARGA_SRC1C_Z            5
+#       define R300_ALU_ARGA_SRC2C_X            6
+#       define R300_ALU_ARGA_SRC2C_Y            7
+#       define R300_ALU_ARGA_SRC2C_Z            8
+#       define R300_ALU_ARGA_SRC0A              9
+#       define R300_ALU_ARGA_SRC1A              10
+#       define R300_ALU_ARGA_SRC2A              11
+#       define R300_ALU_ARGA_SRCP_X             12
+#       define R300_ALU_ARGA_SRCP_Y             13
+#       define R300_ALU_ARGA_SRCP_Z             14
+#       define R300_ALU_ARGA_SRCP_W             15
+
+#       define R300_ALU_ARGA_ZERO               16
+#       define R300_ALU_ARGA_ONE                17
+#       define R300_ALU_ARGA_HALF               18
+#       define R300_ALU_ARG0A_SHIFT             0
+#       define R300_ALU_ARG0A_MASK              (31 << 0)
+#       define R300_ALU_ARG0A_NOP               (0 << 5)
+#       define R300_ALU_ARG0A_NEG               (1 << 5)
+#	define R300_ALU_ARG0A_ABS		 (2 << 5)
+#	define R300_ALU_ARG0A_NAB		 (3 << 5)
+#       define R300_ALU_ARG1A_SHIFT             7
+#       define R300_ALU_ARG1A_MASK              (31 << 7)
+#       define R300_ALU_ARG1A_NOP               (0 << 12)
+#       define R300_ALU_ARG1A_NEG               (1 << 12)
+#	define R300_ALU_ARG1A_ABS		 (2 << 12)
+#	define R300_ALU_ARG1A_NAB		 (3 << 12)
+#       define R300_ALU_ARG2A_SHIFT             14
+#       define R300_ALU_ARG2A_MASK              (31 << 14)
+#       define R300_ALU_ARG2A_NOP               (0 << 19)
+#       define R300_ALU_ARG2A_NEG               (1 << 19)
+#	define R300_ALU_ARG2A_ABS		 (2 << 19)
+#	define R300_ALU_ARG2A_NAB		 (3 << 19)
+#       define R300_ALU_SRCP_1_MINUS_2_SRC0     (0 << 21)
+#       define R300_ALU_SRCP_SRC1_MINUS_SRC0    (1 << 21)
+#       define R300_ALU_SRCP_SRC1_PLUS_SRC0     (2 << 21)
+#       define R300_ALU_SRCP_1_MINUS_SRC0       (3 << 21)
+
+#       define R300_ALU_OUTA_MAD                (0 << 23)
+#       define R300_ALU_OUTA_DP4                (1 << 23)
+#       define R300_ALU_OUTA_MIN                (2 << 23)
+#       define R300_ALU_OUTA_MAX                (3 << 23)
+#       define R300_ALU_OUTA_CND                (5 << 23)
+#       define R300_ALU_OUTA_CMP                (6 << 23)
+#       define R300_ALU_OUTA_FRC                (7 << 23)
+#       define R300_ALU_OUTA_EX2                (8 << 23)
+#       define R300_ALU_OUTA_LG2                (9 << 23)
+#       define R300_ALU_OUTA_RCP                (10 << 23)
+#       define R300_ALU_OUTA_RSQ                (11 << 23)
+
+#       define R300_ALU_OUTA_MOD_NOP            (0 << 27)
+#       define R300_ALU_OUTA_MOD_MUL2           (1 << 27)
+#       define R300_ALU_OUTA_MOD_MUL4           (2 << 27)
+#       define R300_ALU_OUTA_MOD_MUL8           (3 << 27)
+#       define R300_ALU_OUTA_MOD_DIV2           (4 << 27)
+#       define R300_ALU_OUTA_MOD_DIV4           (5 << 27)
+#       define R300_ALU_OUTA_MOD_DIV8           (6 << 27)
+
+#       define R300_ALU_OUTA_CLAMP              (1 << 30)
+/* END: Fragment program instruction set */
+
+/* Fog: Fog Blending Enable */
+#define R300_FG_FOG_BLEND                             0x4bc0
+#       define R300_FG_FOG_BLEND_DISABLE              (0 << 0)
+#       define R300_FG_FOG_BLEND_ENABLE               (1 << 0)
+#	define R300_FG_FOG_BLEND_FN_LINEAR            (0 << 1)
+#	define R300_FG_FOG_BLEND_FN_EXP               (1 << 1)
+#	define R300_FG_FOG_BLEND_FN_EXP2              (2 << 1)
+#	define R300_FG_FOG_BLEND_FN_CONSTANT          (3 << 1)
+#	define R300_FG_FOG_BLEND_FN_MASK              (3 << 1)
+
+/* Fog: Red Component of Fog Color */
+#define R300_FG_FOG_COLOR_R                           0x4bc8
+/* Fog: Green Component of Fog Color */
+#define R300_FG_FOG_COLOR_G                           0x4bcc
+/* Fog: Blue Component of Fog Color */
+#define R300_FG_FOG_COLOR_B                           0x4bd0
+#	define R300_FG_FOG_COLOR_MASK 0x000003ff
+
+/* Fog: Constant Factor for Fog Blending */
+#define R300_FG_FOG_FACTOR                            0x4bc4
+#	define FG_FOG_FACTOR_MASK 0x000003ff
+
+/* Fog: Alpha function */
+#define R300_FG_ALPHA_FUNC                            0x4bd4
+#       define R300_FG_ALPHA_FUNC_VAL_MASK               0x000000ff
+#       define R300_FG_ALPHA_FUNC_NEVER                     (0 << 8)
+#       define R300_FG_ALPHA_FUNC_LESS                      (1 << 8)
+#       define R300_FG_ALPHA_FUNC_EQUAL                     (2 << 8)
+#       define R300_FG_ALPHA_FUNC_LE                        (3 << 8)
+#       define R300_FG_ALPHA_FUNC_GREATER                   (4 << 8)
+#       define R300_FG_ALPHA_FUNC_NOTEQUAL                  (5 << 8)
+#       define R300_FG_ALPHA_FUNC_GE                        (6 << 8)
+#       define R300_FG_ALPHA_FUNC_ALWAYS                    (7 << 8)
+#       define R300_ALPHA_TEST_OP_MASK                      (7 << 8)
+#       define R300_FG_ALPHA_FUNC_DISABLE                   (0 << 11)
+#       define R300_FG_ALPHA_FUNC_ENABLE                    (1 << 11)
+
+#       define R500_FG_ALPHA_FUNC_10BIT                     (0 << 12)
+#       define R500_FG_ALPHA_FUNC_8BIT                      (1 << 12)
+
+#       define R300_FG_ALPHA_FUNC_MASK_DISABLE              (0 << 16)
+#       define R300_FG_ALPHA_FUNC_MASK_ENABLE               (1 << 16)
+#       define R300_FG_ALPHA_FUNC_CFG_2_OF_4                (0 << 17)
+#       define R300_FG_ALPHA_FUNC_CFG_3_OF_6                (1 << 17)
+
+#       define R300_FG_ALPHA_FUNC_DITH_DISABLE              (0 << 20)
+#       define R300_FG_ALPHA_FUNC_DITH_ENABLE               (1 << 20)
+
+#       define R500_FG_ALPHA_FUNC_OFFSET_DISABLE            (0 << 24)
+#       define R500_FG_ALPHA_FUNC_OFFSET_ENABLE             (1 << 24) /* Not supported in R520 */
+#       define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE    (0 << 25)
+#       define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE     (1 << 25)
+
+#       define R500_FG_ALPHA_FUNC_FP16_DISABLE              (0 << 28)
+#       define R500_FG_ALPHA_FUNC_FP16_ENABLE               (1 << 28)
+
+
+/* Fog: Where does the depth come from? */
+#define R300_FG_DEPTH_SRC                  0x4bd8
+#	define R300_FG_DEPTH_SRC_SCAN   (0 << 0)
+#	define R300_FG_DEPTH_SRC_SHADER (1 << 0)
+
+/* Fog: Alpha Compare Value */
+#define R500_FG_ALPHA_VALUE                0x4be0
+#	define R500_FG_ALPHA_VALUE_MASK 0x0000ffff
+
+#define RV530_FG_ZBREG_DEST                 0x4be8
+#	define RV530_FG_ZBREG_DEST_PIPE_SELECT_0             (1 << 0)
+#	define RV530_FG_ZBREG_DEST_PIPE_SELECT_1             (1 << 1)
+#	define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL           (3 << 0)
+
+/* gap */
+
+/* Fragment program parameters in 7.16 floating point */
+#define R300_PFS_PARAM_0_X                  0x4C00
+#define R300_PFS_PARAM_0_Y                  0x4C04
+#define R300_PFS_PARAM_0_Z                  0x4C08
+#define R300_PFS_PARAM_0_W                  0x4C0C
+/* last consts */
+#define R300_PFS_PARAM_31_X                 0x4DF0
+#define R300_PFS_PARAM_31_Y                 0x4DF4
+#define R300_PFS_PARAM_31_Z                 0x4DF8
+#define R300_PFS_PARAM_31_W                 0x4DFC
+
+/* Unpipelined. */
+#define R300_RB3D_CCTL                      0x4e00
+#	define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER                (0 << 5)
+#	define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS               (1 << 5)
+#	define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS               (2 << 5)
+#	define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS               (3 << 5)
+#	define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE                    (0 << 7)
+#	define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE                     (1 << 7)
+#	define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE                  (0 << 9)
+#	define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE                   (1 << 9)
+#	define R300_RB3D_CCTL_CMASK_DISABLE                           (0 << 10)
+#	define R300_RB3D_CCTL_CMASK_ENABLE                            (1 << 10)
+/* reserved */
+#	define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_DISABLE  (0 << 12)
+#	define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE   (1 << 12)
+#	define R300_RB3D_CCTL_WRITE_COMPRESSION_ENABLE                (0 << 13)
+#	define R300_RB3D_CCTL_WRITE_COMPRESSION_DISABLE               (1 << 13)
+#	define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_DISABLE  (0 << 14)
+#	define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE   (1 << 14)
+
+
+/* Notes:
+ * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in
+ *   the application
+ * - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND
+ *   are set to the same function (both registers are always set up
+ *   completely in any case)
+ * - Most blend flags are simply copied from R200 and not tested yet
+ */
+#define R300_RB3D_CBLEND                    0x4E04
+#define R300_RB3D_ABLEND                    0x4E08
+/* the following only appear in CBLEND */
+#       define R300_ALPHA_BLEND_ENABLE         (1 << 0)
+#       define R300_SEPARATE_ALPHA_ENABLE      (1 << 1)
+#       define R300_READ_ENABLE                (1 << 2)
+#       define R300_DISCARD_SRC_PIXELS_DIS     (0 << 3)
+#       define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0     (1 << 3)
+#       define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0     (2 << 3)
+#       define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0     (3 << 3)
+#       define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1     (4 << 3)
+#       define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1     (5 << 3)
+#       define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1     (6 << 3)
+
+/* the following are shared between CBLEND and ABLEND */
+#       define R300_FCN_MASK                         (7  << 12) /* 3 bits: covers COMB_FCN values 0-7 below */
+#       define R300_COMB_FCN_ADD_CLAMP               (0  << 12)
+#       define R300_COMB_FCN_ADD_NOCLAMP             (1  << 12)
+#       define R300_COMB_FCN_SUB_CLAMP               (2  << 12)
+#       define R300_COMB_FCN_SUB_NOCLAMP             (3  << 12)
+#       define R300_COMB_FCN_MIN                     (4  << 12)
+#       define R300_COMB_FCN_MAX                     (5  << 12)
+#       define R300_COMB_FCN_RSUB_CLAMP              (6  << 12)
+#       define R300_COMB_FCN_RSUB_NOCLAMP            (7  << 12)
+#       define R300_BLEND_GL_ZERO                    (32)
+#       define R300_BLEND_GL_ONE                     (33)
+#       define R300_BLEND_GL_SRC_COLOR               (34)
+#       define R300_BLEND_GL_ONE_MINUS_SRC_COLOR     (35)
+#       define R300_BLEND_GL_DST_COLOR               (36)
+#       define R300_BLEND_GL_ONE_MINUS_DST_COLOR     (37)
+#       define R300_BLEND_GL_SRC_ALPHA               (38)
+#       define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA     (39)
+#       define R300_BLEND_GL_DST_ALPHA               (40)
+#       define R300_BLEND_GL_ONE_MINUS_DST_ALPHA     (41)
+#       define R300_BLEND_GL_SRC_ALPHA_SATURATE      (42)
+#       define R300_BLEND_GL_CONST_COLOR             (43)
+#       define R300_BLEND_GL_ONE_MINUS_CONST_COLOR   (44)
+#       define R300_BLEND_GL_CONST_ALPHA             (45)
+#       define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA   (46)
+#       define R300_BLEND_MASK                       (63)
+#       define R300_SRC_BLEND_SHIFT                  (16)
+#       define R300_DST_BLEND_SHIFT                  (24)
+
+/* Constant color used by the blender. Pipelined through the blender.
+ * Note: For R520, this field is ignored, use RB3D_CONSTANT_COLOR_GB__BLUE,
+ * RB3D_CONSTANT_COLOR_GB__GREEN, etc. instead.
+ */
+#define R300_RB3D_BLEND_COLOR               0x4E10
+
+
+/* 3D Color Channel Mask. If all the channels used in the current color format
+ * are disabled, then the cb will discard all the incoming quads. Pipelined
+ * through the blender.
+ */
+#define RB3D_COLOR_CHANNEL_MASK                  0x4E0C
+#	define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0  (1 << 0)
+#	define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 (1 << 1)
+#	define RB3D_COLOR_CHANNEL_MASK_RED_MASK0   (1 << 2)
+#	define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 (1 << 3)
+#	define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK1  (1 << 4)
+#	define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK1 (1 << 5)
+#	define RB3D_COLOR_CHANNEL_MASK_RED_MASK1   (1 << 6)
+#	define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK1 (1 << 7)
+#	define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK2  (1 << 8)
+#	define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK2 (1 << 9)
+#	define RB3D_COLOR_CHANNEL_MASK_RED_MASK2   (1 << 10)
+#	define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK2 (1 << 11)
+#	define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK3  (1 << 12)
+#	define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK3 (1 << 13)
+#	define RB3D_COLOR_CHANNEL_MASK_RED_MASK3   (1 << 14)
+#	define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK3 (1 << 15)
+
+/* Clear color that is used when the color mask is set to 00. Unpipelined.
+ * Program this register with a 32-bit value in ARGB8888 or ARGB2101010
+ * formats, ignoring the fields.
+ */
+#define RB3D_COLOR_CLEAR_VALUE                   0x4e14
+
+/* gap */
+
+/* Color Compare Color. Stalls the 2d/3d datapath until it is idle. */
+#define RB3D_CLRCMP_CLR                     0x4e20
+
+/* Color Compare Mask. Stalls the 2d/3d datapath until it is idle. */
+#define RB3D_CLRCMP_MSK                     0x4e24
+
+/* Color Buffer Address Offset of multibuffer 0. Unpipelined. */
+#define R300_RB3D_COLOROFFSET0              0x4E28
+#       define R300_COLOROFFSET_MASK             0xFFFFFFE0
+/* Color Buffer Address Offset of multibuffer 1. Unpipelined. */
+#define R300_RB3D_COLOROFFSET1              0x4E2C
+/* Color Buffer Address Offset of multibuffer 2. Unpipelined. */
+#define R300_RB3D_COLOROFFSET2              0x4E30
+/* Color Buffer Address Offset of multibuffer 3. Unpipelined. */
+#define R300_RB3D_COLOROFFSET3              0x4E34
+
+/* Color buffer format and tiling control for all the multibuffers and the
+ * pitch of multibuffer 0 to 3. Unpipelined. The cache must be empty before any
+ * of the registers are changed.
+ *
+ * Bit 16: Larger tiles
+ * Bit 17: 4x2 tiles
+ * Bit 18: Extremely weird tile like, but some pixels duplicated?
+ */
+#define R300_RB3D_COLORPITCH0               0x4E38
+#       define R300_COLORPITCH_MASK              0x00003FFE
+#       define R300_COLOR_TILE_DISABLE            (0 << 16)
+#       define R300_COLOR_TILE_ENABLE             (1 << 16)
+#       define R300_COLOR_MICROTILE_DISABLE       (0 << 17)
+#       define R300_COLOR_MICROTILE_ENABLE        (1 << 17)
+#       define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */
+#       define R300_COLOR_ENDIAN_NO_SWAP          (0 << 19)
+#       define R300_COLOR_ENDIAN_WORD_SWAP        (1 << 19)
+#       define R300_COLOR_ENDIAN_DWORD_SWAP       (2 << 19)
+#       define R300_COLOR_ENDIAN_HALF_DWORD_SWAP  (3 << 19)
+#	define R500_COLOR_FORMAT_ARGB10101010     (0 << 21)
+#	define R500_COLOR_FORMAT_UV1010           (1 << 21)
+#	define R500_COLOR_FORMAT_CI8              (2 << 21) /* 2D only */
+#	define R300_COLOR_FORMAT_ARGB1555         (3 << 21)
+#       define R300_COLOR_FORMAT_RGB565           (4 << 21)
+#       define R500_COLOR_FORMAT_ARGB2101010      (5 << 21)
+#       define R300_COLOR_FORMAT_ARGB8888         (6 << 21)
+#       define R300_COLOR_FORMAT_ARGB32323232     (7 << 21)
+/* reserved */
+#       define R300_COLOR_FORMAT_I8               (9 << 21)
+#       define R300_COLOR_FORMAT_ARGB16161616     (10 << 21)
+#       define R300_COLOR_FORMAT_VYUY             (11 << 21)
+#       define R300_COLOR_FORMAT_YVYU             (12 << 21)
+#       define R300_COLOR_FORMAT_UV88             (13 << 21)
+#       define R500_COLOR_FORMAT_I10              (14 << 21)
+#       define R300_COLOR_FORMAT_ARGB4444         (15 << 21)
+#define R300_RB3D_COLORPITCH1               0x4E3C
+#define R300_RB3D_COLORPITCH2               0x4E40
+#define R300_RB3D_COLORPITCH3               0x4E44
+
+/* gap */
+
+/* Destination Color Buffer Cache Control/Status. If the cb is in e2 mode, then
+ * a flush or free will not occur upon a write to this register, but a sync
+ * will be immediately sent if one is requested. If both DC_FLUSH and DC_FREE
+ * are zero but DC_FINISH is one, then a sync will be sent immediately -- the
+ * cb will not wait for all the previous operations to complete before sending
+ * the sync. Unpipelined except when DC_FINISH and DC_FREE are both set to
+ * zero.
+ *
+ * Set to 0A before 3D operations, set to 02 afterwards.
+ */
+#define R300_RB3D_DSTCACHE_CTLSTAT                              0x4E4C
+#  define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT         (0x0 << 0) /* DC_FLUSH: 2-bit field at bit 0 */
+#  define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1       (0x1 << 0)
+#  define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D    (0x2 << 0)
+#  define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1  (0x3 << 0)
+#  define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT          (0x0 << 2) /* DC_FREE: 2-bit field at bit 2 */
+#  define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1        (0x1 << 2)
+#  define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS       (0x2 << 2)
+#  define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1     (0x3 << 2)
+#  define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL        (0x0 << 4) /* DC_FINISH: bit 4 */
+#  define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL           (0x1 << 4)
+
+#define R300_RB3D_DITHER_CTL                                 0x4e50
+#  define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE          (0x0 << 0)
+#  define R300_RB3D_DITHER_CTL_DITHER_MODE_ROUND             (0x1 << 0)
+#  define R300_RB3D_DITHER_CTL_DITHER_MODE_LUT               (0x2 << 0)
+/* DITHER_MODE value 3 is reserved */
+#  define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_TRUNCATE    (0x0 << 2)
+#  define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_ROUND       (0x1 << 2)
+#  define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT         (0x2 << 2)
+/* reserved */
+
+/* Resolve buffer destination address. The cache must be empty before changing
+ * this register if the cb is in resolve mode. Unpipelined
+ */
+#define R300_RB3D_AARESOLVE_OFFSET           0x4E80
+#  define R300_RB3D_AARESOLVE_OFFSET_MASK    0xFFFFFFE0 /* bits 31:5; at least according to the calculations of Christoph Brill */
+#  define R300_RB3D_AARESOLVE_OFFSET_SHIFT   5
+
+/* Resolve Buffer Pitch and Tiling Control. The cache must be empty before
+ * changing this register if the cb is in resolve mode. Unpipelined
+ */
+#define R300_RB3D_AARESOLVE_PITCH            0x4E84
+#  define R300_RB3D_AARESOLVE_PITCH_MASK     0x00003FFE /* bits 13:1; at least according to the calculations of Christoph Brill */
+#  define R300_RB3D_AARESOLVE_PITCH_SHIFT    1
+
+/* Resolve Buffer Control. Unpipelined */
+#define R300_RB3D_AARESOLVE_CTL                             0x4E88
+#  define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL     (0x0 << 0) /* bit 0: mode */
+#  define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE    (0x1 << 0)
+#  define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10        (0x0 << 1) /* bit 1: gamma */
+#  define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22        (0x1 << 1)
+#  define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0   (0x0 << 2) /* bit 2: alpha source */
+#  define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE   (0x1 << 2)
+
+
+/* Source pixels less than or equal to this threshold are discarded. */
+#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD              0x4EA0
+/* Source pixels greater than or equal to this threshold are discarded. */
+#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD              0x4EA4
+#  define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK      0x000000FF
+#  define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT     0
+#  define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK     0x0000FF00
+#  define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT    8
+#  define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK       0x00FF0000
+#  define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT      16
+#  define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK     0xFF000000
+#  define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT    24
+
+/* 3D raster-operation (ROP) control. Stalls the 2d/3d datapath until it is idle. */
+#define R300_RB3D_ROPCNTL                      0x4E18
+#  define R300_RB3D_ROPCNTL_ROP_ENABLE         (0x1 << 2)
+#  define R300_RB3D_ROPCNTL_ROP_MASK           (0xf << 8)
+#  define R300_RB3D_ROPCNTL_ROP_SHIFT          8
+
+/* Color compare flip. Stalls the 2d/3d datapath until it is idle. */
+#define R300_RB3D_CLRCMP_FLIPE                 0x4E1C
+
+/* Selects the OP FIFO size (2-bit field at bit 0). */
+#define R500_RB3D_FIFO_SIZE                          0x4EF4
+#  define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL      (0x0 << 0)
+#  define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF      (0x1 << 0)
+#  define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER    (0x2 << 0)
+#  define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS    (0x3 << 0)
+
+/* Blender constant color, alpha/red half (red in bits 15:0, alpha in bits 31:16). Pipelined through the blender. */
+#define R500_RB3D_CONSTANT_COLOR_AR                  0x4EF8
+#  define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT      0
+#  define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK       0x0000FFFF
+#  define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT    16
+#  define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK     0xFFFF0000
+
+/* Blender constant color, green/blue half. Pipelined through the blender. NOTE(review): the field names below say _AR_ but describe this GB register. */
+#define R500_RB3D_CONSTANT_COLOR_GB                  0x4EFC
+#  define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT     0
+#  define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK      0x0000FFFF
+#  define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT    16
+#  define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK     0xFFFF0000
+
+/* gap */
+/* There seems to be no "write only" setting, so use Z-test = ALWAYS
+ * for this.
+ * Bit (1<<8) is the "test" bit. so plain write is 6  - vd
+ */
+#define R300_ZB_CNTL                             0x4f00
+#  define R300_STENCIL_ENABLE                    (0x1 << 0)
+#  define R300_Z_ENABLE                          (0x1 << 1)
+#  define R300_Z_WRITE_ENABLE                    (0x1 << 2)
+#  define R300_Z_SIGNED_COMPARE                  (0x1 << 3)
+#  define R300_STENCIL_FRONT_BACK                (0x1 << 4)
+#  define R400_ZSIGNED_MAGNITUDE                 (0x1 << 5)
+#  define R500_STENCIL_REFMASK_FRONT_BACK        (0x1 << 6)
+
+#define R300_ZB_ZSTENCILCNTL                   0x4F04
+	/* comparison functions (3-bit values, see R300_ZS_MASK) */
+#  define R300_ZS_NEVER                        0x0
+#  define R300_ZS_LESS                         0x1
+#  define R300_ZS_LEQUAL                       0x2
+#  define R300_ZS_EQUAL                        0x3
+#  define R300_ZS_GEQUAL                       0x4
+#  define R300_ZS_GREATER                      0x5
+#  define R300_ZS_NOTEQUAL                     0x6
+#  define R300_ZS_ALWAYS                       0x7
+#  define R300_ZS_MASK                         0x7
+	/* stencil operations (3-bit values) */
+#  define R300_ZS_KEEP                         0x0
+#  define R300_ZS_ZERO                         0x1
+#  define R300_ZS_REPLACE                      0x2
+#  define R300_ZS_INCR                         0x3
+#  define R300_ZS_DECR                         0x4
+#  define R300_ZS_INVERT                       0x5
+#  define R300_ZS_INCR_WRAP                    0x6
+#  define R300_ZS_DECR_WRAP                    0x7
+#  define R300_Z_FUNC_SHIFT                    0
+	/* "front" and "back" select the operations applied to front-facing
+	   and back-facing primitives, i.e. separate stencil function support */
+#  define R300_S_FRONT_FUNC_SHIFT              3
+#  define R300_S_FRONT_SFAIL_OP_SHIFT          6
+#  define R300_S_FRONT_ZPASS_OP_SHIFT          9
+#  define R300_S_FRONT_ZFAIL_OP_SHIFT          12
+#  define R300_S_BACK_FUNC_SHIFT               15
+#  define R300_S_BACK_SFAIL_OP_SHIFT           18
+#  define R300_S_BACK_ZPASS_OP_SHIFT           21
+#  define R300_S_BACK_ZFAIL_OP_SHIFT           24
+
+#define R300_ZB_STENCILREFMASK                 0x4F08
+#  define R300_STENCILREF_MASK                 0x000000FF
+#  define R300_STENCILREF_SHIFT                0
+#  define R300_STENCILMASK_MASK                0x0000FF00
+#  define R300_STENCILMASK_SHIFT               8
+#  define R300_STENCILWRITEMASK_MASK           0x00FF0000
+#  define R300_STENCILWRITEMASK_SHIFT          16
+
+/* gap */
+
+#define R300_ZB_FORMAT                                   0x4F10
+#  define R300_DEPTHFORMAT_16BIT_INT_Z                   (0x0 << 0)
+#  define R300_DEPTHFORMAT_16BIT_13E3                    (0x1 << 0)
+#  define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL      (0x2 << 0)
+/* depth format values up to (15 << 0) are reserved */
+#  define R300_INVERT_13E3_LEADING_ONES                  (0x0 << 4)
+#  define R300_INVERT_13E3_LEADING_ZEROS                 (0x1 << 4)
+
+#define R300_ZB_ZTOP                             0x4f14
+#  define R300_ZTOP_DISABLE                      (0x0 << 0)
+#  define R300_ZTOP_ENABLE                       (0x1 << 0)
+
+/* gap */
+
+#define R300_ZB_ZCACHE_CTLSTAT            0x4f18 /* Z cache control/status: flush (bit 0), free (bit 1), busy (bit 31) */
+#       define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT      (0 << 0)
+#       define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0)
+#       define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT       (0 << 1)
+#       define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE            (1 << 1)
+#       define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE            (0u << 31) /* unsigned: shifting a signed 1 into bit 31 is undefined */
+#       define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY            (1u << 31)
+
+#define R300_ZB_BW_CNTL                     0x4f1c /* Z-buffer feature switches; one field per DISABLE/ENABLE pair below */
+#	define R300_HIZ_DISABLE                              (0 << 0)
+#	define R300_HIZ_ENABLE                               (1 << 0)
+#	define R300_HIZ_MIN                                  (0 << 1)
+#	define R300_HIZ_MAX                                  (1 << 1)
+#	define R300_FAST_FILL_DISABLE                        (0 << 2)
+#	define R300_FAST_FILL_ENABLE                         (1 << 2)
+#	define R300_RD_COMP_DISABLE                          (0 << 3)
+#	define R300_RD_COMP_ENABLE                           (1 << 3)
+#	define R300_WR_COMP_DISABLE                          (0 << 4)
+#	define R300_WR_COMP_ENABLE                           (1 << 4)
+#	define R300_ZB_CB_CLEAR_RMW                          (0 << 5)
+#	define R300_ZB_CB_CLEAR_CACHE_LINEAR                 (1 << 5)
+#	define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE   (0 << 6)
+#	define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE    (1 << 6)
+
+#	define R500_ZEQUAL_OPTIMIZE_ENABLE                   (0 << 7) /* note: for bits 7, 8, 10, 16 and 17 a set bit means DISABLE */
+#	define R500_ZEQUAL_OPTIMIZE_DISABLE                  (1 << 7)
+#	define R500_SEQUAL_OPTIMIZE_ENABLE                   (0 << 8)
+#	define R500_SEQUAL_OPTIMIZE_DISABLE                  (1 << 8)
+
+#	define R500_BMASK_ENABLE                             (0 << 10)
+#	define R500_BMASK_DISABLE                            (1 << 10)
+#	define R500_HIZ_EQUAL_REJECT_DISABLE                 (0 << 11)
+#	define R500_HIZ_EQUAL_REJECT_ENABLE                  (1 << 11)
+#	define R500_HIZ_FP_EXP_BITS_DISABLE                  (0 << 12)
+#	define R500_HIZ_FP_EXP_BITS_1                        (1 << 12)
+#	define R500_HIZ_FP_EXP_BITS_2                        (2 << 12)
+#	define R500_HIZ_FP_EXP_BITS_3                        (3 << 12)
+#	define R500_HIZ_FP_EXP_BITS_4                        (4 << 12)
+#	define R500_HIZ_FP_EXP_BITS_5                        (5 << 12)
+#	define R500_HIZ_FP_INVERT_LEADING_ONES               (0 << 15)
+#	define R500_HIZ_FP_INVERT_LEADING_ZEROS              (1 << 15)
+#	define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE      (0 << 16)
+#	define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE     (1 << 16)
+#	define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE           (0 << 17)
+#	define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE          (1 << 17)
+#	define R500_PEQ_PACKING_DISABLE                      (0 << 18)
+#	define R500_PEQ_PACKING_ENABLE                       (1 << 18)
+#	define R500_COVERED_PTR_MASKING_DISABLE              (0 << 19) /* was bit 18, which aliased PEQ_PACKING */
+#	define R500_COVERED_PTR_MASKING_ENABLE               (1 << 19)
+
+
+/* gap */
+
+/* Z buffer address offset.
+ * For macro tiles, bits 31:5 hold the aligned Z buffer address offset.
+ */
+#define R300_ZB_DEPTHOFFSET                      0x4F20
+
+/* Z Buffer Pitch and Endian Control: pitch bits 13:2, macrotile bit 16, microtile bits 18:17, endian bits 20:19 */
+#define R300_ZB_DEPTHPITCH                0x4f24
+#       define R300_DEPTHPITCH_MASK              0x00003FFC
+#       define R300_DEPTHMACROTILE_DISABLE      (0 << 16)
+#       define R300_DEPTHMACROTILE_ENABLE       (1 << 16)
+#       define R300_DEPTHMICROTILE_LINEAR       (0 << 17)
+#       define R300_DEPTHMICROTILE_TILED        (1 << 17)
+#       define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17)
+#       define R300_DEPTHENDIAN_NO_SWAP         (0 << 19) /* was << 18, where WORD_SWAP aliased MICROTILE_TILED_SQUARE */
+#       define R300_DEPTHENDIAN_WORD_SWAP       (1 << 19)
+#       define R300_DEPTHENDIAN_DWORD_SWAP      (2 << 19)
+#       define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 19)
+
+/* Z buffer clear value */
+#define R300_ZB_DEPTHCLEARVALUE                  0x4F28
+
+#define R300_ZB_ZMASK_OFFSET                     0x4F30
+#define R300_ZB_ZMASK_PITCH                      0x4F34
+#define R300_ZB_ZMASK_WRINDEX                    0x4F38
+#define R300_ZB_ZMASK_DWORD                      0x4F3C
+#define R300_ZB_ZMASK_RDINDEX                    0x4F40
+
+/* Hierarchical Z: memory offset */
+#define R300_ZB_HIZ_OFFSET                       0x4F44
+
+/* Hierarchical Z: write index */
+#define R300_ZB_HIZ_WRINDEX                      0x4F48
+
+/* Hierarchical Z: data */
+#define R300_ZB_HIZ_DWORD                        0x4F4C
+
+/* Hierarchical Z: read index */
+#define R300_ZB_HIZ_RDINDEX                      0x4F50
+
+/* Hierarchical Z: pitch */
+#define R300_ZB_HIZ_PITCH                        0x4F54
+
+/* Z pass counter: data */
+#define R300_ZB_ZPASS_DATA                       0x4F58
+
+/* Z pass counter: address */
+#define R300_ZB_ZPASS_ADDR                       0x4F5C
+
+/* Depth buffer X and Y coordinate offset */
+#define R300_ZB_DEPTHXY_OFFSET                   0x4F60
+#  define R300_DEPTHX_OFFSET_MASK                0x000007fe
+#  define R300_DEPTHX_OFFSET_SHIFT               1
+#  define R300_DEPTHY_OFFSET_MASK                0x07fe0000
+#  define R300_DEPTHY_OFFSET_SHIFT               17
+
+/* Sets the fifo sizes. OP_FIFO_SIZE uses the same 0..3 encoding as R500_RB3D_FIFO_SIZE. */
+#define R500_ZB_FIFO_SIZE                        0x4fd0
+#	define R500_OP_FIFO_SIZE_FULL   (0 << 0)
+#	define R500_OP_FIFO_SIZE_HALF   (1 << 0)
+#	define R500_OP_FIFO_SIZE_QUATER (2 << 0)
+#	define R500_OP_FIFO_SIZE_EIGTHS (3 << 0) /* was (4 << 0), which breaks the 0..3 sequence */
+
+/* Stencil Reference Value and Mask for backfacing quads */
+/* R300_ZB_STENCILREFMASK handles front face */
+#define R500_ZB_STENCILREFMASK_BF                0x4FD4
+#  define R500_STENCILREF_MASK                   0x000000FF
+#  define R500_STENCILREF_SHIFT                  0
+#  define R500_STENCILMASK_MASK                  0x0000FF00
+#  define R500_STENCILMASK_SHIFT                 8
+#  define R500_STENCILWRITEMASK_MASK             0x00FF0000
+#  define R500_STENCILWRITEMASK_SHIFT            16
+
+/**
+ * \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION
+ *
+ * The PVS_DST_MATH_INST is used to identify whether the instruction is a Vector
+ * Engine instruction or a Math Engine instruction.
+ */
+
+/*\{*/
+
+enum {
+	/* Vector Engine opcodes present on R3XX */
+	VECTOR_NO_OP			= 0x00,
+	VE_DOT_PRODUCT			= 0x01,
+	VE_MULTIPLY			= 0x02,
+	VE_ADD				= 0x03,
+	VE_MULTIPLY_ADD			= 0x04,
+	VE_DISTANCE_VECTOR		= 0x05,
+	VE_FRACTION			= 0x06,
+	VE_MAXIMUM			= 0x07,
+	VE_MINIMUM			= 0x08,
+	VE_SET_GREATER_THAN_EQUAL	= 0x09,
+	VE_SET_LESS_THAN		= 0x0a,
+	VE_MULTIPLYX2_ADD		= 0x0b,
+	VE_MULTIPLY_CLAMP		= 0x0c,
+	VE_FLT2FIX_DX			= 0x0d,
+	VE_FLT2FIX_DX_RND		= 0x0e,
+	/* Vector Engine opcodes added on R5XX */
+	VE_PRED_SET_EQ_PUSH		= 0x0f,
+	VE_PRED_SET_GT_PUSH		= 0x10,
+	VE_PRED_SET_GTE_PUSH		= 0x11,
+	VE_PRED_SET_NEQ_PUSH		= 0x12,
+	VE_COND_WRITE_EQ		= 0x13,
+	VE_COND_WRITE_GT		= 0x14,
+	VE_COND_WRITE_GTE		= 0x15,
+	VE_COND_WRITE_NEQ		= 0x16,
+	VE_COND_MUX_EQ			= 0x17,
+	VE_COND_MUX_GT			= 0x18,
+	VE_COND_MUX_GTE			= 0x19,
+	VE_SET_GREATER_THAN		= 0x1a,
+	VE_SET_EQUAL			= 0x1b,
+	VE_SET_NOT_EQUAL		= 0x1c,
+};
+
+enum {
+	/* Math Engine opcodes present on R3XX */
+	MATH_NO_OP			= 0x00,
+	ME_EXP_BASE2_DX			= 0x01,
+	ME_LOG_BASE2_DX			= 0x02,
+	ME_EXP_BASEE_FF			= 0x03,
+	ME_LIGHT_COEFF_DX		= 0x04,
+	ME_POWER_FUNC_FF		= 0x05,
+	ME_RECIP_DX			= 0x06,
+	ME_RECIP_FF			= 0x07,
+	ME_RECIP_SQRT_DX		= 0x08,
+	ME_RECIP_SQRT_FF		= 0x09,
+	ME_MULTIPLY			= 0x0a,
+	ME_EXP_BASE2_FULL_DX		= 0x0b,
+	ME_LOG_BASE2_FULL_DX		= 0x0c,
+	ME_POWER_FUNC_FF_CLAMP_B	= 0x0d,
+	ME_POWER_FUNC_FF_CLAMP_B1	= 0x0e,
+	ME_POWER_FUNC_FF_CLAMP_01	= 0x0f,
+	ME_SIN				= 0x10,
+	ME_COS				= 0x11,
+	/* Math Engine opcodes added on R5XX */
+	ME_LOG_BASE2_IEEE		= 0x12,
+	ME_RECIP_IEEE			= 0x13,
+	ME_RECIP_SQRT_IEEE		= 0x14,
+	ME_PRED_SET_EQ			= 0x15,
+	ME_PRED_SET_GT			= 0x16,
+	ME_PRED_SET_GTE			= 0x17,
+	ME_PRED_SET_NEQ			= 0x18,
+	ME_PRED_SET_CLR			= 0x19,
+	ME_PRED_SET_INV			= 0x1a,
+	ME_PRED_SET_POP			= 0x1b,
+	ME_PRED_SET_RESTORE		= 0x1c,
+};
+
+enum {
+	/* Macro opcodes present on R3XX */
+	PVS_MACRO_OP_2CLK_MADD		= 0x0,
+	PVS_MACRO_OP_2CLK_M2X_ADD	= 0x1,
+};
+
+enum {
+	PVS_SRC_REG_TEMPORARY		= 0x0,	/* Intermediate storage */
+	PVS_SRC_REG_INPUT		= 0x1,	/* Input vertex storage */
+	PVS_SRC_REG_CONSTANT		= 0x2,	/* Constant state storage */
+	PVS_SRC_REG_ALT_TEMPORARY	= 0x3,	/* Alternate intermediate storage */
+};
+
+enum {
+	PVS_DST_REG_TEMPORARY		= 0x0,	/* Intermediate storage */
+	PVS_DST_REG_A0			= 0x1,	/* Address register storage */
+	PVS_DST_REG_OUT			= 0x2,	/* Output memory; used for all outputs */
+	PVS_DST_REG_OUT_REPL_X		= 0x3,	/* Output memory, X replicated to all channels */
+	PVS_DST_REG_ALT_TEMPORARY	= 0x4,	/* Alternate intermediate storage */
+	PVS_DST_REG_INPUT		= 0x5,	/* NOTE(review): old comment duplicated OUT_REPL_X; presumably input storage - confirm */
+};
+
+enum {
+	PVS_SRC_SELECT_X		= 0x0,	/* Take the X component */
+	PVS_SRC_SELECT_Y		= 0x1,	/* Take the Y component */
+	PVS_SRC_SELECT_Z		= 0x2,	/* Take the Z component */
+	PVS_SRC_SELECT_W		= 0x3,	/* Take the W component */
+	PVS_SRC_SELECT_FORCE_0		= 0x4,	/* Component is forced to 0.0 */
+	PVS_SRC_SELECT_FORCE_1		= 0x5,	/* Component is forced to 1.0 */
+};
+
+/* PVS Opcode & Destination Operand Description */
+
+enum {
+	PVS_DST_OPCODE_SHIFT		= 0,	/* bits 5:0 */
+	PVS_DST_OPCODE_MASK		= 0x3f,
+	PVS_DST_MATH_INST_SHIFT		= 6,	/* bit 6 */
+	PVS_DST_MATH_INST_MASK		= 0x01,
+	PVS_DST_MACRO_INST_SHIFT	= 7,	/* bit 7 */
+	PVS_DST_MACRO_INST_MASK		= 0x01,
+	PVS_DST_REG_TYPE_SHIFT		= 8,	/* bits 11:8 */
+	PVS_DST_REG_TYPE_MASK		= 0x0f,
+	PVS_DST_ADDR_MODE_1_SHIFT	= 12,	/* bit 12 */
+	PVS_DST_ADDR_MODE_1_MASK	= 0x01,
+	PVS_DST_OFFSET_SHIFT		= 13,	/* bits 19:13 */
+	PVS_DST_OFFSET_MASK		= 0x7f,
+	PVS_DST_WE_X_SHIFT		= 20,	/* bit 20 */
+	PVS_DST_WE_X_MASK		= 0x01,
+	PVS_DST_WE_Y_SHIFT		= 21,	/* bit 21 */
+	PVS_DST_WE_Y_MASK		= 0x01,
+	PVS_DST_WE_Z_SHIFT		= 22,	/* bit 22 */
+	PVS_DST_WE_Z_MASK		= 0x01,
+	PVS_DST_WE_W_SHIFT		= 23,	/* bit 23 */
+	PVS_DST_WE_W_MASK		= 0x01,
+	PVS_DST_VE_SAT_SHIFT		= 24,	/* bit 24 */
+	PVS_DST_VE_SAT_MASK		= 0x01,
+	PVS_DST_ME_SAT_SHIFT		= 25,	/* bit 25 */
+	PVS_DST_ME_SAT_MASK		= 0x01,
+	PVS_DST_PRED_ENABLE_SHIFT	= 26,	/* bit 26 */
+	PVS_DST_PRED_ENABLE_MASK	= 0x01,
+	PVS_DST_PRED_SENSE_SHIFT	= 27,	/* bit 27 */
+	PVS_DST_PRED_SENSE_MASK		= 0x01,
+	PVS_DST_DUAL_MATH_OP_SHIFT	= 27,	/* bits 28:27; NOTE(review): shares bit 27 with PRED_SENSE - confirm intended */
+	PVS_DST_DUAL_MATH_OP_MASK	= 0x03,
+	PVS_DST_ADDR_SEL_SHIFT		= 29,	/* bits 30:29 */
+	PVS_DST_ADDR_SEL_MASK		= 0x03,
+	PVS_DST_ADDR_MODE_0_SHIFT	= 31,	/* bit 31 */
+	PVS_DST_ADDR_MODE_0_MASK	= 0x01,
+};
+
+/* PVS Source Operand Description: each field is an unshifted mask plus its bit position. */
+/* Fields below fill bits 30:0 of the 32-bit operand word, so ADDR_MODE_1 can only be bit 31. */
+enum {
+	PVS_SRC_REG_TYPE_MASK		= 0x3,
+	PVS_SRC_REG_TYPE_SHIFT		= 0,
+	SPARE_0_MASK			= 0x1,
+	SPARE_0_SHIFT			= 2,
+	PVS_SRC_ABS_XYZW_MASK		= 0x1,
+	PVS_SRC_ABS_XYZW_SHIFT		= 3,
+	PVS_SRC_ADDR_MODE_0_MASK	= 0x1,
+	PVS_SRC_ADDR_MODE_0_SHIFT	= 4,
+	PVS_SRC_OFFSET_MASK		= 0xff,
+	PVS_SRC_OFFSET_SHIFT		= 5,
+	PVS_SRC_SWIZZLE_X_MASK		= 0x7,
+	PVS_SRC_SWIZZLE_X_SHIFT		= 13,
+	PVS_SRC_SWIZZLE_Y_MASK		= 0x7,
+	PVS_SRC_SWIZZLE_Y_SHIFT		= 16,
+	PVS_SRC_SWIZZLE_Z_MASK		= 0x7,
+	PVS_SRC_SWIZZLE_Z_SHIFT		= 19,
+	PVS_SRC_SWIZZLE_W_MASK		= 0x7,
+	PVS_SRC_SWIZZLE_W_SHIFT		= 22,
+	PVS_SRC_MODIFIER_X_MASK		= 0x1,
+	PVS_SRC_MODIFIER_X_SHIFT	= 25,
+	PVS_SRC_MODIFIER_Y_MASK		= 0x1,
+	PVS_SRC_MODIFIER_Y_SHIFT	= 26,
+	PVS_SRC_MODIFIER_Z_MASK		= 0x1,
+	PVS_SRC_MODIFIER_Z_SHIFT	= 27,
+	PVS_SRC_MODIFIER_W_MASK		= 0x1,
+	PVS_SRC_MODIFIER_W_SHIFT	= 28,
+	PVS_SRC_ADDR_SEL_MASK		= 0x3,
+	PVS_SRC_ADDR_SEL_SHIFT		= 29,
+	PVS_SRC_ADDR_MODE_1_MASK	= 0x1,	/* was 0x0, which masked the field away entirely */
+	PVS_SRC_ADDR_MODE_1_SHIFT	= 31,	/* was 32: outside the word, and an undefined shift count */
+};
+
+/* Packs a PVS opcode/destination dword; reg_writemask is 4 bits (X Y Z W) starting at PVS_DST_WE_X_SHIFT. Arguments are parenthesized so expressions such as `a | b` are masked as a whole. */
+#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class)	\
+	 ((((opcode) & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT)	\
+	 | (((math_inst) & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT)	\
+	 | (((macro_inst) & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT)	\
+	 | (((reg_index) & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT)	\
+	 | (((reg_writemask) & 0xf) << PVS_DST_WE_X_SHIFT)	/* X Y Z W */	\
+	 | (((reg_class) & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT))
+/* Packs a PVS source-operand dword; negate is 4 bits (X Y Z W) starting at PVS_SRC_MODIFIER_X_SHIFT. Arguments are parenthesized so expressions such as `a | b` are masked as a whole. */
+#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate)	\
+	((((in_reg_index) & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT)			\
+	 | (((comp_x) & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT)			\
+	 | (((comp_y) & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT)			\
+	 | (((comp_z) & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT)			\
+	 | (((comp_w) & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT)			\
+	 | (((negate) & 0xf) << PVS_SRC_MODIFIER_X_SHIFT)	/* X Y Z W */			\
+	 | (((reg_class) & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
+
+/*\}*/
+
+/* BEGIN: Packet 3 commands */
+
+/* A primitive emission dword. */
+#define R300_PRIM_TYPE_NONE                     (0x0 << 0)
+#define R300_PRIM_TYPE_POINT                    (0x1 << 0)
+#define R300_PRIM_TYPE_LINE                     (0x2 << 0)
+#define R300_PRIM_TYPE_LINE_STRIP               (0x3 << 0)
+#define R300_PRIM_TYPE_TRI_LIST                 (0x4 << 0)
+#define R300_PRIM_TYPE_TRI_FAN                  (0x5 << 0)
+#define R300_PRIM_TYPE_TRI_STRIP                (0x6 << 0)
+#define R300_PRIM_TYPE_TRI_TYPE2                (0x7 << 0)
+#define R300_PRIM_TYPE_RECT_LIST                (0x8 << 0)
+#define R300_PRIM_TYPE_3VRT_POINT_LIST          (0x9 << 0)
+#define R300_PRIM_TYPE_3VRT_LINE_LIST           (0xa << 0)
+	/* The next five types are a GUESS (based on r200) */
+#define R300_PRIM_TYPE_POINT_SPRITES            (0xb << 0)
+#define R300_PRIM_TYPE_LINE_LOOP                (0xc << 0)
+#define R300_PRIM_TYPE_QUADS                    (0xd << 0)
+#define R300_PRIM_TYPE_QUAD_STRIP               (0xe << 0)
+#define R300_PRIM_TYPE_POLYGON                  (0xf << 0)
+#define R300_PRIM_TYPE_MASK                     0xf
+#define R300_PRIM_WALK_IND                      (0x1 << 4)
+#define R300_PRIM_WALK_LIST                     (0x2 << 4)
+#define R300_PRIM_WALK_RING                     (0x3 << 4)
+#define R300_PRIM_WALK_MASK                     (0x3 << 4)
+	/* Color order is a GUESS (based on r200) */
+#define R300_PRIM_COLOR_ORDER_BGRA              (0x0 << 6)
+#define R300_PRIM_COLOR_ORDER_RGBA              (0x1 << 6)
+#define R300_PRIM_NUM_VERTICES_SHIFT            16
+#define R300_PRIM_NUM_VERTICES_MASK             0xFFFF /* unshifted: apply before shifting by R300_PRIM_NUM_VERTICES_SHIFT */
+
+
+
+/*
+ * The R500 unified shader (US) registers come in banks of 512 each, one
+ * for each instruction slot in the shader.  You can't touch them directly.
+ * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive
+ * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the
+ * instruction is fully specified.
+ */
+#define R500_US_ALU_ALPHA_INST_0			0xa800	/* alpha ALU word: opcode, dest addr, operand A/B select+swizzle+modifier, output modifier, target */
+#   define R500_ALPHA_OP_MAD				0
+#   define R500_ALPHA_OP_DP				1
+#   define R500_ALPHA_OP_MIN				2
+#   define R500_ALPHA_OP_MAX				3
+/* #define R500_ALPHA_OP_RESERVED			4 */
+#   define R500_ALPHA_OP_CND				5
+#   define R500_ALPHA_OP_CMP				6
+#   define R500_ALPHA_OP_FRC				7
+#   define R500_ALPHA_OP_EX2				8
+#   define R500_ALPHA_OP_LN2				9
+#   define R500_ALPHA_OP_RCP				10
+#   define R500_ALPHA_OP_RSQ				11
+#   define R500_ALPHA_OP_SIN				12
+#   define R500_ALPHA_OP_COS				13
+#   define R500_ALPHA_OP_MDH				14
+#   define R500_ALPHA_OP_MDV				15
+#   define R500_ALPHA_ADDRD(x)				((x) << 4)
+#   define R500_ALPHA_ADDRD_REL				(1 << 11)
+#  define R500_ALPHA_SEL_A_SHIFT			12
+#   define R500_ALPHA_SEL_A_SRC0			(0 << 12)
+#   define R500_ALPHA_SEL_A_SRC1			(1 << 12)
+#   define R500_ALPHA_SEL_A_SRC2			(2 << 12)
+#   define R500_ALPHA_SEL_A_SRCP			(3 << 12)
+#   define R500_ALPHA_SWIZ_A_R				(0 << 14)
+#   define R500_ALPHA_SWIZ_A_G				(1 << 14)
+#   define R500_ALPHA_SWIZ_A_B				(2 << 14)
+#   define R500_ALPHA_SWIZ_A_A				(3 << 14)
+#   define R500_ALPHA_SWIZ_A_0				(4 << 14)
+#   define R500_ALPHA_SWIZ_A_HALF			(5 << 14)
+#   define R500_ALPHA_SWIZ_A_1				(6 << 14)
+/* #define R500_ALPHA_SWIZ_A_UNUSED			(7 << 14) */
+#   define R500_ALPHA_MOD_A_NOP				(0 << 17)
+#   define R500_ALPHA_MOD_A_NEG				(1 << 17)
+#   define R500_ALPHA_MOD_A_ABS				(2 << 17)
+#   define R500_ALPHA_MOD_A_NAB				(3 << 17)
+#  define R500_ALPHA_SEL_B_SHIFT			19
+#   define R500_ALPHA_SEL_B_SRC0			(0 << 19)
+#   define R500_ALPHA_SEL_B_SRC1			(1 << 19)
+#   define R500_ALPHA_SEL_B_SRC2			(2 << 19)
+#   define R500_ALPHA_SEL_B_SRCP			(3 << 19)
+#   define R500_ALPHA_SWIZ_B_R				(0 << 21)
+#   define R500_ALPHA_SWIZ_B_G				(1 << 21)
+#   define R500_ALPHA_SWIZ_B_B				(2 << 21)
+#   define R500_ALPHA_SWIZ_B_A				(3 << 21)
+#   define R500_ALPHA_SWIZ_B_0				(4 << 21)
+#   define R500_ALPHA_SWIZ_B_HALF			(5 << 21)
+#   define R500_ALPHA_SWIZ_B_1				(6 << 21)
+/* #define R500_ALPHA_SWIZ_B_UNUSED			(7 << 21) */
+#   define R500_ALPHA_MOD_B_NOP				(0 << 24)
+#   define R500_ALPHA_MOD_B_NEG				(1 << 24)
+#   define R500_ALPHA_MOD_B_ABS				(2 << 24)
+#   define R500_ALPHA_MOD_B_NAB				(3 << 24)
+#   define R500_ALPHA_OMOD_IDENTITY			(0 << 26)
+#   define R500_ALPHA_OMOD_MUL_2			(1 << 26)
+#   define R500_ALPHA_OMOD_MUL_4			(2 << 26)
+#   define R500_ALPHA_OMOD_MUL_8			(3 << 26)
+#   define R500_ALPHA_OMOD_DIV_2			(4 << 26)
+#   define R500_ALPHA_OMOD_DIV_4			(5 << 26)
+#   define R500_ALPHA_OMOD_DIV_8			(6 << 26)
+#   define R500_ALPHA_OMOD_DISABLE			(7 << 26)
+#   define R500_ALPHA_TARGET(x)				((x) << 29)
+#   define R500_ALPHA_W_OMASK				(1u << 31)	/* unsigned: (1 << 31) overflows a signed int */
+#define R500_US_ALU_ALPHA_ADDR_0			0x9800	/* alpha source addresses 0..2 (each with CONST/REL flags) and SRCP operation */
+#   define R500_ALPHA_ADDR0(x)				((x) << 0)
+#   define R500_ALPHA_ADDR0_CONST			(1 << 8)
+#   define R500_ALPHA_ADDR0_REL				(1 << 9)
+#   define R500_ALPHA_ADDR1(x)				((x) << 10)
+#   define R500_ALPHA_ADDR1_CONST			(1 << 18)
+#   define R500_ALPHA_ADDR1_REL				(1 << 19)
+#   define R500_ALPHA_ADDR2(x)				((x) << 20)
+#   define R500_ALPHA_ADDR2_CONST			(1 << 28)
+#   define R500_ALPHA_ADDR2_REL				(1 << 29)
+#   define R500_ALPHA_SRCP_OP_1_MINUS_2A0		(0u << 30)	/* unsigned: values 2 and 3 at bit 30 overflow a signed int */
+#   define R500_ALPHA_SRCP_OP_A1_MINUS_A0		(1u << 30)
+#   define R500_ALPHA_SRCP_OP_A1_PLUS_A0		(2u << 30)
+#   define R500_ALPHA_SRCP_OP_1_MINUS_A0		(3u << 30)
+#define R500_US_ALU_RGBA_INST_0				0xb000	/* RGB opcode, dest addr, and operand C select/swizzle/modifier for RGB and alpha */
+#   define R500_ALU_RGBA_OP_MAD				(0 << 0)
+#   define R500_ALU_RGBA_OP_DP3				(1 << 0)
+#   define R500_ALU_RGBA_OP_DP4				(2 << 0)
+#   define R500_ALU_RGBA_OP_D2A				(3 << 0)
+#   define R500_ALU_RGBA_OP_MIN				(4 << 0)
+#   define R500_ALU_RGBA_OP_MAX				(5 << 0)
+/* #define R500_ALU_RGBA_OP_RESERVED			(6 << 0) */
+#   define R500_ALU_RGBA_OP_CND				(7 << 0)
+#   define R500_ALU_RGBA_OP_CMP				(8 << 0)
+#   define R500_ALU_RGBA_OP_FRC				(9 << 0)
+#   define R500_ALU_RGBA_OP_SOP				(10 << 0)
+#   define R500_ALU_RGBA_OP_MDH				(11 << 0)
+#   define R500_ALU_RGBA_OP_MDV				(12 << 0)
+#   define R500_ALU_RGBA_ADDRD(x)			((x) << 4)
+#   define R500_ALU_RGBA_ADDRD_REL			(1 << 11)
+#  define R500_ALU_RGBA_SEL_C_SHIFT			12
+#   define R500_ALU_RGBA_SEL_C_SRC0			(0 << 12)
+#   define R500_ALU_RGBA_SEL_C_SRC1			(1 << 12)
+#   define R500_ALU_RGBA_SEL_C_SRC2			(2 << 12)
+#   define R500_ALU_RGBA_SEL_C_SRCP			(3 << 12)
+#   define R500_ALU_RGBA_R_SWIZ_R			(0 << 14)
+#   define R500_ALU_RGBA_R_SWIZ_G			(1 << 14)
+#   define R500_ALU_RGBA_R_SWIZ_B			(2 << 14)
+#   define R500_ALU_RGBA_R_SWIZ_A			(3 << 14)
+#   define R500_ALU_RGBA_R_SWIZ_0			(4 << 14)
+#   define R500_ALU_RGBA_R_SWIZ_HALF			(5 << 14)
+#   define R500_ALU_RGBA_R_SWIZ_1			(6 << 14)
+/* #define R500_ALU_RGBA_R_SWIZ_UNUSED			(7 << 14) */
+#   define R500_ALU_RGBA_G_SWIZ_R			(0 << 17)
+#   define R500_ALU_RGBA_G_SWIZ_G			(1 << 17)
+#   define R500_ALU_RGBA_G_SWIZ_B			(2 << 17)
+#   define R500_ALU_RGBA_G_SWIZ_A			(3 << 17)
+#   define R500_ALU_RGBA_G_SWIZ_0			(4 << 17)
+#   define R500_ALU_RGBA_G_SWIZ_HALF			(5 << 17)
+#   define R500_ALU_RGBA_G_SWIZ_1			(6 << 17)
+/* #define R500_ALU_RGBA_G_SWIZ_UNUSED			(7 << 17) */
+#   define R500_ALU_RGBA_B_SWIZ_R			(0 << 20)
+#   define R500_ALU_RGBA_B_SWIZ_G			(1 << 20)
+#   define R500_ALU_RGBA_B_SWIZ_B			(2 << 20)
+#   define R500_ALU_RGBA_B_SWIZ_A			(3 << 20)
+#   define R500_ALU_RGBA_B_SWIZ_0			(4 << 20)
+#   define R500_ALU_RGBA_B_SWIZ_HALF			(5 << 20)
+#   define R500_ALU_RGBA_B_SWIZ_1			(6 << 20)
+/* #define R500_ALU_RGBA_B_SWIZ_UNUSED			(7 << 20) */
+#   define R500_ALU_RGBA_MOD_C_NOP			(0 << 23)
+#   define R500_ALU_RGBA_MOD_C_NEG			(1 << 23)
+#   define R500_ALU_RGBA_MOD_C_ABS			(2 << 23)
+#   define R500_ALU_RGBA_MOD_C_NAB			(3 << 23)
+#  define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT		25
+#   define R500_ALU_RGBA_ALPHA_SEL_C_SRC0		(0 << 25)
+#   define R500_ALU_RGBA_ALPHA_SEL_C_SRC1		(1 << 25)
+#   define R500_ALU_RGBA_ALPHA_SEL_C_SRC2		(2 << 25)
+#   define R500_ALU_RGBA_ALPHA_SEL_C_SRCP		(3 << 25)
+#   define R500_ALU_RGBA_A_SWIZ_R			(0 << 27)
+#   define R500_ALU_RGBA_A_SWIZ_G			(1 << 27)
+#   define R500_ALU_RGBA_A_SWIZ_B			(2 << 27)
+#   define R500_ALU_RGBA_A_SWIZ_A			(3 << 27)
+#   define R500_ALU_RGBA_A_SWIZ_0			(4 << 27)
+#   define R500_ALU_RGBA_A_SWIZ_HALF			(5 << 27)
+#   define R500_ALU_RGBA_A_SWIZ_1			(6 << 27)
+/* #define R500_ALU_RGBA_A_SWIZ_UNUSED			(7 << 27) */
+#   define R500_ALU_RGBA_ALPHA_MOD_C_NOP		(0u << 30)	/* unsigned: values 2 and 3 at bit 30 overflow a signed int */
+#   define R500_ALU_RGBA_ALPHA_MOD_C_NEG		(1u << 30)
+#   define R500_ALU_RGBA_ALPHA_MOD_C_ABS		(2u << 30)
+#   define R500_ALU_RGBA_ALPHA_MOD_C_NAB		(3u << 30)
+#define R500_US_ALU_RGB_INST_0				0xa000	/* RGB operand A/B select, per-channel swizzle, modifier, output modifier, target */
+#  define R500_ALU_RGB_SEL_A_SHIFT			0
+#   define R500_ALU_RGB_SEL_A_SRC0			(0 << 0)
+#   define R500_ALU_RGB_SEL_A_SRC1			(1 << 0)
+#   define R500_ALU_RGB_SEL_A_SRC2			(2 << 0)
+#   define R500_ALU_RGB_SEL_A_SRCP			(3 << 0)
+#   define R500_ALU_RGB_R_SWIZ_A_R			(0 << 2)
+#   define R500_ALU_RGB_R_SWIZ_A_G			(1 << 2)
+#   define R500_ALU_RGB_R_SWIZ_A_B			(2 << 2)
+#   define R500_ALU_RGB_R_SWIZ_A_A			(3 << 2)
+#   define R500_ALU_RGB_R_SWIZ_A_0			(4 << 2)
+#   define R500_ALU_RGB_R_SWIZ_A_HALF			(5 << 2)
+#   define R500_ALU_RGB_R_SWIZ_A_1			(6 << 2)
+/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED			(7 << 2) */
+#   define R500_ALU_RGB_G_SWIZ_A_R			(0 << 5)
+#   define R500_ALU_RGB_G_SWIZ_A_G			(1 << 5)
+#   define R500_ALU_RGB_G_SWIZ_A_B			(2 << 5)
+#   define R500_ALU_RGB_G_SWIZ_A_A			(3 << 5)
+#   define R500_ALU_RGB_G_SWIZ_A_0			(4 << 5)
+#   define R500_ALU_RGB_G_SWIZ_A_HALF			(5 << 5)
+#   define R500_ALU_RGB_G_SWIZ_A_1			(6 << 5)
+/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED			(7 << 5) */
+#   define R500_ALU_RGB_B_SWIZ_A_R			(0 << 8)
+#   define R500_ALU_RGB_B_SWIZ_A_G			(1 << 8)
+#   define R500_ALU_RGB_B_SWIZ_A_B			(2 << 8)
+#   define R500_ALU_RGB_B_SWIZ_A_A			(3 << 8)
+#   define R500_ALU_RGB_B_SWIZ_A_0			(4 << 8)
+#   define R500_ALU_RGB_B_SWIZ_A_HALF			(5 << 8)
+#   define R500_ALU_RGB_B_SWIZ_A_1			(6 << 8)
+/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED			(7 << 8) */
+#   define R500_ALU_RGB_MOD_A_NOP			(0 << 11)
+#   define R500_ALU_RGB_MOD_A_NEG			(1 << 11)
+#   define R500_ALU_RGB_MOD_A_ABS			(2 << 11)
+#   define R500_ALU_RGB_MOD_A_NAB			(3 << 11)
+#  define R500_ALU_RGB_SEL_B_SHIFT			13
+#   define R500_ALU_RGB_SEL_B_SRC0			(0 << 13)
+#   define R500_ALU_RGB_SEL_B_SRC1			(1 << 13)
+#   define R500_ALU_RGB_SEL_B_SRC2			(2 << 13)
+#   define R500_ALU_RGB_SEL_B_SRCP			(3 << 13)
+#   define R500_ALU_RGB_R_SWIZ_B_R			(0 << 15)
+#   define R500_ALU_RGB_R_SWIZ_B_G			(1 << 15)
+#   define R500_ALU_RGB_R_SWIZ_B_B			(2 << 15)
+#   define R500_ALU_RGB_R_SWIZ_B_A			(3 << 15)
+#   define R500_ALU_RGB_R_SWIZ_B_0			(4 << 15)
+#   define R500_ALU_RGB_R_SWIZ_B_HALF			(5 << 15)
+#   define R500_ALU_RGB_R_SWIZ_B_1			(6 << 15)
+/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED			(7 << 15) */
+#   define R500_ALU_RGB_G_SWIZ_B_R			(0 << 18)
+#   define R500_ALU_RGB_G_SWIZ_B_G			(1 << 18)
+#   define R500_ALU_RGB_G_SWIZ_B_B			(2 << 18)
+#   define R500_ALU_RGB_G_SWIZ_B_A			(3 << 18)
+#   define R500_ALU_RGB_G_SWIZ_B_0			(4 << 18)
+#   define R500_ALU_RGB_G_SWIZ_B_HALF			(5 << 18)
+#   define R500_ALU_RGB_G_SWIZ_B_1			(6 << 18)
+/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED			(7 << 18) */
+#   define R500_ALU_RGB_B_SWIZ_B_R			(0 << 21)
+#   define R500_ALU_RGB_B_SWIZ_B_G			(1 << 21)
+#   define R500_ALU_RGB_B_SWIZ_B_B			(2 << 21)
+#   define R500_ALU_RGB_B_SWIZ_B_A			(3 << 21)
+#   define R500_ALU_RGB_B_SWIZ_B_0			(4 << 21)
+#   define R500_ALU_RGB_B_SWIZ_B_HALF			(5 << 21)
+#   define R500_ALU_RGB_B_SWIZ_B_1			(6 << 21)
+/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED			(7 << 21) */
+#   define R500_ALU_RGB_MOD_B_NOP			(0 << 24)
+#   define R500_ALU_RGB_MOD_B_NEG			(1 << 24)
+#   define R500_ALU_RGB_MOD_B_ABS			(2 << 24)
+#   define R500_ALU_RGB_MOD_B_NAB			(3 << 24)
+#   define R500_ALU_RGB_OMOD_IDENTITY			(0 << 26)
+#   define R500_ALU_RGB_OMOD_MUL_2			(1 << 26)
+#   define R500_ALU_RGB_OMOD_MUL_4			(2 << 26)
+#   define R500_ALU_RGB_OMOD_MUL_8			(3 << 26)
+#   define R500_ALU_RGB_OMOD_DIV_2			(4 << 26)
+#   define R500_ALU_RGB_OMOD_DIV_4			(5 << 26)
+#   define R500_ALU_RGB_OMOD_DIV_8			(6 << 26)
+#   define R500_ALU_RGB_OMOD_DISABLE			(7 << 26)
+#   define R500_ALU_RGB_TARGET(x)			((x) << 29)
+#   define R500_ALU_RGB_WMASK				(1u << 31)	/* unsigned: (1 << 31) overflows a signed int */
+#define R500_US_ALU_RGB_ADDR_0				0x9000
+#   define R500_RGB_ADDR0(x)				((x) << 0)
+#   define R500_RGB_ADDR0_CONST				(1 << 8)
+#   define R500_RGB_ADDR0_REL				(1 << 9)
+#   define R500_RGB_ADDR1(x)				((x) << 10)
+#   define R500_RGB_ADDR1_CONST				(1 << 18)
+#   define R500_RGB_ADDR1_REL				(1 << 19)
+#   define R500_RGB_ADDR2(x)				((x) << 20)
+#   define R500_RGB_ADDR2_CONST				(1 << 28)
+#   define R500_RGB_ADDR2_REL				(1 << 29)
+#   define R500_RGB_SRCP_OP_1_MINUS_2RGB0		(0 << 30)
+#   define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0		(1 << 30)
+#   define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0		(2u << 30)	/* unsigned: 2 << 30 overflows a 32-bit int */
+#   define R500_RGB_SRCP_OP_1_MINUS_RGB0		(3u << 30)	/* unsigned: 3 << 30 overflows a 32-bit int */
+#define R500_US_CMN_INST_0				0xb800
+#  define R500_INST_TYPE_MASK				(3 << 0)
+#   define R500_INST_TYPE_ALU				(0 << 0)
+#   define R500_INST_TYPE_OUT				(1 << 0)
+#   define R500_INST_TYPE_FC				(2 << 0)
+#   define R500_INST_TYPE_TEX				(3 << 0)
+#   define R500_INST_TEX_SEM_WAIT			(1 << 2)
+#   define R500_INST_RGB_PRED_SEL_NONE			(0 << 3)
+#   define R500_INST_RGB_PRED_SEL_RGBA			(1 << 3)
+#   define R500_INST_RGB_PRED_SEL_RRRR			(2 << 3)
+#   define R500_INST_RGB_PRED_SEL_GGGG			(3 << 3)
+#   define R500_INST_RGB_PRED_SEL_BBBB			(4 << 3)
+#   define R500_INST_RGB_PRED_SEL_AAAA			(5 << 3)
+#   define R500_INST_RGB_PRED_INV			(1 << 6)
+#   define R500_INST_WRITE_INACTIVE			(1 << 7)
+#   define R500_INST_LAST				(1 << 8)
+#   define R500_INST_NOP				(1 << 9)
+#   define R500_INST_ALU_WAIT				(1 << 10)
+#   define R500_INST_RGB_WMASK_R			(1 << 11)
+#   define R500_INST_RGB_WMASK_G			(1 << 12)
+#   define R500_INST_RGB_WMASK_B			(1 << 13)
+#   define R500_INST_ALPHA_WMASK			(1 << 14)
+#   define R500_INST_RGB_OMASK_R			(1 << 15)
+#   define R500_INST_RGB_OMASK_G			(1 << 16)
+#   define R500_INST_RGB_OMASK_B			(1 << 17)
+#   define R500_INST_ALPHA_OMASK			(1 << 18)
+#   define R500_INST_RGB_CLAMP				(1 << 19)
+#   define R500_INST_ALPHA_CLAMP			(1 << 20)
+#   define R500_INST_ALU_RESULT_SEL			(1 << 21)
+#   define R500_INST_ALU_RESULT_SEL_RED		(0 << 21)
+#   define R500_INST_ALU_RESULT_SEL_ALPHA		(1 << 21)
+#   define R500_INST_ALPHA_PRED_INV			(1 << 22)
+#   define R500_INST_ALU_RESULT_OP_EQ			(0 << 23)
+#   define R500_INST_ALU_RESULT_OP_LT			(1 << 23)
+#   define R500_INST_ALU_RESULT_OP_GE			(2 << 23)
+#   define R500_INST_ALU_RESULT_OP_NE			(3 << 23)
+#   define R500_INST_ALPHA_PRED_SEL_NONE		(0 << 25)
+#   define R500_INST_ALPHA_PRED_SEL_RGBA		(1 << 25)
+#   define R500_INST_ALPHA_PRED_SEL_RRRR		(2 << 25)
+#   define R500_INST_ALPHA_PRED_SEL_GGGG		(3 << 25)
+#   define R500_INST_ALPHA_PRED_SEL_BBBB		(4 << 25)
+#   define R500_INST_ALPHA_PRED_SEL_AAAA		(5 << 25)
+/* XXX next four are kind of guessed */
+#   define R500_INST_STAT_WE_R				(1 << 28)
+#   define R500_INST_STAT_WE_G				(1 << 29)
+#   define R500_INST_STAT_WE_B				(1 << 30)
+#   define R500_INST_STAT_WE_A				(1u << 31)	/* unsigned: 1 << 31 overflows a 32-bit int */
+
+/* note that these are 8 bit lengths, despite the offsets, at least for R500 */
+#define R500_US_CODE_ADDR				0x4630
+#   define R500_US_CODE_START_ADDR(x)			((x) << 0)
+#   define R500_US_CODE_END_ADDR(x)			((x) << 16)
+#define R500_US_CODE_OFFSET				0x4638
+#   define R500_US_CODE_OFFSET_ADDR(x)			((x) << 0)
+#define R500_US_CODE_RANGE				0x4634
+#   define R500_US_CODE_RANGE_ADDR(x)			((x) << 0)
+#   define R500_US_CODE_RANGE_SIZE(x)			((x) << 16)
+#define R500_US_CONFIG					0x4600
+#   define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO		(1 << 1)
+#define R500_US_FC_ADDR_0				0xa000
+#   define R500_FC_BOOL_ADDR(x)				((x) << 0)
+#   define R500_FC_INT_ADDR(x)				((x) << 8)
+#   define R500_FC_JUMP_ADDR(x)				((x) << 16)
+#   define R500_FC_JUMP_GLOBAL				(1u << 31)	/* unsigned: 1 << 31 overflows a 32-bit int */
+#define R500_US_FC_BOOL_CONST				0x4620
+#   define R500_FC_KBOOL(x)				(x)
+#define R500_US_FC_CTRL					0x4624
+#   define R500_FC_TEST_EN				(1 << 30)
+#   define R500_FC_FULL_FC_EN				(1u << 31)	/* unsigned: 1 << 31 overflows a 32-bit int */
+#define R500_US_FC_INST_0				0x9800
+#   define R500_FC_OP_JUMP				(0 << 0)
+#   define R500_FC_OP_LOOP				(1 << 0)
+#   define R500_FC_OP_ENDLOOP				(2 << 0)
+#   define R500_FC_OP_REP				(3 << 0)
+#   define R500_FC_OP_ENDREP				(4 << 0)
+#   define R500_FC_OP_BREAKLOOP				(5 << 0)
+#   define R500_FC_OP_BREAKREP				(6 << 0)
+#   define R500_FC_OP_CONTINUE				(7 << 0)
+#   define R500_FC_B_ELSE				(1 << 4)
+#   define R500_FC_JUMP_ANY				(1 << 5)
+#   define R500_FC_A_OP_NONE				(0 << 6)
+#   define R500_FC_A_OP_POP				(1 << 6)
+#   define R500_FC_A_OP_PUSH				(2 << 6)
+#   define R500_FC_JUMP_FUNC(x)				((x) << 8)
+#   define R500_FC_B_POP_CNT(x)				((x) << 16)
+#   define R500_FC_B_OP0_NONE				(0 << 24)
+#   define R500_FC_B_OP0_DECR				(1 << 24)
+#   define R500_FC_B_OP0_INCR				(2 << 24)
+#   define R500_FC_B_OP1_NONE				(0 << 26)	/* 0/1/2 = NONE/DECR/INCR, matching the B_OP0 field */
+#   define R500_FC_B_OP1_DECR				(1 << 26)
+#   define R500_FC_B_OP1_INCR				(2 << 26)
+#   define R500_FC_IGNORE_UNCOVERED			(1 << 28)
+#define R500_US_FC_INT_CONST_0				0x4c00
+#   define R500_FC_INT_CONST_KR(x)			((x) << 0)
+#   define R500_FC_INT_CONST_KG(x)			((x) << 8)
+#   define R500_FC_INT_CONST_KB(x)			((x) << 16)
+/* _0 through _15 */
+#define R500_US_FORMAT0_0				0x4640
+#   define R500_FORMAT_TXWIDTH(x)			((x) << 0)
+#   define R500_FORMAT_TXHEIGHT(x)			((x) << 11)
+#   define R500_FORMAT_TXDEPTH(x)			((x) << 22)
+/* _0 through _3 */
+#define R500_US_OUT_FMT_0				0x46a4
+#   define R500_OUT_FMT_C4_8				(0 << 0)
+#   define R500_OUT_FMT_C4_10				(1 << 0)
+#   define R500_OUT_FMT_C4_10_GAMMA			(2 << 0)
+#   define R500_OUT_FMT_C_16				(3 << 0)
+#   define R500_OUT_FMT_C2_16				(4 << 0)
+#   define R500_OUT_FMT_C4_16				(5 << 0)
+#   define R500_OUT_FMT_C_16_MPEG			(6 << 0)
+#   define R500_OUT_FMT_C2_16_MPEG			(7 << 0)
+#   define R500_OUT_FMT_C2_4				(8 << 0)
+#   define R500_OUT_FMT_C_3_3_2				(9 << 0)
+#   define R500_OUT_FMT_C_6_5_6				(10 << 0)
+#   define R500_OUT_FMT_C_11_11_10			(11 << 0)
+#   define R500_OUT_FMT_C_10_11_11			(12 << 0)
+#   define R500_OUT_FMT_C_2_10_10_10			(13 << 0)
+/* #define R500_OUT_FMT_RESERVED			(14 << 0) */
+#   define R500_OUT_FMT_UNUSED				(15 << 0)
+#   define R500_OUT_FMT_C_16_FP				(16 << 0)
+#   define R500_OUT_FMT_C2_16_FP			(17 << 0)
+#   define R500_OUT_FMT_C4_16_FP			(18 << 0)
+#   define R500_OUT_FMT_C_32_FP				(19 << 0)
+#   define R500_OUT_FMT_C2_32_FP			(20 << 0)
+#   define R500_OUT_FMT_C4_32_FP			(21 << 0)
+#   define R500_C0_SEL_A				(0 << 8)
+#   define R500_C0_SEL_R				(1 << 8)
+#   define R500_C0_SEL_G				(2 << 8)
+#   define R500_C0_SEL_B				(3 << 8)
+#   define R500_C1_SEL_A				(0 << 10)
+#   define R500_C1_SEL_R				(1 << 10)
+#   define R500_C1_SEL_G				(2 << 10)
+#   define R500_C1_SEL_B				(3 << 10)
+#   define R500_C2_SEL_A				(0 << 12)
+#   define R500_C2_SEL_R				(1 << 12)
+#   define R500_C2_SEL_G				(2 << 12)
+#   define R500_C2_SEL_B				(3 << 12)
+#   define R500_C3_SEL_A				(0 << 14)
+#   define R500_C3_SEL_R				(1 << 14)
+#   define R500_C3_SEL_G				(2 << 14)
+#   define R500_C3_SEL_B				(3 << 14)
+#   define R500_OUT_SIGN(x)				((x) << 16)
+#   define R500_ROUND_ADJ				(1 << 20)
+#define R500_US_PIXSIZE					0x4604
+#   define R500_PIX_SIZE(x)				(x)
+#define R500_US_TEX_ADDR_0				0x9800
+#   define R500_TEX_SRC_ADDR(x)				((x) << 0)
+#   define R500_TEX_SRC_ADDR_REL			(1 << 7)
+#   define R500_TEX_SRC_S_SWIZ_R			(0 << 8)
+#   define R500_TEX_SRC_S_SWIZ_G			(1 << 8)
+#   define R500_TEX_SRC_S_SWIZ_B			(2 << 8)
+#   define R500_TEX_SRC_S_SWIZ_A			(3 << 8)
+#   define R500_TEX_SRC_T_SWIZ_R			(0 << 10)
+#   define R500_TEX_SRC_T_SWIZ_G			(1 << 10)
+#   define R500_TEX_SRC_T_SWIZ_B			(2 << 10)
+#   define R500_TEX_SRC_T_SWIZ_A			(3 << 10)
+#   define R500_TEX_SRC_R_SWIZ_R			(0 << 12)
+#   define R500_TEX_SRC_R_SWIZ_G			(1 << 12)
+#   define R500_TEX_SRC_R_SWIZ_B			(2 << 12)
+#   define R500_TEX_SRC_R_SWIZ_A			(3 << 12)
+#   define R500_TEX_SRC_Q_SWIZ_R			(0 << 14)
+#   define R500_TEX_SRC_Q_SWIZ_G			(1 << 14)
+#   define R500_TEX_SRC_Q_SWIZ_B			(2 << 14)
+#   define R500_TEX_SRC_Q_SWIZ_A			(3 << 14)
+#   define R500_TEX_DST_ADDR(x)				((x) << 16)
+#   define R500_TEX_DST_ADDR_REL			(1 << 23)
+#   define R500_TEX_DST_R_SWIZ_R			(0 << 24)
+#   define R500_TEX_DST_R_SWIZ_G			(1 << 24)
+#   define R500_TEX_DST_R_SWIZ_B			(2 << 24)
+#   define R500_TEX_DST_R_SWIZ_A			(3 << 24)
+#   define R500_TEX_DST_G_SWIZ_R			(0 << 26)
+#   define R500_TEX_DST_G_SWIZ_G			(1 << 26)
+#   define R500_TEX_DST_G_SWIZ_B			(2 << 26)
+#   define R500_TEX_DST_G_SWIZ_A			(3 << 26)
+#   define R500_TEX_DST_B_SWIZ_R			(0 << 28)
+#   define R500_TEX_DST_B_SWIZ_G			(1 << 28)
+#   define R500_TEX_DST_B_SWIZ_B			(2 << 28)
+#   define R500_TEX_DST_B_SWIZ_A			(3 << 28)
+#   define R500_TEX_DST_A_SWIZ_R			(0 << 30)
+#   define R500_TEX_DST_A_SWIZ_G			(1 << 30)
+#   define R500_TEX_DST_A_SWIZ_B			(2u << 30)	/* unsigned: 2 << 30 overflows a 32-bit int */
+#   define R500_TEX_DST_A_SWIZ_A			(3u << 30)	/* unsigned: 3 << 30 overflows a 32-bit int */
+#define R500_US_TEX_ADDR_DXDY_0				0xa000
+#   define R500_DX_ADDR(x)				((x) << 0)
+#   define R500_DX_ADDR_REL				(1 << 7)
+#   define R500_DX_S_SWIZ_R				(0 << 8)
+#   define R500_DX_S_SWIZ_G				(1 << 8)
+#   define R500_DX_S_SWIZ_B				(2 << 8)
+#   define R500_DX_S_SWIZ_A				(3 << 8)
+#   define R500_DX_T_SWIZ_R				(0 << 10)
+#   define R500_DX_T_SWIZ_G				(1 << 10)
+#   define R500_DX_T_SWIZ_B				(2 << 10)
+#   define R500_DX_T_SWIZ_A				(3 << 10)
+#   define R500_DX_R_SWIZ_R				(0 << 12)
+#   define R500_DX_R_SWIZ_G				(1 << 12)
+#   define R500_DX_R_SWIZ_B				(2 << 12)
+#   define R500_DX_R_SWIZ_A				(3 << 12)
+#   define R500_DX_Q_SWIZ_R				(0 << 14)
+#   define R500_DX_Q_SWIZ_G				(1 << 14)
+#   define R500_DX_Q_SWIZ_B				(2 << 14)
+#   define R500_DX_Q_SWIZ_A				(3 << 14)
+#   define R500_DY_ADDR(x)				((x) << 16)
+#   define R500_DY_ADDR_REL				(1 << 23)	/* 7 bits above the DY_ADDR base, as DX_ADDR_REL is above DX_ADDR; bit 17 would lie inside DY_ADDR */
+#   define R500_DY_S_SWIZ_R				(0 << 24)
+#   define R500_DY_S_SWIZ_G				(1 << 24)
+#   define R500_DY_S_SWIZ_B				(2 << 24)
+#   define R500_DY_S_SWIZ_A				(3 << 24)
+#   define R500_DY_T_SWIZ_R				(0 << 26)
+#   define R500_DY_T_SWIZ_G				(1 << 26)
+#   define R500_DY_T_SWIZ_B				(2 << 26)
+#   define R500_DY_T_SWIZ_A				(3 << 26)
+#   define R500_DY_R_SWIZ_R				(0 << 28)
+#   define R500_DY_R_SWIZ_G				(1 << 28)
+#   define R500_DY_R_SWIZ_B				(2 << 28)
+#   define R500_DY_R_SWIZ_A				(3 << 28)
+#   define R500_DY_Q_SWIZ_R				(0 << 30)
+#   define R500_DY_Q_SWIZ_G				(1 << 30)
+#   define R500_DY_Q_SWIZ_B				(2u << 30)	/* unsigned: 2 << 30 overflows a 32-bit int */
+#   define R500_DY_Q_SWIZ_A				(3u << 30)	/* unsigned: 3 << 30 overflows a 32-bit int */
+#define R500_US_TEX_INST_0				0x9000
+#   define R500_TEX_ID(x)				((x) << 16)
+#   define R500_TEX_INST_NOP				(0 << 22)
+#   define R500_TEX_INST_LD				(1 << 22)
+#   define R500_TEX_INST_TEXKILL			(2 << 22)
+#   define R500_TEX_INST_PROJ				(3 << 22)
+#   define R500_TEX_INST_LODBIAS			(4 << 22)
+#   define R500_TEX_INST_LOD				(5 << 22)
+#   define R500_TEX_INST_DXDY				(6 << 22)
+#   define R500_TEX_SEM_ACQUIRE				(1 << 25)
+#   define R500_TEX_IGNORE_UNCOVERED			(1 << 26)
+#   define R500_TEX_UNSCALED				(1 << 27)
+#define R300_US_W_FMT					0x46b4
+#   define R300_W_FMT_W0				(0 << 0)
+#   define R300_W_FMT_W24				(1 << 0)
+#   define R300_W_FMT_W24FP				(2 << 0)
+#   define R300_W_SRC_US				(0 << 2)
+#   define R300_W_SRC_RAS				(1 << 2)
+
+
+/* Packet0 field ordering to write all values to the same reg */
+#define RADEON_ONE_REG_WR        (1 << 15)
+
+/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR.
+ * Two parameter dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ */
+#define R300_PACKET3_3D_DRAW_VBUF           0x00002800
+
+/* Draw a primitive from immediate vertices in this packet
+ * Up to 16382 dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ * 2 to end: Up to 16380 dwords of vertex data.
+ */
+#define R300_PACKET3_3D_DRAW_IMMD           0x00002900
+
+/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR and
+ * immediate vertices in this packet
+ * Up to 16382 dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ * 2 to end: Up to 16380 dwords of vertex data.
+ */
+#define R300_PACKET3_3D_DRAW_INDX           0x00002A00
+
+
+/* Specify the full set of vertex arrays as (address, stride).
+ * The first parameter is the number of vertex arrays specified.
+ * The rest of the command is a variable length list of blocks, where
+ * each block is three dwords long and specifies two arrays.
+ * The first dword of a block is split into two words, the lower significant
+ * word refers to the first array, the more significant word to the second
+ * array in the block.
+ * The low byte of each word contains the size of an array entry in dwords,
+ * the high byte contains the stride of the array.
+ * The second dword of a block contains the pointer to the first array,
+ * the third dword of a block contains the pointer to the second array.
+ * Note that if the total number of arrays is odd, the third dword of
+ * the last block is omitted.
+ */
+#define R300_PACKET3_3D_LOAD_VBPNTR         0x00002F00
+
+#define R300_PACKET3_INDX_BUFFER            0x00003300
+#    define R300_INDX_BUFFER_DST_SHIFT          0
+#    define R300_INDX_BUFFER_SKIP_SHIFT         16
+#    define R300_INDX_BUFFER_ONE_REG_WR		(1u<<31)	/* unsigned: 1 << 31 overflows a 32-bit int */
+
+/* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_VBUF_2         0x00003400
+/* Same as R300_PACKET3_3D_DRAW_IMMD but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_IMMD_2         0x00003500
+/* Same as R300_PACKET3_3D_DRAW_INDX but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_INDX_2         0x00003600
+
+/* Clears a portion of hierarchical Z RAM
+ * 3 dword parameters
+ * 0. START
+ * 1. COUNT: 13:0 (max is 0x3FFF)
+ * 2. CLEAR_VALUE: Value to write into HIZ RAM.
+ */
+#define R300_PACKET3_3D_CLEAR_HIZ           0x00003700
+
+/* Draws a set of primitives using vertex buffers pointed by the state data.
+ * At least 2 Parameters:
+ * 0. VAP_VF_CNTL: The first parameter is a standard primitive emission dword.
+ * 2 to end: Data or indices (see other 3D_DRAW_* packets for details)
+ */
+#define R300_PACKET3_3D_DRAW_128            0x00003900
+
+/* END: Packet 3 commands */
+
+
+/* Color formats for 2d packets
+ */
+#define R300_CP_COLOR_FORMAT_CI8	2
+#define R300_CP_COLOR_FORMAT_ARGB1555	3
+#define R300_CP_COLOR_FORMAT_RGB565	4
+#define R300_CP_COLOR_FORMAT_ARGB8888	6
+#define R300_CP_COLOR_FORMAT_RGB332	7
+#define R300_CP_COLOR_FORMAT_RGB8	9
+#define R300_CP_COLOR_FORMAT_ARGB4444	15
+
+/*
+ * CP type-3 packets
+ */
+#define R300_CP_CMD_BITBLT_MULTI	0xC0009B00
+
+#endif /* _R300_REG_H */
+
+/* *INDENT-ON* */
+
+/* vim: set foldenable foldmarker=\\{,\\} foldmethod=marker : */
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
new file mode 100644
index 0000000000..bb8f91491f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_render.c
@@ -0,0 +1,505 @@
+/**************************************************************************
+
+Copyright (C) 2004 Nicolai Haehnle.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \brief R300 Render (Vertex Buffer Implementation)
+ *
+ * The immediate implementation has been removed from CVS in favor of the vertex
+ * buffer implementation.
+ *
+ * The render functions are called by the pipeline manager to render a batch of
+ * primitives. They return TRUE to pass on to the next stage (i.e. software
+ * rasterization) or FALSE to indicate that the pipeline has finished after
+ * rendering something.
+ *
+ * When falling back to software TCL still attempt to use hardware
+ * rasterization.
+ *
+ * I am not sure that the cache related registers are setup correctly, but
+ * obviously this does work... Further investigation is needed.
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ *
+ * \todo Add immediate implementation back? Perhaps this is useful if there are
+ * no bugs...
+ */
+
+#include "r300_render.h"
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/dd.h"
+#include "main/simple_list.h"
+#include "main/api_arrayelt.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+#include "vbo/vbo_split.h"
+#include "r300_context.h"
+#include "r300_state.h"
+#include "r300_reg.h"
+#include "r300_emit.h"
+#include "r300_swtcl.h"
+
+/**
+ * \brief Translate an OpenGL primitive mode into the R300 primitive type.
+ *
+ * Only the bits of \p prim selected by PRIM_MODE_MASK are examined.
+ *
+ * \param rmesa  driver context; not referenced by this function
+ * \param prim   primitive value whose mode bits select the result
+ * \return the matching R300_VAP_VF_CNTL__PRIM_* value, or -1 (after
+ *         asserting) when the mode is none of the ten handled here
+ */
+int r300PrimitiveType(r300ContextPtr rmesa, int prim)
+{
+	const int mode = prim & PRIM_MODE_MASK;
+
+	/* each recognized mode maps to exactly one hardware code; the first
+	 * match wins and returns immediately */
+	if (mode == GL_POINTS)
+		return R300_VAP_VF_CNTL__PRIM_POINTS;
+	if (mode == GL_LINES)
+		return R300_VAP_VF_CNTL__PRIM_LINES;
+	if (mode == GL_LINE_STRIP)
+		return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
+	if (mode == GL_LINE_LOOP)
+		return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
+	if (mode == GL_TRIANGLES)
+		return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
+	if (mode == GL_TRIANGLE_STRIP)
+		return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
+	if (mode == GL_TRIANGLE_FAN)
+		return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
+	if (mode == GL_QUADS)
+		return R300_VAP_VF_CNTL__PRIM_QUADS;
+	if (mode == GL_QUAD_STRIP)
+		return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
+	if (mode == GL_POLYGON)
+		return R300_VAP_VF_CNTL__PRIM_POLYGON;
+
+	/* anything else is unexpected: trip the assertion and, when
+	 * assertions are compiled out, report failure with -1 */
+	assert(0);
+	return -1;
+}
+
+/*
+ * Trim a vertex count so that only complete primitives remain.
+ *
+ * rmesa is not referenced.  The mode is taken from prim & PRIM_MODE_MASK:
+ *  - points keep every vertex;
+ *  - lines, triangles and quads drop the remainder modulo 2, 3 and 4;
+ *  - line strips/loops need 2 vertices, triangle strips/fans and polygons
+ *    need 3, otherwise every vertex is dropped;
+ *  - quad strips need 4 vertices and an even count.
+ *
+ * Returns the trimmed count, or -1 (after asserting) for an unknown mode.
+ */
+int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
+{
+	int dropped = 0;
+
+	switch (prim & PRIM_MODE_MASK) {
+	case GL_POINTS:
+		break;
+	case GL_LINES:
+		dropped = num_verts % 2;
+		break;
+	case GL_TRIANGLES:
+		dropped = num_verts % 3;
+		break;
+	case GL_QUADS:
+		dropped = num_verts % 4;
+		break;
+	case GL_LINE_STRIP:
+	case GL_LINE_LOOP:
+		/* below the two-vertex minimum nothing survives */
+		dropped = (num_verts < 2) ? num_verts : 0;
+		break;
+	case GL_TRIANGLE_STRIP:
+	case GL_TRIANGLE_FAN:
+	case GL_POLYGON:
+		/* below the three-vertex minimum nothing survives */
+		dropped = (num_verts < 3) ? num_verts : 0;
+		break;
+	case GL_QUAD_STRIP:
+		/* at least four vertices, and always an even number */
+		dropped = (num_verts < 4) ? num_verts : num_verts % 2;
+		break;
+	default:
+		/* not one of the modes listed above */
+		assert(0);
+		return -1;
+	}
+
+	return num_verts - dropped;
+}
+
+static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type, int offset)
+{
+	BATCH_LOCALS(&rmesa->radeon);
+	int size;	/* emitted as the last dword of the INDX_BUFFER packet below */
+
+	/* Emit a DRAW_INDX_2 packet plus an INDX_BUFFER packet for rmesa->ind_buf; offset is in indices */
+	BEGIN_BATCH(10);
+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
+	if (rmesa->ind_buf.is_32bit) {
+		/* convert to bytes: four per index */
+		offset *= 4;
+		size = vertex_count;
+		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
+		  (vertex_count << 16) | type |
+		  R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
+	} else {
+		/* convert to bytes: two per index */
+		offset *= 2;
+		size = (vertex_count + 1) >> 1;	/* half the index count, rounded up */
+		OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
+		   (vertex_count << 16) | type);
+	}
+
+	if (!rmesa->radeon.radeonScreen->kernel_mm) {	/* same packet header in both branches; only the address dword differs */
+		OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
+		OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
+				 (R300_VAP_PORT_IDX0 >> 2));
+		OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset + offset, RADEON_GEM_DOMAIN_GTT, 0, 0);	/* address goes out through the relocation macro */
+		OUT_BATCH(size);
+	} else {
+		OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
+		OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
+				 (R300_VAP_PORT_IDX0 >> 2));
+		OUT_BATCH(rmesa->ind_buf.bo_offset + offset);	/* byte offset written directly ... */
+		OUT_BATCH(size);
+		radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,	/* ... and the buffer object is named by a relocation after the packet */
+				      rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0);
+	}
+	END_BATCH();
+}
+
+/* Emit a 3D_LOAD_VBPNTR packet for the first nr entries of
+ * rmesa->radeon.tcl.aos[].  Entries are packed two per block: one dword
+ * with components/stride of both, then one address dword per entry.  Each
+ * address is aos[].offset + offset * 4 * aos[].stride. */
+static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
+{
+	BATCH_LOCALS(&rmesa->radeon);
+	uint32_t voffset;
+	int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;	/* count dword + 3 per pair + 2 for an odd last entry */
+	GLuint i;
+
+	if (RADEON_DEBUG & RADEON_VERTS)
+		fprintf(stderr, "%s: nr=%u, ofs=0x%08x\n", __FUNCTION__, nr,
+			offset);
+
+	if (!rmesa->radeon.radeonScreen->kernel_mm) {
+		BEGIN_BATCH(sz+2+(nr * 2));
+		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+		OUT_BATCH(nr);
+
+		for (i = 0; i + 1 < nr; i += 2) {
+			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+				  (rmesa->radeon.tcl.aos[i].stride << 8) |
+				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+			voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
+				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+			OUT_BATCH_RELOC(voffset,
+					rmesa->radeon.tcl.aos[i].bo,
+					voffset,
+					RADEON_GEM_DOMAIN_GTT,
+					0, 0);
+			voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
+			  offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+			OUT_BATCH_RELOC(voffset,
+					rmesa->radeon.tcl.aos[i+1].bo,
+					voffset,
+					RADEON_GEM_DOMAIN_GTT,
+					0, 0);
+		}
+
+		if (nr & 1) {
+			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+				  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+			voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
+				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+			OUT_BATCH_RELOC(voffset,
+					rmesa->radeon.tcl.aos[nr - 1].bo,
+					voffset,
+					RADEON_GEM_DOMAIN_GTT,
+					0, 0);
+		}
+		END_BATCH();
+	} else {
+
+		BEGIN_BATCH(sz+2+(nr * 2));
+		OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+		OUT_BATCH(nr);
+
+		for (i = 0; i + 1 < nr; i += 2) {
+			OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+				  (rmesa->radeon.tcl.aos[i].stride << 8) |
+				  (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+				  (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+			voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
+				offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+			OUT_BATCH(voffset);
+			voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
+				offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+			OUT_BATCH(voffset);
+		}
+
+		if (nr & 1) {
+			OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+			  (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+			voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
+				offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+			OUT_BATCH(voffset);
+		}
+		/* the relocations follow the packet body, one per array, in the
+		 * same order; no offset is passed to them */
+		for (i = 0; i + 1 < nr; i += 2) {
+			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+					      rmesa->radeon.tcl.aos[i+0].bo,
+					      RADEON_GEM_DOMAIN_GTT,
+					      0, 0);
+			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+					      rmesa->radeon.tcl.aos[i+1].bo,
+					      RADEON_GEM_DOMAIN_GTT,
+					      0, 0);
+		}
+		if (nr & 1) {
+			radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+					      rmesa->radeon.tcl.aos[nr-1].bo,
+					      RADEON_GEM_DOMAIN_GTT,
+					      0, 0);
+		}
+		END_BATCH();
+	}
+
+}
+
+static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
+{
+	BATCH_LOCALS(&rmesa->radeon);
+	/* Calls r300_emit_scissor(), then emits one DRAW_VBUF_2 packet. */
+        r300_emit_scissor(rmesa->radeon.glCtx);
+	BEGIN_BATCH(3);
+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);	/* vertex count shifted to bit 16, OR'ed with the primitive type */
+	END_BATCH();
+}
+
+void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	BATCH_LOCALS(&rmesa->radeon);
+	int type, num_verts;
+	/* Draws end - start vertices of primitive prim in chunks of at most 65535, indexed when rmesa->ind_buf.bo is set. */
+	type = r300PrimitiveType(rmesa, prim);
+	num_verts = r300NumVerts(rmesa, end - start, prim);
+
+	if (type < 0 || num_verts <= 0)	/* unknown primitive mode, or no vertex left after trimming */
+		return;
+
+	if (rmesa->ind_buf.bo) {	/* indexed path: 'start' is not used here, chunks advance through the index buffer */
+		GLuint first, incr, offset = 0;
+
+		if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&	/* presumably fills first/incr and reports whether the mode can be split - confirm in vbo_split.h */
+			num_verts > 65500) {	/* NOTE(review): 65500 here, 65535 in the non-indexed branch and in MIN2 below */
+			WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
+			return;	/* NOTE(review): leaves without reaching COMMIT_BATCH() */
+		}
+
+		/* array pointers are emitted once, with vertex offset 0 */
+		r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0);
+		if (rmesa->radeon.radeonScreen->kernel_mm) {
+			BEGIN_BATCH_NO_AUTOSTATE(2);
+			OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1);
+			OUT_BATCH(rmesa->radeon.tcl.aos[0].count);	/* written to VAP_VF_MAX_VTX_INDX */
+			END_BATCH();
+		}
+
+		r300_emit_scissor(rmesa->radeon.glCtx);
+		while (num_verts > 0) {
+			int nr;	/* indices drawn by this chunk */
+			int align;	/* byte size of this chunk's index data */
+
+			nr = MIN2(num_verts, 65535);
+			nr -= (nr - first) % incr;	/* trim so that (nr - first) is a multiple of incr */
+
+			/* when more chunks follow, shrink nr until its index data is a multiple of 4 bytes */
+			if (nr != num_verts) {
+				do {
+				    align = nr * (rmesa->ind_buf.is_32bit ? 4 : 2);
+				    if (align % 4)
+					nr -= incr;
+				} while(align % 4);
+				if (nr <= 0) {
+					WARN_ONCE("did the impossible happen? we never aligned nr to dword\n");
+					return;	/* NOTE(review): leaves without reaching COMMIT_BATCH() */
+				}
+					
+			}
+			r300FireEB(rmesa, nr, type, offset);
+
+			num_verts -= nr;
+			offset += nr;	/* offset counts indices already drawn */
+		}
+
+	} else {	/* non-indexed path: each chunk re-emits the array pointers at start + offset */
+		GLuint first, incr, offset = 0;
+
+		if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&
+			num_verts > 65535) {
+			WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
+			return;	/* NOTE(review): leaves without reaching COMMIT_BATCH() */
+		}
+
+		if (rmesa->radeon.radeonScreen->kernel_mm) {
+			BEGIN_BATCH_NO_AUTOSTATE(2);
+			OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1);
+			OUT_BATCH(rmesa->radeon.tcl.aos[0].count);	/* written to VAP_VF_MAX_VTX_INDX */
+			END_BATCH();
+		}
+
+		r300_emit_scissor(rmesa->radeon.glCtx);
+		while (num_verts > 0) {
+			int nr;	/* vertices drawn by this chunk */
+			nr = MIN2(num_verts, 65535);
+			nr -= (nr - first) % incr;	/* trim so that (nr - first) is a multiple of incr */
+			r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start + offset);
+			r300FireAOS(rmesa, nr, type);
+			num_verts -= nr;
+			offset += nr;	/* offset counts vertices already drawn */
+		}
+	}
+	COMMIT_BATCH();
+}
+
+/* Name of one fallback bit, as printed by r300SwitchFallback().  The
+ * R300_FALLBACK_RADEON_COMMON text is formatted into a static buffer. */
+static const char *getFallbackString(r300ContextPtr rmesa, uint32_t bit)
+{
+	static const struct {
+		uint32_t flag;
+		const char *label;
+	} fixed_labels[] = {
+		{ R300_FALLBACK_VERTEX_PROGRAM, "vertex program" },
+		{ R300_FALLBACK_LINE_SMOOTH, "smooth lines" },
+		{ R300_FALLBACK_POINT_SMOOTH, "smooth points" },
+		{ R300_FALLBACK_POLYGON_SMOOTH, "smooth polygons" },
+		{ R300_FALLBACK_LINE_STIPPLE, "line stipple" },
+		{ R300_FALLBACK_POLYGON_STIPPLE, "polygon stipple" },
+		{ R300_FALLBACK_STENCIL_TWOSIDE, "two-sided stencil" },
+		{ R300_FALLBACK_RENDER_MODE, "render mode != GL_RENDER" },
+		{ R300_FALLBACK_FRAGMENT_PROGRAM, "fragment program" },
+		{ R300_FALLBACK_AOS_LIMIT, "aos limit" },
+		{ R300_FALLBACK_INVALID_BUFFERS, "invalid buffers" },
+	};
+	static char common_fallback_str[32];
+	unsigned int slot;
+
+	if (bit == R300_FALLBACK_RADEON_COMMON) {
+		snprintf(common_fallback_str, 32, "radeon common 0x%08x", rmesa->radeon.Fallback);
+		return common_fallback_str;
+	}
+	for (slot = 0; slot < sizeof(fixed_labels) / sizeof(fixed_labels[0]); slot++) {
+		if (fixed_labels[slot].flag == bit)
+			return fixed_labels[slot].label;
+	}
+	return "unknown";
+}
+
+void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode)
+{
+	TNLcontext *tnl = TNL_CONTEXT(ctx);
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	uint32_t old_fallback = rmesa->fallback;	/* state before this call, used to detect transitions */
+	static uint32_t fallback_warn = 0;	/* bits already warned about; static, so kept across calls */
+	/* Sets (mode true) or clears (mode false) 'bit' in rmesa->fallback and reacts to TCL / raster transitions. */
+	if (mode) {
+		if ((fallback_warn & bit) == 0) {	/* first time this bit is raised */
+			if (RADEON_DEBUG & RADEON_FALLBACKS)
+				fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(rmesa, bit));
+			fallback_warn |= bit;	/* recorded even when the message was not printed */
+		}
+		rmesa->fallback |= bit;
+
+		/* update only if we change from no tcl fallbacks to some tcl fallbacks */
+		if (rmesa->options.hw_tcl_enabled) {
+			if (((old_fallback & R300_TCL_FALLBACK_MASK) == 0) &&
+				((bit & R300_TCL_FALLBACK_MASK) > 0)) {
+				R300_STATECHANGE(rmesa, vap_cntl_status);
+				rmesa->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS;
+			}
+		}
+
+		/* update only if we change from no raster fallbacks to some raster fallbacks */
+		if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) &&
+			((bit & R300_RASTER_FALLBACK_MASK) > 0)) {
+
+			radeon_firevertices(&rmesa->radeon);
+			rmesa->radeon.swtcl.RenderIndex = ~0;
+			_swsetup_Wakeup( ctx );
+		}
+	} else {
+		rmesa->fallback &= ~bit;
+
+		/* update only if we have disabled all tcl fallbacks */
+		if (rmesa->options.hw_tcl_enabled) {
+			if ((old_fallback & R300_TCL_FALLBACK_MASK) == bit) {	/* true only when 'bit' was the sole TCL fallback set */
+				R300_STATECHANGE(rmesa, vap_cntl_status);
+				rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS;
+			}
+		}
+
+		/* update only if we have disabled all raster fallbacks */
+		if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) {	/* true only when 'bit' was the sole raster fallback set */
+			_swrast_flush( ctx );
+
+			tnl->Driver.Render.Start = r300RenderStart;	/* reinstall the driver's render hooks */
+			tnl->Driver.Render.Finish = r300RenderFinish;
+			tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
+			tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple;
+			tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+			tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+			tnl->Driver.Render.Interp = _tnl_interp;
+
+			_tnl_invalidate_vertex_state( ctx, ~0 );
+			_tnl_invalidate_vertices( ctx, ~0 );
+		}
+	}
+
+}
diff --git a/src/mesa/drivers/dri/r300/r300_render.h b/src/mesa/drivers/dri/r300/r300_render.h
new file mode 100644
index 0000000000..581e9fa0cc
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_render.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_RENDER_H__
+#define __R300_RENDER_H__
+
+#include "main/mtypes.h"
+
+#define R300_FALLBACK_VERTEX_PROGRAM    (1 << 0)
+#define R300_TCL_FALLBACK_MASK           0x0000ffff
+
+#define R300_FALLBACK_LINE_SMOOTH       (1 << 16)
+#define R300_FALLBACK_POINT_SMOOTH      (1 << 17)
+#define R300_FALLBACK_POLYGON_SMOOTH    (1 << 18)
+#define R300_FALLBACK_LINE_STIPPLE      (1 << 19)
+#define R300_FALLBACK_POLYGON_STIPPLE   (1 << 20)
+#define R300_FALLBACK_STENCIL_TWOSIDE   (1 << 21)
+#define R300_FALLBACK_RENDER_MODE       (1 << 22)
+#define R300_FALLBACK_FRAGMENT_PROGRAM  (1 << 23)
+#define R300_FALLBACK_RADEON_COMMON     (1 << 29)
+#define R300_FALLBACK_AOS_LIMIT         (1 << 30)
+#define R300_FALLBACK_INVALID_BUFFERS   (1u << 31)	/* unsigned: 1 << 31 overflows a 32-bit int */
+#define R300_RASTER_FALLBACK_MASK        0xffff0000
+
+#define MASK_XYZW (R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+#define MASK_X R300_WRITE_ENA_X
+#define MASK_Y R300_WRITE_ENA_Y
+#define MASK_Z R300_WRITE_ENA_Z
+#define MASK_W R300_WRITE_ENA_W
+
+#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
+    SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
+    SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
+    SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
+    SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
+    SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
+#error Cannot change these!
+#endif
+
+extern const struct tnl_pipeline_stage _r300_render_stage;
+
+extern void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode);
+
+extern void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c
new file mode 100644
index 0000000000..9c24166ec5
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_shader.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "main/glheader.h"
+
+#include "shader/program.h"
+#include "tnl/tnl.h"
+#include "r300_context.h"
+#include "r300_fragprog_common.h"
+
+/**
+ * Release every translated fragment program hanging off \p cache.
+ *
+ * Each list node's constant list is destroyed before the node itself is
+ * freed.  The list head is cleared afterwards so the container never keeps
+ * a pointer to freed memory.
+ */
+static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont *cache)
+{
+	struct r300_fragment_program *fp = cache->progs;
+
+	(void) ctx;
+
+	while (fp) {
+		struct r300_fragment_program *next = fp->next;
+
+		rc_constants_destroy(&fp->code.constants);
+		free(fp);
+		fp = next;
+	}
+
+	cache->progs = NULL;
+}
+
+/**
+ * Release every translated vertex program hanging off \p cache.
+ *
+ * For each list node the constant list is destroyed and the node's
+ * reference to its Mesa vertex program is dropped before the node is freed.
+ * The list head is cleared afterwards so the container never keeps a
+ * pointer to freed memory.
+ */
+static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *cache)
+{
+	struct r300_vertex_program *vp = cache->progs;
+
+	while (vp) {
+		struct r300_vertex_program *next = vp->next;
+
+		rc_constants_destroy(&vp->code.constants);
+		_mesa_reference_vertprog(ctx, &vp->Base, NULL);
+		free(vp);
+		vp = next;
+	}
+
+	cache->progs = NULL;
+}
+
+/**
+ * Driver hook: allocate a driver-side program container for \p target and
+ * let Mesa initialise the embedded program object.
+ *
+ * \return the initialised program, or NULL when \p target is not a vertex
+ *         or fragment program target or when the allocation fails.
+ */
+static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
+					 GLuint id)
+{
+	struct r300_vertex_program_cont *vp;
+	struct r300_fragment_program_cont *fp;
+
+	switch (target) {
+	case GL_VERTEX_STATE_PROGRAM_NV:
+	case GL_VERTEX_PROGRAM_ARB:
+		vp = CALLOC_STRUCT(r300_vertex_program_cont);
+		/* Do not form a member address from a failed allocation. */
+		if (!vp)
+			return NULL;
+		return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
+
+	case GL_FRAGMENT_PROGRAM_NV:
+	case GL_FRAGMENT_PROGRAM_ARB:
+		fp = CALLOC_STRUCT(r300_fragment_program_cont);
+		if (!fp)
+			return NULL;
+		return _mesa_init_fragment_program(ctx, &fp->Base, target, id);
+
+	default:
+		_mesa_problem(ctx, "Bad target in r300NewProgram");
+	}
+
+	return NULL;
+}
+
+/**
+ * Driver hook: destroy a program created by r300NewProgram().
+ *
+ * Frees the per-program list of translated shaders, then hands the program
+ * itself to _mesa_delete_program().  The NV targets are handled as well,
+ * because r300NewProgram() allocates the same containers for them.
+ */
+static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog)
+{
+	switch (prog->Target) {
+	case GL_VERTEX_STATE_PROGRAM_NV:
+	case GL_VERTEX_PROGRAM_ARB:
+		freeVertProgCache(ctx, (struct r300_vertex_program_cont *)prog);
+		break;
+	case GL_FRAGMENT_PROGRAM_NV:
+	case GL_FRAGMENT_PROGRAM_ARB:
+		freeFragProgCache(ctx, (struct r300_fragment_program_cont *)prog);
+		break;
+	default:
+		break;
+	}
+
+	_mesa_delete_program(ctx, prog);
+}
+
+/**
+ * Driver hook: called when the text of \p prog has been (re)specified.
+ *
+ * Any shaders previously translated from the old program text are thrown
+ * away and the list head is reset.  The NV targets are handled as well,
+ * because r300NewProgram() allocates the same containers for them.
+ *
+ * \return always GL_TRUE; the program is not validated here.
+ */
+static GLboolean
+r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+	struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog;
+	struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog;
+
+	switch (target) {
+	case GL_VERTEX_STATE_PROGRAM_NV:
+	case GL_VERTEX_PROGRAM_ARB:
+		freeVertProgCache(ctx, vp);
+		vp->progs = NULL;
+		break;
+	case GL_FRAGMENT_PROGRAM_NV:
+	case GL_FRAGMENT_PROGRAM_ARB:
+		freeFragProgCache(ctx, fp);
+		fp->progs = NULL;
+		break;
+	default:
+		break;
+	}
+
+	/* need this for tcl fallbacks */
+	(void) _tnl_program_string(ctx, target, prog);
+
+	/* XXX check if program is legal, within limits */
+	return GL_TRUE;
+}
+
+/**
+ * Driver hook: report whether the program can run on the hardware.
+ *
+ * The currently selected shader for the given target is translated and its
+ * error flag inspected; \p prog itself is not examined.  Every target other
+ * than GL_FRAGMENT_PROGRAM_ARB is treated as a vertex program.
+ */
+static GLboolean
+r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+	return (target == GL_FRAGMENT_PROGRAM_ARB)
+		? !r300SelectAndTranslateFragmentShader(ctx)->error
+		: !r300SelectAndTranslateVertexShader(ctx)->error;
+}
+
+/**
+ * Plug the r300 program-management callbacks into the driver function table.
+ */
+void r300InitShaderFuncs(struct dd_function_table *functions)
+{
+	/* program lifetime */
+	functions->NewProgram = r300NewProgram;
+	functions->DeleteProgram = r300DeleteProgram;
+
+	/* program text changes and capability queries */
+	functions->ProgramStringNotify = r300ProgramStringNotify;
+	functions->IsProgramNative = r300IsProgramNative;
+}
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
new file mode 100644
index 0000000000..fa33be4998
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -0,0 +1,2416 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.
+Copyright (C) 2004 Nicolai Haehnle.
+All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#include "main/glheader.h"
+#include "main/state.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/dd.h"
+#include "main/framebuffer.h"
+#include "main/simple_list.h"
+#include "main/api_arrayelt.h"
+
+#include "drivers/common/meta.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+
+#include "r300_context.h"
+#include "r300_state.h"
+#include "r300_reg.h"
+#include "r300_emit.h"
+#include "r300_fragprog_common.h"
+#include "r300_render.h"
+#include "r300_vertprog.h"
+
+/**
+ * Store the constant blend colour in the blend_color state atom.
+ *
+ * Chips from RV515 upwards take each channel scaled by 1023 and split
+ * over two command words; older chips take one packed 8888 word.
+ */
+static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4])
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+	R300_STATECHANGE(rmesa, blend_color);
+
+	if (rmesa->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+		GLubyte red, green, blue, alpha;
+
+		CLAMPED_FLOAT_TO_UBYTE(red, cf[0]);
+		CLAMPED_FLOAT_TO_UBYTE(green, cf[1]);
+		CLAMPED_FLOAT_TO_UBYTE(blue, cf[2]);
+		CLAMPED_FLOAT_TO_UBYTE(alpha, cf[3]);
+
+		rmesa->hw.blend_color.cmd[1] = PACK_COLOR_8888(alpha, red,
+							green, blue);
+	} else {
+		GLuint scaled[4];
+		int i;
+
+		for (i = 0; i < 4; i++)
+			scaled[i] = IROUND(cf[i]*1023.0f);
+
+		/* word 1: red low, alpha high; word 2: blue low, green high */
+		rmesa->hw.blend_color.cmd[1] = scaled[0] | (scaled[3] << 16);
+		rmesa->hw.blend_color.cmd[2] = scaled[2] | (scaled[1] << 16);
+	}
+}
+
+/**
+ * Map a GL blend factor onto the hardware encoding.  One routine serves
+ * the source and destination factors of both the RGB and alpha paths.
+ *
+ * \param factor  GL blend factor enum
+ * \param is_src  GL_TRUE when translating a source factor
+ *
+ * \returns
+ * The unshifted hardware value; the caller shifts it into the source or
+ * destination field.
+ *
+ * \todo
+ * The two cases where source and destination differ are essentially error
+ * cases and should never happen.  Determine whether they can be removed.
+ */
+static int blend_factor(GLenum factor, GLboolean is_src)
+{
+	switch (factor) {
+	case GL_ZERO:
+		return R300_BLEND_GL_ZERO;
+	case GL_ONE:
+		return R300_BLEND_GL_ONE;
+	case GL_DST_COLOR:
+		return R300_BLEND_GL_DST_COLOR;
+	case GL_ONE_MINUS_DST_COLOR:
+		return R300_BLEND_GL_ONE_MINUS_DST_COLOR;
+	case GL_SRC_COLOR:
+		return R300_BLEND_GL_SRC_COLOR;
+	case GL_ONE_MINUS_SRC_COLOR:
+		return R300_BLEND_GL_ONE_MINUS_SRC_COLOR;
+	case GL_SRC_ALPHA:
+		return R300_BLEND_GL_SRC_ALPHA;
+	case GL_ONE_MINUS_SRC_ALPHA:
+		return R300_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+	case GL_DST_ALPHA:
+		return R300_BLEND_GL_DST_ALPHA;
+	case GL_ONE_MINUS_DST_ALPHA:
+		return R300_BLEND_GL_ONE_MINUS_DST_ALPHA;
+	case GL_SRC_ALPHA_SATURATE:
+		/* only meaningful as a source factor */
+		if (is_src)
+			return R300_BLEND_GL_SRC_ALPHA_SATURATE;
+		return R300_BLEND_GL_ZERO;
+	case GL_CONSTANT_COLOR:
+		return R300_BLEND_GL_CONST_COLOR;
+	case GL_ONE_MINUS_CONSTANT_COLOR:
+		return R300_BLEND_GL_ONE_MINUS_CONST_COLOR;
+	case GL_CONSTANT_ALPHA:
+		return R300_BLEND_GL_CONST_ALPHA;
+	case GL_ONE_MINUS_CONSTANT_ALPHA:
+		return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA;
+	default:
+		fprintf(stderr, "unknown blend factor %x\n", factor);
+		if (is_src)
+			return R300_BLEND_GL_ONE;
+		return R300_BLEND_GL_ZERO;
+	}
+}
+
+/**
+ * r300SetBlendState() sets both the blend equation and the blend function.
+ * This is done in a single function because some blend equations
+ * (i.e., \c GL_MIN and \c GL_MAX) change the interpretation of the blend
+ * function.
+ * It also makes sure that the blend function and blend equation are set to
+ * their default values if color blending is not enabled, since at least
+ * the blend equations GL_MIN and GL_FUNC_REVERSE_SUBTRACT will otherwise
+ * cause wrong results for unknown reasons.
+ */
+
+/**
+ * Helper: write the two blend control words, flagging the bld state atom
+ * as changed only when a value differs from what is already stored.
+ *
+ * \param func,eqn    RGB blend factors and combine function
+ * \param cbits       extra control bits OR'ed into the colour word
+ * \param funcA,eqnA  alpha blend factors and combine function
+ */
+static void r300SetBlendCntl(r300ContextPtr r300, int func, int eqn,
+			     int cbits, int funcA, int eqnA)
+{
+	const GLuint alpha_word = eqnA | funcA;
+	GLuint color_word = eqn | func;
+
+#if 0
+	fprintf(stderr,
+		"eqnA=%08x funcA=%08x eqn=%08x func=%08x cbits=%08x\n",
+		eqnA, funcA, eqn, func, cbits);
+#endif
+
+	/* Some blend factor combinations don't seem to work when the
+	 * BLEND_NO_SEPARATE bit is set.
+	 *
+	 * Especially problematic candidates are the ONE_MINUS_* flags,
+	 * but I can't see a real pattern.
+	 */
+#if 0
+	if (alpha_word == color_word) {
+		color_word |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
+	}
+#endif
+	color_word |= cbits;
+
+	/* Nothing to emit when both words are already current. */
+	if (alpha_word == r300->hw.bld.cmd[R300_BLD_ABLEND] &&
+	    color_word == r300->hw.bld.cmd[R300_BLD_CBLEND])
+		return;
+
+	R300_STATECHANGE(r300, bld);
+	r300->hw.bld.cmd[R300_BLD_ABLEND] = alpha_word;
+	r300->hw.bld.cmd[R300_BLD_CBLEND] = color_word;
+}
+
+/**
+ * Derive the hardware blend factors and combine functions from the current
+ * GL colour state and hand them to r300SetBlendCntl().
+ *
+ * With blending disabled (or an RGBA logic op enabled) the default
+ * ONE/ZERO + ADD setup is programmed with no extra control bits.  For
+ * GL_MIN and GL_MAX both factors are forced to ONE.  An unrecognised
+ * equation is reported on stderr and leaves the hardware state untouched.
+ */
+static void r300SetBlendState(GLcontext * ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	const int src_one_dst_zero =
+	    (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+	    (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT);
+	const int src_one_dst_one =
+	    (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+	    (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT);
+	int rgb_func, rgb_eqn;
+	int alpha_func, alpha_eqn;
+
+	if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) {
+		r300SetBlendCntl(r300,
+				 src_one_dst_zero, R300_COMB_FCN_ADD_CLAMP, 0,
+				 src_one_dst_zero, R300_COMB_FCN_ADD_CLAMP);
+		return;
+	}
+
+	/* ---- RGB ---- */
+	rgb_func =
+	    (blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE) <<
+	     R300_SRC_BLEND_SHIFT) |
+	    (blend_factor(ctx->Color.BlendDstRGB, GL_FALSE) <<
+	     R300_DST_BLEND_SHIFT);
+
+	switch (ctx->Color.BlendEquationRGB) {
+	case GL_FUNC_ADD:
+		rgb_eqn = R300_COMB_FCN_ADD_CLAMP;
+		break;
+	case GL_FUNC_SUBTRACT:
+		rgb_eqn = R300_COMB_FCN_SUB_CLAMP;
+		break;
+	case GL_FUNC_REVERSE_SUBTRACT:
+		rgb_eqn = R300_COMB_FCN_RSUB_CLAMP;
+		break;
+	case GL_MIN:
+		rgb_eqn = R300_COMB_FCN_MIN;
+		rgb_func = src_one_dst_one;
+		break;
+	case GL_MAX:
+		rgb_eqn = R300_COMB_FCN_MAX;
+		rgb_func = src_one_dst_one;
+		break;
+	default:
+		fprintf(stderr,
+			"[%s:%u] Invalid RGB blend equation (0x%04x).\n",
+			__FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB);
+		return;
+	}
+
+	/* ---- alpha ---- */
+	alpha_func =
+	    (blend_factor(ctx->Color.BlendSrcA, GL_TRUE) <<
+	     R300_SRC_BLEND_SHIFT) |
+	    (blend_factor(ctx->Color.BlendDstA, GL_FALSE) <<
+	     R300_DST_BLEND_SHIFT);
+
+	switch (ctx->Color.BlendEquationA) {
+	case GL_FUNC_ADD:
+		alpha_eqn = R300_COMB_FCN_ADD_CLAMP;
+		break;
+	case GL_FUNC_SUBTRACT:
+		alpha_eqn = R300_COMB_FCN_SUB_CLAMP;
+		break;
+	case GL_FUNC_REVERSE_SUBTRACT:
+		alpha_eqn = R300_COMB_FCN_RSUB_CLAMP;
+		break;
+	case GL_MIN:
+		alpha_eqn = R300_COMB_FCN_MIN;
+		alpha_func = src_one_dst_one;
+		break;
+	case GL_MAX:
+		alpha_eqn = R300_COMB_FCN_MAX;
+		alpha_func = src_one_dst_one;
+		break;
+	default:
+		fprintf(stderr,
+			"[%s:%u] Invalid A blend equation (0x%04x).\n",
+			__FUNCTION__, __LINE__, ctx->Color.BlendEquationA);
+		return;
+	}
+
+	r300SetBlendCntl(r300,
+			 rgb_func, rgb_eqn,
+			 (R300_SEPARATE_ALPHA_ENABLE |
+			  R300_READ_ENABLE |
+			  R300_ALPHA_BLEND_ENABLE), alpha_func, alpha_eqn);
+}
+
+/**
+ * Driver hook for blend equation changes.  The arguments are ignored;
+ * the blend state is rebuilt from the context.
+ */
+static void r300BlendEquationSeparate(GLcontext * ctx,
+				      GLenum modeRGB, GLenum modeA)
+{
+	(void)modeRGB;
+	(void)modeA;
+
+	r300SetBlendState(ctx);
+}
+
+/**
+ * Driver hook for blend factor changes.  The arguments are ignored;
+ * the blend state is rebuilt from the context.
+ */
+static void r300BlendFuncSeparate(GLcontext * ctx,
+				  GLenum sfactorRGB, GLenum dfactorRGB,
+				  GLenum sfactorA, GLenum dfactorA)
+{
+	(void)sfactorRGB;
+	(void)dfactorRGB;
+	(void)sfactorA;
+	(void)dfactorA;
+
+	r300SetBlendState(ctx);
+}
+
+/**
+ * Translate a LogicOp enum into the hardware representation.
+ * Both encodings use a logical bit-wise layout, but the order of the four
+ * bits is reversed, so the offset from GL_CLEAR is bit-mirrored and then
+ * shifted into the ROP field.
+ */
+static GLuint translate_logicop(GLenum logicop)
+{
+	const GLuint gl_bits = logicop - GL_CLEAR;
+	GLuint hw_bits = 0;
+	int i;
+
+	/* mirror bit i onto bit (3 - i) */
+	for (i = 0; i < 4; i++) {
+		if (gl_bits & (1u << i))
+			hw_bits |= 1u << (3 - i);
+	}
+
+	return hw_bits << R300_RB3D_ROPCNTL_ROP_SHIFT;
+}
+
+/**
+ * Bring the rop state atom in line with the current GL logic-op state:
+ * the enable bit plus the translated op when an RGBA logic op is enabled,
+ * zero otherwise.
+ */
+static void r300SetLogicOpState(GLcontext *ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+	R300_STATECHANGE(r300, rop);
+
+	if (!RGBA_LOGICOP_ENABLED(ctx)) {
+		r300->hw.rop.cmd[1] = 0;
+		return;
+	}
+
+	r300->hw.rop.cmd[1] = R300_RB3D_ROPCNTL_ROP_ENABLE |
+		translate_logicop(ctx->Color.LogicOp);
+}
+
+/**
+ * Called by Mesa when the application changes the LogicOp state via
+ * glLogicOp.  The hardware state is only refreshed while an RGBA logic op
+ * is enabled; the opcode itself is read back from the context.
+ */
+static void r300LogicOpcode(GLcontext *ctx, GLenum logicop)
+{
+	(void)logicop;
+
+	if (!RGBA_LOGICOP_ENABLED(ctx))
+		return;
+
+	r300SetLogicOpState(ctx);
+}
+
+/**
+ * Upload one user clip plane to the vpucp state atom.
+ *
+ * The coefficients come from ctx->Transform._ClipUserPlane, not from
+ * \p eq (callers in this file may pass NULL for it).  Nothing is done when
+ * hardware TCL is disabled.
+ *
+ * \param plane  GL_CLIP_PLANEi enum selecting the plane
+ * \param eq     unused
+ */
+static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	const GLfloat *coeffs;
+	GLint p;
+
+	(void) eq;
+
+	/* no VAP UCP on non-TCL chipsets */
+	if (!rmesa->options.hw_tcl_enabled)
+		return;
+
+	p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+	coeffs = ctx->Transform._ClipUserPlane[p];
+
+	/* Pack the raw float bits with the driver helper instead of reading
+	 * the floats through an integer pointer (strict-aliasing violation).
+	 */
+	R300_STATECHANGE( rmesa, vpucp[p] );
+	rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = r300PackFloat32(coeffs[0]);
+	rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = r300PackFloat32(coeffs[1]);
+	rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = r300PackFloat32(coeffs[2]);
+	rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = r300PackFloat32(coeffs[3]);
+}
+
+/**
+ * Enable or disable one user clip plane in the vap_clip_cntl state atom.
+ * When enabling, the plane equation is uploaded as well.  Nothing is done
+ * when hardware TCL is disabled.
+ *
+ * \param cap    GL_CLIP_PLANEi enum selecting the plane
+ * \param state  GL_TRUE to enable, GL_FALSE to disable
+ */
+static void r300SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	GLuint index;
+
+	/* no VAP UCP on non-TCL chipsets */
+	if (!r300->options.hw_tcl_enabled)
+		return;
+
+	index = cap - GL_CLIP_PLANE0;
+	R300_STATECHANGE(r300, vap_clip_cntl);
+
+	if (!state) {
+		r300->hw.vap_clip_cntl.cmd[1] &= ~(R300_VAP_UCP_ENABLE_0 << index);
+		return;
+	}
+
+	r300->hw.vap_clip_cntl.cmd[1] |= (R300_VAP_UCP_ENABLE_0 << index);
+	r300ClipPlane(ctx, cap, NULL);
+}
+
+/**
+ * Recompute the cul state atom from Mesa's polygon state: which faces are
+ * culled and which winding counts as front-facing.
+ */
+static void r300UpdateCulling(GLcontext * ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	uint32_t cull_bits = 0;
+
+	if (ctx->Polygon.CullFlag) {
+		const GLenum face = ctx->Polygon.CullFaceMode;
+
+		if (face == GL_FRONT)
+			cull_bits = R300_CULL_FRONT;
+		else if (face == GL_BACK)
+			cull_bits = R300_CULL_BACK;
+		else if (face == GL_FRONT_AND_BACK)
+			cull_bits = R300_CULL_FRONT | R300_CULL_BACK;
+	}
+
+	if (ctx->Polygon.FrontFace == GL_CW)
+		cull_bits |= R300_FRONT_FACE_CW;
+	else if (ctx->Polygon.FrontFace == GL_CCW)
+		cull_bits |= R300_FRONT_FACE_CCW;
+
+	/* Winding is inverted when rendering to FBO */
+	if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+		cull_bits ^= R300_FRONT_FACE_CW;
+
+	R300_STATECHANGE(r300, cul);
+	r300->hw.cul.cmd[R300_CUL_CULL] = cull_bits;
+}
+
+/**
+ * Set (state != 0) or clear the two lowest bits of the occlusion_cntl
+ * state atom's first data word.
+ */
+static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+	R300_STATECHANGE(r300, occlusion_cntl);
+
+	if (!state) {
+		r300->hw.occlusion_cntl.cmd[1] &= ~(3 << 0);
+		return;
+	}
+
+	r300->hw.occlusion_cntl.cmd[1] |= (3 << 0);
+}
+
+/**
+ * \return GL_TRUE when a fragment program is current and the selected
+ *         hardware fragment program is flagged as writing depth.
+ */
+static GLboolean current_fragment_program_writes_depth(GLcontext* ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+	/* selected_fp is only consulted while a fragment program is current */
+	if (!ctx->FragmentProgram._Current)
+		return GL_FALSE;
+
+	return r300->selected_fp->code.writes_depth ? GL_TRUE : GL_FALSE;
+}
+
+/**
+ * Update the ZTOP setting, the W output format and the fragment depth
+ * source.  ZTOP is disabled when an alpha test other than GL_ALWAYS is on,
+ * when the fragment program writes depth or uses KIL, or while a query is
+ * current.  Each state atom is only flagged when its value changes.
+ */
+static void r300SetEarlyZState(GLcontext * ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	const GLboolean shader_depth = current_fragment_program_writes_depth(ctx);
+	GLuint ztop, w_format, depth_source;
+
+	if ((ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) ||
+	    shader_depth ||
+	    (ctx->FragmentProgram._Current &&
+	     ctx->FragmentProgram._Current->UsesKill) ||
+	    r300->radeon.query.current)
+		ztop = R300_ZTOP_DISABLE;
+	else
+		ztop = R300_ZTOP_ENABLE;
+
+	if (ztop != r300->hw.zstencil_format.cmd[2]) {
+		/* Note: This completely reemits the stencil format.
+		 * I have not tested whether this is strictly necessary,
+		 * or if emitting a write to ZB_ZTOP is enough.
+		 */
+		R300_STATECHANGE(r300, zstencil_format);
+		r300->hw.zstencil_format.cmd[2] = ztop;
+	}
+
+	/* w_fmt value is set to get best performance
+	 * see p.130 R5xx 3D acceleration guide v1.3 */
+	if (shader_depth) {
+		depth_source = R300_FG_DEPTH_SRC_SHADER;
+		w_format = R300_W_FMT_W24 | R300_W_SRC_US;
+	} else {
+		depth_source = R300_FG_DEPTH_SRC_SCAN;
+		w_format = R300_W_FMT_W0 | R300_W_SRC_US;
+	}
+
+	if (w_format != r300->hw.us_out_fmt.cmd[5]) {
+		R300_STATECHANGE(r300, us_out_fmt);
+		r300->hw.us_out_fmt.cmd[5] = w_format;
+	}
+
+	if (depth_source != r300->hw.fg_depth_src.cmd[1]) {
+		R300_STATECHANGE(r300, fg_depth_src);
+		r300->hw.fg_depth_src.cmd[1] = depth_source;
+	}
+}
+
+/**
+ * Program the alpha test from the current GL colour state.
+ *
+ * GL_ALWAYS is treated as "test disabled".  When the test is active the
+ * word holds the compare function, the enable and 8-bit flags and the
+ * reference value; otherwise it is zero.
+ */
+static void r300SetAlphaState(GLcontext * ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	GLboolean test_active = ctx->Color.AlphaEnabled;
+	uint32_t compare = 0x0;
+	uint32_t alpha_word = 0x0;
+	GLubyte ref;
+
+	CLAMPED_FLOAT_TO_UBYTE(ref, ctx->Color.AlphaRef);
+
+	switch (ctx->Color.AlphaFunc) {
+	case GL_NEVER:
+		compare = R300_FG_ALPHA_FUNC_NEVER;
+		break;
+	case GL_LESS:
+		compare = R300_FG_ALPHA_FUNC_LESS;
+		break;
+	case GL_EQUAL:
+		compare = R300_FG_ALPHA_FUNC_EQUAL;
+		break;
+	case GL_LEQUAL:
+		compare = R300_FG_ALPHA_FUNC_LE;
+		break;
+	case GL_GREATER:
+		compare = R300_FG_ALPHA_FUNC_GREATER;
+		break;
+	case GL_NOTEQUAL:
+		compare = R300_FG_ALPHA_FUNC_NOTEQUAL;
+		break;
+	case GL_GEQUAL:
+		compare = R300_FG_ALPHA_FUNC_GE;
+		break;
+	case GL_ALWAYS:
+		/* always passes, so the test can stay off */
+		test_active = GL_FALSE;
+		break;
+	}
+
+	if (test_active) {
+		alpha_word = compare |
+			R300_FG_ALPHA_FUNC_ENABLE |
+			R500_FG_ALPHA_FUNC_8BIT |
+			(ref & R300_FG_ALPHA_FUNC_VAL_MASK);
+	}
+
+	R300_STATECHANGE(r300, at);
+	r300->hw.at.cmd[R300_AT_ALPHA_TEST] = alpha_word;
+	r300->hw.at.cmd[R300_AT_UNKNOWN] = 0;
+}
+
+/**
+ * Driver hook for alpha function changes.  Both arguments are ignored;
+ * the alpha state is rebuilt from the context.
+ */
+static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
+{
+	(void)ref;
+	(void)func;
+
+	r300SetAlphaState(ctx);
+}
+
+/**
+ * Map a GL comparison function onto the hardware R300_ZS_* encoding.
+ *
+ * \return the hardware value, or 0 for an unrecognised function.
+ */
+static int translate_func(int func)
+{
+	int hw_func = 0;
+
+	switch (func) {
+	case GL_NEVER:    hw_func = R300_ZS_NEVER;    break;
+	case GL_LESS:     hw_func = R300_ZS_LESS;     break;
+	case GL_EQUAL:    hw_func = R300_ZS_EQUAL;    break;
+	case GL_LEQUAL:   hw_func = R300_ZS_LEQUAL;   break;
+	case GL_GREATER:  hw_func = R300_ZS_GREATER;  break;
+	case GL_NOTEQUAL: hw_func = R300_ZS_NOTEQUAL; break;
+	case GL_GEQUAL:   hw_func = R300_ZS_GEQUAL;   break;
+	case GL_ALWAYS:   hw_func = R300_ZS_ALWAYS;   break;
+	default:
+		break;
+	}
+
+	return hw_func;
+}
+
+/**
+ * Rebuild the depth part of the zs state atom.
+ *
+ * Only the stencil-related bits of control word 0 are kept and the Z
+ * function field of control word 1 is cleared.  Depth test, depth write
+ * and the compare function are then set again if depth testing is enabled
+ * and the draw buffer has a depth buffer.
+ */
+static void r300SetDepthState(GLcontext * ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	const uint32_t stencil_bits = R300_STENCIL_ENABLE |
+				      R300_STENCIL_FRONT_BACK |
+				      R500_STENCIL_REFMASK_FRONT_BACK;
+
+	R300_STATECHANGE(r300, zs);
+	r300->hw.zs.cmd[R300_ZS_CNTL_0] &= stencil_bits;
+	r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT);
+
+	if (!ctx->Depth.Test || !ctx->DrawBuffer->_DepthBuffer)
+		return;
+
+	r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE;
+	if (ctx->Depth.Mask)
+		r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_WRITE_ENABLE;
+	r300->hw.zs.cmd[R300_ZS_CNTL_1] |=
+	    translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT;
+}
+
+/**
+ * Switch the two-sided stencil fallback on or off.
+ *
+ * The fallback is never requested on RV515 and later chips when the kernel
+ * memory manager is in use.  Otherwise it is requested while stencil is
+ * enabled and the front and back reference value, value mask or write
+ * mask differ.
+ */
+static void r300CatchStencilFallback(GLcontext *ctx)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	const unsigned back = ctx->Stencil._BackFace;
+	GLboolean need_fallback = GL_FALSE;
+
+	if (!(rmesa->radeon.radeonScreen->kernel_mm &&
+	      (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)) &&
+	    ctx->Stencil._Enabled) {
+		if (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
+		    || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back]
+		    || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])
+			need_fallback = GL_TRUE;
+	}
+
+	r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, need_fallback);
+}
+
+/**
+ * Enable or disable the stencil test in the zs state atom.
+ *
+ * The fallback check always runs.  The enable bit is only touched when
+ * the draw buffer has a stencil renderbuffer backed by a buffer object.
+ */
+static void r300SetStencilState(GLcontext * ctx, GLboolean state)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	struct radeon_renderbuffer *stencil_rb = NULL;
+
+	r300CatchStencilFallback(ctx);
+
+	if (ctx->DrawBuffer)
+		stencil_rb = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+
+	/* no usable hardware stencil buffer: leave the state alone */
+	if (!stencil_rb || !stencil_rb->bo)
+		return;
+
+	R300_STATECHANGE(r300, zs);
+	if (state)
+		r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_ENABLE;
+	else
+		r300->hw.zs.cmd[R300_ZS_CNTL_0] &= ~R300_STENCIL_ENABLE;
+}
+
+/**
+ * Recompute the polygon_mode state atom from the GL front/back polygon
+ * modes.  The atom is only flagged as changed when the mode word differs;
+ * the two trailing words are always rewritten with fixed values.
+ */
+static void r300UpdatePolygonMode(GLcontext * ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	uint32_t mode_word = R300_GA_POLY_MODE_DISABLE;
+
+	/* Only do something if a polygon mode is wanted, default is GL_FILL */
+	if (ctx->Polygon.FrontMode != GL_FILL ||
+	    ctx->Polygon.BackMode != GL_FILL) {
+		/* Handle GL_CW (clockwise) and GL_CCW (counter-clockwise)
+		 * correctly by selecting the correct front and back face
+		 */
+		const GLboolean ccw = (ctx->Polygon.FrontFace == GL_CCW);
+		const GLenum front = ccw ? ctx->Polygon.FrontMode
+					 : ctx->Polygon.BackMode;
+		const GLenum rear = ccw ? ctx->Polygon.BackMode
+					: ctx->Polygon.FrontMode;
+
+		/* Enable polygon mode */
+		mode_word |= R300_GA_POLY_MODE_DUAL;
+
+		if (front == GL_LINE)
+			mode_word |= R300_GA_POLY_MODE_FRONT_PTYPE_LINE;
+		else if (front == GL_POINT)
+			mode_word |= R300_GA_POLY_MODE_FRONT_PTYPE_POINT;
+		else if (front == GL_FILL)
+			mode_word |= R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
+
+		if (rear == GL_LINE)
+			mode_word |= R300_GA_POLY_MODE_BACK_PTYPE_LINE;
+		else if (rear == GL_POINT)
+			mode_word |= R300_GA_POLY_MODE_BACK_PTYPE_POINT;
+		else if (rear == GL_FILL)
+			mode_word |= R300_GA_POLY_MODE_BACK_PTYPE_TRI;
+	}
+
+	if (r300->hw.polygon_mode.cmd[1] != mode_word) {
+		R300_STATECHANGE(r300, polygon_mode);
+		r300->hw.polygon_mode.cmd[1] = mode_word;
+	}
+
+	r300->hw.polygon_mode.cmd[2] = 0x00000001;
+	r300->hw.polygon_mode.cmd[3] = 0x00000000;
+}
+
+/**
+ * Driver hook: the culling mode changed.  The new mode is read back from
+ * the context rather than from \p mode.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300CullFace(GLcontext * ctx, GLenum mode)
+{
+	(void) mode;
+	r300UpdateCulling(ctx);
+}
+
+/**
+ * Driver hook: the polygon orientation changed.  Both the culling state
+ * and the polygon mode depend on the winding, so both are recomputed from
+ * the context.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300FrontFace(GLcontext * ctx, GLenum mode)
+{
+	(void) mode;
+	r300UpdateCulling(ctx);
+	r300UpdatePolygonMode(ctx);
+}
+
+/**
+ * Driver hook: the depth testing function changed.  The depth state is
+ * rebuilt from the context; \p func is not used.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300DepthFunc(GLcontext * ctx, GLenum func)
+{
+	(void) func;
+
+	r300SetDepthState(ctx);
+}
+
+/**
+ * Driver hook: depth writing was enabled or disabled.  The depth state is
+ * rebuilt from the context; \p mask is not used.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300DepthMask(GLcontext * ctx, GLboolean mask)
+{
+	(void) mask;
+
+	r300SetDepthState(ctx);
+}
+
+/**
+ * Handle glColorMask(): build the per-channel write mask and update the
+ * cmk state atom only when the mask actually changes.
+ */
+static void r300ColorMask(GLcontext * ctx,
+			  GLboolean r, GLboolean g, GLboolean b, GLboolean a)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	int channel_mask = 0;
+
+	if (r)
+		channel_mask |= RB3D_COLOR_CHANNEL_MASK_RED_MASK0;
+	if (g)
+		channel_mask |= RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0;
+	if (b)
+		channel_mask |= RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0;
+	if (a)
+		channel_mask |= RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0;
+
+	if (channel_mask == r300->hw.cmk.cmd[R300_CMK_COLORMASK])
+		return;
+
+	R300_STATECHANGE(r300, cmk);
+	r300->hw.cmk.cmd[R300_CMK_COLORMASK] = channel_mask;
+}
+
+/* =============================================================
+ * Point state
+ */
+/**
+ * Program the fixed point size.  The size is clamped first to the user
+ * range and then to the implementation limits, scaled by 6 and written
+ * into both the X and Y fields of the ps state atom.
+ */
+static void r300PointSize(GLcontext * ctx, GLfloat size)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	int hw_size;
+
+	/* We need to clamp to user defined range here, because
+	 * the HW clamping happens only for per vertex point size. */
+	size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize);
+
+	/* same size limits for AA, non-AA points */
+	size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize);
+
+	hw_size = (int)(size * 6);
+
+	R300_STATECHANGE(r300, ps);
+	r300->hw.ps.cmd[R300_PS_POINTSIZE] =
+	    (hw_size << R300_POINTSIZE_X_SHIFT) |
+	    (hw_size << R300_POINTSIZE_Y_SHIFT);
+}
+
+/**
+ * Driver hook for glPointParameter.  Only the minimum and maximum point
+ * size are handled: the matching field of ga_point_minmax is rewritten
+ * from the context (scaled by 6) and the fixed point size is re-clamped.
+ * All other parameters are ignored; \p param is not read.
+ */
+static void r300PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+	(void) param;
+
+	if (pname == GL_POINT_SIZE_MIN) {
+		R300_STATECHANGE(r300, ga_point_minmax);
+		r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MIN_MASK;
+		r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MinSize * 6.0);
+		r300PointSize(ctx, ctx->Point.Size);
+	} else if (pname == GL_POINT_SIZE_MAX) {
+		R300_STATECHANGE(r300, ga_point_minmax);
+		r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MAX_MASK;
+		r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MaxSize * 6.0)
+			<< R300_GA_POINT_MINMAX_MAX_SHIFT;
+		r300PointSize(ctx, ctx->Point.Size);
+	}
+	/* GL_POINT_DISTANCE_ATTENUATION, GL_POINT_FADE_THRESHOLD_SIZE and
+	 * anything else: nothing to do */
+}
+
+/* =============================================================
+ * Line state
+ */
+/**
+ * Program the line width.  The width is clamped to the implementation's
+ * line width limits, scaled by 6 and combined with the HO/VE line control
+ * bits in the lcntl state atom.
+ */
+static void r300LineWidth(GLcontext * ctx, GLfloat widthf)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+	/* Clamp against the line width limits, not the point size limits. */
+	widthf = CLAMP(widthf,
+                       ctx->Const.MinLineWidth,
+                       ctx->Const.MaxLineWidth);
+	R300_STATECHANGE(r300, lcntl);
+	r300->hw.lcntl.cmd[1] =
+	    R300_LINE_CNT_HO | R300_LINE_CNT_VE | (int)(widthf * 6.0);
+}
+
+/**
+ * Driver hook for glPolygonMode.  Both arguments are ignored; the polygon
+ * mode is recomputed from the context.
+ */
+static void r300PolygonMode(GLcontext * ctx, GLenum face, GLenum mode)
+{
+	(void) mode;
+	(void) face;
+	r300UpdatePolygonMode(ctx);
+}
+
+/* =============================================================
+ * Stencil
+ */
+
+/**
+ * Map a GL stencil operation onto the hardware R300_ZS_* encoding.
+ * An unknown operation triggers a one-time warning and maps to KEEP.
+ */
+static int translate_stencil_op(int op)
+{
+	switch (op) {
+	case GL_KEEP:          return R300_ZS_KEEP;
+	case GL_ZERO:          return R300_ZS_ZERO;
+	case GL_REPLACE:       return R300_ZS_REPLACE;
+	case GL_INCR:          return R300_ZS_INCR;
+	case GL_DECR:          return R300_ZS_DECR;
+	case GL_INCR_WRAP_EXT: return R300_ZS_INCR_WRAP;
+	case GL_DECR_WRAP_EXT: return R300_ZS_DECR_WRAP;
+	case GL_INVERT:        return R300_ZS_INVERT;
+	default:
+		WARN_ONCE("Do not know how to translate stencil op");
+		return R300_ZS_KEEP;
+	}
+}
+
+/**
+ * Driver hook for glShadeModel.  Writes a fixed value to the shade atom
+ * and selects flat or smooth shading in the shade2 atom.  For any other
+ * mode both atoms are still flagged, but shade2's contents are left as
+ * they were.
+ */
+static void r300ShadeModel(GLcontext * ctx, GLenum mode)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	uint32_t model;
+
+	R300_STATECHANGE(rmesa, shade);
+	rmesa->hw.shade.cmd[1] = 0x00000002;
+	R300_STATECHANGE(rmesa, shade2);
+
+	if (mode == GL_FLAT)
+		model = R300_RE_SHADE_MODEL_FLAT;
+	else if (mode == GL_SMOOTH)
+		model = R300_RE_SHADE_MODEL_SMOOTH;
+	else
+		return;
+
+	rmesa->hw.shade2.cmd[1] = model;
+	rmesa->hw.shade2.cmd[2] = 0x00000000;
+	rmesa->hw.shade2.cmd[3] = 0x00000000;
+}
+
+/**
+ * Driver hook for glStencilFunc[Separate].
+ *
+ * The face/func/ref/mask arguments are not used: front and back compare
+ * functions, reference values and value masks are all re-read from
+ * ctx->Stencil and written into the zs state atom.  On RV515 and later
+ * the back-face reference value and value mask are additionally written
+ * to the zsb state atom.
+ */
+static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
+				    GLenum func, GLint ref, GLuint mask)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	GLuint refmask;
+	GLuint flag;
+	const unsigned back = ctx->Stencil._BackFace;
+
+	r300CatchStencilFallback(ctx);
+
+	/* front-face reference value and value mask, 8 bits each */
+	refmask = ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT)
+	     | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT);
+
+	R300_STATECHANGE(rmesa, zs);
+	rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK;
+	/* clear both compare-function fields before refilling them */
+	rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~((R300_ZS_MASK <<
+					       R300_S_FRONT_FUNC_SHIFT)
+					      | (R300_ZS_MASK <<
+						 R300_S_BACK_FUNC_SHIFT));
+
+	/* clear the reference and value-mask fields; other bits are kept */
+	rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=
+	    ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) |
+	      (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT));
+
+	flag = translate_func(ctx->Stencil.Function[0]);
+	rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+	    (flag << R300_S_FRONT_FUNC_SHIFT);
+
+	flag = translate_func(ctx->Stencil.Function[back]);
+
+	rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+	    (flag << R300_S_BACK_FUNC_SHIFT);
+	rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask;
+
+	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+		/* separate back-face reference value and value mask */
+		rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R500_STENCIL_REFMASK_FRONT_BACK;
+		R300_STATECHANGE(rmesa, zsb);
+		refmask = ((ctx->Stencil.Ref[back] & 0xff) << R300_STENCILREF_SHIFT)
+			| ((ctx->Stencil.ValueMask[back] & 0xff) << R300_STENCILMASK_SHIFT);
+
+		rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] &=
+			~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) |
+			  (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT));
+		rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |= refmask;
+	}
+}
+
+/**
+ * Driver hook for glStencilMask[Separate].
+ *
+ * The face/mask arguments are not used: the front write mask is re-read
+ * from ctx->Stencil and written into the zs state atom.  On RV515 and
+ * later the back-face write mask is written into the zsb state atom.
+ * In both cases the old write-mask field is cleared first, so bits from
+ * a previous mask cannot linger.
+ */
+static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	const unsigned back = ctx->Stencil._BackFace;
+
+	(void) face;
+	(void) mask;
+
+	r300CatchStencilFallback(ctx);
+
+	R300_STATECHANGE(rmesa, zs);
+	rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=
+	    ~(R300_STENCILREF_MASK <<
+	      R300_STENCILWRITEMASK_SHIFT);
+	rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |=
+	    (ctx->Stencil.
+	     WriteMask[0] & R300_STENCILREF_MASK) <<
+	     R300_STENCILWRITEMASK_SHIFT;
+	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+		R300_STATECHANGE(rmesa, zsb);
+		/* Clear the previous back-face write mask before OR-ing in
+		 * the new one, exactly as is done for the front face above.
+		 */
+		rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] &=
+			~(R300_STENCILREF_MASK <<
+			  R300_STENCILWRITEMASK_SHIFT);
+		rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |=
+			(ctx->Stencil.
+			 WriteMask[back] & R300_STENCILREF_MASK) <<
+			R300_STENCILWRITEMASK_SHIFT;
+	}
+}
+
+/**
+ * Driver hook for glStencilOp[Separate].
+ *
+ * The face/fail/zfail/zpass arguments are not used: the front and back
+ * stencil-fail, depth-fail and depth-pass operations are re-read from
+ * ctx->Stencil, translated and written into control word 1 of the zs
+ * state atom.
+ */
+static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
+				  GLenum fail, GLenum zfail, GLenum zpass)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	const unsigned back = ctx->Stencil._BackFace;
+
+	r300CatchStencilFallback(ctx);
+
+	R300_STATECHANGE(rmesa, zs);
+	/* It is easier to mask what's left.. */
+	/* i.e. keep only the Z and stencil compare-function fields; every
+	 * other bit of the word is cleared and rebuilt below */
+	rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &=
+	    (R300_ZS_MASK << R300_Z_FUNC_SHIFT) |
+	    (R300_ZS_MASK << R300_S_FRONT_FUNC_SHIFT) |
+	    (R300_ZS_MASK << R300_S_BACK_FUNC_SHIFT);
+
+	/* front-face operations */
+	rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+	    (translate_stencil_op(ctx->Stencil.FailFunc[0]) <<
+	     R300_S_FRONT_SFAIL_OP_SHIFT)
+	    | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) <<
+	       R300_S_FRONT_ZFAIL_OP_SHIFT)
+	    | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) <<
+	       R300_S_FRONT_ZPASS_OP_SHIFT);
+
+	/* back-face operations */
+	rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+	    (translate_stencil_op(ctx->Stencil.FailFunc[back]) <<
+	     R300_S_BACK_SFAIL_OP_SHIFT)
+	    | (translate_stencil_op(ctx->Stencil.ZFailFunc[back]) <<
+	       R300_S_BACK_ZFAIL_OP_SHIFT)
+	    | (translate_stencil_op(ctx->Stencil.ZPassFunc[back]) <<
+	       R300_S_BACK_ZPASS_OP_SHIFT);
+}
+
+/* =============================================================
+ * Window position and viewport transformation
+ */
+
+/**
+ * Recompute the viewport scale/offset for X, Y and Z and store the packed
+ * values in the vpt state atom.
+ *
+ * When rendering to the window system framebuffer (DrawBuffer->Name == 0) the
+ * Y axis is flipped and biased by the drawable's y + h; for FBO rendering Y
+ * is used unchanged.  A missing drawable contributes zero offsets.
+ */
+static void r300UpdateWindow(GLcontext * ctx)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	__DRIdrawable *drawable = radeon_get_drawable(&rmesa->radeon);
+	const GLfloat *m = ctx->Viewport._WindowMap.m;
+	const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+	const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+	GLfloat win_x = 0;
+	GLfloat win_bottom = 0;
+	GLfloat y_scale, y_bias;
+	GLfloat scale_x, scale_y, scale_z;
+	GLfloat trans_x, trans_y, trans_z;
+
+	if (drawable) {
+		win_x = (GLfloat) drawable->x;
+		win_bottom = (GLfloat) drawable->y + drawable->h;
+	}
+
+	y_scale = render_to_fbo ? 1.0 : -1.0;
+	y_bias = render_to_fbo ? 0 : win_bottom;
+
+	scale_x = m[MAT_SX];
+	scale_y = m[MAT_SY] * y_scale;
+	scale_z = m[MAT_SZ] * depthScale;
+	trans_x = m[MAT_TX] + win_x;
+	trans_y = (m[MAT_TY] * y_scale) + y_bias;
+	trans_z = m[MAT_TZ] * depthScale;
+
+	R300_STATECHANGE(rmesa, vpt);
+
+	rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(scale_x);
+	rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(trans_x);
+	rmesa->hw.vpt.cmd[R300_VPT_YSCALE] = r300PackFloat32(scale_y);
+	rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(trans_y);
+	rmesa->hw.vpt.cmd[R300_VPT_ZSCALE] = r300PackFloat32(scale_z);
+	rmesa->hw.vpt.cmd[R300_VPT_ZOFFSET] = r300PackFloat32(trans_z);
+}
+
+/**
+ * Viewport driver hook: refresh the hardware viewport transform, then hand
+ * the new rectangle to the shared radeon viewport handler.
+ */
+static void r300Viewport(GLcontext * ctx, GLint x, GLint y,
+			 GLsizei width, GLsizei height)
+{
+	/* Viewport changes are applied immediately rather than pipelined,
+	 * because they would conflict with the window offset handling.
+	 * Alternatives would be to apply deltas to pipelined values or to
+	 * keep the original values around.
+	 */
+	r300UpdateWindow(ctx);
+	radeon_viewport(ctx, x, y, width, height);
+}
+
+/**
+ * Depth range driver hook.  \p nearval and \p farval are not used directly;
+ * r300UpdateWindow() rebuilds the whole transform from the context's
+ * window map.
+ */
+static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval)
+{
+	r300UpdateWindow(ctx);
+}
+
+/**
+ * Refresh the X/Y viewport offsets after the drawable may have moved or
+ * been resized, then update the scissor.
+ *
+ * The vpt atom is only marked changed when one of the packed offsets
+ * actually differs from the value currently stored.
+ */
+void r300UpdateViewportOffset(GLcontext * ctx)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	__DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
+	/* Same fallback as r300UpdateWindow(): without a drawable there is no
+	 * window offset to apply, so use zero instead of dereferencing NULL.
+	 */
+	GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
+	GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
+	const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+	GLfloat tx = v[MAT_TX] + xoffset;
+	/* Y is flipped relative to the window map, hence the negation. */
+	GLfloat ty = (-v[MAT_TY]) + yoffset;
+
+	if (rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] != r300PackFloat32(tx) ||
+	    rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] != r300PackFloat32(ty)) {
+		/* Note: this should also modify whatever data the context reset
+		 * code uses...
+		 */
+		R300_STATECHANGE(rmesa, vpt);
+		rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx);
+		rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty);
+	}
+
+	radeonUpdateScissor(ctx);
+}
+
+/**
+ * Reload the state-derived parameters of the current fragment program.
+ *
+ * Nothing happens unless \p new_state contains one of _NEW_BUFFERS,
+ * _NEW_PROGRAM or _NEW_PROGRAM_CONSTANTS, a fragment program is both
+ * current and selected, and that program has a parameter list.
+ */
+static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
+{
+	const GLuint relevant =
+	    _NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS;
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	struct gl_program_parameter_list *params;
+
+	if ((new_state & relevant) == 0)
+		return;
+
+	if (ctx->FragmentProgram._Current && rmesa->selected_fp) {
+		params = ctx->FragmentProgram._Current->Base.Parameters;
+		if (params)
+			_mesa_load_state_parameters(ctx, params);
+	}
+}
+
+/* =============================================================
+ * Polygon state
+ */
+/**
+ * Program the polygon offset factor and constant into the zbs atom.
+ *
+ * \p units is scaled by 4 for 16-bit and by 2 for 24-bit depth visuals (it
+ * is used unscaled otherwise) and \p factor is scaled by 12.  The same pair
+ * is written to both the T and W register sets.
+ */
+static void r300PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	const int depth_bits = ctx->Visual.depthBits;
+	GLfloat constant = units;
+
+	if (depth_bits == 16)
+		constant *= 4.0;
+	else if (depth_bits == 24)
+		constant *= 2.0;
+
+	factor *= 12.0;
+
+	R300_STATECHANGE(rmesa, zbs);
+	rmesa->hw.zbs.cmd[R300_ZBS_T_FACTOR] = r300PackFloat32(factor);
+	rmesa->hw.zbs.cmd[R300_ZBS_T_CONSTANT] = r300PackFloat32(constant);
+	rmesa->hw.zbs.cmd[R300_ZBS_W_FACTOR] = r300PackFloat32(factor);
+	rmesa->hw.zbs.cmd[R300_ZBS_W_CONSTANT] = r300PackFloat32(constant);
+}
+
+/* Routing and texture-related */
+
+/* r300 doesn't handle GL_CLAMP and GL_MIRROR_CLAMP_EXT correctly when the filter is NEAREST.
+ * Since texwrap produces the same results for GL_CLAMP and GL_CLAMP_TO_EDGE, we use the latter instead.
+ * We need to recalculate the wrap modes whenever the filter mode is changed, because someone might do:
+ * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
+ * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ * Since r300 completely ignores R300_TX_CLAMP when either min or mag is nearest, it can't handle
+ * combinations where only one of them is nearest.
+ */
+/**
+ * Adjust a texture filter word so that it avoids the wrap/filter
+ * combinations handled below.
+ *
+ * If no wrap axis uses R300_TX_CLAMP (mirror bit ignored), or neither the
+ * mag nor the min filter is NEAREST, \p f is returned unchanged.  If exactly
+ * one of the two filters is NEAREST, that filter is switched to LINEAR.  If
+ * both are NEAREST, every clamped axis is switched to CLAMP_TO_EDGE.
+ *
+ * \param f  filter word to inspect.
+ * \return   the possibly modified filter word.
+ */
+static unsigned long gen_fixed_filter(unsigned long f)
+{
+	const int wrap_shift[3] = {
+		R300_TX_WRAP_S_SHIFT,
+		R300_TX_WRAP_T_SHIFT,
+		R300_TX_WRAP_R_SHIFT
+	};
+	unsigned long mag_filter, min_filter;
+	int mag_is_nearest, min_is_nearest;
+	int clamped_axes = 0;
+	int axis;
+
+	/* The mask (7 - 1) leaves out the MIRROR bit, so the mirrored and
+	 * non-mirrored clamp variants are detected by the same test.
+	 */
+	for (axis = 0; axis < 3; axis++) {
+		if ((f & ((7 - 1) << wrap_shift[axis])) ==
+		    (R300_TX_CLAMP << wrap_shift[axis]))
+			clamped_axes |= 1 << axis;
+	}
+
+	if (clamped_axes == 0)
+		return f;
+
+	mag_filter = f & R300_TX_MAG_FILTER_MASK;
+	min_filter = f & (R300_TX_MIN_FILTER_MASK|R300_TX_MIN_FILTER_MIP_MASK);
+	mag_is_nearest = (mag_filter == R300_TX_MAG_FILTER_NEAREST);
+	min_is_nearest = (min_filter == R300_TX_MIN_FILTER_NEAREST);
+
+	/* TODO: Check for anisotropic filters too */
+	if (!mag_is_nearest && !min_is_nearest)
+		return f;
+
+	/* Only one of the two filters is nearest: the hardware cannot handle
+	 * that together with CLAMP, so force the nearest one to linear.
+	 */
+	if (mag_is_nearest && !min_is_nearest) {
+		f &= ~R300_TX_MAG_FILTER_NEAREST;
+		f |= R300_TX_MAG_FILTER_LINEAR;
+		return f;
+	}
+	if (min_is_nearest && !mag_is_nearest) {
+		f &= ~R300_TX_MIN_FILTER_NEAREST;
+		f |= R300_TX_MIN_FILTER_LINEAR;
+		return f;
+	}
+
+	/* Both filters are nearest: replace CLAMP by CLAMP_TO_EDGE per axis. */
+	for (axis = 0; axis < 3; axis++) {
+		if (clamped_axes & (1 << axis)) {
+			f &= ~((7 - 1) << wrap_shift[axis]);
+			f |= R300_TX_CLAMP_TO_EDGE << wrap_shift[axis];
+		}
+	}
+	return f;
+}
+
+/**
+ * Copy the selected fragment program's texture instructions into the fpt
+ * atom, replacing each GL texture unit id by its hardware unit.
+ *
+ * \param ctx           GL context.
+ * \param tmu_mappings  per GL unit, the hardware unit index or a negative
+ *                      value when the unit has no hardware mapping.
+ *
+ * KIL instructions, and instructions whose unit has no mapping (e.g. the
+ * texture image is incomplete), are emitted with the texture id cleared.
+ */
+static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	struct r300_fragment_program_code *code = &r300->selected_fp->code.code.r300;
+	int n;
+
+	R300_STATECHANGE(r300, fpt);
+
+	for (n = 0; n < code->tex.length; n++) {
+		unsigned long inst = code->tex.inst[n];
+		int gl_unit = (inst >> R300_TEX_ID_SHIFT) & 15;
+		int op;
+
+		inst &= ~R300_TEX_ID_MASK;
+		op = (inst & R300_TEX_INST_MASK) >> R300_TEX_INST_SHIFT;
+
+		if (op != R300_TEX_OP_KIL && tmu_mappings[gl_unit] >= 0)
+			inst |= tmu_mappings[gl_unit] << R300_TEX_ID_SHIFT;
+
+		r300->hw.fpt.cmd[R300_FPT_INSTR_0 + n] = inst;
+	}
+
+	r300->hw.fpt.cmd[R300_FPT_CMD_0] =
+		cmdpacket0(r300->radeon.radeonScreen,
+			   R300_US_TEX_INST_0, code->tex.length);
+}
+
+/**
+ * Rewrite the texture unit field of every texture instruction in the
+ * selected R500 fragment program, in place.
+ *
+ * \param ctx           GL context.
+ * \param tmu_mappings  per GL unit, the hardware unit index or a negative
+ *                      value when the unit has no hardware mapping.
+ *
+ * TEXKILL instructions and unmapped units are assigned hardware unit 0.
+ */
+static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	struct r500_fragment_program_code *code = &r300->selected_fp->code.code.r500;
+	int n;
+
+	for (n = 0; n < code->inst_end + 1; n++) {
+		uint32_t word;
+		int gl_unit, op, hw_unit;
+
+		/* Only texture instructions carry a unit id. */
+		if ((code->inst[n].inst0 & 0x3) != R500_INST_TYPE_TEX)
+			continue;
+
+		word = code->inst[n].inst1;
+		gl_unit = (word >> 16) & 0xf;
+		word &= ~(0xf << 16);
+		op = word & (0x7 << 22);
+
+		hw_unit = 0;
+		if (op != R500_TEX_INST_TEXKILL && tmu_mappings[gl_unit] >= 0)
+			hw_unit = tmu_mappings[gl_unit];
+
+		word |= R500_TEX_ID(hw_unit);
+		code->inst[n].inst1 = word;
+	}
+}
+
+/**
+ * Convert a floating point LOD bias into the hardware LOD bias field.
+ *
+ * The bias is multiplied by 32, truncated to an integer, clamped to
+ * [-512, 511], then shifted and masked into place.
+ */
+static GLuint translate_lod_bias(GLfloat bias)
+{
+	const GLint limit = 1 << 9;
+	GLint fixed = (int)(bias*32);
+
+	if (fixed >= limit)
+		fixed = limit - 1;
+	else if (fixed < -limit)
+		fixed = -limit;
+
+	return (((GLuint)fixed) << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK;
+}
+
+
+/**
+ * Build the hardware texture state from the enabled GL texture units.
+ *
+ * Enabled GL units are packed into consecutive hardware units; for each one
+ * the filter, size, format, pitch, chroma key and border colour values are
+ * stored in the corresponding tex atoms and the unit is enabled in the txe
+ * atom.  The packet headers are then sized to the number of hardware units
+ * in use and the GL-to-hardware unit mapping is handed to the chip specific
+ * SetupFragmentShaderTextures hook.
+ *
+ * Terminates the process if the context reports more texture units than
+ * R300_MAX_TEXTURE_UNITS.
+ */
+static void r300SetupTextures(GLcontext * ctx)
+{
+	int i, mtu;
+	struct radeon_tex_obj *t;
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	int hw_tmu = 0;
+	int last_hw_tmu = -1;	/* -1 translates into no setup costs for fields */
+	int tmu_mappings[R300_MAX_TEXTURE_UNITS];
+
+	/* Mark every GL unit as unmapped.  An initializer of the form
+	 * "{ -1, }" would only set the first element to -1 and zero the
+	 * rest, making disabled units look as if they mapped to hw unit 0.
+	 */
+	for (i = 0; i < R300_MAX_TEXTURE_UNITS; i++)
+		tmu_mappings[i] = -1;
+
+	R300_STATECHANGE(r300, txe);
+	R300_STATECHANGE(r300, tex.filter);
+	R300_STATECHANGE(r300, tex.filter_1);
+	R300_STATECHANGE(r300, tex.size);
+	R300_STATECHANGE(r300, tex.format);
+	R300_STATECHANGE(r300, tex.pitch);
+	R300_STATECHANGE(r300, tex.offset);
+	R300_STATECHANGE(r300, tex.chroma_key);
+	R300_STATECHANGE(r300, tex.border_color);
+
+	r300->hw.txe.cmd[R300_TXE_ENABLE] = 0x0;
+
+	mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
+	if (RADEON_DEBUG & RADEON_STATE)
+		fprintf(stderr, "mtu=%d\n", mtu);
+
+	if (mtu > R300_MAX_TEXTURE_UNITS) {
+		fprintf(stderr,
+			"Aiiee ! mtu=%d is greater than R300_MAX_TEXTURE_UNITS=%d\n",
+			mtu, R300_MAX_TEXTURE_UNITS);
+		exit(-1);
+	}
+
+	/* We cannot let disabled tmu offsets pass DRM */
+	for (i = 0; i < mtu; i++) {
+		if (ctx->Texture.Unit[i]._ReallyEnabled) {
+			tmu_mappings[i] = hw_tmu;
+
+			t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
+			if (!t)
+				continue;
+
+			if ((t->pp_txformat & 0xffffff00) == 0xffffff00) {
+				WARN_ONCE
+				    ("unknown texture format (entry %x) encountered. Help me !\n",
+				     t->pp_txformat & 0xff);
+			}
+
+			if (RADEON_DEBUG & RADEON_STATE)
+				fprintf(stderr,
+					"Activating texture unit %d\n", i);
+
+			r300->hw.txe.cmd[R300_TXE_ENABLE] |= (1 << hw_tmu);
+
+			r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 +
+						hw_tmu] =
+			    gen_fixed_filter(t->pp_txfilter) | (hw_tmu << 28);
+			/* Note: There is a LOD bias per texture unit and a LOD bias
+			 * per texture object. We add them here to get the correct behaviour.
+			 * (The per-texture object LOD bias was introduced in OpenGL 1.4
+			 * and is not present in the EXT_texture_object extension).
+			 */
+			r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+				t->pp_txfilter_1 |
+				translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias);
+			r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+			    t->pp_txsize;
+			r300->hw.tex.format.cmd[R300_TEX_VALUE_0 +
+						hw_tmu] = t->pp_txformat;
+			r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+			  t->pp_txpitch;
+			r300->hw.textures[hw_tmu] = t;
+
+			if (t->tile_bits & R300_TXO_MACRO_TILE) {
+				WARN_ONCE("macro tiling enabled!\n");
+			}
+
+			if (t->tile_bits & R300_TXO_MICRO_TILE) {
+				WARN_ONCE("micro tiling enabled!\n");
+			}
+
+			r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0 +
+						    hw_tmu] = 0x0;
+			r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0 +
+						      hw_tmu] =
+			    t->pp_border_color;
+
+			last_hw_tmu = hw_tmu;
+
+			hw_tmu++;
+		}
+	}
+
+	/* R3xx and R4xx chips require that the texture unit corresponding to
+	 * KIL instructions is really enabled.
+	 *
+	 * We do some fakery here and in the state atom emit logic to enable
+	 * the texture without tripping up the CS checker in the kernel.
+	 */
+	if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+		/* Guard against a missing current fragment program before
+		 * reading UsesKill.
+		 */
+		if (ctx->FragmentProgram._Current &&
+		    ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) {
+			last_hw_tmu++;
+
+			r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
+
+			r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0] = 0;
+			r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0] = 0;
+			r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
+			r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0] = 0;
+			r300->hw.tex.size.cmd[R300_TEX_VALUE_0] = 0; /* 1x1 texture */
+			r300->hw.tex.format.cmd[R300_TEX_VALUE_0] = 0; /* A8 format */
+			r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0] = 0;
+		}
+	}
+
+	/* Size every texture packet to the number of hardware units in use. */
+	r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1);
+	r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, last_hw_tmu + 1);
+	r300->hw.tex.size.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, last_hw_tmu + 1);
+	r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, last_hw_tmu + 1);
+	r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, last_hw_tmu + 1);
+	r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, last_hw_tmu + 1);
+	r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
+	r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
+	    cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
+
+	r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings);
+
+	if (RADEON_DEBUG & RADEON_STATE)
+		fprintf(stderr, "TX_ENABLE: %08x  last_hw_tmu=%d\n",
+			r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu);
+}
+
+/**
+ * Set of vertex-stage outputs available to the rasterizer setup code.
+ * Which member is valid depends on whether hardware TCL is in use.
+ */
+union r300_outputs_written {
+	GLuint vp_outputs;	/* hw_tcl_on */
+	 DECLARE_RENDERINPUTS(index_bitset);	/* !hw_tcl_on */
+};
+
+/**
+ * Test whether an output is present in a union r300_outputs_written.
+ *
+ * NOTE: this macro reads a variable named hw_tcl_on from the scope where it
+ * is expanded.  With TCL on it tests bit \p vp_result of vp_outputs,
+ * otherwise it tests \p tnl_attrib in index_bitset.
+ */
+#define R300_OUTPUTS_WRITTEN_TEST(ow, vp_result, tnl_attrib) \
+	((hw_tcl_on) ? (ow).vp_outputs & (1 << (vp_result)) : \
+	RENDERINPUTS_TEST( (ow.index_bitset), (tnl_attrib) ))
+
+/**
+ * Program the rasterizer routing state (ri, rc and rr atoms) so that the
+ * inputs read by the selected fragment program are fed from the outputs
+ * written by the vertex stage.
+ *
+ * Colours and texture coordinates each get consecutive interpolator slots
+ * and consecutive fragment program registers.  If nothing at all is routed,
+ * a default colour is set up instead.  Inputs that the vertex stage does not
+ * provide, or that this function does not know how to route, only produce a
+ * one-time warning.
+ */
+static void r300SetupRSUnit(GLcontext * ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	union r300_outputs_written OutputsWritten;
+	GLuint InputsRead;
+	int fp_reg, high_rr;
+	int col_ip, tex_ip;
+	int rs_tex_count = 0;
+	int i, col_fmt, hw_tcl_on;
+
+	/* hw_tcl_on is also read implicitly by R300_OUTPUTS_WRITTEN_TEST. */
+	hw_tcl_on = r300->options.hw_tcl_enabled;
+
+	if (hw_tcl_on)
+		OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten;
+	else
+		RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
+
+	InputsRead = r300->selected_fp->InputsRead;
+
+	R300_STATECHANGE(r300, ri);
+	R300_STATECHANGE(r300, rc);
+	R300_STATECHANGE(r300, rr);
+
+	fp_reg = col_ip = tex_ip = col_fmt = 0;
+
+	/* Start from a clean slate: zero the counters and all routing slots. */
+	r300->hw.rc.cmd[1] = 0;
+	r300->hw.rc.cmd[2] = 0;
+	for (i=0; i<R300_RR_CMDSIZE-1; ++i)
+		r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0;
+
+	for (i=0; i<R300_RI_CMDSIZE-1; ++i)
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0;
+
+
+	/* Each routed input is cleared from InputsRead; whatever is left at
+	 * the end could not be satisfied. */
+	if (InputsRead & FRAG_BIT_COL0) {
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
+			r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_COL0;
+			++col_ip;
+			++fp_reg;
+		} else {
+			WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
+		}
+	}
+
+	if (InputsRead & FRAG_BIT_COL1) {
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
+			r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_COL1;
+			++col_ip;
+			++fp_reg;
+		} else {
+			WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+		}
+	}
+
+	/* We always route 4 texcoord components */
+	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+		if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
+		    continue;
+
+		if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
+		    WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
+		    continue;
+		}
+
+		/* Texture routing is OR-ed in because the same slot index may
+		 * already hold a colour routing written above. */
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count);
+		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
+		InputsRead &= ~(FRAG_BIT_TEX0 << i);
+		rs_tex_count += 4;
+		++tex_ip;
+		++fp_reg;
+	}
+
+	/* Setup default color if no color or tex was set */
+	if (rs_tex_count == 0 && col_ip == 0) {
+		r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_ADDR(0);
+		r300->hw.ri.cmd[R300_RI_INTERP_0] = R300_RS_COL_PTR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+		++col_ip;
+	}
+
+	/* high_rr is the number of routing slots in use; it is at least 1
+	 * because of the default colour fallback above. */
+	high_rr = (col_ip > tex_ip) ? col_ip : tex_ip;
+	r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
+	r300->hw.rc.cmd[2] |= high_rr - 1;
+
+	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr);
+	r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, high_rr);
+
+	if (InputsRead)
+		WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
+}
+
+/**
+ * R500 variant of the rasterizer routing setup: programs the ri, rc and rr
+ * atoms so that the inputs read by the selected fragment program are fed
+ * from the outputs written by the vertex stage.
+ *
+ * The flow matches r300SetupRSUnit(), but uses the R500_RS_* register
+ * encodings and gives each texture coordinate component its own pointer.
+ */
+static void r500SetupRSUnit(GLcontext * ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	union r300_outputs_written OutputsWritten;
+	GLuint InputsRead;
+	int fp_reg, high_rr;
+	int col_ip, tex_ip;
+	int rs_tex_count = 0;
+	int i, col_fmt, hw_tcl_on;
+
+	/* hw_tcl_on is also read implicitly by R300_OUTPUTS_WRITTEN_TEST. */
+	hw_tcl_on = r300->options.hw_tcl_enabled;
+
+	if (hw_tcl_on)
+		OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten;
+	else
+		RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
+
+	InputsRead = r300->selected_fp->InputsRead;
+
+	R300_STATECHANGE(r300, ri);
+	R300_STATECHANGE(r300, rc);
+	R300_STATECHANGE(r300, rr);
+
+	fp_reg = col_ip = tex_ip = col_fmt = 0;
+
+	/* Start from a clean slate: zero the counters and all routing slots.
+	 * NOTE(review): rr is cleared using R300_RR_CMDSIZE while ri uses
+	 * R500_RI_CMDSIZE -- confirm the rr bound is intended on R500. */
+	r300->hw.rc.cmd[1] = 0;
+	r300->hw.rc.cmd[2] = 0;
+	for (i=0; i<R300_RR_CMDSIZE-1; ++i)
+		r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0;
+
+	for (i=0; i<R500_RI_CMDSIZE-1; ++i)
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0;
+
+
+	/* Each routed input is cleared from InputsRead; whatever is left at
+	 * the end could not be satisfied. */
+	if (InputsRead & FRAG_BIT_COL0) {
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
+			r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_COL0;
+			++col_ip;
+			++fp_reg;
+		} else {
+			WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
+		}
+	}
+
+	if (InputsRead & FRAG_BIT_COL1) {
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
+			r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_COL1;
+			++col_ip;
+			++fp_reg;
+		} else {
+			WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+		}
+	}
+
+	/* We always route 4 texcoord components */
+	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+		if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
+		    continue;
+
+		if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
+		    WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
+		    continue;
+		}
+
+		/* S, T, R and Q each point at one of four consecutive
+		 * components starting at rs_tex_count. */
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
+			((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
+			((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
+			((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
+
+		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
+		InputsRead &= ~(FRAG_BIT_TEX0 << i);
+		rs_tex_count += 4;
+		++tex_ip;
+		++fp_reg;
+	}
+
+	/* Setup default color if no color or tex was set */
+	if (rs_tex_count == 0 && col_ip == 0) {
+		r300->hw.rr.cmd[R300_RR_INST_0] = R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_ADDR(0);
+		r300->hw.ri.cmd[R300_RI_INTERP_0] = R500_RS_COL_PTR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
+		++col_ip;
+	}
+
+	/* high_rr is the number of routing slots in use; it is at least 1
+	 * because of the default colour fallback above. */
+	high_rr = (col_ip > tex_ip) ? col_ip : tex_ip;
+	r300->hw.rc.cmd[1] = (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
+	/* NOTE(review): the meaning of the 0xC0 bits is not visible here. */
+	r300->hw.rc.cmd[2] = 0xC0 | (high_rr - 1);
+
+	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr);
+	r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, high_rr);
+
+	if (InputsRead)
+		WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
+}
+
+/* Smallest of three values, built on MIN2.  Arguments may be evaluated more
+ * than once, so they must be free of side effects. */
+#define MIN3(a, b, c)	((a) < (b) ? MIN2(a, c) : MIN2(b, c))
+
+/**
+ * Compute and store the VAP_CNTL value in the vap_cntl atom.
+ *
+ * \param rmesa         driver context.
+ * \param input_count   number of vertex inputs (0 is treated as 1).
+ * \param output_count  number of vertex outputs (0 is treated as 1).
+ * \param temp_count    number of temporaries (0 is treated as 1).
+ *
+ * With hardware TCL the slot and controller counts are derived from the
+ * vertex memory size (128 on RV515 and newer, 72 otherwise); without TCL
+ * fixed values are used.  The FPU count is then chosen per chip family.
+ */
+void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
+			GLuint output_count, GLuint temp_count)
+{
+    int vtx_mem_size;
+    int pvs_num_slots;
+    int pvs_num_cntrls;
+    int num_fpus;
+
+    /* Flush PVS engine before changing PVS_NUM_SLOTS, PVS_NUM_CNTRLS.
+     * See r500 docs 6.5.2 - done in emit */
+
+    /* The counts are used as divisors below, so never let them be zero. */
+    if (input_count == 0) {
+	input_count = 1;
+    }
+    if (output_count == 0) {
+	output_count = 1;
+    }
+    if (temp_count == 0) {
+	temp_count = 1;
+    }
+
+    vtx_mem_size =
+	(rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? 128 : 72;
+
+    pvs_num_slots = MIN3(10, vtx_mem_size/input_count, vtx_mem_size/output_count);
+    pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count);
+
+    R300_STATECHANGE(rmesa, vap_cntl);
+    if (rmesa->options.hw_tcl_enabled) {
+	rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] =
+	    (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) |
+	    (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) |
+	    (12 << R300_VF_MAX_VTX_NUM_SHIFT);
+	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+	    rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= R500_TCL_STATE_OPTIMIZATION;
+	}
+    } else {
+	/* not sure about non-tcl */
+	rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+				    (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+				    (5 << R300_VF_MAX_VTX_NUM_SHIFT));
+    }
+
+    /* Number of vertex FPUs depends on the chip family. */
+    switch (rmesa->radeon.radeonScreen->chip_family) {
+    case CHIP_FAMILY_R300:
+    case CHIP_FAMILY_R350:
+	num_fpus = 4;
+	break;
+    case CHIP_FAMILY_RV530:
+	num_fpus = 5;
+	break;
+    case CHIP_FAMILY_RV410:
+    case CHIP_FAMILY_R420:
+	num_fpus = 6;
+	break;
+    case CHIP_FAMILY_R520:
+    case CHIP_FAMILY_R580:
+    case CHIP_FAMILY_RV560:
+    case CHIP_FAMILY_RV570:
+	num_fpus = 8;
+	break;
+    default:
+	num_fpus = 2;
+	break;
+    }
+    rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (num_fpus << R300_PVS_NUM_FPUS_SHIFT);
+
+}
+
+/**
+ * Driver hook for glEnable/glDisable: dispatch a capability change to the
+ * matching state update routine.
+ *
+ * Several handlers re-read the current value from \p ctx instead of using
+ * \p state.  The smooth/stipple capabilities only toggle a software
+ * fallback, and only when the conformance_mode option is set.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+	if (RADEON_DEBUG & RADEON_STATE) {
+		fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__,
+			_mesa_lookup_enum_by_nr(cap),
+			state ? "GL_TRUE" : "GL_FALSE");
+	}
+
+	switch (cap) {
+	/* Per-fragment operations. */
+	case GL_ALPHA_TEST:
+		r300SetAlphaState(ctx);
+		break;
+	case GL_DEPTH_TEST:
+		r300SetDepthState(ctx);
+		break;
+	case GL_STENCIL_TEST:
+		r300SetStencilState(ctx, state);
+		break;
+	case GL_SCISSOR_TEST:
+		radeon_firevertices(&rmesa->radeon);
+		rmesa->radeon.state.scissor.enabled = state;
+		radeonUpdateScissor( ctx );
+		break;
+	case GL_COLOR_LOGIC_OP:
+		r300SetLogicOpState(ctx);
+		/* fall-through, because logic op overrides blending */
+	case GL_BLEND:
+		r300SetBlendState(ctx);
+		break;
+
+	/* Geometry related state. */
+	case GL_CULL_FACE:
+		r300UpdateCulling(ctx);
+		break;
+	case GL_CLIP_PLANE0:
+	case GL_CLIP_PLANE1:
+	case GL_CLIP_PLANE2:
+	case GL_CLIP_PLANE3:
+	case GL_CLIP_PLANE4:
+	case GL_CLIP_PLANE5:
+		r300SetClipPlaneState(ctx, cap, state);
+		break;
+	case GL_POLYGON_OFFSET_POINT:
+	case GL_POLYGON_OFFSET_LINE:
+	case GL_POLYGON_OFFSET_FILL:
+		r300SetPolygonOffsetState(ctx, state);
+		break;
+
+	/* Features that are only honoured, via fallback, in conformance mode. */
+	case GL_LINE_SMOOTH:
+		if (!rmesa->options.conformance_mode)
+			break;
+		r300SwitchFallback(ctx, R300_FALLBACK_LINE_SMOOTH, ctx->Line.SmoothFlag);
+		break;
+	case GL_LINE_STIPPLE:
+		if (!rmesa->options.conformance_mode)
+			break;
+		r300SwitchFallback(ctx, R300_FALLBACK_LINE_STIPPLE, ctx->Line.StippleFlag);
+		break;
+	case GL_POINT_SMOOTH:
+		if (!rmesa->options.conformance_mode)
+			break;
+		r300SwitchFallback(ctx, R300_FALLBACK_POINT_SMOOTH, ctx->Point.SmoothFlag);
+		break;
+	case GL_POLYGON_SMOOTH:
+		if (!rmesa->options.conformance_mode)
+			break;
+		r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_SMOOTH, ctx->Polygon.SmoothFlag);
+		break;
+	case GL_POLYGON_STIPPLE:
+		if (!rmesa->options.conformance_mode)
+			break;
+		r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_STIPPLE, ctx->Polygon.StippleFlag);
+		break;
+
+	default:
+		break;
+	}
+}
+
+/**
+ * Completely recalculates hardware state based on the Mesa state.
+ *
+ * State that has a driver callback is rebuilt by calling that callback with
+ * the values currently stored in the GL context; the remaining atoms are
+ * filled with fixed values.  Finally every atom is flagged dirty.
+ */
+static void r300ResetHwState(r300ContextPtr r300)
+{
+	GLcontext *ctx = r300->radeon.glCtx;
+	int has_tcl;
+
+	has_tcl = r300->options.hw_tcl_enabled;
+
+	if (RADEON_DEBUG & RADEON_STATE)
+		fprintf(stderr, "%s\n", __FUNCTION__);
+
+	/* Colour mask, depth and stencil: replay the current GL values. */
+	r300ColorMask(ctx,
+		      ctx->Color.ColorMask[0][RCOMP],
+		      ctx->Color.ColorMask[0][GCOMP],
+		      ctx->Color.ColorMask[0][BCOMP],
+                      ctx->Color.ColorMask[0][ACOMP]);
+
+	r300Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test);
+	r300DepthMask(ctx, ctx->Depth.Mask);
+	r300DepthFunc(ctx, ctx->Depth.Func);
+
+	/* stencil */
+	r300Enable(ctx, GL_STENCIL_TEST, ctx->Stencil._Enabled);
+	r300StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]);
+	r300StencilFuncSeparate(ctx, 0, ctx->Stencil.Function[0],
+				ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]);
+	r300StencilOpSeparate(ctx, 0, ctx->Stencil.FailFunc[0],
+			      ctx->Stencil.ZFailFunc[0],
+			      ctx->Stencil.ZPassFunc[0]);
+
+	r300UpdateCulling(ctx);
+
+	r300SetBlendState(ctx);
+	r300SetLogicOpState(ctx);
+
+	r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
+	r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled);
+
+	/* Enable the viewport scale and offset for all three axes. */
+	r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA
+	    | R300_VPORT_X_OFFSET_ENA
+	    | R300_VPORT_Y_SCALE_ENA
+	    | R300_VPORT_Y_OFFSET_ENA
+	    | R300_VPORT_Z_SCALE_ENA
+	    | R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT;
+	r300->hw.vte.cmd[2] = 0x00000008;
+
+	r300->hw.vap_vf_max_vtx_indx.cmd[1] = 0x00FFFFFF;
+	r300->hw.vap_vf_max_vtx_indx.cmd[2] = 0x00000000;
+
+	/* Vertex data byte swapping depends on host endianness. */
+#ifdef MESA_LITTLE_ENDIAN
+	r300->hw.vap_cntl_status.cmd[1] = R300_VC_NO_SWAP;
+#else
+	r300->hw.vap_cntl_status.cmd[1] = R300_VC_32BIT_SWAP;
+#endif
+
+	/* disable VAP/TCL on non-TCL capable chips */
+	if (!has_tcl)
+		r300->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS;
+
+	r300->hw.vap_psc_sgn_norm_cntl.cmd[1] = 0xAAAAAAAA;
+
+	/* XXX: Other families? */
+	if (has_tcl) {
+		r300->hw.vap_clip_cntl.cmd[1] = R300_PS_UCP_MODE_DIST_COP;
+
+		r300->hw.vap_clip.cmd[1] = r300PackFloat32(1.0); /* X */
+		r300->hw.vap_clip.cmd[2] = r300PackFloat32(1.0); /* X */
+		r300->hw.vap_clip.cmd[3] = r300PackFloat32(1.0); /* Y */
+		r300->hw.vap_clip.cmd[4] = r300PackFloat32(1.0); /* Y */
+
+		switch (r300->radeon.radeonScreen->chip_family) {
+		case CHIP_FAMILY_R300:
+			r300->hw.vap_pvs_vtx_timeout_reg.cmd[1] = R300_2288_R300;
+			break;
+		default:
+			r300->hw.vap_pvs_vtx_timeout_reg.cmd[1] = R300_2288_RV350;
+			break;
+		}
+	}
+
+	r300->hw.gb_enable.cmd[1] = R300_GB_POINT_STUFF_ENABLE
+	    | R300_GB_LINE_STUFF_ENABLE
+	    | R300_GB_TRIANGLE_STUFF_ENABLE;
+
+	r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666;
+	r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666;
+
+	/* Tile configuration: the pipe count field follows num_gb_pipes;
+	 * unknown counts are treated like a single pipe. */
+	r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] =
+	    R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16 /*| R300_GB_SUBPIXEL_1_16*/;
+	switch (r300->radeon.radeonScreen->num_gb_pipes) {
+	case 1:
+	default:
+		r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
+		    R300_GB_TILE_PIPE_COUNT_RV300;
+		break;
+	case 2:
+		r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
+		    R300_GB_TILE_PIPE_COUNT_R300;
+		break;
+	case 3:
+		r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
+		    R300_GB_TILE_PIPE_COUNT_R420_3P;
+		break;
+	case 4:
+		r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
+		    R300_GB_TILE_PIPE_COUNT_R420;
+		break;
+	}
+
+	/* XXX: Enable anti-aliasing? */
+	r300->hw.gb_misc2.cmd[R300_GB_MISC2_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE;
+	r300->hw.gb_misc2.cmd[R300_GB_MISC2_SELECT] = 0;
+
+	r300->hw.ga_point_s0.cmd[1] = r300PackFloat32(0.0);
+	r300->hw.ga_point_s0.cmd[2] = r300PackFloat32(0.0);
+	r300->hw.ga_point_s0.cmd[3] = r300PackFloat32(1.0);
+	r300->hw.ga_point_s0.cmd[4] = r300PackFloat32(1.0);
+
+	r300->hw.ga_triangle_stipple.cmd[1] = 0x00050005;
+
+	/* Note: point size and line width are reset to 1.0 here rather than
+	 * being taken from the GL context. */
+	r300PointSize(ctx, 1.0);
+
+	r300->hw.ga_point_minmax.cmd[1] = 0x18000006;
+	r300->hw.ga_point_minmax.cmd[2] = 0x00020006;
+	r300->hw.ga_point_minmax.cmd[3] = r300PackFloat32(1.0 / 192.0);
+
+	r300LineWidth(ctx, 1.0);
+
+	r300->hw.ga_line_stipple.cmd[1] = 0;
+	r300->hw.ga_line_stipple.cmd[2] = r300PackFloat32(0.0);
+	r300->hw.ga_line_stipple.cmd[3] = r300PackFloat32(1.0);
+
+	r300ShadeModel(ctx, ctx->Light.ShadeModel);
+
+	r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode);
+	r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode);
+	r300->hw.zbias_cntl.cmd[1] = 0x00000000;
+
+	r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor,
+			  ctx->Polygon.OffsetUnits);
+	r300Enable(ctx, GL_POLYGON_OFFSET_POINT, ctx->Polygon.OffsetPoint);
+	r300Enable(ctx, GL_POLYGON_OFFSET_LINE, ctx->Polygon.OffsetLine);
+	r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill);
+
+	r300->hw.su_depth_scale.cmd[1] = 0x4B7FFFFF;
+	r300->hw.su_depth_scale.cmd[2] = 0x00000000;
+
+	r300->hw.sc_hyperz.cmd[1] = 0x0000001C;
+	r300->hw.sc_hyperz.cmd[2] = 0x2DA49525;
+
+	r300->hw.sc_screendoor.cmd[1] = 0x00FFFFFF;
+
+	/* Fragment shader output formats: the first output uses C4_8, the
+	 * other three are marked unused. */
+	r300->hw.us_out_fmt.cmd[1] = R500_OUT_FMT_C4_8  |
+	  R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+	r300->hw.us_out_fmt.cmd[2] = R500_OUT_FMT_UNUSED |
+	  R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+	r300->hw.us_out_fmt.cmd[3] = R500_OUT_FMT_UNUSED |
+	  R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+	r300->hw.us_out_fmt.cmd[4] = R500_OUT_FMT_UNUSED |
+	  R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+	r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W0 | R300_W_SRC_US;
+
+	/* disable fog unit */
+	r300->hw.fogs.cmd[R300_FOGS_STATE] = 0;
+	r300->hw.fg_depth_src.cmd[1] = R300_FG_DEPTH_SRC_SCAN;
+
+	r300->hw.rb3d_cctl.cmd[1] = 0;
+
+	r300BlendColor(ctx, ctx->Color.BlendColor);
+
+	r300->hw.rb3d_dither_ctl.cmd[1] = 0;
+	r300->hw.rb3d_dither_ctl.cmd[2] = 0;
+	r300->hw.rb3d_dither_ctl.cmd[3] = 0;
+	r300->hw.rb3d_dither_ctl.cmd[4] = 0;
+	r300->hw.rb3d_dither_ctl.cmd[5] = 0;
+	r300->hw.rb3d_dither_ctl.cmd[6] = 0;
+	r300->hw.rb3d_dither_ctl.cmd[7] = 0;
+	r300->hw.rb3d_dither_ctl.cmd[8] = 0;
+	r300->hw.rb3d_dither_ctl.cmd[9] = 0;
+
+	r300->hw.rb3d_aaresolve_ctl.cmd[1] = 0;
+
+    r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
+    r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
+
+	r300->hw.zb_depthclearvalue.cmd[1] = 0;
+
+	r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE;
+	r300->hw.zstencil_format.cmd[3] = 0x00000003;
+	r300->hw.zstencil_format.cmd[4] = 0x00000000;
+	r300SetEarlyZState(ctx);
+
+	r300->hw.zb_zmask.cmd[1] = 0;
+	r300->hw.zb_zmask.cmd[2] = 0;
+
+	r300->hw.zb_hiz_offset.cmd[1] = 0;
+
+	r300->hw.zb_hiz_pitch.cmd[1] = 0;
+
+	/* Zero counts are bumped to 1 inside r300VapCntl(). */
+	r300VapCntl(r300, 0, 0, 0);
+	if (has_tcl) {
+		r300->hw.vps.cmd[R300_VPS_ZERO_0] = 0;
+		r300->hw.vps.cmd[R300_VPS_ZERO_1] = 0;
+		r300->hw.vps.cmd[R300_VPS_POINTSIZE] = r300PackFloat32(1.0);
+		r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0;
+	}
+
+	/* Flag the whole hardware state as dirty. */
+	r300->radeon.hw.all_dirty = GL_TRUE;
+}
+
+void r300UpdateShaders(r300ContextPtr rmesa)
+{
+	/*
+	 * Select and translate the current fragment program and, when
+	 * hardware TCL is enabled, the current vertex program.  Each
+	 * program's error flag is handed to r300SwitchFallback(); the
+	 * program state parameters are then refreshed and NewGLState cleared.
+	 */
+	GLcontext *ctx = rmesa->radeon.glCtx;
+	struct r300_fragment_program *frag_prog;
+	struct r300_vertex_program *vert_prog;
+
+	/* should only happen once, just after context is created */
+	/* TODO: shouldn't we fallback to sw here? */
+	if (!ctx->FragmentProgram._Current) {
+		fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+		return;
+	}
+
+	frag_prog = r300SelectAndTranslateFragmentShader(ctx);
+	r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, frag_prog->error);
+
+	if (rmesa->options.hw_tcl_enabled) {
+		vert_prog = r300SelectAndTranslateVertexShader(ctx);
+		r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vert_prog->error);
+	}
+
+	r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+	rmesa->radeon.NewGLState = 0;
+}
+
+/* Fetch fragment constant `index`: externals/immediates are returned in place,
+ * state constants are written to `buffer`; unknown ones read as all zeros. */
+static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index, GLfloat * buffer)
+{
+	static const GLfloat dummy[4] = { 0, 0, 0, 0 };
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	struct rc_constant * rcc = &rmesa->selected_fp->code.constants.Constants[index];
+
+	switch(rcc->Type) {
+	case RC_CONSTANT_EXTERNAL:
+		return ctx->FragmentProgram._Current->Base.Parameters->ParameterValues[rcc->u.External];
+	case RC_CONSTANT_IMMEDIATE:
+		return rcc->u.Immediate;
+	case RC_CONSTANT_STATE:
+		switch(rcc->u.State[0]) {
+		case RC_STATE_SHADOW_AMBIENT: {
+			const int unit = (int) rcc->u.State[1];
+			const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
+			/* no texture bound: don't hand back the caller's unwritten buffer */
+			if (!texObj)
+				return dummy;
+			buffer[0] = buffer[1] = texObj->CompareFailValue;
+			buffer[2] = buffer[3] = texObj->CompareFailValue;
+			return buffer;
+		}
+
+		case RC_STATE_R300_WINDOW_DIMENSION: {
+			__DRIdrawable * drawable = radeon_get_drawable(&rmesa->radeon);
+			buffer[0] = drawable->w * 0.5f;	/* width*0.5 */
+			buffer[1] = drawable->h * 0.5f;	/* height*0.5 */
+			buffer[2] = 0.5F;	/* for moving range [-1 1] -> [0 1] */
+			buffer[3] = 1.0F;	/* not used */
+			return buffer;
+		}
+
+		case RC_STATE_R300_TEXRECT_FACTOR: {
+			struct gl_texture_object *t =
+				ctx->Texture.Unit[rcc->u.State[1]].CurrentTex[TEXTURE_RECT_INDEX];
+
+			if (t && t->Image[0][t->BaseLevel]) {
+				struct gl_texture_image *image = t->Image[0][t->BaseLevel];
+				buffer[0] = 1.0 / image->Width2;
+				buffer[1] = 1.0 / image->Height2;
+			} else {
+				buffer[0] = 1.0;
+				buffer[1] = 1.0;
+			}
+			buffer[2] = 1.0;
+			buffer[3] = 1.0;
+			return buffer;
+		}
+		}
+	}
+
+	return dummy;
+}
+
+
+static void r300SetupPixelShader(GLcontext *ctx)
+{
+	/* Load the selected R300 fragment program into the fpi, fp and fpp
+	 * state atoms: ALU instruction words, control words, then constants
+	 * (every component packed with r300PackFloat24). */
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	struct r300_fragment_program *prog = rmesa->selected_fp;
+	struct r300_fragment_program_code *code = &prog->code.code.r300;
+	int i, comp;
+
+	R300_STATECHANGE(rmesa, fpi[0]);
+	R300_STATECHANGE(rmesa, fpi[1]);
+	R300_STATECHANGE(rmesa, fpi[2]);
+	R300_STATECHANGE(rmesa, fpi[3]);
+	rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, code->alu.length);
+	rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, code->alu.length);
+	rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length);
+	rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
+	for (i = 0; i < code->alu.length; ++i) {
+		rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_inst;
+		rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_addr;
+		rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_inst;
+		rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_addr;
+	}
+
+	R300_STATECHANGE(rmesa, fp);
+	rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->config;
+	rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->pixsize;
+	rmesa->hw.fp.cmd[R300_FP_CNTL2] = code->code_offset;
+	for (i = 0; i < 4; ++i)
+		rmesa->hw.fp.cmd[R300_FP_NODE0 + i] = code->code_addr[i];
+
+	R300_STATECHANGE(rmesa, fpp);
+	rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, prog->code.constants.Count * 4);
+	for (i = 0; i < prog->code.constants.Count; ++i) {
+		GLfloat buffer[4];
+		const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
+
+		for (comp = 0; comp < 4; ++comp)
+			rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + comp] = r300PackFloat24(constant[comp]);
+	}
+}
+
+#define bump_r500fp_count(ptr, new_count)   do { /* 6 dwords per count unit */ \
+	drm_r300_cmd_header_t *_hdr = (drm_r300_cmd_header_t *)(ptr); \
+	int _units = (new_count) / 6; \
+	assert(_units < 256); \
+	if (_hdr->r500fp.count < _units) _hdr->r500fp.count = _units; \
+} while (0)
+
+#define bump_r500fp_const_count(ptr, new_count)   do { /* 4 dwords per count unit */ \
+	drm_r300_cmd_header_t *_hdr = (drm_r300_cmd_header_t *)(ptr); \
+	int _units = (new_count) / 4; \
+	assert(_units < 256); \
+	if (_hdr->r500fp.count < _units) _hdr->r500fp.count = _units; \
+} while (0)
+
+static void r500SetupPixelShader(GLcontext *ctx)
+{
+	/* Load the selected R500 fragment program: control words into the fp
+	 * atom, six dwords per instruction into r500fp, and constants (packed
+	 * with r300PackFloat32) into r500fp_const. */
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	struct r300_fragment_program *prog = rmesa->selected_fp;
+	struct r500_fragment_program_code *code = &prog->code.code.r500;
+	int i, comp;
+
+	/* restart both counts at zero; the bump macros below only ever raise them */
+	((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0;
+	((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0;
+
+	R300_STATECHANGE(rmesa, fp);
+	rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx;
+
+	rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] =
+	    R500_US_CODE_START_ADDR(0) |
+	    R500_US_CODE_END_ADDR(code->inst_end);
+	rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] =
+	    R500_US_CODE_RANGE_ADDR(0) |
+	    R500_US_CODE_RANGE_SIZE(code->inst_end);
+	rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] =
+	    R500_US_CODE_OFFSET_ADDR(0);
+
+	R300_STATECHANGE(rmesa, r500fp);
+	/* Emit our shader... */
+	for (i = 0; i < code->inst_end+1; ++i) {
+		rmesa->hw.r500fp.cmd[i*6+1] = code->inst[i].inst0;
+		rmesa->hw.r500fp.cmd[i*6+2] = code->inst[i].inst1;
+		rmesa->hw.r500fp.cmd[i*6+3] = code->inst[i].inst2;
+		rmesa->hw.r500fp.cmd[i*6+4] = code->inst[i].inst3;
+		rmesa->hw.r500fp.cmd[i*6+5] = code->inst[i].inst4;
+		rmesa->hw.r500fp.cmd[i*6+6] = code->inst[i].inst5;
+	}
+	bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6);
+
+	R300_STATECHANGE(rmesa, r500fp_const);
+	for (i = 0; i < prog->code.constants.Count; ++i) {
+		GLfloat buffer[4];
+		const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
+
+		for (comp = 0; comp < 4; ++comp)
+			rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + comp] = r300PackFloat32(constant[comp]);
+	}
+	bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, prog->code.constants.Count * 4);
+}
+
+/* Program the VAP input routing registers (vir0/vir1, two attributes per
+ * register) from rmesa->vbuf.attribs, plus the vic/vof control words. */
+void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten)
+{
+	r300ContextPtr rmesa = R300_CONTEXT( ctx );
+	struct vertex_attribute *attrs = rmesa->vbuf.attribs;
+	int i, j, reg_count;
+	uint32_t *vir0 = &rmesa->hw.vir[0].cmd[1];
+	uint32_t *vir1 = &rmesa->hw.vir[1].cmd[1];
+
+	for (i = 0; i < R300_VIR_CMDSIZE-1; ++i)
+		vir0[i] = vir1[i] = 0;
+
+	for (i = 0, j = 0; i < rmesa->vbuf.num_attribs; ++i) {
+		int tmp = attrs[i].data_type | (attrs[i].dst_loc << R300_DST_VEC_LOC_SHIFT);
+		if (attrs[i]._signed)
+			tmp |= R300_SIGNED;
+		if (attrs[i].normalize)
+			tmp |= R300_NORMALIZE;
+
+		if (i % 2 == 0) {
+			vir0[j] = tmp << R300_DATA_TYPE_0_SHIFT;
+			vir1[j] = attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT);
+		} else {
+			vir0[j] |= tmp << R300_DATA_TYPE_1_SHIFT;
+			vir1[j] |= (attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT;
+			++j;
+		}
+	}
+
+	reg_count = (rmesa->vbuf.num_attribs + 1) >> 1;
+	/* flag the final attribute; with no attributes vir0[-1] would be cmd[0], the packet header */
+	if (reg_count > 0) {
+		const int last_shift = (rmesa->vbuf.num_attribs % 2 != 0) ? R300_DATA_TYPE_0_SHIFT : R300_DATA_TYPE_1_SHIFT;
+		vir0[reg_count-1] |= R300_LAST_VEC << last_shift;
+	}
+
+	R300_STATECHANGE(rmesa, vir[0]);
+	R300_STATECHANGE(rmesa, vir[1]);
+	R300_STATECHANGE(rmesa, vof);
+	R300_STATECHANGE(rmesa, vic);
+
+	if (rmesa->radeon.radeonScreen->kernel_mm) {
+		rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
+		rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
+		rmesa->hw.vir[0].cmd[0] |= (reg_count & 0x3FFF) << 16;
+		rmesa->hw.vir[1].cmd[0] |= (reg_count & 0x3FFF) << 16;
+	} else {
+		((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = reg_count;
+		((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = reg_count;
+	}
+
+	rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+	rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
+	rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
+	rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten);
+}
+
+void r300UpdateShaderStates(r300ContextPtr rmesa)
+{
+	/* Refresh everything derived from the current shaders: early-Z state,
+	 * textures, the pixel shader and RS unit (through the per-chip vtbl)
+	 * and, when hardware TCL is enabled, the vertex program. */
+	GLcontext *ctx = rmesa->radeon.glCtx;
+
+	/* should only happen once, just after context is created */
+	if (!ctx->FragmentProgram._Current)
+		return;
+
+	r300SetEarlyZState(ctx);
+	r300SetupTextures(ctx);
+
+	rmesa->vtbl.SetupPixelShader(ctx);
+	rmesa->vtbl.SetupRSUnit(ctx);
+
+	if (rmesa->options.hw_tcl_enabled) {
+		r300SetupVertexProgram(rmesa);
+	}
+}
+
+#define EASY_US_OUT_FMT(comps, c0, c1, c2, c3) \
+	(R500_OUT_FMT_##comps | R500_C0_SEL_##c0 | R500_C1_SEL_##c1 | \
+	 R500_C2_SEL_##c2 | R500_C3_SEL_##c3)
+/* Set us_out_fmt word 1 from the colour renderbuffer's Mesa format; does
+ * nothing when there is no colour buffer or the format is not handled. */
+static void r300SetupUsOutputFormat(GLcontext *ctx)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	uint32_t hw_format;
+	struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&rmesa->radeon);
+
+	if (!rrb)
+		return;
+
+	switch (rrb->base.Format) {
+		case MESA_FORMAT_RGBA5551:
+		case MESA_FORMAT_RGBA8888:
+			hw_format = EASY_US_OUT_FMT(C4_8, A, B, G, R);
+			break;
+		case MESA_FORMAT_RGB565_REV:
+		case MESA_FORMAT_RGBA8888_REV:
+			hw_format = EASY_US_OUT_FMT(C4_8, R, G, B, A);
+			break;
+		case MESA_FORMAT_RGB565:
+		case MESA_FORMAT_ARGB4444:
+		case MESA_FORMAT_ARGB1555:
+		case MESA_FORMAT_XRGB8888:
+		case MESA_FORMAT_ARGB8888:
+			hw_format = EASY_US_OUT_FMT(C4_8, B, G, R, A);
+			break;
+		case MESA_FORMAT_ARGB4444_REV:
+		case MESA_FORMAT_ARGB1555_REV:
+		case MESA_FORMAT_XRGB8888_REV:
+		case MESA_FORMAT_ARGB8888_REV:
+			hw_format = EASY_US_OUT_FMT(C4_8, A, R, G, B);
+			break;
+		case MESA_FORMAT_SRGBA8:
+			hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, B, G, R);
+			break;
+		case MESA_FORMAT_SARGB8:
+			hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, B, G, R, A);
+			break;
+		case MESA_FORMAT_SL8:
+			hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, A, R, A);
+			break;
+		case MESA_FORMAT_A8:
+			hw_format = EASY_US_OUT_FMT(C4_8, A, A, A, A);
+			break;
+		case MESA_FORMAT_L8:
+		case MESA_FORMAT_I8:
+			hw_format = EASY_US_OUT_FMT(C4_8, A, A, R, A);
+			break;
+		default:
+			assert(!"Unsupported format");
+			return;	/* NDEBUG build: hw_format was never set, leave us_out_fmt alone */
+	}
+
+	R300_STATECHANGE(rmesa, us_out_fmt);
+	rmesa->hw.us_out_fmt.cmd[1] = hw_format;
+}
+#undef EASY_US_OUT_FMT
+
+/**
+ * Called by Mesa after an internal state update: forwards the dirty bits to
+ * the swrast/swsetup/vbo/tnl modules, refreshes the buffer- and light-derived
+ * atoms and accumulates the bits in radeon.NewGLState.
+ */
+static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+	_swrast_InvalidateState(ctx, new_state);
+	_swsetup_InvalidateState(ctx, new_state);
+	_vbo_InvalidateState(ctx, new_state);
+	_tnl_InvalidateState(ctx, new_state);
+
+	if (new_state & _NEW_BUFFERS) {
+		_mesa_update_framebuffer(ctx);
+		/* this updates the DrawBuffer's Width/Height if it's a FBO */
+		_mesa_update_draw_buffer_bounds(ctx);
+
+		R300_STATECHANGE(r300, cb);
+		R300_STATECHANGE(r300, zb);
+	}
+
+	if (new_state & (_NEW_LIGHT)) {
+		R300_STATECHANGE(r300, shade2);
+		r300->hw.shade2.cmd[1] &= ~R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+		if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION)
+			r300->hw.shade2.cmd[1] |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+	}
+
+	if (new_state & _NEW_BUFFERS)
+		r300SetupUsOutputFormat(ctx);
+
+	r300->radeon.NewGLState |= new_state;
+}
+
+/**
+ * Calculate initial hardware state by calling r300ResetHwState().
+ * Assumes that the command buffer and state atoms have been
+ * initialized already.
+ */
+void r300InitState(r300ContextPtr r300)
+{
+	r300ResetHwState(r300);
+}
+
+static void r300RenderMode(GLcontext * ctx, GLenum mode)
+{	/* `mode` is unused: the RENDER_MODE fallback flag is derived from ctx->RenderMode */
+	r300SwitchFallback(ctx, R300_FALLBACK_RENDER_MODE, ctx->RenderMode != GL_RENDER);
+}
+
+/**
+ * Fill in the state-related driver callbacks of the given function table.
+ */
+void r300InitStateFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
+{
+
+	functions->UpdateState = r300InvalidateState;
+	functions->AlphaFunc = r300AlphaFunc;
+	functions->BlendColor = r300BlendColor;
+	functions->BlendEquationSeparate = r300BlendEquationSeparate;
+	functions->BlendFuncSeparate = r300BlendFuncSeparate;
+	functions->Enable = r300Enable;
+	functions->ColorMask = r300ColorMask;
+	functions->DepthFunc = r300DepthFunc;
+	functions->DepthMask = r300DepthMask;
+	functions->CullFace = r300CullFace;
+	functions->FrontFace = r300FrontFace;
+	functions->ShadeModel = r300ShadeModel;
+	functions->LogicOpcode = r300LogicOpcode;
+
+	/* ARB_point_parameters */
+	functions->PointParameterfv = r300PointParameter;
+
+	/* Stencil related */
+	functions->StencilFuncSeparate = r300StencilFuncSeparate;
+	functions->StencilMaskSeparate = r300StencilMaskSeparate;
+	functions->StencilOpSeparate = r300StencilOpSeparate;
+
+	/* Viewport related */
+	functions->Viewport = r300Viewport;
+	functions->DepthRange = r300DepthRange;
+	functions->PointSize = r300PointSize;
+	functions->LineWidth = r300LineWidth;
+
+	functions->PolygonOffset = r300PolygonOffset;
+	functions->PolygonMode = r300PolygonMode;
+
+	functions->RenderMode = r300RenderMode;
+
+	functions->ClipPlane = r300ClipPlane;
+	functions->Scissor = radeonScissor;
+
+	functions->DrawBuffer = radeonDrawBuffer;
+	functions->ReadBuffer = radeonReadBuffer;
+
+	functions->CopyPixels = _mesa_meta_CopyPixels;
+	functions->DrawPixels = _mesa_meta_DrawPixels;
+	if (radeon->radeonScreen->kernel_mm)	/* otherwise ReadPixels is left as passed in */
+		functions->ReadPixels = radeonReadPixels;
+}
+
+void r300InitShaderFunctions(r300ContextPtr r300)
+{
+	if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {	/* before RV515: r300 hooks */
+		r300->vtbl.SetupRSUnit = r300SetupRSUnit;
+		r300->vtbl.SetupPixelShader = r300SetupPixelShader;
+		r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures;
+	} else {	/* RV515 and later: r500 hooks */
+		r300->vtbl.SetupRSUnit = r500SetupRSUnit;
+		r300->vtbl.SetupPixelShader = r500SetupPixelShader;
+		r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures;
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
new file mode 100644
index 0000000000..e70f84f4e4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_state.h
@@ -0,0 +1,62 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R300_STATE_H__
+#define __R300_STATE_H__
+
+#include "r300_context.h"
+
+#define R300_NEWPRIM( rmesa )			\
+  do {	/* call radeon.dma.flush when it is non-NULL */	\
+  if ( (rmesa)->radeon.dma.flush )			\
+    (rmesa)->radeon.dma.flush( (rmesa)->radeon.glCtx );	\
+  } while (0)
+
+#define R300_STATECHANGE(r300, atom) \
+	do {	/* R300_NEWPRIM first, then mark the atom and the hw state dirty */	\
+	  R300_NEWPRIM(r300);				\
+		(r300)->hw.atom.dirty = GL_TRUE;		\
+		(r300)->radeon.hw.is_dirty = GL_TRUE;		\
+	} while(0)
+
+void r300UpdateViewportOffset (GLcontext * ctx);
+void r300UpdateDrawBuffer (GLcontext * ctx);
+void r300UpdateShaders (r300ContextPtr rmesa);
+void r300UpdateShaderStates (r300ContextPtr rmesa);
+void r300InitState (r300ContextPtr r300);
+void r300InitStateFuncs (radeonContextPtr radeon, struct dd_function_table *functions);
+void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count);
+void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten);
+
+#endif				/* __R300_STATE_H__ */
diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
new file mode 100644
index 0000000000..4dcc7cb022
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
@@ -0,0 +1,683 @@
+/**************************************************************************
+
+Copyright (C) 2007 Dave Airlie
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Dave Airlie <airlied@linux.ie>
+ *   Maciej Cencora <m.cencora@gmail.com>
+ */
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "r300_state.h"
+#include "r300_swtcl.h"
+#include "r300_emit.h"
+#include "r300_tex.h"
+#include "r300_render.h"
+#include "main/simple_list.h"
+
+#define EMIT_ATTR( ATTR, STYLE )					\
+do {	/* append (ATTR, STYLE) to rmesa->radeon.swtcl.vertex_attrs */	\
+	rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
+	rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
+	rmesa->radeon.swtcl.vertex_attr_count++;					\
+} while (0)
+
+#define EMIT_PAD( N )							\
+do {	/* append an EMIT_PAD entry with attrib 0 and offset N */	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
+   rmesa->radeon.swtcl.vertex_attr_count++;					\
+} while (0)
+
+#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask, _normalize) \
+do { /* append one entry to the caller's attrs[] / num_attrs; _signed is always 0 */ \
+	attrs[num_attrs].element = (_attr); \
+	attrs[num_attrs].data_type = (_format); \
+	attrs[num_attrs].dst_loc = (_dst_loc); \
+	attrs[num_attrs].swizzle = (_swizzle); \
+	attrs[num_attrs].write_mask = (_write_mask); \
+	attrs[num_attrs]._signed = 0; \
+	attrs[num_attrs].normalize = (_normalize); \
+	++num_attrs; \
+} while (0)
+
+/* Build the swtcl vertex layout: the tnl emit list (swtcl.vertex_attrs) and
+ * the matching VAP attribute list (vbuf.attribs).  Position is always emitted;
+ * the rest follows the selected fragment program's inputs and GL state. */
+void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead,  GLuint *_OutputsWritten)
+{
+	r300ContextPtr rmesa = R300_CONTEXT( ctx );
+	TNLcontext *tnl = TNL_CONTEXT(ctx);
+	struct vertex_buffer *VB = &tnl->vb;
+	int first_free_tex = 0;
+	GLuint InputsRead = 0;
+	GLuint OutputsWritten = 0;
+	int num_attrs = 0;
+	GLuint fp_reads = rmesa->selected_fp->InputsRead;
+	struct vertex_attribute *attrs = rmesa->vbuf.attribs;
+
+	radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__);
+	rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0;
+	rmesa->radeon.swtcl.vertex_attr_count = 0;
+
+	if (RADEON_DEBUG & RADEON_VERTS)
+		fprintf(stderr, "%s\n", __func__);
+
+	/* We always want non Ndc coords format */
+	VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
+
+	/* Always write position vector */
+	InputsRead |= 1 << VERT_ATTRIB_POS;
+	OutputsWritten |= 1 << VERT_RESULT_HPOS;
+	EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
+	ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW, 0);
+	rmesa->swtcl.coloroffset = 4;
+
+	if (fp_reads & FRAG_BIT_COL0) {
+		InputsRead |= 1 << VERT_ATTRIB_COLOR0;
+		OutputsWritten |= 1 << VERT_RESULT_COL0;
+#if MESA_LITTLE_ENDIAN
+		EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA );
+#else
+		EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR );
+#endif
+		ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1);
+	}
+
+	if (fp_reads & FRAG_BIT_COL1) {
+		GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+		InputsRead |= 1 << VERT_ATTRIB_COLOR1;
+		OutputsWritten |= 1 << VERT_RESULT_COL1;
+#if MESA_LITTLE_ENDIAN
+		EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA );
+#else
+		EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR );
+#endif
+		ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1);
+		rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1;
+	}
+
+	if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
+		VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->BackfaceColorPtr;
+		OutputsWritten |= 1 << VERT_RESULT_BFC0;
+#if MESA_LITTLE_ENDIAN
+		EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA );
+#else
+		EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR );
+#endif
+		ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1);
+		if (fp_reads & FRAG_BIT_COL1) {
+			GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+			VB->AttribPtr[VERT_ATTRIB_GENERIC1] = VB->BackfaceSecondaryColorPtr;
+			OutputsWritten |= 1 << VERT_RESULT_BFC1;
+#if MESA_LITTLE_ENDIAN
+			EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA );
+#else
+			EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR );
+#endif
+			ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1);
+		}
+	}
+
+	if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) {
+		GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
+		InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE;
+		OutputsWritten |= 1 << VERT_RESULT_PSIZ;
+		EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
+		ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0);
+	}
+
+	if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
+		int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0;
+
+		VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+		VB->AttribPtr[_TNL_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+		RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
+	}
+
+	if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
+		int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0;
+
+		VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+		VB->AttribPtr[_TNL_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+		RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
+	}
+
+	/**
+	 *  Sending only one texcoord component may lead to lock up,
+	 *  so for all textures always output 4 texcoord components to RS.
+	 */
+	{
+		int i;
+		GLuint swiz, format, hw_format;
+		for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+			if (fp_reads & FRAG_BIT_TEX(i)) {
+				switch (VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size) {
+					case 1:
+						format = EMIT_1F;
+						hw_format = R300_DATA_TYPE_FLOAT_1;
+						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+						break;
+					case 2:
+						format = EMIT_2F;
+						hw_format = R300_DATA_TYPE_FLOAT_2;
+						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE);
+						break;
+					case 3:
+						format = EMIT_3F;
+						hw_format = R300_DATA_TYPE_FLOAT_3;
+						swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+						break;
+					case 4:
+						format = EMIT_4F;
+						hw_format = R300_DATA_TYPE_FLOAT_4;
+						swiz = SWIZZLE_XYZW;
+						break;
+					default:
+						continue;
+				}
+				InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
+				OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
+				EMIT_ATTR(_TNL_ATTRIB_TEX(i), format);
+				ADD_ATTR(VERT_ATTRIB_TEX0 + i, hw_format, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0);
+				++first_free_tex;
+			}
+		}
+	}
+
+	/* using every unit is legal (nothing is written after the loop); only overshooting is fatal */
+	if (first_free_tex > ctx->Const.MaxTextureUnits) {
+		fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
+		exit(-1);
+	}
+
+	R300_NEWPRIM(rmesa);
+	rmesa->vbuf.num_attribs = num_attrs;
+	*_InputsRead = InputsRead;
+	*_OutputsWritten = OutputsWritten;
+
+	RENDERINPUTS_COPY(rmesa->render_inputs_bitset, tnl->render_inputs_bitset);
+}
+
+static void r300PrepareVertices(GLcontext *ctx)
+{
+	/* Choose the swtcl vertex layout, program the VAP to match, then install
+	 * the attribute list in tnl; the size it returns is stored divided by 4. */
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	GLuint InputsRead, OutputsWritten;
+	radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
+
+	r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten);
+	r300SetupVAP(ctx, InputsRead, OutputsWritten);
+
+	rmesa->radeon.swtcl.vertex_size =
+		_tnl_install_attrs( ctx, rmesa->radeon.swtcl.vertex_attrs,
+				    rmesa->radeon.swtcl.vertex_attr_count, NULL, 0 );
+
+	rmesa->radeon.swtcl.vertex_size /= 4;
+}
+
+static void r300_predict_emit_size( r300ContextPtr rmesa )
+{
+	/* When no prediction is stored yet, make room in the command buffer
+	 * and record state size + fixed overhead + the current cs->cdw. */
+	const int vertex_size = 7;
+	const int prim_size = 3;
+	const int cache_flush_size = 4;
+	const int pre_emit_state = 4;
+	const int scissor_size = 3;
+	const int fixed_size = vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state;
+	int state_size;
+
+	if (rmesa->radeon.swtcl.emit_prediction)
+		return;
+
+	state_size = radeonCountStateEmitSize(&rmesa->radeon);
+	if (rcommonEnsureCmdBufSpace(&rmesa->radeon, state_size + fixed_size, __FUNCTION__))
+		state_size = radeonCountStateEmitSize(&rmesa->radeon);	/* nonzero return: count the state again */
+
+	rmesa->radeon.swtcl.emit_prediction = state_size;
+	rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw + fixed_size;
+
+	radeon_print(RADEON_SWRENDER, RADEON_VERBOSE,
+			"%s, size %d\n",
+			__func__, rmesa->radeon.cmdbuf.cs->cdw + fixed_size);
+}
+
+
+static GLuint reduced_prim[] = {	/* looked up by RASTERIZE(x); presumably x is the GL primitive enum (0..9) - confirm */
+	GL_POINTS,	/* 0: GL_POINTS */
+	GL_LINES,	/* 1: GL_LINES */
+	GL_LINES,	/* 2: GL_LINE_LOOP */
+	GL_LINES,	/* 3: GL_LINE_STRIP */
+	GL_TRIANGLES,	/* 4: GL_TRIANGLES */
+	GL_TRIANGLES,	/* 5: GL_TRIANGLE_STRIP */
+	GL_TRIANGLES,	/* 6: GL_TRIANGLE_FAN */
+	GL_TRIANGLES,	/* 7: GL_QUADS */
+	GL_TRIANGLES,	/* 8: GL_QUAD_STRIP */
+	GL_TRIANGLES,	/* 9: GL_POLYGON */
+};
+
+static void r300RasterPrimitive( GLcontext *ctx, GLuint prim );
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+
+
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+#define HAVE_POLYGONS    1
+#define HAVE_ELTS        1
+
+static void* r300_alloc_verts(r300ContextPtr rmesa, GLuint n, GLuint size)
+{
+	void *verts = NULL;	/* retried until rcommonAllocDmaLowVerts() returns non-NULL */
+	while (!verts) {
+		r300_predict_emit_size( rmesa );
+		verts = rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 );
+	}
+	return verts;
+}
+
+#undef LOCAL_VARS
+#undef ALLOC_VERTS
+#define CTX_ARG r300ContextPtr rmesa
+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) r300_alloc_verts(rmesa, n, size);
+#define LOCAL_VARS						\
+   r300ContextPtr rmesa = R300_CONTEXT(ctx);		\
+   const char *r300verts = (char *)rmesa->radeon.swtcl.verts;
+#define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
+#define VERTEX r300Vertex
+#undef TAG
+#define TAG(x) r300_##x
+#include "tnl_dd/t_dd_triemit.h"
+
+
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+#define QUAD( a, b, c, d ) r300_quad( rmesa, a, b, c, d )
+#define TRI( a, b, c )     r300_triangle( rmesa, a, b, c )
+#define LINE( a, b )       r300_line( rmesa, a, b )
+#define POINT( a )         r300_point( rmesa, a )
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+#define R300_UNFILLED_BIT	0x01
+#define R300_MAX_TRIFUNC	0x02
+
+static struct {
+   tnl_points_func	        points;
+   tnl_line_func		line;
+   tnl_triangle_func	triangle;
+   tnl_quad_func		quad;
+} rast_tab[R300_MAX_TRIFUNC];
+
+#define DO_FALLBACK  0
+#define DO_UNFILLED (IND & R300_UNFILLED_BIT)
+#define DO_TWOSIDE   0
+#define DO_FLAT      0
+#define DO_OFFSET    0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_SPEC   1
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define TAB rast_tab
+
+#define DEPTH_SCALE 1.0
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a < 0)
+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
+
+#define VERT_SET_RGBA( v, c ) \
+do { \
+   r300_color_t *color = (r300_color_t *)&((v)->ui[coloroffset]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \
+} while (0)
+/* Copies the whole ui[coloroffset] word from v1 to v0. */
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+/* Writes c[0..2] into v0's specular red/green/blue; does nothing when specoffset is 0. */
+#define VERT_SET_SPEC( v0, c ) \
+do { \
+   if (specoffset) { \
+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]); \
+   UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]); \
+   } \
+} while (0)
+/* Copies specular red/green/blue from v1 to v0 when specoffset is non-zero. */
+#define VERT_COPY_SPEC( v0, v1 ) \
+do { \
+   if (specoffset) { \
+       v0->v.specular.red = v1->v.specular.red; \
+       v0->v.specular.green = v1->v.specular.green; \
+       v0->v.specular.blue = v1->v.specular.blue; \
+   } \
+} while (0)
+/* NOTE(review): the two *_SPEC macros below expand to a bare `if`; an `else` written after a use would bind to it. */
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+#define VERT_SAVE_SPEC( idx )    if (specoffset) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
+
+#undef LOCAL_VARS
+#undef TAG
+#undef INIT
+/* Locals for the template bodies: color[]/spec[] save slots plus the offsets the VERT_* macros index with. */
+#define LOCAL_VARS(n)							\
+   r300ContextPtr rmesa = R300_CONTEXT(ctx);			\
+   GLuint color[n] = { 0, }, spec[n] = { 0, };				\
+   GLuint coloroffset = rmesa->swtcl.coloroffset;	\
+   GLuint specoffset = rmesa->swtcl.specoffset;			\
+   (void) color; (void) spec; (void) coloroffset; (void) specoffset;
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+/* t_dd_unfilled.h is instantiated with unprefixed names (TAG(x) = x); presumably it defines unfilled_tri/unfilled_quad. */
+#define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] )
+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
+#undef TAG
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+/* First instantiation: IND == 0, so DO_UNFILLED is 0; names are unprefixed (init() is called below). */
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+/* Second instantiation: DO_UNFILLED is non-zero; names get an _unfilled suffix (init_unfilled()). */
+#define IND (R300_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+static void init_rast_tab( void )	/* called once, from r300InitSwtcl() */
+{
+   init();		/* from the IND == 0 include of t_dd_tritmp.h; presumably fills rast_tab[0] */
+   init_unfilled();	/* from the IND == R300_UNFILLED_BIT include; presumably fills rast_tab[1] */
+}
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+/* Emitters and locals for tnl/t_vb_rendertmp.h, which is included twice below with different ELT/TAG. */
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		\
+      r300_point( rmesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+   r300_line( rmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 )  \
+   r300_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   r300_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#define INIT(x) do {					\
+   r300RenderPrimitive( ctx, x );			\
+} while (0)
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+   r300ContextPtr rmesa = R300_CONTEXT(ctx);		\
+   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
+   const char *r300verts = (char *)rmesa->radeon.swtcl.verts;		\
+   const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+   const GLboolean stipple = ctx->Line.StippleFlag;		\
+   (void) elt; (void) stipple;
+#define RESET_STIPPLE	//if ( stipple ) r200ResetLineStipple( ctx );
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS	/* presumably keeps the macros above defined for the second include - TODO confirm */
+#define ELT(x) (x)		/* first pass: the vertex number is used directly */
+#define TAG(x) r300_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) r300_##x##_elts
+#define ELT(x) elt[x]		/* second pass: vertex numbers come from vb.Elts */
+#include "tnl/t_vb_rendertmp.h"
+
+
+
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+static void r300ChooseRenderState( GLcontext *ctx )
+{
+	TNLcontext *tnl = TNL_CONTEXT(ctx);
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	const GLuint caps = ctx->_TriangleCaps;
+	const GLuint wanted = (caps & DD_TRI_UNFILLED) ? R300_UNFILLED_BIT : 0;
+	radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__);
+
+	/* Nothing to do when the requested rast_tab entry is already installed. */
+	if (wanted == rmesa->radeon.swtcl.RenderIndex)
+		return;
+
+	/* Per-primitive callbacks come straight from the selected rast_tab slot. */
+	tnl->Driver.Render.Points = rast_tab[wanted].points;
+	tnl->Driver.Render.Line = rast_tab[wanted].line;
+	tnl->Driver.Render.ClippedLine = rast_tab[wanted].line;
+	tnl->Driver.Render.Triangle = rast_tab[wanted].triangle;
+	tnl->Driver.Render.Quad = rast_tab[wanted].quad;
+	/* Slot 0 uses the r300 render tables; every other slot uses the generic tnl ones. */
+	if (wanted != 0) {
+		tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+		tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+		tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+	} else {
+		tnl->Driver.Render.PrimTabVerts = r300_render_tab_verts;
+		tnl->Driver.Render.PrimTabElts = r300_render_tab_elts;
+		tnl->Driver.Render.ClippedPolygon = r300_fast_clipped_poly;
+	}
+	rmesa->radeon.swtcl.RenderIndex = wanted;
+}
+
+void r300RenderStart(GLcontext *ctx)
+{
+	r300ContextPtr rmesa = R300_CONTEXT( ctx );
+
+	radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__);
+	/*
+	 * Installed as tnl->Driver.Render.Start.  The five calls below are
+	 * kept in their original order.
+	 * NOTE(review): r300ValidateBuffers() is declared to return a
+	 * GLboolean; that result is discarded here - confirm this is intended.
+	 */
+	r300ChooseRenderState(ctx);
+	r300UpdateShaders(rmesa);
+	r300PrepareVertices(ctx);
+	r300ValidateBuffers(ctx);
+	r300UpdateShaderStates(rmesa);
+
+	/* investigate if we can put back flush optimisation if needed */
+	if (rmesa->radeon.dma.flush)
+		rmesa->radeon.dma.flush(ctx);
+}
+
+void r300RenderFinish(GLcontext *ctx)
+{	/* Empty body; installed as tnl->Driver.Render.Finish by r300InitSwtcl(). */
+}
+
+static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
+	/* R300_NEWPRIM is only issued when the hardware primitive actually changes. */
+	if (rmesa->radeon.swtcl.hw_primitive == hwprim)
+		return;
+	R300_NEWPRIM( rmesa );
+	rmesa->radeon.swtcl.hw_primitive = hwprim;
+}
+
+void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	const GLboolean unfilled_tris =
+		(prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED);
+
+	rmesa->radeon.swtcl.render_primitive = prim;
+	radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
+	/* GL_TRIANGLES with unfilled polygons keeps the current hardware primitive. */
+	if (!unfilled_tris)
+		r300RasterPrimitive( ctx, reduced_prim[prim] );
+}
+
+void r300ResetLineStipple(GLcontext *ctx)
+{	/* No state is modified here; the call is only traced when RADEON_VERTS debugging is enabled. */
+	if (RADEON_DEBUG & RADEON_VERTS)
+		fprintf(stderr, "%s\n", __func__);
+}
+
+void r300InitSwtcl(GLcontext *ctx)
+{
+	static GLboolean rast_tab_ready = GL_FALSE;
+	TNLcontext *tnl = TNL_CONTEXT(ctx);
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	radeon_print(RADEON_SWRENDER, RADEON_NORMAL, "%s\n", __func__);
+
+	/* rast_tab has file scope, so it is built on the first call only. */
+	if (!rast_tab_ready) {
+		init_rast_tab();
+		rast_tab_ready = GL_TRUE;
+	}
+	rmesa->radeon.swtcl.emit_prediction = 0;
+
+	tnl->Driver.Render.Start = r300RenderStart;
+	tnl->Driver.Render.Finish = r300RenderFinish;
+	tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
+	tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple;
+	tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+	tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+	tnl->Driver.Render.Interp = _tnl_interp;
+
+	/* FIXME: what are these numbers? */
+	_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
+			    48 * sizeof(GLfloat) );
+
+	/* swtcl.verts aliases the tnl clipspace buffer; ~0 forces the first r300ChooseRenderState() to install tables. */
+	rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+	rmesa->radeon.swtcl.RenderIndex = ~0;
+	rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
+	rmesa->radeon.swtcl.hw_primitive = 0;
+	_tnl_invalidate_vertex_state( ctx, ~0 );
+	_tnl_invalidate_vertices( ctx, ~0 );
+	_tnl_need_projected_coords( ctx, GL_FALSE );
+}
+
+void r300DestroySwtcl(GLcontext *ctx)
+{	/* Empty body: no teardown is performed here. */
+}
+
+static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset)
+{
+	BATCH_LOCALS(&rmesa->radeon);
+	/* vertex_size is a GLuint, so it is traced with %u rather than %d. */
+	radeon_print(RADEON_SWRENDER, RADEON_TRACE,
+		"%s:  vertex_size %u, offset 0x%x \n",
+			__FUNCTION__, vertex_size, offset);
+	/* 3D_LOAD_VBPNTR: the literal 1, vertex_size packed at bits 0 and 8, then offset relocated against bo. */
+	BEGIN_BATCH(7);
+	OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
+	OUT_BATCH(1);
+	OUT_BATCH(vertex_size | (vertex_size << 8));
+	OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+	END_BATCH();
+}
+
+static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
+{
+	BATCH_LOCALS(&rmesa->radeon);
+	int hw_type, count;
+
+	if (RADEON_DEBUG & RADEON_VERTS)
+		fprintf(stderr, "%s\n", __func__);
+	hw_type = r300PrimitiveType(rmesa, primitive);
+	count = r300NumVerts(rmesa, vertex_nr, primitive);
+	/* One 3D_DRAW_VBUF_2 packet: vertex-list walk, count in bits 16 and up, primitive type in the low bits. */
+	BEGIN_BATCH(3);
+	OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+	OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | hw_type);
+	END_BATCH();
+}
+
+void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+	radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
+	/* The whole emission is bracketed by a cache flush on either side. */
+	r300EmitCacheFlush(rmesa);
+	radeonEmitState(&rmesa->radeon);
+	r300_emit_scissor(ctx);
+
+	/* Describe the vertex array at current_offset in swtcl.bo, then draw swtcl.numverts vertices. */
+	r300EmitVertexAOS(rmesa, rmesa->radeon.swtcl.vertex_size,
+			  rmesa->radeon.swtcl.bo, current_offset);
+	r300EmitVbufPrim(rmesa, rmesa->radeon.swtcl.hw_primitive,
+			 rmesa->radeon.swtcl.numverts);
+	r300EmitCacheFlush(rmesa);
+
+	/* Warn when the command stream has grown past swtcl.emit_prediction. */
+	if ( rmesa->radeon.cmdbuf.cs->cdw > rmesa->radeon.swtcl.emit_prediction )
+		WARN_ONCE("Rendering was %d commands larger than predicted size."
+			" We might overflow  command buffer.\n",
+			rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
+	rmesa->radeon.swtcl.emit_prediction = 0;
+	COMMIT_BATCH();
+}
diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.h b/src/mesa/drivers/dri/r300/r300_swtcl.h
new file mode 100644
index 0000000000..c271d26546
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.h
@@ -0,0 +1,65 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com> - original r200 code
+ *   Dave Airlie <airlied@linux.ie>
+ */
+
+#ifndef __R300_SWTCL_H__
+#define __R300_SWTCL_H__
+
+#include "main/mtypes.h"
+#include "swrast/swrast.h"
+#include "r300_context.h"
+
+/*
+ * Here are definitions of OVM locations of vertex attributes for non TCL hw
+ */
+#define SWTCL_OVM_POS 0
+#define SWTCL_OVM_COLOR0 2
+#define SWTCL_OVM_COLOR1 3
+#define SWTCL_OVM_COLOR2 4
+#define SWTCL_OVM_COLOR3 5
+#define SWTCL_OVM_TEX(n) ((n) + 6)
+#define SWTCL_OVM_POINT_SIZE 15
+
+extern void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *InputsRead,  GLuint *OutputsWritten);
+
+extern void r300InitSwtcl( GLcontext *ctx );
+extern void r300DestroySwtcl( GLcontext *ctx );
+
+extern void r300RenderStart(GLcontext *ctx);
+extern void r300RenderFinish(GLcontext *ctx);
+extern void r300RenderPrimitive(GLcontext *ctx, GLenum prim);
+extern void r300ResetLineStipple(GLcontext *ctx);
+
+extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
new file mode 100644
index 0000000000..baef206bc2
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -0,0 +1,386 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/colormac.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/mipmap.h"
+#include "main/simple_list.h"
+#include "main/texstore.h"
+#include "main/texobj.h"
+
+#include "texmem.h"
+
+#include "r300_context.h"
+#include "radeon_mipmap_tree.h"
+#include "r300_tex.h"
+
+
+static unsigned int translate_wrap_mode(GLenum wrapmode)
+{
+	unsigned int hw = 0;	/* stays 0 for an unrecognised mode, after _mesa_problem() is called */
+	switch (wrapmode) {
+	case GL_REPEAT: hw = R300_TX_REPEAT; break;
+	case GL_CLAMP: hw = R300_TX_CLAMP; break;
+	case GL_CLAMP_TO_EDGE: hw = R300_TX_CLAMP_TO_EDGE; break;
+	case GL_CLAMP_TO_BORDER: hw = R300_TX_CLAMP_TO_BORDER; break;
+	case GL_MIRRORED_REPEAT: hw = R300_TX_REPEAT | R300_TX_MIRRORED; break;
+	case GL_MIRROR_CLAMP_EXT: hw = R300_TX_CLAMP | R300_TX_MIRRORED; break;
+	case GL_MIRROR_CLAMP_TO_EDGE_EXT: hw = R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; break;
+	case GL_MIRROR_CLAMP_TO_BORDER_EXT: hw = R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED; break;
+	default: _mesa_problem(NULL, "bad wrap mode in %s", __FUNCTION__); break;
+	}
+	return hw;
+}
+
+
+/**
+ * Refresh the S/T/R wrap fields of the cached pp_txfilter value from the GL
+ * wrap modes.  T is skipped for 1D targets; R is applied to 3D targets only.
+ * \param t Texture object whose wrap modes are to be set
+ */
+static void r300UpdateTexWrap(radeonTexObjPtr t)
+{
+	const struct gl_texture_object *tObj = &t->base;
+
+	t->pp_txfilter &=
+	    ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK);
+	t->pp_txfilter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
+	if (tObj->Target == GL_TEXTURE_1D)
+		return;
+
+	t->pp_txfilter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
+	if (tObj->Target != GL_TEXTURE_3D)
+		return;
+
+	t->pp_txfilter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
+}
+
+static GLuint aniso_filter(GLfloat anisotropy)
+{
+	/* Thresholds are tested from the largest down; a request below 2.0
+	 * (or one that fails every comparison) yields the 1:1 setting. */
+	if (anisotropy >= 16.0)
+		return R300_TX_MAX_ANISO_16_TO_1;
+	if (anisotropy >= 8.0)
+		return R300_TX_MAX_ANISO_8_TO_1;
+	if (anisotropy >= 4.0)
+		return R300_TX_MAX_ANISO_4_TO_1;
+	if (anisotropy >= 2.0)
+		return R300_TX_MAX_ANISO_2_TO_1;
+	return R300_TX_MAX_ANISO_1_TO_1;
+}
+
+/**
+ * Rebuild the min/mag/mip/anisotropy fields of the cached pp_txfilter value.
+ *
+ * \param t Texture whose filter modes are to be set
+ * \param minf Texture minification mode
+ * \param magf Texture magnification mode
+ * \param anisotropy Maximum anisotropy level
+ */
+static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
+{
+	/* A filter change can switch mipmapping on or off, so always revalidate. */
+	t->validated = GL_FALSE;
+
+	t->pp_txfilter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
+	t->pp_txfilter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
+
+	/* EXT_texture_filter_anisotropic does not pin down how anisotropy
+	 * combines with the ordinary filters.  Here it replaces both the min
+	 * and mag settings outright, unless either of them is GL_NEAREST.
+	 */
+	if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) {
+		t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO
+			| R300_TX_MIN_FILTER_ANISO
+			| R300_TX_MIN_FILTER_MIP_LINEAR
+			| aniso_filter(anisotropy);
+		if (RADEON_DEBUG & RADEON_TEXTURE)
+			fprintf(stderr, "Using maximum anisotropy of %f\n", anisotropy);
+		return;
+	}
+
+	/* Minification, part 1: the filter applied within a mipmap level. */
+	switch (minf) {
+	case GL_NEAREST:
+	case GL_NEAREST_MIPMAP_NEAREST:
+	case GL_NEAREST_MIPMAP_LINEAR:
+		t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST;
+		break;
+	case GL_LINEAR:
+	case GL_LINEAR_MIPMAP_NEAREST:
+	case GL_LINEAR_MIPMAP_LINEAR:
+		t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR;
+		break;
+	default:
+		break;
+	}
+	/* Minification, part 2: the filter applied between levels, if any. */
+	switch (minf) {
+	case GL_NEAREST_MIPMAP_NEAREST:
+	case GL_LINEAR_MIPMAP_NEAREST:
+		t->pp_txfilter |= R300_TX_MIN_FILTER_MIP_NEAREST;
+		break;
+	case GL_NEAREST_MIPMAP_LINEAR:
+	case GL_LINEAR_MIPMAP_LINEAR:
+		t->pp_txfilter |= R300_TX_MIN_FILTER_MIP_LINEAR;
+		break;
+	default:
+		break;
+	}
+	/* Magnification: only GL_NEAREST and GL_LINEAR set any bits. */
+	if (magf == GL_NEAREST)
+		t->pp_txfilter |= R300_TX_MAG_FILTER_NEAREST;
+	else if (magf == GL_LINEAR)
+		t->pp_txfilter |= R300_TX_MAG_FILTER_LINEAR;
+}
+
+static void r300SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4])
+{
+	GLubyte c[4];
+	int i;
+	for (i = 0; i < 4; i++) {	/* convert each float channel to a byte */
+		CLAMPED_FLOAT_TO_UBYTE(c[i], color[i]);
+	}
+	t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]);
+}
+
+/**
+ * Driver TexParameter hook: refresh the cached per-texture values that
+ * depend on \p pname.  Values are read back from \p texObj; \p ctx,
+ * \p target and \p params are not used.
+ */
+
+static void r300TexParameter(GLcontext * ctx, GLenum target,
+			     struct gl_texture_object *texObj,
+			     GLenum pname, const GLfloat * params)
+{
+	radeonTexObj* rtex = radeon_tex_obj(texObj);
+	GLenum base_format;
+
+	if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) {
+		fprintf(stderr, "%s( %s )\n", __FUNCTION__,
+			_mesa_lookup_enum_by_nr(pname));
+	}
+
+	switch (pname) {
+	/* Min, mag and anisotropy settings are always recomputed together. */
+	case GL_TEXTURE_MIN_FILTER:
+	case GL_TEXTURE_MAG_FILTER:
+	case GL_TEXTURE_MAX_ANISOTROPY_EXT:
+		r300SetTexFilter(rtex, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
+		break;
+
+	/* Any wrap change recomputes all the wrap fields. */
+	case GL_TEXTURE_WRAP_S:
+	case GL_TEXTURE_WRAP_T:
+	case GL_TEXTURE_WRAP_R:
+		r300UpdateTexWrap(rtex);
+		break;
+
+	case GL_TEXTURE_BORDER_COLOR:
+		r300SetTexBorderColor(rtex, texObj->BorderColor.f);
+		break;
+
+	/* Level and LOD range changes only mark the object as not validated. */
+	case GL_TEXTURE_BASE_LEVEL:
+	case GL_TEXTURE_MAX_LEVEL:
+	case GL_TEXTURE_MIN_LOD:
+	case GL_TEXTURE_MAX_LOD:
+		rtex->validated = GL_FALSE;
+		break;
+
+	case GL_DEPTH_TEXTURE_MODE:
+		/* Acted on only when the base-level image exists and has a
+		 * depth or depth/stencil base format. */
+		if (!texObj->Image[0][texObj->BaseLevel])
+			break;
+		base_format = texObj->Image[0][texObj->BaseLevel]->_BaseFormat;
+
+		if (base_format == GL_DEPTH_COMPONENT ||
+		    base_format == GL_DEPTH_STENCIL)
+			r300SetDepthTexMode(texObj);
+		break;
+
+	default:
+		/* Every other parameter is ignored here. */
+		break;
+	}
+}
+
+static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	radeonTexObj* t = radeon_tex_obj(texObj);
+	int unit;
+
+	if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) {
+		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
+			(void *)texObj,
+			_mesa_lookup_enum_by_nr(texObj->Target));
+	}
+
+	if (rmesa) {
+		/* The miptree's buffer takes precedence over t->bo when a miptree exists. */
+		struct radeon_bo *bo = t->mt ? t->mt->bo : t->bo;
+
+		if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->radeon.cmdbuf.cs))
+			radeon_firevertices(&rmesa->radeon);
+
+		/* Clear every hw.textures[] slot that still points at this object. */
+		for (unit = 0; unit < R300_MAX_TEXTURE_UNITS; ++unit) {
+			if (rmesa->hw.textures[unit] == t)
+				rmesa->hw.textures[unit] = 0;
+		}
+	}
+
+	if (t->bo) {
+		radeon_bo_unref(t->bo);
+		t->bo = NULL;
+	}
+	radeon_miptree_unreference(&t->mt);
+	_mesa_delete_texture_object(ctx, texObj);
+}
+
+/**
+ * Allocate a new texture object.
+ * Called via ctx->Driver.NewTextureObject.
+ * Note: this function will be called during context creation to
+ * allocate the default texture objects.
+ * Fixup MaxAnisotropy according to user preference.
+ * \return the new object's gl_texture_object, or NULL when out of memory.
+ */
+static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
+						      GLuint name,
+						      GLenum target)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
+	/* Bail out before touching t->base if the allocation failed. */
+	if (!t)
+		return NULL;
+
+	if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) {
+		fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
+			(void *)t, _mesa_lookup_enum_by_nr(target));
+	}
+	_mesa_initialize_texture_object(&t->base, name, target);
+	t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
+	/* Initialize hardware state */
+	r300UpdateTexWrap(t);
+	r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
+	r300SetTexBorderColor(t, t->base.BorderColor.f);
+	return &t->base;
+}
+
+/**
+ * Report whether \p mesa_format appears in the fixed list of formats below.
+ * The _REV twin of a format is listed next to it.
+ * \return 1 if listed, 0 otherwise.
+ */
+unsigned r300IsFormatRenderable(gl_format mesa_format)
+{
+	switch (mesa_format) {
+	/* RGB565 / RGBA5551 / ARGB4444 / ARGB1555 */
+	case MESA_FORMAT_RGB565: case MESA_FORMAT_RGB565_REV:
+	case MESA_FORMAT_RGBA5551:
+	case MESA_FORMAT_ARGB4444: case MESA_FORMAT_ARGB4444_REV:
+	case MESA_FORMAT_ARGB1555: case MESA_FORMAT_ARGB1555_REV:
+	/* RGBA8888 / XRGB8888 / ARGB8888 */
+	case MESA_FORMAT_RGBA8888: case MESA_FORMAT_RGBA8888_REV:
+	case MESA_FORMAT_XRGB8888: case MESA_FORMAT_XRGB8888_REV:
+	case MESA_FORMAT_ARGB8888: case MESA_FORMAT_ARGB8888_REV:
+	/* S-prefixed formats */
+	case MESA_FORMAT_SRGBA8: case MESA_FORMAT_SARGB8:
+	case MESA_FORMAT_SL8:
+	/* A8 / L8 / I8 and Z16 */
+	case MESA_FORMAT_A8: case MESA_FORMAT_L8: case MESA_FORMAT_I8:
+	case MESA_FORMAT_Z16:
+		return 1;
+	default:
+		break;
+	}
+	return 0;
+}
+
+unsigned r500IsFormatRenderable(gl_format mesa_format)
+{
+	/* Accepts MESA_FORMAT_S8_Z24 in addition to everything that
+	 * r300IsFormatRenderable() accepts. */
+	if (mesa_format == MESA_FORMAT_S8_Z24)
+		return 1;
+	return r300IsFormatRenderable(mesa_format);
+}
+
+void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
+{
+	/* Note: we only plug in the functions we implement in the driver
+	 * since _mesa_init_driver_functions() was already called.
+	 */
+	/* Hooks defined in this file. */
+	functions->NewTextureObject = r300NewTextureObject;
+	functions->DeleteTexture = r300DeleteTexture;
+	functions->TexParameter = r300TexParameter;
+
+	/* Texture image allocation and mapping (radeon* implementations). */
+	functions->NewTextureImage = radeonNewTextureImage;
+	functions->FreeTexImageData = radeonFreeTexImageData;
+	functions->MapTexture = radeonMapTexture;
+	functions->UnmapTexture = radeonUnmapTexture;
+
+	/* Format choice, image upload/readback and mipmap generation. */
+	functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
+	functions->TexImage1D = radeonTexImage1D;
+	functions->TexImage2D = radeonTexImage2D;
+	functions->TexImage3D = radeonTexImage3D;
+	functions->TexSubImage1D = radeonTexSubImage1D;
+	functions->TexSubImage2D = radeonTexSubImage2D;
+	functions->TexSubImage3D = radeonTexSubImage3D;
+	functions->CompressedTexImage2D = radeonCompressedTexImage2D;
+	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
+	functions->GetTexImage = radeonGetTexImage;
+	functions->GetCompressedTexImage = radeonGetCompressedTexImage;
+	functions->GenerateMipmap = radeonGenerateMipmap;
+	functions->IsTextureResident = driIsTextureResident;
+	/* The CopyTex* hooks are installed only when the screen reports kernel_mm. */
+	if (radeon->radeonScreen->kernel_mm) {
+		functions->CopyTexImage2D = radeonCopyTexImage2D;
+		functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
+	}
+	driInitTextureFormats();
+}
diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
new file mode 100644
index 0000000000..aca44cd766
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_tex.h
@@ -0,0 +1,59 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __r300_TEX_H__
+#define __r300_TEX_H__
+
+extern void r300SetDepthTexMode(struct gl_texture_object *tObj);
+
+extern void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target,
+			     __DRIdrawable *dPriv);
+
+extern void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
+			      GLint format, __DRIdrawable *dPriv);
+
+extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+			     unsigned long long offset, GLint depth,
+			     GLuint pitch);
+
+extern GLboolean r300ValidateBuffers(GLcontext * ctx);
+
+extern void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions);
+
+int32_t r300TranslateTexFormat(gl_format mesaFormat);
+
+unsigned r300IsFormatRenderable(gl_format mesaFormat);
+unsigned r500IsFormatRenderable(gl_format mesaFormat);
+
+#endif				/* __r300_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
new file mode 100644
index 0000000000..4ba6740e3d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -0,0 +1,524 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ *
+ * \todo Enable R300 texture tiling code?
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/enums.h"
+#include "main/simple_list.h"
+
+#include "r300_context.h"
+#include "radeon_mipmap_tree.h"
+#include "r300_tex.h"
+#include "r300_reg.h"
+
+/*
+ * Note that the _REV formats are the same as the non-REV formats.  This is
+ * because the REV and non-REV formats are identical as a byte string, but
+ * differ when accessed as 16-bit or 32-bit words depending on the endianness of
+ * the host.  Since the textures are transferred to the R300 as a byte string
+ * (i.e. without any byte-swapping), the R300 sees the REV and non-REV formats
+ * identically.  -- paulus
+ */
+
+/* Map a Mesa texture format to its R300 TX_FORMAT word; returns -1 if there is no mapping. */
+int32_t r300TranslateTexFormat(gl_format mesaFormat)
+{
+	switch (mesaFormat) {
+#ifdef MESA_LITTLE_ENDIAN
+		case MESA_FORMAT_RGBA8888:
+			return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8);
+		case MESA_FORMAT_RGBA8888_REV:
+			return R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8);
+		case MESA_FORMAT_ARGB8888:
+			return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+		case MESA_FORMAT_ARGB8888_REV:
+			return R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8);
+#else
+		case MESA_FORMAT_RGBA8888:
+			return R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8);
+		case MESA_FORMAT_RGBA8888_REV:
+			return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8);
+		case MESA_FORMAT_ARGB8888:
+			return R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8);
+		case MESA_FORMAT_ARGB8888_REV:
+			return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+#endif
+		case MESA_FORMAT_XRGB8888:
+			return R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+		case MESA_FORMAT_RGB888:
+			return R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+		case MESA_FORMAT_RGB565:
+			return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
+		case MESA_FORMAT_RGB565_REV:
+			return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
+		case MESA_FORMAT_ARGB4444:
+			return R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4);
+		case MESA_FORMAT_ARGB4444_REV:
+			return R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4);
+		case MESA_FORMAT_ARGB1555:
+			return R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5);
+		case MESA_FORMAT_ARGB1555_REV:
+			return R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5);
+		case MESA_FORMAT_AL88:
+			return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8);
+		case MESA_FORMAT_AL88_REV:
+			return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8);
+		case MESA_FORMAT_RGB332:
+			return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2);
+		case MESA_FORMAT_A8:
+			return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8);
+		case MESA_FORMAT_L8:
+			return R300_EASY_TX_FORMAT(X, X, X, ONE, X8);
+		case MESA_FORMAT_I8:
+			return R300_EASY_TX_FORMAT(X, X, X, X, X8);
+		case MESA_FORMAT_CI8:
+			return R300_EASY_TX_FORMAT(X, X, X, X, X8);
+		case MESA_FORMAT_YCBCR:
+			return R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE;
+		case MESA_FORMAT_YCBCR_REV:
+			return R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE;
+		case MESA_FORMAT_RGB_DXT1:
+			return R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1);
+		case MESA_FORMAT_RGBA_DXT1:
+			return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1);
+		case MESA_FORMAT_RGBA_DXT3:
+			return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3);
+		case MESA_FORMAT_RGBA_DXT5:
+			return R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5);
+		case MESA_FORMAT_RGBA_FLOAT32:
+			return R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32);
+		case MESA_FORMAT_RGBA_FLOAT16:
+			return R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16);
+		case MESA_FORMAT_ALPHA_FLOAT32:
+			return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32);
+		case MESA_FORMAT_ALPHA_FLOAT16:
+			return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16);
+		case MESA_FORMAT_LUMINANCE_FLOAT32:
+			return R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32);
+		case MESA_FORMAT_LUMINANCE_FLOAT16:
+			return R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16);
+		case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+			return R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32);
+		case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+			return R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16);
+		case MESA_FORMAT_INTENSITY_FLOAT32:
+			return R300_EASY_TX_FORMAT(X, X, X, X, FL_I32);
+		case MESA_FORMAT_INTENSITY_FLOAT16:
+			return R300_EASY_TX_FORMAT(X, X, X, X, FL_I16);
+		case MESA_FORMAT_Z16:
+			return R300_EASY_TX_FORMAT(X, X, X, X, X16);
+		case MESA_FORMAT_Z24_S8:
+			return R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8);
+		case MESA_FORMAT_S8_Z24:
+			return R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8);
+		case MESA_FORMAT_Z32:
+			return R300_EASY_TX_FORMAT(X, X, X, X, X32);
+		/* EXT_texture_sRGB */
+		case MESA_FORMAT_SRGBA8:
+			return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA;
+		case MESA_FORMAT_SLA8:
+			return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA;
+		case MESA_FORMAT_SL8:
+			return R300_EASY_TX_FORMAT(X, X, X, ONE, X8) | R300_TX_FORMAT_GAMMA;
+		case MESA_FORMAT_SRGB_DXT1:
+			return R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1) | R300_TX_FORMAT_GAMMA;
+		case MESA_FORMAT_SRGBA_DXT1:
+			return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1) | R300_TX_FORMAT_GAMMA;
+		case MESA_FORMAT_SRGBA_DXT3:
+			return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3) | R300_TX_FORMAT_GAMMA;
+		case MESA_FORMAT_SRGBA_DXT5:
+			return R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5) | R300_TX_FORMAT_GAMMA;
+		default:
+			return -1;
+	}
+}
+
+/* Pick the TX_FORMAT swizzle for a depth texture from its storage format and DepthMode. */
+void r300SetDepthTexMode(struct gl_texture_object *tObj)
+{
+	/* Indexed as [storage: Z16, S8_Z24, Z32][mode: LUMINANCE, INTENSITY, ALPHA]. */
+	static const GLuint swizzles[3][3] = {
+		{
+			R300_EASY_TX_FORMAT(X, X, X, ONE, X16),
+			R300_EASY_TX_FORMAT(X, X, X, X, X16),
+			R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X16),
+		},
+		{
+			R300_EASY_TX_FORMAT(Y, Y, Y, ONE, X24_Y8),
+			R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8),
+			R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, Y, X24_Y8),
+		},
+		{
+			R300_EASY_TX_FORMAT(X, X, X, ONE, X32),
+			R300_EASY_TX_FORMAT(X, X, X, X, X32),
+			R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X32),
+		},
+	};
+	radeonTexObjPtr t;
+	unsigned storage, mode;
+
+	if (!tObj)
+		return;
+
+	t = radeon_tex_obj(tObj);
+
+	switch (tObj->Image[0][tObj->BaseLevel]->TexFormat) {
+	case MESA_FORMAT_Z16:
+		storage = 0;
+		break;
+	case MESA_FORMAT_S8_Z24:
+		storage = 1;
+		break;
+	case MESA_FORMAT_Z32:
+		storage = 2;
+		break;
+	default:
+		/* Unexpected depth format; higher levels of Mesa should have caught it. */
+		ASSERT(0);
+		return;
+	}
+
+	switch (tObj->DepthMode) {
+	case GL_LUMINANCE:
+		mode = 0;
+		break;
+	case GL_INTENSITY:
+		mode = 1;
+		break;
+	case GL_ALPHA:
+		mode = 2;
+		break;
+	default:
+		/* Unexpected depth mode; higher levels of Mesa should have caught it. */
+		ASSERT(0);
+		return;
+	}
+
+	t->pp_txformat = swizzles[storage][mode];
+}
+
+
+/**
+ * Compute the cached hardware register values for the given texture object.
+ * Calls exit(1) if the image's format has no hardware translation.
+ * \param rmesa Context pointer
+ * \param t the r300 texture object
+ */
+static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
+{
+	const struct gl_texture_image *firstImage;
+	firstImage = t->base.Image[0][t->minLod];
+
+	if (!t->image_override) {	/* r300SetTexOffset/r300SetTexBuffer2 write pp_txformat for overridden images */
+		if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+			r300SetDepthTexMode(&t->base);
+		} else {
+			int32_t txformat = r300TranslateTexFormat(firstImage->TexFormat);
+			if (txformat < 0) {	/* -1 means no hardware mapping */
+				_mesa_problem(rmesa->radeon.glCtx, "%s: Invalid format %s",
+							  __FUNCTION__, _mesa_get_format_name(firstImage->TexFormat));
+				exit(1);
+			}
+			t->pp_txformat = (uint32_t) txformat;
+		}
+	}
+
+	if (t->image_override && t->bo)	/* presumably set up by r300SetTexBuffer2, which fills pp_txsize/pp_txpitch itself */
+		return;
+
+	t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT)))
+			| ((R300_TX_HEIGHTMASK_MASK & ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)))
+			| ((R300_TX_DEPTHMASK_MASK & ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT)))
+			| ((R300_TX_MAX_MIP_LEVEL_MASK & ((t->maxLod - t->minLod) << R300_TX_MAX_MIP_LEVEL_SHIFT))));
+
+	t->tile_bits = 0;
+
+	if (t->base.Target == GL_TEXTURE_CUBE_MAP)
+		t->pp_txformat |= R300_TX_FORMAT_CUBIC_MAP;
+	if (t->base.Target == GL_TEXTURE_3D)
+		t->pp_txformat |= R300_TX_FORMAT_3D;
+
+
+	if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
+		unsigned int align = (64 / _mesa_get_format_bytes(firstImage->TexFormat)) - 1;	/* texels per 64 bytes, minus one */
+		t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
+		if (!t->image_override)
+			t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1;	/* width rounded up to that texel count, minus one */
+	}
+
+	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {	/* RV515 and later: track sizes above 2048 via the BIT11 flags */
+	    if (firstImage->Width > 2048)
+		t->pp_txpitch |= R500_TXWIDTH_BIT11;
+            else
+		t->pp_txpitch &= ~R500_TXWIDTH_BIT11;
+	    if (firstImage->Height > 2048)
+		t->pp_txpitch |= R500_TXHEIGHT_BIT11;
+            else
+		t->pp_txpitch &= ~R500_TXHEIGHT_BIT11;
+	}
+}
+
+/**
+ * Make a texture object usable for rendering.
+ *
+ * Validates the mipmap tree first; only on success are the cached hardware
+ * register values recomputed and the object flagged as validated.
+ */
+static GLboolean r300_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	radeonTexObj *rtex = radeon_tex_obj(texObj);
+	GLboolean ok = GL_FALSE;
+
+	if (radeon_validate_texture_miptree(ctx, texObj)) {
+		/* Refresh the cached copies of the texture registers. */
+		setup_hardware_state(r300, rtex);
+		rtex->validated = GL_TRUE;
+		ok = GL_TRUE;
+	}
+	return ok;
+}
+
+/**
+ * Ensure all enabled and complete textures are uploaded along with any buffers being used.
+ *
+ * \return GL_TRUE if the command-stream space check passes, GL_FALSE otherwise.
+ */
+GLboolean r300ValidateBuffers(GLcontext * ctx)
+{
+	r300ContextPtr rmesa = R300_CONTEXT(ctx);
+	struct radeon_renderbuffer *rrb;
+	int i, ret;
+
+	radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
+
+	rrb = radeon_get_colorbuffer(&rmesa->radeon);
+	/* color buffer */
+	if (rrb && rrb->bo) {
+		radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+						  rrb->bo, 0,
+						  RADEON_GEM_DOMAIN_VRAM);
+	}
+
+	/* depth buffer */
+	rrb = radeon_get_depthbuffer(&rmesa->radeon);
+	if (rrb && rrb->bo) {
+		radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+						  rrb->bo, 0,
+						  RADEON_GEM_DOMAIN_VRAM);
+	}
+
+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
+		radeonTexObj *t;
+
+		if (!ctx->Texture.Unit[i]._ReallyEnabled)
+			continue;
+
+		if (!r300_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) {
+			_mesa_warning(ctx,
+				      "failed to validate texture for unit %d.\n",
+				      i);
+		}
+		t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
+		if (t->image_override && t->bo)
+			radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+							  t->bo,
+							  RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+		/* A validation failure above is only a warning, so t->mt may not exist. */
+		else if (t->mt && t->mt->bo)
+			radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+							  t->mt->bo,
+							  RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+	}
+
+	ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
+	return ret ? GL_FALSE : GL_TRUE;
+}
+
+/**
+ * Mark texture texname as overridden and, when offset is non-zero, record
+ * the offset and derive the texel format and pitch field from depth and pitch.
+ */
+void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+		      unsigned long long offset, GLint depth, GLuint pitch)
+{
+	r300ContextPtr rmesa = pDRICtx->driverPrivate;
+	struct gl_texture_object *tObj =
+	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+	radeonTexObjPtr t = radeon_tex_obj(tObj);
+	uint32_t texel_bytes;
+
+	if (!tObj)
+		return;
+
+	/* The override flag is raised even when no offset is supplied. */
+	t->image_override = GL_TRUE;
+
+	if (!offset)
+		return;
+
+	t->bo = NULL;
+	t->override_offset = offset;
+
+	if (depth == 32) {
+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+		texel_bytes = 4;
+	} else if (depth == 16) {
+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
+		texel_bytes = 2;
+	} else {
+		/* 24 and any unrecognised depth: 4-byte texels with W swizzled to ONE. */
+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+		texel_bytes = 4;
+	}
+
+	/* Keep the low 13 bits of pp_txpitch, then OR in (pitch / texel size) - 1. */
+	t->pp_txpitch &= (1 << 13) -1;
+	t->pp_txpitch |= (uint32_t)pitch / texel_bytes - 1;
+}
+
+/**
+ * Bind color renderbuffer 0 of drawable dPriv as image level 0 of the texture
+ * object selected by target on the current texture unit.  texture_format only
+ * chooses whether a 4-byte-per-pixel buffer samples W or ONE for alpha.
+ */
+void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format, __DRIdrawable *dPriv)
+{
+	struct gl_texture_unit *texUnit;
+	struct gl_texture_object *texObj;
+	struct gl_texture_image *texImage;
+	struct radeon_renderbuffer *rb;
+	radeon_texture_image *rImage;
+	radeonContextPtr radeon;
+	r300ContextPtr rmesa;
+	struct radeon_framebuffer *rfb;
+	radeonTexObjPtr t;
+	uint32_t pitch_val;
+
+	radeon = pDRICtx->driverPrivate;
+	rmesa = pDRICtx->driverPrivate;
+
+	rfb = dPriv->driverPrivate;
+	texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
+	texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
+	if (texObj == NULL)	/* checked before it is handed to _mesa_get_tex_image */
+		return;
+	texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
+
+	rImage = get_radeon_texture_image(texImage);
+	t = radeon_tex_obj(texObj);
+	if (t == NULL || texImage == NULL)
+		return;
+
+	radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE);
+	rb = rfb->color_rb[0];
+	if (rb == NULL || rb->bo == NULL) {
+		/* No buffer object behind the color renderbuffer; nothing to bind. */
+		return;
+	}
+
+	_mesa_lock_texture(radeon->glCtx, texObj);
+	if (t->bo) {
+		radeon_bo_unref(t->bo);
+		t->bo = NULL;
+	}
+	if (rImage->bo) {
+		radeon_bo_unref(rImage->bo);
+		rImage->bo = NULL;
+	}
+
+	radeon_miptree_unreference(&t->mt);
+	radeon_miptree_unreference(&rImage->mt);
+
+	_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
+				   rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
+	texImage->RowStride = rb->pitch / rb->cpp;
+	rImage->bo = rb->bo;
+	radeon_bo_ref(rImage->bo);
+	t->bo = rb->bo;
+	radeon_bo_ref(t->bo);
+	t->tile_bits = 0;
+	t->image_override = GL_TRUE;
+	t->override_offset = 0;
+	t->pp_txpitch &= (1 << 13) -1;
+	pitch_val = rb->pitch;
+	switch (rb->cpp) {
+	case 4:
+		if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
+			t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+		else
+			t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+		pitch_val /= 4;
+		break;
+	case 3:
+	default:
+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+		pitch_val /= 4;
+		break;
+	case 2:
+		t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
+		pitch_val /= 2;
+		break;
+	}
+	pitch_val--;
+	t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT)))
+			| ((R300_TX_HEIGHTMASK_MASK & ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT))));
+	t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
+	t->pp_txpitch |= pitch_val;
+
+	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+		if (rb->base.Width > 2048)
+			t->pp_txpitch |= R500_TXWIDTH_BIT11;
+		else
+			t->pp_txpitch &= ~R500_TXWIDTH_BIT11;
+		if (rb->base.Height > 2048)
+			t->pp_txpitch |= R500_TXHEIGHT_BIT11;
+		else
+			t->pp_txpitch &= ~R500_TXHEIGHT_BIT11;
+	}
+	t->validated = GL_TRUE;
+	_mesa_unlock_texture(radeon->glCtx, texObj);
+}
+
+void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+{	/* Thin wrapper: r300SetTexBuffer2 with the texture format fixed to RGBA. */
+        r300SetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
+}
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
new file mode 100644
index 0000000000..a1fe378029
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -0,0 +1,414 @@
+/**************************************************************************
+
+Copyright (C) 2005  Aapo Tahkola <aet@rasterburn.org>
+Copyright (C) 2008  Oliver McFadden <z3ro.geek@gmail.com>
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/* Radeon R5xx Acceleration, Revision 1.2 */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+#include "shader/programopt.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_statevars.h"
+#include "tnl/tnl.h"
+
+#include "compiler/radeon_compiler.h"
+#include "radeon_mesa_to_rc.h"
+#include "r300_context.h"
+#include "r300_fragprog_common.h"
+#include "r300_state.h"
+
+/**
+ * Write the constants of the given vertex program into dst, four floats per
+ * constant.
+ *
+ * Mesa's tracked matrices (NV programs) or state parameters (all other
+ * programs) are refreshed before anything is copied.
+ *
+ * \return the total number of floats written.
+ */
+static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst)
+{
+	int idx, comp;
+
+	/* Bring the source values up to date first. */
+	if (vp->Base->IsNVProgram)
+		_mesa_load_tracked_matrices(ctx);
+	else if (vp->Base->Base.Parameters)
+		_mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
+
+	/* One group of four floats per compiler constant. */
+	for (idx = 0; idx < vp->code.constants.Count; ++idx) {
+		const struct rc_constant *c = &vp->code.constants.Constants[idx];
+		const float *values = 0;
+
+		if (c->Type == RC_CONSTANT_IMMEDIATE) {
+			values = c->u.Immediate;
+		} else if (c->Type == RC_CONSTANT_EXTERNAL) {
+			/* External constants come from context state for NV programs,
+			 * otherwise from the program's own parameter list. */
+			if (vp->Base->IsNVProgram)
+				values = ctx->VertexProgram.Parameters[c->u.External];
+			else
+				values = vp->Base->Base.Parameters->ParameterValues[c->u.External];
+		}
+
+		/* Any other constant type leaves values unset. */
+		assert(values);
+
+		for (comp = 0; comp < 4; ++comp)
+			dst[4*idx + comp] = values[comp];
+	}
+
+	return 4 * vp->code.constants.Count;
+}
+
+/*
+ * Work out which vertex results must be produced: position always, the colors
+ * and texcoords the fragment program reads, and back colors / point size only
+ * when the vertex program writes them.
+ */
+static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads)
+{
+	GLbitfield required = 1 << VERT_RESULT_HPOS;
+	int unit;
+
+	if (fpreads & (1 << FRAG_ATTRIB_COL0)) {
+		required |= 1 << VERT_RESULT_COL0;
+		if (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0))
+			required |= 1 << VERT_RESULT_BFC0;
+	}
+
+	if (fpreads & (1 << FRAG_ATTRIB_COL1)) {
+		required |= 1 << VERT_RESULT_COL1;
+		if (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1))
+			required |= 1 << VERT_RESULT_BFC1;
+	}
+
+	/* Eight texcoord sets, mapped one-to-one. */
+	for (unit = 0; unit < 8; ++unit) {
+		if (fpreads & (1 << (FRAG_ATTRIB_TEX0 + unit)))
+			required |= 1 << (VERT_RESULT_TEX0 + unit);
+	}
+
+	if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ))
+		required |= 1 << VERT_RESULT_PSIZ;
+
+	return required;
+}
+
+
+static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
+{	/* Fill c->code->inputs[] and outputs[] with consecutive register indices; -1 marks an unused slot. */
+	int i;
+	int cur_reg;
+	GLuint OutputsWritten, InputsRead;
+
+	OutputsWritten = c->Base.Program.OutputsWritten;
+	InputsRead = c->Base.Program.InputsRead;
+
+	cur_reg = -1;	/* pre-incremented below, so the first attribute read gets index 0 */
+	for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+		if (InputsRead & (1 << i))
+			c->code->inputs[i] = ++cur_reg;
+		else
+			c->code->inputs[i] = -1;
+	}
+
+	cur_reg = 0;	/* outputs are numbered separately, starting again from 0 */
+	for (i = 0; i < VERT_RESULT_MAX; i++)
+		c->code->outputs[i] = -1;
+
+	assert(OutputsWritten & (1 << VERT_RESULT_HPOS));	/* a position write is required */
+
+	if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
+		c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
+	}
+
+	if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
+		c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+	}
+
+	/* If we're writing back facing colors we need to send
+	 * four colors to make front/back face colors selection work.
+	 * If the vertex program doesn't write all 4 colors, lets
+	 * pretend it does by skipping output index reg so the colors
+	 * get written into appropriate output vectors.
+	 */
+	if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
+		c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
+	} else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+		OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+		cur_reg++;	/* skip the index COL0 would have used */
+	}
+
+	if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
+		c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
+	} else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+		OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+		cur_reg++;	/* skip the index COL1 would have used */
+	}
+
+	if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+		c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
+	} else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+		cur_reg++;	/* skip the index BFC0 would have used */
+	}
+
+	if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+		c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
+	} else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+		cur_reg++;	/* skip the index BFC1 would have used */
+	}
+
+	for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
+		if (OutputsWritten & (1 << i)) {
+			c->code->outputs[i] = cur_reg++;
+		}
+	}
+
+	if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
+		c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
+	}
+}
+
+/**
+ * Emit instructions that zero temporaries 0..11 and the address register,
+ * since the NV_vertex_program spec requires every register to start at zero.
+ *
+ * \note Unneeded initialisations are left for the compiler's dead-code pass.
+ */
+static void initialize_NV_registers(struct radeon_compiler * compiler)
+{
+	struct rc_instruction *mov, *arl;
+	unsigned int temp = 0;
+
+	while (temp < 12) {
+		mov = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
+		mov->U.I.Opcode = RC_OPCODE_MOV;
+		mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+		mov->U.I.DstReg.Index = temp++;
+		mov->U.I.SrcReg[0].File = RC_FILE_NONE;
+		mov->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+	}
+
+	arl = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
+	arl->U.I.Opcode = RC_OPCODE_ARL;
+	arl->U.I.DstReg.File = RC_FILE_ADDRESS;
+	arl->U.I.DstReg.Index = 0;
+	arl->U.I.DstReg.WriteMask = WRITEMASK_X;
+	arl->U.I.SrcReg[0].File = RC_FILE_NONE;
+	arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+}
+
+static struct r300_vertex_program *build_program(GLcontext *ctx,
+						 struct r300_vertex_program_key *wanted_key,
+						 const struct gl_vertex_program *mesa_vp)
+{	/* Clone mesa_vp, compile the clone for wanted_key and return the new program; the compile status lands in vp->error. */
+	struct r300_vertex_program *vp;
+	struct r300_vertex_program_compiler compiler;
+
+	vp = calloc(1, sizeof(*vp));	/* NOTE(review): result is used without a NULL check */
+	vp->Base = _mesa_clone_vertex_program(ctx, mesa_vp);
+	memcpy(&vp->key, wanted_key, sizeof(vp->key));	/* the key is later matched with memcmp by the selector */
+
+	rc_init(&compiler.Base);
+	compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE;
+
+	compiler.code = &vp->code;
+	compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
+	compiler.SetHwInputOutput = &t_inputs_outputs;
+
+	if (compiler.Base.Debug) {
+		fprintf(stderr, "Initial vertex program:\n");
+		_mesa_print_program(&vp->Base->Base);
+		fflush(stderr);
+	}
+
+	if (mesa_vp->IsPositionInvariant) {
+		_mesa_insert_mvp_code(ctx, vp->Base);	/* operates on the clone, not on mesa_vp */
+	}
+
+	radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
+
+	if (mesa_vp->IsNVProgram)
+		initialize_NV_registers(&compiler.Base);
+
+	rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
+
+	if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {	/* FRAG_ATTRIB_MAX appears to mean "no attribute assigned" -- TODO confirm */
+		unsigned int vp_wpos_attr = vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
+
+		/* Set empty writemask for instructions writing to vp_wpos_attr
+		 * before moving the wpos attr there.
+		 * Such instructions will be removed by DCE.
+		 */
+		rc_move_output(&compiler.Base, vp_wpos_attr, vp->key.WPosAttr, 0);
+		rc_copy_output(&compiler.Base, VERT_RESULT_HPOS, vp_wpos_attr);
+	}
+
+	if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
+		unsigned int vp_fog_attr = vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
+
+		/* Set empty writemask for instructions writing to vp_fog_attr
+		 * before moving the fog attr there.
+		 * Such instructions will be removed by DCE.
+		 */
+		rc_move_output(&compiler.Base, vp_fog_attr, vp->key.FogAttr, 0);
+		rc_move_output(&compiler.Base, VERT_RESULT_FOGC, vp_fog_attr, WRITEMASK_X);
+	}
+
+	r3xx_compile_vertex_program(&compiler);
+
+	if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) {
+		rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n");
+	}
+
+	vp->error = compiler.Base.Error;	/* copied out before the compiler is destroyed below */
+
+	vp->Base->Base.InputsRead = vp->code.InputsRead;	/* overwrite the clone's masks with those of the compiled code */
+	vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;
+
+	rc_destroy(&compiler.Base);
+
+	return vp;
+}
+
+/**
+ * Select, compiling on first use, the variant of the current vertex program
+ * that matches the selected fragment program, and store it in selected_vp.
+ */
+struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
+{
+	r300ContextPtr r300 = R300_CONTEXT(ctx);
+	struct r300_vertex_program_key wanted_key;
+	struct r300_vertex_program_cont *vpc;
+	struct r300_vertex_program *vp;
+
+	vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+
+	if (!r300->selected_fp) {
+		/* Happens when GetProgramiv checks whether the program runs natively.
+		 * Not a great solution, but good answers to such queries are hard:
+		 * vertex programs are recompiled per fragment program in use. */
+		r300SelectAndTranslateFragmentShader(ctx);
+	}
+
+	assert(r300->selected_fp);
+	/* memset rather than "= { 0 }": padding bytes must be zero for the memcmp below. */
+	memset(&wanted_key, 0, sizeof(wanted_key));
+	wanted_key.FpReads = r300->selected_fp->InputsRead;
+	wanted_key.FogAttr = r300->selected_fp->fog_attr;
+	wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
+
+	for (vp = vpc->progs; vp; vp = vp->next) {
+		if (memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) {
+			return r300->selected_vp = vp;
+		}
+	}
+
+	vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
+	vp->next = vpc->progs;
+	vpc->progs = vp;
+
+	return r300->selected_vp = vp;
+}
+
+#define bump_vpu_count(ptr, new_count)   do { /* raise vpu.count to new_count/4, never lower it */ \
+		drm_r300_cmd_header_t *_hdr = (drm_r300_cmd_header_t *)(ptr); \
+		int _quads = (new_count) / 4; \
+		if (_quads > _hdr->vpu.count) _hdr->vpu.count = _quads; \
+	} while(0)
+
+/* Copy code->body.d[] into the vpi (0), vpp (2) or vps (4) state atom chosen by bits 8-11 of dest. */
+static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code)
+{
+	int word, base = 4 * (dest & 0xff);
+
+	assert((code->length > 0) && (code->length % 4 == 0));
+	switch ((dest >> 8) & 0xf) {
+		case 0:
+			R300_STATECHANGE(r300, vpi);
+			for (word = 0; word < code->length; word++)
+				r300->hw.vpi.cmd[R300_VPI_INSTR_0 + word + base] = (code->body.d[word]);
+			bump_vpu_count(r300->hw.vpi.cmd, code->length + base);
+			break;
+		case 2:
+			R300_STATECHANGE(r300, vpp);
+			for (word = 0; word < code->length; word++)
+				r300->hw.vpp.cmd[R300_VPP_PARAM_0 + word + base] = (code->body.d[word]);
+			bump_vpu_count(r300->hw.vpp.cmd, code->length + base);
+			break;
+		case 4:
+			R300_STATECHANGE(r300, vps);
+			for (word = 0; word < code->length; word++)
+				r300->hw.vps.cmd[1 + word + base] = (code->body.d[word]);
+			bump_vpu_count(r300->hw.vps.cmd, code->length + base);
+			break;
+		default:
+			fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
+			exit(-1);
+	}
+}
+
+void r300SetupVertexProgram(r300ContextPtr rmesa)
+{	/* Load rmesa->selected_vp into the vpp/vpi state atoms and program the vap_cntl and pvs registers for it. */
+	GLcontext *ctx = rmesa->radeon.glCtx;
+	struct r300_vertex_program *prog = rmesa->selected_vp;
+	int inst_count = 0;
+	int param_count = 0;
+
+	/* Reset state, in case we don't use something */
+	((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
+	((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
+	((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
+
+	R300_STATECHANGE(rmesa, vap_cntl);
+	R300_STATECHANGE(rmesa, vpp);
+	param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);	/* count is in floats */
+	if (!rmesa->radeon.radeonScreen->kernel_mm && param_count > 255 * 4) {
+		WARN_ONCE("Too many VP params, expect rendering errors\n");
+	}
+	/* Prevent the overflow (vpu.count is u8) */
+	bump_vpu_count(rmesa->hw.vpp.cmd, MIN2(255 * 4, param_count));
+	param_count /= 4;	/* floats -> four-float constants */
+
+	r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code));
+	inst_count = (prog->code.length / 4) - 1;	/* presumably the index of the last instruction (4 dwords each) */
+
+	r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead),
+				 _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries);
+
+	R300_STATECHANGE(rmesa, pvs);
+	rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
+				(inst_count << R300_PVS_LAST_INST_SHIFT);
+
+	rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | ((param_count - 1) << R300_PVS_MAX_CONST_ADDR_SHIFT);	/* NOTE(review): param_count - 1 is negative when the program has no constants */
+	rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+}
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h
new file mode 100644
index 0000000000..ccec896be4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.h
@@ -0,0 +1,11 @@
+#ifndef __R300_VERTPROG_H_
+#define __R300_VERTPROG_H_
+
+#include "r300_reg.h"
+
+
+void r300SetupVertexProgram(r300ContextPtr rmesa);
+
+struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/radeon_bo.c b/src/mesa/drivers/dri/r300/radeon_bo.c
new file mode 120000
index 0000000000..9448ffee54
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_bo.c
@@ -0,0 +1 @@
+../radeon/radeon_bo.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_bo_int_drm.h b/src/mesa/drivers/dri/r300/radeon_bo_int_drm.h
new file mode 120000
index 0000000000..029450928b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_bo_int_drm.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_int_drm.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_bo_legacy.c b/src/mesa/drivers/dri/r300/radeon_bo_legacy.c
new file mode 120000
index 0000000000..79ad050e6b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_bo_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_bo_legacy.h b/src/mesa/drivers/dri/r300/radeon_bo_legacy.h
new file mode 120000
index 0000000000..83b0f7ffab
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_bo_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h
new file mode 120000
index 0000000000..ca894b2443
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h
@@ -0,0 +1 @@
+../radeon/radeon_bocs_wrapper.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_buffer_objects.c b/src/mesa/drivers/dri/r300/radeon_buffer_objects.c
new file mode 120000
index 0000000000..f6a5f66470
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_buffer_objects.c
@@ -0,0 +1 @@
+../radeon/radeon_buffer_objects.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_buffer_objects.h b/src/mesa/drivers/dri/r300/radeon_buffer_objects.h
new file mode 120000
index 0000000000..2f134fd17b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_buffer_objects.h
@@ -0,0 +1 @@
+../radeon/radeon_buffer_objects.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_chipset.h b/src/mesa/drivers/dri/r300/radeon_chipset.h
new file mode 120000
index 0000000000..eba99001ff
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_chipset.h
@@ -0,0 +1 @@
+../radeon/radeon_chipset.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cmdbuf.h b/src/mesa/drivers/dri/r300/radeon_cmdbuf.h
new file mode 120000
index 0000000000..a799e1dc6d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cmdbuf.h
@@ -0,0 +1 @@
+../radeon/radeon_cmdbuf.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_common.c b/src/mesa/drivers/dri/r300/radeon_common.c
new file mode 120000
index 0000000000..67b19ba940
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_common.c
@@ -0,0 +1 @@
+../radeon/radeon_common.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_common.h b/src/mesa/drivers/dri/r300/radeon_common.h
new file mode 120000
index 0000000000..5bcb696a9f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_common.h
@@ -0,0 +1 @@
+../radeon/radeon_common.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_common_context.c b/src/mesa/drivers/dri/r300/radeon_common_context.c
new file mode 120000
index 0000000000..86800f3819
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_common_context.c
@@ -0,0 +1 @@
+../radeon/radeon_common_context.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_common_context.h b/src/mesa/drivers/dri/r300/radeon_common_context.h
new file mode 120000
index 0000000000..4d66312550
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_common_context.h
@@ -0,0 +1 @@
+../radeon/radeon_common_context.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h
new file mode 100644
index 0000000000..da4812d323
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_context.h
@@ -0,0 +1,62 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __RADEON_CONTEXT_H__
+#define __RADEON_CONTEXT_H__
+
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "radeon_screen.h"
+#include "drm.h"
+#include "dri_util.h"
+
+#include "radeon_screen.h"
+/* Debug stub: the arguments are ignored and the call site is printed to stderr as "file:line". */
+#define FALLBACK( radeon, bit, mode ) fprintf(stderr, "%s:%d\n", __FILE__, __LINE__);
+
+/* TCL fallbacks: radeonTclFallback() is only declared here; TCL_FALLBACK() expands to an empty statement. */
+extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
+
+#define TCL_FALLBACK( ctx, bit, mode )	;
+
+
+#endif				/* __RADEON_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/r300/radeon_cs.c b/src/mesa/drivers/dri/r300/radeon_cs.c
new file mode 120000
index 0000000000..66b7ad1eb0
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cs.c
@@ -0,0 +1 @@
+../radeon/radeon_cs.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cs_int_drm.h b/src/mesa/drivers/dri/r300/radeon_cs_int_drm.h
new file mode 120000
index 0000000000..462f5245d0
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cs_int_drm.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_int_drm.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cs_legacy.c b/src/mesa/drivers/dri/r300/radeon_cs_legacy.c
new file mode 120000
index 0000000000..006720f8a4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cs_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cs_legacy.h b/src/mesa/drivers/dri/r300/radeon_cs_legacy.h
new file mode 120000
index 0000000000..a5f95e0a3d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cs_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c b/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c
new file mode 120000
index 0000000000..c248ea7d1a
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_space_drm.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_debug.c b/src/mesa/drivers/dri/r300/radeon_debug.c
new file mode 120000
index 0000000000..c98c2e074c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_debug.c
@@ -0,0 +1 @@
+../radeon/radeon_debug.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_debug.h b/src/mesa/drivers/dri/r300/radeon_debug.h
new file mode 120000
index 0000000000..bd8aa28e89
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_debug.h
@@ -0,0 +1 @@
+../radeon/radeon_debug.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_dma.c b/src/mesa/drivers/dri/r300/radeon_dma.c
new file mode 120000
index 0000000000..43be000625
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_dma.c
@@ -0,0 +1 @@
+../radeon/radeon_dma.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_dma.h b/src/mesa/drivers/dri/r300/radeon_dma.h
new file mode 120000
index 0000000000..82e50634e3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_dma.h
@@ -0,0 +1 @@
+../radeon/radeon_dma.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_fbo.c b/src/mesa/drivers/dri/r300/radeon_fbo.c
new file mode 120000
index 0000000000..0d738d8d78
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_fbo.c
@@ -0,0 +1 @@
+../radeon/radeon_fbo.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c
new file mode 120000
index 0000000000..af4108a8e3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_lock.c
@@ -0,0 +1 @@
+../radeon/radeon_lock.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h
new file mode 120000
index 0000000000..64bdf94ee7
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_lock.h
@@ -0,0 +1 @@
+../radeon/radeon_lock.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c
new file mode 100644
index 0000000000..9f9dec840b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_mesa_to_rc.h"
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+
+#include "compiler/radeon_compiler.h"
+#include "compiler/radeon_program.h"
+
+/* Map a Mesa gl_inst_opcode to the rc_opcode of the same name; any opcode without a case yields RC_OPCODE_ILLEGAL_OPCODE. */
+static rc_opcode translate_opcode(gl_inst_opcode opcode)
+{
+	switch(opcode) {
+	case OPCODE_NOP: return RC_OPCODE_NOP;
+	case OPCODE_ABS: return RC_OPCODE_ABS;
+	case OPCODE_ADD: return RC_OPCODE_ADD;
+	case OPCODE_ARL: return RC_OPCODE_ARL;
+	case OPCODE_CMP: return RC_OPCODE_CMP;
+	case OPCODE_COS: return RC_OPCODE_COS;
+	case OPCODE_DDX: return RC_OPCODE_DDX;
+	case OPCODE_DDY: return RC_OPCODE_DDY;
+	case OPCODE_DP3: return RC_OPCODE_DP3;
+	case OPCODE_DP4: return RC_OPCODE_DP4;
+	case OPCODE_DPH: return RC_OPCODE_DPH;
+	case OPCODE_DST: return RC_OPCODE_DST;
+	case OPCODE_EX2: return RC_OPCODE_EX2;
+	case OPCODE_EXP: return RC_OPCODE_EXP;
+	case OPCODE_FLR: return RC_OPCODE_FLR;
+	case OPCODE_FRC: return RC_OPCODE_FRC;
+	case OPCODE_KIL: return RC_OPCODE_KIL;
+	case OPCODE_LG2: return RC_OPCODE_LG2;
+	case OPCODE_LIT: return RC_OPCODE_LIT;
+	case OPCODE_LOG: return RC_OPCODE_LOG;
+	case OPCODE_LRP: return RC_OPCODE_LRP;
+	case OPCODE_MAD: return RC_OPCODE_MAD;
+	case OPCODE_MAX: return RC_OPCODE_MAX;
+	case OPCODE_MIN: return RC_OPCODE_MIN;
+	case OPCODE_MOV: return RC_OPCODE_MOV;
+	case OPCODE_MUL: return RC_OPCODE_MUL;
+	case OPCODE_POW: return RC_OPCODE_POW;
+	case OPCODE_RCP: return RC_OPCODE_RCP;
+	case OPCODE_RSQ: return RC_OPCODE_RSQ;
+	case OPCODE_SCS: return RC_OPCODE_SCS;
+	case OPCODE_SEQ: return RC_OPCODE_SEQ;
+	case OPCODE_SFL: return RC_OPCODE_SFL;
+	case OPCODE_SGE: return RC_OPCODE_SGE;
+	case OPCODE_SGT: return RC_OPCODE_SGT;
+	case OPCODE_SIN: return RC_OPCODE_SIN;
+	case OPCODE_SLE: return RC_OPCODE_SLE;
+	case OPCODE_SLT: return RC_OPCODE_SLT;
+	case OPCODE_SNE: return RC_OPCODE_SNE;
+	case OPCODE_SUB: return RC_OPCODE_SUB;
+	case OPCODE_SWZ: return RC_OPCODE_SWZ;
+	case OPCODE_TEX: return RC_OPCODE_TEX;
+	case OPCODE_TXB: return RC_OPCODE_TXB;
+	case OPCODE_TXD: return RC_OPCODE_TXD;
+	case OPCODE_TXL: return RC_OPCODE_TXL;
+	case OPCODE_TXP: return RC_OPCODE_TXP;
+	case OPCODE_XPD: return RC_OPCODE_XPD;
+	default: return RC_OPCODE_ILLEGAL_OPCODE;
+	}
+}
+
+/* Map a Mesa SATURATE_* mode to the compiler's rc_saturate_mode. */
+static rc_saturate_mode translate_saturate(unsigned int saturate)
+{
+	/* Every mode other than SATURATE_ZERO_ONE means "no saturation". */
+	if (saturate == SATURATE_ZERO_ONE)
+		return RC_SATURATE_ZERO_ONE;
+	return RC_SATURATE_NONE;
+}
+
+/* Map a Mesa PROGRAM_* register file to an rc_register_file; unlisted files give RC_FILE_NONE. */
+static rc_register_file translate_register_file(unsigned int file)
+{
+	switch(file) {
+	case PROGRAM_INPUT: return RC_FILE_INPUT;
+	case PROGRAM_OUTPUT: return RC_FILE_OUTPUT;
+	case PROGRAM_TEMPORARY: return RC_FILE_TEMPORARY;
+	case PROGRAM_ADDRESS: return RC_FILE_ADDRESS;
+	/* All parameter-like files collapse into the single constant file. */
+	case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM:
+	case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM:
+	case PROGRAM_CONSTANT: case PROGRAM_UNIFORM:
+		return RC_FILE_CONSTANT;
+	default: return RC_FILE_NONE;
+	}
+}
+/* Fill *dest from a Mesa source operand; only the register file is translated, the other fields are copied as-is. */
+static void translate_srcreg(struct rc_src_register * dest, struct prog_src_register * src)
+{
+	dest->Negate = src->Negate;
+	dest->Abs = src->Abs;
+	dest->Swizzle = src->Swizzle;
+	dest->RelAddr = src->RelAddr;
+	dest->Index = src->Index;
+	dest->File = translate_register_file(src->File);
+}
+/* Fill *dest from a Mesa destination operand; only the register file is translated. */
+static void translate_dstreg(struct rc_dst_register * dest, struct prog_dst_register * src)
+{
+	dest->WriteMask = src->WriteMask;
+	dest->RelAddr = src->RelAddr;
+	dest->Index = src->Index;
+	dest->File = translate_register_file(src->File);
+}
+
+/* Map a Mesa texture index to an rc_texture_target; unlisted indices fall back to RC_TEXTURE_2D. */
+static rc_texture_target translate_tex_target(gl_texture_index target)
+{
+	switch(target) {
+	case TEXTURE_1D_INDEX: return RC_TEXTURE_1D;
+	case TEXTURE_1D_ARRAY_INDEX: return RC_TEXTURE_1D_ARRAY;
+	case TEXTURE_2D_ARRAY_INDEX: return RC_TEXTURE_2D_ARRAY;
+	case TEXTURE_3D_INDEX: return RC_TEXTURE_3D;
+	case TEXTURE_CUBE_INDEX: return RC_TEXTURE_CUBE;
+	case TEXTURE_RECT_INDEX: return RC_TEXTURE_RECT;
+	case TEXTURE_2D_INDEX: default: return RC_TEXTURE_2D;
+	}
+}
+/* Translate one Mesa instruction into *dest; an unsupported opcode is reported through rc_error(). */
+static void translate_instruction(struct radeon_compiler * c,
+		struct rc_instruction * dest, struct prog_instruction * src)
+{
+	const struct rc_opcode_info * info;
+	unsigned int reg;
+
+	dest->U.I.Opcode = translate_opcode(src->Opcode);
+	if (dest->U.I.Opcode == RC_OPCODE_ILLEGAL_OPCODE) {
+		rc_error(c, "Unsupported opcode %i\n", src->Opcode);
+		return;
+	}
+	dest->U.I.SaturateMode = translate_saturate(src->SaturateMode);
+
+	/* The opcode info decides which operands get translated below. */
+	info = rc_get_opcode_info(dest->U.I.Opcode);
+	for(reg = 0; reg < info->NumSrcRegs; ++reg)
+		translate_srcreg(&dest->U.I.SrcReg[reg], &src->SrcReg[reg]);
+
+	if (info->HasDstReg)
+		translate_dstreg(&dest->U.I.DstReg, &src->DstReg);
+
+	if (!info->HasTexture)
+		return;
+	dest->U.I.TexSrcUnit = src->TexSrcUnit;
+	dest->U.I.TexSrcTarget = translate_tex_target(src->TexSrcTarget);
+	dest->U.I.TexShadow = src->TexShadow;
+}
+
+/* Copy a Mesa program's instructions, I/O masks and external constant slots into compiler c. */
+void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program)
+{
+	struct prog_instruction *inst;
+	unsigned int num_constants;
+	unsigned int idx;
+	int isNVProgram = 0;
+
+	/* Each new instruction is inserted after c->Program.Instructions.Prev;
+	 * the walk stops at OPCODE_END, which is not translated itself.
+	 */
+	inst = program->Instructions;
+	while (inst->Opcode != OPCODE_END) {
+		struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+		translate_instruction(c, dest, inst);
+		++inst;
+	}
+
+	c->Program.ShadowSamplers = program->ShadowSamplers;
+	c->Program.InputsRead = program->InputsRead;
+	c->Program.OutputsWritten = program->OutputsWritten;
+
+	if (program->Target == GL_VERTEX_PROGRAM_ARB) {
+		struct gl_vertex_program * vp = (struct gl_vertex_program *) program;
+		isNVProgram = vp->IsNVProgram;
+	}
+
+	/* NV_vertex_program has a fixed-sized constant environment.
+	 * This could be handled more efficiently for programs that
+	 * do not use relative addressing.
+	 */
+	if (isNVProgram)
+		num_constants = 96;
+	else
+		num_constants = program->Parameters->NumParameters;
+
+	/* Constant number idx is registered as external constant idx. */
+	for(idx = 0; idx < num_constants; ++idx) {
+		struct rc_constant constant;
+
+		constant.Type = RC_CONSTANT_EXTERNAL;
+		constant.Size = 4;
+		constant.u.External = idx;
+
+		rc_constants_add(&c->Program.Constants, &constant);
+	}
+}
diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h
new file mode 100644
index 0000000000..9511a04f36
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_MESA_TO_RC_H
+#define RADEON_MESA_TO_RC_H
+
+struct gl_program;
+struct radeon_compiler;
+
+void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
+
+#endif /* RADEON_MESA_TO_RC_H */
diff --git a/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c
new file mode 120000
index 0000000000..31c0cfbe94
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h
new file mode 120000
index 0000000000..254d50cf8c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_pixel_read.c b/src/mesa/drivers/dri/r300/radeon_pixel_read.c
new file mode 120000
index 0000000000..3b03803126
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_pixel_read.c
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_queryobj.c b/src/mesa/drivers/dri/r300/radeon_queryobj.c
new file mode 120000
index 0000000000..1d6ebc1c48
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_queryobj.c
@@ -0,0 +1 @@
+../radeon/radeon_queryobj.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_queryobj.h b/src/mesa/drivers/dri/r300/radeon_queryobj.h
new file mode 120000
index 0000000000..8f6f842b0a
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_queryobj.h
@@ -0,0 +1 @@
+../radeon/radeon_queryobj.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_screen.c b/src/mesa/drivers/dri/r300/radeon_screen.c
new file mode 120000
index 0000000000..86161118dd
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_screen.c
@@ -0,0 +1 @@
+../radeon/radeon_screen.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_screen.h b/src/mesa/drivers/dri/r300/radeon_screen.h
new file mode 120000
index 0000000000..23bb6bd459
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_screen.h
@@ -0,0 +1 @@
+../radeon/radeon_screen.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
new file mode 120000
index 0000000000..232868c4c9
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_span.c
@@ -0,0 +1 @@
+../radeon/radeon_span.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_span.h b/src/mesa/drivers/dri/r300/radeon_span.h
new file mode 120000
index 0000000000..f9d634508c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_span.h
@@ -0,0 +1 @@
+../radeon/radeon_span.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_tex_copy.c b/src/mesa/drivers/dri/r300/radeon_tex_copy.c
new file mode 120000
index 0000000000..dfa5ba34e6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_tex_copy.c
@@ -0,0 +1 @@
+../radeon/radeon_tex_copy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_tex_getimage.c b/src/mesa/drivers/dri/r300/radeon_tex_getimage.c
new file mode 120000
index 0000000000..d9836d7326
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_tex_getimage.c
@@ -0,0 +1 @@
+../radeon/radeon_tex_getimage.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_texture.c b/src/mesa/drivers/dri/r300/radeon_texture.c
new file mode 120000
index 0000000000..a822710915
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_texture.c
@@ -0,0 +1 @@
+../radeon/radeon_texture.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_texture.h b/src/mesa/drivers/dri/r300/radeon_texture.h
new file mode 120000
index 0000000000..17fac3d5ea
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_texture.h
@@ -0,0 +1 @@
+../radeon/radeon_texture.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_tile.c b/src/mesa/drivers/dri/r300/radeon_tile.c
new file mode 120000
index 0000000000..d4bfe27da6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_tile.c
@@ -0,0 +1 @@
+../radeon/radeon_tile.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_tile.h b/src/mesa/drivers/dri/r300/radeon_tile.h
new file mode 120000
index 0000000000..31074c581e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_tile.h
@@ -0,0 +1 @@
+../radeon/radeon_tile.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon.h b/src/mesa/drivers/dri/r300/server/radeon.h
new file mode 120000
index 0000000000..81274a54f1
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon.h
@@ -0,0 +1 @@
+../../radeon/server/radeon.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon_dri.h b/src/mesa/drivers/dri/r300/server/radeon_dri.h
new file mode 120000
index 0000000000..27c591d3c9
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon_dri.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_dri.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon_macros.h b/src/mesa/drivers/dri/r300/server/radeon_macros.h
new file mode 120000
index 0000000000..c56cd735b8
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon_macros.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_macros.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon_reg.h b/src/mesa/drivers/dri/r300/server/radeon_reg.h
new file mode 120000
index 0000000000..e2349dcb68
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon_reg.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_reg.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/Lindent b/src/mesa/drivers/dri/r600/Lindent
new file mode 100755
index 0000000000..7d8d8896e3
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/Lindent
@@ -0,0 +1,2 @@
+#!/bin/sh
+indent -npro -kr -i8 -ts8 -sob -l80 -ss -ncs "$@"
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile
new file mode 100644
index 0000000000..17915621ee
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/Makefile
@@ -0,0 +1,76 @@
+# src/mesa/drivers/dri/r600/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+CFLAGS += $(RADEON_CFLAGS)
+
+LIBNAME = r600_dri.so
+
+ifeq ($(RADEON_LDFLAGS),)
+CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c
+endif
+
+COMMON_SOURCES = \
+	../../common/driverfuncs.c \
+	../common/mm.c \
+	../common/utils.c \
+	../common/texmem.c \
+	../common/vblank.c \
+	../common/xmlconfig.c \
+	../common/dri_util.c
+
+RADEON_COMMON_SOURCES = \
+	radeon_bo_legacy.c \
+	radeon_common_context.c \
+	radeon_buffer_objects.c \
+	radeon_common.c \
+	radeon_cs_legacy.c \
+	radeon_dma.c \
+	radeon_debug.c \
+	radeon_fbo.c \
+	radeon_lock.c \
+	radeon_mipmap_tree.c \
+	radeon_pixel_read.c \
+	radeon_queryobj.c \
+	radeon_span.c \
+	radeon_texture.c \
+	radeon_tex_copy.c \
+	radeon_tex_getimage.c \
+	radeon_tile.c
+
+DRIVER_SOURCES = \
+		 radeon_screen.c \
+		 r600_context.c \
+		 r600_cmdbuf.c \
+		 r600_emit.c       \
+		 r700_assembler.c  \
+		 r700_fragprog.c \
+		 r700_vertprog.c \
+		 r700_shader.c \
+		 r700_shaderinst.c \
+		 r700_ioctl.c \
+		 r700_oglprog.c \
+		 r700_chip.c     \
+		 r700_state.c    \
+		 r700_clear.c    \
+		 r700_render.c   \
+		 r600_tex.c      \
+		 r600_texstate.c      \
+		 r600_blit.c     \
+		 r700_debug.c    \
+		 $(RADEON_COMMON_SOURCES) \
+		 $(EGL_SOURCES) \
+		 $(CS_SOURCES)
+
+C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
+
+DRIVER_DEFINES = -DRADEON_R600
+#	-DRADEON_BO_TRACK \
+
+DRI_LIB_DEPS += $(RADEON_LDFLAGS)
+
+##### TARGETS #####
+
+include ../Makefile.template
+
diff --git a/src/mesa/drivers/dri/r600/defaultendian.h b/src/mesa/drivers/dri/r600/defaultendian.h
new file mode 100644
index 0000000000..32caf32cd2
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/defaultendian.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#ifndef _DEFINEENDIAN_H_
+#define _DEFINEENDIAN_H_
+
+// Default to the little-endian register bit layout when the build defines neither LITTLEENDIAN_CPU nor BIGENDIAN_CPU.
+#if defined(LITTLEENDIAN_CPU)
+#elif defined(BIGENDIAN_CPU)
+#else
+#define LITTLEENDIAN_CPU
+#endif
+
+#endif //_DEFINEENDIAN_H_
diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c
new file mode 100644
index 0000000000..172f85eb26
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_blit.c
@@ -0,0 +1,1663 @@
+/*
+ * Copyright (C) 2009 Advanced Micro Devices, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "r600_context.h"
+
+#include "r600_blit.h"
+#include "r600_blit_shaders.h"
+#include "r600_cmdbuf.h"
+
+/* Return 1 when mesa_format is one of the formats listed below (usable as
+ * both texture and render target) and has no depth bits, otherwise 0.
+ * Depth formats are refused: blitting to depth is not known to work yet. */
+unsigned r600_check_blit(gl_format mesa_format)
+{
+    unsigned listed = 0;
+
+    switch (mesa_format) {
+    case MESA_FORMAT_RGBA8888:
+    case MESA_FORMAT_SIGNED_RGBA8888:
+    case MESA_FORMAT_RGBA8888_REV:
+    case MESA_FORMAT_SIGNED_RGBA8888_REV:
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+    case MESA_FORMAT_ARGB8888_REV:
+    case MESA_FORMAT_XRGB8888_REV:
+    case MESA_FORMAT_RGB565:
+    case MESA_FORMAT_RGB565_REV:
+    case MESA_FORMAT_ARGB4444:
+    case MESA_FORMAT_ARGB4444_REV:
+    case MESA_FORMAT_ARGB1555:
+    case MESA_FORMAT_ARGB1555_REV:
+    case MESA_FORMAT_AL88:
+    case MESA_FORMAT_AL88_REV:
+    case MESA_FORMAT_RGB332:
+    case MESA_FORMAT_A8:
+    case MESA_FORMAT_I8:
+    case MESA_FORMAT_CI8:
+    case MESA_FORMAT_L8:
+    case MESA_FORMAT_RGBA_FLOAT32:
+    case MESA_FORMAT_RGBA_FLOAT16:
+    case MESA_FORMAT_ALPHA_FLOAT32:
+    case MESA_FORMAT_ALPHA_FLOAT16:
+    case MESA_FORMAT_LUMINANCE_FLOAT32:
+    case MESA_FORMAT_LUMINANCE_FLOAT16:
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+    case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+    case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+    case MESA_FORMAT_X8_Z24:
+    case MESA_FORMAT_S8_Z24:
+    case MESA_FORMAT_Z24_S8:
+    case MESA_FORMAT_Z16:
+    case MESA_FORMAT_Z32:
+    case MESA_FORMAT_SRGBA8:
+    case MESA_FORMAT_SLA8:
+    case MESA_FORMAT_SL8:
+	    listed = 1;
+	    break;
+    default:
+	    break;
+    }
+
+    return (listed && _mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) <= 0) ? 1 : 0;
+}
+
+/*
+ * Emit the CB_COLOR0 state that makes 'bo' the colour target of a blit,
+ * translating mesa_format to the matching hardware format and swap.
+ * Unhandled formats are reported on stderr and emit nothing; w is unused.
+ */
+static inline void
+set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_format,
+                  int nPitchInPixel, int w, int h, intptr_t dst_offset)
+{
+    uint32_t cb_color0_base, cb_color0_size = 0, cb_color0_info = 0, cb_color0_view = 0;
+    int id = 0;
+    uint32_t comp_swap, format;
+    BATCH_LOCALS(&context->radeon);
+
+    cb_color0_base = dst_offset / 256;
+    SETfield(cb_color0_size, (nPitchInPixel / 8) - 1,
+             PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+    SETfield(cb_color0_size, ((nPitchInPixel * h) / 64) - 1,
+             SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask);
+
+    SETfield(cb_color0_info, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask);
+    SETfield(cb_color0_info, ARRAY_LINEAR_GENERAL,
+             CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+    SETbit(cb_color0_info, BLEND_BYPASS_bit);
+
+    switch(mesa_format) {
+    case MESA_FORMAT_RGBA8888:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_STD_REV;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_SIGNED_RGBA8888:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_STD_REV;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_SNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_RGBA8888_REV:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_STD;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_SIGNED_RGBA8888_REV:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_STD;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_SNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_ALT;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_ARGB8888_REV:
+    case MESA_FORMAT_XRGB8888_REV:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_ALT_REV;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_RGB565:
+            format = COLOR_5_6_5;
+            comp_swap = SWAP_STD_REV;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_RGB565_REV:
+            format = COLOR_5_6_5;
+            comp_swap = SWAP_STD;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_ARGB4444:
+            format = COLOR_4_4_4_4;
+            comp_swap = SWAP_ALT;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_ARGB4444_REV:
+            format = COLOR_4_4_4_4;
+            comp_swap = SWAP_ALT_REV;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_ARGB1555:
+            format = COLOR_1_5_5_5;
+            comp_swap = SWAP_ALT;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_ARGB1555_REV:
+            format = COLOR_1_5_5_5;
+            comp_swap = SWAP_ALT_REV;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_AL88:
+            format = COLOR_8_8;
+            comp_swap = SWAP_STD;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_AL88_REV:
+            format = COLOR_8_8;
+            comp_swap = SWAP_STD_REV;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_RGB332:
+            format = COLOR_3_3_2;
+            comp_swap = SWAP_STD_REV;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_A8:
+            format = COLOR_8;
+            comp_swap = SWAP_ALT_REV;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_I8:
+    case MESA_FORMAT_CI8:
+            format = COLOR_8;
+            comp_swap = SWAP_STD;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_L8:
+            format = COLOR_8;
+            comp_swap = SWAP_ALT;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_RGBA_FLOAT32:
+            format = COLOR_32_32_32_32_FLOAT;
+            comp_swap = SWAP_STD_REV;
+	    SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_RGBA_FLOAT16:
+            format = COLOR_16_16_16_16_FLOAT;
+            comp_swap = SWAP_STD_REV;
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_ALPHA_FLOAT32:
+            format = COLOR_32_FLOAT;
+            comp_swap = SWAP_ALT_REV;
+	    SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_ALPHA_FLOAT16:
+            format = COLOR_16_FLOAT;
+            comp_swap = SWAP_ALT_REV;
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_LUMINANCE_FLOAT32:
+            format = COLOR_32_FLOAT;
+            comp_swap = SWAP_ALT;
+	    SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_LUMINANCE_FLOAT16:
+            format = COLOR_16_FLOAT;
+            comp_swap = SWAP_ALT;
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+            format = COLOR_32_32_FLOAT;
+            comp_swap = SWAP_ALT_REV;
+	    SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+            format = COLOR_16_16_FLOAT;
+            comp_swap = SWAP_ALT_REV;
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+            format = COLOR_32_FLOAT;
+            comp_swap = SWAP_STD;
+	    SETbit(cb_color0_info, BLEND_FLOAT32_bit);
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+            format = COLOR_16_FLOAT;
+            comp_swap = SWAP_STD;
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_FLOAT, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_X8_Z24:
+    case MESA_FORMAT_S8_Z24:
+            format = COLOR_8_24;
+            comp_swap = SWAP_STD;
+	    SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+		     CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_Z24_S8:
+            format = COLOR_24_8;
+            comp_swap = SWAP_STD;
+	    SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+		     CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_Z16:
+            format = COLOR_16;
+            comp_swap = SWAP_STD;
+	    SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+		     CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_Z32:
+            format = COLOR_32;
+            comp_swap = SWAP_STD;
+	    SETfield(cb_color0_info, ARRAY_1D_TILED_THIN1,
+		     CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+	    CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_SRGBA8:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_STD_REV;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_SLA8:
+            format = COLOR_8_8;
+            comp_swap = SWAP_ALT_REV;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    case MESA_FORMAT_SL8:
+            format = COLOR_8;
+            comp_swap = SWAP_ALT_REV;
+	    SETbit(cb_color0_info, SOURCE_FORMAT_bit);
+	    SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+            break;
+    default:
+            fprintf(stderr,"Invalid format for copy %s\n",_mesa_get_format_name(mesa_format));
+            assert(!"Invalid format for US output\n"); /* negated: a bare literal never fires */
+            return;
+    }
+
+    /* must be 0 on r7xx */
+    if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
+	    CLEARbit(cb_color0_info, BLEND_FLOAT32_bit);
+    SETfield(cb_color0_info, format, CB_COLOR0_INFO__FORMAT_shift,
+             CB_COLOR0_INFO__FORMAT_mask);
+    SETfield(cb_color0_info, comp_swap, COMP_SWAP_shift, COMP_SWAP_mask);
+
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(CB_COLOR0_BASE + (4 * id), 1);
+    R600_OUT_BATCH(cb_color0_base);
+    R600_OUT_BATCH_RELOC(0,
+			 bo,
+			 0,
+			 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+    END_BATCH();
+    /* chips after R600 and before RV770 also get a SURFACE_BASE_UPDATE packet */
+    if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) &&
+	(context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) {
+	    BEGIN_BATCH_NO_AUTOSTATE(2);
+	    R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
+	    R600_OUT_BATCH((2 << id));
+	    END_BATCH();
+    }
+
+    /* Set CMASK & TILE buffer to the offset of color buffer as
+     * we don't use those this shouldn't cause any issue and we
+     * then have a valid cmd stream
+     */
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1);
+    R600_OUT_BATCH(cb_color0_base);
+    R600_OUT_BATCH_RELOC(0,
+			 bo,
+			 0,
+			 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+    END_BATCH();
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1);
+    R600_OUT_BATCH(cb_color0_base);
+    R600_OUT_BATCH_RELOC(0,
+			 bo,
+			 0,
+			 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+    END_BATCH();
+    /* size, view, info and mask registers; the mask is written as 0 */
+    BEGIN_BATCH_NO_AUTOSTATE(12);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), cb_color0_size);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), cb_color0_view);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
+    R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), 0);
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+/* Fill context->blit_bo with the blit shaders the first time it is called:
+ * r6xx_vs is copied to word 128 and r6xx_ps to word 256 of a size-4096 buffer.
+ * NOTE(review): radeon_bo_open()/radeon_bo_map() results are not checked. */
+static inline void load_shaders(GLcontext * ctx)
+{
+    radeonContextPtr rctx = RADEON_CONTEXT(ctx);
+    context_t *r600ctx = R700_CONTEXT(ctx);
+    const int bo_size = 4096;
+    uint32_t *words;
+    int n;
+
+    if (r600ctx->blit_bo_loaded == 1)
+        return;
+
+    r600ctx->blit_bo = radeon_bo_open(rctx->radeonScreen->bom, 0,
+                                      bo_size, 256, RADEON_GEM_DOMAIN_GTT, 0);
+    radeon_bo_map(r600ctx->blit_bo, 1);
+    words = r600ctx->blit_bo->ptr;
+
+    /* word offsets 128 and 256 correspond to byte offsets 512 and 1024 */
+    for (n = 0; n < sizeof(r6xx_vs) / 4; n++)
+        words[128 + n] = r6xx_vs[n];
+    for (n = 0; n < sizeof(r6xx_ps) / 4; n++)
+        words[256 + n] = r6xx_ps[n];
+
+    radeon_bo_unmap(r600ctx->blit_bo);
+    r600ctx->blit_bo_loaded = 1;
+}
+
+/*
+ * Point the FS, VS and PS program registers at the blit shaders stored in
+ * context->blit_bo, then emit the SPI state used by the blit.
+ *
+ * Start values are 512 >> 8 for FS and VS and 1024 >> 8 for PS; each start
+ * register write is followed by a relocation against the shader buffer.
+ * The buffer is synced with SH_ACTION_ENA before any register is written,
+ * and every CF offset register is set to zero.
+ */
+static inline void
+set_shaders(context_t *context)
+{
+    struct radeon_bo *shader_bo = context->blit_bo;
+    /* FS uses the same start value as VS */
+    uint32_t fs_start = (512 >> 8);
+    uint32_t vs_start = (512 >> 8);
+    uint32_t ps_start = (1024 >> 8);
+    /* VS and PS set the NUM_GPRS field to 1; FS resources stay zero */
+    uint32_t fs_resources = 0;
+    uint32_t vs_resources = (1 << NUM_GPRS_shift);
+    uint32_t ps_resources = (1 << NUM_GPRS_shift);
+    uint32_t ps_exports = (1 << 1);
+    BATCH_LOCALS(&context->radeon);
+
+    r700SyncSurf(context, shader_bo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+    /* FS */
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(SQ_PGM_START_FS, 1);
+    R600_OUT_BATCH(fs_start);
+    R600_OUT_BATCH_RELOC(fs_start, shader_bo, fs_start,
+			 RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(6);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_FS, fs_resources);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_FS, 0);
+    END_BATCH();
+
+    /* VS */
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
+    R600_OUT_BATCH(vs_start);
+    R600_OUT_BATCH_RELOC(vs_start, shader_bo, vs_start,
+			 RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(6);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, vs_resources);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, 0);
+    END_BATCH();
+
+    /* PS */
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
+    R600_OUT_BATCH(ps_start);
+    R600_OUT_BATCH_RELOC(ps_start, shader_bo, ps_start,
+			 RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(9);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, ps_resources);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, ps_exports);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, 0);
+    END_BATCH();
+
+    /* SPI: VS output config and id, then PS input control with
+     * NUM_INTERP set to 1 and the first input marked SEL_CENTROID */
+    BEGIN_BATCH_NO_AUTOSTATE(18);
+    R600_OUT_BATCH_REGVAL(SPI_VS_OUT_CONFIG, 0); /* EXPORT_COUNT is - 1 */
+    R600_OUT_BATCH_REGVAL(SPI_VS_OUT_ID_0, 0);
+    R600_OUT_BATCH_REGVAL(SPI_PS_INPUT_CNTL_0, SEL_CENTROID_bit);
+    R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift));
+    R600_OUT_BATCH_REGVAL(SPI_PS_IN_CONTROL_1, 0);
+    R600_OUT_BATCH_REGVAL(SPI_INTERP_CONTROL_0, 0);
+    END_BATCH();
+
+    COMMIT_BATCH();
+}
+
+/*
+ * Zero the SQ_VTX_BASE_VTX_LOC and SQ_VTX_START_INST_LOC control constants,
+ * sync context->blit_bo, and bind it as the vertex resource for the blit.
+ */
+static inline void
+set_vtx_resource(context_t *context)
+{
+    struct radeon_bo *vtx_bo = context->blit_bo;
+    const int family = context->radeon.radeonScreen->chip_family;
+    uint32_t sync_flag = VC_ACTION_ENA_bit;
+    BATCH_LOCALS(&context->radeon);
+
+    BEGIN_BATCH_NO_AUTOSTATE(6);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+    R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+    R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX);
+    R600_OUT_BATCH(0);
+    END_BATCH();
+    COMMIT_BATCH();
+
+    /* the listed families sync with TC_ACTION_ENA, all others with VC_ACTION_ENA */
+    if (family == CHIP_FAMILY_RV610 || family == CHIP_FAMILY_RV620 ||
+        family == CHIP_FAMILY_RS780 || family == CHIP_FAMILY_RS880 ||
+        family == CHIP_FAMILY_RV710)
+	    sync_flag = TC_ACTION_ENA_bit;
+    r700SyncSurf(context, vtx_bo, RADEON_GEM_DOMAIN_GTT, 0, sync_flag);
+
+    /* resource words for SQ_FETCH_RESOURCE_VS_OFFSET, followed by the relocation */
+    BEGIN_BATCH_NO_AUTOSTATE(9 + 2);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+    R600_OUT_BATCH(SQ_FETCH_RESOURCE_VS_OFFSET * FETCH_RESOURCE_STRIDE);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(48 - 1);
+    R600_OUT_BATCH(16 << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift);
+    R600_OUT_BATCH(1 << MEM_REQUEST_SIZE_shift);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(SQ_TEX_VTX_VALID_BUFFER << SQ_TEX_RESOURCE_WORD6_0__TYPE_shift);
+    R600_OUT_BATCH_RELOC(SQ_VTX_CONSTANT_WORD0_0, vtx_bo, SQ_VTX_CONSTANT_WORD0_0,
+                         RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+static inline void
+set_tex_resource(context_t * context,
+		 gl_format mesa_format, struct radeon_bo *bo, int w, int h,
+		 int TexelPitch, intptr_t src_offset)
+{
+    uint32_t sq_tex_resource0, sq_tex_resource1, sq_tex_resource2, sq_tex_resource4, sq_tex_resource6;
+
+    sq_tex_resource0 = sq_tex_resource1 = sq_tex_resource2 = sq_tex_resource4 = sq_tex_resource6 = 0;
+    BATCH_LOCALS(&context->radeon);
+
+    SETfield(sq_tex_resource0, SQ_TEX_DIM_2D, DIM_shift, DIM_mask);
+    SETfield(sq_tex_resource0, ARRAY_LINEAR_GENERAL,
+                 SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+                 SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+
+    switch (mesa_format) {
+    case MESA_FORMAT_RGBA8888:
+    case MESA_FORMAT_SIGNED_RGBA8888:
+	    SETfield(sq_tex_resource1, FMT_8_8_8_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_W,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888) {
+		    SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+			     FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+		    SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+			     FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+		    SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+			     FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+		    SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+			     FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+	    }
+	    break;
+    case MESA_FORMAT_RGBA8888_REV:
+    case MESA_FORMAT_SIGNED_RGBA8888_REV:
+	    SETfield(sq_tex_resource1, FMT_8_8_8_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_W,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888_REV) {
+		    SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+			     FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+		    SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+			     FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+		    SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+			     FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+		    SETfield(sq_tex_resource4, SQ_FORMAT_COMP_SIGNED,
+			     FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+	    }
+	    break;
+    case MESA_FORMAT_ARGB8888:
+	    SETfield(sq_tex_resource1, FMT_8_8_8_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_W,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_XRGB8888:
+	    SETfield(sq_tex_resource1, FMT_8_8_8_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_1,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_ARGB8888_REV:
+	    SETfield(sq_tex_resource1, FMT_8_8_8_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_W,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_XRGB8888_REV:
+	    SETfield(sq_tex_resource1, FMT_8_8_8_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_1,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_W,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_RGB565:
+	    SETfield(sq_tex_resource1, FMT_5_6_5,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_1,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_RGB565_REV:
+	    SETfield(sq_tex_resource1, FMT_5_6_5,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_1,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_ARGB4444:
+	    SETfield(sq_tex_resource1, FMT_4_4_4_4,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_W,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_ARGB4444_REV:
+	    SETfield(sq_tex_resource1, FMT_4_4_4_4,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_W,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_ARGB1555:
+	    SETfield(sq_tex_resource1, FMT_1_5_5_5,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_W,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_ARGB1555_REV:
+	    SETfield(sq_tex_resource1, FMT_1_5_5_5,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_W,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_AL88:
+    case MESA_FORMAT_AL88_REV: /* TODO : Check this. */
+	    SETfield(sq_tex_resource1, FMT_8_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_RGB332:
+	    SETfield(sq_tex_resource1, FMT_3_3_2,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_1,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_A8: /* ZERO, ZERO, ZERO, X */
+	    SETfield(sq_tex_resource1, FMT_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_0,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_0,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_0,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_L8: /* X, X, X, ONE */
+	    SETfield(sq_tex_resource1, FMT_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_1,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_I8: /* X, X, X, X */
+    case MESA_FORMAT_CI8:
+	    SETfield(sq_tex_resource1, FMT_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_RGBA_FLOAT32:
+	    SETfield(sq_tex_resource1, FMT_32_32_32_32_FLOAT,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_W,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_RGBA_FLOAT16:
+	    SETfield(sq_tex_resource1, FMT_16_16_16_16_FLOAT,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_W,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_ALPHA_FLOAT32: /* ZERO, ZERO, ZERO, X */
+	    SETfield(sq_tex_resource1, FMT_32_FLOAT,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_0,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_0,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_0,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_ALPHA_FLOAT16: /* ZERO, ZERO, ZERO, X */
+	    SETfield(sq_tex_resource1, FMT_16_FLOAT,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_0,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_0,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_0,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_LUMINANCE_FLOAT32: /* X, X, X, ONE */
+	    SETfield(sq_tex_resource1, FMT_32_FLOAT,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_1,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_LUMINANCE_FLOAT16: /* X, X, X, ONE */
+	    SETfield(sq_tex_resource1, FMT_16_FLOAT,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_1,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+	    SETfield(sq_tex_resource1, FMT_32_32_FLOAT,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+	    SETfield(sq_tex_resource1, FMT_16_16_FLOAT,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+	    SETfield(sq_tex_resource1, FMT_32_FLOAT,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+	    SETfield(sq_tex_resource1, FMT_16_FLOAT,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_Z16:
+	    SETbit(sq_tex_resource0, TILE_TYPE_bit);
+	    SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1,
+		     SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+		     SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+	    SETfield(sq_tex_resource1, FMT_16,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_X8_Z24:
+	    SETbit(sq_tex_resource0, TILE_TYPE_bit);
+	    SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1,
+		     SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+		     SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+	    SETfield(sq_tex_resource1, FMT_8_24,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_1,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_0,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_1,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_S8_Z24:
+	    SETbit(sq_tex_resource0, TILE_TYPE_bit);
+	    SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1,
+		     SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+		     SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+	    SETfield(sq_tex_resource1, FMT_8_24,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_0,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_1,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_Z24_S8:
+	    SETbit(sq_tex_resource0, TILE_TYPE_bit);
+	    SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1,
+		     SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+		     SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+	    SETfield(sq_tex_resource1, FMT_24_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_0,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_1,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_Z32:
+	    SETbit(sq_tex_resource0, TILE_TYPE_bit);
+	    SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1,
+		     SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+		     SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+	    SETfield(sq_tex_resource1, FMT_32,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_S8:
+	    SETbit(sq_tex_resource0, TILE_TYPE_bit);
+	    SETfield(sq_tex_resource0, ARRAY_1D_TILED_THIN1,
+		     SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+		     SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+	    SETfield(sq_tex_resource1, FMT_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    break;
+    case MESA_FORMAT_SRGBA8:
+	    SETfield(sq_tex_resource1, FMT_8_8_8_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_W,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Z,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+	    break;
+    case MESA_FORMAT_SLA8:
+	    SETfield(sq_tex_resource1, FMT_8_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_Y,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+	    break;
+    case MESA_FORMAT_SL8: /* X, X, X, ONE */
+	    SETfield(sq_tex_resource1, FMT_8,
+		     SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_X,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	    SETfield(sq_tex_resource4, SQ_SEL_1,
+		     SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	    SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+	    break;
+    default:
+            fprintf(stderr,"Invalid format for copy %s\n",_mesa_get_format_name(mesa_format));
+            assert("Invalid format for US output\n");
+            return;
+    };
+
+    SETfield(sq_tex_resource0, (TexelPitch/8)-1, PITCH_shift, PITCH_mask);
+    SETfield(sq_tex_resource0, w - 1, TEX_WIDTH_shift, TEX_WIDTH_mask);
+    SETfield(sq_tex_resource1, h - 1, TEX_HEIGHT_shift, TEX_HEIGHT_mask);
+
+    sq_tex_resource2 = src_offset / 256;
+
+    SETfield(sq_tex_resource6, SQ_TEX_VTX_VALID_TEXTURE,
+             SQ_TEX_RESOURCE_WORD6_0__TYPE_shift,
+             SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
+
+    r700SyncSurf(context, bo,
+                 RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
+		 0, TC_ACTION_ENA_bit);
+
+    BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+    R600_OUT_BATCH(0 * 7);
+
+    R600_OUT_BATCH(sq_tex_resource0);
+    R600_OUT_BATCH(sq_tex_resource1);
+    R600_OUT_BATCH(sq_tex_resource2);
+    R600_OUT_BATCH(0); //SQ_TEX_RESOURCE3
+    R600_OUT_BATCH(sq_tex_resource4);
+    R600_OUT_BATCH(0); //SQ_TEX_RESOURCE5
+    R600_OUT_BATCH(sq_tex_resource6);
+    R600_OUT_BATCH_RELOC(0,
+		     bo,
+		     0,
+		     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+    R600_OUT_BATCH_RELOC(0,
+		     bo,
+		     0,
+		     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+/* Emit a SET_SAMPLER packet at offset 0: three sampler words, all zero except
+ * for SQ_TEX_SAMPLER_WORD2_0__TYPE_bit applied to the last one. */
+static inline void
+set_tex_sampler(context_t * context)
+{
+    const int sampler_slot = 0;
+    uint32_t sampler_word[3] = { 0, 0, 0 };
+    int n;
+
+    SETbit(sampler_word[2], SQ_TEX_SAMPLER_WORD2_0__TYPE_bit);
+    BATCH_LOCALS(&context->radeon);
+
+    BEGIN_BATCH_NO_AUTOSTATE(5);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
+    R600_OUT_BATCH(sampler_slot * 3);
+    for (n = 0; n < 3; n++) {
+        R600_OUT_BATCH(sampler_word[n]);
+    }
+    END_BATCH();
+}
+
+/* Program the screen, window, generic and viewport-0 scissor registers with one
+ * rectangle: (x1, y1) goes to each *_TL register, (x2, y2) to the one after it. */
+static inline void
+set_scissors(context_t *context, int x1, int y1, int x2, int y2)
+{
+    /* Each corner is packed as x OR'd with y shifted left by 16. */
+    const int first_corner = (x1 << 0) | (y1 << 16);
+    const int second_corner = (x2 << 0) | (y2 << 16);
+    BATCH_LOCALS(&context->radeon);
+
+    BEGIN_BATCH_NO_AUTOSTATE(17);
+    R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2);
+    R600_OUT_BATCH(first_corner);
+    R600_OUT_BATCH(second_corner);
+    R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 3);
+    R600_OUT_BATCH(0); /* PA_SC_WINDOW_OFFSET */
+    R600_OUT_BATCH(first_corner | (WINDOW_OFFSET_DISABLE_bit)); /* PA_SC_WINDOW_SCISSOR_TL */
+    R600_OUT_BATCH(second_corner);
+    R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2);
+    R600_OUT_BATCH(first_corner | (WINDOW_OFFSET_DISABLE_bit));
+    R600_OUT_BATCH(second_corner);
+    /* XXX there are 16 of these (PA_SC_VPORT_SCISSOR_0_TL_num); only the first is written */
+    R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_SCISSOR_0_TL, 2);
+    R600_OUT_BATCH(first_corner | (WINDOW_OFFSET_DISABLE_bit));
+    R600_OUT_BATCH(second_corner);
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+/* Fill the blit vertex buffer with three vertices, each (dst x, dst y, src x, src y);
+ * when flip_y is non-zero the source y values are taken as src_h minus the y. */
+static inline void
+set_vb_data(context_t * context, int src_x, int src_y, int dst_x, int dst_y,
+            int w, int h, int src_h, unsigned flip_y)
+{
+    /* Source y of the first vertex, and of the two vertices offset by h. */
+    const float src_near_y = (flip_y) ? (float)(src_h - src_y) : (float)src_y;
+    const float src_far_y = (flip_y) ? (float)(src_h - (src_y + h)) : (float)(src_y + h);
+    const float verts[12] = {
+        (float)(dst_x),     (float)(dst_y),     (float)(src_x),     src_near_y,
+        (float)(dst_x),     (float)(dst_y + h), (float)(src_x),     src_far_y,
+        (float)(dst_x + w), (float)(dst_y + h), (float)(src_x + w), src_far_y,
+    };
+    float *out;
+    int k;
+
+    radeon_bo_map(context->blit_bo, 1);
+    out = context->blit_bo->ptr;
+
+    for (k = 0; k < 12; k++) {
+        out[k] = verts[k];
+    }
+
+    radeon_bo_unmap(context->blit_bo);
+}
+
+/* Emit an auto-indexed DI_PT_RECTLIST draw of 3 indices and 1 instance:
+ * primitive type, index type, instance count, then DRAW_INDEX_AUTO. */
+static inline void
+draw_auto(context_t *context)
+{
+    BATCH_LOCALS(&context->radeon);
+    uint32_t prim_type = 0;
+    uint32_t index_type = 0;
+    uint32_t draw_initiator = 0;
+    uint32_t num_indices = 3;
+
+    SETfield(prim_type, DI_PT_RECTLIST,
+             VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+    SETfield(index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+    SETfield(draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+    SETfield(draw_initiator, DI_SRC_SEL_AUTO_INDEX,
+             SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+
+    BEGIN_BATCH_NO_AUTOSTATE(10);
+    /* VGT_PRIMITIVE_TYPE register */
+    R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+    R600_OUT_BATCH(prim_type);
+    /* INDEX_TYPE packet */
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+    R600_OUT_BATCH(index_type);
+    /* NUM_INSTANCES packet: a single instance */
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+    R600_OUT_BATCH(1);
+    /* DRAW_INDEX_AUTO packet: index count followed by the draw initiator */
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
+    R600_OUT_BATCH(num_indices);
+    R600_OUT_BATCH(draw_initiator);
+
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+static inline void
+set_default_state(context_t *context)
+{
+    int ps_prio = 0;
+    int vs_prio = 1;
+    int gs_prio = 2;
+    int es_prio = 3;
+    int num_ps_gprs;
+    int num_vs_gprs;
+    int num_gs_gprs;
+    int num_es_gprs;
+    int num_temp_gprs;
+    int num_ps_threads;
+    int num_vs_threads;
+    int num_gs_threads;
+    int num_es_threads;
+    int num_ps_stack_entries;
+    int num_vs_stack_entries;
+    int num_gs_stack_entries;
+    int num_es_stack_entries;
+    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
+    uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
+    uint32_t ta_cntl_aux, db_watermarks, sq_dyn_gpr_cntl_ps_flush_req, db_debug;
+    BATCH_LOCALS(&context->radeon);
+
+    switch (context->radeon.radeonScreen->chip_family) {
+    case CHIP_FAMILY_R600:
+	    num_ps_gprs = 192;
+	    num_vs_gprs = 56;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 136;
+	    num_vs_threads = 48;
+	    num_gs_threads = 4;
+	    num_es_threads = 4;
+	    num_ps_stack_entries = 128;
+	    num_vs_stack_entries = 128;
+	    num_gs_stack_entries = 0;
+	    num_es_stack_entries = 0;
+	    break;
+    case CHIP_FAMILY_RV630:
+    case CHIP_FAMILY_RV635:
+	    num_ps_gprs = 84;
+	    num_vs_gprs = 36;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 144;
+	    num_vs_threads = 40;
+	    num_gs_threads = 4;
+	    num_es_threads = 4;
+	    num_ps_stack_entries = 40;
+	    num_vs_stack_entries = 40;
+	    num_gs_stack_entries = 32;
+	    num_es_stack_entries = 16;
+	    break;
+    case CHIP_FAMILY_RV610:
+    case CHIP_FAMILY_RV620:
+    case CHIP_FAMILY_RS780:
+    case CHIP_FAMILY_RS880:
+    default:
+	    num_ps_gprs = 84;
+	    num_vs_gprs = 36;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 136;
+	    num_vs_threads = 48;
+	    num_gs_threads = 4;
+	    num_es_threads = 4;
+	    num_ps_stack_entries = 40;
+	    num_vs_stack_entries = 40;
+	    num_gs_stack_entries = 32;
+	    num_es_stack_entries = 16;
+	    break;
+    case CHIP_FAMILY_RV670:
+	    num_ps_gprs = 144;
+	    num_vs_gprs = 40;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 136;
+	    num_vs_threads = 48;
+	    num_gs_threads = 4;
+	    num_es_threads = 4;
+	    num_ps_stack_entries = 40;
+	    num_vs_stack_entries = 40;
+	    num_gs_stack_entries = 32;
+	    num_es_stack_entries = 16;
+	    break;
+    case CHIP_FAMILY_RV770:
+	    num_ps_gprs = 192;
+	    num_vs_gprs = 56;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 188;
+	    num_vs_threads = 60;
+	    num_gs_threads = 0;
+	    num_es_threads = 0;
+	    num_ps_stack_entries = 256;
+	    num_vs_stack_entries = 256;
+	    num_gs_stack_entries = 0;
+	    num_es_stack_entries = 0;
+	    break;
+    case CHIP_FAMILY_RV730:
+    case CHIP_FAMILY_RV740:
+	    num_ps_gprs = 84;
+	    num_vs_gprs = 36;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 188;
+	    num_vs_threads = 60;
+	    num_gs_threads = 0;
+	    num_es_threads = 0;
+	    num_ps_stack_entries = 128;
+	    num_vs_stack_entries = 128;
+	    num_gs_stack_entries = 0;
+	    num_es_stack_entries = 0;
+	    break;
+    case CHIP_FAMILY_RV710:
+	    num_ps_gprs = 192;
+	    num_vs_gprs = 56;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 144;
+	    num_vs_threads = 48;
+	    num_gs_threads = 0;
+	    num_es_threads = 0;
+	    num_ps_stack_entries = 128;
+	    num_vs_stack_entries = 128;
+	    num_gs_stack_entries = 0;
+	    num_es_stack_entries = 0;
+	    break;
+    }
+
+    sq_config = 0;
+    if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
+	(context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
+	(context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
+	    CLEARbit(sq_config, VC_ENABLE_bit);
+    else
+	    SETbit(sq_config, VC_ENABLE_bit);
+    SETbit(sq_config, DX9_CONSTS_bit);
+    SETbit(sq_config, ALU_INST_PREFER_VECTOR_bit);
+    SETfield(sq_config, ps_prio, PS_PRIO_shift, PS_PRIO_mask);
+    SETfield(sq_config, vs_prio, VS_PRIO_shift, VS_PRIO_mask);
+    SETfield(sq_config, gs_prio, GS_PRIO_shift, GS_PRIO_mask);
+    SETfield(sq_config, es_prio, ES_PRIO_shift, ES_PRIO_mask);
+
+    sq_gpr_resource_mgmt_1 = 0;
+    SETfield(sq_gpr_resource_mgmt_1, num_ps_gprs, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
+    SETfield(sq_gpr_resource_mgmt_1, num_vs_gprs, NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);
+    SETfield(sq_gpr_resource_mgmt_1, num_temp_gprs,
+	     NUM_CLAUSE_TEMP_GPRS_shift, NUM_CLAUSE_TEMP_GPRS_mask);
+
+    sq_gpr_resource_mgmt_2 = 0;
+    SETfield(sq_gpr_resource_mgmt_2, num_gs_gprs, NUM_GS_GPRS_shift, NUM_GS_GPRS_mask);
+    SETfield(sq_gpr_resource_mgmt_2, num_es_gprs, NUM_ES_GPRS_shift, NUM_ES_GPRS_mask);
+
+    sq_thread_resource_mgmt = 0;
+    SETfield(sq_thread_resource_mgmt, num_ps_threads,
+	     NUM_PS_THREADS_shift, NUM_PS_THREADS_mask);
+    SETfield(sq_thread_resource_mgmt, num_vs_threads,
+	     NUM_VS_THREADS_shift, NUM_VS_THREADS_mask);
+    SETfield(sq_thread_resource_mgmt, num_gs_threads,
+	     NUM_GS_THREADS_shift, NUM_GS_THREADS_mask);
+    SETfield(sq_thread_resource_mgmt, num_es_threads,
+	     NUM_ES_THREADS_shift, NUM_ES_THREADS_mask);
+
+    sq_stack_resource_mgmt_1 = 0;
+    SETfield(sq_stack_resource_mgmt_1, num_ps_stack_entries,
+	     NUM_PS_STACK_ENTRIES_shift, NUM_PS_STACK_ENTRIES_mask);
+    SETfield(sq_stack_resource_mgmt_1, num_vs_stack_entries,
+	     NUM_VS_STACK_ENTRIES_shift, NUM_VS_STACK_ENTRIES_mask);
+
+    sq_stack_resource_mgmt_2 = 0;
+    SETfield(sq_stack_resource_mgmt_2, num_gs_stack_entries,
+	     NUM_GS_STACK_ENTRIES_shift, NUM_GS_STACK_ENTRIES_mask);
+    SETfield(sq_stack_resource_mgmt_2, num_es_stack_entries,
+	     NUM_ES_STACK_ENTRIES_shift, NUM_ES_STACK_ENTRIES_mask);
+
+    ta_cntl_aux = 0;
+    SETfield(ta_cntl_aux, 28, TD_FIFO_CREDIT_shift, TD_FIFO_CREDIT_mask);
+    db_watermarks = 0;
+    SETfield(db_watermarks, 4, DEPTH_FREE_shift, DEPTH_FREE_mask);
+    SETfield(db_watermarks, 16, DEPTH_FLUSH_shift, DEPTH_FLUSH_mask);
+    SETfield(db_watermarks, 0, FORCE_SUMMARIZE_shift, FORCE_SUMMARIZE_mask);
+    SETfield(db_watermarks, 4, DEPTH_PENDING_FREE_shift, DEPTH_PENDING_FREE_mask);
+    sq_dyn_gpr_cntl_ps_flush_req = 0;
+    db_debug = 0;
+    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
+	    SETfield(ta_cntl_aux, 3, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask);
+	    db_debug = 0x82000000;
+	    SETfield(db_watermarks, 16, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask);
+    } else {
+	    SETfield(ta_cntl_aux, 2, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask);
+	    SETfield(db_watermarks, 4, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask);
+	    SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit);
+    }
+
+    BEGIN_BATCH_NO_AUTOSTATE(117);
+    R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6);
+    R600_OUT_BATCH(sq_config);
+    R600_OUT_BATCH(sq_gpr_resource_mgmt_1);
+    R600_OUT_BATCH(sq_gpr_resource_mgmt_2);
+    R600_OUT_BATCH(sq_thread_resource_mgmt);
+    R600_OUT_BATCH(sq_stack_resource_mgmt_1);
+    R600_OUT_BATCH(sq_stack_resource_mgmt_2);
+
+    R600_OUT_BATCH_REGVAL(TA_CNTL_AUX, ta_cntl_aux);
+    R600_OUT_BATCH_REGVAL(VC_ENHANCE, 0);
+    R600_OUT_BATCH_REGVAL(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, sq_dyn_gpr_cntl_ps_flush_req);
+    R600_OUT_BATCH_REGVAL(DB_DEBUG, db_debug);
+    R600_OUT_BATCH_REGVAL(DB_WATERMARKS, db_watermarks);
+
+    R600_OUT_BATCH_REGSEQ(SQ_ESGS_RING_ITEMSIZE, 9);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+
+    R600_OUT_BATCH_REGVAL(CB_CLRCMP_CONTROL,
+                         (CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift));
+    R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0);
+    R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0);
+    R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0);
+    R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0);
+    R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask));
+    R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask));
+    R600_OUT_BATCH_REGVAL(R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit));
+    R600_OUT_BATCH_REGVAL(CB_COLOR_CONTROL, (0xcc << ROP3_shift));
+
+    R600_OUT_BATCH_REGVAL(PA_CL_VTE_CNTL, VTX_XY_FMT_bit);
+    R600_OUT_BATCH_REGVAL(PA_CL_VS_OUT_CNTL, 0);
+    R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, CLIP_DISABLE_bit);
+    R600_OUT_BATCH_REGVAL(PA_SU_SC_MODE_CNTL, (FACE_bit) |
+        (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) |
+        (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift));
+    R600_OUT_BATCH_REGVAL(PA_SU_VTX_CNTL, (PIX_CENTER_bit) |
+        (X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
+        (X_1_256TH << QUANT_MODE_shift));
+
+    R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4);
+    R600_OUT_BATCH(2048);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+
+    R600_OUT_BATCH_REGSEQ(VGT_OUTPUT_PATH_CNTL, 13);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+
+    R600_OUT_BATCH_REGVAL(VGT_PRIMITIVEID_EN, 0);
+    R600_OUT_BATCH_REGVAL(VGT_MULTI_PRIM_IB_RESET_EN, 0);
+    R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_0, 0);
+    R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_1, 0);
+
+    R600_OUT_BATCH_REGSEQ(VGT_STRMOUT_EN, 3);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+
+    R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, 0);
+
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+/* Reset the CS space accounting, then check the source bo (read), the
+ * destination bo (write) and rmesa->blit_bo (read, GTT) with
+ * radeon_cs_space_check_with_bo(); GL_FALSE as soon as one check fails. */
+static GLboolean validate_buffers(context_t *rmesa,
+                                  struct radeon_bo *src_bo,
+                                  struct radeon_bo *dst_bo)
+{
+    const uint32_t vram_or_gtt = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+
+    radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
+    /* source: read domains only */
+    if (radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs,
+                                      src_bo, vram_or_gtt, 0))
+        return GL_FALSE;
+
+    /* destination: write domains only */
+    if (radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs,
+                                      dst_bo, 0, vram_or_gtt))
+        return GL_FALSE;
+
+    /* blit bo: read from GTT */
+    if (radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs,
+                                      rmesa->blit_bo, RADEON_GEM_DOMAIN_GTT, 0))
+        return GL_FALSE;
+    return GL_TRUE;
+}
+
+/*
+ * Draw a rectangle that samples src_bo as a texture into dst_bo as the
+ * render target.  Returns GL_FALSE when r600_check_blit() rejects
+ * dst_mesaformat, when src_bo and dst_bo are the same object, when either
+ * offset is not a multiple of 256, or when validate_buffers() fails;
+ * otherwise GL_TRUE.  The context is flushed before the blit state is
+ * emitted and again after it.
+ */
+unsigned r600_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x,
+                   unsigned src_y,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x,
+                   unsigned dst_y,
+                   unsigned w,
+                   unsigned h,
+                   unsigned flip_y)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    int id = 0; /* presumably the colour-buffer index; picks the (1 << (id + 6)) sync bit below */
+
+    if (!r600_check_blit(dst_mesaformat))
+        return GL_FALSE;
+
+    if (src_bo == dst_bo) {
+        return GL_FALSE;
+    }
+
+    if (src_offset % 256 || dst_offset % 256) {
+        return GL_FALSE;
+    }
+
+    if (0) { /* disabled debug output */
+        fprintf(stderr, "src: width %d, height %d, pitch %d vs %d, format %s\n",
+                src_width, src_height, src_pitch,
+                _mesa_format_row_stride(src_mesaformat, src_width),
+                _mesa_get_format_name(src_mesaformat));
+        fprintf(stderr, "dst: width %d, height %d, pitch %d, format %s\n",
+                dst_width, dst_height,
+                _mesa_format_row_stride(dst_mesaformat, dst_width),
+                _mesa_get_format_name(dst_mesaformat));
+    }
+
+    /* Flush is needed to make sure that source buffer has correct data */
+    radeonFlush(ctx);
+    /* 304 = sum of the per-step counts noted below (117+72+20+5+27+17+24+10+7+5) */
+    rcommonEnsureCmdBufSpace(&context->radeon, 304, __FUNCTION__);
+
+    /* load shaders */
+    load_shaders(context->radeon.glCtx);
+
+    if (!validate_buffers(context, src_bo, dst_bo))
+        return GL_FALSE;
+
+    /* set clear state: 117 */
+    set_default_state(context);
+
+    /* shaders: 72 */
+    set_shaders(context);
+
+    /* src: 20 */
+    set_tex_resource(context, src_mesaformat, src_bo,
+		     src_width, src_height, src_pitch, src_offset);
+
+    /* 5 */
+    set_tex_sampler(context);
+
+    /* dst: 27 */
+    set_render_target(context, dst_bo, dst_mesaformat,
+		      dst_pitch, dst_width, dst_height, dst_offset);
+    /* scissors: 17 */
+    set_scissors(context, dst_x, dst_y, dst_x + dst_width, dst_y + dst_height);
+
+    set_vb_data(context, src_x, src_y, dst_x, dst_y, w, h, src_height, flip_y);
+    /* Vertex buffer setup: 24 */
+    set_vtx_resource(context);
+
+    /* draw: 10 */
+    draw_auto(context);
+
+    /* 7 */
+    r700SyncSurf(context, dst_bo, 0,
+                 RADEON_GEM_DOMAIN_VRAM|RADEON_GEM_DOMAIN_GTT,
+		 CB_ACTION_ENA_bit | (1 << (id + 6)));
+
+    /* 5; XXX drm should handle this in fence submit */
+    r700WaitForIdleClean(context);
+
+    radeonFlush(ctx);
+
+    return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r600/r600_blit.h b/src/mesa/drivers/dri/r600/r600_blit.h
new file mode 100644
index 0000000000..d56b21ba9b
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_blit.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2009 Advanced Micro Devices, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef R600_BLIT_H
+#define R600_BLIT_H
+
+unsigned r600_check_blit(gl_format mesa_format); /* r600_blit() returns GL_FALSE for a dst format this rejects */
+/* Blit from src_bo to dst_bo; returns GL_TRUE on success, GL_FALSE otherwise. */
+unsigned r600_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x_offset,
+                   unsigned src_y_offset,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x_offset,
+                   unsigned dst_y_offset,
+                   unsigned w,
+                   unsigned h,
+                   unsigned flip_y);
+
+#endif // R600_BLIT_H
diff --git a/src/mesa/drivers/dri/r600/r600_blit_shaders.h b/src/mesa/drivers/dri/r600/r600_blit_shaders.h
new file mode 100644
index 0000000000..492dde9636
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_blit_shaders.h
@@ -0,0 +1,28 @@
+const uint32_t r6xx_vs[] = // NOTE(review): non-static definition in a header; only safe if a single .c file includes it -- confirm
+{
+        0x00000004, // CF_DWORD0(ADDR(4))
+        0x81000000, // SQ_CF_INST_VTX COUNT(1)
+        0x0000203c, // CF_EXP_IMP CF_POS0 SQ_EXPORT_POS RW_GPR(0) ELEM_SIZE(0)
+        0x94000b08, // SQ_CF_INST_EXPORT_DONE SWZ XY01 BARRIER(1)
+        0x00004000, // CF_EXP_IMP 0 SQ_EXPORT_PARAM RW_GPR(0) ELEM_SIZE(0)
+        0x14200b1a, // SQ_CF_INST_EXPORT_DONE SWZ ZW01 EOP(1) BARRIER(0)
+        0x00000000,
+        0x00000000,
+        0x3c000000, // SQ_VTX_INST_FETCH BUFFER_ID(0) MEGA_FETCH_COUNT(16)
+        0x68cd1000, // DST_GPR(0) DST_SWZ: XYZW DATA_FORMAT(35) SQ_NUM_FORMAT_SCALED SQ_FORMAT_COMP_SIGNED
+        0x00080000, // ENDIAN_SWAP(SQ_ENDIAN_NONE) MEGA_FETCH(1)
+        0x00000000, // VTX_DWORD_PAD
+};
+// presumably the pixel-shader counterpart of r6xx_vs (one TEX sample, then export) -- confirm against its users
+const uint32_t r6xx_ps[] =
+{
+        0x00000002, // CF_DWORD0 ADDR(2)
+        0x80800000, // SQ_CF_INST_TEX COUNT(1)
+        0x00000000, // CF_ALLOC_IMP_EXP0 SQ_EXPORT_PIXEL RW_GPR(0) ELEM_SIZE(0)
+        0x94200688, // SQ_CF_INST_EXPORT_DONE EOP(1) BARRIER(1) SWZ: XYZW
+        0x00000010, // SQ_TEX_INST_SAMPLE SRC_GPR(0) RESOURCE_ID(0)
+        0x000d1000, // DST_GPR(0) SWZ: XYZW TEX_UNNORMALIZED
+        0xb0800000, // SAMPLER_ID(0) SRC_SWZ XYZW
+        0x00000000, // TEX_DWORD_PAD
+};
+
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
new file mode 100644
index 0000000000..afe2d55dc7
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
@@ -0,0 +1,518 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * Mostly copied from \radeon\radeon_cs_legacy.c
+ */
+
+#include <errno.h>
+
+#include "main/glheader.h"
+#include "main/state.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+
+#include "drm.h"
+#include "radeon_drm.h"
+
+#include "r600_context.h"
+#include "radeon_reg.h"
+#include "r600_cmdbuf.h"
+#include "r600_emit.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_reg.h"
+
+#ifdef HAVE_LIBDRM_RADEON
+#include "radeon_cs_int.h"
+#else
+#include "radeon_cs_int_drm.h"
+#endif
+
+struct r600_cs_manager_legacy
+{
+    struct radeon_cs_manager    base; /* must stay first: the ctor returns this object cast to radeon_cs_manager */
+    struct radeon_context       *ctx; /* context handed to the ctor */
+    /* hack for scratch stuff */
+    uint32_t                    pending_age; /* starts at 1; set to the returned cs_id after each successful emit */
+    uint32_t                    pending_count; /* set to 1 at the start of every emit */
+};
+
+struct r600_cs_reloc_legacy {
+    struct radeon_cs_reloc  base; /* bo, read/write domains and flags */
+    uint32_t                cindices; /* number of entries in each of the two arrays below */
+    uint32_t                *indices; /* CS dword position recorded for every use of the bo */
+    uint32_t                *reloc_indices; /* same positions; these are the ones patched at emit time */
+};
+
+/* Allocate a zeroed CS whose packet buffer holds ndw dwords rounded up to a
+ * multiple of 0x400; returns NULL if either allocation fails. */
+static struct radeon_cs_int *r600_cs_create(struct radeon_cs_manager *csm,
+					    uint32_t ndw)
+{
+    struct radeon_cs_int *cs = calloc(1, sizeof(struct radeon_cs_int));
+
+    if (!cs)
+        return NULL;
+    cs->csm = csm;
+    cs->ndw = (ndw + 0x3FF) & (~0x3FF);
+    cs->packets = (uint32_t*)malloc(4*cs->ndw);
+    if (!cs->packets) {
+        free(cs);
+        return NULL;
+    }
+    cs->relocs_total_size = 0;
+    return cs;
+}
+
+/*
+ * Record a relocation for bo at the current CS position and reserve two
+ * dwords for it; r600_cs_process_relocs() fills those dwords at emit time.
+ * Exactly one of read_domain/write_domain must be set, neither may be
+ * RADEON_GEM_DOMAIN_CPU, and a bo already listed must keep its direction;
+ * otherwise -EINVAL.  Returns -ENOMEM if an array cannot be grown (the
+ * already recorded relocations stay valid), 0 on success.
+ */
+static int r600_cs_write_reloc(struct radeon_cs_int *csi,
+			       struct radeon_bo *bo,
+			       uint32_t read_domain,
+			       uint32_t write_domain,
+			       uint32_t flags)
+{
+    struct r600_cs_reloc_legacy *relocs;
+    int i;
+
+    relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
+    /* within one CS a bo is either read or written, never both */
+    if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
+        return -EINVAL;
+    }
+    if (read_domain == RADEON_GEM_DOMAIN_CPU ||
+        write_domain == RADEON_GEM_DOMAIN_CPU) {
+        return -EINVAL;
+    }
+    /* check if bo is already referenced */
+    for(i = 0; i < csi->crelocs; i++) {
+        uint32_t *grown;
+        uint32_t slot;
+
+        if (relocs[i].base.bo->handle != bo->handle) {
+            continue;
+        }
+        /* the direction must match the earlier uses of this bo */
+        if ((relocs[i].base.read_domain && !read_domain) ||
+            (relocs[i].base.write_domain && !write_domain)) {
+            return -EINVAL;
+        }
+        /* Grow both index arrays before touching any other state.  Each
+         * pointer is stored as soon as its realloc succeeds: realloc may
+         * move the array, so keeping the old pointer after a later failure
+         * would leave it dangling. */
+        slot = relocs[i].cindices;
+        grown = (uint32_t*)realloc(relocs[i].indices, (slot + 1) * 4);
+        if (grown == NULL) {
+            return -ENOMEM;
+        }
+        relocs[i].indices = grown;
+        grown = (uint32_t*)realloc(relocs[i].reloc_indices, (slot + 1) * 4);
+        if (grown == NULL) {
+            return -ENOMEM;
+        }
+        relocs[i].reloc_indices = grown;
+        relocs[i].base.read_domain |= read_domain;
+        relocs[i].base.write_domain |= write_domain;
+        relocs[i].indices[slot] = csi->cdw;
+        relocs[i].reloc_indices[slot] = csi->cdw;
+        relocs[i].cindices = slot + 1;
+        csi->section_cdw += 2;
+        csi->cdw += 2;
+        return 0;
+    }
+    /* first use of this bo: append a new entry */
+    relocs = (struct r600_cs_reloc_legacy*)
+             realloc(csi->relocs,
+                     sizeof(struct r600_cs_reloc_legacy) * (csi->crelocs + 1));
+    if (relocs == NULL) {
+        return -ENOMEM;
+    }
+    csi->relocs = relocs;
+    relocs[csi->crelocs].base.bo = bo;
+    relocs[csi->crelocs].base.read_domain = read_domain;
+    relocs[csi->crelocs].base.write_domain = write_domain;
+    relocs[csi->crelocs].base.flags = flags;
+    relocs[csi->crelocs].indices = (uint32_t*)malloc(4);
+    relocs[csi->crelocs].reloc_indices = (uint32_t*)malloc(4);
+    if ( (relocs[csi->crelocs].indices == NULL) || (relocs[csi->crelocs].reloc_indices == NULL) )
+    {
+        /* crelocs is not bumped, so nothing else would ever free these */
+        free(relocs[csi->crelocs].indices);
+        free(relocs[csi->crelocs].reloc_indices);
+        return -ENOMEM;
+    }
+    relocs[csi->crelocs].indices[0] = csi->cdw;
+    relocs[csi->crelocs].reloc_indices[0] = csi->cdw;
+    csi->section_cdw += 2;
+    csi->cdw += 2;
+    relocs[csi->crelocs].cindices = 1;
+    csi->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
+    csi->crelocs++;
+    radeon_bo_ref(bo);
+    return 0;
+}
+
+/* Open a section of ndw dwords, growing the packet buffer first if needed.
+ * Returns -EPIPE if a section is already open, -ENOMEM if the buffer cannot
+ * be grown (the CS is left unchanged in both cases), 0 on success. */
+static int r600_cs_begin(struct radeon_cs_int *csi,
+                    uint32_t ndw,
+                    const char *file,
+                    const char *func,
+                    int line)
+{
+    if (csi->section_ndw) {
+        fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
+                csi->section_file, csi->section_func, csi->section_line);
+        fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
+                file, func, line);
+        return -EPIPE;
+    }
+    /* Grow before recording the section: failing afterwards would leave the
+     * section marked open and make every later begin fail with -EPIPE. */
+    if (csi->cdw + ndw > csi->ndw) {
+        uint32_t tmp, *ptr;
+        int num = (ndw > 0x400) ? ndw : 0x400;
+        tmp = (csi->cdw + num + 0x3FF) & (~0x3FF);
+        ptr = (uint32_t*)realloc(csi->packets, 4 * tmp);
+        if (ptr == NULL)
+            return -ENOMEM;
+        csi->packets = ptr;
+        csi->ndw = tmp;
+    }
+    csi->section_ndw = ndw;
+    csi->section_cdw = 0;
+    csi->section_file = file;
+    csi->section_func = func;
+    csi->section_line = line;
+    return 0;
+}
+
+/* Close the section opened by r600_cs_begin().  Returns -EPIPE when no
+ * section is open or when the number of dwords written differs from the
+ * number announced at begin; asserts if the CS ran past its buffer. */
+static int r600_cs_end(struct radeon_cs_int *csi,
+                  const char *file,
+                  const char *func,
+                  int line)
+{
+    if (csi->section_ndw == 0) {
+        fprintf(stderr, "CS no section to end at (%s,%s,%d)\n", file, func, line);
+        return -EPIPE;
+    }
+    if (csi->section_cdw != csi->section_ndw) {
+        fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
+                csi->section_file, csi->section_func, csi->section_line,
+                csi->section_ndw, csi->section_cdw);
+        fprintf(stderr, "csi->section_ndw = %d, csi->cdw = %d, csi->section_cdw = %d \n",
+                csi->section_ndw, csi->cdw, csi->section_cdw);
+        fprintf(stderr, "CS section end at (%s,%s,%d)\n", file, func, line);
+        return -EPIPE;
+    }
+
+    /* section closed; the remaining check only diagnoses an overrun */
+    csi->section_ndw = 0;
+    if (csi->cdw <= csi->ndw)
+        return 0;
+
+    fprintf(stderr, "CS section overflow at (%s,%s,%d) cdw %d ndw %d\n",
+            csi->section_file, csi->section_func, csi->section_line,csi->cdw,csi->ndw);
+    fprintf(stderr, "CS section end at (%s,%s,%d)\n", file, func, line);
+    assert(0);
+    return 0;
+}
+
+/*
+ * Validate every bo of the CS and fill reloc_chunk (4 dwords per bo, so it
+ * must hold 4 * csi->crelocs dwords).  Each reloc slot in csi->packets
+ * becomes a PACKET3 NOP header plus the bo's dword offset in reloc_chunk.
+ * Returns 0 and stores the chunk length, or a validate error (length unset).
+ */
+static int r600_cs_process_relocs(struct radeon_cs_int *csi,
+                                  uint32_t * reloc_chunk,
+                                  uint32_t * length_dw_reloc_chunk)
+{
+    struct r600_cs_reloc_legacy *relocs;
+    int i, j, r;
+    uint32_t offset_dw;
+
+    relocs = (struct r600_cs_reloc_legacy *)csi->relocs;
+restart:
+    /* A restart rewrites the chunk from the top: without this reset the
+     * offsets kept growing and ran past the end of reloc_chunk. */
+    offset_dw = 0;
+    for (i = 0; i < csi->crelocs; i++) {
+        uint32_t soffset = 0, eoffset = 0; /* printed below even on failure */
+        r = radeon_bo_legacy_validate(relocs[i].base.bo,
+                                      &soffset, &eoffset);
+        if (r == -EAGAIN) {
+            goto restart;
+        }
+        if (r) {
+            fprintf(stderr, "invalid bo(%p) [0x%08X, 0x%08X]\n",
+                    (void *)relocs[i].base.bo, soffset, eoffset);
+            return r;
+        }
+        for (j = 0; j < relocs[i].cindices; j++) {
+            /* pkt3 nop header in ib chunk */
+            csi->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
+            /* reloc index in ib chunk */
+            csi->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
+        }
+        /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */
+        reloc_chunk[offset_dw] = soffset;
+        reloc_chunk[offset_dw + 3] = 0;
+        offset_dw += 4;
+    }
+    *length_dw_reloc_chunk = offset_dw;
+    return 0;
+}
+
+/* Mark each referenced bo pending at csm->pending_age and drop the CS's reference to it. */
+static int r600_cs_set_age(struct radeon_cs_int *csi)
+{
+    struct r600_cs_manager_legacy *mgr = (struct r600_cs_manager_legacy*)csi->csm;
+    struct r600_cs_reloc_legacy *list = (struct r600_cs_reloc_legacy *)csi->relocs;
+    int idx = 0;
+    while (idx < csi->crelocs) {
+        radeon_bo_legacy_pending(list[idx].base.bo, mgr->pending_age);
+        radeon_bo_unref(list[idx].base.bo);
+        idx++;
+    }
+    return 0;
+}
+
+#if 0 /* debug helper, compiled out */
+static void dump_cmdbuf(struct radeon_cs_int *csi)
+{
+	int i;
+	fprintf(stderr,"--start--\n");
+	for (i = 0; i < csi->cdw; i++){ /* one hex dword per line, for every dword written so far */
+		fprintf(stderr,"0x%08x\n", csi->packets[i]);
+	}
+	fprintf(stderr,"--end--\n");
+
+}
+#endif
+
+/* Submit the CS via DRM_RADEON_CS as two chunks: the raw IB (csi->packets)
+ * and the reloc chunk from r600_cs_process_relocs().  The ioctl is retried
+ * while it returns -EAGAIN (at most 1000 tries).  On success cs_id becomes
+ * csm->pending_age, every bo is aged and the csm usage counters are zeroed.
+ * Returns 0, -ENOMEM, or the ioctl error. */
+static int r600_cs_emit(struct radeon_cs_int *csi)
+{
+    struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)csi->csm;
+    struct drm_radeon_cs       cs_cmd = {0}; /* zeroed so no uninitialized field reaches the ioctl */
+    struct drm_radeon_cs_chunk cs_chunk[2];
+    uint32_t length_dw_reloc_chunk;
+    uint64_t chunk_ptrs[2];
+    uint32_t *reloc_chunk;
+    int r;
+    int retry = 0;
+
+    /* TODO : put chip level things here if need. */
+    /* csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); */
+    csm->pending_count = 1;
+
+    /* 4 dwords per relocated bo; with no relocs a NULL result is harmless */
+    reloc_chunk = (uint32_t*)calloc(1, csi->crelocs * 4 * 4);
+    if (reloc_chunk == NULL && csi->crelocs > 0) {
+        return -ENOMEM;
+    }
+    r = r600_cs_process_relocs(csi, reloc_chunk, &length_dw_reloc_chunk);
+    if (r) {
+        /* NOTE(review): a bo validation failure drops the CS yet reports
+         * success, as before; confirm callers rely on it before returning r. */
+        free(reloc_chunk);
+        return 0;
+    }
+
+    /* raw ib chunk */
+    cs_chunk[0].chunk_id   = RADEON_CHUNK_ID_IB;
+    cs_chunk[0].length_dw  = csi->cdw;
+    cs_chunk[0].chunk_data = (unsigned long)(csi->packets);
+
+    /* reloc chunk */
+    cs_chunk[1].chunk_id   = RADEON_CHUNK_ID_RELOCS;
+    cs_chunk[1].length_dw  = length_dw_reloc_chunk;
+    cs_chunk[1].chunk_data = (unsigned long)reloc_chunk;
+
+    chunk_ptrs[0] = (uint64_t)(unsigned long)&(cs_chunk[0]);
+    chunk_ptrs[1] = (uint64_t)(unsigned long)&(cs_chunk[1]);
+
+    cs_cmd.num_chunks = 2;
+    cs_cmd.chunks     = (uint64_t)(unsigned long)chunk_ptrs;
+
+    do {
+        r = drmCommandWriteRead(csi->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
+        retry++;
+    } while (r == -EAGAIN && retry < 1000);
+    free(reloc_chunk);
+    if (r) {
+        return r;
+    }
+
+    csm->pending_age = cs_cmd.cs_id;
+    r600_cs_set_age(csi);
+
+    csi->csm->read_used = 0;
+    csi->csm->vram_write_used = 0;
+    csi->csm->gart_write_used = 0;
+    return 0;
+}
+
+/* Free the index arrays owned by the first crelocs entries of relocs_p; the
+ * entry array itself is left for the caller to free.  NULL is a no-op. */
+static void inline r600_cs_free_reloc(void *relocs_p, int crelocs)
+{
+    struct r600_cs_reloc_legacy *entry = relocs_p;
+    int remaining = crelocs;
+
+    for (; entry != NULL && remaining > 0; entry++, remaining--) {
+        free(entry->indices);
+        free(entry->reloc_indices);
+    }
+}
+
+static int r600_cs_destroy(struct radeon_cs_int *csi) /* frees the CS itself too: csi is invalid afterwards; always returns 0 */
+{
+    r600_cs_free_reloc(csi->relocs, csi->crelocs); /* per-entry index arrays first */
+    free(csi->relocs); /* then the entry array */
+    free(csi->packets);
+    free(csi);
+    return 0;
+}
+
+static int r600_cs_erase(struct radeon_cs_int *csi) /* empty the CS for reuse; the packet buffer is kept; always returns 0 */
+{
+    r600_cs_free_reloc(csi->relocs, csi->crelocs);
+    free(csi->relocs);
+    csi->relocs_total_size = 0;
+    csi->relocs = NULL; /* freed above; r600_cs_write_reloc() reallocs starting from NULL */
+    csi->crelocs = 0;
+    csi->cdw = 0; /* NOTE(review): section_ndw is not reset here -- confirm no section can be open at this point */
+    return 0;
+}
+
+static int r600_cs_need_flush(struct radeon_cs_int *csi)
+{
+    /* Always reports "no flush needed": this function used to flush when the
+     * BO usage got to a certain size, now the higher levels handle this better */
+    return 0;
+}
+
+static void r600_cs_print(struct radeon_cs_int *csi, FILE *file)
+{ /* no-op: nothing is written to file */
+}
+
+static struct radeon_cs_funcs  r600_cs_funcs = { /* positional initializer: entries must follow the member order of struct radeon_cs_funcs */
+    r600_cs_create,
+    r600_cs_write_reloc,
+    r600_cs_begin,
+    r600_cs_end,
+    r600_cs_emit,
+    r600_cs_destroy,
+    r600_cs_erase,
+    r600_cs_need_flush,
+    r600_cs_print
+};
+
+/* Allocate a legacy CS manager bound to ctx: it uses r600_cs_funcs and the
+ * context's DRI fd, and starts with pending_age 1.  NULL on allocation
+ * failure. */
+struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx)
+{
+    struct r600_cs_manager_legacy *mgr = calloc(1, sizeof(*mgr));
+
+    if (!mgr)
+        return NULL;
+    mgr->ctx = ctx;
+    mgr->pending_age = 1;
+    mgr->base.fd = ctx->dri.fd;
+    mgr->base.funcs = &r600_cs_funcs;
+    return &mgr->base;
+}
+
+void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */
+{
+	radeonContextPtr rmesa = &r600->radeon;
+	GLuint size;
+
+	r600InitAtoms(r600);
+	/* Initialize command buffer: 256 * the command_buffer_size option, raised
+	 * to at least twice hw.max_state_size, then capped at 64 * 256. */
+	size = 256 * driQueryOptioni(&rmesa->optionCache,
+				     "command_buffer_size");
+	if (size < 2 * rmesa->hw.max_state_size) {
+		size = 2 * rmesa->hw.max_state_size + 65535; /* NOTE(review): always above the cap below, so this path ends at 64 * 256 */
+	}
+	if (size > 64 * 256)
+		size = 64 * 256;
+	/* GEM CS manager with kernel_mm, the legacy manager from this file otherwise */
+	if (rmesa->radeonScreen->kernel_mm) {
+		int fd = rmesa->radeonScreen->driScreen->fd;
+		rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
+	} else {
+		rmesa->cmdbuf.csm = r600_radeon_cs_manager_legacy_ctor(rmesa);
+	}
+	if (rmesa->cmdbuf.csm == NULL) {
+		/* FIXME: fatal error */
+		return;
+	}
+	rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size);
+	assert(rmesa->cmdbuf.cs != NULL);
+	rmesa->cmdbuf.size = size;
+
+	radeon_cs_space_set_flush(rmesa->cmdbuf.cs,
+				  (void (*)(void *))rmesa->glCtx->Driver.Flush, rmesa->glCtx);
+	/* VRAM/GTT limits: from the screen without kernel_mm, else from DRM_RADEON_GEM_INFO (no limit is set if that ioctl fails) */
+	if (!rmesa->radeonScreen->kernel_mm) {
+		radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]);
+		radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size);
+	} else {
+		struct drm_radeon_gem_info mminfo;
+
+		if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo)))
+		{
+			radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible);
+			radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size);
+		}
+	}
+}
+
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
new file mode 100644
index 0000000000..dff0009699
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
@@ -0,0 +1,196 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R600_CMDBUF_H__
+#define __R600_CMDBUF_H__
+
+#include "r600_context.h"
+#include "r600_emit.h"
+
+#define RADEON_CP_PACKET3_NOP                       0xC0001000
+#define RADEON_CP_PACKET3_NEXT_CHAR                 0xC0001900
+#define RADEON_CP_PACKET3_PLY_NEXTSCAN              0xC0001D00
+#define RADEON_CP_PACKET3_SET_SCISSORS              0xC0001E00
+#define RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM     0xC0002300
+#define RADEON_CP_PACKET3_LOAD_MICROCODE            0xC0002400
+#define RADEON_CP_PACKET3_WAIT_FOR_IDLE             0xC0002600
+#define RADEON_CP_PACKET3_3D_DRAW_VBUF              0xC0002800
+#define RADEON_CP_PACKET3_3D_DRAW_IMMD              0xC0002900
+#define RADEON_CP_PACKET3_3D_DRAW_INDX              0xC0002A00
+#define RADEON_CP_PACKET3_LOAD_PALETTE              0xC0002C00
+#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR            0xC0002F00
+#define RADEON_CP_PACKET3_CNTL_PAINT                0xC0009100
+#define RADEON_CP_PACKET3_CNTL_BITBLT               0xC0009200
+#define RADEON_CP_PACKET3_CNTL_SMALLTEXT            0xC0009300
+#define RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT         0xC0009400
+#define RADEON_CP_PACKET3_CNTL_POLYLINE             0xC0009500
+#define RADEON_CP_PACKET3_CNTL_POLYSCANLINES        0xC0009800
+#define RADEON_CP_PACKET3_CNTL_PAINT_MULTI          0xC0009A00
+#define RADEON_CP_PACKET3_CNTL_BITBLT_MULTI         0xC0009B00
+#define RADEON_CP_PACKET3_CNTL_TRANS_BITBLT         0xC0009C00
+
+/* r6xx/r7xx packet 3 type offsets */
+#define R600_SET_CONFIG_REG_OFFSET                  0x00008000
+#define R600_SET_CONFIG_REG_END                     0x0000ac00
+#define R600_SET_CONTEXT_REG_OFFSET                 0x00028000
+#define R600_SET_CONTEXT_REG_END                    0x00029000
+#define R600_SET_ALU_CONST_OFFSET                   0x00030000
+#define R600_SET_ALU_CONST_END                      0x00032000
+#define R600_SET_RESOURCE_OFFSET                    0x00038000
+#define R600_SET_RESOURCE_END                       0x0003c000
+#define R600_SET_SAMPLER_OFFSET                     0x0003c000
+#define R600_SET_SAMPLER_END                        0x0003cff0
+#define R600_SET_CTL_CONST_OFFSET                   0x0003cff0
+#define R600_SET_CTL_CONST_END                      0x0003e200
+#define R600_SET_LOOP_CONST_OFFSET                  0x0003e200
+#define R600_SET_LOOP_CONST_END                     0x0003e380
+#define R600_SET_BOOL_CONST_OFFSET                  0x0003e380
+#define R600_SET_BOOL_CONST_END                     0x00040000
+
+/* r6xx/r7xx packet 3 types */
+#define R600_IT_INDIRECT_BUFFER_END               0x00001700
+#define R600_IT_SET_PREDICATION                   0x00002000
+#define R600_IT_REG_RMW                           0x00002100
+#define R600_IT_COND_EXEC                         0x00002200
+#define R600_IT_PRED_EXEC                         0x00002300
+#define R600_IT_START_3D_CMDBUF                   0x00002400
+#define R600_IT_DRAW_INDEX_2                      0x00002700
+#define R600_IT_CONTEXT_CONTROL                   0x00002800
+#define R600_IT_DRAW_INDEX_IMMD_BE                0x00002900
+#define R600_IT_INDEX_TYPE                        0x00002A00
+#define R600_IT_DRAW_INDEX                        0x00002B00
+#define R600_IT_DRAW_INDEX_AUTO                   0x00002D00
+#define R600_IT_DRAW_INDEX_IMMD                   0x00002E00
+#define R600_IT_NUM_INSTANCES                     0x00002F00
+#define R600_IT_STRMOUT_BUFFER_UPDATE             0x00003400
+#define R600_IT_INDIRECT_BUFFER_MP                0x00003800
+#define R600_IT_MEM_SEMAPHORE                     0x00003900
+#define R600_IT_MPEG_INDEX                        0x00003A00
+#define R600_IT_WAIT_REG_MEM                      0x00003C00
+#define R600_IT_MEM_WRITE                         0x00003D00
+#define R600_IT_INDIRECT_BUFFER                   0x00003200
+#define R600_IT_CP_INTERRUPT                      0x00004000
+#define R600_IT_SURFACE_SYNC                      0x00004300
+#define R600_IT_ME_INITIALIZE                     0x00004400
+#define R600_IT_COND_WRITE                        0x00004500
+#define R600_IT_EVENT_WRITE                       0x00004600
+#define R600_IT_EVENT_WRITE_EOP                   0x00004700
+#define R600_IT_ONE_REG_WRITE                     0x00005700
+#define R600_IT_SET_CONFIG_REG                    0x00006800
+#define R600_IT_SET_CONTEXT_REG                   0x00006900
+#define R600_IT_SET_ALU_CONST                     0x00006A00
+#define R600_IT_SET_BOOL_CONST                    0x00006B00
+#define R600_IT_SET_LOOP_CONST                    0x00006C00
+#define R600_IT_SET_RESOURCE                      0x00006D00
+#define R600_IT_SET_SAMPLER                       0x00006E00
+#define R600_IT_SET_CTL_CONST                     0x00006F00
+#define R600_IT_SURFACE_BASE_UPDATE               0x00007300
+
+struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
+
+/**
+ * Write one dword to the command buffer (uses b_l_rmesa from the calling scope).
+ */
+#define R600_OUT_BATCH(data)				\
+do {							\
+        radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, data);	\
+} while(0)
+
+/**
+ * Write n dwords from ptr to the command buffer (uses b_l_rmesa from the calling scope).
+ */
+#define R600_OUT_BATCH_TABLE(ptr,n)		\
+do {						     \
+	radeon_cs_write_table(b_l_rmesa->cmdbuf.cs, ptr, n);	\
+} while(0)
+
+/**
+ * Emit a relocation for bo via radeon_cs_write_reloc(). `data` is never used and
+ * `offset` only feeds a disabled (if (0 && ...)) debug print. */
+#define R600_OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) 	\
+	do { 							\
+        if (0 && offset) {					\
+            fprintf(stderr, "(%s:%s:%d) offset : %d\n",		\
+            __FILE__, __FUNCTION__, __LINE__, offset);		\
+        }							\
+        radeon_cs_write_reloc(b_l_rmesa->cmdbuf.cs, 		\
+                              bo, rd, wd, flags);		\
+	} while(0)
+
+/* R600/R700: emit the header for a num-register write at reg -- a PACKET3 SET_* opcode plus the dword offset into that range, or a single PACKET0 if reg is in no SET_* range. reg is evaluated several times. */
+#define R600_OUT_BATCH_REGS(reg, num)					\
+do {								\
+	if ((reg) >= R600_SET_CONFIG_REG_OFFSET && (reg) < R600_SET_CONFIG_REG_END) { \
+		R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, (num)));	\
+		R600_OUT_BATCH(((reg) - R600_SET_CONFIG_REG_OFFSET) >> 2);	\
+	} else if ((reg) >= R600_SET_CONTEXT_REG_OFFSET && (reg) < R600_SET_CONTEXT_REG_END) { \
+		R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONTEXT_REG, (num)));	\
+		R600_OUT_BATCH(((reg) - R600_SET_CONTEXT_REG_OFFSET) >> 2);	\
+	} else if ((reg) >= R600_SET_ALU_CONST_OFFSET && (reg) < R600_SET_ALU_CONST_END) { \
+		R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, (num)));	\
+		R600_OUT_BATCH(((reg) - R600_SET_ALU_CONST_OFFSET) >> 2);	\
+	} else if ((reg) >= R600_SET_RESOURCE_OFFSET && (reg) < R600_SET_RESOURCE_END) { \
+		R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, (num)));	\
+		R600_OUT_BATCH(((reg) - R600_SET_RESOURCE_OFFSET) >> 2);	\
+	} else if ((reg) >= R600_SET_SAMPLER_OFFSET && (reg) < R600_SET_SAMPLER_END) { \
+		R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, (num)));	\
+		R600_OUT_BATCH(((reg) - R600_SET_SAMPLER_OFFSET) >> 2);	\
+	} else if ((reg) >= R600_SET_CTL_CONST_OFFSET && (reg) < R600_SET_CTL_CONST_END) { \
+		R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, (num)));	\
+		R600_OUT_BATCH(((reg) - R600_SET_CTL_CONST_OFFSET) >> 2);	\
+	} else if ((reg) >= R600_SET_LOOP_CONST_OFFSET && (reg) < R600_SET_LOOP_CONST_END) { \
+		R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_LOOP_CONST, (num)));	\
+		R600_OUT_BATCH(((reg) - R600_SET_LOOP_CONST_OFFSET) >> 2);	\
+	} else if ((reg) >= R600_SET_BOOL_CONST_OFFSET && (reg) < R600_SET_BOOL_CONST_END) { \
+		R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_BOOL_CONST, (num)));	\
+		R600_OUT_BATCH(((reg) - R600_SET_BOOL_CONST_OFFSET) >> 2);	\
+	} else {							\
+		R600_OUT_BATCH(CP_PACKET0((reg), (num))); \
+	}								\
+} while (0)
+
+/** Single register write to command buffer; requires 3 dwords for most things.
+ * Wrapped in do/while(0) so both writes stay one statement under an unbraced if/else. */
+#define R600_OUT_BATCH_REGVAL(reg, val)		\
+	do { R600_OUT_BATCH_REGS((reg), 1); R600_OUT_BATCH((val)); } while (0)
+
+/** Continuous register range write to command buffer; emits only the header (2 dwords
+ * for SET_* ranges, 1 for PACKET0) and expects count dwords afterwards for register contents. */
+#define R600_OUT_BATCH_REGSEQ(reg, count)	\
+	R600_OUT_BATCH_REGS((reg), (count))
+
+extern void r600InitCmdBuf(context_t *r600);
+
+#endif				/* __R600_CMDBUF_H__ */
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
new file mode 100644
index 0000000000..f4aed4e87f
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -0,0 +1,447 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#include "main/glheader.h"
+#include "main/api_arrayelt.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+#include "main/extensions.h"
+#include "main/bufferobj.h"
+#include "main/texobj.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "radeon_debug.h"
+#include "r600_context.h"
+#include "radeon_common_context.h"
+#include "radeon_buffer_objects.h"
+#include "radeon_span.h"
+#include "r600_cmdbuf.h"
+#include "r600_emit.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_queryobj.h"
+#include "r600_blit.h"
+
+#include "r700_state.h"
+#include "r700_ioctl.h"
+
+
+#include "utils.h"
+
+#define R600_ENABLE_GLSL_TEST 1
+
+#define need_GL_VERSION_2_0
+#define need_GL_ARB_occlusion_query
+#define need_GL_ARB_point_parameters
+#define need_GL_ARB_vertex_program
+#define need_GL_EXT_blend_equation_separate
+#define need_GL_EXT_blend_func_separate
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_framebuffer_object
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_gpu_program_parameters
+#define need_GL_EXT_provoking_vertex
+#define need_GL_EXT_secondary_color
+#define need_GL_EXT_stencil_two_side
+#define need_GL_ATI_separate_stencil
+#define need_GL_NV_vertex_program
+
+#include "main/remap_helper.h"
+
+static const struct dri_extension card_extensions[] = {
+  /* *INDENT-OFF* */
+  {"GL_ARB_depth_clamp",                NULL},
+  {"GL_ARB_depth_texture",		NULL},
+  {"GL_ARB_fragment_program",		NULL},
+  {"GL_ARB_fragment_program_shadow",	NULL},
+  {"GL_ARB_occlusion_query",            GL_ARB_occlusion_query_functions},
+  {"GL_ARB_multitexture",		NULL},
+  {"GL_ARB_point_parameters",		GL_ARB_point_parameters_functions},
+  {"GL_ARB_shadow",			NULL},
+  {"GL_ARB_shadow_ambient",		NULL},
+  {"GL_ARB_texture_border_clamp",	NULL},
+  {"GL_ARB_texture_cube_map",		NULL},
+  {"GL_ARB_texture_env_add",		NULL},
+  {"GL_ARB_texture_env_combine",	NULL},
+  {"GL_ARB_texture_env_crossbar",	NULL},
+  {"GL_ARB_texture_env_dot3",		NULL},
+  {"GL_ARB_texture_mirrored_repeat",	NULL},
+  {"GL_ARB_texture_non_power_of_two",   NULL},
+  {"GL_ARB_vertex_program",		GL_ARB_vertex_program_functions},
+  {"GL_EXT_blend_equation_separate",	GL_EXT_blend_equation_separate_functions},
+  {"GL_EXT_blend_func_separate",	GL_EXT_blend_func_separate_functions},
+  {"GL_EXT_blend_minmax",		GL_EXT_blend_minmax_functions},
+  {"GL_EXT_blend_subtract",		NULL},
+  {"GL_EXT_packed_depth_stencil",	NULL},
+  {"GL_EXT_fog_coord",			GL_EXT_fog_coord_functions },
+  {"GL_EXT_gpu_program_parameters",     GL_EXT_gpu_program_parameters_functions},
+  {"GL_EXT_provoking_vertex",           GL_EXT_provoking_vertex_functions },
+  {"GL_EXT_secondary_color", 		GL_EXT_secondary_color_functions},
+  {"GL_EXT_shadow_funcs",		NULL},
+  {"GL_EXT_stencil_two_side",		GL_EXT_stencil_two_side_functions},
+  {"GL_EXT_stencil_wrap",		NULL},
+  {"GL_EXT_texture_edge_clamp",		NULL},
+  {"GL_EXT_texture_env_combine", 	NULL},
+  {"GL_EXT_texture_env_dot3", 		NULL},
+  {"GL_EXT_texture_filter_anisotropic",	NULL},
+  {"GL_EXT_texture_lod_bias",		NULL},
+  {"GL_EXT_texture_mirror_clamp",	NULL},
+  {"GL_EXT_texture_rectangle",		NULL},
+  {"GL_EXT_vertex_array_bgra",          NULL},
+  {"GL_EXT_texture_sRGB",               NULL},
+  {"GL_ATI_separate_stencil",		GL_ATI_separate_stencil_functions},
+  {"GL_ATI_texture_env_combine3",	NULL},
+  {"GL_ATI_texture_mirror_once",	NULL},
+  {"GL_MESA_pack_invert",		NULL},
+  {"GL_MESA_ycbcr_texture",		NULL},
+  {"GL_MESAX_texture_float",		NULL},
+  {"GL_NV_blend_square",		NULL},
+  {"GL_NV_vertex_program",		GL_NV_vertex_program_functions},
+  {"GL_SGIS_generate_mipmap",		NULL},
+  {"GL_ARB_pixel_buffer_object",        NULL},
+  {NULL,				NULL}
+  /* *INDENT-ON* */
+};
+
+
+static const struct dri_extension mm_extensions[] = {
+  { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
+  { NULL, NULL }
+};
+
+/**
+ * The GL 2.0 functions are needed to make display lists work with
+ * functions added by GL_ATI_separate_stencil.
+ */
+static const struct dri_extension gl_20_extension[] = {
+#ifdef R600_ENABLE_GLSL_TEST
+    {"GL_ARB_shading_language_100",			GL_VERSION_2_0_functions },
+#else
+  {"GL_VERSION_2_0",			GL_VERSION_2_0_functions },
+#endif /* R600_ENABLE_GLSL_TEST */
+  {NULL, NULL}
+};
+
+static const struct tnl_pipeline_stage *r600_pipeline[] = {
+	/* Catch any t&l fallbacks
+	 */
+	&_tnl_vertex_transform_stage,
+	&_tnl_normal_transform_stage,
+	&_tnl_lighting_stage,
+	&_tnl_fog_coordinate_stage,
+	&_tnl_texgen_stage,
+	&_tnl_texture_transform_stage,
+	&_tnl_point_attenuation_stage,
+	&_tnl_vertex_program_stage,
+	&_tnl_render_stage,
+	0,
+};
+
+static void r600_get_lock(radeonContextPtr rmesa)
+{
+	drm_radeon_sarea_t *shared = rmesa->sarea;
+	/* Only act when the sarea names a different context as owner. */
+	if (shared->ctx_owner == rmesa->dri.hwContext)
+		return;
+	shared->ctx_owner = rmesa->dri.hwContext;
+	if (!rmesa->radeonScreen->kernel_mm)
+		radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
+}
+
+static void r600_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
+{
+    /* TODO: flush the pipe here so that all pending work completes. */
+    /* Not enabled yet: this hook currently emits nothing. */
+}
+
+static void r600_vtbl_pre_emit_atoms(radeonContextPtr radeon)
+{
+	r700Start3D((context_t *)radeon);	/* cast relies on radeon_context being the first member of context_t */
+}
+
+static void r600_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
+{
+	context_t *r600 = R700_CONTEXT(ctx);	/* driver context behind ctx */
+	if (!mode)
+		r600->radeon.Fallback &= ~bit;	/* clear the fallback bit */
+	else
+		r600->radeon.Fallback |= bit;	/* set the fallback bit */
+}
+
+static void r600_emit_query_finish(radeonContextPtr radeon)
+{
+	context_t *context = (context_t*) radeon;
+	BATCH_LOCALS(&context->radeon);
+
+	struct radeon_query_object *query = radeon->query.current;
+	/* Emit a ZPASS_DONE event write targeting the current query's BO, then clear emitted_begin. */
+	BEGIN_BATCH_NO_AUTOSTATE(4 + 2);	/* 4 dwords written below; presumably 2 more for the relocation -- confirm */
+	R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+	R600_OUT_BATCH(ZPASS_DONE);
+	R600_OUT_BATCH(query->curr_offset + 8); /* hw writes qwords */
+	R600_OUT_BATCH(0x00000000);
+	R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);	/* the macro ignores its first (data) argument */
+	END_BATCH();
+	assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+	query->emitted_begin = GL_FALSE;
+}
+
+static void r600_init_vtbl(radeonContextPtr radeon)
+{	/* install this driver's callbacks into radeon->vtbl */
+	radeon->vtbl.get_lock = r600_get_lock;
+	radeon->vtbl.update_viewport_offset = r700UpdateViewportOffset;
+	radeon->vtbl.emit_cs_header = r600_vtbl_emit_cs_header;
+	radeon->vtbl.swtcl_flush = NULL;	/* no swtcl_flush callback is provided */
+	radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms;
+	radeon->vtbl.fallback = r600_fallback;
+	radeon->vtbl.emit_query_finish = r600_emit_query_finish;
+	radeon->vtbl.check_blit = r600_check_blit;
+	radeon->vtbl.blit = r600_blit;
+	radeon->vtbl.is_format_renderable = r600IsFormatRenderable;
+}
+
+static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
+{	/* fill ctx->Const with the limits this driver advertises */
+	ctx->Const.MaxTextureImageUnits = 16;
+	/* 8 per clause on r6xx, 16 on r7xx
+	 * but I think mesa only supports 8 at the moment
+	 */
+	ctx->Const.MaxTextureCoordUnits = 8;
+	ctx->Const.MaxTextureUnits =
+	    MIN2(ctx->Const.MaxTextureImageUnits,
+		 ctx->Const.MaxTextureCoordUnits);
+	ctx->Const.MaxCombinedTextureImageUnits =
+		ctx->Const.MaxVertexTextureImageUnits +
+		ctx->Const.MaxTextureImageUnits;
+
+	ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+	ctx->Const.MaxTextureLodBias = 16.0;
+
+	ctx->Const.MaxTextureLevels = 13; /* hw supports 14 */
+	ctx->Const.MaxTextureRectSize = 4096; /* hw supports 8192 */
+
+	ctx->Const.MinPointSize   = 0x0001 / 8.0;
+	ctx->Const.MinPointSizeAA = 0x0001 / 8.0;
+	ctx->Const.MaxPointSize   = 0xffff / 8.0;
+	ctx->Const.MaxPointSizeAA = 0xffff / 8.0;
+
+	ctx->Const.MinLineWidth   = 0x0001 / 8.0;
+	ctx->Const.MinLineWidthAA = 0x0001 / 8.0;
+	ctx->Const.MaxLineWidth   = 0xffff / 8.0;
+	ctx->Const.MaxLineWidthAA = 0xffff / 8.0;
+
+	ctx->Const.MaxDrawBuffers = 1; /* hw supports 8 */
+	ctx->Const.MaxColorAttachments = 1;
+	ctx->Const.MaxRenderbufferSize = 4096;
+
+	/* Vertex program limits. 256 for reg-based consts, inline consts also supported */
+	ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */
+	ctx->Const.VertexProgram.MaxNativeInstructions = 8192;
+	ctx->Const.VertexProgram.MaxNativeAttribs = 160;
+	ctx->Const.VertexProgram.MaxTemps = 128;
+	ctx->Const.VertexProgram.MaxNativeTemps = 128;
+	ctx->Const.VertexProgram.MaxNativeParameters = 256;
+	ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */
+
+	ctx->Const.FragmentProgram.MaxNativeTemps = 128;
+	ctx->Const.FragmentProgram.MaxNativeAttribs = 32;
+	ctx->Const.FragmentProgram.MaxNativeParameters = 256;
+	ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192;
+	/* 8 per clause on r6xx, 16 on r7xx (chosen by chip family: RV770 and later get 16) */
+	if (screen->chip_family >= CHIP_FAMILY_RV770)
+		ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16;
+	else
+		ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8;
+	ctx->Const.FragmentProgram.MaxNativeInstructions = 8192;
+	ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */
+	ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;	/* and these are?? */
+}
+
+static void r600ParseOptions(context_t *r600, radeonScreenPtr screen)
+{
+	/* Parse the configuration files for driver "r600" into the context's
+	 * option cache.  Do this here so that initialMaxAnisotropy is set
+	 * before we create the default textures.
+	 */
+	driParseConfigFiles(&r600->radeon.optionCache, &screen->optionCache,
+			    screen->driScreen->myNum, "r600");
+	/* Cache the "def_max_anisotropy" option value on the context. */
+	r600->radeon.initialMaxAnisotropy = driQueryOptionf(&r600->radeon.optionCache,
+							    "def_max_anisotropy");
+
+}
+
+/* Register the extension tables for this context, then apply the option-cache
+ * overrides and the RV740 occlusion-query workaround. */
+static void r600InitGLExtensions(GLcontext *ctx)
+{
+	context_t *r600 = R700_CONTEXT(ctx);
+	radeonContextPtr rmesa = &r600->radeon;
+
+	/* Base table always; the mm table only with the kernel memory manager. */
+	driInitExtensions(ctx, card_extensions, GL_TRUE);
+	if (rmesa->radeonScreen->kernel_mm)
+		driInitExtensions(ctx, mm_extensions, GL_FALSE);
+
+#ifdef R600_ENABLE_GLSL_TEST
+	driInitExtensions(ctx, gl_20_extension, GL_TRUE);
+	_mesa_enable_2_0_extensions(ctx);
+	/* glsl compiler has problem if this is not GL_TRUE */
+	ctx->Shader.EmitCondCodes = GL_TRUE;
+#endif /* R600_ENABLE_GLSL_TEST */
+
+	if (driQueryOptionb(&rmesa->optionCache, "disable_stencil_two_side"))
+		_mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
+
+	/* S3TC: both names when Mesa_DXTn is set and not disabled; the forced path adds only the EXT name. */
+	if (rmesa->glCtx->Mesa_DXTn &&
+	    !driQueryOptionb(&rmesa->optionCache, "disable_s3tc")) {
+		_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+		_mesa_enable_extension(ctx, "GL_S3_s3tc");
+	} else if (driQueryOptionb(&rmesa->optionCache, "force_s3tc_enable")) {
+		_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+	}
+
+	/* RV740 had a broken pipe config prior to drm 1.32 */
+	if (!rmesa->radeonScreen->kernel_mm &&
+	    rmesa->dri.drmMinor < 32 &&
+	    rmesa->radeonScreen->chip_family == CHIP_FAMILY_RV740)
+		_mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
+}
+
+/* Create the device specific rendering context.  Returns GL_FALSE, after releasing
+ * what was allocated here, when allocation or radeonInitContext() fails. */
+GLboolean r600CreateContext(gl_api api,
+			    const __GLcontextModes * glVisual,
+			    __DRIcontext * driContextPriv,
+			    void *sharedContextPrivate)
+{
+	radeonScreenPtr screen;
+	struct dd_function_table functions;
+	context_t *r600;
+	GLcontext *ctx;
+
+	assert(glVisual);
+	assert(driContextPriv);
+	screen = (radeonScreenPtr) (driContextPriv->driScreenPriv->private);	/* dereference only after the assert */
+	assert(screen);
+
+	/* Allocate the R600 context */
+	r600 = (context_t*) CALLOC(sizeof(*r600));
+	if (!r600) {
+		radeon_error("Failed to allocate memory for context.\n");
+		return GL_FALSE;
+	}
+
+	r600ParseOptions(r600, screen);
+
+	r600->radeon.radeonScreen = screen;
+	r600_init_vtbl(&r600->radeon);
+
+	/* Init default driver functions then plug in our R600-specific functions
+	 * (the texture functions are especially important)
+	 */
+	_mesa_init_driver_functions(&functions);
+
+	r700InitStateFuncs(&r600->radeon, &functions);
+	r600InitTextureFuncs(&r600->radeon, &functions);
+	r700InitShaderFuncs(&functions);
+	radeonInitQueryObjFunctions(&functions);
+	r700InitIoctlFuncs(&functions);
+	radeonInitBufferObjectFuncs(&functions);
+
+	if (!radeonInitContext(&r600->radeon, &functions,
+			       glVisual, driContextPriv,
+			       sharedContextPrivate)) {
+		radeon_error("Initializing context failed.\n");
+		/* r600ParseOptions() filled the option cache; free it with the context. */
+		driDestroyOptionCache(&r600->radeon.optionCache);
+		FREE(r600);
+		return GL_FALSE;
+	}
+
+	ctx = r600->radeon.glCtx;
+
+	ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+	ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+
+	r600InitConstValues(ctx, screen);
+	_mesa_set_mvp_with_dp4( ctx, GL_TRUE );
+
+	/* Initialize the software rasterizer and helper modules.
+	 */
+	_swrast_CreateContext(ctx);
+	_vbo_CreateContext(ctx);
+	_tnl_CreateContext(ctx);
+	_swsetup_CreateContext(ctx);
+	_swsetup_Wakeup(ctx);
+
+	/* Install the customized pipeline:
+	 */
+	_tnl_destroy_pipeline(ctx);
+	_tnl_install_pipeline(ctx, r600_pipeline);
+	TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+
+	/* Configure swrast and TNL to match hardware characteristics:
+	 */
+	_swrast_allow_pixel_fog(ctx, GL_FALSE);
+	_swrast_allow_vertex_fog(ctx, GL_TRUE);
+	_tnl_allow_pixel_fog(ctx, GL_FALSE);
+	_tnl_allow_vertex_fog(ctx, GL_TRUE);
+
+	radeon_init_debug();
+	r700InitDraw(ctx);
+
+	radeon_fbo_init(&r600->radeon);
+   	radeonInitSpanFuncs( ctx );
+	r600InitCmdBuf(r600);
+	r700InitState(r600->radeon.glCtx);
+
+	r600InitGLExtensions(ctx);
+
+	return GL_TRUE;
+}
+
+
diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h
new file mode 100644
index 0000000000..063dd7c49a
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_context.h
@@ -0,0 +1,195 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R600_CONTEXT_H__
+#define __R600_CONTEXT_H__
+
+#include "tnl/t_vertex.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "dri_util.h"
+#include "texmem.h"
+#include "radeon_common.h"
+
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+
+#include "r700_chip.h"
+#include "r600_tex.h"
+#include "r700_oglprog.h"
+#include "r700_vertprog.h"
+
+struct r600_context;
+typedef struct r600_context context_t;
+
+#include "main/mm.h"
+
+#define COLOR_IS_RGBA
+#define TAG(x) r600##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+#define R600_FALLBACK_NONE 0
+#define R600_FALLBACK_TCL 1
+#define R600_FALLBACK_RAST 2
+
+struct r600_hw_state {
+	struct radeon_state_atom sq;
+	struct radeon_state_atom db;
+	struct radeon_state_atom stencil;
+	struct radeon_state_atom db_target;
+	struct radeon_state_atom sc;
+	struct radeon_state_atom scissor;
+	struct radeon_state_atom aa;
+	struct radeon_state_atom cl;
+	struct radeon_state_atom gb;
+	struct radeon_state_atom ucp;
+	struct radeon_state_atom su;
+	struct radeon_state_atom poly;
+	struct radeon_state_atom cb;
+	struct radeon_state_atom clrcmp;
+	struct radeon_state_atom blnd;
+	struct radeon_state_atom blnd_clr;
+	struct radeon_state_atom cb_target;
+	struct radeon_state_atom sx;
+	struct radeon_state_atom vgt;
+	struct radeon_state_atom spi;
+	struct radeon_state_atom vpt;
+
+	struct radeon_state_atom fs;
+	struct radeon_state_atom vs;
+	struct radeon_state_atom ps;
+
+	struct radeon_state_atom vs_consts;
+	struct radeon_state_atom ps_consts;
+
+	struct radeon_state_atom vtx;
+	struct radeon_state_atom tx;
+	struct radeon_state_atom tx_smplr;
+	struct radeon_state_atom tx_brdr_clr;
+};
+
+typedef struct StreamDesc
+{
+	GLint   size;   // number of data elements
+	GLenum  type;  // data element type
+	GLsizei stride;
+	GLenum  format; // GL_RGBA or GL_BGRA
+
+	struct radeon_bo *bo;
+	GLint  bo_offset;
+
+	GLuint    dwords;
+	GLuint    dst_loc;
+	GLuint    _signed;
+	GLboolean normalize;
+	GLboolean is_named_bo;
+	GLubyte   element;
+} StreamDesc;
+
+typedef struct r700_index_buffer
+{
+	struct radeon_bo *bo;
+	int    bo_offset;
+
+	GLboolean is_32bit;
+	GLuint    count;
+} r700_index_buffer;
+
+/**
+ * \brief R600 context structure (context_t is a typedef of this struct).
+ */
+struct r600_context {
+	struct radeon_context radeon;	/* parent class, must be first */
+
+	/* Chip state block; R700_CONTEXT_STATES() returns its address. */
+	R700_CHIP_CONTEXT hw;
+
+	struct r600_hw_state atoms;	/* R600_STATECHANGE() sets the dirty flag of a member here */
+
+	struct r700_vertex_program *selected_vp;
+
+	/* Vertex buffers
+	 */
+	GLint      nNumActiveAos;
+	StreamDesc stream_desc[VERT_ATTRIB_MAX];
+    struct r700_index_buffer ind_buf;
+	struct radeon_bo *blit_bo;
+	GLboolean blit_bo_loaded;
+};
+
+#define R700_CONTEXT(ctx)		((context_t *)((ctx)->DriverCtx))	/* GLcontext -> context_t; arg parenthesized */
+#define GL_CONTEXT(context)     ((GLcontext *)((context)->radeon.glCtx))	/* context_t -> GLcontext; arg parenthesized */
+
+extern GLboolean r600CreateContext(gl_api api,
+				   const __GLcontextModes * glVisual,
+				   __DRIcontext * driContextPriv,
+				   void *sharedContextPrivate);
+
+#define R700_CONTEXT_STATES(context) ((R700_CHIP_CONTEXT *)(&(context)->hw))	/* address of the hw member; arg parenthesized */
+
+#define R600_NEWPRIM( rmesa )			\
+do {	/* call the dma.flush hook if one is set; rmesa is parenthesized but evaluated more than once */	\
+	if ( (rmesa)->radeon.dma.flush )			\
+		(rmesa)->radeon.dma.flush( (rmesa)->radeon.glCtx );	\
+} while (0)
+
+#define R600_STATECHANGE(r600, ATOM)			\
+do {	/* run R600_NEWPRIM, then mark atoms.ATOM and the hw state dirty; r600 is parenthesized */	\
+	R600_NEWPRIM(r600);					\
+	(r600)->atoms.ATOM.dirty = GL_TRUE;					\
+	(r600)->radeon.hw.is_dirty = GL_TRUE;			\
+} while(0)
+
+extern GLboolean r700SyncSurf(context_t *context,
+			      struct radeon_bo *pbo,
+			      uint32_t read_domain,
+			      uint32_t write_domain,
+			      uint32_t sync_type);
+
+extern void r700WaitForIdleClean(context_t *context);
+
+extern void r700Start3D(context_t *context);
+extern void r600InitAtoms(context_t *context);
+extern void r700InitDraw(GLcontext *ctx);
+
+#define RADEON_D_CAPTURE 0
+#define RADEON_D_PLAYBACK 1
+#define RADEON_D_PLAYBACK_RAW 2
+#define RADEON_D_T 3
+
+#endif				/* __R600_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/r600/r600_emit.c b/src/mesa/drivers/dri/r600/r600_emit.c
new file mode 100644
index 0000000000..1eb89a5305
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_emit.c
@@ -0,0 +1,115 @@
+/**************************************************************************
+
+Copyright 2008, 2009 Advanced Micro Devices Inc. (AMD)
+
+Copyright (C) Advanced Micro Devices Inc. (AMD)  2009.  All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/imports.h"
+#include "main/macros.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+#include "r600_context.h"
+#include "r600_emit.h"
+
+void r600EmitCacheFlush(context_t *rmesa)
+{	/* stub: the body is empty, nothing is emitted */
+}
+
+/* Copy sizeinDWORD dwords from data into a newly opened GTT buffer object and return
+ * it through *shaderbo.  Returns GL_FALSE, leaving *shaderbo untouched, on failure. */
+GLboolean r600EmitShader(GLcontext * ctx,
+                         void ** shaderbo,
+			 GLvoid * data,
+                         int sizeinDWORD,
+                         char * szShaderUsage)
+{
+	radeonContextPtr radeonctx = RADEON_CONTEXT(ctx);
+	struct radeon_bo * pbo;
+
+shader_again_alloc:
+	pbo = radeon_bo_open(radeonctx->radeonScreen->bom, 0,
+			sizeinDWORD * 4, 256,
+			RADEON_GEM_DOMAIN_GTT, 0);
+
+	radeon_print(RADEON_SHADER, RADEON_NORMAL, "%s %p size %d: %s\n", __func__, pbo, sizeinDWORD, szShaderUsage);
+
+	if (!pbo) {
+		/* NOTE(review): retries without bound; a persistent failure spins here. */
+		radeon_print(RADEON_MEMORY | RADEON_CS, RADEON_IMPORTANT, "No memory for buffer object. Flushing command buffer.\n");
+		rcommonFlushCmdBuf(radeonctx, __FUNCTION__);
+		goto shader_again_alloc;
+	}
+
+	radeon_cs_space_add_persistent_bo(radeonctx->cmdbuf.cs,
+			pbo,
+			RADEON_GEM_DOMAIN_GTT, 0);
+
+	if (radeon_cs_space_check_with_bo(radeonctx->cmdbuf.cs,
+				pbo,
+				RADEON_GEM_DOMAIN_GTT, 0)) {
+		radeon_error("failure to revalidate BOs - badness\n");
+		radeon_bo_unref(pbo);	/* drop our reference instead of leaking the BO */
+		return GL_FALSE;
+	}
+
+	if (radeon_bo_map(pbo, 1) != 0 || !pbo->ptr) {	/* never memcpy into an unmapped BO */
+		radeon_error("failed to map shader buffer object\n");
+		radeon_bo_unref(pbo);
+		return GL_FALSE;
+	}
+	memcpy(pbo->ptr, data, sizeinDWORD * 4);
+	radeon_bo_unmap(pbo);
+
+	*shaderbo = (void*)pbo;
+	return GL_TRUE;
+}
+
+/* Release the buffer object behind shaderbo; a NULL shaderbo is only logged. Always returns GL_TRUE. */
+GLboolean r600DeleteShader(GLcontext * ctx,
+                           void * shaderbo)
+{
+	struct radeon_bo *bo = (struct radeon_bo *)shaderbo;
+
+	radeon_print(RADEON_SHADER, RADEON_NORMAL, "%s: %p\n", __func__, bo);
+	if (bo == NULL)
+		return GL_TRUE;
+
+	if (bo->ptr != NULL)	/* ptr set: unmap before dropping the reference */
+		radeon_bo_unmap(bo);
+	radeon_bo_unref(bo);	/* when bo->cref <= 0, bo will be bo_free */
+	return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r600/r600_emit.h b/src/mesa/drivers/dri/r600/r600_emit.h
new file mode 100644
index 0000000000..661774d11e
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_emit.h
@@ -0,0 +1,55 @@
+/**************************************************************************
+
+Copyright 2008, 2009 Advanced Micro Devices Inc. (AMD)
+
+Copyright (C) Advanced Micro Devices Inc. (AMD)  2009.  All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+
+#ifndef __R600_EMIT_H__
+#define __R600_EMIT_H__
+
+#include "main/glheader.h"
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+#include "radeon_reg.h"
+
+void r600EmitCacheFlush(context_t *rmesa);
+
+extern GLboolean r600EmitShader(GLcontext * ctx, 
+                                void ** shaderbo,
+			                    GLvoid * data, 
+                                int sizeinDWORD,
+                                char * szShaderUsage); 
+
+extern GLboolean r600DeleteShader(GLcontext * ctx, 
+                                 void * shaderbo);
+
+#endif
diff --git a/src/mesa/drivers/dri/r600/r600_reg.h b/src/mesa/drivers/dri/r600/r600_reg.h
new file mode 100644
index 0000000000..ffe5ee4f74
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_reg.h
@@ -0,0 +1,121 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009  Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_H_
+#define _R600_REG_H_
+
+/*
+ * Register definitions
+ */
+
+#include "r600_reg_auto_r6xx.h"
+#include "r600_reg_r6xx.h"
+#include "r600_reg_r7xx.h"
+
+
+/* SET_*_REG offsets + ends: each register range is described by a <NAME>_offset / <NAME>_end pair of constants */
+enum 
+{
+    SET_CONFIG_REG_offset                = 0x00008000,
+    SET_CONFIG_REG_end                   = 0x0000ac00,
+    SET_CONTEXT_REG_offset               = 0x00028000,
+    SET_CONTEXT_REG_end                  = 0x00029000,
+    SET_ALU_CONST_offset                 = 0x00030000,
+    SET_ALU_CONST_end                    = 0x00032000,
+    SET_RESOURCE_offset                  = 0x00038000,
+    SET_RESOURCE_end                     = 0x0003c000, /* equals SET_SAMPLER_offset: from here on the ranges are back to back, so _end is presumably exclusive -- confirm */
+    SET_SAMPLER_offset                   = 0x0003c000,
+    SET_SAMPLER_end                      = 0x0003cff0, /* equals SET_CTL_CONST_offset */
+    SET_CTL_CONST_offset                 = 0x0003cff0,
+    SET_CTL_CONST_end                    = 0x0003e200, /* equals SET_LOOP_CONST_offset */
+    SET_LOOP_CONST_offset                = 0x0003e200,
+    SET_LOOP_CONST_end                   = 0x0003e380, /* equals SET_BOOL_CONST_offset */
+    SET_BOOL_CONST_offset                = 0x0003e380,
+    SET_BOOL_CONST_end                   = 0x00040000,
+};
+
+/* packet3 IT_SURFACE_BASE_UPDATE bits: one single-bit flag per base, occupying bits 0-14 */
+enum 
+{
+    DEPTH_BASE                           = (1 << 0),  /* bit 0 */
+    COLOR0_BASE                          = (1 << 1),  /* bits 1-8: COLOR0..COLOR7 */
+    COLOR1_BASE                          = (1 << 2),
+    COLOR2_BASE                          = (1 << 3),
+    COLOR3_BASE                          = (1 << 4),
+    COLOR4_BASE                          = (1 << 5),
+    COLOR5_BASE                          = (1 << 6),
+    COLOR6_BASE                          = (1 << 7),
+    COLOR7_BASE                          = (1 << 8),
+    STRMOUT_BASE0                        = (1 << 9),  /* bits 9-12: STRMOUT_BASE0..3 */
+    STRMOUT_BASE1                        = (1 << 10),
+    STRMOUT_BASE2                        = (1 << 11),
+    STRMOUT_BASE3                        = (1 << 12),
+    COHER_BASE0                          = (1 << 13), /* bits 13-14: COHER_BASE0..1 */
+    COHER_BASE1                          = (1 << 14),
+};
+
+/* Packet3 commands: IT_* command codes, listed in ascending numeric order except IT_INDIRECT_BUFFER */
+enum 
+{
+    IT_NOP                               = 0x10,
+    IT_INDIRECT_BUFFER_END               = 0x17,
+    IT_SET_PREDICATION                   = 0x20,
+    IT_REG_RMW                           = 0x21,
+    IT_COND_EXEC                         = 0x22,
+    IT_PRED_EXEC                         = 0x23,
+    IT_START_3D_CMDBUF                   = 0x24,
+    IT_DRAW_INDEX_2                      = 0x27,
+    IT_CONTEXT_CONTROL                   = 0x28,
+    IT_DRAW_INDEX_IMMD_BE                = 0x29,
+    IT_INDEX_TYPE                        = 0x2A,
+    IT_DRAW_INDEX                        = 0x2B,
+    IT_DRAW_INDEX_AUTO                   = 0x2D,
+    IT_DRAW_INDEX_IMMD                   = 0x2E,
+    IT_NUM_INSTANCES                     = 0x2F,
+    IT_STRMOUT_BUFFER_UPDATE             = 0x34,
+    IT_INDIRECT_BUFFER_MP                = 0x38,
+    IT_MEM_SEMAPHORE                     = 0x39,
+    IT_MPEG_INDEX                        = 0x3A,
+    IT_WAIT_REG_MEM                      = 0x3C,
+    IT_MEM_WRITE                         = 0x3D,
+    IT_INDIRECT_BUFFER                   = 0x32, /* out of numeric order: 0x32 would sort between IT_NUM_INSTANCES (0x2F) and IT_STRMOUT_BUFFER_UPDATE (0x34) */
+    IT_CP_INTERRUPT                      = 0x40,
+    IT_SURFACE_SYNC                      = 0x43,
+    IT_ME_INITIALIZE                     = 0x44,
+    IT_COND_WRITE                        = 0x45,
+    IT_EVENT_WRITE                       = 0x46,
+    IT_EVENT_WRITE_EOP                   = 0x47,
+    IT_ONE_REG_WRITE                     = 0x57,
+    IT_SET_CONFIG_REG                    = 0x68,
+    IT_SET_CONTEXT_REG                   = 0x69,
+    IT_SET_ALU_CONST                     = 0x6A,
+    IT_SET_BOOL_CONST                    = 0x6B,
+    IT_SET_LOOP_CONST                    = 0x6C,
+    IT_SET_RESOURCE                      = 0x6D,
+    IT_SET_SAMPLER                       = 0x6E,
+    IT_SET_CTL_CONST                     = 0x6F,
+    IT_SURFACE_BASE_UPDATE               = 0x73, /* the flag bits for this packet are the DEPTH_BASE..COHER_BASE1 enum earlier in this header */
+};
+
+#endif
diff --git a/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h b/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h
new file mode 100644
index 0000000000..edd85b0fac
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_reg_auto_r6xx.h
@@ -0,0 +1,3089 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009  Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _AUTOREGS
+#define _AUTOREGS
+
+enum {
+
+    VGT_VTX_VECT_EJECT_REG                                = 0x000088b0,
+	PRIM_COUNT_mask                                   = 0x3ff << 0,
+	PRIM_COUNT_shift                                  = 0,
+    VGT_LAST_COPY_STATE                                   = 0x000088c0,
+	SRC_STATE_ID_mask                                 = 0x07 << 0,
+	SRC_STATE_ID_shift                                = 0,
+	DST_STATE_ID_mask                                 = 0x07 << 16,
+	DST_STATE_ID_shift                                = 16,
+    VGT_CACHE_INVALIDATION                                = 0x000088c4,
+	CACHE_INVALIDATION_mask                           = 0x03 << 0,
+	CACHE_INVALIDATION_shift                          = 0,
+	    VC_ONLY                                       = 0x00,
+	    TC_ONLY                                       = 0x01,
+	    VC_AND_TC                                     = 0x02,
+	VS_NO_EXTRA_BUFFER_bit                            = 1 << 5,
+    VGT_GS_PER_ES                                         = 0x000088c8,
+    VGT_ES_PER_GS                                         = 0x000088cc,
+    VGT_GS_VERTEX_REUSE                                   = 0x000088d4,
+	VERT_REUSE_mask                                   = 0x1f << 0,
+	VERT_REUSE_shift                                  = 0,
+    VGT_MC_LAT_CNTL                                       = 0x000088d8,
+	MC_TIME_STAMP_RES_mask                            = 0x03 << 0,
+	MC_TIME_STAMP_RES_shift                           = 0,
+	    X_0_992_MAX_LATENCY                           = 0x00,
+	    X_0_496_MAX_LATENCY                           = 0x01,
+	    X_0_248_MAX_LATENCY                           = 0x02,
+	    X_0_124_MAX_LATENCY                           = 0x03,
+    VGT_GS_PER_VS                                         = 0x000088e8,
+	GS_PER_VS_mask                                    = 0x0f << 0,
+	GS_PER_VS_shift                                   = 0,
+    VGT_CNTL_STATUS                                       = 0x000088f0,
+	VGT_OUT_INDX_BUSY_bit                             = 1 << 0,
+	VGT_OUT_BUSY_bit                                  = 1 << 1,
+	VGT_PT_BUSY_bit                                   = 1 << 2,
+	VGT_TE_BUSY_bit                                   = 1 << 3,
+	VGT_VR_BUSY_bit                                   = 1 << 4,
+	VGT_GRP_BUSY_bit                                  = 1 << 5,
+	VGT_DMA_REQ_BUSY_bit                              = 1 << 6,
+	VGT_DMA_BUSY_bit                                  = 1 << 7,
+	VGT_GS_BUSY_bit                                   = 1 << 8,
+	VGT_BUSY_bit                                      = 1 << 9,
+    VGT_PRIMITIVE_TYPE                                    = 0x00008958,
+	VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask                = 0x3f << 0,
+	VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift               = 0,
+	    DI_PT_NONE                                    = 0x00,
+	    DI_PT_POINTLIST                               = 0x01,
+	    DI_PT_LINELIST                                = 0x02,
+	    DI_PT_LINESTRIP                               = 0x03,
+	    DI_PT_TRILIST                                 = 0x04,
+	    DI_PT_TRIFAN                                  = 0x05,
+	    DI_PT_TRISTRIP                                = 0x06,
+	    DI_PT_UNUSED_0                                = 0x07,
+	    DI_PT_UNUSED_1                                = 0x08,
+	    DI_PT_UNUSED_2                                = 0x09,
+	    DI_PT_LINELIST_ADJ                            = 0x0a,
+	    DI_PT_LINESTRIP_ADJ                           = 0x0b,
+	    DI_PT_TRILIST_ADJ                             = 0x0c,
+	    DI_PT_TRISTRIP_ADJ                            = 0x0d,
+	    DI_PT_UNUSED_3                                = 0x0e,
+	    DI_PT_UNUSED_4                                = 0x0f,
+	    DI_PT_TRI_WITH_WFLAGS                         = 0x10,
+	    DI_PT_RECTLIST                                = 0x11,
+	    DI_PT_LINELOOP                                = 0x12,
+	    DI_PT_QUADLIST                                = 0x13,
+	    DI_PT_QUADSTRIP                               = 0x14,
+	    DI_PT_POLYGON                                 = 0x15,
+	    DI_PT_2D_COPY_RECT_LIST_V0                    = 0x16,
+	    DI_PT_2D_COPY_RECT_LIST_V1                    = 0x17,
+	    DI_PT_2D_COPY_RECT_LIST_V2                    = 0x18,
+	    DI_PT_2D_COPY_RECT_LIST_V3                    = 0x19,
+	    DI_PT_2D_FILL_RECT_LIST                       = 0x1a,
+	    DI_PT_2D_LINE_STRIP                           = 0x1b,
+	    DI_PT_2D_TRI_STRIP                            = 0x1c,
+    VGT_INDEX_TYPE                                        = 0x0000895c,
+	INDEX_TYPE_mask                                   = 0x03 << 0,
+	INDEX_TYPE_shift                                  = 0,
+	    DI_INDEX_SIZE_16_BIT                          = 0x00,
+	    DI_INDEX_SIZE_32_BIT                          = 0x01,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_0                      = 0x00008960,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_1                      = 0x00008964,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_2                      = 0x00008968,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_3                      = 0x0000896c,
+    VGT_NUM_INDICES                                       = 0x00008970,
+    VGT_NUM_INSTANCES                                     = 0x00008974,
+    PA_CL_CNTL_STATUS                                     = 0x00008a10,
+	CL_BUSY_bit                                       = 1 << 31,
+    PA_CL_ENHANCE                                         = 0x00008a14,
+	CLIP_VTX_REORDER_ENA_bit                          = 1 << 0,
+	NUM_CLIP_SEQ_mask                                 = 0x03 << 1,
+	NUM_CLIP_SEQ_shift                                = 1,
+	CLIPPED_PRIM_SEQ_STALL_bit                        = 1 << 3,
+	VE_NAN_PROC_DISABLE_bit                           = 1 << 4,
+    PA_SU_CNTL_STATUS                                     = 0x00008a50,
+	SU_BUSY_bit                                       = 1 << 31,
+    PA_SC_LINE_STIPPLE_STATE                              = 0x00008b10,
+	CURRENT_PTR_mask                                  = 0x0f << 0,
+	CURRENT_PTR_shift                                 = 0,
+	CURRENT_COUNT_mask                                = 0xff << 8,
+	CURRENT_COUNT_shift                               = 8,
+    PA_SC_MULTI_CHIP_CNTL                                 = 0x00008b20,
+	LOG2_NUM_CHIPS_mask                               = 0x07 << 0,
+	LOG2_NUM_CHIPS_shift                              = 0,
+	MULTI_CHIP_TILE_SIZE_mask                         = 0x03 << 3,
+	MULTI_CHIP_TILE_SIZE_shift                        = 3,
+	    X_16_X_16_PIXEL_TILE_PER_CHIP                 = 0x00,
+	    X_32_X_32_PIXEL_TILE_PER_CHIP                 = 0x01,
+	    X_64_X_64_PIXEL_TILE_PER_CHIP                 = 0x02,
+	    X_128X128_PIXEL_TILE_PER_CHIP                 = 0x03,
+	CHIP_TILE_X_LOC_mask                              = 0x07 << 5,
+	CHIP_TILE_X_LOC_shift                             = 5,
+	CHIP_TILE_Y_LOC_mask                              = 0x07 << 8,
+	CHIP_TILE_Y_LOC_shift                             = 8,
+	CHIP_SUPER_TILE_B_bit                             = 1 << 11,
+    PA_SC_AA_SAMPLE_LOCS_2S                               = 0x00008b40,
+	S0_X_mask                                         = 0x0f << 0,
+	S0_X_shift                                        = 0,
+	S0_Y_mask                                         = 0x0f << 4,
+	S0_Y_shift                                        = 4,
+	S1_X_mask                                         = 0x0f << 8,
+	S1_X_shift                                        = 8,
+	S1_Y_mask                                         = 0x0f << 12,
+	S1_Y_shift                                        = 12,
+    PA_SC_AA_SAMPLE_LOCS_4S                               = 0x00008b44,
+/* 	S0_X_mask                                         = 0x0f << 0, */
+/* 	S0_X_shift                                        = 0, */
+/* 	S0_Y_mask                                         = 0x0f << 4, */
+/* 	S0_Y_shift                                        = 4, */
+/* 	S1_X_mask                                         = 0x0f << 8, */
+/* 	S1_X_shift                                        = 8, */
+/* 	S1_Y_mask                                         = 0x0f << 12, */
+/* 	S1_Y_shift                                        = 12, */
+	S2_X_mask                                         = 0x0f << 16,
+	S2_X_shift                                        = 16,
+	S2_Y_mask                                         = 0x0f << 20,
+	S2_Y_shift                                        = 20,
+	S3_X_mask                                         = 0x0f << 24,
+	S3_X_shift                                        = 24,
+	S3_Y_mask                                         = 0x0f << 28,
+	S3_Y_shift                                        = 28,
+    PA_SC_AA_SAMPLE_LOCS_8S_WD0                           = 0x00008b48,
+/* 	S0_X_mask                                         = 0x0f << 0, */
+/* 	S0_X_shift                                        = 0, */
+/* 	S0_Y_mask                                         = 0x0f << 4, */
+/* 	S0_Y_shift                                        = 4, */
+/* 	S1_X_mask                                         = 0x0f << 8, */
+/* 	S1_X_shift                                        = 8, */
+/* 	S1_Y_mask                                         = 0x0f << 12, */
+/* 	S1_Y_shift                                        = 12, */
+/* 	S2_X_mask                                         = 0x0f << 16, */
+/* 	S2_X_shift                                        = 16, */
+/* 	S2_Y_mask                                         = 0x0f << 20, */
+/* 	S2_Y_shift                                        = 20, */
+/* 	S3_X_mask                                         = 0x0f << 24, */
+/* 	S3_X_shift                                        = 24, */
+/* 	S3_Y_mask                                         = 0x0f << 28, */
+/* 	S3_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_8S_WD1                           = 0x00008b4c,
+	S4_X_mask                                         = 0x0f << 0,
+	S4_X_shift                                        = 0,
+	S4_Y_mask                                         = 0x0f << 4,
+	S4_Y_shift                                        = 4,
+	S5_X_mask                                         = 0x0f << 8,
+	S5_X_shift                                        = 8,
+	S5_Y_mask                                         = 0x0f << 12,
+	S5_Y_shift                                        = 12,
+	S6_X_mask                                         = 0x0f << 16,
+	S6_X_shift                                        = 16,
+	S6_Y_mask                                         = 0x0f << 20,
+	S6_Y_shift                                        = 20,
+	S7_X_mask                                         = 0x0f << 24,
+	S7_X_shift                                        = 24,
+	S7_Y_mask                                         = 0x0f << 28,
+	S7_Y_shift                                        = 28,
+    PA_SC_CNTL_STATUS                                     = 0x00008be0,
+	MPASS_OVERFLOW_bit                                = 1 << 30,
+    PA_SC_ENHANCE                                         = 0x00008bf0,
+	FORCE_EOV_MAX_CLK_CNT_mask                        = 0xfff << 0,
+	FORCE_EOV_MAX_CLK_CNT_shift                       = 0,
+	FORCE_EOV_MAX_TILE_CNT_mask                       = 0xfff << 12,
+	FORCE_EOV_MAX_TILE_CNT_shift                      = 12,
+    SQ_CONFIG                                             = 0x00008c00,
+	VC_ENABLE_bit                                     = 1 << 0,
+	EXPORT_SRC_C_bit                                  = 1 << 1,
+	DX9_CONSTS_bit                                    = 1 << 2,
+	ALU_INST_PREFER_VECTOR_bit                        = 1 << 3,
+	SQ_CONFIG__DX10_CLAMP_bit                         = 1 << 4,
+	ALU_PREFER_ONE_WATERFALL_bit                      = 1 << 5,
+	ALU_MAX_ONE_WATERFALL_bit                         = 1 << 6,
+	CLAUSE_SEQ_PRIO_mask                              = 0x03 << 8,
+	CLAUSE_SEQ_PRIO_shift                             = 8,
+	    SQ_CL_PRIO_RND_ROBIN                          = 0x00,
+	    SQ_CL_PRIO_MACRO_SEQ                          = 0x01,
+	    SQ_CL_PRIO_NONE                               = 0x02,
+	PS_PRIO_mask                                      = 0x03 << 24,
+	PS_PRIO_shift                                     = 24,
+	VS_PRIO_mask                                      = 0x03 << 26,
+	VS_PRIO_shift                                     = 26,
+	GS_PRIO_mask                                      = 0x03 << 28,
+	GS_PRIO_shift                                     = 28,
+	ES_PRIO_mask                                      = 0x03 << 30,
+	ES_PRIO_shift                                     = 30,
+    SQ_GPR_RESOURCE_MGMT_1                                = 0x00008c04,
+	NUM_PS_GPRS_mask                                  = 0xff << 0,
+	NUM_PS_GPRS_shift                                 = 0,
+	NUM_VS_GPRS_mask                                  = 0xff << 16,
+	NUM_VS_GPRS_shift                                 = 16,
+	NUM_CLAUSE_TEMP_GPRS_mask                         = 0x0f << 28,
+	NUM_CLAUSE_TEMP_GPRS_shift                        = 28,
+    SQ_GPR_RESOURCE_MGMT_2                                = 0x00008c08,
+	NUM_GS_GPRS_mask                                  = 0xff << 0,
+	NUM_GS_GPRS_shift                                 = 0,
+	NUM_ES_GPRS_mask                                  = 0xff << 16,
+	NUM_ES_GPRS_shift                                 = 16,
+    SQ_THREAD_RESOURCE_MGMT                               = 0x00008c0c,
+	NUM_PS_THREADS_mask                               = 0xff << 0,
+	NUM_PS_THREADS_shift                              = 0,
+	NUM_VS_THREADS_mask                               = 0xff << 8,
+	NUM_VS_THREADS_shift                              = 8,
+	NUM_GS_THREADS_mask                               = 0xff << 16,
+	NUM_GS_THREADS_shift                              = 16,
+	NUM_ES_THREADS_mask                               = 0xff << 24,
+	NUM_ES_THREADS_shift                              = 24,
+    SQ_STACK_RESOURCE_MGMT_1                              = 0x00008c10,
+	NUM_PS_STACK_ENTRIES_mask                         = 0xfff << 0,
+	NUM_PS_STACK_ENTRIES_shift                        = 0,
+	NUM_VS_STACK_ENTRIES_mask                         = 0xfff << 16,
+	NUM_VS_STACK_ENTRIES_shift                        = 16,
+    SQ_STACK_RESOURCE_MGMT_2                              = 0x00008c14,
+	NUM_GS_STACK_ENTRIES_mask                         = 0xfff << 0,
+	NUM_GS_STACK_ENTRIES_shift                        = 0,
+	NUM_ES_STACK_ENTRIES_mask                         = 0xfff << 16,
+	NUM_ES_STACK_ENTRIES_shift                        = 16,
+    SQ_ESGS_RING_BASE                                     = 0x00008c40,
+    SQ_ESGS_RING_SIZE                                     = 0x00008c44,
+    SQ_GSVS_RING_BASE                                     = 0x00008c48,
+    SQ_GSVS_RING_SIZE                                     = 0x00008c4c,
+    SQ_ESTMP_RING_BASE                                    = 0x00008c50,
+    SQ_ESTMP_RING_SIZE                                    = 0x00008c54,
+    SQ_GSTMP_RING_BASE                                    = 0x00008c58,
+    SQ_GSTMP_RING_SIZE                                    = 0x00008c5c,
+    SQ_VSTMP_RING_BASE                                    = 0x00008c60,
+    SQ_VSTMP_RING_SIZE                                    = 0x00008c64,
+    SQ_PSTMP_RING_BASE                                    = 0x00008c68,
+    SQ_PSTMP_RING_SIZE                                    = 0x00008c6c,
+    SQ_FBUF_RING_BASE                                     = 0x00008c70,
+    SQ_FBUF_RING_SIZE                                     = 0x00008c74,
+    SQ_REDUC_RING_BASE                                    = 0x00008c78,
+    SQ_REDUC_RING_SIZE                                    = 0x00008c7c,
+    SQ_ALU_WORD1_OP3                                      = 0x00008dfc,
+	SRC2_SEL_mask                                     = 0x1ff << 0,
+	SRC2_SEL_shift                                    = 0,
+	    SQ_ALU_SRC_0                                  = 0xf8,
+	    SQ_ALU_SRC_1                                  = 0xf9,
+	    SQ_ALU_SRC_1_INT                              = 0xfa,
+	    SQ_ALU_SRC_M_1_INT                            = 0xfb,
+	    SQ_ALU_SRC_0_5                                = 0xfc,
+	    SQ_ALU_SRC_LITERAL                            = 0xfd,
+	    SQ_ALU_SRC_PV                                 = 0xfe,
+	    SQ_ALU_SRC_PS                                 = 0xff,
+	SRC2_REL_bit                                      = 1 << 9,
+	SRC2_CHAN_mask                                    = 0x03 << 10,
+	SRC2_CHAN_shift                                   = 10,
+	    SQ_CHAN_X                                     = 0x00,
+	    SQ_CHAN_Y                                     = 0x01,
+	    SQ_CHAN_Z                                     = 0x02,
+	    SQ_CHAN_W                                     = 0x03,
+	SRC2_NEG_bit                                      = 1 << 12,
+	SQ_ALU_WORD1_OP3__ALU_INST_mask                   = 0x1f << 13,
+	SQ_ALU_WORD1_OP3__ALU_INST_shift                  = 13,
+	    SQ_OP3_INST_MUL_LIT                           = 0x0c,
+	    SQ_OP3_INST_MUL_LIT_M2                        = 0x0d,
+	    SQ_OP3_INST_MUL_LIT_M4                        = 0x0e,
+	    SQ_OP3_INST_MUL_LIT_D2                        = 0x0f,
+	    SQ_OP3_INST_MULADD                            = 0x10,
+	    SQ_OP3_INST_MULADD_M2                         = 0x11,
+	    SQ_OP3_INST_MULADD_M4                         = 0x12,
+	    SQ_OP3_INST_MULADD_D2                         = 0x13,
+	    SQ_OP3_INST_MULADD_IEEE                       = 0x14,
+	    SQ_OP3_INST_MULADD_IEEE_M2                    = 0x15,
+	    SQ_OP3_INST_MULADD_IEEE_M4                    = 0x16,
+	    SQ_OP3_INST_MULADD_IEEE_D2                    = 0x17,
+	    SQ_OP3_INST_CNDE                              = 0x18,
+	    SQ_OP3_INST_CNDGT                             = 0x19,
+	    SQ_OP3_INST_CNDGE                             = 0x1a,
+	    SQ_OP3_INST_CNDE_INT                          = 0x1c,
+	    SQ_OP3_INST_CNDGT_INT                         = 0x1d,
+	    SQ_OP3_INST_CNDGE_INT                         = 0x1e,
+    SQ_TEX_WORD2                                          = 0x00008dfc,
+	OFFSET_X_mask                                     = 0x1f << 0,
+	OFFSET_X_shift                                    = 0,
+	OFFSET_Y_mask                                     = 0x1f << 5,
+	OFFSET_Y_shift                                    = 5,
+	OFFSET_Z_mask                                     = 0x1f << 10,
+	OFFSET_Z_shift                                    = 10,
+	SAMPLER_ID_mask                                   = 0x1f << 15,
+	SAMPLER_ID_shift                                  = 15,
+	SQ_TEX_WORD2__SRC_SEL_X_mask                      = 0x07 << 20,
+	SQ_TEX_WORD2__SRC_SEL_X_shift                     = 20,
+	    SQ_SEL_X                                      = 0x00,
+	    SQ_SEL_Y                                      = 0x01,
+	    SQ_SEL_Z                                      = 0x02,
+	    SQ_SEL_W                                      = 0x03,
+	    SQ_SEL_0                                      = 0x04,
+	    SQ_SEL_1                                      = 0x05,
+	SRC_SEL_Y_mask                                    = 0x07 << 23,
+	SRC_SEL_Y_shift                                   = 23,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SRC_SEL_Z_mask                                    = 0x07 << 26,
+	SRC_SEL_Z_shift                                   = 26,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SRC_SEL_W_mask                                    = 0x07 << 29,
+	SRC_SEL_W_shift                                   = 29,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+    SQ_CF_ALLOC_EXPORT_WORD1                              = 0x00008dfc,
+	BURST_COUNT_mask                                  = 0x0f << 17,
+	BURST_COUNT_shift                                 = 17,
+	END_OF_PROGRAM_bit                                = 1 << 21,
+	VALID_PIXEL_MODE_bit                              = 1 << 22,
+	SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask            = 0x7f << 23,
+	SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift           = 23,
+	    SQ_CF_INST_MEM_STREAM0                        = 0x20,
+	    SQ_CF_INST_MEM_STREAM1                        = 0x21,
+	    SQ_CF_INST_MEM_STREAM2                        = 0x22,
+	    SQ_CF_INST_MEM_STREAM3                        = 0x23,
+	    SQ_CF_INST_MEM_SCRATCH                        = 0x24,
+	    SQ_CF_INST_MEM_REDUCTION                      = 0x25,
+	    SQ_CF_INST_MEM_RING                           = 0x26,
+	    SQ_CF_INST_EXPORT                             = 0x27,
+	    SQ_CF_INST_EXPORT_DONE                        = 0x28,
+	WHOLE_QUAD_MODE_bit                               = 1 << 30,
+	BARRIER_bit                                       = 1 << 31,
+    SQ_CF_ALU_WORD1                                       = 0x00008dfc,
+	KCACHE_MODE1_mask                                 = 0x03 << 0,
+	KCACHE_MODE1_shift                                = 0,
+	    SQ_CF_KCACHE_NOP                              = 0x00,
+	    SQ_CF_KCACHE_LOCK_1                           = 0x01,
+	    SQ_CF_KCACHE_LOCK_2                           = 0x02,
+	    SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03,
+	KCACHE_ADDR0_mask                                 = 0xff << 2,
+	KCACHE_ADDR0_shift                                = 2,
+	KCACHE_ADDR1_mask                                 = 0xff << 10,
+	KCACHE_ADDR1_shift                                = 10,
+	SQ_CF_ALU_WORD1__COUNT_mask                       = 0x7f << 18,
+	SQ_CF_ALU_WORD1__COUNT_shift                      = 18,
+	SQ_CF_ALU_WORD1__ALT_CONST_bit                    = 1 << 25,
+	SQ_CF_ALU_WORD1__CF_INST_mask                     = 0x0f << 26,
+	SQ_CF_ALU_WORD1__CF_INST_shift                    = 26,
+	    SQ_CF_INST_ALU                                = 0x08,
+	    SQ_CF_INST_ALU_PUSH_BEFORE                    = 0x09,
+	    SQ_CF_INST_ALU_POP_AFTER                      = 0x0a,
+	    SQ_CF_INST_ALU_POP2_AFTER                     = 0x0b,
+	    SQ_CF_INST_ALU_CONTINUE                       = 0x0d,
+	    SQ_CF_INST_ALU_BREAK                          = 0x0e,
+	    SQ_CF_INST_ALU_ELSE_AFTER                     = 0x0f,
+/* 	WHOLE_QUAD_MODE_bit                               = 1 << 30, */
+/* 	BARRIER_bit                                       = 1 << 31, */
+    SQ_TEX_WORD1                                          = 0x00008dfc,
+	SQ_TEX_WORD1__DST_GPR_mask                        = 0x7f << 0,
+	SQ_TEX_WORD1__DST_GPR_shift                       = 0,
+	SQ_TEX_WORD1__DST_REL_bit                         = 1 << 7,
+	SQ_TEX_WORD1__DST_SEL_X_mask                      = 0x07 << 9,
+	SQ_TEX_WORD1__DST_SEL_X_shift                     = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	    SQ_SEL_MASK                                   = 0x07,
+	SQ_TEX_WORD1__DST_SEL_Y_mask                      = 0x07 << 12,
+	SQ_TEX_WORD1__DST_SEL_Y_shift                     = 12,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_TEX_WORD1__DST_SEL_Z_mask                      = 0x07 << 15,
+	SQ_TEX_WORD1__DST_SEL_Z_shift                     = 15,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_TEX_WORD1__DST_SEL_W_mask                      = 0x07 << 18,
+	SQ_TEX_WORD1__DST_SEL_W_shift                     = 18,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_TEX_WORD1__LOD_BIAS_mask                       = 0x7f << 21,
+	SQ_TEX_WORD1__LOD_BIAS_shift                      = 21,
+	COORD_TYPE_X_bit                                  = 1 << 28,
+	COORD_TYPE_Y_bit                                  = 1 << 29,
+	COORD_TYPE_Z_bit                                  = 1 << 30,
+	COORD_TYPE_W_bit                                  = 1 << 31,
+    SQ_VTX_WORD0                                          = 0x00008dfc,
+	VTX_INST_mask                                     = 0x1f << 0,
+	VTX_INST_shift                                    = 0,
+	    SQ_VTX_INST_FETCH                             = 0x00,
+	    SQ_VTX_INST_SEMANTIC                          = 0x01,
+	FETCH_TYPE_mask                                   = 0x03 << 5,
+	FETCH_TYPE_shift                                  = 5,
+	    SQ_VTX_FETCH_VERTEX_DATA                      = 0x00,
+	    SQ_VTX_FETCH_INSTANCE_DATA                    = 0x01,
+	    SQ_VTX_FETCH_NO_INDEX_OFFSET                  = 0x02,
+	FETCH_WHOLE_QUAD_bit                              = 1 << 7,
+	BUFFER_ID_mask                                    = 0xff << 8,
+	BUFFER_ID_shift                                   = 8,
+	SRC_GPR_mask                                      = 0x7f << 16,
+	SRC_GPR_shift                                     = 16,
+	SRC_REL_bit                                       = 1 << 23,
+	SQ_VTX_WORD0__SRC_SEL_X_mask                      = 0x03 << 24,
+	SQ_VTX_WORD0__SRC_SEL_X_shift                     = 24,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+	MEGA_FETCH_COUNT_mask                             = 0x3f << 26,
+	MEGA_FETCH_COUNT_shift                            = 26,
+    SQ_CF_ALLOC_EXPORT_WORD1_SWIZ                         = 0x00008dfc,
+	SEL_X_mask                                        = 0x07 << 0,
+	SEL_X_shift                                       = 0,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SEL_Y_mask                                        = 0x07 << 3,
+	SEL_Y_shift                                       = 3,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SEL_Z_mask                                        = 0x07 << 6,
+	SEL_Z_shift                                       = 6,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SEL_W_mask                                        = 0x07 << 9,
+	SEL_W_shift                                       = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+    SQ_ALU_WORD1                                          = 0x00008dfc,
+	ENCODING_mask                                     = 0x07 << 15,
+	ENCODING_shift                                    = 15,
+	BANK_SWIZZLE_mask                                 = 0x07 << 18,
+	BANK_SWIZZLE_shift                                = 18,
+	    SQ_ALU_VEC_012                                = 0x00,
+	    SQ_ALU_VEC_021                                = 0x01,
+	    SQ_ALU_VEC_120                                = 0x02,
+	    SQ_ALU_VEC_102                                = 0x03,
+	    SQ_ALU_VEC_201                                = 0x04,
+	    SQ_ALU_VEC_210                                = 0x05,
+	SQ_ALU_WORD1__DST_GPR_mask                        = 0x7f << 21,
+	SQ_ALU_WORD1__DST_GPR_shift                       = 21,
+	SQ_ALU_WORD1__DST_REL_bit                         = 1 << 28,
+	DST_CHAN_mask                                     = 0x03 << 29,
+	DST_CHAN_shift                                    = 29,
+	    CHAN_X                                        = 0x00,
+	    CHAN_Y                                        = 0x01,
+	    CHAN_Z                                        = 0x02,
+	    CHAN_W                                        = 0x03,
+	SQ_ALU_WORD1__CLAMP_bit                           = 1 << 31,
+    SQ_CF_ALU_WORD0                                       = 0x00008dfc,
+	SQ_CF_ALU_WORD0__ADDR_mask                        = 0x3fffff << 0,
+	SQ_CF_ALU_WORD0__ADDR_shift                       = 0,
+	KCACHE_BANK0_mask                                 = 0x0f << 22,
+	KCACHE_BANK0_shift                                = 22,
+	KCACHE_BANK1_mask                                 = 0x0f << 26,
+	KCACHE_BANK1_shift                                = 26,
+	KCACHE_MODE0_mask                                 = 0x03 << 30,
+	KCACHE_MODE0_shift                                = 30,
+/* 	    SQ_CF_KCACHE_NOP                              = 0x00, */
+/* 	    SQ_CF_KCACHE_LOCK_1                           = 0x01, */
+/* 	    SQ_CF_KCACHE_LOCK_2                           = 0x02, */
+/* 	    SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03, */
+    SQ_VTX_WORD2                                          = 0x00008dfc,
+	SQ_VTX_WORD2__OFFSET_mask                         = 0xffff << 0,
+	SQ_VTX_WORD2__OFFSET_shift                        = 0,
+	SQ_VTX_WORD2__ENDIAN_SWAP_mask                    = 0x03 << 16,
+	SQ_VTX_WORD2__ENDIAN_SWAP_shift                   = 16,
+	    SQ_ENDIAN_NONE                                = 0x00,
+	    SQ_ENDIAN_8IN16                               = 0x01,
+	    SQ_ENDIAN_8IN32                               = 0x02,
+	CONST_BUF_NO_STRIDE_bit                           = 1 << 18,
+	MEGA_FETCH_bit                                    = 1 << 19,
+	SQ_VTX_WORD2__ALT_CONST_bit                       = 1 << 20,
+    SQ_ALU_WORD1_OP2_V2                                   = 0x00008dfc,
+	SRC0_ABS_bit                                      = 1 << 0,
+	SRC1_ABS_bit                                      = 1 << 1,
+	UPDATE_EXECUTE_MASK_bit                           = 1 << 2,
+	UPDATE_PRED_bit                                   = 1 << 3,
+	WRITE_MASK_bit                                    = 1 << 4,
+	SQ_ALU_WORD1_OP2_V2__OMOD_mask                    = 0x03 << 5,
+	SQ_ALU_WORD1_OP2_V2__OMOD_shift                   = 5,
+	    SQ_ALU_OMOD_OFF                               = 0x00,
+	    SQ_ALU_OMOD_M2                                = 0x01,
+	    SQ_ALU_OMOD_M4                                = 0x02,
+	    SQ_ALU_OMOD_D2                                = 0x03,
+	SQ_ALU_WORD1_OP2_V2__ALU_INST_mask                = 0x7ff << 7,
+	SQ_ALU_WORD1_OP2_V2__ALU_INST_shift               = 7,
+	    SQ_OP2_INST_ADD                               = 0x00,
+	    SQ_OP2_INST_MUL                               = 0x01,
+	    SQ_OP2_INST_MUL_IEEE                          = 0x02,
+	    SQ_OP2_INST_MAX                               = 0x03,
+	    SQ_OP2_INST_MIN                               = 0x04,
+	    SQ_OP2_INST_MAX_DX10                          = 0x05,
+	    SQ_OP2_INST_MIN_DX10                          = 0x06,
+	    SQ_OP2_INST_SETE                              = 0x08,
+	    SQ_OP2_INST_SETGT                             = 0x09,
+	    SQ_OP2_INST_SETGE                             = 0x0a,
+	    SQ_OP2_INST_SETNE                             = 0x0b,
+	    SQ_OP2_INST_SETE_DX10                         = 0x0c,
+	    SQ_OP2_INST_SETGT_DX10                        = 0x0d,
+	    SQ_OP2_INST_SETGE_DX10                        = 0x0e,
+	    SQ_OP2_INST_SETNE_DX10                        = 0x0f,
+	    SQ_OP2_INST_FRACT                             = 0x10,
+	    SQ_OP2_INST_TRUNC                             = 0x11,
+	    SQ_OP2_INST_CEIL                              = 0x12,
+	    SQ_OP2_INST_RNDNE                             = 0x13,
+	    SQ_OP2_INST_FLOOR                             = 0x14,
+	    SQ_OP2_INST_MOVA                              = 0x15,
+	    SQ_OP2_INST_MOVA_FLOOR                        = 0x16,
+	    SQ_OP2_INST_MOVA_INT                          = 0x18,
+	    SQ_OP2_INST_MOV                               = 0x19,
+	    SQ_OP2_INST_NOP                               = 0x1a,
+	    SQ_OP2_INST_PRED_SETGT_UINT                   = 0x1e,
+	    SQ_OP2_INST_PRED_SETGE_UINT                   = 0x1f,
+	    SQ_OP2_INST_PRED_SETE                         = 0x20,
+	    SQ_OP2_INST_PRED_SETGT                        = 0x21,
+	    SQ_OP2_INST_PRED_SETGE                        = 0x22,
+	    SQ_OP2_INST_PRED_SETNE                        = 0x23,
+	    SQ_OP2_INST_PRED_SET_INV                      = 0x24,
+	    SQ_OP2_INST_PRED_SET_POP                      = 0x25,
+	    SQ_OP2_INST_PRED_SET_CLR                      = 0x26,
+	    SQ_OP2_INST_PRED_SET_RESTORE                  = 0x27,
+	    SQ_OP2_INST_PRED_SETE_PUSH                    = 0x28,
+	    SQ_OP2_INST_PRED_SETGT_PUSH                   = 0x29,
+	    SQ_OP2_INST_PRED_SETGE_PUSH                   = 0x2a,
+	    SQ_OP2_INST_PRED_SETNE_PUSH                   = 0x2b,
+	    SQ_OP2_INST_KILLE                             = 0x2c,
+	    SQ_OP2_INST_KILLGT                            = 0x2d,
+	    SQ_OP2_INST_KILLGE                            = 0x2e,
+	    SQ_OP2_INST_KILLNE                            = 0x2f,
+	    SQ_OP2_INST_AND_INT                           = 0x30,
+	    SQ_OP2_INST_OR_INT                            = 0x31,
+	    SQ_OP2_INST_XOR_INT                           = 0x32,
+	    SQ_OP2_INST_NOT_INT                           = 0x33,
+	    SQ_OP2_INST_ADD_INT                           = 0x34,
+	    SQ_OP2_INST_SUB_INT                           = 0x35,
+	    SQ_OP2_INST_MAX_INT                           = 0x36,
+	    SQ_OP2_INST_MIN_INT                           = 0x37,
+	    SQ_OP2_INST_MAX_UINT                          = 0x38,
+	    SQ_OP2_INST_MIN_UINT                          = 0x39,
+	    SQ_OP2_INST_SETE_INT                          = 0x3a,
+	    SQ_OP2_INST_SETGT_INT                         = 0x3b,
+	    SQ_OP2_INST_SETGE_INT                         = 0x3c,
+	    SQ_OP2_INST_SETNE_INT                         = 0x3d,
+	    SQ_OP2_INST_SETGT_UINT                        = 0x3e,
+	    SQ_OP2_INST_SETGE_UINT                        = 0x3f,
+	    SQ_OP2_INST_KILLGT_UINT                       = 0x40,
+	    SQ_OP2_INST_KILLGE_UINT                       = 0x41,
+	    SQ_OP2_INST_PRED_SETE_INT                     = 0x42,
+	    SQ_OP2_INST_PRED_SETGT_INT                    = 0x43,
+	    SQ_OP2_INST_PRED_SETGE_INT                    = 0x44,
+	    SQ_OP2_INST_PRED_SETNE_INT                    = 0x45,
+	    SQ_OP2_INST_KILLE_INT                         = 0x46,
+	    SQ_OP2_INST_KILLGT_INT                        = 0x47,
+	    SQ_OP2_INST_KILLGE_INT                        = 0x48,
+	    SQ_OP2_INST_KILLNE_INT                        = 0x49,
+	    SQ_OP2_INST_PRED_SETE_PUSH_INT                = 0x4a,
+	    SQ_OP2_INST_PRED_SETGT_PUSH_INT               = 0x4b,
+	    SQ_OP2_INST_PRED_SETGE_PUSH_INT               = 0x4c,
+	    SQ_OP2_INST_PRED_SETNE_PUSH_INT               = 0x4d,
+	    SQ_OP2_INST_PRED_SETLT_PUSH_INT               = 0x4e,
+	    SQ_OP2_INST_PRED_SETLE_PUSH_INT               = 0x4f,
+	    SQ_OP2_INST_DOT4                              = 0x50,
+	    SQ_OP2_INST_DOT4_IEEE                         = 0x51,
+	    SQ_OP2_INST_CUBE                              = 0x52,
+	    SQ_OP2_INST_MAX4                              = 0x53,
+	    SQ_OP2_INST_MOVA_GPR_INT                      = 0x60,
+	    SQ_OP2_INST_EXP_IEEE                          = 0x61,
+	    SQ_OP2_INST_LOG_CLAMPED                       = 0x62,
+	    SQ_OP2_INST_LOG_IEEE                          = 0x63,
+	    SQ_OP2_INST_RECIP_CLAMPED                     = 0x64,
+	    SQ_OP2_INST_RECIP_FF                          = 0x65,
+	    SQ_OP2_INST_RECIP_IEEE                        = 0x66,
+	    SQ_OP2_INST_RECIPSQRT_CLAMPED                 = 0x67,
+	    SQ_OP2_INST_RECIPSQRT_FF                      = 0x68,
+	    SQ_OP2_INST_RECIPSQRT_IEEE                    = 0x69,
+	    SQ_OP2_INST_SQRT_IEEE                         = 0x6a,
+	    SQ_OP2_INST_FLT_TO_INT                        = 0x6b,
+	    SQ_OP2_INST_INT_TO_FLT                        = 0x6c,
+	    SQ_OP2_INST_UINT_TO_FLT                       = 0x6d,
+	    SQ_OP2_INST_SIN                               = 0x6e,
+	    SQ_OP2_INST_COS                               = 0x6f,
+	    SQ_OP2_INST_ASHR_INT                          = 0x70,
+	    SQ_OP2_INST_LSHR_INT                          = 0x71,
+	    SQ_OP2_INST_LSHL_INT                          = 0x72,
+	    SQ_OP2_INST_MULLO_INT                         = 0x73,
+	    SQ_OP2_INST_MULHI_INT                         = 0x74,
+	    SQ_OP2_INST_MULLO_UINT                        = 0x75,
+	    SQ_OP2_INST_MULHI_UINT                        = 0x76,
+	    SQ_OP2_INST_RECIP_INT                         = 0x77,
+	    SQ_OP2_INST_RECIP_UINT                        = 0x78,
+	    SQ_OP2_INST_FLT_TO_UINT                       = 0x79,
+    SQ_CF_ALLOC_EXPORT_WORD1_BUF                          = 0x00008dfc,
+	ARRAY_SIZE_mask                                   = 0xfff << 0,
+	ARRAY_SIZE_shift                                  = 0,
+	COMP_MASK_mask                                    = 0x0f << 12,
+	COMP_MASK_shift                                   = 12,
+    SQ_CF_WORD0                                           = 0x00008dfc,
+    SQ_CF_ALLOC_EXPORT_WORD0                              = 0x00008dfc,
+	ARRAY_BASE_mask                                   = 0x1fff << 0,
+	ARRAY_BASE_shift                                  = 0,
+	SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask               = 0x03 << 13,
+	SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift              = 13,
+	    SQ_EXPORT_PIXEL                               = 0x00,
+	    SQ_EXPORT_POS                                 = 0x01,
+	    SQ_EXPORT_PARAM                               = 0x02,
+	    X_UNUSED_FOR_SX_EXPORTS                       = 0x03,
+	RW_GPR_mask                                       = 0x7f << 15,
+	RW_GPR_shift                                      = 15,
+	RW_REL_bit                                        = 1 << 22,
+	INDEX_GPR_mask                                    = 0x7f << 23,
+	INDEX_GPR_shift                                   = 23,
+	ELEM_SIZE_mask                                    = 0x03 << 30,
+	ELEM_SIZE_shift                                   = 30,
+    SQ_VTX_WORD1                                          = 0x00008dfc,
+	SQ_VTX_WORD1__DST_SEL_X_mask                      = 0x07 << 9,
+	SQ_VTX_WORD1__DST_SEL_X_shift                     = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_VTX_WORD1__DST_SEL_Y_mask                      = 0x07 << 12,
+	SQ_VTX_WORD1__DST_SEL_Y_shift                     = 12,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_VTX_WORD1__DST_SEL_Z_mask                      = 0x07 << 15,
+	SQ_VTX_WORD1__DST_SEL_Z_shift                     = 15,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_VTX_WORD1__DST_SEL_W_mask                      = 0x07 << 18,
+	SQ_VTX_WORD1__DST_SEL_W_shift                     = 18,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	USE_CONST_FIELDS_bit                              = 1 << 21,
+	SQ_VTX_WORD1__DATA_FORMAT_mask                    = 0x3f << 22,
+	SQ_VTX_WORD1__DATA_FORMAT_shift                   = 22,
+	SQ_VTX_WORD1__NUM_FORMAT_ALL_mask                 = 0x03 << 28,
+	SQ_VTX_WORD1__NUM_FORMAT_ALL_shift                = 28,
+	    SQ_NUM_FORMAT_NORM                            = 0x00,
+	    SQ_NUM_FORMAT_INT                             = 0x01,
+	    SQ_NUM_FORMAT_SCALED                          = 0x02,
+	SQ_VTX_WORD1__FORMAT_COMP_ALL_bit                 = 1 << 30,
+	SQ_VTX_WORD1__SRF_MODE_ALL_bit                    = 1 << 31,
+    SQ_ALU_WORD1_OP2                                      = 0x00008dfc,
+/* 	SRC0_ABS_bit                                      = 1 << 0, */
+/* 	SRC1_ABS_bit                                      = 1 << 1, */
+/* 	UPDATE_EXECUTE_MASK_bit                           = 1 << 2, */
+/* 	UPDATE_PRED_bit                                   = 1 << 3, */
+/* 	WRITE_MASK_bit                                    = 1 << 4, */
+	FOG_MERGE_bit                                     = 1 << 5,
+	SQ_ALU_WORD1_OP2__OMOD_mask                       = 0x03 << 6,
+	SQ_ALU_WORD1_OP2__OMOD_shift                      = 6,
+/* 	    SQ_ALU_OMOD_OFF                               = 0x00, */
+/* 	    SQ_ALU_OMOD_M2                                = 0x01, */
+/* 	    SQ_ALU_OMOD_M4                                = 0x02, */
+/* 	    SQ_ALU_OMOD_D2                                = 0x03, */
+	SQ_ALU_WORD1_OP2__ALU_INST_mask                   = 0x3ff << 8,
+	SQ_ALU_WORD1_OP2__ALU_INST_shift                  = 8,
+/* 	    SQ_OP2_INST_ADD                               = 0x00, */
+/* 	    SQ_OP2_INST_MUL                               = 0x01, */
+/* 	    SQ_OP2_INST_MUL_IEEE                          = 0x02, */
+/* 	    SQ_OP2_INST_MAX                               = 0x03, */
+/* 	    SQ_OP2_INST_MIN                               = 0x04, */
+/* 	    SQ_OP2_INST_MAX_DX10                          = 0x05, */
+/* 	    SQ_OP2_INST_MIN_DX10                          = 0x06, */
+/* 	    SQ_OP2_INST_SETE                              = 0x08, */
+/* 	    SQ_OP2_INST_SETGT                             = 0x09, */
+/* 	    SQ_OP2_INST_SETGE                             = 0x0a, */
+/* 	    SQ_OP2_INST_SETNE                             = 0x0b, */
+/* 	    SQ_OP2_INST_SETE_DX10                         = 0x0c, */
+/* 	    SQ_OP2_INST_SETGT_DX10                        = 0x0d, */
+/* 	    SQ_OP2_INST_SETGE_DX10                        = 0x0e, */
+/* 	    SQ_OP2_INST_SETNE_DX10                        = 0x0f, */
+/* 	    SQ_OP2_INST_FRACT                             = 0x10, */
+/* 	    SQ_OP2_INST_TRUNC                             = 0x11, */
+/* 	    SQ_OP2_INST_CEIL                              = 0x12, */
+/* 	    SQ_OP2_INST_RNDNE                             = 0x13, */
+/* 	    SQ_OP2_INST_FLOOR                             = 0x14, */
+/* 	    SQ_OP2_INST_MOVA                              = 0x15, */
+/* 	    SQ_OP2_INST_MOVA_FLOOR                        = 0x16, */
+/* 	    SQ_OP2_INST_MOVA_INT                          = 0x18, */
+/* 	    SQ_OP2_INST_MOV                               = 0x19, */
+/* 	    SQ_OP2_INST_NOP                               = 0x1a, */
+/* 	    SQ_OP2_INST_PRED_SETGT_UINT                   = 0x1e, */
+/* 	    SQ_OP2_INST_PRED_SETGE_UINT                   = 0x1f, */
+/* 	    SQ_OP2_INST_PRED_SETE                         = 0x20, */
+/* 	    SQ_OP2_INST_PRED_SETGT                        = 0x21, */
+/* 	    SQ_OP2_INST_PRED_SETGE                        = 0x22, */
+/* 	    SQ_OP2_INST_PRED_SETNE                        = 0x23, */
+/* 	    SQ_OP2_INST_PRED_SET_INV                      = 0x24, */
+/* 	    SQ_OP2_INST_PRED_SET_POP                      = 0x25, */
+/* 	    SQ_OP2_INST_PRED_SET_CLR                      = 0x26, */
+/* 	    SQ_OP2_INST_PRED_SET_RESTORE                  = 0x27, */
+/* 	    SQ_OP2_INST_PRED_SETE_PUSH                    = 0x28, */
+/* 	    SQ_OP2_INST_PRED_SETGT_PUSH                   = 0x29, */
+/* 	    SQ_OP2_INST_PRED_SETGE_PUSH                   = 0x2a, */
+/* 	    SQ_OP2_INST_PRED_SETNE_PUSH                   = 0x2b, */
+/* 	    SQ_OP2_INST_KILLE                             = 0x2c, */
+/* 	    SQ_OP2_INST_KILLGT                            = 0x2d, */
+/* 	    SQ_OP2_INST_KILLGE                            = 0x2e, */
+/* 	    SQ_OP2_INST_KILLNE                            = 0x2f, */
+/* 	    SQ_OP2_INST_AND_INT                           = 0x30, */
+/* 	    SQ_OP2_INST_OR_INT                            = 0x31, */
+/* 	    SQ_OP2_INST_XOR_INT                           = 0x32, */
+/* 	    SQ_OP2_INST_NOT_INT                           = 0x33, */
+/* 	    SQ_OP2_INST_ADD_INT                           = 0x34, */
+/* 	    SQ_OP2_INST_SUB_INT                           = 0x35, */
+/* 	    SQ_OP2_INST_MAX_INT                           = 0x36, */
+/* 	    SQ_OP2_INST_MIN_INT                           = 0x37, */
+/* 	    SQ_OP2_INST_MAX_UINT                          = 0x38, */
+/* 	    SQ_OP2_INST_MIN_UINT                          = 0x39, */
+/* 	    SQ_OP2_INST_SETE_INT                          = 0x3a, */
+/* 	    SQ_OP2_INST_SETGT_INT                         = 0x3b, */
+/* 	    SQ_OP2_INST_SETGE_INT                         = 0x3c, */
+/* 	    SQ_OP2_INST_SETNE_INT                         = 0x3d, */
+/* 	    SQ_OP2_INST_SETGT_UINT                        = 0x3e, */
+/* 	    SQ_OP2_INST_SETGE_UINT                        = 0x3f, */
+/* 	    SQ_OP2_INST_KILLGT_UINT                       = 0x40, */
+/* 	    SQ_OP2_INST_KILLGE_UINT                       = 0x41, */
+/* 	    SQ_OP2_INST_PRED_SETE_INT                     = 0x42, */
+/* 	    SQ_OP2_INST_PRED_SETGT_INT                    = 0x43, */
+/* 	    SQ_OP2_INST_PRED_SETGE_INT                    = 0x44, */
+/* 	    SQ_OP2_INST_PRED_SETNE_INT                    = 0x45, */
+/* 	    SQ_OP2_INST_KILLE_INT                         = 0x46, */
+/* 	    SQ_OP2_INST_KILLGT_INT                        = 0x47, */
+/* 	    SQ_OP2_INST_KILLGE_INT                        = 0x48, */
+/* 	    SQ_OP2_INST_KILLNE_INT                        = 0x49, */
+/* 	    SQ_OP2_INST_PRED_SETE_PUSH_INT                = 0x4a, */
+/* 	    SQ_OP2_INST_PRED_SETGT_PUSH_INT               = 0x4b, */
+/* 	    SQ_OP2_INST_PRED_SETGE_PUSH_INT               = 0x4c, */
+/* 	    SQ_OP2_INST_PRED_SETNE_PUSH_INT               = 0x4d, */
+/* 	    SQ_OP2_INST_PRED_SETLT_PUSH_INT               = 0x4e, */
+/* 	    SQ_OP2_INST_PRED_SETLE_PUSH_INT               = 0x4f, */
+/* 	    SQ_OP2_INST_DOT4                              = 0x50, */
+/* 	    SQ_OP2_INST_DOT4_IEEE                         = 0x51, */
+/* 	    SQ_OP2_INST_CUBE                              = 0x52, */
+/* 	    SQ_OP2_INST_MAX4                              = 0x53, */
+/* 	    SQ_OP2_INST_MOVA_GPR_INT                      = 0x60, */
+/* 	    SQ_OP2_INST_EXP_IEEE                          = 0x61, */
+/* 	    SQ_OP2_INST_LOG_CLAMPED                       = 0x62, */
+/* 	    SQ_OP2_INST_LOG_IEEE                          = 0x63, */
+/* 	    SQ_OP2_INST_RECIP_CLAMPED                     = 0x64, */
+/* 	    SQ_OP2_INST_RECIP_FF                          = 0x65, */
+/* 	    SQ_OP2_INST_RECIP_IEEE                        = 0x66, */
+/* 	    SQ_OP2_INST_RECIPSQRT_CLAMPED                 = 0x67, */
+/* 	    SQ_OP2_INST_RECIPSQRT_FF                      = 0x68, */
+/* 	    SQ_OP2_INST_RECIPSQRT_IEEE                    = 0x69, */
+/* 	    SQ_OP2_INST_SQRT_IEEE                         = 0x6a, */
+/* 	    SQ_OP2_INST_FLT_TO_INT                        = 0x6b, */
+/* 	    SQ_OP2_INST_INT_TO_FLT                        = 0x6c, */
+/* 	    SQ_OP2_INST_UINT_TO_FLT                       = 0x6d, */
+/* 	    SQ_OP2_INST_SIN                               = 0x6e, */
+/* 	    SQ_OP2_INST_COS                               = 0x6f, */
+/* 	    SQ_OP2_INST_ASHR_INT                          = 0x70, */
+/* 	    SQ_OP2_INST_LSHR_INT                          = 0x71, */
+/* 	    SQ_OP2_INST_LSHL_INT                          = 0x72, */
+/* 	    SQ_OP2_INST_MULLO_INT                         = 0x73, */
+/* 	    SQ_OP2_INST_MULHI_INT                         = 0x74, */
+/* 	    SQ_OP2_INST_MULLO_UINT                        = 0x75, */
+/* 	    SQ_OP2_INST_MULHI_UINT                        = 0x76, */
+/* 	    SQ_OP2_INST_RECIP_INT                         = 0x77, */
+/* 	    SQ_OP2_INST_RECIP_UINT                        = 0x78, */
+/* 	    SQ_OP2_INST_FLT_TO_UINT                       = 0x79, */
+    SQ_CF_WORD1                                           = 0x00008dfc,
+	POP_COUNT_mask                                    = 0x07 << 0,
+	POP_COUNT_shift                                   = 0,
+	CF_CONST_mask                                     = 0x1f << 3,
+	CF_CONST_shift                                    = 3,
+	COND_mask                                         = 0x03 << 8,
+	COND_shift                                        = 8,
+	    SQ_CF_COND_ACTIVE                             = 0x00,
+	    SQ_CF_COND_FALSE                              = 0x01,
+	    SQ_CF_COND_BOOL                               = 0x02,
+	    SQ_CF_COND_NOT_BOOL                           = 0x03,
+	SQ_CF_WORD1__COUNT_mask                           = 0x07 << 10,
+	SQ_CF_WORD1__COUNT_shift                          = 10,
+	CALL_COUNT_mask                                   = 0x3f << 13,
+	CALL_COUNT_shift                                  = 13,
+	COUNT_3_bit                                       = 1 << 19,
+/* 	END_OF_PROGRAM_bit                                = 1 << 21, */
+/* 	VALID_PIXEL_MODE_bit                              = 1 << 22, */
+	SQ_CF_WORD1__CF_INST_mask                         = 0x7f << 23,
+	SQ_CF_WORD1__CF_INST_shift                        = 23,
+	    SQ_CF_INST_NOP                                = 0x00,
+	    SQ_CF_INST_TEX                                = 0x01,
+	    SQ_CF_INST_VTX                                = 0x02,
+	    SQ_CF_INST_VTX_TC                             = 0x03,
+	    SQ_CF_INST_LOOP_START                         = 0x04,
+	    SQ_CF_INST_LOOP_END                           = 0x05,
+	    SQ_CF_INST_LOOP_START_DX10                    = 0x06,
+	    SQ_CF_INST_LOOP_START_NO_AL                   = 0x07,
+	    SQ_CF_INST_LOOP_CONTINUE                      = 0x08,
+	    SQ_CF_INST_LOOP_BREAK                         = 0x09,
+	    SQ_CF_INST_JUMP                               = 0x0a,
+	    SQ_CF_INST_PUSH                               = 0x0b,
+	    SQ_CF_INST_PUSH_ELSE                          = 0x0c,
+	    SQ_CF_INST_ELSE                               = 0x0d,
+	    SQ_CF_INST_POP                                = 0x0e,
+	    SQ_CF_INST_POP_JUMP                           = 0x0f,
+	    SQ_CF_INST_POP_PUSH                           = 0x10,
+	    SQ_CF_INST_POP_PUSH_ELSE                      = 0x11,
+	    SQ_CF_INST_CALL                               = 0x12,
+	    SQ_CF_INST_CALL_FS                            = 0x13,
+	    SQ_CF_INST_RETURN                             = 0x14,
+	    SQ_CF_INST_EMIT_VERTEX                        = 0x15,
+	    SQ_CF_INST_EMIT_CUT_VERTEX                    = 0x16,
+	    SQ_CF_INST_CUT_VERTEX                         = 0x17,
+	    SQ_CF_INST_KILL                               = 0x18,
+/* 	WHOLE_QUAD_MODE_bit                               = 1 << 30, */
+/* 	BARRIER_bit                                       = 1 << 31, */
+    SQ_VTX_WORD1_SEM                                      = 0x00008dfc,
+	SEMANTIC_ID_mask                                  = 0xff << 0,
+	SEMANTIC_ID_shift                                 = 0,
+    SQ_TEX_WORD0                                          = 0x00008dfc,
+	TEX_INST_mask                                     = 0x1f << 0,
+	TEX_INST_shift                                    = 0,
+	    SQ_TEX_INST_VTX_FETCH                         = 0x00,
+	    SQ_TEX_INST_VTX_SEMANTIC                      = 0x01,
+	    SQ_TEX_INST_LD                                = 0x03,
+	    SQ_TEX_INST_GET_TEXTURE_RESINFO               = 0x04,
+	    SQ_TEX_INST_GET_NUMBER_OF_SAMPLES             = 0x05,
+	    SQ_TEX_INST_GET_LOD                           = 0x06,
+	    SQ_TEX_INST_GET_GRADIENTS_H                   = 0x07,
+	    SQ_TEX_INST_GET_GRADIENTS_V                   = 0x08,
+	    SQ_TEX_INST_GET_LERP                          = 0x09,
+	    SQ_TEX_INST_RESERVED_10                       = 0x0a,
+	    SQ_TEX_INST_SET_GRADIENTS_H                   = 0x0b,
+	    SQ_TEX_INST_SET_GRADIENTS_V                   = 0x0c,
+	    SQ_TEX_INST_PASS                              = 0x0d,
+	    X_Z_SET_INDEX_FOR_ARRAY_OF_CUBEMAPS           = 0x0e,
+	    SQ_TEX_INST_SAMPLE                            = 0x10,
+	    SQ_TEX_INST_SAMPLE_L                          = 0x11,
+	    SQ_TEX_INST_SAMPLE_LB                         = 0x12,
+	    SQ_TEX_INST_SAMPLE_LZ                         = 0x13,
+	    SQ_TEX_INST_SAMPLE_G                          = 0x14,
+	    SQ_TEX_INST_SAMPLE_G_L                        = 0x15,
+	    SQ_TEX_INST_SAMPLE_G_LB                       = 0x16,
+	    SQ_TEX_INST_SAMPLE_G_LZ                       = 0x17,
+	    SQ_TEX_INST_SAMPLE_C                          = 0x18,
+	    SQ_TEX_INST_SAMPLE_C_L                        = 0x19,
+	    SQ_TEX_INST_SAMPLE_C_LB                       = 0x1a,
+	    SQ_TEX_INST_SAMPLE_C_LZ                       = 0x1b,
+	    SQ_TEX_INST_SAMPLE_C_G                        = 0x1c,
+	    SQ_TEX_INST_SAMPLE_C_G_L                      = 0x1d,
+	    SQ_TEX_INST_SAMPLE_C_G_LB                     = 0x1e,
+	    SQ_TEX_INST_SAMPLE_C_G_LZ                     = 0x1f,
+	BC_FRAC_MODE_bit                                  = 1 << 5,
+/* 	FETCH_WHOLE_QUAD_bit                              = 1 << 7, */
+	RESOURCE_ID_mask                                  = 0xff << 8,
+	RESOURCE_ID_shift                                 = 8,
+/* 	SRC_GPR_mask                                      = 0x7f << 16, */
+/* 	SRC_GPR_shift                                     = 16, */
+/* 	SRC_REL_bit                                       = 1 << 23, */
+	SQ_TEX_WORD0__ALT_CONST_bit                       = 1 << 24,
+    SQ_VTX_WORD1_GPR                                      = 0x00008dfc,
+	SQ_VTX_WORD1_GPR__DST_GPR_mask                    = 0x7f << 0,
+	SQ_VTX_WORD1_GPR__DST_GPR_shift                   = 0,
+	SQ_VTX_WORD1_GPR__DST_REL_bit                     = 1 << 7,
+    SQ_ALU_WORD0                                          = 0x00008dfc,
+	SRC0_SEL_mask                                     = 0x1ff << 0,
+	SRC0_SEL_shift                                    = 0,
+/* 	    SQ_ALU_SRC_0                                  = 0xf8, */
+/* 	    SQ_ALU_SRC_1                                  = 0xf9, */
+/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */
+/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */
+/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */
+/* 	    SQ_ALU_SRC_PS                                 = 0xff, */
+	SRC0_REL_bit                                      = 1 << 9,
+	SRC0_CHAN_mask                                    = 0x03 << 10,
+	SRC0_CHAN_shift                                   = 10,
+/* 	    SQ_CHAN_X                                     = 0x00, */
+/* 	    SQ_CHAN_Y                                     = 0x01, */
+/* 	    SQ_CHAN_Z                                     = 0x02, */
+/* 	    SQ_CHAN_W                                     = 0x03, */
+	SRC0_NEG_bit                                      = 1 << 12,
+	SRC1_SEL_mask                                     = 0x1ff << 13,
+	SRC1_SEL_shift                                    = 13,
+/* 	    SQ_ALU_SRC_0                                  = 0xf8, */
+/* 	    SQ_ALU_SRC_1                                  = 0xf9, */
+/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */
+/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */
+/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */
+/* 	    SQ_ALU_SRC_PS                                 = 0xff, */
+	SRC1_REL_bit                                      = 1 << 22,
+	SRC1_CHAN_mask                                    = 0x03 << 23,
+	SRC1_CHAN_shift                                   = 23,
+/* 	    SQ_CHAN_X                                     = 0x00, */
+/* 	    SQ_CHAN_Y                                     = 0x01, */
+/* 	    SQ_CHAN_Z                                     = 0x02, */
+/* 	    SQ_CHAN_W                                     = 0x03, */
+	SRC1_NEG_bit                                      = 1 << 25,
+	INDEX_MODE_mask                                   = 0x07 << 26,
+	INDEX_MODE_shift                                  = 26,
+	    SQ_INDEX_AR_X                                 = 0x00,
+	    SQ_INDEX_AR_Y                                 = 0x01,
+	    SQ_INDEX_AR_Z                                 = 0x02,
+	    SQ_INDEX_AR_W                                 = 0x03,
+	    SQ_INDEX_LOOP                                 = 0x04,
+	PRED_SEL_mask                                     = 0x03 << 29,
+	PRED_SEL_shift                                    = 29,
+	    SQ_PRED_SEL_OFF                               = 0x00,
+	    SQ_PRED_SEL_ZERO                              = 0x02,
+	    SQ_PRED_SEL_ONE                               = 0x03,
+	LAST_bit                                          = 1 << 31,
+    SX_EXPORT_BUFFER_SIZES                                = 0x0000900c,
+	COLOR_BUFFER_SIZE_mask                            = 0xff << 0,
+	COLOR_BUFFER_SIZE_shift                           = 0,
+	POSITION_BUFFER_SIZE_mask                         = 0xff << 8,
+	POSITION_BUFFER_SIZE_shift                        = 8,
+	SMX_BUFFER_SIZE_mask                              = 0xff << 16,
+	SMX_BUFFER_SIZE_shift                             = 16,
+    SX_MEMORY_EXPORT_BASE                                 = 0x00009010,
+    SX_MEMORY_EXPORT_SIZE                                 = 0x00009014,
+    SPI_CONFIG_CNTL                                       = 0x00009100,
+	GPR_WRITE_PRIORITY_mask                           = 0x1f << 0,
+	GPR_WRITE_PRIORITY_shift                          = 0,
+	    X_PRIORITY_ORDER                              = 0x00,
+	    X_PRIORITY_ORDER_VS                           = 0x01,
+	DISABLE_INTERP_1_bit                              = 1 << 5,
+	DEBUG_THREAD_TYPE_SEL_mask                        = 0x03 << 6,
+	DEBUG_THREAD_TYPE_SEL_shift                       = 6,
+	DEBUG_GROUP_SEL_mask                              = 0x1f << 8,
+	DEBUG_GROUP_SEL_shift                             = 8,
+	DEBUG_GRBM_OVERRIDE_bit                           = 1 << 13,
+    SPI_CONFIG_CNTL_1                                     = 0x0000913c,
+	VTX_DONE_DELAY_mask                               = 0x0f << 0,
+	VTX_DONE_DELAY_shift                              = 0,
+	    X_DELAY_10_CLKS                               = 0x00,
+	    X_DELAY_11_CLKS                               = 0x01,
+	    X_DELAY_12_CLKS                               = 0x02,
+	    X_DELAY_13_CLKS                               = 0x03,
+	    X_DELAY_14_CLKS                               = 0x04,
+	    X_DELAY_15_CLKS                               = 0x05,
+	    X_DELAY_16_CLKS                               = 0x06,
+	    X_DELAY_17_CLKS                               = 0x07,
+	    X_DELAY_2_CLKS                                = 0x08,
+	    X_DELAY_3_CLKS                                = 0x09,
+	    X_DELAY_4_CLKS                                = 0x0a,
+	    X_DELAY_5_CLKS                                = 0x0b,
+	    X_DELAY_6_CLKS                                = 0x0c,
+	    X_DELAY_7_CLKS                                = 0x0d,
+	    X_DELAY_8_CLKS                                = 0x0e,
+	    X_DELAY_9_CLKS                                = 0x0f,
+	INTERP_ONE_PRIM_PER_ROW_bit                       = 1 << 4,
+    TD_FILTER4                                            = 0x00009400,
+	WEIGHT_1_mask                                     = 0x7ff << 0,
+	WEIGHT_1_shift                                    = 0,
+	WEIGHT_0_mask                                     = 0x7ff << 11,
+	WEIGHT_0_shift                                    = 11,
+	WEIGHT_PAIR_bit                                   = 1 << 22,
+	PHASE_mask                                        = 0x0f << 23,
+	PHASE_shift                                       = 23,
+	DIRECTION_bit                                     = 1 << 27,
+    TD_FILTER4_1                                          = 0x00009404,
+	TD_FILTER4_1_num                                  = 35,
+/* 	WEIGHT_1_mask                                     = 0x7ff << 0, */
+/* 	WEIGHT_1_shift                                    = 0, */
+/* 	WEIGHT_0_mask                                     = 0x7ff << 11, */
+/* 	WEIGHT_0_shift                                    = 11, */
+    TD_CNTL                                               = 0x00009490,
+	SYNC_PHASE_SH_mask                                = 0x03 << 0,
+	SYNC_PHASE_SH_shift                               = 0,
+	SYNC_PHASE_VC_SMX_mask                            = 0x03 << 4,
+	SYNC_PHASE_VC_SMX_shift                           = 4,
+    TD0_CNTL                                              = 0x00009494,
+	TD0_CNTL_num                                      = 4,
+	ID_OVERRIDE_mask                                  = 0x03 << 28,
+	ID_OVERRIDE_shift                                 = 28,
+    TD0_STATUS                                            = 0x000094a4,
+	TD0_STATUS_num                                    = 4,
+	BUSY_bit                                          = 1 << 31,
+    TA_CNTL                                               = 0x00009504,
+	GRADIENT_CREDIT_mask                              = 0x1f << 0,
+	GRADIENT_CREDIT_shift                             = 0,
+	WALKER_CREDIT_mask                                = 0x1f << 8,
+	WALKER_CREDIT_shift                               = 8,
+	ALIGNER_CREDIT_mask                               = 0x1f << 16,
+	ALIGNER_CREDIT_shift                              = 16,
+	TD_FIFO_CREDIT_mask                               = 0x3ff << 22,
+	TD_FIFO_CREDIT_shift                              = 22,
+    TA_CNTL_AUX                                           = 0x00009508,
+	DISABLE_CUBE_WRAP_bit                             = 1 << 0,
+	SYNC_GRADIENT_bit                                 = 1 << 24,
+	SYNC_WALKER_bit                                   = 1 << 25,
+	SYNC_ALIGNER_bit                                  = 1 << 26,
+	BILINEAR_PRECISION_bit                            = 1 << 31,
+    TA0_CNTL                                              = 0x00009510,
+/* 	ID_OVERRIDE_mask                                  = 0x03 << 28, */
+/* 	ID_OVERRIDE_shift                                 = 28, */
+    TA1_CNTL                                              = 0x00009514,
+/* 	ID_OVERRIDE_mask                                  = 0x03 << 28, */
+/* 	ID_OVERRIDE_shift                                 = 28, */
+    TA2_CNTL                                              = 0x00009518,
+/* 	ID_OVERRIDE_mask                                  = 0x03 << 28, */
+/* 	ID_OVERRIDE_shift                                 = 28, */
+    TA3_CNTL                                              = 0x0000951c,
+/* 	ID_OVERRIDE_mask                                  = 0x03 << 28, */
+/* 	ID_OVERRIDE_shift                                 = 28, */
+    TA0_STATUS                                            = 0x00009520,
+	FG_PFIFO_EMPTYB_bit                               = 1 << 12,
+	FG_LFIFO_EMPTYB_bit                               = 1 << 13,
+	FG_SFIFO_EMPTYB_bit                               = 1 << 14,
+	FL_PFIFO_EMPTYB_bit                               = 1 << 16,
+	FL_LFIFO_EMPTYB_bit                               = 1 << 17,
+	FL_SFIFO_EMPTYB_bit                               = 1 << 18,
+	FA_PFIFO_EMPTYB_bit                               = 1 << 20,
+	FA_LFIFO_EMPTYB_bit                               = 1 << 21,
+	FA_SFIFO_EMPTYB_bit                               = 1 << 22,
+	IN_BUSY_bit                                       = 1 << 24,
+	FG_BUSY_bit                                       = 1 << 25,
+	FL_BUSY_bit                                       = 1 << 27,
+	TA_BUSY_bit                                       = 1 << 28,
+	FA_BUSY_bit                                       = 1 << 29,
+	AL_BUSY_bit                                       = 1 << 30,
+/* 	BUSY_bit                                          = 1 << 31, */
+    TA1_STATUS                                            = 0x00009524,
+/* 	FG_PFIFO_EMPTYB_bit                               = 1 << 12, */
+/* 	FG_LFIFO_EMPTYB_bit                               = 1 << 13, */
+/* 	FG_SFIFO_EMPTYB_bit                               = 1 << 14, */
+/* 	FL_PFIFO_EMPTYB_bit                               = 1 << 16, */
+/* 	FL_LFIFO_EMPTYB_bit                               = 1 << 17, */
+/* 	FL_SFIFO_EMPTYB_bit                               = 1 << 18, */
+/* 	FA_PFIFO_EMPTYB_bit                               = 1 << 20, */
+/* 	FA_LFIFO_EMPTYB_bit                               = 1 << 21, */
+/* 	FA_SFIFO_EMPTYB_bit                               = 1 << 22, */
+/* 	IN_BUSY_bit                                       = 1 << 24, */
+/* 	FG_BUSY_bit                                       = 1 << 25, */
+/* 	FL_BUSY_bit                                       = 1 << 27, */
+/* 	TA_BUSY_bit                                       = 1 << 28, */
+/* 	FA_BUSY_bit                                       = 1 << 29, */
+/* 	AL_BUSY_bit                                       = 1 << 30, */
+/* 	BUSY_bit                                          = 1 << 31, */
+    TA2_STATUS                                            = 0x00009528,
+/* 	FG_PFIFO_EMPTYB_bit                               = 1 << 12, */
+/* 	FG_LFIFO_EMPTYB_bit                               = 1 << 13, */
+/* 	FG_SFIFO_EMPTYB_bit                               = 1 << 14, */
+/* 	FL_PFIFO_EMPTYB_bit                               = 1 << 16, */
+/* 	FL_LFIFO_EMPTYB_bit                               = 1 << 17, */
+/* 	FL_SFIFO_EMPTYB_bit                               = 1 << 18, */
+/* 	FA_PFIFO_EMPTYB_bit                               = 1 << 20, */
+/* 	FA_LFIFO_EMPTYB_bit                               = 1 << 21, */
+/* 	FA_SFIFO_EMPTYB_bit                               = 1 << 22, */
+/* 	IN_BUSY_bit                                       = 1 << 24, */
+/* 	FG_BUSY_bit                                       = 1 << 25, */
+/* 	FL_BUSY_bit                                       = 1 << 27, */
+/* 	TA_BUSY_bit                                       = 1 << 28, */
+/* 	FA_BUSY_bit                                       = 1 << 29, */
+/* 	AL_BUSY_bit                                       = 1 << 30, */
+/* 	BUSY_bit                                          = 1 << 31, */
+    TA3_STATUS                                            = 0x0000952c,
+/* 	FG_PFIFO_EMPTYB_bit                               = 1 << 12, */
+/* 	FG_LFIFO_EMPTYB_bit                               = 1 << 13, */
+/* 	FG_SFIFO_EMPTYB_bit                               = 1 << 14, */
+/* 	FL_PFIFO_EMPTYB_bit                               = 1 << 16, */
+/* 	FL_LFIFO_EMPTYB_bit                               = 1 << 17, */
+/* 	FL_SFIFO_EMPTYB_bit                               = 1 << 18, */
+/* 	FA_PFIFO_EMPTYB_bit                               = 1 << 20, */
+/* 	FA_LFIFO_EMPTYB_bit                               = 1 << 21, */
+/* 	FA_SFIFO_EMPTYB_bit                               = 1 << 22, */
+/* 	IN_BUSY_bit                                       = 1 << 24, */
+/* 	FG_BUSY_bit                                       = 1 << 25, */
+/* 	FL_BUSY_bit                                       = 1 << 27, */
+/* 	TA_BUSY_bit                                       = 1 << 28, */
+/* 	FA_BUSY_bit                                       = 1 << 29, */
+/* 	AL_BUSY_bit                                       = 1 << 30, */
+/* 	BUSY_bit                                          = 1 << 31, */
+    TC_STATUS                                             = 0x00009600,
+	TC_BUSY_bit                                       = 1 << 0,
+    TC_INVALIDATE                                         = 0x00009604,
+	START_bit                                         = 1 << 0,
+    TC_CNTL                                               = 0x00009608,
+	FORCE_HIT_bit                                     = 1 << 0,
+	FORCE_MISS_bit                                    = 1 << 1,
+	L2_SIZE_mask                                      = 0x0f << 5,
+	L2_SIZE_shift                                     = 5,
+	    _256K                                         = 0x00,
+	    _224K                                         = 0x01,
+	    _192K                                         = 0x02,
+	    _160K                                         = 0x03,
+	    _128K                                         = 0x04,
+	    _96K                                          = 0x05,
+	    _64K                                          = 0x06,
+	    _32K                                          = 0x07,
+	L2_DISABLE_LATE_HIT_bit                           = 1 << 9,
+	DISABLE_VERT_PERF_bit                             = 1 << 10,
+	DISABLE_INVAL_BUSY_bit                            = 1 << 11,
+	DISABLE_INVAL_SAME_SURFACE_bit                    = 1 << 12,
+	PARTITION_MODE_mask                               = 0x03 << 13,
+	PARTITION_MODE_shift                              = 13,
+	    X_VERTEX                                      = 0x00,
+	MISS_ARB_MODE_bit                                 = 1 << 15,
+	HIT_ARB_MODE_bit                                  = 1 << 16,
+	DISABLE_WRITE_DELAY_bit                           = 1 << 17,
+	HIT_FIFO_DEPTH_bit                                = 1 << 18,
+    VC_CNTL                                               = 0x00009700,
+	L2_INVALIDATE_bit                                 = 1 << 0,
+	RESERVED_bit                                      = 1 << 1,
+	CC_FORCE_MISS_bit                                 = 1 << 2,
+	MI_CHAN_SEL_mask                                  = 0x03 << 3,
+	MI_CHAN_SEL_shift                                 = 3,
+	    X_MC0_USES_CH_0_1                             = 0x00,
+	    X_MC0_USES_CH_0_3                             = 0x01,
+	    X_VC_MC0_IS_ACTIVE                            = 0x02,
+	    X_VC_MC1_IS_DISABLED                          = 0x03,
+	MI_STEER_DISABLE_bit                              = 1 << 5,
+	MI_CREDIT_CTR_mask                                = 0x0f << 6,
+	MI_CREDIT_CTR_shift                               = 6,
+	MI_CREDIT_WE_bit                                  = 1 << 10,
+	MI_REQ_STALL_THLD_mask                            = 0x07 << 11,
+	MI_REQ_STALL_THLD_shift                           = 11,
+	    X_LATENCY_EXCEEDS_399_CLOCKS                  = 0x00,
+	    X_LATENCY_EXCEEDS_415_CLOCKS                  = 0x01,
+	    X_LATENCY_EXCEEDS_431_CLOCKS                  = 0x02,
+	    X_LATENCY_EXCEEDS_447_CLOCKS                  = 0x03,
+	    X_LATENCY_EXCEEDS_463_CLOCKS                  = 0x04,
+	    X_LATENCY_EXCEEDS_479_CLOCKS                  = 0x05,
+	    X_LATENCY_EXCEEDS_495_CLOCKS                  = 0x06,
+	    X_LATENCY_EXCEEDS_511_CLOCKS                  = 0x07,
+	VC_CNTL__MI_TIMESTAMP_RES_mask                    = 0x1f << 14,
+	VC_CNTL__MI_TIMESTAMP_RES_shift                   = 14,
+	    X_1X_SYSTEM_CLOCK                             = 0x00,
+	    X_2X_SYSTEM_CLOCK                             = 0x01,
+	    X_4X_SYSTEM_CLOCK                             = 0x02,
+	    X_8X_SYSTEM_CLOCK                             = 0x03,
+	    X_16X_SYSTEM_CLOCK                            = 0x04,
+	    X_32X_SYSTEM_CLOCK                            = 0x05,
+	    X_64X_SYSTEM_CLOCK                            = 0x06,
+	    X_128X_SYSTEM_CLOCK                           = 0x07,
+	    X_256X_SYSTEM_CLOCK                           = 0x08,
+	    X_512X_SYSTEM_CLOCK                           = 0x09,
+	    X_1024X_SYSTEM_CLOCK                          = 0x0a,
+	    X_2048X_SYSTEM_CLOCK                          = 0x0b,
+	    X_4092X_SYSTEM_CLOCK                          = 0x0c, /* NOTE(review): neighbours double (2048X, 8192X), so "4092" is presumably a typo for 4096 -- confirm; name kept for existing users */
+	    X_8192X_SYSTEM_CLOCK                          = 0x0d,
+	    X_16384X_SYSTEM_CLOCK                         = 0x0e,
+	    X_32768X_SYSTEM_CLOCK                         = 0x0f,
+    VC_CNTL_STATUS                                        = 0x00009704,
+	RP_BUSY_bit                                       = 1 << 0,
+	RG_BUSY_bit                                       = 1 << 1,
+	VC_BUSY_bit                                       = 1 << 2,
+	CLAMP_DETECT_bit                                  = 1 << 3,
+    VC_CONFIG                                             = 0x00009718,
+	WRITE_DIS_bit                                     = 1 << 0,
+	GPR_DATA_PHASE_ADJ_mask                           = 0x07 << 1,
+	GPR_DATA_PHASE_ADJ_shift                          = 1,
+	    X_LATENCY_BASE_0_CYCLES                       = 0x00,
+	    X_LATENCY_BASE_1_CYCLES                       = 0x01,
+	    X_LATENCY_BASE_2_CYCLES                       = 0x02,
+	    X_LATENCY_BASE_3_CYCLES                       = 0x03,
+	TD_SIMD_SYNC_ADJ_mask                             = 0x07 << 4,
+	TD_SIMD_SYNC_ADJ_shift                            = 4,
+	    X_0_CYCLES_DELAY                              = 0x00,
+	    X_1_CYCLES_DELAY                              = 0x01,
+	    X_2_CYCLES_DELAY                              = 0x02,
+	    X_3_CYCLES_DELAY                              = 0x03,
+	    X_4_CYCLES_DELAY                              = 0x04,
+	    X_5_CYCLES_DELAY                              = 0x05,
+	    X_6_CYCLES_DELAY                              = 0x06,
+	    X_7_CYCLES_DELAY                              = 0x07,
+    SMX_DC_CTL0                                           = 0x0000a020,
+	WR_GATHER_STREAM0_bit                             = 1 << 0,
+	WR_GATHER_STREAM1_bit                             = 1 << 1,
+	WR_GATHER_STREAM2_bit                             = 1 << 2,
+	WR_GATHER_STREAM3_bit                             = 1 << 3,
+	WR_GATHER_SCRATCH_bit                             = 1 << 4,
+	WR_GATHER_REDUC_BUF_bit                           = 1 << 5,
+	WR_GATHER_RING_BUF_bit                            = 1 << 6,
+	WR_GATHER_F_BUF_bit                               = 1 << 7,
+	DISABLE_CACHES_bit                                = 1 << 8,
+	AUTO_FLUSH_INVAL_EN_bit                           = 1 << 10,
+	AUTO_FLUSH_EN_bit                                 = 1 << 11,
+	AUTO_FLUSH_CNT_mask                               = 0xffff << 12,
+	AUTO_FLUSH_CNT_shift                              = 12,
+	MC_RD_STALL_FACTOR_mask                           = 0x03 << 28,
+	MC_RD_STALL_FACTOR_shift                          = 28,
+	MC_WR_STALL_FACTOR_mask                           = 0x03 << 30,
+	MC_WR_STALL_FACTOR_shift                          = 30,
+    SMX_DC_CTL1                                           = 0x0000a024,
+	OP_FIFO_SKID_mask                                 = 0x7f << 0,
+	OP_FIFO_SKID_shift                                = 0,
+	CACHE_LINE_SIZE_bit                               = 1 << 8,
+	MULTI_FLUSH_MODE_bit                              = 1 << 9,
+	MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_mask          = 0x0f << 10,
+	MULTI_FLUSH_REQ_ABORT_IDX_FIFO_SKID_shift         = 10,
+	DISABLE_WR_GATHER_RD_HIT_FORCE_EVICT_bit          = 1 << 16,
+	DISABLE_WR_GATHER_RD_HIT_COMP_VLDS_CHECK_bit      = 1 << 17,
+	DISABLE_FLUSH_ES_ALSO_INVALS_bit                  = 1 << 18,
+	DISABLE_FLUSH_GS_ALSO_INVALS_bit                  = 1 << 19,
+    SMX_DC_CTL2                                           = 0x0000a028,
+	INVALIDATE_CACHES_bit                             = 1 << 0,
+	CACHES_INVALID_bit                                = 1 << 1,
+	CACHES_DIRTY_bit                                  = 1 << 2,
+	FLUSH_ALL_bit                                     = 1 << 4,
+	FLUSH_GS_THREADS_bit                              = 1 << 8,
+	FLUSH_ES_THREADS_bit                              = 1 << 9,
+    SMX_DC_MC_INTF_CTL                                    = 0x0000a02c,
+	MC_RD_REQ_CRED_mask                               = 0xff << 0,
+	MC_RD_REQ_CRED_shift                              = 0,
+	MC_WR_REQ_CRED_mask                               = 0xff << 16,
+	MC_WR_REQ_CRED_shift                              = 16,
+    TD_PS_SAMPLER0_BORDER_RED                             = 0x0000a400,
+	TD_PS_SAMPLER0_BORDER_RED_num                     = 18,
+	TD_PS_SAMPLER0_BORDER_RED_offset                  = 16,
+    TD_PS_SAMPLER0_BORDER_GREEN                           = 0x0000a404,
+	TD_PS_SAMPLER0_BORDER_GREEN_num                   = 18,
+	TD_PS_SAMPLER0_BORDER_GREEN_offset                = 16,
+    TD_PS_SAMPLER0_BORDER_BLUE                            = 0x0000a408,
+	TD_PS_SAMPLER0_BORDER_BLUE_num                    = 18,
+	TD_PS_SAMPLER0_BORDER_BLUE_offset                 = 16,
+    TD_PS_SAMPLER0_BORDER_ALPHA                           = 0x0000a40c,
+	TD_PS_SAMPLER0_BORDER_ALPHA_num                   = 18,
+	TD_PS_SAMPLER0_BORDER_ALPHA_offset                = 16,
+    TD_VS_SAMPLER0_BORDER_RED                             = 0x0000a600,
+	TD_VS_SAMPLER0_BORDER_RED_num                     = 18,
+	TD_VS_SAMPLER0_BORDER_RED_offset                  = 16,
+    TD_VS_SAMPLER0_BORDER_GREEN                           = 0x0000a604,
+	TD_VS_SAMPLER0_BORDER_GREEN_num                   = 18,
+	TD_VS_SAMPLER0_BORDER_GREEN_offset                = 16,
+    TD_VS_SAMPLER0_BORDER_BLUE                            = 0x0000a608,
+	TD_VS_SAMPLER0_BORDER_BLUE_num                    = 18,
+	TD_VS_SAMPLER0_BORDER_BLUE_offset                 = 16,
+    TD_VS_SAMPLER0_BORDER_ALPHA                           = 0x0000a60c,
+	TD_VS_SAMPLER0_BORDER_ALPHA_num                   = 18,
+	TD_VS_SAMPLER0_BORDER_ALPHA_offset                = 16,
+    TD_GS_SAMPLER0_BORDER_RED                             = 0x0000a800,
+	TD_GS_SAMPLER0_BORDER_RED_num                     = 18,
+	TD_GS_SAMPLER0_BORDER_RED_offset                  = 16,
+    TD_GS_SAMPLER0_BORDER_GREEN                           = 0x0000a804,
+	TD_GS_SAMPLER0_BORDER_GREEN_num                   = 18,
+	TD_GS_SAMPLER0_BORDER_GREEN_offset                = 16,
+    TD_GS_SAMPLER0_BORDER_BLUE                            = 0x0000a808,
+	TD_GS_SAMPLER0_BORDER_BLUE_num                    = 18,
+	TD_GS_SAMPLER0_BORDER_BLUE_offset                 = 16,
+    TD_GS_SAMPLER0_BORDER_ALPHA                           = 0x0000a80c,
+	TD_GS_SAMPLER0_BORDER_ALPHA_num                   = 18,
+	TD_GS_SAMPLER0_BORDER_ALPHA_offset                = 16,
+    TD_PS_SAMPLER0_CLEARTYPE_KERNEL                       = 0x0000aa00,
+	TD_PS_SAMPLER0_CLEARTYPE_KERNEL_num               = 18,
+	TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_mask       = 0x07 << 0,
+	TD_PS_SAMPLER0_CLEARTYPE_KERNEL__WIDTH_shift      = 0,
+	TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_mask      = 0x07 << 3,
+	TD_PS_SAMPLER0_CLEARTYPE_KERNEL__HEIGHT_shift     = 3,
+    DB_DEPTH_SIZE                                         = 0x00028000,
+	PITCH_TILE_MAX_mask                               = 0x3ff << 0,
+	PITCH_TILE_MAX_shift                              = 0,
+	SLICE_TILE_MAX_mask                               = 0xfffff << 10,
+	SLICE_TILE_MAX_shift                              = 10,
+    DB_DEPTH_VIEW                                         = 0x00028004,
+	SLICE_START_mask                                  = 0x7ff << 0,
+	SLICE_START_shift                                 = 0,
+	SLICE_MAX_mask                                    = 0x7ff << 13,
+	SLICE_MAX_shift                                   = 13,
+    DB_DEPTH_BASE                                         = 0x0002800c,
+    DB_DEPTH_INFO                                         = 0x00028010,
+	DB_DEPTH_INFO__FORMAT_mask                        = 0x07 << 0,
+	DB_DEPTH_INFO__FORMAT_shift                       = 0,
+	    DEPTH_INVALID                                 = 0x00,
+	    DEPTH_16                                      = 0x01,
+	    DEPTH_X8_24                                   = 0x02,
+	    DEPTH_8_24                                    = 0x03,
+	    DEPTH_X8_24_FLOAT                             = 0x04,
+	    DEPTH_8_24_FLOAT                              = 0x05,
+	    DEPTH_32_FLOAT                                = 0x06,
+	    DEPTH_X24_8_32_FLOAT                          = 0x07,
+	DB_DEPTH_INFO__READ_SIZE_bit                      = 1 << 3,
+	DB_DEPTH_INFO__ARRAY_MODE_mask                    = 0x0f << 15,
+	DB_DEPTH_INFO__ARRAY_MODE_shift                   = 15,
+	    ARRAY_1D_TILED_THIN1                          = 0x02,
+	    ARRAY_2D_TILED_THIN1                          = 0x04,
+	TILE_SURFACE_ENABLE_bit                           = 1 << 25,
+	TILE_COMPACT_bit                                  = 1 << 26,
+	ZRANGE_PRECISION_bit                              = 1 << 31,
+    DB_HTILE_DATA_BASE                                    = 0x00028014,
+    DB_STENCIL_CLEAR                                      = 0x00028028,
+	DB_STENCIL_CLEAR__CLEAR_mask                      = 0xff << 0,
+	DB_STENCIL_CLEAR__CLEAR_shift                     = 0,
+	MIN_mask                                          = 0xff << 16,
+	MIN_shift                                         = 16,
+    DB_DEPTH_CLEAR                                        = 0x0002802c,
+    PA_SC_SCREEN_SCISSOR_TL                               = 0x00028030,
+	PA_SC_SCREEN_SCISSOR_TL__TL_X_mask                = 0x7fff << 0,
+	PA_SC_SCREEN_SCISSOR_TL__TL_X_shift               = 0,
+	PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask                = 0x7fff << 16,
+	PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift               = 16,
+    PA_SC_SCREEN_SCISSOR_BR                               = 0x00028034,
+	PA_SC_SCREEN_SCISSOR_BR__BR_X_mask                = 0x7fff << 0,
+	PA_SC_SCREEN_SCISSOR_BR__BR_X_shift               = 0,
+	PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask                = 0x7fff << 16,
+	PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift               = 16,
+    CB_COLOR0_BASE                                        = 0x00028040,
+	CB_COLOR0_BASE_num                                = 8,
+    CB_COLOR0_SIZE                                        = 0x00028060,
+	CB_COLOR0_SIZE_num                                = 8,
+/* 	PITCH_TILE_MAX_mask                               = 0x3ff << 0, */
+/* 	PITCH_TILE_MAX_shift                              = 0, */
+/* 	SLICE_TILE_MAX_mask                               = 0xfffff << 10, */
+/* 	SLICE_TILE_MAX_shift                              = 10, */
+    CB_COLOR0_VIEW                                        = 0x00028080,
+	CB_COLOR0_VIEW_num                                = 8,
+/* 	SLICE_START_mask                                  = 0x7ff << 0, */
+/* 	SLICE_START_shift                                 = 0, */
+/* 	SLICE_MAX_mask                                    = 0x7ff << 13, */
+/* 	SLICE_MAX_shift                                   = 13, */
+    CB_COLOR0_INFO                                        = 0x000280a0,
+	CB_COLOR0_INFO_num                                = 8,
+	ENDIAN_mask                                       = 0x03 << 0,
+	ENDIAN_shift                                      = 0,
+	    ENDIAN_NONE                                   = 0x00,
+	    ENDIAN_8IN16                                  = 0x01,
+	    ENDIAN_8IN32                                  = 0x02,
+	    ENDIAN_8IN64                                  = 0x03,
+	CB_COLOR0_INFO__FORMAT_mask                       = 0x3f << 2,
+	CB_COLOR0_INFO__FORMAT_shift                      = 2,
+	    COLOR_INVALID                                 = 0x00,
+	    COLOR_8                                       = 0x01,
+	    COLOR_4_4                                     = 0x02,
+	    COLOR_3_3_2                                   = 0x03,
+	    COLOR_16                                      = 0x05,
+	    COLOR_16_FLOAT                                = 0x06,
+	    COLOR_8_8                                     = 0x07,
+	    COLOR_5_6_5                                   = 0x08,
+	    COLOR_6_5_5                                   = 0x09,
+	    COLOR_1_5_5_5                                 = 0x0a,
+	    COLOR_4_4_4_4                                 = 0x0b,
+	    COLOR_5_5_5_1                                 = 0x0c,
+	    COLOR_32                                      = 0x0d,
+	    COLOR_32_FLOAT                                = 0x0e,
+	    COLOR_16_16                                   = 0x0f,
+	    COLOR_16_16_FLOAT                             = 0x10,
+	    COLOR_8_24                                    = 0x11,
+	    COLOR_8_24_FLOAT                              = 0x12,
+	    COLOR_24_8                                    = 0x13,
+	    COLOR_24_8_FLOAT                              = 0x14,
+	    COLOR_10_11_11                                = 0x15,
+	    COLOR_10_11_11_FLOAT                          = 0x16,
+	    COLOR_11_11_10                                = 0x17,
+	    COLOR_11_11_10_FLOAT                          = 0x18,
+	    COLOR_2_10_10_10                              = 0x19,
+	    COLOR_8_8_8_8                                 = 0x1a,
+	    COLOR_10_10_10_2                              = 0x1b,
+	    COLOR_X24_8_32_FLOAT                          = 0x1c,
+	    COLOR_32_32                                   = 0x1d,
+	    COLOR_32_32_FLOAT                             = 0x1e,
+	    COLOR_16_16_16_16                             = 0x1f,
+	    COLOR_16_16_16_16_FLOAT                       = 0x20,
+	    COLOR_32_32_32_32                             = 0x22,
+	    COLOR_32_32_32_32_FLOAT                       = 0x23,
+	CB_COLOR0_INFO__ARRAY_MODE_mask                   = 0x0f << 8,
+	CB_COLOR0_INFO__ARRAY_MODE_shift                  = 8,
+	    ARRAY_LINEAR_GENERAL                          = 0x00,
+	    ARRAY_LINEAR_ALIGNED                          = 0x01,
+/*	    ARRAY_1D_TILED_THIN1                          = 0x02, */
+/* 	    ARRAY_2D_TILED_THIN1                          = 0x04, */
+	NUMBER_TYPE_mask                                  = 0x07 << 12,
+	NUMBER_TYPE_shift                                 = 12,
+	    NUMBER_UNORM                                  = 0x00,
+	    NUMBER_SNORM                                  = 0x01,
+	    NUMBER_USCALED                                = 0x02,
+	    NUMBER_SSCALED                                = 0x03,
+	    NUMBER_UINT                                   = 0x04,
+	    NUMBER_SINT                                   = 0x05,
+	    NUMBER_SRGB                                   = 0x06,
+	    NUMBER_FLOAT                                  = 0x07,
+	CB_COLOR0_INFO__READ_SIZE_bit                     = 1 << 15,
+	COMP_SWAP_mask                                    = 0x03 << 16,
+	COMP_SWAP_shift                                   = 16,
+	    SWAP_STD                                      = 0x00,
+	    SWAP_ALT                                      = 0x01,
+	    SWAP_STD_REV                                  = 0x02,
+	    SWAP_ALT_REV                                  = 0x03,
+	CB_COLOR0_INFO__TILE_MODE_mask                    = 0x03 << 18,
+	CB_COLOR0_INFO__TILE_MODE_shift                   = 18,
+	    TILE_DISABLE                                  = 0x00,
+	    TILE_CLEAR_ENABLE                             = 0x01,
+	    TILE_FRAG_ENABLE                              = 0x02,
+	BLEND_CLAMP_bit                                   = 1 << 20,
+	CLEAR_COLOR_bit                                   = 1 << 21,
+	BLEND_BYPASS_bit                                  = 1 << 22,
+	BLEND_FLOAT32_bit                                 = 1 << 23,
+	SIMPLE_FLOAT_bit                                  = 1 << 24,
+	CB_COLOR0_INFO__ROUND_MODE_bit                    = 1 << 25,
+/* 	TILE_COMPACT_bit                                  = 1 << 26, */
+	SOURCE_FORMAT_bit                                 = 1 << 27,
+    CB_COLOR0_TILE                                        = 0x000280c0,
+	CB_COLOR0_TILE_num                                = 8,
+    CB_COLOR0_FRAG                                        = 0x000280e0,
+	CB_COLOR0_FRAG_num                                = 8,
+    CB_COLOR0_MASK                                        = 0x00028100,
+	CB_COLOR0_MASK_num                                = 8,
+	CMASK_BLOCK_MAX_mask                              = 0xfff << 0,
+	CMASK_BLOCK_MAX_shift                             = 0,
+	FMASK_TILE_MAX_mask                               = 0xfffff << 12,
+	FMASK_TILE_MAX_shift                              = 12,
+    CB_CLEAR_RED                                          = 0x00028120,
+    CB_CLEAR_GREEN                                        = 0x00028124,
+    CB_CLEAR_BLUE                                         = 0x00028128,
+    CB_CLEAR_ALPHA                                        = 0x0002812c,
+    SQ_ALU_CONST_BUFFER_SIZE_PS_0                         = 0x00028140,
+	SQ_ALU_CONST_BUFFER_SIZE_PS_0_num                 = 16,
+	SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask          = 0x1ff << 0,
+	SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift         = 0,
+    SQ_ALU_CONST_BUFFER_SIZE_VS_0                         = 0x00028180,
+	SQ_ALU_CONST_BUFFER_SIZE_VS_0_num                 = 16,
+	SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask          = 0x1ff << 0,
+	SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift         = 0,
+    SQ_ALU_CONST_BUFFER_SIZE_GS_0                         = 0x000281c0,
+	SQ_ALU_CONST_BUFFER_SIZE_GS_0_num                 = 16,
+	SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask          = 0x1ff << 0,
+	SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift         = 0,
+    PA_SC_WINDOW_OFFSET                                   = 0x00028200,
+	WINDOW_X_OFFSET_mask                              = 0x7fff << 0,
+	WINDOW_X_OFFSET_shift                             = 0,
+	WINDOW_Y_OFFSET_mask                              = 0x7fff << 16,
+	WINDOW_Y_OFFSET_shift                             = 16,
+    PA_SC_WINDOW_SCISSOR_TL                               = 0x00028204,
+	PA_SC_WINDOW_SCISSOR_TL__TL_X_mask                = 0x3fff << 0,
+	PA_SC_WINDOW_SCISSOR_TL__TL_X_shift               = 0,
+	PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask                = 0x3fff << 16,
+	PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift               = 16,
+	WINDOW_OFFSET_DISABLE_bit                         = 1 << 31,
+    PA_SC_WINDOW_SCISSOR_BR                               = 0x00028208,
+	PA_SC_WINDOW_SCISSOR_BR__BR_X_mask                = 0x3fff << 0,
+	PA_SC_WINDOW_SCISSOR_BR__BR_X_shift               = 0,
+	PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask                = 0x3fff << 16,
+	PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift               = 16,
+    PA_SC_CLIPRECT_RULE                                   = 0x0002820c,
+	CLIP_RULE_mask                                    = 0xffff << 0,
+	CLIP_RULE_shift                                   = 0,
+    PA_SC_CLIPRECT_0_TL                                   = 0x00028210,
+	PA_SC_CLIPRECT_0_TL_num                           = 4,
+	PA_SC_CLIPRECT_0_TL_offset                        = 8,
+	PA_SC_CLIPRECT_0_TL__TL_X_mask                    = 0x3fff << 0,
+	PA_SC_CLIPRECT_0_TL__TL_X_shift                   = 0,
+	PA_SC_CLIPRECT_0_TL__TL_Y_mask                    = 0x3fff << 16,
+	PA_SC_CLIPRECT_0_TL__TL_Y_shift                   = 16,
+    PA_SC_CLIPRECT_0_BR                                   = 0x00028214,
+	PA_SC_CLIPRECT_0_BR_num                           = 4,
+	PA_SC_CLIPRECT_0_BR_offset                        = 8,
+	PA_SC_CLIPRECT_0_BR__BR_X_mask                    = 0x3fff << 0,
+	PA_SC_CLIPRECT_0_BR__BR_X_shift                   = 0,
+	PA_SC_CLIPRECT_0_BR__BR_Y_mask                    = 0x3fff << 16,
+	PA_SC_CLIPRECT_0_BR__BR_Y_shift                   = 16,
+    CB_TARGET_MASK                                        = 0x00028238,
+	TARGET0_ENABLE_mask                               = 0x0f << 0,
+	TARGET0_ENABLE_shift                              = 0,
+	TARGET1_ENABLE_mask                               = 0x0f << 4,
+	TARGET1_ENABLE_shift                              = 4,
+	TARGET2_ENABLE_mask                               = 0x0f << 8,
+	TARGET2_ENABLE_shift                              = 8,
+	TARGET3_ENABLE_mask                               = 0x0f << 12,
+	TARGET3_ENABLE_shift                              = 12,
+	TARGET4_ENABLE_mask                               = 0x0f << 16,
+	TARGET4_ENABLE_shift                              = 16,
+	TARGET5_ENABLE_mask                               = 0x0f << 20,
+	TARGET5_ENABLE_shift                              = 20,
+	TARGET6_ENABLE_mask                               = 0x0f << 24,
+	TARGET6_ENABLE_shift                              = 24,
+	TARGET7_ENABLE_mask                               = 0x0f << 28,
+	TARGET7_ENABLE_shift                              = 28,
+    CB_SHADER_MASK                                        = 0x0002823c,
+	OUTPUT0_ENABLE_mask                               = 0x0f << 0,
+	OUTPUT0_ENABLE_shift                              = 0,
+	OUTPUT1_ENABLE_mask                               = 0x0f << 4,
+	OUTPUT1_ENABLE_shift                              = 4,
+	OUTPUT2_ENABLE_mask                               = 0x0f << 8,
+	OUTPUT2_ENABLE_shift                              = 8,
+	OUTPUT3_ENABLE_mask                               = 0x0f << 12,
+	OUTPUT3_ENABLE_shift                              = 12,
+	OUTPUT4_ENABLE_mask                               = 0x0f << 16,
+	OUTPUT4_ENABLE_shift                              = 16,
+	OUTPUT5_ENABLE_mask                               = 0x0f << 20,
+	OUTPUT5_ENABLE_shift                              = 20,
+	OUTPUT6_ENABLE_mask                               = 0x0f << 24,
+	OUTPUT6_ENABLE_shift                              = 24,
+	OUTPUT7_ENABLE_mask                               = 0x0f << 28,
+	OUTPUT7_ENABLE_shift                              = 28,
+    PA_SC_GENERIC_SCISSOR_TL                              = 0x00028240,
+	PA_SC_GENERIC_SCISSOR_TL__TL_X_mask               = 0x3fff << 0,
+	PA_SC_GENERIC_SCISSOR_TL__TL_X_shift              = 0,
+	PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask               = 0x3fff << 16,
+	PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift              = 16,
+/* 	WINDOW_OFFSET_DISABLE_bit                         = 1 << 31, */
+    PA_SC_GENERIC_SCISSOR_BR                              = 0x00028244,
+	PA_SC_GENERIC_SCISSOR_BR__BR_X_mask               = 0x3fff << 0,
+	PA_SC_GENERIC_SCISSOR_BR__BR_X_shift              = 0,
+	PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask               = 0x3fff << 16,
+	PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift              = 16,
+    PA_SC_VPORT_SCISSOR_0_TL                              = 0x00028250,
+	PA_SC_VPORT_SCISSOR_0_TL_num                      = 16,
+	PA_SC_VPORT_SCISSOR_0_TL_offset                   = 8,
+	PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask               = 0x3fff << 0,
+	PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift              = 0,
+	PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask               = 0x3fff << 16,
+	PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift              = 16,
+/* 	WINDOW_OFFSET_DISABLE_bit                         = 1 << 31, */
+    PA_SC_VPORT_SCISSOR_0_BR                              = 0x00028254,
+	PA_SC_VPORT_SCISSOR_0_BR_num                      = 16,
+	PA_SC_VPORT_SCISSOR_0_BR_offset                   = 8,
+	PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask               = 0x3fff << 0,
+	PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift              = 0,
+	PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask               = 0x3fff << 16,
+	PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift              = 16,
+    PA_SC_VPORT_ZMIN_0                                    = 0x000282d0,
+	PA_SC_VPORT_ZMIN_0_num                            = 16,
+	PA_SC_VPORT_ZMIN_0_offset                         = 8,
+    PA_SC_VPORT_ZMAX_0                                    = 0x000282d4,
+	PA_SC_VPORT_ZMAX_0_num                            = 16,
+	PA_SC_VPORT_ZMAX_0_offset                         = 8,
+    SX_MISC                                               = 0x00028350,
+	MULTIPASS_bit                                     = 1 << 0,
+    SQ_VTX_SEMANTIC_0                                     = 0x00028380,
+	SQ_VTX_SEMANTIC_0_num                             = 32,
+/* 	SEMANTIC_ID_mask                                  = 0xff << 0, */
+/* 	SEMANTIC_ID_shift                                 = 0, */
+    VGT_MAX_VTX_INDX                                      = 0x00028400,
+    VGT_MIN_VTX_INDX                                      = 0x00028404,
+    VGT_INDX_OFFSET                                       = 0x00028408,
+    VGT_MULTI_PRIM_IB_RESET_INDX                          = 0x0002840c,
+    SX_ALPHA_TEST_CONTROL                                 = 0x00028410,
+	ALPHA_FUNC_mask                                   = 0x07 << 0,
+	ALPHA_FUNC_shift                                  = 0,
+	    REF_NEVER                                     = 0x00,
+	    REF_LESS                                      = 0x01,
+	    REF_EQUAL                                     = 0x02,
+	    REF_LEQUAL                                    = 0x03,
+	    REF_GREATER                                   = 0x04,
+	    REF_NOTEQUAL                                  = 0x05,
+	    REF_GEQUAL                                    = 0x06,
+	    REF_ALWAYS                                    = 0x07,
+	ALPHA_TEST_ENABLE_bit                             = 1 << 3,
+	ALPHA_TEST_BYPASS_bit                             = 1 << 8,
+    CB_BLEND_RED                                          = 0x00028414,
+    CB_BLEND_GREEN                                        = 0x00028418,
+    CB_BLEND_BLUE                                         = 0x0002841c,
+    CB_BLEND_ALPHA                                        = 0x00028420,
+    CB_FOG_RED                                            = 0x00028424,
+    CB_FOG_GREEN                                          = 0x00028428,
+    CB_FOG_BLUE                                           = 0x0002842c,
+    DB_STENCILREFMASK                                     = 0x00028430,
+	STENCILREF_mask                                   = 0xff << 0,
+	STENCILREF_shift                                  = 0,
+	STENCILMASK_mask                                  = 0xff << 8,
+	STENCILMASK_shift                                 = 8,
+	STENCILWRITEMASK_mask                             = 0xff << 16,
+	STENCILWRITEMASK_shift                            = 16,
+    DB_STENCILREFMASK_BF                                  = 0x00028434,
+	STENCILREF_BF_mask                                = 0xff << 0,
+	STENCILREF_BF_shift                               = 0,
+	STENCILMASK_BF_mask                               = 0xff << 8,
+	STENCILMASK_BF_shift                              = 8,
+	STENCILWRITEMASK_BF_mask                          = 0xff << 16,
+	STENCILWRITEMASK_BF_shift                         = 16,
+    SX_ALPHA_REF                                          = 0x00028438,
+    PA_CL_VPORT_XSCALE_0                                  = 0x0002843c,
+	PA_CL_VPORT_XSCALE_0_num                          = 16,
+	PA_CL_VPORT_XSCALE_0_offset                       = 24,
+    PA_CL_VPORT_XOFFSET_0                                 = 0x00028440,
+	PA_CL_VPORT_XOFFSET_0_num                         = 16,
+	PA_CL_VPORT_XOFFSET_0_offset                      = 24,
+    PA_CL_VPORT_YSCALE_0                                  = 0x00028444,
+	PA_CL_VPORT_YSCALE_0_num                          = 16,
+	PA_CL_VPORT_YSCALE_0_offset                       = 24,
+    PA_CL_VPORT_YOFFSET_0                                 = 0x00028448,
+	PA_CL_VPORT_YOFFSET_0_num                         = 16,
+	PA_CL_VPORT_YOFFSET_0_offset                      = 24,
+    PA_CL_VPORT_ZSCALE_0                                  = 0x0002844c,
+	PA_CL_VPORT_ZSCALE_0_num                          = 16,
+	PA_CL_VPORT_ZSCALE_0_offset                       = 24,
+    PA_CL_VPORT_ZOFFSET_0                                 = 0x00028450,
+	PA_CL_VPORT_ZOFFSET_0_num                         = 16,
+	PA_CL_VPORT_ZOFFSET_0_offset                      = 24,
+    SPI_VS_OUT_ID_0                                       = 0x00028614,
+	SPI_VS_OUT_ID_0_num                               = 10,
+	SEMANTIC_0_mask                                   = 0xff << 0,
+	SEMANTIC_0_shift                                  = 0,
+	SEMANTIC_1_mask                                   = 0xff << 8,
+	SEMANTIC_1_shift                                  = 8,
+	SEMANTIC_2_mask                                   = 0xff << 16,
+	SEMANTIC_2_shift                                  = 16,
+	SEMANTIC_3_mask                                   = 0xff << 24,
+	SEMANTIC_3_shift                                  = 24,
+    SPI_PS_INPUT_CNTL_0                                   = 0x00028644,
+	SPI_PS_INPUT_CNTL_0_num                           = 32,
+	SEMANTIC_mask                                     = 0xff << 0,
+	SEMANTIC_shift                                    = 0,
+	DEFAULT_VAL_mask                                  = 0x03 << 8,
+	DEFAULT_VAL_shift                                 = 8,
+	    X_0_0F                                        = 0x00,
+	FLAT_SHADE_bit                                    = 1 << 10,
+	SEL_CENTROID_bit                                  = 1 << 11,
+	SEL_LINEAR_bit                                    = 1 << 12,
+	CYL_WRAP_mask                                     = 0x0f << 13,
+	CYL_WRAP_shift                                    = 13,
+	PT_SPRITE_TEX_bit                                 = 1 << 17,
+	SEL_SAMPLE_bit                                    = 1 << 18,
+    SPI_VS_OUT_CONFIG                                     = 0x000286c4,
+	VS_PER_COMPONENT_bit                              = 1 << 0,
+	VS_EXPORT_COUNT_mask                              = 0x1f << 1,
+	VS_EXPORT_COUNT_shift                             = 1,
+	VS_EXPORTS_FOG_bit                                = 1 << 8,
+	VS_OUT_FOG_VEC_ADDR_mask                          = 0x1f << 9,
+	VS_OUT_FOG_VEC_ADDR_shift                         = 9,
+    SPI_PS_IN_CONTROL_0                                   = 0x000286cc,
+	NUM_INTERP_mask                                   = 0x3f << 0,
+	NUM_INTERP_shift                                  = 0,
+	POSITION_ENA_bit                                  = 1 << 8,
+	POSITION_CENTROID_bit                             = 1 << 9,
+	POSITION_ADDR_mask                                = 0x1f << 10,
+	POSITION_ADDR_shift                               = 10,
+	PARAM_GEN_mask                                    = 0x0f << 15,
+	PARAM_GEN_shift                                   = 15,
+	PARAM_GEN_ADDR_mask                               = 0x7f << 19,
+	PARAM_GEN_ADDR_shift                              = 19,
+	BARYC_SAMPLE_CNTL_mask                            = 0x03 << 26,
+	BARYC_SAMPLE_CNTL_shift                           = 26,
+	    CENTROIDS_ONLY                                = 0x00,
+	    CENTERS_ONLY                                  = 0x01,
+	    CENTROIDS_AND_CENTERS                         = 0x02,
+	    UNDEF                                         = 0x03,
+	PERSP_GRADIENT_ENA_bit                            = 1 << 28,
+	LINEAR_GRADIENT_ENA_bit                           = 1 << 29,
+	POSITION_SAMPLE_bit                               = 1 << 30,
+	BARYC_AT_SAMPLE_ENA_bit                           = 1 << 31,
+    SPI_PS_IN_CONTROL_1                                   = 0x000286d0,
+	GEN_INDEX_PIX_bit                                 = 1 << 0,
+	GEN_INDEX_PIX_ADDR_mask                           = 0x7f << 1,
+	GEN_INDEX_PIX_ADDR_shift                          = 1,
+	FRONT_FACE_ENA_bit                                = 1 << 8,
+	FRONT_FACE_CHAN_mask                              = 0x03 << 9,
+	FRONT_FACE_CHAN_shift                             = 9,
+	FRONT_FACE_ALL_BITS_bit                           = 1 << 11,
+	FRONT_FACE_ADDR_mask                              = 0x1f << 12,
+	FRONT_FACE_ADDR_shift                             = 12,
+	FOG_ADDR_mask                                     = 0x7f << 17,
+	FOG_ADDR_shift                                    = 17,
+	FIXED_PT_POSITION_ENA_bit                         = 1 << 24,
+	FIXED_PT_POSITION_ADDR_mask                       = 0x1f << 25,
+	FIXED_PT_POSITION_ADDR_shift                      = 25,
+    SPI_INTERP_CONTROL_0                                  = 0x000286d4,
+	FLAT_SHADE_ENA_bit                                = 1 << 0,
+	PNT_SPRITE_ENA_bit                                = 1 << 1,
+	PNT_SPRITE_OVRD_X_mask                            = 0x07 << 2,
+	PNT_SPRITE_OVRD_X_shift                           = 2,
+	    SPI_PNT_SPRITE_SEL_0                          = 0x00,
+	    SPI_PNT_SPRITE_SEL_1                          = 0x01,
+	    SPI_PNT_SPRITE_SEL_S                          = 0x02,
+	    SPI_PNT_SPRITE_SEL_T                          = 0x03,
+	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04,
+	PNT_SPRITE_OVRD_Y_mask                            = 0x07 << 5,
+	PNT_SPRITE_OVRD_Y_shift                           = 5,
+/* 	    SPI_PNT_SPRITE_SEL_0                          = 0x00, */
+/* 	    SPI_PNT_SPRITE_SEL_1                          = 0x01, */
+/* 	    SPI_PNT_SPRITE_SEL_S                          = 0x02, */
+/* 	    SPI_PNT_SPRITE_SEL_T                          = 0x03, */
+/* 	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */
+	PNT_SPRITE_OVRD_Z_mask                            = 0x07 << 8,
+	PNT_SPRITE_OVRD_Z_shift                           = 8,
+/* 	    SPI_PNT_SPRITE_SEL_0                          = 0x00, */
+/* 	    SPI_PNT_SPRITE_SEL_1                          = 0x01, */
+/* 	    SPI_PNT_SPRITE_SEL_S                          = 0x02, */
+/* 	    SPI_PNT_SPRITE_SEL_T                          = 0x03, */
+/* 	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */
+	PNT_SPRITE_OVRD_W_mask                            = 0x07 << 11,
+	PNT_SPRITE_OVRD_W_shift                           = 11,
+/* 	    SPI_PNT_SPRITE_SEL_0                          = 0x00, */
+/* 	    SPI_PNT_SPRITE_SEL_1                          = 0x01, */
+/* 	    SPI_PNT_SPRITE_SEL_S                          = 0x02, */
+/* 	    SPI_PNT_SPRITE_SEL_T                          = 0x03, */
+/* 	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */
+	PNT_SPRITE_TOP_1_bit                              = 1 << 14,
+    SPI_INPUT_Z                                           = 0x000286d8,
+	PROVIDE_Z_TO_SPI_bit                              = 1 << 0,
+    SPI_FOG_CNTL                                          = 0x000286dc,
+	PASS_FOG_THROUGH_PS_bit                           = 1 << 0,
+	PIXEL_FOG_FUNC_mask                               = 0x03 << 1,
+	PIXEL_FOG_FUNC_shift                              = 1,
+	    SPI_FOG_NONE                                  = 0x00,
+	    SPI_FOG_EXP                                   = 0x01,
+	    SPI_FOG_EXP2                                  = 0x02,
+	    SPI_FOG_LINEAR                                = 0x03,
+	PIXEL_FOG_SRC_SEL_bit                             = 1 << 3,
+	VS_FOG_CLAMP_DISABLE_bit                          = 1 << 4,
+    SPI_FOG_FUNC_SCALE                                    = 0x000286e0,
+    SPI_FOG_FUNC_BIAS                                     = 0x000286e4,
+    CB_BLEND0_CONTROL                                     = 0x00028780,
+	CB_BLEND0_CONTROL_num                             = 8,
+	COLOR_SRCBLEND_mask                               = 0x1f << 0,
+	COLOR_SRCBLEND_shift                              = 0,
+	COLOR_COMB_FCN_mask                               = 0x07 << 5,
+	COLOR_COMB_FCN_shift                              = 5,
+	COLOR_DESTBLEND_mask                              = 0x1f << 8,
+	COLOR_DESTBLEND_shift                             = 8,
+	OPACITY_WEIGHT_bit                                = 1 << 13,
+	ALPHA_SRCBLEND_mask                               = 0x1f << 16,
+	ALPHA_SRCBLEND_shift                              = 16,
+	ALPHA_COMB_FCN_mask                               = 0x07 << 21,
+	ALPHA_COMB_FCN_shift                              = 21,
+	ALPHA_DESTBLEND_mask                              = 0x1f << 24,
+	ALPHA_DESTBLEND_shift                             = 24,
+	SEPARATE_ALPHA_BLEND_bit                          = 1 << 29,
+    VGT_DMA_BASE_HI                                       = 0x000287e4,
+	VGT_DMA_BASE_HI__BASE_ADDR_mask                   = 0xff << 0,
+	VGT_DMA_BASE_HI__BASE_ADDR_shift                  = 0,
+    VGT_DMA_BASE                                          = 0x000287e8,
+    VGT_DRAW_INITIATOR                                    = 0x000287f0,
+	SOURCE_SELECT_mask                                = 0x03 << 0,
+	SOURCE_SELECT_shift                               = 0,
+	    DI_SRC_SEL_DMA                                = 0x00,
+	    DI_SRC_SEL_IMMEDIATE                          = 0x01,
+	    DI_SRC_SEL_AUTO_INDEX                         = 0x02,
+	    DI_SRC_SEL_RESERVED                           = 0x03,
+	MAJOR_MODE_mask                                   = 0x03 << 2,
+	MAJOR_MODE_shift                                  = 2,
+	    DI_MAJOR_MODE_0                               = 0x00,
+	    DI_MAJOR_MODE_1                               = 0x01,
+	SPRITE_EN_bit                                     = 1 << 4,
+	NOT_EOP_bit                                       = 1 << 5,
+	USE_OPAQUE_bit                                    = 1 << 6,
+    VGT_IMMED_DATA                                        = 0x000287f4,
+    VGT_EVENT_ADDRESS_REG                                 = 0x000287f8,
+	ADDRESS_LOW_mask                                  = 0xfffffff << 0,
+	ADDRESS_LOW_shift                                 = 0,
+    DB_DEPTH_CONTROL                                      = 0x00028800,
+	STENCIL_ENABLE_bit                                = 1 << 0,
+	Z_ENABLE_bit                                      = 1 << 1,
+	Z_WRITE_ENABLE_bit                                = 1 << 2,
+	ZFUNC_mask                                        = 0x07 << 4,
+	ZFUNC_shift                                       = 4,
+	    FRAG_NEVER                                    = 0x00,
+	    FRAG_LESS                                     = 0x01,
+	    FRAG_EQUAL                                    = 0x02,
+	    FRAG_LEQUAL                                   = 0x03,
+	    FRAG_GREATER                                  = 0x04,
+	    FRAG_NOTEQUAL                                 = 0x05,
+	    FRAG_GEQUAL                                   = 0x06,
+	    FRAG_ALWAYS                                   = 0x07,
+	BACKFACE_ENABLE_bit                               = 1 << 7,
+	STENCILFUNC_mask                                  = 0x07 << 8,
+	STENCILFUNC_shift                                 = 8,
+/* 	    REF_NEVER                                     = 0x00, */
+/* 	    REF_LESS                                      = 0x01, */
+/* 	    REF_EQUAL                                     = 0x02, */
+/* 	    REF_LEQUAL                                    = 0x03, */
+/* 	    REF_GREATER                                   = 0x04, */
+/* 	    REF_NOTEQUAL                                  = 0x05, */
+/* 	    REF_GEQUAL                                    = 0x06, */
+/* 	    REF_ALWAYS                                    = 0x07, */
+	STENCILFAIL_mask                                  = 0x07 << 11,
+	STENCILFAIL_shift                                 = 11,
+	    STENCIL_KEEP                                  = 0x00,
+	    STENCIL_ZERO                                  = 0x01,
+	    STENCIL_REPLACE                               = 0x02,
+	    STENCIL_INCR_CLAMP                            = 0x03,
+	    STENCIL_DECR_CLAMP                            = 0x04,
+	    STENCIL_INVERT                                = 0x05,
+	    STENCIL_INCR_WRAP                             = 0x06,
+	    STENCIL_DECR_WRAP                             = 0x07,
+	STENCILZPASS_mask                                 = 0x07 << 14,
+	STENCILZPASS_shift                                = 14,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+	STENCILZFAIL_mask                                 = 0x07 << 17,
+	STENCILZFAIL_shift                                = 17,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+	STENCILFUNC_BF_mask                               = 0x07 << 20,
+	STENCILFUNC_BF_shift                              = 20,
+/* 	    REF_NEVER                                     = 0x00, */
+/* 	    REF_LESS                                      = 0x01, */
+/* 	    REF_EQUAL                                     = 0x02, */
+/* 	    REF_LEQUAL                                    = 0x03, */
+/* 	    REF_GREATER                                   = 0x04, */
+/* 	    REF_NOTEQUAL                                  = 0x05, */
+/* 	    REF_GEQUAL                                    = 0x06, */
+/* 	    REF_ALWAYS                                    = 0x07, */
+	STENCILFAIL_BF_mask                               = 0x07 << 23,
+	STENCILFAIL_BF_shift                              = 23,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+	STENCILZPASS_BF_mask                              = 0x07 << 26,
+	STENCILZPASS_BF_shift                             = 26,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+	STENCILZFAIL_BF_mask                              = 0x07 << 29,
+	STENCILZFAIL_BF_shift                             = 29,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+    CB_BLEND_CONTROL                                      = 0x00028804,
+/* 	COLOR_SRCBLEND_mask                               = 0x1f << 0, */
+/* 	COLOR_SRCBLEND_shift                              = 0, */
+	    BLEND_ZERO                                    = 0x00,
+	    BLEND_ONE                                     = 0x01,
+	    BLEND_SRC_COLOR                               = 0x02,
+	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03,
+	    BLEND_SRC_ALPHA                               = 0x04,
+	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05,
+	    BLEND_DST_ALPHA                               = 0x06,
+	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07,
+	    BLEND_DST_COLOR                               = 0x08,
+	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09,
+	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a,
+	    BLEND_BOTH_SRC_ALPHA                          = 0x0b,
+	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c,
+	    BLEND_CONSTANT_COLOR                          = 0x0d,
+	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e,
+	    BLEND_SRC1_COLOR                              = 0x0f,
+	    BLEND_INV_SRC1_COLOR                          = 0x10,
+	    BLEND_SRC1_ALPHA                              = 0x11,
+	    BLEND_INV_SRC1_ALPHA                          = 0x12,
+	    BLEND_CONSTANT_ALPHA                          = 0x13,
+	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14,
+/* 	COLOR_COMB_FCN_mask                               = 0x07 << 5, */
+/* 	COLOR_COMB_FCN_shift                              = 5, */
+	    COMB_DST_PLUS_SRC                             = 0x00,
+	    COMB_SRC_MINUS_DST                            = 0x01,
+	    COMB_MIN_DST_SRC                              = 0x02,
+	    COMB_MAX_DST_SRC                              = 0x03,
+	    COMB_DST_MINUS_SRC                            = 0x04,
+/* 	COLOR_DESTBLEND_mask                              = 0x1f << 8, */
+/* 	COLOR_DESTBLEND_shift                             = 8, */
+/* 	    BLEND_ZERO                                    = 0x00, */
+/* 	    BLEND_ONE                                     = 0x01, */
+/* 	    BLEND_SRC_COLOR                               = 0x02, */
+/* 	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */
+/* 	    BLEND_SRC_ALPHA                               = 0x04, */
+/* 	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */
+/* 	    BLEND_DST_ALPHA                               = 0x06, */
+/* 	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */
+/* 	    BLEND_DST_COLOR                               = 0x08, */
+/* 	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */
+/* 	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */
+/* 	    BLEND_BOTH_SRC_ALPHA                          = 0x0b, */
+/* 	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */
+/* 	    BLEND_CONSTANT_COLOR                          = 0x0d, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */
+/* 	    BLEND_SRC1_COLOR                              = 0x0f, */
+/* 	    BLEND_INV_SRC1_COLOR                          = 0x10, */
+/* 	    BLEND_SRC1_ALPHA                              = 0x11, */
+/* 	    BLEND_INV_SRC1_ALPHA                          = 0x12, */
+/* 	    BLEND_CONSTANT_ALPHA                          = 0x13, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */
+/* 	OPACITY_WEIGHT_bit                                = 1 << 13, */
+/* 	ALPHA_SRCBLEND_mask                               = 0x1f << 16, */
+/* 	ALPHA_SRCBLEND_shift                              = 16, */
+/* 	    BLEND_ZERO                                    = 0x00, */
+/* 	    BLEND_ONE                                     = 0x01, */
+/* 	    BLEND_SRC_COLOR                               = 0x02, */
+/* 	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */
+/* 	    BLEND_SRC_ALPHA                               = 0x04, */
+/* 	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */
+/* 	    BLEND_DST_ALPHA                               = 0x06, */
+/* 	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */
+/* 	    BLEND_DST_COLOR                               = 0x08, */
+/* 	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */
+/* 	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */
+/* 	    BLEND_BOTH_SRC_ALPHA                          = 0x0b, */
+/* 	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */
+/* 	    BLEND_CONSTANT_COLOR                          = 0x0d, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */
+/* 	    BLEND_SRC1_COLOR                              = 0x0f, */
+/* 	    BLEND_INV_SRC1_COLOR                          = 0x10, */
+/* 	    BLEND_SRC1_ALPHA                              = 0x11, */
+/* 	    BLEND_INV_SRC1_ALPHA                          = 0x12, */
+/* 	    BLEND_CONSTANT_ALPHA                          = 0x13, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */
+/* 	ALPHA_COMB_FCN_mask                               = 0x07 << 21, */
+/* 	ALPHA_COMB_FCN_shift                              = 21, */
+/* 	    COMB_DST_PLUS_SRC                             = 0x00, */
+/* 	    COMB_SRC_MINUS_DST                            = 0x01, */
+/* 	    COMB_MIN_DST_SRC                              = 0x02, */
+/* 	    COMB_MAX_DST_SRC                              = 0x03, */
+/* 	    COMB_DST_MINUS_SRC                            = 0x04, */
+/* 	ALPHA_DESTBLEND_mask                              = 0x1f << 24, */
+/* 	ALPHA_DESTBLEND_shift                             = 24, */
+/* 	    BLEND_ZERO                                    = 0x00, */
+/* 	    BLEND_ONE                                     = 0x01, */
+/* 	    BLEND_SRC_COLOR                               = 0x02, */
+/* 	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */
+/* 	    BLEND_SRC_ALPHA                               = 0x04, */
+/* 	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */
+/* 	    BLEND_DST_ALPHA                               = 0x06, */
+/* 	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */
+/* 	    BLEND_DST_COLOR                               = 0x08, */
+/* 	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */
+/* 	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */
+/* 	    BLEND_BOTH_SRC_ALPHA                          = 0x0b, */
+/* 	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */
+/* 	    BLEND_CONSTANT_COLOR                          = 0x0d, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */
+/* 	    BLEND_SRC1_COLOR                              = 0x0f, */
+/* 	    BLEND_INV_SRC1_COLOR                          = 0x10, */
+/* 	    BLEND_SRC1_ALPHA                              = 0x11, */
+/* 	    BLEND_INV_SRC1_ALPHA                          = 0x12, */
+/* 	    BLEND_CONSTANT_ALPHA                          = 0x13, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */
+/* 	SEPARATE_ALPHA_BLEND_bit                          = 1 << 29, */
+    CB_COLOR_CONTROL                                      = 0x00028808,
+	FOG_ENABLE_bit                                    = 1 << 0,
+	MULTIWRITE_ENABLE_bit                             = 1 << 1,
+	DITHER_ENABLE_bit                                 = 1 << 2,
+	DEGAMMA_ENABLE_bit                                = 1 << 3,
+	SPECIAL_OP_mask                                   = 0x07 << 4,
+	SPECIAL_OP_shift                                  = 4,
+	    SPECIAL_NORMAL                                = 0x00,
+	    SPECIAL_DISABLE                               = 0x01,
+	    SPECIAL_FAST_CLEAR                            = 0x02,
+	    SPECIAL_FORCE_CLEAR                           = 0x03,
+	    SPECIAL_EXPAND_COLOR                          = 0x04,
+	    SPECIAL_EXPAND_TEXTURE                        = 0x05,
+	    SPECIAL_EXPAND_SAMPLES                        = 0x06,
+	    SPECIAL_RESOLVE_BOX                           = 0x07,
+	PER_MRT_BLEND_bit                                 = 1 << 7,
+	TARGET_BLEND_ENABLE_mask                          = 0xff << 8,
+	TARGET_BLEND_ENABLE_shift                         = 8,
+	ROP3_mask                                         = 0xff << 16,
+	ROP3_shift                                        = 16,
+    DB_SHADER_CONTROL                                     = 0x0002880c,
+	Z_EXPORT_ENABLE_bit                               = 1 << 0,
+	STENCIL_REF_EXPORT_ENABLE_bit                     = 1 << 1,
+	Z_ORDER_mask                                      = 0x03 << 4,
+	Z_ORDER_shift                                     = 4,
+	    LATE_Z                                        = 0x00,
+	    EARLY_Z_THEN_LATE_Z                           = 0x01,
+	    RE_Z                                          = 0x02,
+	    EARLY_Z_THEN_RE_Z                             = 0x03,
+	KILL_ENABLE_bit                                   = 1 << 6,
+	COVERAGE_TO_MASK_ENABLE_bit                       = 1 << 7,
+	MASK_EXPORT_ENABLE_bit                            = 1 << 8,
+	DUAL_EXPORT_ENABLE_bit                            = 1 << 9,
+	EXEC_ON_HIER_FAIL_bit                             = 1 << 10,
+	EXEC_ON_NOOP_bit                                  = 1 << 11,
+    PA_CL_CLIP_CNTL                                       = 0x00028810,
+	UCP_ENA_0_bit                                     = 1 << 0,
+	UCP_ENA_1_bit                                     = 1 << 1,
+	UCP_ENA_2_bit                                     = 1 << 2,
+	UCP_ENA_3_bit                                     = 1 << 3,
+	UCP_ENA_4_bit                                     = 1 << 4,
+	UCP_ENA_5_bit                                     = 1 << 5,
+	PS_UCP_Y_SCALE_NEG_bit                            = 1 << 13,
+	PS_UCP_MODE_mask                                  = 0x03 << 14,
+	PS_UCP_MODE_shift                                 = 14,
+	CLIP_DISABLE_bit                                  = 1 << 16,
+	UCP_CULL_ONLY_ENA_bit                             = 1 << 17,
+	BOUNDARY_EDGE_FLAG_ENA_bit                        = 1 << 18,
+	DX_CLIP_SPACE_DEF_bit                             = 1 << 19,
+	DIS_CLIP_ERR_DETECT_bit                           = 1 << 20,
+	VTX_KILL_OR_bit                                   = 1 << 21,
+	DX_LINEAR_ATTR_CLIP_ENA_bit                       = 1 << 24,
+	VTE_VPORT_PROVOKE_DISABLE_bit                     = 1 << 25,
+	ZCLIP_NEAR_DISABLE_bit                            = 1 << 26,
+	ZCLIP_FAR_DISABLE_bit                             = 1 << 27,
+    PA_SU_SC_MODE_CNTL                                    = 0x00028814,
+	CULL_FRONT_bit                                    = 1 << 0,
+	CULL_BACK_bit                                     = 1 << 1,
+	FACE_bit                                          = 1 << 2,
+	POLY_MODE_mask                                    = 0x03 << 3,
+	POLY_MODE_shift                                   = 3,
+	    X_DISABLE_POLY_MODE                           = 0x00,
+	    X_DUAL_MODE                                   = 0x01,
+	POLYMODE_FRONT_PTYPE_mask                         = 0x07 << 5,
+	POLYMODE_FRONT_PTYPE_shift                        = 5,
+	    X_DRAW_POINTS                                 = 0x00,
+	    X_DRAW_LINES                                  = 0x01,
+	    X_DRAW_TRIANGLES                              = 0x02,
+	POLYMODE_BACK_PTYPE_mask                          = 0x07 << 8,
+	POLYMODE_BACK_PTYPE_shift                         = 8,
+/* 	    X_DRAW_POINTS                                 = 0x00, */
+/* 	    X_DRAW_LINES                                  = 0x01, */
+/* 	    X_DRAW_TRIANGLES                              = 0x02, */
+	POLY_OFFSET_FRONT_ENABLE_bit                      = 1 << 11,
+	POLY_OFFSET_BACK_ENABLE_bit                       = 1 << 12,
+	POLY_OFFSET_PARA_ENABLE_bit                       = 1 << 13,
+	VTX_WINDOW_OFFSET_ENABLE_bit                      = 1 << 16,
+	PROVOKING_VTX_LAST_bit                            = 1 << 19,
+	PERSP_CORR_DIS_bit                                = 1 << 20,
+	MULTI_PRIM_IB_ENA_bit                             = 1 << 21,
+    PA_CL_VTE_CNTL                                        = 0x00028818,
+	VPORT_X_SCALE_ENA_bit                             = 1 << 0,
+	VPORT_X_OFFSET_ENA_bit                            = 1 << 1,
+	VPORT_Y_SCALE_ENA_bit                             = 1 << 2,
+	VPORT_Y_OFFSET_ENA_bit                            = 1 << 3,
+	VPORT_Z_SCALE_ENA_bit                             = 1 << 4,
+	VPORT_Z_OFFSET_ENA_bit                            = 1 << 5,
+	VTX_XY_FMT_bit                                    = 1 << 8,
+	VTX_Z_FMT_bit                                     = 1 << 9,
+	VTX_W0_FMT_bit                                    = 1 << 10,
+	PERFCOUNTER_REF_bit                               = 1 << 11,
+    PA_CL_VS_OUT_CNTL                                     = 0x0002881c,
+	CLIP_DIST_ENA_0_bit                               = 1 << 0,
+	CLIP_DIST_ENA_1_bit                               = 1 << 1,
+	CLIP_DIST_ENA_2_bit                               = 1 << 2,
+	CLIP_DIST_ENA_3_bit                               = 1 << 3,
+	CLIP_DIST_ENA_4_bit                               = 1 << 4,
+	CLIP_DIST_ENA_5_bit                               = 1 << 5,
+	CLIP_DIST_ENA_6_bit                               = 1 << 6,
+	CLIP_DIST_ENA_7_bit                               = 1 << 7,
+	CULL_DIST_ENA_0_bit                               = 1 << 8,
+	CULL_DIST_ENA_1_bit                               = 1 << 9,
+	CULL_DIST_ENA_2_bit                               = 1 << 10,
+	CULL_DIST_ENA_3_bit                               = 1 << 11,
+	CULL_DIST_ENA_4_bit                               = 1 << 12,
+	CULL_DIST_ENA_5_bit                               = 1 << 13,
+	CULL_DIST_ENA_6_bit                               = 1 << 14,
+	CULL_DIST_ENA_7_bit                               = 1 << 15,
+	USE_VTX_POINT_SIZE_bit                            = 1 << 16,
+	USE_VTX_EDGE_FLAG_bit                             = 1 << 17,
+	USE_VTX_RENDER_TARGET_INDX_bit                    = 1 << 18,
+	USE_VTX_VIEWPORT_INDX_bit                         = 1 << 19,
+	USE_VTX_KILL_FLAG_bit                             = 1 << 20,
+	VS_OUT_MISC_VEC_ENA_bit                           = 1 << 21,
+	VS_OUT_CCDIST0_VEC_ENA_bit                        = 1 << 22,
+	VS_OUT_CCDIST1_VEC_ENA_bit                        = 1 << 23,
+    PA_CL_NANINF_CNTL                                     = 0x00028820,
+	VTE_XY_INF_DISCARD_bit                            = 1 << 0,
+	VTE_Z_INF_DISCARD_bit                             = 1 << 1,
+	VTE_W_INF_DISCARD_bit                             = 1 << 2,
+	VTE_0XNANINF_IS_0_bit                             = 1 << 3,
+	VTE_XY_NAN_RETAIN_bit                             = 1 << 4,
+	VTE_Z_NAN_RETAIN_bit                              = 1 << 5,
+	VTE_W_NAN_RETAIN_bit                              = 1 << 6,
+	VTE_W_RECIP_NAN_IS_0_bit                          = 1 << 7,
+	VS_XY_NAN_TO_INF_bit                              = 1 << 8,
+	VS_XY_INF_RETAIN_bit                              = 1 << 9,
+	VS_Z_NAN_TO_INF_bit                               = 1 << 10,
+	VS_Z_INF_RETAIN_bit                               = 1 << 11,
+	VS_W_NAN_TO_INF_bit                               = 1 << 12,
+	VS_W_INF_RETAIN_bit                               = 1 << 13,
+	VS_CLIP_DIST_INF_DISCARD_bit                      = 1 << 14,
+	VTE_NO_OUTPUT_NEG_0_bit                           = 1 << 20,
+    SQ_PGM_START_PS                                       = 0x00028840,
+    SQ_PGM_RESOURCES_PS                                   = 0x00028850, /* presumably a register offset; the indented entries below appear to be its bit-field masks/shifts -- TODO confirm */
+	NUM_GPRS_mask                                     = 0xff << 0,
+	NUM_GPRS_shift                                    = 0,
+	STACK_SIZE_mask                                   = 0xff << 8,
+	STACK_SIZE_shift                                  = 8,
+	SQ_PGM_RESOURCES_PS__DX10_CLAMP_bit               = 1 << 21,
+	FETCH_CACHE_LINES_mask                            = 0x07 << 24,
+	FETCH_CACHE_LINES_shift                           = 24,
+	UNCACHED_FIRST_INST_bit                           = 1 << 28,
+	CLAMP_CONSTS_bit                                  = 1 << 31, /* NOTE(review): 1 << 31 shifts a signed int into its sign bit (undefined per C11 6.5.7 when int is 32 bits); compilers commonly yield INT_MIN -- verify users treat this as an unsigned bit pattern */
+    SQ_PGM_EXPORTS_PS                                     = 0x00028854,
+	EXPORT_MODE_mask                                  = 0x1f << 0,
+	EXPORT_MODE_shift                                 = 0,
+    SQ_PGM_START_VS                                       = 0x00028858,
+    SQ_PGM_RESOURCES_VS                                   = 0x00028868,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+	SQ_PGM_RESOURCES_VS__DX10_CLAMP_bit               = 1 << 21,
+/* 	FETCH_CACHE_LINES_mask                            = 0x07 << 24, */
+/* 	FETCH_CACHE_LINES_shift                           = 24, */
+/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_START_GS                                       = 0x0002886c,
+    SQ_PGM_RESOURCES_GS                                   = 0x0002887c,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+	SQ_PGM_RESOURCES_GS__DX10_CLAMP_bit               = 1 << 21,
+/* 	FETCH_CACHE_LINES_mask                            = 0x07 << 24, */
+/* 	FETCH_CACHE_LINES_shift                           = 24, */
+/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_START_ES                                       = 0x00028880,
+    SQ_PGM_RESOURCES_ES                                   = 0x00028890,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+	SQ_PGM_RESOURCES_ES__DX10_CLAMP_bit               = 1 << 21,
+/* 	FETCH_CACHE_LINES_mask                            = 0x07 << 24, */
+/* 	FETCH_CACHE_LINES_shift                           = 24, */
+/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_START_FS                                       = 0x00028894,
+    SQ_PGM_RESOURCES_FS                                   = 0x000288a4,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+	SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit               = 1 << 21,
+    SQ_ESGS_RING_ITEMSIZE                                 = 0x000288a8,
+	ITEMSIZE_mask                                     = 0x7fff << 0,
+	ITEMSIZE_shift                                    = 0,
+    SQ_GSVS_RING_ITEMSIZE                                 = 0x000288ac,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_ESTMP_RING_ITEMSIZE                                = 0x000288b0,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GSTMP_RING_ITEMSIZE                                = 0x000288b4,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_VSTMP_RING_ITEMSIZE                                = 0x000288b8,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_PSTMP_RING_ITEMSIZE                                = 0x000288bc,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_FBUF_RING_ITEMSIZE                                 = 0x000288c0,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_REDUC_RING_ITEMSIZE                                = 0x000288c4,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GS_VERT_ITEMSIZE                                   = 0x000288c8,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_PGM_CF_OFFSET_PS                                   = 0x000288cc,
+	PGM_CF_OFFSET_mask                                = 0xfffff << 0,
+	PGM_CF_OFFSET_shift                               = 0,
+    SQ_PGM_CF_OFFSET_VS                                   = 0x000288d0,
+/* 	PGM_CF_OFFSET_mask                                = 0xfffff << 0, */
+/* 	PGM_CF_OFFSET_shift                               = 0, */
+    SQ_PGM_CF_OFFSET_GS                                   = 0x000288d4,
+/* 	PGM_CF_OFFSET_mask                                = 0xfffff << 0, */
+/* 	PGM_CF_OFFSET_shift                               = 0, */
+    SQ_PGM_CF_OFFSET_ES                                   = 0x000288d8,
+/* 	PGM_CF_OFFSET_mask                                = 0xfffff << 0, */
+/* 	PGM_CF_OFFSET_shift                               = 0, */
+    SQ_PGM_CF_OFFSET_FS                                   = 0x000288dc,
+/* 	PGM_CF_OFFSET_mask                                = 0xfffff << 0, */
+/* 	PGM_CF_OFFSET_shift                               = 0, */
+    SQ_VTX_SEMANTIC_CLEAR                                 = 0x000288e0,
+    SQ_ALU_CONST_CACHE_PS_0                               = 0x00028940,
+	SQ_ALU_CONST_CACHE_PS_0_num                       = 16,
+    SQ_ALU_CONST_CACHE_VS_0                               = 0x00028980,
+	SQ_ALU_CONST_CACHE_VS_0_num                       = 16,
+    SQ_ALU_CONST_CACHE_GS_0                               = 0x000289c0,
+	SQ_ALU_CONST_CACHE_GS_0_num                       = 16,
+    PA_SU_POINT_SIZE                                      = 0x00028a00, /* presumably a register offset; HEIGHT occupies bits 0-15 and WIDTH bits 16-31 per the masks below */
+	PA_SU_POINT_SIZE__HEIGHT_mask                     = 0xffff << 0,
+	PA_SU_POINT_SIZE__HEIGHT_shift                    = 0,
+	PA_SU_POINT_SIZE__WIDTH_mask                      = 0xffff << 16, /* NOTE(review): 0xffff << 16 does not fit in a 32-bit signed int (undefined per C11 6.5.7); result is typically a negative value -- verify users cast to unsigned before masking */
+	PA_SU_POINT_SIZE__WIDTH_shift                     = 16,
+    PA_SU_POINT_MINMAX                                    = 0x00028a04, /* presumably a register offset; MIN_SIZE occupies bits 0-15 and MAX_SIZE bits 16-31 per the masks below */
+	MIN_SIZE_mask                                     = 0xffff << 0,
+	MIN_SIZE_shift                                    = 0,
+	MAX_SIZE_mask                                     = 0xffff << 16, /* NOTE(review): 0xffff << 16 does not fit in a 32-bit signed int (undefined per C11 6.5.7); result is typically a negative value -- verify users cast to unsigned before masking */
+	MAX_SIZE_shift                                    = 16,
+    PA_SU_LINE_CNTL                                       = 0x00028a08,
+	PA_SU_LINE_CNTL__WIDTH_mask                       = 0xffff << 0,
+	PA_SU_LINE_CNTL__WIDTH_shift                      = 0,
+    PA_SC_LINE_STIPPLE                                    = 0x00028a0c,
+	LINE_PATTERN_mask                                 = 0xffff << 0,
+	LINE_PATTERN_shift                                = 0,
+	REPEAT_COUNT_mask                                 = 0xff << 16,
+	REPEAT_COUNT_shift                                = 16,
+	PATTERN_BIT_ORDER_bit                             = 1 << 28,
+	AUTO_RESET_CNTL_mask                              = 0x03 << 29,
+	AUTO_RESET_CNTL_shift                             = 29,
+    VGT_OUTPUT_PATH_CNTL                                  = 0x00028a10,
+	PATH_SELECT_mask                                  = 0x03 << 0,
+	PATH_SELECT_shift                                 = 0,
+	    VGT_OUTPATH_VTX_REUSE                         = 0x00,
+	    VGT_OUTPATH_TESS_EN                           = 0x01,
+	    VGT_OUTPATH_PASSTHRU                          = 0x02,
+	    VGT_OUTPATH_GS_BLOCK                          = 0x03,
+    VGT_HOS_CNTL                                          = 0x00028a14,
+	TESS_MODE_mask                                    = 0x03 << 0,
+	TESS_MODE_shift                                   = 0,
+    VGT_HOS_MAX_TESS_LEVEL                                = 0x00028a18,
+    VGT_HOS_MIN_TESS_LEVEL                                = 0x00028a1c,
+    VGT_HOS_REUSE_DEPTH                                   = 0x00028a20,
+	REUSE_DEPTH_mask                                  = 0xff << 0,
+	REUSE_DEPTH_shift                                 = 0,
+    VGT_GROUP_PRIM_TYPE                                   = 0x00028a24,
+	VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask               = 0x1f << 0,
+	VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift              = 0,
+	    VGT_GRP_3D_POINT                              = 0x00,
+	    VGT_GRP_3D_LINE                               = 0x01,
+	    VGT_GRP_3D_TRI                                = 0x02,
+	    VGT_GRP_3D_RECT                               = 0x03,
+	    VGT_GRP_3D_QUAD                               = 0x04,
+	    VGT_GRP_2D_COPY_RECT_V0                       = 0x05,
+	    VGT_GRP_2D_COPY_RECT_V1                       = 0x06,
+	    VGT_GRP_2D_COPY_RECT_V2                       = 0x07,
+	    VGT_GRP_2D_COPY_RECT_V3                       = 0x08,
+	    VGT_GRP_2D_FILL_RECT                          = 0x09,
+	    VGT_GRP_2D_LINE                               = 0x0a,
+	    VGT_GRP_2D_TRI                                = 0x0b,
+	    VGT_GRP_PRIM_INDEX_LINE                       = 0x0c,
+	    VGT_GRP_PRIM_INDEX_TRI                        = 0x0d,
+	    VGT_GRP_PRIM_INDEX_QUAD                       = 0x0e,
+	    VGT_GRP_3D_LINE_ADJ                           = 0x0f,
+	    VGT_GRP_3D_TRI_ADJ                            = 0x10,
+	RETAIN_ORDER_bit                                  = 1 << 14,
+	RETAIN_QUADS_bit                                  = 1 << 15,
+	PRIM_ORDER_mask                                   = 0x07 << 16,
+	PRIM_ORDER_shift                                  = 16,
+	    VGT_GRP_LIST                                  = 0x00,
+	    VGT_GRP_STRIP                                 = 0x01,
+	    VGT_GRP_FAN                                   = 0x02,
+	    VGT_GRP_LOOP                                  = 0x03,
+	    VGT_GRP_POLYGON                               = 0x04,
+    VGT_GROUP_FIRST_DECR                                  = 0x00028a28,
+	FIRST_DECR_mask                                   = 0x0f << 0,
+	FIRST_DECR_shift                                  = 0,
+    VGT_GROUP_DECR                                        = 0x00028a2c,
+	DECR_mask                                         = 0x0f << 0,
+	DECR_shift                                        = 0,
+    VGT_GROUP_VECT_0_CNTL                                 = 0x00028a30,
+	COMP_X_EN_bit                                     = 1 << 0,
+	COMP_Y_EN_bit                                     = 1 << 1,
+	COMP_Z_EN_bit                                     = 1 << 2,
+	COMP_W_EN_bit                                     = 1 << 3,
+	VGT_GROUP_VECT_0_CNTL__STRIDE_mask                = 0xff << 8,
+	VGT_GROUP_VECT_0_CNTL__STRIDE_shift               = 8,
+	SHIFT_mask                                        = 0xff << 16,
+	SHIFT_shift                                       = 16,
+    VGT_GROUP_VECT_1_CNTL                                 = 0x00028a34,
+/* 	COMP_X_EN_bit                                     = 1 << 0, */
+/* 	COMP_Y_EN_bit                                     = 1 << 1, */
+/* 	COMP_Z_EN_bit                                     = 1 << 2, */
+/* 	COMP_W_EN_bit                                     = 1 << 3, */
+	VGT_GROUP_VECT_1_CNTL__STRIDE_mask                = 0xff << 8,
+	VGT_GROUP_VECT_1_CNTL__STRIDE_shift               = 8,
+/* 	SHIFT_mask                                        = 0xff << 16, */
+/* 	SHIFT_shift                                       = 16, */
+    VGT_GROUP_VECT_0_FMT_CNTL                             = 0x00028a38,
+	X_CONV_mask                                       = 0x0f << 0,
+	X_CONV_shift                                      = 0,
+	    VGT_GRP_INDEX_16                              = 0x00,
+	    VGT_GRP_INDEX_32                              = 0x01,
+	    VGT_GRP_UINT_16                               = 0x02,
+	    VGT_GRP_UINT_32                               = 0x03,
+	    VGT_GRP_SINT_16                               = 0x04,
+	    VGT_GRP_SINT_32                               = 0x05,
+	    VGT_GRP_FLOAT_32                              = 0x06,
+	    VGT_GRP_AUTO_PRIM                             = 0x07,
+	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08,
+	X_OFFSET_mask                                     = 0x0f << 4,
+	X_OFFSET_shift                                    = 4,
+	Y_CONV_mask                                       = 0x0f << 8,
+	Y_CONV_shift                                      = 8,
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+	Y_OFFSET_mask                                     = 0x0f << 12,
+	Y_OFFSET_shift                                    = 12,
+	Z_CONV_mask                                       = 0x0f << 16,
+	Z_CONV_shift                                      = 16,
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+	Z_OFFSET_mask                                     = 0x0f << 20,
+	Z_OFFSET_shift                                    = 20,
+	W_CONV_mask                                       = 0x0f << 24,
+	W_CONV_shift                                      = 24,
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+	W_OFFSET_mask                                     = 0x0f << 28, /* NOTE(review): 0x0f << 28 does not fit in a 32-bit signed int (undefined per C11 6.5.7); result is typically a negative value -- verify users cast to unsigned before masking */
+	W_OFFSET_shift                                    = 28, /* bit position of the 4-bit W_OFFSET field selected by W_OFFSET_mask */
+    VGT_GROUP_VECT_1_FMT_CNTL                             = 0x00028a3c,
+/* 	X_CONV_mask                                       = 0x0f << 0, */
+/* 	X_CONV_shift                                      = 0, */
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/* 	X_OFFSET_mask                                     = 0x0f << 4, */
+/* 	X_OFFSET_shift                                    = 4, */
+/* 	Y_CONV_mask                                       = 0x0f << 8, */
+/* 	Y_CONV_shift                                      = 8, */
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/* 	Y_OFFSET_mask                                     = 0x0f << 12, */
+/* 	Y_OFFSET_shift                                    = 12, */
+/* 	Z_CONV_mask                                       = 0x0f << 16, */
+/* 	Z_CONV_shift                                      = 16, */
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/* 	Z_OFFSET_mask                                     = 0x0f << 20, */
+/* 	Z_OFFSET_shift                                    = 20, */
+/* 	W_CONV_mask                                       = 0x0f << 24, */
+/* 	W_CONV_shift                                      = 24, */
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/* 	W_OFFSET_mask                                     = 0x0f << 28, */
+/* 	W_OFFSET_shift                                    = 28, */
+    VGT_GS_MODE                                           = 0x00028a40,
+	MODE_mask                                         = 0x03 << 0,
+	MODE_shift                                        = 0,
+	    GS_OFF                                        = 0x00,
+	    GS_SCENARIO_A                                 = 0x01,
+	    GS_SCENARIO_B                                 = 0x02,
+	    GS_SCENARIO_G                                 = 0x03,
+	ES_PASSTHRU_bit                                   = 1 << 2,
+	CUT_MODE_mask                                     = 0x03 << 3,
+	CUT_MODE_shift                                    = 3,
+	    GS_CUT_1024                                   = 0x00,
+	    GS_CUT_512                                    = 0x01,
+	    GS_CUT_256                                    = 0x02,
+	    GS_CUT_128                                    = 0x03,
+    PA_SC_MPASS_PS_CNTL                                   = 0x00028a48,
+	MPASS_PIX_VEC_PER_PASS_mask                       = 0xfffff << 0,
+	MPASS_PIX_VEC_PER_PASS_shift                      = 0,
+	MPASS_PS_ENA_bit                                  = 1 << 31,
+    PA_SC_MODE_CNTL                                       = 0x00028a4c,
+	MSAA_ENABLE_bit                                   = 1 << 0,
+	CLIPRECT_ENABLE_bit                               = 1 << 1,
+	LINE_STIPPLE_ENABLE_bit                           = 1 << 2,
+	MULTI_CHIP_PRIM_DISCARD_ENAB_bit                  = 1 << 3,
+	WALK_ORDER_ENABLE_bit                             = 1 << 4,
+	HALVE_DETAIL_SAMPLE_PERF_bit                      = 1 << 5,
+	WALK_SIZE_bit                                     = 1 << 6,
+	WALK_ALIGNMENT_bit                                = 1 << 7,
+	WALK_ALIGN8_PRIM_FITS_ST_bit                      = 1 << 8,
+	TILE_COVER_NO_SCISSOR_bit                         = 1 << 9,
+	KILL_PIX_POST_HI_Z_bit                            = 1 << 10,
+	KILL_PIX_POST_DETAIL_MASK_bit                     = 1 << 11,
+	MULTI_CHIP_SUPERTILE_ENABLE_bit                   = 1 << 12,
+	TILE_COVER_DISABLE_bit                            = 1 << 13,
+	FORCE_EOV_CNTDWN_ENABLE_bit                       = 1 << 14,
+	FORCE_EOV_TILE_ENABLE_bit                         = 1 << 15,
+	FORCE_EOV_REZ_ENABLE_bit                          = 1 << 16,
+	PS_ITER_SAMPLE_bit                                = 1 << 17,
+    VGT_ENHANCE                                           = 0x00028a50,
+	VGT_ENHANCE__MI_TIMESTAMP_RES_mask                = 0x03 << 0,
+	VGT_ENHANCE__MI_TIMESTAMP_RES_shift               = 0,
+	    X_0_992_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_32   = 0x00,
+	    X_0_496_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_16   = 0x01,
+	    X_0_248_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_8    = 0x02,
+	    X_0_124_CLOCKS_LATENCY_RANGE_IN_STEPS_OF_4    = 0x03,
+	MISC_mask                                         = 0x3fffffff << 2,
+	MISC_shift                                        = 2,
+    VGT_GS_OUT_PRIM_TYPE                                  = 0x00028a6c,
+	OUTPRIM_TYPE_mask                                 = 0x3f << 0,
+	OUTPRIM_TYPE_shift                                = 0,
+	    POINTLIST                                     = 0x00,
+	    LINESTRIP                                     = 0x01,
+	    TRISTRIP                                      = 0x02,
+    VGT_DMA_SIZE                                          = 0x00028a74,
+    VGT_DMA_INDEX_TYPE                                    = 0x00028a7c,
+/* 	INDEX_TYPE_mask                                   = 0x03 << 0, */
+/* 	INDEX_TYPE_shift                                  = 0, */
+	    VGT_INDEX_16                                  = 0x00,
+	    VGT_INDEX_32                                  = 0x01,
+	SWAP_MODE_mask                                    = 0x03 << 2,
+	SWAP_MODE_shift                                   = 2,
+	    VGT_DMA_SWAP_NONE                             = 0x00,
+	    VGT_DMA_SWAP_16_BIT                           = 0x01,
+	    VGT_DMA_SWAP_32_BIT                           = 0x02,
+	    VGT_DMA_SWAP_WORD                             = 0x03,
+    VGT_PRIMITIVEID_EN                                    = 0x00028a84,
+	PRIMITIVEID_EN_bit                                = 1 << 0,
+    VGT_DMA_NUM_INSTANCES                                 = 0x00028a88,
+    VGT_EVENT_INITIATOR                                   = 0x00028a90,
+	EVENT_TYPE_mask                                   = 0x3f << 0,
+	EVENT_TYPE_shift                                  = 0,
+	    CACHE_FLUSH_TS                                = 0x04,
+	    CONTEXT_DONE                                  = 0x05,
+	    CACHE_FLUSH                                   = 0x06,
+	    VIZQUERY_START                                = 0x07,
+	    VIZQUERY_END                                  = 0x08,
+	    SC_WAIT_WC                                    = 0x09,
+	    MPASS_PS_CP_REFETCH                           = 0x0a,
+	    MPASS_PS_RST_START                            = 0x0b,
+	    MPASS_PS_INCR_START                           = 0x0c,
+	    RST_PIX_CNT                                   = 0x0d,
+	    RST_VTX_CNT                                   = 0x0e,
+	    VS_PARTIAL_FLUSH                              = 0x0f,
+	    PS_PARTIAL_FLUSH                              = 0x10,
+	    CACHE_FLUSH_AND_INV_TS_EVENT                  = 0x14,
+	    ZPASS_DONE                                    = 0x15,
+	    CACHE_FLUSH_AND_INV_EVENT                     = 0x16,
+	    PERFCOUNTER_START                             = 0x17,
+	    PERFCOUNTER_STOP                              = 0x18,
+	    PIPELINESTAT_START                            = 0x19,
+	    PIPELINESTAT_STOP                             = 0x1a,
+	    PERFCOUNTER_SAMPLE                            = 0x1b,
+	    FLUSH_ES_OUTPUT                               = 0x1c,
+	    FLUSH_GS_OUTPUT                               = 0x1d,
+	    SAMPLE_PIPELINESTAT                           = 0x1e,
+	    SO_VGTSTREAMOUT_FLUSH                         = 0x1f,
+	    SAMPLE_STREAMOUTSTATS                         = 0x20,
+	    RESET_VTX_CNT                                 = 0x21,
+	    BLOCK_CONTEXT_DONE                            = 0x22,
+	    CR_CONTEXT_DONE                               = 0x23,
+	    VGT_FLUSH                                     = 0x24,
+	    CR_DONE_TS                                    = 0x25,
+	    SQ_NON_EVENT                                  = 0x26,
+	    SC_SEND_DB_VPZ                                = 0x27,
+	    BOTTOM_OF_PIPE_TS                             = 0x28,
+	    DB_CACHE_FLUSH_AND_INV                        = 0x2a,
+	ADDRESS_HI_mask                                   = 0xff << 19,
+	ADDRESS_HI_shift                                  = 19,
+	EXTENDED_EVENT_bit                                = 1 << 27,
+    VGT_MULTI_PRIM_IB_RESET_EN                            = 0x00028a94,
+	RESET_EN_bit                                      = 1 << 0,
+    VGT_INSTANCE_STEP_RATE_0                              = 0x00028aa0,
+    VGT_INSTANCE_STEP_RATE_1                              = 0x00028aa4,
+    VGT_STRMOUT_EN                                        = 0x00028ab0,
+	STREAMOUT_bit                                     = 1 << 0,
+    VGT_REUSE_OFF                                         = 0x00028ab4,
+	REUSE_OFF_bit                                     = 1 << 0,
+    VGT_VTX_CNT_EN                                        = 0x00028ab8,
+	VTX_CNT_EN_bit                                    = 1 << 0,
+    VGT_STRMOUT_BUFFER_SIZE_0                             = 0x00028ad0,
+    VGT_STRMOUT_VTX_STRIDE_0                              = 0x00028ad4,
+	VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask             = 0x3ff << 0,
+	VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_0                             = 0x00028ad8,
+    VGT_STRMOUT_BUFFER_OFFSET_0                           = 0x00028adc,
+    VGT_STRMOUT_BUFFER_SIZE_1                             = 0x00028ae0,
+    VGT_STRMOUT_VTX_STRIDE_1                              = 0x00028ae4,
+	VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask             = 0x3ff << 0,
+	VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_1                             = 0x00028ae8,
+    VGT_STRMOUT_BUFFER_OFFSET_1                           = 0x00028aec,
+    VGT_STRMOUT_BUFFER_SIZE_2                             = 0x00028af0,
+    VGT_STRMOUT_VTX_STRIDE_2                              = 0x00028af4,
+	VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask             = 0x3ff << 0,
+	VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_2                             = 0x00028af8,
+    VGT_STRMOUT_BUFFER_OFFSET_2                           = 0x00028afc,
+    VGT_STRMOUT_BUFFER_SIZE_3                             = 0x00028b00,
+    VGT_STRMOUT_VTX_STRIDE_3                              = 0x00028b04,
+	VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask             = 0x3ff << 0,
+	VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_3                             = 0x00028b08,
+    VGT_STRMOUT_BUFFER_OFFSET_3                           = 0x00028b0c,
+    VGT_STRMOUT_BASE_OFFSET_0                             = 0x00028b10,
+    VGT_STRMOUT_BASE_OFFSET_1                             = 0x00028b14,
+    VGT_STRMOUT_BASE_OFFSET_2                             = 0x00028b18,
+    VGT_STRMOUT_BASE_OFFSET_3                             = 0x00028b1c,
+    VGT_STRMOUT_BUFFER_EN                                 = 0x00028b20,
+	BUFFER_0_EN_bit                                   = 1 << 0,
+	BUFFER_1_EN_bit                                   = 1 << 1,
+	BUFFER_2_EN_bit                                   = 1 << 2,
+	BUFFER_3_EN_bit                                   = 1 << 3,
+    VGT_STRMOUT_DRAW_OPAQUE_OFFSET                        = 0x00028b28,
+    VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE            = 0x00028b2c,
+    VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE                 = 0x00028b30,
+    VGT_STRMOUT_BASE_OFFSET_HI_0                          = 0x00028b44,
+	VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask    = 0x3f << 0,
+	VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift   = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_1                          = 0x00028b48,
+	VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask    = 0x3f << 0,
+	VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift   = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_2                          = 0x00028b4c,
+	VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask    = 0x3f << 0,
+	VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift   = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_3                          = 0x00028b50,
+	VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask    = 0x3f << 0,
+	VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift   = 0,
+    PA_SC_LINE_CNTL                                       = 0x00028c00,
+	BRES_CNTL_mask                                    = 0xff << 0,
+	BRES_CNTL_shift                                   = 0,
+	USE_BRES_CNTL_bit                                 = 1 << 8,
+	EXPAND_LINE_WIDTH_bit                             = 1 << 9,
+	LAST_PIXEL_bit                                    = 1 << 10,
+    PA_SC_AA_CONFIG                                       = 0x00028c04,
+	MSAA_NUM_SAMPLES_mask                             = 0x03 << 0,
+	MSAA_NUM_SAMPLES_shift                            = 0,
+	AA_MASK_CENTROID_DTMN_bit                         = 1 << 4,
+	MAX_SAMPLE_DIST_mask                              = 0x0f << 13,
+	MAX_SAMPLE_DIST_shift                             = 13,
+    PA_SU_VTX_CNTL                                        = 0x00028c08,
+	PIX_CENTER_bit                                    = 1 << 0,
+	PA_SU_VTX_CNTL__ROUND_MODE_mask                   = 0x03 << 1,
+	PA_SU_VTX_CNTL__ROUND_MODE_shift                  = 1,
+	    X_TRUNCATE                                    = 0x00,
+	    X_ROUND                                       = 0x01,
+	    X_ROUND_TO_EVEN                               = 0x02,
+	    X_ROUND_TO_ODD                                = 0x03,
+	QUANT_MODE_mask                                   = 0x07 << 3,
+	QUANT_MODE_shift                                  = 3,
+	    X_1_16TH                                      = 0x00,
+	    X_1_8TH                                       = 0x01,
+	    X_1_4TH                                       = 0x02,
+	    X_1_2                                         = 0x03,
+	    X_1                                           = 0x04,
+	    X_1_256TH                                     = 0x05,
+    PA_CL_GB_VERT_CLIP_ADJ                                = 0x00028c0c,
+    PA_CL_GB_VERT_DISC_ADJ                                = 0x00028c10,
+    PA_CL_GB_HORZ_CLIP_ADJ                                = 0x00028c14,
+    PA_CL_GB_HORZ_DISC_ADJ                                = 0x00028c18,
+    PA_SC_AA_SAMPLE_LOCS_MCTX                             = 0x00028c1c,
+/* 	S0_X_mask                                         = 0x0f << 0, */
+/* 	S0_X_shift                                        = 0, */
+/* 	S0_Y_mask                                         = 0x0f << 4, */
+/* 	S0_Y_shift                                        = 4, */
+/* 	S1_X_mask                                         = 0x0f << 8, */
+/* 	S1_X_shift                                        = 8, */
+/* 	S1_Y_mask                                         = 0x0f << 12, */
+/* 	S1_Y_shift                                        = 12, */
+/* 	S2_X_mask                                         = 0x0f << 16, */
+/* 	S2_X_shift                                        = 16, */
+/* 	S2_Y_mask                                         = 0x0f << 20, */
+/* 	S2_Y_shift                                        = 20, */
+/* 	S3_X_mask                                         = 0x0f << 24, */
+/* 	S3_X_shift                                        = 24, */
+/* 	S3_Y_mask                                         = 0x0f << 28, */
+/* 	S3_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX                      = 0x00028c20,
+/* 	S4_X_mask                                         = 0x0f << 0, */
+/* 	S4_X_shift                                        = 0, */
+/* 	S4_Y_mask                                         = 0x0f << 4, */
+/* 	S4_Y_shift                                        = 4, */
+/* 	S5_X_mask                                         = 0x0f << 8, */
+/* 	S5_X_shift                                        = 8, */
+/* 	S5_Y_mask                                         = 0x0f << 12, */
+/* 	S5_Y_shift                                        = 12, */
+/* 	S6_X_mask                                         = 0x0f << 16, */
+/* 	S6_X_shift                                        = 16, */
+/* 	S6_Y_mask                                         = 0x0f << 20, */
+/* 	S6_Y_shift                                        = 20, */
+/* 	S7_X_mask                                         = 0x0f << 24, */
+/* 	S7_X_shift                                        = 24, */
+/* 	S7_Y_mask                                         = 0x0f << 28, */
+/* 	S7_Y_shift                                        = 28, */
+    CB_CLRCMP_CONTROL                                     = 0x00028c30,
+	CLRCMP_FCN_SRC_mask                               = 0x07 << 0,
+	CLRCMP_FCN_SRC_shift                              = 0,
+	    CLRCMP_DRAW_ALWAYS                            = 0x00,
+	    CLRCMP_DRAW_NEVER                             = 0x01,
+	    CLRCMP_DRAW_ON_NEQ                            = 0x04,
+	    CLRCMP_DRAW_ON_EQ                             = 0x05,
+	CLRCMP_FCN_DST_mask                               = 0x07 << 8,
+	CLRCMP_FCN_DST_shift                              = 8,
+/* 	    CLRCMP_DRAW_ALWAYS                            = 0x00, */
+/* 	    CLRCMP_DRAW_NEVER                             = 0x01, */
+/* 	    CLRCMP_DRAW_ON_NEQ                            = 0x04, */
+/* 	    CLRCMP_DRAW_ON_EQ                             = 0x05, */
+	CLRCMP_FCN_SEL_mask                               = 0x03 << 24,
+	CLRCMP_FCN_SEL_shift                              = 24,
+	    CLRCMP_SEL_DST                                = 0x00,
+	    CLRCMP_SEL_SRC                                = 0x01,
+	    CLRCMP_SEL_AND                                = 0x02,
+    CB_CLRCMP_SRC                                         = 0x00028c34,
+    CB_CLRCMP_DST                                         = 0x00028c38,
+    CB_CLRCMP_MSK                                         = 0x00028c3c,
+    PA_SC_AA_MASK                                         = 0x00028c48,
+    VGT_VERTEX_REUSE_BLOCK_CNTL                           = 0x00028c58,
+	VTX_REUSE_DEPTH_mask                              = 0xff << 0,
+	VTX_REUSE_DEPTH_shift                             = 0,
+    VGT_OUT_DEALLOC_CNTL                                  = 0x00028c5c,
+	DEALLOC_DIST_mask                                 = 0x7f << 0,
+	DEALLOC_DIST_shift                                = 0,
+    DB_RENDER_CONTROL                                     = 0x00028d0c,
+	DEPTH_CLEAR_ENABLE_bit                            = 1 << 0,
+	STENCIL_CLEAR_ENABLE_bit                          = 1 << 1,
+	DEPTH_COPY_bit                                    = 1 << 2,
+	STENCIL_COPY_bit                                  = 1 << 3,
+	RESUMMARIZE_ENABLE_bit                            = 1 << 4,
+	STENCIL_COMPRESS_DISABLE_bit                      = 1 << 5,
+	DEPTH_COMPRESS_DISABLE_bit                        = 1 << 6,
+	COPY_CENTROID_bit                                 = 1 << 7,
+	COPY_SAMPLE_mask                                  = 0x07 << 8,
+	COPY_SAMPLE_shift                                 = 8,
+	ZPASS_INCREMENT_DISABLE_bit                       = 1 << 11,
+    DB_RENDER_OVERRIDE                                    = 0x00028d10,
+	FORCE_HIZ_ENABLE_mask                             = 0x03 << 0,
+	FORCE_HIZ_ENABLE_shift                            = 0,
+	    FORCE_OFF                                     = 0x00,
+	    FORCE_ENABLE                                  = 0x01,
+	    FORCE_DISABLE                                 = 0x02,
+	    FORCE_RESERVED                                = 0x03,
+	FORCE_HIS_ENABLE0_mask                            = 0x03 << 2,
+	FORCE_HIS_ENABLE0_shift                           = 2,
+/* 	    FORCE_OFF                                     = 0x00, */
+/* 	    FORCE_ENABLE                                  = 0x01, */
+/* 	    FORCE_DISABLE                                 = 0x02, */
+/* 	    FORCE_RESERVED                                = 0x03, */
+	FORCE_HIS_ENABLE1_mask                            = 0x03 << 4,
+	FORCE_HIS_ENABLE1_shift                           = 4,
+/* 	    FORCE_OFF                                     = 0x00, */
+/* 	    FORCE_ENABLE                                  = 0x01, */
+/* 	    FORCE_DISABLE                                 = 0x02, */
+/* 	    FORCE_RESERVED                                = 0x03, */
+	FORCE_SHADER_Z_ORDER_bit                          = 1 << 6,
+	FAST_Z_DISABLE_bit                                = 1 << 7,
+	FAST_STENCIL_DISABLE_bit                          = 1 << 8,
+	NOOP_CULL_DISABLE_bit                             = 1 << 9,
+	FORCE_COLOR_KILL_bit                              = 1 << 10,
+	FORCE_Z_READ_bit                                  = 1 << 11,
+	FORCE_STENCIL_READ_bit                            = 1 << 12,
+	FORCE_FULL_Z_RANGE_mask                           = 0x03 << 13,
+	FORCE_FULL_Z_RANGE_shift                          = 13,
+/* 	    FORCE_OFF                                     = 0x00, */
+/* 	    FORCE_ENABLE                                  = 0x01, */
+/* 	    FORCE_DISABLE                                 = 0x02, */
+/* 	    FORCE_RESERVED                                = 0x03, */
+	FORCE_QC_SMASK_CONFLICT_bit                       = 1 << 15,
+	DISABLE_VIEWPORT_CLAMP_bit                        = 1 << 16,
+	IGNORE_SC_ZRANGE_bit                              = 1 << 17,
+    DB_HTILE_SURFACE                                      = 0x00028d24,
+	HTILE_WIDTH_bit                                   = 1 << 0,
+	HTILE_HEIGHT_bit                                  = 1 << 1,
+	LINEAR_bit                                        = 1 << 2,
+	FULL_CACHE_bit                                    = 1 << 3,
+	HTILE_USES_PRELOAD_WIN_bit                        = 1 << 4,
+	PRELOAD_bit                                       = 1 << 5,
+	PREFETCH_WIDTH_mask                               = 0x3f << 6,
+	PREFETCH_WIDTH_shift                              = 6,
+	PREFETCH_HEIGHT_mask                              = 0x3f << 12,
+	PREFETCH_HEIGHT_shift                             = 12,
+    DB_SRESULTS_COMPARE_STATE1                            = 0x00028d2c,
+	COMPAREFUNC1_mask                                 = 0x07 << 0,
+	COMPAREFUNC1_shift                                = 0,
+/* 	    REF_NEVER                                     = 0x00, */
+/* 	    REF_LESS                                      = 0x01, */
+/* 	    REF_EQUAL                                     = 0x02, */
+/* 	    REF_LEQUAL                                    = 0x03, */
+/* 	    REF_GREATER                                   = 0x04, */
+/* 	    REF_NOTEQUAL                                  = 0x05, */
+/* 	    REF_GEQUAL                                    = 0x06, */
+/* 	    REF_ALWAYS                                    = 0x07, */
+	COMPAREVALUE1_mask                                = 0xff << 4,
+	COMPAREVALUE1_shift                               = 4,
+	COMPAREMASK1_mask                                 = 0xff << 12,
+	COMPAREMASK1_shift                                = 12,
+	ENABLE1_bit                                       = 1 << 24,
+    DB_PRELOAD_CONTROL                                    = 0x00028d30,
+	START_X_mask                                      = 0xff << 0,
+	START_X_shift                                     = 0,
+	START_Y_mask                                      = 0xff << 8,
+	START_Y_shift                                     = 8,
+	MAX_X_mask                                        = 0xff << 16,
+	MAX_X_shift                                       = 16,
+	MAX_Y_mask                                        = 0xff << 24,
+	MAX_Y_shift                                       = 24,
+    DB_PREFETCH_LIMIT                                     = 0x00028d34,
+	DEPTH_HEIGHT_TILE_MAX_mask                        = 0x3ff << 0,
+	DEPTH_HEIGHT_TILE_MAX_shift                       = 0,
+    PA_SU_POLY_OFFSET_DB_FMT_CNTL                         = 0x00028df8,
+	POLY_OFFSET_NEG_NUM_DB_BITS_mask                  = 0xff << 0,
+	POLY_OFFSET_NEG_NUM_DB_BITS_shift                 = 0,
+	POLY_OFFSET_DB_IS_FLOAT_FMT_bit                   = 1 << 8,
+    PA_SU_POLY_OFFSET_CLAMP                               = 0x00028dfc,
+    PA_SU_POLY_OFFSET_FRONT_SCALE                         = 0x00028e00,
+    PA_SU_POLY_OFFSET_FRONT_OFFSET                        = 0x00028e04,
+    PA_SU_POLY_OFFSET_BACK_SCALE                          = 0x00028e08,
+    PA_SU_POLY_OFFSET_BACK_OFFSET                         = 0x00028e0c,
+    PA_CL_POINT_X_RAD                                     = 0x00028e10,
+    PA_CL_POINT_Y_RAD                                     = 0x00028e14,
+    PA_CL_POINT_SIZE                                      = 0x00028e18,
+    PA_CL_POINT_CULL_RAD                                  = 0x00028e1c,
+    PA_CL_UCP_0_X                                         = 0x00028e20,
+	PA_CL_UCP_0_X_num                                 = 6,
+	PA_CL_UCP_0_X_offset                              = 16,
+    PA_CL_UCP_0_Y                                         = 0x00028e24,
+	PA_CL_UCP_0_Y_num                                 = 6,
+	PA_CL_UCP_0_Y_offset                              = 16,
+    PA_CL_UCP_0_Z                                         = 0x00028e28,
+	PA_CL_UCP_0_Z_num                                 = 6,
+	PA_CL_UCP_0_Z_offset                              = 16,
+    SQ_ALU_CONSTANT0_0                                    = 0x00030000,
+    SQ_ALU_CONSTANT1_0                                    = 0x00030004,
+    SQ_ALU_CONSTANT2_0                                    = 0x00030008,
+    SQ_ALU_CONSTANT3_0                                    = 0x0003000c,
+    SQ_VTX_CONSTANT_WORD0_0                               = 0x00038000,
+    SQ_TEX_RESOURCE_WORD0_0                               = 0x00038000,
+	DIM_mask                                          = 0x07 << 0,
+	DIM_shift                                         = 0,
+	    SQ_TEX_DIM_1D                                 = 0x00,
+	    SQ_TEX_DIM_2D                                 = 0x01,
+	    SQ_TEX_DIM_3D                                 = 0x02,
+	    SQ_TEX_DIM_CUBEMAP                            = 0x03,
+	    SQ_TEX_DIM_1D_ARRAY                           = 0x04,
+	    SQ_TEX_DIM_2D_ARRAY                           = 0x05,
+	    SQ_TEX_DIM_2D_MSAA                            = 0x06,
+	    SQ_TEX_DIM_2D_ARRAY_MSAA                      = 0x07,
+	SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask           = 0x0f << 3,
+	SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift          = 3,
+	TILE_TYPE_bit                                     = 1 << 7,
+	PITCH_mask                                        = 0x7ff << 8,
+	PITCH_shift                                       = 8,
+	TEX_WIDTH_mask                                    = 0x1fff << 19,
+	TEX_WIDTH_shift                                   = 19,
+    SQ_VTX_CONSTANT_WORD1_0                               = 0x00038004,
+    SQ_TEX_RESOURCE_WORD1_0                               = 0x00038004,
+	TEX_HEIGHT_mask                                   = 0x1fff << 0,
+	TEX_HEIGHT_shift                                  = 0,
+	TEX_DEPTH_mask                                    = 0x1fff << 13,
+	TEX_DEPTH_shift                                   = 13,
+	SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask         = 0x3f << 26,
+	SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift        = 26,
+    SQ_VTX_CONSTANT_WORD2_0                               = 0x00038008,
+	BASE_ADDRESS_HI_mask                              = 0xff << 0,
+	BASE_ADDRESS_HI_shift                             = 0,
+	SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask              = 0x7ff << 8,
+	SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift             = 8,
+	SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit              = 1 << 19,
+	SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask         = 0x3f << 20,
+	SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift        = 20,
+	SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask      = 0x03 << 26,
+	SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift     = 26,
+/* 	    SQ_NUM_FORMAT_NORM                            = 0x00, */
+/* 	    SQ_NUM_FORMAT_INT                             = 0x01, */
+/* 	    SQ_NUM_FORMAT_SCALED                          = 0x02, */
+	SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit      = 1 << 28,
+	SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit         = 1 << 29,
+	SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask         = 0x03 << 30,
+	SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift        = 30,
+/* 	    SQ_ENDIAN_NONE                                = 0x00, */
+/* 	    SQ_ENDIAN_8IN16                               = 0x01, */
+/* 	    SQ_ENDIAN_8IN32                               = 0x02, */
+    SQ_TEX_RESOURCE_WORD2_0                               = 0x00038008,
+    SQ_VTX_CONSTANT_WORD3_0                               = 0x0003800c,
+	MEM_REQUEST_SIZE_mask                             = 0x03 << 0,
+	MEM_REQUEST_SIZE_shift                            = 0,
+    SQ_TEX_RESOURCE_WORD3_0                               = 0x0003800c,
+    SQ_TEX_RESOURCE_WORD4_0                               = 0x00038010,
+	FORMAT_COMP_X_mask                                = 0x03 << 0,
+	FORMAT_COMP_X_shift                               = 0,
+	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00,
+	    SQ_FORMAT_COMP_SIGNED                         = 0x01,
+	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02,
+	FORMAT_COMP_Y_mask                                = 0x03 << 2,
+	FORMAT_COMP_Y_shift                               = 2,
+/* 	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */
+/* 	    SQ_FORMAT_COMP_SIGNED                         = 0x01, */
+/* 	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */
+	FORMAT_COMP_Z_mask                                = 0x03 << 4,
+	FORMAT_COMP_Z_shift                               = 4,
+/* 	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */
+/* 	    SQ_FORMAT_COMP_SIGNED                         = 0x01, */
+/* 	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */
+	FORMAT_COMP_W_mask                                = 0x03 << 6,
+	FORMAT_COMP_W_shift                               = 6,
+/* 	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */
+/* 	    SQ_FORMAT_COMP_SIGNED                         = 0x01, */
+/* 	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */
+	SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask      = 0x03 << 8,
+	SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift     = 8,
+/* 	    SQ_NUM_FORMAT_NORM                            = 0x00, */
+/* 	    SQ_NUM_FORMAT_INT                             = 0x01, */
+/* 	    SQ_NUM_FORMAT_SCALED                          = 0x02, */
+	SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit         = 1 << 10,
+	SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit        = 1 << 11,
+	SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask         = 0x03 << 12,
+	SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift        = 12,
+/* 	    SQ_ENDIAN_NONE                                = 0x00, */
+/* 	    SQ_ENDIAN_8IN16                               = 0x01, */
+/* 	    SQ_ENDIAN_8IN32                               = 0x02, */
+	REQUEST_SIZE_mask                                 = 0x03 << 14,
+	REQUEST_SIZE_shift                                = 14,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask           = 0x07 << 16,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift          = 16,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask           = 0x07 << 19,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift          = 19,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask           = 0x07 << 22,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift          = 22,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask           = 0x07 << 25,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift          = 25,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	BASE_LEVEL_mask                                   = 0x0f << 28,
+	BASE_LEVEL_shift                                  = 28,
+    SQ_TEX_RESOURCE_WORD5_0                               = 0x00038014,
+	LAST_LEVEL_mask                                   = 0x0f << 0,
+	LAST_LEVEL_shift                                  = 0,
+	BASE_ARRAY_mask                                   = 0x1fff << 4,
+	BASE_ARRAY_shift                                  = 4,
+	LAST_ARRAY_mask                                   = 0x1fff << 17,
+	LAST_ARRAY_shift                                  = 17,
+    SQ_TEX_RESOURCE_WORD6_0                               = 0x00038018,
+	MPEG_CLAMP_mask                                   = 0x03 << 0,
+	MPEG_CLAMP_shift                                  = 0,
+	    SQ_TEX_MPEG_CLAMP_OFF                         = 0x00,
+	    SQ_TEX_MPEG_9                                 = 0x01,
+	    SQ_TEX_MPEG_10                                = 0x02,
+	PERF_MODULATION_mask                              = 0x07 << 5,
+	PERF_MODULATION_shift                             = 5,
+	INTERLACED_bit                                    = 1 << 8,
+	SQ_TEX_RESOURCE_WORD6_0__TYPE_mask                = 0x03 << 30,
+	SQ_TEX_RESOURCE_WORD6_0__TYPE_shift               = 30,
+	    SQ_TEX_VTX_INVALID_TEXTURE                    = 0x00,
+	    SQ_TEX_VTX_INVALID_BUFFER                     = 0x01,
+	    SQ_TEX_VTX_VALID_TEXTURE                      = 0x02,
+	    SQ_TEX_VTX_VALID_BUFFER                       = 0x03,
+    SQ_VTX_CONSTANT_WORD6_0                               = 0x00038018,
+	SQ_VTX_CONSTANT_WORD6_0__TYPE_mask                = 0x03 << 30,
+	SQ_VTX_CONSTANT_WORD6_0__TYPE_shift               = 30,
+/* 	    SQ_TEX_VTX_INVALID_TEXTURE                    = 0x00, */
+/* 	    SQ_TEX_VTX_INVALID_BUFFER                     = 0x01, */
+/* 	    SQ_TEX_VTX_VALID_TEXTURE                      = 0x02, */
+/* 	    SQ_TEX_VTX_VALID_BUFFER                       = 0x03, */
+    SQ_TEX_SAMPLER_WORD0_0                                = 0x0003c000,
+	SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask              = 0x07 << 0,
+	SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift             = 0,
+	    SQ_TEX_WRAP                                   = 0x00,
+	    SQ_TEX_MIRROR                                 = 0x01,
+	    SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02,
+	    SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03,
+	    SQ_TEX_CLAMP_HALF_BORDER                      = 0x04,
+	    SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05,
+	    SQ_TEX_CLAMP_BORDER                           = 0x06,
+	    SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07,
+	CLAMP_Y_mask                                      = 0x07 << 3,
+	CLAMP_Y_shift                                     = 3,
+/* 	    SQ_TEX_WRAP                                   = 0x00, */
+/* 	    SQ_TEX_MIRROR                                 = 0x01, */
+/* 	    SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02, */
+/* 	    SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03, */
+/* 	    SQ_TEX_CLAMP_HALF_BORDER                      = 0x04, */
+/* 	    SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05, */
+/* 	    SQ_TEX_CLAMP_BORDER                           = 0x06, */
+/* 	    SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07, */
+	CLAMP_Z_mask                                      = 0x07 << 6,
+	CLAMP_Z_shift                                     = 6,
+/* 	    SQ_TEX_WRAP                                   = 0x00, */
+/* 	    SQ_TEX_MIRROR                                 = 0x01, */
+/* 	    SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02, */
+/* 	    SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03, */
+/* 	    SQ_TEX_CLAMP_HALF_BORDER                      = 0x04, */
+/* 	    SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05, */
+/* 	    SQ_TEX_CLAMP_BORDER                           = 0x06, */
+/* 	    SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07, */
+	XY_MAG_FILTER_mask                                = 0x07 << 9,
+	XY_MAG_FILTER_shift                               = 9,
+	    SQ_TEX_XY_FILTER_POINT                        = 0x00,
+	    SQ_TEX_XY_FILTER_BILINEAR                     = 0x01,
+	    SQ_TEX_XY_FILTER_BICUBIC                      = 0x02,
+	XY_MIN_FILTER_mask                                = 0x07 << 12,
+	XY_MIN_FILTER_shift                               = 12,
+/* 	    SQ_TEX_XY_FILTER_POINT                        = 0x00, */
+/* 	    SQ_TEX_XY_FILTER_BILINEAR                     = 0x01, */
+/* 	    SQ_TEX_XY_FILTER_BICUBIC                      = 0x02, */
+	Z_FILTER_mask                                     = 0x03 << 15,
+	Z_FILTER_shift                                    = 15,
+	    SQ_TEX_Z_FILTER_NONE                          = 0x00,
+	    SQ_TEX_Z_FILTER_POINT                         = 0x01,
+	    SQ_TEX_Z_FILTER_LINEAR                        = 0x02,
+	MIP_FILTER_mask                                   = 0x03 << 17,
+	MIP_FILTER_shift                                  = 17,
+/* 	    SQ_TEX_Z_FILTER_NONE                          = 0x00, */
+/* 	    SQ_TEX_Z_FILTER_POINT                         = 0x01, */
+/* 	    SQ_TEX_Z_FILTER_LINEAR                        = 0x02, */
+	BORDER_COLOR_TYPE_mask                            = 0x03 << 22,
+	BORDER_COLOR_TYPE_shift                           = 22,
+	    SQ_TEX_BORDER_COLOR_TRANS_BLACK               = 0x00,
+	    SQ_TEX_BORDER_COLOR_OPAQUE_BLACK              = 0x01,
+	    SQ_TEX_BORDER_COLOR_OPAQUE_WHITE              = 0x02,
+	    SQ_TEX_BORDER_COLOR_REGISTER                  = 0x03,
+	POINT_SAMPLING_CLAMP_bit                          = 1 << 24,
+	TEX_ARRAY_OVERRIDE_bit                            = 1 << 25,
+	DEPTH_COMPARE_FUNCTION_mask                       = 0x07 << 26,
+	DEPTH_COMPARE_FUNCTION_shift                      = 26,
+	    SQ_TEX_DEPTH_COMPARE_NEVER                    = 0x00,
+	    SQ_TEX_DEPTH_COMPARE_LESS                     = 0x01,
+	    SQ_TEX_DEPTH_COMPARE_EQUAL                    = 0x02,
+	    SQ_TEX_DEPTH_COMPARE_LESSEQUAL                = 0x03,
+	    SQ_TEX_DEPTH_COMPARE_GREATER                  = 0x04,
+	    SQ_TEX_DEPTH_COMPARE_NOTEQUAL                 = 0x05,
+	    SQ_TEX_DEPTH_COMPARE_GREATEREQUAL             = 0x06,
+	    SQ_TEX_DEPTH_COMPARE_ALWAYS                   = 0x07,
+	CHROMA_KEY_mask                                   = 0x03 << 29,
+	CHROMA_KEY_shift                                  = 29,
+	    SQ_TEX_CHROMA_KEY_DISABLED                    = 0x00,
+	    SQ_TEX_CHROMA_KEY_KILL                        = 0x01,
+	    SQ_TEX_CHROMA_KEY_BLEND                       = 0x02,
+	LOD_USES_MINOR_AXIS_bit                           = 1 << 31,
+    SQ_TEX_SAMPLER_WORD1_0                                = 0x0003c004,
+	MIN_LOD_mask                                      = 0x3ff << 0,
+	MIN_LOD_shift                                     = 0,
+	MAX_LOD_mask                                      = 0x3ff << 10,
+	MAX_LOD_shift                                     = 10,
+	SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_mask             = 0xfff << 20,
+	SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift            = 20,
+    SQ_TEX_SAMPLER_WORD2_0                                = 0x0003c008,
+	LOD_BIAS_SEC_mask                                 = 0xfff << 0,
+	LOD_BIAS_SEC_shift                                = 0,
+	MC_COORD_TRUNCATE_bit                             = 1 << 12,
+	SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit         = 1 << 13,
+	HIGH_PRECISION_FILTER_bit                         = 1 << 14,
+	PERF_MIP_mask                                     = 0x07 << 15,
+	PERF_MIP_shift                                    = 15,
+	PERF_Z_mask                                       = 0x03 << 18,
+	PERF_Z_shift                                      = 18,
+	FETCH_4_bit                                       = 1 << 26,
+	SAMPLE_IS_PCF_bit                                 = 1 << 27,
+	SQ_TEX_SAMPLER_WORD2_0__TYPE_bit                  = 1 << 31,
+    SQ_VTX_BASE_VTX_LOC                                   = 0x0003cff0,
+    SQ_VTX_START_INST_LOC                                 = 0x0003cff4,
+    SQ_LOOP_CONST_DX10_0                                  = 0x0003e200,
+    SQ_LOOP_CONST_0                                       = 0x0003e200,
+	SQ_LOOP_CONST_0__COUNT_mask                       = 0xfff << 0,
+	SQ_LOOP_CONST_0__COUNT_shift                      = 0,
+	INIT_mask                                         = 0xfff << 12,
+	INIT_shift                                        = 12,
+	INC_mask                                          = 0xff << 24,
+	INC_shift                                         = 24,
+    SQ_BOOL_CONST_0                                       = 0x0003e380,
+	SQ_BOOL_CONST_0_num                               = 3,
+
+} ;
+
+#endif /* _AUTOREGS */
+
diff --git a/src/mesa/drivers/dri/r600/r600_reg_r6xx.h b/src/mesa/drivers/dri/r600/r600_reg_r6xx.h
new file mode 100644
index 0000000000..74af7b4fed
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_reg_r6xx.h
@@ -0,0 +1,492 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009  Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_R6xx_H_
+#define _R600_REG_R6xx_H_
+
+/*
+ * Registers and register bit fields for R6xx chips that are not documented yet
+ */
+
+enum {
+
+    MM_INDEX                                              = 0x0000,
+    MM_DATA                                               = 0x0004,
+
+    SRBM_STATUS                                           = 0x0e50,
+	RLC_RQ_PENDING_bit                                = 1 << 3,
+	RCU_RQ_PENDING_bit                                = 1 << 4,
+	GRBM_RQ_PENDING_bit                               = 1 << 5,
+	HI_RQ_PENDING_bit                                 = 1 << 6,
+	IO_EXTERN_SIGNAL_bit                              = 1 << 7,
+	VMC_BUSY_bit                                      = 1 << 8,
+	MCB_BUSY_bit                                      = 1 << 9,
+	MCDZ_BUSY_bit                                     = 1 << 10,
+	MCDY_BUSY_bit                                     = 1 << 11,
+	MCDX_BUSY_bit                                     = 1 << 12,
+	MCDW_BUSY_bit                                     = 1 << 13,
+	SEM_BUSY_bit                                      = 1 << 14,
+	SRBM_STATUS__RLC_BUSY_bit                         = 1 << 15,
+	PDMA_BUSY_bit                                     = 1 << 16,
+	IH_BUSY_bit                                       = 1 << 17,
+	CSC_BUSY_bit                                      = 1 << 20,
+	CMC7_BUSY_bit                                     = 1 << 21,
+	CMC6_BUSY_bit                                     = 1 << 22,
+	CMC5_BUSY_bit                                     = 1 << 23,
+	CMC4_BUSY_bit                                     = 1 << 24,
+	CMC3_BUSY_bit                                     = 1 << 25,
+	CMC2_BUSY_bit                                     = 1 << 26,
+	CMC1_BUSY_bit                                     = 1 << 27,
+	CMC0_BUSY_bit                                     = 1 << 28,
+	BIF_BUSY_bit                                      = 1 << 29,
+	IDCT_BUSY_bit                                     = 1 << 30,
+
+    SRBM_READ_ERROR                                       = 0x0e98,
+	READ_ADDRESS_mask                                 = 0xffff << 2,
+	READ_ADDRESS_shift                                = 2,
+	READ_REQUESTER_HI_bit                             = 1 << 24,
+	READ_REQUESTER_GRBM_bit                           = 1 << 25,
+	READ_REQUESTER_RCU_bit                            = 1 << 26,
+	READ_REQUESTER_RLC_bit                            = 1 << 27,
+	READ_ERROR_bit                                    = 1 << 31,
+
+    SRBM_INT_STATUS                                       = 0x0ea4,
+	RDERR_INT_STAT_bit                                = 1 << 0,
+	GFX_CNTX_SWITCH_INT_STAT_bit                      = 1 << 1,
+    SRBM_INT_ACK                                          = 0x0ea8,
+	RDERR_INT_ACK_bit                                 = 1 << 0,
+	GFX_CNTX_SWITCH_INT_ACK_bit                       = 1 << 1,
+
+    R6XX_MC_VM_FB_LOCATION                                = 0x2180,
+
+    VENDOR_DEVICE_ID                                      = 0x4000,
+
+    D1GRPH_PRIMARY_SURFACE_ADDRESS                        = 0x6110,
+    D1GRPH_PITCH                                          = 0x6120,
+    D1GRPH_Y_END                                          = 0x6138,
+
+    GRBM_STATUS                                           = 0x8010,
+	CMDFIFO_AVAIL_mask                                = 0x1f << 0,
+	CMDFIFO_AVAIL_shift                               = 0,
+	SRBM_RQ_PENDING_bit                               = 1 << 5,
+	CP_RQ_PENDING_bit                                 = 1 << 6,
+	CF_RQ_PENDING_bit                                 = 1 << 7,
+	PF_RQ_PENDING_bit                                 = 1 << 8,
+	GRBM_EE_BUSY_bit                                  = 1 << 10,
+	GRBM_STATUS__VC_BUSY_bit                          = 1 << 11,
+	DB03_CLEAN_bit                                    = 1 << 12,
+	CB03_CLEAN_bit                                    = 1 << 13,
+	VGT_BUSY_NO_DMA_bit                               = 1 << 16,
+	GRBM_STATUS__VGT_BUSY_bit                         = 1 << 17,
+	TA03_BUSY_bit                                     = 1 << 18,
+	GRBM_STATUS__TC_BUSY_bit                          = 1 << 19,
+	SX_BUSY_bit                                       = 1 << 20,
+	SH_BUSY_bit                                       = 1 << 21,
+	SPI03_BUSY_bit                                    = 1 << 22,
+	SMX_BUSY_bit                                      = 1 << 23,
+	SC_BUSY_bit                                       = 1 << 24,
+	PA_BUSY_bit                                       = 1 << 25,
+	DB03_BUSY_bit                                     = 1 << 26,
+	CR_BUSY_bit                                       = 1 << 27,
+	CP_COHERENCY_BUSY_bit                             = 1 << 28,
+	GRBM_STATUS__CP_BUSY_bit                          = 1 << 29,
+	CB03_BUSY_bit                                     = 1 << 30,
+	GUI_ACTIVE_bit                                    = 1 << 31,
+    GRBM_STATUS2                                          = 0x8014,
+	CR_CLEAN_bit                                      = 1 << 0,
+	SMX_CLEAN_bit                                     = 1 << 1,
+	SPI0_BUSY_bit                                     = 1 << 8,
+	SPI1_BUSY_bit                                     = 1 << 9,
+	SPI2_BUSY_bit                                     = 1 << 10,
+	SPI3_BUSY_bit                                     = 1 << 11,
+	TA0_BUSY_bit                                      = 1 << 12,
+	TA1_BUSY_bit                                      = 1 << 13,
+	TA2_BUSY_bit                                      = 1 << 14,
+	TA3_BUSY_bit                                      = 1 << 15,
+	DB0_BUSY_bit                                      = 1 << 16,
+	DB1_BUSY_bit                                      = 1 << 17,
+	DB2_BUSY_bit                                      = 1 << 18,
+	DB3_BUSY_bit                                      = 1 << 19,
+	CB0_BUSY_bit                                      = 1 << 20,
+	CB1_BUSY_bit                                      = 1 << 21,
+	CB2_BUSY_bit                                      = 1 << 22,
+	CB3_BUSY_bit                                      = 1 << 23,
+    GRBM_SOFT_RESET                                       = 0x8020,
+	SOFT_RESET_CP_bit                                 = 1 << 0,
+	SOFT_RESET_CB_bit                                 = 1 << 1,
+	SOFT_RESET_CR_bit                                 = 1 << 2,
+	SOFT_RESET_DB_bit                                 = 1 << 3,
+	SOFT_RESET_PA_bit                                 = 1 << 5,
+	SOFT_RESET_SC_bit                                 = 1 << 6,
+	SOFT_RESET_SMX_bit                                = 1 << 7,
+	SOFT_RESET_SPI_bit                                = 1 << 8,
+	SOFT_RESET_SH_bit                                 = 1 << 9,
+	SOFT_RESET_SX_bit                                 = 1 << 10,
+	SOFT_RESET_TC_bit                                 = 1 << 11,
+	SOFT_RESET_TA_bit                                 = 1 << 12,
+	SOFT_RESET_VC_bit                                 = 1 << 13,
+	SOFT_RESET_VGT_bit                                = 1 << 14,
+	SOFT_RESET_GRBM_GCA_bit                           = 1 << 15,
+
+    WAIT_UNTIL                                            = 0x8040,
+	WAIT_CP_DMA_IDLE_bit                              = 1 << 8,
+	WAIT_CMDFIFO_bit                                  = 1 << 10,
+	WAIT_2D_IDLE_bit                                  = 1 << 14,
+	WAIT_3D_IDLE_bit                                  = 1 << 15,
+	WAIT_2D_IDLECLEAN_bit                             = 1 << 16,
+	WAIT_3D_IDLECLEAN_bit                             = 1 << 17,
+	WAIT_EXTERN_SIG_bit                               = 1 << 19,
+	CMDFIFO_ENTRIES_mask                              = 0x1f << 20,
+	CMDFIFO_ENTRIES_shift                             = 20,
+
+    GRBM_READ_ERROR                                       = 0x8058,
+/* 	READ_ADDRESS_mask                                 = 0xffff << 2, */
+/* 	READ_ADDRESS_shift                                = 2, */
+	READ_REQUESTER_SRBM_bit                           = 1 << 28,
+	READ_REQUESTER_CP_bit                             = 1 << 29,
+	READ_REQUESTER_WU_POLL_bit                        = 1 << 30,
+/* 	READ_ERROR_bit                                    = 1 << 31, */
+
+    SCRATCH_REG0		                          = 0x8500,
+    SCRATCH_REG1		                          = 0x8504,
+    SCRATCH_REG2		                          = 0x8508,
+    SCRATCH_REG3		                          = 0x850c,
+    SCRATCH_REG4		                          = 0x8510,
+    SCRATCH_REG5		                          = 0x8514,
+    SCRATCH_REG6		                          = 0x8518,
+    SCRATCH_REG7		                          = 0x851c,
+    SCRATCH_UMSK		                          = 0x8540,
+    SCRATCH_ADDR		                          = 0x8544,
+
+    CP_COHER_CNTL                                         = 0x85f0,
+	DEST_BASE_0_ENA_bit                               = 1 << 0,
+	DEST_BASE_1_ENA_bit                               = 1 << 1,
+	SO0_DEST_BASE_ENA_bit                             = 1 << 2,
+	SO1_DEST_BASE_ENA_bit                             = 1 << 3,
+	SO2_DEST_BASE_ENA_bit                             = 1 << 4,
+	SO3_DEST_BASE_ENA_bit                             = 1 << 5,
+	CB0_DEST_BASE_ENA_bit                             = 1 << 6,
+	CB1_DEST_BASE_ENA_bit                             = 1 << 7,
+	CB2_DEST_BASE_ENA_bit                             = 1 << 8,
+	CB3_DEST_BASE_ENA_bit                             = 1 << 9,
+	CB4_DEST_BASE_ENA_bit                             = 1 << 10,
+	CB5_DEST_BASE_ENA_bit                             = 1 << 11,
+	CB6_DEST_BASE_ENA_bit                             = 1 << 12,
+	CB7_DEST_BASE_ENA_bit                             = 1 << 13,
+	DB_DEST_BASE_ENA_bit                              = 1 << 14,
+	CR_DEST_BASE_ENA_bit                              = 1 << 15,
+	TC_ACTION_ENA_bit                                 = 1 << 23,
+	VC_ACTION_ENA_bit                                 = 1 << 24,
+	CB_ACTION_ENA_bit                                 = 1 << 25,
+	DB_ACTION_ENA_bit                                 = 1 << 26,
+	SH_ACTION_ENA_bit                                 = 1 << 27,
+	SMX_ACTION_ENA_bit                                = 1 << 28,
+	CR0_ACTION_ENA_bit                                = 1 << 29,
+	CR1_ACTION_ENA_bit                                = 1 << 30,
+	CR2_ACTION_ENA_bit                                = 1 << 31,
+    CP_COHER_SIZE                                         = 0x85f4,
+    CP_COHER_BASE                                         = 0x85f8,
+    CP_COHER_STATUS                                       = 0x85fc,
+	MATCHING_GFX_CNTX_mask                            = 0xff << 0,
+	MATCHING_GFX_CNTX_shift                           = 0,
+	MATCHING_CR_CNTX_mask                             = 0xffff << 8,
+	MATCHING_CR_CNTX_shift                            = 8,
+	STATUS_bit                                        = 1 << 31,
+
+    CP_STALLED_STAT1                                      = 0x8674,
+	RBIU_TO_DMA_NOT_RDY_TO_RCV_bit                    = 1 << 0,
+	RBIU_TO_IBS_NOT_RDY_TO_RCV_bit                    = 1 << 1,
+	RBIU_TO_SEM_NOT_RDY_TO_RCV_bit                    = 1 << 2,
+	RBIU_TO_2DREGS_NOT_RDY_TO_RCV_bit                 = 1 << 3,
+	RBIU_TO_MEMWR_NOT_RDY_TO_RCV_bit                  = 1 << 4,
+	RBIU_TO_MEMRD_NOT_RDY_TO_RCV_bit                  = 1 << 5,
+	RBIU_TO_EOPD_NOT_RDY_TO_RCV_bit                   = 1 << 6,
+	RBIU_TO_RECT_NOT_RDY_TO_RCV_bit                   = 1 << 7,
+	RBIU_TO_STRMO_NOT_RDY_TO_RCV_bit                  = 1 << 8,
+	RBIU_TO_PSTAT_NOT_RDY_TO_RCV_bit                  = 1 << 9,
+	MIU_WAITING_ON_RDREQ_FREE_bit                     = 1 << 16,
+	MIU_WAITING_ON_WRREQ_FREE_bit                     = 1 << 17,
+	MIU_NEEDS_AVAIL_WRREQ_PHASE_bit                   = 1 << 18,
+	RCIU_WAITING_ON_GRBM_FREE_bit                     = 1 << 24,
+	RCIU_WAITING_ON_VGT_FREE_bit                      = 1 << 25,
+	RCIU_STALLED_ON_ME_READ_bit                       = 1 << 26,
+	RCIU_STALLED_ON_DMA_READ_bit                      = 1 << 27,
+	RCIU_HALTED_BY_REG_VIOLATION_bit                  = 1 << 28,
+    CP_STALLED_STAT2                                      = 0x8678,
+	PFP_TO_CSF_NOT_RDY_TO_RCV_bit                     = 1 << 0,
+	PFP_TO_MEQ_NOT_RDY_TO_RCV_bit                     = 1 << 1,
+	PFP_TO_VGT_NOT_RDY_TO_RCV_bit                     = 1 << 2,
+	PFP_HALTED_BY_INSTR_VIOLATION_bit                 = 1 << 3,
+	MULTIPASS_IB_PENDING_IN_PFP_bit                   = 1 << 4,
+	ME_BRUSH_WC_NOT_RDY_TO_RCV_bit                    = 1 << 8,
+	ME_STALLED_ON_BRUSH_LOGIC_bit                     = 1 << 9,
+	CR_CNTX_NOT_AVAIL_TO_ME_bit                       = 1 << 10,
+	GFX_CNTX_NOT_AVAIL_TO_ME_bit                      = 1 << 11,
+	ME_RCIU_NOT_RDY_TO_RCV_bit                        = 1 << 12,
+	ME_TO_CONST_NOT_RDY_TO_RCV_bit                    = 1 << 13,
+	ME_WAITING_DATA_FROM_PFP_bit                      = 1 << 14,
+	ME_WAITING_ON_PARTIAL_FLUSH_bit                   = 1 << 15,
+	RECT_FIFO_NEEDS_CR_RECT_DONE_bit                  = 1 << 16,
+	RECT_FIFO_NEEDS_WR_CONFIRM_bit                    = 1 << 17,
+	EOPD_FIFO_NEEDS_SC_EOP_DONE_bit                   = 1 << 18,
+	EOPD_FIFO_NEEDS_SMX_EOP_DONE_bit                  = 1 << 19,
+	EOPD_FIFO_NEEDS_WR_CONFIRM_bit                    = 1 << 20,
+	EOPD_FIFO_NEEDS_SIGNAL_SEM_bit                    = 1 << 21,
+	SO_NUMPRIM_FIFO_NEEDS_SOADDR_bit                  = 1 << 22,
+	SO_NUMPRIM_FIFO_NEEDS_NUMPRIM_bit                 = 1 << 23,
+	PIPE_STATS_FIFO_NEEDS_SAMPLE_bit                  = 1 << 24,
+	SURF_SYNC_NEEDS_IDLE_CNTXS_bit                    = 1 << 30,
+	SURF_SYNC_NEEDS_ALL_CLEAN_bit                     = 1 << 31,
+    CP_BUSY_STAT                                          = 0x867c,
+	REG_BUS_FIFO_BUSY_bit                             = 1 << 0,
+	RING_FETCHING_DATA_bit                            = 1 << 1,
+	INDR1_FETCHING_DATA_bit                           = 1 << 2,
+	INDR2_FETCHING_DATA_bit                           = 1 << 3,
+	STATE_FETCHING_DATA_bit                           = 1 << 4,
+	PRED_FETCHING_DATA_bit                            = 1 << 5,
+	COHER_CNTR_NEQ_ZERO_bit                           = 1 << 6,
+	PFP_PARSING_PACKETS_bit                           = 1 << 7,
+	ME_PARSING_PACKETS_bit                            = 1 << 8,
+	RCIU_PFP_BUSY_bit                                 = 1 << 9,
+	RCIU_ME_BUSY_bit                                  = 1 << 10,
+	OUTSTANDING_READ_TAGS_bit                         = 1 << 11,
+	SEM_CMDFIFO_NOT_EMPTY_bit                         = 1 << 12,
+	SEM_FAILED_AND_HOLDING_bit                        = 1 << 13,
+	SEM_POLLING_FOR_PASS_bit                          = 1 << 14,
+	_3D_BUSY_bit                                      = 1 << 15,
+	_2D_BUSY_bit                                      = 1 << 16,
+    CP_STAT                                               = 0x8680,
+	CSF_RING_BUSY_bit                                 = 1 << 0,
+	CSF_WPTR_POLL_BUSY_bit                            = 1 << 1,
+	CSF_INDIRECT1_BUSY_bit                            = 1 << 2,
+	CSF_INDIRECT2_BUSY_bit                            = 1 << 3,
+	CSF_STATE_BUSY_bit                                = 1 << 4,
+	CSF_PREDICATE_BUSY_bit                            = 1 << 5,
+	CSF_BUSY_bit                                      = 1 << 6,
+	MIU_RDREQ_BUSY_bit                                = 1 << 7,
+	MIU_WRREQ_BUSY_bit                                = 1 << 8,
+	ROQ_RING_BUSY_bit                                 = 1 << 9,
+	ROQ_INDIRECT1_BUSY_bit                            = 1 << 10,
+	ROQ_INDIRECT2_BUSY_bit                            = 1 << 11,
+	ROQ_STATE_BUSY_bit                                = 1 << 12,
+	ROQ_PREDICATE_BUSY_bit                            = 1 << 13,
+	ROQ_ALIGN_BUSY_bit                                = 1 << 14,
+	PFP_BUSY_bit                                      = 1 << 15,
+	MEQ_BUSY_bit                                      = 1 << 16,
+	ME_BUSY_bit                                       = 1 << 17,
+	QUERY_BUSY_bit                                    = 1 << 18,
+	SEMAPHORE_BUSY_bit                                = 1 << 19,
+	INTERRUPT_BUSY_bit                                = 1 << 20,
+	SURFACE_SYNC_BUSY_bit                             = 1 << 21,
+	DMA_BUSY_bit                                      = 1 << 22,
+	RCIU_BUSY_bit                                     = 1 << 23,
+	CP_STAT__CP_BUSY_bit                              = 1 << 31,
+
+    CP_ME_CNTL                                            = 0x86d8,
+	ME_STATMUX_mask                                   = 0xff << 0,
+	ME_STATMUX_shift                                  = 0,
+	ME_HALT_bit                                       = 1 << 28,
+    CP_ME_STATUS                                          = 0x86dc,
+
+    CP_RB_RPTR                                            = 0x8700,
+	RB_RPTR_mask                                      = 0xfffff << 0,
+	RB_RPTR_shift                                     = 0,
+    CP_RB_WPTR_DELAY                                      = 0x8704,
+	PRE_WRITE_TIMER_mask                              = 0xfffffff << 0,
+	PRE_WRITE_TIMER_shift                             = 0,
+	PRE_WRITE_LIMIT_mask                              = 0x0f << 28,
+	PRE_WRITE_LIMIT_shift                             = 28,
+
+    CP_ROQ_RB_STAT                                        = 0x8780,
+	ROQ_RPTR_PRIMARY_mask                             = 0x3ff << 0,
+	ROQ_RPTR_PRIMARY_shift                            = 0,
+	ROQ_WPTR_PRIMARY_mask                             = 0x3ff << 16,
+	ROQ_WPTR_PRIMARY_shift                            = 16,
+    CP_ROQ_IB1_STAT                                       = 0x8784,
+	ROQ_RPTR_INDIRECT1_mask                           = 0x3ff << 0,
+	ROQ_RPTR_INDIRECT1_shift                          = 0,
+	ROQ_WPTR_INDIRECT1_mask                           = 0x3ff << 16,
+	ROQ_WPTR_INDIRECT1_shift                          = 16,
+    CP_ROQ_IB2_STAT                                       = 0x8788,
+	ROQ_RPTR_INDIRECT2_mask                           = 0x3ff << 0,
+	ROQ_RPTR_INDIRECT2_shift                          = 0,
+	ROQ_WPTR_INDIRECT2_mask                           = 0x3ff << 16,
+	ROQ_WPTR_INDIRECT2_shift                          = 16,
+
+    CP_MEQ_STAT                                           = 0x8794,
+	MEQ_RPTR_mask                                     = 0x3ff << 0,
+	MEQ_RPTR_shift                                    = 0,
+	MEQ_WPTR_mask                                     = 0x3ff << 16,
+	MEQ_WPTR_shift                                    = 16,
+
+    CC_GC_SHADER_PIPE_CONFIG                              = 0x8950,
+	INACTIVE_QD_PIPES_mask                            = 0xff << 8,
+	INACTIVE_QD_PIPES_shift                           = 8,
+	    R6XX_MAX_QD_PIPES                             = 8,
+	INACTIVE_SIMDS_mask                               = 0xff << 16,
+	INACTIVE_SIMDS_shift                              = 16,
+	    R6XX_MAX_SIMDS                                = 8,
+    GC_USER_SHADER_PIPE_CONFIG                            = 0x8954,
+
+    VC_ENHANCE                                            = 0x9714,
+    DB_DEBUG                                              = 0x9830,
+        PREZ_MUST_WAIT_FOR_POSTZ_DONE                     = 1 << 31,
+
+    DB_WATERMARKS                                         = 0x00009838,
+	DEPTH_FREE_mask                                   = 0x1f << 0,
+	DEPTH_FREE_shift                                  = 0,
+	DEPTH_FLUSH_mask                                  = 0x3f << 5,
+	DEPTH_FLUSH_shift                                 = 5,
+	FORCE_SUMMARIZE_mask                              = 0x0f << 11,
+	FORCE_SUMMARIZE_shift                             = 11,
+	DEPTH_PENDING_FREE_mask                           = 0x1f << 15,
+	DEPTH_PENDING_FREE_shift                          = 15,
+	DEPTH_CACHELINE_FREE_mask                         = 0x1f << 20,
+	DEPTH_CACHELINE_FREE_shift                        = 20,
+	EARLY_Z_PANIC_DISABLE_bit                         = 1 << 25,
+	LATE_Z_PANIC_DISABLE_bit                          = 1 << 26,
+	RE_Z_PANIC_DISABLE_bit                            = 1 << 27,
+	DB_EXTRA_DEBUG_mask                               = 0x0f << 28,
+	DB_EXTRA_DEBUG_shift                              = 28,
+
+    CP_RB_BASE                                            = 0xc100,
+    CP_RB_CNTL                                            = 0xc104,
+        RB_BUFSZ_mask                                     = 0x3f << 0,
+    CP_RB_WPTR                                            = 0xc114,
+	RB_WPTR_mask                                      = 0xfffff << 0,
+	RB_WPTR_shift                                     = 0,
+    CP_RB_RPTR_WR                                         = 0xc108,
+	RB_RPTR_WR_mask                                   = 0xfffff << 0,
+	RB_RPTR_WR_shift                                  = 0,
+
+    CP_INT_STATUS                                         = 0xc128,
+	DISABLE_CNTX_SWITCH_INT_STAT_bit                  = 1 << 0,
+	ENABLE_CNTX_SWITCH_INT_STAT_bit                   = 1 << 1,
+	SEM_SIGNAL_INT_STAT_bit                           = 1 << 18,
+	CNTX_BUSY_INT_STAT_bit                            = 1 << 19,
+	CNTX_EMPTY_INT_STAT_bit                           = 1 << 20,
+	WAITMEM_SEM_INT_STAT_bit                          = 1 << 21,
+	PRIV_INSTR_INT_STAT_bit                           = 1 << 22,
+	PRIV_REG_INT_STAT_bit                             = 1 << 23,
+	OPCODE_ERROR_INT_STAT_bit                         = 1 << 24,
+	SCRATCH_INT_STAT_bit                              = 1 << 25,
+	TIME_STAMP_INT_STAT_bit                           = 1 << 26,
+	RESERVED_BIT_ERROR_INT_STAT_bit                   = 1 << 27,
+	DMA_INT_STAT_bit                                  = 1 << 28,
+	IB2_INT_STAT_bit                                  = 1 << 29,
+	IB1_INT_STAT_bit                                  = 1 << 30,
+	RB_INT_STAT_bit                                   = 1 << 31,
+
+//  SX_ALPHA_TEST_CONTROL                                 = 0x00028410,
+	ALPHA_FUNC__REF_NEVER                             = 0,
+	ALPHA_FUNC__REF_ALWAYS                            = 7,
+//  DB_SHADER_CONTROL                                     = 0x0002880c,
+	Z_ORDER__EARLY_Z_THEN_LATE_Z                      = 2,
+//  PA_SU_SC_MODE_CNTL                                    = 0x00028814,
+//	POLY_MODE_mask                                    = 0x03 << 3,
+	POLY_MODE__TRIANGLES = 0, POLY_MODE__DUAL_MODE,
+//	POLYMODE_FRONT_PTYPE_mask                         = 0x07 << 5,
+	POLYMODE_PTYPE__POINTS = 0, POLYMODE_PTYPE__LINES, POLYMODE_PTYPE__TRIANGLES,
+    PA_SC_AA_SAMPLE_LOCS_8S_WD1_M                         = 0x00028c20,
+    DB_SRESULTS_COMPARE_STATE0                            = 0x00028d28,	/* See autoregs: DB_SRESULTS_COMPARE_STATE1 */
+//  DB_SRESULTS_COMPARE_STATE1                            = 0x00028d2c,
+    DB_ALPHA_TO_MASK                                      = 0x00028d44,
+	ALPHA_TO_MASK_ENABLE                              = 1 << 0,
+	ALPHA_TO_MASK_OFFSET0_mask                        = 0x03 << 8,
+	ALPHA_TO_MASK_OFFSET0_shift                       = 8,
+	ALPHA_TO_MASK_OFFSET1_mask                        = 0x03 << 10,
+	ALPHA_TO_MASK_OFFSET1_shift                       = 10,
+	ALPHA_TO_MASK_OFFSET2_mask                        = 0x03 << 12,
+	ALPHA_TO_MASK_OFFSET2_shift                       = 12,
+	ALPHA_TO_MASK_OFFSET3_mask                        = 0x03 << 14,
+	ALPHA_TO_MASK_OFFSET3_shift                       = 14,
+
+//  SQ_VTX_CONSTANT_WORD2_0                               = 0x00038008,
+//    	SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask         = 0x3f << 20,
+	FMT_INVALID=0,      FMT_8,          FMT_4_4,            FMT_3_3_2,
+	                    FMT_16=5,       FMT_16_FLOAT,       FMT_8_8,
+	FMT_5_6_5,          FMT_6_5_5,      FMT_1_5_5_5,        FMT_4_4_4_4,
+	FMT_5_5_5_1,        FMT_32,         FMT_32_FLOAT,       FMT_16_16,
+	FMT_16_16_FLOAT=16, FMT_8_24,       FMT_8_24_FLOAT,     FMT_24_8,
+	FMT_24_8_FLOAT,     FMT_10_11_11,   FMT_10_11_11_FLOAT, FMT_11_11_10,
+	FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8,        FMT_10_10_10_2,
+	FMT_X24_8_32_FLOAT, FMT_32_32,      FMT_32_32_FLOAT,    FMT_16_16_16_16,
+	FMT_16_16_16_16_FLOAT=32,           FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT,
+	                    FMT_1 = 37,                         FMT_GB_GR=39,
+	FMT_BG_RG,          FMT_32_AS_8,    FMT_32_AS_8_8,      FMT_5_9_9_9_SHAREDEXP,
+	FMT_8_8_8,          FMT_16_16_16,   FMT_16_16_16_FLOAT, FMT_32_32_32,
+	FMT_32_32_32_FLOAT=48,
+
+//  High level register file lengths
+    SQ_ALU_CONSTANT                                       = SQ_ALU_CONSTANT0_0,	/* 256 PS, 256 VS */
+    SQ_ALU_CONSTANT_ps_num                                = 256,
+    SQ_ALU_CONSTANT_vs_num                                = 256,
+    SQ_ALU_CONSTANT_all_num                               = 512,
+    SQ_ALU_CONSTANT_offset                                = 16,
+    SQ_ALU_CONSTANT_ps                                    = 0,
+    SQ_ALU_CONSTANT_vs                                    = SQ_ALU_CONSTANT_ps + SQ_ALU_CONSTANT_ps_num,
+    SQ_TEX_RESOURCE                                       = SQ_TEX_RESOURCE_WORD0_0,	/* 160 PS, 160 VS, 16 FS, 160 GS */
+    SQ_TEX_RESOURCE_ps_num                                = 160,
+    SQ_TEX_RESOURCE_vs_num                                = 160,
+    SQ_TEX_RESOURCE_fs_num                                = 16,
+    SQ_TEX_RESOURCE_gs_num                                = 160,
+    SQ_TEX_RESOURCE_all_num                               = 496,
+    SQ_TEX_RESOURCE_offset                                = 28,
+    SQ_TEX_RESOURCE_ps                                    = 0,
+    SQ_TEX_RESOURCE_vs                                    = SQ_TEX_RESOURCE_ps + SQ_TEX_RESOURCE_ps_num,
+    SQ_TEX_RESOURCE_fs                                    = SQ_TEX_RESOURCE_vs + SQ_TEX_RESOURCE_vs_num,
+    SQ_TEX_RESOURCE_gs                                    = SQ_TEX_RESOURCE_fs + SQ_TEX_RESOURCE_fs_num,
+    SQ_VTX_RESOURCE                                       = SQ_VTX_CONSTANT_WORD0_0,	/* 160 PS, 160 VS, 16 FS, 160 GS */
+    SQ_VTX_RESOURCE_ps_num                                = 160,
+    SQ_VTX_RESOURCE_vs_num                                = 160,
+    SQ_VTX_RESOURCE_fs_num                                = 16,
+    SQ_VTX_RESOURCE_gs_num                                = 160,
+    SQ_VTX_RESOURCE_all_num                               = 496,
+    SQ_VTX_RESOURCE_offset                                = 28,
+    SQ_VTX_RESOURCE_ps                                    = 0,
+    SQ_VTX_RESOURCE_vs                                    = SQ_VTX_RESOURCE_ps + SQ_VTX_RESOURCE_ps_num,
+    SQ_VTX_RESOURCE_fs                                    = SQ_VTX_RESOURCE_vs + SQ_VTX_RESOURCE_vs_num,
+    SQ_VTX_RESOURCE_gs                                    = SQ_VTX_RESOURCE_fs + SQ_VTX_RESOURCE_fs_num,
+    SQ_TEX_SAMPLER_WORD                                   = SQ_TEX_SAMPLER_WORD0_0,	/* 18 per PS, VS, GS */
+    SQ_TEX_SAMPLER_WORD_ps_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_vs_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_gs_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_all_num                           = 54,
+    SQ_TEX_SAMPLER_WORD_offset                            = 12,
+    SQ_TEX_SAMPLER_WORD_ps                                = 0,
+    SQ_TEX_SAMPLER_WORD_vs                                = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num,
+    SQ_TEX_SAMPLER_WORD_gs                                = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num,
+    SQ_LOOP_CONST                                         = SQ_LOOP_CONST_0,		/* 32 per PS, VS, GS */
+    SQ_LOOP_CONST_ps_num                                  = 32,
+    SQ_LOOP_CONST_vs_num                                  = 32,
+    SQ_LOOP_CONST_gs_num                                  = 32,
+    SQ_LOOP_CONST_all_num                                 = 96,
+    SQ_LOOP_CONST_offset                                  = 4,
+    SQ_LOOP_CONST_ps                                      = 0,
+    SQ_LOOP_CONST_vs                                      = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num,
+    SQ_LOOP_CONST_gs                                      = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num,
+} ;
+
+
+#endif
diff --git a/src/mesa/drivers/dri/r600/r600_reg_r7xx.h b/src/mesa/drivers/dri/r600/r600_reg_r7xx.h
new file mode 100644
index 0000000000..eb169bd885
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_reg_r7xx.h
@@ -0,0 +1,151 @@
+/*
+ * RadeonHD R6xx, R7xx Register documentation
+ *
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ * Copyright (C) 2008-2009  Matthias Hopf
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _R600_REG_R7xx_H_
+#define _R600_REG_R7xx_H_
+
+/*
+ * Register update for R7xx chips
+ */
+
+enum {
+    /* Naming: X_mask/X_shift describe one bit-field, X_bit is a single-bit flag; "//" lines name registers that are not defined in this enum (presumably the R6xx header has them). */
+    R7XX_MC_VM_FB_LOCATION                                = 0x00002024,
+
+//  GRBM_STATUS                                           = 0x00008010,
+	R7XX_TA_BUSY_bit                                  = 1 << 14,
+
+    R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ                     = 0x00008d8c,
+	RING0_OFFSET_mask                                 = 0xff << 0,
+	RING0_OFFSET_shift                                = 0,
+	ISOLATE_ES_ENABLE_bit                             = 1 << 12,
+	ISOLATE_GS_ENABLE_bit                             = 1 << 13,
+	VS_PC_LIMIT_ENABLE_bit                            = 1 << 14,
+
+//  SQ_ALU_WORD0                                          = 0x00008dfc,
+//	SRC0_SEL_mask                                     = 0x1ff << 0,
+// 	SRC1_SEL_mask                                     = 0x1ff << 13,
+	    R7xx_SQ_ALU_SRC_1_DBL_L                       = 0xf4,
+	    R7xx_SQ_ALU_SRC_1_DBL_M                       = 0xf5,
+	    R7xx_SQ_ALU_SRC_0_5_DBL_L                     = 0xf6,
+	    R7xx_SQ_ALU_SRC_0_5_DBL_M                     = 0xf7,
+// 	INDEX_MODE_mask                                   = 0x07 << 26,
+	    R7xx_SQ_INDEX_GLOBAL                          = 0x05,
+	    R7xx_SQ_INDEX_GLOBAL_AR_X                     = 0x06,
+    R6xx_SQ_ALU_WORD1_OP2                                 = 0x00008dfc,
+    R7xx_SQ_ALU_WORD1_OP2_V2                              = 0x00008dfc,	/* same offset; V2 places OMOD and ALU_INST one bit lower (see shifts below) */
+	R6xx_FOG_MERGE_bit                                = 1 << 5,
+	R6xx_OMOD_mask                                    = 0x03 << 6,
+	R7xx_OMOD_mask                                    = 0x03 << 5,
+	R6xx_OMOD_shift                                   = 6,
+	R7xx_OMOD_shift                                   = 5,
+	R6xx_SQ_ALU_WORD1_OP2__ALU_INST_mask              = 0x3ff << 8,
+	R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_mask           = 0x7ff << 7,
+	R6xx_SQ_ALU_WORD1_OP2__ALU_INST_shift             = 8,
+	R7xx_SQ_ALU_WORD1_OP2_V2__ALU_INST_shift          = 7,
+	    R7xx_SQ_OP2_INST_FREXP_64                     = 0x07,
+	    R7xx_SQ_OP2_INST_ADD_64                       = 0x17,
+	    R7xx_SQ_OP2_INST_MUL_64                       = 0x1b,
+	    R7xx_SQ_OP2_INST_FLT64_TO_FLT32               = 0x1c,
+	    R7xx_SQ_OP2_INST_FLT32_TO_FLT64               = 0x1d,
+	    R7xx_SQ_OP2_INST_LDEXP_64                     = 0x7a,
+	    R7xx_SQ_OP2_INST_FRACT_64                     = 0x7b,
+	    R7xx_SQ_OP2_INST_PRED_SETGT_64                = 0x7c,
+	    R7xx_SQ_OP2_INST_PRED_SETE_64                 = 0x7d,
+	    R7xx_SQ_OP2_INST_PRED_SETGE_64                = 0x7e,
+//  SQ_ALU_WORD1_OP3                                      = 0x00008dfc,
+//	SRC2_SEL_mask                                     = 0x1ff << 0,
+//	    R7xx_SQ_ALU_SRC_1_DBL_L                       = 0xf4,
+//	    R7xx_SQ_ALU_SRC_1_DBL_M                       = 0xf5,
+//	    R7xx_SQ_ALU_SRC_0_5_DBL_L                     = 0xf6,
+//	    R7xx_SQ_ALU_SRC_0_5_DBL_M                     = 0xf7,
+// 	SQ_ALU_WORD1_OP3__ALU_INST_mask                   = 0x1f << 13,
+	    R7xx_SQ_OP3_INST_MULADD_64                    = 0x08,
+	    R7xx_SQ_OP3_INST_MULADD_64_M2                 = 0x09,
+	    R7xx_SQ_OP3_INST_MULADD_64_M4                 = 0x0a,
+	    R7xx_SQ_OP3_INST_MULADD_64_D2                 = 0x0b,
+//  SQ_CF_ALU_WORD1                                       = 0x00008dfc,
+	R6xx_USES_WATERFALL_bit                           = 1 << 25,
+	R7xx_SQ_CF_ALU_WORD1__ALT_CONST_bit               = 1 << 25,	/* same bit position as R6xx_USES_WATERFALL_bit */
+//  SQ_CF_ALLOC_EXPORT_WORD0                              = 0x00008dfc,
+//	ARRAY_BASE_mask                                   = 0x1fff << 0,
+//	TYPE_mask                                         = 0x03 << 13,
+//	    SQ_EXPORT_PARAM                               = 0x02,
+//	    X_UNUSED_FOR_SX_EXPORTS                       = 0x03,
+//	ELEM_SIZE_mask                                    = 0x03 << 30,
+//  SQ_CF_ALLOC_EXPORT_WORD1                              = 0x00008dfc,
+//	SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask            = 0x7f << 23,
+	    R7xx_SQ_CF_INST_MEM_EXPORT                    = 0x3a,
+//  SQ_CF_WORD1                                           = 0x00008dfc,
+//	SQ_CF_WORD1__COUNT_mask                           = 0x07 << 10,
+	R7xx_COUNT_3_bit                                  = 1 << 19,
+//	SQ_CF_WORD1__CF_INST_mask                         = 0x7f << 23,
+	    R7xx_SQ_CF_INST_END_PROGRAM                   = 0x19,
+	    R7xx_SQ_CF_INST_WAIT_ACK                      = 0x1a,
+	    R7xx_SQ_CF_INST_TEX_ACK                       = 0x1b,
+	    R7xx_SQ_CF_INST_VTX_ACK                       = 0x1c,
+	    R7xx_SQ_CF_INST_VTX_TC_ACK                    = 0x1d,
+//  SQ_VTX_WORD0                                          = 0x00008dfc,
+//	VTX_INST_mask                                     = 0x1f << 0,
+	    R7xx_SQ_VTX_INST_MEM                          = 0x02,
+//  SQ_VTX_WORD2                                          = 0x00008dfc,
+	R7xx_SQ_VTX_WORD2__ALT_CONST_bit                  = 1 << 20,
+
+//  SQ_TEX_WORD0                                          = 0x00008dfc,
+//	TEX_INST_mask                                     = 0x1f << 0,
+	    R7xx_X_MEMORY_READ                            = 0x02,
+	    R7xx_SQ_TEX_INST_KEEP_GRADIENTS               = 0x0a,
+	    R7xx_X_FETCH4_LOAD4_INSTRUCTION_FOR_DX10_1    = 0x0f,
+	R7xx_SQ_TEX_WORD0__ALT_CONST_bit                  = 1 << 24,
+
+    R7xx_PA_SC_EDGERULE                                   = 0x00028230,
+    R7xx_SPI_THREAD_GROUPING                              = 0x000286c8,
+	PS_GROUPING_mask                                  = 0x1f << 0,
+	PS_GROUPING_shift                                 = 0,
+	VS_GROUPING_mask                                  = 0x1f << 8,
+	VS_GROUPING_shift                                 = 8,
+	GS_GROUPING_mask                                  = 0x1f << 16,
+	GS_GROUPING_shift                                 = 16,
+	ES_GROUPING_mask                                  = 0x1f << 24,
+	ES_GROUPING_shift                                 = 24,
+    R7xx_CB_SHADER_CONTROL                                = 0x000287a0,
+	RT0_ENABLE_bit                                    = 1 << 0,
+	RT1_ENABLE_bit                                    = 1 << 1,
+	RT2_ENABLE_bit                                    = 1 << 2,
+	RT3_ENABLE_bit                                    = 1 << 3,
+	RT4_ENABLE_bit                                    = 1 << 4,
+	RT5_ENABLE_bit                                    = 1 << 5,
+	RT6_ENABLE_bit                                    = 1 << 6,
+	RT7_ENABLE_bit                                    = 1 << 7,
+//  DB_ALPHA_TO_MASK                                      = 0x00028d44,
+	R7xx_OFFSET_ROUND_bit                             = 1 << 16,
+//  SQ_TEX_SAMPLER_MISC_0                                 = 0x0003d03c,
+	R7xx_TRUNCATE_COORD_bit                           = 1 << 9,
+	R7xx_DISABLE_CUBE_WRAP_bit                        = 1 << 10,
+//  DB_RENDER_CONTROL                                     = 0x00028d0c,
+	PERFECT_ZPASS_COUNTS_bit                          = 1 << 15,
+
+} ;
+
+#endif /* _R600_REG_R7xx_H_ */
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
new file mode 100644
index 0000000000..41419f8460
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -0,0 +1,479 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/colormac.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/mipmap.h"
+#include "main/simple_list.h"
+#include "main/texstore.h"
+#include "main/texobj.h"
+
+#include "texmem.h"
+
+#include "r600_context.h"
+#include "radeon_mipmap_tree.h"
+#include "r600_tex.h"
+
+
+/* Translate a GL texture wrap enum into the SQ_TEX_* clamp code stored in the
+ * cached sampler word; an unknown enum is reported and yields 0. */
+static unsigned int translate_wrap_mode(GLenum wrapmode)
+{
+	if (wrapmode == GL_REPEAT)                     return SQ_TEX_WRAP;
+	if (wrapmode == GL_CLAMP)                      return SQ_TEX_CLAMP_HALF_BORDER;
+	if (wrapmode == GL_CLAMP_TO_EDGE)              return SQ_TEX_CLAMP_LAST_TEXEL;
+	if (wrapmode == GL_CLAMP_TO_BORDER)            return SQ_TEX_CLAMP_BORDER;
+	if (wrapmode == GL_MIRRORED_REPEAT)            return SQ_TEX_MIRROR;
+	if (wrapmode == GL_MIRROR_CLAMP_EXT)           return SQ_TEX_MIRROR_ONCE_HALF_BORDER;
+	if (wrapmode == GL_MIRROR_CLAMP_TO_EDGE_EXT)   return SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
+	if (wrapmode == GL_MIRROR_CLAMP_TO_BORDER_EXT) return SQ_TEX_MIRROR_ONCE_BORDER;
+
+	radeon_error("bad wrap mode in %s", __FUNCTION__);
+	return 0;
+}
+
+
+/**
+ * Refresh the clamp fields of the cached sampler word 0 from the GL wrap modes.
+ *
+ * \param t Texture object whose wrap modes are to be set
+ */
+static void r600UpdateTexWrap(radeonTexObjPtr t)
+{
+	struct gl_texture_object *texObj = &t->base;
+	const GLenum target = texObj->Target;
+	/* S is always written; T is skipped for 1D targets; R is written for 3D only. */
+	SETfield(t->SQ_TEX_SAMPLER0, translate_wrap_mode(texObj->WrapS),
+		 SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift, SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask);
+	if (target == GL_TEXTURE_1D)
+		return;
+	SETfield(t->SQ_TEX_SAMPLER0, translate_wrap_mode(texObj->WrapT),
+		 CLAMP_Y_shift, CLAMP_Y_mask);
+	if (target != GL_TEXTURE_3D)
+		return;
+	SETfield(t->SQ_TEX_SAMPLER0, translate_wrap_mode(texObj->WrapR),
+		 CLAMP_Z_shift, CLAMP_Z_mask);
+}
+
+static void r600SetTexDefaultState(radeonTexObjPtr t)
+{
+	/* Start every cached texture resource word from zero. */
+	t->SQ_TEX_RESOURCE0 = 0;
+	t->SQ_TEX_RESOURCE1 = 0;
+	t->SQ_TEX_RESOURCE2 = 0;
+	t->SQ_TEX_RESOURCE3 = 0;
+	t->SQ_TEX_RESOURCE4 = 0;
+	t->SQ_TEX_RESOURCE5 = 0;
+	t->SQ_TEX_RESOURCE6 = 0;
+
+	/* Word 0: 2D dimension, linear-general tile mode, tile type bit clear. */
+	SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_2D, DIM_shift, DIM_mask);
+	SETfield(t->SQ_TEX_RESOURCE0, ARRAY_LINEAR_GENERAL,
+		 SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift, SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+	CLEARbit(t->SQ_TEX_RESOURCE0, TILE_TYPE_bit);
+
+	/* Word 1: FMT_8_8_8_8 data format. */
+	SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+		 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+	/* Word 4: unsigned/normalized components, X,Y,Z,W selected in order, base level 0. */
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_NUM_FORMAT_NORM,
+		 SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift, SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask);
+	CLEARbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit);
+	CLEARbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_ENDIAN_NONE,
+		 SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift, SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask);
+	SETfield(t->SQ_TEX_RESOURCE4, 1, REQUEST_SIZE_shift, REQUEST_SIZE_mask);
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+		 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+		 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+		 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+		 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask); /* mip-maps */
+
+	/* Word 6: TYPE field is set to SQ_TEX_VTX_VALID_TEXTURE. */
+	SETfield(t->SQ_TEX_RESOURCE6, SQ_TEX_VTX_VALID_TEXTURE,
+		 SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
+
+	/* The three cached sampler words also start from zero. */
+	t->SQ_TEX_SAMPLER0 = 0;
+	t->SQ_TEX_SAMPLER1 = 0;
+	t->SQ_TEX_SAMPLER2 = 0;
+
+	/* Sampler word 0: wrap on X/Y/Z, point min/mag, no Z or mip filter, transparent-black border. */
+	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP,
+		 SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift, SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask);
+	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP, CLAMP_Y_shift, CLAMP_Y_mask);
+	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP, CLAMP_Z_shift, CLAMP_Z_mask);
+	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_XY_FILTER_POINT,
+		 XY_MAG_FILTER_shift, XY_MAG_FILTER_mask);
+	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_XY_FILTER_POINT,
+		 XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_Z_FILTER_NONE, Z_FILTER_shift, Z_FILTER_mask);
+	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_Z_FILTER_NONE, MIP_FILTER_shift, MIP_FILTER_mask); /* NOTE(review): Z-filter constant used for the mip field; confirm it equals the mip "none" value */
+	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_TRANS_BLACK,
+		 BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask);
+
+	/* Sampler word 1: MAX_LOD field = 0x3ff.  Sampler word 2: TYPE bit set. */
+	SETfield(t->SQ_TEX_SAMPLER1, 0x3ff, MAX_LOD_shift, MAX_LOD_mask);
+	SETbit(t->SQ_TEX_SAMPLER2, SQ_TEX_SAMPLER_WORD2_0__TYPE_bit);
+}
+
+/* Compiled out by "#if 0": maps an anisotropy level to R300_TX_MAX_ANISO_* values (presumably kept only for reference). */
+#if 0
+static GLuint aniso_filter(GLfloat anisotropy)
+{
+	if (anisotropy >= 16.0) {
+		return R300_TX_MAX_ANISO_16_TO_1;
+	} else if (anisotropy >= 8.0) {
+		return R300_TX_MAX_ANISO_8_TO_1;
+	} else if (anisotropy >= 4.0) {
+		return R300_TX_MAX_ANISO_4_TO_1;
+	} else if (anisotropy >= 2.0) {
+		return R300_TX_MAX_ANISO_2_TO_1;
+	} else {
+		return R300_TX_MAX_ANISO_1_TO_1;
+	}
+	return 0;
+}
+#endif
+
+/**
+ * Set the texture magnification and minification modes.
+ *
+ * \param t Texture whose filter modes are to be set
+ * \param minf Texture minification mode
+ * \param magf Texture magnification mode
+ * \param anisotropy Maximum anisotropy level (only logged; not programmed)
+ */
+static void r600SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
+{
+	/* Force revalidation to account for switches from/to mipmapping. */
+	t->validated = GL_FALSE;
+
+	/* Anisotropic filtering is not programmed by this function: the only
+	 * code for it was a commented-out R300 register sequence.  The request
+	 * is logged, and the ordinary min/mag filters are still programmed
+	 * below.  Returning early here would leave the cached sampler fields at
+	 * their previous values and silently drop the filter change.
+	 */
+	if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) {
+		radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "Using maximum anisotropy of %f\n", anisotropy);
+	}
+
+	switch (minf) {
+	case GL_NEAREST:
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
+			 XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_None,
+			 MIP_FILTER_shift, MIP_FILTER_mask);
+		break;
+	case GL_LINEAR:
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
+			 XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_None,
+			 MIP_FILTER_shift, MIP_FILTER_mask);
+		break;
+	case GL_NEAREST_MIPMAP_NEAREST:
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
+			 XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Point,
+			 MIP_FILTER_shift, MIP_FILTER_mask);
+		break;
+	case GL_NEAREST_MIPMAP_LINEAR:
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
+			 XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Linear,
+			 MIP_FILTER_shift, MIP_FILTER_mask);
+		break;
+	case GL_LINEAR_MIPMAP_NEAREST:
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
+			 XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Point,
+			 MIP_FILTER_shift, MIP_FILTER_mask);
+		break;
+	case GL_LINEAR_MIPMAP_LINEAR:
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
+			 XY_MIN_FILTER_shift, XY_MIN_FILTER_mask);
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Linear,
+			 MIP_FILTER_shift, MIP_FILTER_mask);
+		break;
+	default: /* unrecognised minification mode: leave the cached fields untouched */
+		break;
+	}
+
+	/* Note we don't have 3D mipmaps so only use the mag filter setting
+	 * to set the 3D texture filter mode.
+	 */
+	switch (magf) {
+	case GL_NEAREST:
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
+			 XY_MAG_FILTER_shift, XY_MAG_FILTER_mask);
+		break;
+	case GL_LINEAR:
+		SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
+			 XY_MAG_FILTER_shift, XY_MAG_FILTER_mask);
+		break;
+	default: /* unrecognised magnification mode: leave the cached field untouched */
+		break;
+	}
+}
+/* Cache the raw 32-bit patterns of the four border colour floats and select the register-based border colour type. */
+static void r600SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4])
+{
+	union { GLfloat f[4]; uint32_t u[4]; } raw = { { color[0], color[1], color[2], color[3] } }; /* union pun: a uint32_t* cast of a float breaks strict aliasing */
+	t->TD_PS_SAMPLER0_BORDER_ALPHA = raw.u[3];
+	t->TD_PS_SAMPLER0_BORDER_RED = raw.u[2]; /* NOTE(review): RED takes color[2] and BLUE takes color[0], as before; confirm the intended channel order */
+	t->TD_PS_SAMPLER0_BORDER_GREEN = raw.u[1];
+	t->TD_PS_SAMPLER0_BORDER_BLUE = raw.u[0];
+	SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_REGISTER, BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask);
+}
+
+/**
+ * Texture parameter hook: updates the cached variables and flags for a state
+ * update, which will happen at the next UpdateTextureState.
+ */
+
+static void r600TexParameter(GLcontext * ctx, GLenum target,
+			     struct gl_texture_object *texObj,
+			     GLenum pname, const GLfloat * params)
+{
+	radeonTexObj *rtex = radeon_tex_obj(texObj);
+	GLenum baseFormat;
+
+	radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_VERBOSE,
+			"%s( %s )\n", __FUNCTION__,
+			_mesa_lookup_enum_by_nr(pname));
+
+	/* target and params are not consulted; current values are read from texObj. */
+	switch (pname) {
+	case GL_TEXTURE_MIN_FILTER:
+	case GL_TEXTURE_MAG_FILTER:
+	case GL_TEXTURE_MAX_ANISOTROPY_EXT:
+		r600SetTexFilter(rtex, texObj->MinFilter, texObj->MagFilter,
+				 texObj->MaxAnisotropy);
+		break;
+
+	case GL_TEXTURE_WRAP_S:
+	case GL_TEXTURE_WRAP_T:
+	case GL_TEXTURE_WRAP_R:
+		r600UpdateTexWrap(rtex);
+		break;
+
+	case GL_TEXTURE_BORDER_COLOR:
+		r600SetTexBorderColor(rtex, texObj->BorderColor.f);
+		break;
+
+	case GL_TEXTURE_BASE_LEVEL:
+	case GL_TEXTURE_MAX_LEVEL:
+	case GL_TEXTURE_MIN_LOD:
+	case GL_TEXTURE_MAX_LOD:
+		/* Level and LOD range changes only mark the texture for revalidation. */
+		rtex->validated = GL_FALSE;
+		break;
+
+	case GL_DEPTH_TEXTURE_MODE:
+		/* Nothing to do while the base level image is missing. */
+		if (!texObj->Image[0][texObj->BaseLevel])
+			break;
+		baseFormat = texObj->Image[0][texObj->BaseLevel]->_BaseFormat;
+		/* If the texture isn't a depth texture, changing this state
+		 * won't cause any changes to the hardware. */
+		if (baseFormat != GL_DEPTH_COMPONENT &&
+		    baseFormat != GL_DEPTH_STENCIL)
+			break;
+		r600SetDepthTexMode(texObj);
+		break;
+
+	default:
+		break;
+	}
+}
+
+/* DeleteTexture hook: clears hw texture slots pointing at the object, drops its bo and miptree, then frees it. */
+static void r600DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+	context_t *r700 = R700_CONTEXT(ctx);
+	radeonTexObj *rtex = radeon_tex_obj(texObj);
+	int unit;
+
+	radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL,
+		"%s( %p (target = %s) )\n", __FUNCTION__,
+			(void *)texObj, _mesa_lookup_enum_by_nr(texObj->Target));
+
+	if (r700 != NULL) {
+		/* Call radeon_firevertices() first, then forget this texture in every hw slot. */
+		radeon_firevertices(&r700->radeon);
+		for (unit = 0; unit < R700_MAX_TEXTURE_UNITS; unit++)
+			if (r700->hw.textures[unit] == rtex)
+				r700->hw.textures[unit] = 0;
+	}
+
+	/* Drop our reference on the buffer object, if any, and on the mipmap tree. */
+	if (rtex->bo != NULL) {
+		radeon_bo_unref(rtex->bo);
+		rtex->bo = NULL;
+	}
+	radeon_miptree_unreference(&rtex->mt);
+
+	_mesa_delete_texture_object(ctx, texObj);
+}
+
+/**
+ * Allocate a new texture object.
+ * Called via ctx->Driver.NewTextureObject; returns NULL if allocation fails.
+ * Note: this function will be called during context creation to
+ * allocate the default texture objects.
+ * Fixup MaxAnisotropy according to user preference.
+ */
+static struct gl_texture_object *r600NewTextureObject(GLcontext * ctx,
+						      GLuint name,
+						      GLenum target)
+{
+	context_t* rmesa = R700_CONTEXT(ctx);
+	radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
+
+	radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL,
+		"%s( %p (target = %s) )\n", __FUNCTION__,
+			t, _mesa_lookup_enum_by_nr(target));
+	if (!t)
+		return NULL; /* out of memory: do not dereference t below */
+
+	_mesa_initialize_texture_object(&t->base, name, target);
+	t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
+
+	/* Initialize hardware state */
+	r600SetTexDefaultState(t);
+	r600UpdateTexWrap(t);
+	r600SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
+	r600SetTexBorderColor(t, t->base.BorderColor.f);
+	return &t->base;
+}
+
+/* Returns 1 when mesa_format is one of the formats listed below, 0 otherwise. */
+unsigned r600IsFormatRenderable(gl_format mesa_format)
+{
+	unsigned renderable;
+
+	switch (mesa_format) {
+	/* 32-bit RGBA / ARGB / XRGB layouts */
+	case MESA_FORMAT_RGBA8888:        case MESA_FORMAT_RGBA8888_REV:
+	case MESA_FORMAT_SIGNED_RGBA8888: case MESA_FORMAT_SIGNED_RGBA8888_REV:
+	case MESA_FORMAT_ARGB8888:        case MESA_FORMAT_ARGB8888_REV:
+	case MESA_FORMAT_XRGB8888:        case MESA_FORMAT_XRGB8888_REV:
+	/* packed 16-bit and 8-bit colour */
+	case MESA_FORMAT_RGB565:          case MESA_FORMAT_RGB565_REV:
+	case MESA_FORMAT_ARGB4444:        case MESA_FORMAT_ARGB4444_REV:
+	case MESA_FORMAT_ARGB1555:        case MESA_FORMAT_ARGB1555_REV:
+	case MESA_FORMAT_AL88:            case MESA_FORMAT_AL88_REV:
+	case MESA_FORMAT_RGB332:
+	/* single-channel 8-bit */
+	case MESA_FORMAT_A8:
+	case MESA_FORMAT_I8:
+	case MESA_FORMAT_CI8:
+	case MESA_FORMAT_L8:
+	/* floating point */
+	case MESA_FORMAT_RGBA_FLOAT32:            case MESA_FORMAT_RGBA_FLOAT16:
+	case MESA_FORMAT_ALPHA_FLOAT32:           case MESA_FORMAT_ALPHA_FLOAT16:
+	case MESA_FORMAT_LUMINANCE_FLOAT32:       case MESA_FORMAT_LUMINANCE_FLOAT16:
+	case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32: case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+	case MESA_FORMAT_INTENSITY_FLOAT32:       case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+	/* depth and depth/stencil */
+	case MESA_FORMAT_X8_Z24:
+	case MESA_FORMAT_S8_Z24:
+	case MESA_FORMAT_Z24_S8:
+	case MESA_FORMAT_Z16:
+	case MESA_FORMAT_Z32:
+	/* sRGB */
+	case MESA_FORMAT_SRGBA8:
+	case MESA_FORMAT_SLA8:
+	case MESA_FORMAT_SL8:
+		renderable = 1;
+		break;
+	default:
+		renderable = 0;
+		break;
+	}
+
+	return renderable;
+}
+
+void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
+{
+	/* Only the hooks this driver implements are plugged in here, since
+	 * _mesa_init_driver_functions() was already called.  Object-level hooks first. */
+	functions->NewTextureObject = r600NewTextureObject;
+	functions->DeleteTexture = r600DeleteTexture;
+	functions->TexParameter = r600TexParameter;
+	functions->IsTextureResident = driIsTextureResident;
+
+	/* Texture image allocation, mapping, format choice and mipmap generation. */
+	functions->NewTextureImage = radeonNewTextureImage;
+	functions->FreeTexImageData = radeonFreeTexImageData;
+	functions->MapTexture = radeonMapTexture;
+	functions->UnmapTexture = radeonUnmapTexture;
+	functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
+	functions->GenerateMipmap = radeonGenerateMipmap;
+
+	/* Image upload and readback, plain and compressed. */
+	functions->TexImage1D = radeonTexImage1D;
+	functions->TexImage2D = radeonTexImage2D;
+	functions->TexImage3D = radeonTexImage3D;
+	functions->TexSubImage1D = radeonTexSubImage1D;
+	functions->TexSubImage2D = radeonTexSubImage2D;
+	functions->TexSubImage3D = radeonTexSubImage3D;
+	functions->CompressedTexImage2D = radeonCompressedTexImage2D;
+	functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
+	functions->GetTexImage = radeonGetTexImage;
+	functions->GetCompressedTexImage = radeonGetCompressedTexImage;
+
+	/* The CopyTex hooks are installed only when radeonScreen->kernel_mm is set. */
+	if (radeon->radeonScreen->kernel_mm) {
+		functions->CopyTexImage2D = radeonCopyTexImage2D;
+		functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
+	}
+
+	driInitTextureFormats();
+}
diff --git a/src/mesa/drivers/dri/r600/r600_tex.h b/src/mesa/drivers/dri/r600/r600_tex.h
new file mode 100644
index 0000000000..771affdfa6
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_tex.h
@@ -0,0 +1,65 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __r600_TEX_H__
+#define __r600_TEX_H__
+
+/* TODO : review this after texture load code. */
+#define R700_BLIT_WIDTH_BYTES 1024
+/* BASE_ADDRESS and MIP_ADDRESS are 256-byte aligned: mask of the low 8 bits. */
+#define R700_TEXTURE_ALIGNMENT_MASK     0xff
+/* Texel pitch is aligned to 8: mask of the low 3 bits. */
+#define R700_TEXEL_PITCH_ALIGNMENT_MASK 0x7
+
+#define R700_MAX_TEXTURE_UNITS 16
+
+extern void r600SetDepthTexMode(struct gl_texture_object *tObj);
+
+extern void r600SetTexBuffer(__DRIcontext *pDRICtx, GLint target,
+			     __DRIdrawable *dPriv);
+
+extern void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
+			      GLint format, __DRIdrawable *dPriv);
+
+extern void r600SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+			     unsigned long long offset, GLint depth,
+			     GLuint pitch);
+
+extern GLboolean r600ValidateBuffers(GLcontext * ctx);
+
+extern void r600InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions);
+
+unsigned r600IsFormatRenderable(gl_format mesa_format);
+
+#endif				/* __r600_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
new file mode 100644
index 0000000000..1600033b9b
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -0,0 +1,1116 @@
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ *
+ * \todo Enable R300 texture tiling code?
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/enums.h"
+#include "main/simple_list.h"
+
+#include "r600_context.h"
+#include "radeon_mipmap_tree.h"
+#include "r600_tex.h"
+#include "r700_fragprog.h"
+#include "r700_vertprog.h"
+
+void r600UpdateTextureState(GLcontext * ctx);
+
+/**
+ * Rebuild r700->textures[]: each of the R700_MAX_TEXTURE_UNITS slots gets
+ * the unit's current radeon texture object when the unit is really enabled
+ * and has one, and NULL otherwise.
+ */
+void r600UpdateTextureState(GLcontext * ctx)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	GLuint i;
+
+	R600_STATECHANGE(context, tx);
+	R600_STATECHANGE(context, tx_smplr);
+	R600_STATECHANGE(context, tx_brdr_clr);
+
+	for (i = 0; i < R700_MAX_TEXTURE_UNITS; i++) {
+		struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
+		struct radeon_tex_obj *texObj = radeon_tex_obj(texUnit->_Current);
+
+		/* Disabled units and units without an object are cleared. */
+		r700->textures[i] = (texUnit->_ReallyEnabled && texObj) ? texObj : NULL;
+	}
+}
+
+static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa_format)
+{
+	radeonTexObj *t = radeon_tex_obj(tObj);
+
+	CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+	CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+	CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+	CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+	CLEARbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+		 FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+		 FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+		 FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+	SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+		 FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+
+	CLEARbit(t->SQ_TEX_RESOURCE0, TILE_TYPE_bit);
+	SETfield(t->SQ_TEX_RESOURCE0, ARRAY_LINEAR_GENERAL,
+		 SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+		 SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+
+	switch (mesa_format) /* This is mesa format. */
+	{
+	case MESA_FORMAT_RGBA8888:
+	case MESA_FORMAT_SIGNED_RGBA8888:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888) {
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+				 FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+				 FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+				 FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+				 FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+		}
+		break;
+	case MESA_FORMAT_RGBA8888_REV:
+	case MESA_FORMAT_SIGNED_RGBA8888_REV:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888_REV) {
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+				 FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+				 FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+				 FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+				 FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+		}
+		break;
+	case MESA_FORMAT_ARGB8888:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_XRGB8888:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_XRGB8888_REV:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_ARGB8888_REV:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_RGB888:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_RGB565:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_RGB565_REV:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_ARGB4444:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_4_4_4_4,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_ARGB4444_REV:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_4_4_4_4,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_ARGB1555:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_1_5_5_5,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_ARGB1555_REV:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_1_5_5_5,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_AL88:
+	case MESA_FORMAT_AL88_REV: /* TODO : Check this. */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_RGB332:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_3_3_2,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_A8: /* ZERO, ZERO, ZERO, X */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_L8: /* X, X, X, ONE */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_I8: /* X, X, X, X */
+	case MESA_FORMAT_CI8:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+		/* YUV422 TODO conversion */  /* X, Y, Z, ONE, G8R8_G8B8 */
+		/*
+		  case MESA_FORMAT_YCBCR:
+		  t->SQ_TEX_RESOURCE1.bitfields.DATA_FORMAT = ;
+		  break;
+		*/
+		/* VUY422 TODO conversion */  /* X, Y, Z, ONE, G8R8_G8B8 */
+		/*
+		  case MESA_FORMAT_YCBCR_REV:
+		  t->SQ_TEX_RESOURCE1.bitfields.DATA_FORMAT = ;
+		  break;
+		*/
+	case MESA_FORMAT_RGB_DXT1: /* not supported yet */
+	case MESA_FORMAT_RGBA_DXT1: /* not supported yet */
+	case MESA_FORMAT_RGBA_DXT3: /* not supported yet */
+	case MESA_FORMAT_RGBA_DXT5: /* not supported yet */
+	        return GL_FALSE;
+
+	case MESA_FORMAT_RGBA_FLOAT32:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_32_32_FLOAT,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_RGBA_FLOAT16:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_16_16_FLOAT,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_RGB_FLOAT32: /* X, Y, Z, ONE */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_32_FLOAT,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_RGB_FLOAT16:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_16_FLOAT,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_ALPHA_FLOAT32: /* ZERO, ZERO, ZERO, X */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_32_FLOAT,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_ALPHA_FLOAT16: /* ZERO, ZERO, ZERO, X */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_16_FLOAT,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_LUMINANCE_FLOAT32: /* X, X, X, ONE */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_32_FLOAT,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_LUMINANCE_FLOAT16: /* X, X, X, ONE */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_16_FLOAT,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_32_32_FLOAT,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_16_16_FLOAT,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_32_FLOAT,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_16_FLOAT,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		break;
+	case MESA_FORMAT_Z16:
+	case MESA_FORMAT_X8_Z24:
+	case MESA_FORMAT_S8_Z24:
+	case MESA_FORMAT_Z24_S8:
+	case MESA_FORMAT_Z32:
+	case MESA_FORMAT_S8:
+		SETbit(t->SQ_TEX_RESOURCE0, TILE_TYPE_bit);
+		SETfield(t->SQ_TEX_RESOURCE0, ARRAY_1D_TILED_THIN1,
+			 SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_shift,
+			 SQ_TEX_RESOURCE_WORD0_0__TILE_MODE_mask);
+		switch (mesa_format) {
+		case MESA_FORMAT_Z16:
+			SETfield(t->SQ_TEX_RESOURCE1, FMT_16,
+				 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+			break;
+		case MESA_FORMAT_X8_Z24:
+		case MESA_FORMAT_S8_Z24:
+			SETfield(t->SQ_TEX_RESOURCE1, FMT_8_24,
+				 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+			break;
+		case MESA_FORMAT_Z24_S8:
+			SETfield(t->SQ_TEX_RESOURCE1, FMT_24_8,
+				 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+			break;
+		case MESA_FORMAT_Z32:
+			SETfield(t->SQ_TEX_RESOURCE1, FMT_32,
+				 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+			break;
+		case MESA_FORMAT_S8:
+			SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
+				 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+			break;
+		default:
+			break;
+		};
+		switch (tObj->DepthMode) {
+		case GL_LUMINANCE:  /* X, X, X, ONE */
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+			break;
+		case GL_INTENSITY:  /* X, X, X, X */
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+			break;
+		case GL_ALPHA:     /* ZERO, ZERO, ZERO, X */
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+			break;
+		default:
+			return GL_FALSE;
+		}
+		break;
+	/* EXT_texture_sRGB */
+	case MESA_FORMAT_SRGBA8:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+		break;
+	case MESA_FORMAT_SLA8:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+		break;
+	case MESA_FORMAT_SL8: /* X, X, X, ONE */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+		break;
+	default:
+		/* Not supported format */
+		return GL_FALSE;
+	};
+
+	return GL_TRUE;
+}
+
+/* Translate a GL depth-compare enum into the SQ_TEX_DEPTH_COMPARE_* value
+ * stored in the sampler's DEPTH_COMPARE_FUNCTION field.
+ *
+ * \param func  GL_NEVER ... GL_ALWAYS
+ * \return the hardware compare code; an unrecognised enum is reported
+ *         through WARN_ONCE and yields 0. */
+static GLuint r600_translate_shadow_func(GLenum func)
+{
+   GLuint hw_func = 0;
+
+   switch (func) {
+   case GL_NEVER:    hw_func = SQ_TEX_DEPTH_COMPARE_NEVER;        break;
+   case GL_LESS:     hw_func = SQ_TEX_DEPTH_COMPARE_LESS;         break;
+   case GL_LEQUAL:   hw_func = SQ_TEX_DEPTH_COMPARE_LESSEQUAL;    break;
+   case GL_GREATER:  hw_func = SQ_TEX_DEPTH_COMPARE_GREATER;      break;
+   case GL_GEQUAL:   hw_func = SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; break;
+   case GL_NOTEQUAL: hw_func = SQ_TEX_DEPTH_COMPARE_NOTEQUAL;     break;
+   case GL_EQUAL:    hw_func = SQ_TEX_DEPTH_COMPARE_EQUAL;        break;
+   case GL_ALWAYS:   hw_func = SQ_TEX_DEPTH_COMPARE_ALWAYS;       break;
+   default:
+      WARN_ONCE("Unknown shadow compare function! %d", func);
+   }
+   return hw_func;
+}
+
+static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
+{
+   /* value * 2^frac_bits; via int64_t since negative float -> unsigned is UB */
+   return (uint32_t)(int64_t)(value * (1 << frac_bits));
+}
+
+/* Re-run r600GetTexFormat() on the base-level image (it reads tObj->DepthMode)
+ * and mark the object unvalidated if that format/mode pair is rejected. */
+void r600SetDepthTexMode(struct gl_texture_object *tObj)
+{
+	const struct gl_texture_image *baseImage;
+	if (!tObj)
+		return;
+	baseImage = tObj->Image[0][tObj->BaseLevel];
+	/* No base image yet: nothing to translate (and nothing to dereference). */
+	if (baseImage && !r600GetTexFormat(tObj, baseImage->TexFormat))
+		radeon_tex_obj(tObj)->validated = GL_FALSE;
+}
+
+/**
+ * Compute the cached hardware register values for the given texture object.
+ * \param ctx     GL context
+ * \param texObj  texture object whose cached SQ_TEX_* words are rebuilt
+ * \param unit    texture unit index; its LodBias is added to the object's
+ */
+static GLboolean setup_hardware_state(GLcontext * ctx, struct gl_texture_object *texObj, int unit)
+{
+	context_t *rmesa = R700_CONTEXT(ctx);
+	radeonTexObj *t = radeon_tex_obj(texObj);
+	const struct gl_texture_image *firstImage;
+	GLuint uTexelPitch, row_align;
+
+	if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled &&
+	    t->image_override &&
+	    t->bo)
+		return GL_TRUE; /* DRI2 override with a bo attached: cached words are left as they are */
+
+	firstImage = t->base.Image[0][t->minLod]; /* face 0, first level in use */
+
+	if (!t->image_override) {
+		if (!r600GetTexFormat(texObj, firstImage->TexFormat)) {
+			radeon_warning("unsupported texture format in %s\n",
+				       __FUNCTION__);
+			return GL_FALSE;
+		}
+	}
+
+	switch (texObj->Target) {
+        case GL_TEXTURE_1D:
+		SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_1D, DIM_shift, DIM_mask);
+		SETfield(t->SQ_TEX_RESOURCE1, 0, TEX_DEPTH_shift, TEX_DEPTH_mask);
+		break;
+        case GL_TEXTURE_2D:
+        case GL_TEXTURE_RECTANGLE_NV:
+		SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_2D, DIM_shift, DIM_mask);
+		SETfield(t->SQ_TEX_RESOURCE1, 0, TEX_DEPTH_shift, TEX_DEPTH_mask);
+		break;
+        case GL_TEXTURE_3D:
+		SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_3D, DIM_shift, DIM_mask);
+		SETfield(t->SQ_TEX_RESOURCE1, firstImage->Depth - 1, // ???
+			 TEX_DEPTH_shift, TEX_DEPTH_mask);
+		break;
+        case GL_TEXTURE_CUBE_MAP:
+		SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_CUBEMAP, DIM_shift, DIM_mask);
+		SETfield(t->SQ_TEX_RESOURCE1, 0, TEX_DEPTH_shift, TEX_DEPTH_mask);
+		break;
+        default:
+		radeon_error("unexpected texture target type in %s\n", __FUNCTION__);
+		return GL_FALSE;
+	}
+
+	row_align = rmesa->radeon.texture_row_align - 1; /* used as a mask below; assumes a power-of-two alignment (TODO confirm) */
+	uTexelPitch = (_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align; /* row stride rounded up */
+	uTexelPitch = uTexelPitch / _mesa_get_format_bytes(firstImage->TexFormat); /* stride -> texels */
+	uTexelPitch = (uTexelPitch + R700_TEXEL_PITCH_ALIGNMENT_MASK)
+		& ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+
+	/* min pitch is 8 */
+	if (uTexelPitch < 8)
+		uTexelPitch = 8;
+
+	SETfield(t->SQ_TEX_RESOURCE0, (uTexelPitch/8)-1, PITCH_shift, PITCH_mask); /* field stores pitch/8 - 1 */
+	SETfield(t->SQ_TEX_RESOURCE0, firstImage->Width - 1,
+		 TEX_WIDTH_shift, TEX_WIDTH_mask);
+	SETfield(t->SQ_TEX_RESOURCE1, firstImage->Height - 1,
+		 TEX_HEIGHT_shift, TEX_HEIGHT_mask);
+
+	t->SQ_TEX_RESOURCE2 = get_base_teximage_offset(t) / 256; /* offsets are stored divided by 256 */
+
+	t->SQ_TEX_RESOURCE3 = radeon_miptree_image_offset(t->mt, 0, t->minLod + 1) / 256; /* offset of the level after minLod */
+
+	SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask);
+	SETfield(t->SQ_TEX_RESOURCE5, t->maxLod - t->minLod, LAST_LEVEL_shift, LAST_LEVEL_mask);
+
+	SETfield(t->SQ_TEX_SAMPLER1,
+		S_FIXED(CLAMP(t->base.MinLod - t->minLod, 0, 15), 6), /* LOD limits relative to minLod, 6 fractional bits */
+		MIN_LOD_shift, MIN_LOD_mask);
+	SETfield(t->SQ_TEX_SAMPLER1,
+		S_FIXED(CLAMP(t->base.MaxLod - t->minLod, 0, 15), 6),
+		MAX_LOD_shift, MAX_LOD_mask);
+	SETfield(t->SQ_TEX_SAMPLER1,
+		S_FIXED(CLAMP(ctx->Texture.Unit[unit].LodBias + t->base.LodBias, -16, 16), 6), /* unit bias + object bias; may be negative */
+		SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_shift, SQ_TEX_SAMPLER_WORD1_0__LOD_BIAS_mask);
+
+	if(texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB)
+	{
+		SETfield(t->SQ_TEX_SAMPLER0, r600_translate_shadow_func(texObj->CompareFunc), DEPTH_COMPARE_FUNCTION_shift, DEPTH_COMPARE_FUNCTION_mask);
+	}
+	else
+	{
+		CLEARfield(t->SQ_TEX_SAMPLER0, DEPTH_COMPARE_FUNCTION_mask); /* no shadow comparison requested */
+	}
+
+	return GL_TRUE;
+}
+
+/**
+ * Make the given texture ready for rendering on texture unit \p unit.
+ *
+ * Validates the mipmap tree, then refreshes the cached copies of the
+ * hardware registers.  The object is marked validated only when both
+ * steps succeed; otherwise GL_FALSE is returned and the flag is untouched.
+ */
+static GLboolean r600_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj, int unit)
+{
+	radeonTexObj *rtex = radeon_tex_obj(texObj);
+	GLboolean ready = GL_FALSE;
+
+	if (radeon_validate_texture_miptree(ctx, texObj) &&
+	    setup_hardware_state(ctx, texObj, unit)) {
+		rtex->validated = GL_TRUE;
+		ready = GL_TRUE;
+	}
+
+	return ready;
+}
+
+/**
+ * Validate every enabled texture and add all buffers in use to the command stream's space accounting.
+ */
+GLboolean r600ValidateBuffers(GLcontext * ctx)
+{
+	context_t *rmesa = R700_CONTEXT(ctx);
+	struct radeon_renderbuffer *rrb;
+	struct radeon_bo *pbo;
+	int i;
+	int ret;
+
+	radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
+
+	rrb = radeon_get_colorbuffer(&rmesa->radeon);
+	/* color buffer */
+	if (rrb && rrb->bo) {
+		radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+						  rrb->bo, 0,
+						  RADEON_GEM_DOMAIN_VRAM);
+	}
+
+	/* depth buffer */
+	rrb = radeon_get_depthbuffer(&rmesa->radeon);
+	if (rrb && rrb->bo) {
+		radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+						  rrb->bo, 0,
+						  RADEON_GEM_DOMAIN_VRAM);
+	}
+	/* textures bound to the enabled units */
+	for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
+		radeonTexObj *t;
+
+		if (!ctx->Texture.Unit[i]._ReallyEnabled)
+			continue;
+
+		if (!r600_validate_texture(ctx, ctx->Texture.Unit[i]._Current, i)) {
+			radeon_warning("failed to validate texture for unit %d.\n", i);
+		}
+		t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
+		if (t->image_override && t->bo)
+			radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+							  t->bo,
+							  RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+		else if (t->mt && t->mt->bo) /* a failed validation or an override without a bo may leave no miptree */
+			radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+							  t->mt->bo,
+							  RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+	}
+
+	pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(ctx); /* active fragment shader, if it has a bo */
+	if (pbo) {
+		radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo,
+						  RADEON_GEM_DOMAIN_GTT, 0);
+	}
+
+	pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(ctx); /* active vertex shader, if it has a bo */
+	if (pbo) {
+		radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo,
+						  RADEON_GEM_DOMAIN_GTT, 0);
+	}
+
+	ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
+	if (ret)
+		return GL_FALSE; /* the space check reported a non-zero status */
+	return GL_TRUE;
+}
+
+void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, /* put texture `texname` into image-override mode at a fixed offset */
+		      unsigned long long offset, GLint depth, GLuint pitch)
+{
+	context_t *rmesa = pDRICtx->driverPrivate;
+	struct gl_texture_object *tObj =
+	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+	radeonTexObjPtr t = radeon_tex_obj(tObj); /* NOTE(review): taken before the NULL check below; fine only if radeon_tex_obj() does not dereference - confirm */
+	const struct gl_texture_image *firstImage;
+	uint32_t pitch_val, size, row_align;
+
+	if (!tObj)
+		return;
+
+	t->image_override = GL_TRUE; /* set even when offset is 0 and nothing else is configured */
+
+	if (!offset)
+		return;
+
+	firstImage = t->base.Image[0][t->minLod]; /* NOTE(review): not checked for NULL before use */
+	row_align = rmesa->radeon.texture_row_align - 1;
+	size = ((_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align) * firstImage->Height; /* aligned row stride * height */
+	if (t->bo) {
+		radeon_bo_unref(t->bo);
+		t->bo = NULL;
+	}
+	t->bo = radeon_legacy_bo_alloc_fake(rmesa->radeon.radeonScreen->bom, size, offset); /* result is not checked here */
+	t->override_offset = offset;
+	pitch_val = pitch;
+	switch (depth) {
+	case 32: /* 8888 with alpha taken from the data (W = SQ_SEL_W) */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		pitch_val /= 4; /* presumably bytes -> texels at 4 bytes per texel */
+		break;
+	case 24:
+	default: /* same layout as 32, but W is forced to SQ_SEL_1 */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		pitch_val /= 4;
+		break;
+	case 16: /* 565, W forced to SQ_SEL_1 */
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		pitch_val /= 2;
+		break;
+	}
+
+	pitch_val = (pitch_val + R700_TEXEL_PITCH_ALIGNMENT_MASK)
+		& ~R700_TEXEL_PITCH_ALIGNMENT_MASK; /* round up to the texel pitch alignment */
+
+	/* min pitch is 8 */
+	if (pitch_val < 8)
+		pitch_val = 8;
+
+	SETfield(t->SQ_TEX_RESOURCE0, (pitch_val/8)-1, PITCH_shift, PITCH_mask); /* field stores pitch/8 - 1 */
+}
+
+/**
+ * Bind the color buffer of drawable \p dPriv as level 0 of the texture object
+ * currently selected for \p target on the active texture unit.
+ * The object and its image drop their own bo/miptree and reference rb->bo
+ * instead; format, swizzle, pitch and size words follow rb->cpp, rb->pitch and
+ * \p glx_texture_format.  Returns without changes when nothing can be bound.
+ */
+void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv)
+{
+	struct gl_texture_unit *texUnit;
+	struct gl_texture_object *texObj;
+	struct gl_texture_image *texImage;
+	struct radeon_renderbuffer *rb;
+	radeon_texture_image *rImage;
+	radeonContextPtr radeon;
+	struct radeon_framebuffer *rfb;
+	radeonTexObjPtr t;
+	uint32_t pitch_val;
+
+	radeon = pDRICtx->driverPrivate;
+
+	rfb = dPriv->driverPrivate;
+	texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
+	texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
+	texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
+
+	rImage = get_radeon_texture_image(texImage);
+	t = radeon_tex_obj(texObj);
+	if (t == NULL || texImage == NULL) {
+		return; /* no texture object or no level-0 image to attach to */
+	}
+
+	radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE);
+	rb = rfb->color_rb[0];
+	if (rb == NULL || rb->bo == NULL) {
+		/* No color buffer, or no BO behind it, to bind */
+		return;
+	}
+
+	_mesa_lock_texture(radeon->glCtx, texObj);
+	if (t->bo) {
+		radeon_bo_unref(t->bo);
+		t->bo = NULL;
+	}
+	if (rImage->bo) {
+		radeon_bo_unref(rImage->bo);
+		rImage->bo = NULL;
+	}
+
+	radeon_miptree_unreference(&t->mt);
+	radeon_miptree_unreference(&rImage->mt);
+
+	_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
+				   rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
+	texImage->RowStride = rb->pitch / rb->cpp;
+
+	rImage->bo = rb->bo;
+	radeon_bo_ref(rImage->bo);
+	t->bo = rb->bo;
+	radeon_bo_ref(t->bo);
+	t->image_override = GL_TRUE;
+	t->override_offset = 0;
+	pitch_val = rb->pitch;
+	switch (rb->cpp) {
+	case 4:
+		if (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB) {
+			SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+				 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		} else {
+			SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+				 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+			SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+				 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		}
+		pitch_val /= 4;
+		break;
+	case 3:
+	default:
+		// FMT_8_8_8 ???
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		pitch_val /= 4;
+		break;
+	case 2:
+		SETfield(t->SQ_TEX_RESOURCE1, FMT_5_6_5,
+			 SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
+
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+		SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+			 SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+		pitch_val /= 2;
+		break;
+	}
+
+	pitch_val = (pitch_val + R700_TEXEL_PITCH_ALIGNMENT_MASK)
+		& ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+
+	/* min pitch is 8 */
+	if (pitch_val < 8)
+		pitch_val = 8;
+
+	SETfield(t->SQ_TEX_RESOURCE0, (pitch_val/8)-1, PITCH_shift, PITCH_mask);
+	SETfield(t->SQ_TEX_RESOURCE0, rb->base.Width - 1,
+		 TEX_WIDTH_shift, TEX_WIDTH_mask);
+	SETfield(t->SQ_TEX_RESOURCE1, rb->base.Height - 1,
+		 TEX_HEIGHT_shift, TEX_HEIGHT_mask);
+
+	t->validated = GL_TRUE;
+	_mesa_unlock_texture(radeon->glCtx, texObj);
+	return;
+}
+
+void r600SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) /* variant without a format argument */
+{
+        r600SetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); /* forwards with the format fixed to RGBA */
+}
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
new file mode 100644
index 0000000000..de5c5d89fe
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -0,0 +1,6668 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "shader/prog_parameter.h"
+
+#include "radeon_debug.h"
+#include "r600_context.h"
+
+#include "r700_assembler.h"
+
+#define USE_CF_FOR_CONTINUE_BREAK 1
+#define USE_CF_FOR_POP_AFTER      1
+
+struct prog_instruction noise1_insts[12] = { /* hand-encoded subroutine: BGNSUB, 3x MOV, SGT, IF { MOV; RET } ENDIF, MOV, RET, ENDSUB; presumably a stand-in for GLSL noise1() - confirm */
+    {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0}, 
+    {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, 
+    {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
+};
+float noise1_const[2][4] = { /* two rows declared, only row 0 given: row 1 is zero-initialised */
+    {0.300000f, 0.900000f, 0.500000f, 0.300000f}
+};
+
+COMPILED_SUB noise1_presub = { /* ties noise1_insts to noise1_const; field meanings come from COMPILED_SUB, declared elsewhere */
+    &(noise1_insts[0]),
+    12, /* equals the number of entries in noise1_insts */
+    2, 
+    1, 
+    0, 
+    &(noise1_const[0]), 
+    SWIZZLE_X, 
+    SWIZZLE_X, 
+    SWIZZLE_X, 
+    SWIZZLE_X,
+    {0,0,0},
+    0 
+};
+
+BITS addrmode_PVSDST(PVSDST * pPVSDST) /* rebuild the address mode from its two stored fields */
+{
+	return ((BITS)pPVSDST->addrmode1 << 1) | pPVSDST->addrmode0; /* addrmode1 is bit 1, addrmode0 is bit 0 */
+}
+
+void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode) /* store bits 0 and 1 of addrmode; higher bits are dropped */
+{
+	pPVSDST->addrmode1 = (addrmode >> 1) & 1;
+	pPVSDST->addrmode0 = addrmode & 1;
+}
+
+void nomask_PVSDST(PVSDST * pPVSDST) /* set all four write flags (writex..writew) */
+{
+	pPVSDST->writew = 1; pPVSDST->writez = 1; pPVSDST->writey = 1; pPVSDST->writex = 1;
+}
+
+BITS addrmode_PVSSRC(PVSSRC* pPVSSRC) /* rebuild the address mode from its two stored fields */
+{
+	return ((BITS)pPVSSRC->addrmode1 << 1) | pPVSSRC->addrmode0; /* addrmode1 is bit 1, addrmode0 is bit 0 */
+}
+
+void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode) /* store bits 0 and 1 of addrmode; higher bits are dropped */
+{
+	pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
+	pPVSSRC->addrmode0 = addrmode & 1;
+}
+
+
+/* Broadcast one selector to all four source channels.  The assignments stay
+ * chained, so x, y and z receive the value as it was stored in swizzlew. */
+void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz) 
+{
+	pPVSSRC->swizzlex = pPVSSRC->swizzley =
+		pPVSSRC->swizzlez = pPVSSRC->swizzlew = swz;
+}
+
+void noswizzle_PVSSRC(PVSSRC* pPVSSRC) /* identity swizzle: every channel selects itself */
+{
+	pPVSSRC->swizzlew = SQ_SEL_W;
+	pPVSSRC->swizzlez = SQ_SEL_Z;
+	pPVSSRC->swizzley = SQ_SEL_Y;
+	pPVSSRC->swizzlex = SQ_SEL_X;
+}
+
+void /* Apply a second swizzle (x, y, z, w) on top of the swizzle already stored in the operand. */
+swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
+{
+    switch (x) /* a channel selector is replaced by what that channel currently selects */
+    {
+        case SQ_SEL_X: x = pPVSSRC->swizzlex; 
+            break;
+        case SQ_SEL_Y: x = pPVSSRC->swizzley; 
+            break;
+        case SQ_SEL_Z: x = pPVSSRC->swizzlez; 
+            break;
+        case SQ_SEL_W: x = pPVSSRC->swizzlew; 
+            break;
+        default:; /* any other selector is kept as passed in */
+    }
+
+    switch (y) 
+    {
+        case SQ_SEL_X: y = pPVSSRC->swizzlex; 
+            break;
+        case SQ_SEL_Y: y = pPVSSRC->swizzley; 
+            break;
+        case SQ_SEL_Z: y = pPVSSRC->swizzlez; 
+            break;
+        case SQ_SEL_W: y = pPVSSRC->swizzlew; 
+            break;
+        default:;
+    }
+
+    switch (z) 
+    {
+        case SQ_SEL_X: z = pPVSSRC->swizzlex; 
+            break;
+        case SQ_SEL_Y: z = pPVSSRC->swizzley; 
+            break;
+        case SQ_SEL_Z: z = pPVSSRC->swizzlez; 
+            break;
+        case SQ_SEL_W: z = pPVSSRC->swizzlew; 
+            break;
+        default:;
+    }
+
+    switch (w) 
+    {
+        case SQ_SEL_X: w = pPVSSRC->swizzlex; 
+            break;
+        case SQ_SEL_Y: w = pPVSSRC->swizzley; 
+            break;
+        case SQ_SEL_Z: w = pPVSSRC->swizzlez; 
+            break;
+        case SQ_SEL_W: w = pPVSSRC->swizzlew; 
+            break;
+        default:;
+    }
+    /* All four lookups above read the old swizzle; the fields are only overwritten here. */
+    pPVSSRC->swizzlex = x;
+    pPVSSRC->swizzley = y;
+    pPVSSRC->swizzlez = z;
+    pPVSSRC->swizzlew = w;
+}
+
+/* Set the negate flag of all four channels of the source operand
+ * (negx..negw = 1). */
+void neg_PVSSRC(PVSSRC* pPVSSRC) 
+{
+	pPVSSRC->negw = 1; pPVSSRC->negz = 1;
+	pPVSSRC->negy = 1; pPVSSRC->negx = 1;
+}
+
+/* Clear the negate flag of all four channels of the source operand
+ * (negx..negw = 0). */
+void noneg_PVSSRC(PVSSRC* pPVSSRC) 
+{
+	pPVSSRC->negw = 0; pPVSSRC->negz = 0;
+	pPVSSRC->negy = 0; pPVSSRC->negx = 0;
+}
+
+// invert every channel's negate flag, i.e. negate the argument (for SUB instead of ADD and alike)
+void flipneg_PVSSRC(PVSSRC* pPVSSRC) 
+{
+	pPVSSRC->negw = (pPVSSRC->negw == 0);
+	pPVSSRC->negz = (pPVSSRC->negz == 0);
+	pPVSSRC->negy = (pPVSSRC->negy == 0);
+	pPVSSRC->negx = (pPVSSRC->negx == 0);
+}
+
+/* Make channel c of the source operand use the SQ_SEL_0 selector and
+ * clear that channel's negate flag.
+ * c: 0 = x, 1 = y, 2 = z, 3 = w; any other value leaves the operand
+ * untouched. */
+void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c) 
+{
+	if (c == 0)      { pPVSSRC->negx = 0; pPVSSRC->swizzlex = SQ_SEL_0; }
+	else if (c == 1) { pPVSSRC->negy = 0; pPVSSRC->swizzley = SQ_SEL_0; }
+	else if (c == 2) { pPVSSRC->negz = 0; pPVSSRC->swizzlez = SQ_SEL_0; }
+	else if (c == 3) { pPVSSRC->negw = 0; pPVSSRC->swizzlew = SQ_SEL_0; }
+}
+
+/* Make channel c of the source operand use the SQ_SEL_1 selector and
+ * clear that channel's negate flag.
+ * c: 0 = x, 1 = y, 2 = z, 3 = w; any other value leaves the operand
+ * untouched. */
+void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c) 
+{
+	if (c == 0)      { pPVSSRC->negx = 0; pPVSSRC->swizzlex = SQ_SEL_1; }
+	else if (c == 1) { pPVSSRC->negy = 0; pPVSSRC->swizzley = SQ_SEL_1; }
+	else if (c == 2) { pPVSSRC->negz = 0; pPVSSRC->swizzlez = SQ_SEL_1; }
+	else if (c == 3) { pPVSSRC->negw = 0; pPVSSRC->swizzlew = SQ_SEL_1; }
+}
+
+/* OR of the point_size, edge_flag, rta_index, kill_flag and viewport_index fields: non-zero if any is set. */
+BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)  
+{
+	  BITS any = pOutVTXFmt0->point_size | pOutVTXFmt0->edge_flag;
+	  any |= pOutVTXFmt0->rta_index | pOutVTXFmt0->kill_flag;
+	  any |= pOutVTXFmt0->viewport_index;
+	  return any;
+}
+
+/* OR of the depth, stencil_ref, mask and coverage_to_mask fields: non-zero if any is set. */
+BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) 
+{
+	  BITS any = pFPOutFmt->depth | pFPOutFmt->stencil_ref;
+	  any |= pFPOutFmt->mask | pFPOutFmt->coverage_to_mask;
+	  return any;
+}
+
+/* GL_TRUE when the instruction is not an OP3 (dst.op3 == 0) and its opcode
+ * is DOT4, DOT4_IEEE or CUBE; GL_FALSE otherwise. */
+GLboolean is_reduction_opcode(PVSDWORD* dest)
+{
+    if (dest->dst.op3 != 0)
+        return GL_FALSE;
+
+    return (dest->dst.opcode == SQ_OP2_INST_DOT4 ||
+            dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE ||
+            dest->dst.opcode == SQ_OP2_INST_CUBE) ? GL_TRUE : GL_FALSE;
+}
+
+GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) /* map a GL component type + channel count (1..4) to an FMT_* code */
+{
+    GLuint format = FMT_INVALID; /* returned unchanged for an unknown type or channel count */
+	GLuint uiElemSize = 0; /* bytes per channel on the client side */
+
+    switch (eType)
+    {
+        case GL_BYTE:
+        case GL_UNSIGNED_BYTE:
+			uiElemSize = 1;
+            switch(nChannels)
+            {
+                case 1:
+                    format = FMT_8; break;
+                case 2:
+                    format = FMT_8_8; break;
+                case 3:
+                    format = FMT_8_8_8; break;
+                case 4:
+                    format = FMT_8_8_8_8; break;
+                default:
+                    break;
+            }
+            break;
+
+        case GL_UNSIGNED_SHORT:
+        case GL_SHORT:
+			uiElemSize = 2;
+            switch(nChannels)
+            {
+                case 1:
+                    format = FMT_16; break;
+                case 2:
+                    format = FMT_16_16; break;
+                case 3:
+                    format = FMT_16_16_16; break;
+                case 4:
+                    format = FMT_16_16_16_16; break;
+                default:
+                    break;
+            }
+            break;
+
+        case GL_UNSIGNED_INT:
+        case GL_INT:
+			uiElemSize = 4;
+            switch(nChannels)
+            {
+                case 1:
+                    format = FMT_32; break;
+                case 2:
+                    format = FMT_32_32; break;
+                case 3:
+                    format = FMT_32_32_32; break;
+                case 4:
+                    format = FMT_32_32_32_32; break;
+                default:
+                    break;
+            }
+            break;
+
+        case GL_FLOAT:
+			uiElemSize = 4;
+			switch(nChannels)
+            {
+                case 1:
+                    format = FMT_32_FLOAT; break;
+                case 2:
+                    format = FMT_32_32_FLOAT; break;
+                case 3:
+                    format = FMT_32_32_32_FLOAT; break;
+                case 4:
+                    format = FMT_32_32_32_32_FLOAT; break;
+                default:
+                    break;
+            }
+			break;
+        case GL_DOUBLE: /* NOTE(review): reports 8 bytes per channel but maps to the 32-bit float formats - confirm this is intended */
+			uiElemSize = 8;
+            switch(nChannels)
+            {
+                case 1:
+                    format = FMT_32_FLOAT; break;
+                case 2:
+                    format = FMT_32_32_FLOAT; break;
+                case 3:
+                    format = FMT_32_32_32_FLOAT; break;
+                case 4:
+                    format = FMT_32_32_32_32_FLOAT; break;
+                default:
+                    break;
+            }
+            break;
+        default: /* unknown type: format stays FMT_INVALID and the element size stays 0 */
+			;
+            //GL_ASSERT_NO_CASE();
+    }
+
+    if(NULL != pClient_size) /* the size output is optional */
+    {
+	    *pClient_size = uiElemSize * nChannels; /* computed even when the channel count had no matching format */
+    }
+
+    return(format);
+}
+
+unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3) /* number of source operands for an ALU opcode */
+{
+    if(nIsOp3 > 0) /* any OP3 instruction counts as three operands, whatever the opcode */
+    {
+        return 3;
+    }
+
+    switch (opcode)
+    {
+    case SQ_OP2_INST_ADD:
+    case SQ_OP2_INST_KILLE:
+    case SQ_OP2_INST_KILLGT:
+    case SQ_OP2_INST_KILLGE:
+    case SQ_OP2_INST_KILLNE:
+    case SQ_OP2_INST_MUL: 
+    case SQ_OP2_INST_MAX:
+    case SQ_OP2_INST_MIN:
+    //case SQ_OP2_INST_MAX_DX10:
+    //case SQ_OP2_INST_MIN_DX10:
+    case SQ_OP2_INST_SETE: 
+    case SQ_OP2_INST_SETNE:
+    case SQ_OP2_INST_SETGT:
+    case SQ_OP2_INST_SETGE:
+    case SQ_OP2_INST_PRED_SETE:
+    case SQ_OP2_INST_PRED_SETGT:
+    case SQ_OP2_INST_PRED_SETGE:
+    case SQ_OP2_INST_PRED_SETNE:
+    case SQ_OP2_INST_DOT4:
+    case SQ_OP2_INST_DOT4_IEEE:
+    case SQ_OP2_INST_CUBE:
+        return 2;  
+
+    case SQ_OP2_INST_MOV: 
+    case SQ_OP2_INST_MOVA_FLOOR:
+    case SQ_OP2_INST_FRACT:
+    case SQ_OP2_INST_FLOOR:
+    case SQ_OP2_INST_TRUNC:
+    case SQ_OP2_INST_EXP_IEEE:
+    case SQ_OP2_INST_LOG_CLAMPED:
+    case SQ_OP2_INST_LOG_IEEE:
+    case SQ_OP2_INST_RECIP_IEEE:
+    case SQ_OP2_INST_RECIPSQRT_IEEE:
+    case SQ_OP2_INST_FLT_TO_INT:
+    case SQ_OP2_INST_SIN:
+    case SQ_OP2_INST_COS:
+        return 1;
+        
+    default: radeon_error( /* opcode missing from both lists: report it, then fall out of the switch */
+		    "Need instruction operand number for %x.\n", opcode); 
+    };
+
+    return 3; /* reached only for unlisted opcodes */
+}
+
+/*
+ * Put an assembler context into its initial state and bind it to a shader.
+ *
+ * spt     : shader type, stored in pAsm->currentShaderType.
+ * pAsm    : assembler context to initialize.
+ * pShader : shader object; initialized with Init_R700_Shader() and stored in
+ *           pAsm->pR700Shader.
+ *
+ * Always returns 0.
+ */
+int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
+{
+    GLuint idx;
+
+    Init_R700_Shader(pShader);
+    pAsm->pR700Shader       = pShader;
+    pAsm->currentShaderType = spt;
+
+    /* Control-flow clause tracking: nothing is open yet. */
+    pAsm->cf_last_export_ptr           = NULL;
+    pAsm->cf_current_export_clause_ptr = NULL;
+    pAsm->cf_current_alu_clause_ptr    = NULL;
+    pAsm->cf_current_tex_clause_ptr    = NULL;
+    pAsm->cf_current_vtx_clause_ptr    = NULL;
+    pAsm->cf_current_cf_clause_ptr     = NULL;
+    pAsm->cf_current_clause_type       = CF_EMPTY_CLAUSE;
+
+    /* Export counters. */
+    pAsm->number_of_colorandz_exports = 0;
+    pAsm->number_of_exports           = 0;
+    pAsm->number_of_export_opcodes    = 0;
+
+    pAsm->alu_x_opcode = 0;
+
+    /* Scratch destination / source operand descriptors. */
+    pAsm->D2.bits = 0;
+    pAsm->D.bits  = 0;
+    for (idx = 0; idx < 3; idx++)
+    {
+        pAsm->S[idx].bits = 0;
+    }
+
+    pAsm->uLastPosUpdate = 0;
+
+    *(BITS *) &pAsm->fp_stOutFmt0 = 0;
+
+    pAsm->uIIns = 0;
+    pAsm->uOIns = 0;
+    pAsm->number_used_registers = 0;
+    pAsm->uUsedConsts = 256;
+
+    // Fragment programs
+    pAsm->uBoolConsts = 0;
+    pAsm->uIntConsts  = 0;
+    pAsm->uInsts      = 0;
+    pAsm->uConsts     = 0;
+
+    /* Flow-control stack starts empty. */
+    pAsm->FCSP = 0;
+    pAsm->fc_stack[0].type = FC_NONE;
+
+    /* No argument substitution is active. */
+    for (idx = 0; idx < 4; idx++)
+    {
+        pAsm->aArgSubst[idx] = (-1);
+    }
+
+    pAsm->uOutputs = 0;
+
+    /* Export register numbers: -1 marks "not assigned". */
+    for (idx = 0; idx < NUMBER_OF_OUTPUT_COLORS; idx++)
+    {
+        pAsm->color_export_register_number[idx] = (-1);
+    }
+    pAsm->depth_export_register_number            = (-1);
+    pAsm->stencil_export_register_number          = (-1);
+    pAsm->coverage_to_mask_export_register_number = (-1);
+    pAsm->mask_export_register_number             = (-1);
+
+    /* Register layout bases. */
+    pAsm->starting_export_register_number = 0;
+    pAsm->starting_vfetch_register_number = 0;
+    pAsm->starting_temp_register_number   = 0;
+    pAsm->uFirstHelpReg = 0;
+
+    /* Input usage flags. */
+    pAsm->input_position_is_used = GL_FALSE;
+    pAsm->input_normal_is_used   = GL_FALSE;
+    for (idx = 0; idx < NUMBER_OF_INPUT_COLORS; idx++)
+    {
+        pAsm->input_color_is_used[idx] = GL_FALSE;
+    }
+    for (idx = 0; idx < NUMBER_OF_TEXTURE_UNITS; idx++)
+    {
+        pAsm->input_texture_unit_is_used[idx] = GL_FALSE;
+    }
+
+    /* No vertex fetch instruction has been assembled for any attribute. */
+    for (idx = 0; idx < VERT_ATTRIB_MAX; idx++)
+    {
+        pAsm->vfetch_instruction_ptr_array[idx] = NULL;
+    }
+
+    pAsm->number_of_inputs = 0;
+
+    pAsm->is_tex           = GL_FALSE;
+    pAsm->need_tex_barrier = GL_FALSE;
+
+    /* Subroutine and caller tables are empty. */
+    pAsm->subs                 = NULL;
+    pAsm->unSubArraySize       = 0;
+    pAsm->unSubArrayPointer    = 0;
+    pAsm->callers              = NULL;
+    pAsm->unCallerArraySize    = 0;
+    pAsm->unCallerArrayPointer = 0;
+
+    /* Call stack entry 0 refers to the shader's own CF instruction list,
+     * which is then made the active list. */
+    pAsm->CALLSP = 0;
+    pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
+    pAsm->CALLSTACK[0].plstCFInstructions_local
+          = &(pAsm->pR700Shader->lstCFInstructions);
+    pAsm->CALLSTACK[0].max     = 0;
+    pAsm->CALLSTACK[0].current = 0;
+
+    SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
+
+    pAsm->unCFflags = 0;
+
+    pAsm->presubs           = NULL;
+    pAsm->unPresubArraySize = 0;
+    pAsm->unNumPresub       = 0;
+    pAsm->unCurNumILInsts   = 0;
+
+    pAsm->unVetTexBits      = 0;
+
+    return 0;
+}
+
+/*
+ * Tell whether an IL opcode is handled as a texture instruction.
+ *
+ * Returns GL_TRUE for OPCODE_TEX, OPCODE_TXP, OPCODE_TXB, OPCODE_DDX and
+ * OPCODE_DDY; GL_FALSE for everything else.
+ */
+GLboolean IsTex(gl_inst_opcode Opcode)
+{
+    switch (Opcode)
+    {
+    case OPCODE_TEX:
+    case OPCODE_TXP:
+    case OPCODE_TXB:
+    case OPCODE_DDX:
+    case OPCODE_DDY:
+        return GL_TRUE;
+    default:
+        return GL_FALSE;
+    }
+}
+
+/*
+ * Tell whether an IL opcode is handled as an ALU instruction.
+ *
+ * Currently this is simply the complement of IsTex().
+ */
+GLboolean IsAlu(gl_inst_opcode Opcode)
+{
+    //TODO : more for fc and ex for higher spec.
+    return IsTex(Opcode) ? GL_FALSE : GL_TRUE;
+}
+
+/*
+ * Make sure the assembler's current clause is of the requested type.
+ *
+ * When the current clause type already equals new_clause_type nothing is
+ * done.  Otherwise the pointer that tracks the currently open clause is
+ * cleared, the clause type is reset to CF_EMPTY_CLAUSE, and the new type is
+ * recorded.  For CF_EXPORT_CLAUSE an R700ControlFlowSXClause is also
+ * allocated, initialized and appended to the shader's CF instructions.
+ * Requesting CF_EMPTY_CLAUSE just leaves the type at CF_EMPTY_CLAUSE.
+ *
+ * pAsm            : assembler context.
+ * new_clause_type : clause type the caller is about to emit into.
+ *
+ * Returns GL_TRUE on success; GL_FALSE when either clause type is unknown or
+ * the export clause could not be allocated (an error is logged through
+ * radeon_error() in both cases).
+ */
+int check_current_clause(r700_AssemblerBase* pAsm,
+                         CF_CLAUSE_TYPE      new_clause_type)
+{
+    if (pAsm->cf_current_clause_type == new_clause_type)
+    {
+        return GL_TRUE;
+    }
+
+    // Close last open clause
+    switch (pAsm->cf_current_clause_type)
+    {
+    case CF_ALU_CLAUSE:
+        pAsm->cf_current_alu_clause_ptr = NULL;
+        break;
+    case CF_VTX_CLAUSE:
+        pAsm->cf_current_vtx_clause_ptr = NULL;
+        break;
+    case CF_TEX_CLAUSE:
+        pAsm->cf_current_tex_clause_ptr = NULL;
+        break;
+    case CF_EXPORT_CLAUSE:
+        pAsm->cf_current_export_clause_ptr = NULL;
+        break;
+    case CF_OTHER_CLAUSE:
+        pAsm->cf_current_cf_clause_ptr = NULL;
+        break;
+    case CF_EMPTY_CLAUSE:
+        break;
+    default:
+        // The invalid value here is the *current* type, so report that one.
+        radeon_error(
+                   "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) pAsm->cf_current_clause_type);
+        return GL_FALSE;
+    }
+
+    pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
+
+    // Create new clause
+    switch (new_clause_type)
+    {
+    case CF_ALU_CLAUSE:
+    case CF_VTX_CLAUSE:
+    case CF_TEX_CLAUSE:
+    case CF_OTHER_CLAUSE:
+        // The CF instruction for these is created by the add_* helpers.
+        pAsm->cf_current_clause_type = new_clause_type;
+        break;
+
+    case CF_EXPORT_CLAUSE:
+        {
+            R700ControlFlowSXClause* pR700ControlFlowSXClause
+                        = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
+
+            if (NULL == pR700ControlFlowSXClause)
+            {
+                radeon_error(
+                           "Error allocating new EXPORT CF instruction in check_current_clause. \n");
+                return GL_FALSE;
+            }
+
+            // Add new export instruction to control flow program
+            pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
+            Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
+            AddCFInstruction( pAsm->pR700Shader,
+                              (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
+
+            pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
+        }
+        break;
+
+    case CF_EMPTY_CLAUSE:
+        break;
+
+    default:
+        radeon_error(
+                   "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Append a fresh generic control-flow instruction to the shader.
+ *
+ * Switches the current clause to CF_OTHER_CLAUSE, allocates an
+ * R700ControlFlowGenericClause, stores it in pAsm->cf_current_cf_clause_ptr,
+ * initializes it and adds it to the shader's CF instructions.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if the clause switch or the allocation
+ * fails (cf_current_cf_clause_ptr is NULL after an allocation failure).
+ */
+GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
+{
+    if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->cf_current_cf_clause_ptr =
+      (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
+
+    if (NULL == pAsm->cf_current_cf_clause_ptr)
+    {
+        // This is a generic CF instruction, not a vertex fetch one.
+        radeon_error("Could not allocate a new generic CF instruction.\n");
+        return GL_FALSE;
+    }
+
+    Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
+    AddCFInstruction( pAsm->pR700Shader,
+                      (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
+
+    return GL_TRUE;
+}
+
+/*
+ * Append a vertex fetch instruction to the shader, opening a new VTX
+ * control-flow clause when none is open or the open one is full.
+ *
+ * pAsm                   : assembler context.
+ * vertex_instruction_ptr : instruction to add.
+ *
+ * A clause counts as full once its count field has reached
+ * GetCFMaxInstructions(...) - 1.  A new clause starts with count 0 and is
+ * linked to the instruction; otherwise the open clause's count is bumped.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if the clause switch or the clause
+ * allocation fails.
+ */
+GLboolean add_vfetch_instruction(r700_AssemblerBase*     pAsm,
+                                 R700VertexInstruction*  vertex_instruction_ptr)
+{
+    R700ControlFlowGenericClause* pClause;
+    GLboolean bStartNewClause = GL_TRUE;
+
+    if( GL_FALSE == check_current_clause(pAsm,  CF_VTX_CLAUSE) )
+    {
+        return GL_FALSE;
+    }
+
+    pClause = pAsm->cf_current_vtx_clause_ptr;
+    if( (NULL != pClause) &&
+        (pClause->m_Word1.f.count < GetCFMaxInstructions(pClause->m_ShaderInstType)-1) )
+    {
+        bStartNewClause = GL_FALSE;
+    }
+
+    if (GL_FALSE == bStartNewClause)
+    {
+        // Room left in the open clause: just account for one more fetch.
+        pClause->m_Word1.f.count++;
+    }
+    else
+    {
+        // Create new Vfetch control flow instruction for this new clause
+        pClause = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
+        pAsm->cf_current_vtx_clause_ptr = pClause;
+
+        if (NULL == pClause)
+        {
+            radeon_error("Could not allocate a new VFetch CF instruction.\n");
+            return GL_FALSE;
+        }
+
+        Init_R700ControlFlowGenericClause(pClause);
+        AddCFInstruction( pAsm->pR700Shader,
+                          (R700ControlFlowInstruction *)pClause );
+
+        pClause->m_Word1.f.pop_count        = 0x0;
+        pClause->m_Word1.f.cf_const         = 0x0;
+        pClause->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+        pClause->m_Word1.f.count            = 0x0;
+        pClause->m_Word1.f.end_of_program   = 0x0;
+        pClause->m_Word1.f.valid_pixel_mode = 0x0;
+        pClause->m_Word1.f.cf_inst          = SQ_CF_INST_VTX;
+        pClause->m_Word1.f.whole_quad_mode  = 0x0;
+        pClause->m_Word1.f.barrier          = 0x1;
+
+        LinkVertexInstruction(pClause, vertex_instruction_ptr );
+    }
+
+    AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
+
+    return GL_TRUE;
+}
+
+/*
+ * Append a texture instruction to the shader, opening a new TEX
+ * control-flow clause when none is open or the open one is full.
+ *
+ * pAsm                : assembler context.
+ * tex_instruction_ptr : instruction to add.
+ *
+ * A clause counts as full once its count field has reached
+ * GetCFMaxInstructions(...) - 1.  The clause's barrier bit is set when the
+ * current IL instruction has a destination dependency (nDstDep > -1) or
+ * pAsm->need_tex_barrier is GL_TRUE.  The first instruction placed in a
+ * clause is linked to that clause.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if the clause switch or the clause
+ * allocation fails.
+ */
+GLboolean add_tex_instruction(r700_AssemblerBase*     pAsm,
+                              R700TextureInstruction* tex_instruction_ptr)
+{
+    R700ControlFlowGenericClause* pClause;
+    GLboolean bStartNewClause = GL_TRUE;
+
+    if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
+    {
+        return GL_FALSE;
+    }
+
+    pClause = pAsm->cf_current_tex_clause_ptr;
+    if ( (NULL != pClause) &&
+         (pClause->m_Word1.f.count < GetCFMaxInstructions(pClause->m_ShaderInstType)-1) )
+    {
+        bStartNewClause = GL_FALSE;
+    }
+
+    if (GL_FALSE == bStartNewClause)
+    {
+        // Room left in the open clause: account for one more instruction.
+        pClause->m_Word1.f.count++;
+    }
+    else
+    {
+        // new tex cf instruction for this new clause
+        pClause = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
+        pAsm->cf_current_tex_clause_ptr = pClause;
+
+        if (NULL == pClause)
+        {
+            radeon_error("Could not allocate a new TEX CF instruction.\n");
+            return GL_FALSE;
+        }
+
+        Init_R700ControlFlowGenericClause(pClause);
+        AddCFInstruction( pAsm->pR700Shader,
+                          (R700ControlFlowInstruction *)pClause );
+
+        pClause->m_Word1.f.pop_count        = 0x0;
+        pClause->m_Word1.f.cf_const         = 0x0;
+        pClause->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+
+        pClause->m_Word1.f.end_of_program   = 0x0;
+        pClause->m_Word1.f.valid_pixel_mode = 0x0;
+        pClause->m_Word1.f.cf_inst          = SQ_CF_INST_TEX;
+        pClause->m_Word1.f.whole_quad_mode  = 0x0;
+        pClause->m_Word1.f.barrier          = 0x0;   //0x1;
+    }
+
+    // If this clause contains any TEX instruction that is dependent on a
+    // previous instruction, set the barrier bit
+    if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
+    {
+        pClause->m_Word1.f.barrier = 0x1;
+    }
+
+    // Only the first instruction of a clause gets linked to it.
+    if(NULL == pClause->m_pLinkedTEXInstruction)
+    {
+        pClause->m_pLinkedTEXInstruction = tex_instruction_ptr;
+        tex_instruction_ptr->m_pLinkedGenericClause = pClause;
+    }
+
+    AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
+
+    return GL_TRUE;
+}
+
+/*
+ * Build (or refresh) the vertex fetch instruction for one client array.
+ *
+ * pAsm                 : assembler context.
+ * gl_client_id         : index into pAsm->vfetch_instruction_ptr_array and
+ *                        value written to the buffer_id field; must be below
+ *                        VERT_ATTRIB_MAX.
+ * destination_register : GPR written to the dst_gpr field.
+ * number_of_elements   : component count; components beyond it select
+ *                        SQ_SEL_0 (x/y/z) or SQ_SEL_1 (w).
+ * dataElementType      : passed to GetSurfaceFormat() to obtain the client
+ *                        element size.
+ * pFetchMethod         : bEnableMini chooses mini vs. mega fetch; for mega
+ *                        fetch mega_fetch_remainder is updated.
+ *
+ * If an instruction already exists for gl_client_id its fields are rewritten
+ * in place; otherwise a new one is allocated, added to the shader with
+ * add_vfetch_instruction() and remembered in the array.
+ *
+ * Returns GL_TRUE on success, GL_FALSE on an out-of-range gl_client_id,
+ * allocation failure, or failure to add the instruction.
+ */
+GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
+                                      GLuint gl_client_id,
+                                      GLuint destination_register,
+                                      GLuint number_of_elements,
+                                      GLenum dataElementType,
+                                      VTX_FETCH_METHOD* pFetchMethod)
+{
+    GLuint client_size_inbyte;
+    GLuint mega_fetch_count;
+    GLuint is_mega_fetch_flag;
+    GLboolean bNewInstruction;
+    R700VertexGenericFetch* vfetch_instruction_ptr;
+
+    // The per-attribute table is initialized and cleaned up over
+    // VERT_ATTRIB_MAX entries; refuse to index past that.
+    if (gl_client_id >= VERT_ATTRIB_MAX)
+    {
+        radeon_error("Invalid vertex attribute index %u for VFetch instruction.\n", gl_client_id);
+        return GL_FALSE;
+    }
+
+    vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
+    bNewInstruction = (NULL == vfetch_instruction_ptr) ? GL_TRUE : GL_FALSE;
+
+    if (GL_TRUE == bNewInstruction)
+    {
+        vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
+        if (vfetch_instruction_ptr == NULL)
+        {
+            return GL_FALSE;
+        }
+        Init_R700VertexGenericFetch(vfetch_instruction_ptr);
+    }
+
+    // Only the client element size is needed here; the returned surface
+    // format is not written into this instruction.
+    (void) GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
+
+    if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
+    {
+        //TODO : mini fetch
+        mega_fetch_count   = 0;
+        is_mega_fetch_flag = 0;
+    }
+    else
+    {
+        mega_fetch_count   = MEGA_FETCH_BYTES - 1;
+        is_mega_fetch_flag = 0x1;
+        pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
+    }
+
+    vfetch_instruction_ptr->m_Word0.f.vtx_inst         = SQ_VTX_INST_FETCH;
+    vfetch_instruction_ptr->m_Word0.f.fetch_type       = SQ_VTX_FETCH_VERTEX_DATA;
+    vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+
+    vfetch_instruction_ptr->m_Word0.f.buffer_id        = gl_client_id;
+    vfetch_instruction_ptr->m_Word0.f.src_gpr          = 0x0;
+    vfetch_instruction_ptr->m_Word0.f.src_rel          = SQ_ABSOLUTE;
+    vfetch_instruction_ptr->m_Word0.f.src_sel_x        = SQ_SEL_X;
+    vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
+
+    // Missing components read as 0, except a missing w which reads as 1.
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_x        = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_y        = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_z        = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_w        = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
+
+    vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
+
+    // Destination register
+    vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
+    vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
+
+    vfetch_instruction_ptr->m_Word2.f.offset              = 0;
+    vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
+
+    vfetch_instruction_ptr->m_Word2.f.mega_fetch          = is_mega_fetch_flag;
+
+    if (GL_TRUE == bNewInstruction)
+    {
+        // NOTE(review): on failure the freshly allocated instruction is not
+        // released here - confirm who owns it in that case.
+        if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
+        {
+            return GL_FALSE;
+        }
+
+        // The slot was NULL on entry and nothing above writes to it, so it
+        // can be filled in directly.
+        pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Build (or refresh) the vertex fetch instruction for one vertex element,
+ * including data format, signedness and normalization.
+ *
+ * pAsm                 : assembler context.
+ * destination_register : GPR written to the dst_gpr field.
+ * type, size           : passed to GetSurfaceFormat() to obtain the data
+ *                        format and client element size; size also decides
+ *                        which components select SQ_SEL_0 / SQ_SEL_1.
+ * element              : index into pAsm->vfetch_instruction_ptr_array and
+ *                        value written to the buffer_id field; must be below
+ *                        VERT_ATTRIB_MAX.
+ * _signed              : 1 selects SQ_FORMAT_COMP_SIGNED, anything else
+ *                        SQ_FORMAT_COMP_UNSIGNED.
+ * normalize            : GL_TRUE selects SQ_NUM_FORMAT_NORM, otherwise
+ *                        SQ_NUM_FORMAT_INT.
+ * format               : GL_BGRA swaps the x and z destination selects.
+ * pFetchMethod         : bEnableMini chooses mini vs. mega fetch; for mega
+ *                        fetch mega_fetch_remainder is updated.
+ *
+ * If an instruction already exists for the element its fields are rewritten
+ * in place; otherwise a new one is allocated, added to the shader with
+ * add_vfetch_instruction() and remembered in the array.
+ *
+ * Returns GL_TRUE on success, GL_FALSE on an out-of-range element,
+ * allocation failure, or failure to add the instruction.
+ */
+GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
+                                       GLuint              destination_register,
+                                       GLenum              type,
+                                       GLint               size,
+                                       GLubyte             element,
+                                       GLuint              _signed,
+                                       GLboolean           normalize,
+                                       GLenum              format,
+                                       VTX_FETCH_METHOD  * pFetchMethod)
+{
+    GLuint client_size_inbyte;
+    GLuint data_format;
+    GLuint mega_fetch_count;
+    GLuint is_mega_fetch_flag;
+    GLuint sel_for_x;
+    GLuint sel_for_z;
+    GLboolean bNewInstruction;
+    R700VertexGenericFetch* vfetch_instruction_ptr;
+
+    // The per-attribute table is initialized and cleaned up over
+    // VERT_ATTRIB_MAX entries; refuse to index past that.
+    if (element >= VERT_ATTRIB_MAX)
+    {
+        radeon_error("Invalid vertex attribute index %u for VFetch instruction.\n", (GLuint) element);
+        return GL_FALSE;
+    }
+
+    vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[element];
+    bNewInstruction = (NULL == vfetch_instruction_ptr) ? GL_TRUE : GL_FALSE;
+
+    if (GL_TRUE == bNewInstruction)
+    {
+        vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
+        if (vfetch_instruction_ptr == NULL)
+        {
+            return GL_FALSE;
+        }
+        Init_R700VertexGenericFetch(vfetch_instruction_ptr);
+    }
+
+    data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
+
+    if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
+    {
+        //TODO : mini fetch
+        mega_fetch_count   = 0;
+        is_mega_fetch_flag = 0;
+    }
+    else
+    {
+        mega_fetch_count   = MEGA_FETCH_BYTES - 1;
+        is_mega_fetch_flag = 0x1;
+        pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
+    }
+
+    vfetch_instruction_ptr->m_Word0.f.vtx_inst         = SQ_VTX_INST_FETCH;
+    vfetch_instruction_ptr->m_Word0.f.fetch_type       = SQ_VTX_FETCH_VERTEX_DATA;
+    vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+
+    vfetch_instruction_ptr->m_Word0.f.buffer_id        = element;
+    vfetch_instruction_ptr->m_Word0.f.src_gpr          = 0x0;
+    vfetch_instruction_ptr->m_Word0.f.src_rel          = SQ_ABSOLUTE;
+    vfetch_instruction_ptr->m_Word0.f.src_sel_x        = SQ_SEL_X;
+    vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
+
+    // GL_BGRA data has x and z exchanged relative to the default order.
+    if(format == GL_BGRA)
+    {
+        sel_for_x = SQ_SEL_Z;
+        sel_for_z = SQ_SEL_X;
+    }
+    else
+    {
+        sel_for_x = SQ_SEL_X;
+        sel_for_z = SQ_SEL_Z;
+    }
+
+    // Missing components read as 0, except a missing w which reads as 1.
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_x        = (size < 1) ? SQ_SEL_0 : sel_for_x;
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_y        = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_z        = (size < 3) ? SQ_SEL_0 : sel_for_z;
+    vfetch_instruction_ptr->m_Word1.f.dst_sel_w        = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+
+    vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
+    vfetch_instruction_ptr->m_Word1.f.data_format      = data_format;
+    vfetch_instruction_ptr->m_Word2.f.endian_swap      = SQ_ENDIAN_NONE;
+
+    vfetch_instruction_ptr->m_Word1.f.format_comp_all  =
+        (1 == _signed) ? SQ_FORMAT_COMP_SIGNED : SQ_FORMAT_COMP_UNSIGNED;
+
+    vfetch_instruction_ptr->m_Word1.f.num_format_all   =
+        (GL_TRUE == normalize) ? SQ_NUM_FORMAT_NORM : SQ_NUM_FORMAT_INT;
+
+    // Destination register
+    vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
+    vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
+
+    vfetch_instruction_ptr->m_Word2.f.offset              = 0;
+    vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
+
+    vfetch_instruction_ptr->m_Word2.f.mega_fetch          = is_mega_fetch_flag;
+
+    if (GL_TRUE == bNewInstruction)
+    {
+        // NOTE(review): on failure the freshly allocated instruction is not
+        // released here - confirm who owns it in that case.
+        if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
+        {
+            return GL_FALSE;
+        }
+
+        // The slot was NULL on entry and nothing above writes to it, so it
+        // can be filled in directly.
+        pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Forget all assembled vertex fetch state.
+ *
+ * Resets the current clause to CF_EMPTY_CLAUSE, clears the current VTX
+ * clause pointer and every entry of vfetch_instruction_ptr_array, then calls
+ * cleanup_vfetch_shaderinst() on the shader.
+ *
+ * Always returns GL_TRUE.
+ */
+GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
+{
+    GLint iAttrib = 0;
+
+    pAsm->cf_current_clause_type    = CF_EMPTY_CLAUSE;
+    pAsm->cf_current_vtx_clause_ptr = NULL;
+
+    while (iAttrib < VERT_ATTRIB_MAX)
+    {
+        pAsm->vfetch_instruction_ptr_array[ iAttrib ] = NULL;
+        iAttrib++;
+    }
+
+    cleanup_vfetch_shaderinst(pAsm->pR700Shader);
+
+    return GL_TRUE;
+}
+
+/*
+ * Hand out the next helper register.
+ *
+ * Returns the current value of pAsm->uHelpReg and advances it by one.
+ * number_used_registers is raised when the advanced value exceeds it.
+ */
+GLuint gethelpr(r700_AssemblerBase* pAsm)
+{
+    const GLuint uAllocated = pAsm->uHelpReg;
+
+    pAsm->uHelpReg = pAsm->uHelpReg + 1;
+
+    if (pAsm->number_used_registers < pAsm->uHelpReg)
+    {
+        pAsm->number_used_registers = pAsm->uHelpReg;
+    }
+
+    return uAllocated;
+}
+/*
+ * Rewind helper register allocation: the next gethelpr() call hands out
+ * pAsm->uFirstHelpReg again.
+ */
+void resethelpr(r700_AssemblerBase* pAsm)
+{
+    pAsm->uHelpReg = pAsm->uFirstHelpReg;
+}
+
+/*
+ * Common preamble of the checkop* functions: rewind helper register
+ * allocation and mark all four aArgSubst entries as "no substitution" (-1).
+ */
+void checkop_init(r700_AssemblerBase* pAsm)
+{
+    int iSlot;
+
+    resethelpr(pAsm);
+
+    for (iSlot = 0; iSlot < 4; iSlot++)
+    {
+        pAsm->aArgSubst[iSlot] = -1;
+    }
+}
+
+/*
+ * Emit the instruction currently described by pAsm->D / pAsm->S[] and reset
+ * the operand scratch state for the next one.
+ *
+ * When pAsm->is_tex is GL_TRUE a TEX instruction is assembled; the flag
+ * passed to assemble_tex_instruction() is GL_FALSE for TEXTURE_RECT_INDEX
+ * targets and GL_TRUE otherwise.  In every other case an ALU instruction is
+ * assembled.  For destinations of type DST_REG_OUT the per-export write mask
+ * in pAsm->pucOutMask is updated.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if assembling failed (the scratch
+ * state is left untouched in that case).
+ */
+static GLboolean next_ins(r700_AssemblerBase *pAsm)
+{
+    struct prog_instruction *pCurInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+    int iOperand;
+
+    if (GL_TRUE != pAsm->is_tex)
+    {   //ALU
+        if (GL_FALSE == assemble_alu_instruction(pAsm))
+        {
+            radeon_error("Error assembling ALU instruction\n");
+            return GL_FALSE;
+        }
+    }
+    else
+    {
+        const GLboolean bTexFlag =
+            (pCurInst->TexSrcTarget == TEXTURE_RECT_INDEX) ? GL_FALSE : GL_TRUE;
+
+        if (GL_FALSE == assemble_tex_instruction(pAsm, bTexFlag))
+        {
+            radeon_error("Error assembling TEX instruction\n");
+            return GL_FALSE;
+        }
+    }
+
+    if (pAsm->D.dst.rtype == DST_REG_OUT)
+    {
+        assert(pAsm->D.dst.reg >= pAsm->starting_export_register_number);
+
+        if (!pAsm->D.dst.op3)
+        {
+            // Accumulate the channels this instruction writes.
+            pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
+               |= (unsigned char)pCurInst->DstReg.WriteMask;
+        }
+        else
+        {
+            // There is no mask for OP3 instructions, so all channels are written
+            pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
+        }
+    }
+
+    //reset for next inst.
+    pAsm->D.bits  = 0;
+    pAsm->D2.bits = 0;
+    for (iOperand = 0; iOperand < 3; iOperand++)
+    {
+        pAsm->S[iOperand].bits = 0;
+    }
+    pAsm->is_tex           = GL_FALSE;
+    pAsm->need_tex_barrier = GL_FALSE;
+    for (iOperand = 3; iOperand >= 0; iOperand--)
+    {
+        pAsm->C[iOperand].bits = 0;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Copy IL source operand `src` of the current instruction into a helper
+ * register and record the substitution.
+ *
+ * A SQ_OP2_INST_MOV into a register obtained from gethelpr() is emitted via
+ * next_ins(); the source is set up with assemble_src() and then passed
+ * through noswizzle_PVSSRC() and noneg_PVSSRC().  On success
+ * pAsm->aArgSubst[1 + src] holds the helper register number.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if assemble_src() or next_ins() fails.
+ */
+GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
+{
+    const GLuint uHelper = gethelpr(pAsm);
+
+    //mov src to temp helper gpr.
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg   = uHelper;
+    nomask_PVSDST(&(pAsm->D.dst));
+
+    if( GL_FALSE != assemble_src(pAsm, src, 0) )
+    {
+        noswizzle_PVSSRC(&(pAsm->S[0].src));
+        noneg_PVSSRC(&(pAsm->S[0].src));
+
+        if( GL_FALSE != next_ins(pAsm) )
+        {
+            // From now on this source is read from the helper register.
+            pAsm->aArgSubst[1 + src] = uHelper;
+            return GL_TRUE;
+        }
+    }
+
+    return GL_FALSE;
+}
+
+/*
+ * Operand check for one-source instructions: only runs checkop_init().
+ * Always returns GL_TRUE.
+ */
+GLboolean checkop1(r700_AssemblerBase* pAsm)
+{
+    checkop_init(pAsm);
+
+    return GL_TRUE;
+}
+
+/*
+ * Operand check for two-source instructions.
+ *
+ * A source counts as a constant when its register file is PROGRAM_UNIFORM,
+ * PROGRAM_CONSTANT, PROGRAM_LOCAL_PARAM, PROGRAM_ENV_PARAM or
+ * PROGRAM_STATE_VAR.  When both sources are constants with different
+ * indices, source 1 is copied to a helper register with mov_temp().
+ *
+ * Returns GL_TRUE on success, GL_FALSE if mov_temp() fails.
+ */
+GLboolean checkop2(r700_AssemblerBase* pAsm)
+{
+    struct prog_instruction *pInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+    GLboolean bIsConst[2];
+    int iSrc;
+
+    checkop_init(pAsm);
+
+    // Classify both sources.
+    for (iSrc = 0; iSrc < 2; iSrc++)
+    {
+        switch (pInst->SrcReg[iSrc].File)
+        {
+        case PROGRAM_UNIFORM:
+        case PROGRAM_CONSTANT:
+        case PROGRAM_LOCAL_PARAM:
+        case PROGRAM_ENV_PARAM:
+        case PROGRAM_STATE_VAR:
+            bIsConst[iSrc] = GL_TRUE;
+            break;
+        default:
+            bIsConst[iSrc] = GL_FALSE;
+            break;
+        }
+    }
+
+    if( (GL_TRUE != bIsConst[0]) || (GL_TRUE != bIsConst[1]) )
+    {
+        return GL_TRUE;
+    }
+
+    if(pInst->SrcReg[0].Index == pInst->SrcReg[1].Index)
+    {
+        return GL_TRUE;
+    }
+
+    // Two different constants: route the second one through a helper GPR.
+    if( GL_FALSE == mov_temp(pAsm, 1) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Operand check for three-source instructions.
+ *
+ * A source counts as a constant when its register file is PROGRAM_UNIFORM,
+ * PROGRAM_CONSTANT, PROGRAM_LOCAL_PARAM, PROGRAM_ENV_PARAM or
+ * PROGRAM_STATE_VAR.  Depending on which sources are constants, some of them
+ * are copied to helper registers with mov_temp():
+ *   - all three constant           : sources 1 and 2 are copied;
+ *   - 0 and 1 constant, idx differ : source 1 is copied;
+ *   - 0 and 2 constant, idx differ : source 2 is copied;
+ *   - 1 and 2 constant, idx differ : source 2 is copied.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if any mov_temp() call fails.
+ */
+GLboolean checkop3(r700_AssemblerBase* pAsm)
+{
+    GLboolean bSrcConst[3];
+    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+    int iSrc;
+
+    checkop_init(pAsm);
+
+    // Classify the three sources.
+    for (iSrc = 0; iSrc < 3; iSrc++)
+    {
+        switch (pILInst->SrcReg[iSrc].File)
+        {
+        case PROGRAM_UNIFORM:
+        case PROGRAM_CONSTANT:
+        case PROGRAM_LOCAL_PARAM:
+        case PROGRAM_ENV_PARAM:
+        case PROGRAM_STATE_VAR:
+            bSrcConst[iSrc] = GL_TRUE;
+            break;
+        default:
+            bSrcConst[iSrc] = GL_FALSE;
+            break;
+        }
+    }
+
+    if( (GL_TRUE == bSrcConst[0]) &&
+        (GL_TRUE == bSrcConst[1]) &&
+        (GL_TRUE == bSrcConst[2]) )
+    {
+        if( GL_FALSE == mov_temp(pAsm, 1) )
+        {
+            return GL_FALSE;
+        }
+        if( GL_FALSE == mov_temp(pAsm, 2) )
+        {
+            return GL_FALSE;
+        }
+    }
+    else if( (GL_TRUE == bSrcConst[0]) &&
+             (GL_TRUE == bSrcConst[1]) )
+    {
+        if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
+        {
+            if( GL_FALSE == mov_temp(pAsm, 1) )
+            {
+                // Report the failure; this path used to return 1 (GL_TRUE).
+                return GL_FALSE;
+            }
+        }
+    }
+    else if ( (GL_TRUE == bSrcConst[0]) &&
+              (GL_TRUE == bSrcConst[2]) )
+    {
+        if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
+        {
+            if( GL_FALSE == mov_temp(pAsm, 2) )
+            {
+                return GL_FALSE;
+            }
+        }
+    }
+    else if( (GL_TRUE == bSrcConst[1]) &&
+             (GL_TRUE == bSrcConst[2]) )
+    {
+        if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
+        {
+            if( GL_FALSE == mov_temp(pAsm, 2) )
+            {
+                return GL_FALSE;
+            }
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Fill source operand slot pAsm->S[fld] from IL source `src` of the current
+ * instruction.
+ *
+ * pAsm : assembler context.
+ * src  : index into the IL instruction's SrcReg[] array.
+ * fld  : destination slot in pAsm->S[]; -1 means "same as src".
+ *
+ * If a substitution is recorded in pAsm->aArgSubst[1 + src], that temporary
+ * register is used.  Otherwise the register is derived from the source's
+ * register file (temporary, one of the constant files, or input).  Swizzle
+ * and negate bits are always taken from the IL source.
+ *
+ * Returns GL_TRUE on success, GL_FALSE for an unsupported register file.
+ */
+GLboolean assemble_src(r700_AssemblerBase *pAsm,
+                       int src,
+                       int fld)
+{
+    struct prog_instruction *pInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+    GLuint uSwizzle;
+    GLuint uNegate;
+
+    if (-1 == fld)
+    {
+        fld = src;
+    }
+
+    if(pAsm->aArgSubst[1+src] >= 0)
+    {
+        // The operand was moved to a helper register earlier.
+        assert(fld >= 0);
+        setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
+        pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[fld].src.reg   = pAsm->aArgSubst[1+src];
+    }
+    else if (PROGRAM_TEMPORARY == pInst->SrcReg[src].File)
+    {
+        setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
+        pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[fld].src.reg = pInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
+    }
+    else if (PROGRAM_INPUT == pInst->SrcReg[src].File)
+    {
+        setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
+        pAsm->S[fld].src.rtype = SRC_REG_INPUT;
+
+        // Map the IL input index through the per-shader-type attribute map.
+        if (SPT_FP == pAsm->currentShaderType)
+        {
+            pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pInst->SrcReg[src].Index];
+        }
+        else if (SPT_VP == pAsm->currentShaderType)
+        {
+            pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pInst->SrcReg[src].Index];
+        }
+    }
+    else if ( (PROGRAM_CONSTANT    == pInst->SrcReg[src].File) ||
+              (PROGRAM_LOCAL_PARAM == pInst->SrcReg[src].File) ||
+              (PROGRAM_ENV_PARAM   == pInst->SrcReg[src].File) ||
+              (PROGRAM_STATE_VAR   == pInst->SrcReg[src].File) ||
+              (PROGRAM_UNIFORM     == pInst->SrcReg[src].File) )
+    {
+        setaddrmode_PVSSRC(&(pAsm->S[fld].src),
+                           (1 == pInst->SrcReg[src].RelAddr) ? ADDR_RELATIVE_A0
+                                                             : ADDR_ABSOLUTE);
+
+        pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
+        if(pInst->SrcReg[src].Index >= 0)
+        {
+            pAsm->S[fld].src.reg = pInst->SrcReg[src].Index;
+        }
+        else
+        {
+            WARN_ONCE("Negative register offsets not supported yet!\n");
+            pAsm->S[fld].src.reg  = 0;
+        }
+    }
+    else
+    {
+        radeon_error("Invalid source argument type : %d \n", pInst->SrcReg[src].File);
+        return GL_FALSE;
+    }
+
+    // Swizzle: three bits per channel, x in the lowest bits.
+    uSwizzle = pInst->SrcReg[src].Swizzle;
+    pAsm->S[fld].src.swizzlex = uSwizzle & 0x7;
+    pAsm->S[fld].src.swizzley = (uSwizzle >> 3) & 0x7;
+    pAsm->S[fld].src.swizzlez = (uSwizzle >> 6) & 0x7;
+    pAsm->S[fld].src.swizzlew = (uSwizzle >> 9) & 0x7;
+
+    // Negate: one bit per channel, x in bit 0.
+    uNegate = pInst->SrcReg[src].Negate;
+    pAsm->S[fld].src.negx = uNegate & 0x1;
+    pAsm->S[fld].src.negy = (uNegate >> 1) & 0x1;
+    pAsm->S[fld].src.negz = (uNegate >> 2) & 0x1;
+    pAsm->S[fld].src.negw = (uNegate >> 3) & 0x1;
+
+    return GL_TRUE;
+}
+
+/*
+ * Fill pAsm->D.dst (and the saturate flag in pAsm->D2) from the destination
+ * register of the current IL instruction.
+ *
+ * Supported register files are PROGRAM_TEMPORARY, PROGRAM_ADDRESS and
+ * PROGRAM_OUTPUT; outputs are mapped through the per-shader-type output map.
+ * The four write-enable bits are copied from the IL write mask, and
+ * SaturateMode becomes 1 only for SATURATE_ZERO_ONE.
+ *
+ * Returns GL_TRUE on success, GL_FALSE for any other register file.
+ */
+GLboolean assemble_dst(r700_AssemblerBase *pAsm)
+{
+    struct prog_instruction *pInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+    GLuint uWriteMask;
+
+    if (PROGRAM_TEMPORARY == pInst->DstReg.File)
+    {
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg = pInst->DstReg.Index + pAsm->starting_temp_register_number;
+    }
+    else if (PROGRAM_ADDRESS == pInst->DstReg.File)
+    {
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_A0;
+        pAsm->D.dst.reg = 0;
+    }
+    else if (PROGRAM_OUTPUT == pInst->DstReg.File)
+    {
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_OUT;
+
+        if (SPT_FP == pAsm->currentShaderType)
+        {
+            pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pInst->DstReg.Index];
+        }
+        else if (SPT_VP == pAsm->currentShaderType)
+        {
+            pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pInst->DstReg.Index];
+        }
+    }
+    else
+    {
+        radeon_error("Invalid destination output argument type\n");
+        return GL_FALSE;
+    }
+
+    // One write-enable bit per channel, x in bit 0.
+    uWriteMask = pInst->DstReg.WriteMask;
+    pAsm->D.dst.writex = uWriteMask & 0x1;
+    pAsm->D.dst.writey = (uWriteMask >> 1) & 0x1;
+    pAsm->D.dst.writez = (uWriteMask >> 2) & 0x1;
+    pAsm->D.dst.writew = (uWriteMask >> 3) & 0x1;
+
+    pAsm->D2.dst2.SaturateMode = (pInst->SaturateMode == SATURATE_ZERO_ONE) ? 1 : 0;
+
+    return GL_TRUE;
+}
+
+/*
+ * Destination setup for texture instructions: fill pAsm->D.dst from the
+ * DstReg of the current IL instruction.  Only the TEMPORARY and OUTPUT
+ * register files are accepted; anything else logs an error and returns
+ * GL_FALSE.  The saturate mode is not touched here.
+ */
+GLboolean tex_dst(r700_AssemblerBase *pAsm)
+{
+    struct prog_instruction *inst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+    switch (inst->DstReg.File) {
+    case PROGRAM_TEMPORARY:
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = inst->DstReg.Index + pAsm->starting_temp_register_number;
+        break;
+    case PROGRAM_OUTPUT:
+        pAsm->D.dst.rtype = DST_REG_OUT;
+        if (SPT_FP == pAsm->currentShaderType) {
+            pAsm->D.dst.reg = pAsm->uiFP_OutputMap[inst->DstReg.Index];
+        } else if (SPT_VP == pAsm->currentShaderType) {
+            pAsm->D.dst.reg = pAsm->ucVP_OutputMap[inst->DstReg.Index];
+        }
+        break;
+    default:
+        radeon_error("Invalid destination output argument type\n");
+        return GL_FALSE;
+    }
+
+    /* Both accepted register files use absolute addressing. */
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+
+    /* WriteMask bits 0..3 enable x, y, z and w respectively. */
+    pAsm->D.dst.writex = inst->DstReg.WriteMask & 0x1;
+    pAsm->D.dst.writey = (inst->DstReg.WriteMask >> 1) & 0x1;
+    pAsm->D.dst.writez = (inst->DstReg.WriteMask >> 2) & 0x1;
+    pAsm->D.dst.writew = (inst->DstReg.WriteMask >> 3) & 0x1;
+
+    return GL_TRUE;
+}
+
+/*
+ * Set up pAsm->S[0].src as the texture-coordinate source of the current
+ * IL instruction.
+ *
+ * If pAsm->aArgSubst[1] is non-negative it names a temporary register that
+ * is used in place of the IL source.  Otherwise only the TEMPORARY and
+ * INPUT register files are accepted; the constant-like files are rejected.
+ *
+ * Returns GL_TRUE on success, or GL_FALSE after logging an error when the
+ * source cannot serve as a texture coordinate.
+ */
+GLboolean tex_src(r700_AssemblerBase *pAsm)
+{
+    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+    GLboolean bValidTexCoord = GL_FALSE;
+
+    if (pAsm->aArgSubst[1] >= 0) {
+        bValidTexCoord = GL_TRUE;
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg   = pAsm->aArgSubst[1];
+    } else {
+        switch (pILInst->SrcReg[0].File) {
+        case PROGRAM_UNIFORM:
+        case PROGRAM_CONSTANT:
+        case PROGRAM_LOCAL_PARAM:
+        case PROGRAM_ENV_PARAM:
+        case PROGRAM_STATE_VAR:
+            break;
+        case PROGRAM_TEMPORARY:
+            bValidTexCoord = GL_TRUE;
+            pAsm->S[0].src.reg   = pILInst->SrcReg[0].Index +
+                                   pAsm->starting_temp_register_number;
+            pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+            break;
+        case PROGRAM_INPUT:
+            if (SPT_VP == pAsm->currentShaderType) {
+                switch (pILInst->SrcReg[0].Index) {
+                case VERT_ATTRIB_TEX0:
+                case VERT_ATTRIB_TEX1:
+                case VERT_ATTRIB_TEX2:
+                case VERT_ATTRIB_TEX3:
+                case VERT_ATTRIB_TEX4:
+                case VERT_ATTRIB_TEX5:
+                case VERT_ATTRIB_TEX6:
+                case VERT_ATTRIB_TEX7:
+                    bValidTexCoord = GL_TRUE;
+                    pAsm->S[0].src.reg   =
+                        pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
+                    pAsm->S[0].src.rtype = SRC_REG_INPUT;
+                    break;
+                }
+            } else {
+                switch (pILInst->SrcReg[0].Index) {
+                case FRAG_ATTRIB_WPOS:
+                case FRAG_ATTRIB_COL0:
+                case FRAG_ATTRIB_COL1:
+                case FRAG_ATTRIB_FOGC:
+                case FRAG_ATTRIB_TEX0:
+                case FRAG_ATTRIB_TEX1:
+                case FRAG_ATTRIB_TEX2:
+                case FRAG_ATTRIB_TEX3:
+                case FRAG_ATTRIB_TEX4:
+                case FRAG_ATTRIB_TEX5:
+                case FRAG_ATTRIB_TEX6:
+                case FRAG_ATTRIB_TEX7:
+                    bValidTexCoord = GL_TRUE;
+                    pAsm->S[0].src.reg   =
+                        pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
+                    pAsm->S[0].src.rtype = SRC_REG_INPUT;
+                    break;
+                case FRAG_ATTRIB_FACE:
+                    fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
+                    break;
+                case FRAG_ATTRIB_PNTC:
+                    fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
+                    break;
+                }
+                /* Shader varyings: both bounds must hold.  Joined with '||'
+                 * the test is true for every index, which would accept
+                 * FACE/PNTC right after reporting them as unsupported. */
+                if ( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) &&
+                     (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) {
+                    bValidTexCoord = GL_TRUE;
+                    pAsm->S[0].src.reg   =
+                        pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
+                    pAsm->S[0].src.rtype = SRC_REG_INPUT;
+                }
+            }
+            break;
+        }
+    }
+
+    if (GL_TRUE == bValidTexCoord) {
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    } else {
+        radeon_error("Invalid source texcoord for TEX instruction\n");
+        return GL_FALSE;
+    }
+
+    /* Swizzle holds a 3-bit selector per component, Negate one bit each. */
+    pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
+    pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
+    pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
+    pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
+
+    pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
+    pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
+    pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
+    pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
+
+    return GL_TRUE;
+}
+
+/*
+ * Build one texture-fetch instruction from the operands already stored in
+ * pAsm->D.dst (destination and opcode), pAsm->S[0].src (texture coordinate)
+ * and pAsm->S[1].src (texture unit), then hand it to add_tex_instruction().
+ *
+ * normalized: non-zero selects SQ_TEX_NORMALIZED for all four coordinate
+ *             types, zero selects SQ_TEX_UNNORMALIZED.
+ *
+ * Returns GL_FALSE when the destination is neither a temporary nor an
+ * output register, when allocation fails, or when add_tex_instruction()
+ * fails; GL_TRUE otherwise.
+ */
+GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
+{
+    PVSSRC *texture_coordinate_source = &(pAsm->S[0].src);
+    PVSSRC *texture_unit_source       = &(pAsm->S[1].src);
+    R700TextureInstruction *tex_instruction_ptr;
+
+    /* Validate the destination before allocating so the error path cannot leak. */
+    if ( (pAsm->D.dst.rtype != DST_REG_TEMPORARY) &&
+         (pAsm->D.dst.rtype != DST_REG_OUT) ) {
+        radeon_error("Only temp destination registers supported for TEX dest regs.\n");
+        return GL_FALSE;
+    }
+
+    tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
+    if (tex_instruction_ptr == NULL) {
+        return GL_FALSE;
+    }
+    Init_R700TextureInstruction(tex_instruction_ptr);
+
+    tex_instruction_ptr->m_Word0.f.tex_inst         = pAsm->D.dst.opcode;
+    tex_instruction_ptr->m_Word0.f.bc_frac_mode     = 0x0;
+    tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+    tex_instruction_ptr->m_Word0.f.alt_const        = 0;
+
+    if (SPT_VP == pAsm->currentShaderType) {
+        tex_instruction_ptr->m_Word0.f.resource_id      = texture_unit_source->reg + VERT_ATTRIB_MAX;
+        pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
+    } else {
+        tex_instruction_ptr->m_Word0.f.resource_id      = texture_unit_source->reg;
+    }
+
+    tex_instruction_ptr->m_Word1.f.lod_bias     = 0x0;
+    if (normalized) {
+        tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
+        tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
+        tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
+        tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
+    } else {
+        /* XXX: UNNORMALIZED tex coords have limited wrap modes */
+        tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
+        tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
+        tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
+        tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
+    }
+
+    tex_instruction_ptr->m_Word2.f.offset_x   = 0x0;
+    tex_instruction_ptr->m_Word2.f.offset_y   = 0x0;
+    tex_instruction_ptr->m_Word2.f.offset_z   = 0x0;
+    tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
+
+    tex_instruction_ptr->m_Word0.f.src_gpr    = texture_coordinate_source->reg;
+    tex_instruction_ptr->m_Word0.f.src_rel    = SQ_ABSOLUTE;
+    tex_instruction_ptr->m_Word1.f.dst_gpr    = pAsm->D.dst.reg;
+    tex_instruction_ptr->m_Word1.f.dst_rel    = SQ_ABSOLUTE;
+
+    /* Components outside the write mask are masked; the rest use S[1].src's swizzle. */
+    tex_instruction_ptr->m_Word1.f.dst_sel_x  = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
+    tex_instruction_ptr->m_Word1.f.dst_sel_y  = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
+    tex_instruction_ptr->m_Word1.f.dst_sel_z  = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
+    tex_instruction_ptr->m_Word1.f.dst_sel_w  = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
+
+    tex_instruction_ptr->m_Word2.f.src_sel_x  = texture_coordinate_source->swizzlex;
+    tex_instruction_ptr->m_Word2.f.src_sel_y  = texture_coordinate_source->swizzley;
+    tex_instruction_ptr->m_Word2.f.src_sel_z  = texture_coordinate_source->swizzlez;
+    tex_instruction_ptr->m_Word2.f.src_sel_w  = texture_coordinate_source->swizzlew;
+
+    if (GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr)) {
+        return GL_FALSE;
+    }
+    return GL_TRUE;
+}
+
+/* Reset read-port bookkeeping: set every hw_gpr[cycle][component] and every
+ * hw_cfile_addr/hw_cfile_chan entry to -1, the value the reserve_* helpers
+ * treat as "unused". */
+void initialize(r700_AssemblerBase *pAsm)
+{
+    GLuint cyc, comp;
+
+    for (comp = 0; comp < NUMBER_OF_COMPONENTS; comp++) {
+        pAsm->hw_cfile_addr[comp] = (-1);
+        pAsm->hw_cfile_chan[comp] = (-1);
+    }
+    for (cyc = 0; cyc < NUMBER_OF_CYCLES; cyc++) {
+        for (comp = 0; comp < NUMBER_OF_COMPONENTS; comp++) {
+            pAsm->hw_gpr[cyc][comp] = (-1);
+        }
+    }
+}
+
+/*
+ * Encode one source operand of an ALU instruction.
+ *
+ *   alu_instruction_ptr   instruction whose source fields are written
+ *   source_index          operand slot: 0, 1 or 2 (src0 / src1 / src2)
+ *   pSource               operand description: register, swizzle, negate
+ *   scalar_channel_index  0..3 picks the component (x..w) of pSource's
+ *                         swizzle and negate bits that applies here
+ *
+ * A component whose swizzle selects the constant 0 or 1 is encoded with
+ * SQ_ALU_SRC_0 / SQ_ALU_SRC_1, whatever the register type is.
+ *
+ * Returns GL_TRUE on success.  Logs an error and returns GL_FALSE for an
+ * unsupported register type, a swizzle selector that cannot be encoded, or
+ * a source_index other than 0, 1 or 2.
+ *
+ * Source select encoding for operands src0, src1:
+ *   [0,127]    GPR[0..127]
+ *   [256,511]  cfile constants c[0..255]
+ *   248  SQ_ALU_SRC_0        special constant 0.0
+ *   249  SQ_ALU_SRC_1        special constant 1.0 float
+ *   250  SQ_ALU_SRC_1_INT    special constant 1 integer
+ *   251  SQ_ALU_SRC_M_1_INT  special constant -1 integer
+ *   252  SQ_ALU_SRC_0_5      special constant 0.5 float
+ *   253  SQ_ALU_SRC_LITERAL  literal constant
+ *   254  SQ_ALU_SRC_PV       previous vector result
+ *   255  SQ_ALU_SRC_PS       previous scalar result
+ */
+GLboolean assemble_alu_src(R700ALUInstruction*  alu_instruction_ptr,
+                           int                  source_index,
+                           PVSSRC*              pSource,
+                           BITS                 scalar_channel_index)
+{
+    BITS sel_bits;
+    BITS rel_bits;
+    BITS chan_bits;
+    BITS neg_bits;
+    BITS swz;
+
+    /* Swizzle selector and negate bit of the component this scalar slot
+     * works on; an out-of-range index yields SQ_SEL_MASK and no negate. */
+    switch (scalar_channel_index) {
+    case 0:
+        swz      = pSource->swizzlex;
+        neg_bits = pSource->negx;
+        break;
+    case 1:
+        swz      = pSource->swizzley;
+        neg_bits = pSource->negy;
+        break;
+    case 2:
+        swz      = pSource->swizzlez;
+        neg_bits = pSource->negz;
+        break;
+    case 3:
+        swz      = pSource->swizzlew;
+        neg_bits = pSource->negw;
+        break;
+    default:
+        swz      = SQ_SEL_MASK;
+        neg_bits = 0;
+        break;
+    }
+
+    /*
+     * Source select.  The inline 0.0 / 1.0 selectors take priority over the
+     * register; otherwise the register type decides the encoding.
+     */
+    if (swz == SQ_SEL_0) {
+        sel_bits = SQ_ALU_SRC_0;
+    } else if (swz == SQ_SEL_1) {
+        sel_bits = SQ_ALU_SRC_1;
+    } else if ((pSource->rtype == SRC_REG_TEMPORARY) ||
+               (pSource->rtype == SRC_REG_INPUT)) {
+        sel_bits = pSource->reg;
+    } else if (pSource->rtype == SRC_REG_CONSTANT) {
+        sel_bits = pSource->reg + CFILE_REGISTER_OFFSET;
+    } else if (pSource->rtype == SRC_REC_LITERAL) {
+        sel_bits = SQ_ALU_SRC_LITERAL;
+    } else {
+        radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
+                 source_index, pSource->rtype);
+        return GL_FALSE;
+    }
+
+    /* Addressing mode of the operand. */
+    if (ADDR_ABSOLUTE == addrmode_PVSSRC(pSource)) {
+        rel_bits = SQ_ABSOLUTE;
+    } else {
+        rel_bits = SQ_RELATIVE;
+    }
+
+    /* Source channel named by the swizzle selector. */
+    switch (swz) {
+    case SQ_SEL_X:
+        chan_bits = SQ_CHAN_X;
+        break;
+    case SQ_SEL_Y:
+        chan_bits = SQ_CHAN_Y;
+        break;
+    case SQ_SEL_Z:
+        chan_bits = SQ_CHAN_Z;
+        break;
+    case SQ_SEL_W:
+        chan_bits = SQ_CHAN_W;
+        break;
+    case SQ_SEL_0:
+    case SQ_SEL_1:
+        /* Does not matter since the source select controls. */
+        chan_bits = SQ_CHAN_X;
+        break;
+    default:
+        radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", swz);
+        return GL_FALSE;
+    }
+
+    /* Store the four fields into the requested operand slot; src0/src1
+     * live in m_Word0, src2 in m_Word1_OP3. */
+    switch (source_index) {
+    case 0:
+        assert(alu_instruction_ptr);
+        alu_instruction_ptr->m_Word0.f.src0_sel  = sel_bits;
+        alu_instruction_ptr->m_Word0.f.src0_rel  = rel_bits;
+        alu_instruction_ptr->m_Word0.f.src0_chan = chan_bits;
+        alu_instruction_ptr->m_Word0.f.src0_neg  = neg_bits;
+        break;
+    case 1:
+        assert(alu_instruction_ptr);
+        alu_instruction_ptr->m_Word0.f.src1_sel  = sel_bits;
+        alu_instruction_ptr->m_Word0.f.src1_rel  = rel_bits;
+        alu_instruction_ptr->m_Word0.f.src1_chan = chan_bits;
+        alu_instruction_ptr->m_Word0.f.src1_neg  = neg_bits;
+        break;
+    case 2:
+        assert(alu_instruction_ptr);
+        alu_instruction_ptr->m_Word1_OP3.f.src2_sel  = sel_bits;
+        alu_instruction_ptr->m_Word1_OP3.f.src2_rel  = rel_bits;
+        alu_instruction_ptr->m_Word1_OP3.f.src2_chan = chan_bits;
+        alu_instruction_ptr->m_Word1_OP3.f.src2_neg  = neg_bits;
+        break;
+    default:
+        radeon_error("Only three sources allowed in ALU opcodes.\n");
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Append alu_instruction_ptr to the shader, first opening a new ALU
+ * control-flow clause when one is required.
+ *
+ * A new clause is opened when an alternate clause opcode is pending in
+ * pAsm->alu_x_opcode, when there is no current ALU clause, or when the
+ * current clause's count has reached its limit less the
+ * contiguous_slots_needed that the caller still has to place.
+ *
+ * Returns GL_FALSE if check_current_clause() fails or the new clause cannot
+ * be allocated; GL_TRUE otherwise.
+ */
+GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
+                              R700ALUInstruction* alu_instruction_ptr,
+                              GLuint              contiguous_slots_needed)
+{
+    R700ControlFlowALUClause *clause;
+    GLboolean open_new_clause = GL_FALSE;
+
+    if (GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE)) {
+        return GL_FALSE;
+    }
+
+    clause = pAsm->cf_current_alu_clause_ptr;
+    if (pAsm->alu_x_opcode != 0 || clause == NULL) {
+        open_new_clause = GL_TRUE;
+    } else if (clause->m_Word1.f.count >=
+               (GetCFMaxInstructions(clause->m_ShaderInstType)-contiguous_slots_needed-1)) {
+        open_new_clause = GL_TRUE;
+    }
+
+    if (open_new_clause) {
+        /* New CF instruction for this clause, linked into the CF segment. */
+        clause = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
+        pAsm->cf_current_alu_clause_ptr = clause;
+        if (NULL == clause) {
+            radeon_error("Could not allocate a new ALU CF instruction.\n");
+            return GL_FALSE;
+        }
+        Init_R700ControlFlowALUClause(clause);
+        AddCFInstruction(pAsm->pR700Shader, (R700ControlFlowInstruction *)clause);
+
+        clause->m_Word0.f.kcache_bank0 = 0x0;
+        clause->m_Word0.f.kcache_bank1 = 0x0;
+        clause->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
+        clause->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
+        clause->m_Word1.f.kcache_addr0 = 0x0;
+        clause->m_Word1.f.kcache_addr1 = 0x0;
+        clause->m_Word1.f.count        = 0x0;
+        /* A pending alternate opcode is consumed by the clause it opens. */
+        if (pAsm->alu_x_opcode != 0) {
+            clause->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
+            pAsm->alu_x_opcode = 0;
+        } else {
+            clause->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
+        }
+
+        clause->m_Word1.f.whole_quad_mode = 0x0;
+        clause->m_Word1.f.barrier         = 0x1;
+    } else {
+        clause->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
+    }
+
+    /* If this clause contains any instruction that is forward dependent on
+     * a TEX instruction, set whole_quad_mode for the clause. */
+    if (pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1)) {
+        clause->m_Word1.f.whole_quad_mode = 0x1;
+    }
+
+    if (clause->m_Word1.f.count >= (GetCFMaxInstructions(clause->m_ShaderInstType)-1)) {
+        alu_instruction_ptr->m_Word0.f.last = 1;
+    }
+
+    /* The first instruction added to a clause is linked to it both ways. */
+    if (NULL == clause->m_pLinkedALUInstruction) {
+        clause->m_pLinkedALUInstruction = alu_instruction_ptr;
+        alu_instruction_ptr->m_pLinkedALUClause = clause;
+    }
+
+    AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
+
+    return GL_TRUE;
+}
+
+/*
+ * Read back the select, relative-addressing, channel and negate fields of
+ * one source operand of an ALU instruction.
+ *
+ * source_index 0 and 1 are read from m_Word0, 2 from m_Word1_OP3.  For any
+ * other index the four outputs are left unmodified.
+ */
+void get_src_properties(R700ALUInstruction*  alu_instruction_ptr,
+                        int                  source_index,
+                        BITS*                psrc_sel,
+                        BITS*                psrc_rel,
+                        BITS*                psrc_chan,
+                        BITS*                psrc_neg)
+{
+    if (source_index == 0) {
+        *psrc_sel  = alu_instruction_ptr->m_Word0.f.src0_sel;
+        *psrc_rel  = alu_instruction_ptr->m_Word0.f.src0_rel;
+        *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
+        *psrc_neg  = alu_instruction_ptr->m_Word0.f.src0_neg;
+    } else if (source_index == 1) {
+        *psrc_sel  = alu_instruction_ptr->m_Word0.f.src1_sel;
+        *psrc_rel  = alu_instruction_ptr->m_Word0.f.src1_rel;
+        *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
+        *psrc_neg  = alu_instruction_ptr->m_Word0.f.src1_neg;
+    } else if (source_index == 2) {
+        *psrc_sel  = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
+        *psrc_rel  = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
+        *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
+        *psrc_neg  = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
+    }
+}
+
+/*
+ * Return 1 if sel is in 256..511, the selector range that
+ * assemble_alu_src() documents as cfile constants; else 0.
+ */
+int is_cfile(BITS sel)
+{
+    return (sel > 255 && sel < 512) ? 1 : 0;
+}
+
+/*
+ * Return 1 if sel names any kind of constant operand: a cfile constant
+ * (see is_cfile) or one of the special selectors from SQ_ALU_SRC_0 up to
+ * and including SQ_ALU_SRC_LITERAL; else 0.
+ */
+int is_const(BITS sel)
+{
+    if (is_cfile(sel) || (sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)) {
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Return 1 if sel is non-negative and below 128, the selector range
+ * that assemble_alu_src() documents as GPRs; else 0.
+ */
+int is_gpr(BITS sel)
+{
+    return (sel >= 0 && sel < 128) ? 1 : 0;
+}
+
+/*
+ * Bank-swizzle lookup tables.
+ *
+ * The index is a 3-bit key with one bit per source operand that is a
+ * constant (see is_const): src0 contributes 4, src1 contributes 2 and src2
+ * contributes 1.  check_vector() reads BANK_SWIZZLE_VEC; check_scalar()
+ * reads BANK_SWIZZLE_SCL.
+ */
+const GLuint BANK_SWIZZLE_VEC[8] = {
+    SQ_ALU_VEC_210, SQ_ALU_VEC_120,  /* 000, 001 */
+    SQ_ALU_VEC_102, SQ_ALU_VEC_201,  /* 010, 011 */
+    SQ_ALU_VEC_012, SQ_ALU_VEC_021,  /* 100, 101 */
+    SQ_ALU_VEC_012, SQ_ALU_VEC_012   /* 110, 111 */
+};
+
+const GLuint BANK_SWIZZLE_SCL[8] = {
+    SQ_ALU_SCL_210, SQ_ALU_SCL_122,  /* 000, 001 */
+    SQ_ALU_SCL_122, SQ_ALU_SCL_221,  /* 010, 011 */
+    SQ_ALU_SCL_212, SQ_ALU_SCL_122,  /* 100, 101 */
+    SQ_ALU_SCL_122, SQ_ALU_SCL_122   /* 110, 111 */
+};
+
+/*
+ * Reserve a constant-file read port for constant sel, channel chan.
+ * If a port already holds the same pair nothing changes; otherwise the
+ * lowest-indexed free port (hw_cfile_addr[i] < 0, i in 0..3) is claimed.
+ *
+ * Returns GL_TRUE on success; logs an error naming the constant and
+ * returns GL_FALSE when every port is held by another constant.
+ */
+GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
+                        GLuint sel,
+                        GLuint chan)
+{
+    int res_match = (-1);
+    int res_empty = (-1);
+    GLint res;
+
+    /* Scanning downward leaves the lowest free / matching index recorded. */
+    for (res=3; res>=0; res--) {
+        if (pAsm->hw_cfile_addr[res] < 0) {
+            res_empty = res;
+        } else if ((pAsm->hw_cfile_addr[res] == (int)sel) &&
+                   (pAsm->hw_cfile_chan[res] == (int)chan)) {
+            res_match = res;
+        }
+    }
+
+    if (res_match >= 0) {
+        /* Read for this scalar component already reserved, nothing to do. */
+        return GL_TRUE;
+    }
+    if (res_empty < 0) {
+        /* Use real conversions; "$sel"/"$chan" would be printed literally. */
+        radeon_error("All cfile read ports are used, cannot reference C%u, channel %u.\n",
+                     (unsigned) sel, (unsigned) chan);
+        return GL_FALSE;
+    }
+    pAsm->hw_cfile_addr[res_empty] = sel;
+    pAsm->hw_cfile_chan[res_empty] = chan;
+    return GL_TRUE;
+}
+
+/* Claim GPR read port hw_gpr[cycle][chan] for register sel.  A free (< 0) port or one
+ * already holding sel succeeds; a port holding another register logs an error. */
+GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
+{
+    if (pAsm->hw_gpr[cycle][chan] < 0) {
+        pAsm->hw_gpr[cycle][chan] = sel;
+        return GL_TRUE;
+    }
+    if (pAsm->hw_gpr[cycle][chan] == (int)sel) {
+        return GL_TRUE;
+    }
+    radeon_error("Another scalar operation has already used GPR read port for given channel\n");
+    return GL_FALSE;
+}
+
+/*
+ * Look up the cycle that scalar bank swizzle swiz assigns to source
+ * operand number sel and store it in *pCycle.
+ *
+ * sel indexes a three-entry table without a range check, so it must be
+ * 0, 1 or 2.
+ * Returns GL_TRUE on success; for an unrecognised swiz an error is
+ * logged, *pCycle is left unchanged and GL_FALSE is returned.
+ */
+GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
+{
+    /* Cycle for operand 0, 1, 2 under each swizzle. */
+    const int cycles_210[3] = {2, 1, 0};
+    const int cycles_122[3] = {1, 2, 2};
+    const int cycles_212[3] = {2, 1, 2};
+    const int cycles_221[3] = {2, 2, 1};
+    const int *row = NULL;
+
+    switch (swiz) {
+    case SQ_ALU_SCL_210:
+        row = cycles_210;
+        break;
+    case SQ_ALU_SCL_122:
+        row = cycles_122;
+        break;
+    case SQ_ALU_SCL_212:
+        row = cycles_212;
+        break;
+    case SQ_ALU_SCL_221:
+        row = cycles_221;
+        break;
+    default:
+        radeon_error("Bad Scalar bank swizzle value\n");
+        return GL_FALSE;
+    }
+
+    *pCycle = row[sel];
+    return GL_TRUE;
+}
+
+/*
+ * Look up the cycle that vector bank swizzle swiz assigns to source
+ * operand number sel and store it in *pCycle.
+ *
+ * sel indexes a three-entry table without a range check, so it must be
+ * 0, 1 or 2.
+ *
+ * Returns GL_TRUE on success; for an unrecognised swiz an error is
+ * logged, *pCycle is left unchanged and GL_FALSE is returned.
+ */
+GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
+{
+    /* Cycle for operand 0, 1, 2 under each swizzle. */
+    const int cycles_012[3] = {0, 1, 2};
+    const int cycles_021[3] = {0, 2, 1};
+    const int cycles_120[3] = {1, 2, 0};
+    const int cycles_102[3] = {1, 0, 2};
+    const int cycles_201[3] = {2, 0, 1};
+    const int cycles_210[3] = {2, 1, 0};
+    const int *row = NULL;
+
+    switch (swiz) {
+    case SQ_ALU_VEC_012:
+        row = cycles_012;
+        break;
+    case SQ_ALU_VEC_021:
+        row = cycles_021;
+        break;
+    case SQ_ALU_VEC_120:
+        row = cycles_120;
+        break;
+    case SQ_ALU_VEC_102:
+        row = cycles_102;
+        break;
+    case SQ_ALU_VEC_201:
+        row = cycles_201;
+        break;
+    case SQ_ALU_VEC_210:
+        row = cycles_210;
+        break;
+    default:
+        radeon_error("Bad Vec bank swizzle value\n");
+        return GL_FALSE;
+    }
+
+    *pCycle = row[sel];
+    return GL_TRUE;
+}
+
+/*
+ * Choose the bank swizzle for a scalar-slot ALU instruction and reserve the
+ * read ports its operands need.
+ *
+ *   pAsm                 assembler state holding the port bookkeeping and
+ *                        the current destination opcode
+ *   alu_instruction_ptr  instruction to inspect; its bank_swizzle field is
+ *                        overwritten
+ *
+ * The source fields are read back from alu_instruction_ptr, the swizzle is
+ * looked up in BANK_SWIZZLE_SCL from which operands are constants, and then
+ *   - every cfile constant gets a constant-file port (reserve_cfile), and
+ *   - every GPR operand whose assigned cycle is below the number of
+ *     constant operands gets a GPR port (reserve_gpr).
+ *
+ * Returns GL_FALSE as soon as any lookup or reservation fails; GL_TRUE
+ * otherwise.
+ */
+GLboolean check_scalar(r700_AssemblerBase* pAsm,
+                       R700ALUInstruction* alu_instruction_ptr)
+{
+    GLuint cycle;
+    GLuint bank_swizzle;
+    GLuint const_count = 0;
+    GLuint swizzle_key;
+    GLuint src;
+
+    BITS src_sel [3] = {0,0,0};
+    BITS src_chan[3] = {0,0,0};
+    BITS src_rel [3] = {0,0,0};
+    BITS src_neg [3] = {0,0,0};
+
+    GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
+
+    for (src=0; src<number_of_operands; src++) {
+        get_src_properties(alu_instruction_ptr,
+                           src,
+                           &(src_sel[src]),
+                           &(src_rel[src]),
+                           &(src_chan[src]),
+                           &(src_neg[src]) );
+    }
+
+    /* Slots beyond number_of_operands keep selector 0 and add nothing. */
+    swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
+                    (is_const( src_sel[1] ) ? 2 : 0) +
+                    (is_const( src_sel[2] ) ? 1 : 0) );
+
+    alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
+
+    /* Pass 1: count constant operands and reserve cfile read ports. */
+    for (src=0; src<number_of_operands; src++) {
+        if (!is_const( src_sel[src] )) {
+            continue;
+        }
+
+        /* Any constant, including literal and inline constants. */
+        const_count++;
+
+        /* A failed port reservation must fail the whole check, as it
+         * does in check_vector(). */
+        if (is_cfile( src_sel[src] ) &&
+            GL_FALSE == reserve_cfile(pAsm, src_sel[src], src_chan[src])) {
+            return GL_FALSE;
+        }
+    }
+
+    /* Pass 2: reserve GPR read ports for the cycles the swizzle assigns. */
+    bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
+    for (src=0; src<number_of_operands; src++) {
+        if (!is_gpr( src_sel[src] )) {
+            continue;
+        }
+
+        if (GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle)) {
+            return GL_FALSE;
+        }
+
+        /* Only cycles below the constant count reserve a GPR port here. */
+        if (cycle < const_count) {
+            if (GL_FALSE == reserve_gpr(pAsm, src_sel[src], src_chan[src], cycle)) {
+                return GL_FALSE;
+            }
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Choose the bank swizzle for a vector-slot ALU instruction and reserve the
+ * read ports its operands need.
+ *
+ *   pAsm                 assembler state holding the port bookkeeping and
+ *                        the current destination opcode
+ *   alu_instruction_ptr  instruction to inspect; its bank_swizzle field is
+ *                        overwritten
+ *
+ * The swizzle comes from BANK_SWIZZLE_VEC, keyed by which operands are
+ * constants.  Each GPR operand then reserves the GPR port of the cycle the
+ * swizzle assigns to it, except that src1 reserves nothing when it reads
+ * the same register and channel as src0.  Each cfile constant reserves a
+ * constant-file port.
+ *
+ * Returns GL_FALSE as soon as a lookup or reservation fails; GL_TRUE
+ * otherwise.
+ */
+GLboolean check_vector(r700_AssemblerBase* pAsm,
+                       R700ALUInstruction* alu_instruction_ptr)
+{
+    GLuint cycle;
+    GLuint bank_swizzle;
+    GLuint const_count = 0;
+    GLuint swizzle_key;
+    GLuint src;
+
+    BITS sel;
+    BITS chan;
+
+    BITS src_sel [3] = {0,0,0};
+    BITS src_chan[3] = {0,0,0};
+    BITS src_rel [3] = {0,0,0};
+    BITS src_neg [3] = {0,0,0};
+
+    GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
+
+    for (src=0; src<number_of_operands; src++) {
+        get_src_properties(alu_instruction_ptr,
+                           src,
+                           &(src_sel[src]),
+                           &(src_rel[src]),
+                           &(src_chan[src]),
+                           &(src_neg[src]) );
+    }
+
+    /* One key bit per constant operand: src0 -> 4, src1 -> 2, src2 -> 1. */
+    swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
+                    (is_const( src_sel[1] ) ? 2 : 0) +
+                    (is_const( src_sel[2] ) ? 1 : 0) );
+
+    alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
+    bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
+
+    /* Reserve one read port per operand. */
+    for (src=0; src<number_of_operands; src++) {
+        sel  = src_sel [src];
+        chan = src_chan[src];
+
+        if (is_gpr(sel)) {
+            if (GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle)) {
+                return GL_FALSE;
+            }
+
+            /* src1 reserves no port of its own when it repeats src0's
+             * register and channel. */
+            if ((src != 1) ||
+                (sel != src_sel[0]) ||
+                (chan != src_chan[0])) {
+                if (GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle)) {
+                    return GL_FALSE;
+                }
+            }
+        } else if (is_const(sel)) {
+            /* Counted as in check_scalar(), but not read afterwards. */
+            const_count++;
+
+            if (is_cfile(sel)) {
+                if (GL_FALSE == reserve_cfile(pAsm, sel, chan)) {
+                    return GL_FALSE;
+                }
+            }
+        }
+    }
+
+    return GL_TRUE;
+}
+
+GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
+{
+    R700ALUInstruction            * alu_instruction_ptr = NULL;
+    R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
+    R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
+
+    GLuint    number_of_scalar_operations;
+    GLboolean is_single_scalar_operation;
+    GLuint    scalar_channel_index;
+
+    PVSSRC * pcurrent_source;
+    int    current_source_index;
+    GLuint contiguous_slots_needed;
+
+    GLuint    uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
+    //GLuint    channel_swizzle, j;
+    //GLuint    chan_counter[4] = {0, 0, 0, 0};
+    //PVSSRC *  pSource[3];
+    GLboolean bSplitInst = GL_FALSE;
+
+    if (1 == pAsm->D.dst.math) 
+    {
+        is_single_scalar_operation = GL_TRUE;
+        number_of_scalar_operations = 1;
+    }
+    else 
+    {
+        is_single_scalar_operation = GL_FALSE;
+        number_of_scalar_operations = 4;
+
+/* current assembler doesn't do more than 1 register per source */
+#if 0
+        /* check read port, only very preliminary algorithm, not count in 
+           src0/1 same comp case and prev slot repeat case; also not count relative
+           addressing. TODO: improve performance. */
+        for(j=0; j<uNumSrc; j++)
+        {
+            pSource[j] = &(pAsm->S[j].src);
+        }
+        for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++) 
+        {
+            for(j=0; j<uNumSrc; j++) 
+            {
+                switch (scalar_channel_index) 
+                {
+                    case 0: channel_swizzle = pSource[j]->swizzlex; break;
+                    case 1: channel_swizzle = pSource[j]->swizzley; break;
+                    case 2: channel_swizzle = pSource[j]->swizzlez; break;
+                    case 3: channel_swizzle = pSource[j]->swizzlew; break;
+                    default: channel_swizzle = SQ_SEL_MASK; break;
+                }
+                if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) || 
+                     (pSource[j]->rtype == SRC_REG_INPUT))
+                     && (channel_swizzle <= SQ_SEL_W) )
+                {                    
+                    chan_counter[channel_swizzle]++;                        
+                }
+            }
+        }
+        if(   (chan_counter[SQ_SEL_X] > 3)
+           || (chan_counter[SQ_SEL_Y] > 3)
+           || (chan_counter[SQ_SEL_Z] > 3)
+           || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
+        {
+            bSplitInst = GL_TRUE;
+        }
+#endif
+    }
+
+    contiguous_slots_needed = 0;
+
+    if(!is_single_scalar_operation) 
+    {
+        contiguous_slots_needed = 4;
+    }
+
+    contiguous_slots_needed += pAsm->D2.dst2.literal_slots;
+
+    initialize(pAsm);    
+
+    for (scalar_channel_index=0;
+            scalar_channel_index < number_of_scalar_operations; 
+                scalar_channel_index++) 
+    {
+        if(scalar_channel_index == (number_of_scalar_operations-1))
+        {
+            switch(pAsm->D2.dst2.literal_slots)
+            {
+            case 0:
+                alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+                Init_R700ALUInstruction(alu_instruction_ptr);
+                break;
+            case 1:
+                alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
+                Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f);
+                alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
+                break;
+            case 2:
+                alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
+                Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f);
+                alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
+            break;
+            };
+        }
+        else
+        {
+            alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+            Init_R700ALUInstruction(alu_instruction_ptr);
+        }
+        
+        //src 0
+        current_source_index = 0;
+        pcurrent_source = &(pAsm->S[0].src);
+
+        if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+                                         current_source_index,
+                                         pcurrent_source, 
+                                         scalar_channel_index) )     
+        {            
+            return GL_FALSE;
+        }
+   
+        if (uNumSrc > 1) 
+        {            
+            // Process source 1            
+            current_source_index = 1;
+            pcurrent_source = &(pAsm->S[current_source_index].src);
+
+            if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+                                             current_source_index,
+                                             pcurrent_source, 
+                                             scalar_channel_index) ) 
+            {                
+                return GL_FALSE;
+            }
+        }
+
+        //other bits
+        alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode;
+
+        if(   (is_single_scalar_operation == GL_TRUE) 
+           || (GL_TRUE == bSplitInst) )
+        {
+            alu_instruction_ptr->m_Word0.f.last = 1;
+        }
+        else 
+        {
+            alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ?  1 : 0;
+        }
+
+        alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
+        if(1 == pAsm->D.dst.predicated)
+        {
+            alu_instruction_ptr->m_Word1_OP2.f.update_pred         = 0x1;
+            alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
+        }
+        else
+        {
+            alu_instruction_ptr->m_Word1_OP2.f.update_pred         = 0x0;
+            alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
+        }
+
+        // dst
+        if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || 
+            (pAsm->D.dst.rtype == DST_REG_OUT) ) 
+        {
+            alu_instruction_ptr->m_Word1.f.dst_gpr  = pAsm->D.dst.reg;
+        }
+        else 
+        {            
+            radeon_error("Only temp destination registers supported for ALU dest regs.\n");
+            return GL_FALSE;
+        }
+
+        alu_instruction_ptr->m_Word1.f.dst_rel  = SQ_ABSOLUTE;  //D.rtype
+
+        if ( is_single_scalar_operation == GL_TRUE ) 
+        {
+            // Override scalar_channel_index since only one scalar value will be written
+            if(pAsm->D.dst.writex) 
+            {
+                scalar_channel_index = 0;
+            }
+            else if(pAsm->D.dst.writey) 
+            {
+                scalar_channel_index = 1;
+            }
+            else if(pAsm->D.dst.writez) 
+            {
+                scalar_channel_index = 2;
+            }
+            else if(pAsm->D.dst.writew) 
+            {
+                scalar_channel_index = 3;
+            }
+        }
+
+        alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
+
+        alu_instruction_ptr->m_Word1.f.clamp    = pAsm->D2.dst2.SaturateMode;
+
+        if (pAsm->D.dst.op3) 
+        {            
+            //op3
+
+            alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
+
+            //There's 3rd src for op3
+            current_source_index = 2;
+            pcurrent_source = &(pAsm->S[current_source_index].src);
+
+            if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+                                              current_source_index,
+                                              pcurrent_source, 
+                                              scalar_channel_index) ) 
+            {
+                return GL_FALSE;
+            }
+        }
+        else 
+        {
+            //op2
+            if (pAsm->bR6xx)
+            {
+                alu_instruction_ptr->m_Word1_OP2.f6.alu_inst           = pAsm->D.dst.opcode;
+
+                alu_instruction_ptr->m_Word1_OP2.f6.src0_abs           = pAsm->S[0].src.abs;
+                alu_instruction_ptr->m_Word1_OP2.f6.src1_abs           = pAsm->S[1].src.abs;
+
+                //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
+                //alu_instruction_ptr->m_Word1_OP2.f6.update_pred         = 0x0;
+                switch (scalar_channel_index) 
+                {
+                    case 0: 
+                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex; 
+                        break;
+                    case 1: 
+                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey; 
+                        break;
+                    case 2: 
+                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez; 
+                        break;
+                    case 3: 
+                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; 
+                        break;
+                    default: 
+                        alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
+                        break;
+                }            
+                alu_instruction_ptr->m_Word1_OP2.f6.omod               = SQ_ALU_OMOD_OFF;
+            }
+            else
+            {
+                alu_instruction_ptr->m_Word1_OP2.f.alu_inst           = pAsm->D.dst.opcode;
+
+                alu_instruction_ptr->m_Word1_OP2.f.src0_abs           = pAsm->S[0].src.abs;
+                alu_instruction_ptr->m_Word1_OP2.f.src1_abs           = pAsm->S[1].src.abs;
+
+                //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
+                //alu_instruction_ptr->m_Word1_OP2.f.update_pred         = 0x0;
+                switch (scalar_channel_index) 
+                {
+                    case 0: 
+                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex; 
+                        break;
+                    case 1: 
+                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey; 
+                        break;
+                    case 2: 
+                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez; 
+                        break;
+                    case 3: 
+                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; 
+                        break;
+                    default: 
+                        alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
+                        break;
+                }            
+                alu_instruction_ptr->m_Word1_OP2.f.omod               = SQ_ALU_OMOD_OFF;
+            }
+        }
+
+        if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
+        {            
+            return GL_FALSE;
+        }
+
+        /*
+         * Judge the type of current instruction, is it vector or scalar 
+         * instruction.
+         */        
+        if (is_single_scalar_operation) 
+        {
+            if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
+            {                
+                return GL_FALSE;
+            }
+        }
+        else 
+        {
+            if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
+            {                
+                return GL_FALSE; 
+            }
+        }
+
+        contiguous_slots_needed -= 1;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Emit a scalar math opcode into a helper temporary, then MOV the result
+ * into the real destination:
+ *
+ *   opcode  tmp.x,    a.x
+ *   MOV     dst,      tmp.x
+ *
+ * pAsm   - assembler state; source slot 0 is filled by assemble_src() and
+ *          the final destination by assemble_dst().
+ * opcode - ALU opcode stored in D.dst.opcode for the first instruction.
+ *
+ * Returns GL_TRUE on success, GL_FALSE as soon as any helper reports failure.
+ */
+GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
+{
+    BITS tmp;
+
+    /* Check the operand-count result like assemble_ADD/assemble_ARL do. */
+    if( GL_FALSE == checkop1(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    tmp = gethelpr(pAsm);
+
+    // opcode  tmp.x,    a.x
+    pAsm->D.dst.opcode = opcode;
+    pAsm->D.dst.math = 1;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = tmp;
+    pAsm->D.dst.writex = 1;
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if ( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // MOV     dst,      tmp.x
+    // Now replicate result to all necessary channels in destination
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    /* Source register types come from the SRC_REG_* enum, not DST_REG_*. */
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Assemble ABS as  MAX dst, a, -a.
+ *
+ * Source slot 1 is a bitwise copy of source slot 0 that is then passed
+ * through flipneg_PVSSRC().
+ *
+ * Returns GL_TRUE on success, GL_FALSE if any assembler helper fails.
+ */
+GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
+{
+    /* Check the operand-count result like assemble_ADD/assemble_ARL do. */
+    if( GL_FALSE == checkop1(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    /* Second operand: same register/swizzle as the first, negate flipped. */
+    pAsm->S[1].bits = pAsm->S[0].bits;
+    flipneg_PVSSRC(&(pAsm->S[1].src));
+
+    if ( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Assemble a two-operand ADD.
+ *
+ * The routine also serves OPCODE_SUB: when the current IL instruction is a
+ * SUB, source slot 1 goes through flipneg_PVSSRC() before the instruction
+ * is emitted.
+ *
+ * Returns GL_TRUE on success and GL_FALSE when any helper fails.
+ */
+GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
+{
+    if (!checkop2(pAsm)) {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+    if (!assemble_dst(pAsm)) {
+        return GL_FALSE;
+    }
+
+    /* Both operands go to their default slots; stop at the first failure. */
+    if (!assemble_src(pAsm, 0, -1) || !assemble_src(pAsm, 1, -1)) {
+        return GL_FALSE;
+    }
+
+    if (OPCODE_SUB == pAsm->pILInst[pAsm->uiCurInst].Opcode) {
+        flipneg_PVSSRC(&(pAsm->S[1].src));
+    }
+
+    return next_ins(pAsm) ? GL_TRUE : GL_FALSE;
+}
+
+/*
+ * Assemble ARL as a MOVA_FLOOR instruction.
+ *
+ * The destination is set to temporary register 0 with all four channel
+ * write flags cleared.
+ *
+ * TODO: AR values don't persist between clauses.
+ *
+ * Returns GL_TRUE on success and GL_FALSE when any helper fails.
+ */
+GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
+{
+    if (!checkop1(pAsm)) {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = 0;
+
+    /* No destination channel is written. */
+    pAsm->D.dst.writex = 0;
+    pAsm->D.dst.writey = 0;
+    pAsm->D.dst.writez = 0;
+    pAsm->D.dst.writew = 0;
+
+    if (!assemble_src(pAsm, 0, -1)) {
+        return GL_FALSE;
+    }
+
+    return next_ins(pAsm) ? GL_TRUE : GL_FALSE;
+}
+
+/*
+ * Report an instruction that this assembler does not implement.
+ *
+ * opcode_str - opcode name inserted into the error message.
+ *
+ * Always returns GL_FALSE.
+ */
+GLboolean assemble_BAD(char *opcode_str)
+{
+    radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
+
+    return GL_FALSE;
+}
+
+/*
+ * Assemble CMP with the three-operand CNDGE opcode.
+ *
+ * The second and third assemble_src() calls cross their arguments
+ * (2 -> 1 and 1 -> 2); presumably this swaps IL sources 1 and 2 into the
+ * opposite hardware slots -- confirm against assemble_src().
+ *
+ * OP3 instructions have no write mask.  When the IL write mask is not 0xF
+ * the CNDGE result is written unmasked to a helper temporary and a second
+ * MOV instruction copies it into the real destination.
+ *
+ * Returns GL_TRUE on success and GL_FALSE when any helper fails.
+ */
+GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
+{
+    int helper = -1;
+
+    if (!checkop3(pAsm)) {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
+    pAsm->D.dst.op3    = 1;
+
+    if (pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask == 0xF) {
+        /* Full mask: CNDGE can target the real destination directly. */
+        if (!assemble_dst(pAsm)) {
+            return GL_FALSE;
+        }
+    } else {
+        /* OP3 has no support for write mask: go through a helper register. */
+        helper = gethelpr(pAsm);
+
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = helper;
+
+        nomask_PVSDST(&(pAsm->D.dst));
+    }
+
+    if (!assemble_src(pAsm, 0, -1)) {
+        return GL_FALSE;
+    }
+
+    if (!assemble_src(pAsm, 2, 1)) {
+        return GL_FALSE;
+    }
+
+    if (!assemble_src(pAsm, 1, 2)) {
+        return GL_FALSE;
+    }
+
+    if (!next_ins(pAsm)) {
+        return GL_FALSE;
+    }
+
+    if (pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask != 0xF) {
+        /* MOV dst, helper */
+        if (!assemble_dst(pAsm)) {
+            return GL_FALSE;
+        }
+
+        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg   = helper;
+
+        noneg_PVSSRC(&(pAsm->S[0].src));
+        noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+        if (!next_ins(pAsm)) {
+            return GL_FALSE;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Assemble a trigonometric instruction in two steps:
+ *
+ *   MUL     tmp.x,    a,      1/(2*pi)    (literal operand)
+ *   opcode  dst,      tmp.x
+ *
+ * pAsm   - assembler state.
+ * opcode - ALU opcode stored in D.dst.opcode for the second instruction.
+ *
+ * The scale factor is passed as a literal: literal_slots is set to 1 and
+ * the value is stored in C[0] (C[1] is zeroed).
+ *
+ * Returns GL_TRUE on success, GL_FALSE as soon as any helper reports
+ * failure.
+ */
+GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
+{
+    int tmp;
+
+    if( GL_FALSE == checkop1(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    tmp = gethelpr(pAsm);
+
+    // MUL     tmp.x,    a,      1/(2*pi)
+    pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = tmp;
+    pAsm->D.dst.writex = 1;
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+    setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+    pAsm->D2.dst2.literal_slots = 1;
+    pAsm->C[0].f = 1/(3.1415926535 * 2);
+    pAsm->C[1].f = 0.0F;
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // opcode  dst,      tmp.x
+    pAsm->D.dst.opcode = opcode;
+    pAsm->D.dst.math = 1;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp;
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    //TODO - replicate if more channels set in WriteMask
+    return GL_TRUE;
+}
+ 
+/*
+ * Assemble the dot-product family with a single DOT4 instruction.
+ *
+ *   OPCODE_DP3: zerocomp_PVSSRC() is applied to component 3 of both sources.
+ *   OPCODE_DPH: onecomp_PVSSRC() is applied to component 3 of source 0.
+ *   otherwise : the sources are used as assembled.
+ *
+ * Returns GL_TRUE on success and GL_FALSE when any helper fails.
+ */
+GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
+{
+    if (!checkop2(pAsm)) {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
+
+    if (!assemble_dst(pAsm)) {
+        return GL_FALSE;
+    }
+
+    if (!assemble_src(pAsm, 0, -1) || !assemble_src(pAsm, 1, -1)) {
+        return GL_FALSE;
+    }
+
+    if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DP3) {
+        /* Three-component dot product: drop the w terms. */
+        zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
+        zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
+    } else if (OPCODE_DPH == pAsm->pILInst[pAsm->uiCurInst].Opcode) {
+        /* Homogeneous dot product: first operand's w is forced to one. */
+        onecomp_PVSSRC(&(pAsm->S[0].src), 3);
+    }
+
+    return next_ins(pAsm) ? GL_TRUE : GL_FALSE;
+}
+ 
+/*
+ * Assemble DST as a single MUL whose operands have selected components
+ * replaced through onecomp_PVSSRC():
+ *
+ *   source 0: components 0 and 3
+ *   source 1: components 0 and 2
+ *
+ * Returns GL_TRUE on success and GL_FALSE when any helper fails.
+ */
+GLboolean assemble_DST(r700_AssemblerBase *pAsm)
+{
+    if (!checkop2(pAsm)) {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+    if (!assemble_dst(pAsm)) {
+        return GL_FALSE;
+    }
+
+    if (!assemble_src(pAsm, 0, -1) || !assemble_src(pAsm, 1, -1)) {
+        return GL_FALSE;
+    }
+
+    /* First operand: x and w components. */
+    onecomp_PVSSRC(&(pAsm->S[0].src), 0);
+    onecomp_PVSSRC(&(pAsm->S[0].src), 3);
+
+    /* Second operand: x and z components. */
+    onecomp_PVSSRC(&(pAsm->S[1].src), 0);
+    onecomp_PVSSRC(&(pAsm->S[1].src), 2);
+
+    return next_ins(pAsm) ? GL_TRUE : GL_FALSE;
+}
+
+/*
+ * Assemble EX2 by delegating to assemble_math_function() with the
+ * EXP_IEEE opcode.  Returns whatever that helper returns.
+ */
+GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
+{
+    const GLboolean assembled = assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
+
+    return assembled;
+}
+
+/*
+ * Assemble EXP.  One instruction group is emitted per destination channel
+ * that is enabled in the write mask of the instruction being assembled:
+ *
+ *   x:  FLOOR   tmp.x,    a.x
+ *       EX2     dst.x,    tmp.x
+ *   y:  FRACT   dst.y,    a.x
+ *   z:  EX2     dst.z,    a.x
+ *   w:  MOV     dst.w,    1.0
+ *
+ * The write mask is taken from pILInst[uiCurInst], i.e. the current
+ * instruction, as the other assemble_* routines do.
+ *
+ * Returns GL_TRUE on success, GL_FALSE as soon as any helper reports
+ * failure.
+ */
+GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
+{
+    BITS tmp;
+    GLuint writeMask;
+
+    if( GL_FALSE == checkop1(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* Mask of the instruction being assembled, not of pILInst[0]. */
+    writeMask = pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
+
+    tmp = gethelpr(pAsm);
+
+    // FLOOR   tmp.x,    a.x
+    // EX2     dst.x     tmp.x
+
+    if (writeMask & 0x1) {
+        pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
+
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg    = tmp;
+        pAsm->D.dst.writex = 1;
+
+        if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+        {
+            return GL_FALSE;
+        }
+
+        if( GL_FALSE == next_ins(pAsm) )
+        {
+            return GL_FALSE;
+        }
+
+        pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+        pAsm->D.dst.math = 1;
+
+        if( GL_FALSE == assemble_dst(pAsm) )
+        {
+            return GL_FALSE;
+        }
+
+        /* Only the x channel is produced by this instruction. */
+        pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        /* Source register types come from the SRC_REG_* enum. */
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg   = tmp;
+
+        setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+        noneg_PVSSRC(&(pAsm->S[0].src));
+
+        if( GL_FALSE == next_ins(pAsm) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    // FRACT   dst.y     a.x
+
+    if ((writeMask >> 1) & 0x1) {
+        pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+        if( GL_FALSE == assemble_dst(pAsm) )
+        {
+            return GL_FALSE;
+        }
+
+        if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+        {
+            return GL_FALSE;
+        }
+
+        /* Only the y channel is produced by this instruction. */
+        pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+        if( GL_FALSE == next_ins(pAsm) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    // EX2     dst.z,    a.x
+
+    if ((writeMask >> 2) & 0x1) {
+        pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+        pAsm->D.dst.math = 1;
+
+        if( GL_FALSE == assemble_dst(pAsm) )
+        {
+            return GL_FALSE;
+        }
+
+        if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+        {
+            return GL_FALSE;
+        }
+
+        /* Only the z channel is produced by this instruction. */
+        pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
+
+        if( GL_FALSE == next_ins(pAsm) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    // MOV     dst.w     1.0
+
+    if ((writeMask >> 3) & 0x1) {
+        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+        if( GL_FALSE == assemble_dst(pAsm) )
+        {
+            return GL_FALSE;
+        }
+
+        /* Only the w channel is produced by this instruction. */
+        pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
+
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg   = tmp;
+
+        /* The swizzle selects the constant 1 instead of a register channel. */
+        setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
+        noneg_PVSSRC(&(pAsm->S[0].src));
+
+        if( GL_FALSE == next_ins(pAsm) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    return GL_TRUE;
+}
+ 
+/*
+ * Assemble FLR as a single FLOOR instruction on the instruction's own
+ * destination and first source.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if any assembler helper fails.
+ */
+GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
+{
+    /* Check the operand-count result like assemble_ADD/assemble_ARL do. */
+    if ( GL_FALSE == checkop1(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
+
+    if ( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if ( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Assemble the integer floor variant by delegating to
+ * assemble_math_function() with the FLT_TO_INT opcode.  Returns whatever
+ * that helper returns.
+ */
+GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
+{
+    const GLboolean assembled = assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
+
+    return assembled;
+}
+
+/*
+ * Assemble FRC as a single FRACT instruction on the instruction's own
+ * destination and first source.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if any assembler helper fails.
+ */
+GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
+{
+    /* Check the operand-count result like assemble_ADD/assemble_ARL do. */
+    if ( GL_FALSE == checkop1(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+    if ( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if ( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+ 
+/*
+ * Assemble a KIL / KIL_NV instruction.
+ *
+ * pAsm   - assembler state.
+ * opcode - ALU kill opcode stored in D.dst.opcode.
+ *
+ * The destination is temporary register 0 with all channel writes cleared.
+ * Source slot 0 is set to the constant selector SQ_SEL_0.  Source slot 1 is
+ *   - for OPCODE_KIL_NV: the constant selector SQ_SEL_1, negated;
+ *   - otherwise:         IL source 0, via assemble_src(pAsm, 0, 1).
+ *
+ * On success the shader is flagged as using kill and alu_x_opcode is reset
+ * to SQ_CF_INST_ALU.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if any assembler helper fails.
+ */
+GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
+{
+    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+    /* Only plain KIL carries an operand to check. */
+    if(pILInst->Opcode == OPCODE_KIL)
+    {
+        if( GL_FALSE == checkop1(pAsm) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    pAsm->D.dst.opcode = opcode;
+    //pAsm->D.dst.math = 1;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg   = 0;
+    pAsm->D.dst.writex = 0;
+    pAsm->D.dst.writey = 0;
+    pAsm->D.dst.writez = 0;
+    pAsm->D.dst.writew = 0;
+
+    /* Slot 0: constant zero. */
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg = 0;
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if(pILInst->Opcode == OPCODE_KIL_NV)
+    {
+        /* Slot 1: constant one, negated. */
+        setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+        pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[1].src.reg = 0;
+        setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
+        neg_PVSSRC(&(pAsm->S[1].src));
+    }
+    else
+    {
+        /* Slot 1: the instruction's own operand. */
+        if( GL_FALSE == assemble_src(pAsm, 0, 1) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    if ( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* Doc says KILL has to be last(end) ALU clause */
+    pAsm->pR700Shader->killIsUsed = GL_TRUE;
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+    return GL_TRUE;
+}
+
+/*
+ * Assemble LG2 by delegating to assemble_math_function() with the
+ * LOG_IEEE opcode.  Returns whatever that helper returns.
+ */
+GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
+{
+    const GLboolean assembled = assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
+
+    return assembled;
+}
+
+/*
+ * Assemble LRP as three instructions routed through one helper temporary
+ * (reading assemble_src(pAsm, i, s) as "IL source i into hardware slot s";
+ * confirm against assemble_src()):
+ *
+ *   ADD     tmp,  src1, -src2
+ *   MULADD  tmp,  tmp,  src0, src2
+ *   MOV     dst,  tmp
+ *
+ * NOTE(review): the first step uses neg_PVSSRC() where assemble_ADD uses
+ * flipneg_PVSSRC() for SUB; verify the result when IL source 2 already
+ * carries a negate modifier.
+ *
+ * Returns GL_TRUE on success and GL_FALSE when any helper fails.
+ */
+GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
+{
+    BITS helper;
+
+    if (!checkop3(pAsm)) {
+        return GL_FALSE;
+    }
+
+    helper = gethelpr(pAsm);
+
+    /* Step 1: ADD into the helper register, unmasked. */
+    pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg   = helper;
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    nomask_PVSDST(&(pAsm->D.dst));
+
+    if (!assemble_src(pAsm, 1, 0)) {
+        return GL_FALSE;
+    }
+
+    if (!assemble_src(pAsm, 2, 1)) {
+        return GL_FALSE;
+    }
+
+    neg_PVSSRC(&(pAsm->S[1].src));
+
+    if (!next_ins(pAsm)) {
+        return GL_FALSE;
+    }
+
+    /* Step 2: MULADD, helper register is both destination and slot 0. */
+    pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+    pAsm->D.dst.op3    = 1;
+
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg   = helper;
+    nomask_PVSDST(&(pAsm->D.dst));
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = helper;
+    noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+    if (!assemble_src(pAsm, 0, 1)) {
+        return GL_FALSE;
+    }
+
+    if (!assemble_src(pAsm, 2, -1)) {
+        return GL_FALSE;
+    }
+
+    if (!next_ins(pAsm)) {
+        return GL_FALSE;
+    }
+
+    /* Step 3: MOV the helper register into the real destination. */
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if (!assemble_dst(pAsm)) {
+        return GL_FALSE;
+    }
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = helper;
+    noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+    return next_ins(pAsm) ? GL_TRUE : GL_FALSE;
+}
+
+/*
+ * Assemble LOG using three helper temporaries:
+ *
+ *   MAX     tmp1.x,   a.x,      -a.x      (fabs(a.x))
+ *   LG2     tmp2.x,   tmp1.x
+ *   FLOOR   tmp3.x,   tmp2.x
+ *   MOV     dst.x,    tmp3.x
+ *   ADD     tmp3.x,   tmp2.x,   -tmp3.x
+ *   EX2     dst.y,    tmp3.x
+ *   MOV     dst.z,    tmp2.x
+ *   MOV     dst.w,    1.0
+ *
+ * Each write to dst first calls assemble_dst() and then clears the write
+ * flags of the three channels that instruction does not produce.
+ *
+ * Returns GL_TRUE on success, GL_FALSE as soon as any helper reports
+ * failure.
+ */
+GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
+{
+    BITS tmp1, tmp2, tmp3;
+
+    /* Check the operand-count result like assemble_ADD/assemble_ARL do. */
+    if( GL_FALSE == checkop1(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    tmp1 = gethelpr(pAsm);
+    tmp2 = gethelpr(pAsm);
+    tmp3 = gethelpr(pAsm);
+
+    // FIXME: The hardware can do fabs() directly on input
+    //        elements, but the compiler doesn't have the
+    //        capability to use that.
+
+    // MAX     tmp1.x,   a.x,    -a.x   (fabs(a.x))
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = tmp1;
+    pAsm->D.dst.writex = 1;
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->S[1].bits = pAsm->S[0].bits;
+    flipneg_PVSSRC(&(pAsm->S[1].src));
+
+    if ( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // LG2     tmp2.x,   tmp1.x
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
+    pAsm->D.dst.math = 1;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = tmp2;
+    pAsm->D.dst.writex = 1;
+
+    /* Source register types come from the SRC_REG_* enum throughout. */
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp1;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // FLOOR   tmp3.x,   tmp2.x
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = tmp3;
+    pAsm->D.dst.writex = 1;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp2;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // MOV     dst.x,    tmp3.x
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp3;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // ADD     tmp3.x,   tmp2.x,    -tmp3.x
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = tmp3;
+    pAsm->D.dst.writex = 1;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp2;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[1].src.reg   = tmp3;
+
+    setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+    neg_PVSSRC(&(pAsm->S[1].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // EX2     dst.y,    tmp3.x
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+    pAsm->D.dst.math = 1;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp3;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // MOV     dst.z,    tmp2.x
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp2;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // MOV     dst.w     1.0
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp1;
+
+    /* The swizzle selects the constant 1 instead of a register channel. */
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Assemble MAD with the three-operand MULADD opcode.
+ *
+ * The result is routed through a helper temporary, followed by a MOV into
+ * the real destination, when either
+ *   - the IL write mask is not 0xF (OP3 has no support for write mask), or
+ *   - the destination is a temporary whose index equals the index of a
+ *     temporary source (TODO: more investigation on MAD src and dst using
+ *     the same register).
+ *
+ * Returns GL_TRUE on success and GL_FALSE when any helper fails.
+ */
+GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
+{
+    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+    GLboolean viaHelper = GL_FALSE;
+    int helper = -1;
+    int srcIdx;
+
+    if (!checkop3(pAsm)) {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+    pAsm->D.dst.op3    = 1;
+
+    if (pILInst->DstReg.WriteMask != 0xF) {
+        /* OP3 has no support for write mask */
+        viaHelper = GL_TRUE;
+    } else if (pILInst->DstReg.File == PROGRAM_TEMPORARY) {
+        /* Look for a temporary source that shares the destination index. */
+        for (srcIdx = 0; srcIdx < 3; srcIdx++) {
+            if (pILInst->SrcReg[srcIdx].File != PROGRAM_TEMPORARY) {
+                continue;
+            }
+            if (pILInst->SrcReg[srcIdx].Index == pILInst->DstReg.Index) {
+                viaHelper = GL_TRUE;
+                break;
+            }
+        }
+    }
+
+    if (viaHelper) {
+        helper = gethelpr(pAsm);
+
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = helper;
+
+        nomask_PVSDST(&(pAsm->D.dst));
+    } else if (!assemble_dst(pAsm)) {
+        return GL_FALSE;
+    }
+
+    /* Sources 0..2 go to their default slots; stop at the first failure. */
+    for (srcIdx = 0; srcIdx < 3; srcIdx++) {
+        if (!assemble_src(pAsm, srcIdx, -1)) {
+            return GL_FALSE;
+        }
+    }
+
+    if (!next_ins(pAsm)) {
+        return GL_FALSE;
+    }
+
+    if (viaHelper) {
+        /* MOV dst, helper */
+        if (!assemble_dst(pAsm)) {
+            return GL_FALSE;
+        }
+
+        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg   = helper;
+
+        noneg_PVSSRC(&(pAsm->S[0].src));
+        noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+        if (!next_ins(pAsm)) {
+            return GL_FALSE;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/* LIT dst, src -- expanded into five ALU instructions: MOV, MAX, LOG_CLAMPED, MUL_LIT, EXP_IEEE. */
+GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
+{
+    unsigned int dstReg;
+    unsigned int dstType;
+    unsigned int srcReg;
+    unsigned int srcType;
+    checkop1(pAsm);             /* NOTE(review): the result is not checked here */
+    int tmp = gethelpr(pAsm);   /* helper register: receives the MUL_LIT result, feeds EXP_IEEE */
+    /* Resolve dst and src once; their reg/rtype are captured and re-applied in every step below. */
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+    dstReg  = pAsm->D.dst.reg;
+    dstType = pAsm->D.dst.rtype;
+    srcReg  = pAsm->S[0].src.reg;
+    srcType = pAsm->S[0].src.rtype;
+
+    /* Step 1: dst.xw <- 1.0 (MOV with every source swizzle set to SQ_SEL_1) */
+    pAsm->D.dst.opcode   = SQ_OP2_INST_MOV;
+    pAsm->D.dst.rtype    = dstType;
+    pAsm->D.dst.reg      = dstReg;
+    pAsm->D.dst.writex   = 1;
+    pAsm->D.dst.writey   = 0;
+    pAsm->D.dst.writez   = 0;
+    pAsm->D.dst.writew   = 1;
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+    pAsm->S[0].src.swizzlex = SQ_SEL_1;
+    pAsm->S[0].src.swizzley = SQ_SEL_1;
+    pAsm->S[0].src.swizzlez = SQ_SEL_1;
+    pAsm->S[0].src.swizzlew = SQ_SEL_1;
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+    /* The source is assembled again before each later step (presumably next_ins resets operand state -- TODO confirm). */
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    /* Step 2: dst.y = max(src.x, 0.0) -- second operand uses SQ_SEL_0 on every channel */
+    pAsm->D.dst.opcode   = SQ_OP2_INST_MAX;
+    pAsm->D.dst.rtype    = dstType;
+    pAsm->D.dst.reg      = dstReg;
+    pAsm->D.dst.writex   = 0;
+    pAsm->D.dst.writey   = 1;
+    pAsm->D.dst.writez   = 0;
+    pAsm->D.dst.writew   = 0;
+    pAsm->S[0].src.rtype = srcType;
+    pAsm->S[0].src.reg   = srcReg;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
+    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[1].src.reg   = tmp;
+    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[1].src));
+    pAsm->S[1].src.swizzlex = SQ_SEL_0;
+    pAsm->S[1].src.swizzley = SQ_SEL_0;
+    pAsm->S[1].src.swizzlez = SQ_SEL_0;
+    pAsm->S[1].src.swizzlew = SQ_SEL_0;
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
+
+    /* Step 3: dst.z = log(src.y), using the LOG_CLAMPED opcode */
+    pAsm->D.dst.opcode   = SQ_OP2_INST_LOG_CLAMPED;
+    pAsm->D.dst.math     = 1;
+    pAsm->D.dst.rtype    = dstType;
+    pAsm->D.dst.reg      = dstReg;
+    pAsm->D.dst.writex   = 0;
+    pAsm->D.dst.writey   = 0;
+    pAsm->D.dst.writez   = 1;
+    pAsm->D.dst.writew   = 0;
+    pAsm->S[0].src.rtype = srcType;
+    pAsm->S[0].src.reg   = srcReg;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+    /* Same program source 0 again, with third argument 2 (presumably the operand slot, S[2] is used below -- TODO confirm). */
+    if( GL_FALSE == assemble_src(pAsm, 0, 2) )
+    {
+        return GL_FALSE;
+    }
+
+    swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
+
+    swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
+
+    /* Step 4: tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) -- three-operand (op3) instruction */
+    pAsm->D.dst.opcode   = SQ_OP3_INST_MUL_LIT;
+    pAsm->D.dst.math     = 1;
+    pAsm->D.dst.op3      = 1;
+    pAsm->D.dst.rtype    = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg      = tmp;
+    pAsm->D.dst.writex   = 1;
+    pAsm->D.dst.writey   = 0;
+    pAsm->D.dst.writez   = 0;
+    pAsm->D.dst.writew   = 0;
+
+    pAsm->S[0].src.rtype = srcType;
+    pAsm->S[0].src.reg   = srcReg;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    /* NOTE(review): dstReg is read back as SRC_REG_TEMPORARY whatever dstType is -- confirm for non-temporary destinations. */
+    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[1].src.reg   = dstReg;
+    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[1].src));
+    pAsm->S[1].src.swizzlex = SQ_SEL_Z;
+    pAsm->S[1].src.swizzley = SQ_SEL_Z;
+    pAsm->S[1].src.swizzlez = SQ_SEL_Z;
+    pAsm->S[1].src.swizzlew = SQ_SEL_Z;
+
+    pAsm->S[2].src.rtype = srcType;
+    pAsm->S[2].src.reg   = srcReg;
+    setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* Step 5: dst.z = exp(tmp.x) -- overwrites the log value written in step 3 */
+    pAsm->D.dst.opcode   = SQ_OP2_INST_EXP_IEEE;
+    pAsm->D.dst.math     = 1;
+    pAsm->D.dst.rtype    = dstType;
+    pAsm->D.dst.reg      = dstReg;
+    pAsm->D.dst.writex   = 0;
+    pAsm->D.dst.writey   = 0;
+    pAsm->D.dst.writez   = 1;
+    pAsm->D.dst.writew   = 0;
+
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+    pAsm->S[0].src.swizzlex = SQ_SEL_X;
+    pAsm->S[0].src.swizzley = SQ_SEL_X;
+    pAsm->S[0].src.swizzlez = SQ_SEL_X;
+    pAsm->S[0].src.swizzlew = SQ_SEL_X;
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+ 
+GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
+{
+    /*
+     * MAX dst, src0, src1
+     *
+     * Emits one two-operand ALU instruction with opcode SQ_OP2_INST_MAX.
+     * Stages run in this order: checkop2, opcode, assemble_dst, source 0,
+     * source 1, next_ins.  The first stage reporting GL_FALSE aborts the
+     * sequence.
+     *
+     * Returns GL_TRUE when every stage succeeded, GL_FALSE otherwise.
+     */
+
+    if( !checkop2(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
+
+    /* || short-circuits, so the calls happen strictly left to right. */
+    if(    !assemble_dst(pAsm)
+        || !assemble_src(pAsm, 0, -1)
+        || !assemble_src(pAsm, 1, -1)
+        || !next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+ 
+GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
+{
+    /*
+     * MIN dst, src0, src1
+     *
+     * Emits one two-operand ALU instruction with opcode SQ_OP2_INST_MIN.
+     * Stages run in this order: checkop2, opcode, assemble_dst, source 0,
+     * source 1, next_ins.  The first stage reporting GL_FALSE aborts the
+     * sequence.
+     *
+     * Returns GL_TRUE when every stage succeeded, GL_FALSE otherwise.
+     */
+
+    if( !checkop2(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
+
+    /* || short-circuits, so the calls happen strictly left to right. */
+    if(    !assemble_dst(pAsm)
+        || !assemble_src(pAsm, 0, -1)
+        || !assemble_src(pAsm, 1, -1)
+        || !next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+ 
+GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
+{
+    /*
+     * MOV dst, src0
+     *
+     * Emits one single-operand ALU instruction with opcode
+     * SQ_OP2_INST_MOV.  Returns GL_TRUE on success and GL_FALSE as soon as
+     * assemble_dst, assemble_src or next_ins fails.
+     */
+    checkop1(pAsm);    /* result is not inspected (unchanged behaviour) */
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    /* || short-circuits, so the calls happen strictly left to right. */
+    if(    !assemble_dst(pAsm)
+        || !assemble_src(pAsm, 0, -1)
+        || !next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+ 
+GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
+{
+    /*
+     * MUL dst, src0, src1
+     *
+     * Emits one two-operand ALU instruction with opcode SQ_OP2_INST_MUL.
+     * Stages run in this order: checkop2, opcode, assemble_dst, source 0,
+     * source 1, next_ins.  The first stage reporting GL_FALSE aborts the
+     * sequence.
+     *
+     * Returns GL_TRUE when every stage succeeded, GL_FALSE otherwise.
+     */
+
+    if( !checkop2(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+    /* || short-circuits, so the calls happen strictly left to right. */
+    if(    !assemble_dst(pAsm)
+        || !assemble_src(pAsm, 0, -1)
+        || !assemble_src(pAsm, 1, -1)
+        || !next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+ 
+GLboolean assemble_POW(r700_AssemblerBase *pAsm)
+{
+    /* POW dst, a, b: emitted as LG2, MUL and EX2 on a helper register, followed
+     * by a MOV into dst.  Returns GL_FALSE as soon as one step fails. */
+    BITS tmp;
+
+    checkop1(pAsm);
+    tmp = gethelpr(pAsm);
+    // LG2 tmp.x,     a.swizzle
+    pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
+    pAsm->D.dst.math = 1;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg   = tmp;
+    nomask_PVSDST(&(pAsm->D.dst));
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // MUL tmp.x,     tmp.x, b.swizzle
+    pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg = tmp;
+    nomask_PVSDST(&(pAsm->D.dst));
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg = tmp;
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // EX2 tmp.x,             tmp.x
+    // (the result stays in tmp; dst itself is written by the MOV below)
+    pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+    pAsm->D.dst.math = 1;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg = tmp;
+    nomask_PVSDST(&(pAsm->D.dst));
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg = tmp;
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    // Now replicate result to all necessary channels in destination
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    if( GL_FALSE == assemble_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;   // source operands take the SRC_REG_* constant, as in the steps above
+    pAsm->S[0].src.reg   = tmp;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+ 
+GLboolean assemble_RCP(r700_AssemblerBase *pAsm) 
+{   /* RCP: forwards to assemble_math_function with SQ_OP2_INST_RECIP_IEEE and returns its result. */
+    return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
+}
+ 
+GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) 
+{   /* RSQ: forwards to assemble_math_function with SQ_OP2_INST_RECIPSQRT_IEEE and returns its result. */
+    return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
+}
+ 
+GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
+{
+    /*
+     * SCS dst, src -- three ALU instructions:
+     *   MUL tmp.x, src, 1/(2*PI)   (literal constant)
+     *   COS dst, tmp.x             (y write-enable cleared)
+     *   SIN dst, tmp.x             (x write-enable cleared)
+     *
+     * Returns GL_FALSE as soon as an assemble_dst/assemble_src/next_ins
+     * step fails, GL_TRUE otherwise.
+     */
+    BITS tmp;
+
+    checkop1(pAsm);
+    tmp = gethelpr(pAsm);
+    /* tmp.x = src /2*PI */
+    pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = tmp;
+    pAsm->D.dst.writex = 1;
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) ) { return GL_FALSE; }
+
+    pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+    setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+    pAsm->D2.dst2.literal_slots = 1;
+    pAsm->C[0].f = 1/(3.1415926535 * 2);
+    pAsm->C[1].f = 0.0F;
+
+    if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; }
+
+    // COS dst.x,    a.x
+    pAsm->D.dst.opcode = SQ_OP2_INST_COS;
+    pAsm->D.dst.math = 1;
+    if( GL_FALSE == assemble_dst(pAsm) ) { return GL_FALSE; }
+    /* mask y */
+    pAsm->D.dst.writey = 0;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp;
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; }
+
+    // SIN dst.y,    a.x
+    pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
+    pAsm->D.dst.math = 1;
+    if( GL_FALSE == assemble_dst(pAsm) ) { return GL_FALSE; }
+    /* mask x */
+    pAsm->D.dst.writex = 0;
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = tmp;
+    setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; }
+
+    return GL_TRUE;
+}
+
+GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
+{
+    /*
+     * Generic two-operand ALU instruction: dst = opcode(src0, src1).
+     *
+     * The caller-supplied opcode is stored unchanged in D.dst.opcode;
+     * D.dst.math is not written here.
+     * Stages run in this order: checkop2, opcode, assemble_dst, source 0,
+     * source 1, next_ins.  The first stage reporting GL_FALSE aborts the
+     * sequence.
+     *
+     * Returns GL_TRUE when every stage succeeded, GL_FALSE otherwise.
+     */
+
+    if( !checkop2(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = opcode;
+
+    /* || short-circuits, so the calls happen strictly left to right. */
+    if(    !assemble_dst(pAsm)
+        || !assemble_src(pAsm, 0, -1)
+        || !assemble_src(pAsm, 1, -1)
+        || !next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode) 
+{
+    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+    /* Emits one predicated ALU instruction: opcode(condition register, SQ_SEL_0 operand) -> uHelpReg.x. */
+    pAsm->D.dst.opcode = opcode;
+    pAsm->D.dst.math   = 1;
+    pAsm->D.dst.predicated = 1;
+    /* Destination: helper register, only the x channel is written. */
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg = pAsm->uHelpReg;
+    pAsm->D.dst.writex = 1;
+    pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+    /* Operand 0: last condition register, offset by the first temp register; x swizzle is the low 3 bits of CondSwizzle. */
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number;
+    pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7;
+    noneg_PVSSRC(&(pAsm->S[0].src));
+    /* Operand 1: every channel selects SQ_SEL_0 (presumably the constant 0 -- TODO confirm); the register number is the helper register. */
+    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[1].src.reg   = pAsm->uHelpReg;
+    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[1].src));
+    pAsm->S[1].src.swizzlex = SQ_SEL_0;
+    pAsm->S[1].src.swizzley = SQ_SEL_0;
+    pAsm->S[1].src.swizzlez = SQ_SEL_0;
+    pAsm->S[1].src.swizzlew = SQ_SEL_0;
+
+    if( GL_FALSE == next_ins(pAsm) ) 
+    {
+	    return GL_FALSE;
+    }
+
+    return GL_TRUE;   /* GL_FALSE is returned above only when next_ins fails */
+}
+ 
+GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
+{
+    /*
+     * SGE dst, src0, src1
+     *
+     * Emits one two-operand ALU instruction with opcode SQ_OP2_INST_SETGE.
+     * Stages run in this order: checkop2, opcode, assemble_dst, source 0,
+     * source 1, next_ins.  The first stage reporting GL_FALSE aborts the
+     * sequence.
+     *
+     * Returns GL_TRUE when every stage succeeded, GL_FALSE otherwise.
+     */
+
+    if( !checkop2(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
+
+    /* || short-circuits, so the calls happen strictly left to right. */
+    if(    !assemble_dst(pAsm)
+        || !assemble_src(pAsm, 0, -1)
+        || !assemble_src(pAsm, 1, -1)
+        || !next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+ 
+GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
+{
+    /*
+     * SLT dst, src0, src1
+     *
+     * Emitted as SQ_OP2_INST_SETGT.  Program source 0 is assembled with
+     * third argument 1 and program source 1 with third argument 0
+     * (presumably the operand slot, i.e. the operands are exchanged so
+     * that "src1 > src0" expresses "src0 < src1" -- TODO confirm).
+     *
+     * Returns GL_TRUE when every stage succeeded, GL_FALSE otherwise.
+     */
+
+    if( !checkop2(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
+
+    /* || short-circuits, so the calls happen strictly left to right. */
+    if(    !assemble_dst(pAsm)
+        || !assemble_src(pAsm, 0, 1)
+        || !assemble_src(pAsm, 1, 0)
+        || !next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+ 
+GLboolean assemble_STP(r700_AssemblerBase *pAsm) 
+{   /* Stub: pAsm is not used, nothing is emitted, GL_TRUE is always returned. */
+    return GL_TRUE;
+}
+ 
+GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
+{
+    /*
+     * Assemble a texture-type instruction.  Depending on the IL opcode this
+     * emits a gradient (DDX/DDY), TXB or plain/shadow sample opcode; TXP and
+     * cube-map targets first emit extra ALU instructions on helper registers.
+     * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+     */
+    GLboolean src_const;
+    GLboolean need_barrier = GL_FALSE;
+
+    checkop1(pAsm);
+
+    switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
+    {
+    case PROGRAM_UNIFORM:
+    case PROGRAM_CONSTANT:
+    case PROGRAM_LOCAL_PARAM:
+    case PROGRAM_ENV_PARAM:
+    case PROGRAM_STATE_VAR:
+        src_const = GL_TRUE;
+        break;
+    case PROGRAM_TEMPORARY:
+    case PROGRAM_INPUT:
+    default:
+        src_const = GL_FALSE;
+        break;
+    }
+
+    if (GL_TRUE == src_const)
+    {
+        if ( GL_FALSE == mov_temp(pAsm, 0) )
+            return GL_FALSE;
+        need_barrier = GL_TRUE;
+    }
+
+    if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
+    {
+        GLuint tmp = gethelpr(pAsm);
+        pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;   /* tmp.w = 1 / src.w */
+        pAsm->D.dst.math = 1;
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = tmp;
+        pAsm->D.dst.writew = 1;
+
+        if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+        {
+            return GL_FALSE;
+        }
+        swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
+        if( GL_FALSE == next_ins(pAsm) )
+        {
+            return GL_FALSE;
+        }
+
+        pAsm->D.dst.opcode = SQ_OP2_INST_MUL;          /* tmp.xyz = src * tmp.w */
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = tmp;
+        pAsm->D.dst.writex = 1;
+        pAsm->D.dst.writey = 1;
+        pAsm->D.dst.writez = 1;
+        pAsm->D.dst.writew = 0;
+
+        if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+        {
+            return GL_FALSE;
+        }
+        setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+        pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[1].src.reg   = tmp;
+        setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
+
+        if( GL_FALSE == next_ins(pAsm) )
+        {
+            return GL_FALSE;
+        }
+
+        pAsm->aArgSubst[1] = tmp;
+        need_barrier = GL_TRUE;
+    }
+
+    if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
+    {
+        GLuint tmp1 = gethelpr(pAsm);
+        GLuint tmp2 = gethelpr(pAsm);
+        /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
+        pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = tmp1;
+        nomask_PVSDST(&(pAsm->D.dst));
+
+        if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+        {
+            return GL_FALSE;
+        }
+
+        if( GL_FALSE == assemble_src(pAsm, 0, 1) )
+        {
+            return GL_FALSE;
+        }
+
+        swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
+        swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
+
+        if( GL_FALSE == next_ins(pAsm) )
+        {
+            return GL_FALSE;
+        }
+
+        /* tmp1.z = RCP_e(|tmp1.z|) */
+        pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
+        pAsm->D.dst.math = 1;
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = tmp1;
+        pAsm->D.dst.writez = 1;
+
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg = tmp1;
+        pAsm->S[0].src.swizzlex = SQ_SEL_Z;
+        pAsm->S[0].src.abs = 1;
+
+        if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; }
+
+        /* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
+         * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
+         * muladd has no writemask, have to use another temp 
+         */
+        pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+        pAsm->D.dst.op3    = 1;
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = tmp2;
+
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg   = tmp1;
+        noswizzle_PVSSRC(&(pAsm->S[0].src));
+        setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+        pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[1].src.reg   = tmp1;
+        setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
+        setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
+        /* immediate c 1.5 */
+        pAsm->D2.dst2.literal_slots = 1;
+        pAsm->C[0].f = 1.5F;
+        pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+        pAsm->S[2].src.reg   = tmp1;
+        setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X);
+
+        if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; }
+
+        /* tmp1.xy = temp2.xy */
+        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = tmp1;
+        pAsm->D.dst.writex = 1;
+        pAsm->D.dst.writey = 1;
+        pAsm->D.dst.writez = 0;
+        pAsm->D.dst.writew = 0;
+
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg   = tmp2;
+        noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+        if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; }
+        pAsm->aArgSubst[1] = tmp1;
+        need_barrier = GL_TRUE;
+    }
+
+    switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
+    {
+        case OPCODE_DDX:
+            /* will these need WQM(1) on CF inst ? */
+            pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
+            break;
+        case OPCODE_DDY:
+            pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
+            break;
+        case OPCODE_TXB:
+            pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
+            break;
+        default:
+            if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
+                pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C;
+            else
+                pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+    }
+
+    pAsm->is_tex = GL_TRUE;
+    if ( GL_TRUE == need_barrier )   /* raised above by the constant copy, TXP or cube setup */
+    {
+        pAsm->need_tex_barrier = GL_TRUE;
+    }
+    // Set src1 to tex unit id
+    pAsm->S[1].src.reg   = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
+    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+
+    //No sw info from mesa compiler, so hard code here.
+    pAsm->S[1].src.swizzlex = SQ_SEL_X;
+    pAsm->S[1].src.swizzley = SQ_SEL_Y;
+    pAsm->S[1].src.swizzlez = SQ_SEL_Z;
+    pAsm->S[1].src.swizzlew = SQ_SEL_W;
+
+    if( GL_FALSE == tex_dst(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == tex_src(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
+    {
+        /* hopefully did swizzles before */
+        noswizzle_PVSSRC(&(pAsm->S[0].src));
+    }
+
+    if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
+    {
+        /* SAMPLE dst, tmp.yxwy, CUBE */
+        pAsm->S[0].src.swizzlex = SQ_SEL_Y;
+        pAsm->S[0].src.swizzley = SQ_SEL_X;
+        pAsm->S[0].src.swizzlez = SQ_SEL_W;
+        pAsm->S[0].src.swizzlew = SQ_SEL_Y;
+    }
+
+    if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
+    {
+        /* compare value goes to w chan ? */
+        pAsm->S[0].src.swizzlew = SQ_SEL_Z;
+    }
+
+    if ( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* add ARB shadow ambient but clamp to 0..1 */
+    if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
+    {
+        /* ADD_SAT dst,  dst,  ambient[texunit] */
+        pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+        if( GL_FALSE == assemble_dst(pAsm) )
+        {
+            return GL_FALSE;
+        }
+        pAsm->D2.dst2.SaturateMode = 1;
+
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg = pAsm->D.dst.reg;
+        noswizzle_PVSSRC(&(pAsm->S[0].src));
+        noneg_PVSSRC(&(pAsm->S[0].src));
+
+        pAsm->S[1].src.rtype = SRC_REG_CONSTANT;
+        pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
+        noswizzle_PVSSRC(&(pAsm->S[1].src));
+        noneg_PVSSRC(&(pAsm->S[1].src));
+
+        if( GL_FALSE == next_ins(pAsm) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+GLboolean assemble_XPD(r700_AssemblerBase *pAsm) 
+{
+    BITS tmp1;       /* helper register holding the first partial product */
+    BITS tmp2 = 0;   /* second helper register, only obtained when the write mask is partial */
+    /* XPD dst, a, b: a.yzx * b.zxy - a.zxy * b.yzx, emitted as a MUL followed by a MULADD. */
+    if( GL_FALSE == checkop2(pAsm) )
+    {
+	    return GL_FALSE;
+    }
+
+    tmp1 = gethelpr(pAsm);
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MUL;   /* tmp1 = a.zxy0 * b.yzx0 */
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg   = tmp1;
+    nomask_PVSDST(&(pAsm->D.dst));
+  
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+    {
+        return GL_FALSE;
+    }
+ 
+    swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
+    swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
+
+    if( GL_FALSE == next_ins(pAsm) ) 
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;   /* result = a.yzx0 * b.zxy0 + (-tmp1) */
+    pAsm->D.dst.op3    = 1;
+    /* With a partial write mask (!= 0xF) the MULADD writes all of tmp2; a MOV at the end copies it to dst. */
+    if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
+    {
+        tmp2 = gethelpr(pAsm);
+
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg   = tmp2;
+
+        nomask_PVSDST(&(pAsm->D.dst));
+    }
+    else 
+    {
+        if( GL_FALSE == assemble_dst(pAsm) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+    {
+        return GL_FALSE;
+    }
+
+    if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+    {
+        return GL_FALSE;
+    }
+ 
+    swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
+    swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
+
+    // result1 + (neg) result0
+    setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
+    pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[2].src.reg   = tmp1;
+
+    neg_PVSSRC(&(pAsm->S[2].src));
+    noswizzle_PVSSRC(&(pAsm->S[2].src));
+
+    if( GL_FALSE == next_ins(pAsm) ) 
+    {
+        return GL_FALSE;
+    }
+
+    /* Partial write mask: copy tmp2 into the destination set up by assemble_dst. */
+    if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) 
+    {
+        if( GL_FALSE == assemble_dst(pAsm) )
+        {
+            return GL_FALSE;
+        }
+
+        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+        // Use tmp as source
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg   = tmp2;
+
+        noneg_PVSSRC(&(pAsm->S[0].src));
+        noswizzle_PVSSRC(&(pAsm->S[0].src));
+
+        if( GL_FALSE == next_ins(pAsm) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
+{   /* Stub: pAsm is not used, nothing is emitted, GL_TRUE is always returned. */
+    return GL_TRUE;
+}
+
+static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason)
+{
+    /* Lower CALLSTACK[CALLSP].current by the cost of uReason; unlisted reasons change nothing. */
+    int nEntries = 0;
+    if (FC_PUSH_VPM == uReason || FC_REP == uReason)
+    {
+        /* TODO : for 16 vp asic, FC_REP should cost 2 */
+        nEntries = 1;
+    }
+    else if (FC_PUSH_WQM == uReason || FC_LOOP == uReason)
+    {
+        nEntries = 4;
+    }
+
+    if (0 != nEntries)
+    {
+        pAsm->CALLSTACK[pAsm->CALLSP].current -= nEntries;
+    }
+}
+
+static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly)
+{
+    /*
+     * Track call-stack usage in CALLSTACK[CALLSP].
+     * bCheckMaxOnly == GL_TRUE: raise .max to (.current + cost) for
+     * FC_PUSH_VPM / FC_PUSH_WQM only; .current stays untouched and other
+     * reasons do nothing.
+     * Otherwise: add the cost of uReason to .current (unlisted reasons add
+     * nothing), then raise .max to .current if it was exceeded.
+     * Costs: FC_PUSH_VPM 1, FC_PUSH_WQM 4, FC_LOOP 4, FC_REP 1.
+     */
+    int nCost = 0;
+    GLboolean bPush = GL_FALSE;
+
+    switch (uReason)
+    {
+    case FC_PUSH_VPM:
+        nCost = 1;
+        bPush = GL_TRUE;
+        break;
+    case FC_PUSH_WQM:
+        nCost = 4;
+        bPush = GL_TRUE;
+        break;
+    case FC_LOOP:
+        nCost = 4;
+        break;
+    case FC_REP:
+        /* TODO : for 16 vp asic, should be 2 */
+        nCost = 1;
+        break;
+    }
+
+    if(GL_TRUE == bCheckMaxOnly)
+    {
+        if(GL_TRUE == bPush
+           && (pAsm->CALLSTACK[pAsm->CALLSP].current + nCost) > pAsm->CALLSTACK[pAsm->CALLSP].max)
+        {
+            pAsm->CALLSTACK[pAsm->CALLSP].max = pAsm->CALLSTACK[pAsm->CALLSP].current + nCost;
+        }
+        return;
+    }
+
+    pAsm->CALLSTACK[pAsm->CALLSP].current += nCost;
+
+    if(pAsm->CALLSTACK[pAsm->CALLSP].current > pAsm->CALLSTACK[pAsm->CALLSP].max)
+    {
+        pAsm->CALLSTACK[pAsm->CALLSP].max = pAsm->CALLSTACK[pAsm->CALLSP].current;
+    }
+}
+
+GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
+{
+    /* Append a CF JUMP with pop_count `pops` whose address is its own m_uIndex
+     * plus `offset`.  Returns GL_FALSE when add_cf_instruction fails. */
+    R700ControlFlowGenericClause *pJump;
+    if( !add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+    pJump = pAsm->cf_current_cf_clause_ptr;
+    pJump->m_Word1.f.pop_count        = pops;
+    pJump->m_Word1.f.cf_const         = 0x0;
+    pJump->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pJump->m_Word1.f.end_of_program   = 0x0;
+    pJump->m_Word1.f.valid_pixel_mode = 0x0;
+    pJump->m_Word1.f.cf_inst          = SQ_CF_INST_JUMP;
+    pJump->m_Word1.f.whole_quad_mode  = 0x0;
+    pJump->m_Word1.f.barrier          = 0x1;
+    pJump->m_Word0.f.addr             = pJump->m_uIndex + offset;
+
+    return GL_TRUE;
+}
+
+GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
+{
+    /* Append a CF POP with pop_count `pops` whose address is its own m_uIndex
+     * plus one.  Returns GL_FALSE when add_cf_instruction fails. */
+    R700ControlFlowGenericClause *pPop;
+    if( !add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+    pPop = pAsm->cf_current_cf_clause_ptr;
+    pPop->m_Word1.f.pop_count        = pops;
+    pPop->m_Word1.f.cf_const         = 0x0;
+    pPop->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pPop->m_Word1.f.end_of_program   = 0x0;
+    pPop->m_Word1.f.valid_pixel_mode = 0x0;
+    pPop->m_Word1.f.cf_inst          = SQ_CF_INST_POP;
+    pPop->m_Word1.f.whole_quad_mode  = 0x0;
+    pPop->m_Word1.f.barrier          = 0x1;
+    pPop->m_Word0.f.addr             = pPop->m_uIndex + 1;
+
+    return GL_TRUE;
+}
+
+GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
+{
+    /* IF: emit the predicate-setting ALU instruction, then a CF JUMP whose
+     * address is filled in later by assemble_ELSE/assemble_ENDIF, and push an
+     * FC_IF record on fc_stack.  bHasElse tells whether an ELSE follows.
+     * Returns GL_FALSE if the predicate or the CF instruction cannot be added. */
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+    if(GL_FALSE == assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE) )
+    {
+        return GL_FALSE;
+    }
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* pop_count is 1 when no ELSE follows, 0 otherwise */
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = (GL_TRUE != bHasElse) ? 1 : 0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_JUMP;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
+
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
+
+    /* remember the jump in a new FC_IF record */
+    pAsm->FCSP++;
+    pAsm->fc_stack[pAsm->FCSP].type  = FC_IF;
+    pAsm->fc_stack[pAsm->FCSP].mid   = NULL;
+    pAsm->fc_stack[pAsm->FCSP].midLen= 0;
+    pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
+
+#ifndef USE_CF_FOR_POP_AFTER
+    if(GL_TRUE != bHasElse)
+    {
+        pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
+    }
+#endif /* USE_CF_FOR_POP_AFTER */
+
+    checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE);
+    return GL_TRUE;
+}
+
+GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
+{
+    /* ELSE: emit CF ELSE, store it as mid[0] of the innermost fc_stack entry and
+     * set the IF jump (first) address to uNumOfNode - 1.  GL_FALSE on failure. */
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 1;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_ELSE;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
+
+    pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
+                                         (void *)pAsm->fc_stack[pAsm->FCSP].mid, 0,
+                                         sizeof(R700ControlFlowGenericClause *) );
+    if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
+    {   /* allocation failed: mid[0] must not be dereferenced */
+        return GL_FALSE;
+    }
+    pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
+
+#ifndef USE_CF_FOR_POP_AFTER
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
+#endif /* USE_CF_FOR_POP_AFTER */
+    pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
+    return GL_TRUE;
+}
+
+GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
+{
+    /* ENDIF: patch the pending jump of the innermost IF (or its ELSE) with the
+     * current CF instruction count, free the ELSE bookkeeping and pop the
+     * FC_IF record.  Returns GL_FALSE when the innermost record is not an IF. */
+    if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
+    {
+        /* checked first so first/mid of a non-IF record are never patched or freed */
+        radeon_error("if/endif in shader code are not paired. \n");
+        return GL_FALSE;
+    }
+
+#ifdef USE_CF_FOR_POP_AFTER
+    if(GL_FALSE == pops(pAsm, 1)) { return GL_FALSE; }
+#endif /* USE_CF_FOR_POP_AFTER */
+
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+    if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
+    {
+        /* no else in between */
+        pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
+    }
+    else
+    {
+        pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
+        FREE(pAsm->fc_stack[pAsm->FCSP].mid);
+        pAsm->fc_stack[pAsm->FCSP].mid = NULL;   /* no dangling pointer in the popped slot */
+    }
+
+    pAsm->FCSP--;
+    decreaseCurrent(pAsm, FC_PUSH_VPM);
+
+    return GL_TRUE;
+}
+
+/* Emit LOOP_START_NO_AL and push a new FC_LOOP frame whose 'first' is that clause. */
+GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
+{
+    R700ControlFlowGenericClause *pLoopStart;
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+        return GL_FALSE;
+
+    pLoopStart = pAsm->cf_current_cf_clause_ptr;
+    pLoopStart->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_START_NO_AL;
+    pLoopStart->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pLoopStart->m_Word1.f.cf_const         = 0x0;
+    pLoopStart->m_Word1.f.pop_count        = 0;
+    pLoopStart->m_Word1.f.barrier          = 0x1;
+    pLoopStart->m_Word1.f.end_of_program   = 0x0;
+    pLoopStart->m_Word1.f.valid_pixel_mode = 0x0;
+    pLoopStart->m_Word1.f.whole_quad_mode  = 0x0;
+
+    /* The new frame starts with no recorded break/continue clauses. */
+    pAsm->FCSP++;
+    pAsm->fc_stack[pAsm->FCSP].type     = FC_LOOP;
+    pAsm->fc_stack[pAsm->FCSP].first    = pLoopStart;
+    pAsm->fc_stack[pAsm->FCSP].mid      = NULL;
+    pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
+    pAsm->fc_stack[pAsm->FCSP].midLen   = 0;
+
+    checkStackDepth(pAsm, FC_LOOP, GL_FALSE);
+
+    return GL_TRUE;
+}
+
+/* Emit a predicated loop break: LOOP_BREAK (recorded in the enclosing loop frame's mid[]
+ * so ENDLOOP can patch its address) followed by a POP.  Empty unless USE_CF_FOR_CONTINUE_BREAK. */
+GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
+{
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+    assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
+
+    /* Find the innermost loop frame; the scan stops at 0 when none encloses us. */
+    unsigned int unFCSP;
+    for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
+    {
+        if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+        {
+            break;
+        }
+    }
+    if(0 == unFCSP)
+    {
+        radeon_error("Break is not inside loop/endloop pair.\n");
+        return GL_FALSE;
+    }
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 1;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_BREAK;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
+
+    pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
+                                              (void *)pAsm->fc_stack[unFCSP].mid,
+                                              sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
+                                              sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
+    if(NULL == pAsm->fc_stack[unFCSP].mid)
+    {
+        pAsm->fc_stack[unFCSP].unNumMid = 0; /* keep the count consistent with the lost array */
+        return GL_FALSE;
+    }
+    pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
+    pAsm->fc_stack[unFCSP].unNumMid++;
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 1;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_POP;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
+    pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr             = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
+
+    checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
+
+#endif //USE_CF_FOR_CONTINUE_BREAK
+    return GL_TRUE;
+}
+
+/* Emit a predicated loop continue: LOOP_CONTINUE (recorded in the enclosing loop frame's mid[]
+ * so ENDLOOP can patch its address) followed by a POP.  Empty unless USE_CF_FOR_CONTINUE_BREAK. */
+GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
+{
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+    assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
+
+    /* Find the innermost loop frame; the scan stops at 0 when none encloses us. */
+    unsigned int unFCSP;
+    for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
+    {
+        if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+        {
+            break;
+        }
+    }
+    if(0 == unFCSP)
+    {
+        radeon_error("Continue is not inside loop/endloop pair.\n");
+        return GL_FALSE;
+    }
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 1;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_CONTINUE;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
+
+    pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
+                                              (void *)pAsm->fc_stack[unFCSP].mid,
+                                              sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
+                                              sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
+    if(NULL == pAsm->fc_stack[unFCSP].mid)
+    {
+        pAsm->fc_stack[unFCSP].unNumMid = 0; /* keep the count consistent with the lost array */
+        return GL_FALSE;
+    }
+    pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
+    pAsm->fc_stack[unFCSP].unNumMid++;
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 1;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_POP;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
+    pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr             = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
+
+    checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
+
+#endif /* USE_CF_FOR_CONTINUE_BREAK */
+    return GL_TRUE;
+}
+
+/* Emit LOOP_END for the loop frame on top of the stack, cross-link it with the loop start,
+ * patch the recorded break/continue clauses, act on a pending return-in-loop, pop the frame. */
+GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
+{
+    GLuint i;
+    GLuint unFCSP;
+    GLuint unIF = 0;
+
+    /* Reject a mismatched frame before emitting anything or dereferencing its first/mid. */
+    if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
+    {
+        radeon_error("loop/endloop in shader code are not paired. \n");
+        return GL_FALSE;
+    }
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_END;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
+
+    pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr   = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
+    pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
+
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+    for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
+    {
+        pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
+    }
+    if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
+    {
+        FREE(pAsm->fc_stack[pAsm->FCSP].mid);
+        pAsm->fc_stack[pAsm->FCSP].mid = NULL; /* do not leave a dangling pointer in the stack slot */
+    }
+#endif
+
+    if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
+    {
+        for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
+        {
+            if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+            {
+                breakLoopOnFlag(pAsm, unFCSP);
+                break;
+            }
+            else if(FC_IF == pAsm->fc_stack[unFCSP].type)
+            {
+                unIF++;
+            }
+        }
+        if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
+        {
+#ifdef USE_CF_FOR_POP_AFTER
+            returnOnFlag(pAsm, unIF);
+#else
+            returnOnFlag(pAsm, 0);
+#endif /* USE_CF_FOR_POP_AFTER */
+            pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
+        }
+    }
+
+    pAsm->FCSP--;
+    decreaseCurrent(pAsm, FC_LOOP);
+
+    return GL_TRUE;
+}
+
+/* Append an unconditional CF RETURN clause (pop_count 0, barrier set); silently does nothing if no clause can be added. */
+void add_return_inst(r700_AssemblerBase *pAsm)
+{
+    R700ControlFlowGenericClause *pRet;
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+        return;
+
+    pRet = pAsm->cf_current_cf_clause_ptr;
+    pRet->m_Word1.f.cf_inst          = SQ_CF_INST_RETURN;
+    pRet->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pRet->m_Word1.f.cf_const         = 0x0;
+    pRet->m_Word1.f.pop_count        = 0;
+    pRet->m_Word1.f.barrier          = 0x1;
+    pRet->m_Word1.f.end_of_program   = 0x0;
+    pRet->m_Word1.f.valid_pixel_mode = 0x0;
+    pRet->m_Word1.f.whole_quad_mode  = 0x0;
+}
+
+/* Open a subroutine: reserve the next slot in pAsm->subs, push a call frame that makes the
+ * sub's private CF list the active one, and push an FC_REP flow-control frame. */
+GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
+{
+    SUB_OFFSET *pSub;
+
+    /* Grow the sub table by 10 entries whenever it is full. */
+    if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
+    {
+        pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
+                                  sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
+                                  sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
+        if(NULL == pAsm->subs)
+            return GL_FALSE;
+        pAsm->unSubArraySize += 10;
+    }
+
+    pSub = &(pAsm->subs[pAsm->unSubArrayPointer]);
+    pSub->subIL_Offset                       = nILindex + uiIL_Shift;
+    pSub->lstCFInstructions_local.pHead      = NULL;
+    pSub->lstCFInstructions_local.pTail      = NULL;
+    pSub->lstCFInstructions_local.uNumOfNode = 0;
+
+    /* NOTE(review): the call frame keeps a pointer into pAsm->subs; confirm it cannot be
+     * left stale when a later BGNSUB reallocates the table. */
+    pAsm->CALLSP++;
+    pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex             = pAsm->unSubArrayPointer;
+    pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry         = pAsm->FCSP;
+    pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local = &(pSub->lstCFInstructions_local);
+    pAsm->CALLSTACK[pAsm->CALLSP].max                      = 0;
+    pAsm->CALLSTACK[pAsm->CALLSP].current                  = 0;
+    SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
+
+    pAsm->unSubArrayPointer++;
+
+    /* start sub */
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+    pAsm->FCSP++;
+    pAsm->fc_stack[pAsm->FCSP].type = FC_REP;
+    checkStackDepth(pAsm, FC_REP, GL_FALSE);
+    return GL_TRUE;
+}
+
+/* Close the current subroutine: store its peak stack depth, switch back to the caller's CF list, pop the FC_REP frame. */
+GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
+{
+    if(FC_REP != pAsm->fc_stack[pAsm->FCSP].type)
+    {
+        radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
+        return GL_FALSE;
+    }
+
+    /* Publish the call frame's max to the sub descriptor before the frame is dropped. */
+    pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax
+        = pAsm->CALLSTACK[pAsm->CALLSP].max;
+
+    decreaseCurrent(pAsm, FC_REP);
+
+    /* Drop the call frame and make the previous frame's CF list active again. */
+    pAsm->CALLSP--;
+    SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
+
+    pAsm->FCSP--;
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+    return GL_TRUE;
+}
+
+/* Emit a return.  Inside a loop within a sub the return is deferred: the flag register is
+ * set, the loop is broken on it and LOOPRET_FLAGS is raised instead of emitting RETURN here. */
+GLboolean assemble_RET(r700_AssemblerBase *pAsm)
+{
+    GLuint unIF = 0;
+    GLuint unFCSP;
+
+    if(pAsm->CALLSP > 0)
+    {   /* in sub */
+        /* Walk the frames opened since the sub was entered, innermost first. */
+        unFCSP = pAsm->FCSP;
+        while(unFCSP > pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
+        {
+            if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+            {
+                setRetInLoopFlag(pAsm, SQ_SEL_1);
+                breakLoopOnFlag(pAsm, unFCSP);
+                pAsm->unCFflags |= LOOPRET_FLAGS;
+                return GL_TRUE;
+            }
+            if(FC_IF == pAsm->fc_stack[unFCSP].type)
+                unIF++;
+            unFCSP--;
+        }
+    }
+
+#ifdef USE_CF_FOR_POP_AFTER
+    if(unIF > 0)
+        pops(pAsm, unIF);
+#endif /* USE_CF_FOR_POP_AFTER */
+
+    add_return_inst(pAsm);
+
+    return GL_TRUE;
+}
+
+/* Emit a CF CALL clause and register it in pAsm->callers under the IL offset
+ * nILindex + uiIL_Shift.  If a sub with that offset is already in pAsm->subs only the
+ * stack-depth bookkeeping is updated; otherwise AssembleInstr() is re-entered from nILindex
+ * and, on success, pPresubDesc is stored in pAsm->subs[] at the index unSubArrayPointer had before. */
+GLboolean assemble_CAL(r700_AssemblerBase *pAsm, 
+                       GLint nILindex,
+                       GLuint uiIL_Shift,
+                       GLuint uiNumberInsts,
+                       struct prog_instruction *pILInst,
+                       PRESUB_DESC * pPresubDesc)
+{
+    GLint uiIL_Offset;
+
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count       = 1;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0; 
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; 
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_CALL;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
+
+    /* Put in caller */
+    if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
+    {
+        pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers, 
+                       sizeof(CALLER_POINTER) * pAsm->unCallerArraySize, 
+                       sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
+        if(NULL == pAsm->callers)
+        {
+            return GL_FALSE;
+        }
+        pAsm->unCallerArraySize += 10;
+    }
+    uiIL_Offset = nILindex + uiIL_Shift;
+    pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset; 
+    pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr       = pAsm->cf_current_cf_clause_ptr;
+    pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr  = NULL; 
+    pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL; 
+
+    pAsm->unCallerArrayPointer++;
+
+    int j;
+    GLuint max;
+    GLuint unSubID;
+    GLboolean bRet;
+    for(j=0; j<pAsm->unSubArrayPointer; j++)
+    {
+        if(uiIL_Offset == pAsm->subs[j].subIL_Offset)
+        {   /* compiled before */
+            /* candidate peak = the sub's recorded max + the current frame's 'current' depth */
+            max = pAsm->subs[j].unStackDepthMax 
+                + pAsm->CALLSTACK[pAsm->CALLSP].current;
+            if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
+            {
+                pAsm->CALLSTACK[pAsm->CALLSP].max = max;
+            }
+            
+            pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j; 
+            return GL_TRUE;
+        }
+    }
+    /* No match: assumes (TODO confirm) the recursion below starts at the callee's BGNSUB, which fills the next sub slot. */
+    pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
+    unSubID = pAsm->unSubArrayPointer;
+
+    bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm);
+
+    if(GL_TRUE == bRet)
+    {
+        max = pAsm->subs[unSubID].unStackDepthMax 
+            + pAsm->CALLSTACK[pAsm->CALLSP].current;
+        if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
+        {
+            pAsm->CALLSTACK[pAsm->CALLSP].max = max;
+        }
+
+        pAsm->subs[unSubID].pPresubDesc = pPresubDesc;
+    }
+
+    return bRet;
+}
+
+/*
+ * Emit one ALU MOV into the .x channel of the temporary register
+ * pAsm->flag_reg_index, the flag that RET and ENDLOOP handling use to carry
+ * a "return requested inside a loop" state.
+ *
+ * pAsm       assembler state; its D, D2 and S[0] instruction fields are
+ *            overwritten before next_ins() is called.
+ * flagValue  used as the source swizzle selector on all four channels; the
+ *            callers in this file pass SQ_SEL_1 (RET) and SQ_SEL_0
+ *            (returnOnFlag).
+ *
+ * Returns GL_FALSE if next_ins() fails, GL_TRUE otherwise.
+ */
+GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
+{
+    /* Destination: the flag register, X channel only, unpredicated. */
+    pAsm->D.dst.opcode     = SQ_OP2_INST_MOV;
+    pAsm->D.dst.op3        = 0;
+    pAsm->D.dst.rtype      = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg        = pAsm->flag_reg_index;
+    pAsm->D.dst.writex     = 1;
+    pAsm->D.dst.writey     = 0;
+    pAsm->D.dst.writez     = 0;
+    pAsm->D.dst.writew     = 0;
+    pAsm->D.dst.predicated = 0;
+    /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
+    pAsm->D.dst.math       = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
+
+    pAsm->D2.dst2.literal_slots = 1;
+    pAsm->D2.dst2.SaturateMode  = SATURATE_OFF;
+    pAsm->D2.dst2.index_mode    = SQ_INDEX_LOOP; /* Check this ! */
+
+    /* Source: temporary 0 with every channel's swizzle set to flagValue. */
+    pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = 0;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+    pAsm->S[0].src.swizzlex = flagValue;
+    pAsm->S[0].src.swizzley = flagValue;
+    pAsm->S[0].src.swizzlez = flagValue;
+    pAsm->S[0].src.swizzlew = flagValue;
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+
+/*
+ * Emit the ALU test of the return-in-loop flag register.
+ *
+ * A helper temporary is obtained from gethelpr() and a PRED_SETE is issued
+ * that compares pAsm->flag_reg_index (identity swizzle) against a second
+ * source whose four swizzles are all SQ_SEL_1, writing only the helper's .x.
+ * The ALU clause opcode is switched to SQ_CF_INST_ALU_PUSH_BEFORE first, and
+ * on success the push is accounted for through checkStackDepth().
+ *
+ * Returns GL_FALSE if next_ins() fails (checkStackDepth() is then skipped),
+ * GL_TRUE otherwise.
+ */
+GLboolean testFlag(r700_AssemblerBase *pAsm)
+{
+    GLuint tmp = gethelpr(pAsm);
+
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+    /* Destination: helper temporary, X channel, predicated write. */
+    pAsm->D.dst.opcode     = SQ_OP2_INST_PRED_SETE;
+    pAsm->D.dst.math       = 1;
+    pAsm->D.dst.rtype      = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg        = tmp;
+    pAsm->D.dst.writex     = 1;
+    pAsm->D.dst.writey     = 0;
+    pAsm->D.dst.writez     = 0;
+    pAsm->D.dst.writew     = 0;
+    pAsm->D.dst.predicated = 1;
+
+    pAsm->D2.dst2.literal_slots = 1;
+    pAsm->D2.dst2.SaturateMode  = SATURATE_OFF;
+    pAsm->D2.dst2.index_mode    = SQ_INDEX_LOOP; /* Check this ! */
+
+    /* Source 0: the flag register, channels passed straight through. */
+    pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = pAsm->flag_reg_index;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+    pAsm->S[0].src.swizzlex = SQ_SEL_X;
+    pAsm->S[0].src.swizzley = SQ_SEL_Y;
+    pAsm->S[0].src.swizzlez = SQ_SEL_Z;
+    pAsm->S[0].src.swizzlew = SQ_SEL_W;
+
+    /* Source 1: temporary 0 with SQ_SEL_1 selected on every channel. */
+    pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
+    pAsm->S[1].src.reg   = 0;
+    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[1].src));
+    pAsm->S[1].src.swizzlex = SQ_SEL_1;
+    pAsm->S[1].src.swizzley = SQ_SEL_1;
+    pAsm->S[1].src.swizzlez = SQ_SEL_1;
+    pAsm->S[1].src.swizzlew = SQ_SEL_1;
+
+    if( GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* Account for the push requested via ALU_PUSH_BEFORE. */
+    checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
+
+    return GL_TRUE;
+}
+
+/* Emit "if flag set: clear it, pop unIF + 1 entries and RETURN"; the sequence is always emitted in full, GL_FALSE reports an ALU step that failed. */
+GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF)
+{
+    GLboolean bOk = testFlag(pAsm);
+    jumpToOffest(pAsm, 1, 4);
+    if(GL_FALSE == setRetInLoopFlag(pAsm, SQ_SEL_0)) bOk = GL_FALSE;
+    pops(pAsm, unIF + 1);
+    add_return_inst(pAsm);
+    return bOk;
+}
+
+/* Test the return-in-loop flag, emit a LOOP_BREAK recorded in loop frame unFCSP's mid[], then a POP. */
+GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
+{
+    const GLboolean bTested = testFlag(pAsm); /* failure is reported at the end, emission continues */
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count        = 1;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const         = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program   = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_BREAK;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
+    pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier          = 0x1;
+    pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
+                                              (void *)pAsm->fc_stack[unFCSP].mid,
+                                              sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
+                                              sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
+    if(NULL == pAsm->fc_stack[unFCSP].mid)
+    {
+        pAsm->fc_stack[unFCSP].unNumMid = 0; /* keep the count consistent with the lost array */
+        return GL_FALSE;
+    }
+    pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
+    pAsm->fc_stack[unFCSP].unNumMid++;
+    pops(pAsm, 1);
+    return bTested;
+}
+
+GLboolean AssembleInstr(GLuint uiFirstInst,
+                        GLuint uiIL_Shift,
+                        GLuint uiNumberInsts,
+                        struct prog_instruction *pILInst, 
+						r700_AssemblerBase *pR700AsmCode)
+{
+    GLuint i;
+
+    pR700AsmCode->pILInst = pILInst;
+	for(i=uiFirstInst; i<uiNumberInsts; i++)
+    {
+        pR700AsmCode->uiCurInst = i;
+
+#ifndef USE_CF_FOR_CONTINUE_BREAK
+        if(OPCODE_BRK == pILInst[i+1].Opcode)
+        {
+            switch(pILInst[i].Opcode)            
+            {
+            case OPCODE_SLE:
+                pILInst[i].Opcode = OPCODE_SGT;
+                break;
+            case OPCODE_SLT:
+                pILInst[i].Opcode = OPCODE_SGE;
+                break;
+            case OPCODE_SGE:
+                pILInst[i].Opcode = OPCODE_SLT;
+                break;
+            case OPCODE_SGT:
+                pILInst[i].Opcode = OPCODE_SLE;
+                break;
+            case OPCODE_SEQ:
+                pILInst[i].Opcode = OPCODE_SNE;
+                break;
+            case OPCODE_SNE:
+                pILInst[i].Opcode = OPCODE_SEQ;
+                break;
+            default:
+                break;
+            }
+        }
+#endif
+        if(pILInst[i].CondUpdate == 1)
+        {
+            /* remember dest register used for cond evaluation */
+            /* XXX also handle PROGRAM_OUTPUT registers here? */
+            pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index; 
+        }
+
+        switch (pILInst[i].Opcode)
+        {
+        case OPCODE_ABS: 
+            if ( GL_FALSE == assemble_ABS(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+        case OPCODE_ADD: 
+        case OPCODE_SUB: 
+            if ( GL_FALSE == assemble_ADD(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+
+        case OPCODE_ARL: 
+            if ( GL_FALSE == assemble_ARL(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;
+        case OPCODE_ARR: 
+            radeon_error("Not yet implemented instruction OPCODE_ARR \n");
+            //if ( GL_FALSE == assemble_BAD("ARR") ) 
+                return GL_FALSE;
+            break;
+
+        case OPCODE_CMP: 
+            if ( GL_FALSE == assemble_CMP(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+        case OPCODE_COS: 
+            if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) ) 
+                return GL_FALSE;
+            break;  
+
+        case OPCODE_DP3: 
+        case OPCODE_DP4: 
+        case OPCODE_DPH: 
+            if ( GL_FALSE == assemble_DOT(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+
+        case OPCODE_DST: 
+            if ( GL_FALSE == assemble_DST(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+
+        case OPCODE_EX2: 
+            if ( GL_FALSE == assemble_EX2(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+        case OPCODE_EXP: 
+            if ( GL_FALSE == assemble_EXP(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;
+
+        case OPCODE_FLR:     
+            if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+        //case OP_FLR_INT: ;
+
+        //    if ( GL_FALSE == assemble_FLR_INT() ) 
+        //        return GL_FALSE;
+        //    break;  
+
+        case OPCODE_FRC: 
+            if ( GL_FALSE == assemble_FRC(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+
+        case OPCODE_KIL: 
+        case OPCODE_KIL_NV: 
+            if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) 
+                return GL_FALSE;
+            break;
+        case OPCODE_LG2: 
+            if ( GL_FALSE == assemble_LG2(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+        case OPCODE_LIT:
+            if ( GL_FALSE == assemble_LIT(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;
+        case OPCODE_LRP: 
+            if ( GL_FALSE == assemble_LRP(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+        case OPCODE_LOG: 
+            if ( GL_FALSE == assemble_LOG(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;
+
+        case OPCODE_MAD: 
+            if ( GL_FALSE == assemble_MAD(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+        case OPCODE_MAX: 
+            if ( GL_FALSE == assemble_MAX(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+        case OPCODE_MIN: 
+            if ( GL_FALSE == assemble_MIN(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+
+        case OPCODE_MOV: 
+            if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+        case OPCODE_MUL: 
+            if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;
+            
+        case OPCODE_NOISE1:
+            {                                               
+                callPreSub(pR700AsmCode, 
+                           GLSL_NOISE1,                         
+                           &noise1_presub,                                                  
+                           pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number, 
+                           1); 
+                radeon_error("noise1: not yet supported shader instruction\n");
+            };
+            break; 
+        case OPCODE_NOISE2: 
+            radeon_error("noise2: not yet supported shader instruction\n");
+            break; 
+        case OPCODE_NOISE3: 
+            radeon_error("noise3: not yet supported shader instruction\n");
+            break; 
+        case OPCODE_NOISE4: 
+            radeon_error("noise4: not yet supported shader instruction\n");
+            break; 
+
+        case OPCODE_POW: 
+            if ( GL_FALSE == assemble_POW(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+        case OPCODE_RCP: 
+            if ( GL_FALSE == assemble_RCP(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+        case OPCODE_RSQ: 
+            if ( GL_FALSE == assemble_RSQ(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+        case OPCODE_SIN: 
+            if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) ) 
+                return GL_FALSE;
+            break;  
+        case OPCODE_SCS: 
+            if ( GL_FALSE == assemble_SCS(pR700AsmCode) ) 
+                return GL_FALSE;
+            break; 
+            
+        case OPCODE_SEQ:
+            if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) 
+            {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_SGT: 
+            if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) 
+            {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_SGE: 
+            if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) 
+            { 
+                return GL_FALSE;
+            }
+            break;
+        
+        /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
+        case OPCODE_SLT: 
+            {
+                struct prog_src_register SrcRegSave[2];
+                SrcRegSave[0] = pILInst[i].SrcReg[0];
+                SrcRegSave[1] = pILInst[i].SrcReg[1];
+                pILInst[i].SrcReg[0] = SrcRegSave[1];
+                pILInst[i].SrcReg[1] = SrcRegSave[0];
+                if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) 
+                {
+                    pILInst[i].SrcReg[0] = SrcRegSave[0];
+                    pILInst[i].SrcReg[1] = SrcRegSave[1];
+                    return GL_FALSE;
+                }
+                pILInst[i].SrcReg[0] = SrcRegSave[0];
+                pILInst[i].SrcReg[1] = SrcRegSave[1];
+            }
+            break;
+
+        case OPCODE_SLE: 
+            {
+                struct prog_src_register SrcRegSave[2];
+                SrcRegSave[0] = pILInst[i].SrcReg[0];
+                SrcRegSave[1] = pILInst[i].SrcReg[1];
+                pILInst[i].SrcReg[0] = SrcRegSave[1];
+                pILInst[i].SrcReg[1] = SrcRegSave[0];
+                if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) 
+                {
+                    pILInst[i].SrcReg[0] = SrcRegSave[0];
+                    pILInst[i].SrcReg[1] = SrcRegSave[1];
+                    return GL_FALSE;
+                }
+                pILInst[i].SrcReg[0] = SrcRegSave[0];
+                pILInst[i].SrcReg[1] = SrcRegSave[1];
+            }
+            break;
+
+        case OPCODE_SNE: 
+            if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) 
+            {
+                return GL_FALSE;
+            }
+            break;
+
+        //case OP_STP: 
+        //    if ( GL_FALSE == assemble_STP(pR700AsmCode) ) 
+        //        return GL_FALSE;
+        //    break;
+
+        case OPCODE_SWZ: 
+            if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) 
+            {
+                return GL_FALSE; 
+            }
+            else
+            {
+                if( (i+1)<uiNumberInsts )
+                {
+                    if(OPCODE_END != pILInst[i+1].Opcode)
+                    {
+                        if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
+                        {
+                            pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
+                        }
+                    }
+                }
+            }
+            break;
+        case OPCODE_DDX:
+        case OPCODE_DDY:
+        case OPCODE_TEX: 
+        case OPCODE_TXB:  
+        case OPCODE_TXP: 
+            if ( GL_FALSE == assemble_TEX(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;
+
+        case OPCODE_TRUNC:
+            if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) )
+                return GL_FALSE;
+            break;
+
+        case OPCODE_XPD: 
+            if ( GL_FALSE == assemble_XPD(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;  
+
+        case OPCODE_IF:
+            {                
+                GLboolean bHasElse = GL_FALSE;
+
+                if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE)
+                {
+                    bHasElse = GL_TRUE;
+                }
+
+                if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) ) 
+                {
+                    return GL_FALSE;
+                }
+            }
+            break;
+
+        case OPCODE_ELSE : 
+            if ( GL_FALSE == assemble_ELSE(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;
+
+        case OPCODE_ENDIF: 
+            if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) ) 
+                return GL_FALSE;
+            break;
+
+        case OPCODE_BGNLOOP:
+            if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
+            {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_BRK:
+            if( GL_FALSE == assemble_BRK(pR700AsmCode) )
+            {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_CONT:
+            if( GL_FALSE == assemble_CONT(pR700AsmCode) )
+            {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_ENDLOOP:
+            if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
+            {
+                return GL_FALSE;
+            }
+            break;
+
+        case OPCODE_BGNSUB:
+            if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) )
+            {
+                return GL_FALSE;
+            }
+            break;
+        
+        case OPCODE_RET:
+            if( GL_FALSE == assemble_RET(pR700AsmCode) )
+            {
+                return GL_FALSE;
+            }
+            break;
+        
+        case OPCODE_CAL:
+            if( GL_FALSE == assemble_CAL(pR700AsmCode, 
+                                         pILInst[i].BranchTarget,
+                                         uiIL_Shift,
+                                         uiNumberInsts,
+                                         pILInst,
+                                         NULL) )
+            {
+                return GL_FALSE;
+            }
+            break;
+
+        //case OPCODE_EXPORT: 
+        //    if ( GL_FALSE == assemble_EXPORT() ) 
+        //        return GL_FALSE;
+        //    break;
+
+        case OPCODE_ENDSUB:
+            return assemble_ENDSUB(pR700AsmCode);
+
+        case OPCODE_END: 
+			//pR700AsmCode->uiCurInst = i;
+			//This is a reminder: if a later export contains a depth/stencil
+			//export, we need a mov to re-arrange the DST channel. That mov is a
+			//pseudo inst, and this END inst is reused to emit it.
+            return GL_TRUE;
+
+        default:
+            radeon_error("internal: unknown instruction\n");
+            return GL_FALSE;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Set up assembler state before a shader program is assembled.
+ *
+ * Calls setRetInLoopFlag() with SQ_SEL_0, then sets alu_x_opcode to
+ * SQ_CF_INST_ALU.
+ * NOTE(review): the result of setRetInLoopFlag() is not checked here, and
+ * the statement order is presumably significant -- confirm against the
+ * helper before changing either.
+ *
+ * Always returns GL_TRUE.
+ */
+GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
+{
+    setRetInLoopFlag(pAsm, SQ_SEL_0);
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+    return GL_TRUE;
+}
+
+/*
+ * Relocate one ALU source selector that belongs to a pre-compiled sub.
+ *
+ * Selectors below SQ_ALU_SRC_GPR_SIZE are rebased by
+ * (unRegOffset - unMinRegIndex); selectors at or above
+ * SQ_ALU_SRC_CFILE_BASE are shifted by unConstOffset; every other value is
+ * returned unchanged.
+ */
+static GLuint reloc_presub_src_sel(GLuint sel,
+                                   GLuint unRegOffset,
+                                   GLuint unMinRegIndex,
+                                   GLuint unConstOffset)
+{
+    if(sel < SQ_ALU_SRC_GPR_SIZE)
+    {
+        return sel + unRegOffset - unMinRegIndex;
+    }
+    if(sel >= SQ_ALU_SRC_CFILE_BASE)
+    {
+        return sel + unConstOffset;
+    }
+    return sel;
+}
+
+/*
+ * Final relocation pass over the assembled program.
+ *
+ *  1. If HAS_LOOPRET is not set in unCFflags, the first ALU clause of the
+ *     main CF list either becomes a NOP (count == 0) or loses its first
+ *     ALU instruction.
+ *  2. The shader stack size is derived from CALLSTACK[0].max.
+ *  3. Each sub's CF list is appended to the main list: CF addresses and
+ *     instruction indices are shifted by the number of CF nodes that
+ *     precede the sub.  For subs that carry a presub descriptor, GPR and
+ *     constant-file selectors of every ALU instruction are rebased too.
+ *  4. Each recorded caller is pointed at the final CF offset of its sub,
+ *     and the GPRs of its prelude/finale ALU clauses are rebased.
+ *
+ * pAsm    : assembler state holding the CF lists, subs and callers.
+ * pILProg : IL program; only Parameters->NumParameters is read, as the
+ *           base of the presub constants.
+ *
+ * Always returns GL_TRUE.
+ */
+GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
+{
+    GLuint i;
+    int    j;
+    GLuint unCFoffset;
+    TypedShaderList * plstCFmain;
+    TypedShaderList * plstCFsub;
+
+    R700ShaderInstruction *        pInst;
+    R700ControlFlowGenericClause * pCFInst;
+
+    R700ControlFlowALUClause * pCF_ALU;
+    R700ALUInstruction       * pALU;
+    GLuint                     unConstBase = 0;
+    GLuint                     unConstOffset;
+    GLuint                     unRegOffset;
+    GLuint                     unMinRegIndex;
+    GLuint                     uNumSrc;
+
+    plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
+
+    /* remove flags init if they are not used */
+    if((pAsm->unCFflags & HAS_LOOPRET) == 0)
+    {
+        for(pInst = plstCFmain->pHead; NULL != pInst; pInst = pInst->pNextInst)
+        {
+            if(SIT_CF_ALU != pInst->m_ShaderInstType)
+            {
+                continue;
+            }
+
+            /* Only the first ALU clause of the main list is touched. */
+            pCF_ALU = (R700ControlFlowALUClause *)pInst;
+            if(0 == pCF_ALU->m_Word1.f.count)
+            {
+                /* count is 0: turn the whole clause into a NOP */
+                pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
+            }
+            else
+            {
+                /* unlink the first ALU instruction; the clause now starts
+                 * at the one that followed it */
+                pALU = pCF_ALU->m_pLinkedALUInstruction;
+                pALU->m_pLinkedALUClause = NULL;
+                pALU = (R700ALUInstruction *)(pALU->pNextInst);
+                pALU->m_pLinkedALUClause = pCF_ALU;
+                pCF_ALU->m_pLinkedALUInstruction = pALU;
+
+                pCF_ALU->m_Word1.f.count--;
+            }
+            break;
+        }
+    }
+
+    if(pAsm->CALLSTACK[0].max > 0)
+    {
+        pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2;
+    }
+
+    /* nothing else to do when the program has no subroutines */
+    if(0 == pAsm->unSubArrayPointer)
+    {
+        return GL_TRUE;
+    }
+
+    unCFoffset = plstCFmain->uNumOfNode;
+
+    if(NULL != pILProg->Parameters)
+    {
+        unConstBase = pILProg->Parameters->NumParameters;
+    }
+
+    /* Reloc subs */
+    for(i=0; i<pAsm->unSubArrayPointer; i++)
+    {
+        pAsm->subs[i].unCFoffset = unCFoffset;
+        plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
+
+        /* reloc instructions */
+        for(pInst = plstCFsub->pHead; NULL != pInst; pInst = pInst->pNextInst)
+        {
+            if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
+            {
+                pCFInst = (R700ControlFlowGenericClause *)pInst;
+
+                switch (pCFInst->m_Word1.f.cf_inst)
+                {
+                case SQ_CF_INST_POP:
+                case SQ_CF_INST_JUMP:
+                case SQ_CF_INST_ELSE:
+                case SQ_CF_INST_LOOP_END:
+                case SQ_CF_INST_LOOP_START:
+                case SQ_CF_INST_LOOP_START_NO_AL:
+                case SQ_CF_INST_LOOP_CONTINUE:
+                case SQ_CF_INST_LOOP_BREAK:
+                    /* these carry a CF address local to the sub */
+                    pCFInst->m_Word0.f.addr += unCFoffset;
+                    break;
+                default:
+                    break;
+                }
+            }
+
+            pInst->m_uIndex += unCFoffset;
+        }
+
+        if(NULL != pAsm->subs[i].pPresubDesc)
+        {
+            unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex;
+            unRegOffset   = pAsm->subs[i].pPresubDesc->maxStartReg;
+            /* unConstantsStart is already cumulative over the presubs loaded
+             * before this one (see callPreSub), so it is added to the
+             * program's constant base per sub and must not be accumulated
+             * from one sub to the next. */
+            unConstOffset = unConstBase + pAsm->subs[i].pPresubDesc->unConstantsStart;
+
+            for(pInst = plstCFsub->pHead; NULL != pInst; pInst = pInst->pNextInst)
+            {
+                if(SIT_CF_ALU != pInst->m_ShaderInstType)
+                {
+                    continue;
+                }
+
+                pCF_ALU = (R700ControlFlowALUClause *)pInst;
+
+                pALU = pCF_ALU->m_pLinkedALUInstruction;
+                /* the loop runs count + 1 times */
+                for(j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+                {
+                    pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
+
+                    pALU->m_Word0.f.src0_sel = reloc_presub_src_sel(pALU->m_Word0.f.src0_sel,
+                                                                    unRegOffset,
+                                                                    unMinRegIndex,
+                                                                    unConstOffset);
+
+                    if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F)
+                        >= SQ_OP3_INST_MUL_LIT )
+                    {   /* op3 : 3 srcs */
+                        pALU->m_Word1_OP3.f.src2_sel = reloc_presub_src_sel(pALU->m_Word1_OP3.f.src2_sel,
+                                                                            unRegOffset,
+                                                                            unMinRegIndex,
+                                                                            unConstOffset);
+                        pALU->m_Word0.f.src1_sel = reloc_presub_src_sel(pALU->m_Word0.f.src1_sel,
+                                                                        unRegOffset,
+                                                                        unMinRegIndex,
+                                                                        unConstOffset);
+                    }
+                    else
+                    {
+                        if(pAsm->bR6xx)
+                        {
+                            uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
+                        }
+                        else
+                        {
+                            uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
+                        }
+                        if(2 == uNumSrc)
+                        {   /* 2 srcs */
+                            pALU->m_Word0.f.src1_sel = reloc_presub_src_sel(pALU->m_Word0.f.src1_sel,
+                                                                            unRegOffset,
+                                                                            unMinRegIndex,
+                                                                            unConstOffset);
+                        }
+                    }
+                    pALU = (R700ALUInstruction*)(pALU->pNextInst);
+                }
+            }
+        }
+
+        /* Put sub into main */
+        plstCFmain->pTail->pNextInst = plstCFsub->pHead;
+        plstCFmain->pTail            = plstCFsub->pTail;
+        plstCFmain->uNumOfNode      += plstCFsub->uNumOfNode;
+
+        unCFoffset += plstCFsub->uNumOfNode;
+    }
+
+    /* reloc callers */
+    for(i=0; i<pAsm->unCallerArrayPointer; i++)
+    {
+        pAsm->callers[i].cf_ptr->m_Word0.f.addr
+            = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
+
+        if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc)
+        {
+            unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex;
+            unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg;
+
+            if(NULL != pAsm->callers[i].prelude_cf_ptr)
+            {
+                /* prelude: only the destination GPRs are rebased */
+                pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr);
+                pALU = pCF_ALU->m_pLinkedALUInstruction;
+                for(j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+                {
+                    pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
+                    pALU = (R700ALUInstruction*)(pALU->pNextInst);
+                }
+            }
+            if(NULL != pAsm->callers[i].finale_cf_ptr)
+            {
+                /* finale: only the src0 selectors are rebased */
+                pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr);
+                pALU = pCF_ALU->m_pLinkedALUInstruction;
+                for(j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+                {
+                    pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
+                    pALU = (R700ALUInstruction*)(pALU->pNextInst);
+                }
+            }
+        }
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Emit a call to a pre-compiled sub (presub).
+ *
+ * The uNumValidSrc sources of the current instruction are first MOVed into
+ * the sub's input registers.  The sub is then looked up by signature: on
+ * first use a descriptor is registered and the sub is assembled under a
+ * temporary register context, otherwise the existing descriptor is reused.
+ * When the call was assembled successfully, a final MOV copies the sub's
+ * output register (with the sub's output swizzle) to the real destination,
+ * and the prelude/finale ALU clauses are recorded on the newest caller
+ * entry for the reloc pass.
+ *
+ * uOutReg is not used.
+ *
+ * Returns the result of assemble_CAL(), or GL_FALSE when a source or
+ * destination could not be assembled or the descriptor array could not
+ * be grown.
+ */
+GLboolean callPreSub(r700_AssemblerBase* pAsm, 
+                         LOADABLE_SCRIPT_SIGNITURE scriptSigniture,                          
+                         COMPILED_SUB * pCompiledSub,                                               
+                         GLshort uOutReg,
+                         GLshort uNumValidSrc)
+{
+    /* assembler context saved while a presub is assembled for the first time */
+    GLuint savedStartTempReg = 0;
+    GLuint savedNumUsedRegs  = 0;
+    GLuint savedFirstHelpReg = 0;
+    GLuint savedHelpReg      = 0;
+    GLuint savedCurInst      = 0;
+    struct prog_instruction *savedILInst = NULL;
+
+    PRESUB_DESC * pDesc;
+    GLboolean     bFirstUse;
+    GLboolean     bOk;
+    int           idx;
+
+    R700ControlFlowGenericClause* pPreludeCF = NULL;
+
+    /* MOV every valid source into the matching presub input register */
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+    for(idx=0; idx<uNumValidSrc; idx++)
+    {
+        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+        setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+        pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+        pAsm->D.dst.reg    = pCompiledSub->srcRegIndex[idx];
+        pAsm->D.dst.writex = 1;
+        pAsm->D.dst.writey = 1;
+        pAsm->D.dst.writez = 1;
+        pAsm->D.dst.writew = 1;
+
+        if( GL_FALSE == assemble_src(pAsm, idx, 0) )
+        {
+            return GL_FALSE;
+        }
+
+        next_ins(pAsm);
+    }
+    if(uNumValidSrc > 0)
+    {
+        /* remember the clause holding the input MOVs for the reloc pass */
+        pPreludeCF         = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
+        pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+    }
+
+    /* look for a presub already registered under this signature */
+    idx = 0;
+    while( (idx < pAsm->unNumPresub)
+        && (pAsm->presubs[idx].sptSigniture != scriptSigniture) )
+    {
+        idx++;
+    }
+    bFirstUse = (idx == pAsm->unNumPresub) ? GL_TRUE : GL_FALSE;
+
+    if(GL_TRUE == bFirstUse)
+    {
+        /* not loaded yet: save the context that assembling the sub changes */
+        savedNumUsedRegs  = pAsm->number_used_registers;
+        savedFirstHelpReg = pAsm->uFirstHelpReg;
+        savedHelpReg      = pAsm->uHelpReg;
+        savedStartTempReg = pAsm->starting_temp_register_number;
+        savedILInst       = pAsm->pILInst;
+        savedCurInst      = pAsm->uiCurInst;
+
+        /* grow the descriptor array by four entries when it is full */
+        if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize )
+        {
+            pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs,
+                                      sizeof(PRESUB_DESC) * pAsm->unPresubArraySize,
+                                      sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) );
+            if(NULL == pAsm->presubs)
+            {
+                radeon_error("No memeory to allocate built in shader function description structures. \n");
+                return GL_FALSE;
+            }
+            pAsm->unPresubArraySize += 4;
+        }
+
+        pDesc = &(pAsm->presubs[idx]);
+        pDesc->sptSigniture = scriptSigniture;
+
+        /* constants of this presub start right after those of the previous
+         * one; the final offset is resolved at reloc time */
+        if(0 == pAsm->unNumPresub)
+        {
+            pDesc->unConstantsStart = 0;
+        }
+        else
+        {
+            pDesc->unConstantsStart =  pAsm->presubs[idx-1].unConstantsStart
+                                     + pAsm->presubs[idx-1].pCompiledSub->NumParameters;
+        }
+
+        pDesc->pCompiledSub = pCompiledSub;
+
+        pDesc->subIL_Shift     = pAsm->unCurNumILInsts;
+        pDesc->maxStartReg     = savedFirstHelpReg;
+        pAsm->unCurNumILInsts += pCompiledSub->NumInstructions;
+
+        pAsm->unNumPresub++;
+
+        /* the sub is assembled with registers numbered from 0 */
+        pAsm->starting_temp_register_number = 0;
+        pAsm->number_used_registers         = pCompiledSub->NumTemporaries;
+        pAsm->uFirstHelpReg                 = pAsm->number_used_registers;
+        pAsm->uHelpReg                      = pAsm->uFirstHelpReg;
+    }
+    else
+    {
+        /* was loaded: reuse the descriptor */
+        pDesc = &(pAsm->presubs[idx]);
+    }
+
+    bOk = assemble_CAL(pAsm,
+                       0,
+                       pDesc->subIL_Shift,
+                       pCompiledSub->NumInstructions,
+                       pCompiledSub->Instructions,
+                       pDesc);
+
+    if(GL_TRUE == bFirstUse)
+    {
+        pDesc->number_used_registers = pAsm->number_used_registers;
+
+        /* put the caller's context back */
+        pAsm->number_used_registers         = savedNumUsedRegs;
+        pAsm->uFirstHelpReg                 = savedFirstHelpReg;
+        pAsm->uHelpReg                      = savedHelpReg;
+        pAsm->starting_temp_register_number = savedStartTempReg;
+        pAsm->pILInst                       = savedILInst;
+        pAsm->uiCurInst                     = savedCurInst;
+    }
+
+    if(GL_FALSE != bOk)
+    {
+        /* MOV the presub output into the real destination */
+        pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+        pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+        if( GL_FALSE == assemble_dst(pAsm) )
+        {
+            return GL_FALSE;
+        }
+
+        setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+        pAsm->S[0].src.rtype    = SRC_REG_TEMPORARY;
+        pAsm->S[0].src.reg      = pCompiledSub->dstRegIndex;
+        pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX;
+        pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY;
+        pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ;
+        pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW;
+
+        next_ins(pAsm);
+
+        /* record both clauses on the newest caller entry for the reloc pass */
+        pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr  = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
+        pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = pPreludeCF;
+        pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+    }
+    else
+    {
+        radeon_error("Shader presub assemble failed. \n");
+    }
+
+    /* the presub's registers sit above the caller's first help register */
+    if( (pDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers )
+    {
+        pAsm->number_used_registers = pDesc->number_used_registers + pAsm->uFirstHelpReg;
+    }
+    if(pAsm->uFirstHelpReg > pDesc->maxStartReg)
+    {
+        pDesc->maxStartReg = pAsm->uFirstHelpReg;
+    }
+
+    return bOk;
+}
+
+/*
+ * Fill in the current export clause.
+ *
+ * type selects the export target: SQ_EXPORT_PIXEL, SQ_EXPORT_POS or
+ * SQ_EXPORT_PARAM; any other value is reported and GL_FALSE is returned.
+ * The array base is derived from export_starting_index, except for a pixel
+ * depth export, which uses SQ_CF_PIXEL_Z.  The burst count is
+ * export_count - 1 and the source GPR is starting_register_number.
+ *
+ * For a single-register export the component selects follow the register's
+ * entry in pucOutMask (a depth export always writes X only); unwritten
+ * components are masked.  For a multi-register export all four components
+ * are selected.
+ *
+ * On success the clause is also remembered in cf_last_export_ptr.
+ */
+GLboolean Process_Export(r700_AssemblerBase* pAsm,
+                         GLuint type,
+                         GLuint export_starting_index,
+                         GLuint export_count, 
+                         GLuint starting_register_number,
+                         GLboolean is_depth_export)
+{
+    unsigned char ucMask;
+
+    check_current_clause(pAsm, CF_EMPTY_CLAUSE);
+    /* this allocates cf_current_export_clause_ptr */
+    check_current_clause(pAsm, CF_EXPORT_CLAUSE);
+
+    pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
+
+    if(SQ_EXPORT_PIXEL == type)
+    {
+        pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base =
+            (GL_TRUE == is_depth_export) ? SQ_CF_PIXEL_Z
+                                         : SQ_CF_PIXEL_MRT0 + export_starting_index;
+    }
+    else if(SQ_EXPORT_POS == type)
+    {
+        pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
+    }
+    else if(SQ_EXPORT_PARAM == type)
+    {
+        pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
+    }
+    else
+    {
+        radeon_error("Unknown export type: %d\n", type);
+        return GL_FALSE;
+    }
+
+    pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr    = starting_register_number;
+
+    pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel    = SQ_ABSOLUTE;
+    pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
+    pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
+
+    pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count      = (export_count - 1);
+    pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program   = 0x0;
+    pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+    /* plain EXPORT here; callers switch the last one to EXPORT_DONE */
+    pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst          = SQ_CF_INST_EXPORT;
+    pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode  = 0x0;
+    pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier          = 0x1;
+
+    if (export_count != 1)
+    {
+        /* This should only be used if all components for all registers have been written */
+        pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
+        pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
+        pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
+        pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
+    }
+    else
+    {
+        assert(starting_register_number >= pAsm->starting_export_register_number);
+
+        ucMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
+        /* exports Z as a float into Red channel */
+        if (GL_TRUE == is_depth_export)
+        {
+            ucMask = 0x1;
+        }
+
+        /* one mask bit per component, X in bit 0 */
+        pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x =
+            ((ucMask & 0x1) != 0) ? SQ_SEL_X : SQ_SEL_MASK;
+        pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y =
+            ((ucMask & 0x2) != 0) ? SQ_SEL_Y : SQ_SEL_MASK;
+        pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z =
+            ((ucMask & 0x4) != 0) ? SQ_SEL_Z : SQ_SEL_MASK;
+        pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w =
+            ((ucMask & 0x8) != 0) ? SQ_SEL_W : SQ_SEL_MASK;
+    }
+
+    pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
+
+    return GL_TRUE;
+}
+
+/*
+ * Emit a MOV that copies the depth value from channel depth_channel_select
+ * of the depth export register into its X channel:
+ *
+ *   MOV depth_export_register.x, depth_export_register.<depth_channel_select>
+ *
+ * The opcode of the current IL instruction (expected to be OPCODE_END) is
+ * temporarily replaced by OPCODE_MOV while the instruction is emitted, and
+ * is restored afterwards whether or not emission succeeded.
+ *
+ * Returns GL_FALSE when next_ins() fails, GL_TRUE otherwise.
+ */
+GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
+{
+    GLboolean bEmitted;
+    gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; /* Should be OPCODE_END */
+
+    pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+    setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+    pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg   = pAsm->depth_export_register_number;
+
+    pAsm->D.dst.writex = 1;   /* depth goes in R channel for HW */
+
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    /* a source operand takes the source register-type enum */
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = pAsm->depth_export_register_number;
+
+    setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
+
+    noneg_PVSSRC(&(pAsm->S[0].src));
+
+    bEmitted = next_ins(pAsm);
+
+    /* undo the temporary opcode patch on the failure path as well */
+    pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
+
+    return (GL_FALSE == bEmitted) ? GL_FALSE : GL_TRUE;
+}
+ 
+/*
+ * Emit the export clauses of a fragment program.
+ *
+ * When a depth export register is in use, the depth value is first moved
+ * from its Z channel to the channel the export reads.  One pixel export is
+ * then emitted for every result bit set in OutputsWritten, using the
+ * register recorded in uiFP_OutputMap.  If nothing was exported, a dummy
+ * export of register 0 is emitted.  The last export emitted becomes
+ * EXPORT_DONE and ends the program.
+ *
+ * Returns GL_FALSE as soon as any step fails, GL_TRUE otherwise.
+ */
+GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
+                                   GLbitfield          OutputsWritten)  
+{ 
+    unsigned int unBit;
+    GLuint export_count = 0;
+    unsigned int i;
+
+    if(pR700AsmCode->depth_export_register_number >= 0) 
+    {
+        if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) )  /* depth */
+        {
+            return GL_FALSE;
+        }
+    }
+
+    for (i = 0; i < FRAG_RESULT_MAX; ++i)
+    {
+        unBit = 1u << i;
+
+        if (OutputsWritten & unBit)
+        {
+            GLboolean is_depth = (i == FRAG_RESULT_DEPTH) ? GL_TRUE : GL_FALSE;
+            if (!Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->uiFP_OutputMap[i], is_depth))
+            {
+                return GL_FALSE;
+            }
+            ++export_count;
+        }
+    }
+
+    /* Need to export something, otherwise we'll hang
+     * results are undefined anyway */
+    if(export_count == 0)
+    {
+        /* checked like every other export instead of being ignored */
+        if (!Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE))
+        {
+            return GL_FALSE;
+        }
+    }
+    
+    if(pR700AsmCode->cf_last_export_ptr != NULL) 
+    {
+        pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst        = SQ_CF_INST_EXPORT_DONE;
+        pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Export one vertex result as a parameter if its bit is set in
+ * OutputsWritten.  The export uses slot *pExportIndex and the register
+ * recorded in ucVP_OutputMap[uResult]; the slot is advanced on success.
+ * Returns GL_FALSE only when Process_Export() fails.
+ */
+static GLboolean export_vp_param_if_written(r700_AssemblerBase *pR700AsmCode,
+                                            GLbitfield          OutputsWritten,
+                                            unsigned int        uResult,
+                                            GLuint             *pExportIndex)
+{
+    if(0 == (OutputsWritten & (1u << uResult)))
+    {
+        return GL_TRUE;
+    }
+
+    if( GL_FALSE == Process_Export(pR700AsmCode,
+                                   SQ_EXPORT_PARAM,
+                                   *pExportIndex,
+                                   1,
+                                   pR700AsmCode->ucVP_OutputMap[uResult],
+                                   GL_FALSE) )
+    {
+        return GL_FALSE;
+    }
+
+    (*pExportIndex)++;
+    return GL_TRUE;
+}
+
+/*
+ * Emit the export clauses of a vertex program.
+ *
+ * Position exports come first (HPOS, then PSIZ); each one emitted is
+ * subtracted from number_of_exports.  Parameter exports follow in the order
+ * COL0, COL1, FOGC, TEX0..TEX7, VAR0..VERT_RESULT_MAX-1, each taking the
+ * next parameter slot.  If no parameter export remains after the position
+ * exports, a dummy parameter export with swizzle (0, 0, 0, 1) is emitted.
+ * The last position export and the last export overall are marked
+ * EXPORT_DONE, and the last export ends the program.
+ *
+ * Returns GL_FALSE as soon as an export fails, GL_TRUE otherwise.
+ */
+GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
+                                 GLbitfield          OutputsWritten)  
+{
+    unsigned int unBit;
+    unsigned int i;
+
+    GLuint export_starting_index  = 0;
+    GLuint export_count           = pR700AsmCode->number_of_exports;
+
+    unBit = 1u << VERT_RESULT_HPOS;
+    if(OutputsWritten & unBit)
+    {
+        if( GL_FALSE == Process_Export(pR700AsmCode, 
+                                       SQ_EXPORT_POS, 
+                                       export_starting_index, 
+                                       1, 
+                                       pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
+                                       GL_FALSE) )
+        {
+            return GL_FALSE;
+        }
+        export_starting_index++;
+        export_count--;
+    }
+
+    unBit = 1u << VERT_RESULT_PSIZ;
+    if(OutputsWritten & unBit)
+    {
+        if( GL_FALSE == Process_Export(pR700AsmCode,
+                                       SQ_EXPORT_POS,
+                                       export_starting_index,
+                                       1,
+                                       pR700AsmCode->ucVP_OutputMap[VERT_RESULT_PSIZ],
+                                       GL_FALSE) )
+        {
+            return GL_FALSE;
+        }
+        export_count--;
+    }
+
+    /* close the position exports; there is no export clause yet when
+     * neither HPOS nor PSIZ was written */
+    if(NULL != pR700AsmCode->cf_last_export_ptr)
+    {
+        pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+    }
+
+    pR700AsmCode->number_of_exports = export_count;
+    export_starting_index = 0;
+
+    if( GL_FALSE == export_vp_param_if_written(pR700AsmCode, OutputsWritten,
+                                               VERT_RESULT_COL0, &export_starting_index) )
+    {
+        return GL_FALSE;
+    }
+    if( GL_FALSE == export_vp_param_if_written(pR700AsmCode, OutputsWritten,
+                                               VERT_RESULT_COL1, &export_starting_index) )
+    {
+        return GL_FALSE;
+    }
+    if( GL_FALSE == export_vp_param_if_written(pR700AsmCode, OutputsWritten,
+                                               VERT_RESULT_FOGC, &export_starting_index) )
+    {
+        return GL_FALSE;
+    }
+
+    for(i=0; i<8; i++)
+    {
+        if( GL_FALSE == export_vp_param_if_written(pR700AsmCode, OutputsWritten,
+                                                   VERT_RESULT_TEX0 + i, &export_starting_index) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+    {
+        if( GL_FALSE == export_vp_param_if_written(pR700AsmCode, OutputsWritten,
+                                                   i, &export_starting_index) )
+        {
+            return GL_FALSE;
+        }
+    }
+
+    /* At least one param should be exported */
+    if (export_count) 
+    {
+        if(NULL != pR700AsmCode->cf_last_export_ptr)
+        {
+            pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+        }
+    }
+    else
+    {
+        if( GL_FALSE == Process_Export(pR700AsmCode,
+                                       SQ_EXPORT_PARAM, 
+                                       0, 
+                                       1, 
+                                       pR700AsmCode->starting_export_register_number,
+                                       GL_FALSE) )
+        {
+            return GL_FALSE;
+        }
+
+        /* dummy export: constant (0, 0, 0, 1) instead of register contents */
+        pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
+        pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
+        pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
+        pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
+        pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+    }
+
+    if(NULL != pR700AsmCode->cf_last_export_ptr)
+    {
+        pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
+    }
+
+    return GL_TRUE;
+}
+
+/*
+ * Release the arrays owned by the assembler: the output mask table, the
+ * instruction dependency table, and the sub, caller and presub descriptor
+ * arrays.  Each pointer is cleared after it is freed, so the structure
+ * holds no dangling pointers and a repeated call does not free twice.
+ *
+ * Always returns GL_TRUE.
+ */
+GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
+{
+    FREE(pR700AsmCode->pucOutMask);
+    pR700AsmCode->pucOutMask = NULL;
+
+    FREE(pR700AsmCode->pInstDeps);
+    pR700AsmCode->pInstDeps = NULL;
+
+    if(NULL != pR700AsmCode->subs)
+    {
+        FREE(pR700AsmCode->subs);
+        pR700AsmCode->subs = NULL;
+    }
+    if(NULL != pR700AsmCode->callers)
+    {
+        FREE(pR700AsmCode->callers);
+        pR700AsmCode->callers = NULL;
+    }
+
+    if(NULL != pR700AsmCode->presubs)
+    {
+        FREE(pR700AsmCode->presubs);
+        pR700AsmCode->presubs = NULL;
+    }
+
+    return GL_TRUE;
+}
+
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h
new file mode 100644
index 0000000000..2d3c32487e
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_assembler.h
@@ -0,0 +1,682 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _R700_ASSEMBLER_H_
+#define _R700_ASSEMBLER_H_
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+
+#include "r700_chip.h"
+#include "r700_shaderinst.h"
+#include "r700_shader.h"
+
+typedef enum LOADABLE_SCRIPT_SIGNITURE /* id type stored in PRESUB_DESC.sptSigniture and passed to callPreSub() */
+{
+    GLSL_NOISE1 = 0x10000001, /* presumably the GLSL noise1..noise4 built-ins -- confirm against the caller */
+    GLSL_NOISE2 = 0x10000002,
+    GLSL_NOISE3 = 0x10000003,
+    GLSL_NOISE4 = 0x10000004
+}LOADABLE_SCRIPT_SIGNITURE;
+
+typedef struct COMPILED_SUB /* one compiled subroutine; referenced by PRESUB_DESC.pCompiledSub and passed to callPreSub() */
+{
+    struct  prog_instruction *Instructions; /* instruction array of the subroutine */
+    GLuint  NumInstructions; /* presumably the number of entries in Instructions -- confirm */
+    GLuint  NumTemporaries;
+    GLuint  NumParameters;
+    GLuint  MinRegIndex;
+    GLfloat (*ParameterValues)[4]; /* pointer to rows of four floats */
+    GLbyte  outputSwizzleX; 
+    GLbyte  outputSwizzleY;
+    GLbyte  outputSwizzleZ;
+    GLbyte  outputSwizzleW;
+    GLshort srcRegIndex[3]; /* one slot per source operand, three at most */
+    GLushort dstRegIndex;
+}COMPILED_SUB;
+
+typedef struct PRESUB_DESCtag /* descriptor of one pre-built subroutine; r700_AssemblerBase.presubs points at these */
+{
+    LOADABLE_SCRIPT_SIGNITURE sptSigniture; /* which script this descriptor is for */
+    GLint  subIL_Shift;
+    struct prog_src_register InReg[3]; /* up to three source operands */
+    struct prog_dst_register OutReg;
+
+    GLushort maxStartReg;
+    GLushort number_used_registers;
+
+    GLuint   unConstantsStart;
+
+    COMPILED_SUB * pCompiledSub; /* the compiled body this descriptor refers to */
+} PRESUB_DESC;
+
+typedef enum SHADER_PIPE_TYPE /* kept in r700_AssemblerBase.currentShaderType; passed to Init_r700_AssemblerBase() */
+{
+    SPT_VP = 0, /* presumably "vertex program" -- confirm */
+    SPT_FP = 1  /* presumably "fragment program" -- confirm */
+} SHADER_PIPE_TYPE;
+
+typedef enum ConstantCycles /* array dimensions used by r700_AssemblerBase */
+{
+    NUMBER_OF_CYCLES     = 3, /* first dimension of hw_gpr[][] */
+    NUMBER_OF_COMPONENTS = 4  /* second dimension of hw_gpr[][]; length of hw_cfile_addr[] and hw_cfile_chan[] */
+} ConstantCycles;
+
+typedef enum  HARDWARE_LIMIT_VALUES  /* register-file limits (aliases of SQ_ALU_SRC_* constants) and array sizes */
+{
+   TEMPORARY_REGISTER_OFFSET = SQ_ALU_SRC_GPR_BASE,
+   MAX_TEMPORARY_REGISTERS   = SQ_ALU_SRC_GPR_SIZE,
+   MAX_CONSTANT_REGISTERS    = SQ_ALU_SRC_CFILE_SIZE,
+   CFILE_REGISTER_OFFSET     = SQ_ALU_SRC_CFILE_BASE,
+   NUMBER_OF_INPUT_COLORS    = 2,  /* length of r700_AssemblerBase.input_color_is_used[] */
+   NUMBER_OF_OUTPUT_COLORS   = 8,  /* length of r700_AssemblerBase.color_export_register_number[] */
+   NUMBER_OF_TEXTURE_UNITS   = 16, /* length of r700_AssemblerBase.input_texture_unit_is_used[] */
+   MEGA_FETCH_BYTES          = 32
+} HARDWARE_LIMIT_VALUES;
+
+typedef enum AddressMode /* NOTE(review): presumably the value handled by addrmode_PVSDST()/addrmode_PVSSRC() -- confirm */
+{
+    ADDR_ABSOLUTE          = 0,
+    ADDR_RELATIVE_A0       = 1,
+    ADDR_RELATIVE_FLI_0    = 2,
+    NUMBER_OF_ADDR_MOD     = 3  /* count of the modes listed above */
+} AddressMode;
+
+typedef enum SrcRegisterType /* NOTE(review): presumably the value stored in PVSSRC.rtype (3 bits) -- confirm */
+{
+    SRC_REG_TEMPORARY      = 0,
+    SRC_REG_INPUT          = 1,
+    SRC_REG_CONSTANT       = 2,
+    SRC_REG_ALT_TEMPORARY  = 3,
+    SRC_REC_LITERAL        = 4, /* spelled REC, not REG; kept because other files may use this name */
+    NUMBER_OF_SRC_REG_TYPE = 5  /* count of the types listed above */
+} SrcRegisterType;
+
+typedef enum DstRegisterType /* NOTE(review): presumably the value stored in PVSDST.rtype (3 bits) -- confirm */
+{
+    DST_REG_TEMPORARY      = 0,
+    DST_REG_A0             = 1,
+    DST_REG_OUT            = 2,
+    DST_REG_OUT_X_REPL     = 3,
+    DST_REG_ALT_TEMPORARY  = 4,
+    DST_REG_INPUT          = 5,
+    NUMBER_OF_DST_REG_TYPE = 6  /* count of the types listed above */
+} DstRegisterType;
+
+typedef unsigned int BITS; /* base type of every bit-field in this header */
+
+typedef struct PVSDSTtag /* destination-operand view of PVSDWORD; the field widths add up to 32 */
+{
+	BITS opcode:8;     //(:6)  //@@@ really should be 10 bits for OP2
+	BITS math:1;
+	BITS predicated:1; //10   //8
+	BITS pred_inv  :1; //11   //8
+
+	BITS rtype:3;      // presumably a DstRegisterType value -- confirm
+	BITS reg:10;       //24   //20
+
+	BITS writex:1;     // writex..writew: one bit per component (x, y, z, w)
+	BITS writey:1;
+	BITS writez:1;
+	BITS writew:1;     //28
+
+	BITS op3:1;       // 29  Represents *_OP3_* ALU opcode
+
+	BITS dualop:1;    // 30  //26
+
+	BITS addrmode0:1; //31   //29
+	BITS addrmode1:1; //32
+} PVSDST;
+
+typedef struct PVSINSTtag /* second destination word (PVSDWORD.dst2); uses 7 bits in total */
+{
+    BITS literal_slots      :2; 
+    BITS SaturateMode :2; 
+    BITS index_mode   :3;
+} PVSINST;
+
+typedef struct PVSSRCtag /* source-operand view of PVSDWORD; the field widths add up to 32 */
+{
+	BITS rtype:3;     // presumably a SrcRegisterType value -- confirm
+	BITS addrmode0:1;        
+	BITS reg:10;      //14     (8)
+	BITS swizzlex:3;  // one 3-bit selector per component
+	BITS swizzley:3;
+	BITS swizzlez:3;
+	BITS swizzlew:3;  //26        
+
+	BITS abs:1;
+	BITS negx:1;      // one negate bit per component
+	BITS negy:1;
+	BITS negz:1;
+	BITS negw:1;      //31
+	//BITS addrsel:2;
+	BITS addrmode1:1; //32
+} PVSSRC;
+
+typedef struct PVSMATHtag /* math view of PVSDWORD; the field widths add up to 32 */
+{
+	BITS rtype:4;
+	BITS spare:1;
+	BITS reg:8;
+	BITS swizzlex:3;
+	BITS swizzley:3;
+	BITS dstoff:2; // 2 bits of dest offset into alt ram
+	BITS opcode:4;
+	BITS negx:1;
+	BITS negy:1;
+	BITS dstcomp:2; // select dest component
+	BITS spare2:3;
+} PVSMATH;
+
+typedef union PVSDWORDtag /* all members overlay the same storage; used for r700_AssemblerBase.D, D2, S[] and C[] */
+{
+	BITS    bits;  // raw value
+	PVSDST  dst;
+    PVSINST dst2;
+	PVSSRC  src;
+	PVSMATH math;
+	float   f;
+} PVSDWORD;
+
+typedef struct VAP_OUT_VTX_FMT_0tag /* one flag bit per vertex output; 20 flag bits + 12 reserved = 32; trailing numbers are bit positions */
+{
+	BITS pos:1;      // 0
+	BITS misc:1;
+	BITS clip_dist0:1;
+	BITS clip_dist1:1;
+	BITS pos_param:1; // 4
+
+	BITS color0:1;    // 5
+	BITS color1:1;
+	BITS color2:1;
+	BITS color3:1;
+	BITS color4:1;
+	BITS color5:1;
+	BITS color6:1;
+	BITS color7:1;
+
+	BITS normal:1;    // 13
+
+	BITS depth:1;          // 14
+
+	BITS point_size:1;     // 15   
+	BITS edge_flag:1;      
+	BITS rta_index:1;      //     shares same channel as kill_flag
+	BITS kill_flag:1;
+	BITS viewport_index:1; // 19   
+
+	BITS resvd1:12;        // 20
+} VAP_OUT_VTX_FMT_0;
+
+typedef struct VAP_OUT_VTX_FMT_1tag /* a 3-bit field for each of tex0..tex7; 24 bits + 8 reserved = 32 */
+{
+	BITS tex0comp:3;
+	BITS tex1comp:3;
+	BITS tex2comp:3;
+	BITS tex3comp:3;
+	BITS tex4comp:3;
+	BITS tex5comp:3;
+	BITS tex6comp:3;
+	BITS tex7comp:3;
+
+	BITS resvd:8;
+} VAP_OUT_VTX_FMT_1;
+
+typedef struct VAP_OUT_VTX_FMT_2tag /* same layout as VAP_OUT_VTX_FMT_1, for tex8..tex15 */
+{
+	BITS tex8comp :3;
+	BITS tex9comp :3;
+	BITS tex10comp:3;
+	BITS tex11comp:3;
+	BITS tex12comp:3;
+	BITS tex13comp:3;
+	BITS tex14comp:3;
+	BITS tex15comp:3;
+
+	BITS resvd:8;
+} VAP_OUT_VTX_FMT_2;
+
+typedef struct OUT_FRAGMENT_FMT_0tag /* one flag bit per fragment output; 12 flag bits + 20 reserved = 32; type of r700_AssemblerBase.fp_stOutFmt0 */
+{
+	BITS color0:1;
+	BITS color1:1;
+	BITS color2:1;
+	BITS color3:1;
+	BITS color4:1;
+	BITS color5:1;
+	BITS color6:1;
+	BITS color7:1;
+
+	BITS depth:1; // presumably the bit read by is_depth_component_exported() -- confirm
+	BITS stencil_ref:1;
+	BITS coverage_to_mask:1;
+	BITS mask:1;
+
+	BITS resvd1:20;
+} OUT_FRAGMENT_FMT_0;
+
+typedef enum  CF_CLAUSE_TYPE /* type of r700_AssemblerBase.cf_current_clause_type; argument of check_current_clause() */
+{
+   CF_EXPORT_CLAUSE,
+   CF_ALU_CLAUSE,
+   CF_TEX_CLAUSE,
+   CF_VTX_CLAUSE,
+   CF_OTHER_CLAUSE,
+   CF_EMPTY_CLAUSE,
+   NUMBER_CF_CLAUSE_TYPES /* count of the clause types listed above (6) */
+} CF_CLAUSE_TYPE;
+
+enum /* three unrelated groups of constants share this anonymous enum, so values repeat across groups */
+{
+    MAX_BOOL_CONSTANTS   = 32,
+    MAX_INT_CONSTANTS    = 32,
+    MAX_FLOAT_CONSTANTS  = 256,
+
+    FC_NONE = 0, /* FC_*: presumably the values of FC_LEVEL.type -- confirm */
+    FC_IF = 1,
+    FC_LOOP = 2,
+    FC_REP = 3,
+    FC_PUSH_VPM = 4,
+    FC_PUSH_WQM = 5,
+
+    COND_NONE = 0, /* COND_*: presumably the values of FC_LEVEL.cond -- confirm */
+    COND_BOOL = 1,
+    COND_PRED = 2,
+    COND_ALU = 3,
+
+    SAFEDIST_TEX = 6, ///< safe distance for using result of texture lookup in alu or another tex lookup
+    SAFEDIST_ALU = 6 ///< the same for alu->fc
+};
+
+typedef struct FC_LEVEL /* element type of r700_AssemblerBase.fc_stack[] */
+{
+    R700ControlFlowGenericClause *  first;
+    R700ControlFlowGenericClause ** mid;   /* array of clause pointers */
+    unsigned int unNumMid;                 /* presumably entries used in mid[] -- confirm */
+    unsigned int midLen;                   /* presumably allocated length of mid[] -- confirm */
+    unsigned int type;                     /* presumably an FC_* constant -- confirm */
+    unsigned int cond;                     /* presumably a COND_* constant -- confirm */
+    unsigned int inv;
+    int id; ///< id of bool or int variable
+} FC_LEVEL;
+
+typedef struct VTX_FETCH_METHOD /* passed by pointer to assemble_vfetch_instruction() and assemble_vfetch_instruction2() */
+{
+    GLboolean bEnableMini;
+    GLuint mega_fetch_remainder;
+} VTX_FETCH_METHOD;
+
+typedef struct SUB_OFFSET /* element type of the r700_AssemblerBase.subs array */
+{
+    GLint  subIL_Offset;
+    GLuint unCFoffset;
+    GLuint unStackDepthMax;
+    PRESUB_DESC *   pPresubDesc;             /* descriptor of a pre-built sub; NOTE(review): may be NULL for ordinary subs -- confirm */
+    TypedShaderList lstCFInstructions_local; /* instruction list owned by this entry (stored by value) */
+} SUB_OFFSET;
+
+typedef struct CALLER_POINTER /* element type of the r700_AssemblerBase.callers array */
+{
+    GLint  subIL_Offset;
+    GLint  subDescIndex; /* presumably an index into r700_AssemblerBase.subs -- confirm */
+    R700ControlFlowGenericClause* cf_ptr;
+
+    R700ControlFlowGenericClause* prelude_cf_ptr;
+    R700ControlFlowGenericClause* finale_cf_ptr;
+} CALLER_POINTER;
+
+#define SQ_MAX_CALL_DEPTH 0x00000020 /* 32; length of r700_AssemblerBase.CALLSTACK[] */
+
+typedef struct CALL_LEVEL /* element type of r700_AssemblerBase.CALLSTACK[] */
+{
+    unsigned int      FCSP_BeforeEntry;
+    GLint             subDescIndex;
+    GLushort          current;
+    GLushort          max;
+    TypedShaderList * plstCFInstructions_local; /* points at an instruction list; not stored by value as in SUB_OFFSET */
+} CALL_LEVEL;
+
+#define HAS_CURRENT_LOOPRET 0x1L
+#define HAS_LOOPRET         0x2L
+#define LOOPRET_FLAGS       (HAS_LOOPRET | HAS_CURRENT_LOOPRET) /* parenthesized so that &, == and ~ apply to the whole mask */
+
+typedef struct r700_AssemblerBase 
+{
+	R700ControlFlowSXClause*      cf_last_export_ptr; // last export clause; the export routines mark it EXPORT_DONE / end_of_program
+	R700ControlFlowSXClause*      cf_current_export_clause_ptr;
+	R700ControlFlowALUClause*     cf_current_alu_clause_ptr;
+	R700ControlFlowGenericClause* cf_current_tex_clause_ptr;
+	R700ControlFlowGenericClause* cf_current_vtx_clause_ptr;
+	R700ControlFlowGenericClause* cf_current_cf_clause_ptr;
+
+    //Result shader
+    R700_Shader * pR700Shader;
+
+	// No clause has been created yet
+	CF_CLAUSE_TYPE cf_current_clause_type;
+
+    BITS alu_x_opcode;
+
+	GLuint number_of_exports;
+	GLuint number_of_colorandz_exports;
+	GLuint number_of_export_opcodes;
+
+	PVSDWORD D;    // D, D2: destination words (see PVSDST / PVSINST)
+    PVSDWORD D2;
+	PVSDWORD S[3]; // three source-operand words (see PVSSRC)
+        PVSDWORD C[4];
+
+	unsigned int uLastPosUpdate;
+	unsigned int last_cond_register;
+
+	OUT_FRAGMENT_FMT_0     fp_stOutFmt0;
+
+	unsigned int uIIns;
+	unsigned int uOIns;
+	unsigned int number_used_registers;
+	unsigned int uUsedConsts; 
+
+    unsigned int flag_reg_index;
+
+	// Fragment programs
+	unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX];
+	unsigned int uiFP_OutputMap[FRAG_RESULT_MAX];
+	unsigned int uBoolConsts;
+	unsigned int uIntConsts;
+	unsigned int uInsts;
+	unsigned int uConsts;
+
+	// Vertex programs
+	unsigned char ucVP_AttributeMap[VERT_ATTRIB_MAX];
+	unsigned char ucVP_OutputMap[VERT_RESULT_MAX]; // passed as the register number when vertex outputs are exported
+
+    unsigned char * pucOutMask; // heap buffer; released by Clean_Up_Assembler()
+
+	//-----------------------------------------------------------------------------------
+	// flow control members
+	//-----------------------------------------------------------------------------------
+	unsigned int FCSP; // presumably the current depth of fc_stack[] -- confirm
+	FC_LEVEL fc_stack[32];
+
+	//-----------------------------------------------------------------------------------
+	// ArgSubst used in Assemble_Source() function
+	//-----------------------------------------------------------------------------------
+	int aArgSubst[4];
+
+    GLint hw_gpr[ NUMBER_OF_CYCLES ][ NUMBER_OF_COMPONENTS ];
+    GLint hw_cfile_addr[ NUMBER_OF_COMPONENTS ];
+    GLint hw_cfile_chan[ NUMBER_OF_COMPONENTS ];
+
+    GLuint uOutputs;
+  
+    GLint color_export_register_number[NUMBER_OF_OUTPUT_COLORS];
+	GLint depth_export_register_number;
+
+	GLint stencil_export_register_number;
+	GLint coverage_to_mask_export_register_number;
+	GLint mask_export_register_number;
+
+	GLuint starting_export_register_number; // register used for the fallback PARAM export when nothing else is exported
+	GLuint starting_vfetch_register_number;
+	GLuint starting_temp_register_number;
+	GLuint uHelpReg;
+	GLuint uFirstHelpReg;
+
+	GLboolean input_position_is_used;
+	GLboolean input_normal_is_used;
+
+	GLboolean input_color_is_used[NUMBER_OF_INPUT_COLORS];
+  
+	GLboolean input_texture_unit_is_used[NUMBER_OF_TEXTURE_UNITS];
+  
+    R700VertexGenericFetch* vfetch_instruction_ptr_array[VERT_ATTRIB_MAX];
+  
+	GLuint number_of_inputs;
+
+    InstDeps *pInstDeps; // heap buffer; released by Clean_Up_Assembler()
+
+    SHADER_PIPE_TYPE currentShaderType;
+    struct prog_instruction * pILInst;
+    GLuint             uiCurInst;
+    GLubyte SamplerUnits[MAX_SAMPLERS];
+    GLboolean   bR6xx;
+    /* helper to decide which type of instruction to assemble */
+    GLboolean is_tex;
+    /* we inserted helper instructions and need barrier on next TEX ins */ 
+    GLboolean need_tex_barrier; 
+
+    SUB_OFFSET     * subs;                 // heap array; released by Clean_Up_Assembler() when non-NULL
+    GLuint           unSubArraySize;
+    GLuint           unSubArrayPointer;
+    CALLER_POINTER * callers;              // heap array; released by Clean_Up_Assembler() when non-NULL
+    GLuint           unCallerArraySize;
+    GLuint           unCallerArrayPointer;
+    unsigned int     CALLSP;               // presumably the current depth of CALLSTACK[] -- confirm
+    CALL_LEVEL       CALLSTACK[SQ_MAX_CALL_DEPTH];
+
+    GLuint unCFflags; // presumably a combination of HAS_LOOPRET / HAS_CURRENT_LOOPRET -- confirm
+
+    PRESUB_DESC * presubs;                 // heap array; released by Clean_Up_Assembler() when non-NULL
+    GLuint        unPresubArraySize;
+    GLuint        unNumPresub;
+    GLuint        unCurNumILInsts;
+
+    GLuint    unVetTexBits; // bit i is tested per texture unit i in r700_chip.c to pick the "vs texture" slot
+
+    GLuint    shadow_regs[R700_MAX_TEXTURE_UNITS];
+
+} r700_AssemblerBase;
+
+//Internal use
+BITS addrmode_PVSDST(PVSDST * pPVSDST);
+void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode);
+void nomask_PVSDST(PVSDST * pPVSDST);
+BITS addrmode_PVSSRC(PVSSRC* pPVSSRC);
+void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode);
+void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz);
+void noswizzle_PVSSRC(PVSSRC* pPVSSRC);
+void swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w);
+void neg_PVSSRC(PVSSRC* pPVSSRC);
+void noneg_PVSSRC(PVSSRC* pPVSSRC);
+void flipneg_PVSSRC(PVSSRC* pPVSSRC);
+void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c);
+void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c);
+BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0);
+BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ;
+GLboolean is_reduction_opcode(PVSDWORD * dest);
+GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size);
+
+unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3);
+
+GLboolean IsTex(gl_inst_opcode Opcode);
+GLboolean IsAlu(gl_inst_opcode Opcode);
+int check_current_clause(r700_AssemblerBase* pAsm,
+					     CF_CLAUSE_TYPE      new_clause_type);
+GLboolean add_vfetch_instruction(r700_AssemblerBase*     pAsm,
+								 R700VertexInstruction*  vertex_instruction_ptr);
+GLboolean add_tex_instruction(r700_AssemblerBase*     pAsm,
+                              R700TextureInstruction* tex_instruction_ptr);
+GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
+								GLuint gl_client_id,
+                                GLuint destination_register,
+								GLuint number_of_elements,
+                                GLenum dataElementType,
+								VTX_FETCH_METHOD* pFetchMethod);
+GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
+                                       GLuint              destination_register,								       
+                                       GLenum              type,
+                                       GLint               size,
+                                       GLubyte             element,
+                                       GLuint              _signed,
+                                       GLboolean           normalize,
+                                       GLenum              format,
+                                       VTX_FETCH_METHOD  * pFetchMethod);
+GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm);
+GLuint gethelpr(r700_AssemblerBase* pAsm);
+void resethelpr(r700_AssemblerBase* pAsm);
+void checkop_init(r700_AssemblerBase* pAsm);
+GLboolean mov_temp(r700_AssemblerBase* pAsm, int src);
+GLboolean checkop1(r700_AssemblerBase* pAsm);
+GLboolean checkop2(r700_AssemblerBase* pAsm);
+GLboolean checkop3(r700_AssemblerBase* pAsm);
+GLboolean assemble_src(r700_AssemblerBase *pAsm,
+                       int src, 
+                       int fld);
+GLboolean assemble_dst(r700_AssemblerBase *pAsm);
+GLboolean tex_dst(r700_AssemblerBase *pAsm);
+GLboolean tex_src(r700_AssemblerBase *pAsm);
+GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized);
+void initialize(r700_AssemblerBase *pAsm);
+GLboolean assemble_alu_src(R700ALUInstruction*  alu_instruction_ptr,
+                           int                  source_index,
+                           PVSSRC*              pSource,
+                           BITS                 scalar_channel_index);
+GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
+                              R700ALUInstruction* alu_instruction_ptr,
+                              GLuint              contiguous_slots_needed);
+
+GLboolean add_cf_instruction(r700_AssemblerBase* pAsm);
+void add_return_inst(r700_AssemblerBase *pAsm);
+
+void get_src_properties(R700ALUInstruction*  alu_instruction_ptr,
+                        int                  source_index,
+                        BITS*                psrc_sel,
+                        BITS*                psrc_rel,
+                        BITS*                psrc_chan,
+                        BITS*                psrc_neg);
+int is_cfile(BITS sel);
+int is_const(BITS sel);
+int is_gpr(BITS sel);
+GLboolean reserve_cfile(r700_AssemblerBase* pAsm, 
+                        GLuint sel, 
+                        GLuint chan);
+GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle);
+GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle);
+GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle);
+GLboolean check_scalar(r700_AssemblerBase* pAsm,
+                       R700ALUInstruction* alu_instruction_ptr);
+GLboolean check_vector(r700_AssemblerBase* pAsm,
+                       R700ALUInstruction* alu_instruction_ptr);
+GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm);
+
+GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops);
+GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset);
+GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue);
+GLboolean testFlag(r700_AssemblerBase *pAsm);
+GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP);
+GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF);
+
+GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode);
+GLboolean assemble_ABS(r700_AssemblerBase *pAsm);
+GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
+GLboolean assemble_ARL(r700_AssemblerBase *pAsm);
+GLboolean assemble_BAD(char *opcode_str);
+GLboolean assemble_CMP(r700_AssemblerBase *pAsm);
+GLboolean assemble_DOT(r700_AssemblerBase *pAsm);
+GLboolean assemble_DST(r700_AssemblerBase *pAsm);
+GLboolean assemble_EX2(r700_AssemblerBase *pAsm);
+GLboolean assemble_EXP(r700_AssemblerBase *pAsm);
+GLboolean assemble_FLR(r700_AssemblerBase *pAsm);
+GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm);
+GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
+GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode);
+GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
+GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
+GLboolean assemble_LOG(r700_AssemblerBase *pAsm);
+GLboolean assemble_MAD(r700_AssemblerBase *pAsm);
+GLboolean assemble_LIT(r700_AssemblerBase *pAsm);
+GLboolean assemble_MAX(r700_AssemblerBase *pAsm);
+GLboolean assemble_MIN(r700_AssemblerBase *pAsm);
+GLboolean assemble_MOV(r700_AssemblerBase *pAsm);
+GLboolean assemble_MUL(r700_AssemblerBase *pAsm);
+GLboolean assemble_POW(r700_AssemblerBase *pAsm);
+GLboolean assemble_RCP(r700_AssemblerBase *pAsm);
+GLboolean assemble_RSQ(r700_AssemblerBase *pAsm);
+GLboolean assemble_SCS(r700_AssemblerBase *pAsm);
+GLboolean assemble_SGE(r700_AssemblerBase *pAsm);
+GLboolean assemble_CONT(r700_AssemblerBase *pAsm);
+
+GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode);
+GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode); 
+GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode);
+
+GLboolean assemble_SLT(r700_AssemblerBase *pAsm);
+GLboolean assemble_STP(r700_AssemblerBase *pAsm);
+GLboolean assemble_TEX(r700_AssemblerBase *pAsm);
+GLboolean assemble_XPD(r700_AssemblerBase *pAsm);
+GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm);
+GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse);
+GLboolean assemble_ELSE(r700_AssemblerBase *pAsm);
+GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm);
+
+GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm);
+GLboolean assemble_BRK(r700_AssemblerBase *pAsm);
+GLboolean assemble_COND(r700_AssemblerBase *pAsm);
+GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm);
+
+GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift);
+GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm);
+GLboolean assemble_RET(r700_AssemblerBase *pAsm);
+GLboolean assemble_CAL(r700_AssemblerBase *pAsm, 
+                       GLint nILindex,
+                       GLuint uiIL_Offest,
+                       GLuint uiNumberInsts,
+                       struct prog_instruction *pILInst,
+                       PRESUB_DESC * pPresubDesc);
+
+GLboolean Process_Export(r700_AssemblerBase* pAsm,
+                         GLuint type, 
+                         GLuint export_starting_index,
+                         GLuint export_count, 
+                         GLuint starting_register_number,
+                         GLboolean is_depth_export);
+GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, 
+                                                 BITS depth_channel_select);
+
+GLboolean callPreSub(r700_AssemblerBase* pAsm, 
+                     LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
+                     /* struct prog_instruction ** pILInstParent, */
+                     COMPILED_SUB * pCompiledSub,                                            
+                     GLshort uOutReg,
+                     GLshort uNumValidSrc);
+
+//Interface
+GLboolean AssembleInstr(GLuint uiFirstInst,
+                        GLuint uiIL_Shift,
+                        GLuint uiNumberInsts,
+                        struct prog_instruction *pILInst, 
+						r700_AssemblerBase *pR700AsmCode);
+GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);  
+GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
+
+GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg);
+GLboolean InitShaderProgram(r700_AssemblerBase * pAsm);
+
+int       Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader);
+GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode);
+
+#endif //_R700_ASSEMBLER_H_
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
new file mode 100644
index 0000000000..cefda3ac4b
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -0,0 +1,1601 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include "main/imports.h"
+#include "main/glheader.h"
+#include "main/simple_list.h"
+
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+
+#include "r600_tex.h"
+#include "r700_oglprog.h"
+#include "r700_fragprog.h"
+#include "r700_vertprog.h"
+
+#include "radeon_mipmap_tree.h"
+
+/* Emit the texture resource state: for every enabled unit with a bound
+ * texture object and a buffer object, sync the surface and send one
+ * SET_RESOURCE packet (SQ_TEX_RESOURCE0..6) followed by two relocations. */
+static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t         *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	struct r700_vertex_program *vp = context->selected_vp;
+	struct radeon_bo *bo = NULL;
+	unsigned int i;
+	BATCH_LOCALS(&context->radeon);
+
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+	for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+		radeonTexObj *t;
+		unsigned int res_offset;
+
+		if (!ctx->Texture.Unit[i]._ReallyEnabled)
+			continue;
+		t = r700->textures[i];
+		if (!t)
+			continue;
+
+		/* An overridden image supplies its own bo; otherwise the
+		 * miptree's bo is used. */
+		if (t->image_override)
+			bo = t->bo;
+		else
+			bo = t->mt->bo;
+		if (!bo)
+			continue;
+
+		r700SyncSurf(context, bo,
+			     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
+			     0, TC_ACTION_ENA_bit);
+
+		/* Units flagged in unVetTexBits ("vs texture") use an offset
+		 * based on SQ_FETCH_RESOURCE_VS_OFFSET; the others use i * 7. */
+		if ((1 << i) & vp->r700AsmCode.unVetTexBits)
+			res_offset = (i + VERT_ATTRIB_MAX + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE;
+		else
+			res_offset = i * 7;
+
+		BEGIN_BATCH_NO_AUTOSTATE(9 + 4);
+		R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+		R600_OUT_BATCH(res_offset);
+		R600_OUT_BATCH(t->SQ_TEX_RESOURCE0);
+		R600_OUT_BATCH(t->SQ_TEX_RESOURCE1);
+		R600_OUT_BATCH(t->SQ_TEX_RESOURCE2);
+		R600_OUT_BATCH(t->SQ_TEX_RESOURCE3);
+		R600_OUT_BATCH(t->SQ_TEX_RESOURCE4);
+		R600_OUT_BATCH(t->SQ_TEX_RESOURCE5);
+		R600_OUT_BATCH(t->SQ_TEX_RESOURCE6);
+		R600_OUT_BATCH_RELOC(t->SQ_TEX_RESOURCE2, bo, t->SQ_TEX_RESOURCE2,
+				     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+		R600_OUT_BATCH_RELOC(t->SQ_TEX_RESOURCE3, bo, t->SQ_TEX_RESOURCE3,
+				     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+		END_BATCH();
+		COMMIT_BATCH();
+	}
+}
+
+#define SAMPLER_STRIDE                 3
+
+/* Emit one SET_SAMPLER packet (SQ_TEX_SAMPLER0..2) for every enabled texture
+ * unit that has a texture object in r700->textures[]. */
+static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t         *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	struct r700_vertex_program *vp = context->selected_vp;
+	unsigned int i;
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+	for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+		radeonTexObj *t;
+		unsigned int slot = i;
+
+		if (!ctx->Texture.Unit[i]._ReallyEnabled)
+			continue;
+		t = r700->textures[i];
+		if (!t)
+			continue;
+
+		/* "vs texture" units are shifted by SQ_TEX_SAMPLER_VS_OFFSET; both
+		 * cases now scale by SAMPLER_STRIDE instead of one using a bare 3. */
+		if ((1 << i) & vp->r700AsmCode.unVetTexBits)
+			slot += SQ_TEX_SAMPLER_VS_OFFSET;
+
+		BEGIN_BATCH_NO_AUTOSTATE(5);
+		R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
+		R600_OUT_BATCH(slot * SAMPLER_STRIDE);
+		R600_OUT_BATCH(t->SQ_TEX_SAMPLER0);
+		R600_OUT_BATCH(t->SQ_TEX_SAMPLER1);
+		R600_OUT_BATCH(t->SQ_TEX_SAMPLER2);
+		END_BATCH();
+		COMMIT_BATCH();
+	}
+}
+
+/* Write the four PS sampler border-color registers for every enabled texture
+ * unit that has a texture object in r700->textures[]. */
+static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t         *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	unsigned int i;
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+	for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+		radeonTexObj *t = ctx->Texture.Unit[i]._ReallyEnabled ? r700->textures[i] : NULL;
+		if (!t)
+			continue;
+		/* Unit i starts at TD_PS_SAMPLER0_BORDER_RED + i * 16 (RED, GREEN, BLUE, ALPHA). */
+		BEGIN_BATCH_NO_AUTOSTATE(2 + 4);
+		R600_OUT_BATCH_REGSEQ((TD_PS_SAMPLER0_BORDER_RED + (i * 16)), 4);
+		R600_OUT_BATCH(t->TD_PS_SAMPLER0_BORDER_RED);
+		R600_OUT_BATCH(t->TD_PS_SAMPLER0_BORDER_GREEN);
+		R600_OUT_BATCH(t->TD_PS_SAMPLER0_BORDER_BLUE);
+		R600_OUT_BATCH(t->TD_PS_SAMPLER0_BORDER_ALPHA);
+		END_BATCH();
+		COMMIT_BATCH();
+	}
+}
+
+extern int getTypeSize(GLenum type);
+/* Describe one vertex stream to the hardware: sync the array's buffer
+ * object, build the SQ_VTX_CONSTANT words from *pStreamDesc and emit them in
+ * a SET_RESOURCE packet plus a relocation.  pAos is used as a
+ * struct radeon_aos *; nothing is emitted when it has no buffer object. */
+static void r700SetupVTXConstants(GLcontext  * ctx,
+				  void *       pAos,
+				  StreamDesc * pStreamDesc)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    struct radeon_aos * paos = (struct radeon_aos *)pAos;
+    unsigned int vb_bytes;
+    unsigned int num_format;
+    BATCH_LOCALS(&context->radeon);
+
+    unsigned int word0;
+    unsigned int word1;
+    unsigned int word2 = 0;
+    unsigned int word3 = 0;
+    unsigned int word6 = 0;
+
+    if (!paos->bo)
+	    return;
+
+    /* The families listed here sync with TC_ACTION_ENA_bit; every other
+     * family syncs with VC_ACTION_ENA_bit. */
+    switch (context->radeon.radeonScreen->chip_family) {
+    case CHIP_FAMILY_RV610:
+    case CHIP_FAMILY_RV620:
+    case CHIP_FAMILY_RS780:
+    case CHIP_FAMILY_RS880:
+    case CHIP_FAMILY_RV710:
+        r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit);
+        break;
+    default:
+        r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
+        break;
+    }
+
+    /* Buffer size in bytes: (count - 1) strides plus one element when a
+     * stride is given, count tightly packed elements otherwise. */
+    if (pStreamDesc->stride != 0)
+        vb_bytes = (paos->count - 1) * pStreamDesc->stride
+                   + pStreamDesc->size * getTypeSize(pStreamDesc->type);
+    else
+        vb_bytes = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type);
+
+    word0 = paos->offset;
+    word1 = vb_bytes - 1;
+
+    SETfield(word2, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */
+    SETfield(word2, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
+	     SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask);
+    SETfield(word2, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL),
+	     SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift,
+	     SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */
+
+    /* Normalized streams use NUM_FORMAT_NORM, all others NUM_FORMAT_SCALED. */
+    if (GL_TRUE == pStreamDesc->normalize)
+        num_format = SQ_NUM_FORMAT_NORM;
+    else
+        num_format = SQ_NUM_FORMAT_SCALED;
+    SETfield(word2, num_format,
+             SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+
+    if (1 == pStreamDesc->_signed) {
+        SETbit(word2, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit);
+    }
+
+    SETfield(word3, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask);
+    SETfield(word6, SQ_TEX_VTX_VALID_BUFFER,
+	     SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
+
+    BEGIN_BATCH_NO_AUTOSTATE(9 + 2);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
+    R600_OUT_BATCH((pStreamDesc->element + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
+    R600_OUT_BATCH(word0);
+    R600_OUT_BATCH(word1);
+    R600_OUT_BATCH(word2);
+    R600_OUT_BATCH(word3);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(word6);
+    R600_OUT_BATCH_RELOC(word0, paos->bo, word0, RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+/* Zero the SQ_VTX_BASE_VTX_LOC / SQ_VTX_START_INST_LOC control constants, then
+ * set up the vertex resource for each attribute in InputsRead; aos[] and
+ * stream_desc[] are consumed in order (index j).  No-op when aos_count is 0. */
+static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+    context_t         *context = R700_CONTEXT(ctx);
+    struct r700_vertex_program *vp = context->selected_vp;
+    unsigned int i, j = 0;
+    BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+    if (context->radeon.tcl.aos_count == 0)
+	    return;
+
+    BEGIN_BATCH_NO_AUTOSTATE(6);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+    R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+    R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX);
+    R600_OUT_BATCH(0);
+    END_BATCH();
+    COMMIT_BATCH();
+
+    for(i=0; i<VERT_ATTRIB_MAX; i++) {
+        /* 1u, not 1: shifting a signed 1 into bit 31 is undefined behaviour. */
+        if(vp->mesa_program->Base.InputsRead & (1u << i)) {
+            r700SetupVTXConstants(ctx, (void*)(&context->radeon.tcl.aos[j]),
+                                  &(context->stream_desc[j]));
+            j++;
+        }
+    }
+}
+
+static void r700SetRenderTarget(context_t *context, int id)
+{
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+    uint32_t format = COLOR_8_8_8_8, comp_swap = SWAP_ALT, number_type = NUMBER_UNORM;
+    struct radeon_renderbuffer *rrb;
+    unsigned int nPitchInPixel;
+    /* Fill the shadow CB_COLOR0_* registers of render target `id` from the current colour renderbuffer; no-op without a buffer object. */
+    rrb = radeon_get_colorbuffer(&context->radeon);
+    if (!rrb || !rrb->bo) {
+	    return;
+    }
+
+    R600_STATECHANGE(context, cb_target);
+
+    /* color buffer: the base register holds the draw offset divided by 256 */
+    r700->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset / 256;
+    /* Pitch is programmed in units of 8 pixels and slice size in units of 64 pixels, both minus 1; height is the screen's fbHeight. */
+    nPitchInPixel = rrb->pitch/rrb->cpp;
+    SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, (nPitchInPixel/8)-1,
+             PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+    SETfield(r700->render_target[id].CB_COLOR0_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1,
+             SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask);
+    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ENDIAN_NONE, ENDIAN_shift, ENDIAN_mask);
+    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ARRAY_LINEAR_GENERAL,
+             CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+    /* Map the Mesa format to hardware format, component swap and number type; depth formats also override the array mode. */
+    switch (rrb->base.Format) {
+    case MESA_FORMAT_RGBA8888:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_STD_REV;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_SIGNED_RGBA8888:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_STD_REV;
+	    number_type = NUMBER_SNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_RGBA8888_REV:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_STD;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_SIGNED_RGBA8888_REV:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_STD;
+	    number_type = NUMBER_SNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_ALT;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_ARGB8888_REV:
+    case MESA_FORMAT_XRGB8888_REV:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_ALT_REV;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_RGB565:
+            format = COLOR_5_6_5;
+            comp_swap = SWAP_STD_REV;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_RGB565_REV:
+            format = COLOR_5_6_5;
+            comp_swap = SWAP_STD;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_ARGB4444:
+            format = COLOR_4_4_4_4;
+            comp_swap = SWAP_ALT;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_ARGB4444_REV:
+            format = COLOR_4_4_4_4;
+            comp_swap = SWAP_ALT_REV;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_ARGB1555:
+            format = COLOR_1_5_5_5;
+            comp_swap = SWAP_ALT;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_ARGB1555_REV:
+            format = COLOR_1_5_5_5;
+            comp_swap = SWAP_ALT_REV;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_AL88:
+            format = COLOR_8_8;
+            comp_swap = SWAP_STD;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_AL88_REV:
+            format = COLOR_8_8;
+            comp_swap = SWAP_STD_REV;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_RGB332:
+            format = COLOR_3_3_2;
+            comp_swap = SWAP_STD_REV;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_A8:
+            format = COLOR_8;
+            comp_swap = SWAP_ALT_REV;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_I8:
+    case MESA_FORMAT_CI8:
+            format = COLOR_8;
+            comp_swap = SWAP_STD;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_L8:
+            format = COLOR_8;
+            comp_swap = SWAP_ALT;
+	    number_type = NUMBER_UNORM;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_RGBA_FLOAT32:
+            format = COLOR_32_32_32_32_FLOAT;
+            comp_swap = SWAP_STD_REV;
+	    number_type = NUMBER_FLOAT;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_FLOAT32_bit);
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_RGBA_FLOAT16:
+            format = COLOR_16_16_16_16_FLOAT;
+            comp_swap = SWAP_STD_REV;
+	    number_type = NUMBER_FLOAT;
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_ALPHA_FLOAT32:
+            format = COLOR_32_FLOAT;
+            comp_swap = SWAP_ALT_REV;
+	    number_type = NUMBER_FLOAT;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_FLOAT32_bit);
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_ALPHA_FLOAT16:
+            format = COLOR_16_FLOAT;
+            comp_swap = SWAP_ALT_REV;
+	    number_type = NUMBER_FLOAT;
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_LUMINANCE_FLOAT32:
+            format = COLOR_32_FLOAT;
+            comp_swap = SWAP_ALT;
+	    number_type = NUMBER_FLOAT;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_FLOAT32_bit);
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_LUMINANCE_FLOAT16:
+            format = COLOR_16_FLOAT;
+            comp_swap = SWAP_ALT;
+	    number_type = NUMBER_FLOAT;
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+            format = COLOR_32_32_FLOAT;
+            comp_swap = SWAP_ALT_REV;
+	    number_type = NUMBER_FLOAT;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_FLOAT32_bit);
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+            format = COLOR_16_16_FLOAT;
+            comp_swap = SWAP_ALT_REV;
+	    number_type = NUMBER_FLOAT;
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+            format = COLOR_32_FLOAT;
+            comp_swap = SWAP_STD;
+	    number_type = NUMBER_FLOAT;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_FLOAT32_bit);
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+            format = COLOR_16_FLOAT;
+            comp_swap = SWAP_STD;
+	    number_type = NUMBER_FLOAT; /* float format: same number type as every other FLOAT16/FLOAT32 case */
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_X8_Z24:
+    case MESA_FORMAT_S8_Z24:
+            format = COLOR_8_24;
+            comp_swap = SWAP_STD;
+	    number_type = NUMBER_UNORM;
+	    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ARRAY_1D_TILED_THIN1,
+		     CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_Z24_S8:
+            format = COLOR_24_8;
+            comp_swap = SWAP_STD;
+	    number_type = NUMBER_UNORM;
+	    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ARRAY_1D_TILED_THIN1,
+		     CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_Z16:
+            format = COLOR_16;
+            comp_swap = SWAP_STD;
+	    number_type = NUMBER_UNORM;
+	    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ARRAY_1D_TILED_THIN1,
+		     CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_Z32:
+            format = COLOR_32;
+            comp_swap = SWAP_STD;
+	    number_type = NUMBER_UNORM;
+	    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, ARRAY_1D_TILED_THIN1,
+		     CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_SRGBA8:
+            format = COLOR_8_8_8_8;
+            comp_swap = SWAP_STD_REV;
+	    number_type = NUMBER_SRGB;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_SLA8:
+            format = COLOR_8_8;
+            comp_swap = SWAP_ALT_REV;
+	    number_type = NUMBER_SRGB;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    case MESA_FORMAT_SL8:
+            format = COLOR_8;
+            comp_swap = SWAP_ALT_REV;
+	    number_type = NUMBER_SRGB;
+	    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
+            break;
+    default:
+	    _mesa_problem(context->radeon.glCtx, "unexpected format in r700SetRenderTarget()");
+	    break;
+    }
+
+    /* must be 0 on r7xx */
+    if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
+	    CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_FLOAT32_bit);
+    /* Commit the switch's choices; an unrecognized format keeps the defaults from the declarations above. */
+    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, format,
+	     CB_COLOR0_INFO__FORMAT_shift, CB_COLOR0_INFO__FORMAT_mask);
+    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, comp_swap,
+	     COMP_SWAP_shift, COMP_SWAP_mask);
+    SETfield(r700->render_target[id].CB_COLOR0_INFO.u32All, number_type,
+	     NUMBER_TYPE_shift, NUMBER_TYPE_mask);
+    SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, BLEND_CLAMP_bit);
+
+    r700->render_target[id].enabled = GL_TRUE;
+}
+
+static void r700SetDepthTarget(context_t *context)
+{
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+    /* Rebuild the shadow DB_DEPTH_* registers from the current depth renderbuffer; only rrb itself is checked here, not rrb->bo. */
+    struct radeon_renderbuffer *rrb;
+    unsigned int nPitchInPixel;
+
+    rrb = radeon_get_depthbuffer(&context->radeon);
+    if (!rrb)
+	    return;
+
+    R600_STATECHANGE(context, db_target);
+
+    /* depth buffer: start from all-zero registers, then fill in size and info below */
+    r700->DB_DEPTH_SIZE.u32All = 0;
+    r700->DB_DEPTH_BASE.u32All = 0;
+    r700->DB_DEPTH_INFO.u32All = 0;
+    r700->DB_DEPTH_VIEW.u32All = 0;
+
+    nPitchInPixel = rrb->pitch/rrb->cpp;
+
+    SETfield(r700->DB_DEPTH_SIZE.u32All, (nPitchInPixel/8)-1,
+             PITCH_TILE_MAX_shift, PITCH_TILE_MAX_mask);
+    SETfield(r700->DB_DEPTH_SIZE.u32All, ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1,
+             SLICE_TILE_MAX_shift, SLICE_TILE_MAX_mask); /* size in pixel / 64 - 1 */
+    /* Format follows bytes per pixel: 4 selects DEPTH_8_24, any other value selects DEPTH_16. */
+    if(4 == rrb->cpp)
+    {
+        SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_8_24,
+                 DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);
+    }
+    else
+    {
+        SETfield(r700->DB_DEPTH_INFO.u32All, DEPTH_16,
+                     DB_DEPTH_INFO__FORMAT_shift, DB_DEPTH_INFO__FORMAT_mask);
+    }
+    SETfield(r700->DB_DEPTH_INFO.u32All, ARRAY_1D_TILED_THIN1,
+             DB_DEPTH_INFO__ARRAY_MODE_shift, DB_DEPTH_INFO__ARRAY_MODE_mask);
+    /* r700->DB_PREFETCH_LIMIT.bits.DEPTH_HEIGHT_TILE_MAX = (context->currentDraw->h >> 3) - 1; */ /* the z buffer size may be much bigger than what is needed, so use the height actually used. */
+}
+
+static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	struct radeon_renderbuffer *rrb;
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+	/* Emit DB_DEPTH_SIZE/VIEW and DB_DEPTH_BASE/INFO for the current depth buffer; nothing is emitted without a buffer object. */
+	rrb = radeon_get_depthbuffer(&context->radeon);
+	if (!rrb || !rrb->bo) {
+		return;
+	}
+	/* The shadow registers are recomputed on every emit. */
+	r700SetDepthTarget(context);
+
+        BEGIN_BATCH_NO_AUTOSTATE(8 + 2);
+	R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2);
+	R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All);
+	R600_OUT_BATCH(r700->DB_DEPTH_VIEW.u32All);
+	R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 2);
+	R600_OUT_BATCH(r700->DB_DEPTH_BASE.u32All);
+	R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All);
+	R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All,
+			     rrb->bo,
+			     r700->DB_DEPTH_BASE.u32All,
+			     0, RADEON_GEM_DOMAIN_VRAM, 0); /* presumably (read domain, write domain, flags) - TODO confirm */
+        END_BATCH();
+	/* Only chip families strictly between R600 and RV770 get a SURFACE_BASE_UPDATE packet, here with payload 1 << 0. */
+	if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) &&
+	    (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) {
+		BEGIN_BATCH_NO_AUTOSTATE(2);
+		R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
+		R600_OUT_BATCH(1 << 0);
+		END_BATCH();
+	}
+
+	COMMIT_BATCH();
+
+}
+
+static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	struct radeon_renderbuffer *rrb;
+	BATCH_LOCALS(&context->radeon);
+	int id = 0;
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+	/* Emit the CB_COLOR0_* registers of render target `id` (always 0 here); nothing is emitted without a colour buffer object. */
+	rrb = radeon_get_colorbuffer(&context->radeon);
+	if (!rrb || !rrb->bo) {
+		return;
+	}
+
+	r700SetRenderTarget(context, 0);
+	/* Valid ids are 0 .. MAX-1, assuming render_target[] has R700_MAX_RENDER_TARGETS entries, so id == MAX is rejected too. */
+	if (id >= R700_MAX_RENDER_TARGETS)
+		return;
+
+	if (!r700->render_target[id].enabled)
+		return;
+
+        BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+	R600_OUT_BATCH_REGSEQ(CB_COLOR0_BASE + (4 * id), 1);
+	R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_BASE.u32All);
+	R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+			     rrb->bo,
+			     r700->render_target[id].CB_COLOR0_BASE.u32All,
+			     0, RADEON_GEM_DOMAIN_VRAM, 0);
+        END_BATCH();
+	/* Only chip families strictly between R600 and RV770 get a SURFACE_BASE_UPDATE packet, here with payload 2 << id. */
+	if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) &&
+	    (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) {
+		BEGIN_BATCH_NO_AUTOSTATE(2);
+		R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
+		R600_OUT_BATCH((2 << id));
+		END_BATCH();
+	}
+	/* Point the TILE and FRAG buffer registers at the color buffer's
+	 * BO: as we don't use those this shouldn't cause any issue and
+	 * we then have a valid cmd stream
+	 */
+	BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+	R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1);
+	R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_TILE.u32All);
+	R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+			     rrb->bo,
+			     r700->render_target[id].CB_COLOR0_BASE.u32All,
+			     0, RADEON_GEM_DOMAIN_VRAM, 0);
+	END_BATCH();
+	BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+	R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1);
+	R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_FRAG.u32All);
+	R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+			     rrb->bo,
+			     r700->render_target[id].CB_COLOR0_BASE.u32All,
+			     0, RADEON_GEM_DOMAIN_VRAM, 0);
+        END_BATCH();
+	/* The remaining per-target registers carry no relocation; each is addressed with a stride of 4 per target id. */
+        BEGIN_BATCH_NO_AUTOSTATE(12);
+	R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All);
+	R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All);
+	R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All);
+	R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All);
+        END_BATCH();
+
+	COMMIT_BATCH();
+
+}
+
+static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+    struct radeon_bo * pbo;
+    BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+    /* Emit pixel-shader program state; nothing is emitted when no fragment-program buffer object is active. */
+    pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context));
+
+    if (!pbo)
+	    return;
+    /* Sync the shader BO (GTT read domain, SH_ACTION_ENA_bit) before the start address that references it is emitted. */
+    r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
+    R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All);
+    R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All,
+		         pbo,
+		         r700->ps.SQ_PGM_START_PS.u32All,
+		         RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(9);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All);
+    END_BATCH();
+    /* SQ_LOOP_CONST_0 gets a fixed value; NOTE(review): the meaning of 0x01000FFF is not visible here - confirm against the register spec. */
+    BEGIN_BATCH_NO_AUTOSTATE(3);
+    R600_OUT_BATCH_REGVAL(SQ_LOOP_CONST_0, 0x01000FFF);
+    END_BATCH();
+
+    COMMIT_BATCH();
+
+}
+
+static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+    struct radeon_bo * pbo;
+    BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+    /* Emit vertex-shader program state; nothing is emitted when no vertex-program buffer object is active. */
+    pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context));
+
+    if (!pbo)
+	    return;
+    /* Sync the shader BO (GTT read domain, SH_ACTION_ENA_bit) before the start address that references it is emitted. */
+    r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+    BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+    R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
+    R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All);
+    R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All,
+		         pbo,
+		         r700->vs.SQ_PGM_START_VS.u32All,
+		         RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(6);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All);
+    R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All);
+    END_BATCH();
+
+    BEGIN_BATCH_NO_AUTOSTATE(3);
+    R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + 32*4), 0x0100000F);
+    /* NOTE(review): 32*4 presumably addresses the first VS loop constant (a disabled variant derived it from SQ_LOOP_CONST_vs) - confirm. */
+    END_BATCH();
+
+    COMMIT_BATCH();
+}
+
+static void r700SendFSState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	struct radeon_bo * pbo;
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+	/* Emit fetch-shader (FS) program state, borrowing the vertex shader's buffer object and start address. */
+	/* XXX fixme
+	 * R6xx chips require a FS be emitted, even if it's not used.
+	 * since we aren't using FS yet, just send the VS address to make
+	 * the kernel command checker happy
+	 */
+	pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context));
+	r700->fs.SQ_PGM_START_FS.u32All = r700->vs.SQ_PGM_START_VS.u32All;
+	r700->fs.SQ_PGM_RESOURCES_FS.u32All = 0;
+	r700->fs.SQ_PGM_CF_OFFSET_FS.u32All = 0;
+	/* XXX */
+	/* The shadow FS registers above are overwritten even when the check below bails out without emitting. */
+	if (!pbo)
+		return;
+
+	r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+        BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+	R600_OUT_BATCH_REGSEQ(SQ_PGM_START_FS, 1);
+	R600_OUT_BATCH(r700->fs.SQ_PGM_START_FS.u32All);
+	R600_OUT_BATCH_RELOC(r700->fs.SQ_PGM_START_FS.u32All,
+			     pbo,
+			     r700->fs.SQ_PGM_START_FS.u32All,
+			     RADEON_GEM_DOMAIN_GTT, 0, 0);
+	END_BATCH();
+
+        BEGIN_BATCH_NO_AUTOSTATE(6);
+	R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_FS, r700->fs.SQ_PGM_RESOURCES_FS.u32All);
+	R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_FS, r700->fs.SQ_PGM_CF_OFFSET_FS.u32All);
+        END_BATCH();
+
+	COMMIT_BATCH();
+
+}
+
+static void r700SendViewportState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+	int id = 0;
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+	/* Emit scissor, Z range and scale/offset for viewport `id` (always 0 here); id == MAX is out of range if viewport[] has MAX entries. */
+	if (id >= R700_MAX_VIEWPORTS)
+		return;
+
+	if (!r700->viewport[id].enabled)
+		return;
+	/* Per-viewport register strides: 8 for the two scissor and two Z-range registers, 24 for the six scale/offset registers. */
+        BEGIN_BATCH_NO_AUTOSTATE(16);
+	R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_SCISSOR_0_TL + (8 * id), 2);
+	R600_OUT_BATCH(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All);
+	R600_OUT_BATCH(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All);
+	R600_OUT_BATCH_REGSEQ(PA_SC_VPORT_ZMIN_0 + (8 * id), 2);
+	R600_OUT_BATCH(r700->viewport[id].PA_SC_VPORT_ZMIN_0.u32All);
+	R600_OUT_BATCH(r700->viewport[id].PA_SC_VPORT_ZMAX_0.u32All);
+	R600_OUT_BATCH_REGSEQ(PA_CL_VPORT_XSCALE_0 + (24 * id), 6);
+	R600_OUT_BATCH(r700->viewport[id].PA_CL_VPORT_XSCALE.u32All);
+	R600_OUT_BATCH(r700->viewport[id].PA_CL_VPORT_XOFFSET.u32All);
+	R600_OUT_BATCH(r700->viewport[id].PA_CL_VPORT_YSCALE.u32All);
+	R600_OUT_BATCH(r700->viewport[id].PA_CL_VPORT_YOFFSET.u32All);
+	R600_OUT_BATCH(r700->viewport[id].PA_CL_VPORT_ZSCALE.u32All);
+	R600_OUT_BATCH(r700->viewport[id].PA_CL_VPORT_ZOFFSET.u32All);
+        END_BATCH();
+
+	COMMIT_BATCH();
+
+}
+
+static void r700SendSQConfig(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+	/* Emit the SQ configuration block, five single registers and the ring item sizes from the shadow state, unconditionally. */
+        BEGIN_BATCH_NO_AUTOSTATE(34);
+	R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6);
+	R600_OUT_BATCH(r700->sq_config.SQ_CONFIG.u32All);
+	R600_OUT_BATCH(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All);
+	R600_OUT_BATCH(r700->sq_config.SQ_GPR_RESOURCE_MGMT_2.u32All);
+	R600_OUT_BATCH(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All);
+	R600_OUT_BATCH(r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All);
+	R600_OUT_BATCH(r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All);
+
+	R600_OUT_BATCH_REGVAL(TA_CNTL_AUX, r700->TA_CNTL_AUX.u32All);
+	R600_OUT_BATCH_REGVAL(VC_ENHANCE, r700->VC_ENHANCE.u32All);
+	R600_OUT_BATCH_REGVAL(R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, r700->SQ_DYN_GPR_CNTL_PS_FLUSH_REQ.u32All);
+	R600_OUT_BATCH_REGVAL(DB_DEBUG, r700->DB_DEBUG.u32All);
+	R600_OUT_BATCH_REGVAL(DB_WATERMARKS, r700->DB_WATERMARKS.u32All);
+	/* Nine ring/vertex item-size registers are written as one sequence starting at SQ_ESGS_RING_ITEMSIZE. */
+	R600_OUT_BATCH_REGSEQ(SQ_ESGS_RING_ITEMSIZE, 9);
+	R600_OUT_BATCH(r700->SQ_ESGS_RING_ITEMSIZE.u32All);
+	R600_OUT_BATCH(r700->SQ_GSVS_RING_ITEMSIZE.u32All);
+	R600_OUT_BATCH(r700->SQ_ESTMP_RING_ITEMSIZE.u32All);
+	R600_OUT_BATCH(r700->SQ_GSTMP_RING_ITEMSIZE.u32All);
+	R600_OUT_BATCH(r700->SQ_VSTMP_RING_ITEMSIZE.u32All);
+	R600_OUT_BATCH(r700->SQ_PSTMP_RING_ITEMSIZE.u32All);
+	R600_OUT_BATCH(r700->SQ_FBUF_RING_ITEMSIZE.u32All);
+	R600_OUT_BATCH(r700->SQ_REDUC_RING_ITEMSIZE.u32All);
+	R600_OUT_BATCH(r700->SQ_GS_VERT_ITEMSIZE.u32All);
+        END_BATCH();
+
+	COMMIT_BATCH();
+}
+
+static void r700SendUCPState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+	int i;
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+	/* Emit X/Y/Z/W for every enabled user clip plane, one batch per plane; disabled planes emit nothing. */
+	for (i = 0; i < R700_MAX_UCP; i++) {
+		if (r700->ucp[i].enabled) {
+			BEGIN_BATCH_NO_AUTOSTATE(6);
+			R600_OUT_BATCH_REGSEQ(PA_CL_UCP_0_X + (16 * i), 4); /* four registers per plane, register stride 16 */
+			R600_OUT_BATCH(r700->ucp[i].PA_CL_UCP_0_X.u32All);
+			R600_OUT_BATCH(r700->ucp[i].PA_CL_UCP_0_Y.u32All);
+			R600_OUT_BATCH(r700->ucp[i].PA_CL_UCP_0_Z.u32All);
+			R600_OUT_BATCH(r700->ucp[i].PA_CL_UCP_0_W.u32All);
+			END_BATCH();
+			COMMIT_BATCH();
+		}
+	}
+}
+
+static void r700SendSPIState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+	unsigned int ui;
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+	/* Emit vertex semantics, VS output ids, SPI control registers and all PS input controls in a single batch. */
+	BEGIN_BATCH_NO_AUTOSTATE(59 + R700_MAX_SHADER_EXPORTS);
+
+	R600_OUT_BATCH_REGSEQ(SQ_VTX_SEMANTIC_0, 32);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_0.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_1.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_2.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_3.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_4.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_5.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_6.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_7.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_8.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_9.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_10.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_11.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_12.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_13.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_14.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_15.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_16.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_17.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_18.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_19.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_20.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_21.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_22.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_23.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_24.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_25.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_26.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_27.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_28.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_29.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_30.u32All);
+	R600_OUT_BATCH(r700->SQ_VTX_SEMANTIC_31.u32All);
+
+	R600_OUT_BATCH_REGSEQ(SPI_VS_OUT_ID_0, 10);
+	R600_OUT_BATCH(r700->SPI_VS_OUT_ID_0.u32All);
+	R600_OUT_BATCH(r700->SPI_VS_OUT_ID_1.u32All);
+	R600_OUT_BATCH(r700->SPI_VS_OUT_ID_2.u32All);
+	R600_OUT_BATCH(r700->SPI_VS_OUT_ID_3.u32All);
+	R600_OUT_BATCH(r700->SPI_VS_OUT_ID_4.u32All);
+	R600_OUT_BATCH(r700->SPI_VS_OUT_ID_5.u32All);
+	R600_OUT_BATCH(r700->SPI_VS_OUT_ID_6.u32All);
+	R600_OUT_BATCH(r700->SPI_VS_OUT_ID_7.u32All);
+	R600_OUT_BATCH(r700->SPI_VS_OUT_ID_8.u32All);
+	R600_OUT_BATCH(r700->SPI_VS_OUT_ID_9.u32All);
+
+	R600_OUT_BATCH_REGSEQ(SPI_VS_OUT_CONFIG, 9);
+	R600_OUT_BATCH(r700->SPI_VS_OUT_CONFIG.u32All);
+	R600_OUT_BATCH(r700->SPI_THREAD_GROUPING.u32All);
+	R600_OUT_BATCH(r700->SPI_PS_IN_CONTROL_0.u32All);
+	R600_OUT_BATCH(r700->SPI_PS_IN_CONTROL_1.u32All);
+	R600_OUT_BATCH(r700->SPI_INTERP_CONTROL_0.u32All);
+	R600_OUT_BATCH(r700->SPI_INPUT_Z.u32All);
+	R600_OUT_BATCH(r700->SPI_FOG_CNTL.u32All);
+	R600_OUT_BATCH(r700->SPI_FOG_FUNC_SCALE.u32All);
+	R600_OUT_BATCH(r700->SPI_FOG_FUNC_BIAS.u32All);
+	/* All R700_MAX_SHADER_EXPORTS input-control registers are written regardless of how many inputs are in use. */
+	R600_OUT_BATCH_REGSEQ(SPI_PS_INPUT_CNTL_0, R700_MAX_SHADER_EXPORTS);
+	for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++)
+		R600_OUT_BATCH(r700->SPI_PS_INPUT_CNTL[ui].u32All);
+
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+static void r700SendVGTState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+	/* Emit the VGT registers held in the shadow state, unconditionally, in a single batch. */
+        BEGIN_BATCH_NO_AUTOSTATE(41);
+
+	R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4);
+	R600_OUT_BATCH(r700->VGT_MAX_VTX_INDX.u32All);
+	R600_OUT_BATCH(r700->VGT_MIN_VTX_INDX.u32All);
+	R600_OUT_BATCH(r700->VGT_INDX_OFFSET.u32All);
+	R600_OUT_BATCH(r700->VGT_MULTI_PRIM_IB_RESET_INDX.u32All);
+
+	R600_OUT_BATCH_REGSEQ(VGT_OUTPUT_PATH_CNTL, 13);
+	R600_OUT_BATCH(r700->VGT_OUTPUT_PATH_CNTL.u32All);
+	R600_OUT_BATCH(r700->VGT_HOS_CNTL.u32All);
+	R600_OUT_BATCH(r700->VGT_HOS_MAX_TESS_LEVEL.u32All);
+	R600_OUT_BATCH(r700->VGT_HOS_MIN_TESS_LEVEL.u32All);
+	R600_OUT_BATCH(r700->VGT_HOS_REUSE_DEPTH.u32All);
+	R600_OUT_BATCH(r700->VGT_GROUP_PRIM_TYPE.u32All);
+	R600_OUT_BATCH(r700->VGT_GROUP_FIRST_DECR.u32All);
+	R600_OUT_BATCH(r700->VGT_GROUP_DECR.u32All);
+	R600_OUT_BATCH(r700->VGT_GROUP_VECT_0_CNTL.u32All);
+	R600_OUT_BATCH(r700->VGT_GROUP_VECT_1_CNTL.u32All);
+	R600_OUT_BATCH(r700->VGT_GROUP_VECT_0_FMT_CNTL.u32All);
+	R600_OUT_BATCH(r700->VGT_GROUP_VECT_1_FMT_CNTL.u32All);
+	R600_OUT_BATCH(r700->VGT_GS_MODE.u32All);
+	/* These four registers are each written individually rather than as part of a sequence. */
+	R600_OUT_BATCH_REGVAL(VGT_PRIMITIVEID_EN, r700->VGT_PRIMITIVEID_EN.u32All);
+	R600_OUT_BATCH_REGVAL(VGT_MULTI_PRIM_IB_RESET_EN, r700->VGT_MULTI_PRIM_IB_RESET_EN.u32All);
+	R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_0, r700->VGT_INSTANCE_STEP_RATE_0.u32All);
+	R600_OUT_BATCH_REGVAL(VGT_INSTANCE_STEP_RATE_1, r700->VGT_INSTANCE_STEP_RATE_1.u32All);
+
+	R600_OUT_BATCH_REGSEQ(VGT_STRMOUT_EN, 3);
+	R600_OUT_BATCH(r700->VGT_STRMOUT_EN.u32All);
+	R600_OUT_BATCH(r700->VGT_REUSE_OFF.u32All);
+	R600_OUT_BATCH(r700->VGT_VTX_CNT_EN.u32All);
+
+	R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, r700->VGT_STRMOUT_BUFFER_EN.u32All);
+
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+static void r700SendSXState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+	/* Emit SX_MISC plus the alpha-test control and reference registers from the shadow state. */
+        BEGIN_BATCH_NO_AUTOSTATE(9);
+	R600_OUT_BATCH_REGVAL(SX_MISC, r700->SX_MISC.u32All);
+	R600_OUT_BATCH_REGVAL(SX_ALPHA_TEST_CONTROL, r700->SX_ALPHA_TEST_CONTROL.u32All);
+	R600_OUT_BATCH_REGVAL(SX_ALPHA_REF, r700->SX_ALPHA_REF.u32All);
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+	/* Emit depth/stencil clear values and the DB control registers from the shadow state, unconditionally. */
+	BEGIN_BATCH_NO_AUTOSTATE(17);
+
+	R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2);
+	R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All);
+	R600_OUT_BATCH(r700->DB_DEPTH_CLEAR.u32All);
+
+	R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, r700->DB_DEPTH_CONTROL.u32All);
+	R600_OUT_BATCH_REGVAL(DB_SHADER_CONTROL, r700->DB_SHADER_CONTROL.u32All);
+
+	R600_OUT_BATCH_REGSEQ(DB_RENDER_CONTROL, 2);
+	R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All);
+	R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All);
+
+	R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All);
+
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+/*
+ * Emit the stencil reference/mask registers (DB_STENCILREFMASK followed by
+ * DB_STENCILREFMASK_BF) from the cached chip state.
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendStencilState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+
+	/* 4 dwords: 2-dword sequence header + 2 register values */
+        BEGIN_BATCH_NO_AUTOSTATE(4);
+	R600_OUT_BATCH_REGSEQ(DB_STENCILREFMASK, 2);
+	R600_OUT_BATCH(r700->DB_STENCILREFMASK.u32All);
+	R600_OUT_BATCH(r700->DB_STENCILREFMASK_BF.u32All);
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+/*
+ * Emit colour-block state from the cached chip state.
+ *
+ * Chip families older than RV770 first receive the *_R6XX clear-colour
+ * (4 registers) and fog-colour (3 registers) values.  Every chip then gets
+ * CB_TARGET_MASK, CB_SHADER_MASK and the shader control register.
+ * check_cb() reports the matching dword total (7, or 7 + 11).
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendCBState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+	if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
+		/* 11 dwords: (2 + 4) clear colour + (2 + 3) fog colour */
+		BEGIN_BATCH_NO_AUTOSTATE(11);
+		R600_OUT_BATCH_REGSEQ(CB_CLEAR_RED, 4);
+		R600_OUT_BATCH(r700->CB_CLEAR_RED_R6XX.u32All);
+		R600_OUT_BATCH(r700->CB_CLEAR_GREEN_R6XX.u32All);
+		R600_OUT_BATCH(r700->CB_CLEAR_BLUE_R6XX.u32All);
+		R600_OUT_BATCH(r700->CB_CLEAR_ALPHA_R6XX.u32All);
+		R600_OUT_BATCH_REGSEQ(CB_FOG_RED, 3);
+		R600_OUT_BATCH(r700->CB_FOG_RED_R6XX.u32All);
+		R600_OUT_BATCH(r700->CB_FOG_GREEN_R6XX.u32All);
+		R600_OUT_BATCH(r700->CB_FOG_BLUE_R6XX.u32All);
+		END_BATCH();
+	}
+
+	/* 7 dwords: (2 + 2) mask sequence + 3 for the shader control write */
+	BEGIN_BATCH_NO_AUTOSTATE(7);
+	R600_OUT_BATCH_REGSEQ(CB_TARGET_MASK, 2);
+	R600_OUT_BATCH(r700->CB_TARGET_MASK.u32All);
+	R600_OUT_BATCH(r700->CB_SHADER_MASK.u32All);
+	R600_OUT_BATCH_REGVAL(R7xx_CB_SHADER_CONTROL, r700->CB_SHADER_CONTROL.u32All);
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+/*
+ * Emit the four colour-compare registers (CB_CLRCMP_CONTROL, _SRC, _DST,
+ * _MSK) as one register sequence from the cached chip state.
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendCBCLRCMPState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+
+	/* 6 dwords: 2-dword sequence header + 4 register values */
+	BEGIN_BATCH_NO_AUTOSTATE(6);
+	R600_OUT_BATCH_REGSEQ(CB_CLRCMP_CONTROL, 4);
+	R600_OUT_BATCH(r700->CB_CLRCMP_CONTROL.u32All);
+	R600_OUT_BATCH(r700->CB_CLRCMP_SRC.u32All);
+	R600_OUT_BATCH(r700->CB_CLRCMP_DST.u32All);
+	R600_OUT_BATCH(r700->CB_CLRCMP_MSK.u32All);
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+/*
+ * Emit blend control state from the cached chip state.
+ *
+ *  - families older than RV770: the global CB_BLEND_CONTROL register;
+ *  - all families: CB_COLOR_CONTROL;
+ *  - families newer than R600: one CB_BLENDn_CONTROL write for every render
+ *    target whose `enabled` flag is set.
+ *
+ * Each write is its own 3-dword batch.  check_blnd() supplies the size
+ * estimate for this atom.
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendCBBlendState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+	unsigned int ui;
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+	if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
+		BEGIN_BATCH_NO_AUTOSTATE(3);
+		R600_OUT_BATCH_REGVAL(CB_BLEND_CONTROL, r700->CB_BLEND_CONTROL.u32All);
+		END_BATCH();
+	}
+
+	BEGIN_BATCH_NO_AUTOSTATE(3);
+	R600_OUT_BATCH_REGVAL(CB_COLOR_CONTROL, r700->CB_COLOR_CONTROL.u32All);
+	END_BATCH();
+
+	if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) {
+		for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) {
+			if (r700->render_target[ui].enabled) {
+				BEGIN_BATCH_NO_AUTOSTATE(3);
+				/* per-target register: base offset advanced by 4 per target
+				 * (presumably the byte stride between registers - confirm) */
+				R600_OUT_BATCH_REGVAL(CB_BLEND0_CONTROL + (4 * ui),
+						      r700->render_target[ui].CB_BLEND0_CONTROL.u32All);
+				END_BATCH();
+			}
+		}
+	}
+
+	COMMIT_BATCH();
+}
+
+/*
+ * Emit the constant blend colour (CB_BLEND_RED, _GREEN, _BLUE, _ALPHA) as
+ * one four-register sequence from the cached chip state.
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendCBBlendColorState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+	/* 6 dwords: 2-dword sequence header + 4 register values */
+	BEGIN_BATCH_NO_AUTOSTATE(6);
+	R600_OUT_BATCH_REGSEQ(CB_BLEND_RED, 4);
+	R600_OUT_BATCH(r700->CB_BLEND_RED.u32All);
+	R600_OUT_BATCH(r700->CB_BLEND_GREEN.u32All);
+	R600_OUT_BATCH(r700->CB_BLEND_BLUE.u32All);
+	R600_OUT_BATCH(r700->CB_BLEND_ALPHA.u32All);
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+/*
+ * Emit setup-unit state from the cached chip state: PA_SU_SC_MODE_CNTL as a
+ * single write, then a four-value register sequence starting at
+ * PA_SU_POINT_SIZE.
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendSUState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+
+	/* 9 dwords: 3 for the single write + (2 + 4) for the sequence */
+	BEGIN_BATCH_NO_AUTOSTATE(9);
+	R600_OUT_BATCH_REGVAL(PA_SU_SC_MODE_CNTL, r700->PA_SU_SC_MODE_CNTL.u32All);
+	/*
+	 * NOTE(review): every other register sequence in this file lists
+	 * registers with consecutive indices.  The offsets annotated in
+	 * r700_chip.h give PA_SU_POINT_SIZE = 0xA280, PA_SU_POINT_MINMAX =
+	 * 0xA281 and PA_SU_LINE_CNTL = 0xA282, but PA_SU_VTX_CNTL = 0xA302,
+	 * while 0xA283 is annotated as PA_SC_LINE_STIPPLE.  If those
+	 * annotations are right, the fourth value below lands in
+	 * PA_SC_LINE_STIPPLE rather than PA_SU_VTX_CNTL.  A fix would shorten
+	 * the sequence to 3 and write PA_SU_VTX_CNTL separately, which needs
+	 * 11 dwords here and in the atom's registered size - confirm against
+	 * the register specification.
+	 */
+	R600_OUT_BATCH_REGSEQ(PA_SU_POINT_SIZE, 4);
+	R600_OUT_BATCH(r700->PA_SU_POINT_SIZE.u32All);
+	R600_OUT_BATCH(r700->PA_SU_POINT_MINMAX.u32All);
+	R600_OUT_BATCH(r700->PA_SU_LINE_CNTL.u32All);
+	R600_OUT_BATCH(r700->PA_SU_VTX_CNTL.u32All);
+	END_BATCH();
+	COMMIT_BATCH();
+
+}
+
+/*
+ * Emit polygon-offset state from the cached chip state: the depth-format
+ * control and clamp pair, then the front/back scale and offset quad.
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendPolyState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+
+	/* 10 dwords: (2 + 2) + (2 + 4) */
+	BEGIN_BATCH_NO_AUTOSTATE(10);
+	R600_OUT_BATCH_REGSEQ(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2);
+	R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All);
+	R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_CLAMP.u32All);
+	R600_OUT_BATCH_REGSEQ(PA_SU_POLY_OFFSET_FRONT_SCALE, 4);
+	R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_FRONT_SCALE.u32All);
+	R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_FRONT_OFFSET.u32All);
+	R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_BACK_SCALE.u32All);
+	R600_OUT_BATCH(r700->PA_SU_POLY_OFFSET_BACK_OFFSET.u32All);
+	END_BATCH();
+	COMMIT_BATCH();
+
+}
+
+/*
+ * Emit the clipper control registers (PA_CL_CLIP_CNTL, PA_CL_VTE_CNTL,
+ * PA_CL_VS_OUT_CNTL, PA_CL_NANINF_CNTL) from the cached chip state.
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendCLState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+	/* 12 dwords: four single-register writes at 3 dwords each */
+	BEGIN_BATCH_NO_AUTOSTATE(12);
+	R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, r700->PA_CL_CLIP_CNTL.u32All);
+	R600_OUT_BATCH_REGVAL(PA_CL_VTE_CNTL, r700->PA_CL_VTE_CNTL.u32All);
+	R600_OUT_BATCH_REGVAL(PA_CL_VS_OUT_CNTL, r700->PA_CL_VS_OUT_CNTL.u32All);
+	R600_OUT_BATCH_REGVAL(PA_CL_NANINF_CNTL, r700->PA_CL_NANINF_CNTL.u32All);
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+/*
+ * Emit the four guard-band adjust registers (vertical clip/discard, then
+ * horizontal clip/discard) as one register sequence from the cached chip
+ * state.
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendGBState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+
+	/* 6 dwords: 2-dword sequence header + 4 register values */
+	BEGIN_BATCH_NO_AUTOSTATE(6);
+	R600_OUT_BATCH_REGSEQ(PA_CL_GB_VERT_CLIP_ADJ, 4);
+	R600_OUT_BATCH(r700->PA_CL_GB_VERT_CLIP_ADJ.u32All);
+	R600_OUT_BATCH(r700->PA_CL_GB_VERT_DISC_ADJ.u32All);
+	R600_OUT_BATCH(r700->PA_CL_GB_HORZ_CLIP_ADJ.u32All);
+	R600_OUT_BATCH(r700->PA_CL_GB_HORZ_DISC_ADJ.u32All);
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+/*
+ * Emit all scissor-related registers from the cached chip state in three
+ * register sequences: the screen scissor pair, the twelve registers from
+ * PA_SC_WINDOW_OFFSET through PA_SC_CLIPRECT_3_BR, and the generic scissor
+ * pair.
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendScissorState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+	/* 22 dwords: (2 + 2) + (2 + 12) + (2 + 2) */
+	BEGIN_BATCH_NO_AUTOSTATE(22);
+	R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2);
+	R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_TL.u32All);
+	R600_OUT_BATCH(r700->PA_SC_SCREEN_SCISSOR_BR.u32All);
+
+	/* window offset, window scissor, clip-rect rule and the four clip rects */
+	R600_OUT_BATCH_REGSEQ(PA_SC_WINDOW_OFFSET, 12);
+	R600_OUT_BATCH(r700->PA_SC_WINDOW_OFFSET.u32All);
+	R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_TL.u32All);
+	R600_OUT_BATCH(r700->PA_SC_WINDOW_SCISSOR_BR.u32All);
+	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_RULE.u32All);
+	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_TL.u32All);
+	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_0_BR.u32All);
+	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_TL.u32All);
+	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_1_BR.u32All);
+	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_TL.u32All);
+	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_2_BR.u32All);
+	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_TL.u32All);
+	R600_OUT_BATCH(r700->PA_SC_CLIPRECT_3_BR.u32All);
+
+	R600_OUT_BATCH_REGSEQ(PA_SC_GENERIC_SCISSOR_TL, 2);
+	R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_TL.u32All);
+	R600_OUT_BATCH(r700->PA_SC_GENERIC_SCISSOR_BR.u32All);
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+/*
+ * Emit scan-converter state from the cached chip state: edge rule, line
+ * stipple, multipass PS control, mode control and line control, each as an
+ * individual register write.
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendSCState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+	/* 15 dwords: five single-register writes at 3 dwords each */
+	BEGIN_BATCH_NO_AUTOSTATE(15);
+	R600_OUT_BATCH_REGVAL(R7xx_PA_SC_EDGERULE, r700->PA_SC_EDGERULE.u32All);
+	R600_OUT_BATCH_REGVAL(PA_SC_LINE_STIPPLE, r700->PA_SC_LINE_STIPPLE.u32All);
+	R600_OUT_BATCH_REGVAL(PA_SC_MPASS_PS_CNTL, r700->PA_SC_MPASS_PS_CNTL.u32All);
+	R600_OUT_BATCH_REGVAL(PA_SC_MODE_CNTL, r700->PA_SC_MODE_CNTL.u32All);
+	R600_OUT_BATCH_REGVAL(PA_SC_LINE_CNTL, r700->PA_SC_LINE_CNTL.u32All);
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+/*
+ * Emit anti-aliasing state from the cached chip state: AA config, the two
+ * sample-location registers and the AA mask, each as an individual register
+ * write.
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendAAState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	BATCH_LOCALS(&context->radeon);
+
+	/* 12 dwords: four single-register writes at 3 dwords each */
+	BEGIN_BATCH_NO_AUTOSTATE(12);
+	R600_OUT_BATCH_REGVAL(PA_SC_AA_CONFIG, r700->PA_SC_AA_CONFIG.u32All);
+	R600_OUT_BATCH_REGVAL(PA_SC_AA_SAMPLE_LOCS_MCTX, r700->PA_SC_AA_SAMPLE_LOCS_MCTX.u32All);
+	R600_OUT_BATCH_REGVAL(PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX, r700->PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX.u32All);
+	R600_OUT_BATCH_REGVAL(PA_SC_AA_MASK, r700->PA_SC_AA_MASK.u32All);
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+/*
+ * Upload the pixel-shader ALU constants held in r700->ps.consts.
+ *
+ * Does nothing when ps.num_consts is zero.  Otherwise emits one
+ * SET_ALU_CONST packet: a 2-dword header followed by four dwords for each
+ * of the num_consts constant rows.  check_ps_consts() reports the same
+ * total.
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendPSConsts(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	int i;
+	BATCH_LOCALS(&context->radeon);
+
+	if (r700->ps.num_consts == 0)
+		return;
+
+	BEGIN_BATCH_NO_AUTOSTATE(2 + (r700->ps.num_consts * 4));
+	R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, (r700->ps.num_consts * 4)));
+	/* the assembler maps constants from the very beginning. */
+	R600_OUT_BATCH(SQ_ALU_CONSTANT_PS_OFFSET * 4);
+	/* one row = four 32-bit components, written in index order 0..3 */
+	for (i = 0; i < r700->ps.num_consts; i++) {
+		R600_OUT_BATCH(r700->ps.consts[i][0].u32All);
+		R600_OUT_BATCH(r700->ps.consts[i][1].u32All);
+		R600_OUT_BATCH(r700->ps.consts[i][2].u32All);
+		R600_OUT_BATCH(r700->ps.consts[i][3].u32All);
+	}
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+/*
+ * Upload the vertex-shader ALU constants held in r700->vs.consts.
+ *
+ * Does nothing (after the trace print) when vs.num_consts is zero.
+ * Otherwise emits one SET_ALU_CONST packet: a 2-dword header followed by
+ * four dwords for each of the num_consts constant rows.  check_vs_consts()
+ * reports the same total.
+ *
+ * ctx:  GL context; the driver context and register cache are derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+	int i;
+	BATCH_LOCALS(&context->radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+	if (r700->vs.num_consts == 0)
+		return;
+
+	BEGIN_BATCH_NO_AUTOSTATE(2 + (r700->vs.num_consts * 4));
+	R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_ALU_CONST, (r700->vs.num_consts * 4)));
+	/* the assembler maps constants from the very beginning. */
+	R600_OUT_BATCH(SQ_ALU_CONSTANT_VS_OFFSET * 4);
+	/* one row = four 32-bit components, written in index order 0..3 */
+	for (i = 0; i < r700->vs.num_consts; i++) {
+		R600_OUT_BATCH(r700->vs.consts[i][0].u32All);
+		R600_OUT_BATCH(r700->vs.consts[i][1].u32All);
+		R600_OUT_BATCH(r700->vs.consts[i][2].u32All);
+		R600_OUT_BATCH(r700->vs.consts[i][3].u32All);
+	}
+	END_BATCH();
+	COMMIT_BATCH();
+}
+
+/*
+ * Emit the "begin" half of the current query object.
+ *
+ * Zeroes the first 64 bytes of the query's buffer object through a CPU
+ * mapping, runs the command-stream space check for that buffer, then emits
+ * an EVENT_WRITE packet (ZPASS_DONE at query->curr_offset) followed by a
+ * relocation for the buffer.  Finally marks the query as having emitted its
+ * begin.
+ *
+ * Unlike the other emit functions in this file there is no COMMIT_BATCH()
+ * here, and the results of radeon_bo_map() and
+ * radeon_cs_space_check_with_bo() are not examined.
+ *
+ * NOTE(review): radeon->query.current is dereferenced unconditionally;
+ * check_queryobj() returns 0 when it is NULL - presumably the caller skips
+ * emission in that case, confirm.
+ *
+ * ctx:  GL context; the common radeon context is derived from it.
+ * atom: state atom being emitted (not referenced here).
+ */
+static void r700SendQueryBegin(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *query = radeon->query.current;
+	BATCH_LOCALS(radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+	/* clear the buffer */
+	radeon_bo_map(query->bo, GL_FALSE);
+	memset(query->bo->ptr, 0, 4 * 2 * sizeof(uint64_t)); /* 4 DBs, 2 qwords each */
+	radeon_bo_unmap(query->bo);
+
+	radeon_cs_space_check_with_bo(radeon->cmdbuf.cs,
+				      query->bo,
+				      0, RADEON_GEM_DOMAIN_GTT);
+
+	/* 4 packet dwords + 2 for the relocation entry */
+	BEGIN_BATCH_NO_AUTOSTATE(4 + 2);
+	R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+	R600_OUT_BATCH(ZPASS_DONE);
+	R600_OUT_BATCH(query->curr_offset); /* hw writes qwords */
+	R600_OUT_BATCH(0x00000000);
+	R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);
+	END_BATCH();
+	query->emitted_begin = GL_TRUE;
+}
+
+/*
+ * Size callback for atoms whose emission size never varies: reports the
+ * size the atom was registered with.
+ *
+ * ctx:  GL context (not referenced).
+ * atom: atom being sized.
+ * Returns atom->cmd_size.
+ */
+static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	const int dwords = atom->cmd_size;
+
+	return dwords;
+}
+
+/*
+ * Size callback for the "cb" atom.
+ *
+ * r700SendCBState() always emits 7 dwords (mask sequence plus shader
+ * control); chip families older than RV770 additionally get the 11-dword
+ * clear-colour and fog-colour sequences.
+ *
+ * ctx:  GL context; the chip family is read through its driver context.
+ * atom: atom being sized (not referenced).
+ * Returns the dword count for this chip family.
+ */
+static int check_cb(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *r600 = R700_CONTEXT(ctx);
+	int dwords;
+
+	if (r600->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+		dwords = 7 + 11;
+	else
+		dwords = 7;
+
+	radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, dwords);
+	return dwords;
+}
+
+/*
+ * Size callback for the "blnd" atom (see r700SendCBBlendState()).
+ *
+ * Counts 3 dwords per register write: CB_COLOR_CONTROL on every chip,
+ * CB_BLEND_CONTROL on families older than RV770, and on families newer than
+ * R600 one per-target blend control for target 0 (always counted) plus every
+ * further target that is flagged enabled.
+ *
+ * ctx:  GL context; chip family and register cache come from its driver context.
+ * atom: atom being sized (not referenced).
+ * Returns the estimated dword count.
+ */
+static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *r600 = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *hw = (R700_CHIP_CONTEXT*)(&r600->hw);
+	unsigned int rt;
+	int dwords = 3;
+
+	if (r600->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+		dwords += 3;
+
+	if (r600->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) {
+		/* Targets get enabled in r700SetRenderTarget, which runs after
+		   the state size is computed.  Until MRTs are implemented,
+		   target 0 is counted unconditionally. */
+		dwords += 3;
+		for (rt = R700_MAX_RENDER_TARGETS - 1; rt >= 1; rt--)
+			dwords += hw->render_target[rt].enabled ? 3 : 0;
+	}
+
+	radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, dwords);
+	return dwords;
+}
+
+/*
+ * Size callback for the "ucp" atom: 6 dwords for every user clip plane
+ * whose `enabled` flag is set in the register cache.
+ *
+ * ctx:  GL context; the register cache comes from its driver context.
+ * atom: atom being sized (not referenced).
+ * Returns the dword count (0 when no plane is enabled).
+ */
+static int check_ucp(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *r600 = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *hw = (R700_CHIP_CONTEXT*)(&r600->hw);
+	int plane = R700_MAX_UCP;
+	int dwords = 0;
+
+	while (plane-- > 0) {
+		if (hw->ucp[plane].enabled)
+			dwords += 6;
+	}
+
+	radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, dwords);
+	return dwords;
+}
+
+/*
+ * Size callback for the "vtx" atom: 18 dwords per entry counted by
+ * tcl.aos_count, plus a fixed 6 dwords whenever there is at least one.
+ *
+ * ctx:  GL context; aos_count is read through its driver context.
+ * atom: atom being sized (not referenced).
+ * Returns the dword count (0 when aos_count is 0).
+ */
+static int check_vtx(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *r600 = R700_CONTEXT(ctx);
+	int dwords = r600->radeon.tcl.aos_count * 18;
+
+	dwords = dwords ? dwords + 6 : dwords;
+
+	radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, dwords);
+	return dwords;
+}
+
+/*
+ * Size callback shared by the texture atoms: 31 dwords for every texture
+ * unit that is both really enabled in the GL state and has a texture object
+ * recorded in the register cache.
+ *
+ * The trace line reports the dword total that is returned, like the other
+ * check_* callbacks, and the counter is a plain int so it matches the "%d"
+ * conversion.
+ *
+ * NOTE(review): r600InitAtoms registers this callback for the tx, tx_smplr
+ * and tx_brdr_clr atoms, whose per-unit allocations are 20, 5 and 6 dwords
+ * (31 in total), so each of the three atoms reports the combined per-unit
+ * size - confirm that this over-reservation is intended.
+ *
+ * ctx:  GL context; texture-unit state and the driver context come from it.
+ * atom: atom being sized (not referenced).
+ * Returns 31 * (number of active texture units).
+ */
+static int check_tx(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	unsigned int i;
+	int count = 0;
+
+	for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+		if (ctx->Texture.Unit[i]._ReallyEnabled && r700->textures[i])
+			count += 31;
+	}
+	radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+	return count;
+}
+
+/*
+ * Size callback for the "ps_consts" atom: four dwords per pixel-shader
+ * constant row plus the 2-dword packet header, or 0 when there are no
+ * constants (r700SendPSConsts() emits nothing in that case).
+ *
+ * ctx:  GL context; the register cache comes from its driver context.
+ * atom: atom being sized (not referenced).
+ * Returns the dword count.
+ */
+static int check_ps_consts(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *r600 = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *hw = (R700_CHIP_CONTEXT*)(&r600->hw);
+	const int payload = hw->ps.num_consts * 4;
+	const int dwords = payload ? payload + 2 : payload;
+
+	radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, dwords);
+	return dwords;
+}
+
+/*
+ * Size callback for the "vs_consts" atom: four dwords per vertex-shader
+ * constant row plus the 2-dword packet header, or 0 when there are no
+ * constants (r700SendVSConsts() emits nothing in that case).
+ *
+ * ctx:  GL context; the register cache comes from its driver context.
+ * atom: atom being sized (not referenced).
+ * Returns the dword count.
+ */
+static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	context_t *r600 = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *hw = (R700_CHIP_CONTEXT*)(&r600->hw);
+	const int payload = hw->vs.num_consts * 4;
+	const int dwords = payload ? payload + 2 : payload;
+
+	radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, dwords);
+	return dwords;
+}
+
+/*
+ * Size callback for the query atom: the registered size while a current
+ * query exists whose begin has not been emitted yet, otherwise 0.
+ *
+ * ctx:  GL context; the common radeon context is derived from it.
+ * atom: atom being sized; its cmd_size is the value reported.
+ * Returns atom->cmd_size or 0.
+ */
+static int check_queryobj(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *current = rmesa->query.current;
+	int dwords = 0;
+
+	if (current && !current->emitted_begin)
+		dwords = atom->cmd_size;
+
+	radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, dwords);
+	return dwords;
+}
+
+/*
+ * Register one state atom.
+ *
+ *   ATOM  member name inside context->atoms; also stringified as the atom name
+ *   CHK   suffix pasted onto "check_" to select the size callback
+ *   SZ    size in dwords stored as cmd_size and added to hw.max_state_size
+ *   EMIT  emit callback
+ *
+ * The atom starts clean (dirty = GL_FALSE) with no command buffer and is
+ * appended to the tail of hw.atomlist.  The macro expects a variable named
+ * `context` in the calling scope and expands SZ twice, so SZ should be free
+ * of side effects.
+ */
+#define ALLOC_STATE( ATOM, CHK, SZ, EMIT )				\
+do {									\
+	context->atoms.ATOM.cmd_size = (SZ);				\
+	context->atoms.ATOM.cmd = NULL;					\
+	context->atoms.ATOM.name = #ATOM;				\
+	context->atoms.ATOM.idx = 0;					\
+	context->atoms.ATOM.check = check_##CHK;			\
+	context->atoms.ATOM.dirty = GL_FALSE;				\
+	context->atoms.ATOM.emit = (EMIT);				\
+	context->radeon.hw.max_state_size += (SZ);			\
+	insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \
+} while (0)
+
+/*
+ * Register the query-begin atom stored in radeon->query.queryobj.
+ *
+ * Performs the same steps as ALLOC_STATE for an atom that lives in the
+ * common radeon context: fills in size, name, callbacks (check_queryobj /
+ * r700SendQueryBegin), adds SZ to hw.max_state_size and appends the atom to
+ * the tail of hw.atomlist.
+ *
+ * radeon: common radeon context owning the query atom and the atom list.
+ * SZ:     size in dwords to register for the atom.
+ */
+static void r600_init_query_stateobj(radeonContextPtr radeon, int SZ)
+{
+	radeon->query.queryobj.cmd_size = (SZ);
+	radeon->query.queryobj.cmd = NULL;
+	radeon->query.queryobj.name = "queryobj";
+	radeon->query.queryobj.idx = 0;
+	radeon->query.queryobj.check = check_queryobj;
+	radeon->query.queryobj.dirty = GL_FALSE;
+	radeon->query.queryobj.emit = r700SendQueryBegin;
+	radeon->hw.max_state_size += (SZ);
+	insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj);
+}
+
+/*
+ * Build the list of state atoms for a context.
+ *
+ * Resets hw.max_state_size to a fixed base, empties hw.atomlist and then
+ * registers every atom with its size callback, maximum dword size and emit
+ * function.  Atoms are appended at the tail, so the list order is the order
+ * of the ALLOC_STATE lines below, with the query atom last.  Each
+ * registration adds its size to hw.max_state_size.  Finally the whole
+ * hardware state is flagged dirty.
+ *
+ * context: driver context whose atoms and atom list are initialised.
+ */
+void r600InitAtoms(context_t *context)
+{
+	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context);
+	context->radeon.hw.max_state_size = 10 + 5 + 14; /* start 3d, idle, cb/db flush */
+
+	/* Setup the atom linked list */
+	make_empty_list(&context->radeon.hw.atomlist);
+	context->radeon.hw.atomlist.name = "atom-list";
+
+	/* arguments: atom member, check_<suffix> callback, max dwords, emit function */
+	ALLOC_STATE(sq, always, 34, r700SendSQConfig);
+	ALLOC_STATE(db, always, 17, r700SendDBState);
+	ALLOC_STATE(stencil, always, 4, r700SendStencilState);
+	ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState);
+	ALLOC_STATE(sc, always, 15, r700SendSCState);
+	ALLOC_STATE(scissor, always, 22, r700SendScissorState);
+	ALLOC_STATE(aa, always, 12, r700SendAAState);
+	ALLOC_STATE(cl, always, 12, r700SendCLState);
+	ALLOC_STATE(gb, always, 6, r700SendGBState);
+	ALLOC_STATE(ucp, ucp, (R700_MAX_UCP * 6), r700SendUCPState);
+	ALLOC_STATE(su, always, 9, r700SendSUState);
+	ALLOC_STATE(poly, always, 10, r700SendPolyState);
+	/* 18 = 7 common dwords + 11 emitted only on pre-RV770 parts (see check_cb) */
+	ALLOC_STATE(cb, cb, 18, r700SendCBState);
+	ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
+	ALLOC_STATE(cb_target, always, 29, r700SendRenderTargetState);
+	ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
+	ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
+	ALLOC_STATE(sx, always, 9, r700SendSXState);
+	ALLOC_STATE(vgt, always, 41, r700SendVGTState);
+	ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState);
+	ALLOC_STATE(vpt, always, 16, r700SendViewportState);
+	ALLOC_STATE(fs, always, 18, r700SendFSState);
+	ALLOC_STATE(vs, always, 21, r700SendVSState);
+	ALLOC_STATE(ps, always, 24, r700SendPSState);
+	ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts);
+	ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts);
+	ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState);
+	/* the three texture atoms share check_tx; their per-unit sizes sum to 31 */
+	ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
+	ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
+	ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
+	/* r700SendQueryBegin emits 6 dwords; presumably the factor 2 also
+	 * covers the matching end packet - confirm */
+	r600_init_query_stateobj(&context->radeon, 6 * 2);
+
+	context->radeon.hw.is_dirty = GL_TRUE;
+	context->radeon.hw.all_dirty = GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r600/r700_chip.h b/src/mesa/drivers/dri/r600/r700_chip.h
new file mode 100644
index 0000000000..ae249e15fd
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_chip.h
@@ -0,0 +1,503 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _R700_CHIP_H_
+#define _R700_CHIP_H_
+
+#include "r600_context.h"
+
+#include "r600_reg.h"
+#include "r600_reg_auto_r6xx.h"
+#include "r600_reg_r6xx.h"
+#include "r600_reg_r7xx.h"
+
+#include "r700_chipoffset.h"
+
+/*
+ * Bit-field helpers for register words (the u32All member of UINT_FLOAT).
+ *
+ *   SETfield   clears the bits of `mask` in x, then ORs in (val << shift).
+ *              The shifted value is not masked, so a val wider than the
+ *              field spills into neighbouring bits; x is evaluated twice.
+ *   CLEARfield clears the bits of `mask` in x.
+ *   SETbit     sets the bits of `bit` in x (`bit` is a mask, not an index).
+ *   CLEARbit   clears the bits of `bit` in x.
+ */
+#define SETfield(x, val, shift, mask)  ( (x) = ((x) & ~(mask)) | ((val) << (shift)) ) /* u32All */
+#define CLEARfield(x, mask)            ( (x) &= ~(mask) )
+#define SETbit(x, bit)                 ( (x) |= (bit) )
+#define CLEARbit(x, bit)               ( (x) &= ~(bit) )
+
+/*
+ * Fixed upper bounds.  They size the per-unit arrays in the state
+ * structures of this header (viewport[], ucp[], consts[][4]) and the
+ * maximum atom sizes registered by r600InitAtoms().
+ */
+#define R700_TEXTURE_NUMBERUNITS 16
+#define R700_MAX_RENDER_TARGETS  8
+#define R700_MAX_VIEWPORTS       16
+#define R700_MAX_SHADER_EXPORTS  32
+#define R700_MAX_UCP             6
+#define R700_MAX_DX9_CONSTS      256
+
+/* Enums not shown in r600_*.h */
+
+/* NOTE(review): presumably the number of dwords per fetch resource entry - confirm. */
+#define FETCH_RESOURCE_STRIDE 7
+
+/*
+ * Base register indices.  The offsets annotated on the state structures in
+ * this header fall in these ranges (e.g. SQ_CONFIG at 0x2300, DB_DEPTH_SIZE
+ * at 0xA000).  NOTE(review): presumably dword indices of the config,
+ * context and control-constant register spaces - confirm against r600_reg.h.
+ */
+#define ASIC_CONFIG_BASE_INDEX    0x2000
+#define ASIC_CONTEXT_BASE_INDEX   0xA000
+#define ASIC_CTL_CONST_BASE_INDEX 0xF3FC
+
+
+/* Absolute (0) versus relative (1) selector.
+ * NOTE(review): presumably an SQ addressing/index mode field value - confirm. */
+enum 
+{
+    SQ_ABSOLUTE                              = 0x00000000,
+    SQ_RELATIVE                              = 0x00000001,
+};
+
+/* Four SQ_ALU_SCL_* selector values (0..3).
+ * NOTE(review): the meaning of the 210/122/212/221 suffixes is not visible
+ * here; presumably ALU scalar bank-swizzle choices - confirm. */
+enum 
+{
+    SQ_ALU_SCL_210                           = 0x00000000,
+    SQ_ALU_SCL_122                           = 0x00000001,
+    SQ_ALU_SCL_212                           = 0x00000002,
+    SQ_ALU_SCL_221                           = 0x00000003,
+};
+
+/* Unnormalized (0) versus normalized (1) selector.
+ * NOTE(review): presumably the texture coordinate type of a fetch - confirm. */
+enum 
+{
+    SQ_TEX_UNNORMALIZED                      = 0x00000000,
+    SQ_TEX_NORMALIZED                        = 0x00000001,
+};
+
+/* Pixel export destinations: MRT0..MRT7 are 0..7 and Z is 0x3d.
+ * NOTE(review): presumably array-base values for pixel export instructions
+ * - confirm. */
+enum 
+{
+    SQ_CF_PIXEL_MRT0                         = 0x00000000,
+    SQ_CF_PIXEL_MRT1                         = 0x00000001,
+    SQ_CF_PIXEL_MRT2                         = 0x00000002,
+    SQ_CF_PIXEL_MRT3                         = 0x00000003,
+    SQ_CF_PIXEL_MRT4                         = 0x00000004,
+    SQ_CF_PIXEL_MRT5                         = 0x00000005,
+    SQ_CF_PIXEL_MRT6                         = 0x00000006,
+    SQ_CF_PIXEL_MRT7                         = 0x00000007,
+    SQ_CF_PIXEL_Z                            = 0x0000003d,
+};
+
+/* Position export slots 0..3, numbered 0x3c..0x3f.  Note that SQ_CF_POS_1
+ * has the same numeric value (0x3d) as SQ_CF_PIXEL_Z. */
+typedef enum ENUM_SQ_CF_ARRAY_BASE_POS {
+SQ_CF_POS_0                              = 0x0000003c,
+SQ_CF_POS_1                              = 0x0000003d,
+SQ_CF_POS_2                              = 0x0000003e,
+SQ_CF_POS_3                              = 0x0000003f,
+} ENUM_SQ_CF_ARRAY_BASE_POS;
+
+/* NOTE(review): 23 is presumably a bit index (not a mask) inside the
+ * SQ_PGM_RESOURCES_* registers - confirm before using it with SETbit(). */
+enum
+{
+    PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit = 23,
+};
+
+/* Two independent value sets sharing one enum: XY filter modes (0..3) and
+ * mip filter modes (0..2).  The numeric ranges overlap, so a value is only
+ * meaningful together with the field it is written to. */
+enum 
+{
+    TEX_XYFilter_Point                       = 0x00000000,
+    TEX_XYFilter_Linear                      = 0x00000001,
+    TEX_XYFilter_Cubic                       = 0x00000002,
+    TEX_XYFilter_Cleartype                   = 0x00000003,
+
+    TEX_MipFilter_None                       = 0x00000000,
+    TEX_MipFilter_Point                      = 0x00000001,
+    TEX_MipFilter_Linear                     = 0x00000002,
+};
+
+/* Export write types.  Numerically bit 0 selects the _IND variant and
+ * bit 1 the _ACK variant. */
+enum 
+{
+    SQ_EXPORT_WRITE                          = 0x00000000,
+    SQ_EXPORT_WRITE_IND                      = 0x00000001,
+    SQ_EXPORT_WRITE_ACK                      = 0x00000002,
+    SQ_EXPORT_WRITE_IND_ACK                  = 0x00000003,
+};
+
+/* --------------------------------- */
+
+/*
+ * PM4 packet header bases: the packet type (0..3) sits in the top two bits
+ * of the header dword.
+ *
+ * NOTE(review): 0x80000000 and 0xC0000000 are not representable as int,
+ * which ISO C (before C23) requires of enumerator values; this relies on a
+ * compiler extension.
+ */
+enum
+{
+    R700_PM4_PACKET0_NOP = 0x00000000,
+    R700_PM4_PACKET1_NOP = 0x40000000,
+    R700_PM4_PACKET2_NOP = 0x80000000,
+    R700_PM4_PACKET3_NOP = 0xC0000000,
+};
+
+/*
+ * Type-3 packet headers: the PACKET3 base ORed with an IT_* opcode shifted
+ * into bits 8 and up.  The IT_* opcodes are defined outside this header;
+ * no count field is included here.
+ */
+#define  PM4_OPCODE_SET_INDEX_TYPE      (R700_PM4_PACKET3_NOP | (IT_INDEX_TYPE << 8))
+
+#define  PM4_OPCODE_DRAW_INDEX_AUTO     (R700_PM4_PACKET3_NOP | (IT_DRAW_INDEX_AUTO << 8))
+#define  PM4_OPCODE_DRAW_INDEX_IMMD     (R700_PM4_PACKET3_NOP | (IT_DRAW_INDEX_IMMD << 8))
+#define  PM4_OPCODE_WAIT_REG_MEM        (R700_PM4_PACKET3_NOP | (IT_WAIT_REG_MEM << 8))
+#define  PM4_OPCODE_SET_CONTEXT_REG     (R700_PM4_PACKET3_NOP | (IT_SET_CONTEXT_REG << 8))
+#define  PM4_OPCODE_SET_CONFIG_REG      (R700_PM4_PACKET3_NOP | (IT_SET_CONFIG_REG << 8))
+#define  PM4_OPCODE_SET_ALU_CONST       (R700_PM4_PACKET3_NOP | (IT_SET_ALU_CONST << 8))
+#define  PM4_OPCODE_SET_RESOURCE        (R700_PM4_PACKET3_NOP | (IT_SET_RESOURCE << 8))
+#define  PM4_OPCODE_SET_SAMPLER         (R700_PM4_PACKET3_NOP | (IT_SET_SAMPLER << 8))
+#define  PM4_OPCODE_CONTEXT_CONTROL     (R700_PM4_PACKET3_NOP | (IT_CONTEXT_CONTROL << 8))
+
+/*
+ * One register word viewed either as raw bits (u32All) or as a float
+ * (f32All).  The emit code writes the u32All view into the command stream.
+ * Assumes unsigned int and float have the same size.
+ */
+union UINT_FLOAT 
+{
+    unsigned int u32All;
+    float	f32All;
+};
+
+/* Compiled out: texture resource/sampler register caches that are not
+ * built into the driver. */
+#if 0
+typedef struct _TEXTURE_STATE_STRUCT
+{
+    union UINT_FLOAT     SQ_TEX_RESOURCE0;
+    union UINT_FLOAT     SQ_TEX_RESOURCE1;
+    union UINT_FLOAT     SQ_TEX_RESOURCE2;
+    union UINT_FLOAT     SQ_TEX_RESOURCE3;
+    union UINT_FLOAT     SQ_TEX_RESOURCE4;
+    union UINT_FLOAT     SQ_TEX_RESOURCE5;
+    union UINT_FLOAT     SQ_TEX_RESOURCE6;
+    GLboolean                         enabled;
+} TEXTURE_STATE_STRUCT;
+
+typedef struct _SAMPLER_STATE_STRUCT
+{
+    union UINT_FLOAT      SQ_TEX_SAMPLER0;
+    union UINT_FLOAT      SQ_TEX_SAMPLER1;
+    union UINT_FLOAT      SQ_TEX_SAMPLER2;
+    GLboolean                         enabled;
+} SAMPLER_STATE_STRUCT;
+
+typedef struct _R700_TEXTURE_STATES
+{
+    TEXTURE_STATE_STRUCT *textures[R700_TEXTURE_NUMBERUNITS];
+    SAMPLER_STATE_STRUCT *samplers[R700_TEXTURE_NUMBERUNITS];
+} R700_TEXTURE_STATES;
+#endif
+
+/*
+ * Cached colour-buffer registers for one render target.  The trailing
+ * comments give the register index of the target-0 instance.  `enabled` is
+ * tested when blend state is sized and emitted (check_blnd,
+ * r700SendCBBlendState).
+ */
+typedef struct _RENDER_TARGET_STATE_STRUCT
+{
+	union UINT_FLOAT            	CB_COLOR0_BASE;  /* 0xA010 */
+	union UINT_FLOAT            	CB_COLOR0_SIZE;  /* 0xA018 */
+	union UINT_FLOAT            	CB_COLOR0_VIEW;  /* 0xA020 */
+	union UINT_FLOAT            	CB_COLOR0_INFO;  /* 0xA028 */
+	union UINT_FLOAT            	CB_COLOR0_TILE;  /* 0xA030 */
+	union UINT_FLOAT            	CB_COLOR0_FRAG;  /* 0xA038 */
+	union UINT_FLOAT            	CB_COLOR0_MASK;  /* 0xA040 */
+	union UINT_FLOAT         	CB_BLEND0_CONTROL;  /* 0xA1E0 */
+	GLboolean                         enabled;
+	GLboolean                         dirty;
+} RENDER_TARGET_STATE_STRUCT;
+
+/*
+ * Cached registers for one viewport: scissor rectangle, Z range, and the
+ * X/Y/Z scale and offset.  The trailing comments give the register index
+ * of the viewport-0 instance.
+ */
+typedef struct _VIEWPORT_STATE_STRUCT
+{
+	union UINT_FLOAT  	PA_SC_VPORT_SCISSOR_0_TL;  /* 0xA094 */
+	union UINT_FLOAT  	PA_SC_VPORT_SCISSOR_0_BR;  /* 0xA095 */
+	union UINT_FLOAT        PA_SC_VPORT_ZMIN_0;        /* 0xA0B4 */
+	union UINT_FLOAT        PA_SC_VPORT_ZMAX_0;        /* 0xA0B5 */
+	union UINT_FLOAT        PA_CL_VPORT_XSCALE;        /* 0xA10F */
+	union UINT_FLOAT       	PA_CL_VPORT_XOFFSET;       /* 0xA110 */
+	union UINT_FLOAT        PA_CL_VPORT_YSCALE;        /* 0xA111 */
+	union UINT_FLOAT       	PA_CL_VPORT_YOFFSET;       /* 0xA112 */
+	union UINT_FLOAT        PA_CL_VPORT_ZSCALE;        /* 0xA113 */
+	union UINT_FLOAT       	PA_CL_VPORT_ZOFFSET;       /* 0xA114 */
+	GLboolean                         enabled;
+	GLboolean                         dirty;
+} VIEWPORT_STATE_STRUCT;
+
+/*
+ * Cached X/Y/Z/W registers of one user clip plane.  check_ucp() counts
+ * 6 dwords for every plane whose `enabled` flag is set.
+ */
+typedef struct _UCP_STATE_STRUCT
+{
+	union UINT_FLOAT        PA_CL_UCP_0_X;
+	union UINT_FLOAT        PA_CL_UCP_0_Y;
+	union UINT_FLOAT        PA_CL_UCP_0_Z;
+	union UINT_FLOAT        PA_CL_UCP_0_W;
+	GLboolean                         enabled;
+	GLboolean                         dirty;
+} UCP_STATE_STRUCT;
+
+/*
+ * Cached pixel-shader program registers plus its ALU constants.  The first
+ * num_consts rows of consts[] (four components each) are uploaded by
+ * r700SendPSConsts().
+ */
+typedef struct _PS_STATE_STRUCT
+{
+	union UINT_FLOAT           	SQ_PGM_START_PS           ;  /* 0xA210 */
+	union UINT_FLOAT       	        SQ_PGM_RESOURCES_PS       ;  /* 0xA214 */
+	union UINT_FLOAT         	SQ_PGM_EXPORTS_PS         ;  /* 0xA215 */
+	union UINT_FLOAT       	        SQ_PGM_CF_OFFSET_PS       ;  /* 0xA233 */
+	GLboolean                         dirty;
+	int                             num_consts;
+	union UINT_FLOAT                consts[R700_MAX_DX9_CONSTS][4];
+} PS_STATE_STRUCT;
+
+/*
+ * Cached vertex-shader program registers plus its ALU constants.  The first
+ * num_consts rows of consts[] (four components each) are uploaded by
+ * r700SendVSConsts().
+ */
+typedef struct _VS_STATE_STRUCT
+{
+ 	union UINT_FLOAT           	SQ_PGM_START_VS           ;  /* 0xA216 */
+	union UINT_FLOAT  		SQ_PGM_RESOURCES_VS       ;  /* 0xA21A */
+	union UINT_FLOAT       	        SQ_PGM_CF_OFFSET_VS       ;  /* 0xA234 */
+	GLboolean                         dirty;
+	int                             num_consts;
+	union UINT_FLOAT                consts[R700_MAX_DX9_CONSTS][4];
+} VS_STATE_STRUCT;
+
+/* Cached GS program registers (start, resources, CF offset) and a dirty flag. */
+typedef struct _GS_STATE_STRUCT
+{
+	union UINT_FLOAT           	SQ_PGM_START_GS           ;  /* 0xA21B */
+	union UINT_FLOAT       	        SQ_PGM_RESOURCES_GS       ;  /* 0xA21F */
+	union UINT_FLOAT       	        SQ_PGM_CF_OFFSET_GS       ;  /* 0xA235 */
+	GLboolean                         dirty;
+} GS_STATE_STRUCT;
+
+/* Cached ES program registers (start, resources, CF offset) and a dirty flag. */
+typedef struct _ES_STATE_STRUCT
+{
+	union UINT_FLOAT           	SQ_PGM_START_ES           ;  /* 0xA220 */
+	union UINT_FLOAT       	        SQ_PGM_RESOURCES_ES       ;  /* 0xA224 */
+	union UINT_FLOAT       	        SQ_PGM_CF_OFFSET_ES       ;  /* 0xA236 */
+	GLboolean                         dirty;
+} ES_STATE_STRUCT;
+
+/* Cached FS program registers (start, resources, CF offset) and a dirty flag. */
+typedef struct _FS_STATE_STRUCT
+{
+	union UINT_FLOAT           	SQ_PGM_START_FS           ;  /* 0xA225 */
+	union UINT_FLOAT       	        SQ_PGM_RESOURCES_FS       ;  /* 0xA229 */
+	union UINT_FLOAT       	        SQ_PGM_CF_OFFSET_FS       ;  /* 0xA237 */
+	GLboolean                         dirty;
+} FS_STATE_STRUCT;
+
+/*
+ * Cached sequencer configuration: six registers with consecutive indices
+ * 0x2300..0x2305, above ASIC_CONFIG_BASE_INDEX.
+ */
+typedef struct _SQ_CONFIG_STRUCT
+{
+	union UINT_FLOAT     	        SQ_CONFIG                 ;  /* 0x2300 */
+	union UINT_FLOAT     	        SQ_GPR_RESOURCE_MGMT_1    ;  /* 0x2301 */
+	union UINT_FLOAT     	        SQ_GPR_RESOURCE_MGMT_2    ;  /* 0x2302 */
+	union UINT_FLOAT     	        SQ_THREAD_RESOURCE_MGMT   ;  /* 0x2303 */
+	union UINT_FLOAT     	        SQ_STACK_RESOURCE_MGMT_1  ;  /* 0x2304 */
+	union UINT_FLOAT     	        SQ_STACK_RESOURCE_MGMT_2  ;  /* 0x2305 */
+} SQ_CONFIG_STRUCT;
+
+typedef struct _R700_CHIP_CONTEXT
+{
+	// DB
+	union UINT_FLOAT             	DB_DEPTH_SIZE             ;  /* 0xA000 */
+	union UINT_FLOAT             	DB_DEPTH_VIEW             ;  /* 0xA001 */
+	union UINT_FLOAT             	DB_DEPTH_BASE             ;  /* 0xA003 */
+	union UINT_FLOAT             	DB_DEPTH_INFO             ;  /* 0xA004 */
+	GLboolean                       db_target_dirty;
+	union UINT_FLOAT                DB_HTILE_DATA_BASE        ;  /* 0xA005 */
+	union UINT_FLOAT          	DB_STENCIL_CLEAR          ;  /* 0xA00A */
+	union UINT_FLOAT            	DB_DEPTH_CLEAR            ;  /* 0xA00B */
+	union UINT_FLOAT            	DB_STENCILREFMASK         ;  /* 0xA10C */
+	union UINT_FLOAT            	DB_STENCILREFMASK_BF      ;  /* 0xA10D */
+	union UINT_FLOAT         	DB_RENDER_CONTROL         ;  /* 0xA343 */
+	union UINT_FLOAT        	DB_RENDER_OVERRIDE        ;  /* 0xA344 */
+	union UINT_FLOAT          	DB_HTILE_SURFACE          ;  /* 0xA349 */
+	union UINT_FLOAT          	DB_ALPHA_TO_MASK          ;  /* 0xA351 */
+	union UINT_FLOAT          	DB_DEPTH_CONTROL          ;  /* 0xA200 */
+	union UINT_FLOAT         	DB_SHADER_CONTROL         ;  /* 0xA203 */
+	GLboolean                       db_dirty;
+
+	// SC
+	union UINT_FLOAT   	        PA_SC_SCREEN_SCISSOR_TL   ;  /* 0xA00C */
+	union UINT_FLOAT   	        PA_SC_SCREEN_SCISSOR_BR   ;  /* 0xA00D */
+	union UINT_FLOAT       	        PA_SC_WINDOW_OFFSET       ;  /* 0xA080 */
+	union UINT_FLOAT   	        PA_SC_WINDOW_SCISSOR_TL   ;  /* 0xA081 */
+	union UINT_FLOAT   	        PA_SC_WINDOW_SCISSOR_BR   ;  /* 0xA082 */
+	union UINT_FLOAT       	        PA_SC_CLIPRECT_RULE       ;  /* 0xA083 */
+	union UINT_FLOAT       	        PA_SC_CLIPRECT_0_TL       ;  /* 0xA084 */
+	union UINT_FLOAT       	        PA_SC_CLIPRECT_0_BR       ;  /* 0xA085 */
+	union UINT_FLOAT       	        PA_SC_CLIPRECT_1_TL       ;  /* 0xA086 */
+	union UINT_FLOAT       	        PA_SC_CLIPRECT_1_BR       ;  /* 0xA087 */
+	union UINT_FLOAT       	        PA_SC_CLIPRECT_2_TL       ;  /* 0xA088 */
+	union UINT_FLOAT       	        PA_SC_CLIPRECT_2_BR       ;  /* 0xA089 */
+	union UINT_FLOAT       	        PA_SC_CLIPRECT_3_TL       ;  /* 0xA08A */
+	union UINT_FLOAT       	        PA_SC_CLIPRECT_3_BR       ;  /* 0xA08B */
+	union UINT_FLOAT            	PA_SC_EDGERULE            ;  /* 0xA08C */
+	union UINT_FLOAT  	        PA_SC_GENERIC_SCISSOR_TL  ;  /* 0xA090 */
+	union UINT_FLOAT  	        PA_SC_GENERIC_SCISSOR_BR  ;  /* 0xA091 */
+	GLboolean                       scissor_dirty;
+
+	union UINT_FLOAT        	PA_SC_LINE_STIPPLE        ;  /* 0xA283 */
+	union UINT_FLOAT           	PA_SC_LINE_CNTL           ;  /* 0xA300 */
+	union UINT_FLOAT           	PA_SC_AA_CONFIG           ;  /* 0xA301 */
+	union UINT_FLOAT       	        PA_SC_MPASS_PS_CNTL       ;  /* 0xA292 */
+	union UINT_FLOAT           	PA_SC_MODE_CNTL           ;  /* 0xA293 */
+	union UINT_FLOAT 	        PA_SC_AA_SAMPLE_LOCS_MCTX ;  /* 0xA307 */
+	union UINT_FLOAT                PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX; /* 0xA308 */
+	union UINT_FLOAT             	PA_SC_AA_MASK             ;  /* 0xA312 */
+	GLboolean                       sc_dirty;
+
+	// CL
+	union UINT_FLOAT           	PA_CL_CLIP_CNTL           ;  /* 0xA204 */
+	union UINT_FLOAT            	PA_CL_VTE_CNTL            ;  /* 0xA206 */
+	union UINT_FLOAT         	PA_CL_VS_OUT_CNTL         ;  /* 0xA207 */
+	union UINT_FLOAT         	PA_CL_NANINF_CNTL         ;  /* 0xA208 */
+	union UINT_FLOAT    	        PA_CL_GB_VERT_CLIP_ADJ    ;  /* 0xA303 */
+	union UINT_FLOAT    	        PA_CL_GB_VERT_DISC_ADJ    ;  /* 0xA304 */
+	union UINT_FLOAT    	        PA_CL_GB_HORZ_CLIP_ADJ    ;  /* 0xA305 */
+	union UINT_FLOAT    	        PA_CL_GB_HORZ_DISC_ADJ    ;  /* 0xA306 */
+	GLboolean                       cl_dirty;
+
+	// SU
+	union UINT_FLOAT        	PA_SU_SC_MODE_CNTL        ;  /* 0xA205 */
+	union UINT_FLOAT          	PA_SU_POINT_SIZE          ;  /* 0xA280 */
+	union UINT_FLOAT        	PA_SU_POINT_MINMAX        ;  /* 0xA281 */
+	union UINT_FLOAT           	PA_SU_LINE_CNTL           ;  /* 0xA282 */
+	union UINT_FLOAT            	PA_SU_VTX_CNTL            ;  /* 0xA302 */
+	union UINT_FLOAT                PA_SU_POLY_OFFSET_DB_FMT_CNTL;   /* 0xA37E */
+	union UINT_FLOAT   	        PA_SU_POLY_OFFSET_CLAMP   ;      /* 0xA37F */
+	union UINT_FLOAT                PA_SU_POLY_OFFSET_FRONT_SCALE;   /* 0xA380 */
+	union UINT_FLOAT                PA_SU_POLY_OFFSET_FRONT_OFFSET; /* 0xA381 */
+	union UINT_FLOAT                PA_SU_POLY_OFFSET_BACK_SCALE;    /* 0xA382 */
+	union UINT_FLOAT                PA_SU_POLY_OFFSET_BACK_OFFSET;   /* 0xA383 */
+	GLboolean                       su_dirty;
+
+	VIEWPORT_STATE_STRUCT           viewport[R700_MAX_VIEWPORTS];
+	UCP_STATE_STRUCT                ucp[R700_MAX_UCP];
+
+	// CB
+	union UINT_FLOAT              	CB_CLEAR_RED_R6XX         ;  /* 0xA048 */
+	union UINT_FLOAT            	CB_CLEAR_GREEN_R6XX       ;  /* 0xA049 */
+	union UINT_FLOAT             	CB_CLEAR_BLUE_R6XX        ;  /* 0xA04A */
+	union UINT_FLOAT            	CB_CLEAR_ALPHA_R6XX       ;  /* 0xA04B */
+	union UINT_FLOAT            	CB_TARGET_MASK            ;  /* 0xA08E */
+	union UINT_FLOAT            	CB_SHADER_MASK            ;  /* 0xA08F */
+	union UINT_FLOAT              	CB_BLEND_RED              ;  /* 0xA105 */
+	union UINT_FLOAT            	CB_BLEND_GREEN            ;  /* 0xA106 */
+	union UINT_FLOAT             	CB_BLEND_BLUE             ;  /* 0xA107 */
+	union UINT_FLOAT            	CB_BLEND_ALPHA            ;  /* 0xA108 */
+	union UINT_FLOAT              	CB_FOG_RED_R6XX           ;  /* 0xA109 */
+	union UINT_FLOAT            	CB_FOG_GREEN_R6XX         ;  /* 0xA10A */
+	union UINT_FLOAT             	CB_FOG_BLUE_R6XX          ;  /* 0xA10B */
+	union UINT_FLOAT         	CB_SHADER_CONTROL         ;  /* 0xA1E8 */
+	union UINT_FLOAT          	CB_COLOR_CONTROL          ;  /* 0xA202 */
+	union UINT_FLOAT         	CB_CLRCMP_CONTROL         ;  /* 0xA30C */
+	union UINT_FLOAT             	CB_CLRCMP_SRC             ;  /* 0xA30D */
+	union UINT_FLOAT             	CB_CLRCMP_DST             ;  /* 0xA30E */
+	union UINT_FLOAT             	CB_CLRCMP_MSK             ;  /* 0xA30F */
+	union UINT_FLOAT             	CB_BLEND_CONTROL          ;  /* 0xABD0 */
+	GLboolean                       cb_dirty;
+	RENDER_TARGET_STATE_STRUCT      render_target[R700_MAX_RENDER_TARGETS];
+
+	// SX
+	union UINT_FLOAT                SX_MISC                   ;  /* 0xA0D4 */
+	union UINT_FLOAT     	        SX_ALPHA_TEST_CONTROL     ;  /* 0xA104 */
+	union UINT_FLOAT     	        SX_ALPHA_REF              ;  /* 0xA10E */
+	GLboolean                       sx_dirty;
+
+	// VGT
+	union UINT_FLOAT          	VGT_MAX_VTX_INDX          ;  /* 0xA100 */
+	union UINT_FLOAT          	VGT_MIN_VTX_INDX          ;  /* 0xA101 */
+	union UINT_FLOAT           	VGT_INDX_OFFSET           ;  /* 0xA102 */
+	union UINT_FLOAT                VGT_MULTI_PRIM_IB_RESET_INDX;  /* 0xA103 */
+	union UINT_FLOAT      	        VGT_OUTPUT_PATH_CNTL      ;  /* 0xA284 */
+	union UINT_FLOAT      	        VGT_HOS_CNTL              ;  /* 0xA285 */
+	union UINT_FLOAT      	        VGT_HOS_MAX_TESS_LEVEL    ;  /* 0xA286 */
+	union UINT_FLOAT      	        VGT_HOS_MIN_TESS_LEVEL    ;  /* 0xA287 */
+	union UINT_FLOAT      	        VGT_HOS_REUSE_DEPTH       ;  /* 0xA288 */
+	union UINT_FLOAT      	        VGT_GROUP_PRIM_TYPE       ;  /* 0xA289 */
+	union UINT_FLOAT      	        VGT_GROUP_FIRST_DECR      ;  /* 0xA28A */
+	union UINT_FLOAT      	        VGT_GROUP_DECR            ;  /* 0xA28B */
+	union UINT_FLOAT      	        VGT_GROUP_VECT_0_CNTL     ;  /* 0xA28C */
+	union UINT_FLOAT      	        VGT_GROUP_VECT_1_CNTL     ;  /* 0xA28D */
+	union UINT_FLOAT      	        VGT_GROUP_VECT_0_FMT_CNTL ;  /* 0xA28E */
+	union UINT_FLOAT      	        VGT_GROUP_VECT_1_FMT_CNTL ;  /* 0xA28F */
+	union UINT_FLOAT               	VGT_GS_MODE               ;  /* 0xA290 */
+	union UINT_FLOAT        	VGT_PRIMITIVEID_EN        ;  /* 0xA2A1 */
+	union UINT_FLOAT	        VGT_MULTI_PRIM_IB_RESET_EN;  /* 0xA2A5 */
+	union UINT_FLOAT  	        VGT_INSTANCE_STEP_RATE_0  ;  /* 0xA2A8 */
+	union UINT_FLOAT  	        VGT_INSTANCE_STEP_RATE_1  ;  /* 0xA2A9 */
+	union UINT_FLOAT            	VGT_STRMOUT_EN            ;  /* 0xA2AC */
+	union UINT_FLOAT             	VGT_REUSE_OFF             ;  /* 0xA2AD */
+	union UINT_FLOAT             	VGT_VTX_CNT_EN            ;  /* 0xA2AE */
+	union UINT_FLOAT            	VGT_STRMOUT_BUFFER_EN     ;  /* 0xA2C8 */
+	GLboolean                       vgt_dirty;
+
+	// SPI
+	union UINT_FLOAT           	SPI_VS_OUT_ID_0           ;  /* 0xA185 */
+	union UINT_FLOAT           	SPI_VS_OUT_ID_1           ;  /* 0xA186 */
+	union UINT_FLOAT           	SPI_VS_OUT_ID_2           ;  /* 0xA187 */
+	union UINT_FLOAT           	SPI_VS_OUT_ID_3           ;  /* 0xA188 */
+	union UINT_FLOAT           	SPI_VS_OUT_ID_4           ;  /* 0xA189 */
+	union UINT_FLOAT           	SPI_VS_OUT_ID_5           ;  /* 0xA18A */
+	union UINT_FLOAT           	SPI_VS_OUT_ID_6           ;  /* 0xA18B */
+	union UINT_FLOAT           	SPI_VS_OUT_ID_7           ;  /* 0xA18C */
+	union UINT_FLOAT           	SPI_VS_OUT_ID_8           ;  /* 0xA18D */
+	union UINT_FLOAT           	SPI_VS_OUT_ID_9           ;  /* 0xA18E */
+	union UINT_FLOAT                SPI_VS_OUT_CONFIG         ;  /* 0xA1B1 */
+	union UINT_FLOAT       	        SPI_THREAD_GROUPING       ;  /* 0xA1B2 */
+	union UINT_FLOAT       	        SPI_PS_IN_CONTROL_0       ;  /* 0xA1B3 */
+	union UINT_FLOAT       	        SPI_PS_IN_CONTROL_1       ;  /* 0xA1B4 */
+	union UINT_FLOAT       	        SPI_INTERP_CONTROL_0      ;  /* 0xA1B5 */
+ 	union UINT_FLOAT               	SPI_INPUT_Z               ;  /* 0xA1B6 */
+	union UINT_FLOAT              	SPI_FOG_CNTL              ;  /* 0xA1B7 */
+	union UINT_FLOAT              	SPI_FOG_FUNC_SCALE        ;  /* 0xA1B8 */
+	union UINT_FLOAT              	SPI_FOG_FUNC_BIAS         ;  /* 0xA1B9 */
+
+	union UINT_FLOAT         	SQ_VTX_SEMANTIC_0         ;  /* 0xA0E0 */
+	union UINT_FLOAT         	SQ_VTX_SEMANTIC_1         ;  /* 0xA0E1 */
+	union UINT_FLOAT         	SQ_VTX_SEMANTIC_2         ;  /* 0xA0E2 */
+	union UINT_FLOAT         	SQ_VTX_SEMANTIC_3         ;  /* 0xA0E3 */
+	union UINT_FLOAT         	SQ_VTX_SEMANTIC_4         ;  /* 0xA0E4 */
+	union UINT_FLOAT         	SQ_VTX_SEMANTIC_5         ;  /* 0xA0E5 */
+	union UINT_FLOAT         	SQ_VTX_SEMANTIC_6         ;  /* 0xA0E6 */
+	union UINT_FLOAT         	SQ_VTX_SEMANTIC_7         ;  /* 0xA0E7 */
+	union UINT_FLOAT         	SQ_VTX_SEMANTIC_8         ;  /* 0xA0E8 */
+	union UINT_FLOAT         	SQ_VTX_SEMANTIC_9         ;  /* 0xA0E9 */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_10        ;  /* 0xA0EA */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_11        ;  /* 0xA0EB */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_12        ;  /* 0xA0EC */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_13        ;  /* 0xA0ED */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_14        ;  /* 0xA0EE */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_15        ;  /* 0xA0EF */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_16        ;  /* 0xA0F0 */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_17        ;  /* 0xA0F1 */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_18        ;  /* 0xA0F2 */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_19        ;  /* 0xA0F3 */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_20        ;  /* 0xA0F4 */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_21        ;  /* 0xA0F5 */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_22        ;  /* 0xA0F6 */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_23        ;  /* 0xA0F7 */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_24        ;  /* 0xA0F8 */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_25        ;  /* 0xA0F9 */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_26        ;  /* 0xA0FA */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_27        ;  /* 0xA0FB */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_28        ;  /* 0xA0FC */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_29        ;  /* 0xA0FD */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_30        ;  /* 0xA0FE */
+	union UINT_FLOAT        	SQ_VTX_SEMANTIC_31        ;  /* 0xA0FF */
+	union UINT_FLOAT       	        SPI_PS_INPUT_CNTL[R700_MAX_SHADER_EXPORTS];
+	GLboolean                       spi_dirty;
+
+	// shaders
+	PS_STATE_STRUCT                 ps;
+	VS_STATE_STRUCT                 vs;
+	GS_STATE_STRUCT                 gs;
+	ES_STATE_STRUCT                 es;
+	FS_STATE_STRUCT                 fs;
+
+	// SQ CONFIG
+	SQ_CONFIG_STRUCT                sq_config;
+	// misc
+	union UINT_FLOAT             	TA_CNTL_AUX               ;  /* 0x2542 */
+	union UINT_FLOAT             	VC_ENHANCE                ;  /* 0x25C5 */
+	union UINT_FLOAT             	SQ_DYN_GPR_CNTL_PS_FLUSH_REQ;  /* 0x2363 */
+	union UINT_FLOAT             	DB_DEBUG                  ;  /* 0x260C */
+	union UINT_FLOAT             	DB_WATERMARKS             ;  /* 0x260E */
+	// SQ
+	union UINT_FLOAT     	        SQ_ESGS_RING_ITEMSIZE     ;  /* 0xA22A */
+	union UINT_FLOAT     	        SQ_GSVS_RING_ITEMSIZE     ;  /* 0xA22B */
+	union UINT_FLOAT    	        SQ_ESTMP_RING_ITEMSIZE    ;  /* 0xA22C */
+	union UINT_FLOAT    	        SQ_GSTMP_RING_ITEMSIZE    ;  /* 0xA22D */
+	union UINT_FLOAT    	        SQ_VSTMP_RING_ITEMSIZE    ;  /* 0xA22E */
+	union UINT_FLOAT    	        SQ_PSTMP_RING_ITEMSIZE    ;  /* 0xA22F */
+	union UINT_FLOAT     	        SQ_FBUF_RING_ITEMSIZE     ;  /* 0xA230 */
+	union UINT_FLOAT    	        SQ_REDUC_RING_ITEMSIZE    ;  /* 0xA231 */
+	union UINT_FLOAT       	        SQ_GS_VERT_ITEMSIZE       ;  /* 0xA232 */
+	GLboolean                       sq_dirty;
+
+	radeonTexObj*                   textures[R700_TEXTURE_NUMBERUNITS];
+
+	GLboolean                       bEnablePerspective;
+
+} R700_CHIP_CONTEXT;
+
+#endif /* _R700_CHIP_H_ */
+
diff --git a/src/mesa/drivers/dri/r600/r700_chipoffset.h b/src/mesa/drivers/dri/r600/r700_chipoffset.h
new file mode 100644
index 0000000000..4d73fb99a7
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_chipoffset.h
@@ -0,0 +1,693 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#ifndef _R700_CHIPOFFSET_H_
+#define _R700_CHIPOFFSET_H_
+/* WAIT_UNTIL, then scratch regs 0-7; each mmGUI_SCRATCH_REGn equals mmSCRATCH_REGn. */
+#define mmWAIT_UNTIL                                    0x2010
+#define mmSCRATCH_REG0                                  0x2140
+#define mmGUI_SCRATCH_REG0                              0x2140
+#define mmSCRATCH_REG1                                  0x2141
+#define mmGUI_SCRATCH_REG1                              0x2141
+#define mmSCRATCH_REG2                                  0x2142
+#define mmGUI_SCRATCH_REG2                              0x2142
+#define mmSCRATCH_REG3                                  0x2143
+#define mmGUI_SCRATCH_REG3                              0x2143
+#define mmSCRATCH_REG4                                  0x2144
+#define mmGUI_SCRATCH_REG4                              0x2144
+#define mmSCRATCH_REG5                                  0x2145
+#define mmGUI_SCRATCH_REG5                              0x2145
+#define mmSCRATCH_REG6                                  0x2146
+#define mmGUI_SCRATCH_REG6                              0x2146
+#define mmSCRATCH_REG7                                  0x2147
+#define mmGUI_SCRATCH_REG7                              0x2147
+/* CP_COHER_* (CNTL, SIZE, BASE, STATUS): four consecutive values, 0x217C-0x217F. */
+#define mmCP_COHER_CNTL                                 0x217C
+#define mmCP_COHER_SIZE                                 0x217D
+#define mmCP_COHER_BASE                                 0x217E
+#define mmCP_COHER_STATUS                               0x217F
+/* Same values as annotated on the "misc" members of R700_CHIP_CONTEXT; not sorted by value. */
+#define mmTA_CNTL_AUX                                   0x2542
+#define mmVC_ENHANCE                                    0x25C5
+#define mmSQ_DYN_GPR_CNTL_PS_FLUSH_REQ                  0x2363
+#define mmDB_DEBUG                                      0x260C
+#define mmDB_WATERMARKS                                 0x260E
+/* PA_CL. Viewport set n (0-15; set 0 unsuffixed) = 0xA10F+6n: XSCALE,XOFFSET,YSCALE,YOFFSET,ZSCALE,ZOFFSET. */
+#define mmPA_CL_VPORT_XSCALE                            0xA10F
+#define mmPA_CL_VPORT_XOFFSET                           0xA110
+#define mmPA_CL_VPORT_YSCALE                            0xA111
+#define mmPA_CL_VPORT_YOFFSET                           0xA112
+#define mmPA_CL_VPORT_ZSCALE                            0xA113
+#define mmPA_CL_VPORT_ZOFFSET                           0xA114
+#define mmPA_CL_VPORT_XSCALE_1                          0xA115
+#define mmPA_CL_VPORT_XSCALE_2                          0xA11B
+#define mmPA_CL_VPORT_XSCALE_3                          0xA121
+#define mmPA_CL_VPORT_XSCALE_4                          0xA127
+#define mmPA_CL_VPORT_XSCALE_5                          0xA12D
+#define mmPA_CL_VPORT_XSCALE_6                          0xA133
+#define mmPA_CL_VPORT_XSCALE_7                          0xA139
+#define mmPA_CL_VPORT_XSCALE_8                          0xA13F
+#define mmPA_CL_VPORT_XSCALE_9                          0xA145
+#define mmPA_CL_VPORT_XSCALE_10                         0xA14B
+#define mmPA_CL_VPORT_XSCALE_11                         0xA151
+#define mmPA_CL_VPORT_XSCALE_12                         0xA157
+#define mmPA_CL_VPORT_XSCALE_13                         0xA15D
+#define mmPA_CL_VPORT_XSCALE_14                         0xA163
+#define mmPA_CL_VPORT_XSCALE_15                         0xA169
+#define mmPA_CL_VPORT_XOFFSET_1                         0xA116
+#define mmPA_CL_VPORT_XOFFSET_2                         0xA11C
+#define mmPA_CL_VPORT_XOFFSET_3                         0xA122
+#define mmPA_CL_VPORT_XOFFSET_4                         0xA128
+#define mmPA_CL_VPORT_XOFFSET_5                         0xA12E
+#define mmPA_CL_VPORT_XOFFSET_6                         0xA134
+#define mmPA_CL_VPORT_XOFFSET_7                         0xA13A
+#define mmPA_CL_VPORT_XOFFSET_8                         0xA140
+#define mmPA_CL_VPORT_XOFFSET_9                         0xA146
+#define mmPA_CL_VPORT_XOFFSET_10                        0xA14C
+#define mmPA_CL_VPORT_XOFFSET_11                        0xA152
+#define mmPA_CL_VPORT_XOFFSET_12                        0xA158
+#define mmPA_CL_VPORT_XOFFSET_13                        0xA15E
+#define mmPA_CL_VPORT_XOFFSET_14                        0xA164
+#define mmPA_CL_VPORT_XOFFSET_15                        0xA16A
+#define mmPA_CL_VPORT_YSCALE_1                          0xA117
+#define mmPA_CL_VPORT_YSCALE_2                          0xA11D
+#define mmPA_CL_VPORT_YSCALE_3                          0xA123
+#define mmPA_CL_VPORT_YSCALE_4                          0xA129
+#define mmPA_CL_VPORT_YSCALE_5                          0xA12F
+#define mmPA_CL_VPORT_YSCALE_6                          0xA135
+#define mmPA_CL_VPORT_YSCALE_7                          0xA13B
+#define mmPA_CL_VPORT_YSCALE_8                          0xA141
+#define mmPA_CL_VPORT_YSCALE_9                          0xA147
+#define mmPA_CL_VPORT_YSCALE_10                         0xA14D
+#define mmPA_CL_VPORT_YSCALE_11                         0xA153
+#define mmPA_CL_VPORT_YSCALE_12                         0xA159
+#define mmPA_CL_VPORT_YSCALE_13                         0xA15F
+#define mmPA_CL_VPORT_YSCALE_14                         0xA165
+#define mmPA_CL_VPORT_YSCALE_15                         0xA16B
+#define mmPA_CL_VPORT_YOFFSET_1                         0xA118
+#define mmPA_CL_VPORT_YOFFSET_2                         0xA11E
+#define mmPA_CL_VPORT_YOFFSET_3                         0xA124
+#define mmPA_CL_VPORT_YOFFSET_4                         0xA12A
+#define mmPA_CL_VPORT_YOFFSET_5                         0xA130
+#define mmPA_CL_VPORT_YOFFSET_6                         0xA136
+#define mmPA_CL_VPORT_YOFFSET_7                         0xA13C
+#define mmPA_CL_VPORT_YOFFSET_8                         0xA142
+#define mmPA_CL_VPORT_YOFFSET_9                         0xA148
+#define mmPA_CL_VPORT_YOFFSET_10                        0xA14E
+#define mmPA_CL_VPORT_YOFFSET_11                        0xA154
+#define mmPA_CL_VPORT_YOFFSET_12                        0xA15A
+#define mmPA_CL_VPORT_YOFFSET_13                        0xA160
+#define mmPA_CL_VPORT_YOFFSET_14                        0xA166
+#define mmPA_CL_VPORT_YOFFSET_15                        0xA16C
+#define mmPA_CL_VPORT_ZSCALE_1                          0xA119
+#define mmPA_CL_VPORT_ZSCALE_2                          0xA11F
+#define mmPA_CL_VPORT_ZSCALE_3                          0xA125
+#define mmPA_CL_VPORT_ZSCALE_4                          0xA12B
+#define mmPA_CL_VPORT_ZSCALE_5                          0xA131
+#define mmPA_CL_VPORT_ZSCALE_6                          0xA137
+#define mmPA_CL_VPORT_ZSCALE_7                          0xA13D
+#define mmPA_CL_VPORT_ZSCALE_8                          0xA143
+#define mmPA_CL_VPORT_ZSCALE_9                          0xA149
+#define mmPA_CL_VPORT_ZSCALE_10                         0xA14F
+#define mmPA_CL_VPORT_ZSCALE_11                         0xA155
+#define mmPA_CL_VPORT_ZSCALE_12                         0xA15B
+#define mmPA_CL_VPORT_ZSCALE_13                         0xA161
+#define mmPA_CL_VPORT_ZSCALE_14                         0xA167
+#define mmPA_CL_VPORT_ZSCALE_15                         0xA16D
+#define mmPA_CL_VPORT_ZOFFSET_1                         0xA11A
+#define mmPA_CL_VPORT_ZOFFSET_2                         0xA120
+#define mmPA_CL_VPORT_ZOFFSET_3                         0xA126
+#define mmPA_CL_VPORT_ZOFFSET_4                         0xA12C
+#define mmPA_CL_VPORT_ZOFFSET_5                         0xA132
+#define mmPA_CL_VPORT_ZOFFSET_6                         0xA138
+#define mmPA_CL_VPORT_ZOFFSET_7                         0xA13E
+#define mmPA_CL_VPORT_ZOFFSET_8                         0xA144
+#define mmPA_CL_VPORT_ZOFFSET_9                         0xA14A
+#define mmPA_CL_VPORT_ZOFFSET_10                        0xA150
+#define mmPA_CL_VPORT_ZOFFSET_11                        0xA156
+#define mmPA_CL_VPORT_ZOFFSET_12                        0xA15C
+#define mmPA_CL_VPORT_ZOFFSET_13                        0xA162
+#define mmPA_CL_VPORT_ZOFFSET_14                        0xA168
+#define mmPA_CL_VPORT_ZOFFSET_15                        0xA16E
+#define mmPA_CL_VTE_CNTL                                0xA206
+#define mmPA_CL_VS_OUT_CNTL                             0xA207
+#define mmPA_CL_NANINF_CNTL                             0xA208
+#define mmPA_CL_CLIP_CNTL                               0xA204
+#define mmPA_CL_GB_VERT_CLIP_ADJ                        0xA303
+#define mmPA_CL_GB_VERT_DISC_ADJ                        0xA304
+#define mmPA_CL_GB_HORZ_CLIP_ADJ                        0xA305
+#define mmPA_CL_GB_HORZ_DISC_ADJ                        0xA306
+#define mmPA_CL_UCP_0_X                                 0xA388  /* UCP n (0-5): X,Y,Z,W at 0xA388+4n .. +3 */
+#define mmPA_CL_UCP_0_Y                                 0xA389
+#define mmPA_CL_UCP_0_Z                                 0xA38A
+#define mmPA_CL_UCP_0_W                                 0xA38B
+#define mmPA_CL_UCP_1_X                                 0xA38C
+#define mmPA_CL_UCP_1_Y                                 0xA38D
+#define mmPA_CL_UCP_1_Z                                 0xA38E
+#define mmPA_CL_UCP_1_W                                 0xA38F
+#define mmPA_CL_UCP_2_X                                 0xA390
+#define mmPA_CL_UCP_2_Y                                 0xA391
+#define mmPA_CL_UCP_2_Z                                 0xA392
+#define mmPA_CL_UCP_2_W                                 0xA393
+#define mmPA_CL_UCP_3_X                                 0xA394
+#define mmPA_CL_UCP_3_Y                                 0xA395
+#define mmPA_CL_UCP_3_Z                                 0xA396
+#define mmPA_CL_UCP_3_W                                 0xA397
+#define mmPA_CL_UCP_4_X                                 0xA398
+#define mmPA_CL_UCP_4_Y                                 0xA399
+#define mmPA_CL_UCP_4_Z                                 0xA39A
+#define mmPA_CL_UCP_4_W                                 0xA39B
+#define mmPA_CL_UCP_5_X                                 0xA39C
+#define mmPA_CL_UCP_5_Y                                 0xA39D
+#define mmPA_CL_UCP_5_Z                                 0xA39E
+#define mmPA_CL_UCP_5_W                                 0xA39F
+#define mmPA_CL_POINT_X_RAD                             0xA384
+#define mmPA_CL_POINT_Y_RAD                             0xA385
+#define mmPA_CL_POINT_SIZE                              0xA386
+#define mmPA_CL_POINT_CULL_RAD                          0xA387
+/* PA_SU. The six POLY_OFFSET_* values are consecutive, 0xA37E-0xA383. */
+#define mmPA_SU_VTX_CNTL                                0xA302
+#define mmPA_SU_POINT_SIZE                              0xA280
+#define mmPA_SU_POINT_MINMAX                            0xA281
+#define mmPA_SU_LINE_CNTL                               0xA282
+#define mmPA_SU_SC_MODE_CNTL                            0xA205
+#define mmPA_SU_POLY_OFFSET_DB_FMT_CNTL                 0xA37E
+#define mmPA_SU_POLY_OFFSET_CLAMP                       0xA37F
+#define mmPA_SU_POLY_OFFSET_FRONT_SCALE                 0xA380
+#define mmPA_SU_POLY_OFFSET_FRONT_OFFSET                0xA381
+#define mmPA_SU_POLY_OFFSET_BACK_SCALE                  0xA382
+#define mmPA_SU_POLY_OFFSET_BACK_OFFSET                 0xA383
+/* PA_SC. For n = 0-15: VPORT_SCISSOR_n_TL/_BR = 0xA094+2n / +1; VPORT_ZMIN_n/ZMAX_n = 0xA0B4+2n / +1. */
+#define mmPA_SC_WINDOW_OFFSET                           0xA080
+#define mmPA_SC_AA_CONFIG                               0xA301
+#define mmPA_SC_AA_MASK                                 0xA312
+#define mmPA_SC_AA_SAMPLE_LOCS_MCTX                     0xA307
+#define mmPA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX              0xA308
+#define mmPA_SC_LINE_STIPPLE                            0xA283
+#define mmPA_SC_LINE_CNTL                               0xA300
+#define mmPA_SC_SCREEN_SCISSOR_TL                       0xA00C
+#define mmPA_SC_SCREEN_SCISSOR_BR                       0xA00D
+#define mmPA_SC_WINDOW_SCISSOR_TL                       0xA081
+#define mmPA_SC_WINDOW_SCISSOR_BR                       0xA082
+#define mmPA_SC_CLIPRECT_RULE                           0xA083
+#define mmPA_SC_CLIPRECT_0_TL                           0xA084
+#define mmPA_SC_CLIPRECT_0_BR                           0xA085
+#define mmPA_SC_CLIPRECT_1_TL                           0xA086
+#define mmPA_SC_CLIPRECT_1_BR                           0xA087
+#define mmPA_SC_CLIPRECT_2_TL                           0xA088
+#define mmPA_SC_CLIPRECT_2_BR                           0xA089
+#define mmPA_SC_CLIPRECT_3_TL                           0xA08A
+#define mmPA_SC_CLIPRECT_3_BR                           0xA08B
+#define mmPA_SC_EDGERULE                                0xA08C
+#define mmPA_SC_GENERIC_SCISSOR_TL                      0xA090
+#define mmPA_SC_GENERIC_SCISSOR_BR                      0xA091
+#define mmPA_SC_VPORT_SCISSOR_0_TL                      0xA094
+#define mmPA_SC_VPORT_SCISSOR_1_TL                      0xA096
+#define mmPA_SC_VPORT_SCISSOR_2_TL                      0xA098
+#define mmPA_SC_VPORT_SCISSOR_3_TL                      0xA09A
+#define mmPA_SC_VPORT_SCISSOR_4_TL                      0xA09C
+#define mmPA_SC_VPORT_SCISSOR_5_TL                      0xA09E
+#define mmPA_SC_VPORT_SCISSOR_6_TL                      0xA0A0
+#define mmPA_SC_VPORT_SCISSOR_7_TL                      0xA0A2
+#define mmPA_SC_VPORT_SCISSOR_8_TL                      0xA0A4
+#define mmPA_SC_VPORT_SCISSOR_9_TL                      0xA0A6
+#define mmPA_SC_VPORT_SCISSOR_10_TL                     0xA0A8
+#define mmPA_SC_VPORT_SCISSOR_11_TL                     0xA0AA
+#define mmPA_SC_VPORT_SCISSOR_12_TL                     0xA0AC
+#define mmPA_SC_VPORT_SCISSOR_13_TL                     0xA0AE
+#define mmPA_SC_VPORT_SCISSOR_14_TL                     0xA0B0
+#define mmPA_SC_VPORT_SCISSOR_15_TL                     0xA0B2
+#define mmPA_SC_VPORT_SCISSOR_0_BR                      0xA095
+#define mmPA_SC_VPORT_SCISSOR_1_BR                      0xA097
+#define mmPA_SC_VPORT_SCISSOR_2_BR                      0xA099
+#define mmPA_SC_VPORT_SCISSOR_3_BR                      0xA09B
+#define mmPA_SC_VPORT_SCISSOR_4_BR                      0xA09D
+#define mmPA_SC_VPORT_SCISSOR_5_BR                      0xA09F
+#define mmPA_SC_VPORT_SCISSOR_6_BR                      0xA0A1
+#define mmPA_SC_VPORT_SCISSOR_7_BR                      0xA0A3
+#define mmPA_SC_VPORT_SCISSOR_8_BR                      0xA0A5
+#define mmPA_SC_VPORT_SCISSOR_9_BR                      0xA0A7
+#define mmPA_SC_VPORT_SCISSOR_10_BR                     0xA0A9
+#define mmPA_SC_VPORT_SCISSOR_11_BR                     0xA0AB
+#define mmPA_SC_VPORT_SCISSOR_12_BR                     0xA0AD
+#define mmPA_SC_VPORT_SCISSOR_13_BR                     0xA0AF
+#define mmPA_SC_VPORT_SCISSOR_14_BR                     0xA0B1
+#define mmPA_SC_VPORT_SCISSOR_15_BR                     0xA0B3
+#define mmPA_SC_VPORT_ZMIN_0                            0xA0B4
+#define mmPA_SC_VPORT_ZMIN_1                            0xA0B6
+#define mmPA_SC_VPORT_ZMIN_2                            0xA0B8
+#define mmPA_SC_VPORT_ZMIN_3                            0xA0BA
+#define mmPA_SC_VPORT_ZMIN_4                            0xA0BC
+#define mmPA_SC_VPORT_ZMIN_5                            0xA0BE
+#define mmPA_SC_VPORT_ZMIN_6                            0xA0C0
+#define mmPA_SC_VPORT_ZMIN_7                            0xA0C2
+#define mmPA_SC_VPORT_ZMIN_8                            0xA0C4
+#define mmPA_SC_VPORT_ZMIN_9                            0xA0C6
+#define mmPA_SC_VPORT_ZMIN_10                           0xA0C8
+#define mmPA_SC_VPORT_ZMIN_11                           0xA0CA
+#define mmPA_SC_VPORT_ZMIN_12                           0xA0CC
+#define mmPA_SC_VPORT_ZMIN_13                           0xA0CE
+#define mmPA_SC_VPORT_ZMIN_14                           0xA0D0
+#define mmPA_SC_VPORT_ZMIN_15                           0xA0D2
+#define mmPA_SC_VPORT_ZMAX_0                            0xA0B5
+#define mmPA_SC_VPORT_ZMAX_1                            0xA0B7
+#define mmPA_SC_VPORT_ZMAX_2                            0xA0B9
+#define mmPA_SC_VPORT_ZMAX_3                            0xA0BB
+#define mmPA_SC_VPORT_ZMAX_4                            0xA0BD
+#define mmPA_SC_VPORT_ZMAX_5                            0xA0BF
+#define mmPA_SC_VPORT_ZMAX_6                            0xA0C1
+#define mmPA_SC_VPORT_ZMAX_7                            0xA0C3
+#define mmPA_SC_VPORT_ZMAX_8                            0xA0C5
+#define mmPA_SC_VPORT_ZMAX_9                            0xA0C7
+#define mmPA_SC_VPORT_ZMAX_10                           0xA0C9
+#define mmPA_SC_VPORT_ZMAX_11                           0xA0CB
+#define mmPA_SC_VPORT_ZMAX_12                           0xA0CD
+#define mmPA_SC_VPORT_ZMAX_13                           0xA0CF
+#define mmPA_SC_VPORT_ZMAX_14                           0xA0D1
+#define mmPA_SC_VPORT_ZMAX_15                           0xA0D3
+#define mmPA_SC_MODE_CNTL                               0xA293
+#define mmPA_SC_MPASS_PS_CNTL                           0xA292
+/* VGT_DRAW_INITIATOR, VGT_EVENT_* and VGT_DMA_* values; listed unsorted. */
+#define mmVGT_DRAW_INITIATOR                            0xA1FC
+#define mmVGT_EVENT_INITIATOR                           0xA2A4
+#define mmVGT_EVENT_ADDRESS_REG                         0xA1FE
+#define mmVGT_DMA_BASE_HI                               0xA1F9
+#define mmVGT_DMA_BASE                                  0xA1FA
+#define mmVGT_DMA_INDEX_TYPE                            0xA29F
+#define mmVGT_DMA_NUM_INSTANCES                         0xA2A2
+#define mmVGT_DMA_SIZE                                  0xA29D
+/* Other VGT values. INDEX_TYPE, NUM_INDICES, NUM_INSTANCES, PRIMITIVE_TYPE are 0x22xx, the rest 0xAxxx. */
+#define mmVGT_IMMED_DATA                                0xA1FD
+#define mmVGT_INDEX_TYPE                                0x2257
+#define mmVGT_NUM_INDICES                               0x225C
+#define mmVGT_NUM_INSTANCES                             0x225D
+#define mmVGT_PRIMITIVE_TYPE                            0x2256
+#define mmVGT_PRIMITIVEID_EN                            0xA2A1
+#define mmVGT_VTX_CNT_EN                                0xA2AE
+#define mmVGT_REUSE_OFF                                 0xA2AD
+#define mmVGT_INSTANCE_STEP_RATE_0                      0xA2A8
+#define mmVGT_INSTANCE_STEP_RATE_1                      0xA2A9
+#define mmVGT_MAX_VTX_INDX                              0xA100
+#define mmVGT_MIN_VTX_INDX                              0xA101
+#define mmVGT_INDX_OFFSET                               0xA102
+#define mmVGT_VERTEX_REUSE_BLOCK_CNTL                   0xA316
+#define mmVGT_OUT_DEALLOC_CNTL                          0xA317
+#define mmVGT_MULTI_PRIM_IB_RESET_INDX                  0xA103
+#define mmVGT_MULTI_PRIM_IB_RESET_EN                    0xA2A5
+#define mmVGT_ENHANCE                                   0xA294
+#define mmVGT_OUTPUT_PATH_CNTL                          0xA284
+#define mmVGT_HOS_CNTL                                  0xA285
+#define mmVGT_HOS_MAX_TESS_LEVEL                        0xA286
+#define mmVGT_HOS_MIN_TESS_LEVEL                        0xA287
+#define mmVGT_HOS_REUSE_DEPTH                           0xA288
+#define mmVGT_GROUP_PRIM_TYPE                           0xA289
+#define mmVGT_GROUP_FIRST_DECR                          0xA28A
+#define mmVGT_GROUP_DECR                                0xA28B
+#define mmVGT_GROUP_VECT_0_CNTL                         0xA28C
+#define mmVGT_GROUP_VECT_1_CNTL                         0xA28D
+#define mmVGT_GROUP_VECT_0_FMT_CNTL                     0xA28E
+#define mmVGT_GROUP_VECT_1_FMT_CNTL                     0xA28F
+#define mmVGT_GS_MODE                                   0xA290
+#define mmVGT_GS_OUT_PRIM_TYPE                          0xA29B
+/* VGT_STRMOUT_*. Buffer n (0-3) = 0xA2B4+4n: BUFFER_SIZE, VTX_STRIDE, BUFFER_BASE, BUFFER_OFFSET. */
+#define mmVGT_STRMOUT_EN                                0xA2AC
+#define mmVGT_STRMOUT_BUFFER_SIZE_0                     0xA2B4
+#define mmVGT_STRMOUT_BUFFER_SIZE_1                     0xA2B8
+#define mmVGT_STRMOUT_BUFFER_SIZE_2                     0xA2BC
+#define mmVGT_STRMOUT_BUFFER_SIZE_3                     0xA2C0
+#define mmVGT_STRMOUT_BUFFER_OFFSET_0                   0xA2B7
+#define mmVGT_STRMOUT_BUFFER_OFFSET_1                   0xA2BB
+#define mmVGT_STRMOUT_BUFFER_OFFSET_2                   0xA2BF
+#define mmVGT_STRMOUT_BUFFER_OFFSET_3                   0xA2C3
+#define mmVGT_STRMOUT_VTX_STRIDE_0                      0xA2B5
+#define mmVGT_STRMOUT_VTX_STRIDE_1                      0xA2B9
+#define mmVGT_STRMOUT_VTX_STRIDE_2                      0xA2BD
+#define mmVGT_STRMOUT_VTX_STRIDE_3                      0xA2C1
+#define mmVGT_STRMOUT_BUFFER_BASE_0                     0xA2B6
+#define mmVGT_STRMOUT_BUFFER_BASE_1                     0xA2BA
+#define mmVGT_STRMOUT_BUFFER_BASE_2                     0xA2BE
+#define mmVGT_STRMOUT_BUFFER_BASE_3                     0xA2C2
+#define mmVGT_STRMOUT_BUFFER_EN                         0xA2C8
+#define mmVGT_STRMOUT_BASE_OFFSET_0                     0xA2C4
+#define mmVGT_STRMOUT_BASE_OFFSET_1                     0xA2C5
+#define mmVGT_STRMOUT_BASE_OFFSET_2                     0xA2C6
+#define mmVGT_STRMOUT_BASE_OFFSET_3                     0xA2C7
+#define mmVGT_STRMOUT_BASE_OFFSET_HI_0                  0xA2D1
+#define mmVGT_STRMOUT_BASE_OFFSET_HI_1                  0xA2D2
+#define mmVGT_STRMOUT_BASE_OFFSET_HI_2                  0xA2D3
+#define mmVGT_STRMOUT_BASE_OFFSET_HI_3                  0xA2D4
+#define mmVGT_STRMOUT_DRAW_OPAQUE_OFFSET                0xA2CA
+#define mmVGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE    0xA2CB
+#define mmVGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE         0xA2CC
+/* SQ_PGM_{START,CF_OFFSET,RESOURCES}_* for the PS, VS, GS, ES and FS suffixes (plus SQ_PGM_EXPORTS_PS), then the SQ_*_ITEMSIZE registers and SQ_VTX_SEMANTIC_CLEAR. */
+#define mmSQ_PGM_START_PS                               0xA210
+#define mmSQ_PGM_CF_OFFSET_PS                           0xA233
+#define mmSQ_PGM_RESOURCES_PS                           0xA214
+#define mmSQ_PGM_EXPORTS_PS                             0xA215
+#define mmSQ_PGM_START_VS                               0xA216
+#define mmSQ_PGM_CF_OFFSET_VS                           0xA234
+#define mmSQ_PGM_RESOURCES_VS                           0xA21A
+#define mmSQ_PGM_START_GS                               0xA21B
+#define mmSQ_PGM_CF_OFFSET_GS                           0xA235
+#define mmSQ_PGM_RESOURCES_GS                           0xA21F
+#define mmSQ_PGM_START_ES                               0xA220
+#define mmSQ_PGM_CF_OFFSET_ES                           0xA236
+#define mmSQ_PGM_RESOURCES_ES                           0xA224
+#define mmSQ_PGM_START_FS                               0xA225
+#define mmSQ_PGM_CF_OFFSET_FS                           0xA237
+#define mmSQ_PGM_RESOURCES_FS                           0xA229
+#define mmSQ_ESGS_RING_ITEMSIZE                         0xA22A
+#define mmSQ_GSVS_RING_ITEMSIZE                         0xA22B
+#define mmSQ_ESTMP_RING_ITEMSIZE                        0xA22C
+#define mmSQ_GSTMP_RING_ITEMSIZE                        0xA22D
+#define mmSQ_VSTMP_RING_ITEMSIZE                        0xA22E
+#define mmSQ_PSTMP_RING_ITEMSIZE                        0xA22F
+#define mmSQ_FBUF_RING_ITEMSIZE                         0xA230
+#define mmSQ_REDUC_RING_ITEMSIZE                        0xA231
+#define mmSQ_GS_VERT_ITEMSIZE                           0xA232
+#define mmSQ_VTX_SEMANTIC_CLEAR                         0xA238
+/* SQ_VTX_SEMANTIC_n, n = 0..31, at consecutive offsets 0xA0E0 + n. */
+#define mmSQ_VTX_SEMANTIC_0                             0xA0E0
+#define mmSQ_VTX_SEMANTIC_1                             0xA0E1
+#define mmSQ_VTX_SEMANTIC_2                             0xA0E2
+#define mmSQ_VTX_SEMANTIC_3                             0xA0E3
+#define mmSQ_VTX_SEMANTIC_4                             0xA0E4
+#define mmSQ_VTX_SEMANTIC_5                             0xA0E5
+#define mmSQ_VTX_SEMANTIC_6                             0xA0E6
+#define mmSQ_VTX_SEMANTIC_7                             0xA0E7
+#define mmSQ_VTX_SEMANTIC_8                             0xA0E8
+#define mmSQ_VTX_SEMANTIC_9                             0xA0E9
+#define mmSQ_VTX_SEMANTIC_10                            0xA0EA
+#define mmSQ_VTX_SEMANTIC_11                            0xA0EB
+#define mmSQ_VTX_SEMANTIC_12                            0xA0EC
+#define mmSQ_VTX_SEMANTIC_13                            0xA0ED
+#define mmSQ_VTX_SEMANTIC_14                            0xA0EE
+#define mmSQ_VTX_SEMANTIC_15                            0xA0EF
+#define mmSQ_VTX_SEMANTIC_16                            0xA0F0
+#define mmSQ_VTX_SEMANTIC_17                            0xA0F1
+#define mmSQ_VTX_SEMANTIC_18                            0xA0F2
+#define mmSQ_VTX_SEMANTIC_19                            0xA0F3
+#define mmSQ_VTX_SEMANTIC_20                            0xA0F4
+#define mmSQ_VTX_SEMANTIC_21                            0xA0F5
+#define mmSQ_VTX_SEMANTIC_22                            0xA0F6
+#define mmSQ_VTX_SEMANTIC_23                            0xA0F7
+#define mmSQ_VTX_SEMANTIC_24                            0xA0F8
+#define mmSQ_VTX_SEMANTIC_25                            0xA0F9
+#define mmSQ_VTX_SEMANTIC_26                            0xA0FA
+#define mmSQ_VTX_SEMANTIC_27                            0xA0FB
+#define mmSQ_VTX_SEMANTIC_28                            0xA0FC
+#define mmSQ_VTX_SEMANTIC_29                            0xA0FD
+#define mmSQ_VTX_SEMANTIC_30                            0xA0FE
+#define mmSQ_VTX_SEMANTIC_31                            0xA0FF
+/* SQ_ALU_CONST_CACHE_{PS,VS,GS}_n at 0xA250/0xA260/0xA270 + n; SQ_ALU_CONST_BUFFER_SIZE_{PS,VS,GS}_n at 0xA050/0xA060/0xA070 + n (n = 0..15). */
+#define mmSQ_ALU_CONST_CACHE_PS_0                       0xA250
+#define mmSQ_ALU_CONST_CACHE_PS_1                       0xA251
+#define mmSQ_ALU_CONST_CACHE_PS_2                       0xA252
+#define mmSQ_ALU_CONST_CACHE_PS_3                       0xA253
+#define mmSQ_ALU_CONST_CACHE_PS_4                       0xA254
+#define mmSQ_ALU_CONST_CACHE_PS_5                       0xA255
+#define mmSQ_ALU_CONST_CACHE_PS_6                       0xA256
+#define mmSQ_ALU_CONST_CACHE_PS_7                       0xA257
+#define mmSQ_ALU_CONST_CACHE_PS_8                       0xA258
+#define mmSQ_ALU_CONST_CACHE_PS_9                       0xA259
+#define mmSQ_ALU_CONST_CACHE_PS_10                      0xA25A
+#define mmSQ_ALU_CONST_CACHE_PS_11                      0xA25B
+#define mmSQ_ALU_CONST_CACHE_PS_12                      0xA25C
+#define mmSQ_ALU_CONST_CACHE_PS_13                      0xA25D
+#define mmSQ_ALU_CONST_CACHE_PS_14                      0xA25E
+#define mmSQ_ALU_CONST_CACHE_PS_15                      0xA25F
+#define mmSQ_ALU_CONST_CACHE_VS_0                       0xA260
+#define mmSQ_ALU_CONST_CACHE_VS_1                       0xA261
+#define mmSQ_ALU_CONST_CACHE_VS_2                       0xA262
+#define mmSQ_ALU_CONST_CACHE_VS_3                       0xA263
+#define mmSQ_ALU_CONST_CACHE_VS_4                       0xA264
+#define mmSQ_ALU_CONST_CACHE_VS_5                       0xA265
+#define mmSQ_ALU_CONST_CACHE_VS_6                       0xA266
+#define mmSQ_ALU_CONST_CACHE_VS_7                       0xA267
+#define mmSQ_ALU_CONST_CACHE_VS_8                       0xA268
+#define mmSQ_ALU_CONST_CACHE_VS_9                       0xA269
+#define mmSQ_ALU_CONST_CACHE_VS_10                      0xA26A
+#define mmSQ_ALU_CONST_CACHE_VS_11                      0xA26B
+#define mmSQ_ALU_CONST_CACHE_VS_12                      0xA26C
+#define mmSQ_ALU_CONST_CACHE_VS_13                      0xA26D
+#define mmSQ_ALU_CONST_CACHE_VS_14                      0xA26E
+#define mmSQ_ALU_CONST_CACHE_VS_15                      0xA26F
+#define mmSQ_ALU_CONST_CACHE_GS_0                       0xA270
+#define mmSQ_ALU_CONST_CACHE_GS_1                       0xA271
+#define mmSQ_ALU_CONST_CACHE_GS_2                       0xA272
+#define mmSQ_ALU_CONST_CACHE_GS_3                       0xA273
+#define mmSQ_ALU_CONST_CACHE_GS_4                       0xA274
+#define mmSQ_ALU_CONST_CACHE_GS_5                       0xA275
+#define mmSQ_ALU_CONST_CACHE_GS_6                       0xA276
+#define mmSQ_ALU_CONST_CACHE_GS_7                       0xA277
+#define mmSQ_ALU_CONST_CACHE_GS_8                       0xA278
+#define mmSQ_ALU_CONST_CACHE_GS_9                       0xA279
+#define mmSQ_ALU_CONST_CACHE_GS_10                      0xA27A
+#define mmSQ_ALU_CONST_CACHE_GS_11                      0xA27B
+#define mmSQ_ALU_CONST_CACHE_GS_12                      0xA27C
+#define mmSQ_ALU_CONST_CACHE_GS_13                      0xA27D
+#define mmSQ_ALU_CONST_CACHE_GS_14                      0xA27E
+#define mmSQ_ALU_CONST_CACHE_GS_15                      0xA27F
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_0                 0xA050
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_1                 0xA051
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_2                 0xA052
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_3                 0xA053
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_4                 0xA054
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_5                 0xA055
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_6                 0xA056
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_7                 0xA057
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_8                 0xA058
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_9                 0xA059
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_10                0xA05A
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_11                0xA05B
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_12                0xA05C
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_13                0xA05D
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_14                0xA05E
+#define mmSQ_ALU_CONST_BUFFER_SIZE_PS_15                0xA05F
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_0                 0xA060
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_1                 0xA061
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_2                 0xA062
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_3                 0xA063
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_4                 0xA064
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_5                 0xA065
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_6                 0xA066
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_7                 0xA067
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_8                 0xA068
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_9                 0xA069
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_10                0xA06A
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_11                0xA06B
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_12                0xA06C
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_13                0xA06D
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_14                0xA06E
+#define mmSQ_ALU_CONST_BUFFER_SIZE_VS_15                0xA06F
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_0                 0xA070
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_1                 0xA071
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_2                 0xA072
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_3                 0xA073
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_4                 0xA074
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_5                 0xA075
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_6                 0xA076
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_7                 0xA077
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_8                 0xA078
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_9                 0xA079
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_10                0xA07A
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_11                0xA07B
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_12                0xA07C
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_13                0xA07D
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_14                0xA07E
+#define mmSQ_ALU_CONST_BUFFER_SIZE_GS_15                0xA07F
+/* SPI_VS_OUT_ID_n (0xA185 + n, n = 0..9), SPI_PS_INPUT_CNTL_n (0xA191 + n, n = 0..31), then the remaining SPI_* registers. */
+#define mmSPI_VS_OUT_ID_0                               0xA185
+#define mmSPI_VS_OUT_ID_1                               0xA186
+#define mmSPI_VS_OUT_ID_2                               0xA187
+#define mmSPI_VS_OUT_ID_3                               0xA188
+#define mmSPI_VS_OUT_ID_4                               0xA189
+#define mmSPI_VS_OUT_ID_5                               0xA18A
+#define mmSPI_VS_OUT_ID_6                               0xA18B
+#define mmSPI_VS_OUT_ID_7                               0xA18C
+#define mmSPI_VS_OUT_ID_8                               0xA18D
+#define mmSPI_VS_OUT_ID_9                               0xA18E
+#define mmSPI_PS_INPUT_CNTL_0                           0xA191
+#define mmSPI_PS_INPUT_CNTL_1                           0xA192
+#define mmSPI_PS_INPUT_CNTL_2                           0xA193
+#define mmSPI_PS_INPUT_CNTL_3                           0xA194
+#define mmSPI_PS_INPUT_CNTL_4                           0xA195
+#define mmSPI_PS_INPUT_CNTL_5                           0xA196
+#define mmSPI_PS_INPUT_CNTL_6                           0xA197
+#define mmSPI_PS_INPUT_CNTL_7                           0xA198
+#define mmSPI_PS_INPUT_CNTL_8                           0xA199
+#define mmSPI_PS_INPUT_CNTL_9                           0xA19A
+#define mmSPI_PS_INPUT_CNTL_10                          0xA19B
+#define mmSPI_PS_INPUT_CNTL_11                          0xA19C
+#define mmSPI_PS_INPUT_CNTL_12                          0xA19D
+#define mmSPI_PS_INPUT_CNTL_13                          0xA19E
+#define mmSPI_PS_INPUT_CNTL_14                          0xA19F
+#define mmSPI_PS_INPUT_CNTL_15                          0xA1A0
+#define mmSPI_PS_INPUT_CNTL_16                          0xA1A1
+#define mmSPI_PS_INPUT_CNTL_17                          0xA1A2
+#define mmSPI_PS_INPUT_CNTL_18                          0xA1A3
+#define mmSPI_PS_INPUT_CNTL_19                          0xA1A4
+#define mmSPI_PS_INPUT_CNTL_20                          0xA1A5
+#define mmSPI_PS_INPUT_CNTL_21                          0xA1A6
+#define mmSPI_PS_INPUT_CNTL_22                          0xA1A7
+#define mmSPI_PS_INPUT_CNTL_23                          0xA1A8
+#define mmSPI_PS_INPUT_CNTL_24                          0xA1A9
+#define mmSPI_PS_INPUT_CNTL_25                          0xA1AA
+#define mmSPI_PS_INPUT_CNTL_26                          0xA1AB
+#define mmSPI_PS_INPUT_CNTL_27                          0xA1AC
+#define mmSPI_PS_INPUT_CNTL_28                          0xA1AD
+#define mmSPI_PS_INPUT_CNTL_29                          0xA1AE
+#define mmSPI_PS_INPUT_CNTL_30                          0xA1AF
+#define mmSPI_PS_INPUT_CNTL_31                          0xA1B0
+#define mmSPI_VS_OUT_CONFIG                             0xA1B1
+#define mmSPI_THREAD_GROUPING                           0xA1B2
+#define mmSPI_PS_IN_CONTROL_0                           0xA1B3
+#define mmSPI_PS_IN_CONTROL_1                           0xA1B4
+#define mmSPI_INTERP_CONTROL_0                          0xA1B5
+#define mmSPI_INPUT_Z                                   0xA1B6
+#define mmSPI_FOG_CNTL                                  0xA1B7
+#define mmSPI_FOG_FUNC_SCALE                            0xA1B8
+#define mmSPI_FOG_FUNC_BIAS                             0xA1B9
+/* SX_* register offsets. */
+#define mmSX_MISC                                       0xA0D4
+#define mmSX_ALPHA_TEST_CONTROL                         0xA104
+#define mmSX_ALPHA_REF                                  0xA10E
+/* DB_* register offsets (not listed in address order). */
+#define mmDB_DEPTH_BASE                                 0xA003
+#define mmDB_DEPTH_INFO                                 0xA004
+#define mmDB_HTILE_DATA_BASE                            0xA005
+#define mmDB_DEPTH_SIZE                                 0xA000
+#define mmDB_DEPTH_VIEW                                 0xA001
+#define mmDB_RENDER_CONTROL                             0xA343
+#define mmDB_RENDER_OVERRIDE                            0xA344
+#define mmDB_SHADER_CONTROL                             0xA203
+#define mmDB_STENCIL_CLEAR                              0xA00A
+#define mmDB_DEPTH_CLEAR                                0xA00B
+#define mmDB_HTILE_SURFACE                              0xA349
+#define mmDB_PRELOAD_CONTROL                            0xA34C
+#define mmDB_PREFETCH_LIMIT                             0xA34D
+#define mmDB_STENCILREFMASK                             0xA10C
+#define mmDB_STENCILREFMASK_BF                          0xA10D
+#define mmDB_SRESULTS_COMPARE_STATE0                    0xA34A
+#define mmDB_SRESULTS_COMPARE_STATE1                    0xA34B
+#define mmDB_DEPTH_CONTROL                              0xA200
+#define mmDB_ALPHA_TO_MASK                              0xA351
+
+/* CB_* register offsets.  The CB_COLORn_* registers come in banks of eight
+ * (n = 0..7): BASE, SIZE, VIEW, INFO, TILE, FRAG and MASK fill 0xA010-0xA047,
+ * and CB_CLEAR_*_R6XX follows directly at 0xA048-0xA04B.
+ */
+#define mmCB_BLEND_RED                                  0xA105
+#define mmCB_BLEND_GREEN                                0xA106
+#define mmCB_BLEND_BLUE                                 0xA107
+#define mmCB_BLEND_ALPHA                                0xA108
+#define mmCB_FOG_RED_R6XX                               0xA109
+#define mmCB_FOG_GREEN_R6XX                             0xA10A
+#define mmCB_FOG_BLUE_R6XX                              0xA10B
+#define mmCB_BLEND_CONTROL                              0xA201
+#define mmCB_COLOR_CONTROL                              0xA202
+#define mmCB_BLEND0_CONTROL                             0xA1E0
+#define mmCB_BLEND1_CONTROL                             0xA1E1
+#define mmCB_BLEND2_CONTROL                             0xA1E2
+#define mmCB_BLEND3_CONTROL                             0xA1E3
+#define mmCB_BLEND4_CONTROL                             0xA1E4
+#define mmCB_BLEND5_CONTROL                             0xA1E5
+#define mmCB_BLEND6_CONTROL                             0xA1E6
+#define mmCB_BLEND7_CONTROL                             0xA1E7
+#define mmCB_CLRCMP_CONTROL                             0xA30C
+#define mmCB_CLRCMP_SRC                                 0xA30D
+#define mmCB_CLRCMP_DST                                 0xA30E
+#define mmCB_CLRCMP_MSK                                 0xA30F
+#define mmCB_COLOR0_BASE                                0xA010
+#define mmCB_COLOR1_BASE                                0xA011
+#define mmCB_COLOR2_BASE                                0xA012
+#define mmCB_COLOR3_BASE                                0xA013
+#define mmCB_COLOR4_BASE                                0xA014
+#define mmCB_COLOR5_BASE                                0xA015
+#define mmCB_COLOR6_BASE                                0xA016
+#define mmCB_COLOR7_BASE                                0xA017
+#define mmCB_COLOR0_SIZE                                0xA018
+#define mmCB_COLOR1_SIZE                                0xA019
+#define mmCB_COLOR2_SIZE                                0xA01A
+#define mmCB_COLOR3_SIZE                                0xA01B
+#define mmCB_COLOR4_SIZE                                0xA01C
+#define mmCB_COLOR5_SIZE                                0xA01D
+#define mmCB_COLOR6_SIZE                                0xA01E
+#define mmCB_COLOR7_SIZE                                0xA01F
+#define mmCB_COLOR0_VIEW                                0xA020
+#define mmCB_COLOR1_VIEW                                0xA021
+#define mmCB_COLOR2_VIEW                                0xA022
+#define mmCB_COLOR3_VIEW                                0xA023
+#define mmCB_COLOR4_VIEW                                0xA024
+#define mmCB_COLOR5_VIEW                                0xA025
+#define mmCB_COLOR6_VIEW                                0xA026
+#define mmCB_COLOR7_VIEW                                0xA027
+#define mmCB_COLOR0_INFO                                0xA028
+#define mmCB_COLOR1_INFO                                0xA029
+#define mmCB_COLOR2_INFO                                0xA02A
+#define mmCB_COLOR3_INFO                                0xA02B
+#define mmCB_COLOR4_INFO                                0xA02C
+#define mmCB_COLOR5_INFO                                0xA02D
+#define mmCB_COLOR6_INFO                                0xA02E
+#define mmCB_COLOR7_INFO                                0xA02F
+#define mmCB_COLOR0_TILE                                0xA030
+#define mmCB_COLOR1_TILE                                0xA031
+#define mmCB_COLOR2_TILE                                0xA032
+#define mmCB_COLOR3_TILE                                0xA033
+#define mmCB_COLOR4_TILE                                0xA034
+#define mmCB_COLOR5_TILE                                0xA035
+#define mmCB_COLOR6_TILE                                0xA036
+#define mmCB_COLOR7_TILE                                0xA037
+#define mmCB_COLOR0_FRAG                                0xA038
+#define mmCB_COLOR1_FRAG                                0xA039
+#define mmCB_COLOR2_FRAG                                0xA03A
+#define mmCB_COLOR3_FRAG                                0xA03B
+#define mmCB_COLOR4_FRAG                                0xA03C
+#define mmCB_COLOR5_FRAG                                0xA03D
+#define mmCB_COLOR6_FRAG                                0xA03E
+#define mmCB_COLOR7_FRAG                                0xA03F
+#define mmCB_COLOR0_MASK                                0xA040
+#define mmCB_COLOR1_MASK                                0xA041
+#define mmCB_COLOR2_MASK                                0xA042
+#define mmCB_COLOR3_MASK                                0xA043
+#define mmCB_COLOR4_MASK                                0xA044
+#define mmCB_COLOR5_MASK                                0xA045
+#define mmCB_COLOR6_MASK                                0xA046
+#define mmCB_COLOR7_MASK                                0xA047
+#define mmCB_CLEAR_RED_R6XX                             0xA048
+#define mmCB_CLEAR_GREEN_R6XX                           0xA049
+#define mmCB_CLEAR_BLUE_R6XX                            0xA04A
+#define mmCB_CLEAR_ALPHA_R6XX                           0xA04B
+#define mmCB_TARGET_MASK                                0xA08E
+#define mmCB_SHADER_MASK                                0xA08F
+#define mmCB_SHADER_CONTROL                             0xA1E8
+/* SQ_VTX_BASE_VTX_LOC / SQ_VTX_START_INST_LOC offsets. */
+#define mmSQ_VTX_BASE_VTX_LOC                           0xF3FC
+#define mmSQ_VTX_START_INST_LOC                         0xF3FD
+
+#endif /* _R700_CHIPOFFSET_H_ */
+
diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c
new file mode 100644
index 0000000000..09c48565b6
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_clear.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+ 
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "swrast/swrast.h"
+
+#include "radeon_lock.h"
+#include "r600_context.h"
+
+#include "r700_shaderinst.h"
+#include "r700_clear.h"
+
+static GLboolean r700ClearFast(context_t *context, GLbitfield mask)
+{
+    /* TODO: fast clear is not implemented yet; GL_FALSE tells the caller nothing was cleared. */
+    return GL_FALSE;
+}
+
+/*
+ * Clear the buffers named by mask.  Bits that end up in hw_mask are cleared
+ * through radeonUserClear(); every other requested bit is passed to the
+ * software fallback, _swrast_Clear().
+ */
+void r700Clear(GLcontext * ctx, GLbitfield mask)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon);
+    const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]);
+    struct gl_framebuffer *fb = ctx->DrawBuffer;
+    GLbitfield hw_mask = 0;
+    GLbitfield sw_mask;
+    int buf;
+
+    radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %x\n", __func__, mask);
+
+    /* A clear that touches either front buffer marks the front buffer dirty. */
+    if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT))
+        context->radeon.front_buffer_dirty = GL_TRUE;
+
+    /* Fast path: nothing more to do once it reports GL_TRUE. */
+    if (r700ClearFast(context, mask) == GL_TRUE)
+        return;
+
+    if (!context->radeon.radeonScreen->driScreen->dri2.enabled) {
+        /* Without DRI2, take and drop the hardware lock before reading the
+         * clip rectangle count.  NOTE(review): presumably the lock round trip
+         * refreshes the drawable's clip rectangles -- confirm. */
+        LOCK_HARDWARE(&context->radeon);
+        UNLOCK_HARDWARE(&context->radeon);
+        if (dPriv->numClipRects == 0)
+            return;
+    }
+
+    R600_NEWPRIM(context);
+
+    /* With the colour mask word all ones every requested colour buffer is a
+     * candidate; otherwise only front-left and back-left are. */
+    if (colorMask == ~0)
+        hw_mask |= (mask & BUFFER_BITS_COLOR);
+    else
+        hw_mask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT));
+
+    /* Stencil and depth are always candidates when requested. */
+    if (mask & BUFFER_BIT_STENCIL)
+        hw_mask |= BUFFER_BIT_STENCIL;
+    if (mask & BUFFER_BIT_DEPTH)
+        hw_mask |= BUFFER_BIT_DEPTH;
+
+    /* Drop candidates whose renderbuffer has a zero ClassID.
+     * NOTE(review): presumably those are not driver-owned -- confirm. */
+    for (buf = 0; buf < BUFFER_COUNT; buf++) {
+        const GLuint bit = 1 << buf;
+
+        if ((hw_mask & bit) && !fb->Attachment[buf].Renderbuffer->ClassID)
+            hw_mask &= ~bit;
+    }
+
+    /* Everything requested that is not cleared above goes to swrast. */
+    sw_mask = mask & ~hw_mask;
+
+    if (hw_mask) {
+        radeonUserClear(ctx, hw_mask);
+    }
+
+    if (sw_mask) {
+        radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, "%s: swrast clear, mask: %x\n",
+                __FUNCTION__, sw_mask);
+        _swrast_Clear(ctx, sw_mask);
+    }
+}
+
+
diff --git a/src/mesa/drivers/dri/r600/r700_clear.h b/src/mesa/drivers/dri/r600/r700_clear.h
new file mode 100644
index 0000000000..bed1d3a90e
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_clear.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#ifndef __r700_CLEAR_H__
+#define __r700_CLEAR_H__
+/* Clear the buffers selected by mask; defined in r700_clear.c. */
+extern void r700Clear(GLcontext * ctx, GLbitfield mask);
+
+#endif /* __r700_CLEAR_H__ */
diff --git a/src/mesa/drivers/dri/r600/r700_debug.c b/src/mesa/drivers/dri/r600/r700_debug.c
new file mode 100644
index 0000000000..cd1ba9eca3
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_debug.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include "r700_debug.h"
+#include "radeon_debug.h"
+
+/* Trace-log 'size' unsigned-int words starting at 'addr', four per line, after
+ * a heading chosen by 'type' (a DUMP_* value).  Currently disabled: see below. */
+void DumpHwBinary(int type, void *addr, int size)
+{
+    int i;
+    const unsigned int *pHw = (const unsigned int *)addr;
+
+    /* The dump is switched off by this unconditional return; remove it to
+     * re-enable the output below. */
+    return;
+
+    switch (type) {
+    case DUMP_PIXEL_SHADER:
+        radeon_print(RADEON_SHADER, RADEON_TRACE, "Pixel Shader\n");
+        break;
+    case DUMP_VERTEX_SHADER:
+        radeon_print(RADEON_SHADER, RADEON_TRACE, "Vertex Shader\n");
+        break;
+    case DUMP_FETCH_SHADER:
+        radeon_print(RADEON_SHADER, RADEON_TRACE, "Fetch Shader\n");
+        break;
+    }
+
+    for (i = 0; i < size; i++) {
+        radeon_print(RADEON_SHADER, RADEON_TRACE, "0x%08x,\t", pHw[i]);
+        if (i % 4 == 3) /* end the row; the word itself was printed above */
+            radeon_print(RADEON_SHADER, RADEON_TRACE, "\n");
+    }
+}
+
diff --git a/src/mesa/drivers/dri/r600/r700_debug.h b/src/mesa/drivers/dri/r600/r700_debug.h
new file mode 100644
index 0000000000..c0921bf610
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_debug.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#ifndef _R700_DEBUG_H_
+#define _R700_DEBUG_H_
+/* Selects the heading DumpHwBinary() prints; one distinct bit per kind. */
+enum R700_DUMP_TYPE {
+	DUMP_VERTEX_SHADER = 1 << 0,
+	DUMP_PIXEL_SHADER  = 1 << 1,
+	DUMP_FETCH_SHADER  = 1 << 2,
+};
+/* type: a DUMP_* value; addr: first word to dump; size: number of words. */
+extern void DumpHwBinary(int type, void *addr, int size);
+
+#endif /*_R700_DEBUG_H_*/
diff --git a/src/mesa/drivers/dri/r600/r700_driconf.h b/src/mesa/drivers/dri/r600/r700_driconf.h
new file mode 100644
index 0000000000..a9e2152344
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_driconf.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _R700_DRICONF_H_
+#define _R700_DRICONF_H_
+/* Fragment-program optimization choices; presumably values of a driconf option -- TODO confirm. */
+#define DRI_CONF_FP_OPTIMIZATION_SPEED   0
+#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
+
+#endif /* _R700_DRICONF_H_ */
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
new file mode 100644
index 0000000000..fbb808e066
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -0,0 +1,792 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/imports.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "shader/program.h"
+
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+
+#include "r700_fragprog.h"
+
+#include "r700_debug.h"
+
+/* Make fprog read a y-inverted window position: WPOS reads are redirected to a new temporary
+ * computed by one prepended ADD.  ctx is unused.  If the ADD cannot be inserted, fprog is left unmodified. */
+void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog)
+{
+    static const gl_state_index winstate[STATE_LENGTH]
+         = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0};
+    struct prog_instruction *newInst, *inst;
+    GLint  win_size;  /* state reference */
+    GLuint wpos_temp; /* temp register */
+    GLuint i, j;
+
+    /* Insert first: if this fails nothing has been rewritten yet, so we neither
+     * clobber the real instruction 0 nor leave reads of a never-written temporary. */
+    if (!_mesa_insert_instructions(&(fprog->Base), 0, 1))
+        return;
+
+    /* PARAM win_size = STATE_FB_SIZE */
+    win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate);
+    wpos_temp = fprog->Base.NumTemporaries++;
+
+    /* scan the original instructions (now at index 1 and up) and replace WPOS with wpos_temp */
+    inst = fprog->Base.Instructions + 1;
+    for (i = 1; i < fprog->Base.NumInstructions; i++, inst++) {
+        for (j = 0; j < 3; j++) {
+            if (inst->SrcReg[j].File == PROGRAM_INPUT &&
+                inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) {
+                inst->SrcReg[j].File = PROGRAM_TEMPORARY;
+                inst->SrcReg[j].Index = wpos_temp;
+            }
+        }
+    }
+
+    newInst = fprog->Base.Instructions;
+    /* invert wpos.y
+     * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */
+    newInst[0].Opcode = OPCODE_ADD;
+    newInst[0].DstReg.File = PROGRAM_TEMPORARY;
+    newInst[0].DstReg.Index = wpos_temp;
+    newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
+    newInst[0].SrcReg[0].File = PROGRAM_INPUT;
+    newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+    newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+    newInst[0].SrcReg[0].Negate = NEGATE_Y;
+    newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
+    newInst[0].SrcReg[1].Index = win_size;
+    newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
+}
+
+/* Assign register numbers in pAsm for FP inputs, temporaries and outputs. TODO : Validate FP input with VP output. */
+void Map_Fragment_Program(r700_AssemblerBase         *pAsm,
+						  struct gl_fragment_program *mesa_fp,
+                          GLcontext *ctx) 
+{
+	unsigned int unBit;
+    unsigned int i;
+    GLuint       ui;
+
+    /* match fp inputs with vp exports. */
+    struct r700_vertex_program_cont *vpc =
+		       (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; /* NOTE(review): used below without a NULL check */
+    GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
+    /* number_used_registers is the running allocator: every mapped item takes the next index. */
+	pAsm->number_used_registers = 0;
+
+//Input mapping : mesa_fp->Base.InputsRead set the flag, set in 
+	//The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ...
+	//MUST match order in Map_Vertex_Output
+	unBit = 1 << FRAG_ATTRIB_WPOS;
+	if(mesa_fp->Base.InputsRead & unBit)
+	{
+		pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++;
+	}
+	/* COL0, COL1, FOGC, TEXn and VARn below are keyed on the VP's OutputsWritten, not on the FP's InputsRead. */
+    unBit = 1 << VERT_RESULT_COL0;
+	if(OutputsWritten & unBit)
+	{
+		pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
+	}
+
+	unBit = 1 << VERT_RESULT_COL1;
+	if(OutputsWritten & unBit)
+	{
+		pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
+	}
+
+    unBit = 1 << VERT_RESULT_FOGC;
+    if(OutputsWritten & unBit)
+    {
+        pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
+    }
+
+	for(i=0; i<8; i++) /* the eight texture-coordinate sets TEX0..TEX0+7 */
+	{
+		unBit = 1 << (VERT_RESULT_TEX0 + i);
+		if(OutputsWritten & unBit)
+		{
+			pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
+		}
+	}
+ 
+/* order has been taken care of */ 
+#if 1
+    for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+	{
+        unBit = 1 << i;
+        if(OutputsWritten & unBit)
+		{
+            pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0] = pAsm->number_used_registers++; /* VP varying slot -> matching FP varying slot */
+        }
+    }
+#else
+    if( (mesa_fp->Base.InputsRead >> FRAG_ATTRIB_VAR0) > 0 )
+    {
+	    struct r700_vertex_program_cont *vpc =
+		       (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
+        struct gl_program_parameter_list * VsVarying = vpc->mesa_program.Base.Varying;
+        struct gl_program_parameter_list * PsVarying = mesa_fp->Base.Varying;
+        struct gl_program_parameter      * pVsParam;
+        struct gl_program_parameter      * pPsParam;
+        GLuint j, k;
+        GLuint unMaxVarying = 0;
+
+        for(i=0; i<VsVarying->NumParameters; i++)
+        {
+            pAsm->uiFP_AttributeMap[i + FRAG_ATTRIB_VAR0] = 0;
+        }
+
+        for(i=FRAG_ATTRIB_VAR0; i<FRAG_ATTRIB_MAX; i++)
+	    {
+            unBit = 1 << i;
+            if(mesa_fp->Base.InputsRead & unBit)
+		    {
+                j = i - FRAG_ATTRIB_VAR0;
+                pPsParam = PsVarying->Parameters + j;
+
+                for(k=0; k<VsVarying->NumParameters; k++)
+                {					
+                    pVsParam = VsVarying->Parameters + k;
+
+			        if( strcmp(pPsParam->Name, pVsParam->Name) == 0)
+                    {
+                        pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers + k;                  
+                        if(k > unMaxVarying)
+                        {
+                            unMaxVarying = k;
+                        }
+                        break;
+                    }
+                }
+		    }
+        }
+
+        pAsm->number_used_registers += unMaxVarying + 1;
+    }
+#endif
+    unBit = 1 << FRAG_ATTRIB_FACE; /* FACE and PNTC are keyed on the FP's InputsRead again */
+    if(mesa_fp->Base.InputsRead & unBit)
+    {
+        pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++;
+    }
+
+    unBit = 1 << FRAG_ATTRIB_PNTC;
+    if(mesa_fp->Base.InputsRead & unBit)
+    {
+        pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++;
+    }
+
+/* Map temporary registers (GPRs): reserve the larger of NumNativeTemporaries and NumTemporaries */
+    pAsm->starting_temp_register_number = pAsm->number_used_registers;
+
+    if(mesa_fp->Base.NumNativeTemporaries >= mesa_fp->Base.NumTemporaries)
+    {
+	    pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries;
+    }
+    else
+    {
+        pAsm->number_used_registers += mesa_fp->Base.NumTemporaries;
+    }
+
+/* Output mapping: every result bit set in the FP's OutputsWritten gets the next register */
+	pAsm->number_of_exports = 0;
+	pAsm->number_of_colorandz_exports = 0; /* don't include stencil and mask out. */
+	pAsm->starting_export_register_number = pAsm->number_used_registers;
+
+    for (i = 0; i < FRAG_RESULT_MAX; ++i)
+    {
+        unBit = 1 << i;
+        if (mesa_fp->Base.OutputsWritten & unBit)
+        {
+            if (i == FRAG_RESULT_DEPTH)
+            {
+                pAsm->depth_export_register_number = pAsm->number_used_registers; /* same register uiFP_OutputMap[i] receives below */
+                pAsm->pR700Shader->depthIsExported = 1;
+            }
+
+            pAsm->uiFP_OutputMap[i] = pAsm->number_used_registers++;
+            ++pAsm->number_of_exports;
+            ++pAsm->number_of_colorandz_exports; /* here both counters advance for every written output */
+        }
+    }
+    /* One mask byte per export, cleared to 0.  NOTE(review): the MALLOC result is not checked. */
+    pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
+    for(ui=0; ui<pAsm->number_of_exports; ui++)
+    {
+        pAsm->pucOutMask[ui] = 0x0;
+    }
+    /* One more register for flag_reg_index; uFirstHelpReg is the first index after everything mapped here. */
+    pAsm->flag_reg_index = pAsm->number_used_registers++;
+
+    pAsm->uFirstHelpReg = pAsm->number_used_registers;
+}
+
+/* Fill fp->r700AsmCode.pInstDeps: for every instruction, the last writer of each TEMPORARY
+ * source (nSrcDeps) and the links between TEX instructions and their producers (nDstDep).
+ * Returns GL_FALSE, leaving pInstDeps untouched, if an allocation fails; GL_TRUE otherwise. */
+GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
+					                	struct gl_fragment_program   *mesa_fp)
+{
+    GLuint i, j;
+    GLint * puiTEMPwrites;  /* per temp: index of the last writing instruction, -1 if none */
+    GLint * puiTEMPreads;   /* per temp: index of the first reading instruction, -1 if none */
+    struct prog_instruction * pILInst;
+    InstDeps         *pInstDeps;
+    struct prog_instruction * texcoord_DepInst;
+    GLint              nDepInstID;
+
+    /* +1 so that a zero count never requests 0 bytes (malloc(0) may return NULL) */
+    puiTEMPwrites = (GLint*) MALLOC(sizeof(*puiTEMPwrites) * (mesa_fp->Base.NumTemporaries + 1));
+    puiTEMPreads = (GLint*) MALLOC(sizeof(*puiTEMPreads) * (mesa_fp->Base.NumTemporaries + 1));
+    pInstDeps = (InstDeps*) MALLOC(sizeof(*pInstDeps) * (mesa_fp->Base.NumInstructions + 1));
+    if(NULL == puiTEMPwrites || NULL == puiTEMPreads || NULL == pInstDeps)
+    {
+        FREE(puiTEMPwrites);
+        FREE(puiTEMPreads);
+        FREE(pInstDeps);
+        return GL_FALSE;
+    }
+
+    for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
+    {
+        puiTEMPwrites[i] = -1;
+        puiTEMPreads[i] = -1;
+    }
+
+    for(i=0; i<mesa_fp->Base.NumInstructions; i++)
+    {
+        pInstDeps[i].nDstDep = -1;
+        pILInst = &(mesa_fp->Base.Instructions[i]);
+        //Dst
+        if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
+        {
+            //Set lastwrite for the temp
+            puiTEMPwrites[pILInst->DstReg.Index] = i;
+        }
+        //Src
+        for(j=0; j<3; j++)
+        {
+            if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
+            {
+                //Set dep.
+                pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
+                //Set first read
+                if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 )
+                {
+                    puiTEMPreads[pILInst->SrcReg[j].Index] = i;
+                }
+            }
+            else
+            {
+                pInstDeps[i].nSrcDeps[j] = -1;
+            }
+        }
+    }
+
+    fp->r700AsmCode.pInstDeps = pInstDeps;
+    //Find dep for tex inst
+    for(i=0; i<mesa_fp->Base.NumInstructions; i++)
+    {
+        pILInst = &(mesa_fp->Base.Instructions[i]);
+        if(GL_TRUE == IsTex(pILInst->Opcode))
+        {   //src0 is the tex coord register, src1 is texunit, src2 is textype
+            nDepInstID = pInstDeps[i].nSrcDeps[0];
+            if(nDepInstID >= 0)
+            {
+                texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
+                if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
+                {
+                    pInstDeps[nDepInstID].nDstDep = i;
+                    pInstDeps[i].nDstDep = i;
+                }
+                else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) )
+                {
+                    pInstDeps[i].nDstDep = i;
+                }
+            }
+            // make sure that we dont overwrite src used earlier; only temporaries are tracked in puiTEMPreads
+            nDepInstID = (pILInst->DstReg.File == PROGRAM_TEMPORARY) ? puiTEMPreads[pILInst->DstReg.Index] : -1;
+            if(nDepInstID >= 0 && (GLuint)nDepInstID < i)
+            {
+                pInstDeps[i].nDstDep = nDepInstID;
+                texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
+                if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
+                {
+                    pInstDeps[nDepInstID].nDstDep = i;
+                }
+            }
+        }
+	}
+
+    FREE(puiTEMPwrites);
+    FREE(puiTEMPreads);
+    return GL_TRUE;
+}
+
+/* Translate mesa_fp into r700 code held in fp: init the assembler, apply the WPOS fix-up,
+ * add shadow-ambient state params, map registers, find dependencies, assemble, process
+ * exports and relocate, then record register count and export mode.  Returns GL_FALSE if a
+ * checked step fails (fp->translated is not touched); else sets fp->translated, returns GL_TRUE. */
+GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
+							     struct gl_fragment_program   *mesa_fp,
+                                 GLcontext *ctx) 
+{
+	r700_AssemblerBase *pAsm = &(fp->r700AsmCode);
+	gl_state_index ambientState[STATE_LENGTH]
+	    = { STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0};
+	struct prog_instruction *pInst;
+	GLboolean bDepthOut = GL_FALSE;
+	GLuint    nColorOut;
+	GLuint    depthBit, texUnit;
+	int       idx;
+
+	Init_r700_AssemblerBase( SPT_FP, pAsm, &(fp->r700Shader) );
+
+	if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS)
+	{
+		insert_wpos_code(ctx, mesa_fp);
+	}
+
+	/* ARB_shadow_ambient: one state constant per instruction that samples a shadow texture */
+	if(mesa_fp->Base.ShadowSamplers)
+	{
+		for(idx = 0; idx < mesa_fp->Base.NumInstructions; idx++)
+		{
+			pInst = &(mesa_fp->Base.Instructions[idx]);
+			if(pInst->TexShadow != 1)
+				continue;
+			texUnit = pInst->TexSrcUnit;
+			ambientState[2] = texUnit;
+			pAsm->shadow_regs[texUnit] =
+			    _mesa_add_state_reference(mesa_fp->Base.Parameters, ambientState);
+		}
+	}
+
+	Map_Fragment_Program(pAsm, mesa_fp, ctx);
+
+	if(!Find_Instruction_Dependencies_fp(fp, mesa_fp))
+	{
+		return GL_FALSE;
+	}
+
+	InitShaderProgram(pAsm);
+
+	for(idx = 0; idx < MAX_SAMPLERS; idx++)
+	{
+		pAsm->SamplerUnits[idx] = fp->mesa_program.Base.SamplerUnits[idx];
+	}
+
+	pAsm->unCurNumILInsts = mesa_fp->Base.NumInstructions;
+
+	if(!AssembleInstr(0, 0, mesa_fp->Base.NumInstructions,
+	                  &(mesa_fp->Base.Instructions[0]), pAsm))
+	{
+		return GL_FALSE;
+	}
+
+	if(!Process_Fragment_Exports(pAsm, mesa_fp->Base.OutputsWritten))
+	{
+		return GL_FALSE;
+	}
+
+	if(!RelocProgram(pAsm, &(mesa_fp->Base)))
+	{
+		return GL_FALSE;
+	}
+
+	/* nRegs holds "registers used - 1", but stays 0 when no register is used */
+	fp->r700Shader.nRegs = 0;
+	if(pAsm->number_used_registers != 0)
+	{
+		fp->r700Shader.nRegs = pAsm->number_used_registers - 1;
+	}
+
+	fp->r700Shader.nParamExports = pAsm->number_of_exports;
+
+	/* a depth export is taken out of the colour count and reported in bit 0 instead */
+	nColorOut = pAsm->number_of_colorandz_exports;
+	depthBit = 1 << FRAG_RESULT_DEPTH;
+	if(mesa_fp->Base.OutputsWritten & depthBit)
+	{
+		bDepthOut = GL_TRUE;
+		nColorOut--;
+	}
+
+	/* illegal to set this to 0, so default to one colour export */
+	fp->r700Shader.exportMode = (1 << 1);
+	if(nColorOut != 0 || bDepthOut)
+	{
+		fp->r700Shader.exportMode = nColorOut << 1 | bDepthOut;
+	}
+
+	fp->translated = GL_TRUE;
+
+	return GL_TRUE;
+}
+
+/* Flag chips older than RV770 in the current FP's assembler state, then translate the FP if not done yet. */
+void r700SelectFragmentShader(GLcontext *ctx)
+{
+    struct r700_fragment_program *pCurFp =
+        (struct r700_fragment_program *)ctx->FragmentProgram._Current;
+    context_t *pR700Ctx = R700_CONTEXT(ctx);
+
+    if (pR700Ctx->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+        pCurFp->r700AsmCode.bR6xx = 1;
+    if (pCurFp->translated != GL_FALSE)
+        return;
+    r700TranslateFragmentShader(pCurFp, &(pCurFp->mesa_program), ctx);
+}
+
+/* Return the shaderbo handle stored on the context's current fragment program. */
+void * r700GetActiveFpShaderBo(GLcontext * ctx)
+{
+    struct r700_fragment_program *pCurFp;
+    pCurFp = (struct r700_fragment_program *)ctx->FragmentProgram._Current;
+    return pCurFp->shaderbo;
+}
+
+/* Upload the current FP if not yet loaded, then fill the PS/SPI/CB register state and the PS constants. Returns GL_FALSE if the constants exceed R700_MAX_DX9_CONSTS, else GL_TRUE. */
+GLboolean r700SetupFragmentProgram(GLcontext * ctx)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+    struct r700_fragment_program *fp = (struct r700_fragment_program *)
+	                                   (ctx->FragmentProgram._Current);
+    r700_AssemblerBase         *pAsm = &(fp->r700AsmCode);
+    struct gl_fragment_program *mesa_fp = &(fp->mesa_program);
+    struct gl_program_parameter_list *paramList;
+    unsigned int unNumParamData;
+    unsigned int ui, i;
+    unsigned int unNumOfReg;
+    unsigned int unBit;
+    GLuint exportCount;
+    GLboolean point_sprite = GL_FALSE;
+
+    if(GL_FALSE == fp->loaded) /* assemble (if needed) and upload only once per program */
+    {
+	    if(fp->r700Shader.bNeedsAssembly == GL_TRUE)
+	    {
+		    Assemble( &(fp->r700Shader) );
+	    }
+
+        /* Load fp to gpu */
+        r600EmitShader(ctx,
+                       &(fp->shaderbo),
+                       (GLvoid *)(fp->r700Shader.pProgram),
+                       fp->r700Shader.uShaderBinaryDWORDSize,
+                       "FS");
+
+        fp->loaded = GL_TRUE;
+    }
+    /* Unlike the upload above, the dump call is made on every setup. */
+    DumpHwBinary(DUMP_PIXEL_SHADER, (GLvoid *)(fp->r700Shader.pProgram),
+                 fp->r700Shader.uShaderBinaryDWORDSize);
+
+    /* TODO : enable this after MemUse fixed
+    (context->chipobj.MemUse)(context, fp->shadercode.buf->id);
+    */
+
+    R600_STATECHANGE(context, ps);
+
+    r700->ps.SQ_PGM_RESOURCES_PS.u32All = 0;
+    SETbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
+
+    r700->ps.SQ_PGM_START_PS.u32All = 0; /* set from buffer obj */
+
+    R600_STATECHANGE(context, spi);
+
+    unNumOfReg = fp->r700Shader.nRegs + 1;
+    /* ui starts from the NUM_INTERP field already in SPI_PS_IN_CONTROL_0 and grows by one per extra input below. */
+    ui = (r700->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift);
+
+    /* PS uses fragment.position */
+    if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
+    {
+        ui += 1;
+        SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
+        SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, CENTERS_ONLY, BARYC_SAMPLE_CNTL_shift, BARYC_SAMPLE_CNTL_mask);
+        SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
+        SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
+    }
+    else
+    {
+        CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
+        CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
+    }
+
+    if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE))
+    {
+        ui += 1;
+        SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
+        SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
+        SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit);
+        SETfield(r700->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask);
+    }
+    else
+    {
+        CLEARbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
+    }
+
+    /* see if we need any point_sprite replacements, also increase num_interp
+     * as there's no vp output for them */
+    if (ctx->Point.PointSprite)
+    {
+        for (i = FRAG_ATTRIB_TEX0; i<= FRAG_ATTRIB_TEX7; i++)
+        {
+            if (ctx->Point.CoordReplace[i - FRAG_ATTRIB_TEX0] == GL_TRUE)
+            {
+                ui++;
+                point_sprite = GL_TRUE;
+            }
+        }
+    }
+
+    if( mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC))
+        ui++;
+
+    if ((mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) || point_sprite)
+    {
+        SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
+        SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
+        SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask);
+        SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask);
+        SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_0, PNT_SPRITE_OVRD_Z_shift, PNT_SPRITE_OVRD_Z_mask);
+        SETfield(r700->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_1, PNT_SPRITE_OVRD_W_shift, PNT_SPRITE_OVRD_W_mask);
+        if(ctx->Point.SpriteOrigin == GL_LOWER_LEFT)
+            SETbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
+        else
+            CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
+    }
+    else
+    {
+        CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
+    }
+    /* NUM_GPRS gets the larger of the shader's register count and the interpolant count. */
+    ui = (unNumOfReg < ui) ? ui : unNumOfReg;
+
+    SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
+
+    CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
+
+    if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
+	{
+        SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize,
+                 STACK_SIZE_shift, STACK_SIZE_mask);
+    }
+
+    SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
+             EXPORT_MODE_shift, EXPORT_MODE_mask);
+
+    // emit ps input map: the tests below follow the order Map_Fragment_Program() used to fill uiFP_AttributeMap
+    struct r700_vertex_program_cont *vpc =
+		       (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
+    GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
+    /* NOTE(review): vpc is dereferenced above without a NULL check. */
+    for(ui = 0; ui < R700_MAX_SHADER_EXPORTS; ui++)
+        r700->SPI_PS_INPUT_CNTL[ui].u32All = 0;
+
+    unBit = 1 << FRAG_ATTRIB_WPOS;
+    if(mesa_fp->Base.InputsRead & unBit)
+    {
+            ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS];
+            SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+            SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+                     SEMANTIC_shift, SEMANTIC_mask);
+            if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+                    SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+            else
+                    CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+    }
+
+    unBit = 1 << VERT_RESULT_COL0;
+    if(OutputsWritten & unBit)
+    {
+	    ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0];
+	    SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+	    SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+		     SEMANTIC_shift, SEMANTIC_mask);
+	    if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+		    SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+	    else
+		    CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+    }
+
+    unBit = 1 << VERT_RESULT_COL1;
+    if(OutputsWritten & unBit)
+    {
+	    ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1];
+	    SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+	    SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+		     SEMANTIC_shift, SEMANTIC_mask);
+	    if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+		    SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+	    else
+		    CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+    }
+
+    unBit = 1 << VERT_RESULT_FOGC;
+    if(OutputsWritten & unBit)
+    {
+            ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC];
+            SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+            SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+                     SEMANTIC_shift, SEMANTIC_mask);
+            if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+                    SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+            else
+                    CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+    }
+
+    for(i=0; i<8; i++)
+    {
+	    GLboolean coord_replace = ctx->Point.PointSprite && ctx->Point.CoordReplace[i];
+	    unBit = 1 << (VERT_RESULT_TEX0 + i);
+	    if ((OutputsWritten & unBit) || coord_replace)
+	    {
+		    ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
+		    SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+		    SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+			     SEMANTIC_shift, SEMANTIC_mask);
+		    CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+		    /* ARB_point_sprite */
+		    if (coord_replace)
+		    {
+			     SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
+		    }
+	    }
+    }
+
+    unBit = 1 << FRAG_ATTRIB_FACE;
+    if(mesa_fp->Base.InputsRead & unBit)
+    {
+            ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE];
+            SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+            SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+                     SEMANTIC_shift, SEMANTIC_mask);
+            if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+                    SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+            else
+                    CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+    }
+    unBit = 1 << FRAG_ATTRIB_PNTC;
+    if(mesa_fp->Base.InputsRead & unBit)
+    {
+            ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC];
+            SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+            SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+                     SEMANTIC_shift, SEMANTIC_mask);
+            if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+                    SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+            else
+                    CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+            SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
+    }
+
+    /* generic varyings written by the vertex program */
+    for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+	{
+        unBit = 1 << i;
+        if(OutputsWritten & unBit)
+		{
+            ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0];
+            SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+            SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+		             SEMANTIC_shift, SEMANTIC_mask);
+            if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+		        SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+            else
+		        CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+        }
+    }
+
+    exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
+    if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1))
+    {
+	    R600_STATECHANGE(context, cb);
+	    r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1;
+    }
+
+    /* send out shader constants. */
+    paramList = fp->mesa_program.Base.Parameters;
+
+    if(NULL != paramList) 
+    {
+	    _mesa_load_state_parameters(ctx, paramList);
+
+	    if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
+		    return GL_FALSE;
+
+	    R600_STATECHANGE(context, ps_consts);
+
+	    r700->ps.num_consts = paramList->NumParameters;
+
+	    unNumParamData = paramList->NumParameters;
+
+	    for(ui=0; ui<unNumParamData; ui++) {
+		        r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
+		        r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
+		        r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
+		        r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+	    }
+    } else
+	    r700->ps.num_consts = 0;
+    /* Append the constants of every compiled presub after the program parameters. */
+    COMPILED_SUB * pCompiledSub;
+    GLuint uj;
+    GLuint unConstOffset = r700->ps.num_consts;
+    for(ui=0; ui<pAsm->unNumPresub; ui++)
+    {
+        pCompiledSub = pAsm->presubs[ui].pCompiledSub;
+        /* same limit as enforced for paramList above: never write past R700_MAX_DX9_CONSTS entries */
+        if (unConstOffset + pCompiledSub->NumParameters > R700_MAX_DX9_CONSTS)
+            return GL_FALSE;
+        r700->ps.num_consts += pCompiledSub->NumParameters;
+
+        for(uj=0; uj<pCompiledSub->NumParameters; uj++)
+        {
+            r700->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
+		    r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
+		    r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
+		    r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
+        }
+        unConstOffset += pCompiledSub->NumParameters;
+    }
+
+    return GL_TRUE;
+}
+
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h
new file mode 100644
index 0000000000..39c59c9201
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _R700_FRAGPROG_H_
+#define _R700_FRAGPROG_H_
+
+#include "r600_context.h"
+#include "r700_assembler.h"
+
+struct r700_fragment_program /* driver-side fragment program: the Mesa program plus r700 assembler and shader state */
+{
+	struct gl_fragment_program mesa_program; /* first member; r700_fragprog.c casts ctx->FragmentProgram._Current to this struct */
+
+    r700_AssemblerBase r700AsmCode; /* passed to Init_r700_AssemblerBase() together with r700Shader */
+	R700_Shader        r700Shader;
+
+	GLboolean translated; /* set to GL_TRUE at the end of r700TranslateFragmentShader() */
+    GLboolean loaded; /* set to GL_TRUE once r700SetupFragmentProgram() has emitted the shader */
+	GLboolean error;
+
+    void * shaderbo; /* handle passed to r600EmitShader(); returned by r700GetActiveFpShaderBo() */
+
+	GLboolean WritesDepth;
+	GLuint optimization;
+};
+
+/* Internal: helpers of the translation step, defined in r700_fragprog.c */
+void insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog);
+
+void Map_Fragment_Program(r700_AssemblerBase         *pAsm,
+			  struct gl_fragment_program *mesa_fp,
+                          GLcontext *ctx); 
+GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
+					   struct gl_fragment_program   *mesa_fp);
+/* Returns GL_FALSE when a translation step fails, GL_TRUE otherwise. */
+GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
+				      struct gl_fragment_program   *mesa_fp,
+                                      GLcontext *ctx); 
+
+/* Interface: all three operate on ctx->FragmentProgram._Current */
+extern void r700SelectFragmentShader(GLcontext *ctx);
+
+extern GLboolean r700SetupFragmentProgram(GLcontext * ctx);
+
+extern void *    r700GetActiveFpShaderBo(GLcontext * ctx);
+
+#endif /*_R700_FRAGPROG_H_*/
diff --git a/src/mesa/drivers/dri/r600/r700_ioctl.c b/src/mesa/drivers/dri/r600/r700_ioctl.c
new file mode 100644
index 0000000000..3bc422f394
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_ioctl.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <sched.h>
+#include <errno.h>
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+
+#include "radeon_common.h"
+#include "r600_context.h"
+
+#include "r700_ioctl.h"
+#include "r700_clear.h"
+
+
+void r700InitIoctlFuncs(struct dd_function_table *functions)
+{
+	functions->Flush  = radeonFlush;
+	functions->Finish = radeonFinish;
+	functions->Clear  = r700Clear;	/* the only hook here that is not radeon*-prefixed */
+}
diff --git a/src/mesa/drivers/dri/r600/r700_ioctl.h b/src/mesa/drivers/dri/r600/r700_ioctl.h
new file mode 100644
index 0000000000..414dc3e23e
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_ioctl.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef __R700_IOCTL_H__
+#define __R700_IOCTL_H__
+
+#include "r600_context.h"
+#include "radeon_drm.h"
+
+extern void r700InitIoctlFuncs(struct dd_function_table *functions); /* sets the Clear, Finish and Flush hooks */
+
+#endif				/* __R700_IOCTL_H__ */
diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c
new file mode 100644
index 0000000000..b7124e644a
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_oglprog.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <string.h>
+
+#include "main/glheader.h"
+#include "main/imports.h"
+
+#include "shader/program.h"
+#include "tnl/tnl.h"
+
+#include "r600_context.h"
+#include "r600_emit.h"
+
+#include "r700_oglprog.h"
+#include "r700_fragprog.h"
+#include "r700_vertprog.h"
+
+
+static void freeVertProgCache(GLcontext *ctx, struct r700_vertex_program_cont *cache)
+{
+	struct r700_vertex_program *entry, *following;
+
+	/* Tear down every entry chained off cache->progs.  cache->progs itself
+	 * is not modified here; the caller resets it when the cache lives on. */
+	for (entry = cache->progs; entry != NULL; entry = following) {
+		/* Remember the successor before this node is freed. */
+		following = entry->next;
+
+		r600DeleteShader(ctx, entry->shaderbo);
+		Clean_Up_Assembler(&(entry->r700AsmCode));
+		Clean_Up_Shader(&(entry->r700Shader));
+		_mesa_reference_vertprog(ctx, &entry->mesa_program, NULL);
+		free(entry);
+	}
+}
+
+/* NewProgram hook: allocate the driver wrapper for a vertex or fragment
+ * program.  Returns NULL for an unknown target or when allocation fails. */
+static struct gl_program *r700NewProgram(GLcontext * ctx,
+                                         GLenum target,
+                                         GLuint id)
+{
+	struct gl_program *pProgram = NULL;
+	struct r700_vertex_program_cont *vpc;
+	struct r700_fragment_program *fp;
+
+	radeon_print(RADEON_SHADER, RADEON_VERBOSE,
+			"%s %u, %u\n", __func__, target, id);
+
+	switch (target)
+	{
+	case GL_VERTEX_STATE_PROGRAM_NV:
+	case GL_VERTEX_PROGRAM_ARB:
+		vpc = CALLOC_STRUCT(r700_vertex_program_cont);
+		if (vpc == NULL)
+			break;	/* out of memory: pProgram stays NULL */
+		pProgram = _mesa_init_vertex_program(ctx, &vpc->mesa_program,
+						     target, id);
+		break;
+	case GL_FRAGMENT_PROGRAM_NV:
+	case GL_FRAGMENT_PROGRAM_ARB:
+		fp = CALLOC_STRUCT(r700_fragment_program);
+		if (fp == NULL)
+			break;	/* out of memory: pProgram stays NULL */
+		pProgram = _mesa_init_fragment_program(ctx, &fp->mesa_program,
+						       target, id);
+		/* Start with no translated code and no shader buffer. */
+		fp->translated = GL_FALSE;
+		fp->loaded     = GL_FALSE;
+		fp->shaderbo   = NULL;
+		break;
+	default:
+		_mesa_problem(ctx, "Bad target in r700NewProgram");
+	}
+
+	return pProgram;
+}
+
+/* DeleteProgram hook: release the driver-side resources attached to prog,
+ * then pass the object on to _mesa_delete_program(). */
+static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog)
+{
+	struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog;
+	struct r700_fragment_program *fp;
+
+	radeon_print(RADEON_SHADER, RADEON_VERBOSE,
+			"%s %p\n", __func__, prog);
+
+	switch (prog->Target)
+	{
+	case GL_VERTEX_STATE_PROGRAM_NV:
+	case GL_VERTEX_PROGRAM_ARB:
+		freeVertProgCache(ctx, vpc);
+		break;
+	case GL_FRAGMENT_PROGRAM_NV:
+	case GL_FRAGMENT_PROGRAM_ARB:
+		fp = (struct r700_fragment_program *)prog;
+		/* Release DMA region */
+		r600DeleteShader(ctx, fp->shaderbo);
+		/* Clean up */
+		Clean_Up_Assembler(&(fp->r700AsmCode));
+		Clean_Up_Shader(&(fp->r700Shader));
+		break;
+	default:
+		_mesa_problem(ctx, "Bad target in r700DeleteProgram");
+	}
+
+	_mesa_delete_program(ctx, prog);
+}
+
+/* ProgramStringNotify hook: discard the driver state held for prog (cached
+ * vertex programs, or the fragment shader buffer and assembler state). */
+static GLboolean
+r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+	if (target == GL_VERTEX_PROGRAM_ARB) {
+		struct r700_vertex_program_cont *cont =
+			(struct r700_vertex_program_cont *)prog;
+		freeVertProgCache(ctx, cont);
+		cont->progs = NULL;
+	} else if (target == GL_FRAGMENT_PROGRAM_ARB) {
+		struct r700_fragment_program *frag =
+			(struct r700_fragment_program *)prog;
+		r600DeleteShader(ctx, frag->shaderbo);
+		Clean_Up_Assembler(&(frag->r700AsmCode));
+		Clean_Up_Shader(&(frag->r700Shader));
+		frag->shaderbo   = NULL;
+		frag->translated = GL_FALSE;
+		frag->loaded     = GL_FALSE;
+	}
+
+	/* XXX check if program is legal, within limits */
+	return GL_TRUE;
+}
+
+static GLboolean r700IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+	/* Unconditional answer: none of the arguments are inspected. */
+	return GL_TRUE;
+}
+
+void r700InitShaderFuncs(struct dd_function_table *functions)
+{
+	functions->IsProgramNative     = r700IsProgramNative;	/* always GL_TRUE */
+	functions->ProgramStringNotify = r700ProgramStringNotify;
+	functions->DeleteProgram       = r700DeleteProgram;
+	functions->NewProgram          = r700NewProgram;
+}
diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.h b/src/mesa/drivers/dri/r600/r700_oglprog.h
new file mode 100644
index 0000000000..fe2e9d1974
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_oglprog.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#ifndef _R700_OGLPROG_H_
+#define _R700_OGLPROG_H_
+#include "r600_context.h"
+
+extern void r700InitShaderFuncs(struct dd_function_table *functions); /* sets the NewProgram, DeleteProgram, ProgramStringNotify and IsProgramNative hooks */
+
+#endif /*_R700_OGLPROG_H_*/
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
new file mode 100644
index 0000000000..1929b7cc12
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -0,0 +1,1010 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/state.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/dd.h"
+#include "main/simple_list.h"
+#include "main/api_arrayelt.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_vp_build.h"
+#include "tnl/t_context.h"
+#include "tnl/t_vertex.h"
+#include "vbo/vbo_context.h"
+
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+
+#include "r600_tex.h"
+
+#include "r700_vertprog.h"
+#include "r700_fragprog.h"
+#include "r700_state.h"
+
+#include "radeon_buffer_objects.h"
+#include "radeon_common_context.h"
+
+void r700WaitForIdle(context_t *context);
+void r700WaitForIdleClean(context_t *context);
+static unsigned int r700PrimitiveType(int prim); /* file-local; maps a GL primitive mode to a DI_PT_* code */
+GLboolean r700SyncSurf(context_t *context,
+		       struct radeon_bo *pbo,
+		       uint32_t read_domain,
+		       uint32_t write_domain,
+		       uint32_t sync_type); /* GL_FALSE when pbo is NULL */
+
+void r700WaitForIdle(context_t *context)
+{
+    BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
+    BEGIN_BATCH_NO_AUTOSTATE(3);
+    /* One 3-dword SET_CONFIG_REG packet that writes WAIT_3D_IDLE_bit to WAIT_UNTIL. */
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+    R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
+    R600_OUT_BATCH(WAIT_3D_IDLE_bit);
+
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+void r700WaitForIdleClean(context_t *context)
+{
+    BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
+    BEGIN_BATCH_NO_AUTOSTATE(5);
+    /* First packet (2 dwords): EVENT_WRITE carrying CACHE_FLUSH_AND_INV_EVENT. */
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
+    R600_OUT_BATCH(CACHE_FLUSH_AND_INV_EVENT);
+    /* Second packet (3 dwords): write the idle and idle-clean bits to WAIT_UNTIL. */
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
+    R600_OUT_BATCH(mmWAIT_UNTIL - ASIC_CONFIG_BASE_INDEX);
+    R600_OUT_BATCH(WAIT_3D_IDLE_bit | WAIT_3D_IDLECLEAN_bit);
+
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+void r700Start3D(context_t *context)
+{
+    BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
+    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) /* START_3D_CMDBUF only for families ordered before RV770 */
+    {
+        BEGIN_BATCH_NO_AUTOSTATE(2);
+        R600_OUT_BATCH(CP_PACKET3(R600_IT_START_3D_CMDBUF, 0));
+        R600_OUT_BATCH(0);
+        END_BATCH();
+    }
+    /* CONTEXT_CONTROL goes out for every family. NOTE(review): meaning of the two 0x80000000 words is not visible here. */
+    BEGIN_BATCH_NO_AUTOSTATE(3);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1));
+    R600_OUT_BATCH(0x80000000);
+    R600_OUT_BATCH(0x80000000);
+    END_BATCH();
+
+    COMMIT_BATCH();
+}
+
+/* Emit a SURFACE_SYNC packet for pbo, with sync_type as its first payload
+ * dword and a relocation for pbo using the given read/write domains.
+ * Returns GL_FALSE without emitting anything when pbo is NULL. */
+GLboolean r700SyncSurf(context_t *context,
+		       struct radeon_bo *pbo,
+		       uint32_t read_domain,
+		       uint32_t write_domain,
+		       uint32_t sync_type)
+{
+    BATCH_LOCALS(&context->radeon);
+    radeon_print(RADEON_RENDER | RADEON_STATE, RADEON_TRACE, "%s\n", __func__);
+    uint32_t coher_size;
+
+    if (pbo == NULL)
+	    return GL_FALSE;
+
+    /* Size is sent in 256-byte units, rounded up; 0xffffffff passes through as-is. */
+    coher_size = (pbo->size != 0xffffffff) ? ((pbo->size + 255) >> 8)
+					    : 0xffffffff;
+
+    BEGIN_BATCH_NO_AUTOSTATE(5 + 2);
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
+    R600_OUT_BATCH(sync_type);
+    R600_OUT_BATCH(coher_size);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(10);
+    R600_OUT_BATCH_RELOC(0, pbo, 0,
+			 read_domain, write_domain, 0);
+    END_BATCH();
+    COMMIT_BATCH();
+
+    return GL_TRUE;
+}
+
+static unsigned int r700PrimitiveType(int prim)
+{
+    unsigned int hw_prim;	/* DI_PT_* code matching the GL mode held in prim */
+    switch (prim & PRIM_MODE_MASK) {
+    case GL_POINTS:
+        hw_prim = DI_PT_POINTLIST;
+        break;
+    case GL_LINES:
+        hw_prim = DI_PT_LINELIST;
+        break;
+    case GL_LINE_STRIP:
+        hw_prim = DI_PT_LINESTRIP;
+        break;
+    case GL_LINE_LOOP:
+        hw_prim = DI_PT_LINELOOP;
+        break;
+    case GL_TRIANGLES:
+        hw_prim = DI_PT_TRILIST;
+        break;
+    case GL_TRIANGLE_STRIP:
+        hw_prim = DI_PT_TRISTRIP;
+        break;
+    case GL_TRIANGLE_FAN:
+        hw_prim = DI_PT_TRIFAN;
+        break;
+    case GL_QUADS:
+        hw_prim = DI_PT_QUADLIST;
+        break;
+    case GL_QUAD_STRIP:
+        hw_prim = DI_PT_QUADSTRIP;
+        break;
+    case GL_POLYGON:
+        hw_prim = DI_PT_POLYGON;
+        break;
+    default:
+        assert(0);
+        hw_prim = -1;	/* unknown mode; callers store this in an int and test < 0 */
+    }
+    return hw_prim;
+}
+
+/*
+ * Trim a vertex count so that it only covers complete primitives of the
+ * mode encoded in prim.
+ *
+ * For strips, fans, loops and polygons a count below the mode's minimum
+ * yields 0.  Returns -1 for an unrecognised mode.
+ */
+static int r700NumVerts(int num_verts, int prim)
+{
+	int leftover;	/* vertices that cannot be drawn */
+
+	switch (prim & PRIM_MODE_MASK) {
+	case GL_POINTS:
+		/* every vertex is used */
+		leftover = 0;
+		break;
+	case GL_LINES:
+		leftover = num_verts % 2;
+		break;
+	case GL_TRIANGLES:
+		leftover = num_verts % 3;
+		break;
+	case GL_QUADS:
+		leftover = num_verts % 4;
+		break;
+
+	case GL_LINE_STRIP:
+	case GL_LINE_LOOP:
+		/* need at least two vertices */
+		leftover = (num_verts < 2) ? num_verts : 0;
+		break;
+
+	case GL_TRIANGLE_STRIP:
+	case GL_TRIANGLE_FAN:
+	case GL_POLYGON:
+		/* need at least three vertices */
+		leftover = (num_verts < 3) ? num_verts : 0;
+		break;
+
+	case GL_QUAD_STRIP:
+		/* need at least four vertices, and an even count beyond that */
+		leftover = (num_verts < 4) ? num_verts : num_verts % 2;
+		break;
+
+	default:
+		assert(0);
+		return -1;
+	}
+
+	return num_verts - leftover;
+}
+
+static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    BATCH_LOCALS(&context->radeon);
+    int type, total_emit;
+    int num_indices;
+    uint32_t vgt_draw_initiator = 0;
+    uint32_t vgt_index_type     = 0;
+    uint32_t vgt_primitive_type = 0;
+    uint32_t vgt_num_indices    = 0;
+    /* Emit one DRAW_INDEX whose indices come from context->ind_buf; start/end only supply the count. */
+    type = r700PrimitiveType(prim);
+    num_indices = r700NumVerts(end - start, prim);
+
+    radeon_print(RADEON_RENDER, RADEON_TRACE,
+		 "%s type %x num_indices %d\n",
+		 __func__, type, num_indices);
+    /* Nothing to draw: unknown mode, or no complete primitive in the range. */
+    if (type < 0 || num_indices <= 0)
+	    return;
+
+    SETfield(vgt_primitive_type, type,
+	     VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+    /* Start from 32-bit indices; switched to 16-bit below unless ind_buf.is_32bit is GL_TRUE. */
+    SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+
+    if(GL_TRUE != context->ind_buf.is_32bit)
+    {
+            SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+    }
+
+    vgt_num_indices = num_indices;
+    SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+    total_emit =   3  /* VGT_PRIMITIVE_TYPE */
+	         + 2  /* VGT_INDEX_TYPE */
+	         + 2  /* NUM_INSTANCES */
+	         + 5 + 2; /* DRAW_INDEX */
+
+    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+    // prim
+    R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+    R600_OUT_BATCH(vgt_primitive_type);
+    // index type
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+    R600_OUT_BATCH(vgt_index_type);
+    // num instances
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+    R600_OUT_BATCH(1);
+    // draw packet
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
+    R600_OUT_BATCH(context->ind_buf.bo_offset);
+    R600_OUT_BATCH(0);
+    R600_OUT_BATCH(vgt_num_indices);
+    R600_OUT_BATCH(vgt_draw_initiator);
+    R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
+			 context->ind_buf.bo,
+			 context->ind_buf.bo_offset,
+			 RADEON_GEM_DOMAIN_GTT, 0, 0);
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, int prim)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    BATCH_LOCALS(&context->radeon);
+    int type, i, num_indices;
+    uint32_t total_emit = 0;
+    uint32_t vgt_draw_initiator = 0;
+    uint32_t vgt_index_type     = 0;
+    uint32_t vgt_primitive_type = 0;
+    uint32_t vgt_num_indices    = 0;
+    /* Draw without an index buffer: auto-generated indices when start is 0, inline indices otherwise. */
+    type = r700PrimitiveType(prim);
+    num_indices = r700NumVerts(end - start, prim);
+
+    radeon_print(RADEON_RENDER, RADEON_TRACE,
+		 "%s type %x num_indices %d\n",
+		 __func__, type, num_indices);
+    /* num_indices is signed so that a negative count from r700NumVerts is rejected here. */
+    if (type < 0 || num_indices <= 0)
+	    return;
+
+    SETfield(vgt_primitive_type, type,
+	     VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+    /* NOTE(review): index width follows the count, yet the inline values go up to start + num_indices - 1 - confirm they fit 16 bits. */
+    if (num_indices > 0xffff)
+    {
+	    SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+    }
+    else
+    {
+            SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+    }
+
+    vgt_num_indices = num_indices;
+    SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+    /* Size the inline index payload: one dword per 32-bit index, or two 16-bit indices per dword. */
+    if (start == 0)
+    {
+	SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+    }
+    else
+    {
+	if (num_indices > 0xffff)
+	{
+		total_emit += num_indices;
+	}
+	else
+	{
+		total_emit += (num_indices + 1) / 2;
+	}
+	SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+    }
+
+    total_emit +=   3 /* VGT_PRIMITIVE_TYPE */
+	          + 2 /* VGT_INDEX_TYPE */
+	          + 2 /* NUM_INSTANCES */
+	          + 3; /* DRAW */
+
+    BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+    // prim
+    R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+    R600_OUT_BATCH(vgt_primitive_type);
+    // index type
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+    R600_OUT_BATCH(vgt_index_type);
+    // num instances
+    R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+    R600_OUT_BATCH(1);
+    // draw packet
+    if(start == 0)
+    {
+        R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
+        R600_OUT_BATCH(vgt_num_indices);
+        R600_OUT_BATCH(vgt_draw_initiator);
+    }
+    else
+    {
+	if (num_indices > 0xffff)
+        {
+	    R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
+	    R600_OUT_BATCH(vgt_num_indices);
+	    R600_OUT_BATCH(vgt_draw_initiator);
+	    for (i = start; i < (start + num_indices); i++)
+	    {
+		R600_OUT_BATCH(i);
+	    }
+	}
+	else
+        {
+	    R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1)));
+	    R600_OUT_BATCH(vgt_num_indices);
+	    R600_OUT_BATCH(vgt_draw_initiator);
+	    for (i = start; i < (start + num_indices); i += 2)
+	    {
+		if ((i + 1) == (start + num_indices))
+		{
+		    R600_OUT_BATCH(i);	/* odd count: last index alone in the low half */
+		}
+		else
+		{
+		    R600_OUT_BATCH(((i + 1) << 16) | (i));
+		}
+	    }
+	}
+    }
+
+    END_BATCH();
+    COMMIT_BATCH();
+}
+
+/* start 3d, idle, cb/db flush; parenthesized so the sum survives any surrounding expression */
+#define PRE_EMIT_STATE_BUFSZ (5 + 5 + 14)
+
+/* Estimate the command-buffer dwords needed to draw nr_prims primitives
+ * (fixed prologue, per-draw packets and state), ask for that much room via
+ * rcommonEnsureCmdBufSpace(), and return the estimate. */
+static GLuint r700PredictRenderSize(GLcontext* ctx,
+				    const struct _mesa_prim *prim,
+				    const struct _mesa_index_buffer *ib,
+				    GLuint nr_prims)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    GLuint total = PRE_EMIT_STATE_BUFSZ;
+    GLuint state_size, n;
+    GLboolean flushed;
+
+    if (ib != NULL) {
+	    /* indexed draws cost a fixed 14 dwords each */
+	    total += nr_prims * 14;
+    } else {
+	    for (n = 0; n < nr_prims; ++n) {
+		    const struct _mesa_prim *p = &prim[n];
+
+		    total += 10;
+		    if (p->start == 0)
+			    continue;
+		    /* inline indices: one per dword above 0xffff of them, else two */
+		    total += (p->count > 0xffff) ? p->count : ((p->count + 1) / 2);
+	    }
+    }
+
+    /* Recount the state if making room flushed the command buffer. */
+    state_size = radeonCountStateEmitSize(&context->radeon);
+    flushed = rcommonEnsureCmdBufSpace(&context->radeon,
+				       total + state_size, __FUNCTION__);
+    if (flushed)
+	    state_size = radeonCountStateEmitSize(&context->radeon);
+    total += state_size;
+
+    radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, total);
+    return total;
+}
+
+#define CONVERT( TYPE, MACRO ) do { /* uses input, count, stride, src_ptr, dst_ptr from the enclosing scope */ \
+	GLuint i, j, sz;				\
+	sz = input->Size;				\
+	if (input->Normalized) {			\
+		for (i = 0; i < count; i++) {		\
+			const TYPE *in = (const TYPE *)src_ptr;	\
+			for (j = 0; j < sz; j++) {		\
+				*dst_ptr++ = MACRO(*in);		\
+				in++;				\
+			}					\
+			src_ptr += stride;			\
+		}						\
+	} else {					\
+		for (i = 0; i < count; i++) {		\
+			const TYPE *in = (const TYPE *)src_ptr;	\
+			for (j = 0; j < sz; j++) {		\
+				*dst_ptr++ = (GLfloat)(*in);		\
+				in++;				\
+			}					\
+			src_ptr += stride;			\
+		}						\
+	}						\
+} while (0)
+
+/**
+ * Convert an attribute array to GLfloat in a newly allocated DMA region.
+ * attr->bo / attr->bo_offset describe the result; a named buffer object that is not yet mapped is mapped and unmapped here.
+ */
+static void r700ConvertAttrib(GLcontext *ctx, int count, 
+                              const struct gl_client_array *input, 
+                              struct StreamDesc *attr)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    const GLvoid *src_ptr;
+    GLboolean mapped_named_bo = GL_FALSE;
+    GLfloat *dst_ptr;
+    GLuint stride;
+    /* Source step per element; a StrideB of 0 is treated as tightly packed. */
+    stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
+
+    /* Convert value for first element only */
+    if (input->StrideB == 0)
+    {
+        count = 1;
+    }
+
+    if (input->BufferObj->Name) 
+    {
+        if (!input->BufferObj->Pointer) 
+        {
+            ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+            mapped_named_bo = GL_TRUE;
+        }
+
+        src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+    } 
+    else 
+    {
+        src_ptr = input->Ptr;
+    }
+    /* Destination holds count elements of input->Size floats each. */
+    radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, 
+                         sizeof(GLfloat) * input->Size * count, 32);
+
+    radeon_bo_map(attr->bo, 1);
+
+    dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+    /* NOTE(review): src_ptr already includes input->Ptr, so this may not catch a failed MapBuffer - confirm. */
+    assert(src_ptr != NULL);
+    /* CONVERT reads input, count, stride and src_ptr and advances dst_ptr. */
+    switch (input->Type) 
+    {
+        case GL_DOUBLE:
+            CONVERT(GLdouble, (GLfloat));
+            break;
+        case GL_UNSIGNED_INT:
+            CONVERT(GLuint, UINT_TO_FLOAT);
+            break;
+        case GL_INT:
+            CONVERT(GLint, INT_TO_FLOAT);
+            break;
+        case GL_UNSIGNED_SHORT:
+            CONVERT(GLushort, USHORT_TO_FLOAT);
+            break;
+        case GL_SHORT:
+            CONVERT(GLshort, SHORT_TO_FLOAT);
+            break;
+        case GL_UNSIGNED_BYTE:
+            assert(input->Format != GL_BGRA);
+            CONVERT(GLubyte, UBYTE_TO_FLOAT);
+            break;
+        case GL_BYTE:
+            CONVERT(GLbyte, BYTE_TO_FLOAT);
+            break;
+        default:
+            assert(0);
+            break;
+    }
+
+    radeon_bo_unmap(attr->bo);
+    /* Only undo a mapping that was made above. */
+    if (mapped_named_bo) 
+    {
+        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+    }
+}
+
+/* Copy a buffer object's attribute data into a new DMA region, padding the
+ * element stride up to a multiple of four bytes.  On return attr->bo and
+ * attr->bo_offset describe the copy and attr->stride holds the padded stride. */
+static void r700AlignDataToDword(GLcontext *ctx,
+                                 const struct gl_client_array *input,
+                                 int count,
+                                 struct StreamDesc *attr)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    const int dst_stride = (input->StrideB + 3) & ~3;
+    /* Sized by the padded stride: elements are written dst_stride bytes apart. */
+    const int size = dst_stride * count;
+    GLboolean mapped_named_bo = GL_FALSE;
+    const GLubyte *src_ptr;
+    GLubyte *dst_ptr;
+    int i;
+
+    radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);
+    radeon_bo_map(attr->bo, 1);
+
+    if (!input->BufferObj->Pointer)
+    {
+        ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+        mapped_named_bo = GL_TRUE;
+    }
+
+    src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+    dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+    for (i = 0; i < count; ++i)
+    {
+        memcpy(dst_ptr, src_ptr, input->StrideB);
+        src_ptr += input->StrideB;
+        dst_ptr += dst_stride;
+    }
+
+    radeon_bo_unmap(attr->bo);
+    if (mapped_named_bo)
+        ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+
+    attr->stride = dst_stride;
+}
+
+static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input[], int count)
+{
+	context_t *context = R700_CONTEXT(ctx);
+    GLuint stride;
+    int ret;
+    int i, index;
+    /* For each active stream, pick or build its buffer and mirror the description into tcl.aos[]. */
+    R600_STATECHANGE(context, vtx);
+
+    for(index = 0; index < context->nNumActiveAos; index++) 
+    {
+        struct radeon_aos *aos = &context->radeon.tcl.aos[index];
+        i = context->stream_desc[index].element;
+
+        stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
+        /* Double/int/uint data (and strides under 4 bytes) go through r700ConvertAttrib. */
+        if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+            getTypeSize(input[i]->Type) != 4 || 
+#endif
+            stride < 4) 
+        {
+            r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
+        } 
+        else 
+        {
+            if (input[i]->BufferObj->Name) 
+            {
+                if (stride % 4 != 0) 
+                {
+                    assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
+                    r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]);
+                    context->stream_desc[index].is_named_bo = GL_FALSE;
+                } 
+                else 
+                {
+                    context->stream_desc[index].stride = input[i]->StrideB;
+                    context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
+                    context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
+                    context->stream_desc[index].is_named_bo = GL_TRUE;
+                }
+            } 
+            else 
+            {
+                int size;
+                int local_count = count;
+                uint32_t *dst;
+
+                if (input[i]->StrideB == 0) 
+                {
+                    size = getTypeSize(input[i]->Type) * input[i]->Size;
+                    local_count = 1;
+                } 
+                else 
+                {
+                    size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
+                }
+
+                radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo, 
+                                     &context->stream_desc[index].bo_offset, size, 32);
+
+                radeon_bo_map(context->stream_desc[index].bo, 1);
+                assert(context->stream_desc[index].bo->ptr != NULL);
+
+                /* Client-memory array: copy it into the DMA region with the emitter matching its dword width. */
+                dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr, 
+                                               context->stream_desc[index].bo_offset);
+
+                switch (context->stream_desc[index].dwords) 
+                {
+                case 1:                     
+                    radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+                    break;
+                case 2: 
+                    radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); 
+                    break;
+                case 3: 
+                    radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); 
+                    break;
+                case 4: 
+                    radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); 
+                    break;
+                default: 
+                    assert(0); 
+                    break;
+                }
+		radeon_bo_unmap(context->stream_desc[index].bo);
+            }
+        }
+        /* Mirror the stream description into the aos entry; stride is stored in units of sizeof(float). */
+        aos->count = context->stream_desc[index].stride == 0 ? 1 : count;
+        aos->stride = context->stream_desc[index].stride / sizeof(float);
+        aos->components = context->stream_desc[index].dwords;
+        aos->bo = context->stream_desc[index].bo;
+        aos->offset = context->stream_desc[index].bo_offset;
+
+        if(context->stream_desc[index].is_named_bo) 
+        {
+            radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs, 
+                                              context->stream_desc[index].bo, 
+                                              RADEON_GEM_DOMAIN_GTT, 0);
+        }
+    }
+    /* NOTE(review): ret is assigned but never examined, so a failed space check goes unnoticed. */
+    ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs, 
+                                        first_elem(&context->radeon.dma.reserved)->bo, 
+                                        RADEON_GEM_DOMAIN_GTT, 0);    
+}
+
+static void r700FreeData(GLcontext *ctx)
+{
+    /* Release the buffer references held for this draw.  tcl.aos[n].bo is
+     * zeroed to prevent a double unref in radeonReleaseArrays (called during
+     * context destroy); ind_buf.bo is cleared so no stale pointer remains.
+     */
+    context_t *context = R700_CONTEXT(ctx);
+    int i;
+
+    for (i = 0; i < context->nNumActiveAos; i++)
+    {
+        if (!context->stream_desc[i].is_named_bo)
+        {   /* named streams are not unreferenced here */
+            radeon_bo_unref(context->stream_desc[i].bo);
+        }
+        context->radeon.tcl.aos[i].bo = NULL;
+    }
+
+    if (context->ind_buf.bo != NULL)
+    {
+        radeon_bo_unref(context->ind_buf.bo);
+        context->ind_buf.bo = NULL;
+    }
+}
+
+static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{   /* Repack the source indices as 16-bit values into a freshly allocated DMA region. */
+    context_t *context = R700_CONTEXT(ctx);
+    GLvoid *src_ptr;
+    GLuint *out;
+    int i;
+    GLboolean mapped_named_bo = GL_FALSE;
+    /* A named buffer object without a current mapping is mapped read-only for the copy. */
+    if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
+    {
+        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+        mapped_named_bo = GL_TRUE;
+        assert(mesa_ind_buf->obj->Pointer != NULL);
+    }
+    src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+    /* Each output word holds two indices: in[i] in bits 0-15, in[i + 1] in bits 16-31. */
+    if (mesa_ind_buf->type == GL_UNSIGNED_BYTE)
+    {
+        GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); /* count rounded up to even */
+        GLubyte *in = (GLubyte *)src_ptr;
+
+	radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+			     &context->ind_buf.bo_offset, size, 4);
+
+	radeon_bo_map(context->ind_buf.bo, 1);
+	assert(context->ind_buf.bo->ptr != NULL);
+	out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+        for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
+        {
+            *out++ = in[i] | in[i + 1] << 16;
+        }
+        /* An odd count leaves one index; it is written alone with a zero upper half. */
+        if (i < mesa_ind_buf->count)
+        {
+            *out++ = in[i];
+        }
+
+	radeon_bo_unmap(context->ind_buf.bo);
+#if MESA_BIG_ENDIAN /* only big-endian builds compile the else branch below */
+    }
+    else
+    { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
+        GLushort *in = (GLushort *)src_ptr;
+        GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+
+	radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+			     &context->ind_buf.bo_offset, size, 4);
+
+	radeon_bo_map(context->ind_buf.bo, 1);
+	assert(context->ind_buf.bo->ptr != NULL);
+	out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+        for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
+        {
+            *out++ = in[i] | in[i + 1] << 16;
+        }
+
+        if (i < mesa_ind_buf->count)
+        {
+            *out++ = in[i];
+        }
+	radeon_bo_unmap(context->ind_buf.bo);
+#endif
+    }   /* closes whichever branch the preprocessor left open */
+
+    context->ind_buf.is_32bit = GL_FALSE;
+    context->ind_buf.count = mesa_ind_buf->count;
+
+    if (mapped_named_bo)
+    {
+        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+    }
+}
+
+static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{   /* Fill context->ind_buf for this draw, copying or converting the caller's indices. */
+    context_t *context = R700_CONTEXT(ctx);
+    /* No index buffer: clear ind_buf.bo, which r700TryDrawPrims tests to pick the draw path. */
+    if (!mesa_ind_buf) {
+        context->ind_buf.bo = NULL;
+        return;
+    }
+
+#if MESA_BIG_ENDIAN
+    if (mesa_ind_buf->type == GL_UNSIGNED_INT)
+#else
+    if (mesa_ind_buf->type != GL_UNSIGNED_BYTE)
+#endif
+    {   /* these index types are copied verbatim with memcpy */
+        const GLvoid *src_ptr;
+        GLvoid *dst_ptr;
+        GLboolean mapped_named_bo = GL_FALSE;
+        /* A named buffer object without a current mapping is mapped read-only for the copy. */
+        if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
+        {
+	        ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+	        assert(mesa_ind_buf->obj->Pointer != NULL);
+	        mapped_named_bo = GL_TRUE;
+        }
+
+        src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+        const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);
+        /* Allocate a DMA region (alignment argument 4) and map it for the copy. */
+	radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+			     &context->ind_buf.bo_offset, size, 4);
+	radeon_bo_map(context->ind_buf.bo, 1);
+	assert(context->ind_buf.bo->ptr != NULL);
+	dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+        memcpy(dst_ptr, src_ptr, size);
+
+	radeon_bo_unmap(context->ind_buf.bo);
+        context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
+        context->ind_buf.count = mesa_ind_buf->count;
+
+        if (mapped_named_bo)
+        {
+	        ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+        }
+    }
+    else
+    {   /* every other index type is repacked by r700FixupIndexBuffer */
+	    r700FixupIndexBuffer(ctx, mesa_ind_buf);
+    }
+}
+
+static GLboolean check_fallbacks(GLcontext *ctx)
+{
+	/* A fallback is required (GL_TRUE) for every render mode other than
+	 * GL_RENDER; plain rendering reports GL_FALSE.
+	 */
+	return (ctx->RenderMode == GL_RENDER) ? GL_FALSE : GL_TRUE;
+}
+
+static GLboolean r700TryDrawPrims(GLcontext *ctx,
+				  const struct gl_client_array *arrays[],
+				  const struct _mesa_prim *prim,
+				  GLuint nr_prims,
+				  const struct _mesa_index_buffer *ib,
+				  GLuint min_index,
+				  GLuint max_index )
+{   /* Attempt a hardware draw; GL_FALSE tells r700DrawPrims to fall back. */
+    context_t *context = R700_CONTEXT(ctx);
+    radeonContextPtr radeon = &context->radeon;
+    GLuint i, id = 0;
+    struct radeon_renderbuffer *rrb;
+    /* NOTE: min_index is accepted but never read in this function. */
+    if (ctx->NewState)
+        _mesa_update_state( ctx );
+
+    if (check_fallbacks(ctx))
+	    return GL_FALSE;
+
+    _tnl_UpdateFixedFunctionProgram(ctx);
+    r700SetVertexFormat(ctx, arrays, max_index + 1);
+    /* shaders need to be updated before buffers are validated */
+    r700UpdateShaders(ctx);
+    if (!r600ValidateBuffers(ctx))
+	    return GL_FALSE;
+
+    /* always emit CB base to prevent
+     * lock ups on some chips.
+     */
+    R600_STATECHANGE(context, cb_target);
+    /* mark vtx as dirty since it changes per-draw */
+    R600_STATECHANGE(context, vtx);
+
+    r700SetScissor(context);
+    r700SetupVertexProgram(ctx);
+    r700SetupFragmentProgram(ctx);
+    r700UpdateShaderStates(ctx);
+    /* Predicted value of cs->cdw after this draw; compared with the real value at the end. */
+    GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims)
+                    + context->radeon.cmdbuf.cs->cdw;
+
+    r700SetupIndexBuffer(ctx, ib);
+    r700SetupStreams(ctx, arrays, max_index + 1);
+
+    radeonEmitState(radeon);
+    /* Indexed path when an index buffer object was set up, immediate path otherwise. */
+    radeon_debug_add_indent();
+    for (i = 0; i < nr_prims; ++i)
+    {
+	    if (context->ind_buf.bo)
+		    r700RunRenderPrimitive(ctx,
+					   prim[i].start,
+					   prim[i].start + prim[i].count,
+					   prim[i].mode);
+	    else
+		    r700RunRenderPrimitiveImmediate(ctx,
+						    prim[i].start,
+						    prim[i].start + prim[i].count,
+						    prim[i].mode);
+    }
+    radeon_debug_remove_indent();
+
+    /* Flush render op cached for last several quads. */
+    /* XXX drm should handle this in fence submit */
+    r700WaitForIdleClean(context);
+
+    rrb = radeon_get_colorbuffer(&context->radeon);
+    if (rrb && rrb->bo)
+	    r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
+			 CB_ACTION_ENA_bit | (1 << (id + 6))); /* id is always 0 here */
+
+    rrb = radeon_get_depthbuffer(&context->radeon);
+    if (rrb && rrb->bo)
+	    r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
+			 DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
+    /* Release the per-draw stream and index buffer objects. */
+    r700FreeData(ctx);
+
+    if (emit_end < context->radeon.cmdbuf.cs->cdw)
+    {
+        WARN_ONCE("Rendering was %d commands larger than predicted size."
+            " We might overflow  command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
+    }
+
+    return GL_TRUE;
+}
+
+static void r700DrawPrims(GLcontext *ctx,
+			  const struct gl_client_array *arrays[],
+			  const struct _mesa_prim *prim,
+			  GLuint nr_prims,
+			  const struct _mesa_index_buffer *ib,
+			  GLboolean index_bounds_valid,
+			  GLuint min_index,
+			  GLuint max_index)
+{
+	/* Draw hook: try the hardware path first and hand the draw to the
+	 * tnl pipeline when that attempt reports failure.
+	 */
+
+	/* The caller's bounds are used only when flagged valid; otherwise
+	 * they are recomputed through vbo_get_minmax_index.
+	 */
+	if (index_bounds_valid == GL_FALSE)
+		vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+
+	/* A non-zero minimum index goes to vbo_rebase_prims, which is
+	 * handed this function for the rebased draw.
+	 */
+	if (min_index != 0) {
+		vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims );
+		return;
+	}
+
+	if (r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index))
+		return;
+	_swsetup_Wakeup(ctx);	/* hardware attempt failed: run the tnl pipeline */
+	_tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+}
+
+void r700InitDraw(GLcontext *ctx)
+{
+	/* Point the draw_prims hook of this context's vbo module at
+	 * r700DrawPrims.
+	 */
+	vbo_context(ctx)->draw_prims = r700DrawPrims;
+}
+
+
diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c
new file mode 100644
index 0000000000..67b0d40308
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_shader.c
@@ -0,0 +1,621 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/imports.h"
+
+#include "main/glheader.h"
+
+#include "r600_context.h"
+
+#include "r700_shader.h"
+
+void r700ShaderInit(GLcontext * ctx)
+{	/* Empty: performs no work; ctx is unused. */
+}
+
+void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst)
+{
+	/* Append pInst at the tail of the singly linked list and bump the
+	 * node count.  An empty list is recognised by a NULL pTail.
+	 */
+	R700ShaderInstruction * pOldTail = plstCFInstructions->pTail;
+
+	if(NULL != pOldTail)
+		pOldTail->pNextInst = pInst;
+	else
+		plstCFInstructions->pHead = pInst;	/* first node */
+
+	plstCFInstructions->pTail = pInst;
+	pInst->pNextInst = NULL;	/* the new node terminates the list */
+	plstCFInstructions->uNumOfNode++;
+}
+
+void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst)
+{
+    /* Unlink pInst from the list, renumber the nodes that followed it and
+     * free it.  Nothing happens when pInst is not a member of the list.
+     */
+    R700ShaderInstruction * pBefore = NULL;
+    R700ShaderInstruction * pWalk;
+    GLuint                  uNextIndex;
+
+    /* Locate pInst, remembering its predecessor. */
+    for(pWalk = plstCFInstructions->pHead; NULL != pWalk; pWalk = pWalk->pNextInst)
+    {
+        if(pWalk == pInst)
+        {
+            break;
+        }
+        pBefore = pWalk;
+    }
+    if(NULL == pWalk)
+    {
+        return;
+    }
+
+    plstCFInstructions->uNumOfNode--;
+
+    /* Successors take over the index range starting at the removed slot. */
+    uNextIndex = pInst->m_uIndex;
+    for(pWalk = pInst->pNextInst; NULL != pWalk; pWalk = pWalk->pNextInst)
+    {
+        pWalk->m_uIndex = uNextIndex++;
+    }
+
+    /* Patch head, tail and predecessor around the removed node. */
+    if(plstCFInstructions->pHead == pInst)
+    {
+        plstCFInstructions->pHead = pInst->pNextInst;
+    }
+    if(plstCFInstructions->pTail == pInst)
+    {
+        plstCFInstructions->pTail = pBefore;
+    }
+    if(NULL != pBefore)
+    {
+        pBefore->pNextInst = pInst->pNextInst;
+    }
+
+    FREE(pInst);
+}
+
+void Init_R700_Shader(R700_Shader * pShader)
+{	/* Reset pShader to an empty shader with no binary and no instructions. */
+	pShader->Type = R700_SHADER_INVALID;
+	pShader->pProgram = NULL;
+	pShader->bBinaryShader = GL_FALSE;
+	pShader->bFetchShaderRequired = GL_FALSE;
+	pShader->bNeedsAssembly = GL_FALSE;
+	pShader->bLinksDirty = GL_TRUE;
+	pShader->uShaderBinaryDWORDSize = 0;
+	pShader->nRegs = 0;
+	pShader->nParamExports = 0;
+	pShader->nMemExports = 0;
+	pShader->resource = 0;
+	pShader->exportMode = 0;
+	pShader->depthIsImported = GL_FALSE;
+
+	pShader->positionVectorIsExported = GL_FALSE;
+	pShader->miscVectorIsExported = GL_FALSE;
+	pShader->renderTargetArrayIndexIsExported = GL_FALSE;
+	pShader->ccDist0VectorIsExported = GL_FALSE;
+	pShader->ccDist1VectorIsExported = GL_FALSE;
+
+	pShader->depthIsExported = GL_FALSE;
+	pShader->stencilRefIsExported = GL_FALSE;
+	pShader->coverageToMaskIsExported = GL_FALSE;
+	pShader->maskIsExported = GL_FALSE;
+	pShader->killIsUsed = GL_FALSE;
+
+	pShader->uStartAddr = 0;
+	pShader->uCFOffset = 0;
+	pShader->uEndOfCF = 0;
+	pShader->uEndOfALU = 0;
+	pShader->uEndOfFetch = 0;
+	pShader->uStackSize = 0;
+	pShader->uMaxCallDepth = 0;
+	pShader->bSurfAllocated = GL_FALSE;
+
+	/* Every instruction list starts out empty. */
+	pShader->lstCFInstructions.pHead = pShader->lstCFInstructions.pTail = NULL;
+	pShader->lstCFInstructions.uNumOfNode=0;
+	pShader->lstALUInstructions.pHead = pShader->lstALUInstructions.pTail = NULL;
+	pShader->lstALUInstructions.uNumOfNode=0;
+	pShader->lstTEXInstructions.pHead = pShader->lstTEXInstructions.pTail = NULL;
+	pShader->lstTEXInstructions.uNumOfNode=0;
+	pShader->lstVTXInstructions.pHead = pShader->lstVTXInstructions.pTail = NULL;
+	pShader->lstVTXInstructions.uNumOfNode=0;
+	/* Default target of AddCFInstruction() until SetActiveCFlist() picks another list. */
+	pShader->plstCFInstructions_active = &(pShader->lstCFInstructions);
+}
+
+void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF)
+{   /* Select the list that AddCFInstruction() appends control-flow instructions to. */
+    pShader->plstCFInstructions_active = plstCF;
+}
+
+void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst)
+{
+    /* Append a control-flow instruction to the active CF list, grow the
+     * binary size by its dword count and tally the exports it performs.
+     */
+    TypedShaderList * plstActive = pShader->plstCFInstructions_active;
+
+    pCFInst->m_uIndex = plstActive->uNumOfNode;
+    AddInstToList(plstActive, (R700ShaderInstruction*)pCFInst);
+    pShader->uShaderBinaryDWORDSize += GetInstructionSize(pCFInst->m_ShaderInstType);
+
+    switch (pCFInst->m_ShaderInstType)
+    {
+    case SIT_CF_ALL_EXP_SX:
+        {
+            R700ControlFlowSXClause* pSX = (R700ControlFlowSXClause*)pCFInst;
+            if(pSX->m_Word0.f.type == SQ_EXPORT_PARAM)
+            {
+                pShader->nParamExports += pSX->m_Word1.f.burst_count + 1;
+            }
+        }
+        break;
+    case SIT_CF_ALL_EXP_SMX:
+        {
+            R700ControlFlowSMXClause* pSMX = (R700ControlFlowSMXClause*)pCFInst;
+            if((pSMX->m_Word1.f.cf_inst == SQ_CF_INST_MEM_RING) &&
+               (pSMX->m_Word0.f.type == SQ_EXPORT_WRITE || pSMX->m_Word0.f.type == SQ_EXPORT_WRITE_IND))
+            {
+                pShader->nMemExports += pSMX->m_Word1.f.burst_count + 1;
+            }
+        }
+        break;
+    default:
+        break;
+    }
+    pShader->bLinksDirty    = GL_TRUE;
+    pShader->bNeedsAssembly = GL_TRUE;
+    pCFInst->useCount++;
+}
+
+void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst)
+{
+    /* Append a vertex-fetch instruction and update size / register tracking. */
+    TypedShaderList * plstVTX = &(pShader->lstVTXInstructions);
+
+    pVTXInst->m_uIndex = plstVTX->uNumOfNode;
+    AddInstToList(plstVTX, (R700ShaderInstruction*)pVTXInst);
+    pShader->uShaderBinaryDWORDSize += GetInstructionSize(pVTXInst->m_ShaderInstType);
+
+    if(SIT_VTX_GENERIC == pVTXInst->m_ShaderInstType)
+    {   /* only generic fetches touch nRegs: it becomes max(nRegs, dst_gpr) */
+        R700VertexGenericFetch* pGeneric = (R700VertexGenericFetch*)pVTXInst;
+        if(pShader->nRegs < pGeneric->m_Word1_GPR.f.dst_gpr) pShader->nRegs = pGeneric->m_Word1_GPR.f.dst_gpr;
+    }
+    pShader->bLinksDirty    = GL_TRUE;
+    pShader->bNeedsAssembly = GL_TRUE;
+    pVTXInst->useCount++;
+}
+
+void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst)
+{
+    /* Append a texture instruction; nRegs becomes max(nRegs, dst_gpr). */
+    TypedShaderList * plstTEX = &(pShader->lstTEXInstructions);
+
+    pTEXInst->m_uIndex = plstTEX->uNumOfNode;
+    AddInstToList(plstTEX, (R700ShaderInstruction*)pTEXInst);
+    pShader->uShaderBinaryDWORDSize += GetInstructionSize(pTEXInst->m_ShaderInstType);
+    if(pShader->nRegs < pTEXInst->m_Word1.f.dst_gpr)
+        pShader->nRegs = pTEXInst->m_Word1.f.dst_gpr;
+    pShader->bLinksDirty    = GL_TRUE;
+    pShader->bNeedsAssembly = GL_TRUE;
+    pTEXInst->useCount++;
+}
+
+void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst)
+{
+    /* Append an ALU instruction; nRegs becomes max(nRegs, dst_gpr). */
+    TypedShaderList * plstALU = &(pShader->lstALUInstructions);
+
+    pALUInst->m_uIndex = plstALU->uNumOfNode;
+    AddInstToList(plstALU, (R700ShaderInstruction*)pALUInst);
+    pShader->uShaderBinaryDWORDSize += GetInstructionSize(pALUInst->m_ShaderInstType);
+    if(pShader->nRegs < pALUInst->m_Word1.f.dst_gpr)
+        pShader->nRegs = pALUInst->m_Word1.f.dst_gpr;
+    pShader->bLinksDirty    = GL_TRUE;
+    pShader->bNeedsAssembly = GL_TRUE;
+    pALUInst->useCount++;
+}
+
+void ResolveLinks(R700_Shader *pShader)
+{   /* Compute where each instruction will sit in the binary and store that address in its linked CF clause. */
+    GLuint uiSize;
+    R700ShaderInstruction  *pInst;
+    R700ALUInstruction     *pALUinst;
+    R700TextureInstruction *pTEXinst;
+    R700VertexInstruction  *pVTXinst; 
+    /* Section offsets below are in dwords, laid out as CF, ALU, TEX, VTX. */
+    GLuint vtxOffset;
+
+	GLuint cfOffset = 0x0;  
+
+    GLuint aluOffset = cfOffset + pShader->lstCFInstructions.uNumOfNode * GetInstructionSize(SIT_CF);
+
+    GLuint texOffset = aluOffset;  // + m_lstALUInstructions.size() * R700ALUInstruction::SIZE,
+    /* ALU instructions vary in size, so the TEX start is found by summing them. */
+    pInst = pShader->lstALUInstructions.pHead;
+    while(NULL != pInst)
+    {
+        texOffset += GetInstructionSize(pInst->m_ShaderInstType);
+
+        pInst = pInst->pNextInst;
+    };
+  
+    vtxOffset = texOffset + pShader->lstTEXInstructions.uNumOfNode * GetInstructionSize(SIT_TEX);
+    /* Append one default ALU instruction when a non-empty TEX or VTX section would not start on a multiple of 4 dwords. */
+    if ( ((pShader->lstTEXInstructions.uNumOfNode > 0) && (texOffset % 4 != 0)) || 
+         ((pShader->lstVTXInstructions.uNumOfNode > 0) && (vtxOffset % 4 != 0))    )
+    {
+        pALUinst = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); /* NOTE(review): result is used without a NULL check */
+        Init_R700ALUInstruction(pALUinst);
+        AddALUInstruction(pShader, pALUinst);
+        texOffset += GetInstructionSize(SIT_ALU);
+        vtxOffset += GetInstructionSize(SIT_ALU);
+    }
+    /* Each loop below stores (section offset + running size) >> 1 in the clause linked to the instruction. */
+    pInst  = pShader->lstALUInstructions.pHead;
+    uiSize = 0;
+    while(NULL != pInst)
+    {
+        pALUinst = (R700ALUInstruction*)pInst;
+
+        if(pALUinst->m_pLinkedALUClause != NULL)
+        {
+            // This address is quad-word aligned
+            pALUinst->m_pLinkedALUClause->m_Word0.f.addr = (aluOffset + uiSize) >> 1;
+        }
+
+        uiSize += GetInstructionSize(pALUinst->m_ShaderInstType);
+
+        pInst = pInst->pNextInst;
+    };
+
+    pInst  = pShader->lstTEXInstructions.pHead;
+    uiSize = 0;
+    while(NULL != pInst)
+    {
+        pTEXinst = (R700TextureInstruction*)pInst;
+
+        if (pTEXinst->m_pLinkedGenericClause != NULL)
+        {
+            pTEXinst->m_pLinkedGenericClause->m_Word0.f.addr = (texOffset + uiSize) >> 1;
+        }
+
+        uiSize += GetInstructionSize(pTEXinst->m_ShaderInstType);
+
+        pInst = pInst->pNextInst;
+    };
+
+    pInst  = pShader->lstVTXInstructions.pHead;
+    uiSize = 0;
+    while(NULL != pInst)
+    {
+        pVTXinst = (R700VertexInstruction*)pInst;
+
+        if (pVTXinst->m_pLinkedGenericClause != NULL)
+        {
+            pVTXinst->m_pLinkedGenericClause->m_Word0.f.addr = (vtxOffset + uiSize) >> 1;
+        }
+
+        uiSize += GetInstructionSize(pVTXinst->m_ShaderInstType);
+
+        pInst = pInst->pNextInst;
+    };
+    /* AddALUInstruction() above may have set the flag again; it is cleared last. */
+    pShader->bLinksDirty = GL_FALSE;
+}
+
+void Assemble(R700_Shader *pShader)
+{
+    /* Serialize the CF, ALU, TEX and VTX lists, in that order, into a new
+     * dword buffer that replaces pShader->pProgram, then update the
+     * uEndOf* markers.  Binary shaders (bBinaryShader) are skipped. */
+    GLuint *pShaderBinary;
+    GLuint size_of_program;
+    GLuint *pCurrPos;
+    GLuint end_of_cf_instructions;
+    GLuint number_of_alu_dwords;
+    R700ShaderInstruction  *pInst;
+
+    if(GL_TRUE == pShader->bBinaryShader)
+    {
+        return;
+    }
+
+    if(pShader->bLinksDirty == GL_TRUE) 
+    {
+        ResolveLinks(pShader);
+    }
+
+    size_of_program = pShader->uShaderBinaryDWORDSize;
+
+    pShaderBinary = (GLuint*) MALLOC(sizeof(GLuint)*size_of_program);
+    if(NULL == pShaderBinary)
+    {   /* No buffer: keep the previous binary and leave bNeedsAssembly as it is. */
+        return;
+    }
+    memset(pShaderBinary, 0, sizeof(GLuint)*size_of_program);
+
+    pCurrPos = pShaderBinary;
+
+    pInst = pShader->lstCFInstructions.pHead;
+    while(NULL != pInst)
+    {
+        switch (pInst->m_ShaderInstType)
+        {
+        case SIT_CF_GENERIC: 
+            {
+                R700ControlFlowGenericClause* pCFgeneric = (R700ControlFlowGenericClause*)pInst;
+                *pCurrPos++ = pCFgeneric->m_Word0.val;
+                *pCurrPos++ = pCFgeneric->m_Word1.val;
+            }
+            break;
+        case SIT_CF_ALU: 
+            {
+                R700ControlFlowALUClause* pCFalu = (R700ControlFlowALUClause*)pInst;
+                *pCurrPos++ = pCFalu->m_Word0.val;
+                *pCurrPos++ = pCFalu->m_Word1.val;
+            }
+            break;
+        case SIT_CF_ALL_EXP_SX: 
+            {
+                R700ControlFlowSXClause* pCFsx = (R700ControlFlowSXClause*)pInst;
+                *pCurrPos++ = pCFsx->m_Word0.val;
+                *pCurrPos++ = (pCFsx->m_Word1.val | pCFsx->m_Word1_SWIZ.val);
+            }
+            break;
+        case SIT_CF_ALL_EXP_SMX: 
+            {
+                R700ControlFlowSMXClause* pCFsmx = (R700ControlFlowSMXClause*)pInst;
+                *pCurrPos++ = pCFsmx->m_Word0.val;
+                *pCurrPos++ = (pCFsmx->m_Word1.val | pCFsmx->m_Word1_BUF.val);
+            }
+            break;
+        default:
+            break;
+        }
+
+        pInst = pInst->pNextInst;
+    };
+    
+    number_of_alu_dwords = 0;
+    pInst = pShader->lstALUInstructions.pHead;
+    while(NULL != pInst)
+    {
+        switch (pInst->m_ShaderInstType)
+        {
+        case SIT_ALU: 
+            {
+                R700ALUInstruction* pALU = (R700ALUInstruction*)pInst;
+
+                *pCurrPos++ = pALU->m_Word0.val;
+                *pCurrPos++ = (pALU->m_Word1.val | pALU->m_Word1_OP2.val | pALU->m_Word1_OP3.val);
+
+                number_of_alu_dwords += 2;
+            }
+            break;
+        case SIT_ALU_HALF_LIT: 
+            {   /* literals are copied bit-for-bit with memcpy, not read through a cast pointer */
+                R700ALUInstructionHalfLiteral* pALUhalf = (R700ALUInstructionHalfLiteral*)pInst;
+
+                *pCurrPos++ = pALUhalf->m_Word0.val;
+                *pCurrPos++ = (pALUhalf->m_Word1.val | pALUhalf->m_Word1_OP2.val | pALUhalf->m_Word1_OP3.val);
+                memcpy(pCurrPos++, &(pALUhalf->m_fLiteralX), sizeof(GLuint));
+                memcpy(pCurrPos++, &(pALUhalf->m_fLiteralY), sizeof(GLuint));
+
+                number_of_alu_dwords += 4;
+            }
+            break;
+        case SIT_ALU_FALL_LIT: 
+            {
+                R700ALUInstructionFullLiteral* pALUfull = (R700ALUInstructionFullLiteral*)pInst;
+
+                *pCurrPos++ = pALUfull->m_Word0.val;
+                *pCurrPos++ = (pALUfull->m_Word1.val | pALUfull->m_Word1_OP2.val | pALUfull->m_Word1_OP3.val);
+
+                memcpy(pCurrPos++, &(pALUfull->m_fLiteralX), sizeof(GLuint));
+                memcpy(pCurrPos++, &(pALUfull->m_fLiteralY), sizeof(GLuint));
+                memcpy(pCurrPos++, &(pALUfull->m_fLiteralZ), sizeof(GLuint));
+                memcpy(pCurrPos++, &(pALUfull->m_fLiteralW), sizeof(GLuint));
+
+                number_of_alu_dwords += 6;
+            }
+            break;
+        default:
+            break;
+        }
+
+        pInst = pInst->pNextInst;
+    };
+    
+    pInst = pShader->lstTEXInstructions.pHead;
+    while(NULL != pInst)
+    {
+        R700TextureInstruction* pTEX = (R700TextureInstruction*)pInst;
+
+        *pCurrPos++ = pTEX->m_Word0.val;
+        *pCurrPos++ = pTEX->m_Word1.val;
+        *pCurrPos++ = pTEX->m_Word2.val;
+        *pCurrPos++ = 0x0beadeaf;   /* constant written as the fourth dword */
+
+        pInst = pInst->pNextInst;
+    };
+    
+    pInst = pShader->lstVTXInstructions.pHead;
+    while(NULL != pInst)
+    {
+        switch (pInst->m_ShaderInstType)
+        {
+        case SIT_VTX_SEM: //
+            {
+                R700VertexSemanticFetch* pVTXsem = (R700VertexSemanticFetch*)pInst;
+
+                *pCurrPos++ = pVTXsem->m_Word0.val;
+                *pCurrPos++ = (pVTXsem->m_Word1.val | pVTXsem->m_Word1_SEM.val);
+                *pCurrPos++ = pVTXsem->m_Word2.val;
+                *pCurrPos++ = 0x0beadeaf;
+            }
+            break;
+        case SIT_VTX_GENERIC: //
+            {
+                R700VertexGenericFetch* pVTXgeneric = (R700VertexGenericFetch*)pInst;
+
+                *pCurrPos++ = pVTXgeneric->m_Word0.val;
+                *pCurrPos++ = (pVTXgeneric->m_Word1.val | pVTXgeneric->m_Word1_GPR.val);
+                *pCurrPos++ = pVTXgeneric->m_Word2.val;
+                *pCurrPos++ = 0x0beadeaf;
+            }
+            break;
+        default:
+            break;
+        }
+
+        pInst = pInst->pNextInst;
+    };
+
+    if(NULL != pShader->pProgram)
+    {   /* the new buffer replaces any earlier binary */
+        FREE(pShader->pProgram);
+    }
+    pShader->pProgram = (GLubyte*)pShaderBinary;
+
+    end_of_cf_instructions = pShader->uCFOffset + pShader->lstCFInstructions.uNumOfNode * GetInstructionSize(SIT_CF);
+    /* The end markers are stored in units of two dwords (dword position >> 1). */
+    pShader->uEndOfCF = end_of_cf_instructions >> 1;
+
+    pShader->uEndOfALU = (end_of_cf_instructions + number_of_alu_dwords) >> 1;
+
+    pShader->uEndOfFetch = (pShader->uCFOffset + pShader->uShaderBinaryDWORDSize) >> 1;
+
+    pShader->bNeedsAssembly = GL_FALSE;
+}
+
+void LoadProgram(R700_Shader *pShader) //context
+{   /* Empty stub: performs no work. */
+}
+
+void UpdateShaderRegisters(R700_Shader *pShader) //context
+{   /* Empty stub: performs no work. */
+}
+
+void DeleteInstructions(R700_Shader *pShader)
+{   /* Empty stub: frees nothing; Clean_Up_Shader() is what releases the lists. */
+}
+
+void DebugPrint(void)
+{   /* Empty stub: prints nothing. */
+}
+
+void cleanup_vfetch_shaderinst(R700_Shader *pShader)
+{   /* Remove all vertex-fetch instructions and the CF clauses linked to them. */
+    R700ShaderInstruction      *pInst;
+    R700ShaderInstruction      *pInstToFree;
+    R700VertexInstruction      *pVTXInst;
+    R700ControlFlowInstruction *pCFInst;
+
+    pInst = pShader->lstVTXInstructions.pHead;
+    while(NULL != pInst)
+    {
+        pVTXInst = (R700VertexInstruction  *)pInst;
+        pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pVTXInst->m_ShaderInstType);
+
+        if(NULL != pVTXInst->m_pLinkedGenericClause)
+        {
+            pCFInst = (R700ControlFlowInstruction*)(pVTXInst->m_pLinkedGenericClause);
+            /* Account for the clause first: TakeInstOutFromList() frees it. */
+            pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pCFInst->m_ShaderInstType);
+
+            TakeInstOutFromList(&(pShader->lstCFInstructions), 
+                                 (R700ShaderInstruction*)pCFInst);
+        }
+
+        pInst = pInst->pNextInst;
+    };
+
+    //destroy each item in pShader->lstVTXInstructions;
+    pInst = pShader->lstVTXInstructions.pHead;
+    while(NULL != pInst)
+    {
+        pInstToFree = pInst;
+        pInst = pInst->pNextInst;
+        FREE(pInstToFree);
+    };
+
+    //set NULL pShader->lstVTXInstructions
+    pShader->lstVTXInstructions.pHead=NULL; 
+	pShader->lstVTXInstructions.pTail=NULL; 
+	pShader->lstVTXInstructions.uNumOfNode=0;
+}
+
+/* Free every node of *plst and leave the list empty. */
+static void free_shader_inst_list(TypedShaderList * plst)
+{
+    R700ShaderInstruction  *pInst = plst->pHead;
+    R700ShaderInstruction  *pInstToFree;
+
+    while(NULL != pInst)
+    {
+        pInstToFree = pInst;
+        pInst = pInst->pNextInst;
+        FREE(pInstToFree);
+    }
+
+    /* No node survives, so none may stay reachable. */
+    plst->pHead = NULL;
+    plst->pTail = NULL;
+    plst->uNumOfNode = 0;
+}
+
+/* Release the assembled binary and every instruction owned by pShader.
+ * All freed pointers are reset, so a repeated call is harmless and no
+ * later code can free or walk the released memory again.
+ */
+void Clean_Up_Shader(R700_Shader *pShader)
+{
+    FREE(pShader->pProgram);
+    pShader->pProgram = NULL;
+
+    /* Instruction nodes are freed list by list; the links that ALU/TEX/VTX
+     * nodes hold to CF clauses are plain references and are not followed.
+     */
+    free_shader_inst_list(&(pShader->lstCFInstructions));
+    free_shader_inst_list(&(pShader->lstALUInstructions));
+    free_shader_inst_list(&(pShader->lstTEXInstructions));
+    free_shader_inst_list(&(pShader->lstVTXInstructions));
+}
+
diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h
new file mode 100644
index 0000000000..0599ffd901
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_shader.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#ifndef __R700_SHADER_H__
+#define __R700_SHADER_H__
+
+#include "main/mtypes.h"
+
+#include "r700_shaderinst.h"
+
+
+void r700ShaderInit(GLcontext * ctx);
+
+typedef enum R700ShaderType
+{   // NOTE(review): FS/ES/GS/VS/PS are not expanded anywhere in this header - confirm their meaning before relying on it
+    R700_SHADER_FS      = 0x0,
+    R700_SHADER_ES      = 0x1,
+    R700_SHADER_GS      = 0x2,
+    R700_SHADER_VS      = 0x3,
+    R700_SHADER_PS      = 0x4,
+    R700_SHADER_INVALID = 0x5,   // Type assigned by Init_R700_Shader()
+} R700ShaderType;
+
+typedef struct TypedShaderList 
+{   // singly linked list of shader instructions, chained through pNextInst
+	R700ShaderInstruction * pHead;   // first node; NULL after Init_R700_Shader()
+	R700ShaderInstruction * pTail;   // last node; AddInstToList() appends after it
+	GLuint  uNumOfNode;              // node count kept by AddInstToList()/TakeInstOutFromList()
+} TypedShaderList;
+
+typedef struct RealRegister 
+{   // presumably a register address/value pair - TODO confirm against the code that fills it
+    GLuint uAddr;
+    GLuint uValue;
+} RealRegister;
+
+typedef struct InstDeps
+{   // presumably dependency slots of one instruction - TODO confirm against users
+    GLint nDstDep;       // one destination slot
+    GLint nSrcDeps[3];   // three source slots
+} InstDeps;
+
+typedef struct R700_Shader 
+{   // shader under construction: instruction lists plus the binary assembled from them
+	R700ShaderType   Type;       // R700_SHADER_INVALID after Init_R700_Shader()
+
+    GLubyte*  pProgram;          // binary built by Assemble(); freed by Clean_Up_Shader()
+
+    GLboolean bBinaryShader;     // when GL_TRUE, Assemble() returns without doing anything
+    GLboolean bFetchShaderRequired;
+    GLboolean bNeedsAssembly;    // set by the Add*Instruction() calls, cleared by Assemble()
+    GLboolean bLinksDirty;       // set by the Add*Instruction() calls, cleared by ResolveLinks()
+
+    GLuint  uShaderBinaryDWORDSize; // in DWORDS; grown by every Add*Instruction() call
+    GLuint  nRegs;               // max dst_gpr over added generic-VTX, TEX and ALU instructions
+    GLuint  nParamExports;   // VS_ EXPORT_COUNT (1 based, the actual register is 0 based!)
+    GLuint  nMemExports;         // accumulated by AddCFInstruction() for MEM_RING write exports
+    GLuint  resource;     // VS and PS _RESOURCE
+    GLuint  exportMode;   // VS and PS _EXPORT_MODE
+
+    GLboolean  depthIsImported;             
+
+    // Vertex program exports
+    GLboolean  positionVectorIsExported;          
+
+    GLboolean  miscVectorIsExported;               
+    GLboolean  renderTargetArrayIndexIsExported;  
+
+    GLboolean  ccDist0VectorIsExported;  
+    GLboolean  ccDist1VectorIsExported;  
+
+    // Pixel program exports
+    GLboolean  depthIsExported;             
+    GLboolean  stencilRefIsExported;        
+    GLboolean  coverageToMaskIsExported;    
+    GLboolean  maskIsExported;              
+
+    GLboolean  killIsUsed;                  
+
+    GLuint  uStartAddr;
+    GLuint  uCFOffset;           // added to the section ends computed by Assemble()
+    GLuint  uEndOfCF;            // written by Assemble(): dword position >> 1
+    GLuint  uEndOfALU;           // written by Assemble(): dword position >> 1
+    GLuint  uEndOfFetch;         // written by Assemble(): dword position >> 1
+    GLuint  uStackSize;
+    GLuint  uMaxCallDepth;
+
+    TypedShaderList * plstCFInstructions_active; // list AddCFInstruction() appends to; set by SetActiveCFlist()
+	TypedShaderList lstCFInstructions;
+	TypedShaderList lstALUInstructions;
+	TypedShaderList lstTEXInstructions;
+	TypedShaderList lstVTXInstructions;
+
+    RealRegister RegStartAddr;
+    RealRegister RegCFOffset;
+    RealRegister RegEndCF;
+    RealRegister RegEndALU;
+    RealRegister egEndFetcg;     // NOTE(review): name looks like a typo for RegEndFetch; renaming would change the struct
+
+	// -------- constants
+	GLfloat   ConstantArray[SQ_ALU_CONSTANT_PS_COUNT * 4];
+	
+	GLboolean bSurfAllocated;
+} R700_Shader;
+
+//Internal: list maintenance and the link/assemble passes
+void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst);
+void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst);
+void ResolveLinks(R700_Shader *pShader);
+void Assemble(R700_Shader *pShader);
+
+//Interface: initialisation and instruction registration
+void Init_R700_Shader(R700_Shader * pShader);
+void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst);
+void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst);
+void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst);
+void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst);
+void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF);
+//The next four are empty stubs in r700_shader.c
+void LoadProgram(R700_Shader *pShader);
+void UpdateShaderRegisters(R700_Shader *pShader);
+void DeleteInstructions(R700_Shader *pShader);
+void DebugPrint(void);
+void cleanup_vfetch_shaderinst(R700_Shader *pShader);
+//Frees pProgram and the nodes of all four instruction lists
+void Clean_Up_Shader(R700_Shader *pShader);
+
+#endif /*__R700_SHADER_H__*/
+
diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.c b/src/mesa/drivers/dri/r600/r700_shaderinst.c
new file mode 100644
index 0000000000..f120d9f941
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_shaderinst.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#include "main/mtypes.h"
+
+#include "radeon_debug.h"
+#include "r700_shaderinst.h"
+
+/* Put a generic CF clause into its initial state; pNextInst and m_uIndex are not written. */
+void Init_R700ControlFlowGenericClause(R700ControlFlowGenericClause* pInst)
+{
+    pInst->m_ShaderInstType = SIT_CF_GENERIC;
+    pInst->useCount         = 0;
+    /* Not linked to any fetch instruction yet. */
+    pInst->m_pLinkedTEXInstruction = 0;
+    pInst->m_pLinkedVTXInstruction = 0;
+
+    pInst->m_Word1.val = 0;
+    pInst->m_Word0.val = 0;
+}
+
+/* Put an ALU CF clause into its initial state (zeroed words, no ALU link, use count 0). */
+void Init_R700ControlFlowALUClause(R700ControlFlowALUClause* pInst)
+{
+    /* pNextInst and m_uIndex are not written by this initializer. */
+    pInst->m_ShaderInstType        = SIT_CF_ALU;
+    pInst->useCount                = 0;
+    pInst->m_pLinkedALUInstruction = 0;
+
+    pInst->m_Word1.val = 0;
+    pInst->m_Word0.val = 0;
+}
+
+/* Put an SX export clause into its initial state; pNextInst and m_uIndex are not written. */
+void Init_R700ControlFlowSXClause(R700ControlFlowSXClause* pInst)
+{
+    pInst->m_ShaderInstType = SIT_CF_ALL_EXP_SX;
+    pInst->useCount         = 0;
+
+    pInst->m_Word1_SWIZ.val = 0;
+    pInst->m_Word1.val      = 0;
+    pInst->m_Word0.val      = 0;
+}
+
+/* Put an SMX export clause into its initial state; pNextInst and m_uIndex are not written. */
+void Init_R700ControlFlowSMXClause(R700ControlFlowSMXClause* pInst)
+{
+    pInst->m_ShaderInstType = SIT_CF_ALL_EXP_SMX;
+    pInst->useCount         = 0;
+
+    pInst->m_Word1_BUF.val = 0;
+    pInst->m_Word1.val     = 0;
+    pInst->m_Word0.val     = 0;
+}
+
+/* Put a plain ALU instruction into its initial state; pNextInst and m_uIndex are not written. */
+void Init_R700ALUInstruction(R700ALUInstruction* pInst)
+{
+    pInst->m_ShaderInstType   = SIT_ALU;
+    pInst->useCount           = 0;
+    pInst->m_pLinkedALUClause = 0;
+
+    /* All four word members are cleared, the OP2 and OP3 forms of word 1 included. */
+    pInst->m_Word1_OP3.val = 0;
+    pInst->m_Word1_OP2.val = 0;
+    pInst->m_Word1.val     = 0;
+    pInst->m_Word0.val     = 0;
+}
+
+/* Initialise an ALU instruction carrying two literals (x, y); pNextInst and m_uIndex are not written. */
+void Init_R700ALUInstructionHalfLiteral(R700ALUInstructionHalfLiteral* pInst, GLfloat x, GLfloat y)
+{
+    pInst->m_ShaderInstType   = SIT_ALU_HALF_LIT;
+    pInst->useCount           = 0;
+    pInst->m_pLinkedALUClause = 0;
+
+    /* Literal payload supplied by the caller. */
+    pInst->m_fLiteralY = y;
+    pInst->m_fLiteralX = x;
+
+    pInst->m_Word1_OP3.val = 0;
+    pInst->m_Word1_OP2.val = 0;
+    pInst->m_Word1.val     = 0;
+    pInst->m_Word0.val     = 0;
+}
+
+/* Initialise an ALU instruction carrying four literals (x, y, z, w); pNextInst and m_uIndex are not written. */
+void Init_R700ALUInstructionFullLiteral(R700ALUInstructionFullLiteral* pInst, GLfloat x, GLfloat y, GLfloat z, GLfloat w)
+{
+    pInst->m_ShaderInstType   = SIT_ALU_FALL_LIT;
+    pInst->useCount           = 0;
+    pInst->m_pLinkedALUClause = 0;
+
+    /* Literal payload supplied by the caller. */
+    pInst->m_fLiteralW = w;
+    pInst->m_fLiteralZ = z;
+    pInst->m_fLiteralY = y;
+    pInst->m_fLiteralX = x;
+
+    pInst->m_Word1_OP3.val = 0;
+    pInst->m_Word1_OP2.val = 0;
+    pInst->m_Word1.val     = 0;
+    pInst->m_Word0.val     = 0;
+}
+
+/* Put a texture instruction into its initial state; pNextInst and m_uIndex are not written. */
+void Init_R700TextureInstruction(R700TextureInstruction* pInst)
+{
+    pInst->m_ShaderInstType       = SIT_TEX;
+    pInst->useCount               = 0;
+    pInst->m_pLinkedGenericClause = 0;
+
+    /* Clear the three encoding words. */
+    pInst->m_Word2.val = 0;
+    pInst->m_Word1.val = 0;
+    pInst->m_Word0.val = 0;
+}
+
+/* Put a semantic vertex fetch into its initial state; pNextInst and m_uIndex are not written. */
+void Init_R700VertexSemanticFetch(R700VertexSemanticFetch* pInst)
+{
+    pInst->m_ShaderInstType       = SIT_VTX_SEM;
+    pInst->useCount               = 0;
+    pInst->m_pLinkedGenericClause = 0;
+
+    /* Both forms of word 1 (plain and SEM) are cleared along with words 0 and 2. */
+    pInst->m_Word2.val     = 0;
+    pInst->m_Word1_SEM.val = 0;
+    pInst->m_Word1.val     = 0;
+    pInst->m_Word0.val     = 0;
+}
+
+/* Put a generic vertex fetch into its initial state; pNextInst and m_uIndex are not written. */
+void Init_R700VertexGenericFetch(R700VertexGenericFetch* pInst)
+{
+    pInst->m_ShaderInstType       = SIT_VTX_GENERIC;
+    pInst->useCount               = 0;
+    pInst->m_pLinkedGenericClause = 0;
+
+    /* Both forms of word 1 (plain and GPR) are cleared along with words 0 and 2. */
+    pInst->m_Word2.val     = 0;
+    pInst->m_Word1_GPR.val = 0;
+    pInst->m_Word1.val     = 0;
+    pInst->m_Word0.val     = 0;
+}
+
+/* Returns the encoded size of one instruction of type instType (2, 4 or 6). */
+unsigned int GetInstructionSize(ShaderInstType instType)
+{
+    unsigned int uSize = 2;    /* every type not listed below */
+
+    if (instType == SIT_ALU_FALL_LIT)
+    {
+        uSize = 6;
+    }
+    else if (instType == SIT_ALU_HALF_LIT || instType == SIT_TEX         ||
+             instType == SIT_VTX          || instType == SIT_VTX_GENERIC ||
+             instType == SIT_VTX_SEM)
+    {
+        uSize = 4;
+    }
+
+    return uSize;
+}
+
+/* Returns the maximum instruction count for a CF clause of type instType. */
+unsigned int GetCFMaxInstructions(ShaderInstType instType)
+{
+    /* Export clauses (ALL_EXP, SX, SMX) and any unlisted type share 0x10. */
+    unsigned int uMax = 0x10;
+
+    if (SIT_CF_GENERIC == instType)
+    {
+        uMax = 0x8;     /* For tex and vtx */
+    }
+    else if (SIT_CF_ALU == instType)
+    {
+        uMax = 0x80;
+    }
+
+    return uMax;
+}
+
+/* Cross-link a generic CF clause and a vertex fetch; returns GL_FALSE (and logs) if the clause already has a TEX link. */
+GLboolean LinkVertexInstruction(R700ControlFlowGenericClause *pCFGeneric,
+                                R700VertexInstruction *pVTXInstruction)
+{
+    if (0 == pCFGeneric->m_pLinkedTEXInstruction)
+    {
+        pVTXInstruction->m_pLinkedGenericClause = pCFGeneric;
+        pCFGeneric->m_pLinkedVTXInstruction     = pVTXInstruction;
+        return GL_TRUE;
+    }
+
+    radeon_error("This instruction is already linked to a texture instruction.\n");
+    return GL_FALSE;
+}
+
+
+
diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.h b/src/mesa/drivers/dri/r600/r700_shaderinst.h
new file mode 100644
index 0000000000..cdb9a570f7
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_shaderinst.h
@@ -0,0 +1,328 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#ifndef _R700_SHADERINST_H_
+#define _R700_SHADERINST_H_
+
+#include "main/glheader.h"
+
+#include "defaultendian.h" 
+#include "sq_micro_reg.h"
+
+#define SQ_ALU_CONSTANT_PS_OFFSET      0x00000000
+#define SQ_ALU_CONSTANT_PS_COUNT       0x00000100
+#define SQ_ALU_CONSTANT_VS_OFFSET      0x00000100
+#define SQ_ALU_CONSTANT_VS_COUNT       0x00000100
+#define SQ_FETCH_RESOURCE_PS_OFFSET    0x00000000
+#define SQ_FETCH_RESOURCE_PS_COUNT     0x000000a0
+#define SQ_FETCH_RESOURCE_VS_OFFSET    0x000000a0
+#define SQ_FETCH_RESOURCE_VS_COUNT     0x000000b0
+// In both groups above, the VS range starts at PS_OFFSET + PS_COUNT.
+// Sampler ranges follow the same pattern (original note: "richard dec.10 glsl").
+#define SQ_TEX_SAMPLER_PS_OFFSET       0x00000000
+#define SQ_TEX_SAMPLER_PS_COUNT        0x00000012
+#define SQ_TEX_SAMPLER_VS_OFFSET       0x00000012
+#define SQ_TEX_SAMPLER_VS_COUNT        0x00000012
+//-------------------
+// One bit per instruction family; every ShaderInstType value has its family's bit set.
+#define SHADERINST_TYPEMASK_CF  0x10
+#define SHADERINST_TYPEMASK_ALU 0x20
+#define SHADERINST_TYPEMASK_TEX 0x40
+#define SHADERINST_TYPEMASK_VTX 0x80
+
+typedef enum ShaderInstType 
+{
+    SIT_CF = 0x10,            /*SIZE = 0x2; CF family, bit SHADERINST_TYPEMASK_CF*/
+        SIT_CF_ALL_EXP = 0x14,    /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x10;*/
+            SIT_CF_ALL_EXP_SX = 0x15, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x10;*/
+            SIT_CF_ALL_EXP_SMX= 0x16, /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x10;*/
+        SIT_CF_GENERIC = 0x18,    /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x8;  //For tex and vtx*/
+        SIT_CF_ALU = 0x19,        /*SIZE = 0x2, MAX_INSTRUCTIONS = 0x80;*/
+    SIT_ALU = 0x20,           /*SIZE = 0x2; ALU family, bit SHADERINST_TYPEMASK_ALU*/
+        SIT_ALU_HALF_LIT = 0x21,  /*SIZE = 0x4; set by Init_R700ALUInstructionHalfLiteral*/
+        SIT_ALU_FALL_LIT = 0x22,  /*SIZE = 0x6; set by Init_R700ALUInstructionFullLiteral ("FALL" spelling kept for callers)*/
+    SIT_TEX = 0x40,           /*SIZE = 0x4; bit SHADERINST_TYPEMASK_TEX*/
+    SIT_VTX = 0x80,           /*SIZE = 0x4, MEGA_FETCH_BYTES = 0x20; VTX family, bit SHADERINST_TYPEMASK_VTX*/
+        SIT_VTX_GENERIC = 0x81,   /*SIZE = 0x4, MEGA_FETCH_BYTES = 0x20*/
+        SIT_VTX_SEM = 0x82       /*SIZE = 0x4, MEGA_FETCH_BYTES = 0x20*/
+} ShaderInstType;
+
+typedef struct R700ShaderInstruction  /* common header: every instruction struct in this file starts with these four fields */
+{
+    ShaderInstType m_ShaderInstType;          /* concrete instruction kind (SIT_*) */
+    struct R700ShaderInstruction *pNextInst;  /* presumably the next entry of an instruction list -- confirm against the list code */
+    GLuint m_uIndex;                          /* not written by the Init_* helpers */
+    GLuint useCount;                          /* set to 0 by the Init_* helpers */
+} R700ShaderInstruction;
+
+// ------------------ CF insts ---------------------------
+// CF instructions have no base struct of their own; the common header type is aliased.
+typedef R700ShaderInstruction R700ControlFlowInstruction;
+
+typedef struct R700ControlFlowAllocExportClause  
+{
+    ShaderInstType          m_ShaderInstType;
+    R700ShaderInstruction * pNextInst;    
+    GLuint m_uIndex;
+    GLuint useCount;
+    // Above: the four R700ShaderInstruction fields, same order. Below: the two alloc/export words.
+    sq_cf_alloc_export_word0_u      m_Word0;
+    sq_cf_alloc_export_word1_u      m_Word1;
+} R700ControlFlowAllocExportClause;
+
+typedef struct R700ControlFlowSXClause 
+{
+	ShaderInstType          m_ShaderInstType;
+	R700ShaderInstruction * pNextInst;
+	// "Inherits" by repetition: same leading fields as R700ControlFlowAllocExportClause,
+	// which in turn starts with the four R700ShaderInstruction fields
+	// (R700ControlFlowInstruction is a typedef of R700ShaderInstruction).
+	GLuint m_uIndex;
+    GLuint useCount;
+	// -- end of the R700ShaderInstruction fields --
+	// alloc/export words, as in R700ControlFlowAllocExportClause:
+    sq_cf_alloc_export_word0_u      m_Word0;
+    sq_cf_alloc_export_word1_u      m_Word1;
+	// -- end of the R700ControlFlowAllocExportClause fields --
+
+    sq_cf_alloc_export_word1_swiz_u m_Word1_SWIZ;
+} R700ControlFlowSXClause;
+
+typedef struct R700ControlFlowSMXClause 
+{
+	ShaderInstType          m_ShaderInstType;
+	R700ShaderInstruction * pNextInst;
+    // "Inherits" by repetition: same leading fields as R700ControlFlowAllocExportClause,
+	// which in turn starts with the four R700ShaderInstruction fields
+	// (R700ControlFlowInstruction is a typedef of R700ShaderInstruction).
+	GLuint m_uIndex;
+    GLuint useCount;
+	// -- end of the R700ShaderInstruction fields --
+	// alloc/export words, as in R700ControlFlowAllocExportClause:
+    sq_cf_alloc_export_word0_u      m_Word0;
+    sq_cf_alloc_export_word1_u      m_Word1;
+	// -- end of the R700ControlFlowAllocExportClause fields --
+
+    sq_cf_alloc_export_word1_buf_u m_Word1_BUF;
+} R700ControlFlowSMXClause;
+
+typedef struct R700ControlFlowGenericClause 
+{
+	ShaderInstType          m_ShaderInstType;
+	R700ShaderInstruction * pNextInst;
+	// The four leading fields repeat R700ShaderInstruction (which
+	// R700ControlFlowInstruction aliases), in the same order.
+	GLuint m_uIndex;
+    GLuint useCount;
+	// -- end of the R700ShaderInstruction fields --
+	// CF encoding words:
+
+    sq_cf_word0_u m_Word0;
+    sq_cf_word1_u m_Word1;
+    // Linked fetch instruction; LinkVertexInstruction() refuses a VTX link while the TEX link is set.
+    struct R700VertexInstruction  *m_pLinkedVTXInstruction;
+    struct R700TextureInstruction *m_pLinkedTEXInstruction;
+} R700ControlFlowGenericClause;
+
+typedef struct R700ControlFlowALUClause 
+{
+	ShaderInstType          m_ShaderInstType;
+	R700ShaderInstruction * pNextInst;
+    // The four leading fields repeat R700ShaderInstruction (which
+	// R700ControlFlowInstruction aliases), in the same order.
+	GLuint m_uIndex;
+    GLuint useCount;
+	// -- end of the R700ShaderInstruction fields --
+	// CF ALU encoding words:
+
+    sq_cf_alu_word0_u m_Word0;
+    sq_cf_alu_word1_u m_Word1;
+    // ALU instruction linked to this clause; 0 after Init_R700ControlFlowALUClause().
+    struct R700ALUInstruction *m_pLinkedALUInstruction;
+} R700ControlFlowALUClause;
+
+// ------------------- End of CF Inst ------------------------
+
+// ------------------- ALU Inst ------------------------------
+typedef struct R700ALUInstruction 
+{
+	ShaderInstType          m_ShaderInstType;
+	R700ShaderInstruction * pNextInst;
+	// The four leading fields repeat R700ShaderInstruction, in the same order.
+	GLuint m_uIndex;
+    GLuint useCount;
+	// -- end of the R700ShaderInstruction fields --
+	// Word 1 exists as three separate members (not a union); Init_R700ALUInstruction() clears all of them.
+    sq_alu_word0_u     m_Word0;
+    sq_alu_word1_u     m_Word1;
+    sq_alu_word1_op2_v2_u m_Word1_OP2;
+    sq_alu_word1_op3_u m_Word1_OP3;
+
+    struct R700ControlFlowALUClause *m_pLinkedALUClause;
+} R700ALUInstruction;
+
+typedef struct R700ALUInstructionHalfLiteral
+{
+	ShaderInstType          m_ShaderInstType;
+	R700ShaderInstruction * pNextInst;
+	// "Inherits" by repetition: same fields as R700ALUInstruction,
+	// which itself starts with the four R700ShaderInstruction fields.
+	GLuint m_uIndex;
+    GLuint useCount;
+	// -- end of the R700ShaderInstruction fields --
+
+    sq_alu_word0_u     m_Word0;
+    sq_alu_word1_u     m_Word1;
+    sq_alu_word1_op2_v2_u m_Word1_OP2;
+    sq_alu_word1_op3_u m_Word1_OP3;
+
+    struct R700ControlFlowALUClause *m_pLinkedALUClause;
+	// -- end of the R700ALUInstruction fields --
+	// Two literals, filled from the x/y arguments of Init_R700ALUInstructionHalfLiteral().
+    GLfloat m_fLiteralX,
+            m_fLiteralY;
+} R700ALUInstructionHalfLiteral;
+
+typedef struct R700ALUInstructionFullLiteral 
+{
+	ShaderInstType          m_ShaderInstType;
+	R700ShaderInstruction * pNextInst;
+	// "Inherits" by repetition: same fields as R700ALUInstruction,
+	// which itself starts with the four R700ShaderInstruction fields.
+	GLuint m_uIndex;
+    GLuint useCount;
+	// -- end of the R700ShaderInstruction fields --
+
+    sq_alu_word0_u     m_Word0;
+    sq_alu_word1_u     m_Word1;
+    sq_alu_word1_op2_v2_u m_Word1_OP2;
+    sq_alu_word1_op3_u m_Word1_OP3;
+
+    struct R700ControlFlowALUClause *m_pLinkedALUClause;
+	// -- end of the R700ALUInstruction fields --
+	// Four literals, filled from the x/y/z/w arguments of Init_R700ALUInstructionFullLiteral().
+    GLfloat m_fLiteralX,
+            m_fLiteralY,
+            m_fLiteralZ,
+            m_fLiteralW;
+} R700ALUInstructionFullLiteral;
+// ------------------- End of ALU Inst -----------------------
+
+// ------------------- Texture/Vertex Instruction --------------------
+
+typedef struct R700TextureInstruction 
+{
+	ShaderInstType          m_ShaderInstType;
+	R700ShaderInstruction * pNextInst;
+	// The four leading fields repeat R700ShaderInstruction, in the same order.
+	GLuint m_uIndex;
+    GLuint useCount;
+	// -- end of the R700ShaderInstruction fields --
+	// Three texture encoding words, all cleared by Init_R700TextureInstruction().
+    sq_tex_word0_u m_Word0;
+    sq_tex_word1_u m_Word1;
+    sq_tex_word2_u m_Word2;
+
+    struct R700ControlFlowGenericClause *m_pLinkedGenericClause;
+} R700TextureInstruction;
+
+typedef struct R700VertexInstruction 
+{
+	ShaderInstType          m_ShaderInstType;
+	R700ShaderInstruction * pNextInst;
+	// The four leading fields repeat R700ShaderInstruction, in the same order.
+	GLuint m_uIndex;
+    GLuint useCount;
+	// -- end of the R700ShaderInstruction fields --
+	// Vertex fetch words shared by the semantic and generic fetch structs below.
+    sq_vtx_word0_u     m_Word0;
+    sq_vtx_word1_u     m_Word1;
+    sq_vtx_word2_u     m_Word2;
+
+    struct R700ControlFlowGenericClause *m_pLinkedGenericClause;
+} R700VertexInstruction;
+//
+typedef struct R700VertexSemanticFetch 
+{
+	ShaderInstType          m_ShaderInstType;
+	R700ShaderInstruction * pNextInst;
+	// "Inherits" by repetition: same fields as R700VertexInstruction,
+	// which itself starts with the four R700ShaderInstruction fields.
+	GLuint m_uIndex;
+    GLuint useCount;
+	// -- end of the R700ShaderInstruction fields --
+	// Vertex fetch words, as in R700VertexInstruction:
+    sq_vtx_word0_u     m_Word0;
+    sq_vtx_word1_u     m_Word1;
+    sq_vtx_word2_u     m_Word2;
+
+    struct R700ControlFlowGenericClause *m_pLinkedGenericClause;
+	// -- end of the R700VertexInstruction fields --
+	// Additional SEM form of word 1; Init_R700VertexSemanticFetch() clears it together with m_Word1.
+    sq_vtx_word1_sem_u m_Word1_SEM;
+} R700VertexSemanticFetch;
+//
+typedef struct R700VertexGenericFetch 
+{
+	ShaderInstType          m_ShaderInstType;
+	R700ShaderInstruction * pNextInst;
+	// "Inherits" by repetition: same fields as R700VertexInstruction,
+	// which itself starts with the four R700ShaderInstruction fields.
+	GLuint m_uIndex;
+    GLuint useCount;
+	// -- end of the R700ShaderInstruction fields --
+	// Vertex fetch words, as in R700VertexInstruction:
+    sq_vtx_word0_u     m_Word0;
+    sq_vtx_word1_u     m_Word1;
+    sq_vtx_word2_u     m_Word2;
+
+    struct R700ControlFlowGenericClause *m_pLinkedGenericClause;
+	// -- end of the R700VertexInstruction fields --
+	// Additional GPR form of word 1; Init_R700VertexGenericFetch() clears it together with m_Word1.
+    sq_vtx_word1_gpr_u m_Word1_GPR;
+} R700VertexGenericFetch;
+
+// ------------------- End of Texture Vertex Instruction --------------------
+
+void Init_R700ControlFlowGenericClause(R700ControlFlowGenericClause* pInst);
+void Init_R700ControlFlowALUClause(R700ControlFlowALUClause* pInst);
+void Init_R700ControlFlowSXClause(R700ControlFlowSXClause* pInst);
+void Init_R700ControlFlowSMXClause(R700ControlFlowSMXClause* pInst);
+void Init_R700ALUInstruction(R700ALUInstruction* pInst);
+void Init_R700ALUInstructionHalfLiteral(R700ALUInstructionHalfLiteral* pInst, GLfloat x, GLfloat y);
+void Init_R700ALUInstructionFullLiteral(R700ALUInstructionFullLiteral* pInst, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
+void Init_R700TextureInstruction(R700TextureInstruction* pInst);
+void Init_R700VertexSemanticFetch(R700VertexSemanticFetch* pInst);
+void Init_R700VertexGenericFetch(R700VertexGenericFetch* pInst);
+// Per-type lookups matching the SIZE / MAX_INSTRUCTIONS notes on ShaderInstType.
+unsigned int GetInstructionSize(ShaderInstType instType);
+unsigned int GetCFMaxInstructions(ShaderInstType instType);
+// Cross-links a generic CF clause with a vertex fetch; GL_FALSE if the clause already has a TEX link.
+GLboolean LinkVertexInstruction(R700ControlFlowGenericClause *pCFGeneric,
+								R700VertexInstruction *pVTXInstruction);
+
+#endif //_R700_SHADERINST_H_
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
new file mode 100644
index 0000000000..ac64bbf874
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -0,0 +1,1872 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/dd.h"
+#include "main/simple_list.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "main/api_arrayelt.h"
+#include "main/framebuffer.h"
+#include "drivers/common/meta.h"
+
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "vbo/vbo.h"
+
+#include "r600_context.h"
+
+#include "r700_state.h"
+
+#include "r700_fragprog.h"
+#include "r700_vertprog.h"
+
+void r600UpdateTextureState(GLcontext * ctx); /* non-static; NOTE(review): presumably defined in another file -- confirm, a shared header would be safer */
+static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state);
+static void r700UpdatePolygonMode(GLcontext * ctx);
+static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state);
+static void r700SetStencilState(GLcontext * ctx, GLboolean state);
+static void r700UpdateWindow(GLcontext * ctx, int id); /* the static prototypes are forward declarations for this translation unit */
+
+/* Select the fragment and vertex shaders, reload program state parameters, then reset NewGLState to 0. */
+void r700UpdateShaders(GLcontext * ctx)
+{
+    context_t *context = R700_CONTEXT(ctx);
+
+    /* Should only happen once, just after the context is created. */
+    /* TODO: shouldn't we fall back to sw here? */
+    if (ctx->FragmentProgram._Current == NULL) {
+        fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+        return;
+    }
+
+    r700SelectFragmentShader(ctx);
+    r700SelectVertexShader(ctx);
+    r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+    context->radeon.NewGLState = 0;
+}
+
+/*
+ * To correctly position primitives: program viewport 0's X/Y offsets from the
+ * window-map translation and the drawable's x and (y + h), then update the scissor.
+ */
+void r700UpdateViewportOffset(GLcontext * ctx) //------------------
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	__DRIdrawable *drawable = radeon_get_drawable(&context->radeon);
+	const GLfloat *winmap = ctx->Viewport._WindowMap.m;
+	int id = 0;
+
+	GLfloat xoffset = (GLfloat) drawable->x;
+	GLfloat yoffset = (GLfloat) drawable->y + drawable->h;
+	GLfloat tx = winmap[MAT_TX] + xoffset;
+	GLfloat ty = (-winmap[MAT_TY]) + yoffset;	/* Y translation enters negated */
+	/* Registers are rewritten, and vpt flagged, only when a value actually changes. */
+	if (r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All != ty ||
+	    r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All != tx) {
+		/* Note: this should also modify whatever data the context reset
+		 * code uses... */
+		R600_STATECHANGE(context, vpt);
+		r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx;
+		r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty;
+	}
+
+	radeonUpdateScissor(ctx);
+}
+
+/*
+ * Run _mesa_load_state_parameters() on the current fragment program's parameter list.
+ * No-op unless new_state has _NEW_BUFFERS, _NEW_PROGRAM or _NEW_PROGRAM_CONSTANTS set.
+ */
+void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state) //--------------------
+{
+	struct gl_program_parameter_list *paramList;
+
+	if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
+		return;
+
+	/* _Current is the only handle needed here; a single NULL test covers it. */
+	if (!ctx->FragmentProgram._Current)
+		return;
+
+	paramList = ctx->FragmentProgram._Current->Base.Parameters;
+	if (!paramList)
+		return;
+	_mesa_load_state_parameters(ctx, paramList);
+}
+
+/**
+ * Called by Mesa after an internal state update; new_state is tested against _NEW_* bits.
+ */
+static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //-------------------
+{
+    context_t *context = R700_CONTEXT(ctx);
+
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+    /* Pass the same mask on to the swrast, swsetup, vbo, tnl and array-element modules. */
+    _swrast_InvalidateState(ctx, new_state);
+    _swsetup_InvalidateState(ctx, new_state);
+    _vbo_InvalidateState(ctx, new_state);
+    _tnl_InvalidateState(ctx, new_state);
+    _ae_invalidate_state(ctx, new_state);
+
+    if (new_state & _NEW_BUFFERS) {
+	    _mesa_update_framebuffer(ctx);
+	    /* this updates the DrawBuffer's Width/Height if it's a FBO */
+	    _mesa_update_draw_buffer_bounds(ctx);
+
+	    R600_STATECHANGE(context, cb_target);
+	    R600_STATECHANGE(context, db_target);
+    }
+    /* Mirror the GL provoking-vertex convention into PA_SU_SC_MODE_CNTL. */
+    if (new_state & (_NEW_LIGHT)) {
+	    R600_STATECHANGE(context, su);
+	    if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION)
+		    SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit);
+	    else
+		    CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit);
+    }
+
+    r700UpdateStateParameters(ctx, new_state);
+    /* NOTE: cl and spi are flagged, and the bits below rewritten, on every call regardless of new_state. */
+    R600_STATECHANGE(context, cl);
+    R600_STATECHANGE(context, spi);
+
+    if(GL_TRUE == r700->bEnablePerspective)
+    {
+        /* Do scale XY and Z by 1/W0 for perspective correction on pos. For orthogonal case, set both to one. */
+        CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
+        CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
+
+        SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
+
+        SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
+        CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
+    }
+    else
+    {
+        /* For orthogonal case. */
+        SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
+        SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
+
+        SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
+
+        CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
+        SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
+    }
+    /* Accumulate the mask; r700UpdateShaders() is what resets NewGLState to 0. */
+    context->radeon.NewGLState |= new_state;
+}
+
+/* Refresh the DB shader/render control bits from the current query state and the current fragment program. */
+static void r700SetDBRenderState(GLcontext * ctx)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	struct r700_fragment_program *fp = (struct r700_fragment_program *)
+		(ctx->FragmentProgram._Current);
+
+	R600_STATECHANGE(context, db);
+
+	SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit);
+	SETfield(r700->DB_SHADER_CONTROL.u32All, EARLY_Z_THEN_LATE_Z, Z_ORDER_shift, Z_ORDER_mask);
+	/* XXX need to enable htile for hiz/s */
+	SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask);
+	SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask);
+	SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask);
+
+	/* NOOP_CULL_DISABLE tracks "a query is current"; PERFECT_ZPASS_COUNTS does too, but only on RV770 and later. */
+	if (!context->radeon.query.current)
+	{
+		CLEARbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit);
+		if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
+			CLEARbit(r700->DB_RENDER_CONTROL.u32All, PERFECT_ZPASS_COUNTS_bit);
+	}
+	else
+	{
+		SETbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit);
+		if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
+			SETbit(r700->DB_RENDER_CONTROL.u32All, PERFECT_ZPASS_COUNTS_bit);
+	}
+
+	/* Without a current fragment program the kill / Z-export bits keep their old values. */
+	if (!fp)
+		return;
+
+	/* Bits driven by the fragment program's shader flags. */
+	if (!fp->r700Shader.killIsUsed)
+	{
+		CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
+	}
+	else
+	{
+		SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
+	}
+
+	if (!fp->r700Shader.depthIsExported)
+	{
+		CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
+	}
+	else
+	{
+		SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
+	}
+}
+
+void r700UpdateShaderStates(GLcontext * ctx) /* refreshes DB render state, then texture state */
+{
+	r700SetDBRenderState(ctx);
+	r600UpdateTextureState(ctx);
+}
+
+/*
+ * Program DB_DEPTH_CONTROL: depth test and write enables plus the compare
+ * function. Both enables are cleared when no depth renderbuffer with a bo exists.
+ */
+static void r700SetDepthState(GLcontext * ctx)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	struct radeon_renderbuffer *rrb;
+	uint32_t zfunc;
+
+	R600_STATECHANGE(context, db);
+
+	rrb = radeon_get_depthbuffer(&context->radeon);
+
+	/* Depth test disabled, or nothing to test against: clear both enables. */
+	if (!ctx->Depth.Test || !rrb || !rrb->bo)
+	{
+		CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit);
+		CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+		return;
+	}
+
+	SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit);
+
+	if (ctx->Depth.Mask)
+	{
+		SETbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+	}
+	else
+	{
+		CLEARbit(r700->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+	}
+
+	/* Translate the GL compare function; unknown values behave as GL_ALWAYS. */
+	switch (ctx->Depth.Func)
+	{
+	case GL_NEVER:
+		zfunc = FRAG_NEVER;
+		break;
+	case GL_LESS:
+		zfunc = FRAG_LESS;
+		break;
+	case GL_EQUAL:
+		zfunc = FRAG_EQUAL;
+		break;
+	case GL_LEQUAL:
+		zfunc = FRAG_LEQUAL;
+		break;
+	case GL_GREATER:
+		zfunc = FRAG_GREATER;
+		break;
+	case GL_NOTEQUAL:
+		zfunc = FRAG_NOTEQUAL;
+		break;
+	case GL_GEQUAL:
+		zfunc = FRAG_GEQUAL;
+		break;
+	case GL_ALWAYS:
+		zfunc = FRAG_ALWAYS;
+		break;
+	default:
+		zfunc = FRAG_ALWAYS;
+		break;
+	}
+
+	SETfield(r700->DB_DEPTH_CONTROL.u32All, zfunc, ZFUNC_shift, ZFUNC_mask);
+}
+
+/* Program the SX alpha-test registers from ctx->Color; the test stays off when disabled in GL or when the function is GL_ALWAYS. */
+static void r700SetAlphaState(GLcontext * ctx)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	GLboolean test_on = ctx->Color.AlphaEnabled;
+	uint32_t hw_func = REF_ALWAYS;
+
+	R600_STATECHANGE(context, sx);
+
+	switch (ctx->Color.AlphaFunc) {
+	case GL_NEVER:
+		hw_func = REF_NEVER;
+		break;
+	case GL_LESS:
+		hw_func = REF_LESS;
+		break;
+	case GL_EQUAL:
+		hw_func = REF_EQUAL;
+		break;
+	case GL_LEQUAL:
+		hw_func = REF_LEQUAL;
+		break;
+	case GL_GREATER:
+		hw_func = REF_GREATER;
+		break;
+	case GL_NOTEQUAL:
+		hw_func = REF_NOTEQUAL;
+		break;
+	case GL_GEQUAL:
+		hw_func = REF_GEQUAL;
+		break;
+	case GL_ALWAYS:
+		/* hw_func stays REF_ALWAYS; an always-pass test is simply switched off */
+		test_on = GL_FALSE;
+		break;
+	}
+
+	if (!test_on) {
+		CLEARbit(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit);
+		return;
+	}
+
+	SETfield(r700->SX_ALPHA_TEST_CONTROL.u32All, hw_func, ALPHA_FUNC_shift, ALPHA_FUNC_mask);
+	SETbit(r700->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit);
+	r700->SX_ALPHA_REF.f32All = ctx->Color.AlphaRef;
+}
+
+static void r700AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) /* func/ref are ignored: r700SetAlphaState() re-reads ctx->Color */
+{
+	(void)func;
+	(void)ref;
+	r700SetAlphaState(ctx);
+}
+
+
+/* Copy the blend constant cf[0..3] into the CB_BLEND_RED/GREEN/BLUE/ALPHA registers and flag blnd_clr. */
+static void r700BlendColor(GLcontext * ctx, const GLfloat cf[4]) //----------------
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	R600_STATECHANGE(context, blnd_clr);
+
+	r700->CB_BLEND_ALPHA.f32All = cf[3];
+	r700->CB_BLEND_BLUE.f32All  = cf[2];
+	r700->CB_BLEND_GREEN.f32All = cf[1];
+	r700->CB_BLEND_RED.f32All   = cf[0];
+}
+
+/*
+ * Translate a GL blend factor into the hardware BLEND_* code. is_src is GL_TRUE
+ * when the factor is a source factor: GL_SRC_ALPHA_SATURATE maps to BLEND_ZERO
+ * otherwise, and an unknown factor (reported on stderr) falls back to
+ * BLEND_ONE for a source and BLEND_ZERO for a destination.
+ */
+static int blend_factor(GLenum factor, GLboolean is_src)
+{
+	switch (factor) {
+	/* fixed factors */
+	case GL_ZERO:
+		return BLEND_ZERO;
+	case GL_ONE:
+		return BLEND_ONE;
+
+	/* colour factors */
+	case GL_DST_COLOR:
+		return BLEND_DST_COLOR;
+	case GL_ONE_MINUS_DST_COLOR:
+		return BLEND_ONE_MINUS_DST_COLOR;
+	case GL_SRC_COLOR:
+		return BLEND_SRC_COLOR;
+	case GL_ONE_MINUS_SRC_COLOR:
+		return BLEND_ONE_MINUS_SRC_COLOR;
+
+	/* alpha factors */
+	case GL_SRC_ALPHA:
+		return BLEND_SRC_ALPHA;
+	case GL_ONE_MINUS_SRC_ALPHA:
+		return BLEND_ONE_MINUS_SRC_ALPHA;
+	case GL_DST_ALPHA:
+		return BLEND_DST_ALPHA;
+	case GL_ONE_MINUS_DST_ALPHA:
+		return BLEND_ONE_MINUS_DST_ALPHA;
+	case GL_SRC_ALPHA_SATURATE:
+		return (is_src) ? BLEND_SRC_ALPHA_SATURATE : BLEND_ZERO;
+
+	/* blend-constant factors */
+	case GL_CONSTANT_COLOR:
+		return BLEND_CONSTANT_COLOR;
+	case GL_ONE_MINUS_CONSTANT_COLOR:
+		return BLEND_ONE_MINUS_CONSTANT_COLOR;
+	case GL_CONSTANT_ALPHA:
+		return BLEND_CONSTANT_ALPHA;
+	case GL_ONE_MINUS_CONSTANT_ALPHA:
+		return BLEND_ONE_MINUS_CONSTANT_ALPHA;
+
+	default:
+		fprintf(stderr, "unknown blend factor %x\n", factor);
+		break;
+	}
+
+	return (is_src) ? BLEND_ONE : BLEND_ZERO;
+}
+
+/**
+ * Rebuild the blend-control word from the current GL colour-blend state
+ * and store it in the hardware-state shadow (context->hw).
+ *
+ * If a colour logic op is active, or blending is switched off, a
+ * pass-through setup (src * ONE + dst * ZERO, for colour and alpha) is
+ * stored and the function returns without touching CB_COLOR_CONTROL.
+ * An unrecognised blend equation is reported on stderr and the function
+ * returns before any blend register is written.
+ */
+static void r700SetBlendState(GLcontext * ctx)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	const int rt = 0;	/* only render target 0 is programmed here */
+	uint32_t ctl = 0;
+	uint32_t rgb_fcn, alpha_fcn;
+	int is_r600;
+
+	R600_STATECHANGE(context, blnd);
+
+	/* CHIP_FAMILY_R600 stores the word in CB_BLEND_CONTROL; every other
+	 * family stores it in the per-target CB_BLEND0_CONTROL. */
+	is_r600 = (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_R600);
+
+	if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) {
+		/* pass-through: result = src * 1 + dst * 0 */
+		SETfield(ctl, BLEND_ONE,
+			 COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+		SETfield(ctl, BLEND_ZERO,
+			 COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+		SETfield(ctl, COMB_DST_PLUS_SRC,
+			 COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask);
+		SETfield(ctl, BLEND_ONE,
+			 ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+		SETfield(ctl, BLEND_ZERO,
+			 ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+		SETfield(ctl, COMB_DST_PLUS_SRC,
+			 ALPHA_COMB_FCN_shift, ALPHA_COMB_FCN_mask);
+
+		if (is_r600)
+			r700->CB_BLEND_CONTROL.u32All = ctl;
+		else
+			r700->render_target[rt].CB_BLEND0_CONTROL.u32All = ctl;
+		return;
+	}
+
+	/* ---- colour channels ---- */
+	SETfield(ctl,
+		 blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE),
+		 COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+	SETfield(ctl,
+		 blend_factor(ctx->Color.BlendDstRGB, GL_FALSE),
+		 COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+
+	switch (ctx->Color.BlendEquationRGB) {
+	case GL_FUNC_ADD:
+		rgb_fcn = COMB_DST_PLUS_SRC;
+		break;
+	case GL_FUNC_SUBTRACT:
+		rgb_fcn = COMB_SRC_MINUS_DST;
+		break;
+	case GL_FUNC_REVERSE_SUBTRACT:
+		rgb_fcn = COMB_DST_MINUS_SRC;
+		break;
+	case GL_MIN:
+	case GL_MAX:
+		rgb_fcn = (ctx->Color.BlendEquationRGB == GL_MIN)
+			? COMB_MIN_DST_SRC : COMB_MAX_DST_SRC;
+		/* for min/max both factors are overridden with ONE */
+		SETfield(ctl, BLEND_ONE,
+			 COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+		SETfield(ctl, BLEND_ONE,
+			 COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+		break;
+	default:
+		fprintf(stderr,
+			"[%s:%u] Invalid RGB blend equation (0x%04x).\n",
+			__FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB);
+		return;
+	}
+	SETfield(ctl, rgb_fcn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask);
+
+	/* ---- alpha channel ---- */
+	SETfield(ctl,
+		 blend_factor(ctx->Color.BlendSrcA, GL_TRUE),
+		 ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+	SETfield(ctl,
+		 blend_factor(ctx->Color.BlendDstA, GL_FALSE),
+		 ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+
+	switch (ctx->Color.BlendEquationA) {
+	case GL_FUNC_ADD:
+		alpha_fcn = COMB_DST_PLUS_SRC;
+		break;
+	case GL_FUNC_SUBTRACT:
+		alpha_fcn = COMB_SRC_MINUS_DST;
+		break;
+	case GL_FUNC_REVERSE_SUBTRACT:
+		alpha_fcn = COMB_DST_MINUS_SRC;
+		break;
+	case GL_MIN:
+	case GL_MAX:
+		alpha_fcn = (ctx->Color.BlendEquationA == GL_MIN)
+			? COMB_MIN_DST_SRC : COMB_MAX_DST_SRC;
+		/* for min/max both factors are overridden with ONE */
+		SETfield(ctl, BLEND_ONE,
+			 ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+		SETfield(ctl, BLEND_ONE,
+			 ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+		break;
+	default:
+		fprintf(stderr,
+			"[%s:%u] Invalid A blend equation (0x%04x).\n",
+			__FUNCTION__, __LINE__, ctx->Color.BlendEquationA);
+		return;
+	}
+	SETfield(ctl, alpha_fcn, ALPHA_COMB_FCN_shift, ALPHA_COMB_FCN_mask);
+
+	SETbit(ctl, SEPARATE_ALPHA_BLEND_bit);
+
+	if (is_r600) {
+		r700->CB_BLEND_CONTROL.u32All = ctl;
+	} else {
+		r700->render_target[rt].CB_BLEND0_CONTROL.u32All = ctl;
+		SETbit(r700->CB_COLOR_CONTROL.u32All, PER_MRT_BLEND_bit);
+	}
+
+	/* mark blending as enabled for this render target */
+	SETfield(r700->CB_COLOR_CONTROL.u32All, (1 << rt),
+		 TARGET_BLEND_ENABLE_shift, TARGET_BLEND_ENABLE_mask);
+}
+
+/**
+ * Handle a change of the RGB / alpha blend equations.
+ *
+ * The arguments are not used: r700SetBlendState() reads the equations
+ * back from ctx->Color.
+ */
+static void r700BlendEquationSeparate(GLcontext * ctx,
+				      GLenum modeRGB, GLenum modeA)
+{
+	(void)modeRGB;
+	(void)modeA;
+
+	r700SetBlendState(ctx);
+}
+
+/**
+ * Handle a change of the source / destination blend factors.
+ *
+ * The arguments are not used: r700SetBlendState() reads the factors
+ * back from ctx->Color.
+ */
+static void r700BlendFuncSeparate(GLcontext * ctx,
+				  GLenum sfactorRGB, GLenum dfactorRGB,
+				  GLenum sfactorA, GLenum dfactorA)
+{
+	(void)sfactorRGB;
+	(void)dfactorRGB;
+	(void)sfactorA;
+	(void)dfactorA;
+
+	r700SetBlendState(ctx);
+}
+
+/**
+ * Map a GL logic-op enum onto the 8-bit code this driver writes into
+ * the ROP3 field.
+ *
+ * \param logicop  one of the sixteen GL logic operations
+ * \return the matching code; an unknown enum is reported on stderr and
+ *         mapped to 0xcc, the same code as GL_COPY.
+ */
+static GLuint translate_logicop(GLenum logicop)
+{
+	GLuint rop3;
+
+	switch (logicop) {
+	case GL_CLEAR:
+		rop3 = 0x00;
+		break;
+	case GL_NOR:
+		rop3 = 0x11;
+		break;
+	case GL_AND_INVERTED:
+		rop3 = 0x22;
+		break;
+	case GL_COPY_INVERTED:
+		rop3 = 0x33;
+		break;
+	case GL_AND_REVERSE:
+		rop3 = 0x44;
+		break;
+	case GL_INVERT:
+		rop3 = 0x55;
+		break;
+	case GL_XOR:
+		rop3 = 0x66;
+		break;
+	case GL_NAND:
+		rop3 = 0x77;
+		break;
+	case GL_AND:
+		rop3 = 0x88;
+		break;
+	case GL_EQUIV:
+		rop3 = 0x99;
+		break;
+	case GL_NOOP:
+		rop3 = 0xaa;
+		break;
+	case GL_OR_INVERTED:
+		rop3 = 0xbb;
+		break;
+	case GL_COPY:
+		rop3 = 0xcc;
+		break;
+	case GL_OR_REVERSE:
+		rop3 = 0xdd;
+		break;
+	case GL_OR:
+		rop3 = 0xee;
+		break;
+	case GL_SET:
+		rop3 = 0xff;
+		break;
+	default:
+		fprintf(stderr, "unknown blend logic operation %x\n", logicop);
+		rop3 = 0xcc;
+		break;
+	}
+
+	return rop3;
+}
+
+/**
+ * Update the ROP3 field of the shadowed CB_COLOR_CONTROL register so it
+ * matches the current GL logic-op state.
+ *
+ * With the logic op enabled the translated ctx->Color.LogicOp is used;
+ * otherwise the field is set to 0xCC, the code translate_logicop()
+ * returns for GL_COPY.
+ */
+static void r700SetLogicOpState(GLcontext *ctx)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	GLuint rop3 = 0xCC;
+
+	R600_STATECHANGE(context, blnd);
+
+	if (RGBA_LOGICOP_ENABLED(ctx))
+		rop3 = translate_logicop(ctx->Color.LogicOp);
+
+	SETfield(r700->CB_COLOR_CONTROL.u32All, rop3, ROP3_shift, ROP3_mask);
+}
+
+/**
+ * Called by Mesa when an application changes the logic operation via
+ * glLogicOp.
+ *
+ * Nothing is done while the logic op is disabled.  The new opcode is
+ * read from ctx by r700SetLogicOpState(), so \p logicop is unused.
+ */
+static void r700LogicOpcode(GLcontext *ctx, GLenum logicop)
+{
+	(void)logicop;
+
+	if (!RGBA_LOGICOP_ENABLED(ctx))
+		return;
+
+	r700SetLogicOpState(ctx);
+}
+
+/**
+ * Recompute the FACE, CULL_FRONT and CULL_BACK bits of the shadowed
+ * PA_SU_SC_MODE_CNTL register from ctx->Polygon.
+ *
+ * All three bits are cleared first and then set again as required, so
+ * an unknown cull mode or winding leaves the corresponding bit(s) clear.
+ */
+static void r700UpdateCulling(GLcontext * ctx)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+	R600_STATECHANGE(context, su);
+
+	/* baseline: nothing culled, FACE bit clear (counter-clockwise) */
+	CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
+	CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+	CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+
+	if (ctx->Polygon.CullFlag) {
+		const GLenum cull = ctx->Polygon.CullFaceMode;
+
+		if (cull == GL_FRONT || cull == GL_FRONT_AND_BACK)
+			SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+		if (cull == GL_BACK || cull == GL_FRONT_AND_BACK)
+			SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+	}
+
+	/* FACE bit set means clockwise front faces */
+	if (ctx->Polygon.FrontFace == GL_CW)
+		SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
+
+	/* Winding is inverted when rendering to FBO */
+	if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+		r700->PA_SU_SC_MODE_CNTL.u32All ^= FACE_bit;
+}
+
+/**
+ * Mirror ctx->Line.StippleFlag into the LINE_STIPPLE_ENABLE bit of the
+ * shadowed PA_SC_MODE_CNTL register.
+ */
+static void r700UpdateLineStipple(GLcontext * ctx)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+	R600_STATECHANGE(context, sc);
+
+	if (!ctx->Line.StippleFlag)
+		CLEARbit(r700->PA_SC_MODE_CNTL.u32All, LINE_STIPPLE_ENABLE_bit);
+	else
+		SETbit(r700->PA_SC_MODE_CNTL.u32All, LINE_STIPPLE_ENABLE_bit);
+}
+
+/**
+ * React to a GL capability being switched on or off by refreshing the
+ * piece of hardware state that depends on it.
+ *
+ * \param ctx    GL context
+ * \param cap    capability that changed
+ * \param state  new value; only forwarded to the handlers that take it,
+ *               the others re-read the state from ctx
+ *
+ * Capabilities not listed below are ignored.
+ */
+static void r700Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+{
+	context_t *context = R700_CONTEXT(ctx);
+
+	switch (cap) {
+	case GL_ALPHA_TEST:
+		r700SetAlphaState(ctx);
+		break;
+
+	case GL_COLOR_LOGIC_OP:
+		r700SetLogicOpState(ctx);
+		/* fall-through, because logic op overrides blending */
+	case GL_BLEND:
+		r700SetBlendState(ctx);
+		break;
+
+	case GL_CLIP_PLANE0:
+	case GL_CLIP_PLANE1:
+	case GL_CLIP_PLANE2:
+	case GL_CLIP_PLANE3:
+	case GL_CLIP_PLANE4:
+	case GL_CLIP_PLANE5:
+		r700SetClipPlaneState(ctx, cap, state);
+		break;
+
+	case GL_DEPTH_TEST:
+		r700SetDepthState(ctx);
+		break;
+
+	case GL_STENCIL_TEST:
+		r700SetStencilState(ctx, state);
+		break;
+
+	case GL_CULL_FACE:
+		r700UpdateCulling(ctx);
+		break;
+
+	case GL_POLYGON_OFFSET_POINT:
+	case GL_POLYGON_OFFSET_LINE:
+	case GL_POLYGON_OFFSET_FILL:
+		r700SetPolygonOffsetState(ctx, state);
+		break;
+
+	case GL_SCISSOR_TEST:
+		radeon_firevertices(&context->radeon);
+		context->radeon.state.scissor.enabled = state;
+		radeonUpdateScissor(ctx);
+		break;
+
+	case GL_LINE_STIPPLE:
+		r700UpdateLineStipple(ctx);
+		break;
+
+	case GL_DEPTH_CLAMP:
+		r700UpdateWindow(ctx, 0);
+		break;
+
+	case GL_TEXTURE_1D:
+	case GL_TEXTURE_2D:
+	case GL_TEXTURE_3D:
+	case GL_FOG:
+	default:
+		/* nothing to update */
+		break;
+	}
+}
+
+/**
+ * Handle glColorMask().
+ *
+ * Packs the four channel enables into a 4-bit value (r = bit 0, g = bit 1,
+ * b = bit 2, a = bit 3) and, if it differs from the whole shadowed
+ * CB_TARGET_MASK word, stores it in the TARGET0_ENABLE field.
+ */
+static void r700ColorMask(GLcontext * ctx,
+			  GLboolean r, GLboolean g, GLboolean b, GLboolean a)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	unsigned int mask = 0;
+
+	if (r)
+		mask |= 1;
+	if (g)
+		mask |= 2;
+	if (b)
+		mask |= 4;
+	if (a)
+		mask |= 8;
+
+	if (mask == r700->CB_TARGET_MASK.u32All)
+		return;
+
+	R600_STATECHANGE(context, cb);
+	SETfield(r700->CB_TARGET_MASK.u32All, mask, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask);
+}
+
+/**
+ * Change the depth testing function.
+ *
+ * \p func is unused; r700SetDepthState() is called with ctx only.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r700DepthFunc(GLcontext * ctx, GLenum func)
+{
+	(void)func;
+
+	r700SetDepthState(ctx);
+}
+
+/**
+ * Enable/Disable depth writing.
+ *
+ * \p mask is unused; r700SetDepthState() is called with ctx only.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r700DepthMask(GLcontext * ctx, GLboolean mask)
+{
+	(void)mask;
+
+	r700SetDepthState(ctx);
+}
+
+/**
+ * Change the culling mode.
+ *
+ * \p mode is unused; r700UpdateCulling() reads the cull state from
+ * ctx->Polygon.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r700CullFace(GLcontext * ctx, GLenum mode)
+{
+	(void)mode;
+
+	r700UpdateCulling(ctx);
+}
+
+/* =============================================================
+ * Fog
+ */
+/**
+ * Fog parameter handler.  Intentionally empty: no hardware state is
+ * updated here.
+ */
+static void r700Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
+{
+	(void)ctx;
+	(void)pname;
+	(void)param;
+}
+
+/**
+ * Change the polygon orientation.
+ *
+ * Refreshes the culling bits and then the polygon-mode bits; both
+ * helpers read their inputs from ctx, so \p mode is unused.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r700FrontFace(GLcontext * ctx, GLenum mode)
+{
+	(void)mode;
+
+	r700UpdateCulling(ctx);
+	r700UpdatePolygonMode(ctx);
+}
+
+/**
+ * Select flat or smooth shading by setting or clearing FLAT_SHADE_ENA
+ * in the shadowed SPI_INTERP_CONTROL_0 register.
+ *
+ * The spi state is flagged as changed even when \p mode is neither
+ * GL_FLAT nor GL_SMOOTH; in that case the register is left untouched.
+ */
+static void r700ShadeModel(GLcontext * ctx, GLenum mode)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+	R600_STATECHANGE(context, spi);
+
+	/* also need to set/clear FLAT_SHADE bit per param in SPI_PS_INPUT_CNTL_[0-31] */
+	if (mode == GL_FLAT)
+		SETbit(r700->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
+	else if (mode == GL_SMOOTH)
+		CLEARbit(r700->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
+}
+
+/* =============================================================
+ * Point state
+ */
+/**
+ * Store a new point size in the HEIGHT and WIDTH fields of the shadowed
+ * PA_SU_POINT_SIZE register.
+ *
+ * \param size  requested point size; it is clamped first to the
+ *              [Point.MinSize, Point.MaxSize] range and then to the
+ *              implementation limits in ctx->Const.
+ */
+static void r700PointSize(GLcontext * ctx, GLfloat size)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	int hw_size;
+
+	R600_STATECHANGE(context, su);
+
+	/* We need to clamp to user defined range here, because
+	 * the HW clamping happens only for per vertex point size. */
+	size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize);
+
+	/* same size limits for AA, non-AA points */
+	size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize);
+
+	/* format is 12.4 fixed point.
+	 * NOTE(review): the factor is 8 rather than 16 - presumably the
+	 * fields hold half the size; confirm against the register docs. */
+	hw_size = (int)(size * 8.0);
+
+	SETfield(r700->PA_SU_POINT_SIZE.u32All, hw_size,
+		 PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask);
+	SETfield(r700->PA_SU_POINT_SIZE.u32All, hw_size,
+		 PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask);
+}
+
+/**
+ * Handle a point-parameter change.
+ *
+ * For GL_POINT_SIZE_MIN / GL_POINT_SIZE_MAX the matching field of the
+ * shadowed PA_SU_POINT_MINMAX register is rewritten from ctx->Point and
+ * the current point size is re-applied so it is clamped against the new
+ * limit.  Every other parameter is ignored.  \p param is unused.
+ */
+static void r700PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+	(void)param;
+
+	R600_STATECHANGE(context, su);
+
+	/* format is 12.4 fixed point */
+	if (pname == GL_POINT_SIZE_MIN) {
+		SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 8.0),
+			 MIN_SIZE_shift, MIN_SIZE_mask);
+		r700PointSize(ctx, ctx->Point.Size);
+	} else if (pname == GL_POINT_SIZE_MAX) {
+		SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 8.0),
+			 MAX_SIZE_shift, MAX_SIZE_mask);
+		r700PointSize(ctx, ctx->Point.Size);
+	}
+	/* GL_POINT_DISTANCE_ATTENUATION, GL_POINT_FADE_THRESHOLD_SIZE and
+	 * anything else: nothing to do */
+}
+
+/**
+ * Map a GL comparison enum onto the matching REF_* constant used for
+ * the stencil function fields.
+ *
+ * \return the REF_* value, or 0 when \p func is not one of the eight
+ *         comparison enums.
+ */
+static int translate_stencil_func(int func)
+{
+	int hw_func = 0;
+
+	switch (func) {
+	case GL_NEVER:
+		hw_func = REF_NEVER;
+		break;
+	case GL_LESS:
+		hw_func = REF_LESS;
+		break;
+	case GL_EQUAL:
+		hw_func = REF_EQUAL;
+		break;
+	case GL_LEQUAL:
+		hw_func = REF_LEQUAL;
+		break;
+	case GL_GREATER:
+		hw_func = REF_GREATER;
+		break;
+	case GL_NOTEQUAL:
+		hw_func = REF_NOTEQUAL;
+		break;
+	case GL_GEQUAL:
+		hw_func = REF_GEQUAL;
+		break;
+	case GL_ALWAYS:
+		hw_func = REF_ALWAYS;
+		break;
+	}
+
+	return hw_func;
+}
+
+/**
+ * Map a GL stencil operation enum onto the matching STENCIL_* constant.
+ *
+ * \return the STENCIL_* value; an unknown \p op triggers WARN_ONCE and
+ *         is mapped to STENCIL_KEEP.
+ */
+static int translate_stencil_op(int op)
+{
+	int hw_op;
+
+	switch (op) {
+	case GL_KEEP:
+		hw_op = STENCIL_KEEP;
+		break;
+	case GL_ZERO:
+		hw_op = STENCIL_ZERO;
+		break;
+	case GL_REPLACE:
+		hw_op = STENCIL_REPLACE;
+		break;
+	case GL_INCR:
+		hw_op = STENCIL_INCR_CLAMP;
+		break;
+	case GL_DECR:
+		hw_op = STENCIL_DECR_CLAMP;
+		break;
+	case GL_INCR_WRAP_EXT:
+		hw_op = STENCIL_INCR_WRAP;
+		break;
+	case GL_DECR_WRAP_EXT:
+		hw_op = STENCIL_DECR_WRAP;
+		break;
+	case GL_INVERT:
+		hw_op = STENCIL_INVERT;
+		break;
+	default:
+		WARN_ONCE("Do not know how to translate stencil op");
+		hw_op = STENCIL_KEEP;
+		break;
+	}
+
+	return hw_op;
+}
+
+/**
+ * Switch stencil testing on or off in the shadowed DB_DEPTH_CONTROL
+ * register.
+ *
+ * Nothing happens unless the draw buffer has a stencil renderbuffer
+ * with a buffer object attached.  Enabling sets both STENCIL_ENABLE and
+ * BACKFACE_ENABLE; disabling clears STENCIL_ENABLE only.
+ *
+ * \param state  GL_TRUE to enable, GL_FALSE to disable
+ */
+static void r700SetStencilState(GLcontext * ctx, GLboolean state)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	GLboolean hw_stencil = GL_FALSE;
+
+	if (ctx->DrawBuffer) {
+		struct radeon_renderbuffer *rrbStencil
+			= radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+		hw_stencil = (rrbStencil && rrbStencil->bo);
+	}
+
+	if (!hw_stencil)
+		return;
+
+	R600_STATECHANGE(context, db);
+
+	if (!state) {
+		CLEARbit(r700->DB_DEPTH_CONTROL.u32All, STENCIL_ENABLE_bit);
+		return;
+	}
+
+	SETbit(r700->DB_DEPTH_CONTROL.u32All, STENCIL_ENABLE_bit);
+	SETbit(r700->DB_DEPTH_CONTROL.u32All, BACKFACE_ENABLE_bit);
+}
+
+/**
+ * Refresh the stencil reference value, value mask and compare function
+ * for the front and back faces.
+ *
+ * All values are read from ctx->Stencil (index 0 for front,
+ * ctx->Stencil._BackFace for back); the \p face, \p func, \p ref and
+ * \p mask arguments are not used.
+ */
+static void r700StencilFuncSeparate(GLcontext * ctx, GLenum face,
+				    GLenum func, GLint ref, GLuint mask)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	const unsigned bf = ctx->Stencil._BackFace;
+	int front_func, back_func;
+
+	(void)face;
+	(void)func;
+	(void)ref;
+	(void)mask;
+
+	R600_STATECHANGE(context, stencil);
+	R600_STATECHANGE(context, db);
+
+	/* front face: reference + value mask, then compare function */
+	SETfield(r700->DB_STENCILREFMASK.u32All, ctx->Stencil.Ref[0],
+		 STENCILREF_shift, STENCILREF_mask);
+	SETfield(r700->DB_STENCILREFMASK.u32All, ctx->Stencil.ValueMask[0],
+		 STENCILMASK_shift, STENCILMASK_mask);
+
+	front_func = translate_stencil_func(ctx->Stencil.Function[0]);
+	SETfield(r700->DB_DEPTH_CONTROL.u32All, front_func,
+		 STENCILFUNC_shift, STENCILFUNC_mask);
+
+	/* back face: same three values in the _BF register / fields */
+	SETfield(r700->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.Ref[bf],
+		 STENCILREF_BF_shift, STENCILREF_BF_mask);
+	SETfield(r700->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.ValueMask[bf],
+		 STENCILMASK_BF_shift, STENCILMASK_BF_mask);
+
+	back_func = translate_stencil_func(ctx->Stencil.Function[bf]);
+	SETfield(r700->DB_DEPTH_CONTROL.u32All, back_func,
+		 STENCILFUNC_BF_shift, STENCILFUNC_BF_mask);
+}
+
+/**
+ * Refresh the stencil write masks for the front and back faces.
+ *
+ * The masks are read from ctx->Stencil.WriteMask (index 0 for front,
+ * ctx->Stencil._BackFace for back); \p face and \p mask are not used.
+ */
+static void r700StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	const unsigned bf = ctx->Stencil._BackFace;
+
+	(void)face;
+	(void)mask;
+
+	R600_STATECHANGE(context, stencil);
+
+	/* front face */
+	SETfield(r700->DB_STENCILREFMASK.u32All, ctx->Stencil.WriteMask[0],
+		 STENCILWRITEMASK_shift, STENCILWRITEMASK_mask);
+
+	/* back face */
+	SETfield(r700->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.WriteMask[bf],
+		 STENCILWRITEMASK_BF_shift, STENCILWRITEMASK_BF_mask);
+}
+
+/**
+ * Refresh the stencil fail / z-fail / z-pass operations for the front
+ * and back faces in the shadowed DB_DEPTH_CONTROL register.
+ *
+ * The operations are read from ctx->Stencil (index 0 for front,
+ * ctx->Stencil._BackFace for back); the \p face, \p fail, \p zfail and
+ * \p zpass arguments are not used.
+ */
+static void r700StencilOpSeparate(GLcontext * ctx, GLenum face,
+				  GLenum fail, GLenum zfail, GLenum zpass)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	const unsigned bf = ctx->Stencil._BackFace;
+	int op;
+
+	(void)face;
+	(void)fail;
+	(void)zfail;
+	(void)zpass;
+
+	R600_STATECHANGE(context, db);
+
+	/* front face */
+	op = translate_stencil_op(ctx->Stencil.FailFunc[0]);
+	SETfield(r700->DB_DEPTH_CONTROL.u32All, op,
+		 STENCILFAIL_shift, STENCILFAIL_mask);
+	op = translate_stencil_op(ctx->Stencil.ZFailFunc[0]);
+	SETfield(r700->DB_DEPTH_CONTROL.u32All, op,
+		 STENCILZFAIL_shift, STENCILZFAIL_mask);
+	op = translate_stencil_op(ctx->Stencil.ZPassFunc[0]);
+	SETfield(r700->DB_DEPTH_CONTROL.u32All, op,
+		 STENCILZPASS_shift, STENCILZPASS_mask);
+
+	/* back face */
+	op = translate_stencil_op(ctx->Stencil.FailFunc[bf]);
+	SETfield(r700->DB_DEPTH_CONTROL.u32All, op,
+		 STENCILFAIL_BF_shift, STENCILFAIL_BF_mask);
+	op = translate_stencil_op(ctx->Stencil.ZFailFunc[bf]);
+	SETfield(r700->DB_DEPTH_CONTROL.u32All, op,
+		 STENCILZFAIL_BF_shift, STENCILZFAIL_BF_mask);
+	op = translate_stencil_op(ctx->Stencil.ZPassFunc[bf]);
+	SETfield(r700->DB_DEPTH_CONTROL.u32All, op,
+		 STENCILZPASS_BF_shift, STENCILZPASS_BF_mask);
+}
+
+/**
+ * Recompute the viewport transform, the depth range and the Z-clip
+ * enables for viewport \p id, then refresh the scissor rectangles.
+ *
+ * The scale / translate terms come from ctx->Viewport._WindowMap.  For
+ * a window-system framebuffer (DrawBuffer->Name == 0) Y is flipped and
+ * biased by the drawable's y + h; X is biased by the drawable's x.
+ * When no drawable is available both offsets are 0.  For a user FBO no
+ * flip or Y bias is applied.
+ *
+ * \param id  index into r700->viewport[]
+ */
+static void r700UpdateWindow(GLcontext * ctx, int id)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	__DRIdrawable *dPriv = radeon_get_drawable(&context->radeon);
+	const GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
+	const GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
+	const GLfloat *m = ctx->Viewport._WindowMap.m;
+	const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+	const GLboolean to_fbo = (ctx->DrawBuffer->Name != 0);
+	const GLfloat y_scale = to_fbo ? 1.0 : -1.0;
+	const GLfloat y_bias = to_fbo ? 0 : yoffset;
+	GLfloat sx, sy, sz;
+	GLfloat tx, ty, tz;
+
+	sx = m[MAT_SX];
+	tx = m[MAT_TX] + xoffset;
+	sy = m[MAT_SY] * y_scale;
+	ty = (m[MAT_TY] * y_scale) + y_bias;
+	sz = m[MAT_SZ] * depthScale;
+	tz = m[MAT_TZ] * depthScale;
+
+	R600_STATECHANGE(context, vpt);
+	R600_STATECHANGE(context, cl);
+
+	r700->viewport[id].PA_CL_VPORT_XSCALE.f32All  = sx;
+	r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx;
+	r700->viewport[id].PA_CL_VPORT_YSCALE.f32All  = sy;
+	r700->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty;
+	r700->viewport[id].PA_CL_VPORT_ZSCALE.f32All  = sz;
+	r700->viewport[id].PA_CL_VPORT_ZOFFSET.f32All = tz;
+
+	if (!ctx->Transform.DepthClamp) {
+		/* normal operation: full [0,1] depth range, Z clipping on */
+		r700->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = 0.0;
+		r700->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = 1.0;
+		CLEARbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit);
+		CLEARbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit);
+	} else {
+		/* depth clamp: Z clipping off, range limited to near..far */
+		r700->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
+		r700->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
+		SETbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit);
+		SETbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit);
+	}
+
+	r700->viewport[id].enabled = GL_TRUE;
+
+	r700SetScissor(context);
+}
+
+
+/**
+ * Handle a viewport change: refresh the viewport-0 transform from ctx,
+ * then forward the rectangle to radeon_viewport().
+ */
+static void r700Viewport(GLcontext * ctx,
+			 GLint x, GLint y,
+			 GLsizei width, GLsizei height)
+{
+	r700UpdateWindow(ctx, 0);
+
+	radeon_viewport(ctx, x, y, width, height);
+}
+
+/**
+ * Handle a depth-range change.  The new range is read from ctx by
+ * r700UpdateWindow(), so \p nearval and \p farval are unused.
+ */
+static void r700DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval)
+{
+	(void)nearval;
+	(void)farval;
+
+	r700UpdateWindow(ctx, 0);
+}
+
+/**
+ * Store a new line width in the WIDTH field of the shadowed
+ * PA_SU_LINE_CNTL register.
+ *
+ * The value written is half of \p widthf scaled by 16, truncated to an
+ * integer and saturated at 0xFFFF.
+ */
+static void r700LineWidth(GLcontext * ctx, GLfloat widthf)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	uint32_t hw_width = (uint32_t)((widthf * 0.5) * (1 << 4));
+
+	R600_STATECHANGE(context, su);
+
+	hw_width = (hw_width > 0xFFFF) ? 0xFFFF : hw_width;
+
+	SETfield(r700->PA_SU_LINE_CNTL.u32All, (uint16_t)hw_width,
+		 PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask);
+}
+
+/**
+ * Store the line stipple pattern and repeat factor in the shadowed
+ * PA_SC_LINE_STIPPLE register.
+ *
+ * \param factor   GL repeat factor; factor - 1 is written to REPEAT_COUNT
+ * \param pattern  16-bit stipple pattern, written to LINE_PATTERN
+ *
+ * AUTO_RESET_CNTL is always set to 1.
+ */
+static void r700LineStipple(GLcontext *ctx, GLint factor, GLushort pattern)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	const GLint repeat = factor - 1;
+
+	R600_STATECHANGE(context, sc);
+
+	SETfield(r700->PA_SC_LINE_STIPPLE.u32All, pattern,
+		 LINE_PATTERN_shift, LINE_PATTERN_mask);
+	SETfield(r700->PA_SC_LINE_STIPPLE.u32All, repeat,
+		 REPEAT_COUNT_shift, REPEAT_COUNT_mask);
+	SETfield(r700->PA_SC_LINE_STIPPLE.u32All, 1,
+		 AUTO_RESET_CNTL_shift, AUTO_RESET_CNTL_mask);
+}
+
+/**
+ * Enable or disable polygon offset.
+ *
+ * The FRONT, BACK and PARA offset-enable bits of the shadowed
+ * PA_SU_SC_MODE_CNTL register are always switched together.
+ *
+ * \param state  GL_TRUE sets all three bits, GL_FALSE clears them
+ */
+static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+	R600_STATECHANGE(context, su);
+
+	if (!state) {
+		CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_FRONT_ENABLE_bit);
+		CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_BACK_ENABLE_bit);
+		CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_PARA_ENABLE_bit);
+		return;
+	}
+
+	SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_FRONT_ENABLE_bit);
+	SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_BACK_ENABLE_bit);
+	SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_PARA_ENABLE_bit);
+}
+
+/**
+ * Store the polygon-offset scale and constant for front and back faces,
+ * plus the negated depth-buffer bit count used by the offset unit.
+ *
+ * \param factor  GL offset factor; multiplied by 12 before being stored
+ *                as the front/back scale
+ * \param units   GL offset units; multiplied by 4 (16-bit depth) or
+ *                2 (24-bit depth) before being stored as the front/back
+ *                offset, and stored unscaled for any other depth size
+ *
+ * The bit count is -16 or -24 (0 for any other depth size).  It is
+ * converted to an unsigned value and trimmed to the width of the
+ * POLY_OFFSET_NEG_NUM_DB_BITS field before it is handed to SETfield, so
+ * no negative int is ever shifted and no bit outside the field can be
+ * disturbed, whatever SETfield does with its value argument.  (The
+ * count used to live in a GLchar, whose signedness depends on the
+ * platform.)
+ */
+static void r700PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	GLfloat constant = units;
+	int neg_db_bits = 0;
+	uint32_t neg_db_bits_field;
+
+	R600_STATECHANGE(context, poly);
+
+	switch (ctx->Visual.depthBits) {
+	case 16:
+		constant *= 4.0;
+		neg_db_bits = -16;
+		break;
+	case 24:
+		constant *= 2.0;
+		neg_db_bits = -24;
+		break;
+	default:
+		break;
+	}
+
+	/* two's-complement encoding of the (negative) count, limited to
+	 * the bits the register field actually has */
+	neg_db_bits_field =
+		(((uint32_t)neg_db_bits << POLY_OFFSET_NEG_NUM_DB_BITS_shift)
+		 & POLY_OFFSET_NEG_NUM_DB_BITS_mask)
+		>> POLY_OFFSET_NEG_NUM_DB_BITS_shift;
+
+	factor *= 12.0;
+	SETfield(r700->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All, neg_db_bits_field,
+		 POLY_OFFSET_NEG_NUM_DB_BITS_shift, POLY_OFFSET_NEG_NUM_DB_BITS_mask);
+	/* PA_SU_POLY_OFFSET_CLAMP is not programmed here */
+	r700->PA_SU_POLY_OFFSET_FRONT_SCALE.f32All = factor;
+	r700->PA_SU_POLY_OFFSET_FRONT_OFFSET.f32All = constant;
+	r700->PA_SU_POLY_OFFSET_BACK_SCALE.f32All = factor;
+	r700->PA_SU_POLY_OFFSET_BACK_OFFSET.f32All = constant;
+}
+
+/**
+ * Recompute the polygon-mode fields of the shadowed PA_SU_SC_MODE_CNTL
+ * register from ctx->Polygon.FrontMode / BackMode.
+ *
+ * When both faces are GL_FILL, polygon mode is disabled and the
+ * per-face primitive types are left as they were.  Otherwise dual mode
+ * is selected and each face gets lines, points or triangles according
+ * to its mode; a mode other than GL_LINE / GL_POINT / GL_FILL leaves
+ * that face's field untouched.
+ */
+static void r700UpdatePolygonMode(GLcontext * ctx)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	GLenum front, back;
+
+	R600_STATECHANGE(context, su);
+
+	SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DISABLE_POLY_MODE, POLY_MODE_shift, POLY_MODE_mask);
+
+	/* Only do something if a polygon mode is wanted, default is GL_FILL */
+	if (ctx->Polygon.FrontMode == GL_FILL &&
+	    ctx->Polygon.BackMode == GL_FILL)
+		return;
+
+	front = ctx->Polygon.FrontMode;
+	back = ctx->Polygon.BackMode;
+
+	/* Enable polygon mode */
+	SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DUAL_MODE, POLY_MODE_shift, POLY_MODE_mask);
+
+	/* front-facing primitive type */
+	if (front == GL_LINE) {
+		SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_LINES,
+			 POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
+	} else if (front == GL_POINT) {
+		SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_POINTS,
+			 POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
+	} else if (front == GL_FILL) {
+		SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES,
+			 POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
+	}
+
+	/* back-facing primitive type */
+	if (back == GL_LINE) {
+		SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_LINES,
+			 POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
+	} else if (back == GL_POINT) {
+		SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_POINTS,
+			 POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
+	} else if (back == GL_FILL) {
+		SETfield(r700->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES,
+			 POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
+	}
+}
+
+/**
+ * Handle a polygon-mode change.  Both face modes are read from ctx by
+ * r700UpdatePolygonMode(), so \p face and \p mode are unused.
+ */
+static void r700PolygonMode(GLcontext * ctx, GLenum face, GLenum mode)
+{
+	(void)mode;
+	(void)face;
+
+	r700UpdatePolygonMode(ctx);
+}
+
+/**
+ * Render-mode handler.  Intentionally empty: no hardware state is
+ * updated here.
+ */
+static void r700RenderMode(GLcontext * ctx, GLenum mode)
+{
+	(void)ctx;
+	(void)mode;
+}
+
+/**
+ * Copy one user clip plane into the shadowed UCP registers.
+ *
+ * \param plane  GL_CLIP_PLANEn; n selects both the source entry in
+ *               ctx->Transform._ClipUserPlane and the destination
+ *               r700->ucp[] slot
+ * \param eq     unused - the four coefficients are taken from
+ *               ctx->Transform._ClipUserPlane instead
+ *
+ * The coefficients are copied as raw 32-bit words into the u32All
+ * members.
+ * NOTE(review): the words are read through a GLint pointer cast of the
+ * plane array, and n is not range-checked here - confirm both are
+ * acceptable for this build and for all callers.
+ */
+static void r700ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	const GLint idx = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+	GLint *raw = (GLint *)ctx->Transform._ClipUserPlane[idx];
+
+	(void)eq;
+
+	R600_STATECHANGE(context, ucp);
+
+	r700->ucp[idx].PA_CL_UCP_0_X.u32All = raw[0];
+	r700->ucp[idx].PA_CL_UCP_0_Y.u32All = raw[1];
+	r700->ucp[idx].PA_CL_UCP_0_Z.u32All = raw[2];
+	r700->ucp[idx].PA_CL_UCP_0_W.u32All = raw[3];
+}
+
+/**
+ * Enable or disable one user clip plane.
+ *
+ * \param cap    GL_CLIP_PLANEn; n selects the UCP enable bit in the
+ *               shadowed PA_CL_CLIP_CNTL register and the r700->ucp[]
+ *               slot
+ * \param state  GL_TRUE to enable, GL_FALSE to disable
+ *
+ * Enabling also reloads the plane coefficients through r700ClipPlane().
+ */
+static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	const GLuint idx = cap - GL_CLIP_PLANE0;
+
+	R600_STATECHANGE(context, cl);
+
+	if (!state) {
+		r700->PA_CL_CLIP_CNTL.u32All &= ~(UCP_ENA_0_bit << idx);
+		r700->ucp[idx].enabled = GL_FALSE;
+		return;
+	}
+
+	r700->PA_CL_CLIP_CNTL.u32All |= (UCP_ENA_0_bit << idx);
+	r700->ucp[idx].enabled = GL_TRUE;
+	r700ClipPlane(ctx, cap, NULL);
+}
+
+/**
+ * Program every scissor / clip rectangle in the hardware-state shadow
+ * (screen, window, the four clip rects, generic and viewport-0 scissor)
+ * with one and the same rectangle.
+ *
+ * The rectangle is chosen as follows:
+ *  - scissor test enabled: the rectangle in radeon.state.scissor.rect,
+ *    with the bottom-right corner moved out by one when kernel_mm is set;
+ *  - otherwise, under DRI2: (0,0) to the colour buffer's width/height;
+ *  - otherwise: the drawable's position and size.
+ *
+ * Returns without doing anything when there is no colour renderbuffer
+ * or it has no buffer object.
+ */
+void r700SetScissor(context_t *context)
+{
+	R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+	struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&context->radeon);
+	unsigned left, top, right, bottom;
+	const int vp = 0;	/* only viewport 0 is programmed */
+
+	if (!rrb || !rrb->bo)
+		return;
+
+	if (context->radeon.state.scissor.enabled) {
+		left   = context->radeon.state.scissor.rect.x1;
+		top    = context->radeon.state.scissor.rect.y1;
+		right  = context->radeon.state.scissor.rect.x2;
+		bottom = context->radeon.state.scissor.rect.y2;
+		/* r600 has exclusive BR scissors */
+		if (context->radeon.radeonScreen->kernel_mm) {
+			right++;
+			bottom++;
+		}
+	} else if (context->radeon.radeonScreen->driScreen->dri2.enabled) {
+		left   = 0;
+		top    = 0;
+		right  = rrb->base.Width;
+		bottom = rrb->base.Height;
+	} else {
+		left   = rrb->dPriv->x;
+		top    = rrb->dPriv->y;
+		right  = rrb->dPriv->x + rrb->dPriv->w;
+		bottom = rrb->dPriv->y + rrb->dPriv->h;
+	}
+
+	R600_STATECHANGE(context, scissor);
+
+	/* screen scissor */
+	SETbit(r700->PA_SC_SCREEN_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+	SETfield(r700->PA_SC_SCREEN_SCISSOR_TL.u32All, left,
+		 PA_SC_SCREEN_SCISSOR_TL__TL_X_shift, PA_SC_SCREEN_SCISSOR_TL__TL_X_mask);
+	SETfield(r700->PA_SC_SCREEN_SCISSOR_TL.u32All, top,
+		 PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift, PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask);
+	SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, right,
+		 PA_SC_SCREEN_SCISSOR_BR__BR_X_shift, PA_SC_SCREEN_SCISSOR_BR__BR_X_mask);
+	SETfield(r700->PA_SC_SCREEN_SCISSOR_BR.u32All, bottom,
+		 PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift, PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask);
+
+	/* window scissor */
+	SETbit(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+	SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, left,
+		 PA_SC_WINDOW_SCISSOR_TL__TL_X_shift, PA_SC_WINDOW_SCISSOR_TL__TL_X_mask);
+	SETfield(r700->PA_SC_WINDOW_SCISSOR_TL.u32All, top,
+		 PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift, PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask);
+	SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, right,
+		 PA_SC_WINDOW_SCISSOR_BR__BR_X_shift, PA_SC_WINDOW_SCISSOR_BR__BR_X_mask);
+	SETfield(r700->PA_SC_WINDOW_SCISSOR_BR.u32All, bottom,
+		 PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift, PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask);
+
+	/* clip rect 0, then copied verbatim into clip rects 1..3 */
+	SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, left,
+		 PA_SC_CLIPRECT_0_TL__TL_X_shift, PA_SC_CLIPRECT_0_TL__TL_X_mask);
+	SETfield(r700->PA_SC_CLIPRECT_0_TL.u32All, top,
+		 PA_SC_CLIPRECT_0_TL__TL_Y_shift, PA_SC_CLIPRECT_0_TL__TL_Y_mask);
+	SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, right,
+		 PA_SC_CLIPRECT_0_BR__BR_X_shift, PA_SC_CLIPRECT_0_BR__BR_X_mask);
+	SETfield(r700->PA_SC_CLIPRECT_0_BR.u32All, bottom,
+		 PA_SC_CLIPRECT_0_BR__BR_Y_shift, PA_SC_CLIPRECT_0_BR__BR_Y_mask);
+
+	r700->PA_SC_CLIPRECT_1_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All;
+	r700->PA_SC_CLIPRECT_1_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All;
+	r700->PA_SC_CLIPRECT_2_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All;
+	r700->PA_SC_CLIPRECT_2_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All;
+	r700->PA_SC_CLIPRECT_3_TL.u32All = r700->PA_SC_CLIPRECT_0_TL.u32All;
+	r700->PA_SC_CLIPRECT_3_BR.u32All = r700->PA_SC_CLIPRECT_0_BR.u32All;
+
+	/* generic (2d clip) scissor */
+	SETbit(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+	SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, left,
+		 PA_SC_GENERIC_SCISSOR_TL__TL_X_shift, PA_SC_GENERIC_SCISSOR_TL__TL_X_mask);
+	SETfield(r700->PA_SC_GENERIC_SCISSOR_TL.u32All, top,
+		 PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift, PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask);
+	SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, right,
+		 PA_SC_GENERIC_SCISSOR_BR__BR_X_shift, PA_SC_GENERIC_SCISSOR_BR__BR_X_mask);
+	SETfield(r700->PA_SC_GENERIC_SCISSOR_BR.u32All, bottom,
+		 PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift, PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask);
+
+	/* viewport scissor */
+	SETbit(r700->viewport[vp].PA_SC_VPORT_SCISSOR_0_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+	SETfield(r700->viewport[vp].PA_SC_VPORT_SCISSOR_0_TL.u32All, left,
+		 PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask);
+	SETfield(r700->viewport[vp].PA_SC_VPORT_SCISSOR_0_TL.u32All, top,
+		 PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift, PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask);
+	SETfield(r700->viewport[vp].PA_SC_VPORT_SCISSOR_0_BR.u32All, right,
+		 PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask);
+	SETfield(r700->viewport[vp].PA_SC_VPORT_SCISSOR_0_BR.u32All, bottom,
+		 PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask);
+
+	r700->viewport[vp].enabled = GL_TRUE;
+}
+
+static void r700InitSQConfig(GLcontext * ctx)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+    int ps_prio;
+    int vs_prio;
+    int gs_prio;
+    int es_prio;
+    int num_ps_gprs;
+    int num_vs_gprs;
+    int num_gs_gprs;
+    int num_es_gprs;
+    int num_temp_gprs;
+    int num_ps_threads;
+    int num_vs_threads;
+    int num_gs_threads;
+    int num_es_threads;
+    int num_ps_stack_entries;
+    int num_vs_stack_entries;
+    int num_gs_stack_entries;
+    int num_es_stack_entries;
+
+    R600_STATECHANGE(context, sq);
+
+    // SQ
+    ps_prio = 0;
+    vs_prio = 1;
+    gs_prio = 2;
+    es_prio = 3;
+    switch (context->radeon.radeonScreen->chip_family) {
+    case CHIP_FAMILY_R600:
+	    num_ps_gprs = 192;
+	    num_vs_gprs = 56;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 136;
+	    num_vs_threads = 48;
+	    num_gs_threads = 4;
+	    num_es_threads = 4;
+	    num_ps_stack_entries = 128;
+	    num_vs_stack_entries = 128;
+	    num_gs_stack_entries = 0;
+	    num_es_stack_entries = 0;
+	    break;
+    case CHIP_FAMILY_RV630:
+    case CHIP_FAMILY_RV635:
+	    num_ps_gprs = 84;
+	    num_vs_gprs = 36;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 144;
+	    num_vs_threads = 40;
+	    num_gs_threads = 4;
+	    num_es_threads = 4;
+	    num_ps_stack_entries = 40;
+	    num_vs_stack_entries = 40;
+	    num_gs_stack_entries = 32;
+	    num_es_stack_entries = 16;
+	    break;
+    case CHIP_FAMILY_RV610:
+    case CHIP_FAMILY_RV620:
+    case CHIP_FAMILY_RS780:
+    case CHIP_FAMILY_RS880:
+    default:
+	    num_ps_gprs = 84;
+	    num_vs_gprs = 36;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 136;
+	    num_vs_threads = 48;
+	    num_gs_threads = 4;
+	    num_es_threads = 4;
+	    num_ps_stack_entries = 40;
+	    num_vs_stack_entries = 40;
+	    num_gs_stack_entries = 32;
+	    num_es_stack_entries = 16;
+	    break;
+    case CHIP_FAMILY_RV670:
+	    num_ps_gprs = 144;
+	    num_vs_gprs = 40;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 136;
+	    num_vs_threads = 48;
+	    num_gs_threads = 4;
+	    num_es_threads = 4;
+	    num_ps_stack_entries = 40;
+	    num_vs_stack_entries = 40;
+	    num_gs_stack_entries = 32;
+	    num_es_stack_entries = 16;
+	    break;
+    case CHIP_FAMILY_RV770:
+	    num_ps_gprs = 192;
+	    num_vs_gprs = 56;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 188;
+	    num_vs_threads = 60;
+	    num_gs_threads = 0;
+	    num_es_threads = 0;
+	    num_ps_stack_entries = 256;
+	    num_vs_stack_entries = 256;
+	    num_gs_stack_entries = 0;
+	    num_es_stack_entries = 0;
+	    break;
+    case CHIP_FAMILY_RV730:
+    case CHIP_FAMILY_RV740:
+	    num_ps_gprs = 84;
+	    num_vs_gprs = 36;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 188;
+	    num_vs_threads = 60;
+	    num_gs_threads = 0;
+	    num_es_threads = 0;
+	    num_ps_stack_entries = 128;
+	    num_vs_stack_entries = 128;
+	    num_gs_stack_entries = 0;
+	    num_es_stack_entries = 0;
+	    break;
+    case CHIP_FAMILY_RV710:
+	    num_ps_gprs = 192;
+	    num_vs_gprs = 56;
+	    num_temp_gprs = 4;
+	    num_gs_gprs = 0;
+	    num_es_gprs = 0;
+	    num_ps_threads = 144;
+	    num_vs_threads = 48;
+	    num_gs_threads = 0;
+	    num_es_threads = 0;
+	    num_ps_stack_entries = 128;
+	    num_vs_stack_entries = 128;
+	    num_gs_stack_entries = 0;
+	    num_es_stack_entries = 0;
+	    break;
+    }
+
+    r700->sq_config.SQ_CONFIG.u32All = 0;
+    if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) ||
+	(context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) ||
+	(context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) ||
+        (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710))
+	    CLEARbit(r700->sq_config.SQ_CONFIG.u32All, VC_ENABLE_bit);
+    else
+	    SETbit(r700->sq_config.SQ_CONFIG.u32All, VC_ENABLE_bit);
+    SETbit(r700->sq_config.SQ_CONFIG.u32All, DX9_CONSTS_bit);
+    SETbit(r700->sq_config.SQ_CONFIG.u32All, ALU_INST_PREFER_VECTOR_bit);
+    SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, PS_PRIO_shift, PS_PRIO_mask);
+    SETfield(r700->sq_config.SQ_CONFIG.u32All, vs_prio, VS_PRIO_shift, VS_PRIO_mask);
+    SETfield(r700->sq_config.SQ_CONFIG.u32All, gs_prio, GS_PRIO_shift, GS_PRIO_mask);
+    SETfield(r700->sq_config.SQ_CONFIG.u32All, es_prio, ES_PRIO_shift, ES_PRIO_mask);
+
+    r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All = 0;
+    SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, num_ps_gprs, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
+    SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, num_vs_gprs, NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);
+    SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, num_temp_gprs,
+	     NUM_CLAUSE_TEMP_GPRS_shift, NUM_CLAUSE_TEMP_GPRS_mask);
+
+    r700->sq_config.SQ_GPR_RESOURCE_MGMT_2.u32All = 0;
+    SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_2.u32All, num_gs_gprs, NUM_GS_GPRS_shift, NUM_GS_GPRS_mask);
+    SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_2.u32All, num_es_gprs, NUM_ES_GPRS_shift, NUM_ES_GPRS_mask);
+
+    r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All = 0;
+    SETfield(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All, num_ps_threads,
+	     NUM_PS_THREADS_shift, NUM_PS_THREADS_mask);
+    SETfield(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All, num_vs_threads,
+	     NUM_VS_THREADS_shift, NUM_VS_THREADS_mask);
+    SETfield(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All, num_gs_threads,
+	     NUM_GS_THREADS_shift, NUM_GS_THREADS_mask);
+    SETfield(r700->sq_config.SQ_THREAD_RESOURCE_MGMT.u32All, num_es_threads,
+	     NUM_ES_THREADS_shift, NUM_ES_THREADS_mask);
+
+    r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All = 0;
+    SETfield(r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All, num_ps_stack_entries,
+	     NUM_PS_STACK_ENTRIES_shift, NUM_PS_STACK_ENTRIES_mask);
+    SETfield(r700->sq_config.SQ_STACK_RESOURCE_MGMT_1.u32All, num_vs_stack_entries,
+	     NUM_VS_STACK_ENTRIES_shift, NUM_VS_STACK_ENTRIES_mask);
+
+    r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All = 0;
+    SETfield(r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All, num_gs_stack_entries,
+	     NUM_GS_STACK_ENTRIES_shift, NUM_GS_STACK_ENTRIES_mask);
+    SETfield(r700->sq_config.SQ_STACK_RESOURCE_MGMT_2.u32All, num_es_stack_entries,
+	     NUM_ES_STACK_ENTRIES_shift, NUM_ES_STACK_ENTRIES_mask);
+
+}
+
+/**
+ * Fill the register values held in context->hw with their initial settings and
+ * replay the current GL state through the r700 state callbacks.  Assumes the
+ * command buffer and state atoms are initialized; ends by flagging all hw state dirty.
+ */
+void r700InitState(GLcontext * ctx)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+    int id = 0;
+
+    r700->TA_CNTL_AUX.u32All = 0;
+    SETfield(r700->TA_CNTL_AUX.u32All, 28, TD_FIFO_CREDIT_shift, TD_FIFO_CREDIT_mask);
+    r700->VC_ENHANCE.u32All = 0;
+    r700->DB_WATERMARKS.u32All = 0;
+    SETfield(r700->DB_WATERMARKS.u32All, 4, DEPTH_FREE_shift, DEPTH_FREE_mask);
+    SETfield(r700->DB_WATERMARKS.u32All, 16, DEPTH_FLUSH_shift, DEPTH_FLUSH_mask);
+    SETfield(r700->DB_WATERMARKS.u32All, 0, FORCE_SUMMARIZE_shift, FORCE_SUMMARIZE_mask);
+    SETfield(r700->DB_WATERMARKS.u32All, 4, DEPTH_PENDING_FREE_shift, DEPTH_PENDING_FREE_mask);
+    r700->SQ_DYN_GPR_CNTL_PS_FLUSH_REQ.u32All = 0;
+    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
+	    SETfield(r700->TA_CNTL_AUX.u32All, 3, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask);
+	    r700->DB_DEBUG.u32All = 0x82000000; /* NOTE(review): DB_DEBUG is only written on this branch */
+	    SETfield(r700->DB_WATERMARKS.u32All, 16, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask);
+    } else {
+	    SETfield(r700->TA_CNTL_AUX.u32All, 2, GRADIENT_CREDIT_shift, GRADIENT_CREDIT_mask);
+	    SETfield(r700->DB_WATERMARKS.u32All, 4, DEPTH_CACHELINE_FREE_shift, DEPTH_CACHELINE_FREE_mask);
+	    SETbit(r700->SQ_DYN_GPR_CNTL_PS_FLUSH_REQ.u32All, VS_PC_LIMIT_ENABLE_bit);
+    }
+
+    /* Turn off vgt reuse */
+    r700->VGT_REUSE_OFF.u32All = 0;
+    SETbit(r700->VGT_REUSE_OFF.u32All, REUSE_OFF_bit);
+
+    /* Specify offsetting and clamp values for vertices */
+    r700->VGT_MAX_VTX_INDX.u32All      = 0xFFFFFF;
+    r700->VGT_MIN_VTX_INDX.u32All      = 0;
+    r700->VGT_INDX_OFFSET.u32All    = 0;
+
+    /* default shader connections. */
+    r700->SPI_VS_OUT_ID_0.u32All  = 0x03020100;
+    r700->SPI_VS_OUT_ID_1.u32All  = 0x07060504;
+    r700->SPI_VS_OUT_ID_2.u32All  = 0x0b0a0908;
+    r700->SPI_VS_OUT_ID_3.u32All  = 0x0f0e0d0c;
+
+    r700->SPI_THREAD_GROUPING.u32All = 0;
+    if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770)
+	    SETfield(r700->SPI_THREAD_GROUPING.u32All, 1, PS_GROUPING_shift, PS_GROUPING_mask);
+
+    /* 4 clip rectangles */ /* TODO : set these clip rects according to context->currentDraw->numClipRects */
+    r700->PA_SC_CLIPRECT_RULE.u32All = 0;
+    SETfield(r700->PA_SC_CLIPRECT_RULE.u32All, CLIP_RULE_mask, CLIP_RULE_shift, CLIP_RULE_mask);
+
+    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+	    r700->PA_SC_EDGERULE.u32All = 0;
+    else
+	    r700->PA_SC_EDGERULE.u32All = 0xAAAAAAAA;
+
+    if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) {
+	    r700->PA_SC_MODE_CNTL.u32All = 0;
+	    SETbit(r700->PA_SC_MODE_CNTL.u32All, WALK_ORDER_ENABLE_bit);
+	    SETbit(r700->PA_SC_MODE_CNTL.u32All, FORCE_EOV_CNTDWN_ENABLE_bit);
+    } else {
+	    r700->PA_SC_MODE_CNTL.u32All = 0x00500000;
+	    SETbit(r700->PA_SC_MODE_CNTL.u32All, FORCE_EOV_REZ_ENABLE_bit);
+	    SETbit(r700->PA_SC_MODE_CNTL.u32All, FORCE_EOV_CNTDWN_ENABLE_bit);
+    }
+
+    /* Do scale XY and Z by 1/W0.  NOTE(review): PA_CL_VTE_CNTL is only modified bit-wise in this function, never reset. */
+    r700->bEnablePerspective = GL_TRUE;
+    CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
+    CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
+
+    /* Enable viewport scaling and offset for all three axes */
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_X_SCALE_ENA_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_X_OFFSET_ENA_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Y_SCALE_ENA_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Y_OFFSET_ENA_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Z_SCALE_ENA_bit);
+    SETbit(r700->PA_CL_VTE_CNTL.u32All, VPORT_Z_OFFSET_ENA_bit);
+
+    /* GL uses last vtx for flat shading components */
+    SETbit(r700->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit);
+
+    /* Set up vertex control */
+    r700->PA_SU_VTX_CNTL.u32All = 0;
+    CLEARfield(r700->PA_SU_VTX_CNTL.u32All, QUANT_MODE_mask);
+    SETbit(r700->PA_SU_VTX_CNTL.u32All, PIX_CENTER_bit);
+    SETfield(r700->PA_SU_VTX_CNTL.u32All, X_ROUND_TO_EVEN,
+             PA_SU_VTX_CNTL__ROUND_MODE_shift, PA_SU_VTX_CNTL__ROUND_MODE_mask);
+
+    /* Guard-band adjust factors: 1.0 = no guard band */
+    r700->PA_CL_GB_VERT_CLIP_ADJ.u32All  = 0x3F800000;  /* 1.0 */
+    r700->PA_CL_GB_VERT_DISC_ADJ.u32All  = 0x3F800000;
+    r700->PA_CL_GB_HORZ_CLIP_ADJ.u32All  = 0x3F800000;
+    r700->PA_CL_GB_HORZ_DISC_ADJ.u32All  = 0x3F800000;
+
+    /* Enable all samples for multi-sample anti-aliasing */
+    r700->PA_SC_AA_MASK.u32All = 0xFFFFFFFF;
+    /* Turn off AA */
+    r700->PA_SC_AA_CONFIG.u32All = 0;
+
+    r700->SX_MISC.u32All = 0;
+
+    r700InitSQConfig(ctx); /* per-chip-family GPR / thread / stack-entry settings */
+
+    r700ColorMask(ctx, /* from here on the current GL state is replayed through the driver callbacks */
+		  ctx->Color.ColorMask[0][RCOMP],
+		  ctx->Color.ColorMask[0][GCOMP],
+		  ctx->Color.ColorMask[0][BCOMP],
+		  ctx->Color.ColorMask[0][ACOMP]);
+
+    r700Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test);
+    r700DepthMask(ctx, ctx->Depth.Mask);
+    r700DepthFunc(ctx, ctx->Depth.Func);
+    r700->DB_DEPTH_CLEAR.u32All     = 0x3F800000; /* 1.0 */
+    SETbit(r700->DB_RENDER_CONTROL.u32All, STENCIL_COMPRESS_DISABLE_bit);
+    SETbit(r700->DB_RENDER_CONTROL.u32All, DEPTH_COMPRESS_DISABLE_bit);
+    r700SetDBRenderState(ctx);
+
+    r700->DB_ALPHA_TO_MASK.u32All = 0;
+    SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask);
+    SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET1_shift, ALPHA_TO_MASK_OFFSET1_mask);
+    SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET2_shift, ALPHA_TO_MASK_OFFSET2_mask);
+    SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET3_shift, ALPHA_TO_MASK_OFFSET3_mask);
+
+    /* stencil (only face 0 is replayed here) */
+    r700Enable(ctx, GL_STENCIL_TEST, ctx->Stencil._Enabled);
+    r700StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]);
+    r700StencilFuncSeparate(ctx, 0, ctx->Stencil.Function[0],
+			    ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]);
+    r700StencilOpSeparate(ctx, 0, ctx->Stencil.FailFunc[0],
+			  ctx->Stencil.ZFailFunc[0],
+			  ctx->Stencil.ZPassFunc[0]);
+
+    r700UpdateCulling(ctx);
+
+    r700SetBlendState(ctx);
+    r700SetLogicOpState(ctx);
+
+    r700AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
+    r700Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled);
+
+    r700PointSize(ctx, 1.0);
+
+    CLEARfield(r700->PA_SU_POINT_MINMAX.u32All, MIN_SIZE_mask);
+    SETfield(r700->PA_SU_POINT_MINMAX.u32All, 0x8000, MAX_SIZE_shift, MAX_SIZE_mask);
+
+    r700LineWidth(ctx, 1.0);
+
+    r700->PA_SC_LINE_CNTL.u32All = 0;
+    CLEARbit(r700->PA_SC_LINE_CNTL.u32All, EXPAND_LINE_WIDTH_bit);
+    SETbit(r700->PA_SC_LINE_CNTL.u32All, LAST_PIXEL_bit);
+
+    r700ShadeModel(ctx, ctx->Light.ShadeModel);
+    r700PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode);
+    r700PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode);
+    r700PolygonOffset(ctx, ctx->Polygon.OffsetFactor,
+		      ctx->Polygon.OffsetUnits);
+    r700Enable(ctx, GL_POLYGON_OFFSET_POINT, ctx->Polygon.OffsetPoint);
+    r700Enable(ctx, GL_POLYGON_OFFSET_LINE, ctx->Polygon.OffsetLine);
+    r700Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill);
+
+    /* CB */
+    r700BlendColor(ctx, ctx->Color.BlendColor);
+
+    r700->CB_CLEAR_RED_R6XX.f32All = 1.0; //r6xx only
+    r700->CB_CLEAR_GREEN_R6XX.f32All = 0.0; //r6xx only
+    r700->CB_CLEAR_BLUE_R6XX.f32All = 1.0; //r6xx only
+    r700->CB_CLEAR_ALPHA_R6XX.f32All = 1.0; //r6xx only
+    r700->CB_FOG_RED_R6XX.u32All = 0; //r6xx only
+    r700->CB_FOG_GREEN_R6XX.u32All = 0; //r6xx only
+    r700->CB_FOG_BLUE_R6XX.u32All = 0; //r6xx only
+
+    /* Disable color compares */
+    SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS,
+             CLRCMP_FCN_SRC_shift, CLRCMP_FCN_SRC_mask);
+    SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS,
+             CLRCMP_FCN_DST_shift, CLRCMP_FCN_DST_mask);
+    SETfield(r700->CB_CLRCMP_CONTROL.u32All, CLRCMP_SEL_SRC,
+             CLRCMP_FCN_SEL_shift, CLRCMP_FCN_SEL_mask);
+
+    /* Zero out source */
+    r700->CB_CLRCMP_SRC.u32All = 0x00000000;
+
+    /* Put a compare color in for error checking */
+    r700->CB_CLRCMP_DST.u32All = 0x000000FF;
+
+    /* Set up color compare mask */
+    r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF;
+
+    /* screen/window/view */
+    SETfield(r700->CB_SHADER_MASK.u32All, 0xF, (4 * id), OUTPUT0_ENABLE_mask); /* id is 0 here, so the shift is 0 */
+
+    context->radeon.hw.all_dirty = GL_TRUE; /* flag all hw state as dirty */
+
+}
+
+/**
+ * Install the r700 state callbacks into the Mesa driver function table.
+ */
+void r700InitStateFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
+{
+	functions->UpdateState = r700InvalidateState;
+	functions->Enable = r700Enable;
+	functions->RenderMode = r700RenderMode;
+
+	/* Color buffer: write mask, alpha test, blending, logic op, fog */
+	functions->ColorMask = r700ColorMask;
+	functions->AlphaFunc = r700AlphaFunc;
+	functions->BlendColor = r700BlendColor;
+	functions->BlendEquationSeparate = r700BlendEquationSeparate;
+	functions->BlendFuncSeparate = r700BlendFuncSeparate;
+	functions->LogicOpcode = r700LogicOpcode;
+	functions->Fogfv = r700Fogfv;
+
+	/* Depth and stencil */
+	functions->DepthFunc = r700DepthFunc;
+	functions->DepthMask = r700DepthMask;
+	functions->DepthRange = r700DepthRange;
+	functions->StencilFuncSeparate = r700StencilFuncSeparate;
+	functions->StencilMaskSeparate = r700StencilMaskSeparate;
+	functions->StencilOpSeparate = r700StencilOpSeparate;
+
+	/* Polygon, point and line state */
+	functions->CullFace = r700CullFace;
+	functions->FrontFace = r700FrontFace;
+	functions->ShadeModel = r700ShadeModel;
+	functions->PolygonMode = r700PolygonMode;
+	functions->PolygonOffset = r700PolygonOffset;
+	functions->PointSize = r700PointSize;
+	functions->PointParameterfv = r700PointParameter; /* ARB_point_parameters */
+	functions->LineWidth = r700LineWidth;
+	functions->LineStipple = r700LineStipple;
+
+	/* Viewport and clip planes; scissor and buffer selection use the radeon* functions */
+	functions->Viewport = r700Viewport;
+	functions->ClipPlane = r700ClipPlane;
+	functions->Scissor = radeonScissor;
+	functions->DrawBuffer = radeonDrawBuffer;
+	functions->ReadBuffer = radeonReadBuffer;
+
+	/* Pixel transfers: meta paths; ReadPixels is only replaced when kernel_mm is set */
+	functions->CopyPixels = _mesa_meta_CopyPixels;
+	functions->DrawPixels = _mesa_meta_DrawPixels;
+	if (radeon->radeonScreen->kernel_mm) functions->ReadPixels = radeonReadPixels;
+}
+
diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h
new file mode 100644
index 0000000000..56885e0b15
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_state.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _R700_STATE_H
+#define _R700_STATE_H
+
+#include "main/mtypes.h"
+
+#include "r600_context.h"
+
+#include "r700_chip.h"
+
+extern void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state);
+extern void r700UpdateShaders (GLcontext * ctx);
+extern void r700UpdateShaderStates(GLcontext * ctx);
+
+extern void r700UpdateViewportOffset(GLcontext * ctx);
+
+extern void r700InitState (GLcontext * ctx);
+extern void r700InitStateFuncs (radeonContextPtr radeon, struct dd_function_table *functions);
+
+extern void r700SetScissor(context_t *context);
+
+#endif	/* _R700_STATE_H */
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
new file mode 100644
index 0000000000..14dd2a5482
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -0,0 +1,712 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+
+#include "tnl/t_context.h"
+#include "shader/program.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+
+#include "radeon_debug.h"
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+#include "shader/programopt.h"
+
+#include "r700_debug.h"
+#include "r700_vertprog.h"
+
+/**
+ * Assign a consecutive export slot to every result the vertex program writes.
+ *
+ * Slots are handed out starting at unStart and stored in
+ * pAsm->ucVP_OutputMap[], indexed by VERT_RESULT_*.  Entries of results that
+ * are not set in mesa_vp->Base.OutputsWritten are left untouched.
+ *
+ * The hand-out order is fixed and has to match the order in which the
+ * fragment shader maps its inputs:
+ *   position, front colors, back-face colors, fog, point size,
+ *   texture coordinates 0-7,
+ *   generic varyings (VERT_RESULT_VAR0 up to VERT_RESULT_MAX - 1).
+ * The TODO markers of the original code are kept on the back-face, fog and
+ * point-size entries.
+ *
+ * Bits are tested by shifting OutputsWritten down instead of building
+ * "1 << index", because a signed 1 << 31 is undefined behavior.
+ *
+ * \param pAsm     assembler state whose ucVP_OutputMap[] is filled in
+ * \param mesa_vp  vertex program; only Base.OutputsWritten is read
+ * \param unStart  first slot number to hand out
+ * \return the number of results that received a slot
+ *         (the next free slot is unStart plus this value)
+ */
+unsigned int Map_Vertex_Output(r700_AssemblerBase       *pAsm,
+			       struct gl_vertex_program *mesa_vp,
+			       unsigned int unStart)
+{
+	/* Results that are mapped one by one, in hand-out order. */
+	static const unsigned int fixedOrder[] =
+	{
+		VERT_RESULT_HPOS,
+		VERT_RESULT_COL0,
+		VERT_RESULT_COL1,
+		VERT_RESULT_BFC0,	/* TODO : dealing back face. */
+		VERT_RESULT_BFC1,
+		VERT_RESULT_FOGC,	/* TODO : dealing fog. */
+		VERT_RESULT_PSIZ	/* TODO : dealing point size. */
+	};
+	const unsigned int unFixedCount = sizeof(fixedOrder) / sizeof(fixedOrder[0]);
+	unsigned int i;
+	unsigned int unSlot;
+	unsigned int unTotal = unStart;
+
+	/* Position, colors, fog and point size */
+	for(i=0; i<unFixedCount; i++)
+	{
+		unSlot = fixedOrder[i];
+		if((mesa_vp->Base.OutputsWritten >> unSlot) & 1)
+		{
+			pAsm->ucVP_OutputMap[unSlot] = unTotal++;
+		}
+	}
+
+	/* Texture coordinate sets */
+	for(i=0; i<8; i++)
+	{
+		unSlot = VERT_RESULT_TEX0 + i;
+		if((mesa_vp->Base.OutputsWritten >> unSlot) & 1)
+		{
+			pAsm->ucVP_OutputMap[unSlot] = unTotal++;
+		}
+	}
+
+	/* Generic varyings */
+	for(unSlot=VERT_RESULT_VAR0; unSlot<VERT_RESULT_MAX; unSlot++)
+	{
+		if((mesa_vp->Base.OutputsWritten >> unSlot) & 1)
+		{
+			pAsm->ucVP_OutputMap[unSlot] = unTotal++;
+		}
+	}
+
+	return (unTotal - unStart);
+}
+
+/**
+ * Give each attribute set in mesa_vp->Base.InputsRead a consecutive register,
+ * starting at unStart, and record it in pAsm->ucVP_AttributeMap[].
+ * \return the number of attributes that were mapped
+ */
+unsigned int Map_Vertex_Input(r700_AssemblerBase       *pAsm,
+			      struct gl_vertex_program *mesa_vp,
+			      unsigned int unStart)
+{
+	unsigned int i;
+	unsigned int unTotal = unStart;
+
+	for(i=0; i<VERT_ATTRIB_MAX; i++)	/* bit i is tested by shifting the mask down: no signed 1 << 31 */
+		if((mesa_vp->Base.InputsRead >> i) & 1)
+			pAsm->ucVP_AttributeMap[i] = unTotal++;
+	return (unTotal - unStart);
+}
+
+/**
+ * Emit one vertex-fetch instruction for every attribute set in
+ * mesa_vp->Base.InputsRead, using the size and type stored in vp->aos_desc[]
+ * and the register recorded in ucVP_AttributeMap[].  Always returns GL_TRUE.
+ */
+GLboolean Process_Vertex_Program_Vfetch_Instructions(
+						struct r700_vertex_program *vp,
+						struct gl_vertex_program   *mesa_vp)
+{
+	int i;
+	VTX_FETCH_METHOD vtxFetchMethod;
+
+	vtxFetchMethod.bEnableMini          = GL_FALSE;
+	vtxFetchMethod.mega_fetch_remainder = 0;
+
+	for(i=0; i<VERT_ATTRIB_MAX; i++)
+	{
+		/* Shift the mask down instead of building 1 << i: a signed 1 << 31 is undefined. */
+		if(!((mesa_vp->Base.InputsRead >> i) & 1))
+			continue;
+		assemble_vfetch_instruction(&vp->r700AsmCode, i,
+					    vp->r700AsmCode.ucVP_AttributeMap[i],
+					    vp->aos_desc[i].size, vp->aos_desc[i].type, &vtxFetchMethod);
+	}
+	return GL_TRUE;
+}
+
+/**
+ * Emit a vertex-fetch instruction for each active stream in context->stream_desc[],
+ * targeting the register that ucVP_AttributeMap[] holds for the stream's element.
+ * mesa_vp is not used.  Always returns GL_TRUE.
+ */
+GLboolean Process_Vertex_Program_Vfetch_Instructions2(
+    GLcontext *ctx,
+    struct r700_vertex_program *vp,
+    struct gl_vertex_program   *mesa_vp)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    r700_AssemblerBase *pAsm = &vp->r700AsmCode;
+    VTX_FETCH_METHOD fetchMethod;
+    StreamDesc *pStream;
+    int idx = 0;
+
+    fetchMethod.bEnableMini = GL_FALSE;
+    fetchMethod.mega_fetch_remainder = 0;
+
+    while(idx < context->nNumActiveAos)
+    {
+        pStream = &context->stream_desc[idx++];
+        assemble_vfetch_instruction2(pAsm, pAsm->ucVP_AttributeMap[pStream->element], pStream->type, pStream->size,
+                                     pStream->element, pStream->_signed, pStream->normalize, pStream->format, &fetchMethod);
+    }
+    return GL_TRUE;
+}
+
+/**
+ * Lay out the GPRs of a vertex program and record the layout in
+ * vp->r700AsmCode.  Registers are handed out in this order:
+ *   R0            index into the vertex buffer
+ *   inputs        one per attribute read (Map_Vertex_Input)
+ *   exports       one per result written (Map_Vertex_Output)
+ *   temporaries   the larger of NumNativeTemporaries and NumTemporaries
+ *   one flag register, followed by the first helper register
+ * The vertex-fetch instructions are emitted along the way, and pucOutMask is
+ * allocated with one zeroed byte per export.
+ *
+ * NOTE(review): the MALLOC result is not checked, and a vfetch failure is only
+ * logged because the function returns void.
+ */
+void Map_Vertex_Program(GLcontext *ctx,
+                        struct r700_vertex_program *vp,
+                        struct gl_vertex_program   *mesa_vp)
+{
+    r700_AssemblerBase *pAsm = &(vp->r700AsmCode);
+    unsigned int unInputs;
+    GLuint k = 0;
+
+    /* R0 is reserved for the vertex index, so the inputs start at register 1. */
+    pAsm->number_used_registers = 1;
+    pAsm->starting_vfetch_register_number = pAsm->number_used_registers;
+    unInputs = Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers);
+    pAsm->number_used_registers += unInputs;
+
+    if (Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) != GL_TRUE)
+    {
+        radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n");
+        return;
+    }
+
+    /* Exports come right after the inputs. */
+    pAsm->starting_export_register_number = pAsm->number_used_registers;
+    pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
+    pAsm->number_used_registers += pAsm->number_of_exports;
+
+    pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
+    while(k < pAsm->number_of_exports)
+    {
+        pAsm->pucOutMask[k++] = 0x0;
+    }
+
+    /* arb programs use NumNativeTemporaries, fixed-function t_vp uses NumTemporaries */
+    pAsm->starting_temp_register_number = pAsm->number_used_registers;
+    pAsm->number_used_registers += (mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries)
+                                 ? mesa_vp->Base.NumNativeTemporaries : mesa_vp->Base.NumTemporaries;
+
+    pAsm->flag_reg_index = pAsm->number_used_registers++;
+    pAsm->uFirstHelpReg = pAsm->number_used_registers;
+}
+
+/**
+ * Record, for every instruction of mesa_vp, which instruction last wrote each
+ * PROGRAM_TEMPORARY it reads.  The result is stored in
+ * vp->r700AsmCode.pInstDeps (one InstDeps per instruction, -1 = no dependency);
+ * that array is not freed here.
+ *
+ * NOTE(review): the destination is recorded before the sources are looked up,
+ * so an instruction that reads its own destination temp depends on itself.
+ *
+ * \return GL_TRUE on success, GL_FALSE if a work buffer could not be allocated.
+ */
+GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
+					   struct gl_vertex_program   *mesa_vp)
+{
+    GLuint i, j;
+    GLint *puiTEMPwrites;
+    struct prog_instruction *pILInst;
+    InstDeps *pInstDeps;
+
+    puiTEMPwrites = (GLint*) MALLOC(sizeof(GLint)*mesa_vp->Base.NumTemporaries);
+    pInstDeps = (InstDeps*) MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions);
+    /* A zero-sized request may legitimately yield NULL; only a non-empty one is a failure. */
+    if((puiTEMPwrites == NULL && mesa_vp->Base.NumTemporaries > 0) ||
+       (pInstDeps == NULL && mesa_vp->Base.NumInstructions > 0))
+    {
+        FREE(puiTEMPwrites);
+        FREE(pInstDeps);
+        return GL_FALSE;
+    }
+
+    for(i=0; i<mesa_vp->Base.NumTemporaries; i++)
+        puiTEMPwrites[i] = -1;	/* no writer seen yet */
+
+    for(i=0; i<mesa_vp->Base.NumInstructions; i++)
+    {
+        pILInst = &(mesa_vp->Base.Instructions[i]);
+        pInstDeps[i].nDstDep = -1;
+        if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
+            puiTEMPwrites[pILInst->DstReg.Index] = i;	/* remember the last writer */
+        for(j=0; j<3; j++)
+            pInstDeps[i].nSrcDeps[j] = (pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
+                                     ? puiTEMPwrites[pILInst->SrcReg[j].Index] : -1;
+    }
+
+    vp->r700AsmCode.pInstDeps = pInstDeps;
+    FREE(puiTEMPwrites);
+
+    return GL_TRUE;
+}
+
+/**
+ * Build a hardware vertex shader for mesa_vp, specialized for the vertex
+ * streams currently described in context->stream_desc[].
+ *
+ * The Mesa program is cloned (MVP code is inserted when it is position
+ * invariant), registers are laid out, the instructions are assembled, the
+ * exports are processed and the program is relocated.
+ *
+ * \return the new r700_vertex_program, or NULL if the allocation or any
+ *         translation step fails.
+ *
+ * NOTE(review): the failure paths after the allocation return without
+ * releasing vp or the cloned program.
+ */
+struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
+						      struct gl_vertex_program *mesa_vp)
+{
+	context_t *context = R700_CONTEXT(ctx);
+	struct r700_vertex_program *vp;
+	unsigned int i;
+
+	vp = calloc(1, sizeof(*vp));
+	if (vp == NULL)
+		return NULL;
+	vp->mesa_program = _mesa_clone_vertex_program(ctx, mesa_vp);
+
+	if (mesa_vp->IsPositionInvariant)
+	{
+		_mesa_insert_mvp_code(ctx, vp->mesa_program);
+	}
+
+	/* Record the stream layout this translation is built for. */
+	for(i=0; i<context->nNumActiveAos; i++)
+	{
+		vp->aos_desc[i].size   = context->stream_desc[i].size;
+		vp->aos_desc[i].stride = context->stream_desc[i].stride;
+		vp->aos_desc[i].type   = context->stream_desc[i].type;
+		vp->aos_desc[i].format = context->stream_desc[i].format;
+	}
+
+	if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+	{
+		vp->r700AsmCode.bR6xx = 1;
+	}
+
+	//Init_Program
+	Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
+	Map_Vertex_Program(ctx, vp, vp->mesa_program );
+
+	if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
+	{
+		return NULL;
+	}
+
+	InitShaderProgram(&(vp->r700AsmCode));
+	for(i=0; i < MAX_SAMPLERS; i++)
+		vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i];
+	vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions;
+
+	/* Every failing step yields NULL: this function returns a pointer, not a GLboolean. */
+	if(GL_FALSE == AssembleInstr(0, 0, vp->mesa_program->Base.NumInstructions,
+	                             &(vp->mesa_program->Base.Instructions[0]), &(vp->r700AsmCode)) ||
+	   GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) ||
+	   GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)))
+	{
+		return NULL;
+	}
+
+	vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0 : (vp->r700AsmCode.number_used_registers - 1);
+	vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports;
+	vp->translated = GL_TRUE;
+
+	return vp;
+}
+
+/**
+ * Select the hardware vertex shader that matches the current stream layout.
+ *
+ * The translations cached on the current program container are searched for
+ * one whose aos_desc[] size and format equal context->stream_desc[] for every
+ * active stream.  On a miss the program is translated, pushed on the cache
+ * list and selected; a failed translation is logged and leaves selected_vp as is.
+ */
+void r700SelectVertexShader(GLcontext *ctx)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    struct r700_vertex_program_cont *vpc;
+    struct r700_vertex_program *vp;
+    unsigned int i;
+    GLboolean match;
+
+    vpc = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
+
+    for (vp = vpc->progs; vp; vp = vp->next)
+    {
+        match = GL_TRUE;
+        for(i=0; i<context->nNumActiveAos; i++)
+        {
+            if (vp->aos_desc[i].size != context->stream_desc[i].size ||
+                vp->aos_desc[i].format != context->stream_desc[i].format)
+            {
+                match = GL_FALSE;
+                break;
+            }
+        }
+        if (match)
+        {
+            context->selected_vp = vp;
+            return;
+        }
+    }
+
+    vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program));
+    if(!vp)
+    {
+        radeon_error("Failed to translate vertex shader. \n");
+        return;
+    }
+    vp->next = vpc->progs;
+    vpc->progs = vp;
+    context->selected_vp = vp;
+}
+
+/**
+ * Size in bytes of one component of the given GL data type.
+ * Unknown types trip an assert and yield 0.
+ */
+int getTypeSize(GLenum type)
+{
+    int nBytes = 0;
+
+    switch (type)
+    {
+    case GL_DOUBLE:         nBytes = sizeof(GLdouble); break;
+    case GL_FLOAT:          nBytes = sizeof(GLfloat);  break;
+    case GL_INT:            nBytes = sizeof(GLint);    break;
+    case GL_UNSIGNED_INT:   nBytes = sizeof(GLuint);   break;
+    case GL_SHORT:          nBytes = sizeof(GLshort);  break;
+    case GL_UNSIGNED_SHORT: nBytes = sizeof(GLushort); break;
+    case GL_BYTE:           nBytes = sizeof(GLbyte);   break;
+    case GL_UNSIGNED_BYTE:  nBytes = sizeof(GLubyte);  break;
+    default:
+        assert(0);          /* unsupported type */
+        break;
+    }
+
+    return nBytes;
+}
+
+/**
+ * Describe one vertex array as the next entry of context->stream_desc[].
+ *
+ * The entry is typed GL_FLOAT when the array holds GL_DOUBLE, GL_INT or
+ * GL_UNSIGNED_INT data, when its effective stride is below 4 bytes and, on
+ * big-endian builds, when its component type is not 4 bytes wide.  Every
+ * other array keeps its own type; for those, stride and is_named_bo are only
+ * written when the array is not backed by a named buffer object.
+ *
+ * \param ctx    GL context
+ * \param unLoc  attribute index, stored as the stream's element
+ * \param count  not used
+ * \param input  client array to describe
+ *
+ * The new entry's dst_loc is the current context->nNumActiveAos, which is
+ * incremented before returning.
+ */
+static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    StreamDesc *pDesc = &(context->stream_desc[context->nNumActiveAos]);
+    GLuint unStride;
+    int bDescribeAsFloat;
+
+    /* Effective stride: an array with StrideB == 0 advances by one whole element. */
+    unStride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size
+                                     : input->StrideB;
+
+    bDescribeAsFloat = (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+                        getTypeSize(input->Type) != 4 ||
+#endif
+                        unStride < 4);
+
+    if (!bDescribeAsFloat)
+    {
+        pDesc->type = input->Type;
+        pDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4;
+        if (!input->BufferObj->Name)
+        {
+            /* A non-zero stride is replaced by the element size rounded up to a multiple of 4. */
+            pDesc->stride = (input->StrideB == 0) ? 0
+                          : (getTypeSize(pDesc->type) * input->Size + 3) & ~3;
+            pDesc->is_named_bo = GL_FALSE;
+        }
+    }
+    else
+    {
+        pDesc->type = GL_FLOAT;
+        pDesc->stride = (input->StrideB == 0) ? 0 : sizeof(GLfloat) * input->Size;
+        pDesc->dwords = input->Size;
+        pDesc->is_named_bo = GL_FALSE;
+    }
+
+    pDesc->size = input->Size;
+    pDesc->dst_loc = context->nNumActiveAos;
+    pDesc->element = unLoc;
+    pDesc->format = input->Format;
+
+    /* Signedness and normalization per component type */
+    switch (pDesc->type)
+    {
+    case GL_FLOAT:
+        pDesc->_signed = 0;
+        pDesc->normalize = GL_FALSE;
+        break;
+    case GL_SHORT:
+    case GL_BYTE:
+        pDesc->_signed = 1;
+        pDesc->normalize = input->Normalized;
+        break;
+    case GL_UNSIGNED_SHORT:
+    case GL_UNSIGNED_BYTE:
+        pDesc->_signed = 0;
+        pDesc->normalize = input->Normalized;
+        break;
+    default:
+        /* GL_INT, GL_UNSIGNED_INT and GL_DOUBLE were retyped to GL_FLOAT above */
+        assert(0);
+        break;
+    }
+
+    context->nNumActiveAos++;
+}
+
+/**
+ * Rebuild context->stream_desc[] for the current vertex program: one entry per
+ * attribute in InputsRead (plus position for position-invariant programs),
+ * taken from arrays[] in ascending attribute order.
+ */
+void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    /* _Current is the program container (as in r700SelectVertexShader), which embeds the gl_vertex_program. */
+    struct r700_vertex_program_cont *vpc
+           = (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
+    struct gl_vertex_program * mesa_vp = &(vpc->mesa_program);
+    unsigned int unLoc = 0;
+    unsigned int unBit = mesa_vp->Base.InputsRead;
+
+    context->nNumActiveAos = 0;
+    if (mesa_vp->IsPositionInvariant)
+    {
+        unBit |= VERT_BIT_POS;
+    }
+
+    for(; unBit; unBit >>= 1, ++unLoc)
+    {
+        if(unBit & 1)
+            r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]);
+    }
+    context->radeon.tcl.aos_count = context->nNumActiveAos;
+}
+
+/*
+ * Return the shaderbo of the currently selected vertex program,
+ * or NULL when context->selected_vp is not set.
+ */
+void * r700GetActiveVpShaderBo(GLcontext * ctx)
+{
+    context_t *context = R700_CONTEXT(ctx);
+
+    return context->selected_vp ? context->selected_vp->shaderbo : NULL;
+}
+
+/**
+ * Load the selected vertex program to the GPU if needed and refresh the vertex
+ * shader state (SQ_PGM_*, point size output control, SPI exports, constants).
+ * Returns GL_FALSE when no program is selected or the constants do not fit.
+ */
+GLboolean r700SetupVertexProgram(GLcontext * ctx)
+{
+    context_t *context = R700_CONTEXT(ctx);
+    R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+    struct r700_vertex_program *vp = context->selected_vp;
+
+    struct gl_program_parameter_list *paramList;
+    COMPILED_SUB *pCompiledSub;
+    unsigned int unNumParamData;
+    unsigned int ui;
+    GLuint uj, uc;
+    GLuint unConstOffset;
+
+    /* Like r700GetActiveVpShaderBo(), cope with no program being selected. */
+    if(NULL == vp)
+        return GL_FALSE;
+
+    if(GL_FALSE == vp->loaded)
+    {
+        if(vp->r700Shader.bNeedsAssembly == GL_TRUE)
+            Assemble( &(vp->r700Shader) );
+
+        /* Load vp to gpu */
+        r600EmitShader(ctx,
+                       &(vp->shaderbo),
+                       (GLvoid *)(vp->r700Shader.pProgram),
+                       vp->r700Shader.uShaderBinaryDWORDSize,
+                       "VS");
+
+        vp->loaded = GL_TRUE;
+    }
+
+    DumpHwBinary(DUMP_VERTEX_SHADER, (GLvoid *)(vp->r700Shader.pProgram),
+                 vp->r700Shader.uShaderBinaryDWORDSize);
+
+    /* TODO : enable this after MemUse fixed:
+    (context->chipobj.MemUse)(context, vp->shadercode.buf->id);
+    */
+
+    R600_STATECHANGE(context, vs);
+    R600_STATECHANGE(context, fs); /* hack */
+
+    r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0;
+    SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
+
+    r700->vs.SQ_PGM_START_VS.u32All = 0; /* set from buffer object. */
+
+    SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
+             NUM_GPRS_shift, NUM_GPRS_mask);
+
+    if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
+    {
+        SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
+                 STACK_SIZE_shift, STACK_SIZE_mask);
+    }
+
+    R600_STATECHANGE(context, spi);
+
+    if(vp->mesa_program->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
+        R600_STATECHANGE(context, cl);
+        SETbit(r700->PA_CL_VS_OUT_CNTL.u32All, USE_VTX_POINT_SIZE_bit);
+        SETbit(r700->PA_CL_VS_OUT_CNTL.u32All, VS_OUT_MISC_VEC_ENA_bit);
+    } else if (r700->PA_CL_VS_OUT_CNTL.u32All != 0) {
+        R600_STATECHANGE(context, cl);
+        CLEARbit(r700->PA_CL_VS_OUT_CNTL.u32All, USE_VTX_POINT_SIZE_bit);
+        CLEARbit(r700->PA_CL_VS_OUT_CNTL.u32All, VS_OUT_MISC_VEC_ENA_bit);
+    }
+
+    SETfield(r700->SPI_VS_OUT_CONFIG.u32All,
+             vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0,
+             VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask);
+    SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports,
+             NUM_INTERP_shift, NUM_INTERP_mask);
+
+    /*
+    SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
+    CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
+    */
+
+    /* Send out shader constants. */
+    paramList = vp->mesa_program->Base.Parameters;
+
+    if(NULL != paramList) {
+        /* vp->mesa_program was cloned and is not updated by the glsl shader api.
+         * _mesa_reference_program has already checked the glsl shProg and set
+         * ctx->VertexProgram._Current, so uniform values are read from there. */
+        struct gl_program_parameter_list *paramListOrginal =
+                         ctx->VertexProgram._Current->Base.Parameters;
+
+        _mesa_load_state_parameters(ctx, paramList);
+
+        if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
+            return GL_FALSE;
+
+        R600_STATECHANGE(context, vs_consts);
+
+        unNumParamData = paramList->NumParameters;
+        r700->vs.num_consts = unNumParamData;
+
+        for(ui=0; ui<unNumParamData; ui++) {
+            /* Uniforms come from the current program, the rest from the clone. */
+            struct gl_program_parameter_list *src =
+                (paramList->Parameters[ui].Type == PROGRAM_UNIFORM) ? paramListOrginal : paramList;
+
+            for(uc=0; uc<4; uc++)
+                r700->vs.consts[ui][uc].f32All = src->ParameterValues[ui][uc];
+        }
+    } else
+        r700->vs.num_consts = 0;
+
+    /* Presub constants are appended after the program's own constants. */
+    unConstOffset = r700->vs.num_consts;
+    for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++)
+    {
+        pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub;
+        /* Apply the limit used for the main list so the copy below cannot run
+         * past it; assumes vs.consts holds R700_MAX_DX9_CONSTS rows - confirm. */
+        if(pCompiledSub->NumParameters > R700_MAX_DX9_CONSTS - unConstOffset)
+            return GL_FALSE;
+
+        r700->vs.num_consts += pCompiledSub->NumParameters;
+
+        for(uj=0; uj<pCompiledSub->NumParameters; uj++)
+            for(uc=0; uc<4; uc++)
+                r700->vs.consts[unConstOffset + uj][uc].f32All = pCompiledSub->ParameterValues[uj][uc];
+        unConstOffset += pCompiledSub->NumParameters;
+    }
+
+    return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h
new file mode 100644
index 0000000000..645c9ac84a
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#ifndef _R700_VERTPROG_H_
+#define _R700_VERTPROG_H_
+
+#include "main/glheader.h"
+#include "main/mtypes.h" 
+
+#include "r700_shader.h"
+#include "r700_assembler.h"
+
+typedef struct ArrayDesc //TEMP: description of one vertex attribute array (size, type, stride, format)
+{
+	GLint size;   //number of data elements
+	GLenum type;  //data element type
+	GLsizei stride; //array stride; presumably in bytes - TODO confirm against the code that fills it
+	GLenum format; //GL_RGBA or GL_BGRA
+} ArrayDesc;
+
+struct r700_vertex_program /* one hardware vertex shader plus the state needed to upload it */
+{
+    struct gl_vertex_program *mesa_program; /* Must be first */
+
+    struct r700_vertex_program *next; /* presumably links the entries of r700_vertex_program_cont::progs - confirm */
+
+    r700_AssemblerBase r700AsmCode; /* its presubs[] constants are appended to vs.consts by r700SetupVertexProgram() */
+    R700_Shader        r700Shader;  /* pProgram / uShaderBinaryDWORDSize are what r600EmitShader() uploads */
+
+    GLboolean translated; /* NOTE(review): presumably set once translation succeeded - not visible here */
+    GLboolean loaded;     /* set to GL_TRUE by r700SetupVertexProgram() after r600EmitShader() */
+
+    void * shaderbo; /* passed by address to r600EmitShader(); returned by r700GetActiveVpShaderBo() */
+
+    ArrayDesc              aos_desc[VERT_ATTRIB_MAX]; /* one ArrayDesc per vertex attribute slot */
+};
+
+struct r700_vertex_program_cont /* container pairing a Mesa vertex program with its r700 programs */
+{
+    struct gl_vertex_program mesa_program; /* embedded by value (not a pointer) as the first member */
+
+    struct r700_vertex_program *progs; /* presumably head of a list chained through r700_vertex_program::next - confirm */
+};
+
+//Internal helpers; their definitions are not in this header (presumably r700_vertprog.c - confirm)
+unsigned int Map_Vertex_Output(r700_AssemblerBase       *pAsm, 
+			       struct gl_vertex_program *mesa_vp,
+			       unsigned int unStart);
+unsigned int Map_Vertex_Input(r700_AssemblerBase       *pAsm, 
+			      struct gl_vertex_program *mesa_vp,
+			      unsigned int unStart);
+GLboolean Process_Vertex_Program_Vfetch_Instructions(
+	struct r700_vertex_program *vp,
+	struct gl_vertex_program   *mesa_vp);
+GLboolean Process_Vertex_Program_Vfetch_Instructions2(
+    GLcontext *ctx,
+	struct r700_vertex_program *vp,
+	struct gl_vertex_program   *mesa_vp);
+void Map_Vertex_Program(GLcontext *ctx,
+            struct r700_vertex_program *vp,
+			struct gl_vertex_program   *mesa_vp);
+GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp,
+					   struct gl_vertex_program   *mesa_vp);
+
+struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
+						      struct gl_vertex_program   *mesa_vp);
+
+/* Interface used by the rest of the driver */
+extern void r700SelectVertexShader(GLcontext *ctx); /* NOTE(review): presumably chooses context->selected_vp - confirm */
+extern void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count); /* translates each attribute the current vertex program reads */
+
+extern GLboolean r700SetupVertexProgram(GLcontext * ctx); /* uploads the shader if not loaded, updates VS state and constants; GL_FALSE if the parameter list exceeds R700_MAX_DX9_CONSTS */
+
+extern void *    r700GetActiveVpShaderBo(GLcontext * ctx); /* shaderbo of context->selected_vp, or NULL when none is selected */
+
+extern int getTypeSize(GLenum type); /* size of one element of the given GL type; multiplied by the component count when sizing attributes */
+
+#endif /* _R700_VERTPROG_H_ */
diff --git a/src/mesa/drivers/dri/r600/radeon_bo.c b/src/mesa/drivers/dri/r600/radeon_bo.c
new file mode 120000
index 0000000000..9448ffee54
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_bo.c
@@ -0,0 +1 @@
+../radeon/radeon_bo.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_bo_int_drm.h b/src/mesa/drivers/dri/r600/radeon_bo_int_drm.h
new file mode 120000
index 0000000000..029450928b
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_bo_int_drm.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_int_drm.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_bo_legacy.c b/src/mesa/drivers/dri/r600/radeon_bo_legacy.c
new file mode 120000
index 0000000000..79ad050e6b
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_bo_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_bo_legacy.h b/src/mesa/drivers/dri/r600/radeon_bo_legacy.h
new file mode 120000
index 0000000000..83b0f7ffab
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_bo_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/r600/radeon_bocs_wrapper.h
new file mode 120000
index 0000000000..ca894b2443
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_bocs_wrapper.h
@@ -0,0 +1 @@
+../radeon/radeon_bocs_wrapper.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_buffer_objects.c b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c
new file mode 120000
index 0000000000..f6a5f66470
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c
@@ -0,0 +1 @@
+../radeon/radeon_buffer_objects.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_buffer_objects.h b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h
new file mode 120000
index 0000000000..2f134fd17b
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h
@@ -0,0 +1 @@
+../radeon/radeon_buffer_objects.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_chipset.h b/src/mesa/drivers/dri/r600/radeon_chipset.h
new file mode 120000
index 0000000000..eba99001ff
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_chipset.h
@@ -0,0 +1 @@
+../radeon/radeon_chipset.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cmdbuf.h b/src/mesa/drivers/dri/r600/radeon_cmdbuf.h
new file mode 120000
index 0000000000..a799e1dc6d
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cmdbuf.h
@@ -0,0 +1 @@
+../radeon/radeon_cmdbuf.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_common.c b/src/mesa/drivers/dri/r600/radeon_common.c
new file mode 120000
index 0000000000..67b19ba940
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_common.c
@@ -0,0 +1 @@
+../radeon/radeon_common.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_common.h b/src/mesa/drivers/dri/r600/radeon_common.h
new file mode 120000
index 0000000000..5bcb696a9f
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_common.h
@@ -0,0 +1 @@
+../radeon/radeon_common.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_common_context.c b/src/mesa/drivers/dri/r600/radeon_common_context.c
new file mode 120000
index 0000000000..86800f3819
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_common_context.c
@@ -0,0 +1 @@
+../radeon/radeon_common_context.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_common_context.h b/src/mesa/drivers/dri/r600/radeon_common_context.h
new file mode 120000
index 0000000000..4d66312550
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_common_context.h
@@ -0,0 +1 @@
+../radeon/radeon_common_context.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cs.c b/src/mesa/drivers/dri/r600/radeon_cs.c
new file mode 120000
index 0000000000..66b7ad1eb0
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cs.c
@@ -0,0 +1 @@
+../radeon/radeon_cs.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cs_int_drm.h b/src/mesa/drivers/dri/r600/radeon_cs_int_drm.h
new file mode 120000
index 0000000000..462f5245d0
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cs_int_drm.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_int_drm.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cs_legacy.c b/src/mesa/drivers/dri/r600/radeon_cs_legacy.c
new file mode 120000
index 0000000000..006720f8a4
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cs_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cs_legacy.h b/src/mesa/drivers/dri/r600/radeon_cs_legacy.h
new file mode 120000
index 0000000000..a5f95e0a3d
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cs_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_cs_space_drm.c b/src/mesa/drivers/dri/r600/radeon_cs_space_drm.c
new file mode 120000
index 0000000000..c248ea7d1a
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_cs_space_drm.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_space_drm.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_debug.c b/src/mesa/drivers/dri/r600/radeon_debug.c
new file mode 120000
index 0000000000..c98c2e074c
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_debug.c
@@ -0,0 +1 @@
+../radeon/radeon_debug.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_debug.h b/src/mesa/drivers/dri/r600/radeon_debug.h
new file mode 120000
index 0000000000..bd8aa28e89
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_debug.h
@@ -0,0 +1 @@
+../radeon/radeon_debug.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_dma.c b/src/mesa/drivers/dri/r600/radeon_dma.c
new file mode 120000
index 0000000000..43be000625
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_dma.c
@@ -0,0 +1 @@
+../radeon/radeon_dma.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_dma.h b/src/mesa/drivers/dri/r600/radeon_dma.h
new file mode 120000
index 0000000000..82e50634e3
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_dma.h
@@ -0,0 +1 @@
+../radeon/radeon_dma.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_fbo.c b/src/mesa/drivers/dri/r600/radeon_fbo.c
new file mode 120000
index 0000000000..0d738d8d78
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_fbo.c
@@ -0,0 +1 @@
+../radeon/radeon_fbo.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_lock.c b/src/mesa/drivers/dri/r600/radeon_lock.c
new file mode 120000
index 0000000000..af4108a8e3
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_lock.c
@@ -0,0 +1 @@
+../radeon/radeon_lock.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_lock.h b/src/mesa/drivers/dri/r600/radeon_lock.h
new file mode 120000
index 0000000000..64bdf94ee7
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_lock.h
@@ -0,0 +1 @@
+../radeon/radeon_lock.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_mipmap_tree.c b/src/mesa/drivers/dri/r600/radeon_mipmap_tree.c
new file mode 120000
index 0000000000..31c0cfbe94
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_mipmap_tree.c
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_mipmap_tree.h b/src/mesa/drivers/dri/r600/radeon_mipmap_tree.h
new file mode 120000
index 0000000000..254d50cf8c
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_mipmap_tree.h
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_pixel_read.c b/src/mesa/drivers/dri/r600/radeon_pixel_read.c
new file mode 120000
index 0000000000..3b03803126
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_pixel_read.c
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_queryobj.c b/src/mesa/drivers/dri/r600/radeon_queryobj.c
new file mode 120000
index 0000000000..1d6ebc1c48
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_queryobj.c
@@ -0,0 +1 @@
+../radeon/radeon_queryobj.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_queryobj.h b/src/mesa/drivers/dri/r600/radeon_queryobj.h
new file mode 120000
index 0000000000..8f6f842b0a
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_queryobj.h
@@ -0,0 +1 @@
+../radeon/radeon_queryobj.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_screen.c b/src/mesa/drivers/dri/r600/radeon_screen.c
new file mode 120000
index 0000000000..86161118dd
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_screen.c
@@ -0,0 +1 @@
+../radeon/radeon_screen.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_screen.h b/src/mesa/drivers/dri/r600/radeon_screen.h
new file mode 120000
index 0000000000..23bb6bd459
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_screen.h
@@ -0,0 +1 @@
+../radeon/radeon_screen.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_span.c b/src/mesa/drivers/dri/r600/radeon_span.c
new file mode 120000
index 0000000000..232868c4c9
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_span.c
@@ -0,0 +1 @@
+../radeon/radeon_span.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_span.h b/src/mesa/drivers/dri/r600/radeon_span.h
new file mode 120000
index 0000000000..f9d634508c
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_span.h
@@ -0,0 +1 @@
+../radeon/radeon_span.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_tex_copy.c b/src/mesa/drivers/dri/r600/radeon_tex_copy.c
new file mode 120000
index 0000000000..dfa5ba34e6
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_tex_copy.c
@@ -0,0 +1 @@
+../radeon/radeon_tex_copy.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_tex_getimage.c b/src/mesa/drivers/dri/r600/radeon_tex_getimage.c
new file mode 120000
index 0000000000..d9836d7326
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_tex_getimage.c
@@ -0,0 +1 @@
+../radeon/radeon_tex_getimage.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_texture.c b/src/mesa/drivers/dri/r600/radeon_texture.c
new file mode 120000
index 0000000000..a822710915
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_texture.c
@@ -0,0 +1 @@
+../radeon/radeon_texture.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_texture.h b/src/mesa/drivers/dri/r600/radeon_texture.h
new file mode 120000
index 0000000000..17fac3d5ea
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_texture.h
@@ -0,0 +1 @@
+../radeon/radeon_texture.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_tile.c b/src/mesa/drivers/dri/r600/radeon_tile.c
new file mode 120000
index 0000000000..d4bfe27da6
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_tile.c
@@ -0,0 +1 @@
+../radeon/radeon_tile.c
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/radeon_tile.h b/src/mesa/drivers/dri/r600/radeon_tile.h
new file mode 120000
index 0000000000..31074c581e
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/radeon_tile.h
@@ -0,0 +1 @@
+../radeon/radeon_tile.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/server/radeon.h b/src/mesa/drivers/dri/r600/server/radeon.h
new file mode 120000
index 0000000000..81274a54f1
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/server/radeon.h
@@ -0,0 +1 @@
+../../radeon/server/radeon.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/server/radeon_dri.h b/src/mesa/drivers/dri/r600/server/radeon_dri.h
new file mode 120000
index 0000000000..27c591d3c9
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/server/radeon_dri.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_dri.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/server/radeon_macros.h b/src/mesa/drivers/dri/r600/server/radeon_macros.h
new file mode 120000
index 0000000000..c56cd735b8
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/server/radeon_macros.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_macros.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/server/radeon_reg.h b/src/mesa/drivers/dri/r600/server/radeon_reg.h
new file mode 120000
index 0000000000..e2349dcb68
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/server/radeon_reg.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_reg.h
+\ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/sq_micro_reg.h b/src/mesa/drivers/dri/r600/sq_micro_reg.h
new file mode 100644
index 0000000000..bfd21cef62
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/sq_micro_reg.h
@@ -0,0 +1,2008 @@
+/*
+ * Copyright (C) 2008-2009  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Contacts:
+ *   Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#if !defined (_SQ_MICRO_REG_H)
+#define _SQ_MICRO_REG_H
+
+#if defined(LITTLEENDIAN_CPU)
+#elif defined(BIGENDIAN_CPU)
+#else
+#error "BIGENDIAN_CPU or LITTLEENDIAN_CPU must be defined"
+#endif
+
+/*
+ * SQ_ALU_SRC_GPR_BASE value (with SQ_ALU_SRC_GPR_SIZE this spans 0x00..0x7f)
+ */
+
+#define SQ_ALU_SRC_GPR_BASE            0x00000000
+
+/*
+ * SQ_ALU_SRC_GPR_SIZE value (0x80 entries starting at SQ_ALU_SRC_GPR_BASE)
+ */
+
+#define SQ_ALU_SRC_GPR_SIZE            0x00000080
+
+/*
+ * SQ_ALU_SRC_KCACHE0_BASE value (equals GPR_BASE + GPR_SIZE; spans 0x80..0x9f)
+ */
+
+#define SQ_ALU_SRC_KCACHE0_BASE        0x00000080
+
+/*
+ * SQ_ALU_SRC_KCACHE0_SIZE value (0x20 entries starting at SQ_ALU_SRC_KCACHE0_BASE)
+ */
+
+#define SQ_ALU_SRC_KCACHE0_SIZE        0x00000020
+
+/*
+ * SQ_ALU_SRC_KCACHE1_BASE value (equals KCACHE0_BASE + KCACHE0_SIZE; spans 0xa0..0xbf)
+ */
+
+#define SQ_ALU_SRC_KCACHE1_BASE        0x000000a0
+
+/*
+ * SQ_ALU_SRC_KCACHE1_SIZE value (0x20 entries starting at SQ_ALU_SRC_KCACHE1_BASE)
+ */
+
+#define SQ_ALU_SRC_KCACHE1_SIZE        0x00000020
+
+/*
+ * SQ_ALU_SRC_CFILE_BASE value (with SQ_ALU_SRC_CFILE_SIZE this spans 0x100..0x1ff)
+ */
+
+#define SQ_ALU_SRC_CFILE_BASE          0x00000100
+
+/*
+ * SQ_ALU_SRC_CFILE_SIZE value (0x100 entries starting at SQ_ALU_SRC_CFILE_BASE)
+ */
+
+#define SQ_ALU_SRC_CFILE_SIZE          0x00000100
+
+/*
+ * SQ_SP_OP_REDUC_BEGIN value (first value of the 0x50..0x53 REDUC range)
+ */
+
+#define SQ_SP_OP_REDUC_BEGIN           0x00000050
+
+/*
+ * SQ_SP_OP_REDUC_END value (last value of the REDUC range; presumably inclusive - confirm)
+ */
+
+#define SQ_SP_OP_REDUC_END             0x00000053
+
+/*
+ * SQ_SP_OP_TRANS_BEGIN value (first value of the 0x60..0x7f TRANS range)
+ */
+
+#define SQ_SP_OP_TRANS_BEGIN           0x00000060
+
+/*
+ * SQ_SP_OP_TRANS_END value (last value of the TRANS range; presumably inclusive - confirm)
+ */
+
+#define SQ_SP_OP_TRANS_END             0x0000007f
+
+/*
+ * SQ_CF_WORD0 struct: one 32-bit ADDR field covering the whole word.
+ */
+
+#define SQ_CF_WORD0_ADDR_SIZE          32
+
+#define SQ_CF_WORD0_ADDR_SHIFT         0
+
+#define SQ_CF_WORD0_ADDR_MASK          0xffffffff
+
+#define SQ_CF_WORD0_MASK \
+     (SQ_CF_WORD0_ADDR_MASK)
+
+#define SQ_CF_WORD0_DEFAULT            0xcdcdcdcd
+/* Extract ADDR from a raw word; the argument may be any expression. */
+#define SQ_CF_WORD0_GET_ADDR(sq_cf_word0) \
+     (((sq_cf_word0) & SQ_CF_WORD0_ADDR_MASK) >> SQ_CF_WORD0_ADDR_SHIFT)
+/* Store addr into the ADDR bits of the lvalue sq_cf_word0_reg (which is evaluated twice). */
+#define SQ_CF_WORD0_SET_ADDR(sq_cf_word0_reg, addr) \
+     ((sq_cf_word0_reg) = ((sq_cf_word0_reg) & ~SQ_CF_WORD0_ADDR_MASK) | ((unsigned int)(addr) << SQ_CF_WORD0_ADDR_SHIFT))
+
+#if		defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_cf_word0_t {
+          unsigned int addr                           : SQ_CF_WORD0_ADDR_SIZE;
+     } sq_cf_word0_t;
+
+#elif		defined(BIGENDIAN_CPU)
+     /* Same layout as the little-endian case: the word holds a single field. */
+     typedef struct _sq_cf_word0_t {
+          unsigned int addr                           : SQ_CF_WORD0_ADDR_SIZE;
+     } sq_cf_word0_t;
+
+#endif
+/* Raw 32-bit value overlaid with the bit-field view. */
+typedef union {
+     unsigned int val : 32;
+     sq_cf_word0_t f;
+} sq_cf_word0_u;
+
+
+/*
+ * SQ_CF_WORD1 struct: field widths, bit offsets, masks and accessors.
+ */
+
+#define SQ_CF_WORD1_POP_COUNT_SIZE     3
+#define SQ_CF_WORD1_CF_CONST_SIZE      5
+#define SQ_CF_WORD1_COND_SIZE          2
+#define SQ_CF_WORD1_COUNT_SIZE         3
+#define SQ_CF_WORD1_CALL_COUNT_SIZE    6
+#define SQ_CF_WORD1_COUNT_3_SIZE       1
+#define SQ_CF_WORD1_END_OF_PROGRAM_SIZE 1
+#define SQ_CF_WORD1_VALID_PIXEL_MODE_SIZE 1
+#define SQ_CF_WORD1_CF_INST_SIZE       7
+#define SQ_CF_WORD1_WHOLE_QUAD_MODE_SIZE 1
+#define SQ_CF_WORD1_BARRIER_SIZE       1
+/* Bit offset of each field within the 32-bit word (bit 20 has no field). */
+#define SQ_CF_WORD1_POP_COUNT_SHIFT    0
+#define SQ_CF_WORD1_CF_CONST_SHIFT     3
+#define SQ_CF_WORD1_COND_SHIFT         8
+#define SQ_CF_WORD1_COUNT_SHIFT        10
+#define SQ_CF_WORD1_CALL_COUNT_SHIFT   13
+#define SQ_CF_WORD1_COUNT_3_SHIFT      19
+#define SQ_CF_WORD1_END_OF_PROGRAM_SHIFT 21
+#define SQ_CF_WORD1_VALID_PIXEL_MODE_SHIFT 22
+#define SQ_CF_WORD1_CF_INST_SHIFT      23
+#define SQ_CF_WORD1_WHOLE_QUAD_MODE_SHIFT 30
+#define SQ_CF_WORD1_BARRIER_SHIFT      31
+/* In-place mask of each field: ((1 << SIZE) - 1) << SHIFT. */
+#define SQ_CF_WORD1_POP_COUNT_MASK     0x00000007
+#define SQ_CF_WORD1_CF_CONST_MASK      0x000000f8
+#define SQ_CF_WORD1_COND_MASK          0x00000300
+#define SQ_CF_WORD1_COUNT_MASK         0x00001c00
+#define SQ_CF_WORD1_CALL_COUNT_MASK    0x0007e000
+#define SQ_CF_WORD1_COUNT_3_MASK       0x00080000
+#define SQ_CF_WORD1_END_OF_PROGRAM_MASK 0x00200000
+#define SQ_CF_WORD1_VALID_PIXEL_MODE_MASK 0x00400000
+#define SQ_CF_WORD1_CF_INST_MASK       0x3f800000
+#define SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK 0x40000000
+#define SQ_CF_WORD1_BARRIER_MASK       0x80000000
+
+#define SQ_CF_WORD1_MASK \
+     (SQ_CF_WORD1_POP_COUNT_MASK | \
+      SQ_CF_WORD1_CF_CONST_MASK | \
+      SQ_CF_WORD1_COND_MASK | \
+      SQ_CF_WORD1_COUNT_MASK | \
+      SQ_CF_WORD1_CALL_COUNT_MASK | \
+      SQ_CF_WORD1_COUNT_3_MASK | \
+      SQ_CF_WORD1_END_OF_PROGRAM_MASK | \
+      SQ_CF_WORD1_VALID_PIXEL_MODE_MASK | \
+      SQ_CF_WORD1_CF_INST_MASK | \
+      SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK | \
+      SQ_CF_WORD1_BARRIER_MASK)
+
+#define SQ_CF_WORD1_DEFAULT            0xcdcdcdcd
+/* GET_*: extract one field from a raw word; the argument may be any expression. */
+#define SQ_CF_WORD1_GET_POP_COUNT(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_POP_COUNT_MASK) >> SQ_CF_WORD1_POP_COUNT_SHIFT)
+#define SQ_CF_WORD1_GET_CF_CONST(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_CF_CONST_MASK) >> SQ_CF_WORD1_CF_CONST_SHIFT)
+#define SQ_CF_WORD1_GET_COND(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_COND_MASK) >> SQ_CF_WORD1_COND_SHIFT)
+#define SQ_CF_WORD1_GET_COUNT(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_COUNT_MASK) >> SQ_CF_WORD1_COUNT_SHIFT)
+#define SQ_CF_WORD1_GET_CALL_COUNT(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_CALL_COUNT_MASK) >> SQ_CF_WORD1_CALL_COUNT_SHIFT)
+#define SQ_CF_WORD1_GET_COUNT_3(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_COUNT_3_MASK) >> SQ_CF_WORD1_COUNT_3_SHIFT)
+#define SQ_CF_WORD1_GET_END_OF_PROGRAM(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_END_OF_PROGRAM_MASK) >> SQ_CF_WORD1_END_OF_PROGRAM_SHIFT)
+#define SQ_CF_WORD1_GET_VALID_PIXEL_MODE(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_VALID_PIXEL_MODE_MASK) >> SQ_CF_WORD1_VALID_PIXEL_MODE_SHIFT)
+#define SQ_CF_WORD1_GET_CF_INST(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_CF_INST_MASK) >> SQ_CF_WORD1_CF_INST_SHIFT)
+#define SQ_CF_WORD1_GET_WHOLE_QUAD_MODE(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK) >> SQ_CF_WORD1_WHOLE_QUAD_MODE_SHIFT)
+#define SQ_CF_WORD1_GET_BARRIER(sq_cf_word1) \
+     (((sq_cf_word1) & SQ_CF_WORD1_BARRIER_MASK) >> SQ_CF_WORD1_BARRIER_SHIFT)
+/* SET_*: replace one field of the lvalue; value is shifted as unsigned so bit 31 is well defined. */
+#define SQ_CF_WORD1_SET_POP_COUNT(sq_cf_word1_reg, pop_count) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_POP_COUNT_MASK) | ((unsigned int)(pop_count) << SQ_CF_WORD1_POP_COUNT_SHIFT))
+#define SQ_CF_WORD1_SET_CF_CONST(sq_cf_word1_reg, cf_const) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_CF_CONST_MASK) | ((unsigned int)(cf_const) << SQ_CF_WORD1_CF_CONST_SHIFT))
+#define SQ_CF_WORD1_SET_COND(sq_cf_word1_reg, cond) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_COND_MASK) | ((unsigned int)(cond) << SQ_CF_WORD1_COND_SHIFT))
+#define SQ_CF_WORD1_SET_COUNT(sq_cf_word1_reg, count) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_COUNT_MASK) | ((unsigned int)(count) << SQ_CF_WORD1_COUNT_SHIFT))
+#define SQ_CF_WORD1_SET_CALL_COUNT(sq_cf_word1_reg, call_count) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_CALL_COUNT_MASK) | ((unsigned int)(call_count) << SQ_CF_WORD1_CALL_COUNT_SHIFT))
+#define SQ_CF_WORD1_SET_COUNT_3(sq_cf_word1_reg, count_3) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_COUNT_3_MASK) | ((unsigned int)(count_3) << SQ_CF_WORD1_COUNT_3_SHIFT))
+#define SQ_CF_WORD1_SET_END_OF_PROGRAM(sq_cf_word1_reg, end_of_program) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_END_OF_PROGRAM_MASK) | ((unsigned int)(end_of_program) << SQ_CF_WORD1_END_OF_PROGRAM_SHIFT))
+#define SQ_CF_WORD1_SET_VALID_PIXEL_MODE(sq_cf_word1_reg, valid_pixel_mode) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_VALID_PIXEL_MODE_MASK) | ((unsigned int)(valid_pixel_mode) << SQ_CF_WORD1_VALID_PIXEL_MODE_SHIFT))
+#define SQ_CF_WORD1_SET_CF_INST(sq_cf_word1_reg, cf_inst) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_CF_INST_MASK) | ((unsigned int)(cf_inst) << SQ_CF_WORD1_CF_INST_SHIFT))
+#define SQ_CF_WORD1_SET_WHOLE_QUAD_MODE(sq_cf_word1_reg, whole_quad_mode) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_WHOLE_QUAD_MODE_MASK) | ((unsigned int)(whole_quad_mode) << SQ_CF_WORD1_WHOLE_QUAD_MODE_SHIFT))
+#define SQ_CF_WORD1_SET_BARRIER(sq_cf_word1_reg, barrier) \
+     ((sq_cf_word1_reg) = ((sq_cf_word1_reg) & ~SQ_CF_WORD1_BARRIER_MASK) | ((unsigned int)(barrier) << SQ_CF_WORD1_BARRIER_SHIFT))
+
+#if		defined(LITTLEENDIAN_CPU)
+     /* Fields listed from bit 0 upwards. */
+     typedef struct _sq_cf_word1_t {
+          unsigned int pop_count                      : SQ_CF_WORD1_POP_COUNT_SIZE;
+          unsigned int cf_const                       : SQ_CF_WORD1_CF_CONST_SIZE;
+          unsigned int cond                           : SQ_CF_WORD1_COND_SIZE;
+          unsigned int count                          : SQ_CF_WORD1_COUNT_SIZE;
+          unsigned int call_count                     : SQ_CF_WORD1_CALL_COUNT_SIZE;
+          unsigned int count_3                        : SQ_CF_WORD1_COUNT_3_SIZE;
+          unsigned int                                : 1; /* bit 20: no named field */
+          unsigned int end_of_program                 : SQ_CF_WORD1_END_OF_PROGRAM_SIZE;
+          unsigned int valid_pixel_mode               : SQ_CF_WORD1_VALID_PIXEL_MODE_SIZE;
+          unsigned int cf_inst                        : SQ_CF_WORD1_CF_INST_SIZE;
+          unsigned int whole_quad_mode                : SQ_CF_WORD1_WHOLE_QUAD_MODE_SIZE;
+          unsigned int barrier                        : SQ_CF_WORD1_BARRIER_SIZE;
+     } sq_cf_word1_t;
+
+#elif		defined(BIGENDIAN_CPU)
+     /* Same fields declared in the reverse order. */
+     typedef struct _sq_cf_word1_t {
+          unsigned int barrier                        : SQ_CF_WORD1_BARRIER_SIZE;
+          unsigned int whole_quad_mode                : SQ_CF_WORD1_WHOLE_QUAD_MODE_SIZE;
+          unsigned int cf_inst                        : SQ_CF_WORD1_CF_INST_SIZE;
+          unsigned int valid_pixel_mode               : SQ_CF_WORD1_VALID_PIXEL_MODE_SIZE;
+          unsigned int end_of_program                 : SQ_CF_WORD1_END_OF_PROGRAM_SIZE;
+          unsigned int                                : 1; /* bit 20: no named field */
+          unsigned int count_3                        : SQ_CF_WORD1_COUNT_3_SIZE;
+          unsigned int call_count                     : SQ_CF_WORD1_CALL_COUNT_SIZE;
+          unsigned int count                          : SQ_CF_WORD1_COUNT_SIZE;
+          unsigned int cond                           : SQ_CF_WORD1_COND_SIZE;
+          unsigned int cf_const                       : SQ_CF_WORD1_CF_CONST_SIZE;
+          unsigned int pop_count                      : SQ_CF_WORD1_POP_COUNT_SIZE;
+     } sq_cf_word1_t;
+
+#endif
+/* Raw 32-bit value overlaid with the bit-field view. */
+typedef union {
+     unsigned int val : 32;
+     sq_cf_word1_t f;
+} sq_cf_word1_u;
+
+
+/*
+ * SQ_CF_ALU_WORD0 struct -- per-field width in bits (_SIZE), bit offset of the field's lowest bit (_SHIFT) and in-place bit mask (_MASK).
+ */
+
+#define SQ_CF_ALU_WORD0_ADDR_SIZE      22
+#define SQ_CF_ALU_WORD0_KCACHE_BANK0_SIZE 4
+#define SQ_CF_ALU_WORD0_KCACHE_BANK1_SIZE 4
+#define SQ_CF_ALU_WORD0_KCACHE_MODE0_SIZE 2
+
+#define SQ_CF_ALU_WORD0_ADDR_SHIFT     0
+#define SQ_CF_ALU_WORD0_KCACHE_BANK0_SHIFT 22
+#define SQ_CF_ALU_WORD0_KCACHE_BANK1_SHIFT 26
+#define SQ_CF_ALU_WORD0_KCACHE_MODE0_SHIFT 30
+
+#define SQ_CF_ALU_WORD0_ADDR_MASK      0x003fffff
+#define SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK 0x03c00000
+#define SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK 0x3c000000
+#define SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK 0xc0000000
+
+#define SQ_CF_ALU_WORD0_MASK /* OR of every field mask: 0xffffffff, all 32 bits belong to a field */ \
+     (SQ_CF_ALU_WORD0_ADDR_MASK | \
+      SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK | \
+      SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK | \
+      SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK)
+
+#define SQ_CF_ALU_WORD0_DEFAULT        0xcdcdcdcd /* == 0xcdcdcdcd & SQ_CF_ALU_WORD0_MASK */
+
+#define SQ_CF_ALU_WORD0_GET_ADDR(sq_cf_alu_word0) /* field extractors: mask the word, then shift the field down to bit 0; the argument may be any expression */ \
+     (((sq_cf_alu_word0) & SQ_CF_ALU_WORD0_ADDR_MASK) >> SQ_CF_ALU_WORD0_ADDR_SHIFT)
+#define SQ_CF_ALU_WORD0_GET_KCACHE_BANK0(sq_cf_alu_word0) \
+     (((sq_cf_alu_word0) & SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK) >> SQ_CF_ALU_WORD0_KCACHE_BANK0_SHIFT)
+#define SQ_CF_ALU_WORD0_GET_KCACHE_BANK1(sq_cf_alu_word0) \
+     (((sq_cf_alu_word0) & SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK) >> SQ_CF_ALU_WORD0_KCACHE_BANK1_SHIFT)
+#define SQ_CF_ALU_WORD0_GET_KCACHE_MODE0(sq_cf_alu_word0) \
+     (((sq_cf_alu_word0) & SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK) >> SQ_CF_ALU_WORD0_KCACHE_MODE0_SHIFT)
+
+#define SQ_CF_ALU_WORD0_SET_ADDR(sq_cf_alu_word0_reg, addr) /* field setters: read-modify-write of one field; the value is shifted as unsigned and clipped to the field mask; the register argument is expanded twice, so keep it free of side effects */ \
+     ((sq_cf_alu_word0_reg) = ((sq_cf_alu_word0_reg) & ~SQ_CF_ALU_WORD0_ADDR_MASK) | (((unsigned int)(addr) << SQ_CF_ALU_WORD0_ADDR_SHIFT) & SQ_CF_ALU_WORD0_ADDR_MASK))
+#define SQ_CF_ALU_WORD0_SET_KCACHE_BANK0(sq_cf_alu_word0_reg, kcache_bank0) \
+     ((sq_cf_alu_word0_reg) = ((sq_cf_alu_word0_reg) & ~SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK) | (((unsigned int)(kcache_bank0) << SQ_CF_ALU_WORD0_KCACHE_BANK0_SHIFT) & SQ_CF_ALU_WORD0_KCACHE_BANK0_MASK))
+#define SQ_CF_ALU_WORD0_SET_KCACHE_BANK1(sq_cf_alu_word0_reg, kcache_bank1) \
+     ((sq_cf_alu_word0_reg) = ((sq_cf_alu_word0_reg) & ~SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK) | (((unsigned int)(kcache_bank1) << SQ_CF_ALU_WORD0_KCACHE_BANK1_SHIFT) & SQ_CF_ALU_WORD0_KCACHE_BANK1_MASK))
+#define SQ_CF_ALU_WORD0_SET_KCACHE_MODE0(sq_cf_alu_word0_reg, kcache_mode0) \
+     ((sq_cf_alu_word0_reg) = ((sq_cf_alu_word0_reg) & ~SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK) | (((unsigned int)(kcache_mode0) << SQ_CF_ALU_WORD0_KCACHE_MODE0_SHIFT) & SQ_CF_ALU_WORD0_KCACHE_MODE0_MASK))
+
+#if defined(LITTLEENDIAN_CPU)
+/* LITTLEENDIAN_CPU view: members follow ascending _SHIFT order; the _SIZE widths add up to 32 bits. */
+typedef struct _sq_cf_alu_word0_t {
+     unsigned addr         : SQ_CF_ALU_WORD0_ADDR_SIZE;
+     unsigned kcache_bank0 : SQ_CF_ALU_WORD0_KCACHE_BANK0_SIZE;
+     unsigned kcache_bank1 : SQ_CF_ALU_WORD0_KCACHE_BANK1_SIZE;
+     unsigned kcache_mode0 : SQ_CF_ALU_WORD0_KCACHE_MODE0_SIZE;
+} sq_cf_alu_word0_t;
+
+#elif defined(BIGENDIAN_CPU)
+/* BIGENDIAN_CPU view: the same members declared in the reverse order. */
+typedef struct _sq_cf_alu_word0_t {
+     unsigned kcache_mode0 : SQ_CF_ALU_WORD0_KCACHE_MODE0_SIZE;
+     unsigned kcache_bank1 : SQ_CF_ALU_WORD0_KCACHE_BANK1_SIZE;
+     unsigned kcache_bank0 : SQ_CF_ALU_WORD0_KCACHE_BANK0_SIZE;
+     unsigned addr         : SQ_CF_ALU_WORD0_ADDR_SIZE;
+} sq_cf_alu_word0_t;
+
+#endif
+
+typedef union {
+     unsigned          val : 32;   /* the word as one 32-bit value */
+     sq_cf_alu_word0_t f;          /* the same storage, field by field */
+} sq_cf_alu_word0_u;
+
+
+/*
+ * SQ_CF_ALU_WORD1 struct -- per-field width in bits (_SIZE), bit offset of the field's lowest bit (_SHIFT) and in-place bit mask (_MASK).
+ */
+
+#define SQ_CF_ALU_WORD1_KCACHE_MODE1_SIZE 2
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR0_SIZE 8
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR1_SIZE 8
+#define SQ_CF_ALU_WORD1_COUNT_SIZE     7
+#define SQ_CF_ALU_WORD1_ALT_CONST_SIZE 1
+#define SQ_CF_ALU_WORD1_CF_INST_SIZE   4
+#define SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SIZE 1
+#define SQ_CF_ALU_WORD1_BARRIER_SIZE   1
+
+#define SQ_CF_ALU_WORD1_KCACHE_MODE1_SHIFT 0
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR0_SHIFT 2
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR1_SHIFT 10
+#define SQ_CF_ALU_WORD1_COUNT_SHIFT    18
+#define SQ_CF_ALU_WORD1_ALT_CONST_SHIFT 25
+#define SQ_CF_ALU_WORD1_CF_INST_SHIFT  26
+#define SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SHIFT 30
+#define SQ_CF_ALU_WORD1_BARRIER_SHIFT  31
+
+#define SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK 0x00000003
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK 0x000003fc
+#define SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK 0x0003fc00
+#define SQ_CF_ALU_WORD1_COUNT_MASK     0x01fc0000
+#define SQ_CF_ALU_WORD1_ALT_CONST_MASK 0x02000000
+#define SQ_CF_ALU_WORD1_CF_INST_MASK   0x3c000000
+#define SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK 0x40000000
+#define SQ_CF_ALU_WORD1_BARRIER_MASK   0x80000000
+
+#define SQ_CF_ALU_WORD1_MASK /* OR of every field mask: 0xffffffff, all 32 bits belong to a field */ \
+     (SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK | \
+      SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK | \
+      SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK | \
+      SQ_CF_ALU_WORD1_COUNT_MASK | \
+      SQ_CF_ALU_WORD1_ALT_CONST_MASK | \
+      SQ_CF_ALU_WORD1_CF_INST_MASK | \
+      SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK | \
+      SQ_CF_ALU_WORD1_BARRIER_MASK)
+
+#define SQ_CF_ALU_WORD1_DEFAULT        0xcdcdcdcd /* == 0xcdcdcdcd & SQ_CF_ALU_WORD1_MASK */
+
+#define SQ_CF_ALU_WORD1_GET_KCACHE_MODE1(sq_cf_alu_word1) /* field extractors: mask the word, then shift the field down to bit 0; the argument may be any expression */ \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK) >> SQ_CF_ALU_WORD1_KCACHE_MODE1_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_KCACHE_ADDR0(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK) >> SQ_CF_ALU_WORD1_KCACHE_ADDR0_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_KCACHE_ADDR1(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK) >> SQ_CF_ALU_WORD1_KCACHE_ADDR1_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_COUNT(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_COUNT_MASK) >> SQ_CF_ALU_WORD1_COUNT_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_ALT_CONST(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_ALT_CONST_MASK) >> SQ_CF_ALU_WORD1_ALT_CONST_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_CF_INST(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_CF_INST_MASK) >> SQ_CF_ALU_WORD1_CF_INST_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_WHOLE_QUAD_MODE(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK) >> SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SHIFT)
+#define SQ_CF_ALU_WORD1_GET_BARRIER(sq_cf_alu_word1) \
+     (((sq_cf_alu_word1) & SQ_CF_ALU_WORD1_BARRIER_MASK) >> SQ_CF_ALU_WORD1_BARRIER_SHIFT)
+
+#define SQ_CF_ALU_WORD1_SET_KCACHE_MODE1(sq_cf_alu_word1_reg, kcache_mode1) /* field setters: read-modify-write of one field; the value is shifted as unsigned and clipped to the field mask; the register argument is expanded twice, so keep it free of side effects */ \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK) | (((unsigned int)(kcache_mode1) << SQ_CF_ALU_WORD1_KCACHE_MODE1_SHIFT) & SQ_CF_ALU_WORD1_KCACHE_MODE1_MASK))
+#define SQ_CF_ALU_WORD1_SET_KCACHE_ADDR0(sq_cf_alu_word1_reg, kcache_addr0) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK) | (((unsigned int)(kcache_addr0) << SQ_CF_ALU_WORD1_KCACHE_ADDR0_SHIFT) & SQ_CF_ALU_WORD1_KCACHE_ADDR0_MASK))
+#define SQ_CF_ALU_WORD1_SET_KCACHE_ADDR1(sq_cf_alu_word1_reg, kcache_addr1) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK) | (((unsigned int)(kcache_addr1) << SQ_CF_ALU_WORD1_KCACHE_ADDR1_SHIFT) & SQ_CF_ALU_WORD1_KCACHE_ADDR1_MASK))
+#define SQ_CF_ALU_WORD1_SET_COUNT(sq_cf_alu_word1_reg, count) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_COUNT_MASK) | (((unsigned int)(count) << SQ_CF_ALU_WORD1_COUNT_SHIFT) & SQ_CF_ALU_WORD1_COUNT_MASK))
+#define SQ_CF_ALU_WORD1_SET_ALT_CONST(sq_cf_alu_word1_reg, alt_const) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_ALT_CONST_MASK) | (((unsigned int)(alt_const) << SQ_CF_ALU_WORD1_ALT_CONST_SHIFT) & SQ_CF_ALU_WORD1_ALT_CONST_MASK))
+#define SQ_CF_ALU_WORD1_SET_CF_INST(sq_cf_alu_word1_reg, cf_inst) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_CF_INST_MASK) | (((unsigned int)(cf_inst) << SQ_CF_ALU_WORD1_CF_INST_SHIFT) & SQ_CF_ALU_WORD1_CF_INST_MASK))
+#define SQ_CF_ALU_WORD1_SET_WHOLE_QUAD_MODE(sq_cf_alu_word1_reg, whole_quad_mode) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK) | (((unsigned int)(whole_quad_mode) << SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SHIFT) & SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_MASK))
+#define SQ_CF_ALU_WORD1_SET_BARRIER(sq_cf_alu_word1_reg, barrier) \
+     ((sq_cf_alu_word1_reg) = ((sq_cf_alu_word1_reg) & ~SQ_CF_ALU_WORD1_BARRIER_MASK) | (((unsigned int)(barrier) << SQ_CF_ALU_WORD1_BARRIER_SHIFT) & SQ_CF_ALU_WORD1_BARRIER_MASK))
+
+#if defined(LITTLEENDIAN_CPU)
+/* LITTLEENDIAN_CPU view: members follow ascending _SHIFT order; the _SIZE widths add up to 32 bits. */
+typedef struct _sq_cf_alu_word1_t {
+     unsigned kcache_mode1    : SQ_CF_ALU_WORD1_KCACHE_MODE1_SIZE;
+     unsigned kcache_addr0    : SQ_CF_ALU_WORD1_KCACHE_ADDR0_SIZE;
+     unsigned kcache_addr1    : SQ_CF_ALU_WORD1_KCACHE_ADDR1_SIZE;
+     unsigned count           : SQ_CF_ALU_WORD1_COUNT_SIZE;
+     unsigned alt_const       : SQ_CF_ALU_WORD1_ALT_CONST_SIZE;
+     unsigned cf_inst         : SQ_CF_ALU_WORD1_CF_INST_SIZE;
+     unsigned whole_quad_mode : SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SIZE;
+     unsigned barrier         : SQ_CF_ALU_WORD1_BARRIER_SIZE;
+} sq_cf_alu_word1_t;
+
+#elif defined(BIGENDIAN_CPU)
+/* BIGENDIAN_CPU view: the same members declared in the reverse order. */
+typedef struct _sq_cf_alu_word1_t {
+     unsigned barrier         : SQ_CF_ALU_WORD1_BARRIER_SIZE;
+     unsigned whole_quad_mode : SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE_SIZE;
+     unsigned cf_inst         : SQ_CF_ALU_WORD1_CF_INST_SIZE;
+     unsigned alt_const       : SQ_CF_ALU_WORD1_ALT_CONST_SIZE;
+     unsigned count           : SQ_CF_ALU_WORD1_COUNT_SIZE;
+     unsigned kcache_addr1    : SQ_CF_ALU_WORD1_KCACHE_ADDR1_SIZE;
+     unsigned kcache_addr0    : SQ_CF_ALU_WORD1_KCACHE_ADDR0_SIZE;
+     unsigned kcache_mode1    : SQ_CF_ALU_WORD1_KCACHE_MODE1_SIZE;
+} sq_cf_alu_word1_t;
+
+#endif
+
+typedef union {
+     unsigned          val : 32;   /* the word as one 32-bit value */
+     sq_cf_alu_word1_t f;          /* the same storage, field by field */
+} sq_cf_alu_word1_u;
+
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD0 struct -- per-field width in bits (_SIZE), bit offset of the field's lowest bit (_SHIFT) and in-place bit mask (_MASK).
+ */
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SIZE 13
+#define SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SIZE 2
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SIZE 7
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SIZE 1
+#define SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SIZE 7
+#define SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SIZE 2
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SHIFT 0
+#define SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SHIFT 13
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SHIFT 15
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SHIFT 22
+#define SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SHIFT 23
+#define SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SHIFT 30
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK 0x00001fff
+#define SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK 0x00006000
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK 0x003f8000
+#define SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK 0x00400000
+#define SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK 0x3f800000
+#define SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK 0xc0000000
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_MASK /* OR of every field mask: 0xffffffff, all 32 bits belong to a field */ \
+     (SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK)
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_DEFAULT 0xcdcdcdcd /* == 0xcdcdcdcd & SQ_CF_ALLOC_EXPORT_WORD0_MASK */
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_ARRAY_BASE(sq_cf_alloc_export_word0) /* field extractors: mask the word, then shift the field down to bit 0; the argument may be any expression */ \
+     (((sq_cf_alloc_export_word0) & SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_TYPE(sq_cf_alloc_export_word0) \
+     (((sq_cf_alloc_export_word0) & SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_RW_GPR(sq_cf_alloc_export_word0) \
+     (((sq_cf_alloc_export_word0) & SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_RW_REL(sq_cf_alloc_export_word0) \
+     (((sq_cf_alloc_export_word0) & SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_INDEX_GPR(sq_cf_alloc_export_word0) \
+     (((sq_cf_alloc_export_word0) & SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD0_GET_ELEM_SIZE(sq_cf_alloc_export_word0) \
+     (((sq_cf_alloc_export_word0) & SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SHIFT)
+
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_ARRAY_BASE(sq_cf_alloc_export_word0_reg, array_base) /* field setters: read-modify-write of one field; the value is shifted as unsigned and clipped to the field mask; the register argument is expanded twice, so keep it free of side effects */ \
+     ((sq_cf_alloc_export_word0_reg) = ((sq_cf_alloc_export_word0_reg) & ~SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK) | (((unsigned int)(array_base) << SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_TYPE(sq_cf_alloc_export_word0_reg, type) \
+     ((sq_cf_alloc_export_word0_reg) = ((sq_cf_alloc_export_word0_reg) & ~SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK) | (((unsigned int)(type) << SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD0_TYPE_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_RW_GPR(sq_cf_alloc_export_word0_reg, rw_gpr) \
+     ((sq_cf_alloc_export_word0_reg) = ((sq_cf_alloc_export_word0_reg) & ~SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK) | (((unsigned int)(rw_gpr) << SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_RW_REL(sq_cf_alloc_export_word0_reg, rw_rel) \
+     ((sq_cf_alloc_export_word0_reg) = ((sq_cf_alloc_export_word0_reg) & ~SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK) | (((unsigned int)(rw_rel) << SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_INDEX_GPR(sq_cf_alloc_export_word0_reg, index_gpr) \
+     ((sq_cf_alloc_export_word0_reg) = ((sq_cf_alloc_export_word0_reg) & ~SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK) | (((unsigned int)(index_gpr) << SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD0_SET_ELEM_SIZE(sq_cf_alloc_export_word0_reg, elem_size) \
+     ((sq_cf_alloc_export_word0_reg) = ((sq_cf_alloc_export_word0_reg) & ~SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK) | (((unsigned int)(elem_size) << SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_MASK))
+
+#if defined(LITTLEENDIAN_CPU)
+/* LITTLEENDIAN_CPU view: members follow ascending _SHIFT order; the _SIZE widths add up to 32 bits. */
+typedef struct _sq_cf_alloc_export_word0_t {
+     unsigned array_base : SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SIZE;
+     unsigned type       : SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SIZE;
+     unsigned rw_gpr     : SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SIZE;
+     unsigned rw_rel     : SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SIZE;
+     unsigned index_gpr  : SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SIZE;
+     unsigned elem_size  : SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SIZE;
+} sq_cf_alloc_export_word0_t;
+
+#elif defined(BIGENDIAN_CPU)
+/* BIGENDIAN_CPU view: the same members declared in the reverse order. */
+typedef struct _sq_cf_alloc_export_word0_t {
+     unsigned elem_size  : SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE_SIZE;
+     unsigned index_gpr  : SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR_SIZE;
+     unsigned rw_rel     : SQ_CF_ALLOC_EXPORT_WORD0_RW_REL_SIZE;
+     unsigned rw_gpr     : SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR_SIZE;
+     unsigned type       : SQ_CF_ALLOC_EXPORT_WORD0_TYPE_SIZE;
+     unsigned array_base : SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE_SIZE;
+} sq_cf_alloc_export_word0_t;
+
+#endif
+
+typedef union {
+     unsigned                   val : 32;   /* the word as one 32-bit value */
+     sq_cf_alloc_export_word0_t f;          /* the same storage, field by field */
+} sq_cf_alloc_export_word0_u;
+
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1 struct -- per-field width in bits (_SIZE), bit offset of the field's lowest bit (_SHIFT) and in-place bit mask (_MASK); no field is defined below bit 17 here.
+ */
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SIZE 4
+#define SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SIZE 1
+#define SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SIZE 1
+#define SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SIZE 7
+#define SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SIZE 1
+#define SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SIZE 1
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SHIFT 17
+#define SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SHIFT 21
+#define SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SHIFT 22
+#define SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SHIFT 23
+#define SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SHIFT 30
+#define SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SHIFT 31
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK 0x001e0000
+#define SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK 0x00200000
+#define SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK 0x00400000
+#define SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK 0x3f800000
+#define SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK 0x40000000
+#define SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK 0x80000000
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_MASK /* OR of every field mask: 0xfffe0000 (bits 17..31) */ \
+     (SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK)
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_DEFAULT 0xcdcc0000 /* == 0xcdcdcdcd & SQ_CF_ALLOC_EXPORT_WORD1_MASK */
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_BURST_COUNT(sq_cf_alloc_export_word1) /* field extractors: mask the word, then shift the field down to bit 0; the argument may be any expression */ \
+     (((sq_cf_alloc_export_word1) & SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_END_OF_PROGRAM(sq_cf_alloc_export_word1) \
+     (((sq_cf_alloc_export_word1) & SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_VALID_PIXEL_MODE(sq_cf_alloc_export_word1) \
+     (((sq_cf_alloc_export_word1) & SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_CF_INST(sq_cf_alloc_export_word1) \
+     (((sq_cf_alloc_export_word1) & SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_WHOLE_QUAD_MODE(sq_cf_alloc_export_word1) \
+     (((sq_cf_alloc_export_word1) & SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_GET_BARRIER(sq_cf_alloc_export_word1) \
+     (((sq_cf_alloc_export_word1) & SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SHIFT)
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_BURST_COUNT(sq_cf_alloc_export_word1_reg, burst_count) /* field setters: read-modify-write of one field; the value is shifted as unsigned and clipped to the field mask; the register argument is expanded twice, so keep it free of side effects */ \
+     ((sq_cf_alloc_export_word1_reg) = ((sq_cf_alloc_export_word1_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK) | (((unsigned int)(burst_count) << SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_END_OF_PROGRAM(sq_cf_alloc_export_word1_reg, end_of_program) \
+     ((sq_cf_alloc_export_word1_reg) = ((sq_cf_alloc_export_word1_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK) | (((unsigned int)(end_of_program) << SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_VALID_PIXEL_MODE(sq_cf_alloc_export_word1_reg, valid_pixel_mode) \
+     ((sq_cf_alloc_export_word1_reg) = ((sq_cf_alloc_export_word1_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK) | (((unsigned int)(valid_pixel_mode) << SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_CF_INST(sq_cf_alloc_export_word1_reg, cf_inst) \
+     ((sq_cf_alloc_export_word1_reg) = ((sq_cf_alloc_export_word1_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK) | (((unsigned int)(cf_inst) << SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_WHOLE_QUAD_MODE(sq_cf_alloc_export_word1_reg, whole_quad_mode) \
+     ((sq_cf_alloc_export_word1_reg) = ((sq_cf_alloc_export_word1_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK) | (((unsigned int)(whole_quad_mode) << SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SET_BARRIER(sq_cf_alloc_export_word1_reg, barrier) \
+     ((sq_cf_alloc_export_word1_reg) = ((sq_cf_alloc_export_word1_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK) | (((unsigned int)(barrier) << SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_MASK))
+
+#if defined(LITTLEENDIAN_CPU)
+/* LITTLEENDIAN_CPU view: a 17-bit unnamed gap, then members in ascending _SHIFT order; widths add up to 32 bits. */
+typedef struct _sq_cf_alloc_export_word1_t {
+     unsigned                  : 17;   /* no named field below BURST_COUNT (shift 17) */
+     unsigned burst_count      : SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SIZE;
+     unsigned end_of_program   : SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SIZE;
+     unsigned valid_pixel_mode : SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SIZE;
+     unsigned cf_inst          : SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SIZE;
+     unsigned whole_quad_mode  : SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SIZE;
+     unsigned barrier          : SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SIZE;
+} sq_cf_alloc_export_word1_t;
+
+#elif defined(BIGENDIAN_CPU)
+/* BIGENDIAN_CPU view: the same members declared in the reverse order, unnamed gap last. */
+typedef struct _sq_cf_alloc_export_word1_t {
+     unsigned barrier          : SQ_CF_ALLOC_EXPORT_WORD1_BARRIER_SIZE;
+     unsigned whole_quad_mode  : SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE_SIZE;
+     unsigned cf_inst          : SQ_CF_ALLOC_EXPORT_WORD1_CF_INST_SIZE;
+     unsigned valid_pixel_mode : SQ_CF_ALLOC_EXPORT_WORD1_VALID_PIXEL_MODE_SIZE;
+     unsigned end_of_program   : SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM_SIZE;
+     unsigned burst_count      : SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT_SIZE;
+     unsigned                  : 17;   /* no named field below BURST_COUNT (shift 17) */
+} sq_cf_alloc_export_word1_t;
+
+#endif
+
+typedef union {
+     unsigned                   val : 32;   /* the word as one 32-bit value */
+     sq_cf_alloc_export_word1_t f;          /* the same storage, field by field */
+} sq_cf_alloc_export_word1_u;
+
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1_BUF struct -- per-field width in bits (_SIZE), bit offset of the field's lowest bit (_SHIFT) and in-place bit mask (_MASK); only bits 0..15 carry fields here.
+ */
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SIZE 12
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SIZE 4
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SHIFT 0
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SHIFT 12
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK 0x00000fff
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK 0x0000f000
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_MASK /* OR of every field mask: 0x0000ffff (bits 0..15) */ \
+     (SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK)
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_DEFAULT 0x0000cdcd /* == 0xcdcdcdcd & SQ_CF_ALLOC_EXPORT_WORD1_BUF_MASK */
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_GET_ARRAY_SIZE(sq_cf_alloc_export_word1_buf) /* field extractors: mask the word, then shift the field down to bit 0; the argument may be any expression */ \
+     (((sq_cf_alloc_export_word1_buf) & SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_GET_COMP_MASK(sq_cf_alloc_export_word1_buf) \
+     (((sq_cf_alloc_export_word1_buf) & SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SHIFT)
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_SET_ARRAY_SIZE(sq_cf_alloc_export_word1_buf_reg, array_size) /* field setters: read-modify-write of one field; the value is shifted as unsigned and clipped to the field mask; the register argument is expanded twice, so keep it free of side effects */ \
+     ((sq_cf_alloc_export_word1_buf_reg) = ((sq_cf_alloc_export_word1_buf_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK) | (((unsigned int)(array_size) << SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_BUF_SET_COMP_MASK(sq_cf_alloc_export_word1_buf_reg, comp_mask) \
+     ((sq_cf_alloc_export_word1_buf_reg) = ((sq_cf_alloc_export_word1_buf_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK) | (((unsigned int)(comp_mask) << SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_MASK))
+
+#if defined(LITTLEENDIAN_CPU)
+/* LITTLEENDIAN_CPU view: members in ascending _SHIFT order, then a 16-bit unnamed gap; widths add up to 32 bits. */
+typedef struct _sq_cf_alloc_export_word1_buf_t {
+     unsigned array_size : SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SIZE;
+     unsigned comp_mask  : SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SIZE;
+     unsigned            : 16;   /* no named field in this view */
+} sq_cf_alloc_export_word1_buf_t;
+
+#elif defined(BIGENDIAN_CPU)
+/* BIGENDIAN_CPU view: the same members declared in the reverse order, unnamed gap first. */
+typedef struct _sq_cf_alloc_export_word1_buf_t {
+     unsigned            : 16;   /* no named field in this view */
+     unsigned comp_mask  : SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK_SIZE;
+     unsigned array_size : SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE_SIZE;
+} sq_cf_alloc_export_word1_buf_t;
+
+#endif
+
+typedef union {
+     unsigned                       val : 32;   /* the word as one 32-bit value */
+     sq_cf_alloc_export_word1_buf_t f;          /* the same storage, field by field */
+} sq_cf_alloc_export_word1_buf_u;
+
+
+/*
+ * SQ_CF_ALLOC_EXPORT_WORD1_SWIZ struct -- per-field width in bits (_SIZE), bit offset of the field's lowest bit (_SHIFT) and in-place bit mask (_MASK); only bits 0..11 carry fields here.
+ */
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SIZE 3
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SIZE 3
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SIZE 3
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SIZE 3
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SHIFT 0
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SHIFT 3
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SHIFT 6
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SHIFT 9
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK 0x00000007
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK 0x00000038
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK 0x000001c0
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK 0x00000e00
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_MASK /* OR of every field mask: 0x00000fff (bits 0..11) */ \
+     (SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK | \
+      SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK)
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_DEFAULT 0x00000dcd /* == 0xcdcdcdcd & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_MASK */
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_X(sq_cf_alloc_export_word1_swiz) /* field extractors: mask the word, then shift the field down to bit 0; the argument may be any expression */ \
+     (((sq_cf_alloc_export_word1_swiz) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_Y(sq_cf_alloc_export_word1_swiz) \
+     (((sq_cf_alloc_export_word1_swiz) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_Z(sq_cf_alloc_export_word1_swiz) \
+     (((sq_cf_alloc_export_word1_swiz) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SHIFT)
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_GET_SEL_W(sq_cf_alloc_export_word1_swiz) \
+     (((sq_cf_alloc_export_word1_swiz) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK) >> SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SHIFT)
+
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_X(sq_cf_alloc_export_word1_swiz_reg, sel_x) /* field setters: read-modify-write of one field; the value is shifted as unsigned and clipped to the field mask; the register argument is expanded twice, so keep it free of side effects */ \
+     ((sq_cf_alloc_export_word1_swiz_reg) = ((sq_cf_alloc_export_word1_swiz_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK) | (((unsigned int)(sel_x) << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_Y(sq_cf_alloc_export_word1_swiz_reg, sel_y) \
+     ((sq_cf_alloc_export_word1_swiz_reg) = ((sq_cf_alloc_export_word1_swiz_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK) | (((unsigned int)(sel_y) << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_Z(sq_cf_alloc_export_word1_swiz_reg, sel_z) \
+     ((sq_cf_alloc_export_word1_swiz_reg) = ((sq_cf_alloc_export_word1_swiz_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK) | (((unsigned int)(sel_z) << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_MASK))
+#define SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SET_SEL_W(sq_cf_alloc_export_word1_swiz_reg, sel_w) \
+     ((sq_cf_alloc_export_word1_swiz_reg) = ((sq_cf_alloc_export_word1_swiz_reg) & ~SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK) | (((unsigned int)(sel_w) << SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SHIFT) & SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_MASK))
+
+#if defined(LITTLEENDIAN_CPU)
+/* LITTLEENDIAN_CPU view: members in ascending _SHIFT order, then a 20-bit unnamed gap; widths add up to 32 bits. */
+typedef struct _sq_cf_alloc_export_word1_swiz_t {
+     unsigned sel_x : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SIZE;
+     unsigned sel_y : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SIZE;
+     unsigned sel_z : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SIZE;
+     unsigned sel_w : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SIZE;
+     unsigned       : 20;   /* no named field in this view */
+} sq_cf_alloc_export_word1_swiz_t;
+
+#elif defined(BIGENDIAN_CPU)
+/* BIGENDIAN_CPU view: the same members declared in the reverse order, unnamed gap first. */
+typedef struct _sq_cf_alloc_export_word1_swiz_t {
+     unsigned       : 20;   /* no named field in this view */
+     unsigned sel_w : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W_SIZE;
+     unsigned sel_z : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z_SIZE;
+     unsigned sel_y : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y_SIZE;
+     unsigned sel_x : SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X_SIZE;
+} sq_cf_alloc_export_word1_swiz_t;
+
+#endif
+
+typedef union {
+     unsigned                        val : 32;   /* the word as one 32-bit value */
+     sq_cf_alloc_export_word1_swiz_t f;          /* the same storage, field by field */
+} sq_cf_alloc_export_word1_swiz_u;
+
+
+/*
+ * SQ_ALU_WORD0 struct -- per-field width in bits (_SIZE), bit offset of the field's lowest bit (_SHIFT) and in-place bit mask (_MASK).
+ */
+
+#define SQ_ALU_WORD0_SRC0_SEL_SIZE     9
+#define SQ_ALU_WORD0_SRC0_REL_SIZE     1
+#define SQ_ALU_WORD0_SRC0_CHAN_SIZE    2
+#define SQ_ALU_WORD0_SRC0_NEG_SIZE     1
+#define SQ_ALU_WORD0_SRC1_SEL_SIZE     9
+#define SQ_ALU_WORD0_SRC1_REL_SIZE     1
+#define SQ_ALU_WORD0_SRC1_CHAN_SIZE    2
+#define SQ_ALU_WORD0_SRC1_NEG_SIZE     1
+#define SQ_ALU_WORD0_INDEX_MODE_SIZE   3
+#define SQ_ALU_WORD0_PRED_SEL_SIZE     2
+#define SQ_ALU_WORD0_LAST_SIZE         1
+
+#define SQ_ALU_WORD0_SRC0_SEL_SHIFT    0
+#define SQ_ALU_WORD0_SRC0_REL_SHIFT    9
+#define SQ_ALU_WORD0_SRC0_CHAN_SHIFT   10
+#define SQ_ALU_WORD0_SRC0_NEG_SHIFT    12
+#define SQ_ALU_WORD0_SRC1_SEL_SHIFT    13
+#define SQ_ALU_WORD0_SRC1_REL_SHIFT    22
+#define SQ_ALU_WORD0_SRC1_CHAN_SHIFT   23
+#define SQ_ALU_WORD0_SRC1_NEG_SHIFT    25
+#define SQ_ALU_WORD0_INDEX_MODE_SHIFT  26
+#define SQ_ALU_WORD0_PRED_SEL_SHIFT    29
+#define SQ_ALU_WORD0_LAST_SHIFT        31
+
+#define SQ_ALU_WORD0_SRC0_SEL_MASK     0x000001ff
+#define SQ_ALU_WORD0_SRC0_REL_MASK     0x00000200
+#define SQ_ALU_WORD0_SRC0_CHAN_MASK    0x00000c00
+#define SQ_ALU_WORD0_SRC0_NEG_MASK     0x00001000
+#define SQ_ALU_WORD0_SRC1_SEL_MASK     0x003fe000
+#define SQ_ALU_WORD0_SRC1_REL_MASK     0x00400000
+#define SQ_ALU_WORD0_SRC1_CHAN_MASK    0x01800000
+#define SQ_ALU_WORD0_SRC1_NEG_MASK     0x02000000
+#define SQ_ALU_WORD0_INDEX_MODE_MASK   0x1c000000
+#define SQ_ALU_WORD0_PRED_SEL_MASK     0x60000000
+#define SQ_ALU_WORD0_LAST_MASK         0x80000000
+
+#define SQ_ALU_WORD0_MASK /* OR of every field mask: 0xffffffff, all 32 bits belong to a field */ \
+     (SQ_ALU_WORD0_SRC0_SEL_MASK | \
+      SQ_ALU_WORD0_SRC0_REL_MASK | \
+      SQ_ALU_WORD0_SRC0_CHAN_MASK | \
+      SQ_ALU_WORD0_SRC0_NEG_MASK | \
+      SQ_ALU_WORD0_SRC1_SEL_MASK | \
+      SQ_ALU_WORD0_SRC1_REL_MASK | \
+      SQ_ALU_WORD0_SRC1_CHAN_MASK | \
+      SQ_ALU_WORD0_SRC1_NEG_MASK | \
+      SQ_ALU_WORD0_INDEX_MODE_MASK | \
+      SQ_ALU_WORD0_PRED_SEL_MASK | \
+      SQ_ALU_WORD0_LAST_MASK)
+
+#define SQ_ALU_WORD0_DEFAULT           0xcdcdcdcd /* == 0xcdcdcdcd & SQ_ALU_WORD0_MASK */
+
+#define SQ_ALU_WORD0_GET_SRC0_SEL(sq_alu_word0) /* field extractors: mask the word, then shift the field down to bit 0; the argument may be any expression */ \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC0_SEL_MASK) >> SQ_ALU_WORD0_SRC0_SEL_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC0_REL(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC0_REL_MASK) >> SQ_ALU_WORD0_SRC0_REL_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC0_CHAN(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC0_CHAN_MASK) >> SQ_ALU_WORD0_SRC0_CHAN_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC0_NEG(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC0_NEG_MASK) >> SQ_ALU_WORD0_SRC0_NEG_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC1_SEL(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC1_SEL_MASK) >> SQ_ALU_WORD0_SRC1_SEL_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC1_REL(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC1_REL_MASK) >> SQ_ALU_WORD0_SRC1_REL_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC1_CHAN(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC1_CHAN_MASK) >> SQ_ALU_WORD0_SRC1_CHAN_SHIFT)
+#define SQ_ALU_WORD0_GET_SRC1_NEG(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_SRC1_NEG_MASK) >> SQ_ALU_WORD0_SRC1_NEG_SHIFT)
+#define SQ_ALU_WORD0_GET_INDEX_MODE(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_INDEX_MODE_MASK) >> SQ_ALU_WORD0_INDEX_MODE_SHIFT)
+#define SQ_ALU_WORD0_GET_PRED_SEL(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_PRED_SEL_MASK) >> SQ_ALU_WORD0_PRED_SEL_SHIFT)
+#define SQ_ALU_WORD0_GET_LAST(sq_alu_word0) \
+     (((sq_alu_word0) & SQ_ALU_WORD0_LAST_MASK) >> SQ_ALU_WORD0_LAST_SHIFT)
+/* Field writers: clear the field in the lvalue, then OR in the new value. The value is converted to unsigned and masked so it cannot spill into other fields; the lvalue argument is evaluated twice. */
+#define SQ_ALU_WORD0_SET_SRC0_SEL(sq_alu_word0_reg, src0_sel) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC0_SEL_MASK) | (((unsigned int)(src0_sel) << SQ_ALU_WORD0_SRC0_SEL_SHIFT) & SQ_ALU_WORD0_SRC0_SEL_MASK))
+#define SQ_ALU_WORD0_SET_SRC0_REL(sq_alu_word0_reg, src0_rel) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC0_REL_MASK) | (((unsigned int)(src0_rel) << SQ_ALU_WORD0_SRC0_REL_SHIFT) & SQ_ALU_WORD0_SRC0_REL_MASK))
+#define SQ_ALU_WORD0_SET_SRC0_CHAN(sq_alu_word0_reg, src0_chan) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC0_CHAN_MASK) | (((unsigned int)(src0_chan) << SQ_ALU_WORD0_SRC0_CHAN_SHIFT) & SQ_ALU_WORD0_SRC0_CHAN_MASK))
+#define SQ_ALU_WORD0_SET_SRC0_NEG(sq_alu_word0_reg, src0_neg) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC0_NEG_MASK) | (((unsigned int)(src0_neg) << SQ_ALU_WORD0_SRC0_NEG_SHIFT) & SQ_ALU_WORD0_SRC0_NEG_MASK))
+#define SQ_ALU_WORD0_SET_SRC1_SEL(sq_alu_word0_reg, src1_sel) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC1_SEL_MASK) | (((unsigned int)(src1_sel) << SQ_ALU_WORD0_SRC1_SEL_SHIFT) & SQ_ALU_WORD0_SRC1_SEL_MASK))
+#define SQ_ALU_WORD0_SET_SRC1_REL(sq_alu_word0_reg, src1_rel) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC1_REL_MASK) | (((unsigned int)(src1_rel) << SQ_ALU_WORD0_SRC1_REL_SHIFT) & SQ_ALU_WORD0_SRC1_REL_MASK))
+#define SQ_ALU_WORD0_SET_SRC1_CHAN(sq_alu_word0_reg, src1_chan) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC1_CHAN_MASK) | (((unsigned int)(src1_chan) << SQ_ALU_WORD0_SRC1_CHAN_SHIFT) & SQ_ALU_WORD0_SRC1_CHAN_MASK))
+#define SQ_ALU_WORD0_SET_SRC1_NEG(sq_alu_word0_reg, src1_neg) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_SRC1_NEG_MASK) | (((unsigned int)(src1_neg) << SQ_ALU_WORD0_SRC1_NEG_SHIFT) & SQ_ALU_WORD0_SRC1_NEG_MASK))
+#define SQ_ALU_WORD0_SET_INDEX_MODE(sq_alu_word0_reg, index_mode) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_INDEX_MODE_MASK) | (((unsigned int)(index_mode) << SQ_ALU_WORD0_INDEX_MODE_SHIFT) & SQ_ALU_WORD0_INDEX_MODE_MASK))
+#define SQ_ALU_WORD0_SET_PRED_SEL(sq_alu_word0_reg, pred_sel) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_PRED_SEL_MASK) | (((unsigned int)(pred_sel) << SQ_ALU_WORD0_PRED_SEL_SHIFT) & SQ_ALU_WORD0_PRED_SEL_MASK))
+#define SQ_ALU_WORD0_SET_LAST(sq_alu_word0_reg, last) \
+     ((sq_alu_word0_reg) = ((sq_alu_word0_reg) & ~SQ_ALU_WORD0_LAST_MASK) | (((unsigned int)(last) << SQ_ALU_WORD0_LAST_SHIFT) & SQ_ALU_WORD0_LAST_MASK))
+/* Bit-field view of SQ_ALU_WORD0. The member order is selected by LITTLEENDIAN_CPU / BIGENDIAN_CPU; if neither is defined, sq_alu_word0_t is left undeclared. */
+#if		defined(LITTLEENDIAN_CPU)
+/* Members in order of increasing SHIFT. NOTE(review): matches the masks only if bit-fields are allocated from bit 0 upward - confirm for the target ABI. */
+     typedef struct _sq_alu_word0_t {
+          unsigned int src0_sel                       : SQ_ALU_WORD0_SRC0_SEL_SIZE;
+          unsigned int src0_rel                       : SQ_ALU_WORD0_SRC0_REL_SIZE;
+          unsigned int src0_chan                      : SQ_ALU_WORD0_SRC0_CHAN_SIZE;
+          unsigned int src0_neg                       : SQ_ALU_WORD0_SRC0_NEG_SIZE;
+          unsigned int src1_sel                       : SQ_ALU_WORD0_SRC1_SEL_SIZE;
+          unsigned int src1_rel                       : SQ_ALU_WORD0_SRC1_REL_SIZE;
+          unsigned int src1_chan                      : SQ_ALU_WORD0_SRC1_CHAN_SIZE;
+          unsigned int src1_neg                       : SQ_ALU_WORD0_SRC1_NEG_SIZE;
+          unsigned int index_mode                     : SQ_ALU_WORD0_INDEX_MODE_SIZE;
+          unsigned int pred_sel                       : SQ_ALU_WORD0_PRED_SEL_SIZE;
+          unsigned int last                           : SQ_ALU_WORD0_LAST_SIZE;
+     } sq_alu_word0_t;
+
+#elif		defined(BIGENDIAN_CPU)
+/* Same members declared in the reverse order (last first, src0_sel last). */
+     typedef struct _sq_alu_word0_t {
+          unsigned int last                           : SQ_ALU_WORD0_LAST_SIZE;
+          unsigned int pred_sel                       : SQ_ALU_WORD0_PRED_SEL_SIZE;
+          unsigned int index_mode                     : SQ_ALU_WORD0_INDEX_MODE_SIZE;
+          unsigned int src1_neg                       : SQ_ALU_WORD0_SRC1_NEG_SIZE;
+          unsigned int src1_chan                      : SQ_ALU_WORD0_SRC1_CHAN_SIZE;
+          unsigned int src1_rel                       : SQ_ALU_WORD0_SRC1_REL_SIZE;
+          unsigned int src1_sel                       : SQ_ALU_WORD0_SRC1_SEL_SIZE;
+          unsigned int src0_neg                       : SQ_ALU_WORD0_SRC0_NEG_SIZE;
+          unsigned int src0_chan                      : SQ_ALU_WORD0_SRC0_CHAN_SIZE;
+          unsigned int src0_rel                       : SQ_ALU_WORD0_SRC0_REL_SIZE;
+          unsigned int src0_sel                       : SQ_ALU_WORD0_SRC0_SEL_SIZE;
+     } sq_alu_word0_t;
+
+#endif
+/* Overlay of the raw word and its fields: val is the whole word as a 32-bit unsigned bit-field, f is the same storage as named fields. */
+typedef union {
+     unsigned int val : 32;
+     sq_alu_word0_t f;
+} sq_alu_word0_u;
+
+
+/*
+ * SQ_ALU_WORD1 struct
+ */
+/* Width in bits of each SQ_ALU_WORD1 field. */
+#define SQ_ALU_WORD1_ENCODING_SIZE     3
+#define SQ_ALU_WORD1_BANK_SWIZZLE_SIZE 3
+#define SQ_ALU_WORD1_DST_GPR_SIZE      7
+#define SQ_ALU_WORD1_DST_REL_SIZE      1
+#define SQ_ALU_WORD1_DST_CHAN_SIZE     2
+#define SQ_ALU_WORD1_CLAMP_SIZE        1
+/* Bit offset of each field; the lowest field starts at bit 15. */
+#define SQ_ALU_WORD1_ENCODING_SHIFT    15
+#define SQ_ALU_WORD1_BANK_SWIZZLE_SHIFT 18
+#define SQ_ALU_WORD1_DST_GPR_SHIFT     21
+#define SQ_ALU_WORD1_DST_REL_SHIFT     28
+#define SQ_ALU_WORD1_DST_CHAN_SHIFT    29
+#define SQ_ALU_WORD1_CLAMP_SHIFT       31
+/* Mask selecting each field in place: SIZE one-bits positioned at SHIFT. */
+#define SQ_ALU_WORD1_ENCODING_MASK     0x00038000
+#define SQ_ALU_WORD1_BANK_SWIZZLE_MASK 0x001c0000
+#define SQ_ALU_WORD1_DST_GPR_MASK      0x0fe00000
+#define SQ_ALU_WORD1_DST_REL_MASK      0x10000000
+#define SQ_ALU_WORD1_DST_CHAN_MASK     0x60000000
+#define SQ_ALU_WORD1_CLAMP_MASK        0x80000000
+/* OR of every field mask above (0xffff8000); bits 0-14 belong to no SQ_ALU_WORD1 field. */
+#define SQ_ALU_WORD1_MASK \
+     (SQ_ALU_WORD1_ENCODING_MASK | \
+      SQ_ALU_WORD1_BANK_SWIZZLE_MASK | \
+      SQ_ALU_WORD1_DST_GPR_MASK | \
+      SQ_ALU_WORD1_DST_REL_MASK | \
+      SQ_ALU_WORD1_DST_CHAN_MASK | \
+      SQ_ALU_WORD1_CLAMP_MASK)
+/* Default word value; equals 0xcdcdcdcd restricted to SQ_ALU_WORD1_MASK. */
+#define SQ_ALU_WORD1_DEFAULT           0xcdcd8000
+/* Field readers: AND the word with the field mask, then shift the field down to bit 0. The argument is parenthesized so a compound expression is masked as a whole. */
+#define SQ_ALU_WORD1_GET_ENCODING(sq_alu_word1) \
+     (((sq_alu_word1) & SQ_ALU_WORD1_ENCODING_MASK) >> SQ_ALU_WORD1_ENCODING_SHIFT)
+#define SQ_ALU_WORD1_GET_BANK_SWIZZLE(sq_alu_word1) \
+     (((sq_alu_word1) & SQ_ALU_WORD1_BANK_SWIZZLE_MASK) >> SQ_ALU_WORD1_BANK_SWIZZLE_SHIFT)
+#define SQ_ALU_WORD1_GET_DST_GPR(sq_alu_word1) \
+     (((sq_alu_word1) & SQ_ALU_WORD1_DST_GPR_MASK) >> SQ_ALU_WORD1_DST_GPR_SHIFT)
+#define SQ_ALU_WORD1_GET_DST_REL(sq_alu_word1) \
+     (((sq_alu_word1) & SQ_ALU_WORD1_DST_REL_MASK) >> SQ_ALU_WORD1_DST_REL_SHIFT)
+#define SQ_ALU_WORD1_GET_DST_CHAN(sq_alu_word1) \
+     (((sq_alu_word1) & SQ_ALU_WORD1_DST_CHAN_MASK) >> SQ_ALU_WORD1_DST_CHAN_SHIFT)
+#define SQ_ALU_WORD1_GET_CLAMP(sq_alu_word1) \
+     (((sq_alu_word1) & SQ_ALU_WORD1_CLAMP_MASK) >> SQ_ALU_WORD1_CLAMP_SHIFT)
+/* Field writers: clear the field in the lvalue, then OR in the new value. The value is converted to unsigned and masked so it cannot spill into other fields; the lvalue argument is evaluated twice. */
+#define SQ_ALU_WORD1_SET_ENCODING(sq_alu_word1_reg, encoding) \
+     ((sq_alu_word1_reg) = ((sq_alu_word1_reg) & ~SQ_ALU_WORD1_ENCODING_MASK) | (((unsigned int)(encoding) << SQ_ALU_WORD1_ENCODING_SHIFT) & SQ_ALU_WORD1_ENCODING_MASK))
+#define SQ_ALU_WORD1_SET_BANK_SWIZZLE(sq_alu_word1_reg, bank_swizzle) \
+     ((sq_alu_word1_reg) = ((sq_alu_word1_reg) & ~SQ_ALU_WORD1_BANK_SWIZZLE_MASK) | (((unsigned int)(bank_swizzle) << SQ_ALU_WORD1_BANK_SWIZZLE_SHIFT) & SQ_ALU_WORD1_BANK_SWIZZLE_MASK))
+#define SQ_ALU_WORD1_SET_DST_GPR(sq_alu_word1_reg, dst_gpr) \
+     ((sq_alu_word1_reg) = ((sq_alu_word1_reg) & ~SQ_ALU_WORD1_DST_GPR_MASK) | (((unsigned int)(dst_gpr) << SQ_ALU_WORD1_DST_GPR_SHIFT) & SQ_ALU_WORD1_DST_GPR_MASK))
+#define SQ_ALU_WORD1_SET_DST_REL(sq_alu_word1_reg, dst_rel) \
+     ((sq_alu_word1_reg) = ((sq_alu_word1_reg) & ~SQ_ALU_WORD1_DST_REL_MASK) | (((unsigned int)(dst_rel) << SQ_ALU_WORD1_DST_REL_SHIFT) & SQ_ALU_WORD1_DST_REL_MASK))
+#define SQ_ALU_WORD1_SET_DST_CHAN(sq_alu_word1_reg, dst_chan) \
+     ((sq_alu_word1_reg) = ((sq_alu_word1_reg) & ~SQ_ALU_WORD1_DST_CHAN_MASK) | (((unsigned int)(dst_chan) << SQ_ALU_WORD1_DST_CHAN_SHIFT) & SQ_ALU_WORD1_DST_CHAN_MASK))
+#define SQ_ALU_WORD1_SET_CLAMP(sq_alu_word1_reg, clamp) \
+     ((sq_alu_word1_reg) = ((sq_alu_word1_reg) & ~SQ_ALU_WORD1_CLAMP_MASK) | (((unsigned int)(clamp) << SQ_ALU_WORD1_CLAMP_SHIFT) & SQ_ALU_WORD1_CLAMP_MASK))
+/* Bit-field view of SQ_ALU_WORD1. The member order is selected by LITTLEENDIAN_CPU / BIGENDIAN_CPU; if neither is defined, sq_alu_word1_t is left undeclared. */
+#if		defined(LITTLEENDIAN_CPU)
+/* A 15-bit unnamed pad comes first, then the fields in order of increasing SHIFT. NOTE(review): assumes bit-fields are allocated from bit 0 upward - confirm. */
+     typedef struct _sq_alu_word1_t {
+          unsigned int                                : 15;
+          unsigned int encoding                       : SQ_ALU_WORD1_ENCODING_SIZE;
+          unsigned int bank_swizzle                   : SQ_ALU_WORD1_BANK_SWIZZLE_SIZE;
+          unsigned int dst_gpr                        : SQ_ALU_WORD1_DST_GPR_SIZE;
+          unsigned int dst_rel                        : SQ_ALU_WORD1_DST_REL_SIZE;
+          unsigned int dst_chan                       : SQ_ALU_WORD1_DST_CHAN_SIZE;
+          unsigned int clamp                          : SQ_ALU_WORD1_CLAMP_SIZE;
+     } sq_alu_word1_t;
+
+#elif		defined(BIGENDIAN_CPU)
+/* Same members declared in the reverse order, with the 15-bit pad last. */
+     typedef struct _sq_alu_word1_t {
+          unsigned int clamp                          : SQ_ALU_WORD1_CLAMP_SIZE;
+          unsigned int dst_chan                       : SQ_ALU_WORD1_DST_CHAN_SIZE;
+          unsigned int dst_rel                        : SQ_ALU_WORD1_DST_REL_SIZE;
+          unsigned int dst_gpr                        : SQ_ALU_WORD1_DST_GPR_SIZE;
+          unsigned int bank_swizzle                   : SQ_ALU_WORD1_BANK_SWIZZLE_SIZE;
+          unsigned int encoding                       : SQ_ALU_WORD1_ENCODING_SIZE;
+          unsigned int                                : 15;
+     } sq_alu_word1_t;
+
+#endif
+/* Overlay of the raw word and its fields: val is the whole word as a 32-bit unsigned bit-field, f is the same storage as named fields. */
+typedef union {
+     unsigned int val : 32;
+     sq_alu_word1_t f;
+} sq_alu_word1_u;
+
+
+/*
+ * SQ_ALU_WORD1_OP2_V2 struct
+ */
+/* Width in bits of each SQ_ALU_WORD1_OP2_V2 field. */
+#define SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE 1
+#define SQ_ALU_WORD1_OP2_V2_OMOD_SIZE  2
+#define SQ_ALU_WORD1_OP2_V2_ALU_INST_SIZE 11
+/* Bit offset of each field, counted from bit 0 of the 32-bit word. */
+#define SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SHIFT 0
+#define SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SHIFT 1
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SHIFT 2
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SHIFT 3
+#define SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SHIFT 4
+#define SQ_ALU_WORD1_OP2_V2_OMOD_SHIFT 5
+#define SQ_ALU_WORD1_OP2_V2_ALU_INST_SHIFT 7
+/* Mask selecting each field in place: SIZE one-bits positioned at SHIFT. */
+#define SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK 0x00000001
+#define SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK 0x00000002
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK 0x00000004
+#define SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK 0x00000008
+#define SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK 0x00000010
+#define SQ_ALU_WORD1_OP2_V2_OMOD_MASK  0x00000060
+#define SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK 0x0003ff80
+/* OR of every field mask above (0x0003ffff, bits 0-17). NOTE(review): presumably overlaid on the low bits of SQ_ALU_WORD1 - confirm. */
+#define SQ_ALU_WORD1_OP2_V2_MASK \
+     (SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK | \
+      SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK | \
+      SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK | \
+      SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK | \
+      SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK | \
+      SQ_ALU_WORD1_OP2_V2_OMOD_MASK | \
+      SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK)
+/* Default word value; equals 0xcdcdcdcd restricted to SQ_ALU_WORD1_OP2_V2_MASK. */
+#define SQ_ALU_WORD1_OP2_V2_DEFAULT    0x0001cdcd
+/* Field readers: AND the word with the field mask, then shift the field down to bit 0. The argument is parenthesized so a compound expression is masked as a whole. */
+#define SQ_ALU_WORD1_OP2_V2_GET_SRC0_ABS(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK) >> SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_SRC1_ABS(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK) >> SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_UPDATE_EXECUTE_MASK(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK) >> SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_UPDATE_PRED(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK) >> SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_WRITE_MASK(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK) >> SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_OMOD(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_OMOD_MASK) >> SQ_ALU_WORD1_OP2_V2_OMOD_SHIFT)
+#define SQ_ALU_WORD1_OP2_V2_GET_ALU_INST(sq_alu_word1_op2_v2) \
+     (((sq_alu_word1_op2_v2) & SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK) >> SQ_ALU_WORD1_OP2_V2_ALU_INST_SHIFT)
+/* Field writers: clear the field in the lvalue, then OR in the new value. The value is converted to unsigned and masked so it cannot spill into other fields; the lvalue argument is evaluated twice. */
+#define SQ_ALU_WORD1_OP2_V2_SET_SRC0_ABS(sq_alu_word1_op2_v2_reg, src0_abs) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK) | (((unsigned int)(src0_abs) << SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SHIFT) & SQ_ALU_WORD1_OP2_V2_SRC0_ABS_MASK))
+#define SQ_ALU_WORD1_OP2_V2_SET_SRC1_ABS(sq_alu_word1_op2_v2_reg, src1_abs) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK) | (((unsigned int)(src1_abs) << SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SHIFT) & SQ_ALU_WORD1_OP2_V2_SRC1_ABS_MASK))
+#define SQ_ALU_WORD1_OP2_V2_SET_UPDATE_EXECUTE_MASK(sq_alu_word1_op2_v2_reg, update_execute_mask) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK) | (((unsigned int)(update_execute_mask) << SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SHIFT) & SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_MASK))
+#define SQ_ALU_WORD1_OP2_V2_SET_UPDATE_PRED(sq_alu_word1_op2_v2_reg, update_pred) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK) | (((unsigned int)(update_pred) << SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SHIFT) & SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_MASK))
+#define SQ_ALU_WORD1_OP2_V2_SET_WRITE_MASK(sq_alu_word1_op2_v2_reg, write_mask) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK) | (((unsigned int)(write_mask) << SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SHIFT) & SQ_ALU_WORD1_OP2_V2_WRITE_MASK_MASK))
+#define SQ_ALU_WORD1_OP2_V2_SET_OMOD(sq_alu_word1_op2_v2_reg, omod) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_OMOD_MASK) | (((unsigned int)(omod) << SQ_ALU_WORD1_OP2_V2_OMOD_SHIFT) & SQ_ALU_WORD1_OP2_V2_OMOD_MASK))
+#define SQ_ALU_WORD1_OP2_V2_SET_ALU_INST(sq_alu_word1_op2_v2_reg, alu_inst) \
+     ((sq_alu_word1_op2_v2_reg) = ((sq_alu_word1_op2_v2_reg) & ~SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK) | (((unsigned int)(alu_inst) << SQ_ALU_WORD1_OP2_V2_ALU_INST_SHIFT) & SQ_ALU_WORD1_OP2_V2_ALU_INST_MASK))
+/* Bit-field view of SQ_ALU_WORD1_OP2_V2. The member order is selected by LITTLEENDIAN_CPU / BIGENDIAN_CPU; if neither is defined, the type is left undeclared. */
+#if		defined(LITTLEENDIAN_CPU)
+/* Fields in order of increasing SHIFT, then a 14-bit unnamed pad. NOTE(review): assumes bit-fields are allocated from bit 0 upward - confirm. */
+     typedef struct _sq_alu_word1_op2_v2_t {
+          unsigned int src0_abs                       : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE;
+          unsigned int src1_abs                       : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE;
+          unsigned int update_execute_mask            : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE;
+          unsigned int update_pred                    : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE;
+          unsigned int write_mask                     : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE;
+          unsigned int omod                           : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE;
+          unsigned int alu_inst                       : SQ_ALU_WORD1_OP2_V2_ALU_INST_SIZE;
+          unsigned int                                : 14;
+     } sq_alu_word1_op2_v2_t;
+
+#elif		defined(BIGENDIAN_CPU)
+/* Same members declared in the reverse order, with the 14-bit pad first. */
+     typedef struct _sq_alu_word1_op2_v2_t {
+          unsigned int                                : 14;
+          unsigned int alu_inst                       : SQ_ALU_WORD1_OP2_V2_ALU_INST_SIZE;
+          unsigned int omod                           : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE;
+          unsigned int write_mask                     : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE;
+          unsigned int update_pred                    : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE;
+          unsigned int update_execute_mask            : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE;
+          unsigned int src1_abs                       : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE;
+          unsigned int src0_abs                       : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE;
+     } sq_alu_word1_op2_v2_t;
+
+#endif
+/* Alternate layout (struct tag ..._r6xx_t, typedef name ..._v1_t): inserts a 1-bit fog_export after write_mask and narrows alu_inst to 10 bits, so the OP2_V2 OMOD/ALU_INST shift and mask macros do not describe it. */
+#if		defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_alu_word1_op2_r6xx_t {
+          unsigned int src0_abs                       : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE;
+          unsigned int src1_abs                       : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE;
+          unsigned int update_execute_mask            : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE;
+          unsigned int update_pred                    : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE;
+          unsigned int write_mask                     : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE;
+          unsigned int fog_export                     : 1;
+          unsigned int omod                           : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE;
+          unsigned int alu_inst                       : 10;
+          unsigned int                                : 14;
+     } sq_alu_word1_op2_v1_t;
+
+#elif		defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_alu_word1_op2_r6xx_t {
+          unsigned int                                : 14;
+          unsigned int alu_inst                       : 10;
+          unsigned int omod                           : SQ_ALU_WORD1_OP2_V2_OMOD_SIZE;
+          unsigned int fog_export                     : 1;
+          unsigned int write_mask                     : SQ_ALU_WORD1_OP2_V2_WRITE_MASK_SIZE;
+          unsigned int update_pred                    : SQ_ALU_WORD1_OP2_V2_UPDATE_PRED_SIZE;
+          unsigned int update_execute_mask            : SQ_ALU_WORD1_OP2_V2_UPDATE_EXECUTE_MASK_SIZE;
+          unsigned int src1_abs                       : SQ_ALU_WORD1_OP2_V2_SRC1_ABS_SIZE;
+          unsigned int src0_abs                       : SQ_ALU_WORD1_OP2_V2_SRC0_ABS_SIZE;
+     } sq_alu_word1_op2_v1_t;
+
+#endif
+/* Overlay of the raw word and both field layouts: val is the whole word, f is the v2 layout, f6 is the alternate (v1 / r6xx) layout. */
+typedef union {
+     unsigned int val : 32;
+     sq_alu_word1_op2_v2_t f;
+     sq_alu_word1_op2_v1_t f6;
+} sq_alu_word1_op2_v2_u;
+
+
+/*
+ * SQ_ALU_WORD1_OP3 struct
+ */
+/* Width in bits of each SQ_ALU_WORD1_OP3 field. */
+#define SQ_ALU_WORD1_OP3_SRC2_SEL_SIZE 9
+#define SQ_ALU_WORD1_OP3_SRC2_REL_SIZE 1
+#define SQ_ALU_WORD1_OP3_SRC2_CHAN_SIZE 2
+#define SQ_ALU_WORD1_OP3_SRC2_NEG_SIZE 1
+#define SQ_ALU_WORD1_OP3_ALU_INST_SIZE 5
+/* Bit offset of each field, counted from bit 0 of the 32-bit word. */
+#define SQ_ALU_WORD1_OP3_SRC2_SEL_SHIFT 0
+#define SQ_ALU_WORD1_OP3_SRC2_REL_SHIFT 9
+#define SQ_ALU_WORD1_OP3_SRC2_CHAN_SHIFT 10
+#define SQ_ALU_WORD1_OP3_SRC2_NEG_SHIFT 12
+#define SQ_ALU_WORD1_OP3_ALU_INST_SHIFT 13
+/* Mask selecting each field in place: SIZE one-bits positioned at SHIFT. */
+#define SQ_ALU_WORD1_OP3_SRC2_SEL_MASK 0x000001ff
+#define SQ_ALU_WORD1_OP3_SRC2_REL_MASK 0x00000200
+#define SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK 0x00000c00
+#define SQ_ALU_WORD1_OP3_SRC2_NEG_MASK 0x00001000
+#define SQ_ALU_WORD1_OP3_ALU_INST_MASK 0x0003e000
+/* OR of every field mask above (0x0003ffff, bits 0-17). NOTE(review): presumably overlaid on the low bits of SQ_ALU_WORD1 - confirm. */
+#define SQ_ALU_WORD1_OP3_MASK \
+     (SQ_ALU_WORD1_OP3_SRC2_SEL_MASK | \
+      SQ_ALU_WORD1_OP3_SRC2_REL_MASK | \
+      SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK | \
+      SQ_ALU_WORD1_OP3_SRC2_NEG_MASK | \
+      SQ_ALU_WORD1_OP3_ALU_INST_MASK)
+/* Default word value; equals 0xcdcdcdcd restricted to SQ_ALU_WORD1_OP3_MASK. */
+#define SQ_ALU_WORD1_OP3_DEFAULT       0x0001cdcd
+/* Field readers: AND the word with the field mask, then shift the field down to bit 0. The argument is parenthesized so a compound expression is masked as a whole. */
+#define SQ_ALU_WORD1_OP3_GET_SRC2_SEL(sq_alu_word1_op3) \
+     (((sq_alu_word1_op3) & SQ_ALU_WORD1_OP3_SRC2_SEL_MASK) >> SQ_ALU_WORD1_OP3_SRC2_SEL_SHIFT)
+#define SQ_ALU_WORD1_OP3_GET_SRC2_REL(sq_alu_word1_op3) \
+     (((sq_alu_word1_op3) & SQ_ALU_WORD1_OP3_SRC2_REL_MASK) >> SQ_ALU_WORD1_OP3_SRC2_REL_SHIFT)
+#define SQ_ALU_WORD1_OP3_GET_SRC2_CHAN(sq_alu_word1_op3) \
+     (((sq_alu_word1_op3) & SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK) >> SQ_ALU_WORD1_OP3_SRC2_CHAN_SHIFT)
+#define SQ_ALU_WORD1_OP3_GET_SRC2_NEG(sq_alu_word1_op3) \
+     (((sq_alu_word1_op3) & SQ_ALU_WORD1_OP3_SRC2_NEG_MASK) >> SQ_ALU_WORD1_OP3_SRC2_NEG_SHIFT)
+#define SQ_ALU_WORD1_OP3_GET_ALU_INST(sq_alu_word1_op3) \
+     (((sq_alu_word1_op3) & SQ_ALU_WORD1_OP3_ALU_INST_MASK) >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT)
+/* Field writers: clear the field in the lvalue, then OR in the new value. The value is converted to unsigned and masked so it cannot spill into other fields; the lvalue argument is evaluated twice. */
+#define SQ_ALU_WORD1_OP3_SET_SRC2_SEL(sq_alu_word1_op3_reg, src2_sel) \
+     ((sq_alu_word1_op3_reg) = ((sq_alu_word1_op3_reg) & ~SQ_ALU_WORD1_OP3_SRC2_SEL_MASK) | (((unsigned int)(src2_sel) << SQ_ALU_WORD1_OP3_SRC2_SEL_SHIFT) & SQ_ALU_WORD1_OP3_SRC2_SEL_MASK))
+#define SQ_ALU_WORD1_OP3_SET_SRC2_REL(sq_alu_word1_op3_reg, src2_rel) \
+     ((sq_alu_word1_op3_reg) = ((sq_alu_word1_op3_reg) & ~SQ_ALU_WORD1_OP3_SRC2_REL_MASK) | (((unsigned int)(src2_rel) << SQ_ALU_WORD1_OP3_SRC2_REL_SHIFT) & SQ_ALU_WORD1_OP3_SRC2_REL_MASK))
+#define SQ_ALU_WORD1_OP3_SET_SRC2_CHAN(sq_alu_word1_op3_reg, src2_chan) \
+     ((sq_alu_word1_op3_reg) = ((sq_alu_word1_op3_reg) & ~SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK) | (((unsigned int)(src2_chan) << SQ_ALU_WORD1_OP3_SRC2_CHAN_SHIFT) & SQ_ALU_WORD1_OP3_SRC2_CHAN_MASK))
+#define SQ_ALU_WORD1_OP3_SET_SRC2_NEG(sq_alu_word1_op3_reg, src2_neg) \
+     ((sq_alu_word1_op3_reg) = ((sq_alu_word1_op3_reg) & ~SQ_ALU_WORD1_OP3_SRC2_NEG_MASK) | (((unsigned int)(src2_neg) << SQ_ALU_WORD1_OP3_SRC2_NEG_SHIFT) & SQ_ALU_WORD1_OP3_SRC2_NEG_MASK))
+#define SQ_ALU_WORD1_OP3_SET_ALU_INST(sq_alu_word1_op3_reg, alu_inst) \
+     ((sq_alu_word1_op3_reg) = ((sq_alu_word1_op3_reg) & ~SQ_ALU_WORD1_OP3_ALU_INST_MASK) | (((unsigned int)(alu_inst) << SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & SQ_ALU_WORD1_OP3_ALU_INST_MASK))
+/* Bit-field view of SQ_ALU_WORD1_OP3. The member order is selected by LITTLEENDIAN_CPU / BIGENDIAN_CPU; if neither is defined, sq_alu_word1_op3_t is left undeclared. */
+#if		defined(LITTLEENDIAN_CPU)
+/* Fields in order of increasing SHIFT, then a 14-bit unnamed pad. NOTE(review): assumes bit-fields are allocated from bit 0 upward - confirm. */
+     typedef struct _sq_alu_word1_op3_t {
+          unsigned int src2_sel                       : SQ_ALU_WORD1_OP3_SRC2_SEL_SIZE;
+          unsigned int src2_rel                       : SQ_ALU_WORD1_OP3_SRC2_REL_SIZE;
+          unsigned int src2_chan                      : SQ_ALU_WORD1_OP3_SRC2_CHAN_SIZE;
+          unsigned int src2_neg                       : SQ_ALU_WORD1_OP3_SRC2_NEG_SIZE;
+          unsigned int alu_inst                       : SQ_ALU_WORD1_OP3_ALU_INST_SIZE;
+          unsigned int                                : 14;
+     } sq_alu_word1_op3_t;
+
+#elif		defined(BIGENDIAN_CPU)
+/* Same members declared in the reverse order, with the 14-bit pad first. */
+     typedef struct _sq_alu_word1_op3_t {
+          unsigned int                                : 14;
+          unsigned int alu_inst                       : SQ_ALU_WORD1_OP3_ALU_INST_SIZE;
+          unsigned int src2_neg                       : SQ_ALU_WORD1_OP3_SRC2_NEG_SIZE;
+          unsigned int src2_chan                      : SQ_ALU_WORD1_OP3_SRC2_CHAN_SIZE;
+          unsigned int src2_rel                       : SQ_ALU_WORD1_OP3_SRC2_REL_SIZE;
+          unsigned int src2_sel                       : SQ_ALU_WORD1_OP3_SRC2_SEL_SIZE;
+     } sq_alu_word1_op3_t;
+
+#endif
+/* Overlay of the raw word and its fields: val is the whole word as a 32-bit unsigned bit-field, f is the same storage as named fields. */
+typedef union {
+     unsigned int val : 32;
+     sq_alu_word1_op3_t f;
+} sq_alu_word1_op3_u;
+
+
+/*
+ * SQ_TEX_WORD0 struct
+ */
+/* Width in bits of each SQ_TEX_WORD0 field. */
+#define SQ_TEX_WORD0_TEX_INST_SIZE     5
+#define SQ_TEX_WORD0_BC_FRAC_MODE_SIZE 1
+#define SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SIZE 1
+#define SQ_TEX_WORD0_RESOURCE_ID_SIZE  8
+#define SQ_TEX_WORD0_SRC_GPR_SIZE      7
+#define SQ_TEX_WORD0_SRC_REL_SIZE      1
+#define SQ_TEX_WORD0_ALT_CONST_SIZE    1
+/* Bit offset of each field; bit 6 is skipped between BC_FRAC_MODE and FETCH_WHOLE_QUAD. */
+#define SQ_TEX_WORD0_TEX_INST_SHIFT    0
+#define SQ_TEX_WORD0_BC_FRAC_MODE_SHIFT 5
+#define SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SHIFT 7
+#define SQ_TEX_WORD0_RESOURCE_ID_SHIFT 8
+#define SQ_TEX_WORD0_SRC_GPR_SHIFT     16
+#define SQ_TEX_WORD0_SRC_REL_SHIFT     23
+#define SQ_TEX_WORD0_ALT_CONST_SHIFT   24
+/* Mask selecting each field in place: SIZE one-bits positioned at SHIFT. */
+#define SQ_TEX_WORD0_TEX_INST_MASK     0x0000001f
+#define SQ_TEX_WORD0_BC_FRAC_MODE_MASK 0x00000020
+#define SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK 0x00000080
+#define SQ_TEX_WORD0_RESOURCE_ID_MASK  0x0000ff00
+#define SQ_TEX_WORD0_SRC_GPR_MASK      0x007f0000
+#define SQ_TEX_WORD0_SRC_REL_MASK      0x00800000
+#define SQ_TEX_WORD0_ALT_CONST_MASK    0x01000000
+/* OR of every field mask above (0x01ffffbf); bit 6 and bits 25-31 belong to no field. */
+#define SQ_TEX_WORD0_MASK \
+     (SQ_TEX_WORD0_TEX_INST_MASK | \
+      SQ_TEX_WORD0_BC_FRAC_MODE_MASK | \
+      SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK | \
+      SQ_TEX_WORD0_RESOURCE_ID_MASK | \
+      SQ_TEX_WORD0_SRC_GPR_MASK | \
+      SQ_TEX_WORD0_SRC_REL_MASK | \
+      SQ_TEX_WORD0_ALT_CONST_MASK)
+/* Default word value; equals 0xcdcdcdcd restricted to SQ_TEX_WORD0_MASK. */
+#define SQ_TEX_WORD0_DEFAULT           0x01cdcd8d
+/* Field readers: AND the word with the field mask, then shift the field down to bit 0. The argument is parenthesized so a compound expression is masked as a whole. */
+#define SQ_TEX_WORD0_GET_TEX_INST(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_TEX_INST_MASK) >> SQ_TEX_WORD0_TEX_INST_SHIFT)
+#define SQ_TEX_WORD0_GET_BC_FRAC_MODE(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_BC_FRAC_MODE_MASK) >> SQ_TEX_WORD0_BC_FRAC_MODE_SHIFT)
+#define SQ_TEX_WORD0_GET_FETCH_WHOLE_QUAD(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK) >> SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SHIFT)
+#define SQ_TEX_WORD0_GET_RESOURCE_ID(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_RESOURCE_ID_MASK) >> SQ_TEX_WORD0_RESOURCE_ID_SHIFT)
+#define SQ_TEX_WORD0_GET_SRC_GPR(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_SRC_GPR_MASK) >> SQ_TEX_WORD0_SRC_GPR_SHIFT)
+#define SQ_TEX_WORD0_GET_SRC_REL(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_SRC_REL_MASK) >> SQ_TEX_WORD0_SRC_REL_SHIFT)
+#define SQ_TEX_WORD0_GET_ALT_CONST(sq_tex_word0) \
+     (((sq_tex_word0) & SQ_TEX_WORD0_ALT_CONST_MASK) >> SQ_TEX_WORD0_ALT_CONST_SHIFT)
+/* Field writers: clear the field in the lvalue, then OR in the new value. The value is converted to unsigned and masked so it cannot spill into other fields; the lvalue argument is evaluated twice. */
+#define SQ_TEX_WORD0_SET_TEX_INST(sq_tex_word0_reg, tex_inst) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_TEX_INST_MASK) | (((unsigned int)(tex_inst) << SQ_TEX_WORD0_TEX_INST_SHIFT) & SQ_TEX_WORD0_TEX_INST_MASK))
+#define SQ_TEX_WORD0_SET_BC_FRAC_MODE(sq_tex_word0_reg, bc_frac_mode) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_BC_FRAC_MODE_MASK) | (((unsigned int)(bc_frac_mode) << SQ_TEX_WORD0_BC_FRAC_MODE_SHIFT) & SQ_TEX_WORD0_BC_FRAC_MODE_MASK))
+#define SQ_TEX_WORD0_SET_FETCH_WHOLE_QUAD(sq_tex_word0_reg, fetch_whole_quad) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK) | (((unsigned int)(fetch_whole_quad) << SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SHIFT) & SQ_TEX_WORD0_FETCH_WHOLE_QUAD_MASK))
+#define SQ_TEX_WORD0_SET_RESOURCE_ID(sq_tex_word0_reg, resource_id) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_RESOURCE_ID_MASK) | (((unsigned int)(resource_id) << SQ_TEX_WORD0_RESOURCE_ID_SHIFT) & SQ_TEX_WORD0_RESOURCE_ID_MASK))
+#define SQ_TEX_WORD0_SET_SRC_GPR(sq_tex_word0_reg, src_gpr) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_SRC_GPR_MASK) | (((unsigned int)(src_gpr) << SQ_TEX_WORD0_SRC_GPR_SHIFT) & SQ_TEX_WORD0_SRC_GPR_MASK))
+#define SQ_TEX_WORD0_SET_SRC_REL(sq_tex_word0_reg, src_rel) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_SRC_REL_MASK) | (((unsigned int)(src_rel) << SQ_TEX_WORD0_SRC_REL_SHIFT) & SQ_TEX_WORD0_SRC_REL_MASK))
+#define SQ_TEX_WORD0_SET_ALT_CONST(sq_tex_word0_reg, alt_const) \
+     ((sq_tex_word0_reg) = ((sq_tex_word0_reg) & ~SQ_TEX_WORD0_ALT_CONST_MASK) | (((unsigned int)(alt_const) << SQ_TEX_WORD0_ALT_CONST_SHIFT) & SQ_TEX_WORD0_ALT_CONST_MASK))
+/* Bit-field view of SQ_TEX_WORD0. The member order is selected by LITTLEENDIAN_CPU / BIGENDIAN_CPU; if neither is defined, sq_tex_word0_t is left undeclared. */
+#if		defined(LITTLEENDIAN_CPU)
+/* Fields in order of increasing SHIFT, with unnamed pads for the 1-bit hole and the 7 trailing bits. NOTE(review): assumes allocation from bit 0 upward - confirm. */
+     typedef struct _sq_tex_word0_t {
+          unsigned int tex_inst                       : SQ_TEX_WORD0_TEX_INST_SIZE;
+          unsigned int bc_frac_mode                   : SQ_TEX_WORD0_BC_FRAC_MODE_SIZE;
+          unsigned int                                : 1;
+          unsigned int fetch_whole_quad               : SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SIZE;
+          unsigned int resource_id                    : SQ_TEX_WORD0_RESOURCE_ID_SIZE;
+          unsigned int src_gpr                        : SQ_TEX_WORD0_SRC_GPR_SIZE;
+          unsigned int src_rel                        : SQ_TEX_WORD0_SRC_REL_SIZE;
+          unsigned int alt_const                      : SQ_TEX_WORD0_ALT_CONST_SIZE;
+          unsigned int                                : 7;
+     } sq_tex_word0_t;
+
+#elif		defined(BIGENDIAN_CPU)
+/* Same members and pads declared in the reverse order. */
+     typedef struct _sq_tex_word0_t {
+          unsigned int                                : 7;
+          unsigned int alt_const                      : SQ_TEX_WORD0_ALT_CONST_SIZE;
+          unsigned int src_rel                        : SQ_TEX_WORD0_SRC_REL_SIZE;
+          unsigned int src_gpr                        : SQ_TEX_WORD0_SRC_GPR_SIZE;
+          unsigned int resource_id                    : SQ_TEX_WORD0_RESOURCE_ID_SIZE;
+          unsigned int fetch_whole_quad               : SQ_TEX_WORD0_FETCH_WHOLE_QUAD_SIZE;
+          unsigned int                                : 1;
+          unsigned int bc_frac_mode                   : SQ_TEX_WORD0_BC_FRAC_MODE_SIZE;
+          unsigned int tex_inst                       : SQ_TEX_WORD0_TEX_INST_SIZE;
+     } sq_tex_word0_t;
+
+#endif
+/* Overlay of the raw word and its fields: val is the whole word as a 32-bit unsigned bit-field, f is the same storage as named fields. */
+typedef union {
+     unsigned int val : 32;
+     sq_tex_word0_t f;
+} sq_tex_word0_u;
+
+
+/*
+ * SQ_TEX_WORD1 struct
+ */
+/* Width in bits of each SQ_TEX_WORD1 field. */
+#define SQ_TEX_WORD1_DST_GPR_SIZE      7
+#define SQ_TEX_WORD1_DST_REL_SIZE      1
+#define SQ_TEX_WORD1_DST_SEL_X_SIZE    3
+#define SQ_TEX_WORD1_DST_SEL_Y_SIZE    3
+#define SQ_TEX_WORD1_DST_SEL_Z_SIZE    3
+#define SQ_TEX_WORD1_DST_SEL_W_SIZE    3
+#define SQ_TEX_WORD1_LOD_BIAS_SIZE     7
+#define SQ_TEX_WORD1_COORD_TYPE_X_SIZE 1
+#define SQ_TEX_WORD1_COORD_TYPE_Y_SIZE 1
+#define SQ_TEX_WORD1_COORD_TYPE_Z_SIZE 1
+#define SQ_TEX_WORD1_COORD_TYPE_W_SIZE 1
+/* Bit offset of each field; bit 8 is skipped between DST_REL and DST_SEL_X. */
+#define SQ_TEX_WORD1_DST_GPR_SHIFT     0
+#define SQ_TEX_WORD1_DST_REL_SHIFT     7
+#define SQ_TEX_WORD1_DST_SEL_X_SHIFT   9
+#define SQ_TEX_WORD1_DST_SEL_Y_SHIFT   12
+#define SQ_TEX_WORD1_DST_SEL_Z_SHIFT   15
+#define SQ_TEX_WORD1_DST_SEL_W_SHIFT   18
+#define SQ_TEX_WORD1_LOD_BIAS_SHIFT    21
+#define SQ_TEX_WORD1_COORD_TYPE_X_SHIFT 28
+#define SQ_TEX_WORD1_COORD_TYPE_Y_SHIFT 29
+#define SQ_TEX_WORD1_COORD_TYPE_Z_SHIFT 30
+#define SQ_TEX_WORD1_COORD_TYPE_W_SHIFT 31
+/* Mask selecting each field in place: SIZE one-bits positioned at SHIFT. */
+#define SQ_TEX_WORD1_DST_GPR_MASK      0x0000007f
+#define SQ_TEX_WORD1_DST_REL_MASK      0x00000080
+#define SQ_TEX_WORD1_DST_SEL_X_MASK    0x00000e00
+#define SQ_TEX_WORD1_DST_SEL_Y_MASK    0x00007000
+#define SQ_TEX_WORD1_DST_SEL_Z_MASK    0x00038000
+#define SQ_TEX_WORD1_DST_SEL_W_MASK    0x001c0000
+#define SQ_TEX_WORD1_LOD_BIAS_MASK     0x0fe00000
+#define SQ_TEX_WORD1_COORD_TYPE_X_MASK 0x10000000
+#define SQ_TEX_WORD1_COORD_TYPE_Y_MASK 0x20000000
+#define SQ_TEX_WORD1_COORD_TYPE_Z_MASK 0x40000000
+#define SQ_TEX_WORD1_COORD_TYPE_W_MASK 0x80000000
+/* OR of every field mask above (0xfffffeff); bit 8 belongs to no field. */
+#define SQ_TEX_WORD1_MASK \
+     (SQ_TEX_WORD1_DST_GPR_MASK | \
+      SQ_TEX_WORD1_DST_REL_MASK | \
+      SQ_TEX_WORD1_DST_SEL_X_MASK | \
+      SQ_TEX_WORD1_DST_SEL_Y_MASK | \
+      SQ_TEX_WORD1_DST_SEL_Z_MASK | \
+      SQ_TEX_WORD1_DST_SEL_W_MASK | \
+      SQ_TEX_WORD1_LOD_BIAS_MASK | \
+      SQ_TEX_WORD1_COORD_TYPE_X_MASK | \
+      SQ_TEX_WORD1_COORD_TYPE_Y_MASK | \
+      SQ_TEX_WORD1_COORD_TYPE_Z_MASK | \
+      SQ_TEX_WORD1_COORD_TYPE_W_MASK)
+/* Default word value; equals 0xcdcdcdcd restricted to SQ_TEX_WORD1_MASK. */
+#define SQ_TEX_WORD1_DEFAULT           0xcdcdcccd
+/* Field readers: AND the word with the field mask, then shift the field down to bit 0. The argument is parenthesized so a compound expression is masked as a whole. */
+#define SQ_TEX_WORD1_GET_DST_GPR(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_DST_GPR_MASK) >> SQ_TEX_WORD1_DST_GPR_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_REL(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_DST_REL_MASK) >> SQ_TEX_WORD1_DST_REL_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_SEL_X(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_DST_SEL_X_MASK) >> SQ_TEX_WORD1_DST_SEL_X_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_SEL_Y(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_DST_SEL_Y_MASK) >> SQ_TEX_WORD1_DST_SEL_Y_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_SEL_Z(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_DST_SEL_Z_MASK) >> SQ_TEX_WORD1_DST_SEL_Z_SHIFT)
+#define SQ_TEX_WORD1_GET_DST_SEL_W(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_DST_SEL_W_MASK) >> SQ_TEX_WORD1_DST_SEL_W_SHIFT)
+#define SQ_TEX_WORD1_GET_LOD_BIAS(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_LOD_BIAS_MASK) >> SQ_TEX_WORD1_LOD_BIAS_SHIFT)
+#define SQ_TEX_WORD1_GET_COORD_TYPE_X(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_COORD_TYPE_X_MASK) >> SQ_TEX_WORD1_COORD_TYPE_X_SHIFT)
+#define SQ_TEX_WORD1_GET_COORD_TYPE_Y(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_COORD_TYPE_Y_MASK) >> SQ_TEX_WORD1_COORD_TYPE_Y_SHIFT)
+#define SQ_TEX_WORD1_GET_COORD_TYPE_Z(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_COORD_TYPE_Z_MASK) >> SQ_TEX_WORD1_COORD_TYPE_Z_SHIFT)
+#define SQ_TEX_WORD1_GET_COORD_TYPE_W(sq_tex_word1) \
+     (((sq_tex_word1) & SQ_TEX_WORD1_COORD_TYPE_W_MASK) >> SQ_TEX_WORD1_COORD_TYPE_W_SHIFT)
+/* Field writers: clear the field in the lvalue, then OR in the new value. The value is converted to unsigned and masked, so negative or oversized values (e.g. a negative lod_bias) cannot spill into other fields; the lvalue argument is evaluated twice. */
+#define SQ_TEX_WORD1_SET_DST_GPR(sq_tex_word1_reg, dst_gpr) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_DST_GPR_MASK) | (((unsigned int)(dst_gpr) << SQ_TEX_WORD1_DST_GPR_SHIFT) & SQ_TEX_WORD1_DST_GPR_MASK))
+#define SQ_TEX_WORD1_SET_DST_REL(sq_tex_word1_reg, dst_rel) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_DST_REL_MASK) | (((unsigned int)(dst_rel) << SQ_TEX_WORD1_DST_REL_SHIFT) & SQ_TEX_WORD1_DST_REL_MASK))
+#define SQ_TEX_WORD1_SET_DST_SEL_X(sq_tex_word1_reg, dst_sel_x) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_DST_SEL_X_MASK) | (((unsigned int)(dst_sel_x) << SQ_TEX_WORD1_DST_SEL_X_SHIFT) & SQ_TEX_WORD1_DST_SEL_X_MASK))
+#define SQ_TEX_WORD1_SET_DST_SEL_Y(sq_tex_word1_reg, dst_sel_y) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_DST_SEL_Y_MASK) | (((unsigned int)(dst_sel_y) << SQ_TEX_WORD1_DST_SEL_Y_SHIFT) & SQ_TEX_WORD1_DST_SEL_Y_MASK))
+#define SQ_TEX_WORD1_SET_DST_SEL_Z(sq_tex_word1_reg, dst_sel_z) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_DST_SEL_Z_MASK) | (((unsigned int)(dst_sel_z) << SQ_TEX_WORD1_DST_SEL_Z_SHIFT) & SQ_TEX_WORD1_DST_SEL_Z_MASK))
+#define SQ_TEX_WORD1_SET_DST_SEL_W(sq_tex_word1_reg, dst_sel_w) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_DST_SEL_W_MASK) | (((unsigned int)(dst_sel_w) << SQ_TEX_WORD1_DST_SEL_W_SHIFT) & SQ_TEX_WORD1_DST_SEL_W_MASK))
+#define SQ_TEX_WORD1_SET_LOD_BIAS(sq_tex_word1_reg, lod_bias) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_LOD_BIAS_MASK) | (((unsigned int)(lod_bias) << SQ_TEX_WORD1_LOD_BIAS_SHIFT) & SQ_TEX_WORD1_LOD_BIAS_MASK))
+#define SQ_TEX_WORD1_SET_COORD_TYPE_X(sq_tex_word1_reg, coord_type_x) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_COORD_TYPE_X_MASK) | (((unsigned int)(coord_type_x) << SQ_TEX_WORD1_COORD_TYPE_X_SHIFT) & SQ_TEX_WORD1_COORD_TYPE_X_MASK))
+#define SQ_TEX_WORD1_SET_COORD_TYPE_Y(sq_tex_word1_reg, coord_type_y) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_COORD_TYPE_Y_MASK) | (((unsigned int)(coord_type_y) << SQ_TEX_WORD1_COORD_TYPE_Y_SHIFT) & SQ_TEX_WORD1_COORD_TYPE_Y_MASK))
+#define SQ_TEX_WORD1_SET_COORD_TYPE_Z(sq_tex_word1_reg, coord_type_z) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_COORD_TYPE_Z_MASK) | (((unsigned int)(coord_type_z) << SQ_TEX_WORD1_COORD_TYPE_Z_SHIFT) & SQ_TEX_WORD1_COORD_TYPE_Z_MASK))
+#define SQ_TEX_WORD1_SET_COORD_TYPE_W(sq_tex_word1_reg, coord_type_w) \
+     ((sq_tex_word1_reg) = ((sq_tex_word1_reg) & ~SQ_TEX_WORD1_COORD_TYPE_W_MASK) | (((unsigned int)(coord_type_w) << SQ_TEX_WORD1_COORD_TYPE_W_SHIFT) & SQ_TEX_WORD1_COORD_TYPE_W_MASK))
+
+#if		defined(LITTLEENDIAN_CPU)
+     /* Bitfield view of SQ_TEX_WORD1 for LITTLEENDIAN_CPU builds; the unnamed 1-bit member is padding between dst_rel and dst_sel_x. */
+     typedef struct _sq_tex_word1_t {
+          unsigned int dst_gpr                        : SQ_TEX_WORD1_DST_GPR_SIZE;
+          unsigned int dst_rel                        : SQ_TEX_WORD1_DST_REL_SIZE;
+          unsigned int                                : 1;
+          unsigned int dst_sel_x                      : SQ_TEX_WORD1_DST_SEL_X_SIZE;
+          unsigned int dst_sel_y                      : SQ_TEX_WORD1_DST_SEL_Y_SIZE;
+          unsigned int dst_sel_z                      : SQ_TEX_WORD1_DST_SEL_Z_SIZE;
+          unsigned int dst_sel_w                      : SQ_TEX_WORD1_DST_SEL_W_SIZE;
+          unsigned int lod_bias                       : SQ_TEX_WORD1_LOD_BIAS_SIZE;
+          unsigned int coord_type_x                   : SQ_TEX_WORD1_COORD_TYPE_X_SIZE;
+          unsigned int coord_type_y                   : SQ_TEX_WORD1_COORD_TYPE_Y_SIZE;
+          unsigned int coord_type_z                   : SQ_TEX_WORD1_COORD_TYPE_Z_SIZE;
+          unsigned int coord_type_w                   : SQ_TEX_WORD1_COORD_TYPE_W_SIZE;
+     } sq_tex_word1_t;
+
+#elif		defined(BIGENDIAN_CPU)
+     /* Same members in reverse declaration order for BIGENDIAN_CPU builds. NOTE(review): bit-field allocation order is implementation-defined -- confirm both variants map to the same bits on the target compilers. */
+     typedef struct _sq_tex_word1_t {
+          unsigned int coord_type_w                   : SQ_TEX_WORD1_COORD_TYPE_W_SIZE;
+          unsigned int coord_type_z                   : SQ_TEX_WORD1_COORD_TYPE_Z_SIZE;
+          unsigned int coord_type_y                   : SQ_TEX_WORD1_COORD_TYPE_Y_SIZE;
+          unsigned int coord_type_x                   : SQ_TEX_WORD1_COORD_TYPE_X_SIZE;
+          unsigned int lod_bias                       : SQ_TEX_WORD1_LOD_BIAS_SIZE;
+          unsigned int dst_sel_w                      : SQ_TEX_WORD1_DST_SEL_W_SIZE;
+          unsigned int dst_sel_z                      : SQ_TEX_WORD1_DST_SEL_Z_SIZE;
+          unsigned int dst_sel_y                      : SQ_TEX_WORD1_DST_SEL_Y_SIZE;
+          unsigned int dst_sel_x                      : SQ_TEX_WORD1_DST_SEL_X_SIZE;
+          unsigned int                                : 1;
+          unsigned int dst_rel                        : SQ_TEX_WORD1_DST_REL_SIZE;
+          unsigned int dst_gpr                        : SQ_TEX_WORD1_DST_GPR_SIZE;
+     } sq_tex_word1_t;
+
+#endif
+/* The word as a raw 32-bit value (val) or through its fields (f). sq_tex_word1_t is declared only when LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined. */
+typedef union {
+     unsigned int val : 32;
+     sq_tex_word1_t f;
+} sq_tex_word1_u;
+
+
+/*
+ * SQ_TEX_WORD2 struct: field SIZE/SHIFT/MASK constants, GET/SET accessor macros, and bitfield/union views of the 32-bit word.
+ */
+/* Width of each field, in bits (also used as the bit-field widths in the structs below). */
+#define SQ_TEX_WORD2_OFFSET_X_SIZE     5
+#define SQ_TEX_WORD2_OFFSET_Y_SIZE     5
+#define SQ_TEX_WORD2_OFFSET_Z_SIZE     5
+#define SQ_TEX_WORD2_SAMPLER_ID_SIZE   5
+#define SQ_TEX_WORD2_SRC_SEL_X_SIZE    3
+#define SQ_TEX_WORD2_SRC_SEL_Y_SIZE    3
+#define SQ_TEX_WORD2_SRC_SEL_Z_SIZE    3
+#define SQ_TEX_WORD2_SRC_SEL_W_SIZE    3
+/* Bit position of each field's least-significant bit within the word. */
+#define SQ_TEX_WORD2_OFFSET_X_SHIFT    0
+#define SQ_TEX_WORD2_OFFSET_Y_SHIFT    5
+#define SQ_TEX_WORD2_OFFSET_Z_SHIFT    10
+#define SQ_TEX_WORD2_SAMPLER_ID_SHIFT  15
+#define SQ_TEX_WORD2_SRC_SEL_X_SHIFT   20
+#define SQ_TEX_WORD2_SRC_SEL_Y_SHIFT   23
+#define SQ_TEX_WORD2_SRC_SEL_Z_SHIFT   26
+#define SQ_TEX_WORD2_SRC_SEL_W_SHIFT   29
+/* In-place mask of each field: SIZE one-bits starting at SHIFT. */
+#define SQ_TEX_WORD2_OFFSET_X_MASK     0x0000001f
+#define SQ_TEX_WORD2_OFFSET_Y_MASK     0x000003e0
+#define SQ_TEX_WORD2_OFFSET_Z_MASK     0x00007c00
+#define SQ_TEX_WORD2_SAMPLER_ID_MASK   0x000f8000
+#define SQ_TEX_WORD2_SRC_SEL_X_MASK    0x00700000
+#define SQ_TEX_WORD2_SRC_SEL_Y_MASK    0x03800000
+#define SQ_TEX_WORD2_SRC_SEL_Z_MASK    0x1c000000
+#define SQ_TEX_WORD2_SRC_SEL_W_MASK    0xe0000000
+/* OR of all field masks above, i.e. every bit this word defines. */
+#define SQ_TEX_WORD2_MASK \
+     (SQ_TEX_WORD2_OFFSET_X_MASK | \
+      SQ_TEX_WORD2_OFFSET_Y_MASK | \
+      SQ_TEX_WORD2_OFFSET_Z_MASK | \
+      SQ_TEX_WORD2_SAMPLER_ID_MASK | \
+      SQ_TEX_WORD2_SRC_SEL_X_MASK | \
+      SQ_TEX_WORD2_SRC_SEL_Y_MASK | \
+      SQ_TEX_WORD2_SRC_SEL_Z_MASK | \
+      SQ_TEX_WORD2_SRC_SEL_W_MASK)
+/* Equals 0xcdcdcdcd & SQ_TEX_WORD2_MASK. NOTE(review): looks like a fill pattern, not a hardware reset value -- confirm. */
+#define SQ_TEX_WORD2_DEFAULT           0xcdcdcdcd
+/* GET_<FIELD>(word): the field's value shifted down to bit 0. The argument is parenthesized so a compound expression is masked as a whole. */
+#define SQ_TEX_WORD2_GET_OFFSET_X(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_OFFSET_X_MASK) >> SQ_TEX_WORD2_OFFSET_X_SHIFT)
+#define SQ_TEX_WORD2_GET_OFFSET_Y(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_OFFSET_Y_MASK) >> SQ_TEX_WORD2_OFFSET_Y_SHIFT)
+#define SQ_TEX_WORD2_GET_OFFSET_Z(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_OFFSET_Z_MASK) >> SQ_TEX_WORD2_OFFSET_Z_SHIFT)
+#define SQ_TEX_WORD2_GET_SAMPLER_ID(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_SAMPLER_ID_MASK) >> SQ_TEX_WORD2_SAMPLER_ID_SHIFT)
+#define SQ_TEX_WORD2_GET_SRC_SEL_X(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_SRC_SEL_X_MASK) >> SQ_TEX_WORD2_SRC_SEL_X_SHIFT)
+#define SQ_TEX_WORD2_GET_SRC_SEL_Y(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_SRC_SEL_Y_MASK) >> SQ_TEX_WORD2_SRC_SEL_Y_SHIFT)
+#define SQ_TEX_WORD2_GET_SRC_SEL_Z(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_SRC_SEL_Z_MASK) >> SQ_TEX_WORD2_SRC_SEL_Z_SHIFT)
+#define SQ_TEX_WORD2_GET_SRC_SEL_W(sq_tex_word2) \
+     (((sq_tex_word2) & SQ_TEX_WORD2_SRC_SEL_W_MASK) >> SQ_TEX_WORD2_SRC_SEL_W_SHIFT)
+/* SET_<FIELD>(reg, value): replace one field of the lvalue reg, leaving the others intact. value is converted to unsigned int (so shifting into bit 31 is well defined) and masked to the field, so an oversized value cannot spill into neighbouring fields. reg is evaluated twice. */
+#define SQ_TEX_WORD2_SET_OFFSET_X(sq_tex_word2_reg, offset_x) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_OFFSET_X_MASK) | (((unsigned int)(offset_x) << SQ_TEX_WORD2_OFFSET_X_SHIFT) & SQ_TEX_WORD2_OFFSET_X_MASK))
+#define SQ_TEX_WORD2_SET_OFFSET_Y(sq_tex_word2_reg, offset_y) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_OFFSET_Y_MASK) | (((unsigned int)(offset_y) << SQ_TEX_WORD2_OFFSET_Y_SHIFT) & SQ_TEX_WORD2_OFFSET_Y_MASK))
+#define SQ_TEX_WORD2_SET_OFFSET_Z(sq_tex_word2_reg, offset_z) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_OFFSET_Z_MASK) | (((unsigned int)(offset_z) << SQ_TEX_WORD2_OFFSET_Z_SHIFT) & SQ_TEX_WORD2_OFFSET_Z_MASK))
+#define SQ_TEX_WORD2_SET_SAMPLER_ID(sq_tex_word2_reg, sampler_id) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_SAMPLER_ID_MASK) | (((unsigned int)(sampler_id) << SQ_TEX_WORD2_SAMPLER_ID_SHIFT) & SQ_TEX_WORD2_SAMPLER_ID_MASK))
+#define SQ_TEX_WORD2_SET_SRC_SEL_X(sq_tex_word2_reg, src_sel_x) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_SRC_SEL_X_MASK) | (((unsigned int)(src_sel_x) << SQ_TEX_WORD2_SRC_SEL_X_SHIFT) & SQ_TEX_WORD2_SRC_SEL_X_MASK))
+#define SQ_TEX_WORD2_SET_SRC_SEL_Y(sq_tex_word2_reg, src_sel_y) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_SRC_SEL_Y_MASK) | (((unsigned int)(src_sel_y) << SQ_TEX_WORD2_SRC_SEL_Y_SHIFT) & SQ_TEX_WORD2_SRC_SEL_Y_MASK))
+#define SQ_TEX_WORD2_SET_SRC_SEL_Z(sq_tex_word2_reg, src_sel_z) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_SRC_SEL_Z_MASK) | (((unsigned int)(src_sel_z) << SQ_TEX_WORD2_SRC_SEL_Z_SHIFT) & SQ_TEX_WORD2_SRC_SEL_Z_MASK))
+#define SQ_TEX_WORD2_SET_SRC_SEL_W(sq_tex_word2_reg, src_sel_w) \
+     ((sq_tex_word2_reg) = ((sq_tex_word2_reg) & ~SQ_TEX_WORD2_SRC_SEL_W_MASK) | (((unsigned int)(src_sel_w) << SQ_TEX_WORD2_SRC_SEL_W_SHIFT) & SQ_TEX_WORD2_SRC_SEL_W_MASK))
+/* Bitfield views of the same word. The BIGENDIAN_CPU variant declares the members in reverse order. NOTE(review): bit-field allocation order is implementation-defined -- confirm for the target compilers. */
+#if		defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_tex_word2_t {
+          unsigned int offset_x                       : SQ_TEX_WORD2_OFFSET_X_SIZE;
+          unsigned int offset_y                       : SQ_TEX_WORD2_OFFSET_Y_SIZE;
+          unsigned int offset_z                       : SQ_TEX_WORD2_OFFSET_Z_SIZE;
+          unsigned int sampler_id                     : SQ_TEX_WORD2_SAMPLER_ID_SIZE;
+          unsigned int src_sel_x                      : SQ_TEX_WORD2_SRC_SEL_X_SIZE;
+          unsigned int src_sel_y                      : SQ_TEX_WORD2_SRC_SEL_Y_SIZE;
+          unsigned int src_sel_z                      : SQ_TEX_WORD2_SRC_SEL_Z_SIZE;
+          unsigned int src_sel_w                      : SQ_TEX_WORD2_SRC_SEL_W_SIZE;
+     } sq_tex_word2_t;
+
+#elif		defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_tex_word2_t {
+          unsigned int src_sel_w                      : SQ_TEX_WORD2_SRC_SEL_W_SIZE;
+          unsigned int src_sel_z                      : SQ_TEX_WORD2_SRC_SEL_Z_SIZE;
+          unsigned int src_sel_y                      : SQ_TEX_WORD2_SRC_SEL_Y_SIZE;
+          unsigned int src_sel_x                      : SQ_TEX_WORD2_SRC_SEL_X_SIZE;
+          unsigned int sampler_id                     : SQ_TEX_WORD2_SAMPLER_ID_SIZE;
+          unsigned int offset_z                       : SQ_TEX_WORD2_OFFSET_Z_SIZE;
+          unsigned int offset_y                       : SQ_TEX_WORD2_OFFSET_Y_SIZE;
+          unsigned int offset_x                       : SQ_TEX_WORD2_OFFSET_X_SIZE;
+     } sq_tex_word2_t;
+
+#endif
+/* The word as a raw 32-bit value (val) or through its fields (f). sq_tex_word2_t is declared only when LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined. */
+typedef union {
+     unsigned int val : 32;
+     sq_tex_word2_t f;
+} sq_tex_word2_u;
+
+
+/*
+ * SQ_VTX_WORD0 struct: field SIZE/SHIFT/MASK constants, GET/SET accessor macros, and bitfield/union views of the 32-bit word.
+ */
+/* Width of each field, in bits (also used as the bit-field widths in the structs below). */
+#define SQ_VTX_WORD0_VTX_INST_SIZE     5
+#define SQ_VTX_WORD0_FETCH_TYPE_SIZE   2
+#define SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SIZE 1
+#define SQ_VTX_WORD0_BUFFER_ID_SIZE    8
+#define SQ_VTX_WORD0_SRC_GPR_SIZE      7
+#define SQ_VTX_WORD0_SRC_REL_SIZE      1
+#define SQ_VTX_WORD0_SRC_SEL_X_SIZE    2
+#define SQ_VTX_WORD0_MEGA_FETCH_COUNT_SIZE 6
+/* Bit position of each field's least-significant bit within the word. */
+#define SQ_VTX_WORD0_VTX_INST_SHIFT    0
+#define SQ_VTX_WORD0_FETCH_TYPE_SHIFT  5
+#define SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SHIFT 7
+#define SQ_VTX_WORD0_BUFFER_ID_SHIFT   8
+#define SQ_VTX_WORD0_SRC_GPR_SHIFT     16
+#define SQ_VTX_WORD0_SRC_REL_SHIFT     23
+#define SQ_VTX_WORD0_SRC_SEL_X_SHIFT   24
+#define SQ_VTX_WORD0_MEGA_FETCH_COUNT_SHIFT 26
+/* In-place mask of each field: SIZE one-bits starting at SHIFT. */
+#define SQ_VTX_WORD0_VTX_INST_MASK     0x0000001f
+#define SQ_VTX_WORD0_FETCH_TYPE_MASK   0x00000060
+#define SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK 0x00000080
+#define SQ_VTX_WORD0_BUFFER_ID_MASK    0x0000ff00
+#define SQ_VTX_WORD0_SRC_GPR_MASK      0x007f0000
+#define SQ_VTX_WORD0_SRC_REL_MASK      0x00800000
+#define SQ_VTX_WORD0_SRC_SEL_X_MASK    0x03000000
+#define SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK 0xfc000000
+/* OR of all field masks above, i.e. every bit this word defines. */
+#define SQ_VTX_WORD0_MASK \
+     (SQ_VTX_WORD0_VTX_INST_MASK | \
+      SQ_VTX_WORD0_FETCH_TYPE_MASK | \
+      SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK | \
+      SQ_VTX_WORD0_BUFFER_ID_MASK | \
+      SQ_VTX_WORD0_SRC_GPR_MASK | \
+      SQ_VTX_WORD0_SRC_REL_MASK | \
+      SQ_VTX_WORD0_SRC_SEL_X_MASK | \
+      SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK)
+/* Equals 0xcdcdcdcd & SQ_VTX_WORD0_MASK. NOTE(review): looks like a fill pattern, not a hardware reset value -- confirm. */
+#define SQ_VTX_WORD0_DEFAULT           0xcdcdcdcd
+/* GET_<FIELD>(word): the field's value shifted down to bit 0. The argument is parenthesized so a compound expression is masked as a whole. */
+#define SQ_VTX_WORD0_GET_VTX_INST(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_VTX_INST_MASK) >> SQ_VTX_WORD0_VTX_INST_SHIFT)
+#define SQ_VTX_WORD0_GET_FETCH_TYPE(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_FETCH_TYPE_MASK) >> SQ_VTX_WORD0_FETCH_TYPE_SHIFT)
+#define SQ_VTX_WORD0_GET_FETCH_WHOLE_QUAD(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK) >> SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SHIFT)
+#define SQ_VTX_WORD0_GET_BUFFER_ID(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_BUFFER_ID_MASK) >> SQ_VTX_WORD0_BUFFER_ID_SHIFT)
+#define SQ_VTX_WORD0_GET_SRC_GPR(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_SRC_GPR_MASK) >> SQ_VTX_WORD0_SRC_GPR_SHIFT)
+#define SQ_VTX_WORD0_GET_SRC_REL(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_SRC_REL_MASK) >> SQ_VTX_WORD0_SRC_REL_SHIFT)
+#define SQ_VTX_WORD0_GET_SRC_SEL_X(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_SRC_SEL_X_MASK) >> SQ_VTX_WORD0_SRC_SEL_X_SHIFT)
+#define SQ_VTX_WORD0_GET_MEGA_FETCH_COUNT(sq_vtx_word0) \
+     (((sq_vtx_word0) & SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK) >> SQ_VTX_WORD0_MEGA_FETCH_COUNT_SHIFT)
+/* SET_<FIELD>(reg, value): replace one field of the lvalue reg, leaving the others intact. value is converted to unsigned int (so shifting into bit 31 is well defined) and masked to the field, so an oversized value cannot spill into neighbouring fields. reg is evaluated twice. */
+#define SQ_VTX_WORD0_SET_VTX_INST(sq_vtx_word0_reg, vtx_inst) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_VTX_INST_MASK) | (((unsigned int)(vtx_inst) << SQ_VTX_WORD0_VTX_INST_SHIFT) & SQ_VTX_WORD0_VTX_INST_MASK))
+#define SQ_VTX_WORD0_SET_FETCH_TYPE(sq_vtx_word0_reg, fetch_type) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_FETCH_TYPE_MASK) | (((unsigned int)(fetch_type) << SQ_VTX_WORD0_FETCH_TYPE_SHIFT) & SQ_VTX_WORD0_FETCH_TYPE_MASK))
+#define SQ_VTX_WORD0_SET_FETCH_WHOLE_QUAD(sq_vtx_word0_reg, fetch_whole_quad) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK) | (((unsigned int)(fetch_whole_quad) << SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SHIFT) & SQ_VTX_WORD0_FETCH_WHOLE_QUAD_MASK))
+#define SQ_VTX_WORD0_SET_BUFFER_ID(sq_vtx_word0_reg, buffer_id) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_BUFFER_ID_MASK) | (((unsigned int)(buffer_id) << SQ_VTX_WORD0_BUFFER_ID_SHIFT) & SQ_VTX_WORD0_BUFFER_ID_MASK))
+#define SQ_VTX_WORD0_SET_SRC_GPR(sq_vtx_word0_reg, src_gpr) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_SRC_GPR_MASK) | (((unsigned int)(src_gpr) << SQ_VTX_WORD0_SRC_GPR_SHIFT) & SQ_VTX_WORD0_SRC_GPR_MASK))
+#define SQ_VTX_WORD0_SET_SRC_REL(sq_vtx_word0_reg, src_rel) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_SRC_REL_MASK) | (((unsigned int)(src_rel) << SQ_VTX_WORD0_SRC_REL_SHIFT) & SQ_VTX_WORD0_SRC_REL_MASK))
+#define SQ_VTX_WORD0_SET_SRC_SEL_X(sq_vtx_word0_reg, src_sel_x) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_SRC_SEL_X_MASK) | (((unsigned int)(src_sel_x) << SQ_VTX_WORD0_SRC_SEL_X_SHIFT) & SQ_VTX_WORD0_SRC_SEL_X_MASK))
+#define SQ_VTX_WORD0_SET_MEGA_FETCH_COUNT(sq_vtx_word0_reg, mega_fetch_count) \
+     ((sq_vtx_word0_reg) = ((sq_vtx_word0_reg) & ~SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK) | (((unsigned int)(mega_fetch_count) << SQ_VTX_WORD0_MEGA_FETCH_COUNT_SHIFT) & SQ_VTX_WORD0_MEGA_FETCH_COUNT_MASK))
+/* Bitfield views of the same word. The BIGENDIAN_CPU variant declares the members in reverse order. NOTE(review): bit-field allocation order is implementation-defined -- confirm for the target compilers. */
+#if		defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_vtx_word0_t {
+          unsigned int vtx_inst                       : SQ_VTX_WORD0_VTX_INST_SIZE;
+          unsigned int fetch_type                     : SQ_VTX_WORD0_FETCH_TYPE_SIZE;
+          unsigned int fetch_whole_quad               : SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SIZE;
+          unsigned int buffer_id                      : SQ_VTX_WORD0_BUFFER_ID_SIZE;
+          unsigned int src_gpr                        : SQ_VTX_WORD0_SRC_GPR_SIZE;
+          unsigned int src_rel                        : SQ_VTX_WORD0_SRC_REL_SIZE;
+          unsigned int src_sel_x                      : SQ_VTX_WORD0_SRC_SEL_X_SIZE;
+          unsigned int mega_fetch_count               : SQ_VTX_WORD0_MEGA_FETCH_COUNT_SIZE;
+     } sq_vtx_word0_t;
+
+#elif		defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_vtx_word0_t {
+          unsigned int mega_fetch_count               : SQ_VTX_WORD0_MEGA_FETCH_COUNT_SIZE;
+          unsigned int src_sel_x                      : SQ_VTX_WORD0_SRC_SEL_X_SIZE;
+          unsigned int src_rel                        : SQ_VTX_WORD0_SRC_REL_SIZE;
+          unsigned int src_gpr                        : SQ_VTX_WORD0_SRC_GPR_SIZE;
+          unsigned int buffer_id                      : SQ_VTX_WORD0_BUFFER_ID_SIZE;
+          unsigned int fetch_whole_quad               : SQ_VTX_WORD0_FETCH_WHOLE_QUAD_SIZE;
+          unsigned int fetch_type                     : SQ_VTX_WORD0_FETCH_TYPE_SIZE;
+          unsigned int vtx_inst                       : SQ_VTX_WORD0_VTX_INST_SIZE;
+     } sq_vtx_word0_t;
+
+#endif
+/* The word as a raw 32-bit value (val) or through its fields (f). sq_vtx_word0_t is declared only when LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined. */
+typedef union {
+     unsigned int val : 32;
+     sq_vtx_word0_t f;
+} sq_vtx_word0_u;
+
+
+/*
+ * SQ_VTX_WORD1 struct: field SIZE/SHIFT/MASK constants, GET/SET accessor macros, and bitfield/union views of the 32-bit word (bits 0-8 have no named field here).
+ */
+/* Width of each field, in bits (also used as the bit-field widths in the structs below). */
+#define SQ_VTX_WORD1_DST_SEL_X_SIZE    3
+#define SQ_VTX_WORD1_DST_SEL_Y_SIZE    3
+#define SQ_VTX_WORD1_DST_SEL_Z_SIZE    3
+#define SQ_VTX_WORD1_DST_SEL_W_SIZE    3
+#define SQ_VTX_WORD1_USE_CONST_FIELDS_SIZE 1
+#define SQ_VTX_WORD1_DATA_FORMAT_SIZE  6
+#define SQ_VTX_WORD1_NUM_FORMAT_ALL_SIZE 2
+#define SQ_VTX_WORD1_FORMAT_COMP_ALL_SIZE 1
+#define SQ_VTX_WORD1_SRF_MODE_ALL_SIZE 1
+/* Bit position of each field's least-significant bit within the word. */
+#define SQ_VTX_WORD1_DST_SEL_X_SHIFT   9
+#define SQ_VTX_WORD1_DST_SEL_Y_SHIFT   12
+#define SQ_VTX_WORD1_DST_SEL_Z_SHIFT   15
+#define SQ_VTX_WORD1_DST_SEL_W_SHIFT   18
+#define SQ_VTX_WORD1_USE_CONST_FIELDS_SHIFT 21
+#define SQ_VTX_WORD1_DATA_FORMAT_SHIFT 22
+#define SQ_VTX_WORD1_NUM_FORMAT_ALL_SHIFT 28
+#define SQ_VTX_WORD1_FORMAT_COMP_ALL_SHIFT 30
+#define SQ_VTX_WORD1_SRF_MODE_ALL_SHIFT 31
+/* In-place mask of each field: SIZE one-bits starting at SHIFT. */
+#define SQ_VTX_WORD1_DST_SEL_X_MASK    0x00000e00
+#define SQ_VTX_WORD1_DST_SEL_Y_MASK    0x00007000
+#define SQ_VTX_WORD1_DST_SEL_Z_MASK    0x00038000
+#define SQ_VTX_WORD1_DST_SEL_W_MASK    0x001c0000
+#define SQ_VTX_WORD1_USE_CONST_FIELDS_MASK 0x00200000
+#define SQ_VTX_WORD1_DATA_FORMAT_MASK  0x0fc00000
+#define SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK 0x30000000
+#define SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK 0x40000000
+#define SQ_VTX_WORD1_SRF_MODE_ALL_MASK 0x80000000
+/* OR of all field masks above, i.e. every bit this word defines. */
+#define SQ_VTX_WORD1_MASK \
+     (SQ_VTX_WORD1_DST_SEL_X_MASK | \
+      SQ_VTX_WORD1_DST_SEL_Y_MASK | \
+      SQ_VTX_WORD1_DST_SEL_Z_MASK | \
+      SQ_VTX_WORD1_DST_SEL_W_MASK | \
+      SQ_VTX_WORD1_USE_CONST_FIELDS_MASK | \
+      SQ_VTX_WORD1_DATA_FORMAT_MASK | \
+      SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK | \
+      SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK | \
+      SQ_VTX_WORD1_SRF_MODE_ALL_MASK)
+/* Equals 0xcdcdcdcd & SQ_VTX_WORD1_MASK. NOTE(review): looks like a fill pattern, not a hardware reset value -- confirm. */
+#define SQ_VTX_WORD1_DEFAULT           0xcdcdcc00
+/* GET_<FIELD>(word): the field's value shifted down to bit 0. The argument is parenthesized so a compound expression is masked as a whole. */
+#define SQ_VTX_WORD1_GET_DST_SEL_X(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_DST_SEL_X_MASK) >> SQ_VTX_WORD1_DST_SEL_X_SHIFT)
+#define SQ_VTX_WORD1_GET_DST_SEL_Y(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_DST_SEL_Y_MASK) >> SQ_VTX_WORD1_DST_SEL_Y_SHIFT)
+#define SQ_VTX_WORD1_GET_DST_SEL_Z(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_DST_SEL_Z_MASK) >> SQ_VTX_WORD1_DST_SEL_Z_SHIFT)
+#define SQ_VTX_WORD1_GET_DST_SEL_W(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_DST_SEL_W_MASK) >> SQ_VTX_WORD1_DST_SEL_W_SHIFT)
+#define SQ_VTX_WORD1_GET_USE_CONST_FIELDS(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_USE_CONST_FIELDS_MASK) >> SQ_VTX_WORD1_USE_CONST_FIELDS_SHIFT)
+#define SQ_VTX_WORD1_GET_DATA_FORMAT(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_DATA_FORMAT_MASK) >> SQ_VTX_WORD1_DATA_FORMAT_SHIFT)
+#define SQ_VTX_WORD1_GET_NUM_FORMAT_ALL(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK) >> SQ_VTX_WORD1_NUM_FORMAT_ALL_SHIFT)
+#define SQ_VTX_WORD1_GET_FORMAT_COMP_ALL(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK) >> SQ_VTX_WORD1_FORMAT_COMP_ALL_SHIFT)
+#define SQ_VTX_WORD1_GET_SRF_MODE_ALL(sq_vtx_word1) \
+     (((sq_vtx_word1) & SQ_VTX_WORD1_SRF_MODE_ALL_MASK) >> SQ_VTX_WORD1_SRF_MODE_ALL_SHIFT)
+/* SET_<FIELD>(reg, value): replace one field of the lvalue reg, leaving the others intact. value is converted to unsigned int (so shifting into bit 31 is well defined) and masked to the field, so an oversized value cannot spill into neighbouring fields. reg is evaluated twice. */
+#define SQ_VTX_WORD1_SET_DST_SEL_X(sq_vtx_word1_reg, dst_sel_x) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_DST_SEL_X_MASK) | (((unsigned int)(dst_sel_x) << SQ_VTX_WORD1_DST_SEL_X_SHIFT) & SQ_VTX_WORD1_DST_SEL_X_MASK))
+#define SQ_VTX_WORD1_SET_DST_SEL_Y(sq_vtx_word1_reg, dst_sel_y) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_DST_SEL_Y_MASK) | (((unsigned int)(dst_sel_y) << SQ_VTX_WORD1_DST_SEL_Y_SHIFT) & SQ_VTX_WORD1_DST_SEL_Y_MASK))
+#define SQ_VTX_WORD1_SET_DST_SEL_Z(sq_vtx_word1_reg, dst_sel_z) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_DST_SEL_Z_MASK) | (((unsigned int)(dst_sel_z) << SQ_VTX_WORD1_DST_SEL_Z_SHIFT) & SQ_VTX_WORD1_DST_SEL_Z_MASK))
+#define SQ_VTX_WORD1_SET_DST_SEL_W(sq_vtx_word1_reg, dst_sel_w) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_DST_SEL_W_MASK) | (((unsigned int)(dst_sel_w) << SQ_VTX_WORD1_DST_SEL_W_SHIFT) & SQ_VTX_WORD1_DST_SEL_W_MASK))
+#define SQ_VTX_WORD1_SET_USE_CONST_FIELDS(sq_vtx_word1_reg, use_const_fields) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_USE_CONST_FIELDS_MASK) | (((unsigned int)(use_const_fields) << SQ_VTX_WORD1_USE_CONST_FIELDS_SHIFT) & SQ_VTX_WORD1_USE_CONST_FIELDS_MASK))
+#define SQ_VTX_WORD1_SET_DATA_FORMAT(sq_vtx_word1_reg, data_format) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_DATA_FORMAT_MASK) | (((unsigned int)(data_format) << SQ_VTX_WORD1_DATA_FORMAT_SHIFT) & SQ_VTX_WORD1_DATA_FORMAT_MASK))
+#define SQ_VTX_WORD1_SET_NUM_FORMAT_ALL(sq_vtx_word1_reg, num_format_all) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK) | (((unsigned int)(num_format_all) << SQ_VTX_WORD1_NUM_FORMAT_ALL_SHIFT) & SQ_VTX_WORD1_NUM_FORMAT_ALL_MASK))
+#define SQ_VTX_WORD1_SET_FORMAT_COMP_ALL(sq_vtx_word1_reg, format_comp_all) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK) | (((unsigned int)(format_comp_all) << SQ_VTX_WORD1_FORMAT_COMP_ALL_SHIFT) & SQ_VTX_WORD1_FORMAT_COMP_ALL_MASK))
+#define SQ_VTX_WORD1_SET_SRF_MODE_ALL(sq_vtx_word1_reg, srf_mode_all) \
+     ((sq_vtx_word1_reg) = ((sq_vtx_word1_reg) & ~SQ_VTX_WORD1_SRF_MODE_ALL_MASK) | (((unsigned int)(srf_mode_all) << SQ_VTX_WORD1_SRF_MODE_ALL_SHIFT) & SQ_VTX_WORD1_SRF_MODE_ALL_MASK))
+/* Bitfield views of the same word; the unnamed 9-bit member skips the bits below DST_SEL_X. The BIGENDIAN_CPU variant declares the members in reverse order. NOTE(review): bit-field allocation order is implementation-defined -- confirm for the target compilers. */
+#if		defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_vtx_word1_t {
+          unsigned int                                : 9;
+          unsigned int dst_sel_x                      : SQ_VTX_WORD1_DST_SEL_X_SIZE;
+          unsigned int dst_sel_y                      : SQ_VTX_WORD1_DST_SEL_Y_SIZE;
+          unsigned int dst_sel_z                      : SQ_VTX_WORD1_DST_SEL_Z_SIZE;
+          unsigned int dst_sel_w                      : SQ_VTX_WORD1_DST_SEL_W_SIZE;
+          unsigned int use_const_fields               : SQ_VTX_WORD1_USE_CONST_FIELDS_SIZE;
+          unsigned int data_format                    : SQ_VTX_WORD1_DATA_FORMAT_SIZE;
+          unsigned int num_format_all                 : SQ_VTX_WORD1_NUM_FORMAT_ALL_SIZE;
+          unsigned int format_comp_all                : SQ_VTX_WORD1_FORMAT_COMP_ALL_SIZE;
+          unsigned int srf_mode_all                   : SQ_VTX_WORD1_SRF_MODE_ALL_SIZE;
+     } sq_vtx_word1_t;
+
+#elif		defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_vtx_word1_t {
+          unsigned int srf_mode_all                   : SQ_VTX_WORD1_SRF_MODE_ALL_SIZE;
+          unsigned int format_comp_all                : SQ_VTX_WORD1_FORMAT_COMP_ALL_SIZE;
+          unsigned int num_format_all                 : SQ_VTX_WORD1_NUM_FORMAT_ALL_SIZE;
+          unsigned int data_format                    : SQ_VTX_WORD1_DATA_FORMAT_SIZE;
+          unsigned int use_const_fields               : SQ_VTX_WORD1_USE_CONST_FIELDS_SIZE;
+          unsigned int dst_sel_w                      : SQ_VTX_WORD1_DST_SEL_W_SIZE;
+          unsigned int dst_sel_z                      : SQ_VTX_WORD1_DST_SEL_Z_SIZE;
+          unsigned int dst_sel_y                      : SQ_VTX_WORD1_DST_SEL_Y_SIZE;
+          unsigned int dst_sel_x                      : SQ_VTX_WORD1_DST_SEL_X_SIZE;
+          unsigned int                                : 9;
+     } sq_vtx_word1_t;
+
+#endif
+/* The word as a raw 32-bit value (val) or through its fields (f). sq_vtx_word1_t is declared only when LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined. */
+typedef union {
+     unsigned int val : 32;
+     sq_vtx_word1_t f;
+} sq_vtx_word1_u;
+
+
+/*
+ * SQ_VTX_WORD1_GPR struct: field SIZE/SHIFT/MASK constants, GET/SET accessor macros, and bitfield/union views; only bits 0-7 carry named fields.
+ */
+/* Width of each field, in bits (also used as the bit-field widths in the structs below). */
+#define SQ_VTX_WORD1_GPR_DST_GPR_SIZE  7
+#define SQ_VTX_WORD1_GPR_DST_REL_SIZE  1
+/* Bit position of each field's least-significant bit within the word. */
+#define SQ_VTX_WORD1_GPR_DST_GPR_SHIFT 0
+#define SQ_VTX_WORD1_GPR_DST_REL_SHIFT 7
+/* In-place mask of each field: SIZE one-bits starting at SHIFT. */
+#define SQ_VTX_WORD1_GPR_DST_GPR_MASK  0x0000007f
+#define SQ_VTX_WORD1_GPR_DST_REL_MASK  0x00000080
+/* OR of all field masks above, i.e. every bit this word defines. */
+#define SQ_VTX_WORD1_GPR_MASK \
+     (SQ_VTX_WORD1_GPR_DST_GPR_MASK | \
+      SQ_VTX_WORD1_GPR_DST_REL_MASK)
+/* Equals 0xcdcdcdcd & SQ_VTX_WORD1_GPR_MASK. NOTE(review): looks like a fill pattern, not a hardware reset value -- confirm. */
+#define SQ_VTX_WORD1_GPR_DEFAULT       0x000000cd
+/* GET_<FIELD>(word): the field's value shifted down to bit 0. The argument is parenthesized so a compound expression is masked as a whole. */
+#define SQ_VTX_WORD1_GPR_GET_DST_GPR(sq_vtx_word1_gpr) \
+     (((sq_vtx_word1_gpr) & SQ_VTX_WORD1_GPR_DST_GPR_MASK) >> SQ_VTX_WORD1_GPR_DST_GPR_SHIFT)
+#define SQ_VTX_WORD1_GPR_GET_DST_REL(sq_vtx_word1_gpr) \
+     (((sq_vtx_word1_gpr) & SQ_VTX_WORD1_GPR_DST_REL_MASK) >> SQ_VTX_WORD1_GPR_DST_REL_SHIFT)
+/* SET_<FIELD>(reg, value): replace one field of the lvalue reg, leaving the others intact. value is converted to unsigned int and masked to the field, so an oversized value cannot spill into neighbouring fields. reg is evaluated twice. */
+#define SQ_VTX_WORD1_GPR_SET_DST_GPR(sq_vtx_word1_gpr_reg, dst_gpr) \
+     ((sq_vtx_word1_gpr_reg) = ((sq_vtx_word1_gpr_reg) & ~SQ_VTX_WORD1_GPR_DST_GPR_MASK) | (((unsigned int)(dst_gpr) << SQ_VTX_WORD1_GPR_DST_GPR_SHIFT) & SQ_VTX_WORD1_GPR_DST_GPR_MASK))
+#define SQ_VTX_WORD1_GPR_SET_DST_REL(sq_vtx_word1_gpr_reg, dst_rel) \
+     ((sq_vtx_word1_gpr_reg) = ((sq_vtx_word1_gpr_reg) & ~SQ_VTX_WORD1_GPR_DST_REL_MASK) | (((unsigned int)(dst_rel) << SQ_VTX_WORD1_GPR_DST_REL_SHIFT) & SQ_VTX_WORD1_GPR_DST_REL_MASK))
+/* Bitfield views of the same word; the unnamed 24-bit member covers the bits with no named field. The BIGENDIAN_CPU variant declares the members in reverse order. NOTE(review): bit-field allocation order is implementation-defined -- confirm for the target compilers. */
+#if		defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_vtx_word1_gpr_t {
+          unsigned int dst_gpr                        : SQ_VTX_WORD1_GPR_DST_GPR_SIZE;
+          unsigned int dst_rel                        : SQ_VTX_WORD1_GPR_DST_REL_SIZE;
+          unsigned int                                : 24;
+     } sq_vtx_word1_gpr_t;
+
+#elif		defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_vtx_word1_gpr_t {
+          unsigned int                                : 24;
+          unsigned int dst_rel                        : SQ_VTX_WORD1_GPR_DST_REL_SIZE;
+          unsigned int dst_gpr                        : SQ_VTX_WORD1_GPR_DST_GPR_SIZE;
+     } sq_vtx_word1_gpr_t;
+
+#endif
+/* The word as a raw 32-bit value (val) or through its fields (f). sq_vtx_word1_gpr_t is declared only when LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined. */
+typedef union {
+     unsigned int val : 32;
+     sq_vtx_word1_gpr_t f;
+} sq_vtx_word1_gpr_u;
+
+
+/*
+ * SQ_VTX_WORD1_SEM struct: SIZE/SHIFT/MASK constants, GET/SET accessor macros, and bitfield/union views; only bits 0-7 (SEMANTIC_ID) are named.
+ */
+/* Width of the field, in bits (also used as the bit-field width in the structs below). */
+#define SQ_VTX_WORD1_SEM_SEMANTIC_ID_SIZE 8
+/* Bit position of the field's least-significant bit within the word. */
+#define SQ_VTX_WORD1_SEM_SEMANTIC_ID_SHIFT 0
+/* In-place mask of the field: SIZE one-bits starting at SHIFT. */
+#define SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK 0x000000ff
+/* Every bit this word defines (a single field here). */
+#define SQ_VTX_WORD1_SEM_MASK \
+     (SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK)
+/* Equals 0xcdcdcdcd & SQ_VTX_WORD1_SEM_MASK. NOTE(review): looks like a fill pattern, not a hardware reset value -- confirm. */
+#define SQ_VTX_WORD1_SEM_DEFAULT       0x000000cd
+/* GET: the field's value shifted down to bit 0. The argument is parenthesized so a compound expression is masked as a whole. */
+#define SQ_VTX_WORD1_SEM_GET_SEMANTIC_ID(sq_vtx_word1_sem) \
+     (((sq_vtx_word1_sem) & SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK) >> SQ_VTX_WORD1_SEM_SEMANTIC_ID_SHIFT)
+/* SET: replace the field in the lvalue reg, leaving other bits intact. The value is converted to unsigned int and masked to the field, so an oversized value cannot spill into other bits. reg is evaluated twice. */
+#define SQ_VTX_WORD1_SEM_SET_SEMANTIC_ID(sq_vtx_word1_sem_reg, semantic_id) \
+     ((sq_vtx_word1_sem_reg) = ((sq_vtx_word1_sem_reg) & ~SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK) | (((unsigned int)(semantic_id) << SQ_VTX_WORD1_SEM_SEMANTIC_ID_SHIFT) & SQ_VTX_WORD1_SEM_SEMANTIC_ID_MASK))
+/* Bitfield views of the same word; the unnamed 24-bit member covers the bits with no named field. The BIGENDIAN_CPU variant declares the members in reverse order. NOTE(review): bit-field allocation order is implementation-defined -- confirm for the target compilers. */
+#if		defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_vtx_word1_sem_t {
+          unsigned int semantic_id                    : SQ_VTX_WORD1_SEM_SEMANTIC_ID_SIZE;
+          unsigned int                                : 24;
+     } sq_vtx_word1_sem_t;
+
+#elif		defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_vtx_word1_sem_t {
+          unsigned int                                : 24;
+          unsigned int semantic_id                    : SQ_VTX_WORD1_SEM_SEMANTIC_ID_SIZE;
+     } sq_vtx_word1_sem_t;
+
+#endif
+/* The word as a raw 32-bit value (val) or through its fields (f). sq_vtx_word1_sem_t is declared only when LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined. */
+typedef union {
+     unsigned int val : 32;
+     sq_vtx_word1_sem_t f;
+} sq_vtx_word1_sem_u;
+
+
+/*
+ * SQ_VTX_WORD2 struct: field SIZE/SHIFT/MASK constants, GET/SET accessor macros, and bitfield/union views; bits 21-31 have no named field.
+ */
+/* Width of each field, in bits (also used as the bit-field widths in the structs below). */
+#define SQ_VTX_WORD2_OFFSET_SIZE       16
+#define SQ_VTX_WORD2_ENDIAN_SWAP_SIZE  2
+#define SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SIZE 1
+#define SQ_VTX_WORD2_MEGA_FETCH_SIZE   1
+#define SQ_VTX_WORD2_ALT_CONST_SIZE    1
+/* Bit position of each field's least-significant bit within the word. */
+#define SQ_VTX_WORD2_OFFSET_SHIFT      0
+#define SQ_VTX_WORD2_ENDIAN_SWAP_SHIFT 16
+#define SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SHIFT 18
+#define SQ_VTX_WORD2_MEGA_FETCH_SHIFT  19
+#define SQ_VTX_WORD2_ALT_CONST_SHIFT   20
+/* In-place mask of each field: SIZE one-bits starting at SHIFT. */
+#define SQ_VTX_WORD2_OFFSET_MASK       0x0000ffff
+#define SQ_VTX_WORD2_ENDIAN_SWAP_MASK  0x00030000
+#define SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK 0x00040000
+#define SQ_VTX_WORD2_MEGA_FETCH_MASK   0x00080000
+#define SQ_VTX_WORD2_ALT_CONST_MASK    0x00100000
+/* OR of all field masks above, i.e. every bit this word defines. */
+#define SQ_VTX_WORD2_MASK \
+     (SQ_VTX_WORD2_OFFSET_MASK | \
+      SQ_VTX_WORD2_ENDIAN_SWAP_MASK | \
+      SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK | \
+      SQ_VTX_WORD2_MEGA_FETCH_MASK | \
+      SQ_VTX_WORD2_ALT_CONST_MASK)
+/* Equals 0xcdcdcdcd & SQ_VTX_WORD2_MASK. NOTE(review): looks like a fill pattern, not a hardware reset value -- confirm. */
+#define SQ_VTX_WORD2_DEFAULT           0x000dcdcd
+/* GET_<FIELD>(word): the field's value shifted down to bit 0. The argument is parenthesized so a compound expression is masked as a whole. */
+#define SQ_VTX_WORD2_GET_OFFSET(sq_vtx_word2) \
+     (((sq_vtx_word2) & SQ_VTX_WORD2_OFFSET_MASK) >> SQ_VTX_WORD2_OFFSET_SHIFT)
+#define SQ_VTX_WORD2_GET_ENDIAN_SWAP(sq_vtx_word2) \
+     (((sq_vtx_word2) & SQ_VTX_WORD2_ENDIAN_SWAP_MASK) >> SQ_VTX_WORD2_ENDIAN_SWAP_SHIFT)
+#define SQ_VTX_WORD2_GET_CONST_BUF_NO_STRIDE(sq_vtx_word2) \
+     (((sq_vtx_word2) & SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK) >> SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SHIFT)
+#define SQ_VTX_WORD2_GET_MEGA_FETCH(sq_vtx_word2) \
+     (((sq_vtx_word2) & SQ_VTX_WORD2_MEGA_FETCH_MASK) >> SQ_VTX_WORD2_MEGA_FETCH_SHIFT)
+#define SQ_VTX_WORD2_GET_ALT_CONST(sq_vtx_word2) \
+     (((sq_vtx_word2) & SQ_VTX_WORD2_ALT_CONST_MASK) >> SQ_VTX_WORD2_ALT_CONST_SHIFT)
+/* SET_<FIELD>(reg, value): replace one field of the lvalue reg, leaving the others intact. value is converted to unsigned int and masked to the field, so an oversized value cannot spill into neighbouring fields. reg is evaluated twice. */
+#define SQ_VTX_WORD2_SET_OFFSET(sq_vtx_word2_reg, offset) \
+     ((sq_vtx_word2_reg) = ((sq_vtx_word2_reg) & ~SQ_VTX_WORD2_OFFSET_MASK) | (((unsigned int)(offset) << SQ_VTX_WORD2_OFFSET_SHIFT) & SQ_VTX_WORD2_OFFSET_MASK))
+#define SQ_VTX_WORD2_SET_ENDIAN_SWAP(sq_vtx_word2_reg, endian_swap) \
+     ((sq_vtx_word2_reg) = ((sq_vtx_word2_reg) & ~SQ_VTX_WORD2_ENDIAN_SWAP_MASK) | (((unsigned int)(endian_swap) << SQ_VTX_WORD2_ENDIAN_SWAP_SHIFT) & SQ_VTX_WORD2_ENDIAN_SWAP_MASK))
+#define SQ_VTX_WORD2_SET_CONST_BUF_NO_STRIDE(sq_vtx_word2_reg, const_buf_no_stride) \
+     ((sq_vtx_word2_reg) = ((sq_vtx_word2_reg) & ~SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK) | (((unsigned int)(const_buf_no_stride) << SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SHIFT) & SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_MASK))
+#define SQ_VTX_WORD2_SET_MEGA_FETCH(sq_vtx_word2_reg, mega_fetch) \
+     ((sq_vtx_word2_reg) = ((sq_vtx_word2_reg) & ~SQ_VTX_WORD2_MEGA_FETCH_MASK) | (((unsigned int)(mega_fetch) << SQ_VTX_WORD2_MEGA_FETCH_SHIFT) & SQ_VTX_WORD2_MEGA_FETCH_MASK))
+#define SQ_VTX_WORD2_SET_ALT_CONST(sq_vtx_word2_reg, alt_const) \
+     ((sq_vtx_word2_reg) = ((sq_vtx_word2_reg) & ~SQ_VTX_WORD2_ALT_CONST_MASK) | (((unsigned int)(alt_const) << SQ_VTX_WORD2_ALT_CONST_SHIFT) & SQ_VTX_WORD2_ALT_CONST_MASK))
+/* Bitfield views of the same word; the unnamed 11-bit member covers the bits with no named field. The BIGENDIAN_CPU variant declares the members in reverse order. NOTE(review): bit-field allocation order is implementation-defined -- confirm for the target compilers. */
+#if		defined(LITTLEENDIAN_CPU)
+
+     typedef struct _sq_vtx_word2_t {
+          unsigned int offset                         : SQ_VTX_WORD2_OFFSET_SIZE;
+          unsigned int endian_swap                    : SQ_VTX_WORD2_ENDIAN_SWAP_SIZE;
+          unsigned int const_buf_no_stride            : SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SIZE;
+          unsigned int mega_fetch                     : SQ_VTX_WORD2_MEGA_FETCH_SIZE;
+          unsigned int alt_const                      : SQ_VTX_WORD2_ALT_CONST_SIZE;
+          unsigned int                                : 11;
+     } sq_vtx_word2_t;
+
+#elif		defined(BIGENDIAN_CPU)
+
+     typedef struct _sq_vtx_word2_t {
+          unsigned int                                : 11;
+          unsigned int alt_const                      : SQ_VTX_WORD2_ALT_CONST_SIZE;
+          unsigned int mega_fetch                     : SQ_VTX_WORD2_MEGA_FETCH_SIZE;
+          unsigned int const_buf_no_stride            : SQ_VTX_WORD2_CONST_BUF_NO_STRIDE_SIZE;
+          unsigned int endian_swap                    : SQ_VTX_WORD2_ENDIAN_SWAP_SIZE;
+          unsigned int offset                         : SQ_VTX_WORD2_OFFSET_SIZE;
+     } sq_vtx_word2_t;
+
+#endif
+/* The word as a raw 32-bit value (val) or through its fields (f). sq_vtx_word2_t is declared only when LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined. */
+typedef union {
+     unsigned int val : 32;
+     sq_vtx_word2_t f;
+} sq_vtx_word2_u;
+
+#endif /* _SQ_MICRO_REG_H */
+
+
diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile
new file mode 100644
index 0000000000..19df62742e
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/Makefile
@@ -0,0 +1,60 @@
+# src/mesa/drivers/dri/radeon/Makefile
+# Note, this Makefile requires GNU make
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+# RADEON_CFLAGS and RADEON_LDFLAGS are not set here; presumably they come from configs/current (confirm).
+CFLAGS += $(RADEON_CFLAGS)
+
+LIBNAME = radeon_dri.so
+# Build radeon_cs_space_drm.c, radeon_bo.c and radeon_cs.c only when RADEON_LDFLAGS is empty.
+ifeq ($(RADEON_LDFLAGS),)
+CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c
+endif
+# Listed separately and appended to DRIVER_SOURCES below.
+RADEON_COMMON_SOURCES = \
+	radeon_bo_legacy.c \
+	radeon_common_context.c \
+	radeon_common.c \
+	radeon_cs_legacy.c \
+	radeon_dma.c \
+	radeon_debug.c \
+	radeon_fbo.c \
+	radeon_lock.c \
+	radeon_mipmap_tree.c \
+	radeon_pixel_read.c \
+	radeon_queryobj.c \
+	radeon_span.c \
+	radeon_texture.c \
+	radeon_tex_copy.c \
+	radeon_tex_getimage.c \
+	radeon_tile.c
+
+DRIVER_SOURCES = \
+	radeon_context.c \
+	radeon_ioctl.c \
+	radeon_screen.c \
+	radeon_state.c \
+	radeon_state_init.c \
+	radeon_tex.c \
+	radeon_texstate.c \
+	radeon_tcl.c \
+	radeon_swtcl.c \
+	radeon_maos.c \
+	radeon_sanity.c \
+	radeon_blit.c \
+	$(RADEON_COMMON_SOURCES)
+# COMMON_SOURCES is not defined in this file; presumably an included makefile provides it (confirm).
+C_SOURCES = \
+	$(COMMON_SOURCES) \
+	$(DRIVER_SOURCES) \
+	$(CS_SOURCES)
+
+DRIVER_DEFINES = -DRADEON_R100
+
+DRI_LIB_DEPS += $(RADEON_LDFLAGS)
+# This driver lists no x86-specific sources.
+X86_SOURCES = 
+
+include ../Makefile.template
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_blit.c b/src/mesa/drivers/dri/radeon/radeon_blit.c
new file mode 100644
index 0000000000..143822361e
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_blit.c
@@ -0,0 +1,418 @@
+/*
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "radeon_context.h"
+#include "radeon_blit.h"
+
+/**
+ * Build the header dword for a register-write packet.
+ *
+ * A zero count yields CP_PACKET2; any other count yields
+ * CP_PACKET0(reg, count - 1). The rscrn argument is not used here.
+ */
+static inline uint32_t cmdpacket0(struct radeon_screen *rscrn,
+                                  int reg, int count)
+{
+    if (!count)
+        return CP_PACKET2;
+
+    return CP_PACKET0(reg, count - 1);
+}
+
+/**
+ * Tell whether a format is usable for blits, i.e. whether it is one of
+ * the common formats supported as both textures and render targets.
+ *
+ * @return 1 if the format is accepted, 0 otherwise.
+ */
+unsigned r100_check_blit(gl_format mesa_format)
+{
+    unsigned listed;
+
+    /* XXX others?  BE/LE? */
+    switch (mesa_format) {
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+    case MESA_FORMAT_RGB565:
+    case MESA_FORMAT_ARGB4444:
+    case MESA_FORMAT_ARGB1555:
+    case MESA_FORMAT_A8:
+    case MESA_FORMAT_L8:
+    case MESA_FORMAT_I8:
+        listed = 1;
+        break;
+    default:
+        listed = 0;
+        break;
+    }
+
+    if (!listed)
+        return 0;
+
+    /* ??? - formats that carry depth bits are refused as well */
+    return (_mesa_get_format_bits(mesa_format, GL_DEPTH_BITS) > 0) ? 0 : 1;
+}
+
+/**
+ * Emit the vertex/setup-engine state used by the blit rectangle:
+ * SE_CNTL_STATUS, SE_COORD_FMT, SE_VTX_FMT and SE_CNTL (8 dwords).
+ */
+static inline void emit_vtx_state(struct r100_context *r100)
+{
+    uint32_t cntl_status;
+    uint32_t coord_fmt;
+    uint32_t se_cntl;
+    BATCH_LOCALS(&r100->radeon);
+
+    BEGIN_BATCH(8);
+
+    /* Chips flagged as TCL-capable get 0, all others bypass TCL. */
+    if (r100->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
+        cntl_status = 0;
+    else
+        cntl_status = RADEON_TCL_BYPASS;
+    OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, cntl_status);
+
+    coord_fmt = RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
+                RADEON_TEX1_W_ROUTING_USE_W0;
+    OUT_BATCH_REGVAL(RADEON_SE_COORD_FMT, coord_fmt);
+
+    /* Vertices carry a position (x, y) and one texcoord set (s, t). */
+    OUT_BATCH_REGVAL(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY | RADEON_SE_VTX_FMT_ST0);
+
+    se_cntl = RADEON_DIFFUSE_SHADE_GOURAUD |
+              RADEON_BFACE_SOLID |
+              RADEON_FFACE_SOLID |
+              RADEON_VTX_PIX_CENTER_OGL |
+              RADEON_ROUND_MODE_ROUND |
+              RADEON_ROUND_PREC_4TH_PIX;
+    OUT_BATCH_REGVAL(RADEON_SE_CNTL, se_cntl);
+
+    END_BATCH();
+}
+
+/**
+ * Emit texture unit 0 state so the source image can be sampled by the
+ * blit rectangle (18 dwords).
+ *
+ * @param r100        driver context
+ * @param mesa_format format of the source image; selects the TXFORMAT bits
+ * @param bo          buffer object holding the source image
+ * @param offset      byte offset of the image inside @a bo (32-byte aligned)
+ * @param width       image width, at most 2047
+ * @param height      image height, at most 2047
+ * @param pitch       image pitch; multiplied by the format's byte size
+ *                    when programming PP_TEX_PITCH_0
+ */
+static inline void emit_tx_setup(struct r100_context *r100,
+				 gl_format mesa_format,
+				 struct radeon_bo *bo,
+				 intptr_t offset,
+				 unsigned width,
+				 unsigned height,
+				 unsigned pitch)
+{
+    uint32_t txformat = RADEON_TXFORMAT_NON_POWER2;
+    BATCH_LOCALS(&r100->radeon);
+
+    assert(width <= 2047);
+    assert(height <= 2047);
+    assert(offset % 32 == 0);
+
+    /* XXX others?  BE/LE? */
+    switch (mesa_format) {
+    case MESA_FORMAT_ARGB8888:
+	    txformat |= RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+	    break;
+    case MESA_FORMAT_RGBA8888:
+	    txformat |= RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+	    break;
+    case MESA_FORMAT_XRGB8888:
+	    txformat |= RADEON_TXFORMAT_ARGB8888;
+	    break;
+    case MESA_FORMAT_RGB565:
+	    txformat |= RADEON_TXFORMAT_RGB565;
+	    break;
+    case MESA_FORMAT_ARGB4444:
+	    txformat |= RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+	    break;
+    case MESA_FORMAT_ARGB1555:
+	    txformat |= RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+	    break;
+    case MESA_FORMAT_A8:
+    case MESA_FORMAT_I8:
+	    txformat |= RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+	    break;
+    case MESA_FORMAT_L8:
+	    txformat |= RADEON_TXFORMAT_I8;
+	    break;
+    case MESA_FORMAT_AL88:
+	    txformat |= RADEON_TXFORMAT_AI88 | RADEON_TXFORMAT_ALPHA_IN_MAP;
+	    break;
+    default:
+	    /* Unknown formats leave only the NON_POWER2 bit set. */
+	    break;
+    }
+
+    BEGIN_BATCH(18);
+    OUT_BATCH_REGVAL(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
+    /* Colour and alpha combiners: 0 * 0 + texture 0, i.e. pass the texel through. */
+    OUT_BATCH_REGVAL(RADEON_PP_TXCBLEND_0, (RADEON_COLOR_ARG_A_ZERO |
+					    RADEON_COLOR_ARG_B_ZERO |
+					    RADEON_COLOR_ARG_C_T0_COLOR |
+					    RADEON_BLEND_CTL_ADD |
+					    RADEON_CLAMP_TX));
+    OUT_BATCH_REGVAL(RADEON_PP_TXABLEND_0, (RADEON_ALPHA_ARG_A_ZERO |
+					    RADEON_ALPHA_ARG_B_ZERO |
+					    RADEON_ALPHA_ARG_C_T0_ALPHA |
+					    RADEON_BLEND_CTL_ADD |
+					    RADEON_CLAMP_TX));
+    OUT_BATCH_REGVAL(RADEON_PP_TXFILTER_0, (RADEON_CLAMP_S_CLAMP_LAST |
+					    RADEON_CLAMP_T_CLAMP_LAST |
+					    RADEON_MAG_FILTER_NEAREST |
+					    RADEON_MIN_FILTER_NEAREST));
+    OUT_BATCH_REGVAL(RADEON_PP_TXFORMAT_0, txformat);
+    OUT_BATCH_REGVAL(RADEON_PP_TEX_SIZE_0, ((width - 1) |
+					    ((height - 1) << RADEON_TEX_VSIZE_SHIFT)));
+    OUT_BATCH_REGVAL(RADEON_PP_TEX_PITCH_0, pitch * _mesa_get_format_bytes(mesa_format) - 32);
+
+    OUT_BATCH_REGSEQ(RADEON_PP_TXOFFSET_0, 1);
+    /* Address the image inside the bo rather than the start of the bo;
+     * emitting 0 here made every blit sample from offset 0. */
+    OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+
+    END_BATCH();
+}
+
+/**
+ * Emit colour-buffer state so that rendering goes to the destination
+ * image (18 dwords, no automatic state emission).
+ *
+ * @param r100        driver context
+ * @param bo          buffer object holding the destination image
+ * @param offset      byte offset of the image inside @a bo
+ * @param mesa_format destination format; selects the RB3D_CNTL colour format
+ * @param pitch       value programmed into RB3D_COLORPITCH
+ * @param width       scissor width
+ * @param height      scissor height
+ */
+static inline void emit_cb_setup(struct r100_context *r100,
+				 struct radeon_bo *bo,
+				 intptr_t offset,
+				 gl_format mesa_format,
+				 unsigned pitch,
+				 unsigned width,
+				 unsigned height)
+{
+    uint32_t dst_pitch = pitch;
+    uint32_t dst_format = 0;
+    BATCH_LOCALS(&r100->radeon);
+
+    /* XXX others?  BE/LE? */
+    switch (mesa_format) {
+    case MESA_FORMAT_ARGB8888:
+    case MESA_FORMAT_XRGB8888:
+	    dst_format = RADEON_COLOR_FORMAT_ARGB8888;
+	    break;
+    case MESA_FORMAT_RGB565:
+	    dst_format = RADEON_COLOR_FORMAT_RGB565;
+	    break;
+    case MESA_FORMAT_ARGB4444:
+	    dst_format = RADEON_COLOR_FORMAT_ARGB4444;
+	    break;
+    case MESA_FORMAT_ARGB1555:
+	    dst_format = RADEON_COLOR_FORMAT_ARGB1555;
+	    break;
+    case MESA_FORMAT_A8:
+    case MESA_FORMAT_L8:
+    case MESA_FORMAT_I8:
+	    dst_format = RADEON_COLOR_FORMAT_RGB8;
+	    break;
+    default:
+	    /* Unknown formats leave dst_format at 0. */
+	    break;
+    }
+
+    BEGIN_BATCH_NO_AUTOSTATE(18);
+    /* Scissor covers the rectangle from (0, 0) to (width, height). */
+    OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0);
+    OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, ((width << RADEON_RE_WIDTH_SHIFT) |
+					      (height << RADEON_RE_HEIGHT_SHIFT)));
+    OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff);
+    /* ONE / ZERO blend factors: the source replaces the destination. */
+    OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
+    OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format);
+
+    OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1);
+    /* Address the image inside the bo rather than the start of the bo;
+     * emitting 0 here made every blit render to offset 0. */
+    OUT_BATCH_RELOC(offset, bo, offset, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
+    OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1);
+    OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
+
+    END_BATCH();
+}
+
+/**
+ * Reset the command stream's buffer accounting and check that both blit
+ * buffers can be accounted for: the source with VRAM|GTT as its third
+ * (read) argument, the destination with VRAM|GTT as its fourth (write)
+ * argument.
+ *
+ * @return GL_TRUE if both checks returned 0, GL_FALSE otherwise.
+ */
+static GLboolean validate_buffers(struct r100_context *r100,
+                                  struct radeon_bo *src_bo,
+                                  struct radeon_bo *dst_bo)
+{
+    radeon_cs_space_reset_bos(r100->radeon.cmdbuf.cs);
+
+    if (radeon_cs_space_check_with_bo(r100->radeon.cmdbuf.cs, src_bo,
+                                      RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT,
+                                      0))
+        return GL_FALSE;
+
+    /* The destination is only checked once the source has passed. */
+    if (radeon_cs_space_check_with_bo(r100->radeon.cmdbuf.cs, dst_bo,
+                                      0,
+                                      RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT))
+        return GL_FALSE;
+
+    return GL_TRUE;
+}
+
+/**
+ * Compute normalized texture coordinates for a region of an image.
+ *
+ * On return buf holds [minx, maxx, miny, maxy]; when flip_y is non-zero
+ * both y values are replaced by (1 - value).
+ */
+static inline void calc_tex_coords(float img_width, float img_height,
+				   float x, float y,
+				   float reg_width, float reg_height,
+				   unsigned flip_y, float *buf)
+{
+    float *horiz = &buf[0];
+    float *vert = &buf[2];
+
+    horiz[0] = x / img_width;
+    horiz[1] = horiz[0] + reg_width / img_width;
+
+    vert[0] = y / img_height;
+    vert[1] = vert[0] + reg_height / img_height;
+
+    if (!flip_y)
+        return;
+
+    /* Mirror the vertical range around the image centre. */
+    vert[0] = 1.0 - vert[0];
+    vert[1] = 1.0 - vert[1];
+}
+
+/**
+ * Emit an immediate-mode draw packet of three vertices (primitive type
+ * RECT_LIST, 15 dwords) covering the destination region and textured
+ * with the source region. Each vertex is (x, y, s, t).
+ */
+static inline void emit_draw_packet(struct r100_context *r100,
+				    unsigned src_width, unsigned src_height,
+				    unsigned src_x_offset, unsigned src_y_offset,
+				    unsigned dst_x_offset, unsigned dst_y_offset,
+				    unsigned reg_width, unsigned reg_height,
+				    unsigned flip_y)
+{
+    const float x_near = dst_x_offset;
+    const float x_far = dst_x_offset + reg_width;
+    const float y_near = dst_y_offset;
+    const float y_far = dst_y_offset + reg_height;
+    float texcoords[4];
+    float verts[12];
+    float *v = verts;
+    BATCH_LOCALS(&r100->radeon);
+
+    /* texcoords = [s_min, s_max, t_min, t_max] */
+    calc_tex_coords(src_width, src_height,
+                    src_x_offset, src_y_offset,
+                    reg_width, reg_height,
+                    flip_y, texcoords);
+
+    /* vertex 0: near x, far y */
+    *v++ = x_near;
+    *v++ = y_far;
+    *v++ = texcoords[0];
+    *v++ = texcoords[3];
+
+    /* vertex 1: far x, far y */
+    *v++ = x_far;
+    *v++ = y_far;
+    *v++ = texcoords[1];
+    *v++ = texcoords[3];
+
+    /* vertex 2: far x, near y */
+    *v++ = x_far;
+    *v++ = y_near;
+    *v++ = texcoords[1];
+    *v++ = texcoords[2];
+
+    BEGIN_BATCH(15);
+    OUT_BATCH(RADEON_CP_PACKET3_3D_DRAW_IMMD | (13 << 16));
+    OUT_BATCH(RADEON_CP_VC_FRMT_XY | RADEON_CP_VC_FRMT_ST0);
+    OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+              RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
+              RADEON_CP_VC_CNTL_MAOS_ENABLE |
+              RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+              (3 << 16));
+    OUT_BATCH_TABLE(verts, 12);
+    END_BATCH();
+}
+
+/**
+ * Copy a region of [@a reg_width x @a reg_height] pixels from source buffer
+ * to destination buffer.
+ * @param[in] ctx GL context (must be an r100 context)
+ * @param[in] src_bo source radeon buffer object
+ * @param[in] src_offset offset of the source image in the @a src_bo
+ * @param[in] src_mesaformat source image format
+ * @param[in] src_pitch aligned source image width
+ * @param[in] src_width source image width
+ * @param[in] src_height source image height
+ * @param[in] src_x_offset x offset in the source image
+ * @param[in] src_y_offset y offset in the source image
+ * @param[in] dst_bo destination radeon buffer object
+ * @param[in] dst_offset offset of the destination image in the @a dst_bo
+ * @param[in] dst_mesaformat destination image format
+ * @param[in] dst_pitch aligned destination image width
+ * @param[in] dst_width destination image width
+ * @param[in] dst_height destination image height
+ * @param[in] dst_x_offset x offset in the destination image
+ * @param[in] dst_y_offset y offset in the destination image
+ * @param[in] reg_width region width
+ * @param[in] reg_height region height
+ * @param[in] flip_y set if y coords of the source image need to be flipped
+ * @return GL_TRUE if the blit commands were emitted, GL_FALSE if this
+ *         blit cannot be done here (unsupported destination format,
+ *         destination pitch below 32, offsets outside the images, same
+ *         bo for source and destination, misaligned buffer offsets, or
+ *         buffer validation failure).
+ */
+unsigned r100_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x_offset,
+                   unsigned src_y_offset,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x_offset,
+                   unsigned dst_y_offset,
+                   unsigned reg_width,
+                   unsigned reg_height,
+                   unsigned flip_y)
+{
+    struct r100_context *r100 = R100_CONTEXT(ctx);
+
+    if (!r100_check_blit(dst_mesaformat))
+        return GL_FALSE;
+
+    /* Make sure that colorbuffer has even width - hw limitation */
+    if (dst_pitch % 2 > 0)
+        ++dst_pitch;
+
+    /* Rendering to small buffer doesn't work.
+     * Looks like a hw limitation.
+     */
+    if (dst_pitch < 32)
+        return GL_FALSE;
+
+    /* An offset beyond the image would make the unsigned subtractions
+     * below wrap around and yield a huge region, so refuse it.
+     */
+    if (src_x_offset > src_width || src_y_offset > src_height ||
+        dst_x_offset > dst_width || dst_y_offset > dst_height)
+        return GL_FALSE;
+
+    /* Need to clamp the region size to make sure
+     * we don't read outside of the source buffer
+     * or write outside of the destination buffer.
+     * Comparing against (size - offset) avoids overflow in
+     * (region + offset).
+     */
+    if (reg_width > src_width - src_x_offset)
+        reg_width = src_width - src_x_offset;
+    if (reg_height > src_height - src_y_offset)
+        reg_height = src_height - src_y_offset;
+    if (reg_width > dst_width - dst_x_offset)
+        reg_width = dst_width - dst_x_offset;
+    if (reg_height > dst_height - dst_y_offset)
+        reg_height = dst_height - dst_y_offset;
+
+    if (src_bo == dst_bo) {
+        return GL_FALSE;
+    }
+
+    if (src_offset % 32 || dst_offset % 32) {
+        return GL_FALSE;
+    }
+
+    /* Debug output, compiled in but disabled. */
+    if (0) {
+        fprintf(stderr, "src: size [%d x %d], pitch %d, "
+                "offset [%d x %d], format %s, bo %p\n",
+                src_width, src_height, src_pitch,
+                src_x_offset, src_y_offset,
+                _mesa_get_format_name(src_mesaformat),
+                src_bo);
+        fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n",
+                dst_pitch, dst_x_offset, dst_y_offset,
+                _mesa_get_format_name(dst_mesaformat), dst_bo);
+        fprintf(stderr, "region: %d x %d\n", reg_width, reg_height);
+    }
+
+    /* Flush is needed to make sure that source buffer has correct data */
+    radeonFlush(ctx);
+
+    /* 59 = 8 + 18 + 18 + 15, the sum of the batches emitted below. */
+    rcommonEnsureCmdBufSpace(&r100->radeon, 59, __FUNCTION__);
+
+    if (!validate_buffers(r100, src_bo, dst_bo))
+        return GL_FALSE;
+
+    /* 8 */
+    emit_vtx_state(r100);
+    /* 18 */
+    emit_tx_setup(r100, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
+    /* 18 */
+    emit_cb_setup(r100, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
+    /* 15 */
+    emit_draw_packet(r100, src_width, src_height,
+                     src_x_offset, src_y_offset,
+                     dst_x_offset, dst_y_offset,
+                     reg_width, reg_height,
+                     flip_y);
+
+    radeonFlush(ctx);
+
+    return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_blit.h b/src/mesa/drivers/dri/radeon/radeon_blit.h
new file mode 100644
index 0000000000..d7d0b5554a
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_blit.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2010 Advanced Micro Devices, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_BLIT_H
+#define RADEON_BLIT_H
+
+/* NOTE(review): no definition of r100_blit_init appears in radeon_blit.c;
+ * confirm that it is defined elsewhere or drop this declaration. */
+void r100_blit_init(struct r100_context *r100);
+
+/* Returns non-zero if mesa_format is accepted by r100_blit() as a
+ * destination format, 0 otherwise. */
+unsigned r100_check_blit(gl_format mesa_format);
+
+/* Copies a width x height pixel region from the source image to the
+ * destination image. Returns GL_TRUE when the blit was emitted and
+ * GL_FALSE when it could not be performed. The last two size parameters
+ * are called reg_width / reg_height in the definition. */
+unsigned r100_blit(GLcontext *ctx,
+                   struct radeon_bo *src_bo,
+                   intptr_t src_offset,
+                   gl_format src_mesaformat,
+                   unsigned src_pitch,
+                   unsigned src_width,
+                   unsigned src_height,
+                   unsigned src_x_offset,
+                   unsigned src_y_offset,
+                   struct radeon_bo *dst_bo,
+                   intptr_t dst_offset,
+                   gl_format dst_mesaformat,
+                   unsigned dst_pitch,
+                   unsigned dst_width,
+                   unsigned dst_height,
+                   unsigned dst_x_offset,
+                   unsigned dst_y_offset,
+                   unsigned width,
+                   unsigned height,
+                   unsigned flip_y);
+
+#endif // RADEON_BLIT_H
diff --git a/src/mesa/drivers/dri/radeon/radeon_bo.c b/src/mesa/drivers/dri/radeon/radeon_bo.c
new file mode 100644
index 0000000000..393d156cde
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_bo.c
@@ -0,0 +1,110 @@
+#include <radeon_bocs_wrapper.h>
+#include <radeon_bo_int_drm.h>
+
+/**
+ * Print one line describing a buffer object to stderr: the caller's
+ * operation label, the bo pointer, its handle, its size and its
+ * reference count (the last three in hexadecimal).
+ *
+ * @param bo buffer object to describe
+ * @param op label identifying the operation being traced
+ */
+void radeon_bo_debug(struct radeon_bo *bo,
+		     const char *op)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+
+    /* %p requires a void pointer argument. */
+    fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X\n",
+            op, (void *)bo, bo->handle, boi->size, boi->cref);
+}
+
+/**
+ * Open a buffer object through the manager's bo_open callback and
+ * return whatever that callback returns.
+ */
+struct radeon_bo *radeon_bo_open(struct radeon_bo_manager *bom,
+				 uint32_t handle,
+				 uint32_t size,
+				 uint32_t alignment,
+				 uint32_t domains,
+				 uint32_t flags)
+{
+    return bom->funcs->bo_open(bom, handle, size, alignment, domains, flags);
+}
+
+/**
+ * Take a reference: bump the reference count, then notify the manager
+ * through its bo_ref callback.
+ */
+void radeon_bo_ref(struct radeon_bo *bo)
+{
+    struct radeon_bo_int *internal = (struct radeon_bo_int *)bo;
+
+    internal->cref += 1;
+    internal->bom->funcs->bo_ref(internal);
+}
+
+/**
+ * Drop a reference: decrement the reference count, then hand the object
+ * to the manager's bo_unref callback and return that callback's result.
+ */
+struct radeon_bo *radeon_bo_unref(struct radeon_bo *bo)
+{
+    struct radeon_bo_int *internal = (struct radeon_bo_int *)bo;
+
+    internal->cref -= 1;
+    return internal->bom->funcs->bo_unref(internal);
+}
+
+/** Forward a map request (with the caller's write flag) to the manager's bo_map callback. */
+int radeon_bo_map(struct radeon_bo *bo, int write)
+{
+    struct radeon_bo_int *internal = (struct radeon_bo_int *)bo;
+    struct radeon_bo_funcs *ops = internal->bom->funcs;
+
+    return ops->bo_map(internal, write);
+}
+
+/** Forward an unmap request to the manager's bo_unmap callback. */
+int radeon_bo_unmap(struct radeon_bo *bo)
+{
+    struct radeon_bo_int *internal = (struct radeon_bo_int *)bo;
+    struct radeon_bo_funcs *ops = internal->bom->funcs;
+
+    return ops->bo_unmap(internal);
+}
+
+/**
+ * Call the manager's bo_wait callback if it provides one.
+ *
+ * @return the callback's result, or 0 when no callback is installed.
+ */
+int radeon_bo_wait(struct radeon_bo *bo)
+{
+    struct radeon_bo_int *internal = (struct radeon_bo_int *)bo;
+
+    if (internal->bom->funcs->bo_wait)
+        return internal->bom->funcs->bo_wait(internal);
+
+    return 0;
+}
+
+/**
+ * Forward a busy query to the manager's bo_is_busy callback; the domain
+ * pointer is passed through unchanged.
+ */
+int radeon_bo_is_busy(struct radeon_bo *bo,
+		      uint32_t *domain)
+{
+    struct radeon_bo_int *internal = (struct radeon_bo_int *)bo;
+    struct radeon_bo_funcs *ops = internal->bom->funcs;
+
+    return ops->bo_is_busy(internal, domain);
+}
+
+/** Forward tiling flags and pitch to the manager's bo_set_tiling callback. */
+int radeon_bo_set_tiling(struct radeon_bo *bo,
+			 uint32_t tiling_flags, uint32_t pitch)
+{
+    struct radeon_bo_int *internal = (struct radeon_bo_int *)bo;
+    struct radeon_bo_funcs *ops = internal->bom->funcs;
+
+    return ops->bo_set_tiling(internal, tiling_flags, pitch);
+}
+
+/**
+ * Query tiling state via the manager's bo_get_tiling callback; both
+ * output pointers are passed through unchanged.
+ */
+int radeon_bo_get_tiling(struct radeon_bo *bo,
+			  uint32_t *tiling_flags, uint32_t *pitch)
+{
+    struct radeon_bo_int *internal = (struct radeon_bo_int *)bo;
+    struct radeon_bo_funcs *ops = internal->bom->funcs;
+
+    return ops->bo_get_tiling(internal, tiling_flags, pitch);
+}
+
+/**
+ * Ask the manager whether the buffer object is static.
+ *
+ * @return the bo_is_static callback's result, or 0 when the manager
+ *         does not install that callback.
+ */
+int radeon_bo_is_static(struct radeon_bo *bo)
+{
+    struct radeon_bo_int *internal = (struct radeon_bo_int *)bo;
+
+    if (!internal->bom->funcs->bo_is_static)
+        return 0;
+
+    return internal->bom->funcs->bo_is_static(internal);
+}
+
+/**
+ * Report whether the buffer object has more than one reference.
+ * The cs argument is not consulted; only the reference count is tested.
+ *
+ * @return 1 if the reference count exceeds 1, otherwise 0.
+ */
+int radeon_bo_is_referenced_by_cs(struct radeon_bo *bo,
+				  struct radeon_cs *cs)
+{
+    const struct radeon_bo_int *internal = (struct radeon_bo_int *)bo;
+    const unsigned refs = internal->cref;
+
+    return refs > 1;
+}
+
+/** Return the handle stored in the buffer object. */
+uint32_t radeon_bo_get_handle(struct radeon_bo *bo)
+{
+    const uint32_t handle = bo->handle;
+
+    return handle;
+}
+
+/**
+ * Derive a domain mask from the object's space_accounted word: the low
+ * 16 bits if any of them are set, otherwise the upper 16 bits.
+ * NOTE(review): presumably the low half holds read domains and the high
+ * half write domains - confirm against the cs space accounting code.
+ */
+uint32_t radeon_bo_get_src_domain(struct radeon_bo *bo)
+{
+    const struct radeon_bo_int *internal = (struct radeon_bo_int *)bo;
+    const uint32_t low_half = internal->space_accounted & 0xffff;
+
+    if (low_half)
+        return low_half;
+
+    return internal->space_accounted >> 16;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h
new file mode 100644
index 0000000000..beb2369880
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h
@@ -0,0 +1,75 @@
+/* 
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_BO_H
+#define RADEON_BO_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+/* bo object */
+#define RADEON_BO_FLAGS_MACRO_TILE  1
+#define RADEON_BO_FLAGS_MICRO_TILE  2
+
+struct radeon_bo_manager;
+struct radeon_cs;
+
+/* Public view of a buffer object. struct radeon_bo_int begins with the
+ * same four members, and radeon_bo.c casts between the two types. */
+struct radeon_bo {
+    void                        *ptr;    /* NOTE(review): presumably the CPU mapping - confirm */
+    uint32_t                    flags;   /* RADEON_BO_FLAGS_* bits */
+    uint32_t                    handle;  /* value returned by radeon_bo_get_handle() */
+    uint32_t                    size;    /* NOTE(review): presumably size in bytes - confirm */
+};
+
+struct radeon_bo_manager;
+
+/* Prints op, the bo pointer, handle, size and reference count to stderr. */
+void radeon_bo_debug(struct radeon_bo *bo,
+		     const char *op);
+
+/* Opens a buffer object through the manager's bo_open callback. */
+struct radeon_bo *radeon_bo_open(struct radeon_bo_manager *bom,
+				  uint32_t handle,
+				  uint32_t size,
+				  uint32_t alignment,
+				  uint32_t domains,
+				  uint32_t flags);
+
+/* Reference counting: ref increments and unref decrements the count
+ * before calling the matching manager callback. */
+void radeon_bo_ref(struct radeon_bo *bo);
+struct radeon_bo *radeon_bo_unref(struct radeon_bo *bo);
+/* The remaining calls forward to the manager callback of the same name.
+ * radeon_bo_wait and radeon_bo_is_static return 0 when the manager does
+ * not provide that callback. */
+int radeon_bo_map(struct radeon_bo *bo, int write);
+int radeon_bo_unmap(struct radeon_bo *bo);
+int radeon_bo_wait(struct radeon_bo *bo);
+int radeon_bo_is_busy(struct radeon_bo *bo, uint32_t *domain);
+int radeon_bo_set_tiling(struct radeon_bo *bo, uint32_t tiling_flags, uint32_t pitch);
+int radeon_bo_get_tiling(struct radeon_bo *bo, uint32_t *tiling_flags, uint32_t *pitch);
+int radeon_bo_is_static(struct radeon_bo *bo);
+/* Returns non-zero when the bo's reference count is greater than 1;
+ * the cs argument is not examined by the implementation in radeon_bo.c. */
+int radeon_bo_is_referenced_by_cs(struct radeon_bo *bo,
+				  struct radeon_cs *cs);
+uint32_t radeon_bo_get_handle(struct radeon_bo *bo);
+/* Returns the low 16 bits of the bo's space accounting word, or its
+ * upper 16 bits when the low half is zero. */
+uint32_t radeon_bo_get_src_domain(struct radeon_bo *bo);
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_int_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_int_drm.h
new file mode 100644
index 0000000000..190c332475
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_bo_int_drm.h
@@ -0,0 +1,45 @@
+#ifndef RADEON_BO_INT
+#define RADEON_BO_INT
+
+/* Base of every buffer-object manager: a callback table plus a file
+ * descriptor (used as the DRM fd by the legacy manager). */
+struct radeon_bo_manager {
+    struct radeon_bo_funcs  *funcs;
+    int                     fd;
+};
+
+/* Internal view of a buffer object. The first four members mirror
+ * struct radeon_bo so the two types can be cast to one another. */
+struct radeon_bo_int {
+    void                        *ptr;
+    uint32_t                    flags;
+    uint32_t                    handle;
+    uint32_t                    size;
+    /* private members */
+    uint32_t                    alignment;
+    uint32_t                    domains;
+    unsigned                    cref;             /* reference count, see radeon_bo_ref/unref */
+    struct radeon_bo_manager    *bom;             /* manager whose callbacks serve this bo */
+    uint32_t                    space_accounted;  /* two 16-bit halves, see radeon_bo_get_src_domain */
+    uint32_t                    referenced_in_cs;
+};
+
+/* bo functions */
+/* Callback table implemented by each buffer-object manager. The
+ * wrappers in radeon_bo.c test bo_wait and bo_is_static for NULL before
+ * calling them; the other callbacks they use are called unconditionally. */
+struct radeon_bo_funcs {
+    struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom,
+                                 uint32_t handle,
+                                 uint32_t size,
+                                 uint32_t alignment,
+                                 uint32_t domains,
+                                 uint32_t flags);
+    void (*bo_ref)(struct radeon_bo_int *bo);
+    struct radeon_bo *(*bo_unref)(struct radeon_bo_int *bo);
+    int (*bo_map)(struct radeon_bo_int *bo, int write);
+    int (*bo_unmap)(struct radeon_bo_int *bo);
+    int (*bo_wait)(struct radeon_bo_int *bo);
+    int (*bo_is_static)(struct radeon_bo_int *bo);
+    int (*bo_set_tiling)(struct radeon_bo_int *bo, uint32_t tiling_flags,
+			  uint32_t pitch);
+    int (*bo_get_tiling)(struct radeon_bo_int *bo, uint32_t *tiling_flags,
+			  uint32_t *pitch);
+    int (*bo_is_busy)(struct radeon_bo_int *bo, uint32_t *domain);
+    /* Not called by radeon_bo_is_referenced_by_cs(), which tests cref instead. */
+    int (*bo_is_referenced_by_cs)(struct radeon_bo_int *bo, struct radeon_cs *cs);
+};
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c
new file mode 100644
index 0000000000..78f73bf99c
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c
@@ -0,0 +1,938 @@
+/* 
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Dave Airlie
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Aapo Tahkola <aet@rasterburn.org>
+ *      Nicolai Haehnle <prefect_@gmx.net>
+ *      Dave Airlie
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#include <stdio.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include "xf86drm.h"
+#include "texmem.h"
+#include "main/simple_list.h"
+
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_common.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_macros.h"
+
+#ifdef HAVE_LIBDRM_RADEON
+#include "radeon_bo_int.h"
+#else
+#include "radeon_bo_int_drm.h"
+#endif
+
+/* no seriously texmem.c is this screwed up */
+/* Wraps a driTextureObject together with a back-pointer to the bo that
+ * owns it; bo_legacy_tobj_destroy() uses the back-pointer to clear the
+ * owner's tobj and validated fields. */
+struct bo_legacy_texture_object {
+    driTextureObject    base;
+    struct bo_legacy *parent;
+};
+
+/* Legacy (non-kernel-managed) buffer object; 'base' must stay first
+ * because the code casts between struct radeon_bo_int and this type. */
+struct bo_legacy {
+    struct radeon_bo_int    base;
+    int                 map_count;   /* NOTE(review): presumably nesting count of maps - confirm */
+    uint32_t            pending;     /* age compared against the manager's current_age */
+    int                 is_pending;  /* number of references dropped once no longer pending */
+    int                 static_bo;
+    uint32_t            offset;
+    struct bo_legacy_texture_object *tobj;  /* texture heap object, or NULL */
+    int                 validated;   /* cleared whenever tobj is destroyed */
+    int                 dirty;
+    void                *ptr;        /* note: base also has a member named ptr */
+    struct bo_legacy    *next, *prev;    /* links in the manager's 'bos' list */
+    struct bo_legacy    *pnext, *pprev;  /* links in the manager's 'pending_bos' list */
+};
+
+/* Legacy buffer-object manager; 'base' must stay first because the code
+ * casts between struct radeon_bo_manager and this type. */
+struct bo_manager_legacy {
+    struct radeon_bo_manager    base;
+    unsigned                    nhandle;        /* next never-used handle value */
+    unsigned                    nfree_handles;  /* capacity of free_handles[] */
+    unsigned                    cfree_handles;  /* entries of free_handles[] in use */
+    uint32_t                    current_age;    /* refreshed by legacy_get_current_age() */
+    struct bo_legacy            bos;            /* list head; legacy_kick_all_buffers() walks it until it returns here */
+    struct bo_legacy            pending_bos;    /* head of the pending list (pnext/pprev links) */
+    uint32_t                    fb_location;
+    uint32_t                    texture_offset;
+    unsigned                    dma_alloc_size;
+    uint32_t                    dma_buf_count;
+    unsigned                    cpendings;      /* decremented when a bo stops being pending */
+    driTextureObject            texture_swapped;
+    driTexHeap                  *texture_heap;
+    struct radeon_screen        *screen;        /* chip class and MMIO mapping source */
+    unsigned                    *free_handles;  /* recycled handles; 0 marks a cleared slot */
+};
+
+/**
+ * Destroy hook for a legacy texture object: if the object still has an
+ * owning bo, detach it by clearing the owner's tobj pointer and
+ * validated flag. The data argument is unused.
+ */
+static void bo_legacy_tobj_destroy(void *data, driTextureObject *t)
+{
+    struct bo_legacy *owner = ((struct bo_legacy_texture_object *)t)->parent;
+
+    if (!owner)
+        return;
+
+    owner->tobj = NULL;
+    owner->validated = 0;
+}
+
+/**
+ * Trim the free-handle stack: drop trailing entries that are zero so
+ * that the top entry, if any, is a non-zero handle.
+ */
+static void inline clean_handles(struct bo_manager_legacy *bom)
+{
+    for (; bom->cfree_handles > 0; bom->cfree_handles--) {
+        if (bom->free_handles[bom->cfree_handles - 1])
+            break;
+    }
+}
+/**
+ * Allocate a handle, preferring a recycled one from the free stack and
+ * otherwise taking the next unused value.
+ *
+ * @param bom    legacy manager
+ * @param handle receives the new handle; set to 0 on failure
+ * @return 0 on success, -EINVAL when the handle counter is 0xFFFFFFFF.
+ */
+static int legacy_new_handle(struct bo_manager_legacy *bom, uint32_t *handle)
+{
+    uint32_t picked;
+
+    *handle = 0;
+    if (bom->nhandle == 0xFFFFFFFF)
+        return -EINVAL;
+
+    if (bom->cfree_handles == 0) {
+        /* Nothing to recycle: hand out a fresh value. */
+        picked = bom->nhandle;
+        bom->nhandle += 1;
+    } else {
+        /* Pop the top of the free stack, then drop any zeroed slots below it. */
+        bom->cfree_handles -= 1;
+        picked = bom->free_handles[bom->cfree_handles];
+        clean_handles(bom);
+    }
+
+    assert(picked);
+    *handle = picked;
+    return 0;
+}
+
+/**
+ * Return a handle to the manager.
+ *
+ * Handle 0 is ignored. Freeing the most recently issued handle lowers
+ * the handle counter instead of growing the free stack, and also absorbs
+ * free-stack entries that become the new top value during one downward
+ * scan. Any other handle is pushed on the free stack, which grows by
+ * 0x100 entries when full.
+ *
+ * @return 0 on success, -ENOMEM if the free stack could not be grown
+ *         (the handle is then not recorded).
+ */
+static int legacy_free_handle(struct bo_manager_legacy *bom, uint32_t handle)
+{
+    unsigned *handles;
+
+    if (!handle) {
+        return 0;
+    }
+    if (handle == (bom->nhandle - 1)) {
+        int i;
+
+        bom->nhandle--;
+        for (i = bom->cfree_handles - 1; i >= 0; i--) {
+            if (bom->free_handles[i] == (bom->nhandle - 1)) {
+                bom->nhandle--;
+                /* Zero marks the slot as cleared for clean_handles(). */
+                bom->free_handles[i] = 0;
+            }
+        }
+        clean_handles(bom);
+        return 0;
+    }
+    if (bom->cfree_handles < bom->nfree_handles) {
+        bom->free_handles[bom->cfree_handles++] = handle;
+        return 0;
+    }
+    bom->nfree_handles += 0x100;
+    /* Size the array from its element type instead of a literal 4. */
+    handles = realloc(bom->free_handles,
+                      bom->nfree_handles * sizeof(*handles));
+    if (handles == NULL) {
+        /* The old array is still valid; undo the capacity bump. */
+        bom->nfree_handles -= 0x100;
+        return -ENOMEM;
+    }
+    bom->free_handles = handles;
+    bom->free_handles[bom->cfree_handles++] = handle;
+    return 0;
+}
+
+/**
+ * Refresh boml->current_age.
+ *
+ * R300- and R600-class screens ask the DRM for RADEON_PARAM_LAST_CLEAR
+ * and exit the process if that request fails. Other screens read
+ * RADEON_GUI_SCRATCH_REG3 through the MMIO mapping.
+ */
+static void legacy_get_current_age(struct bo_manager_legacy *boml)
+{
+    drm_radeon_getparam_t gp;
+    unsigned char *RADEONMMIO = NULL;   /* name is required by INREG() */
+    int r;
+
+    if (!IS_R300_CLASS(boml->screen) && !IS_R600_CLASS(boml->screen)) {
+        RADEONMMIO = boml->screen->mmio.map;
+        /* The scratch[3] value is immediately replaced by the register
+         * read below; both stores are kept as in the original. */
+        boml->current_age = boml->screen->scratch[3];
+        boml->current_age = INREG(RADEON_GUI_SCRATCH_REG3);
+        return;
+    }
+
+    gp.param = RADEON_PARAM_LAST_CLEAR;
+    gp.value = (int *)&boml->current_age;
+    r = drmCommandWriteRead(boml->base.fd, DRM_RADEON_GETPARAM,
+                            &gp, sizeof(gp));
+    if (!r)
+        return;
+
+    fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, r);
+    exit(1);
+}
+
+/*
+ * Check whether a bo is still pending and retire it if it is not.
+ *
+ * Returns 1 while the manager's current_age is below the bo's pending
+ * age, 0 otherwise. When the bo is retired it is unlinked from the
+ * pending list and the references counted in is_pending are dropped.
+ * The caller is expected to have refreshed current_age beforehand.
+ */
+static int legacy_is_pending(struct radeon_bo_int *boi)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)boi->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)boi;
+
+    /* Not pending at all; also normalises a negative counter to 0. */
+    if (bo_legacy->is_pending <= 0) {
+        bo_legacy->is_pending = 0;
+        return 0;
+    }
+    if (boml->current_age >= bo_legacy->pending) {
+        /* Unlink from the pending list; the head's pprev is moved back
+         * when it pointed at this bo. */
+        if (boml->pending_bos.pprev == bo_legacy) {
+            boml->pending_bos.pprev = bo_legacy->pprev;
+        }
+        bo_legacy->pprev->pnext = bo_legacy->pnext;
+        if (bo_legacy->pnext) {
+            bo_legacy->pnext->pprev = bo_legacy->pprev;
+        }
+	assert(bo_legacy->is_pending <= boi->cref);
+        /* Drop one reference per pending count. Stop as soon as unref
+         * returns NULL (presumably the bo was released - confirm), since
+         * bo_legacy must not be touched after that. */
+        while (bo_legacy->is_pending--) {
+	    boi = (struct radeon_bo_int *)radeon_bo_unref((struct radeon_bo *)boi);
+	    if (!boi)
+	      break;
+        }
+	/* Only reset the counter if the bo still exists. */
+	if (boi)
+	  bo_legacy->is_pending = 0;
+        boml->cpendings--;
+        return 0;
+    }
+    return 1;
+}
+
+static int legacy_wait_pending(struct radeon_bo_int *bo)
+{
+    struct bo_legacy *legacy = (struct bo_legacy *)bo;
+    struct bo_manager_legacy *mgr = (struct bo_manager_legacy *)bo->bom;
+
+    /* Poll in 10us naps until the bo stops being pending; always returns 0. */
+    if (legacy->is_pending) {
+        /* FIXME: lockup and userspace busy looping that's all the folks */
+        for (legacy_get_current_age(mgr);
+             legacy_is_pending(bo);
+             legacy_get_current_age(mgr)) {
+            usleep(10);
+        }
+    }
+    return 0;
+}
+
+void legacy_track_pending(struct radeon_bo_manager *bom, int debug)
+{
+    struct bo_manager_legacy *mgr = (struct bo_manager_legacy *)bom;
+    struct bo_legacy *cur, *following;
+
+    /* Refresh the age once, then let every pending bo it has passed retire. */
+    legacy_get_current_age(mgr);
+    for (cur = mgr->pending_bos.pnext; cur != NULL; cur = following) {
+        if (debug) {
+            fprintf(stderr,"pending %p %d %d %d\n", cur, cur->base.size,
+                    mgr->current_age, cur->pending);
+        }
+        /* Grab the successor first: legacy_is_pending() may unlink and
+         * unref 'cur'. */
+        following = cur->pnext;
+        (void)legacy_is_pending(&(cur->base));
+    }
+}
+
+static int legacy_wait_any_pending(struct bo_manager_legacy *boml)
+{
+    struct bo_legacy *head;
+    /* Wait for the first bo on the pending list; -1 when the list is empty. */
+    legacy_get_current_age(boml);
+    head = boml->pending_bos.pnext;
+    if (head == NULL)
+        return -1;
+    legacy_wait_pending(&head->base);
+    return 0;
+}
+
+static void legacy_kick_all_buffers(struct bo_manager_legacy *boml)
+{
+    struct bo_legacy *legacy;
+
+    /* Evict every validated texture-backed bo.  The bos list is
+     * NULL-terminated, so stop on NULL rather than run off its tail. */
+    for (legacy = boml->bos.next;
+         legacy != NULL && legacy != &boml->bos;
+         legacy = legacy->next) {
+        if (legacy->tobj && legacy->validated) {
+            driDestroyTextureObject(&legacy->tobj->base);
+            legacy->tobj = NULL;
+            legacy->validated = 0;
+        }
+    }
+}
+
+static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml,
+                                     uint32_t size,
+                                     uint32_t alignment,
+                                     uint32_t domains,
+                                     uint32_t flags)
+{
+    /* Cached (page size - 1), i.e. the low-bit mask used for rounding. */
+    static int page_mask;
+    struct bo_legacy *fresh;
+
+    if (!page_mask) {
+        page_mask = getpagesize() - 1;
+    }
+    fresh = (struct bo_legacy*)calloc(1, sizeof(struct bo_legacy));
+    if (!fresh) {
+        return NULL;
+    }
+    /* Describe the buffer; its size is rounded up to whole pages. */
+    fresh->base.bom = (struct radeon_bo_manager*)boml;
+    fresh->base.handle = 0;
+    fresh->base.size = (size + page_mask) & ~page_mask;
+    fresh->base.alignment = alignment;
+    fresh->base.domains = domains;
+    fresh->base.flags = flags;
+    fresh->base.ptr = NULL;
+    fresh->map_count = 0;
+    fresh->pnext = NULL;
+    fresh->pprev = NULL;
+
+    /* Push onto the front of the manager's list of all bos. */
+    fresh->prev = &boml->bos;
+    fresh->next = boml->bos.next;
+    if (fresh->next) {
+        fresh->next->prev = fresh;
+    }
+    boml->bos.next = fresh;
+
+    return fresh;
+}
+
+static int bo_dma_alloc(struct radeon_bo_int *bo)
+{
+    struct bo_legacy *legacy = (struct bo_legacy*)bo;
+    struct bo_manager_legacy *mgr = (struct bo_manager_legacy *)bo->bom;
+    drm_radeon_mem_alloc_t req;
+    unsigned aligned;
+    int gart_off;
+    int err;
+
+    /* Round the request up to a 4Kb multiple and ask the DRM for GART
+     * memory; the resulting region offset comes back through gart_off. */
+    aligned = (((4 * 1024) - 1) + legacy->base.size) & ~((4 * 1024) - 1);
+    req.region_offset = &gart_off;
+    req.size = aligned;
+    req.alignment = legacy->base.alignment;
+    req.region = RADEON_MEM_REGION_GART;
+    err = drmCommandWriteRead(bo->bom->fd, DRM_RADEON_ALLOC,
+                              &req, sizeof(req));
+    if (err != 0) {
+        /* ptr is set to NULL if dma allocation failed */
+        legacy->ptr = NULL;
+        return err;
+    }
+    /* Success: record the CPU mapping, the card offset and the accounting. */
+    legacy->ptr = mgr->screen->gartTextures.map + gart_off;
+    legacy->offset = mgr->screen->gart_texture_offset + gart_off;
+    bo->size = aligned;
+    mgr->dma_alloc_size += aligned;
+    mgr->dma_buf_count++;
+    return 0;
+}
+
+static int bo_dma_free(struct radeon_bo_int *bo)
+{
+    struct bo_legacy *legacy = (struct bo_legacy*)bo;
+    struct bo_manager_legacy *mgr = (struct bo_manager_legacy *)bo->bom;
+    drm_radeon_mem_free_t req;
+    int err;
+
+    /* Nothing to release: ptr stays NULL when the dma allocation failed. */
+    if (!legacy->ptr) {
+        return 0;
+    }
+    legacy_get_current_age(mgr);
+
+    /* Convert the stored offset back into one relative to
+     * gart_texture_offset before handing it to DRM_RADEON_FREE. */
+    req.region = RADEON_MEM_REGION_GART;
+    req.region_offset  = legacy->offset;
+    req.region_offset -= mgr->screen->gart_texture_offset;
+    err = drmCommandWrite(mgr->base.fd, DRM_RADEON_FREE, &req, sizeof(req));
+    if (err != 0) {
+        fprintf(stderr, "Failed to free bo[%p] at %08x\n",
+                &legacy->base, req.region_offset);
+        fprintf(stderr, "ret = %s\n", strerror(-err));
+        return err;
+    }
+    mgr->dma_alloc_size -= legacy->base.size;
+    mgr->dma_buf_count--;
+    return 0;
+}
+
+static void bo_free(struct bo_legacy *bo_legacy)
+{
+    struct bo_manager_legacy *boml;
+
+    /* Unlink a bo from its manager's list and release what it owns. */
+    if (bo_legacy == NULL) {
+        return;
+    }
+    boml = (struct bo_manager_legacy *)bo_legacy->base.bom;
+    bo_legacy->prev->next = bo_legacy->next;
+    if (bo_legacy->next) {
+        bo_legacy->next->prev = bo_legacy->prev;
+    }
+    if (!bo_legacy->static_bo) {
+        legacy_free_handle(boml, bo_legacy->base.handle);
+        if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) {
+            bo_dma_free(&bo_legacy->base); /* dma buffers */
+        } else {
+            if (bo_legacy->tobj) /* only set once bo_vram_validate() ran */
+                driDestroyTextureObject(&bo_legacy->tobj->base);
+            bo_legacy->tobj = NULL;
+            free(bo_legacy->ptr); /* free backing store */
+        }
+    }
+    memset(bo_legacy, 0 , sizeof(struct bo_legacy));
+    free(bo_legacy);
+}
+
+static struct radeon_bo *bo_open(struct radeon_bo_manager *bom,
+                                 uint32_t handle,
+                                 uint32_t size,
+                                 uint32_t alignment,
+                                 uint32_t domains,
+                                 uint32_t flags)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
+    struct bo_legacy *bo_legacy;
+    int r;
+
+    /* Non-zero handle: look up an existing bo and take a reference. */
+    if (handle) {
+        bo_legacy = boml->bos.next;
+        while (bo_legacy) {
+            if (bo_legacy->base.handle == handle) {
+                radeon_bo_ref((struct radeon_bo *)&(bo_legacy->base));
+                return (struct radeon_bo*)bo_legacy;
+            }
+            bo_legacy = bo_legacy->next;
+        }
+        return NULL;
+    }
+    /* Zero handle: create a new bo; NULL is returned on any failure. */
+    bo_legacy = bo_allocate(boml, size, alignment, domains, flags);
+    if (bo_legacy == NULL) {
+        return NULL; /* out of memory: nothing was linked yet */
+    }
+    bo_legacy->static_bo = 0;
+    r = legacy_new_handle(boml, &bo_legacy->base.handle);
+    if (r) {
+        bo_free(bo_legacy);
+        return NULL;
+    }
+    if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) {
+retry:
+        /* dma buffers: retire finished bos, then try to get GART space */
+        legacy_track_pending(&boml->base, 0);
+        r = bo_dma_alloc(&(bo_legacy->base));
+        if (r) {
+            /* Allocation failed: wait for a pending bo and retry; give
+             * up only when nothing is pending any more. */
+            if (legacy_wait_any_pending(boml) == -1) {
+                bo_free(bo_legacy);
+                return NULL;
+            }
+            goto retry;
+        }
+    } else {
+        /* Every other domain gets malloc'ed backing store. */
+        bo_legacy->ptr = malloc(bo_legacy->base.size);
+        if (bo_legacy->ptr == NULL) {
+            bo_free(bo_legacy);
+            return NULL;
+        }
+    }
+    radeon_bo_ref((struct radeon_bo *)&(bo_legacy->base));
+    return (struct radeon_bo*)bo_legacy;
+}
+
+static void bo_ref(struct radeon_bo_int *bo)
+{ /* Empty bo_ref hook for bo_legacy_funcs: no work is done here. */
+}
+
+static struct radeon_bo *bo_unref(struct radeon_bo_int *boi)
+{
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)boi;
+    /* Returns the bo while cref is still positive, NULL once it is <= 0. */
+    if (boi->cref <= 0) {
+        bo_legacy->prev->next = bo_legacy->next; /* unlink from the manager's bos list */
+        if (bo_legacy->next) {
+            bo_legacy->next->prev = bo_legacy->prev;
+        }
+        if (!bo_legacy->is_pending) { /* a bo still marked pending is unlinked but not freed here */
+            bo_free(bo_legacy);
+        }
+        return NULL;
+    }
+    return (struct radeon_bo *)boi;
+}
+
+static int bo_map(struct radeon_bo_int *bo, int write)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+    /* Wait until the bo is idle, then expose its CPU pointer in bo->ptr; 'write' is not consulted. */
+    legacy_wait_pending(bo);
+    bo_legacy->validated = 0; /* the next validate has to process this bo again */
+    bo_legacy->dirty = 1; /* always marked dirty, whatever 'write' says */
+    bo_legacy->map_count++;
+    bo->ptr = bo_legacy->ptr;
+    /* Read the first pixel in the frame buffer.  This should
+     * be a noop, right?  In fact without this conform fails as reading
+     * from the framebuffer sometimes produces old results -- the
+     * on-card read cache gets mixed up and doesn't notice that the
+     * framebuffer has been updated.
+     *
+     * Note that we should probably be reading some otherwise unused
+     * region of VRAM, otherwise we might get incorrect results when
+     * reading pixels from the top left of the screen.
+     *
+     * I found this problem on an R420 with glean's texCube test.
+     * Note that the R200 span code also *writes* the first pixel in the
+     * framebuffer, but I've found this to be unnecessary.
+     *  -- Nicolai Hähnle, June 2008
+     */
+    if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) {
+        int p;
+        volatile int *buf = (int*)boml->screen->driScreen->pFB;
+        p = *buf; /* volatile read; the value itself is discarded */
+    }
+    /* Always reports success. */
+    return 0;
+}
+
+static int bo_unmap(struct radeon_bo_int *bo)
+{
+    struct bo_legacy *legacy = (struct bo_legacy*)bo;
+
+    /* Drop one mapping; the public pointer is cleared only when the
+     * count is no longer positive.  Always returns 0. */
+    legacy->map_count--;
+    if (!(legacy->map_count > 0)) {
+        bo->ptr = NULL;
+    }
+
+    return 0;
+}
+
+static int bo_is_busy(struct radeon_bo_int *bo, uint32_t *domain)
+{
+    /* Report where the bo lives (GTT, otherwise CPU) through *domain and
+     * return -EBUSY while it is still pending, 0 once it is idle. */
+    *domain = 0;
+    *domain = (bo->domains & RADEON_GEM_DOMAIN_GTT)
+                  ? RADEON_GEM_DOMAIN_GTT
+                  : RADEON_GEM_DOMAIN_CPU;
+    if (legacy_is_pending(bo))
+        return -EBUSY;
+    return 0;
+}
+
+static int bo_is_static(struct radeon_bo_int *bo)
+{
+    /* Hand back the bo's static_bo flag unchanged. */
+    return ((struct bo_legacy *)bo)->static_bo;
+}
+
+static struct radeon_bo_funcs bo_legacy_funcs = { /* installed as bom->base.funcs by the manager ctor */
+    bo_open,
+    bo_ref,
+    bo_unref,
+    bo_map,
+    bo_unmap,
+    NULL, /* positional initializer: this slot is left unimplemented */
+    bo_is_static,
+    NULL, /* unimplemented */
+    NULL, /* unimplemented */
+    bo_is_busy
+};
+
+static int bo_vram_validate(struct radeon_bo_int *bo,
+                            uint32_t *soffset,
+                            uint32_t *eoffset)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+    int r;
+    int retry_count = 0, pending_retry = 0;
+
+    /*
+     * Place a VRAM bo in the texture heap and upload its backing store.
+     *
+     * On first use a texture object is created for the bo and a heap
+     * slot is allocated for it; whenever the bo or its texture object is
+     * dirty the malloc'ed copy is pushed to VRAM with DRM_RADEON_TEXTURE.
+     *
+     * soffset/eoffset are accepted but not used here.
+     * Returns 0 on success and -1 when no heap slot could be obtained.
+     */
+    if (!bo_legacy->tobj) {
+        bo_legacy->tobj = CALLOC(sizeof(struct bo_legacy_texture_object));
+        if (!bo_legacy->tobj) {
+            return -1;
+        }
+        bo_legacy->tobj->parent = bo_legacy;
+        make_empty_list(&bo_legacy->tobj->base);
+        bo_legacy->tobj->base.totalSize = bo->size;
+    retry:
+        r = driAllocateTexture(&boml->texture_heap, 1,
+                               &bo_legacy->tobj->base);
+        if (r) {
+            /* Retire finished bos to make room and try again, a bounded
+             * number of times and only while something is pending. */
+            pending_retry = 0;
+            while(boml->cpendings && pending_retry++ < 10000) {
+                legacy_track_pending(&boml->base, 0);
+                retry_count++;
+                if (retry_count > 2) {
+                    free(bo_legacy->tobj);
+                    bo_legacy->tobj = NULL;
+                    fprintf(stderr, "Ouch! vram_validate failed %d\n", r);
+                    return -1;
+                }
+                goto retry;
+            }
+            /* Nothing is pending, so waiting cannot help.  Fail here
+             * rather than fall through and dereference the memBlock of
+             * a texture object that never got a heap slot; the caller
+             * evicts everything and restarts validation. */
+            free(bo_legacy->tobj);
+            bo_legacy->tobj = NULL;
+            return -1;
+        }
+        bo_legacy->offset = boml->texture_offset +
+                            bo_legacy->tobj->base.memBlock->ofs;
+        bo_legacy->dirty = 1;
+    }
+
+    assert(bo_legacy->tobj->base.memBlock);
+
+    driUpdateTextureLRU(&bo_legacy->tobj->base);
+
+    if (bo_legacy->dirty || bo_legacy->tobj->base.dirty_images[0]) {
+        const int is_r600 = IS_R600_CLASS(boml->screen);
+        drm_radeon_texture_t tex;
+        drm_radeon_tex_image_t tmp;
+        int ret;
+
+        tex.offset = bo_legacy->offset;
+        tex.image = &tmp;
+        assert(!(tex.offset & 1023));
+
+        /* Shape of the upload: R600 sends the buffer as a single row;
+         * older chips use a blit.  All memory is 4K aligned and the
+         * blits are 1024 pixels wide once the bo reaches 4096 bytes. */
+        tmp.x = 0;
+        tmp.y = 0;
+        if (is_r600) {
+            tmp.width = bo->size;
+            tmp.height = 1;
+        } else if (bo->size < 4096) {
+            tmp.width = (bo->size + 3) / 4;
+            tmp.height = 1;
+        } else {
+            tmp.width = 1024;
+            tmp.height = (bo->size + 4095) / 4096;
+        }
+        tmp.data = bo_legacy->ptr;
+        tex.format = RADEON_TXFORMAT_ARGB8888;
+        tex.width = tmp.width;
+        tex.height = tmp.height;
+        if (is_r600) {
+            tex.pitch = bo->size;
+        } else {
+            tex.pitch = MAX2(tmp.width / 16, 1);
+        }
+        /* The ioctl is simply reissued for as long as it reports -EAGAIN. */
+        do {
+            ret = drmCommandWriteRead(bo->bom->fd,
+                                      DRM_RADEON_TEXTURE,
+                                      &tex,
+                                      sizeof(drm_radeon_texture_t));
+            if (ret) {
+                if (RADEON_DEBUG & RADEON_IOCTL)
+                    fprintf(stderr, "DRM_RADEON_TEXTURE:  again!\n");
+                usleep(1);
+            }
+        } while (ret == -EAGAIN);
+        bo_legacy->dirty = 0;
+        bo_legacy->tobj->base.dirty_images[0] = 0;
+    }
+    return 0;
+}
+
+/* 
+ *  radeon_bo_legacy_validate -
+ *  returns:
+ *  0 - all good
+ *  -EINVAL - mapped buffer can't be validated
+ *  -EAGAIN - restart validation we've kicked all the buffers out
+ */
+int radeon_bo_legacy_validate(struct radeon_bo *bo,
+                              uint32_t *soffset,
+                              uint32_t *eoffset)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)boi->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+    int r;
+    int retries = 0; /* NOTE(review): local, so 0 on every call; the == 2 check below looks unreachable */
+    /* Resolve the bo to the range [*soffset, *eoffset), placing non-GTT bos in VRAM first. */
+    if (bo_legacy->map_count) {
+        fprintf(stderr, "bo(%p, %d) is mapped (%d) can't valide it.\n",
+                bo, boi->size, bo_legacy->map_count);
+        return -EINVAL;
+    }
+    if(boi->size == 0) {
+        fprintf(stderr, "bo(%p) has size 0.\n", bo);
+        return -EINVAL;
+    }
+    if (bo_legacy->static_bo || bo_legacy->validated) { /* offset already known: just report the range */
+        *soffset = bo_legacy->offset;
+        *eoffset = bo_legacy->offset + boi->size;
+
+        return 0;
+    }
+    if (!(boi->domains & RADEON_GEM_DOMAIN_GTT)) {
+        /* GTT bos skip this step; everything else goes through bo_vram_validate(). */
+        r = bo_vram_validate(boi, soffset, eoffset);
+        if (r) {
+	    legacy_track_pending(&boml->base, 0);
+	    legacy_kick_all_buffers(boml); /* evict validated texture bos before asking for a restart */
+	    retries++;
+	    if (retries == 2) {
+		fprintf(stderr,"legacy bo: failed to get relocations into aperture\n");
+		assert(0);
+		exit(-1);
+	    }
+	    return -EAGAIN;
+        }
+    }
+    *soffset = bo_legacy->offset;
+    *eoffset = bo_legacy->offset + boi->size;
+    bo_legacy->validated = 1; /* cleared again by bo_map() and legacy_kick_all_buffers() */
+
+    return 0;
+}
+
+void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending)
+{
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)boi->bom;
+    struct bo_legacy *bo_legacy = (struct bo_legacy*)bo;
+    /* Mark the bo as pending until the manager's current_age reaches 'pending'. */
+    bo_legacy->pending = pending;
+    bo_legacy->is_pending++;
+    /* add to pending list */
+    radeon_bo_ref(bo); /* one reference per mark; legacy_is_pending() drops them again */
+    if (bo_legacy->is_pending > 1) {
+        return;    /* already on the pending list, only the count changed */
+    }
+    bo_legacy->pprev = boml->pending_bos.pprev; /* append after the current tail */
+    bo_legacy->pnext = NULL;
+    bo_legacy->pprev->pnext = bo_legacy;
+    boml->pending_bos.pprev = bo_legacy; /* the bo becomes the new tail */
+    boml->cpendings++;
+}
+
+void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom)
+{
+    struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom;
+    struct bo_legacy *cur;
+    struct bo_legacy *following;
+
+    /* Tear down a legacy manager; a NULL manager is silently ignored. */
+    if (!bom) {
+        return;
+    }
+
+    /* bo_free() releases the node, so fetch its successor beforehand. */
+    for (cur = boml->bos.next; cur != NULL; cur = following) {
+        following = cur->next;
+        bo_free(cur);
+    }
+    driDestroyTextureHeap(boml->texture_heap);
+    free(boml->free_handles);
+    free(boml);
+}
+
+static struct bo_legacy *radeon_legacy_bo_alloc_static(struct bo_manager_legacy *bom,
+                                                       int size,
+                                                       uint32_t offset)
+{
+    /* Wrap the framebuffer region 'offset' bytes into pFB as a static
+     * VRAM bo; its handle is set to its offset field. */
+    struct bo_legacy *sbo = bo_allocate(bom, size, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+    if (!sbo) {
+        return NULL;
+    }
+    sbo->static_bo = 1;
+    sbo->offset = offset + bom->fb_location;
+    sbo->base.handle = sbo->offset;
+    sbo->ptr = bom->screen->driScreen->pFB + offset;
+    if (sbo->base.handle > bom->nhandle) {
+        bom->nhandle = sbo->base.handle + 1;
+    }
+    radeon_bo_ref((struct radeon_bo *)&(sbo->base));
+    return sbo;
+}
+
+struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn)
+{
+    struct bo_manager_legacy *bom;
+    struct bo_legacy *bo;
+    unsigned size;
+    /* Build a legacy bo manager for scrn; returns NULL when an allocation checked below fails. */
+    bom = (struct bo_manager_legacy*)
+          calloc(1, sizeof(struct bo_manager_legacy));
+    if (bom == NULL) {
+        return NULL;
+    }
+    /* Texture heap that bo_vram_validate() places VRAM bos in. */
+    make_empty_list(&bom->texture_swapped);
+    /* NOTE(review): the heap pointer returned below is not checked -- confirm it cannot be NULL. */
+    bom->texture_heap = driCreateTextureHeap(0,
+                                             bom,
+                                             scrn->texSize[0],
+                                             12,
+                                             RADEON_NR_TEX_REGIONS,
+                                             (drmTextureRegionPtr)scrn->sarea->tex_list[0],
+                                             &scrn->sarea->tex_age[0],
+                                             &bom->texture_swapped,
+                                             sizeof(struct bo_legacy_texture_object),
+                                             &bo_legacy_tobj_destroy);
+    bom->texture_offset = scrn->texOffset[0];
+
+    bom->base.funcs = &bo_legacy_funcs;
+    bom->base.fd = scrn->driScreen->fd;
+    bom->bos.next = NULL; /* list of all bos: NULL-terminated, headed by the embedded 'bos' node */
+    bom->bos.prev = NULL;
+    bom->pending_bos.pprev = &bom->pending_bos; /* empty pending list: the tail is the head node itself */
+    bom->pending_bos.pnext = NULL;
+    bom->screen = scrn;
+    bom->fb_location = scrn->fbLocation;
+    bom->nhandle = 1;
+    bom->cfree_handles = 0;
+    bom->nfree_handles = 0x400;
+    bom->free_handles = (uint32_t*)malloc(bom->nfree_handles * 4);
+    if (bom->free_handles == NULL) {
+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
+        return NULL;
+    }
+
+    /* biggest framebuffer size */
+    size = 4096*4096*4; 
+
+    /* allocate front */
+    bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->frontOffset);
+
+    if (!bo) {
+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
+        return NULL;
+    }
+    if (scrn->sarea->tiling_enabled) {
+        bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE;
+    }
+
+    /* allocate back */
+    bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->backOffset);
+
+    if (!bo) {
+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
+        return NULL;
+    }
+    if (scrn->sarea->tiling_enabled) {
+        bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE;
+    }
+
+    /* allocate depth */
+    bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->depthOffset);
+
+    if (!bo) {
+        radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom);
+        return NULL;
+    }
+    bo->base.flags = 0;
+    if (scrn->sarea->tiling_enabled) { /* depth gets micro tiling on top of macro tiling */
+        bo->base.flags |= RADEON_BO_FLAGS_MACRO_TILE;
+        bo->base.flags |= RADEON_BO_FLAGS_MICRO_TILE;
+    }
+    return (struct radeon_bo_manager*)bom;
+}
+
+void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom)
+{
+    /* Apply DRI_AGE_TEXTURES to the legacy manager's texture heap. */
+    struct bo_manager_legacy *mgr = (struct bo_manager_legacy *)bom;
+    DRI_AGE_TEXTURES(mgr->texture_heap);
+}
+
+unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo)
+{
+    struct bo_legacy *legacy = (struct bo_legacy*)bo;
+    struct radeon_bo_int *boi = (struct radeon_bo_int *)bo;
+
+    /* Static and GTT bos contribute 0; any other bo counts its full size. */
+    if (!legacy->static_bo && !(boi->domains & RADEON_GEM_DOMAIN_GTT))
+        return boi->size;
+    return 0;
+}
+
+/*
+ * Fake up a bo for things like texture image_override.
+ * bo->offset already includes fb_location
+ */
+struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom,
+                                              int size,
+                                              uint32_t offset)
+{
+    struct bo_manager_legacy *mgr = (struct bo_manager_legacy *)bom;
+    struct bo_legacy *fake = bo_allocate(mgr, size, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+    if (!fake) {
+        return NULL;
+    }
+    /* 'offset' already contains fb_location, so it is stored as is and
+     * only the CPU pointer into pFB has fb_location taken back out. */
+    fake->static_bo = 1;
+    fake->offset = offset;
+    fake->base.handle = fake->offset;
+    fake->ptr = mgr->screen->driScreen->pFB + (offset - mgr->fb_location);
+    if (fake->base.handle > mgr->nhandle) {
+        mgr->nhandle = fake->base.handle + 1;
+    }
+    radeon_bo_ref((struct radeon_bo *)&(fake->base));
+    return (struct radeon_bo *)&(fake->base);
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h
new file mode 100644
index 0000000000..2cf15dfaff
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h
@@ -0,0 +1,50 @@
+/* 
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Aapo Tahkola <aet@rasterburn.org>
+ *      Nicolai Haehnle <prefect_@gmx.net>
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_BO_LEGACY_H
+#define RADEON_BO_LEGACY_H
+
+#include "radeon_screen.h"
+
+void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending); /* mark bo pending until age 'pending' */
+int radeon_bo_legacy_validate(struct radeon_bo *bo,
+                              uint32_t *soffset,
+                              uint32_t *eoffset); /* 0, -EINVAL or -EAGAIN; fills the offsets on 0 */
+struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn); /* NULL on failure */
+void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom); /* NULL is accepted */
+void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom); /* ages the manager's texture heap */
+unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo); /* 0 for static and GTT bos, else the bo size */
+struct radeon_bo *radeon_legacy_bo_alloc_fake(struct radeon_bo_manager *bom,
+					      int size,
+	                                      uint32_t offset); /* offset must already include fb_location */
+void legacy_track_pending(struct radeon_bo_manager *bom, int debug); /* retire finished bos; debug != 0 logs each */
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
new file mode 100644
index 0000000000..6c2648b6bd
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h
@@ -0,0 +1,102 @@
+#ifndef RADEON_CS_WRAPPER_H
+#define RADEON_CS_WRAPPER_H
+
+#ifdef HAVE_LIBDRM_RADEON
+
+#include "radeon_bo.h"
+#include "radeon_bo_gem.h"
+#include "radeon_cs.h"
+#include "radeon_cs_gem.h"
+
+#else
+#include <stdint.h>
+
+#define RADEON_GEM_DOMAIN_CPU 0x1   // Cached CPU domain
+#define RADEON_GEM_DOMAIN_GTT 0x2   // GTT or cache flushed
+#define RADEON_GEM_DOMAIN_VRAM 0x4  // VRAM domain
+/* Tiling flag bits (local definitions for builds without libdrm_radeon). */
+#define RADEON_TILING_MACRO 0x1
+#define RADEON_TILING_MICRO 0x2
+#define RADEON_TILING_SWAP 0x4
+/* Only defined here when nothing defined it earlier. */
+#ifndef RADEON_TILING_SURFACE
+#define RADEON_TILING_SURFACE 0x8 /* this object requires a surface
+				   * when mapped - i.e. front buffer */
+#endif
+
+/* to be used to build locally in mesa with no libdrm bits */
+#include "../radeon/radeon_bo_drm.h"
+#include "../radeon/radeon_cs_drm.h"
+
+#ifndef DRM_RADEON_GEM_INFO
+#define DRM_RADEON_GEM_INFO 0x1c
+/* Local fallbacks, compiled only when DRM_RADEON_GEM_INFO was not already defined. */
+struct drm_radeon_gem_info {
+        uint64_t gart_size;
+        uint64_t vram_size;
+        uint64_t vram_visible;
+};
+/* NOTE(review): 'value' is 32 bits wide here -- confirm against the kernel's struct drm_radeon_info. */
+struct drm_radeon_info {
+	uint32_t request;
+	uint32_t pad;
+	uint32_t value;
+};
+#endif
+
+#ifndef RADEON_PARAM_DEVICE_ID
+#define RADEON_PARAM_DEVICE_ID 16
+#endif
+/* Each fallback below only applies when the name is not defined yet. */
+#ifndef RADEON_PARAM_NUM_Z_PIPES
+#define RADEON_PARAM_NUM_Z_PIPES 17
+#endif
+/* NOTE(review): the three RADEON_INFO_* fallbacks are all 0 and DRM_RADEON_INFO is 0x1 -- presumably placeholders; verify against radeon_drm.h. */
+#ifndef RADEON_INFO_DEVICE_ID
+#define RADEON_INFO_DEVICE_ID 0
+#endif
+#ifndef RADEON_INFO_NUM_GB_PIPES
+#define RADEON_INFO_NUM_GB_PIPES 0
+#endif
+
+#ifndef RADEON_INFO_NUM_Z_PIPES
+#define RADEON_INFO_NUM_Z_PIPES 0
+#endif
+
+#ifndef DRM_RADEON_INFO
+#define DRM_RADEON_INFO 0x1
+#endif
+
+
+static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy)
+{ /* stub used when HAVE_LIBDRM_RADEON is not defined: always 0 */
+  return 0;
+}
+
+static inline void *radeon_bo_manager_gem_ctor(int fd)
+{ /* stub used when HAVE_LIBDRM_RADEON is not defined: no GEM bo manager, always NULL */
+  return NULL;
+}
+
+static inline void radeon_bo_manager_gem_dtor(void *dummy)
+{ /* stub used when HAVE_LIBDRM_RADEON is not defined: nothing to destroy */
+}
+
+static inline void *radeon_cs_manager_gem_ctor(int fd)
+{ /* stub used when HAVE_LIBDRM_RADEON is not defined: no GEM cs manager, always NULL */
+  return NULL;
+}
+
+static inline void radeon_cs_manager_gem_dtor(void *dummy)
+{ /* stub used when HAVE_LIBDRM_RADEON is not defined: nothing to destroy */
+}
+
+static inline void radeon_tracker_print(void *ptr, int io)
+{ /* stub used when HAVE_LIBDRM_RADEON is not defined: prints nothing */
+}
+#endif
+
+#include "radeon_bo_legacy.h"
+#include "radeon_cs_legacy.h"
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
new file mode 100644
index 0000000000..0897dafbd8
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_buffer_objects.h"
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/bufferobj.h"
+
+#include "radeon_common.h"
+
+struct radeon_buffer_object *
+get_radeon_buffer_object(struct gl_buffer_object *obj)
+{ /* Plain pointer cast from the Mesa base object to the radeon wrapper; no checking is done. */
+    return (struct radeon_buffer_object *) obj;
+}
+
+static struct gl_buffer_object *
+radeonNewBufferObject(GLcontext * ctx,
+                      GLuint name,
+                      GLenum target)
+{
+    struct radeon_buffer_object *obj = CALLOC_STRUCT(radeon_buffer_object);
+    /* New buffer object with no bo attached yet; NULL when out of memory. */
+    if (obj == NULL)
+        return NULL;
+    _mesa_initialize_buffer_object(&obj->Base, name, target);
+    obj->bo = NULL;
+    return &obj->Base;
+}
+
+/**
+ * Called via glDeleteBuffersARB().
+ */
+static void
+radeonDeleteBufferObject(GLcontext * ctx,
+                         struct gl_buffer_object *obj)
+{
+    struct radeon_buffer_object *rbo = get_radeon_buffer_object(obj);
+
+    /* A buffer that is still mapped gets unmapped before it goes away. */
+    if (obj->Pointer)
+        radeon_bo_unmap(rbo->bo);
+
+    /* Drop this object's reference on the backing bo, if it has one. */
+    if (rbo->bo)
+        radeon_bo_unref(rbo->bo);
+
+    free(rbo);
+}
+
+
+/**
+ * Allocate space for and store data in a buffer object.  Any data that was
+ * previously stored in the buffer object is lost.  If data is NULL,
+ * memory will be allocated, but no copy will occur.
+ * Called via ctx->Driver.BufferData().
+ * \return GL_TRUE for success, GL_FALSE if out of memory
+ */
+static GLboolean
+radeonBufferData(GLcontext * ctx,
+                 GLenum target,
+                 GLsizeiptrARB size,
+                 const GLvoid * data,
+                 GLenum usage,
+                 struct gl_buffer_object *obj)
+{
+    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+    struct radeon_buffer_object *rbo = get_radeon_buffer_object(obj);
+
+    rbo->Base.Size = size;
+    rbo->Base.Usage = usage;
+
+    /* Any previous storage is dropped before new storage is set up. */
+    if (rbo->bo != NULL) {
+        radeon_bo_unref(rbo->bo);
+        rbo->bo = NULL;
+    }
+
+    /* A zero-sized buffer keeps no bo at all. */
+    if (size == 0) {
+        return GL_TRUE;
+    }
+
+    rbo->bo = radeon_bo_open(rmesa->radeonScreen->bom, 0, size, 32,
+                             RADEON_GEM_DOMAIN_GTT, 0);
+    if (!rbo->bo) {
+        return GL_FALSE;
+    }
+
+    /* Initial contents are optional; copy them through a write mapping. */
+    if (data != NULL) {
+        radeon_bo_map(rbo->bo, GL_TRUE);
+        memcpy(rbo->bo->ptr, data, size);
+        radeon_bo_unmap(rbo->bo);
+    }
+
+    return GL_TRUE;
+}
+
+/**
+ * Replace data in a subrange of buffer object.  If the data range
+ * specified by size + offset extends beyond the end of the buffer or
+ * if data is NULL, no copy is performed.
+ * Called via glBufferSubDataARB().
+ */
+static void
+radeonBufferSubData(GLcontext * ctx,
+                    GLenum target,
+                    GLintptrARB offset,
+                    GLsizeiptrARB size,
+                    const GLvoid * data,
+                    struct gl_buffer_object *obj)
+{
+    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+    struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+
+    /* No source data, or no storage (zero-sized buffer): nothing to copy. */
+    if (data == NULL || radeon_obj->bo == NULL)
+        return;
+    if (radeon_bo_is_referenced_by_cs(radeon_obj->bo, radeon->cmdbuf.cs)) {
+        radeon_firevertices(radeon);
+    }
+    radeon_bo_map(radeon_obj->bo, GL_TRUE);
+    memcpy(radeon_obj->bo->ptr + offset, data, size);
+    radeon_bo_unmap(radeon_obj->bo);
+}
+
+/**
+ * Called via glGetBufferSubDataARB()
+ */
+static void
+radeonGetBufferSubData(GLcontext * ctx,
+                       GLenum target,
+                       GLintptrARB offset,
+                       GLsizeiptrARB size,
+                       GLvoid * data,
+                       struct gl_buffer_object *obj)
+{
+    struct radeon_buffer_object *radeon_obj = get_radeon_buffer_object(obj);
+    /* A zero-sized buffer has no bo, so there is nothing to read back. */
+    if (radeon_obj->bo == NULL)
+        return;
+    radeon_bo_map(radeon_obj->bo, GL_FALSE);
+    memcpy(data, radeon_obj->bo->ptr + offset, size);
+    radeon_bo_unmap(radeon_obj->bo);
+}
+
+/**
+ * Called via glMapBufferARB()
+ */
+static void *
+radeonMapBuffer(GLcontext * ctx,
+                GLenum target,
+                GLenum access,
+                struct gl_buffer_object *obj)
+{
+    struct radeon_buffer_object *rbo = get_radeon_buffer_object(obj);
+    const int for_write = (access == GL_WRITE_ONLY_ARB);
+
+    /* Write-only mappings flush through the driver first. */
+    if (for_write) {
+        ctx->Driver.Flush(ctx);
+    }
+
+    /* Without backing storage there is nothing to map. */
+    if (!rbo->bo) {
+        obj->Pointer = NULL;
+        return NULL;
+    }
+    radeon_bo_map(rbo->bo, for_write);
+    obj->Pointer = rbo->bo->ptr;
+    obj->Length = obj->Size;
+    obj->Offset = 0;
+    return obj->Pointer;
+}
+
+
+/**
+ * Called via glUnmapBufferARB()
+ */
+static GLboolean
+radeonUnmapBuffer(GLcontext * ctx,
+                  GLenum target,
+                  struct gl_buffer_object *obj)
+{
+    struct radeon_buffer_object *rbo = get_radeon_buffer_object(obj);
+
+    /* Release the bo mapping (if there is a bo), then reset the
+     * mapping fields kept on the buffer object.  Always GL_TRUE. */
+    if (rbo->bo) {
+        radeon_bo_unmap(rbo->bo);
+    }
+    obj->Length = 0;
+    obj->Offset = 0;
+    obj->Pointer = NULL;
+    return GL_TRUE;
+}
+
+void
+radeonInitBufferObjectFuncs(struct dd_function_table *functions)
+{ /* Point the buffer-object driver hooks at the radeon implementations in this file. */
+    functions->NewBufferObject = radeonNewBufferObject;
+    functions->DeleteBuffer = radeonDeleteBufferObject;
+    functions->BufferData = radeonBufferData;
+    functions->BufferSubData = radeonBufferSubData;
+    functions->GetBufferSubData = radeonGetBufferSubData;
+    functions->MapBuffer = radeonMapBuffer;
+    functions->UnmapBuffer = radeonUnmapBuffer;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_buffer_objects.h b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.h
new file mode 100644
index 0000000000..d681960825
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_buffer_objects.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_BUFFER_OBJECTS_H
+#define RADEON_BUFFER_OBJECTS_H
+
+#include "main/mtypes.h"
+
+struct radeon_bo;
+
+/**
+ * Radeon vertex/pixel buffer object, derived from Mesa's gl_buffer_object.
+ */
+struct radeon_buffer_object
+{
+   struct gl_buffer_object Base;   /* Mesa base object, kept as the first member */
+   struct radeon_bo *bo;           /* backing BO; the map/unmap hooks tolerate NULL */
+};
+
+/** Radeon wrapper for \p obj.  NOTE(review): presumably a plain downcast
+ *  (Base is the first member) - confirm against the definition. */
+struct radeon_buffer_object *
+get_radeon_buffer_object(struct gl_buffer_object *obj);
+
+/** Hook the buffer object implementation into Mesa's driver function table. */
+void radeonInitBufferObjectFuncs(struct dd_function_table *functions);
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
new file mode 100644
index 0000000000..b7ee9a134b
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
@@ -0,0 +1,455 @@
+#ifndef _RADEON_CHIPSET_H
+#define _RADEON_CHIPSET_H
+/* Including xf86PciInfo.h introduces a bunch of errors...
+ */
+
+/* General chip classes (RS* denotes IGP):
+ * r100 includes R100, RV100, RV200, RS100, RS200, RS250.
+ * r200 includes R200, RV250, RV280, RS300.
+ * r300 includes R300, RV350, RV370.
+ * PCI device IDs follow; this first run (0x3150-0x5E4F) is in ascending order.
+ */
+#define PCI_CHIP_RV380_3150		0x3150
+#define PCI_CHIP_RV380_3152		0x3152
+#define PCI_CHIP_RV380_3154		0x3154
+#define PCI_CHIP_RV380_3155		0x3155
+#define PCI_CHIP_RV380_3E50		0x3E50
+#define PCI_CHIP_RV380_3E54		0x3E54
+#define PCI_CHIP_RS100_4136		0x4136
+#define PCI_CHIP_RS200_4137		0x4137
+#define PCI_CHIP_R300_AD		0x4144
+#define PCI_CHIP_R300_AE		0x4145
+#define PCI_CHIP_R300_AF		0x4146
+#define PCI_CHIP_R300_AG		0x4147
+#define PCI_CHIP_R350_AH                0x4148
+#define PCI_CHIP_R350_AI                0x4149
+#define PCI_CHIP_R350_AJ                0x414A
+#define PCI_CHIP_R350_AK                0x414B
+#define PCI_CHIP_RV350_AP               0x4150
+#define PCI_CHIP_RV350_AQ               0x4151
+#define PCI_CHIP_RV350_AR               0x4152
+#define PCI_CHIP_RV350_AS               0x4153
+#define PCI_CHIP_RV350_AT               0x4154
+#define PCI_CHIP_RV350_AU		0x4155
+#define PCI_CHIP_RV350_AV               0x4156
+#define PCI_CHIP_RS250_4237		0x4237
+#define PCI_CHIP_R200_BB		0x4242
+#define PCI_CHIP_R200_BC		0x4243
+#define PCI_CHIP_RS100_4336		0x4336
+#define PCI_CHIP_RS200_4337		0x4337
+#define PCI_CHIP_RS250_4437		0x4437
+#define PCI_CHIP_RV250_If		0x4966
+#define PCI_CHIP_RV250_Ig		0x4967
+#define PCI_CHIP_R420_JH		0x4A48
+#define PCI_CHIP_R420_JI		0x4A49
+#define PCI_CHIP_R420_JJ		0x4A4A
+#define PCI_CHIP_R420_JK		0x4A4B
+#define PCI_CHIP_R420_JL		0x4A4C
+#define PCI_CHIP_R420_JM		0x4A4D
+#define PCI_CHIP_R420_JN		0x4A4E
+#define PCI_CHIP_R420_JO		0x4A4F
+#define PCI_CHIP_R420_JP		0x4A50
+#define PCI_CHIP_R420_JT		0x4A54
+#define PCI_CHIP_R481_4B49		0x4B49
+#define PCI_CHIP_R481_4B4A		0x4B4A
+#define PCI_CHIP_R481_4B4B		0x4B4B
+#define PCI_CHIP_R481_4B4C		0x4B4C
+#define PCI_CHIP_RADEON_LW		0x4C57
+#define PCI_CHIP_RADEON_LX		0x4C58
+#define PCI_CHIP_RADEON_LY		0x4C59
+#define PCI_CHIP_RADEON_LZ		0x4C5A
+#define PCI_CHIP_RV250_Ld		0x4C64
+#define PCI_CHIP_RV250_Lf		0x4C66
+#define PCI_CHIP_RV250_Lg		0x4C67
+#define PCI_CHIP_R300_ND		0x4E44
+#define PCI_CHIP_R300_NE		0x4E45
+#define PCI_CHIP_R300_NF		0x4E46
+#define PCI_CHIP_R300_NG		0x4E47
+#define PCI_CHIP_R350_NH                0x4E48
+#define PCI_CHIP_R350_NI                0x4E49  
+#define PCI_CHIP_R360_NJ                0x4E4A  
+#define PCI_CHIP_R350_NK                0x4E4B  
+#define PCI_CHIP_RV350_NP               0x4E50
+#define PCI_CHIP_RV350_NQ               0x4E51
+#define PCI_CHIP_RV350_NR               0x4E52
+#define PCI_CHIP_RV350_NS               0x4E53
+#define PCI_CHIP_RV350_NT               0x4E54
+#define PCI_CHIP_RV350_NV               0x4E56
+#define PCI_CHIP_RADEON_QD		0x5144
+#define PCI_CHIP_RADEON_QE		0x5145
+#define PCI_CHIP_RADEON_QF		0x5146
+#define PCI_CHIP_RADEON_QG		0x5147
+#define PCI_CHIP_R200_QH		0x5148
+#define PCI_CHIP_R200_QL		0x514C
+#define PCI_CHIP_R200_QM		0x514D
+#define PCI_CHIP_RV200_QW		0x5157
+#define PCI_CHIP_RV200_QX		0x5158
+#define PCI_CHIP_RADEON_QY		0x5159
+#define PCI_CHIP_RADEON_QZ		0x515A
+#define PCI_CHIP_RN50_515E		0x515E
+#define PCI_CHIP_RV370_5460		0x5460
+#define PCI_CHIP_RV370_5462		0x5462
+#define PCI_CHIP_RV370_5464		0x5464
+#define PCI_CHIP_R423_UH		0x5548
+#define PCI_CHIP_R423_UI		0x5549
+#define PCI_CHIP_R423_UJ		0x554A
+#define PCI_CHIP_R423_UK		0x554B
+#define PCI_CHIP_R430_554C		0x554C
+#define PCI_CHIP_R430_554D		0x554D
+#define PCI_CHIP_R430_554E		0x554E
+#define PCI_CHIP_R430_554F		0x554F
+#define PCI_CHIP_R423_5550		0x5550
+#define PCI_CHIP_R423_UQ		0x5551
+#define PCI_CHIP_R423_UR		0x5552
+#define PCI_CHIP_R423_UT		0x5554
+#define PCI_CHIP_RV410_564A		0x564A
+#define PCI_CHIP_RV410_564B		0x564B
+#define PCI_CHIP_RV410_564F		0x564F
+#define PCI_CHIP_RV410_5652		0x5652
+#define PCI_CHIP_RV410_5653		0x5653
+#define PCI_CHIP_RV410_5657		0x5657
+#define PCI_CHIP_RS300_5834		0x5834
+#define PCI_CHIP_RS300_5835		0x5835
+#define PCI_CHIP_RS480_5954		0x5954
+#define PCI_CHIP_RS480_5955		0x5955
+#define PCI_CHIP_RV280_5960		0x5960
+#define PCI_CHIP_RV280_5961		0x5961
+#define PCI_CHIP_RV280_5962		0x5962
+#define PCI_CHIP_RV280_5964		0x5964
+#define PCI_CHIP_RV280_5965		0x5965
+#define PCI_CHIP_RN50_5969		0x5969
+#define PCI_CHIP_RS482_5974		0x5974
+#define PCI_CHIP_RS482_5975		0x5975
+#define PCI_CHIP_RS400_5A41		0x5A41
+#define PCI_CHIP_RS400_5A42		0x5A42
+#define PCI_CHIP_RC410_5A61		0x5A61
+#define PCI_CHIP_RC410_5A62		0x5A62
+#define PCI_CHIP_RV370_5B60		0x5B60
+#define PCI_CHIP_RV370_5B62		0x5B62
+#define PCI_CHIP_RV370_5B63		0x5B63
+#define PCI_CHIP_RV370_5B64		0x5B64
+#define PCI_CHIP_RV370_5B65		0x5B65
+#define PCI_CHIP_RV280_5C61		0x5C61
+#define PCI_CHIP_RV280_5C63		0x5C63
+#define PCI_CHIP_R430_5D48		0x5D48
+#define PCI_CHIP_R430_5D49		0x5D49
+#define PCI_CHIP_R430_5D4A		0x5D4A
+#define PCI_CHIP_R480_5D4C		0x5D4C
+#define PCI_CHIP_R480_5D4D		0x5D4D
+#define PCI_CHIP_R480_5D4E		0x5D4E
+#define PCI_CHIP_R480_5D4F		0x5D4F
+#define PCI_CHIP_R480_5D50		0x5D50
+#define PCI_CHIP_R480_5D52		0x5D52
+#define PCI_CHIP_R423_5D57		0x5D57
+#define PCI_CHIP_RV410_5E48		0x5E48
+#define PCI_CHIP_RV410_5E4A		0x5E4A
+#define PCI_CHIP_RV410_5E4B		0x5E4B
+#define PCI_CHIP_RV410_5E4C		0x5E4C
+#define PCI_CHIP_RV410_5E4D		0x5E4D
+#define PCI_CHIP_RV410_5E4F		0x5E4F
+/* R520, RV515, RV530, R580, RV560 and RV570 device IDs. */
+#define PCI_CHIP_R520_7100              0x7100
+#define PCI_CHIP_R520_7101              0x7101
+#define PCI_CHIP_R520_7102              0x7102
+#define PCI_CHIP_R520_7103              0x7103
+#define PCI_CHIP_R520_7104              0x7104
+#define PCI_CHIP_R520_7105              0x7105
+#define PCI_CHIP_R520_7106              0x7106
+#define PCI_CHIP_R520_7108              0x7108
+#define PCI_CHIP_R520_7109              0x7109
+#define PCI_CHIP_R520_710A              0x710A
+#define PCI_CHIP_R520_710B              0x710B
+#define PCI_CHIP_R520_710C              0x710C
+#define PCI_CHIP_R520_710E              0x710E
+#define PCI_CHIP_R520_710F              0x710F
+#define PCI_CHIP_RV515_7140             0x7140
+#define PCI_CHIP_RV515_7141             0x7141
+#define PCI_CHIP_RV515_7142             0x7142
+#define PCI_CHIP_RV515_7143             0x7143
+#define PCI_CHIP_RV515_7144             0x7144
+#define PCI_CHIP_RV515_7145             0x7145
+#define PCI_CHIP_RV515_7146             0x7146
+#define PCI_CHIP_RV515_7147             0x7147
+#define PCI_CHIP_RV515_7149             0x7149
+#define PCI_CHIP_RV515_714A             0x714A
+#define PCI_CHIP_RV515_714B             0x714B
+#define PCI_CHIP_RV515_714C             0x714C
+#define PCI_CHIP_RV515_714D             0x714D
+#define PCI_CHIP_RV515_714E             0x714E
+#define PCI_CHIP_RV515_714F             0x714F
+#define PCI_CHIP_RV515_7151             0x7151
+#define PCI_CHIP_RV515_7152             0x7152
+#define PCI_CHIP_RV515_7153             0x7153
+#define PCI_CHIP_RV515_715E             0x715E
+#define PCI_CHIP_RV515_715F             0x715F
+#define PCI_CHIP_RV515_7180             0x7180
+#define PCI_CHIP_RV515_7181             0x7181
+#define PCI_CHIP_RV515_7183             0x7183
+#define PCI_CHIP_RV515_7186             0x7186
+#define PCI_CHIP_RV515_7187             0x7187
+#define PCI_CHIP_RV515_7188             0x7188
+#define PCI_CHIP_RV515_718A             0x718A
+#define PCI_CHIP_RV515_718B             0x718B
+#define PCI_CHIP_RV515_718C             0x718C
+#define PCI_CHIP_RV515_718D             0x718D
+#define PCI_CHIP_RV515_718F             0x718F
+#define PCI_CHIP_RV515_7193             0x7193
+#define PCI_CHIP_RV515_7196             0x7196
+#define PCI_CHIP_RV515_719B             0x719B
+#define PCI_CHIP_RV515_719F             0x719F
+#define PCI_CHIP_RV530_71C0             0x71C0
+#define PCI_CHIP_RV530_71C1             0x71C1
+#define PCI_CHIP_RV530_71C2             0x71C2
+#define PCI_CHIP_RV530_71C3             0x71C3
+#define PCI_CHIP_RV530_71C4             0x71C4
+#define PCI_CHIP_RV530_71C5             0x71C5
+#define PCI_CHIP_RV530_71C6             0x71C6
+#define PCI_CHIP_RV530_71C7             0x71C7
+#define PCI_CHIP_RV530_71CD             0x71CD
+#define PCI_CHIP_RV530_71CE             0x71CE
+#define PCI_CHIP_RV530_71D2             0x71D2
+#define PCI_CHIP_RV530_71D4             0x71D4
+#define PCI_CHIP_RV530_71D5             0x71D5
+#define PCI_CHIP_RV530_71D6             0x71D6
+#define PCI_CHIP_RV530_71DA             0x71DA
+#define PCI_CHIP_RV530_71DE             0x71DE
+#define PCI_CHIP_RV515_7200             0x7200
+#define PCI_CHIP_RV515_7210             0x7210
+#define PCI_CHIP_RV515_7211             0x7211
+#define PCI_CHIP_R580_7240              0x7240
+#define PCI_CHIP_R580_7243              0x7243
+#define PCI_CHIP_R580_7244              0x7244
+#define PCI_CHIP_R580_7245              0x7245
+#define PCI_CHIP_R580_7246              0x7246
+#define PCI_CHIP_R580_7247              0x7247
+#define PCI_CHIP_R580_7248              0x7248
+#define PCI_CHIP_R580_7249              0x7249
+#define PCI_CHIP_R580_724A              0x724A
+#define PCI_CHIP_R580_724B              0x724B
+#define PCI_CHIP_R580_724C              0x724C
+#define PCI_CHIP_R580_724D              0x724D
+#define PCI_CHIP_R580_724E              0x724E
+#define PCI_CHIP_R580_724F              0x724F
+#define PCI_CHIP_RV570_7280             0x7280
+#define PCI_CHIP_RV560_7281             0x7281
+#define PCI_CHIP_RV560_7283             0x7283
+#define PCI_CHIP_R580_7284              0x7284
+#define PCI_CHIP_RV560_7287             0x7287
+#define PCI_CHIP_RV570_7288             0x7288
+#define PCI_CHIP_RV570_7289             0x7289
+#define PCI_CHIP_RV570_728B             0x728B
+#define PCI_CHIP_RV570_728C             0x728C
+#define PCI_CHIP_RV560_7290             0x7290
+#define PCI_CHIP_RV560_7291             0x7291
+#define PCI_CHIP_RV560_7293             0x7293
+#define PCI_CHIP_RV560_7297             0x7297
+/* RS350, RS690, RS600 and RS740 device IDs (RS* denotes IGP). */
+#define PCI_CHIP_RS350_7834		0x7834
+#define PCI_CHIP_RS350_7835		0x7835
+#define PCI_CHIP_RS690_791E             0x791E
+#define PCI_CHIP_RS690_791F             0x791F
+#define PCI_CHIP_RS600_793F             0x793F
+#define PCI_CHIP_RS600_7941             0x7941
+#define PCI_CHIP_RS600_7942             0x7942
+#define PCI_CHIP_RS740_796C             0x796C
+#define PCI_CHIP_RS740_796D             0x796D
+#define PCI_CHIP_RS740_796E             0x796E
+#define PCI_CHIP_RS740_796F             0x796F
+/* R600, RV6xx, RS780, RS880 and RV7xx device IDs. */
+#define PCI_CHIP_R600_9400              0x9400
+#define PCI_CHIP_R600_9401              0x9401
+#define PCI_CHIP_R600_9402              0x9402
+#define PCI_CHIP_R600_9403              0x9403
+#define PCI_CHIP_R600_9405              0x9405
+#define PCI_CHIP_R600_940A              0x940A
+#define PCI_CHIP_R600_940B              0x940B
+#define PCI_CHIP_R600_940F              0x940F
+
+#define PCI_CHIP_RV610_94C0             0x94C0
+#define PCI_CHIP_RV610_94C1             0x94C1
+#define PCI_CHIP_RV610_94C3             0x94C3
+#define PCI_CHIP_RV610_94C4             0x94C4
+#define PCI_CHIP_RV610_94C5             0x94C5
+#define PCI_CHIP_RV610_94C6             0x94C6
+#define PCI_CHIP_RV610_94C7             0x94C7
+#define PCI_CHIP_RV610_94C8             0x94C8
+#define PCI_CHIP_RV610_94C9             0x94C9
+#define PCI_CHIP_RV610_94CB             0x94CB
+#define PCI_CHIP_RV610_94CC             0x94CC
+#define PCI_CHIP_RV610_94CD             0x94CD
+
+#define PCI_CHIP_RV630_9580             0x9580
+#define PCI_CHIP_RV630_9581             0x9581
+#define PCI_CHIP_RV630_9583             0x9583
+#define PCI_CHIP_RV630_9586             0x9586
+#define PCI_CHIP_RV630_9587             0x9587
+#define PCI_CHIP_RV630_9588             0x9588
+#define PCI_CHIP_RV630_9589             0x9589
+#define PCI_CHIP_RV630_958A             0x958A
+#define PCI_CHIP_RV630_958B             0x958B
+#define PCI_CHIP_RV630_958C             0x958C
+#define PCI_CHIP_RV630_958D             0x958D
+#define PCI_CHIP_RV630_958E             0x958E
+#define PCI_CHIP_RV630_958F             0x958F
+
+#define PCI_CHIP_RV670_9500             0x9500
+#define PCI_CHIP_RV670_9501             0x9501
+#define PCI_CHIP_RV670_9504             0x9504
+#define PCI_CHIP_RV670_9505             0x9505
+#define PCI_CHIP_RV670_9506             0x9506
+#define PCI_CHIP_RV670_9507             0x9507
+#define PCI_CHIP_RV670_9508             0x9508
+#define PCI_CHIP_RV670_9509             0x9509
+#define PCI_CHIP_RV670_950F             0x950F
+#define PCI_CHIP_RV670_9511             0x9511
+#define PCI_CHIP_RV670_9515             0x9515
+#define PCI_CHIP_RV670_9517             0x9517
+#define PCI_CHIP_RV670_9519             0x9519
+
+#define PCI_CHIP_RV620_95C0             0x95C0
+#define PCI_CHIP_RV620_95C2             0x95C2
+#define PCI_CHIP_RV620_95C4             0x95C4
+#define PCI_CHIP_RV620_95C5             0x95C5
+#define PCI_CHIP_RV620_95C6             0x95C6
+#define PCI_CHIP_RV620_95C7             0x95C7
+#define PCI_CHIP_RV620_95C9             0x95C9
+#define PCI_CHIP_RV620_95CC             0x95CC
+#define PCI_CHIP_RV620_95CD             0x95CD
+#define PCI_CHIP_RV620_95CE             0x95CE
+#define PCI_CHIP_RV620_95CF             0x95CF
+
+#define PCI_CHIP_RV635_9590             0x9590
+#define PCI_CHIP_RV635_9591             0x9591
+#define PCI_CHIP_RV635_9593             0x9593
+#define PCI_CHIP_RV635_9595             0x9595
+#define PCI_CHIP_RV635_9596             0x9596
+#define PCI_CHIP_RV635_9597             0x9597
+#define PCI_CHIP_RV635_9598             0x9598
+#define PCI_CHIP_RV635_9599             0x9599
+#define PCI_CHIP_RV635_959B             0x959B
+
+#define PCI_CHIP_RS780_9610             0x9610
+#define PCI_CHIP_RS780_9611             0x9611
+#define PCI_CHIP_RS780_9612             0x9612
+#define PCI_CHIP_RS780_9613             0x9613
+#define PCI_CHIP_RS780_9614             0x9614
+#define PCI_CHIP_RS780_9615             0x9615
+#define PCI_CHIP_RS780_9616             0x9616
+
+#define PCI_CHIP_RS880_9710             0x9710
+#define PCI_CHIP_RS880_9711             0x9711
+#define PCI_CHIP_RS880_9712             0x9712
+#define PCI_CHIP_RS880_9713             0x9713
+#define PCI_CHIP_RS880_9714             0x9714
+#define PCI_CHIP_RS880_9715             0x9715
+
+#define PCI_CHIP_RV770_9440             0x9440
+#define PCI_CHIP_RV770_9441             0x9441
+#define PCI_CHIP_RV770_9442             0x9442
+#define PCI_CHIP_RV770_9443             0x9443
+#define PCI_CHIP_RV770_9444             0x9444
+#define PCI_CHIP_RV770_9446             0x9446
+#define PCI_CHIP_RV770_944A             0x944A
+#define PCI_CHIP_RV770_944B             0x944B
+#define PCI_CHIP_RV770_944C             0x944C
+#define PCI_CHIP_RV770_944E             0x944E
+#define PCI_CHIP_RV770_9450             0x9450
+#define PCI_CHIP_RV770_9452             0x9452
+#define PCI_CHIP_RV770_9456             0x9456
+#define PCI_CHIP_RV770_945A             0x945A
+#define PCI_CHIP_RV770_945B             0x945B
+#define PCI_CHIP_RV770_945E             0x945E
+#define PCI_CHIP_RV790_9460             0x9460
+#define PCI_CHIP_RV790_9462             0x9462
+#define PCI_CHIP_RV770_946A             0x946A
+#define PCI_CHIP_RV770_946B             0x946B
+#define PCI_CHIP_RV770_947A             0x947A
+#define PCI_CHIP_RV770_947B             0x947B
+
+#define PCI_CHIP_RV730_9480             0x9480
+#define PCI_CHIP_RV730_9487             0x9487
+#define PCI_CHIP_RV730_9488             0x9488
+#define PCI_CHIP_RV730_9489             0x9489
+#define PCI_CHIP_RV730_948A             0x948A
+#define PCI_CHIP_RV730_948F             0x948F
+#define PCI_CHIP_RV730_9490             0x9490
+#define PCI_CHIP_RV730_9491             0x9491
+#define PCI_CHIP_RV730_9495             0x9495
+#define PCI_CHIP_RV730_9498             0x9498
+#define PCI_CHIP_RV730_949C             0x949C
+#define PCI_CHIP_RV730_949E             0x949E
+#define PCI_CHIP_RV730_949F             0x949F
+
+#define PCI_CHIP_RV710_9540             0x9540
+#define PCI_CHIP_RV710_9541             0x9541
+#define PCI_CHIP_RV710_9542             0x9542
+#define PCI_CHIP_RV710_954E             0x954E
+#define PCI_CHIP_RV710_954F             0x954F
+#define PCI_CHIP_RV710_9552             0x9552
+#define PCI_CHIP_RV710_9553             0x9553
+#define PCI_CHIP_RV710_9555             0x9555
+#define PCI_CHIP_RV710_9557             0x9557
+#define PCI_CHIP_RV710_955F             0x955F
+
+#define PCI_CHIP_RV740_94A0             0x94A0
+#define PCI_CHIP_RV740_94A1             0x94A1
+#define PCI_CHIP_RV740_94A3             0x94A3
+#define PCI_CHIP_RV740_94B1             0x94B1
+#define PCI_CHIP_RV740_94B3             0x94B3
+#define PCI_CHIP_RV740_94B4             0x94B4
+#define PCI_CHIP_RV740_94B5             0x94B5
+#define PCI_CHIP_RV740_94B9             0x94B9
+/* Chip family identifiers; CHIP_FAMILY_LAST is a sentinel equal to the number of families. */
+enum {
+   CHIP_FAMILY_R100,
+   CHIP_FAMILY_RV100,
+   CHIP_FAMILY_RS100,
+   CHIP_FAMILY_RV200,
+   CHIP_FAMILY_RS200,
+   CHIP_FAMILY_R200,
+   CHIP_FAMILY_RV250,
+   CHIP_FAMILY_RS300,
+   CHIP_FAMILY_RV280,
+   CHIP_FAMILY_R300,
+   CHIP_FAMILY_R350,
+   CHIP_FAMILY_RV350,
+   CHIP_FAMILY_RV380,
+   CHIP_FAMILY_R420,
+   CHIP_FAMILY_RV410,
+   CHIP_FAMILY_RS400,
+   CHIP_FAMILY_RS600,
+   CHIP_FAMILY_RS690,
+   CHIP_FAMILY_RS740,
+   CHIP_FAMILY_RV515,
+   CHIP_FAMILY_R520,
+   CHIP_FAMILY_RV530,
+   CHIP_FAMILY_R580,
+   CHIP_FAMILY_RV560,
+   CHIP_FAMILY_RV570,
+   CHIP_FAMILY_R600,
+   CHIP_FAMILY_RV610,
+   CHIP_FAMILY_RV630,
+   CHIP_FAMILY_RV670,
+   CHIP_FAMILY_RV620,
+   CHIP_FAMILY_RV635,
+   CHIP_FAMILY_RS780,
+   CHIP_FAMILY_RS880,
+   CHIP_FAMILY_RV770,
+   CHIP_FAMILY_RV730,
+   CHIP_FAMILY_RV710,
+   CHIP_FAMILY_RV740,
+   CHIP_FAMILY_LAST
+};
+
+/* General classes of Radeons (see the comment above the device IDs): a 2-bit field in bits 0-1, covered by RADEON_CLASS_MASK. */
+#define RADEON_CLASS_R100		(0 << 0)
+#define RADEON_CLASS_R200		(1 << 0)
+#define RADEON_CLASS_R300		(2 << 0)
+#define RADEON_CLASS_R600		(3 << 0)
+#define RADEON_CLASS_MASK		(3 << 0)
+/* Single-bit chipset flags in bits 2-4, above the class field. */
+#define RADEON_CHIPSET_TCL		(1 << 2)	/* tcl support - any radeon */
+#define RADEON_CHIPSET_BROKEN_STENCIL	(1 << 3)	/* r100 stencil bug */
+#define R200_CHIPSET_YCBCR_BROKEN	(1 << 4)	/* r200 ycbcr bug */
+
+#endif /* _RADEON_CHIPSET_H */
diff --git a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
new file mode 100644
index 0000000000..6fcd1ce7ca
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h
@@ -0,0 +1,121 @@
+#ifndef COMMON_CMDBUF_H
+#define COMMON_CMDBUF_H
+
+#include "radeon_bocs_wrapper.h"
+
+GLboolean rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller);
+int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller);
+int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller);
+void rcommonInitCmdBuf(radeonContextPtr rmesa);
+void rcommonDestroyCmdBuf(radeonContextPtr rmesa);
+/* Backend of BEGIN_BATCH() and BEGIN_BATCH_NO_AUTOSTATE(), defined below. */
+void rcommonBeginBatch(radeonContextPtr rmesa,
+		       int n,			/* dword count given to the macro */
+		       int dostate,		/* 1 from BEGIN_BATCH, 0 from the NO_AUTOSTATE variant */
+		       const char *file,	/* __FILE__ of the call site */
+		       const char *function,	/* __FUNCTION__ of the call site */
+		       int line);		/* __LINE__ of the call site */
+
+/* +r6/r7 : code here moved */
+/* CP packet headers.  CP_PACKET2 uses an unsigned literal: 2 << 30 does not fit in a 32-bit int. */
+#define CP_PACKET2  (2u << 30)
+#define CP_PACKET0(reg, n)	(RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2))
+#define CP_PACKET0_ONE(reg, n)	(RADEON_CP_PACKET0 | RADEON_CP_PACKET0_ONE_REG_WR | ((n)<<16) | ((reg)>>2))
+#define CP_PACKET3(pkt, n)	(RADEON_CP_PACKET3 | (pkt) | ((n) << 16))
+
+/**
+ * Declares the local b_l_rmesa that the BEGIN_BATCH / OUT_BATCH* / END_BATCH
+ * macros below expand to; every function using those macros must declare it.
+ */
+#define BATCH_LOCALS(rmesa) \
+	const radeonContextPtr b_l_rmesa = rmesa
+
+/**
+ * Prepare writing n dwords to the command buffer,
+ * including producing any necessary state emits on buffer wraparound (dostate = 1).
+ */
+#define BEGIN_BATCH(n) rcommonBeginBatch(b_l_rmesa, n, 1, __FILE__, __FUNCTION__, __LINE__)
+
+/**
+ * Same as BEGIN_BATCH, but do not cause automatic state emits (dostate = 0).
+ */
+#define BEGIN_BATCH_NO_AUTOSTATE(n) rcommonBeginBatch(b_l_rmesa, n, 0, __FILE__, __FUNCTION__, __LINE__)
+
+/**
+ * Write one dword to the command stream b_l_rmesa->cmdbuf.cs (needs BATCH_LOCALS).
+ */
+#define OUT_BATCH(data) \
+	do { \
+        radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, data);\
+	} while(0)
+
+/** Write the dword \p offset, then a relocation for \p bo (rd, wd and flags
+ *  are passed through).  Without kernel_mm, section_cdw is also advanced by 2.
+ *  The \p data argument is not used by the expansion. */
+#define OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) 	\
+	do { 							\
+	int  __offset = (offset);				\
+        if (0 && __offset) {					\
+            fprintf(stderr, "(%s:%s:%d) offset : %d\n",		\
+            __FILE__, __FUNCTION__, __LINE__, __offset);	\
+        }							\
+        radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, __offset);	\
+        radeon_cs_write_reloc(b_l_rmesa->cmdbuf.cs, 		\
+                              bo, rd, wd, flags);		\
+	if (!b_l_rmesa->radeonScreen->kernel_mm) 		\
+		b_l_rmesa->cmdbuf.cs->section_cdw += 2;		\
+	} while(0)
+
+
+/**
+ * Write n dwords from ptr to the command buffer via radeon_cs_write_table().
+ */
+#define OUT_BATCH_TABLE(ptr,n) \
+	do { \
+		radeon_cs_write_table(b_l_rmesa->cmdbuf.cs, (ptr), (n));\
+	} while(0)
+
+/**
+ * Finish writing dwords to the command buffer (radeon_cs_end() with the call site).
+ * The number of (direct or indirect) OUT_BATCH calls between the previous
+ * BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time.
+ */
+#define END_BATCH() \
+	do { \
+        radeon_cs_end(b_l_rmesa->cmdbuf.cs, __FILE__, __FUNCTION__, __LINE__);\
+	} while(0)
+
+/**
+ * After the last END_BATCH() of rendering, this indicates that flushing
+ * the command buffer now is okay.  Currently expands to an empty statement.
+ */
+#define COMMIT_BATCH() \
+	do { \
+	} while(0)
+
+
+/** Single register write to command buffer; requires 2 dwords. */
+#define OUT_BATCH_REGVAL(reg, val) \
+	OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), 1)); \
+	OUT_BATCH((val))
+
+/** Continuous register range write: emits only the cmdpacket0() header dword
+ * for count registers at reg; the caller must follow with count dwords. */
+#define OUT_BATCH_REGSEQ(reg, count) \
+	OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), (count)))
+
+/** Write a 32 bit float to the ring as packed by radeonPackFloat32(); requires 1 dword. */
+#define OUT_BATCH_FLOAT32(f) \
+	OUT_BATCH(radeonPackFloat32((f)))
+
+/* +r6/r7 : code here moved */
+
+/* Flush via Driver.Flush if the command stream holds dwords or dma.flush is set. */
+static INLINE void radeon_firevertices(radeonContextPtr radeon)
+{
+   const int pending = radeon->cmdbuf.cs->cdw || radeon->dma.flush;
+   if (pending)
+      radeon->glCtx->Driver.Flush(radeon->glCtx); /* +r6/r7 */
+}
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
new file mode 100644
index 0000000000..13f1f0611b
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -0,0 +1,1338 @@
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/*
+   - Scissor implementation
+   - buffer swap/copy ioctls
+   - finish/flush
+   - state emission
+   - cmdbuffer management
+*/
+
+#include <errno.h>
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "drivers/common/meta.h"
+
+#include "vblank.h"
+
+#include "radeon_common.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_lock.h"
+#include "radeon_drm.h"
+#include "radeon_queryobj.h"
+
+/**
+ * Enable verbose debug output for emit code.
+ * 0 no output
+ * 1 most output
+ * 2 also print state values
+ */
+#define RADEON_CMDBUF         0
+
+/* =============================================================
+ * Scissoring
+ */
+
+/*
+ * Store the overlap of rectangles a and b in *out.
+ * Returns GL_FALSE when the overlap has no area (*out is still written),
+ * GL_TRUE otherwise.
+ */
+static GLboolean intersect_rect(drm_clip_rect_t * out,
+				drm_clip_rect_t * a, drm_clip_rect_t * b)
+{
+	*out = *a;
+
+	/* Pull each edge of the copy inwards to the matching edge of b. */
+	out->x1 = (b->x1 > out->x1) ? b->x1 : out->x1;
+	out->y1 = (b->y1 > out->y1) ? b->y1 : out->y1;
+	out->x2 = (b->x2 < out->x2) ? b->x2 : out->x2;
+	out->y2 = (b->y2 < out->y2) ? b->y2 : out->y2;
+
+	return (out->x1 < out->x2 && out->y1 < out->y2) ? GL_TRUE : GL_FALSE;
+}
+
+/* Rebuild state.scissor.pClipRects from the non-empty intersections of the
+ * current cliprects with the scissor rect (growing the store if needed),
+ * then call vtbl.update_scissor when it is set. */
+void radeonRecalcScissorRects(radeonContextPtr radeon)
+{
+	drm_clip_rect_t *out;
+	int i;
+
+	/* Grow cliprect store? */
+	if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) {
+		while (radeon->state.scissor.numAllocedClipRects <
+		       radeon->numClipRects) {
+			radeon->state.scissor.numAllocedClipRects += 1;	/* zero case */
+			radeon->state.scissor.numAllocedClipRects *= 2;
+		}
+		if (radeon->state.scissor.pClipRects)
+			FREE(radeon->state.scissor.pClipRects);
+		radeon->state.scissor.pClipRects =
+			MALLOC(radeon->state.scissor.numAllocedClipRects *
+			       sizeof(drm_clip_rect_t));
+		if (radeon->state.scissor.pClipRects == NULL) {
+			/* Allocation failed: the old rects are gone, so
+			 * leave an empty list rather than a stale count. */
+			radeon->state.scissor.numAllocedClipRects = 0;
+			radeon->state.scissor.numClipRects = 0;
+			return;
+		}
+	}
+
+	out = radeon->state.scissor.pClipRects;
+	radeon->state.scissor.numClipRects = 0;
+	for (i = 0; i < radeon->numClipRects; i++) {
+		if (intersect_rect(out, &radeon->pClipRects[i],
+				   &radeon->state.scissor.rect)) {
+			radeon->state.scissor.numClipRects++;
+			out++;
+		}
+	}
+
+	if (radeon->vtbl.update_scissor)
+	   radeon->vtbl.update_scissor(radeon->glCtx);
+}
+
+/* Report the cliprects and origin to render with: a single full-size rect
+ * at (0,0) when constant_cliprect is set, else the drawable's pClipRects
+ * or pBackClipRects list with the matching drawable origin. */
+void radeon_get_cliprects(radeonContextPtr radeon,
+			  struct drm_clip_rect **cliprects,
+			  unsigned int *num_cliprects,
+			  int *x_off, int *y_off)
+{
+	__DRIdrawable *draw = radeon_get_drawable(radeon);
+	struct radeon_framebuffer *fb = draw->driverPrivate;
+
+	if (radeon->constant_cliprect) {
+		radeon->fboRect.x1 = radeon->fboRect.y1 = 0;
+		radeon->fboRect.x2 = radeon->glCtx->DrawBuffer->Width;
+		radeon->fboRect.y2 = radeon->glCtx->DrawBuffer->Height;
+		*cliprects = &radeon->fboRect;
+		*num_cliprects = 1;
+		*x_off = *y_off = 0;
+	} else if (radeon->front_cliprects || fb->pf_active ||
+		   draw->numBackClipRects == 0) {
+		*x_off = draw->x;
+		*y_off = draw->y;
+		*cliprects = draw->pClipRects;
+		*num_cliprects = draw->numClipRects;
+	} else {
+		*x_off = draw->backX;
+		*y_off = draw->backY;
+		*cliprects = draw->pBackClipRects;
+		*num_cliprects = draw->numBackClipRects;
+	}
+}
+
+/**
+ * Refresh the context's cliprect list, resize the draw (and, if distinct,
+ * read) framebuffer to its drawable's size, and recompute the scissor
+ * rects when scissoring is enabled.
+ */
+void radeonSetCliprects(radeonContextPtr radeon)
+{
+	__DRIdrawable *const draw_dri = radeon_get_drawable(radeon);
+	__DRIdrawable *const read_dri = radeon_get_readable(radeon);
+	struct radeon_framebuffer *const draw_fb = draw_dri->driverPrivate;
+	struct radeon_framebuffer *const read_fb = read_dri->driverPrivate;
+	int off_x, off_y;	/* required by the callee, unused here */
+
+	radeon_get_cliprects(radeon, &radeon->pClipRects, &radeon->numClipRects,
+			     &off_x, &off_y);
+
+	if (draw_fb->base.Width != draw_dri->w ||
+	    draw_fb->base.Height != draw_dri->h) {
+		_mesa_resize_framebuffer(radeon->glCtx, &draw_fb->base,
+					 draw_dri->w, draw_dri->h);
+		draw_fb->base.Initialized = GL_TRUE;
+	}
+
+	if (draw_dri != read_dri &&
+	    (read_fb->base.Width != read_dri->w ||
+	     read_fb->base.Height != read_dri->h)) {
+		_mesa_resize_framebuffer(radeon->glCtx, &read_fb->base,
+					 read_dri->w, read_dri->h);
+		read_fb->base.Initialized = GL_TRUE;
+	}
+
+	if (radeon->state.scissor.enabled)
+		radeonRecalcScissorRects(radeon);
+}
+
+
+
+/* Convert ctx->Scissor into state.scissor.rect (clamped to the draw buffer,
+ * offset by the drawable origin without kernel_mm) and rebuild the rects. */
+void radeonUpdateScissor( GLcontext *ctx )
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	const GLint sx = ctx->Scissor.X, sy = ctx->Scissor.Y;
+	const GLsizei sw = ctx->Scissor.Width, sh = ctx->Scissor.Height;
+	int lo_x = 0, lo_y = 0, hi_x, hi_y;
+	int x1, y1, x2, y2;
+
+	if (!ctx->DrawBuffer)
+	    return;
+
+	hi_x = ctx->DrawBuffer->Width - 1;
+	hi_y = ctx->DrawBuffer->Height - 1;
+
+	/* Only the y origin depends on the buffer: with Name == 0 it is
+	 * measured from the opposite edge of the draw buffer. */
+	x1 = sx;
+	x2 = sx + sw - 1;
+	if (ctx->DrawBuffer->Name)
+		y1 = sy;
+	else
+		y1 = ctx->DrawBuffer->Height - (sy + sh);
+	y2 = y1 + sh - 1;
+
+	if (!rmesa->radeonScreen->kernel_mm) {
+		/* Fix scissors for dri 1 */
+		__DRIdrawable *drw = radeon_get_drawable(rmesa);
+		lo_x += drw->x;
+		lo_y += drw->y;
+		x1 += drw->x;
+		y1 += drw->y;
+		x2 += drw->x + 1;
+		y2 += drw->y + 1;
+		hi_x += drw->x + 1;
+		hi_y += drw->y + 1;
+	}
+
+	rmesa->state.scissor.rect.x1 = CLAMP(x1,  lo_x, hi_x);
+	rmesa->state.scissor.rect.y1 = CLAMP(y1,  lo_y, hi_y);
+	rmesa->state.scissor.rect.x2 = CLAMP(x2,  lo_x, hi_x);
+	rmesa->state.scissor.rect.y2 = CLAMP(y2,  lo_y, hi_y);
+
+	radeonRecalcScissorRects( rmesa );
+}
+
+/* =============================================================
+ * Scissoring
+ */
+
+/* x, y, w and h are unused: radeonUpdateScissor() reads ctx->Scissor itself. */
+void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	if (!ctx->Scissor.Enabled)
+		return;
+	radeon_firevertices(radeon);	/* We don't pipeline cliprect changes */
+	radeonUpdateScissor(ctx);
+}
+
+/* ================================================================
+ * SwapBuffers with client-side throttling
+ */
+
+/**
+ * Query RADEON_PARAM_LAST_FRAME through the DRM_RADEON_GETPARAM ioctl.
+ *
+ * \return the value the ioctl stored for the parameter.
+ *
+ * An ioctl failure is reported on stderr and terminates the process.
+ */
+static uint32_t radeonGetLastFrame(radeonContextPtr radeon)
+{
+	uint32_t last_frame = 0;
+	drm_radeon_getparam_t request;
+	int err;
+
+	request.param = RADEON_PARAM_LAST_FRAME;
+	request.value = (int *)&last_frame;
+
+	err = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+				  &request, sizeof(request));
+	if (err != 0) {
+		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
+			err);
+		exit(1);
+	}
+
+	return last_frame;
+}
+
+/**
+ * Query RADEON_PARAM_LAST_CLEAR through the DRM_RADEON_GETPARAM ioctl.
+ *
+ * \return the value the ioctl stored for the parameter.
+ *
+ * An ioctl failure is reported on stderr and terminates the process.
+ */
+uint32_t radeonGetAge(radeonContextPtr radeon)
+{
+	drm_radeon_getparam_t gp;
+	int ret;
+	/* Start from a defined value, as radeonGetLastFrame() does, so a
+	 * successful ioctl that leaves the slot untouched cannot make us
+	 * return stack garbage.
+	 */
+	uint32_t age = 0;
+
+	gp.param = RADEON_PARAM_LAST_CLEAR;
+	gp.value = (int *)&age;
+	ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM,
+				  &gp, sizeof(gp));
+	if (ret) {
+		fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__,
+			ret);
+		exit(1);
+	}
+
+	return age;
+}
+
+/**
+ * Issue DRM_RADEON_IRQ_EMIT; the ioctl is handed the address of
+ * radeon->iw.irq_seq.
+ *
+ * Judging by the name and by the callers in this file, the hardware lock is
+ * expected to be held.  An ioctl failure is reported on stderr and
+ * terminates the process.
+ */
+static void radeonEmitIrqLocked(radeonContextPtr radeon)
+{
+	drm_radeon_irq_emit_t emit;
+	int err;
+
+	emit.irq_seq = &radeon->iw.irq_seq;
+
+	err = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT,
+				  &emit, sizeof(emit));
+	if (err != 0) {
+		fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__,
+			err);
+		exit(1);
+	}
+}
+
+/**
+ * Issue DRM_RADEON_IRQ_WAIT with radeon->iw, retrying for as long as the
+ * ioctl fails with errno EINTR or EBUSY.
+ *
+ * Any other failure is reported on stderr and terminates the process.
+ */
+static void radeonWaitIrq(radeonContextPtr radeon)
+{
+	int err;
+
+	for (;;) {
+		err = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT,
+				      &radeon->iw, sizeof(radeon->iw));
+		if (!err)
+			break;
+		/* Only interrupted / busy waits are worth retrying. */
+		if (errno != EINTR && errno != EBUSY)
+			break;
+	}
+
+	if (err != 0) {
+		fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__,
+			err);
+		exit(1);
+	}
+}
+
+/**
+ * Throttle the client until the frame counter reported by
+ * radeonGetLastFrame() has caught up with sarea->last_frame.
+ *
+ * The caller in this file (radeonScheduleSwap) holds the hardware lock;
+ * the lock is temporarily dropped around the blocking waits and re-taken
+ * before returning.
+ */
+static void radeonWaitForFrameCompletion(radeonContextPtr radeon)
+{
+	drm_radeon_sarea_t *sarea = radeon->sarea;
+
+	if (radeon->do_irqs) {
+		if (radeonGetLastFrame(radeon) < sarea->last_frame) {
+			if (!radeon->irqsEmitted) {
+				/* No IRQ outstanding to sleep on: busy-wait
+				 * on the frame counter with the lock held.
+				 */
+				while (radeonGetLastFrame(radeon) <
+				       sarea->last_frame) ;
+			} else {
+				/* Sleep on the IRQ without holding the lock. */
+				UNLOCK_HARDWARE(radeon);
+				radeonWaitIrq(radeon);
+				LOCK_HARDWARE(radeon);
+			}
+			/* We had to wait, so (re)arm IRQ emission for the
+			 * next 10 calls.
+			 */
+			radeon->irqsEmitted = 10;
+		}
+
+		/* Emit one IRQ per call while the countdown is non-zero. */
+		if (radeon->irqsEmitted) {
+			radeonEmitIrqLocked(radeon);
+			radeon->irqsEmitted--;
+		}
+	} else {
+		/* Polling path: release the lock between polls, optionally
+		 * sleeping when do_usleeps is set.
+		 */
+		while (radeonGetLastFrame(radeon) < sarea->last_frame) {
+			UNLOCK_HARDWARE(radeon);
+			if (radeon->do_usleeps)
+				DO_USLEEP(1);
+			LOCK_HARDWARE(radeon);
+		}
+	}
+}
+
+/**
+ * Wait for idle: issue DRM_RADEON_CP_IDLE until it succeeds, sleeping
+ * between attempts and giving up after 100 tries.
+ *
+ * If the last attempt returned a negative value, the hardware lock is
+ * released, an error is printed and the process exits.
+ */
+void radeonWaitForIdleLocked(radeonContextPtr radeon)
+{
+	int attempts = 0;
+	int status;
+
+	for (;;) {
+		status = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE);
+		if (!status)
+			break;
+
+		DO_USLEEP(1);
+		if (++attempts >= 100)
+			break;
+	}
+
+	if (status >= 0)
+		return;
+
+	UNLOCK_HARDWARE(radeon);
+	fprintf(stderr, "Error: R300 timed out... exiting\n");
+	exit(-1);
+}
+
+/**
+ * Take the hardware lock and wait for the engine to go idle.
+ * Does nothing on DRI2 screens.
+ */
+static void radeonWaitForIdle(radeonContextPtr radeon)
+{
+	if (radeon->radeonScreen->driScreen->dri2.enabled)
+		return;
+
+	LOCK_HARDWARE(radeon);
+	radeonWaitForIdleLocked(radeon);
+	UNLOCK_HARDWARE(radeon);
+}
+
+/**
+ * Re-point the FRONT_LEFT / BACK_LEFT attachments of \p rfb at the color
+ * renderbuffers selected by the current page-flip page.
+ *
+ * FRONT_LEFT gets color_rb[pf_current_page], BACK_LEFT gets the next page
+ * (modulo pf_num_pages).  An attachment is touched only when the wanted
+ * renderbuffer exists and is not already attached.
+ *
+ * NOTE(review): pf_num_pages is used as a modulus, so it must be non-zero
+ * here; radeonUpdatePageFlipping() sets it to 2 before calling - confirm
+ * for other callers.
+ */
+static void radeon_flip_renderbuffers(struct radeon_framebuffer *rfb)
+{
+	int current_page = rfb->pf_current_page;
+	int next_page = (current_page + 1) % rfb->pf_num_pages;
+	struct gl_renderbuffer *tmp_rb;
+
+	/* Exchange renderbuffers if necessary but make sure their
+	 * reference counts are preserved.
+	 */
+	if (rfb->color_rb[current_page] &&
+	    rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer !=
+	    &rfb->color_rb[current_page]->base) {
+		/* tmp_rb first references the outgoing buffer, is then
+		 * overwritten (not released) with the incoming one, and is
+		 * finally released.  Presumably each
+		 * _mesa_reference_renderbuffer() call drops the old pointee
+		 * and takes the new one, so both buffers end up with their
+		 * original counts - verify against core Mesa.
+		 */
+		tmp_rb = NULL;
+		_mesa_reference_renderbuffer(&tmp_rb,
+					     rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
+		tmp_rb = &rfb->color_rb[current_page]->base;
+		_mesa_reference_renderbuffer(&rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer, tmp_rb);
+		_mesa_reference_renderbuffer(&tmp_rb, NULL);
+	}
+
+	/* Same exchange for the back buffer, using the next page. */
+	if (rfb->color_rb[next_page] &&
+	    rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer !=
+	    &rfb->color_rb[next_page]->base) {
+		tmp_rb = NULL;
+		_mesa_reference_renderbuffer(&tmp_rb,
+					     rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer);
+		tmp_rb = &rfb->color_rb[next_page]->base;
+		_mesa_reference_renderbuffer(&rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer, tmp_rb);
+		_mesa_reference_renderbuffer(&tmp_rb, NULL);
+	}
+}
+
+/**
+ * Copy the back color buffer to the front color buffer.
+ *
+ * The drawable's cliprects are copied into the SAREA in batches of at most
+ * RADEON_NR_SAREA_CLIPRECTS and one DRM_RADEON_SWAP ioctl is issued per
+ * non-empty batch.  The whole operation runs under the hardware lock.
+ *
+ * \param dPriv  drawable to swap; must have a bound context.
+ * \param rect   optional rectangle to intersect every cliprect with;
+ *               cliprects that become empty are dropped.  NULL copies the
+ *               full cliprects.
+ *
+ * A failing ioctl is reported on stderr and terminates the process.
+ */
+void radeonCopyBuffer( __DRIdrawable *dPriv,
+		       const drm_clip_rect_t	  *rect)
+{
+	radeonContextPtr rmesa;
+	GLint nbox, i, ret;
+
+	assert(dPriv);
+	assert(dPriv->driContextPriv);
+	assert(dPriv->driContextPriv->driverPrivate);
+
+	rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+
+	LOCK_HARDWARE(rmesa);
+
+	if ( RADEON_DEBUG & RADEON_IOCTL ) {
+		fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx );
+	}
+
+	nbox = dPriv->numClipRects; /* must be in locked region */
+
+	/* The outer loop has no increment of its own: the inner loop
+	 * advances i through the current batch.
+	 */
+	for ( i = 0 ; i < nbox ; ) {
+		GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox );
+		drm_clip_rect_t *box = dPriv->pClipRects;
+		drm_clip_rect_t *b = rmesa->sarea->boxes;
+		GLint n = 0;
+
+		for ( ; i < nr ; i++ ) {
+
+			*b = box[i];
+
+			if (rect)
+			{
+				/* Intersect the cliprect with the sub-rectangle. */
+				if (rect->x1 > b->x1)
+					b->x1 = rect->x1;
+				if (rect->y1 > b->y1)
+					b->y1 = rect->y1;
+				if (rect->x2 < b->x2)
+					b->x2 = rect->x2;
+				if (rect->y2 < b->y2)
+					b->y2 = rect->y2;
+
+				/* Empty intersection: reuse this SAREA slot
+				 * for the next cliprect.
+				 */
+				if (b->x1 >= b->x2 || b->y1 >= b->y2)
+					continue;
+			}
+
+			b++;
+			n++;
+		}
+		rmesa->sarea->nbox = n;
+
+		if (!n)
+			continue;
+
+		ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP );
+
+		if ( ret ) {
+			fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret );
+			UNLOCK_HARDWARE( rmesa );
+			exit( 1 );
+		}
+	}
+
+	UNLOCK_HARDWARE( rmesa );
+}
+
+/**
+ * Flush pending vertices and throttle before a swap.
+ *
+ * With at least one cliprect the function waits for frame completion
+ * (under the hardware lock) and then for vblank, which is the only path
+ * that passes \p missed_target on.  With no cliprects it just sleeps 10 ms
+ * and leaves \p missed_target untouched.
+ *
+ * \return always 0.
+ */
+static int radeonScheduleSwap(__DRIdrawable *dPriv, GLboolean *missed_target)
+{
+	radeonContextPtr rmesa =
+		(radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+	int visible;
+
+	radeon_firevertices(rmesa);
+
+	LOCK_HARDWARE( rmesa );
+	visible = (dPriv->numClipRects != 0);
+	if (visible)
+		radeonWaitForFrameCompletion(rmesa);
+	UNLOCK_HARDWARE(rmesa);
+
+	if (visible)
+		driWaitForVBlank(dPriv, missed_target);
+	else
+		usleep(10000);	/* throttle invisible client 10ms */
+
+	return 0;
+}
+
+/**
+ * Flip front and back buffers through the DRM_RADEON_FLIP ioctl.
+ *
+ * The first cliprect of the drawable is copied into the SAREA, the flip is
+ * issued under the hardware lock, and on success the framebuffer's page
+ * bookkeeping and attachments are refreshed from the SAREA.
+ *
+ * \return GL_TRUE when the flip was issued and page flipping is active for
+ *         the framebuffer; GL_FALSE when there is no cliprect to flip,
+ *         when the ioctl fails, or when page flipping is not active.
+ */
+static GLboolean radeonPageFlip( __DRIdrawable *dPriv )
+{
+	radeonContextPtr radeon;
+	GLint ret;
+	struct radeon_framebuffer *rfb;
+
+	assert(dPriv);
+	assert(dPriv->driContextPriv);
+	assert(dPriv->driContextPriv->driverPrivate);
+
+	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+	rfb = dPriv->driverPrivate;
+
+	LOCK_HARDWARE(radeon);
+
+	if ( RADEON_DEBUG & RADEON_IOCTL ) {
+		fprintf(stderr, "%s: pfCurrentPage: %d %d\n", __FUNCTION__,
+			radeon->sarea->pfCurrentPage, radeon->sarea->pfState);
+	}
+
+	/* radeonSwapBuffers() gets here even when the drawable currently has
+	 * no cliprects; do not read pClipRects[0] in that case.  The
+	 * cliprect state must be examined inside the locked region.
+	 */
+	if (dPriv->numClipRects < 1 || dPriv->pClipRects == NULL) {
+		UNLOCK_HARDWARE(radeon);
+		return GL_FALSE;
+	}
+
+	radeon->sarea->boxes[0] = dPriv->pClipRects[0];
+	radeon->sarea->nbox = 1;
+
+	ret = drmCommandNone( radeon->dri.fd, DRM_RADEON_FLIP );
+
+	UNLOCK_HARDWARE(radeon);
+
+	if ( ret ) {
+		fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret );
+		return GL_FALSE;
+	}
+
+	if (!rfb->pf_active)
+		return GL_FALSE;
+
+	rfb->pf_current_page = radeon->sarea->pfCurrentPage;
+	radeon_flip_renderbuffers(rfb);
+	radeon_draw_buffer(radeon->glCtx, &rfb->base);
+
+	return GL_TRUE;
+}
+
+
+/**
+ * Swap front and back buffer.
+ *
+ * For double-buffered visuals this flushes pending rendering, throttles via
+ * radeonScheduleSwap(), then either page-flips or copies back to front,
+ * and finally updates the framebuffer's swap statistics and marks all
+ * hardware state dirty.  Single-buffered visuals are left alone.
+ *
+ * A drawable without a bound context is reported through _mesa_problem().
+ */
+void radeonSwapBuffers(__DRIdrawable * dPriv)
+{
+	int64_t ust;
+	__DRIscreen *psp;
+
+	if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+		radeonContextPtr radeon;
+		GLcontext *ctx;
+
+		radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+		ctx = radeon->glCtx;
+
+		if (ctx->Visual.doubleBufferMode) {
+			/* radeonScheduleSwap() only passes this on to the
+			 * vblank wait; on its early-return path (no
+			 * cliprects) it is never written, so it needs a
+			 * defined value before it is tested below.
+			 */
+			GLboolean missed_target = GL_FALSE;
+			struct radeon_framebuffer *rfb = dPriv->driverPrivate;
+			_mesa_notifySwapBuffers(ctx);/* flush pending rendering commands */
+
+			radeonScheduleSwap(dPriv, &missed_target);
+
+			if (rfb->pf_active) {
+				radeonPageFlip(dPriv);
+			} else {
+				radeonCopyBuffer(dPriv, NULL);
+			}
+
+			psp = dPriv->driScreenPriv;
+
+			rfb->swap_count++;
+			(*psp->systemTime->getUST)( & ust );
+			if ( missed_target ) {
+				rfb->swap_missed_count++;
+				rfb->swap_missed_ust = ust - rfb->swap_ust;
+			}
+
+			rfb->swap_ust = ust;
+			radeon->hw.all_dirty = GL_TRUE;
+		}
+	} else {
+		/* XXX this shouldn't be an error but we can't handle it for now */
+		_mesa_problem(NULL, "%s: drawable has no context!",
+			      __FUNCTION__);
+	}
+}
+
+/**
+ * Copy a sub-rectangle of the back buffer to the front buffer.
+ *
+ * (x, y, w, h) is given relative to the drawable with a bottom-left origin;
+ * it is converted to a top-left based rectangle offset by the drawable
+ * position before being handed to radeonCopyBuffer().  Only
+ * double-buffered visuals are handled.
+ *
+ * A drawable without a bound context is reported through _mesa_problem().
+ */
+void radeonCopySubBuffer(__DRIdrawable * dPriv,
+			 int x, int y, int w, int h )
+{
+	radeonContextPtr radeon;
+	GLcontext *ctx;
+	drm_clip_rect_t rect;
+
+	if (!dPriv->driContextPriv || !dPriv->driContextPriv->driverPrivate) {
+		/* XXX this shouldn't be an error but we can't handle it for now */
+		_mesa_problem(NULL, "%s: drawable has no context!",
+			      __FUNCTION__);
+		return;
+	}
+
+	radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate;
+	ctx = radeon->glCtx;
+
+	if (!ctx->Visual.doubleBufferMode)
+		return;
+
+	rect.x1 = x + dPriv->x;
+	rect.y1 = (dPriv->h - y - h) + dPriv->y;
+	rect.x2 = rect.x1 + w;
+	rect.y2 = rect.y1 + h;
+
+	_mesa_notifySwapBuffers(ctx);	/* flush pending rendering comands */
+	radeonCopyBuffer(dPriv, &rect);
+}
+
+/**
+ * Check if we're about to draw into the front color buffer.
+ * If so, set the radeon->front_buffer_dirty field to true.
+ *
+ * Only the window-system framebuffer (Name == 0) with at least one color
+ * draw buffer whose first index is BUFFER_FRONT_LEFT qualifies.
+ */
+void
+radeon_check_front_buffer_rendering(GLcontext *ctx)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	const struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+	if (fb->Name != 0)
+		return;		/* user FBO, not a window system buffer */
+
+	if (fb->_NumColorDrawBuffers > 0 &&
+	    fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT)
+		radeon->front_buffer_dirty = GL_TRUE;
+}
+
+
+/**
+ * Bring the driver's notion of the current color/depth/stencil buffers in
+ * line with framebuffer \p fb and refresh the state that depends on it.
+ *
+ * Steps, in order:
+ *  - bail out for a NULL framebuffer, or fall back when the number of
+ *    color draw buffers is not exactly one;
+ *  - refresh core Mesa's derived framebuffer state when buffer, color or
+ *    pixel state is dirty;
+ *  - bail out for incomplete framebuffers;
+ *  - pick the color renderbuffer (front/back for window buffers, the first
+ *    color draw buffer for user FBOs) and the depth/stencil renderbuffers,
+ *    toggling the matching software fallbacks;
+ *  - re-run the FrontFace / Enable / DepthRange driver hooks, store the
+ *    chosen renderbuffers in radeon->state and update the scissor.
+ */
+void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct radeon_renderbuffer *rrbDepth = NULL, *rrbStencil = NULL,
+		*rrbColor = NULL;
+	uint32_t offset = 0;
+
+
+	if (!fb) {
+		/* this can happen during the initial context initialization */
+		return;
+	}
+
+	/* radeons only handle 1 color draw so far */
+	if (fb->_NumColorDrawBuffers != 1) {
+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE);
+		return;
+	}
+
+	/* Do this here, not core Mesa, since this function is called from
+	 * many places within the driver.
+	 */
+	if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) {
+		/* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */
+		_mesa_update_framebuffer(ctx);
+		/* this updates the DrawBuffer's Width/Height if it's a FBO */
+		_mesa_update_draw_buffer_bounds(ctx);
+	}
+
+	if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
+		/* this may occur when we're called by glBindFrameBuffer() during
+		 * the process of someone setting up renderbuffers, etc.
+		 */
+		/*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/
+		return;
+	}
+
+	/* TODO: for user FBOs (fb->Name != 0), do something depthy/stencily */
+
+	if (fb->Name == 0) {
+		if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+			rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
+			radeon->front_cliprects = GL_TRUE;
+			radeon->front_buffer_dirty = GL_TRUE;
+		} else {
+			rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer);
+			radeon->front_cliprects = GL_FALSE;
+		}
+	} else {
+		/* user FBO in theory */
+		struct radeon_renderbuffer *rrb;
+		rrb = radeon_renderbuffer(fb->_ColorDrawBuffers[0]);
+		if (rrb) {
+			offset = rrb->draw_offset;
+			rrbColor = rrb;
+		}
+		radeon->constant_cliprect = GL_TRUE;
+	}
+
+	if (rrbColor == NULL)
+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE);
+	else
+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE);
+
+
+	if (fb->_DepthBuffer && fb->_DepthBuffer->Wrapped) {
+		rrbDepth = radeon_renderbuffer(fb->_DepthBuffer->Wrapped);
+		if (rrbDepth && rrbDepth->bo) {
+			radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_FALSE);
+		} else {
+			radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_TRUE);
+		}
+	} else {
+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_FALSE);
+		rrbDepth = NULL;
+	}
+
+	if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) {
+		rrbStencil = radeon_renderbuffer(fb->_StencilBuffer->Wrapped);
+		if (rrbStencil && rrbStencil->bo) {
+			radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_FALSE);
+			/* need to re-compute stencil hw state */
+			if (!rrbDepth)
+				rrbDepth = rrbStencil;
+		} else {
+			radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_TRUE);
+		}
+	} else {
+		radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_FALSE);
+		if (ctx->Driver.Enable != NULL)
+			ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled);
+		else
+			ctx->NewState |= _NEW_STENCIL;
+	}
+
+	/* Update culling direction which changes depending on the
+	 * orientation of the buffer:
+	 */
+	if (ctx->Driver.FrontFace)
+		ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
+	else
+		ctx->NewState |= _NEW_POLYGON;
+
+	/*
+	 * Update depth test state
+	 */
+	if (ctx->Driver.Enable) {
+		ctx->Driver.Enable(ctx, GL_DEPTH_TEST,
+				   (ctx->Depth.Test && fb->Visual.depthBits > 0));
+		/* Need to update the derived ctx->Stencil._Enabled first */
+		ctx->Driver.Enable(ctx, GL_STENCIL_TEST,
+				   (ctx->Stencil.Enabled && fb->Visual.stencilBits > 0));
+	} else {
+		ctx->NewState |= (_NEW_DEPTH | _NEW_STENCIL);
+	}
+
+	/* Either renderbuffer may legitimately be missing at this point (no
+	 * depth/stencil attachment, or the color fallback above).  Pass NULL
+	 * explicitly instead of forming &ptr->base from a NULL pointer.
+	 */
+	_mesa_reference_renderbuffer(&radeon->state.depth.rb,
+				     rrbDepth ? &rrbDepth->base : NULL);
+	_mesa_reference_renderbuffer(&radeon->state.color.rb,
+				     rrbColor ? &rrbColor->base : NULL);
+	radeon->state.color.draw_offset = offset;
+
+#if 0
+	/* update viewport since it depends on window size */
+	if (ctx->Driver.Viewport) {
+		ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y,
+				     ctx->Viewport.Width, ctx->Viewport.Height);
+	} else {
+
+	}
+#endif
+	ctx->NewState |= _NEW_VIEWPORT;
+
+	/* Set state we know depends on drawable parameters:
+	 */
+	radeonUpdateScissor(ctx);
+	radeon->NewGLState |= _NEW_SCISSOR;
+
+	if (ctx->Driver.DepthRange)
+		ctx->Driver.DepthRange(ctx,
+				       ctx->Viewport.Near,
+				       ctx->Viewport.Far);
+
+	/* Update culling direction which changes depending on the
+	 * orientation of the buffer:
+	 */
+	if (ctx->Driver.FrontFace)
+		ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace);
+	else
+		ctx->NewState |= _NEW_POLYGON;
+}
+
+/**
+ * Called via glDrawBuffer.
+ *
+ * For the window-system framebuffer this tracks whether front-buffer
+ * rendering is selected and, on a transition into front-buffer rendering,
+ * refreshes the drawable's renderbuffers so the front buffer exists.  The
+ * draw buffer state is then re-derived in all cases.
+ */
+void radeonDrawBuffer( GLcontext *ctx, GLenum mode )
+{
+	if (RADEON_DEBUG & RADEON_DRI)
+		fprintf(stderr, "%s %s\n", __FUNCTION__,
+			_mesa_lookup_enum_by_nr( mode ));
+
+	if (ctx->DrawBuffer->Name == 0) {
+		radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+		const GLboolean front_before = radeon->is_front_buffer_rendering;
+		GLboolean front_now = GL_FALSE;
+
+		switch (mode) {
+		case GL_FRONT_LEFT:
+		case GL_FRONT:
+			front_now = GL_TRUE;
+			break;
+		default:
+			break;
+		}
+		radeon->is_front_buffer_rendering = front_now;
+
+		/* Newly switched to the front buffer: make sure it has
+		 * actually been allocated.
+		 */
+		if (front_now && !front_before) {
+			radeon_update_renderbuffers(radeon->dri.context,
+				radeon->dri.context->driDrawablePriv, GL_FALSE);
+		}
+	}
+
+	radeon_draw_buffer(ctx, ctx->DrawBuffer);
+}
+
+/**
+ * glReadBuffer hook.
+ *
+ * When the draw buffer is the window-system framebuffer, this tracks
+ * whether the front buffer is selected for reading and refreshes the
+ * readable's renderbuffers on a transition into front-buffer reading.
+ * If read and draw framebuffer are the same object, the draw buffer state
+ * is re-derived as well.
+ */
+void radeonReadBuffer( GLcontext *ctx, GLenum mode )
+{
+	if (ctx->DrawBuffer != NULL && ctx->DrawBuffer->Name == 0) {
+		struct radeon_context *const rmesa = RADEON_CONTEXT(ctx);
+		const GLboolean front_before = rmesa->is_front_buffer_reading;
+		GLboolean front_now = GL_FALSE;
+
+		switch (mode) {
+		case GL_FRONT_LEFT:
+		case GL_FRONT:
+			front_now = GL_TRUE;
+			break;
+		default:
+			break;
+		}
+		rmesa->is_front_buffer_reading = front_now;
+
+		if (front_now && !front_before) {
+			radeon_update_renderbuffers(rmesa->dri.context,
+						    rmesa->dri.context->driReadablePriv, GL_FALSE);
+		}
+	}
+
+	/* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */
+	if (ctx->ReadBuffer != ctx->DrawBuffer)
+		return;
+
+	/* This will update FBO completeness status.
+	 * A framebuffer will be incomplete if the GL_READ_BUFFER setting
+	 * refers to a missing renderbuffer.  Calling glReadBuffer can set
+	 * that straight and can make the drawing buffer complete.
+	 */
+	radeon_draw_buffer(ctx, ctx->DrawBuffer);
+}
+
+
+/**
+ * Turn on/off page flipping according to the flags in the sarea.
+ *
+ * Copies pfState / pfCurrentPage from the SAREA into the current
+ * drawable's framebuffer, fixes the page count at 2, then re-attaches the
+ * renderbuffers and re-derives the draw buffer state.
+ */
+void radeonUpdatePageFlipping(radeonContextPtr radeon)
+{
+	__DRIdrawable *drawable = radeon_get_drawable(radeon);
+	struct radeon_framebuffer *rfb = drawable->driverPrivate;
+
+	rfb->pf_num_pages = 2;
+	rfb->pf_current_page = radeon->sarea->pfCurrentPage;
+	rfb->pf_active = radeon->sarea->pfState;
+
+	radeon_flip_renderbuffers(rfb);
+	radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer);
+}
+
+/**
+ * React to a window move/resize: refresh the cliprects and, on non-DRI2
+ * screens, the page flipping state.
+ */
+void radeon_window_moved(radeonContextPtr radeon)
+{
+	/* Cliprects has to be updated before doing anything else */
+	radeonSetCliprects(radeon);
+
+	if (radeon->radeonScreen->driScreen->dri2.enabled)
+		return;
+
+	radeonUpdatePageFlipping(radeon);
+}
+
+/**
+ * Viewport hook; only does work on DRI2 screens.
+ *
+ * Unless the call is flagged as internal (meta.internal_viewport_call) or
+ * a user FBO is bound, the renderbuffers of the drawable (and of the
+ * readable, if different) are refreshed first.  Afterwards the cliprects
+ * and draw buffer state are re-derived.
+ *
+ * The x/y/width/height arguments are not read by this function.
+ */
+void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	__DRIcontext *driContext = radeon->dri.context;
+	void (*old_viewport)(GLcontext *ctx, GLint x, GLint y,
+			     GLsizei w, GLsizei h);
+
+	if (!driContext->driScreenPriv->dri2.enabled)
+		return;
+
+	if (!radeon->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) {
+		/* Flush front-buffer rendering before the buffers are
+		 * refreshed.
+		 */
+		if (radeon->is_front_buffer_rendering) {
+			ctx->Driver.Flush(ctx);
+		}
+		radeon_update_renderbuffers(driContext, driContext->driDrawablePriv, GL_FALSE);
+		if (driContext->driDrawablePriv != driContext->driReadablePriv)
+			radeon_update_renderbuffers(driContext, driContext->driReadablePriv, GL_FALSE);
+	}
+
+	/* The Viewport hook is cleared while the window/draw-buffer state is
+	 * refreshed and restored afterwards - presumably so that nothing
+	 * called from here can re-enter this function; confirm.
+	 */
+	old_viewport = ctx->Driver.Viewport;
+	ctx->Driver.Viewport = NULL;
+	radeon_window_moved(radeon);
+	radeon_draw_buffer(ctx, radeon->glCtx->DrawBuffer);
+	ctx->Driver.Viewport = old_viewport;
+}
+
+/**
+ * Debug dump of one state atom for the non-kernel_mm path.
+ *
+ * Always prints the atom name with its checked size and cmd_size.  At
+ * trace level the command words are decoded as a sequence of
+ * drm_r300_cmd_header_t packet0 headers, each followed by its register
+ * values.
+ */
+static void radeon_print_state_atom_prekmm(radeonContextPtr radeon, struct radeon_state_atom *state)
+{
+	int i, j, reg;
+	int dwords = (*state->check) (radeon->glCtx, state);
+	drm_r300_cmd_header_t cmd;
+
+	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords, state->cmd_size);
+
+	if (radeon_is_debug_enabled(RADEON_STATE, RADEON_TRACE)) {
+		/* Never walk past the atom's command array. */
+		if (dwords > state->cmd_size)
+			dwords = state->cmd_size;
+
+		/* i is advanced inside the body: one header word, then up to
+		 * packet0.count payload words.
+		 */
+		for (i = 0; i < dwords;) {
+			cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]);
+			reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo;
+			fprintf(stderr, "      %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n",
+					state->name, i, reg, cmd.packet0.count);
+			++i;
+			for (j = 0; j < cmd.packet0.count && i < dwords; j++) {
+				fprintf(stderr, "      %s[%d]: 0x%04x = %08x\n",
+						state->name, i, reg, state->cmd[i]);
+				reg += 4;
+				++i;
+			}
+		}
+	}
+}
+
+/**
+ * Debug dump of one state atom.
+ *
+ * Does nothing unless verbose state debugging is enabled.  Without
+ * kernel_mm the work is delegated to radeon_print_state_atom_prekmm();
+ * otherwise the command words are decoded here, taking the register
+ * offset from the low 13 bits of each header word (times 4) and the count
+ * from bits 16..29 (plus one).
+ */
+static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state)
+{
+	int i, j, reg, count;
+	int dwords;
+	uint32_t packet0;
+	if (!radeon_is_debug_enabled(RADEON_STATE, RADEON_VERBOSE) )
+		return;
+
+	if (!radeon->radeonScreen->kernel_mm) {
+		radeon_print_state_atom_prekmm(radeon, state);
+		return;
+	}
+
+	dwords = (*state->check) (radeon->glCtx, state);
+
+	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords, state->cmd_size);
+
+	if (radeon_is_debug_enabled(RADEON_STATE, RADEON_TRACE)) {
+		/* Never walk past the atom's command array. */
+		if (dwords > state->cmd_size)
+			dwords = state->cmd_size;
+		/* i is advanced inside the body: one header word, then up to
+		 * count payload words.
+		 */
+		for (i = 0; i < dwords;) {
+			packet0 = state->cmd[i];
+			reg = (packet0 & 0x1FFF) << 2;
+			count = ((packet0 & 0x3FFF0000) >> 16) + 1;
+			fprintf(stderr, "      %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n",
+					state->name, i, reg, count);
+			++i;
+			for (j = 0; j < count && i < dwords; j++) {
+				fprintf(stderr, "      %s[%d]: 0x%04x = %08x\n",
+						state->name, i, reg, state->cmd[i]);
+				reg += 4;
+				++i;
+			}
+		}
+	}
+}
+
+/**
+ * Count total size for next state emit.
+ *
+ * If the command stream already holds commands and not everything is
+ * dirty, only the dirty atoms are summed (zero when nothing is dirty).
+ * Otherwise every atom in the list is summed.  Each atom's size comes from
+ * its check() callback.
+ *
+ * \return the total number of dwords reported by the atoms considered.
+ **/
+GLuint radeonCountStateEmitSize(radeonContextPtr radeon)
+{
+	struct radeon_state_atom *atom;
+	GLuint dwords = 0;
+	/* check if we are going to emit full state */
+
+	if (radeon->cmdbuf.cs->cdw && !radeon->hw.all_dirty) {
+		/* Partial emit: nothing dirty means nothing to count. */
+		if (!radeon->hw.is_dirty)
+			goto out;
+		foreach(atom, &radeon->hw.atomlist) {
+			if (atom->dirty) {
+				const GLuint atom_size = atom->check(radeon->glCtx, atom);
+				dwords += atom_size;
+				if (RADEON_CMDBUF && atom_size) {
+					radeon_print_state_atom(radeon, atom);
+				}
+			}
+		}
+	} else {
+		/* Full emit: empty command stream or everything dirty. */
+		foreach(atom, &radeon->hw.atomlist) {
+			const GLuint atom_size = atom->check(radeon->glCtx, atom);
+			dwords += atom_size;
+			if (RADEON_CMDBUF && atom_size) {
+				radeon_print_state_atom(radeon, atom);
+			}
+
+		}
+	}
+out:
+	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %u\n", __func__, dwords);
+	return dwords;
+}
+
+/**
+ * Emit a single state atom into the command stream.
+ *
+ * The atom's check() callback decides how many dwords to emit; zero means
+ * the atom is skipped (and its dirty flag is left alone).  Atoms with an
+ * emit() callback emit themselves, others have their cmd table copied into
+ * the batch verbatim.  An emitted atom is marked clean.
+ */
+static INLINE void radeon_emit_atom(radeonContextPtr radeon, struct radeon_state_atom *atom)
+{
+	BATCH_LOCALS(radeon);
+	int dwords;
+
+	dwords = (*atom->check) (radeon->glCtx, atom);
+	if (dwords) {
+
+		radeon_print_state_atom(radeon, atom);
+
+		if (atom->emit) {
+			(*atom->emit)(radeon->glCtx, atom);
+		} else {
+			BEGIN_BATCH_NO_AUTOSTATE(dwords);
+			OUT_BATCH_TABLE(atom->cmd, dwords);
+			END_BATCH();
+		}
+		atom->dirty = GL_FALSE;
+
+	} else {
+		radeon_print(RADEON_STATE, RADEON_VERBOSE, "  skip state %s\n", atom->name);
+	}
+
+}
+
+/**
+ * Emit state atoms and commit the batch.
+ *
+ * After the optional pre_emit_atoms hook has run, either every atom is
+ * emitted (when \p emitAll is set or all state is dirty) or only the
+ * atoms flagged dirty.
+ */
+static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean emitAll)
+{
+	struct radeon_state_atom *atom;
+	GLboolean everything;
+
+	if (radeon->vtbl.pre_emit_atoms)
+		radeon->vtbl.pre_emit_atoms(radeon);
+
+	/* Decided once, before walking the list. */
+	everything = (radeon->hw.all_dirty || emitAll) ? GL_TRUE : GL_FALSE;
+
+	/* Emit actual atoms */
+	foreach(atom, &radeon->hw.atomlist) {
+		if (everything || atom->dirty)
+			radeon_emit_atom( radeon, atom );
+	}
+
+	COMMIT_BATCH();
+}
+
+/**
+ * Run the command stream's space check.
+ *
+ * \return GL_FALSE when the check asks for a flush
+ *         (RADEON_CS_SPACE_FLUSH), GL_TRUE otherwise.
+ */
+static GLboolean radeon_revalidate_bos(GLcontext *ctx)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	const int status = radeon_cs_space_check(radeon->cmdbuf.cs);
+
+	return (status == RADEON_CS_SPACE_FLUSH) ? GL_FALSE : GL_TRUE;
+}
+
+/**
+ * Emit hardware state into the command stream.
+ *
+ * After the optional pre_emit_state hook, an empty command stream gets the
+ * full state re-emitted; a non-empty one only gets the dirty atoms, or
+ * nothing at all if no state is dirty.  Both dirty flags are cleared after
+ * an emit.
+ */
+void radeonEmitState(radeonContextPtr radeon)
+{
+	int stream_has_cmds;
+
+	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s\n", __FUNCTION__);
+
+	if (radeon->vtbl.pre_emit_state)
+		radeon->vtbl.pre_emit_state(radeon);
+
+	stream_has_cmds = (radeon->cmdbuf.cs->cdw != 0);
+
+	/* this code used to return here but now it emits zbs */
+	if (stream_has_cmds && !radeon->hw.is_dirty && !radeon->hw.all_dirty)
+		return;
+
+	if (stream_has_cmds) {
+		if (RADEON_DEBUG & RADEON_STATE)
+			fprintf(stderr, "Begin dirty state\n");
+
+		radeonEmitAtoms(radeon, GL_FALSE);
+	} else {
+		if (RADEON_DEBUG & RADEON_STATE)
+			fprintf(stderr, "Begin reemit state\n");
+
+		radeonEmitAtoms(radeon, GL_TRUE);
+	}
+
+	radeon->hw.all_dirty = GL_FALSE;
+	radeon->hw.is_dirty = GL_FALSE;
+}
+
+
+/**
+ * glFlush hook: push pending DMA and command-stream contents to the
+ * kernel, then hand a dirty front buffer to the DRI2 loader.
+ */
+void radeonFlush(GLcontext *ctx)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	if (RADEON_DEBUG & RADEON_IOCTL)
+		fprintf(stderr, "%s %d\n", __FUNCTION__, radeon->cmdbuf.cs->cdw);
+
+	/* okay if we have no cmds in the buffer &&
+	   we have no DMA flush &&
+	   we have no DMA buffer allocated.
+	   then no point flushing anything at all.
+	*/
+	if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && is_empty_list(&radeon->dma.reserved))
+		goto flush_front;
+
+	if (radeon->dma.flush)
+		radeon->dma.flush( ctx );
+
+	if (radeon->cmdbuf.cs->cdw)
+		rcommonFlushCmdBuf(radeon, __FUNCTION__);
+
+flush_front:
+	/* Front-buffer handling applies only to the window-system
+	 * framebuffer and needs a loader (version >= 2) that provides
+	 * flushFrontBuffer.
+	 */
+	if ((ctx->DrawBuffer->Name == 0) && radeon->front_buffer_dirty) {
+		__DRIscreen *const screen = radeon->radeonScreen->driScreen;
+
+		if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2)
+			&& (screen->dri2.loader->flushFrontBuffer != NULL)) {
+			__DRIdrawable * drawable = radeon_get_drawable(radeon);
+			(*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate);
+
+			/* Only clear the dirty bit if front-buffer rendering is no longer
+			 * enabled.  This is done so that the dirty bit can only be set in
+			 * glDrawBuffer.  Otherwise the dirty bit would have to be set at
+			 * each of N places that do rendering.  This has worse performances,
+			 * but it is much easier to get correct.
+			 */
+			if (!radeon->is_front_buffer_rendering) {
+				radeon->front_buffer_dirty = GL_FALSE;
+			}
+		}
+	}
+}
+
+/**
+ * Make sure all commands have been sent to the hardware and have
+ * completed processing.
+ *
+ * After flushing, completion is awaited in one of three ways: with
+ * kernel_mm by waiting on the buffer objects of the color draw buffers and
+ * the depth buffer; otherwise by emitting and waiting on an IRQ when
+ * do_irqs is set; otherwise by waiting for engine idle.
+ */
+void radeonFinish(GLcontext * ctx)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct gl_framebuffer *fb = ctx->DrawBuffer;
+	int i;
+
+	if (ctx->Driver.Flush)
+		ctx->Driver.Flush(ctx); /* +r6/r7 */
+
+	if (radeon->radeonScreen->kernel_mm) {
+		/* Wait on every color draw buffer that has a buffer object. */
+		for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
+			struct radeon_renderbuffer *rrb;
+			rrb = radeon_renderbuffer(fb->_ColorDrawBuffers[i]);
+			if (rrb && rrb->bo)
+				radeon_bo_wait(rrb->bo);
+		}
+		/* ... and on the depth buffer, if any. */
+		{
+			struct radeon_renderbuffer *rrb;
+			rrb = radeon_get_depthbuffer(radeon);
+			if (rrb && rrb->bo)
+				radeon_bo_wait(rrb->bo);
+		}
+	} else if (radeon->do_irqs) {
+		/* The IRQ is emitted under the lock; the wait itself runs
+		 * unlocked.
+		 */
+		LOCK_HARDWARE(radeon);
+		radeonEmitIrqLocked(radeon);
+		UNLOCK_HARDWARE(radeon);
+		radeonWaitIrq(radeon);
+	} else {
+		radeonWaitForIdle(radeon);
+	}
+}
+
+/* cmdbuffer */
+/**
+ * Send the current command buffer via ioctl to the hardware.
+ *
+ * The cmdbuf.flushing flag guards against re-entry; a recursive call
+ * prints an error and exits.  A non-empty stream is emitted and all
+ * hardware state is marked dirty; the stream is erased in either case.
+ * A failing buffer revalidation afterwards is only reported on stderr.
+ *
+ * \param rmesa   context whose command stream is flushed.
+ * \param caller  name of the calling function, used in debug output only.
+ * \return the result of radeon_cs_emit(), or 0 if the stream was empty.
+ */
+int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller)
+{
+	int ret = 0;
+
+	if (rmesa->cmdbuf.flushing) {
+		fprintf(stderr, "Recursive call into r300FlushCmdBufLocked!\n");
+		exit(-1);
+	}
+	rmesa->cmdbuf.flushing = 1;
+
+	if (RADEON_DEBUG & RADEON_IOCTL) {
+		fprintf(stderr, "%s from %s - %i cliprects\n",
+			__FUNCTION__, caller, rmesa->numClipRects);
+	}
+
+	radeonEmitQueryEnd(rmesa->glCtx);
+
+	if (rmesa->cmdbuf.cs->cdw) {
+		ret = radeon_cs_emit(rmesa->cmdbuf.cs);
+		/* State has to be re-emitted into the next stream. */
+		rmesa->hw.all_dirty = GL_TRUE;
+	}
+	radeon_cs_erase(rmesa->cmdbuf.cs);
+	rmesa->cmdbuf.flushing = 0;
+
+	if (radeon_revalidate_bos(rmesa->glCtx) == GL_FALSE) {
+		fprintf(stderr,"failed to revalidate buffers\n");
+	}
+
+	return ret;
+}
+
+/**
+ * Release the DMA regions, then flush the command buffer under the
+ * hardware lock.
+ *
+ * \return 0 on success.  A non-zero result from the flush is reported on
+ *         stderr and the process exits with that value.
+ */
+int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller)
+{
+	int status;
+
+	radeonReleaseDmaRegions(rmesa);
+
+	LOCK_HARDWARE(rmesa);
+	status = rcommonFlushCmdBufLocked(rmesa, caller);
+	UNLOCK_HARDWARE(rmesa);
+
+	if (status == 0)
+		return status;
+
+	fprintf(stderr, "drmRadeonCmdBuffer: %d. Kernel failed to "
+			"parse or rejected command stream. See dmesg "
+			"for more info.\n", status);
+	exit(status);
+
+	return status;
+}
+
+/**
+ * Make sure that enough space is available in the command buffer
+ * by flushing if necessary.
+ *
+ * A flush happens when the used dwords plus \p dwords plus a 128 dword
+ * margin exceed the buffer size, or when the command stream itself asks
+ * for a flush.
+ *
+ * \param dwords The number of dwords we need to be free on the command buffer
+ * \param caller Name of the calling function, passed on to the flush.
+ * \return GL_TRUE if the buffer was flushed, GL_FALSE otherwise.
+ */
+GLboolean rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller)
+{
+   if ((rmesa->cmdbuf.cs->cdw + dwords + 128) <= rmesa->cmdbuf.size
+	 && !radeon_cs_need_flush(rmesa->cmdbuf.cs)) {
+      return GL_FALSE;
+   }
+
+   /* If we try to flush empty buffer there is too big rendering operation. */
+   assert(rmesa->cmdbuf.cs->cdw);
+   rcommonFlushCmdBuf(rmesa, caller);
+
+   return GL_TRUE;
+}
+
+/**
+ * Create the command stream manager and command stream for \p rmesa and
+ * configure its flush callback and memory-domain limits.
+ *
+ * The stream size (in dwords) starts from the "command_buffer_size"
+ * driconf option times 256, is raised when it is smaller than twice the
+ * maximum state size, and is finally capped at 64 * 256.
+ */
+void rcommonInitCmdBuf(radeonContextPtr rmesa)
+{
+	GLuint size;
+	/* Initialize command buffer */
+	size = 256 * driQueryOptioni(&rmesa->optionCache,
+				     "command_buffer_size");
+	if (size < 2 * rmesa->hw.max_state_size) {
+		size = 2 * rmesa->hw.max_state_size + 65535;
+	}
+	/* NOTE(review): this cap is applied last, so it can undo the
+	 * enlargement made just above - confirm that is intended.
+	 */
+	if (size > 64 * 256)
+		size = 64 * 256;
+
+	radeon_print(RADEON_CS, RADEON_VERBOSE,
+			"sizeof(drm_r300_cmd_header_t)=%zd\n", sizeof(drm_r300_cmd_header_t));
+	radeon_print(RADEON_CS, RADEON_VERBOSE,
+			"sizeof(drm_radeon_cmd_buffer_t)=%zd\n", sizeof(drm_radeon_cmd_buffer_t));
+	radeon_print(RADEON_CS, RADEON_VERBOSE,
+			"Allocating %d bytes command buffer (max state is %d bytes)\n",
+			size * 4, rmesa->hw.max_state_size * 4);
+
+	/* GEM manager with the kernel memory manager, legacy one without. */
+	if (rmesa->radeonScreen->kernel_mm) {
+		int fd = rmesa->radeonScreen->driScreen->fd;
+		rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
+	} else {
+		rmesa->cmdbuf.csm = radeon_cs_manager_legacy_ctor(rmesa);
+	}
+	if (rmesa->cmdbuf.csm == NULL) {
+		/* FIXME: fatal error */
+		return;
+	}
+	rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size);
+	assert(rmesa->cmdbuf.cs != NULL);
+	rmesa->cmdbuf.size = size;
+
+	/* The context's Flush hook is registered as the stream's flush
+	 * callback.
+	 */
+	radeon_cs_space_set_flush(rmesa->cmdbuf.cs,
+				  (void (*)(void *))rmesa->glCtx->Driver.Flush, rmesa->glCtx);
+
+	/* Domain limits: from the screen's texture heap sizes without
+	 * kernel_mm, otherwise from DRM_RADEON_GEM_INFO (left unset if that
+	 * ioctl fails).
+	 */
+	if (!rmesa->radeonScreen->kernel_mm) {
+		radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]);
+		radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size);
+	} else {
+		struct drm_radeon_gem_info mminfo = { 0 };
+
+		if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo)))
+		{
+			radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible);
+			radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size);
+		}
+	}
+
+}
+/**
+ * Destroy the command buffer
+ *
+ * The command stream is destroyed first, then its manager: the GEM
+ * destructor when DRI2 or the kernel memory manager is in use, the legacy
+ * destructor otherwise.
+ */
+void rcommonDestroyCmdBuf(radeonContextPtr rmesa)
+{
+	radeon_cs_destroy(rmesa->cmdbuf.cs);
+
+	if (!rmesa->radeonScreen->driScreen->dri2.enabled &&
+	    !rmesa->radeonScreen->kernel_mm) {
+		radeon_cs_manager_legacy_dtor(rmesa->cmdbuf.csm);
+		return;
+	}
+
+	radeon_cs_manager_gem_dtor(rmesa->cmdbuf.csm);
+}
+
+/**
+ * Open a batch of \p n dwords on the context's command stream and log it
+ * at verbose CS debug level.
+ *
+ * \param rmesa     context owning the command stream.
+ * \param n         number of dwords passed to radeon_cs_begin().
+ * \param dostate   not used by this function.
+ * \param file      source file of the call site, passed to radeon_cs_begin().
+ * \param function  calling function name, passed on and logged.
+ * \param line      source line of the call site, passed on and logged.
+ */
+void rcommonBeginBatch(radeonContextPtr rmesa, int n,
+		       int dostate,
+		       const char *file,
+		       const char *function,
+		       int line)
+{
+	radeon_cs_begin(rmesa->cmdbuf.cs, n, file, function, line);
+
+    radeon_print(RADEON_CS, RADEON_VERBOSE, "BEGIN_BATCH(%d) at %d, from %s:%i\n",
+                        n, rmesa->cmdbuf.cs->cdw, function, line);
+
+}
+
+/**
+ * Clear the buffers selected by \p mask by forwarding to
+ * _mesa_meta_Clear().
+ */
+void radeonUserClear(GLcontext *ctx, GLuint mask)
+{
+   _mesa_meta_Clear(ctx, mask);
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h
new file mode 100644
index 0000000000..35b3f08fff
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_common.h
@@ -0,0 +1,101 @@
+#ifndef COMMON_MISC_H
+#define COMMON_MISC_H
+
+#include "radeon_common_context.h"
+#include "radeon_dma.h"
+#include "radeon_texture.h"
+
+void radeonUserClear(GLcontext *ctx, GLuint mask);
+void radeonRecalcScissorRects(radeonContextPtr radeon);
+void radeonSetCliprects(radeonContextPtr radeon);
+void radeonUpdateScissor( GLcontext *ctx );
+void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h);
+
+void radeonWaitForIdleLocked(radeonContextPtr radeon);
+extern uint32_t radeonGetAge(radeonContextPtr radeon);
+void radeonCopyBuffer( __DRIdrawable *dPriv,
+		       const drm_clip_rect_t	  *rect);
+void radeonSwapBuffers(__DRIdrawable * dPriv);
+void radeonCopySubBuffer(__DRIdrawable * dPriv,
+			 int x, int y, int w, int h );
+
+void radeonUpdatePageFlipping(radeonContextPtr rmesa);
+
+void radeonFlush(GLcontext *ctx);
+void radeonFinish(GLcontext * ctx);
+void radeonEmitState(radeonContextPtr radeon);
+GLuint radeonCountStateEmitSize(radeonContextPtr radeon);
+
+void radeon_clear_tris(GLcontext *ctx, GLbitfield mask);
+
+void radeon_window_moved(radeonContextPtr radeon);
+void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb);
+void radeonDrawBuffer( GLcontext *ctx, GLenum mode );
+void radeonReadBuffer( GLcontext *ctx, GLenum mode );
+void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height);
+void radeon_get_cliprects(radeonContextPtr radeon,
+			  struct drm_clip_rect **cliprects,
+			  unsigned int *num_cliprects,
+			  int *x_off, int *y_off);
+void radeon_fbo_init(struct radeon_context *radeon);
+void
+radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
+			   struct radeon_bo *bo);
+struct radeon_renderbuffer *
+radeon_create_renderbuffer(gl_format format, __DRIdrawable *driDrawPriv);
+
+void
+radeonReadPixels(GLcontext * ctx,
+				GLint x, GLint y, GLsizei width, GLsizei height,
+				GLenum format, GLenum type,
+				const struct gl_pixelstore_attrib *pack, GLvoid * pixels);
+
+void radeon_check_front_buffer_rendering(GLcontext *ctx);
+/**
+ * Downcast a generic renderbuffer to the radeon-specific type.
+ *
+ * A RADEON_MEMORY trace line is emitted first.  The cast pointer is returned
+ * only when rb is non-NULL and its ClassID equals RADEON_RB_CLASS; in every
+ * other case the result is NULL.
+ */
+static inline struct radeon_renderbuffer *radeon_renderbuffer(struct gl_renderbuffer *rb)
+{
+	struct radeon_renderbuffer *candidate = (struct radeon_renderbuffer *)rb;
+
+	radeon_print(RADEON_MEMORY, RADEON_TRACE, "%s(rb %p)\n", __func__, rb);
+
+	if (!candidate)
+		return NULL;
+	if (candidate->base.ClassID != RADEON_RB_CLASS)
+		return NULL;
+
+	return candidate;
+}
+
+/**
+ * Fetch the radeon renderbuffer bound to attachment slot att_index of fb.
+ *
+ * A negative index yields NULL.  Otherwise the attachment's renderbuffer is
+ * passed through radeon_renderbuffer(), so the result is also NULL when the
+ * slot is empty or holds a non-radeon renderbuffer.  No upper bound is
+ * applied to att_index.
+ */
+static inline struct radeon_renderbuffer *radeon_get_renderbuffer(struct gl_framebuffer *fb, int att_index)
+{
+	radeon_print(RADEON_MEMORY, RADEON_TRACE, "%s(fb %p, index %d)\n",
+		     __func__, fb, att_index);
+
+	if (att_index < 0)
+		return NULL;
+
+	return radeon_renderbuffer(fb->Attachment[att_index].Renderbuffer);
+}
+
+/**
+ * Return the context's current depth renderbuffer as a radeon renderbuffer,
+ * or NULL when radeon_renderbuffer() rejects it.
+ */
+static inline struct radeon_renderbuffer *radeon_get_depthbuffer(radeonContextPtr rmesa)
+{
+	return radeon_renderbuffer(rmesa->state.depth.rb);
+}
+
+/**
+ * Return the context's current color renderbuffer as a radeon renderbuffer,
+ * or NULL when radeon_renderbuffer() rejects it.
+ */
+static inline struct radeon_renderbuffer *radeon_get_colorbuffer(radeonContextPtr rmesa)
+{
+	return radeon_renderbuffer(rmesa->state.color.rb);
+}
+
+#include "radeon_cmdbuf.h"
+
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
new file mode 100644
index 0000000000..94f476617b
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -0,0 +1,789 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#include "radeon_common.h"
+#include "xmlpool.h"		/* for symbolic values of enum-type options */
+#include "utils.h"
+#include "vblank.h"
+#include "drirenderbuffer.h"
+#include "drivers/common/meta.h"
+#include "main/context.h"
+#include "main/renderbuffer.h"
+#include "main/state.h"
+#include "main/simple_list.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+
+#define DRIVER_DATE "20090101"
+
+#ifndef RADEON_DEBUG
+int RADEON_DEBUG = (0);
+#endif
+
+
+/**
+ * Map a CHIP_FAMILY_* value to its printable name.
+ *
+ * Families missing from the table are reported as "unknown".
+ */
+static const char* get_chip_family_name(int chip_family)
+{
+	static const struct {
+		int family;
+		const char *name;
+	} family_names[] = {
+		{ CHIP_FAMILY_R100,  "R100"  },
+		{ CHIP_FAMILY_RV100, "RV100" },
+		{ CHIP_FAMILY_RS100, "RS100" },
+		{ CHIP_FAMILY_RV200, "RV200" },
+		{ CHIP_FAMILY_RS200, "RS200" },
+		{ CHIP_FAMILY_R200,  "R200"  },
+		{ CHIP_FAMILY_RV250, "RV250" },
+		{ CHIP_FAMILY_RS300, "RS300" },
+		{ CHIP_FAMILY_RV280, "RV280" },
+		{ CHIP_FAMILY_R300,  "R300"  },
+		{ CHIP_FAMILY_R350,  "R350"  },
+		{ CHIP_FAMILY_RV350, "RV350" },
+		{ CHIP_FAMILY_RV380, "RV380" },
+		{ CHIP_FAMILY_R420,  "R420"  },
+		{ CHIP_FAMILY_RV410, "RV410" },
+		{ CHIP_FAMILY_RS400, "RS400" },
+		{ CHIP_FAMILY_RS600, "RS600" },
+		{ CHIP_FAMILY_RS690, "RS690" },
+		{ CHIP_FAMILY_RS740, "RS740" },
+		{ CHIP_FAMILY_RV515, "RV515" },
+		{ CHIP_FAMILY_R520,  "R520"  },
+		{ CHIP_FAMILY_RV530, "RV530" },
+		{ CHIP_FAMILY_R580,  "R580"  },
+		{ CHIP_FAMILY_RV560, "RV560" },
+		{ CHIP_FAMILY_RV570, "RV570" },
+		{ CHIP_FAMILY_R600,  "R600"  },
+		{ CHIP_FAMILY_RV610, "RV610" },
+		{ CHIP_FAMILY_RV630, "RV630" },
+		{ CHIP_FAMILY_RV670, "RV670" },
+		{ CHIP_FAMILY_RV620, "RV620" },
+		{ CHIP_FAMILY_RV635, "RV635" },
+		{ CHIP_FAMILY_RS780, "RS780" },
+		{ CHIP_FAMILY_RS880, "RS880" },
+		{ CHIP_FAMILY_RV770, "RV770" },
+		{ CHIP_FAMILY_RV730, "RV730" },
+		{ CHIP_FAMILY_RV710, "RV710" },
+		{ CHIP_FAMILY_RV740, "RV740" },
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < sizeof(family_names) / sizeof(family_names[0]); idx++) {
+		if (family_names[idx].family == chip_family)
+			return family_names[idx].name;
+	}
+
+	return "unknown";
+}
+
+
+/* Return various strings for glGetString().
+ *
+ * GL_VENDOR and GL_RENDERER are answered here; any other name yields NULL.
+ *
+ * The GL_RENDERER string is assembled in a function-local static buffer, so
+ * the returned pointer is overwritten by the next GL_RENDERER query.  All
+ * formatting into the fixed-size buffers is bounded: an over-long string is
+ * truncated rather than written past the end of the buffer.
+ */
+static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	static char buffer[128];
+
+	switch (name) {
+	case GL_VENDOR:
+		if (IS_R600_CLASS(radeon->radeonScreen))
+			return (GLubyte *) "Advanced Micro Devices, Inc.";
+		else if (IS_R300_CLASS(radeon->radeonScreen))
+			return (GLubyte *) "DRI R300 Project";
+		else
+			return (GLubyte *) "Tungsten Graphics, Inc.";
+
+	case GL_RENDERER:
+	{
+		unsigned offset;
+		/* PCI cards have no AGP mode to report */
+		GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 :
+			radeon->radeonScreen->AGPMode;
+		const char* chipclass;
+		const char* tcl_prefix;
+		const char* dri2_suffix;
+		char hardwarename[32];
+
+		if (IS_R600_CLASS(radeon->radeonScreen))
+			chipclass = "R600";
+		else if (IS_R300_CLASS(radeon->radeonScreen))
+			chipclass = "R300";
+		else if (IS_R200_CLASS(radeon->radeonScreen))
+			chipclass = "R200";
+		else
+			chipclass = "R100";
+
+		snprintf(hardwarename, sizeof(hardwarename), "%s (%s %04X)",
+			 chipclass,
+			 get_chip_family_name(radeon->radeonScreen->chip_family),
+			 radeon->radeonScreen->device_id);
+
+		offset = driGetRendererString(buffer, hardwarename, DRIVER_DATE,
+					      agp_mode);
+
+		/* R600 always reports TCL; R300 goes by the chip flags and
+		 * older parts by whether TCL has been disabled via fallback.
+		 */
+		if (IS_R600_CLASS(radeon->radeonScreen)) {
+			tcl_prefix = "";
+		} else if (IS_R300_CLASS(radeon->radeonScreen)) {
+			tcl_prefix =
+				(radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
+				? "" : "NO-";
+		} else {
+			tcl_prefix =
+				!(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE)
+				? "" : "NO-";
+		}
+
+		dri2_suffix = radeon->radeonScreen->driScreen->dri2.enabled
+			? " DRI2" : "";
+
+		/* Append both suffixes in one bounded write. */
+		if (offset < sizeof(buffer))
+			snprintf(&buffer[offset], sizeof(buffer) - offset,
+				 " %sTCL%s", tcl_prefix, dri2_suffix);
+
+		return (GLubyte *) buffer;
+	}
+
+	default:
+		return NULL;
+	}
+}
+
+/* Initialize the driver's misc functions.
+ *
+ * At present this only installs radeonGetString() as the GetString hook of
+ * the supplied function table.
+ */
+static void radeonInitDriverFuncs(struct dd_function_table *functions)
+{
+	functions->GetString = radeonGetString;
+}
+
+/**
+ * Create the Mesa context and fill in the fields shared by all radeon
+ * drivers.
+ *
+ * \param radeon               driver context to initialise
+ * \param functions            device-driver function table; the common
+ *                             hooks are installed into it before it is
+ *                             handed to _mesa_create_context()
+ * \param glVisual             visual the context is created for
+ * \param driContextPriv       DRI context; its driverPrivate is pointed at
+ *                             \p radeon on success
+ * \param sharedContextPrivate radeon context to share with, or NULL
+ *
+ * \return GL_FALSE when the Mesa context cannot be created, GL_TRUE
+ *         otherwise.
+ *
+ * radeon->optionCache is queried here, so it has to be populated before
+ * this function is called.
+ */
+GLboolean radeonInitContext(radeonContextPtr radeon,
+			    struct dd_function_table* functions,
+			    const __GLcontextModes * glVisual,
+			    __DRIcontext * driContextPriv,
+			    void *sharedContextPrivate)
+{
+	__DRIscreen *sPriv = driContextPriv->driScreenPriv;
+	radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
+	GLcontext* shareCtx;
+	int throttle;
+
+	/* Fill in additional standard functions. */
+	radeonInitDriverFuncs(functions);
+
+	radeon->radeonScreen = screen;
+
+	/* Allocate and initialize the Mesa context */
+	shareCtx = sharedContextPrivate
+		? ((radeonContextPtr)sharedContextPrivate)->glCtx
+		: NULL;
+	radeon->glCtx = _mesa_create_context(glVisual, shareCtx,
+					    functions, (void *)radeon);
+	if (!radeon->glCtx)
+		return GL_FALSE;
+
+	driContextPriv->driverPrivate = radeon;
+
+	meta_init_metaops(radeon->glCtx, &radeon->meta);
+	_mesa_meta_init(radeon->glCtx);
+
+	/* DRI fields */
+	radeon->dri.context = driContextPriv;
+	radeon->dri.screen = sPriv;
+	radeon->dri.hwContext = driContextPriv->hHWContext;
+	radeon->dri.hwLock = &sPriv->pSAREA->lock;
+	radeon->dri.hwLockCount = 0;
+	radeon->dri.fd = sPriv->fd;
+	radeon->dri.drmMinor = sPriv->drm_version.minor;
+
+	radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
+					       screen->sarea_priv_offset);
+
+	/* Setup IRQs: pick the frame-throttling mechanism from the config */
+	throttle = driQueryOptioni(&radeon->optionCache, "fthrottle_mode");
+	radeon->iw.irq_seq = -1;
+	radeon->irqsEmitted = 0;
+	radeon->do_irqs = (throttle == DRI_CONF_FTHROTTLE_IRQS &&
+			   screen->irq);
+	radeon->do_usleeps = (throttle == DRI_CONF_FTHROTTLE_USLEEPS);
+
+	if (!radeon->do_irqs) {
+		fprintf(stderr,
+			"IRQ's not enabled, falling back to %s: %d %d\n",
+			radeon->do_usleeps ? "usleeps" : "busy waits",
+			throttle, screen->irq);
+	}
+
+	/* "Follow the framebuffer" resolves to 32 or 16 bit textures
+	 * depending on the visual's colour depth.
+	 */
+	radeon->texture_depth = driQueryOptioni(&radeon->optionCache,
+						"texture_depth");
+	if (radeon->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) {
+		if (glVisual->rgbBits > 16)
+			radeon->texture_depth = DRI_CONF_TEXTURE_DEPTH_32;
+		else
+			radeon->texture_depth = DRI_CONF_TEXTURE_DEPTH_16;
+	}
+
+	if (IS_R600_CLASS(screen)) {
+		radeon->texture_row_align = 256;
+		radeon->texture_rect_row_align = 256;
+		radeon->texture_compressed_row_align = 256;
+	} else {
+		/* All pre-R600 classes share these values ... */
+		radeon->texture_row_align = 32;
+		radeon->texture_rect_row_align = 64;
+		radeon->texture_compressed_row_align = 32;
+
+		/* ... except that, within the R300 class, the RS6xx/RS740
+		 * IGPs use a wider row alignment.
+		 * R300 - not sure this is all correct
+		 */
+		if (!IS_R200_CLASS(screen) && !IS_R100_CLASS(screen)) {
+			switch (screen->chip_family) {
+			case CHIP_FAMILY_RS600:
+			case CHIP_FAMILY_RS690:
+			case CHIP_FAMILY_RS740:
+				radeon->texture_row_align = 64;
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
+	radeon_init_dma(radeon);
+
+	return GL_TRUE;
+}
+
+
+
+/**
+ * Release the command storage owned by the state atoms.
+ *
+ * For every atom on the context's hw.atomlist the cmd buffer is freed, and
+ * the lastcmd buffer too when one is present.  The list nodes themselves
+ * are neither unlinked nor freed here.
+ */
+static void radeon_destroy_atom_list(radeonContextPtr radeon)
+{
+	struct radeon_state_atom *cur;
+
+	foreach(cur, &radeon->hw.atomlist) {
+		FREE(cur->cmd);
+		if (cur->lastcmd != NULL)
+			FREE(cur->lastcmd);
+	}
+}
+
+/**
+ * Cleanup common context fields.
+ * Called by r200DestroyContext/r300DestroyContext
+ *
+ * Tears down, in order: the meta state, any pending rendering, DMA regions
+ * and arrays, the chip-specific context (through vtbl.free_context when it
+ * is set), the swsetup/tnl/vbo/swrast modules, the Mesa context itself, the
+ * option cache, the command buffer, the state-atom command storage and the
+ * scissor cliprects.  The radeon context structure is freed last, so
+ * driContextPriv->driverPrivate must not be used afterwards.
+ *
+ * \param driContextPriv DRI context whose driverPrivate is the radeon
+ *                       context to destroy.
+ */
+void radeonDestroyContext(__DRIcontext *driContextPriv )
+{
+#ifdef RADEON_BO_TRACK
+	FILE *track;
+#endif
+	GET_CURRENT_CONTEXT(ctx);
+	radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
+	radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL;
+
+	assert(radeon);
+
+	_mesa_meta_free(radeon->glCtx);
+
+	/* If the context being destroyed is the bound one, fire its pending
+	 * vertices and unbind it first.
+	 */
+	if (radeon == current) {
+		radeon_firevertices(radeon);
+		_mesa_make_current(NULL, NULL, NULL);
+	}
+
+	/* Flush the command buffer while DMA buffers are still reserved */
+	if (!is_empty_list(&radeon->dma.reserved)) {
+		rcommonFlushCmdBuf( radeon, __FUNCTION__ );
+	}
+
+	radeonFreeDmaRegions(radeon);
+	radeonReleaseArrays(radeon->glCtx, ~0);
+	meta_destroy_metaops(&radeon->meta);
+	/* chip-specific teardown hook is optional */
+	if (radeon->vtbl.free_context)
+		radeon->vtbl.free_context(radeon->glCtx);
+	_swsetup_DestroyContext( radeon->glCtx );
+	_tnl_DestroyContext( radeon->glCtx );
+	_vbo_DestroyContext( radeon->glCtx );
+	_swrast_DestroyContext( radeon->glCtx );
+
+	/* free the Mesa context (the atom list is released further down) */
+	_mesa_destroy_context(radeon->glCtx);
+
+	/* _mesa_destroy_context() might result in calls to functions that
+	 * depend on the DriverCtx, so don't set it to NULL before.
+	 *
+	 * radeon->glCtx->DriverCtx = NULL;
+	 */
+	/* free the option cache */
+	driDestroyOptionCache(&radeon->optionCache);
+
+	rcommonDestroyCmdBuf(radeon);
+
+	/* free the command storage of the state atoms */
+	radeon_destroy_atom_list(radeon);
+
+	if (radeon->state.scissor.pClipRects) {
+		FREE(radeon->state.scissor.pClipRects);
+		radeon->state.scissor.pClipRects = 0;
+	}
+#ifdef RADEON_BO_TRACK
+	/* dump the buffer-object tracker state for debugging */
+	track = fopen("/tmp/tracklog", "w");
+	if (track) {
+		radeon_tracker_print(&radeon->radeonScreen->bom->tracker, track);
+		fclose(track);
+	}
+#endif
+	FREE(radeon);
+}
+
+/* Unbind hook for the context `c'.
+ *
+ * Nothing is torn down here: with RADEON_DRI debugging enabled the Mesa
+ * context pointer is logged, and GL_TRUE is returned in every case.
+ */
+GLboolean radeonUnbindContext(__DRIcontext * driContextPriv)
+{
+	radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate;
+
+	if (!(RADEON_DEBUG & RADEON_DRI))
+		return GL_TRUE;
+
+	fprintf(stderr, "%s ctx %p\n", __FUNCTION__, radeon->glCtx);
+	return GL_TRUE;
+}
+
+
+/**
+ * Set up one window-system renderbuffer for the kernel-memory-manager path.
+ *
+ * Does nothing when the attachment is empty.  Otherwise a bo is opened for
+ * the given screen offset if the renderbuffer has none yet, and cpp/pitch
+ * are refreshed from the screen (pitch is stored in bytes: the screen pitch
+ * multiplied by cpp).
+ */
+static void
+radeon_kernel_rb_setup(radeonContextPtr radeon, struct gl_renderbuffer *base,
+		       GLuint offset, GLuint pitch)
+{
+	struct radeon_renderbuffer *rb = (void *)base;
+
+	if (!rb)
+		return;
+
+	if (!rb->bo) {
+		rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+					offset,
+					0,
+					0,
+					RADEON_GEM_DOMAIN_VRAM,
+					0);
+	}
+	rb->cpp = radeon->radeonScreen->cpp;
+	rb->pitch = pitch * rb->cpp;
+}
+
+/**
+ * Make the front, back, depth and stencil renderbuffers of \p draw current
+ * when the kernel memory manager is in use.
+ *
+ * Depth and stencil are both set up from the screen's depth offset and
+ * pitch.
+ */
+static void
+radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon,
+					struct radeon_framebuffer *draw)
+{
+	/* if radeon->fake */
+	radeonScreenPtr screen = radeon->radeonScreen;
+	struct gl_renderbuffer_attachment *att = draw->base.Attachment;
+
+	radeon_kernel_rb_setup(radeon, att[BUFFER_FRONT_LEFT].Renderbuffer,
+			       screen->frontOffset, screen->frontPitch);
+	radeon_kernel_rb_setup(radeon, att[BUFFER_BACK_LEFT].Renderbuffer,
+			       screen->backOffset, screen->backPitch);
+	radeon_kernel_rb_setup(radeon, att[BUFFER_DEPTH].Renderbuffer,
+			       screen->depthOffset, screen->depthPitch);
+	radeon_kernel_rb_setup(radeon, att[BUFFER_STENCIL].Renderbuffer,
+			       screen->depthOffset, screen->depthPitch);
+}
+
+/**
+ * Set up one window-system renderbuffer for the legacy (non kernel-mm)
+ * path.
+ *
+ * Does nothing when the attachment is empty.  Otherwise a bo is opened at
+ * fbLocation + offset if the renderbuffer has none yet, using a fixed
+ * 4096x4096x4 byte size and 4096 alignment, and cpp/pitch are refreshed
+ * from the screen (pitch is stored in bytes: the screen pitch multiplied
+ * by cpp).
+ */
+static void
+radeon_legacy_rb_setup(radeonContextPtr radeon, struct gl_renderbuffer *base,
+		       GLuint offset, GLuint pitch)
+{
+	int size = 4096*4096*4;
+	struct radeon_renderbuffer *rb = (void *)base;
+
+	if (!rb)
+		return;
+
+	if (!rb->bo) {
+		rb->bo = radeon_bo_open(radeon->radeonScreen->bom,
+					offset +
+					radeon->radeonScreen->fbLocation,
+					size,
+					4096,
+					RADEON_GEM_DOMAIN_VRAM,
+					0);
+	}
+	rb->cpp = radeon->radeonScreen->cpp;
+	rb->pitch = pitch * rb->cpp;
+}
+
+/**
+ * Make the front, back, depth and stencil renderbuffers of \p draw current.
+ *
+ * With the kernel memory manager the work is delegated to
+ * radeon_make_kernel_renderbuffer_current().  Otherwise each present
+ * attachment is set up from the screen's offsets and pitches; depth and
+ * stencil both use the depth offset and pitch.
+ */
+static void
+radeon_make_renderbuffer_current(radeonContextPtr radeon,
+				 struct radeon_framebuffer *draw)
+{
+	/* if radeon->fake */
+	radeonScreenPtr screen = radeon->radeonScreen;
+	struct gl_renderbuffer_attachment *att = draw->base.Attachment;
+
+	if (screen->kernel_mm) {
+		radeon_make_kernel_renderbuffer_current(radeon, draw);
+		return;
+	}
+
+	radeon_legacy_rb_setup(radeon, att[BUFFER_FRONT_LEFT].Renderbuffer,
+			       screen->frontOffset, screen->frontPitch);
+	radeon_legacy_rb_setup(radeon, att[BUFFER_BACK_LEFT].Renderbuffer,
+			       screen->backOffset, screen->backPitch);
+	radeon_legacy_rb_setup(radeon, att[BUFFER_DEPTH].Renderbuffer,
+			       screen->depthOffset, screen->depthPitch);
+	radeon_legacy_rb_setup(radeon, att[BUFFER_STENCIL].Renderbuffer,
+			       screen->depthOffset, screen->depthPitch);
+}
+
+/**
+ * Size of one pixel of the renderbuffer's format in bits, derived from the
+ * byte size reported by _mesa_get_format_bytes().
+ */
+static unsigned
+radeon_bits_per_pixel(const struct radeon_renderbuffer *rb)
+{
+	return 8 * _mesa_get_format_bytes(rb->base.Format);
+}
+
+/**
+ * Refresh the buffer objects behind a DRI2 drawable's renderbuffers.
+ *
+ * Asks the DRI2 loader for the drawable's current buffers (through
+ * getBuffersWithFormat when the loader is newer than version 2 and provides
+ * it, through plain getBuffers otherwise), resets the drawable's cliprects
+ * to a single rectangle covering the whole drawable, and attaches each
+ * returned buffer to the matching renderbuffer unless that renderbuffer
+ * already wraps a bo with the same GEM name.
+ *
+ * \param context    DRI context; its driverPrivate is the radeon context.
+ * \param drawable   drawable to update; its driverPrivate is the
+ *                   radeon_framebuffer.
+ * \param front_only when true, back/depth/stencil buffers are not requested.
+ *
+ * Returns without touching the drawable when there is no loader or the
+ * loader hands back no buffers.  A buffer that cannot be opened, or whose
+ * tiling state cannot be queried, is reported on stderr and skipped; an
+ * unhandled attachment type aborts the update.
+ */
+void
+radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
+			    GLboolean front_only)
+{
+	unsigned int attachments[10];
+	__DRIbuffer *buffers = NULL;
+	__DRIscreen *screen;
+	struct radeon_renderbuffer *rb;
+	int i, count;
+	struct radeon_framebuffer *draw;
+	radeonContextPtr radeon;
+	const char *regname;
+	struct radeon_bo *depth_bo = NULL, *bo;
+
+	if (RADEON_DEBUG & RADEON_DRI)
+	    fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
+
+	draw = drawable->driverPrivate;
+	screen = context->driScreenPriv;
+	radeon = (radeonContextPtr) context->driverPrivate;
+
+	if (screen->dri2.loader
+	   && (screen->dri2.loader->base.version > 2)
+	   && (screen->dri2.loader->getBuffersWithFormat != NULL)) {
+		struct radeon_renderbuffer *depth_rb;
+		struct radeon_renderbuffer *stencil_rb;
+
+		/* attachments[] holds (attachment, bits-per-pixel) pairs */
+		i = 0;
+		if ((front_only || radeon->is_front_buffer_rendering ||
+		     radeon->is_front_buffer_reading ||
+		     !draw->color_rb[1])
+		    && draw->color_rb[0]) {
+			attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
+			attachments[i++] = radeon_bits_per_pixel(draw->color_rb[0]);
+		}
+
+		if (!front_only) {
+			if (draw->color_rb[1]) {
+				attachments[i++] = __DRI_BUFFER_BACK_LEFT;
+				attachments[i++] = radeon_bits_per_pixel(draw->color_rb[1]);
+			}
+
+			depth_rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH);
+			stencil_rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL);
+
+			/* request a combined buffer when both are present */
+			if ((depth_rb != NULL) && (stencil_rb != NULL)) {
+				attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL;
+				attachments[i++] = radeon_bits_per_pixel(depth_rb);
+			} else if (depth_rb != NULL) {
+				attachments[i++] = __DRI_BUFFER_DEPTH;
+				attachments[i++] = radeon_bits_per_pixel(depth_rb);
+			} else if (stencil_rb != NULL) {
+				attachments[i++] = __DRI_BUFFER_STENCIL;
+				attachments[i++] = radeon_bits_per_pixel(stencil_rb);
+			}
+		}
+
+		buffers = (*screen->dri2.loader->getBuffersWithFormat)(drawable,
+								&drawable->w,
+								&drawable->h,
+								attachments, i / 2,
+								&count,
+								drawable->loaderPrivate);
+	} else if (screen->dri2.loader) {
+		/* older loader: attachments[] is a plain attachment list */
+		i = 0;
+		if (draw->color_rb[0])
+			attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
+		if (!front_only) {
+			if (draw->color_rb[1])
+				attachments[i++] = __DRI_BUFFER_BACK_LEFT;
+			if (radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH))
+				attachments[i++] = __DRI_BUFFER_DEPTH;
+			if (radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL))
+				attachments[i++] = __DRI_BUFFER_STENCIL;
+		}
+
+		buffers = (*screen->dri2.loader->getBuffers)(drawable,
+								 &drawable->w,
+								 &drawable->h,
+								 attachments, i,
+								 &count,
+								 drawable->loaderPrivate);
+	}
+
+	if (buffers == NULL)
+		return;
+
+	/* set one cliprect to cover the whole drawable */
+	drawable->x = 0;
+	drawable->y = 0;
+	drawable->backX = 0;
+	drawable->backY = 0;
+	drawable->numClipRects = 1;
+	drawable->pClipRects[0].x1 = 0;
+	drawable->pClipRects[0].y1 = 0;
+	drawable->pClipRects[0].x2 = drawable->w;
+	drawable->pClipRects[0].y2 = drawable->h;
+	drawable->numBackClipRects = 1;
+	drawable->pBackClipRects[0].x1 = 0;
+	drawable->pBackClipRects[0].y1 = 0;
+	drawable->pBackClipRects[0].x2 = drawable->w;
+	drawable->pBackClipRects[0].y2 = drawable->h;
+	for (i = 0; i < count; i++) {
+		switch (buffers[i].attachment) {
+		case __DRI_BUFFER_FRONT_LEFT:
+			rb = draw->color_rb[0];
+			regname = "dri2 front buffer";
+			break;
+		case __DRI_BUFFER_FAKE_FRONT_LEFT:
+			rb = draw->color_rb[0];
+			regname = "dri2 fake front buffer";
+			break;
+		case __DRI_BUFFER_BACK_LEFT:
+			rb = draw->color_rb[1];
+			regname = "dri2 back buffer";
+			break;
+		case __DRI_BUFFER_DEPTH:
+			rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH);
+			regname = "dri2 depth buffer";
+			break;
+		case __DRI_BUFFER_DEPTH_STENCIL:
+			rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH);
+			regname = "dri2 depth / stencil buffer";
+			break;
+		case __DRI_BUFFER_STENCIL:
+			rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL);
+			regname = "dri2 stencil buffer";
+			break;
+		case __DRI_BUFFER_ACCUM:
+		default:
+			fprintf(stderr,
+				"unhandled buffer attach event, attacment type %d\n",
+				buffers[i].attachment);
+			return;
+		}
+
+		if (rb == NULL)
+			continue;
+
+		/* nothing to do when the renderbuffer already wraps this bo */
+		if (rb->bo) {
+			uint32_t name = radeon_gem_name_bo(rb->bo);
+			if (name == buffers[i].name)
+				continue;
+		}
+
+		if (RADEON_DEBUG & RADEON_DRI)
+			fprintf(stderr,
+				"attaching buffer %s, %d, at %d, cpp %d, pitch %d\n",
+				regname, buffers[i].name, buffers[i].attachment,
+				buffers[i].cpp, buffers[i].pitch);
+
+		rb->cpp = buffers[i].cpp;
+		rb->pitch = buffers[i].pitch;
+		rb->base.Width = drawable->w;
+		rb->base.Height = drawable->h;
+		rb->has_surface = 0;
+
+		if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_bo) {
+			if (RADEON_DEBUG & RADEON_DRI)
+				fprintf(stderr, "(reusing depth buffer as stencil)\n");
+			bo = depth_bo;
+			radeon_bo_ref(bo);
+		} else {
+			uint32_t tiling_flags = 0, pitch = 0;
+			int ret;
+
+			bo = radeon_bo_open(radeon->radeonScreen->bom,
+						buffers[i].name,
+						0,
+						0,
+						RADEON_GEM_DOMAIN_VRAM,
+						buffers[i].flags);
+
+			if (bo == NULL) {
+				fprintf(stderr, "failed to attach %s %d\n",
+					regname, buffers[i].name);
+				/* a NULL bo must not reach the tiling query
+				 * or the attach below */
+				continue;
+			}
+
+			ret = radeon_bo_get_tiling(bo, &tiling_flags, &pitch);
+			if (ret) {
+				fprintf(stderr,
+					"failed to get tiling for %s %d\n",
+					regname, buffers[i].name);
+				/* drop the reference taken by the open */
+				radeon_bo_unref(bo);
+				bo = NULL;
+				continue;
+			}
+
+			if (tiling_flags & RADEON_TILING_MACRO)
+				bo->flags |= RADEON_BO_FLAGS_MACRO_TILE;
+			if (tiling_flags & RADEON_TILING_MICRO)
+				bo->flags |= RADEON_BO_FLAGS_MICRO_TILE;
+		}
+
+		if (buffers[i].attachment == __DRI_BUFFER_DEPTH) {
+			if (draw->base.Visual.depthBits == 16)
+				rb->cpp = 2;
+			/* remembered so a later stencil attachment can share it */
+			depth_bo = bo;
+		}
+
+		/* drop the local reference once it has been handed to rb */
+		radeon_renderbuffer_set_bo(rb, bo);
+		radeon_bo_unref(bo);
+
+		/* a combined depth/stencil buffer also backs the stencil rb */
+		if (buffers[i].attachment == __DRI_BUFFER_DEPTH_STENCIL) {
+			rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL);
+			if (rb != NULL) {
+				struct radeon_bo *stencil_bo = NULL;
+
+				if (rb->bo) {
+					uint32_t name = radeon_gem_name_bo(rb->bo);
+					if (name == buffers[i].name)
+						continue;
+				}
+
+				stencil_bo = bo;
+				radeon_bo_ref(stencil_bo);
+				radeon_renderbuffer_set_bo(rb, stencil_bo);
+				radeon_bo_unref(stencil_bo);
+			}
+		}
+	}
+
+	driUpdateFramebufferSize(radeon->glCtx, drawable);
+}
+
+/* Force the context `c' to be the current context and associate with it
+ * buffer `b'.
+ *
+ * A NULL driContextPriv unbinds the current Mesa context.  Otherwise:
+ *  - under DRI2 the renderbuffers of the draw (and, if different, read)
+ *    drawable are refreshed and the context's color/depth renderbuffer
+ *    references are pointed at the draw framebuffer's back-left and depth
+ *    attachments (NULL when an attachment is missing);
+ *  - without DRI2 the static window-system buffers are set up through
+ *    radeon_make_renderbuffer_current().
+ * The framebuffer sizes are then updated, the Mesa context is made current
+ * and its state revalidated.  On the first bind of a drawable (swap
+ * interval still (unsigned)-1) its vblank state is initialised.
+ *
+ * driDrawPriv and driReadPriv are dereferenced unconditionally once
+ * driContextPriv is non-NULL.  Always returns GL_TRUE.
+ */
+GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv,
+			    __DRIdrawable * driDrawPriv,
+			    __DRIdrawable * driReadPriv)
+{
+	radeonContextPtr radeon;
+	struct radeon_framebuffer *drfb;
+	struct gl_framebuffer *readfb;
+
+	if (!driContextPriv) {
+		if (RADEON_DEBUG & RADEON_DRI)
+			fprintf(stderr, "%s ctx is null\n", __FUNCTION__);
+		_mesa_make_current(NULL, NULL, NULL);
+		return GL_TRUE;
+	}
+
+	radeon = (radeonContextPtr) driContextPriv->driverPrivate;
+	drfb = driDrawPriv->driverPrivate;
+	readfb = driReadPriv->driverPrivate;
+
+	if (driContextPriv->driScreenPriv->dri2.enabled) {
+		struct radeon_renderbuffer *rrb;
+
+		radeon_update_renderbuffers(driContextPriv, driDrawPriv, GL_FALSE);
+		if (driDrawPriv != driReadPriv)
+			radeon_update_renderbuffers(driContextPriv, driReadPriv, GL_FALSE);
+
+		/* radeon_get_renderbuffer() returns NULL for a missing
+		 * attachment (e.g. no back buffer); pass NULL on explicitly
+		 * rather than taking a member address through a NULL pointer.
+		 */
+		rrb = radeon_get_renderbuffer(&drfb->base, BUFFER_BACK_LEFT);
+		_mesa_reference_renderbuffer(&radeon->state.color.rb,
+					     rrb ? &rrb->base : NULL);
+		rrb = radeon_get_renderbuffer(&drfb->base, BUFFER_DEPTH);
+		_mesa_reference_renderbuffer(&radeon->state.depth.rb,
+					     rrb ? &rrb->base : NULL);
+	} else {
+		radeon_make_renderbuffer_current(radeon, drfb);
+	}
+
+	if (RADEON_DEBUG & RADEON_DRI)
+	     fprintf(stderr, "%s ctx %p dfb %p rfb %p\n", __FUNCTION__, radeon->glCtx, drfb, readfb);
+
+	driUpdateFramebufferSize(radeon->glCtx, driDrawPriv);
+	if (driReadPriv != driDrawPriv)
+		driUpdateFramebufferSize(radeon->glCtx, driReadPriv);
+
+	_mesa_make_current(radeon->glCtx, &drfb->base, readfb);
+
+	_mesa_update_state(radeon->glCtx);
+
+	if (radeon->glCtx->DrawBuffer == &drfb->base) {
+		/* (unsigned)-1 marks a drawable whose vblank state has not
+		 * been initialised yet */
+		if (driDrawPriv->swap_interval == (unsigned)-1) {
+			int i;
+			driDrawPriv->vblFlags =
+				(radeon->radeonScreen->irq != 0)
+				? driGetDefaultVBlankFlags(&radeon->
+							   optionCache)
+				: VBLANK_FLAG_NO_IRQ;
+
+			driDrawableInitVBlank(driDrawPriv);
+			drfb->vbl_waited = driDrawPriv->vblSeq;
+
+			for (i = 0; i < 2; i++) {
+				if (drfb->color_rb[i])
+					drfb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq;
+			}
+
+		}
+
+		radeon_window_moved(radeon);
+		radeon_draw_buffer(radeon->glCtx, &drfb->base);
+	}
+
+
+	if (RADEON_DEBUG & RADEON_DRI)
+		fprintf(stderr, "End %s\n", __FUNCTION__);
+
+	return GL_TRUE;
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
new file mode 100644
index 0000000000..5156c5d0d0
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -0,0 +1,618 @@
+
+#ifndef COMMON_CONTEXT_H
+#define COMMON_CONTEXT_H
+
+#include "main/mm.h"
+#include "math/m_vector.h"
+#include "texmem.h"
+#include "tnl/t_context.h"
+#include "main/colormac.h"
+
+#include "radeon_debug.h"
+#include "radeon_screen.h"
+#include "radeon_drm.h"
+#include "dri_util.h"
+#include "tnl/t_vertex.h"
+
+#include "dri_metaops.h"
+struct radeon_context;
+
+#include "radeon_bocs_wrapper.h"
+
+/* This union is used to avoid warnings/miscompilation
+   with float to uint32_t casts due to strict-aliasing */
+typedef union { GLfloat f; uint32_t ui32; } float_ui32_type;
+
+struct radeon_context;
+typedef struct radeon_context radeonContextRec;
+typedef struct radeon_context *radeonContextPtr;
+
+
+#define TEX_0   0x1
+#define TEX_1   0x2
+#define TEX_2   0x4
+#define TEX_3	0x8
+#define TEX_4	0x10
+#define TEX_5	0x20
+
+/* Rasterizing fallbacks */
+/* See corresponding strings in r200_swtcl.c */
+#define RADEON_FALLBACK_TEXTURE		0x0001
+#define RADEON_FALLBACK_DRAW_BUFFER	0x0002
+#define RADEON_FALLBACK_STENCIL		0x0004
+#define RADEON_FALLBACK_RENDER_MODE	0x0008
+#define RADEON_FALLBACK_BLEND_EQ	0x0010
+#define RADEON_FALLBACK_BLEND_FUNC	0x0020
+#define RADEON_FALLBACK_DISABLE 	0x0040
+#define RADEON_FALLBACK_BORDER_MODE	0x0080
+#define RADEON_FALLBACK_DEPTH_BUFFER	0x0100
+#define RADEON_FALLBACK_STENCIL_BUFFER  0x0200
+
+#define R200_FALLBACK_TEXTURE           0x01
+#define R200_FALLBACK_DRAW_BUFFER       0x02
+#define R200_FALLBACK_STENCIL           0x04
+#define R200_FALLBACK_RENDER_MODE       0x08
+#define R200_FALLBACK_DISABLE           0x10
+#define R200_FALLBACK_BORDER_MODE       0x20
+
+#define RADEON_TCL_FALLBACK_RASTER            0x1 /* rasterization */
+#define RADEON_TCL_FALLBACK_UNFILLED          0x2 /* unfilled tris */
+#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE     0x4 /* twoside tris */
+#define RADEON_TCL_FALLBACK_MATERIAL          0x8 /* material in vb */
+#define RADEON_TCL_FALLBACK_TEXGEN_0          0x10 /* texgen, unit 0 */
+#define RADEON_TCL_FALLBACK_TEXGEN_1          0x20 /* texgen, unit 1 */
+#define RADEON_TCL_FALLBACK_TEXGEN_2          0x40 /* texgen, unit 2 */
+#define RADEON_TCL_FALLBACK_TCL_DISABLE       0x80 /* user disable */
+#define RADEON_TCL_FALLBACK_FOGCOORDSPEC      0x100 /* fogcoord, sep. spec light */
+
+/* The blit width for texture uploads
+ */
+#define BLIT_WIDTH_BYTES 1024
+
+/* Use the templated vertex format:
+ */
+#define COLOR_IS_RGBA
+#define TAG(x) radeon##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+#define RADEON_RB_CLASS 0xdeadbeef
+
+struct radeon_renderbuffer
+{
+	struct gl_renderbuffer base;
+	struct radeon_bo *bo;
+	unsigned int cpp;
+	/* unsigned int offset; */
+	unsigned int pitch;
+
+	uint32_t draw_offset; /* FBO */
+	/* boo Xorg 6.8.2 compat */
+	int has_surface;
+
+	GLuint pf_pending;  /**< sequence number of pending flip */
+	GLuint vbl_pending;   /**< vblank sequence number of pending flip */
+	__DRIdrawable *dPriv;
+};
+
+struct radeon_framebuffer
+{
+	struct gl_framebuffer base; /* core Mesa framebuffer; &fb->base is what gets handed to _mesa_make_current() */
+
+	struct radeon_renderbuffer *color_rb[2]; /* entries may be NULL (callers check); presumably front/back - TODO confirm */
+
+	GLuint vbl_waited; /* seeded from the drawable's vblSeq when its vblank state is first initialised */
+
+	/* buffer swap */
+	int64_t swap_ust;
+	int64_t swap_missed_ust;
+
+	GLuint swap_count;
+	GLuint swap_missed_count;
+
+	/* Drawable page flipping state */
+	GLboolean pf_active;
+	GLint pf_current_page;
+	GLint pf_num_pages;
+
+};
+
+
+struct radeon_colorbuffer_state {
+	GLuint clear;
+	int roundEnable;
+	struct gl_renderbuffer *rb;
+	uint32_t draw_offset; /* offset into color renderbuffer - FBOs */
+};
+
+struct radeon_depthbuffer_state {
+	GLuint clear;
+	struct gl_renderbuffer *rb;
+};
+
+struct radeon_scissor_state {
+	drm_clip_rect_t rect;
+	GLboolean enabled;
+
+	GLuint numClipRects;	/* Cliprects active */
+	GLuint numAllocedClipRects;	/* Cliprects available */
+	drm_clip_rect_t *pClipRects;
+};
+
+struct radeon_stencilbuffer_state {
+	GLuint clear;		/* rb3d_stencilrefmask value */
+};
+
+struct radeon_state_atom {
+	struct radeon_state_atom *next, *prev;
+	const char *name;	/* for debug */
+	int cmd_size;		/* size in bytes */
+        GLuint idx;
+	GLuint is_tcl;
+        GLuint *cmd;		/* one or more cmd's */
+	GLuint *lastcmd;		/* one or more cmd's */
+	GLboolean dirty;	/* dirty-mark in emit_state_list */
+        int (*check) (GLcontext *, struct radeon_state_atom *atom); /* is this state active? */
+        void (*emit) (GLcontext *, struct radeon_state_atom *atom);
+};
+
+struct radeon_hw_state {
+  	/* Head of the linked list of state atoms. */
+	struct radeon_state_atom atomlist;
+	int max_state_size;	/* Number of bytes necessary for a full state emit. */
+	int max_post_flush_size; /* Number of bytes necessary for post flushing emits */
+	GLboolean is_dirty, all_dirty;
+};
+
+
+/* Texture related */
+typedef struct _radeon_texture_image radeon_texture_image;
+
+struct _radeon_texture_image {
+	struct gl_texture_image base;
+
+	/**
+	 * If mt != 0, the image is stored in hardware format in the
+	 * given mipmap tree. In this case, base.Data may point into the
+	 * mapping of the buffer object that contains the mipmap tree.
+	 *
+	 * If mt == 0, the image is stored in normal memory pointed to
+	 * by base.Data.
+	 */
+	struct _radeon_mipmap_tree *mt;
+	struct radeon_bo *bo;
+
+	int mtlevel; /** if mt != 0, this is the image's level in the mipmap tree */
+	int mtface; /** if mt != 0, this is the image's face in the mipmap tree */
+};
+
+
+static INLINE radeon_texture_image *get_radeon_texture_image(struct gl_texture_image *image)
+{	/* Downcast a core gl_texture_image pointer to the driver's radeon_texture_image. */
+	return (radeon_texture_image*)image; /* plain cast: 'base' is the first member of struct _radeon_texture_image */
+}
+
+
+typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
+
+#define RADEON_TXO_MICRO_TILE               (1 << 3)
+
+/* Texture object in locally shared texture space.
+ */
+struct radeon_tex_obj {
+	struct gl_texture_object base; /* must stay first: radeon_tex_obj() casts gl_texture_object* to this type */
+	struct _radeon_mipmap_tree *mt;
+
+	/**
+	 * This is true if we've verified that the mipmap tree above is complete
+	 * and so on.
+	 */
+	GLboolean validated;
+	/* Minimum LOD to be used during rendering */
+	unsigned minLod;
+	/* Maximum LOD to be used during rendering */
+	unsigned maxLod;
+
+	GLuint override_offset;
+	GLboolean image_override; /* Image overridden by GLX_EXT_tfp */
+	GLuint tile_bits;	/* hw texture tile bits used on this texture */
+        struct radeon_bo *bo;
+
+	GLuint pp_txfilter;	/* hardware register values */
+	GLuint pp_txformat;
+	GLuint pp_txformat_x;
+	GLuint pp_txsize;	/* npot only */
+	GLuint pp_txpitch;	/* npot only */
+	GLuint pp_border_color;
+	GLuint pp_cubic_faces;	/* cube face 1,2,3,4 log2 sizes */
+
+        GLuint pp_txfilter_1;	/*  r300 */
+
+	/* r700 texture states */
+	GLuint SQ_TEX_RESOURCE0;
+	GLuint SQ_TEX_RESOURCE1;
+	GLuint SQ_TEX_RESOURCE2;
+	GLuint SQ_TEX_RESOURCE3;
+	GLuint SQ_TEX_RESOURCE4;
+	GLuint SQ_TEX_RESOURCE5;
+	GLuint SQ_TEX_RESOURCE6;
+
+	GLuint SQ_TEX_SAMPLER0;
+	GLuint SQ_TEX_SAMPLER1;
+	GLuint SQ_TEX_SAMPLER2;
+
+	GLuint TD_PS_SAMPLER0_BORDER_RED;
+	GLuint TD_PS_SAMPLER0_BORDER_GREEN;
+	GLuint TD_PS_SAMPLER0_BORDER_BLUE;
+	GLuint TD_PS_SAMPLER0_BORDER_ALPHA;
+
+	GLboolean border_fallback;
+
+
+};
+
+static INLINE radeonTexObj* radeon_tex_obj(struct gl_texture_object *texObj)
+{	/* Downcast a core gl_texture_object pointer to the driver's radeonTexObj. */
+	return (radeonTexObj*)texObj; /* plain cast: 'base' is the first member of struct radeon_tex_obj */
+}
+
+/* occlusion query */
+struct radeon_query_object {
+	struct gl_query_object Base;
+	struct radeon_bo *bo;
+	int curr_offset;
+	GLboolean emitted_begin;
+
+	/* Double linked list of not flushed query objects */
+	struct radeon_query_object *prev, *next;
+};
+
+/* Need refcounting on dma buffers:
+ */
+struct radeon_dma_buffer {
+	int refcount;		/* the number of retained regions in buf */
+	drmBufPtr buf;
+};
+
+struct radeon_aos {
+	struct radeon_bo *bo; /** Buffer object where vertex data is stored */
+	int offset; /** Offset into buffer object, in bytes */
+	int components; /** Number of components per vertex */
+	int stride; /** Stride in dwords (may be 0 for repeating) */
+	int count; /** Number of vertices */
+};
+
+#define DMA_BO_FREE_TIME 100
+
+struct radeon_dma_bo {
+  struct radeon_dma_bo *next, *prev;
+  struct radeon_bo *bo;
+  int expire_counter;
+};
+
+struct radeon_dma {
+        /* Active dma region.  Allocations for vertices and retained
+         * regions come from here.  Also used for emitting random vertices,
+         * these may be flushed by calling flush_current();
+         */
+	struct radeon_dma_bo free;
+	struct radeon_dma_bo wait;
+	struct radeon_dma_bo reserved;
+        size_t current_used; /** Number of bytes allocated and forgotten about */
+        size_t current_vertexptr; /** End of active vertex region */
+        size_t minimum_size;
+
+        /**
+         * If current_vertexptr != current_used then flush must be non-zero.
+         * flush must be called before non-active vertex allocations can be
+         * performed.
+         */
+        void (*flush) (GLcontext *);
+};
+
+/* radeon_swtcl.c
+ */
+struct radeon_swtcl_info {
+
+	GLuint RenderIndex;
+	GLuint vertex_size;
+	GLubyte *verts;
+
+	/* Fallback rasterization functions
+	 */
+	GLuint hw_primitive;
+	GLenum render_primitive;
+	GLuint numverts;
+
+	struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+	GLuint vertex_attr_count;
+
+	GLuint emit_prediction;
+        struct radeon_bo *bo;
+};
+
+#define RADEON_MAX_AOS_ARRAYS		16
+struct radeon_tcl_info {
+	struct radeon_aos aos[RADEON_MAX_AOS_ARRAYS];
+	GLuint aos_count;
+	struct radeon_bo *elt_dma_bo; /** Buffer object that contains element indices */
+	int elt_dma_offset; /** Offset into this buffer object, in bytes */
+};
+
+struct radeon_ioctl {
+	GLuint vertex_offset;
+	GLuint vertex_max;
+	struct radeon_bo *bo;
+	GLuint vertex_size;
+};
+
+#define RADEON_MAX_PRIMS 64
+
+struct radeon_prim {
+	GLuint start;
+	GLuint end;
+	GLuint prim;
+};
+
+static INLINE GLuint radeonPackColor(GLuint cpp,
+                                     GLubyte r, GLubyte g,
+                                     GLubyte b, GLubyte a)
+{
+	/* Pack by pixel size: cpp 2 -> PACK_COLOR_565 (alpha is not used),
+	 * cpp 4 -> PACK_COLOR_8888 with alpha first; any other cpp gives 0. */
+	if (cpp == 2)
+		return PACK_COLOR_565(r, g, b);
+	if (cpp == 4)
+		return PACK_COLOR_8888(a, r, g, b);
+
+	return 0;
+}
+
+#define MAX_CMD_BUF_SZ (16*1024)
+
+#define MAX_DMA_BUF_SZ (64*1024)
+
+struct radeon_store {
+	GLuint statenr;
+	GLuint primnr;
+	char cmd_buf[MAX_CMD_BUF_SZ];
+	int cmd_used;
+	int elts_start;
+};
+
+struct radeon_dri_mirror {
+	__DRIcontext *context;	/* DRI context */
+	__DRIscreen *screen;	/* DRI screen */
+
+	drm_context_t hwContext;
+	drm_hw_lock_t *hwLock;
+	int hwLockCount;
+	int fd;
+	int drmMinor;
+};
+
+typedef void (*radeon_tri_func) (radeonContextPtr,
+				 radeonVertex *,
+				 radeonVertex *, radeonVertex *);
+
+typedef void (*radeon_line_func) (radeonContextPtr,
+				  radeonVertex *, radeonVertex *);
+
+typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *);
+
+#define RADEON_MAX_BOS 32
+struct radeon_state {
+	struct radeon_colorbuffer_state color;
+	struct radeon_depthbuffer_state depth;
+	struct radeon_scissor_state scissor;
+	struct radeon_stencilbuffer_state stencil;
+};
+
+/**
+ * This structure holds the command buffer while it is being constructed.
+ *
+ * The first batch of commands in the buffer is always the state that needs
+ * to be re-emitted when the context is lost. This batch can be skipped
+ * otherwise.
+ */
+struct radeon_cmdbuf {
+	struct radeon_cs_manager    *csm;
+	struct radeon_cs            *cs;
+	int size; /** # of dwords total */
+	unsigned int flushing:1; /** whether we're currently in FlushCmdBufLocked */
+};
+
+struct radeon_context {
+   GLcontext *glCtx;
+   radeonScreenPtr radeonScreen;	/* Screen private DRI data */
+
+   /* Texture object bookkeeping
+    */
+   int                   texture_depth;
+   float                 initialMaxAnisotropy;
+   uint32_t              texture_row_align;
+   uint32_t              texture_rect_row_align;
+   uint32_t              texture_compressed_row_align;
+
+  struct radeon_dma dma;
+  struct radeon_hw_state hw;
+   /* Rasterization and vertex state:
+    */
+   GLuint TclFallback;
+   GLuint Fallback;
+   GLuint NewGLState;
+   DECLARE_RENDERINPUTS(tnl_index_bitset);	/* index of bits for last tnl_install_attrs */
+
+   /* Drawable, cliprect and scissor information */
+   GLuint numClipRects;	/* Cliprects for the draw buffer */
+   drm_clip_rect_t *pClipRects;
+   unsigned int lastStamp;
+   drm_radeon_sarea_t *sarea;	/* Private SAREA data */
+
+   /* Mirrors of some DRI state */
+   struct radeon_dri_mirror dri;
+
+   /* Busy waiting */
+   GLuint do_usleeps;
+   GLuint do_irqs;
+   GLuint irqsEmitted;
+   drm_radeon_irq_wait_t iw;
+
+   /* Derived state - originally noted as r300 only, but common code also uses state.color.rb / state.depth.rb */
+   struct radeon_state state;
+
+   struct radeon_swtcl_info swtcl;
+   struct radeon_tcl_info tcl;
+   /* Configuration cache
+    */
+   driOptionCache optionCache;
+
+   struct radeon_cmdbuf cmdbuf;
+
+   struct radeon_debug debug;
+
+  drm_clip_rect_t fboRect;
+  GLboolean constant_cliprect; /* use for FBO or DRI2 rendering */
+  GLboolean front_cliprects;
+
+   /**
+    * Set if rendering has occurred to the drawable's front buffer.
+    *
+    * This is used in the DRI2 case to detect that glFlush should also copy
+    * the contents of the fake front buffer to the real front buffer.
+    */
+   GLboolean front_buffer_dirty;
+
+   /**
+    * Track whether front-buffer rendering is currently enabled
+    *
+    * A separate flag is used to track this in order to support MRT more
+    * easily.
+    */
+   GLboolean is_front_buffer_rendering;
+
+   /**
+    * Track whether front-buffer is the current read target.
+    *
+    * This is closely associated with is_front_buffer_rendering, but may
+    * be set separately.  The DRI2 fake front buffer must be referenced
+    * either way.
+    */
+   GLboolean is_front_buffer_reading;
+
+   struct dri_metaops meta;
+
+   struct {
+	struct radeon_query_object *current;
+	struct radeon_state_atom queryobj;
+   } query;
+
+   struct {
+	   void (*get_lock)(radeonContextPtr radeon);
+	   void (*update_viewport_offset)(GLcontext *ctx);
+	   void (*emit_cs_header)(struct radeon_cs *cs, radeonContextPtr rmesa);
+	   void (*swtcl_flush)(GLcontext *ctx, uint32_t offset);
+	   void (*pre_emit_atoms)(radeonContextPtr rmesa);
+	   void (*pre_emit_state)(radeonContextPtr rmesa);
+	   void (*fallback)(GLcontext *ctx, GLuint bit, GLboolean mode);
+	   void (*free_context)(GLcontext *ctx);
+	   void (*emit_query_finish)(radeonContextPtr radeon);
+	   void (*update_scissor)(GLcontext *ctx);
+	   unsigned (*check_blit)(gl_format mesa_format);
+	   unsigned (*blit)(GLcontext *ctx,
+                        struct radeon_bo *src_bo,
+                        intptr_t src_offset,
+                        gl_format src_mesaformat,
+                        unsigned src_pitch,
+                        unsigned src_width,
+                        unsigned src_height,
+                        unsigned src_x_offset,
+                        unsigned src_y_offset,
+                        struct radeon_bo *dst_bo,
+                        intptr_t dst_offset,
+                        gl_format dst_mesaformat,
+                        unsigned dst_pitch,
+                        unsigned dst_width,
+                        unsigned dst_height,
+                        unsigned dst_x_offset,
+                        unsigned dst_y_offset,
+                        unsigned reg_width,
+                        unsigned reg_height,
+                        unsigned flip_y);
+	   unsigned (*is_format_renderable)(gl_format mesa_format);
+   } vtbl;
+};
+
+#define RADEON_CONTEXT(glctx) ((radeonContextPtr)((glctx)->DriverCtx)) /* driver-private context stored in a GLcontext's DriverCtx */
+
+static INLINE __DRIdrawable* radeon_get_drawable(radeonContextPtr radeon)
+{	/* Uses Mesa's INLINE macro, like the other inline helpers in this header. */
+	return radeon->dri.context->driDrawablePriv; /* draw drawable recorded in the mirrored __DRIcontext */
+}
+
+static INLINE __DRIdrawable* radeon_get_readable(radeonContextPtr radeon)
+{	/* Uses Mesa's INLINE macro, like the other inline helpers in this header. */
+	return radeon->dri.context->driReadablePriv; /* read drawable recorded in the mirrored __DRIcontext */
+}
+
+/**
+ * Return the raw 32-bit pattern of a float (a bit copy, not a value conversion).
+ */
+static INLINE uint32_t radeonPackFloat32(float fl)
+{
+	union {
+		uint32_t bits;
+		float value;
+	} pun;	/* union-based pun instead of a pointer cast */
+
+	pun.value = fl;
+	return pun.bits;
+}
+
+/* This is probably wrong for some values, I need to test this
+ * some more.  Range checking would be a good idea also..
+ *
+ * But it works for most things.  I'll fix it later if someone
+ * else with a better clue doesn't
+ */
+static INLINE uint32_t radeonPackFloat24(float f)
+{
+	uint32_t packed = 0;
+	int exp2;
+	float frac;
+
+	if (f == 0.0)
+		return 0;
+
+	/* frexpf() gives f == frac * 2^exp2 with |frac| in [0.5, 1) */
+	frac = frexpf(f, &exp2);
+	/* Sign goes in bit 23; carry on with the magnitude */
+	if (frac < 0) {
+		packed |= (1 << 23);
+		frac = -frac;
+	}
+	/* Exponent, bias of 63 (62 added here, presumably because of frexpf's [0.5, 1) mantissa) */
+	exp2 += 62;
+	packed |= (exp2 << 16);
+	/* Low 23 bits of the float's raw encoding with their 7 LSBs dropped */
+	packed |= (radeonPackFloat32(frac) & 0x7FFFFF) >> 7;
+
+	return packed;
+}
+
+GLboolean radeonInitContext(radeonContextPtr radeon,
+			    struct dd_function_table* functions,
+			    const __GLcontextModes * glVisual,
+			    __DRIcontext * driContextPriv,
+			    void *sharedContextPrivate);
+
+void radeonCleanupContext(radeonContextPtr radeon);
+GLboolean radeonUnbindContext(__DRIcontext * driContextPriv);
+void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
+				 GLboolean front_only);
+GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv,
+			    __DRIdrawable * driDrawPriv,
+			    __DRIdrawable * driReadPriv);
+extern void radeonDestroyContext(__DRIcontext * driContextPriv);
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c
new file mode 100644
index 0000000000..ee65d7ff3d
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_context.c
@@ -0,0 +1,421 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/api_arrayelt.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+#include "main/extensions.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "radeon_common.h"
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_span.h"
+#include "radeon_tex.h"
+#include "radeon_swtcl.h"
+#include "radeon_tcl.h"
+#include "radeon_queryobj.h"
+#include "radeon_blit.h"
+
+#define need_GL_ARB_occlusion_query
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_secondary_color
+#define need_GL_EXT_framebuffer_object
+#include "main/remap_helper.h"
+
+#define DRIVER_DATE	"20061018"
+
+#include "utils.h"
+#include "xmlpool.h" /* for symbolic values of enum-type options */
+
+/* Extension strings exported by the R100 driver.
+ */
+static const struct dri_extension card_extensions[] =
+{
+    { "GL_ARB_multitexture",               NULL },
+    { "GL_ARB_occlusion_query",		   GL_ARB_occlusion_query_functions},
+    { "GL_ARB_texture_border_clamp",       NULL },
+    { "GL_ARB_texture_env_add",            NULL },
+    { "GL_ARB_texture_env_combine",        NULL },
+    { "GL_ARB_texture_env_crossbar",       NULL },
+    { "GL_ARB_texture_env_dot3",           NULL },
+    { "GL_ARB_texture_mirrored_repeat",    NULL },
+    { "GL_EXT_blend_logic_op",             NULL },
+    { "GL_EXT_blend_subtract",             GL_EXT_blend_minmax_functions },
+    { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
+    { "GL_EXT_packed_depth_stencil",	   NULL},
+    { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
+    { "GL_EXT_stencil_wrap",               NULL },
+    { "GL_EXT_texture_edge_clamp",         NULL },
+    { "GL_EXT_texture_env_combine",        NULL },
+    { "GL_EXT_texture_env_dot3",           NULL },
+    { "GL_EXT_texture_filter_anisotropic", NULL },
+    { "GL_EXT_texture_lod_bias",           NULL },
+    { "GL_EXT_texture_mirror_clamp",       NULL },
+    { "GL_ATI_texture_env_combine3",       NULL },
+    { "GL_ATI_texture_mirror_once",        NULL },
+    { "GL_MESA_ycbcr_texture",             NULL },
+    { "GL_NV_blend_square",                NULL },
+    { "GL_SGIS_generate_mipmap",           NULL },
+    { NULL,                                NULL }
+};
+
+static const struct dri_extension mm_extensions[] = {	/* passed to driInitExtensions() only when the screen has kernel_mm set */
+  { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
+  { NULL, NULL }	/* terminator entry */
+};
+
+extern const struct tnl_pipeline_stage _radeon_render_stage;
+extern const struct tnl_pipeline_stage _radeon_tcl_stage;
+
+static const struct tnl_pipeline_stage *radeon_pipeline[] = {
+
+   /* Try and go straight to t&l
+    */
+   &_radeon_tcl_stage,  
+
+   /* Catch any t&l fallbacks
+    */
+   &_tnl_vertex_transform_stage,
+   &_tnl_normal_transform_stage,
+   &_tnl_lighting_stage,
+   &_tnl_fog_coordinate_stage,
+   &_tnl_texgen_stage,
+   &_tnl_texture_transform_stage,
+
+   &_radeon_render_stage,
+   &_tnl_render_stage,		/* FALLBACK:  */
+   NULL,
+};
+
+static void r100_get_lock(radeonContextPtr radeon)
+{
+   drm_radeon_sarea_t *sarea = radeon->sarea;
+   r100ContextPtr rmesa = (r100ContextPtr)radeon;
+
+   /* Mirror the SAREA tiling flag into the ctx atom's RB3D_COLORPITCH word */
+   RADEON_STATECHANGE(rmesa, ctx);
+   if (!rmesa->radeon.sarea->tiling_enabled)
+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~RADEON_COLOR_TILE_ENABLE;
+   else
+      rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE;
+
+   /* Nothing else to do while sarea->ctx_owner already names this context */
+   if (sarea->ctx_owner == rmesa->radeon.dri.hwContext)
+      return;
+
+   /* Record ownership; without kernel_mm run the legacy texture-age pass */
+   sarea->ctx_owner = rmesa->radeon.dri.hwContext;
+   if (!radeon->radeonScreen->kernel_mm)
+      radeon_bo_legacy_texture_age(radeon->radeonScreen->bom);
+}
+
+static void r100_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
+{	/* Installed as vtbl.emit_cs_header by r100_init_vtbl(); neither argument is used. */
+}	/* empty body: this implementation emits nothing */
+
+static void r100_vtbl_pre_emit_state(radeonContextPtr radeon)
+{
+   r100ContextPtr rmesa = (r100ContextPtr)radeon;
+   /* vtbl.pre_emit_state hook, installed by r100_init_vtbl(). */
+   /* r100 always needs to emit ZBS to avoid TCL lockups */
+   rmesa->hw.zbs.dirty = 1;	/* force the zbs atom dirty */
+   radeon->hw.is_dirty = 1;	/* and flag the overall hw state as dirty */
+}
+
+static void r100_vtbl_free_context(GLcontext *ctx)
+{	/* vtbl.free_context hook, installed by r100_init_vtbl(). */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   _mesa_vector4f_free( &rmesa->tcl.ObjClean ); /* pairs with the _mesa_vector4f_alloc() in r100CreateContext() */
+}
+
+static void r100_emit_query_finish(radeonContextPtr radeon)
+{	/* vtbl.emit_query_finish hook: operates on radeon->query.current, which is dereferenced unchecked. */
+   BATCH_LOCALS(radeon);
+   struct radeon_query_object *query = radeon->query.current;
+   /* Write RB3D_ZPASS_ADDR with a relocation to the query BO at the current offset (GTT domain) */
+   BEGIN_BATCH_NO_AUTOSTATE(4);
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZPASS_ADDR, 0));
+   OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+   END_BATCH();
+   query->curr_offset += sizeof(uint32_t);	/* advance by one 32-bit slot */
+   assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);	/* debug-only bound check on the advanced offset */
+   query->emitted_begin = GL_FALSE;
+}
+
+static void r100_init_vtbl(radeonContextPtr radeon)
+{	/* Install the R100 hooks; vtbl.pre_emit_atoms and vtbl.update_scissor are not assigned here. */
+   radeon->vtbl.get_lock = r100_get_lock;
+   radeon->vtbl.update_viewport_offset = radeonUpdateViewportOffset;
+   radeon->vtbl.emit_cs_header = r100_vtbl_emit_cs_header;	/* empty implementation */
+   radeon->vtbl.swtcl_flush = r100_swtcl_flush;
+   radeon->vtbl.pre_emit_state = r100_vtbl_pre_emit_state;
+   radeon->vtbl.fallback = radeonFallback;
+   radeon->vtbl.free_context = r100_vtbl_free_context;
+   radeon->vtbl.emit_query_finish = r100_emit_query_finish;
+   radeon->vtbl.check_blit = r100_check_blit;
+   radeon->vtbl.blit = r100_blit;
+   radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
+}
+
+/* Create the device specific context.  Returns GL_FALSE if the allocation or radeonInitContext() fails, GL_TRUE otherwise.
+ */
+GLboolean
+r100CreateContext( gl_api api,
+		   const __GLcontextModes *glVisual,
+		   __DRIcontext *driContextPriv,
+		   void *sharedContextPrivate)
+{
+   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+   radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private);
+   struct dd_function_table functions;
+   r100ContextPtr rmesa;
+   GLcontext *ctx;
+   int i;
+   int tcl_mode, fthrottle_mode;
+
+   assert(glVisual);
+   assert(driContextPriv);
+   assert(screen);
+
+   /* Allocate the Radeon context */
+   rmesa = (r100ContextPtr) CALLOC( sizeof(*rmesa) );
+   if ( !rmesa )
+      return GL_FALSE;
+
+   rmesa->radeon.radeonScreen = screen;
+   r100_init_vtbl(&rmesa->radeon);
+
+   /* init exp fog table data */
+   radeonInitStaticFogData();
+   
+   /* Parse configuration files.
+    * Do this here so that initialMaxAnisotropy is set before we create
+    * the default textures.
+    */
+   driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache,
+			screen->driScreen->myNum, "radeon");
+   rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache,
+                                                 "def_max_anisotropy");
+
+   if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) {
+      if ( sPriv->drm_version.minor < 13 )
+	 fprintf( stderr, "DRM version 1.%d too old to support HyperZ, "
+			  "disabling.\n", sPriv->drm_version.minor );
+      else
+	 rmesa->using_hyperz = GL_TRUE;
+   }
+
+   if ( sPriv->drm_version.minor >= 15 )
+      rmesa->texmicrotile = GL_TRUE;
+
+   /* Init default driver functions then plug in our Radeon-specific functions
+    * (the texture functions are especially important)
+    */
+   _mesa_init_driver_functions( &functions );
+   radeonInitTextureFuncs( &rmesa->radeon, &functions );
+   radeonInitQueryObjFunctions(&functions);
+
+   if (!radeonInitContext(&rmesa->radeon, &functions,
+			  glVisual, driContextPriv,
+			  sharedContextPrivate)) {
+     FREE(rmesa); /* NOTE(review): the optionCache parsed above is not explicitly released on this path - confirm */
+     return GL_FALSE;
+   }
+
+   rmesa->radeon.swtcl.RenderIndex = ~0;
+   rmesa->radeon.hw.all_dirty = GL_TRUE;
+
+   /* Set the maximum texture size small enough that we can guarantee that
+    * all texture units can bind a maximal texture and have all of them in
+    * texturable memory at once. Depending on the allow_large_textures driconf
+    * setting allow larger textures.
+    */
+
+   ctx = rmesa->radeon.glCtx;
+   ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache,
+						 "texture_units");
+   ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
+   ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
+   ctx->Const.MaxCombinedTextureImageUnits = ctx->Const.MaxTextureUnits;
+
+   i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures"); /* NOTE(review): value is never read; i is reused as a loop index below */
+
+   /* FIXME: When no memory manager is available we should set this 
+    * to some reasonable value based on texture memory pool size */
+   ctx->Const.MaxTextureLevels = 12;
+   ctx->Const.Max3DTextureLevels = 9;
+   ctx->Const.MaxCubeTextureLevels = 12;
+   ctx->Const.MaxTextureRectSize = 2048;
+
+   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+
+   /* No wide points.
+    */
+   ctx->Const.MinPointSize = 1.0;
+   ctx->Const.MinPointSizeAA = 1.0;
+   ctx->Const.MaxPointSize = 1.0;
+   ctx->Const.MaxPointSizeAA = 1.0;
+
+   ctx->Const.MinLineWidth = 1.0;
+   ctx->Const.MinLineWidthAA = 1.0;
+   ctx->Const.MaxLineWidth = 10.0;
+   ctx->Const.MaxLineWidthAA = 10.0;
+   ctx->Const.LineWidthGranularity = 0.0625;
+
+   /* Set maxlocksize (and hence vb size) small enough to avoid
+    * fallbacks in radeon_tcl.c.  ie. guarantee that all vertices can
+    * fit in a single dma buffer for indexed rendering of quad strips,
+    * etc.
+    */
+   ctx->Const.MaxArrayLockSize = 
+      MIN2( ctx->Const.MaxArrayLockSize, 
+ 	    RADEON_BUFFER_SIZE / RADEON_MAX_TCL_VERTSIZE ); 
+
+   rmesa->boxes = 0;
+
+   ctx->Const.MaxDrawBuffers = 1;
+   ctx->Const.MaxColorAttachments = 1;
+   ctx->Const.MaxRenderbufferSize = 2048;
+
+   _mesa_set_mvp_with_dp4( ctx, GL_TRUE );
+
+   /* Initialize the software rasterizer and helper modules.
+    */
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+   _ae_create_context( ctx );
+
+   /* Install the customized pipeline:
+    */
+   _tnl_destroy_pipeline( ctx );
+   _tnl_install_pipeline( ctx, radeon_pipeline );
+
+   /* Try and keep materials and vertices separate:
+    */
+/*    _tnl_isolate_materials( ctx, GL_TRUE ); */
+
+   /* Configure swrast and T&L to match hardware characteristics:
+    */
+   _swrast_allow_pixel_fog( ctx, GL_FALSE );
+   _swrast_allow_vertex_fog( ctx, GL_TRUE );
+   _tnl_allow_pixel_fog( ctx, GL_FALSE );
+   _tnl_allow_vertex_fog( ctx, GL_TRUE );
+
+
+   for ( i = 0 ; i < RADEON_MAX_TEXTURE_UNITS ; i++ ) {
+      _math_matrix_ctr( &rmesa->TexGenMatrix[i] );
+      _math_matrix_ctr( &rmesa->tmpmat[i] );
+      _math_matrix_set_identity( &rmesa->TexGenMatrix[i] );
+      _math_matrix_set_identity( &rmesa->tmpmat[i] );
+   }
+
+   driInitExtensions( ctx, card_extensions, GL_TRUE );
+   if (rmesa->radeon.radeonScreen->kernel_mm)
+     driInitExtensions(ctx, mm_extensions, GL_FALSE);
+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
+      _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" );
+   if (rmesa->radeon.glCtx->Mesa_DXTn) {
+      _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+      _mesa_enable_extension( ctx, "GL_S3_s3tc" );
+   }
+   else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) {
+      _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+   }
+
+   if (rmesa->radeon.radeonScreen->kernel_mm || rmesa->radeon.dri.drmMinor >= 9)
+      _mesa_enable_extension( ctx, "GL_NV_texture_rectangle");
+
+   if (!rmesa->radeon.radeonScreen->kernel_mm)
+      _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); /* listed in card_extensions, switched back off without kernel_mm */
+
+   /* XXX these should really go right after _mesa_init_driver_functions() */
+   radeon_fbo_init(&rmesa->radeon);
+   radeonInitSpanFuncs( ctx );
+   radeonInitIoctlFuncs( ctx );
+   radeonInitStateFuncs( ctx , rmesa->radeon.radeonScreen->kernel_mm );
+   radeonInitState( rmesa );
+   radeonInitSwtcl( ctx );
+
+   _mesa_vector4f_alloc( &rmesa->tcl.ObjClean, 0, 
+			 ctx->Const.MaxArrayLockSize, 32 );
+
+   fthrottle_mode = driQueryOptioni(&rmesa->radeon.optionCache, "fthrottle_mode");
+   rmesa->radeon.iw.irq_seq = -1;
+   rmesa->radeon.irqsEmitted = 0;
+   rmesa->radeon.do_irqs = (rmesa->radeon.radeonScreen->irq != 0 &&
+			    fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS);
+
+   rmesa->radeon.do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS);
+
+
+#if DO_DEBUG
+   RADEON_DEBUG = driParseDebugString( getenv( "RADEON_DEBUG" ),
+				       debug_control );
+#endif
+
+   tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode");
+   if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) {
+      fprintf(stderr, "disabling 3D acceleration\n");
+      FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1);
+   } else if (tcl_mode == DRI_CONF_TCL_SW ||
+	      !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+	 rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL; /* clears the flag on the screen object, not only for this context */
+	 fprintf(stderr, "Disabling HW TCL support\n");
+      }
+      TCL_FALLBACK(rmesa->radeon.glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
+   }
+
+   if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+/*       _tnl_need_dlist_norm_lengths( ctx, GL_FALSE ); */
+   }
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h
new file mode 100644
index 0000000000..c4bfbfdaeb
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_context.h
@@ -0,0 +1,459 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __RADEON_CONTEXT_H__
+#define __RADEON_CONTEXT_H__
+
+#include "tnl/t_vertex.h"
+#include "dri_util.h"
+#include "drm.h"
+#include "radeon_drm.h"
+#include "texmem.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "radeon_screen.h"
+
+#include "radeon_common.h"
+
+
+struct r100_context;
+typedef struct r100_context r100ContextRec;
+typedef struct r100_context *r100ContextPtr;
+
+#include "radeon_lock.h"
+
+
+
+#define R100_TEX_ALL 0x7
+
+/* used for both tcl_vtx and vc_frmt tex bits (they are identical); 'unit' is evaluated twice, keep it side-effect free */
+#define RADEON_ST_BIT(unit) \
+((unit) == 0 ? RADEON_CP_VC_FRMT_ST0 : (RADEON_CP_VC_FRMT_ST1 >> 2) << (2 * (unit)))
+
+#define RADEON_Q_BIT(unit) \
+((unit) == 0 ? RADEON_CP_VC_FRMT_Q0 : (RADEON_CP_VC_FRMT_Q1 >> 2) << (2 * (unit)))
+
+struct radeon_texture_env_state {
+	radeonTexObjPtr texobj;
+	GLenum format;
+	GLenum envMode;
+};
+
+struct radeon_texture_state {
+	struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS];
+};
+
+/* Trying to keep these relatively short as the variables are becoming
+ * extravagantly long.  Drop the driver name prefix off the front of
+ * everything - I think we know which driver we're in by now, and keep the
+ * prefix to 3 letters unless absolutely impossible.  
+ */
+
+#define CTX_CMD_0             0
+#define CTX_PP_MISC           1
+#define CTX_PP_FOG_COLOR      2
+#define CTX_RE_SOLID_COLOR    3
+#define CTX_RB3D_BLENDCNTL    4
+#define CTX_RB3D_DEPTHOFFSET  5
+#define CTX_RB3D_DEPTHPITCH   6
+#define CTX_RB3D_ZSTENCILCNTL 7
+#define CTX_CMD_1             8
+#define CTX_PP_CNTL           9
+#define CTX_RB3D_CNTL         10
+#define CTX_RB3D_COLOROFFSET  11
+#define CTX_CMD_2             12
+#define CTX_RB3D_COLORPITCH   13
+#define CTX_STATE_SIZE        14
+
+#define SET_CMD_0               0
+#define SET_SE_CNTL             1
+#define SET_SE_COORDFMT         2
+#define SET_CMD_1               3
+#define SET_SE_CNTL_STATUS      4
+#define SET_STATE_SIZE          5
+
+#define LIN_CMD_0               0
+#define LIN_RE_LINE_PATTERN     1
+#define LIN_RE_LINE_STATE       2
+#define LIN_CMD_1               3
+#define LIN_SE_LINE_WIDTH       4
+#define LIN_STATE_SIZE          5
+
+#define MSK_CMD_0               0
+#define MSK_RB3D_STENCILREFMASK 1
+#define MSK_RB3D_ROPCNTL        2
+#define MSK_RB3D_PLANEMASK      3
+#define MSK_STATE_SIZE          4
+
+#define VPT_CMD_0           0
+#define VPT_SE_VPORT_XSCALE          1
+#define VPT_SE_VPORT_XOFFSET         2
+#define VPT_SE_VPORT_YSCALE          3
+#define VPT_SE_VPORT_YOFFSET         4
+#define VPT_SE_VPORT_ZSCALE          5
+#define VPT_SE_VPORT_ZOFFSET         6
+#define VPT_STATE_SIZE      7
+
+#define MSC_CMD_0               0
+#define MSC_RE_MISC             1
+#define MSC_STATE_SIZE          2
+
+#define TEX_CMD_0                   0
+#define TEX_PP_TXFILTER             1
+#define TEX_PP_TXFORMAT             2
+#define TEX_PP_TXOFFSET             3
+#define TEX_PP_TXCBLEND             4
+#define TEX_PP_TXABLEND             5
+#define TEX_PP_TFACTOR              6
+#define TEX_CMD_1                   7
+#define TEX_PP_BORDER_COLOR         8
+#define TEX_STATE_SIZE              9
+
+#define TXR_CMD_0                   0	/* rectangle textures */
+#define TXR_PP_TEX_SIZE             1	/* 0x1d04, 0x1d0c for NPOT! */
+#define TXR_PP_TEX_PITCH            2	/* 0x1d08, 0x1d10 for NPOT! */
+#define TXR_STATE_SIZE              3
+
+#define CUBE_CMD_0                  0
+#define CUBE_PP_CUBIC_FACES         1
+#define CUBE_CMD_1                  2
+#define CUBE_PP_CUBIC_OFFSET_0      3
+#define CUBE_PP_CUBIC_OFFSET_1      4
+#define CUBE_PP_CUBIC_OFFSET_2      5
+#define CUBE_PP_CUBIC_OFFSET_3      6
+#define CUBE_PP_CUBIC_OFFSET_4      7
+#define CUBE_STATE_SIZE             8
+
+#define ZBS_CMD_0              0
+#define ZBS_SE_ZBIAS_FACTOR             1
+#define ZBS_SE_ZBIAS_CONSTANT           2
+#define ZBS_STATE_SIZE         3
+
+#define TCL_CMD_0                        0
+#define TCL_OUTPUT_VTXFMT         1
+#define TCL_OUTPUT_VTXSEL         2
+#define TCL_MATRIX_SELECT_0       3
+#define TCL_MATRIX_SELECT_1       4
+#define TCL_UCP_VERT_BLEND_CTL    5
+#define TCL_TEXTURE_PROC_CTL      6
+#define TCL_LIGHT_MODEL_CTL       7
+#define TCL_PER_LIGHT_CTL_0       8
+#define TCL_PER_LIGHT_CTL_1       9
+#define TCL_PER_LIGHT_CTL_2       10
+#define TCL_PER_LIGHT_CTL_3       11
+#define TCL_STATE_SIZE                   12
+
+#define MTL_CMD_0            0
+#define MTL_EMMISSIVE_RED    1
+#define MTL_EMMISSIVE_GREEN  2
+#define MTL_EMMISSIVE_BLUE   3
+#define MTL_EMMISSIVE_ALPHA  4
+#define MTL_AMBIENT_RED      5
+#define MTL_AMBIENT_GREEN    6
+#define MTL_AMBIENT_BLUE     7
+#define MTL_AMBIENT_ALPHA    8
+#define MTL_DIFFUSE_RED      9
+#define MTL_DIFFUSE_GREEN    10
+#define MTL_DIFFUSE_BLUE     11
+#define MTL_DIFFUSE_ALPHA    12
+#define MTL_SPECULAR_RED     13
+#define MTL_SPECULAR_GREEN   14
+#define MTL_SPECULAR_BLUE    15
+#define MTL_SPECULAR_ALPHA   16
+#define MTL_SHININESS        17
+#define MTL_STATE_SIZE       18
+
+#define VTX_CMD_0              0
+#define VTX_SE_COORD_FMT       1
+#define VTX_STATE_SIZE         2
+
+#define MAT_CMD_0              0
+#define MAT_ELT_0              1
+#define MAT_STATE_SIZE         17
+
+#define GRD_CMD_0                  0
+#define GRD_VERT_GUARD_CLIP_ADJ    1
+#define GRD_VERT_GUARD_DISCARD_ADJ 2
+#define GRD_HORZ_GUARD_CLIP_ADJ    3
+#define GRD_HORZ_GUARD_DISCARD_ADJ 4
+#define GRD_STATE_SIZE             5
+
+/* position changes frequently when lighting in modelpos - separate
+ * out to new state item?  
+ */
+#define LIT_CMD_0                  0
+#define LIT_AMBIENT_RED            1
+#define LIT_AMBIENT_GREEN          2
+#define LIT_AMBIENT_BLUE           3
+#define LIT_AMBIENT_ALPHA          4
+#define LIT_DIFFUSE_RED            5
+#define LIT_DIFFUSE_GREEN          6
+#define LIT_DIFFUSE_BLUE           7
+#define LIT_DIFFUSE_ALPHA          8
+#define LIT_SPECULAR_RED           9
+#define LIT_SPECULAR_GREEN         10
+#define LIT_SPECULAR_BLUE          11
+#define LIT_SPECULAR_ALPHA         12
+#define LIT_POSITION_X             13
+#define LIT_POSITION_Y             14
+#define LIT_POSITION_Z             15
+#define LIT_POSITION_W             16
+#define LIT_DIRECTION_X            17
+#define LIT_DIRECTION_Y            18
+#define LIT_DIRECTION_Z            19
+#define LIT_DIRECTION_W            20
+#define LIT_ATTEN_QUADRATIC        21
+#define LIT_ATTEN_LINEAR           22
+#define LIT_ATTEN_CONST            23
+#define LIT_ATTEN_XXX              24
+#define LIT_CMD_1                  25
+#define LIT_SPOT_DCD               26
+#define LIT_SPOT_EXPONENT          27
+#define LIT_SPOT_CUTOFF            28
+#define LIT_SPECULAR_THRESH        29
+#define LIT_RANGE_CUTOFF           30	/* ? */
+#define LIT_ATTEN_CONST_INV        31
+#define LIT_STATE_SIZE             32
+
+/* Fog
+ */
+#define FOG_CMD_0      0
+#define FOG_R          1
+#define FOG_C          2
+#define FOG_D          3
+#define FOG_PAD        4
+#define FOG_STATE_SIZE 5
+
+/* UCP
+ */
+#define UCP_CMD_0      0
+#define UCP_X          1
+#define UCP_Y          2
+#define UCP_Z          3
+#define UCP_W          4
+#define UCP_STATE_SIZE 5
+
+/* GLT - Global ambient
+ */
+#define GLT_CMD_0      0
+#define GLT_RED        1
+#define GLT_GREEN      2
+#define GLT_BLUE       3
+#define GLT_ALPHA      4
+#define GLT_STATE_SIZE 5
+
+/* EYE
+ */
+#define EYE_CMD_0          0
+#define EYE_X              1
+#define EYE_Y              2
+#define EYE_Z              3
+#define EYE_RESCALE_FACTOR 4
+#define EYE_STATE_SIZE     5
+
+#define SHN_CMD_0          0
+#define SHN_SHININESS      1
+#define SHN_STATE_SIZE     2
+
+#define R100_QUERYOBJ_CMD_0  0
+#define R100_QUERYOBJ_DATA_0 1
+#define R100_QUERYOBJ_CMDSIZE  2
+
+#define STP_CMD_0 0
+#define STP_DATA_0 1
+#define STP_CMD_1 2
+#define STP_STATE_SIZE 35
+
+struct r100_hw_state {
+	/* Hardware state, stored as cmdbuf commands:  
+	 *   -- Need to doublebuffer for
+	 *           - eliding noop statechange loops? (except line stipple count)
+	 */
+	struct radeon_state_atom ctx;
+	struct radeon_state_atom set;
+	struct radeon_state_atom lin;
+	struct radeon_state_atom msk;
+	struct radeon_state_atom vpt;
+	struct radeon_state_atom tcl;
+	struct radeon_state_atom msc;
+	struct radeon_state_atom tex[3];
+	struct radeon_state_atom cube[3];
+	struct radeon_state_atom zbs;
+	struct radeon_state_atom mtl;
+	struct radeon_state_atom mat[6];
+	struct radeon_state_atom lit[8];	/* includes vec, scl commands */
+	struct radeon_state_atom ucp[6];
+	struct radeon_state_atom eye;	/* eye pos */
+	struct radeon_state_atom grd;	/* guard band clipping */
+	struct radeon_state_atom fog;
+	struct radeon_state_atom glt;
+	struct radeon_state_atom txr[3];	/* for NPOT */
+	struct radeon_state_atom stp;
+};
+
+struct radeon_stipple_state {
+	GLuint mask[32];
+};
+
+struct r100_state {
+	struct radeon_stipple_state stipple;
+	struct radeon_texture_state texture;
+};
+
+#define RADEON_CMD_BUF_SZ  (8*1024)
+#define R200_ELT_BUF_SZ  (8*1024)
+/* radeon_tcl.c
+ */
+struct r100_tcl_info {
+	GLuint vertex_format;
+	GLuint hw_primitive;
+
+	/* Temporary for cases where incoming vertex data is incompatible
+	 * with maos code.
+	 */
+	GLvector4f ObjClean;
+
+	GLuint *Elts;
+
+        int elt_cmd_offset;
+	int elt_cmd_start;
+        int elt_used;
+};
+
+/* radeon_swtcl.c
+ */
+struct r100_swtcl_info {
+	GLuint vertex_format;
+
+	GLubyte *verts;
+
+	/* Fallback rasterization functions
+	 */
+	radeon_point_func draw_point;
+	radeon_line_func draw_line;
+	radeon_tri_func draw_tri;
+
+   /**
+    * Offset of the 4UB color data within a hardware (swtcl) vertex.
+    */
+	GLuint coloroffset;
+
+   /**
+    * Offset of the 3UB specular color data within a hardware (swtcl) vertex.
+    */
+	GLuint specoffset;
+
+	GLboolean needproj;
+};
+
+
+
+/* A maximum total of 20 elements per vertex:  3 floats for position, 3
+ * floats for normal, 4 floats for color, 4 bytes for secondary color,
+ * 3 floats for each texture unit (9 floats total).
+ * 
+ * The position data is never actually stored here, so 3 elements could be
+ * trimmed out of the buffer. This number is only valid for vtxfmt!
+ */
+#define RADEON_MAX_VERTEX_SIZE 20
+
+struct r100_context {
+        struct radeon_context radeon;
+
+	/* Driver and hardware state management
+	 */
+	struct r100_hw_state hw;
+	struct r100_state state;
+
+	/* Vertex buffers
+	 */
+	struct radeon_ioctl ioctl;
+	struct radeon_store store;
+
+	/* TCL stuff
+	 */
+	GLmatrix TexGenMatrix[RADEON_MAX_TEXTURE_UNITS];
+	GLboolean recheck_texgen[RADEON_MAX_TEXTURE_UNITS];
+	GLboolean TexGenNeedNormals[RADEON_MAX_TEXTURE_UNITS];
+	GLuint TexGenEnabled;
+	GLuint NeedTexMatrix;
+	GLuint TexMatColSwap;
+	GLmatrix tmpmat[RADEON_MAX_TEXTURE_UNITS];
+	GLuint last_ReallyEnabled;
+
+	/* radeon_tcl.c
+	 */
+	struct r100_tcl_info tcl;
+
+	/* radeon_swtcl.c
+	 */
+	struct r100_swtcl_info swtcl;
+
+	GLboolean using_hyperz;
+	GLboolean texmicrotile;
+
+	/* Performance counters
+	 */
+	GLuint boxes;		/* Draw performance boxes */
+	GLuint hardwareWentIdle;
+	GLuint c_clears;
+	GLuint c_drawWaits;
+	GLuint c_textureSwaps;
+	GLuint c_textureBytes;
+	GLuint c_vertexBuffers;
+
+};
+
+
+#define R100_CONTEXT(ctx)		((r100ContextPtr)(ctx->DriverCtx))
+
+
+#define RADEON_OLD_PACKETS 1
+
+extern GLboolean r100CreateContext( gl_api api,
+				    const __GLcontextModes *glVisual,
+				    __DRIcontext *driContextPriv,
+				    void *sharedContextPrivate);
+
+
+#endif				/* __RADEON_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs.c b/src/mesa/drivers/dri/radeon/radeon_cs.c
new file mode 100644
index 0000000000..17e7433369
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_cs.c
@@ -0,0 +1,95 @@
+
+#include <stdio.h>
+#include <stdint.h>
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_cs_int_drm.h"
+
+struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm,
+			    uint32_t ndw)
+{
+    /* The manager's backend allocates the internal CS; hand it out as the public type. */
+    return (struct radeon_cs *)csm->funcs->cs_create(csm, ndw);
+}
+
+int radeon_cs_write_reloc(struct radeon_cs *cs,
+			  struct radeon_bo *bo,
+			  uint32_t read_domain,
+			  uint32_t write_domain,
+			  uint32_t flags)
+{
+    /* Delegate to the cs_write_reloc hook of the manager that owns this
+     * command stream; all arguments are passed through and the hook's
+     * result is returned unchanged.
+     */
+    struct radeon_cs_int *priv = (struct radeon_cs_int *)cs;
+    struct radeon_cs_funcs *ops = priv->csm->funcs;
+    return ops->cs_write_reloc(priv, bo, read_domain, write_domain, flags);
+}
+
+int radeon_cs_begin(struct radeon_cs *cs,
+		    uint32_t ndw,
+		    const char *file,
+		    const char *func,
+		    int line)
+{
+    struct radeon_cs_int *priv = (struct radeon_cs_int *)cs; /* internal view of cs */
+    return priv->csm->funcs->cs_begin(priv, ndw, file, func, line);
+}
+
+int radeon_cs_end(struct radeon_cs *cs,
+		  const char *file,
+		  const char *func,
+		  int line)
+{
+    struct radeon_cs_int *priv = (struct radeon_cs_int *)cs; /* internal view of cs */
+    return priv->csm->funcs->cs_end(priv, file, func, line);
+}
+
+int radeon_cs_emit(struct radeon_cs *cs)
+{
+    struct radeon_cs_int *priv = (struct radeon_cs_int *)cs; /* internal view of cs */
+    return priv->csm->funcs->cs_emit(priv);
+}
+
+int radeon_cs_destroy(struct radeon_cs *cs)
+{
+    struct radeon_cs_int *priv = (struct radeon_cs_int *)cs; /* internal view of cs */
+    return priv->csm->funcs->cs_destroy(priv);
+}
+
+int radeon_cs_erase(struct radeon_cs *cs)
+{
+    struct radeon_cs_int *priv = (struct radeon_cs_int *)cs; /* internal view of cs */
+    return priv->csm->funcs->cs_erase(priv);
+}
+
+int radeon_cs_need_flush(struct radeon_cs *cs)
+{
+    struct radeon_cs_int *priv = (struct radeon_cs_int *)cs; /* internal view of cs */
+    return priv->csm->funcs->cs_need_flush(priv);
+}
+
+void radeon_cs_print(struct radeon_cs *cs, FILE *file)
+{
+    struct radeon_cs_int *priv = (struct radeon_cs_int *)cs; /* internal view of cs */
+    priv->csm->funcs->cs_print(priv, file);
+}
+
+void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit)
+{
+    struct radeon_cs_manager *mgr = ((struct radeon_cs_int *)cs)->csm;
+    /* Any domain other than VRAM is applied to the GART limit. */
+    int32_t *slot = (domain == RADEON_GEM_DOMAIN_VRAM) ? &mgr->vram_limit
+                                                        : &mgr->gart_limit;
+    *slot = limit;
+}
+
+void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data)
+{
+    struct radeon_cs_int *priv = (struct radeon_cs_int *)cs;
+    priv->space_flush_data = data;   /* argument stored alongside the callback */
+    priv->space_flush_fn = fn;
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_drm.h b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h
new file mode 100644
index 0000000000..a3f1750c6e
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h
@@ -0,0 +1,141 @@
+/* 
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Aapo Tahkola <aet@rasterburn.org>
+ *      Nicolai Haehnle <prefect_@gmx.net>
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_CS_H
+#define RADEON_CS_H
+
+#include <stdint.h>
+#include <string.h>
+#include "drm.h"
+#include "radeon_drm.h"
+#include "radeon_bo_drm.h"
+
+struct radeon_cs_reloc {
+    struct radeon_bo    *bo;
+    uint32_t            read_domain;
+    uint32_t            write_domain;
+    uint32_t            flags;
+};
+
+
+#define RADEON_CS_SPACE_OK 0
+#define RADEON_CS_SPACE_OP_TO_BIG 1
+#define RADEON_CS_SPACE_FLUSH 2
+
+struct radeon_cs {
+    uint32_t *packets;                          /* command buffer, addressed in dwords */
+    unsigned cdw;                               /* dwords written so far; index of the next write */
+    unsigned ndw;                               /* allocated size of packets, in dwords */
+    unsigned                    section_ndw;    /* size given to radeon_cs_begin; 0 when no section is open */
+    unsigned                    section_cdw;    /* dwords written since the section was opened */
+};
+
+#define MAX_SPACE_BOS (32)
+
+struct radeon_cs_manager;
+
+extern struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm,
+					  uint32_t ndw);
+
+extern int radeon_cs_begin(struct radeon_cs *cs,
+			   uint32_t ndw,
+			   const char *file,
+			   const char *func, int line);
+extern int radeon_cs_end(struct radeon_cs *cs,
+			 const char *file,
+			 const char *func,
+			 int line);
+extern int radeon_cs_emit(struct radeon_cs *cs);
+extern int radeon_cs_destroy(struct radeon_cs *cs);
+extern int radeon_cs_erase(struct radeon_cs *cs);
+extern int radeon_cs_need_flush(struct radeon_cs *cs);
+extern void radeon_cs_print(struct radeon_cs *cs, FILE *file);
+extern void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit);
+extern void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *data);
+extern int radeon_cs_write_reloc(struct radeon_cs *cs,
+				 struct radeon_bo *bo,
+				 uint32_t read_domain,
+				 uint32_t write_domain,
+				 uint32_t flags);
+
+/*
+ * add a persistent BO to the list
+ * a persistent BO is one that will be referenced across flushes,
+ * i.e. colorbuffer, textures etc.
+ * They get reset when a new "operation" happens, where an operation
+ * is a state emission with a color/textures etc followed by a bunch of vertices.
+ */
+void radeon_cs_space_add_persistent_bo(struct radeon_cs *cs,
+				       struct radeon_bo *bo,
+				       uint32_t read_domains,
+				       uint32_t write_domain);
+
+/* reset the persistent BO list */
+void radeon_cs_space_reset_bos(struct radeon_cs *cs);
+
+/* do a space check with the current persistent BO list */
+int radeon_cs_space_check(struct radeon_cs *cs);
+
+/* do a space check with the current persistent BO list and a temporary BO
+ * a temporary BO is like a DMA buffer, which  gets flushed with the
+ * command buffer */
+int radeon_cs_space_check_with_bo(struct radeon_cs *cs,
+				  struct radeon_bo *bo,
+				  uint32_t read_domains,
+				  uint32_t write_domain);
+
+static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword)
+{
+    /* Append one dword; no bounds check against cs->ndw is made here. */
+    cs->packets[cs->cdw] = dword;
+    cs->cdw += 1;
+    cs->section_cdw += cs->section_ndw ? 1 : 0; /* counted only while section_ndw is set */
+}
+
+static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword)
+{
+    /* A 64-bit value takes two consecutive dwords, copied in host byte order. */
+    uint32_t *dst = &cs->packets[cs->cdw];
+    memcpy(dst, &qword, sizeof qword);
+    cs->cdw += 2;
+    cs->section_cdw += cs->section_ndw ? 2 : 0;
+}
+
+static inline void radeon_cs_write_table(struct radeon_cs *cs,
+					 void *data, uint32_t size)
+{
+    /* 'size' is a dword count: size * 4 bytes are copied from 'data'. */
+    uint32_t *dst = &cs->packets[cs->cdw];
+    memcpy(dst, data, size * 4);
+    cs->cdw += size;
+    cs->section_cdw += cs->section_ndw ? size : 0;
+}
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_int_drm.h b/src/mesa/drivers/dri/radeon/radeon_cs_int_drm.h
new file mode 100644
index 0000000000..8ba76bf951
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_int_drm.h
@@ -0,0 +1,66 @@
+
+#ifndef _RADEON_CS_INT_H_
+#define _RADEON_CS_INT_H_
+
+struct radeon_cs_space_check {
+    struct radeon_bo_int *bo;
+    uint32_t read_domains;
+    uint32_t write_domain;
+    uint32_t new_accounted;
+};
+
+struct radeon_cs_int {
+    /* keep these first five members in the same place as in struct radeon_cs */
+    uint32_t                    *packets;    /* pointers are cast between the two struct types */
+    unsigned                    cdw;
+    unsigned                    ndw;
+    unsigned                    section_ndw;
+    unsigned                    section_cdw;
+    /* private members */
+    struct radeon_cs_manager    *csm;        /* manager whose funcs table implements this CS */
+    void                        *relocs;     /* backend-defined relocation array */
+    unsigned                    crelocs;     /* number of entries in relocs */
+    unsigned                    relocs_total_size;
+    const char                  *section_file;   /* file/func/line passed to cs_begin, used in diagnostics */
+    const char                  *section_func;
+    int                         section_line;
+    struct radeon_cs_space_check bos[MAX_SPACE_BOS];
+    int                         bo_count;
+    void                        (*space_flush_fn)(void *);   /* set by radeon_cs_space_set_flush() */
+    void                        *space_flush_data;           /* set together with space_flush_fn */
+};
+
+/* cs functions */
+struct radeon_cs_funcs {
+    struct radeon_cs_int *(*cs_create)(struct radeon_cs_manager *csm,
+                                   uint32_t ndw);
+    int (*cs_write_reloc)(struct radeon_cs_int *cs,
+                          struct radeon_bo *bo,
+                          uint32_t read_domain,
+                          uint32_t write_domain,
+                          uint32_t flags);
+    int (*cs_begin)(struct radeon_cs_int *cs,
+                    uint32_t ndw,
+		    const char *file,
+		    const char *func,
+		    int line);
+    int (*cs_end)(struct radeon_cs_int *cs,
+		  const char *file, const char *func,
+		  int line);
+
+
+    int (*cs_emit)(struct radeon_cs_int *cs);
+    int (*cs_destroy)(struct radeon_cs_int *cs);
+    int (*cs_erase)(struct radeon_cs_int *cs);
+    int (*cs_need_flush)(struct radeon_cs_int *cs);
+    void (*cs_print)(struct radeon_cs_int *cs, FILE *file);
+};
+
+struct radeon_cs_manager {
+    struct radeon_cs_funcs  *funcs;
+    int                     fd;
+    int32_t vram_limit, gart_limit;
+    int32_t vram_write_used, gart_write_used;
+    int32_t read_used;
+};
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
new file mode 100644
index 0000000000..c2722a4e19
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c
@@ -0,0 +1,416 @@
+/* 
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Aapo Tahkola <aet@rasterburn.org>
+ *      Nicolai Haehnle <prefect_@gmx.net>
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#include <errno.h>
+#include <unistd.h>
+#include <stdint.h>
+#include "drm.h"
+#include "radeon_drm.h"
+
+#include "radeon_bocs_wrapper.h"
+#include "radeon_common.h"
+#ifdef HAVE_LIBDRM_RADEON
+#include "radeon_cs_int.h"
+#else
+#include "radeon_cs_int_drm.h"
+#endif
+struct cs_manager_legacy {
+    struct radeon_cs_manager    base;
+    struct radeon_context       *ctx;
+    /* hack for scratch stuff */
+    uint32_t                    pending_age;
+    uint32_t                    pending_count;
+
+
+};
+
+struct cs_reloc_legacy {
+    struct radeon_cs_reloc  base;
+    uint32_t                cindices;   /* number of entries in indices[] */
+    uint32_t                *indices;   /* positions in cs->packets that get the bo offset added */
+};
+
+
+static struct radeon_cs_int *cs_create(struct radeon_cs_manager *csm,
+				       uint32_t ndw)
+{
+    /* Allocate a zeroed command stream owned by 'csm'.  The packet buffer
+     * holds 'ndw' dwords rounded up to a multiple of 0x400; NULL is
+     * returned if either allocation fails. */
+    struct radeon_cs_int *stream = (struct radeon_cs_int*)calloc(1, sizeof(*stream));
+    if (!stream)
+        return NULL;
+    stream->csm = csm;
+    stream->ndw = (ndw + 0x3FF) & (~0x3FF);
+    stream->packets = (uint32_t*)malloc(4 * stream->ndw);
+    if (!stream->packets) {
+        free(stream);
+        return NULL;
+    }
+    stream->relocs_total_size = 0;
+    return stream;
+}
+
+static int cs_write_reloc(struct radeon_cs_int *cs,
+                          struct radeon_bo *bo,
+                          uint32_t read_domain,
+                          uint32_t write_domain,
+                          uint32_t flags)
+{
+    struct cs_reloc_legacy *relocs;
+    int i;
+    /* Records that the most recently written dword (cs->cdw - 1) refers to 'bo'. */
+    relocs = (struct cs_reloc_legacy *)cs->relocs;
+    /* check domains */
+    if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
+        /* in one CS a bo can only be in the read or the write domain, but
+         * not in both at the same time
+         */
+        return -EINVAL;
+    }
+    if (read_domain == RADEON_GEM_DOMAIN_CPU) {
+        return -EINVAL;
+    }
+    if (write_domain == RADEON_GEM_DOMAIN_CPU) {
+        return -EINVAL;
+    }
+    /* check if bo is already referenced */
+    for(i = 0; i < cs->crelocs; i++) {
+        uint32_t *indices;
+
+        if (relocs[i].base.bo->handle == bo->handle) {
+            /* The domains must stay consistent.  The arguments were already
+             * checked to have exactly one of read/write set, so it only
+             * remains to check that a bo previously recorded with a read
+             * domain is given a read domain again, and likewise that a bo
+             * previously recorded with a write domain is given one again.
+             */
+            if (relocs[i].base.read_domain && !read_domain) {
+                return -EINVAL;
+            }
+            if (relocs[i].base.write_domain && !write_domain) {
+                return -EINVAL;
+            }
+            relocs[i].base.read_domain |= read_domain;
+            relocs[i].base.write_domain |= write_domain;
+            /* save the index */
+            relocs[i].cindices++;
+            indices = (uint32_t*)realloc(relocs[i].indices,
+                                         relocs[i].cindices * 4);
+            if (indices == NULL) {
+                relocs[i].cindices -= 1;   /* undo the increment; the old array is still valid */
+                return -ENOMEM;
+            }
+            relocs[i].indices = indices;
+            relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1;
+            return 0;
+        }
+    }
+    /* add bo to reloc */
+    relocs = (struct cs_reloc_legacy*)
+             realloc(cs->relocs,
+                     sizeof(struct cs_reloc_legacy) * (cs->crelocs + 1));
+    if (relocs == NULL) {
+        return -ENOMEM;
+    }
+    cs->relocs = relocs;
+    relocs[cs->crelocs].base.bo = bo;
+    relocs[cs->crelocs].base.read_domain = read_domain;
+    relocs[cs->crelocs].base.write_domain = write_domain;
+    relocs[cs->crelocs].base.flags = flags;
+    relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
+    if (relocs[cs->crelocs].indices == NULL) {
+        return -ENOMEM;   /* crelocs is not incremented, so the new slot stays unused */
+    }
+    relocs[cs->crelocs].indices[0] = cs->cdw - 1;
+    relocs[cs->crelocs].cindices = 1;
+    cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
+    cs->crelocs++;
+    radeon_bo_ref(bo);   /* cs_set_age() unrefs each bo in the reloc list */
+    return 0;
+}
+
+static int cs_begin(struct radeon_cs_int *cs,
+                    uint32_t ndw,
+                    const char *file,
+                    const char *func,
+                    int line)
+{
+    /* Open a section of exactly 'ndw' dwords, growing the packet buffer if
+     * needed.  Returns 0, -EPIPE if a section is already open, or -ENOMEM. */
+    if (cs->section_ndw) {
+        fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
+                cs->section_file, cs->section_func, cs->section_line);
+        fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
+                file, func, line);
+        return -EPIPE;
+    }
+    /* Grow first, so that a failed realloc does not leave a section open. */
+    if (cs->cdw + ndw > cs->ndw) {
+        uint32_t tmp, *ptr;
+
+        tmp = (cs->cdw + ndw + 0x3ff) & (~0x3ff);
+        ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
+        if (ptr == NULL) {
+            return -ENOMEM;
+        }
+        cs->packets = ptr;
+        cs->ndw = tmp;
+    }
+    cs->section_ndw = ndw;
+    cs->section_cdw = 0;
+    cs->section_file = file;
+    cs->section_func = func;
+    cs->section_line = line;
+    return 0;
+}
+
+static int cs_end(struct radeon_cs_int *cs,
+                  const char *file,
+                  const char *func,
+                  int line)
+/* Close the open section; -EPIPE if none is open or its size is wrong. */
+{
+    if (!cs->section_ndw) {
+        fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
+                file, func, line);
+        return -EPIPE;
+    }
+    if (cs->section_ndw != cs->section_cdw) {
+        fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %u vs %u\n",
+                cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
+        fprintf(stderr, "CS section end at (%s,%s,%d)\n",
+                file, func, line);
+        return -EPIPE;   /* section_ndw is left set on this path */
+    }
+    cs->section_ndw = 0;   /* zero is what cs_begin tests for "no section open" */
+
+    return 0;
+}
+
+static int cs_process_relocs(struct radeon_cs_int *cs)
+{
+    /* Validate every relocated bo and add its start offset to each packet
+     * dword recorded for it.  Returns 0 or the validation error code. */
+    struct cs_reloc_legacy *relocs;
+    int i, j, r;
+    relocs = (struct cs_reloc_legacy *)cs->relocs;
+restart:
+    for (i = 0; i < cs->crelocs; i++) 
+    {
+        for (j = 0; j < relocs[i].cindices; j++) 
+        {
+            /* Zeroed so the diagnostics below never print indeterminate
+             * values should validation fail without filling them in. */
+            uint32_t soffset = 0, eoffset = 0;
+
+            r = radeon_bo_legacy_validate(relocs[i].base.bo,
+                                           &soffset, &eoffset);
+            /* NOTE(review): a restart re-applies "+= soffset" to dwords
+             * already patched above - confirm this cannot double-count. */
+            if (r == -EAGAIN)
+                goto restart;
+            if (r) 
+            {
+                fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
+                        relocs[i].base.bo, soffset, eoffset);
+                return r;
+            }
+            cs->packets[relocs[i].indices[j]] += soffset;
+            if (cs->packets[relocs[i].indices[j]] >= eoffset) 
+            {
+                fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
+                        relocs[i].base.bo, soffset, eoffset);
+                fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
+                        relocs[i].base.bo,
+                        cs->packets[relocs[i].indices[j]],
+                        eoffset);
+                exit(0);   /* NOTE(review): fatal, yet exits with a success status */
+                return -EINVAL;
+            }
+        }
+    }
+    return 0;
+}
+
+/* Flag every relocated BO as pending at the manager's pending_age and drop
+ * one reference on it.  Always returns 0. */
+static int cs_set_age(struct radeon_cs_int *cs)
+{
+    struct cs_manager_legacy *manager = (struct cs_manager_legacy *)cs->csm;
+    struct cs_reloc_legacy *entries = (struct cs_reloc_legacy *)cs->relocs;
+    int n;
+    for (n = 0; n < cs->crelocs; ++n) {
+        radeon_bo_legacy_pending(entries[n].base.bo, manager->pending_age);
+        radeon_bo_unref(entries[n].base.bo);
+    }
+    return 0;
+}
+
+static int cs_emit(struct radeon_cs_int *cs)
+{
+    struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm;
+    drm_radeon_cmd_buffer_t cmd;
+    drm_r300_cmd_header_t age;
+    uint64_t ull;
+    int r;
+    /* Patch relocations, then submit cs->packets through DRM_RADEON_CMDBUF. */
+    csm->ctx->vtbl.emit_cs_header((struct radeon_cs *)cs, csm->ctx);
+
+    /* append buffer age */
+    if ( IS_R300_CLASS(csm->ctx->radeonScreen) )
+    { 
+      age.scratch.cmd_type = R300_CMD_SCRATCH;
+      /* Scratch register 2 corresponds to what radeonGetAge polls */
+      csm->pending_age = 0;
+      csm->pending_count = 1;
+      ull = (uint64_t) (intptr_t) &csm->pending_age; /* address of pending_age, emitted as a qword below */
+      age.scratch.reg = 2;
+      age.scratch.n_bufs = 1;
+      age.scratch.flags = 0;
+      radeon_cs_write_dword((struct radeon_cs *)cs, age.u);
+      radeon_cs_write_qword((struct radeon_cs *)cs, ull);
+      radeon_cs_write_dword((struct radeon_cs *)cs, 0);
+    }
+
+    r = cs_process_relocs(cs);
+    if (r) {
+        return 0; /* NOTE(review): relocation failure is reported as success and nothing is submitted -- confirm intent */
+    }
+
+    cmd.buf = (char *)cs->packets;
+    cmd.bufsz = cs->cdw * 4; /* cdw counts dwords; bufsz is in bytes */
+    if (csm->ctx->state.scissor.enabled) {
+        cmd.nbox = csm->ctx->state.scissor.numClipRects;
+        cmd.boxes = (drm_clip_rect_t *) csm->ctx->state.scissor.pClipRects;
+    } else {
+        cmd.nbox = csm->ctx->numClipRects;
+        cmd.boxes = (drm_clip_rect_t *) csm->ctx->pClipRects;
+    }
+
+    //dump_cmdbuf(cs);
+
+    r = drmCommandWrite(cs->csm->fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd));
+    if (r) {
+        return r; /* cs_set_age() is skipped on this path */
+    }
+    if ((!IS_R300_CLASS(csm->ctx->radeonScreen)) &&
+        (!IS_R600_CLASS(csm->ctx->radeonScreen))) { /* +r6/r7 : No irq for r6/r7 yet. */
+	drm_radeon_irq_emit_t emit_cmd;
+	emit_cmd.irq_seq = (int*)&csm->pending_age; /* the ioctl stores the sequence number into pending_age */
+	r = drmCommandWriteRead(cs->csm->fd, DRM_RADEON_IRQ_EMIT, &emit_cmd, sizeof(emit_cmd));
+	if (r) {
+		return r;
+	}
+    }
+    cs_set_age(cs); /* marks every relocated BO pending at pending_age and unrefs it */
+    /* Reset the manager's read/write usage counters. */
+    cs->csm->read_used = 0;
+    cs->csm->vram_write_used = 0;
+    cs->csm->gart_write_used = 0;
+    return 0;
+}
+
+/* Free the indices array of each of the first crelocs entries of relocs_p.
+ * The entry array itself is not freed.  Does nothing when relocs_p is NULL. */
+static inline void cs_free_reloc(void *relocs_p, int crelocs)
+{
+    struct cs_reloc_legacy *entry = relocs_p;
+    int left = crelocs;
+    for (; entry != NULL && left > 0; entry++, left--)
+        free(entry->indices);
+}
+
+static int cs_destroy(struct radeon_cs_int *cs)
+{
+    cs_free_reloc(cs->relocs, cs->crelocs); /* per-relocation indices arrays */
+    free(cs->relocs); /* the relocation array itself */
+    free(cs->packets);
+    free(cs); /* cs must not be used after this call */
+    return 0;
+}
+
+static int cs_erase(struct radeon_cs_int *cs)
+{
+    cs_free_reloc(cs->relocs, cs->crelocs);
+    free(cs->relocs);
+    cs->relocs_total_size = 0;
+    cs->relocs = NULL;
+    cs->crelocs = 0;
+    cs->cdw = 0; /* cs->packets is kept; only the dword count is reset */
+    cs->section_ndw = 0; /* cs_end() treats section_ndw == 0 as "no section open" */
+    return 0;
+}
+
+static int cs_need_flush(struct radeon_cs_int *cs)
+{
+    /* this function used to flush when the BO usage got to
+     * a certain size, now the higher levels handle this better */
+    return 0; /* cs is not inspected: a flush is never requested here */
+}
+
+static void cs_print(struct radeon_cs_int *cs, FILE *file)
+{ /* No-op: this implementation writes nothing to file. */
+}
+
+static struct radeon_cs_funcs  radeon_cs_legacy_funcs = { /* positional initialiser: order must match the members of struct radeon_cs_funcs */
+    cs_create,
+    cs_write_reloc,
+    cs_begin,
+    cs_end,
+    cs_emit,
+    cs_destroy,
+    cs_erase,
+    cs_need_flush,
+    cs_print,
+};
+
+/* Allocate a zeroed legacy CS manager for ctx: funcs is radeon_cs_legacy_funcs,
+ * fd is ctx->dri.fd and pending_age starts at 1.  Returns NULL when the
+ * allocation fails. */
+struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx)
+{
+    struct cs_manager_legacy *mgr = calloc(1, sizeof(*mgr));
+
+    if (mgr != NULL) {
+        mgr->base.funcs = &radeon_cs_legacy_funcs;
+        mgr->base.fd = ctx->dri.fd;
+        mgr->ctx = ctx;
+        mgr->pending_age = 1;
+    }
+    return (struct radeon_cs_manager *)mgr;
+}
+
+void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm)
+{
+    free(csm); /* only the manager struct is released here; free(NULL) is a no-op */
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h
new file mode 100644
index 0000000000..cafbc9e576
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h
@@ -0,0 +1,40 @@
+/* 
+ * Copyright © 2008 Nicolai Haehnle
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Aapo Tahkola <aet@rasterburn.org>
+ *      Nicolai Haehnle <prefect_@gmx.net>
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_CS_LEGACY_H
+#define RADEON_CS_LEGACY_H
+
+struct radeon_context;
+
+struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx);
+void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm);
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c b/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c
new file mode 100644
index 0000000000..e22b437d56
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_cs_space_drm.c
@@ -0,0 +1,244 @@
+/* 
+ * Copyright © 2009 Red Hat Inc.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include "radeon_bocs_wrapper.h"
+#include "radeon_bo_int_drm.h"
+#include "radeon_cs_int_drm.h"
+
+struct rad_sizes {
+    int32_t op_read;       /* summed bo->size accounted as read; can dip below 0 while a BO moves from read to write */
+    int32_t op_gart_write; /* summed bo->size written in RADEON_GEM_DOMAIN_GTT */
+    int32_t op_vram_write; /* summed bo->size written in RADEON_GEM_DOMAIN_VRAM */
+};
+
+static inline int radeon_cs_setup_bo(struct radeon_cs_space_check *sc, struct rad_sizes *sizes)
+{
+    uint32_t read_domains, write_domain;
+    struct radeon_bo_int *bo;
+    /* Compute sc->new_accounted for sc->bo and add the BO's size to *sizes. */
+    bo = sc->bo;
+    sc->new_accounted = 0;
+    read_domains = sc->read_domains;
+    write_domain = sc->write_domain;
+    /* Accounting words pack the read domains in bits 16+ and the write domain in the low 16 bits. */
+    /* legacy needs a static check */
+    if (radeon_bo_is_static((struct radeon_bo *)sc->bo)) {
+	bo->space_accounted = sc->new_accounted = (read_domains << 16) | write_domain; /* committed at once, no size added */
+	return 0;
+    }
+
+    /* already accounted this bo */
+    if (write_domain && (write_domain == bo->space_accounted)) {
+	sc->new_accounted = bo->space_accounted;
+	return 0;
+    }
+    if (read_domains && ((read_domains << 16) == bo->space_accounted)) {
+	sc->new_accounted = bo->space_accounted;
+	return 0;
+    }
+
+    if (bo->space_accounted == 0) { /* first time this BO is accounted */
+	if (write_domain == RADEON_GEM_DOMAIN_VRAM)
+	    sizes->op_vram_write += bo->size;
+	else if (write_domain == RADEON_GEM_DOMAIN_GTT)
+	  sizes->op_gart_write += bo->size;
+	else
+	    sizes->op_read += bo->size;
+	sc->new_accounted = (read_domains << 16) | write_domain;
+    } else {
+	uint16_t old_read, old_write;
+	
+	old_read = bo->space_accounted >> 16;
+	old_write = bo->space_accounted & 0xffff;
+	
+	if (write_domain && (old_read & write_domain)) {
+	    sc->new_accounted = write_domain;
+	    /* moving from read to a write domain */
+	    if (write_domain == RADEON_GEM_DOMAIN_VRAM) {
+		sizes->op_read -= bo->size;
+		sizes->op_vram_write += bo->size;
+	    } else if (write_domain == RADEON_GEM_DOMAIN_GTT) {
+		sizes->op_read -= bo->size;
+		sizes->op_gart_write += bo->size;
+	    }
+	} else if (read_domains & old_write) {
+	    sc->new_accounted = bo->space_accounted & 0xffff; /* keep only the previously accounted write domain */
+	} else {
+	    /* rewrite the domains */
+	    if (write_domain != old_write)
+		fprintf(stderr,"WRITE DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, write_domain, old_write);
+	    if (read_domains != old_read)
+		fprintf(stderr,"READ DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, read_domains, old_read);
+	    return RADEON_CS_SPACE_FLUSH; /* requested domains conflict with what was accounted */
+	}
+    }
+    return 0;
+}
+
+static int radeon_cs_do_space_check(struct radeon_cs_int *cs, struct radeon_cs_space_check *new_tmp)
+{
+    struct radeon_cs_manager *csm = cs->csm;
+    int i;
+    struct radeon_bo_int *bo;
+    struct rad_sizes sizes;
+    int ret;
+    /* new_tmp, when non-NULL, is one extra BO checked on top of cs->bos[]. */
+    /* check the totals for this operation */
+
+    if (cs->bo_count == 0 && !new_tmp)
+	return 0;
+
+    memset(&sizes, 0, sizeof(struct rad_sizes));
+
+    /* prepare */
+    for (i = 0; i < cs->bo_count; i++) {
+	ret = radeon_cs_setup_bo(&cs->bos[i], &sizes);
+	if (ret)
+	    return ret;
+    }
+
+    if (new_tmp) {
+	ret = radeon_cs_setup_bo(new_tmp, &sizes);
+	if (ret)
+	    return ret;
+    }
+	
+    if (sizes.op_read < 0) /* read-to-write moves in radeon_cs_setup_bo can drive this negative */
+	    sizes.op_read = 0;
+
+    /* check sizes - operation first */
+    if ((sizes.op_read + sizes.op_gart_write > csm->gart_limit) ||
+	(sizes.op_vram_write > csm->vram_limit)) {
+	    return RADEON_CS_SPACE_OP_TO_BIG;
+    }
+    /* the operation fits by itself; check it on top of what the manager already counts as used */
+    if (((csm->vram_write_used + sizes.op_vram_write) > csm->vram_limit) ||
+	((csm->read_used + csm->gart_write_used + sizes.op_gart_write + sizes.op_read) > csm->gart_limit)) {
+	    return RADEON_CS_SPACE_FLUSH;
+    }
+    /* it fits: add this operation to the manager's running totals */
+    csm->gart_write_used += sizes.op_gart_write;
+    csm->vram_write_used += sizes.op_vram_write;
+    csm->read_used += sizes.op_read;
+    /* commit */
+    for (i = 0; i < cs->bo_count; i++) {
+	    bo = cs->bos[i].bo;
+	    bo->space_accounted = cs->bos[i].new_accounted;
+    }
+    if (new_tmp)
+	new_tmp->bo->space_accounted = new_tmp->new_accounted;
+    
+    return RADEON_CS_SPACE_OK;
+}
+
+/* Append bo with the given domains to the stream's space-check list and take
+ * a reference on it; an identical (bo, domains) entry is not added twice. */
+void radeon_cs_space_add_persistent_bo(struct radeon_cs *cs, struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    struct radeon_cs_space_check *slot;
+    int idx;
+    for (idx = 0; idx < csi->bo_count; idx++) {
+	slot = &csi->bos[idx];
+	if (slot->bo == (struct radeon_bo_int *)bo && slot->read_domains == read_domains &&
+	    slot->write_domain == write_domain)
+	    return;
+    }
+    radeon_bo_ref(bo);
+    slot = &csi->bos[csi->bo_count++];
+    slot->bo = (struct radeon_bo_int *)bo;
+    slot->read_domains = read_domains;
+    slot->write_domain = write_domain;
+    slot->new_accounted = 0;
+    assert(csi->bo_count < MAX_SPACE_BOS);
+}
+
+/* Run the space check, calling cs->space_flush_fn whenever it asks for a
+ * flush.  Returns 0 when the check passes, -1 when the operation is too big
+ * or a flush is still requested after one retry. */
+static int radeon_cs_check_space_internal(struct radeon_cs_int *cs,
+					  struct radeon_cs_space_check *tmp_bo)
+{
+    int attempt;
+
+    for (attempt = 0; ; attempt++) {
+	int status = radeon_cs_do_space_check(cs, tmp_bo);
+	if (status == RADEON_CS_SPACE_OP_TO_BIG)
+	    return -1;
+	if (status != RADEON_CS_SPACE_FLUSH)
+	    return 0;
+	(*cs->space_flush_fn)(cs->space_flush_data);
+	if (attempt > 0)
+	    return -1;
+    }
+}
+
+/*
+ * Space-check the stream's BO list plus, when bo is non-NULL, one extra BO
+ * with the given read/write domains.  Returns what
+ * radeon_cs_check_space_internal() returns: 0 on success, -1 on failure.
+ */
+int radeon_cs_space_check_with_bo(struct radeon_cs *cs,
+				  struct radeon_bo *bo,
+				  uint32_t read_domains, uint32_t write_domain)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    struct radeon_cs_space_check extra;
+
+    if (bo == NULL)
+	return radeon_cs_check_space_internal(csi, NULL);
+    extra.bo = (struct radeon_bo_int *)bo;
+    extra.read_domains = read_domains;
+    extra.write_domain = write_domain;
+    extra.new_accounted = 0;
+    return radeon_cs_check_space_internal(csi, &extra);
+}
+
+int radeon_cs_space_check(struct radeon_cs *cs)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    return radeon_cs_check_space_internal(csi, NULL); /* NULL: no extra BO, only the stream's own list */
+}
+
+/* Unreference every BO on the stream's space-check list and empty the list. */
+void radeon_cs_space_reset_bos(struct radeon_cs *cs)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    int idx;
+    for (idx = 0; idx < csi->bo_count; idx++) {
+	struct radeon_cs_space_check *entry = &csi->bos[idx];
+	radeon_bo_unref((struct radeon_bo *)entry->bo);
+	entry->bo = NULL;
+	entry->read_domains = entry->write_domain = entry->new_accounted = 0;
+    }
+    csi->bo_count = 0;
+}
+
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_debug.c b/src/mesa/drivers/dri/radeon/radeon_debug.c
new file mode 100644
index 0000000000..413000b6c0
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_debug.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright © 2009 Pauli Nieminen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Pauli Nieminen <suokkos@gmail.com>
+ */
+
+#include "utils.h"
+
+#include "radeon_debug.h"
+#include "radeon_common_context.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+
+static const struct dri_debug_control debug_control[] = {
+	{"fall", RADEON_FALLBACKS},
+	{"tex", RADEON_TEXTURE},
+	{"ioctl", RADEON_IOCTL},
+	{"verts", RADEON_VERTS},
+	{"render", RADEON_RENDER},
+	{"swrender", RADEON_SWRENDER},
+	{"state", RADEON_STATE},
+	{"shader", RADEON_SHADER},
+	{"vfmt", RADEON_VFMT},
+	{"vtxf", RADEON_VFMT}, /* second spelling for the same RADEON_VFMT flag */
+	{"dri", RADEON_DRI},
+	{"dma", RADEON_DMA},
+	{"sanity", RADEON_SANITY},
+	{"sync", RADEON_SYNC},
+	{"pixel", RADEON_PIXEL},
+	{"mem", RADEON_MEMORY},
+	{"cs", RADEON_CS},
+	{"allmsg", ~RADEON_SYNC}, /* avoid the term "sync" because the parser uses strstr */
+	{NULL, 0} /* NULL-named last entry; presumably the terminator driParseDebugString looks for */
+};
+
+radeon_debug_type_t radeon_enabled_debug_types;
+
+void radeon_init_debug(void)
+{
+	radeon_enabled_debug_types = driParseDebugString(getenv("RADEON_DEBUG"), debug_control);
+	/* RADEON_GENERAL is switched on regardless of what RADEON_DEBUG contains. */
+	radeon_enabled_debug_types |= RADEON_GENERAL;
+}
+
+/* Append one tab to the current context's debug indent string.  The last
+ * array element is never written; at that depth the call does nothing. */
+void _radeon_debug_add_indent(void)
+{
+	GET_CURRENT_CONTEXT(ctx);
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	const size_t capacity = sizeof(radeon->debug.indent)
+		/ sizeof(radeon->debug.indent[0]);
+	if (radeon->debug.indent_depth < capacity - 1)
+		radeon->debug.indent[radeon->debug.indent_depth++] = '\t';
+}
+
+/* Drop one indentation level: shorten the indent string by one tab. */
+void _radeon_debug_remove_indent(void)
+{
+	GET_CURRENT_CONTEXT(ctx);
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	/* Decrement first so the tab stored by _radeon_debug_add_indent is the one erased. */
+	if (radeon->debug.indent_depth > 0)
+		radeon->debug.indent[--radeon->debug.indent_depth] = '\0';
+}
+
+void _radeon_print(const radeon_debug_type_t type,
+	   const radeon_debug_level_t level,
+	   const char* message,
+	   ...)
+{ /* type and level are not examined here; message is printed unconditionally. */
+	GET_CURRENT_CONTEXT(ctx);
+	if (ctx) { /* without a current context no indent is printed */
+		radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+		// FIXME: Make this multi thread safe
+		if (radeon->debug.indent_depth)
+			fprintf(stderr, "%s", radeon->debug.indent);
+	}
+	va_list values;
+	va_start( values, message );
+	vfprintf(stderr, message, values); /* message is a printf-style format */
+	va_end( values );
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_debug.h b/src/mesa/drivers/dri/radeon/radeon_debug.h
new file mode 100644
index 0000000000..ef8b9671ac
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_debug.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright © 2009 Pauli Nieminen
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Pauli Nieminen <suokkos@gmail.com>
+ */
+
+#ifndef RADEON_DEBUG_H_INCLUDED
+#define RADEON_DEBUG_H_INCLUDED
+
+#include <stdlib.h>
+
+typedef enum radeon_debug_levels {
+	RADEON_CRITICAL  = 0, /* Only errors */
+	RADEON_IMPORTANT = 1, /* Important warnings and messages */
+	RADEON_NORMAL    = 2, /* Normal log messages useful for debugging */
+	RADEON_VERBOSE   = 3, /* Extra details for debugging */
+	RADEON_TRACE     = 4  /* Log about everything that happens */
+} radeon_debug_level_t;
+
+/**
+ * Compile time option to change level of debugging compiled to dri driver.
+ * Selecting critical level is not recommended because performance gains are
+ * going to be minimal but you will lose a lot of important warnings in case
+ * of errors.
+ */
+#ifndef RADEON_DEBUG_LEVEL
+# ifdef DEBUG
+#  define RADEON_DEBUG_LEVEL RADEON_TRACE
+# else
+#  define RADEON_DEBUG_LEVEL RADEON_VERBOSE
+# endif
+#endif
+
+typedef enum radeon_debug_types {
+	RADEON_TEXTURE   = 0x00001,
+	RADEON_STATE     = 0x00002,
+	RADEON_IOCTL     = 0x00004,
+	RADEON_RENDER    = 0x00008,
+	RADEON_SWRENDER  = 0x00010,
+	RADEON_FALLBACKS = 0x00020,
+	RADEON_VFMT      = 0x00040,
+	RADEON_SHADER    = 0x00080,
+	RADEON_CS        = 0x00100,
+	RADEON_DRI       = 0x00200,
+	RADEON_DMA       = 0x00400,
+	RADEON_SANITY    = 0x00800,
+	RADEON_SYNC      = 0x01000,
+	RADEON_PIXEL     = 0x02000,
+	RADEON_MEMORY    = 0x04000,
+	RADEON_VERTS     = 0x08000,
+	RADEON_GENERAL   = 0x10000   /* Used for errors and warnings */
+} radeon_debug_type_t;
+
+#define RADEON_MAX_INDENT 5
+
+struct radeon_debug {
+       size_t indent_depth;            /* current nesting depth; _radeon_print prints indent only when non-zero */
+       char indent[RADEON_MAX_INDENT]; /* string of tabs written by _radeon_debug_add_indent */
+};
+
+extern radeon_debug_type_t radeon_enabled_debug_types;
+
+/**
+ * Compatibility layer for old debug code
+ **/
+#define RADEON_DEBUG radeon_enabled_debug_types
+
+static inline int radeon_is_debug_enabled(const radeon_debug_type_t type,
+	   const radeon_debug_level_t level)
+{
+       return RADEON_DEBUG_LEVEL >= level /* compile-time ceiling on the message level */
+		&& (type & radeon_enabled_debug_types); /* run-time mask of enabled channels */
+}
+/*
+ * define macro for gcc specific __attribute__ if using alternative compiler
+ */
+#ifndef __GNUC__
+#define  __attribute__(x)  /*empty*/
+#endif
+
+
+extern void _radeon_print(const radeon_debug_type_t type,
+	   const radeon_debug_level_t level,
+	   const char* message,
+	   ...)  __attribute__((format(printf,3,4)));
+/**
+ * Print out debug message if channel specified by type is enabled
+ * and compile time debugging level is at least as high as level parameter.
+ * The type and level arguments are each evaluated exactly once.
+ */
+#define radeon_print(type, level, message, ...) do {		\
+	const radeon_debug_level_t _debug_level = (level);	\
+	const radeon_debug_type_t _debug_type = (type);		\
+	/* Compile out if level of message is too high */	\
+	if (radeon_is_debug_enabled(_debug_type, _debug_level))	\
+		_radeon_print(_debug_type, _debug_level,	\
+			(message), ## __VA_ARGS__);		\
+} while(0)
+
+/**
+ * printf style function for writing error messages.
+ */
+#define radeon_error(message, ...) do {				\
+	radeon_print(RADEON_GENERAL, RADEON_CRITICAL,		\
+		(message), ## __VA_ARGS__);			\
+} while(0)
+
+/**
+ * printf style function for writing warnings.
+ */
+#define radeon_warning(message, ...) do {			\
+	radeon_print(RADEON_GENERAL, RADEON_IMPORTANT,		\
+		(message), ## __VA_ARGS__);			\
+} while(0)
+
+extern void radeon_init_debug(void);
+extern void _radeon_debug_add_indent(void);
+extern void _radeon_debug_remove_indent(void);
+
+static inline void radeon_debug_add_indent(void)
+{
+       if (RADEON_DEBUG_LEVEL >= RADEON_VERBOSE) {
+	      _radeon_debug_add_indent();
+       }
+}
+static inline void radeon_debug_remove_indent(void)
+{
+       if (RADEON_DEBUG_LEVEL >= RADEON_VERBOSE) {
+	      _radeon_debug_remove_indent();
+       }
+}
+
+
+/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
+   I suppose we could inline this and use a macro to fetch __LINE__ and so on in case we run into trouble
+   with other compilers ... GLUE!
+*/
+#define WARN_ONCE(a, ...)      do { \
+       static int __warn_once=1; \
+       if(__warn_once){ \
+               radeon_warning("*********************************WARN_ONCE*********************************\n"); \
+               radeon_warning("File %s function %s line %d\n", \
+                       __FILE__, __FUNCTION__, __LINE__); \
+               radeon_warning(  (a), ## __VA_ARGS__);\
+               radeon_warning("***************************************************************************\n"); \
+               __warn_once=0;\
+               } \
+       } while(0)
+
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c
new file mode 100644
index 0000000000..31a45169da
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_dma.c
@@ -0,0 +1,482 @@
+/**************************************************************************
+
+Copyright (C) 2004 Nicolai Haehnle.
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#include <errno.h>
+#include "radeon_common.h"
+#include "main/simple_list.h"
+
+/* COPY_DWORDS(dst, src, nr): copy nr 32-bit words from src to dst and leave
+ * dst pointing just past the words written.  dst must be a pointer lvalue. */
+#if defined(USE_X86_ASM)
+#define COPY_DWORDS( dst, src, nr )					\
+do {									\
+	int __tmp;							\
+	__asm__ __volatile__( "rep ; movsl"				\
+			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
+			      : "0" (nr), "D" ((long)(dst)), "S" ((long)(src)) ); \
+} while (0)
+#else
+#define COPY_DWORDS( dst, src, nr )		\
+do {						\
+   int j;					\
+   for ( j = 0 ; j < (nr) ; j++ )		\
+      (dst)[j] = ((int *)(src))[j];		\
+   (dst) += (nr);				\
+} while (0)
+#endif
+
+/* Copy count one-dword elements, stride bytes apart in data, to out. */
+void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
+{
+	const char *src = (const char *)data;
+	int n;
+
+	if (RADEON_DEBUG & RADEON_VERTS)
+		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+			__FUNCTION__, count, stride, (void *)out, (void *)data);
+
+	if (stride == 4) {
+		COPY_DWORDS(out, data, count);
+		return;
+	}
+	for (n = 0; n < count; n++, src += stride)
+		*out++ = *(const int *)src;
+}
+
+/* Copy count two-dword elements, stride bytes apart in data, to out. */
+void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
+{
+	const char *src = (const char *)data;
+	int n;
+
+	if (RADEON_DEBUG & RADEON_VERTS)
+		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+			__FUNCTION__, count, stride, (void *)out, (void *)data);
+	if (stride == 8) {
+		COPY_DWORDS(out, data, count * 2);
+		return;
+	}
+	for (n = 0; n < count; n++, out += 2, src += stride) {
+		out[0] = *(const int *)src;
+		out[1] = *(const int *)(src + 4);
+	}
+}
+
+/* Copy count three-dword elements, stride bytes apart in data, to out. */
+void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
+{
+	const char *src = (const char *)data;
+	int n;
+
+	if (RADEON_DEBUG & RADEON_VERTS)
+		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+			__FUNCTION__, count, stride, (void *)out, (void *)data);
+
+	if (stride == 12) {
+		COPY_DWORDS(out, data, count * 3);
+		return;
+	}
+	for (n = 0; n < count; n++, out += 3, src += stride) {
+		out[0] = *(const int *)src;
+		out[1] = *(const int *)(src + 4);
+		out[2] = *(const int *)(src + 8);
+	}
+}
+
+/* Copy count four-dword elements, stride bytes apart in data, to out. */
+void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
+{
+	const char *src = (const char *)data;
+	int n;
+
+	if (RADEON_DEBUG & RADEON_VERTS)
+		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
+			__FUNCTION__, count, stride, (void *)out, (void *)data);
+	if (stride == 16) {
+		COPY_DWORDS(out, data, count * 4);
+		return;
+	}
+	for (n = 0; n < count; n++, out += 4, src += stride) {
+		out[0] = *(const int *)src;
+		out[1] = *(const int *)(src + 4);
+		out[2] = *(const int *)(src + 8);
+		out[3] = *(const int *)(src + 12);
+	}
+}
+
+void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
+			 const GLvoid * data, int size, int stride, int count)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	uint32_t *out;
+	/* Copy count elements of size dwords each from data into a newly allocated DMA region recorded in aos. */
+	if (stride == 0) {
+		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
+		count = 1; /* stride 0: only a single element is stored */
+		aos->stride = 0;
+	} else {
+		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
+		aos->stride = size; /* the radeonEmitVec* helpers write the output contiguously */
+	}
+
+	aos->components = size;
+	aos->count = count;
+
+	radeon_bo_map(aos->bo, 1);
+	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
+	switch (size) { /* size selects the helper by element width: 1..4 dwords */
+	case 1: radeonEmitVec4(out, data, stride, count); break;
+	case 2: radeonEmitVec8(out, data, stride, count); break;
+	case 3: radeonEmitVec12(out, data, stride, count); break;
+	case 4: radeonEmitVec16(out, data, stride, count); break;
+	default:
+		assert(0);
+		break;
+	}
+	radeon_bo_unmap(aos->bo);
+}
+
+void radeon_init_dma(radeonContextPtr rmesa)
+{
+	make_empty_list(&rmesa->dma.free);
+	make_empty_list(&rmesa->dma.wait);
+	make_empty_list(&rmesa->dma.reserved);
+	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ; /* starting allocation size; radeonRefillCurrentDmaRegion raises it for larger requests */
+}
+
+/* Put a DMA buffer of at least size bytes at the head of dma.reserved, map it
+ * and reset the current_used/current_vertexptr cursors.  The tail of dma.free
+ * is reused when it is large enough; otherwise a new buffer is opened. */
+void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
+{
+	struct radeon_dma_bo *dma_bo = NULL;
+	/* we set minimum sizes to at least requested size
+	   aligned to next 16 bytes. */
+	if (size > rmesa->dma.minimum_size)
+		rmesa->dma.minimum_size = (size + 15) & (~15);
+	/* %zu as in radeonReleaseDmaRegions; the previous %Zi is a glibc-only signed spelling */
+	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zu\n",
+			__FUNCTION__, size, rmesa->dma.minimum_size);
+	if (is_empty_list(&rmesa->dma.free)
+	      || last_elem(&rmesa->dma.free)->bo->size < size) {
+		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
+		assert(dma_bo);
+again_alloc:
+		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
+					    0, rmesa->dma.minimum_size, 4,
+					    RADEON_GEM_DOMAIN_GTT, 0);
+		/* Open failed: flush the command buffer and try again (there is no retry limit). */
+		if (!dma_bo->bo) {
+			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
+			goto again_alloc;
+		}
+		insert_at_head(&rmesa->dma.reserved, dma_bo);
+	} else {
+		/* We push and pop buffers from end of list so we can keep
+		   counter on unused buffers for later freeing them from
+		   begin of list */
+		dma_bo = last_elem(&rmesa->dma.free);
+		remove_from_list(dma_bo);
+		insert_at_head(&rmesa->dma.reserved, dma_bo);
+	}
+
+	rmesa->dma.current_used = 0;
+	rmesa->dma.current_vertexptr = 0;
+
+	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
+					  first_elem(&rmesa->dma.reserved)->bo,
+					  RADEON_GEM_DOMAIN_GTT, 0))
+		fprintf(stderr,"failure to revalidate BOs - badness\n");
+	if (is_empty_list(&rmesa->dma.reserved)) {
+        /* Cmd buff have been flushed in radeon_revalidate_bos */
+		goto again_alloc; /* NOTE(review): dma_bo is reused as-is here; confirm it is not still linked in another list */
+	}
+	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
+}
+
+/* Allocates a region from rmesa->dma.current.  If there isn't enough
+ * space in current, grab a new buffer (and discard what was left of current)
+ */
+void radeonAllocDmaRegion(radeonContextPtr rmesa,
+			  struct radeon_bo **pbo, int *poffset,
+			  int bytes, int alignment)
+{
+	if (RADEON_DEBUG & RADEON_IOCTL)
+		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
+
+	if (rmesa->dma.flush)
+		rmesa->dma.flush(rmesa->glCtx);
+
+	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
+	/* The mask arithmetic below only rounds up correctly when alignment is a power of two. */
+	alignment--;
+	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
+
+	if (is_empty_list(&rmesa->dma.reserved)
+		|| rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
+		radeonRefillCurrentDmaRegion(rmesa, bytes); /* resets current_used to 0 on a new head buffer */
+
+	*poffset = rmesa->dma.current_used;
+	*pbo = first_elem(&rmesa->dma.reserved)->bo;
+	radeon_bo_ref(*pbo); /* the BO handed back through pbo carries its own reference */
+
+	/* Always align to at least 16 bytes */
+	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
+	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
+
+	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
+}
+
+/* Empties one dma list: each entry is unlinked, its buffer object
+ * reference is dropped and the list node itself is freed. */
+static void radeon_dma_free_list(struct radeon_dma_bo *list)
+{
+	struct radeon_dma_bo *entry;
+	struct radeon_dma_bo *next;
+
+	foreach_s(entry, next, list) {
+		remove_from_list(entry);
+		radeon_bo_unref(entry->bo);
+		FREE(entry);
+	}
+}
+
+/* Destroys every dma buffer the context tracks, on the free, wait and
+ * reserved lists alike. */
+void radeonFreeDmaRegions(radeonContextPtr rmesa)
+{
+	if (RADEON_DEBUG & RADEON_DMA)
+		fprintf(stderr, "%s\n", __FUNCTION__);
+
+	radeon_dma_free_list(&rmesa->dma.free);
+	radeon_dma_free_list(&rmesa->dma.wait);
+	radeon_dma_free_list(&rmesa->dma.reserved);
+}
+
+/* Gives back the last return_bytes bytes of the current dma allocation. */
+void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
+{
+	struct radeon_dma *dma = &rmesa->dma;
+	if (!is_empty_list(&dma->reserved)) {
+		if (RADEON_DEBUG & RADEON_IOCTL)
+			fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
+		dma->current_vertexptr = dma->current_used -= return_bytes;
+	}
+}
+
+static int radeon_bo_is_idle(struct radeon_bo* bo)
+{
+	uint32_t busy_domain;	/* filled in by the query, not used here */
+	const int status = radeon_bo_is_busy(bo, &busy_domain);
+	if (status == -EINVAL) {
+		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
+			"This may cause small performance drop for you.\n");
+	}
+	return !(status == -EBUSY);	/* every other result counts as idle */
+}
+
+void radeonReleaseDmaRegions(radeonContextPtr rmesa)
+{
+	struct radeon_dma_bo *dma_bo;
+	struct radeon_dma_bo *temp;
+	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME; /* stamp given to buffers moved below */
+	const int time = rmesa->dma.free.expire_counter; /* counter value after the increment */
+	/* Ages the dma lists by one step: wait -> free, reserved -> wait, expired free buffers are destroyed. */
+	if (RADEON_DEBUG & RADEON_DMA) {
+		size_t free = 0,
+		       wait = 0,
+		       reserved = 0;
+		foreach(dma_bo, &rmesa->dma.free)
+			++free;
+
+		foreach(dma_bo, &rmesa->dma.wait)
+			++wait;
+
+		foreach(dma_bo, &rmesa->dma.reserved)
+			++reserved;
+
+		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
+		      __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
+	}
+
+	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+		/* request updated cs processing information from kernel */
+		legacy_track_pending(rmesa->radeonScreen->bom, 0);
+	}
+
+	/* move waiting bos to free list.
+	   wait list provides gpu time to handle data before reuse */
+	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
+		if (dma_bo->expire_counter == time) { /* stamp reached while still on the wait list */
+			WARN_ONCE("Leaking dma buffer object!\n");
+			radeon_bo_unref(dma_bo->bo);
+			remove_from_list(dma_bo);
+			FREE(dma_bo);
+			continue;
+		}
+		/* free objects that are too small to be used because of large request */
+		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
+		   radeon_bo_unref(dma_bo->bo);
+		   remove_from_list(dma_bo);
+		   FREE(dma_bo);
+		   continue;
+		}
+		if (!radeon_bo_is_idle(dma_bo->bo)) {
+			if (rmesa->radeonScreen->driScreen->dri2.enabled)
+				break; /* dri2: stop scanning at the first busy bo */
+			continue; /* otherwise leave this one waiting and look at the next */
+		}
+		remove_from_list(dma_bo);
+		dma_bo->expire_counter = expire_at;
+		insert_at_tail(&rmesa->dma.free, dma_bo);
+	}
+
+	/* move reserved to wait list */
+	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
+		radeon_bo_unmap(dma_bo->bo);
+		/* free objects that are too small to be used because of large request */
+		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
+		   radeon_bo_unref(dma_bo->bo);
+		   remove_from_list(dma_bo);
+		   FREE(dma_bo);
+		   continue;
+		}
+		remove_from_list(dma_bo);
+		dma_bo->expire_counter = expire_at;
+		insert_at_tail(&rmesa->dma.wait, dma_bo);
+	}
+
+	/* free bos that have been unused for some time */
+	foreach_s(dma_bo, temp, &rmesa->dma.free) {
+		if (dma_bo->expire_counter != time)
+			break; /* stop at the first entry whose stamp differs from the counter */
+		remove_from_list(dma_bo);
+	        radeon_bo_unref(dma_bo->bo);
+		FREE(dma_bo);
+	}
+
+}
+
+
+/* Flush vertices in the current dma region: clears dma->flush, hands any
+ * pending swtcl vertices to vtbl.swtcl_flush and releases swtcl.bo. */
+void rcommon_flush_last_swtcl_prim( GLcontext *ctx  )
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	struct radeon_dma *dma = &rmesa->dma;
+
+	if (RADEON_DEBUG & RADEON_IOCTL)
+		fprintf(stderr, "%s\n", __FUNCTION__);
+	dma->flush = NULL;
+
+	radeon_bo_unmap(rmesa->swtcl.bo);
+
+	if (!is_empty_list(&dma->reserved)) {
+	    GLuint current_offset = dma->current_used; /* where the unflushed vertices start */
+
+	    assert (dma->current_used +
+		    rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+		    dma->current_vertexptr);
+
+	    if (dma->current_used != dma->current_vertexptr) { /* something was written since the last flush */
+		    dma->current_used = dma->current_vertexptr;
+
+		    rmesa->vtbl.swtcl_flush(ctx, current_offset);
+	    }
+	    rmesa->swtcl.numverts = 0;
+	}
+	radeon_bo_unref(rmesa->swtcl.bo);
+	rmesa->swtcl.bo = NULL;
+}
+/* Alloc space in the current dma region for nverts vertices of vsize bytes.
+ * Returns NULL, after refilling the region, when the request did not fit. */
+void *
+rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
+{
+	GLuint bytes = vsize * nverts;
+	void *head;
+	if (RADEON_DEBUG & RADEON_IOCTL)
+		fprintf(stderr, "%s\n", __FUNCTION__);
+
+	if(is_empty_list(&rmesa->dma.reserved)
+	      ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
+		if (rmesa->dma.flush) {
+			rmesa->dma.flush(rmesa->glCtx);
+		}
+
+                radeonRefillCurrentDmaRegion(rmesa, bytes);
+
+		return NULL; /* nothing is handed out on this call */
+	}
+
+        if (!rmesa->dma.flush) {
+		/* if cmdbuf flushed DMA restart */
+                rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+                rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
+        }
+
+	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
+        ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
+        ASSERT( rmesa->dma.current_used +
+                rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
+                rmesa->dma.current_vertexptr );
+
+	if (!rmesa->swtcl.bo) { /* first vertices since the last flush: reference and map the head buffer */
+		rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
+		radeon_bo_ref(rmesa->swtcl.bo);
+		radeon_bo_map(rmesa->swtcl.bo, 1);
+	}
+
+	head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
+	rmesa->dma.current_vertexptr += bytes;
+	rmesa->swtcl.numverts += nverts;
+	return head;
+}
+
+/* Runs the pending dma flush callback, if any, then drops the buffer object
+ * reference held by each used tcl.aos slot (newinputs is not consulted). */
+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
+{
+   radeonContextPtr radeon = RADEON_CONTEXT( ctx );
+   int slot;
+
+   if (RADEON_DEBUG & RADEON_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   if (radeon->dma.flush)
+      radeon->dma.flush(radeon->glCtx);
+   for (slot = 0; slot < radeon->tcl.aos_count; slot++) {
+      if (!radeon->tcl.aos[slot].bo)
+         continue;
+      radeon_bo_unref(radeon->tcl.aos[slot].bo);
+      radeon->tcl.aos[slot].bo = NULL;
+   }
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.h b/src/mesa/drivers/dri/radeon/radeon_dma.h
new file mode 100644
index 0000000000..74e653fd18
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_dma.h
@@ -0,0 +1,58 @@
+/**************************************************************************
+
+Copyright (C) 2004 Nicolai Haehnle.
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+#ifndef RADEON_DMA_H
+#define RADEON_DMA_H
+
+void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count);
+void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count);
+void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count);
+void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count);
+
+void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
+			 const GLvoid * data, int size, int stride, int count);
+
+/* dma buffer management; each function is declared exactly once */
+void radeon_init_dma(radeonContextPtr rmesa);
+void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size);
+void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes);
+void radeonAllocDmaRegion(radeonContextPtr rmesa,
+			  struct radeon_bo **pbo, int *poffset,
+			  int bytes, int alignment);
+void radeonReleaseDmaRegions(radeonContextPtr rmesa);
+
+void rcommon_flush_last_swtcl_prim(GLcontext *ctx);
+
+void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize);
+void radeonFreeDmaRegions(radeonContextPtr rmesa);
+void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs );
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c
new file mode 100644
index 0000000000..517485091a
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c
@@ -0,0 +1,633 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Red Hat Inc.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "main/fbobject.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/context.h"
+#include "main/texrender.h"
+#include "drivers/common/meta.h"
+
+#include "radeon_common.h"
+#include "radeon_mipmap_tree.h"
+
+#define FILE_DEBUG_FLAG RADEON_TEXTURE /* RADEON_DEBUG bit that enables DBG() output in this file */
+#define DBG(...) do {                                           \
+        if (RADEON_DEBUG & FILE_DEBUG_FLAG)                      \
+                printf(__VA_ARGS__);                      \
+} while(0)
+
+static struct gl_framebuffer *
+radeon_new_framebuffer(GLcontext *ctx, GLuint name)
+{
+  return _mesa_new_framebuffer(ctx, name); /* NewFramebuffer hook: forwards straight to core Mesa */
+}
+
+/* gl_renderbuffer::Delete hook: drops the buffer object reference, if there
+ * is one, and frees the radeon_renderbuffer itself. */
+static void
+radeon_delete_renderbuffer(struct gl_renderbuffer *rb)
+{
+  struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
+
+  radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s(rb %p, rrb %p) \n",
+		__func__, rb, rrb);
+  ASSERT(rrb);
+
+  if (rrb != NULL && rrb->bo != NULL)
+    radeon_bo_unref(rrb->bo);
+  free(rrb);
+}
+
+static void *
+radeon_get_pointer(GLcontext *ctx, struct gl_renderbuffer *rb,
+		   GLint x, GLint y)
+{
+  radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s(%p, rb %p) \n",
+		__func__, ctx, rb);
+  /* GetPointer hook: x and y are ignored and no address is ever handed out */
+  return NULL;
+}
+
+/**
+ * Called via glRenderbufferStorageEXT() to set the format and allocate
+ * storage for a user-created renderbuffer.
+ *
+ * Any buffer object the renderbuffer already owns is released and a new
+ * VRAM buffer object is opened, with rows padded to a multiple of 64 bytes.
+ *
+ * \return GL_TRUE on success, GL_FALSE when internalFormat is not handled
+ *         or the buffer object could not be opened.
+ */
+static GLboolean
+radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+                                 GLenum internalFormat,
+                                 GLuint width, GLuint height)
+{
+  struct radeon_context *radeon = RADEON_CONTEXT(ctx);
+  struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
+  uint32_t pitch, size;
+  int cpp;
+
+  radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s(%p, rb %p) \n",
+		__func__, ctx, rb);
+
+  ASSERT(rb->Name != 0);
+  switch (internalFormat) {
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+      rb->Format = _dri_texformat_rgb565;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      cpp = 2;
+      break;
+   case GL_RGB:
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      rb->Format = _dri_texformat_argb8888;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      cpp = 4;
+      break;
+   case GL_RGBA:
+   case GL_RGBA2:
+   case GL_RGBA4:
+   case GL_RGB5_A1:
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      rb->Format = _dri_texformat_argb8888;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      cpp = 4;
+      break;
+   case GL_STENCIL_INDEX:
+   case GL_STENCIL_INDEX1_EXT:
+   case GL_STENCIL_INDEX4_EXT:
+   case GL_STENCIL_INDEX8_EXT:
+   case GL_STENCIL_INDEX16_EXT:
+      /* alloc a depth+stencil buffer */
+      rb->Format = MESA_FORMAT_S8_Z24;
+      rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
+      cpp = 4;
+      break;
+   case GL_DEPTH_COMPONENT16:
+      rb->Format = MESA_FORMAT_Z16;
+      rb->DataType = GL_UNSIGNED_SHORT;
+      cpp = 2;
+      break;
+   case GL_DEPTH_COMPONENT:
+   case GL_DEPTH_COMPONENT24:
+   case GL_DEPTH_COMPONENT32:
+      rb->Format = MESA_FORMAT_X8_Z24;
+      rb->DataType = GL_UNSIGNED_INT;
+      cpp = 4;
+      break;
+   case GL_DEPTH_STENCIL_EXT:
+   case GL_DEPTH24_STENCIL8_EXT:
+      rb->Format = MESA_FORMAT_S8_Z24;
+      rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
+      cpp = 4;
+      break;
+   default:
+      _mesa_problem(ctx,
+                    "Unexpected format in radeon_alloc_renderbuffer_storage");
+      return GL_FALSE;
+   }
+
+  rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat);
+
+  if (ctx->Driver.Flush)
+	  ctx->Driver.Flush(ctx); /* +r6/r7 */
+
+  /* forget the old storage so a failed open cannot leave a stale pointer */
+  if (rrb->bo) {
+    radeon_bo_unref(rrb->bo);
+    rrb->bo = NULL;
+  }
+
+  /* pitch is in pixels: the row is padded to 64 bytes, then divided by cpp */
+  pitch = ((cpp * width + 63) & ~63) / cpp;
+  size = pitch * height * cpp;
+
+  if (RADEON_DEBUG & RADEON_MEMORY)
+     fprintf(stderr,"Allocating %u x %u radeon RBO (pitch %u)\n", width,
+	     height, pitch);
+
+  rrb->bo = radeon_bo_open(radeon->radeonScreen->bom, 0, size, 0,
+			   RADEON_GEM_DOMAIN_VRAM, 0);
+  if (!rrb->bo)
+    return GL_FALSE; /* the buffer object could not be created */
+
+  rrb->pitch = pitch * cpp;
+  rrb->cpp = cpp;
+  rb->Width = width;
+  rb->Height = height;
+  return GL_TRUE;
+}
+
+
+/**
+ * AllocStorage hook for the hardware renderbuffers of a _window_; it is
+ * invoked when the window is resized and only records the new values.
+ * Not used for user-created renderbuffers!
+ */
+static GLboolean
+radeon_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+                           GLenum internalFormat, GLuint width, GLuint height)
+{
+   ASSERT(rb->Name == 0);
+   radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s(%p, rb %p) \n",
+		__func__, ctx, rb);
+
+   /* no buffer object is touched here, only the bookkeeping fields */
+   rb->InternalFormat = internalFormat;
+   rb->Height = height;
+   rb->Width = width;
+   return GL_TRUE;
+}
+
+
+/* ResizeBuffers hook: resizes the framebuffer through core Mesa and, for the
+ * window-system framebuffer (Name == 0), re-runs AllocStorage on the two
+ * color renderbuffers whose size differs from the new one. */
+static void
+radeon_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb,
+		     GLuint width, GLuint height)
+{
+   struct radeon_framebuffer *radeon_fb = (struct radeon_framebuffer*)fb;
+   int i;
+
+   radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s(%p, fb %p) \n",
+		__func__, ctx, fb);
+
+   _mesa_resize_framebuffer(ctx, fb, width, height);
+   fb->Initialized = GL_TRUE; /* XXX remove someday */
+
+   if (fb->Name != 0)
+      return; /* user-created framebuffer: no window renderbuffers to update */
+
+   /* Make sure all window system renderbuffers are up to date */
+   for (i = 0; i < 2; i++) {
+      struct radeon_renderbuffer *rrb = radeon_fb->color_rb[i];
+
+      /* test the wrapper itself: &rrb->base on a NULL rrb is not valid C */
+      if (rrb && (rrb->base.Width != width || rrb->base.Height != height))
+	 rrb->base.AllocStorage(ctx, &rrb->base, rrb->base.InternalFormat, width, height);
+   }
+}
+
+
+/** Dummy gl_renderbuffer::AllocStorage(): reports a problem and fails. */
+static GLboolean
+radeon_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+			 GLenum internalFormat, GLuint width, GLuint height)
+{
+   _mesa_problem(ctx, "radeon_nop_alloc_storage should never be called.");
+   return GL_FALSE;
+}
+
+
+/**
+ * Create a renderbuffer for a window's color, depth and/or stencil buffer.
+ * Not used for user-created renderbuffers.
+ *
+ * The renderbuffer starts without a buffer object and gets the radeon
+ * Delete, window AllocStorage and GetPointer hooks.
+ *
+ * \param format       Mesa format of the buffer; must be one of the cases below
+ * \param driDrawPriv  drawable recorded in the renderbuffer's dPriv field
+ * \return the new renderbuffer, or NULL when the allocation fails or the
+ *         format is not handled.
+ */
+struct radeon_renderbuffer *
+radeon_create_renderbuffer(gl_format format, __DRIdrawable *driDrawPriv)
+{
+    struct radeon_renderbuffer *rrb = CALLOC_STRUCT(radeon_renderbuffer);
+    GLenum data_type = GL_UNSIGNED_BYTE;
+    GLenum base_format = GL_NONE;
+
+    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s( rrb %p ) \n",
+		__func__, rrb);
+
+    if (rrb == NULL)
+	return NULL;
+
+    _mesa_init_renderbuffer(&rrb->base, 0);
+    rrb->base.ClassID = RADEON_RB_CLASS;
+    rrb->base.Format = format;
+
+    /* The plain color formats are asserted to appear on little endian hosts
+     * only, their _REV variants on big endian hosts only. */
+    switch (format) {
+    case MESA_FORMAT_RGB565:
+    case MESA_FORMAT_XRGB8888:
+	assert(_mesa_little_endian());
+	base_format = GL_RGB;
+	break;
+    case MESA_FORMAT_RGB565_REV:
+    case MESA_FORMAT_XRGB8888_REV:
+	assert(!_mesa_little_endian());
+	base_format = GL_RGB;
+	break;
+    case MESA_FORMAT_ARGB8888:
+	assert(_mesa_little_endian());
+	base_format = GL_RGBA;
+	break;
+    case MESA_FORMAT_ARGB8888_REV:
+	assert(!_mesa_little_endian());
+	base_format = GL_RGBA;
+	break;
+    /* stencil and depth formats carry no endianness assertion */
+    case MESA_FORMAT_S8:
+	base_format = GL_STENCIL_INDEX;
+	break;
+    case MESA_FORMAT_Z16:
+	data_type = GL_UNSIGNED_SHORT;
+	base_format = GL_DEPTH_COMPONENT;
+	break;
+    case MESA_FORMAT_X8_Z24:
+	data_type = GL_UNSIGNED_INT;
+	base_format = GL_DEPTH_COMPONENT;
+	break;
+    case MESA_FORMAT_S8_Z24:
+	data_type = GL_UNSIGNED_INT_24_8_EXT;
+	base_format = GL_DEPTH_STENCIL;
+	break;
+    default:
+	fprintf(stderr, "%s: Unknown format %s\n",
+                    __FUNCTION__, _mesa_get_format_name(format));
+	_mesa_delete_renderbuffer(&rrb->base);
+	return NULL;
+    }
+
+    rrb->base.DataType = data_type;
+    rrb->base._BaseFormat = base_format;
+    rrb->base.InternalFormat = _mesa_get_format_base_format(format);
+    rrb->dPriv = driDrawPriv;
+
+    /* window buffers use the AllocStorage variant that only records sizes */
+    rrb->base.Delete = radeon_delete_renderbuffer;
+    rrb->base.AllocStorage = radeon_alloc_window_storage;
+    rrb->base.GetPointer = radeon_get_pointer;
+
+    rrb->bo = NULL;
+    return rrb;
+}
+
+/* NewRenderbuffer hook: allocates a radeon_renderbuffer for a user-created
+ * renderbuffer and installs the radeon Delete/AllocStorage/GetPointer hooks.
+ * Returns NULL when the allocation fails. */
+static struct gl_renderbuffer *
+radeon_new_renderbuffer(GLcontext * ctx, GLuint name)
+{
+  struct radeon_renderbuffer *rrb = CALLOC_STRUCT(radeon_renderbuffer);
+  struct gl_renderbuffer *rb;
+
+  radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s(%p, rrb %p) \n",
+		__func__, ctx, rrb);
+  if (rrb == NULL)
+    return NULL;
+
+  rb = &rrb->base;
+  _mesa_init_renderbuffer(rb, name);
+  rb->ClassID = RADEON_RB_CLASS;
+  rb->GetPointer = radeon_get_pointer;
+  rb->AllocStorage = radeon_alloc_renderbuffer_storage;
+  rb->Delete = radeon_delete_renderbuffer;
+  return rb;
+}
+
+/* BindFramebuffer hook: only a bind that affects the draw framebuffer needs
+ * work; a bind for GL_READ_FRAMEBUFFER_EXT alone is ignored. */
+static void
+radeon_bind_framebuffer(GLcontext * ctx, GLenum target,
+                       struct gl_framebuffer *fb, struct gl_framebuffer *fbread)
+{
+   const GLboolean binds_draw =
+      target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT;
+   radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s(%p, fb %p, target %s) \n",
+		__func__, ctx, fb,
+		_mesa_lookup_enum_by_nr(target));
+
+   if (binds_draw)
+      radeon_draw_buffer(ctx, fb);
+}
+
+/* FramebufferRenderbuffer hook: flushes, lets core Mesa attach rb to fb and
+ * then calls radeon_draw_buffer() for fb. */
+static void
+radeon_framebuffer_renderbuffer(GLcontext * ctx,
+                               struct gl_framebuffer *fb,
+                               GLenum attachment, struct gl_renderbuffer *rb)
+{
+   if (ctx->Driver.Flush != NULL) {
+      ctx->Driver.Flush(ctx); /* +r6/r7 */
+   }
+   radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s(%p, fb %p, rb %p) \n",
+		__func__, ctx, fb, rb);
+   _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
+   radeon_draw_buffer(ctx, fb);
+}
+
+static GLboolean
+radeon_update_wrapper(GLcontext *ctx, struct radeon_renderbuffer *rrb, 
+		     struct gl_texture_image *texImage)
+{ /* Copies format, size and hooks from texImage into the wrapper rrb. */
+	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s(%p, rrb %p, texImage %p, texFormat %s) \n",
+		__func__, ctx, rrb, texImage, _mesa_get_format_name(texImage->TexFormat));
+
+	switch (texImage->TexFormat) {
+		case MESA_FORMAT_RGBA8888:
+		case MESA_FORMAT_RGBA8888_REV:
+		case MESA_FORMAT_ARGB8888:
+		case MESA_FORMAT_ARGB8888_REV:
+		case MESA_FORMAT_XRGB8888:
+		case MESA_FORMAT_XRGB8888_REV:
+		case MESA_FORMAT_RGB565:
+		case MESA_FORMAT_RGB565_REV:
+		case MESA_FORMAT_RGBA5551:
+		case MESA_FORMAT_ARGB1555:
+		case MESA_FORMAT_ARGB1555_REV:
+		case MESA_FORMAT_ARGB4444:
+		case MESA_FORMAT_ARGB4444_REV:
+			rrb->base.DataType = GL_UNSIGNED_BYTE;
+			break;
+		case MESA_FORMAT_Z16:
+			rrb->base.DataType = GL_UNSIGNED_SHORT;
+			break;
+		case MESA_FORMAT_X8_Z24:
+			rrb->base.DataType = GL_UNSIGNED_INT;
+			break;
+		case MESA_FORMAT_S8_Z24:
+			rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT;
+			break;
+	}
+	/* no default case: formats not listed keep the DataType rrb already had */
+	rrb->cpp = _mesa_get_format_bytes(texImage->TexFormat);
+	rrb->pitch = texImage->Width * rrb->cpp; /* image width times bytes per pixel, no padding */
+	rrb->base.Format = texImage->TexFormat;
+	rrb->base.InternalFormat = texImage->InternalFormat;
+	rrb->base._BaseFormat = _mesa_base_fbo_format(ctx, rrb->base.InternalFormat);
+	rrb->base.Width = texImage->Width;
+	rrb->base.Height = texImage->Height;
+	rrb->base.Delete = radeon_delete_renderbuffer;
+	rrb->base.AllocStorage = radeon_nop_alloc_storage;
+
+	return GL_TRUE; /* no path in this function returns GL_FALSE */
+}
+
+
+/* Builds a radeon_renderbuffer that stands in for texImage.  Returns NULL
+ * after raising GL_OUT_OF_MEMORY when the wrapper cannot be allocated, and
+ * NULL without an error when radeon_update_wrapper() reports failure; the
+ * wrapper is freed in that case. */
+static struct radeon_renderbuffer *
+radeon_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage)
+{
+   /* not significant, but distinct for debugging */
+   const GLuint wrapper_name = ~0;
+   struct radeon_renderbuffer *wrapper = CALLOC_STRUCT(radeon_renderbuffer);
+
+   radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s(%p, rrb %p, texImage %p) \n",
+		__func__, ctx, wrapper, texImage);
+
+   if (wrapper == NULL) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glFramebufferTexture");
+      return NULL;
+   }
+
+   _mesa_init_renderbuffer(&wrapper->base, wrapper_name);
+   wrapper->base.ClassID = RADEON_RB_CLASS;
+
+   if (radeon_update_wrapper(ctx, wrapper, texImage))
+      return wrapper;
+
+   free(wrapper);
+   return NULL;
+}
+static void
+radeon_render_texture(GLcontext * ctx,
+                     struct gl_framebuffer *fb,
+                     struct gl_renderbuffer_attachment *att)
+{
+   struct gl_texture_image *newImage
+      = att->Texture->Image[att->CubeMapFace][att->TextureLevel];
+   struct radeon_renderbuffer *rrb = radeon_renderbuffer(att->Renderbuffer);
+   radeon_texture_image *radeon_image;
+   GLuint imageOffset;
+   /* RenderTexture hook: points a wrapper renderbuffer at the miptree image selected by att */
+  radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s(%p, fb %p, rrb %p, att %p)\n",
+		__func__, ctx, fb, rrb, att);
+
+   (void) fb;
+
+   ASSERT(newImage);
+
+   radeon_image = (radeon_texture_image *)newImage;
+
+   if (!radeon_image->mt || newImage->Border != 0) {
+      /* Fallback on drawing to a texture without a miptree, or with a
+       * border: drop the wrapper and let _mesa_render_texture() handle it. */
+      _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
+      _mesa_render_texture(ctx, fb, att);
+      return;
+   }
+   else if (!rrb) {
+      rrb = radeon_wrap_texture(ctx, newImage);
+      if (rrb) {
+         /* bind the wrapper to the attachment point */
+         _mesa_reference_renderbuffer(&att->Renderbuffer, &rrb->base);
+      }
+      else {
+         /* fallback to software rendering */
+         _mesa_render_texture(ctx, fb, att);
+         return;
+      }
+   }
+
+   if (!radeon_update_wrapper(ctx, rrb, newImage)) {
+       _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
+       _mesa_render_texture(ctx, fb, att);
+       return;
+   }
+
+   DBG("Begin render texture tid %lx tex=%u w=%d h=%d refcount=%d\n",
+       _glthread_GetID(),
+       att->Texture->Name, newImage->Width, newImage->Height,
+       rrb->base.RefCount);
+
+   /* make the renderbuffer reference the miptree's buffer object */
+   if (rrb->bo != radeon_image->mt->bo) {
+      if (rrb->bo)
+  	radeon_bo_unref(rrb->bo);
+      rrb->bo = radeon_image->mt->bo;
+      radeon_bo_ref(rrb->bo);
+   }
+
+   /* compute offset of the particular 2D image within the texture region */
+   imageOffset = radeon_miptree_image_offset(radeon_image->mt,
+                                            att->CubeMapFace,
+                                            att->TextureLevel);
+
+   if (att->Texture->Target == GL_TEXTURE_3D) {
+      imageOffset += radeon_image->mt->levels[att->TextureLevel].rowstride *
+                     radeon_image->mt->levels[att->TextureLevel].height *
+                     att->Zoffset; /* rowstride * height per slice, Zoffset slices skipped */
+   }
+
+   /* store that offset in the renderbuffer, along with the miptree level's
+    * rowstride as pitch (this replaces the pitch set by radeon_update_wrapper) */
+   rrb->draw_offset = imageOffset;
+   rrb->pitch = radeon_image->mt->levels[att->TextureLevel].rowstride;
+
+   /* update drawing region, etc */
+   radeon_draw_buffer(ctx, fb);
+}
+
+static void
+radeon_finish_render_texture(GLcontext * ctx,
+                            struct gl_renderbuffer_attachment *att)
+{
+   /* FinishRenderTexture hook: nothing is done here */
+}
+/* ValidateFramebuffer hook: marks fb GL_FRAMEBUFFER_UNSUPPORTED as soon as
+ * one texture attachment uses a format the hardware cannot render to. */
+static void
+radeon_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	int i;
+
+	/* -2 is the depth attachment, -1 stencil, 0.. the color attachments */
+	for (i = -2; i < (GLint) ctx->Const.MaxColorAttachments; i++) {
+		struct gl_renderbuffer_attachment *att;
+		gl_format mesa_format;
+
+		switch (i) {
+		case -2: att = &fb->Attachment[BUFFER_DEPTH]; break;
+		case -1: att = &fb->Attachment[BUFFER_STENCIL]; break;
+		default: att = &fb->Attachment[BUFFER_COLOR0 + i]; break;
+		}
+
+		/* All renderbuffer formats are renderable, but not sampable */
+		if (att->Type != GL_TEXTURE)
+			continue;
+
+		mesa_format = att->Texture->Image[att->CubeMapFace][att->TextureLevel]->TexFormat;
+		if (radeon->vtbl.is_format_renderable(mesa_format))
+			continue;
+		fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED;
+		radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+					"%s: HW doesn't support format %s as output format of attachment %d\n",
+					__FUNCTION__, _mesa_get_format_name(mesa_format), i);
+		return;
+	}
+}
+
+void radeon_fbo_init(struct radeon_context *radeon)
+{ /* Installs the framebuffer-object driver hooks on radeon->glCtx. */
+  radeon->glCtx->Driver.NewFramebuffer = radeon_new_framebuffer;
+  radeon->glCtx->Driver.NewRenderbuffer = radeon_new_renderbuffer;
+  radeon->glCtx->Driver.BindFramebuffer = radeon_bind_framebuffer;
+  radeon->glCtx->Driver.FramebufferRenderbuffer = radeon_framebuffer_renderbuffer;
+  radeon->glCtx->Driver.RenderTexture = radeon_render_texture;
+  radeon->glCtx->Driver.FinishRenderTexture = radeon_finish_render_texture;
+  radeon->glCtx->Driver.ResizeBuffers = radeon_resize_buffers;
+  radeon->glCtx->Driver.ValidateFramebuffer = radeon_validate_framebuffer;
+  radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer; /* the only hook not defined in this file */
+}
+
+  
+void radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb,
+				struct radeon_bo *bo)
+{
+  struct radeon_bo *previous = rb->bo;
+  /* reference the new buffer first: bo may be the one already attached */
+  radeon_bo_ref(bo);
+  rb->bo = bo;
+  if (previous != NULL)
+    radeon_bo_unref(previous);
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
new file mode 100644
index 0000000000..5ac526c6de
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c
@@ -0,0 +1,639 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <sched.h>
+#include <errno.h>
+
+#include "main/attrib.h"
+#include "main/bufferobj.h"
+#include "swrast/swrast.h"
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/simple_list.h"
+#include "swrast/swrast.h"
+
+#include "radeon_context.h"
+#include "radeon_common.h"
+#include "radeon_ioctl.h"
+
+#define STANDALONE_MMIO
+
+#include "vblank.h"
+
+#define RADEON_TIMEOUT             512
+#define RADEON_IDLE_RETRY           16
+
+
+/* =============================================================
+ * Kernel command buffer handling
+ */
+
+/* The state atoms will be emitted in the order they appear in the atom list,
+ * so this step is important.
+ */
+void radeonSetUpAtomList( r100ContextPtr rmesa )
+{
+   int i, mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits;
+
+   make_empty_list(&rmesa->radeon.hw.atomlist);
+   rmesa->radeon.hw.atomlist.name = "atom-list";
+
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ctx);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.set);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lin);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msk);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.vpt);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tcl);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msc);
+   for (i = 0; i < mtu; ++i) {
+       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i]);
+       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.txr[i]);
+       insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i]);
+   }
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.zbs);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mtl);
+   for (i = 0; i < 3 + mtu; ++i)
+      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i]);
+   for (i = 0; i < 8; ++i)
+      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i]);
+   for (i = 0; i < 6; ++i)
+      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i]);
+   if (rmesa->radeon.radeonScreen->kernel_mm)
+      insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.stp);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.eye);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.grd);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.fog);
+   insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt);
+}
+
+/* Write the scissor state into the command stream.  Nothing is emitted
+ * unless the screen uses the kernel memory manager (kernel_mm).
+ */
+static void radeonEmitScissor(r100ContextPtr rmesa)
+{
+    BATCH_LOCALS(&rmesa->radeon);
+
+    if (!rmesa->radeon.radeonScreen->kernel_mm)
+        return;
+
+    if (!rmesa->radeon.state.scissor.enabled) {
+        /* Scissor off: rewrite PP_CNTL with the scissor-enable bit cleared. */
+        BEGIN_BATCH(2);
+        OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0));
+        OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & ~RADEON_SCISSOR_ENABLE);
+        END_BATCH();
+        return;
+    }
+
+    /* Scissor on: set the enable bit, then write the rectangle registers
+     * with y in the upper 16 bits and x in the lower bits of each dword.
+     */
+    BEGIN_BATCH(6);
+    OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0));
+    OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] | RADEON_SCISSOR_ENABLE);
+    OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
+    OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) |
+              rmesa->radeon.state.scissor.rect.x1);
+    OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
+    OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2) << 16) |
+              (rmesa->radeon.state.scissor.rect.x2));
+    END_BATCH();
+}
+
+/* Fire a section of the retained (indexed_verts) buffer as a regular
+ * primitive.
+ *
+ * rmesa:         r100 context whose command stream receives the packet.
+ * vertex_format: vertex format word, written into the packet unchanged.
+ * primitive:     primitive control bits; must not have
+ *                RADEON_CP_VC_CNTL_PRIM_WALK_IND set (asserted).
+ * vertex_nr:     vertex count, placed in the VC_CNTL NUM field.
+ *
+ * Pending state and the scissor are emitted before the draw packet.
+ */
+extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
+				GLuint vertex_format,
+				GLuint primitive,
+				GLuint vertex_nr )
+{
+   BATCH_LOCALS(&rmesa->radeon);
+
+   assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+
+   radeonEmitState(&rmesa->radeon);
+   radeonEmitScissor(rmesa);
+
+#if RADEON_OLD_PACKETS
+   BEGIN_BATCH(8);
+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 3);
+   /* Vertex buffer address: without kernel_mm it goes out through
+    * OUT_BATCH_RELOC; with kernel_mm the raw offset is written here and
+    * the relocation is appended after the packet body below.
+    */
+   if (!rmesa->radeon.radeonScreen->kernel_mm) {
+     OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+   } else {
+     OUT_BATCH(rmesa->ioctl.vertex_offset);
+   }
+
+   OUT_BATCH(vertex_nr);
+   OUT_BATCH(vertex_format);
+   OUT_BATCH(primitive |  RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+     radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+			   rmesa->ioctl.bo,
+			   RADEON_GEM_DOMAIN_GTT,
+			   0, 0);
+   }
+
+   END_BATCH();
+
+#else
+   /* Newer packet format: a 3D_DRAW_VBUF packet that carries no vertex
+    * buffer address of its own.
+    */
+   BEGIN_BATCH(4);
+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1);
+   OUT_BATCH(vertex_format);
+   OUT_BATCH(primitive |
+	     RADEON_CP_VC_CNTL_PRIM_WALK_LIST |
+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+	     (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT));
+   END_BATCH();
+#endif
+}
+
+/* Close the open-ended element packet started by
+ * radeonAllocEltsOpenEnded().
+ *
+ * The packet header recorded at tcl.elt_cmd_start is patched in place with
+ * the final dword count and the number of elements used, the command
+ * stream write position is advanced over the rest of the reserved section,
+ * and the batch is ended.  This function is installed as the context's
+ * dma.flush callback and clears that callback when it runs.
+ */
+void radeonFlushElts( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&rmesa->radeon);
+   int nr;
+   /* First dword of the packet emitted by radeonAllocEltsOpenEnded(). */
+   uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start);
+   /* Dwords reserved for the section that have not been written yet. */
+   int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw);
+
+   if (RADEON_DEBUG & RADEON_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   assert( rmesa->radeon.dma.flush == radeonFlushElts );
+   rmesa->radeon.dma.flush = NULL;
+
+   nr = rmesa->tcl.elt_used;
+
+#if RADEON_OLD_PACKETS
+   /* NOTE(review): presumably keeps two dwords of the section free for the
+    * relocation written further down -- confirm against
+    * radeon_cs_write_reloc().
+    */
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+     dwords -= 2;
+   }
+#endif
+
+   /* Patch the count into the upper half of header dword 1 and the element
+    * count into the VC_CNTL dword.
+    */
+#if RADEON_OLD_PACKETS
+   cmd[1] |= (dwords + 3) << 16;
+   cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
+#else
+   cmd[1] |= (dwords + 2) << 16;
+   cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT;
+#endif
+
+   /* Account for the element data as written. */
+   rmesa->radeon.cmdbuf.cs->cdw += dwords;
+   rmesa->radeon.cmdbuf.cs->section_cdw += dwords;
+
+#if RADEON_OLD_PACKETS
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+      radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+			    rmesa->ioctl.bo,
+			    RADEON_GEM_DOMAIN_GTT,
+			    0, 0);
+   }
+#endif
+
+   END_BATCH();
+
+   if (RADEON_DEBUG & RADEON_SYNC) {
+      fprintf(stderr, "%s: Syncing\n", __FUNCTION__);
+      radeonFinish( rmesa->radeon.glCtx );
+   }
+
+}
+
+/* Begin an indexed-primitive packet whose element data is filled in by the
+ * caller.
+ *
+ * rmesa:         r100 context.
+ * vertex_format: vertex format word, written into the packet unchanged.
+ * primitive:     primitive control bits; must have
+ *                RADEON_CP_VC_CNTL_PRIM_WALK_IND set (asserted).
+ * min_nr:        number of elements the caller intends to write; recorded
+ *                as tcl.elt_used.
+ *
+ * Returns a pointer into the command stream, just past the packet header,
+ * where the GLushort elements are to be stored.  The batch is left open:
+ * radeonFlushElts() is installed as dma.flush and finishes the packet.
+ */
+GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
+				    GLuint vertex_format,
+				    GLuint primitive,
+				    GLuint min_nr )
+{
+   GLushort *retval;
+   int align_min_nr;
+   BATCH_LOCALS(&rmesa->radeon);
+
+   if (RADEON_DEBUG & RADEON_IOCTL)
+      fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive);
+
+   assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND));
+
+   radeonEmitState(&rmesa->radeon);
+   radeonEmitScissor(rmesa);
+
+   /* Remember where the packet header starts so radeonFlushElts() can
+    * patch it later.
+    */
+   rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw;
+
+   /* round up min_nr to align the state */
+   align_min_nr = (min_nr + 1) & ~1;
+
+#if RADEON_OLD_PACKETS
+   BEGIN_BATCH_NO_AUTOSTATE(2+ELTS_BUFSZ(align_min_nr)/4);
+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0);
+   /* Without kernel_mm the vertex buffer address is a relocation; with
+    * kernel_mm the raw offset is written here.
+    */
+   if (!rmesa->radeon.radeonScreen->kernel_mm) {
+     OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+   } else {
+     OUT_BATCH(rmesa->ioctl.vertex_offset);
+   }
+   OUT_BATCH(rmesa->ioctl.vertex_max);
+   OUT_BATCH(vertex_format);
+   OUT_BATCH(primitive |
+	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+#else
+   BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(align_min_nr)/4);
+   OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0);
+   OUT_BATCH(vertex_format);
+   OUT_BATCH(primitive |
+	     RADEON_CP_VC_CNTL_PRIM_WALK_IND |
+	     RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA |
+	     RADEON_CP_VC_CNTL_MAOS_ENABLE |
+	     RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE);
+#endif
+
+
+   /* The element data starts at the current write position, right after
+    * the header dwords emitted above.
+    */
+   rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw;
+   rmesa->tcl.elt_used = min_nr;
+
+   retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset);
+
+   if (RADEON_DEBUG & RADEON_RENDER)
+      fprintf(stderr, "%s: header prim %x \n",
+	      __FUNCTION__, primitive);
+
+   /* No END_BATCH here: the packet stays open until the flush callback
+    * runs.
+    */
+   assert(!rmesa->radeon.dma.flush);
+   rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
+   rmesa->radeon.dma.flush = radeonFlushElts;
+
+   return retval;
+}
+
+/* Select the vertex buffer used by subsequent draw packets.
+ *
+ * rmesa:       r100 context.
+ * vertex_size: vertex size, written as both the low byte and the next byte
+ *              of the descriptor dword (only used by the newer packet
+ *              format).
+ * bo:          buffer object holding the vertices.
+ * offset:      offset of the first vertex within bo.
+ *
+ * With RADEON_OLD_PACKETS nothing is emitted; the buffer object and offset
+ * are only recorded in rmesa->ioctl for the draw packets to pick up.
+ * Otherwise a 3D_LOAD_VBPNTR packet describing a single array is emitted.
+ */
+void radeonEmitVertexAOS( r100ContextPtr rmesa,
+			  GLuint vertex_size,
+			  struct radeon_bo *bo,
+			  GLuint offset )
+{
+#if RADEON_OLD_PACKETS
+   rmesa->ioctl.vertex_offset = offset;
+   rmesa->ioctl.bo = bo;
+#else
+   BATCH_LOCALS(&rmesa->radeon);
+
+   /* Use RADEON_IOCTL, the debug flag every other function in this file
+    * tests, rather than the stale DEBUG_IOCTL name.
+    */
+   if (RADEON_DEBUG & (RADEON_PRIMS|RADEON_IOCTL))
+      fprintf(stderr, "%s:  vertex_size 0x%x offset 0x%x \n",
+	      __FUNCTION__, vertex_size, offset);
+
+   BEGIN_BATCH(7);
+   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2);
+   OUT_BATCH(1);
+   OUT_BATCH(vertex_size | (vertex_size << 8));
+   OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+   END_BATCH();
+
+#endif
+}
+
+
+/* Emit the vertex array pointers for the arrays in rmesa->radeon.tcl.aos.
+ *
+ * rmesa:  r100 context.
+ * nr:     number of arrays to describe; must be 1 with RADEON_OLD_PACKETS
+ *         (asserted).
+ * offset: index of the first vertex; each array's start address is its
+ *         own offset plus offset * 4 * stride.
+ *
+ * With RADEON_OLD_PACKETS nothing is emitted: the buffer object, start
+ * offset and vertex count of array 0 are recorded in rmesa->ioctl.
+ * Otherwise a 3D_LOAD_VBPNTR packet is emitted.  Arrays are described in
+ * pairs -- one dword packing components/stride of both arrays, followed by
+ * one address dword per array -- with a trailing single-array entry when
+ * nr is odd.
+ */
+void radeonEmitAOS( r100ContextPtr rmesa,
+		    GLuint nr,
+		    GLuint offset )
+{
+#if RADEON_OLD_PACKETS
+   assert( nr == 1 );
+   rmesa->ioctl.bo = rmesa->radeon.tcl.aos[0].bo;
+   rmesa->ioctl.vertex_offset =
+     (rmesa->radeon.tcl.aos[0].offset + offset * rmesa->radeon.tcl.aos[0].stride * 4);
+   rmesa->ioctl.vertex_max = rmesa->radeon.tcl.aos[0].count;
+#else
+   BATCH_LOCALS(&rmesa->radeon);
+   uint32_t voffset;
+   /* Packet body: the count dword, 3 dwords per pair, 2 for an odd tail. */
+   int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+   int i;
+
+   if (RADEON_DEBUG & RADEON_IOCTL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   BEGIN_BATCH(sz+2+(nr * 2));
+   OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+   OUT_BATCH(nr);
+
+   if (!rmesa->radeon.radeonScreen->kernel_mm) {
+      /* Addresses are emitted through OUT_BATCH_RELOC. */
+      for (i = 0; i + 1 < nr; i += 2) {
+         OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+                   (rmesa->radeon.tcl.aos[i].stride << 8) |
+                   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+                   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+         voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
+            offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+         OUT_BATCH_RELOC(voffset,
+                         rmesa->radeon.tcl.aos[i].bo,
+                         voffset,
+                         RADEON_GEM_DOMAIN_GTT,
+                         0, 0);
+         voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
+            offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+         OUT_BATCH_RELOC(voffset,
+                         rmesa->radeon.tcl.aos[i+1].bo,
+                         voffset,
+                         RADEON_GEM_DOMAIN_GTT,
+                         0, 0);
+      }
+
+      if (nr & 1) {
+         OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+                   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+         voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
+            offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+         OUT_BATCH_RELOC(voffset,
+                         rmesa->radeon.tcl.aos[nr - 1].bo,
+                         voffset,
+                         RADEON_GEM_DOMAIN_GTT,
+                         0, 0);
+      }
+   } else {
+      /* kernel_mm: raw offsets go into the packet body; the relocations
+       * follow afterwards, one per array.
+       */
+      for (i = 0; i + 1 < nr; i += 2) {
+         OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+                   (rmesa->radeon.tcl.aos[i].stride << 8) |
+                   (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+                   (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+         voffset =  rmesa->radeon.tcl.aos[i + 0].offset +
+            offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+         OUT_BATCH(voffset);
+         voffset =  rmesa->radeon.tcl.aos[i + 1].offset +
+            offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+         OUT_BATCH(voffset);
+      }
+
+      if (nr & 1) {
+         OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+                   (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+         voffset =  rmesa->radeon.tcl.aos[nr - 1].offset +
+            offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+         OUT_BATCH(voffset);
+      }
+
+      /* One relocation per array, in array order.  This replaces the
+       * former pair loop plus odd tail, which visited the arrays in the
+       * same order but also computed offsets that were never used.
+       */
+      for (i = 0; i < nr; i++) {
+         radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+                               rmesa->radeon.tcl.aos[i].bo,
+                               RADEON_GEM_DOMAIN_GTT,
+                               0, 0);
+      }
+   }
+   END_BATCH();
+
+#endif
+}
+
+/* ================================================================
+ * Buffer clear
+ */
+#define RADEON_MAX_CLEARS	256
+
+/* Clear buffers through the DRM_RADEON_CLEAR ioctl.
+ *
+ * ctx:   GL context.
+ * flags: clear flags, passed to the kernel in drm_radeon_clear_t::flags.
+ *
+ * The clear region is taken from the draw buffer bounds and flipped into
+ * window coordinates, then intersected with the drawable's cliprects,
+ * which are submitted in groups of at most RADEON_NR_SAREA_CLIPRECTS.
+ * Any ioctl failure terminates the process; the hardware lock is released
+ * first on both failure paths.
+ */
+static void radeonKernelClear(GLcontext *ctx, GLuint flags)
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
+   drm_radeon_sarea_t *sarea = rmesa->radeon.sarea;
+   uint32_t hw_last_clear;   /* value reported for RADEON_PARAM_LAST_CLEAR */
+   GLint ret, i;
+   GLint cx, cy, cw, ch;
+
+   radeonEmitState(&rmesa->radeon);
+
+   LOCK_HARDWARE( &rmesa->radeon );
+
+   /* compute region after locking: */
+   cx = ctx->DrawBuffer->_Xmin;
+   cy = ctx->DrawBuffer->_Ymin;
+   cw = ctx->DrawBuffer->_Xmax - cx;
+   ch = ctx->DrawBuffer->_Ymax - cy;
+
+   /* Flip top to bottom */
+   cx += dPriv->x;
+   cy  = dPriv->y + dPriv->h - cy - ch;
+
+   /* Throttle the number of clear ioctls we do: wait until the value the
+    * kernel reports is within RADEON_MAX_CLEARS of sarea->last_clear.
+    */
+   while ( 1 ) {
+      drm_radeon_getparam_t gp;
+
+      gp.param = RADEON_PARAM_LAST_CLEAR;
+      gp.value = (int *)&hw_last_clear;
+      ret = drmCommandWriteRead( rmesa->radeon.dri.fd,
+                                 DRM_RADEON_GETPARAM, &gp, sizeof(gp) );
+
+      if ( ret ) {
+         /* Drop the lock before bailing out, as the DRM_RADEON_CLEAR
+          * failure path below does.
+          */
+         UNLOCK_HARDWARE( &rmesa->radeon );
+         fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret );
+         exit(1);
+      }
+
+      if ( sarea->last_clear - hw_last_clear <= RADEON_MAX_CLEARS ) {
+         break;
+      }
+
+      if ( rmesa->radeon.do_usleeps ) {
+         UNLOCK_HARDWARE( &rmesa->radeon );
+         DO_USLEEP( 1 );
+         LOCK_HARDWARE( &rmesa->radeon );
+      }
+   }
+
+   /* Send current state to the hardware */
+   rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ );
+
+   /* i is advanced by the inner loops, one group of cliprects per pass. */
+   for ( i = 0 ; i < dPriv->numClipRects ; ) {
+      GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects );
+      drm_clip_rect_t *box = dPriv->pClipRects;
+      drm_clip_rect_t *b = rmesa->radeon.sarea->boxes;
+      drm_radeon_clear_t clear;
+      drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
+      GLint n = 0;
+
+      if (cw != dPriv->w || ch != dPriv->h) {
+         /* clear subregion: clip every box against the clear rectangle
+          * and drop the ones that end up empty.
+          */
+         for ( ; i < nr ; i++ ) {
+            GLint x = box[i].x1;
+            GLint y = box[i].y1;
+            GLint w = box[i].x2 - x;
+            GLint h = box[i].y2 - y;
+
+            if ( x < cx ) w -= cx - x, x = cx;
+            if ( y < cy ) h -= cy - y, y = cy;
+            if ( x + w > cx + cw ) w = cx + cw - x;
+            if ( y + h > cy + ch ) h = cy + ch - y;
+            if ( w <= 0 ) continue;
+            if ( h <= 0 ) continue;
+
+            b->x1 = x;
+            b->y1 = y;
+            b->x2 = x + w;
+            b->y2 = y + h;
+            b++;
+            n++;
+         }
+      } else {
+         /* clear whole buffer */
+         for ( ; i < nr ; i++ ) {
+            *b++ = box[i];
+            n++;
+         }
+      }
+
+      rmesa->radeon.sarea->nbox = n;
+
+      clear.flags       = flags;
+      clear.clear_color = rmesa->radeon.state.color.clear;
+      clear.clear_depth = rmesa->radeon.state.depth.clear;
+      clear.color_mask  = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK];
+      clear.depth_mask  = rmesa->radeon.state.stencil.clear;
+      clear.depth_boxes = depth_boxes;
+
+      /* Mirror the SAREA boxes as float rectangles for the depth clear. */
+      n--;
+      b = rmesa->radeon.sarea->boxes;
+      for ( ; n >= 0 ; n-- ) {
+         depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1;
+         depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1;
+         depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2;
+         depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2;
+         depth_boxes[n].f[CLEAR_DEPTH] =
+            (float)rmesa->radeon.state.depth.clear;
+      }
+
+      ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR,
+                             &clear, sizeof(drm_radeon_clear_t));
+
+      if ( ret ) {
+         UNLOCK_HARDWARE( &rmesa->radeon );
+         fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret );
+         exit( 1 );
+      }
+   }
+   UNLOCK_HARDWARE( &rmesa->radeon );
+}
+
+/* Driver.Clear entry point.
+ *
+ * ctx:  GL context.
+ * mask: BUFFER_BIT_* set naming the buffers to clear.
+ *
+ * Front-left, back-left, depth and stencil are translated into RADEON_*
+ * clear flags; whatever is left in the mask is cleared by swrast.  The
+ * flagged buffers are then cleared by radeonUserClear() when the kernel
+ * memory manager is in use, and by radeonKernelClear() otherwise.
+ */
+static void radeonClear( GLcontext *ctx, GLbitfield mask )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
+   GLuint flags = 0;
+   GLuint orig_mask = mask;
+
+   if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) {
+      rmesa->radeon.front_buffer_dirty = GL_TRUE;
+   }
+
+   if ( RADEON_DEBUG & RADEON_IOCTL ) {
+      fprintf( stderr, "radeonClear\n");
+   }
+
+   {
+      /* NOTE(review): the lock is taken and dropped again, presumably so
+       * the cliprect count read below is current -- confirm.
+       */
+      LOCK_HARDWARE( &rmesa->radeon );
+      UNLOCK_HARDWARE( &rmesa->radeon );
+      if ( dPriv->numClipRects == 0 )
+         return;
+   }
+
+   radeon_firevertices(&rmesa->radeon);
+
+   if ( mask & BUFFER_BIT_FRONT_LEFT ) {
+      flags |= RADEON_FRONT;
+      mask &= ~BUFFER_BIT_FRONT_LEFT;
+   }
+
+   if ( mask & BUFFER_BIT_BACK_LEFT ) {
+      flags |= RADEON_BACK;
+      mask &= ~BUFFER_BIT_BACK_LEFT;
+   }
+
+   if ( mask & BUFFER_BIT_DEPTH ) {
+      flags |= RADEON_DEPTH;
+      mask &= ~BUFFER_BIT_DEPTH;
+   }
+
+   if ( (mask & BUFFER_BIT_STENCIL) ) {
+      flags |= RADEON_STENCIL;
+      mask &= ~BUFFER_BIT_STENCIL;
+   }
+
+   /* Anything not claimed above is cleared in software. */
+   if ( mask ) {
+      if (RADEON_DEBUG & RADEON_FALLBACKS)
+         fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask);
+      _swrast_Clear( ctx, mask );
+   }
+
+   if ( !flags )
+      return;
+
+   if (rmesa->using_hyperz) {
+      flags |= RADEON_USE_COMP_ZBUF;
+/*      if (rmesa->radeon.radeonScreen->chipset & RADEON_CHIPSET_TCL)
+         flags |= RADEON_USE_HIERZ; */
+      /* Fast Z clear is requested only when depth and stencil are cleared
+       * together and the stencil clear value has every write-mask bit set.
+       */
+      if (((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) &&
+            ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) {
+          flags |= RADEON_CLEAR_FASTZ;
+      }
+   }
+
+   if (rmesa->radeon.radeonScreen->kernel_mm)
+     radeonUserClear(ctx, orig_mask);
+   else {
+      radeonKernelClear(ctx, flags);
+      rmesa->radeon.hw.all_dirty = GL_TRUE;
+   }
+}
+
+/* Hook the flush, finish and clear entry points into the context's driver
+ * function table.
+ */
+void radeonInitIoctlFuncs( GLcontext *ctx )
+{
+    ctx->Driver.Flush = radeonFlush;
+    ctx->Driver.Finish = radeonFinish;
+    ctx->Driver.Clear = radeonClear;
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
new file mode 100644
index 0000000000..deb53ae313
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h
@@ -0,0 +1,181 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ */
+
+#ifndef __RADEON_IOCTL_H__
+#define __RADEON_IOCTL_H__
+
+#include "main/simple_list.h"
+#include "radeon_lock.h"
+#include "radeon_bocs_wrapper.h"
+
+extern void radeonEmitVertexAOS( r100ContextPtr rmesa,
+				 GLuint vertex_size,
+				 struct radeon_bo *bo,
+				 GLuint offset );
+
+extern void radeonEmitVbufPrim( r100ContextPtr rmesa,
+				GLuint vertex_format,
+				GLuint primitive,
+				GLuint vertex_nr );
+
+extern void radeonFlushElts( GLcontext *ctx );
+			    
+
+extern GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa,
+					   GLuint vertex_format,
+					   GLuint primitive,
+					   GLuint min_nr );
+
+
+extern void radeonEmitAOS( r100ContextPtr rmesa,
+			   GLuint n,
+			   GLuint offset );
+
+extern void radeonEmitBlit( r100ContextPtr rmesa,
+			    GLuint color_fmt,
+			    GLuint src_pitch,
+			    GLuint src_offset,
+			    GLuint dst_pitch,
+			    GLuint dst_offset,
+			    GLint srcx, GLint srcy,
+			    GLint dstx, GLint dsty,
+			    GLuint w, GLuint h );
+
+extern void radeonEmitWait( r100ContextPtr rmesa, GLuint flags );
+
+extern void radeonFlushCmdBuf( r100ContextPtr rmesa, const char * );
+
+extern void radeonFlush( GLcontext *ctx );
+extern void radeonFinish( GLcontext *ctx );
+extern void radeonInitIoctlFuncs( GLcontext *ctx );
+extern void radeonGetAllParams( r100ContextPtr rmesa );
+extern void radeonSetUpAtomList( r100ContextPtr rmesa );
+
+/* ================================================================
+ * Helper macros:
+ */
+
+/* Close off the last primitive, if it exists.
+ *
+ * If a flush callback is pending in rmesa->radeon.dma.flush it is invoked
+ * with the context's GL context.  The rmesa argument is evaluated more
+ * than once, so it must be free of side effects.
+ */
+#define RADEON_NEWPRIM( rmesa )			\
+do {						\
+   if ( (rmesa)->radeon.dma.flush )			\
+      (rmesa)->radeon.dma.flush( (rmesa)->radeon.glCtx );	\
+} while (0)
+
+/* Can accommodate several state changes and primitive changes without
+ * actually firing the buffer.
+ *
+ * Closes any open primitive, then marks the named atom and the hardware
+ * state as a whole dirty.  The rmesa argument is evaluated more than once,
+ * so it must be free of side effects.
+ */
+
+#define RADEON_STATECHANGE( rmesa, ATOM )			\
+do {								\
+   RADEON_NEWPRIM( rmesa );					\
+   (rmesa)->hw.ATOM.dirty = GL_TRUE;				\
+   (rmesa)->radeon.hw.is_dirty = GL_TRUE;			\
+} while (0)
+
+/* Copy the named atom's current command words (cmd) into its lastcmd
+ * buffer; cmd_size is multiplied by 4 to obtain the byte count.  The macro
+ * takes no context argument: it relies on a variable named rmesa being in
+ * scope where it is used.
+ */
+#define RADEON_DB_STATE( ATOM )				\
+   memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd,	\
+	   rmesa->hw.ATOM.cmd_size * 4)
+
+/* Commit a double-buffered state atom if its contents changed.
+ *
+ * Compares atom->cmd with atom->lastcmd over cmd_size * 4 bytes.  When
+ * they are identical nothing happens and 0 is returned.  Otherwise any
+ * open primitive is closed, the atom and the hardware state are marked
+ * dirty, the cmd and lastcmd pointers are exchanged, and 1 is returned.
+ */
+static INLINE int RADEON_DB_STATECHANGE(r100ContextPtr rmesa,
+					struct radeon_state_atom *atom )
+{
+   GLuint *swap;
+
+   if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4) == 0)
+      return 0;
+
+   RADEON_NEWPRIM( rmesa );
+   atom->dirty = GL_TRUE;
+   rmesa->radeon.hw.is_dirty = GL_TRUE;
+
+   /* Exchange the two command buffers. */
+   swap = atom->cmd;
+   atom->cmd = atom->lastcmd;
+   atom->lastcmd = swap;
+
+   return 1;
+}
+
+/* Command lengths.  Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ
+ * are available, you will also be adding an rmesa->state.max_state_size because
+ * radeonEmitState is called from within radeonEmitVbufPrim and
+ * radeonAllocEltsOpenEnded.
+ */
+/* Command buffer space required by the emit helpers, selected by packet
+ * format.  Macro arguments are fully parenthesized so that expressions
+ * such as ELTS_BUFSZ(a + b) expand correctly.
+ */
+#if RADEON_OLD_PACKETS
+#define AOS_BUFSZ(nr)	((3 + (((nr) / 2) * 3) + (((nr) & 1) * 2)) + (nr) * 2)
+#define VERT_AOS_BUFSZ	(0)
+#define ELTS_BUFSZ(nr)	(24 + (nr) * 2)
+#define VBUF_BUFSZ	(8)
+#else
+#define AOS_BUFSZ(nr)	((3 + (((nr) / 2) * 3) + (((nr) & 1) * 2) + (nr) * 2))
+#define VERT_AOS_BUFSZ	(5)
+#define ELTS_BUFSZ(nr)	(16 + (nr) * 2)
+#define VBUF_BUFSZ	(4)
+#endif
+#define SCISSOR_BUFSZ	(8)
+#define INDEX_BUFSZ	(7)
+
+
+/* Build a drm_radeon_cmd_header_t for the given command type: the header's
+ * integer view (i) is zeroed, header.cmd_type is set, and the integer view
+ * is returned as a 32-bit value.
+ */
+static inline uint32_t cmdpacket3(int cmd_type)
+{
+  drm_radeon_cmd_header_t hdr;
+
+  hdr.i = 0;
+  hdr.header.cmd_type = cmd_type;
+  return (uint32_t)hdr.i;
+}
+
+/* Emit a two-dword PACKET3 header.  The first dword is CP_PACKET2 when the
+ * kernel memory manager is in use and a RADEON_CMD_PACKET3 command header
+ * otherwise; the second is always the CP_PACKET3 word.  Expects b_l_rmesa
+ * to be in scope (presumably declared by BATCH_LOCALS).
+ */
+#define OUT_BATCH_PACKET3(packet, num_extra) do {	      \
+    if (b_l_rmesa->radeonScreen->kernel_mm) {		      \
+      OUT_BATCH(CP_PACKET2);				      \
+    } else {						      \
+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3));	      \
+    }							      \
+    OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
+  } while(0)
+
+/* Same as OUT_BATCH_PACKET3, except that the command header emitted when
+ * the kernel memory manager is not in use is RADEON_CMD_PACKET3_CLIP.
+ * Expects b_l_rmesa to be in scope (presumably declared by BATCH_LOCALS).
+ */
+#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do {	      \
+    if (b_l_rmesa->radeonScreen->kernel_mm) {		      \
+      OUT_BATCH(CP_PACKET2);				      \
+    } else {						      \
+      OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP));	      \
+    }							      \
+    OUT_BATCH(CP_PACKET3((packet), (num_extra)));	      \
+  } while(0)
+
+
+#endif /* __RADEON_IOCTL_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c
new file mode 100644
index 0000000000..7b6bd36dcf
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_lock.c
@@ -0,0 +1,166 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "dri_util.h"
+#include "radeon_screen.h"
+#include "radeon_common.h"
+#include "radeon_lock.h"
+
+/* Acquire the hardware lock and bring the driver's view of the window up
+ * to date.  This runs when another context -- possibly the X server -- has
+ * held the lock in the meantime.  Because the X server takes the lock when
+ * it moves, resizes or restacks a window, this is also where changes to
+ * the drawable position and clip rects get picked up.
+ */
+void radeonGetLock(radeonContextPtr rmesa, GLuint flags)
+{
+	__DRIdrawable *const drawable = radeon_get_drawable(rmesa);
+	__DRIdrawable *const readable = radeon_get_readable(rmesa);
+	__DRIscreen *sPriv = rmesa->dri.screen;
+
+	drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags);
+
+	/* The window might have moved, so the clip rects may be stale.
+	 *
+	 * NOTE: Validation releases and regrabs the hw lock so that the X
+	 * server can answer the DRI protocol request for new drawable info.
+	 * The hardware state depends on the latest clip rects, so all state
+	 * checking has to happen _after_ this point.
+	 */
+	if (drawable) {
+		DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable);
+	}
+
+	/* The read drawable only needs its own pass when it is distinct. */
+	if (readable && readable != drawable) {
+		DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable);
+	}
+
+	if (drawable && drawable->lastStamp != rmesa->lastStamp) {
+		radeon_window_moved(rmesa);
+		rmesa->lastStamp = drawable->lastStamp;
+	}
+
+	rmesa->vtbl.get_lock(rmesa);
+}
+#ifndef NDEBUG
+/* Call site recorded by radeon_lock_hardware() each time the lock is
+ * actually taken; it is printed when a recursive lock attempt is reported.
+ */
+struct lock_debug {
+	const char* function;	/* name of the calling function */
+	const char* file;	/* source file of the call */
+	int line;		/* line number of the call */
+};
+
+/* Most recent lock call site; starts out zeroed. */
+static struct lock_debug ldebug = {0};
+#endif
+
+#if 0
+/** TODO: use atomic operations for reference counting **/
+/** gcc 4.2 has builtin functions for this **/
+#define ATOMIC_INC_AND_FETCH(atomic) __sync_add_and_fetch(&(atomic), 1)
+#define ATOMIC_DEC_AND_FETCH(atomic) __sync_sub_and_fetch(&(atomic), 1)
+#else
+/* Plain (non-atomic) pre-increment / pre-decrement that yields the updated
+ * value.  The argument is parenthesized so any lvalue expression is safe.
+ */
+#define ATOMIC_INC_AND_FETCH(atomic) (++(atomic))
+#define ATOMIC_DEC_AND_FETCH(atomic) (--(atomic))
+#endif
+
+
+/* Take the hardware lock, with support for nested calls.
+ *
+ * Nothing is locked when DRI2 is enabled on the screen.  Otherwise the
+ * context's lock count is incremented; only the outermost call performs
+ * the DRM compare-and-swap, and radeonGetLock() is called when that
+ * compare-and-swap reports contention.
+ *
+ * In debug builds (NDEBUG not defined) the caller's function, file and
+ * line are passed in and remembered in ldebug, so that a nested call can
+ * report both call sites when RADEON_SANITY debugging is enabled.
+ */
+void radeon_lock_hardware(radeonContextPtr radeon
+#ifndef NDEBUG
+		,const char* function
+		,const char* file
+		,const int line
+#endif
+		)
+{
+	char ret = 0;
+	struct radeon_framebuffer *rfb = NULL;
+	struct radeon_renderbuffer *rrb = NULL;
+
+	/* NOTE(review): rrb is assigned here but never read in this
+	 * function -- confirm whether the lookup is still needed.
+	 */
+	if (radeon_get_drawable(radeon)) {
+		rfb = radeon_get_drawable(radeon)->driverPrivate;
+
+		if (rfb)
+			rrb = radeon_get_renderbuffer(&rfb->base,
+						      rfb->base._ColorDrawBufferIndexes[0]);
+	}
+
+	if (!radeon->radeonScreen->driScreen->dri2.enabled) {
+		/* Already held by this context: count the nesting and return
+		 * without touching the DRM lock.
+		 */
+		if (ATOMIC_INC_AND_FETCH(radeon->dri.hwLockCount) > 1)
+		{
+#ifndef NDEBUG
+			if ( RADEON_DEBUG & RADEON_SANITY )
+				fprintf(stderr, "*** %d times of recursive call to %s ***\n"
+						"Original call was from %s (file: %s line: %d)\n"
+						"Now call is coming from %s (file: %s line: %d)\n"
+						, radeon->dri.hwLockCount, __FUNCTION__
+						, ldebug.function, ldebug.file, ldebug.line
+						, function, file, line
+					   );
+#endif
+			return;
+		}
+		/* Outermost call: try the fast compare-and-swap, and fall back
+		 * to radeonGetLock() when it reports contention.
+		 */
+		DRM_CAS(radeon->dri.hwLock, radeon->dri.hwContext,
+			 (DRM_LOCK_HELD | radeon->dri.hwContext), ret );
+		if (ret)
+			radeonGetLock(radeon, 0);
+#ifndef NDEBUG
+		/* Remember who holds the lock for the recursion report. */
+		ldebug.function = function;
+		ldebug.file = file;
+		ldebug.line = line;
+#endif
+	}
+}
+
+/* Release the hardware lock taken by radeon_lock_hardware().
+ *
+ * Nothing is done when DRI2 is enabled on the screen.  Otherwise the lock
+ * count is decremented, and the DRM lock is released only once the count
+ * is no longer positive.
+ */
+void radeon_unlock_hardware(radeonContextPtr radeon)
+{
+	if (radeon->radeonScreen->driScreen->dri2.enabled)
+		return;
+
+	/* Still nested inside an outer lock/unlock pair. */
+	if (ATOMIC_DEC_AND_FETCH(radeon->dri.hwLockCount) > 0)
+		return;
+
+	DRM_UNLOCK( radeon->dri.fd,
+		    radeon->dri.hwLock,
+		    radeon->dri.hwContext );
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.h b/src/mesa/drivers/dri/radeon/radeon_lock.h
new file mode 100644
index 0000000000..da5a5b4371
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_lock.h
@@ -0,0 +1,69 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ */
+
+#ifndef COMMON_LOCK_H
+#define COMMON_LOCK_H
+
+#include "main/colormac.h"
+#include "radeon_screen.h"
+#include "radeon_common.h"
+
+/* Lock acquisition routine defined outside this header.
+ * radeon_lock_hardware() calls it (with flags 0) when its DRM_CAS attempt
+ * leaves the result flag non-zero.
+ */
+extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags);
+
+/* Counted hardware lock / unlock.  In builds without NDEBUG the lock
+ * function takes three extra arguments identifying the call site; use the
+ * LOCK_HARDWARE() macro below so they are supplied automatically.
+ */
+void radeon_lock_hardware(radeonContextPtr rmesa
+#ifndef NDEBUG
+		,const char* function
+		,const char* file
+		,const int line
+#endif
+		);
+void radeon_unlock_hardware(radeonContextPtr rmesa);
+
+/* Lock the hardware and validate our state.
+ *
+ * LOCK_HARDWARE() expands to radeon_lock_hardware(); unless NDEBUG is
+ * defined it also passes __FUNCTION__, __FILE__ and __LINE__ of the call
+ * site.  UNLOCK_HARDWARE() expands to radeon_unlock_hardware().
+ */
+#ifdef NDEBUG
+#define LOCK_HARDWARE( rmesa )	radeon_lock_hardware(rmesa)
+#else
+#define LOCK_HARDWARE( rmesa )	radeon_lock_hardware(rmesa, __FUNCTION__, __FILE__, __LINE__)
+#endif
+#define UNLOCK_HARDWARE( rmesa )  radeon_unlock_hardware(rmesa)
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos.c b/src/mesa/drivers/dri/radeon/radeon_maos.c
new file mode 100644
index 0000000000..ea1e8934de
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_maos.c
@@ -0,0 +1,12 @@
+
+
+/* If using new packets, can choose either verts or arrays.
+ * Otherwise, must use verts.
+ *
+ * RADEON_MAOS_VERTS is hard-wired to 0 below, so radeon_maos_verts.c is
+ * compiled in only when RADEON_OLD_PACKETS is non-zero; in every other
+ * configuration radeon_maos_arrays.c provides the implementation.
+ */
+#include "radeon_context.h"
+#define RADEON_MAOS_VERTS 0
+#if (RADEON_MAOS_VERTS) || (RADEON_OLD_PACKETS)
+#include "radeon_maos_verts.c"
+#else
+#include "radeon_maos_arrays.c"
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos.h b/src/mesa/drivers/dri/radeon/radeon_maos.h
new file mode 100644
index 0000000000..b88eb198d5
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_maos.h
@@ -0,0 +1,42 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __RADEON_MAOS_H__
+#define __RADEON_MAOS_H__
+
+#include "radeon_context.h"
+
+/* Emit the vertex attributes selected by the `inputs` bit mask for the
+ * context's current TNL vertex buffer.  Two implementations of this
+ * function exist (radeon_maos_arrays.c and radeon_maos_verts.c);
+ * radeon_maos.c includes exactly one of them.
+ */
+extern void radeonEmitArrays( GLcontext *ctx, GLuint inputs );
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
new file mode 100644
index 0000000000..d810e6080e
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c
@@ -0,0 +1,330 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Cedar Park, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_swtcl.h"
+#include "radeon_maos.h"
+#include "radeon_tcl.h"
+
+/**
+ * Upload the fog attribute as a one-component vertex stream.
+ *
+ * Every source float is passed through radeonComputeFogBlendFactor() and
+ * the result is written as one 32-bit word into a newly allocated DMA
+ * region, which is then described by `aos`.
+ *
+ *   ctx     GL context; supplies the radeon context and is passed to
+ *           radeonComputeFogBlendFactor().
+ *   aos     stream descriptor to fill in (bo, offset, stride, components,
+ *           count).
+ *   data    first source value, read as GLfloat.
+ *   stride  byte step between source values; 0 means only a single
+ *           element is emitted.
+ *   count   number of elements to emit when stride is non-zero.
+ */
+static void emit_vecfog(GLcontext *ctx, struct radeon_aos *aos,
+			GLvoid *data, int stride, int count)
+{
+   const int size = 1;                     /* words emitted per element */
+   const char *src = (const char *)data;   /* byte cursor over the source */
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   uint32_t *out;
+   int i;
+
+   if (RADEON_DEBUG & RADEON_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   if (stride == 0) {
+      /* Single element: one word is enough. */
+      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
+      count = 1;
+      aos->stride = 0;
+   }
+   else {
+      /* The loop below writes `count` words, so the region has to be
+       * sized for all of them (the same sizing emit_tex_vector uses),
+       * not just for the first element.
+       */
+      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
+      aos->stride = size;
+   }
+
+   aos->components = size;
+   aos->count = count;
+
+   /* Emit the data
+    */
+   radeon_bo_map(aos->bo, 1);
+   out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
+   for (i = 0; i < count; i++) {
+      /* NOTE(review): the result is assigned to a uint32_t; if
+       * radeonComputeFogBlendFactor() returns a floating-point value it is
+       * converted to an integer here rather than stored bit-for-bit.
+       * Confirm this is intended.
+       */
+      out[i] = radeonComputeFogBlendFactor( ctx, *(const GLfloat *)src );
+      src += stride;
+   }
+   radeon_bo_unmap(aos->bo);
+}
+
+/* Copy `count` source elements into `out` as two-word pairs: the first
+ * 32-bit word of each source element followed by a zero word.  `stride`
+ * is the byte step between source elements.
+ */
+static void emit_s0_vec(uint32_t *out, GLvoid *data, int stride, int count)
+{
+   const char *src = (const char *)data;
+   int left;
+
+   if (RADEON_DEBUG & RADEON_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   for (left = count; left > 0; left--, src += stride, out += 2) {
+      out[0] = *(const int *)src;
+      out[1] = 0;
+   }
+}
+
+/* Copy `count` source elements into `out`, three words per element: the
+ * 32-bit words at byte offsets 0, 4 and 12 of each source element (the
+ * word at offset 8 is skipped).  `stride` is the byte step between source
+ * elements.
+ */
+static void emit_stq_vec(uint32_t *out, GLvoid *data, int stride, int count)
+{
+   const char *src = (const char *)data;
+   int n;
+
+   if (RADEON_DEBUG & RADEON_VERTS)
+      fprintf(stderr, "%s count %d stride %d\n",
+	      __FUNCTION__, count, stride);
+
+   for (n = 0; n < count; n++) {
+      const int *elem = (const int *)src;
+
+      out[0] = elem[0];
+      out[1] = elem[1];
+      out[2] = elem[3];
+
+      out += 3;
+      src += stride;
+   }
+}
+
+
+
+
+/* Upload one texture-coordinate array into a newly allocated DMA region
+ * and describe it in `aos`.
+ *
+ * `size` is the number of source components.  Sizes 3 and 4 are emitted
+ * as three words per element, every other size as two.  A zero `stride`
+ * means only a single element is emitted.  A `size` outside 1..4 reaches
+ * assert(0) followed by exit(1).
+ */
+static void emit_tex_vector(GLcontext *ctx, struct radeon_aos *aos,
+			    GLvoid *data, int size, int stride, int count)
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   const int emitsize = (size == 3 || size == 4) ? 3 : 2;
+   uint32_t *dst;
+
+   if (RADEON_DEBUG & RADEON_VERTS)
+      fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size);
+
+   if (stride != 0) {
+      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * count * 4, 32);
+      aos->stride = emitsize;
+   }
+   else {
+      radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * 4, 32);
+      count = 1;
+      aos->stride = 0;
+   }
+
+   aos->components = emitsize;
+   aos->count = count;
+
+   /* Map the buffer, copy the coordinates with the routine that matches
+    * the source size, then unmap again.
+    */
+   radeon_bo_map(aos->bo, 1);
+   dst = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
+
+   if (size == 1) {
+      emit_s0_vec( dst, data, stride, count );
+   }
+   else if (size == 2) {
+      radeonEmitVec8( dst, data, stride, count );
+   }
+   else if (size == 3) {
+      radeonEmitVec12( dst, data, stride, count );
+   }
+   else if (size == 4) {
+      emit_stq_vec( dst, data, stride, count );
+   }
+   else {
+      assert(0);
+      exit(1);
+   }
+
+   radeon_bo_unmap(aos->bo);
+}
+
+
+
+
+/* Emit any changed arrays to new GART memory, re-emit a packet to
+ * update the arrays.
+ *
+ * Walks the attributes in a fixed order (position, normal, color0,
+ * color1, fog, then each texture unit).  Position is always handled; the
+ * others only when their bit is set in `inputs`.  Every handled attribute
+ * takes the next slot rmesa->tcl.aos[nr] and contributes its bits to the
+ * vertex format word; the data itself is uploaded only when the
+ * attribute's rmesa->tcl.<attr>.buf is null.
+ *
+ * On return rmesa->tcl.nr_aos_components holds the number of slots used
+ * and rmesa->tcl.vertex_format the accumulated format bits.  The
+ * TCL_OUTPUT_VTXFMT state word is rewritten (and the tcl state atom
+ * flagged via RADEON_STATECHANGE) if its Q bits changed.
+ */
+void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+{
+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
+   struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
+   GLuint nr = 0;      /* next free slot in rmesa->tcl.aos[] */
+   GLuint vfmt = 0;    /* accumulated RADEON_CP_VC_FRMT_* bits */
+   GLuint count = VB->Count;
+   GLuint vtx, unit;
+   
+#if 0
+   if (RADEON_DEBUG & RADEON_VERTS)
+      _tnl_print_vert_flags( __FUNCTION__, inputs );
+#endif
+
+   /* Position: always emitted. */
+   if (1) {
+      if (!rmesa->tcl.obj.buf) 
+	rcommon_emit_vector( ctx, 
+			     &(rmesa->tcl.aos[nr]),
+			     (char *)VB->AttribPtr[_TNL_ATTRIB_POS]->data,
+			     VB->AttribPtr[_TNL_ATTRIB_POS]->size,
+			     VB->AttribPtr[_TNL_ATTRIB_POS]->stride,
+			     count);
+
+      /* The cases deliberately fall through: a size-4 position sets the
+       * W0, Z and XY bits, size 3 sets Z and XY, size 2 only XY.
+       */
+      switch( VB->AttribPtr[_TNL_ATTRIB_POS]->size ) {
+      case 4: vfmt |= RADEON_CP_VC_FRMT_W0;   /* fallthrough */
+      case 3: vfmt |= RADEON_CP_VC_FRMT_Z;    /* fallthrough */
+      case 2: vfmt |= RADEON_CP_VC_FRMT_XY;   /* fallthrough */
+      default:
+         break;
+      }
+      nr++;
+   }
+   
+
+   /* Normal: always emitted with 3 components. */
+   if (inputs & VERT_BIT_NORMAL) {
+      if (!rmesa->tcl.norm.buf)
+	 rcommon_emit_vector( ctx, 
+			      &(rmesa->tcl.aos[nr]),
+			      (char *)VB->AttribPtr[_TNL_ATTRIB_NORMAL]->data,
+			      3,
+			      VB->AttribPtr[_TNL_ATTRIB_NORMAL]->stride,
+			      count);
+
+      vfmt |= RADEON_CP_VC_FRMT_N0;
+      nr++;
+   }
+
+   /* Primary color: alpha is emitted only for a 4-component source that
+    * is either strided or whose first element has alpha != 1.0;
+    * otherwise only three components are sent.
+    */
+   if (inputs & VERT_BIT_COLOR0) {
+      int emitsize;
+      if (VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size == 4 &&
+	  (VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride != 0 ||
+	   VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data[0][3] != 1.0)) {
+	 vfmt |= RADEON_CP_VC_FRMT_FPCOLOR | RADEON_CP_VC_FRMT_FPALPHA;
+	 emitsize = 4;
+      }
+
+      else {
+	 vfmt |= RADEON_CP_VC_FRMT_FPCOLOR;
+	 emitsize = 3;
+      }
+
+      if (!rmesa->tcl.rgba.buf)
+	rcommon_emit_vector( ctx,
+			     &(rmesa->tcl.aos[nr]),
+			     (char *)VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data,
+			     emitsize,
+			     VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride,
+			     count);
+
+      nr++;
+   }
+
+
+   /* Secondary color: always emitted with 3 components. */
+   if (inputs & VERT_BIT_COLOR1) {
+      if (!rmesa->tcl.spec.buf) {
+
+	rcommon_emit_vector( ctx,
+			     &(rmesa->tcl.aos[nr]),
+			     (char *)VB->AttribPtr[_TNL_ATTRIB_COLOR1]->data,
+			     3,
+			     VB->AttribPtr[_TNL_ATTRIB_COLOR1]->stride,
+			     count);
+      }
+
+      vfmt |= RADEON_CP_VC_FRMT_FPSPEC;
+      nr++;
+   }
+
+/* FIXME: not sure if this is correct. May need to stitch this together with
+   secondary color. It seems odd that for primary color color and alpha values
+   are emitted together but for secondary color not. */
+   if (inputs & VERT_BIT_FOG) {
+      if (!rmesa->tcl.fog.buf)
+	 emit_vecfog( ctx,
+		      &(rmesa->tcl.aos[nr]),
+		      (char *)VB->AttribPtr[_TNL_ATTRIB_FOG]->data,
+		      VB->AttribPtr[_TNL_ATTRIB_FOG]->stride,
+		      count);
+
+      vfmt |= RADEON_CP_VC_FRMT_FPFOG;
+      nr++;
+   }
+
+
+   /* Start from the current output vertex format with all Q bits cleared;
+    * the loop below sets them again for the units that need them.
+    */
+   vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
+	  ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1|RADEON_TCL_VTX_Q2));
+      
+   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
+      if (inputs & VERT_BIT_TEX(unit)) {
+	 if (!rmesa->tcl.tex[unit].buf)
+	    emit_tex_vector( ctx,
+			     &(rmesa->tcl.aos[nr]),
+			     (char *)VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->data,
+			     VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size,
+			     VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->stride,
+			     count );
+	 nr++;
+
+	 vfmt |= RADEON_ST_BIT(unit);
+         /* assume we need the 3rd coord if texgen is active for r/q OR at least
+	    3 coords are submitted. This may not be 100% correct */
+         if (VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size >= 3) {
+	    vtx |= RADEON_Q_BIT(unit);
+	    vfmt |= RADEON_Q_BIT(unit);
+	 }
+	 if ( (ctx->Texture.Unit[unit].TexGenEnabled & (R_BIT | Q_BIT)) )
+	    vtx |= RADEON_Q_BIT(unit);
+	 else if ((VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size >= 3) &&
+	          ((ctx->Texture.Unit[unit]._ReallyEnabled & (TEXTURE_CUBE_BIT)) == 0)) {
+	    /* swaptexmatcol is 0 for a 3-component source and 1 for a
+	     * 4-component one; the texture matrix is re-uploaded when the
+	     * unit needs a matrix and its recorded TexMatColSwap bit differs.
+	     */
+	    GLuint swaptexmatcol = (VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size - 3);
+	    if (((rmesa->NeedTexMatrix >> unit) & 1) &&
+		 (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
+	       radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
+	 }
+      }
+   }
+
+   /* Only touch the hardware state when the Q bits actually changed. */
+   if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
+      RADEON_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
+   }
+
+   rmesa->tcl.nr_aos_components = nr;
+   rmesa->tcl.vertex_format = vfmt;
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h b/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
new file mode 100644
index 0000000000..d764ccb982
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_vbtmp.h
@@ -0,0 +1,300 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  4.1
+ *
+ * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/* LOCALVARS is expanded at the top of the emit function below; it
+ * defaults to nothing unless the including file defined it.
+ */
+#ifndef LOCALVARS
+#define LOCALVARS
+#endif
+
+/* TCL_DEBUG is undefined unconditionally first, so within this template
+ * it is always 0 and the tracing fprintf() calls below are disabled.
+ */
+#undef TCL_DEBUG
+#ifndef TCL_DEBUG
+#define TCL_DEBUG 0
+#endif
+
+/*
+ * Vertex emit template.  Writes vertices [start, end) of the context's
+ * TNL vertex buffer to `dest` as a packed sequence of union emit_union
+ * elements.
+ *
+ * Which attributes are written is decided by the DO_* macros, which the
+ * including file must define (together with TAG, and union emit_union).
+ * Per vertex the order is:
+ *
+ *    x y z [w] [normal x y z] [packed rgba] [packed specular/fog]
+ *    [tex0 s t [q]] [tex1 s t [q]] [tex2 s t [q]]
+ *
+ * Normals, colors, fog and texture coordinates whose VB->AttribPtr[]
+ * entry is null are read from ctx->Current.Attrib[] with a stride of
+ * zero instead.
+ */
+static void TAG(emit)( GLcontext *ctx,
+		       GLuint start, GLuint end,
+		       void *dest )
+{
+   LOCALVARS
+      struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLuint (*tc0)[4], (*tc1)[4], (*tc2)[4];
+   GLfloat (*col)[4], (*spec)[4];
+   GLfloat (*fog)[4];
+   GLuint (*norm)[4];
+   GLuint tc0_stride, tc1_stride, col_stride, spec_stride, fog_stride;
+   GLuint tc2_stride, norm_stride;
+   /* Bit n of fill_tex: unit n has fewer than 3 source components, so
+    * 1.0 is emitted as its third coordinate.
+    * Bit n of rqcoordsnoswap: unit n has fewer than 4 (but at least 3)
+    * source components, so component [2] is emitted as the third
+    * coordinate instead of component [3].
+    * Both are only ever set when DO_PTEX is true.
+    */
+   GLuint fill_tex = 0;
+   GLuint rqcoordsnoswap = 0;
+   GLuint (*coord)[4];
+   GLuint coord_stride; /* object coordinates */
+   int i;
+
+   union emit_union *v = (union emit_union *)dest;
+
+   radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __FUNCTION__);
+
+   coord = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_POS]->data;
+   coord_stride = VB->AttribPtr[_TNL_ATTRIB_POS]->stride;
+
+   /* Resolve the source pointer and stride of every enabled attribute. */
+   if (DO_TEX2) {
+      if (VB->AttribPtr[_TNL_ATTRIB_TEX2]) {
+	 const GLuint t2 = GET_TEXSOURCE(2);
+	 tc2 = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->data;
+	 tc2_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->stride;
+	 if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->size < 3) {
+	    fill_tex |= (1<<2);
+	 }
+	 else if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t2]->size < 4) {
+	    rqcoordsnoswap |= (1<<2);
+	 }
+      } else {
+	 tc2 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX2];
+	 tc2_stride = 0;
+      }
+   }
+
+   if (DO_TEX1) {
+      if (VB->AttribPtr[_TNL_ATTRIB_TEX1]) {
+	 const GLuint t1 = GET_TEXSOURCE(1);
+	 tc1 = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->data;
+	 tc1_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->stride;
+	 if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->size < 3) {
+	    fill_tex |= (1<<1);
+	 }
+	 else if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t1]->size < 4) {
+	    rqcoordsnoswap |= (1<<1);
+	 }
+      } else {
+	 tc1 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX1];
+	 tc1_stride = 0;
+      }
+   }
+
+   if (DO_TEX0) {
+      if (VB->AttribPtr[_TNL_ATTRIB_TEX0]) {
+	 const GLuint t0 = GET_TEXSOURCE(0);
+	 tc0_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->stride;
+	 tc0 = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->data;
+	 if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->size < 3) {
+	    fill_tex |= (1<<0);
+	 }
+	 else if (DO_PTEX && VB->AttribPtr[_TNL_ATTRIB_TEX0 + t0]->size < 4) {
+	    rqcoordsnoswap |= (1<<0);
+	 }
+      } else {
+	 tc0 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX0];
+	 tc0_stride = 0;
+      }
+	 
+   }
+
+   if (DO_NORM) {
+      if (VB->AttribPtr[_TNL_ATTRIB_NORMAL]) {
+	 norm_stride = VB->AttribPtr[_TNL_ATTRIB_NORMAL]->stride;
+	 norm = (GLuint (*)[4])VB->AttribPtr[_TNL_ATTRIB_NORMAL]->data;
+      } else {
+	 norm_stride = 0;
+	 norm = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_NORMAL];
+      }
+   }
+
+   if (DO_RGBA) {
+      if (VB->AttribPtr[_TNL_ATTRIB_COLOR0]) {
+	 col = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data;
+	 col_stride = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride;
+      } else {
+	 col = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
+	 col_stride = 0;
+      }
+   }
+
+   /* Specular and fog share one packed word, so both sources are set up
+    * whenever that word is part of the format.
+    */
+   if (DO_SPEC_OR_FOG) {
+      if (VB->AttribPtr[_TNL_ATTRIB_COLOR1]) {
+	 spec = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->data;
+	 spec_stride = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->stride;
+      } else {
+	 spec = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_COLOR1];
+	 spec_stride = 0;
+      }
+   }
+
+   if (DO_SPEC_OR_FOG) {
+      if (VB->AttribPtr[_TNL_ATTRIB_FOG]) {
+	 fog = VB->AttribPtr[_TNL_ATTRIB_FOG]->data;
+	 fog_stride = VB->AttribPtr[_TNL_ATTRIB_FOG]->stride;
+      } else {
+	 fog = (GLfloat (*)[4])ctx->Current.Attrib[VERT_ATTRIB_FOG];
+	 fog_stride = 0;
+      }
+   }
+   
+   
+   /* Advance every source pointer to vertex `start`. */
+   if (start) {
+      coord =  (GLuint (*)[4])((GLubyte *)coord + start * coord_stride);
+      if (DO_TEX0)
+	 tc0 =  (GLuint (*)[4])((GLubyte *)tc0 + start * tc0_stride);
+      if (DO_TEX1) 
+	 tc1 =  (GLuint (*)[4])((GLubyte *)tc1 + start * tc1_stride);
+      if (DO_TEX2) 
+	 tc2 =  (GLuint (*)[4])((GLubyte *)tc2 + start * tc2_stride);
+      if (DO_NORM) 
+	 norm =  (GLuint (*)[4])((GLubyte *)norm + start * norm_stride);
+      if (DO_RGBA) 
+	 STRIDE_4F(col, start * col_stride);
+      if (DO_SPEC)
+	 STRIDE_4F(spec, start * spec_stride);
+      if (DO_FOG)
+	 STRIDE_4F(fog, start * fog_stride);
+   }
+
+
+   {
+      for (i=start; i < end; i++) {
+	 
+	 /* Position: x, y, z and optionally w, copied as raw words. */
+	 v[0].ui = coord[0][0];
+	 v[1].ui = coord[0][1];
+	 v[2].ui = coord[0][2];
+	 if (DO_W) {
+	    v[3].ui = coord[0][3];
+	    v += 4;
+	 } 
+	 else
+	    v += 3;
+	 coord =  (GLuint (*)[4])((GLubyte *)coord +  coord_stride);
+
+	 if (DO_NORM) {
+	    v[0].ui = norm[0][0];
+	    v[1].ui = norm[0][1];
+	    v[2].ui = norm[0][2];
+	    v += 3;
+	    norm =  (GLuint (*)[4])((GLubyte *)norm +  norm_stride);
+	 }
+	 /* Primary color: four floats packed into one rgba element. */
+	 if (DO_RGBA) {
+	    UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.red, col[0][0]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.green, col[0][1]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.blue, col[0][2]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.alpha, col[0][3]);
+	    STRIDE_4F(col, col_stride);
+	    v++;
+	 }
+	 /* Specular/fog element: specular fills red, green and blue, the
+	  * fog blend factor fills alpha.  Channels whose DO_SPEC / DO_FOG
+	  * test fails are not written here.
+	  */
+	 if (DO_SPEC_OR_FOG) {
+	    if (DO_SPEC) {
+	       UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.red, spec[0][0]);
+	       UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.green, spec[0][1]);
+	       UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.blue, spec[0][2]);
+	       STRIDE_4F(spec, spec_stride);
+	    }
+	    if (DO_FOG) {
+	       UNCLAMPED_FLOAT_TO_UBYTE(v[0].rgba.alpha, radeonComputeFogBlendFactor(ctx, fog[0][0]));
+	       STRIDE_4F(fog, fog_stride);
+	    }
+	    if (TCL_DEBUG) fprintf(stderr, "%x ", v[0].ui);
+	    v++;
+	 }
+	 /* Texture units: s and t always; with DO_PTEX a third coordinate
+	  * chosen by the fill_tex / rqcoordsnoswap bits of the unit.
+	  */
+	 if (DO_TEX0) {
+	    v[0].ui = tc0[0][0];
+	    v[1].ui = tc0[0][1];
+	    if (TCL_DEBUG) fprintf(stderr, "t0: %.2f %.2f ", v[0].f, v[1].f);
+	    if (DO_PTEX) {
+	       if (fill_tex & (1<<0))
+		  v[2].f = 1.0;
+	       else if (rqcoordsnoswap & (1<<0))
+		  v[2].ui = tc0[0][2];
+	       else
+		  v[2].ui = tc0[0][3];
+	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	    tc0 =  (GLuint (*)[4])((GLubyte *)tc0 +  tc0_stride);
+	 }
+	 if (DO_TEX1) {
+	    v[0].ui = tc1[0][0];
+	    v[1].ui = tc1[0][1];
+	    if (TCL_DEBUG) fprintf(stderr, "t1: %.2f %.2f ", v[0].f, v[1].f);
+	    if (DO_PTEX) {
+	       if (fill_tex & (1<<1))
+		  v[2].f = 1.0;
+	       else if (rqcoordsnoswap & (1<<1))
+		  v[2].ui = tc1[0][2];
+	       else
+		  v[2].ui = tc1[0][3];
+	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	    tc1 =  (GLuint (*)[4])((GLubyte *)tc1 +  tc1_stride);
+	 } 
+	 if (DO_TEX2) {
+	    v[0].ui = tc2[0][0];
+	    v[1].ui = tc2[0][1];
+	    if (TCL_DEBUG) fprintf(stderr, "t2: %.2f %.2f ", v[0].f, v[1].f);
+	    if (DO_PTEX) {
+	       if (fill_tex & (1<<2))
+		  v[2].f = 1.0;
+	       else if (rqcoordsnoswap & (1<<2))
+		  v[2].ui = tc2[0][2];
+	       else
+		  v[2].ui = tc2[0][3];
+	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
+	       v += 3;
+	    } 
+	    else
+	       v += 2;
+	    tc2 =  (GLuint (*)[4])((GLubyte *)tc2 +  tc2_stride);
+	 } 
+	 if (TCL_DEBUG) fprintf(stderr, "\n");
+      }
+   }
+}
+
+
+
+/* Register this instantiation in setup_tab[IDX]: its emit function, its
+ * vertex format (IND) and its vertex size, i.e. the number of elements
+ * TAG(emit) writes per vertex.
+ */
+static void TAG(init)( void )
+{
+   /* x, y and z are always present; W adds a fourth element. */
+   int nelems = DO_W ? 4 : 3;
+
+   if (DO_NORM)
+      nelems += 3;
+   if (DO_RGBA)
+      nelems += 1;
+   if (DO_SPEC_OR_FOG)
+      nelems += 1;
+
+   /* Each enabled texture unit adds s and t, plus a third coordinate
+    * when DO_PTEX is set.
+    */
+   if (DO_TEX0)
+      nelems += DO_PTEX ? 3 : 2;
+   if (DO_TEX1)
+      nelems += DO_PTEX ? 3 : 2;
+   if (DO_TEX2)
+      nelems += DO_PTEX ? 3 : 2;
+
+   setup_tab[IDX].vertex_size = nelems;
+   setup_tab[IDX].vertex_format = IND;
+   setup_tab[IDX].emit = TAG(emit);
+}
+
+
+#undef IND
+#undef TAG
+#undef IDX
diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
new file mode 100644
index 0000000000..98f96ff2a7
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c
@@ -0,0 +1,433 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+
+#include "vbo/vbo.h"
+#include "math/m_translate.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "math/m_translate.h"
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_tex.h"
+#include "radeon_tcl.h"
+#include "radeon_swtcl.h"
+#include "radeon_maos.h"
+
+
+/* Number of entries in setup_tab; the template instantiations below use
+ * IDX values 0 through 18.
+ */
+#define RADEON_TCL_MAX_SETUP 19
+
+/* One emitted vertex element, viewable as a float, a raw 32-bit integer
+ * or a packed color.
+ */
+union emit_union { float f; GLuint ui; radeon_color_t rgba; };
+
+/* One entry per instantiation of radeon_maos_vbtmp.h, filled in by the
+ * generated init_*() functions: the emit routine, the vertex size it
+ * produces and the vertex format bits (IND) it implements.
+ */
+static struct {
+   void   (*emit)( GLcontext *, GLuint, GLuint, void * );
+   GLuint vertex_size;
+   GLuint vertex_format;
+} setup_tab[RADEON_TCL_MAX_SETUP];
+
+/* Attribute tests used by the template.  Each one checks a bit of IND,
+ * the vertex format of the instantiation currently being generated.
+ * DO_SPEC and DO_FOG additionally depend on run-time GL state read
+ * through `ctx`, so they can only be used where a `ctx` is in scope.
+ * DO_PTEX tests only the Q0 bit and is applied to every texture unit.
+ */
+#define DO_W    (IND & RADEON_CP_VC_FRMT_W0)
+#define DO_RGBA (IND & RADEON_CP_VC_FRMT_PKCOLOR)
+#define DO_SPEC_OR_FOG (IND & RADEON_CP_VC_FRMT_PKSPEC)
+#define DO_SPEC ((IND & RADEON_CP_VC_FRMT_PKSPEC) && \
+		 (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR))
+#define DO_FOG  ((IND & RADEON_CP_VC_FRMT_PKSPEC) && ctx->Fog.Enabled && \
+		 (ctx->Fog.FogCoordinateSource == GL_FOG_COORD))
+#define DO_TEX0 (IND & RADEON_CP_VC_FRMT_ST0)
+#define DO_TEX1 (IND & RADEON_CP_VC_FRMT_ST1)
+#define DO_TEX2 (IND & RADEON_CP_VC_FRMT_ST2)
+#define DO_PTEX (IND & RADEON_CP_VC_FRMT_Q0)
+#define DO_NORM (IND & RADEON_CP_VC_FRMT_N0)
+
+#define DO_TEX3 0
+
+/* Identity mapping: texture unit n reads texture coordinate set n. */
+#define GET_TEXSOURCE(n)  n
+
+/***********************************************************************
+ *             Generate vertex emit functions               *
+ ***********************************************************************/
+
+
+/* Defined in order of increasing vertex size:
+ */
+#define IDX 0
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR)
+#define TAG(x) x##_rgba
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 1
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 2
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0)
+#define TAG(x) x##_rgba_st
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 3
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgba_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 4
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 5
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1)
+#define TAG(x) x##_rgba_st_st
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 6
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgba_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 7
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1)
+#define TAG(x) x##_rgba_spec_st_st
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 8
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_st_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 9
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgba_spec_st_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 10
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0)
+#define TAG(x) x##_rgba_stq
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 11
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_Q1|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0)
+#define TAG(x) x##_rgba_stq_stq
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 12 /* instance number handed to radeon_maos_vbtmp.h (header not visible here; presumably the setup_tab slot -- confirm) */
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_W0|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_Q1|		\
+	     RADEON_CP_VC_FRMT_N0) /* IND: mask of RADEON_CP_VC_FRMT_* flags for this instance */
+#define TAG(x) x##_w_rgba_spec_stq_stq_n /* name suffix; init_tcl_verts() calls the resulting init_w_rgba_spec_stq_stq_n() */
+#include "radeon_maos_vbtmp.h" /* included once per IDX/IND/TAG triple */
+
+#define IDX 13
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_ST2)
+#define TAG(x) x##_rgba_st_st_st
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 14
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_ST2)
+#define TAG(x) x##_rgba_spec_st_st_st
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 15
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_ST2|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_st_st_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 16
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_ST2|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgba_spec_st_st_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 17
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_Q1|		\
+	     RADEON_CP_VC_FRMT_ST2|		\
+	     RADEON_CP_VC_FRMT_Q2)
+#define TAG(x) x##_rgba_stq_stq_stq
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 18
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_W0|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_Q1|		\
+	     RADEON_CP_VC_FRMT_ST2|		\
+	     RADEON_CP_VC_FRMT_Q2|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_w_rgba_spec_stq_stq_stq_n
+#include "radeon_maos_vbtmp.h"
+
+
+
+
+/***********************************************************************
+ *                         Initialization 
+ ***********************************************************************/
+
+
+static void init_tcl_verts( void ) /* calls init_<TAG suffix>() for the radeon_maos_vbtmp.h instantiations (presumably one call per instance) */
+{  /* invoked once, from the first radeonEmitArrays() call */
+   init_rgba();
+   init_n();
+   init_rgba_n();
+   init_rgba_st();
+   init_st_n();
+   init_rgba_st_st();
+   init_rgba_st_n();
+   init_rgba_spec_st_st();
+   init_st_st_n();
+   init_rgba_spec_st_st_n();
+   init_rgba_stq();
+   init_rgba_stq_stq();
+   init_w_rgba_spec_stq_stq_n();
+   init_rgba_st_st_st();
+   init_rgba_spec_st_st_st();
+   init_st_st_st_n();
+   init_rgba_spec_st_st_st_n();
+   init_rgba_stq_stq_stq();
+   init_w_rgba_spec_stq_stq_stq_n();
+}
+
+
+/* Emit the current TNL vertex buffer as a single array (aos[0]): build the
+ * required vertex-format mask from `inputs`, take the first setup_tab entry
+ * whose format covers it, and fill a newly allocated DMA region using that
+ * entry's emit function. */
+void radeonEmitArrays( GLcontext *ctx, GLuint inputs )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLuint req = 0;
+   GLuint unit;
+   GLuint vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
+		 ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1|RADEON_TCL_VTX_Q2));
+   int i;
+   static int firsttime = 1;
+   /* Run the template init routines on the first call only. */
+   if (firsttime) {
+      init_tcl_verts();
+      firsttime = 0;
+   }
+
+   if (1) { /* Z is always requested; W0 only when positions have 4 components */
+      req |= RADEON_CP_VC_FRMT_Z;
+      if (VB->AttribPtr[_TNL_ATTRIB_POS]->size == 4) {
+	 req |= RADEON_CP_VC_FRMT_W0;
+      }
+   }
+
+   if (inputs & VERT_BIT_NORMAL) {
+      req |= RADEON_CP_VC_FRMT_N0;
+   }
+
+   if (inputs & VERT_BIT_COLOR0) {
+      req |= RADEON_CP_VC_FRMT_PKCOLOR;
+   }
+
+   if (inputs & (VERT_BIT_COLOR1|VERT_BIT_FOG)) {
+      req |= RADEON_CP_VC_FRMT_PKSPEC;
+   }
+
+   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
+      if (inputs & VERT_BIT_TEX(unit)) {
+	 req |= RADEON_ST_BIT(unit);
+	 /* assume we need the 3rd coord if texgen is active for r/q OR at least
+	    3 coords are submitted. This may not be 100% correct */
+	 if (VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size >= 3) {
+	    req |= RADEON_Q_BIT(unit);
+	    vtx |= RADEON_Q_BIT(unit);
+	 }
+	 if ( (ctx->Texture.Unit[unit].TexGenEnabled & (R_BIT | Q_BIT)) )
+	    vtx |= RADEON_Q_BIT(unit);
+	 else if ((VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size >= 3) &&
+	          ((ctx->Texture.Unit[unit]._ReallyEnabled & (TEXTURE_CUBE_BIT)) == 0)) {
+	    GLuint swaptexmatcol = (VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit]->size - 3); /* 0 for 3 coords, 1 for 4 */
+	    if (((rmesa->NeedTexMatrix >> unit) & 1) &&
+		 (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1)))
+	       radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ;
+	 }
+      }
+   }
+   /* Touch the TCL output-format word only when the recomputed value differs. */
+   if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
+      RADEON_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
+   }
+   /* First table entry whose vertex format contains every bit of req. */
+   for (i = 0 ; i < RADEON_TCL_MAX_SETUP ; i++) 
+      if ((setup_tab[i].vertex_format & req) == req) 
+	 break;
+   /* NOTE(review): if nothing matched, i == RADEON_TCL_MAX_SETUP and setup_tab[i] below looks out of bounds -- confirm a catch-all entry exists. */
+   if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format &&
+       rmesa->radeon.tcl.aos[0].bo)
+      return; /* format unchanged and aos[0] already has a buffer */
+
+   if (rmesa->radeon.tcl.aos[0].bo)
+      radeonReleaseArrays( ctx, ~0 );
+
+   radeonAllocDmaRegion( &rmesa->radeon,
+			 &rmesa->radeon.tcl.aos[0].bo,
+			 &rmesa->radeon.tcl.aos[0].offset,
+			 VB->Count * setup_tab[i].vertex_size * 4, 
+			 4);
+
+   /* The vertex code expects Obj to be clean to element 3.  To fix
+    * this, add more vertex code (for obj-2, obj-3) or preferably move
+    * to maos.  
+    */
+   if (VB->AttribPtr[_TNL_ATTRIB_POS]->size < 3 ||
+       (VB->AttribPtr[_TNL_ATTRIB_POS]->size == 3 &&
+	(setup_tab[i].vertex_format & RADEON_CP_VC_FRMT_W0))) {
+      _math_trans_4f( rmesa->tcl.ObjClean.data,
+		      VB->AttribPtr[_TNL_ATTRIB_POS]->data,
+		      VB->AttribPtr[_TNL_ATTRIB_POS]->stride,
+		      GL_FLOAT,
+		      VB->AttribPtr[_TNL_ATTRIB_POS]->size,
+		      0,
+		      VB->Count );
+      switch (VB->AttribPtr[_TNL_ATTRIB_POS]->size) {
+      case 1:
+	    _mesa_vector4f_clean_elem(&rmesa->tcl.ObjClean, VB->Count, 1); /* no break: continues into case 2 */
+      case 2:
+	    _mesa_vector4f_clean_elem(&rmesa->tcl.ObjClean, VB->Count, 2); /* no break: continues into case 3 */
+      case 3:
+	 if (setup_tab[i].vertex_format & RADEON_CP_VC_FRMT_W0) {
+	    _mesa_vector4f_clean_elem(&rmesa->tcl.ObjClean, VB->Count, 3);
+	 }
+      case 4:
+      default:
+	 break;
+      }
+      VB->AttribPtr[_TNL_ATTRIB_POS] = &rmesa->tcl.ObjClean; /* later readers of the position attribute see the cleaned copy */
+   }
+
+   radeon_bo_map(rmesa->radeon.tcl.aos[0].bo, 1);
+   setup_tab[i].emit( ctx, 0, VB->Count, 
+		      rmesa->radeon.tcl.aos[0].bo->ptr + rmesa->radeon.tcl.aos[0].offset);
+   radeon_bo_unmap(rmesa->radeon.tcl.aos[0].bo);
+   //   rmesa->radeon.tcl.aos[0].size = setup_tab[i].vertex_size;
+   rmesa->radeon.tcl.aos[0].stride = setup_tab[i].vertex_size;
+   rmesa->tcl.vertex_format = setup_tab[i].vertex_format;
+   rmesa->radeon.tcl.aos_count = 1;
+}
+
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
new file mode 100644
index 0000000000..6cd1d87de2
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
@@ -0,0 +1,658 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora.
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_mipmap_tree.h"
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "main/simple_list.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/enums.h"
+#include "radeon_texture.h"
+#include "radeon_tile.h"
+
+/**
+ * Row stride, in bytes, of one row of blocks of a compressed image.
+ *
+ * \param format    compressed format (gives block width and bytes per block)
+ * \param width     image width
+ * \param minStride minimum stride in bytes
+ * \return bytes of ceil(width / blockWidth) blocks; if that is below \p minStride, \p minStride rounded up to whole blocks
+ */
+static unsigned get_aligned_compressed_row_stride(
+		gl_format format,
+		unsigned width,
+		unsigned minStride)
+{
+	unsigned blkW, blkH;
+	const unsigned bytesPerBlock = _mesa_get_format_bytes(format);
+	unsigned blocks, rowBytes;
+
+	_mesa_get_format_block_size(format, &blkW, &blkH);
+
+	/* Blocks covering the width; a too-short row grows to the blocks covering minStride. */
+	blocks = (width + blkW - 1) / blkW;
+	if (blocks * bytesPerBlock < minStride)
+		blocks = (minStride + bytesPerBlock - 1) / bytesPerBlock;
+	rowBytes = blocks * bytesPerBlock;
+
+	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+			"%s width %u, minStride %u, block(bytes %u, width %u):"
+			"stride %u\n",
+			__func__, width, minStride, bytesPerBlock, blkW, rowBytes);
+	return rowBytes;
+}
+
+/**
+ * Size in bytes of one image of a mip level.
+ * Compressed formats count rows of blocks; tiled images first round the
+ * height up to the tile height reported by get_tile_size().
+ */
+unsigned get_texture_image_size(
+		gl_format format,
+		unsigned rowStride,
+		unsigned height,
+		unsigned depth,
+		unsigned tiling)
+{
+	unsigned rows = height;
+
+	if (_mesa_is_format_compressed(format)) {
+		unsigned blkW, blkH;
+		_mesa_get_format_block_size(format, &blkW, &blkH);
+		rows = (height + blkH - 1) / blkH;
+	} else if (tiling) {
+		unsigned tileW, tileH;
+		get_tile_size(format, &tileW, &tileH);
+		rows = (height + (tileH - 1)) & ~(tileH - 1); /* mask rounding: exact only for power-of-two tile heights */
+	}
+	return rowStride * rows * depth;
+}
+
+/** Row stride in bytes: compressed formats use the block-based helper; other
+ *  rows are rounded up to the rect, tile-row or default alignment. */
+unsigned get_texture_image_row_stride(radeonContextPtr rmesa, gl_format format, unsigned width, unsigned tiling)
+{
+	unsigned mask = rmesa->texture_row_align - 1;
+
+	if (_mesa_is_format_compressed(format))
+		return get_aligned_compressed_row_stride(format, width, rmesa->texture_compressed_row_align);
+
+	if (!_mesa_is_pow_two(width)) {
+		/* non-power-of-two widths use the rectangle alignment */
+		mask = rmesa->texture_rect_row_align - 1;
+	} else if (tiling) {
+		unsigned tileW, tileH;
+		get_tile_size(format, &tileW, &tileH);
+		/* align to the byte width of one tile row */
+		mask = tileW * _mesa_get_format_bytes(format) - 1;
+	}
+	return (_mesa_format_row_stride(format, width) + mask) & ~mask;
+}
+
+/**
+ * Compute row stride and size of mip level \p level and place the image of
+ * \p face at the current end of the tree.
+ *
+ * \param curOffset in: byte offset at which this image starts;
+ *                  out: advanced by the size of the image.
+ */
+static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree *mt,
+	GLuint face, GLuint level, GLuint* curOffset)
+{
+	radeon_mipmap_level *const lvl = &mt->levels[level];
+	/* Power-of-two height: only reported in the trace, the size uses lvl->height. */
+	const GLuint tracedHeight = _mesa_next_pow_two_32(lvl->height);
+	const GLuint start = *curOffset;
+
+	lvl->rowstride = get_texture_image_row_stride(rmesa, mt->mesaFormat, lvl->width, mt->tilebits);
+	lvl->size = get_texture_image_size(mt->mesaFormat, lvl->rowstride, lvl->height, lvl->depth, mt->tilebits);
+
+	assert(lvl->size > 0);
+
+	/* Record where this face lives, then reserve its bytes. */
+	lvl->faces[face].offset = start;
+	*curOffset = start + lvl->size;
+
+	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+			"%s(%p) level %d, face %d: rs:%d %dx%d at %d\n",
+			__func__, rmesa, level, face,
+			lvl->rowstride, lvl->width, tracedHeight, lvl->faces[face].offset);
+}
+
+/* `size` shifted right by `levels` bits, but never less than 1. */
+static GLuint minify(GLuint size, GLuint levels)
+{
+	const GLuint shrunk = size >> levels;
+
+	return (shrunk >= 1) ? shrunk : 1;
+}
+
+
+/* Layout for chips older than R300: face-major (all levels of face 0, then face 1, ...). */
+static void calculate_miptree_layout_r100(radeonContextPtr rmesa, radeon_mipmap_tree *mt)
+{
+	GLuint end = 0;	/* running byte offset of the next image */
+	GLuint face;
+
+	assert(mt->numLevels <= rmesa->glCtx->Const.MaxTextureLevels);
+
+	for (face = 0; face < mt->faces; face++) {
+		GLuint n;
+		for (n = 0; n < mt->numLevels; n++) {
+			const GLuint level = mt->baseLevel + n;
+			radeon_mipmap_level *lvl = &mt->levels[level];
+			lvl->valid = 1;
+			lvl->width = minify(mt->width0, n);
+			lvl->height = minify(mt->height0, n);
+			lvl->depth = minify(mt->depth0, n);
+			compute_tex_image_offset(rmesa, mt, face, level, &end);
+		}
+	}
+	/* Round the total up with RADEON_OFFSET_MASK. */
+	mt->totalsize = (end + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+	radeon_print(RADEON_TEXTURE, RADEON_TRACE, "%s(%p, %p) total size %d\n",
+			__func__, rmesa, mt, mt->totalsize);
+}
+
+/* Layout for R300 and newer chips: level-major (all faces of a level are adjacent). */
+static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_tree *mt)
+{
+	const int r600Cube = rmesa->radeonScreen->chip_family >= CHIP_FAMILY_R600 &&
+			     mt->target == GL_TEXTURE_CUBE_MAP;
+	GLuint end = 0;	/* running byte offset of the next image */
+	GLuint n;
+
+	assert(mt->numLevels <= rmesa->glCtx->Const.MaxTextureLevels);
+
+	for (n = 0; n < mt->numLevels; n++) {
+		const GLuint level = mt->baseLevel + n;
+		radeon_mipmap_level *lvl = &mt->levels[level];
+		GLuint face;
+
+		lvl->valid = 1;
+		lvl->width = minify(mt->width0, n);
+		lvl->height = minify(mt->height0, n);
+		lvl->depth = minify(mt->depth0, n);
+		for (face = 0; face < mt->faces; face++)
+			compute_tex_image_offset(rmesa, mt, face, level, &end);
+
+		/* r600 cube levels seem to be aligned to 8 faces; the first level has its
+		 * own offset register, so pad by 2 images after every level >= 1. */
+		if (r600Cube && level >= 1)
+			end += 2 * lvl->size;
+	}
+	/* Round the total up with RADEON_OFFSET_MASK. */
+	mt->totalsize = (end + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK;
+	radeon_print(RADEON_TEXTURE, RADEON_TRACE, "%s(%p, %p) total size %d\n",
+			__func__, rmesa, mt, mt->totalsize);
+}
+
+/**
+ * Create a new mipmap tree, calculate its layout and allocate its buffer object.
+ * @return the tree (refcount 1), or NULL if the tree or its buffer cannot be allocated.
+ */
+static radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa,
+		GLenum target, gl_format mesaFormat, GLuint baseLevel, GLuint numLevels,
+		GLuint width0, GLuint height0, GLuint depth0, GLuint tilebits)
+{
+	radeon_mipmap_tree *mt = CALLOC_STRUCT(_radeon_mipmap_tree);
+	radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
+		"%s(%p) new tree is %p.\n", __func__, rmesa, mt);
+	if (!mt)
+		return NULL;	/* out of memory: there is nothing to fill in */
+
+	mt->mesaFormat = mesaFormat;
+	mt->refcount = 1;
+	mt->target = target;
+	mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+	mt->baseLevel = baseLevel;
+	mt->numLevels = numLevels;
+	mt->width0 = width0;
+	mt->height0 = height0;
+	mt->depth0 = depth0;
+	mt->tilebits = tilebits;
+	if (rmesa->radeonScreen->chip_family >= CHIP_FAMILY_R300)
+		calculate_miptree_layout_r300(rmesa, mt);
+	else
+		calculate_miptree_layout_r100(rmesa, mt);
+
+	mt->bo = radeon_bo_open(rmesa->radeonScreen->bom, 0, mt->totalsize, 1024,
+				RADEON_GEM_DOMAIN_VRAM, 0);
+	if (!mt->bo) {	/* no storage: do not hand out a tree that cannot be mapped */
+		free(mt);
+		return NULL;
+	}
+	return mt;
+}
+
+/* Take a new reference on `mt` and store it in `*ptr`, which must be empty. */
+void radeon_miptree_reference(radeon_mipmap_tree *mt, radeon_mipmap_tree **ptr)
+{
+	assert(*ptr == NULL);
+
+	mt->refcount += 1;
+	assert(mt->refcount != 0);	/* the unsigned counter must not wrap to 0 */
+	*ptr = mt;
+}
+
+/* Drop the reference held in `*ptr` (if any) and clear the slot; with the last
+ * reference the buffer-object reference is dropped and the tree is freed. */
+void radeon_miptree_unreference(radeon_mipmap_tree **ptr)
+{
+	radeon_mipmap_tree *const tree = *ptr;
+
+	if (tree == NULL)
+		return;
+
+	assert(tree->refcount > 0);
+	if (--tree->refcount == 0) {
+		radeon_bo_unref(tree->bo);
+		free(tree);
+	}
+	*ptr = NULL;
+}
+
+/**
+ * Calculate the first and last mip level used by the given texture object.
+ * @param[in] tObj texture object whose LOD values to calculate
+ * @param[out] pminLod first level used; not written for unhandled targets
+ * @param[out] pmaxLod last level used; not written for unhandled targets
+ */
+static void calculate_min_max_lod(struct gl_texture_object *tObj,
+				       unsigned *pminLod, unsigned *pmaxLod)
+{
+	int minLod, maxLod;
+	/* Yes, this looks overly complicated, but it's all needed.
+	 * NOTE(review): Image[0][minLod] below is not NULL-checked -- confirm callers only pass complete textures. */
+	switch (tObj->Target) {
+	case GL_TEXTURE_1D:
+	case GL_TEXTURE_2D:
+	case GL_TEXTURE_3D:
+	case GL_TEXTURE_CUBE_MAP:
+		if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) {
+			/* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL.
+			*/
+			minLod = maxLod = tObj->BaseLevel;
+		} else {
+			minLod = tObj->BaseLevel + (GLint)(tObj->MinLod); /* the cast truncates toward zero */
+			minLod = MAX2(minLod, tObj->BaseLevel);
+			minLod = MIN2(minLod, tObj->MaxLevel);
+			maxLod = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5); /* rounds non-negative MaxLod to nearest */
+			maxLod = MIN2(maxLod, tObj->MaxLevel);
+			maxLod = MIN2(maxLod, tObj->Image[0][minLod]->MaxLog2 + minLod); /* bounded by the MaxLog2 of the minLod image */
+			maxLod = MAX2(maxLod, minLod); /* need at least one level */
+		}
+		break;
+	case GL_TEXTURE_RECTANGLE_NV:
+	case GL_TEXTURE_4D_SGIS:
+		minLod = maxLod = 0;
+		break;
+	default:
+		return; /* *pminLod and *pmaxLod keep whatever the caller had in them */
+	}
+
+	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+			"%s(%p) target %s, min %d, max %d.\n",
+			__func__, tObj,
+			_mesa_lookup_enum_by_nr(tObj->Target),
+			minLod, maxLod);
+
+	/* save these values */
+	*pminLod = minLod;
+	*pmaxLod = maxLod;
+}
+
+/**
+ * Tell whether \p texImage could be stored in \p mt at \p face / \p level.
+ *
+ * @return GL_TRUE when the face exists in the tree, the formats are equal and
+ *         the level is valid with identical width, height and depth.
+ */
+GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
+		struct gl_texture_image *texImage, GLuint face, GLuint level)
+{
+	const radeon_mipmap_level *slot;
+
+	if (face >= mt->faces || texImage->TexFormat != mt->mesaFormat)
+		return GL_FALSE;
+
+	/* Only levels filled in by the layout code are marked valid. */
+	slot = &mt->levels[level];
+	if (!slot->valid)
+		return GL_FALSE;
+	if (slot->width == texImage->Width &&
+	    slot->height == texImage->Height &&
+	    slot->depth == texImage->Depth)
+		return GL_TRUE;
+	return GL_FALSE;
+}
+
+/**
+ * Checks whether the given miptree has the right format to store the given texture object:
+ * same target and format, at least as many levels as the texture needs, and a
+ * base-level entry with the dimensions of the texture's base image.
+ */
+static GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj)
+{
+	const struct gl_texture_image *baseImg;
+	const radeon_mipmap_level *mtLvl;
+	unsigned needLevels;
+	GLboolean matched;
+
+	if (texObj->BaseLevel < mt->baseLevel)
+		return GL_FALSE;
+
+	/* NOTE(review): the layout code fills levels[] by absolute level, while this index
+	 * is relative to mt->baseLevel -- the two agree only for baseLevel 0; confirm intent. */
+	mtLvl = &mt->levels[texObj->BaseLevel - mt->baseLevel];
+	baseImg = texObj->Image[0][texObj->BaseLevel];
+	needLevels = MIN2(texObj->MaxLevel - texObj->BaseLevel + 1, baseImg->MaxLog2 + 1);
+
+	matched = (mt->target == texObj->Target &&
+	           mt->mesaFormat == baseImg->TexFormat &&
+	           mt->numLevels >= needLevels &&
+	           mtLvl->width == baseImg->Width &&
+	           mtLvl->height == baseImg->Height &&
+	           mtLvl->depth == baseImg->Depth);
+
+	if (radeon_is_debug_enabled(RADEON_TEXTURE,RADEON_TRACE)) {
+		fprintf(stderr, "Checking if miptree %p matches texObj %p\n", mt, texObj);
+		fprintf(stderr, "target %d vs %d\n", mt->target, texObj->Target);
+		fprintf(stderr, "format %d vs %d\n", mt->mesaFormat, baseImg->TexFormat);
+		fprintf(stderr, "numLevels %d vs %d\n", mt->numLevels, needLevels);
+		fprintf(stderr, "width0 %d vs %d\n", mtLvl->width, baseImg->Width);
+		fprintf(stderr, "height0 %d vs %d\n", mtLvl->height, baseImg->Height);
+		fprintf(stderr, "depth0 %d vs %d\n", mtLvl->depth, baseImg->Depth);
+		if (matched)
+			fprintf(stderr, "MATCHED\n");
+		else
+			fprintf(stderr, "NOT MATCHED\n");
+	}
+	return matched;
+}
+
+/**
+ * Try to allocate a mipmap tree for the given texture object, sized from its
+ * base-level image. t->mt is left untouched when there is no base-level image.
+ * @param[in] rmesa radeon context
+ * @param[in] t radeon texture object; must not own a miptree yet
+ */
+void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t)
+{
+	struct gl_texture_object *const obj = &t->base;
+	struct gl_texture_image *const base = obj->Image[0][obj->BaseLevel];
+	GLuint levels;
+
+	assert(!t->mt);
+
+	if (base == NULL) {
+		radeon_warning("%s(%p) No image in given texture object(%p).\n",
+				__func__, rmesa, t);
+		return;
+	}
+
+	/* Level count is capped by both MaxLevel and the base image's MaxLog2. */
+	levels = MIN2(obj->MaxLevel - obj->BaseLevel + 1, base->MaxLog2 + 1);
+	t->mt = radeon_miptree_create(rmesa, obj->Target, base->TexFormat,
+				      obj->BaseLevel, levels,
+				      base->Width, base->Height, base->Depth,
+				      t->tile_bits);
+}
+
+/* Byte offset of an image inside the tree; faces other than 0 are only looked up for cube maps. */
+GLuint
+radeon_miptree_image_offset(radeon_mipmap_tree *mt,
+			    GLuint face, GLuint level)
+{
+	const GLuint slot = (mt->target == GL_TEXTURE_CUBE_MAP_ARB) ? face : 0;
+
+	return mt->levels[level].faces[slot].offset;
+}
+
+/**
+ * Ensure that the given image is stored in the given miptree from now on.
+ * The texels are copied from the image's previous miptree or, failing that,
+ * from image->base.Data (which is then freed); the image ends up referencing mt.
+ */
+static void migrate_image_to_miptree(radeon_mipmap_tree *mt,
+									 radeon_texture_image *image,
+									 int face, int level)
+{
+	radeon_mipmap_level *dstlvl = &mt->levels[level];
+	unsigned char *dest;
+
+	assert(image->mt != mt);
+	assert(dstlvl->valid);
+	assert(dstlvl->width == image->base.Width);
+	assert(dstlvl->height == image->base.Height);
+	assert(dstlvl->depth == image->base.Depth);
+
+	radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+			"%s miptree %p, image %p, face %d, level %d.\n",
+			__func__, mt, image, face, level);
+
+	radeon_bo_map(mt->bo, GL_TRUE); /* GL_TRUE here, GL_FALSE for the source below -- presumably write vs. read-only mapping; confirm */
+	dest = mt->bo->ptr + dstlvl->faces[face].offset;
+
+	if (image->mt) {
+		/* Format etc. should match, so we really just need a memcpy().
+		 * In fact, that memcpy() could be done by the hardware in many
+		 * cases, provided that we have a proper memory manager.
+		 */
+		assert(mt->mesaFormat == image->base.TexFormat);
+
+		radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel];
+
+		assert(image->mtlevel == level);
+		assert(srclvl->size == dstlvl->size);
+		assert(srclvl->rowstride == dstlvl->rowstride);
+
+		radeon_bo_map(image->mt->bo, GL_FALSE);
+		memcpy(dest,
+			image->mt->bo->ptr + srclvl->faces[face].offset,
+			dstlvl->size);
+		radeon_bo_unmap(image->mt->bo);
+		radeon_miptree_unreference(&image->mt); /* also resets image->mt to NULL, which radeon_miptree_reference() below asserts */
+	} else if (image->base.Data) {
+		/* This condition should be removed, it's here to workaround
+		 * a segfault when mapping textures during software fallbacks.
+		 */
+		radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT,
+				"%s Trying to map texture in sowftware fallback.\n",
+				__func__);
+		const uint32_t srcrowstride = _mesa_format_row_stride(image->base.TexFormat, image->base.Width);
+		uint32_t rows = image->base.Height * image->base.Depth;
+
+		if (_mesa_is_format_compressed(image->base.TexFormat)) {
+			uint32_t blockWidth, blockHeight;
+			_mesa_get_format_block_size(image->base.TexFormat, &blockWidth, &blockHeight);
+			rows = (rows + blockHeight - 1) / blockHeight; /* convert texel rows to rows of blocks, rounding up */
+		}
+
+		copy_rows(dest, dstlvl->rowstride, image->base.Data, srcrowstride,
+				  rows, srcrowstride);
+
+		_mesa_free_texmemory(image->base.Data);
+		image->base.Data = 0;
+	}
+
+	radeon_bo_unmap(mt->bo);
+	/* Bind the image to the destination tree; with neither source present nothing was copied. */
+	radeon_miptree_reference(mt, &image->mt);
+	image->mtface = face;
+	image->mtlevel = level;
+}
+
+/**
+ * Filter matching miptrees, and select the one holding the most data.
+ * @param[in] texObj radeon texture object
+ * @param[in] firstLevel first texture level to check
+ * @param[in] lastLevel last texture level to check
+ * @return best matching tree (no reference taken), or NULL if none matches,
+ *         the level range is empty or the scratch arrays cannot be allocated
+ */
+static radeon_mipmap_tree * get_biggest_matching_miptree(radeonTexObj *texObj,
+														 unsigned firstLevel,
+														 unsigned lastLevel)
+{
+	unsigned numLevels, level, i;
+	unsigned mtCount = 0;
+	unsigned maxMtIndex = 0;
+	unsigned *mtSizes;
+	radeon_mipmap_tree **mts;
+	radeon_mipmap_tree *result = NULL;
+
+	if (lastLevel < firstLevel)
+		return NULL;	/* the level count below would wrap around */
+	/* At most one candidate tree per level. */
+	numLevels = lastLevel - firstLevel + 1;
+	mtSizes = calloc(numLevels, sizeof(*mtSizes));
+	mts = calloc(numLevels, sizeof(*mts));
+	if (!mtSizes || !mts)
+		goto out;	/* out of memory: report "no match" instead of crashing */
+
+	for (level = firstLevel; level <= lastLevel; ++level) {
+		radeon_texture_image *img = get_radeon_texture_image(texObj->base.Image[0][level]);
+		unsigned found = 0;
+		/* TODO: why this hack?? (a missing image ends the scan) */
+		if (!img)
+			break;
+		if (!img->mt)
+			continue;
+		/* A tree that is already listed just accumulates this level's bytes. */
+		for (i = 0; i < mtCount; ++i) {
+			if (mts[i] == img->mt) {
+				found = 1;
+				mtSizes[i] += img->mt->levels[img->mtlevel].size;
+				break;
+			}
+		}
+		if (!found && radeon_miptree_matches_texture(img->mt, &texObj->base)) {
+			mtSizes[mtCount] = img->mt->levels[img->mtlevel].size;
+			mts[mtCount] = img->mt;
+			mtCount++;
+		}
+	}
+
+	for (i = 1; i < mtCount; ++i) {
+		if (mtSizes[i] > mtSizes[maxMtIndex])
+			maxMtIndex = i;
+	}
+	if (mtCount > 0)
+		result = mts[maxMtIndex];
+out:
+	free(mtSizes);
+	free(mts);
+	return result;
+}
+
+/**
+ * Validate texture mipmap tree.
+ * If individual images are stored in different mipmap trees
+ * use the mipmap tree that has the most of the correct data.
+ * @return GL_FALSE for bordered or incomplete textures and on allocation failure, else GL_TRUE.
+ */
+int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	radeonTexObj *t = radeon_tex_obj(texObj);
+	struct gl_texture_image *baseImage;
+
+	if (t->validated || t->image_override)
+		return GL_TRUE;
+
+	/* A texture without a base-level image cannot be validated; do not dereference NULL. */
+	baseImage = texObj->Image[0][texObj->BaseLevel];
+	if (!baseImage || baseImage->Border > 0)
+		return GL_FALSE;
+
+	_mesa_test_texobj_completeness(rmesa->glCtx, texObj);
+	if (!texObj->_Complete)
+		return GL_FALSE;
+
+	calculate_min_max_lod(&t->base, &t->minLod, &t->maxLod);
+	radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
+			"%s: Validating texture %p now, minLod = %d, maxLod = %d\n",
+			__FUNCTION__, texObj ,t->minLod, t->maxLod);
+
+	radeon_mipmap_tree *dst_miptree = get_biggest_matching_miptree(t, t->minLod, t->maxLod);
+
+	if (!dst_miptree) {
+		radeon_miptree_unreference(&t->mt);
+		radeon_try_alloc_miptree(rmesa, t);
+		dst_miptree = t->mt;
+		radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
+			"%s: No matching miptree found, allocated new one %p\n",
+			__FUNCTION__, t->mt);
+		if (!dst_miptree)
+			return GL_FALSE; /* allocation failed: nothing to migrate into */
+	} else {
+		/* The texture object itself must use the chosen tree, not only its images. */
+		if (t->mt != dst_miptree) {
+			radeon_miptree_unreference(&t->mt);
+			radeon_miptree_reference(dst_miptree, &t->mt);
+		}
+		radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "%s: Using miptree %p\n", __FUNCTION__, t->mt);
+	}
+
+	const unsigned faces = texObj->Target == GL_TEXTURE_CUBE_MAP ? 6 : 1;
+	unsigned face, level;
+	radeon_texture_image *img;
+	/* Validate only the levels that will actually be used during rendering */
+	for (face = 0; face < faces; ++face) {
+		for (level = t->minLod; level <= t->maxLod; ++level) {
+			img = get_radeon_texture_image(texObj->Image[face][level]);
+			radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+				"Checking image level %d, face %d, mt %p ... ",
+				level, face, img->mt);
+			if (img->mt != dst_miptree) {
+				radeon_print(RADEON_TEXTURE, RADEON_TRACE, "MIGRATING\n");
+				struct radeon_bo *src_bo = (img->mt) ? img->mt->bo : img->bo;
+				if (src_bo && radeon_bo_is_referenced_by_cs(src_bo, rmesa->cmdbuf.cs)) {
+					radeon_firevertices(rmesa);
+				}
+				migrate_image_to_miptree(dst_miptree, img, face, level);
+			} else
+				radeon_print(RADEON_TEXTURE, RADEON_TRACE, "OK\n");
+		}
+	}
+
+	t->validated = GL_TRUE;
+	return GL_TRUE;
+}
+
+/* Offset of face 0 of level minLod inside the texture's miptree; 0 when it has no tree. */
+uint32_t get_base_teximage_offset(radeonTexObj *texObj)
+{
+	radeon_mipmap_tree *const tree = texObj->mt;
+
+	return (tree != NULL) ? radeon_miptree_image_offset(tree, 0, texObj->minLod)
+			      : 0;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
new file mode 100644
index 0000000000..088f970172
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_MIPMAP_TREE_H_
+#define __RADEON_MIPMAP_TREE_H_
+
+#include "radeon_common.h"
+
+typedef struct _radeon_mipmap_tree radeon_mipmap_tree;
+typedef struct _radeon_mipmap_level radeon_mipmap_level;
+typedef struct _radeon_mipmap_image radeon_mipmap_image;
+
+struct _radeon_mipmap_image { /* placement of one image (one face of one level) */
+	GLuint offset; /**< Offset of this image from the start of mipmap tree buffer, in bytes */
+};
+
+struct _radeon_mipmap_level { /* one mip level of a miptree */
+	GLuint width; /**< compared against gl_texture_image Width when matching */
+	GLuint height; /**< compared against gl_texture_image Height when matching */
+	GLuint depth; /**< compared against gl_texture_image Depth when matching */
+	GLuint size; /**< Size of each image, in bytes */
+	GLuint rowstride; /**< in bytes */
+	GLuint valid; /**< non-zero once the layout code has filled in this level */
+	radeon_mipmap_image faces[6]; /**< per-face placement; radeon_miptree_image_offset() always uses faces[0] for non-cube targets */
+};
+
+/* Capacity of radeon_mipmap_tree::levels[]: the most mip levels one miptree can store. */
+#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 13
+
+/**
+ * A mipmap tree contains texture images in the layout that the hardware
+ * expects.
+ *
+ * The meta-data of mipmap trees is immutable, i.e. you cannot change the
+ * layout on-the-fly; however, the texture contents (i.e. texels) can be
+ * changed.
+ */
+struct _radeon_mipmap_tree {
+	struct radeon_bo *bo; /**< buffer object opened with totalsize bytes when the tree is created */
+	GLuint refcount; /**< managed by radeon_miptree_reference() / radeon_miptree_unreference() */
+
+	GLuint totalsize; /**< total size of the miptree, in bytes */
+
+	GLenum target; /**< GL_TEXTURE_xxx */
+	GLenum mesaFormat; /**< MESA_FORMAT_xxx (declared GLenum here; the miptree functions pass it as gl_format) */
+	GLuint faces; /**< # of faces: 6 for cubemaps, 1 otherwise */
+	GLuint baseLevel; /**< gl_texture_object BaseLevel it was created for */
+	GLuint numLevels; /**< Number of mip levels stored in this mipmap tree */
+
+	GLuint width0; /**< Width of baseLevel image */
+	GLuint height0; /**< Height of baseLevel image */
+	GLuint depth0; /**< Depth of baseLevel image */
+
+	GLuint tilebits; /**< RADEON_TXO_xxx_TILE */
+
+	radeon_mipmap_level levels[RADEON_MIPTREE_MAX_TEXTURE_LEVELS]; /**< filled by absolute level: baseLevel .. baseLevel + numLevels - 1 */
+};
+
+void radeon_miptree_reference(radeon_mipmap_tree *mt, radeon_mipmap_tree **ptr);
+void radeon_miptree_unreference(radeon_mipmap_tree **ptr);
+
+GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt,
+		struct gl_texture_image *texImage, GLuint face, GLuint level);
+void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t);
+GLuint radeon_miptree_image_offset(radeon_mipmap_tree *mt,
+				   GLuint face, GLuint level);
+uint32_t get_base_teximage_offset(radeonTexObj *texObj);
+
+unsigned get_texture_image_row_stride(radeonContextPtr rmesa, gl_format format, unsigned width, unsigned tiling);
+
+unsigned get_texture_image_size(
+		gl_format format,
+		unsigned rowStride,
+		unsigned height,
+		unsigned depth,
+		unsigned tiling);
+#endif /* __RADEON_MIPMAP_TREE_H_ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
new file mode 100644
index 0000000000..dadb8002c7
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "stdint.h"
+#include "main/bufferobj.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/state.h"
+#include "swrast/swrast.h"
+
+#include "radeon_common_context.h"
+#include "radeon_debug.h"
+#include "radeon_mipmap_tree.h"
+
+/**
+ * Translate a client (format, type) pair, as passed to glReadPixels, into
+ * the Mesa format with the matching memory layout.
+ *
+ * \return the matching MESA_FORMAT_xxx, or MESA_FORMAT_NONE when the pair
+ *         is not in the table below.
+ */
+static gl_format gl_format_and_type_to_mesa_format(GLenum format, GLenum type)
+{
+    /* One row per supported combination; each (format, type) pair is unique. */
+    static const struct {
+        GLenum format;
+        GLenum type;
+        gl_format mesa_format;
+    } supported[] = {
+        { GL_RGB,  GL_UNSIGNED_SHORT_5_6_5,        MESA_FORMAT_RGB565 },
+        { GL_RGB,  GL_UNSIGNED_SHORT_5_6_5_REV,    MESA_FORMAT_RGB565_REV },
+
+        { GL_RGBA, GL_FLOAT,                       MESA_FORMAT_RGBA_FLOAT32 },
+        { GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1,      MESA_FORMAT_RGBA5551 },
+        { GL_RGBA, GL_UNSIGNED_INT_8_8_8_8,        MESA_FORMAT_RGBA8888 },
+        { GL_RGBA, GL_UNSIGNED_BYTE,               MESA_FORMAT_RGBA8888_REV },
+        { GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,    MESA_FORMAT_RGBA8888_REV },
+
+        { GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4,      MESA_FORMAT_ARGB4444_REV },
+        { GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV,  MESA_FORMAT_ARGB4444 },
+        { GL_BGRA, GL_UNSIGNED_SHORT_5_5_5_1,      MESA_FORMAT_ARGB1555_REV },
+        { GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV,  MESA_FORMAT_ARGB1555 },
+        { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8,        MESA_FORMAT_ARGB8888_REV },
+        { GL_BGRA, GL_UNSIGNED_BYTE,               MESA_FORMAT_ARGB8888 },
+        { GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,    MESA_FORMAT_ARGB8888 },
+    };
+    unsigned idx;
+
+    for (idx = 0; idx < sizeof(supported) / sizeof(supported[0]); idx++) {
+        if (supported[idx].format == format && supported[idx].type == type) {
+            return supported[idx].mesa_format;
+        }
+    }
+
+    return MESA_FORMAT_NONE;
+}
+
+/**
+ * Try to satisfy a ReadPixels request with a hardware blit from the current
+ * color read buffer into a temporary GTT buffer object, followed by a CPU
+ * copy into the caller's memory.
+ *
+ * \param ctx     GL context
+ * \param x,y     lower-left corner of the region to read
+ * \param width   region width in pixels
+ * \param height  region height in pixels
+ * \param format  client pixel format (GL_RGB, GL_RGBA, GL_BGRA, ...)
+ * \param type    client pixel type
+ * \param pack    pixel pack state describing the layout of \p pixels
+ * \param pixels  destination; must be client memory (a bound pack buffer
+ *                object makes this function decline the request)
+ *
+ * \return GL_TRUE when the request was fully handled (including the case
+ *         where clipping leaves nothing to read); GL_FALSE when the caller
+ *         must fall back to the software path.  Nothing has been written to
+ *         \p pixels when GL_FALSE is returned.
+ */
+static GLboolean
+do_blit_readpixels(GLcontext * ctx,
+                   GLint x, GLint y, GLsizei width, GLsizei height,
+                   GLenum format, GLenum type,
+                   const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
+{
+    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+    const struct radeon_renderbuffer *rrb = radeon_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
+    const gl_format dst_format = gl_format_and_type_to_mesa_format(format, type);
+    unsigned dst_rowstride, dst_imagesize, aligned_rowstride, flip_y;
+    unsigned bytes_per_pixel, user_rowstride, row_bytes, alignment;
+    struct radeon_bo *dst_buffer;
+    GLint dst_x = 0, dst_y = 0;
+
+    /* It's not worth it if the number of pixels to copy is really small */
+    if (width * height < 100) {
+        return GL_FALSE;
+    }
+
+    if (dst_format == MESA_FORMAT_NONE ||
+        !radeon->vtbl.check_blit(dst_format) || !radeon->vtbl.blit) {
+        return GL_FALSE;
+    }
+
+    if (ctx->_ImageTransferState || ctx->Color._LogicOpEnabled) {
+        return GL_FALSE;
+    }
+
+    if (pack->SwapBytes || pack->LsbFirst) {
+        return GL_FALSE;
+    }
+
+    /* With a pack buffer object bound, 'pixels' is an offset into that
+     * buffer rather than a CPU pointer, so the memcpy-based copy below
+     * must not touch it.  Let the software path deal with PBOs.
+     */
+    if (_mesa_is_bufferobj(pack->BufferObj)) {
+        return GL_FALSE;
+    }
+
+    /* Destination offsets inside the client image are not handled here. */
+    if (pack->SkipPixels || pack->SkipRows) {
+        return GL_FALSE;
+    }
+
+    /* A row length shorter than the region makes rows overlap in the
+     * destination and would give the blit a pitch smaller than its width.
+     */
+    if (pack->RowLength > 0 && pack->RowLength < width) {
+        return GL_FALSE;
+    }
+
+    if (pack->RowLength > 0) {
+        dst_rowstride = pack->RowLength;
+    } else {
+        dst_rowstride = width;
+    }
+
+    if (!_mesa_clip_copytexsubimage(ctx, &dst_x, &dst_y, &x, &y, &width, &height)) {
+        return GL_TRUE;
+    }
+    assert(x >= 0 && y >= 0);
+
+    /* Clipping against the left/bottom edge shifts where the surviving
+     * pixels belong in the client image.  This path always writes at the
+     * image origin, so hand such requests to the software path (which
+     * still sees the caller's original, unclipped arguments).
+     */
+    if (dst_x != 0 || dst_y != 0) {
+        return GL_FALSE;
+    }
+
+    bytes_per_pixel = _mesa_get_format_bytes(dst_format);
+
+    /* Bytes actually produced per row (post-clip width). */
+    row_bytes = width * bytes_per_pixel;
+
+    /* Client row stride in bytes, rounded up to GL_PACK_ALIGNMENT. */
+    alignment = pack->Alignment > 0 ? (unsigned) pack->Alignment : 1;
+    user_rowstride = dst_rowstride * bytes_per_pixel;
+    if (user_rowstride % alignment) {
+        user_rowstride += alignment - (user_rowstride % alignment);
+    }
+
+    aligned_rowstride = get_texture_image_row_stride(radeon, dst_format, dst_rowstride, 0);
+    dst_imagesize = get_texture_image_size(dst_format,
+                                           aligned_rowstride,
+                                           height, 1, 0);
+    dst_buffer = radeon_bo_open(radeon->radeonScreen->bom, 0, dst_imagesize, 1024, RADEON_GEM_DOMAIN_GTT, 0);
+    if (!dst_buffer) {
+        return GL_FALSE;
+    }
+
+    /* Disable source Y flipping for FBOs */
+    flip_y = (ctx->ReadBuffer->Name == 0);
+    if (pack->Invert) {
+        y = rrb->base.Height - height - y;
+        flip_y = !flip_y;
+    }
+
+    if (!radeon->vtbl.blit(ctx,
+                           rrb->bo,
+                           rrb->draw_offset,
+                           rrb->base.Format,
+                           rrb->pitch / rrb->cpp,
+                           rrb->base.Width,
+                           rrb->base.Height,
+                           x,
+                           y,
+                           dst_buffer,
+                           0, /* dst_offset */
+                           dst_format,
+                           aligned_rowstride / bytes_per_pixel,
+                           width,
+                           height,
+                           0, /* dst_x */
+                           0, /* dst_y */
+                           width,
+                           height,
+                           flip_y))
+    {
+        radeon_bo_unref(dst_buffer);
+        return GL_FALSE;
+    }
+
+    if (radeon_bo_map(dst_buffer, 0) != 0) {
+        /* Could not get at the blit result; nothing was written to
+         * 'pixels' yet, so the software path can still do the job.
+         */
+        radeon_bo_unref(dst_buffer);
+        return GL_FALSE;
+    }
+
+    /* Copy only the pixels that were read: copying a full row stride per
+     * row would clobber client padding and can run past the end of the
+     * client image on the last row.
+     */
+    copy_rows(pixels, user_rowstride, dst_buffer->ptr,
+              aligned_rowstride, height, row_bytes);
+    radeon_bo_unmap(dst_buffer);
+    radeon_bo_unref(dst_buffer);
+    return GL_TRUE;
+}
+
+/**
+ * ReadPixels implementation: try the hardware blit path first and use
+ * swrast when the blit path declines the request.
+ */
+void
+radeonReadPixels(GLcontext * ctx,
+                 GLint x, GLint y, GLsizei width, GLsizei height,
+                 GLenum format, GLenum type,
+                 const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
+{
+    const GLboolean handled_by_blit =
+        do_blit_readpixels(ctx, x, y, width, height, format, type, pack, pixels);
+
+    if (!handled_by_blit) {
+        radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
+                     "Falling back to sw for ReadPixels (format %s, type %s)\n",
+                     _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type));
+
+        /* The spans code needs the derived buffer state to be current, but
+         * _swrast_ReadPixels only validates Mesa state after it has set up
+         * the spans code -- so validate here, before calling down.
+         */
+        if (ctx->NewState) {
+            _mesa_update_state(ctx);
+        }
+
+        _swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels);
+    }
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_queryobj.c b/src/mesa/drivers/dri/radeon/radeon_queryobj.c
new file mode 100644
index 0000000000..ab6d02e56b
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_queryobj.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright © 2008-2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Maciej Cencora <m.cencora@gmail.com>
+ *
+ */
+#include "radeon_common.h"
+#include "radeon_queryobj.h"
+#include "radeon_debug.h"
+
+#include "main/imports.h"
+#include "main/simple_list.h"
+
+#include <inttypes.h>
+
+/**
+ * Read the counters the hardware wrote into the query's buffer object and
+ * store their sum in query->Base.Result.
+ *
+ * If the buffer object cannot be mapped the result is left at 0 and the
+ * function returns without touching the buffer.
+ *
+ * \param ctx  GL context
+ * \param q    query object; must be a struct radeon_query_object
+ */
+static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *query = (struct radeon_query_object *)q;
+	const uint64_t valid_bit = (uint64_t)1 << 63;
+	uint32_t *result;
+	int i;
+
+	radeon_print(RADEON_STATE, RADEON_VERBOSE,
+			"%s: query id %d, result %d\n",
+			__FUNCTION__, query->Base.Id, (int) query->Base.Result);
+
+	query->Base.Result = 0;
+
+	/* Mapping can fail; never dereference bo->ptr in that case. */
+	if (radeon_bo_map(query->bo, GL_FALSE) != 0) {
+		radeon_print(RADEON_STATE, RADEON_NORMAL,
+				"%s: query id %d, failed to map result buffer\n",
+				__FUNCTION__, query->Base.Id);
+		return;
+	}
+	result = query->bo->ptr;
+
+	if (IS_R600_CLASS(radeon->radeonScreen)) {
+		/* ZPASS EVENT writes alternating qwords
+		 * At query start we set the start offset to 0 and
+		 * hw writes zpass start counts to qwords 0, 2, 4, 6.
+		 * At query end we set the start offset to 8 and
+		 * hw writes zpass end counts to qwords 1, 3, 5, 7.
+		 * then we subtract. MSB is the valid bit.
+		 */
+		for (i = 0; i < 16; i += 4) {
+			uint64_t start = (uint64_t)LE32_TO_CPU(result[i]) |
+					 (uint64_t)LE32_TO_CPU(result[i + 1]) << 32;
+			uint64_t end = (uint64_t)LE32_TO_CPU(result[i + 2]) |
+				       (uint64_t)LE32_TO_CPU(result[i + 3]) << 32;
+			/* Only count pairs where both samples were written;
+			 * the valid bits cancel out in the subtraction.
+			 */
+			if ((start & valid_bit) && (end & valid_bit)) {
+				uint64_t query_count = end - start;
+				query->Base.Result += query_count;
+
+			}
+			radeon_print(RADEON_STATE, RADEON_TRACE,
+				     "%d start: %" PRIu64 ", end: %" PRIu64 " %" PRIu64 "\n", i, start, end, end - start);
+		}
+	} else {
+		/* One 32-bit counter per dword, up to the current write offset. */
+		for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) {
+			query->Base.Result += LE32_TO_CPU(result[i]);
+			radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, LE32_TO_CPU(result[i]));
+		}
+	}
+
+	radeon_bo_unmap(query->bo);
+}
+
+/**
+ * Allocate a driver query object for the given id.
+ *
+ * \param ctx  GL context (unused)
+ * \param id   name of the new query object
+ * \return the embedded gl_query_object, or NULL when out of memory
+ */
+static struct gl_query_object * radeonNewQueryObject(GLcontext *ctx, GLuint id)
+{
+	struct radeon_query_object *query;
+
+	query = calloc(1, sizeof(*query));
+	if (!query)
+		return NULL;
+
+	query->Base.Id = id;
+	query->Base.Result = 0;
+	query->Base.Active = GL_FALSE;
+	query->Base.Ready = GL_TRUE;
+
+	radeon_print(RADEON_STATE, RADEON_VERBOSE,"%s: query id %d\n", __FUNCTION__, query->Base.Id);
+
+	return &query->Base;
+}
+
+/**
+ * Destroy a query object: drop the reference on its result buffer (if one
+ * was ever allocated) and free the object itself.
+ */
+static void radeonDeleteQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+	struct radeon_query_object *rquery = (struct radeon_query_object *)q;
+
+	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+	/* The buffer only exists once the query has been begun at least once. */
+	if (rquery->bo != NULL)
+		radeon_bo_unref(rquery->bo);
+
+	free(rquery);
+}
+
+/**
+ * Make the query result available: flush the command stream if it still
+ * references the result buffer, read the result back and mark the query
+ * ready.
+ */
+static void radeonWaitQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+	struct radeon_query_object *rquery = (struct radeon_query_object *)q;
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+	/* Packets for this query may still sit in the unsubmitted cmdbuf. */
+	if (radeon_bo_is_referenced_by_cs(rquery->bo, rmesa->cmdbuf.cs)) {
+		ctx->Driver.Flush(ctx);
+	}
+
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, q->Id, rquery->bo, rquery->curr_offset);
+
+	radeonQueryGetResult(ctx, q);
+
+	rquery->Base.Ready = GL_TRUE;
+}
+
+
+/**
+ * Start a query: flush pending DMA, make sure the query has a result
+ * buffer, reset its write offset and make it the context's current query.
+ * The queryobj state atom is marked dirty so it is emitted with the next
+ * state emission.
+ */
+static void radeonBeginQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+	struct radeon_query_object *rquery = (struct radeon_query_object *)q;
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+	/* Only one query may be active at a time. */
+	assert(rmesa->query.current == NULL);
+
+	if (rmesa->dma.flush) {
+		rmesa->dma.flush(rmesa->glCtx);
+	}
+
+	/* The result buffer is created on first use and kept for reuse. */
+	if (rquery->bo == NULL) {
+		rquery->bo = radeon_bo_open(rmesa->radeonScreen->bom, 0, RADEON_QUERY_PAGE_SIZE, RADEON_QUERY_PAGE_SIZE, RADEON_GEM_DOMAIN_GTT, 0);
+	}
+	rquery->curr_offset = 0;
+
+	rmesa->query.current = rquery;
+
+	rmesa->hw.is_dirty = GL_TRUE;
+	rmesa->query.queryobj.dirty = GL_TRUE;
+}
+
+/**
+ * Emit the "query finish" packets for the current query.  Does nothing when
+ * no query is current or when its begin packets have not been emitted.
+ */
+void radeonEmitQueryEnd(GLcontext *ctx)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *active = rmesa->query.current;
+
+	if (active == NULL || !active->emitted_begin) {
+		return;
+	}
+
+	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d, bo %p, offset %d\n", __FUNCTION__, active->Base.Id, active->bo, active->curr_offset);
+
+	radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
+				      active->bo,
+				      0, RADEON_GEM_DOMAIN_GTT);
+
+	rmesa->vtbl.emit_query_finish(rmesa);
+}
+
+/**
+ * End the current query: flush pending DMA, emit the query-end packets and
+ * clear the context's current query.
+ */
+static void radeonEndQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+	if (rmesa->dma.flush) {
+		rmesa->dma.flush(rmesa->glCtx);
+	}
+
+	radeonEmitQueryEnd(ctx);
+	rmesa->query.current = NULL;
+}
+
+/**
+ * Non-blocking result check.  When DRM_RADEON_GEM_BUSY is available and the
+ * screen uses the kernel memory manager, the result is fetched only if
+ * radeon_bo_is_busy() returns 0 for the result buffer; in every other
+ * configuration this simply waits for the result.
+ */
+static void radeonCheckQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+	radeon_print(RADEON_STATE, RADEON_TRACE, "%s: query id %d\n", __FUNCTION__, q->Id);
+
+#ifdef DRM_RADEON_GEM_BUSY
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *rquery = (struct radeon_query_object *)q;
+	uint32_t busy_domain;
+
+	if (!rmesa->radeonScreen->kernel_mm) {
+		radeonWaitQuery(ctx, q);
+		return;
+	}
+
+	/* Need to perform a flush, as per ARB_occlusion_query spec */
+	if (radeon_bo_is_referenced_by_cs(rquery->bo, rmesa->cmdbuf.cs))
+		ctx->Driver.Flush(ctx);
+
+	/* Still busy: leave the query not-ready and try again later. */
+	if (radeon_bo_is_busy(rquery->bo, &busy_domain) != 0)
+		return;
+
+	radeonQueryGetResult(ctx, q);
+	rquery->Base.Ready = GL_TRUE;
+#else
+	radeonWaitQuery(ctx, q);
+#endif
+}
+
+/**
+ * Install this file's query object callbacks into a driver function table.
+ */
+void radeonInitQueryObjFunctions(struct dd_function_table *functions)
+{
+	/* object lifetime */
+	functions->NewQueryObject = radeonNewQueryObject;
+	functions->DeleteQuery = radeonDeleteQuery;
+
+	/* query bracket */
+	functions->BeginQuery = radeonBeginQuery;
+	functions->EndQuery = radeonEndQuery;
+
+	/* result retrieval */
+	functions->WaitQuery = radeonWaitQuery;
+	functions->CheckQuery = radeonCheckQuery;
+}
+
+/**
+ * State atom check callback for the queryobj atom.
+ *
+ * \return atom->cmd_size when a query is current and its begin packets
+ *         have not been emitted yet, 0 otherwise.
+ */
+int radeon_check_query_active(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *active = rmesa->query.current;
+
+	if (active != NULL && !active->emitted_begin) {
+		return atom->cmd_size;
+	}
+
+	return 0;
+}
+
+/**
+ * State atom emit callback for the queryobj atom: writes the atom's command
+ * table into the batch and records that the begin packets of the current
+ * query have been emitted.
+ *
+ * NOTE(review): radeon->query.current is dereferenced unconditionally, so
+ * this relies on being called only while a query is current (i.e. when the
+ * atom's check callback returned non-zero) -- confirm against the state
+ * emission code.
+ */
+void radeon_emit_queryobj(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	BATCH_LOCALS(radeon);
+	int dwords;
+
+	/* Number of dwords to emit, as reported by the atom's check callback. */
+	dwords = (*atom->check) (ctx, atom);
+
+	BEGIN_BATCH_NO_AUTOSTATE(dwords);
+	OUT_BATCH_TABLE(atom->cmd, dwords);
+	END_BATCH();
+
+	radeon->query.current->emitted_begin = GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_queryobj.h b/src/mesa/drivers/dri/radeon/radeon_queryobj.h
new file mode 100644
index 0000000000..19374dc76b
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_queryobj.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright © 2008 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Maciej Cencora <m.cencora@gmail.com>
+ *
+ */
+
+#include "main/imports.h"
+#include "main/simple_list.h"
+#include "radeon_common_context.h"
+
+extern void radeonEmitQueryBegin(GLcontext *ctx);
+extern void radeonEmitQueryEnd(GLcontext *ctx);
+
+extern void radeonInitQueryObjFunctions(struct dd_function_table *functions);
+
+#define RADEON_QUERY_PAGE_SIZE 4096
+
+int radeon_check_query_active(GLcontext *ctx, struct radeon_state_atom *atom);
+void radeon_emit_queryobj(GLcontext *ctx, struct radeon_state_atom *atom);
+
+/**
+ * Set up the context's queryobj state atom with a zeroed command buffer of
+ * SZ dwords, account for it in the maximum state size and append it to the
+ * hardware atom list.
+ *
+ * \param radeon  context whose query state atom is initialised
+ * \param SZ      size of the atom's command buffer, in dwords
+ */
+static inline void radeon_init_query_stateobj(radeonContextPtr radeon, int SZ)
+{
+	struct radeon_state_atom *atom = &radeon->query.queryobj;
+
+	atom->name = "queryobj";
+	atom->idx = 0;
+	atom->dirty = GL_FALSE;
+
+	atom->cmd_size = SZ;
+	atom->cmd = (uint32_t*)CALLOC(SZ * sizeof(uint32_t));
+
+	atom->check = radeon_check_query_active;
+	atom->emit = radeon_emit_queryobj;
+
+	radeon->hw.max_state_size += SZ;
+	insert_at_tail(&radeon->hw.atomlist, atom);
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.c b/src/mesa/drivers/dri/radeon/radeon_sanity.c
new file mode 100644
index 0000000000..3e64be83ed
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_sanity.c
@@ -0,0 +1,1080 @@
+/**************************************************************************
+
+Copyright 2002 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc, Cedar Park, TX.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+#include <errno.h> 
+
+#include "main/glheader.h"
+
+#include "radeon_context.h"
+#include "radeon_sanity.h"
+
+/* Set this '1' to get more verbiage.
+ */
+#define MORE_VERBOSE 1
+
+#if MORE_VERBOSE
+#define VERBOSE (RADEON_DEBUG & RADEON_VERBOSE)
+#define NORMAL  (1)
+#else
+#define VERBOSE 0
+#define NORMAL  (RADEON_DEBUG & RADEON_VERBOSE)
+#endif
+
+
+/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
+ * 1.3 cmdbuffers allow all previous state to be updated as well as
+ * the tcl scalar and vector areas.
+ *
+ * Table of state packets, one entry per packet id (the numeric comments
+ * on some rows give the array index of that row):
+ *   start - first register of the packet; 0 for the R200_* rows
+ *   len   - length of the packet (presumably in dwords -- TODO confirm)
+ *   name  - packet name used in diagnostics
+ * The array is sized RADEON_MAX_STATE_PACKETS; any entries beyond the
+ * initialisers listed here are zero-filled.
+ */
+static struct { 
+   int start; 
+   int len; 
+   const char *name;
+} packet[RADEON_MAX_STATE_PACKETS] = {
+   { RADEON_PP_MISC,7,"RADEON_PP_MISC" },
+   { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
+   { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
+   { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
+   { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
+   { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
+   { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
+   { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
+   { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
+   { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
+   { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
+   { RADEON_RE_MISC,1,"RADEON_RE_MISC" },
+   { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
+   { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
+   { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
+   { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
+   { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
+   { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
+   { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
+   { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
+	{ 0, 4, "R200_PP_TXCBLEND_0" },
+	{ 0, 4, "R200_PP_TXCBLEND_1" },
+	{ 0, 4, "R200_PP_TXCBLEND_2" },
+	{ 0, 4, "R200_PP_TXCBLEND_3" },
+	{ 0, 4, "R200_PP_TXCBLEND_4" },
+	{ 0, 4, "R200_PP_TXCBLEND_5" },
+	{ 0, 4, "R200_PP_TXCBLEND_6" },
+	{ 0, 4, "R200_PP_TXCBLEND_7" },
+	{ 0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
+	{ 0, 6, "R200_PP_TFACTOR_0" },
+	{ 0, 4, "R200_SE_VTX_FMT_0" },
+	{ 0, 1, "R200_SE_VAP_CNTL" },
+	{ 0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
+	{ 0, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
+	{ 0, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
+	{ 0, 6, "R200_PP_TXFILTER_0" },
+	{ 0, 6, "R200_PP_TXFILTER_1" },
+	{ 0, 6, "R200_PP_TXFILTER_2" },
+	{ 0, 6, "R200_PP_TXFILTER_3" },
+	{ 0, 6, "R200_PP_TXFILTER_4" },
+	{ 0, 6, "R200_PP_TXFILTER_5" },
+	{ 0, 1, "R200_PP_TXOFFSET_0" },
+	{ 0, 1, "R200_PP_TXOFFSET_1" },
+	{ 0, 1, "R200_PP_TXOFFSET_2" },
+	{ 0, 1, "R200_PP_TXOFFSET_3" },
+	{ 0, 1, "R200_PP_TXOFFSET_4" },
+	{ 0, 1, "R200_PP_TXOFFSET_5" },
+	{ 0, 1, "R200_SE_VTE_CNTL" },
+	{ 0, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
+	{ 0, 1, "R200_PP_TAM_DEBUG3" },
+	{ 0, 1, "R200_PP_CNTL_X" }, 
+	{ 0, 1, "R200_RB3D_DEPTHXY_OFFSET" }, 
+	{ 0, 1, "R200_RE_AUX_SCISSOR_CNTL" }, 
+	{ 0, 2, "R200_RE_SCISSOR_TL_0" }, 
+	{ 0, 2, "R200_RE_SCISSOR_TL_1" }, 
+	{ 0, 2, "R200_RE_SCISSOR_TL_2" }, 
+	{ 0, 1, "R200_SE_VAP_CNTL_STATUS" }, 
+	{ 0, 1, "R200_SE_VTX_STATE_CNTL" }, 
+	{ 0, 1, "R200_RE_POINTSIZE" }, 
+	{ 0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
+	{ 0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
+	{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
+	{ 0, 1, "R200_PP_CUBIC_FACES_1" },
+	{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
+	{ 0, 1, "R200_PP_CUBIC_FACES_2" },
+	{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
+	{ 0, 1, "R200_PP_CUBIC_FACES_3" },
+	{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
+	{ 0, 1, "R200_PP_CUBIC_FACES_4" },
+	{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
+	{ 0, 1, "R200_PP_CUBIC_FACES_5" },
+	{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
+   { RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
+   { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
+   { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
+	{ 0, 3, "R200_RB3D_BLENDCOLOR" },
+	{ 0, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
+   { RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0" },
+   { RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0" },
+   { RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1" },
+   { RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0" },
+   { RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2" },
+   { RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0" },
+   { 0, 2, "R200_PP_TRI_PERF" },
+   { 0, 32, "R200_PP_AFS_0"},   /* 85 */
+   { 0, 32, "R200_PP_AFS_1"},
+   { 0, 8, "R200_ATF_TFACTOR"},
+   { 0, 8, "R200_PP_TXCTLALL_0"},
+   { 0, 8, "R200_PP_TXCTLALL_1"},
+   { 0, 8, "R200_PP_TXCTLALL_2"},
+   { 0, 8, "R200_PP_TXCTLALL_3"},
+   { 0, 8, "R200_PP_TXCTLALL_4"},
+   { 0, 8, "R200_PP_TXCTLALL_5"},
+   { 0, 2, "R200_VAP_PVS_CNTL"},
+};
+
+/* Associates a register (or scalar/vector state) index with a printable
+ * name; used as the element type of the name tables below.
+ */
+struct reg_names {
+   int idx;            /* register offset / state index */
+   const char *name;   /* name used in diagnostics */
+};
+
+/* Names of the individual hardware registers tracked by the sanity checker.
+ * init_regs() creates one regs[] slot per entry, in table order.
+ *
+ * NOTE(review): RADEON_SE_COORD_FMT is listed twice (once as
+ * "RADEON_SE_COORDFMT", once as "RADEON_SE_COORD_FMT"), and several name
+ * strings are abbreviated relative to their macro (e.g. the
+ * RADEON_SE_TCL_MATERIAL_* entries) -- confirm whether that is intended
+ * before relying on the strings.
+ */
+static struct reg_names reg_names[] = {
+   { RADEON_PP_MISC, "RADEON_PP_MISC" },
+   { RADEON_PP_FOG_COLOR, "RADEON_PP_FOG_COLOR" },
+   { RADEON_RE_SOLID_COLOR, "RADEON_RE_SOLID_COLOR" },
+   { RADEON_RB3D_BLENDCNTL, "RADEON_RB3D_BLENDCNTL" },
+   { RADEON_RB3D_DEPTHOFFSET, "RADEON_RB3D_DEPTHOFFSET" },
+   { RADEON_RB3D_DEPTHPITCH, "RADEON_RB3D_DEPTHPITCH" },
+   { RADEON_RB3D_ZSTENCILCNTL, "RADEON_RB3D_ZSTENCILCNTL" },
+   { RADEON_PP_CNTL, "RADEON_PP_CNTL" },
+   { RADEON_RB3D_CNTL, "RADEON_RB3D_CNTL" },
+   { RADEON_RB3D_COLOROFFSET, "RADEON_RB3D_COLOROFFSET" },
+   { RADEON_RB3D_COLORPITCH, "RADEON_RB3D_COLORPITCH" },
+   { RADEON_SE_CNTL, "RADEON_SE_CNTL" },
+   { RADEON_SE_COORD_FMT, "RADEON_SE_COORDFMT" },
+   { RADEON_SE_CNTL_STATUS, "RADEON_SE_CNTL_STATUS" },
+   { RADEON_RE_LINE_PATTERN, "RADEON_RE_LINE_PATTERN" },
+   { RADEON_RE_LINE_STATE, "RADEON_RE_LINE_STATE" },
+   { RADEON_SE_LINE_WIDTH, "RADEON_SE_LINE_WIDTH" },
+   { RADEON_RB3D_STENCILREFMASK, "RADEON_RB3D_STENCILREFMASK" },
+   { RADEON_RB3D_ROPCNTL, "RADEON_RB3D_ROPCNTL" },
+   { RADEON_RB3D_PLANEMASK, "RADEON_RB3D_PLANEMASK" },
+   { RADEON_SE_VPORT_XSCALE, "RADEON_SE_VPORT_XSCALE" },
+   { RADEON_SE_VPORT_XOFFSET, "RADEON_SE_VPORT_XOFFSET" },
+   { RADEON_SE_VPORT_YSCALE, "RADEON_SE_VPORT_YSCALE" },
+   { RADEON_SE_VPORT_YOFFSET, "RADEON_SE_VPORT_YOFFSET" },
+   { RADEON_SE_VPORT_ZSCALE, "RADEON_SE_VPORT_ZSCALE" },
+   { RADEON_SE_VPORT_ZOFFSET, "RADEON_SE_VPORT_ZOFFSET" },
+   { RADEON_RE_MISC, "RADEON_RE_MISC" },
+   { RADEON_PP_TXFILTER_0, "RADEON_PP_TXFILTER_0" },
+   { RADEON_PP_TXFILTER_1, "RADEON_PP_TXFILTER_1" },
+   { RADEON_PP_TXFILTER_2, "RADEON_PP_TXFILTER_2" },
+   { RADEON_PP_TXFORMAT_0, "RADEON_PP_TXFORMAT_0" },
+   { RADEON_PP_TXFORMAT_1, "RADEON_PP_TXFORMAT_1" },
+   { RADEON_PP_TXFORMAT_2, "RADEON_PP_TXFORMAT_2" },
+   { RADEON_PP_TXOFFSET_0, "RADEON_PP_TXOFFSET_0" },
+   { RADEON_PP_TXOFFSET_1, "RADEON_PP_TXOFFSET_1" },
+   { RADEON_PP_TXOFFSET_2, "RADEON_PP_TXOFFSET_2" },
+   { RADEON_PP_TXCBLEND_0, "RADEON_PP_TXCBLEND_0" },
+   { RADEON_PP_TXCBLEND_1, "RADEON_PP_TXCBLEND_1" },
+   { RADEON_PP_TXCBLEND_2, "RADEON_PP_TXCBLEND_2" },
+   { RADEON_PP_TXABLEND_0, "RADEON_PP_TXABLEND_0" },
+   { RADEON_PP_TXABLEND_1, "RADEON_PP_TXABLEND_1" },
+   { RADEON_PP_TXABLEND_2, "RADEON_PP_TXABLEND_2" },
+   { RADEON_PP_TFACTOR_0, "RADEON_PP_TFACTOR_0" },
+   { RADEON_PP_TFACTOR_1, "RADEON_PP_TFACTOR_1" },
+   { RADEON_PP_TFACTOR_2, "RADEON_PP_TFACTOR_2" },
+   { RADEON_PP_BORDER_COLOR_0, "RADEON_PP_BORDER_COLOR_0" },
+   { RADEON_PP_BORDER_COLOR_1, "RADEON_PP_BORDER_COLOR_1" },
+   { RADEON_PP_BORDER_COLOR_2, "RADEON_PP_BORDER_COLOR_2" },
+   { RADEON_SE_ZBIAS_FACTOR, "RADEON_SE_ZBIAS_FACTOR" },
+   { RADEON_SE_ZBIAS_CONSTANT, "RADEON_SE_ZBIAS_CONSTANT" },
+   { RADEON_SE_TCL_OUTPUT_VTX_FMT, "RADEON_SE_TCL_OUTPUT_VTXFMT" },
+   { RADEON_SE_TCL_OUTPUT_VTX_SEL, "RADEON_SE_TCL_OUTPUT_VTXSEL" },
+   { RADEON_SE_TCL_MATRIX_SELECT_0, "RADEON_SE_TCL_MATRIX_SELECT_0" },
+   { RADEON_SE_TCL_MATRIX_SELECT_1, "RADEON_SE_TCL_MATRIX_SELECT_1" },
+   { RADEON_SE_TCL_UCP_VERT_BLEND_CTL, "RADEON_SE_TCL_UCP_VERT_BLEND_CTL" },
+   { RADEON_SE_TCL_TEXTURE_PROC_CTL, "RADEON_SE_TCL_TEXTURE_PROC_CTL" },
+   { RADEON_SE_TCL_LIGHT_MODEL_CTL, "RADEON_SE_TCL_LIGHT_MODEL_CTL" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_0, "RADEON_SE_TCL_PER_LIGHT_CTL_0" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_1, "RADEON_SE_TCL_PER_LIGHT_CTL_1" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_2, "RADEON_SE_TCL_PER_LIGHT_CTL_2" },
+   { RADEON_SE_TCL_PER_LIGHT_CTL_3, "RADEON_SE_TCL_PER_LIGHT_CTL_3" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, "RADEON_SE_TCL_EMMISSIVE_RED" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_GREEN, "RADEON_SE_TCL_EMMISSIVE_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_BLUE, "RADEON_SE_TCL_EMMISSIVE_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_EMMISSIVE_ALPHA, "RADEON_SE_TCL_EMMISSIVE_ALPHA" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_RED, "RADEON_SE_TCL_AMBIENT_RED" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_GREEN, "RADEON_SE_TCL_AMBIENT_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_BLUE, "RADEON_SE_TCL_AMBIENT_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_AMBIENT_ALPHA, "RADEON_SE_TCL_AMBIENT_ALPHA" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_RED, "RADEON_SE_TCL_DIFFUSE_RED" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_GREEN, "RADEON_SE_TCL_DIFFUSE_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_BLUE, "RADEON_SE_TCL_DIFFUSE_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_DIFFUSE_ALPHA, "RADEON_SE_TCL_DIFFUSE_ALPHA" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_RED, "RADEON_SE_TCL_SPECULAR_RED" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_GREEN, "RADEON_SE_TCL_SPECULAR_GREEN" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_BLUE, "RADEON_SE_TCL_SPECULAR_BLUE" },
+   { RADEON_SE_TCL_MATERIAL_SPECULAR_ALPHA, "RADEON_SE_TCL_SPECULAR_ALPHA" },
+   { RADEON_SE_TCL_SHININESS, "RADEON_SE_TCL_SHININESS" },
+   { RADEON_SE_COORD_FMT, "RADEON_SE_COORD_FMT" },
+   { RADEON_PP_TEX_SIZE_0, "RADEON_PP_TEX_SIZE_0" },
+   { RADEON_PP_TEX_SIZE_1, "RADEON_PP_TEX_SIZE_1" },
+   { RADEON_PP_TEX_SIZE_2, "RADEON_PP_TEX_SIZE_2" },
+   { RADEON_PP_TEX_SIZE_0+4, "RADEON_PP_TEX_PITCH_0" },
+   { RADEON_PP_TEX_SIZE_1+4, "RADEON_PP_TEX_PITCH_1" },
+   { RADEON_PP_TEX_SIZE_2+4, "RADEON_PP_TEX_PITCH_2" },
+   { RADEON_PP_CUBIC_FACES_0, "RADEON_PP_CUBIC_FACES_0" },
+   { RADEON_PP_CUBIC_FACES_1, "RADEON_PP_CUBIC_FACES_1" },
+   { RADEON_PP_CUBIC_FACES_2, "RADEON_PP_CUBIC_FACES_2" },
+   { RADEON_PP_CUBIC_OFFSET_T0_0, "RADEON_PP_CUBIC_OFFSET_T0_0" },
+   { RADEON_PP_CUBIC_OFFSET_T0_1, "RADEON_PP_CUBIC_OFFSET_T0_1" },
+   { RADEON_PP_CUBIC_OFFSET_T0_2, "RADEON_PP_CUBIC_OFFSET_T0_2" },
+   { RADEON_PP_CUBIC_OFFSET_T0_3, "RADEON_PP_CUBIC_OFFSET_T0_3" },
+   { RADEON_PP_CUBIC_OFFSET_T0_4, "RADEON_PP_CUBIC_OFFSET_T0_4" },
+   { RADEON_PP_CUBIC_OFFSET_T1_0, "RADEON_PP_CUBIC_OFFSET_T1_0" },
+   { RADEON_PP_CUBIC_OFFSET_T1_1, "RADEON_PP_CUBIC_OFFSET_T1_1" },
+   { RADEON_PP_CUBIC_OFFSET_T1_2, "RADEON_PP_CUBIC_OFFSET_T1_2" },
+   { RADEON_PP_CUBIC_OFFSET_T1_3, "RADEON_PP_CUBIC_OFFSET_T1_3" },
+   { RADEON_PP_CUBIC_OFFSET_T1_4, "RADEON_PP_CUBIC_OFFSET_T1_4" },
+   { RADEON_PP_CUBIC_OFFSET_T2_0, "RADEON_PP_CUBIC_OFFSET_T2_0" },
+   { RADEON_PP_CUBIC_OFFSET_T2_1, "RADEON_PP_CUBIC_OFFSET_T2_1" },
+   { RADEON_PP_CUBIC_OFFSET_T2_2, "RADEON_PP_CUBIC_OFFSET_T2_2" },
+   { RADEON_PP_CUBIC_OFFSET_T2_3, "RADEON_PP_CUBIC_OFFSET_T2_3" },
+   { RADEON_PP_CUBIC_OFFSET_T2_4, "RADEON_PP_CUBIC_OFFSET_T2_4" },
+};
+
+/* Names for the TCL scalar state area.  init_regs() walks this table with a
+ * cursor that advances whenever the next entry's idx equals the scalar index
+ * being initialised, so entries are presumably required to be in ascending
+ * idx order (TODO confirm).  The final { 1000, "" } entry terminates the
+ * table; its idx is larger than any index in scalars[].
+ */
+static struct reg_names scalar_names[] = {
+   { RADEON_SS_LIGHT_DCD_ADDR, "LIGHT_DCD" },
+   { RADEON_SS_LIGHT_SPOT_EXPONENT_ADDR, "LIGHT_SPOT_EXPONENT" },
+   { RADEON_SS_LIGHT_SPOT_CUTOFF_ADDR, "LIGHT_SPOT_CUTOFF" },
+   { RADEON_SS_LIGHT_SPECULAR_THRESH_ADDR, "LIGHT_SPECULAR_THRESH" },
+   { RADEON_SS_LIGHT_RANGE_CUTOFF_ADDR, "LIGHT_RANGE_CUTOFF" },
+   { RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, "VERT_GUARD_CLIP" },
+   { RADEON_SS_VERT_GUARD_DISCARD_ADJ_ADDR, "VERT_GUARD_DISCARD" },
+   { RADEON_SS_HORZ_GUARD_CLIP_ADJ_ADDR, "HORZ_GUARD_CLIP" },
+   { RADEON_SS_HORZ_GUARD_DISCARD_ADJ_ADDR, "HORZ_GUARD_DISCARD" },
+   { RADEON_SS_SHININESS, "SHININESS" },
+   { 1000, "" },   /* terminator */
+};
+
+/* Names for the TCL vector state area.  Each vector address is multiplied
+ * by 4 so the indices look like normal (dword) registers, one per vector
+ * component.  The final { 1000, "" } entry terminates the table.
+ */
+static struct reg_names vector_names[] = {
+   { RADEON_VS_MATRIX_0_ADDR * 4, "MATRIX_0" },
+   { RADEON_VS_MATRIX_1_ADDR * 4, "MATRIX_1" },
+   { RADEON_VS_MATRIX_2_ADDR * 4, "MATRIX_2" },
+   { RADEON_VS_MATRIX_3_ADDR * 4, "MATRIX_3" },
+   { RADEON_VS_MATRIX_4_ADDR * 4, "MATRIX_4" },
+   { RADEON_VS_MATRIX_5_ADDR * 4, "MATRIX_5" },
+   { RADEON_VS_MATRIX_6_ADDR * 4, "MATRIX_6" },
+   { RADEON_VS_MATRIX_7_ADDR * 4, "MATRIX_7" },
+   { RADEON_VS_MATRIX_8_ADDR * 4, "MATRIX_8" },
+   { RADEON_VS_MATRIX_9_ADDR * 4, "MATRIX_9" },
+   { RADEON_VS_MATRIX_10_ADDR * 4, "MATRIX_10" },
+   { RADEON_VS_MATRIX_11_ADDR * 4, "MATRIX_11" },
+   { RADEON_VS_MATRIX_12_ADDR * 4, "MATRIX_12" },
+   { RADEON_VS_MATRIX_13_ADDR * 4, "MATRIX_13" },
+   { RADEON_VS_MATRIX_14_ADDR * 4, "MATRIX_14" },
+   { RADEON_VS_MATRIX_15_ADDR * 4, "MATRIX_15" },
+   { RADEON_VS_LIGHT_AMBIENT_ADDR * 4, "LIGHT_AMBIENT" },
+   { RADEON_VS_LIGHT_DIFFUSE_ADDR * 4, "LIGHT_DIFFUSE" },
+   { RADEON_VS_LIGHT_SPECULAR_ADDR * 4, "LIGHT_SPECULAR" },
+   { RADEON_VS_LIGHT_DIRPOS_ADDR * 4, "LIGHT_DIRPOS" },
+   { RADEON_VS_LIGHT_HWVSPOT_ADDR * 4, "LIGHT_HWVSPOT" },
+   { RADEON_VS_LIGHT_ATTENUATION_ADDR * 4, "LIGHT_ATTENUATION" },
+   { RADEON_VS_MATRIX_EYE2CLIP_ADDR * 4, "MATRIX_EYE2CLIP" },
+   { RADEON_VS_UCP_ADDR * 4, "UCP" },
+   { RADEON_VS_GLOBAL_AMBIENT_ADDR * 4, "GLOBAL_AMBIENT" },
+   { RADEON_VS_FOG_PARAM_ADDR * 4, "FOG_PARAM" },
+   { RADEON_VS_EYE_VECTOR_ADDR * 4, "EYE_VECTOR" },
+   { 1000, "" },   /* terminator */
+};
+
+union fi { float f; int i; };   /* one stored value, readable as float or as int */
+
+#define ISVEC   1   /* entry is a vector component; get_reg_name() adds a [n] suffix */
+#define ISFLOAT 2   /* value is traced and printed as a float instead of hex */
+#define TOUCHED 4   /* set by print_reg_assignment() once the entry has been written */
+
+struct reg {
+   int idx;                     /* number that lookup_reg() matches against */
+   struct reg_names *closest;   /* name-table entry used as the base when printing */
+   int flags;                   /* ISVEC | ISFLOAT | TOUCHED */
+   union fi current;            /* last value written */
+   union fi *values;            /* distinct int values seen, grown by find_or_add_value() */
+   int nvalues;                 /* entries in use in values[] */
+   int nalloc;                  /* entries allocated for values[] */
+   float vmin, vmax;            /* extremes seen by print_float_reg_assignment() */
+};
+
+
+static struct reg regs[Elements(reg_names)+1];   /* one per reg_names[] entry, +1 for the idx == -1 terminator */
+static struct reg scalars[512+1];                /* indexed by scalar slot; last entry becomes the terminator */
+static struct reg vectors[512*4+1];              /* indexed by dword; last entry becomes the terminator */
+
+static int total, total_changed, bufs;   /* writes traced, writes counted as changes, DMA discards seen */
+
+/* Build the three lookup tables; each one ends in an idx == -1 terminator. */
+static void init_regs( void )
+{
+   struct reg_names *name;
+   int n;
+
+   for (n = 0 ; n < Elements(regs)-1 ; n++) {
+      regs[n].idx = reg_names[n].idx;
+      regs[n].closest = &reg_names[n];
+      regs[n].flags = 0;
+   }
+   regs[Elements(regs)-1].idx = -1;
+   /* closest follows the name entry most recently stepped onto as n rises. */
+   name = scalar_names;
+   for (n = 0 ; n < Elements(scalars) ; n++) {
+      if (name[1].idx == n) name++;
+      scalars[n].idx = n;
+      scalars[n].closest = name;
+      scalars[n].flags = ISFLOAT;
+   }
+   scalars[Elements(scalars)-1].idx = -1;
+   name = vector_names;
+   for (n = 0 ; n < Elements(vectors) ; n++) {
+      if (name[1].idx*4 == n) name++;
+      vectors[n].idx = n;
+      vectors[n].closest = name;
+      vectors[n].flags = ISFLOAT|ISVEC;
+   }
+   vectors[Elements(vectors)-1].idx = -1;
+}
+
+/* Return 1 if val was already recorded for reg, else record it and return 0. */
+static int find_or_add_value( struct reg *reg, int val )
+{
+   int j;
+   for ( j = 0 ; j < reg->nvalues ; j++)
+      if ( val == reg->values[j].i )
+	 return 1;
+
+   if (j == reg->nalloc) {
+      int want = (reg->nalloc + 5) * 2;
+      union fi *grown = realloc( reg->values, want * sizeof(union fi) );
+      if (!grown) return 0;   /* out of memory: keep the old list, val stays unrecorded */
+      reg->values = grown;
+      reg->nalloc = want;
+   }
+   reg->values[reg->nvalues++].i = val;
+   return 0;
+}
+
+/* Find reg in an idx == -1 terminated table; complain and return NULL if absent. */
+static struct reg *lookup_reg( struct reg *tab, int reg )
+{
+   struct reg *entry = tab;
+   while (entry->idx != -1) {
+      if (entry->idx == reg)
+	 return entry;
+      entry++;
+   }
+   fprintf(stderr, "*** unknown reg 0x%x\n", reg);
+   return NULL;
+}
+
+
+/* Printable name for reg: the table name on an exact idx match, otherwise
+ * the closest name plus an offset (and a component, for vectors), formatted
+ * into a static buffer that the next call reuses. */
+static const char *get_reg_name( struct reg *reg )
+{
+   static char tmp[80];
+   struct reg_names *base = reg->closest;
+
+   if (reg->idx == base->idx)
+      return base->name;
+
+   if (!(reg->flags & ISVEC)) {
+      /* idx != base->idx is known here, so the offset form always applies. */
+      sprintf(tmp, "%s+%d", base->name, reg->idx - base->idx);
+      return tmp;
+   }
+
+   if (reg->idx/4 != base->idx)
+      sprintf(tmp, "%s+%d[%d]",
+	      base->name,
+	      (reg->idx/4) - base->idx, reg->idx%4);
+   else
+      sprintf(tmp, "%s[%d]", base->name, reg->idx%4);
+
+   return tmp;
+}
+
+/* Trace an int register write; returns nonzero if it changes the stored value. */
+static int print_int_reg_assignment( struct reg *reg, int data )
+{
+   int changed = (reg->current.i != data);
+   int ever_seen = find_or_add_value( reg, data );
+   int show = VERBOSE || (NORMAL && (changed || !ever_seen));
+
+   if (show)
+      fprintf(stderr, "   %s <-- 0x%x", get_reg_name(reg), data);
+
+   if (NORMAL && !ever_seen)
+      fprintf(stderr, " *** BRAND NEW VALUE");
+   else if (NORMAL && changed)
+      fprintf(stderr, " *** CHANGED"); 
+
+   reg->current.i = data;
+
+   if (show)
+      fprintf(stderr, "\n");
+
+   return changed;
+}
+
+
+/* Trace a float register write, updating vmin/vmax when NORMAL is on;
+ * returns nonzero if the value differs from the one stored before. */
+static int print_float_reg_assignment( struct reg *reg, float data )
+{
+   int changed = (reg->current.f != data);
+   int newmin = (data < reg->vmin);
+   int newmax = (data > reg->vmax);
+   int show = VERBOSE || (NORMAL && (newmin || newmax || changed));
+
+   if (show)
+      fprintf(stderr, "   %s <-- %.3f", get_reg_name(reg), data);
+
+   if (NORMAL && newmin) {
+      fprintf(stderr, " *** NEW MIN (prev %.3f)", reg->vmin);
+      reg->vmin = data;
+   }
+   else if (NORMAL && newmax) {
+      fprintf(stderr, " *** NEW MAX (prev %.3f)", reg->vmax);
+      reg->vmax = data;
+   }
+   else if (NORMAL && changed)
+      fprintf(stderr, " *** CHANGED");
+
+   reg->current.f = data;
+
+   if (show)
+      fprintf(stderr, "\n");
+
+   return changed;
+}
+
+/* Mark reg as touched and trace the write as float or int, per its flags. */
+static int print_reg_assignment( struct reg *reg, int data )
+{
+   float_ui32_type bits;
+
+   bits.ui32 = data;
+   reg->flags |= TOUCHED;
+   return (reg->flags & ISFLOAT) ? print_float_reg_assignment( reg, bits.f )
+				   : print_int_reg_assignment( reg, data );
+}
+
+/* Print reg's current value, but only if it has ever been written. */
+static void print_reg( struct reg *reg )
+{
+   if (!(reg->flags & TOUCHED))
+      return;
+   if (reg->flags & ISFLOAT)
+      fprintf(stderr, "   %s == %f\n", get_reg_name(reg), reg->current.f);
+   else
+      fprintf(stderr, "   %s == 0x%x\n", get_reg_name(reg), reg->current.i);
+}
+
+
+/* Print every touched entry of regs[], then scalars[], then vectors[];
+ * print_reg() itself skips entries that were never written. */
+static void dump_state( void )
+{
+   int n;
+
+   for (n = 0 ; n < Elements(regs) ; n++)
+      print_reg( &regs[n] );
+   for (n = 0 ; n < Elements(scalars) ; n++)
+      print_reg( &scalars[n] );
+   for (n = 0 ; n < Elements(vectors) ; n++)
+      print_reg( &vectors[n] );
+}
+
+
+
+/* Decode a PACKET command: packet[id].len register writes from packet[id].start. */
+static int radeon_emit_packets( drm_radeon_cmd_header_t header,
+				drm_radeon_cmd_buffer_t *cmdbuf )
+{
+   int id = (int)header.packet.packet_id;
+   int sz = packet[id].len;
+   int *data = (int *)cmdbuf->buf;
+   int i;
+
+   if (sz * sizeof(int) > cmdbuf->bufsz) {
+      fprintf(stderr, "Packet overflows cmdbuf\n");      
+      return -EINVAL;
+   }
+
+   if (!packet[id].name) {
+      fprintf(stderr, "*** Unknown packet 0 nr %d\n", id );
+      return -EINVAL;
+   }
+
+   if (VERBOSE) 
+      fprintf(stderr, "Packet 0 reg %s nr %d\n", packet[id].name, sz );
+
+   for ( i = 0 ; i < sz ; i++) {
+      struct reg *reg = lookup_reg( regs, packet[id].start + i*4 );
+      if (!reg)
+	 return -EINVAL;   /* not in regs[]; lookup_reg() has already reported it */
+      if (print_reg_assignment( reg, data[i] ))
+	 total_changed++;
+      total++;
+   }
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+/* Decode a SCALARS command: `count` scalar writes, `stride` slots apart. */
+static int radeon_emit_scalars( drm_radeon_cmd_header_t header,
+				drm_radeon_cmd_buffer_t *cmdbuf )
+{
+   int sz = header.scalars.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.scalars.offset;
+   int stride = header.scalars.stride;
+   int i;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit scalars, start %d stride %d nr %d (end %d)\n",
+	      start, stride, sz, start + stride * sz);
+   if (sz * sizeof(int) > cmdbuf->bufsz)   /* payload must fit in the buffer */
+      return -EINVAL;
+   for (i = 0 ; i < sz ; i++, start += stride) {
+      struct reg *reg = lookup_reg( scalars, start );
+      if (!reg)
+	 return -EINVAL;   /* slot lies outside the scalars[] table */
+      if (print_reg_assignment( reg, data[i] ))
+	 total_changed++;
+      total++;
+   }
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+/* Decode a SCALARS2 command: as SCALARS, but slots start at offset + 0x100. */
+static int radeon_emit_scalars2( drm_radeon_cmd_header_t header,
+				 drm_radeon_cmd_buffer_t *cmdbuf )
+{
+   int sz = header.scalars.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.scalars.offset + 0x100;
+   int stride = header.scalars.stride;
+   int i;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit scalars2, start %d stride %d nr %d (end %d)\n",
+	      start, stride, sz, start + stride * sz);
+   if (start + stride * sz > 257) {
+      fprintf(stderr, "emit scalars OVERFLOW %d/%d/%d\n", start, stride, sz);
+      return -1;
+   }
+   if (sz * sizeof(int) > cmdbuf->bufsz)   /* payload must fit in the buffer */
+      return -EINVAL;
+   for (i = 0 ; i < sz ; i++, start += stride) {
+      struct reg *reg = lookup_reg( scalars, start );
+      if (!reg)
+	 return -EINVAL;   /* slot lies outside the scalars[] table */
+      if (print_reg_assignment( reg, data[i] ))
+	 total_changed++;
+      total++;
+   }
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+/* Decode a VECTORS command: `count` dwords, written four at a time to one
+ * vector every `stride` slots; vectors[] is indexed by dword (start*4+j).
+ * Check: inf/nan/extreme-size? */
+static int radeon_emit_vectors( drm_radeon_cmd_header_t header,
+				drm_radeon_cmd_buffer_t *cmdbuf )
+{
+   int sz = header.vectors.count;
+   int *data = (int *)cmdbuf->buf;
+   int start = header.vectors.offset;
+   int stride = header.vectors.stride;
+   int i,j;
+
+   if (VERBOSE)
+      fprintf(stderr, "emit vectors, start %d stride %d nr %d (end %d) (0x%x)\n",
+	      start, stride, sz, start + stride * sz, header.i);
+
+   if (sz * sizeof(int) > cmdbuf->bufsz)   /* payload must fit in the buffer */
+      return -EINVAL;
+
+   for (i = 0 ; i < sz ;  start += stride) {
+      int changed = 0;
+      /* Stop at sz even in mid-vector, so data[] is never read past the
+       * payload when the count is not a multiple of four. */
+      for (j = 0 ; j < 4 && i < sz ; i++,j++) {
+	 struct reg *reg = lookup_reg( vectors, start*4+j );
+	 if (!reg)
+	    return -EINVAL;   /* dword index lies outside vectors[] */
+	 if (print_reg_assignment( reg, data[i] ))
+	    changed = 1;
+      }
+      if (changed)
+	 total_changed += 4;
+      total += 4;
+   }
+
+   cmdbuf->buf += sz * sizeof(int);
+   cmdbuf->bufsz -= sz * sizeof(int);
+   return 0;
+}
+
+/* When NORMAL is on, print vfmt in hex followed by the name of each RADEON_CP_VC_FRMT_* bit set in it. */
+static int print_vertex_format( int vfmt )
+{
+   if (NORMAL) {
+      fprintf(stderr, "   %s(%x): %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+	      "vertex format",
+	      vfmt,
+	      "xy,",   /* always printed: no flag is tested for xy */
+	      (vfmt & RADEON_CP_VC_FRMT_Z) ? "z," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_W0) ? "w0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPCOLOR) ? "fpcolor," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPALPHA) ? "fpalpha," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_PKCOLOR) ? "pkcolor," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPSPEC) ? "fpspec," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_FPFOG) ? "fpfog," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_PKSPEC) ? "pkspec," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST0) ? "st0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST1) ? "st1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q1) ? "q1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST2) ? "st2," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q2) ? "q2," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_ST3) ? "st3," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q3) ? "q3," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Q0) ? "q0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_N0) ? "n0," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_XY1) ? "xy1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_Z1) ? "z1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_W1) ? "w1," : "",
+	      (vfmt & RADEON_CP_VC_FRMT_N1) ? "n1," : "");
+
+   
+/*       if (!find_or_add_value( &others[V_VTXFMT], vfmt )) */
+/* 	 fprintf(stderr, " *** NEW VALUE"); */
+
+      fprintf(stderr, "\n");
+   }
+
+   return 0;   /* no failure path: the format is only printed, never validated */
+}
+
+static char *primname[0xf] = {   /* indexed by (prim & 0xf); only the first 11 of 15 slots are named */
+   "NONE",
+   "POINTS",
+   "LINES",
+   "LINE_STRIP",
+   "TRIANGLES",
+   "TRIANGLE_FAN",
+   "TRIANGLE_STRIP",
+   "TRI_TYPE_2",
+   "RECT_LIST",
+   "3VRT_POINTS",
+   "3VRT_LINES",   /* last named slot; print_prim_and_flags() rejects larger types before indexing */
+};
+
+/* Print the flag bits and primitive type packed into prim, then check the
+ * vertex count (prim >> 16) against that type.  Returns 0 if ok, else -1. */
+static int print_prim_and_flags( int prim )
+{
+   int numverts;
+   if (NORMAL)
+      fprintf(stderr, "   %s(%x): %s%s%s%s%s%s%s\n",
+	      "prim flags",
+	      prim,
+	      ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_IND) ? "IND," : "",
+	      ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_LIST) ? "LIST," : "",
+	      ((prim & 0x30) == RADEON_CP_VC_CNTL_PRIM_WALK_RING) ? "RING," : "",
+	      (prim & RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA) ? "RGBA," : "BGRA, ",
+	      (prim & RADEON_CP_VC_CNTL_MAOS_ENABLE) ? "MAOS," : "",
+	      (prim & RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE) ? "RADEON," : "",
+	      (prim & RADEON_CP_VC_CNTL_TCL_ENABLE) ? "TCL," : "");
+
+   if ((prim & 0xf) > RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST) {
+      fprintf(stderr, "   *** Bad primitive: %x\n", prim & 0xf);
+      return -1;
+   }
+
+   numverts = prim>>16;
+   if (NORMAL)
+      fprintf(stderr, "   prim: %s numverts %d\n", primname[prim&0xf], numverts);
+
+   switch (prim & 0xf) {
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_NONE:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_POINT:
+      if (numverts < 1) {
+	 fprintf(stderr, "Bad nr verts for point %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE:
+      if ((numverts & 1) || numverts == 0) {
+	 fprintf(stderr, "Bad nr verts for line %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP:
+      if (numverts < 2) {
+	 fprintf(stderr, "Bad nr verts for line_strip %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_POINT_LIST:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST:
+      if (numverts % 3 || numverts == 0) {
+	 fprintf(stderr, "Bad nr verts for tri %d\n", numverts);
+	 return -1;
+      }
+      break;
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN:
+   case RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP:
+      if (numverts < 3) {
+	 fprintf(stderr, "Bad nr verts for strip/fan %d\n", numverts);
+	 return -1;
+      }
+      break;
+   default:
+      fprintf(stderr, "Bad primitive\n");
+      return -1;
+   }
+   return 0;
+}
+
+/* Decode the PACKET3 command at the head of cmdbuf, print what is known about
+ * it and advance cmdbuf past it.  Returns 0, or -EINVAL for a malformed packet. */
+static int radeon_emit_packet3( drm_radeon_cmd_buffer_t *cmdbuf )
+{
+   int cmdsz;
+   int *cmd = (int *)cmdbuf->buf;
+   int *tmp;
+   int i, stride, size, start;
+
+   if (cmdbuf->bufsz < sizeof(int)) {   /* cmd[0] must exist before it is read */
+      fprintf(stderr, "Bad packet\n");
+      return -EINVAL;
+   }
+
+   cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
+
+   if ((cmd[0] & RADEON_CP_PACKET_MASK) != RADEON_CP_PACKET3 ||
+       cmdsz * 4 > cmdbuf->bufsz ||
+       cmdsz > RADEON_CP_PACKET_MAX_DWORDS) {
+      fprintf(stderr, "Bad packet\n");
+      return -EINVAL;
+   }
+
+   switch( cmd[0] & ~RADEON_CP_PACKET_COUNT_MASK ) {
+   case RADEON_CP_PACKET3_NOP:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_NOP, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_NEXT_CHAR:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_NEXT_CHAR, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_PLY_NEXTSCAN:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_PLY_NEXTSCAN, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_SET_SCISSORS:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_SET_SCISSORS, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_RNDR_GEN_INDX_PRIM, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_LOAD_MICROCODE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_LOAD_MICROCODE, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_WAIT_FOR_IDLE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_WAIT_FOR_IDLE, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_3D_DRAW_VBUF:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_DRAW_VBUF, %d dwords\n", cmdsz);
+      if (cmdsz < 3)
+	 return -EINVAL;   /* cmd[2] would lie outside the packet */
+      print_vertex_format(cmd[1]);
+      print_prim_and_flags(cmd[2]);
+      break;
+   case RADEON_CP_PACKET3_3D_DRAW_IMMD:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_DRAW_IMMD, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_3D_DRAW_INDX: {
+      int neltdwords;
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_3D_DRAW_INDX, %d dwords\n", cmdsz);
+      if (cmdsz < 3)
+	 return -EINVAL;   /* cmd[2] would lie outside the packet */
+      print_vertex_format(cmd[1]);
+      print_prim_and_flags(cmd[2]);
+      neltdwords = cmd[2]>>16;
+      neltdwords += neltdwords & 1;
+      neltdwords /= 2;
+      if (neltdwords + 3 != cmdsz)
+	 fprintf(stderr, "Mismatch in DRAW_INDX, %d vs cmdsz %d\n",
+		 neltdwords, cmdsz);
+      break;
+   }
+   case RADEON_CP_PACKET3_LOAD_PALETTE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_LOAD_PALETTE, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_3D_LOAD_VBPNTR:
+      if (NORMAL) {
+	 fprintf(stderr, "PACKET3_3D_LOAD_VBPNTR, %d dwords\n", cmdsz);
+	 fprintf(stderr, "   nr arrays: %d\n", cmd[1]);
+      }
+      /* The loop below reads 3 dwords per array pair, 2 for an odd last one. */
+      if (((cmd[1]/2)*3) + ((cmd[1]%2)*2) != cmdsz - 2) {
+	 fprintf(stderr, "  ****** MISMATCH %d/%d *******\n",
+		 ((cmd[1]/2)*3) + ((cmd[1]%2)*2) + 2, cmdsz);
+	 return -EINVAL;
+      }
+      if (NORMAL) {
+	 tmp = cmd+2;
+	 for (i = 0 ; i < cmd[1] ; i++) {
+	    if (i & 1) {
+	       stride = (tmp[0]>>24) & 0xff;
+	       size = (tmp[0]>>16) & 0xff;
+	       start = tmp[2];
+	       tmp += 3;
+	    }
+	    else {
+	       stride = (tmp[0]>>8) & 0xff;
+	       size = (tmp[0]) & 0xff;
+	       start = tmp[1];
+	    }
+	    fprintf(stderr, "   array %d: start 0x%x vsize %d vstride %d\n",
+		    i, start, size, stride );
+	 }
+      }
+      break;
+   case RADEON_CP_PACKET3_CNTL_PAINT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_PAINT, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_BITBLT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_BITBLT, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_SMALLTEXT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_SMALLTEXT, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_HOSTDATA_BLT, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_POLYLINE:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_POLYLINE, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_POLYSCANLINES:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_POLYSCANLINES, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_PAINT_MULTI:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_PAINT_MULTI, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_BITBLT_MULTI:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_BITBLT_MULTI, %d dwords\n", cmdsz);
+      break;
+   case RADEON_CP_PACKET3_CNTL_TRANS_BITBLT:
+      if (NORMAL)
+	 fprintf(stderr, "PACKET3_CNTL_TRANS_BITBLT, %d dwords\n", cmdsz);
+      break;
+   default:
+      fprintf(stderr, "UNKNOWN PACKET, %d dwords\n", cmdsz);
+      break;
+   }
+
+   cmdbuf->buf += cmdsz * 4;
+   cmdbuf->bufsz -= cmdsz * 4;
+   return 0;
+}
+
+/* Report the cliprects attached to cmdbuf for a PACKET3_CLIP command, then
+ * decode the packet itself with radeon_emit_packet3(). */
+static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf )
+{
+   drm_clip_rect_t *boxes = cmdbuf->boxes;
+   int i;
+
+   /* The state dump and the "nothing changed" note are both VERBOSE-only. */
+   if (VERBOSE) {
+      if (total_changed) {
+	 dump_state();
+	 total_changed = 0;
+      }
+      else
+	 fprintf(stderr, "total_changed zero\n");
+   }
+
+   if (NORMAL) {
+      for (i = 0 ; i < cmdbuf->nbox ; i++)
+	 fprintf(stderr, "Emit box %d/%d %d,%d %d,%d\n",
+		 i, cmdbuf->nbox,
+		 boxes[i].x1, boxes[i].y1, boxes[i].x2, boxes[i].y2);
+   }
+
+   if (cmdbuf->nbox == 1)
+      cmdbuf->nbox = 0;
+
+   return radeon_emit_packet3( cmdbuf );
+}
+
+/* Walk rmesa's stored command buffer one command at a time, tracing each to stderr; 0 if all parsed, else -EINVAL. */
+int radeonSanityCmdBuffer( r100ContextPtr rmesa,
+			   int nbox,
+			   drm_clip_rect_t *boxes )
+{
+   int idx;
+   drm_radeon_cmd_buffer_t cmdbuf;
+   drm_radeon_cmd_header_t header;
+   static int inited = 0;
+
+   if (!inited) {
+      init_regs();
+      inited = 1;
+   }
+
+   cmdbuf.buf = rmesa->store.cmd_buf;
+   cmdbuf.bufsz = rmesa->store.cmd_used;
+   cmdbuf.boxes = boxes;
+   cmdbuf.nbox = nbox;
+
+   while ( cmdbuf.bufsz >= sizeof(header) ) {
+		
+      header.i = *(int *)cmdbuf.buf;
+      cmdbuf.buf += sizeof(header);
+      cmdbuf.bufsz -= sizeof(header);
+
+      switch (header.header.cmd_type) {
+      case RADEON_CMD_PACKET: 
+	 if (radeon_emit_packets( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_packets failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_SCALARS:
+	 if (radeon_emit_scalars( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_scalars failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_SCALARS2:
+	 if (radeon_emit_scalars2( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_scalars2 failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_VECTORS:
+	 if (radeon_emit_vectors( header, &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_vectors failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_DMA_DISCARD:
+	 idx = header.dma.buf_idx;
+	 if (NORMAL)
+	    fprintf(stderr, "RADEON_CMD_DMA_DISCARD buf %d\n", idx);
+	 bufs++;
+	 break;
+
+      case RADEON_CMD_PACKET3:
+	 if (radeon_emit_packet3( &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_packet3 failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_PACKET3_CLIP:
+	 if (radeon_emit_packet3_cliprect( &cmdbuf )) {
+	    fprintf(stderr,"radeon_emit_packet3_clip failed\n");
+	    return -EINVAL;
+	 }
+	 break;
+
+      case RADEON_CMD_WAIT:
+	 break;
+
+      default:
+	 fprintf(stderr,"bad cmd_type %d at %p\n", 
+		   header.header.cmd_type,
+		   (void *)(cmdbuf.buf - sizeof(header)));   /* %p takes a void pointer */
+	 return -EINVAL;
+      }
+   }
+
+   if (0)   /* statistics report, compiled in but switched off */
+   {
+      static int n = 0;
+      n++;
+      if (n == 10) {
+	 fprintf(stderr, "Bufs %d Total emitted %d real changes %d (%.2f%%)\n",
+		 bufs,
+		 total, total_changed, 
+		 ((float)total_changed/(float)total*100.0));
+	 fprintf(stderr, "Total emitted per buf: %.2f\n",
+		 (float)total/(float)bufs);
+	 fprintf(stderr, "Real changes per buf: %.2f\n",
+		 (float)total_changed/(float)bufs);
+
+	 bufs = n = total = total_changed = 0;
+      }
+   }
+
+   return 0;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.h b/src/mesa/drivers/dri/radeon/radeon_sanity.h
new file mode 100644
index 0000000000..f30eb1c4f1
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_sanity.h
@@ -0,0 +1,8 @@
+#ifndef RADEON_SANITY_H
+#define RADEON_SANITY_H
+/* Parses the command buffer stored in rmesa and traces it to stderr; returns 0 or -EINVAL. */
+extern int radeonSanityCmdBuffer( r100ContextPtr rmesa,
+				  int nbox,
+				  drm_clip_rect_t *boxes );
+
+#endif /* RADEON_SANITY_H */
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
new file mode 100644
index 0000000000..4f59511a52
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -0,0 +1,1768 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file radeon_screen.c
+ * Screen initialization functions for the Radeon driver.
+ *
+ * \author Kevin E. Martin <martin@valinux.com>
+ * \author  Gareth Hughes <gareth@valinux.com>
+ */
+
+#include <errno.h>
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+
+#define STANDALONE_MMIO
+#include "radeon_chipset.h"
+#include "radeon_macros.h"
+#include "radeon_screen.h"
+#include "radeon_common.h"
+#if defined(RADEON_R100)
+#include "radeon_context.h"
+#include "radeon_tex.h"
+#elif defined(RADEON_R200)
+#include "r200_context.h"
+#include "r200_ioctl.h"
+#include "r200_tex.h"
+#elif defined(RADEON_R300)
+#include "r300_context.h"
+#include "r300_tex.h"
+#elif defined(RADEON_R600)
+#include "r600_context.h"
+#include "r700_driconf.h" /* +r6/r7 */
+#include "r600_tex.h"     /* +r6/r7 */
+#endif
+
+#include "utils.h"
+#include "vblank.h"
+
+#include "radeon_bocs_wrapper.h"
+
+#include "GL/internal/dri_interface.h"
+
+/* Radeon configuration
+ */
+#include "xmlpool.h"
+/* Option "command_buffer_size": int, default def, valid range min:max, described as a size in KB. */
+#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,"Size of command buffer (in KB)") \
+        DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \
+DRI_CONF_OPT_END
+
+#if defined(RADEON_R100)	/* R100 */
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+    DRI_CONF_SECTION_PERFORMANCE
+        DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN)
+        DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
+        DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+        DRI_CONF_MAX_TEXTURE_UNITS(3,2,3)
+        DRI_CONF_HYPERZ(false)
+        DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_QUALITY
+        DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
+        DRI_CONF_DEF_MAX_ANISOTROPY(1.0,"1.0,2.0,4.0,8.0,16.0")
+        DRI_CONF_NO_NEG_LOD_BIAS(false)
+        DRI_CONF_FORCE_S3TC_ENABLE(false)
+        DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER)
+        DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC)
+        DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF)
+        DRI_CONF_ALLOW_LARGE_TEXTURES(2)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_DEBUG
+        DRI_CONF_NO_RAST(false)
+    DRI_CONF_SECTION_END
+DRI_CONF_END;
+static const GLuint __driNConfigOptions = 15;   /* options listed above: 6 performance + 8 quality + 1 debug */
+
+#elif defined(RADEON_R200)
+
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+    DRI_CONF_SECTION_PERFORMANCE
+        DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN)
+        DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
+        DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+        DRI_CONF_MAX_TEXTURE_UNITS(6,2,6)
+        DRI_CONF_HYPERZ(false)
+        DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_QUALITY
+        DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
+        DRI_CONF_DEF_MAX_ANISOTROPY(1.0,"1.0,2.0,4.0,8.0,16.0")
+        DRI_CONF_NO_NEG_LOD_BIAS(false)
+        DRI_CONF_FORCE_S3TC_ENABLE(false)
+        DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER)
+        DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC)
+        DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF)
+        DRI_CONF_ALLOW_LARGE_TEXTURES(2)
+        DRI_CONF_TEXTURE_BLEND_QUALITY(1.0,"0.0:1.0")
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_DEBUG
+        DRI_CONF_NO_RAST(false)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_SOFTWARE
+        DRI_CONF_NV_VERTEX_PROGRAM(false)
+    DRI_CONF_SECTION_END
+DRI_CONF_END;
+static const GLuint __driNConfigOptions = 17;   /* options listed above: 6 performance + 9 quality + 1 debug + 1 software */
+
+#elif defined(RADEON_R300) || defined(RADEON_R600)
+
+#define DRI_CONF_FP_OPTIMIZATION_SPEED   0   /* fp_optimization enum value "Optimize for Speed" */
+#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1   /* fp_optimization enum value "Optimize for Quality" */
+
+/* Option macros local to this file.  TODO: integrate these into xmlpool.h! */
+#define DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(texture_image_units,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,"Number of texture image units") \
+        DRI_CONF_DESC(de,"Anzahl der Textureinheiten") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_MAX_TEXTURE_COORD_UNITS(def,min,max) \
+DRI_CONF_OPT_BEGIN_V(texture_coord_units,int,def, # min ":" # max ) \
+        DRI_CONF_DESC(en,"Number of texture coordinate units") \
+        DRI_CONF_DESC(de,"Anzahl der Texturkoordinateneinheiten") \
+DRI_CONF_OPT_END
+
+/* The next three options are booleans; each macro takes only the default value. */
+
+#define DRI_CONF_DISABLE_S3TC(def) \
+DRI_CONF_OPT_BEGIN(disable_s3tc,bool,def) \
+        DRI_CONF_DESC(en,"Disable S3TC compression") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_DISABLE_FALLBACK(def) \
+DRI_CONF_OPT_BEGIN(disable_lowimpact_fallback,bool,def) \
+        DRI_CONF_DESC(en,"Disable Low-impact fallback") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_DISABLE_DOUBLE_SIDE_STENCIL(def) \
+DRI_CONF_OPT_BEGIN(disable_stencil_two_side,bool,def) \
+        DRI_CONF_DESC(en,"Disable GL_EXT_stencil_two_side") \
+DRI_CONF_OPT_END
+
+#define DRI_CONF_FP_OPTIMIZATION(def) \
+DRI_CONF_OPT_BEGIN_V(fp_optimization,enum,def,"0:1") \
+	DRI_CONF_DESC_BEGIN(en,"Fragment Program optimization") \
+                DRI_CONF_ENUM(0,"Optimize for Speed") \
+                DRI_CONF_ENUM(1,"Optimize for Quality") \
+        DRI_CONF_DESC_END \
+DRI_CONF_OPT_END
+
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+	DRI_CONF_SECTION_PERFORMANCE
+		DRI_CONF_TCL_MODE(DRI_CONF_TCL_CODEGEN)
+		DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS)
+		DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+		DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(8, 2, 8)
+		DRI_CONF_MAX_TEXTURE_COORD_UNITS(8, 2, 8)
+		DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32)
+		DRI_CONF_DISABLE_FALLBACK(true)
+		DRI_CONF_DISABLE_DOUBLE_SIDE_STENCIL(false)
+	DRI_CONF_SECTION_END
+	DRI_CONF_SECTION_QUALITY
+		DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
+		DRI_CONF_DEF_MAX_ANISOTROPY(1.0, "1.0,2.0,4.0,8.0,16.0")
+		DRI_CONF_FORCE_S3TC_ENABLE(false)
+		DRI_CONF_DISABLE_S3TC(false)
+		DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER)
+		DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC)
+		DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF)
+		DRI_CONF_FP_OPTIMIZATION(DRI_CONF_FP_OPTIMIZATION_SPEED)
+	DRI_CONF_SECTION_END
+	DRI_CONF_SECTION_DEBUG
+		DRI_CONF_NO_RAST(false)
+	DRI_CONF_SECTION_END
+DRI_CONF_END;
+static const GLuint __driNConfigOptions = 17;   /* options listed above: 8 performance + 8 quality + 1 debug */
+
+#endif
+
+static int getSwapInfo( __DRIdrawable *dPriv, __DRIswapInfo * sInfo );
+
+/* Ask the kernel for one RADEON_PARAM_* value, handing `value` through as the
+ * result pointer: DRM_RADEON_INFO on DRM major >= 2, DRM_RADEON_GETPARAM below. */
+static int
+radeonGetParam(__DRIscreen *sPriv, int param, void *value)
+{
+  drm_radeon_getparam_t gp = { 0 };
+  struct drm_radeon_info info = { 0 };
+
+  if (sPriv->drm_version.major < 2) {
+      gp.param = param;
+      gp.value = value;
+      return drmCommandWriteRead(sPriv->fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
+  }
+
+  switch (param) {
+  case RADEON_PARAM_DEVICE_ID:
+      info.request = RADEON_INFO_DEVICE_ID;
+      break;
+  case RADEON_PARAM_NUM_GB_PIPES:
+      info.request = RADEON_INFO_NUM_GB_PIPES;
+      break;
+  case RADEON_PARAM_NUM_Z_PIPES:
+      info.request = RADEON_INFO_NUM_Z_PIPES;
+      break;
+  default:
+      return -EINVAL;   /* no INFO request is mapped for this parameter */
+  }
+
+  info.value = (uint64_t)(uintptr_t)value;
+  return drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info));
+}
+/* Build the config list: RGB565 plus BGRA8888 configs for 16-bit screens, BGRA8888 only otherwise; NULL on failure. */
+static const __DRIconfig **
+radeonFillInModes( __DRIscreen *psp,
+		   unsigned pixel_bits, unsigned depth_bits,
+		   unsigned stencil_bits, GLboolean have_back_buffer )
+{
+    __DRIconfig **configs;
+    __GLcontextModes *m;
+    unsigned depth_buffer_factor;
+    unsigned back_buffer_factor;
+    int i;
+
+    /* Right now GLX_SWAP_COPY_OML isn't supported, but it would be easy
+     * enough to add support.  Basically, if a context is created with an
+     * fbconfig where the swap method is GLX_SWAP_COPY_OML, pageflipping
+     * will never be used.
+     */
+    static const GLenum back_buffer_modes[] = {
+	GLX_NONE, GLX_SWAP_UNDEFINED_OML /*, GLX_SWAP_COPY_OML */
+    };
+
+    uint8_t depth_bits_array[2];
+    uint8_t stencil_bits_array[2];
+    uint8_t msaa_samples_array[1];
+
+    depth_bits_array[0] = depth_bits;
+    depth_bits_array[1] = depth_bits;   /* both depth/stencil variants share one depth size */
+
+    /* Just like with the accumulation buffer, always provide some modes
+     * with a stencil buffer.  It will be a sw fallback, but some apps won't
+     * care about that.
+     */
+    stencil_bits_array[0] = stencil_bits;
+    stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
+
+    msaa_samples_array[0] = 0;
+
+    depth_buffer_factor = (stencil_bits == 0) ? 2 : 1;   /* second variant only when the fake 8-bit stencil exists */
+    back_buffer_factor  = (have_back_buffer) ? 2 : 1;    /* second back-buffer mode only with a back buffer */
+
+    if (pixel_bits == 16) {
+	__DRIconfig **configs_a8r8g8b8;
+	__DRIconfig **configs_r5g6b5;
+
+	configs_r5g6b5 = driCreateConfigs(GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
+					  depth_bits_array, stencil_bits_array,
+					  depth_buffer_factor, back_buffer_modes,
+					  back_buffer_factor, msaa_samples_array,
+					  1, GL_TRUE);
+	configs_a8r8g8b8 = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
+					    depth_bits_array, stencil_bits_array,
+					    1, back_buffer_modes, 1,   /* both factors fixed at 1 for the 32-bit extras */
+					    msaa_samples_array, 1, GL_TRUE);
+	configs = driConcatConfigs(configs_r5g6b5, configs_a8r8g8b8);
+   } else
+	configs = driCreateConfigs(GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
+				   depth_bits_array, stencil_bits_array,
+				   depth_buffer_factor,
+				   back_buffer_modes, back_buffer_factor,
+				   msaa_samples_array, 1, GL_TRUE);
+
+    if (configs == NULL) {
+	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
+		 __func__, __LINE__ );
+	return NULL;
+    }
+
+    /* Mark the visual as slow if there are "fake" stencil bits.
+     */
+    for (i = 0; configs[i]; i++) {   /* the list is walked until a NULL entry */
+	m = &configs[i]->modes;
+	if ((m->stencilBits != 0) && (m->stencilBits != stencil_bits)) {
+	    m->visualRating = GLX_SLOW_CONFIG;
+	}
+    }
+
+    return (const __DRIconfig **) configs;
+}
+
+#if defined(RADEON_R100)
+/* Texture-offset extension table for R100 builds: the leading pair is the
+ * extension name and version, followed by the radeonSetTexOffset hook.
+ * radeonCreateScreen() adds it to the screen's extension list.
+ */
+static const __DRItexOffsetExtension radeonTexOffsetExtension = {
+    { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
+    radeonSetTexOffset,
+};
+
+/* Texture-buffer extension table for R100 builds, exposing
+ * radeonSetTexBuffer and radeonSetTexBuffer2.  radeonCreateScreen2() adds
+ * it to the screen's extension list.
+ */
+static const __DRItexBufferExtension radeonTexBufferExtension = {
+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+   radeonSetTexBuffer,
+   radeonSetTexBuffer2,
+};
+#endif
+
+#if defined(RADEON_R200)
+/* Memory allocation extension table for R200 builds (allocate / free /
+ * get-offset hooks).  The screen-creation functions only add it to the
+ * extension list when IS_R200_CLASS(screen) holds.
+ */
+static const __DRIallocateExtension r200AllocateExtension = {
+    { __DRI_ALLOCATE, __DRI_ALLOCATE_VERSION },
+    r200AllocateMemoryMESA,
+    r200FreeMemoryMESA,
+    r200GetMemoryOffsetMESA
+};
+
+/* Texture-offset extension table for R200 builds; added to the extension
+ * list by radeonCreateScreen().
+ */
+static const __DRItexOffsetExtension r200texOffsetExtension = {
+    { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
+   r200SetTexOffset,
+};
+
+/* Texture-buffer extension table for R200 builds; added to the extension
+ * list by radeonCreateScreen2().
+ */
+static const __DRItexBufferExtension r200TexBufferExtension = {
+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+   r200SetTexBuffer,
+   r200SetTexBuffer2,
+};
+#endif
+
+#if defined(RADEON_R300)
+/* Texture-offset extension table for R300 builds; added to the extension
+ * list by radeonCreateScreen().
+ */
+static const __DRItexOffsetExtension r300texOffsetExtension = {
+    { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
+   r300SetTexOffset,
+};
+
+/* Texture-buffer extension table for R300 builds; added to the extension
+ * list by radeonCreateScreen2().
+ */
+static const __DRItexBufferExtension r300TexBufferExtension = {
+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+   r300SetTexBuffer,
+   r300SetTexBuffer2,
+};
+#endif
+
+#if defined(RADEON_R600)
+/* Texture-offset extension table for R600 builds; added to the extension
+ * list by radeonCreateScreen().
+ */
+static const __DRItexOffsetExtension r600texOffsetExtension = {
+    { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
+   r600SetTexOffset, /* +r6/r7 */
+};
+
+/* Texture-buffer extension table for R600 builds; added to the extension
+ * list by radeonCreateScreen2().
+ */
+static const __DRItexBufferExtension r600TexBufferExtension = {
+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+   r600SetTexBuffer,  /* +r6/r7 */
+   r600SetTexBuffer2, /* +r6/r7 */
+};
+#endif
+
+/**
+ * Record the PCI device id on the screen and derive the chip family and
+ * chip flags from it.
+ *
+ * screen->device_id is always stored and screen->chip_flags is reset to 0
+ * before the lookup.  Each recognised id sets screen->chip_family; groups
+ * that assign chip_flags set RADEON_CHIPSET_TCL (plus a quirk flag for a
+ * few families), the remaining groups leave chip_flags at 0.
+ *
+ * \param screen     screen structure to fill in.
+ * \param device_id  PCI device id of the GPU.
+ * \return 0 when the id is recognised; -1 for the RN50 ids and for any id
+ *         not listed (the latter also prints a message to stderr).  On
+ *         failure chip_family is not assigned.
+ */
+static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
+{
+   screen->device_id = device_id;
+   screen->chip_flags = 0;
+   switch ( device_id ) {
+   /* NOTE(review): RN50 ids are rejected silently (no message, unlike the
+    * default case) -- presumably these parts are not supported by this
+    * driver; confirm.
+    */
+   case PCI_CHIP_RN50_515E:
+   case PCI_CHIP_RN50_5969:
+	return -1;
+
+   case PCI_CHIP_RADEON_LY:
+   case PCI_CHIP_RADEON_LZ:
+   case PCI_CHIP_RADEON_QY:
+   case PCI_CHIP_RADEON_QZ:
+      screen->chip_family = CHIP_FAMILY_RV100;
+      break;
+
+   case PCI_CHIP_RS100_4136:
+   case PCI_CHIP_RS100_4336:
+      screen->chip_family = CHIP_FAMILY_RS100;
+      break;
+
+   case PCI_CHIP_RS200_4137:
+   case PCI_CHIP_RS200_4337:
+   case PCI_CHIP_RS250_4237:
+   case PCI_CHIP_RS250_4437:
+      screen->chip_family = CHIP_FAMILY_RS200;
+      break;
+
+   case PCI_CHIP_RADEON_QD:
+   case PCI_CHIP_RADEON_QE:
+   case PCI_CHIP_RADEON_QF:
+   case PCI_CHIP_RADEON_QG:
+      /* all original radeons (7200) presumably have a stencil op bug */
+      screen->chip_family = CHIP_FAMILY_R100;
+      screen->chip_flags = RADEON_CHIPSET_TCL | RADEON_CHIPSET_BROKEN_STENCIL;
+      break;
+
+   case PCI_CHIP_RV200_QW:
+   case PCI_CHIP_RV200_QX:
+   case PCI_CHIP_RADEON_LW:
+   case PCI_CHIP_RADEON_LX:
+      screen->chip_family = CHIP_FAMILY_RV200;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_R200_BB:
+   case PCI_CHIP_R200_BC:
+   case PCI_CHIP_R200_QH:
+   case PCI_CHIP_R200_QL:
+   case PCI_CHIP_R200_QM:
+      screen->chip_family = CHIP_FAMILY_R200;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV250_If:
+   case PCI_CHIP_RV250_Ig:
+   case PCI_CHIP_RV250_Ld:
+   case PCI_CHIP_RV250_Lf:
+   case PCI_CHIP_RV250_Lg:
+      screen->chip_family = CHIP_FAMILY_RV250;
+      screen->chip_flags = R200_CHIPSET_YCBCR_BROKEN | RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV280_5960:
+   case PCI_CHIP_RV280_5961:
+   case PCI_CHIP_RV280_5962:
+   case PCI_CHIP_RV280_5964:
+   case PCI_CHIP_RV280_5965:
+   case PCI_CHIP_RV280_5C61:
+   case PCI_CHIP_RV280_5C63:
+      screen->chip_family = CHIP_FAMILY_RV280;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RS300_5834:
+   case PCI_CHIP_RS300_5835:
+   case PCI_CHIP_RS350_7834:
+   case PCI_CHIP_RS350_7835:
+      screen->chip_family = CHIP_FAMILY_RS300;
+      break;
+
+   case PCI_CHIP_R300_AD:
+   case PCI_CHIP_R300_AE:
+   case PCI_CHIP_R300_AF:
+   case PCI_CHIP_R300_AG:
+   case PCI_CHIP_R300_ND:
+   case PCI_CHIP_R300_NE:
+   case PCI_CHIP_R300_NF:
+   case PCI_CHIP_R300_NG:
+      screen->chip_family = CHIP_FAMILY_R300;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV350_AP:
+   case PCI_CHIP_RV350_AQ:
+   case PCI_CHIP_RV350_AR:
+   case PCI_CHIP_RV350_AS:
+   case PCI_CHIP_RV350_AT:
+   case PCI_CHIP_RV350_AV:
+   case PCI_CHIP_RV350_AU:
+   case PCI_CHIP_RV350_NP:
+   case PCI_CHIP_RV350_NQ:
+   case PCI_CHIP_RV350_NR:
+   case PCI_CHIP_RV350_NS:
+   case PCI_CHIP_RV350_NT:
+   case PCI_CHIP_RV350_NV:
+      screen->chip_family = CHIP_FAMILY_RV350;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_R350_AH:
+   case PCI_CHIP_R350_AI:
+   case PCI_CHIP_R350_AJ:
+   case PCI_CHIP_R350_AK:
+   case PCI_CHIP_R350_NH:
+   case PCI_CHIP_R350_NI:
+   case PCI_CHIP_R360_NJ:
+   case PCI_CHIP_R350_NK:
+      screen->chip_family = CHIP_FAMILY_R350;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV370_5460:
+   case PCI_CHIP_RV370_5462:
+   case PCI_CHIP_RV370_5464:
+   case PCI_CHIP_RV370_5B60:
+   case PCI_CHIP_RV370_5B62:
+   case PCI_CHIP_RV370_5B63:
+   case PCI_CHIP_RV370_5B64:
+   case PCI_CHIP_RV370_5B65:
+   case PCI_CHIP_RV380_3150:
+   case PCI_CHIP_RV380_3152:
+   case PCI_CHIP_RV380_3154:
+   case PCI_CHIP_RV380_3155:
+   case PCI_CHIP_RV380_3E50:
+   case PCI_CHIP_RV380_3E54:
+      screen->chip_family = CHIP_FAMILY_RV380;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_R420_JN:
+   case PCI_CHIP_R420_JH:
+   case PCI_CHIP_R420_JI:
+   case PCI_CHIP_R420_JJ:
+   case PCI_CHIP_R420_JK:
+   case PCI_CHIP_R420_JL:
+   case PCI_CHIP_R420_JM:
+   case PCI_CHIP_R420_JO:
+   case PCI_CHIP_R420_JP:
+   case PCI_CHIP_R420_JT:
+   case PCI_CHIP_R481_4B49:
+   case PCI_CHIP_R481_4B4A:
+   case PCI_CHIP_R481_4B4B:
+   case PCI_CHIP_R481_4B4C:
+   case PCI_CHIP_R423_UH:
+   case PCI_CHIP_R423_UI:
+   case PCI_CHIP_R423_UJ:
+   case PCI_CHIP_R423_UK:
+   case PCI_CHIP_R430_554C:
+   case PCI_CHIP_R430_554D:
+   case PCI_CHIP_R430_554E:
+   case PCI_CHIP_R430_554F:
+   case PCI_CHIP_R423_5550:
+   case PCI_CHIP_R423_UQ:
+   case PCI_CHIP_R423_UR:
+   case PCI_CHIP_R423_UT:
+   case PCI_CHIP_R430_5D48:
+   case PCI_CHIP_R430_5D49:
+   case PCI_CHIP_R430_5D4A:
+   case PCI_CHIP_R480_5D4C:
+   case PCI_CHIP_R480_5D4D:
+   case PCI_CHIP_R480_5D4E:
+   case PCI_CHIP_R480_5D4F:
+   case PCI_CHIP_R480_5D50:
+   case PCI_CHIP_R480_5D52:
+   case PCI_CHIP_R423_5D57:
+      screen->chip_family = CHIP_FAMILY_R420;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV410_5E4C:
+   case PCI_CHIP_RV410_5E4F:
+   case PCI_CHIP_RV410_564A:
+   case PCI_CHIP_RV410_564B:
+   case PCI_CHIP_RV410_564F:
+   case PCI_CHIP_RV410_5652:
+   case PCI_CHIP_RV410_5653:
+   case PCI_CHIP_RV410_5657:
+   case PCI_CHIP_RV410_5E48:
+   case PCI_CHIP_RV410_5E4A:
+   case PCI_CHIP_RV410_5E4B:
+   case PCI_CHIP_RV410_5E4D:
+      screen->chip_family = CHIP_FAMILY_RV410;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RS480_5954:
+   case PCI_CHIP_RS480_5955:
+   case PCI_CHIP_RS482_5974:
+   case PCI_CHIP_RS482_5975:
+   case PCI_CHIP_RS400_5A41:
+   case PCI_CHIP_RS400_5A42:
+   case PCI_CHIP_RC410_5A61:
+   case PCI_CHIP_RC410_5A62:
+      screen->chip_family = CHIP_FAMILY_RS400;
+      break;
+
+   case PCI_CHIP_RS600_793F:
+   case PCI_CHIP_RS600_7941:
+   case PCI_CHIP_RS600_7942:
+      screen->chip_family = CHIP_FAMILY_RS600;
+      break;
+
+   case PCI_CHIP_RS690_791E:
+   case PCI_CHIP_RS690_791F:
+      screen->chip_family = CHIP_FAMILY_RS690;
+      break;
+   case PCI_CHIP_RS740_796C:
+   case PCI_CHIP_RS740_796D:
+   case PCI_CHIP_RS740_796E:
+   case PCI_CHIP_RS740_796F:
+      screen->chip_family = CHIP_FAMILY_RS740;
+      break;
+
+   case PCI_CHIP_R520_7100:
+   case PCI_CHIP_R520_7101:
+   case PCI_CHIP_R520_7102:
+   case PCI_CHIP_R520_7103:
+   case PCI_CHIP_R520_7104:
+   case PCI_CHIP_R520_7105:
+   case PCI_CHIP_R520_7106:
+   case PCI_CHIP_R520_7108:
+   case PCI_CHIP_R520_7109:
+   case PCI_CHIP_R520_710A:
+   case PCI_CHIP_R520_710B:
+   case PCI_CHIP_R520_710C:
+   case PCI_CHIP_R520_710E:
+   case PCI_CHIP_R520_710F:
+      screen->chip_family = CHIP_FAMILY_R520;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV515_7140:
+   case PCI_CHIP_RV515_7141:
+   case PCI_CHIP_RV515_7142:
+   case PCI_CHIP_RV515_7143:
+   case PCI_CHIP_RV515_7144:
+   case PCI_CHIP_RV515_7145:
+   case PCI_CHIP_RV515_7146:
+   case PCI_CHIP_RV515_7147:
+   case PCI_CHIP_RV515_7149:
+   case PCI_CHIP_RV515_714A:
+   case PCI_CHIP_RV515_714B:
+   case PCI_CHIP_RV515_714C:
+   case PCI_CHIP_RV515_714D:
+   case PCI_CHIP_RV515_714E:
+   case PCI_CHIP_RV515_714F:
+   case PCI_CHIP_RV515_7151:
+   case PCI_CHIP_RV515_7152:
+   case PCI_CHIP_RV515_7153:
+   case PCI_CHIP_RV515_715E:
+   case PCI_CHIP_RV515_715F:
+   case PCI_CHIP_RV515_7180:
+   case PCI_CHIP_RV515_7181:
+   case PCI_CHIP_RV515_7183:
+   case PCI_CHIP_RV515_7186:
+   case PCI_CHIP_RV515_7187:
+   case PCI_CHIP_RV515_7188:
+   case PCI_CHIP_RV515_718A:
+   case PCI_CHIP_RV515_718B:
+   case PCI_CHIP_RV515_718C:
+   case PCI_CHIP_RV515_718D:
+   case PCI_CHIP_RV515_718F:
+   case PCI_CHIP_RV515_7193:
+   case PCI_CHIP_RV515_7196:
+   case PCI_CHIP_RV515_719B:
+   case PCI_CHIP_RV515_719F:
+   case PCI_CHIP_RV515_7200:
+   case PCI_CHIP_RV515_7210:
+   case PCI_CHIP_RV515_7211:
+      screen->chip_family = CHIP_FAMILY_RV515;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV530_71C0:
+   case PCI_CHIP_RV530_71C1:
+   case PCI_CHIP_RV530_71C2:
+   case PCI_CHIP_RV530_71C3:
+   case PCI_CHIP_RV530_71C4:
+   case PCI_CHIP_RV530_71C5:
+   case PCI_CHIP_RV530_71C6:
+   case PCI_CHIP_RV530_71C7:
+   case PCI_CHIP_RV530_71CD:
+   case PCI_CHIP_RV530_71CE:
+   case PCI_CHIP_RV530_71D2:
+   case PCI_CHIP_RV530_71D4:
+   case PCI_CHIP_RV530_71D5:
+   case PCI_CHIP_RV530_71D6:
+   case PCI_CHIP_RV530_71DA:
+   case PCI_CHIP_RV530_71DE:
+      screen->chip_family = CHIP_FAMILY_RV530;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_R580_7240:
+   case PCI_CHIP_R580_7243:
+   case PCI_CHIP_R580_7244:
+   case PCI_CHIP_R580_7245:
+   case PCI_CHIP_R580_7246:
+   case PCI_CHIP_R580_7247:
+   case PCI_CHIP_R580_7248:
+   case PCI_CHIP_R580_7249:
+   case PCI_CHIP_R580_724A:
+   case PCI_CHIP_R580_724B:
+   case PCI_CHIP_R580_724C:
+   case PCI_CHIP_R580_724D:
+   case PCI_CHIP_R580_724E:
+   case PCI_CHIP_R580_724F:
+   case PCI_CHIP_R580_7284:
+      screen->chip_family = CHIP_FAMILY_R580;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   /* RV560 and RV570 ids are both filed under CHIP_FAMILY_RV560 here.
+    * NOTE(review): confirm that no caller needs CHIP_FAMILY_RV570 to be
+    * reported for the RV570 ids.
+    */
+   case PCI_CHIP_RV570_7280:
+   case PCI_CHIP_RV560_7281:
+   case PCI_CHIP_RV560_7283:
+   case PCI_CHIP_RV560_7287:
+   case PCI_CHIP_RV570_7288:
+   case PCI_CHIP_RV570_7289:
+   case PCI_CHIP_RV570_728B:
+   case PCI_CHIP_RV570_728C:
+   case PCI_CHIP_RV560_7290:
+   case PCI_CHIP_RV560_7291:
+   case PCI_CHIP_RV560_7293:
+   case PCI_CHIP_RV560_7297:
+      screen->chip_family = CHIP_FAMILY_RV560;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_R600_9400:
+   case PCI_CHIP_R600_9401:
+   case PCI_CHIP_R600_9402:
+   case PCI_CHIP_R600_9403:
+   case PCI_CHIP_R600_9405:
+   case PCI_CHIP_R600_940A:
+   case PCI_CHIP_R600_940B:
+   case PCI_CHIP_R600_940F:
+      screen->chip_family = CHIP_FAMILY_R600;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV610_94C0:
+   case PCI_CHIP_RV610_94C1:
+   case PCI_CHIP_RV610_94C3:
+   case PCI_CHIP_RV610_94C4:
+   case PCI_CHIP_RV610_94C5:
+   case PCI_CHIP_RV610_94C6:
+   case PCI_CHIP_RV610_94C7:
+   case PCI_CHIP_RV610_94C8:
+   case PCI_CHIP_RV610_94C9:
+   case PCI_CHIP_RV610_94CB:
+   case PCI_CHIP_RV610_94CC:
+   case PCI_CHIP_RV610_94CD:
+      screen->chip_family = CHIP_FAMILY_RV610;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV630_9580:
+   case PCI_CHIP_RV630_9581:
+   case PCI_CHIP_RV630_9583:
+   case PCI_CHIP_RV630_9586:
+   case PCI_CHIP_RV630_9587:
+   case PCI_CHIP_RV630_9588:
+   case PCI_CHIP_RV630_9589:
+   case PCI_CHIP_RV630_958A:
+   case PCI_CHIP_RV630_958B:
+   case PCI_CHIP_RV630_958C:
+   case PCI_CHIP_RV630_958D:
+   case PCI_CHIP_RV630_958E:
+   case PCI_CHIP_RV630_958F:
+      screen->chip_family = CHIP_FAMILY_RV630;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV670_9500:
+   case PCI_CHIP_RV670_9501:
+   case PCI_CHIP_RV670_9504:
+   case PCI_CHIP_RV670_9505:
+   case PCI_CHIP_RV670_9506:
+   case PCI_CHIP_RV670_9507:
+   case PCI_CHIP_RV670_9508:
+   case PCI_CHIP_RV670_9509:
+   case PCI_CHIP_RV670_950F:
+   case PCI_CHIP_RV670_9511:
+   case PCI_CHIP_RV670_9515:
+   case PCI_CHIP_RV670_9517:
+   case PCI_CHIP_RV670_9519:
+      screen->chip_family = CHIP_FAMILY_RV670;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV620_95C0:
+   case PCI_CHIP_RV620_95C2:
+   case PCI_CHIP_RV620_95C4:
+   case PCI_CHIP_RV620_95C5:
+   case PCI_CHIP_RV620_95C6:
+   case PCI_CHIP_RV620_95C7:
+   case PCI_CHIP_RV620_95C9:
+   case PCI_CHIP_RV620_95CC:
+   case PCI_CHIP_RV620_95CD:
+   case PCI_CHIP_RV620_95CE:
+   case PCI_CHIP_RV620_95CF:
+      screen->chip_family = CHIP_FAMILY_RV620;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV635_9590:
+   case PCI_CHIP_RV635_9591:
+   case PCI_CHIP_RV635_9593:
+   case PCI_CHIP_RV635_9595:
+   case PCI_CHIP_RV635_9596:
+   case PCI_CHIP_RV635_9597:
+   case PCI_CHIP_RV635_9598:
+   case PCI_CHIP_RV635_9599:
+   case PCI_CHIP_RV635_959B:
+      screen->chip_family = CHIP_FAMILY_RV635;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RS780_9610:
+   case PCI_CHIP_RS780_9611:
+   case PCI_CHIP_RS780_9612:
+   case PCI_CHIP_RS780_9613:
+   case PCI_CHIP_RS780_9614:
+   case PCI_CHIP_RS780_9615:
+   case PCI_CHIP_RS780_9616:
+      screen->chip_family = CHIP_FAMILY_RS780;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+   case PCI_CHIP_RS880_9710:
+   case PCI_CHIP_RS880_9711:
+   case PCI_CHIP_RS880_9712:
+   case PCI_CHIP_RS880_9713:
+   case PCI_CHIP_RS880_9714:
+   case PCI_CHIP_RS880_9715:
+      screen->chip_family = CHIP_FAMILY_RS880;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV770_9440:
+   case PCI_CHIP_RV770_9441:
+   case PCI_CHIP_RV770_9442:
+   case PCI_CHIP_RV770_9443:
+   case PCI_CHIP_RV770_9444:
+   case PCI_CHIP_RV770_9446:
+   case PCI_CHIP_RV770_944A:
+   case PCI_CHIP_RV770_944B:
+   case PCI_CHIP_RV770_944C:
+   case PCI_CHIP_RV770_944E:
+   case PCI_CHIP_RV770_9450:
+   case PCI_CHIP_RV770_9452:
+   case PCI_CHIP_RV770_9456:
+   case PCI_CHIP_RV770_945A:
+   case PCI_CHIP_RV770_945B:
+   case PCI_CHIP_RV770_945E:
+   case PCI_CHIP_RV790_9460:
+   case PCI_CHIP_RV790_9462:
+   case PCI_CHIP_RV770_946A:
+   case PCI_CHIP_RV770_946B:
+   case PCI_CHIP_RV770_947A:
+   case PCI_CHIP_RV770_947B:
+      screen->chip_family = CHIP_FAMILY_RV770;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV730_9480:
+   case PCI_CHIP_RV730_9487:
+   case PCI_CHIP_RV730_9488:
+   case PCI_CHIP_RV730_9489:
+   case PCI_CHIP_RV730_948A:
+   case PCI_CHIP_RV730_948F:
+   case PCI_CHIP_RV730_9490:
+   case PCI_CHIP_RV730_9491:
+   case PCI_CHIP_RV730_9495:
+   case PCI_CHIP_RV730_9498:
+   case PCI_CHIP_RV730_949C:
+   case PCI_CHIP_RV730_949E:
+   case PCI_CHIP_RV730_949F:
+      screen->chip_family = CHIP_FAMILY_RV730;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV710_9540:
+   case PCI_CHIP_RV710_9541:
+   case PCI_CHIP_RV710_9542:
+   case PCI_CHIP_RV710_954E:
+   case PCI_CHIP_RV710_954F:
+   case PCI_CHIP_RV710_9552:
+   case PCI_CHIP_RV710_9553:
+   case PCI_CHIP_RV710_9555:
+   case PCI_CHIP_RV710_9557:
+   case PCI_CHIP_RV710_955F:
+      screen->chip_family = CHIP_FAMILY_RV710;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   case PCI_CHIP_RV740_94A0:
+   case PCI_CHIP_RV740_94A1:
+   case PCI_CHIP_RV740_94A3:
+   case PCI_CHIP_RV740_94B1:
+   case PCI_CHIP_RV740_94B3:
+   case PCI_CHIP_RV740_94B4:
+   case PCI_CHIP_RV740_94B5:
+   case PCI_CHIP_RV740_94B9:
+      screen->chip_family = CHIP_FAMILY_RV740;
+      screen->chip_flags = RADEON_CHIPSET_TCL;
+      break;
+
+   /* Any id not listed above is reported and rejected. */
+   default:
+      fprintf(stderr, "unknown chip id 0x%x, can't guess.\n",
+	      device_id);
+      return -1;
+   }
+
+   return 0;
+}
+
+
+/**
+ * Create the device specific screen private data struct.
+ *
+ * Checks the size of the device-private record passed in through
+ * sPriv->pDevPriv, queries the GART buffer offset, GART base and IRQ number
+ * from the DRM, maps the MMIO, status, DRM buffer and (when advertised)
+ * GART texture regions, then fills in the framebuffer location, pipe
+ * counts, buffer offsets, texture heaps and the extension list before
+ * creating the legacy buffer-object manager.
+ *
+ * \param sPriv  DRI screen being initialised.
+ * \return the new screen on success, NULL on failure.  On every failure
+ *         path the regions mapped so far are unmapped again and the screen
+ *         structure is freed.
+ *
+ * NOTE(review): the option cache filled by driParseOptionInfo() is not
+ * released on the failure paths; confirm whether it needs a matching
+ * destroy call here.
+ */
+static radeonScreenPtr
+radeonCreateScreen( __DRIscreen *sPriv )
+{
+   radeonScreenPtr screen;
+   RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv;
+   /* Not referenced by name below; presumably consumed by the INREG()
+    * macro used for the fb-location fallback.
+    */
+   unsigned char *RADEONMMIO = NULL;
+   int i;
+   int ret;
+   uint32_t temp = 0;
+
+   if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) {
+      fprintf(stderr,"\nERROR!  sizeof(RADEONDRIRec) does not match passed size from device driver\n");
+      return NULL;
+   }
+
+   /* Allocate the private area */
+   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
+   if ( !screen ) {
+      __driUtilMessage("%s: Could not allocate memory for screen structure",
+		       __FUNCTION__);
+      return NULL;
+   }
+
+   radeon_init_debug();
+
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo (&screen->optionCache,
+		       __driConfigOptions, __driNConfigOptions);
+
+   /* This is first since which regions we map depends on whether or
+    * not we are using a PCI card.
+    */
+   screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP);
+
+   ret = radeonGetParam(sPriv, RADEON_PARAM_GART_BUFFER_OFFSET,
+			 &screen->gart_buffer_offset);
+   if (ret) {
+      fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret);
+      goto fail_free;
+   }
+
+   ret = radeonGetParam(sPriv, RADEON_PARAM_GART_BASE,
+			 &screen->gart_base);
+   if (ret) {
+      fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret);
+      goto fail_free;
+   }
+
+   ret = radeonGetParam(sPriv, RADEON_PARAM_IRQ_NR,
+			 &screen->irq);
+   if (ret) {
+      fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret);
+      goto fail_free;
+   }
+
+   /* Feature availability is keyed off the DRM minor version. */
+   screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7);
+   screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11);
+   screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16);
+   screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18);
+   screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13);
+   screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15);
+   screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25);
+   screen->drmSupportsOcclusionQueries = (sPriv->drm_version.minor >= 30);
+
+   /* Unknown or rejected chips: nothing is mapped yet, just free. */
+   ret = radeon_set_screen_flags(screen, dri_priv->deviceID);
+   if (ret == -1)
+      goto fail_free;
+
+   screen->mmio.handle = dri_priv->registerHandle;
+   screen->mmio.size   = dri_priv->registerSize;
+   if ( drmMap( sPriv->fd,
+		screen->mmio.handle,
+		screen->mmio.size,
+		&screen->mmio.map ) ) {
+      __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ );
+      goto fail_free;
+   }
+
+   RADEONMMIO = screen->mmio.map;
+
+   screen->status.handle = dri_priv->statusHandle;
+   screen->status.size   = dri_priv->statusSize;
+   if ( drmMap( sPriv->fd,
+		screen->status.handle,
+		screen->status.size,
+		&screen->status.map ) ) {
+      __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ );
+      goto fail_unmap_mmio;
+   }
+   /* The scratch registers sit at a family-dependent offset inside the
+    * status mapping.
+    */
+   if (screen->chip_family < CHIP_FAMILY_R600)
+	   screen->scratch = (__volatile__ uint32_t *)
+		   ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET);
+   else
+	   screen->scratch = (__volatile__ uint32_t *)
+		   ((GLubyte *)screen->status.map + R600_SCRATCH_REG_OFFSET);
+
+   screen->buffers = drmMapBufs( sPriv->fd );
+   if ( !screen->buffers ) {
+      __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ );
+      goto fail_unmap_status;
+   }
+
+   if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) {
+      screen->gartTextures.handle = dri_priv->gartTexHandle;
+      screen->gartTextures.size   = dri_priv->gartTexMapSize;
+      if ( drmMap( sPriv->fd,
+		   screen->gartTextures.handle,
+		   screen->gartTextures.size,
+		   (drmAddressPtr)&screen->gartTextures.map ) ) {
+	 __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__);
+	 /* The GART map pointer is not valid here, so skip its unmap. */
+	 goto fail_unmap_bufs;
+      }
+
+      screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base;
+   }
+
+   /* From here on gartTextures.map is either a valid mapping or still
+    * NULL from CALLOC, so fail_unmap_gart can test it.
+    */
+
+   if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) &&
+       sPriv->ddx_version.minor < 2) {
+      fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n");
+      goto fail_unmap_gart;
+   }
+
+   if ((sPriv->drm_version.minor < 29) && (screen->chip_family >= CHIP_FAMILY_RV515)) {
+      fprintf(stderr, "R500 support requires a newer drm.\n");
+      goto fail_unmap_gart;
+   }
+
+   if (getenv("R300_NO_TCL"))
+	   screen->chip_flags &= ~RADEON_CHIPSET_TCL;
+
+   /* Fold the chip family into one of the four driver classes. */
+   if (screen->chip_family <= CHIP_FAMILY_RS200)
+	   screen->chip_flags |= RADEON_CLASS_R100;
+   else if (screen->chip_family <= CHIP_FAMILY_RV280)
+	   screen->chip_flags |= RADEON_CLASS_R200;
+   else if (screen->chip_family <= CHIP_FAMILY_RV570)
+	   screen->chip_flags |= RADEON_CLASS_R300;
+   else
+	   screen->chip_flags |= RADEON_CLASS_R600;
+
+   screen->cpp = dri_priv->bpp / 8;
+   screen->AGPMode = dri_priv->AGPMode;
+
+   ret = radeonGetParam(sPriv, RADEON_PARAM_FB_LOCATION, &temp);
+
+   /* +r6/r7 */
+   if(screen->chip_family >= CHIP_FAMILY_R600)
+   {
+       if (ret)
+       {
+            fprintf(stderr, "Unable to get fb location need newer drm\n");
+            goto fail_unmap_gart;
+       }
+       else
+       {
+            screen->fbLocation = (temp & 0xffff) << 24;
+       }
+   }
+   else
+   {
+        if (ret)
+        {
+            /* Older chips without kernel memory management can fall back
+             * to reading the location straight from the register.
+             */
+            if (screen->chip_family < CHIP_FAMILY_RS600 && !screen->kernel_mm)
+	            screen->fbLocation      = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16;
+            else
+            {
+                fprintf(stderr, "Unable to get fb location need newer drm\n");
+                goto fail_unmap_gart;
+            }
+        }
+        else
+        {
+            screen->fbLocation = (temp & 0xffff) << 16;
+        }
+   }
+
+   if (IS_R300_CLASS(screen)) {
+       ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
+       if (ret) {
+	   /* The DRM cannot tell us: guess from the chip family. */
+	   fprintf(stderr, "Unable to get num_pipes, need newer drm\n");
+	   switch (screen->chip_family) {
+	   case CHIP_FAMILY_R300:
+	   case CHIP_FAMILY_R350:
+	       screen->num_gb_pipes = 2;
+	       break;
+	   case CHIP_FAMILY_R420:
+	   case CHIP_FAMILY_R520:
+	   case CHIP_FAMILY_R580:
+	   case CHIP_FAMILY_RV560:
+	   case CHIP_FAMILY_RV570:
+	       screen->num_gb_pipes = 4;
+	       break;
+	   case CHIP_FAMILY_RV350:
+	   case CHIP_FAMILY_RV515:
+	   case CHIP_FAMILY_RV530:
+	   case CHIP_FAMILY_RV410:
+	   default:
+	       screen->num_gb_pipes = 1;
+	       break;
+	   }
+       } else {
+	   screen->num_gb_pipes = temp;
+       }
+
+       /* pipe overrides */
+       switch (dri_priv->deviceID) {
+       case PCI_CHIP_R300_AD: /* 9500 with 1 quadpipe verified by: Reid Linnemann <lreid@cs.okstate.edu> */
+       case PCI_CHIP_R350_AH: /* 9800 SE only have 1 quadpipe */
+       case PCI_CHIP_RV410_5E4C: /* RV410 SE only have 1 quadpipe */
+       case PCI_CHIP_RV410_5E4F: /* RV410 SE only have 1 quadpipe */
+	   screen->num_gb_pipes = 1;
+	   break;
+       default:
+	   break;
+       }
+
+       /* The Z pipe count is only queried from DRM minor 31 on; otherwise
+	* (or when the query fails) 2 is assumed.
+	*/
+       if ( sPriv->drm_version.minor >= 31 ) {
+	       ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_Z_PIPES, &temp);
+	       if (ret)
+		       screen->num_z_pipes = 2;
+	       else
+		       screen->num_z_pipes = temp;
+       } else
+	       screen->num_z_pipes = 2;
+   }
+
+   /* Hand the fb location back to the DRM (best effort: the result of
+    * the ioctl is not checked).
+    */
+   if ( sPriv->drm_version.minor >= 10 ) {
+      drm_radeon_setparam_t sp;
+
+      sp.param = RADEON_SETPARAM_FB_LOCATION;
+      sp.value = screen->fbLocation;
+
+      drmCommandWrite( sPriv->fd, DRM_RADEON_SETPARAM,
+		       &sp, sizeof( sp ) );
+   }
+
+   screen->frontOffset	= dri_priv->frontOffset;
+   screen->frontPitch	= dri_priv->frontPitch;
+   screen->backOffset	= dri_priv->backOffset;
+   screen->backPitch	= dri_priv->backPitch;
+   screen->depthOffset	= dri_priv->depthOffset;
+   screen->depthPitch	= dri_priv->depthPitch;
+
+   /* Check if ddx has set up a surface reg to cover depth buffer */
+   screen->depthHasSurface = (sPriv->ddx_version.major > 4) ||
+      /* these chips don't use tiled z without hyperz. So always pretend
+         we have set up a surface which will cause linear reads/writes */
+      (IS_R100_CLASS(screen) &&
+      !(screen->chip_flags & RADEON_CHIPSET_TCL));
+
+   /* With no local texture memory the "local" heap is backed by the GART
+    * texture area instead.
+    */
+   if ( dri_priv->textureSize == 0 ) {
+      screen->texOffset[RADEON_LOCAL_TEX_HEAP] = screen->gart_texture_offset;
+      screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->gartTexMapSize;
+      screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
+	 dri_priv->log2GARTTexGran;
+   } else {
+      screen->texOffset[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureOffset
+				               + screen->fbLocation;
+      screen->texSize[RADEON_LOCAL_TEX_HEAP] = dri_priv->textureSize;
+      screen->logTexGranularity[RADEON_LOCAL_TEX_HEAP] =
+	 dri_priv->log2TexGran;
+   }
+
+   if ( !screen->gartTextures.map || dri_priv->textureSize == 0
+	|| getenv( "RADEON_GARTTEXTURING_FORCE_DISABLE" ) ) {
+      screen->numTexHeaps = RADEON_NR_TEX_HEAPS - 1;
+      screen->texOffset[RADEON_GART_TEX_HEAP] = 0;
+      screen->texSize[RADEON_GART_TEX_HEAP] = 0;
+      screen->logTexGranularity[RADEON_GART_TEX_HEAP] = 0;
+   } else {
+      screen->numTexHeaps = RADEON_NR_TEX_HEAPS;
+      screen->texOffset[RADEON_GART_TEX_HEAP] = screen->gart_texture_offset;
+      screen->texSize[RADEON_GART_TEX_HEAP] = dri_priv->gartTexMapSize;
+      screen->logTexGranularity[RADEON_GART_TEX_HEAP] =
+	 dri_priv->log2GARTTexGran;
+   }
+
+   /* Build the NULL-terminated extension list. */
+   i = 0;
+   screen->extensions[i++] = &driCopySubBufferExtension.base;
+   screen->extensions[i++] = &driFrameTrackingExtension.base;
+   screen->extensions[i++] = &driReadDrawableExtension;
+
+   if ( screen->irq != 0 ) {
+       screen->extensions[i++] = &driSwapControlExtension.base;
+       screen->extensions[i++] = &driMediaStreamCounterExtension.base;
+   }
+
+#if defined(RADEON_R100)
+   screen->extensions[i++] = &radeonTexOffsetExtension.base;
+#endif
+
+#if defined(RADEON_R200)
+   if (IS_R200_CLASS(screen))
+      screen->extensions[i++] = &r200AllocateExtension.base;
+
+   screen->extensions[i++] = &r200texOffsetExtension.base;
+#endif
+
+#if defined(RADEON_R300)
+   screen->extensions[i++] = &r300texOffsetExtension.base;
+#endif
+
+#if defined(RADEON_R600)
+   screen->extensions[i++] = &r600texOffsetExtension.base;
+#endif
+
+   screen->extensions[i++] = &dri2ConfigQueryExtension.base;
+
+   screen->extensions[i++] = NULL;
+   sPriv->extensions = screen->extensions;
+
+   screen->driScreen = sPriv;
+   screen->sarea_priv_offset = dri_priv->sarea_priv_offset;
+   screen->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA +
+					       screen->sarea_priv_offset);
+
+   /* NOTE(review): sPriv->extensions still points into the screen if the
+    * constructor below fails and the screen is freed; confirm the caller
+    * discards sPriv in that case.
+    */
+   screen->bom = radeon_bo_manager_legacy_ctor(screen);
+   if (screen->bom == NULL)
+      goto fail_unmap_gart;
+
+   return screen;
+
+   /* Staged cleanup: each label undoes one acquisition and falls through
+    * to the ones made before it.
+    */
+fail_unmap_gart:
+   if ( screen->gartTextures.map )
+      drmUnmap( screen->gartTextures.map, screen->gartTextures.size );
+fail_unmap_bufs:
+   drmUnmapBufs( screen->buffers );
+fail_unmap_status:
+   drmUnmap( screen->status.map, screen->status.size );
+fail_unmap_mmio:
+   drmUnmap( screen->mmio.map, screen->mmio.size );
+fail_free:
+   FREE( screen );
+   return NULL;
+}
+
+static radeonScreenPtr
+radeonCreateScreen2(__DRIscreen *sPriv)
+{
+   radeonScreenPtr screen;
+   int i;
+   int ret;
+   uint32_t device_id = 0;
+   uint32_t temp = 0;
+
+   /* Allocate the private area */
+   screen = (radeonScreenPtr) CALLOC( sizeof(*screen) );
+   if ( !screen ) {
+      __driUtilMessage("%s: Could not allocate memory for screen structure",
+		       __FUNCTION__);
+      fprintf(stderr, "leaving here\n");
+      return NULL;
+   }
+
+   radeon_init_debug();
+
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo (&screen->optionCache,
+		       __driConfigOptions, __driNConfigOptions);
+
+   screen->kernel_mm = 1;
+   screen->chip_flags = 0;
+
+   /* if we have kms we can support all of these */
+   screen->drmSupportsCubeMapsR200 = 1;
+   screen->drmSupportsBlendColor = 1;
+   screen->drmSupportsTriPerf = 1;
+   screen->drmSupportsFragShader = 1;
+   screen->drmSupportsPointSprites = 1;
+   screen->drmSupportsCubeMapsR100 = 1;
+   screen->drmSupportsVertexProgram = 1;
+   screen->drmSupportsOcclusionQueries = 1;
+   screen->irq = 1;
+
+   ret = radeonGetParam(sPriv, RADEON_PARAM_DEVICE_ID, &device_id);
+   if (ret) {
+     FREE( screen );
+     fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_DEVICE_ID): %d\n", ret);
+     return NULL;
+   }
+
+   ret = radeon_set_screen_flags(screen, device_id);
+   if (ret == -1)
+     return NULL;
+
+   if (getenv("R300_NO_TCL"))
+	   screen->chip_flags &= ~RADEON_CHIPSET_TCL;
+
+   if (screen->chip_family <= CHIP_FAMILY_RS200)
+	   screen->chip_flags |= RADEON_CLASS_R100;
+   else if (screen->chip_family <= CHIP_FAMILY_RV280)
+	   screen->chip_flags |= RADEON_CLASS_R200;
+   else if (screen->chip_family <= CHIP_FAMILY_RV570)
+	   screen->chip_flags |= RADEON_CLASS_R300;
+   else
+	   screen->chip_flags |= RADEON_CLASS_R600;
+
+   if (IS_R300_CLASS(screen)) {
+       ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
+       if (ret) {
+	   fprintf(stderr, "Unable to get num_pipes, need newer drm\n");
+	   switch (screen->chip_family) {
+	   case CHIP_FAMILY_R300:
+	   case CHIP_FAMILY_R350:
+	       screen->num_gb_pipes = 2;
+	       break;
+	   case CHIP_FAMILY_R420:
+	   case CHIP_FAMILY_R520:
+	   case CHIP_FAMILY_R580:
+	   case CHIP_FAMILY_RV560:
+	   case CHIP_FAMILY_RV570:
+	       screen->num_gb_pipes = 4;
+	       break;
+	   case CHIP_FAMILY_RV350:
+	   case CHIP_FAMILY_RV515:
+	   case CHIP_FAMILY_RV530:
+	   case CHIP_FAMILY_RV410:
+	   default:
+	       screen->num_gb_pipes = 1;
+	       break;
+	   }
+       } else {
+	   screen->num_gb_pipes = temp;
+       }
+
+       /* pipe overrides */
+       switch (device_id) {
+       case PCI_CHIP_R300_AD: /* 9500 with 1 quadpipe verified by: Reid Linnemann <lreid@cs.okstate.edu> */
+       case PCI_CHIP_R350_AH: /* 9800 SE only have 1 quadpipe */
+       case PCI_CHIP_RV410_5E4C: /* RV410 SE only have 1 quadpipe */
+       case PCI_CHIP_RV410_5E4F: /* RV410 SE only have 1 quadpipe */
+	   screen->num_gb_pipes = 1;
+	   break;
+       default:
+	   break;
+       }
+
+       ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_Z_PIPES, &temp);
+       if (ret)
+	       screen->num_z_pipes = 2;
+       else
+	       screen->num_z_pipes = temp;
+
+   }
+
+   i = 0;
+   screen->extensions[i++] = &driCopySubBufferExtension.base;
+   screen->extensions[i++] = &driFrameTrackingExtension.base;
+   screen->extensions[i++] = &driReadDrawableExtension;
+
+   if ( screen->irq != 0 ) {
+       screen->extensions[i++] = &driSwapControlExtension.base;
+       screen->extensions[i++] = &driMediaStreamCounterExtension.base;
+   }
+
+#if defined(RADEON_R100)
+   screen->extensions[i++] = &radeonTexBufferExtension.base;
+#endif
+
+#if defined(RADEON_R200)
+   if (IS_R200_CLASS(screen))
+       screen->extensions[i++] = &r200AllocateExtension.base;
+
+   screen->extensions[i++] = &r200TexBufferExtension.base;
+#endif
+
+#if defined(RADEON_R300)
+   screen->extensions[i++] = &r300TexBufferExtension.base;
+#endif
+
+#if defined(RADEON_R600)
+   screen->extensions[i++] = &r600TexBufferExtension.base;
+#endif
+
+   screen->extensions[i++] = NULL;
+   sPriv->extensions = screen->extensions;
+
+   screen->driScreen = sPriv;
+   screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd);
+   if (screen->bom == NULL) {
+       free(screen);
+       return NULL;
+   }
+   return screen;
+}
+
+/* Tear down the driver-private screen record stored in sPriv->private:
+ * buffer-object manager, legacy DRM mappings, option cache, then the record. */
+static void
+radeonDestroyScreen( __DRIscreen *sPriv )
+{
+    radeonScreenPtr scr = (radeonScreenPtr)sPriv->private;
+
+    /* Nothing to release when no screen record is attached. */
+    if (scr == NULL)
+        return;
+
+    if (!scr->kernel_mm) {
+        /* kernel_mm clear: legacy manager, then every DRM mapping. */
+        radeon_bo_manager_legacy_dtor(scr->bom);
+
+        if ( scr->gartTextures.map != NULL )
+            drmUnmap( scr->gartTextures.map, scr->gartTextures.size );
+        drmUnmapBufs( scr->buffers );
+        drmUnmap( scr->status.map, scr->status.size );
+        drmUnmap( scr->mmio.map, scr->mmio.size );
+    } else {
+#ifdef RADEON_BO_TRACK
+        radeon_tracker_print(&scr->bom->tracker, stderr);
+#endif
+        radeon_bo_manager_gem_dtor(scr->bom);
+    }
+
+    /* free all option information */
+    driDestroyOptionInfo (&scr->optionCache);
+    FREE( scr );
+    sPriv->private = NULL;
+}
+
+
+/* Create the driver-private screen record and store it in sPriv->private.
+ * Returns GL_TRUE on success and GL_FALSE when no record could be created. */
+static GLboolean
+radeonInitDriver( __DRIscreen *sPriv )
+{
+    /* DRI2 uses radeonCreateScreen2(); otherwise radeonCreateScreen(). */
+    sPriv->private = sPriv->dri2.enabled
+        ? (void *) radeonCreateScreen2( sPriv )
+        : (void *) radeonCreateScreen( sPriv );
+
+    if ( sPriv->private != NULL )
+        return GL_TRUE;
+
+    /* private is NULL here, so the destructor returns without doing work. */
+    radeonDestroyScreen( sPriv );
+    return GL_FALSE;
+}
+
+
+
+/**
+ * Create the Mesa framebuffer and renderbuffers for a given window/drawable.
+ *
+ * Allocates a radeon_framebuffer holding a front colour renderbuffer, a
+ * back one for double-buffered visuals and, for 16/24-bit depth visuals,
+ * a depth (or packed depth/stencil) renderbuffer; the result is stored in
+ * driDrawPriv->driverPrivate.  Pixmaps are rejected with GL_FALSE.
+ *
+ * \todo This function (and its interface) will need to be updated to support
+ * pbuffers.
+ */
+static GLboolean
+radeonCreateBuffer( __DRIscreen *driScrnPriv,
+                    __DRIdrawable *driDrawPriv,
+                    const __GLcontextModes *mesaVis,
+                    GLboolean isPixmap )
+{
+    radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private;
+    /* Which buffers are requested from _mesa_add_soft_renderbuffers(). */
+    const GLboolean swDepth = GL_FALSE;
+    const GLboolean swAlpha = GL_FALSE;
+    const GLboolean swAccum = mesaVis->accumRedBits > 0;
+    const GLboolean swStencil = mesaVis->stencilBits > 0 &&
+	mesaVis->depthBits != 24;
+    const GLboolean depth24 = (mesaVis->depthBits == 24);
+    const GLboolean depth16 = (mesaVis->depthBits == 16);
+    struct radeon_framebuffer *rfb;
+    gl_format colorFormat;
+    GLboolean littleEndian;
+    int numColor, idx;
+
+    if (isPixmap)
+      return GL_FALSE; /* not implemented */
+
+    rfb = CALLOC_STRUCT(radeon_framebuffer);
+    if (rfb == NULL)
+      return GL_FALSE;
+
+    _mesa_initialize_window_framebuffer(&rfb->base, mesaVis);
+
+    /* 565, xRGB or ARGB; the _REV variant when not little endian. */
+    littleEndian = _mesa_little_endian();
+    if (mesaVis->redBits == 5)
+        colorFormat = littleEndian ? MESA_FORMAT_RGB565 : MESA_FORMAT_RGB565_REV;
+    else if (mesaVis->alphaBits == 0)
+        colorFormat = littleEndian ? MESA_FORMAT_XRGB8888 : MESA_FORMAT_XRGB8888_REV;
+    else
+        colorFormat = littleEndian ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB8888_REV;
+
+    /* front color renderbuffer, plus a back one when double buffered */
+    numColor = mesaVis->doubleBufferMode ? 2 : 1;
+    for (idx = 0; idx < numColor; idx++) {
+        rfb->color_rb[idx] = radeon_create_renderbuffer(colorFormat, driDrawPriv);
+        _mesa_add_renderbuffer(&rfb->base,
+                               idx == 0 ? BUFFER_FRONT_LEFT : BUFFER_BACK_LEFT,
+                               &rfb->color_rb[idx]->base);
+        rfb->color_rb[idx]->has_surface = 1;
+    }
+
+    if (depth24 || depth16) {
+        /* 24-bit depth with 8-bit stencil shares one packed renderbuffer. */
+        const GLboolean packedStencil = depth24 && mesaVis->stencilBits == 8;
+        struct radeon_renderbuffer *depthRb;
+
+        if (depth16)
+            depthRb = radeon_create_renderbuffer(MESA_FORMAT_Z16, driDrawPriv);
+        else if (packedStencil)
+            depthRb = radeon_create_renderbuffer(MESA_FORMAT_S8_Z24, driDrawPriv);
+        else
+            depthRb = radeon_create_renderbuffer(MESA_FORMAT_X8_Z24, driDrawPriv);
+
+        _mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depthRb->base);
+        if (packedStencil)
+            _mesa_add_renderbuffer(&rfb->base, BUFFER_STENCIL, &depthRb->base);
+        depthRb->has_surface = screen->depthHasSurface;
+    }
+
+    _mesa_add_soft_renderbuffers(&rfb->base,
+	    GL_FALSE /* color */, swDepth, swStencil,
+	    swAccum, swAlpha, GL_FALSE /* aux */);
+    driDrawPriv->driverPrivate = (void *) rfb;
+    return (driDrawPriv->driverPrivate != NULL);
+}
+
+
+/* Drop the bo references held by the colour and depth renderbuffers. */
+static void radeon_cleanup_renderbuffers(struct radeon_framebuffer *rfb)
+{
+	int slot;
+
+	/* Slots 0 and 1 are the colour buffers; slot 2 is the depth buffer,
+	 * which is looked up only once the colour buffers have been handled. */
+	for (slot = 0; slot < 3; slot++) {
+		struct radeon_renderbuffer *target = (slot < 2)
+			? rfb->color_rb[slot]
+			: radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH);
+
+		/* Skip buffers that are missing or have no bo attached. */
+		if (target == NULL || target->bo == NULL)
+			continue;
+
+		radeon_bo_unref(target->bo);
+		target->bo = NULL;
+	}
+}
+
+/* Unref the bos of a drawable's renderbuffers, then hand its driverPrivate
+ * slot to _mesa_reference_framebuffer() with NULL.  NULL input is ignored. */
+void
+radeonDestroyBuffer(__DRIdrawable *driDrawPriv)
+{
+    struct radeon_framebuffer *fb = NULL;
+    if (driDrawPriv != NULL)
+	fb = (void*)driDrawPriv->driverPrivate;
+    if (fb == NULL)
+	return;
+    radeon_cleanup_renderbuffers(fb);
+    _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL);
+}
+
+
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ *
+ * Checks the DRI, DDX and DRM versions, creates the screen private through
+ * radeonInitDriver() and asks radeonFillInModes() for the config list.
+ *
+ * \todo maybe fold this into radeonInitDriver
+ *
+ * \return the configs from radeonFillInModes(), or NULL on failure
+ */
+static const __DRIconfig **
+radeonInitScreen(__DRIscreen *psp)
+{
+   /* The expected DDX and DRI versions are the same for every build. */
+   static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
+   static const __DRIversion dri_expected = { 4, 0, 0 };
+   /* Driver name and minimum DRM version depend on the build target. */
+#if defined(RADEON_R100)
+   static const char *driver_name = "Radeon";
+   static const __DRIversion drm_expected = { 1, 6, 0 };
+#elif defined(RADEON_R200)
+   static const char *driver_name = "R200";
+   static const __DRIversion drm_expected = { 1, 6, 0 };
+#elif defined(RADEON_R300)
+   static const char *driver_name = "R300";
+   static const __DRIversion drm_expected = { 1, 24, 0 };
+#elif defined(RADEON_R600)
+   static const char *driver_name = "R600";
+   static const __DRIversion drm_expected = { 1, 24, 0 };
+#endif
+   RADEONDRIPtr dri_priv = (RADEONDRIPtr) psp->pDevPriv;
+   int deep;
+
+   /* Stop on a version mismatch, or when no screen private can be made. */
+   if ( ! driCheckDriDdxDrmVersions3( driver_name,
+				      &psp->dri_version, & dri_expected,
+				      &psp->ddx_version, & ddx_expected,
+				      &psp->drm_version, & drm_expected )
+        || !radeonInitDriver(psp) ) {
+      return NULL;
+   }
+
+   /* for now fill in all modes: 16 bpp passes 16 and 0, any other bpp
+    * passes 24 and 8 (presumably depth and stencil bits - confirm against
+    * radeonFillInModes) */
+   deep = (dri_priv->bpp != 16);
+   return radeonFillInModes( psp, dri_priv->bpp,
+			     deep ? 24 : 16, deep ? 8 : 0, 1);
+}
+#define ARRAY_SIZE(a) (sizeof (a) / sizeof (*(a)))  /* element count of a true array, not a pointer */
+
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ * Called when using DRI2.
+ *
+ * Creates the screen private through radeonInitDriver(), then requests one
+ * batch of configs from driCreateConfigs() per colour layout and chains
+ * the batches together with driConcatConfigs().
+ *
+ * \return the chained configs, or NULL on failure
+ */
+static const
+__DRIconfig **radeonInitScreen2(__DRIscreen *psp)
+{
+   /* Colour layouts, as parallel format/type pairs. */
+   GLenum fb_format[3] = {
+      GL_RGB,
+      GL_BGR,
+      GL_BGRA,
+   };
+   GLenum fb_type[3] = {
+      GL_UNSIGNED_SHORT_5_6_5,
+      GL_UNSIGNED_INT_8_8_8_8_REV,
+      GL_UNSIGNED_INT_8_8_8_8_REV,
+   };
+   /* GLX_SWAP_COPY_OML is deliberately left out of this list. */
+   static const GLenum back_buffer_modes[] = {
+     GLX_NONE, GLX_SWAP_UNDEFINED_OML, /*, GLX_SWAP_COPY_OML*/
+   };
+   /* Parallel depth/stencil pairs: 0/0, 16/0, 24/0 and 24/8. */
+   uint8_t depth_bits[4]   = { 0, 16, 24, 24 };
+   uint8_t stencil_bits[4] = { 0,  0,  0,  8 };
+   uint8_t msaa_samples_array[1] = { 0 };
+   __DRIconfig **configs = NULL;
+   int color;
+
+   if (!radeonInitDriver(psp))
+      return NULL;
+
+   for (color = 0; color < ARRAY_SIZE(fb_format); color++) {
+      __DRIconfig **batch;
+
+      batch = driCreateConfigs(fb_format[color], fb_type[color],
+			       depth_bits,
+			       stencil_bits,
+			       ARRAY_SIZE(depth_bits),
+			       back_buffer_modes,
+			       ARRAY_SIZE(back_buffer_modes),
+			       msaa_samples_array,
+			       ARRAY_SIZE(msaa_samples_array),
+			       GL_TRUE);
+
+      /* The first batch starts the list; later ones are appended. */
+      if (configs != NULL)
+	 configs = driConcatConfigs(configs, batch);
+      else
+	 configs = batch;
+   }
+
+   /* No config list could be built: report where we failed and give up.
+    * NOTE(review): the screen private created above stays attached to psp
+    * on this path - confirm the caller tears it down.
+    */
+   if (configs == NULL) {
+      fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
+              __LINE__);
+      return NULL;
+   }
+
+   return (const __DRIconfig **)configs;
+}
+
+/**
+ * Get information about previous buffer swaps.
+ * Copies the drawable's swap counters into sInfo; returns 0, or -1 when a
+ * required pointer (including dPriv->driverPrivate) is NULL.
+ */
+static int
+getSwapInfo( __DRIdrawable *dPriv, __DRIswapInfo * sInfo )
+{
+    struct radeon_framebuffer *rfb;
+    /* driverPrivate is dereferenced below, so it is validated as well; the
+     * context checks are kept so existing callers see no change. */
+    if ( (dPriv == NULL) || (sInfo == NULL) || (dPriv->driverPrivate == NULL)
+	 || (dPriv->driContextPriv == NULL)
+	 || (dPriv->driContextPriv->driverPrivate == NULL) )
+	return -1;
+
+    rfb = dPriv->driverPrivate;
+    sInfo->swap_count = rfb->swap_count;
+    sInfo->swap_ust = rfb->swap_ust;
+    sInfo->swap_missed_count = rfb->swap_missed_count;
+    sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0)
+        ? driCalculateSwapUsage( dPriv, 0, rfb->swap_missed_ust ) : 0.0;
+
+    return 0;
+}
+
+/* Driver entry points exported as driDriverAPI, grouped by what they act on. */
+const struct __DriverAPIRec driDriverAPI = {
+   .InitScreen      = radeonInitScreen,
+   .InitScreen2     = radeonInitScreen2,	/* DRI2 */
+   .DestroyScreen   = radeonDestroyScreen,
+#if defined(RADEON_R200)
+   .CreateContext   = r200CreateContext,
+   .DestroyContext  = r200DestroyContext,
+#else /* every other build shares radeonDestroyContext */
+   .DestroyContext  = radeonDestroyContext,
+#if defined(RADEON_R600)
+   .CreateContext   = r600CreateContext,
+#elif defined(RADEON_R300)
+   .CreateContext   = r300CreateContext,
+#else
+   .CreateContext   = r100CreateContext,
+#endif
+#endif
+   .MakeCurrent     = radeonMakeCurrent,
+   .UnbindContext   = radeonUnbindContext,
+   .CreateBuffer    = radeonCreateBuffer,
+   .DestroyBuffer   = radeonDestroyBuffer,
+   .SwapBuffers     = radeonSwapBuffers,
+   .CopySubBuffer   = radeonCopySubBuffer,
+   .GetSwapInfo     = getSwapInfo,
+   .GetDrawableMSC  = driDrawableGetMSC32,
+   .WaitForMSC      = driWaitForMSC32,
+   .WaitForSBC      = NULL,
+   .SwapBuffersMSC  = NULL,
+};
+
+/* This is the table of extensions that the loader will dlsym() for. */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+    &driCoreExtension.base,
+    &driLegacyExtension.base,
+    &driDRI2Extension.base,
+    NULL /* the table ends with a NULL entry */
+};
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
new file mode 100644
index 0000000000..0d7e335fa3
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -0,0 +1,127 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ */
+
+#ifndef __RADEON_SCREEN_H__
+#define __RADEON_SCREEN_H__
+
+/*
+ * IMPORTS: these headers contain all the DRI, X and kernel-related
+ * definitions that we need.
+ */
+#include "dri_util.h"
+#include "radeon_dri.h"
+#include "radeon_chipset.h"
+#include "radeon_reg.h"
+#include "drm_sarea.h"
+#include "xmlconfig.h"
+
+/* One DRM region: its handle, its size and the address it is mapped at. */
+typedef struct {
+   drm_handle_t handle;			/* Handle to the DRM region */
+   drmSize size;			/* Size of the DRM region */
+   drmAddress map;			/* Mapping of the DRM region */
+} radeonRegionRec, *radeonRegionPtr;
+/* Per-screen driver state; a pointer to it is kept in __DRIscreen::private. */
+typedef struct radeon_screen {
+   int chip_family; /* CHIP_FAMILY_* value */
+   int chip_flags; /* RADEON_CLASS_* and RADEON_CHIPSET_* bits */
+   int cpp;
+   int card_type;
+   int device_id; /* PCI ID */
+   int AGPMode;
+   unsigned int irq;			/* IRQ number (0 means none) */
+
+   unsigned int fbLocation;
+   unsigned int frontOffset;
+   unsigned int frontPitch;
+   unsigned int backOffset;
+   unsigned int backPitch;
+
+   unsigned int depthOffset;
+   unsigned int depthPitch;
+
+    /* Shared texture data */
+   int numTexHeaps;
+   int texOffset[RADEON_NR_TEX_HEAPS];
+   int texSize[RADEON_NR_TEX_HEAPS];
+   int logTexGranularity[RADEON_NR_TEX_HEAPS];
+
+   radeonRegionRec mmio; /* unmapped by radeonDestroyScreen() when kernel_mm is 0 */
+   radeonRegionRec status; /* unmapped by radeonDestroyScreen() when kernel_mm is 0 */
+   radeonRegionRec gartTextures; /* unmapped (if mapped) when kernel_mm is 0 */
+
+   drmBufMapPtr buffers;
+
+   __volatile__ uint32_t *scratch;
+
+   __DRIscreen *driScreen; /* the __DRIscreen this record belongs to */
+   unsigned int sarea_priv_offset;
+   unsigned int gart_buffer_offset;	/* offset in card memory space */
+   unsigned int gart_texture_offset;	/* offset in card memory space */
+   unsigned int gart_base;
+
+   GLboolean drmSupportsCubeMapsR200;   /* need radeon kernel module >= 1.7 */
+   GLboolean drmSupportsBlendColor;     /* need radeon kernel module >= 1.11 */
+   GLboolean drmSupportsTriPerf;        /* need radeon kernel module >= 1.16 */
+   GLboolean drmSupportsFragShader;     /* need radeon kernel module >= 1.18 */
+   GLboolean drmSupportsPointSprites;   /* need radeon kernel module >= 1.13 */
+   GLboolean drmSupportsCubeMapsR100;   /* need radeon kernel module >= 1.15 */
+   GLboolean drmSupportsVertexProgram;  /* need radeon kernel module >= 1.25 */
+   GLboolean drmSupportsOcclusionQueries; /* need radeon kernel module >= 1.30 */
+   GLboolean depthHasSurface; /* copied to has_surface of depth renderbuffers */
+
+   /* Configuration cache with default values for all contexts */
+   driOptionCache optionCache;
+
+   const __DRIextension *extensions[17]; /* NULL-terminated; published via sPriv->extensions */
+
+   int num_gb_pipes; /* from RADEON_PARAM_NUM_GB_PIPES, with per-chip fallbacks */
+   int num_z_pipes; /* from RADEON_PARAM_NUM_Z_PIPES, else 2 */
+   int kernel_mm; /* non-zero: bom is torn down with radeon_bo_manager_gem_dtor() */
+   drm_radeon_sarea_t *sarea;	/* Private SAREA data */
+   struct radeon_bo_manager *bom; /* buffer-object manager, destroyed in radeonDestroyScreen() */
+} radeonScreenRec, *radeonScreenPtr;
+/* Chip-class tests on a screen pointer; the argument is parenthesised so any pointer expression (e.g. `p + 1`) expands correctly. */
+#define IS_R100_CLASS(screen) \
+	(((screen)->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R100)
+#define IS_R200_CLASS(screen) \
+	(((screen)->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R200)
+#define IS_R300_CLASS(screen) \
+	(((screen)->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R300)
+#define IS_R600_CLASS(screen) \
+	(((screen)->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R600)
+
+extern void radeonDestroyBuffer(__DRIdrawable *driDrawPriv);
+#endif /* __RADEON_SCREEN_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
new file mode 100644
index 0000000000..1adb609603
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -0,0 +1,971 @@
+/**************************************************************************
+
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "main/glheader.h"
+#include "main/texformat.h"
+#include "swrast/swrast.h"
+
+#include "radeon_common.h"
+#include "radeon_lock.h"
+#include "radeon_span.h"
+
+#define DBG 0
+
+static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
+
+
+/* The r200 depth buffer is always tiled.  The helpers below use the formula
+   from the docs; it was transcribed by hand, so typos are possible.
+*/
+#if defined(RADEON_R200)
+/* 2-byte depth: linear address when has_surface is set, else the tiled formula. */
+static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
+				 GLint x, GLint y)
+{
+    GLubyte *base = rrb->bo->ptr;
+    GLint addr;
+
+    if (!rrb->has_surface) {
+	GLuint blk = (((y  >> 4) * (rrb->pitch >> 8) + (x >> 6)));
+	addr = (blk >> 1) << 12;
+	addr += (((rrb->pitch >> 8) & 0x1) ? (blk & 0x1) : ((blk & 0x1) ^ ((y >> 4) & 0x1))) << 11;
+	addr += ((y >> 2) & 0x3) << 9;	/* addr[10:9] = y[3:2] */
+	addr += ((x >> 3) & 0x1) << 8;	/* addr[8]    = x[3]   */
+	addr += ((x >> 4) & 0x3) << 6;	/* addr[7:6]  = x[5:4] */
+	addr += ((x >> 2) & 0x1) << 5;	/* addr[5]    = x[2]   */
+	addr += ((y >> 1) & 0x1) << 4;	/* addr[4]    = y[1]   */
+	addr += ((x >> 1) & 0x1) << 3;	/* addr[3]    = x[1]   */
+	addr += (y & 0x1) << 2;		/* addr[2]    = y[0]   */
+	addr += (x & 0x1) << 1;		/* addr[1]    = x[0]   */
+    } else {
+	addr = x * rrb->cpp + y * rrb->pitch;
+    }
+    return &base[addr];
+}
+
+/* 4-byte depth: linear address when has_surface is set, else the tiled formula. */
+static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
+				 GLint x, GLint y)
+{
+    GLubyte *base = rrb->bo->ptr;
+    GLint addr;
+
+    if (!rrb->has_surface) {
+	GLuint blk = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
+	addr = (blk >> 1) << 12;
+	addr += (((rrb->pitch >> 7) & 0x1) ? (blk & 0x1) : ((blk & 0x1) ^ ((y >> 4) & 0x1))) << 11;
+	addr += ((y >> 2) & 0x3) << 9;	/* addr[10:9] = y[3:2] */
+	addr += ((x >> 2) & 0x1) << 8;	/* addr[8]    = x[2]   */
+	addr += ((x >> 3) & 0x3) << 6;	/* addr[7:6]  = x[4:3] */
+	addr += ((y >> 1) & 0x1) << 5;	/* addr[5]    = y[1]   */
+	addr += ((x >> 1) & 0x1) << 4;	/* addr[4]    = x[1]   */
+	addr += (y & 0x1) << 3;		/* addr[3]    = y[0]   */
+	addr += (x & 0x1) << 2;		/* addr[2]    = x[0]   */
+    } else {
+	addr = x * rrb->cpp + y * rrb->pitch;
+    }
+    return &base[addr];
+}
+#endif
+
+/* r600 tiling
+ * two main types:
+ * - 1D (akin to macro-linear/micro-tiled on older asics)
+ * - 2D (akin to macro-tiled/micro-tiled on older asics)
+ * only 1D tiling is implemented below
+ */
+#if defined(RADEON_R600)
+/* Byte offset of element (x, y) inside a 1D-tiled surface built from 8x8
+ * tiles.  is_depth selects the depth pixel ordering; for 4-byte depth
+ * elements is_stencil selects the stencil byte instead of the depth bytes. */
+static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
+					GLint x, GLint y, GLint is_depth, GLint is_stencil)
+{
+    /* fixed parameters of the layout handled here */
+    const GLint element_bytes = rrb->cpp;
+    const GLint num_samples = 1;
+    const GLint tile_width = 8;
+    const GLint tile_height = 8;
+    const GLint tile_thickness = 1;
+    const GLint z = 0;
+    const GLint sample_number = 0;
+    /* surface geometry taken from the renderbuffer */
+    const GLint pitch_elements = rrb->pitch / element_bytes;
+    const GLint height = rrb->base.Height;
+    /* low three bits of each coordinate: the position inside one tile */
+    const GLint x0 = x & 1;
+    const GLint x1 = (x >> 1) & 1;
+    const GLint x2 = (x >> 2) & 1;
+    const GLint y0 = y & 1;
+    const GLint y1 = (y >> 1) & 1;
+    const GLint y2 = (y >> 2) & 1;
+    GLint tile_bytes;
+    GLint tiles_per_row;
+    GLint tiles_per_slice;
+    GLint slice_offset;
+    GLint tile_offset;
+    GLint pixel_number = 0;
+    GLint element_offset;
+
+    /* locate the tile that contains (x, y) */
+    tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
+    tiles_per_row = pitch_elements / tile_width;
+    tiles_per_slice = tiles_per_row * (height / tile_height);
+    slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes;
+    tile_offset = (((y / tile_height) * tiles_per_row) + (x / tile_width)) * tile_bytes;
+
+    if (is_depth) {
+	    GLint pixel_offset = 0;
+
+	    /* depth tiles interleave the x and y bits */
+	    pixel_number = x0			/* pn[0] = x[0] */
+		    | (y0 << 1)			/* pn[1] = y[0] */
+		    | (x1 << 2)			/* pn[2] = x[1] */
+		    | (y1 << 3)			/* pn[3] = y[1] */
+		    | (x2 << 4)			/* pn[4] = x[2] */
+		    | (y2 << 5);		/* pn[5] = y[2] */
+	    if (element_bytes == 2) {
+		    pixel_offset = pixel_number * element_bytes * num_samples;
+	    } else if (element_bytes == 4) {
+		    /* stencil and depth data are stored separately within a tile.
+		     * stencil is stored in a contiguous tile before the depth tile.
+		     * stencil element is 1 byte, depth element is 3 bytes.
+		     * stencil tile is 64 bytes.
+		     */
+		    pixel_offset = is_stencil
+			    ? pixel_number * 1 * num_samples
+			    : (pixel_number * 3 * num_samples) + 64;
+	    }
+	    element_offset = pixel_offset + (sample_number * element_bytes);
+    } else {
+	    GLint sample_offset;
+
+	    /* colour tiles: the bit order depends on the element size */
+	    if (element_bytes == 1) {
+		    pixel_number = x0			/* pn[0] = x[0] */
+			    | (x1 << 1)			/* pn[1] = x[1] */
+			    | (x2 << 2)			/* pn[2] = x[2] */
+			    | (y1 << 3)			/* pn[3] = y[1] */
+			    | (y0 << 4)			/* pn[4] = y[0] */
+			    | (y2 << 5);		/* pn[5] = y[2] */
+	    } else if (element_bytes == 2) {
+		    pixel_number = x0			/* pn[0] = x[0] */
+			    | (x1 << 1)			/* pn[1] = x[1] */
+			    | (x2 << 2)			/* pn[2] = x[2] */
+			    | (y0 << 3)			/* pn[3] = y[0] */
+			    | (y1 << 4)			/* pn[4] = y[1] */
+			    | (y2 << 5);		/* pn[5] = y[2] */
+	    } else if (element_bytes == 4) {
+		    pixel_number = x0			/* pn[0] = x[0] */
+			    | (x1 << 1)			/* pn[1] = x[1] */
+			    | (y0 << 2)			/* pn[2] = y[0] */
+			    | (x2 << 3)			/* pn[3] = x[2] */
+			    | (y1 << 4)			/* pn[4] = y[1] */
+			    | (y2 << 5);		/* pn[5] = y[2] */
+	    }
+	    sample_offset = sample_number * (tile_bytes / num_samples);
+	    element_offset = sample_offset + (pixel_number * element_bytes);
+    }
+    return slice_offset + tile_offset + element_offset;
+}
+
+/* depth buffers: address of the depth bytes for (x, y) via the 1D tile helper */
+static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
+			       GLint x, GLint y)
+{
+    GLubyte *base = rrb->bo->ptr;
+    const GLint tiled = r600_1d_tile_helper(rrb, x, y, 1, 0);
+    return &base[tiled];
+}
+
+/* Address of the stencil byte for (x, y): the depth layout with is_stencil set. */
+static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
+				 GLint x, GLint y)
+{
+    GLubyte *base = rrb->bo->ptr;
+    return &base[r600_1d_tile_helper(rrb, x, y, 1, 1)];
+}
+
+/* Address of the colour element at (x, y): tiled or plain pitch-based. */
+static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
+			       GLint x, GLint y)
+{
+    const uint32_t tiling = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
+    GLubyte *base = rrb->bo->ptr;
+    GLint addr;
+    /* tiled only when has_surface is clear and the bo carries a tiling flag */
+    if (!rrb->has_surface && (rrb->bo->flags & tiling))
+	    addr = r600_1d_tile_helper(rrb, x, y, 0, 0);
+    else
+	    addr = x * rrb->cpp + y * rrb->pitch;
+    return &base[addr];
+}
+
+#else
+
+/* radeon tiling on r300-r500 has 4 states,
+   macro-linear/micro-linear
+   macro-linear/micro-tiled
+   macro-tiled /micro-linear
+   macro-tiled /micro-tiled
+   1 byte surface 
+   2 byte surface - two types - we only provide 8x2 microtiling
+   4 byte surface
+   8/16 byte (unused)
+*/
+/* Address of the 4-byte pixel at (x, y).  Linear when has_surface is set or
+ * the bo carries neither tiling flag; otherwise one of three tiled layouts. */
+static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
+			     GLint x, GLint y)
+{
+    GLubyte *base = rrb->bo->ptr;
+    const int macro = (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) != 0;
+    const int micro = (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) != 0;
+    GLint addr;
+    if (rrb->has_surface || (!macro && !micro)) {
+        addr = x * rrb->cpp + y * rrb->pitch;
+    } else if (macro && micro) {
+	/* macro-tiled / micro-tiled */
+	addr = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
+	addr += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
+	addr += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
+	addr += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
+	addr += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
+	addr += ((y >> 1) & 0x1) << 6;
+	addr += ((x >> 2) & 0x1) << 5;
+	addr += (y & 1) << 4;
+	addr += (x & 3) << 2;
+    } else if (macro) {
+	/* macro-tiled / micro-linear */
+	addr = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
+	addr += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
+	addr += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
+	addr += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
+	addr += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
+	addr += (y & 1) << 6;
+	addr += (x & 15) << 2;
+    } else {
+	/* macro-linear / micro-tiled */
+	addr = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
+	addr += (y & 1) << 4;
+	addr += (x & 3) << 2;
+    }
+    return &base[addr];
+}
+
+/* Address of the 2-byte pixel at (x, y), using 8x2 micro tiles.  Linear when
+ * has_surface is set or the bo carries neither tiling flag. */
+static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
+				     GLint x, GLint y)
+{
+    GLubyte *base = rrb->bo->ptr;
+    const int macro = (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) != 0;
+    const int micro = (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) != 0;
+    GLint addr;
+    if (rrb->has_surface || (!macro && !micro)) {
+        addr = x * rrb->cpp + y * rrb->pitch;
+    } else if (macro && micro) {
+	addr = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
+	addr += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
+	addr += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
+	addr += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
+	addr += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
+	addr += ((y >> 1) & 0x1) << 6;
+	addr += ((x >> 3) & 0x1) << 5;
+	addr += (y & 1) << 4;
+	/* x[2:0] select the 2-byte element, as in the micro-tiled-only case;
+	 * the former (x & 3) << 2 gave x and x + 4 the same address. */
+	addr += (x & 7) << 1;
+    } else if (macro) {
+	addr = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
+	addr += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
+	addr += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
+	addr += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
+	addr += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
+	addr += (y & 1) << 6;
+	addr += ((x >> 4) & 0x1) << 5;
+	/* x[3:0] select the element; (x & 15) << 2 overlapped the x[4] bit. */
+	addr += (x & 15) << 1;
+    } else {
+	addr = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
+	addr += (y & 0x1) << 4;
+	addr += (x & 0x7) << 1;
+    }
+    return &base[addr];
+}
+
+#endif
+
+/*
+ * Note that all information needed to access pixels in a renderbuffer
+ * should be obtained through the gl_renderbuffer parameter, not per-context
+ * information.  LOCAL_VARS expects `ctx` and `rb` to be in scope where it expands.
+ */
+#define LOCAL_VARS						\
+   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
+   struct radeon_renderbuffer *rrb = (void *) rb;		\
+   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
+   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
+   unsigned int num_cliprects;						\
+   struct drm_clip_rect *cliprects;					\
+   int x_off, y_off;							\
+   GLuint p;						\
+   (void)p;						\
+   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
+/* Same locals as LOCAL_VARS, minus the scratch pixel `p`. */
+#define LOCAL_DEPTH_VARS				\
+   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
+   struct radeon_renderbuffer *rrb = (void *) rb;	\
+   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
+   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
+   unsigned int num_cliprects;						\
+   struct drm_clip_rect *cliprects;					\
+   int x_off, y_off;							\
+  radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
+/* Stencil spans declare exactly the depth locals. */
+#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
+/* Row mapping: identity when ctx->DrawBuffer->Name is non-zero, otherwise Height - 1 - y. */
+#define Y_FLIP(_y) ((_y) * yScale + yBias)
+/* HW_LOCK and HW_UNLOCK expand to nothing here. */
+#define HW_LOCK()
+
+#define HW_UNLOCK()
+
+/* XXX FBO: this is identical to the macro in spantmp2.h except we get
+ * the cliprect info from the context, not the driDrawable.
+ * Move this into spantmp2.h someday.  It opens a do { and a while { that it does not close itself.
+ */
+#define HW_CLIPLOOP()							\
+   do {									\
+      int _nc = num_cliprects;						\
+      while ( _nc-- ) {							\
+	 int minx = cliprects[_nc].x1 - x_off;				\
+	 int miny = cliprects[_nc].y1 - y_off;				\
+	 int maxx = cliprects[_nc].x2 - x_off;				\
+	 int maxy = cliprects[_nc].y2 - y_off;
+
+/* ================================================================
+ * Color buffer.  Each section defines SPANTMP_PIXEL_FMT/TYPE, TAG/TAG2 and GET_PTR, then includes spantmp2.h.
+ */
+
+/* 16 bit, RGB565 color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x)    radeon##x##_RGB565
+#define TAG2(x,y) radeon##x##_RGB565##y
+#if defined(RADEON_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+/* RGB565 again, instantiated with the 5_6_5_REV pixel type (tag suffix _RGB565_REV) */
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5_REV
+
+#define TAG(x)    radeon##x##_RGB565_REV
+#define TAG2(x,y) radeon##x##_RGB565_REV##y
+#if defined(RADEON_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+
+/* 16 bit, ARGB1555 color spanline and pixel functions (1_5_5_5_REV pixel type)
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
+
+#define TAG(x)    radeon##x##_ARGB1555
+#define TAG2(x,y) radeon##x##_ARGB1555##y
+#if defined(RADEON_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+/* ARGB1555 again, instantiated with the 1_5_5_5 pixel type (tag suffix _ARGB1555_REV) */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5
+
+#define TAG(x)    radeon##x##_ARGB1555_REV
+#define TAG2(x,y) radeon##x##_ARGB1555_REV##y
+#if defined(RADEON_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+
+/* 16 bit, ARGB4444 (RGBA4) color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
+
+#define TAG(x)    radeon##x##_ARGB4444
+#define TAG2(x,y) radeon##x##_ARGB4444##y
+#if defined(RADEON_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4
+
+#define TAG(x)    radeon##x##_ARGB4444_REV
+#define TAG2(x,y) radeon##x##_ARGB4444_REV##y
+#if defined(RADEON_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+
+/* 32 bit, xRGB8888 color spanline and pixel functions.  GET_VALUE forces the
+ * top byte to 0xff on read; PUT_VALUE is one do/while(0) statement. */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    radeon##x##_xRGB8888
+#define TAG2(x,y) radeon##x##_xRGB8888##y
+#if defined(RADEON_R600)
+#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, (_x) + x_off, (_y) + y_off)) | 0xff000000))
+#define PUT_VALUE(_x, _y, d) do { \
+   GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, (_x) + x_off, (_y) + y_off );	\
+   *_ptr = (d);								\
+} while (0)
+#else
+#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, (_x) + x_off, (_y) + y_off)) | 0xff000000))
+#define PUT_VALUE(_x, _y, d) do { \
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, (_x) + x_off, (_y) + y_off );	\
+   *_ptr = (d);								\
+} while (0)
+#endif
+#include "spantmp2.h"
+
+/* 32 bit, ARGB8888 color spanline and pixel functions.  GET_VALUE returns the
+ * stored word unchanged; PUT_VALUE is one do/while(0) statement. */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    radeon##x##_ARGB8888
+#define TAG2(x,y) radeon##x##_ARGB8888##y
+#if defined(RADEON_R600)
+#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, (_x) + x_off, (_y) + y_off)))
+#define PUT_VALUE(_x, _y, d) do { \
+   GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, (_x) + x_off, (_y) + y_off );	\
+   *_ptr = (d);								\
+} while (0)
+#else
+#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, (_x) + x_off, (_y) + y_off)))
+#define PUT_VALUE(_x, _y, d) do { \
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, (_x) + x_off, (_y) + y_off );	\
+   *_ptr = (d);								\
+} while (0)
+#endif
+#include "spantmp2.h"
+
+/* 32 bit, BGRx8888 color spanline and pixel functions.  GET_VALUE forces the
+ * low byte to 0xff on read; PUT_VALUE is one do/while(0) statement. */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8
+
+#define TAG(x)    radeon##x##_BGRx8888
+#define TAG2(x,y) radeon##x##_BGRx8888##y
+#if defined(RADEON_R600)
+#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, (_x) + x_off, (_y) + y_off)) | 0x000000ff))
+#define PUT_VALUE(_x, _y, d) do { \
+   GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, (_x) + x_off, (_y) + y_off );	\
+   *_ptr = (d);								\
+} while (0)
+#else
+#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, (_x) + x_off, (_y) + y_off)) | 0x000000ff))
+#define PUT_VALUE(_x, _y, d) do { \
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, (_x) + x_off, (_y) + y_off );	\
+   *_ptr = (d);								\
+} while (0)
+#endif
+#include "spantmp2.h"
+
+/* 32 bit, BGRA8888 color spanline and pixel functions.  Unlike the other
+ * 32-bit formats, this one supplies GET_PTR, not GET_VALUE/PUT_VALUE. */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8
+
+#define TAG(x)    radeon##x##_BGRA8888
+#define TAG2(x,y) radeon##x##_BGRA8888##y
+#if defined(RADEON_R600)
+#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
+#else
+#define GET_PTR(X,Y) radeon_ptr_4byte(rrb, (X) + x_off, (Y) + y_off)
+#endif
+#include "spantmp2.h"
+
+/* ================================================================
+ * Depth buffer
+ */
+
+/* The Radeon family has depth tiling on all the time, so we have to convert
+ * the x,y coordinates into the memory bus address (mba) in the same
+ * manner as the engine.  In each case, the linear block address (ba)
+ * is calculated, and then wired with x and y to produce the final
+ * memory address.
+ * The chip will do address translation on its own if the surface registers
+ * are set up correctly. It is not quite enough to get it working with hyperz
+ * too...
+ */
+
+/* 16-bit depth buffer functions.  Each access picks a per-chip address
+ * helper (r200_depth_2byte, r600_ptr_depth or radeon_ptr_2byte_8x2). */
+#define VALUE_TYPE GLushort
+/* WRITE_DEPTH(x, y, d): store d as the 16-bit depth of pixel (x, y). */
+#if defined(RADEON_R200)
+#define WRITE_DEPTH( _x, _y, d )					\
+   *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
+#elif defined(RADEON_R600)
+#define WRITE_DEPTH( _x, _y, d )					\
+   *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
+#else
+#define WRITE_DEPTH( _x, _y, d )					\
+   *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
+#endif
+/* READ_DEPTH(d, x, y): load the 16-bit depth of pixel (x, y) into d. */
+#if defined(RADEON_R200)
+#define READ_DEPTH( d, _x, _y )						\
+   d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
+#elif defined(RADEON_R600)
+#define READ_DEPTH( d, _x, _y )						\
+   d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
+#else
+#define READ_DEPTH( d, _x, _y )						\
+   d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
+#endif
+/* Instantiate the depth span template with the _z16 name suffix. */
+#define TAG(x) radeon##x##_z16
+#include "depthtmp.h"
+
+/* 24 bit depth
+ *
+ * Careful: It looks like the R300 uses ZZZS byte order while the R200
+ * uses SZZZ for 24 bit depth, 8 bit stencil mode.  WRITE_DEPTH is a
+ * read-modify-write that replaces only the 24 depth bits of the word. */
+#define VALUE_TYPE GLuint
+
+#if defined(RADEON_R300)
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, (_x) + x_off, (_y) + y_off );	\
+   GLuint tmp = LE32_TO_CPU(*_ptr);                                     \
+   tmp &= 0x000000ff;							\
+   tmp |= (((d) << 8) & 0xffffff00);					\
+   *_ptr = CPU_TO_LE32(tmp);                                            \
+} while (0)
+#elif defined(RADEON_R600)
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, (_x) + x_off, (_y) + y_off );	\
+   GLuint tmp = *_ptr;				\
+   tmp &= 0xff000000;							\
+   tmp |= ((d) & 0x00ffffff);					\
+   *_ptr = tmp;					\
+} while (0)
+#elif defined(RADEON_R200)
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, (_x) + x_off, (_y) + y_off );	\
+   GLuint tmp = LE32_TO_CPU(*_ptr);                                     \
+   tmp &= 0xff000000;							\
+   tmp |= ((d) & 0x00ffffff);						\
+   *_ptr = CPU_TO_LE32(tmp);                                            \
+} while (0)
+#else
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, (_x) + x_off, (_y) + y_off );	\
+   GLuint tmp = LE32_TO_CPU(*_ptr);                                     \
+   tmp &= 0xff000000;							\
+   tmp |= ((d) & 0x00ffffff);						\
+   *_ptr = CPU_TO_LE32(tmp);                                            \
+} while (0)
+#endif
+
+/* READ_DEPTH(d, x, y): load the 24 depth bits of pixel (x, y) into d.
+ * Every variant is a single do/while(0) statement with no trailing ';'. */
+#if defined(RADEON_R300)
+#define READ_DEPTH( d, _x, _y )	do {					\
+    d = (LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, (_x) + x_off, (_y) + y_off))) & 0xffffff00) >> 8; \
+  } while (0)
+#elif defined(RADEON_R600)
+#define READ_DEPTH( d, _x, _y )	do {					\
+    d = (*(GLuint*)(r600_ptr_depth(rrb, (_x) + x_off, (_y) + y_off)) & 0x00ffffff); \
+  } while (0)
+#elif defined(RADEON_R200)
+#define READ_DEPTH( d, _x, _y )	do {					\
+    d = LE32_TO_CPU(*(GLuint*)(r200_depth_4byte(rrb, (_x) + x_off, (_y) + y_off))) & 0x00ffffff; \
+  } while (0)
+#else
+#define READ_DEPTH( d, _x, _y )	do {					\
+    d = LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, (_x) + x_off, (_y) + y_off))) & 0x00ffffff; \
+  } while (0)
+#endif
+
+#define TAG(x) radeon##x##_z24
+#include "depthtmp.h"
+
+/* 24 bit depth, 8 bit stencil depthbuffer functions
+ * EXT_depth_stencil
+ *
+ * Careful: It looks like the R300 uses ZZZS byte order while the R200
+ * uses SZZZ for 24 bit depth, 8 bit stencil mode.
+ */
+#define VALUE_TYPE GLuint
+/* WRITE_DEPTH(x, y, d): d carries stencil in bits 31..24 and depth in bits 23..0. */
+#if defined(RADEON_R300)
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
+   *_ptr = CPU_TO_LE32((((d) & 0xff000000) >> 24) | (((d) & 0x00ffffff) << 8));   \
+} while (0)
+#elif defined(RADEON_R600)
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = *_ptr;				\
+   tmp &= 0xff000000;							\
+   tmp |= ((d) & 0x00ffffff);					\
+   *_ptr = tmp;	/* depth bits written via r600_ptr_depth */	\
+   _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off);		\
+   tmp = *_ptr;				\
+   tmp &= 0xffffff00;							\
+   tmp |= ((d) >> 24) & 0xff;						\
+   *_ptr = tmp;	/* stencil byte written via r600_ptr_stencil */	\
+} while (0)
+#elif defined(RADEON_R200)
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
+   *_ptr = CPU_TO_LE32(d);						\
+} while (0)
+#else
+#define WRITE_DEPTH( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );	\
+   *_ptr = CPU_TO_LE32(d);						\
+} while (0)
+#endif
+
+/* READ_DEPTH(d, x, y): load pixel (x, y) into d with stencil in bits 31..24
+ * and depth in bits 23..0.  The R300 variant converts the stored word to CPU
+ * byte order first and only then moves the fields, mirroring WRITE_DEPTH. */
+#if defined(RADEON_R300)
+#define READ_DEPTH( d, _x, _y )	do {					\
+    GLuint tmp = LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, (_x) + x_off, (_y) + y_off)));	\
+    d = ((tmp & 0x000000ff) << 24) | ((tmp & 0xffffff00) >> 8);	\
+  } while (0)
+#elif defined(RADEON_R600)
+#define READ_DEPTH( d, _x, _y )	do {					\
+    d = (*(GLuint*)(r600_ptr_depth(rrb, (_x) + x_off, (_y) + y_off))) & 0x00ffffff; \
+    d |= ((*(GLuint*)(r600_ptr_stencil(rrb, (_x) + x_off, (_y) + y_off))) << 24) & 0xff000000; \
+  } while (0)
+#elif defined(RADEON_R200)
+#define READ_DEPTH( d, _x, _y )	do {					\
+    d = LE32_TO_CPU(*(GLuint*)(r200_depth_4byte(rrb, (_x) + x_off, (_y) + y_off))); \
+  } while (0)
+#else
+#define READ_DEPTH( d, _x, _y )	do {					\
+    d = LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, (_x) + x_off, (_y) + y_off))); \
+  } while (0)
+#endif
+
+#define TAG(x) radeon##x##_s8_z24
+#include "depthtmp.h"
+
+/* ================================================================
+ * Stencil buffer
+ */
+
+/* 24 bit depth, 8 bit stencil depthbuffer functions.  WRITE_STENCIL is a
+ * read-modify-write replacing one byte: low on R300/R600, high otherwise. */
+#ifdef RADEON_R300
+#define WRITE_STENCIL( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);		\
+   GLuint tmp = LE32_TO_CPU(*_ptr);                                     \
+   tmp &= 0xffffff00;							\
+   tmp |= (d) & 0xff;							\
+   *_ptr = CPU_TO_LE32(tmp);                                            \
+} while (0)
+#elif defined(RADEON_R600)
+#define WRITE_STENCIL( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off);		\
+   GLuint tmp = *_ptr;				\
+   tmp &= 0xffffff00;							\
+   tmp |= (d) & 0xff;							\
+   *_ptr = tmp;					\
+} while (0)
+#elif defined(RADEON_R200)
+#define WRITE_STENCIL( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off);		\
+   GLuint tmp = LE32_TO_CPU(*_ptr);                                     \
+   tmp &= 0x00ffffff;							\
+   tmp |= (((d) & 0xff) << 24);						\
+   *_ptr = CPU_TO_LE32(tmp);                                            \
+} while (0)
+#else
+#define WRITE_STENCIL( _x, _y, d )					\
+do {									\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);		\
+   GLuint tmp = LE32_TO_CPU(*_ptr);                                     \
+   tmp &= 0x00ffffff;							\
+   tmp |= (((d) & 0xff) << 24);						\
+   *_ptr = CPU_TO_LE32(tmp);                                            \
+} while (0)
+#endif
+
+#ifdef RADEON_R300	/* READ_STENCIL(d, x, y): load the 8-bit stencil of pixel (x, y) into d */
+#define READ_STENCIL( d, _x, _y )					\
+do {									\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = LE32_TO_CPU(*_ptr);                                     \
+   d = tmp & 0x000000ff;						\
+} while (0)
+#elif defined(RADEON_R600)
+#define READ_STENCIL( d, _x, _y )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = *_ptr;				\
+   d = tmp & 0x000000ff;						\
+} while (0)
+#elif defined(RADEON_R200)
+#define READ_STENCIL( d, _x, _y )					\
+do {									\
+   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = LE32_TO_CPU(*_ptr);                                     \
+   d = (tmp & 0xff000000) >> 24;					\
+} while (0)
+#else
+#define READ_STENCIL( d, _x, _y )					\
+do {									\
+   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
+   GLuint tmp = LE32_TO_CPU(*_ptr);                                     \
+   d = (tmp & 0xff000000) >> 24;					\
+} while (0)
+#endif
+/* Instantiate the stencil span template; same _s8_z24 suffix as the depth spans. */
+#define TAG(x) radeon##x##_s8_z24
+#include "stenciltmp.h"
+
+
+/* Map (flag != 0) or unmap (flag == 0) the buffer object backing rb; does
+ * nothing when rb has no radeon renderbuffer or no buffer object. */
+static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
+{
+	struct radeon_renderbuffer *wrapped = radeon_renderbuffer(rb);
+	int err;
+
+	if (!wrapped || wrapped->bo == NULL)
+		return;
+
+	radeon_print(RADEON_MEMORY, RADEON_TRACE, "%s( rb %p, flag %s )\n",
+		     __func__, rb, flag ? "true":"false");
+
+	if (!flag) {
+		/* Unmapping also clears the row accessors. */
+		radeon_bo_unmap(wrapped->bo);
+		rb->GetRow = NULL;
+		rb->PutRow = NULL;
+		return;
+	}
+
+	radeon_bo_wait(wrapped->bo);
+	err = radeon_bo_map(wrapped->bo, 1);
+	if (err)
+		fprintf(stderr, "(%s) error(%d) mapping buffer.\n", __func__, err);
+	radeonSetSpanFunctions(wrapped);	/* runs even if the map failed */
+}
+
+/* Map or unmap (per `map`) what swrast may access through fb: color draw and
+ * read buffers, render-to-texture images, and wrapped depth/stencil buffers. */
+static void
+radeon_map_unmap_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb,
+			     GLboolean map)
+{
+	GLuint i, j;
+
+	radeon_print(RADEON_MEMORY, RADEON_TRACE,
+		"%s( %p , fb %p, map %s )\n",
+		__func__, ctx, fb, map ? "true":"false");
+
+	/* Color draw buffers: use fb's own count, because fb may be the read
+	 * framebuffer rather than ctx->DrawBuffer. */
+	for (j = 0; j < fb->_NumColorDrawBuffers; j++)
+		map_unmap_rb(fb->_ColorDrawBuffers[j], map);
+
+	map_unmap_rb(fb->_ColorReadBuffer, map);
+
+	/* check for render to textures */
+	for (i = 0; i < BUFFER_COUNT; i++) {
+		struct gl_renderbuffer_attachment *att = fb->Attachment + i;
+		struct gl_texture_object *tex = att->Texture;
+		if (tex) {
+			/* Render to texture. Note that a mipmapped texture need not
+			 * be complete for render to texture, so we must restrict to
+			 * mapping only the attached image. */
+			radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
+			ASSERT(att->Renderbuffer);
+			if (map)
+				radeon_teximage_map(image, GL_TRUE);
+			else
+				radeon_teximage_unmap(image);
+		}
+	}
+
+	/* depth buffer (Note wrapper!) */
+	if (fb->_DepthBuffer)
+		map_unmap_rb(fb->_DepthBuffer->Wrapped, map);
+
+	if (fb->_StencilBuffer)
+		map_unmap_rb(fb->_StencilBuffer->Wrapped, map);
+
+	radeon_check_front_buffer_rendering(ctx);
+}
+
+/* SpanRenderStart hook: calls radeon_firevertices(), then maps the enabled
+ * textures and the draw/read framebuffers. */
+static void radeonSpanRenderStart(GLcontext * ctx)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	int unit;
+
+	radeon_firevertices(rmesa);
+	/* Locking and waiting for idle should only matter in classic (non-DRI2)
+	 * mode; with a memory manager, mapping buffers and textures should
+	 * implicitly wait for the rendering commands that must complete. */
+	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+		LOCK_HARDWARE(rmesa);
+		radeonWaitForIdleLocked(rmesa);
+	}
+
+	for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
+		if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+			continue;
+		ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[unit]._Current);
+	}
+
+	radeon_map_unmap_framebuffer(ctx, ctx->DrawBuffer, GL_TRUE);
+	if (ctx->DrawBuffer != ctx->ReadBuffer)
+		radeon_map_unmap_framebuffer(ctx, ctx->ReadBuffer, GL_TRUE);
+}
+
+/* SpanRenderFinish hook: flush swrast, unmap textures and framebuffers, then unlock when DRI2 is off. */
+static void radeonSpanRenderFinish(GLcontext * ctx)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	int unit;
+
+	_swrast_flush(ctx);
+	for (unit = 0; unit < ctx->Const.MaxTextureImageUnits; unit++) {
+		if (!ctx->Texture.Unit[unit]._ReallyEnabled)
+			continue;
+		ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[unit]._Current);
+	}
+	radeon_map_unmap_framebuffer(ctx, ctx->DrawBuffer, GL_FALSE);
+	if (ctx->DrawBuffer != ctx->ReadBuffer)
+		radeon_map_unmap_framebuffer(ctx, ctx->ReadBuffer, GL_FALSE);
+
+	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
+		UNLOCK_HARDWARE(rmesa);
+	}
+}
+
+/* Install the span render start/finish hooks into swrast's driver table. */
+void radeonInitSpanFuncs(GLcontext * ctx)
+{
+	struct swrast_device_driver *hooks = _swrast_GetDeviceDriverReference(ctx);
+	hooks->SpanRenderFinish = radeonSpanRenderFinish;
+	hooks->SpanRenderStart = radeonSpanRenderStart;
+}
+
+/**
+ * Plug in the Get/Put routines for the given driRenderbuffer.
+ *
+ * The initializer is picked from rrb->base.Format; a format with no entry
+ * below only produces a message on stderr.
+ */
+static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
+{
+	switch (rrb->base.Format) {
+	/* 16-bit color */
+	case MESA_FORMAT_RGB565:       radeonInitPointers_RGB565(&rrb->base); break;
+	case MESA_FORMAT_RGB565_REV:   radeonInitPointers_RGB565_REV(&rrb->base); break;
+	case MESA_FORMAT_ARGB4444:     radeonInitPointers_ARGB4444(&rrb->base); break;
+	case MESA_FORMAT_ARGB4444_REV: radeonInitPointers_ARGB4444_REV(&rrb->base); break;
+	case MESA_FORMAT_ARGB1555:     radeonInitPointers_ARGB1555(&rrb->base); break;
+	case MESA_FORMAT_ARGB1555_REV: radeonInitPointers_ARGB1555_REV(&rrb->base); break;
+
+	/* 32-bit color */
+	case MESA_FORMAT_XRGB8888:     radeonInitPointers_xRGB8888(&rrb->base); break;
+	case MESA_FORMAT_XRGB8888_REV: radeonInitPointers_BGRx8888(&rrb->base); break;
+	case MESA_FORMAT_ARGB8888:     radeonInitPointers_ARGB8888(&rrb->base); break;
+	case MESA_FORMAT_ARGB8888_REV: radeonInitPointers_BGRA8888(&rrb->base); break;
+
+	/* depth */
+	case MESA_FORMAT_Z16:          radeonInitDepthPointers_z16(&rrb->base); break;
+	case MESA_FORMAT_X8_Z24:       radeonInitDepthPointers_z24(&rrb->base); break;
+	case MESA_FORMAT_S8_Z24:       radeonInitDepthPointers_s8_z24(&rrb->base); break;
+
+	/* stencil */
+	case MESA_FORMAT_S8:           radeonInitStencilPointers_s8_z24(&rrb->base); break;
+
+	default:
+		/* No span functions are installed for this format. */
+		fprintf(stderr, "radeonSetSpanFunctions: bad format: 0x%04X\n", rrb->base.Format);
+		break;
+	}
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h
new file mode 100644
index 0000000000..ea6a2e7fb4
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_span.h
@@ -0,0 +1,47 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *   Kevin E. Martin <martin@valinux.com>
+ */
+
+#ifndef __RADEON_SPAN_H__
+#define __RADEON_SPAN_H__
+/* Installs the radeon SpanRenderStart/SpanRenderFinish hooks into swrast for ctx. */
+extern void radeonInitSpanFuncs(GLcontext * ctx);
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c
new file mode 100644
index 0000000000..539b067742
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_state.c
@@ -0,0 +1,2294 @@
+/**************************************************************************
+
+Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Gareth Hughes <gareth@valinux.com>
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/api_arrayelt.h"
+#include "main/enums.h"
+#include "main/light.h"
+#include "main/context.h"
+#include "main/framebuffer.h"
+#include "main/simple_list.h"
+
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "swrast_setup/swrast_setup.h"
+#include "drivers/common/meta.h"
+
+#include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_tcl.h"
+#include "radeon_tex.h"
+#include "radeon_swtcl.h"
+
+static void radeonUpdateSpecular( GLcontext *ctx );
+
+/* =============================================================
+ * Alpha blending
+ */
+
+/* Store the alpha test function and reference value in the CTX_PP_MISC
+ * command word.  ref is converted to an unsigned byte with
+ * CLAMPED_FLOAT_TO_UBYTE; a func matching none of the GL comparison enums
+ * leaves the test-op field cleared. */
+static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC];
+   GLuint test_op = 0;
+   GLubyte refByte;
+
+   CLAMPED_FLOAT_TO_UBYTE(refByte, ref);
+
+   RADEON_STATECHANGE( rmesa, ctx );
+
+   /* Translate the GL comparison into the hardware test-op bits;
+    * test_op stays 0 when func is not one of the eight comparisons. */
+   if ( func == GL_NEVER )
+      test_op = RADEON_ALPHA_TEST_FAIL;
+   else if ( func == GL_LESS )
+      test_op = RADEON_ALPHA_TEST_LESS;
+   else if ( func == GL_EQUAL )
+      test_op = RADEON_ALPHA_TEST_EQUAL;
+   else if ( func == GL_LEQUAL )
+      test_op = RADEON_ALPHA_TEST_LEQUAL;
+   else if ( func == GL_GREATER )
+      test_op = RADEON_ALPHA_TEST_GREATER;
+   else if ( func == GL_NOTEQUAL )
+      test_op = RADEON_ALPHA_TEST_NEQUAL;
+   else if ( func == GL_GEQUAL )
+      test_op = RADEON_ALPHA_TEST_GEQUAL;
+   else if ( func == GL_ALWAYS )
+      test_op = RADEON_ALPHA_TEST_PASS;
+
+   /* Replace the old op and reference fields with the new ones. */
+   pp_misc &= ~(RADEON_ALPHA_TEST_OP_MASK | RADEON_REF_ALPHA_MASK);
+   pp_misc |= (refByte & RADEON_REF_ALPHA_MASK);
+   pp_misc |= test_op;
+
+   /* Write the updated word back into the context state. */
+   rmesa->hw.ctx.cmd[CTX_PP_MISC] = pp_misc;
+}
+
+/* Select the blend combine function in CTX_RB3D_BLENDCNTL and keep
+ * RADEON_ROP_ENABLE in sync with the logic-op state.  Equations other than
+ * add, subtract and GL_LOGIC_OP raise RADEON_FALLBACK_BLEND_EQ through
+ * FALLBACK() while blending is enabled.  modeA must equal modeRGB. */
+static void radeonBlendEquationSeparate( GLcontext *ctx,
+					 GLenum modeRGB, GLenum modeA )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint blendcntl = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK;
+   GLboolean fallback = GL_FALSE;
+   GLboolean use_rop;
+
+   assert( modeRGB == modeA );
+
+   /* Pick the combine function, or flag a fallback. */
+   if ( modeRGB == GL_FUNC_ADD || modeRGB == GL_LOGIC_OP )
+      blendcntl |= RADEON_COMB_FCN_ADD_CLAMP;
+   else if ( modeRGB == GL_FUNC_SUBTRACT )
+      blendcntl |= RADEON_COMB_FCN_SUB_CLAMP;
+   else if ( ctx->Color.BlendEnabled )
+      fallback = GL_TRUE;
+   else
+      blendcntl |= RADEON_COMB_FCN_ADD_CLAMP;
+
+   FALLBACK( rmesa, RADEON_FALLBACK_BLEND_EQ, fallback );
+   if ( fallback )
+      return;
+
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = blendcntl;
+
+   /* ROP is on for color logic ops, or for GL_LOGIC_OP as the blend equation. */
+   use_rop = ctx->Color.ColorLogicOpEnabled ||
+      (ctx->Color.BlendEnabled && ctx->Color.BlendEquationRGB == GL_LOGIC_OP);
+   if ( use_rop )
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
+   else
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
+}
+
+static void radeonBlendFuncSeparate( GLcontext *ctx,
+				     GLenum sfactorRGB, GLenum dfactorRGB,
+				     GLenum sfactorA, GLenum dfactorA )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] &
+      ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK);
+   GLboolean fallback = GL_FALSE;
+   /* Source factor bits; read from ctx->Color.BlendSrcRGB (the four factor arguments are not read). */
+   switch ( ctx->Color.BlendSrcRGB ) {
+   case GL_ZERO:
+      b |= RADEON_SRC_BLEND_GL_ZERO;
+      break;
+   case GL_ONE:
+      b |= RADEON_SRC_BLEND_GL_ONE;
+      break;
+   case GL_DST_COLOR:
+      b |= RADEON_SRC_BLEND_GL_DST_COLOR;
+      break;
+   case GL_ONE_MINUS_DST_COLOR:
+      b |= RADEON_SRC_BLEND_GL_ONE_MINUS_DST_COLOR;
+      break;
+   case GL_SRC_COLOR:
+      b |= RADEON_SRC_BLEND_GL_SRC_COLOR;
+      break;
+   case GL_ONE_MINUS_SRC_COLOR:
+      b |= RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR;
+      break;
+   case GL_SRC_ALPHA:
+      b |= RADEON_SRC_BLEND_GL_SRC_ALPHA;
+      break;
+   case GL_ONE_MINUS_SRC_ALPHA:
+      b |= RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+      break;
+   case GL_DST_ALPHA:
+      b |= RADEON_SRC_BLEND_GL_DST_ALPHA;
+      break;
+   case GL_ONE_MINUS_DST_ALPHA:
+      b |= RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA;
+      break;
+   case GL_SRC_ALPHA_SATURATE:
+      b |= RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE;
+      break;
+   case GL_CONSTANT_COLOR:
+   case GL_ONE_MINUS_CONSTANT_COLOR:
+   case GL_CONSTANT_ALPHA:
+   case GL_ONE_MINUS_CONSTANT_ALPHA:
+      if (ctx->Color.BlendEnabled)	/* constant factors have no bits here: fall back */
+	 fallback = GL_TRUE;
+      else
+	 b |= RADEON_SRC_BLEND_GL_ONE;
+      break;
+   default:	/* any other value leaves the source field cleared */
+      break;
+   }
+   /* Destination factor bits; read from ctx->Color.BlendDstRGB. */
+   switch ( ctx->Color.BlendDstRGB ) {
+   case GL_ZERO:
+      b |= RADEON_DST_BLEND_GL_ZERO;
+      break;
+   case GL_ONE:
+      b |= RADEON_DST_BLEND_GL_ONE;
+      break;
+   case GL_SRC_COLOR:
+      b |= RADEON_DST_BLEND_GL_SRC_COLOR;
+      break;
+   case GL_ONE_MINUS_SRC_COLOR:
+      b |= RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR;
+      break;
+   case GL_SRC_ALPHA:
+      b |= RADEON_DST_BLEND_GL_SRC_ALPHA;
+      break;
+   case GL_ONE_MINUS_SRC_ALPHA:
+      b |= RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+      break;
+   case GL_DST_COLOR:
+      b |= RADEON_DST_BLEND_GL_DST_COLOR;
+      break;
+   case GL_ONE_MINUS_DST_COLOR:
+      b |= RADEON_DST_BLEND_GL_ONE_MINUS_DST_COLOR;
+      break;
+   case GL_DST_ALPHA:
+      b |= RADEON_DST_BLEND_GL_DST_ALPHA;
+      break;
+   case GL_ONE_MINUS_DST_ALPHA:
+      b |= RADEON_DST_BLEND_GL_ONE_MINUS_DST_ALPHA;
+      break;
+   case GL_CONSTANT_COLOR:
+   case GL_ONE_MINUS_CONSTANT_COLOR:
+   case GL_CONSTANT_ALPHA:
+   case GL_ONE_MINUS_CONSTANT_ALPHA:
+      if (ctx->Color.BlendEnabled)	/* constant factors have no bits here: fall back */
+	 fallback = GL_TRUE;
+      else
+	 b |= RADEON_DST_BLEND_GL_ZERO;
+      break;
+   default:	/* any other value leaves the destination field cleared */
+      break;
+   }
+   /* Report the fallback state; the register word is only written when no fallback is needed. */
+   FALLBACK( rmesa, RADEON_FALLBACK_BLEND_FUNC, fallback );
+   if ( !fallback ) {
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b;
+   }
+}
+
+
+/* =============================================================
+ * Depth testing
+ */
+
+/* Store the Z test function in the CTX_RB3D_ZSTENCILCNTL command word.
+ * The function is taken from ctx->Depth.Func (the func argument is not
+ * read).  A value matching none of the GL comparison enums leaves the
+ * test field cleared. */
+static void radeonDepthFunc( GLcontext *ctx, GLenum func )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint ztest = 0;
+
+   RADEON_STATECHANGE( rmesa, ctx );
+
+   /* Translate the GL comparison into the hardware Z test bits. */
+   if ( ctx->Depth.Func == GL_NEVER )
+      ztest = RADEON_Z_TEST_NEVER;
+   else if ( ctx->Depth.Func == GL_LESS )
+      ztest = RADEON_Z_TEST_LESS;
+   else if ( ctx->Depth.Func == GL_EQUAL )
+      ztest = RADEON_Z_TEST_EQUAL;
+   else if ( ctx->Depth.Func == GL_LEQUAL )
+      ztest = RADEON_Z_TEST_LEQUAL;
+   else if ( ctx->Depth.Func == GL_GREATER )
+      ztest = RADEON_Z_TEST_GREATER;
+   else if ( ctx->Depth.Func == GL_NOTEQUAL )
+      ztest = RADEON_Z_TEST_NEQUAL;
+   else if ( ctx->Depth.Func == GL_GEQUAL )
+      ztest = RADEON_Z_TEST_GEQUAL;
+   else if ( ctx->Depth.Func == GL_ALWAYS )
+      ztest = RADEON_Z_TEST_ALWAYS;
+
+   /* Clear the old test field, then set the new one (if any). */
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK;
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= ztest;
+}
+
+
+
+/* Set or clear RADEON_Z_WRITE_ENABLE from ctx->Depth.Mask (flag is not read). */
+static void radeonDepthMask( GLcontext *ctx, GLboolean flag )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   RADEON_STATECHANGE( rmesa, ctx );
+
+   if ( !ctx->Depth.Mask )
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_WRITE_ENABLE;
+   else
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |=  RADEON_Z_WRITE_ENABLE;
+}
+
+/* Store the depth clear value, scaling d by the maximum of the depth format
+ * currently selected in CTX_RB3D_ZSTENCILCNTL (16-bit or 24-bit integer Z).
+ * Nothing is stored for any other format. */
+static void radeonClearDepth( GLcontext *ctx, GLclampd d )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLuint zformat = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &
+			   RADEON_DEPTH_FORMAT_MASK);
+
+   if ( zformat == RADEON_DEPTH_FORMAT_16BIT_INT_Z )
+      rmesa->radeon.state.depth.clear = d * 0x0000ffff;
+   else if ( zformat == RADEON_DEPTH_FORMAT_24BIT_INT_Z )
+      rmesa->radeon.state.depth.clear = d * 0x00ffffff;
+   /* otherwise the previously stored clear value is kept */
+}
+
+
+/* =============================================================
+ * Fog
+ */
+
+
+static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   union { int i; float f; } c, d;	/* fog coefficients: set as float, stored in cmd[] as int */
+   GLchan col[4];
+   /* Values come from ctx->Fog, not from param; pname only selects what to update. */
+   switch (pname) {
+   case GL_FOG_MODE:
+      if (!ctx->Fog.Enabled)
+	 return;
+      RADEON_STATECHANGE(rmesa, tcl);
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK;
+      switch (ctx->Fog.Mode) {
+      case GL_LINEAR:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_LINEAR;
+	 break;
+      case GL_EXP:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP;
+	 break;
+      case GL_EXP2:
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_TCL_FOG_EXP2;
+	 break;
+      default:
+	 return;	/* unrecognized mode: stop here, the fog mode bits stay cleared */
+      }
+   /* fallthrough: a mode change also recomputes the C/D coefficients below */
+   case GL_FOG_DENSITY:
+   case GL_FOG_START:
+   case GL_FOG_END:
+      if (!ctx->Fog.Enabled)
+	 return;
+      c.i = rmesa->hw.fog.cmd[FOG_C];
+      d.i = rmesa->hw.fog.cmd[FOG_D];
+      switch (ctx->Fog.Mode) {
+      case GL_EXP:
+	 c.f = 0.0;
+	 /* While this is the opposite sign from the DDK, it makes the fog test
+	  * pass, and matches r200.
+	  */
+	 d.f = -ctx->Fog.Density;
+	 break;
+      case GL_EXP2:
+	 c.f = 0.0;
+	 d.f = -(ctx->Fog.Density * ctx->Fog.Density);
+	 break;
+      case GL_LINEAR:
+	 if (ctx->Fog.Start == ctx->Fog.End) {	/* avoids dividing by zero below */
+	    c.f = 1.0F;
+	    d.f = 1.0F;
+	 } else {
+	    c.f = ctx->Fog.End/(ctx->Fog.End-ctx->Fog.Start);
+	    /* While this is the opposite sign from the DDK, it makes the fog
+	     * test pass, and matches r200.
+	     */
+	    d.f = -1.0/(ctx->Fog.End-ctx->Fog.Start);
+	 }
+	 break;
+      default:
+	 break;
+      }
+      if (c.i != rmesa->hw.fog.cmd[FOG_C] || d.i != rmesa->hw.fog.cmd[FOG_D]) {	/* only mark state changed on a real change */
+	 RADEON_STATECHANGE( rmesa, fog );
+	 rmesa->hw.fog.cmd[FOG_C] = c.i;
+	 rmesa->hw.fog.cmd[FOG_D] = d.i;
+      }
+      break;
+   case GL_FOG_COLOR:	/* repack ctx->Fog.Color into the CTX_PP_FOG_COLOR word */
+      RADEON_STATECHANGE( rmesa, ctx );
+      UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color );
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~RADEON_FOG_COLOR_MASK;
+      rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |=
+	 radeonPackColor( 4, col[0], col[1], col[2], 0 );
+      break;
+   case GL_FOG_COORD_SRC:
+      radeonUpdateSpecular( ctx );
+      break;
+   default:
+      return;
+   }
+}
+
+/* =============================================================
+ * Culling
+ */
+
+static void radeonCullFace( GLcontext *ctx, GLenum unused )
+{
+   /* Rebuild the solid-face bits of SE_CNTL and the TCL cull bits from
+    * ctx->Polygon; an atom is only flagged dirty if its word changed.
+    */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
+   GLuint tcl_ctl = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL];
+   GLuint hidden = 0;   /* solid-face bits to clear */
+   GLuint culled = 0;   /* TCL cull bits to set */
+
+   if ( ctx->Polygon.CullFlag ) {
+      const GLenum which = ctx->Polygon.CullFaceMode;
+      if ( which == GL_FRONT ) {
+         hidden = RADEON_FFACE_SOLID;
+         culled = RADEON_CULL_FRONT;
+      } else if ( which == GL_BACK ) {
+         hidden = RADEON_BFACE_SOLID;
+         culled = RADEON_CULL_BACK;
+      } else if ( which == GL_FRONT_AND_BACK ) {
+         hidden = RADEON_FFACE_SOLID | RADEON_BFACE_SOLID;
+         culled = RADEON_CULL_FRONT | RADEON_CULL_BACK;
+      }
+   }
+
+   se_cntl = (se_cntl | RADEON_FFACE_SOLID | RADEON_BFACE_SOLID) & ~hidden;
+   tcl_ctl = (tcl_ctl & ~(RADEON_CULL_FRONT | RADEON_CULL_BACK)) | culled;
+   if ( se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL] ) {
+      RADEON_STATECHANGE(rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
+   }
+   if ( tcl_ctl != rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] ) {
+      RADEON_STATECHANGE(rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = tcl_ctl;
+   }
+}
+
+static void radeonFrontFace( GLcontext *ctx, GLenum mode )
+{
+   /* Program the front-face winding in both SE_CNTL and the TCL control
+    * word, flipping it when the draw buffer has a non-zero Name.
+    */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLboolean to_fbo = ctx->DrawBuffer && ctx->DrawBuffer->Name;
+
+   /* Winding is inverted when rendering to FBO */
+   if ( to_fbo )
+      mode = (mode == GL_CW) ? GL_CCW : GL_CW;
+
+   RADEON_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK;
+   RADEON_STATECHANGE( rmesa, tcl );
+   rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_CULL_FRONT_IS_CCW;
+
+   if ( mode == GL_CW ) {
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_FFACE_CULL_CW;
+   } else if ( mode == GL_CCW ) {
+      rmesa->hw.set.cmd[SET_SE_CNTL] |= RADEON_FFACE_CULL_CCW;
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= RADEON_CULL_FRONT_IS_CCW;
+   }
+}
+
+
+/* =============================================================
+ * Line state
+ */
+static void radeonLineWidth( GLcontext *ctx, GLfloat widthf )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLboolean wide = ( widthf > 1.0 );
+
+   RADEON_STATECHANGE( rmesa, lin );
+   RADEON_STATECHANGE( rmesa, set );
+
+   /* Line width is stored in U6.4 format, hence the scale by 16. */
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (GLuint)(widthf * 16.0);
+   /* Wide-line rendering is only switched on for widths above 1.0. */
+   if ( wide )
+      rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_WIDELINE_ENABLE;
+   else
+      rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_WIDELINE_ENABLE;
+}
+
+static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern )
+{
+   /* Pack the low 8 bits of 'factor' above the 16-bit stipple pattern. */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLuint repeat = ((GLuint)factor & 0xff) << 16;
+   RADEON_STATECHANGE( rmesa, lin );
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = repeat | (GLuint)pattern;
+}
+
+
+/* =============================================================
+ * Masks
+ */
+static void radeonColorMask( GLcontext *ctx,
+			     GLboolean r, GLboolean g,
+			     GLboolean b, GLboolean a )
+{
+   /* The plane mask is packed from ctx->Color.ColorMask[0]; the r/g/b/a
+    * arguments are not read.  Without a colour buffer nothing is done. */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   struct radeon_renderbuffer *color_rb = radeon_get_colorbuffer(&rmesa->radeon);
+   GLuint planemask;
+
+   if (!color_rb)
+      return;
+
+   planemask = radeonPackColor( color_rb->cpp,
+                                ctx->Color.ColorMask[0][RCOMP],
+                                ctx->Color.ColorMask[0][GCOMP],
+                                ctx->Color.ColorMask[0][BCOMP],
+                                ctx->Color.ColorMask[0][ACOMP] );
+   if ( planemask == rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] )
+      return;
+   RADEON_STATECHANGE( rmesa, msk );
+   rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = planemask;
+}
+
+
+/* =============================================================
+ * Polygon state
+ */
+
+static void radeonPolygonOffset( GLcontext *ctx,
+				 GLfloat factor, GLfloat units )
+{
+   /* 'units' is scaled by 1/_DepthMaxF; both terms are stored via .ui32. */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   float_ui32_type zbias_factor = { factor };
+   float_ui32_type zbias_const = { units * depth_scale };
+   RADEON_STATECHANGE( rmesa, zbs );
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_FACTOR]   = zbias_factor.ui32;
+   rmesa->hw.zbs.cmd[ZBS_SE_ZBIAS_CONSTANT] = zbias_const.ui32;
+}
+
+static void radeonPolygonStipplePreKMS( GLcontext *ctx, const GLubyte *mask )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint i;
+   drm_radeon_stipple_t stipple;
+   /* Cache the 32-word pattern bottom-up (must flip pattern upside down).
+    * The source is read through a const-qualified pointer so the cast
+    * no longer discards the qualifier of 'mask'. */
+   for ( i = 0 ; i < 32 ; i++ ) {
+      rmesa->state.stipple.mask[31 - i] = ((const GLuint *) mask)[i];
+   }
+
+   /* TODO: push this into cmd mechanism
+    */
+   radeon_firevertices(&rmesa->radeon);
+   LOCK_HARDWARE( &rmesa->radeon );
+
+   /* FIXME: Use window x,y offsets into stipple RAM.
+    * NOTE: the drmCommandWrite() result is not checked. */
+   stipple.mask = rmesa->state.stipple.mask;
+   drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE,
+		    &stipple, sizeof(drm_radeon_stipple_t) );
+   UNLOCK_HARDWARE( &rmesa->radeon );
+}
+
+static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLboolean unfilled = (ctx->_TriangleCaps & DD_TRI_UNFILLED) ? GL_TRUE : GL_FALSE;
+
+   /* Can't generally do unfilled via tcl, but some good special cases work. */
+   TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, unfilled);
+   if (!rmesa->radeon.TclFallback)
+      return;
+
+   /* A TCL fallback is active: re-pick the render and vertex state. */
+   radeonChooseRenderState( ctx );
+   radeonChooseVertexState( ctx );
+}
+
+
+/* =============================================================
+ * Rendering attributes
+ *
+ * We really don't want to recalculate all this every time we bind a
+ * texture.  These things shouldn't change all that often, so it makes
+ * sense to break them out of the core texture state update routines.
+ */
+
+/* Examine lighting, colour-sum and fog state to set up the TCL colour
+ * outputs and decide whether specular should be enabled in PP_CNTL.
+ */
+static void radeonUpdateSpecular( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL];
+   GLuint flag = 0;
+
+   RADEON_STATECHANGE( rmesa, tcl );
+
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_SPECULAR;
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &= ~RADEON_TCL_COMPUTE_DIFFUSE;
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_SPEC;
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~RADEON_TCL_VTX_PK_DIFFUSE;
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LIGHTING_ENABLE;
+
+   p &= ~RADEON_SPECULAR_ENABLE;
+
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_DIFFUSE_SPECULAR_COMBINE;
+
+   /* Everything was cleared above; now set the bits for the active mode. */
+   if (ctx->Light.Enabled &&
+       ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+      p |=  RADEON_SPECULAR_ENABLE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &=
+	 ~RADEON_DIFFUSE_SPECULAR_COMBINE;
+   }
+   else if (ctx->Light.Enabled) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+   } else if (ctx->Fog.ColorSumEnabled ) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+      p |= RADEON_SPECULAR_ENABLE;
+   } else {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE;
+   }
+
+   if (ctx->Fog.Enabled) {
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_SPEC;
+      if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH) {
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] |= RADEON_TCL_COMPUTE_SPECULAR;
+      /* Bizarre: have to leave lighting enabled to get fog. */
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE;
+      }
+      else {
+      /* cannot do tcl fog factor calculation with fog coord source
+       * (send precomputed factors). Cannot use precomputed fog
+       * factors together with tcl spec light (need tcl fallback) */
+	 flag = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] &
+	    RADEON_TCL_COMPUTE_SPECULAR) != 0;
+      }
+   }
+
+   TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_FOGCOORDSPEC, flag);
+
+   if (NEED_SECONDARY_COLOR(ctx)) {
+      assert( (p & RADEON_SPECULAR_ENABLE) != 0 );
+   } else {
+      assert( (p & RADEON_SPECULAR_ENABLE) == 0 );
+   }
+
+   if ( rmesa->hw.ctx.cmd[CTX_PP_CNTL] != p ) {
+      RADEON_STATECHANGE( rmesa, ctx );
+      rmesa->hw.ctx.cmd[CTX_PP_CNTL] = p;
+   }
+
+   /* Update vertex/render formats
+    */
+   if (rmesa->radeon.TclFallback) {
+      radeonChooseRenderState( ctx );
+      radeonChooseVertexState( ctx );
+   }
+}
+
+
+/* =============================================================
+ * Materials
+ */
+
+
+/* Update on colormaterial, material emissive/ambient,
+ * lightmodel.globalambient
+ */
+static void update_global_ambient( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   float *dst = (float *)RADEON_DB_STATE( glt );
+   const GLuint src_fields = (3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
+                             (3 << RADEON_AMBIENT_SOURCE_SHIFT);
+   const GLboolean both_zero =
+      (rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] & src_fields) == 0;
+
+   /* Need to do more if both emissive & ambient are PREMULT:
+    * Hope this is not needed for MULT
+    */
+   if (both_zero) {
+      /* seed with the front emission, then accumulate via ACC_SCALE_3V */
+      COPY_3V( &dst[GLT_RED],
+               ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]);
+      ACC_SCALE_3V( &dst[GLT_RED],
+                    ctx->Light.Model.Ambient,
+                    ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]);
+   } else {
+      COPY_3V( &dst[GLT_RED], ctx->Light.Model.Ambient );
+   }
+
+   RADEON_DB_STATECHANGE(rmesa, &rmesa->hw.glt);
+}
+
+/* Update on change to
+ *    - light[p].colors
+ *    - light[p].enabled
+ */
+static void update_light_colors( GLcontext *ctx, GLuint p )
+{
+   /* Copies ambient/diffuse/specular of light 'p' into its lit[p]
+    * atom; a disabled light is left untouched.
+    */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   struct gl_light *light = &ctx->Light.Light[p];
+   float *dst;
+
+   if (!light->Enabled)
+      return;
+   dst = (float *)RADEON_DB_STATE( lit[p] );
+   COPY_4V( &dst[LIT_AMBIENT_RED], light->Ambient );
+   COPY_4V( &dst[LIT_DIFFUSE_RED], light->Diffuse );
+   COPY_4V( &dst[LIT_SPECULAR_RED], light->Specular );
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+}
+
+/* Also fallback for asym colormaterial mode in twoside lighting...
+ */
+static void check_twoside_fallback( GLcontext *ctx )
+{
+   GLboolean need_fallback = GL_FALSE;
+
+   if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
+      const GLuint cm_bits = ctx->Light.ColorMaterialBitmask;
+
+      if (ctx->Light.ColorMaterialEnabled &&
+          (cm_bits & BACK_MATERIAL_BITS) != ((cm_bits & FRONT_MATERIAL_BITS) << 1)) {
+         need_fallback = GL_TRUE;
+      } else {
+         GLint attr;
+         /* compare each front attribute with the entry right after it */
+         for (attr = MAT_ATTRIB_FRONT_AMBIENT;
+              attr < MAT_ATTRIB_FRONT_INDEXES && !need_fallback; attr += 2)
+            need_fallback = memcmp( ctx->Light.Material.Attrib[attr],
+                                    ctx->Light.Material.Attrib[attr + 1],
+                                    sizeof(GLfloat)*4) != 0;
+      }
+   }
+
+   TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_LIGHT_TWOSIDE, need_fallback );
+}
+
+
+static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode )
+{
+   /* Choose, for each of the four material terms (emissive, ambient,
+    * diffuse, specular), the colour source field of TCL_LIGHT_MODEL_CTL:
+    *
+    *   - RADEON_LM_SOURCE_VERTEX_DIFFUSE when colour material is enabled
+    *     and its bitmask contains the matching MAT_BIT_FRONT_* bit,
+    *   - RADEON_LM_SOURCE_STATE_MULT in every other case.
+    *
+    * With colour material disabled nothing is tracked, so all four
+    * terms default to STATE_MULT.
+    * 'face' and 'mode' are not read; the decision is taken from
+    * ctx->Light.  The tcl atom is only flagged dirty when the control
+    * word actually changes.
+    */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint lm_ctl = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
+   GLuint tracked = 0;   /* material bits that follow the vertex colour */
+   GLuint src;
+
+   if (ctx->Light.ColorMaterialEnabled)
+      tracked = ctx->Light.ColorMaterialBitmask;
+
+   /* Wipe all four source selectors before refilling them. */
+   lm_ctl &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) |
+               (3 << RADEON_AMBIENT_SOURCE_SHIFT) |
+               (3 << RADEON_DIFFUSE_SOURCE_SHIFT) |
+               (3 << RADEON_SPECULAR_SOURCE_SHIFT));
+
+   /* Emissive */
+   if (tracked & MAT_BIT_FRONT_EMISSION)
+      src = RADEON_LM_SOURCE_VERTEX_DIFFUSE;
+   else
+      src = RADEON_LM_SOURCE_STATE_MULT;
+   lm_ctl |= src << RADEON_EMISSIVE_SOURCE_SHIFT;
+
+   /* Ambient */
+   if (tracked & MAT_BIT_FRONT_AMBIENT)
+      src = RADEON_LM_SOURCE_VERTEX_DIFFUSE;
+   else
+      src = RADEON_LM_SOURCE_STATE_MULT;
+   lm_ctl |= src << RADEON_AMBIENT_SOURCE_SHIFT;
+
+   /* Diffuse */
+   if (tracked & MAT_BIT_FRONT_DIFFUSE)
+      src = RADEON_LM_SOURCE_VERTEX_DIFFUSE;
+   else
+      src = RADEON_LM_SOURCE_STATE_MULT;
+   lm_ctl |= src << RADEON_DIFFUSE_SOURCE_SHIFT;
+
+   /* Specular */
+   if (tracked & MAT_BIT_FRONT_SPECULAR)
+      src = RADEON_LM_SOURCE_VERTEX_DIFFUSE;
+   else
+      src = RADEON_LM_SOURCE_STATE_MULT;
+   lm_ctl |= src << RADEON_SPECULAR_SOURCE_SHIFT;
+
+   /* Only touch the atom when something changed. */
+   if (lm_ctl != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) {
+      RADEON_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = lm_ctl;
+   }
+}
+
+void radeonUpdateMaterial( GLcontext *ctx )
+{
+   /* Upload untracked front material terms, then recheck the twoside fallback. */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLfloat (*attrib)[4] = ctx->Light.Material.Attrib;
+   GLfloat *dst = (GLfloat *)RADEON_DB_STATE( mtl );
+   const GLuint tracked = ctx->Light.ColorMaterialEnabled
+                             ? ctx->Light.ColorMaterialBitmask : 0;
+   const GLuint wanted = ~tracked;
+
+   if (RADEON_DEBUG & RADEON_STATE)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (wanted & MAT_BIT_FRONT_EMISSION) {
+      const GLfloat *em = attrib[MAT_ATTRIB_FRONT_EMISSION];
+      dst[MTL_EMMISSIVE_RED]   = em[0];
+      dst[MTL_EMMISSIVE_GREEN] = em[1];
+      dst[MTL_EMMISSIVE_BLUE]  = em[2];
+      dst[MTL_EMMISSIVE_ALPHA] = em[3];
+   }
+   if (wanted & MAT_BIT_FRONT_AMBIENT) {
+      const GLfloat *am = attrib[MAT_ATTRIB_FRONT_AMBIENT];
+      dst[MTL_AMBIENT_RED]     = am[0];
+      dst[MTL_AMBIENT_GREEN]   = am[1];
+      dst[MTL_AMBIENT_BLUE]    = am[2];
+      dst[MTL_AMBIENT_ALPHA]   = am[3];
+   }
+   if (wanted & MAT_BIT_FRONT_DIFFUSE) {
+      const GLfloat *di = attrib[MAT_ATTRIB_FRONT_DIFFUSE];
+      dst[MTL_DIFFUSE_RED]     = di[0];
+      dst[MTL_DIFFUSE_GREEN]   = di[1];
+      dst[MTL_DIFFUSE_BLUE]    = di[2];
+      dst[MTL_DIFFUSE_ALPHA]   = di[3];
+   }
+   if (wanted & MAT_BIT_FRONT_SPECULAR) {
+      const GLfloat *sp = attrib[MAT_ATTRIB_FRONT_SPECULAR];
+      dst[MTL_SPECULAR_RED]    = sp[0];
+      dst[MTL_SPECULAR_GREEN]  = sp[1];
+      dst[MTL_SPECULAR_BLUE]   = sp[2];
+      dst[MTL_SPECULAR_ALPHA]  = sp[3];
+   }
+   if (wanted & MAT_BIT_FRONT_SHININESS)
+      dst[MTL_SHININESS]       = attrib[MAT_ATTRIB_FRONT_SHININESS][0];
+
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mtl );
+   check_twoside_fallback( ctx );
+}
+
+/* _NEW_LIGHT
+ * _NEW_MODELVIEW
+ * _MESA_NEW_NEED_EYE_COORDS
+ *
+ * Uses derived state from mesa:
+ *       _VP_inf_norm
+ *       _h_inf_norm
+ *       _Position
+ *       _NormSpotDirection
+ *       _ModelViewInvScale
+ *       _NeedEyeCoords
+ *       _EyeZDir
+ *
+ * which are calculated in light.c and are correct for the current
+ * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW
+ * and _MESA_NEW_NEED_EYE_COORDS.
+ */
+static void update_light( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+   /* Have to check these, or have an automatic shortcircuit mechanism
+    * to remove noop statechanges. (Or just do a better job on the
+    * front end).
+    */
+   {
+      GLuint tmp = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL];
+
+      if (ctx->_NeedEyeCoords)
+	 tmp &= ~RADEON_LIGHT_IN_MODELSPACE;
+      else
+	 tmp |= RADEON_LIGHT_IN_MODELSPACE;
+
+
+      /* NOTE(review): an older remark said to leave this test disabled
+         (unexplained q3 lockup, even with new packets), yet the
+         comparison below is active - confirm which one is intended. */
+      if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL])
+      {
+	 RADEON_STATECHANGE( rmesa, tcl );
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = tmp;
+      }
+   }
+
+   {
+      GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( eye );
+      fcmd[EYE_X] = ctx->_EyeZDir[0];
+      fcmd[EYE_Y] = ctx->_EyeZDir[1];
+      fcmd[EYE_Z] = - ctx->_EyeZDir[2];
+      fcmd[EYE_RESCALE_FACTOR] = ctx->_ModelViewInvScale;
+      RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.eye );
+   }
+
+   /* Per-light position/direction: only for enabled lights, and only
+    * while lighting as a whole is enabled. */
+   if (ctx->Light.Enabled) {
+      GLint p;
+      for (p = 0 ; p < MAX_LIGHTS; p++) {
+	 if (ctx->Light.Light[p].Enabled) {
+	    struct gl_light *l = &ctx->Light.Light[p];
+	    GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( lit[p] );
+
+	    if (l->EyePosition[3] == 0.0) {
+	       COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm );
+	       COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm );
+	       fcmd[LIT_POSITION_W] = 0;
+	       fcmd[LIT_DIRECTION_W] = 0;
+	    } else {
+	       COPY_4V( &fcmd[LIT_POSITION_X], l->_Position );
+	       fcmd[LIT_DIRECTION_X] = -l->_NormSpotDirection[0];
+	       fcmd[LIT_DIRECTION_Y] = -l->_NormSpotDirection[1];
+	       fcmd[LIT_DIRECTION_Z] = -l->_NormSpotDirection[2];
+	       fcmd[LIT_DIRECTION_W] = 0;
+	    }
+
+	    RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] );
+	 }
+      }
+   }
+}
+
+static void radeonLightfv( GLcontext *ctx, GLenum light,
+			   GLenum pname, const GLfloat *params )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLint p = light - GL_LIGHT0;
+   struct gl_light *l = &ctx->Light.Light[p];
+   GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd;
+   /* 'fcmd' views the live lit[p] command words as floats. */
+
+   switch (pname) {
+   case GL_AMBIENT:
+   case GL_DIFFUSE:
+   case GL_SPECULAR:
+      update_light_colors( ctx, p );
+      break;
+
+   case GL_SPOT_DIRECTION:
+      /* picked up in update_light */
+      break;
+
+   case GL_POSITION: {
+      /* positions picked up in update_light, but can do flag here */
+      GLuint flag;
+      GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
+
+      /* RANGE_ATTEN is recomputed by the second switch further down */
+      if (p&1)
+	 flag = RADEON_LIGHT_1_IS_LOCAL;
+      else
+	 flag = RADEON_LIGHT_0_IS_LOCAL;
+
+      RADEON_STATECHANGE(rmesa, tcl);
+      if (l->EyePosition[3] != 0.0F)
+	 rmesa->hw.tcl.cmd[idx] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[idx] &= ~flag;
+      break;
+   }
+
+   case GL_SPOT_EXPONENT:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_EXPONENT] = params[0];
+      break;
+
+   case GL_SPOT_CUTOFF: {
+      GLuint flag = (p&1) ? RADEON_LIGHT_1_IS_SPOT : RADEON_LIGHT_0_IS_SPOT;
+      GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
+
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_SPOT_CUTOFF] = l->_CosCutoff;
+
+      RADEON_STATECHANGE(rmesa, tcl);
+      if (l->SpotCutoff != 180.0F)
+	 rmesa->hw.tcl.cmd[idx] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[idx] &= ~flag;
+
+      break;
+   }
+
+   case GL_CONSTANT_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_CONST] = params[0];
+      if ( params[0] == 0.0 )
+	 fcmd[LIT_ATTEN_CONST_INV] = FLT_MAX;
+      else
+	 fcmd[LIT_ATTEN_CONST_INV] = 1.0 / params[0];
+      break;
+   case GL_LINEAR_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_LINEAR] = params[0];
+      break;
+   case GL_QUADRATIC_ATTENUATION:
+      RADEON_STATECHANGE(rmesa, lit[p]);
+      fcmd[LIT_ATTEN_QUADRATIC] = params[0];
+      break;
+   default:
+      return;
+   }
+
+   /* Set RANGE_ATTEN only when needed */
+   switch (pname) {
+   case GL_POSITION:
+   case GL_CONSTANT_ATTENUATION:
+   case GL_LINEAR_ATTENUATION:
+   case GL_QUADRATIC_ATTENUATION:
+   {
+      GLuint *icmd = (GLuint *)RADEON_DB_STATE( tcl );
+      GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2;
+      GLuint atten_flag = ( p&1 ) ? RADEON_LIGHT_1_ENABLE_RANGE_ATTEN
+				  : RADEON_LIGHT_0_ENABLE_RANGE_ATTEN;
+      GLuint atten_const_flag = ( p&1 ) ? RADEON_LIGHT_1_CONSTANT_RANGE_ATTEN
+				  : RADEON_LIGHT_0_CONSTANT_RANGE_ATTEN;
+
+      if ( l->EyePosition[3] == 0.0F ||
+	   ( ( fcmd[LIT_ATTEN_CONST] == 0.0 || fcmd[LIT_ATTEN_CONST] == 1.0 ) &&
+	     fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) ) {
+	 /* Disable attenuation */
+	 icmd[idx] &= ~atten_flag;
+      } else {
+	 if ( fcmd[LIT_ATTEN_QUADRATIC] == 0.0 && fcmd[LIT_ATTEN_LINEAR] == 0.0 ) {
+	    /* Enable only constant portion of attenuation calculation */
+	    icmd[idx] |= ( atten_flag | atten_const_flag );
+	 } else {
+	    /* Enable full attenuation calculation */
+	    icmd[idx] &= ~atten_const_flag;
+	    icmd[idx] |= atten_flag;
+	 }
+      }
+
+      RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.tcl );
+      break;
+   }
+   default:
+      break;
+   }
+}
+
+
+
+
+static void radeonLightModelfv( GLcontext *ctx, GLenum pname,
+				const GLfloat *param )
+{
+   /* Dispatch on the light-model parameter that changed.
+    *
+    * Values are re-read from ctx->Light.Model ('param' is not used)
+    * and pnames not listed here are ignored.
+    */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+   if (pname == GL_LIGHT_MODEL_AMBIENT) {
+      /* global ambient is written to the glt atom */
+      update_global_ambient( ctx );
+   }
+   else if (pname == GL_LIGHT_MODEL_LOCAL_VIEWER) {
+      const GLuint bit = ctx->Light.Model.LocalViewer ? RADEON_LOCAL_VIEWER : 0;
+
+      RADEON_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_LOCAL_VIEWER;
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= bit;
+   }
+   else if (pname == GL_LIGHT_MODEL_TWO_SIDE) {
+      const GLuint bit = ctx->Light.Model.TwoSide ? RADEON_LIGHT_TWOSIDE : 0;
+
+      RADEON_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_LIGHT_TWOSIDE;
+      rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= bit;
+
+      check_twoside_fallback( ctx );
+
+      /* With a TCL fallback active the render/vertex state is re-chosen. */
+      if (rmesa->radeon.TclFallback) {
+         radeonChooseRenderState( ctx );
+         radeonChooseVertexState( ctx );
+      }
+   }
+   else if (pname == GL_LIGHT_MODEL_COLOR_CONTROL) {
+      /* colour control is handled entirely by radeonUpdateSpecular */
+      radeonUpdateSpecular(ctx);
+   }
+}
+
+static void radeonShadeModel( GLcontext *ctx, GLenum mode )
+{
+   /* Switch diffuse, alpha, specular and fog interpolation together
+    * between flat and Gouraud.  Any other 'mode' is ignored, and the
+    * set atom is only dirtied when SE_CNTL really changes.
+    */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLuint old_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
+   GLuint shade_bits;
+   GLuint new_cntl;
+
+   if ( mode == GL_FLAT ) {
+      shade_bits = RADEON_DIFFUSE_SHADE_FLAT | RADEON_ALPHA_SHADE_FLAT |
+                   RADEON_SPECULAR_SHADE_FLAT | RADEON_FOG_SHADE_FLAT;
+   } else if ( mode == GL_SMOOTH ) {
+      shade_bits = RADEON_DIFFUSE_SHADE_GOURAUD | RADEON_ALPHA_SHADE_GOURAUD |
+                   RADEON_SPECULAR_SHADE_GOURAUD | RADEON_FOG_SHADE_GOURAUD;
+   } else {
+      return;
+   }
+
+   new_cntl = old_cntl & ~(RADEON_DIFFUSE_SHADE_MASK |
+                           RADEON_ALPHA_SHADE_MASK |
+                           RADEON_SPECULAR_SHADE_MASK |
+                           RADEON_FOG_SHADE_MASK);
+   new_cntl |= shade_bits;
+
+   if ( new_cntl != old_cntl ) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = new_cntl;
+   }
+}
+
+
+/* =============================================================
+ * User clip planes
+ */
+
+static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+{
+   /* Uploads _ClipUserPlane[] for this plane as raw words; 'eq' is not read. */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLint idx = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+   const GLint *words = (GLint *)ctx->Transform._ClipUserPlane[idx];
+   RADEON_STATECHANGE( rmesa, ucp[idx] );
+   rmesa->hw.ucp[idx].cmd[UCP_X] = words[0];
+   rmesa->hw.ucp[idx].cmd[UCP_Y] = words[1];
+   rmesa->hw.ucp[idx].cmd[UCP_Z] = words[2];
+   rmesa->hw.ucp[idx].cmd[UCP_W] = words[3];
+}
+
+static void radeonUpdateClipPlanes( GLcontext *ctx )
+{
+   /* Re-upload every enabled user clip plane from _ClipUserPlane[]. */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint plane;
+   for (plane = 0; plane < ctx->Const.MaxClipPlanes; plane++) {
+      const GLint *words;
+      if (!(ctx->Transform.ClipPlanesEnabled & (1 << plane)))
+         continue;
+      words = (GLint *)ctx->Transform._ClipUserPlane[plane];
+      RADEON_STATECHANGE( rmesa, ucp[plane] );
+      rmesa->hw.ucp[plane].cmd[UCP_X] = words[0];
+      rmesa->hw.ucp[plane].cmd[UCP_Y] = words[1];
+      rmesa->hw.ucp[plane].cmd[UCP_Z] = words[2];
+      rmesa->hw.ucp[plane].cmd[UCP_W] = words[3];
+   }
+}
+
+
+/* =============================================================
+ * Stencil
+ */
+
+static void
+radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
+                           GLint ref, GLuint mask )
+{
+   /* Program the stencil test function together with the reference
+    * value and compare mask.  Everything is read from index 0 of
+    * ctx->Stencil; the 'face', 'func', 'ref' and 'mask' arguments are
+    * not consulted.  An unrecognised function leaves the test field
+    * cleared.
+    */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLuint ref_bits =
+      (ctx->Stencil.Ref[0] & 0xff) << RADEON_STENCIL_REF_SHIFT;
+   const GLuint mask_bits =
+      (ctx->Stencil.ValueMask[0] & 0xff) << RADEON_STENCIL_MASK_SHIFT;
+   GLuint test = 0;
+
+   /* Translate the GL compare function into its hardware encoding. */
+   switch ( ctx->Stencil.Function[0] ) {
+   case GL_NEVER:    test = RADEON_STENCIL_TEST_NEVER;    break;
+   case GL_LESS:     test = RADEON_STENCIL_TEST_LESS;     break;
+   case GL_EQUAL:    test = RADEON_STENCIL_TEST_EQUAL;    break;
+   case GL_LEQUAL:   test = RADEON_STENCIL_TEST_LEQUAL;   break;
+   case GL_GREATER:  test = RADEON_STENCIL_TEST_GREATER;  break;
+   case GL_NOTEQUAL: test = RADEON_STENCIL_TEST_NEQUAL;   break;
+   case GL_GEQUAL:   test = RADEON_STENCIL_TEST_GEQUAL;   break;
+   case GL_ALWAYS:   test = RADEON_STENCIL_TEST_ALWAYS;   break;
+   default:
+      break;
+   }
+
+   /* Both the ctx and msk atoms are rewritten below. */
+   RADEON_STATECHANGE( rmesa, ctx );
+   RADEON_STATECHANGE( rmesa, msk );
+
+   /* Replace the test-function field. */
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_STENCIL_TEST_MASK;
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= test;
+
+   /* Replace reference value and compare mask. */
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~(RADEON_STENCIL_REF_MASK |
+                                                   RADEON_STENCIL_VALUE_MASK);
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] |= ref_bits | mask_bits;
+}
+
+static void
+radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
+{
+   /* Uses ctx->Stencil.WriteMask[0]; 'face' and 'mask' are not read. */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLuint wbits = (ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT;
+   RADEON_STATECHANGE( rmesa, msk );
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] =
+      (rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] & ~RADEON_STENCIL_WRITE_MASK) | wbits;
+}
+
+static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail,
+                                     GLenum zfail, GLenum zpass )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+   /* Radeon 7200 has a stencil bug: DEC and INC_WRAP both behave as DEC_WRAP,
+      and DEC_WRAP (like INVERT) behaves as INVERT.  INC_WRAP and DEC cannot be
+      made correct, so on such chips DEC_WRAP is sent as DEC and INC_WRAP as INC. */
+
+   GLuint tempRADEON_STENCIL_FAIL_DEC_WRAP;
+   GLuint tempRADEON_STENCIL_FAIL_INC_WRAP;
+   GLuint tempRADEON_STENCIL_ZFAIL_DEC_WRAP;
+   GLuint tempRADEON_STENCIL_ZFAIL_INC_WRAP;
+   GLuint tempRADEON_STENCIL_ZPASS_DEC_WRAP;
+   GLuint tempRADEON_STENCIL_ZPASS_INC_WRAP;
+
+   if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) {
+      tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC;
+      tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC;
+      tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC;
+      tempRADEON_STENCIL_ZFAIL_INC_WRAP = RADEON_STENCIL_ZFAIL_INC;
+      tempRADEON_STENCIL_ZPASS_DEC_WRAP = RADEON_STENCIL_ZPASS_DEC;
+      tempRADEON_STENCIL_ZPASS_INC_WRAP = RADEON_STENCIL_ZPASS_INC;
+   }
+   else {
+      tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC_WRAP;
+      tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC_WRAP;
+      tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC_WRAP;
+      tempRADEON_STENCIL_ZFAIL_INC_WRAP = RADEON_STENCIL_ZFAIL_INC_WRAP;
+      tempRADEON_STENCIL_ZPASS_DEC_WRAP = RADEON_STENCIL_ZPASS_DEC_WRAP;
+      tempRADEON_STENCIL_ZPASS_INC_WRAP = RADEON_STENCIL_ZPASS_INC_WRAP;
+   }
+
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(RADEON_STENCIL_FAIL_MASK |
+					       RADEON_STENCIL_ZFAIL_MASK |
+					       RADEON_STENCIL_ZPASS_MASK);
+   /* One switch each for FailFunc[0], ZFailFunc[0] and ZPassFunc[0]. */
+   switch ( ctx->Stencil.FailFunc[0] ) {
+   case GL_KEEP:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_KEEP;
+      break;
+   case GL_ZERO:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_ZERO;
+      break;
+   case GL_REPLACE:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_REPLACE;
+      break;
+   case GL_INCR:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_INC;
+      break;
+   case GL_DECR:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_DEC;
+      break;
+   case GL_INCR_WRAP:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_FAIL_INC_WRAP;
+      break;
+   case GL_DECR_WRAP:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_FAIL_DEC_WRAP;
+      break;
+   case GL_INVERT:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_FAIL_INVERT;
+      break;
+   }
+
+   switch ( ctx->Stencil.ZFailFunc[0] ) {
+   case GL_KEEP:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_KEEP;
+      break;
+   case GL_ZERO:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_ZERO;
+      break;
+   case GL_REPLACE:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_REPLACE;
+      break;
+   case GL_INCR:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_INC;
+      break;
+   case GL_DECR:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_DEC;
+      break;
+   case GL_INCR_WRAP:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_ZFAIL_INC_WRAP;
+      break;
+   case GL_DECR_WRAP:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_ZFAIL_DEC_WRAP;
+      break;
+   case GL_INVERT:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZFAIL_INVERT;
+      break;
+   }
+
+   switch ( ctx->Stencil.ZPassFunc[0] ) {
+   case GL_KEEP:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_KEEP;
+      break;
+   case GL_ZERO:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_ZERO;
+      break;
+   case GL_REPLACE:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_REPLACE;
+      break;
+   case GL_INCR:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_INC;
+      break;
+   case GL_DECR:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_DEC;
+      break;
+   case GL_INCR_WRAP:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_ZPASS_INC_WRAP;
+      break;
+   case GL_DECR_WRAP:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= tempRADEON_STENCIL_ZPASS_DEC_WRAP;
+      break;
+   case GL_INVERT:
+      rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_STENCIL_ZPASS_INVERT;
+      break;
+   }
+}
+
+/* Cache the packed stencil clear word: clear value, full 0xff mask field, write mask. */
+static void radeonClearStencil( GLcontext *ctx, GLint s )
+{
+   const GLuint value = (GLuint) (ctx->Stencil.Clear & 0xff);
+   const GLuint wmask = (ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT;
+
+   /* 's' is not read; the clear value is taken from ctx->Stencil.Clear. */
+   R100_CONTEXT(ctx)->radeon.state.stencil.clear = value | (0xff << RADEON_STENCIL_MASK_SHIFT) | wmask;
+}
+
+
+/* =============================================================
+ * Window position and viewport transformation
+ */
+
+/*
+ * Sub-pixel bias added to the viewport X/Y offsets to correctly position primitives:
+ */
+#define SUBPIXEL_X 0.125
+#define SUBPIXEL_Y 0.125
+
+
+/**
+ * Called when window size or position changes or viewport or depth range
+ * state is changed.  Recomputes the six hardware viewport scale/offset words
+ * from the GL window map, the drawable position and the depth scale.
+ */
+void radeonUpdateWindow( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   /* Without a drawable both window offsets are zero. */
+   const GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
+   const GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
+   const GLboolean render_to_fbo = (ctx->DrawBuffer ? (ctx->DrawBuffer->Name != 0) : 0);
+   /* NOTE(review): DrawBuffer is dereferenced unguarded here, unlike the line above. */
+   const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   /* FBO rendering keeps Y as-is with no bias; window rendering negates Y
+    * and biases it by yoffset.
+    */
+   const GLfloat y_scale = render_to_fbo ? 1.0 : -1.0;
+   const GLfloat y_bias = render_to_fbo ? 0 : yoffset;
+
+   /* Each value is written to the command words below through its .ui32 member. */
+   float_ui32_type sx = { v[MAT_SX] };
+   float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X };
+   float_ui32_type sy = { v[MAT_SY] * y_scale };
+   float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y };
+   float_ui32_type sz = { v[MAT_SZ] * depthScale };
+   float_ui32_type tz = { v[MAT_TZ] * depthScale };
+
+   RADEON_STATECHANGE( rmesa, vpt );
+
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = sx.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = sy.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = sz.ui32;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = tz.ui32;
+}
+
+
+static void radeonViewport( GLcontext *ctx, GLint x, GLint y,
+			    GLsizei width, GLsizei height )
+{
+   /* Driver Viewport hook.  Don't pipeline viewport changes: they conflict
+    * with the window offset setting.  Could apply deltas to rescue
+    * pipelined viewport values, or keep the originals hanging around.
+    */
+   radeonUpdateWindow( ctx );   /* refresh the hardware viewport words first */
+
+   radeon_viewport(ctx, x, y, width, height);   /* then forward the rectangle */
+}
+
+static void radeonDepthRange( GLcontext *ctx, GLclampd nearval,
+			      GLclampd farval )
+{
+   radeonUpdateWindow( ctx );   /* nearval/farval are not read here; only ctx is passed on */
+}
+
+void radeonUpdateViewportOffset( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   const GLfloat xoffset = (GLfloat)dPriv->x;
+   const GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h;
+   float_ui32_type tx, ty;
+   GLboolean offsets_changed;
+
+   /* Re-derive the viewport X/Y offsets and the polygon stipple origin from
+    * the drawable position, then refresh the scissor state.
+    * NOTE(review): dPriv is used without a NULL check and Y is always
+    * negated, unlike radeonUpdateWindow() -- confirm that is intended. */
+   tx.f = v[MAT_TX] + xoffset + SUBPIXEL_X;
+   ty.f = (- v[MAT_TY]) + yoffset + SUBPIXEL_Y;
+
+   offsets_changed = (rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] != tx.ui32 ||
+		      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] != ty.ui32);
+
+   if ( offsets_changed ) {
+      GLuint stipple_x, stipple_y, misc;
+
+      /* Note: this should also modify whatever data the context reset
+       * code uses...
+       */
+      RADEON_STATECHANGE( rmesa, vpt );
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = tx.ui32;
+      rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = ty.ui32;
+
+      /* update polygon stipple x/y screen offset: clear both offset fields first */
+      misc = rmesa->hw.msc.cmd[MSC_RE_MISC];
+      misc &= ~(RADEON_STIPPLE_X_OFFSET_MASK | RADEON_STIPPLE_Y_OFFSET_MASK);
+
+      /* add magic offsets, then invert */
+      stipple_x = 31 - ((dPriv->x - 1) & RADEON_STIPPLE_COORD_MASK);
+      stipple_y = 31 - ((dPriv->y + dPriv->h - 1) & RADEON_STIPPLE_COORD_MASK);
+
+      misc |= (stipple_x << RADEON_STIPPLE_X_OFFSET_SHIFT);
+      misc |= (stipple_y << RADEON_STIPPLE_Y_OFFSET_SHIFT);
+
+      if ( rmesa->hw.msc.cmd[MSC_RE_MISC] != misc ) {
+         RADEON_STATECHANGE( rmesa, msc );
+         rmesa->hw.msc.cmd[MSC_RE_MISC] = misc;
+      }
+   }
+
+   radeonUpdateScissor( ctx );
+}
+
+
+
+/* =============================================================
+ * Miscellaneous
+ */
+
+static void radeonClearColor( GLcontext *ctx, const GLfloat color[4] )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&rmesa->radeon);
+   GLubyte c[4];
+   int i;
+
+   /* Without a color renderbuffer there is no cpp to pack for: leave the
+    * cached clear color untouched.
+    */
+   if (rrb) {
+      for (i = 0; i < 4; i++) {
+         CLAMPED_FLOAT_TO_UBYTE(c[i], color[i]);
+      }
+      rmesa->radeon.state.color.clear = radeonPackColor( rrb->cpp, c[0], c[1], c[2], c[3] );
+   }
+}
+
+
+static void radeonRenderMode( GLcontext *ctx, GLenum mode )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   FALLBACK( rmesa, RADEON_FALLBACK_RENDER_MODE, (mode != GL_RENDER) );   /* fallback mode is true for any render mode other than GL_RENDER */
+}
+
+
+static const GLuint radeon_rop_tab[] = {   /* read-only; indexed by (opcode - GL_CLEAR) in radeonLogicOpCode() */
+   RADEON_ROP_CLEAR,
+   RADEON_ROP_AND,
+   RADEON_ROP_AND_REVERSE,
+   RADEON_ROP_COPY,
+   RADEON_ROP_AND_INVERTED,
+   RADEON_ROP_NOOP,
+   RADEON_ROP_XOR,
+   RADEON_ROP_OR,
+   RADEON_ROP_NOR,
+   RADEON_ROP_EQUIV,
+   RADEON_ROP_INVERT,
+   RADEON_ROP_OR_REVERSE,
+   RADEON_ROP_COPY_INVERTED,
+   RADEON_ROP_OR_INVERTED,
+   RADEON_ROP_NAND,
+   RADEON_ROP_SET,
+};
+
+static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint rop = (GLuint)opcode - GL_CLEAR;   /* table index: offset of opcode from GL_CLEAR */
+
+   ASSERT( rop < 16 );   /* radeon_rop_tab[] lists 16 entries */
+
+   RADEON_STATECHANGE( rmesa, msk );
+   rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = radeon_rop_tab[rop];
+}
+
+/* =============================================================
+ * State enable/disable
+ */
+
+static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint p, flag;
+   /* Driver Enable hook: mirror the new on/off 'state' of 'cap' into hardware state. */
+   if ( RADEON_DEBUG & RADEON_STATE )
+      fprintf( stderr, "%s( %s = %s )\n", __FUNCTION__,
+	       _mesa_lookup_enum_by_nr( cap ),
+	       state ? "GL_TRUE" : "GL_FALSE" );
+
+   switch ( cap ) {
+      /* Fast track this one...  (the texture targets need no work here)
+       */
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_3D:
+      break;
+
+   case GL_ALPHA_TEST:
+      RADEON_STATECHANGE( rmesa, ctx );
+      if (state) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_ALPHA_TEST_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ALPHA_TEST_ENABLE;
+      }
+      break;
+
+   case GL_BLEND:
+      RADEON_STATECHANGE( rmesa, ctx );
+      if (state) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ALPHA_BLEND_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ALPHA_BLEND_ENABLE;
+      }
+      if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled
+	    && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;   /* logic op enabled directly, or via a GL_LOGIC_OP blend equation */
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
+      }
+
+      /* Catch a possible fallback: when enabling, re-run the blend hooks;
+       * when disabling, clear both blend fallback bits. */
+      if (state) {
+	 ctx->Driver.BlendEquationSeparate( ctx,
+					    ctx->Color.BlendEquationRGB,
+					    ctx->Color.BlendEquationA );
+	 ctx->Driver.BlendFuncSeparate( ctx, ctx->Color.BlendSrcRGB,
+					ctx->Color.BlendDstRGB,
+					ctx->Color.BlendSrcA,
+					ctx->Color.BlendDstA );
+      }
+      else {
+	 FALLBACK( rmesa, RADEON_FALLBACK_BLEND_FUNC, GL_FALSE );
+	 FALLBACK( rmesa, RADEON_FALLBACK_BLEND_EQ, GL_FALSE );
+      }
+      break;
+
+   case GL_CLIP_PLANE0:
+   case GL_CLIP_PLANE1:
+   case GL_CLIP_PLANE2:
+   case GL_CLIP_PLANE3:
+   case GL_CLIP_PLANE4:
+   case GL_CLIP_PLANE5:
+      p = cap-GL_CLIP_PLANE0;   /* plane index, used as the shift of the enable bit */
+      RADEON_STATECHANGE( rmesa, tcl );
+      if (state) {
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] |= (RADEON_UCP_ENABLE_0<<p);
+	 radeonClipPlane( ctx, cap, NULL );
+      }
+      else {
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~(RADEON_UCP_ENABLE_0<<p);
+      }
+      break;
+
+   case GL_COLOR_MATERIAL:
+      radeonColorMaterial( ctx, 0, 0 );
+      radeonUpdateMaterial( ctx );
+      break;
+
+   case GL_CULL_FACE:
+      radeonCullFace( ctx, 0 );
+      break;
+
+   case GL_DEPTH_TEST:
+      RADEON_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_Z_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_Z_ENABLE;
+      }
+      break;
+
+   case GL_DITHER:
+      RADEON_STATECHANGE(rmesa, ctx );   /* dither on: clear the roundEnable bits; dither off: set them */
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_DITHER_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE;
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  rmesa->radeon.state.color.roundEnable;
+      }
+      break;
+
+   case GL_FOG:
+      RADEON_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= RADEON_FOG_ENABLE;
+	 radeonFogfv( ctx, GL_FOG_MODE, NULL );
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_FOG_ENABLE;
+	 RADEON_STATECHANGE(rmesa, tcl);
+	 rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~RADEON_TCL_FOG_MASK;
+      }
+      radeonUpdateSpecular( ctx ); /* for PK_SPEC */
+      _mesa_allow_light_in_model( ctx, !state );
+      break;
+
+   case GL_LIGHT0:
+   case GL_LIGHT1:
+   case GL_LIGHT2:
+   case GL_LIGHT3:
+   case GL_LIGHT4:
+   case GL_LIGHT5:
+   case GL_LIGHT6:
+   case GL_LIGHT7:
+      RADEON_STATECHANGE(rmesa, tcl);
+      p = cap - GL_LIGHT0;
+      if (p&1)
+	 flag = (RADEON_LIGHT_1_ENABLE |
+		 RADEON_LIGHT_1_ENABLE_AMBIENT |
+		 RADEON_LIGHT_1_ENABLE_SPECULAR);
+      else
+	 flag = (RADEON_LIGHT_0_ENABLE |
+		 RADEON_LIGHT_0_ENABLE_AMBIENT |
+		 RADEON_LIGHT_0_ENABLE_SPECULAR);
+
+      if (state)
+	 rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] |= flag;
+      else
+	 rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag;
+
+      /* Two lights share one PER_LIGHT_CTL word: p/2 picked the word above,
+       * p&1 the flag set. */
+      update_light_colors( ctx, p );
+      break;
+
+   case GL_LIGHTING:
+      RADEON_STATECHANGE(rmesa, tcl);
+      radeonUpdateSpecular(ctx);
+      check_twoside_fallback( ctx );
+      break;
+
+   case GL_LINE_SMOOTH:
+      RADEON_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_ANTI_ALIAS_LINE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ANTI_ALIAS_LINE;
+      }
+      break;
+
+   case GL_LINE_STIPPLE:
+      RADEON_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_PATTERN_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_PATTERN_ENABLE;
+      }
+      break;
+
+   case GL_COLOR_LOGIC_OP:
+      RADEON_STATECHANGE( rmesa, ctx );   /* same ROP-enable rule as in the GL_BLEND case */
+      if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled
+	    && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_ROP_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE;
+      }
+      break;
+
+   case GL_NORMALIZE:
+      RADEON_STATECHANGE( rmesa, tcl );
+      if ( state ) {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |=  RADEON_NORMALIZE_NORMALS;
+      } else {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_NORMALIZE_NORMALS;
+      }
+      break;
+
+   case GL_POLYGON_OFFSET_POINT:
+      RADEON_STATECHANGE( rmesa, set );
+      if ( state ) {
+	 rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_POINT;
+      } else {
+	 rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_POINT;
+      }
+      break;
+
+   case GL_POLYGON_OFFSET_LINE:
+      RADEON_STATECHANGE( rmesa, set );
+      if ( state ) {
+	 rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_LINE;
+      } else {
+	 rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_LINE;
+      }
+      break;
+
+   case GL_POLYGON_OFFSET_FILL:
+      RADEON_STATECHANGE( rmesa, set );
+      if ( state ) {
+	 rmesa->hw.set.cmd[SET_SE_CNTL] |=  RADEON_ZBIAS_ENABLE_TRI;
+      } else {
+	 rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_ZBIAS_ENABLE_TRI;
+      }
+      break;
+
+   case GL_POLYGON_SMOOTH:
+      RADEON_STATECHANGE( rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_ANTI_ALIAS_POLY;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_ANTI_ALIAS_POLY;
+      }
+      break;
+
+   case GL_POLYGON_STIPPLE:
+      RADEON_STATECHANGE(rmesa, ctx );
+      if ( state ) {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] |=  RADEON_STIPPLE_ENABLE;
+      } else {
+	 rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~RADEON_STIPPLE_ENABLE;
+      }
+      break;
+
+   case GL_RESCALE_NORMAL_EXT: {
+      GLboolean tmp = ctx->_NeedEyeCoords ? state : !state;   /* sense is inverted when eye coords are not needed */
+      RADEON_STATECHANGE( rmesa, tcl );
+      if ( tmp ) {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |=  RADEON_RESCALE_NORMALS;
+      } else {
+	 rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS;
+      }
+      break;
+   }
+
+   case GL_SCISSOR_TEST:
+      radeon_firevertices(&rmesa->radeon);
+      rmesa->radeon.state.scissor.enabled = state;
+      radeonUpdateScissor( ctx );
+      break;
+
+   case GL_STENCIL_TEST:
+      {
+	 GLboolean hw_stencil = GL_FALSE;   /* true only if the draw buffer has a stencil renderbuffer with a bo */
+	 if (ctx->DrawBuffer) {
+	    struct radeon_renderbuffer *rrbStencil
+	       = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+	    hw_stencil = (rrbStencil && rrbStencil->bo);
+	 }
+
+	 if (hw_stencil) {
+	    RADEON_STATECHANGE( rmesa, ctx );
+	    if ( state ) {
+	       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |=  RADEON_STENCIL_ENABLE;
+	    } else {
+	       rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE;
+	    }
+	 } else {
+	    FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state );   /* no such buffer: the stencil fallback follows 'state' */
+	 }
+      }
+      break;
+
+   case GL_TEXTURE_GEN_Q:
+   case GL_TEXTURE_GEN_R:
+   case GL_TEXTURE_GEN_S:
+   case GL_TEXTURE_GEN_T:
+      /* Picked up in radeonUpdateTextureState.
+       */
+      rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE;
+      break;
+
+   case GL_COLOR_SUM_EXT:
+      radeonUpdateSpecular ( ctx );
+      break;
+
+   default:
+      return;   /* capability not handled by this hook */
+   }
+}
+
+
+static void radeonLightingSpaceChange( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLboolean rescale;
+   RADEON_STATECHANGE( rmesa, tcl );
+
+   if (RADEON_DEBUG & RADEON_STATE) {
+      fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
+	      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]);
+   }
+
+   /* The hardware rescale bit follows Transform.RescaleNormals when eye
+    * coords are needed, and its inverse when they are not. */
+   rescale = ctx->_NeedEyeCoords ? ctx->Transform.RescaleNormals
+				 : !ctx->Transform.RescaleNormals;
+   if ( rescale )
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |=  RADEON_RESCALE_NORMALS;
+   else
+      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS;
+
+   if (RADEON_DEBUG & RADEON_STATE) {
+      fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords,
+	      rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]);
+   }
+}
+
+/* =============================================================
+ * Deferred state management - matrices, textures, other?
+ */
+
+
+void radeonUploadTexMatrix( r100ContextPtr rmesa,
+			    int unit, GLboolean swapcols )
+{
+/* Here's how this works: on r100, only 3 tex coords can be submitted, so the
+   vector looks like this probably: (s t r|q 0) (not sure if the last coord
+   is hardwired to 0, could be 1 too). Interestingly, it actually looks like
+   texgen generates all 4 coords, at least tests with projtex indicated that.
+   So: if we need the q coord in the end (solely determined by the texture
+   target, i.e. 2d / 1d / texrect targets) we swap the third and 4th row.
+   Additionally, if we don't have texgen but 4 tex coords submitted, we swap
+   column 3 and 4 (for the 2d / 1d / texrect targets) since the q coord
+   will get submitted in the "wrong", i.e. 3rd, slot.
+   If an app submits 3 coords for 2d targets, we assume it is saving on vertex
+   size and using the texture matrix to swap the r and q coords around (ut2k3
+   does exactly that), so we don't need the 3rd / 4th column swap - still need
+   the 3rd / 4th row swap of course. This will potentially break for apps which
+   use TexCoord3x just for fun. Additionally, it will never work if an app uses
+   an "advanced" texture matrix and relies on all 4 texcoord inputs to generate
+   the maximum needed 3. This seems impossible to do with hw tcl on r100, and
+   incredibly hard to detect so we can't just fallback in such a case. Assume
+   it never happens... - rs
+*/
+
+   int i, idx = TEXMAT_0 + unit;
+   float *dest = ((float *)RADEON_DB_STATE( mat[idx] )) + MAT_ELT_0;
+   /* Borrow the unit by pointer: only _ReallyEnabled is read, so no struct copy is needed. */
+   const struct gl_texture_unit *tUnit = &rmesa->radeon.glCtx->Texture.Unit[unit];
+   GLfloat *src = rmesa->tmpmat[unit].m;
+
+   rmesa->TexMatColSwap &= ~(1 << unit);
+   if ((tUnit->_ReallyEnabled & (TEXTURE_3D_BIT | TEXTURE_CUBE_BIT)) == 0) {
+      if (swapcols) {
+	 rmesa->TexMatColSwap |= 1 << unit;
+	 /* attention some elems are swapped 2 times! */
+	 *dest++ = src[0];
+	 *dest++ = src[4];
+	 *dest++ = src[12];
+	 *dest++ = src[8];
+	 *dest++ = src[1];
+	 *dest++ = src[5];
+	 *dest++ = src[13];
+	 *dest++ = src[9];
+	 *dest++ = src[2];
+	 *dest++ = src[6];
+	 *dest++ = src[15];
+	 *dest++ = src[11];
+	 /* those last 4 are probably never used */
+	 *dest++ = src[3];
+	 *dest++ = src[7];
+	 *dest++ = src[14];
+	 *dest++ = src[10];
+      }
+      else {
+	 for (i = 0; i < 2; i++) {
+	    *dest++ = src[i];
+	    *dest++ = src[i+4];
+	    *dest++ = src[i+8];
+	    *dest++ = src[i+12];
+	 }
+	 for (i = 3; i >= 2; i--) {
+	    *dest++ = src[i];
+	    *dest++ = src[i+4];
+	    *dest++ = src[i+8];
+	    *dest++ = src[i+12];
+	 }
+      }
+   }
+   else {
+      for (i = 0 ; i < 4 ; i++) {
+	 *dest++ = src[i];
+	 *dest++ = src[i+4];
+	 *dest++ = src[i+8];
+	 *dest++ = src[i+12];
+      }
+   }
+
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+}
+
+
+static void upload_matrix( r100ContextPtr rmesa, GLfloat *src, int idx )
+{
+   /* Write position: the RADEON_DB_STATE pointer for mat[idx], offset by MAT_ELT_0. */
+   float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
+   int row, col;
+
+   /* Store the transpose: dest[4*row + col] receives src[row + 4*col]. */
+   for (row = 0 ; row < 4 ; row++) {
+      for (col = 0 ; col < 4 ; col++) {
+	 dest[4*row + col] = src[row + 4*col];
+      }
+   }
+
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+}
+
+static void upload_matrix_t( r100ContextPtr rmesa, GLfloat *src, int idx )
+{
+   float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0;
+   memcpy(dest, src, 16*sizeof(float));   /* straight 16-float copy: unlike upload_matrix(), element order is kept */
+   RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] );
+}
+
+
+static void update_texturematrix( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
+   GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL];   /* NOTE(review): reassigned below before it is read */
+   GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL];
+   int unit;
+   GLuint texMatEnabled = 0;
+   rmesa->NeedTexMatrix = 0;
+   rmesa->TexMatColSwap = 0;
+   /* Per enabled unit: build tmpmat[unit] from the texture and/or texgen matrix and upload it. */
+   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
+      if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+	 GLboolean needMatrix = GL_FALSE;
+	 if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) {
+	    needMatrix = GL_TRUE;
+	    texMatEnabled |= (RADEON_TEXGEN_TEXMAT_0_ENABLE |
+			      RADEON_TEXMAT_0_ENABLE) << unit;
+
+	    if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
+	       /* Need to preconcatenate any active texgen
+	        * obj/eyeplane matrices:
+	        */
+	       _math_matrix_mul_matrix( &rmesa->tmpmat[unit],
+				     ctx->TextureMatrixStack[unit].Top,
+				     &rmesa->TexGenMatrix[unit] );
+	    }
+	    else {
+	       _math_matrix_copy( &rmesa->tmpmat[unit],
+		  ctx->TextureMatrixStack[unit].Top );
+	    }
+	 }
+	 else if (rmesa->TexGenEnabled & (RADEON_TEXMAT_0_ENABLE << unit)) {
+	    _math_matrix_copy( &rmesa->tmpmat[unit], &rmesa->TexGenMatrix[unit] );   /* identity texture matrix: the texgen matrix alone */
+	    needMatrix = GL_TRUE;
+	 }
+	 if (needMatrix) {
+	    rmesa->NeedTexMatrix |= 1 << unit;
+	    radeonUploadTexMatrix( rmesa, unit,
+			!ctx->Texture.Unit[unit].TexGenEnabled );   /* swapcols only when the unit has no texgen enabled */
+	 }
+      }
+   }
+
+   tpc = (texMatEnabled | rmesa->TexGenEnabled);
+
+   /* TCL_TEX_COMPUTED_x is TCL_TEX_INPUT_x | 0x8 */
+   vs &= ~((RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
+	   (RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_1_OUTPUT_SHIFT) |
+	   (RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_2_OUTPUT_SHIFT));
+
+   vs |= (((tpc & RADEON_TEXGEN_TEXMAT_0_ENABLE) <<
+	 (RADEON_TCL_TEX_0_OUTPUT_SHIFT + 3)) |
+      ((tpc & RADEON_TEXGEN_TEXMAT_1_ENABLE) <<
+	 (RADEON_TCL_TEX_1_OUTPUT_SHIFT + 2)) |
+      ((tpc & RADEON_TEXGEN_TEXMAT_2_ENABLE) <<
+	 (RADEON_TCL_TEX_2_OUTPUT_SHIFT + 1)));
+
+   if (tpc != rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] ||
+       vs != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL]) {
+      /* Only flag the tcl atom when one of the two words really changes. */
+      RADEON_STATECHANGE(rmesa, tcl);
+      rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = tpc;
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] = vs;
+   }
+}
+
+static GLboolean r100ValidateBuffers(GLcontext *ctx)
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   struct radeon_renderbuffer *rrb;
+   int i, ret;
+
+   /* Rebuild the persistent bo list; GL_FALSE if the space check with the DMA bo fails. */
+   radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
+
+   /* color buffer */
+   rrb = radeon_get_colorbuffer(&rmesa->radeon);
+   if (rrb && rrb->bo) {
+     radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
+				       0, RADEON_GEM_DOMAIN_VRAM);
+   }
+
+   /* depth buffer */
+   rrb = radeon_get_depthbuffer(&rmesa->radeon);
+   if (rrb && rrb->bo) {
+     radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, rrb->bo,
+				       0, RADEON_GEM_DOMAIN_VRAM);
+   }
+
+   for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
+      radeonTexObj *t;
+
+      if (!ctx->Texture.Unit[i]._ReallyEnabled)
+	 continue;
+      /* Defensive: a missing texobj or miptree is skipped, not dereferenced. */
+      t = rmesa->state.texture.unit[i].texobj;
+      if (!t)
+	 continue;
+      if (t->image_override && t->bo)
+	radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->bo,
+			   RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+      else if (t->mt && t->mt->bo)
+	radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->mt->bo,
+			   RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+   }
+
+   ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
+   return ret ? GL_FALSE : GL_TRUE;
+}
+
+GLboolean radeonValidateState( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint new_state = rmesa->radeon.NewGLState;   /* dirty flags accumulated by radeonInvalidateState() */
+   /* Returns GL_TRUE once pending state is processed, GL_FALSE if the buffer space check fails. */
+   if (new_state & _NEW_BUFFERS) {
+     _mesa_update_framebuffer(ctx);
+     /* this updates the DrawBuffer's Width/Height if it's a FBO */
+     _mesa_update_draw_buffer_bounds(ctx);
+     RADEON_STATECHANGE(rmesa, ctx);
+   }
+
+   if (new_state & _NEW_TEXTURE) {
+      radeonUpdateTextureState( ctx );
+      new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */
+   }
+
+   /* we need to do a space check here; on failure return early, leaving NewGLState set */
+   if (!r100ValidateBuffers(ctx))
+     return GL_FALSE;
+
+   /* Need an event driven matrix update?
+    */
+   if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION))
+      upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, MODEL_PROJ );
+
+   /* Need these for lighting (shouldn't upload otherwise)
+    */
+   if (new_state & (_NEW_MODELVIEW)) {
+      upload_matrix( rmesa, ctx->ModelviewMatrixStack.Top->m, MODEL );
+      upload_matrix_t( rmesa, ctx->ModelviewMatrixStack.Top->inv, MODEL_IT );
+   }
+
+   /* Does this need to be triggered on eg. modelview for
+    * texgen-derived objplane/eyeplane matrices?
+    */
+   if (new_state & _NEW_TEXTURE_MATRIX) {
+      update_texturematrix( ctx );
+   }
+
+   if (new_state & (_NEW_LIGHT|_NEW_MODELVIEW|_MESA_NEW_NEED_EYE_COORDS)) {
+      update_light( ctx );
+   }
+
+   /* emit all active clip planes if projection matrix changes.
+    */
+   if (new_state & (_NEW_PROJECTION)) {
+      if (ctx->Transform.ClipPlanesEnabled)
+	 radeonUpdateClipPlanes( ctx );
+   }
+
+   /* Everything pending has been handled: clear the accumulated flags. */
+   rmesa->radeon.NewGLState = 0;
+
+   return GL_TRUE;
+}
+
+
+static void radeonInvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   _swrast_InvalidateState( ctx, new_state );   /* forward the dirty flags to each helper module... */
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   _ae_invalidate_state( ctx, new_state );
+   R100_CONTEXT(ctx)->radeon.NewGLState |= new_state;   /* ...and accumulate them for radeonValidateState() */
+}
+
+
+/* Report whether any TNL attribute array in the range starting at
+ * _TNL_ATTRIB_MAT_FRONT_AMBIENT has a non-zero stride.  A hack: need a faster way. */
+static GLboolean check_material( GLcontext *ctx )
+{
+   const TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLint attr = _TNL_ATTRIB_MAT_FRONT_AMBIENT;
+
+   /* The upper bound is exclusive: _TNL_ATTRIB_MAT_BACK_INDEXES is not examined. */
+   while (attr < _TNL_ATTRIB_MAT_BACK_INDEXES) {
+      if (tnl->vb.AttribPtr[attr] && tnl->vb.AttribPtr[attr]->stride) {
+	 return GL_TRUE;
+      }
+      attr++;
+   }
+   return GL_FALSE;
+}
+
+
+static void radeonWrapRunPipeline( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLboolean per_vertex_material;
+
+   if (0) {
+      fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState);
+   }
+
+   /* Validate pending state; a failed validation requests the texture fallback. */
+   if (rmesa->radeon.NewGLState && !radeonValidateState( ctx )) {
+      FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE);
+   }
+
+   /* The TCL material fallback is held only for the duration of this run, when
+    * lighting is on and check_material() finds a strided material attribute. */
+   per_vertex_material = (ctx->Light.Enabled && check_material( ctx ));
+   if (per_vertex_material) {
+      TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_TRUE );
+   }
+
+   /* Run the pipeline. */
+   _tnl_run_pipeline( ctx );
+
+   if (per_vertex_material) {
+      TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_MATERIAL, GL_FALSE );
+   }
+}
+
+static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   GLint i;
+   /* PolygonStipple hook used when dri2 is set: copy 32 pattern words into the stp atom. */
+   radeon_firevertices(&r100->radeon);
+
+   RADEON_STATECHANGE(r100, stp);
+
+   /* Must flip pattern upside down.  NOTE(review): word i lands in slot
+    * 3 + i, so this indexing itself does not reorder rows -- confirm. */
+   for ( i = 31 ; i >= 0; i--) {
+     r100->hw.stp.cmd[3 + i] = ((GLuint *) mask)[i];
+   }
+}
+
+
+/* Initialize the driver's state functions.  Many of the ctx->Driver functions
+ * might have been initialized to software defaults in the earlier
+ * _mesa_init_driver_functions() call.  'dri2' selects whether ReadPixels is
+ * hooked and which PolygonStipple variant is used; two TNL hooks are set too. */
+void radeonInitStateFuncs( GLcontext *ctx , GLboolean dri2 )
+{
+   ctx->Driver.UpdateState		= radeonInvalidateState;
+   ctx->Driver.LightingSpaceChange      = radeonLightingSpaceChange;
+
+   ctx->Driver.DrawBuffer		= radeonDrawBuffer;
+   ctx->Driver.ReadBuffer		= radeonReadBuffer;
+   ctx->Driver.CopyPixels               = _mesa_meta_CopyPixels;
+   ctx->Driver.DrawPixels               = _mesa_meta_DrawPixels;
+   if (dri2)   /* without dri2 the ReadPixels entry is left as it was */
+	   ctx->Driver.ReadPixels               = radeonReadPixels;
+
+   ctx->Driver.AlphaFunc		= radeonAlphaFunc;
+   ctx->Driver.BlendEquationSeparate	= radeonBlendEquationSeparate;
+   ctx->Driver.BlendFuncSeparate	= radeonBlendFuncSeparate;
+   ctx->Driver.ClearColor		= radeonClearColor;
+   ctx->Driver.ClearDepth		= radeonClearDepth;
+   ctx->Driver.ClearStencil		= radeonClearStencil;
+   ctx->Driver.ClipPlane		= radeonClipPlane;
+   ctx->Driver.ColorMask		= radeonColorMask;
+   ctx->Driver.CullFace			= radeonCullFace;
+   ctx->Driver.DepthFunc		= radeonDepthFunc;
+   ctx->Driver.DepthMask		= radeonDepthMask;
+   ctx->Driver.DepthRange		= radeonDepthRange;
+   ctx->Driver.Enable			= radeonEnable;
+   ctx->Driver.Fogfv			= radeonFogfv;
+   ctx->Driver.FrontFace		= radeonFrontFace;
+   ctx->Driver.Hint			= NULL;   /* no Hint hook is installed */
+   ctx->Driver.LightModelfv		= radeonLightModelfv;
+   ctx->Driver.Lightfv			= radeonLightfv;
+   ctx->Driver.LineStipple              = radeonLineStipple;
+   ctx->Driver.LineWidth                = radeonLineWidth;
+   ctx->Driver.LogicOpcode		= radeonLogicOpCode;
+   ctx->Driver.PolygonMode		= radeonPolygonMode;
+   ctx->Driver.PolygonOffset		= radeonPolygonOffset;
+   if (dri2)
+      ctx->Driver.PolygonStipple		= radeonPolygonStipple;
+   else
+      ctx->Driver.PolygonStipple		= radeonPolygonStipplePreKMS;
+   ctx->Driver.RenderMode		= radeonRenderMode;
+   ctx->Driver.Scissor			= radeonScissor;
+   ctx->Driver.ShadeModel		= radeonShadeModel;
+   ctx->Driver.StencilFuncSeparate	= radeonStencilFuncSeparate;
+   ctx->Driver.StencilMaskSeparate	= radeonStencilMaskSeparate;
+   ctx->Driver.StencilOpSeparate	= radeonStencilOpSeparate;
+   ctx->Driver.Viewport			= radeonViewport;
+   /* TNL module hooks: material change notification and the pipeline wrapper. */
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange = radeonUpdateMaterial;
+   TNL_CONTEXT(ctx)->Driver.RunPipeline = radeonWrapRunPipeline;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_state.h b/src/mesa/drivers/dri/radeon/radeon_state.h
new file mode 100644
index 0000000000..c780cff0cf
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_state.h
@@ -0,0 +1,71 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __RADEON_STATE_H__
+#define __RADEON_STATE_H__
+
+#include "radeon_context.h"
+
+extern void radeonInitState( r100ContextPtr rmesa );
+extern void radeonInitStateFuncs( GLcontext *ctx , GLboolean dri2);   /* installs the ctx->Driver state hooks */
+
+extern void radeonUpdateMaterial( GLcontext *ctx );
+
+extern void radeonUpdateViewportOffset( GLcontext *ctx );   /* viewport X/Y offsets, stipple origin, scissor */
+extern void radeonUpdateWindow( GLcontext *ctx );   /* recomputes the hardware viewport scale/offset words */
+extern void radeonUpdateDrawBuffer( GLcontext *ctx );
+extern void radeonUploadTexMatrix( r100ContextPtr rmesa,
+				   int unit, GLboolean swapcols );
+
+extern GLboolean radeonValidateState( GLcontext *ctx );   /* GL_FALSE when the buffer space check fails */
+
+
+extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+#define FALLBACK( rmesa, bit, mode ) do {	/* args parenthesized: any expression may be passed */ \
+   if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n",		\
+		     __FUNCTION__, (bit), (mode) );			\
+   radeonFallback( (rmesa)->radeon.glCtx, (bit), (mode) );		\
+} while (0)
+
+
+#define MODEL_PROJ 0   /* indices into rmesa->hw.mat[]: combined model-projection matrix */
+#define MODEL      1   /* modelview matrix */
+#define MODEL_IT   2   /* modelview inverse, copied as-is by upload_matrix_t() */
+#define TEXMAT_0   3   /* texture matrix of unit 0; radeonUploadTexMatrix() uses TEXMAT_0 + unit */
+#define TEXMAT_1   4
+#define TEXMAT_2   5
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c
new file mode 100644
index 0000000000..91718a4777
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c
@@ -0,0 +1,1162 @@
+/*
+ * Copyright 2000, 2001 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes <gareth@valinux.com>
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/api_arrayelt.h"
+
+#include "swrast/swrast.h"
+#include "vbo/vbo.h"
+#include "tnl/t_pipeline.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_queryobj.h"
+
+#include "../r200/r200_reg.h"
+
+#include "xmlpool.h"
+
+/* New (1.3) state mechanism: three commands (packet, scalar, vector) in
+ * 1.3 cmdbuffers allow all previous state to be updated as well as the
+ * TCL scalar and vector areas.  packet[] is indexed by state packet id.
+ */
+static struct {
+	int start;		/* register passed as the first CP_PACKET0 argument by cmdpkt() */
+	int len;		/* length of the packet; cmdpkt() passes len - 1 to CP_PACKET0 */
+	const char *name;	/* printable name of the entry */
+} packet[RADEON_MAX_STATE_PACKETS] = {
+	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
+	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
+	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
+	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
+	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
+	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
+	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
+	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
+	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
+	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
+	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
+	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
+	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
+	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
+	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
+	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
+	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
+	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
+	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
+	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
+	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
+		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
+	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
+	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
+	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
+	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
+	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
+	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
+	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
+	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
+	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
+	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
+	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
+	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
+	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
+	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
+	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
+	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
+	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
+	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
+	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
+	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
+	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
+	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
+	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
+	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
+	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
+	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
+	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
+	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
+	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
+	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
+	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
+	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
+	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
+	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
+	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
+	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
+	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
+	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
+	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
+	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
+	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
+		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
+	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
+	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
+	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
+	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
+	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
+	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
+	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
+	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
+	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
+	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
+	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
+	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
+	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
+	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
+	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
+	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
+	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
+	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
+	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
+	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
+	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
+	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
+	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
+	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
+	{R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"},     /* 85 */
+	{R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"},
+	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
+	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
+	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
+	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
+	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
+	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
+	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
+	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
+};
+
+/* =============================================================
+ * State initialization
+ */
+static int cmdpkt( r100ContextPtr rmesa, int id )
+{
+   /* Returns the header dword for state packet 'id': a CP_PACKET0 header
+    * when the screen uses kernel_mm, otherwise a RADEON_CMD_PACKET header. */
+   drm_radeon_cmd_header_t hdr;
+   if (rmesa->radeon.radeonScreen->kernel_mm)
+      return CP_PACKET0(packet[id].start, packet[id].len - 1);
+
+   hdr.i = 0;
+   hdr.packet.cmd_type = RADEON_CMD_PACKET;
+   hdr.packet.packet_id = id;
+   return hdr.i;
+}
+
+static int cmdvec( int offset, int stride, int count )
+{
+   /* Packs a RADEON_CMD_VECTORS header (offset/stride/count) into one dword. */
+   drm_radeon_cmd_header_t hdr = { .i = 0 };
+   hdr.vectors.cmd_type = RADEON_CMD_VECTORS;
+   hdr.vectors.offset = offset;
+   hdr.vectors.stride = stride;
+   hdr.vectors.count = count;
+   return hdr.i;
+}
+
+static int cmdscl( int offset, int stride, int count )
+{
+   /* Packs a RADEON_CMD_SCALARS header (offset/stride/count) into one dword. */
+   drm_radeon_cmd_header_t hdr = { .i = 0 };
+   hdr.scalars.cmd_type = RADEON_CMD_SCALARS;
+   hdr.scalars.offset = offset;
+   hdr.scalars.stride = stride;
+   hdr.scalars.count = count;
+   return hdr.i;
+}
+
+#define CHECK( NM, FLAG, ADD ) /* defines check_NM(): atom->cmd_size + ADD when FLAG is true, else 0 */ \
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom )	\
+{									\
+   return (FLAG) ? atom->cmd_size + (ADD) : 0; /* FLAG parenthesized, as in TCL_CHECK */ \
+}
+
+#define TCL_CHECK( NM, FLAG, ADD ) /* defines check_NM(): 0 while TclFallback is set or FLAG is false */ \
+static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom )	\
+{									\
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);				\
+   return (rmesa->radeon.TclFallback || !(FLAG)) ? 0 : atom->cmd_size + (ADD);	\
+}
+
+
+CHECK( always, GL_TRUE, 0 ) /* unconditional checks; the _addN variants return cmd_size + N */
+CHECK( always_add2, GL_TRUE, 2 )
+CHECK( always_add4, GL_TRUE, 4 )
+CHECK( never, GL_FALSE, 0 ) /* always 0 */
+CHECK( tex0_mm, ctx->Texture.Unit[0]._ReallyEnabled, 3 ) /* _mm variants are the ones radeonInitState() picks when kernel_mm is set */
+CHECK( tex1_mm, ctx->Texture.Unit[1]._ReallyEnabled, 3 )
+/* tex2 tests _EnabledUnits instead of Unit[2]: needed for the cubic_map on disabled unit 2 bug, maybe r100 only? */
+CHECK( tex2_mm, ctx->Texture._EnabledUnits, 3 )
+CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled, 2 )
+CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled, 2 )
+CHECK( tex2, ctx->Texture._EnabledUnits, 2 )
+CHECK( cube0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE ) /* cube atoms get cmd_size CUBE_STATE_SIZE in radeonInitState(), so the result is 3 + 3*5 */
+CHECK( cube1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( cube2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT), 3 + 3*5 - CUBE_STATE_SIZE )
+CHECK( cube0_mm, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE ) /* likewise 2 + 4*5 for the kernel_mm atoms */
+CHECK( cube1_mm, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
+CHECK( cube2_mm, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_CUBE_BIT), 2 + 4*5 - CUBE_STATE_SIZE )
+CHECK( fog, ctx->Fog.Enabled, 0 )
+CHECK( fog_add4, ctx->Fog.Enabled, 4 )
+TCL_CHECK( tcl, GL_TRUE, 0 ) /* TCL checks: all return 0 while TclFallback is set */
+TCL_CHECK( tcl_add4, GL_TRUE, 4 ) /* _add4/_add6 variants are the ones radeonInitState() picks when kernel_mm is set */
+TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled, 0 )
+TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled, 0 )
+TCL_CHECK( tcl_tex2, ctx->Texture.Unit[2]._ReallyEnabled, 0 )
+TCL_CHECK( tcl_tex0_add4, ctx->Texture.Unit[0]._ReallyEnabled, 4 )
+TCL_CHECK( tcl_tex1_add4, ctx->Texture.Unit[1]._ReallyEnabled, 4 )
+TCL_CHECK( tcl_tex2_add4, ctx->Texture.Unit[2]._ReallyEnabled, 4 )
+TCL_CHECK( tcl_lighting, ctx->Light.Enabled, 0 )
+TCL_CHECK( tcl_lighting_add4, ctx->Light.Enabled, 4 )
+TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled, 0 )
+TCL_CHECK( tcl_eyespace_or_lighting_add4, ctx->_NeedEyeCoords || ctx->Light.Enabled, 4 )
+TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled, 0 ) /* light N: lighting on and that light enabled */
+TCL_CHECK( tcl_lit1, ctx->Light.Enabled && ctx->Light.Light[1].Enabled, 0 )
+TCL_CHECK( tcl_lit2, ctx->Light.Enabled && ctx->Light.Light[2].Enabled, 0 )
+TCL_CHECK( tcl_lit3, ctx->Light.Enabled && ctx->Light.Light[3].Enabled, 0 )
+TCL_CHECK( tcl_lit4, ctx->Light.Enabled && ctx->Light.Light[4].Enabled, 0 )
+TCL_CHECK( tcl_lit5, ctx->Light.Enabled && ctx->Light.Light[5].Enabled, 0 )
+TCL_CHECK( tcl_lit6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled, 0 )
+TCL_CHECK( tcl_lit7, ctx->Light.Enabled && ctx->Light.Light[7].Enabled, 0 )
+TCL_CHECK( tcl_lit0_add6, ctx->Light.Enabled && ctx->Light.Light[0].Enabled, 6 )
+TCL_CHECK( tcl_lit1_add6, ctx->Light.Enabled && ctx->Light.Light[1].Enabled, 6 )
+TCL_CHECK( tcl_lit2_add6, ctx->Light.Enabled && ctx->Light.Light[2].Enabled, 6 )
+TCL_CHECK( tcl_lit3_add6, ctx->Light.Enabled && ctx->Light.Light[3].Enabled, 6 )
+TCL_CHECK( tcl_lit4_add6, ctx->Light.Enabled && ctx->Light.Light[4].Enabled, 6 )
+TCL_CHECK( tcl_lit5_add6, ctx->Light.Enabled && ctx->Light.Light[5].Enabled, 6 )
+TCL_CHECK( tcl_lit6_add6, ctx->Light.Enabled && ctx->Light.Light[6].Enabled, 6 )
+TCL_CHECK( tcl_lit7_add6, ctx->Light.Enabled && ctx->Light.Light[7].Enabled, 6 )
+TCL_CHECK( tcl_ucp0, (ctx->Transform.ClipPlanesEnabled & 0x1), 0 ) /* ucpN tests bit N of ClipPlanesEnabled */
+TCL_CHECK( tcl_ucp1, (ctx->Transform.ClipPlanesEnabled & 0x2), 0 )
+TCL_CHECK( tcl_ucp2, (ctx->Transform.ClipPlanesEnabled & 0x4), 0 )
+TCL_CHECK( tcl_ucp3, (ctx->Transform.ClipPlanesEnabled & 0x8), 0 )
+TCL_CHECK( tcl_ucp4, (ctx->Transform.ClipPlanesEnabled & 0x10), 0 )
+TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20), 0 )
+TCL_CHECK( tcl_ucp0_add4, (ctx->Transform.ClipPlanesEnabled & 0x1), 4 )
+TCL_CHECK( tcl_ucp1_add4, (ctx->Transform.ClipPlanesEnabled & 0x2), 4 )
+TCL_CHECK( tcl_ucp2_add4, (ctx->Transform.ClipPlanesEnabled & 0x4), 4 )
+TCL_CHECK( tcl_ucp3_add4, (ctx->Transform.ClipPlanesEnabled & 0x8), 4 )
+TCL_CHECK( tcl_ucp4_add4, (ctx->Transform.ClipPlanesEnabled & 0x10), 4 )
+TCL_CHECK( tcl_ucp5_add4, (ctx->Transform.ClipPlanesEnabled & 0x20), 4 )
+TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled, 0 )
+TCL_CHECK( tcl_eyespace_or_fog_add4, ctx->_NeedEyeCoords || ctx->Fog.Enabled, 4 )
+
+CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT), 0 ) /* txrN: non-zero only when unit N has TEXTURE_RECT_BIT set */
+CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
+CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT), 0 )
+
+#define OUT_VEC(hdr, data) do {	/* emits 5 + count dwords; hdr has the layout built by cmdvec() */ \
+    drm_radeon_cmd_header_t h;					\
+    h.i = hdr;	/* reinterpret the packed header to read offset/stride/count */ \
+    OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0));		\
+    OUT_BATCH(0);	/* value written to SE_TCL_STATE_FLUSH */	\
+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0));		\
+    OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \
+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1));	\
+    OUT_BATCH_TABLE((data), h.vectors.count);	/* payload: count dwords from data */ \
+  } while(0)
+
+#define OUT_SCL(hdr, data) do {	/* emits 3 + count dwords; hdr has the layout built by cmdscl() */ \
+    drm_radeon_cmd_header_t h;						\
+    h.i = hdr;	/* reinterpret the packed header to read offset/stride/count */ \
+    OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0));		\
+    OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \
+    OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1));	\
+    OUT_BATCH_TABLE((data), h.scalars.count);	/* payload: count dwords from data */ \
+  } while(0)
+
+static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   /* Emit a scalar-state atom: cmd[0] is its header, cmd[1..] the values. */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&rmesa->radeon);
+   const uint32_t ndwords = atom->check(ctx, atom);
+   BEGIN_BATCH_NO_AUTOSTATE(ndwords);
+   OUT_SCL(atom->cmd[0], atom->cmd+1);
+   END_BATCH();
+}
+
+
+static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   /* Emit a vector-state atom: cmd[0] is its header, cmd[1..] the values. */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&rmesa->radeon);
+   const uint32_t ndwords = atom->check(ctx, atom);
+   BEGIN_BATCH_NO_AUTOSTATE(ndwords);
+   OUT_VEC(atom->cmd[0], atom->cmd+1);
+   END_BATCH();
+}
+
+
+static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   /* A light atom carries a vector block (header at LIT_CMD_0) and then a
+    * scalar block (header at LIT_CMD_1); both go out in a single batch. */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&rmesa->radeon);
+   const uint32_t ndwords = atom->check(ctx, atom);
+   BEGIN_BATCH_NO_AUTOSTATE(ndwords);
+   OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1);
+   OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1);
+   END_BATCH();
+}
+
+static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&r100->radeon);
+   struct radeon_renderbuffer *rrb;
+   uint32_t cbpitch;
+   uint32_t zbpitch, depth_fmt;
+   uint32_t dwords = atom->check(ctx, atom);
+   /* Context atom emit used when kernel_mm is not set (see radeonInitState).
+    * The first 5 dwords of atom->cmd go out verbatim; the rest is patched. */
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   OUT_BATCH_TABLE(atom->cmd, 5);
+   /* Depth offset and pitch: two zero dwords when no depth buffer is bound. */
+   rrb = radeon_get_depthbuffer(&r100->radeon);
+   if (!rrb) {
+     OUT_BATCH(0);
+     OUT_BATCH(0);
+   } else {
+     zbpitch = (rrb->pitch / rrb->cpp);
+     if (r100->using_hyperz)
+       zbpitch |= RADEON_DEPTH_HYPERZ;
+
+     OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+     OUT_BATCH(zbpitch);
+     if (rrb->cpp == 4)
+        depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+     else
+        depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
+   }
+   /* ZSTENCILCNTL now carries the depth format chosen above (if any). */
+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
+   OUT_BATCH(atom->cmd[CTX_CMD_1]);
+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
+   /* Colour buffer: stored RB3D_CNTL/offset when unbound, else format + reloc. */
+   rrb = radeon_get_colorbuffer(&r100->radeon);
+   if (!rrb || !rrb->bo) {
+      OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+      OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]);
+   } else {
+      atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
+      if (rrb->cpp == 4)
+         atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
+      else
+         atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
+
+      OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+      OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+   }
+
+   OUT_BATCH(atom->cmd[CTX_CMD_2]);
+   /* Colour pitch: stored value when unbound, else pitch in pixels (+ tiling bit). */
+   if (!rrb || !rrb->bo) {
+     OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]);
+   } else {
+     cbpitch = (rrb->pitch / rrb->cpp);
+     if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+       cbpitch |= RADEON_COLOR_TILE_ENABLE;
+     OUT_BATCH(cbpitch);
+   }
+
+   END_BATCH();
+}
+
+static int check_always_ctx( GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   /* Dword count of the main batch written by ctx_emit_cs(); 'atom' is
+    * unused.  Returns 0 when no colour buffer with a BO is bound, which is
+    * the same condition under which ctx_emit_cs() emits nothing.
+    * NOTE(review): the separate 4-dword RE_TOP_LEFT / RE_WIDTH_HEIGHT batch
+    * that ctx_emit_cs() appends is not counted here - confirm that callers
+    * sizing the command buffer from this value tolerate that. */
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&r100->radeon);
+   uint32_t dwords;
+
+   if (!rrb || !rrb->bo)
+      return 0;
+
+   /* 10 fixed dwords + 8 for colour offset/pitch (rrb is non-NULL here). */
+   dwords = 10 + 8;
+   if (radeon_get_depthbuffer(&r100->radeon))
+      dwords += 6;   /* depth offset and depth pitch packets */
+   return dwords;
+}
+
+static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   /* kernel_mm variant of the context atom emit.  The state is written as
+    * several CP_PACKET0 groups, with the depth/colour buffer offsets (and
+    * the colour pitch) going out through OUT_BATCH_RELOC, followed by a
+    * second batch that programs RE_TOP_LEFT / RE_WIDTH_HEIGHT from the
+    * colour buffer size.  Logs "no rrb" and emits nothing when no colour
+    * buffer with a BO is bound. */
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&r100->radeon);
+   struct radeon_renderbuffer *rrb, *drb;
+   uint32_t cbpitch = 0;
+   uint32_t zbpitch = 0;
+   uint32_t dwords = atom->check(ctx, atom);
+   uint32_t depth_fmt;
+
+   rrb = radeon_get_colorbuffer(&r100->radeon);
+   if (!rrb || !rrb->bo) {
+      fprintf(stderr, "no rrb\n");
+      return;
+   }
+
+   /* rrb and rrb->bo are valid from here on, so the colour buffer packets
+    * below are emitted unconditionally. */
+
+   /* Select the colour format in RB3D_CNTL from the buffer's cpp/format. */
+   atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10);
+   if (rrb->cpp == 4)
+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888;
+   else switch (rrb->base.Format) {
+   case MESA_FORMAT_RGB565:
+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565;
+	break;
+   case MESA_FORMAT_ARGB4444:
+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB4444;
+	break;
+   case MESA_FORMAT_ARGB1555:
+	atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB1555;
+	break;
+   default:
+	_mesa_problem(ctx, "unexpected format in ctx_emit_cs()");
+   }
+
+   cbpitch = (rrb->pitch / rrb->cpp);
+   if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+       cbpitch |= R200_COLOR_TILE_ENABLE;
+
+   drb = radeon_get_depthbuffer(&r100->radeon);
+   if (drb) {
+     zbpitch = (drb->pitch / drb->cpp);
+     if (drb->cpp == 4)
+        depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z;
+     else
+        depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z;
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK;
+     atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt;
+   }
+
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+
+   /* In the CS case we need to split this up */
+   OUT_BATCH(CP_PACKET0(packet[0].start, 3));
+   OUT_BATCH_TABLE((atom->cmd + 1), 4);
+
+   if (drb) {
+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0));
+     OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+     OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0));
+     OUT_BATCH(zbpitch);
+   }
+
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0));
+   OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]);
+   OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1));
+   OUT_BATCH(atom->cmd[CTX_PP_CNTL]);
+   OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]);
+
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0));
+   OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+   OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
+   OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+   END_BATCH();
+
+   BEGIN_BATCH_NO_AUTOSTATE(4);
+   OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
+   OUT_BATCH(0);
+   OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
+   OUT_BATCH(((rrb->base.Width - 1) << RADEON_RE_WIDTH_SHIFT) |
+             ((rrb->base.Height - 1) << RADEON_RE_HEIGHT_SHIFT));
+   END_BATCH();
+}
+
+static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   /* Cube-map atom emit for the non-kernel_mm path: three dwords from
+    * atom->cmd, then one relocation per face for faces 0..4 of mip level 0.
+    * Nothing is written unless the unit has a cube texture with a mipmap
+    * tree. */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&rmesa->radeon);
+   const uint32_t ndwords = atom->check(ctx, atom);
+   const int unit = atom->idx;
+   radeonTexObj *texobj = rmesa->state.texture.unit[unit].texobj;
+   radeon_mipmap_level *base;
+   int face;
+
+   if (!(ctx->Texture.Unit[unit]._ReallyEnabled & TEXTURE_CUBE_BIT) ||
+       !texobj || !texobj->mt)
+      return;
+
+   BEGIN_BATCH_NO_AUTOSTATE(ndwords);
+   OUT_BATCH_TABLE(atom->cmd, 3);
+   base = &texobj->mt->levels[0];
+   for (face = 0; face < 5; face++) {
+      OUT_BATCH_RELOC(base->faces[face].offset, texobj->mt->bo, base->faces[face].offset,
+		      RADEON_GEM_DOMAIN_VRAM, 0, 0);
+   }
+   END_BATCH();
+}
+
+static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   /* kernel_mm emit of a cube-map atom: two dwords from atom->cmd, then a
+    * CP_PACKET0 + relocation pair for each of faces 0..4 of mip level 0. */
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&rmesa->radeon);
+   const uint32_t ndwords = atom->check(ctx, atom);
+   const int unit = atom->idx;
+   radeonTexObj *texobj = rmesa->state.texture.unit[unit].texobj;
+   radeon_mipmap_level *base;
+   uint32_t first_reg;
+   int face;
+
+   /* Nothing to emit unless the unit has a cube texture with a mipmap tree. */
+   if (!(ctx->Texture.Unit[unit]._ReallyEnabled & TEXTURE_CUBE_BIT) ||
+       !texobj || !texobj->mt)
+      return;
+
+   if (unit == 1)
+      first_reg = RADEON_PP_CUBIC_OFFSET_T1_0;
+   else if (unit == 2)
+      first_reg = RADEON_PP_CUBIC_OFFSET_T2_0;
+   else  /* unit 0, and any other index, uses the unit 0 registers */
+      first_reg = RADEON_PP_CUBIC_OFFSET_T0_0;
+
+   BEGIN_BATCH_NO_AUTOSTATE(ndwords);
+   OUT_BATCH_TABLE(atom->cmd, 2);
+   base = &texobj->mt->levels[0];
+   for (face = 0; face < 5; face++) {
+      OUT_BATCH(CP_PACKET0(first_reg + (4 * face), 0));
+      OUT_BATCH_RELOC(base->faces[face].offset, texobj->mt->bo, base->faces[face].offset,
+		      RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+   }
+   END_BATCH();
+}
+
+static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&r100->radeon);
+   uint32_t dwords = atom->cmd_size;
+   int i = atom->idx;
+   radeonTexObj *t = r100->state.texture.unit[i].texobj;
+   radeon_mipmap_level *lvl;
+   /* Texture unit atom emit used when kernel_mm is not set (see radeonInitState). */
+   if (t && t->mt && !t->image_override)
+     dwords += 2;   /* two extra dwords are reserved when the offset is a relocation */
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   /* cmd[0..2] verbatim, then one texture-offset entry (cmd[3] is not sent), then cmd[4..8]. */
+   OUT_BATCH_TABLE(atom->cmd, 3);
+   if (t && t->mt && !t->image_override) {
+     if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
+   	lvl = &t->mt->levels[0];
+	OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
+			RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+     } else {
+        OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
+		     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+     }
+   } else if (!t) {
+     /* workaround for old CS mechanism */
+     OUT_BATCH(r100->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]);
+     /* no texture object: the local texture heap offset stands in as the address */
+   } else {
+     OUT_BATCH(t->override_offset);   /* texture object without mt, or with image_override set */
+   }
+
+   OUT_BATCH_TABLE((atom->cmd+4), 5);
+   END_BATCH();
+}
+
+static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+   r100ContextPtr r100 = R100_CONTEXT(ctx);
+   BATCH_LOCALS(&r100->radeon);
+   uint32_t dwords = atom->cmd_size;
+   int i = atom->idx;
+   radeonTexObj *t = r100->state.texture.unit[i].texobj;
+   radeon_mipmap_level *lvl;
+   int hastexture = 1;
+   /* kernel_mm texture unit emit: hastexture means a texobj with a mipmap tree or a BO. */
+   if (!t)
+	hastexture = 0;
+   else {
+	if (!t->mt && !t->bo)
+		hastexture = 0;
+   }
+   dwords += 1;
+   if (hastexture)
+     dwords += 2;   /* room for the TXOFFSET header plus its relocation */
+   else
+     dwords -= 2;   /* no TXOFFSET packet is emitted at all */
+   BEGIN_BATCH_NO_AUTOSTATE(dwords);
+   /* TXFILTER packet: two registers taken from cmd[1..2]; per-unit register stride is 24. */
+   OUT_BATCH(CP_PACKET0(RADEON_PP_TXFILTER_0 + (24 * i), 1));
+   OUT_BATCH_TABLE((atom->cmd + 1), 2);
+   /* NOTE(review): with mt set, image_override set and t->bo NULL, the TXOFFSET header below gets no value dword - confirm this cannot occur. */
+   if (hastexture) {
+     OUT_BATCH(CP_PACKET0(RADEON_PP_TXOFFSET_0 + (24 * i), 0));
+     if (t->mt && !t->image_override) {
+        if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) {
+            lvl = &t->mt->levels[t->minLod];
+	    OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset,
+			RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+        } else {
+           OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, get_base_teximage_offset(t),
+		     RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+        }
+      } else {
+	if (t->bo)
+            OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
+                            RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+      }
+   }
+   /* TXCBLEND packet (two registers from cmd[4..5]), then the border colour register. */
+   OUT_BATCH(CP_PACKET0(RADEON_PP_TXCBLEND_0 + (i * 24), 1));
+   OUT_BATCH_TABLE((atom->cmd+4), 2);
+   OUT_BATCH(CP_PACKET0(RADEON_PP_BORDER_COLOR_0 + (i * 4), 0));
+   OUT_BATCH((atom->cmd[TEX_PP_BORDER_COLOR]));
+   END_BATCH();
+}
+
+/* Initialize the context's hardware state.
+ */
+void radeonInitState( r100ContextPtr rmesa )
+{
+   GLcontext *ctx = rmesa->radeon.glCtx;
+   GLuint i;
+
+   rmesa->radeon.state.color.clear = 0x00000000;
+
+   switch ( ctx->Visual.depthBits ) {
+   case 16:
+      rmesa->radeon.state.depth.clear = 0x0000ffff;
+      rmesa->radeon.state.stencil.clear = 0x00000000;
+      break;
+   case 24:
+      rmesa->radeon.state.depth.clear = 0x00ffffff;
+      rmesa->radeon.state.stencil.clear = 0xffff0000;
+      break;
+   default:
+      break;
+   }
+
+   rmesa->radeon.Fallback = 0;
+
+
+   rmesa->radeon.hw.max_state_size = 0;
+
+#define ALLOC_STATE_IDX( ATOM, CHK, SZ, NM, FLAG, IDX )		\
+   do {								\
+      rmesa->hw.ATOM.cmd_size = SZ;				\
+      rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int));	\
+      rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \
+      rmesa->hw.ATOM.name = NM;						\
+      rmesa->hw.ATOM.is_tcl = FLAG;					\
+      rmesa->hw.ATOM.check = check_##CHK;				\
+      rmesa->hw.ATOM.dirty = GL_TRUE;					\
+      rmesa->hw.ATOM.idx = IDX;					\
+      rmesa->radeon.hw.max_state_size += SZ * sizeof(int);		\
+   } while (0)
+
+#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG )		\
+   ALLOC_STATE_IDX(ATOM, CHK, SZ, NM, FLAG, 0)
+
+   /* Allocate state buffers:
+    */
+   ALLOC_STATE( ctx, always_add4, CTX_STATE_SIZE, "CTX/context", 0 );
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+     rmesa->hw.ctx.emit = ctx_emit_cs;
+     rmesa->hw.ctx.check = check_always_ctx;
+   } else
+     rmesa->hw.ctx.emit = ctx_emit;
+   ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 );
+   ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 );
+   ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 );
+   ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 );
+   ALLOC_STATE( msc, always, MSC_STATE_SIZE, "MSC/misc", 0 );
+   ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 );
+   ALLOC_STATE( tcl, always, TCL_STATE_SIZE, "TCL/tcl", 1 );
+   ALLOC_STATE( mtl, tcl_lighting, MTL_STATE_SIZE, "MTL/material", 1 );
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+      ALLOC_STATE( grd, always_add2, GRD_STATE_SIZE, "GRD/guard-band", 1 );
+      ALLOC_STATE( fog, fog_add4, FOG_STATE_SIZE, "FOG/fog", 1 );
+      ALLOC_STATE( glt, tcl_lighting_add4, GLT_STATE_SIZE, "GLT/light-global", 1 );
+      ALLOC_STATE( eye, tcl_lighting_add4, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
+      ALLOC_STATE_IDX( tex[0], tex0_mm, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
+      ALLOC_STATE_IDX( tex[1], tex1_mm, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
+      ALLOC_STATE_IDX( tex[2], tex2_mm, TEX_STATE_SIZE, "TEX/tex-2", 0, 2);
+      ALLOC_STATE( mat[0], tcl_add4, MAT_STATE_SIZE, "MAT/modelproject", 1 );
+      ALLOC_STATE( mat[1], tcl_eyespace_or_fog_add4, MAT_STATE_SIZE, "MAT/modelview", 1 );
+      ALLOC_STATE( mat[2], tcl_eyespace_or_lighting_add4, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
+      ALLOC_STATE( mat[3], tcl_tex0_add4, MAT_STATE_SIZE, "MAT/texmat0", 1 );
+      ALLOC_STATE( mat[4], tcl_tex1_add4, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+      ALLOC_STATE( mat[5], tcl_tex2_add4, MAT_STATE_SIZE, "MAT/texmat2", 1 );
+      ALLOC_STATE( lit[0], tcl_lit0_add6, LIT_STATE_SIZE, "LIT/light-0", 1 );
+      ALLOC_STATE( lit[1], tcl_lit1_add6, LIT_STATE_SIZE, "LIT/light-1", 1 );
+      ALLOC_STATE( lit[2], tcl_lit2_add6, LIT_STATE_SIZE, "LIT/light-2", 1 );
+      ALLOC_STATE( lit[3], tcl_lit3_add6, LIT_STATE_SIZE, "LIT/light-3", 1 );
+      ALLOC_STATE( lit[4], tcl_lit4_add6, LIT_STATE_SIZE, "LIT/light-4", 1 );
+      ALLOC_STATE( lit[5], tcl_lit5_add6, LIT_STATE_SIZE, "LIT/light-5", 1 );
+      ALLOC_STATE( lit[6], tcl_lit6_add6, LIT_STATE_SIZE, "LIT/light-6", 1 );
+      ALLOC_STATE( lit[7], tcl_lit7_add6, LIT_STATE_SIZE, "LIT/light-7", 1 );
+      ALLOC_STATE( ucp[0], tcl_ucp0_add4, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
+      ALLOC_STATE( ucp[1], tcl_ucp1_add4, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+      ALLOC_STATE( ucp[2], tcl_ucp2_add4, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
+      ALLOC_STATE( ucp[3], tcl_ucp3_add4, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
+      ALLOC_STATE( ucp[4], tcl_ucp4_add4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
+      ALLOC_STATE( ucp[5], tcl_ucp5_add4, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
+   } else {
+      ALLOC_STATE( grd, always, GRD_STATE_SIZE, "GRD/guard-band", 1 );
+      ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 );
+      ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 );
+      ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
+      ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0);
+      ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1);
+      ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2);
+      ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
+      ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
+      ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
+      ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 );
+      ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+      ALLOC_STATE( mat[5], tcl_tex2, MAT_STATE_SIZE, "MAT/texmat2", 1 );
+      ALLOC_STATE( lit[0], tcl_lit0, LIT_STATE_SIZE, "LIT/light-0", 1 );
+      ALLOC_STATE( lit[1], tcl_lit1, LIT_STATE_SIZE, "LIT/light-1", 1 );
+      ALLOC_STATE( lit[2], tcl_lit2, LIT_STATE_SIZE, "LIT/light-2", 1 );
+      ALLOC_STATE( lit[3], tcl_lit3, LIT_STATE_SIZE, "LIT/light-3", 1 );
+      ALLOC_STATE( lit[4], tcl_lit4, LIT_STATE_SIZE, "LIT/light-4", 1 );
+      ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
+      ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
+      ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
+      ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
+      ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
+      ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
+      ALLOC_STATE( ucp[3], tcl_ucp3, UCP_STATE_SIZE, "UCP/userclip-3", 1 );
+      ALLOC_STATE( ucp[4], tcl_ucp4, UCP_STATE_SIZE, "UCP/userclip-4", 1 );
+      ALLOC_STATE( ucp[5], tcl_ucp5, UCP_STATE_SIZE, "UCP/userclip-5", 1 );
+   }
+
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+       ALLOC_STATE( stp, always, STP_STATE_SIZE, "STP/stp", 0 );
+   }
+   
+   for (i = 0; i < 3; i++) {
+      if (rmesa->radeon.radeonScreen->kernel_mm)
+          rmesa->hw.tex[i].emit = tex_emit_cs;
+      else
+          rmesa->hw.tex[i].emit = tex_emit;
+   }
+   if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100)
+   {
+      if (rmesa->radeon.radeonScreen->kernel_mm) {
+         ALLOC_STATE_IDX( cube[0], cube0_mm, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
+         ALLOC_STATE_IDX( cube[1], cube1_mm, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
+         ALLOC_STATE_IDX( cube[2], cube2_mm, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
+         for (i = 0; i < 3; i++)
+            rmesa->hw.cube[i].emit = cube_emit_cs;
+      } else {
+         ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
+         ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
+         ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
+         for (i = 0; i < 3; i++)
+            rmesa->hw.cube[i].emit = cube_emit;
+      }
+   }
+   else
+   {
+      ALLOC_STATE_IDX( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 );
+      ALLOC_STATE_IDX( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 );
+      ALLOC_STATE_IDX( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 );
+   }
+   ALLOC_STATE_IDX( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0, 0 );
+   ALLOC_STATE_IDX( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0, 1 );
+   ALLOC_STATE_IDX( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0, 2 );
+
+   radeonSetUpAtomList( rmesa );
+
+   /* Fill in the packet headers:
+    */
+   rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC);
+   rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL);
+   rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH);
+   rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN);
+   rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH);
+   rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK);
+   rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE);
+   rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL);
+   rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL_STATUS);
+   rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC);
+   rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_0);
+   rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_0);
+   rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_1);
+   rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_1);
+   rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_2);
+   rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_2);
+   rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_0);
+   rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T0);
+   rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_1);
+   rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T1);
+   rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_2);
+   rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T2);
+   rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR);
+   rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
+   rmesa->hw.mtl.cmd[MTL_CMD_0] = 
+      cmdpkt(rmesa, RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
+   rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_0);
+   rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_1);
+   rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_2);
+   rmesa->hw.grd.cmd[GRD_CMD_0] = 
+      cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
+   rmesa->hw.fog.cmd[FOG_CMD_0] = 
+      cmdvec( RADEON_VS_FOG_PARAM_ADDR, 1, 4 );
+   rmesa->hw.glt.cmd[GLT_CMD_0] = 
+      cmdvec( RADEON_VS_GLOBAL_AMBIENT_ADDR, 1, 4 );
+   rmesa->hw.eye.cmd[EYE_CMD_0] = 
+      cmdvec( RADEON_VS_EYE_VECTOR_ADDR, 1, 4 );
+
+   for (i = 0 ; i < 6; i++) {
+      rmesa->hw.mat[i].cmd[MAT_CMD_0] = 
+	 cmdvec( RADEON_VS_MATRIX_0_ADDR + i*4, 1, 16);
+   }
+
+   for (i = 0 ; i < 8; i++) {
+      rmesa->hw.lit[i].cmd[LIT_CMD_0] = 
+	 cmdvec( RADEON_VS_LIGHT_AMBIENT_ADDR + i, 8, 24 );
+      rmesa->hw.lit[i].cmd[LIT_CMD_1] = 
+	 cmdscl( RADEON_SS_LIGHT_DCD_ADDR + i, 8, 6 );
+   }
+
+   for (i = 0 ; i < 6; i++) {
+      rmesa->hw.ucp[i].cmd[UCP_CMD_0] = 
+	 cmdvec( RADEON_VS_UCP_ADDR + i, 1, 4 );
+   }
+
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+      rmesa->hw.stp.cmd[STP_CMD_0] = CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0);
+      rmesa->hw.stp.cmd[STP_DATA_0] = 0;
+      rmesa->hw.stp.cmd[STP_CMD_1] = CP_PACKET0_ONE(RADEON_RE_STIPPLE_DATA, 31);
+
+      rmesa->hw.grd.emit = scl_emit;
+      rmesa->hw.fog.emit = vec_emit;
+      rmesa->hw.glt.emit = vec_emit;
+      rmesa->hw.eye.emit = vec_emit;
+      
+      for (i = 0; i < 6; i++)
+	 rmesa->hw.mat[i].emit = vec_emit;
+
+      for (i = 0; i < 8; i++)
+	 rmesa->hw.lit[i].emit = lit_emit;
+
+      for (i = 0; i < 6; i++)
+	 rmesa->hw.ucp[i].emit = vec_emit;
+   }
+
+   rmesa->last_ReallyEnabled = -1;
+
+   /* Initial Hardware state:
+    */
+   rmesa->hw.ctx.cmd[CTX_PP_MISC] = (RADEON_ALPHA_TEST_PASS |
+				     RADEON_CHROMA_FUNC_FAIL |
+				     RADEON_CHROMA_KEY_NEAREST |
+				     RADEON_SHADOW_FUNC_EQUAL |
+				     RADEON_SHADOW_PASS_1 /*|
+				     RADEON_RIGHT_HAND_CUBE_OGL */);
+
+   rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] = (RADEON_FOG_VERTEX |
+					  /* this bit unused for vertex fog */
+					  RADEON_FOG_USE_DEPTH);
+
+   rmesa->hw.ctx.cmd[CTX_RE_SOLID_COLOR] = 0x00000000;
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = (RADEON_COMB_FCN_ADD_CLAMP |
+					    RADEON_SRC_BLEND_GL_ONE |
+					    RADEON_DST_BLEND_GL_ZERO );
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (RADEON_Z_TEST_LESS |
+					       RADEON_STENCIL_TEST_ALWAYS |
+					       RADEON_STENCIL_FAIL_KEEP |
+					       RADEON_STENCIL_ZPASS_KEEP |
+					       RADEON_STENCIL_ZFAIL_KEEP |
+					       RADEON_Z_WRITE_ENABLE);
+
+   if (rmesa->using_hyperz) {
+       rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_COMPRESSION_ENABLE |
+						   RADEON_Z_DECOMPRESSION_ENABLE;
+      if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+	 /* works for q3, but slight rendering errors with glxgears ? */
+/*	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/
+	 /* need this otherwise get lots of lockups with q3 ??? */
+	 rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_FORCE_Z_DIRTY;
+      } 
+   }
+
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (RADEON_SCISSOR_ENABLE |
+				     RADEON_ANTI_ALIAS_NONE);
+
+   rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = (RADEON_PLANE_MASK_ENABLE |
+				       RADEON_ZBLOCK16);
+
+   switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) {
+   case DRI_CONF_DITHER_XERRORDIFFRESET:
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_INIT;
+      break;
+   case DRI_CONF_DITHER_ORDERED:
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_SCALE_DITHER_ENABLE;
+      break;
+   }
+   if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) ==
+	DRI_CONF_ROUND_ROUND )
+      rmesa->radeon.state.color.roundEnable = RADEON_ROUND_ENABLE;
+   else
+      rmesa->radeon.state.color.roundEnable = 0;
+   if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) ==
+	DRI_CONF_COLOR_REDUCTION_DITHER )
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE;
+   else
+      rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable;
+
+
+   rmesa->hw.set.cmd[SET_SE_CNTL] = (RADEON_FFACE_CULL_CCW |
+				     RADEON_BFACE_SOLID |
+				     RADEON_FFACE_SOLID |
+/*  			     RADEON_BADVTX_CULL_DISABLE | */
+				     RADEON_FLAT_SHADE_VTX_LAST |
+				     RADEON_DIFFUSE_SHADE_GOURAUD |
+				     RADEON_ALPHA_SHADE_GOURAUD |
+				     RADEON_SPECULAR_SHADE_GOURAUD |
+				     RADEON_FOG_SHADE_GOURAUD |
+				     RADEON_VPORT_XY_XFORM_ENABLE |
+				     RADEON_VPORT_Z_XFORM_ENABLE |
+				     RADEON_VTX_PIX_CENTER_OGL |
+				     RADEON_ROUND_MODE_TRUNC |
+				     RADEON_ROUND_PREC_8TH_PIX);
+
+   rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] =
+#ifdef MESA_BIG_ENDIAN
+					    RADEON_VC_32BIT_SWAP;
+#else
+  					    RADEON_VC_NO_SWAP;
+#endif
+
+   if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
+     rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS;
+   }
+
+   rmesa->hw.set.cmd[SET_SE_COORDFMT] = (
+      RADEON_VTX_W0_IS_NOT_1_OVER_W0 |
+      RADEON_TEX1_W_ROUTING_USE_Q1);
+
+
+   rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((1 << 16) | 0xffff);
+
+   rmesa->hw.lin.cmd[LIN_RE_LINE_STATE] = 
+      ((0 << RADEON_LINE_CURRENT_PTR_SHIFT) |
+       (1 << RADEON_LINE_CURRENT_COUNT_SHIFT));
+
+   rmesa->hw.lin.cmd[LIN_SE_LINE_WIDTH] = (1 << 4);
+
+   rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] = 
+      ((0x00 << RADEON_STENCIL_REF_SHIFT) |
+       (0xff << RADEON_STENCIL_MASK_SHIFT) |
+       (0xff << RADEON_STENCIL_WRITEMASK_SHIFT));
+
+   rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = RADEON_ROP_COPY;
+   rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] = 0xffffffff;
+
+   rmesa->hw.msc.cmd[MSC_RE_MISC] = 
+      ((0 << RADEON_STIPPLE_X_OFFSET_SHIFT) |
+       (0 << RADEON_STIPPLE_Y_OFFSET_SHIFT) |
+       RADEON_STIPPLE_BIG_BIT_ORDER);
+
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_XOFFSET] = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_YOFFSET] = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZSCALE]  = 0x00000000;
+   rmesa->hw.vpt.cmd[VPT_SE_VPORT_ZOFFSET] = 0x00000000;
+
+   for ( i = 0 ; i < ctx->Const.MaxTextureUnits ; i++ ) {
+      rmesa->hw.tex[i].cmd[TEX_PP_TXFILTER] = RADEON_BORDER_MODE_OGL;
+      rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT] = 
+	  (RADEON_TXFORMAT_ENDIAN_NO_SWAP |
+	   RADEON_TXFORMAT_PERSPECTIVE_ENABLE |
+	   (i << 24) | /* This is one of RADEON_TXFORMAT_ST_ROUTE_STQ[012] */
+	   (2 << RADEON_TXFORMAT_WIDTH_SHIFT) |
+	   (2 << RADEON_TXFORMAT_HEIGHT_SHIFT));
+
+      /* Initialize the texture offset to the start of the card texture heap */
+      //      rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] =
+      //	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+
+      rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0;
+      rmesa->hw.tex[i].cmd[TEX_PP_TXCBLEND] =  
+	  (RADEON_COLOR_ARG_A_ZERO |
+	   RADEON_COLOR_ARG_B_ZERO |
+	   RADEON_COLOR_ARG_C_CURRENT_COLOR |
+	   RADEON_BLEND_CTL_ADD |
+	   RADEON_SCALE_1X |
+	   RADEON_CLAMP_TX);
+      rmesa->hw.tex[i].cmd[TEX_PP_TXABLEND] = 
+	  (RADEON_ALPHA_ARG_A_ZERO |
+	   RADEON_ALPHA_ARG_B_ZERO |
+	   RADEON_ALPHA_ARG_C_CURRENT_ALPHA |
+	   RADEON_BLEND_CTL_ADD |
+	   RADEON_SCALE_1X |
+	   RADEON_CLAMP_TX);
+      rmesa->hw.tex[i].cmd[TEX_PP_TFACTOR] = 0;
+
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0;
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_0] =
+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_1] =
+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_2] =
+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_3] =
+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+      rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_4] =
+	  rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP];
+   }
+
+   /* Can only add ST1 at the time of doing some multitex but can keep
+    * it after that.  Errors if DIFFUSE is missing.
+    */
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = 
+      (RADEON_TCL_VTX_Z0 |
+       RADEON_TCL_VTX_W0 |
+       RADEON_TCL_VTX_PK_DIFFUSE
+	 );	/* need to keep this uptodate */
+						   
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL] =
+      ( RADEON_TCL_COMPUTE_XYZW 	|
+	(RADEON_TCL_TEX_INPUT_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
+	(RADEON_TCL_TEX_INPUT_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT) |
+	(RADEON_TCL_TEX_INPUT_TEX_2 << RADEON_TCL_TEX_2_OUTPUT_SHIFT));
+
+
+   /* XXX */
+   rmesa->hw.tcl.cmd[TCL_MATRIX_SELECT_0] = 
+      ((MODEL << RADEON_MODELVIEW_0_SHIFT) |
+       (MODEL_IT << RADEON_IT_MODELVIEW_0_SHIFT));
+
+   rmesa->hw.tcl.cmd[TCL_MATRIX_SELECT_1] = 
+      ((MODEL_PROJ << RADEON_MODELPROJECT_0_SHIFT) |
+       (TEXMAT_0 << RADEON_TEXMAT_0_SHIFT) |
+       (TEXMAT_1 << RADEON_TEXMAT_1_SHIFT) |
+       (TEXMAT_2 << RADEON_TEXMAT_2_SHIFT));
+
+   rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = 
+      (RADEON_UCP_IN_CLIP_SPACE |
+       RADEON_CULL_FRONT_IS_CCW);
+
+   rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] = 0; 
+
+   rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = 
+      (RADEON_SPECULAR_LIGHTS |
+       RADEON_DIFFUSE_SPECULAR_COMBINE |
+       RADEON_LOCAL_LIGHT_VEC_GL |
+       (RADEON_LM_SOURCE_STATE_MULT << RADEON_EMISSIVE_SOURCE_SHIFT) |
+       (RADEON_LM_SOURCE_STATE_MULT << RADEON_AMBIENT_SOURCE_SHIFT) |
+       (RADEON_LM_SOURCE_STATE_MULT << RADEON_DIFFUSE_SOURCE_SHIFT) |
+       (RADEON_LM_SOURCE_STATE_MULT << RADEON_SPECULAR_SOURCE_SHIFT));
+
+   for (i = 0 ; i < 8; i++) {
+      struct gl_light *l = &ctx->Light.Light[i];
+      GLenum p = GL_LIGHT0 + i;
+      *(float *)&(rmesa->hw.lit[i].cmd[LIT_RANGE_CUTOFF]) = FLT_MAX;
+
+      ctx->Driver.Lightfv( ctx, p, GL_AMBIENT, l->Ambient );
+      ctx->Driver.Lightfv( ctx, p, GL_DIFFUSE, l->Diffuse );
+      ctx->Driver.Lightfv( ctx, p, GL_SPECULAR, l->Specular );
+      ctx->Driver.Lightfv( ctx, p, GL_POSITION, NULL );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_DIRECTION, NULL );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_EXPONENT, &l->SpotExponent );
+      ctx->Driver.Lightfv( ctx, p, GL_SPOT_CUTOFF, &l->SpotCutoff );
+      ctx->Driver.Lightfv( ctx, p, GL_CONSTANT_ATTENUATION,
+			   &l->ConstantAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_LINEAR_ATTENUATION, 
+			   &l->LinearAttenuation );
+      ctx->Driver.Lightfv( ctx, p, GL_QUADRATIC_ATTENUATION, 
+		     &l->QuadraticAttenuation );
+      *(float *)&(rmesa->hw.lit[i].cmd[LIT_ATTEN_XXX]) = 0.0;
+   }
+
+   ctx->Driver.LightModelfv( ctx, GL_LIGHT_MODEL_AMBIENT, 
+			     ctx->Light.Model.Ambient );
+
+   TNL_CONTEXT(ctx)->Driver.NotifyMaterialChange( ctx );
+
+   for (i = 0 ; i < 6; i++) {
+      ctx->Driver.ClipPlane( ctx, GL_CLIP_PLANE0 + i, NULL );
+   }
+
+   ctx->Driver.Fogfv( ctx, GL_FOG_MODE, NULL );
+   ctx->Driver.Fogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
+   ctx->Driver.Fogfv( ctx, GL_FOG_START, &ctx->Fog.Start );
+   ctx->Driver.Fogfv( ctx, GL_FOG_END, &ctx->Fog.End );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COLOR, ctx->Fog.Color );
+   ctx->Driver.Fogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL );
+   
+   rmesa->hw.grd.cmd[GRD_VERT_GUARD_CLIP_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_VERT_GUARD_DISCARD_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_HORZ_GUARD_CLIP_ADJ] = IEEE_ONE;
+   rmesa->hw.grd.cmd[GRD_HORZ_GUARD_DISCARD_ADJ] = IEEE_ONE;
+
+   rmesa->hw.eye.cmd[EYE_X] = 0;
+   rmesa->hw.eye.cmd[EYE_Y] = 0;
+   rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE;
+   rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE;
+
+   if (rmesa->radeon.radeonScreen->kernel_mm) {
+      radeon_init_query_stateobj(&rmesa->radeon, R100_QUERYOBJ_CMDSIZE);
+      rmesa->radeon.query.queryobj.cmd[R100_QUERYOBJ_CMD_0] = CP_PACKET0(RADEON_RB3D_ZPASS_DATA, 0);
+      rmesa->radeon.query.queryobj.cmd[R100_QUERYOBJ_DATA_0] = 0;
+   }
+     
+   rmesa->radeon.hw.all_dirty = GL_TRUE;
+
+   rcommonInitCmdBuf(&rmesa->radeon);
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
new file mode 100644
index 0000000000..f2fcb46688
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -0,0 +1,857 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/enums.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/simple_list.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_state.h"
+#include "radeon_swtcl.h"
+#include "radeon_tcl.h"
+#include "radeon_debug.h"
+
+
+/* R100: xyzw, c0, c1/fog, stq[0..2]  = 4+1+1+3*3 = 15  right? */
+/* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */
+#define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat))	/* for mesa _tnl stage */
+
+/***********************************************************************
+ *                         Initialization 
+ ***********************************************************************/
+
+#define EMIT_ATTR( ATTR, STYLE, F0 )					\
+do {	/* append one attribute; expects rmesa and fmt_0 at the call site */	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR);	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE);	\
+   rmesa->radeon.swtcl.vertex_attr_count++;					\
+   fmt_0 |= F0;	/* collect the matching hardware format bits */		\
+} while (0)
+
+#define EMIT_PAD( N )							\
+do {	/* append a padding entry of size N; expects rmesa at the call site */	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0;		\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD;	/* no '(' follows: plain identifier, not this macro */	\
+   rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N);		\
+   rmesa->radeon.swtcl.vertex_attr_count++;					\
+} while (0)
+
+static GLuint radeon_cp_vc_frmts[3][2] =	/* [texture unit][0: ST only, 1: ST plus Q] */
+{
+   { RADEON_CP_VC_FRMT_ST0, RADEON_CP_VC_FRMT_ST0 | RADEON_CP_VC_FRMT_Q0 },
+   { RADEON_CP_VC_FRMT_ST1, RADEON_CP_VC_FRMT_ST1 | RADEON_CP_VC_FRMT_Q1 },
+   { RADEON_CP_VC_FRMT_ST2, RADEON_CP_VC_FRMT_ST2 | RADEON_CP_VC_FRMT_Q2 },
+};
+
+static void radeonSetVertexFormat( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   DECLARE_RENDERINPUTS(index_bitset);
+   int fmt_0 = 0;		/* RADEON_CP_VC_FRMT_* bits, OR-ed in by EMIT_ATTR */
+   int offset = 0;		/* running vertex size, counted in dwords */
+
+   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+
+   /* Important: the position attribute is read from the NDC coordinates when
+    * they exist, and from the clip coordinates otherwise. */
+   if ( VB->NdcPtr != NULL ) {
+      VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+   }
+   else {
+      VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
+   }
+
+   assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
+   rmesa->radeon.swtcl.vertex_attr_count = 0;	/* rebuild the attribute list from scratch */
+
+   /* Builds the t_vertex.c attribute list and the matching hardware format
+    * word.  EMIT_ATTR's must stay in order: that order is the vertex layout.
+    */
+   if ( !rmesa->swtcl.needproj ||
+        RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {	/* for projtex */
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F,
+		 RADEON_CP_VC_FRMT_XY |	RADEON_CP_VC_FRMT_Z | RADEON_CP_VC_FRMT_W0 );
+      offset = 4;
+   }
+   else {
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F,
+		 RADEON_CP_VC_FRMT_XY |	RADEON_CP_VC_FRMT_Z );
+      offset = 3;
+   }
+
+   rmesa->swtcl.coloroffset = offset;	/* dword index of the packed color */
+#if MESA_LITTLE_ENDIAN
+   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA,
+	      RADEON_CP_VC_FRMT_PKCOLOR );
+#else
+   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR,
+	      RADEON_CP_VC_FRMT_PKCOLOR );
+#endif
+   offset += 1;				/* four unsigned bytes: one dword */
+
+   rmesa->swtcl.specoffset = 0;		/* stays 0 unless COLOR1 is emitted below */
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) ||
+       RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
+
+#if MESA_LITTLE_ENDIAN	/* three specular bytes first, then the fog byte */
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+	 rmesa->swtcl.specoffset = offset;
+	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB,
+	 	    RADEON_CP_VC_FRMT_PKSPEC );
+      }
+      else {
+	 EMIT_PAD( 3 );
+      }
+
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
+	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F,
+	 	    RADEON_CP_VC_FRMT_PKSPEC );
+      }
+      else {
+	 EMIT_PAD( 1 );
+      }
+#else			/* fog byte first, then the three specular bytes */
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
+	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F,
+	 	    RADEON_CP_VC_FRMT_PKSPEC );
+      }
+      else {
+	 EMIT_PAD( 1 );
+      }
+
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+	 rmesa->swtcl.specoffset = offset;
+	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR,
+	 	    RADEON_CP_VC_FRMT_PKSPEC );
+      }
+      else {
+	 EMIT_PAD( 3 );
+      }
+#endif
+   }
+
+   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+      GLuint i;	/* unsigned, like the MaxTextureUnits bound it is compared with */
+
+      for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+	 if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
+	    GLuint sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
+
+	    switch (sz) {
+	    case 1:
+	    case 2:	/* one or two components: ST format bit only */
+	       EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_2F,
+			  radeon_cp_vc_frmts[i][0] );
+	       break;
+	    case 3:
+	    case 4:	/* three or four components: ST plus Q format bits */
+	       if (ctx->Texture.Unit[i]._ReallyEnabled & (TEXTURE_CUBE_BIT) ) {
+		  EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_3F,
+			     radeon_cp_vc_frmts[i][1] );
+	       } else {
+		  EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_3F_XYW,
+			     radeon_cp_vc_frmts[i][1] );
+	       }
+	       break;
+	    default:	/* any other size: emit nothing for this unit */
+	       continue;
+	    }
+	 }
+      }
+   }
+
+   if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) ||
+	fmt_0 != rmesa->swtcl.vertex_format) {
+      RADEON_NEWPRIM(rmesa);
+      rmesa->swtcl.vertex_format = fmt_0;
+      rmesa->radeon.swtcl.vertex_size =
+	  _tnl_install_attrs( ctx,
+			      rmesa->radeon.swtcl.vertex_attrs,
+			      rmesa->radeon.swtcl.vertex_attr_count,
+			      NULL, 0 );
+      rmesa->radeon.swtcl.vertex_size /= 4;	/* kept in 4-byte units ("floats" below) */
+      RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset );
+      radeon_print(RADEON_SWRENDER, RADEON_VERBOSE,
+	  "%s: vertex_size= %d floats\n",  __FUNCTION__, rmesa->radeon.swtcl.vertex_size);
+   }
+}
+
+static void radeon_predict_emit_size( r100ContextPtr rmesa )
+{
+    /* Fixed dword allowance: scissor (8) + prims (8) + vertex (7). */
+    const int fixed_dwords = 8 + 8 + 7;
+    int predicted;
+
+    /* A non-zero prediction is still current; r100_swtcl_flush() zeroes it. */
+    if (rmesa->radeon.swtcl.emit_prediction)
+        return;
+
+    predicted = radeonCountStateEmitSize( &rmesa->radeon );
+
+    /* A non-zero result from the space check triggers a fresh state count. */
+    if (rcommonEnsureCmdBufSpace( &rmesa->radeon, predicted + fixed_dwords,
+                                  __FUNCTION__ ))
+        predicted = radeonCountStateEmitSize( &rmesa->radeon );
+
+    rmesa->radeon.swtcl.emit_prediction =
+        predicted + fixed_dwords + rmesa->radeon.cmdbuf.cs->cdw;
+}
+
+static void radeonRenderStart( GLcontext *ctx )
+{
+    r100ContextPtr rmesa = R100_CONTEXT( ctx );
+    radeonSetVertexFormat( ctx );
+    /* Run a pending DMA flush hook unless it is the swtcl primitive one. */
+    if (!rmesa->radeon.dma.flush ||
+        rmesa->radeon.dma.flush == rcommon_flush_last_swtcl_prim)
+        return;
+    rmesa->radeon.dma.flush( ctx );
+}
+
+
+/**
+ * Choose the vertex state for SW TCL.  The main job is deciding ahead of time
+ * whether Mesa or the hardware performs the projection divide, and keeping
+ * the SE_COORDFMT state word in step with that decision.
+ */
+void radeonChooseVertexState( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint w_bits = (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
+			  RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
+			  RADEON_VTX_W0_IS_NOT_1_OVER_W0);
+   GLuint coord_fmt;
+
+   /* _tnl_need_projected_coords must not run during a rasterization
+    * fallback.  This function is called again once the fallback ends, so
+    * returning early here is enough.
+    */
+   if (rmesa->radeon.Fallback != 0)
+      return;
+
+   /* Begin with the current word, minus the three bits chosen below. */
+   coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT] & ~w_bits;
+
+   /* HW perspective divide is a win, but tiny vertex formats are a
+    * bigger one.
+    */
+   if ((!RENDERINPUTS_TEST_RANGE( tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX ) &&
+       !RENDERINPUTS_TEST( tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 ))
+       || (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
+      rmesa->swtcl.needproj = GL_TRUE;
+      coord_fmt |= RADEON_VTX_XY_PRE_MULT_1_OVER_W0;
+      coord_fmt |= RADEON_VTX_Z_PRE_MULT_1_OVER_W0;
+   } else {
+      rmesa->swtcl.needproj = GL_FALSE;
+      coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0;
+   }
+
+   _tnl_need_projected_coords( ctx, rmesa->swtcl.needproj );
+
+   /* Leave the hardware state untouched when nothing changed. */
+   if ( coord_fmt == rmesa->hw.set.cmd[SET_SE_COORDFMT] )
+      return;
+   RADEON_STATECHANGE( rmesa, set );
+   rmesa->hw.set.cmd[SET_SE_COORDFMT] = coord_fmt;
+}
+
+void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+   /* Judging by the helper names: emit state, then the vertex array held in
+    * swtcl.bo at current_offset, then the primitive that draws from it. */
+   radeonEmitState(&rmesa->radeon);
+   radeonEmitVertexAOS( rmesa,
+			rmesa->radeon.swtcl.vertex_size,
+			rmesa->radeon.swtcl.bo,
+			current_offset);
+
+   /* vertex_format is the word stored by radeonSetVertexFormat(). */
+   radeonEmitVbufPrim( rmesa,
+		       rmesa->swtcl.vertex_format,
+		       rmesa->radeon.swtcl.hw_primitive,
+		       rmesa->radeon.swtcl.numverts);
+   if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
+     WARN_ONCE("Rendering was %d commands larger than predicted size."
+	 " We might overflow  command buffer.\n",
+	 rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
+
+   /* Zero means "no prediction": radeon_predict_emit_size() recomputes it. */
+   rmesa->radeon.swtcl.emit_prediction = 0;
+
+}
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Try to simulate missing primitives with indexed vertices.
+ */
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+#define HAVE_POLYGONS    0
+/* \todo: is it possible to make "ELTS" work with t_vertex code ? */
+#define HAVE_ELTS        0
+
+static const GLuint hw_prim[GL_POLYGON+1] = {	/* indexed by GL primitive mode */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,		/* GL_POINTS */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINES */
+   0,						/* GL_LINE_LOOP: no entry */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP,	/* GL_LINE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLES */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP,	/* GL_TRIANGLE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN,		/* GL_TRIANGLE_FAN */
+   0,						/* GL_QUADS: no entry */
+   0,						/* GL_QUAD_STRIP: no entry */
+   0						/* GL_POLYGON: no entry */
+};
+
+static INLINE void
+radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim )
+{
+   RADEON_NEWPRIM( rmesa );	/* the same macro FLUSH() is defined as in this file */
+   rmesa->radeon.swtcl.hw_primitive = hw_prim[prim];	/* prim must not exceed GL_POLYGON */
+   //   assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start);
+}
+
+static void* radeon_alloc_verts( r100ContextPtr rmesa , GLuint nr, GLuint size )
+{
+   void *verts = 0;
+   while (!verts) {	/* retry until the allocator returns storage */
+      radeon_predict_emit_size( rmesa );
+      verts = rcommonAllocDmaLowVerts( &rmesa->radeon, nr, size );
+   }
+   return verts;
+}
+
+#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
+#define INIT( prim ) radeonDmaPrimitive( rmesa, prim )
+#define FLUSH()  RADEON_NEWPRIM( rmesa )
+/* Fixed at 10.  Disabled formula: (dma.current.end - dma.current.ptr) / (vertex_size*4) */
+#define GET_CURRENT_VB_MAX_VERTS() 10
+#define GET_SUBSEQUENT_VB_MAX_VERTS() \
+  ((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4))
+#define ALLOC_VERTS( nr ) radeon_alloc_verts( rmesa, nr, rmesa->radeon.swtcl.vertex_size * 4 )
+#define EMIT_VERTS( ctx, j, nr, buf ) \
+  _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf)
+
+#define TAG(x) radeon_dma_##x
+#include "tnl_dd/t_dd_dmatmp.h"
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+static GLboolean radeon_run_render( GLcontext *ctx,
+				    struct tnl_pipeline_stage *stage )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   tnl_render_func *tab = TAG(render_tab_verts);
+   GLuint i;
+   /* Pipeline stage: draws each non-empty primitive of VB through the DMA table. */
+   if (rmesa->radeon.swtcl.RenderIndex != 0 ||
+       !radeon_dma_validate_render( ctx, VB ))
+      return GL_TRUE;		/* not handled here; let the pipeline go on */
+
+   tnl->Driver.Render.Start( ctx );
+
+   for (i = 0 ; i < VB->PrimitiveCount ; i++)
+   {
+      GLuint prim = VB->Primitive[i].mode;
+      GLuint start = VB->Primitive[i].start;
+      GLuint length = VB->Primitive[i].count;
+
+      if (!length)
+	 continue;
+
+      radeon_print(RADEON_SWRENDER, RADEON_NORMAL,
+	  "radeon_render.c: prim %s %d..%d\n",
+		 _mesa_lookup_enum_by_nr(prim & PRIM_MODE_MASK),
+		 start, start+length);
+
+      /* Empty primitives were skipped above, so no second length test. */
+      tab[prim & PRIM_MODE_MASK]( ctx, start, start + length, prim );
+   }
+
+   tnl->Driver.Render.Finish( ctx );
+
+   return GL_FALSE;		/* finished the pipe */
+}
+
+
+
+const struct tnl_pipeline_stage _radeon_render_stage =
+{
+   "radeon render",		/* presumably the stage name -- confirm against t_context.h */
+   NULL,			/* the four members between the string and run are left NULL */
+   NULL,
+   NULL,
+   NULL,
+   radeon_run_render		/* run */
+};
+
+
+/**************************************************************************/
+
+
+static const GLuint reduced_hw_prim[GL_POLYGON+1] = {	/* indexed by GL primitive mode */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_POINT,		/* GL_POINTS */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINES */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINE_LOOP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_LINE,		/* GL_LINE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLES */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLE_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_TRIANGLE_FAN */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_QUADS */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST,	/* GL_QUAD_STRIP */
+   RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST		/* GL_POLYGON */
+};
+
+static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim );
+static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim );
+static void radeonResetLineStipple( GLcontext *ctx );
+
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+
+#undef LOCAL_VARS
+#undef ALLOC_VERTS
+#define CTX_ARG r100ContextPtr rmesa
+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) radeon_alloc_verts( rmesa, n, (size) * 4 )
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);		\
+   const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;
+#define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int)))
+#define VERTEX radeonVertex 
+#undef TAG
+#define TAG(x) radeon_##x
+#include "tnl_dd/t_dd_triemit.h"
+
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+#define QUAD( a, b, c, d ) radeon_quad( rmesa, a, b, c, d )
+#define TRI( a, b, c )     radeon_triangle( rmesa, a, b, c )
+#define LINE( a, b )       radeon_line( rmesa, a, b )
+#define POINT( a )         radeon_point( rmesa, a )
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+#define RADEON_TWOSIDE_BIT	0x01	/* set by radeonChooseRenderState() for DD_TRI_LIGHT_TWOSIDE */
+#define RADEON_UNFILLED_BIT	0x02	/* set by radeonChooseRenderState() for DD_TRI_UNFILLED */
+#define RADEON_MAX_TRIFUNC	0x04	/* number of bit combinations, i.e. the size of rast_tab[] */
+
+
+static struct {	/* rasterization entry points, one set per combination of the bits above */
+   tnl_points_func	        points;
+   tnl_line_func		line;
+   tnl_triangle_func	triangle;
+   tnl_quad_func		quad;
+} rast_tab[RADEON_MAX_TRIFUNC];
+
+
+#define DO_FALLBACK  0
+#define DO_OFFSET    0
+#define DO_UNFILLED (IND & RADEON_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & RADEON_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_SPEC   1
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define TAB rast_tab
+
+#define DEPTH_SCALE 1.0
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a < 0)
+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + ((e) * rmesa->radeon.swtcl.vertex_size * sizeof(int)))
+
+#define VERT_SET_RGBA( v, c )  					\
+do {								\
+   radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]);		\
+} while (0)
+
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+
+#define VERT_SET_SPEC( v, c )					\
+do {								\
+   if (specoffset) {						\
+      radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]);	\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]);	\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]);	\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]);	\
+   }								\
+} while (0)
+#define VERT_COPY_SPEC( v0, v1 )			\
+do {							\
+   if (specoffset) {					\
+      radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]);	\
+      radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]);	\
+      spec0->red   = spec1->red;	\
+      spec0->green = spec1->green;	\
+      spec0->blue  = spec1->blue; 	\
+   }							\
+} while (0)
+
+/* These don't need LE32_TO_CPU() as they are only used to save and restore
+ * colors which are already in the correct format.
+ */
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+#define VERT_SAVE_SPEC( idx )    if (specoffset) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
+
+#undef LOCAL_VARS
+#undef TAG
+#undef INIT
+
+#define LOCAL_VARS(n)							\
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);			\
+   GLuint color[n] = {0}, spec[n] = {0};						\
+   GLuint coloroffset = rmesa->swtcl.coloroffset;	\
+   GLuint specoffset = rmesa->swtcl.specoffset;			\
+   (void) color; (void) spec; (void) coloroffset; (void) specoffset;
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+#define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] )
+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
+#undef TAG
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (RADEON_TWOSIDE_BIT|RADEON_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+static void init_rast_tab( void )	/* runs every init function generated by the t_dd_tritmp.h includes above; presumably each fills one rast_tab[] slot - confirm in the template */
+{
+   init();			/* generated with IND == 0 */
+   init_twoside();		/* generated with IND == RADEON_TWOSIDE_BIT */
+   init_unfilled();		/* generated with IND == RADEON_UNFILLED_BIT */
+   init_twoside_unfilled();	/* generated with IND == RADEON_TWOSIDE_BIT|RADEON_UNFILLED_BIT */
+}
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		\
+      radeon_point( rmesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+   radeon_line( rmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 )  \
+   radeon_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   radeon_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#undef INIT
+#define INIT(x) do {					\
+   radeonRenderPrimitive( ctx, x );			\
+} while (0)
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);		\
+   const GLuint vertsize = rmesa->radeon.swtcl.vertex_size;		\
+   const char *radeonverts = (char *)rmesa->radeon.swtcl.verts;		\
+   const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+   const GLboolean stipple = ctx->Line.StippleFlag;		\
+   (void) elt; (void) stipple;
+#define RESET_STIPPLE	if ( stipple ) radeonResetLineStipple( ctx );
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS
+#define ELT(x) (x)
+#define TAG(x) radeon_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) radeon_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+void radeonChooseRenderState( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint idx = 0;
+
+   /* Only acts while TclFallback is set and no rasterization Fallback is. */
+   if (rmesa->radeon.Fallback || !rmesa->radeon.TclFallback)
+      return;
+
+   if (caps & DD_TRI_UNFILLED)      idx |= RADEON_UNFILLED_BIT;
+   if (caps & DD_TRI_LIGHT_TWOSIDE) idx |= RADEON_TWOSIDE_BIT;
+
+   /* The hooks for this index are already installed. */
+   if (idx == rmesa->radeon.swtcl.RenderIndex)
+      return;
+   rmesa->radeon.swtcl.RenderIndex = idx;
+   tnl->Driver.Render.Points = rast_tab[idx].points;
+   tnl->Driver.Render.Line = rast_tab[idx].line;
+   tnl->Driver.Render.ClippedLine = rast_tab[idx].line;
+   tnl->Driver.Render.Triangle = rast_tab[idx].triangle;
+   tnl->Driver.Render.Quad = rast_tab[idx].quad;
+   if (idx != 0) {
+      tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+      tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+   } else {
+      tnl->Driver.Render.PrimTabVerts = radeon_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = radeon_render_tab_elts;
+      tnl->Driver.Render.ClippedPolygon = radeon_fast_clipped_poly;
+   }
+}
+
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+
+static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   /* Hardware primitive unchanged: nothing to do. */
+   if (hwprim == rmesa->radeon.swtcl.hw_primitive)
+      return;
+   RADEON_NEWPRIM( rmesa );
+   rmesa->radeon.swtcl.hw_primitive = hwprim;
+}
+
+static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLboolean filled = !(ctx->_TriangleCaps & DD_TRI_UNFILLED);
+   rmesa->radeon.swtcl.render_primitive = prim;
+   if (filled || prim < GL_TRIANGLES) radeonRasterPrimitive( ctx, reduced_hw_prim[prim] ); /* otherwise presumably left to the unfilled helpers (RASTERIZE) */
+}
+
+static void radeonRenderFinish( GLcontext *ctx )	/* installed as tnl->Driver.Render.Finish */
+{	/* no work is done here */
+}
+
+static void radeonResetLineStipple( GLcontext *ctx )	/* installed as tnl->Driver.Render.ResetLineStipple */
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   RADEON_STATECHANGE( rmesa, lin );	/* flag the "lin" state atom as changed; presumably this makes the hw restart the stipple pattern - confirm */
+}
+
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+
+static const char * const fallbackStrings[] = {
+   "Texture mode",
+   "glDrawBuffer(GL_FRONT_AND_BACK)",
+   "glEnable(GL_STENCIL) without hw stencil buffer",
+   "glRenderMode(selection or feedback)",
+   "glBlendEquation",
+   "glBlendFunc",
+   "RADEON_NO_RAST",
+   "Mixing GL_CLAMP_TO_BORDER and GL_CLAMP (or GL_MIRROR_CLAMP_ATI)"
+};
+
+
+/* Describe the highest bit set in 'bit'; a bit with no fallbackStrings[] entry yields a placeholder instead of an out-of-bounds read. */
+static const char *getFallbackString(GLuint bit)
+{
+   GLuint i = 0;
+   for ( ; bit > 1 ; bit >>= 1)
+      i++;
+   if (i >= sizeof(fallbackStrings) / sizeof(fallbackStrings[0])) return "(unknown fallback)";
+   return fallbackStrings[i];
+}
+
+
+void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode )	/* set (mode != 0) or clear one rasterization-fallback bit */
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint oldfallback = rmesa->radeon.Fallback;	/* mask as it was before this call */
+
+   if (mode) {
+      rmesa->radeon.Fallback |= bit;
+      if (oldfallback == 0) {	/* first fallback reason: begin the rasterization fallback */
+	 radeon_firevertices(&rmesa->radeon);
+	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE );
+	 _swsetup_Wakeup( ctx );
+	 rmesa->radeon.swtcl.RenderIndex = ~0;	/* matches no valid index, so radeonChooseRenderState() reinstalls its hooks */
+         if (RADEON_DEBUG & RADEON_FALLBACKS) {
+            fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n",
+                    bit, getFallbackString(bit));
+         }
+      }
+   }
+   else {
+      rmesa->radeon.Fallback &= ~bit;
+      if (oldfallback == bit) {	/* 'bit' was the only reason left: end the rasterization fallback */
+	 _swrast_flush( ctx );
+	 tnl->Driver.Render.Start = radeonRenderStart;	/* reinstall the same hooks radeonInitSwtcl() sets */
+	 tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
+	 tnl->Driver.Render.Finish = radeonRenderFinish;
+
+	 tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+	 tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+	 tnl->Driver.Render.Interp = _tnl_interp;
+
+	 tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
+	 TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE );
+	 if (rmesa->radeon.TclFallback) {
+	    /* These are already done if rmesa->radeon.TclFallback goes to
+	     * zero above. But not if it doesn't (RADEON_NO_TCL for
+	     * example?)
+	     */
+	    _tnl_invalidate_vertex_state( ctx, ~0 );
+	    _tnl_invalidate_vertices( ctx, ~0 );
+	    RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset );
+	    radeonChooseVertexState( ctx );
+	    radeonChooseRenderState( ctx );
+	 }
+         if (RADEON_DEBUG & RADEON_FALLBACKS) {
+            fprintf(stderr, "Radeon end rasterization fallback: 0x%x %s\n",
+                    bit, getFallbackString(bit));
+         }
+      }
+   }
+}
+
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+void radeonInitSwtcl( GLcontext *ctx )
+{
+   static GLboolean tables_ready = GL_FALSE;
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+   /* rast_tab[] is file-static, so it is filled on the first call only. */
+   if (!tables_ready) {
+      tables_ready = GL_TRUE;
+      init_rast_tab();
+   }
+
+   /* Vertex building is left to the generic tnl helpers. */
+   tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+   tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+   tnl->Driver.Render.Interp = _tnl_interp;
+   /* Driver-specific render hooks. */
+   tnl->Driver.Render.Start = radeonRenderStart;
+   tnl->Driver.Render.PrimitiveNotify = radeonRenderPrimitive;
+   tnl->Driver.Render.Finish = radeonRenderFinish;
+   tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple;
+   _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, RADEON_MAX_TNL_VERTEX_SIZE );
+   rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+   rmesa->radeon.swtcl.emit_prediction = 0;
+   rmesa->radeon.swtcl.hw_primitive = 0;
+   rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
+   rmesa->radeon.swtcl.RenderIndex = ~0;
+}
+
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.h b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
new file mode 100644
index 0000000000..da89158eeb
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.h
@@ -0,0 +1,66 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#ifndef __RADEON_TRIS_H__
+#define __RADEON_TRIS_H__
+
+#include "main/mtypes.h"
+#include "swrast/swrast.h"
+#include "radeon_context.h"
+
+extern void radeonInitSwtcl( GLcontext *ctx );
+
+extern void radeonChooseRenderState( GLcontext *ctx );
+extern void radeonChooseVertexState( GLcontext *ctx );
+
+extern void radeonCheckTexSizes( GLcontext *ctx );
+
+extern void radeonBuildVertices( GLcontext *ctx, GLuint start, GLuint count,
+				 GLuint newinputs );
+
+extern void radeonPrintSetupFlags(char *msg, GLuint flags );
+
+
+extern void radeon_emit_indexed_verts( GLcontext *ctx,
+				       GLuint start,
+				       GLuint count );
+
+extern void radeon_translate_vertex( GLcontext *ctx, 
+				     const radeonVertex *src, 
+				     SWvertex *dst );
+
+extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v );
+
+extern void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
new file mode 100644
index 0000000000..ea796e1a45
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -0,0 +1,642 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/light.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
+
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "radeon_common.h"
+#include "radeon_context.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_tcl.h"
+#include "radeon_swtcl.h"
+#include "radeon_maos.h"
+#include "radeon_common_context.h"
+
+
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Try to simulate missing primitives with indexed vertices.
+ */
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_LOOP   0
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+#define HAVE_POLYGONS    1
+#define HAVE_ELTS        1
+
+
+#define HW_POINTS           RADEON_CP_VC_CNTL_PRIM_TYPE_POINT
+#define HW_LINES            RADEON_CP_VC_CNTL_PRIM_TYPE_LINE
+#define HW_LINE_LOOP        0
+#define HW_LINE_STRIP       RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP
+#define HW_TRIANGLES        RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST
+#define HW_TRIANGLE_STRIP_0 RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP
+#define HW_TRIANGLE_STRIP_1 0
+#define HW_TRIANGLE_FAN     RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
+#define HW_QUADS            0
+#define HW_QUAD_STRIP       0
+#define HW_POLYGON          RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN
+
+
+static GLboolean discrete_prim[0x10] = {	/* indexed by (hw_prim & 0xf) in radeonTclPrimitive() */
+   0,				/* 0 none */
+   1,				/* 1 points */
+   1,				/* 2 lines */
+   0,				/* 3 line_strip */
+   1,				/* 4 tri_list */
+   0,				/* 5 tri_fan */
+   0,				/* 6 tri_type2 */
+   1,				/* 7 rect list (unused) */
+   1,				/* 8 3vert point */
+   1,				/* 9 3vert line */
+   0,				/* 10-15 carry no label; NOTE(review): check all labels against the RADEON_CP_VC_CNTL_PRIM_TYPE_* values */
+   0,
+   0,
+   0,
+   0,
+   0,
+};				/* a 0 entry makes radeonTclPrimitive() call RADEON_NEWPRIM even when the primitive type is unchanged */
+   
+
+#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx)
+#define ELT_TYPE  GLushort
+
+#define ELT_INIT(prim, hw_prim) \
+   radeonTclPrimitive( ctx, prim, hw_prim | RADEON_CP_VC_CNTL_PRIM_WALK_IND )
+
+#define GET_MESA_ELTS() rmesa->tcl.Elts
+
+
+/* Don't really know how many elts will fit in what's left of cmdbuf,
+ * as there is state to emit, etc:
+ */
+
+/* Testing on isosurf shows a maximum around here.  Don't know if it's
+ * the card or driver or kernel module that is causing the behaviour.
+ */
+#define GET_MAX_HW_ELTS() 300
+
+
+#define RESET_STIPPLE() do {			\
+   RADEON_STATECHANGE( rmesa, lin );		\
+   radeonEmitState(&rmesa->radeon);			\
+} while (0)
+
+#define AUTO_STIPPLE( mode )  do {		\
+   RADEON_STATECHANGE( rmesa, lin );		\
+   if (mode)					\
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] |=	\
+	 RADEON_LINE_PATTERN_AUTO_RESET;	\
+   else						\
+      rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &=	\
+	 ~RADEON_LINE_PATTERN_AUTO_RESET;	\
+   radeonEmitState(&rmesa->radeon);		\
+} while (0)
+
+
+
+#define ALLOC_ELTS(nr)	radeonAllocElts( rmesa, nr )
+
+static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) 
+{
+   /* Run a pending dma flush callback, if any, before emitting anything. */
+   if (rmesa->radeon.dma.flush) {
+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );
+   }
+   radeonEmitAOS( rmesa, rmesa->radeon.tcl.aos_count, 0 );
+
+   return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format,
+				    rmesa->tcl.hw_primitive, nr );
+}
+
+#define CLOSE_ELTS() if (0)  RADEON_NEWPRIM( rmesa )
+
+
+
+/* TODO: Try to extend existing primitive if both are identical,
+ * discrete and there are no intervening state changes.  (Somewhat
+ * duplicates changes to DrawArrays code)
+ */
+static void radeonEmitPrim( GLcontext *ctx,
+			    GLenum prim,
+			    GLuint hwprim,
+			    GLuint start,
+			    GLuint count )
+{
+   r100ContextPtr rmesa = R100_CONTEXT( ctx );
+   const GLuint nr_verts = count - start;
+
+   /* Select the hw primitive first; rmesa->tcl.hw_primitive is read below. */
+   radeonTclPrimitive( ctx, prim, hwprim );
+
+   /* Emit the AOS state with 'start' as its offset argument. */
+   radeonEmitAOS( rmesa, rmesa->radeon.tcl.aos_count, start );
+
+   /* Why couldn't this packet have taken an offset param? */
+   radeonEmitVbufPrim( rmesa, rmesa->tcl.vertex_format,
+		       rmesa->tcl.hw_primitive,
+		       nr_verts );
+}
+
+#define EMIT_PRIM( ctx, prim, hwprim, start, count ) do {       \
+   radeonEmitPrim( ctx, prim, hwprim, start, count );           \
+   (void) rmesa; } while (0)
+
+#define MAX_CONVERSION_SIZE 40
+
+/* Try & join small primitives
+ */
+#if 0
+#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM ) 0
+#else
+#define PREFER_DISCRETE_ELT_PRIM( NR, PRIM )			\
+  ((NR) < 20 ||							\
+   ((NR) < 40 &&						\
+    rmesa->tcl.hw_primitive == (PRIM|				\
+			    RADEON_CP_VC_CNTL_PRIM_WALK_IND|	\
+			    RADEON_CP_VC_CNTL_TCL_ENABLE)))
+#endif
+
+#ifdef MESA_BIG_ENDIAN
+/* We could do without (most of) this ugliness if dest was always 32 bit word aligned... */
+#define EMIT_ELT(dest, offset, x) do {				\
+	int off = offset + ( ( (GLuint)dest & 0x2 ) >> 1 );	\
+	GLushort *des = (GLushort *)( (GLuint)dest & ~0x2 );	\
+	(des)[ off + 1 - 2 * ( off & 1 ) ] = (GLushort)(x); 	\
+	(void)rmesa; } while (0)
+#else
+#define EMIT_ELT(dest, offset, x) do {				\
+	(dest)[offset] = (GLushort) (x);			\
+	(void)rmesa; } while (0)
+#endif
+
+#define EMIT_TWO_ELTS(dest, offset, x, y)  *(GLuint *)(dest+offset) = ((y)<<16)|(x);
+
+
+
+#define TAG(x) tcl_##x
+#include "tnl_dd/t_dd_dmatmp2.h"
+
+/**********************************************************************/
+/*                          External entrypoints                     */
+/**********************************************************************/
+
+/* Dispatch a non-indexed primitive through tcl_render_tab_verts[], keyed by its mode bits. */
+void radeonEmitPrimitive( GLcontext *ctx, GLuint first, GLuint last, GLuint flags )
+{
+   const GLuint mode = flags & PRIM_MODE_MASK;
+
+   tcl_render_tab_verts[mode]( ctx, first, last, flags );
+}
+
+/* Dispatch an indexed primitive through tcl_render_tab_elts[], keyed by its mode bits. */
+void radeonEmitEltPrimitive( GLcontext *ctx, GLuint first, GLuint last, GLuint flags )
+{
+   const GLuint mode = flags & PRIM_MODE_MASK;
+
+   tcl_render_tab_elts[mode]( ctx, first, last, flags );
+}
+
+void radeonTclPrimitive( GLcontext *ctx, 
+			 GLenum prim,
+			 int hw_prim )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLuint wanted = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
+   GLuint se_cntl;
+
+   /* Start a new primitive when the type changes, and always for types
+    * whose discrete_prim[] entry is 0. */
+   if (!discrete_prim[hw_prim&0xf] || wanted != rmesa->tcl.hw_primitive) {
+      RADEON_NEWPRIM( rmesa );
+      rmesa->tcl.hw_primitive = wanted;
+   }
+
+   /* Rebuild the RADEON_FLAT_SHADE_VTX_* field of SE_CNTL. */
+   se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL] & ~RADEON_FLAT_SHADE_VTX_LAST;
+   se_cntl |= (prim == GL_POLYGON && (ctx->_TriangleCaps & DD_FLATSHADE))
+      ? RADEON_FLAT_SHADE_VTX_0
+      : RADEON_FLAT_SHADE_VTX_LAST;
+
+   /* Touch the "set" state atom only when the value really changed. */
+   if (rmesa->hw.set.cmd[SET_SE_CNTL] != se_cntl) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
+   }
+}
+
+/**********************************************************************/
+/*             Fog blend factor computation for hw tcl                */
+/*             same calculation used as in t_vb_fog.c                 */
+/**********************************************************************/
+
+#define FOG_EXP_TABLE_SIZE 256
+#define FOG_MAX (10.0)
+#define EXP_FOG_MAX .0006595
+#define FOG_INCR (FOG_MAX/FOG_EXP_TABLE_SIZE)
+static GLfloat exp_table[FOG_EXP_TABLE_SIZE];
+
+#if 1
+#define NEG_EXP( result, narg )						\
+do {									\
+   GLfloat f = (GLfloat) (narg * (1.0/FOG_INCR));			\
+   GLint k = (GLint) f;							\
+   if (k > FOG_EXP_TABLE_SIZE-2) 					\
+      result = (GLfloat) EXP_FOG_MAX;					\
+   else									\
+      result = exp_table[k] + (f-k)*(exp_table[k+1]-exp_table[k]);	\
+} while (0)
+#else
+#define NEG_EXP( result, narg )					\
+do {								\
+   result = exp(-narg);						\
+} while (0)
+#endif
+
+
+/**
+ * Initialize the exp_table[] lookup table for approximating exp().
+ */
+void
+radeonInitStaticFogData( void )
+{
+   GLfloat dist = 0.0F;
+   GLint slot;
+   /* Slot k holds exp(-dist), with dist advanced by FOG_INCR per slot in a float accumulator. */
+   for (slot = 0; slot < FOG_EXP_TABLE_SIZE; slot++, dist += FOG_INCR)
+      exp_table[slot] = (GLfloat) exp(-dist);
+}
+
+
+/**
+ * Compute per-vertex fog blend factors from fog coordinates by
+ * evaluating the GL_LINEAR, GL_EXP or GL_EXP2 fog function.
+ * Fog coordinates are distances from the eye (typically between the
+ * near and far clip plane distances).
+ * Note the fog (eye Z) coords may be negative so we use ABS(z) below.
+ * Fog blend factors are in the range [0,1].
+ */
+float
+radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord )
+{
+   const GLfloat end = ctx->Fog.End;
+   const GLfloat z = FABSF(fogcoord);	/* the fog coordinate may be negative */
+   GLfloat d, temp;
+   switch (ctx->Fog.Mode) {
+   case GL_LINEAR:
+      /* (end - z) / (end - start), clamped to [0,1]; when start == end
+       * the scale is 1 so nothing is divided by zero. */
+      if (ctx->Fog.Start == end)
+         d = 1.0F;
+      else
+         d = 1.0F / (end - ctx->Fog.Start);
+      temp = (end - z) * d;
+      return CLAMP(temp, 0.0F, 1.0F);
+   case GL_EXP:
+      /* exp(-density * z), approximated by NEG_EXP */
+      d = ctx->Fog.Density;
+      NEG_EXP( temp, d * z );
+      return temp;
+   case GL_EXP2:
+      /* exp(-(density * z)^2), approximated by NEG_EXP */
+      d = ctx->Fog.Density*ctx->Fog.Density;
+      NEG_EXP( temp, d * z * z );
+      return temp;
+   default:
+      _mesa_problem(ctx, "Bad fog mode in radeonComputeFogBlendFactor");
+      return 0;
+   }
+}
+
+/**
+ * Predict the total emit size of the next rendering operation so that no flush happens in the middle of rendering.
+ * The prediction should be as tight as possible without ever falling below the real worst case.
+ */
+static GLuint radeonEnsureEmitSize( GLcontext * ctx , GLuint inputs )
+{
+  r100ContextPtr rmesa = R100_CONTEXT(ctx);
+  TNLcontext *tnl = TNL_CONTEXT(ctx);
+  struct vertex_buffer *VB = &tnl->vb;
+  GLuint space_required;	/* predicted size of everything except state; the caller adds the result to cmdbuf.cs->cdw */
+  GLuint state_size;	/* predicted size of the state emit */
+  GLuint nr_aos = 1; /* radeonEmitArrays always emits one */
+  int i;
+  /* input flags that each allocate one aos object */
+  const GLuint flags_to_check[] = {
+    VERT_BIT_NORMAL,
+    VERT_BIT_COLOR0,
+    VERT_BIT_COLOR1,
+    VERT_BIT_FOG
+  };
+  /* predict number of aos to emit */
+  for (i=0; i < sizeof(flags_to_check)/sizeof(flags_to_check[0]); ++i)
+  {
+    if (inputs & flags_to_check[i])
+      ++nr_aos;
+  }
+  for (i = 0; i < ctx->Const.MaxTextureUnits; ++i)	/* plus one aos per requested texture unit */
+  {
+    if (inputs & VERT_BIT_TEX(i))
+      ++nr_aos;
+  }
+
+  {
+    /* count the prediction for state size */
+    space_required = 0;
+    state_size = radeonCountStateEmitSize( &rmesa->radeon );
+    /* tcl may be changed in radeonEmitArrays so account for it if not dirty */
+    if (!rmesa->hw.tcl.dirty)
+      state_size += rmesa->hw.tcl.check( rmesa->radeon.glCtx, &rmesa->hw.tcl );
+    /* predict size for elements */
+    for (i = 0; i < VB->PrimitiveCount; ++i)
+    {
+      if (!VB->Primitive[i].count)
+	continue;
+      /* If primitive.count is less than MAX_CONVERSION_SIZE the
+	 rendering code may decide to convert to elts.
+	 In that case we have to make a pessimistic prediction
+	 and use the larger of the 2 paths. */
+      const GLuint elts = ELTS_BUFSZ(nr_aos);
+      const GLuint index = INDEX_BUFSZ;
+      const GLuint vbuf = VBUF_BUFSZ;
+      if ( (!VB->Elts && VB->Primitive[i].count >= MAX_CONVERSION_SIZE)
+	  || vbuf > index + elts)
+	space_required += vbuf;
+      else
+	space_required += index + elts;
+      space_required += VB->Primitive[i].count * 3;	/* NOTE(review): the per-vertex factor of 3 is not explained here - confirm */
+      space_required += AOS_BUFSZ(nr_aos);
+    }
+    space_required += SCISSOR_BUFSZ;
+  }
+  /* flush the buffer in case we need more than is left. */
+  if (rcommonEnsureCmdBufSpace(&rmesa->radeon, space_required, __FUNCTION__))
+    return space_required + radeonCountStateEmitSize( &rmesa->radeon );	/* nonzero result: state size is counted again (presumably a flush happened - confirm) */
+  else
+    return space_required + state_size;
+}
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+/* TCL render.
+ */
+static GLboolean radeon_run_tcl_render( GLcontext *ctx,
+					struct tnl_pipeline_stage *stage )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0;	/* position and primary color are always requested */
+   GLuint i;
+
+   /* TODO: separate this from the swtnl pipeline 
+    */
+   if (rmesa->radeon.TclFallback)
+      return GL_TRUE;	/* fallback to software t&l */
+
+   if (VB->Count == 0)
+      return GL_FALSE;	/* nothing to draw */
+
+   /* NOTE: inputs != tnl->render_inputs - these are the untransformed
+    * inputs.
+    */
+   if (ctx->Light.Enabled) {
+      inputs |= VERT_BIT_NORMAL;
+   }
+
+   if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
+      inputs |= VERT_BIT_COLOR1;
+   }
+
+   if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) {
+      inputs |= VERT_BIT_FOG;
+   }
+
+   for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
+      if (ctx->Texture.Unit[i]._ReallyEnabled) {
+      /* TODO: probably should not emit texture coords when texgen is enabled */
+	 if (rmesa->TexGenNeedNormals[i]) {
+	    inputs |= VERT_BIT_NORMAL;
+	 }
+	 inputs |= VERT_BIT_TEX(i);
+      }
+   }
+
+   radeonReleaseArrays( ctx, ~0 );
+   GLuint emit_end = radeonEnsureEmitSize( ctx, inputs )
+     + rmesa->radeon.cmdbuf.cs->cdw;	/* predicted value of cs->cdw once this draw has been emitted */
+   radeonEmitArrays( ctx, inputs );
+
+   rmesa->tcl.Elts = VB->Elts;	/* non-NULL selects the indexed path in the loop below */
+
+   for (i = 0 ; i < VB->PrimitiveCount ; i++)
+   {
+      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
+      GLuint start = VB->Primitive[i].start;
+      GLuint length = VB->Primitive[i].count;
+
+      if (!length)
+	 continue;
+
+      if (rmesa->tcl.Elts)
+	 radeonEmitEltPrimitive( ctx, start, start+length, prim );
+      else
+	 radeonEmitPrimitive( ctx, start, start+length, prim );
+   }
+
+   if (emit_end < rmesa->radeon.cmdbuf.cs->cdw)	/* more was emitted than radeonEnsureEmitSize() predicted */
+      WARN_ONCE("Rendering was %d commands larger than predicted size."
+	  " We might overflow  command buffer.\n", rmesa->radeon.cmdbuf.cs->cdw - emit_end);
+
+   return GL_FALSE;		/* finished the pipe */
+}
+
+
+
+/* Initial state for tcl stage.  
+ */
+const struct tnl_pipeline_stage _radeon_tcl_stage =
+{
+   "radeon render",
+   NULL,
+   NULL,
+   NULL,
+   NULL,
+   radeon_run_tcl_render	/* run */
+};
+
+
+
+/**********************************************************************/
+/*                 Validate state at pipeline start                   */
+/**********************************************************************/
+
+
+/*-----------------------------------------------------------------------
+ * Manage TCL fallbacks
+ */
+
+
+static void transition_to_swtnl( GLcontext *ctx )	/* called by radeonTclFallback() when the first TCL fallback bit is set */
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint se_cntl;
+
+   RADEON_NEWPRIM( rmesa );
+   rmesa->swtcl.vertex_format = 0;	/* presumably makes radeonChooseVertexState() rebuild the format - confirm */
+
+   radeonChooseVertexState( ctx );
+   radeonChooseRenderState( ctx );
+
+   _mesa_validate_all_lighting_tables( ctx ); 
+
+   tnl->Driver.NotifyMaterialChange = 
+      _mesa_validate_all_lighting_tables;	/* transition_to_hwtnl() installs radeonUpdateMaterial instead */
+
+   radeonReleaseArrays( ctx, ~0 );
+
+   se_cntl = rmesa->hw.set.cmd[SET_SE_CNTL];
+   se_cntl |= RADEON_FLAT_SHADE_VTX_LAST;	/* always the VTX_LAST setting here; radeonTclPrimitive() may pick VTX_0 */
+	 
+   if (se_cntl != rmesa->hw.set.cmd[SET_SE_CNTL]) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_CNTL] = se_cntl;
+   }
+}
+
+
+static void transition_to_hwtnl( GLcontext *ctx )	/* called by radeonTclFallback() when the last TCL fallback bit is cleared */
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
+
+   se_coord_fmt &= ~(RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
+		     RADEON_VTX_Z_PRE_MULT_1_OVER_W0 |
+		     RADEON_VTX_W0_IS_NOT_1_OVER_W0);
+   se_coord_fmt |= RADEON_VTX_W0_IS_NOT_1_OVER_W0;	/* net effect: both PRE_MULT bits cleared, W0_IS_NOT_1_OVER_W0 set */
+
+   if ( se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT] ) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
+      _tnl_need_projected_coords( ctx, GL_FALSE );
+   }
+
+   radeonUpdateMaterial( ctx );
+
+   tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial;
+
+   if ( rmesa->radeon.dma.flush )			
+      rmesa->radeon.dma.flush( rmesa->radeon.glCtx );	
+
+   rmesa->radeon.dma.flush = NULL;	/* the callback has run (if set); drop it */
+   rmesa->swtcl.vertex_format = 0;
+   
+   //   if (rmesa->swtcl.indexed_verts.buf) 
+   //      radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, 
+   //			      __FUNCTION__ );
+
+   if (RADEON_DEBUG & RADEON_FALLBACKS)
+      fprintf(stderr, "Radeon end tcl fallback\n");
+}
+
+static char *fallbackStrings[] = {
+   "Rasterization fallback",
+   "Unfilled triangles",
+   "Twosided lighting, differing materials",
+   "Materials in VB (maybe between begin/end)",
+   "Texgen unit 0",
+   "Texgen unit 1",
+   "Texgen unit 2",
+   "User disable",
+   "Fogcoord with separate specular lighting"
+};
+
+
+/* Describe the highest bit set in 'bit'; a bit with no fallbackStrings[] entry yields a placeholder instead of an out-of-bounds read. */
+static char *getFallbackString(GLuint bit)
+{
+   GLuint i = 0;
+   for ( ; bit > 1 ; bit >>= 1)
+      i++;
+   if (i >= sizeof(fallbackStrings) / sizeof(fallbackStrings[0])) return "(unknown fallback)";
+   return fallbackStrings[i];
+}
+
+
+
+void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const GLuint before = rmesa->radeon.TclFallback;
+
+   if (!mode) {
+      /* Clearing: go back to hw tnl only if 'bit' was the sole reason. */
+      rmesa->radeon.TclFallback &= ~bit;
+      if (before != bit)
+	 return;
+      if (RADEON_DEBUG & RADEON_FALLBACKS)
+	 fprintf(stderr, "Radeon end tcl fallback %s\n", getFallbackString( bit ));
+      transition_to_hwtnl( ctx );
+      return;
+   }
+
+   /* Setting: switch to sw tnl only when this is the first reason. */
+   rmesa->radeon.TclFallback |= bit;
+   if (before != 0)
+      return;
+   if (RADEON_DEBUG & RADEON_FALLBACKS)
+      fprintf(stderr, "Radeon begin tcl fallback %s\n", getFallbackString( bit ));
+   transition_to_swtnl( ctx );
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.h b/src/mesa/drivers/dri/radeon/radeon_tcl.h
new file mode 100644
index 0000000000..dccbea5fdb
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.h
@@ -0,0 +1,67 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     Tungsten Graphics Inc., Austin, Texas.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#ifndef __RADEON_TCL_H__
+#define __RADEON_TCL_H__
+
+#include "radeon_context.h"
+
+extern void radeonTclPrimitive( GLcontext *ctx, GLenum prim, int hw_prim );
+extern void radeonEmitEltPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+				    GLuint flags );
+extern void radeonEmitPrimitive( GLcontext *ctx, GLuint first, GLuint last,
+				 GLuint flags );
+/* Sets (mode true) or clears (mode false) one RADEON_TCL_FALLBACK_* reason bit. */
+extern void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+
+extern void radeonInitStaticFogData( void );
+extern float radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord );
+/* Reason bits for radeonTclFallback(); each one occupies its own bit of the mask. */
+#define RADEON_TCL_FALLBACK_RASTER            0x1 /* rasterization */
+#define RADEON_TCL_FALLBACK_UNFILLED          0x2 /* unfilled tris */
+#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE     0x4 /* twoside tris */
+#define RADEON_TCL_FALLBACK_MATERIAL          0x8 /* material in vb */
+#define RADEON_TCL_FALLBACK_TEXGEN_0          0x10 /* texgen, unit 0 */
+#define RADEON_TCL_FALLBACK_TEXGEN_1          0x20 /* texgen, unit 1 */
+#define RADEON_TCL_FALLBACK_TEXGEN_2          0x40 /* texgen, unit 2 */
+#define RADEON_TCL_FALLBACK_TCL_DISABLE       0x80 /* user disable */
+#define RADEON_TCL_FALLBACK_FOGCOORDSPEC      0x100 /* fogcoord, sep. spec light */
+
+/* The largest maos_verts vertex format is 18 floats; the macro is 18*4 (NOTE(review): presumably bytes — confirm). */
+#define RADEON_MAX_TCL_VERTSIZE (18*4)
+/* Shorthand that forwards its arguments unchanged to radeonTclFallback(). */
+#define TCL_FALLBACK( ctx, bit, mode )	radeonTclFallback( ctx, bit, mode )
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c
new file mode 100644
index 0000000000..c66e5d17b1
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.c
@@ -0,0 +1,469 @@
+/*
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *    Gareth Hughes <gareth@valinux.com>
+ *    Brian Paul <brianp@valinux.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/colormac.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/simple_list.h"
+#include "main/texstore.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+
+#include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
+#include "radeon_ioctl.h"
+#include "radeon_tex.h"
+
+#include "xmlpool.h"
+
+
+
+/**
+ * Set the texture wrap modes.
+ * 
+ * \param t Texture object whose wrap modes are to be set
+ * \param swrap Wrap mode for the \a s texture coordinate
+ * \param twrap Wrap mode for the \a t texture coordinate
+ */
+
+static void radeonSetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap )
+{
+   GLboolean plain_clamp = GL_FALSE;    /* some axis uses a (mirror) CLAMP mode */
+   GLboolean border_clamp = GL_FALSE;   /* some axis uses a (mirror) CLAMP_TO_BORDER mode */
+   GLuint wrap_bits = 0;                /* clamp fields collected for both axes */
+
+   /* S axis */
+   switch ( swrap ) {
+   case GL_CLAMP:
+      plain_clamp = GL_TRUE;
+      wrap_bits |= RADEON_CLAMP_S_CLAMP_GL;
+      break;
+   case GL_MIRROR_CLAMP_EXT:
+      plain_clamp = GL_TRUE;
+      wrap_bits |= RADEON_CLAMP_S_MIRROR_CLAMP_GL;
+      break;
+   case GL_CLAMP_TO_BORDER:
+      border_clamp = GL_TRUE;
+      wrap_bits |= RADEON_CLAMP_S_CLAMP_GL;
+      break;
+   case GL_MIRROR_CLAMP_TO_BORDER_EXT:
+      border_clamp = GL_TRUE;
+      wrap_bits |= RADEON_CLAMP_S_MIRROR_CLAMP_GL;
+      break;
+   case GL_REPEAT:
+      wrap_bits |= RADEON_CLAMP_S_WRAP;
+      break;
+   case GL_CLAMP_TO_EDGE:
+      wrap_bits |= RADEON_CLAMP_S_CLAMP_LAST;
+      break;
+   case GL_MIRRORED_REPEAT:
+      wrap_bits |= RADEON_CLAMP_S_MIRROR;
+      break;
+   case GL_MIRROR_CLAMP_TO_EDGE_EXT:
+      wrap_bits |= RADEON_CLAMP_S_MIRROR_CLAMP_LAST;
+      break;
+   default:
+      _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__);
+   }
+
+   /* T axis */
+   switch ( twrap ) {
+   case GL_CLAMP:
+      plain_clamp = GL_TRUE;
+      wrap_bits |= RADEON_CLAMP_T_CLAMP_GL;
+      break;
+   case GL_MIRROR_CLAMP_EXT:
+      plain_clamp = GL_TRUE;
+      wrap_bits |= RADEON_CLAMP_T_MIRROR_CLAMP_GL;
+      break;
+   case GL_CLAMP_TO_BORDER:
+      border_clamp = GL_TRUE;
+      wrap_bits |= RADEON_CLAMP_T_CLAMP_GL;
+      break;
+   case GL_MIRROR_CLAMP_TO_BORDER_EXT:
+      border_clamp = GL_TRUE;
+      wrap_bits |= RADEON_CLAMP_T_MIRROR_CLAMP_GL;
+      break;
+   case GL_REPEAT:
+      wrap_bits |= RADEON_CLAMP_T_WRAP;
+      break;
+   case GL_CLAMP_TO_EDGE:
+      wrap_bits |= RADEON_CLAMP_T_CLAMP_LAST;
+      break;
+   case GL_MIRRORED_REPEAT:
+      wrap_bits |= RADEON_CLAMP_T_MIRROR;
+      break;
+   case GL_MIRROR_CLAMP_TO_EDGE_EXT:
+      wrap_bits |= RADEON_CLAMP_T_MIRROR_CLAMP_LAST;
+      break;
+   default:
+      _mesa_problem(NULL, "bad T wrap mode in %s", __FUNCTION__);
+   }
+   if ( border_clamp )
+      wrap_bits |= RADEON_BORDER_MODE_D3D;
+
+   t->pp_txfilter = (t->pp_txfilter & ~(RADEON_CLAMP_S_MASK | RADEON_CLAMP_T_MASK | RADEON_BORDER_MODE_D3D)) | wrap_bits;
+   t->border_fallback = (plain_clamp && border_clamp);
+}
+
+static void radeonSetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max )
+{
+   GLuint ratio;   /* RADEON_MAX_ANISO_* field value selected for 'max' */
+
+   if ( max == 1.0 )
+      ratio = RADEON_MAX_ANISO_1_TO_1;
+   else if ( max <= 2.0 )
+      ratio = RADEON_MAX_ANISO_2_TO_1;
+   else if ( max <= 4.0 )
+      ratio = RADEON_MAX_ANISO_4_TO_1;
+   else if ( max <= 8.0 )
+      ratio = RADEON_MAX_ANISO_8_TO_1;
+   else
+      ratio = RADEON_MAX_ANISO_16_TO_1;
+   t->pp_txfilter = (t->pp_txfilter & ~RADEON_MAX_ANISO_MASK) | ratio;
+}
+
+/**
+ * Set the texture magnification and minification modes.
+ * 
+ * \param t Texture whose filter modes are to be set
+ * \param minf Texture minification mode
+ * \param magf Texture magnification mode
+ */
+
+static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf )
+{
+   const GLuint aniso = (t->pp_txfilter & RADEON_MAX_ANISO_MASK);
+   GLuint filter_bits = 0;   /* min and mag field values, merged in at the end */
+
+   /* A switch to or from mipmapping requires revalidation. */
+   t->validated = GL_FALSE;
+
+   /* r100 chips can't handle mipmaps/aniso for cubemap/volume textures */
+   if ( t->base.Target == GL_TEXTURE_CUBE_MAP ) {
+      switch ( minf ) {
+      case GL_LINEAR:
+      case GL_LINEAR_MIPMAP_NEAREST:
+      case GL_LINEAR_MIPMAP_LINEAR:
+         filter_bits = RADEON_MIN_FILTER_LINEAR;
+         break;
+      case GL_NEAREST:
+      case GL_NEAREST_MIPMAP_NEAREST:
+      case GL_NEAREST_MIPMAP_LINEAR:
+         filter_bits = RADEON_MIN_FILTER_NEAREST;
+         break;
+      default:
+         break;
+      }
+   }
+   else if ( aniso != RADEON_MAX_ANISO_1_TO_1 ) {
+      switch ( minf ) {
+      case GL_NEAREST:
+         filter_bits = RADEON_MIN_FILTER_ANISO_NEAREST;
+         break;
+      case GL_LINEAR:
+         filter_bits = RADEON_MIN_FILTER_ANISO_LINEAR;
+         break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+      case GL_LINEAR_MIPMAP_NEAREST:
+         filter_bits = RADEON_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST;
+         break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+      case GL_LINEAR_MIPMAP_LINEAR:
+         filter_bits = RADEON_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR;
+         break;
+      }
+   } else {
+      switch ( minf ) {
+      case GL_NEAREST:
+         filter_bits = RADEON_MIN_FILTER_NEAREST;
+         break;
+      case GL_LINEAR:
+         filter_bits = RADEON_MIN_FILTER_LINEAR;
+         break;
+      case GL_NEAREST_MIPMAP_NEAREST:
+         filter_bits = RADEON_MIN_FILTER_NEAREST_MIP_NEAREST;
+         break;
+      case GL_NEAREST_MIPMAP_LINEAR:
+         filter_bits = RADEON_MIN_FILTER_LINEAR_MIP_NEAREST;   /* NOTE(review): names look transposed vs GL; confirm before changing */
+         break;
+      case GL_LINEAR_MIPMAP_NEAREST:
+         filter_bits = RADEON_MIN_FILTER_NEAREST_MIP_LINEAR;   /* NOTE(review): same apparent transposition; confirm */
+         break;
+      case GL_LINEAR_MIPMAP_LINEAR:
+         filter_bits = RADEON_MIN_FILTER_LINEAR_MIP_LINEAR;
+         break;
+      }
+   }
+
+   /* Only GL_NEAREST and GL_LINEAR select a magnification field value. */
+   if ( magf == GL_NEAREST )
+      filter_bits |= RADEON_MAG_FILTER_NEAREST;
+   else if ( magf == GL_LINEAR )
+      filter_bits |= RADEON_MAG_FILTER_LINEAR;
+
+   /* Clear both filter fields, then merge in the values chosen above. */
+   t->pp_txfilter &= ~(RADEON_MIN_FILTER_MASK | RADEON_MAG_FILTER_MASK);
+   t->pp_txfilter |= filter_bits;
+}
+
+static void radeonSetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] )
+{
+   GLubyte rgba[4];
+   int i;
+   for ( i = 0 ; i < 4 ; i++ ) {
+      CLAMPED_FLOAT_TO_UBYTE(rgba[i], color[i]);   /* one float channel -> GLubyte */
+   }
+   t->pp_border_color = radeonPackColor( 4, rgba[0], rgba[1], rgba[2], rgba[3] );
+}
+
+#define SCALED_FLOAT_TO_BYTE( x, scale ) /* via GLint: negative float -> GLuint is undefined in C */ \
+		(((GLuint)(GLint)((255.0F / (scale)) * (x))) / 2)
+
+/* Driver TexEnv hook.  For GL_TEXTURE_ENV_COLOR and GL_TEXTURE_LOD_BIAS_EXT it updates the
+ * current unit's hardware state; any other pname is ignored and 'target' is not read. */
+static void radeonTexEnv( GLcontext *ctx, GLenum target,
+                          GLenum pname, const GLfloat *param )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLuint unit = ctx->Texture.CurrentUnit;
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+   if ( RADEON_DEBUG & RADEON_STATE ) {
+      fprintf( stderr, "%s( %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr( pname ) );
+   }
+
+   switch ( pname ) {
+   case GL_TEXTURE_ENV_COLOR: {
+      GLubyte c[4];
+      GLuint envColor;
+      UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor );
+      envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] );
+      if ( rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] != envColor ) {
+         RADEON_STATECHANGE( rmesa, tex[unit] );
+         rmesa->hw.tex[unit].cmd[TEX_PP_TFACTOR] = envColor;
+      }
+      break;
+   }
+
+   case GL_TEXTURE_LOD_BIAS_EXT: {
+      GLfloat bias, min;
+      GLuint b;
+      /* The Radeon's LOD bias is a signed 2's complement value with a
+       * range of -1.0 <= bias < 4.0.  We break this into two linear
+       * functions, one mapping [-1.0,0.0] to [-128,0] and one mapping
+       * [0.0,4.0] to [0,127].
+       */
+      min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ? 0.0 : -1.0;
+      bias = CLAMP( *param, min, 4.0 );
+      if ( bias == 0 ) {
+         b = 0;
+      } else if ( bias > 0 ) {
+         b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 4.0 )) << RADEON_LOD_BIAS_SHIFT;
+      } else {
+         b = ((GLuint)SCALED_FLOAT_TO_BYTE( bias, 1.0 )) << RADEON_LOD_BIAS_SHIFT;
+      }
+      b &= RADEON_LOD_BIAS_MASK;   /* drop bits outside the field so an unchanged bias compares equal */
+      if ( (rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] & RADEON_LOD_BIAS_MASK) != b ) {
+         RADEON_STATECHANGE( rmesa, tex[unit] );
+         rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] &= ~RADEON_LOD_BIAS_MASK;
+         rmesa->hw.tex[unit].cmd[TEX_PP_TXFILTER] |= b;
+      }
+      break;
+   }
+
+   default:
+      return;
+   }
+}
+
+
+/**
+ * Changes variables and flags for a state update, which will happen at the
+ * next UpdateTextureState
+ */
+
+static void radeonTexParameter( GLcontext *ctx, GLenum target,
+                                struct gl_texture_object *texObj,
+                                GLenum pname, const GLfloat *params )
+{
+   radeonTexObj *tex = radeon_tex_obj(texObj);
+
+   radeon_print(RADEON_TEXTURE, RADEON_VERBOSE, "%s( %s )\n", __FUNCTION__,
+                _mesa_lookup_enum_by_nr( pname ) );
+
+   /* Values are read back from texObj rather than from 'params';
+    * pnames not listed below change nothing here.
+    */
+
+   if ( pname == GL_TEXTURE_MIN_FILTER ||
+        pname == GL_TEXTURE_MAG_FILTER ||
+        pname == GL_TEXTURE_MAX_ANISOTROPY_EXT ) {
+      /* The filter choice depends on the aniso field, so set that first. */
+      radeonSetTexMaxAnisotropy( tex, texObj->MaxAnisotropy );
+      radeonSetTexFilter( tex, texObj->MinFilter, texObj->MagFilter );
+   }
+   else if ( pname == GL_TEXTURE_WRAP_S ||
+             pname == GL_TEXTURE_WRAP_T ) {
+      /* Both axes are reprogrammed together. */
+      radeonSetTexWrap( tex, texObj->WrapS, texObj->WrapT );
+   }
+   else if ( pname == GL_TEXTURE_BORDER_COLOR ) {
+      /* Repack the border color from the float copy kept in texObj. */
+      radeonSetTexBorderColor( tex, texObj->BorderColor.f );
+   }
+   else if ( pname == GL_TEXTURE_BASE_LEVEL ||
+             pname == GL_TEXTURE_MAX_LEVEL ||
+             pname == GL_TEXTURE_MIN_LOD ||
+             pname == GL_TEXTURE_MAX_LOD ) {
+      /* Level/LOD range changed: only mark the object as not validated. */
+      tex->validated = GL_FALSE;
+   }
+}
+
+static void radeonDeleteTexture( GLcontext *ctx,
+                                 struct gl_texture_object *texObj )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   radeonTexObj *tex = radeon_tex_obj(texObj);
+   int unit;
+
+   radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
+                "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj,
+                _mesa_lookup_enum_by_nr( texObj->Target ) );
+
+   if ( rmesa != NULL ) {
+      /* radeon_firevertices() runs before any unit binding is dropped. */
+      radeon_firevertices(&rmesa->radeon);
+
+      /* Forget the object on every unit that still points at it. */
+      for ( unit = 0 ; unit < rmesa->radeon.glCtx->Const.MaxTextureUnits ; unit++ ) {
+         if ( rmesa->state.texture.unit[unit].texobj != tex )
+            continue;
+         rmesa->state.texture.unit[unit].texobj = NULL;
+         rmesa->hw.tex[unit].dirty = GL_FALSE;
+         rmesa->hw.cube[unit].dirty = GL_FALSE;
+      }
+   }
+   radeon_miptree_unreference(&tex->mt);
+   _mesa_delete_texture_object(ctx, texObj);   /* frees the mipmap images and the object itself */
+}
+
+/* Need:  
+ *  - Same GEN_MODE for all active bits
+ *  - Same EyePlane/ObjPlane for all active bits when using Eye/Obj
+ *  - STRQ presumably all supported (matrix means incoming R values
+ *    can end up in STQ, this has implications for vertex support,
+ *    presumably ok if maos is used, though?)
+ *  
+ * Basically impossible to do this on the fly - just collect some
+ * basic info & do the checks from ValidateState().
+ */
+static void radeonTexGen( GLcontext *ctx,
+                          GLenum coord,
+                          GLenum pname,
+                          const GLfloat *params )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   /* Only flag the current unit here; coord, pname and params are not inspected. */
+   rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE;
+}
+
+/**
+ * Allocate a new texture object.
+ * Called via ctx->Driver.NewTextureObject.
+ * The driver object embeds the core gl_texture_object as its 'base' member.
+ * \return the embedded core object, or NULL if the allocation failed.
+ */
+static struct gl_texture_object *
+radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
+   /* Allocation can fail; report that with NULL instead of dereferencing it. */
+   if ( !t )
+      return NULL;
+
+   _mesa_initialize_texture_object(&t->base, name, target);
+   t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
+   t->border_fallback = GL_FALSE;
+   t->pp_txfilter = RADEON_BORDER_MODE_OGL;
+   t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP | RADEON_TXFORMAT_PERSPECTIVE_ENABLE);
+   /* Derive the hardware wrap/aniso/filter/border fields from the values now in t->base. */
+   radeonSetTexWrap( t, t->base.WrapS, t->base.WrapT );
+   radeonSetTexMaxAnisotropy( t, t->base.MaxAnisotropy );
+   radeonSetTexFilter( t, t->base.MinFilter, t->base.MagFilter );
+   radeonSetTexBorderColor( t, t->base.BorderColor.f );
+   return &t->base;
+}
+
+
+
+void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions )
+{
+   /* Texture object lifetime and per-object / per-unit state. */
+   functions->NewTextureObject = radeonNewTextureObject;
+   functions->DeleteTexture = radeonDeleteTexture;
+   functions->TexParameter = radeonTexParameter;
+   functions->TexEnv = radeonTexEnv;
+   functions->TexGen = radeonTexGen;
+
+   /* Image format choice, storage and mapping. */
+   functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
+   functions->NewTextureImage = radeonNewTextureImage;
+   functions->FreeTexImageData = radeonFreeTexImageData;
+   functions->MapTexture = radeonMapTexture;
+   functions->UnmapTexture = radeonUnmapTexture;
+
+   /* Image upload, readback and mipmap generation. */
+   functions->TexImage1D = radeonTexImage1D;
+   functions->TexImage2D = radeonTexImage2D;
+   functions->TexSubImage1D = radeonTexSubImage1D;
+   functions->TexSubImage2D = radeonTexSubImage2D;
+   functions->CompressedTexImage2D = radeonCompressedTexImage2D;
+   functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
+   functions->GetTexImage = radeonGetTexImage;
+   functions->GetCompressedTexImage = radeonGetCompressedTexImage;
+   functions->GenerateMipmap = radeonGenerateMipmap;
+
+   /* The CopyTex* hooks are installed only when the screen reports kernel_mm. */
+   if (radeon->radeonScreen->kernel_mm) {
+      functions->CopyTexImage2D = radeonCopyTexImage2D;
+      functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
+   }
+
+   driInitTextureFormats();
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h
new file mode 100644
index 0000000000..0113ffd3da
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_tex.h
@@ -0,0 +1,57 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __RADEON_TEX_H__
+#define __RADEON_TEX_H__
+
+extern void radeonSetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+                               unsigned long long offset, GLint depth,
+                               GLuint pitch);
+
+extern void radeonSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv);
+extern void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format,
+			       __DRIdrawable *dPriv);
+
+extern void radeonUpdateTextureState( GLcontext *ctx );
+
+extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t,
+				  GLuint face );
+
+extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t );
+/* Fills 'functions' with this driver's texture hooks (TexImage*, TexParameter, TexEnv, ...). */
+extern void radeonInitTextureFuncs( radeonContextPtr radeon, struct dd_function_table *functions );
+
+#endif /* __RADEON_TEX_H__ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
new file mode 100644
index 0000000000..29fd31ac23
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "radeon_texture.h"
+
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/teximage.h"
+#include "main/texstate.h"
+#include "drivers/common/meta.h"
+
+#include "radeon_mipmap_tree.h"
+
+static GLboolean   /* result of vtbl.blit, or GL_FALSE when the blit path cannot be used */
+do_copy_texsubimage(GLcontext *ctx,
+                    GLenum target, GLint level,   /* select the face and mip level of the destination */
+                    struct radeon_tex_obj *tobj,
+                    radeon_texture_image *timg,
+                    GLint dstx, GLint dsty,       /* destination origin inside the texture image */
+                    GLint x, GLint y,             /* source origin in the read renderbuffer */
+                    GLsizei width, GLsizei height)
+{
+    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+    struct radeon_renderbuffer *rrb;
+    unsigned src_bpp;
+    unsigned dst_bpp;
+    gl_format src_mesaformat;
+    gl_format dst_mesaformat;
+    unsigned src_width;
+    unsigned dst_width;
+    unsigned flip_y;
+
+    if (!radeon->vtbl.blit) {   /* no blit hook installed */
+        return GL_FALSE;
+    }
+
+    /* Depth-format textures read from the depth buffer (its Wrapped buffer if present); others from the color read buffer. */
+    if (_mesa_get_format_bits(timg->base.TexFormat, GL_DEPTH_BITS) > 0) {
+        if (ctx->ReadBuffer->_DepthBuffer && ctx->ReadBuffer->_DepthBuffer->Wrapped) {
+            rrb = radeon_renderbuffer(ctx->ReadBuffer->_DepthBuffer->Wrapped);
+        } else {
+            rrb = radeon_renderbuffer(ctx->ReadBuffer->_DepthBuffer);
+        }
+        /* NOTE(review): flip when the attachment type is GL_NONE — presumably a window-system buffer; confirm */
+        flip_y = ctx->ReadBuffer->Attachment[BUFFER_DEPTH].Type == GL_NONE;
+    } else {
+        rrb = radeon_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
+        flip_y = ctx->ReadBuffer->Attachment[BUFFER_COLOR0].Type == GL_NONE;
+    }
+
+    /* This is a software renderbuffer; fall back to swrast. */
+    if (!rrb) {
+        return GL_FALSE;
+    }
+
+    if (!timg->mt) {   /* image has no miptree yet: validate the texture; the asserts below expect one */
+        radeon_validate_texture_miptree(ctx, &tobj->base);
+    }
+
+    assert(rrb->bo);
+    assert(timg->mt);
+    assert(timg->mt->bo);
+    assert(timg->base.Width >= dstx + width);     /* destination rectangle must fit in the image */
+    assert(timg->base.Height >= dsty + height);
+
+    intptr_t src_offset = rrb->draw_offset;
+    intptr_t dst_offset = radeon_miptree_image_offset(timg->mt, _mesa_tex_target_to_face(target), level);
+
+    if (0) {   /* debug trace, disabled */
+        fprintf(stderr, "%s: copying to face %d, level %d\n",
+                __FUNCTION__, _mesa_tex_target_to_face(target), level);
+        fprintf(stderr, "to: x %d, y %d, offset %d\n", dstx, dsty, (uint32_t) dst_offset);
+        fprintf(stderr, "from (%dx%d) width %d, height %d, offset %d, pitch %d\n",
+                x, y, rrb->base.Width, rrb->base.Height, (uint32_t) src_offset, rrb->pitch/rrb->cpp);
+        fprintf(stderr, "src size %d, dst size %d\n", rrb->bo->size, timg->mt->bo->size);
+
+    }
+
+    src_mesaformat = rrb->base.Format;
+    dst_mesaformat = timg->base.TexFormat;
+    src_width = rrb->base.Width;
+    dst_width = timg->base.Width;
+    src_bpp = _mesa_get_format_bytes(src_mesaformat);
+    dst_bpp = _mesa_get_format_bytes(dst_mesaformat);
+    if (!radeon->vtbl.check_blit(dst_mesaformat)) {   /* check_blit rejected the format: substitute one of equal byte size */
+	    /* depth formats tend to be special */
+	    if (_mesa_get_format_bits(dst_mesaformat, GL_DEPTH_BITS) > 0)
+		    return GL_FALSE;
+
+	    if (src_bpp != dst_bpp)   /* substitution needs equal bytes per pixel on both sides */
+		    return GL_FALSE;
+
+	    switch (dst_bpp) {   /* the same stand-in format is used for source and destination */
+	    case 2:
+		    src_mesaformat = MESA_FORMAT_RGB565;
+		    dst_mesaformat = MESA_FORMAT_RGB565;
+		    break;
+	    case 4:
+		    src_mesaformat = MESA_FORMAT_ARGB8888;
+		    dst_mesaformat = MESA_FORMAT_ARGB8888;
+		    break;
+	    case 1:
+		    src_mesaformat = MESA_FORMAT_A8;
+		    dst_mesaformat = MESA_FORMAT_A8;
+		    break;
+	    default:
+		    return GL_FALSE;
+	    }
+    }
+
+    /* blit from src buffer to texture */
+    return radeon->vtbl.blit(ctx, rrb->bo, src_offset, src_mesaformat, rrb->pitch/rrb->cpp,
+                             src_width, rrb->base.Height, x, y,
+                             timg->mt->bo, dst_offset, dst_mesaformat,
+                             timg->mt->levels[level].rowstride / dst_bpp,
+                             dst_width, timg->base.Height,
+                             dstx, dsty, width, height, flip_y);
+}
+
+/* CopyTexImage2D hook: (re)define the image, then try a blit; otherwise fall back to meta. */
+void
+radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level,
+                     GLenum internalFormat,
+                     GLint x, GLint y, GLsizei width, GLsizei height,
+                     GLint border)
+{
+    struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
+    struct gl_texture_object *texObj =
+        _mesa_select_tex_object(ctx, texUnit, target);
+    struct gl_texture_image *texImage =
+        _mesa_select_tex_image(ctx, texObj, target, level);
+
+    /* Clipping works on copies so the fallback still sees the caller's rectangle. */
+    GLint srcx = x, srcy = y, dstx = 0, dsty = 0;
+    GLsizei clip_w = width, clip_h = height;
+
+    if (border)
+        goto fail;
+
+    /* Setup or redefine the texture object, mipmap tree and texture
+     * image.  Don't populate yet.
+     */
+    ctx->Driver.TexImage2D(ctx, target, level, internalFormat,
+                           width, height, border,
+                           GL_RGBA, GL_UNSIGNED_BYTE, NULL,
+                           &ctx->DefaultPacking, texObj, texImage);
+
+    /* Clipping left nothing to copy; the image defined above is kept as is. */
+    if (!_mesa_clip_copytexsubimage(ctx, &dstx, &dsty, &srcx, &srcy,
+                                    &clip_w, &clip_h)) {
+        return;
+    }
+
+    /* Blit the clipped rectangle; the unclipped origins would address the wrong region. */
+    if (!do_copy_texsubimage(ctx, target, level,
+                             radeon_tex_obj(texObj), (radeon_texture_image *)texImage,
+                             dstx, dsty, srcx, srcy, clip_w, clip_h)) {
+        goto fail;
+    }
+
+    return;
+
+fail:
+    radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
+                 "Falling back to sw for glCopyTexImage2D (internalFormat %s, border %d)\n",
+                 _mesa_lookup_enum_by_nr(internalFormat), border);
+
+    _mesa_meta_CopyTexImage2D(ctx, target, level, internalFormat, x, y,
+                              width, height, border);
+}
+
+void
+radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
+                        GLint xoffset, GLint yoffset,
+                        GLint x, GLint y,
+                        GLsizei width, GLsizei height)
+{
+    struct gl_texture_unit *unit = _mesa_get_current_tex_unit(ctx);
+    struct gl_texture_object *obj = _mesa_select_tex_object(ctx, unit, target);
+    struct gl_texture_image *img = _mesa_select_tex_image(ctx, obj, target, level);
+
+    /* Fast path: blit straight into the texture image. */
+    if (do_copy_texsubimage(ctx, target, level, radeon_tex_obj(obj),
+                            (radeon_texture_image *)img,
+                            xoffset, yoffset, x, y, width, height))
+        return;
+
+    /* The blit path declined; let the meta implementation do the copy. */
+    radeon_print(RADEON_FALLBACKS, RADEON_NORMAL,
+                 "Falling back to sw for glCopyTexSubImage2D\n");
+    _mesa_meta_CopyTexSubImage2D(ctx, target, level, xoffset, yoffset, x, y, width, height);
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c b/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c
new file mode 100644
index 0000000000..3ababb1ef5
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_tex_getimage.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora.
+ * Copyright (C) 2008 Nicolai Haehnle.
+ * Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+ *
+ * The Weather Channel (TM) funded Tungsten Graphics to develop the
+ * initial release of the Radeon 8500 driver under the XFree86 license.
+ * This notice must be preserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common_context.h"
+#include "radeon_texture.h"
+
+#include "main/texgetimage.h"
+
+/**
+ * Need to map texture image into memory before copying image data,
+ * then unmap it.
+ */
+static void
+radeon_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
+                     GLenum format, GLenum type, GLvoid * pixels,
+                     struct gl_texture_object *texObj,
+                     struct gl_texture_image *texImage, int compressed)
+{
+    radeon_texture_image *rimg = get_radeon_texture_image(texImage);
+
+    radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
+                 "%s(%p, tex %p, image %p) compressed %d.\n",
+                 __func__, ctx, texObj, rimg, compressed);
+
+    /* Without a miptree the pixels must already sit in base.Data; otherwise
+     * map the miptree image read-only (write flag GL_FALSE). */
+    if (!rimg->mt) {
+        assert(rimg->base.Data);
+    } else {
+        radeon_teximage_map(rimg, GL_FALSE);
+    }
+
+    if (!compressed) {
+        _mesa_get_teximage(ctx, target, level, format, type, pixels,
+                           texObj, texImage);
+    } else {
+        /* FIXME: this can't work for small textures (mips) which
+                 use different hw stride */
+        _mesa_get_compressed_teximage(ctx, target, level, pixels,
+                                      texObj, texImage);
+    }
+
+    if (rimg->mt) {   /* undo the mapping made above */
+        radeon_teximage_unmap(rimg);
+    }
+}
+
+void
+radeonGetTexImage(GLcontext * ctx, GLenum target, GLint level,
+                  GLenum format, GLenum type, GLvoid * pixels,
+                  struct gl_texture_object *texObj,
+                  struct gl_texture_image *texImage)
+{
+    /* Uncompressed readback: the trailing 0 selects the _mesa_get_teximage path. */
+    radeon_get_tex_image(ctx, target, level, format, type, pixels, texObj, texImage, 0);
+}
+
+void
+radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
+                            GLvoid *pixels,
+                            struct gl_texture_object *texObj,
+                            struct gl_texture_image *texImage)
+{
+    /* Compressed readback: format and type are not used on that path, so 0 is passed for both. */
+    radeon_get_tex_image(ctx, target, level, 0, 0, pixels, texObj, texImage, 1);
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c
new file mode 100644
index 0000000000..f852116dee
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c
@@ -0,0 +1,1175 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Gareth Hughes <gareth@valinux.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/colormac.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/teximage.h"
+#include "main/texstate.h"
+#include "main/texobj.h"
+#include "main/enums.h"
+
+#include "radeon_context.h"
+#include "radeon_mipmap_tree.h"
+#include "radeon_state.h"
+#include "radeon_ioctl.h"
+#include "radeon_swtcl.h"
+#include "radeon_tex.h"
+#include "radeon_tcl.h"
+
+
+#define RADEON_TXFORMAT_A8        RADEON_TXFORMAT_I8
+#define RADEON_TXFORMAT_L8        RADEON_TXFORMAT_I8
+#define RADEON_TXFORMAT_AL88      RADEON_TXFORMAT_AI88
+#define RADEON_TXFORMAT_YCBCR     RADEON_TXFORMAT_YVYU422
+#define RADEON_TXFORMAT_YCBCR_REV RADEON_TXFORMAT_VYUY422
+#define RADEON_TXFORMAT_RGB_DXT1  RADEON_TXFORMAT_DXT1
+#define RADEON_TXFORMAT_RGBA_DXT1 RADEON_TXFORMAT_DXT1
+#define RADEON_TXFORMAT_RGBA_DXT3 RADEON_TXFORMAT_DXT23
+#define RADEON_TXFORMAT_RGBA_DXT5 RADEON_TXFORMAT_DXT45
+
+#define _COLOR(f) \
+    [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, 0 }
+#define _COLOR_REV(f) \
+    [ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f, 0 }
+#define _ALPHA(f) \
+    [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }
+#define _ALPHA_REV(f) \
+    [ MESA_FORMAT_ ## f ## _REV ] = { RADEON_TXFORMAT_ ## f | RADEON_TXFORMAT_ALPHA_IN_MAP, 0 }
+#define _YUV(f) \
+   [ MESA_FORMAT_ ## f ] = { RADEON_TXFORMAT_ ## f, RADEON_YUV_TO_RGB }
+#define _INVALID(f) \
+    [ MESA_FORMAT_ ## f ] = { 0xffffffff, 0 }
+#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \
+			     && (tx_table[f].format != 0xffffffff) )
+
+struct tx_table {
+   GLuint format, filter; /* bits merged into a texture object's pp_txformat / pp_txfilter */
+};
+
+/* Indexed by MESA_FORMAT_x through designated initializers. XXX verify this table against MESA_FORMAT_x values */
+static const struct tx_table tx_table[] =
+{
+   _INVALID(NONE), /* MESA_FORMAT_NONE */
+   _ALPHA(RGBA8888),
+   _ALPHA_REV(RGBA8888),
+   _ALPHA(ARGB8888),
+   _ALPHA_REV(ARGB8888),
+   [ MESA_FORMAT_RGB888 ] = { RADEON_TXFORMAT_ARGB8888, 0 }, /* uses the ARGB8888 hw format without ALPHA_IN_MAP */
+   _COLOR(RGB565),
+   _COLOR_REV(RGB565),
+   _ALPHA(ARGB4444),
+   _ALPHA_REV(ARGB4444),
+   _ALPHA(ARGB1555),
+   _ALPHA_REV(ARGB1555),
+   _ALPHA(AL88),
+   _ALPHA_REV(AL88),
+   _ALPHA(A8),
+   _COLOR(L8),
+   _ALPHA(I8),
+   _INVALID(CI8), /* 0xffffffff format: rejected by VALID_FORMAT() */
+   _YUV(YCBCR), /* the YUV entries also carry RADEON_YUV_TO_RGB in .filter */
+   _YUV(YCBCR_REV),
+   _INVALID(RGB_FXT1),
+   _INVALID(RGBA_FXT1),
+   _COLOR(RGB_DXT1),
+   _ALPHA(RGBA_DXT1),
+   _ALPHA(RGBA_DXT3),
+   _ALPHA(RGBA_DXT5),
+};
+
+#undef _COLOR
+#undef _ALPHA
+#undef _INVALID
+
+/* ================================================================
+ * Texture combine functions
+ */
+
+/* GL_ARB_texture_env_combine support
+ */
+
+/* Read-only color argument tables.  Rows/entries are indexed by
+ * (operand - GL_SRC_COLOR): GL_SRC_COLOR, GL_ONE_MINUS_SRC_COLOR,
+ * GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.  Values are encoded for slot A. */
+static const GLuint radeon_texture_color[][RADEON_MAX_TEXTURE_UNITS] =
+{
+   {
+      RADEON_COLOR_ARG_A_T0_COLOR,
+      RADEON_COLOR_ARG_A_T1_COLOR,
+      RADEON_COLOR_ARG_A_T2_COLOR
+   },
+   {
+      RADEON_COLOR_ARG_A_T0_COLOR | RADEON_COMP_ARG_A,
+      RADEON_COLOR_ARG_A_T1_COLOR | RADEON_COMP_ARG_A,
+      RADEON_COLOR_ARG_A_T2_COLOR | RADEON_COMP_ARG_A
+   },
+   {
+      RADEON_COLOR_ARG_A_T0_ALPHA,
+      RADEON_COLOR_ARG_A_T1_ALPHA,
+      RADEON_COLOR_ARG_A_T2_ALPHA
+   },
+   {
+      RADEON_COLOR_ARG_A_T0_ALPHA | RADEON_COMP_ARG_A,
+      RADEON_COLOR_ARG_A_T1_ALPHA | RADEON_COMP_ARG_A,
+      RADEON_COLOR_ARG_A_T2_ALPHA | RADEON_COMP_ARG_A
+   },
+};
+
+static const GLuint radeon_tfactor_color[] =
+{
+   RADEON_COLOR_ARG_A_TFACTOR_COLOR,
+   RADEON_COLOR_ARG_A_TFACTOR_COLOR | RADEON_COMP_ARG_A,
+   RADEON_COLOR_ARG_A_TFACTOR_ALPHA,
+   RADEON_COLOR_ARG_A_TFACTOR_ALPHA | RADEON_COMP_ARG_A
+};
+
+static const GLuint radeon_primary_color[] =
+{
+   RADEON_COLOR_ARG_A_DIFFUSE_COLOR,
+   RADEON_COLOR_ARG_A_DIFFUSE_COLOR | RADEON_COMP_ARG_A,
+   RADEON_COLOR_ARG_A_DIFFUSE_ALPHA,
+   RADEON_COLOR_ARG_A_DIFFUSE_ALPHA | RADEON_COMP_ARG_A
+};
+
+static const GLuint radeon_previous_color[] =
+{
+   RADEON_COLOR_ARG_A_CURRENT_COLOR,
+   RADEON_COLOR_ARG_A_CURRENT_COLOR | RADEON_COMP_ARG_A,
+   RADEON_COLOR_ARG_A_CURRENT_ALPHA,
+   RADEON_COLOR_ARG_A_CURRENT_ALPHA | RADEON_COMP_ARG_A
+};
+
+/* GL_ZERO table - indices 0-3
+ * GL_ONE  table - indices 1-4 (looked up at [op+1], which flips the complement bit)
+ */
+static const GLuint radeon_zero_color[] =
+{
+   RADEON_COLOR_ARG_A_ZERO,
+   RADEON_COLOR_ARG_A_ZERO | RADEON_COMP_ARG_A,
+   RADEON_COLOR_ARG_A_ZERO,
+   RADEON_COLOR_ARG_A_ZERO | RADEON_COMP_ARG_A,
+   RADEON_COLOR_ARG_A_ZERO
+};
+
+
+/* The alpha tables only have GL_SRC_ALPHA and GL_ONE_MINUS_SRC_ALPHA.
+ */
+static const GLuint radeon_texture_alpha[][RADEON_MAX_TEXTURE_UNITS] =
+{
+   {
+      RADEON_ALPHA_ARG_A_T0_ALPHA,
+      RADEON_ALPHA_ARG_A_T1_ALPHA,
+      RADEON_ALPHA_ARG_A_T2_ALPHA
+   },
+   {
+      RADEON_ALPHA_ARG_A_T0_ALPHA | RADEON_COMP_ARG_A,
+      RADEON_ALPHA_ARG_A_T1_ALPHA | RADEON_COMP_ARG_A,
+      RADEON_ALPHA_ARG_A_T2_ALPHA | RADEON_COMP_ARG_A
+   },
+};
+
+static const GLuint radeon_tfactor_alpha[] =
+{
+   RADEON_ALPHA_ARG_A_TFACTOR_ALPHA,
+   RADEON_ALPHA_ARG_A_TFACTOR_ALPHA | RADEON_COMP_ARG_A
+};
+
+static const GLuint radeon_primary_alpha[] =
+{
+   RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA,
+   RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA | RADEON_COMP_ARG_A
+};
+
+static const GLuint radeon_previous_alpha[] =
+{
+   RADEON_ALPHA_ARG_A_CURRENT_ALPHA,
+   RADEON_ALPHA_ARG_A_CURRENT_ALPHA | RADEON_COMP_ARG_A
+};
+
+/* GL_ZERO table - indices 0-1
+ * GL_ONE  table - indices 1-2 (looked up at [op+1], which flips the complement bit)
+ */
+static const GLuint radeon_zero_alpha[] =
+{
+   RADEON_ALPHA_ARG_A_ZERO,
+   RADEON_ALPHA_ARG_A_ZERO | RADEON_COMP_ARG_A,
+   RADEON_ALPHA_ARG_A_ZERO
+};
+
+
+/* Take color_arg[n] / alpha_arg[n] (encoded for slot A), shift its source bits
+ * into argument slot `arg` (A, B or C) and set that slot's complement bit.
+ * Both macros OR into the caller's local color_combine / alpha_combine. */
+#define RADEON_COLOR_ARG( n, arg )			\
+do {							\
+   color_combine |=					\
+      ((color_arg[n] & RADEON_COLOR_ARG_MASK)		\
+       << RADEON_COLOR_ARG_##arg##_SHIFT);		\
+   color_combine |=					\
+      ((color_arg[n] >> RADEON_COMP_ARG_SHIFT)		\
+       << RADEON_COMP_ARG_##arg##_SHIFT);		\
+} while (0)
+
+#define RADEON_ALPHA_ARG( n, arg )			\
+do {							\
+   alpha_combine |=					\
+      ((alpha_arg[n] & RADEON_ALPHA_ARG_MASK)		\
+       << RADEON_ALPHA_ARG_##arg##_SHIFT);		\
+   alpha_combine |=					\
+      ((alpha_arg[n] >> RADEON_COMP_ARG_SHIFT)		\
+       << RADEON_COMP_ARG_##arg##_SHIFT);		\
+} while (0)
+
+
+/* ================================================================
+ * Texture unit state management
+ */
+
+static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLuint color_combine, alpha_combine;
+   const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO
+         | RADEON_COLOR_ARG_C_CURRENT_COLOR | RADEON_BLEND_CTL_ADD
+         | RADEON_SCALE_1X | RADEON_CLAMP_TX;
+   const GLuint alpha_combine0 = RADEON_ALPHA_ARG_A_ZERO | RADEON_ALPHA_ARG_B_ZERO
+         | RADEON_ALPHA_ARG_C_CURRENT_ALPHA | RADEON_BLEND_CTL_ADD
+         | RADEON_SCALE_1X | RADEON_CLAMP_TX;
+
+   /* Builds TEX_PP_TXCBLEND / TEX_PP_TXABLEND for `unit`; GL_FALSE means the combine state is unsupported. */
+   /* texUnit->_Current can be NULL if and only if the texture unit is
+    * not actually enabled.
+    */
+   assert( (texUnit->_ReallyEnabled == 0)
+	   || (texUnit->_Current != NULL) );
+
+   if ( RADEON_DEBUG & RADEON_TEXTURE ) {
+      fprintf( stderr, "%s( %p, %d )\n", __FUNCTION__, (void *)ctx, unit );
+   }
+
+   /* Set the texture environment state.  Isn't this nice and clean?
+    * The chip will automagically set the texture alpha to 0xff when
+    * the texture format does not include an alpha component. This
+    * reduces the amount of special-casing we have to do, alpha-only
+    * textures being a notable exception. Doesn't work for luminance
+    * textures realized with I8 and ALPHA_IN_MAP not set either (on r100).
+    */
+    /* Don't cache these results.
+    */
+   rmesa->state.texture.unit[unit].format = 0;
+   rmesa->state.texture.unit[unit].envMode = 0;
+
+   if ( !texUnit->_ReallyEnabled ) {
+      color_combine = color_combine0;
+      alpha_combine = alpha_combine0;
+   }
+   else {
+      GLuint color_arg[3], alpha_arg[3];
+      GLuint i;
+      const GLuint numColorArgs = texUnit->_CurrentCombine->_NumArgsRGB;
+      const GLuint numAlphaArgs = texUnit->_CurrentCombine->_NumArgsA;
+      GLuint RGBshift = texUnit->_CurrentCombine->ScaleShiftRGB;
+      GLuint Ashift = texUnit->_CurrentCombine->ScaleShiftA;
+
+
+      /* Step 1:
+       * Extract the color and alpha combine function arguments.
+       */
+      for ( i = 0 ; i < numColorArgs ; i++ ) {
+	 const GLint op = texUnit->_CurrentCombine->OperandRGB[i] - GL_SRC_COLOR;
+	 const GLuint srcRGBi = texUnit->_CurrentCombine->SourceRGB[i];
+	 assert(op >= 0);
+	 assert(op <= 3);
+	 switch ( srcRGBi ) {
+	 case GL_TEXTURE:
+	    if (texUnit->_Current->Image[0][0]->_BaseFormat == GL_ALPHA)
+	       color_arg[i] = radeon_zero_color[op];
+	    else
+	       color_arg[i] = radeon_texture_color[op][unit];
+	    break;
+	 case GL_CONSTANT:
+	    color_arg[i] = radeon_tfactor_color[op];
+	    break;
+	 case GL_PRIMARY_COLOR:
+	    color_arg[i] = radeon_primary_color[op];
+	    break;
+	 case GL_PREVIOUS:
+	    color_arg[i] = radeon_previous_color[op];
+	    break;
+	 case GL_ZERO:
+	    color_arg[i] = radeon_zero_color[op];
+	    break;
+	 case GL_ONE:
+	    color_arg[i] = radeon_zero_color[op+1];
+	    break;
+	 case GL_TEXTURE0:
+	 case GL_TEXTURE1:
+	 case GL_TEXTURE2: {
+	    GLuint txunit = srcRGBi - GL_TEXTURE0;
+	    if (ctx->Texture.Unit[txunit]._Current->Image[0][0]->_BaseFormat == GL_ALPHA)
+	       color_arg[i] = radeon_zero_color[op];
+	    else
+	 /* implement ogl 1.4/1.5 core spec here, not specification of
+	  * GL_ARB_texture_env_crossbar (which would require disabling blending
+	  * instead of undefined results when referencing not enabled texunit) */
+	      color_arg[i] = radeon_texture_color[op][txunit];
+	    }
+	    break;
+	 default:
+	    return GL_FALSE;
+	 }
+      }
+
+      for ( i = 0 ; i < numAlphaArgs ; i++ ) {
+	 const GLint op = texUnit->_CurrentCombine->OperandA[i] - GL_SRC_ALPHA;
+	 const GLuint srcAi = texUnit->_CurrentCombine->SourceA[i];
+	 assert(op >= 0);
+	 assert(op <= 1);
+	 switch ( srcAi ) {
+	 case GL_TEXTURE:
+	    if (texUnit->_Current->Image[0][0]->_BaseFormat == GL_LUMINANCE)
+	       alpha_arg[i] = radeon_zero_alpha[op+1];
+	    else
+	       alpha_arg[i] = radeon_texture_alpha[op][unit];
+	    break;
+	 case GL_CONSTANT:
+	    alpha_arg[i] = radeon_tfactor_alpha[op];
+	    break;
+	 case GL_PRIMARY_COLOR:
+	    alpha_arg[i] = radeon_primary_alpha[op];
+	    break;
+	 case GL_PREVIOUS:
+	    alpha_arg[i] = radeon_previous_alpha[op];
+	    break;
+	 case GL_ZERO:
+	    alpha_arg[i] = radeon_zero_alpha[op];
+	    break;
+	 case GL_ONE:
+	    alpha_arg[i] = radeon_zero_alpha[op+1];
+	    break;
+	 case GL_TEXTURE0:
+	 case GL_TEXTURE1:
+	 case GL_TEXTURE2: {
+	    GLuint txunit = srcAi - GL_TEXTURE0;
+	    if (ctx->Texture.Unit[txunit]._Current->Image[0][0]->_BaseFormat == GL_LUMINANCE)
+	       alpha_arg[i] = radeon_zero_alpha[op+1];
+	    else
+	       alpha_arg[i] = radeon_texture_alpha[op][txunit];
+	    }
+	    break;
+	 default:
+	    return GL_FALSE;
+	 }
+      }
+
+      /* Step 2:
+       * Build up the color and alpha combine functions.
+       */
+      switch ( texUnit->_CurrentCombine->ModeRGB ) {
+      case GL_REPLACE:
+	 color_combine = (RADEON_COLOR_ARG_A_ZERO |
+			  RADEON_COLOR_ARG_B_ZERO |
+			  RADEON_BLEND_CTL_ADD |
+			  RADEON_CLAMP_TX);
+	 RADEON_COLOR_ARG( 0, C );
+	 break;
+      case GL_MODULATE:
+	 color_combine = (RADEON_COLOR_ARG_C_ZERO |
+			  RADEON_BLEND_CTL_ADD |
+			  RADEON_CLAMP_TX);
+	 RADEON_COLOR_ARG( 0, A );
+	 RADEON_COLOR_ARG( 1, B );
+	 break;
+      case GL_ADD:
+	 color_combine = (RADEON_COLOR_ARG_B_ZERO |
+			  RADEON_COMP_ARG_B |
+			  RADEON_BLEND_CTL_ADD |
+			  RADEON_CLAMP_TX);
+	 RADEON_COLOR_ARG( 0, A );
+	 RADEON_COLOR_ARG( 1, C );
+	 break;
+      case GL_ADD_SIGNED:
+	 color_combine = (RADEON_COLOR_ARG_B_ZERO |
+			  RADEON_COMP_ARG_B |
+			  RADEON_BLEND_CTL_ADDSIGNED |
+			  RADEON_CLAMP_TX);
+	 RADEON_COLOR_ARG( 0, A );
+	 RADEON_COLOR_ARG( 1, C );
+	 break;
+      case GL_SUBTRACT:
+	 color_combine = (RADEON_COLOR_ARG_B_ZERO |
+			  RADEON_COMP_ARG_B |
+			  RADEON_BLEND_CTL_SUBTRACT |
+			  RADEON_CLAMP_TX);
+	 RADEON_COLOR_ARG( 0, A );
+	 RADEON_COLOR_ARG( 1, C );
+	 break;
+      case GL_INTERPOLATE:
+	 color_combine = (RADEON_BLEND_CTL_BLEND |
+			  RADEON_CLAMP_TX);
+	 RADEON_COLOR_ARG( 0, B );
+	 RADEON_COLOR_ARG( 1, A );
+	 RADEON_COLOR_ARG( 2, C );
+	 break;
+
+      case GL_DOT3_RGB_EXT:
+      case GL_DOT3_RGBA_EXT:
+	 /* The EXT version of the DOT3 extension does not support the
+	  * scale factor, but the ARB version (and the version in OpenGL
+	  * 1.3) does.
+	  */
+	 RGBshift = 0;
+	 /* FALLTHROUGH */
+
+      case GL_DOT3_RGB:
+      case GL_DOT3_RGBA:
+	 /* The R100 / RV200 only support a 1X multiplier in hardware
+	  * w/the ARB version.
+	  */
+	 if ( RGBshift != (RADEON_SCALE_1X >> RADEON_SCALE_SHIFT) ) {
+	    return GL_FALSE;
+	 }
+
+	 RGBshift += 2;
+	 if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA_EXT)
+	    || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGBA) ) {
+            /* is it necessary to set this or will it be ignored anyway? */
+	    Ashift = RGBshift;
+	 }
+
+	 color_combine = (RADEON_COLOR_ARG_C_ZERO |
+			  RADEON_BLEND_CTL_DOT3 |
+			  RADEON_CLAMP_TX);
+	 RADEON_COLOR_ARG( 0, A );
+	 RADEON_COLOR_ARG( 1, B );
+	 break;
+
+      case GL_MODULATE_ADD_ATI:
+	 color_combine = (RADEON_BLEND_CTL_ADD |
+			  RADEON_CLAMP_TX);
+	 RADEON_COLOR_ARG( 0, A );
+	 RADEON_COLOR_ARG( 1, C );
+	 RADEON_COLOR_ARG( 2, B );
+	 break;
+      case GL_MODULATE_SIGNED_ADD_ATI:
+	 color_combine = (RADEON_BLEND_CTL_ADDSIGNED |
+			  RADEON_CLAMP_TX);
+	 RADEON_COLOR_ARG( 0, A );
+	 RADEON_COLOR_ARG( 1, C );
+	 RADEON_COLOR_ARG( 2, B );
+	 break;
+      case GL_MODULATE_SUBTRACT_ATI:
+	 color_combine = (RADEON_BLEND_CTL_SUBTRACT |
+			  RADEON_CLAMP_TX);
+	 RADEON_COLOR_ARG( 0, A );
+	 RADEON_COLOR_ARG( 1, C );
+	 RADEON_COLOR_ARG( 2, B );
+	 break;
+      default:
+	 return GL_FALSE;
+      }
+
+      switch ( texUnit->_CurrentCombine->ModeA ) {
+      case GL_REPLACE:
+	 alpha_combine = (RADEON_ALPHA_ARG_A_ZERO |
+			  RADEON_ALPHA_ARG_B_ZERO |
+			  RADEON_BLEND_CTL_ADD |
+			  RADEON_CLAMP_TX);
+	 RADEON_ALPHA_ARG( 0, C );
+	 break;
+      case GL_MODULATE:
+	 alpha_combine = (RADEON_ALPHA_ARG_C_ZERO |
+			  RADEON_BLEND_CTL_ADD |
+			  RADEON_CLAMP_TX);
+	 RADEON_ALPHA_ARG( 0, A );
+	 RADEON_ALPHA_ARG( 1, B );
+	 break;
+      case GL_ADD:
+	 alpha_combine = (RADEON_ALPHA_ARG_B_ZERO |
+			  RADEON_COMP_ARG_B |
+			  RADEON_BLEND_CTL_ADD |
+			  RADEON_CLAMP_TX);
+	 RADEON_ALPHA_ARG( 0, A );
+	 RADEON_ALPHA_ARG( 1, C );
+	 break;
+      case GL_ADD_SIGNED:
+	 alpha_combine = (RADEON_ALPHA_ARG_B_ZERO |
+			  RADEON_COMP_ARG_B |
+			  RADEON_BLEND_CTL_ADDSIGNED |
+			  RADEON_CLAMP_TX);
+	 RADEON_ALPHA_ARG( 0, A );
+	 RADEON_ALPHA_ARG( 1, C );
+	 break;
+      case GL_SUBTRACT:
+	 alpha_combine = (RADEON_ALPHA_ARG_B_ZERO | /* alpha register: use the ALPHA_ARG constant like the other alpha cases */
+			  RADEON_COMP_ARG_B |
+			  RADEON_BLEND_CTL_SUBTRACT |
+			  RADEON_CLAMP_TX);
+	 RADEON_ALPHA_ARG( 0, A );
+	 RADEON_ALPHA_ARG( 1, C );
+	 break;
+      case GL_INTERPOLATE:
+	 alpha_combine = (RADEON_BLEND_CTL_BLEND |
+			  RADEON_CLAMP_TX);
+	 RADEON_ALPHA_ARG( 0, B );
+	 RADEON_ALPHA_ARG( 1, A );
+	 RADEON_ALPHA_ARG( 2, C );
+	 break;
+
+      case GL_MODULATE_ADD_ATI:
+	 alpha_combine = (RADEON_BLEND_CTL_ADD |
+			  RADEON_CLAMP_TX);
+	 RADEON_ALPHA_ARG( 0, A );
+	 RADEON_ALPHA_ARG( 1, C );
+	 RADEON_ALPHA_ARG( 2, B );
+	 break;
+      case GL_MODULATE_SIGNED_ADD_ATI:
+	 alpha_combine = (RADEON_BLEND_CTL_ADDSIGNED |
+			  RADEON_CLAMP_TX);
+	 RADEON_ALPHA_ARG( 0, A );
+	 RADEON_ALPHA_ARG( 1, C );
+	 RADEON_ALPHA_ARG( 2, B );
+	 break;
+      case GL_MODULATE_SUBTRACT_ATI:
+	 alpha_combine = (RADEON_BLEND_CTL_SUBTRACT |
+			  RADEON_CLAMP_TX);
+	 RADEON_ALPHA_ARG( 0, A );
+	 RADEON_ALPHA_ARG( 1, C );
+	 RADEON_ALPHA_ARG( 2, B );
+	 break;
+      default:
+	 return GL_FALSE;
+      }
+
+      if ( (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGB_EXT)
+	   || (texUnit->_CurrentCombine->ModeRGB == GL_DOT3_RGB) ) {
+	 alpha_combine |= RADEON_DOT_ALPHA_DONT_REPLICATE;
+      }
+
+      /* Step 3:
+       * Apply the scale factor.
+       */
+      color_combine |= (RGBshift << RADEON_SCALE_SHIFT);
+      alpha_combine |= (Ashift   << RADEON_SCALE_SHIFT);
+
+      /* All done!
+       */
+   }
+
+   if ( rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] != color_combine ||
+	rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] != alpha_combine ) {
+      RADEON_STATECHANGE( rmesa, tex[unit] );
+      rmesa->hw.tex[unit].cmd[TEX_PP_TXCBLEND] = color_combine;
+      rmesa->hw.tex[unit].cmd[TEX_PP_TXABLEND] = alpha_combine;
+   }
+
+   return GL_TRUE;
+}
+
+void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+                        unsigned long long offset, GLint depth, GLuint pitch)
+{
+	r100ContextPtr rmesa = pDRICtx->driverPrivate;
+	struct gl_texture_object *tObj =
+	    _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+	radeonTexObjPtr t;
+	/* Overrides texture `texname`'s image: records offset, pitch and a format picked from depth. */
+	if (tObj == NULL)
+		return;	/* no texture object for this name */
+	t = radeon_tex_obj(tObj);	/* converted only after the NULL check */
+	t->image_override = GL_TRUE;
+
+	if (!offset)
+		return;	/* offset 0: only the override flag is set */
+	
+	t->bo = NULL;
+	t->override_offset = offset;
+	t->pp_txpitch = pitch - 32;
+
+	switch (depth) {
+	case 32:
+		t->pp_txformat = tx_table[MESA_FORMAT_ARGB8888].format;
+		t->pp_txfilter |= tx_table[MESA_FORMAT_ARGB8888].filter;
+		break;
+	case 24:
+	default:	/* any depth other than 32 or 16 is treated like 24 */
+		t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format;
+		t->pp_txfilter |= tx_table[MESA_FORMAT_RGB888].filter;
+		break;
+	case 16:
+		t->pp_txformat = tx_table[MESA_FORMAT_RGB565].format;
+		t->pp_txfilter |= tx_table[MESA_FORMAT_RGB565].filter;
+		break;
+	}
+}
+
+void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format,
+			 __DRIdrawable *dPriv)
+{
+	struct gl_texture_unit *texUnit;
+	struct gl_texture_object *texObj;
+	struct gl_texture_image *texImage;
+	struct radeon_renderbuffer *rb;
+	radeon_texture_image *rImage;
+	radeonContextPtr radeon;
+	struct radeon_framebuffer *rfb;
+	radeonTexObjPtr t;
+	uint32_t pitch_val;
+
+	/* Makes the first color renderbuffer of drawable dPriv the level-0
+	 * image of the texture selected by `target` on the current texture
+	 * unit.  texture_format (__DRI_TEXTURE_FORMAT_*) picks the RGB or
+	 * ARGB hardware format when the renderbuffer has 4 bytes per pixel.
+	 */
+	radeon = pDRICtx->driverPrivate;
+	rfb = dPriv->driverPrivate;
+	texUnit = _mesa_get_current_tex_unit(radeon->glCtx);
+	texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
+	texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
+
+	/* Guard against a NULL result from either lookup before rImage/t are used. */
+	if (texObj == NULL || texImage == NULL)
+		return;
+
+	rImage = get_radeon_texture_image(texImage);
+	t = radeon_tex_obj(texObj);
+	if (t == NULL)
+		return;
+
+	radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE);
+	rb = rfb->color_rb[0];
+	if (rb->bo == NULL) {
+		/* Failed to get a BO for the buffer */
+		return;
+	}
+
+	_mesa_lock_texture(radeon->glCtx, texObj);
+	if (t->bo) {
+		radeon_bo_unref(t->bo);
+		t->bo = NULL;
+	}
+	if (rImage->bo) {
+		radeon_bo_unref(rImage->bo);
+		rImage->bo = NULL;
+	}
+
+	radeon_miptree_unreference(&t->mt);
+	radeon_miptree_unreference(&rImage->mt);
+
+	_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
+				   rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
+	texImage->RowStride = rb->pitch / rb->cpp;
+
+	rImage->bo = rb->bo;	/* image and texture object each take their own reference */
+	radeon_bo_ref(rImage->bo);
+	t->bo = rb->bo;
+	radeon_bo_ref(t->bo);
+	t->tile_bits = 0;
+	t->image_override = GL_TRUE;
+	t->override_offset = 0;
+	switch (rb->cpp) {
+	case 4:
+		if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
+			t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format;
+		else
+			t->pp_txformat = tx_table[MESA_FORMAT_ARGB8888].format;
+		t->pp_txfilter |= tx_table[MESA_FORMAT_ARGB8888].filter;
+		break;
+	case 3:
+	default:	/* any cpp other than 4 or 2 is treated like 3 */
+		t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format;
+		t->pp_txfilter |= tx_table[MESA_FORMAT_RGB888].filter;
+		break;
+	case 2:
+		t->pp_txformat = tx_table[MESA_FORMAT_RGB565].format;
+		t->pp_txfilter |= tx_table[MESA_FORMAT_RGB565].filter;
+		break;
+	}
+
+	t->pp_txpitch &= (1 << 13) -1;
+	pitch_val = rb->pitch;
+
+	t->pp_txsize = ((rb->base.Width - 1) << RADEON_TEX_USIZE_SHIFT)
+		| ((rb->base.Height - 1) << RADEON_TEX_VSIZE_SHIFT);
+	if (target == GL_TEXTURE_RECTANGLE_NV) {
+		t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
+		t->pp_txpitch = pitch_val;
+		t->pp_txpitch -= 32;
+	}
+	t->validated = GL_TRUE;
+	_mesa_unlock_texture(radeon->glCtx, texObj);
+	return;
+}
+
+
+void radeonSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+{
+        radeonSetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv); /* radeonSetTexBuffer2() with the format fixed to RGBA */
+}
+
+
+#define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK |	\
+			      RADEON_MIN_FILTER_MASK | 		\
+			      RADEON_MAG_FILTER_MASK |		\
+			      RADEON_MAX_ANISO_MASK |		\
+			      RADEON_YUV_TO_RGB |		\
+			      RADEON_YUV_TEMPERATURE_MASK |	\
+			      RADEON_CLAMP_S_MASK | 		\
+			      RADEON_CLAMP_T_MASK | 		\
+			      RADEON_BORDER_MODE_D3D )
+
+#define TEXOBJ_TXFORMAT_MASK (RADEON_TXFORMAT_WIDTH_MASK |	\
+			      RADEON_TXFORMAT_HEIGHT_MASK |	\
+			      RADEON_TXFORMAT_FORMAT_MASK |	\
+                              RADEON_TXFORMAT_F5_WIDTH_MASK |	\
+                              RADEON_TXFORMAT_F5_HEIGHT_MASK |	\
+			      RADEON_TXFORMAT_ALPHA_IN_MAP |	\
+			      RADEON_TXFORMAT_CUBIC_MAP_ENABLE |	\
+                              RADEON_TXFORMAT_NON_POWER2)
+
+
+static void disable_tex_obj_state( r100ContextPtr rmesa, 
+				   int unit )
+{
+   RADEON_STATECHANGE( rmesa, tex[unit] );
+   /* Clear this unit's ST and Q bits in the TCL output vertex format. */
+   RADEON_STATECHANGE( rmesa, tcl );
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) |
+					     RADEON_Q_BIT(unit));
+   /* Drop a texgen TCL fallback recorded for this unit and request a texgen recheck. */
+   if (rmesa->radeon.TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) {
+     TCL_FALLBACK( rmesa->radeon.glCtx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE);
+     rmesa->recheck_texgen[unit] = GL_TRUE;
+   }
+
+   if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) {
+     /* this seems to be a genuine (r100 only?) hw bug. Need to remove the
+	cubic_map bit on unit 2 when the unit is disabled, otherwise every
+	2nd (2d) mipmap on unit 0 will be broken (may not be needed for other
+	units, better be safe than sorry though).*/
+     RADEON_STATECHANGE( rmesa, tex[unit] );
+     rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE;
+   }
+   /* Reset the unit's texgen bits so its input is its own texcoord set, with no texmat/texgen enables. */
+   {
+      GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+      GLuint tmp = rmesa->TexGenEnabled; /* value before the reset, compared below */
+
+      rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit);
+      rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit);
+      rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift);
+      rmesa->TexGenNeedNormals[unit] = 0;
+      rmesa->TexGenEnabled |= 
+	(RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift;
+
+      if (tmp != rmesa->TexGenEnabled) { /* only flag new state when the bits actually changed */
+	rmesa->recheck_texgen[unit] = GL_TRUE;
+	rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+      }
+   }
+}
+
+static void import_tex_obj_state( r100ContextPtr rmesa,
+				  int unit,
+				  radeonTexObjPtr texobj )
+{
+/* Copies texobj's cached register values into unit's hw state; do not use RADEON_DB_STATE to avoid stale texture caches */
+   uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0];
+   GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT];
+
+   RADEON_STATECHANGE( rmesa, tex[unit] );
+   /* Only the bits covered by the TEXOBJ_* masks are taken from the texture object. */
+   cmd[TEX_PP_TXFILTER] &= ~TEXOBJ_TXFILTER_MASK;
+   cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK;
+   cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK;
+   cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK;
+   cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color;
+
+   if (texobj->pp_txformat & RADEON_TXFORMAT_NON_POWER2) {
+      uint32_t *txr_cmd = &rmesa->hw.txr[unit].cmd[TXR_CMD_0];
+      txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */
+      txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */
+      RADEON_STATECHANGE( rmesa, txr[unit] ); /* NOTE(review): issued after the writes here, before them elsewhere - confirm intended */
+   }
+
+   if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) {
+      se_coord_fmt |= RADEON_VTX_ST0_NONPARAMETRIC << unit;
+   }
+   else {
+      se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit);
+
+      if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) {
+	 uint32_t *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0];
+
+	 RADEON_STATECHANGE( rmesa, cube[unit] );
+	 cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces;
+	 /* state filled out in the cube_emit */
+      }
+   }
+   /* Touch the `set` atom only when the coordinate format really changed. */
+   if (se_coord_fmt != rmesa->hw.set.cmd[SET_SE_COORDFMT]) {
+      RADEON_STATECHANGE( rmesa, set );
+      rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt;
+   }
+
+   rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+}
+
+
+static void set_texgen_matrix( r100ContextPtr rmesa, 
+			       GLuint unit,
+			       const GLfloat *s_plane,
+			       const GLfloat *t_plane,
+			       const GLfloat *r_plane,
+			       const GLfloat *q_plane )
+{
+   /* Load the four texgen plane equations into this unit's texgen matrix.
+    * Coefficient k of a plane is stored at m[4*k + row], where row is
+    * 0 for S, 1 for T, 2 for R and 3 for Q.
+    */
+   GLfloat *dst = rmesa->TexGenMatrix[unit].m;
+   const GLfloat *planes[4];
+   GLuint row, k;
+
+   planes[0] = s_plane;
+   planes[1] = t_plane;
+   planes[2] = r_plane;
+   planes[3] = q_plane;
+
+   for ( row = 0 ; row < 4 ; row++ ) {
+      for ( k = 0 ; k < 4 ; k++ )
+	 dst[4 * k + row] = planes[row][k];
+   }
+
+   /* Set the unit's TEXMAT enable bit and flag _NEW_TEXTURE_MATRIX
+    * in the driver's pending GL state. */
+   rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit;
+   rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+}
+
+/* Translates unit's GL texgen state into rmesa->TexGenEnabled bits and, for some
+ * modes, a texgen matrix.  Returns GL_FALSE if fallback required. */
+static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4;
+   GLuint tmp = rmesa->TexGenEnabled; /* value on entry, compared at the end */
+   static const GLfloat reflect[16] = {
+      -1,  0,  0,  0,
+       0, -1,  0,  0,
+       0,  0,  -1, 0,
+       0,  0,  0,  1 };
+   /* Start from a clean slate for this unit; the bits are rebuilt below. */
+   rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE << unit);
+   rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE << unit);
+   rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK << inputshift);
+   rmesa->TexGenNeedNormals[unit] = 0;
+
+   if ((texUnit->TexGenEnabled & (S_BIT|T_BIT|R_BIT|Q_BIT)) == 0) {
+      /* Disabled, no fallback:
+       */
+      rmesa->TexGenEnabled |=
+	 (RADEON_TEXGEN_INPUT_TEXCOORD_0 + unit) << inputshift;
+      return GL_TRUE;
+   }
+   /* the r100 cannot do texgen for some coords and not for others
+    * we do not detect such cases (certainly can't do it here) and just
+    * ASSUME that when S and T are texgen enabled we do not need other
+    * non-texgen enabled coords, no matter if the R and Q bits are texgen
+    * enabled. Still check for mixed mode texgen for all coords.
+    */
+   else if ( (texUnit->TexGenEnabled & S_BIT) &&
+	     (texUnit->TexGenEnabled & T_BIT) &&
+	     (texUnit->GenS.Mode == texUnit->GenT.Mode) ) {
+      if ( ((texUnit->TexGenEnabled & R_BIT) &&
+	    (texUnit->GenS.Mode != texUnit->GenR.Mode)) ||
+	   ((texUnit->TexGenEnabled & Q_BIT) &&
+	    (texUnit->GenS.Mode != texUnit->GenQ.Mode)) ) {
+	 /* Mixed modes, fallback:
+	  */
+	 if (RADEON_DEBUG & RADEON_FALLBACKS)
+	    fprintf(stderr, "fallback mixed texgen\n");
+	 return GL_FALSE;
+      }
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_TEXMAT_0_ENABLE << unit;
+   }
+   else {
+   /* some texgen mode not including both S and T bits */
+      if (RADEON_DEBUG & RADEON_FALLBACKS)
+	 fprintf(stderr, "fallback mixed texgen/nontexgen\n");
+      return GL_FALSE;
+   }
+
+   if ((texUnit->TexGenEnabled & (R_BIT | Q_BIT)) != 0) {
+      /* need this here for vtxfmt presumably. Argh we need to set
+         this from way too many places, would be much easier if we could leave
+         tcl q coord always enabled as on r200) */
+      RADEON_STATECHANGE( rmesa, tcl );
+      rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_Q_BIT(unit);
+   }
+   /* All enabled coords share GenS.Mode at this point, so it alone selects the input. */
+   switch (texUnit->GenS.Mode) {
+   case GL_OBJECT_LINEAR:
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_OBJ << inputshift;
+      set_texgen_matrix( rmesa, unit,
+			 texUnit->GenS.ObjectPlane,
+			 texUnit->GenT.ObjectPlane,
+			 texUnit->GenR.ObjectPlane,
+			 texUnit->GenQ.ObjectPlane);
+      break;
+
+   case GL_EYE_LINEAR:
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE << inputshift;
+      set_texgen_matrix( rmesa, unit,
+			 texUnit->GenS.EyePlane,
+			 texUnit->GenT.EyePlane,
+			 texUnit->GenR.EyePlane,
+			 texUnit->GenQ.EyePlane);
+      break;
+
+   case GL_REFLECTION_MAP_NV:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_REFLECT << inputshift;
+      /* TODO: unknown if this is needed/correct */
+      set_texgen_matrix( rmesa, unit, reflect, reflect + 4,
+			reflect + 8, reflect + 12 );
+      break;
+
+   case GL_NORMAL_MAP_NV:
+      rmesa->TexGenNeedNormals[unit] = GL_TRUE;
+      rmesa->TexGenEnabled |= RADEON_TEXGEN_INPUT_EYE_NORMAL << inputshift;
+      break;
+
+   case GL_SPHERE_MAP:
+      /* the mode which everyone uses :-( */
+   default: /* every other mode lands here too and logs the same GL_SPHERE_MAP message */
+      /* Unsupported mode, fallback:
+       */
+      if (RADEON_DEBUG & RADEON_FALLBACKS)
+	 fprintf(stderr, "fallback GL_SPHERE_MAP\n");
+      return GL_FALSE;
+   }
+
+   if (tmp != rmesa->TexGenEnabled) {
+      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+   }
+
+   return GL_TRUE;
+}
+
+/**
+ * Compute the cached hardware register values for the given texture object.
+ *
+ * \param rmesa Context pointer
+ * \param t the radeon texture object
+ */
+static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit)
+{
+   const struct gl_texture_image *firstImage;
+   GLint log2Width, log2Height, log2Depth, texelBytes;
+
+   if ( t->bo ) {
+	return GL_TRUE;
+   }
+
+   firstImage = t->base.Image[0][t->minLod];
+
+   if (firstImage->Border > 0) {
+      fprintf(stderr, "%s: border\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   log2Width  = firstImage->WidthLog2;
+   log2Height = firstImage->HeightLog2;
+   log2Depth  = firstImage->DepthLog2;
+   texelBytes = _mesa_get_format_bytes(firstImage->TexFormat);
+
+   if (!t->image_override) {
+      if (VALID_FORMAT(firstImage->TexFormat)) {
+	const struct tx_table *table = tx_table;
+
+	 t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK |
+			     RADEON_TXFORMAT_ALPHA_IN_MAP);
+	 t->pp_txfilter &= ~RADEON_YUV_TO_RGB;	 
+	 
+	 t->pp_txformat |= table[ firstImage->TexFormat ].format;
+	 t->pp_txfilter |= table[ firstImage->TexFormat ].filter;
+      } else {
+	 _mesa_problem(NULL, "unexpected texture format in %s",
+		       __FUNCTION__);
+	 return GL_FALSE;
+      }
+   }
+
+   t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK;
+   t->pp_txfilter |= (t->maxLod - t->minLod) << RADEON_MAX_MIP_LEVEL_SHIFT;
+	
+   t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK |
+		       RADEON_TXFORMAT_HEIGHT_MASK |
+		       RADEON_TXFORMAT_CUBIC_MAP_ENABLE |
+		       RADEON_TXFORMAT_F5_WIDTH_MASK |
+		       RADEON_TXFORMAT_F5_HEIGHT_MASK);
+   t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) |
+		      (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT));
+
+   t->tile_bits = 0;
+
+   if (t->base.Target == GL_TEXTURE_CUBE_MAP) {
+      ASSERT(log2Width == log2Height);
+      t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) |
+			 (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) |
+			 /* don't think we need this bit, if it exists at all - fglrx does not set it */
+			 (RADEON_TXFORMAT_CUBIC_MAP_ENABLE));
+      t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) |
+                           (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) |
+                           (log2Width << RADEON_FACE_WIDTH_2_SHIFT) |
+                           (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) |
+                           (log2Width << RADEON_FACE_WIDTH_3_SHIFT) |
+                           (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) |
+                           (log2Width << RADEON_FACE_WIDTH_4_SHIFT) |
+                           (log2Height << RADEON_FACE_HEIGHT_4_SHIFT));
+   }
+
+   t->pp_txsize = (((firstImage->Width - 1) << RADEON_TEX_USIZE_SHIFT)
+		   | ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT));
+
+   if ( !t->image_override ) {
+      if (_mesa_is_format_compressed(firstImage->TexFormat))
+         t->pp_txpitch = (firstImage->Width + 63) & ~(63);
+      else
+         t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63);
+      t->pp_txpitch -= 32;
+   }
+
+   if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
+      t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2;
+   }
+
+   return GL_TRUE;
+}
+
+static GLboolean radeon_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit)
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   radeonTexObj *t = radeon_tex_obj(texObj);
+   int ret;
+
+   if (!radeon_validate_texture_miptree(ctx, texObj))
+      return GL_FALSE;
+
+   ret = setup_hardware_state(rmesa, t, unit);
+   if (ret == GL_FALSE)
+     return GL_FALSE;
+
+   /* yuv conversion only works in first unit */
+   if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB))
+      return GL_FALSE;
+
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= 
+     (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit;
+   RADEON_STATECHANGE( rmesa, tcl );
+   rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit);
+
+   rmesa->recheck_texgen[unit] = GL_TRUE;
+
+   import_tex_obj_state( rmesa, unit, t );
+
+   if (rmesa->recheck_texgen[unit]) {
+      GLboolean fallback = !radeon_validate_texgen( ctx, unit );
+      TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback);
+      rmesa->recheck_texgen[unit] = 0;
+      rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX;
+   }
+
+   if ( ! radeonUpdateTextureEnv( ctx, unit ) ) {
+     return GL_FALSE;
+   }
+   FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback );
+
+   t->validated = GL_TRUE;
+   return !t->border_fallback;
+}
+
+static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+
+   if (ctx->Texture.Unit[unit]._ReallyEnabled & TEXTURE_3D_BIT) {
+     rmesa->state.texture.unit[unit].texobj = NULL;
+     return GL_FALSE;
+   }
+
+   if (!ctx->Texture.Unit[unit]._ReallyEnabled) {
+     /* disable the unit */
+     disable_tex_obj_state(rmesa, unit);
+     rmesa->state.texture.unit[unit].texobj = NULL;
+     return GL_TRUE;
+   }
+
+   if (!radeon_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) {
+    _mesa_warning(ctx,
+		  "failed to validate texture for unit %d.\n",
+		  unit);
+     rmesa->state.texture.unit[unit].texobj = NULL;
+     return GL_FALSE;
+   }
+   rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
+   return GL_TRUE;
+}
+
+void radeonUpdateTextureState( GLcontext *ctx )
+{
+   r100ContextPtr rmesa = R100_CONTEXT(ctx);
+   GLboolean ok;
+
+   /* set the ctx all textures off */
+   RADEON_STATECHANGE( rmesa, ctx );
+   rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~((RADEON_TEX_ENABLE_MASK) | (RADEON_TEX_BLEND_ENABLE_MASK));
+
+   ok = (radeonUpdateTextureUnit( ctx, 0 ) &&
+	 radeonUpdateTextureUnit( ctx, 1 ) &&
+	 radeonUpdateTextureUnit( ctx, 2 ));
+
+   FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok );
+
+   if (rmesa->radeon.TclFallback)
+      radeonChooseVertexState( ctx );
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
new file mode 100644
index 0000000000..d2b190e42e
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -0,0 +1,1029 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora.
+ * Copyright (C) 2008 Nicolai Haehnle.
+ * Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+ *
+ * The Weather Channel (TM) funded Tungsten Graphics to develop the
+ * initial release of the Radeon 8500 driver under the XFree86 license.
+ * This notice must be preserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/convolve.h"
+#include "main/enums.h"
+#include "main/mipmap.h"
+#include "main/texcompress.h"
+#include "main/texstore.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "drivers/common/meta.h"
+
+#include "xmlpool.h"		/* for symbolic values of enum-type options */
+
+#include "radeon_common.h"
+
+#include "radeon_mipmap_tree.h"
+
+
+/* Copy numrows rows of rowsize bytes from src to dst; strides are byte distances between rows. */
+void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride,
+	GLuint numrows, GLuint rowsize)
+{
+	/* Byte cursors: arithmetic on void pointers is a GNU extension, not ISO C. */
+	GLubyte *dstrow = dst;
+	const GLubyte *srcrow = src;
+	GLuint i;
+
+	assert(rowsize <= dststride);
+	assert(rowsize <= srcstride);
+
+	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s dst %p, stride %u, src %p, stride %u, "
+		"numrows %u, rowsize %u.\n",
+		__func__, dst, dststride, src, srcstride, numrows, rowsize);
+
+	if (rowsize == srcstride && rowsize == dststride) {
+		memcpy(dst, src, (size_t)numrows * rowsize);	/* packed rows: one copy, size_t product avoids 32-bit wrap */
+		return;
+	}
+	for (i = 0; i < numrows; ++i, dstrow += dststride, srcrow += srcstride)
+		memcpy(dstrow, srcrow, rowsize);
+}
+
+/* textures */
+/**
+ * Allocate an empty driver texture image (a radeon_texture_image); ctx is not used.
+ */
+struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx)
+{
+	return CALLOC(sizeof(radeon_texture_image));	/* NOTE(review): presumably zero-filled by CALLOC - confirm */
+}
+
+/**
+ * Free memory associated with this texture image.
+ */
+void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage)
+{
+	radeon_texture_image* image = get_radeon_texture_image(timage);
+
+	if (image->mt) {
+		radeon_miptree_unreference(&image->mt);
+		assert(!image->base.Data);
+	} else {
+		_mesa_free_texture_image_data(ctx, timage);
+	}
+	if (image->bo) {
+		radeon_bo_unref(image->bo);
+		image->bo = NULL;
+	}
+	if (timage->Data) {
+		_mesa_free_texmemory(timage->Data);
+		timage->Data = NULL;
+	}
+}
+
+/* Point base.Data and base.RowStride of a mapped image at its level/face slot in the miptree BO. */
+static void teximage_set_map_data(radeon_texture_image *image)
+{
+	radeon_mipmap_level *lvl;
+
+	if (!image->mt) {
+		radeon_warning("%s(%p) Trying to set map data without miptree.\n",
+				__func__, image);
+		/* No miptree to point into: leave Data and RowStride untouched. */
+		return;
+	}
+
+	lvl = &image->mt->levels[image->mtlevel];
+	/* Offset through a byte pointer; arithmetic on a void pointer is a GNU extension. */
+	image->base.Data = (GLubyte *)image->mt->bo->ptr + lvl->faces[image->mtface].offset;
+	image->base.RowStride = lvl->rowstride / _mesa_get_format_bytes(image->base.TexFormat);
+}
+
+
+/**
+ * Map a single texture image for glTexImage and friends.
+ */
+void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable)
+{
+	radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+			"%s(img %p), write_enable %s.\n",
+			__func__, image,
+			write_enable ? "true": "false");
+	if (image->mt) {
+		assert(!image->base.Data);
+
+		radeon_bo_map(image->mt->bo, write_enable);
+		teximage_set_map_data(image);
+	}
+}
+
+
+/* Counterpart of radeon_teximage_map: clear the Data pointer and unmap the miptree's BO. */
+void radeon_teximage_unmap(radeon_texture_image *image)
+{
+	radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+			"%s(img %p)\n", __func__, image);
+	/* Without a miptree there is no BO mapping to release. */
+	if (!image->mt)
+		return;
+	assert(image->base.Data);
+	image->base.Data = NULL;
+	radeon_bo_unmap(image->mt->bo);
+}
+
+static void map_override(GLcontext *ctx, radeonTexObj *t)
+{
+	radeon_texture_image *img = get_radeon_texture_image(t->base.Image[0][0]);
+
+	radeon_bo_map(t->bo, GL_FALSE);
+
+	img->base.Data = t->bo->ptr;
+}
+
+static void unmap_override(GLcontext *ctx, radeonTexObj *t)
+{
+	radeon_texture_image *img = get_radeon_texture_image(t->base.Image[0][0]);
+
+	radeon_bo_unmap(t->bo);
+
+	img->base.Data = NULL;
+}
+
+/**
+ * Map a validated texture for reading during software rendering.
+ */
+void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
+{
+	radeonTexObj* t = radeon_tex_obj(texObj);
+	int face, level;
+
+	radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+			"%s(%p, tex %p)\n",
+			__func__, ctx, texObj);
+
+	if (!radeon_validate_texture_miptree(ctx, texObj)) {
+		radeon_error("%s(%p, tex %p) Failed to validate miptree for "
+			"sw fallback.\n",
+			__func__, ctx, texObj);
+		return;
+	}
+
+	if (t->image_override && t->bo) {
+		radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+			"%s(%p, tex %p) Work around for missing miptree in r100.\n",
+			__func__, ctx, texObj);
+
+		map_override(ctx, t);
+	}
+
+	/* for r100 3D sw fallbacks don't have mt */
+	if (!t->mt) {
+		radeon_warning("%s(%p, tex %p) No miptree in texture.\n",
+			__func__, ctx, texObj);
+		return;
+	}
+
+	radeon_bo_map(t->mt->bo, GL_FALSE);
+	for(face = 0; face < t->mt->faces; ++face) {
+		for(level = t->minLod; level <= t->maxLod; ++level)
+			teximage_set_map_data(get_radeon_texture_image(texObj->Image[face][level]));
+	}
+}
+
+void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj)
+{
+	radeonTexObj* t = radeon_tex_obj(texObj);
+	int face, level;
+
+	radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+			"%s(%p, tex %p)\n",
+			__func__, ctx, texObj);
+
+	if (t->image_override && t->bo)
+		unmap_override(ctx, t);
+	/* for r100 3D sw fallbacks don't have mt */
+	if (!t->mt)
+	  return;
+
+	for(face = 0; face < t->mt->faces; ++face) {
+		for(level = t->minLod; level <= t->maxLod; ++level)
+			texObj->Image[face][level]->Data = 0;
+	}
+	radeon_bo_unmap(t->mt->bo);
+}
+
+/**
+ * Wraps Mesa's implementation to ensure that the base level image is mapped.
+ *
+ * This relies on internal details of _mesa_generate_mipmap, in particular
+ * the fact that the memory for recreated texture images is always freed.
+ */
+static void radeon_generate_mipmap(GLcontext *ctx, GLenum target,
+				   struct gl_texture_object *texObj)
+{
+	radeonTexObj* t = radeon_tex_obj(texObj);
+	GLuint nr_faces = (t->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
+	int i, face;
+
+	radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+			"%s(%p, tex %p) Target type %s.\n",
+			__func__, ctx, texObj,
+			_mesa_lookup_enum_by_nr(target));
+
+	_mesa_generate_mipmap(ctx, target, texObj);
+
+	for (face = 0; face < nr_faces; face++) {
+		for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) {
+			radeon_texture_image *image;
+
+			image = get_radeon_texture_image(texObj->Image[face][i]);
+
+			if (image == NULL)
+				break;
+
+			image->mtlevel = i;
+			image->mtface = face;
+
+			radeon_miptree_unreference(&image->mt);
+		}
+	}
+	
+}
+
+void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	struct radeon_bo *bo;
+	GLuint face = _mesa_tex_target_to_face(target);
+	radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[face][texObj->BaseLevel]);
+	bo = !baseimage->mt ? baseimage->bo : baseimage->mt->bo;
+
+	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s(%p, target %s, tex %p)\n",
+		__func__, ctx, _mesa_lookup_enum_by_nr(target),
+		texObj);
+
+	if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
+		radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
+			"%s(%p, tex %p) Trying to generate mipmap for texture "
+			"in processing by GPU.\n",
+			__func__, ctx, texObj);
+		radeon_firevertices(rmesa);
+	}
+
+	if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, texObj)) {
+		radeon_teximage_map(baseimage, GL_FALSE);
+		radeon_generate_mipmap(ctx, target, texObj);
+		radeon_teximage_unmap(baseimage);
+	} else {
+		_mesa_meta_GenerateMipmap(ctx, target, texObj);
+	}
+}
+
+
+/* try to find a format which will only need a memcopy */
+static gl_format radeonChoose8888TexFormat(radeonContextPtr rmesa,
+					   GLenum srcFormat,
+					   GLenum srcType, GLboolean fbo)
+{
+	const GLuint ui = 1;
+	const GLubyte littleEndian = *((const GLubyte *)&ui);
+
+	/* r100 can only do this */
+	if (IS_R100_CLASS(rmesa->radeonScreen) || fbo)
+	  return _dri_texformat_argb8888;
+
+	if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+	    (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+	    (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) {
+		return MESA_FORMAT_RGBA8888;
+	} else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) ||
+		   (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) ||
+		   (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) {
+		return MESA_FORMAT_RGBA8888_REV;
+	} else if (IS_R200_CLASS(rmesa->radeonScreen)) {
+		return _dri_texformat_argb8888;
+	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) ||
+					    srcType == GL_UNSIGNED_INT_8_8_8_8)) {
+		return MESA_FORMAT_ARGB8888_REV;
+	} else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) ||
+					    srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) {
+		return MESA_FORMAT_ARGB8888;
+	} else
+		return _dri_texformat_argb8888;
+}
+
+/* Four-argument variant: forwards to radeonChooseTextureFormat() with fbo set to GL_FALSE. */
+gl_format radeonChooseTextureFormat_mesa(GLcontext * ctx,
+					 GLint internalFormat,
+					 GLenum format,
+					 GLenum type)
+{
+	return radeonChooseTextureFormat(ctx, internalFormat, format, type, GL_FALSE);
+}
+
+gl_format radeonChooseTextureFormat(GLcontext * ctx,
+				    GLint internalFormat,
+				    GLenum format,
+				    GLenum type, GLboolean fbo)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	const GLboolean do32bpt =
+	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32);
+	const GLboolean force16bpt =
+	    (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16);
+	(void)format;
+
+	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+		"%s InternalFormat=%s(%d) type=%s format=%s\n",
+		__func__,
+		_mesa_lookup_enum_by_nr(internalFormat), internalFormat,
+		_mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format));
+	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+			"%s do32bpt=%d force16bpt=%d\n",
+			__func__, do32bpt, force16bpt);
+
+	switch (internalFormat) {
+	case 4:
+	case GL_RGBA:
+	case GL_COMPRESSED_RGBA:
+		switch (type) {
+		case GL_UNSIGNED_INT_10_10_10_2:
+		case GL_UNSIGNED_INT_2_10_10_10_REV:
+			return do32bpt ? _dri_texformat_argb8888 :
+			    _dri_texformat_argb1555;
+		case GL_UNSIGNED_SHORT_4_4_4_4:
+		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+			return _dri_texformat_argb4444;
+		case GL_UNSIGNED_SHORT_5_5_5_1:
+		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+			return _dri_texformat_argb1555;
+		default:
+			return do32bpt ? radeonChoose8888TexFormat(rmesa, format, type, fbo) :
+			    _dri_texformat_argb4444;
+		}
+
+	case 3:
+	case GL_RGB:
+	case GL_COMPRESSED_RGB:
+		switch (type) {
+		case GL_UNSIGNED_SHORT_4_4_4_4:
+		case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+			return _dri_texformat_argb4444;
+		case GL_UNSIGNED_SHORT_5_5_5_1:
+		case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+			return _dri_texformat_argb1555;
+		case GL_UNSIGNED_SHORT_5_6_5:
+		case GL_UNSIGNED_SHORT_5_6_5_REV:
+			return _dri_texformat_rgb565;
+		default:
+			return do32bpt ? _dri_texformat_argb8888 :
+			    _dri_texformat_rgb565;
+		}
+
+	case GL_RGBA8:
+	case GL_RGB10_A2:
+	case GL_RGBA12:
+	case GL_RGBA16:
+		return !force16bpt ?
+			radeonChoose8888TexFormat(rmesa, format, type, fbo) :
+			_dri_texformat_argb4444;
+
+	case GL_RGBA4:
+	case GL_RGBA2:
+		return _dri_texformat_argb4444;
+
+	case GL_RGB5_A1:
+		return _dri_texformat_argb1555;
+
+	case GL_RGB8:
+	case GL_RGB10:
+	case GL_RGB12:
+	case GL_RGB16:
+		return !force16bpt ? _dri_texformat_argb8888 :
+		    _dri_texformat_rgb565;
+
+	case GL_RGB5:
+	case GL_RGB4:
+	case GL_R3_G3_B2:
+		return _dri_texformat_rgb565;
+
+	case GL_ALPHA:
+	case GL_ALPHA4:
+	case GL_ALPHA8:
+	case GL_ALPHA12:
+	case GL_ALPHA16:
+	case GL_COMPRESSED_ALPHA:
+		/* r200: can't use a8 format since interpreting hw I8 as a8 would result
+		   in wrong rgb values (same as alpha value instead of 0). */
+		if (IS_R200_CLASS(rmesa->radeonScreen))
+			return _dri_texformat_al88;
+		else
+			return _dri_texformat_a8;
+	case 1:
+	case GL_LUMINANCE:
+	case GL_LUMINANCE4:
+	case GL_LUMINANCE8:
+	case GL_LUMINANCE12:
+	case GL_LUMINANCE16:
+	case GL_COMPRESSED_LUMINANCE:
+		return _dri_texformat_l8;
+
+	case 2:
+	case GL_LUMINANCE_ALPHA:
+	case GL_LUMINANCE4_ALPHA4:
+	case GL_LUMINANCE6_ALPHA2:
+	case GL_LUMINANCE8_ALPHA8:
+	case GL_LUMINANCE12_ALPHA4:
+	case GL_LUMINANCE12_ALPHA12:
+	case GL_LUMINANCE16_ALPHA16:
+	case GL_COMPRESSED_LUMINANCE_ALPHA:
+		return _dri_texformat_al88;
+
+	case GL_INTENSITY:
+	case GL_INTENSITY4:
+	case GL_INTENSITY8:
+	case GL_INTENSITY12:
+	case GL_INTENSITY16:
+	case GL_COMPRESSED_INTENSITY:
+		return _dri_texformat_i8;
+
+	case GL_YCBCR_MESA:
+		if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+		    type == GL_UNSIGNED_BYTE)
+			return MESA_FORMAT_YCBCR;
+		else
+			return MESA_FORMAT_YCBCR_REV;
+
+	case GL_RGB_S3TC:
+	case GL_RGB4_S3TC:
+	case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+		return MESA_FORMAT_RGB_DXT1;
+
+	case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+		return MESA_FORMAT_RGBA_DXT1;
+
+	case GL_RGBA_S3TC:
+	case GL_RGBA4_S3TC:
+	case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+		return MESA_FORMAT_RGBA_DXT3;
+
+	case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+		return MESA_FORMAT_RGBA_DXT5;
+
+	case GL_ALPHA16F_ARB:
+		return MESA_FORMAT_ALPHA_FLOAT16;
+	case GL_ALPHA32F_ARB:
+		return MESA_FORMAT_ALPHA_FLOAT32;
+	case GL_LUMINANCE16F_ARB:
+		return MESA_FORMAT_LUMINANCE_FLOAT16;
+	case GL_LUMINANCE32F_ARB:
+		return MESA_FORMAT_LUMINANCE_FLOAT32;
+	case GL_LUMINANCE_ALPHA16F_ARB:
+		return MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16;
+	case GL_LUMINANCE_ALPHA32F_ARB:
+		return MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32;
+	case GL_INTENSITY16F_ARB:
+		return MESA_FORMAT_INTENSITY_FLOAT16;
+	case GL_INTENSITY32F_ARB:
+		return MESA_FORMAT_INTENSITY_FLOAT32;
+	case GL_RGB16F_ARB:
+		return MESA_FORMAT_RGBA_FLOAT16;
+	case GL_RGB32F_ARB:
+		return MESA_FORMAT_RGBA_FLOAT32;
+	case GL_RGBA16F_ARB:
+		return MESA_FORMAT_RGBA_FLOAT16;
+	case GL_RGBA32F_ARB:
+		return MESA_FORMAT_RGBA_FLOAT32;
+
+#ifdef RADEON_R300
+	case GL_DEPTH_COMPONENT:
+	case GL_DEPTH_COMPONENT16:
+		return MESA_FORMAT_Z16;
+	case GL_DEPTH_COMPONENT24:
+	case GL_DEPTH_COMPONENT32:
+	case GL_DEPTH_STENCIL_EXT:
+	case GL_DEPTH24_STENCIL8_EXT:
+		if (rmesa->radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+			return MESA_FORMAT_S8_Z24;
+		else
+			return MESA_FORMAT_Z16;
+#else
+	case GL_DEPTH_COMPONENT:
+	case GL_DEPTH_COMPONENT16:
+	case GL_DEPTH_COMPONENT24:
+	case GL_DEPTH_COMPONENT32:
+	case GL_DEPTH_STENCIL_EXT:
+	case GL_DEPTH24_STENCIL8_EXT:
+		return MESA_FORMAT_S8_Z24;
+#endif
+
+	/* EXT_texture_sRGB */
+	case GL_SRGB:
+	case GL_SRGB8:
+	case GL_SRGB_ALPHA:
+	case GL_SRGB8_ALPHA8:
+	case GL_COMPRESSED_SRGB:
+	case GL_COMPRESSED_SRGB_ALPHA:
+		return MESA_FORMAT_SRGBA8;
+
+	case GL_SLUMINANCE:
+	case GL_SLUMINANCE8:
+	case GL_COMPRESSED_SLUMINANCE:
+		return MESA_FORMAT_SL8;
+
+	case GL_SLUMINANCE_ALPHA:
+	case GL_SLUMINANCE8_ALPHA8:
+	case GL_COMPRESSED_SLUMINANCE_ALPHA:
+		return MESA_FORMAT_SLA8;
+
+	case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+		return MESA_FORMAT_SRGB_DXT1;
+	case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+		return MESA_FORMAT_SRGBA_DXT1;
+	case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+		return MESA_FORMAT_SRGBA_DXT3;
+	case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+		return MESA_FORMAT_SRGBA_DXT5;
+
+	default:
+		_mesa_problem(ctx,
+			      "unexpected internalFormat 0x%x in %s",
+			      (int)internalFormat, __func__);
+		return MESA_FORMAT_NONE;
+	}
+
+	return MESA_FORMAT_NONE;		/* never get here */
+}
+
+/**
+ * Tell whether texImage has the size that mip level `level` must have, i.e. the
+ * base-level image dimensions shifted right once per level above BaseLevel (min. 1).
+ * Returns 0 when there is no base image or level is outside BaseLevel..MaxLevel.
+ */
+static int image_matches_texture_obj(struct gl_texture_object *texObj,
+	struct gl_texture_image *texImage,
+	unsigned level)
+{
+	const struct gl_texture_image *base = texObj->Image[0][texObj->BaseLevel];
+	unsigned shift, wantW, wantH, wantD;
+
+	if (!base || level < texObj->BaseLevel || level > texObj->MaxLevel)
+		return 0;
+
+	shift = level - texObj->BaseLevel;
+	wantW = MAX2(base->Width >> shift, 1);
+	wantH = MAX2(base->Height >> shift, 1);
+	wantD = MAX2(base->Depth >> shift, 1);
+	if (texImage->Width != wantW || texImage->Height != wantH)
+		return 0;
+	return texImage->Depth == wantD;
+}
+
+static void teximage_assign_miptree(radeonContextPtr rmesa,
+	struct gl_texture_object *texObj,
+	struct gl_texture_image *texImage,
+	unsigned face,
+	unsigned level)
+{
+	radeonTexObj *t = radeon_tex_obj(texObj);
+	radeon_texture_image* image = get_radeon_texture_image(texImage);
+
+	/* Since miptree holds only images for levels <BaseLevel..MaxLevel>
+	 * don't allocate the miptree if the teximage won't fit.
+	 */
+	if (!image_matches_texture_obj(texObj, texImage, level))
+		return;
+
+	/* Try using current miptree, or create new if there isn't any */
+	if (!t->mt || !radeon_miptree_matches_image(t->mt, texImage, face, level)) {
+		radeon_miptree_unreference(&t->mt);
+		radeon_try_alloc_miptree(rmesa, t);
+		radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
+				"%s: texObj %p, texImage %p, face %d, level %d, "
+				"texObj miptree doesn't match, allocated new miptree %p\n",
+				__FUNCTION__, texObj, texImage, face, level, t->mt);
+	}
+
+	/* Miptree allocation may have failed,
+	 * when there was no image for baselevel specified */
+	if (t->mt) {
+		image->mtface = face;
+		image->mtlevel = level;
+		radeon_miptree_reference(t->mt, &image->mt);
+	} else
+		radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+				"%s Failed to allocate miptree.\n", __func__);
+}
+
+/* Build the per-slice offset table of a 3D image: offsets[i] = alignedWidth * height * i.
+ * Returns a malloc'ed array of depth entries (the caller frees it), or NULL after raising
+ * GL_OUT_OF_MEMORY on ctx. */
+static GLuint * allocate_image_offsets(GLcontext *ctx,
+	unsigned alignedWidth,
+	unsigned height,
+	unsigned depth)
+{
+	const unsigned sliceSize = alignedWidth * height;
+	GLuint *offsets = malloc(depth * sizeof *offsets);
+	unsigned i;	/* same type as depth, so the loop bound compare is not sign-mismatched */
+
+	if (!offsets) {
+		_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTex[Sub]Image");
+		return NULL;
+	}
+	for (i = 0; i < depth; ++i)
+		offsets[i] = sliceSize * i;
+	return offsets;
+}
+
+/**
+ * Store a width x height x depth block of pixels at (xoffset, yoffset, zoffset) in texImage:
+ * compressed data is copied with copy_rows(), everything else is converted by _mesa_texstore().
+ */
+static void radeon_store_teximage(GLcontext* ctx, int dims,
+		GLint xoffset, GLint yoffset, GLint zoffset,
+		GLsizei width, GLsizei height, GLsizei depth,
+		GLsizei imageSize,
+		GLenum format, GLenum type,
+		const GLvoid * pixels,
+		const struct gl_pixelstore_attrib *packing,
+		struct gl_texture_object *texObj,
+		struct gl_texture_image *texImage,
+		int compressed)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	radeonTexObj *t = radeon_tex_obj(texObj);
+	radeon_texture_image* image = get_radeon_texture_image(texImage);
+	GLuint dstRowStride;
+	GLuint *dstImageOffsets;
+
+	radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+			"%s(%p, tex %p, image %p) compressed %d\n",
+			__func__, ctx, texObj, texImage, compressed);
+
+	if (image->mt) {
+		dstRowStride = image->mt->levels[image->mtlevel].rowstride;
+	} else if (t->bo) {
+		/* TFP case; NOTE(review): stride is derived from the update width, not texImage->Width - confirm */
+		dstRowStride = get_texture_image_row_stride(rmesa, texImage->TexFormat, width, 0);
+	} else {
+		dstRowStride = _mesa_format_row_stride(texImage->TexFormat, texImage->Width);
+	}
+
+	assert(dstRowStride);
+
+	if (dims == 3) {
+		unsigned alignedWidth = dstRowStride/_mesa_get_format_bytes(texImage->TexFormat);
+		dstImageOffsets = allocate_image_offsets(ctx, alignedWidth, texImage->Height, texImage->Depth);
+		if (!dstImageOffsets) {
+			radeon_warning("%s Failed to allocate dstImageOffsets.\n", __func__);
+			return;
+		}
+	} else {
+		dstImageOffsets = texImage->ImageOffsets;
+	}
+
+	radeon_teximage_map(image, GL_TRUE);
+
+	if (compressed) {
+		uint32_t srcRowStride, bytesPerRow, rows, block_width, block_height;
+		GLubyte *img_start;
+
+		_mesa_get_format_block_size(texImage->TexFormat, &block_width, &block_height);
+
+		if (!image->mt) {
+			dstRowStride = _mesa_format_row_stride(texImage->TexFormat, texImage->Width);
+			img_start = _mesa_compressed_image_address(xoffset, yoffset, 0,
+									texImage->TexFormat,
+									texImage->Width, texImage->Data);
+		}
+		else {
+			uint32_t offset;
+			offset = dstRowStride / _mesa_get_format_bytes(texImage->TexFormat) * yoffset / block_height + xoffset / block_width;
+			offset *= _mesa_get_format_bytes(texImage->TexFormat);
+			img_start = (GLubyte *)texImage->Data + offset;	/* byte pointer: no void-pointer arithmetic */
+		}
+		srcRowStride = _mesa_format_row_stride(texImage->TexFormat, width);
+		bytesPerRow = srcRowStride;
+		rows = (height + block_height - 1) / block_height;
+
+		copy_rows(img_start, dstRowStride, pixels, srcRowStride, rows, bytesPerRow);
+	}
+	else {
+		if (!_mesa_texstore(ctx, dims, texImage->_BaseFormat,
+					texImage->TexFormat, texImage->Data,
+					xoffset, yoffset, zoffset,
+					dstRowStride,
+					dstImageOffsets,
+					width, height, depth,
+					format, type, pixels, packing)) {
+			_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage");
+		}
+	}
+
+	if (dims == 3) {
+		free(dstImageOffsets);
+	}
+
+	radeon_teximage_unmap(image);
+}
+
+/**
+ * All glTexImage calls go through this function: it replaces texImage's storage and uploads pixels, if any.
+ */
+static void radeon_teximage(
+	GLcontext *ctx, int dims,
+	GLenum target, GLint level,
+	GLint internalFormat,
+	GLint width, GLint height, GLint depth,
+	GLsizei imageSize,
+	GLenum format, GLenum type, const GLvoid * pixels,
+	const struct gl_pixelstore_attrib *packing,
+	struct gl_texture_object *texObj,
+	struct gl_texture_image *texImage,
+	int compressed)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	radeonTexObj* t = radeon_tex_obj(texObj);
+	radeon_texture_image* image = get_radeon_texture_image(texImage);
+	GLint postConvWidth = width;
+	GLint postConvHeight = height;
+	GLuint face = _mesa_tex_target_to_face(target);
+
+	radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
+			"%s %dd: texObj %p, texImage %p, face %d, level %d\n",
+			__func__, dims, texObj, texImage, face, level);
+	{
+		struct radeon_bo *bo = !image->mt ? image->bo : image->mt->bo;
+		if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
+			radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+				"%s Calling teximage for texture that is "
+				"queued for GPU processing.\n", __func__);
+			radeon_firevertices(rmesa);
+		}
+	}
+
+	t->validated = GL_FALSE;
+
+	if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) {
+		_mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth, &postConvHeight);
+	}
+
+	if (!_mesa_is_format_compressed(texImage->TexFormat)) {
+		GLuint texelBytes = _mesa_get_format_bytes(texImage->TexFormat);
+		/* Minimum pitch of 32 bytes */
+		if (postConvWidth * texelBytes < 32) {
+			postConvWidth = 32 / texelBytes;
+			texImage->RowStride = postConvWidth;
+		}
+		if (!image->mt) {
+			assert(texImage->RowStride == postConvWidth);
+		}
+	}
+
+	/* Mesa core only clears texImage->Data but not image->mt */
+	radeonFreeTexImageData(ctx, texImage);
+
+	if (!t->bo) {
+		teximage_assign_miptree(rmesa, texObj, texImage, face, level);
+		if (!image->mt) {
+			int size = _mesa_format_image_size(texImage->TexFormat,
+								texImage->Width,
+								texImage->Height,
+								texImage->Depth);
+			texImage->Data = _mesa_alloc_texmemory(size);
+			radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+					"%s %dd: texObj %p, texImage %p, "
+					" no miptree assigned, using local memory %p\n",
+					__func__, dims, texObj, texImage, texImage->Data);
+			if (!texImage->Data && size > 0) {	/* local allocation failed: do not store into NULL */
+				_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+				return;
+			}
+		}
+	}
+
+	/* Upload texture image; note that the spec allows pixels to be NULL */
+	if (compressed) {
+		pixels = _mesa_validate_pbo_compressed_teximage(
+			ctx, imageSize, pixels, packing, "glCompressedTexImage");
+	} else {
+		pixels = _mesa_validate_pbo_teximage(
+			ctx, dims, width, height, depth,
+			format, type, pixels, packing, "glTexImage");
+	}
+
+	if (pixels) {
+		radeon_store_teximage(ctx, dims,
+			0, 0, 0,
+			width, height, depth,
+			imageSize, format, type,
+			pixels, packing,
+			texObj, texImage,
+			compressed);
+	}
+
+	_mesa_unmap_teximage_pbo(ctx, packing);
+}
+
+void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level,
+		      GLint internalFormat,
+		      GLint width, GLint border,
+		      GLenum format, GLenum type, const GLvoid * pixels,
+		      const struct gl_pixelstore_attrib *packing,
+		      struct gl_texture_object *texObj,
+		      struct gl_texture_image *texImage)
+{
+	radeon_teximage(ctx, 1, target, level, internalFormat, width, 1, 1,
+		0, format, type, pixels, packing, texObj, texImage, 0);
+}
+
+void radeonTexImage2D(GLcontext * ctx, GLenum target, GLint level,
+			   GLint internalFormat,
+			   GLint width, GLint height, GLint border,
+			   GLenum format, GLenum type, const GLvoid * pixels,
+			   const struct gl_pixelstore_attrib *packing,
+			   struct gl_texture_object *texObj,
+			   struct gl_texture_image *texImage)
+
+{
+	radeon_teximage(ctx, 2, target, level, internalFormat, width, height, 1,
+		0, format, type, pixels, packing, texObj, texImage, 0);
+}
+
+/**
+ * Compressed 2D texture image upload: forwards to radeon_teximage() with
+ * dims = 2 and the compressed flag set.
+ *
+ * Format and type are passed as 0, \p imageSize is handed through, and the
+ * context's own unpack state (&ctx->Unpack) is used as the packing
+ * description.  \p border is not used here.
+ */
+void radeonCompressedTexImage2D(GLcontext * ctx, GLenum target,
+				     GLint level, GLint internalFormat,
+				     GLint width, GLint height, GLint border,
+				     GLsizei imageSize, const GLvoid * data,
+				     struct gl_texture_object *texObj,
+				     struct gl_texture_image *texImage)
+{
+	radeon_teximage(ctx, 2, target, level, internalFormat,
+			width, height, 1,	/* width, height, depth */
+			imageSize,
+			0, 0,			/* format, type */
+			data, &ctx->Unpack,
+			texObj, texImage,
+			1);			/* compressed */
+}
+
+/**
+ * 3D texture image upload: forwards to radeon_teximage() with dims = 3.
+ *
+ * No image size is supplied and the upload is flagged as uncompressed.
+ * \p border is not used here.
+ */
+void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level,
+		      GLint internalFormat,
+		      GLint width, GLint height, GLint depth,
+		      GLint border,
+		      GLenum format, GLenum type, const GLvoid * pixels,
+		      const struct gl_pixelstore_attrib *packing,
+		      struct gl_texture_object *texObj,
+		      struct gl_texture_image *texImage)
+{
+	radeon_teximage(ctx, 3, target, level, internalFormat,
+			width, height, depth,
+			0,		/* imageSize */
+			format, type, pixels, packing,
+			texObj, texImage,
+			0);		/* compressed */
+}
+
+/**
+ * Shared implementation of the radeonTexSubImage*D() and
+ * radeonCompressedTexSubImage2D() entry points.
+ *
+ * If the buffer object backing the image (the miptree's bo when the image
+ * has a miptree, otherwise the image's own bo) is referenced by the
+ * context's command stream, radeon_firevertices() is called before the
+ * image is touched.  The texture object is then marked as not validated,
+ * the source pointer is resolved through the PBO validation helpers and,
+ * if that yields a non-NULL pointer, the sub-region is written with
+ * radeon_store_teximage().  The PBO is unmapped in every case.
+ *
+ * \param dims        number of texture dimensions, as passed by the wrappers
+ * \param imageSize   size handed to the compressed PBO validation and to
+ *                    radeon_store_teximage()
+ * \param compressed  non-zero selects the compressed validation path
+ */
+static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int level,
+		GLint xoffset, GLint yoffset, GLint zoffset,
+		GLsizei width, GLsizei height, GLsizei depth,
+		GLsizei imageSize,
+		GLenum format, GLenum type,
+		const GLvoid * pixels,
+		const struct gl_pixelstore_attrib *packing,
+		struct gl_texture_object *texObj,
+		struct gl_texture_image *texImage,
+		int compressed)
+{
+	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+	radeonTexObj* t = radeon_tex_obj(texObj);
+	radeon_texture_image* image = get_radeon_texture_image(texImage);
+	struct radeon_bo *bo;
+
+	radeon_print(RADEON_TEXTURE, RADEON_NORMAL,
+			"%s %dd: texObj %p, texImage %p, face %d, level %d\n",
+			__func__, dims, texObj, texImage,
+			_mesa_tex_target_to_face(target), level);
+
+	/* Prefer the miptree's buffer object when the image lives in one. */
+	if (image->mt)
+		bo = image->mt->bo;
+	else
+		bo = image->bo;
+
+	if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->cmdbuf.cs)) {
+		radeon_print(RADEON_TEXTURE, RADEON_VERBOSE,
+			"%s Calling texsubimage for texture that is "
+			"queued for GPU processing.\n",
+			__func__);
+		radeon_firevertices(rmesa);
+	}
+
+	t->validated = GL_FALSE;
+
+	/* Resolve the client pointer (or PBO offset) to a readable address. */
+	if (!compressed) {
+		pixels = _mesa_validate_pbo_teximage(ctx, dims,
+			width, height, depth, format, type, pixels, packing, "glTexSubImage");
+	} else {
+		pixels = _mesa_validate_pbo_compressed_teximage(
+			ctx, imageSize, pixels, packing, "glCompressedTexSubImage");
+	}
+
+	if (pixels) {
+		radeon_store_teximage(ctx, dims,
+			xoffset, yoffset, zoffset,
+			width, height, depth,
+			imageSize, format, type,
+			pixels, packing,
+			texObj, texImage,
+			compressed);
+	}
+
+	_mesa_unmap_teximage_pbo(ctx, packing);
+}
+
+/**
+ * 1D texture sub-image update: forwards to radeon_texsubimage() with
+ * dims = 1, zero y/z offsets, height = depth = 1, no image size and the
+ * compressed flag cleared.
+ */
+void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+			 GLint xoffset,
+			 GLsizei width,
+			 GLenum format, GLenum type,
+			 const GLvoid * pixels,
+			 const struct gl_pixelstore_attrib *packing,
+			 struct gl_texture_object *texObj,
+			 struct gl_texture_image *texImage)
+{
+	radeon_texsubimage(ctx, 1, target, level,
+			   xoffset, 0, 0,	/* x, y, z offset */
+			   width, 1, 1,		/* width, height, depth */
+			   0,			/* imageSize */
+			   format, type, pixels, packing,
+			   texObj, texImage,
+			   0);			/* compressed */
+}
+
+/**
+ * 2D texture sub-image update: forwards to radeon_texsubimage() with
+ * dims = 2, a zero z offset, depth = 1, no image size and the compressed
+ * flag cleared.
+ */
+void radeonTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+			 GLint xoffset, GLint yoffset,
+			 GLsizei width, GLsizei height,
+			 GLenum format, GLenum type,
+			 const GLvoid * pixels,
+			 const struct gl_pixelstore_attrib *packing,
+			 struct gl_texture_object *texObj,
+			 struct gl_texture_image *texImage)
+{
+	radeon_texsubimage(ctx, 2, target, level,
+			   xoffset, yoffset, 0,	/* x, y, z offset */
+			   width, height, 1,	/* width, height, depth */
+			   0,			/* imageSize */
+			   format, type, pixels, packing,
+			   texObj, texImage,
+			   0);			/* compressed */
+}
+
+/**
+ * Compressed 2D texture sub-image update: forwards to radeon_texsubimage()
+ * with dims = 2 and the compressed flag set.
+ *
+ * \p format and \p imageSize are handed through, the type is passed as 0,
+ * and the context's own unpack state (&ctx->Unpack) is used as the packing
+ * description.
+ */
+void radeonCompressedTexSubImage2D(GLcontext * ctx, GLenum target,
+				   GLint level, GLint xoffset,
+				   GLint yoffset, GLsizei width,
+				   GLsizei height, GLenum format,
+				   GLsizei imageSize, const GLvoid * data,
+				   struct gl_texture_object *texObj,
+				   struct gl_texture_image *texImage)
+{
+	radeon_texsubimage(ctx, 2, target, level,
+			   xoffset, yoffset, 0,	/* x, y, z offset */
+			   width, height, 1,	/* width, height, depth */
+			   imageSize,
+			   format, 0,		/* format, type */
+			   data, &ctx->Unpack,
+			   texObj, texImage,
+			   1);			/* compressed */
+}
+
+
+/**
+ * 3D texture sub-image update: forwards to radeon_texsubimage() with
+ * dims = 3, no image size and the compressed flag cleared.
+ */
+void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
+			 GLint xoffset, GLint yoffset, GLint zoffset,
+			 GLsizei width, GLsizei height, GLsizei depth,
+			 GLenum format, GLenum type,
+			 const GLvoid * pixels,
+			 const struct gl_pixelstore_attrib *packing,
+			 struct gl_texture_object *texObj,
+			 struct gl_texture_image *texImage)
+{
+	radeon_texsubimage(ctx, 3, target, level,
+			   xoffset, yoffset, zoffset,
+			   width, height, depth,
+			   0,			/* imageSize */
+			   format, type, pixels, packing,
+			   texObj, texImage,
+			   0);			/* compressed */
+}
+
+/**
+ * Tell whether \p mesa_format is accepted as a renderable format.
+ *
+ * \return 1 when the format equals one of the _dri_texformat_argb8888,
+ * _dri_texformat_rgb565, _dri_texformat_argb1555 or _dri_texformat_argb4444
+ * values, or is MESA_FORMAT_Z16 or MESA_FORMAT_S8_Z24; 0 otherwise.
+ */
+unsigned radeonIsFormatRenderable(gl_format mesa_format)
+{
+	/* The _dri_texformat_* values are variables, so they cannot be
+	 * switch case labels and are compared one by one. */
+	if (mesa_format == _dri_texformat_argb8888)
+		return 1;
+	if (mesa_format == _dri_texformat_rgb565)
+		return 1;
+	if (mesa_format == _dri_texformat_argb1555)
+		return 1;
+	if (mesa_format == _dri_texformat_argb4444)
+		return 1;
+
+	/* Depth and depth/stencil formats. */
+	if (mesa_format == MESA_FORMAT_Z16 || mesa_format == MESA_FORMAT_S8_Z24)
+		return 1;
+
+	return 0;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h
new file mode 100644
index 0000000000..4ce639ea34
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ * Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+ *
+ * The Weather Channel (TM) funded Tungsten Graphics to develop the
+ * initial release of the Radeon 8500 driver under the XFree86 license.
+ * This notice must be preserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_TEXTURE_H
+#define RADEON_TEXTURE_H
+
+#include "main/formats.h"
+
+void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride,
+	GLuint numrows, GLuint rowsize);
+struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx);
+void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage);
+
+void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable);
+void radeon_teximage_unmap(radeon_texture_image *image);
+void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj);
+void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj);
+void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj);
+int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj);
+
+gl_format radeonChooseTextureFormat_mesa(GLcontext * ctx,
+                                         GLint internalFormat,
+                                         GLenum format,
+                                         GLenum type);
+
+gl_format radeonChooseTextureFormat(GLcontext * ctx,
+                                    GLint internalFormat,
+                                    GLenum format,
+                                    GLenum type, GLboolean fbo);
+
+/*
+ * Texture image specification.  The definitions of the uncompressed
+ * variants forward to a shared radeon_teximage() helper with the matching
+ * dimension count (1, 2 or 3); the compressed variant forwards with
+ * dims = 2, the compressed flag set and &ctx->Unpack as packing state.
+ */
+void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level,
+		      GLint internalFormat,
+		      GLint width, GLint border,
+		      GLenum format, GLenum type, const GLvoid * pixels,
+		      const struct gl_pixelstore_attrib *packing,
+		      struct gl_texture_object *texObj,
+		      struct gl_texture_image *texImage);
+void radeonTexImage2D(GLcontext * ctx, GLenum target, GLint level,
+		      GLint internalFormat,
+		      GLint width, GLint height, GLint border,
+		      GLenum format, GLenum type, const GLvoid * pixels,
+		      const struct gl_pixelstore_attrib *packing,
+		      struct gl_texture_object *texObj,
+		      struct gl_texture_image *texImage);
+void radeonCompressedTexImage2D(GLcontext * ctx, GLenum target,
+				GLint level, GLint internalFormat,
+				GLint width, GLint height, GLint border,
+				GLsizei imageSize, const GLvoid * data,
+				struct gl_texture_object *texObj,
+				struct gl_texture_image *texImage);
+void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level,
+		      GLint internalFormat,
+		      GLint width, GLint height, GLint depth,
+		      GLint border,
+		      GLenum format, GLenum type, const GLvoid * pixels,
+		      const struct gl_pixelstore_attrib *packing,
+		      struct gl_texture_object *texObj,
+		      struct gl_texture_image *texImage);
+/*
+ * Texture sub-image updates.  The definitions forward to a shared
+ * radeon_texsubimage() helper; offsets and extents that a variant does not
+ * take are passed as 0 and 1 respectively.
+ */
+void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level,
+			 GLint xoffset,
+			 GLsizei width,
+			 GLenum format, GLenum type,
+			 const GLvoid * pixels,
+			 const struct gl_pixelstore_attrib *packing,
+			 struct gl_texture_object *texObj,
+			 struct gl_texture_image *texImage);
+void radeonTexSubImage2D(GLcontext * ctx, GLenum target, GLint level,
+				GLint xoffset, GLint yoffset,
+				GLsizei width, GLsizei height,
+				GLenum format, GLenum type,
+				const GLvoid * pixels,
+				const struct gl_pixelstore_attrib *packing,
+				struct gl_texture_object *texObj,
+				struct gl_texture_image *texImage);
+void radeonCompressedTexSubImage2D(GLcontext * ctx, GLenum target,
+				   GLint level, GLint xoffset,
+				   GLint yoffset, GLsizei width,
+				   GLsizei height, GLenum format,
+				   GLsizei imageSize, const GLvoid * data,
+				   struct gl_texture_object *texObj,
+				   struct gl_texture_image *texImage);
+
+void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level,
+			 GLint xoffset, GLint yoffset, GLint zoffset,
+			 GLsizei width, GLsizei height, GLsizei depth,
+			 GLenum format, GLenum type,
+			 const GLvoid * pixels,
+			 const struct gl_pixelstore_attrib *packing,
+			 struct gl_texture_object *texObj,
+			 struct gl_texture_image *texImage);
+
+void radeonGetTexImage(GLcontext * ctx, GLenum target, GLint level,
+		       GLenum format, GLenum type, GLvoid * pixels,
+		       struct gl_texture_object *texObj,
+		       struct gl_texture_image *texImage);
+void radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level,
+				 GLvoid *pixels,
+				 struct gl_texture_object *texObj,
+				 struct gl_texture_image *texImage);
+
+void radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level,
+			GLenum internalFormat,
+			GLint x, GLint y, GLsizei width, GLsizei height,
+			GLint border);
+
+void radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
+			GLint xoffset, GLint yoffset,
+			GLint x, GLint y,
+			GLsizei width, GLsizei height);
+
+/*
+ * Returns 1 if mesa_format matches one of the _dri_texformat_argb8888,
+ * _dri_texformat_rgb565, _dri_texformat_argb1555 or _dri_texformat_argb4444
+ * values, or is MESA_FORMAT_Z16 or MESA_FORMAT_S8_Z24; returns 0 otherwise.
+ */
+unsigned radeonIsFormatRenderable(gl_format mesa_format);
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_tile.c b/src/mesa/drivers/dri/radeon/radeon_tile.c
new file mode 100644
index 0000000000..403da11010
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_tile.c
@@ -0,0 +1,512 @@
+/*
+ * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_tile.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include "main/macros.h"
+#include "radeon_debug.h"
+
+#define MICRO_TILE_SIZE 32
+
+/**
+ * Micro-tile an image of 8-bit elements using 8x4 tiles.
+ *
+ * The source is read as a linear image; \p src_pitch and \p dst_pitch are
+ * counted in elements.  Each band of 4 rows starts at row * dst_pitch in the
+ * destination, and within a band every tile occupies MICRO_TILE_SIZE
+ * consecutive bytes, its rows stored back to back.  Tiles that reach past
+ * \p width or \p height only have their covered rows and columns written.
+ */
+static void micro_tile_8_x_4_8bit(const void * const src, unsigned src_pitch,
+                                  void * const dst, unsigned dst_pitch,
+                                  unsigned width, unsigned height)
+{
+    const unsigned tile_w = 8, tile_h = 4;
+    unsigned y; /* first source row of the current band of tiles */
+
+    for (y = 0; y < height; y += tile_h)
+    {
+        const unsigned rows = MIN2(tile_h, height - y);
+        unsigned x;    /* first source column of the current tile */
+        unsigned tile; /* index of the current tile within its band */
+
+        for (x = 0, tile = 0; x < width; x += tile_w, ++tile)
+        {
+            const unsigned cols = MIN2(tile_w, width - x);
+            const uint8_t *in = (const uint8_t *)src + src_pitch * y + x;
+            uint8_t *out = (uint8_t *)dst + y * dst_pitch +
+                           tile * MICRO_TILE_SIZE / sizeof(uint8_t);
+            unsigned r;
+
+            /* One source row becomes one tile_w-wide row inside the tile. */
+            for (r = 0; r < rows; ++r, out += tile_w, in += src_pitch)
+                memcpy(out, in, cols * sizeof(uint8_t));
+        }
+    }
+}
+
+/**
+ * Micro-tile an image of 16-bit elements using 4x4 tiles.
+ *
+ * The source is read as a linear image; \p src_pitch and \p dst_pitch are
+ * counted in elements.  Each band of 4 rows starts at row * dst_pitch in the
+ * destination, and within a band every tile occupies MICRO_TILE_SIZE
+ * consecutive bytes, its rows stored back to back.  Tiles that reach past
+ * \p width or \p height only have their covered rows and columns written.
+ */
+static void micro_tile_4_x_4_16bit(const void * const src, unsigned src_pitch,
+                                   void * const dst, unsigned dst_pitch,
+                                   unsigned width, unsigned height)
+{
+    const unsigned tile_w = 4, tile_h = 4;
+    unsigned y; /* first source row of the current band of tiles */
+
+    for (y = 0; y < height; y += tile_h)
+    {
+        const unsigned rows = MIN2(tile_h, height - y);
+        unsigned x;    /* first source column of the current tile */
+        unsigned tile; /* index of the current tile within its band */
+
+        for (x = 0, tile = 0; x < width; x += tile_w, ++tile)
+        {
+            const unsigned cols = MIN2(tile_w, width - x);
+            const uint16_t *in = (const uint16_t *)src + src_pitch * y + x;
+            uint16_t *out = (uint16_t *)dst + y * dst_pitch +
+                            tile * MICRO_TILE_SIZE / sizeof(uint16_t);
+            unsigned r;
+
+            /* One source row becomes one tile_w-wide row inside the tile. */
+            for (r = 0; r < rows; ++r, out += tile_w, in += src_pitch)
+                memcpy(out, in, cols * sizeof(uint16_t));
+        }
+    }
+}
+
+/**
+ * Micro-tile an image of 16-bit elements using 8x2 tiles.
+ *
+ * The source is read as a linear image; \p src_pitch and \p dst_pitch are
+ * counted in elements.  Each band of 2 rows starts at row * dst_pitch in the
+ * destination, and within a band every tile occupies MICRO_TILE_SIZE
+ * consecutive bytes, its rows stored back to back.  Tiles that reach past
+ * \p width or \p height only have their covered rows and columns written.
+ */
+static void micro_tile_8_x_2_16bit(const void * const src, unsigned src_pitch,
+                                   void * const dst, unsigned dst_pitch,
+                                   unsigned width, unsigned height)
+{
+    const unsigned tile_w = 8, tile_h = 2;
+    unsigned y; /* first source row of the current band of tiles */
+
+    for (y = 0; y < height; y += tile_h)
+    {
+        const unsigned rows = MIN2(tile_h, height - y);
+        unsigned x;    /* first source column of the current tile */
+        unsigned tile; /* index of the current tile within its band */
+
+        for (x = 0, tile = 0; x < width; x += tile_w, ++tile)
+        {
+            const unsigned cols = MIN2(tile_w, width - x);
+            const uint16_t *in = (const uint16_t *)src + src_pitch * y + x;
+            uint16_t *out = (uint16_t *)dst + y * dst_pitch +
+                            tile * MICRO_TILE_SIZE / sizeof(uint16_t);
+            unsigned r;
+
+            /* One source row becomes one tile_w-wide row inside the tile. */
+            for (r = 0; r < rows; ++r, out += tile_w, in += src_pitch)
+                memcpy(out, in, cols * sizeof(uint16_t));
+        }
+    }
+}
+
+/**
+ * Micro-tile an image of 32-bit elements using 4x2 tiles.
+ *
+ * The source is read as a linear image; \p src_pitch and \p dst_pitch are
+ * counted in elements.  Each band of 2 rows starts at row * dst_pitch in the
+ * destination, and within a band every tile occupies MICRO_TILE_SIZE
+ * consecutive bytes, its rows stored back to back.  Tiles that reach past
+ * \p width or \p height only have their covered rows and columns written.
+ */
+static void micro_tile_4_x_2_32bit(const void * const src, unsigned src_pitch,
+                                   void * const dst, unsigned dst_pitch,
+                                   unsigned width, unsigned height)
+{
+    const unsigned tile_w = 4, tile_h = 2;
+    unsigned y; /* first source row of the current band of tiles */
+
+    for (y = 0; y < height; y += tile_h)
+    {
+        const unsigned rows = MIN2(tile_h, height - y);
+        unsigned x;    /* first source column of the current tile */
+        unsigned tile; /* index of the current tile within its band */
+
+        for (x = 0, tile = 0; x < width; x += tile_w, ++tile)
+        {
+            const unsigned cols = MIN2(tile_w, width - x);
+            const uint32_t *in = (const uint32_t *)src + src_pitch * y + x;
+            uint32_t *out = (uint32_t *)dst + y * dst_pitch +
+                            tile * MICRO_TILE_SIZE / sizeof(uint32_t);
+            unsigned r;
+
+            /* One source row becomes one tile_w-wide row inside the tile. */
+            for (r = 0; r < rows; ++r, out += tile_w, in += src_pitch)
+                memcpy(out, in, cols * sizeof(uint32_t));
+        }
+    }
+}
+
+/**
+ * Micro-tile an image of 64-bit elements using 2x2 tiles.
+ *
+ * The source is read as a linear image; \p src_pitch and \p dst_pitch are
+ * counted in elements.  Each band of 2 rows starts at row * dst_pitch in the
+ * destination, and within a band every tile occupies MICRO_TILE_SIZE
+ * consecutive bytes, its rows stored back to back.  Tiles that reach past
+ * \p width or \p height only have their covered rows and columns written.
+ */
+static void micro_tile_2_x_2_64bit(const void * const src, unsigned src_pitch,
+                                   void * const dst, unsigned dst_pitch,
+                                   unsigned width, unsigned height)
+{
+    const unsigned tile_w = 2, tile_h = 2;
+    unsigned y; /* first source row of the current band of tiles */
+
+    for (y = 0; y < height; y += tile_h)
+    {
+        const unsigned rows = MIN2(tile_h, height - y);
+        unsigned x;    /* first source column of the current tile */
+        unsigned tile; /* index of the current tile within its band */
+
+        for (x = 0, tile = 0; x < width; x += tile_w, ++tile)
+        {
+            const unsigned cols = MIN2(tile_w, width - x);
+            const uint64_t *in = (const uint64_t *)src + src_pitch * y + x;
+            uint64_t *out = (uint64_t *)dst + y * dst_pitch +
+                            tile * MICRO_TILE_SIZE / sizeof(uint64_t);
+            unsigned r;
+
+            /* One source row becomes one tile_w-wide row inside the tile. */
+            for (r = 0; r < rows; ++r, out += tile_w, in += src_pitch)
+                memcpy(out, in, cols * sizeof(uint64_t));
+        }
+    }
+}
+
+/**
+ * "Tile" an image of 128-bit (16-byte) elements.
+ *
+ * With 1x1 tiles the tiled layout is the linear layout, so this is a plain
+ * row-by-row copy of \p width elements per row.  \p src_pitch and
+ * \p dst_pitch are counted in elements.
+ *
+ * Exactly \p height rows are copied.  The previous version ran an extra
+ * loop over \p width around the row copy and therefore walked
+ * width * height rows, reading and writing past the end of both buffers
+ * whenever \p width was greater than 1.
+ */
+static void micro_tile_1_x_1_128bit(const void * src, unsigned src_pitch,
+                                    void * dst, unsigned dst_pitch,
+                                    unsigned width, unsigned height)
+{
+    const unsigned elem_size = 16; /* sizeof(uint128_t) */
+    /* Byte pointers: arithmetic on void * is a GNU extension. */
+    const uint8_t *src_row = src;
+    uint8_t *dst_row = dst;
+    unsigned j;
+
+    for (j = 0; j < height; ++j)
+    {
+        memcpy(dst_row, src_row, (size_t)width * elem_size);
+        dst_row += dst_pitch * elem_size;
+        src_row += src_pitch * elem_size;
+    }
+}
+
+/**
+ * Convert a linear image into the micro-tiled layout.
+ *
+ * The tiling routine is selected from the byte size of \p format: 16, 8, 4
+ * and 1 byte formats each have one routine, while 2 byte formats use the
+ * 4x4 routine when the format has depth bits and the 8x2 routine otherwise.
+ * Any other size trips assert(0) and nothing is copied.
+ *
+ * Both pitches are asserted to be at least \p width.
+ */
+void tile_image(const void * src, unsigned src_pitch,
+                void *dst, unsigned dst_pitch,
+                gl_format format, unsigned width, unsigned height)
+{
+    void (*tile_func)(const void *, unsigned, void *, unsigned,
+                      unsigned, unsigned) = NULL;
+
+    assert(src_pitch >= width);
+    assert(dst_pitch >= width);
+
+    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+                 "Software tiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
+                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
+
+    switch (_mesa_get_format_bytes(format))
+    {
+        case 16:
+            tile_func = micro_tile_1_x_1_128bit;
+            break;
+        case 8:
+            tile_func = micro_tile_2_x_2_64bit;
+            break;
+        case 4:
+            tile_func = micro_tile_4_x_2_32bit;
+            break;
+        case 2:
+            /* 16-bit formats with depth bits use square tiles. */
+            tile_func = _mesa_get_format_bits(format, GL_DEPTH_BITS)
+                      ? micro_tile_4_x_4_16bit
+                      : micro_tile_8_x_2_16bit;
+            break;
+        case 1:
+            tile_func = micro_tile_8_x_4_8bit;
+            break;
+        default:
+            assert(0);
+            break;
+    }
+
+    if (tile_func)
+        tile_func(src, src_pitch, dst, dst_pitch, width, height);
+}
+
+/**
+ * Convert a micro-tiled image of 8-bit elements (8x4 tiles) back to linear.
+ *
+ * \p src_pitch and \p dst_pitch are counted in elements; \p src_pitch is
+ * asserted to be a multiple of the tile width.  Each band of 4 rows starts
+ * at row * src_pitch in the source, and within a band every tile occupies
+ * MICRO_TILE_SIZE consecutive bytes, its rows stored back to back.  For
+ * tiles that reach past \p width or \p height only the covered rows and
+ * columns are copied.
+ */
+static void micro_untile_8_x_4_8bit(const void * const src, unsigned src_pitch,
+                                    void * const dst, unsigned dst_pitch,
+                                    unsigned width, unsigned height)
+{
+    const unsigned tile_width = 8, tile_height = 4;
+    unsigned y; /* first destination row of the current band of tiles */
+
+    assert(src_pitch % tile_width == 0);
+
+    for (y = 0; y < height; y += tile_height)
+    {
+        const unsigned rows = MIN2(tile_height, height - y);
+        unsigned x;    /* first destination column of the current tile */
+        unsigned tile; /* index of the current tile within its band */
+
+        for (x = 0, tile = 0; x < width; x += tile_width, ++tile)
+        {
+            const unsigned cols = MIN2(tile_width, width - x);
+            const uint8_t *in = (const uint8_t *)src + y * src_pitch +
+                                tile * MICRO_TILE_SIZE / sizeof(uint8_t);
+            uint8_t *out = (uint8_t *)dst + dst_pitch * y + x;
+            unsigned r;
+
+            /* One tile row becomes one piece of a linear destination row. */
+            for (r = 0; r < rows; ++r, out += dst_pitch, in += tile_width)
+                memcpy(out, in, cols * sizeof(uint8_t));
+        }
+    }
+}
+
+/**
+ * Convert a micro-tiled image of 16-bit elements (8x2 tiles) back to linear.
+ *
+ * \p src_pitch and \p dst_pitch are counted in elements; \p src_pitch is
+ * asserted to be a multiple of the tile width.  Each band of 2 rows starts
+ * at row * src_pitch in the source, and within a band every tile occupies
+ * MICRO_TILE_SIZE consecutive bytes, its rows stored back to back.  For
+ * tiles that reach past \p width or \p height only the covered rows and
+ * columns are copied.
+ */
+static void micro_untile_8_x_2_16bit(const void * const src, unsigned src_pitch,
+                                     void * const dst, unsigned dst_pitch,
+                                     unsigned width, unsigned height)
+{
+    const unsigned tile_width = 8, tile_height = 2;
+    unsigned y; /* first destination row of the current band of tiles */
+
+    assert(src_pitch % tile_width == 0);
+
+    for (y = 0; y < height; y += tile_height)
+    {
+        const unsigned rows = MIN2(tile_height, height - y);
+        unsigned x;    /* first destination column of the current tile */
+        unsigned tile; /* index of the current tile within its band */
+
+        for (x = 0, tile = 0; x < width; x += tile_width, ++tile)
+        {
+            const unsigned cols = MIN2(tile_width, width - x);
+            const uint16_t *in = (const uint16_t *)src + y * src_pitch +
+                                 tile * MICRO_TILE_SIZE / sizeof(uint16_t);
+            uint16_t *out = (uint16_t *)dst + dst_pitch * y + x;
+            unsigned r;
+
+            /* One tile row becomes one piece of a linear destination row. */
+            for (r = 0; r < rows; ++r, out += dst_pitch, in += tile_width)
+                memcpy(out, in, cols * sizeof(uint16_t));
+        }
+    }
+}
+
+/**
+ * Convert a micro-tiled image of 16-bit elements (4x4 tiles) back to linear.
+ *
+ * \p src_pitch and \p dst_pitch are counted in elements; \p src_pitch is
+ * asserted to be a multiple of the tile width.  Each band of 4 rows starts
+ * at row * src_pitch in the source, and within a band every tile occupies
+ * MICRO_TILE_SIZE consecutive bytes, its rows stored back to back.  For
+ * tiles that reach past \p width or \p height only the covered rows and
+ * columns are copied.
+ */
+static void micro_untile_4_x_4_16bit(const void * const src, unsigned src_pitch,
+                                     void * const dst, unsigned dst_pitch,
+                                     unsigned width, unsigned height)
+{
+    const unsigned tile_width = 4, tile_height = 4;
+    unsigned y; /* first destination row of the current band of tiles */
+
+    assert(src_pitch % tile_width == 0);
+
+    for (y = 0; y < height; y += tile_height)
+    {
+        const unsigned rows = MIN2(tile_height, height - y);
+        unsigned x;    /* first destination column of the current tile */
+        unsigned tile; /* index of the current tile within its band */
+
+        for (x = 0, tile = 0; x < width; x += tile_width, ++tile)
+        {
+            const unsigned cols = MIN2(tile_width, width - x);
+            const uint16_t *in = (const uint16_t *)src + y * src_pitch +
+                                 tile * MICRO_TILE_SIZE / sizeof(uint16_t);
+            uint16_t *out = (uint16_t *)dst + dst_pitch * y + x;
+            unsigned r;
+
+            /* One tile row becomes one piece of a linear destination row. */
+            for (r = 0; r < rows; ++r, out += dst_pitch, in += tile_width)
+                memcpy(out, in, cols * sizeof(uint16_t));
+        }
+    }
+}
+
+/**
+ * Convert a micro-tiled image of 32-bit elements (4x2 tiles) back to linear.
+ *
+ * \p src_pitch and \p dst_pitch are counted in elements; \p src_pitch is
+ * asserted to be a multiple of the tile width.  Each band of 2 rows starts
+ * at row * src_pitch in the source, and within a band every tile occupies
+ * MICRO_TILE_SIZE consecutive bytes, its rows stored back to back.  For
+ * tiles that reach past \p width or \p height only the covered rows and
+ * columns are copied.
+ */
+static void micro_untile_4_x_2_32bit(const void * const src, unsigned src_pitch,
+                                     void * const dst, unsigned dst_pitch,
+                                     unsigned width, unsigned height)
+{
+    const unsigned tile_width = 4, tile_height = 2;
+    unsigned y; /* first destination row of the current band of tiles */
+
+    assert(src_pitch % tile_width == 0);
+
+    for (y = 0; y < height; y += tile_height)
+    {
+        const unsigned rows = MIN2(tile_height, height - y);
+        unsigned x;    /* first destination column of the current tile */
+        unsigned tile; /* index of the current tile within its band */
+
+        for (x = 0, tile = 0; x < width; x += tile_width, ++tile)
+        {
+            const unsigned cols = MIN2(tile_width, width - x);
+            const uint32_t *in = (const uint32_t *)src + y * src_pitch +
+                                 tile * MICRO_TILE_SIZE / sizeof(uint32_t);
+            uint32_t *out = (uint32_t *)dst + dst_pitch * y + x;
+            unsigned r;
+
+            /* One tile row becomes one piece of a linear destination row. */
+            for (r = 0; r < rows; ++r, out += dst_pitch, in += tile_width)
+                memcpy(out, in, cols * sizeof(uint32_t));
+        }
+    }
+}
+
+/**
+ * Convert a micro-tiled image of 64-bit elements (2x2 tiles) back to linear.
+ *
+ * \p src_pitch and \p dst_pitch are counted in elements; \p src_pitch is
+ * asserted to be a multiple of the tile width.  Each band of 2 rows starts
+ * at row * src_pitch in the source, and within a band every tile occupies
+ * MICRO_TILE_SIZE consecutive bytes, its rows stored back to back.  For
+ * tiles that reach past \p width or \p height only the covered rows and
+ * columns are copied.
+ */
+static void micro_untile_2_x_2_64bit(const void * const src, unsigned src_pitch,
+                                     void * const dst, unsigned dst_pitch,
+                                     unsigned width, unsigned height)
+{
+    const unsigned tile_width = 2, tile_height = 2;
+    unsigned y; /* first destination row of the current band of tiles */
+
+    assert(src_pitch % tile_width == 0);
+
+    for (y = 0; y < height; y += tile_height)
+    {
+        const unsigned rows = MIN2(tile_height, height - y);
+        unsigned x;    /* first destination column of the current tile */
+        unsigned tile; /* index of the current tile within its band */
+
+        for (x = 0, tile = 0; x < width; x += tile_width, ++tile)
+        {
+            const unsigned cols = MIN2(tile_width, width - x);
+            const uint64_t *in = (const uint64_t *)src + y * src_pitch +
+                                 tile * MICRO_TILE_SIZE / sizeof(uint64_t);
+            uint64_t *out = (uint64_t *)dst + dst_pitch * y + x;
+            unsigned r;
+
+            /* One tile row becomes one piece of a linear destination row. */
+            for (r = 0; r < rows; ++r, out += dst_pitch, in += tile_width)
+                memcpy(out, in, cols * sizeof(uint64_t));
+        }
+    }
+}
+
+/**
+ * "Untile" an image of 128-bit (16-byte) elements.
+ *
+ * With 1x1 tiles the tiled layout is the linear layout, so this is a plain
+ * row-by-row copy of \p width elements per row.  \p src_pitch and
+ * \p dst_pitch are counted in elements.
+ *
+ * Exactly \p height rows are copied.  The previous version ran an extra
+ * loop over \p width around the row copy and therefore walked
+ * width * height rows, reading and writing past the end of both buffers
+ * whenever \p width was greater than 1.
+ */
+static void micro_untile_1_x_1_128bit(const void * src, unsigned src_pitch,
+                                      void * dst, unsigned dst_pitch,
+                                      unsigned width, unsigned height)
+{
+    const unsigned elem_size = 16; /* sizeof(uint128_t) */
+    /* Byte pointers: arithmetic on void * is a GNU extension. */
+    const uint8_t *src_row = src;
+    uint8_t *dst_row = dst;
+    unsigned j;
+
+    for (j = 0; j < height; ++j)
+    {
+        memcpy(dst_row, src_row, (size_t)width * elem_size);
+        dst_row += dst_pitch * elem_size;
+        src_row += src_pitch * elem_size;
+    }
+}
+
+/**
+ * Convert a micro-tiled image back into the linear layout.
+ *
+ * The untiling routine is selected from the byte size of \p format: 16, 8,
+ * 4 and 1 byte formats each have one routine, while 2 byte formats use the
+ * 4x4 routine when the format has depth bits and the 8x2 routine otherwise.
+ * Any other size trips assert(0) and nothing is copied.
+ *
+ * Both pitches are asserted to be at least \p width.
+ */
+void untile_image(const void * src, unsigned src_pitch,
+                  void *dst, unsigned dst_pitch,
+                  gl_format format, unsigned width, unsigned height)
+{
+    void (*untile_func)(const void *, unsigned, void *, unsigned,
+                        unsigned, unsigned) = NULL;
+
+    assert(src_pitch >= width);
+    assert(dst_pitch >= width);
+
+    radeon_print(RADEON_TEXTURE, RADEON_TRACE,
+                 "Software untiling: src_pitch %d, dst_pitch %d, width %d, height %d, bpp %d\n",
+                 src_pitch, dst_pitch, width, height, _mesa_get_format_bytes(format));
+
+    switch (_mesa_get_format_bytes(format))
+    {
+        case 16:
+            untile_func = micro_untile_1_x_1_128bit;
+            break;
+        case 8:
+            untile_func = micro_untile_2_x_2_64bit;
+            break;
+        case 4:
+            untile_func = micro_untile_4_x_2_32bit;
+            break;
+        case 2:
+            /* 16-bit formats with depth bits use square tiles. */
+            untile_func = _mesa_get_format_bits(format, GL_DEPTH_BITS)
+                        ? micro_untile_4_x_4_16bit
+                        : micro_untile_8_x_2_16bit;
+            break;
+        case 1:
+            untile_func = micro_untile_8_x_4_8bit;
+            break;
+        default:
+            assert(0);
+            break;
+    }
+
+    if (untile_func)
+        untile_func(src, src_pitch, dst, dst_pitch, width, height);
+}
+
+/**
+ * Report the micro tile dimensions, in elements, used for \p format.
+ *
+ * The result depends on the format's byte size: 16 -> 1x1, 8 -> 2x2,
+ * 4 -> 4x2, 1 -> 8x4, and 2 -> 4x4 when the format has depth bits or 8x2
+ * otherwise.  For any other size assert(0) fires and neither output is
+ * written.
+ *
+ * \param block_width   receives the tile width
+ * \param block_height  receives the tile height
+ */
+void get_tile_size(gl_format format, unsigned *block_width, unsigned *block_height)
+{
+    const unsigned bytes = _mesa_get_format_bytes(format);
+    unsigned tile_w, tile_h;
+
+    if (bytes == 16)
+    {
+        tile_w = 1;
+        tile_h = 1;
+    }
+    else if (bytes == 8)
+    {
+        tile_w = 2;
+        tile_h = 2;
+    }
+    else if (bytes == 4)
+    {
+        tile_w = 4;
+        tile_h = 2;
+    }
+    else if (bytes == 2)
+    {
+        /* 16-bit formats with depth bits use square tiles. */
+        if (_mesa_get_format_bits(format, GL_DEPTH_BITS))
+        {
+            tile_w = 4;
+            tile_h = 4;
+        }
+        else
+        {
+            tile_w = 8;
+            tile_h = 2;
+        }
+    }
+    else if (bytes == 1)
+    {
+        tile_w = 8;
+        tile_h = 4;
+    }
+    else
+    {
+        /* Unsupported element size: leave the outputs untouched. */
+        assert(0);
+        return;
+    }
+
+    *block_width = tile_w;
+    *block_height = tile_h;
+}
diff --git a/src/mesa/drivers/dri/radeon/radeon_tile.h b/src/mesa/drivers/dri/radeon/radeon_tile.h
new file mode 100644
index 0000000000..31d9c5611c
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/radeon_tile.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2010 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_TILE_H
+#define RADEON_TILE_H
+
+#include <main/formats.h>
+
+/**
+ * Convert a linear image into the micro-tiled layout.
+ *
+ * Pitches are counted in elements of the format and are asserted to be at
+ * least \p width.  The tile shape is chosen from the byte size of
+ * \p format (see get_tile_size()); sizes other than 1, 2, 4, 8 or 16 bytes
+ * trip an assertion.
+ */
+void tile_image(const void * src, unsigned src_pitch,
+                void *dst, unsigned dst_pitch,
+                gl_format format, unsigned width, unsigned height);
+
+/**
+ * Convert a micro-tiled image back into the linear layout.
+ *
+ * Takes the same parameters as tile_image(), with \p src being the tiled
+ * image and \p dst the linear one.
+ */
+void untile_image(const void * src, unsigned src_pitch,
+                  void *dst, unsigned dst_pitch,
+                  gl_format format, unsigned width, unsigned height);
+
+/**
+ * Report the micro tile dimensions, in elements, used for \p format:
+ * 16 bytes -> 1x1, 8 -> 2x2, 4 -> 4x2, 1 -> 8x4, and 2 -> 4x4 for formats
+ * with depth bits or 8x2 otherwise.  Other sizes trip an assertion and
+ * leave the outputs unwritten.
+ */
+void get_tile_size(gl_format format, unsigned *block_width, unsigned *block_height);
+
+#endif /* RADEON_TILE_H */
diff --git a/src/mesa/drivers/dri/radeon/server/radeon.h b/src/mesa/drivers/dri/radeon/server/radeon.h
new file mode 100644
index 0000000000..3fb1e37c53
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/server/radeon.h
@@ -0,0 +1,208 @@
+/**
+ * \file server/radeon.h
+ * \brief Radeon 2D driver data structures.
+ */
+
+/*
+ * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
+ *                VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation on the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
+ * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _RADEON_H_
+#define _RADEON_H_
+
+#include "xf86drm.h"		/* drm_handle_t, etc */
+
+#       define RADEON_AGP_1X_MODE           0x01
+#       define RADEON_AGP_2X_MODE           0x02
+#       define RADEON_AGP_4X_MODE           0x04
+#       define RADEON_AGP_FW_MODE           0x10
+#       define RADEON_AGP_MODE_MASK         0x17
+#define RADEON_CP_CSQ_CNTL                  0x0740
+#       define RADEON_CSQ_CNT_PRIMARY_MASK     (0xff << 0)
+#       define RADEON_CSQ_PRIDIS_INDDIS        (0    << 28)
+#       define RADEON_CSQ_PRIPIO_INDDIS        (1    << 28)
+#       define RADEON_CSQ_PRIBM_INDDIS         (2    << 28)
+#       define RADEON_CSQ_PRIPIO_INDBM         (3    << 28)
+#       define RADEON_CSQ_PRIBM_INDBM          (4    << 28)
+#       define RADEON_CSQ_PRIPIO_INDPIO        (15u  << 28) /* unsigned: 15 << 28 overflows int */
+
+#define RADEON_PCIGART_TABLE_SIZE       32768
+
+#define PCI_CHIP_R200_BB                0x4242
+#define PCI_CHIP_RV250_Id               0x4964
+#define PCI_CHIP_RV250_Ie               0x4965
+#define PCI_CHIP_RV250_If               0x4966
+#define PCI_CHIP_RV250_Ig               0x4967
+#define PCI_CHIP_RADEON_LW		0x4C57
+#define PCI_CHIP_RADEON_LX		0x4C58
+#define PCI_CHIP_RADEON_LY		0x4C59
+#define PCI_CHIP_RADEON_LZ		0x4C5A
+#define PCI_CHIP_RV250_Ld		0x4C64
+#define PCI_CHIP_RV250_Le		0x4C65
+#define PCI_CHIP_RV250_Lf		0x4C66
+#define PCI_CHIP_RV250_Lg		0x4C67
+#define PCI_CHIP_R300_ND		0x4E44
+#define PCI_CHIP_R300_NE		0x4E45
+#define PCI_CHIP_R300_NF		0x4E46
+#define PCI_CHIP_R300_NG		0x4E47
+#define PCI_CHIP_RADEON_QD		0x5144
+#define PCI_CHIP_RADEON_QE		0x5145
+#define PCI_CHIP_RADEON_QF		0x5146
+#define PCI_CHIP_RADEON_QG		0x5147
+#define PCI_CHIP_R200_QL		0x514C
+#define PCI_CHIP_R200_QN		0x514E
+#define PCI_CHIP_R200_QO		0x514F
+#define PCI_CHIP_RV200_QW		0x5157
+#define PCI_CHIP_RV200_QX		0x5158
+#define PCI_CHIP_RADEON_QY		0x5159
+#define PCI_CHIP_RADEON_QZ		0x515A
+#define PCI_CHIP_R200_Ql		0x516C
+#define PCI_CHIP_RV370_5460             0x5460
+#define PCI_CHIP_RV280_Y_		0x5960
+#define PCI_CHIP_RV280_Ya		0x5961
+#define PCI_CHIP_RV280_Yb		0x5962
+#define PCI_CHIP_RV280_Yc		0x5963
+
+/**
+ * \brief Chip families.
+ */
+typedef enum {
+    CHIP_FAMILY_UNKNOW,
+    CHIP_FAMILY_LEGACY,
+    CHIP_FAMILY_R128,
+    CHIP_FAMILY_M3,
+    CHIP_FAMILY_RADEON,
+    CHIP_FAMILY_VE,
+    CHIP_FAMILY_M6,
+    CHIP_FAMILY_RV200,
+    CHIP_FAMILY_M7,
+    CHIP_FAMILY_R200,
+    CHIP_FAMILY_RV250,
+    CHIP_FAMILY_M9,
+    CHIP_FAMILY_RV280,
+    CHIP_FAMILY_R300,
+    CHIP_FAMILY_R350,
+    CHIP_FAMILY_RV350,
+    CHIP_FAMILY_RV380,  /* RV370/RV380/M22/M24 */
+    CHIP_FAMILY_R420,   /* R420/R423/M18 */
+} RADEONChipFamily;
+
+
+typedef unsigned long memType;
+
+
+/**
+ * \brief Radeon DDX driver private data.
+ */
+typedef struct {
+   int               Chipset;          /**< \brief Chipset number */
+   RADEONChipFamily  ChipFamily;       /**< \brief Chip family */
+
+   unsigned long     LinearAddr;       /**< \brief Frame buffer physical address */
+
+
+   drmSize           registerSize;     /**< \brief MMIO register map size */
+   drm_handle_t         registerHandle;   /**< \brief MMIO register map handle */
+
+   int               IsPCI;            /**< \brief Current card is a PCI card */
+   
+   /**
+    * \name AGP
+    */
+   /*@{*/
+   drmSize           gartSize;          /**< \brief AGP map size */
+   drm_handle_t         gartMemHandle;     /**< \brief AGP map handle */
+   unsigned long     gartOffset;        /**< \brief AGP offset */
+   int               gartMode;          /**< \brief AGP mode */
+   int               gartFastWrite;     /**< \brief AGP fast write setting (presumably a flag -- confirm) */
+   /*@}*/
+
+   /**
+    * \name CP ring buffer data
+    */
+   /*@{*/
+   unsigned long     ringStart;        /**< \brief Offset into AGP space */
+   drm_handle_t         ringHandle;       /**< \brief Handle from drmAddMap() */
+   drmSize           ringMapSize;      /**< \brief Size of map */
+   int               ringSize;         /**< \brief Size of ring (in MB) */
+
+   unsigned long     ringReadOffset;   /**< \brief Read offset into AGP space */
+   drm_handle_t         ringReadPtrHandle;/**< \brief Handle from drmAddMap() */
+   drmSize           ringReadMapSize;  /**< \brief Size of map */
+   /*@}*/
+
+   /**
+    * \name CP vertex/indirect buffer data
+    */
+   /*@{*/
+   unsigned long     bufStart;         /**< \brief Offset into AGP space */
+   drm_handle_t         bufHandle;        /**< \brief Handle from drmAddMap() */
+   drmSize           bufMapSize;       /**< \brief Size of map */
+   int               bufSize;          /**< \brief Size of buffers (in MB) */
+   int               bufNumBufs;       /**< \brief Number of buffers */
+   /*@}*/
+
+   /**
+    * \name CP AGP Texture data
+    */
+   /*@{*/
+   unsigned long     gartTexStart;      /**< \brief Offset into AGP space */
+   drm_handle_t         gartTexHandle;     /**< \brief Handle from drmAddMap() */
+   drmSize           gartTexMapSize;    /**< \brief Size of map */
+   int               gartTexSize;       /**< \brief Size of AGP tex space (in MB) */
+   int               log2GARTTexGran;   /**< \brief AGP texture granularity in base 2 log */
+   /*@}*/
+
+   int               drmMinor;         /**< \brief DRM device minor number */
+
+   int               frontOffset;      /**< \brief Front color buffer offset */
+   int               frontPitch;       /**< \brief Front color buffer pitch */
+   int               backOffset;       /**< \brief Back color buffer offset */
+   int               backPitch;        /**< \brief Back color buffer pitch */
+   int               depthOffset;      /**< \brief Depth buffer offset */
+   int               depthPitch;       /**< \brief Depth buffer pitch */
+   int               textureOffset;    /**< \brief Texture area offset */
+   int               textureSize;      /**< \brief Texture area size */
+   int               log2TexGran;      /**< \brief Texture granularity in base 2 log */
+
+   unsigned int      frontPitchOffset; /* NOTE(review): presumably packed pitch/offset of the front buffer -- confirm */
+   unsigned int      backPitchOffset;  /* NOTE(review): presumably packed pitch/offset of the back buffer -- confirm */
+   unsigned int      depthPitchOffset; /* NOTE(review): presumably packed pitch/offset of the depth buffer -- confirm */
+   
+   int               colorTiling;      /**< \brief Enable color tiling */
+
+   int               irq;              /**< \brief IRQ number */
+   int               page_flip_enable; /**< \brief Page Flip enable */
+   unsigned int      gen_int_cntl;     /* NOTE(review): looks like a saved register value -- confirm */
+   unsigned int      crtc_offset_cntl; /* NOTE(review): looks like a saved register value -- confirm */
+
+   unsigned long     pcieGartTableOffset; /* NOTE(review): presumably offset of the PCIE GART table -- confirm */
+} RADEONInfoRec, *RADEONInfoPtr;
+
+
+#endif /* _RADEON_H_ */
diff --git a/src/mesa/drivers/dri/radeon/server/radeon_dri.h b/src/mesa/drivers/dri/radeon/server/radeon_dri.h
new file mode 100644
index 0000000000..dc51372107
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/server/radeon_dri.h
@@ -0,0 +1,115 @@
+/**
+ * \file server/radeon_dri.h
+ * \brief Radeon server-side structures.
+ * 
+ * \author Kevin E. Martin <martin@xfree86.org>
+ * \author Rickard E. Faith <faith@valinux.com>
+ */
+
+/*
+ * Copyright 2000 ATI Technologies Inc., Markham, Ontario,
+ *                VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation on the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
+ * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _RADEON_DRI_
+#define _RADEON_DRI_
+
+#include "xf86drm.h"
+#include "drm.h"
+#include "radeon_drm.h"
+
+/* DRI Driver defaults */
+#define RADEON_DEFAULT_CP_PIO_MODE    RADEON_CSQ_PRIPIO_INDPIO
+#define RADEON_DEFAULT_CP_BM_MODE     RADEON_CSQ_PRIBM_INDBM
+#define RADEON_DEFAULT_AGP_MODE       1
+#define RADEON_DEFAULT_AGP_FAST_WRITE 0
+#define RADEON_DEFAULT_AGP_SIZE       8 /* MB (must be 2^n and > 4MB) */
+#define RADEON_DEFAULT_RING_SIZE      1 /* MB (must be page aligned) */
+#define RADEON_DEFAULT_BUFFER_SIZE    2 /* MB (must be page aligned) */
+#define RADEON_DEFAULT_AGP_TEX_SIZE   1 /* MB (must be page aligned) */
+#define RADEON_DEFAULT_CP_TIMEOUT     10000  /* usecs */
+#define RADEON_DEFAULT_PAGE_FLIP      0 /* page flipping disabled */
+#define RADEON_BUFFER_ALIGN           0x00000fff /* 4096 - 1; presumably an alignment mask -- confirm */
+
+/**
+ * \brief Radeon DRI driver private data.
+ */
+typedef struct {
+    /**
+     * \name DRI screen private data
+     */
+    /*@{*/
+    int           deviceID;	 /**< \brief PCI device ID */
+    int           width;	 /**< \brief width in pixels of display */
+    int           height;	 /**< \brief height in scanlines of display */
+    int           depth;	 /**< \brief depth of display (8, 15, 16, 24) */
+    int           bpp;		 /**< \brief bit depth of display (8, 16, 24, 32) */
+
+    int           IsPCI;	 /**< \brief is current card a PCI card? */
+    int           AGPMode;	 /**< \brief AGP mode */
+
+    int           frontOffset;   /**< \brief front buffer offset */
+    int           frontPitch;	 /**< \brief front buffer pitch */
+    int           backOffset;    /**< \brief shared back buffer offset */
+    int           backPitch;     /**< \brief shared back buffer pitch */
+    int           depthOffset;   /**< \brief shared depth buffer offset */
+    int           depthPitch;    /**< \brief shared depth buffer pitch */
+    int           textureOffset; /**< \brief start of texture data in frame buffer */
+    int           textureSize;   /**< \brief size of texture data */
+    int           log2TexGran;   /**< \brief log2 texture granularity */
+    /*@}*/
+
+    /**
+     * \name MMIO register data
+     */
+    /*@{*/
+    drm_handle_t     registerHandle; /**< \brief MMIO register map handle */
+    drmSize       registerSize;   /**< \brief MMIO register map size */
+    /*@}*/
+
+    /**
+     * \name CP in-memory status information
+     */
+    /*@{*/
+    drm_handle_t     statusHandle;   /**< \brief status map handle */
+    drmSize       statusSize;     /**< \brief status map size */
+    /*@}*/
+
+    /**
+     * \name CP AGP Texture data
+     */
+    /*@{*/
+    drm_handle_t     gartTexHandle;   /**< \brief AGP texture area map handle */
+    drmSize       gartTexMapSize;  /**< \brief AGP texture area map size */
+    int           log2GARTTexGran; /**< \brief AGP texture granularity in log base 2 */
+    int           gartTexOffset;   /**< \brief AGP texture area offset in AGP space */
+    /*@}*/
+
+    unsigned int  sarea_priv_offset; /**< \brief offset of the private SAREA data */
+} RADEONDRIRec, *RADEONDRIPtr;
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/server/radeon_macros.h b/src/mesa/drivers/dri/radeon/server/radeon_macros.h
new file mode 100644
index 0000000000..355262c9ba
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/server/radeon_macros.h
@@ -0,0 +1,128 @@
+/**
+ * \file server/radeon_macros.h
+ * \brief Macros for Radeon MMIO operation.
+ *
+ * \authors Kevin E. Martin <martin@xfree86.org>
+ * \authors Rickard E. Faith <faith@valinux.com>
+ * \authors Alan Hourihane <alanh@fairlite.demon.co.uk>
+ */
+
+/*
+ * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
+ *                VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation on the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
+ * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _RADEON_MACROS_H_
+#define _RADEON_MACROS_H_
+
+#include <mmio.h>
+
+#  define MMIO_IN8(base, offset)	/* volatile byte read at base + offset */ \
+	(*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)))
+#  define MMIO_IN32(base, offset)	/* 32-bit read via read_MMIO_LE32() */ \
+	read_MMIO_LE32(base, offset)
+#  define MMIO_OUT8(base, offset, val)	/* volatile byte write; parenthesized so it nests in expressions */ \
+	(*(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val))
+#  define MMIO_OUT32(base, offset, val)	/* volatile 32-bit write of CPU_TO_LE32(val) */ \
+	(*(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val))
+
+
+				/* Memory mapped register access macros */
+#define INREG8(addr)        MMIO_IN8(RADEONMMIO, addr)
+#define INREG(addr)         MMIO_IN32(RADEONMMIO, addr)
+#define OUTREG8(addr, val)  MMIO_OUT8(RADEONMMIO, addr, val)
+#define OUTREG(addr, val)   MMIO_OUT32(RADEONMMIO, addr, val)
+
+#define ADDRREG(addr)       ((volatile GLuint *)(pointer)(RADEONMMIO + (addr)))
+
+
+#define OUTREGP(addr, val, mask)					\
+do {									\
+    GLuint tmp = INREG(addr);						\
+    tmp &= (mask);							\
+    tmp |= (val);							\
+    OUTREG(addr, tmp);							\
+} while (0)
+
+#define INPLL(dpy, addr) RADEONINPLL(dpy, addr)
+
+#define OUTPLL(addr, val)						\
+do {									\
+    OUTREG8(RADEON_CLOCK_CNTL_INDEX, (((addr) & 0x3f) |			\
+				      RADEON_PLL_WR_EN));		\
+    OUTREG(RADEON_CLOCK_CNTL_DATA, val);				\
+} while (0)
+
+#define OUTPLLP(dpy, addr, val, mask)					\
+do {									\
+    GLuint tmp = INPLL(dpy, addr);					\
+    tmp &= (mask);							\
+    tmp |= (val);							\
+    OUTPLL(addr, tmp);							\
+} while (0)
+
+#define OUTPAL_START(idx)						\
+do {									\
+    OUTREG8(RADEON_PALETTE_INDEX, (idx));				\
+} while (0)
+
+#define OUTPAL_NEXT(r, g, b)						\
+do {									\
+    OUTREG(RADEON_PALETTE_DATA, ((r) << 16) | ((g) << 8) | (b));	\
+} while (0)
+
+#define OUTPAL_NEXT_CARD32(v)						\
+do {	/* only the low 24 bits of v are written */			\
+    OUTREG(RADEON_PALETTE_DATA, ((v) & 0x00ffffff));			\
+} while (0)
+
+#define OUTPAL(idx, r, g, b)						\
+do {									\
+    OUTPAL_START((idx));						\
+    OUTPAL_NEXT((r), (g), (b));						\
+} while (0)
+
+#define INPAL_START(idx)						\
+do {									\
+    OUTREG(RADEON_PALETTE_INDEX, (idx) << 16);				\
+} while (0)
+
+#define INPAL_NEXT() INREG(RADEON_PALETTE_DATA)
+
+#define PAL_SELECT(idx)							\
+do {	/* idx == 0 clears RADEON_DAC2_PALETTE_ACC_CTL, else sets it */	\
+    if (!(idx)) {							\
+	OUTREG(RADEON_DAC_CNTL2, INREG(RADEON_DAC_CNTL2) &		\
+	       (GLuint)~RADEON_DAC2_PALETTE_ACC_CTL);			\
+    } else {								\
+	OUTREG(RADEON_DAC_CNTL2, INREG(RADEON_DAC_CNTL2) |		\
+	       RADEON_DAC2_PALETTE_ACC_CTL);				\
+    }									\
+} while (0)
+
+
+#endif
diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
new file mode 100644
index 0000000000..1b33de1edf
--- /dev/null
+++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h
@@ -0,0 +1,2174 @@
+/*
+ * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
+ *                VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation on the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
+ * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@xfree86.org>
+ *   Rickard E. Faith <faith@valinux.com>
+ *   Alan Hourihane <alanh@fairlite.demon.co.uk>
+ *
+ * References:
+ *
+ * !!!! FIXME !!!!
+ *   RAGE 128 VR/ RAGE 128 GL Register Reference Manual (Technical
+ *   Reference Manual P/N RRG-G04100-C Rev. 0.04), ATI Technologies: April
+ *   1999.
+ *
+ * !!!! FIXME !!!!
+ *   RAGE 128 Software Development Manual (Technical Reference Manual P/N
+ *   SDK-G04000 Rev. 0.01), ATI Technologies: June 1999.
+ *
+ */
+
+/* !!!! FIXME !!!!  NOTE: THIS FILE HAS BEEN CONVERTED FROM r128_reg.h
+ * AND CONTAINS REGISTERS AND REGISTER DEFINITIONS THAT ARE NOT CORRECT
+ * ON THE RADEON.  A FULL AUDIT OF THIS CODE IS NEEDED!  */
+
+#ifndef _RADEON_REG_H_
+#define _RADEON_REG_H_
+
+				/* Registers for 2D/Video/Overlay */
+#define RADEON_ADAPTER_ID                   0x0f2c /* PCI */
+#define RADEON_AGP_BASE                     0x0170
+#define RADEON_AGP_CNTL                     0x0174
+#       define RADEON_AGP_APER_SIZE_256MB   (0x00 << 0)
+#       define RADEON_AGP_APER_SIZE_128MB   (0x20 << 0)
+#       define RADEON_AGP_APER_SIZE_64MB    (0x30 << 0)
+#       define RADEON_AGP_APER_SIZE_32MB    (0x38 << 0)
+#       define RADEON_AGP_APER_SIZE_16MB    (0x3c << 0)
+#       define RADEON_AGP_APER_SIZE_8MB     (0x3e << 0)
+#       define RADEON_AGP_APER_SIZE_4MB     (0x3f << 0)
+#       define RADEON_AGP_APER_SIZE_MASK    (0x3f << 0)
+#define RADEON_AGP_COMMAND                  0x0f60 /* PCI */
+#define RADEON_AGP_COMMAND_PCI_CONFIG       0x0060 /* offset in PCI config*/
+#       define RADEON_AGP_ENABLE            (1<<8)
+#define RADEON_AGP_PLL_CNTL                 0x000b /* PLL */
+#define RADEON_AGP_STATUS                   0x0f5c /* PCI */
+#       define RADEON_AGP_1X_MODE           0x01
+#       define RADEON_AGP_2X_MODE           0x02
+#       define RADEON_AGP_4X_MODE           0x04
+#       define RADEON_AGP_FW_MODE           0x10
+#       define RADEON_AGP_MODE_MASK         0x17
+#define RADEON_ATTRDR                       0x03c1 /* VGA */
+#define RADEON_ATTRDW                       0x03c0 /* VGA */
+#define RADEON_ATTRX                        0x03c0 /* VGA */
+#define RADEON_AUX_SC_CNTL                  0x1660
+#       define RADEON_AUX1_SC_EN            (1 << 0)
+#       define RADEON_AUX1_SC_MODE_OR       (0 << 1)
+#       define RADEON_AUX1_SC_MODE_NAND     (1 << 1)
+#       define RADEON_AUX2_SC_EN            (1 << 2)
+#       define RADEON_AUX2_SC_MODE_OR       (0 << 3)
+#       define RADEON_AUX2_SC_MODE_NAND     (1 << 3)
+#       define RADEON_AUX3_SC_EN            (1 << 4)
+#       define RADEON_AUX3_SC_MODE_OR       (0 << 5)
+#       define RADEON_AUX3_SC_MODE_NAND     (1 << 5)
+#define RADEON_AUX1_SC_BOTTOM               0x1670
+#define RADEON_AUX1_SC_LEFT                 0x1664
+#define RADEON_AUX1_SC_RIGHT                0x1668
+#define RADEON_AUX1_SC_TOP                  0x166c
+#define RADEON_AUX2_SC_BOTTOM               0x1680
+#define RADEON_AUX2_SC_LEFT                 0x1674
+#define RADEON_AUX2_SC_RIGHT                0x1678
+#define RADEON_AUX2_SC_TOP                  0x167c
+#define RADEON_AUX3_SC_BOTTOM               0x1690
+#define RADEON_AUX3_SC_LEFT                 0x1684
+#define RADEON_AUX3_SC_RIGHT                0x1688
+#define RADEON_AUX3_SC_TOP                  0x168c
+#define RADEON_AUX_WINDOW_HORZ_CNTL         0x02d8
+#define RADEON_AUX_WINDOW_VERT_CNTL         0x02dc
+
+#define RADEON_BASE_CODE                    0x0f0b
+#define RADEON_BIOS_0_SCRATCH               0x0010
+#define RADEON_BIOS_1_SCRATCH               0x0014
+#define RADEON_BIOS_2_SCRATCH               0x0018
+#define RADEON_BIOS_3_SCRATCH               0x001c
+#define RADEON_BIOS_4_SCRATCH               0x0020
+#define RADEON_BIOS_5_SCRATCH               0x0024
+#define RADEON_BIOS_6_SCRATCH               0x0028
+#define RADEON_BIOS_7_SCRATCH               0x002c
+#define RADEON_BIOS_ROM                     0x0f30 /* PCI */
+#define RADEON_BIST                         0x0f0f /* PCI */
+#define RADEON_BRUSH_DATA0                  0x1480
+#define RADEON_BRUSH_DATA1                  0x1484
+#define RADEON_BRUSH_DATA10                 0x14a8
+#define RADEON_BRUSH_DATA11                 0x14ac
+#define RADEON_BRUSH_DATA12                 0x14b0
+#define RADEON_BRUSH_DATA13                 0x14b4
+#define RADEON_BRUSH_DATA14                 0x14b8
+#define RADEON_BRUSH_DATA15                 0x14bc
+#define RADEON_BRUSH_DATA16                 0x14c0
+#define RADEON_BRUSH_DATA17                 0x14c4
+#define RADEON_BRUSH_DATA18                 0x14c8
+#define RADEON_BRUSH_DATA19                 0x14cc
+#define RADEON_BRUSH_DATA2                  0x1488
+#define RADEON_BRUSH_DATA20                 0x14d0
+#define RADEON_BRUSH_DATA21                 0x14d4
+#define RADEON_BRUSH_DATA22                 0x14d8
+#define RADEON_BRUSH_DATA23                 0x14dc
+#define RADEON_BRUSH_DATA24                 0x14e0
+#define RADEON_BRUSH_DATA25                 0x14e4
+#define RADEON_BRUSH_DATA26                 0x14e8
+#define RADEON_BRUSH_DATA27                 0x14ec
+#define RADEON_BRUSH_DATA28                 0x14f0
+#define RADEON_BRUSH_DATA29                 0x14f4
+#define RADEON_BRUSH_DATA3                  0x148c
+#define RADEON_BRUSH_DATA30                 0x14f8
+#define RADEON_BRUSH_DATA31                 0x14fc
+#define RADEON_BRUSH_DATA32                 0x1500
+#define RADEON_BRUSH_DATA33                 0x1504
+#define RADEON_BRUSH_DATA34                 0x1508
+#define RADEON_BRUSH_DATA35                 0x150c
+#define RADEON_BRUSH_DATA36                 0x1510
+#define RADEON_BRUSH_DATA37                 0x1514
+#define RADEON_BRUSH_DATA38                 0x1518
+#define RADEON_BRUSH_DATA39                 0x151c
+#define RADEON_BRUSH_DATA4                  0x1490
+#define RADEON_BRUSH_DATA40                 0x1520
+#define RADEON_BRUSH_DATA41                 0x1524
+#define RADEON_BRUSH_DATA42                 0x1528
+#define RADEON_BRUSH_DATA43                 0x152c
+#define RADEON_BRUSH_DATA44                 0x1530
+#define RADEON_BRUSH_DATA45                 0x1534
+#define RADEON_BRUSH_DATA46                 0x1538
+#define RADEON_BRUSH_DATA47                 0x153c
+#define RADEON_BRUSH_DATA48                 0x1540
+#define RADEON_BRUSH_DATA49                 0x1544
+#define RADEON_BRUSH_DATA5                  0x1494
+#define RADEON_BRUSH_DATA50                 0x1548
+#define RADEON_BRUSH_DATA51                 0x154c
+#define RADEON_BRUSH_DATA52                 0x1550
+#define RADEON_BRUSH_DATA53                 0x1554
+#define RADEON_BRUSH_DATA54                 0x1558
+#define RADEON_BRUSH_DATA55                 0x155c
+#define RADEON_BRUSH_DATA56                 0x1560
+#define RADEON_BRUSH_DATA57                 0x1564
+#define RADEON_BRUSH_DATA58                 0x1568
+#define RADEON_BRUSH_DATA59                 0x156c
+#define RADEON_BRUSH_DATA6                  0x1498
+#define RADEON_BRUSH_DATA60                 0x1570
+#define RADEON_BRUSH_DATA61                 0x1574
+#define RADEON_BRUSH_DATA62                 0x1578
+#define RADEON_BRUSH_DATA63                 0x157c
+#define RADEON_BRUSH_DATA7                  0x149c
+#define RADEON_BRUSH_DATA8                  0x14a0
+#define RADEON_BRUSH_DATA9                  0x14a4
+#define RADEON_BRUSH_SCALE                  0x1470
+#define RADEON_BRUSH_Y_X                    0x1474
+#define RADEON_BUS_CNTL                     0x0030
+#       define RADEON_BUS_MASTER_DIS         (1 << 6)
+#       define RADEON_BUS_RD_DISCARD_EN      (1 << 24)
+#       define RADEON_BUS_RD_ABORT_EN        (1 << 25)
+#       define RADEON_BUS_MSTR_DISCONNECT_EN (1 << 28)
+#       define RADEON_BUS_WRT_BURST          (1 << 29)
+#       define RADEON_BUS_READ_BURST         (1 << 30)
+#define RADEON_BUS_CNTL1                    0x0034
+#       define RADEON_BUS_WAIT_ON_LOCK_EN    (1 << 4)
+
+#define RADEON_CACHE_CNTL                   0x1724
+#define RADEON_CACHE_LINE                   0x0f0c /* PCI */
+#define RADEON_CAP0_TRIG_CNTL               0x0950 /* ? */
+#define RADEON_CAP1_TRIG_CNTL               0x09c0 /* ? */
+#define RADEON_CAPABILITIES_ID              0x0f50 /* PCI */
+#define RADEON_CAPABILITIES_PTR             0x0f34 /* PCI */
+#define RADEON_CLK_PIN_CNTL                 0x0001 /* PLL */
+#define RADEON_CLOCK_CNTL_DATA              0x000c
+#define RADEON_CLOCK_CNTL_INDEX             0x0008
+#       define RADEON_PLL_WR_EN             (1 << 7)
+#       define RADEON_PLL_DIV_SEL           (3 << 8)
+#       define RADEON_PLL2_DIV_SEL_MASK     ~(3 << 8)
+#define RADEON_CLR_CMP_CLR_3D               0x1a24
+#define RADEON_CLR_CMP_CLR_DST              0x15c8
+#define RADEON_CLR_CMP_CLR_SRC              0x15c4
+#define RADEON_CLR_CMP_CNTL                 0x15c0
+#       define RADEON_SRC_CMP_EQ_COLOR      (4 <<  0)
+#       define RADEON_SRC_CMP_NEQ_COLOR     (5 <<  0)
+#       define RADEON_CLR_CMP_SRC_SOURCE    (1 << 24)
+#define RADEON_CLR_CMP_MASK                 0x15cc
+#       define RADEON_CLR_CMP_MSK           0xffffffff
+#define RADEON_CLR_CMP_MASK_3D              0x1A28
+#define RADEON_COMMAND                      0x0f04 /* PCI */
+#define RADEON_COMPOSITE_SHADOW_ID          0x1a0c
+#define RADEON_CONFIG_APER_0_BASE           0x0100
+#define RADEON_CONFIG_APER_1_BASE           0x0104
+#define RADEON_CONFIG_APER_SIZE             0x0108
+#define RADEON_CONFIG_BONDS                 0x00e8
+#define RADEON_CONFIG_CNTL                  0x00e0
+#       define RADEON_CFG_ATI_REV_A11       (0   << 16)
+#       define RADEON_CFG_ATI_REV_A12       (1   << 16)
+#       define RADEON_CFG_ATI_REV_A13       (2   << 16)
+#       define RADEON_CFG_ATI_REV_ID_MASK   (0xf << 16)
+#define RADEON_CONFIG_MEMSIZE               0x00f8
+#define RADEON_CONFIG_MEMSIZE_EMBEDDED      0x0114
+#define RADEON_CONFIG_REG_1_BASE            0x010c
+#define RADEON_CONFIG_REG_APER_SIZE         0x0110
+#define RADEON_CONFIG_XSTRAP                0x00e4
+#define RADEON_CONSTANT_COLOR_C             0x1d34
+#       define RADEON_CONSTANT_COLOR_MASK   0x00ffffff
+#       define RADEON_CONSTANT_COLOR_ONE    0x00ffffff
+#       define RADEON_CONSTANT_COLOR_ZERO   0x00000000
+#define RADEON_CRC_CMDFIFO_ADDR             0x0740
+#define RADEON_CRC_CMDFIFO_DOUT             0x0744
+#define RADEON_GRPH_BUFFER_CNTL             0x02f0
+#       define RADEON_GRPH_START_REQ_MASK          (0x7f)
+#       define RADEON_GRPH_START_REQ_SHIFT         0
+#       define RADEON_GRPH_STOP_REQ_MASK           (0x7f<<8)
+#       define RADEON_GRPH_STOP_REQ_SHIFT          8
+#       define RADEON_GRPH_CRITICAL_POINT_MASK     (0x7f<<16)
+#       define RADEON_GRPH_CRITICAL_POINT_SHIFT    16
+#       define RADEON_GRPH_CRITICAL_CNTL           (1<<28)
+#       define RADEON_GRPH_BUFFER_SIZE             (1<<29)
+#       define RADEON_GRPH_CRITICAL_AT_SOF         (1<<30)
+#       define RADEON_GRPH_STOP_CNTL               (1u<<31) /* unsigned: 1 << 31 overflows int */
+#define RADEON_GRPH2_BUFFER_CNTL            0x03f0
+#       define RADEON_GRPH2_START_REQ_MASK         (0x7f)
+#       define RADEON_GRPH2_START_REQ_SHIFT         0
+#       define RADEON_GRPH2_STOP_REQ_MASK          (0x7f<<8)
+#       define RADEON_GRPH2_STOP_REQ_SHIFT         8
+#       define RADEON_GRPH2_CRITICAL_POINT_MASK    (0x7f<<16)
+#       define RADEON_GRPH2_CRITICAL_POINT_SHIFT   16
+#       define RADEON_GRPH2_CRITICAL_CNTL          (1<<28)
+#       define RADEON_GRPH2_BUFFER_SIZE            (1<<29)
+#       define RADEON_GRPH2_CRITICAL_AT_SOF        (1<<30)
+#       define RADEON_GRPH2_STOP_CNTL              (1u<<31) /* unsigned: 1 << 31 overflows int */
+#define RADEON_CRTC_CRNT_FRAME              0x0214
+#define RADEON_CRTC_EXT_CNTL                0x0054
+#       define RADEON_CRTC_VGA_XOVERSCAN    (1 <<  0)
+#       define RADEON_VGA_ATI_LINEAR        (1 <<  3)
+#       define RADEON_XCRT_CNT_EN           (1 <<  6)
+#       define RADEON_CRTC_HSYNC_DIS        (1 <<  8)
+#       define RADEON_CRTC_VSYNC_DIS        (1 <<  9)
+#       define RADEON_CRTC_DISPLAY_DIS      (1 << 10)
+#       define RADEON_CRTC_SYNC_TRISTAT     (1 << 11)
+#       define RADEON_CRTC_CRT_ON           (1 << 15)
+#define RADEON_CRTC_EXT_CNTL_DPMS_BYTE      0x0055 /* = RADEON_CRTC_EXT_CNTL + 1; *_BYTE bits equal the *_DIS bits above >> 8 */
+#       define RADEON_CRTC_HSYNC_DIS_BYTE   (1 <<  0)
+#       define RADEON_CRTC_VSYNC_DIS_BYTE   (1 <<  1)
+#       define RADEON_CRTC_DISPLAY_DIS_BYTE (1 <<  2)
+#define RADEON_CRTC_GEN_CNTL                0x0050
+#       define RADEON_CRTC_DBL_SCAN_EN      (1 <<  0)
+#       define RADEON_CRTC_INTERLACE_EN     (1 <<  1)
+#       define RADEON_CRTC_CSYNC_EN         (1 <<  4)
+#       define RADEON_CRTC_CUR_EN           (1 << 16)
+#       define RADEON_CRTC_CUR_MODE_MASK    (7 << 17)
+#       define RADEON_CRTC_ICON_EN          (1 << 20)
+#       define RADEON_CRTC_EXT_DISP_EN      (1 << 24)
+#       define RADEON_CRTC_EN               (1 << 25)
+#       define RADEON_CRTC_DISP_REQ_EN_B    (1 << 26)
+#define RADEON_CRTC2_GEN_CNTL               0x03f8
+#       define RADEON_CRTC2_DBL_SCAN_EN     (1 <<  0)
+#       define RADEON_CRTC2_INTERLACE_EN    (1 <<  1)
+#       define RADEON_CRTC2_SYNC_TRISTAT    (1 <<  4)
+#       define RADEON_CRTC2_HSYNC_TRISTAT   (1 <<  5)
+#       define RADEON_CRTC2_VSYNC_TRISTAT   (1 <<  6)
+#       define RADEON_CRTC2_CRT2_ON         (1 <<  7)
+#       define RADEON_CRTC2_ICON_EN         (1 << 15)
+#       define RADEON_CRTC2_CUR_EN          (1 << 16)
+#       define RADEON_CRTC2_CUR_MODE_MASK   (7 << 20)
+#       define RADEON_CRTC2_DISP_DIS        (1 << 23)
+#       define RADEON_CRTC2_EN              (1 << 25)
+#       define RADEON_CRTC2_DISP_REQ_EN_B   (1 << 26)
+#       define RADEON_CRTC2_CSYNC_EN        (1 << 27)
+#       define RADEON_CRTC2_HSYNC_DIS       (1 << 28)
+#       define RADEON_CRTC2_VSYNC_DIS       (1 << 29)
+#define RADEON_CRTC_MORE_CNTL               0x027c
+#       define RADEON_CRTC_H_CUTOFF_ACTIVE_EN (1 << 4)
+#       define RADEON_CRTC_V_CUTOFF_ACTIVE_EN (1 << 5)
+#define RADEON_CRTC_GUI_TRIG_VLINE          0x0218
+#define RADEON_CRTC_H_SYNC_STRT_WID         0x0204
+#       define RADEON_CRTC_H_SYNC_STRT_PIX        (0x07  <<  0)
+#       define RADEON_CRTC_H_SYNC_STRT_CHAR       (0x3ff <<  3)
+#       define RADEON_CRTC_H_SYNC_STRT_CHAR_SHIFT 3
+#       define RADEON_CRTC_H_SYNC_WID             (0x3f  << 16)
+#       define RADEON_CRTC_H_SYNC_WID_SHIFT       16
+#       define RADEON_CRTC_H_SYNC_POL             (1     << 23)
+#define RADEON_CRTC2_H_SYNC_STRT_WID        0x0304
+#       define RADEON_CRTC2_H_SYNC_STRT_PIX        (0x07  <<  0)
+#       define RADEON_CRTC2_H_SYNC_STRT_CHAR       (0x3ff <<  3)
+#       define RADEON_CRTC2_H_SYNC_STRT_CHAR_SHIFT 3
+#       define RADEON_CRTC2_H_SYNC_WID             (0x3f  << 16)
+#       define RADEON_CRTC2_H_SYNC_WID_SHIFT       16
+#       define RADEON_CRTC2_H_SYNC_POL             (1     << 23)
+#define RADEON_CRTC_H_TOTAL_DISP            0x0200
+#       define RADEON_CRTC_H_TOTAL          (0x03ff << 0)
+#       define RADEON_CRTC_H_TOTAL_SHIFT    0
+#       define RADEON_CRTC_H_DISP           (0x01ff << 16)
+#       define RADEON_CRTC_H_DISP_SHIFT     16
+#define RADEON_CRTC2_H_TOTAL_DISP           0x0300
+#       define RADEON_CRTC2_H_TOTAL         (0x03ff << 0)
+#       define RADEON_CRTC2_H_TOTAL_SHIFT   0
+#       define RADEON_CRTC2_H_DISP          (0x01ff << 16)
+#       define RADEON_CRTC2_H_DISP_SHIFT    16
+#define RADEON_CRTC_OFFSET                  0x0224
+#define RADEON_CRTC2_OFFSET                 0x0324
+#define RADEON_CRTC_OFFSET_CNTL             0x0228
+#       define RADEON_CRTC_TILE_EN          (1 << 15)
+#define RADEON_CRTC2_OFFSET_CNTL            0x0328
+#       define RADEON_CRTC2_TILE_EN         (1 << 15)
+#define RADEON_CRTC_PITCH                   0x022c
+#define RADEON_CRTC2_PITCH                  0x032c
+#define RADEON_CRTC_STATUS                  0x005c
+#       define RADEON_CRTC_VBLANK_SAVE      (1 <<  1)
+#       define RADEON_CRTC_VBLANK_SAVE_CLEAR  (1 <<  1) /* same bit as RADEON_CRTC_VBLANK_SAVE */
+#define RADEON_CRTC2_STATUS                 0x03fc
+#       define RADEON_CRTC2_VBLANK_SAVE     (1 <<  1)
+#       define RADEON_CRTC2_VBLANK_SAVE_CLEAR (1 <<  1) /* same bit as RADEON_CRTC2_VBLANK_SAVE */
+#define RADEON_CRTC_V_SYNC_STRT_WID         0x020c
+#       define RADEON_CRTC_V_SYNC_STRT        (0x7ff <<  0)
+#       define RADEON_CRTC_V_SYNC_STRT_SHIFT  0
+#       define RADEON_CRTC_V_SYNC_WID         (0x1f  << 16)
+#       define RADEON_CRTC_V_SYNC_WID_SHIFT   16
+#       define RADEON_CRTC_V_SYNC_POL         (1     << 23)
+#define RADEON_CRTC2_V_SYNC_STRT_WID        0x030c
+#       define RADEON_CRTC2_V_SYNC_STRT       (0x7ff <<  0)
+#       define RADEON_CRTC2_V_SYNC_STRT_SHIFT 0
+#       define RADEON_CRTC2_V_SYNC_WID        (0x1f  << 16)
+#       define RADEON_CRTC2_V_SYNC_WID_SHIFT  16
+#       define RADEON_CRTC2_V_SYNC_POL        (1     << 23)
+#define RADEON_CRTC_V_TOTAL_DISP            0x0208
+#       define RADEON_CRTC_V_TOTAL          (0x07ff << 0)
+#       define RADEON_CRTC_V_TOTAL_SHIFT    0
+#       define RADEON_CRTC_V_DISP           (0x07ff << 16)
+#       define RADEON_CRTC_V_DISP_SHIFT     16
+#define RADEON_CRTC2_V_TOTAL_DISP           0x0308
+#       define RADEON_CRTC2_V_TOTAL         (0x07ff << 0)
+#       define RADEON_CRTC2_V_TOTAL_SHIFT   0
+#       define RADEON_CRTC2_V_DISP          (0x07ff << 16)
+#       define RADEON_CRTC2_V_DISP_SHIFT    16
+#define RADEON_CRTC_VLINE_CRNT_VLINE        0x0210
+#       define RADEON_CRTC_CRNT_VLINE_MASK  (0x7ff << 16)
+#define RADEON_CRTC2_CRNT_FRAME             0x0314
+#define RADEON_CRTC2_GUI_TRIG_VLINE         0x0318
+/* RADEON_CRTC2_STATUS (0x03fc) is defined once, above, together with its bit fields. */
+#define RADEON_CRTC2_VLINE_CRNT_VLINE       0x0310
+#define RADEON_CRTC8_DATA                   0x03d5 /* VGA, 0x3b5 */
+#define RADEON_CRTC8_IDX                    0x03d4 /* VGA, 0x3b4 */
+#define RADEON_CUR_CLR0                     0x026c
+#define RADEON_CUR_CLR1                     0x0270
+#define RADEON_CUR_HORZ_VERT_OFF            0x0268
+#define RADEON_CUR_HORZ_VERT_POSN           0x0264
+#define RADEON_CUR_OFFSET                   0x0260
+#       define RADEON_CUR_LOCK              (1U << 31) /* unsigned: 1 << 31 overflows a 32-bit int */
+#define RADEON_CUR2_CLR0                    0x036c /* each CUR2_* offset is the matching CUR_* offset + 0x100 */
+#define RADEON_CUR2_CLR1                    0x0370
+#define RADEON_CUR2_HORZ_VERT_OFF           0x0368
+#define RADEON_CUR2_HORZ_VERT_POSN          0x0364
+#define RADEON_CUR2_OFFSET                  0x0360
+#       define RADEON_CUR2_LOCK             (1U << 31) /* unsigned: 1 << 31 overflows a 32-bit int */
+
+#define RADEON_DAC_CNTL                     0x0058
+#       define RADEON_DAC_RANGE_CNTL        (3 <<  0)
+#       define RADEON_DAC_RANGE_CNTL_MASK   0x03 /* same value as RADEON_DAC_RANGE_CNTL */
+#       define RADEON_DAC_BLANKING          (1 <<  2)
+#       define RADEON_DAC_CMP_EN            (1 <<  3)
+#       define RADEON_DAC_CMP_OUTPUT        (1 <<  7)
+#       define RADEON_DAC_8BIT_EN           (1 <<  8)
+#       define RADEON_DAC_VGA_ADR_EN        (1 << 13)
+#       define RADEON_DAC_PDWN              (1 << 15)
+#       define RADEON_DAC_MASK_ALL          (0xffU << 24) /* unsigned: 0xff << 24 overflows a 32-bit int */
+#define RADEON_DAC_CNTL2                    0x007c
+#       define RADEON_DAC2_DAC_CLK_SEL      (1 <<  0)
+#       define RADEON_DAC2_DAC2_CLK_SEL     (1 <<  1)
+#       define RADEON_DAC2_PALETTE_ACC_CTL  (1 <<  5)
+#define RADEON_DAC_EXT_CNTL                 0x0280
+#       define RADEON_DAC_FORCE_BLANK_OFF_EN (1 << 4)
+#       define RADEON_DAC_FORCE_DATA_EN      (1 << 5)
+#       define RADEON_DAC_FORCE_DATA_SEL_MASK (3 << 6)
+#       define RADEON_DAC_FORCE_DATA_MASK   0x0003ff00 /* 10-bit field starting at RADEON_DAC_FORCE_DATA_SHIFT */
+#       define RADEON_DAC_FORCE_DATA_SHIFT  8
+#define RADEON_TV_DAC_CNTL                  0x088c
+#       define RADEON_TV_DAC_STD_MASK       0x0300
+#       define RADEON_TV_DAC_RDACPD         (1 <<  24)
+#       define RADEON_TV_DAC_GDACPD         (1 <<  25)
+#       define RADEON_TV_DAC_BDACPD         (1 <<  26)
+#define RADEON_DISP_HW_DEBUG                0x0d14
+#       define RADEON_CRT2_DISP1_SEL        (1 <<  5)
+#define RADEON_DISP_OUTPUT_CNTL             0x0d64
+#       define RADEON_DISP_DAC_SOURCE_MASK  0x03
+#       define RADEON_DISP_DAC2_SOURCE_MASK  0x0c
+#       define RADEON_DISP_DAC_SOURCE_CRTC2 0x01
+#       define RADEON_DISP_DAC2_SOURCE_CRTC2 0x04
+#define RADEON_DAC_CRC_SIG                  0x02cc
+#define RADEON_DAC_DATA                     0x03c9 /* VGA */
+#define RADEON_DAC_MASK                     0x03c6 /* VGA */
+#define RADEON_DAC_R_INDEX                  0x03c7 /* VGA */
+#define RADEON_DAC_W_INDEX                  0x03c8 /* VGA */
+#define RADEON_DDA_CONFIG                   0x02e0
+#define RADEON_DDA_ON_OFF                   0x02e4
+#define RADEON_DEFAULT_OFFSET               0x16e0
+#define RADEON_DEFAULT_PITCH                0x16e4
+#define RADEON_DEFAULT_SC_BOTTOM_RIGHT      0x16e8
+#       define RADEON_DEFAULT_SC_RIGHT_MAX  (0x1fff <<  0)
+#       define RADEON_DEFAULT_SC_BOTTOM_MAX (0x1fff << 16)
+#define RADEON_DESTINATION_3D_CLR_CMP_VAL   0x1820
+#define RADEON_DESTINATION_3D_CLR_CMP_MSK   0x1824
+#define RADEON_DEVICE_ID                    0x0f02 /* PCI */
+#define RADEON_DISP_MISC_CNTL               0x0d00
+#       define RADEON_SOFT_RESET_GRPH_PP    (1 << 0)
+#define RADEON_DISP_MERGE_CNTL              0x0d60
+#       define RADEON_DISP_ALPHA_MODE_MASK  0x03
+#       define RADEON_DISP_ALPHA_MODE_KEY   0
+#       define RADEON_DISP_ALPHA_MODE_PER_PIXEL 1
+#       define RADEON_DISP_ALPHA_MODE_GLOBAL 2
+#       define RADEON_DISP_RGB_OFFSET_EN    (1<<8)
+#       define RADEON_DISP_GRPH_ALPHA_MASK  (0xff << 16)
+#       define RADEON_DISP_OV0_ALPHA_MASK   (0xffU << 24) /* unsigned: 0xff << 24 overflows a 32-bit int */
+#       define RADEON_DISP_LIN_TRANS_BYPASS (0x01 << 9)
+#define RADEON_DISP2_MERGE_CNTL             0x0d68
+#       define RADEON_DISP2_RGB_OFFSET_EN   (1<<8)
+#define RADEON_DISP_LIN_TRANS_GRPH_A        0x0d80
+#define RADEON_DISP_LIN_TRANS_GRPH_B        0x0d84
+#define RADEON_DISP_LIN_TRANS_GRPH_C        0x0d88
+#define RADEON_DISP_LIN_TRANS_GRPH_D        0x0d8c
+#define RADEON_DISP_LIN_TRANS_GRPH_E        0x0d90
+#define RADEON_DISP_LIN_TRANS_GRPH_F        0x0d98 /* NOTE(review): A..E step by 4 but F skips 0x0d94 - confirm against hardware docs */
+#define RADEON_DP_BRUSH_BKGD_CLR            0x1478
+#define RADEON_DP_BRUSH_FRGD_CLR            0x147c
+#define RADEON_DP_CNTL                      0x16c0
+#       define RADEON_DST_X_LEFT_TO_RIGHT   (1 <<  0)
+#       define RADEON_DST_Y_TOP_TO_BOTTOM   (1 <<  1)
+#define RADEON_DP_CNTL_XDIR_YDIR_YMAJOR     0x16d0
+#       define RADEON_DST_Y_MAJOR             (1 <<  2)
+#       define RADEON_DST_Y_DIR_TOP_TO_BOTTOM (1 << 15)
+#       define RADEON_DST_X_DIR_LEFT_TO_RIGHT (1U << 31) /* unsigned: 1 << 31 overflows a 32-bit int */
+#define RADEON_DP_DATATYPE                  0x16c4
+#       define RADEON_HOST_BIG_ENDIAN_EN    (1 << 29)
+#define RADEON_DP_GUI_MASTER_CNTL           0x146c
+#       define RADEON_GMC_SRC_PITCH_OFFSET_CNTL   (1    <<  0)
+#       define RADEON_GMC_DST_PITCH_OFFSET_CNTL   (1    <<  1)
+#       define RADEON_GMC_SRC_CLIPPING            (1    <<  2)
+#       define RADEON_GMC_DST_CLIPPING            (1    <<  3)
+#       define RADEON_GMC_BRUSH_DATATYPE_MASK     (0x0f <<  4)
+#       define RADEON_GMC_BRUSH_8X8_MONO_FG_BG    (0    <<  4)
+#       define RADEON_GMC_BRUSH_8X8_MONO_FG_LA    (1    <<  4)
+#       define RADEON_GMC_BRUSH_1X8_MONO_FG_BG    (4    <<  4)
+#       define RADEON_GMC_BRUSH_1X8_MONO_FG_LA    (5    <<  4)
+#       define RADEON_GMC_BRUSH_32x1_MONO_FG_BG   (6    <<  4)
+#       define RADEON_GMC_BRUSH_32x1_MONO_FG_LA   (7    <<  4)
+#       define RADEON_GMC_BRUSH_32x32_MONO_FG_BG  (8    <<  4)
+#       define RADEON_GMC_BRUSH_32x32_MONO_FG_LA  (9    <<  4)
+#       define RADEON_GMC_BRUSH_8x8_COLOR         (10   <<  4)
+#       define RADEON_GMC_BRUSH_1X8_COLOR         (12   <<  4)
+#       define RADEON_GMC_BRUSH_SOLID_COLOR       (13   <<  4)
+#       define RADEON_GMC_BRUSH_NONE              (15   <<  4)
+#       define RADEON_GMC_DST_8BPP_CI             (2    <<  8)
+#       define RADEON_GMC_DST_15BPP               (3    <<  8)
+#       define RADEON_GMC_DST_16BPP               (4    <<  8)
+#       define RADEON_GMC_DST_24BPP               (5    <<  8)
+#       define RADEON_GMC_DST_32BPP               (6    <<  8)
+#       define RADEON_GMC_DST_8BPP_RGB            (7    <<  8)
+#       define RADEON_GMC_DST_Y8                  (8    <<  8)
+#       define RADEON_GMC_DST_RGB8                (9    <<  8)
+#       define RADEON_GMC_DST_VYUY                (11   <<  8)
+#       define RADEON_GMC_DST_YVYU                (12   <<  8)
+#       define RADEON_GMC_DST_AYUV444             (14   <<  8)
+#       define RADEON_GMC_DST_ARGB4444            (15   <<  8)
+#       define RADEON_GMC_DST_DATATYPE_MASK       (0x0f <<  8)
+#       define RADEON_GMC_DST_DATATYPE_SHIFT      8
+#       define RADEON_GMC_SRC_DATATYPE_MASK       (3    << 12)
+#       define RADEON_GMC_SRC_DATATYPE_MONO_FG_BG (0    << 12)
+#       define RADEON_GMC_SRC_DATATYPE_MONO_FG_LA (1    << 12)
+#       define RADEON_GMC_SRC_DATATYPE_COLOR      (3    << 12)
+#       define RADEON_GMC_BYTE_PIX_ORDER          (1    << 14)
+#       define RADEON_GMC_BYTE_MSB_TO_LSB         (0    << 14)
+#       define RADEON_GMC_BYTE_LSB_TO_MSB         (1    << 14)
+#       define RADEON_GMC_CONVERSION_TEMP         (1    << 15)
+#       define RADEON_GMC_CONVERSION_TEMP_6500    (0    << 15)
+#       define RADEON_GMC_CONVERSION_TEMP_9300    (1    << 15)
+#       define RADEON_GMC_ROP3_MASK               (0xff << 16)
+#       define RADEON_DP_SRC_SOURCE_MASK          (7    << 24)
+#       define RADEON_DP_SRC_SOURCE_MEMORY        (2    << 24)
+#       define RADEON_DP_SRC_SOURCE_HOST_DATA     (3    << 24)
+#       define RADEON_GMC_3D_FCN_EN               (1    << 27)
+#       define RADEON_GMC_CLR_CMP_CNTL_DIS        (1    << 28)
+#       define RADEON_GMC_AUX_CLIP_DIS            (1    << 29)
+#       define RADEON_GMC_WR_MSK_DIS              (1    << 30)
+#       define RADEON_GMC_LD_BRUSH_Y_X            (1U   << 31) /* unsigned: 1 << 31 overflows a 32-bit int */
+#       define RADEON_ROP3_ZERO             0x00000000 /* every ROP3_* code lies inside RADEON_GMC_ROP3_MASK (bits 23:16) */
+#       define RADEON_ROP3_DSa              0x00880000
+#       define RADEON_ROP3_SDna             0x00440000
+#       define RADEON_ROP3_S                0x00cc0000
+#       define RADEON_ROP3_DSna             0x00220000
+#       define RADEON_ROP3_D                0x00aa0000
+#       define RADEON_ROP3_DSx              0x00660000
+#       define RADEON_ROP3_DSo              0x00ee0000
+#       define RADEON_ROP3_DSon             0x00110000
+#       define RADEON_ROP3_DSxn             0x00990000
+#       define RADEON_ROP3_Dn               0x00550000
+#       define RADEON_ROP3_SDno             0x00dd0000
+#       define RADEON_ROP3_Sn               0x00330000
+#       define RADEON_ROP3_DSno             0x00bb0000
+#       define RADEON_ROP3_DSan             0x00770000
+#       define RADEON_ROP3_ONE              0x00ff0000
+#       define RADEON_ROP3_DPa              0x00a00000
+#       define RADEON_ROP3_PDna             0x00500000
+#       define RADEON_ROP3_P                0x00f00000
+#       define RADEON_ROP3_DPna             0x000a0000
+/*      RADEON_ROP3_D (0x00aa0000) is defined once, in the list above. */
+#       define RADEON_ROP3_DPx              0x005a0000
+#       define RADEON_ROP3_DPo              0x00fa0000
+#       define RADEON_ROP3_DPon             0x00050000
+#       define RADEON_ROP3_PDxn             0x00a50000
+#       define RADEON_ROP3_PDno             0x00f50000
+#       define RADEON_ROP3_Pn               0x000f0000
+#       define RADEON_ROP3_DPno             0x00af0000
+#       define RADEON_ROP3_DPan             0x005f0000
+#define RADEON_DP_GUI_MASTER_CNTL_C         0x1c84
+#define RADEON_DP_MIX                       0x16c8
+#define RADEON_DP_SRC_BKGD_CLR              0x15dc
+#define RADEON_DP_SRC_FRGD_CLR              0x15d8
+#define RADEON_DP_WRITE_MASK                0x16cc
+#define RADEON_DST_BRES_DEC                 0x1630
+#define RADEON_DST_BRES_ERR                 0x1628
+#define RADEON_DST_BRES_INC                 0x162c
+#define RADEON_DST_BRES_LNTH                0x1634
+#define RADEON_DST_BRES_LNTH_SUB            0x1638
+#define RADEON_DST_HEIGHT                   0x1410
+#define RADEON_DST_HEIGHT_WIDTH             0x143c
+#define RADEON_DST_HEIGHT_WIDTH_8           0x158c
+#define RADEON_DST_HEIGHT_WIDTH_BW          0x15b4
+#define RADEON_DST_HEIGHT_Y                 0x15a0
+#define RADEON_DST_LINE_START               0x1600
+#define RADEON_DST_LINE_END                 0x1604
+#define RADEON_DST_LINE_PATCOUNT            0x1608
+#       define RADEON_BRES_CNTL_SHIFT       8
+#define RADEON_DST_OFFSET                   0x1404
+#define RADEON_DST_PITCH                    0x1408
+#define RADEON_DST_PITCH_OFFSET             0x142c
+#define RADEON_DST_PITCH_OFFSET_C           0x1c80
+#       define RADEON_PITCH_SHIFT           21
+#       define RADEON_DST_TILE_LINEAR       (0U << 30) /* 2-bit field in bits 31:30; unsigned so 2 and 3 do not overflow int */
+#       define RADEON_DST_TILE_MACRO        (1U << 30)
+#       define RADEON_DST_TILE_MICRO        (2U << 30)
+#       define RADEON_DST_TILE_BOTH         (3U << 30)
+#define RADEON_DST_WIDTH                    0x140c
+#define RADEON_DST_WIDTH_HEIGHT             0x1598
+#define RADEON_DST_WIDTH_X                  0x1588
+#define RADEON_DST_WIDTH_X_INCY             0x159c
+#define RADEON_DST_X                        0x141c
+#define RADEON_DST_X_SUB                    0x15a4
+#define RADEON_DST_X_Y                      0x1594
+#define RADEON_DST_Y                        0x1420
+#define RADEON_DST_Y_SUB                    0x15a8
+#define RADEON_DST_Y_X                      0x1438
+
+#define RADEON_FCP_CNTL                     0x0910
+#       define RADEON_FCP0_SRC_PCICLK            0
+#       define RADEON_FCP0_SRC_PCLK              1
+#       define RADEON_FCP0_SRC_PCLKb             2
+#       define RADEON_FCP0_SRC_HREF              3
+#       define RADEON_FCP0_SRC_GND               4
+#       define RADEON_FCP0_SRC_HREFb             5
+#define RADEON_FLUSH_1                      0x1704 /* FLUSH_1..FLUSH_7 sit at consecutive 4-byte offsets */
+#define RADEON_FLUSH_2                      0x1708
+#define RADEON_FLUSH_3                      0x170c
+#define RADEON_FLUSH_4                      0x1710
+#define RADEON_FLUSH_5                      0x1714
+#define RADEON_FLUSH_6                      0x1718
+#define RADEON_FLUSH_7                      0x171c
+#define RADEON_FOG_3D_TABLE_START           0x1810
+#define RADEON_FOG_3D_TABLE_END             0x1814
+#define RADEON_FOG_3D_TABLE_DENSITY         0x181c
+#define RADEON_FOG_TABLE_INDEX              0x1a14
+#define RADEON_FOG_TABLE_DATA               0x1a18
+#define RADEON_FP_CRTC_H_TOTAL_DISP         0x0250
+#define RADEON_FP_CRTC_V_TOTAL_DISP         0x0254
+#define RADEON_FP_CRTC2_H_TOTAL_DISP        0x0350
+#define RADEON_FP_CRTC2_V_TOTAL_DISP        0x0354
+#       define RADEON_FP_CRTC_H_TOTAL_MASK      0x000003ff
+#       define RADEON_FP_CRTC_H_DISP_MASK       0x01ff0000
+#       define RADEON_FP_CRTC_V_TOTAL_MASK      0x00000fff
+#       define RADEON_FP_CRTC_V_DISP_MASK       0x0fff0000
+#       define RADEON_FP_H_SYNC_STRT_CHAR_MASK  0x00001ff8
+#       define RADEON_FP_H_SYNC_WID_MASK        0x003f0000
+#       define RADEON_FP_V_SYNC_STRT_MASK       0x00000fff
+#       define RADEON_FP_V_SYNC_WID_MASK        0x001f0000
+#       define RADEON_FP_CRTC_H_TOTAL_SHIFT     0
+#       define RADEON_FP_CRTC_H_DISP_SHIFT      16
+#       define RADEON_FP_CRTC_V_TOTAL_SHIFT     0
+#       define RADEON_FP_CRTC_V_DISP_SHIFT      16
+#       define RADEON_FP_H_SYNC_STRT_CHAR_SHIFT 3
+#       define RADEON_FP_H_SYNC_WID_SHIFT       16
+#       define RADEON_FP_V_SYNC_STRT_SHIFT      0
+#       define RADEON_FP_V_SYNC_WID_SHIFT       16
+#define RADEON_FP_GEN_CNTL                  0x0284
+#       define RADEON_FP_FPON                  (1 <<  0)
+#       define RADEON_FP_TMDS_EN               (1 <<  2)
+#       define RADEON_FP_PANEL_FORMAT          (1 <<  3)
+#       define RADEON_FP_EN_TMDS               (1 <<  7)
+#       define RADEON_FP_DETECT_SENSE          (1 <<  8)
+#       define RADEON_FP_SEL_CRTC2             (1 << 13)
+#       define RADEON_FP_CRTC_DONT_SHADOW_HPAR (1 << 15)
+#       define RADEON_FP_CRTC_DONT_SHADOW_VPAR (1 << 16)
+#       define RADEON_FP_CRTC_DONT_SHADOW_HEND (1 << 17)
+#       define RADEON_FP_CRTC_USE_SHADOW_VEND  (1 << 18)
+#       define RADEON_FP_RMX_HVSYNC_CONTROL_EN (1 << 20)
+#       define RADEON_FP_DFP_SYNC_SEL          (1 << 21)
+#       define RADEON_FP_CRTC_LOCK_8DOT        (1 << 22)
+#       define RADEON_FP_CRT_SYNC_SEL          (1 << 23)
+#       define RADEON_FP_USE_SHADOW_EN         (1 << 24)
+#       define RADEON_FP_CRT_SYNC_ALT          (1 << 26)
+#define RADEON_FP2_GEN_CNTL                 0x0288
+#       define RADEON_FP2_BLANK_EN             (1 <<  1)
+#       define RADEON_FP2_ON                   (1 <<  2)
+#       define RADEON_FP2_PANEL_FORMAT         (1 <<  3)
+#       define RADEON_FP2_SOURCE_SEL_MASK      (3 << 10)
+#       define RADEON_FP2_SOURCE_SEL_CRTC2     (1 << 10)
+#       define RADEON_FP2_SRC_SEL_MASK         (3 << 13)
+#       define RADEON_FP2_SRC_SEL_CRTC2        (1 << 13)
+#       define RADEON_FP2_FP_POL               (1 << 16)
+#       define RADEON_FP2_LP_POL               (1 << 17)
+#       define RADEON_FP2_SCK_POL              (1 << 18)
+#       define RADEON_FP2_LCD_CNTL_MASK        (7 << 19)
+#       define RADEON_FP2_PAD_FLOP_EN          (1 << 22)
+#       define RADEON_FP2_CRC_EN               (1 << 23)
+#       define RADEON_FP2_CRC_READ_EN          (1 << 24)
+#       define RADEON_FP2_DV0_EN               (1 << 25)
+#       define RADEON_FP2_DV0_RATE_SEL_SDR     (1 << 26)
+#define RADEON_FP_H_SYNC_STRT_WID           0x02c4
+#define RADEON_FP_H2_SYNC_STRT_WID          0x03c4
+#define RADEON_FP_HORZ_STRETCH              0x028c
+#define RADEON_FP_HORZ2_STRETCH             0x038c
+#       define RADEON_HORZ_STRETCH_RATIO_MASK 0xffff
+#       define RADEON_HORZ_STRETCH_RATIO_MAX  4096
+#       define RADEON_HORZ_PANEL_SIZE         (0x1ff   << 16)
+#       define RADEON_HORZ_PANEL_SHIFT        16
+#       define RADEON_HORZ_STRETCH_PIXREP     (0      << 25)
+#       define RADEON_HORZ_STRETCH_BLEND      (1      << 26)
+#       define RADEON_HORZ_STRETCH_ENABLE     (1      << 25)
+#       define RADEON_HORZ_AUTO_RATIO         (1      << 27)
+#       define RADEON_HORZ_FP_LOOP_STRETCH    (0x7    << 28)
+#       define RADEON_HORZ_AUTO_RATIO_INC     (1U     << 31) /* unsigned: 1 << 31 overflows a 32-bit int */
+#define RADEON_FP_V_SYNC_STRT_WID           0x02c8
+#define RADEON_FP_VERT_STRETCH              0x0290
+#define RADEON_FP_V2_SYNC_STRT_WID          0x03c8
+#define RADEON_FP_VERT2_STRETCH             0x0390
+#       define RADEON_VERT_PANEL_SIZE          (0xfff << 12)
+#       define RADEON_VERT_PANEL_SHIFT         12
+#       define RADEON_VERT_STRETCH_RATIO_MASK  0xfff
+#       define RADEON_VERT_STRETCH_RATIO_SHIFT 0
+#       define RADEON_VERT_STRETCH_RATIO_MAX   4096
+#       define RADEON_VERT_STRETCH_ENABLE      (1     << 25)
+#       define RADEON_VERT_STRETCH_LINEREP     (0     << 26)
+#       define RADEON_VERT_STRETCH_BLEND       (1     << 26)
+#       define RADEON_VERT_AUTO_RATIO_EN       (1     << 27)
+#       define RADEON_VERT_STRETCH_RESERVED    0xf1000000
+
+#define RADEON_GEN_INT_CNTL                 0x0040
+#define RADEON_GEN_INT_STATUS               0x0044
+#       define RADEON_VSYNC_INT_AK          (1 <<  2)
+#       define RADEON_VSYNC_INT             (1 <<  2) /* same bit as RADEON_VSYNC_INT_AK */
+#       define RADEON_VSYNC2_INT_AK         (1 <<  6)
+#       define RADEON_VSYNC2_INT            (1 <<  6) /* same bit as RADEON_VSYNC2_INT_AK */
+#define RADEON_GENENB                       0x03c3 /* VGA */
+#define RADEON_GENFC_RD                     0x03ca /* VGA */
+#define RADEON_GENFC_WT                     0x03da /* VGA, 0x03ba */
+#define RADEON_GENMO_RD                     0x03cc /* VGA */
+#define RADEON_GENMO_WT                     0x03c2 /* VGA */
+#define RADEON_GENS0                        0x03c2 /* VGA; same offset as RADEON_GENMO_WT */
+#define RADEON_GENS1                        0x03da /* VGA, 0x03ba; same offset as RADEON_GENFC_WT */
+#define RADEON_GPIO_MONID                   0x0068 /* DDC interface via I2C */
+#define RADEON_GPIO_MONIDB                  0x006c
+#define RADEON_GPIO_CRT2_DDC                0x006c /* same offset as RADEON_GPIO_MONIDB */
+#define RADEON_GPIO_DVI_DDC                 0x0064
+#define RADEON_GPIO_VGA_DDC                 0x0060
+#       define RADEON_GPIO_A_0              (1 <<  0)
+#       define RADEON_GPIO_A_1              (1 <<  1)
+#       define RADEON_GPIO_Y_0              (1 <<  8)
+#       define RADEON_GPIO_Y_1              (1 <<  9)
+#       define RADEON_GPIO_Y_SHIFT_0        8 /* bit position of RADEON_GPIO_Y_0 */
+#       define RADEON_GPIO_Y_SHIFT_1        9 /* bit position of RADEON_GPIO_Y_1 */
+#       define RADEON_GPIO_EN_0             (1 << 16)
+#       define RADEON_GPIO_EN_1             (1 << 17)
+#       define RADEON_GPIO_MASK_0           (1 << 24) /*??*/
+#       define RADEON_GPIO_MASK_1           (1 << 25) /*??*/
+#define RADEON_GRPH8_DATA                   0x03cf /* VGA */
+#define RADEON_GRPH8_IDX                    0x03ce /* VGA */
+#define RADEON_GUI_SCRATCH_REG0             0x15e0 /* REG0..REG5 sit at consecutive 4-byte offsets */
+#define RADEON_GUI_SCRATCH_REG1             0x15e4
+#define RADEON_GUI_SCRATCH_REG2             0x15e8
+#define RADEON_GUI_SCRATCH_REG3             0x15ec
+#define RADEON_GUI_SCRATCH_REG4             0x15f0
+#define RADEON_GUI_SCRATCH_REG5             0x15f4
+
+#define RADEON_HEADER                       0x0f0e /* PCI */
+#define RADEON_HOST_DATA0                   0x17c0 /* HOST_DATA0..7 sit at consecutive 4-byte offsets */
+#define RADEON_HOST_DATA1                   0x17c4
+#define RADEON_HOST_DATA2                   0x17c8
+#define RADEON_HOST_DATA3                   0x17cc
+#define RADEON_HOST_DATA4                   0x17d0
+#define RADEON_HOST_DATA5                   0x17d4
+#define RADEON_HOST_DATA6                   0x17d8
+#define RADEON_HOST_DATA7                   0x17dc
+#define RADEON_HOST_DATA_LAST               0x17e0
+#define RADEON_HOST_PATH_CNTL               0x0130
+#       define RADEON_HDP_SOFT_RESET        (1 << 26)
+#define RADEON_HTOTAL_CNTL                  0x0009 /* PLL */
+#define RADEON_HTOTAL2_CNTL                 0x002e /* PLL */
+
+#define RADEON_I2C_CNTL_1                   0x0094 /* ? */
+#define RADEON_DVI_I2C_CNTL_1               0x02e4 /* ? */
+#define RADEON_INTERRUPT_LINE               0x0f3c /* PCI */
+#define RADEON_INTERRUPT_PIN                0x0f3d /* PCI */
+#define RADEON_IO_BASE                      0x0f14 /* PCI */
+
+#define RADEON_LATENCY                      0x0f0d /* PCI */
+#define RADEON_LEAD_BRES_DEC                0x1608
+#define RADEON_LEAD_BRES_LNTH               0x161c
+#define RADEON_LEAD_BRES_LNTH_SUB           0x1624
+#define RADEON_LVDS_GEN_CNTL                0x02d0
+#       define RADEON_LVDS_ON               (1   <<  0)
+#       define RADEON_LVDS_DISPLAY_DIS      (1   <<  1)
+#       define RADEON_LVDS_PANEL_TYPE       (1   <<  2)
+#       define RADEON_LVDS_PANEL_FORMAT     (1   <<  3)
+#       define RADEON_LVDS_EN               (1   <<  7)
+#       define RADEON_LVDS_DIGON            (1   << 18)
+#       define RADEON_LVDS_BLON             (1   << 19)
+#       define RADEON_LVDS_SEL_CRTC2        (1   << 23)
+#define RADEON_LVDS_PLL_CNTL                0x02d4
+#       define RADEON_HSYNC_DELAY_SHIFT     28
+#       define RADEON_HSYNC_DELAY_MASK      (0xfU << 28) /* unsigned: 0xf << 28 overflows a 32-bit int */
+
+#define RADEON_MAX_LATENCY                  0x0f3f /* PCI */
+#define RADEON_MC_AGP_LOCATION              0x014c
+#define RADEON_MC_FB_LOCATION               0x0148
+#define RADEON_DISPLAY_BASE_ADDR            0x023c
+#define RADEON_DISPLAY2_BASE_ADDR           0x033c
+#define RADEON_OV0_BASE_ADDR                0x043c
+#define RADEON_NB_TOM                       0x015c
+#define RADEON_MCLK_CNTL                    0x0012 /* PLL */
+#       define RADEON_FORCEON_MCLKA         (1 << 16)
+#       define RADEON_FORCEON_MCLKB         (1 << 17)
+#       define RADEON_FORCEON_YCLKA         (1 << 18)
+#       define RADEON_FORCEON_YCLKB         (1 << 19)
+#       define RADEON_FORCEON_MC            (1 << 20)
+#       define RADEON_FORCEON_AIC           (1 << 21)
+#define RADEON_MDGPIO_A_REG                 0x01ac
+#define RADEON_MDGPIO_EN_REG                0x01b0
+#define RADEON_MDGPIO_MASK                  0x0198
+#define RADEON_MDGPIO_Y_REG                 0x01b4
+#define RADEON_MEM_ADDR_CONFIG              0x0148 /* same offset as RADEON_MC_FB_LOCATION */
+#define RADEON_MEM_BASE                     0x0f10 /* PCI */
+#define RADEON_MEM_CNTL                     0x0140
+#       define RADEON_MEM_NUM_CHANNELS_MASK 0x01
+#       define RADEON_MEM_USE_B_CH_ONLY     (1 << 1)
+#       define RV100_HALF_MODE              (1 << 3)
+#       define R300_MEM_NUM_CHANNELS_MASK   0x03
+#       define R300_MEM_USE_CD_CH_ONLY      (1 << 2)
+#define RADEON_MEM_TIMING_CNTL              0x0144 /* EXT_MEM_CNTL */
+#define RADEON_MEM_INIT_LAT_TIMER           0x0154
+#define RADEON_MEM_INTF_CNTL                0x014c /* same offset as RADEON_MC_AGP_LOCATION */
+#define RADEON_MEM_SDRAM_MODE_REG           0x0158
+#define RADEON_MEM_STR_CNTL                 0x0150
+#define RADEON_MEM_VGA_RP_SEL               0x003c
+#define RADEON_MEM_VGA_WP_SEL               0x0038
+#define RADEON_MIN_GRANT                    0x0f3e /* PCI */
+#define RADEON_MM_DATA                      0x0004
+#define RADEON_MM_INDEX                     0x0000
+#define RADEON_MPLL_CNTL                    0x000e /* PLL */
+#define RADEON_MPP_TB_CONFIG                0x01c0 /* ? */
+#define RADEON_MPP_GP_CONFIG                0x01c8 /* ? */
+#define R300_MC_IND_INDEX                   0x01f8
+#       define R300_MC_IND_ADDR_MASK        0x3f
+#define R300_MC_IND_DATA                    0x01fc
+#define R300_MC_READ_CNTL_AB                0x017c
+#       define R300_MEM_RBS_POSITION_A_MASK 0x03
+#define R300_MC_READ_CNTL_CD_mcind          0x24
+#       define R300_MEM_RBS_POSITION_C_MASK 0x03
+
+#define RADEON_N_VIF_COUNT                  0x0248
+
+#define RADEON_OV0_AUTO_FLIP_CNTL           0x0470
+#define RADEON_OV0_COLOUR_CNTL              0x04E0
+#define RADEON_OV0_DEINTERLACE_PATTERN      0x0474
+#define RADEON_OV0_EXCLUSIVE_HORZ           0x0408
+#       define  RADEON_EXCL_HORZ_START_MASK        0x000000ff
+#       define  RADEON_EXCL_HORZ_END_MASK          0x0000ff00
+#       define  RADEON_EXCL_HORZ_BACK_PORCH_MASK   0x00ff0000
+#       define  RADEON_EXCL_HORZ_EXCLUSIVE_EN      0x80000000
+#define RADEON_OV0_EXCLUSIVE_VERT           0x040C
+#       define  RADEON_EXCL_VERT_START_MASK        0x000003ff
+#       define  RADEON_EXCL_VERT_END_MASK          0x03ff0000
+#define RADEON_OV0_FILTER_CNTL              0x04A0
+#define RADEON_OV0_FOUR_TAP_COEF_0          0x04B0
+#define RADEON_OV0_FOUR_TAP_COEF_1          0x04B4
+#define RADEON_OV0_FOUR_TAP_COEF_2          0x04B8
+#define RADEON_OV0_FOUR_TAP_COEF_3          0x04BC
+#define RADEON_OV0_FOUR_TAP_COEF_4          0x04C0
+#define RADEON_OV0_GAMMA_000_00F            0x0d40
+#define RADEON_OV0_GAMMA_010_01F            0x0d44
+#define RADEON_OV0_GAMMA_020_03F            0x0d48
+#define RADEON_OV0_GAMMA_040_07F            0x0d4c
+#define RADEON_OV0_GAMMA_080_0BF            0x0e00
+#define RADEON_OV0_GAMMA_0C0_0FF            0x0e04
+#define RADEON_OV0_GAMMA_100_13F            0x0e08
+#define RADEON_OV0_GAMMA_140_17F            0x0e0c
+#define RADEON_OV0_GAMMA_180_1BF            0x0e10
+#define RADEON_OV0_GAMMA_1C0_1FF            0x0e14
+#define RADEON_OV0_GAMMA_200_23F            0x0e18
+#define RADEON_OV0_GAMMA_240_27F            0x0e1c
+#define RADEON_OV0_GAMMA_280_2BF            0x0e20
+#define RADEON_OV0_GAMMA_2C0_2FF            0x0e24
+#define RADEON_OV0_GAMMA_300_33F            0x0e28
+#define RADEON_OV0_GAMMA_340_37F            0x0e2c
+#define RADEON_OV0_GAMMA_380_3BF            0x0d50
+#define RADEON_OV0_GAMMA_3C0_3FF            0x0d54
+#define RADEON_OV0_GRAPHICS_KEY_CLR_LOW     0x04EC
+#define RADEON_OV0_GRAPHICS_KEY_CLR_HIGH    0x04F0
+#define RADEON_OV0_H_INC                    0x0480
+#define RADEON_OV0_KEY_CNTL                 0x04F4
+#       define  RADEON_VIDEO_KEY_FN_MASK    0x00000003L
+#       define  RADEON_VIDEO_KEY_FN_FALSE   0x00000000L
+#       define  RADEON_VIDEO_KEY_FN_TRUE    0x00000001L
+#       define  RADEON_VIDEO_KEY_FN_EQ      0x00000002L
+#       define  RADEON_VIDEO_KEY_FN_NE      0x00000003L
+#       define  RADEON_GRAPHIC_KEY_FN_MASK  0x00000030L
+#       define  RADEON_GRAPHIC_KEY_FN_FALSE 0x00000000L
+#       define  RADEON_GRAPHIC_KEY_FN_TRUE  0x00000010L
+#       define  RADEON_GRAPHIC_KEY_FN_EQ    0x00000020L
+#       define  RADEON_GRAPHIC_KEY_FN_NE    0x00000030L
+#       define  RADEON_CMP_MIX_MASK         0x00000100L
+#       define  RADEON_CMP_MIX_OR           0x00000000L
+#       define  RADEON_CMP_MIX_AND          0x00000100L
+#define RADEON_OV0_LIN_TRANS_A              0x0d20 /* LIN_TRANS_A..F: six registers, 4 bytes apart */
+#define RADEON_OV0_LIN_TRANS_B              0x0d24
+#define RADEON_OV0_LIN_TRANS_C              0x0d28
+#define RADEON_OV0_LIN_TRANS_D              0x0d2c
+#define RADEON_OV0_LIN_TRANS_E              0x0d30
+#define RADEON_OV0_LIN_TRANS_F              0x0d34
+#define RADEON_OV0_P1_BLANK_LINES_AT_TOP    0x0430
+#       define  RADEON_P1_BLNK_LN_AT_TOP_M1_MASK   0x00000fffL /* bits 11:0 */
+#       define  RADEON_P1_ACTIVE_LINES_M1          0x0fff0000L /* bits 27:16 */
+#define RADEON_OV0_P1_H_ACCUM_INIT          0x0488
+#define RADEON_OV0_P1_V_ACCUM_INIT          0x0428
+#       define  RADEON_OV0_P1_MAX_LN_IN_PER_LN_OUT 0x00000003L /* bits 1:0 */
+#       define  RADEON_OV0_P1_V_ACCUM_INIT_MASK    0x01ff8000L /* bits 24:15 */
+#define RADEON_OV0_P1_X_START_END           0x0494
+#define RADEON_OV0_P2_X_START_END           0x0498
+#define RADEON_OV0_P23_BLANK_LINES_AT_TOP   0x0434
+#       define  RADEON_P23_BLNK_LN_AT_TOP_M1_MASK  0x000007ffL /* bits 10:0 */
+#       define  RADEON_P23_ACTIVE_LINES_M1         0x07ff0000L /* bits 26:16 */
+#define RADEON_OV0_P23_H_ACCUM_INIT         0x048C
+#define RADEON_OV0_P23_V_ACCUM_INIT         0x042C
+#define RADEON_OV0_P3_X_START_END           0x049C
+#define RADEON_OV0_REG_LOAD_CNTL            0x0410 /* four single-bit flags, bits 3:0 */
+#       define  RADEON_REG_LD_CTL_LOCK                 0x00000001L /* bit 0 */
+#       define  RADEON_REG_LD_CTL_VBLANK_DURING_LOCK   0x00000002L /* bit 1 */
+#       define  RADEON_REG_LD_CTL_STALL_GUI_UNTIL_FLIP 0x00000004L /* bit 2 */
+#       define  RADEON_REG_LD_CTL_LOCK_READBACK        0x00000008L /* bit 3 */
+#define RADEON_OV0_SCALE_CNTL               0x0420
+#       define  RADEON_SCALER_HORZ_PICK_NEAREST    0x00000004L /* bit 2 */
+#       define  RADEON_SCALER_VERT_PICK_NEAREST    0x00000008L /* bit 3 */
+#       define  RADEON_SCALER_SIGNED_UV            0x00000010L /* bit 4 */
+#       define  RADEON_SCALER_GAMMA_SEL_MASK       0x00000060L /* bits 6:5 */
+#       define  RADEON_SCALER_GAMMA_SEL_BRIGHT     0x00000000L
+#       define  RADEON_SCALER_GAMMA_SEL_G22        0x00000020L
+#       define  RADEON_SCALER_GAMMA_SEL_G18        0x00000040L
+#       define  RADEON_SCALER_GAMMA_SEL_G14        0x00000060L
+#       define  RADEON_SCALER_COMCORE_SHIFT_UP_ONE 0x00000080L /* bit 7 */
+#       define  RADEON_SCALER_SURFAC_FORMAT        0x00000f00L /* bits 11:8; SOURCE_* values below fit this field */
+#       define  RADEON_SCALER_SOURCE_15BPP         0x00000300L
+#       define  RADEON_SCALER_SOURCE_16BPP         0x00000400L
+#       define  RADEON_SCALER_SOURCE_32BPP         0x00000600L
+#       define  RADEON_SCALER_SOURCE_YUV9          0x00000900L
+#       define  RADEON_SCALER_SOURCE_YUV12         0x00000A00L
+#       define  RADEON_SCALER_SOURCE_VYUY422       0x00000B00L
+#       define  RADEON_SCALER_SOURCE_YVYU422       0x00000C00L
+#       define  RADEON_SCALER_ADAPTIVE_DEINT       0x00001000L /* bit 12 */
+#       define  RADEON_SCALER_TEMPORAL_DEINT       0x00002000L /* bit 13 */
+#       define  RADEON_SCALER_SMART_SWITCH         0x00008000L /* bit 15 */
+#       define  RADEON_SCALER_BURST_PER_PLANE      0x007F0000L /* bits 22:16 */
+#       define  RADEON_SCALER_DOUBLE_BUFFER        0x01000000L /* bit 24 */
+#       define  RADEON_SCALER_DIS_LIMIT            0x08000000L /* bit 27 */
+#       define  RADEON_SCALER_INT_EMU              0x20000000L /* bit 29 */
+#       define  RADEON_SCALER_ENABLE               0x40000000L /* bit 30 */
+#       define  RADEON_SCALER_SOFT_RESET           0x80000000L /* bit 31 */
+#       define  RADEON_SCALER_ADAPTIVE_DEINT       0x00001000L /* NOTE(review): redundant, repeats the identical definition earlier in this group */
+#define RADEON_OV0_STEP_BY                  0x0484
+#define RADEON_OV0_TEST                     0x04F8
+#define RADEON_OV0_V_INC                    0x0424
+#define RADEON_OV0_VID_BUF_PITCH0_VALUE     0x0460
+#define RADEON_OV0_VID_BUF_PITCH1_VALUE     0x0464
+#define RADEON_OV0_VID_BUF0_BASE_ADRS       0x0440 /* BUF0..BUF5 base registers are 4 bytes apart */
+#       define  RADEON_VIF_BUF0_PITCH_SEL          0x00000001L /* bit 0 */
+#       define  RADEON_VIF_BUF0_TILE_ADRS          0x00000002L /* bit 1 */
+#       define  RADEON_VIF_BUF0_BASE_ADRS_MASK     0x03fffff0L /* bits 25:4 */
+#       define  RADEON_VIF_BUF0_1ST_LINE_LSBS_MASK 0x48000000L /* bits 30 and 27 only; NOTE(review): non-contiguous mask, confirm */
+#define RADEON_OV0_VID_BUF1_BASE_ADRS       0x0444
+#       define  RADEON_VIF_BUF1_PITCH_SEL          0x00000001L /* bit 0 */
+#       define  RADEON_VIF_BUF1_TILE_ADRS          0x00000002L /* bit 1 */
+#       define  RADEON_VIF_BUF1_BASE_ADRS_MASK     0x03fffff0L /* bits 25:4 */
+#       define  RADEON_VIF_BUF1_1ST_LINE_LSBS_MASK 0x48000000L /* same value as the BUF0 mask */
+#define RADEON_OV0_VID_BUF2_BASE_ADRS       0x0448
+#       define  RADEON_VIF_BUF2_PITCH_SEL          0x00000001L /* bit 0 */
+#       define  RADEON_VIF_BUF2_TILE_ADRS          0x00000002L /* bit 1 */
+#       define  RADEON_VIF_BUF2_BASE_ADRS_MASK     0x03fffff0L /* bits 25:4 */
+#       define  RADEON_VIF_BUF2_1ST_LINE_LSBS_MASK 0x48000000L /* same value as the BUF0 mask */
+#define RADEON_OV0_VID_BUF3_BASE_ADRS       0x044C /* no per-buffer field macros are defined for BUF3..BUF5 here */
+#define RADEON_OV0_VID_BUF4_BASE_ADRS       0x0450
+#define RADEON_OV0_VID_BUF5_BASE_ADRS       0x0454
+#define RADEON_OV0_VIDEO_KEY_CLR_HIGH       0x04E8
+#define RADEON_OV0_VIDEO_KEY_CLR_LOW        0x04E4
+#define RADEON_OV0_Y_X_START                0x0400
+#define RADEON_OV0_Y_X_END                  0x0404
+#define RADEON_OV1_Y_X_START                0x0600
+#define RADEON_OV1_Y_X_END                  0x0604
+#define RADEON_OVR_CLR                      0x0230
+#define RADEON_OVR_WID_LEFT_RIGHT           0x0234
+#define RADEON_OVR_WID_TOP_BOTTOM           0x0238
+
+#define RADEON_P2PLL_CNTL                   0x002a /* P2PLL */
+#       define RADEON_P2PLL_RESET                (1 <<  0)
+#       define RADEON_P2PLL_SLEEP                (1 <<  1)
+#       define RADEON_P2PLL_ATOMIC_UPDATE_EN     (1 << 16)
+#       define RADEON_P2PLL_VGA_ATOMIC_UPDATE_EN (1 << 17)
+#       define RADEON_P2PLL_ATOMIC_UPDATE_VSYNC  (1 << 18)
+#define RADEON_P2PLL_DIV_0                  0x002c
+#       define RADEON_P2PLL_FB0_DIV_MASK    0x07ff /* bits 10:0 */
+#       define RADEON_P2PLL_POST0_DIV_MASK  0x00070000 /* bits 18:16 */
+#define RADEON_P2PLL_REF_DIV                0x002B /* PLL */
+#       define RADEON_P2PLL_REF_DIV_MASK    0x03ff /* bits 9:0 */
+#       define RADEON_P2PLL_ATOMIC_UPDATE_R (1 << 15) /* same as _W */
+#       define RADEON_P2PLL_ATOMIC_UPDATE_W (1 << 15) /* same as _R */
+#       define R300_PPLL_REF_DIV_ACC_MASK   (0x3ff << 18) /* bits 27:18 */
+#       define R300_PPLL_REF_DIV_ACC_SHIFT  18 /* shift matching R300_PPLL_REF_DIV_ACC_MASK */
+#define RADEON_PALETTE_DATA                 0x00b4
+#define RADEON_PALETTE_30_DATA              0x00b8
+#define RADEON_PALETTE_INDEX                0x00b0
+#define RADEON_PCI_GART_PAGE                0x017c
+#define RADEON_PIXCLKS_CNTL                 0x002d
+#       define RADEON_PIX2CLK_SRC_SEL_MASK     0x03 /* bits 1:0; SRC_SEL_* values below fit this field */
+#       define RADEON_PIX2CLK_SRC_SEL_CPUCLK   0x00
+#       define RADEON_PIX2CLK_SRC_SEL_PSCANCLK 0x01
+#       define RADEON_PIX2CLK_SRC_SEL_BYTECLK  0x02
+#       define RADEON_PIX2CLK_SRC_SEL_P2PLLCLK 0x03
+#       define RADEON_PIX2CLK_ALWAYS_ONb       (1<<6) /* bit 6 */
+#       define RADEON_PIX2CLK_DAC_ALWAYS_ONb   (1<<7) /* bit 7 */
+#       define RADEON_PIXCLK_TV_SRC_SEL        (1 << 8)
+#       define RADEON_PIXCLK_LVDS_ALWAYS_ONb   (1 << 14)
+#       define RADEON_PIXCLK_TMDS_ALWAYS_ONb   (1 << 15)
+#define RADEON_PLANE_3D_MASK_C              0x1d44 /* same value as RADEON_PP_BORDER_COLOR_1 */
+#define RADEON_PLL_TEST_CNTL                0x0013 /* PLL */
+#define RADEON_PMI_CAP_ID                   0x0f5c /* PCI; same offset as RADEON_PMI_REGISTER */
+#define RADEON_PMI_DATA                     0x0f63 /* PCI */
+#define RADEON_PMI_NXT_CAP_PTR              0x0f5d /* PCI */
+#define RADEON_PMI_PMC_REG                  0x0f5e /* PCI */
+#define RADEON_PMI_PMCSR_REG                0x0f60 /* PCI; same offset as RADEON_PWR_MNGMT_CNTL_STATUS */
+#define RADEON_PMI_REGISTER                 0x0f5c /* PCI; same offset as RADEON_PMI_CAP_ID */
+#define RADEON_PPLL_CNTL                    0x0002 /* PLL */
+#       define RADEON_PPLL_RESET                (1 <<  0)
+#       define RADEON_PPLL_SLEEP                (1 <<  1)
+#       define RADEON_PPLL_ATOMIC_UPDATE_EN     (1 << 16)
+#       define RADEON_PPLL_VGA_ATOMIC_UPDATE_EN (1 << 17)
+#       define RADEON_PPLL_ATOMIC_UPDATE_VSYNC  (1 << 18)
+#define RADEON_PPLL_DIV_0                   0x0004 /* PLL */
+#define RADEON_PPLL_DIV_1                   0x0005 /* PLL */
+#define RADEON_PPLL_DIV_2                   0x0006 /* PLL */
+#define RADEON_PPLL_DIV_3                   0x0007 /* PLL */
+#       define RADEON_PPLL_FB3_DIV_MASK     0x07ff /* bits 10:0 */
+#       define RADEON_PPLL_POST3_DIV_MASK   0x00070000 /* bits 18:16 */
+#define RADEON_PPLL_REF_DIV                 0x0003 /* PLL */
+#       define RADEON_PPLL_REF_DIV_MASK     0x03ff /* bits 9:0 */
+#       define RADEON_PPLL_ATOMIC_UPDATE_R  (1 << 15) /* same as _W */
+#       define RADEON_PPLL_ATOMIC_UPDATE_W  (1 << 15) /* same as _R */
+#define RADEON_PWR_MNGMT_CNTL_STATUS        0x0f60 /* PCI; same offset as RADEON_PMI_PMCSR_REG */
+
+#define RADEON_RBBM_GUICNTL                 0x172c
+#       define RADEON_HOST_DATA_SWAP_NONE   (0 << 0) /* SWAP_* are values 0..3 in bits 1:0 */
+#       define RADEON_HOST_DATA_SWAP_16BIT  (1 << 0)
+#       define RADEON_HOST_DATA_SWAP_32BIT  (2 << 0)
+#       define RADEON_HOST_DATA_SWAP_HDW    (3 << 0)
+#define RADEON_RBBM_SOFT_RESET              0x00f0
+#       define RADEON_SOFT_RESET_CP         (1 <<  0) /* SOFT_RESET_* are single-bit flags, bits 0..7 */
+#       define RADEON_SOFT_RESET_HI         (1 <<  1)
+#       define RADEON_SOFT_RESET_SE         (1 <<  2)
+#       define RADEON_SOFT_RESET_RE         (1 <<  3)
+#       define RADEON_SOFT_RESET_PP         (1 <<  4)
+#       define RADEON_SOFT_RESET_E2         (1 <<  5)
+#       define RADEON_SOFT_RESET_RB         (1 <<  6)
+#       define RADEON_SOFT_RESET_HDP        (1 <<  7)
+#define RADEON_RBBM_STATUS                  0x0e40
+#       define RADEON_RBBM_FIFOCNT_MASK     0x007f /* bits 6:0 */
+#       define RADEON_RBBM_ACTIVE           (1U << 31) /* bit 31; unsigned literal because 1 << 31 overflows a 32-bit signed int */
+#define RADEON_RB2D_DSTCACHE_CTLSTAT        0x342c
+#       define RADEON_RB2D_DC_FLUSH         (3 << 0) /* bits 1:0 */
+#       define RADEON_RB2D_DC_FREE          (3 << 2) /* bits 3:2 */
+#       define RADEON_RB2D_DC_FLUSH_ALL     0xf /* equals RADEON_RB2D_DC_FLUSH | RADEON_RB2D_DC_FREE */
+#       define RADEON_RB2D_DC_BUSY          (1U << 31) /* bit 31; unsigned literal because 1 << 31 overflows a 32-bit signed int */
+#define RADEON_RB2D_DSTCACHE_MODE           0x3428 /* 4 bytes below RADEON_RB2D_DSTCACHE_CTLSTAT (0x342c) */
+#define RADEON_REG_BASE                     0x0f18 /* PCI */
+#define RADEON_REGPROG_INF                  0x0f09 /* PCI */
+#define RADEON_REVISION_ID                  0x0f08 /* PCI */
+
+#define RADEON_SC_BOTTOM                    0x164c
+#define RADEON_SC_BOTTOM_RIGHT              0x16f0
+#define RADEON_SC_BOTTOM_RIGHT_C            0x1c8c /* same value as RADEON_PP_TXOFFSET_2 */
+#define RADEON_SC_LEFT                      0x1640
+#define RADEON_SC_RIGHT                     0x1644
+#define RADEON_SC_TOP                       0x1648
+#define RADEON_SC_TOP_LEFT                  0x16ec
+#define RADEON_SC_TOP_LEFT_C                0x1c88 /* same value as RADEON_PP_TXFORMAT_2 */
+#       define RADEON_SC_SIGN_MASK_LO       0x8000 /* bit 15 */
+#       define RADEON_SC_SIGN_MASK_HI       0x80000000 /* bit 31 */
+#define RADEON_SCLK_CNTL                    0x000d /* PLL; same index as RADEON_XCLK_CNTL */
+#       define RADEON_DYN_STOP_LAT_MASK     0x00007ff8 /* bits 14:3 */
+#       define RADEON_CP_MAX_DYN_STOP_LAT   0x0008 /* bit 3 */
+#       define RADEON_SCLK_FORCEON_MASK     0xffff8000 /* bits 31:15 */
+#define RADEON_SCLK_MORE_CNTL               0x0035 /* PLL */
+#       define RADEON_SCLK_MORE_FORCEON     0x0700 /* bits 10:8 */
+#define RADEON_SDRAM_MODE_REG               0x0158
+#define RADEON_SEQ8_DATA                    0x03c5 /* VGA */
+#define RADEON_SEQ8_IDX                     0x03c4 /* VGA */
+#define RADEON_SNAPSHOT_F_COUNT             0x0244
+#define RADEON_SNAPSHOT_VH_COUNTS           0x0240
+#define RADEON_SNAPSHOT_VIF_COUNT           0x024c
+#define RADEON_SRC_OFFSET                   0x15ac
+#define RADEON_SRC_PITCH                    0x15b0
+#define RADEON_SRC_PITCH_OFFSET             0x1428
+#define RADEON_SRC_SC_BOTTOM                0x165c
+#define RADEON_SRC_SC_BOTTOM_RIGHT          0x16f4
+#define RADEON_SRC_SC_RIGHT                 0x1654
+#define RADEON_SRC_X                        0x1414
+#define RADEON_SRC_X_Y                      0x1590
+#define RADEON_SRC_Y                        0x1418
+#define RADEON_SRC_Y_X                      0x1434
+#define RADEON_STATUS                       0x0f06 /* PCI */
+#define RADEON_SUBPIC_CNTL                  0x0540 /* ? */
+#define RADEON_SUB_CLASS                    0x0f0a /* PCI */
+#define RADEON_SURFACE_CNTL                 0x0b00
+#       define RADEON_SURF_TRANSLATION_DIS  (1 << 8)
+#       define RADEON_NONSURF_AP0_SWP_16BPP (1 << 20) /* same bit value as RADEON_SURF_AP0_SWP_16BPP */
+#       define RADEON_NONSURF_AP0_SWP_32BPP (1 << 21) /* same bit value as RADEON_SURF_AP0_SWP_32BPP */
+#define RADEON_SURFACE0_INFO                0x0b0c /* surface n: LOWER_BOUND, UPPER_BOUND, INFO at 0x0b04/08/0c + 0x10*n */
+#       define RADEON_SURF_TILE_COLOR_MACRO (0 << 16) /* RADEON_SURF_TILE_* values start at bit 16 */
+#       define RADEON_SURF_TILE_COLOR_BOTH  (1 << 16)
+#       define RADEON_SURF_TILE_DEPTH_32BPP (2 << 16)
+#       define RADEON_SURF_TILE_DEPTH_16BPP (3 << 16)
+#       define R200_SURF_TILE_NONE          (0 << 16) /* R200_SURF_TILE_* use a different numbering at the same bit position */
+#       define R200_SURF_TILE_COLOR_MACRO   (1 << 16)
+#       define R200_SURF_TILE_COLOR_MICRO   (2 << 16)
+#       define R200_SURF_TILE_COLOR_BOTH    (3 << 16)
+#       define R200_SURF_TILE_DEPTH_32BPP   (4 << 16)
+#       define R200_SURF_TILE_DEPTH_16BPP   (5 << 16)
+#       define RADEON_SURF_AP0_SWP_16BPP    (1 << 20)
+#       define RADEON_SURF_AP0_SWP_32BPP    (1 << 21)
+#       define RADEON_SURF_AP1_SWP_16BPP    (1 << 22)
+#       define RADEON_SURF_AP1_SWP_32BPP    (1 << 23)
+#define RADEON_SURFACE0_LOWER_BOUND         0x0b04
+#define RADEON_SURFACE0_UPPER_BOUND         0x0b08
+#define RADEON_SURFACE1_INFO                0x0b1c
+#define RADEON_SURFACE1_LOWER_BOUND         0x0b14
+#define RADEON_SURFACE1_UPPER_BOUND         0x0b18
+#define RADEON_SURFACE2_INFO                0x0b2c
+#define RADEON_SURFACE2_LOWER_BOUND         0x0b24
+#define RADEON_SURFACE2_UPPER_BOUND         0x0b28
+#define RADEON_SURFACE3_INFO                0x0b3c
+#define RADEON_SURFACE3_LOWER_BOUND         0x0b34
+#define RADEON_SURFACE3_UPPER_BOUND         0x0b38
+#define RADEON_SURFACE4_INFO                0x0b4c
+#define RADEON_SURFACE4_LOWER_BOUND         0x0b44
+#define RADEON_SURFACE4_UPPER_BOUND         0x0b48
+#define RADEON_SURFACE5_INFO                0x0b5c
+#define RADEON_SURFACE5_LOWER_BOUND         0x0b54
+#define RADEON_SURFACE5_UPPER_BOUND         0x0b58
+#define RADEON_SURFACE6_INFO                0x0b6c
+#define RADEON_SURFACE6_LOWER_BOUND         0x0b64
+#define RADEON_SURFACE6_UPPER_BOUND         0x0b68
+#define RADEON_SURFACE7_INFO                0x0b7c
+#define RADEON_SURFACE7_LOWER_BOUND         0x0b74
+#define RADEON_SURFACE7_UPPER_BOUND         0x0b78
+#define RADEON_SW_SEMAPHORE                 0x013c
+
+#define RADEON_TEST_DEBUG_CNTL              0x0120
+#define RADEON_TEST_DEBUG_MUX               0x0124
+#define RADEON_TEST_DEBUG_OUT               0x012c
+#define RADEON_TMDS_PLL_CNTL                0x02a8
+#define RADEON_TMDS_TRANSMITTER_CNTL        0x02a4
+#       define RADEON_TMDS_TRANSMITTER_PLLEN  1 /* bit 0 */
+#       define RADEON_TMDS_TRANSMITTER_PLLRST 2 /* bit 1 */
+#define RADEON_TRAIL_BRES_DEC               0x1614
+#define RADEON_TRAIL_BRES_ERR               0x160c
+#define RADEON_TRAIL_BRES_INC               0x1610
+#define RADEON_TRAIL_X                      0x1618
+#define RADEON_TRAIL_X_SUB                  0x1620
+
+#define RADEON_VCLK_ECP_CNTL                0x0008 /* PLL */
+#       define RADEON_VCLK_SRC_SEL_MASK     0x03 /* bits 1:0; SRC_SEL_* values below fit this field */
+#       define RADEON_VCLK_SRC_SEL_CPUCLK   0x00
+#       define RADEON_VCLK_SRC_SEL_PSCANCLK 0x01
+#       define RADEON_VCLK_SRC_SEL_BYTECLK  0x02
+#       define RADEON_VCLK_SRC_SEL_PPLLCLK  0x03
+#       define RADEON_PIXCLK_ALWAYS_ONb     (1<<6) /* bit 6 */
+#       define RADEON_PIXCLK_DAC_ALWAYS_ONb (1<<7) /* bit 7 */
+
+#define RADEON_VENDOR_ID                    0x0f00 /* PCI */
+#define RADEON_VGA_DDA_CONFIG               0x02e8
+#define RADEON_VGA_DDA_ON_OFF               0x02ec
+#define RADEON_VID_BUFFER_CONTROL           0x0900
+#define RADEON_VIDEOMUX_CNTL                0x0190
+#define RADEON_VIPH_CONTROL                 0x0c40 /* ? */
+
+#define RADEON_WAIT_UNTIL                   0x1720
+#       define RADEON_WAIT_CRTC_PFLIP       (1 << 0)
+#       define RADEON_WAIT_2D_IDLECLEAN     (1 << 16)
+#       define RADEON_WAIT_3D_IDLECLEAN     (1 << 17)
+#       define RADEON_WAIT_HOST_IDLECLEAN   (1 << 18)
+
+#define RADEON_X_MPLL_REF_FB_DIV            0x000a /* PLL */
+#define RADEON_XCLK_CNTL                    0x000d /* PLL; same index as RADEON_SCLK_CNTL */
+#define RADEON_XDLL_CNTL                    0x000c /* PLL */
+#define RADEON_XPLL_CNTL                    0x000b /* PLL */
+
+
+
+				/* Registers for 3D/TCL */
+#define RADEON_PP_BORDER_COLOR_0            0x1d40 /* BORDER_COLOR_0..2 are 4 bytes apart */
+#define RADEON_PP_BORDER_COLOR_1            0x1d44 /* same value as RADEON_PLANE_3D_MASK_C */
+#define RADEON_PP_BORDER_COLOR_2            0x1d48
+#define RADEON_PP_CNTL                      0x1c38
+#       define RADEON_STIPPLE_ENABLE        (1 <<  0)
+#       define RADEON_SCISSOR_ENABLE        (1 <<  1)
+#       define RADEON_PATTERN_ENABLE        (1 <<  2)
+#       define RADEON_SHADOW_ENABLE         (1 <<  3)
+#       define RADEON_TEX_ENABLE_MASK       (0xf << 4) /* bits 7:4, union of TEX_0..3_ENABLE */
+#       define RADEON_TEX_0_ENABLE          (1 <<  4)
+#       define RADEON_TEX_1_ENABLE          (1 <<  5)
+#       define RADEON_TEX_2_ENABLE          (1 <<  6)
+#       define RADEON_TEX_3_ENABLE          (1 <<  7)
+#       define RADEON_TEX_BLEND_ENABLE_MASK (0xf << 12) /* bits 15:12, union of TEX_BLEND_0..3_ENABLE */
+#       define RADEON_TEX_BLEND_0_ENABLE    (1 << 12)
+#       define RADEON_TEX_BLEND_1_ENABLE    (1 << 13)
+#       define RADEON_TEX_BLEND_2_ENABLE    (1 << 14)
+#       define RADEON_TEX_BLEND_3_ENABLE    (1 << 15)
+#       define RADEON_PLANAR_YUV_ENABLE     (1 << 20)
+#       define RADEON_SPECULAR_ENABLE       (1 << 21)
+#       define RADEON_FOG_ENABLE            (1 << 22)
+#       define RADEON_ALPHA_TEST_ENABLE     (1 << 23)
+#       define RADEON_ANTI_ALIAS_NONE       (0 << 24) /* ANTI_ALIAS_* are values 0..3 in bits 25:24 */
+#       define RADEON_ANTI_ALIAS_LINE       (1 << 24)
+#       define RADEON_ANTI_ALIAS_POLY       (2 << 24)
+#       define RADEON_ANTI_ALIAS_LINE_POLY  (3 << 24)
+#       define RADEON_BUMP_MAP_ENABLE       (1 << 26)
+#       define RADEON_BUMPED_MAP_T0         (0 << 27) /* BUMPED_MAP_* are values 0..2 starting at bit 27 */
+#       define RADEON_BUMPED_MAP_T1         (1 << 27)
+#       define RADEON_BUMPED_MAP_T2         (2 << 27)
+#       define RADEON_TEX_3D_ENABLE_0       (1 << 29)
+#       define RADEON_TEX_3D_ENABLE_1       (1 << 30)
+#       define RADEON_MC_ENABLE             (1U << 31) /* bit 31; unsigned literal because 1 << 31 overflows a 32-bit signed int */
+#define RADEON_PP_FOG_COLOR                 0x1c18
+#       define RADEON_FOG_COLOR_MASK        0x00ffffff /* bits 23:0 */
+#       define RADEON_FOG_VERTEX            (0 << 24)
+#       define RADEON_FOG_TABLE             (1 << 24)
+#       define RADEON_FOG_USE_DEPTH         (0 << 25) /* FOG_USE_* values start at bit 25; value 1 is not defined here */
+#       define RADEON_FOG_USE_DIFFUSE_ALPHA (2 << 25)
+#       define RADEON_FOG_USE_SPEC_ALPHA    (3 << 25)
+#define RADEON_PP_LUM_MATRIX                0x1d00
+#define RADEON_PP_MISC                      0x1c14
+#       define RADEON_REF_ALPHA_MASK        0x000000ff /* bits 7:0 */
+#       define RADEON_ALPHA_TEST_FAIL       (0 << 8) /* ALPHA_TEST_* are values 0..7 in bits 10:8 */
+#       define RADEON_ALPHA_TEST_LESS       (1 << 8)
+#       define RADEON_ALPHA_TEST_LEQUAL     (2 << 8)
+#       define RADEON_ALPHA_TEST_EQUAL      (3 << 8)
+#       define RADEON_ALPHA_TEST_GEQUAL     (4 << 8)
+#       define RADEON_ALPHA_TEST_GREATER    (5 << 8)
+#       define RADEON_ALPHA_TEST_NEQUAL     (6 << 8)
+#       define RADEON_ALPHA_TEST_PASS       (7 << 8)
+#       define RADEON_ALPHA_TEST_OP_MASK    (7 << 8) /* field mask; numerically equal to RADEON_ALPHA_TEST_PASS */
+#       define RADEON_CHROMA_FUNC_FAIL      (0 << 16) /* CHROMA_FUNC_* are values 0..3 in bits 17:16 */
+#       define RADEON_CHROMA_FUNC_PASS      (1 << 16)
+#       define RADEON_CHROMA_FUNC_NEQUAL    (2 << 16)
+#       define RADEON_CHROMA_FUNC_EQUAL     (3 << 16)
+#       define RADEON_CHROMA_KEY_NEAREST    (0 << 18)
+#       define RADEON_CHROMA_KEY_ZERO       (1 << 18)
+#       define RADEON_SHADOW_ID_AUTO_INC    (1 << 20)
+#       define RADEON_SHADOW_FUNC_EQUAL     (0 << 21)
+#       define RADEON_SHADOW_FUNC_NEQUAL    (1 << 21)
+#       define RADEON_SHADOW_PASS_1         (0 << 22)
+#       define RADEON_SHADOW_PASS_2         (1 << 22)
+#       define RADEON_RIGHT_HAND_CUBE_D3D   (0 << 24)
+#       define RADEON_RIGHT_HAND_CUBE_OGL   (1 << 24)
+#define RADEON_PP_ROT_MATRIX_0              0x1d58
+#define RADEON_PP_ROT_MATRIX_1              0x1d5c
+#define RADEON_PP_TXFILTER_0                0x1c54 /* TXFILTER_0..2 are 0x18 bytes apart */
+#define RADEON_PP_TXFILTER_1                0x1c6c
+#define RADEON_PP_TXFILTER_2                0x1c84
+#       define RADEON_MAG_FILTER_NEAREST                   (0  <<  0)
+#       define RADEON_MAG_FILTER_LINEAR                    (1  <<  0)
+#       define RADEON_MAG_FILTER_MASK                      (1  <<  0) /* bit 0 */
+#       define RADEON_MIN_FILTER_NEAREST                   (0  <<  1)
+#       define RADEON_MIN_FILTER_LINEAR                    (1  <<  1)
+#       define RADEON_MIN_FILTER_NEAREST_MIP_NEAREST       (2  <<  1)
+#       define RADEON_MIN_FILTER_NEAREST_MIP_LINEAR        (3  <<  1)
+#       define RADEON_MIN_FILTER_LINEAR_MIP_NEAREST        (6  <<  1)
+#       define RADEON_MIN_FILTER_LINEAR_MIP_LINEAR         (7  <<  1)
+#       define RADEON_MIN_FILTER_ANISO_NEAREST             (8  <<  1)
+#       define RADEON_MIN_FILTER_ANISO_LINEAR              (9  <<  1)
+#       define RADEON_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (10 <<  1)
+#       define RADEON_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR  (11 <<  1)
+#       define RADEON_MIN_FILTER_MASK                      (15 <<  1) /* bits 4:1 */
+#       define RADEON_MAX_ANISO_1_TO_1                     (0  <<  5)
+#       define RADEON_MAX_ANISO_2_TO_1                     (1  <<  5)
+#       define RADEON_MAX_ANISO_4_TO_1                     (2  <<  5)
+#       define RADEON_MAX_ANISO_8_TO_1                     (3  <<  5)
+#       define RADEON_MAX_ANISO_16_TO_1                    (4  <<  5)
+#       define RADEON_MAX_ANISO_MASK                       (7  <<  5) /* bits 7:5 */
+#       define RADEON_LOD_BIAS_MASK                        (0xff <<  8) /* bits 15:8 */
+#       define RADEON_LOD_BIAS_SHIFT                       8
+#       define RADEON_MAX_MIP_LEVEL_MASK                   (0x0f << 16) /* bits 19:16 */
+#       define RADEON_MAX_MIP_LEVEL_SHIFT                  16
+#       define RADEON_YUV_TO_RGB                           (1  << 20)
+#       define RADEON_YUV_TEMPERATURE_COOL                 (0  << 21)
+#       define RADEON_YUV_TEMPERATURE_HOT                  (1  << 21)
+#       define RADEON_YUV_TEMPERATURE_MASK                 (1  << 21) /* bit 21 */
+#       define RADEON_WRAPEN_S                             (1  << 22)
+#       define RADEON_CLAMP_S_WRAP                         (0  << 23)
+#       define RADEON_CLAMP_S_MIRROR                       (1  << 23)
+#       define RADEON_CLAMP_S_CLAMP_LAST                   (2  << 23)
+#       define RADEON_CLAMP_S_MIRROR_CLAMP_LAST            (3  << 23)
+#       define RADEON_CLAMP_S_CLAMP_BORDER                 (4  << 23)
+#       define RADEON_CLAMP_S_MIRROR_CLAMP_BORDER          (5  << 23)
+#       define RADEON_CLAMP_S_CLAMP_GL                     (6  << 23)
+#       define RADEON_CLAMP_S_MIRROR_CLAMP_GL              (7  << 23)
+#       define RADEON_CLAMP_S_MASK                         (7  << 23) /* bits 25:23 */
+#       define RADEON_WRAPEN_T                             (1  << 26)
+#       define RADEON_CLAMP_T_WRAP                         (0  << 27)
+#       define RADEON_CLAMP_T_MIRROR                       (1  << 27)
+#       define RADEON_CLAMP_T_CLAMP_LAST                   (2  << 27)
+#       define RADEON_CLAMP_T_MIRROR_CLAMP_LAST            (3  << 27)
+#       define RADEON_CLAMP_T_CLAMP_BORDER                 (4  << 27)
+#       define RADEON_CLAMP_T_MIRROR_CLAMP_BORDER          (5  << 27)
+#       define RADEON_CLAMP_T_CLAMP_GL                     (6  << 27)
+#       define RADEON_CLAMP_T_MIRROR_CLAMP_GL              (7  << 27)
+#       define RADEON_CLAMP_T_MASK                         (7  << 27) /* bits 29:27 */
+#       define RADEON_BORDER_MODE_OGL                      (0  << 31)
+#       define RADEON_BORDER_MODE_D3D                      (1U << 31) /* bit 31; unsigned literal because 1 << 31 overflows a 32-bit signed int */
+#define RADEON_PP_TXFORMAT_0                0x1c58 /* TXFORMAT_0..2 are 0x18 bytes apart */
+#define RADEON_PP_TXFORMAT_1                0x1c70
+#define RADEON_PP_TXFORMAT_2                0x1c88
+#       define RADEON_TXFORMAT_I8                 (0  <<  0)
+#       define RADEON_TXFORMAT_AI88               (1  <<  0)
+#       define RADEON_TXFORMAT_RGB332             (2  <<  0)
+#       define RADEON_TXFORMAT_ARGB1555           (3  <<  0)
+#       define RADEON_TXFORMAT_RGB565             (4  <<  0)
+#       define RADEON_TXFORMAT_ARGB4444           (5  <<  0)
+#       define RADEON_TXFORMAT_ARGB8888           (6  <<  0)
+#       define RADEON_TXFORMAT_RGBA8888           (7  <<  0)
+#       define RADEON_TXFORMAT_Y8                 (8  <<  0)
+#       define RADEON_TXFORMAT_VYUY422            (10 <<  0) /* format value 9 is not defined here */
+#       define RADEON_TXFORMAT_YVYU422            (11 <<  0)
+#       define RADEON_TXFORMAT_DXT1               (12 <<  0)
+#       define RADEON_TXFORMAT_DXT23              (14 <<  0) /* format value 13 is not defined here */
+#       define RADEON_TXFORMAT_DXT45              (15 <<  0)
+#       define RADEON_TXFORMAT_SHADOW16           (16 <<  0)
+#       define RADEON_TXFORMAT_SHADOW32           (17 <<  0)
+#       define RADEON_TXFORMAT_DUDV88             (18 <<  0)
+#       define RADEON_TXFORMAT_LDUDV655           (19 <<  0)
+#       define RADEON_TXFORMAT_LDUDUV8888         (20 <<  0)
+#       define RADEON_TXFORMAT_FORMAT_MASK        (31 <<  0) /* bits 4:0 */
+#       define RADEON_TXFORMAT_FORMAT_SHIFT       0
+#       define RADEON_TXFORMAT_APPLE_YUV_MODE     (1  <<  5)
+#       define RADEON_TXFORMAT_ALPHA_IN_MAP       (1  <<  6)
+#       define RADEON_TXFORMAT_NON_POWER2         (1  <<  7)
+#       define RADEON_TXFORMAT_WIDTH_MASK         (15 <<  8) /* bits 11:8 */
+#       define RADEON_TXFORMAT_WIDTH_SHIFT        8
+#       define RADEON_TXFORMAT_HEIGHT_MASK        (15 << 12) /* bits 15:12 */
+#       define RADEON_TXFORMAT_HEIGHT_SHIFT       12
+#       define RADEON_TXFORMAT_F5_WIDTH_MASK      (15 << 16) /* bits 19:16 */
+#       define RADEON_TXFORMAT_F5_WIDTH_SHIFT     16
+#       define RADEON_TXFORMAT_F5_HEIGHT_MASK     (15 << 20) /* bits 23:20 */
+#       define RADEON_TXFORMAT_F5_HEIGHT_SHIFT    20
+#       define RADEON_TXFORMAT_ST_ROUTE_STQ0      (0  << 24)
+#       define RADEON_TXFORMAT_ST_ROUTE_MASK      (3  << 24) /* bits 25:24 */
+#       define RADEON_TXFORMAT_ST_ROUTE_STQ1      (1  << 24)
+#       define RADEON_TXFORMAT_ST_ROUTE_STQ2      (2  << 24)
+#       define RADEON_TXFORMAT_ENDIAN_NO_SWAP     (0  << 26) /* ENDIAN_* are values 0..3 in bits 27:26 */
+#       define RADEON_TXFORMAT_ENDIAN_16BPP_SWAP  (1  << 26)
+#       define RADEON_TXFORMAT_ENDIAN_32BPP_SWAP  (2  << 26)
+#       define RADEON_TXFORMAT_ENDIAN_HALFDW_SWAP (3  << 26)
+#       define RADEON_TXFORMAT_ALPHA_MASK_ENABLE  (1  << 28)
+#       define RADEON_TXFORMAT_CHROMA_KEY_ENABLE  (1  << 29)
+#       define RADEON_TXFORMAT_CUBIC_MAP_ENABLE   (1  << 30)
+#       define RADEON_TXFORMAT_PERSPECTIVE_ENABLE (1U << 31) /* bit 31; unsigned literal because 1 << 31 overflows a 32-bit signed int */
+#define RADEON_PP_CUBIC_FACES_0             0x1d24 /* CUBIC_FACES_0..2 are 4 bytes apart */
+#define RADEON_PP_CUBIC_FACES_1             0x1d28
+#define RADEON_PP_CUBIC_FACES_2             0x1d2c
+#       define RADEON_FACE_WIDTH_1_SHIFT          0 /* eight 4-bit fields: width/height pairs 1..4 */
+#       define RADEON_FACE_HEIGHT_1_SHIFT         4
+#       define RADEON_FACE_WIDTH_1_MASK           (0xf << 0)
+#       define RADEON_FACE_HEIGHT_1_MASK          (0xf << 4)
+#       define RADEON_FACE_WIDTH_2_SHIFT          8
+#       define RADEON_FACE_HEIGHT_2_SHIFT         12
+#       define RADEON_FACE_WIDTH_2_MASK           (0xf << 8)
+#       define RADEON_FACE_HEIGHT_2_MASK          (0xf << 12)
+#       define RADEON_FACE_WIDTH_3_SHIFT          16
+#       define RADEON_FACE_HEIGHT_3_SHIFT         20
+#       define RADEON_FACE_WIDTH_3_MASK           (0xf << 16)
+#       define RADEON_FACE_HEIGHT_3_MASK          (0xf << 20)
+#       define RADEON_FACE_WIDTH_4_SHIFT          24
+#       define RADEON_FACE_HEIGHT_4_SHIFT         28
+#       define RADEON_FACE_WIDTH_4_MASK           (0xf << 24)
+#       define RADEON_FACE_HEIGHT_4_MASK          (0xfU << 28) /* bits 31:28; unsigned literal because 0xf << 28 overflows a 32-bit signed int */
+
+#define RADEON_PP_TXOFFSET_0                0x1c5c /* TXOFFSET_0..2 are 0x18 bytes apart */
+#define RADEON_PP_TXOFFSET_1                0x1c74
+#define RADEON_PP_TXOFFSET_2                0x1c8c
+#       define RADEON_TXO_ENDIAN_NO_SWAP     (0 << 0) /* ENDIAN_* are values 0..3 in bits 1:0 */
+#       define RADEON_TXO_ENDIAN_BYTE_SWAP   (1 << 0)
+#       define RADEON_TXO_ENDIAN_WORD_SWAP   (2 << 0)
+#       define RADEON_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
+#       define RADEON_TXO_MACRO_LINEAR       (0 << 2)
+#       define RADEON_TXO_MACRO_TILE         (1 << 2)
+#       define RADEON_TXO_MICRO_LINEAR       (0 << 3) /* MICRO_* are values 0..2 starting at bit 3 */
+#       define RADEON_TXO_MICRO_TILE_X2      (1 << 3)
+#       define RADEON_TXO_MICRO_TILE_OPT     (2 << 3)
+#       define RADEON_TXO_OFFSET_MASK        0xffffffe0 /* bits 31:5 */
+#       define RADEON_TXO_OFFSET_SHIFT       5
+
+#define RADEON_PP_CUBIC_OFFSET_T0_0         0x1dd0  /* bits [31:5] */
+#define RADEON_PP_CUBIC_OFFSET_T0_1         0x1dd4
+#define RADEON_PP_CUBIC_OFFSET_T0_2         0x1dd8
+#define RADEON_PP_CUBIC_OFFSET_T0_3         0x1ddc
+#define RADEON_PP_CUBIC_OFFSET_T0_4         0x1de0
+#define RADEON_PP_CUBIC_OFFSET_T1_0         0x1e00 /* T1 and T2 groups are contiguous: 0x1e00..0x1e24 */
+#define RADEON_PP_CUBIC_OFFSET_T1_1         0x1e04
+#define RADEON_PP_CUBIC_OFFSET_T1_2         0x1e08
+#define RADEON_PP_CUBIC_OFFSET_T1_3         0x1e0c
+#define RADEON_PP_CUBIC_OFFSET_T1_4         0x1e10
+#define RADEON_PP_CUBIC_OFFSET_T2_0         0x1e14
+#define RADEON_PP_CUBIC_OFFSET_T2_1         0x1e18
+#define RADEON_PP_CUBIC_OFFSET_T2_2         0x1e1c
+#define RADEON_PP_CUBIC_OFFSET_T2_3         0x1e20
+#define RADEON_PP_CUBIC_OFFSET_T2_4         0x1e24
+
+#define RADEON_PP_TEX_SIZE_0                0x1d04  /* NPOT */
+#define RADEON_PP_TEX_SIZE_1                0x1d0c
+#define RADEON_PP_TEX_SIZE_2                0x1d14
+#       define RADEON_TEX_USIZE_MASK        (0x7ff << 0) /* bits 10:0 */
+#       define RADEON_TEX_USIZE_SHIFT       0
+#       define RADEON_TEX_VSIZE_MASK        (0x7ff << 16) /* bits 26:16 */
+#       define RADEON_TEX_VSIZE_SHIFT       16
+#       define RADEON_SIGNED_RGB_MASK       (1 << 30)
+#       define RADEON_SIGNED_RGB_SHIFT      30
+#       define RADEON_SIGNED_ALPHA_MASK     (1U << 31) /* bit 31; unsigned literal because 1 << 31 overflows a 32-bit signed int */
+#       define RADEON_SIGNED_ALPHA_SHIFT    31
+#define RADEON_PP_TEX_PITCH_0               0x1d08  /* NPOT */
+#define RADEON_PP_TEX_PITCH_1               0x1d10  /* NPOT */
+#define RADEON_PP_TEX_PITCH_2               0x1d18  /* NPOT */
+/* Note: bits 13:5 of the RADEON_PP_TEX_PITCH_n registers hold the 32-byte-aligned stride of the texture map. */
+
+#define RADEON_PP_TXCBLEND_0                0x1c60 /* TXCBLEND_0..2 are 0x18 bytes apart */
+#define RADEON_PP_TXCBLEND_1                0x1c78
+#define RADEON_PP_TXCBLEND_2                0x1c90
+#       define RADEON_COLOR_ARG_A_SHIFT          0
+#       define RADEON_COLOR_ARG_A_MASK           (0x1f << 0) /* bits 4:0 */
+#       define RADEON_COLOR_ARG_A_ZERO           (0    << 0)
+#       define RADEON_COLOR_ARG_A_CURRENT_COLOR  (2    << 0) /* selector value 1 is not defined here */
+#       define RADEON_COLOR_ARG_A_CURRENT_ALPHA  (3    << 0)
+#       define RADEON_COLOR_ARG_A_DIFFUSE_COLOR  (4    << 0)
+#       define RADEON_COLOR_ARG_A_DIFFUSE_ALPHA  (5    << 0)
+#       define RADEON_COLOR_ARG_A_SPECULAR_COLOR (6    << 0)
+#       define RADEON_COLOR_ARG_A_SPECULAR_ALPHA (7    << 0)
+#       define RADEON_COLOR_ARG_A_TFACTOR_COLOR  (8    << 0)
+#       define RADEON_COLOR_ARG_A_TFACTOR_ALPHA  (9    << 0)
+#       define RADEON_COLOR_ARG_A_T0_COLOR       (10   << 0)
+#       define RADEON_COLOR_ARG_A_T0_ALPHA       (11   << 0)
+#       define RADEON_COLOR_ARG_A_T1_COLOR       (12   << 0)
+#       define RADEON_COLOR_ARG_A_T1_ALPHA       (13   << 0)
+#       define RADEON_COLOR_ARG_A_T2_COLOR       (14   << 0)
+#       define RADEON_COLOR_ARG_A_T2_ALPHA       (15   << 0)
+#       define RADEON_COLOR_ARG_A_T3_COLOR       (16   << 0)
+#       define RADEON_COLOR_ARG_A_T3_ALPHA       (17   << 0)
+#       define RADEON_COLOR_ARG_B_SHIFT          5
+#       define RADEON_COLOR_ARG_B_MASK           (0x1f << 5) /* bits 9:5; same selector values as ARG_A */
+#       define RADEON_COLOR_ARG_B_ZERO           (0    << 5)
+#       define RADEON_COLOR_ARG_B_CURRENT_COLOR  (2    << 5)
+#       define RADEON_COLOR_ARG_B_CURRENT_ALPHA  (3    << 5)
+#       define RADEON_COLOR_ARG_B_DIFFUSE_COLOR  (4    << 5)
+#       define RADEON_COLOR_ARG_B_DIFFUSE_ALPHA  (5    << 5)
+#       define RADEON_COLOR_ARG_B_SPECULAR_COLOR (6    << 5)
+#       define RADEON_COLOR_ARG_B_SPECULAR_ALPHA (7    << 5)
+#       define RADEON_COLOR_ARG_B_TFACTOR_COLOR  (8    << 5)
+#       define RADEON_COLOR_ARG_B_TFACTOR_ALPHA  (9    << 5)
+#       define RADEON_COLOR_ARG_B_T0_COLOR       (10   << 5)
+#       define RADEON_COLOR_ARG_B_T0_ALPHA       (11   << 5)
+#       define RADEON_COLOR_ARG_B_T1_COLOR       (12   << 5)
+#       define RADEON_COLOR_ARG_B_T1_ALPHA       (13   << 5)
+#       define RADEON_COLOR_ARG_B_T2_COLOR       (14   << 5)
+#       define RADEON_COLOR_ARG_B_T2_ALPHA       (15   << 5)
+#       define RADEON_COLOR_ARG_B_T3_COLOR       (16   << 5)
+#       define RADEON_COLOR_ARG_B_T3_ALPHA       (17   << 5)
+#       define RADEON_COLOR_ARG_C_SHIFT          10
+#       define RADEON_COLOR_ARG_C_MASK           (0x1f << 10) /* bits 14:10; same selector values as ARG_A */
+#       define RADEON_COLOR_ARG_C_ZERO           (0    << 10)
+#       define RADEON_COLOR_ARG_C_CURRENT_COLOR  (2    << 10)
+#       define RADEON_COLOR_ARG_C_CURRENT_ALPHA  (3    << 10)
+#       define RADEON_COLOR_ARG_C_DIFFUSE_COLOR  (4    << 10)
+#       define RADEON_COLOR_ARG_C_DIFFUSE_ALPHA  (5    << 10)
+#       define RADEON_COLOR_ARG_C_SPECULAR_COLOR (6    << 10)
+#       define RADEON_COLOR_ARG_C_SPECULAR_ALPHA (7    << 10)
+#       define RADEON_COLOR_ARG_C_TFACTOR_COLOR  (8    << 10)
+#       define RADEON_COLOR_ARG_C_TFACTOR_ALPHA  (9    << 10)
+#       define RADEON_COLOR_ARG_C_T0_COLOR       (10   << 10)
+#       define RADEON_COLOR_ARG_C_T0_ALPHA       (11   << 10)
+#       define RADEON_COLOR_ARG_C_T1_COLOR       (12   << 10)
+#       define RADEON_COLOR_ARG_C_T1_ALPHA       (13   << 10)
+#       define RADEON_COLOR_ARG_C_T2_COLOR       (14   << 10)
+#       define RADEON_COLOR_ARG_C_T2_ALPHA       (15   << 10)
+#       define RADEON_COLOR_ARG_C_T3_COLOR       (16   << 10)
+#       define RADEON_COLOR_ARG_C_T3_ALPHA       (17   << 10)
+#       define RADEON_COMP_ARG_A                 (1 << 15)
+#       define RADEON_COMP_ARG_A_SHIFT           15
+#       define RADEON_COMP_ARG_B                 (1 << 16)
+#       define RADEON_COMP_ARG_B_SHIFT           16
+#       define RADEON_COMP_ARG_C                 (1 << 17)
+#       define RADEON_COMP_ARG_C_SHIFT           17
+#       define RADEON_BLEND_CTL_MASK             (7 << 18) /* bits 20:18 */
+#       define RADEON_BLEND_CTL_ADD              (0 << 18)
+#       define RADEON_BLEND_CTL_SUBTRACT         (1 << 18)
+#       define RADEON_BLEND_CTL_ADDSIGNED        (2 << 18)
+#       define RADEON_BLEND_CTL_BLEND            (3 << 18)
+#       define RADEON_BLEND_CTL_DOT3             (4 << 18)
+#       define RADEON_SCALE_SHIFT                21
+#       define RADEON_SCALE_MASK                 (3 << 21) /* bits 22:21 */
+#       define RADEON_SCALE_1X                   (0 << 21)
+#       define RADEON_SCALE_2X                   (1 << 21)
+#       define RADEON_SCALE_4X                   (2 << 21)
+#       define RADEON_CLAMP_TX                   (1 << 23)
+#       define RADEON_T0_EQ_TCUR                 (1 << 24)
+#       define RADEON_T1_EQ_TCUR                 (1 << 25)
+#       define RADEON_T2_EQ_TCUR                 (1 << 26)
+#       define RADEON_T3_EQ_TCUR                 (1 << 27)
+#       define RADEON_COLOR_ARG_MASK             0x1f /* unshifted 5-bit selector mask */
+#       define RADEON_COMP_ARG_SHIFT             15 /* same value as RADEON_COMP_ARG_A_SHIFT */
+#define RADEON_PP_TXABLEND_0                0x1c64 /* three instances of this register; offsets step by 0x18 */
+#define RADEON_PP_TXABLEND_1                0x1c7c
+#define RADEON_PP_TXABLEND_2                0x1c94
+#       define RADEON_ALPHA_ARG_A_SHIFT          0 /* argument A selector: bits 3:0 */
+#       define RADEON_ALPHA_ARG_A_MASK           (0xf << 0)
+#       define RADEON_ALPHA_ARG_A_ZERO           (0   << 0)
+#       define RADEON_ALPHA_ARG_A_CURRENT_ALPHA  (1   << 0)
+#       define RADEON_ALPHA_ARG_A_DIFFUSE_ALPHA  (2   << 0)
+#       define RADEON_ALPHA_ARG_A_SPECULAR_ALPHA (3   << 0)
+#       define RADEON_ALPHA_ARG_A_TFACTOR_ALPHA  (4   << 0)
+#       define RADEON_ALPHA_ARG_A_T0_ALPHA       (5   << 0)
+#       define RADEON_ALPHA_ARG_A_T1_ALPHA       (6   << 0)
+#       define RADEON_ALPHA_ARG_A_T2_ALPHA       (7   << 0)
+#       define RADEON_ALPHA_ARG_A_T3_ALPHA       (8   << 0)
+#       define RADEON_ALPHA_ARG_B_SHIFT          4 /* argument B selector: same codes as A, bits 7:4 */
+#       define RADEON_ALPHA_ARG_B_MASK           (0xf << 4)
+#       define RADEON_ALPHA_ARG_B_ZERO           (0   << 4)
+#       define RADEON_ALPHA_ARG_B_CURRENT_ALPHA  (1   << 4)
+#       define RADEON_ALPHA_ARG_B_DIFFUSE_ALPHA  (2   << 4)
+#       define RADEON_ALPHA_ARG_B_SPECULAR_ALPHA (3   << 4)
+#       define RADEON_ALPHA_ARG_B_TFACTOR_ALPHA  (4   << 4)
+#       define RADEON_ALPHA_ARG_B_T0_ALPHA       (5   << 4)
+#       define RADEON_ALPHA_ARG_B_T1_ALPHA       (6   << 4)
+#       define RADEON_ALPHA_ARG_B_T2_ALPHA       (7   << 4)
+#       define RADEON_ALPHA_ARG_B_T3_ALPHA       (8   << 4)
+#       define RADEON_ALPHA_ARG_C_SHIFT          8 /* argument C selector: same codes as A, bits 11:8 */
+#       define RADEON_ALPHA_ARG_C_MASK           (0xf << 8)
+#       define RADEON_ALPHA_ARG_C_ZERO           (0   << 8)
+#       define RADEON_ALPHA_ARG_C_CURRENT_ALPHA  (1   << 8)
+#       define RADEON_ALPHA_ARG_C_DIFFUSE_ALPHA  (2   << 8)
+#       define RADEON_ALPHA_ARG_C_SPECULAR_ALPHA (3   << 8)
+#       define RADEON_ALPHA_ARG_C_TFACTOR_ALPHA  (4   << 8)
+#       define RADEON_ALPHA_ARG_C_T0_ALPHA       (5   << 8)
+#       define RADEON_ALPHA_ARG_C_T1_ALPHA       (6   << 8)
+#       define RADEON_ALPHA_ARG_C_T2_ALPHA       (7   << 8)
+#       define RADEON_ALPHA_ARG_C_T3_ALPHA       (8   << 8)
+#       define RADEON_DOT_ALPHA_DONT_REPLICATE   (1   << 12)
+#       define RADEON_ALPHA_ARG_MASK             0xf /* unshifted 4-bit mask; NOTE(review): presumably meant to be combined with the *_SHIFT values above - confirm */
+
+#define RADEON_PP_TFACTOR_0                 0x1c68 /* three instances; offsets step by 0x18, no bitfields defined here */
+#define RADEON_PP_TFACTOR_1                 0x1c80
+#define RADEON_PP_TFACTOR_2                 0x1c98
+
+#define RADEON_RB3D_BLENDCNTL               0x1c20 /* combine function bits 13:12, source factor bits 21:16, destination factor bits 29:24 */
+#       define RADEON_COMB_FCN_MASK                    (3  << 12)
+#       define RADEON_COMB_FCN_ADD_CLAMP               (0  << 12)
+#       define RADEON_COMB_FCN_ADD_NOCLAMP             (1  << 12)
+#       define RADEON_COMB_FCN_SUB_CLAMP               (2  << 12)
+#       define RADEON_COMB_FCN_SUB_NOCLAMP             (3  << 12)
+#       define RADEON_SRC_BLEND_GL_ZERO                (32 << 16) /* factor codes start at 32 */
+#       define RADEON_SRC_BLEND_GL_ONE                 (33 << 16)
+#       define RADEON_SRC_BLEND_GL_SRC_COLOR           (34 << 16)
+#       define RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_COLOR (35 << 16)
+#       define RADEON_SRC_BLEND_GL_DST_COLOR           (36 << 16)
+#       define RADEON_SRC_BLEND_GL_ONE_MINUS_DST_COLOR (37 << 16)
+#       define RADEON_SRC_BLEND_GL_SRC_ALPHA           (38 << 16)
+#       define RADEON_SRC_BLEND_GL_ONE_MINUS_SRC_ALPHA (39 << 16)
+#       define RADEON_SRC_BLEND_GL_DST_ALPHA           (40 << 16)
+#       define RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA (41 << 16)
+#       define RADEON_SRC_BLEND_GL_SRC_ALPHA_SATURATE  (42 << 16) /* only the source list defines code 42 */
+#       define RADEON_SRC_BLEND_MASK                   (63 << 16)
+#       define RADEON_DST_BLEND_GL_ZERO                (32 << 24) /* same factor codes as the source list, shifted to bit 24 */
+#       define RADEON_DST_BLEND_GL_ONE                 (33 << 24)
+#       define RADEON_DST_BLEND_GL_SRC_COLOR           (34 << 24)
+#       define RADEON_DST_BLEND_GL_ONE_MINUS_SRC_COLOR (35 << 24)
+#       define RADEON_DST_BLEND_GL_DST_COLOR           (36 << 24)
+#       define RADEON_DST_BLEND_GL_ONE_MINUS_DST_COLOR (37 << 24)
+#       define RADEON_DST_BLEND_GL_SRC_ALPHA           (38 << 24)
+#       define RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA (39 << 24)
+#       define RADEON_DST_BLEND_GL_DST_ALPHA           (40 << 24)
+#       define RADEON_DST_BLEND_GL_ONE_MINUS_DST_ALPHA (41 << 24)
+#       define RADEON_DST_BLEND_MASK                   (63 << 24)
+#define RADEON_RB3D_CNTL                    0x1c3c /* single-bit enables in bits 0-9, colour format code from bit 10, two more flags at bits 14-15 */
+#       define RADEON_ALPHA_BLEND_ENABLE       (1  <<  0)
+#       define RADEON_PLANE_MASK_ENABLE        (1  <<  1)
+#       define RADEON_DITHER_ENABLE            (1  <<  2)
+#       define RADEON_ROUND_ENABLE             (1  <<  3)
+#       define RADEON_SCALE_DITHER_ENABLE      (1  <<  4)
+#       define RADEON_DITHER_INIT              (1  <<  5)
+#       define RADEON_ROP_ENABLE               (1  <<  6)
+#       define RADEON_STENCIL_ENABLE           (1  <<  7)
+#       define RADEON_Z_ENABLE                 (1  <<  8)
+#       define RADEON_DEPTH_XZ_OFFEST_ENABLE   (1  <<  9) /* NOTE(review): "OFFEST" looks like a misspelling of OFFSET; identifier left as-is for existing users */
+#       define RADEON_COLOR_FORMAT_ARGB1555    (3  << 10) /* format codes are mutually exclusive values, not flags; no mask is defined here */
+#       define RADEON_COLOR_FORMAT_RGB565      (4  << 10)
+#       define RADEON_COLOR_FORMAT_ARGB8888    (6  << 10)
+#       define RADEON_COLOR_FORMAT_RGB332      (7  << 10)
+#       define RADEON_COLOR_FORMAT_Y8          (8  << 10)
+#       define RADEON_COLOR_FORMAT_RGB8        (9  << 10)
+#       define RADEON_COLOR_FORMAT_YUV422_VYUY (11 << 10)
+#       define RADEON_COLOR_FORMAT_YUV422_YVYU (12 << 10)
+#       define RADEON_COLOR_FORMAT_aYUV444     (14 << 10)
+#       define RADEON_COLOR_FORMAT_ARGB4444    (15 << 10) /* largest code listed: occupies bits 13:10 */
+#       define RADEON_CLRCMP_FLIP_ENABLE       (1  << 14)
+#       define RADEON_ZBLOCK16                 (1  << 15)
+#define RADEON_RB3D_COLOROFFSET             0x1c40
+#       define RADEON_COLOROFFSET_MASK      0xfffffff0 /* keeps bits 31:4; NOTE(review): presumably a 16-byte alignment requirement - confirm */
+#define RADEON_RB3D_COLORPITCH              0x1c48 /* pitch in bits 12:3, tiling flags at bits 16-17, endian swap code from bit 18 */
+#       define RADEON_COLORPITCH_MASK         0x00001ff8 /* bits 12:3; same value as RADEON_DEPTHPITCH_MASK */
+#       define RADEON_COLOR_TILE_ENABLE       (1 << 16)
+#       define RADEON_COLOR_MICROTILE_ENABLE  (1 << 17)
+#       define RADEON_COLOR_ENDIAN_NO_SWAP    (0 << 18)
+#       define RADEON_COLOR_ENDIAN_WORD_SWAP  (1 << 18)
+#       define RADEON_COLOR_ENDIAN_DWORD_SWAP (2 << 18)
+#define RADEON_RB3D_DEPTHOFFSET             0x1c24
+#define RADEON_RB3D_DEPTHPITCH              0x1c28 /* pitch in bits 12:3, endian swap code from bit 18 */
+#       define RADEON_DEPTHPITCH_MASK         0x00001ff8
+#       define RADEON_DEPTH_HYPERZ            (3 << 16) /* sets both bits 16 and 17 */
+#       define RADEON_DEPTH_ENDIAN_NO_SWAP    (0 << 18)
+#       define RADEON_DEPTH_ENDIAN_WORD_SWAP  (1 << 18)
+#       define RADEON_DEPTH_ENDIAN_DWORD_SWAP (2 << 18)
+#define RADEON_RB3D_PLANEMASK               0x1d84
+#define RADEON_RB3D_ROPCNTL                 0x1d80 /* 4-bit raster-op code in bits 11:8; all 16 codes are enumerated below */
+#       define RADEON_ROP_MASK              (15 << 8)
+#       define RADEON_ROP_CLEAR             (0  << 8)
+#       define RADEON_ROP_NOR               (1  << 8)
+#       define RADEON_ROP_AND_INVERTED      (2  << 8)
+#       define RADEON_ROP_COPY_INVERTED     (3  << 8)
+#       define RADEON_ROP_AND_REVERSE       (4  << 8)
+#       define RADEON_ROP_INVERT            (5  << 8)
+#       define RADEON_ROP_XOR               (6  << 8)
+#       define RADEON_ROP_NAND              (7  << 8)
+#       define RADEON_ROP_AND               (8  << 8)
+#       define RADEON_ROP_EQUIV             (9  << 8)
+#       define RADEON_ROP_NOOP              (10 << 8)
+#       define RADEON_ROP_OR_INVERTED       (11 << 8)
+#       define RADEON_ROP_COPY              (12 << 8)
+#       define RADEON_ROP_OR_REVERSE        (13 << 8)
+#       define RADEON_ROP_OR                (14 << 8)
+#       define RADEON_ROP_SET               (15 << 8)
+#define RADEON_RB3D_STENCILREFMASK          0x1d7c /* three 8-bit fields: reference 7:0, value mask 23:16, write mask 31:24 */
+#       define RADEON_STENCIL_REF_SHIFT       0
+#       define RADEON_STENCIL_REF_MASK        (0xff << 0)
+#       define RADEON_STENCIL_MASK_SHIFT      16 /* shift that matches RADEON_STENCIL_VALUE_MASK */
+#       define RADEON_STENCIL_VALUE_MASK      (0xff << 16)
+#       define RADEON_STENCIL_WRITEMASK_SHIFT 24 /* shift that matches RADEON_STENCIL_WRITE_MASK */
+#       define RADEON_STENCIL_WRITE_MASK      (0xffU << 24) /* unsigned: 0xff << 24 does not fit in a 32-bit int (undefined behaviour) */
+#define RADEON_RB3D_ZPASS_DATA              0x3290 /* data/address pair at consecutive 32-bit offsets; no bitfields defined here */
+#define RADEON_RB3D_ZPASS_ADDR              0x3294
+#define RADEON_RB3D_ZSTENCILCNTL            0x1c2c /* depth format 3:0, Z test 6:4, stencil test 14:12, stencil ops 18:16 / 22:20 / 26:24, flags 28-31 */
+#       define RADEON_DEPTH_FORMAT_MASK          (0xf << 0)
+#       define RADEON_DEPTH_FORMAT_16BIT_INT_Z   (0  <<  0)
+#       define RADEON_DEPTH_FORMAT_24BIT_INT_Z   (2  <<  0)
+#       define RADEON_DEPTH_FORMAT_24BIT_FLOAT_Z (3  <<  0)
+#       define RADEON_DEPTH_FORMAT_32BIT_INT_Z   (4  <<  0)
+#       define RADEON_DEPTH_FORMAT_32BIT_FLOAT_Z (5  <<  0)
+#       define RADEON_DEPTH_FORMAT_16BIT_FLOAT_W (7  <<  0)
+#       define RADEON_DEPTH_FORMAT_24BIT_FLOAT_W (9  <<  0)
+#       define RADEON_DEPTH_FORMAT_32BIT_FLOAT_W (11 <<  0)
+#       define RADEON_Z_TEST_NEVER               (0  <<  4)
+#       define RADEON_Z_TEST_LESS                (1  <<  4)
+#       define RADEON_Z_TEST_LEQUAL              (2  <<  4)
+#       define RADEON_Z_TEST_EQUAL               (3  <<  4)
+#       define RADEON_Z_TEST_GEQUAL              (4  <<  4)
+#       define RADEON_Z_TEST_GREATER             (5  <<  4)
+#       define RADEON_Z_TEST_NEQUAL              (6  <<  4)
+#       define RADEON_Z_TEST_ALWAYS              (7  <<  4)
+#       define RADEON_Z_TEST_MASK                (7  <<  4) /* same value as RADEON_Z_TEST_ALWAYS */
+#       define RADEON_Z_HIERARCHY_ENABLE         (1  <<  8)
+#       define RADEON_STENCIL_TEST_NEVER         (0  << 12) /* same comparison codes as the Z test, shifted to bit 12 */
+#       define RADEON_STENCIL_TEST_LESS          (1  << 12)
+#       define RADEON_STENCIL_TEST_LEQUAL        (2  << 12)
+#       define RADEON_STENCIL_TEST_EQUAL         (3  << 12)
+#       define RADEON_STENCIL_TEST_GEQUAL        (4  << 12)
+#       define RADEON_STENCIL_TEST_GREATER       (5  << 12)
+#       define RADEON_STENCIL_TEST_NEQUAL        (6  << 12)
+#       define RADEON_STENCIL_TEST_ALWAYS        (7  << 12)
+#       define RADEON_STENCIL_TEST_MASK          (0x7 << 12)
+#       define RADEON_STENCIL_FAIL_KEEP          (0  << 16) /* the FAIL, ZPASS and ZFAIL fields share one set of op codes 0-7 */
+#       define RADEON_STENCIL_FAIL_ZERO          (1  << 16)
+#       define RADEON_STENCIL_FAIL_REPLACE       (2  << 16)
+#       define RADEON_STENCIL_FAIL_INC           (3  << 16)
+#       define RADEON_STENCIL_FAIL_DEC           (4  << 16)
+#       define RADEON_STENCIL_FAIL_INVERT        (5  << 16)
+#       define RADEON_STENCIL_FAIL_INC_WRAP      (6  << 16)
+#       define RADEON_STENCIL_FAIL_DEC_WRAP      (7  << 16)
+#       define RADEON_STENCIL_FAIL_MASK          (0x7 << 16)
+#       define RADEON_STENCIL_ZPASS_KEEP         (0  << 20)
+#       define RADEON_STENCIL_ZPASS_ZERO         (1  << 20)
+#       define RADEON_STENCIL_ZPASS_REPLACE      (2  << 20)
+#       define RADEON_STENCIL_ZPASS_INC          (3  << 20)
+#       define RADEON_STENCIL_ZPASS_DEC          (4  << 20)
+#       define RADEON_STENCIL_ZPASS_INVERT       (5  << 20)
+#       define RADEON_STENCIL_ZPASS_INC_WRAP     (6  << 20)
+#       define RADEON_STENCIL_ZPASS_DEC_WRAP     (7  << 20)
+#       define RADEON_STENCIL_ZPASS_MASK         (0x7 << 20)
+#       define RADEON_STENCIL_ZFAIL_KEEP         (0  << 24)
+#       define RADEON_STENCIL_ZFAIL_ZERO         (1  << 24)
+#       define RADEON_STENCIL_ZFAIL_REPLACE      (2  << 24)
+#       define RADEON_STENCIL_ZFAIL_INC          (3  << 24)
+#       define RADEON_STENCIL_ZFAIL_DEC          (4  << 24)
+#       define RADEON_STENCIL_ZFAIL_INVERT       (5  << 24)
+#       define RADEON_STENCIL_ZFAIL_INC_WRAP     (6  << 24)
+#       define RADEON_STENCIL_ZFAIL_DEC_WRAP     (7  << 24)
+#       define RADEON_STENCIL_ZFAIL_MASK         (0x7 << 24)
+#       define RADEON_Z_COMPRESSION_ENABLE       (1  << 28)
+#       define RADEON_FORCE_Z_DIRTY              (1  << 29)
+#       define RADEON_Z_WRITE_ENABLE             (1  << 30)
+#       define RADEON_Z_DECOMPRESSION_ENABLE     (1U << 31) /* unsigned: 1 << 31 overflows a 32-bit int (undefined behaviour) */
+
+#define RADEON_RE_STIPPLE_ADDR              0x1cc8 /* address/data pair at consecutive 32-bit offsets */
+#define RADEON_RE_STIPPLE_DATA              0x1ccc
+#define RADEON_RE_LINE_PATTERN              0x1cd0 /* pattern in bits 15:0, repeat count from bit 16, pattern start from bit 24, flags at bits 28-29 */
+#       define RADEON_LINE_PATTERN_MASK             0x0000ffff
+#       define RADEON_LINE_REPEAT_COUNT_SHIFT       16
+#       define RADEON_LINE_PATTERN_START_SHIFT      24
+#       define RADEON_LINE_PATTERN_LITTLE_BIT_ORDER (0 << 28)
+#       define RADEON_LINE_PATTERN_BIG_BIT_ORDER    (1 << 28)
+#       define RADEON_LINE_PATTERN_AUTO_RESET       (1 << 29)
+#define RADEON_RE_LINE_STATE                0x1cd4 /* two fields, starting at bits 0 and 8; widths are not defined here */
+#       define RADEON_LINE_CURRENT_PTR_SHIFT   0
+#       define RADEON_LINE_CURRENT_COUNT_SHIFT 8
+#define RADEON_RE_MISC                      0x26c4 /* 5-bit stipple X offset in bits 4:0, Y offset in bits 12:8, bit-order flag at bit 16 */
+#       define RADEON_STIPPLE_COORD_MASK       0x1f /* unshifted width of either offset field */
+#       define RADEON_STIPPLE_X_OFFSET_SHIFT   0
+#       define RADEON_STIPPLE_X_OFFSET_MASK    (0x1f << 0)
+#       define RADEON_STIPPLE_Y_OFFSET_SHIFT   8
+#       define RADEON_STIPPLE_Y_OFFSET_MASK    (0x1f << 8)
+#       define RADEON_STIPPLE_LITTLE_BIT_ORDER (0 << 16)
+#       define RADEON_STIPPLE_BIG_BIT_ORDER    (1 << 16)
+#define RADEON_RE_SOLID_COLOR               0x1c1c
+#define RADEON_RE_TOP_LEFT                  0x26c0 /* two values packed in one register: left from bit 0, top from bit 16 */
+#       define RADEON_RE_LEFT_SHIFT         0
+#       define RADEON_RE_TOP_SHIFT          16
+#define RADEON_RE_WIDTH_HEIGHT              0x1c44 /* two values packed in one register: width from bit 0, height from bit 16 */
+#       define RADEON_RE_WIDTH_SHIFT        0
+#       define RADEON_RE_HEIGHT_SHIFT       16
+
+#define RADEON_SE_CNTL                      0x1c4c /* culling bits 4:0, shade modes in 2-bit fields from bit 6 to bit 15, zbias 18:16, rounding 31:28 */
+#       define RADEON_FFACE_CULL_CW          (0 <<  0)
+#       define RADEON_FFACE_CULL_CCW         (1 <<  0)
+#       define RADEON_FFACE_CULL_DIR_MASK    (1 <<  0)
+#       define RADEON_BFACE_CULL             (0 <<  1)
+#       define RADEON_BFACE_SOLID            (3 <<  1) /* 2-bit field at bits 2:1; no BFACE mask is defined here */
+#       define RADEON_FFACE_CULL             (0 <<  3)
+#       define RADEON_FFACE_SOLID            (3 <<  3)
+#       define RADEON_FFACE_CULL_MASK        (3 <<  3)
+#       define RADEON_BADVTX_CULL_DISABLE    (1 <<  5)
+#       define RADEON_FLAT_SHADE_VTX_0       (0 <<  6)
+#       define RADEON_FLAT_SHADE_VTX_1       (1 <<  6)
+#       define RADEON_FLAT_SHADE_VTX_2       (2 <<  6)
+#       define RADEON_FLAT_SHADE_VTX_LAST    (3 <<  6)
+#       define RADEON_DIFFUSE_SHADE_SOLID    (0 <<  8) /* the four *_SHADE_* fields share the codes solid=0, flat=1, gouraud=2 */
+#       define RADEON_DIFFUSE_SHADE_FLAT     (1 <<  8)
+#       define RADEON_DIFFUSE_SHADE_GOURAUD  (2 <<  8)
+#       define RADEON_DIFFUSE_SHADE_MASK     (3 <<  8)
+#       define RADEON_ALPHA_SHADE_SOLID      (0 << 10)
+#       define RADEON_ALPHA_SHADE_FLAT       (1 << 10)
+#       define RADEON_ALPHA_SHADE_GOURAUD    (2 << 10)
+#       define RADEON_ALPHA_SHADE_MASK       (3 << 10)
+#       define RADEON_SPECULAR_SHADE_SOLID   (0 << 12)
+#       define RADEON_SPECULAR_SHADE_FLAT    (1 << 12)
+#       define RADEON_SPECULAR_SHADE_GOURAUD (2 << 12)
+#       define RADEON_SPECULAR_SHADE_MASK    (3 << 12)
+#       define RADEON_FOG_SHADE_SOLID        (0 << 14)
+#       define RADEON_FOG_SHADE_FLAT         (1 << 14)
+#       define RADEON_FOG_SHADE_GOURAUD      (2 << 14)
+#       define RADEON_FOG_SHADE_MASK         (3 << 14)
+#       define RADEON_ZBIAS_ENABLE_POINT     (1 << 16)
+#       define RADEON_ZBIAS_ENABLE_LINE      (1 << 17)
+#       define RADEON_ZBIAS_ENABLE_TRI       (1 << 18)
+#       define RADEON_WIDELINE_ENABLE        (1 << 20)
+#       define RADEON_VPORT_XY_XFORM_ENABLE  (1 << 24)
+#       define RADEON_VPORT_Z_XFORM_ENABLE   (1 << 25)
+#       define RADEON_VTX_PIX_CENTER_D3D     (0 << 27)
+#       define RADEON_VTX_PIX_CENTER_OGL     (1 << 27)
+#       define RADEON_ROUND_MODE_TRUNC       (0 << 28)
+#       define RADEON_ROUND_MODE_ROUND       (1 << 28)
+#       define RADEON_ROUND_MODE_ROUND_EVEN  (2 << 28)
+#       define RADEON_ROUND_MODE_ROUND_ODD   (3 << 28)
+#       define RADEON_ROUND_PREC_16TH_PIX    (0 << 30)
+#       define RADEON_ROUND_PREC_8TH_PIX     (1 << 30)
+#       define RADEON_ROUND_PREC_4TH_PIX     (2U << 30) /* unsigned: 2 << 30 overflows a 32-bit int (undefined behaviour) */
+#       define RADEON_ROUND_PREC_HALF_PIX    (3U << 30) /* unsigned: 3 << 30 overflows a 32-bit int (undefined behaviour) */
+#define RADEON_SE_CNTL_STATUS               0x2140 /* 2-bit swap code in bits 1:0, TCL bypass flag at bit 8 */
+#       define RADEON_VC_NO_SWAP            (0 << 0)
+#       define RADEON_VC_16BIT_SWAP         (1 << 0)
+#       define RADEON_VC_32BIT_SWAP         (2 << 0)
+#       define RADEON_VC_HALF_DWORD_SWAP    (3 << 0)
+#       define RADEON_TCL_BYPASS            (1 << 8)
+#define RADEON_SE_COORD_FMT                 0x1c50 /* all fields below are single-bit flags */
+#       define RADEON_VTX_XY_PRE_MULT_1_OVER_W0  (1 <<  0)
+#       define RADEON_VTX_Z_PRE_MULT_1_OVER_W0   (1 <<  1)
+#       define RADEON_VTX_ST0_NONPARAMETRIC      (1 <<  8) /* ST0..ST3 flags sit on consecutive bits 8-11 */
+#       define RADEON_VTX_ST1_NONPARAMETRIC      (1 <<  9)
+#       define RADEON_VTX_ST2_NONPARAMETRIC      (1 << 10)
+#       define RADEON_VTX_ST3_NONPARAMETRIC      (1 << 11)
+#       define RADEON_VTX_W0_NORMALIZE           (1 << 12)
+#       define RADEON_VTX_W0_IS_NOT_1_OVER_W0    (1 << 16)
+#       define RADEON_VTX_ST0_PRE_MULT_1_OVER_W0 (1 << 17) /* ST0..ST3 flags here use every other bit: 17, 19, 21, 23 */
+#       define RADEON_VTX_ST1_PRE_MULT_1_OVER_W0 (1 << 19)
+#       define RADEON_VTX_ST2_PRE_MULT_1_OVER_W0 (1 << 21)
+#       define RADEON_VTX_ST3_PRE_MULT_1_OVER_W0 (1 << 23)
+#       define RADEON_TEX1_W_ROUTING_USE_W0      (0 << 26)
+#       define RADEON_TEX1_W_ROUTING_USE_Q1      (1 << 26)
+#define RADEON_SE_LINE_WIDTH                0x1db8 /* register offset only; no bitfields defined here */
+#define RADEON_SE_TCL_LIGHT_MODEL_CTL       0x226c /* flags in bits 9:0; four source selectors starting at bits 16, 18, 20, 22 */
+#       define RADEON_LIGHTING_ENABLE              (1 << 0)
+#       define RADEON_LIGHT_IN_MODELSPACE          (1 << 1)
+#       define RADEON_LOCAL_VIEWER                 (1 << 2)
+#       define RADEON_NORMALIZE_NORMALS            (1 << 3)
+#       define RADEON_RESCALE_NORMALS              (1 << 4)
+#       define RADEON_SPECULAR_LIGHTS              (1 << 5)
+#       define RADEON_DIFFUSE_SPECULAR_COMBINE     (1 << 6)
+#       define RADEON_LIGHT_ALPHA                  (1 << 7)
+#       define RADEON_LOCAL_LIGHT_VEC_GL           (1 << 8)
+#       define RADEON_LIGHT_NO_NORMAL_AMBIENT_ONLY (1 << 9)
+#       define RADEON_LM_SOURCE_STATE_PREMULT      0 /* unshifted codes 0-3; NOTE(review): presumably shifted by one of the *_SOURCE_SHIFT values below - confirm */
+#       define RADEON_LM_SOURCE_STATE_MULT         1
+#       define RADEON_LM_SOURCE_VERTEX_DIFFUSE     2
+#       define RADEON_LM_SOURCE_VERTEX_SPECULAR    3
+#       define RADEON_EMISSIVE_SOURCE_SHIFT        16 /* shifts are two bits apart */
+#       define RADEON_AMBIENT_SOURCE_SHIFT         18
+#       define RADEON_DIFFUSE_SOURCE_SHIFT         20
+#       define RADEON_SPECULAR_SOURCE_SHIFT        22
+#define RADEON_SE_TCL_MATERIAL_AMBIENT_RED     0x2220 /* each material colour is four consecutive 32-bit registers: red, green, blue, alpha */
+#define RADEON_SE_TCL_MATERIAL_AMBIENT_GREEN   0x2224
+#define RADEON_SE_TCL_MATERIAL_AMBIENT_BLUE    0x2228
+#define RADEON_SE_TCL_MATERIAL_AMBIENT_ALPHA   0x222c
+#define RADEON_SE_TCL_MATERIAL_DIFFUSE_RED     0x2230
+#define RADEON_SE_TCL_MATERIAL_DIFFUSE_GREEN   0x2234
+#define RADEON_SE_TCL_MATERIAL_DIFFUSE_BLUE    0x2238
+#define RADEON_SE_TCL_MATERIAL_DIFFUSE_ALPHA   0x223c
+#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED   0x2210 /* lowest offset of the four colour groups, although listed third; NOTE(review): "EMMISSIVE" spelling differs from RADEON_EMISSIVE_SOURCE_SHIFT */
+#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_GREEN 0x2214
+#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_BLUE  0x2218
+#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_ALPHA 0x221c
+#define RADEON_SE_TCL_MATERIAL_SPECULAR_RED    0x2240
+#define RADEON_SE_TCL_MATERIAL_SPECULAR_GREEN  0x2244
+#define RADEON_SE_TCL_MATERIAL_SPECULAR_BLUE   0x2248
+#define RADEON_SE_TCL_MATERIAL_SPECULAR_ALPHA  0x224c
+#define RADEON_SE_TCL_MATRIX_SELECT_0       0x225c /* eight selector fields, one every 4 bits; only the shifts are defined */
+#       define RADEON_MODELVIEW_0_SHIFT        0
+#       define RADEON_MODELVIEW_1_SHIFT        4
+#       define RADEON_MODELVIEW_2_SHIFT        8
+#       define RADEON_MODELVIEW_3_SHIFT        12
+#       define RADEON_IT_MODELVIEW_0_SHIFT     16
+#       define RADEON_IT_MODELVIEW_1_SHIFT     20
+#       define RADEON_IT_MODELVIEW_2_SHIFT     24
+#       define RADEON_IT_MODELVIEW_3_SHIFT     28
+#define RADEON_SE_TCL_MATRIX_SELECT_1       0x2260 /* same 4-bit spacing as MATRIX_SELECT_0 */
+#       define RADEON_MODELPROJECT_0_SHIFT     0
+#       define RADEON_MODELPROJECT_1_SHIFT     4
+#       define RADEON_MODELPROJECT_2_SHIFT     8
+#       define RADEON_MODELPROJECT_3_SHIFT     12
+#       define RADEON_TEXMAT_0_SHIFT           16
+#       define RADEON_TEXMAT_1_SHIFT           20
+#       define RADEON_TEXMAT_2_SHIFT           24
+#       define RADEON_TEXMAT_3_SHIFT           28
+
+
+#define RADEON_SE_TCL_OUTPUT_VTX_FMT        0x2254 /* one flag per vertex component, plus a weight count field starting at bit 15 */
+#       define RADEON_TCL_VTX_W0                 (1 <<  0)
+#       define RADEON_TCL_VTX_FP_DIFFUSE         (1 <<  1)
+#       define RADEON_TCL_VTX_FP_ALPHA           (1 <<  2)
+#       define RADEON_TCL_VTX_PK_DIFFUSE         (1 <<  3)
+#       define RADEON_TCL_VTX_FP_SPEC            (1 <<  4)
+#       define RADEON_TCL_VTX_FP_FOG             (1 <<  5)
+#       define RADEON_TCL_VTX_PK_SPEC            (1 <<  6)
+#       define RADEON_TCL_VTX_ST0                (1 <<  7)
+#       define RADEON_TCL_VTX_ST1                (1 <<  8)
+#       define RADEON_TCL_VTX_Q1                 (1 <<  9)
+#       define RADEON_TCL_VTX_ST2                (1 << 10)
+#       define RADEON_TCL_VTX_Q2                 (1 << 11)
+#       define RADEON_TCL_VTX_ST3                (1 << 12)
+#       define RADEON_TCL_VTX_Q3                 (1 << 13)
+#       define RADEON_TCL_VTX_Q0                 (1 << 14) /* Q0 comes after Q3 in bit order */
+#       define RADEON_TCL_VTX_WEIGHT_COUNT_SHIFT 15
+#       define RADEON_TCL_VTX_NORM0              (1 << 18)
+#       define RADEON_TCL_VTX_XY1                (1 << 27)
+#       define RADEON_TCL_VTX_Z1                 (1 << 28)
+#       define RADEON_TCL_VTX_W1                 (1 << 29)
+#       define RADEON_TCL_VTX_NORM1              (1 << 30)
+#       define RADEON_TCL_VTX_Z0                 (1U << 31) /* unsigned: 1 << 31 overflows a 32-bit int (undefined behaviour) */
+
+#define RADEON_SE_TCL_OUTPUT_VTX_SEL        0x2258 /* flags in bits 4:0; four texture output selectors starting at bits 16, 20, 24, 28 */
+#       define RADEON_TCL_COMPUTE_XYZW           (1 << 0)
+#       define RADEON_TCL_COMPUTE_DIFFUSE        (1 << 1)
+#       define RADEON_TCL_COMPUTE_SPECULAR       (1 << 2)
+#       define RADEON_TCL_FORCE_NAN_IF_COLOR_NAN (1 << 3)
+#       define RADEON_TCL_FORCE_INORDER_PROC     (1 << 4)
+#       define RADEON_TCL_TEX_INPUT_TEX_0        0 /* unshifted selector codes; NOTE(review): presumably shifted by one of the *_OUTPUT_SHIFT values below - confirm */
+#       define RADEON_TCL_TEX_INPUT_TEX_1        1
+#       define RADEON_TCL_TEX_INPUT_TEX_2        2
+#       define RADEON_TCL_TEX_INPUT_TEX_3        3
+#       define RADEON_TCL_TEX_COMPUTED_TEX_0     8 /* computed codes are the input codes with bit 3 set */
+#       define RADEON_TCL_TEX_COMPUTED_TEX_1     9
+#       define RADEON_TCL_TEX_COMPUTED_TEX_2     10
+#       define RADEON_TCL_TEX_COMPUTED_TEX_3     11
+#       define RADEON_TCL_TEX_0_OUTPUT_SHIFT     16
+#       define RADEON_TCL_TEX_1_OUTPUT_SHIFT     20
+#       define RADEON_TCL_TEX_2_OUTPUT_SHIFT     24
+#       define RADEON_TCL_TEX_3_OUTPUT_SHIFT     28
+
+#define RADEON_SE_TCL_PER_LIGHT_CTL_0       0x2270 /* each PER_LIGHT_CTL register holds two lights: one from bit 0, one from bit 16 */
+#       define RADEON_LIGHT_0_ENABLE               (1 <<  0)
+#       define RADEON_LIGHT_0_ENABLE_AMBIENT       (1 <<  1)
+#       define RADEON_LIGHT_0_ENABLE_SPECULAR      (1 <<  2)
+#       define RADEON_LIGHT_0_IS_LOCAL             (1 <<  3)
+#       define RADEON_LIGHT_0_IS_SPOT              (1 <<  4)
+#       define RADEON_LIGHT_0_DUAL_CONE            (1 <<  5)
+#       define RADEON_LIGHT_0_ENABLE_RANGE_ATTEN   (1 <<  6)
+#       define RADEON_LIGHT_0_CONSTANT_RANGE_ATTEN (1 <<  7)
+#       define RADEON_LIGHT_0_SHIFT                0
+#       define RADEON_LIGHT_1_ENABLE               (1 << 16) /* LIGHT_1_* flags are the LIGHT_0_* flags shifted left by 16 */
+#       define RADEON_LIGHT_1_ENABLE_AMBIENT       (1 << 17)
+#       define RADEON_LIGHT_1_ENABLE_SPECULAR      (1 << 18)
+#       define RADEON_LIGHT_1_IS_LOCAL             (1 << 19)
+#       define RADEON_LIGHT_1_IS_SPOT              (1 << 20)
+#       define RADEON_LIGHT_1_DUAL_CONE            (1 << 21)
+#       define RADEON_LIGHT_1_ENABLE_RANGE_ATTEN   (1 << 22)
+#       define RADEON_LIGHT_1_CONSTANT_RANGE_ATTEN (1 << 23)
+#       define RADEON_LIGHT_1_SHIFT                16
+#define RADEON_SE_TCL_PER_LIGHT_CTL_1       0x2274 /* lights 2-7 get only a shift; NOTE(review): presumably used with the LIGHT_0_* flag values - confirm */
+#       define RADEON_LIGHT_2_SHIFT            0
+#       define RADEON_LIGHT_3_SHIFT            16
+#define RADEON_SE_TCL_PER_LIGHT_CTL_2       0x2278
+#       define RADEON_LIGHT_4_SHIFT            0
+#       define RADEON_LIGHT_5_SHIFT            16
+#define RADEON_SE_TCL_PER_LIGHT_CTL_3       0x227c
+#       define RADEON_LIGHT_6_SHIFT            0
+#       define RADEON_LIGHT_7_SHIFT            16
+
+#define RADEON_SE_TCL_STATE_FLUSH           0x2284 /* register offset only; no bitfields defined here */
+
+#define RADEON_SE_TCL_SHININESS             0x2250 /* register offset only; no bitfields defined here */
+
+#define RADEON_SE_TCL_TEXTURE_PROC_CTL      0x2268 /* enable flags in bits 7:0; four 4-bit texgen input selectors starting at bits 16, 20, 24, 28 */
+#       define RADEON_TEXGEN_TEXMAT_0_ENABLE      (1 << 0)
+#       define RADEON_TEXGEN_TEXMAT_1_ENABLE      (1 << 1)
+#       define RADEON_TEXGEN_TEXMAT_2_ENABLE      (1 << 2)
+#       define RADEON_TEXGEN_TEXMAT_3_ENABLE      (1 << 3)
+#       define RADEON_TEXMAT_0_ENABLE             (1 << 4)
+#       define RADEON_TEXMAT_1_ENABLE             (1 << 5)
+#       define RADEON_TEXMAT_2_ENABLE             (1 << 6)
+#       define RADEON_TEXMAT_3_ENABLE             (1 << 7)
+#       define RADEON_TEXGEN_INPUT_MASK           0xf /* unshifted mask for one selector field */
+#       define RADEON_TEXGEN_INPUT_TEXCOORD_0     0 /* unshifted selector codes 0-8 */
+#       define RADEON_TEXGEN_INPUT_TEXCOORD_1     1
+#       define RADEON_TEXGEN_INPUT_TEXCOORD_2     2
+#       define RADEON_TEXGEN_INPUT_TEXCOORD_3     3
+#       define RADEON_TEXGEN_INPUT_OBJ            4
+#       define RADEON_TEXGEN_INPUT_EYE            5
+#       define RADEON_TEXGEN_INPUT_EYE_NORMAL     6
+#       define RADEON_TEXGEN_INPUT_EYE_REFLECT    7
+#       define RADEON_TEXGEN_INPUT_EYE_NORMALIZED 8
+#       define RADEON_TEXGEN_0_INPUT_SHIFT        16
+#       define RADEON_TEXGEN_1_INPUT_SHIFT        20
+#       define RADEON_TEXGEN_2_INPUT_SHIFT        24
+#       define RADEON_TEXGEN_3_INPUT_SHIFT        28
+
+#define RADEON_SE_TCL_UCP_VERT_BLEND_CTL    0x2264 /* clip-plane flags 7:0, fog mode 9:8, blend op count 14:12, vertex blend flags 22:16, cull flags 31:28 */
+#       define RADEON_UCP_IN_CLIP_SPACE            (1 <<  0)
+#       define RADEON_UCP_IN_MODEL_SPACE           (1 <<  1)
+#       define RADEON_UCP_ENABLE_0                 (1 <<  2) /* six enables on consecutive bits 2-7 */
+#       define RADEON_UCP_ENABLE_1                 (1 <<  3)
+#       define RADEON_UCP_ENABLE_2                 (1 <<  4)
+#       define RADEON_UCP_ENABLE_3                 (1 <<  5)
+#       define RADEON_UCP_ENABLE_4                 (1 <<  6)
+#       define RADEON_UCP_ENABLE_5                 (1 <<  7)
+#       define RADEON_TCL_FOG_MASK                 (3 <<  8)
+#       define RADEON_TCL_FOG_DISABLE              (0 <<  8)
+#       define RADEON_TCL_FOG_EXP                  (1 <<  8)
+#       define RADEON_TCL_FOG_EXP2                 (2 <<  8)
+#       define RADEON_TCL_FOG_LINEAR               (3 <<  8)
+#       define RADEON_RNG_BASED_FOG                (1 << 10)
+#       define RADEON_LIGHT_TWOSIDE                (1 << 11)
+#       define RADEON_BLEND_OP_COUNT_MASK          (7 << 12)
+#       define RADEON_BLEND_OP_COUNT_SHIFT         12
+#       define RADEON_POSITION_BLEND_OP_ENABLE     (1 << 16)
+#       define RADEON_NORMAL_BLEND_OP_ENABLE       (1 << 17)
+#       define RADEON_VERTEX_BLEND_SRC_0_PRIMARY   (0 << 18) /* one primary/secondary select bit per source, bits 18-21 */
+#       define RADEON_VERTEX_BLEND_SRC_0_SECONDARY (1 << 18)
+#       define RADEON_VERTEX_BLEND_SRC_1_PRIMARY   (0 << 19)
+#       define RADEON_VERTEX_BLEND_SRC_1_SECONDARY (1 << 19)
+#       define RADEON_VERTEX_BLEND_SRC_2_PRIMARY   (0 << 20)
+#       define RADEON_VERTEX_BLEND_SRC_2_SECONDARY (1 << 20)
+#       define RADEON_VERTEX_BLEND_SRC_3_PRIMARY   (0 << 21)
+#       define RADEON_VERTEX_BLEND_SRC_3_SECONDARY (1 << 21)
+#       define RADEON_VERTEX_BLEND_WGT_MINUS_ONE   (1 << 22)
+#       define RADEON_CULL_FRONT_IS_CW             (0 << 28)
+#       define RADEON_CULL_FRONT_IS_CCW            (1 << 28)
+#       define RADEON_CULL_FRONT                   (1 << 29)
+#       define RADEON_CULL_BACK                    (1 << 30)
+#       define RADEON_FORCE_W_TO_ONE               (1U << 31) /* unsigned: 1 << 31 overflows a 32-bit int (undefined behaviour) */
+
+#define RADEON_SE_VPORT_XSCALE              0x1d98 /* eight consecutive 32-bit registers, 0x1d98 through 0x1db4 */
+#define RADEON_SE_VPORT_XOFFSET             0x1d9c
+#define RADEON_SE_VPORT_YSCALE              0x1da0
+#define RADEON_SE_VPORT_YOFFSET             0x1da4
+#define RADEON_SE_VPORT_ZSCALE              0x1da8
+#define RADEON_SE_VPORT_ZOFFSET             0x1dac
+#define RADEON_SE_ZBIAS_FACTOR              0x1db0
+#define RADEON_SE_ZBIAS_CONSTANT            0x1db4
+
+#define RADEON_SE_VTX_FMT                   0x2080 /* flag values use the same bit positions as the RADEON_TCL_VTX_* flags of RADEON_SE_TCL_OUTPUT_VTX_FMT */
+#       define RADEON_SE_VTX_FMT_XY         0x00000000 /* zero: contributes no bits when OR-ed in */
+#       define RADEON_SE_VTX_FMT_W0         0x00000001
+#       define RADEON_SE_VTX_FMT_FPCOLOR    0x00000002
+#       define RADEON_SE_VTX_FMT_FPALPHA    0x00000004
+#       define RADEON_SE_VTX_FMT_PKCOLOR    0x00000008
+#       define RADEON_SE_VTX_FMT_FPSPEC     0x00000010
+#       define RADEON_SE_VTX_FMT_FPFOG      0x00000020
+#       define RADEON_SE_VTX_FMT_PKSPEC     0x00000040
+#       define RADEON_SE_VTX_FMT_ST0        0x00000080
+#       define RADEON_SE_VTX_FMT_ST1        0x00000100
+#       define RADEON_SE_VTX_FMT_Q1         0x00000200
+#       define RADEON_SE_VTX_FMT_ST2        0x00000400
+#       define RADEON_SE_VTX_FMT_Q2         0x00000800
+#       define RADEON_SE_VTX_FMT_ST3        0x00001000
+#       define RADEON_SE_VTX_FMT_Q3         0x00002000
+#       define RADEON_SE_VTX_FMT_Q0         0x00004000
+#       define RADEON_SE_VTX_FMT_BLND_WEIGHT_CNT_MASK  0x00038000 /* 3-bit field, bits 17:15 */
+#       define RADEON_SE_VTX_FMT_N0         0x00040000
+#       define RADEON_SE_VTX_FMT_XY1        0x08000000
+#       define RADEON_SE_VTX_FMT_Z1         0x10000000
+#       define RADEON_SE_VTX_FMT_W1         0x20000000
+#       define RADEON_SE_VTX_FMT_N1         0x40000000
+#       define RADEON_SE_VTX_FMT_Z          0x80000000 /* bit 31; as a hex literal this is already unsigned */
+
+				/* Registers for CP and Microcode Engine */
+#define RADEON_CP_ME_RAM_ADDR               0x07d4 /* four consecutive 32-bit registers, 0x07d4 through 0x07e0 */
+#define RADEON_CP_ME_RAM_RADDR              0x07d8
+#define RADEON_CP_ME_RAM_DATAH              0x07dc
+#define RADEON_CP_ME_RAM_DATAL              0x07e0
+
+#define RADEON_CP_RB_BASE                   0x0700 /* register offsets only; no bitfields defined here */
+#define RADEON_CP_RB_CNTL                   0x0704
+#define RADEON_CP_RB_RPTR_ADDR              0x070c
+#define RADEON_CP_RB_RPTR                   0x0710
+#define RADEON_CP_RB_WPTR                   0x0714
+
+#define RADEON_CP_IB_BASE                   0x0738 /* register offsets only; no bitfields defined here */
+#define RADEON_CP_IB_BUFSZ                  0x073c
+
+#define RADEON_CP_CSQ_CNTL                  0x0740
+#       define RADEON_CSQ_CNT_PRIMARY_MASK     (0xff << 0)
+#       define RADEON_CSQ_PRIDIS_INDDIS        (0    << 28)
+#       define RADEON_CSQ_PRIPIO_INDDIS        (1    << 28)
+#       define RADEON_CSQ_PRIBM_INDDIS         (2    << 28)
+#       define RADEON_CSQ_PRIPIO_INDBM         (3    << 28)
+#       define RADEON_CSQ_PRIBM_INDBM          (4    << 28)
+#       define RADEON_CSQ_PRIPIO_INDPIO        (15   << 28)
+#define RADEON_CP_CSQ_STAT                  0x07f8
+#       define RADEON_CSQ_RPTR_PRIMARY_MASK    (0xffu <<  0) /* four byte-wide fields; unsigned so << 24 is well defined */
+#       define RADEON_CSQ_WPTR_PRIMARY_MASK    (0xffu <<  8)
+#       define RADEON_CSQ_RPTR_INDIRECT_MASK   (0xffu << 16)
+#       define RADEON_CSQ_WPTR_INDIRECT_MASK   (0xffu << 24)
+#define RADEON_CP_CSQ_ADDR                  0x07f0
+#define RADEON_CP_CSQ_DATA                  0x07f4
+#define RADEON_CP_CSQ_APER_PRIMARY          0x1000
+#define RADEON_CP_CSQ_APER_INDIRECT         0x1300
+
+#define RADEON_CP_RB_WPTR_DELAY             0x0718
+#       define RADEON_PRE_WRITE_TIMER_SHIFT    0  /* shift count (bit position), not a mask */
+#       define RADEON_PRE_WRITE_LIMIT_SHIFT    23 /* shift count (bit position), not a mask */
+
+#define RADEON_AIC_CNTL                     0x01d0
+#       define RADEON_PCIGART_TRANSLATE_EN     (1 << 0) /* bit 0 of RADEON_AIC_CNTL */
+#define RADEON_AIC_LO_ADDR                  0x01dc
+
+
+
+				/* Constants: aliases for two RADEON_GUI_SCRATCH_REGn registers (those are defined outside this part of the file) */
+#define RADEON_LAST_FRAME_REG               RADEON_GUI_SCRATCH_REG0
+#define RADEON_LAST_CLEAR_REG               RADEON_GUI_SCRATCH_REG2
+
+
+
+				/* CP packet types: the type is encoded in the top two bits (see RADEON_CP_PACKET_MASK) */
+#define RADEON_CP_PACKET0                           0x00000000
+#define RADEON_CP_PACKET1                           0x40000000
+#define RADEON_CP_PACKET2                           0x80000000
+#define RADEON_CP_PACKET3                           0xC0000000
+#       define RADEON_CP_PACKET_MASK                0xC0000000 /* bits 31:30 */
+#       define RADEON_CP_PACKET_COUNT_MASK          0x3fff0000 /* bits 29:16 */
+#       define RADEON_CP_PACKET_MAX_DWORDS          (1 << 12)  /* = 4096 */
+#       define RADEON_CP_PACKET0_REG_MASK           0x000007ff /* bits 10:0 */
+#       define RADEON_CP_PACKET1_REG0_MASK          0x000007ff /* bits 10:0 */
+#       define RADEON_CP_PACKET1_REG1_MASK          0x003ff800 /* bits 21:11 */
+
+#define RADEON_CP_PACKET0_ONE_REG_WR                0x00008000 /* bit 15 */
+
+#define RADEON_CP_PACKET3_NOP                       0xC0001000 /* every value below has the PACKET3 type bits set and differs only in bits 15:8 */
+#define RADEON_CP_PACKET3_NEXT_CHAR                 0xC0001900
+#define RADEON_CP_PACKET3_PLY_NEXTSCAN              0xC0001D00
+#define RADEON_CP_PACKET3_SET_SCISSORS              0xC0001E00
+#define RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM     0xC0002300
+#define RADEON_CP_PACKET3_LOAD_MICROCODE            0xC0002400
+#define RADEON_CP_PACKET3_WAIT_FOR_IDLE             0xC0002600
+#define RADEON_CP_PACKET3_3D_DRAW_VBUF              0xC0002800
+#define RADEON_CP_PACKET3_3D_DRAW_IMMD              0xC0002900
+#define RADEON_CP_PACKET3_3D_DRAW_INDX              0xC0002A00
+#define RADEON_CP_PACKET3_LOAD_PALETTE              0xC0002C00
+#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR            0xC0002F00
+#define R200_CP_CMD_3D_DRAW_VBUF_2      0xC0003400 /* R200_ prefix, but the same 0xC000nn00 encoding as the PACKET3 values */
+#define R200_CP_CMD_3D_DRAW_IMMD_2      0xC0003500
+#define R200_CP_CMD_3D_DRAW_INDX_2      0xC0003600
+#define RADEON_CP_PACKET3_CNTL_PAINT                0xC0009100
+#define RADEON_CP_PACKET3_CNTL_BITBLT               0xC0009200
+#define RADEON_CP_PACKET3_CNTL_SMALLTEXT            0xC0009300
+#define RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT         0xC0009400
+#define RADEON_CP_PACKET3_CNTL_POLYLINE             0xC0009500
+#define RADEON_CP_PACKET3_CNTL_POLYSCANLINES        0xC0009800
+#define RADEON_CP_PACKET3_CNTL_PAINT_MULTI          0xC0009A00
+#define RADEON_CP_PACKET3_CNTL_BITBLT_MULTI         0xC0009B00
+#define RADEON_CP_PACKET3_CNTL_TRANS_BITBLT         0xC0009C00
+
+
+#define RADEON_CP_VC_FRMT_XY                        0x00000000 /* zero: no flag bit of its own */
+#define RADEON_CP_VC_FRMT_W0                        0x00000001 /* the remaining entries are single-bit flags, except the 3-bit BLND_WEIGHT_CNT mask */
+#define RADEON_CP_VC_FRMT_FPCOLOR                   0x00000002
+#define RADEON_CP_VC_FRMT_FPALPHA                   0x00000004
+#define RADEON_CP_VC_FRMT_PKCOLOR                   0x00000008
+#define RADEON_CP_VC_FRMT_FPSPEC                    0x00000010
+#define RADEON_CP_VC_FRMT_FPFOG                     0x00000020
+#define RADEON_CP_VC_FRMT_PKSPEC                    0x00000040 /* from here down the values equal the like-named RADEON_SE_VTX_FMT_* flags */
+#define RADEON_CP_VC_FRMT_ST0                       0x00000080
+#define RADEON_CP_VC_FRMT_ST1                       0x00000100
+#define RADEON_CP_VC_FRMT_Q1                        0x00000200
+#define RADEON_CP_VC_FRMT_ST2                       0x00000400
+#define RADEON_CP_VC_FRMT_Q2                        0x00000800
+#define RADEON_CP_VC_FRMT_ST3                       0x00001000
+#define RADEON_CP_VC_FRMT_Q3                        0x00002000
+#define RADEON_CP_VC_FRMT_Q0                        0x00004000
+#define RADEON_CP_VC_FRMT_BLND_WEIGHT_CNT_MASK      0x00038000 /* bits 17:15 */
+#define RADEON_CP_VC_FRMT_N0                        0x00040000
+#define RADEON_CP_VC_FRMT_XY1                       0x08000000
+#define RADEON_CP_VC_FRMT_Z1                        0x10000000
+#define RADEON_CP_VC_FRMT_W1                        0x20000000
+#define RADEON_CP_VC_FRMT_N1                        0x40000000
+#define RADEON_CP_VC_FRMT_Z                         0x80000000
+
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_NONE            0x00000000 /* PRIM_TYPE_*: enumerated codes 0x0..0xa in bits 3:0 */
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_POINT           0x00000001
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_LINE            0x00000002
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_LINE_STRIP      0x00000003
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_LIST        0x00000004
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN         0x00000005
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_STRIP       0x00000006
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_TYPE_2      0x00000007
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST       0x00000008
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_POINT_LIST 0x00000009
+#define RADEON_CP_VC_CNTL_PRIM_TYPE_3VRT_LINE_LIST  0x0000000a
+#define RADEON_CP_VC_CNTL_PRIM_WALK_IND             0x00000010 /* PRIM_WALK_*: codes 1..3 in bits 5:4 */
+#define RADEON_CP_VC_CNTL_PRIM_WALK_LIST            0x00000020
+#define RADEON_CP_VC_CNTL_PRIM_WALK_RING            0x00000030
+#define RADEON_CP_VC_CNTL_COLOR_ORDER_BGRA          0x00000000 /* bit 6 clear */
+#define RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA          0x00000040 /* bit 6 set */
+#define RADEON_CP_VC_CNTL_MAOS_ENABLE               0x00000080 /* bit 7 */
+#define RADEON_CP_VC_CNTL_VTX_FMT_NON_RADEON_MODE   0x00000000 /* bit 8 clear */
+#define RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE       0x00000100 /* bit 8 set */
+#define RADEON_CP_VC_CNTL_TCL_DISABLE               0x00000000 /* bit 9 clear */
+#define RADEON_CP_VC_CNTL_TCL_ENABLE                0x00000200 /* bit 9 set */
+#define RADEON_CP_VC_CNTL_NUM_SHIFT                 16 /* shift count, not a mask; presumably positions the vertex count -- confirm */
+
+#define RADEON_VS_MATRIX_0_ADDR                   0 /* matrix n starts at 4*n; decimal indices (units not stated here) */
+#define RADEON_VS_MATRIX_1_ADDR                   4
+#define RADEON_VS_MATRIX_2_ADDR                   8
+#define RADEON_VS_MATRIX_3_ADDR                  12
+#define RADEON_VS_MATRIX_4_ADDR                  16
+#define RADEON_VS_MATRIX_5_ADDR                  20
+#define RADEON_VS_MATRIX_6_ADDR                  24
+#define RADEON_VS_MATRIX_7_ADDR                  28
+#define RADEON_VS_MATRIX_8_ADDR                  32
+#define RADEON_VS_MATRIX_9_ADDR                  36
+#define RADEON_VS_MATRIX_10_ADDR                 40
+#define RADEON_VS_MATRIX_11_ADDR                 44
+#define RADEON_VS_MATRIX_12_ADDR                 48
+#define RADEON_VS_MATRIX_13_ADDR                 52
+#define RADEON_VS_MATRIX_14_ADDR                 56
+#define RADEON_VS_MATRIX_15_ADDR                 60
+#define RADEON_VS_LIGHT_AMBIENT_ADDR             64 /* the six light parameter groups are spaced 8 apart */
+#define RADEON_VS_LIGHT_DIFFUSE_ADDR             72
+#define RADEON_VS_LIGHT_SPECULAR_ADDR            80
+#define RADEON_VS_LIGHT_DIRPOS_ADDR              88
+#define RADEON_VS_LIGHT_HWVSPOT_ADDR             96
+#define RADEON_VS_LIGHT_ATTENUATION_ADDR        104
+#define RADEON_VS_MATRIX_EYE2CLIP_ADDR          112
+#define RADEON_VS_UCP_ADDR                      116 /* next entry starts at 122, leaving 6 slots here */
+#define RADEON_VS_GLOBAL_AMBIENT_ADDR           122
+#define RADEON_VS_FOG_PARAM_ADDR                123
+#define RADEON_VS_EYE_VECTOR_ADDR               124
+
+#define RADEON_SS_LIGHT_DCD_ADDR                  0 /* SS_*: a second index space, numbered from 0 again; light groups are spaced 8 apart */
+#define RADEON_SS_LIGHT_SPOT_EXPONENT_ADDR        8
+#define RADEON_SS_LIGHT_SPOT_CUTOFF_ADDR         16
+#define RADEON_SS_LIGHT_SPECULAR_THRESH_ADDR     24
+#define RADEON_SS_LIGHT_RANGE_CUTOFF_ADDR        32
+#define RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR       48 /* four consecutive guard-band entries, 48..51 */
+#define RADEON_SS_VERT_GUARD_DISCARD_ADJ_ADDR    49
+#define RADEON_SS_HORZ_GUARD_CLIP_ADJ_ADDR       50
+#define RADEON_SS_HORZ_GUARD_DISCARD_ADJ_ADDR    51
+#define RADEON_SS_SHININESS                      60 /* NOTE(review): the only entry in this group without an _ADDR suffix */
+
+#define RADEON_TV_MASTER_CNTL                    0x0800
+#       define RADEON_TVCLK_ALWAYS_ONb           (1 << 30) /* bit 30 */
+#define RADEON_TV_DAC_CNTL                       0x088c
+#       define RADEON_TV_DAC_CMPOUT              (1 << 5)  /* bit 5 */
+#define RADEON_TV_PRE_DAC_MUX_CNTL               0x0888
+#       define RADEON_Y_RED_EN                   (1 << 0)
+#       define RADEON_C_GRN_EN                   (1 << 1)
+#       define RADEON_CMP_BLU_EN                 (1 << 2)
+#       define RADEON_RED_MX_FORCE_DAC_DATA      (6 << 4)  /* the same code 6 placed at bit 4, bit 8 and bit 12 for red/green/blue */
+#       define RADEON_GRN_MX_FORCE_DAC_DATA      (6 << 8)
+#       define RADEON_BLU_MX_FORCE_DAC_DATA      (6 << 12)
+#       define RADEON_TV_FORCE_DAC_DATA_SHIFT    16 /* shift count, not a mask */
+#endif
diff --git a/src/mesa/drivers/dri/savage/Makefile b/src/mesa/drivers/dri/savage/Makefile
new file mode 100644
index 0000000000..53511552c6
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/Makefile
@@ -0,0 +1,26 @@
+# src/mesa/drivers/dri/savage/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+# Name of the driver library for this directory.
+LIBNAME = savage_dri.so
+
+DRIVER_SOURCES = \
+	savage_xmesa.c \
+	savagedd.c \
+	savagestate.c \
+	savagetex.c \
+	savagetris.c \
+	savagerender.c \
+	savageioctl.c \
+	savagespan.c
+# COMMON_SOURCES is not assigned in this file; it is expected to come from an included makefile.
+C_SOURCES = \
+	$(COMMON_SOURCES) \
+	$(DRIVER_SOURCES) 
+# This driver lists no assembly sources.
+ASM_SOURCES = 
+
+
+include ../Makefile.template
+
diff --git a/src/mesa/drivers/dri/savage/savage_3d_reg.h b/src/mesa/drivers/dri/savage/savage_3d_reg.h
new file mode 100644
index 0000000000..45733cdf92
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savage_3d_reg.h
@@ -0,0 +1,711 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef SAVAGE_3D_REG_H
+#define SAVAGE_3D_REG_H
+
+#define VIDEO_MEM_ADR                   0x02 /* NOTE(review): presumably memory-space selector codes; their users are not in this header -- confirm */
+#define SYSTEM_MEM_ADR                  0x01
+#define AGP_MEM_ADR                     0x03
+
+/***********************************************************
+
+  ----------- 3D ENGINE UNIT Registers -------------
+
+  *********************************************************/
+
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned reserved : 4;  /* bits 3:0 */
+        unsigned ofs      : 28; /* bits 31:4 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegZPixelOffset;
+
+/* This reg exists only on Savage4. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned cmpFunc     :  3; /* bits 2:0 */
+        unsigned stencilEn   :  1; /* bit 3 */
+        unsigned readMask    :  8; /* bits 11:4 */
+        unsigned writeMask   :  8; /* bits 19:12 */
+        unsigned failOp      :  3; /* bits 22:20 */
+        unsigned passZfailOp :  3; /* bits 25:23 */
+        unsigned passZpassOp :  3; /* bits 28:26 */
+        unsigned reserved    :  3; /* bits 31:29 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegStencilCtrl;
+
+/**************************
+ Texture Registers
+**************************/
+/* The layout of this reg differs between Savage4 and Savage3D. */
+typedef union /* one 32-bit register word, viewed two ways (Savage4 layout) */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned tex0Width  : 4; /* bits 3:0 */
+        unsigned tex0Height : 4; /* bits 7:4 */
+        unsigned tex0Fmt    : 4; /* bits 11:8 */
+        unsigned tex1Width  : 4; /* bits 15:12 */
+        unsigned tex1Height : 4; /* bits 19:16 */
+        unsigned tex1Fmt    : 4; /* bits 23:20 */
+        unsigned texBLoopEn : 1; /* bit 24 */
+        unsigned tex0En     : 1; /* bit 25 */
+        unsigned tex1En     : 1; /* bit 26 */
+        unsigned orthProjEn : 1; /* bit 27 */
+        unsigned reserved   : 1; /* bit 28 */
+        unsigned palSize    : 2; /* bits 30:29 */
+        unsigned newPal     : 1; /* bit 31 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegTexDescr_s4;
+typedef union /* one 32-bit register word, viewed two ways (Savage3D layout) */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned texWidth  : 4; /* bits 3:0 */
+        unsigned reserved1 : 4; /* bits 7:4 */
+        unsigned texHeight : 4; /* bits 11:8 */
+        unsigned reserved2 : 4; /* bits 15:12 */
+	/* Savage3D supports only the first 8 texture formats defined in
+	   enum TexFmt in savage_bci.h (hence the 3-bit field). */
+        unsigned texFmt    : 3; /* bits 18:16 */
+        unsigned palSize   : 2; /* bits 20:19 */
+        unsigned reserved3 : 10; /* bits 30:21 */
+        unsigned newPal    : 1; /* bit 31 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegTexDescr_s3d;
+
+/* The layout of this reg is the same on Savage4 and Savage3D,
+   but the Savage4 has two of them, Savage3D has only one. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned inSysTex : 1;  /* bit 0 */
+        unsigned inAGPTex : 1;  /* bit 1 */
+        unsigned reserved : 1;  /* bit 2 */
+        unsigned addr     : 29; /* bits 31:3 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegTexAddr;
+
+/* The layout of this reg is the same on Savage4 and Savage3D. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned reserved : 3;  /* bits 2:0 */
+        unsigned addr     : 29; /* bits 31:3 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegTexPalAddr;
+
+/* The layout of this reg on Savage4 and Savage3D is very similar. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned xprClr0 : 16; /* bits 15:0 */
+        unsigned xprClr1 : 16; /* bits 31:16; this is reserved on Savage3D */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegTexXprClr;   /* transparency color in RGB565 format */
+
+/* The layout of this reg differs between Savage4 and Savage3D.
+ * Savage4 has two of them, Savage3D has only one. */
+typedef union /* one 32-bit register word, viewed two ways (Savage4 layout) */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned filterMode         : 2; /* bits 1:0 */
+        unsigned mipmapEnable       : 1; /* bit 2 */
+        unsigned dBias              : 9; /* bits 11:3 */
+        unsigned dMax               : 4; /* bits 15:12 */
+        unsigned uMode              : 2; /* bits 17:16 */
+        unsigned vMode              : 2; /* bits 19:18 */
+        unsigned useDFraction       : 1; /* bit 20 */
+        unsigned texXprEn           : 1; /* bit 21 */
+        unsigned clrBlendAlphaSel   : 2; /* bits 23:22 */
+        unsigned clrArg1CopyAlpha   : 1; /* bit 24 */
+        unsigned clrArg2CopyAlpha   : 1; /* bit 25 */
+        unsigned clrArg1Invert      : 1; /* bit 26 */
+        unsigned clrArg2Invert      : 1; /* bit 27 */
+        unsigned alphaBlendAlphaSel : 2; /* bits 29:28 */
+        unsigned alphaArg1Invert    : 1; /* bit 30 */
+        unsigned alphaArg2Invert    : 1; /* bit 31 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegTexCtrl_s4;
+typedef union /* one 32-bit register word, viewed two ways (Savage3D layout) */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned filterMode    : 2; /* bits 1:0 */
+        unsigned mipmapDisable : 1; /* bit 2; NOTE(review): named "Disable" here but "Enable" in savageRegTexCtrl_s4 -- polarity presumably inverted */
+        unsigned dBias         : 9; /* bits 11:3 */
+        unsigned uWrapEn       : 1; /* bit 12 */
+        unsigned vWrapEn       : 1; /* bit 13 */
+        unsigned wrapMode      : 2; /* bits 15:14 */
+        unsigned texEn         : 1; /* bit 16 */
+        unsigned useDFraction  : 1; /* bit 17 */
+        unsigned reserved1     : 1; /* bit 18 */
+	/* Color Compare Alpha Blend Control
+           0 - reduce dest alpha to 0 or 1
+           1 - blend with destination
+	   The Utah-Driver doesn't know how to use it and sets it to 0. */
+        unsigned CCA           : 1; /* bit 19 */
+        unsigned texXprEn      : 1; /* bit 20 */
+        unsigned reserved2     : 11; /* bits 31:21 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegTexCtrl_s3d;
+
+/* This reg exists only on Savage4. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned colorArg1Sel    : 2; /* bits 1:0 */
+        unsigned colorArg2Sel    : 3; /* bits 4:2 */
+        unsigned colorInvAlphaEn : 1; /* bit 5 */
+        unsigned colorInvArg2En  : 1; /* bit 6 */
+        unsigned colorPremodSel  : 1; /* bit 7 */
+        unsigned colorMod1Sel    : 1; /* bit 8 */
+        unsigned colorMod2Sel    : 2; /* bits 10:9 */
+        unsigned colorAddSel     : 2; /* bits 12:11 */
+        unsigned colorDoBlend    : 1; /* bit 13 */
+        unsigned colorDo2sCompl  : 1; /* bit 14 */
+        unsigned colorAddBiasEn  : 1; /* bit 15 */
+        unsigned alphaArg1Sel    : 2; /* bits 17:16 */
+        unsigned alphaArg2Sel    : 3; /* bits 20:18 */
+        unsigned alphaMod1Sel    : 1; /* bit 21 */
+        unsigned alphaMod2Sel    : 2; /* bits 23:22 */
+        unsigned alphaAdd0Sel    : 1; /* bit 24 */
+        unsigned alphaDoBlend    : 1; /* bit 25 */
+        unsigned alphaDo2sCompl  : 1; /* bit 26 */
+        unsigned colorStageClamp : 1; /* bit 27 */
+        unsigned alphaStageClamp : 1; /* bit 28 */
+        unsigned colorDoDiffMul  : 1; /* bit 29 */
+        unsigned LeftShiftVal    : 2; /* bits 31:30 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegTexBlendCtrl;
+
+/* This reg exists only on Savage4. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned blue  : 8; /* bits 7:0 */
+        unsigned green : 8; /* bits 15:8 */
+        unsigned red   : 8; /* bits 23:16 */
+        unsigned alpha : 8; /* bits 31:24 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegTexBlendColor;
+
+/********************************
+ Tiled Surface Registers
+**********************************/
+
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned frmBufOffset : 13; /* bits 12:0 */
+        unsigned reserved     : 12; /* bits 24:13 */
+        unsigned widthInTile  : 6;  /* bits 30:25 */
+        unsigned bitPerPixel  : 1;  /* bit 31 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegTiledSurface;
+
+/********************************
+ Draw/Shading Control Registers
+**********************************/
+
+/* This reg exists only on Savage4. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned scissorXStart : 11; /* bits 10:0 */
+        unsigned dPerfAccelEn  : 1;  /* bit 11 */
+        unsigned scissorYStart : 12; /* bits 23:12 */
+        unsigned alphaRefVal   : 8;  /* bits 31:24 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegDrawCtrl0;
+
+/* This reg exists only on Savage4. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned scissorXEnd      : 11; /* bits 10:0 */
+        unsigned xyOffsetEn       :  1; /* bit 11 */
+        unsigned scissorYEnd      : 12; /* bits 23:12 */
+        unsigned ditherEn         :  1; /* bit 24 */
+        unsigned nonNormTexCoord  :  1; /* bit 25 */
+        unsigned cullMode         :  2; /* bits 27:26 */
+        unsigned alphaTestCmpFunc :  3; /* bits 30:28 */
+        unsigned alphaTestEn      :  1; /* bit 31 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegDrawCtrl1;
+
+/* This reg exists only on Savage4. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned dstAlphaMode        :  3; /* bits 2:0 */
+
+	/**
+	 * This bit enables \c GL_FUNC_SUBTRACT.  Like most DirectX oriented
+	 * hardware, there's no way to do \c GL_FUNC_REVERSE_SUBTRACT.
+	 *
+	 * \todo
+	 * Add support for \c GL_FUNC_SUBTRACT!
+	 */
+        unsigned dstMinusSrc         :  1; /* bit 3 */
+        unsigned srcAlphaMode        :  3; /* bits 6:4 */
+        unsigned binaryFinalAlpha    :  1; /* bit 7 */
+        unsigned dstAlphaModeHighBit :  1; /* bit 8 */
+        unsigned srcAlphaModeHighBit :  1; /* bit 9 */
+        unsigned reserved1           : 15; /* bits 24:10 */
+        unsigned wrZafterAlphaTst    :  1; /* bit 25 */
+        unsigned drawUpdateEn        :  1; /* bit 26 */
+        unsigned zUpdateEn           :  1; /* bit 27 */
+        unsigned flatShadeEn         :  1; /* bit 28 */
+        unsigned specShadeEn         :  1; /* bit 29 */
+        unsigned flushPdDestWrites   :  1; /* bit 30 */
+        unsigned flushPdZbufWrites   :  1; /* bit 31 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegDrawLocalCtrl;
+
+/* This reg exists only on Savage3D. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned ditherEn          : 1; /* bit 0 */
+        unsigned xyOffsetEn        : 1; /* bit 1 */
+        unsigned cullMode          : 2; /* bits 3:2 */
+        unsigned vertexCountReset  : 1; /* bit 4 */
+        unsigned flatShadeEn       : 1; /* bit 5 */
+        unsigned specShadeEn       : 1; /* bit 6 */
+        unsigned dstAlphaMode      : 3; /* bits 9:7 */
+        unsigned srcAlphaMode      : 3; /* bits 12:10 */
+        unsigned reserved1         : 1; /* bit 13 */
+        unsigned alphaTestCmpFunc  : 3; /* bits 16:14 */
+        unsigned alphaTestEn       : 1; /* bit 17 */
+        unsigned alphaRefVal       : 8; /* bits 25:18 */
+        unsigned texBlendCtrl      : 3; /* bits 28:26 */
+        unsigned flushPdDestWrites : 1; /* bit 29 */
+        unsigned flushPdZbufWrites : 1; /* bit 30 */
+
+	/**
+	 * Disable perspective correct interpolation for vertex color, vertex
+	 * fog, and vertex alpha.  For OpenGL, this should \b always be zero.
+	 */
+        unsigned interpMode        : 1; /* bit 31 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegDrawCtrl;
+
+#define SAVAGETBC_DECAL_S3D                     0 /* codes 0..7; presumably values for the 3-bit savageRegDrawCtrl.texBlendCtrl field -- confirm against users */
+#define SAVAGETBC_MODULATE_S3D                  1
+#define SAVAGETBC_DECALALPHA_S3D                2
+#define SAVAGETBC_MODULATEALPHA_S3D             3
+#define SAVAGETBC_4_S3D                         4 /* codes 4, 5 and 7 have only numeric names */
+#define SAVAGETBC_5_S3D                         5
+#define SAVAGETBC_COPY_S3D                      6
+#define SAVAGETBC_7_S3D                         7
+
+/* This reg exists only on Savage3D. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned scissorXStart : 11; /* bits 10:0 */
+	unsigned reserved1     : 5; /* bits 15:11 */
+        unsigned scissorYStart : 11; /* bits 26:16 */
+	unsigned reserved2     : 5; /* bits 31:27 */
+    } ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegScissorsStart;
+
+/* This reg exists only on Savage3D. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned scissorXEnd : 11; /* bits 10:0 */
+	unsigned reserved1   : 5; /* bits 15:11 */
+        unsigned scissorYEnd : 11; /* bits 26:16 */
+	unsigned reserved2   : 5; /* bits 31:27 */
+    } ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegScissorsEnd;
+
+/********************************
+ Address Registers
+**********************************/
+
+/* I haven't found a Savage3D equivalent of this reg in the Utah-driver.
+ * But Tim Roberts claims that the Savage3D supports DMA vertex and
+ * command buffers. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned isSys    : 1;  /* bit 0 */
+        unsigned isAGP    : 1;  /* bit 1 */
+        unsigned reserved : 1;  /* bit 2 */
+        unsigned addr     : 29; /* bits 31:3; quad word aligned */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegVertBufAddr;
+
+/* I haven't found a Savage3D equivalent of this reg in the Utah-driver.
+ * But Tim Roberts claims that the Savage3D supports DMA vertex and
+ * command buffers. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned isSys    : 1;  /* bit 0 */
+        unsigned isAGP    : 1;  /* bit 1 */
+        unsigned reserved : 1;  /* bit 2 */
+        unsigned addr     : 29; /* bits 31:3; 4-quad word aligned */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegDMABufAddr;
+
+/********************************
+ H/W Debug Registers
+**********************************/
+/* The layout of this reg is the same on Savage4 and Savage3D. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned y01        : 1; /* bit 0 */
+        unsigned y12        : 1; /* bit 1 */
+        unsigned y20        : 1; /* bit 2 */
+        unsigned u01        : 1; /* bit 3 */
+        unsigned u12        : 1; /* bit 4 */
+        unsigned u20        : 1; /* bit 5 */
+        unsigned v01        : 1; /* bit 6 */
+        unsigned v12        : 1; /* bit 7 */
+        unsigned v20        : 1; /* bit 8 */
+        unsigned cullEn     : 1; /* bit 9 */
+        unsigned cullOrient : 1; /* bit 10 */
+        unsigned loadNewTex : 1; /* bit 11 */
+        unsigned loadNewPal : 1; /* bit 12 */
+        unsigned doDSetup   : 1; /* bit 13 */
+        unsigned reserved   : 17; /* bits 30:14 */
+        unsigned kickOff    : 1; /* bit 31 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegFlag;
+
+/********************************
+ Z Buffer Registers -- Global
+**********************************/
+
+/* The layout of this reg differs between Savage4 and Savage3D. */
+typedef union /* one 32-bit register word, viewed two ways (Savage4 layout) */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned zCmpFunc      : 3; /* bits 2:0 */
+        unsigned reserved1     : 2; /* bits 4:3 */
+        unsigned zBufEn        : 1; /* bit 5 */
+        unsigned reserved2     : 1; /* bit 6 */
+        unsigned zExpOffset    : 8; /* bits 14:7 */
+        unsigned reserved3     : 1; /* bit 15 */
+        unsigned stencilRefVal : 8; /* bits 23:16 */
+        unsigned autoZEnable   : 1; /* bit 24 */
+        unsigned frameID       : 1; /* bit 25 */
+        unsigned reserved4     : 4; /* bits 29:26 */
+        unsigned floatZEn      : 1; /* bit 30 */
+        unsigned wToZEn        : 1; /* bit 31 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegZBufCtrl_s4;
+typedef union /* one 32-bit register word, viewed two ways (Savage3D layout) */
+{
+    struct { /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+        unsigned zCmpFunc         : 3; /* bits 2:0 */
+        unsigned drawUpdateEn     : 1; /* bit 3 */
+        unsigned zUpdateEn        : 1; /* bit 4 */
+        unsigned zBufEn           : 1; /* bit 5 */
+
+        /**
+	 * We suspect that, in conjunction with
+	 * \c savageRegZBufOffset::zDepthSelect, these 2 bits are actually
+	 * \c stencilUpdateEn and \c stencilBufEn.  If not, then some of
+	 * the bits in \c reserved2 may fulfill that purpose.
+	 */
+        unsigned reserved1        : 2; /* bits 7:6 */
+
+        unsigned zExpOffset       : 8; /* bits 15:8 */
+        unsigned wrZafterAlphaTst : 1; /* bit 16 */
+        unsigned reserved2        : 15; /* bits 31:17 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegZBufCtrl_s3d;
+
+/* The layout of this reg on Savage4 and Savage3D is very similar. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+	/* In the Utah-Driver the offset is defined as 13-bit, 2k-aligned. */
+        unsigned offset           : 14; /* bits 13:0 */
+        unsigned reserved         : 11; /* bits 24:14; 12-bits in Utah-driver */
+        unsigned zBufWidthInTiles : 6;  /* bits 30:25 */
+       
+        /**
+	 * 0 selects 16-bit depth buffer.  On Savage4 hardware, 1 selects
+	 * 24-bit depth buffer (with 8-bits for stencil).  Though it has never
+	 * been tried, we suspect that on Savage3D hardware, 1 selects 15-bit
+	 * depth buffer (with 1-bit for stencil).
+	 */
+        unsigned zDepthSelect     : 1;  /* bit 31 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegZBufOffset;
+
+/* The layout of this reg is the same on Savage4 and Savage3D. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned rLow      : 6; /* bits 5:0 */
+        unsigned reserved1 : 2; /* bits 7:6 */
+        unsigned rHigh     : 6; /* bits 13:8 */
+        unsigned reserved2 : 2; /* bits 15:14 */
+        unsigned wLow      : 6; /* bits 21:16 */
+        unsigned reserved3 : 2; /* bits 23:22 */
+        unsigned wHigh     : 6; /* bits 29:24 */
+        unsigned reserved4 : 2; /* bits 31:30 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegZWatermarks;
+
+/********************************
+ Fog Registers -- Global
+**********************************/
+/* The layout of this reg is the same on Savage4 and Savage3D. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned fogClr      : 24; /* bits 23:0 */
+        unsigned expShift    : 3;  /* bits 26:24 */
+        unsigned reserved    : 1;  /* bit 27 */
+        unsigned fogEn       : 1;  /* bit 28 */
+        unsigned fogMode     : 1;  /* bit 29 */
+        unsigned fogEndShift : 2;  /* bits 31:30 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegFogCtrl;
+
+/*not in spec, but tempo for pp and driver*/
+typedef union /* one 32-bit word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned fogDensity : 16; /* bits 15:0 */
+        unsigned fogStart   : 16; /* bits 31:16 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegFogParam;
+
+/**************************************
+ Destination Buffer Registers -- Global
+***************************************/
+
+/* The layout of this reg on Savage4 and Savage3D is very similar. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned dstWidthInTile :  7; /* bits 6:0 */
+        unsigned reserved       :  1; /* bit 7 */
+	/* In the Utah-Driver the offset is defined as 13-bit, 2k-aligned. */
+        unsigned offset         : 14; /* bits 21:8 */
+        unsigned reserved1      :  7; /* bits 28:22 */
+	/* antiAliasMode does not exist in the Utah-driver. But it includes the
+	 * high bit of this in the destPixFmt. However, only values 0 and 2
+	 * are used as dstPixFmt, so antiAliasMode is effectively always 0
+	 * in the Utah-driver. In other words, treat as reserved on Savage3D.*/
+        unsigned antiAliasMode  :  2; /* bits 30:29 */
+        unsigned dstPixFmt      :  1; /* bit 31 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegDestCtrl;
+
+/* The layout of this reg on Savage4 and Savage3D is very similar. */
+typedef union /* one 32-bit register word, viewed two ways */
+{
+    struct /* bit numbers below assume LSB-first bit-field allocation -- TODO confirm for the target ABI */
+    {
+        unsigned destReadLow   : 6; /* bits 5:0 */
+        unsigned destReadHigh  : 6; /* bits 11:6 */
+        unsigned destWriteLow  : 6; /* bits 17:12 */
+        unsigned destWriteHigh : 6; /* bits 23:18 */
+        unsigned texRead       : 4; /* bits 27:24 */
+        unsigned reserved4     : 2; /* bits 29:28 */
+	/* The Utah-driver calls this pixel FIFO length:
+	 * 00 - 240, 01 - 180, 10 - 120, 11 - 60
+	 * However, it is not used in either driver. */
+        unsigned destFlush     : 2; /* bits 31:30 */
+    }ni; /* widths total 32 */
+    uint32_t ui; /* the same word as a raw 32-bit value */
+} savageRegDestTexWatermarks;
+
+/* Savage4/Twister/ProSavage register BCI addresses (same order as the members of savageRegistersS4) */
+#define SAVAGE_DRAWLOCALCTRL_S4       0x1e
+#define SAVAGE_TEXPALADDR_S4          0x1f
+#define SAVAGE_TEXCTRL0_S4            0x20
+#define SAVAGE_TEXCTRL1_S4            0x21
+#define SAVAGE_TEXADDR0_S4            0x22
+#define SAVAGE_TEXADDR1_S4            0x23
+#define SAVAGE_TEXBLEND0_S4           0x24
+#define SAVAGE_TEXBLEND1_S4           0x25
+#define SAVAGE_TEXXPRCLR_S4           0x26 /* never used */
+#define SAVAGE_TEXDESCR_S4            0x27
+#define SAVAGE_FOGTABLE_S4            0x28 /* first of 8 slots, 0x28-0x2f (fogTable[32] in savageRegistersS4) */
+#define SAVAGE_FOGCTRL_S4             0x30
+#define SAVAGE_STENCILCTRL_S4         0x31
+#define SAVAGE_ZBUFCTRL_S4            0x32
+#define SAVAGE_ZBUFOFF_S4             0x33
+#define SAVAGE_DESTCTRL_S4            0x34
+#define SAVAGE_DRAWCTRLGLOBAL0_S4     0x35
+#define SAVAGE_DRAWCTRLGLOBAL1_S4     0x36
+#define SAVAGE_ZWATERMARK_S4          0x37
+#define SAVAGE_DESTTEXRWWATERMARK_S4  0x38
+#define SAVAGE_TEXBLENDCOLOR_S4       0x39
+/* Savage3D/MX/IC register BCI addresses (0x31, 0x35 and 0x36 name different registers than on Savage4) */
+#define SAVAGE_TEXPALADDR_S3D         0x18
+#define SAVAGE_TEXXPRCLR_S3D          0x19 /* never used */
+#define SAVAGE_TEXADDR_S3D            0x1A
+#define SAVAGE_TEXDESCR_S3D           0x1B
+#define SAVAGE_TEXCTRL_S3D            0x1C
+#define SAVAGE_FOGTABLE_S3D           0x20 /* first of 16 slots, 0x20-0x2f (fogTable[64] in savageRegistersS3D) */
+#define SAVAGE_FOGCTRL_S3D            0x30
+#define SAVAGE_DRAWCTRL_S3D           0x31
+#define SAVAGE_ZBUFCTRL_S3D           0x32
+#define SAVAGE_ZBUFOFF_S3D            0x33
+#define SAVAGE_DESTCTRL_S3D           0x34
+#define SAVAGE_SCSTART_S3D            0x35
+#define SAVAGE_SCEND_S3D              0x36
+#define SAVAGE_ZWATERMARK_S3D         0x37 
+#define SAVAGE_DESTTEXRWWATERMARK_S3D 0x38
+
+#define SAVAGE_FIRST_REG 0x18 /* lowest BCI register address covered by savageRegisters */
+#define SAVAGE_NR_REGS   34   /* 0x18 through 0x39 inclusive */
+typedef struct savage_registers_s4_t { /* Savage4 register block: 34 dwords in BCI address order, 0x18..0x39 */
+    uint32_t                   unused1[6];        /* 0x18-0x1d */
+    savageRegDrawLocalCtrl     drawLocalCtrl;     /* 0x1e */
+    savageRegTexPalAddr        texPalAddr;        /* 0x1f */
+    savageRegTexCtrl_s4        texCtrl[2];        /* 0x20, 0x21 */
+    savageRegTexAddr           texAddr[2];        /* 0x22, 0x23 */
+    savageRegTexBlendCtrl      texBlendCtrl[2];   /* 0x24, 0x25 */
+    savageRegTexXprClr         texXprClr;         /* 0x26 */
+    savageRegTexDescr_s4       texDescr;          /* 0x27 */
+    uint8_t                   fogTable[32];      /* 0x28-0x2f (8dwords = 32 bytes) */
+    savageRegFogCtrl           fogCtrl;           /* 0x30 */
+    savageRegStencilCtrl       stencilCtrl;       /* 0x31 */
+    savageRegZBufCtrl_s4       zBufCtrl;          /* 0x32 */
+    savageRegZBufOffset        zBufOffset;        /* 0x33 */
+    savageRegDestCtrl          destCtrl;          /* 0x34 */
+    savageRegDrawCtrl0         drawCtrl0;         /* 0x35 */
+    savageRegDrawCtrl1         drawCtrl1;         /* 0x36 */
+    savageRegZWatermarks       zWatermarks;       /* 0x37 */
+    savageRegDestTexWatermarks destTexWatermarks; /* 0x38 */
+    savageRegTexBlendColor     texBlendColor;     /* 0x39 */
+} savageRegistersS4;
+typedef struct savage_registers_s3d_t { /* Savage3D register block: 34 dwords in BCI address order, 0x18..0x39 */
+    savageRegTexPalAddr        texPalAddr;        /* 0x18 */
+    savageRegTexXprClr         texXprClr;         /* 0x19 */
+    savageRegTexAddr           texAddr;           /* 0x1a */
+    savageRegTexDescr_s3d      texDescr;          /* 0x1b */
+    savageRegTexCtrl_s3d       texCtrl;           /* 0x1c */
+    uint32_t                  unused1[3];        /* 0x1d-0x1f */
+    uint8_t                   fogTable[64];      /* 0x20-0x2f (16dwords = 64 bytes) */
+    savageRegFogCtrl           fogCtrl;           /* 0x30 */
+    savageRegDrawCtrl          drawCtrl;          /* 0x31 */
+    savageRegZBufCtrl_s3d      zBufCtrl;          /* 0x32 */
+    savageRegZBufOffset        zBufOffset;        /* 0x33 */
+    savageRegDestCtrl          destCtrl;          /* 0x34 */
+    savageRegScissorsStart     scissorsStart;     /* 0x35 */
+    savageRegScissorsEnd       scissorsEnd;       /* 0x36 */
+    savageRegZWatermarks       zWatermarks;       /* 0x37 */
+    savageRegDestTexWatermarks destTexWatermarks; /* 0x38 */
+    uint32_t                   unused2;           /* 0x39; pads the block to the same 34 dwords as savageRegistersS4 */
+} savageRegistersS3D;
+typedef union savage_registers_t { /* the same register block under three views */
+    savageRegistersS4  s4;  /* Savage4 member names */
+    savageRegistersS3D s3d; /* Savage3D member names */
+    uint32_t           ui[SAVAGE_NR_REGS]; /* raw dwords; by the address comments above, ui[i] is register SAVAGE_FIRST_REG + i */
+} savageRegisters;
+
+
+#define DV_PF_555           (0x1<<8) /* = 0x100; DV_PF_*: codes pre-shifted to bit 8 (presumably pixel formats -- confirm against users) */
+#define DV_PF_565           (0x2<<8) /* = 0x200 */
+#define DV_PF_8888          (0x4<<8) /* = 0x400 */
+
+#define SAVAGEPACKCOLORA4L4(l,a) /* low nibble = bits 7:4 of l, high nibble = bits 7:4 of a; l is not masked */ \
+  (((l) >> 4) | ((a) & 0xf0))
+
+#define SAVAGEPACKCOLOR4444(r,g,b,a) /* bits 7:4 of a,r,g,b -> bits 15:12, 11:8, 7:4, 3:0; b is shifted but not masked */ \
+  (((b) >> 4) | ((g) & 0xf0) | (((r) & 0xf0) << 4) | (((a) & 0xf0) << 8))
+
+#define SAVAGEPACKCOLOR1555(r,g,b,a) /* bit 15 set when a is non-zero; top 5 bits of r,g,b -> bits 14:10, 9:5, 4:0 */ \
+  (((a) ? 0x8000 : 0) | (((r) & 0xf8) << 7) | \
+    (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3))
+
+#define SAVAGEPACKCOLOR8888(r,g,b,a) /* a,r,g,b -> bits 31:24, 23:16, 15:8, 7:0; no masking; uint32_t math so a >= 0x80 cannot overflow a signed int */ \
+  (((uint32_t)(a) << 24) | ((uint32_t)(r) << 16) | ((uint32_t)(g) << 8) | (uint32_t)(b))
+
+#define SAVAGEPACKCOLOR565(r,g,b) /* top 5/6/5 bits of r,g,b -> bits 15:11, 10:5, 4:0 */ \
+  ((((b) & 0xf8) >> 3) | (((g) & 0xfc) << 3) | (((r) & 0xf8) << 8))
+
+
+#endif
diff --git a/src/mesa/drivers/dri/savage/savage_bci.h b/src/mesa/drivers/dri/savage/savage_bci.h
new file mode 100644
index 0000000000..33cfac32c7
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savage_bci.h
@@ -0,0 +1,612 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef SAVAGE_BCI_H
+#define SAVAGE_BCI_H
+/***********************
+  3D and 2D command
+************************/
+
+typedef enum {
+    AMO_BurstCmdData=   0x01010000,
+    AMO_3DReg=          0x01048500,
+    AMO_MotionCompReg=  0x01048900,
+    AMO_VideoEngUnit=   0x01048A00,
+    AMO_CmdBufAddr=     0x01048c14,
+    AMO_TiledSurfReg0=  0x01048C40,
+    AMO_TiledSurfReg1=  0x01048C44,
+    AMO_TiledSurfReg2=  0x01048C48,
+    AMO_TiledSurfReg3=  0x01048C4C,
+    AMO_TiledSurfReg4=  0x01048C50,
+    AMO_TiledSurfReg5=  0x01048C54,
+    AMO_TiledSurfReg6=  0x01048C58,
+    AMO_TiledSurfReg7=  0x01048C5C,
+    AMO_LPBModeReg=     0x0100FF00,
+    AMO_LPBFifoSat=     0x0100FF04,
+    AMO_LPBIntFlag=     0x0100FF08,
+    AMO_LPBFmBufA0=     0x0100FF0C,
+    AMO_LPBFmBufA1=     0x0100FF10,
+    AMO_LPBRdWtAdr=     0x0100FF14,
+    AMO_LPBRdWtDat=     0x0100FF18,
+    AMO_LPBIOPort =     0x0100FF1C,
+    AMO_LPBSerPort=     0x0100FF20,
+    AMO_LPBVidInWinSz=  0x0100FF24,
+    AMO_LPBVidDatOffs=  0x0100FF28,
+    AMO_LPBHorScalCtrl= 0x0100FF2C,
+    AMO_LPBVerDeciCtrl= 0x0100FF30,
+    AMO_LPBLnStride=    0x0100FF34,
+    AMO_LPBFmBufAddr2=  0x0100FF38,
+    AMO_LPBVidCapVDCtrl=0x0100FF3C,
+
+    AMO_LPBVidCapFdStAd=0x0100FF60,
+    AMO_LPBVidCapFdMdAd=0x0100FF64,
+    AMO_LPBVidCapFdBtAd=0x0100FF68,
+    AMO_LPBVidCapFdSize=0x0100FF6C,
+    AMO_LPBBilinDecim1= 0x0100FF70,
+    AMO_LPBBilinDecim2= 0x0100FF74,
+    AMO_LPBBilinDecim3= 0x0100FF78,
+    AMO_LPBDspVEUHorSRR=0x0100FF7C,
+    AMO_LPBDspVEUVerSRR=0x0100FF80,
+    AMO_LPBDspVeuDnScDR=0x0100FF84,
+    AMO_LPB_VEUERPReg=  0x0100FF88,
+    AMO_LPB_VBISelReg=  0x0100FF8C,
+    AMO_LPB_VBIBasAdReg=0x0100FF90,
+    AMO_LPB_DatOffsReg= 0x0100FF94,
+    AMO_LPB_VBIVerDcReg=0x0100FF98,
+    AMO_LPB_VBICtrlReg= 0x0100FF9C,
+    AMO_LPB_VIPXferCtrl=0x0100FFA0,
+    AMO_LPB_FIFOWtMark= 0x0100FFA4,
+    AMO_LPB_FIFOCount=  0x0100FFA8,
+    AMO_LPBFdSkipPat=   0x0100FFAC,
+    AMO_LPBCapVEUHorSRR=0x0100FFB0,
+    AMO_LPBCapVEUVerSRR=0x0100FFB4,
+    AMO_LPBCapVeuDnScDR=0x0100FFB8
+
+}AddressMapOffset;   
+/*more to add*/
+
+
+typedef enum {
+  CMD_DrawPrim=0x10,          /*10000*/
+  CMD_DrawIdxPrim=0x11,       /*10001*/
+  CMD_SetRegister=0x12,       /*10010*/
+  CMD_UpdateShadowStat=0x13 , /*10011*/
+  CMD_PageFlip=0x14,          /* 10100*/
+  CMD_BusMasterImgXfer=0x15,  /* 10101*/
+  CMD_ScaledImgXfer=0x16,     /* 10110*/
+  CMD_Macroblock=0x17,         /*10111*/
+  CMD_Wait= 0x18,             /*11000*/
+  CMD_2D_NOP=0x08,            /* 01000*/
+  CMD_2D_RCT=0x09,            /*01001   rectangular fill*/
+  CMD_2D_SCNL=0x0a,           /* 01010   scan line*/
+  CMD_2D_LIN=0x0b,            /*01011   line*/
+  CMD_2D_SMTXT=0x0c,          /*01100*/
+  CMD_2D_BPTXT=0x0d,          /*01101*/
+  CMD_InitFlag=0x1f           /*11111, for S/W initialization control*/
+}Command;
+
+
+typedef enum {
+    VRR_List,
+    VRR_Strip,
+    VRR_Fan,
+    VRR_QuadList
+}VertexReplaceRule;
+
+/***********************
+   Destination
+************************/
+
+typedef enum {
+    DFT_RGB565 = 0,
+    DFT_XRGB8888
+}DestinationFmt;
+
+
+/*************************
+    Z Buffer / Alpha test
+*************************/
+
+typedef enum {
+    CF_Never,
+    CF_Less,
+    CF_Equal,
+    CF_LessEqual,
+    CF_Greater,
+    CF_NotEqual,
+    CF_GreaterEqual,
+    CF_Always
+}ZCmpFunc;   /* same for Alpha test and Stencil test compare function */
+
+typedef ZCmpFunc ACmpFunc;
+
+typedef enum {
+  ZDS_16i,    /* .16 fixed*/
+  ZDS_32f     /* 1.8.15 float*/
+}ZDepthSelect;
+
+
+/**********************************
+    BCI Register Addressing Index
+***********************************/
+typedef enum {
+
+    CRI_VTX0_X =    0x00,
+    CRI_VTX0_Y =    0x01,
+    CRI_VTX0_W =    0x02,
+    CRI_VTX0_DIFFU= 0x03,
+    CRI_VTX0_SPECU= 0x04,
+    CRI_VTX0_U =    0x05,
+    CRI_VTX0_V =    0x06,
+    CRI_VTX0_U2 =   0x07,
+    CRI_VTX0_V2 =   0x08,
+    CRI_VTX1_X =    0x09,
+    CRI_VTX1_Y =    0x0a,
+    CRI_VTX1_W =    0x0b,
+    CRI_VTX1_DIFFU= 0x0c,
+    CRI_VTX1_SPECU= 0x0d,
+    CRI_VTX1_U =    0x0e,
+    CRI_VTX1_V =    0x0f,
+    CRI_VTX1_U2 =   0x10,
+    CRI_VTX1_V2 =   0x11,
+    CRI_VTX2_X =    0x12,
+    CRI_VTX2_Y =    0x13,
+    CRI_VTX2_W =    0x14,
+    CRI_VTX2_DIFFU= 0x15,
+    CRI_VTX2_SPECU= 0x16,
+    CRI_VTX2_U =    0x17,
+    CRI_VTX2_V =    0x18,
+    CRI_VTX2_U2 =   0x19,
+    CRI_VTX2_V2 =   0x1a,
+
+    CRI_ZPixelOffset  = 0x1d,
+    CRI_DrawCtrlLocal = 0x1e,
+    CRI_TexPalAddr    = 0x1f,
+    CRI_TexCtrl0      = 0x20,
+    CRI_TexCtrl1      = 0x21,
+    CRI_TexAddr0      = 0x22,
+    CRI_TexAddr1      = 0x23,
+    CRI_TexBlendCtrl0 = 0x24,
+    CRI_TexBlendCtrl1 = 0x25,
+    CRI_TexXprClr     = 0x26,
+    CRI_TexDescr      = 0x27,
+
+    CRI_FogTable00= 0x28,
+    CRI_FogTable04= 0x29,
+    CRI_FogTable08= 0x2a,
+    CRI_FogTable12= 0x2b,
+    CRI_FogTable16= 0x2c,
+    CRI_FogTable20= 0x2d,
+    CRI_FogTable24= 0x2e,
+    CRI_FogTable28= 0x2f,
+    CRI_FogCtrl=    0x30,
+    CRI_StencilCtrl= 0x31,
+    CRI_ZBufCtrl=   0x32,
+    CRI_ZBufOffset= 0x33,
+    CRI_DstCtrl=    0x34,
+    CRI_DrawCtrlGlobal0=   0x35,
+    CRI_DrawCtrlGlobal1=   0x36,
+    CRI_ZRW_WTMK =  0x37,
+    CRI_DST_WTMK =  0x38,
+    CRI_TexBlendColor= 0x39,
+
+    CRI_VertBufAddr= 0x3e,
+    /* new in ms1*/
+    CRI_MauFrameAddr0 = 0x40,
+    CRI_MauFrameAddr1 = 0x41,
+    CRI_MauFrameAddr2 = 0x42,
+    CRI_MauFrameAddr3 = 0x43,
+    CRI_FrameDesc     = 0x44,
+    CRI_IDCT9bitEn    = 0x45,
+    CRI_MV0           = 0x46,
+    CRI_MV1           = 0x47,
+    CRI_MV2           = 0x48,
+    CRI_MV3           = 0x49,
+    CRI_MacroDescr    = 0x4a,  /*kickoff?*/
+    
+    CRI_MeuCtrl = 0x50,
+    CRI_SrcYAddr = 0x51,
+    CRI_DestAddr = 0x52,
+    CRI_FmtrSrcDimen = 0x53,
+    CRI_FmtrDestDimen = 0x54,
+    CRI_SrcCbAddr = 0x55,
+    CRI_SrcCrAddr = 0x56,
+    CRI_SrcCrCbStride = 0x57,
+    
+    CRI_BCI_Power= 0x5f,
+    
+    CRI_PSCtrl=0xA0,
+    CRI_SSClrKeyCtrl=0xA1,
+    CRI_SSCtrl=0xA4,
+    CRI_SSChromUpBound=0xA5,
+    CRI_SSHoriScaleCtrl=0xA6,
+    CRI_SSClrAdj=0xA7,
+    CRI_SSBlendCtrl=0xA8,
+    CRI_PSFBAddr0=0xB0,
+    CRI_PSFBAddr1=0xB1,
+    CRI_PSStride=0xB2,
+    CRI_DB_LPB_Support=0xB3,
+    CRI_SSFBAddr0=0xB4,
+    CRI_SSFBAddr1=0xB5,
+    CRI_SSStride=0xB6,
+    CRI_SSOpaqueCtrl=0xB7,
+    CRI_SSVertScaleCtrl=0xB8,
+    CRI_SSVertInitValue=0xB9,
+    CRI_SSSrcLineCnt=0xBA,
+    CRI_FIFO_RAS_Ctrl=0xBB,
+    CRI_PSWinStartCoord=0xBC,
+    CRI_PSWinSize=0xBD,
+    CRI_SSWinStartCoord=0xBE,
+    CRI_SSWinSize=0xBF,
+    CRI_PSFIFOMon0=0xC0,
+    CRI_SSFIFOMon0=0xC1,
+    CRI_PSFIFOMon1=0xC2,
+    CRI_SSFIFOMon1=0xC3,
+    CRI_PSFBSize=0xC4,
+    CRI_SSFBSize=0xC5,
+    CRI_SSFBAddr2=0xC6,
+    /* 2D register starts at D0*/
+    CRI_CurrXY=0xD0,
+    CRI_DstXYorStep=0xD1 ,
+    CRI_LineErr=0xD2 ,
+    CRI_DrawCmd=0xD3,   /*kick off for image xfer*/
+    CRI_ShortStrkVecXfer=0xD4,
+    CRI_BackClr=0xD5,
+    CRI_ForeClr=0xD6,
+    CRI_BitPlaneWtMask=0xD7,
+    CRI_BitPlaneRdMask=0xD8,
+    CRI_ClrCmp=0xD9 ,
+    CRI_BackAndForeMix=0xDA ,
+    CRI_TopLeftSciss=0xDB ,
+    CRI_BotRightSciss=0xDC ,
+    CRI_PixOrMultiCtrl=0xDD ,
+    CRI_MultiCtrlOrRdSelct=0xDE ,
+    CRI_MinorOrMajorAxisCnt=0xDF ,
+    CRI_GlobalBmpDesc1=0xE0 ,
+    CRI_GlobalBmpDesc2=0xE1 ,
+    CRI_BurstPriBmpDesc1=0xE2 ,
+    CRI_BurstPriBmpDesc2=0xE3 ,
+    CRI_BurstSecBmpDesc1=0xE4 ,
+    CRI_BurstSecBmpDesc2=0xE5,
+    CRI_ImageDataPort=0xF8
+
+}CtrlRegIdx;
+
+/***********************
+        Fog Mode
+************************/
+typedef enum
+{
+  FGM_Z_FOG,  /*Table*/
+  FGM_V_FOG   /*Vertex*/
+} FogMode;
+
+/***********************
+  Texture
+************************/
+typedef enum
+{
+    TAM_Wrap,
+    TAM_Clamp,
+    TAM_Mirror
+} TexAddressModel;
+
+typedef enum
+{
+    TFT_S3TC4Bit,
+    TFT_Pal8Bit565,
+    TFT_Pal8Bit1555,
+    TFT_ARGB8888,
+    TFT_ARGB1555,
+    TFT_ARGB4444,
+    TFT_RGB565,
+    TFT_Pal8Bit4444,
+    TFT_S3TC4A4Bit,  /*like S3TC4Bit but with 4 bit alpha*/
+    TFT_S3TC4CA4Bit, /*like S3TC4Bit, but with 4 bit compressed alpha*/
+    TFT_S3TCL4,
+    TFT_S3TCA4L4,
+    TFT_L8,
+    TFT_A4L4,
+    TFT_I8,
+    TFT_A8
+} TexFmt;
+
+typedef enum
+{
+    TPS_64,
+    TPS_128,
+    TPS_192,
+    TPS_256
+} TexPaletteSize;
+
+#define MAX_MIPMAP_LOD_BIAS 255     /* upper bound of the LOD bias range */
+#define MIN_MIPMAP_LOD_BIAS (-255)  /* lower bound; parenthesized so the sign stays attached after expansion */
+
+typedef enum
+{
+  TFM_Point,              /*1 TPP*/
+  TFM_Bilin,              /*2 TPP*/
+  TFM_Reserved,
+  TFM_Trilin             /*16 TPP*/
+} TexFilterMode;
+
+
+#define TBC_Decal       0x00850410
+#define TBC_Modul       0x00850011
+#define TBC_DecalAlpha  0x00852A04
+#define TBC_ModulAlpha  0x00110011
+#define TBC_Copy        0x00840410
+#define TBC_CopyAlpha   0x00900405
+#define TBC_NoTexMap    0x00850405
+#define TBC_Blend0      0x00810004
+#define TBC_Blend1      0x00870e02
+#define TBC_BlendAlpha0 0x00040004
+#define TBC_BlendAlpha1 TBC_Blend1
+#define TBC_BlendInt0   0x00040004
+#define TBC_BlendInt1   0x01c20e02
+#define TBC_AddAlpha    0x19910c11
+#define TBC_Add         0x18110c11
+
+#define TBC_Decal1      0x00870410
+#define TBC_Modul1      0x00870013
+#define TBC_DecalAlpha1 0x00832A00
+#define TBC_ModulAlpha1 0x00130013
+#define TBC_NoTexMap1   0x00870407
+#define TBC_Copy1       0x00870400
+#define TBC_CopyAlpha1  0x00900400
+#define TBC_AddAlpha1   0x19930c13
+#define TBC_Add1        0x18130c13
+
+/*
+ * derived from TexBlendCtrl
+ */
+
+typedef enum
+{
+    TBC_UseSrc,
+    TBC_UseTex,
+    TBC_TexTimesSrc,
+    TBC_BlendTexWithSrc
+} TexBlendCtrlMode;
+
+/***********************
+        Draw Control
+************************/
+typedef enum
+{
+    BCM_Reserved,
+    BCM_None,
+    BCM_CW,
+    BCM_CCW
+} BackfaceCullingMode;
+
+typedef enum
+{
+    SAM_Zero,
+    SAM_One,
+    SAM_DstClr,
+    SAM_1DstClr,
+    SAM_SrcAlpha,
+    SAM_1SrcAlpha,
+    SAM_DstAlpha,
+    SAM_1DstAlpha
+} SrcAlphaBlendMode;
+
+/* -1 from state*/
+typedef enum
+{
+    DAM_Zero,
+    DAM_One,
+    DAM_SrcClr,
+    DAM_1SrcClr,
+    DAM_SrcAlpha,
+    DAM_1SrcAlpha,
+    DAM_DstAlpha,
+    DAM_1DstAlpha
+} DstAlphaBlendMode;
+
+/*
+ * stencil control
+ */
+
+typedef enum
+{
+    STENCIL_Keep,
+    STENCIL_Zero,
+    STENCIL_Equal,
+    STENCIL_IncClamp,
+    STENCIL_DecClamp,
+    STENCIL_Invert,
+    STENCIL_Inc,
+    STENCIL_Dec
+} StencilOp;
+
+/***************************************************************
+*** Bitfield Structures for Programming Interface **************
+***************************************************************/
+
+/**************************
+ Command Header Entry
+**************************/
+
+typedef struct {  /*for DrawIndexPrimitive command, vert0Idx is meaningful.*/
+    unsigned int vert0Idx:16;
+    unsigned int vertCnt:8;
+    unsigned int cont:1;
+    unsigned int type:2;   /*00=list, 01=strip, 10=fan, 11=reserved*/
+    unsigned int cmd:5;
+}Reg_DrawIndexPrimitive;
+
+typedef struct {  /* header layout used as CmdHeaderUnion.vert; unlike Reg_DrawIndexPrimitive it has no vert0Idx field */
+    unsigned int noW:1;    /* noW..noV2: one flag bit per vertex component (presumably "component omitted" -- confirm) */
+    unsigned int noCd:1;
+    unsigned int noCs:1;
+    unsigned int noU:1;
+    unsigned int noV:1;
+    unsigned int noU2:1;
+    unsigned int noV2:1;
+
+    unsigned int reserved:9;
+    unsigned int vertCnt:8;
+    unsigned int cont:1;
+    unsigned int type:2;   /* 00=list, 01=strip, 10=fan, 11=reserved*/
+    unsigned int cmd:5;
+}Reg_DrawPrimitive;
+
+
+typedef struct {
+    unsigned int startRegIdx:8;
+    unsigned int reserved:8;
+    unsigned int regCnt:8;
+    unsigned int resvered1:1;
+    unsigned int lowEn:1;
+    unsigned int highEn:1;
+    unsigned int cmd:5;
+}Reg_SetRegister;
+
+typedef struct {
+    unsigned int reserved1:22;
+    unsigned int isPrimary:1;
+    unsigned int MIU_SYNC:1;
+    unsigned int reserved2:3;
+    unsigned int cmd:5;
+}Reg_QueuedPageFlip;
+
+typedef struct {  /* NOTE(review): these fields total 31 bits; every other Reg_* header in this file totals 32 -- confirm reserved2 width */
+    unsigned int reserved1:22;
+    unsigned int DIR:1;
+    unsigned int CTG:1; /*set to 0*/
+    unsigned int BPP:1;
+    unsigned int reserved2:1;
+    unsigned int cmd:5;
+}Reg_MasterImgXfer;
+
+typedef struct {
+    unsigned int PD:4;   /*PM=mono, PS=descriptor specified*/
+    unsigned int PT:1;
+    unsigned int SD:4;
+    unsigned int ST:1;
+    unsigned int DD:3;
+    unsigned int DC:2; /*DC=destination clip*/
+  unsigned int CS:1;  /*cs=color specified*/
+    unsigned int MIX3:8;
+    unsigned int XP:1;
+    unsigned int YP:1;
+    unsigned int LP:1;
+    unsigned int cmd:5;
+}Reg_2D;
+
+typedef struct {
+    unsigned int CodedBlkPattern:6;
+    unsigned int DCT_Type:1;
+    unsigned int MB_Type:2;
+    unsigned int MotionType:2;
+    unsigned int MB_Row:6;
+    unsigned int MB_Column:6;
+    unsigned int mv3:1;
+    unsigned int mv2:1;
+    unsigned int mv1:1;
+    unsigned int mv0:1;
+    unsigned int cmd:5;
+}Reg_MacroBlock;
+
+typedef struct {
+    unsigned int scanLnCnt:11;
+    unsigned int clkCnt:5;
+    unsigned int e3d:1;
+    unsigned int e2d:1;
+    unsigned int mau:1;
+    unsigned int veu:1;
+    unsigned int meuMit:1;
+    unsigned int meuSit:1;
+    unsigned int meuVx:1;
+    unsigned int meuMau:1;
+    unsigned int pageFlip:1;
+    unsigned int scanLn:1;
+    unsigned int clk:1;
+    unsigned int cmd:5;
+}Reg_Wait;
+
+typedef struct{
+    unsigned int reserved:27;
+    unsigned int cmd:5;
+}Reg_ScaledImgXfer  ;
+
+typedef struct{
+    unsigned int eventTag:16;
+    unsigned int reserved2:6;
+    unsigned int ET:1;
+    unsigned int INT:1;
+    unsigned int reserved1:3;
+    unsigned int cmd:5;
+}Reg_UpdtShadowStat;
+
+typedef union {  /* one command header, viewed through whichever per-command bitfield layout applies */
+    Reg_DrawPrimitive  vert;
+    Reg_DrawIndexPrimitive  vertIdx;
+    Reg_SetRegister    set;
+    Reg_QueuedPageFlip pageFlip;
+    Reg_MasterImgXfer  masterImgXfer;
+    Reg_ScaledImgXfer  scaledImgXfer;
+    Reg_UpdtShadowStat updtShadow;
+    Reg_MacroBlock     macroBlk;
+    Reg_2D             cmd2D;
+    Reg_Wait           wait;
+}CmdHeaderUnion;
+
+
+/*frank 2001/11/14 add BCI write macros*/
+/* Registers not used in the X server
+ */
+
+#define SAVAGE_NOP_ID           0x2094
+#define SAVAGE_NOP_ID_MASK        ((1<<22)-1)
+
+
+/* 3D instructions
+ */
+
+/*          Draw Primitive Control */
+
+
+#define SAVAGE_HW_NO_Z          0x01    /* bit 0 */
+#define SAVAGE_HW_NO_W          0x02    /* bit 1 */
+#define SAVAGE_HW_NO_CD         0x04    /* bit 2 */
+#define SAVAGE_HW_NO_CS         0x08    /* bit 3 */
+#define SAVAGE_HW_NO_U0         0x10    /* bit 4 */
+#define SAVAGE_HW_NO_V0         0x20    /* bit 5 */
+#define SAVAGE_HW_NO_UV0        (SAVAGE_HW_NO_U0 | SAVAGE_HW_NO_V0)
+#define SAVAGE_HW_NO_U1         0x40    /* bit 6 */
+#define SAVAGE_HW_NO_V1         0x80    /* bit 7 */
+#define SAVAGE_HW_NO_UV1        (SAVAGE_HW_NO_U1 | SAVAGE_HW_NO_V1)
+#define SAVAGE_HW_SKIPFLAGS     0x000000ff  /* mask covering all eight flag bits above */
+
+#endif
+
+
+
+
+
+
diff --git a/src/mesa/drivers/dri/savage/savage_init.h b/src/mesa/drivers/dri/savage/savage_init.h
new file mode 100644
index 0000000000..bfd3077d70
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savage_init.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _SAVAGE_INIT_H_
+#define _SAVAGE_INIT_H_
+
+#include <sys/time.h>
+#include "dri_util.h"
+#include "main/mtypes.h"
+
+#include "xmlconfig.h"
+
+typedef struct {             /* one region that savageInitDriver() maps with drmMap() */
+   drm_handle_t handle;      /* handle passed to drmMap() */
+   drmSize size;             /* size passed to drmMap() */
+   char *map;                /* address stored by drmMap() */
+} savageRegion, *savageRegionPtr;
+
+typedef struct {
+   int chipset;
+   int width;
+   int height;
+   int mem;
+
+   int cpp;			/* for front and back buffers */
+   int zpp;
+
+   int agpMode;
+
+   unsigned int bufferSize;
+
+#if 0 
+   int bitsPerPixel;
+#endif
+   unsigned int frontFormat;
+   unsigned int frontOffset;
+   unsigned int backOffset;
+   unsigned int depthOffset;
+
+   unsigned int aperturePitch;
+
+   unsigned int textureOffset[SAVAGE_NR_TEX_HEAPS];
+   unsigned int textureSize[SAVAGE_NR_TEX_HEAPS];
+   unsigned int logTextureGranularity[SAVAGE_NR_TEX_HEAPS];
+   drmAddress texVirtual[SAVAGE_NR_TEX_HEAPS];
+  
+   __DRIscreen *driScrnPriv;
+
+   savageRegion aperture;
+   savageRegion agpTextures;
+
+   drmBufMapPtr bufs;
+
+   unsigned int sarea_priv_offset;
+
+   /* Configuration cache with default values for all contexts */
+   driOptionCache optionCache;
+} savageScreenPrivate;
+
+
+#include "savagecontext.h"
+
+extern void savageGetLock( savageContextPtr imesa, GLuint flags );
+extern void savageXMesaSetClipRects(savageContextPtr imesa);
+
+
+#define GET_DISPATCH_AGE( imesa ) ((imesa)->sarea->last_dispatch)  /* reads sarea->last_dispatch; safe with expression arguments */
+#define GET_ENQUEUE_AGE( imesa ) ((imesa)->sarea->last_enqueue)    /* reads sarea->last_enqueue; safe with expression arguments */
+
+
+/* Lock the hardware and validate our state: try DRM_CAS on the context's lock
+ * first, and call savageGetLock() only when DRM_CAS leaves __ret non-zero. */
+#define LOCK_HARDWARE( imesa )				\
+  do {							\
+    char __ret=0;					\
+    DRM_CAS((imesa)->driHwLock, (imesa)->hHWContext,	\
+	    (DRM_LOCK_HELD|(imesa)->hHWContext), __ret);	\
+    if (__ret)						\
+        savageGetLock( (imesa), 0 );			\
+  } while (0)
+
+
+
+/* Unlock the hardware for the context passed in.  The trailing ';' is part of
+ * the expansion (kept for existing callers), so avoid it in unbraced if/else. */
+#define UNLOCK_HARDWARE(imesa)					\
+    DRM_UNLOCK((imesa)->driFd, (imesa)->driHwLock, (imesa)->hHWContext);
+
+
+/* Take the hardware lock, then call savageRegetLockQuiescent().
+ * This is probably the wrong way to do it, but otherwise the drm complains
+ * that the heavyweight lock is already held.  At worst this is 3 ioctls;
+ * the best solution probably only gets down to 2 ioctls in the worst case.
+ */
+#define LOCK_HARDWARE_QUIESCENT( imesa ) do {	\
+   LOCK_HARDWARE( imesa );			\
+   savageRegetLockQuiescent( imesa );		\
+} while(0)
+
+/* The following definitions are copied from savage_regs.h in the XFree86
+ * driver. They are unlikely to change. If they do we need to keep them in
+ * sync. */
+
+#define S3_SAVAGE3D_SERIES(chip)  (((chip)>=S3_SAVAGE3D) && ((chip)<=S3_SAVAGE_MX))
+
+#define S3_SAVAGE4_SERIES(chip)  (((chip)==S3_SAVAGE4)            \
+                                  || ((chip)==S3_PROSAVAGE)       \
+                                  || ((chip)==S3_TWISTER)         \
+                                  || ((chip)==S3_PROSAVAGEDDR))
+
+#define S3_SAVAGE_MOBILE_SERIES(chip)  (((chip)==S3_SAVAGE_MX) || ((chip)==S3_SUPERSAVAGE))
+
+#define S3_SAVAGE_SERIES(chip)    (((chip)>=S3_SAVAGE3D) && ((chip)<=S3_SAVAGE2000))
+
+#define S3_MOBILE_TWISTER_SERIES(chip)   (((chip)==S3_TWISTER)    \
+                                          ||((chip)==S3_PROSAVAGEDDR))
+
+/* Chip tags.  These are used to group the adapters into related families.
+ * The range tests in the *_SERIES macros above depend on this enumerator
+ * order, and each macro evaluates its argument more than once. */
+
+enum S3CHIPTAGS {
+    S3_UNKNOWN = 0,
+    S3_SAVAGE3D,
+    S3_SAVAGE_MX,
+    S3_SAVAGE4,
+    S3_PROSAVAGE,
+    S3_TWISTER,
+    S3_PROSAVAGEDDR,
+    S3_SUPERSAVAGE,
+    S3_SAVAGE2000,
+    S3_LAST
+};
+
+#endif
diff --git a/src/mesa/drivers/dri/savage/savage_xmesa.c b/src/mesa/drivers/dri/savage/savage_xmesa.c
new file mode 100644
index 0000000000..cbdc9c87ee
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savage_xmesa.c
@@ -0,0 +1,1016 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include <stdio.h>
+
+#include "main/context.h"
+#include "main/context.h"
+#include "main/matrix.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/simple_list.h"
+
+#include "utils.h"
+
+#include "main/extensions.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "vbo/vbo.h"
+
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
+
+#include "savagedd.h"
+#include "savagestate.h"
+#include "savagetex.h"
+#include "savagespan.h"
+#include "savagetris.h"
+#include "savageioctl.h"
+#include "savage_bci.h"
+
+#include "savage_dri.h"
+
+#include "drirenderbuffer.h"
+#include "texmem.h"
+
+#define need_GL_EXT_secondary_color
+#include "main/remap_helper.h"
+
+#include "xmlpool.h"
+
+/* Driver-specific options
+ */
+#define SAVAGE_ENABLE_VDMA(def) \
+DRI_CONF_OPT_BEGIN(enable_vdma,bool,def) \
+	DRI_CONF_DESC(en,"Use DMA for vertex transfers") \
+	DRI_CONF_DESC(de,"Benutze DMA für Vertextransfers") \
+DRI_CONF_OPT_END
+#define SAVAGE_ENABLE_FASTPATH(def) \
+DRI_CONF_OPT_BEGIN(enable_fastpath,bool,def) \
+	DRI_CONF_DESC(en,"Use fast path for unclipped primitives") \
+	DRI_CONF_DESC(de,"Schneller Codepfad für ungeschnittene Polygone") \
+DRI_CONF_OPT_END
+#define SAVAGE_SYNC_FRAMES(def) \
+DRI_CONF_OPT_BEGIN(sync_frames,bool,def) \
+	DRI_CONF_DESC(en,"Synchronize with graphics hardware after each frame") \
+	DRI_CONF_DESC(de,"Synchronisiere nach jedem Frame mit Grafikhardware") \
+DRI_CONF_OPT_END
+
+/* Configuration
+ */
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+    DRI_CONF_SECTION_QUALITY
+        DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
+        DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER)
+        DRI_CONF_FLOAT_DEPTH(false)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_PERFORMANCE
+        SAVAGE_ENABLE_VDMA(true)
+        SAVAGE_ENABLE_FASTPATH(true)
+        SAVAGE_SYNC_FRAMES(false)
+        DRI_CONF_MAX_TEXTURE_UNITS(2,1,2)
+    	DRI_CONF_TEXTURE_HEAPS(DRI_CONF_TEXTURE_HEAPS_ALL)
+        DRI_CONF_FORCE_S3TC_ENABLE(false)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_DEBUG
+        DRI_CONF_NO_RAST(false)
+    DRI_CONF_SECTION_END
+DRI_CONF_END;
+static const GLuint __driNConfigOptions = 10;
+
+
+static const struct dri_debug_control debug_control[] =
+{
+    { "fall",  DEBUG_FALLBACKS },
+    { "api",   DEBUG_VERBOSE_API },
+    { "tex",   DEBUG_VERBOSE_TEX },
+    { "verb",  DEBUG_VERBOSE_MSG },
+    { "dma",   DEBUG_DMA },
+    { "state", DEBUG_STATE },
+    { NULL,    0 }
+};
+#ifndef SAVAGE_DEBUG
+int SAVAGE_DEBUG = 0;
+#endif
+
+
+/* For time calculation tests */
+#if defined(DEBUG_TIME) && DEBUG_TIME
+struct timeval tv_s,tv_f;
+unsigned long time_sum=0;
+struct timeval tv_s1,tv_f1;
+#endif
+
+static const struct dri_extension card_extensions[] =
+{
+    { "GL_ARB_multitexture",               NULL },
+    { "GL_EXT_stencil_wrap",               NULL },
+    { "GL_EXT_texture_lod_bias",           NULL },
+    { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
+    { NULL,                                NULL }
+};
+
+static const struct dri_extension s4_extensions[] =
+{
+    { "GL_ARB_texture_env_add",            NULL },
+    { "GL_ARB_texture_mirrored_repeat",    NULL },
+    { NULL,                                NULL }
+};
+
+extern struct tnl_pipeline_stage _savage_texnorm_stage;
+extern struct tnl_pipeline_stage _savage_render_stage;
+
+static const struct tnl_pipeline_stage *savage_pipeline[] = {
+
+   &_tnl_vertex_transform_stage,
+   &_tnl_normal_transform_stage,
+   &_tnl_lighting_stage,
+   &_tnl_fog_coordinate_stage,
+   &_tnl_texgen_stage,
+   &_tnl_texture_transform_stage,
+   &_savage_texnorm_stage,
+   &_savage_render_stage,
+   &_tnl_render_stage,
+   0,
+};
+
+
+PUBLIC const __DRIextension *savageScreenExtensions[] = { /* NULL-terminated; installed as sPriv->extensions */
+    &driCoreExtension.base,
+    &driLegacyExtension.base,
+    &driReadDrawableExtension, NULL  /* sentinel: extension lists are walked until a NULL entry */
+};
+
+/* Build the per-screen private record from the SAVAGEDRIRec in
+ * sPriv->pDevPriv and map the AGP texture and aperture regions.
+ * Returns GL_TRUE on success.  On failure returns GL_FALSE after undoing
+ * any allocation or mapping made here, so nothing dangles from sPriv.
+ */
+static GLboolean
+savageInitDriver(__DRIscreen *sPriv)
+{
+   savageScreenPrivate *savageScreen;
+   SAVAGEDRIPtr         gDRIPriv = (SAVAGEDRIPtr)sPriv->pDevPriv;
+
+   if (sPriv->devPrivSize != sizeof(SAVAGEDRIRec)) {
+      fprintf(stderr,"\nERROR!  sizeof(SAVAGEDRIRec) does not match passed size from device driver\n");
+      return GL_FALSE;
+   }
+
+   /* Allocate the private area */
+   savageScreen = (savageScreenPrivate *)malloc(sizeof(savageScreenPrivate));
+   if (!savageScreen)
+      return GL_FALSE;
+
+   savageScreen->driScrnPriv = sPriv;
+   sPriv->private = (void *)savageScreen;
+
+   savageScreen->chipset = gDRIPriv->chipset;
+   savageScreen->width = gDRIPriv->width;
+   savageScreen->height = gDRIPriv->height;
+   savageScreen->mem = gDRIPriv->mem;
+   savageScreen->cpp = gDRIPriv->cpp;
+   savageScreen->zpp = gDRIPriv->zpp;
+   savageScreen->agpMode = gDRIPriv->agpMode;
+   savageScreen->bufferSize = gDRIPriv->bufferSize;
+
+   if (gDRIPriv->cpp == 4)
+      savageScreen->frontFormat = DV_PF_8888;
+   else
+      savageScreen->frontFormat = DV_PF_565;
+   savageScreen->frontOffset = gDRIPriv->frontOffset;
+   savageScreen->backOffset = gDRIPriv->backOffset;
+   savageScreen->depthOffset = gDRIPriv->depthOffset;
+
+   /* Texture heap parameters: SAVAGE_CARD_HEAP first, then SAVAGE_AGP_HEAP. */
+   savageScreen->textureOffset[SAVAGE_CARD_HEAP] =
+                                   gDRIPriv->textureOffset;
+   savageScreen->textureSize[SAVAGE_CARD_HEAP] =
+                                   gDRIPriv->textureSize;
+   savageScreen->logTextureGranularity[SAVAGE_CARD_HEAP] =
+                                   gDRIPriv->logTextureGranularity;
+   savageScreen->textureOffset[SAVAGE_AGP_HEAP] =
+                                   gDRIPriv->agpTextureHandle;
+   savageScreen->textureSize[SAVAGE_AGP_HEAP] =
+                                   gDRIPriv->agpTextureSize;
+   savageScreen->logTextureGranularity[SAVAGE_AGP_HEAP] =
+                                   gDRIPriv->logAgpTextureGranularity;
+
+   savageScreen->agpTextures.handle = gDRIPriv->agpTextureHandle;
+   savageScreen->agpTextures.size   = gDRIPriv->agpTextureSize;
+   if (gDRIPriv->agpTextureSize) {
+      if (drmMap(sPriv->fd,
+                 savageScreen->agpTextures.handle,
+                 savageScreen->agpTextures.size,
+                 (drmAddress *)&(savageScreen->agpTextures.map)) != 0) {
+         free(savageScreen);
+         sPriv->private = NULL;
+         return GL_FALSE;
+      }
+   } else
+      savageScreen->agpTextures.map = NULL;
+
+   savageScreen->texVirtual[SAVAGE_CARD_HEAP] =
+             (drmAddress)(((GLubyte *)sPriv->pFB)+gDRIPriv->textureOffset);
+   savageScreen->texVirtual[SAVAGE_AGP_HEAP] =
+                        (drmAddress)(savageScreen->agpTextures.map);
+
+   savageScreen->aperture.handle = gDRIPriv->apertureHandle;
+   savageScreen->aperture.size   = gDRIPriv->apertureSize;
+   savageScreen->aperturePitch   = gDRIPriv->aperturePitch;
+   if (drmMap(sPriv->fd,
+              savageScreen->aperture.handle,
+              savageScreen->aperture.size,
+              (drmAddress *)&savageScreen->aperture.map) != 0)
+   {
+      /* Do not leak the AGP texture mapping made above. */
+      if (savageScreen->agpTextures.map)
+         drmUnmap((drmAddress)savageScreen->agpTextures.map,
+                  savageScreen->agpTextures.size);
+      free(savageScreen);
+      sPriv->private = NULL;
+      return GL_FALSE;
+   }
+
+   savageScreen->bufs = drmMapBufs(sPriv->fd);  /* result not checked here; savageDestroyScreen() tests it */
+   savageScreen->sarea_priv_offset = gDRIPriv->sarea_priv_offset;
+
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo (&savageScreen->optionCache,
+                       __driConfigOptions, __driNConfigOptions);
+
+   sPriv->extensions = savageScreenExtensions;
+   return GL_TRUE;
+}
+
+/* Release the per-screen private data that savageInitDriver() stored in sPriv->private. */
+static void
+savageDestroyScreen(__DRIscreen *sPriv)
+{
+   savageScreenPrivate *savageScreen = (savageScreenPrivate *)sPriv->private;
+
+   if (!savageScreen)   /* savageInitDriver() resets private to NULL on failure */
+       return;
+   if (savageScreen->bufs)
+       drmUnmapBufs(savageScreen->bufs);
+
+   /* free all option information */
+   driDestroyOptionInfo (&savageScreen->optionCache);
+   free(savageScreen);
+   sPriv->private = NULL;
+}
+
+/**
+ * Create the Savage rendering context for a DRI context.
+ *
+ * Allocates the driver context and the Mesa GLcontext, then sets up the
+ * command buffer, configuration options, texture heaps, client vertex
+ * buffer and the swrast/vbo/tnl/swsetup helper modules.  On success the
+ * driver context is stored in driContextPriv->driverPrivate.
+ *
+ * \param api                   requested API (not examined here)
+ * \param mesaVis               visual the context is created for
+ * \param driContextPriv        DRI context that receives the driver context
+ * \param sharedContextPrivate  driver context to share state with, or NULL
+ *
+ * \return GL_TRUE on success.  On failure GL_FALSE is returned and
+ *         everything allocated by this function is released again.
+ */
+static GLboolean
+savageCreateContext( gl_api api,
+		     const __GLcontextModes *mesaVis,
+		     __DRIcontext *driContextPriv,
+		     void *sharedContextPrivate )
+{
+   GLcontext *ctx, *shareCtx;
+   savageContextPtr imesa;
+   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+   struct dd_function_table functions;
+   savageScreenPrivate *savageScreen = (savageScreenPrivate *)sPriv->private;
+   drm_savage_sarea_t *saPriv=(drm_savage_sarea_t *)(((char*)sPriv->pSAREA)+
+						 savageScreen->sarea_priv_offset);
+   int textureSize[SAVAGE_NR_TEX_HEAPS];
+   int i;
+   imesa = (savageContextPtr)calloc(1, sizeof(savageContext));
+   if (!imesa) {
+      return GL_FALSE;
+   }
+
+   /* Init default driver functions then plug in savage-specific texture
+    * functions that are needed as early as during context creation. */
+   _mesa_init_driver_functions( &functions );
+   savageDDInitTextureFuncs( &functions );
+
+   /* Allocate the Mesa context */
+   if (sharedContextPrivate)
+      shareCtx = ((savageContextPtr) sharedContextPrivate)->glCtx;
+   else 
+      shareCtx = NULL;
+   ctx = _mesa_create_context(mesaVis, shareCtx, &functions, imesa);
+   if (!ctx) {
+      free(imesa);
+      return GL_FALSE;
+   }
+   driContextPriv->driverPrivate = imesa;
+
+   imesa->cmdBuf.size = SAVAGE_CMDBUF_SIZE;
+   imesa->cmdBuf.base = imesa->cmdBuf.write =
+       malloc(SAVAGE_CMDBUF_SIZE * sizeof(drm_savage_cmd_header_t));
+   if (!imesa->cmdBuf.base)
+       goto fail_context;
+
+   /* Parse configuration files */
+   driParseConfigFiles (&imesa->optionCache, &savageScreen->optionCache,
+                        sPriv->myNum, "savage");
+
+   imesa->float_depth = driQueryOptionb(&imesa->optionCache, "float_depth") &&
+       savageScreen->chipset >= S3_SAVAGE4;
+   imesa->no_rast = driQueryOptionb(&imesa->optionCache, "no_rast");
+
+#if 0
+   ctx->Const.MinLineWidth = 1.0;
+   ctx->Const.MinLineWidthAA = 1.0;
+   ctx->Const.MaxLineWidth = 3.0;
+   ctx->Const.MaxLineWidthAA = 3.0;
+   ctx->Const.LineWidthGranularity = 1.0;
+#endif
+
+   ctx->Const.MaxDrawBuffers = 1;
+
+   /* Dri stuff
+    */
+   imesa->hHWContext = driContextPriv->hHWContext;
+   imesa->driFd = sPriv->fd;
+   imesa->driHwLock = &sPriv->pSAREA->lock;
+   
+   imesa->savageScreen = savageScreen;
+   imesa->driScreen = sPriv;
+   imesa->sarea = saPriv;
+   imesa->glBuffer = NULL;
+   
+   /* DMA buffer */
+
+   /* Each aperture window starts 0x01000000 bytes after the previous one. */
+   for(i=0;i<5;i++)
+   {
+       imesa->apertureBase[i] = (GLubyte *)savageScreen->aperture.map + 
+	   0x01000000 * i;
+   }
+   
+   imesa->aperturePitch = savageScreen->aperturePitch;
+
+   /* change texHeap initialize to support two kind of texture heap*/
+   /* here is some parts of initialization, others in InitDriver() */
+    
+   (void) memset( imesa->textureHeaps, 0, sizeof( imesa->textureHeaps ) );
+   make_empty_list( & imesa->swapped );
+
+   textureSize[SAVAGE_CARD_HEAP] = savageScreen->textureSize[SAVAGE_CARD_HEAP];
+   textureSize[SAVAGE_AGP_HEAP] = savageScreen->textureSize[SAVAGE_AGP_HEAP];
+   imesa->lastTexHeap = savageScreen->texVirtual[SAVAGE_AGP_HEAP] ? 2 : 1;
+   switch(driQueryOptioni (&imesa->optionCache, "texture_heaps")) {
+   case DRI_CONF_TEXTURE_HEAPS_CARD: /* only use card memory, if available */
+       if (textureSize[SAVAGE_CARD_HEAP])
+	   imesa->lastTexHeap = 1;
+       break;
+   case DRI_CONF_TEXTURE_HEAPS_GART: /* only use gart memory, if available */
+       if (imesa->lastTexHeap == 2 && textureSize[SAVAGE_AGP_HEAP])
+	   textureSize[SAVAGE_CARD_HEAP] = 0;
+       break;
+   /*default: Nothing to do, use all available memory. */
+   }
+   
+   for (i = 0; i < imesa->lastTexHeap; i++) {
+       imesa->textureHeaps[i] = driCreateTextureHeap(
+	   i, imesa,
+	   textureSize[i],
+	   11,					/* 2^11 = 2k alignment */
+	   SAVAGE_NR_TEX_REGIONS,
+	   (drmTextureRegionPtr)imesa->sarea->texList[i],
+	    &imesa->sarea->texAge[i],
+	    &imesa->swapped,
+	    sizeof( savageTexObj ),
+	    (destroy_texture_object_t *) savageDestroyTexObj );
+       /* If textureSize[i] == 0 textureHeaps[i] is NULL. This can happen
+	* if there is not enough card memory for a card texture heap. */
+       if (imesa->textureHeaps[i])
+	   driSetTextureSwapCounterLocation( imesa->textureHeaps[i],
+					     & imesa->c_textureSwaps );
+   }
+   imesa->texture_depth = driQueryOptioni (&imesa->optionCache,
+					   "texture_depth");
+   if (imesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB)
+       imesa->texture_depth = ( savageScreen->cpp == 4 ) ?
+	   DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
+
+   if (savageScreen->chipset >= S3_SAVAGE4)
+       ctx->Const.MaxTextureUnits = 2;
+   else
+       ctx->Const.MaxTextureUnits = 1;
+   /* The "texture_units" option may only lower the hardware limit. */
+   if (driQueryOptioni(&imesa->optionCache, "texture_units") <
+       ctx->Const.MaxTextureUnits)
+       ctx->Const.MaxTextureUnits =
+	   driQueryOptioni(&imesa->optionCache, "texture_units");
+   ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
+   ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
+
+   driCalculateMaxTextureLevels( imesa->textureHeaps,
+				 imesa->lastTexHeap,
+				 & ctx->Const,
+				 4,
+				 11, /* max 2D texture size is 2048x2048 */
+				 0,  /* 3D textures unsupported. */
+				 0,  /* cube textures unsupported. */
+				 0,  /* texture rectangles unsupported. */
+				 12,
+				 GL_FALSE,
+				 0 );
+   if (ctx->Const.MaxTextureLevels <= 6) { /*spec requires at least 64x64*/
+       __driUtilMessage("Not enough texture memory. "
+			"Falling back to indirect rendering.");
+       goto fail_heaps;
+   }
+
+   imesa->hw_stencil = mesaVis->stencilBits && mesaVis->depthBits == 24;
+   imesa->depth_scale = (imesa->savageScreen->zpp == 2) ?
+       (1.0F/0xffff):(1.0F/0xffffff);
+
+   imesa->bufferSize = savageScreen->bufferSize;
+   imesa->dmaVtxBuf.total = 0;
+   imesa->dmaVtxBuf.used = 0;
+   imesa->dmaVtxBuf.flushed = 0;
+
+   imesa->clientVtxBuf.total = imesa->bufferSize / 4;
+   imesa->clientVtxBuf.used = 0;
+   imesa->clientVtxBuf.flushed = 0;
+   imesa->clientVtxBuf.buf = (uint32_t *)malloc(imesa->bufferSize);
+   if (!imesa->clientVtxBuf.buf)
+       goto fail_heaps;
+
+   imesa->vtxBuf = &imesa->clientVtxBuf;
+
+   imesa->firstElt = -1;
+
+   /* Uninitialized vertex format. Force setting the vertex state in
+    * savageRenderStart.
+    */
+   imesa->vertex_size = 0;
+
+   /* Utah stuff
+    */
+   imesa->new_state = ~0;
+   imesa->new_gl_state = ~0;
+   imesa->RenderIndex = ~0;
+   imesa->dirty = ~0;
+   imesa->lostContext = GL_TRUE;
+   imesa->CurrentTexObj[0] = 0;
+   imesa->CurrentTexObj[1] = 0;
+
+   _mesa_meta_init( ctx );
+
+   /* Initialize the software rasterizer and helper modules.
+    */
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   
+   _swsetup_CreateContext( ctx );
+
+   /* Install the customized pipeline:
+    */
+   _tnl_destroy_pipeline( ctx );
+   _tnl_install_pipeline( ctx, savage_pipeline );
+
+   imesa->enable_fastpath = driQueryOptionb(&imesa->optionCache,
+					    "enable_fastpath");
+   /* DRM versions before 2.1.3 would only render triangle lists. ELTS
+    * support was added in 2.2.0. */
+   if (imesa->enable_fastpath && sPriv->drm_version.minor < 2) {
+      fprintf (stderr,
+	       "*** Disabling fast path because your DRM version is buggy "
+	       "or doesn't\n*** support ELTS. You need at least Savage DRM "
+	       "version 2.2.\n");
+      imesa->enable_fastpath = GL_FALSE;
+   }
+
+   if (!savageScreen->bufs || savageScreen->chipset == S3_SUPERSAVAGE)
+       imesa->enable_vdma = GL_FALSE;
+   else
+       imesa->enable_vdma = driQueryOptionb(&imesa->optionCache, "enable_vdma");
+
+   imesa->sync_frames = driQueryOptionb(&imesa->optionCache, "sync_frames");
+
+   /* Configure swrast to match hardware characteristics:
+    */
+   _tnl_allow_pixel_fog( ctx, GL_FALSE );
+   _tnl_allow_vertex_fog( ctx, GL_TRUE );
+   _swrast_allow_pixel_fog( ctx, GL_FALSE );
+   _swrast_allow_vertex_fog( ctx, GL_TRUE );
+
+   ctx->DriverCtx = (void *) imesa;
+   imesa->glCtx = ctx;
+
+   /* SAVAGE_DEBUG is a variable unless it was defined as a preprocessor
+    * symbol to compile debugging out. */
+#ifndef SAVAGE_DEBUG
+   SAVAGE_DEBUG = driParseDebugString( getenv( "SAVAGE_DEBUG" ),
+				       debug_control );
+#endif
+
+   driInitExtensions( ctx, card_extensions, GL_TRUE );
+   if (savageScreen->chipset >= S3_SAVAGE4)
+       driInitExtensions( ctx, s4_extensions, GL_FALSE );
+   if (ctx->Mesa_DXTn ||
+       driQueryOptionb (&imesa->optionCache, "force_s3tc_enable")) {
+       _mesa_enable_extension( ctx, "GL_S3_s3tc" );
+       if (savageScreen->chipset >= S3_SAVAGE4)
+	   /* This extension needs DXT3 and DXT5 support in hardware.
+	    * Not available on Savage3D/MX/IX. */
+	   _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" );
+   }
+
+   savageDDInitStateFuncs( ctx );
+   savageDDInitSpanFuncs( ctx );
+   savageDDInitDriverFuncs( ctx );
+   savageDDInitIoctlFuncs( ctx );
+   savageInitTriFuncs( ctx );
+
+   savageDDInitState( imesa );
+
+   driContextPriv->driverPrivate = (void *) imesa;
+
+   return GL_TRUE;
+
+   /* Error unwinding.  Both labels are only reached before the
+    * swrast/vbo/tnl/swsetup modules have been created, so only the
+    * resources acquired up to that point are released, in the same order
+    * savageDestroyContext uses. */
+fail_heaps:
+   for (i = 0; i < imesa->lastTexHeap; i++)
+       driDestroyTextureHeap(imesa->textureHeaps[i]);
+   driDestroyOptionCache(&imesa->optionCache);
+   free(imesa->cmdBuf.base);
+fail_context:
+   ctx->DriverCtx = NULL;
+   _mesa_destroy_context(ctx);
+   free(imesa);
+   /* Do not leave a pointer to freed memory behind. */
+   driContextPriv->driverPrivate = NULL;
+   return GL_FALSE;
+}
+
+/**
+ * Destroy the Savage rendering context attached to a DRI context.
+ *
+ * Flushes pending vertices and commands, waits for the hardware, then
+ * releases the texture heaps, command/vertex buffers, the helper modules,
+ * the Mesa context, the per-context option cache and finally the driver
+ * context itself.
+ *
+ * \param driContextPriv  DRI context whose driverPrivate is torn down
+ */
+static void
+savageDestroyContext(__DRIcontext *driContextPriv)
+{
+   savageContextPtr imesa = (savageContextPtr) driContextPriv->driverPrivate;
+   int i; /* signed, like imesa->lastTexHeap */
+
+   assert (imesa); /* should never be NULL */
+   if (!imesa)
+      return;
+
+   savageFlushVertices(imesa);
+   savageReleaseIndexedVerts(imesa);
+   savageFlushCmdBuf(imesa, GL_TRUE); /* release DMA buffer */
+   WAIT_IDLE_EMPTY(imesa);
+
+   for (i = 0; i < imesa->lastTexHeap; i++)
+      driDestroyTextureHeap(imesa->textureHeaps[i]);
+
+   free(imesa->cmdBuf.base);
+   free(imesa->clientVtxBuf.buf);
+
+   _mesa_meta_free( imesa->glCtx );
+
+   _swsetup_DestroyContext(imesa->glCtx );
+   _tnl_DestroyContext( imesa->glCtx );
+   _vbo_DestroyContext( imesa->glCtx );
+   _swrast_DestroyContext( imesa->glCtx );
+
+   /* free the Mesa context */
+   imesa->glCtx->DriverCtx = NULL;
+   _mesa_destroy_context(imesa->glCtx);
+
+   /* Release the option values parsed by driParseConfigFiles when the
+    * context was created; nothing else frees them. */
+   driDestroyOptionCache(&imesa->optionCache);
+
+   /* no longer use vertex_dma_buf*/
+   free(imesa);
+   driContextPriv->driverPrivate = NULL;
+}
+
+
+/**
+ * Create the Mesa framebuffer for a DRI drawable.
+ *
+ * Wraps the front, back, depth and stencil areas of the aperture in
+ * renderbuffers, adds software renderbuffers for what the hardware does
+ * not provide, and stores the framebuffer in driDrawPriv->driverPrivate.
+ *
+ * \param driScrnPriv  screen the drawable belongs to
+ * \param driDrawPriv  drawable that receives the framebuffer
+ * \param mesaVis      visual describing the requested buffers
+ * \param isPixmap     GL_TRUE for pixmaps, which are not implemented
+ *
+ * \return GL_TRUE on success, GL_FALSE for pixmaps or when an allocation
+ *         fails (driverPrivate is left untouched in that case).
+ */
+static GLboolean
+savageCreateBuffer( __DRIscreen *driScrnPriv,
+		    __DRIdrawable *driDrawPriv,
+		    const __GLcontextModes *mesaVis,
+		    GLboolean isPixmap)
+{
+   savageScreenPrivate *screen = (savageScreenPrivate *) driScrnPriv->private;
+   GLboolean swStencil;
+   struct gl_framebuffer *fb;
+   driRenderbuffer *rb;
+   /*
+    * XXX: this value needs to be set according to the config file
+    * setting.  But we don't get that until we create a rendering
+    * context!!!!
+    */
+   GLboolean float_depth = GL_FALSE;
+
+   if (isPixmap) {
+      return GL_FALSE; /* not implemented */
+   }
+
+   /* Stencil is only kept in hardware together with a 24 bit depth buffer. */
+   swStencil = mesaVis->stencilBits > 0 && mesaVis->depthBits != 24;
+
+   fb = _mesa_create_framebuffer(mesaVis);
+   if (!fb)
+      return GL_FALSE;
+
+   /* Front colour buffer. */
+   rb = driNewRenderbuffer(MESA_FORMAT_ARGB8888,
+                           (GLubyte *) screen->aperture.map
+                           + 0x01000000 * TARGET_FRONT,
+                           screen->cpp,
+                           screen->frontOffset, screen->aperturePitch,
+                           driDrawPriv);
+   if (!rb)
+      goto fail;
+   savageSetSpanFunctions(rb, mesaVis, float_depth);
+   assert(rb->Base.Data);
+   _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &rb->Base);
+
+   /* Back colour buffer. */
+   if (mesaVis->doubleBufferMode) {
+      rb = driNewRenderbuffer(MESA_FORMAT_ARGB8888,
+                              (GLubyte *) screen->aperture.map
+                              + 0x01000000 * TARGET_BACK,
+                              screen->cpp,
+                              screen->backOffset, screen->aperturePitch,
+                              driDrawPriv);
+      if (!rb)
+         goto fail;
+      savageSetSpanFunctions(rb, mesaVis, float_depth);
+      assert(rb->Base.Data);
+      _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &rb->Base);
+   }
+
+   /* Depth buffer, 16 or 24 bit. */
+   if (mesaVis->depthBits == 16) {
+      rb = driNewRenderbuffer(MESA_FORMAT_Z16,
+                              (GLubyte *) screen->aperture.map
+                              + 0x01000000 * TARGET_DEPTH,
+                              screen->zpp,
+                              screen->depthOffset, screen->aperturePitch,
+                              driDrawPriv);
+      if (!rb)
+         goto fail;
+      savageSetSpanFunctions(rb, mesaVis, float_depth);
+      _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base);
+   }
+   else if (mesaVis->depthBits == 24) {
+      rb = driNewRenderbuffer(MESA_FORMAT_S8_Z24,
+                              (GLubyte *) screen->aperture.map
+                              + 0x01000000 * TARGET_DEPTH,
+                              screen->zpp,
+                              screen->depthOffset, screen->aperturePitch,
+                              driDrawPriv);
+      if (!rb)
+         goto fail;
+      savageSetSpanFunctions(rb, mesaVis, float_depth);
+      _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &rb->Base);
+   }
+
+   /* Hardware stencil uses the same aperture window and offset as depth. */
+   if (mesaVis->stencilBits > 0 && !swStencil) {
+      rb = driNewRenderbuffer(MESA_FORMAT_S8,
+                              (GLubyte *) screen->aperture.map
+                              + 0x01000000 * TARGET_DEPTH,
+                              screen->zpp,
+                              screen->depthOffset, screen->aperturePitch,
+                              driDrawPriv);
+      if (!rb)
+         goto fail;
+      savageSetSpanFunctions(rb, mesaVis, float_depth);
+      _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &rb->Base);
+   }
+
+   _mesa_add_soft_renderbuffers(fb,
+                                GL_FALSE, /* color */
+                                GL_FALSE, /* depth */
+                                swStencil,
+                                mesaVis->accumRedBits > 0,
+                                GL_FALSE, /* alpha */
+                                GL_FALSE /* aux */);
+   driDrawPriv->driverPrivate = (void *) fb;
+
+   return GL_TRUE;
+
+fail:
+   /* Drop the only reference, the same way savageDestroyBuffer releases
+    * a drawable's framebuffer. */
+   _mesa_reference_framebuffer(&fb, NULL);
+   return GL_FALSE;
+}
+
+/* Release the drawable's framebuffer by rebinding its driverPrivate
+ * pointer to NULL through _mesa_reference_framebuffer(). */
+static void
+savageDestroyBuffer(__DRIdrawable *driDrawPriv)
+{
+   GLframebuffer **fbSlot = (GLframebuffer **) &driDrawPriv->driverPrivate;
+
+   _mesa_reference_framebuffer(fbSlot, NULL);
+}
+
+/* Disabled: this swap entry point is compiled out by the #if 0 below.
+ * It refers to a savageCtx that is not declared anywhere in view. */
+#if 0
+void XMesaSwapBuffers(__DRIdrawable *driDrawPriv)
+{
+   /* XXX should do swap according to the buffer, not the context! */
+   savageContextPtr imesa = savageCtx; 
+
+   FLUSH_VB( imesa->glCtx, "swap buffers" );
+   savageSwapBuffers(imesa);
+}
+#endif
+
+
+/* Pick the cliprect list and drawable origin that match the buffer being
+ * drawn to, then recompute the viewport.  The front-buffer set is used
+ * when the drawable has no back cliprects or when the first colour draw
+ * buffer is the front-left buffer. */
+void savageXMesaSetClipRects(savageContextPtr imesa)
+{
+   __DRIdrawable *draw = imesa->driDrawable;
+   const GLboolean useFront =
+      (draw->numBackClipRects == 0) ||
+      (imesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0]
+       == BUFFER_FRONT_LEFT);
+
+   if (!useFront) {
+      imesa->numClipRects = draw->numBackClipRects;
+      imesa->pClipRects = draw->pBackClipRects;
+      imesa->drawX = draw->backX;
+      imesa->drawY = draw->backY;
+   }
+   else {
+      imesa->numClipRects = draw->numClipRects;
+      imesa->pClipRects = draw->pClipRects;
+      imesa->drawX = draw->x;
+      imesa->drawY = draw->y;
+   }
+
+   savageCalcViewport( imesa->glCtx );
+}
+
+
+/* React to a moved or resized window: refresh the cliprects and update
+ * the framebuffer size of the draw drawable and, if it is a different
+ * drawable, of the read drawable as well. */
+static void savageXMesaWindowMoved( savageContextPtr imesa ) 
+{
+   __DRIdrawable *const drawPriv = imesa->driDrawable;
+   __DRIdrawable *const readPriv = imesa->driReadable;
+
+   if (0)
+      fprintf(stderr, "savageXMesaWindowMoved\n\n");
+
+   savageXMesaSetClipRects(imesa);
+   driUpdateFramebufferSize(imesa->glCtx, drawPriv);
+
+   if (readPriv == drawPriv)
+      return;
+
+   driUpdateFramebufferSize(imesa->glCtx, readPriv);
+}
+
+
+/* Unbind hook: mark all state of the context (if there is one) as dirty.
+ * Always reports success. */
+static GLboolean
+savageUnbindContext(__DRIcontext *driContextPriv)
+{
+   savageContextPtr imesa = (savageContextPtr) driContextPriv->driverPrivate;
+
+   if (imesa != NULL) {
+      imesa->dirty = ~0;
+   }
+   return GL_TRUE;
+}
+
+/* Disabled: the full-screen open/close hooks below are compiled out by
+ * #if 0.  Open saves the screen's front/back offsets and front bitmap
+ * descriptor in the context; close waits for idle and restores them. */
+#if 0
+static GLboolean
+savageOpenFullScreen(__DRIcontext *driContextPriv)
+{
+    
+  
+    
+    if (driContextPriv) {
+      savageContextPtr imesa = (savageContextPtr) driContextPriv->driverPrivate;
+      imesa->IsFullScreen = GL_TRUE;
+      imesa->backup_frontOffset = imesa->savageScreen->frontOffset;
+      imesa->backup_backOffset = imesa->savageScreen->backOffset;
+      imesa->backup_frontBitmapDesc = imesa->savageScreen->frontBitmapDesc;
+      imesa->savageScreen->frontBitmapDesc = imesa->savageScreen->backBitmapDesc;      
+      imesa->toggle = TARGET_BACK;
+   }
+
+    return GL_TRUE;
+}
+
+static GLboolean
+savageCloseFullScreen(__DRIcontext *driContextPriv)
+{
+    
+    if (driContextPriv) {
+      savageContextPtr imesa = (savageContextPtr) driContextPriv->driverPrivate;
+      WAIT_IDLE_EMPTY(imesa);
+      imesa->IsFullScreen = GL_FALSE;   
+      imesa->savageScreen->frontOffset = imesa->backup_frontOffset;
+      imesa->savageScreen->backOffset = imesa->backup_backOffset;
+      imesa->savageScreen->frontBitmapDesc = imesa->backup_frontBitmapDesc;
+   }
+    return GL_TRUE;
+}
+#endif
+
+/* Make a context current on the given draw/read drawables, or release the
+ * current context when driContextPriv is NULL.  Binding records the
+ * drawables in the driver context, marks all state dirty, hands the
+ * framebuffers to Mesa and refreshes cliprects and buffer sizes.
+ * Always returns GL_TRUE. */
+static GLboolean
+savageMakeCurrent(__DRIcontext *driContextPriv,
+		  __DRIdrawable *driDrawPriv,
+		  __DRIdrawable *driReadPriv)
+{
+   savageContextPtr imesa;
+   struct gl_framebuffer *drawFb;
+   struct gl_framebuffer *readFb;
+   driRenderbuffer *frontRb;
+   driRenderbuffer *backRb;
+
+   if (!driContextPriv) {
+      _mesa_make_current(NULL, NULL, NULL);
+      return GL_TRUE;
+   }
+
+   imesa = (savageContextPtr) driContextPriv->driverPrivate;
+   drawFb = (GLframebuffer *) driDrawPriv->driverPrivate;
+   readFb = (GLframebuffer *) driReadPriv->driverPrivate;
+   frontRb = (driRenderbuffer *)
+      drawFb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+   backRb = (driRenderbuffer *)
+      drawFb->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+
+   /* Sanity checks only; the renderbuffers are not used otherwise. */
+   assert(frontRb->Base.Data);
+   if (imesa->glCtx->Visual.doubleBufferMode) {
+      assert(backRb->Base.Data);
+   }
+
+   imesa->driReadable = driReadPriv;
+   imesa->driDrawable = driDrawPriv;
+   imesa->dirty = ~0;
+
+   _mesa_make_current(imesa->glCtx, drawFb, readFb);
+
+   savageXMesaWindowMoved( imesa );
+
+   return GL_TRUE;
+}
+
+
+/**
+ * Take the hardware lock after contention was detected and bring the
+ * driver's view of shared state up to date.
+ *
+ * After drmGetLock() this revalidates the drawable info, flags a lost
+ * hardware context if another context owned the hardware, ages the
+ * texture heaps and reacts to a changed drawable stamp.
+ *
+ * \param imesa  driver context taking the lock
+ * \param flags  passed through unchanged to drmGetLock()
+ */
+void savageGetLock( savageContextPtr imesa, GLuint flags ) 
+{
+   __DRIdrawable *const drawable = imesa->driDrawable;
+   __DRIdrawable *const readable = imesa->driReadable;
+   __DRIscreen *sPriv = imesa->driScreen;
+   drm_savage_sarea_t *sarea = imesa->sarea;
+   int me = imesa->hHWContext;
+   /* Stamp sampled before validation; compared again at the end. */
+   int stamp = drawable->lastStamp; 
+   int heap;
+   /* 0 means "no event emitted yet" for the heap loop below. */
+   unsigned int timestamp = 0;
+
+  
+
+   /* We know there has been contention.
+    */
+   drmGetLock(imesa->driFd, imesa->hHWContext, flags);	
+
+
+   /* Note contention for throttling hint
+    */
+   imesa->any_contend = 1;
+
+   /* If the window moved, may need to set a new cliprect now.
+    *
+    * NOTE: This releases and regains the hw lock, so all state
+    * checking must be done *after* this call:
+    */
+   DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable);
+   if (drawable != readable) {
+      DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable);
+   }
+
+
+   /* If we lost context, need to dump all registers to hardware.
+    * Note that we don't care about 2d contexts, even if they perform
+    * accelerated commands, so the DRI locking in the X server is even
+    * more broken than usual.
+    */
+   if (sarea->ctxOwner != me) {
+      imesa->dirty |= (SAVAGE_UPLOAD_LOCAL |
+		       SAVAGE_UPLOAD_GLOBAL |
+		       SAVAGE_UPLOAD_FOGTBL |
+		       SAVAGE_UPLOAD_TEX0 |
+		       SAVAGE_UPLOAD_TEX1 |
+		       SAVAGE_UPLOAD_TEXGLOBAL);
+      imesa->lostContext = GL_TRUE;
+      sarea->ctxOwner = me;
+   }
+
+   for (heap = 0; heap < imesa->lastTexHeap; ++heap) {
+      /* If a heap was changed, update its timestamp. Do this before
+       * DRI_AGE_TEXTURES updates the local_age. */
+      if (imesa->textureHeaps[heap] &&
+	  imesa->textureHeaps[heap]->global_age[0] >
+	  imesa->textureHeaps[heap]->local_age) {
+	 /* Emit a single event and share its timestamp between all
+	  * heaps that changed. */
+	 if (timestamp == 0)
+	    timestamp = savageEmitEventLocked(imesa, 0);
+	 imesa->textureHeaps[heap]->timestamp = timestamp;
+      }
+      /* NOTE(review): textureHeaps[heap] may be NULL here; this relies on
+       * DRI_AGE_TEXTURES tolerating a NULL heap -- confirm in texmem.h. */
+      DRI_AGE_TEXTURES( imesa->textureHeaps[heap] );
+   }
+
+   /* NOTE(review): savageXMesaWindowMoved() also calls
+    * driUpdateFramebufferSize() for the draw drawable, so the explicit
+    * call below looks redundant. */
+   if (drawable->lastStamp != stamp) {
+      driUpdateFramebufferSize(imesa->glCtx, drawable);
+      savageXMesaWindowMoved( imesa );
+   }
+}
+
+/**
+ * Build the list of framebuffer configs for the given pixel, depth and
+ * stencil sizes.
+ *
+ * Every depth size is offered without and with stencil; when the hardware
+ * has no stencil (stencil_bits == 0) an 8 bit stencil is still offered
+ * and the resulting configs are rated GLX_SLOW_CONFIG.
+ *
+ * \param psp               screen (unused)
+ * \param pixel_bits        colour buffer depth; 16 selects RGB565
+ * \param depth_bits        depth buffer size
+ * \param stencil_bits      hardware stencil size, 0 if none
+ * \param have_back_buffer  whether double-buffered configs are offered
+ *
+ * \return NULL-terminated config list, or NULL if creation failed.
+ */
+static const  __DRIconfig **
+savageFillInModes( __DRIscreen *psp,
+		   unsigned pixel_bits, unsigned depth_bits,
+		   unsigned stencil_bits, GLboolean have_back_buffer )
+{
+    /* Right now GLX_SWAP_COPY_OML isn't supported, but it would be easy
+     * enough to add support.  Basically, if a context is created with an
+     * fbconfig where the swap method is GLX_SWAP_COPY_OML, pageflipping
+     * will never be used.
+     *
+     * FK: What about drivers that don't use page flipping? Could they
+     * just expose GLX_SWAP_COPY_OML?
+     */
+    static const GLenum back_buffer_modes[] = {
+	GLX_NONE, GLX_SWAP_UNDEFINED_OML /*, GLX_SWAP_COPY_OML */
+    };
+
+    const unsigned depth_buffer_factor =
+	(depth_bits == 0 && stencil_bits == 0) ? 1 : 2;
+    const unsigned back_buffer_factor = have_back_buffer ? 2 : 1;
+    uint8_t depth_bits_array[2];
+    uint8_t stencil_bits_array[2];
+    uint8_t msaa_samples_array[1];
+    GLenum fb_format;
+    GLenum fb_type;
+    __DRIconfig **configs;
+    __DRIconfig **cfg;
+
+    /* Same depth size for both the stencil-less and the stencil variant. */
+    depth_bits_array[0] = depth_bits;
+    depth_bits_array[1] = depth_bits;
+
+    /* Just like with the accumulation buffer, always provide some modes
+     * with a stencil buffer.  It will be a sw fallback, but some apps won't
+     * care about that.
+     */
+    stencil_bits_array[0] = 0;
+    stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
+
+    msaa_samples_array[0] = 0;
+
+    if (pixel_bits != 16) {
+	fb_format = GL_BGR;
+	fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+    }
+    else {
+	fb_format = GL_RGB;
+	fb_type = GL_UNSIGNED_SHORT_5_6_5;
+    }
+
+    configs = driCreateConfigs(fb_format, fb_type,
+			       depth_bits_array, stencil_bits_array,
+			       depth_buffer_factor,
+			       back_buffer_modes, back_buffer_factor,
+			       msaa_samples_array, 1, GL_TRUE);
+    if (configs == NULL) {
+	fprintf( stderr, "[%s:%u] Error creating FBConfig!\n",
+		 __func__, __LINE__ );
+	return NULL;
+    }
+
+    /* Mark the visual as slow if there are "fake" stencil bits.
+     */
+    for (cfg = configs; *cfg; cfg++) {
+	__GLcontextModes *modes = &(*cfg)->modes;
+
+	if (modes->stencilBits == 0)
+	    continue;
+	if (modes->stencilBits != stencil_bits)
+	    modes->visualRating = GLX_SLOW_CONFIG;
+    }
+
+    return (const __DRIconfig **) configs;
+}
+
+
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ *
+ * Checks the DRI, DDX and DRM versions against the minimum versions this
+ * driver expects, initializes the screen via savageInitDriver() and then
+ * builds the config list from the DDX-provided screen layout.
+ *
+ * \todo maybe fold this into savageInitDriver
+ *
+ * \return the configs supported by this driver, or NULL on a version
+ *         mismatch or when screen initialization fails
+ */
+static const __DRIconfig **
+savageInitScreen(__DRIscreen *psp)
+{
+   static const __DRIversion ddx_expected = { 2, 0, 0 };
+   static const __DRIversion dri_expected = { 4, 0, 0 };
+   static const __DRIversion drm_expected = { 2, 1, 0 };
+   SAVAGEDRIPtr dri_priv = (SAVAGEDRIPtr)psp->pDevPriv;
+   unsigned pixelBits, depthBits, stencilBits;
+   GLboolean haveBack;
+
+   if ( ! driCheckDriDdxDrmVersions2( "Savage",
+				      &psp->dri_version, & dri_expected,
+				      &psp->ddx_version, & ddx_expected,
+				      &psp->drm_version, & drm_expected )
+	|| !savageInitDriver(psp) )
+      return NULL;
+
+   /* 16 bpp screens get a 16 bit depth buffer and no stencil, everything
+    * else gets 24 bit depth plus 8 bit stencil. */
+   pixelBits = dri_priv->cpp*8;
+   if (dri_priv->cpp == 2) {
+      depthBits = 16;
+      stencilBits = 0;
+   }
+   else {
+      depthBits = 24;
+      stencilBits = 8;
+   }
+   /* A back buffer exists when it does not share the depth buffer offset. */
+   haveBack = (dri_priv->backOffset != dri_priv->depthOffset);
+
+   return savageFillInModes( psp, pixelBits, depthBits, stencilBits,
+			     haveBack );
+}
+
+/* Driver entry points handed to the DRI common layer.  The initializer is
+ * positional, so the order below must match the member order of
+ * struct __DriverAPIRec (declared elsewhere). */
+const struct __DriverAPIRec driDriverAPI = {
+   savageInitScreen, 
+   savageDestroyScreen,
+   savageCreateContext,
+   savageDestroyContext,
+   savageCreateBuffer,
+   savageDestroyBuffer,
+   savageSwapBuffers,
+   savageMakeCurrent,
+   savageUnbindContext
+};
+
+/* This is the table of extensions that the loader will dlsym() for.
+ * The list is NULL-terminated. */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+    &driCoreExtension.base,
+    &driLegacyExtension.base,
+    NULL
+};
diff --git a/src/mesa/drivers/dri/savage/savagecontext.h b/src/mesa/drivers/dri/savage/savagecontext.h
new file mode 100644
index 0000000000..ba1e6e1e1a
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savagecontext.h
@@ -0,0 +1,321 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+
+#ifndef SAVAGECONTEXT_INC
+#define SAVAGECONTEXT_INC
+
+typedef struct savage_context_t savageContext;
+typedef struct savage_context_t *savageContextPtr;
+typedef struct savage_texture_object_t *savageTextureObjectPtr;
+
+#include "dri_util.h"
+#include "main/mtypes.h"
+#include "xf86drm.h"
+#include "drm.h"
+#include "savage_drm.h"
+#include "savage_init.h"
+#include "savage_3d_reg.h"
+#include "main/mm.h"
+#include "tnl/t_vertex.h"
+
+#include "texmem.h"
+
+#include "xmlconfig.h"
+
+/* Reasons to fallback on all primitives.
+ */
+#define SAVAGE_FALLBACK_TEXTURE        0x1
+#define SAVAGE_FALLBACK_DRAW_BUFFER    0x2
+#define SAVAGE_FALLBACK_READ_BUFFER    0x4
+#define SAVAGE_FALLBACK_COLORMASK      0x8  
+#define SAVAGE_FALLBACK_SPECULAR       0x10 
+#define SAVAGE_FALLBACK_LOGICOP        0x20
+/* frank 2001/11/12: added the stencil fallback */
+#define SAVAGE_FALLBACK_STENCIL        0x40
+#define SAVAGE_FALLBACK_RENDERMODE     0x80
+#define SAVAGE_FALLBACK_BLEND_EQ       0x100
+#define SAVAGE_FALLBACK_NORAST         0x200
+#define SAVAGE_FALLBACK_PROJ_TEXTURE   0x400
+
+
+#define HW_CULL    1
+
+/* for savagectx.new_state - manage GL->driver state changes
+ */
+#define SAVAGE_NEW_TEXTURE 0x1
+#define SAVAGE_NEW_CULL    0x2
+
+/* What needs to be changed for the current vertex dma buffer?
+ * This will go away!
+ */
+#define SAVAGE_UPLOAD_LOCAL	0x1  /* DrawLocalCtrl (S4) or 
+					DrawCtrl and ZBufCtrl (S3D) */
+#define SAVAGE_UPLOAD_TEX0	0x2  /* texture unit 0 */
+#define SAVAGE_UPLOAD_TEX1	0x4  /* texture unit 1 (S4 only) */
+#define SAVAGE_UPLOAD_FOGTBL	0x8  /* fog table */
+#define SAVAGE_UPLOAD_GLOBAL	0x10 /* most global regs */
+#define SAVAGE_UPLOAD_TEXGLOBAL 0x20 /* TexBlendColor (S4 only) */
+
+/* Maximum number of vertices in the vertex buffer. */
+#define SAVAGE_MAX_VERTEXS 0x10000
+
+/* Don't make it too big. We don't want to buffer up a whole frame
+ * that would force the application to wait later. */
+#define SAVAGE_CMDBUF_SIZE 1024
+
+/* Use the templated vertex formats:
+ */
+#define TAG(x) savage##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+/* Signatures of the triangle, line and point drawing callbacks; each one
+ * receives the driver context and the vertices of one primitive. */
+typedef void (*savage_tri_func)( savageContextPtr, savageVertex *,
+				 savageVertex *, savageVertex * );
+typedef void (*savage_line_func)( savageContextPtr,
+				  savageVertex *, savageVertex * );
+typedef void (*savage_point_func)( savageContextPtr, savageVertex * );
+
+
+/**************************************************************
+ ****************    enums for chip IDs ************************
+ **************************************************************/
+
+#define CHIP_S3GX3MS1NB             0x8A25
+#define CHIP_S3GX3MS1NBK            0x8A26
+#define CHIP_S3TWISTER              0x8D01
+#define CHIP_S3TWISTERK             0x8D02
+#define CHIP_S3TWISTER_P4M          0x8D04
+#define CHIP_S3PARAMOUNT128         0x8C22              /*SuperSavage 128/MX*/
+#define CHIP_S3TRISTAR128SDR        0x8C2A              /*SuperSavage 128/IX*/
+#define CHIP_S3TRISTAR64SDRM7       0x8C2C              /*SuperSavage/IX M7 Package*/
+#define CHIP_S3TRISTAR64SDR         0x8C2E              /*SuperSavage/IX*/
+#define CHIP_S3TRISTAR64CDDR        0x8C2F              /*SuperSavage/IXC DDR*/
+
+/* True when the screen's deviceID is one of the CHIP_S3* IDs defined
+ * above.  The argument is parenthesized so that any pointer-valued
+ * expression can be passed; note that it is evaluated several times. */
+#define IS_SAVAGE(imesa) ((imesa)->savageScreen->deviceID == CHIP_S3GX3MS1NB ||	\
+			(imesa)->savageScreen->deviceID == CHIP_S3GX3MS1NBK || \
+                        (imesa)->savageScreen->deviceID == CHIP_S3TWISTER || \
+                        (imesa)->savageScreen->deviceID == CHIP_S3TWISTERK || \
+                        (imesa)->savageScreen->deviceID == CHIP_S3TWISTER_P4M || \
+                        (imesa)->savageScreen->deviceID == CHIP_S3PARAMOUNT128 || \
+                        (imesa)->savageScreen->deviceID == CHIP_S3TRISTAR128SDR || \
+                        (imesa)->savageScreen->deviceID == CHIP_S3TRISTAR64SDRM7 || \
+                        (imesa)->savageScreen->deviceID == CHIP_S3TRISTAR64SDR || \
+			(imesa)->savageScreen->deviceID == CHIP_S3TRISTAR64CDDR )
+
+
+/* Bookkeeping for one vertex buffer (DMA or client memory). */
+struct savage_vtxbuf_t {
+    GLuint total, used, flushed; /* in 32 bit units */
+    GLuint idx;		/* for DMA buffers */
+    uint32_t *buf;	/* start of the vertex storage */
+};
+
+/* Command buffer made of drm_savage_cmd_header_t sized entries. */
+struct savage_cmdbuf_t {
+    GLuint size; /* size in qwords */
+    drm_savage_cmd_header_t *base;  /* initial state starts here */
+    drm_savage_cmd_header_t *start; /* drawing/state commands start here */
+    drm_savage_cmd_header_t *write; /* append stuff here */
+};
+
+/* State of the indexed (ELT) drawing command currently being built. */
+struct savage_elt_t {
+    GLuint n;				/* number of elts currently allocated */
+    drm_savage_cmd_header_t *cmd;	/* the indexed drawing command */
+};
+
+
+/* Per-context driver state of the Savage DRI driver.  One instance is
+ * allocated zero-filled for every rendering context and reachable from
+ * the Mesa context through SAVAGE_CONTEXT(). */
+struct savage_context_t {
+    GLint refcount;
+
+    GLcontext *glCtx;
+
+    /* Number of entries of textureHeaps[] in use; entries may be NULL. */
+    int lastTexHeap;
+    driTexHeap *textureHeaps[SAVAGE_NR_TEX_HEAPS];
+    driTextureObject swapped;
+
+    driTextureObject *CurrentTexObj[2];
+
+    /* Hardware state
+     */
+
+    savageRegisters regs, oldRegs, globalRegMask;
+
+    /* Manage our own state */
+    GLuint new_state; 
+    GLuint new_gl_state;
+    GLboolean ptexHack;
+
+    /* Command buffer */
+    struct savage_cmdbuf_t cmdBuf;
+
+    /* Elt book-keeping */
+    struct savage_elt_t elts;
+    GLint firstElt;
+
+    /* Vertex buffers */
+    struct savage_vtxbuf_t dmaVtxBuf, clientVtxBuf;
+    struct savage_vtxbuf_t *vtxBuf;	/* the buffer currently in use */
+
+    /* aperture base */
+    GLubyte *apertureBase[5];
+    GLuint aperturePitch;
+    /* Manage hardware state */
+    GLuint dirty;			/* SAVAGE_UPLOAD_* bits */
+    GLboolean lostContext;
+    GLuint bTexEn1;
+    /* One of the few bits of hardware state that can't be calculated
+     * completely on the fly:
+     */
+    GLuint LcsCullMode;
+    GLuint texEnvColor;
+
+   /* Vertex state 
+    */
+   GLuint vertex_size;
+   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+   GLuint vertex_attr_count;
+   char *verts;			/* points to tnl->clipspace.vertex_buf */
+
+   /* Rasterization state 
+    */
+   GLuint SetupNewInputs;
+   GLuint SetupIndex;
+   GLuint RenderIndex;
+   
+   GLuint hw_primitive;
+   GLenum raster_primitive;
+   GLenum render_primitive;
+
+   GLuint skip;
+   GLubyte HwPrim;
+   GLuint HwVertexSize;
+
+   /* Fallback rasterization functions 
+    */
+   savage_point_func draw_point;
+   savage_line_func draw_line;
+   savage_tri_func draw_tri;
+
+    /* Funny mesa mirrors
+     */
+    GLuint MonoColor;
+    GLuint ClearColor;
+    GLfloat depth_scale;
+    GLfloat hw_viewport[16];
+    /* DRI stuff */
+    GLuint bufferSize;
+
+    GLframebuffer *glBuffer;
+   
+    /* Two flags to keep track of fallbacks. */
+    /* presumably a mask of SAVAGE_FALLBACK_* bits -- TODO confirm */
+    GLuint Fallback;
+
+    GLuint needClip;
+
+    /* These refer to the current draw (front vs. back) buffer:
+     */
+    int drawX;   		/* origin of drawable in draw buffer */
+    int drawY;
+    GLuint numClipRects;		/* cliprects for that buffer */
+    GLint currentClip;
+    drm_clip_rect_t *pClipRects;
+
+    /*  use this bit to support single/double buffer */
+    GLuint IsDouble;
+    /*  use this to indicate Fullscreen mode */   
+    GLuint IsFullScreen; /* FIXME - open/close fullscreen is gone, is this needed? */
+    GLuint backup_frontOffset;
+    GLuint backup_backOffset;
+    GLuint backup_frontBitmapDesc;
+    GLuint toggle;
+    GLuint backup_streamFIFO;
+    GLuint NotFirstFrame;
+   
+    GLboolean inSwap;
+    GLuint lastSwap;
+    GLuint ctxAge;
+    GLuint dirtyAge;
+    GLuint any_contend;		/* throttle me harder */
+
+    /* Scissor state needs to be mirrored so buffered commands can be
+     * emitted with the old scissor state when scissor state changes.
+     */
+    struct {
+	GLboolean enabled;
+	GLint x, y;
+	GLsizei w, h;
+    } scissor;
+
+    drm_context_t hHWContext;
+    drm_hw_lock_t *driHwLock;
+    GLuint driFd;
+
+    __DRIdrawable *driDrawable;
+    __DRIdrawable *driReadable;
+
+    __DRIscreen *driScreen;
+    savageScreenPrivate *savageScreen; 
+    drm_savage_sarea_t *sarea;
+
+    GLboolean hw_stencil;
+
+    /* Performance counters
+     */
+    GLuint c_textureSwaps;
+
+    /* Configuration cache
+     */
+    driOptionCache optionCache;
+    GLint texture_depth;
+    GLboolean no_rast;
+    GLboolean float_depth;
+    GLboolean enable_fastpath;
+    GLboolean enable_vdma;
+    GLboolean sync_frames;
+};
+
+/* Driver context attached to a Mesa GLcontext.  The argument is
+ * parenthesized so that any pointer-valued expression can be passed. */
+#define SAVAGE_CONTEXT(ctx) ((savageContextPtr)((ctx)->DriverCtx))
+
+/* To remove all debugging, make sure SAVAGE_DEBUG is defined as a
+ * preprocessor symbol, and equal to zero.  
+ */
+#ifndef SAVAGE_DEBUG
+extern int SAVAGE_DEBUG;
+#endif
+
+#define DEBUG_FALLBACKS      0x001
+#define DEBUG_VERBOSE_API    0x002
+#define DEBUG_VERBOSE_TEX    0x004
+#define DEBUG_VERBOSE_MSG    0x008
+#define DEBUG_DMA            0x010
+#define DEBUG_STATE          0x020
+
+#define TARGET_FRONT    0x0
+#define TARGET_BACK     0x1
+#define TARGET_DEPTH    0x2
+
+#define SUBPIXEL_X -0.5
+#define SUBPIXEL_Y -0.375
+
+#endif
diff --git a/src/mesa/drivers/dri/savage/savagedd.c b/src/mesa/drivers/dri/savage/savagedd.c
new file mode 100644
index 0000000000..bbf49aec27
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savagedd.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "main/mtypes.h"
+#include "main/framebuffer.h"
+
+#include <stdio.h>
+
+#include "main/mm.h"
+
+#include "savagedd.h"
+#include "savagestate.h"
+#include "savagetex.h"
+#include "savagecontext.h"
+
+#include "utils.h"
+
+
+#define DRIVER_DATE "20061110"
+
+/***************************************
+ * Mesa's Driver Functions
+ ***************************************/
+
+/* Driver GetString hook: answers GL_VENDOR and GL_RENDERER, NULL otherwise. */
+static const GLubyte *savageDDGetString( GLcontext *ctx, GLenum name )
+{
+   static const char *const cardNames[S3_LAST] = {
+       "Unknown",
+       "Savage3D",
+       "Savage/MX/IX",
+       "Savage4",
+       "ProSavage",
+       "Twister",
+       "ProSavageDDR",
+       "SuperSavage",
+       "Savage2000"
+   };
+   /* static: the GL_RENDERER result points into this buffer after return */
+   static char buffer[128];
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   savageScreenPrivate *screen = imesa->savageScreen;
+   enum S3CHIPTAGS chipset = screen->chipset;
+
+   if (chipset < S3_SAVAGE3D || chipset >= S3_LAST)
+      chipset = S3_UNKNOWN; /* should not happen */
+
+   switch (name) {
+   case GL_VENDOR:
+      return (const GLubyte *)"S3 Graphics Inc.";
+   case GL_RENDERER:
+      (void) driGetRendererString( buffer, cardNames[chipset], DRIVER_DATE,
+				   screen->agpMode );
+      return (const GLubyte *)buffer;
+   default:
+      return NULL;
+   }
+}
+#if 0
+static GLint savageGetParameteri(const GLcontext *ctx, GLint param)
+{
+   switch (param) {
+   case DD_HAVE_HARDWARE_FOG:
+      return 1;
+   default:
+      return 0;
+   }
+}
+#endif
+
+/* Install this file's GetString implementation in ctx's driver table. */
+void savageDDInitDriverFuncs( GLcontext *ctx )
+{
+   ctx->Driver.GetString = savageDDGetString;
+}
diff --git a/src/mesa/drivers/dri/savage/savagedd.h b/src/mesa/drivers/dri/savage/savagedd.h
new file mode 100644
index 0000000000..698a8d5de9
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savagedd.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef SAVAGEDD_INC
+#define SAVAGEDD_INC
+
+#include "main/context.h"
+
+void savageDDInitDriverFuncs( GLcontext *ctx );
+#endif
diff --git a/src/mesa/drivers/dri/savage/savageioctl.c b/src/mesa/drivers/dri/savage/savageioctl.c
new file mode 100644
index 0000000000..9e181ce3be
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savageioctl.c
@@ -0,0 +1,662 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/dd.h"
+#include "main/context.h"
+#include "main/colormac.h"
+#include "main/mm.h"
+#include "swrast/swrast.h"
+
+#include "savagecontext.h"
+#include "savageioctl.h"
+#include "savagestate.h"
+#include "savagespan.h"
+
+#include "drm.h"
+#include <sys/timeb.h>
+
+#define DEPTH_SCALE_16 ((1<<16)-1)
+#define DEPTH_SCALE_24 ((1<<24)-1)
+
+/* Obtain one DMA buffer from the DRM and install it as the empty imesa->dmaVtxBuf. */
+void savageGetDMABuffer( savageContextPtr imesa )
+{
+   int idx = 0;
+   int size = 0;
+   drmDMAReq dma;
+   int retcode;
+   drmBufPtr buf;
+
+   assert (imesa->savageScreen->bufs);
+
+   if (SAVAGE_DEBUG & DEBUG_DMA)
+      fprintf(stderr,  "Getting dma buffer\n");
+
+   dma.context = imesa->hHWContext;
+   dma.send_count = 0;
+   dma.send_list = NULL;
+   dma.send_sizes = NULL;
+   dma.flags = 0;
+   dma.request_count = 1;
+   dma.request_size = imesa->bufferSize;
+   dma.request_list = &idx;
+   dma.request_sizes = &size;
+   dma.granted_count = 0;
+
+   /* Request only: one buffer of bufferSize is asked for, nothing is sent. */
+   if (SAVAGE_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "drmDMA (get) ctx %d count %d size 0x%x\n",
+	   dma.context, dma.request_count,
+	   dma.request_size);
+   /* Retry until drmDMA succeeds and reports a granted, non-empty buffer. */
+   while (1) {
+      retcode = drmDMA(imesa->driFd, &dma);
+
+      if (SAVAGE_DEBUG & DEBUG_DMA)
+	 fprintf(stderr, "retcode %d sz %d idx %d count %d\n",
+		 retcode,
+		 dma.request_sizes[0],
+		 dma.request_list[0],
+		 dma.granted_count);
+
+      if (retcode == 0 &&
+	  dma.request_sizes[0] &&
+	  dma.granted_count)
+	 break;
+
+      if (SAVAGE_DEBUG & DEBUG_DMA)
+	 fprintf(stderr, "\n\nflush");
+   }
+
+   buf = &(imesa->savageScreen->bufs->list[idx]); /* idx was filled in through dma.request_list */
+
+   if (SAVAGE_DEBUG & DEBUG_DMA)
+      fprintf(stderr,
+	   "drmDMA (get) returns size[0] 0x%x idx[0] %d\n"
+	   "dma_buffer now: buf idx: %d size: %d used: %d addr %p\n",
+	   dma.request_sizes[0], dma.request_list[0],
+	   buf->idx, buf->total,
+	   buf->used, buf->address);
+   /* Start empty; total is buf->total / 4 (presumably bytes -> 32-bit words, TODO confirm). */
+   imesa->dmaVtxBuf.total = buf->total / 4;
+   imesa->dmaVtxBuf.used = 0;
+   imesa->dmaVtxBuf.flushed = 0;
+   imesa->dmaVtxBuf.idx = buf->idx;
+   imesa->dmaVtxBuf.buf = (uint32_t *)buf->address;
+
+   if (SAVAGE_DEBUG & DEBUG_DMA)
+      fprintf(stderr, "finished getbuffer\n");
+}
+
+#if 0
+/* Still keeping this around because it demonstrates page flipping and
+ * automatic z-clear. */
+static void savage_BCI_clear(GLcontext *ctx, drm_savage_clear_t *pclear)
+{
+	savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+	int nbox = imesa->sarea->nbox;
+	drm_clip_rect_t *pbox = imesa->sarea->boxes;
+        int i;
+
+	
+      	if (nbox > SAVAGE_NR_SAREA_CLIPRECTS)
+     		nbox = SAVAGE_NR_SAREA_CLIPRECTS;
+
+	for (i = 0 ; i < nbox ; i++, pbox++) {
+		unsigned int x = pbox->x1;
+		unsigned int y = pbox->y1;
+		unsigned int width = pbox->x2 - x;
+		unsigned int height = pbox->y2 - y;
+ 		uint32_t *bciptr;
+
+		if (pbox->x1 > pbox->x2 ||
+		    pbox->y1 > pbox->y2 ||
+		    pbox->x2 > imesa->savageScreen->width ||
+		    pbox->y2 > imesa->savageScreen->height)
+			continue;
+
+	   	if ( pclear->flags & SAVAGE_FRONT ) {
+		        bciptr = savageDMAAlloc (imesa, 8);
+			WRITE_CMD((bciptr) , 0x4BCC8C00,uint32_t);
+			WRITE_CMD((bciptr) , imesa->savageScreen->frontOffset,uint32_t);
+			WRITE_CMD((bciptr) , imesa->savageScreen->frontBitmapDesc,uint32_t);
+			WRITE_CMD((bciptr) , pclear->clear_color,uint32_t);
+			WRITE_CMD((bciptr) , (y <<16) | x,uint32_t);
+			WRITE_CMD((bciptr) , (height << 16) | width,uint32_t);
+			savageDMACommit (imesa, bciptr);
+		}
+		if ( pclear->flags & SAVAGE_BACK ) {
+		        bciptr = savageDMAAlloc (imesa, 8);
+			WRITE_CMD((bciptr) , 0x4BCC8C00,uint32_t);
+			WRITE_CMD((bciptr) , imesa->savageScreen->backOffset,uint32_t);
+			WRITE_CMD((bciptr) , imesa->savageScreen->backBitmapDesc,uint32_t);
+			WRITE_CMD((bciptr) , pclear->clear_color,uint32_t);
+			WRITE_CMD((bciptr) , (y <<16) | x,uint32_t);
+			WRITE_CMD((bciptr) , (height << 16) | width,uint32_t);
+			savageDMACommit (imesa, bciptr);
+		}
+		
+		if ( pclear->flags & (SAVAGE_DEPTH |SAVAGE_STENCIL) ) {
+		        uint32_t writeMask = 0x0;
+		        if(imesa->hw_stencil)
+		        {        
+		            if(pclear->flags & SAVAGE_STENCIL)
+		            {
+		          
+		                 writeMask |= 0xFF000000;
+		            }
+		            if(pclear->flags & SAVAGE_DEPTH)
+		            {
+		                 writeMask |= 0x00FFFFFF;
+		            }
+                        }
+		        if(imesa->IsFullScreen && imesa->NotFirstFrame &&
+			   imesa->savageScreen->chipset >= S3_SAVAGE4)
+		        {
+		            imesa->regs.s4.zBufCtrl.ni.autoZEnable = GL_TRUE;
+                            imesa->regs.s4.zBufCtrl.ni.frameID =
+				~imesa->regs.s4.zBufCtrl.ni.frameID;
+                            
+                            imesa->dirty |= SAVAGE_UPLOAD_GLOBAL;
+		        }
+		        else
+		        {
+		            if(imesa->IsFullScreen)
+		                imesa->NotFirstFrame = GL_TRUE;
+		                
+			    if(imesa->hw_stencil)
+			    {
+				bciptr = savageDMAAlloc (imesa, 10);
+			        if(writeMask != 0xFFFFFFFF)
+			        {
+                                    WRITE_CMD((bciptr) , 0x960100D7,uint32_t);
+                                    WRITE_CMD((bciptr) , writeMask,uint32_t);
+                                }
+                            }
+			    else
+			    {
+				bciptr = savageDMAAlloc (imesa, 6);
+			    }
+
+			    WRITE_CMD((bciptr) , 0x4BCC8C00,uint32_t);
+			    WRITE_CMD((bciptr) , imesa->savageScreen->depthOffset,uint32_t);
+			    WRITE_CMD((bciptr) , imesa->savageScreen->depthBitmapDesc,uint32_t);
+			    WRITE_CMD((bciptr) , pclear->clear_depth,uint32_t);
+			    WRITE_CMD((bciptr) , (y <<16) | x,uint32_t);
+			    WRITE_CMD((bciptr) , (height << 16) | width,uint32_t);
+			    if(imesa->hw_stencil)
+			    {
+			        if(writeMask != 0xFFFFFFFF)
+			        {
+			           WRITE_CMD((bciptr) , 0x960100D7,uint32_t);
+                                   WRITE_CMD((bciptr) , 0xFFFFFFFF,uint32_t);  
+			        }
+			    }
+			    savageDMACommit (imesa, bciptr);
+			}
+		}
+	}
+	/* FK: Make sure that the clear stuff is emitted. Otherwise a
+	   software fallback may get overwritten by a delayed clear. */
+	savageDMAFlush (imesa);
+}
+
+static void savage_BCI_swap(savageContextPtr imesa)
+{
+    int nbox = imesa->sarea->nbox;
+    drm_clip_rect_t *pbox = imesa->sarea->boxes;
+    int i;
+    volatile uint32_t *bciptr;
+    
+    if (nbox > SAVAGE_NR_SAREA_CLIPRECTS)
+        nbox = SAVAGE_NR_SAREA_CLIPRECTS;
+    savageDMAFlush (imesa);
+    
+    if(imesa->IsFullScreen)
+    { /* full screen*/
+        unsigned int tmp0;
+        tmp0 = imesa->savageScreen->frontOffset; 
+        imesa->savageScreen->frontOffset = imesa->savageScreen->backOffset;
+        imesa->savageScreen->backOffset = tmp0;
+        
+        if(imesa->toggle == TARGET_BACK)
+            imesa->toggle = TARGET_FRONT;
+        else
+            imesa->toggle = TARGET_BACK; 
+        
+        driFlipRenderbuffers(imesa->glCtx->DrawBuffer,
+                             imesa->toggle != TARGET_FRONT);
+
+        imesa->regs.s4.destCtrl.ni.offset = imesa->savageScreen->backOffset>>11;
+        imesa->dirty |= SAVAGE_UPLOAD_GLOBAL;
+        bciptr = SAVAGE_GET_BCI_POINTER(imesa,3);
+        *(bciptr) = 0x960100B0;
+        *(bciptr) = (imesa->savageScreen->frontOffset); 
+        *(bciptr) = 0xA0000000;
+    } 
+    
+    else
+    {  /* Use bitblt copy from back to front buffer*/
+        
+        for (i = 0 ; i < nbox; i++, pbox++)
+        {
+            unsigned int w = pbox->x2 - pbox->x1;
+            unsigned int h = pbox->y2 - pbox->y1;
+            
+            if (pbox->x1 > pbox->x2 ||
+                pbox->y1 > pbox->y2 ||
+                pbox->x2 > imesa->savageScreen->width ||
+                pbox->y2 > imesa->savageScreen->height)
+                continue;
+
+            bciptr = SAVAGE_GET_BCI_POINTER(imesa,6);
+            
+            *(bciptr) = 0x4BCC00C0;
+            
+            *(bciptr) = imesa->savageScreen->backOffset;
+            *(bciptr) = imesa->savageScreen->backBitmapDesc;
+            *(bciptr) = (pbox->y1 <<16) | pbox->x1;   /*x0, y0*/
+            *(bciptr) = (pbox->y1 <<16) | pbox->x1;
+            *(bciptr) = (h << 16) | w;
+        }
+        
+    }
+}
+#endif
+
+/* Store the overlap of rectangles a and b in *out; GL_TRUE if it is non-empty. */
+static GLboolean intersect_rect( drm_clip_rect_t *out,
+				 const drm_clip_rect_t *a,
+				 const drm_clip_rect_t *b )
+{
+   *out = *a;
+   out->x1 = MAX2(out->x1, b->x1);   /* top-left: larger coordinate wins */
+   out->y1 = MAX2(out->y1, b->y1);
+   out->x2 = MIN2(out->x2, b->x2);   /* bottom-right: smaller one wins */
+   out->y2 = MIN2(out->y2, b->y2);
+
+   return (out->x1 < out->x2) && (out->y1 < out->y2);
+}
+
+/* Clip nsrc rectangles from src against clip; pack non-empty results into dest, return their count. */
+static GLuint savageIntersectClipRects(drm_clip_rect_t *dest,
+				       const drm_clip_rect_t *src,
+				       GLuint nsrc,
+				       const drm_clip_rect_t *clip)
+{
+    GLuint kept = 0;
+    const drm_clip_rect_t *end = src + nsrc;
+
+    while (src != end) {
+	/* dest[kept] is scratch until a non-empty overlap commits it */
+	if (intersect_rect(&dest[kept], src, clip))
+	    kept++;
+	src++;
+    }
+    return kept;
+}
+
+/* Driver Clear hook: queue hardware clear commands for the buffers handled here, pass the rest to swrast. */
+static void savageDDClear( GLcontext *ctx, GLbitfield mask )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT( ctx );
+   GLuint colorMask, depthMask, clearColor, clearDepth, flags;
+   GLint cx = ctx->DrawBuffer->_Xmin;
+   GLint cy = ctx->DrawBuffer->_Ymin;
+   GLint cw = ctx->DrawBuffer->_Xmax - cx;
+   GLint ch = ctx->DrawBuffer->_Ymax - cy;
+
+   /* XXX FIX ME: the cx,cy,cw,ch vars are currently ignored! */
+   (void) ch;
+   (void) cw;
+
+   if (SAVAGE_DEBUG & DEBUG_VERBOSE_MSG)
+       fprintf (stderr, "%s\n", __FUNCTION__);
+   /* The depth clear value is 1.0 - Depth.Clear, encoded for float/int and 16/24-bit depth. */
+   clearColor = imesa->ClearColor;
+   if (imesa->float_depth) {
+       if (imesa->savageScreen->zpp == 2)
+	   clearDepth = savageEncodeFloat16(1.0 - ctx->Depth.Clear);
+       else
+	   clearDepth = savageEncodeFloat24(1.0 - ctx->Depth.Clear);
+   } else {
+       if (imesa->savageScreen->zpp == 2)
+	   clearDepth = (GLuint) ((1.0 - ctx->Depth.Clear) * DEPTH_SCALE_16);
+       else
+	   clearDepth = (GLuint) ((1.0 - ctx->Depth.Clear) * DEPTH_SCALE_24);
+   }
+   /* Pack the GL color mask as 565 (cpp 2) or 8888 (cpp 4); it stays 0 for other cpp values. */
+   colorMask = 0;
+   depthMask = 0;
+   switch (imesa->savageScreen->cpp) {
+   case 2:
+       colorMask = PACK_COLOR_565(ctx->Color.ColorMask[0][0],
+				  ctx->Color.ColorMask[0][1],
+				  ctx->Color.ColorMask[0][2]);
+       break;
+   case 4:
+       colorMask = PACK_COLOR_8888(ctx->Color.ColorMask[0][3],
+				   ctx->Color.ColorMask[0][2],
+				   ctx->Color.ColorMask[0][1],
+				   ctx->Color.ColorMask[0][0]);
+       break;
+   }
+
+   flags = 0;
+   /* Move every buffer that is cleared by command from mask into flags. */
+   if (mask & BUFFER_BIT_FRONT_LEFT) {
+      flags |= SAVAGE_FRONT;
+      mask &= ~BUFFER_BIT_FRONT_LEFT;
+   }
+
+   if (mask & BUFFER_BIT_BACK_LEFT) {
+      flags |= SAVAGE_BACK;
+      mask &= ~BUFFER_BIT_BACK_LEFT;
+   }
+
+   if ((mask & BUFFER_BIT_DEPTH) && ctx->Depth.Mask) {
+      flags |= SAVAGE_DEPTH;
+      depthMask |=
+	  (imesa->savageScreen->zpp == 2) ? 0xffffffff : 0x00ffffff;
+      mask &= ~BUFFER_BIT_DEPTH;
+   }
+   /* Stencil is cleared through the depth clear: it only adds the top byte to the write mask. */
+   if((mask & BUFFER_BIT_STENCIL) && imesa->hw_stencil)
+   {
+      flags |= SAVAGE_DEPTH;
+      depthMask |= 0xff000000;
+      mask &= ~BUFFER_BIT_STENCIL;
+   }
+   /* Queue pending vertices before any clear command is allocated. */
+   savageFlushVertices(imesa);
+
+   if (flags) {
+       GLboolean depthCleared = GL_FALSE;
+       if (flags & (SAVAGE_FRONT|SAVAGE_BACK)) {
+	   drm_savage_cmd_header_t *cmd;
+	   cmd = savageAllocCmdBuf(imesa, sizeof(drm_savage_cmd_header_t));
+	   cmd[0].clear0.cmd = SAVAGE_CMD_CLEAR;
+	   if ((flags & SAVAGE_DEPTH) &&
+	       clearDepth == clearColor && depthMask == colorMask) {
+	       cmd[0].clear0.flags = flags;
+	       depthCleared = GL_TRUE; /* same value and mask: one command covers depth too */
+	   } else
+	       cmd[0].clear0.flags = flags & (SAVAGE_FRONT|SAVAGE_BACK);
+	   cmd[1].clear1.mask = colorMask;
+	   cmd[1].clear1.value = clearColor;
+       }
+       /* Depth gets its own command unless it was folded into the color clear above. */
+       if ((flags & SAVAGE_DEPTH) && !depthCleared) {
+	   drm_savage_cmd_header_t *cmd;
+	   cmd = savageAllocCmdBuf(imesa, sizeof(drm_savage_cmd_header_t));
+	   cmd[0].clear0.cmd = SAVAGE_CMD_CLEAR;
+	   cmd[0].clear0.flags = SAVAGE_DEPTH;
+	   cmd[1].clear1.mask = depthMask;
+	   cmd[1].clear1.value = clearDepth;
+       }
+   }
+   /* Whatever is still set in mask is cleared in software. */
+   if (mask) 
+      _swrast_Clear( ctx, mask );
+}
+
+/*
+ * Copy the back buffer to the front buffer by submitting SAVAGE_CMD_SWAP,
+ * after flushing pending work; swap events are used to throttle the caller. */
+void savageSwapBuffers( __DRIdrawable *dPriv )
+{
+   savageContextPtr imesa;
+
+   if (SAVAGE_DEBUG & DEBUG_VERBOSE_MSG)
+       fprintf (stderr, "%s\n================================\n", __FUNCTION__);
+
+   assert(dPriv);
+   assert(dPriv->driContextPriv);
+   assert(dPriv->driContextPriv->driverPrivate);
+
+   imesa = (savageContextPtr) dPriv->driContextPriv->driverPrivate;
+   if (imesa->IsDouble)
+       _mesa_notifySwapBuffers( imesa->glCtx );
+
+   FLUSH_BATCH(imesa);
+   /* sync_frames: emit an event now so the wait below covers this frame's rendering. */
+   if (imesa->sync_frames)
+       imesa->lastSwap = savageEmitEvent( imesa, 0 );
+   /* Otherwise lastSwap is the event emitted after the previous swap (0: nothing to wait for). */
+   if (imesa->lastSwap != 0)
+       savageWaitEvent( imesa, imesa->lastSwap );
+
+   {
+       drm_savage_cmd_header_t *cmd = savageAllocCmdBuf(imesa, 0);
+       cmd->cmd.cmd = SAVAGE_CMD_SWAP;
+       imesa->inSwap = GL_TRUE; /* ignore scissors in savageFlushCmdBuf */
+       savageFlushCmdBuf(imesa, GL_FALSE);
+       imesa->inSwap = GL_FALSE;
+   }
+
+   if (!imesa->sync_frames)
+       /* don't sync, but limit the lag to one frame. */
+       imesa->lastSwap = savageEmitEvent( imesa, 0 );
+}
+/* Emit an event through the DRM and return the count it reports; exits on ioctl failure. */
+unsigned int savageEmitEventLocked( savageContextPtr imesa, unsigned int flags )
+{
+    drm_savage_event_emit_t request;
+    int status;
+    request.flags = flags;
+    request.count = 0;
+    status = drmCommandWriteRead( imesa->driFd, DRM_SAVAGE_BCI_EVENT_EMIT,
+				  &request, sizeof(request) );
+    if (status != 0) {
+	fprintf (stderr, "emit event returned %d\n", status);
+	exit (1);
+    }
+    return request.count;
+}
+unsigned int savageEmitEvent( savageContextPtr imesa, unsigned int flags )
+{ /* Same as savageEmitEventLocked(), but takes the hardware lock itself. */
+    unsigned int ret;
+    LOCK_HARDWARE( imesa );
+    ret = savageEmitEventLocked( imesa, flags );
+    UNLOCK_HARDWARE( imesa );
+    return ret;
+}
+
+/* Wait, via the DRM, for the event identified by count; exits on ioctl failure. */
+void savageWaitEvent( savageContextPtr imesa, unsigned int count )
+{
+    drm_savage_event_wait_t request;
+    int status;
+    request.flags = 0;
+    request.count = count;
+    status = drmCommandWriteRead( imesa->driFd, DRM_SAVAGE_BCI_EVENT_WAIT,
+				  &request, sizeof(request) );
+    if (status != 0) {
+	fprintf (stderr, "wait event returned %d\n", status);
+	exit (1);
+    }
+}
+
+/* Queue a primitive command for the vertices added to the current vertex buffer since the last flush. */
+void savageFlushVertices( savageContextPtr imesa )
+{
+    struct savage_vtxbuf_t *buffer = imesa->vtxBuf;
+
+    if (SAVAGE_DEBUG & DEBUG_VERBOSE_MSG)
+	fprintf (stderr, "%s\n", __FUNCTION__);
+    /* A total of 0 means no buffer is held yet (savageAllocVtxBuf fetches one on demand). */
+    if (!buffer->total)
+	return;
+
+    if (buffer->used > buffer->flushed) {
+	drm_savage_cmd_header_t *cmd;
+	/* State must be updated "per primitive" because hardware
+	 * culling must be disabled for unfilled primitives, points
+	 * and lines. */
+	savageEmitChangedState (imesa);
+	cmd = savageAllocCmdBuf(imesa, 0);
+	cmd->prim.cmd = buffer == &imesa->dmaVtxBuf ?
+	    SAVAGE_CMD_DMA_PRIM : SAVAGE_CMD_VB_PRIM;
+	cmd->prim.prim = imesa->HwPrim;
+	cmd->prim.skip = imesa->skip;
+	cmd->prim.start = buffer->flushed / imesa->HwVertexSize; /* buffer offset -> vertex index */
+	cmd->prim.count = buffer->used / imesa->HwVertexSize - cmd->prim.start;
+	buffer->flushed = buffer->used;
+    }
+}
+/* Submit the queued commands to the DRM (callers hold the hardware lock); discard also gives up the DMA buffer. */
+void savageFlushCmdBufLocked( savageContextPtr imesa, GLboolean discard )
+{
+    __DRIdrawable *dPriv = imesa->driDrawable;
+    /* Nothing to discard when no DMA vertex buffer is held. */
+    if (!imesa->dmaVtxBuf.total)
+	discard = GL_FALSE;
+
+    /* complete indexed drawing commands */
+    savageFlushElts(imesa);
+
+    if (imesa->cmdBuf.write != imesa->cmdBuf.start || discard) {
+	drm_savage_cmdbuf_t cmdbuf;
+	drm_savage_cmd_header_t *start;
+	int ret;
+
+	/* If we lost the context we must restore the initial state (at
+	 * the start of the command buffer). */
+	if (imesa->lostContext) {
+	    start = imesa->cmdBuf.base;
+	    imesa->lostContext = GL_FALSE;
+	} else
+	    start = imesa->cmdBuf.start;
+
+	if ((SAVAGE_DEBUG & DEBUG_DMA) && discard)
+	    fprintf (stderr, "Discarding DMA buffer, used=%u\n",
+		     imesa->dmaVtxBuf.used);
+
+	cmdbuf.dma_idx = imesa->dmaVtxBuf.idx;
+	cmdbuf.discard = discard;
+	cmdbuf.vb_addr = imesa->clientVtxBuf.buf;
+	cmdbuf.vb_size = imesa->clientVtxBuf.total*4;
+	cmdbuf.vb_stride = imesa->HwVertexSize;
+	cmdbuf.cmd_addr = start;
+	cmdbuf.size = (imesa->cmdBuf.write - start);
+	if (!imesa->inSwap && imesa->scissor.enabled) {
+	    drm_clip_rect_t *box = dPriv->pClipRects, *ibox;
+	    drm_clip_rect_t scissor;
+	    GLuint nbox = dPriv->numClipRects, nibox;
+	    /* transform and clip scissor to viewport */
+	    scissor.x1 = MAX2(imesa->scissor.x, 0) + dPriv->x;
+	    scissor.y1 = MAX2(dPriv->h - imesa->scissor.y - imesa->scissor.h,
+			      0) + dPriv->y;
+	    scissor.x2 = MIN2(imesa->scissor.x + imesa->scissor.w,
+			      dPriv->w) + dPriv->x;
+	    scissor.y2 = MIN2(dPriv->h - imesa->scissor.y,
+			      dPriv->h) + dPriv->y;
+	    /* intersect cliprects with scissor; malloc(0) may return NULL */
+	    ibox = malloc(nbox*sizeof(drm_clip_rect_t));
+	    if (!ibox && nbox) {
+		fprintf(stderr, "Out of memory.\n");
+		exit(1);
+	    }
+	    nibox = savageIntersectClipRects(ibox, box, nbox, &scissor);
+	    cmdbuf.nbox = nibox;
+	    cmdbuf.box_addr = ibox;
+	} else {
+	    cmdbuf.nbox = dPriv->numClipRects;
+	    cmdbuf.box_addr = dPriv->pClipRects;
+	}
+
+	ret = drmCommandWrite( imesa->driFd, DRM_SAVAGE_BCI_CMDBUF,
+			       &cmdbuf, sizeof(cmdbuf) );
+	if (ret) {
+	    fprintf (stderr, "cmdbuf ioctl returned %d\n", ret);
+	    exit(1);
+	}
+	/* Only the scissored copy is owned here; free(NULL) is harmless. */
+	if (cmdbuf.box_addr != dPriv->pClipRects) {
+	    free(cmdbuf.box_addr);
+	}
+
+	/* Save the current state at the start of the command buffer. That
+	 * state will only be emitted, if the context was lost since the
+	 * last command buffer. */
+	imesa->cmdBuf.write = imesa->cmdBuf.base;
+	savageEmitOldState(imesa);
+	imesa->cmdBuf.start = imesa->cmdBuf.write;
+    }
+
+    if (discard) {
+	assert (!savageHaveIndexedVerts(imesa));
+	imesa->dmaVtxBuf.total = 0;
+	imesa->dmaVtxBuf.used = 0;
+	imesa->dmaVtxBuf.flushed = 0;
+    }
+    if (!savageHaveIndexedVerts(imesa)) {
+	imesa->clientVtxBuf.used = 0;
+	imesa->clientVtxBuf.flushed = 0;
+    }
+}
+
+/* Take the hardware lock around savageFlushCmdBufLocked(). */
+void savageFlushCmdBuf( savageContextPtr imesa, GLboolean discard ) 
+{
+    if (SAVAGE_DEBUG & DEBUG_VERBOSE_MSG)
+	fprintf (stderr, "%s\n", __FUNCTION__);
+    LOCK_HARDWARE(imesa);
+    savageFlushCmdBufLocked (imesa, discard);
+    UNLOCK_HARDWARE(imesa);
+}
+
+/* Driver Flush hook: queue pending vertices, then submit the command buffer. */
+static void savageDDFlush( GLcontext *ctx )
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    if (SAVAGE_DEBUG & DEBUG_VERBOSE_MSG)
+	fprintf (stderr, "%s\n", __FUNCTION__);
+    savageFlushVertices (imesa);
+    savageFlushCmdBuf(imesa, GL_FALSE);
+}
+/* Driver Finish hook: same as savageDDFlush, then WAIT_IDLE_EMPTY. */
+static void savageDDFinish( GLcontext *ctx  ) 
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    if (SAVAGE_DEBUG & DEBUG_VERBOSE_MSG)
+	fprintf (stderr, "%s\n", __FUNCTION__);
+    savageFlushVertices (imesa);
+    savageFlushCmdBuf(imesa, GL_FALSE);
+    WAIT_IDLE_EMPTY(imesa);
+}
+/* Install this file's Clear, Flush and Finish hooks in ctx's driver table. */
+void savageDDInitIoctlFuncs( GLcontext *ctx )
+{
+   ctx->Driver.Clear = savageDDClear;
+   ctx->Driver.Flush = savageDDFlush;
+   ctx->Driver.Finish = savageDDFinish;
+}
diff --git a/src/mesa/drivers/dri/savage/savageioctl.h b/src/mesa/drivers/dri/savage/savageioctl.h
new file mode 100644
index 0000000000..e7e80816c1
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savageioctl.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef SAVAGE_IOCTL_H
+#define SAVAGE_IOCTL_H
+
+#include "savagecontext.h"
+
+void savageFlushVertices( savageContextPtr imesa );
+/* Event emit/wait; the Locked variant is the one savageEmitEvent calls with the hardware lock held. */
+unsigned int savageEmitEventLocked( savageContextPtr imesa, unsigned int flags );
+unsigned int savageEmitEvent( savageContextPtr imesa, unsigned int flags );
+void savageWaitEvent( savageContextPtr imesa, unsigned int count );
+/* Command buffer submission; savageFlushCmdBuf wraps the Locked variant in LOCK/UNLOCK_HARDWARE. */
+void savageFlushCmdBufLocked( savageContextPtr imesa, GLboolean discard );
+void savageFlushCmdBuf( savageContextPtr imesa, GLboolean discard );
+
+void savageDDInitIoctlFuncs( GLcontext *ctx );
+
+void savageSwapBuffers( __DRIdrawable *dPriv );
+/* Emit an event with SAVAGE_WAIT_2D|SAVAGE_WAIT_3D and wait for it (savageEmitEvent takes the lock). */
+#define WAIT_IDLE_EMPTY(imesa) do { \
+    if (SAVAGE_DEBUG & DEBUG_VERBOSE_MSG) \
+        fprintf (stderr, "WAIT_IDLE_EMPTY in %s\n", __FUNCTION__); \
+    savageWaitEvent(imesa, \
+		    savageEmitEvent(imesa, SAVAGE_WAIT_2D|SAVAGE_WAIT_3D)); \
+} while (0)
+/* As WAIT_IDLE_EMPTY, but emits through savageEmitEventLocked (no locking done here). */
+#define WAIT_IDLE_EMPTY_LOCKED(imesa) do { \
+    if (SAVAGE_DEBUG & DEBUG_VERBOSE_MSG) \
+        fprintf (stderr, "WAIT_IDLE_EMPTY_LOCKED in %s\n", __FUNCTION__); \
+    savageWaitEvent(imesa, savageEmitEventLocked( \
+			imesa, SAVAGE_WAIT_2D|SAVAGE_WAIT_3D)); \
+} while (0)
+/* Queue pending vertices, then submit the command buffer without discarding the DMA buffer. */
+#define FLUSH_BATCH(imesa) do { \
+    if (SAVAGE_DEBUG & DEBUG_VERBOSE_MSG) \
+        fprintf (stderr, "FLUSH_BATCH in %s\n", __FUNCTION__); \
+    savageFlushVertices(imesa); \
+    savageFlushCmdBuf(imesa, GL_FALSE); \
+} while (0)
+
+extern void savageGetDMABuffer( savageContextPtr imesa );
+/* Mark that no indexed vertices are held: firstElt == -1 is the "none" value. */
+static INLINE
+void savageReleaseIndexedVerts( savageContextPtr imesa )
+{
+    imesa->firstElt = -1;
+}
+/* True while firstElt is set, i.e. not the -1 written by savageReleaseIndexedVerts. */
+static INLINE
+GLboolean savageHaveIndexedVerts( savageContextPtr imesa )
+{
+    return (imesa->firstElt != -1);
+}
+/* Reserve words entries in the current vertex buffer, flushing (and, for DMA, replacing) it if they do not fit. */
+static INLINE
+uint32_t *savageAllocVtxBuf( savageContextPtr imesa, GLuint words )
+{
+   struct savage_vtxbuf_t *buffer = imesa->vtxBuf;
+   uint32_t *head;
+
+   if (buffer == &imesa->dmaVtxBuf) {
+       if (!buffer->total) {
+	   LOCK_HARDWARE(imesa);
+	   savageGetDMABuffer(imesa);
+	   UNLOCK_HARDWARE(imesa);
+       } else if (buffer->used + words > buffer->total) {
+	   if (SAVAGE_DEBUG & DEBUG_VERBOSE_MSG)
+	       fprintf (stderr, "... flushing DMA buffer in %s\n",
+			__FUNCTION__);
+	   savageReleaseIndexedVerts(imesa);
+	   savageFlushVertices(imesa);
+	   LOCK_HARDWARE(imesa);
+	   savageFlushCmdBufLocked(imesa, GL_TRUE); /* discard DMA buffer */
+	   savageGetDMABuffer(imesa);
+	   UNLOCK_HARDWARE(imesa);
+       }
+   } else if (buffer->used + words > buffer->total) {
+       if (SAVAGE_DEBUG & DEBUG_VERBOSE_MSG)
+	   fprintf (stderr, "... flushing client vertex buffer in %s\n",
+		    __FUNCTION__);
+       savageReleaseIndexedVerts(imesa);
+       savageFlushVertices(imesa);
+       LOCK_HARDWARE(imesa);
+       savageFlushCmdBufLocked(imesa, GL_FALSE); /* free clientVtxBuf */
+       UNLOCK_HARDWARE(imesa);
+   }
+   /* Hand out the next words entries, starting at the current fill level. */
+   head = &buffer->buf[buffer->used];
+
+   buffer->used += words;
+   return head;
+}
+/* Allocate n vertices for indexed drawing; firstElt records the index of the first one. */
+static INLINE
+uint32_t *savageAllocIndexedVerts( savageContextPtr imesa, GLuint n )
+{
+    uint32_t *ret;
+    savageFlushVertices(imesa);
+    ret = savageAllocVtxBuf(imesa, n*imesa->HwVertexSize);
+    imesa->firstElt = imesa->vtxBuf->flushed / imesa->HwVertexSize;
+    imesa->vtxBuf->flushed = imesa->vtxBuf->used; /* savageFlushVertices sees nothing new to emit */
+    return ret;
+}
+
+/* Flush Elts (no-op when no indexed drawing command is open):
+ * - Complete the drawing command with the correct number of indices.
+ * - Actually allocate entries for the indices in the command buffer.
+ *   (This allocation must succeed without wrapping the cmd buffer!)
+ */
+static INLINE
+void savageFlushElts( savageContextPtr imesa )
+{
+    if (imesa->elts.cmd) {
+	GLuint qwords = (imesa->elts.n + 3) >> 2; /* indices per entry: 4, rounded up */
+	assert(imesa->cmdBuf.write - imesa->cmdBuf.base + qwords
+	       <= imesa->cmdBuf.size);
+	imesa->cmdBuf.write += qwords;
+
+	imesa->elts.cmd->idx.count = imesa->elts.n;
+	imesa->elts.cmd = NULL;
+    }
+}
+
+/* Allocate a command buffer entry with <bytes> bytes of arguments:
+ * - implies savageFlushElts; flushes the command buffer first if it is full
+ * - returns the header entry; the argument entries follow it */
+static INLINE
+drm_savage_cmd_header_t *savageAllocCmdBuf( savageContextPtr imesa, GLuint bytes )
+{
+    drm_savage_cmd_header_t *ret;
+    GLuint qwords = ((bytes + 7) >> 3) + 1; /* round up */
+    assert (qwords < imesa->cmdBuf.size);
+
+    savageFlushElts(imesa);
+
+    if (imesa->cmdBuf.write - imesa->cmdBuf.base + qwords > imesa->cmdBuf.size)
+	savageFlushCmdBuf(imesa, GL_FALSE);
+
+    ret = (drm_savage_cmd_header_t *)imesa->cmdBuf.write;
+    imesa->cmdBuf.write += qwords;
+    return ret;
+}
+
+/* Allocate Elts (requires indexed vertices, see the assert):
+ * - if it doesn't fit, flush the cmd buffer first
+ * - allocates the drawing command on the cmd buffer if there is no
+ *   incomplete indexed drawing command yet
+ * - increments the number of elts. Final allocation is done in savageFlushElts
+ */
+static INLINE
+uint16_t *savageAllocElts( savageContextPtr imesa, GLuint n )
+{
+    uint16_t *ret;
+    GLuint qwords;
+    assert (savageHaveIndexedVerts(imesa));
+
+    if (imesa->elts.cmd)
+	qwords = (imesa->elts.n + n + 3) >> 2;
+    else
+	qwords = ((n + 3) >> 2) + 1; /* +1 for the command header allocated below */
+    if (imesa->cmdBuf.write - imesa->cmdBuf.base + qwords > imesa->cmdBuf.size)
+	savageFlushCmdBuf(imesa, GL_FALSE); /* implies savageFlushElts */
+
+    if (!imesa->elts.cmd) {
+	savageFlushVertices(imesa);
+	imesa->elts.cmd = savageAllocCmdBuf(imesa, 0);
+	imesa->elts.cmd->idx.cmd = (imesa->vtxBuf == &imesa->dmaVtxBuf) ?
+	    SAVAGE_CMD_DMA_IDX : SAVAGE_CMD_VB_IDX;
+	imesa->elts.cmd->idx.prim = imesa->HwPrim;
+	imesa->elts.cmd->idx.skip = imesa->skip;
+	imesa->elts.n = 0;
+    }
+
+    ret = (uint16_t *)(imesa->elts.cmd+1) + imesa->elts.n; /* indices start right after the header */
+    imesa->elts.n += n;
+    return ret;
+}
+#endif
diff --git a/src/mesa/drivers/dri/savage/savagerender.c b/src/mesa/drivers/dri/savage/savagerender.c
new file mode 100644
index 0000000000..c369bb124c
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savagerender.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright 2005  Felix Kuehling
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+ * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Simulate missing primitives with indexed vertices.
+ */
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+
+#include "tnl/t_context.h"
+
+#include "savagecontext.h"
+#include "savagestate.h"
+#include "savageioctl.h"
+
+/*
+ * Standard render tab for Savage4 and smooth shading on Savage3D
+ */
+#define HAVE_POINTS      0
+#define HAVE_LINES       0
+#define HAVE_LINE_STRIPS 0
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS    1
+#define HAVE_POLYGONS    0
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+
+#define HAVE_ELTS        1
+
+#define LOCAL_VARS savageContextPtr imesa = SAVAGE_CONTEXT(ctx) 
+#define INIT( prim ) do {						\
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);			\
+   savageFlushVertices(imesa);						\
+   switch (prim) {							\
+   case GL_TRIANGLES:	   imesa->HwPrim = SAVAGE_PRIM_TRILIST; break;	\
+   case GL_TRIANGLE_STRIP: imesa->HwPrim = SAVAGE_PRIM_TRISTRIP; break;	\
+   case GL_TRIANGLE_FAN:   imesa->HwPrim = SAVAGE_PRIM_TRIFAN; break;	\
+   }									\
+} while (0)
+#define FLUSH()		savageFlushElts(imesa), savageFlushVertices(imesa)
+
+#define GET_CURRENT_VB_MAX_VERTS() \
+   ((imesa->bufferSize/4 - imesa->vtxBuf->used) / imesa->HwVertexSize)
+#define GET_SUBSEQUENT_VB_MAX_VERTS() \
+   (imesa->bufferSize/4 / imesa->HwVertexSize)
+
+#define ALLOC_VERTS( nr ) \
+	savageAllocVtxBuf( imesa, (nr) * imesa->HwVertexSize )
+#define EMIT_VERTS( ctx, j, nr, buf ) \
+	_tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf )
+
+#define ELTS_VARS( buf ) GLushort *dest = buf, firstElt = imesa->firstElt
+#define ELT_INIT( prim ) INIT(prim)
+
+/* (size - used - 1 qword for drawing command) * 4 elts per qword */
+#define GET_CURRENT_VB_MAX_ELTS() \
+   ((imesa->cmdBuf.size - (imesa->cmdBuf.write - imesa->cmdBuf.base) - 1)*4)
+/* (size - space for initial state - 1 qword for drawing command) * 4 elts
+ * imesa is not defined in validate_render :( */
+#define GET_SUBSEQUENT_VB_MAX_ELTS()					\
+   ((SAVAGE_CONTEXT(ctx)->cmdBuf.size - 				\
+     (SAVAGE_CONTEXT(ctx)->cmdBuf.start - 				\
+      SAVAGE_CONTEXT(ctx)->cmdBuf.base) - 1)*4)
+
+#define ALLOC_ELTS(nr) savageAllocElts(imesa, nr)
+#define EMIT_ELT(offset, x) do {					\
+   (dest)[offset] = (GLushort) ((x)+firstElt);				\
+} while (0)
+#define EMIT_TWO_ELTS(offset, x, y) do {				\
+   *(GLuint *)(dest + (offset)) = (((y)+firstElt) << 16) |		\
+				  ((x)+firstElt);			\
+} while (0)
+/* macro parameters are parenthesized so that expressions can be passed */
+#define INCR_ELTS( nr ) dest += (nr)
+#define ELTPTR dest
+#define RELEASE_ELT_VERTS() \
+   savageReleaseIndexedVerts(imesa)
+/* allocate (count - start) indexed vertices and emit them from the VB */
+#define EMIT_INDEXED_VERTS( ctx, start, count ) do {			\
+   GLuint *buf = savageAllocIndexedVerts(imesa, (count)-(start));	\
+   EMIT_VERTS(ctx, start, (count)-(start), buf);			\
+} while (0)
+
+#define TAG(x) savage_##x
+#include "tnl_dd/t_dd_dmatmp.h"
+
+/*
+ * On Savage3D triangle fans and strips are broken with flat
+ * shading. With triangles it wants the color for flat shading in the
+ * first vertex! So we make another template instance which uses
+ * triangles only (with reordered vertices: SAVAGE_PRIM_TRILIST_201).
+ * The reordering is done by the DRM.
+ */
+#undef  HAVE_TRI_STRIPS
+#undef  HAVE_TRI_FANS
+#define HAVE_TRI_STRIPS	0
+#define HAVE_TRI_FANS	0
+
+#undef  INIT
+#define INIT( prim ) do {						\
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);			\
+   savageFlushVertices(imesa);						\
+   imesa->HwPrim = SAVAGE_PRIM_TRILIST_201;				\
+} while(0)
+
+#undef  TAG
+#define TAG(x) savage_flat_##x##_s3d
+#include "tnl_dd/t_dd_dmatmp.h"
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+/* Stage run hook: draw the VB with the hardware render tabs if possible. */
+static GLboolean savage_run_render( GLcontext *ctx,
+				    struct tnl_pipeline_stage *stage )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb; 
+   tnl_render_func *tab, *tab_elts;
+   GLboolean valid;
+   GLuint i;
+
+   if (savageHaveIndexedVerts(imesa))
+      savageReleaseIndexedVerts(imesa);
+   /* Flat shading on chips older than Savage4 uses the triangles-only tabs. */
+   if (imesa->savageScreen->chipset < S3_SAVAGE4 &&
+       (ctx->_TriangleCaps & DD_FLATSHADE)) {
+      tab = savage_flat_render_tab_verts_s3d;
+      tab_elts = savage_flat_render_tab_elts_s3d;
+      valid = savage_flat_validate_render_s3d( ctx, VB );
+   } else {
+      tab = savage_render_tab_verts;
+      tab_elts = savage_render_tab_elts;
+      valid = savage_validate_render( ctx, VB );
+   }
+
+   /* Don't handle clipping or vertex manipulations.  NOTE(review): GL_TRUE
+    * presumably hands the VB on to a later pipeline stage -- confirm. */
+   if (imesa->RenderIndex != 0 || !valid) {
+      return GL_TRUE;
+   }
+   
+   tnl->Driver.Render.Start( ctx );
+   /* Check RenderIndex again. The ptexHack is detected late in RenderStart.
+    * Also check for ptex fallbacks detected late.  NOTE(review): this exit
+    * and the VB->Count one below skip Render.Finish -- confirm intended. */
+   if (imesa->RenderIndex != 0 || imesa->Fallback != 0) {
+      return GL_TRUE;
+   }
+
+   /* setup for hardware culling */
+   imesa->raster_primitive = GL_TRIANGLES;
+   imesa->new_state |= SAVAGE_NEW_CULL;
+
+   /* update and emit state */
+   savageDDUpdateHwState(ctx);
+   savageEmitChangedState(imesa);
+   /* With elts, emit the whole VB as indexed vertices unless already held. */
+   if (VB->Elts) {
+      tab = tab_elts;
+      if (!savageHaveIndexedVerts(imesa)) {
+	 if (VB->Count > GET_SUBSEQUENT_VB_MAX_VERTS())
+	    return GL_TRUE;	/* VB too large for one vertex buffer */
+	 EMIT_INDEXED_VERTS(ctx, 0, VB->Count);
+      }
+   }
+   /* Dispatch each non-empty primitive to the selected render table. */
+   for (i = 0 ; i < VB->PrimitiveCount ; i++)
+   {
+      GLuint prim = _tnl_translate_prim(&VB->Primitive[i]);
+      GLuint start = VB->Primitive[i].start;
+      GLuint length = VB->Primitive[i].count;
+
+      if (length)
+	 tab[prim & PRIM_MODE_MASK]( ctx, start, start+length, prim);
+   }
+
+   tnl->Driver.Render.Finish( ctx );
+
+   return GL_FALSE;		/* finished the pipe */
+}
+
+struct tnl_pipeline_stage _savage_render_stage = 
+{ 
+   "savage render",		/* name */
+   NULL,			/* private data */
+   NULL,			/* no allocation hook */
+   NULL,			/* no destructor */
+   NULL,			/* no validate hook */
+   savage_run_render		/* run */
+};
+
+
+/**********************************************************************/
+/*         Pipeline stage for texture coordinate normalization        */
+/**********************************************************************/
+struct texnorm_stage_data {
+   GLboolean active;	/* set by validate_texnorm; the run hook does nothing when false */
+   GLvector4f texcoord[MAX_TEXTURE_UNITS];	/* normalized coords, one vector per unit */
+};
+
+#define TEXNORM_STAGE_DATA(stage) ((struct texnorm_stage_data *)stage->privatePtr)
+
+
+static GLboolean run_texnorm_stage( GLcontext *ctx,
+				    struct tnl_pipeline_stage *stage )
+{
+   struct texnorm_stage_data *store = TEXNORM_STAGE_DATA(stage);
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint unit;
+
+   /* No-op in fallback mode or when validate_texnorm left the stage
+    * inactive. */
+   if (imesa->Fallback || !store->active)
+      return GL_TRUE;
+
+   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++) {
+      const GLbitfield enabled = ctx->Texture.Unit[unit]._ReallyEnabled;
+      const struct gl_texture_object *texObj;
+      GLvector4f *src;
+      const GLfloat *in;
+      GLfloat (*out)[4];
+      GLfloat correctionS = 0, correctionT = 0;
+      GLboolean normalizeS, normalizeT, writeT;
+      GLint instride;
+      GLint j;
+
+      if (!enabled)
+         continue;
+
+      /* S is normalized for any enabled target that wraps with GL_REPEAT,
+       * T only for 2D textures.
+       */
+      texObj = ctx->Texture.Unit[unit]._Current;
+      normalizeS = (texObj->WrapS == GL_REPEAT);
+      normalizeT = (enabled & TEXTURE_2D_BIT) &&
+         (texObj->WrapT == GL_REPEAT);
+
+      src = VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit];
+      if (src->size == 4)
+         /* Never try to normalize homogenous tex coords! */
+         continue;
+      if (!normalizeS && !normalizeT)
+         continue;
+
+      in = (GLfloat *)src->data;
+      instride = src->stride;
+      out = store->texcoord[unit].data;
+
+      /* take first texcoords as rough estimate of mean value */
+      if (normalizeS)
+         correctionS = -floor(in[0]+0.5);
+      if (normalizeT)
+         correctionT = -floor(in[1]+0.5);
+
+      /* T is written (corrected or copied) only for 2D textures; for any
+       * other target just the S coordinate is stored.
+       */
+      writeT = (enabled & TEXTURE_2D_BIT) != 0;
+      for (j = 0; j < VB->Count; ++j) {
+         out[j][0] = normalizeS ? in[0] + correctionS : in[0];
+         if (writeT)
+            out[j][1] = normalizeT ? in[1] + correctionT : in[1];
+         in = (GLfloat *)((GLubyte *)in + instride);
+      }
+
+      /* point the VB attribute at the normalized copy */
+      VB->AttribPtr[_TNL_ATTRIB_TEX0 + unit] = &store->texcoord[unit];
+   }
+
+   return GL_TRUE;
+}
+
+/* Allocates the stage's private data: one output texcoord vector per
+ * texture unit.  NOTE(review): presumably the stage's create hook -- confirm. */
+static GLboolean alloc_texnorm_data( GLcontext *ctx,
+				     struct tnl_pipeline_stage *stage )
+{
+   struct texnorm_stage_data *store;
+   const GLuint vbSize = TNL_CONTEXT(ctx)->vb.Size;
+   GLuint unit;
+
+   stage->privatePtr = CALLOC(sizeof(struct texnorm_stage_data));
+   store = TEXNORM_STAGE_DATA(stage);
+   if (store == NULL)
+      return GL_FALSE;
+   /* every vector gets VB->Size entries */
+   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits ; unit++)
+      _mesa_vector4f_alloc( &store->texcoord[unit], 0, vbSize, 32 );
+   
+   return GL_TRUE;
+}
+
+static void validate_texnorm( GLcontext *ctx,
+			      struct tnl_pipeline_stage *stage )
+{
+   /* The stage is needed only if unit 0 or 1 wraps S (1D/2D texture) or
+    * T (2D texture) with GL_REPEAT.  Other units are not examined.
+    */
+   struct texnorm_stage_data *store = TEXNORM_STAGE_DATA(stage);
+   GLboolean needed = GL_FALSE;
+   GLuint unit;
+
+   for (unit = 0 ; unit < 2 ; unit++) {
+      const GLbitfield enabled = ctx->Texture.Unit[unit]._ReallyEnabled;
+      if (((enabled & (TEXTURE_1D_BIT|TEXTURE_2D_BIT)) &&
+	   (ctx->Texture.Unit[unit]._Current->WrapS == GL_REPEAT)) ||
+	  ((enabled & TEXTURE_2D_BIT) &&
+	   (ctx->Texture.Unit[unit]._Current->WrapT == GL_REPEAT)))
+	 needed = GL_TRUE;
+   }
+   store->active = needed;
+}
+
+static void free_texnorm_data( struct tnl_pipeline_stage *stage )
+{
+   struct texnorm_stage_data *store = TEXNORM_STAGE_DATA(stage);
+   GLuint unit;
+   /* Destructor: frees the per-unit vectors, then the private data itself. */
+   if (store == NULL)
+      return;			/* nothing to release */
+   for (unit = 0 ; unit < MAX_TEXTURE_UNITS ; unit++)
+      if (store->texcoord[unit].data)	/* skip vectors never allocated */
+	 _mesa_vector4f_free( &store->texcoord[unit] );
+   FREE( store );
+   stage->privatePtr = 0;
+}
+
+struct tnl_pipeline_stage _savage_texnorm_stage =
+{
+   "savage texture coordinate normalization stage", /* name */
+   NULL,				/* private data */
+   alloc_texnorm_data,			/* allocates the private data */
+   free_texnorm_data,			/* destructor */
+   validate_texnorm,			/* validate -- sets store->active */
+   run_texnorm_stage			/* run */
+};
diff --git a/src/mesa/drivers/dri/savage/savagespan.c b/src/mesa/drivers/dri/savage/savagespan.c
new file mode 100644
index 0000000000..0913dd1278
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savagespan.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/mtypes.h"
+#include "savagedd.h"
+#include "savagespan.h"
+#include "savageioctl.h"
+#include "savage_3d_reg.h"
+#include "swrast/swrast.h"
+
+#define DBG 0
+
+#define LOCAL_VARS						\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
+   __DRIdrawable *const dPriv = drb->dPriv;		\
+   GLuint cpp   = drb->cpp;					\
+   GLuint pitch = drb->pitch;					\
+   GLuint height = dPriv->h;					\
+   GLubyte *buf = drb->Base.Data + dPriv->x * cpp + dPriv->y * pitch;	\
+   GLuint p;							\
+   (void) p
+
+#define LOCAL_DEPTH_VARS					\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
+   __DRIdrawable *const dPriv = drb->dPriv;		\
+   GLuint zpp   = drb->cpp;					\
+   GLuint pitch = drb->pitch;					\
+   GLuint height = dPriv->h;					\
+   GLubyte *buf = drb->Base.Data + dPriv->x * zpp + dPriv->y * pitch;
+
+#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
+
+#define Y_FLIP(_y) (height - (_y) - 1)	/* flip y within the drawable's height */
+
+#define HW_LOCK()
+
+#define HW_UNLOCK()
+
+#define HW_WRITE_LOCK()
+
+#define HW_READ_LOCK()
+
+
+/* 16 bit, 565 rgb color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x) savage##x##_565
+#define TAG2(x,y) savage##x##_565##y
+#include "spantmp2.h"
+
+
+/* 32 bit, 8888 ARGB color spanline and pixel functions
+ */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x) savage##x##_8888
+#define TAG2(x,y) savage##x##_8888##y
+#include "spantmp2.h"
+
+
+#undef HW_WRITE_LOCK
+#define HW_WRITE_LOCK()
+#undef HW_READ_LOCK
+#define HW_READ_LOCK()
+
+
+
+/* 16 bit integer depthbuffer functions
+ * Depth range is reversed (stored value is 0xFFFF - d). See also
+ * savageCalcViewport. */
+#define VALUE_TYPE GLushort
+
+#define WRITE_DEPTH( _x, _y, d ) \
+    *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) = 0xFFFF - (d)
+
+#define READ_DEPTH( d, _x, _y ) \
+    d = 0xFFFF - *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch)
+
+#define TAG(x) savage##x##_z16
+#include "depthtmp.h"
+
+
+
+
+/* 16 bit float depthbuffer functions
+ */
+#define VALUE_TYPE GLushort
+
+#define WRITE_DEPTH( _x, _y, d ) \
+    *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) = \
+        savageEncodeFloat16( 1.0 - (GLfloat)(d)/65535.0 )
+
+#define READ_DEPTH( d, _x, _y ) \
+    d = 65535 - \
+        savageDecodeFloat16( *(GLushort *)(buf + ((_x)<<1) + (_y)*pitch) ) * \
+	65535.0
+
+#define TAG(x) savage##x##_z16f
+#include "depthtmp.h"
+
+
+
+
+/* 8-bit stencil /24-bit integer depth depthbuffer functions.
+ * Depth range is reversed. See also savageCalcViewport.
+ */
+#define VALUE_TYPE GLuint
+
+#define WRITE_DEPTH( _x, _y, d ) do {				\
+   GLuint tmp = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch);	\
+   tmp &= 0xFF000000;	/* keep the stencil byte */		\
+   tmp |= 0x00FFFFFF - (d);					\
+   *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) = tmp;		\
+} while(0)
+
+#define READ_DEPTH( d, _x, _y )	\
+   d = 0x00FFFFFF - (*(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) & 0x00FFFFFF)
+
+#define TAG(x) savage##x##_s8_z24
+#include "depthtmp.h"
+
+
+
+
+/* 24 bit float depthbuffer functions
+ */
+#define VALUE_TYPE GLuint
+
+#define WRITE_DEPTH( _x, _y, d ) do {				\
+    GLuint tmp = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch);	\
+    tmp &= 0xFF000000;	/* keep the stencil byte */		\
+    tmp |= savageEncodeFloat24( 1.0 - (GLfloat)(d)/16777215.0 );	\
+    *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) = tmp;		\
+} while(0)
+
+#define READ_DEPTH( d, _x, _y )					\
+    d = 16777215 - savageDecodeFloat24(				\
+	*(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) & 0x00FFFFFF)	\
+	* 16777215.0
+
+#define TAG(x) savage##x##_s8_z24f
+#include "depthtmp.h"
+
+
+#define WRITE_STENCIL( _x, _y, d ) do {				\
+   GLuint tmp = *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch);	\
+   tmp &= 0x00FFFFFF;	/* keep the 24 depth bits */		\
+   tmp |= (((GLuint)(d))<<24) & 0xFF000000;			\
+   *(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) = tmp;		\
+} while(0)
+
+#define READ_STENCIL( d, _x, _y ) \
+   d = (GLstencil)((*(GLuint *)(buf + ((_x)<<2) + (_y)*pitch) & 0xFF000000) >> 24)
+
+#define TAG(x) savage##x##_s8_z24
+#include "stenciltmp.h"
+
+
+
+/*
+ * Wrappers around _swrast_Copy/Draw/ReadPixels that make sure all
+ * primitives are flushed and the hardware is idle before accessing
+ * the frame buffer.  Installed by savageDDInitSpanFuncs.
+ */
+static void
+savageCopyPixels( GLcontext *ctx,
+		  GLint srcx, GLint srcy, GLsizei width, GLsizei height,
+		  GLint destx, GLint desty,
+		  GLenum type )
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    FLUSH_BATCH(imesa);		/* flush pending primitives ... */
+    WAIT_IDLE_EMPTY(imesa);	/* ... and wait until the hardware is idle */
+    _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type);
+}
+static void
+savageDrawPixels( GLcontext *ctx,
+		  GLint x, GLint y,
+		  GLsizei width, GLsizei height,
+		  GLenum format, GLenum type,
+		  const struct gl_pixelstore_attrib *packing,
+		  const GLvoid *pixels )
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    FLUSH_BATCH(imesa);		/* flush pending primitives ... */
+    WAIT_IDLE_EMPTY(imesa);	/* ... and wait until the hardware is idle */
+    _swrast_DrawPixels(ctx, x, y, width, height, format, type, packing, pixels);
+}
+static void
+savageReadPixels( GLcontext *ctx,
+		  GLint x, GLint y, GLsizei width, GLsizei height,
+		  GLenum format, GLenum type,
+		  const struct gl_pixelstore_attrib *packing,
+		  GLvoid *pixels )
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    FLUSH_BATCH(imesa);		/* flush pending primitives ... */
+    WAIT_IDLE_EMPTY(imesa);	/* ... and wait until the hardware is idle */
+    _swrast_ReadPixels(ctx, x, y, width, height, format, type, packing, pixels);
+}
+
+/*
+ * Make sure the hardware is idle when span-rendering.  Installed as the
+ * swrast SpanRenderStart hook by savageDDInitSpanFuncs.
+ */
+static void savageSpanRenderStart( GLcontext *ctx )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   FLUSH_BATCH(imesa);
+   WAIT_IDLE_EMPTY(imesa);
+}
+
+
+void savageDDInitSpanFuncs( GLcontext *ctx )
+{
+   /* hook run by swrast before it starts span rendering */
+   _swrast_GetDeviceDriverReference(ctx)->SpanRenderStart = savageSpanRenderStart;
+
+   /* XXX these should probably be plugged in elsewhere */
+   ctx->Driver.ReadPixels = savageReadPixels;
+   ctx->Driver.DrawPixels = savageDrawPixels;
+   ctx->Driver.CopyPixels = savageCopyPixels;
+}
+
+
+
+/**
+ * Plug in the Get/Put routines matching drb's buffer format (vis is unused).
+ */
+void
+savageSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis,
+                       GLboolean float_depth)
+{
+   switch (drb->Base.Format) {
+   case MESA_FORMAT_RGB565:
+      savageInitPointers_565(&drb->Base);
+      break;
+   case MESA_FORMAT_ARGB8888:
+      savageInitPointers_8888(&drb->Base);
+      break;
+   case MESA_FORMAT_Z16:
+      if (float_depth)
+         savageInitDepthPointers_z16f(&drb->Base);
+      else
+         savageInitDepthPointers_z16(&drb->Base);
+      break;
+   case MESA_FORMAT_S8_Z24:
+      if (float_depth)
+         savageInitDepthPointers_s8_z24f(&drb->Base);
+      else
+         savageInitDepthPointers_s8_z24(&drb->Base);
+      break;
+   case MESA_FORMAT_S8:
+      savageInitStencilPointers_s8_z24(&drb->Base);
+      break;
+   default:	/* any other format: leave the renderbuffer untouched */
+      break;
+   }
+}
diff --git a/src/mesa/drivers/dri/savage/savagespan.h b/src/mesa/drivers/dri/savage/savagespan.h
new file mode 100644
index 0000000000..53a7f8b97c
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savagespan.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _SAVAGE_SPAN_H
+#define _SAVAGE_SPAN_H
+
+#include "drirenderbuffer.h"
+
+
+extern void savageDDInitSpanFuncs( GLcontext *ctx );
+
+extern void
+savageSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis,
+                       GLboolean float_depth);
+
+
+/*
+ * Savage 16-bit float depth format with zExpOffset=16:
+ *   4 bit unsigned exponent, 12 bit mantissa
+ *
+ * The meaning of the mantissa is different from IEEE floating point
+ * formats. The same number can't be encoded with different exponents.
+ * So no bits are wasted.
+ *
+ * exponent | range encoded by mantissa | accuracy of mantissa
+ * ---------+---------------------------+---------------------
+ *       15 | 2^-1 .. 1                 | 2^-13
+ *       14 | 2^-2 .. 2^-1              | 2^-14
+ *       13 | 2^-3 .. 2^-2              | 2^-15
+ *      ... | ...                       |
+ *        2 | 2^-14 .. 2^-13            | 2^-26
+ *        1 | 2^-15 .. 2^-14            | 2^-27
+ *        0 | 2^-16 .. 2^-15            | 2^-28
+ *
+ * Note that there is no encoding for numbers < 2^-16.
+ */
+static INLINE GLuint savageEncodeFloat16( GLdouble x )
+{
+    GLint scaled = (GLint)(x * 0x10000000);	/* x in units of 2^-28 */
+    GLint exp;
+    if (scaled < 0x1000)
+	return 0;	/* below 2^-16: no encoding exists */
+    for (exp = 0; scaled > 0x1fff; exp++)
+	scaled >>= 1;	/* until only 0x1000 + 12 mantissa bits remain */
+    if (exp > 0xf)
+	return 0xffff;	/* exponent does not fit in 4 bits: saturate */
+    return (scaled - 0x1000) | (exp << 12);
+}
+static INLINE GLdouble savageDecodeFloat16( GLuint x )
+{
+    /* Value of one mantissa step for each exponent, i.e. 2^(exp-28).
+     * The implicit leading bit is worth 0x1000 steps (2^(exp-16)), so the
+     * decoded value is (0x1000 + mantissa) steps; this is exact in a double.
+     */
+    static const GLdouble step[16] = {
+	1.0/(1<<28), 1.0/(1<<27), 1.0/(1<<26), 1.0/(1<<25),
+	1.0/(1<<24), 1.0/(1<<23), 1.0/(1<<22), 1.0/(1<<21),
+	1.0/(1<<20), 1.0/(1<<19), 1.0/(1<<18), 1.0/(1<<17),
+	1.0/(1<<16), 1.0/(1<<15), 1.0/(1<<14), 1.0/(1<<13)
+    };
+    const GLuint exp = (x >> 12) & 0xf;
+    const GLuint mant = x & 0x0fff;
+
+    /* bits 12-15 hold the exponent, bits 0-11 the mantissa */
+    return step[exp] * (0x1000 + mant);
+}
+
+/*
+ * Savage 24-bit float depth format with zExpOffset=32:
+ *   5 bit unsigned exponent, 19 bit mantissa
+ *
+ * Details analogous to the 16-bit format.
+ */
+static INLINE GLuint savageEncodeFloat24( GLdouble x )
+{
+    int64_t scaled = (int64_t)(x * ((int64_t)1 << (19+32)));	/* units of 2^-51 */
+    GLint exp;
+    if (scaled < 0x80000)
+	return 0;	/* below 2^-32: no encoding exists */
+    for (exp = 0; scaled > 0xfffff; exp++)
+	scaled >>= 1;	/* until only 0x80000 + 19 mantissa bits remain */
+    if (exp > 0x1f)
+	return 0xffffff;	/* exponent does not fit in 5 bits: saturate */
+    return (scaled - 0x80000) | (exp << 19);
+}
+#define _1 (int64_t)1
+static INLINE GLdouble savageDecodeFloat24( GLuint x )
+{
+    /* Value of one mantissa step for each exponent, i.e. 2^(exp-51).
+     * The implicit leading bit dropped by savageEncodeFloat24 is worth
+     * 0x80000 steps (2^(exp-32)), so the decoded value is simply
+     * (0x80000 + mantissa) steps; this is exact in a double.
+     */
+    static const GLdouble step[32] = {
+	1.0/(_1<<51), 1.0/(_1<<50), 1.0/(_1<<49), 1.0/(_1<<48),
+	1.0/(_1<<47), 1.0/(_1<<46), 1.0/(_1<<45), 1.0/(_1<<44),
+	1.0/(_1<<43), 1.0/(_1<<42), 1.0/(_1<<41), 1.0/(_1<<40),
+	1.0/(_1<<39), 1.0/(_1<<38), 1.0/(_1<<37), 1.0/(_1<<36),
+	1.0/(_1<<35), 1.0/(_1<<34), 1.0/(_1<<33), 1.0/(_1<<32),
+	1.0/(_1<<31), 1.0/(_1<<30), 1.0/(_1<<29), 1.0/(_1<<28),
+	1.0/(_1<<27), 1.0/(_1<<26), 1.0/(_1<<25), 1.0/(_1<<24),
+	1.0/(_1<<23), 1.0/(_1<<22), 1.0/(_1<<21), 1.0/(_1<<20)
+    };
+
+    const GLuint exp = (x >> 19) & 0x1f;
+    const GLuint mant = x & 0x7ffff;
+
+    /* bits 19-23 hold the exponent, bits 0-18 the mantissa; anything
+     * above bit 23 is ignored */
+    return step[exp] * (0x80000 + mant);
+}
+#undef _1
+
+
+
+#endif
diff --git a/src/mesa/drivers/dri/savage/savagestate.c b/src/mesa/drivers/dri/savage/savagestate.c
new file mode 100644
index 0000000000..84e1b52585
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savagestate.c
@@ -0,0 +1,1730 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include <stdio.h>
+
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/dd.h"
+
+#include "main/mm.h"
+#include "savagedd.h"
+#include "savagecontext.h"
+
+#include "savagestate.h"
+#include "savagetex.h"
+#include "savagetris.h"
+#include "savageioctl.h"
+#include "savage_bci.h"
+
+#include "swrast/swrast.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "xmlpool.h"
+
+/* Savage4, ProSavage[DDR], SuperSavage watermarks */
+#define S4_ZRLO 24 /* NOTE(review): Z* presumably = Z-buffer read (R) / write (W), LO/HI marks - confirm */
+#define S4_ZRHI 24
+#define S4_ZWLO 0
+#define S4_ZWHI 0
+
+#define S4_DRLO 0 /* NOTE(review): D* presumably = destination read/write, LO/HI marks - confirm */
+#define S4_DRHI 0
+#define S4_DWLO 0
+#define S4_DWHI 0
+
+#define S4_TR   15 /* NOTE(review): presumably texture read - confirm */
+
+/* Savage3D/MX/IX watermarks (same naming scheme as the S4_* set) */
+#define S3D_ZRLO 8
+#define S3D_ZRHI 24
+#define S3D_ZWLO 0
+#define S3D_ZWHI 24
+
+#define S3D_DRLO 0
+#define S3D_DRHI 0
+#define S3D_DWLO 0
+#define S3D_DWHI 0
+
+#define S3D_TR   15
+
+static void savageBlendFunc_s4(GLcontext *);  /* defined further down; called by wrappers that appear before it */
+static void savageBlendFunc_s3d(GLcontext *); /* likewise, for the regs.s3d register set */
+
+/* Pack r/g/b/a bytes into one pixel value of the given DV_PF_* format. */
+static INLINE GLuint savagePackColor(GLuint format,
+                                     GLubyte r, GLubyte g,
+                                     GLubyte b, GLubyte a)
+{
+    if (format == DV_PF_8888)
+        return SAVAGEPACKCOLOR8888(r,g,b,a);
+
+    /* The 565 packer takes no alpha, so 'a' is not used here. */
+    if (format == DV_PF_565)
+        return SAVAGEPACKCOLOR565(r,g,b);
+
+    return 0; /* any other format packs to 0 */
+}
+
+
+static void savageDDAlphaFunc_s4(GLcontext *ctx, GLenum func, GLfloat ref)
+{   /* func and ref are unused: savageBlendFunc_s4 re-reads the alpha test state from ctx->Color */
+    savageBlendFunc_s4(ctx);
+}
+static void savageDDAlphaFunc_s3d(GLcontext *ctx, GLenum func, GLfloat ref)
+{   /* func and ref are unused: savageBlendFunc_s3d re-reads the alpha test state from ctx->Color */
+    savageBlendFunc_s3d(ctx);
+}
+
+static void savageDDBlendEquationSeparate(GLcontext *ctx,
+					  GLenum modeRGB, GLenum modeA)
+{
+    /* A logic op other than a plain copy needs the LOGICOP fallback.
+     * It is re-evaluated here because BlendEquation sets
+     * ColorLogicOpEnabled in an unexpected manner.
+     */
+    const GLboolean needLogicOpFallback =
+	ctx->Color.ColorLogicOpEnabled && ctx->Color.LogicOp != GL_COPY;
+    /* Can only do blend addition, not min, max, subtract, etc. */
+    const GLboolean needBlendEqFallback = modeRGB != GL_FUNC_ADD;
+
+    assert( modeRGB == modeA );
+    FALLBACK( ctx, SAVAGE_FALLBACK_LOGICOP, needLogicOpFallback );
+    FALLBACK( ctx, SAVAGE_FALLBACK_BLEND_EQ, needBlendEqFallback );
+}
+
+
+static void savageBlendFunc_s4(GLcontext *ctx)
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    uint32_t drawLocalCtrl = imesa->regs.s4.drawLocalCtrl.ui; /* entry snapshots, compared at the end */
+    uint32_t drawCtrl0 = imesa->regs.s4.drawCtrl0.ui;
+    uint32_t drawCtrl1 = imesa->regs.s4.drawCtrl1.ui;
+
+    /* Rebuild the blend-factor, alpha-test and Z-after-alpha fields of the
+     * regs.s4 draw control registers from ctx->Color; registers whose value
+     * changed are marked dirty at the end. */
+
+    imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites = GL_FALSE; /* set again by selected factors below */
+
+    /*
+     * blend modes: only the RGB factors are examined; a factor without a
+     * case below leaves the previous dstAlphaMode/srcAlphaMode in place. */
+    if(ctx->Color.BlendEnabled){
+        switch (ctx->Color.BlendDstRGB)
+        {
+            case GL_ZERO:
+                imesa->regs.s4.drawLocalCtrl.ni.dstAlphaMode = DAM_Zero;
+                break;
+
+            case GL_ONE:
+                imesa->regs.s4.drawLocalCtrl.ni.dstAlphaMode = DAM_One;
+                imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_SRC_COLOR:
+                imesa->regs.s4.drawLocalCtrl.ni.dstAlphaMode = DAM_SrcClr;
+                imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_ONE_MINUS_SRC_COLOR:
+                imesa->regs.s4.drawLocalCtrl.ni.dstAlphaMode = DAM_1SrcClr;
+                imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_SRC_ALPHA:
+                imesa->regs.s4.drawLocalCtrl.ni.dstAlphaMode = DAM_SrcAlpha;
+                imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_ONE_MINUS_SRC_ALPHA:
+                imesa->regs.s4.drawLocalCtrl.ni.dstAlphaMode = DAM_1SrcAlpha;
+                imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_DST_ALPHA:
+                if (imesa->glCtx->Visual.alphaBits == 0) /* visual has no alpha bits: use DAM_One */
+                {
+                    imesa->regs.s4.drawLocalCtrl.ni.dstAlphaMode = DAM_One;
+                }
+                else
+                {
+                    imesa->regs.s4.drawLocalCtrl.ni.dstAlphaMode= DAM_DstAlpha;
+                }
+                imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_ONE_MINUS_DST_ALPHA:
+                if (imesa->glCtx->Visual.alphaBits == 0) /* visual has no alpha bits: use DAM_Zero */
+                {
+                    imesa->regs.s4.drawLocalCtrl.ni.dstAlphaMode = DAM_Zero;
+                }
+                else
+                {
+                    imesa->regs.s4.drawLocalCtrl.ni.dstAlphaMode=DAM_1DstAlpha;
+                    imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites= GL_TRUE;
+                }
+                break;
+        }
+
+        switch (ctx->Color.BlendSrcRGB)
+        {
+            case GL_ZERO:
+                imesa->regs.s4.drawLocalCtrl.ni.srcAlphaMode = SAM_Zero;
+                break;
+
+            case GL_ONE:
+                imesa->regs.s4.drawLocalCtrl.ni.srcAlphaMode = SAM_One;
+                break;
+
+            case GL_DST_COLOR:
+                imesa->regs.s4.drawLocalCtrl.ni.srcAlphaMode = SAM_DstClr;
+                imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_ONE_MINUS_DST_COLOR:
+                imesa->regs.s4.drawLocalCtrl.ni.srcAlphaMode = SAM_1DstClr;
+                imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_SRC_ALPHA:
+                imesa->regs.s4.drawLocalCtrl.ni.srcAlphaMode = SAM_SrcAlpha;
+                break;
+
+            case GL_ONE_MINUS_SRC_ALPHA:
+                imesa->regs.s4.drawLocalCtrl.ni.srcAlphaMode = SAM_1SrcAlpha;
+                break;
+
+            case GL_DST_ALPHA:
+                if (imesa->glCtx->Visual.alphaBits == 0) /* visual has no alpha bits: use SAM_One */
+                {
+                    imesa->regs.s4.drawLocalCtrl.ni.srcAlphaMode = SAM_One;
+                }
+                else
+                {
+                    imesa->regs.s4.drawLocalCtrl.ni.srcAlphaMode= SAM_DstAlpha;
+                    imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites= GL_TRUE;
+                }
+                break;
+
+            case GL_ONE_MINUS_DST_ALPHA:
+                if (imesa->glCtx->Visual.alphaBits == 0)          
+                {
+                    imesa->regs.s4.drawLocalCtrl.ni.srcAlphaMode = SAM_Zero;
+                }
+                else
+                {
+                    imesa->regs.s4.drawLocalCtrl.ni.srcAlphaMode=SAM_1DstAlpha;
+                    imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites= GL_TRUE;
+                }
+                break;
+        }
+    }
+    else
+    {
+        imesa->regs.s4.drawLocalCtrl.ni.dstAlphaMode = DAM_Zero; /* blending off: zero/one factors */
+        imesa->regs.s4.drawLocalCtrl.ni.srcAlphaMode = SAM_One;
+    }
+
+    /* alpha test: reference clamped to a byte, GL compare func mapped to CF_* */
+
+    if(ctx->Color.AlphaEnabled) 
+    {
+        ACmpFunc a;
+	GLubyte alphaRef;
+
+	CLAMPED_FLOAT_TO_UBYTE(alphaRef,ctx->Color.AlphaRef);
+         
+        switch(ctx->Color.AlphaFunc)  { 
+	case GL_NEVER: a = CF_Never; break;
+	case GL_ALWAYS: a = CF_Always; break;
+	case GL_LESS: a = CF_Less; break; 
+	case GL_LEQUAL: a = CF_LessEqual; break;
+	case GL_EQUAL: a = CF_Equal; break;
+	case GL_GREATER: a = CF_Greater; break;
+	case GL_GEQUAL: a = CF_GreaterEqual; break;
+	case GL_NOTEQUAL: a = CF_NotEqual; break;
+	default:return; /* unexpected func: leaves before the dirty checks at the end */
+        }   
+      
+	imesa->regs.s4.drawCtrl1.ni.alphaTestEn = GL_TRUE;
+	imesa->regs.s4.drawCtrl1.ni.alphaTestCmpFunc = a;
+	imesa->regs.s4.drawCtrl0.ni.alphaRefVal = alphaRef;
+    }
+    else
+    {
+	imesa->regs.s4.drawCtrl1.ni.alphaTestEn      = GL_FALSE;
+    }
+
+    /* Set/Reset Z-after-alpha: mirrors the alpha-test enable bit */
+
+    imesa->regs.s4.drawLocalCtrl.ni.wrZafterAlphaTst =
+	imesa->regs.s4.drawCtrl1.ni.alphaTestEn;
+    /*imesa->regs.s4.drawLocalCtrl.ni.zUpdateEn =
+        ~drawLocalCtrl.ni.wrZafterAlphaTst;*/
+
+    if (drawLocalCtrl != imesa->regs.s4.drawLocalCtrl.ui) /* dirty flags only for registers that changed */
+	imesa->dirty |= SAVAGE_UPLOAD_LOCAL;
+    if (drawCtrl0 != imesa->regs.s4.drawCtrl0.ui ||
+	drawCtrl1 != imesa->regs.s4.drawCtrl1.ui)
+	imesa->dirty |= SAVAGE_UPLOAD_GLOBAL;
+}
+static void savageBlendFunc_s3d(GLcontext *ctx)
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    uint32_t drawCtrl = imesa->regs.s3d.drawCtrl.ui; /* entry snapshots, compared at the end */
+    uint32_t zBufCtrl = imesa->regs.s3d.zBufCtrl.ui;
+
+    /* Rebuild the blend-factor and alpha-test fields of regs.s3d.drawCtrl
+     * and the Z-after-alpha bit of regs.s3d.zBufCtrl from ctx->Color; a
+     * change in either register raises SAVAGE_UPLOAD_LOCAL at the end. */
+
+    imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites = 0; /* set again by selected factors below */
+
+    /*
+     * blend modes: only the RGB factors are examined; a factor without a
+     * case below leaves the previous dstAlphaMode/srcAlphaMode in place. */
+    if(ctx->Color.BlendEnabled){
+        switch (ctx->Color.BlendDstRGB)
+        {
+            case GL_ZERO:
+                imesa->regs.s3d.drawCtrl.ni.dstAlphaMode = DAM_Zero;
+                break;
+
+            case GL_ONE:
+                imesa->regs.s3d.drawCtrl.ni.dstAlphaMode = DAM_One;
+                imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_SRC_COLOR:
+                imesa->regs.s3d.drawCtrl.ni.dstAlphaMode = DAM_SrcClr;
+                imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_ONE_MINUS_SRC_COLOR:
+                imesa->regs.s3d.drawCtrl.ni.dstAlphaMode = DAM_1SrcClr;
+                imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_SRC_ALPHA:
+                imesa->regs.s3d.drawCtrl.ni.dstAlphaMode = DAM_SrcAlpha;
+                imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_ONE_MINUS_SRC_ALPHA:
+                imesa->regs.s3d.drawCtrl.ni.dstAlphaMode = DAM_1SrcAlpha;
+                imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_DST_ALPHA:
+                if (imesa->glCtx->Visual.alphaBits == 0) /* visual has no alpha bits: use DAM_One */
+                {
+                    imesa->regs.s3d.drawCtrl.ni.dstAlphaMode = DAM_One;
+                }
+                else
+                {
+                    imesa->regs.s3d.drawCtrl.ni.dstAlphaMode = DAM_DstAlpha;
+                }
+                imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_ONE_MINUS_DST_ALPHA:
+                if (imesa->glCtx->Visual.alphaBits == 0) /* visual has no alpha bits: use DAM_Zero */
+                {
+                    imesa->regs.s3d.drawCtrl.ni.dstAlphaMode = DAM_Zero;
+                }
+                else
+                {
+                    imesa->regs.s3d.drawCtrl.ni.dstAlphaMode = DAM_1DstAlpha;
+                    imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites = GL_TRUE;
+                }
+                break;
+        }
+
+        switch (ctx->Color.BlendSrcRGB)
+        {
+            case GL_ZERO:
+                imesa->regs.s3d.drawCtrl.ni.srcAlphaMode = SAM_Zero;
+                break;
+
+            case GL_ONE:
+                imesa->regs.s3d.drawCtrl.ni.srcAlphaMode = SAM_One;
+                break;
+
+            case GL_DST_COLOR:
+                imesa->regs.s3d.drawCtrl.ni.srcAlphaMode = SAM_DstClr;
+                imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_ONE_MINUS_DST_COLOR:
+                imesa->regs.s3d.drawCtrl.ni.srcAlphaMode = SAM_1DstClr;
+                imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites = GL_TRUE;
+                break;
+
+            case GL_SRC_ALPHA:
+                imesa->regs.s3d.drawCtrl.ni.srcAlphaMode = SAM_SrcAlpha;
+                break;
+
+            case GL_ONE_MINUS_SRC_ALPHA:
+                imesa->regs.s3d.drawCtrl.ni.srcAlphaMode = SAM_1SrcAlpha;
+                break;
+
+            case GL_DST_ALPHA:
+                if (imesa->glCtx->Visual.alphaBits == 0) /* visual has no alpha bits: use SAM_One */
+                {
+                    imesa->regs.s3d.drawCtrl.ni.srcAlphaMode = SAM_One;
+                }
+                else
+                {
+                    imesa->regs.s3d.drawCtrl.ni.srcAlphaMode = SAM_DstAlpha;
+                    imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites = GL_TRUE;
+                }
+                break;
+
+            case GL_ONE_MINUS_DST_ALPHA:
+                if (imesa->glCtx->Visual.alphaBits == 0)          
+                {
+                    imesa->regs.s3d.drawCtrl.ni.srcAlphaMode = SAM_Zero;
+                }
+                else
+                {
+                    imesa->regs.s3d.drawCtrl.ni.srcAlphaMode = SAM_1DstAlpha;
+                    imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites = GL_TRUE;
+                }
+                break;
+        }
+    }
+    else
+    {
+        imesa->regs.s3d.drawCtrl.ni.dstAlphaMode = DAM_Zero; /* blending off: zero/one factors */
+        imesa->regs.s3d.drawCtrl.ni.srcAlphaMode = SAM_One;
+    }
+
+    /* alpha test: reference clamped to a byte, GL compare func mapped to CF_* */
+
+    if(ctx->Color.AlphaEnabled) 
+    {
+        ACmpFunc a;
+	GLubyte alphaRef;
+
+	CLAMPED_FLOAT_TO_UBYTE(alphaRef,ctx->Color.AlphaRef);
+         
+        switch(ctx->Color.AlphaFunc)  { 
+	case GL_NEVER: a = CF_Never; break;
+	case GL_ALWAYS: a = CF_Always; break;
+	case GL_LESS: a = CF_Less; break; 
+	case GL_LEQUAL: a = CF_LessEqual; break;
+	case GL_EQUAL: a = CF_Equal; break;
+	case GL_GREATER: a = CF_Greater; break;
+	case GL_GEQUAL: a = CF_GreaterEqual; break;
+	case GL_NOTEQUAL: a = CF_NotEqual; break;
+	default:return; /* unexpected func: leaves before the dirty check at the end */
+        }   
+
+	imesa->regs.s3d.drawCtrl.ni.alphaTestEn = GL_TRUE;
+	imesa->regs.s3d.drawCtrl.ni.alphaTestCmpFunc = a;
+	imesa->regs.s3d.drawCtrl.ni.alphaRefVal = alphaRef;
+    }
+    else
+    {
+	imesa->regs.s3d.drawCtrl.ni.alphaTestEn = GL_FALSE;
+    }
+
+    /* Set/Reset Z-after-alpha: mirrors the alpha-test enable bit */
+
+    imesa->regs.s3d.zBufCtrl.ni.wrZafterAlphaTst =
+	imesa->regs.s3d.drawCtrl.ni.alphaTestEn;
+
+    if (drawCtrl != imesa->regs.s3d.drawCtrl.ui || /* either register changed since entry */
+	zBufCtrl != imesa->regs.s3d.zBufCtrl.ui)
+	imesa->dirty |= SAVAGE_UPLOAD_LOCAL;
+}
+
+static void savageDDBlendFuncSeparate_s4( GLcontext *ctx, GLenum sfactorRGB, 
+					  GLenum dfactorRGB, GLenum sfactorA,
+					  GLenum dfactorA )
+{
+    assert (dfactorRGB == dfactorA && sfactorRGB == sfactorA); /* alpha factors must equal the RGB ones */
+    savageBlendFunc_s4( ctx ); /* factors are re-read from ctx->Color, not taken from the arguments */
+}
+static void savageDDBlendFuncSeparate_s3d( GLcontext *ctx, GLenum sfactorRGB, 
+					   GLenum dfactorRGB, GLenum sfactorA,
+					   GLenum dfactorA )
+{
+    assert (dfactorRGB == dfactorA && sfactorRGB == sfactorA); /* alpha factors must equal the RGB ones */
+    savageBlendFunc_s3d( ctx ); /* factors are re-read from ctx->Color, not taken from the arguments */
+}
+
+
+
+static void savageDDDepthFunc_s4(GLcontext *ctx, GLenum func)
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    ZCmpFunc zmode;
+    uint32_t drawLocalCtrl = imesa->regs.s4.drawLocalCtrl.ui; /* entry snapshots, compared at the end */
+    uint32_t zBufCtrl = imesa->regs.s4.zBufCtrl.ui;
+    uint32_t zWatermarks = imesa->regs.s4.zWatermarks.ui; /* FIXME: in DRM */
+
+    /* Program depth compare/enable (zBufCtrl, uploaded as global state)
+     * and depth write/flush bits (drawLocalCtrl, uploaded as local state).
+     * zWatermarks is only snapshotted and compared; nothing here writes it.
+     */
+
+    switch(func)  { /* reversed (see savageCalcViewport) */
+    case GL_NEVER: zmode = CF_Never; break;
+    case GL_ALWAYS: zmode = CF_Always; break;
+    case GL_LESS: zmode = CF_Greater; break; 
+    case GL_LEQUAL: zmode = CF_GreaterEqual; break;
+    case GL_EQUAL: zmode = CF_Equal; break;
+    case GL_GREATER: zmode = CF_Less; break;
+    case GL_GEQUAL: zmode = CF_LessEqual; break;
+    case GL_NOTEQUAL: zmode = CF_NotEqual; break;
+    default:return; /* unexpected func: no register is touched */
+    } 
+    if (ctx->Depth.Test)
+    {
+	/* Depth test on: use the mapped compare func; writes follow ctx->Depth.Mask. */
+	imesa->regs.s4.zBufCtrl.ni.zCmpFunc = zmode;
+	imesa->regs.s4.drawLocalCtrl.ni.zUpdateEn = ctx->Depth.Mask;
+	imesa->regs.s4.drawLocalCtrl.ni.flushPdZbufWrites = GL_TRUE;
+	imesa->regs.s4.zBufCtrl.ni.zBufEn = GL_TRUE;
+    }
+    else if (imesa->glCtx->Stencil._Enabled && imesa->hw_stencil)
+    {
+        /* Need to keep Z on for Stencil. */
+	imesa->regs.s4.zBufCtrl.ni.zCmpFunc = CF_Always;
+	imesa->regs.s4.zBufCtrl.ni.zBufEn   = GL_TRUE;
+	imesa->regs.s4.drawLocalCtrl.ni.zUpdateEn = GL_FALSE;
+	imesa->regs.s4.drawLocalCtrl.ni.flushPdZbufWrites = GL_FALSE;
+    }
+    else
+    {
+	/* No depth test, no h/w stencil: Z stays enabled only while draw updates are off. */
+        if (imesa->regs.s4.drawLocalCtrl.ni.drawUpdateEn == GL_FALSE)
+        {
+            imesa->regs.s4.zBufCtrl.ni.zCmpFunc = CF_Always;
+            imesa->regs.s4.zBufCtrl.ni.zBufEn   = GL_TRUE;
+        }
+        else
+
+            /* DRAWUPDATE_REQUIRES_Z_ENABLED*/
+        {
+	    imesa->regs.s4.zBufCtrl.ni.zBufEn         = GL_FALSE;
+        }
+	imesa->regs.s4.drawLocalCtrl.ni.zUpdateEn = GL_FALSE;
+	imesa->regs.s4.drawLocalCtrl.ni.flushPdZbufWrites = GL_FALSE;
+    }
+
+    if (drawLocalCtrl != imesa->regs.s4.drawLocalCtrl.ui) /* dirty flags only for registers that changed */
+	imesa->dirty |= SAVAGE_UPLOAD_LOCAL;
+    if (zBufCtrl != imesa->regs.s4.zBufCtrl.ui ||
+	zWatermarks != imesa->regs.s4.zWatermarks.ui)
+	imesa->dirty |= SAVAGE_UPLOAD_GLOBAL;
+}
+static void savageDDDepthFunc_s3d(GLcontext *ctx, GLenum func)
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    ZCmpFunc zmode;
+    uint32_t drawCtrl = imesa->regs.s3d.drawCtrl.ui; /* entry snapshots, compared at the end */
+    uint32_t zBufCtrl = imesa->regs.s3d.zBufCtrl.ui;
+    uint32_t zWatermarks = imesa->regs.s3d.zWatermarks.ui; /* FIXME: in DRM */
+
+    /* Program depth compare/enable/write (regs.s3d.zBufCtrl) and the Z
+     * flush bit (regs.s3d.drawCtrl); both are uploaded as local state.
+     * zWatermarks is only snapshotted and compared; nothing here writes it.
+     */
+    switch(func)  { /* reversed (see savageCalcViewport) */
+    case GL_NEVER: zmode = CF_Never; break;
+    case GL_ALWAYS: zmode = CF_Always; break;
+    case GL_LESS: zmode = CF_Greater; break; 
+    case GL_LEQUAL: zmode = CF_GreaterEqual; break;
+    case GL_EQUAL: zmode = CF_Equal; break;
+    case GL_GREATER: zmode = CF_Less; break;
+    case GL_GEQUAL: zmode = CF_LessEqual; break;
+    case GL_NOTEQUAL: zmode = CF_NotEqual; break;
+    default:return; /* unexpected func: no register is touched */
+    } 
+    if (ctx->Depth.Test)
+    {
+	imesa->regs.s3d.zBufCtrl.ni.zBufEn = GL_TRUE;
+	imesa->regs.s3d.zBufCtrl.ni.zCmpFunc = zmode;
+	imesa->regs.s3d.zBufCtrl.ni.zUpdateEn = ctx->Depth.Mask; /* depth writes follow the GL depth mask */
+	
+	imesa->regs.s3d.drawCtrl.ni.flushPdZbufWrites = GL_TRUE;
+    }
+    else
+    {
+	if (imesa->regs.s3d.zBufCtrl.ni.drawUpdateEn == GL_FALSE) { /* Z stays on while draw updates are off */
+	    imesa->regs.s3d.zBufCtrl.ni.zCmpFunc = CF_Always;
+            imesa->regs.s3d.zBufCtrl.ni.zBufEn = GL_TRUE;
+	}
+        else
+
+            /* DRAWUPDATE_REQUIRES_Z_ENABLED*/
+        {
+	    imesa->regs.s3d.zBufCtrl.ni.zBufEn = GL_FALSE;
+        }
+	imesa->regs.s3d.zBufCtrl.ni.zUpdateEn = GL_FALSE;
+	imesa->regs.s3d.drawCtrl.ni.flushPdZbufWrites = GL_FALSE;
+    }
+  
+    if (drawCtrl != imesa->regs.s3d.drawCtrl.ui || /* dirty flags only for registers that changed */
+	zBufCtrl != imesa->regs.s3d.zBufCtrl.ui)
+	imesa->dirty |= SAVAGE_UPLOAD_LOCAL;
+    if (zWatermarks != imesa->regs.s3d.zWatermarks.ui)
+	imesa->dirty |= SAVAGE_UPLOAD_GLOBAL;
+}
+
+static void savageDDDepthMask_s4(GLcontext *ctx, GLboolean flag)
+{   /* flag is unused: savageDDDepthFunc_s4 reads ctx->Depth.Mask itself */
+    savageDDDepthFunc_s4(ctx,ctx->Depth.Func);
+}
+static void savageDDDepthMask_s3d(GLcontext *ctx, GLboolean flag)
+{   /* flag is unused: savageDDDepthFunc_s3d reads ctx->Depth.Mask itself */
+    savageDDDepthFunc_s3d(ctx,ctx->Depth.Func);
+}
+
+
+
+
+/* =============================================================
+ * Hardware clipping
+ */
+
+
+static void savageDDScissor( GLcontext *ctx, GLint x, GLint y,
+                             GLsizei w, GLsizei h )
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+
+    /* Anything already batched must go out under the old scissor. */
+    FLUSH_BATCH(imesa);
+
+    /* Copy the new rectangle, then the enable flag, into the driver context. */
+    imesa->scissor.x = x;
+    imesa->scissor.y = y;
+    imesa->scissor.w = w;
+    imesa->scissor.h = h;
+    imesa->scissor.enabled = ctx->Scissor.Enabled;
+}
+
+
+
+static void savageDDDrawBuffer(GLcontext *ctx, GLenum mode )
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    const uint32_t oldDestCtrl = imesa->regs.s4.destCtrl.ui;
+
+    /* Only one colour draw buffer, front-left or back-left, is handled
+     * here; anything else requests SAVAGE_FALLBACK_DRAW_BUFFER and returns
+     * before any state is touched. */
+    if (ctx->DrawBuffer->_NumColorDrawBuffers != 1 ||
+	(ctx->DrawBuffer->_ColorDrawBufferIndexes[0] != BUFFER_FRONT_LEFT &&
+	 ctx->DrawBuffer->_ColorDrawBufferIndexes[0] != BUFFER_BACK_LEFT)) {
+	FALLBACK( ctx, SAVAGE_FALLBACK_DRAW_BUFFER, GL_TRUE );
+	return;
+    }
+
+    /* The destination offset field holds the buffer offset shifted right by 11. */
+    if (ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+	imesa->IsDouble = GL_TRUE;
+	imesa->regs.s4.destCtrl.ni.offset = imesa->savageScreen->backOffset>>11;
+    } else {
+	imesa->IsDouble = GL_FALSE;
+	imesa->regs.s4.destCtrl.ni.offset = imesa->savageScreen->frontOffset>>11;
+    }
+
+    imesa->NotFirstFrame = GL_FALSE;
+    savageXMesaSetClipRects(imesa);
+    FALLBACK(ctx, SAVAGE_FALLBACK_DRAW_BUFFER, GL_FALSE);
+
+    if (oldDestCtrl != imesa->regs.s4.destCtrl.ui)
+        imesa->dirty |= SAVAGE_UPLOAD_GLOBAL;
+}
+
+static void savageDDReadBuffer(GLcontext *ctx, GLenum mode )
+{
+   /* Intentionally empty (ctx and mode unused) until we implement h/w glRead/CopyPixels or CopyTexImage */
+}
+
+#if 0 /* compiled out */
+static void savageDDSetColor(GLcontext *ctx, 
+                             GLubyte r, GLubyte g,
+                             GLubyte b, GLubyte a )
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    imesa->MonoColor = savagePackColor( imesa->savageScreen->frontFormat, r, g, b, a ); /* packed in the front buffer format */
+}
+#endif /* 0 */
+
+/* =============================================================
+ * Window position and viewport transformation
+ */
+
+void savageCalcViewport( GLcontext *ctx )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   const GLfloat *win = ctx->Viewport._WindowMap.m;
+   GLfloat *hw = imesa->hw_viewport;
+   /* True when float depth is in use with a 2-byte Z buffer. */
+   const GLboolean float16 = imesa->float_depth && imesa->savageScreen->zpp == 2;
+   /* X scale passes through; Y is negated and offset by the drawable height and position. */
+   hw[MAT_SX] =   win[MAT_SX];
+   hw[MAT_SY] = - win[MAT_SY];
+   hw[MAT_TX] =   win[MAT_TX] + imesa->drawX + SUBPIXEL_X;
+   hw[MAT_TY] = - win[MAT_TY] + imesa->driDrawable->h + imesa->drawY + SUBPIXEL_Y;
+
+   /* Depth range is reversed (far: 0, near: 1) so that float depth
+    * compensates for loss of accuracy of far coordinates. */
+   if (!float16) {
+       hw[MAT_SZ] = - win[MAT_SZ] * imesa->depth_scale;
+       hw[MAT_TZ] = 1.0 - win[MAT_TZ] * imesa->depth_scale;
+   } else {
+       /* The 16-bit float format can't encode numbers < 2^-16: keep depth above that. */
+       hw[MAT_SZ] = - win[MAT_SZ] * imesa->depth_scale * (65535.0/65536.0);
+       hw[MAT_TZ] = 1.0 - win[MAT_TZ] * imesa->depth_scale * (65535.0/65536.0);
+   }
+   imesa->SetupNewInputs = ~0;
+}
+
+static void savageViewport( GLcontext *ctx, 
+			    GLint x, GLint y, 
+			    GLsizei width, GLsizei height )
+{  /* x, y, width and height are unused: savageCalcViewport reads ctx->Viewport._WindowMap */
+   savageCalcViewport( ctx );
+}
+
+static void savageDepthRange( GLcontext *ctx, 
+			      GLclampd nearval, GLclampd farval )
+{  /* nearval and farval are unused: savageCalcViewport reads ctx->Viewport._WindowMap */
+   savageCalcViewport( ctx );
+}
+
+
+/* =============================================================
+ * Miscellaneous
+ */
+
+static void savageDDClearColor(GLcontext *ctx,
+			       const GLfloat color[4] )
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    GLubyte red, green, blue, alpha;
+    /* Convert each float channel to a byte, then pack in the front buffer format. */
+    CLAMPED_FLOAT_TO_UBYTE(red,   color[0]);
+    CLAMPED_FLOAT_TO_UBYTE(green, color[1]);
+    CLAMPED_FLOAT_TO_UBYTE(blue,  color[2]);
+    CLAMPED_FLOAT_TO_UBYTE(alpha, color[3]);
+    imesa->ClearColor = savagePackColor( imesa->savageScreen->frontFormat,
+					 red, green, blue, alpha );
+}
+
+/* Fallback to swrast for select and feedback.
+ */
+static void savageRenderMode( GLcontext *ctx, GLenum mode )
+{  /* RENDERMODE fallback is on for every mode except GL_RENDER and cleared again for GL_RENDER */
+   FALLBACK( ctx, SAVAGE_FALLBACK_RENDERMODE, (mode != GL_RENDER) );
+}
+
+
+#if HW_CULL
+
+/* =============================================================
+ * Culling - the savage isn't quite as clean here as the rest of
+ *           its interfaces, but it's not bad.
+ */
+static void savageDDCullFaceFrontFace(GLcontext *ctx, GLenum unused)
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    const GLenum face = ctx->Polygon.CullFaceMode;
+    const GLenum winding = ctx->Polygon.FrontFace;
+    GLuint cullMode = imesa->LcsCullMode;
+    /* GL_FRONT culls the front-face winding, GL_BACK the opposite one;
+     * any other combination keeps the previously stored mode. */
+    if (face == GL_FRONT)
+    {
+        if (winding == GL_CW)
+        {
+            cullMode = BCM_CW;
+        }
+        else if (winding == GL_CCW)
+        {
+            cullMode = BCM_CCW;
+        }
+    }
+    else if (face == GL_BACK)
+    {
+        if (winding == GL_CW)
+        {
+            cullMode = BCM_CCW;
+        }
+        else if (winding == GL_CCW)
+        {
+            cullMode = BCM_CW;
+        }
+    }
+    imesa->LcsCullMode = cullMode;
+    imesa->new_state |= SAVAGE_NEW_CULL;
+}
+#endif /* end #if HW_CULL */
+
+static void savageUpdateCull( GLcontext *ctx )
+{
+#if HW_CULL
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    /* Culling applies only when enabled, for primitives at or above
+     * GL_TRIANGLES, and when not both faces are selected. */
+    const GLboolean cullActive = ctx->Polygon.CullFlag &&
+	imesa->raster_primitive >= GL_TRIANGLES &&
+	ctx->Polygon.CullFaceMode != GL_FRONT_AND_BACK;
+    const GLuint cullMode = cullActive ? imesa->LcsCullMode : BCM_None;
+
+    if (imesa->savageScreen->chipset < S3_SAVAGE4) {
+	if (imesa->regs.s3d.drawCtrl.ni.cullMode != cullMode) {
+	    imesa->regs.s3d.drawCtrl.ni.cullMode = cullMode;
+	    imesa->dirty |= SAVAGE_UPLOAD_LOCAL;
+	}
+    } else {
+	if (imesa->regs.s4.drawCtrl1.ni.cullMode != cullMode) {
+	    imesa->regs.s4.drawCtrl1.ni.cullMode = cullMode;
+	    imesa->dirty |= SAVAGE_UPLOAD_GLOBAL;
+	}
+    }
+#endif /* end  #if HW_CULL */
+}
+
+
+
+/* =============================================================
+ * Color masks
+ */
+
+/* Savage4 can disable draw updates when all channels are
+ * masked. Savage3D has a bit called drawUpdateEn, but it doesn't seem
+ * to have any effect. If only some channels are masked we need a
+ * software fallback on all chips.
+ */
+static void savageDDColorMask_s4(GLcontext *ctx,
+				 GLboolean r, GLboolean g,
+				 GLboolean b, GLboolean a )
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT( ctx );
+    /* The alpha mask only counts when the visual has alpha bits. */
+    const GLboolean alphaCounts = ctx->Visual.alphaBits != 0;
+    const GLboolean passAny = r || g || b || (alphaCounts && a);
+    const GLboolean passAll = r && g && b && (a || !alphaCounts);
+
+    if (!passAny) {
+	/* Every channel masked: turn draw updates off if they are on. */
+	if (imesa->regs.s4.drawLocalCtrl.ni.drawUpdateEn) {
+	    imesa->regs.s4.drawLocalCtrl.ni.drawUpdateEn = GL_FALSE;
+	    imesa->dirty |= SAVAGE_UPLOAD_LOCAL;
+	}
+	return;
+    }
+
+    if (!imesa->regs.s4.drawLocalCtrl.ni.drawUpdateEn) {
+	imesa->regs.s4.drawLocalCtrl.ni.drawUpdateEn = GL_TRUE;
+	imesa->dirty |= SAVAGE_UPLOAD_LOCAL;
+    }
+    /* A partial mask needs the colour-mask fallback. */
+    FALLBACK (ctx, SAVAGE_FALLBACK_COLORMASK, !passAll);
+}
+static void savageDDColorMask_s3d(GLcontext *ctx,
+				  GLboolean r, GLboolean g,
+				  GLboolean b, GLboolean a )
+{
+    /* Alpha is only considered when the visual has alpha bits; any
+     * masked channel turns the colour-mask fallback on. */
+    const GLboolean passAll = r && g && b && (a || !ctx->Visual.alphaBits);
+    FALLBACK (ctx, SAVAGE_FALLBACK_COLORMASK, !passAll);
+}
+
+static void savageUpdateSpecular_s4(GLcontext *ctx) {
+    savageContextPtr imesa = SAVAGE_CONTEXT( ctx );
+    const uint32_t before = imesa->regs.s4.drawLocalCtrl.ui;
+
+    /* The specular-shade bit follows the NEED_SECONDARY_COLOR test. */
+    imesa->regs.s4.drawLocalCtrl.ni.specShadeEn =
+	NEED_SECONDARY_COLOR(ctx) ? GL_TRUE : GL_FALSE;
+
+    /* Mark local state dirty only if the register word changed. */
+    if (imesa->regs.s4.drawLocalCtrl.ui == before)
+	return;
+    imesa->dirty |= SAVAGE_UPLOAD_LOCAL;
+}
+
+static void savageUpdateSpecular_s3d(GLcontext *ctx) {
+    savageContextPtr imesa = SAVAGE_CONTEXT( ctx );
+    const uint32_t before = imesa->regs.s3d.drawCtrl.ui;
+
+    /* The specular-shade bit follows the NEED_SECONDARY_COLOR test. */
+    imesa->regs.s3d.drawCtrl.ni.specShadeEn =
+	NEED_SECONDARY_COLOR(ctx) ? GL_TRUE : GL_FALSE;
+
+    /* Mark local state dirty only if the register word changed. */
+    if (imesa->regs.s3d.drawCtrl.ui == before)
+	return;
+    imesa->dirty |= SAVAGE_UPLOAD_LOCAL;
+}
+
+static void savageDDLightModelfv_s4(GLcontext *ctx, GLenum pname, 
+				    const GLfloat *param)
+{   /* pname and param are unused: the specular enable is recomputed from ctx on every call */
+    savageUpdateSpecular_s4 (ctx);
+}
+static void savageDDLightModelfv_s3d(GLcontext *ctx, GLenum pname, 
+				     const GLfloat *param)
+{   /* pname and param are unused: the specular enable is recomputed from ctx on every call */
+    savageUpdateSpecular_s3d (ctx);
+}
+
+static void savageDDShadeModel_s4(GLcontext *ctx, GLuint mod)
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT( ctx );
+    const uint32_t before = imesa->regs.s4.drawLocalCtrl.ui;
+
+    /* Flat shading is enabled for every model other than GL_SMOOTH. */
+    imesa->regs.s4.drawLocalCtrl.ni.flatShadeEn =
+	(mod == GL_SMOOTH) ? GL_FALSE : GL_TRUE;
+
+    /* The register word is compared against its value on entry so
+     * that SAVAGE_UPLOAD_LOCAL is raised only when the bit really
+     * changed.
+     */
+    if (imesa->regs.s4.drawLocalCtrl.ui != before) {
+	imesa->dirty |= SAVAGE_UPLOAD_LOCAL;
+    }
+}
+static void savageDDShadeModel_s3d(GLcontext *ctx, GLuint mod)
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT( ctx );
+    const uint32_t before = imesa->regs.s3d.drawCtrl.ui;
+
+    /* Flat shading is enabled for every model other than GL_SMOOTH. */
+    imesa->regs.s3d.drawCtrl.ni.flatShadeEn =
+	(mod == GL_SMOOTH) ? GL_FALSE : GL_TRUE;
+
+    /* The register word is compared against its value on entry so
+     * that SAVAGE_UPLOAD_LOCAL is raised only when the bit really
+     * changed.
+     */
+    if (imesa->regs.s3d.drawCtrl.ui != before) {
+	imesa->dirty |= SAVAGE_UPLOAD_LOCAL;
+    }
+}
+
+
+/* =============================================================
+ * Fog
+ * The fogCtrl register has the same position and the same layout
+ * on savage3d and savage4. No need for two separate functions.
+ */
+
+static void savageDDFogfv(GLcontext *ctx, GLenum pname, const GLfloat *param)
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    const uint32_t before = imesa->regs.s4.fogCtrl.ui;
+
+    /* pname and param are not consulted: the fog register fields are
+     * rebuilt from ctx->Fog on every call.
+     */
+    if (!ctx->Fog.Enabled)
+    {
+        /*No fog*/
+	imesa->regs.s4.fogCtrl.ni.fogEn     = 0;
+	imesa->regs.s4.fogCtrl.ni.fogMode   = 0;
+    }
+    else
+    {
+        /* Fog colour as 0xRRGGBB; each channel is scaled by 255 and cast to a byte. */
+        const GLuint red   = (GLubyte)(ctx->Fog.Color[0]*255.0F);
+        const GLuint green = (GLubyte)(ctx->Fog.Color[1]*255.0F);
+        const GLuint blue  = (GLubyte)(ctx->Fog.Color[2]*255.0F);
+	imesa->regs.s4.fogCtrl.ni.fogClr = (red << 16) | (green << 8) | (blue << 0);
+        /*cheap fog*/
+	imesa->regs.s4.fogCtrl.ni.fogMode  = GL_TRUE;
+	imesa->regs.s4.fogCtrl.ni.fogEn  = GL_TRUE;
+    }
+    if (before != imesa->regs.s4.fogCtrl.ui)
+	imesa->dirty |= SAVAGE_UPLOAD_GLOBAL;
+}
+
+
+static void
+savageDDStencilFuncSeparate(GLcontext *ctx, GLenum face, GLenum func,
+                            GLint ref, GLuint mask)
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    const uint32_t oldZBufCtrl = imesa->regs.s4.zBufCtrl.ui;
+    const uint32_t oldStencilCtrl = imesa->regs.s4.stencilCtrl.ui;
+    unsigned hwFunc;
+
+    /* All values come from ctx->Stencil, index 0; the arguments are unused.
+     * An unrecognised function is programmed as 0. */
+    switch (ctx->Stencil.Function[0])
+    {
+    case GL_NEVER:    hwFunc = CF_Never;        break;
+    case GL_LESS:     hwFunc = CF_Less;         break;
+    case GL_LEQUAL:   hwFunc = CF_LessEqual;    break;
+    case GL_EQUAL:    hwFunc = CF_Equal;        break;
+    case GL_GEQUAL:   hwFunc = CF_GreaterEqual; break;
+    case GL_GREATER:  hwFunc = CF_Greater;      break;
+    case GL_NOTEQUAL: hwFunc = CF_NotEqual;     break;
+    case GL_ALWAYS:   hwFunc = CF_Always;       break;
+    default:          hwFunc = 0;               break;
+    }
+    imesa->regs.s4.stencilCtrl.ni.cmpFunc = hwFunc;
+    imesa->regs.s4.stencilCtrl.ni.readMask  = ctx->Stencil.ValueMask[0] & 0xff;
+    imesa->regs.s4.zBufCtrl.ni.stencilRefVal = ctx->Stencil.Ref[0] & 0xff;
+
+    if (oldZBufCtrl == imesa->regs.s4.zBufCtrl.ui &&
+	oldStencilCtrl == imesa->regs.s4.stencilCtrl.ui)
+	return;
+    imesa->dirty |= SAVAGE_UPLOAD_GLOBAL;
+}
+
+static void
+savageDDStencilMaskSeparate(GLcontext *ctx, GLenum face, GLuint mask)
+{
+    /* Mirror the low 8 bits of ctx->Stencil.WriteMask[0] into the
+     * stencil control register; face and mask are not read. */
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    const GLuint wanted = ctx->Stencil.WriteMask[0] & 0xff;
+
+    if (imesa->regs.s4.stencilCtrl.ni.writeMask == wanted) {
+        return;
+    }
+
+    imesa->regs.s4.stencilCtrl.ni.writeMask = wanted;
+    imesa->dirty |= SAVAGE_UPLOAD_GLOBAL;
+}
+
+static unsigned get_stencil_op_value( GLenum op )
+{
+    /* Translate a GL stencil operation into the STENCIL_* value written
+     * to the stencil control register.  Unrecognised operations, which
+     * should never occur, yield STENCIL_Keep. */
+    unsigned hwOp = STENCIL_Keep;
+
+    switch (op) {
+    case GL_KEEP:      hwOp = STENCIL_Keep;     break;
+    case GL_ZERO:      hwOp = STENCIL_Zero;     break;
+    case GL_REPLACE:   hwOp = STENCIL_Equal;    break;
+    case GL_INCR:      hwOp = STENCIL_IncClamp; break;
+    case GL_DECR:      hwOp = STENCIL_DecClamp; break;
+    case GL_INVERT:    hwOp = STENCIL_Invert;   break;
+    case GL_INCR_WRAP: hwOp = STENCIL_Inc;      break;
+    case GL_DECR_WRAP: hwOp = STENCIL_Dec;      break;
+    default:                                    break;
+    }
+
+    return hwOp;
+}
+
+static void
+savageDDStencilOpSeparate(GLcontext *ctx, GLenum face, GLenum fail,
+                          GLenum zfail, GLenum zpass)
+{
+    /* Program the three stencil operations from entry 0 of the context's
+     * stencil state; the face/fail/zfail/zpass arguments are not read. */
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    const uint32_t before = imesa->regs.s4.stencilCtrl.ui;
+    const unsigned onFail  = get_stencil_op_value( ctx->Stencil.FailFunc[0] );
+    const unsigned onZFail = get_stencil_op_value( ctx->Stencil.ZFailFunc[0] );
+    const unsigned onZPass = get_stencil_op_value( ctx->Stencil.ZPassFunc[0] );
+
+    imesa->regs.s4.stencilCtrl.ni.failOp      = onFail;
+    imesa->regs.s4.stencilCtrl.ni.passZfailOp = onZFail;
+    imesa->regs.s4.stencilCtrl.ni.passZpassOp = onZPass;
+
+    if (imesa->regs.s4.stencilCtrl.ui != before) {
+        imesa->dirty |= SAVAGE_UPLOAD_GLOBAL;
+    }
+}
+
+
+/* =============================================================
+ * glEnable()/glDisable() hook for Savage4-class chips.
+ *
+ * Installed as ctx->Driver.Enable by savageDDInitStateFuncs when the
+ * chipset is S3_SAVAGE4 or newer.  Dispatches on cap and refreshes the
+ * affected register shadow, FALLBACK condition or new_state/dirty
+ * flags.  Capabilities that are not listed are ignored.
+ */
+
+static void savageDDEnable_s4(GLcontext *ctx, GLenum cap, GLboolean state)
+{
+   
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    switch(cap) {
+        case GL_ALPHA_TEST:
+            /* TODO: the disable case should be considered as well. */
+            savageBlendFunc_s4(ctx);
+            break;
+        case GL_BLEND:
+            /* savageBlendFunc added 2001/11/25.
+             * Without this call glDisable(GL_BLEND) would do nothing,
+             * because the chip has no blend disable bit.
+             */ 
+            savageBlendFunc_s4(ctx);
+        case GL_COLOR_LOGIC_OP:
+            /* Fall through from GL_BLEND (intentional):
+	     * For some reason enable(GL_BLEND) affects ColorLogicOpEnabled.
+             */
+	    FALLBACK (ctx, SAVAGE_FALLBACK_LOGICOP,
+		      (ctx->Color.ColorLogicOpEnabled &&
+		       ctx->Color.LogicOp != GL_COPY));
+            break;
+        case GL_DEPTH_TEST:
+            savageDDDepthFunc_s4(ctx,ctx->Depth.Func);
+            break;
+        case GL_SCISSOR_TEST:
+	    savageDDScissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
+			    ctx->Scissor.Width, ctx->Scissor.Height);
+            break;
+        case GL_STENCIL_TEST:
+	    if (!imesa->hw_stencil)
+		FALLBACK (ctx, SAVAGE_FALLBACK_STENCIL, state);
+	    else {
+		imesa->regs.s4.stencilCtrl.ni.stencilEn = state;
+		if (ctx->Stencil._Enabled &&
+		    imesa->regs.s4.zBufCtrl.ni.zBufEn != GL_TRUE)
+		{
+		    /* Stencil buffer requires Z enabled: turn the Z buffer
+		     * on with an always-pass compare and no Z writes. */
+		    imesa->regs.s4.zBufCtrl.ni.zCmpFunc       = CF_Always;
+		    imesa->regs.s4.zBufCtrl.ni.zBufEn         = GL_TRUE;
+		    imesa->regs.s4.drawLocalCtrl.ni.zUpdateEn = GL_FALSE;
+		}
+		imesa->dirty |= SAVAGE_UPLOAD_GLOBAL | SAVAGE_UPLOAD_LOCAL;
+	    }
+            break;
+        case GL_FOG:
+            savageDDFogfv(ctx,0,0);	/* pname/param are not read by savageDDFogfv */
+            break;
+        case GL_CULL_FACE:
+#if HW_CULL
+            if (state)
+            {
+                savageDDCullFaceFrontFace(ctx,0);
+            }
+            else
+            {
+		imesa->LcsCullMode = BCM_None;
+		imesa->new_state |= SAVAGE_NEW_CULL;
+            }
+#endif
+            break;
+        case GL_DITHER: /* ditherEn is driven by ctx->Color.DitherFlag; a global upload is always requested */
+            if (state)
+            {
+                if ( ctx->Color.DitherFlag )
+                {
+                    imesa->regs.s4.drawCtrl1.ni.ditherEn=GL_TRUE;
+                }
+            }   
+            if (!ctx->Color.DitherFlag )
+            {
+                imesa->regs.s4.drawCtrl1.ni.ditherEn=GL_FALSE;
+            }
+            imesa->dirty |= SAVAGE_UPLOAD_GLOBAL;
+            break;
+ 
+        case GL_LIGHTING:
+	    savageUpdateSpecular_s4 (ctx);
+            break;
+        case GL_TEXTURE_1D:      
+        case GL_TEXTURE_3D:      
+            imesa->new_state |= SAVAGE_NEW_TEXTURE;
+            break;
+        case GL_TEXTURE_2D:      
+            imesa->new_state |= SAVAGE_NEW_TEXTURE;
+            break;
+        default:
+            ; 
+    }    
+}
+static void savageDDEnable_s3d(GLcontext *ctx, GLenum cap, GLboolean state)
+{
+    /* glEnable()/glDisable() hook for chips older than Savage4.
+     * Installed as ctx->Driver.Enable by savageDDInitStateFuncs when the
+     * chipset is below S3_SAVAGE4.  Dispatches on cap; capabilities that
+     * are not listed are ignored. */
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    switch(cap) {
+        case GL_ALPHA_TEST:
+            /* TODO: the disable case should be considered as well. */
+            savageBlendFunc_s3d(ctx);
+            break;
+        case GL_BLEND:
+            /* savageBlendFunc added 2001/11/25.
+             * Without this call glDisable(GL_BLEND) would do nothing,
+             * because the chip has no blend disable bit.
+             */ 
+            savageBlendFunc_s3d(ctx);
+        case GL_COLOR_LOGIC_OP:
+            /* Fall through from GL_BLEND (intentional):
+	     * For some reason enable(GL_BLEND) affects ColorLogicOpEnabled.
+             */
+	    FALLBACK (ctx, SAVAGE_FALLBACK_LOGICOP,
+		      (ctx->Color.ColorLogicOpEnabled &&
+		       ctx->Color.LogicOp != GL_COPY));
+            break;
+        case GL_DEPTH_TEST:
+            savageDDDepthFunc_s3d(ctx,ctx->Depth.Func);
+            break;
+        case GL_SCISSOR_TEST:
+	    savageDDScissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
+			    ctx->Scissor.Width, ctx->Scissor.Height);
+            break;
+        case GL_STENCIL_TEST:
+	    FALLBACK (ctx, SAVAGE_FALLBACK_STENCIL, state); /* unconditional here, unlike the _s4 variant which checks hw_stencil */
+	    break;
+        case GL_FOG:
+            savageDDFogfv(ctx,0,0);	/* pname/param are not read by savageDDFogfv */
+            break;
+        case GL_CULL_FACE:
+#if HW_CULL
+            if (state)
+            {
+                savageDDCullFaceFrontFace(ctx,0);
+            }
+            else
+            {
+                imesa->LcsCullMode = BCM_None;
+		imesa->new_state |= SAVAGE_NEW_CULL;
+            }
+#endif
+            break;
+        case GL_DITHER: /* ditherEn is driven by ctx->Color.DitherFlag; a local upload is always requested */
+            if (state)
+            {
+                if ( ctx->Color.DitherFlag )
+                {
+                    imesa->regs.s3d.drawCtrl.ni.ditherEn=GL_TRUE;
+                }
+            }
+            if (!ctx->Color.DitherFlag )
+            {
+                imesa->regs.s3d.drawCtrl.ni.ditherEn=GL_FALSE;
+            }
+            imesa->dirty |= SAVAGE_UPLOAD_LOCAL;
+            break;
+ 
+        case GL_LIGHTING:
+	    savageUpdateSpecular_s3d (ctx);
+            break;
+        case GL_TEXTURE_1D:      
+        case GL_TEXTURE_3D:      
+            imesa->new_state |= SAVAGE_NEW_TEXTURE;
+            break;
+        case GL_TEXTURE_2D:      
+            imesa->new_state |= SAVAGE_NEW_TEXTURE;
+            break;
+        default:
+            ; 
+    }    
+}
+
+void savageDDUpdateHwState( GLcontext *ctx )
+{
+    /* Process the deferred SAVAGE_NEW_* work recorded in new_state:
+     * flush queued vertices first, then refresh texture and cull state
+     * as requested, and finally clear the pending flags. */
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+
+    if (!imesa->new_state) {
+	return;
+    }
+
+    savageFlushVertices(imesa);
+
+    if (imesa->new_state & SAVAGE_NEW_TEXTURE) {
+	savageUpdateTextureState( ctx );
+    }
+    if (imesa->new_state & SAVAGE_NEW_CULL) {
+	savageUpdateCull(ctx);
+    }
+
+    imesa->new_state = 0;
+}
+
+
+static void savageDDPrintDirty( const char *msg, GLuint state )
+{
+    /* Debug helper: write msg, the raw dirty mask in hex and the name of
+     * every SAVAGE_UPLOAD_* bit set in state to stderr. */
+    const char *local     = (state & SAVAGE_UPLOAD_LOCAL)     ? "upload-local, " : "";
+    const char *tex0      = (state & SAVAGE_UPLOAD_TEX0)      ? "upload-tex0, " : "";
+    const char *tex1      = (state & SAVAGE_UPLOAD_TEX1)      ? "upload-tex1, " : "";
+    const char *fogtbl    = (state & SAVAGE_UPLOAD_FOGTBL)    ? "upload-fogtbl, " : "";
+    const char *global    = (state & SAVAGE_UPLOAD_GLOBAL)    ? "upload-global, " : "";
+    const char *texglobal = (state & SAVAGE_UPLOAD_TEXGLOBAL) ? "upload-texglobal, " : "";
+
+    fprintf(stderr, "%s (0x%x): %s%s%s%s%s%s\n",
+            msg, (unsigned int) state,
+            local, tex0, tex1, fogtbl, global, texglobal);
+}
+
+
+/**
+ * Report whether any register in the range first..last (inclusive,
+ * given as register numbers) differs between oldRegs and regs in a bit
+ * that globalRegMask marks as global.
+ */
+static GLboolean savageGlobalRegChanged (savageContextPtr imesa,
+					 GLuint first, GLuint last) {
+    /* Register numbers are turned into array indices by subtracting
+     * SAVAGE_FIRST_REG. */
+    const GLuint stop = last - SAVAGE_FIRST_REG;
+    GLuint idx = first - SAVAGE_FIRST_REG;
+
+    while (idx <= stop) {
+	const uint32_t changedBits =
+	    imesa->oldRegs.ui[idx] ^ imesa->regs.ui[idx];
+	if ((changedBits & imesa->globalRegMask.ui[idx]) != 0) {
+	    return GL_TRUE;
+	}
+	++idx;
+    }
+    return GL_FALSE;
+}
+static void savageEmitOldRegs (savageContextPtr imesa,
+			       GLuint first, GLuint last, GLboolean global) {
+    /* Queue one SAVAGE_CMD_STATE command that carries the values stored
+     * in oldRegs for registers first..last (inclusive), tagged with the
+     * caller-supplied global flag. */
+    const GLuint count = last-first+1;
+    drm_savage_cmd_header_t *hdr = savageAllocCmdBuf(imesa, count*4);
+
+    hdr->state.cmd = SAVAGE_CMD_STATE;
+    hdr->state.global = global;
+    hdr->state.count = count;
+    hdr->state.start = first;
+
+    /* The payload (4 bytes per register) follows the header directly. */
+    memcpy(hdr+1, &imesa->oldRegs.ui[first-SAVAGE_FIRST_REG], count*4);
+}
+static void savageEmitContiguousRegs (savageContextPtr imesa,
+				      GLuint first, GLuint last) { /*
+     * Queue one SAVAGE_CMD_STATE command carrying the current values of
+     * registers first..last (inclusive, register numbers) and then
+     * record those values in oldRegs. */
+    GLuint i;
+    GLuint n = last-first+1; /* number of registers in the range */
+    drm_savage_cmd_header_t *cmd = savageAllocCmdBuf(imesa, n*4); /* 4 bytes per register */
+    cmd->state.cmd = SAVAGE_CMD_STATE;
+    cmd->state.global = savageGlobalRegChanged(imesa, first, last); /* compares oldRegs with regs, so it must run before oldRegs is updated below */
+    cmd->state.count = n;
+    cmd->state.start = first;
+    memcpy(cmd+1, &imesa->regs.ui[first-SAVAGE_FIRST_REG], n*4); /* payload follows the header */
+    /* savageAllocCmdBuf may need to flush the cmd buffer and back up
+     * the current hardware state. It should see the "old" (current)
+     * state that has actually been emitted to the hardware. Therefore
+     * this update is done *after* savageAllocCmdBuf. */
+    for (i = first - SAVAGE_FIRST_REG; i <= last - SAVAGE_FIRST_REG; ++i)
+	imesa->oldRegs.ui[i] = imesa->regs.ui[i];
+    if (SAVAGE_DEBUG & DEBUG_STATE)
+	fprintf (stderr, "Emitting regs 0x%02x-0x%02x\n", first, last);
+}
+static void savageEmitChangedRegs (savageContextPtr imesa,
+				   GLuint first, GLuint last) {
+    /* Walk registers first..last (inclusive) and emit every maximal run
+     * of consecutive registers whose value differs from oldRegs as one
+     * contiguous state command. */
+    const GLuint noRun = SAVAGE_NR_REGS;	/* sentinel: no run is open */
+    const GLuint stop = last - SAVAGE_FIRST_REG;
+    GLuint runStart = noRun;
+    GLuint idx;
+
+    for (idx = first - SAVAGE_FIRST_REG; idx <= stop; ++idx) {
+	const GLboolean differs =
+	    imesa->oldRegs.ui[idx] != imesa->regs.ui[idx];
+
+	if (differs) {
+	    /* Open a run unless one is already in progress. */
+	    if (runStart == noRun) {
+		runStart = idx;
+	    }
+	    continue;
+	}
+	if (runStart == noRun) {
+	    continue;
+	}
+	/* An unchanged register ends the run that stopped at idx-1. */
+	savageEmitContiguousRegs (imesa, runStart+SAVAGE_FIRST_REG,
+				  idx-1+SAVAGE_FIRST_REG);
+	runStart = noRun;
+    }
+
+    /* A run that reaches the end of the range is still pending. */
+    if (runStart != noRun) {
+	savageEmitContiguousRegs (imesa, runStart+SAVAGE_FIRST_REG,
+				  last);
+    }
+}
+static void savageEmitChangedRegChunk (savageContextPtr imesa,
+				       GLuint first, GLuint last) {
+    /* Emit the whole range first..last as a single chunk if at least one
+     * register in it differs from oldRegs; otherwise emit nothing. */
+    const GLuint stop = last - SAVAGE_FIRST_REG;
+    GLuint idx = first - SAVAGE_FIRST_REG;
+
+    /* Skip over the leading registers that are unchanged. */
+    while (idx <= stop && imesa->oldRegs.ui[idx] == imesa->regs.ui[idx]) {
+	++idx;
+    }
+
+    if (idx <= stop) {
+	savageEmitContiguousRegs (imesa, first, last);
+    }
+}
+static void savageUpdateRegister_s4(savageContextPtr imesa)
+{
+    const GLuint texDirtyBit[2] = { SAVAGE_UPLOAD_TEX0, SAVAGE_UPLOAD_TEX1 };
+    GLuint unit;
+
+    /* If a texture image changed while its address stayed the same, the
+     * address must still be re-emitted so that the texture caches get
+     * flushed.  Poisoning the old value forces that. */
+    for (unit = 0; unit < 2; ++unit) {
+	if ((imesa->dirty & texDirtyBit[unit]) &&
+	    imesa->oldRegs.s4.texAddr[unit].ui ==
+	    imesa->regs.s4.texAddr[unit].ui) {
+	    imesa->oldRegs.s4.texAddr[unit].ui = 0xffffffff;
+	}
+    }
+
+    /* Fix up the watermarks to match the flush settings. */
+    if (imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites) {
+	imesa->regs.s4.destTexWatermarks.ni.destWriteLow = 0;
+	imesa->regs.s4.destTexWatermarks.ni.destFlush = 1;
+    } else {
+	imesa->regs.s4.destTexWatermarks.ni.destWriteLow = S4_DWLO;
+    }
+    imesa->regs.s4.zWatermarks.ni.wLow =
+	imesa->regs.s4.drawLocalCtrl.ni.flushPdZbufWrites ? 0 : S4_ZWLO;
+
+    /* Savage4 state lives in one register range. */
+    savageEmitChangedRegs (imesa, 0x1e, 0x39);
+
+    imesa->dirty = 0;
+}
+static void savageUpdateRegister_s3d(savageContextPtr imesa)
+{
+    const GLboolean texDirty = (imesa->dirty & SAVAGE_UPLOAD_TEX0) != 0;
+
+    /* If the texture image changed while its address stayed the same,
+     * the address must still be re-emitted so that the texture caches
+     * get flushed.  Poisoning the old value forces that. */
+    if (texDirty &&
+	imesa->oldRegs.s3d.texAddr.ui == imesa->regs.s3d.texAddr.ui) {
+	imesa->oldRegs.s3d.texAddr.ui = 0xffffffff;
+    }
+
+    /* Fix up the watermarks to match the flush settings. */
+    if (imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites) {
+	imesa->regs.s3d.destTexWatermarks.ni.destWriteLow = 0;
+	imesa->regs.s3d.destTexWatermarks.ni.destFlush = 1;
+    } else {
+	imesa->regs.s3d.destTexWatermarks.ni.destWriteLow = S3D_DWLO;
+    }
+    imesa->regs.s3d.zWatermarks.ni.wLow =
+	imesa->regs.s3d.drawCtrl.ni.flushPdZbufWrites ? 0 : S3D_ZWLO;
+
+    /* The savage3d uses two contiguous ranges of BCI registers:
+     * 0x18-0x1c and 0x20-0x38.  Registers 0x1a-0x1c (texture registers)
+     * have to go out as one chunk, otherwise rendering errors occur. */
+    savageEmitChangedRegs (imesa, 0x18, 0x19);
+    savageEmitChangedRegChunk (imesa, 0x1a, 0x1c);
+    savageEmitChangedRegs (imesa, 0x20, 0x38);
+
+    imesa->dirty = 0;
+}
+
+
+void savageEmitOldState( savageContextPtr imesa )
+{
+    /* Write the complete last-emitted register state (oldRegs) into the
+     * command buffer, which must be empty at this point. */
+    assert(imesa->cmdBuf.write == imesa->cmdBuf.base);
+
+    if (imesa->savageScreen->chipset < S3_SAVAGE4) {
+	/* Older chips: two separate register ranges. */
+	savageEmitOldRegs (imesa, 0x18, 0x1c, GL_TRUE);
+	savageEmitOldRegs (imesa, 0x20, 0x38, GL_FALSE);
+	return;
+    }
+
+    /* Savage4 and newer: a single register range. */
+    savageEmitOldRegs (imesa, 0x1e, 0x39, GL_TRUE);
+}
+
+
+/* Emit the registers that changed since the last emission, using the
+ * chip-specific update routine, and clear the dirty flags.
+ */
+void savageEmitChangedState( savageContextPtr imesa )
+{
+    if (SAVAGE_DEBUG & DEBUG_VERBOSE_API) {
+        savageDDPrintDirty( "\n\n\nsavageEmitHwStateLocked", imesa->dirty );
+    }
+
+    if (imesa->dirty) {
+	if (SAVAGE_DEBUG & DEBUG_VERBOSE_MSG) {
+	    fprintf (stderr, "... emitting state\n");
+	}
+
+	if (imesa->savageScreen->chipset < S3_SAVAGE4) {
+	    savageUpdateRegister_s3d(imesa);
+	} else {
+	    savageUpdateRegister_s4(imesa);
+	}
+    }
+
+    imesa->dirty = 0;
+}
+
+/* Fill the Savage4 register shadow (imesa->regs.s4) with its defaults
+ * and clear the globalRegMask entries of the registers that are local.
+ * Called by savageDDInitState after regs has been zeroed and
+ * globalRegMask has been set to all ones. */
+static void savageDDInitState_s4( savageContextPtr imesa )
+{
+#if 1
+    imesa->regs.s4.destCtrl.ui          = 1<<7;
+#endif
+
+    imesa->regs.s4.zBufCtrl.ni.zCmpFunc = CF_Less;
+    imesa->regs.s4.zBufCtrl.ni.wToZEn               = GL_TRUE;
+    if (imesa->float_depth) {
+	imesa->regs.s4.zBufCtrl.ni.zExpOffset =
+	    imesa->savageScreen->zpp == 2 ? 16 : 32;
+	imesa->regs.s4.zBufCtrl.ni.floatZEn = GL_TRUE;
+    } else {
+	imesa->regs.s4.zBufCtrl.ni.zExpOffset = 0;
+	imesa->regs.s4.zBufCtrl.ni.floatZEn = GL_FALSE;
+    }
+    imesa->regs.s4.texBlendCtrl[0].ui            = TBC_NoTexMap;
+    imesa->regs.s4.texBlendCtrl[1].ui            = TBC_NoTexMap1;
+    imesa->regs.s4.drawCtrl0.ui         = 0;
+#if 0
+    imesa->regs.s4.drawCtrl1.ni.xyOffsetEn = 1;
+#endif
+
+    /* Set DestTexWatermarks_31,30 to 01 always.
+     * Has no effect if dest. flush is disabled.
+     */
+#if 0
+    imesa->regs.s4.zWatermarks.ui       = 0x12000C04;
+    imesa->regs.s4.destTexWatermarks.ui = 0x40200400;
+#else
+    /*imesa->regs.s4.zWatermarks.ui       = 0x16001808;*/
+    imesa->regs.s4.zWatermarks.ni.rLow  = S4_ZRLO;
+    imesa->regs.s4.zWatermarks.ni.rHigh = S4_ZRHI;
+    imesa->regs.s4.zWatermarks.ni.wLow  = S4_ZWLO;
+    imesa->regs.s4.zWatermarks.ni.wHigh = S4_ZWHI;
+    /*imesa->regs.s4.destTexWatermarks.ui = 0x4f000000;*/
+    imesa->regs.s4.destTexWatermarks.ni.destReadLow   = S4_DRLO;
+    imesa->regs.s4.destTexWatermarks.ni.destReadHigh  = S4_DRHI;
+    imesa->regs.s4.destTexWatermarks.ni.destWriteLow  = S4_DWLO;
+    imesa->regs.s4.destTexWatermarks.ni.destWriteHigh = S4_DWHI;
+    imesa->regs.s4.destTexWatermarks.ni.texRead       = S4_TR;
+    imesa->regs.s4.destTexWatermarks.ni.destFlush     = 1;
+#endif
+    imesa->regs.s4.drawCtrl0.ni.dPerfAccelEn = GL_TRUE;
+
+    /* clrCmpAlphaBlendCtrl is needed to get alphatest and
+     * alpha blending working properly
+     */
+
+    imesa->regs.s4.texCtrl[0].ni.dBias                 = 0x08;
+    imesa->regs.s4.texCtrl[1].ni.dBias                 = 0x08;
+    imesa->regs.s4.texCtrl[0].ni.texXprEn              = GL_TRUE;
+    imesa->regs.s4.texCtrl[1].ni.texXprEn              = GL_TRUE;
+    imesa->regs.s4.texCtrl[0].ni.dMax                  = 0x0f;
+    imesa->regs.s4.texCtrl[1].ni.dMax                  = 0x0f;
+    /* Program a valid texture address for both units, in case texture
+     * state is emitted in the wrong order. */
+    if (imesa->lastTexHeap == 2 && imesa->savageScreen->textureSize[1]) {
+	/* AGP textures available */
+	imesa->regs.s4.texAddr[0].ui = imesa->savageScreen->textureOffset[1]|3;
+	imesa->regs.s4.texAddr[1].ui = imesa->savageScreen->textureOffset[1]|3;
+    } else {
+	/* no AGP textures available, use local */
+	imesa->regs.s4.texAddr[0].ui = imesa->savageScreen->textureOffset[0]|2;
+	imesa->regs.s4.texAddr[1].ui = imesa->savageScreen->textureOffset[0]|2;
+    }
+    imesa->regs.s4.drawLocalCtrl.ni.drawUpdateEn     = GL_TRUE;
+    imesa->regs.s4.drawLocalCtrl.ni.srcAlphaMode    = SAM_One;
+    imesa->regs.s4.drawLocalCtrl.ni.wrZafterAlphaTst = GL_FALSE;
+    imesa->regs.s4.drawLocalCtrl.ni.flushPdZbufWrites= GL_TRUE;
+    imesa->regs.s4.drawLocalCtrl.ni.flushPdDestWrites= GL_TRUE;
+
+    imesa->regs.s4.drawLocalCtrl.ni.zUpdateEn= GL_TRUE;
+    imesa->regs.s4.drawCtrl1.ni.ditherEn = (
+	driQueryOptioni(&imesa->optionCache, "color_reduction") ==
+	DRI_CONF_COLOR_REDUCTION_DITHER) ? GL_TRUE : GL_FALSE;
+    imesa->regs.s4.drawCtrl1.ni.cullMode             = BCM_None;
+
+    imesa->regs.s4.zBufCtrl.ni.stencilRefVal      = 0x00;
+
+    imesa->regs.s4.stencilCtrl.ni.stencilEn       = GL_FALSE;
+    imesa->regs.s4.stencilCtrl.ni.cmpFunc         = CF_Always;
+    imesa->regs.s4.stencilCtrl.ni.failOp          = STENCIL_Keep;
+    imesa->regs.s4.stencilCtrl.ni.passZfailOp     = STENCIL_Keep;
+    imesa->regs.s4.stencilCtrl.ni.passZpassOp     = STENCIL_Keep;
+    imesa->regs.s4.stencilCtrl.ni.writeMask       = 0xff;
+    imesa->regs.s4.stencilCtrl.ni.readMask        = 0xff;
+
+    imesa->LcsCullMode=BCM_None;
+    imesa->regs.s4.texDescr.ni.palSize               = TPS_256;
+
+    /* Clear the local registers in the global reg mask, so that changes
+     * to them are not reported by savageGlobalRegChanged. */
+    imesa->globalRegMask.s4.drawLocalCtrl.ui   = 0;
+    imesa->globalRegMask.s4.texPalAddr.ui      = 0;
+    imesa->globalRegMask.s4.texCtrl[0].ui      = 0;
+    imesa->globalRegMask.s4.texCtrl[1].ui      = 0;
+    imesa->globalRegMask.s4.texAddr[0].ui      = 0;
+    imesa->globalRegMask.s4.texAddr[1].ui      = 0;
+    imesa->globalRegMask.s4.texBlendCtrl[0].ui = 0;
+    imesa->globalRegMask.s4.texBlendCtrl[1].ui = 0;
+    imesa->globalRegMask.s4.texXprClr.ui       = 0;
+    imesa->globalRegMask.s4.texDescr.ui        = 0;
+}
+static void savageDDInitState_s3d( savageContextPtr imesa )
+{   /* Fill the register shadow used for chips older than Savage4
+     * (imesa->regs.s3d) with its defaults and reduce globalRegMask to
+     * the bits that are global.  Called by savageDDInitState after regs
+     * has been zeroed and globalRegMask has been set to all ones. */
+#if 1
+    imesa->regs.s3d.destCtrl.ui           = 1<<7;
+#endif
+
+    imesa->regs.s3d.zBufCtrl.ni.zCmpFunc  = CF_Less;
+#if 0
+    imesa->regs.s3d.drawCtrl.ni.xyOffsetEn = 1;
+#endif
+
+    /* Set DestTexWatermarks_31,30 to 01 always.
+     * Has no effect if dest. flush is disabled.
+     */
+#if 0
+    imesa->regs.s3d.zWatermarks.ui       = 0x12000C04;
+    imesa->regs.s3d.destTexWatermarks.ui = 0x40200400;
+#else
+    /*imesa->regs.s3d.zWatermarks.ui       = 0x16001808;*/
+    imesa->regs.s3d.zWatermarks.ni.rLow  = S3D_ZRLO;
+    imesa->regs.s3d.zWatermarks.ni.rHigh = S3D_ZRHI;
+    imesa->regs.s3d.zWatermarks.ni.wLow  = S3D_ZWLO;
+    imesa->regs.s3d.zWatermarks.ni.wHigh = S3D_ZWHI;
+    /*imesa->regs.s3d.destTexWatermarks.ui = 0x4f000000;*/
+    imesa->regs.s3d.destTexWatermarks.ni.destReadLow   = S3D_DRLO;
+    imesa->regs.s3d.destTexWatermarks.ni.destReadHigh  = S3D_DRHI;
+    imesa->regs.s3d.destTexWatermarks.ni.destWriteLow  = S3D_DWLO;
+    imesa->regs.s3d.destTexWatermarks.ni.destWriteHigh = S3D_DWHI;
+    imesa->regs.s3d.destTexWatermarks.ni.texRead       = S3D_TR;
+    imesa->regs.s3d.destTexWatermarks.ni.destFlush     = 1;
+#endif
+
+    imesa->regs.s3d.texCtrl.ni.dBias          = 0x08;
+    imesa->regs.s3d.texCtrl.ni.texXprEn       = GL_TRUE;
+    /* texXprEn is needed to get alphatest and alpha blending working
+     * properly. However, this makes texels with color texXprClr
+     * completely transparent in some texture environment modes. I
+     * couldn't find a way to disable this. So choose an arbitrary and
+     * improbable color. (0 is a bad choice, makes all black texels
+     * transparent.) */
+    imesa->regs.s3d.texXprClr.ui              = 0x26ae26ae;
+    /* Program a valid texture address, in case texture state is emitted
+     * in the wrong order. */
+    if (imesa->lastTexHeap == 2 && imesa->savageScreen->textureSize[1]) {
+	/* AGP textures available */
+	imesa->regs.s3d.texAddr.ui = imesa->savageScreen->textureOffset[1]|3;
+    } else {
+	/* no AGP textures available, use local */
+	imesa->regs.s3d.texAddr.ui = imesa->savageScreen->textureOffset[0]|2;
+    }
+
+    imesa->regs.s3d.zBufCtrl.ni.drawUpdateEn     = GL_TRUE;
+    imesa->regs.s3d.zBufCtrl.ni.wrZafterAlphaTst = GL_FALSE;
+    imesa->regs.s3d.zBufCtrl.ni.zUpdateEn        = GL_TRUE;
+
+    imesa->regs.s3d.drawCtrl.ni.srcAlphaMode      = SAM_One;
+    imesa->regs.s3d.drawCtrl.ni.flushPdZbufWrites = GL_TRUE;
+    imesa->regs.s3d.drawCtrl.ni.flushPdDestWrites = GL_TRUE;
+
+    imesa->regs.s3d.drawCtrl.ni.ditherEn =  (
+	driQueryOptioni(&imesa->optionCache, "color_reduction") ==
+	DRI_CONF_COLOR_REDUCTION_DITHER) ? GL_TRUE : GL_FALSE;
+    imesa->regs.s3d.drawCtrl.ni.cullMode          = BCM_None;
+
+    imesa->LcsCullMode = BCM_None;
+    imesa->regs.s3d.texDescr.ni.palSize          = TPS_256;
+
+    /* Clear the local registers in the global reg mask, so that changes
+     * to them are not reported by savageGlobalRegChanged. */
+    imesa->globalRegMask.s3d.texPalAddr.ui = 0;
+    imesa->globalRegMask.s3d.texXprClr.ui  = 0;
+    imesa->globalRegMask.s3d.texAddr.ui    = 0;
+    imesa->globalRegMask.s3d.texDescr.ui   = 0;
+    imesa->globalRegMask.s3d.texCtrl.ui    = 0;
+
+    imesa->globalRegMask.s3d.fogCtrl.ui = 0;
+
+    /* drawCtrl is local with some exceptions: the fields set below stay
+     * in the global mask. */
+    imesa->globalRegMask.s3d.drawCtrl.ui = 0;
+    imesa->globalRegMask.s3d.drawCtrl.ni.cullMode = 0x3;
+    imesa->globalRegMask.s3d.drawCtrl.ni.alphaTestCmpFunc = 0x7;
+    imesa->globalRegMask.s3d.drawCtrl.ni.alphaTestEn = 0x1;
+    imesa->globalRegMask.s3d.drawCtrl.ni.alphaRefVal = 0xff;
+
+    /* zBufCtrl is local with some exceptions: the fields set below stay
+     * in the global mask. */
+    imesa->globalRegMask.s3d.zBufCtrl.ui = 0;
+    imesa->globalRegMask.s3d.zBufCtrl.ni.zCmpFunc = 0x7;
+    imesa->globalRegMask.s3d.zBufCtrl.ni.zBufEn = 0x1;
+}
+void savageDDInitState( savageContextPtr imesa ) { /*
+     * Build the initial register shadow for a context: zero regs, set
+     * globalRegMask to all ones, apply the chip-specific defaults, fill
+     * in the destination and Z buffer layout, copy the result to oldRegs
+     * and emit it to the (empty) command buffer. */
+    memset (imesa->regs.ui, 0, SAVAGE_NR_REGS*sizeof(uint32_t));
+    memset (imesa->globalRegMask.ui, 0xff, SAVAGE_NR_REGS*sizeof(uint32_t));
+    if (imesa->savageScreen->chipset >= S3_SAVAGE4)
+	savageDDInitState_s4 (imesa);
+    else
+	savageDDInitState_s3d (imesa);
+
+    /*fprintf(stderr,"DBflag:%d\n",imesa->glCtx->Visual->DBflag);*/
+    /* zbufoffset and destctrl have the same position and layout on
+     * savage4 and savage3d, which is why the s4 view of the register
+     * union is used below for every chip. */
+    if (imesa->glCtx->Visual.doubleBufferMode) {
+	imesa->IsDouble = GL_TRUE;
+	imesa->toggle = TARGET_BACK;
+	imesa->regs.s4.destCtrl.ni.offset =
+	    imesa->savageScreen->backOffset>>11;
+    } else {
+	imesa->IsDouble = GL_FALSE;
+	imesa->toggle = TARGET_FRONT;
+	imesa->regs.s4.destCtrl.ni.offset =
+	    imesa->savageScreen->frontOffset>>11;
+    }
+    if(imesa->savageScreen->cpp == 2) { /* cpp == 2: width rounded up to units of 64 pixels */
+        imesa->regs.s4.destCtrl.ni.dstPixFmt = 0;
+        imesa->regs.s4.destCtrl.ni.dstWidthInTile =
+            (imesa->savageScreen->width+63)>>6;
+    } else { /* otherwise: width rounded up to units of 32 pixels */
+        imesa->regs.s4.destCtrl.ni.dstPixFmt = 1;
+        imesa->regs.s4.destCtrl.ni.dstWidthInTile =
+            (imesa->savageScreen->width+31)>>5;
+    }
+    imesa->NotFirstFrame = GL_FALSE;
+
+    imesa->regs.s4.zBufOffset.ni.offset=imesa->savageScreen->depthOffset>>11;
+    if(imesa->savageScreen->zpp == 2) { /* same rounding rule as for the colour buffer, keyed on zpp */
+        imesa->regs.s4.zBufOffset.ni.zBufWidthInTiles = 
+            (imesa->savageScreen->width+63)>>6;
+        imesa->regs.s4.zBufOffset.ni.zDepthSelect = 0;
+    } else {   
+        imesa->regs.s4.zBufOffset.ni.zBufWidthInTiles = 
+            (imesa->savageScreen->width+31)>>5;
+        imesa->regs.s4.zBufOffset.ni.zDepthSelect = 1;      
+    }
+
+    memcpy (imesa->oldRegs.ui, imesa->regs.ui, SAVAGE_NR_REGS*sizeof(uint32_t)); /* savageEmitOldState below emits from oldRegs */
+
+    /* Emit the initial state to the (empty) command buffer. */
+    assert (imesa->cmdBuf.write == imesa->cmdBuf.base);
+    savageEmitOldState(imesa);
+    imesa->cmdBuf.start = imesa->cmdBuf.write;
+}
+
+/* Every state flag except the matrix, user-clip and client-state ones.
+ * NOTE(review): not referenced in the visible part of this file; confirm
+ * whether it is still needed. */
+#define INTERESTED (~(NEW_MODELVIEW|NEW_PROJECTION|\
+                      NEW_TEXTURE_MATRIX|\
+                      NEW_USER_CLIP|NEW_CLIENT_STATE))
+
+static void savageDDInvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   /* Driver UpdateState hook: pass the changed-state mask on to the
+    * swrast, swsetup, vbo and tnl modules, then accumulate it in the
+    * driver context's new_gl_state. */
+   savageContextPtr imesa;
+
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+
+   imesa = SAVAGE_CONTEXT(ctx);
+   imesa->new_gl_state |= new_state;
+}
+
+
+void savageDDInitStateFuncs(GLcontext *ctx)
+{
+    /* Install the driver's state-change hooks in ctx->Driver.  Hooks
+     * shared by all chips come first, followed by the chip-specific
+     * ones. */
+
+    /* --- hooks shared by all chips --- */
+    ctx->Driver.UpdateState = savageDDInvalidateState;
+    ctx->Driver.BlendEquationSeparate = savageDDBlendEquationSeparate;
+    ctx->Driver.Fogfv = savageDDFogfv;
+    ctx->Driver.Scissor = savageDDScissor;
+
+#if HW_CULL
+    ctx->Driver.CullFace = savageDDCullFaceFrontFace;
+    ctx->Driver.FrontFace = savageDDCullFaceFrontFace;
+#else
+    ctx->Driver.CullFace = 0;
+    ctx->Driver.FrontFace = 0;
+#endif /* HW_CULL */
+
+    ctx->Driver.DrawBuffer = savageDDDrawBuffer;
+    ctx->Driver.ReadBuffer = savageDDReadBuffer;
+    ctx->Driver.ClearColor = savageDDClearColor;
+
+    ctx->Driver.DepthRange = savageDepthRange;
+    ctx->Driver.Viewport = savageViewport;
+    ctx->Driver.RenderMode = savageRenderMode;
+
+    /* --- chip-specific hooks --- */
+    if (SAVAGE_CONTEXT( ctx )->savageScreen->chipset < S3_SAVAGE4) {
+	/* Older chips: no stencil hooks are installed. */
+	ctx->Driver.Enable = savageDDEnable_s3d;
+	ctx->Driver.AlphaFunc = savageDDAlphaFunc_s3d;
+	ctx->Driver.DepthFunc = savageDDDepthFunc_s3d;
+	ctx->Driver.DepthMask = savageDDDepthMask_s3d;
+	ctx->Driver.BlendFuncSeparate = savageDDBlendFuncSeparate_s3d;
+	ctx->Driver.ColorMask = savageDDColorMask_s3d;
+	ctx->Driver.ShadeModel = savageDDShadeModel_s3d;
+	ctx->Driver.LightModelfv = savageDDLightModelfv_s3d;
+	ctx->Driver.StencilFuncSeparate = NULL;
+	ctx->Driver.StencilMaskSeparate = NULL;
+	ctx->Driver.StencilOpSeparate = NULL;
+    } else {
+	/* Savage4 and newer. */
+	ctx->Driver.Enable = savageDDEnable_s4;
+	ctx->Driver.AlphaFunc = savageDDAlphaFunc_s4;
+	ctx->Driver.DepthFunc = savageDDDepthFunc_s4;
+	ctx->Driver.DepthMask = savageDDDepthMask_s4;
+	ctx->Driver.BlendFuncSeparate = savageDDBlendFuncSeparate_s4;
+	ctx->Driver.ColorMask = savageDDColorMask_s4;
+	ctx->Driver.ShadeModel = savageDDShadeModel_s4;
+	ctx->Driver.LightModelfv = savageDDLightModelfv_s4;
+	ctx->Driver.StencilFuncSeparate = savageDDStencilFuncSeparate;
+	ctx->Driver.StencilMaskSeparate = savageDDStencilMaskSeparate;
+	ctx->Driver.StencilOpSeparate = savageDDStencilOpSeparate;
+    }
+}
diff --git a/src/mesa/drivers/dri/savage/savagestate.h b/src/mesa/drivers/dri/savage/savagestate.h
new file mode 100644
index 0000000000..5fe718d7a6
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savagestate.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _SAVAGE_STATE_H
+#define _SAVAGE_STATE_H
+
+#include "savagecontext.h"
+
+void savageCalcViewport( GLcontext *ctx );
+void savageEmitOldState( savageContextPtr imesa ); /* emit the last-emitted register state (oldRegs); the command buffer must be empty */
+void savageEmitChangedState( savageContextPtr imesa ); /* emit registers that changed since the last emission and clear imesa->dirty */
+
+extern void savageDDUpdateHwState( GLcontext *ctx ); /* process pending new_state flags (texture, cull) */
+extern void savageDDInitState( savageContextPtr imesa ); /* build and emit the initial register state */
+extern void savageDDInitStateFuncs( GLcontext *ctx ); /* install the state hooks in ctx->Driver */
+extern void savageDDRenderStart(GLcontext *ctx);
+extern void savageDDRenderEnd(GLcontext *ctx);
+
+#endif
diff --git a/src/mesa/drivers/dri/savage/savagetex.c b/src/mesa/drivers/dri/savage/savagetex.c
new file mode 100644
index 0000000000..1523af4065
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savagetex.c
@@ -0,0 +1,2126 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "main/context.h"
+#include "main/mm.h"
+#include "main/macros.h"
+#include "main/texstore.h"
+#include "main/texobj.h"
+#include "main/convolve.h"
+#include "main/colormac.h"
+#include "main/simple_list.h"
+#include "main/enums.h"
+
+#include "savagecontext.h"
+#include "savagetex.h"
+#include "savagetris.h"
+#include "savageioctl.h"
+#include "savage_bci.h"
+
+#include "xmlpool.h"
+
+#define TILE_INDEX_DXT1 0 /* row of the tileInfo_* tables used for DXT1 */
+#define TILE_INDEX_8    1 /* 8-bit texels; same value as the texel size in bytes */
+#define TILE_INDEX_16   2 /* 16-bit texels; same value as the texel size in bytes */
+#define TILE_INDEX_DXTn 3 /* row shared by DXT3 and DXT5 */
+#define TILE_INDEX_32   4 /* 32-bit texels; same value as the texel size in bytes */
+
+/* On Savage4 the texture LOD-bias needs an offset of ~ 0.3 to get
+ * somewhere close to software rendering.
+ */
+#define SAVAGE4_LOD_OFFSET 10
+
+/* Tile info for S3TC formats counts in 4x4 blocks instead of texels.
+ * In DXT1 each block is encoded in 64 bits. In DXT3 and 5 each block is
+ * encoded in 128 bits. */
+
+/* Size 1, 2 and 4 images are packed into the last subtile. Each image
+ * is repeated to fill a 4x4 pixel area. The figure below shows the
+ * layout of those 4x4 pixel areas in the 8x8 subtile.
+ *
+ *    4 2
+ *    x 1
+ *
+ * Yuck! 8-bit texture formats use 4x8 subtiles. See below.
+ */
+static const savageTileInfo tileInfo_pro[5] = { /* indexed by TILE_INDEX_* */
+    {16, 16, 16, 8, 1, 2, {0x18, 0x10}}, /* DXT1 */
+    {64, 32, 16, 4, 4, 8, {0x30, 0x20}}, /* 8-bit */
+    {64, 16,  8, 2, 8, 8, {0x48, 0x08}}, /* 16-bit */
+    {16,  8, 16, 4, 1, 2, {0x30, 0x20}}, /* DXT3, DXT5 */
+    {32, 16,  4, 2, 8, 8, {0x90, 0x10}}, /* 32-bit */
+};
+
+/* Size 1, 2 and 4 images are packed into the last two subtiles. Each
+ * image is repeated to fill a 4x4 pixel area. The figures below show
+ * the layout of those 4x4 pixel areas in the two 4x8 subtiles.
+ *
+ * second last subtile: 4   last subtile: 2
+ *                      x                 1
+ */
+static const savageTileInfo tileInfo_s3d_s4[5] = { /* indexed by TILE_INDEX_* */
+    {16, 16, 16, 8, 1, 2, {0x18, 0x10}}, /* DXT1 */
+    {64, 32, 16, 4, 4, 8, {0x30, 0x20}}, /* 8-bit */
+    {64, 16, 16, 2, 4, 8, {0x60, 0x40}}, /* 16-bit */
+    {16,  8, 16, 4, 1, 2, {0x30, 0x20}}, /* DXT3, DXT5 */
+    {32, 16,  8, 2, 4, 8, {0xc0, 0x80}}, /* 32-bit */
+};
+
+/** \brief Generator for the fixed-size subtile copy routines.
+ * \param w   bytes copied per row (also the destination row pitch)
+ * \param h   number of rows copied
+ */
+#define SUBTILE_FUNC(w,h)					\
+static INLINE GLubyte *savageUploadSubtile_##w##x##h		\
+(GLubyte *dest, GLubyte *src, GLuint srcStride)			\
+{								\
+    GLuint rowsLeft = h;					\
+    while (rowsLeft-- != 0) {					\
+	memcpy (dest, src, w);					\
+	dest += w;						\
+	src += srcStride;					\
+    }								\
+    return dest;						\
+}
+
+SUBTILE_FUNC(2, 8) /* 4 pixels wide at 4 bits per pixel */
+SUBTILE_FUNC(4, 8)
+SUBTILE_FUNC(8, 8)
+SUBTILE_FUNC(16, 8)
+SUBTILE_FUNC(32, 8) /* 8 pixels wide at 4 bytes per pixel */
+
+SUBTILE_FUNC(8, 2) /* DXT1 */
+SUBTILE_FUNC(16, 2) /* DXT3 and DXT5 */
+
+/** \brief Upload a complete tile from src (srcStride) to dest
+ *
+ * \param tileInfo     Pointer to tiling information
+ * \param wInSub       Width of source/dest image in subtiles
+ * \param hInSub       Height of source/dest image in subtiles
+ * \param bpp          Bytes per pixel
+ * \param src          Pointer to source data
+ * \param srcStride    Byte stride of rows in the source data
+ * \param dest         Pointer to destination
+ *
+ * Writes linearly to the destination memory in order to exploit write
+ * combining.
+ *
+ * For a complete tile wInSub and hInSub are set to the same values as
+ * in tileInfo. If the source image is smaller than a whole tile in
+ * one or both dimensions then they are set to the values of the
+ * source image. This only works as long as the source image is bigger
+ * than 8x8 pixels.
+ */
+static void savageUploadTile (const savageTileInfo *tileInfo,
+			      GLuint wInSub, GLuint hInSub, GLuint bpp,
+			      GLubyte *src, GLuint srcStride, GLubyte *dest) {
+    GLuint subStride = tileInfo->subWidth * bpp; /* bytes per subtile row */
+    GLubyte *srcSRow = src, *srcSTile = src;
+    GLubyte *(*subtileFunc) (GLubyte *, GLubyte *, GLuint);
+    GLuint sx, sy;
+    switch (subStride) { /* pick the copy routine matching the subtile shape */
+    case  2: subtileFunc = savageUploadSubtile_2x8; break;
+    case  4: subtileFunc = savageUploadSubtile_4x8; break;
+    case  8: subtileFunc = tileInfo->subHeight == 8 ?
+		 savageUploadSubtile_8x8 : savageUploadSubtile_8x2; break;
+    case 16: subtileFunc = tileInfo->subHeight == 8 ?
+		 savageUploadSubtile_16x8 : savageUploadSubtile_16x2; break;
+    case 32: subtileFunc = savageUploadSubtile_32x8; break;
+    default: assert(0); return; /* with NDEBUG: never call an unset pointer */
+    }
+    for (sy = 0; sy < hInSub; ++sy) { /* one row of subtiles per pass */
+	srcSTile = srcSRow;
+	for (sx = 0; sx < wInSub; ++sx) {
+	    src = srcSTile;
+	    dest = subtileFunc (dest, src, srcStride); /* dest advances linearly */
+	    srcSTile += subStride;
+	}
+	srcSRow += srcStride * tileInfo->subHeight;
+    }
+}
+
+/** \brief Upload an image that is smaller than 8 pixels in either dimension.
+ *
+ * \param tileInfo    Pointer to tiling information
+ * \param pixWidth,pixHeight  Image size in pixels; their maximum picks the path
+ * \param width,height        Image size in texels (4x4 blocks for S3TC data)
+ * \param bpp         Bytes per pixel (per 4x4 block for S3TC data)
+ * \param src         Pointer to source data
+ * \param dest        Pointer to destination
+ *
+ * This function handles all the special cases that need to be taken
+ * care of. The caller may need to call this function multiple times
+ * with the destination offset in different ways since small texture
+ * images must be repeated in order to fill a whole tile (or 4x4 for
+ * the last 3 levels).
+ *
+ * FIXME: Repeating inside this function would be more efficient.
+ */
+static void savageUploadTiny (const savageTileInfo *tileInfo,
+			      GLuint pixWidth, GLuint pixHeight,
+			      GLuint width, GLuint height, GLuint bpp,
+			      GLubyte *src, GLubyte *dest) {
+    GLuint size = MAX2(pixWidth, pixHeight);
+
+    if (width > tileInfo->subWidth) { /* assert: height <= subtile height */
+	GLuint wInSub = width / tileInfo->subWidth;
+	GLuint srcStride = width * bpp;
+	GLuint subStride = tileInfo->subWidth * bpp;
+	GLuint subSkip = (tileInfo->subHeight - height) * subStride; /* unused rows of each subtile */
+	GLubyte *srcSTile = src;
+	GLuint sx, y;
+	for (sx = 0; sx < wInSub; ++sx) {
+	    src = srcSTile;
+	    for (y = 0; y < height; ++y) {
+		memcpy (dest, src, subStride);
+		src += srcStride;
+		dest += subStride;
+	    }
+	    dest += subSkip;
+	    srcSTile += subStride;
+	}
+    } else if (size > 4) { /* a tile or less wide, except the last 3 levels */
+	GLuint srcStride = width * bpp;
+	GLuint subStride = tileInfo->subWidth * bpp;
+	/* if the subtile width is 4 we have to skip every other subtile */
+	GLuint subSkip = tileInfo->subWidth <= 4 ?
+	    subStride * tileInfo->subHeight : 0;
+	GLuint skipRemainder = tileInfo->subHeight - 1;
+	GLuint y;
+	for (y = 0; y < height; ++y) {
+	    memcpy (dest, src, srcStride);
+	    src += srcStride;
+	    dest += subStride;
+	    if ((y & skipRemainder) == skipRemainder) /* last row of a subtile */
+		dest += subSkip;
+	}
+    } else { /* the last 3 mipmap levels */
+	GLuint offset = (size <= 2 ? tileInfo->tinyOffset[size-1] : 0);
+	GLuint subStride = tileInfo->subWidth * bpp;
+	GLuint y;
+	dest += offset;
+	for (y = 0; y < height; ++y) {
+	    memcpy (dest, src, bpp*width);
+	    src += width * bpp;
+	    dest += subStride;
+	}
+    }
+}
+
+/** \brief Upload one mipmap level of t from Mesa's internal copy.
+ */
+static void savageUploadTexLevel( savageTexObjPtr t, int level )
+{
+    const struct gl_texture_image *image = t->base.tObj->Image[0][level];
+    const savageTileInfo *tileInfo = t->tileInfo;
+    GLuint pixWidth = image->Width2, pixHeight = image->Height2;
+    GLuint bpp = t->texelBytes;
+    GLuint width, height;
+
+    /* FIXME: Need triangle (rather than pixel) fallbacks to simulate
+     * this using normal textured triangles.
+     *
+     * DO THIS IN DRIVER STATE MANAGEMENT, not hardware state.
+     */
+    if(image->Border != 0)
+	fprintf (stderr, "Not supported texture border %d.\n",
+		 (int) image->Border);
+
+    if (t->hwFormat == TFT_S3TC4A4Bit || t->hwFormat == TFT_S3TC4CA4Bit ||
+	t->hwFormat == TFT_S3TC4Bit) {
+	width = (pixWidth+3) / 4; /* S3TC sizes count 4x4 blocks */
+	height = (pixHeight+3) / 4;
+    } else {
+	width = pixWidth;
+	height = pixHeight;
+    }
+
+    if (pixWidth >= 8 && pixHeight >= 8) {
+	GLuint *dirtyPtr = t->image[level].dirtyTiles;
+	GLuint dirtyMask = 1; /* walks the dirty-tile bit vector, one bit per tile */
+
+	if (width >= tileInfo->width && height >= tileInfo->height) {
+	    GLuint wInTiles = width / tileInfo->width;
+	    GLuint hInTiles = height / tileInfo->height;
+	    GLubyte *srcTRow = image->Data, *src;
+	    GLubyte *dest = (GLubyte *)(t->bufAddr + t->image[level].offset);
+	    GLuint x, y;
+	    for (y = 0; y < hInTiles; ++y) {
+		src = srcTRow;
+		for (x = 0; x < wInTiles; ++x) {
+		    if (*dirtyPtr & dirtyMask) {
+			savageUploadTile (tileInfo,
+					  tileInfo->wInSub, tileInfo->hInSub,
+					  bpp, src, width * bpp, dest);
+		    }
+		    src += tileInfo->width * bpp;
+		    dest += 2048; /* tile size is always 2k */
+		    if (dirtyMask == 1U<<31) { /* unsigned: 1<<31 overflows int */
+			dirtyMask = 1;
+			dirtyPtr++;
+		    } else
+			dirtyMask <<= 1;
+		}
+		srcTRow += width * tileInfo->height * bpp;
+	    }
+	} else if (width >= tileInfo->width) {
+	    GLuint wInTiles = width / tileInfo->width;
+	    GLubyte *src = image->Data;
+	    GLubyte *dest = (GLubyte *)(t->bufAddr + t->image[level].offset);
+	    GLuint tileStride = tileInfo->width * bpp * height;
+	    savageContextPtr imesa = (savageContextPtr)t->base.heap->driverContext;
+	    GLuint x;
+	    /* Savage3D-based chips seem to use a constant tile stride
+	     * of 2048 for vertically incomplete tiles, but only if
+	     * the color depth is 32bpp. Nobody said this was supposed
+	     * to be logical!
+	     */
+	    if (bpp == 4 && imesa->savageScreen->chipset < S3_SAVAGE4)
+		tileStride = 2048;
+	    for (x = 0; x < wInTiles; ++x) {
+		if (*dirtyPtr & dirtyMask) {
+		    savageUploadTile (tileInfo,
+				      tileInfo->wInSub,
+				      height / tileInfo->subHeight,
+				      bpp, src, width * bpp, dest);
+		}
+		src += tileInfo->width * bpp;
+		dest += tileStride;
+		if (dirtyMask == 1U<<31) { /* unsigned: 1<<31 overflows int */
+		    dirtyMask = 1;
+		    dirtyPtr++;
+		} else
+		    dirtyMask <<= 1;
+	    }
+	} else { /* narrower than one tile: uploaded whole, no dirty check */
+	    savageUploadTile (tileInfo, width / tileInfo->subWidth,
+			      height / tileInfo->subHeight, bpp,
+			      image->Data, width * bpp,
+			      (GLubyte *)(t->bufAddr+t->image[level].offset));
+	}
+    } else { /* below 8 pixels in some dimension: replicate the tiny image */
+	GLuint minHeight, minWidth, hRepeat, vRepeat, x, y;
+	if (t->hwFormat == TFT_S3TC4A4Bit || t->hwFormat == TFT_S3TC4CA4Bit ||
+	    t->hwFormat == TFT_S3TC4Bit)
+	    minWidth = minHeight = 1;
+	else
+	    minWidth = minHeight = 4;
+	if (width > minWidth || height > minHeight) {
+	    minWidth = tileInfo->subWidth;
+	    minHeight = tileInfo->subHeight;
+	}
+	hRepeat = width  >= minWidth  ? 1 : minWidth  / width;
+	vRepeat = height >= minHeight ? 1 : minHeight / height;
+	for (y = 0; y < vRepeat; ++y) {
+	    GLuint offset = y * tileInfo->subWidth*height * bpp;
+	    for (x = 0; x < hRepeat; ++x) {
+		savageUploadTiny (tileInfo, pixWidth, pixHeight,
+				  width, height, bpp, image->Data,
+				  (GLubyte *)(t->bufAddr +
+					      t->image[level].offset+offset));
+		offset += width * bpp;
+	    }
+	}
+    }
+}
+
+/** \brief Number of bytes an uncompressed texture image occupies in
+ * texture memory, padding included. */
+static GLuint savageTexImageSize (GLuint width, GLuint height, GLuint bpp) {
+    /* The last three mipmap levels (at most 4x4) get their offset from
+     * the hardware and take no space of their own here. */
+    if (width <= 4 && height <= 4)
+	return 0;
+    /* Both dimensions fill whole subtiles: no padding. */
+    if (width >= 8 && height >= 8)
+	return bpp * (width * height);
+    /* Partially filled subtiles waste memory. On Savage3D and Savage4
+     * with subtile width 4 every other subtile is skipped if width < 8,
+     * so a uniform subtile width of 8 can be assumed. */
+    if (width >= 8)
+	return bpp * (8 * width);
+    if (height >= 8)
+	return bpp * (8 * height);
+    /* Neither dimension reaches 8: a single 8x8 area. */
+    return bpp * 64;
+}
+
+/** \brief Number of bytes a compressed texture image occupies in texture
+ * memory; bpp is applied per 4x4 block. */
+static GLuint savageCompressedTexImageSize (GLuint width, GLuint height,
+					    GLuint bpp) {
+    /* Work in 4x4 blocks from here on, rounding partial blocks up. */
+    const GLuint wBlocks = (width + 3) / 4;
+    const GLuint hBlocks = (height + 3) / 4;
+
+    /* A single block or less: the hardware computes the offset of the
+     * last mipmap levels internally. */
+    if (wBlocks <= 1 && hBlocks <= 1)
+	return 0;
+    /* Both dimensions span at least two blocks: no padding. */
+    if (wBlocks >= 2 && hBlocks >= 2)
+	return bpp * (wBlocks * hBlocks);
+    /* Partially filled subtiles waste memory: the short dimension is
+     * counted as two blocks. */
+    if (wBlocks >= 2)
+	return bpp * (2 * wBlocks);
+    if (hBlocks >= 2)
+	return bpp * (2 * hBlocks);
+    return bpp * 4;
+}
+
+/** \brief Number of tiles, partial ones included, covering width x height.
+ *  Each dimension is rounded up to whole tiles before multiplying. */
+static GLuint savageTexImageTiles (GLuint width, GLuint height,
+				   const savageTileInfo *tileInfo)
+{
+   return ((width + tileInfo->width - 1) / tileInfo->width) *
+      ((height + tileInfo->height - 1) / tileInfo->height);
+}
+
+/** \brief Mark the tiles touched by a sub-image region as dirty
+ *
+ * Some care must be taken because tileInfo may not be set or not
+ * up-to-date. So we check if tileInfo is initialized and if the number
+ * of tiles in the bit vector matches the number of tiles computed from
+ * the current tileInfo. Empty regions (width or height <= 0) mark nothing.
+ */
+static void savageMarkDirtyTiles (savageTexObjPtr t, GLuint level,
+				  GLuint totalWidth, GLuint totalHeight,
+				  GLint xoffset, GLint yoffset,
+				  GLsizei width, GLsizei height)
+{
+   GLuint wInTiles, hInTiles;
+   GLuint x0, y0, x1, y1;
+   GLuint x, y;
+   if (!t->tileInfo || width <= 0 || height <= 0)
+      return; /* an empty region would make x1/y1 below wrap around */
+   wInTiles = (totalWidth + t->tileInfo->width - 1) / t->tileInfo->width;
+   hInTiles = (totalHeight + t->tileInfo->height - 1) / t->tileInfo->height;
+   if (wInTiles * hInTiles != t->image[level].nTiles)
+      return; /* bit vector does not match the current tiling */
+
+   x0 = xoffset / t->tileInfo->width;
+   y0 = yoffset / t->tileInfo->height;
+   x1 = (xoffset + width - 1) / t->tileInfo->width;
+   y1 = (yoffset + height - 1) / t->tileInfo->height;
+
+   for (y = y0; y <= y1; ++y) {
+      GLuint *ptr = t->image[level].dirtyTiles + (y * wInTiles + x0) / 32;
+      GLuint mask = 1U << (y * wInTiles + x0) % 32; /* unsigned: bit 31 is valid */
+      for (x = x0; x <= x1; ++x) {
+	 *ptr |= mask;
+	 if (mask == (1U<<31)) {
+	    ptr++;
+	    mask = 1;
+	 } else {
+	    mask <<= 1;
+	 }
+      }
+   }
+}
+
+/** \brief Set every bit of a level's dirty-tile vector (no-op without tiles)
+ */
+static void savageMarkAllTiles (savageTexObjPtr t, GLuint level)
+{
+   const GLuint nWords = (t->image[level].nTiles + 31) / 32;
+   if (nWords != 0)
+      memset(t->image[level].dirtyTiles, ~0, nWords * sizeof(GLuint));
+}
+
+
+static void savageSetTexWrapping(savageTexObjPtr tex, GLenum s, GLenum t)
+{   /* Store both wrap modes in the texture object's setup, unchanged. */
+    tex->setup.tWrapMode = t;
+    tex->setup.sWrapMode = s;
+}
+
+static void savageSetTexFilter(savageTexObjPtr t, GLenum minf, GLenum magf)
+{  /* Store both filter modes in the texture object's setup, unchanged. */
+   t->setup.magFilter = magf;
+   t->setup.minFilter = minf;
+}
+
+
+/* Currently a no-op: both candidate stores below are commented out, so
+ * the border color is ignored. Need a fallback ? */
+static void savageSetTexBorderColor(savageTexObjPtr t, const GLfloat color[4])
+{
+/*    t->Setup[SAVAGE_TEXREG_TEXBORDERCOL] =  */
+    /*t->setup.borderColor = SAVAGEPACKCOLOR8888(color[0],color[1],color[2],color[3]); */
+}
+
+
+
+static savageTexObjPtr
+savageAllocTexObj( struct gl_texture_object *texObj )
+{
+   savageTexObjPtr t = (savageTexObjPtr) calloc(1,sizeof(*t));
+   GLuint level;
+
+   /* The Mesa object always receives the result, even when it is NULL. */
+   texObj->DriverData = t;
+   if ( t == NULL )
+      return NULL;
+
+   /* State that does not depend on any image. */
+   t->base.tObj = texObj;
+   t->base.dirty_images[0] = 0;
+   t->dirtySubImages = 0;
+   t->tileInfo = NULL;
+
+   /* No level has a dirty-tile bit vector yet. */
+   level = 0;
+   while (level < SAVAGE_TEX_MAXLEVELS) {
+      t->image[level].nTiles = 0;
+      ++level;
+   }
+
+   /* FIXME Something here to set initial values for other parts of
+    * FIXME t->setup?
+    */
+   make_empty_list( &t->base );
+
+   /* Take over the sampler parameters currently stored on texObj. */
+   savageSetTexWrapping(t,texObj->WrapS,texObj->WrapT);
+   savageSetTexFilter(t,texObj->MinFilter,texObj->MagFilter);
+   savageSetTexBorderColor(t,texObj->BorderColor.f);
+   return t;
+}
+
+/* Mesa texture formats for alpha-images on Savage3D/IX/MX
+ *
+ * Promoting texture images to ARGB8888 or ARGB4444 doesn't work
+ * because we can't tell the hardware to ignore the color components
+ * and only use the alpha component. So we define our own texture
+ * formats that promote to ARGB8888 or ARGB4444 and set the color
+ * components to white. This way we get the correct result.
+ */
+
+#if 0 /* custom alpha-only formats: compiled out */
+/* Using MESA_FORMAT_RGBA8888 to store alpha-only textures should
+ * work but is space inefficient.
+ */
+
+static GLboolean
+_savage_texstore_a1114444(TEXSTORE_PARAMS);
+
+static GLboolean
+_savage_texstore_a1118888(TEXSTORE_PARAMS);
+
+static struct gl_texture_format _savage_texformat_a1114444 = {
+    MESA_FORMAT_ARGB4444,		/* MesaFormat */
+    GL_RGBA,				/* BaseFormat */
+    GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
+    4,					/* RedBits */
+    4,					/* GreenBits */
+    4,					/* BlueBits */
+    4,					/* AlphaBits */
+    0,					/* LuminanceBits */
+    0,					/* IntensityBits */
+    0,					/* IndexBits */
+    0,					/* DepthBits */
+    0,					/* StencilBits */
+    2,					/* TexelBytes */
+    _savage_texstore_a1114444,		/* StoreTexImageFunc */
+    NULL, NULL, NULL, NULL, NULL, NULL  /* FetchTexel* filled in by
+					 * savageDDInitTextureFuncs */
+};
+static struct gl_texture_format _savage_texformat_a1118888 = {
+    MESA_FORMAT_ARGB8888,		/* MesaFormat */
+    GL_RGBA,				/* BaseFormat */
+    GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
+    8,					/* RedBits */
+    8,					/* GreenBits */
+    8,					/* BlueBits */
+    8,					/* AlphaBits */
+    0,					/* LuminanceBits */
+    0,					/* IntensityBits */
+    0,					/* IndexBits */
+    0,					/* DepthBits */
+    0,					/* StencilBits */
+    4,					/* TexelBytes */
+    _savage_texstore_a1118888,		/* StoreTexImageFunc */
+    NULL, NULL, NULL, NULL, NULL, NULL  /* FetchTexel* filled in by
+					 * savageDDInitTextureFuncs */
+};
+
+/* Store an alpha image as 16-bit texels whose other three fields are 255. */
+static GLboolean
+_savage_texstore_a1114444(TEXSTORE_PARAMS)
+{
+    const GLchan *tempImage = _mesa_make_temp_chan_image(ctx, dims,
+                                                 baseInternalFormat,
+                                                 baseInternalFormat,
+                                                 srcWidth, srcHeight, srcDepth,
+                                                 srcFormat, srcType, srcAddr,
+                                                 srcPacking);
+    const GLchan *src = tempImage;
+    GLint img, row, col;
+
+    ASSERT(dstFormat == &_savage_texformat_a1114444);
+    ASSERT(baseInternalFormat == GL_ALPHA);
+
+    if (!tempImage)
+	return GL_FALSE;
+    _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
+    for (img = 0; img < srcDepth; img++) {
+        GLuint texelBytes = _mesa_get_format_bytes(dstFormat);
+        GLubyte *dstRow = (GLubyte *) dstAddr
+           + dstImageOffsets[dstZoffset + img] * texelBytes
+           + dstYoffset * dstRowStride
+           + dstXoffset * texelBytes;
+	for (row = 0; row < srcHeight; row++) {
+            GLushort *dstUI = (GLushort *) dstRow;
+	    for (col = 0; col < srcWidth; col++) {
+		dstUI[col] = PACK_COLOR_4444( CHAN_TO_UBYTE(src[0]),
+					      255, 255, 255 );
+		src += 1;
+            }
+            dstRow += dstRowStride;
+	}
+    }
+    free((void *) tempImage);
+
+    return GL_TRUE;
+}
+
+/* Store an alpha image as 32-bit texels whose other three fields are 255. */
+static GLboolean
+_savage_texstore_a1118888(TEXSTORE_PARAMS)
+{
+    const GLchan *tempImage = _mesa_make_temp_chan_image(ctx, dims,
+                                                 baseInternalFormat,
+                                                 baseInternalFormat,
+                                                 srcWidth, srcHeight, srcDepth,
+                                                 srcFormat, srcType, srcAddr,
+                                                 srcPacking);
+    const GLchan *src = tempImage;
+    GLint img, row, col;
+
+    ASSERT(dstFormat == &_savage_texformat_a1118888);
+    ASSERT(baseInternalFormat == GL_ALPHA);
+
+    if (!tempImage)
+	return GL_FALSE;
+    _mesa_adjust_image_for_convolution(ctx, dims, &srcWidth, &srcHeight);
+    for (img = 0; img < srcDepth; img++) {
+        GLuint texelBytes = _mesa_get_format_bytes(dstFormat);
+        GLubyte *dstRow = (GLubyte *) dstAddr
+           + dstImageOffsets[dstZoffset + img] * texelBytes
+           + dstYoffset * dstRowStride
+           + dstXoffset * texelBytes;
+	for (row = 0; row < srcHeight; row++) {
+            GLuint *dstUI = (GLuint *) dstRow;
+	    for (col = 0; col < srcWidth; col++) {
+		dstUI[col] = PACK_COLOR_8888( CHAN_TO_UBYTE(src[0]),
+					      255, 255, 255 );
+		src += 1;
+            }
+            dstRow += dstRowStride;
+	}
+    }
+    free((void *) tempImage);
+
+    return GL_TRUE;
+}
+#endif /* 0 */
+
+
+/* Pick the Mesa format to store; called by the _mesa_store_teximage[123]d() functions. */
+static gl_format
+savageChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+			   GLenum format, GLenum type )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   const GLboolean do32bpt =
+       ( imesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 );
+   const GLboolean force16bpt =
+       ( imesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 );
+   const GLboolean isSavage4 = (imesa->savageScreen->chipset >= S3_SAVAGE4);
+   (void) format; /* not consulted by any case below */
+
+   switch ( internalFormat ) {
+   case 4:
+   case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
+      switch ( type ) { /* generic RGBA: the client data type decides */
+      case GL_UNSIGNED_INT_10_10_10_2:
+      case GL_UNSIGNED_INT_2_10_10_10_REV:
+	 return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB1555;
+      case GL_UNSIGNED_SHORT_4_4_4_4:
+      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+	 return MESA_FORMAT_ARGB4444;
+      case GL_UNSIGNED_SHORT_5_5_5_1:
+      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+	 return MESA_FORMAT_ARGB1555;
+      default:
+         return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+      }
+
+   case 3:
+   case GL_RGB:
+   case GL_COMPRESSED_RGB:
+      switch ( type ) { /* generic RGB: the client data type decides */
+      case GL_UNSIGNED_SHORT_4_4_4_4:
+      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+	 return MESA_FORMAT_ARGB4444;
+      case GL_UNSIGNED_SHORT_5_5_5_1:
+      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+	 return MESA_FORMAT_ARGB1555;
+      case GL_UNSIGNED_SHORT_5_6_5:
+      case GL_UNSIGNED_SHORT_5_6_5_REV:
+	 return MESA_FORMAT_RGB565;
+      default:
+         return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_RGB565;
+      }
+
+   case GL_RGBA8:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return !force16bpt ?
+	  MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+
+   case GL_RGB10_A2:
+      return !force16bpt ?
+	  MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB1555;
+
+   case GL_RGBA4:
+   case GL_RGBA2:
+      return MESA_FORMAT_ARGB4444;
+
+   case GL_RGB5_A1:
+      return MESA_FORMAT_ARGB1555;
+
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return !force16bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_RGB565;
+
+   case GL_RGB5:
+   case GL_RGB4:
+   case GL_R3_G3_B2:
+      return MESA_FORMAT_RGB565;
+
+   case GL_ALPHA:
+   case GL_COMPRESSED_ALPHA:
+#if 0
+      return isSavage4 ? MESA_FORMAT_a8 : (
+	 do32bpt ? &_savage_texformat_a1118888 : &_savage_texformat_a1114444);
+#else
+      if (isSavage4)
+         return MESA_FORMAT_A8;
+      else if (do32bpt)
+         return MESA_FORMAT_ARGB8888;
+      else
+         return MESA_FORMAT_ARGB4444;
+#endif
+   case GL_ALPHA4:
+#if 0
+      return isSavage4 ? MESA_FORMAT_a8 : &_savage_texformat_a1114444;
+#else
+      if (isSavage4)
+         return MESA_FORMAT_A8;
+      else
+         return MESA_FORMAT_ARGB4444;
+#endif
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+#if 0
+      return isSavage4 ? MESA_FORMAT_a8 : (
+	 !force16bpt ? &_savage_texformat_a1118888 : &_savage_texformat_a1114444);
+#else
+      if (isSavage4)
+         return MESA_FORMAT_A8;
+      else if (force16bpt)
+         return MESA_FORMAT_ARGB4444;
+      else
+         return MESA_FORMAT_ARGB8888;
+#endif
+   case 1:
+   case GL_LUMINANCE:
+   case GL_COMPRESSED_LUMINANCE:
+      /* no alpha, but use argb1555 in 16bit case to get pure grey values */
+      return isSavage4 ? MESA_FORMAT_L8 : (
+	 do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB1555);
+   case GL_LUMINANCE4:
+      return isSavage4 ? MESA_FORMAT_L8 : MESA_FORMAT_ARGB1555;
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+      return isSavage4 ? MESA_FORMAT_L8 : (
+	 !force16bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB1555);
+
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      /* Savage4 has an al44 texture format. But it's not supported by Mesa. */
+      return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+      return MESA_FORMAT_ARGB4444;
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+      return !force16bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+#if 0
+   /* TFT_I8 produces garbage on ProSavageDDR and subsequent texture
+    * disable keeps rendering garbage. Disabled for now. */
+   case GL_INTENSITY:
+   case GL_COMPRESSED_INTENSITY:
+      return isSavage4 ? MESA_FORMAT_i8 : (
+	 do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444);
+   case GL_INTENSITY4:
+      return isSavage4 ? MESA_FORMAT_i8 : MESA_FORMAT_ARGB4444;
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+      return isSavage4 ? MESA_FORMAT_i8 : (
+	 !force16bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444);
+#else
+   case GL_INTENSITY:
+   case GL_COMPRESSED_INTENSITY:
+      return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+   case GL_INTENSITY4:
+      return MESA_FORMAT_ARGB4444;
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+      return !force16bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+#endif
+
+   case GL_RGB_S3TC:
+   case GL_RGB4_S3TC:
+   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+      return MESA_FORMAT_RGB_DXT1;
+   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+      return MESA_FORMAT_RGBA_DXT1;
+
+   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+      return MESA_FORMAT_RGBA_DXT3;
+
+   case GL_RGBA_S3TC:
+   case GL_RGBA4_S3TC:
+      if (!isSavage4)
+	 /* Not the best choice but Savage3D/MX/IX don't support DXT3 or DXT5. */
+	 return MESA_FORMAT_RGBA_DXT1;
+      /* fall through */
+   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+      return MESA_FORMAT_RGBA_DXT5;
+
+/*
+   case GL_COLOR_INDEX:
+   case GL_COLOR_INDEX1_EXT:
+   case GL_COLOR_INDEX2_EXT:
+   case GL_COLOR_INDEX4_EXT:
+   case GL_COLOR_INDEX8_EXT:
+   case GL_COLOR_INDEX12_EXT:
+   case GL_COLOR_INDEX16_EXT:
+      return &_mesa_texformat_ci8;
+*/
+   default:
+      _mesa_problem(ctx, "unexpected texture format in %s", __FUNCTION__);
+      return MESA_FORMAT_NONE;
+   }
+}
+
+static void savageSetTexImages( savageContextPtr imesa,
+				const struct gl_texture_object *tObj )
+{
+   savageTexObjPtr t = (savageTexObjPtr) tObj->DriverData;
+   struct gl_texture_image *image = tObj->Image[0][tObj->BaseLevel];
+   GLuint offset, i, textureFormat, tileIndex, size;
+   GLint firstLevel, lastLevel;
+   /* Derives hardware format, tiling, level offsets and total size for tObj. */
+   assert(t);
+   assert(image);
+
+   switch (image->TexFormat) { /* uncompressed: tile index == texel bytes */
+   case MESA_FORMAT_ARGB8888:
+      textureFormat = TFT_ARGB8888;
+      t->texelBytes = tileIndex = 4;
+      break;
+   case MESA_FORMAT_ARGB1555:
+      textureFormat = TFT_ARGB1555;
+      t->texelBytes = tileIndex = 2;
+      break;
+   case MESA_FORMAT_ARGB4444:
+      textureFormat = TFT_ARGB4444;
+      t->texelBytes = tileIndex = 2;
+      break;
+   case MESA_FORMAT_RGB565:
+      textureFormat = TFT_RGB565;
+      t->texelBytes = tileIndex = 2;
+      break;
+   case MESA_FORMAT_L8:
+      textureFormat = TFT_L8;
+      t->texelBytes = tileIndex = 1;
+      break;
+   case MESA_FORMAT_I8:
+      textureFormat = TFT_I8;
+      t->texelBytes = tileIndex = 1;
+      break;
+   case MESA_FORMAT_A8:
+      textureFormat = TFT_A8;
+      t->texelBytes = tileIndex = 1;
+      break;
+   case MESA_FORMAT_RGB_DXT1:
+      textureFormat = TFT_S3TC4Bit;
+      tileIndex = TILE_INDEX_DXT1;
+      t->texelBytes = 8; /* compressed: bytes per 4x4 block */
+      break;
+   case MESA_FORMAT_RGBA_DXT1:
+      textureFormat = TFT_S3TC4Bit;
+      tileIndex = TILE_INDEX_DXT1;
+      t->texelBytes = 8;
+      break;
+   case MESA_FORMAT_RGBA_DXT3:
+      textureFormat =  TFT_S3TC4A4Bit;
+      tileIndex = TILE_INDEX_DXTn;
+      t->texelBytes = 16;
+      break;
+   case MESA_FORMAT_RGBA_DXT5:
+      textureFormat = TFT_S3TC4CA4Bit;
+      tileIndex = TILE_INDEX_DXTn;
+      t->texelBytes = 16;
+      break;
+   default:
+      _mesa_problem(imesa->glCtx, "Bad texture format in %s", __FUNCTION__);
+      return;
+   }
+   t->hwFormat = textureFormat;
+
+   /* Select tiling format depending on the chipset and texture format */
+   if (imesa->savageScreen->chipset <= S3_SAVAGE4)
+       t->tileInfo = &tileInfo_s3d_s4[tileIndex];
+   else
+       t->tileInfo = &tileInfo_pro[tileIndex];
+
+   /* Compute which mipmap levels we really want to send to the hardware.
+    */
+   driCalculateTextureFirstLastLevel( &t->base );
+   firstLevel = t->base.firstLevel;
+   lastLevel  = t->base.lastLevel;
+
+   /* Figure out the size now (and count the levels).  Upload won't be
+    * done until later. If the number of tiles changes, it means that
+    * this function is called for the first time on this tex object or
+    * the image or the destination color format changed. So all tiles
+    * are marked as dirty.
+    */
+   offset = 0;
+   size = 1;
+   for ( i = firstLevel ; i <= lastLevel && tObj->Image[0][i] ; i++ ) {
+      GLuint nTiles;
+      image = tObj->Image[0][i]; /* count tiles from this level, not the previous */
+      nTiles = savageTexImageTiles (image->Width2, image->Height2, t->tileInfo);
+      if (t->image[i].nTiles != nTiles) {
+	 GLuint words = (nTiles + 31) / 32;
+	 if (t->image[i].nTiles != 0) {
+	    free(t->image[i].dirtyTiles);
+	 }
+	 t->image[i].dirtyTiles = malloc(words*sizeof(GLuint)); /* NOTE(review): result unchecked */
+	 memset(t->image[i].dirtyTiles, ~0, words*sizeof(GLuint));
+      }
+      t->image[i].nTiles = nTiles;
+
+      t->image[i].offset = offset;
+
+      if (t->texelBytes >= 8)
+	 size = savageCompressedTexImageSize (image->Width2, image->Height2,
+					      t->texelBytes);
+      else
+	 size = savageTexImageSize (image->Width2, image->Height2,
+				    t->texelBytes);
+      offset += size;
+   }
+
+   t->base.lastLevel = i-1;
+   t->base.totalSize = offset;
+   /* the last three mipmap levels don't add to the offset. They are packed
+    * into 64 pixels. */
+   if (size == 0)
+       t->base.totalSize += (t->texelBytes >= 8 ? 4 : 64) * t->texelBytes;
+   /* 2k-aligned (really needed?) */
+   t->base.totalSize = (t->base.totalSize + 2047UL) & ~2047UL;
+}
+
+/**
+ * Release the driver-private data hanging off a Savage texture object.
+ *
+ * Frees the dirty-tile bit vector of every mipmap level that reports a
+ * non-zero tile count and, when a context is given, clears each
+ * CurrentTexObj slot that still points at this object.
+ *
+ * \param imesa  driver context; may be NULL, in which case only the bit
+ *               vectors are freed
+ * \param t      texture object being torn down
+ */
+void savageDestroyTexObj(savageContextPtr imesa, savageTexObjPtr t)
+{
+    GLuint level;
+    GLuint unit;
+
+    /* Only levels with a non-zero tile count are freed. */
+    for (level = 0; level < SAVAGE_TEX_MAXLEVELS; level++) {
+	if (t->image[level].nTiles != 0) {
+	    free (t->image[level].dirtyTiles);
+	}
+    }
+
+    if ( imesa == NULL )
+	return;
+
+    /* Unhook the object from every texture unit that has it current. */
+    for (unit = 0; unit < imesa->glCtx->Const.MaxTextureUnits; unit++) {
+	if ( imesa->CurrentTexObj[ unit ] != &t->base )
+	    continue;
+	assert( t->base.bound & (1 << unit) );
+	imesa->CurrentTexObj[ unit ] = NULL;
+    }
+}
+
+/* Upload a texture's images to one of the texture heaps. May have to
+ * eject our own and/or other client's texture objects to make room
+ * for the upload.
+ *
+ * If the object has no memory block yet, one is requested through
+ * driAllocateTexture(); when that fails the function returns without
+ * uploading anything. Otherwise every level flagged in dirty_images[0]
+ * or dirtySubImages is uploaded after waiting for the hardware (event
+ * wait when the DRM minor version is >= 3, full flush and idle wait
+ * otherwise), and both dirty masks are cleared.
+ */
+static void savageUploadTexImages( savageContextPtr imesa, savageTexObjPtr t )
+{
+   GLint numLevels;
+   GLint i;
+
+   /* Validate the pointer before it is dereferenced for the first time. */
+   assert(t);
+   numLevels = t->base.lastLevel - t->base.firstLevel + 1;
+
+   LOCK_HARDWARE(imesa);
+   
+   /* Do we need to eject LRU texture objects?
+    */
+   if (!t->base.memBlock) {
+      GLint heap;
+      GLuint ofs;
+
+      heap = driAllocateTexture(imesa->textureHeaps, imesa->lastTexHeap,
+				(driTextureObject *)t);
+      if (heap == -1) {
+	  /* No room in any heap: give up, nothing gets uploaded. */
+	  UNLOCK_HARDWARE(imesa);
+	  return;
+      }
+
+      assert(t->base.memBlock);
+      ofs = t->base.memBlock->ofs;
+      t->setup.physAddr = imesa->savageScreen->textureOffset[heap] + ofs;
+      t->bufAddr = (GLubyte *)imesa->savageScreen->texVirtual[heap] + ofs;
+      imesa->dirty |= SAVAGE_UPLOAD_GLOBAL; /* FIXME: really needed? */
+   }
+
+   /* Let the world know we've used this memory recently.
+    */
+   driUpdateTextureLRU( &t->base );
+   UNLOCK_HARDWARE(imesa);
+
+   if (t->base.dirty_images[0] || t->dirtySubImages) {
+      if (SAVAGE_DEBUG & DEBUG_VERBOSE_TEX)
+	 fprintf(stderr, "Texture upload: |");
+
+      /* Heap timestamps are only reliable with Savage DRM 2.3.x or
+       * later. Earlier versions had only 16 bit time stamps which
+       * would wrap too frequently. */
+      if (imesa->savageScreen->driScrnPriv->drm_version.minor >= 3) {
+	  unsigned int heap = t->base.heap->heapId;
+	  LOCK_HARDWARE(imesa);
+	  savageWaitEvent (imesa, imesa->textureHeaps[heap]->timestamp);
+      } else {
+	  savageFlushVertices (imesa);
+	  LOCK_HARDWARE(imesa);
+	  savageFlushCmdBufLocked (imesa, GL_FALSE);
+	  WAIT_IDLE_EMPTY_LOCKED(imesa);
+      }
+
+      /* Both branches above leave the hardware lock held for the upload.
+       * The index is signed so that an empty or inverted level range
+       * simply skips the loop. */
+      for (i = 0 ; i < numLevels ; i++) {
+         const GLint j = t->base.firstLevel + i;  /* the texObj's level */
+	 if (t->base.dirty_images[0] & (1 << j)) {
+	    /* The whole image changed: every tile must be re-sent. */
+	    savageMarkAllTiles(t, j);
+	    if (SAVAGE_DEBUG & DEBUG_VERBOSE_TEX)
+		fprintf (stderr, "*");
+	 } else if (SAVAGE_DEBUG & DEBUG_VERBOSE_TEX) {
+	    if (t->dirtySubImages & (1 << j))
+	       fprintf (stderr, ".");
+	    else
+	       fprintf (stderr, " ");
+	 }
+	 if ((t->base.dirty_images[0] | t->dirtySubImages) & (1 << j))
+	    savageUploadTexLevel( t, j );
+      }
+
+      UNLOCK_HARDWARE(imesa);
+      t->base.dirty_images[0] = 0;
+      t->dirtySubImages = 0;
+
+      if (SAVAGE_DEBUG & DEBUG_VERBOSE_TEX)
+	 fprintf(stderr, "|\n");
+   }
+}
+
+
+/**
+ * Translate the GL wrap modes of a texture into the Savage4 U/V
+ * addressing modes of the given texture unit.
+ *
+ * Wrap modes other than GL_REPEAT, GL_CLAMP, GL_CLAMP_TO_EDGE and
+ * GL_MIRRORED_REPEAT leave the corresponding register field untouched.
+ */
+static void
+savage4_set_wrap_mode( savageContextPtr imesa, unsigned unit,
+		      GLenum s_mode, GLenum t_mode )
+{
+    /* S coordinate -> U addressing mode */
+    if (s_mode == GL_REPEAT) {
+	imesa->regs.s4.texCtrl[ unit ].ni.uMode = TAM_Wrap;
+    } else if (s_mode == GL_CLAMP || s_mode == GL_CLAMP_TO_EDGE) {
+	imesa->regs.s4.texCtrl[ unit ].ni.uMode = TAM_Clamp;
+    } else if (s_mode == GL_MIRRORED_REPEAT) {
+	imesa->regs.s4.texCtrl[ unit ].ni.uMode = TAM_Mirror;
+    }
+
+    /* T coordinate -> V addressing mode */
+    if (t_mode == GL_REPEAT) {
+	imesa->regs.s4.texCtrl[ unit ].ni.vMode = TAM_Wrap;
+    } else if (t_mode == GL_CLAMP || t_mode == GL_CLAMP_TO_EDGE) {
+	imesa->regs.s4.texCtrl[ unit ].ni.vMode = TAM_Clamp;
+    } else if (t_mode == GL_MIRRORED_REPEAT) {
+	imesa->regs.s4.texCtrl[ unit ].ni.vMode = TAM_Mirror;
+    }
+}
+
+
+/**
+ * Programs the filter and mipmap-enable bits of one Savage4 texture unit
+ * from the GL minification filter.  The magnification filter is ignored.
+ * Unrecognized minification filters leave both fields unchanged.
+ * 
+ * \todo
+ * Does the Savage4 have the ability to select the magnification filter?
+ */
+static void
+savage4_set_filter_mode( savageContextPtr imesa, unsigned unit,
+			 GLenum minFilter, GLenum magFilter )
+{
+    (void) magFilter;
+
+    /* First pick the sampling filter... */
+    switch (minFilter) {
+    case GL_NEAREST:
+    case GL_NEAREST_MIPMAP_NEAREST:
+	imesa->regs.s4.texCtrl[ unit ].ni.filterMode = TFM_Point;
+	break;
+
+    case GL_LINEAR:
+    case GL_LINEAR_MIPMAP_NEAREST:
+	imesa->regs.s4.texCtrl[ unit ].ni.filterMode = TFM_Bilin;
+	break;
+
+    case GL_NEAREST_MIPMAP_LINEAR:
+    case GL_LINEAR_MIPMAP_LINEAR:
+	imesa->regs.s4.texCtrl[ unit ].ni.filterMode = TFM_Trilin;
+	break;
+    }
+
+    /* ...then decide whether mipmapping is used at all. */
+    switch (minFilter) {
+    case GL_NEAREST:
+    case GL_LINEAR:
+	imesa->regs.s4.texCtrl[ unit ].ni.mipmapEnable = GL_FALSE;
+	break;
+
+    case GL_NEAREST_MIPMAP_NEAREST:
+    case GL_LINEAR_MIPMAP_NEAREST:
+    case GL_NEAREST_MIPMAP_LINEAR:
+    case GL_LINEAR_MIPMAP_LINEAR:
+	imesa->regs.s4.texCtrl[ unit ].ni.mipmapEnable = GL_TRUE;
+	break;
+    }
+}
+
+
+/**
+ * Program the Savage4 register shadow for texture unit 0.
+ *
+ * The unit is first written as disabled.  If unit 0 is really enabled
+ * with a borderless 1D/2D texture, the texture is validated and uploaded
+ * when dirty, and the blend control, wrap, filter, LOD bias, size,
+ * format and address fields are filled in.  Any other enabled target or
+ * a texture border raises SAVAGE_FALLBACK_TEXTURE.
+ *
+ * For GL_BLEND with most base formats this function also programs unit 1
+ * and sets imesa->bTexEn1 so that savageUpdateTex1State_s4 leaves those
+ * registers alone.
+ */
+static void savageUpdateTex0State_s4( GLcontext *ctx )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   struct gl_texture_object	*tObj;
+   struct gl_texture_image *image;
+   savageTexObjPtr t;
+   GLuint format;
+
+   /* disable */
+   imesa->regs.s4.texDescr.ni.tex0En = GL_FALSE;
+   imesa->regs.s4.texBlendCtrl[0].ui = TBC_NoTexMap;
+   imesa->regs.s4.texCtrl[0].ui = 0x20f040;
+   if (ctx->Texture.Unit[0]._ReallyEnabled == 0)
+      return;
+
+   tObj = ctx->Texture.Unit[0]._Current;
+   if ((ctx->Texture.Unit[0]._ReallyEnabled & ~(TEXTURE_1D_BIT|TEXTURE_2D_BIT))
+       || tObj->Image[0][tObj->BaseLevel]->Border > 0) {
+      /* 3D texturing enabled, or texture border - fallback */
+      FALLBACK (ctx, SAVAGE_FALLBACK_TEXTURE, GL_TRUE);
+      return;
+   }
+
+   /* Do 2D texture setup */
+
+   t = tObj->DriverData;
+   if (!t) {
+      t = savageAllocTexObj( tObj );
+      /* Allocation failed: the unit stays in the disabled state. */
+      if (!t)
+         return;
+   }
+
+   imesa->CurrentTexObj[0] = &t->base;
+   t->base.bound |= 1;
+
+   if (t->base.dirty_images[0] || t->dirtySubImages) {
+       savageSetTexImages(imesa, tObj);
+       savageUploadTexImages(imesa, t); 
+   }
+   
+   driUpdateTextureLRU( &t->base );
+
+   format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+
+   /* Select the blend-control word from env mode and base format. */
+   switch (ctx->Texture.Unit[0].EnvMode) {
+   case GL_REPLACE:
+      imesa->regs.s4.texCtrl[0].ni.clrArg1Invert = GL_FALSE;
+      switch(format)
+      {
+          case GL_LUMINANCE:
+          case GL_RGB:
+               imesa->regs.s4.texBlendCtrl[0].ui = TBC_Decal;
+               break;
+
+          case GL_LUMINANCE_ALPHA:
+          case GL_RGBA:
+          case GL_INTENSITY:
+               imesa->regs.s4.texBlendCtrl[0].ui = TBC_Copy;
+               break;
+
+          case GL_ALPHA:
+               imesa->regs.s4.texBlendCtrl[0].ui = TBC_CopyAlpha;
+               break;
+      }
+       __HWEnvCombineSingleUnitScale(imesa, 0, 0,
+				     &imesa->regs.s4.texBlendCtrl[0]);
+      break;
+
+    case GL_DECAL:
+        imesa->regs.s4.texCtrl[0].ni.clrArg1Invert = GL_FALSE;
+        switch (format)
+        {
+            case GL_RGB:
+            case GL_LUMINANCE:
+                imesa->regs.s4.texBlendCtrl[0].ui = TBC_Decal;
+                break;
+
+            case GL_RGBA:
+            case GL_INTENSITY:
+            case GL_LUMINANCE_ALPHA:
+                imesa->regs.s4.texBlendCtrl[0].ui = TBC_DecalAlpha;
+                break;
+
+            /*
+             GL_LUMINANCE, GL_LUMINANCE_ALPHA, GL_ALPHA, GL_INTENSITY
+             are undefined with GL_DECAL
+            */
+
+            case GL_ALPHA:
+                imesa->regs.s4.texBlendCtrl[0].ui = TBC_CopyAlpha;
+                break;
+        }
+        __HWEnvCombineSingleUnitScale(imesa, 0, 0,
+				      &imesa->regs.s4.texBlendCtrl[0]);
+        break;
+
+    case GL_MODULATE:
+        imesa->regs.s4.texCtrl[0].ni.clrArg1Invert = GL_FALSE;
+        imesa->regs.s4.texBlendCtrl[0].ui = TBC_ModulAlpha;
+        __HWEnvCombineSingleUnitScale(imesa, 0, 0,
+				      &imesa->regs.s4.texBlendCtrl[0]);
+        break;
+
+    case GL_BLEND:
+	imesa->regs.s4.texBlendColor.ui = imesa->texEnvColor;
+
+        /* Except for GL_ALPHA, blending also programs unit 1 as a copy
+         * of unit 0 and sets bTexEn1.
+         * NOTE(review): tex0Width/tex0Height/tex0Fmt and texAddr[0] are
+         * copied here but are only written for the current texture at
+         * the end of this function, so the copies look like values from
+         * an earlier update - confirm this is intended. */
+        switch (format)
+        {
+            case GL_ALPHA:
+                imesa->regs.s4.texBlendCtrl[0].ui = TBC_ModulAlpha;
+                imesa->regs.s4.texCtrl[0].ni.clrArg1Invert = GL_FALSE;
+                break;
+
+            case GL_LUMINANCE:
+            case GL_RGB:
+                imesa->regs.s4.texBlendCtrl[0].ui = TBC_Blend0;
+                imesa->regs.s4.texDescr.ni.tex1En = GL_TRUE;
+                imesa->regs.s4.texDescr.ni.texBLoopEn = GL_TRUE;
+                imesa->regs.s4.texDescr.ni.tex1Width  =
+		    imesa->regs.s4.texDescr.ni.tex0Width;
+                imesa->regs.s4.texDescr.ni.tex1Height =
+		    imesa->regs.s4.texDescr.ni.tex0Height;
+                imesa->regs.s4.texDescr.ni.tex1Fmt =
+		    imesa->regs.s4.texDescr.ni.tex0Fmt;
+
+		imesa->regs.s4.texAddr[1].ui = imesa->regs.s4.texAddr[0].ui;
+		imesa->regs.s4.texBlendCtrl[1].ui = TBC_Blend1;
+
+                imesa->regs.s4.texCtrl[0].ni.clrArg1Invert = GL_TRUE;
+                imesa->bTexEn1 = GL_TRUE;
+                break;
+
+            case GL_LUMINANCE_ALPHA:
+            case GL_RGBA:
+                imesa->regs.s4.texBlendCtrl[0].ui = TBC_BlendAlpha0;
+                imesa->regs.s4.texDescr.ni.tex1En = GL_TRUE;
+                imesa->regs.s4.texDescr.ni.texBLoopEn = GL_TRUE;
+                imesa->regs.s4.texDescr.ni.tex1Width  =
+		    imesa->regs.s4.texDescr.ni.tex0Width;
+                imesa->regs.s4.texDescr.ni.tex1Height =
+		    imesa->regs.s4.texDescr.ni.tex0Height;
+                imesa->regs.s4.texDescr.ni.tex1Fmt =
+		    imesa->regs.s4.texDescr.ni.tex0Fmt;
+
+		imesa->regs.s4.texAddr[1].ui = imesa->regs.s4.texAddr[0].ui;
+		imesa->regs.s4.texBlendCtrl[1].ui = TBC_BlendAlpha1;
+
+                imesa->regs.s4.texCtrl[0].ni.clrArg1Invert = GL_TRUE;
+                imesa->bTexEn1 = GL_TRUE;
+                break;
+
+            case GL_INTENSITY:
+                imesa->regs.s4.texBlendCtrl[0].ui = TBC_BlendInt0;
+                imesa->regs.s4.texDescr.ni.tex1En = GL_TRUE;
+                imesa->regs.s4.texDescr.ni.texBLoopEn = GL_TRUE;
+                imesa->regs.s4.texDescr.ni.tex1Width  =
+		    imesa->regs.s4.texDescr.ni.tex0Width;
+                imesa->regs.s4.texDescr.ni.tex1Height =
+		    imesa->regs.s4.texDescr.ni.tex0Height;
+                imesa->regs.s4.texDescr.ni.tex1Fmt =
+		    imesa->regs.s4.texDescr.ni.tex0Fmt;
+
+		imesa->regs.s4.texAddr[1].ui = imesa->regs.s4.texAddr[0].ui;
+		imesa->regs.s4.texBlendCtrl[1].ui = TBC_BlendInt1;
+
+                imesa->regs.s4.texCtrl[0].ni.clrArg1Invert = GL_TRUE;
+                imesa->regs.s4.texCtrl[0].ni.alphaArg1Invert = GL_TRUE;
+                imesa->bTexEn1 = GL_TRUE;
+                break;
+        }
+        __HWEnvCombineSingleUnitScale(imesa, 0, 0,
+				      &imesa->regs.s4.texBlendCtrl[0]);
+        break;
+
+    case GL_ADD:
+        imesa->regs.s4.texCtrl[0].ni.clrArg1Invert = GL_FALSE;
+        switch (format)
+        {
+            case GL_ALPHA:
+                imesa->regs.s4.texBlendCtrl[0].ui = TBC_ModulAlpha;
+		break;
+
+            case GL_LUMINANCE:
+            case GL_RGB:
+		imesa->regs.s4.texBlendCtrl[0].ui = TBC_Add;
+		break;
+
+            case GL_LUMINANCE_ALPHA:
+            case GL_RGBA:
+		imesa->regs.s4.texBlendCtrl[0].ui = TBC_Add;
+		break;
+
+            case GL_INTENSITY:
+		imesa->regs.s4.texBlendCtrl[0].ui = TBC_AddAlpha;
+		break;
+	}
+        __HWEnvCombineSingleUnitScale(imesa, 0, 0,
+				      &imesa->regs.s4.texBlendCtrl[0]);
+        break;
+
+#if GL_ARB_texture_env_combine
+    case GL_COMBINE_ARB:
+        __HWParseTexEnvCombine(imesa, 0, &imesa->regs.s4.texCtrl[0],
+			       &imesa->regs.s4.texBlendCtrl[0]);
+        break;
+#endif
+
+   /* NOTE(review): an unexpected env mode terminates the whole process
+    * here; the Savage3D path only prints a message. */
+   default:
+      fprintf(stderr, "unknown tex env mode");
+      exit(1);
+      break;			
+   }
+
+    savage4_set_wrap_mode( imesa, 0, t->setup.sWrapMode, t->setup.tWrapMode );
+    savage4_set_filter_mode( imesa, 0, t->setup.minFilter, t->setup.magFilter );
+
+    /* LOD bias: scaled by 32, offset, clamped to [-256, 255] and stored
+     * in the low 9 bits of dBias. */
+    if((ctx->Texture.Unit[0].LodBias !=0.0F) ||
+       (imesa->regs.s4.texCtrl[0].ni.dBias != 0))
+    {
+	int bias = (int)(ctx->Texture.Unit[0].LodBias * 32.0) +
+	    SAVAGE4_LOD_OFFSET;
+	if (bias < -256)
+	    bias = -256;
+	else if (bias > 255)
+	    bias = 255;
+	imesa->regs.s4.texCtrl[0].ni.dBias = bias & 0x1ff;
+    }
+
+    /* Size, format and mipmap depth of the base level. */
+    image = tObj->Image[0][tObj->BaseLevel];
+    imesa->regs.s4.texDescr.ni.tex0En = GL_TRUE;
+    imesa->regs.s4.texDescr.ni.tex0Width  = image->WidthLog2;
+    imesa->regs.s4.texDescr.ni.tex0Height = image->HeightLog2;
+    imesa->regs.s4.texDescr.ni.tex0Fmt = t->hwFormat;
+    imesa->regs.s4.texCtrl[0].ni.dMax = t->base.lastLevel - t->base.firstLevel;
+
+    if (imesa->regs.s4.texDescr.ni.tex1En)
+        imesa->regs.s4.texDescr.ni.texBLoopEn = GL_TRUE;
+
+    /* Texture address with bit 1 set; bit 0 is additionally set for
+     * textures living in the AGP heap.
+     * NOTE(review): savageUploadTexImages returns early when no heap
+     * space could be allocated; t->base.heap is dereferenced here
+     * unconditionally - confirm it cannot be NULL at this point. */
+    imesa->regs.s4.texAddr[0].ui = (uint32_t) t->setup.physAddr | 0x2;
+    if(t->base.heap->heapId == SAVAGE_AGP_HEAP)
+	imesa->regs.s4.texAddr[0].ui |= 0x1;
+    
+    return;
+}
+/**
+ * Program the Savage4 register shadow for texture unit 1.
+ *
+ * If imesa->bTexEn1 is set, the flag is cleared and the function returns
+ * without touching any register; savageUpdateTex0State_s4 sets that flag
+ * after programming unit 1 itself for GL_BLEND.  Otherwise the unit is
+ * written as disabled and, if really enabled with a borderless 1D/2D
+ * texture, validated, uploaded when dirty and fully programmed.  Other
+ * targets or a texture border raise SAVAGE_FALLBACK_TEXTURE.
+ */
+static void savageUpdateTex1State_s4( GLcontext *ctx )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   struct gl_texture_object	*tObj;
+   struct gl_texture_image *image;
+   savageTexObjPtr t;
+   GLuint format;
+
+   /* disable */
+   if(imesa->bTexEn1)
+   {
+       /* Consume the one-shot flag and keep the existing unit 1 state. */
+       imesa->bTexEn1 = GL_FALSE;
+       return;
+   }
+
+   imesa->regs.s4.texDescr.ni.tex1En = GL_FALSE;
+   imesa->regs.s4.texBlendCtrl[1].ui = TBC_NoTexMap1;
+   imesa->regs.s4.texCtrl[1].ui = 0x20f040;
+   imesa->regs.s4.texDescr.ni.texBLoopEn = GL_FALSE;
+   if (ctx->Texture.Unit[1]._ReallyEnabled == 0)
+      return;
+
+   tObj = ctx->Texture.Unit[1]._Current;
+
+   if ((ctx->Texture.Unit[1]._ReallyEnabled & ~(TEXTURE_1D_BIT|TEXTURE_2D_BIT))
+       || tObj->Image[0][tObj->BaseLevel]->Border > 0) {
+      /* 3D texturing enabled, or texture border - fallback */
+      FALLBACK (ctx, SAVAGE_FALLBACK_TEXTURE, GL_TRUE);
+      return;
+   }
+
+   /* Do 2D texture setup */
+
+   t = tObj->DriverData;
+   if (!t) {
+      t = savageAllocTexObj( tObj );
+      /* Allocation failed: the unit stays in the disabled state. */
+      if (!t)
+         return;
+   }
+    
+   imesa->CurrentTexObj[1] = &t->base;
+
+   t->base.bound |= 2;
+
+   if (t->base.dirty_images[0] || t->dirtySubImages) {
+       savageSetTexImages(imesa, tObj);
+       savageUploadTexImages(imesa, t);
+   }
+   
+   driUpdateTextureLRU( &t->base );
+
+   format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+
+   /* Select the blend-control word from env mode and base format.
+    * NOTE(review): the __HWEnvCombineSingleUnitScale calls below pass
+    * &imesa->regs.s4.texBlendCtrl (the whole array) where the unit 0
+    * code passes &...texBlendCtrl[0] - confirm whether
+    * &...texBlendCtrl[1] is intended. */
+   switch (ctx->Texture.Unit[1].EnvMode) {
+   case GL_REPLACE:
+        imesa->regs.s4.texCtrl[1].ni.clrArg1Invert = GL_FALSE;
+        switch (format)
+        {
+            case GL_LUMINANCE:
+            case GL_RGB:
+                imesa->regs.s4.texBlendCtrl[1].ui = TBC_Decal;
+                break;
+
+            case GL_LUMINANCE_ALPHA:
+            case GL_INTENSITY:
+            case GL_RGBA:
+                imesa->regs.s4.texBlendCtrl[1].ui = TBC_Copy;
+                break;
+
+            case GL_ALPHA:
+                imesa->regs.s4.texBlendCtrl[1].ui = TBC_CopyAlpha1;
+                break;
+        }
+        __HWEnvCombineSingleUnitScale(imesa, 0, 1, &imesa->regs.s4.texBlendCtrl);
+      break;
+   case GL_MODULATE:
+       imesa->regs.s4.texCtrl[1].ni.clrArg1Invert = GL_FALSE;
+       imesa->regs.s4.texBlendCtrl[1].ui = TBC_ModulAlpha1;
+       __HWEnvCombineSingleUnitScale(imesa, 0, 1, &imesa->regs.s4.texBlendCtrl);
+       break;
+
+    case GL_ADD:
+        imesa->regs.s4.texCtrl[1].ni.clrArg1Invert = GL_FALSE;
+        switch (format)
+        {
+            case GL_ALPHA:
+                imesa->regs.s4.texBlendCtrl[1].ui = TBC_ModulAlpha1;
+		break;
+
+            case GL_LUMINANCE:
+            case GL_RGB:
+		imesa->regs.s4.texBlendCtrl[1].ui = TBC_Add1;
+		break;
+
+            case GL_LUMINANCE_ALPHA:
+            case GL_RGBA:
+		imesa->regs.s4.texBlendCtrl[1].ui = TBC_Add1;
+		break;
+
+            case GL_INTENSITY:
+		imesa->regs.s4.texBlendCtrl[1].ui = TBC_AddAlpha1;
+		break;
+	}
+        __HWEnvCombineSingleUnitScale(imesa, 0, 1, &imesa->regs.s4.texBlendCtrl);
+        break;
+
+#if GL_ARB_texture_env_combine
+    /* NOTE(review): no `texCtrl` variable is declared in this function;
+     * the unit 0 code passes &imesa->regs.s4.texCtrl[0] here - confirm
+     * how this compiles and whether &imesa->regs.s4.texCtrl[1] is meant. */
+    case GL_COMBINE_ARB:
+        __HWParseTexEnvCombine(imesa, 1, &texCtrl, &imesa->regs.s4.texBlendCtrl);
+        break;
+#endif
+
+   case GL_DECAL:
+        imesa->regs.s4.texCtrl[1].ni.clrArg1Invert = GL_FALSE;
+
+        switch (format)
+        {
+            case GL_LUMINANCE:
+            case GL_RGB:
+                imesa->regs.s4.texBlendCtrl[1].ui = TBC_Decal1;
+                break;
+            case GL_LUMINANCE_ALPHA:
+            case GL_INTENSITY:
+            case GL_RGBA:
+                imesa->regs.s4.texBlendCtrl[1].ui = TBC_DecalAlpha1;
+                break;
+
+                /*
+                // GL_LUMINANCE, GL_LUMINANCE_ALPHA, GL_ALPHA, GL_INTENSITY
+                // are undefined with GL_DECAL
+                */
+            case GL_ALPHA:
+                imesa->regs.s4.texBlendCtrl[1].ui = TBC_CopyAlpha1;
+                break;
+        }
+        __HWEnvCombineSingleUnitScale(imesa, 0, 1, &imesa->regs.s4.texBlendCtrl);
+        break;
+
+   /* Only GL_LUMINANCE gets special treatment; for every other base
+    * format texBlendCtrl[1] keeps the TBC_NoTexMap1 value written above. */
+   case GL_BLEND:
+        if (format == GL_LUMINANCE)
+        {
+            /*
+            // This is a hack for GLQuake, invert.
+            */
+            imesa->regs.s4.texCtrl[1].ni.clrArg1Invert = GL_TRUE;
+            imesa->regs.s4.texBlendCtrl[1].ui = 0;
+        }
+        __HWEnvCombineSingleUnitScale(imesa, 0, 1, &imesa->regs.s4.texBlendCtrl);
+      break;
+
+   /* NOTE(review): an unexpected env mode terminates the whole process. */
+   default:
+      fprintf(stderr, "unknown tex 1 env mode\n");
+      exit(1);
+      break;			
+   }
+
+    savage4_set_wrap_mode( imesa, 1, t->setup.sWrapMode, t->setup.tWrapMode );
+    savage4_set_filter_mode( imesa, 1, t->setup.minFilter, t->setup.magFilter );
+
+    /* LOD bias: scaled by 32, offset, clamped to [-256, 255] and stored
+     * in the low 9 bits of dBias. */
+    if((ctx->Texture.Unit[1].LodBias !=0.0F) ||
+       (imesa->regs.s4.texCtrl[1].ni.dBias != 0))
+    {
+	int bias = (int)(ctx->Texture.Unit[1].LodBias * 32.0) +
+	    SAVAGE4_LOD_OFFSET;
+	if (bias < -256)
+	    bias = -256;
+	else if (bias > 255)
+	    bias = 255;
+	imesa->regs.s4.texCtrl[1].ni.dBias = bias & 0x1ff;
+    }
+
+    /* Size, format and mipmap depth of the base level. */
+    image = tObj->Image[0][tObj->BaseLevel];
+    imesa->regs.s4.texDescr.ni.tex1En = GL_TRUE;
+    imesa->regs.s4.texDescr.ni.tex1Width  = image->WidthLog2;
+    imesa->regs.s4.texDescr.ni.tex1Height = image->HeightLog2;
+    imesa->regs.s4.texDescr.ni.tex1Fmt = t->hwFormat;
+    imesa->regs.s4.texCtrl[1].ni.dMax = t->base.lastLevel - t->base.firstLevel;
+    imesa->regs.s4.texDescr.ni.texBLoopEn = GL_TRUE;
+
+    /* Texture address with bit 1 set; bit 0 is additionally set for
+     * textures living in the AGP heap.
+     * NOTE(review): t->base.heap is dereferenced unconditionally -
+     * confirm it cannot be NULL after a failed upload. */
+    imesa->regs.s4.texAddr[1].ui = (uint32_t) t->setup.physAddr | 2;
+    if(t->base.heap->heapId == SAVAGE_AGP_HEAP)
+	imesa->regs.s4.texAddr[1].ui |= 0x1;
+}
+/**
+ * Program the Savage3D register shadow for the single texture unit.
+ *
+ * The texture control register is first reset to a disabled state.  If
+ * unit 0 is really enabled with a borderless 1D/2D texture, the texture
+ * is validated and uploaded when dirty and the blend, wrap, filter, LOD
+ * bias, size, format and address fields are filled in.  Unsupported
+ * targets, texture borders, GL_BLEND and differing S/T wrap modes raise
+ * SAVAGE_FALLBACK_TEXTURE.
+ */
+static void savageUpdateTexState_s3d( GLcontext *ctx )
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+    struct gl_texture_object *tObj;
+    struct gl_texture_image *image;
+    savageTexObjPtr t;
+    GLuint format;
+
+    /* disable */
+    imesa->regs.s3d.texCtrl.ui = 0;
+    imesa->regs.s3d.texCtrl.ni.texEn = GL_FALSE;
+    imesa->regs.s3d.texCtrl.ni.dBias = 0x08;
+    imesa->regs.s3d.texCtrl.ni.texXprEn = GL_TRUE;
+    if (ctx->Texture.Unit[0]._ReallyEnabled == 0)
+	return;
+
+    tObj = ctx->Texture.Unit[0]._Current;
+    if ((ctx->Texture.Unit[0]._ReallyEnabled & ~(TEXTURE_1D_BIT|TEXTURE_2D_BIT))
+	|| tObj->Image[0][tObj->BaseLevel]->Border > 0) {
+	/* 3D texturing enabled, or texture border - fallback */
+	FALLBACK (ctx, SAVAGE_FALLBACK_TEXTURE, GL_TRUE);
+	return;
+    }
+
+    /* Do 2D texture setup */
+    t = tObj->DriverData;
+    if (!t) {
+	t = savageAllocTexObj( tObj );
+	/* Allocation failed: the unit stays in the disabled state. */
+	if (!t)
+	    return;
+    }
+
+    imesa->CurrentTexObj[0] = &t->base;
+    t->base.bound |= 1;
+
+    if (t->base.dirty_images[0] || t->dirtySubImages) {
+	savageSetTexImages(imesa, tObj);
+	savageUploadTexImages(imesa, t);
+    }
+
+    driUpdateTextureLRU( &t->base );
+
+    format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+
+    /* FIXME: copied from utah-glx, probably needs some tuning */
+    switch (ctx->Texture.Unit[0].EnvMode) {
+    case GL_DECAL:
+	imesa->regs.s3d.drawCtrl.ni.texBlendCtrl = SAVAGETBC_DECALALPHA_S3D;
+	break;
+    case GL_REPLACE:
+	switch (format) {
+	case GL_ALPHA: /* FIXME */
+	    imesa->regs.s3d.drawCtrl.ni.texBlendCtrl = 1;
+	    break;
+	case GL_LUMINANCE_ALPHA:
+	case GL_RGBA:
+	    imesa->regs.s3d.drawCtrl.ni.texBlendCtrl = 4;
+	    break;
+	case GL_RGB:
+	case GL_LUMINANCE:
+	    imesa->regs.s3d.drawCtrl.ni.texBlendCtrl = SAVAGETBC_DECAL_S3D;
+	    break;
+	case GL_INTENSITY:
+	    imesa->regs.s3d.drawCtrl.ni.texBlendCtrl = SAVAGETBC_COPY_S3D;
+	}
+	break;
+    case GL_BLEND: /* hardware can't do GL_BLEND */
+	FALLBACK (ctx, SAVAGE_FALLBACK_TEXTURE, GL_TRUE);
+	return;
+    case GL_MODULATE:
+	imesa->regs.s3d.drawCtrl.ni.texBlendCtrl = SAVAGETBC_MODULATEALPHA_S3D;
+	break;
+    /* Unknown modes are reported but setup continues with whatever
+     * texBlendCtrl already held. */
+    default:
+	fprintf(stderr, "unknown tex env mode\n");
+	/*exit(1);*/
+	break;			
+    }
+
+    /* The Savage3D can't handle different wrapping modes in s and t.
+     * If they are not the same, fall back to software. */
+    if (t->setup.sWrapMode != t->setup.tWrapMode) {
+	FALLBACK (ctx, SAVAGE_FALLBACK_TEXTURE, GL_TRUE);
+	return;
+    }
+    /* Everything other than GL_REPEAT is treated as clamp. */
+    imesa->regs.s3d.texCtrl.ni.uWrapEn = 0;
+    imesa->regs.s3d.texCtrl.ni.vWrapEn = 0;
+    imesa->regs.s3d.texCtrl.ni.wrapMode =
+	(t->setup.sWrapMode == GL_REPEAT) ? TAM_Wrap : TAM_Clamp;
+
+    /* Note the inverted sense compared to the Savage4 code: this
+     * register has a mipmap *disable* bit. */
+    switch (t->setup.minFilter) {
+    case GL_NEAREST:
+	imesa->regs.s3d.texCtrl.ni.filterMode    = TFM_Point;
+	imesa->regs.s3d.texCtrl.ni.mipmapDisable = GL_TRUE;
+	break;
+
+    case GL_LINEAR:
+	imesa->regs.s3d.texCtrl.ni.filterMode    = TFM_Bilin;
+	imesa->regs.s3d.texCtrl.ni.mipmapDisable = GL_TRUE;
+	break;
+
+    case GL_NEAREST_MIPMAP_NEAREST:
+	imesa->regs.s3d.texCtrl.ni.filterMode    = TFM_Point;
+	imesa->regs.s3d.texCtrl.ni.mipmapDisable = GL_FALSE;
+	break;
+
+    case GL_LINEAR_MIPMAP_NEAREST:
+	imesa->regs.s3d.texCtrl.ni.filterMode    = TFM_Bilin;
+	imesa->regs.s3d.texCtrl.ni.mipmapDisable = GL_FALSE;
+	break;
+
+    case GL_NEAREST_MIPMAP_LINEAR:
+    case GL_LINEAR_MIPMAP_LINEAR:
+	imesa->regs.s3d.texCtrl.ni.filterMode    = TFM_Trilin;
+	imesa->regs.s3d.texCtrl.ni.mipmapDisable = GL_FALSE;
+	break;
+    }
+
+    /* There is no way to specify a maximum mipmap level. We may have to
+       disable mipmapping completely. */
+    /*
+    if (t->max_level < t->image[0].image->WidthLog2 ||
+	t->max_level < t->image[0].image->HeightLog2) {
+	texCtrl.ni.mipmapEnable = GL_TRUE;
+	if (texCtrl.ni.filterMode == TFM_Trilin)
+	    texCtrl.ni.filterMode = TFM_Bilin;
+	texCtrl.ni.filterMode = TFM_Point;
+    }
+    */
+
+    /* LOD bias: scaled by 16, clamped to [-256, 255] and masked to 9 bits.
+     * NOTE(review): dBias was set to 0x08 at the top of this function, so
+     * the second half of this condition looks always true and the 0x08
+     * default is always overwritten - confirm this is intended. */
+    if((ctx->Texture.Unit[0].LodBias !=0.0F) ||
+       (imesa->regs.s3d.texCtrl.ni.dBias != 0))
+    {
+	int bias = (int)(ctx->Texture.Unit[0].LodBias * 16.0);
+	if (bias < -256)
+	    bias = -256;
+	else if (bias > 255)
+	    bias = 255;
+	imesa->regs.s3d.texCtrl.ni.dBias = bias & 0x1ff;
+    }
+
+    /* Size and format of the base level. */
+    image = tObj->Image[0][tObj->BaseLevel];
+    imesa->regs.s3d.texCtrl.ni.texEn = GL_TRUE;
+    imesa->regs.s3d.texDescr.ni.texWidth  = image->WidthLog2;
+    imesa->regs.s3d.texDescr.ni.texHeight = image->HeightLog2;
+    assert (t->hwFormat <= 7);
+    imesa->regs.s3d.texDescr.ni.texFmt = t->hwFormat;
+
+    /* Texture address with bit 1 set; bit 0 is additionally set for
+     * textures living in the AGP heap.
+     * NOTE(review): t->base.heap is dereferenced unconditionally -
+     * confirm it cannot be NULL after a failed upload. */
+    imesa->regs.s3d.texAddr.ui = (uint32_t) t->setup.physAddr | 2;
+    if(t->base.heap->heapId == SAVAGE_AGP_HEAP)
+	imesa->regs.s3d.texAddr.ui |= 0x1;
+}
+
+
+/**
+ * Stamp the currently bound texture objects with a freshly emitted
+ * event tag.  Does nothing when no texture is current or when the DRM
+ * minor version is below 3.
+ */
+static void savageTimestampTextures( savageContextPtr imesa )
+{
+   unsigned int tag;
+   int unit;
+
+   /* Nothing bound, nothing to stamp. */
+   if (!imesa->CurrentTexObj[0] && !imesa->CurrentTexObj[1])
+      return;
+
+   /* Timestamping for texture heap aging is only useful with the
+    * long-lived 32-bit event tags available with Savage DRM 2.3.x
+    * or later. */
+   if (imesa->savageScreen->driScrnPriv->drm_version.minor < 3)
+      return;
+
+   FLUSH_BATCH(imesa);
+   tag = savageEmitEvent(imesa, SAVAGE_WAIT_3D);
+   for (unit = 0; unit < 2; unit++) {
+      if (imesa->CurrentTexObj[unit])
+	 imesa->CurrentTexObj[unit]->timestamp = tag;
+   }
+}
+
+
+/**
+ * Re-validate both Savage4 texture units.
+ *
+ * Timestamps the old textures if either unit is about to switch to a
+ * different texture or be disabled, unbinds the current objects, runs
+ * the per-unit state updates and flags both units for upload.
+ */
+static void savageUpdateTextureState_s4( GLcontext *ctx )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   int needStamp = 0;
+   int unit;
+
+   /* When a texture is about to change or be disabled, timestamp the
+    * old texture(s). We'll have to wait for this time stamp before
+    * uploading anything to the same texture heap.
+    */
+   for (unit = 0; unit < 2 && !needStamp; unit++) {
+      if (!imesa->CurrentTexObj[unit])
+	 continue;
+      if (!ctx->Texture.Unit[unit]._ReallyEnabled ||
+	  ctx->Texture.Unit[unit]._Current->DriverData !=
+	  imesa->CurrentTexObj[unit])
+	 needStamp = 1;
+   }
+   if (needStamp)
+      savageTimestampTextures(imesa);
+
+   /* Drop the per-unit bound bit and forget the current objects. */
+   for (unit = 0; unit < 2; unit++) {
+      if (imesa->CurrentTexObj[unit])
+	 imesa->CurrentTexObj[unit]->bound &= ~(1 << unit);
+   }
+   for (unit = 0; unit < 2; unit++)
+      imesa->CurrentTexObj[unit] = 0;
+
+   savageUpdateTex0State_s4( ctx );
+   savageUpdateTex1State_s4( ctx );
+   imesa->dirty |= (SAVAGE_UPLOAD_TEX0 | SAVAGE_UPLOAD_TEX1);
+}
+/**
+ * Re-validate the single Savage3D texture unit.
+ *
+ * Timestamps the old texture if the unit is about to switch to a
+ * different texture or be disabled, unbinds the current object, runs
+ * the state update and flags the unit for upload.
+ */
+static void savageUpdateTextureState_s3d( GLcontext *ctx )
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+
+    /* When a texture is about to change or be disabled, timestamp the
+     * old texture(s). We'll have to wait for this time stamp before
+     * uploading anything to the same texture heap.
+     */
+    if (imesa->CurrentTexObj[0]) {
+	if (!ctx->Texture.Unit[0]._ReallyEnabled ||
+	    ctx->Texture.Unit[0]._Current->DriverData !=
+	    imesa->CurrentTexObj[0])
+	    savageTimestampTextures(imesa);
+    }
+
+    if (imesa->CurrentTexObj[0])
+	imesa->CurrentTexObj[0]->bound &= ~1;
+    imesa->CurrentTexObj[0] = 0;
+
+    savageUpdateTexState_s3d( ctx );
+    imesa->dirty |= SAVAGE_UPLOAD_TEX0;
+}
+/**
+ * Entry point for texture state validation: clears the texture-related
+ * fallback flags and dispatches to the Savage4 or Savage3D code path
+ * depending on the chipset.
+ */
+void savageUpdateTextureState( GLcontext *ctx)
+{
+    savageContextPtr imesa = SAVAGE_CONTEXT( ctx );
+
+    /* Start without texture fallbacks; the chip-specific code sets
+     * SAVAGE_FALLBACK_TEXTURE again where needed. */
+    FALLBACK (ctx, SAVAGE_FALLBACK_TEXTURE, GL_FALSE);
+    FALLBACK(ctx, SAVAGE_FALLBACK_PROJ_TEXTURE, GL_FALSE);
+
+    if (imesa->savageScreen->chipset < S3_SAVAGE4) {
+	savageUpdateTextureState_s3d (ctx);
+	return;
+    }
+    savageUpdateTextureState_s4 (ctx);
+}
+
+
+
+/*****************************************
+ * DRIVER functions
+ *****************************************/
+
+/**
+ * glTexEnv driver hook.
+ *
+ * A change of the env mode flags the texture state as new.  A change of
+ * the env colour repacks the current unit's EnvColor into
+ * imesa->texEnvColor with alpha in bits 24-31, red in 16-23, green in
+ * 8-15 and blue in 0-7.  Other parameters are ignored; \p target and
+ * \p param are unused.
+ */
+static void savageTexEnv( GLcontext *ctx, GLenum target,
+			GLenum pname, const GLfloat *param )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT( ctx );
+
+   switch (pname) {
+   case GL_TEXTURE_ENV_MODE:
+      imesa->new_state |= SAVAGE_NEW_TEXTURE;
+      break;
+
+   case GL_TEXTURE_ENV_COLOR: {
+      struct gl_texture_unit *unit =
+	 &ctx->Texture.Unit[ctx->Texture.CurrentUnit];
+      const GLfloat *color = unit->EnvColor;
+      GLuint red, green, blue, alpha;
+
+      CLAMPED_FLOAT_TO_UBYTE(red, color[0]);
+      CLAMPED_FLOAT_TO_UBYTE(green, color[1]);
+      CLAMPED_FLOAT_TO_UBYTE(blue, color[2]);
+      CLAMPED_FLOAT_TO_UBYTE(alpha, color[3]);
+
+      imesa->texEnvColor = ((alpha << 24) | (red << 16) |
+			    (green << 8) | (blue << 0));
+      break;
+   }
+
+   default:
+      break;
+   }
+}
+
+/* Called before the image data of a texture object is modified.
+ *
+ * Updates the heap's time stamp, so the new image is not uploaded
+ * while the old one is still in use. If the texture that is going to
+ * be changed is currently bound, the texture is timestamped first.
+ * Objects that have no heap are left alone. */
+static void savageTexImageChanged (savageTexObjPtr t) {
+    if (!t->base.heap)
+	return;
+
+    if (t->base.bound) {
+	savageContextPtr imesa =
+	    (savageContextPtr)t->base.heap->driverContext;
+	savageTimestampTextures(imesa);
+    }
+
+    /* The heap keeps the most recent stamp of any of its textures. */
+    if (t->base.heap->timestamp < t->base.timestamp)
+	t->base.heap->timestamp = t->base.timestamp;
+}
+
+/**
+ * glTexImage1D driver hook: makes sure a driver texture object exists,
+ * lets Mesa store the image and marks the level as dirty.
+ * Raises GL_OUT_OF_MEMORY if the driver object cannot be allocated.
+ */
+static void savageTexImage1D( GLcontext *ctx, GLenum target, GLint level,
+			      GLint internalFormat,
+			      GLint width, GLint border,
+			      GLenum format, GLenum type, const GLvoid *pixels,
+			      const struct gl_pixelstore_attrib *packing,
+			      struct gl_texture_object *texObj,
+			      struct gl_texture_image *texImage )
+{
+   savageTexObjPtr t = (savageTexObjPtr) texObj->DriverData;
+
+   if (!t) {
+      /* First image for this object: create the driver side. */
+      t = savageAllocTexObj(texObj);
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D");
+         return;
+      }
+   } else {
+      savageTexImageChanged (t);
+   }
+
+   _mesa_store_teximage1d( ctx, target, level, internalFormat,
+			   width, border, format, type,
+			   pixels, packing, texObj, texImage );
+
+   t->base.dirty_images[0] |= (1 << level);
+   SAVAGE_CONTEXT(ctx)->new_state |= SAVAGE_NEW_TEXTURE;
+}
+
+/**
+ * glTexSubImage1D driver hook: marks the tiles touched by the update as
+ * dirty, lets Mesa store the sub-image and flags the level in
+ * dirtySubImages.  If no driver object exists yet one is allocated and
+ * the whole level is marked dirty instead.
+ */
+static void savageTexSubImage1D( GLcontext *ctx,
+				 GLenum target,
+				 GLint level,
+				 GLint xoffset,
+				 GLsizei width,
+				 GLenum format, GLenum type,
+				 const GLvoid *pixels,
+				 const struct gl_pixelstore_attrib *packing,
+				 struct gl_texture_object *texObj,
+				 struct gl_texture_image *texImage )
+{
+   savageTexObjPtr t = (savageTexObjPtr) texObj->DriverData;
+
+   assert( t ); /* this _should_ be true */
+   if (!t) {
+      t = savageAllocTexObj(texObj);
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D");
+         return;
+      }
+      t->base.dirty_images[0] |= (1 << level);
+   } else {
+      savageTexImageChanged (t);
+      /* A 1D image is handled as a 2D image of height 1. */
+      savageMarkDirtyTiles(t, level, texImage->Width2, 1,
+			   xoffset, 0, width, 1);
+   }
+
+   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+			     format, type, pixels, packing, texObj,
+			     texImage);
+
+   t->dirtySubImages |= (1 << level);
+   SAVAGE_CONTEXT(ctx)->new_state |= SAVAGE_NEW_TEXTURE;
+}
+
+/**
+ * glTexImage2D driver hook: makes sure a driver texture object exists,
+ * lets Mesa store the image and marks the level as dirty.
+ * Raises GL_OUT_OF_MEMORY if the driver object cannot be allocated.
+ */
+static void savageTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+			      GLint internalFormat,
+			      GLint width, GLint height, GLint border,
+			      GLenum format, GLenum type, const GLvoid *pixels,
+			      const struct gl_pixelstore_attrib *packing,
+			      struct gl_texture_object *texObj,
+			      struct gl_texture_image *texImage )
+{
+   savageTexObjPtr t = (savageTexObjPtr) texObj->DriverData;
+
+   if (!t) {
+      /* First image for this object: create the driver side. */
+      t = savageAllocTexObj(texObj);
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+         return;
+      }
+   } else {
+      savageTexImageChanged (t);
+   }
+
+   _mesa_store_teximage2d( ctx, target, level, internalFormat,
+			   width, height, border, format, type,
+			   pixels, packing, texObj, texImage );
+
+   t->base.dirty_images[0] |= (1 << level);
+   SAVAGE_CONTEXT(ctx)->new_state |= SAVAGE_NEW_TEXTURE;
+}
+
+/**
+ * glTexSubImage2D driver hook: marks the tiles touched by the update as
+ * dirty, lets Mesa store the sub-image and flags the level in
+ * dirtySubImages.  If no driver object exists yet one is allocated and
+ * the whole level is marked dirty instead.
+ */
+static void savageTexSubImage2D( GLcontext *ctx,
+				 GLenum target,
+				 GLint level,
+				 GLint xoffset, GLint yoffset,
+				 GLsizei width, GLsizei height,
+				 GLenum format, GLenum type,
+				 const GLvoid *pixels,
+				 const struct gl_pixelstore_attrib *packing,
+				 struct gl_texture_object *texObj,
+				 struct gl_texture_image *texImage )
+{
+   savageTexObjPtr t = (savageTexObjPtr) texObj->DriverData;
+
+   assert( t ); /* this _should_ be true */
+   if (!t) {
+      t = savageAllocTexObj(texObj);
+      if (!t) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+         return;
+      }
+      t->base.dirty_images[0] |= (1 << level);
+   } else {
+      savageTexImageChanged (t);
+      savageMarkDirtyTiles(t, level, texImage->Width2, texImage->Height2,
+			   xoffset, yoffset, width, height);
+   }
+
+   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+			     height, format, type, pixels, packing, texObj,
+			     texImage);
+
+   t->dirtySubImages |= (1 << level);
+   SAVAGE_CONTEXT(ctx)->new_state |= SAVAGE_NEW_TEXTURE;
+}
+
+/* Driver hook for glCompressedTexImage2D.
+ *
+ * Same flow as the uncompressed TexImage2D hook: make sure driver data
+ * exists, let core Mesa store the compressed image, then set the level's
+ * bit in base.dirty_images[0] and raise SAVAGE_NEW_TEXTURE.
+ */
+static void
+savageCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+                            GLint internalFormat,
+                            GLint width, GLint height, GLint border,
+                            GLsizei imageSize, const GLvoid *data,
+                            struct gl_texture_object *texObj,
+                            struct gl_texture_image *texImage )
+{
+   savageTexObjPtr savTex = (savageTexObjPtr) texObj->DriverData;
+
+   if (savTex == NULL) {
+      /* First image for this object: create the driver-side data. */
+      savTex = savageAllocTexObj(texObj);
+      if (savTex == NULL) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
+         return;
+      }
+   }
+   else {
+      savageTexImageChanged (savTex);
+   }
+
+   _mesa_store_compressed_teximage2d( ctx, target, level, internalFormat,
+                                      width, height, border, imageSize,
+                                      data, texObj, texImage );
+
+   savTex->base.dirty_images[0] |= (1 << level);
+   SAVAGE_CONTEXT(ctx)->new_state |= SAVAGE_NEW_TEXTURE;
+}
+
+/* Driver hook for glCompressedTexSubImage2D.
+ *
+ * The texture object is expected to have driver data already (asserted);
+ * the touched rectangle is then passed to savageMarkDirtyTiles.  If the
+ * data is missing anyway (assertions compiled out), it is allocated here
+ * and the level's bit is set in base.dirty_images[0].  Core Mesa stores
+ * the compressed sub-image, after which the level's bit is set in
+ * dirtySubImages and SAVAGE_NEW_TEXTURE is raised.
+ *
+ * On allocation failure GL_OUT_OF_MEMORY is recorded under this entry
+ * point's own name and nothing is stored.
+ */
+static void
+savageCompressedTexSubImage2D( GLcontext *ctx, 
+			       GLenum target,
+			       GLint level,	
+			       GLint xoffset, GLint yoffset,
+			       GLsizei width, GLsizei height,
+			       GLenum format, GLsizei imageSize,
+			       const GLvoid *data,
+			       struct gl_texture_object *texObj,
+			       struct gl_texture_image *texImage )
+{
+   savageTexObjPtr t = (savageTexObjPtr) texObj->DriverData;
+   assert( t ); /* this _should_ be true */
+   if (t) {
+      savageTexImageChanged (t);
+      savageMarkDirtyTiles(t, level, texImage->Width2, texImage->Height2,
+			   xoffset, yoffset, width, height);
+   } else {
+      t = savageAllocTexObj(texObj);
+      if (!t) {
+         /* Report the error against the function the application called. */
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D");
+         return;
+      }
+      t->base.dirty_images[0] |= (1 << level);
+   }
+   _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset,
+					width, height, format, imageSize,
+					data, texObj, texImage);
+   t->dirtySubImages |= (1 << level);
+   SAVAGE_CONTEXT(ctx)->new_state |= SAVAGE_NEW_TEXTURE;
+}
+
+/* Driver hook for glTexParameter.
+ *
+ * Only 1D/2D textures that already have driver data are handled.  Filter,
+ * wrap-mode and border-colour changes are forwarded to the matching
+ * savageSetTex* helper (using the values already stored in tObj) and
+ * SAVAGE_NEW_TEXTURE is raised; every other pname is ignored.
+ */
+static void savageTexParameter( GLcontext *ctx, GLenum target,
+                                struct gl_texture_object *tObj,
+                                GLenum pname, const GLfloat *params )
+{
+   savageTexObjPtr savTex = (savageTexObjPtr) tObj->DriverData;
+
+   if (savTex == NULL)
+      return;
+   if (target != GL_TEXTURE_1D && target != GL_TEXTURE_2D)
+      return;
+
+   if (pname == GL_TEXTURE_MIN_FILTER || pname == GL_TEXTURE_MAG_FILTER) {
+      savageSetTexFilter(savTex, tObj->MinFilter, tObj->MagFilter);
+   }
+   else if (pname == GL_TEXTURE_WRAP_S || pname == GL_TEXTURE_WRAP_T) {
+      savageSetTexWrapping(savTex, tObj->WrapS, tObj->WrapT);
+   }
+   else if (pname == GL_TEXTURE_BORDER_COLOR) {
+      savageSetTexBorderColor(savTex, tObj->BorderColor.f);
+   }
+   else {
+      /* Parameter does not affect the state tracked here. */
+      return;
+   }
+
+   SAVAGE_CONTEXT( ctx )->new_state |= SAVAGE_NEW_TEXTURE;
+}
+
+/* Driver hook for glBindTexture: checks (in debug builds) that 1D/2D
+ * texture objects carry driver data, then raises SAVAGE_NEW_TEXTURE.
+ */
+static void savageBindTexture( GLcontext *ctx, GLenum target,
+                               struct gl_texture_object *tObj )
+{
+   assert( (target != GL_TEXTURE_1D && target != GL_TEXTURE_2D) ||
+	   (tObj->DriverData != NULL) );
+
+   SAVAGE_CONTEXT( ctx )->new_state |= SAVAGE_NEW_TEXTURE;
+}
+
+/* Driver hook for texture deletion.
+ *
+ * Destroys the driver-side texture data, if any (calling
+ * savageTimestampTextures first when the texture is marked bound), then
+ * lets core Mesa free the mipmap images and the object itself.
+ */
+static void savageDeleteTexture( GLcontext *ctx, struct gl_texture_object *tObj )
+{
+   driTextureObject *driTex = (driTextureObject *)tObj->DriverData;
+
+   if (driTex != NULL) {
+      if (driTex->bound) {
+         savageTimestampTextures(SAVAGE_CONTEXT( ctx ));
+      }
+      driDestroyTextureObject(driTex);
+   }
+
+   /* Free mipmap images and the texture object itself */
+   _mesa_delete_texture_object(ctx, tObj);
+}
+
+
+/* Driver hook that creates a texture object.
+ *
+ * Asks core Mesa for the object and, when that succeeds, attaches savage
+ * driver data to it.  Returns NULL when core Mesa could not create the
+ * object; the object is never handed to savageAllocTexObj in that case.
+ *
+ * The result of savageAllocTexObj is deliberately not checked: the
+ * TexImage hooks in this file allocate the driver data themselves when
+ * they find DriverData still NULL (assuming a failed allocation leaves it
+ * NULL -- confirm in savageAllocTexObj).
+ */
+static struct gl_texture_object *
+savageNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
+{
+    struct gl_texture_object *obj;
+
+    obj = _mesa_new_texture_object(ctx, name, target);
+    if (obj == NULL)
+        return NULL;
+
+    (void) savageAllocTexObj( obj );
+
+    return obj;
+}
+
+/* Install the savage texture hooks into the device-driver function table. */
+void savageDDInitTextureFuncs( struct dd_function_table *functions )
+{
+   /* Texture object lifetime and binding. */
+   functions->NewTextureObject = savageNewTextureObject;
+   functions->DeleteTexture = savageDeleteTexture;
+   functions->BindTexture = savageBindTexture;
+   functions->IsTextureResident = driIsTextureResident;
+
+   /* Texture state. */
+   functions->TexEnv = savageTexEnv;
+   functions->TexParameter = savageTexParameter;
+   functions->ChooseTextureFormat = savageChooseTextureFormat;
+
+   /* Image uploads. */
+   functions->TexImage1D = savageTexImage1D;
+   functions->TexSubImage1D = savageTexSubImage1D;
+   functions->TexImage2D = savageTexImage2D;
+   functions->TexSubImage2D = savageTexSubImage2D;
+   functions->CompressedTexImage2D = savageCompressedTexImage2D;
+   functions->CompressedTexSubImage2D = savageCompressedTexSubImage2D;
+
+   /* Texel fetching with our custom texture formats works just like
+    * the standard argb formats.  (Currently compiled out.) */
+#if 0
+   _savage_texformat_a1114444.FetchTexel1D = _mesa_texformat_argb4444.FetchTexel1D;
+   _savage_texformat_a1114444.FetchTexel2D = _mesa_texformat_argb4444.FetchTexel2D;
+   _savage_texformat_a1114444.FetchTexel3D = _mesa_texformat_argb4444.FetchTexel3D;
+   _savage_texformat_a1114444.FetchTexel1Df= _mesa_texformat_argb4444.FetchTexel1Df;
+   _savage_texformat_a1114444.FetchTexel2Df= _mesa_texformat_argb4444.FetchTexel2Df;
+   _savage_texformat_a1114444.FetchTexel3Df= _mesa_texformat_argb4444.FetchTexel3Df;
+
+   _savage_texformat_a1118888.FetchTexel1D = _mesa_texformat_argb8888.FetchTexel1D;
+   _savage_texformat_a1118888.FetchTexel2D = _mesa_texformat_argb8888.FetchTexel2D;
+   _savage_texformat_a1118888.FetchTexel3D = _mesa_texformat_argb8888.FetchTexel3D;
+   _savage_texformat_a1118888.FetchTexel1Df= _mesa_texformat_argb8888.FetchTexel1Df;
+   _savage_texformat_a1118888.FetchTexel2Df= _mesa_texformat_argb8888.FetchTexel2Df;
+   _savage_texformat_a1118888.FetchTexel3Df= _mesa_texformat_argb8888.FetchTexel3Df;
+#endif
+}
diff --git a/src/mesa/drivers/dri/savage/savagetex.h b/src/mesa/drivers/dri/savage/savagetex.h
new file mode 100644
index 0000000000..e5f8a80f85
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savagetex.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef SAVAGETEX_INC
+#define SAVAGETEX_INC
+
+#include "main/mtypes.h"
+
+#include "savagecontext.h"
+#include "texmem.h"
+
+#define SAVAGE_TEX_MAXLEVELS 12
+
+/** \brief Texture tiling information */
+typedef struct savage_tileinfo_t {
+    GLuint width, height;       /**< tile width and height */
+    GLuint wInSub, hInSub;      /**< tile width and height in subtiles */
+    GLuint subWidth, subHeight; /**< subtile width and height */
+    GLuint tinyOffset[2];       /**< internal offsets of the size 1 and size 2 images */
+} savageTileInfo, *savageTileInfoPtr;
+
+typedef struct {		/* per-image record; savageTexObj holds SAVAGE_TEX_MAXLEVELS of these */
+    GLuint offset;		/* NOTE(review): presumably where this image starts in the texture's memory -- confirm */
+    GLuint nTiles;		/* NOTE(review): presumably the number of tiles in this image -- confirm */
+    GLuint *dirtyTiles;		/* bit vector of dirty tiles (still unused) -- NOTE(review): savageMarkDirtyTiles() exists, so "unused" may be stale */
+} savageTexImage;
+
+typedef struct {		/* driver-side data hung off gl_texture_object::DriverData */
+    driTextureObject base;	/* must stay first: DriverData is cast both to driTextureObject* and to savageTexObjPtr */
+
+    GLubyte *bufAddr;		/* NOTE(review): presumably CPU address of the texture's memory -- confirm */
+
+    GLuint age;
+    savageTexImage image[SAVAGE_TEX_MAXLEVELS];	/* NOTE(review): presumably indexed by mipmap level -- confirm */
+    GLuint dirtySubImages;	/* bit (1 << level) is set by the TexSubImage hooks */
+
+    struct {			/* values set through the savageSetTex* helpers, presumably -- confirm */
+	GLuint sWrapMode, tWrapMode;
+	GLuint minFilter, magFilter;
+	GLuint physAddr;
+    } setup;
+
+    GLuint hwFormat;
+    GLuint texelBytes;		/* NOTE(review): presumably bytes per texel -- confirm */
+    const savageTileInfo *tileInfo;	/* tiling layout in use; see savageTileInfo */
+} savageTexObj, *savageTexObjPtr;
+
+#define SAVAGE_NO_PALETTE        0x0	/* palette flag values; the non-zero ones are distinct bits */
+#define SAVAGE_USE_PALETTE       0x1
+#define SAVAGE_UPDATE_PALETTE    0x2
+#define SAVAGE_FALLBACK_PALETTE  0x4
+#define __HWEnvCombineSingleUnitScale(imesa, flag0, flag1, TexBlendCtrl)	/* expands to nothing */
+#define __HWParseTexEnvCombine(imesa, flag0, TexCtrl, TexBlendCtrl)	/* expands to nothing */
+
+
+void savageUpdateTextureState( GLcontext *ctx );
+void savageDDInitTextureFuncs( struct dd_function_table *functions );
+
+void savageDestroyTexObj( savageContextPtr imesa, savageTexObjPtr t );
+
+#endif
diff --git a/src/mesa/drivers/dri/savage/savagetris.c b/src/mesa/drivers/dri/savage/savagetris.c
new file mode 100644
index 0000000000..0050485e31
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savagetris.c
@@ -0,0 +1,1306 @@
+/* -*- c-basic-offset: 3 -*- */
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keithw@valinux.com>
+ *   Felix Kuehling <fxkuehl@gmx.de>
+ *
+ */
+
+#include <stdio.h>
+#include <math.h>
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/macros.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "savagetris.h"
+#include "savagestate.h"
+#include "savagetex.h"
+#include "savageioctl.h"
+
+static void savageRasterPrimitive( GLcontext *ctx, GLuint prim );
+static void savageRenderPrimitive( GLcontext *ctx, GLenum prim );
+
+
+/* Maps every GL primitive type (the table index, GL_POINTS..GL_POLYGON)
+ * to the basic primitive it is rasterized as: points, lines or
+ * triangles.  Read-only lookup table.
+ */
+static const GLenum reduced_prim[GL_POLYGON+1] = {
+   GL_POINTS,		/* GL_POINTS */
+   GL_LINES,		/* GL_LINES */
+   GL_LINES,		/* GL_LINE_LOOP */
+   GL_LINES,		/* GL_LINE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLES */
+   GL_TRIANGLES,	/* GL_TRIANGLE_STRIP */
+   GL_TRIANGLES,	/* GL_TRIANGLE_FAN */
+   GL_TRIANGLES,	/* GL_QUADS */
+   GL_TRIANGLES,	/* GL_QUAD_STRIP */
+   GL_TRIANGLES		/* GL_POLYGON */
+};
+
+ 
+/***********************************************************************
+ *                    Emit primitives                                  *
+ ***********************************************************************/
+
+#if defined (USE_X86_ASM)	/* copy with "rep movsl" instead of a C loop */
+#define EMIT_VERT( j, vb, vertex_size, start, v )		\
+do {	int __tmp;						\
+	 vb += start;	/* skip the dwords the caller filled in itself */	\
+	__asm__ __volatile__( "rep ; movsl"			\
+			 : "=%c" (j), "=D" (vb), "=S" (__tmp)	/* vb is written back, so it ends up just past this vertex */	\
+			 : "0" (vertex_size-start),	/* dword count */	\
+			   "D" ((long)vb),	/* destination */	\
+			   "S" ((long)&(v)->ui[start]));	/* source: dword 'start' of v */	\
+} while (0)
+#else	/* portable variant: same copy, same final vb */
+#define EMIT_VERT( j, vb, vertex_size, start, v )	\
+do {						\
+   for ( j = start ; j < vertex_size ; j++ )	/* dwords [start, vertex_size) */	\
+      vb[j] = (v)->ui[j];			\
+   vb += vertex_size;	/* advance to the next vertex slot */	\
+} while (0)
+#endif	/* USE_X86_ASM */
+
+/* Append one triangle (v0, v1, v2) to the vertex buffer: room for three
+ * hardware vertices is requested and each vertex is copied in full.
+ */
+static void INLINE savage_draw_triangle (savageContextPtr imesa,
+                                         savageVertexPtr v0,
+                                         savageVertexPtr v1,
+                                         savageVertexPtr v2) {
+   GLuint hwSize = imesa->HwVertexSize;
+   uint32_t *out = savageAllocVtxBuf (imesa, 3*hwSize);
+   savageVertexPtr corner[3];
+   GLuint n, k;
+
+   corner[0] = v0;
+   corner[1] = v1;
+   corner[2] = v2;
+
+   for (k = 0; k < 3; k++) {
+      EMIT_VERT (n, out, hwSize, 0, corner[k]);
+   }
+}
+
+/* Append one quad to the vertex buffer as two triangles,
+ * (v0, v1, v3) and (v1, v2, v3): six full hardware vertices.
+ */
+static void INLINE savage_draw_quad (savageContextPtr imesa,
+                                     savageVertexPtr v0,
+                                     savageVertexPtr v1,
+                                     savageVertexPtr v2,
+                                     savageVertexPtr v3) {
+   GLuint hwSize = imesa->HwVertexSize;
+   uint32_t *out = savageAllocVtxBuf (imesa, 6*hwSize);
+   savageVertexPtr order[6];
+   GLuint n, k;
+
+   /* first triangle */
+   order[0] = v0;
+   order[1] = v1;
+   order[2] = v3;
+   /* second triangle */
+   order[3] = v1;
+   order[4] = v2;
+   order[5] = v3;
+
+   for (k = 0; k < 6; k++) {
+      EMIT_VERT (n, out, hwSize, 0, order[k]);
+   }
+}
+
+/* Draw a point as a screen-aligned square made of two triangles.
+ *
+ * The square is centred on the vertex position and its half-size is half
+ * the GL point size clamped to [MinPointSize, MaxPointSize].  For each
+ * of the six emitted vertices x/y are written directly and the remaining
+ * dwords (from index 2 on) are copied from the source vertex.
+ */
+static INLINE void savage_draw_point (savageContextPtr imesa,
+                                      savageVertexPtr tmp) {
+   /* Corner sequence: (-,-) (+,-) (+,+)  (+,+) (-,+) (-,-) */
+   static const GLubyte plusX[6] = { 0, 1, 1, 1, 0, 0 };
+   static const GLubyte plusY[6] = { 0, 0, 1, 1, 1, 0 };
+   GLuint hwSize = imesa->HwVertexSize;
+   uint32_t *out = savageAllocVtxBuf (imesa, 6*hwSize);
+   const GLfloat cx = tmp->v.x;
+   const GLfloat cy = tmp->v.y;
+   const GLfloat half = 0.5 * CLAMP(imesa->glCtx->Point.Size,
+                                    imesa->glCtx->Const.MinPointSize,
+                                    imesa->glCtx->Const.MaxPointSize);
+   GLuint n, k;
+
+   for (k = 0; k < 6; k++) {
+      if (plusX[k])
+         *(float *)&out[0] = cx + half;
+      else
+         *(float *)&out[0] = cx - half;
+
+      if (plusY[k])
+         *(float *)&out[1] = cy + half;
+      else
+         *(float *)&out[1] = cy - half;
+
+      EMIT_VERT (n, out, hwSize, 2, tmp);
+   }
+}
+
+/* Draw a line as a quad made of two triangles.
+ *
+ * The line is widened by half the clamped GL line width on each side:
+ * along y when it is mostly horizontal (dx*dx > dy*dy), along x
+ * otherwise.  For each of the six emitted vertices x/y are written
+ * directly and the remaining dwords (from index 2 on) are copied from
+ * the end point the vertex belongs to.
+ */
+static INLINE void savage_draw_line (savageContextPtr imesa,
+                                     savageVertexPtr v0,
+                                     savageVertexPtr v1 ) {
+   /* Emission order: v0-, v1+, v0+,  v0-, v1-, v1+ */
+   static const GLubyte fromV1[6] = { 0, 1, 0, 0, 1, 1 };
+   static const GLubyte addOfs[6] = { 0, 1, 1, 0, 0, 1 };
+   GLuint hwSize = imesa->HwVertexSize;
+   uint32_t *out = savageAllocVtxBuf (imesa, 6*hwSize);
+   const GLfloat width = CLAMP(imesa->glCtx->Line.Width,
+                               imesa->glCtx->Const.MinLineWidth,
+                               imesa->glCtx->Const.MaxLineWidth);
+   GLfloat dx, dy, ix, iy;
+   GLuint n, k;
+
+   dx = v0->v.x - v1->v.x;
+   dy = v0->v.y - v1->v.y;
+
+   if (dx * dx > dy * dy) {
+      ix = 0;
+      iy = width * .5;
+   }
+   else {
+      ix = width * .5;
+      iy = 0;
+   }
+
+   for (k = 0; k < 6; k++) {
+      savageVertexPtr end = fromV1[k] ? v1 : v0;
+
+      if (addOfs[k]) {
+         *(float *)&out[0] = end->v.x + ix;
+         *(float *)&out[1] = end->v.y + iy;
+      }
+      else {
+         *(float *)&out[0] = end->v.x - ix;
+         *(float *)&out[1] = end->v.y - iy;
+      }
+      EMIT_VERT (n, out, hwSize, 2, end);
+   }
+}
+
+/* Fallback drawing functions for the ptex hack. Code duplication
+ * (especially lines and points) isn't beautiful, but I didn't feel
+ * like inventing yet another template. :-/
+ */
+/* Build in `tmp' an adjusted copy of vertex `v' for the ptex hack.
+ *
+ * Floats [start, vertex_size) are copied from v.  The float stored at
+ * index vertex_size (one past the copied range) acts as a scale factor:
+ * tmp.f[3] is multiplied by it, and the last two copied floats are
+ * multiplied by its reciprocal.  No check is made for a zero value.
+ * NOTE(review): presumably that extra float is the texture q coordinate,
+ * f[3] is rhw and the last two floats are s/t -- confirm against the
+ * vertex setup code.
+ *
+ * `j' is a scratch loop counter.  `tmp' is an lvalue of type
+ * savageVertex, `v' a pointer to one; both are parenthesized so that
+ * non-trivial expressions can be passed safely.
+ */
+#define PTEX_VERTEX( j, tmp, vertex_size, start, v)	\
+do {							\
+   GLfloat rhw = 1.0 / (v)->f[vertex_size];		\
+   for ( (j) = (start) ; (j) < (vertex_size) ; (j)++ )	\
+      (tmp).f[j] = (v)->f[j];				\
+   (tmp).f[3] *= (v)->f[vertex_size];			\
+   (tmp).f[(vertex_size)-2] *= rhw;			\
+   (tmp).f[(vertex_size)-1] *= rhw;			\
+} while (0)
+
+/* ptex-hack variant of savage_draw_triangle: every vertex is first run
+ * through PTEX_VERTEX into a scratch vertex, which is then emitted in
+ * full.
+ */
+static void INLINE savage_ptex_tri (savageContextPtr imesa,
+                                    savageVertexPtr v0,
+                                    savageVertexPtr v1,
+                                    savageVertexPtr v2) {
+   GLuint hwSize = imesa->HwVertexSize;
+   uint32_t *out = savageAllocVtxBuf (imesa, 3*hwSize);
+   savageVertexPtr corner[3];
+   savageVertex scratch;
+   GLuint n, k;
+
+   corner[0] = v0;
+   corner[1] = v1;
+   corner[2] = v2;
+
+   for (k = 0; k < 3; k++) {
+      PTEX_VERTEX (n, scratch, hwSize, 0, corner[k]);
+      EMIT_VERT (n, out, hwSize, 0, &scratch);
+   }
+}
+
+/* ptex-hack variant of savage_draw_line.
+ *
+ * Both end points are run through PTEX_VERTEX (from dword 2 on) once.
+ * The quad corners are then computed from the ORIGINAL x/y of v0/v1,
+ * while the remaining dwords are taken from the adjusted copies.
+ */
+static INLINE void savage_ptex_line (savageContextPtr imesa,
+                                     savageVertexPtr v0,
+                                     savageVertexPtr v1 ) {
+   /* Emission order: v0-, v1+, v0+,  v0-, v1-, v1+ */
+   static const GLubyte fromV1[6] = { 0, 1, 0, 0, 1, 1 };
+   static const GLubyte addOfs[6] = { 0, 1, 1, 0, 0, 1 };
+   GLuint hwSize = imesa->HwVertexSize;
+   uint32_t *out = savageAllocVtxBuf (imesa, 6*hwSize);
+   const GLfloat width = CLAMP(imesa->glCtx->Line.Width,
+                               imesa->glCtx->Const.MinLineWidth,
+                               imesa->glCtx->Const.MaxLineWidth);
+   GLfloat dx, dy, ix, iy;
+   savageVertex adjusted[2];
+   GLuint n, k;
+
+   PTEX_VERTEX (n, adjusted[0], hwSize, 2, v0);
+   PTEX_VERTEX (n, adjusted[1], hwSize, 2, v1);
+
+   dx = v0->v.x - v1->v.x;
+   dy = v0->v.y - v1->v.y;
+
+   if (dx * dx > dy * dy) {
+      ix = 0;
+      iy = width * .5;
+   }
+   else {
+      ix = width * .5;
+      iy = 0;
+   }
+
+   for (k = 0; k < 6; k++) {
+      savageVertexPtr end = fromV1[k] ? v1 : v0;
+
+      if (addOfs[k]) {
+         *(float *)&out[0] = end->v.x + ix;
+         *(float *)&out[1] = end->v.y + iy;
+      }
+      else {
+         *(float *)&out[0] = end->v.x - ix;
+         *(float *)&out[1] = end->v.y - iy;
+      }
+      EMIT_VERT (n, out, hwSize, 2, &adjusted[fromV1[k]]);
+   }
+}
+
+/* ptex-hack variant of savage_draw_point.
+ *
+ * The vertex is run through PTEX_VERTEX (from dword 2 on) once.  The six
+ * corners of the square are computed from the ORIGINAL x/y, while the
+ * remaining dwords come from the adjusted copy.
+ */
+static INLINE void savage_ptex_point (savageContextPtr imesa,
+                                      savageVertexPtr v0) {
+   /* Corner sequence: (-,-) (+,-) (+,+)  (+,+) (-,+) (-,-) */
+   static const GLubyte plusX[6] = { 0, 1, 1, 1, 0, 0 };
+   static const GLubyte plusY[6] = { 0, 0, 1, 1, 1, 0 };
+   GLuint hwSize = imesa->HwVertexSize;
+   uint32_t *out = savageAllocVtxBuf (imesa, 6*hwSize);
+   const GLfloat cx = v0->v.x;
+   const GLfloat cy = v0->v.y;
+   const GLfloat half = 0.5 * CLAMP(imesa->glCtx->Point.Size,
+                                    imesa->glCtx->Const.MinPointSize,
+                                    imesa->glCtx->Const.MaxPointSize);
+   savageVertex adjusted;
+   GLuint n, k;
+
+   PTEX_VERTEX (n, adjusted, hwSize, 2, v0);
+
+   for (k = 0; k < 6; k++) {
+      if (plusX[k])
+         *(float *)&out[0] = cx + half;
+      else
+         *(float *)&out[0] = cx - half;
+
+      if (plusY[k])
+         *(float *)&out[1] = cy + half;
+      else
+         *(float *)&out[1] = cy - half;
+
+      EMIT_VERT (n, out, hwSize, 2, &adjusted);
+   }
+}
+ 
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+#define TRI( a, b, c )				\
+do {						\
+   if (DO_FALLBACK)	/* depends only on IND, so constant within one template instance */	\
+      imesa->draw_tri( imesa, a, b, c );	/* per-context function pointer */	\
+   else						\
+      savage_draw_triangle( imesa, a, b, c );	\
+} while (0)
+
+#define QUAD( a, b, c, d )			\
+do {						\
+   if (DO_FALLBACK) {	/* same a-b-d / b-c-d split that savage_draw_quad emits */	\
+      imesa->draw_tri( imesa, a, b, d );	\
+      imesa->draw_tri( imesa, b, c, d );	\
+   } else 					\
+      savage_draw_quad( imesa, a, b, c, d );	\
+} while (0)
+
+#define LINE( v0, v1 )				\
+do {						\
+   if (DO_FALLBACK)				\
+      imesa->draw_line( imesa, v0, v1 );	/* per-context function pointer */	\
+   else 					\
+      savage_draw_line( imesa, v0, v1 );	\
+} while (0)
+
+#define POINT( v0 )				\
+do {						\
+   if (DO_FALLBACK)				\
+      imesa->draw_point( imesa, v0 );		/* per-context function pointer */	\
+   else 					\
+      savage_draw_point( imesa, v0 );		\
+} while (0)
+
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+#define SAVAGE_OFFSET_BIT	 0x1	/* flag bits OR-ed together to form a rast_tab index */
+#define SAVAGE_TWOSIDE_BIT       0x2
+#define SAVAGE_UNFILLED_BIT      0x4
+#define SAVAGE_FALLBACK_BIT      0x8	/* dispatch through imesa->draw_* (see DO_FALLBACK) */
+#define SAVAGE_MAX_TRIFUNC       0x10	/* number of flag combinations = size of rast_tab */
+
+
+static struct {			/* one set of render functions per flag combination */
+   tnl_points_func	        points;
+   tnl_line_func		line;
+   tnl_triangle_func	triangle;
+   tnl_quad_func		quad;
+} rast_tab[SAVAGE_MAX_TRIFUNC];
+
+
+#define DO_FALLBACK (IND & SAVAGE_FALLBACK_BIT)	/* DO_* switches are derived from the IND of the current instance */
+#define DO_OFFSET   (IND & SAVAGE_OFFSET_BIT)
+#define DO_UNFILLED (IND & SAVAGE_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & SAVAGE_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_SPEC   1
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define VERTEX savageVertex
+#define TAB rast_tab
+
+#define DEPTH_SCALE imesa->depth_scale
+#define REVERSE_DEPTH 1
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a > 0)
+#define GET_VERTEX(e) (imesa->verts + (e * imesa->vertex_size * sizeof(int)))	/* vertex e; stride is vertex_size * sizeof(int) -- vertex_size presumably counts dwords, confirm */
+
+#define VERT_SET_RGBA( v, c )					\
+do {								\
+   savage_color_t *color = (savage_color_t *)&((v)->ub4[coloroffset]);	/* shadows LOCAL_VARS' color[] inside this block */	\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]);		\
+} while (0)
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]	/* whole packed colour dword */
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]	/* into LOCAL_VARS' color[] */
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+
+#define VERT_SET_SPEC( v, c )					\
+do {								\
+   if (specoffset) {	/* 0 means the vertex has no secondary colour */	\
+      savage_color_t *spec = (savage_color_t *)&((v)->ub4[specoffset]);	/* shadows LOCAL_VARS' spec[] inside this block */	\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]);		\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]);		\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]);		\
+   }								\
+} while (0)
+#define VERT_COPY_SPEC( v0, v1 )	/* NOTE(review): bare if -- safe only as a full statement, never directly before an else */	\
+   if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset])
+#define VERT_SAVE_SPEC( idx )		/* bare if, same caveat; saves into LOCAL_VARS' spec[] */	\
+   if (specoffset) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx )	/* bare if, same caveat */	\
+   if (specoffset) v[idx]->ui[specoffset] = spec[idx]
+
+/* Locals that the VERT_* macros and the TRI/QUAD/LINE/POINT macros rely
+ * on; the tnl_dd templates included below are expected to expand this
+ * inside each generated function, with `ctx' in scope.
+ *
+ *   color[], spec[]  - scratch storage for VERT_SAVE_ / VERT_RESTORE_
+ *   coloroffset      - dword index of the packed colour: 3 when
+ *                      SAVAGE_SKIP_W is set in imesa->skip, else 4
+ *   specoffset       - dword index of the secondary colour
+ *                      (coloroffset+1), or 0 when SAVAGE_SKIP_C1 is set,
+ *                      meaning "no secondary colour"
+ *
+ * specoffset is an index, not a truth value, so it is declared GLuint
+ * like coloroffset.
+ */
+#define LOCAL_VARS(n)						\
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);		\
+   GLuint color[n] = { 0 };					\
+   GLuint spec[n] = { 0 };					\
+   GLuint coloroffset =						\
+      ((imesa->skip & SAVAGE_SKIP_W) ? 3 : 4);			\
+   GLuint specoffset =						\
+      ((imesa->skip & SAVAGE_SKIP_C1) ? 0 : coloroffset+1);	\
+   (void) color; (void) spec; (void) coloroffset; (void) specoffset;
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+#define RASTERIZE(x) if (imesa->raster_primitive != reduced_prim[x]) /* switch only when the reduced primitive differs; NOTE(review): bare if, safe only as a full statement */ \
+                        savageRasterPrimitive( ctx, x )
+#define RENDER_PRIMITIVE imesa->render_primitive
+#define IND SAVAGE_FALLBACK_BIT	/* makes DO_FALLBACK true, so TRI/LINE/POINT go through imesa->draw_* here */
+#define TAG(x) x	/* generated names are left unprefixed */
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+
+#define IND (0)	/* no flags; one template instance follows per flag combination. NOTE(review): IND/TAG are redefined without #undef, so the template presumably #undefs them -- confirm */
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_TWOSIDE_BIT|SAVAGE_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_OFFSET_BIT|SAVAGE_UNFILLED_BIT)
+#define TAG(x) x##_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_TWOSIDE_BIT|SAVAGE_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_TWOSIDE_BIT|SAVAGE_OFFSET_BIT|SAVAGE_UNFILLED_BIT)
+#define TAG(x) x##_twoside_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_FALLBACK_BIT)	/* from here on DO_FALLBACK is true: primitives go through imesa->draw_* */
+#define TAG(x) x##_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_OFFSET_BIT|SAVAGE_FALLBACK_BIT)
+#define TAG(x) x##_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_TWOSIDE_BIT|SAVAGE_FALLBACK_BIT)
+#define TAG(x) x##_twoside_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_TWOSIDE_BIT|SAVAGE_OFFSET_BIT|SAVAGE_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_UNFILLED_BIT|SAVAGE_FALLBACK_BIT)
+#define TAG(x) x##_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_OFFSET_BIT|SAVAGE_UNFILLED_BIT|SAVAGE_FALLBACK_BIT)
+#define TAG(x) x##_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_TWOSIDE_BIT|SAVAGE_UNFILLED_BIT|SAVAGE_FALLBACK_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SAVAGE_TWOSIDE_BIT|SAVAGE_OFFSET_BIT|SAVAGE_UNFILLED_BIT| \
+	     SAVAGE_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+/* Run the init function of every template instance generated above,
+ * in the order the instances were generated (16 in total).
+ */
+static void init_rast_tab( void )
+{
+   static void (* const instance_init[])( void ) = {
+      init,
+      init_offset,
+      init_twoside,
+      init_twoside_offset,
+      init_unfilled,
+      init_offset_unfilled,
+      init_twoside_unfilled,
+      init_twoside_offset_unfilled,
+      init_fallback,
+      init_offset_fallback,
+      init_twoside_fallback,
+      init_twoside_offset_fallback,
+      init_unfilled_fallback,
+      init_offset_unfilled_fallback,
+      init_twoside_unfilled_fallback,
+      init_twoside_offset_unfilled_fallback
+   };
+   GLuint i;
+
+   for (i = 0; i < sizeof(instance_init) / sizeof(instance_init[0]); i++)
+      instance_init[i]();
+}
+
+
+
+/***********************************************************************
+ *                    Rasterization fallback helpers                   *
+ ***********************************************************************/
+
+
+/* Software fallback for a single triangle.
+ *
+ * This code is hit only when a mix of accelerated and unaccelerated
+ * primitives is being drawn, and only for the unaccelerated ones.
+ * FLUSH_BATCH and WAIT_IDLE_EMPTY run first (presumably so the hardware
+ * has finished before swrast draws -- confirm); the three vertices are
+ * then translated to SWvertex form and handed to _swrast_Triangle.
+ */
+static void
+savage_fallback_tri( savageContextPtr imesa,
+                     savageVertexPtr v0,
+                     savageVertexPtr v1,
+                     savageVertexPtr v2 )
+{
+   GLcontext *ctx = imesa->glCtx;
+   SWvertex swA, swB, swC;
+
+   FLUSH_BATCH(imesa);
+   WAIT_IDLE_EMPTY(imesa);
+
+   _swsetup_Translate( ctx, v0, &swA );
+   _swsetup_Translate( ctx, v1, &swB );
+   _swsetup_Translate( ctx, v2, &swC );
+
+   _swrast_Triangle( ctx, &swA, &swB, &swC );
+}
+
+
+/* Software fallback for a single line: FLUSH_BATCH and WAIT_IDLE_EMPTY
+ * run first, then both end points are translated to SWvertex form and
+ * handed to _swrast_Line.
+ */
+static void
+savage_fallback_line( savageContextPtr imesa,
+                      savageVertexPtr v0,
+                      savageVertexPtr v1 )
+{
+   GLcontext *ctx = imesa->glCtx;
+   SWvertex swA, swB;
+
+   FLUSH_BATCH(imesa);
+   WAIT_IDLE_EMPTY(imesa);
+
+   _swsetup_Translate( ctx, v0, &swA );
+   _swsetup_Translate( ctx, v1, &swB );
+
+   _swrast_Line( ctx, &swA, &swB );
+}
+
+
+/* Software fallback for a single point: FLUSH_BATCH and WAIT_IDLE_EMPTY
+ * run first, then the vertex is translated to SWvertex form and handed
+ * to _swrast_Point.
+ */
+static void
+savage_fallback_point( savageContextPtr imesa,
+                       savageVertexPtr v0 )
+{
+   GLcontext *ctx = imesa->glCtx;
+   SWvertex swVert;
+
+   FLUSH_BATCH(imesa);
+   WAIT_IDLE_EMPTY(imesa);
+
+   _swsetup_Translate( ctx, v0, &swVert );
+   _swrast_Point( ctx, &swVert );
+}
+
+
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+
+#define VERT(x) (savageVertexPtr)(savageVerts + (x * vertsize * sizeof(int)))	/* vertex x; byte stride is vertsize * sizeof(int) */
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		/* note: advances 'start' itself */	\
+      savage_draw_point( imesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+   savage_draw_line( imesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 )  \
+   savage_draw_triangle( imesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   savage_draw_quad( imesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#define INIT(x) do {					\
+   if (0) fprintf(stderr, "%s\n", __FUNCTION__);	/* disabled trace output */	\
+   savageRenderPrimitive( ctx, x );                     \
+   /*SAVAGE_CONTEXT(ctx)->render_primitive = x;*/       \
+} while (0)
+#undef LOCAL_VARS	/* replace the LOCAL_VARS(n) used for the tnl_dd templates */
+#define LOCAL_VARS						\
+    savageContextPtr imesa = SAVAGE_CONTEXT(ctx);		\
+    const GLuint vertsize = imesa->vertex_size;	/* note: vertex_size, while the draw functions size their output with HwVertexSize */	\
+    const char *savageVerts = (char *)imesa->verts;	/* byte pointer, so VERT() can add a byte offset */	\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    (void) elt;
+#define RESET_STIPPLE	/* expands to nothing */
+#define RESET_OCCLUSION	/* expands to nothing */
+#define PRESERVE_VB_DEFS	/* NOTE(review): presumably keeps the definitions above alive for the second include -- confirm in t_vb_rendertmp.h */
+#define ELT(x) (x)	/* first instance: vertices addressed directly by index */
+#define TAG(x) savage_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) savage_##x##_elts
+#define ELT(x) elt[x]	/* second instance: indices looked up in vb.Elts */
+#include "tnl/t_vb_rendertmp.h"
+
+
+/**********************************************************************/
+/*                    Render clipped primitives                       */
+/**********************************************************************/
+
+/* Render a clipped polygon given as n element indices.
+ *
+ * The vertex buffer's Elts pointer is temporarily pointed at `elts', the
+ * polygon is drawn through the GL_POLYGON entry of the element render
+ * table as one complete primitive, and the original pointer is put back.
+ */
+static void savageRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+                                     GLuint n )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLuint *savedElts = VB->Elts;
+
+   /* Render the new vertices as an unclipped polygon. */
+   VB->Elts = (GLuint *)elts;
+   tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n, PRIM_BEGIN|PRIM_END );
+   VB->Elts = savedElts;
+}
+
+/* Render a clipped line by forwarding to the currently installed
+ * unclipped line function.
+ */
+static void savageRenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj )
+{
+   TNL_CONTEXT(ctx)->Driver.Render.Line( ctx, ii, jj );
+}
+/*
+static void savageFastRenderClippedPoly( GLcontext *ctx, const GLuint *elts,
+					 GLuint n )
+{
+   r128ContextPtr rmesa = R128_CONTEXT( ctx );
+   GLuint vertsize = rmesa->vertex_size;
+   GLuint *vb = r128AllocDmaLow( rmesa, (n-2) * 3 * 4 * vertsize );
+   GLubyte *r128verts = (GLubyte *)rmesa->verts;
+   const GLuint shift = rmesa->vertex_stride_shift;
+   const GLuint *start = (const GLuint *)VERT(elts[0]);
+   int i,j;
+
+   rmesa->num_verts += (n-2) * 3;
+
+   for (i = 2 ; i < n ; i++) {
+      COPY_DWORDS( j, vb, vertsize, (r128VertexPtr) start );
+      COPY_DWORDS( j, vb, vertsize, (r128VertexPtr) VERT(elts[i-1]) );
+      COPY_DWORDS( j, vb, vertsize, (r128VertexPtr) VERT(elts[i]) );
+   }
+}
+*/
+
+
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+/* _DD_NEW_* flags covering the state that savageChooseRenderState looks
+ * at.  NOTE(review): presumably tested against the new-state mask to
+ * decide when the render functions must be re-chosen -- confirm at the
+ * point of use.  The definition ends on its last line; there is no
+ * trailing line continuation.
+ */
+#define _SAVAGE_NEW_RENDER_STATE (_DD_NEW_LINE_STIPPLE |	\
+			          _DD_NEW_LINE_SMOOTH |		\
+			          _DD_NEW_POINT_SMOOTH |	\
+			          _DD_NEW_TRI_STIPPLE |		\
+			          _DD_NEW_TRI_SMOOTH |		\
+			          _DD_NEW_TRI_UNFILLED |	\
+			          _DD_NEW_TRI_LIGHT_TWOSIDE |	\
+			          _DD_NEW_TRI_OFFSET)
+
+/* _TriangleCaps bits that force a per-primitive software fallback
+ * (savage_fallback_point/line/tri).
+ * original driver didn't have DD_POINT_SMOOTH. really needed? */
+#define POINT_FALLBACK (DD_POINT_SMOOTH)
+#define LINE_FALLBACK (DD_LINE_STIPPLE|DD_LINE_SMOOTH)
+#define TRI_FALLBACK (DD_TRI_STIPPLE|DD_TRI_SMOOTH)
+#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK)
+/* _TriangleCaps bits that select a different rast_tab variant. */
+#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED)
+
+
+static void savageChooseRenderState(GLcontext *ctx)
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint newIndex = 0;
+
+   /* Start from the plain hardware draw functions, or from the ptex
+    * variants when the projective texture hack is active. This happens
+    * first so that a real per-primitive fallback below can still replace
+    * them with the respective savage_fallback_... function.
+    */
+   if (!imesa->ptexHack) {
+      imesa->draw_point = savage_draw_point;
+      imesa->draw_line = savage_draw_line;
+      imesa->draw_tri = savage_draw_triangle;
+   } else {
+      /* Do textures make sense with points? */
+      imesa->draw_point = savage_ptex_point;
+      imesa->draw_line = savage_ptex_line;
+      imesa->draw_tri = savage_ptex_tri;
+      newIndex |= SAVAGE_FALLBACK_BIT;
+   }
+
+   /* Rasterization variants that are encoded in the rast_tab index. */
+   if (caps & DD_TRI_LIGHT_TWOSIDE)
+      newIndex |= SAVAGE_TWOSIDE_BIT;
+   if (caps & DD_TRI_OFFSET)
+      newIndex |= SAVAGE_OFFSET_BIT;
+   if (caps & DD_TRI_UNFILLED)
+      newIndex |= SAVAGE_UNFILLED_BIT;
+
+   /* Hook in fallbacks for specific primitives. */
+   if (caps & ANY_FALLBACK_FLAGS) {
+      if (caps & POINT_FALLBACK)
+	 imesa->draw_point = savage_fallback_point;
+      if (caps & LINE_FALLBACK)
+	 imesa->draw_line = savage_fallback_line;
+      if (caps & TRI_FALLBACK)
+	 imesa->draw_tri = savage_fallback_tri;
+      newIndex |= SAVAGE_FALLBACK_BIT;
+      if (SAVAGE_DEBUG & DEBUG_FALLBACKS)
+	 fprintf (stderr, "Per-primitive fallback, TriangleCaps=0x%x\n",
+		  ctx->_TriangleCaps);
+   }
+
+   /* Reinstall the tnl hooks only when the selection changed. */
+   if (newIndex != imesa->RenderIndex) {
+      TNLcontext *tnl = TNL_CONTEXT(ctx);
+      tnl->Driver.Render.Points = rast_tab[newIndex].points;
+      tnl->Driver.Render.Line = rast_tab[newIndex].line;
+      tnl->Driver.Render.Triangle = rast_tab[newIndex].triangle;
+      tnl->Driver.Render.Quad = rast_tab[newIndex].quad;
+      tnl->Driver.Render.ClippedPolygon = savageRenderClippedPoly;
+      if (newIndex != 0) {
+	 tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+	 tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+	 tnl->Driver.Render.ClippedLine = savageRenderClippedLine;
+      } else {
+	 tnl->Driver.Render.PrimTabVerts = savage_render_tab_verts;
+	 tnl->Driver.Render.PrimTabElts = savage_render_tab_elts;
+	 tnl->Driver.Render.ClippedLine = rast_tab[newIndex].line;
+      }
+      imesa->RenderIndex = newIndex;
+   }
+}
+
+/**********************************************************************/
+/*                 Validate state at pipeline start                   */
+/**********************************************************************/
+
+static void savageRunPipeline( GLcontext *ctx )
+{
+   /* Validate state and pick the hardware primitive, then run the tnl pipeline. */
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+
+   if (imesa->no_rast)
+      FALLBACK(ctx, SAVAGE_FALLBACK_NORAST, GL_TRUE);   /* cleared again below */
+
+   if (imesa->new_state)
+      savageDDUpdateHwState( ctx );
+
+   if (!imesa->Fallback) {
+      GLuint wantedPrim;
+
+      if (imesa->new_gl_state & _SAVAGE_NEW_RENDER_STATE)
+	 savageChooseRenderState( ctx );
+
+      /* choose the correct primitive type for tnl rendering */
+      if (imesa->savageScreen->chipset < S3_SAVAGE4 &&
+	  (ctx->_TriangleCaps & DD_FLATSHADE))
+	 wantedPrim = SAVAGE_PRIM_TRILIST_201;
+      else
+	 wantedPrim = SAVAGE_PRIM_TRILIST;
+      if (imesa->HwPrim != wantedPrim)
+	 savageFlushVertices(imesa);   /* flush before switching types */
+      imesa->HwPrim = wantedPrim;
+      imesa->new_gl_state = 0;
+   }
+
+   _tnl_run_pipeline( ctx );
+
+   if (imesa->no_rast)
+      FALLBACK(ctx, SAVAGE_FALLBACK_NORAST, GL_FALSE);
+}
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+/* This is called when Mesa switches between rendering triangle
+ * primitives (such as GL_POLYGON, GL_QUADS, GL_TRIANGLE_STRIP, etc.),
+ * and lines, points and bitmaps.
+ *
+ * This text was inherited from the r128 driver, which draws lines and
+ * points as triangles and therefore has to turn off hardware culling
+ * for them; here a primitive change just flags SAVAGE_NEW_CULL.
+ */
+
+static void savageRasterPrimitive( GLcontext *ctx, GLuint prim )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT( ctx );
+   const GLboolean changed = (imesa->raster_primitive != prim);
+
+   /* A new reduced primitive needs the cull state to be re-emitted. */
+   if (changed) {
+      imesa->new_state |= SAVAGE_NEW_CULL;
+      imesa->raster_primitive = prim;
+      savageDDUpdateHwState (ctx);
+   }
+#if 0
+   if (ctx->Polygon.StippleFlag && mmesa->haveHwStipple)
+   {
+      mmesa->dirty |= MGA_UPLOAD_CONTEXT;
+      mmesa->setup.dwgctl &= ~(0xf<<20);
+      if (mmesa->raster_primitive == GL_TRIANGLES)
+	 mmesa->setup.dwgctl |= mmesa->poly_stipple;
+   }
+#endif
+}
+
+static void savageRenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   const GLuint reduced = reduced_prim[prim];
+   const GLboolean unfilledTri =
+      reduced == GL_TRIANGLES && (ctx->_TriangleCaps & DD_TRI_UNFILLED);
+
+   /* Remember the GL primitive, then sync the reduced (raster) primitive
+    * unless unfilled triangles are being drawn. */
+   imesa->render_primitive = prim;
+
+   if (!unfilledTri && imesa->raster_primitive != reduced)
+      savageRasterPrimitive( ctx, reduced );
+}
+
+/* Check if projective texture coordinates are used and if we can fake
+ * them. Fall back to swrast if we can't. Returns GL_TRUE if projective
+ * texture coordinates must be faked, GL_FALSE otherwise.
+ */
+static GLboolean savageCheckPTexHack( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   DECLARE_RENDERINPUTS(inputs);
+   RENDERINPUTS_COPY( inputs, tnl->render_inputs_bitset );
+   /* 4-component coords on unit 0: fake them if no other unit is in use. */
+   if (RENDERINPUTS_TEST( inputs, _TNL_ATTRIB_TEX0 ) &&
+       VB->AttribPtr[_TNL_ATTRIB_TEX0]->size == 4) {
+      if (!RENDERINPUTS_TEST_RANGE( inputs, _TNL_ATTRIB_TEX1, _TNL_LAST_TEX ))
+	 return GL_TRUE; /* apply ptex hack */
+      FALLBACK(ctx, SAVAGE_FALLBACK_PROJ_TEXTURE, GL_TRUE);
+   }
+   /* 4-component coords on unit 1 always force the fallback. */
+   if (RENDERINPUTS_TEST( inputs, _TNL_ATTRIB_TEX1 ) &&
+       VB->AttribPtr[_TNL_ATTRIB_TEX1]->size == 4)
+      FALLBACK(ctx, SAVAGE_FALLBACK_PROJ_TEXTURE, GL_TRUE);
+   return GL_FALSE; /* don't apply ptex hack */
+}
+
+/* Append one {attrib, format} entry to imesa->vertex_attrs. */
+#define DO_EMIT_ATTR( ATTR, STYLE )					\
+do {									\
+   imesa->vertex_attrs[imesa->vertex_attr_count].attrib = (ATTR);	\
+   imesa->vertex_attrs[imesa->vertex_attr_count].format = (STYLE);	\
+   imesa->vertex_attr_count++;						\
+} while (0)
+/* OR INDEX into the caller's local setupIndex and clear SKIP in its local skip. */
+#define NEED_ATTR( INDEX, SKIP )					\
+do {									\
+   setupIndex |= (INDEX);						\
+   skip &= ~(SKIP);							\
+} while (0)
+/* NEED_ATTR followed by DO_EMIT_ATTR. */
+#define EMIT_ATTR( ATTR, STYLE, INDEX, SKIP )				\
+do {									\
+   NEED_ATTR( INDEX, SKIP );						\
+   DO_EMIT_ATTR( ATTR, STYLE );						\
+} while (0)
+/* Append a padding entry (attrib 0, format EMIT_PAD) whose offset field is N. */
+#define EMIT_PAD( N )							\
+do {									\
+   imesa->vertex_attrs[imesa->vertex_attr_count].attrib = 0;		\
+   imesa->vertex_attrs[imesa->vertex_attr_count].format = EMIT_PAD;	\
+   imesa->vertex_attrs[imesa->vertex_attr_count].offset = (N);		\
+   imesa->vertex_attr_count++;						\
+} while (0)
+/* Bits collected in setupIndex by the savageChooseVertexFormat_* functions. */
+#define SAVAGE_EMIT_XYZ  0x0001
+#define SAVAGE_EMIT_W    0x0002
+#define SAVAGE_EMIT_C0   0x0004
+#define SAVAGE_EMIT_C1   0x0008
+#define SAVAGE_EMIT_FOG  0x0010
+#define SAVAGE_EMIT_S0   0x0020
+#define SAVAGE_EMIT_T0   0x0040
+#define SAVAGE_EMIT_Q0   0x0080
+#define SAVAGE_EMIT_ST0  0x0060	/* S0 | T0 */
+#define SAVAGE_EMIT_STQ0 0x00e0	/* S0 | T0 | Q0 */
+#define SAVAGE_EMIT_S1   0x0100
+#define SAVAGE_EMIT_T1   0x0200
+#define SAVAGE_EMIT_ST1  0x0300	/* S1 | T1 */
+
+/* Build the vertex attribute list for pre-Savage4 chips; returns the SAVAGE_EMIT_* bits in use. */
+static INLINE GLuint savageChooseVertexFormat_s3d( GLcontext *ctx )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   DECLARE_RENDERINPUTS(index_bitset);
+   GLuint setupIndex = SAVAGE_EMIT_XYZ;	/* updated by NEED_ATTR/EMIT_ATTR */
+   GLubyte skip;				/* updated by NEED_ATTR/EMIT_ATTR */
+
+   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+   imesa->vertex_attr_count = 0;
+   /* Start with every field marked skippable; each section below clears its bits. */
+   skip = SAVAGE_SKIP_ALL_S3D;
+   skip &= ~SAVAGE_SKIP_Z; /* all mesa vertices have a z coordinate */
+
+   /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+    * build up a hardware vertex.
+    */
+   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX ) || !(ctx->_TriangleCaps & DD_FLATSHADE))
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, SAVAGE_EMIT_W, SAVAGE_SKIP_W );
+   else {
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, 0, 0 );
+      EMIT_PAD( 4 );	/* W is not emitted: pad its slot instead */
+      skip &= ~SAVAGE_SKIP_W;
+   }
+
+   /* t_context.c always includes a diffuse color */
+   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, SAVAGE_EMIT_C0, SAVAGE_SKIP_C0 );
+
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ))
+      EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, SAVAGE_EMIT_C1, SAVAGE_SKIP_C1 );
+   else
+      EMIT_PAD( 3 );
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG ))	/* fog uses the C1 skip bit too */
+      EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, SAVAGE_EMIT_FOG, SAVAGE_SKIP_C1 );
+   else
+      EMIT_PAD( 1 );
+   skip &= ~SAVAGE_SKIP_C1;	/* slot was either emitted or padded above */
+
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX0 )) {
+      if (imesa->ptexHack)
+	 EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_3F_XYW, SAVAGE_EMIT_STQ0, SAVAGE_SKIP_ST0);
+      else if (VB->AttribPtr[_TNL_ATTRIB_TEX0]->size == 4)
+	 assert (0); /* should be caught by savageCheckPTexHack */
+      else if (VB->AttribPtr[_TNL_ATTRIB_TEX0]->size >= 2)
+	 /* The chromium menu emits some 3D tex coords even though no
+	  * 3D texture is enabled. Ignore the 3rd coordinate. */
+	 EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_2F, SAVAGE_EMIT_ST0, SAVAGE_SKIP_ST0 );
+      else if (VB->AttribPtr[_TNL_ATTRIB_TEX0]->size == 1) {
+	 EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_1F, SAVAGE_EMIT_S0, SAVAGE_SKIP_S0 );
+	 EMIT_PAD( 4 );
+      } else
+	 EMIT_PAD( 8 );
+   } else
+      EMIT_PAD( 8 );
+   skip &= ~SAVAGE_SKIP_ST0;	/* slot was either emitted or padded above */
+   /* Every field was emitted or padded, so no skip bit may be left set. */
+   assert (skip == 0);
+   imesa->skip = skip;
+   return setupIndex;
+}
+
+/* Build the vertex attribute list for Savage4 and later chips; returns the SAVAGE_EMIT_* bits in use. */
+static INLINE GLuint savageChooseVertexFormat_s4( GLcontext *ctx )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   DECLARE_RENDERINPUTS(index_bitset);
+   GLuint setupIndex = SAVAGE_EMIT_XYZ;	/* updated by NEED_ATTR */
+   GLubyte skip;				/* updated by NEED_ATTR */
+   GLuint size, mask;
+   /* Pass 1: work out which fields are needed (setupIndex) and which stay skipped. */
+   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+   skip = SAVAGE_SKIP_ALL_S4;
+   skip &= ~SAVAGE_SKIP_Z; /* all mesa vertices have a z coordinate */
+
+   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX ) || !(ctx->_TriangleCaps & DD_FLATSHADE))
+      NEED_ATTR( SAVAGE_EMIT_W, SAVAGE_SKIP_W );
+
+   /* t_context.c always includes a diffuse color */
+   NEED_ATTR( SAVAGE_EMIT_C0, SAVAGE_SKIP_C0 );
+
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ))
+      NEED_ATTR( SAVAGE_EMIT_C1, SAVAGE_SKIP_C1 );
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG ))	/* fog uses the C1 skip bit too */
+      NEED_ATTR( SAVAGE_EMIT_FOG, SAVAGE_SKIP_C1 );
+
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX0 )) {
+      if (imesa->ptexHack)
+	 NEED_ATTR( SAVAGE_EMIT_STQ0, SAVAGE_SKIP_ST0);
+      else if (VB->AttribPtr[_TNL_ATTRIB_TEX0]->size == 4)
+	 assert (0); /* should be caught by savageCheckPTexHack */
+      else if (VB->AttribPtr[_TNL_ATTRIB_TEX0]->size >= 2)
+	 /* The chromium menu emits some 3D tex coords even though no
+	  * 3D texture is enabled. Ignore the 3rd coordinate. */
+	 NEED_ATTR( SAVAGE_EMIT_ST0, SAVAGE_SKIP_ST0 );
+      else
+	 NEED_ATTR( SAVAGE_EMIT_S0, SAVAGE_SKIP_S0 );
+   }
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX1 )) {
+      if (VB->AttribPtr[_TNL_ATTRIB_TEX1]->size == 4)
+	 /* projective textures are not supported by the hardware */
+	 assert (0); /* should be caught by savageCheckPTexHack */
+      else if (VB->AttribPtr[_TNL_ATTRIB_TEX1]->size >= 2)
+	 NEED_ATTR( SAVAGE_EMIT_ST1, SAVAGE_SKIP_ST1 );
+      else
+	 NEED_ATTR( SAVAGE_EMIT_S1, SAVAGE_SKIP_S1 );
+   }
+
+   /* if nothing changed we can skip the rest */
+   if (setupIndex == imesa->SetupIndex && imesa->vertex_size != 0)
+      return setupIndex;
+   /* With vertex DMA enabled, un-skip fields (from W upwards) until size reaches 8. */
+   if (imesa->enable_vdma) {
+      mask = SAVAGE_SKIP_W;
+      size = 10 - (skip & 1) - (skip >> 1 & 1) -
+	 (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
+	 (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);	/* 10 minus the number of set skip bits */
+
+      while (size < 8) {
+	 if (skip & mask) {
+	    skip &= ~mask;
+	    size++;
+	 }
+	 mask <<= 1;
+      }
+   }
+   /* Pass 2: emit an attribute or padding for every field that is not skipped. */
+   imesa->vertex_attr_count = 0;
+
+   if (skip & SAVAGE_SKIP_W)
+      DO_EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT );
+   else if (setupIndex & SAVAGE_EMIT_W)
+      DO_EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT );
+   else {
+      DO_EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT );
+      EMIT_PAD( 4 );	/* W slot present but not needed: pad it */
+   }
+
+   DO_EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA );
+
+   if (!(skip & SAVAGE_SKIP_C1)) {
+      if (!(setupIndex & (SAVAGE_EMIT_C1|SAVAGE_EMIT_FOG)))
+	 EMIT_PAD( 4 );
+      else {
+	 if (setupIndex & SAVAGE_EMIT_C1)
+	    DO_EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR );
+	 else
+	    EMIT_PAD( 3 );
+	 if (setupIndex & SAVAGE_EMIT_FOG)
+	    DO_EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F );
+	 else
+	    EMIT_PAD( 1 );
+      }
+   }
+
+   if ((skip & SAVAGE_SKIP_ST0) != SAVAGE_SKIP_ST0) {
+      if ((setupIndex & SAVAGE_EMIT_STQ0) == SAVAGE_EMIT_STQ0)
+	 DO_EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_3F_XYW );
+      else if ((setupIndex & SAVAGE_EMIT_ST0) == SAVAGE_EMIT_ST0)
+	 DO_EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_2F );
+      else if ((setupIndex & SAVAGE_EMIT_ST0) == SAVAGE_EMIT_S0) {
+	 DO_EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_1F );
+	 if (!(skip & SAVAGE_SKIP_T0)) EMIT_PAD( 4 );
+      } else {
+	 if (!(skip & SAVAGE_SKIP_S0)) EMIT_PAD( 4 );
+	 if (!(skip & SAVAGE_SKIP_T0)) EMIT_PAD( 4 );
+      }
+   }
+
+   if ((skip & SAVAGE_SKIP_ST1) != SAVAGE_SKIP_ST1) {
+      if ((setupIndex & SAVAGE_EMIT_ST1) == SAVAGE_EMIT_ST1)
+	 DO_EMIT_ATTR( _TNL_ATTRIB_TEX1, EMIT_2F );
+      else if ((setupIndex & SAVAGE_EMIT_ST1) == SAVAGE_EMIT_S1) {
+	 DO_EMIT_ATTR( _TNL_ATTRIB_TEX1, EMIT_1F );
+	 if (!(skip & SAVAGE_SKIP_T1)) EMIT_PAD( 4 );
+      } else {
+	 if (!(skip & SAVAGE_SKIP_S1)) EMIT_PAD( 4 );
+	 if (!(skip & SAVAGE_SKIP_T1)) EMIT_PAD( 4 );
+      }
+   }
+
+   imesa->skip = skip;
+   return setupIndex;
+}
+
+/* tnl Render.Start hook: re-evaluate the ptex hack, then (re)select vertex format and vertex buffer. */
+static void savageRenderStart( GLcontext *ctx )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint setupIndex = SAVAGE_EMIT_XYZ;
+   GLboolean ptexHack;
+
+   /* Check if we need to apply the ptex hack. Choose a new render
+    * state if necessary. (Note: this can't be done in
+    * savageRunPipeline, since the number of vertex coordinates can
+    * change in the pipeline. texmat or texgen or both?) */
+   ptexHack = savageCheckPTexHack( ctx );
+   if (ptexHack != imesa->ptexHack) {
+      imesa->ptexHack = ptexHack;
+      savageChooseRenderState (ctx);
+   }
+   /* Handle fallback cases identified in savageCheckPTexHack. */
+   if (SAVAGE_CONTEXT(ctx)->Fallback) {
+      tnl->Driver.Render.Start(ctx);	/* presumably the hook installed by _swsetup_Wakeup -- confirm */
+      return;
+   }
+
+   /* Important: use the NDC coordinates as the position attribute.
+    */
+   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+ 
+   if (imesa->savageScreen->chipset < S3_SAVAGE4) {
+      setupIndex = savageChooseVertexFormat_s3d(ctx);
+   } else {
+      setupIndex = savageChooseVertexFormat_s4(ctx);
+   }
+
+   /* Need to change the vertex emit code if the SetupIndex changed or
+    * is set for the first time (indicated by vertex_size == 0). */
+   if (setupIndex != imesa->SetupIndex || imesa->vertex_size == 0) {
+      GLuint hwVertexSize;
+      imesa->vertex_size =
+	 _tnl_install_attrs( ctx, 
+			     imesa->vertex_attrs, 
+			     imesa->vertex_attr_count,
+			     imesa->hw_viewport, 0 );
+      imesa->vertex_size >>= 2;	/* presumably bytes -> dwords -- confirm */
+      imesa->SetupIndex = setupIndex;
+
+      hwVertexSize = imesa->vertex_size;
+      if (setupIndex & SAVAGE_EMIT_Q0) {
+	 /* The vertex setup code emits homogeneous texture
+	  * coordinates. They are converted to normal 2D coords by
+	  * savage_ptex_tri/line/point. Now we have two different
+	  * vertex sizes. Functions that emit vertices to the hardware
+	  * need to use HwVertexSize, anything that manipulates the
+	  * vertices generated by t_vertex uses vertex_size. */
+	 hwVertexSize--;
+	 assert (imesa->ptexHack);
+      } else
+	 assert (!imesa->ptexHack);
+
+      if (hwVertexSize != imesa->HwVertexSize) {
+	 /* Changing the vertex size: flush vertex and command buffer and
+	  * discard the DMA buffer, if we were using one. */
+	 savageFlushVertices(imesa);
+	 savageFlushCmdBuf(imesa, GL_TRUE);
+	 if (hwVertexSize == 8 && imesa->enable_vdma) {
+	    if (SAVAGE_DEBUG & DEBUG_DMA)
+	       fprintf (stderr, "Using DMA, skip=0x%02x\n", imesa->skip);
+	    /* we can use vertex dma */
+	    imesa->vtxBuf = &imesa->dmaVtxBuf;
+	 } else {
+	    if (SAVAGE_DEBUG & DEBUG_DMA)
+	       fprintf (stderr, "Not using DMA, skip=0x%02x\n", imesa->skip);
+	    imesa->vtxBuf = &imesa->clientVtxBuf;
+	 }
+	 imesa->HwVertexSize = hwVertexSize;
+      }
+   }
+}
+
+static void savageRenderFinish( GLcontext *ctx )
+{
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+
+   savageFlushVertices(imesa); /* last primitive, before any state changes */
+   if (imesa->RenderIndex & SAVAGE_FALLBACK_BIT)
+      _swrast_flush( ctx ); /* fallback render index: flush swrast as well */
+}
+
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+
+static const char * const fallbackStrings[] = {	/* indexed by fallback bit position, see savageFallback */
+   "Texture mode",
+   "Draw buffer",
+   "Read buffer",
+   "Color mask",
+   "Specular",
+   "LogicOp",
+   "glEnable(GL_STENCIL) without hw stencil buffer",
+   "glRenderMode(selection or feedback)",
+   "glBlendEquation",
+   "Hardware rasterization disabled",
+   "Projective texture",
+};
+
+void savageFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   /* Set (mode true) or clear (mode false) one fallback reason bit; switch
+    * render hooks when the first reason appears or the last one goes away. */
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   savageContextPtr imesa = SAVAGE_CONTEXT(ctx);
+   const GLuint oldfallback = imesa->Fallback;
+   const GLuint numStrings = sizeof(fallbackStrings) / sizeof(fallbackStrings[0]);
+   const char *reason;
+   GLuint index = 0;
+   /* Bit position -> name; unsigned shift, capped, and table bounds-checked. */
+   while (index < 31 && (1u << index) < bit)
+      ++index;
+   reason = (index < numStrings) ? fallbackStrings[index] : "unknown";
+
+   if (mode) {
+      imesa->Fallback |= bit;
+      if (oldfallback == 0) {
+	 /* the first fallback */
+	 _swsetup_Wakeup( ctx );
+	 imesa->RenderIndex = ~0;
+      }
+      if (!(oldfallback & bit) && (SAVAGE_DEBUG & DEBUG_FALLBACKS))
+	 fprintf (stderr, "Savage begin fallback: 0x%x %s\n", bit, reason);
+   }
+   else {
+      imesa->Fallback &= ~bit;
+      if (oldfallback == bit) {
+	 /* the last fallback */
+	 _swrast_flush( ctx );
+	 tnl->Driver.Render.Start = savageRenderStart;
+	 tnl->Driver.Render.PrimitiveNotify = savageRenderPrimitive;
+	 tnl->Driver.Render.Finish = savageRenderFinish;
+	 tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+	 tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+	 tnl->Driver.Render.Interp = _tnl_interp;
+	 _tnl_invalidate_vertex_state( ctx, ~0 );
+	 _tnl_invalidate_vertices( ctx, ~0 );
+	 _tnl_install_attrs( ctx, imesa->vertex_attrs,
+			     imesa->vertex_attr_count, imesa->hw_viewport, 0 );
+	 imesa->new_gl_state |= _SAVAGE_NEW_RENDER_STATE;
+      }
+      if ((oldfallback & bit) && (SAVAGE_DEBUG & DEBUG_FALLBACKS))
+	 fprintf (stderr, "Savage end fallback: 0x%x %s\n", bit, reason);
+   }
+}
+
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+void savageInitTriFuncs( GLcontext *ctx )
+{
+   static int rastTabReady = 0;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint vbSize = ctx->Const.MaxArrayLockSize + 12;
+   const GLuint maxVertexBytes = (6 + 2*ctx->Const.MaxTextureUnits) * sizeof(GLfloat);
+
+   /* The rasterization table only has to be filled in once. */
+   if (!rastTabReady) {
+      init_rast_tab();
+      rastTabReady = 1;
+   }
+
+   /* Driver entry points used by the tnl module. */
+   tnl->Driver.RunPipeline = savageRunPipeline;
+   tnl->Driver.Render.Start = savageRenderStart;
+   tnl->Driver.Render.PrimitiveNotify = savageRenderPrimitive;
+   tnl->Driver.Render.Finish = savageRenderFinish;
+   tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple;
+   tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+   tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+   tnl->Driver.Render.Interp = _tnl_interp;
+   _tnl_init_vertices( ctx, vbSize, maxVertexBytes );
+   SAVAGE_CONTEXT(ctx)->verts = (char *)tnl->clipspace.vertex_buf;
+}
diff --git a/src/mesa/drivers/dri/savage/savagetris.h b/src/mesa/drivers/dri/savage/savagetris.h
new file mode 100644
index 0000000000..a2a9375ed5
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/savagetris.h
@@ -0,0 +1,48 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+                     VA Linux Systems Inc., Fremont, California.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell <keithw@valinux.com>
+ *   Felix Kuehling <fxkuehl@gmx.de>
+ *
+ */
+
+#ifndef __SAVAGE_TRIS_H__
+#define __SAVAGE_TRIS_H__
+
+#include "main/mtypes.h"
+
+extern void savageInitTriFuncs( GLcontext *ctx );	/* installs the tnl render hooks */
+
+/* Set (mode true) or clear (mode false) the fallback reason 'bit'. */
+extern void savageFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+#define FALLBACK( ctx, bit, mode ) savageFallback( ctx, bit, mode )
+
+
+#endif /* __SAVAGE_TRIS_H__ */
diff --git a/src/mesa/drivers/dri/savage/server/savage_dri.h b/src/mesa/drivers/dri/savage/server/savage_dri.h
new file mode 100644
index 0000000000..214d9851af
--- /dev/null
+++ b/src/mesa/drivers/dri/savage/server/savage_dri.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef __SAVAGE_DRI_H__
+#define __SAVAGE_DRI_H__
+
+#include "drm.h"
+
+typedef struct {	/* savage DRI screen record; NOTE(review): presumably filled in by the X server side -- confirm */
+   int chipset;
+   int width;
+   int height;
+   int mem;
+   int cpp;	/* presumably bytes per color pixel -- confirm */
+   int zpp;	/* presumably bytes per depth pixel -- confirm */
+
+   int agpMode; /* 0 for PCI cards */
+
+   unsigned int sarea_priv_offset;
+
+   unsigned int bufferSize; /* size of DMA buffers */
+   
+   unsigned int frontbufferSize;
+   unsigned int frontOffset;
+
+   unsigned int backbufferSize;
+   unsigned int backOffset;
+
+   unsigned int depthbufferSize;
+   unsigned int depthOffset;
+
+   unsigned int textureOffset;
+   unsigned int textureSize;
+   int logTextureGranularity;
+
+   /* Linear aperture */
+   drm_handle_t apertureHandle;
+   unsigned int apertureSize;
+   unsigned int aperturePitch;    /* in bytes */
+
+   /* Status page (probably not needed, but no harm, read-only) */
+   drm_handle_t statusHandle;
+   unsigned int statusSize;
+
+   /* AGP textures */
+   drm_handle_t agpTextureHandle;
+   unsigned int agpTextureSize;
+   int logAgpTextureGranularity;
+
+   /* Not sure about this one */
+   drm_handle_t xvmcSurfHandle; /* ? */
+} SAVAGEDRIRec, *SAVAGEDRIPtr;
+
+#endif
diff --git a/src/mesa/drivers/dri/sis/Makefile b/src/mesa/drivers/dri/sis/Makefile
new file mode 100644
index 0000000000..6b4f938bab
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/Makefile
@@ -0,0 +1,32 @@
+# src/mesa/drivers/dri/sis/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = sis_dri.so
+
+DRIVER_SOURCES = \
+	sis6326_state.c \
+	sis6326_clear.c \
+	sis_alloc.c \
+	sis_clear.c \
+	sis_context.c \
+	sis_dd.c \
+	sis_fog.c \
+	sis_lock.c \
+	sis_screen.c \
+	sis_span.c \
+	sis_state.c \
+	sis_stencil.c \
+	sis_tex.c \
+	sis_texstate.c \
+	sis_tris.c
+
+C_SOURCES = \
+	$(COMMON_SOURCES) \
+	$(DRIVER_SOURCES)
+
+ASM_SOURCES = 
+
+include ../Makefile.template
+
diff --git a/src/mesa/drivers/dri/sis/server/sis_common.h b/src/mesa/drivers/dri/sis/server/sis_common.h
new file mode 100644
index 0000000000..bd9bab846f
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/server/sis_common.h
@@ -0,0 +1,62 @@
+/*
+ * Common header definitions for SiS 2D/3D/DRM suite
+ *
+ * Copyright (C) 2003 Eric Anholt
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of the copyright holder not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  The copyright holder makes no representations
+ * about the suitability of this software for any purpose.  It is provided
+ * "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDER DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Author:
+ *   	Eric Anholt <anholt@FreeBSD.org>
+ *
+ */
+
+#ifndef _SIS_COMMON_H_
+#define _SIS_COMMON_H_
+
+#define DRM_SIS_FB_ALLOC	0x04	/* DRM_SIS_* request numbers; NOTE(review): presumably must match the kernel DRM module -- confirm */
+#define DRM_SIS_FB_FREE		0x05
+#define DRM_SIS_FLIP		0x08
+#define DRM_SIS_FLIP_INIT	0x09
+#define DRM_SIS_FLIP_FINAL	0x10
+#define DRM_SIS_AGP_INIT	0x13
+#define DRM_SIS_AGP_ALLOC	0x14
+#define DRM_SIS_AGP_FREE	0x15
+#define DRM_SIS_FB_INIT		0x16
+
+typedef struct {
+  	int context;
+  	unsigned long offset;
+  	unsigned long size;
+  	void *free;
+} drm_sis_mem_t;	/* NOTE(review): presumably the argument of the *_ALLOC / *_FREE requests -- confirm */
+
+typedef struct {
+  	unsigned long offset, size;
+} drm_sis_agp_t;	/* NOTE(review): presumably the argument of DRM_SIS_AGP_INIT -- confirm */
+
+typedef struct {
+  	unsigned long offset, size;
+} drm_sis_fb_t;	/* NOTE(review): presumably the argument of DRM_SIS_FB_INIT -- confirm */
+
+typedef struct {
+  	unsigned int left, right;
+} drm_sis_flip_t;	/* NOTE(review): presumably the argument of the DRM_SIS_FLIP* requests -- confirm */
+
+#endif /* _SIS_COMMON_H_ */
+
diff --git a/src/mesa/drivers/dri/sis/server/sis_dri.h b/src/mesa/drivers/dri/sis/server/sis_dri.h
new file mode 100644
index 0000000000..f0171f3c0f
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/server/sis_dri.h
@@ -0,0 +1,84 @@
+
+/* modified from tdfx_dri.h */
+
+#ifndef _SIS_DRI_
+#define _SIS_DRI_
+
+#include "xf86drm.h"
+#include "drm.h"
+
+#define SIS_MAX_DRAWABLES 256
+#define SISIOMAPSIZE (64*1024)
+
+typedef struct {	/* SiS private SAREA fields (going by the type name) */
+  int CtxOwner;
+  int QueueLength;
+  unsigned int AGPCmdBufNext;
+  unsigned int FrameCount;
+#ifdef SIS315DRI
+  /* For 315 series */
+  unsigned long sharedWPoffset;
+#endif
+#if 0	/* disabled AGP command buffer fields */
+  unsigned char *AGPCmdBufBase;
+  unsigned long AGPCmdBufAddr;
+  unsigned long AGPCmdBufOffset;
+  unsigned int  AGPCmdBufSize;
+  unsigned long AGPCmdBufNext;
+#endif
+} SISSAREAPriv, *SISSAREAPrivPtr;
+
+#define AGPVtxBufNext AGPCmdBufNext	/* alias: "vertex buffer" name for the same field */
+
+#define SIS_FRONT 0
+#define SIS_BACK 1
+#define SIS_DEPTH 2
+
+typedef struct {	/* a DRM handle together with a size */
+  drm_handle_t handle;
+  drmSize size;
+} sisRegion, *sisRegionPtr;
+
+typedef struct {	/* SiS DRI screen record; several members are marked unused below */
+  sisRegion regs, agp;
+  int deviceID;
+  int width;
+  int height;
+  int mem;				/* unused in Mesa 3 DRI */
+  int bytesPerPixel;
+  int priv1;				/* unused in Mesa 3 DRI */
+  int priv2;				/* unused in Mesa 3 DRI */
+  int fbOffset;				/* unused in Mesa 3 DRI */
+  int backOffset;			/* unused in Mesa 3 DRI */
+  int depthOffset;			/* unused in Mesa 3 DRI */
+  int textureOffset;			/* unused in Mesa 3 DRI */
+  int textureSize;			/* unused in Mesa 3 DRI */
+  unsigned int AGPCmdBufOffset;
+  unsigned int AGPCmdBufSize;
+  int irqEnabled;			/* unused in Mesa 3 DRI */
+  unsigned int scrnX, scrnY;		/* unused in Mesa 3 DRI */
+} SISDRIRec, *SISDRIPtr;
+
+#define AGPVtxBufOffset AGPCmdBufOffset	/* alias: "vertex buffer" name for the same field */
+#define AGPVtxBufSize AGPCmdBufSize	/* alias: "vertex buffer" name for the same field */
+
+typedef struct {
+  /* Nothing here yet */
+  int dummy;	/* placeholder member */
+} SISConfigPrivRec, *SISConfigPrivPtr;
+
+typedef struct {
+  /* Nothing here yet */
+  int dummy;	/* placeholder member */
+} SISDRIContextRec, *SISDRIContextPtr;
+
+#ifdef XFree86Server
+
+#include "screenint.h"
+
+Bool SISDRIScreenInit(ScreenPtr pScreen);
+void SISDRICloseScreen(ScreenPtr pScreen);
+Bool SISDRIFinishScreenInit(ScreenPtr pScreen);
+
+#endif
+#endif
diff --git a/src/mesa/drivers/dri/sis/sis6326_clear.c b/src/mesa/drivers/dri/sis/sis6326_clear.c
new file mode 100644
index 0000000000..d46ecc9cd2
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis6326_clear.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2005 Eric Anholt
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <anholt@FreeBSD.org>
+ *
+ */
+
+#include "sis_context.h"
+#include "sis_state.h"
+#include "sis_lock.h"
+#include "sis_reg.h"
+
+#include "swrast/swrast.h"
+#include "main/macros.h"
+
+static void sis_clear_front_buffer(GLcontext *ctx, GLenum mask, GLint x,
+				   GLint y, GLint width, GLint height);
+static void sis_clear_back_buffer(GLcontext *ctx, GLenum mask, GLint x,
+				  GLint y, GLint width, GLint height);
+static void sis_clear_z_buffer(GLcontext * ctx, GLbitfield mask, GLint x,
+			       GLint y, GLint width, GLint height );
+
+static void
+set_color_pattern( sisContextPtr smesa, GLubyte red, GLubyte green,
+		   GLubyte blue, GLubyte alpha )
+{
+   /* Pack the clear color for the dst format. XXX only RGB565 and ARGB8888 */
+   switch (smesa->colorFormat) {
+   case DST_FORMAT_ARGB_8888:
+      /* Widen first: a GLubyte >= 0x80 shifted left by 24 overflows int. */
+      smesa->clearColorPattern = ((GLuint)alpha << 24) |
+	 ((GLuint)red << 16) | ((GLuint)green << 8) | (GLuint)blue;
+      break;
+   case DST_FORMAT_RGB_565:
+      smesa->clearColorPattern = ((red >> 3) << 11) +
+	 ((green >> 2) << 5) + (blue >> 3);
+      smesa->clearColorPattern |= smesa->clearColorPattern << 16;
+      break;
+   default:
+      sis_fatal_error("Bad dst color format\n");
+   }
+}
+
+void
+sis6326UpdateZPattern(sisContextPtr smesa, GLclampd z)
+{
+   CLAMPED_FLOAT_TO_USHORT(smesa->clearZStencilPattern, z); /* macro does the [0,1] -> 16-bit scaling itself */
+}
+
+void
+sis6326DDClear(GLcontext *ctx, GLbitfield mask)
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   GLint x1, y1, width1, height1;
+   /* Clears the buffers named in mask: front, back and depth are done
+    * here, leftover bits go to _swrast_Clear().  Region is read pre-lock. */
+   x1 = ctx->DrawBuffer->_Xmin;
+   y1 = ctx->DrawBuffer->_Ymin;
+   width1 = ctx->DrawBuffer->_Xmax - x1;
+   height1 = ctx->DrawBuffer->_Ymax - y1;
+   y1 = Y_FLIP(y1 + height1 - 1);
+
+   /* XXX: Scissoring */
+
+   /* No debug trace here: this path runs on every clear. */
+
+   /* Mask out any non-existent buffers */
+   if (smesa->depth.offset == 0 || !ctx->Depth.Mask)
+      mask &= ~BUFFER_BIT_DEPTH;
+
+   LOCK_HARDWARE();
+
+   if (mask & BUFFER_BIT_FRONT_LEFT) {
+      sis_clear_front_buffer(ctx, mask, x1, y1, width1, height1);
+      mask &= ~BUFFER_BIT_FRONT_LEFT;
+   }
+
+   if (mask & BUFFER_BIT_BACK_LEFT) {
+      sis_clear_back_buffer(ctx, mask, x1, y1, width1, height1);
+      mask &= ~BUFFER_BIT_BACK_LEFT;
+   }
+
+   if (mask & BUFFER_BIT_DEPTH) {
+      sis_clear_z_buffer(ctx, mask, x1, y1, width1, height1);
+      mask &= ~BUFFER_BIT_DEPTH;
+   }
+
+   UNLOCK_HARDWARE();
+
+   if (mask != 0)
+      _swrast_Clear(ctx, mask);
+}
+
+
+void
+sis6326DDClearColor(GLcontext *ctx, const GLfloat color[4])
+{
+   /* Convert the four float components to bytes and cache the pattern. */
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   GLubyte rgba[4];
+   int i;
+
+   for (i = 0; i < 4; i++) {
+      CLAMPED_FLOAT_TO_UBYTE(rgba[i], color[i]);
+   }
+   set_color_pattern( smesa, rgba[0], rgba[1], rgba[2], rgba[3] );
+}
+
+void
+sis6326DDClearDepth(GLcontext *ctx, GLclampd d)
+{
+   /* Hand the new clear depth to sis6326UpdateZPattern for this
+    * context; nothing else is done here. */
+   sis6326UpdateZPattern(SIS_CONTEXT(ctx), d);
+}
+
+static void
+sis_clear_back_buffer(GLcontext *ctx, GLenum mask, GLint x, GLint y,
+		      GLint width, GLint height)
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   /* Programs a BitBlt fill of the back buffer with clearColorPattern; mask is unused. */
+   /* XXX: The order of writing these registers seems to matter, while
+    * it actually shouldn't.
+    */
+   mWait3DCmdQueue(6);
+   MMIO(REG_6326_BitBlt_DstSrcPitch, smesa->back.pitch << 16);
+   MMIO(REG_6326_BitBlt_fgColor, SiS_ROP_PATCOPY |
+	smesa->clearColorPattern);
+   MMIO(REG_6326_BitBlt_bgColor, SiS_ROP_PATCOPY |
+	smesa->clearColorPattern);
+   MMIO(REG_6326_BitBlt_DstAddr, smesa->back.offset +
+	(y+height) * smesa->back.pitch +
+	(x+width) * smesa->bytesPerPixel); /* NOTE(review): front clear uses row (y2-1), this uses (y+height) - confirm */
+   MMIO(REG_6326_BitBlt_HeightWidth, ((height-1) << 16) |
+	(width * smesa->bytesPerPixel));
+   MMIO_WMB();
+   MMIO(REG_6326_BitBlt_Cmd, BLT_PAT_BG); /* neither BLT_XINC nor BLT_YINC is set, unlike the Z clear */
+}
+
+static void
+sis_clear_front_buffer(GLcontext *ctx, GLenum mask, GLint x, GLint y,
+		       GLint width, GLint height)
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   drm_clip_rect_t *pExtents = smesa->driDrawable->pClipRects;
+   int count = smesa->driDrawable->numClipRects;
+
+   /* Fills the part of the requested region that falls inside each clip
+    * rectangle of the drawable with clearColorPattern.  mask is unused. */
+
+   mWait3DCmdQueue(3);
+   MMIO(REG_6326_BitBlt_DstSrcPitch, smesa->front.pitch << 16);
+   MMIO(REG_6326_BitBlt_fgColor, SiS_ROP_PATCOPY |
+       smesa->clearColorPattern);
+   MMIO(REG_6326_BitBlt_bgColor, SiS_ROP_PATCOPY |
+       smesa->clearColorPattern);
+
+   while (count--) {
+      GLint x1 = pExtents->x1 - smesa->driDrawable->x;
+      GLint y1 = pExtents->y1 - smesa->driDrawable->y;
+      GLint x2 = pExtents->x2 - smesa->driDrawable->x;
+      GLint y2 = pExtents->y2 - smesa->driDrawable->y;
+      GLint w, h;
+
+      if (x > x1)
+	 x1 = x;
+      if (y > y1)
+	 y1 = y;
+      if (x + width < x2)
+	 x2 = x + width;
+      if (y + height < y2)
+	 y2 = y + height;
+      /* Clipped size stays in locals: storing it in width/height would
+       * shrink the region used to clip every following rectangle. */
+      w = x2 - x1;
+      h = y2 - y1;
+      pExtents++;
+      if (w <= 0 || h <= 0)
+	 continue;
+
+      mWait3DCmdQueue(3);
+      MMIO(REG_6326_BitBlt_DstAddr, smesa->front.offset +
+	   (y2-1) * smesa->front.pitch + x2 * smesa->bytesPerPixel);
+      MMIO(REG_6326_BitBlt_HeightWidth, ((h-1) << 16) |
+	   (w * smesa->bytesPerPixel));
+      MMIO_WMB();
+      MMIO(REG_6326_BitBlt_Cmd, BLT_PAT_BG);
+   }
+}
+
+static void
+sis_clear_z_buffer(GLcontext * ctx, GLbitfield mask, GLint x, GLint y,
+		   GLint width, GLint height)
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   /* Programs a BitBlt fill of the depth buffer with clearZStencilPattern; mask is unused. */
+   mWait3DCmdQueue(6);
+   MMIO(REG_6326_BitBlt_DstAddr,
+	smesa->depth.offset + y * smesa->depth.pitch + x * 2); /* x and width use a hard-coded factor 2 (presumably bytes per Z value) */
+   MMIO(REG_6326_BitBlt_DstSrcPitch, smesa->depth.pitch << 16);
+   MMIO(REG_6326_BitBlt_HeightWidth, ((height-1) << 16) | (width * 2));
+   MMIO(REG_6326_BitBlt_fgColor, SiS_ROP_PATCOPY | smesa->clearZStencilPattern);
+   MMIO(REG_6326_BitBlt_bgColor, SiS_ROP_PATCOPY | smesa->clearZStencilPattern);
+   MMIO_WMB();
+   MMIO(REG_6326_BitBlt_Cmd, BLT_PAT_BG | BLT_XINC | BLT_YINC); /* unlike the color clears, both increment bits are set */
+}
+
diff --git a/src/mesa/drivers/dri/sis/sis6326_reg.h b/src/mesa/drivers/dri/sis/sis6326_reg.h
new file mode 100644
index 0000000000..8e645f0799
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis6326_reg.h
@@ -0,0 +1,408 @@
+/*
+ * Copyright 2005 Eric Anholt
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <anholt@FreeBSD.org>
+ *
+ */
+
+#ifndef _sis6326_reg_h_
+#define _sis6326_reg_h_
+
+#define REG_6326_BitBlt_SrcAddr		0x8280
+#define REG_6326_BitBlt_DstAddr		0x8284
+#define REG_6326_BitBlt_DstSrcPitch	0x8288
+#define REG_6326_BitBlt_HeightWidth	0x828c
+#define REG_6326_BitBlt_fgColor		0x8290
+#define REG_6326_BitBlt_bgColor		0x8294
+#define REG_6326_BitBlt_Mask30		0x8298
+#define REG_6326_BitBlt_Mask74		0x829c
+#define REG_6326_BitBlt_ClipTopLeft	0x82a0
+#define REG_6326_BitBlt_ClitBottomRight 0x82a4
+#define REG_6326_BitBlt_Cmd		0x82a8
+#define REG_6326_BitBlt_Pat		0x82ac
+
+#define REG_6326_3D_TSFSa		0x8800
+#define REG_6326_3D_TSZa		0x8804
+#define REG_6326_3D_TSXa		0x8808
+#define REG_6326_3D_TSYa		0x880C
+#define REG_6326_3D_TSARGBa		0x8810
+#define REG_6326_3D_TSUa		0x8814
+#define REG_6326_3D_TSVa		0x8818
+#define REG_6326_3D_TSWa		0x881C
+
+#define REG_6326_3D_TSFSb		0x8820
+#define REG_6326_3D_TSZb		0x8824
+#define REG_6326_3D_TSXb		0x8828
+#define REG_6326_3D_TSYb		0x882C
+#define REG_6326_3D_TSARGBb		0x8830
+#define REG_6326_3D_TSUb		0x8834
+#define REG_6326_3D_TSVb		0x8838
+#define REG_6326_3D_TSWb		0x883C
+
+#define REG_6326_3D_TSFSc		0x8840
+#define REG_6326_3D_TSZc		0x8844
+#define REG_6326_3D_TSXc		0x8848
+#define REG_6326_3D_TSYc		0x884C
+#define REG_6326_3D_TSARGBc		0x8850
+#define REG_6326_3D_TSUc		0x8854
+#define REG_6326_3D_TSVc		0x8858
+#define REG_6326_3D_TSWc		0x885C
+
+#define REG_6326_3D_TEnable		0x8A00
+#define REG_6326_3D_ZSet		0x8A04
+#define REG_6326_3D_ZAddress		0x8A08
+
+#define REG_6326_3D_AlphaSet		0x8A0C
+#define REG_6326_3D_AlphaAddress	0x8A10
+#define REG_6326_3D_DstSet		0x8A14
+#define REG_6326_3D_DstAddress		0x8A18
+#define REG_6326_3D_LinePattern		0x8A1C
+#define REG_6326_3D_FogSet		0x8A20
+
+#define REG_6326_3D_DstSrcBlendMode	0x8A28
+
+#define REG_6326_3D_ClipTopBottom	0x8A30
+#define REG_6326_3D_ClipLeftRight	0x8A34
+
+#define REG_6326_3D_TextureSet		0x8A38
+#define REG_6326_3D_TextureBlendSet	0x8A3C
+/* Low transparency value is in TextureBlendSet */
+#define REG_6326_3D_TextureTransparencyColorHigh	0x8A40
+
+#define REG_6326_3D_TextureAddress0	0x8A44
+#define REG_6326_3D_TextureAddress1	0x8A48
+#define REG_6326_3D_TextureAddress2	0x8A4C
+#define REG_6326_3D_TextureAddress3	0x8A50
+#define REG_6326_3D_TextureAddress4	0x8A54
+#define REG_6326_3D_TextureAddress5	0x8A58
+#define REG_6326_3D_TextureAddress6	0x8A5C
+#define REG_6326_3D_TextureAddress7	0x8A60
+#define REG_6326_3D_TextureAddress8	0x8A64
+#define REG_6326_3D_TextureAddress9	0x8A68
+
+#define REG_6326_3D_TexturePitch01	0x8A6C
+#define REG_6326_3D_TexturePitch23	0x8A70
+#define REG_6326_3D_TexturePitch45	0x8A74
+#define REG_6326_3D_TexturePitch67	0x8A78
+#define REG_6326_3D_TexturePitch89	0x8A7C
+
+#define REG_6326_3D_TextureWidthHeight	0x8A80
+#define REG_6326_3D_TextureBorderColor	0x8A90
+
+#define REG_6326_3D_EndPrimitiveList	0x8Aff
+
+/*
+ * REG_6326_BitBlt_fgColor		(0x8290-0x8293)
+ * REG_6326_BitBlt_bgColor		(0x8294-0x8297)
+ */
+#define MASK_BltRop			0xff000000
+#define MASK_BltColor			0x00ffffff
+
+#define SiS_ROP_SRCCOPY			0xcc000000
+#define SiS_ROP_PATCOPY			0xf0000000
+
+/*
+ * REG_6326_BitBlt_Cmd			(0x82a8-0x82ab)
+ */
+#define MASK_QueueStatus		0x0000ffff
+#define MASK_BltCmd0			0x00ff0000
+#define MASK_BltCmd1			0xff000000
+
+#define BLT_SRC_BG			0x00000000
+#define BLT_SRC_FG			0x00010000
+#define BLT_SRC_VID			0x00020000
+#define BLT_SRC_CPU			0x00030000
+#define BLT_PAT_BG			0x00000000
+#define BLT_PAT_FG			0x00040000
+#define BLT_PAT_PAT			0x000b0000
+#define BLT_XINC			0x00100000
+#define BLT_YINC			0x00200000
+#define BLT_CLIP			0x00400000
+#define BLT_BUSY			0x04000000
+
+/*
+ * REG_3D_PrimitiveSet -- Define Fire Primitive Mask (89F8h-89FBh)
+ */
+#define MASK_6326_DrawPrimitiveCommand	0x00000007
+#define MASK_6326_SetFirePosition	0x00000F00
+#define MASK_6326_ShadingMode		0x001c0000
+#define MASK_6326_Direction		0x0003f000
+
+/* OP_3D_{POINT,LINE,TRIANGLE}_DRAW same as 300-series */
+/* OP_3D_DIRECTION*_ same as 300-series */
+
+#define OP_6326_3D_FIRE_TFIRE		0x00000000
+#define OP_6326_3D_FIRE_TSARGBa		0x00000100
+#define OP_6326_3D_FIRE_TSWa		0x00000200
+#define OP_6326_3D_FIRE_TSARGBb		0x00000300
+#define OP_6326_3D_FIRE_TSWb		0x00000400
+#define OP_6326_3D_FIRE_TSARGBc		0x00000500
+#define OP_6326_3D_FIRE_TSWc		0x00000600
+#define OP_6326_3D_FIRE_TSVc		0x00000700
+
+#define OP_6326_3D_ATOP			0x00000000
+#define OP_6326_3D_BTOP			0x00010000
+#define OP_6326_3D_CTOP			0x00020000
+#define OP_6326_3D_AMID			0x00000000
+#define OP_6326_3D_BMID			0x00004000
+#define OP_6326_3D_CMID			0x00008000
+#define OP_6326_3D_ABOT			0x00000000
+#define OP_6326_3D_BBOT			0x00001000
+#define OP_6326_3D_CBOT			0x00002000
+
+#define OP_6326_3D_SHADE_FLAT_TOP	0x00040000
+#define OP_6326_3D_SHADE_FLAT_MID	0x00080000
+#define OP_6326_3D_SHADE_FLAT_BOT	0x000c0000
+#define OP_6326_3D_SHADE_FLAT_GOURAUD	0x00100000
+
+
+/*
+ * REG_6326_3D_EngineFire
+ */
+#define MASK_CmdQueueLen		0x0FFF0000
+#define ENG_3DIDLEQE			0x00000002
+#define ENG_3DIDLE			0x00000001
+
+/*
+ * REG_6326_3D_TEnable -- Define Capability Enable Mask (8A00h-8A03h)
+ */
+#define S_ENABLE_Dither			(1 << 0)
+#define S_ENABLE_Transparency		(1 << 1)
+#define S_ENABLE_Blend			(1 << 2)
+#define S_ENABLE_Fog			(1 << 3)
+#define S_ENABLE_Specular		(1 << 4)
+#define S_ENABLE_LargeCache		(1 << 5)
+#define S_ENABLE_TextureCache		(1 << 7)
+#define S_ENABLE_TextureTransparency	(1 << 8)
+#define S_ENABLE_TexturePerspective	(1 << 9)
+#define S_ENABLE_Texture		(1 << 10)
+#define S_ENABLE_PrimSetup		(1 << 11)
+#define S_ENABLE_LinePattern		(1 << 12)
+#define S_ENABLE_StippleAlpha		(1 << 13) /* requires S_ENABLE_Stipple */
+#define S_ENABLE_Stipple		(1 << 14)
+#define S_ENABLE_AlphaBuffer		(1 << 16)
+#define S_ENABLE_AlphaTest		(1 << 17)
+#define S_ENABLE_AlphaWrite		(1 << 18)
+#define S_ENABLE_ZTest			(1 << 20)
+#define S_ENABLE_ZWrite			(1 << 21)
+
+/*
+ * REG_6326_3D_ZSet -- Define Z Buffer Setting Mask (8A04h-8A07h)
+ */
+#define MASK_6326_ZBufferPitch		0x00003FFF
+#define MASK_6326_ZTestMode		0x00070000
+#define MASK_6326_ZBufferFormat		0x00100000
+
+#define S_ZSET_FORMAT_8			0x00000000
+#define S_ZSET_FORMAT_16		0x00100000
+
+#define S_ZSET_PASS_NEVER		0x00000000
+#define S_ZSET_PASS_LESS		0x00010000
+#define S_ZSET_PASS_EQUAL		0x00020000
+#define S_ZSET_PASS_LEQUAL		0x00030000
+#define S_ZSET_PASS_GREATER		0x00040000
+#define S_ZSET_PASS_NOTEQUAL		0x00050000
+#define S_ZSET_PASS_GEQUAL		0x00060000
+#define S_ZSET_PASS_ALWAYS		0x00070000
+
+/*
+ * REG_3D_AlphaSet -- Define Alpha Buffer Setting Mask (8A0Ch-8A0Fh)
+ */
+#define MASK_AlphaBufferPitch		0x000003FF
+#define MASK_AlphaRefValue		0x00FF0000
+#define MASK_AlphaTestMode		0x07000000
+#define MASK_AlphaBufferFormat		0x30000000
+
+#define S_ASET_FORMAT_8			0x30000000
+
+#define S_ASET_PASS_NEVER		0x00000000
+#define S_ASET_PASS_LESS		0x01000000
+#define S_ASET_PASS_EQUAL		0x02000000
+#define S_ASET_PASS_LEQUAL		0x03000000
+#define S_ASET_PASS_GREATER		0x04000000
+#define S_ASET_PASS_NOTEQUAL		0x05000000
+#define S_ASET_PASS_GEQUAL		0x06000000
+#define S_ASET_PASS_ALWAYS		0x07000000
+
+/*
+ * REG_3D_DstSet -- Define Destination Buffer Setting Mask (8A14h-8A17h)
+ */
+/* pitch, format, depth, rgborder, rop bits same as 300-series */
+
+/*
+ * REG_6326_3D_FogSet -- Define Fog Mask (8A20h-8A23h)
+ */
+#define MASK_6326_FogColor		0x00FFFFFF
+#define MASK_6326_FogMode		0x01000000
+
+#define FOGMODE_6326_CONST		0x00000000
+#define FOGMODE_6326_LINEAR		0x01000000
+
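+/*
+ * REG_6326_3D_DstSrcBlendMode		(0x8A28 - 0x8A2B)
+ * NOTE(review): S_DBLEND_* use 0xf0000000, S_SBLEND_* use 0x0f000000 - the reverse of the Src/Dst mask names. */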
+#define MASK_6326_SrcBlendMode		0xf0000000
+#define MASK_6326_DstBlendMode		0x0f000000
+#define MASK_6326_TransparencyColor	0x00ffffff
+
+#define S_DBLEND_ZERO			0x00000000
+#define S_DBLEND_ONE			0x10000000
+#define S_DBLEND_SRC_COLOR		0x20000000
+#define S_DBLEND_INV_SRC_COLOR		0x30000000
+#define S_DBLEND_SRC_ALPHA		0x40000000
+#define S_DBLEND_INV_SRC_ALPHA		0x50000000
+#define S_DBLEND_DST_ALPHA		0x60000000
+#define S_DBLEND_INV_DST_ALPHA		0x70000000
+
+#define S_SBLEND_ZERO			0x00000000
+#define S_SBLEND_ONE			0x01000000
+#define S_SBLEND_SRC_ALPHA		0x04000000
+#define S_SBLEND_INV_SRC_ALPHA		0x05000000
+#define S_SBLEND_DST_ALPHA		0x06000000
+#define S_SBLEND_INV_DST_ALPHA		0x07000000
+#define S_SBLEND_DST_COLOR		0x08000000
+#define S_SBLEND_INV_DST_COLOR		0x09000000
+#define S_SBLEND_SRC_ALPHA_SAT		0x0A000000
+#define S_SBLEND_BOTH_SRC_ALPHA		0x0B000000
+#define S_SBLEND_BOTH_INV_SRC_ALPHA	0x0C000000
+
+/* 
+ * REG_6326_3D_TextureSet		(0x8A38 - 0x8A3B)
+ */
+#define MASK_6326_TextureMinFilter	0x00000007
+#define MASK_6326_TextureMagFilter	0x00000008
+#define MASK_6326_ClearTexCache		0x00000010
+#define MASK_6326_TextureInSystem	0x00000020
+#define MASK_6326_TextureLevel		0x00000F00
+#define MASK_6326_TextureSignYUVFormat	0x00008000
+#define MASK_6326_TextureMappingMode	0x00FF0000
+
+#define TEXEL_6326_BGR_ORDER		0x80000000
+
+#define TEXEL_6326_INDEX1		0x00000000
+#define TEXEL_6326_INDEX2		0x01000000
+#define TEXEL_6326_INDEX4		0x02000000
+
+#define TEXEL_6326_M4			0x10000000
+#define TEXEL_6326_AM44			0x16000000
+
+#define TEXEL_6326_YUV422		0x20000000 /* YUYV */
+#define TEXEL_6326_YVU422		0x21000000 /* YVYU */
+#define TEXEL_6326_UVY422		0x22000000 /* UYVY */
+#define TEXEL_6326_VUY422		0x23000000 /* VYUY */
+
+#define TEXEL_6326_L1			0x30000000
+#define TEXEL_6326_L2			0x31000000
+#define TEXEL_6326_L4			0x32000000
+#define TEXEL_6326_L8			0x33000000
+
+#define TEXEL_6326_AL22			0x35000000
+#define TEXEL_6326_AL44			0x38000000
+#define TEXEL_6326_AL88			0x3c000000
+
+#define TEXEL_6326_RGB_332_8		0x40000000
+#define TEXEL_6326_RGB_233_8		0x41000000
+#define TEXEL_6326_RGB_232_8		0x42000000
+#define TEXEL_6326_ARGB_1232_8		0x43000000
+
+#define TEXEL_6326_RGB_555_16		0x50000000
+#define TEXEL_6326_RGB_565_16		0x51000000
+#define TEXEL_6326_ARGB_1555_16		0x52000000
+#define TEXEL_6326_ARGB_4444_16		0x53000000
+#define TEXEL_6326_ARGB_8332_16		0x54000000
+#define TEXEL_6326_ARGB_8233_16		0x55000000
+#define TEXEL_6326_ARGB_8232_16		0x56000000
+
+#define TEXEL_6326_ARGB_8565_24		0x63000000
+#define TEXEL_6326_ARGB_8555_24		0x67000000
+#define TEXEL_6326_RGB_888_24		0x68000000
+
+#define TEXEL_6326_ARGB_8888_32		0x73000000
+#define TEXEL_6326_ARGB_0888_32		0x74000000
+
+#define TEX_MAP_WRAP_U			0x00010000
+#define TEX_MAP_WRAP_V			0x00020000
+#define TEX_MAP_MIRROR_U		0x00040000
+#define TEX_MAP_MIRROR_V		0x00080000
+#define TEX_MAP_CLAMP_U			0x00100000
+#define TEX_MAP_CLAMP_V			0x00200000
+#define TEX_MAP_USE_CTB_SMOOTH		0x00400000
+#define TEX_MAP_USE_CTB			0x00800000
+
+#define TEX_FILTER_NEAREST		0x00000000
+#define TEX_FILTER_LINEAR		0x00000001
+#define TEX_FILTER_NEAREST_MIP_NEAREST	0x00000002
+#define TEX_FILTER_NEAREST_MIP_LINEAR	0x00000003
+#define TEX_FILTER_LINEAR_MIP_NEAREST	0x00000004
+#define TEX_FILTER_LINEAR_MIP_LINEAR	0x00000005
+#define TEX_FILTER_MAG_NEAREST		0x00000000
+#define TEX_FILTER_MAG_LINEAR		0x00000008
+
+/* 
+ * REG_6326_3D_TextureBlendSet		(0x8A3C - 0x8A3F)
+ */
+#define MASK_TextureTransparencyLowB	0x000000ff
+#define MASK_TextureTransparencyLowG	0x0000FF00
+#define MASK_TextureTransparencyLowR	0x00ff0000
+#define MASK_TextureBlend		0x0f000000
+
+#define TB_C_CS				(0 << 26)
+#define TB_C_CF				(1 << 26)
+#define TB_C_CFCS			(2 << 26) /* also 3 << 26 */
+#define TB_C_CFOMAS_ASCS		(4 << 26)
+#define TB_C_CSOMAF_AFCF		(6 << 26) /* also 7 << 26 */
+
+#define TB_A_AS				(0 << 24)
+#define TB_A_AF				(1 << 24)
+#define TB_A_AFAS			(1 << 24)
+
+/* 
+ * REG_6326_3D_TextureTransparencyColorHigh	(0x8A40 - 0x8A43)
+ */
+#define MASK_TextureTransparencyHighB	0x000000FF
+#define MASK_TextureTransparencyHighG	0x0000FF00
+#define MASK_TextureTransparencyHighR	0x00FF0000
+
+/*
+ * REG_3D_TexturePitch01-89		(0x8A6C - 0x8A7F)
+ */
+#define MASK_TexturePitchOdd		0x000003FF
+#define MASK_TexturePitchEven		0x03FF0000
+#define SHIFT_TexturePitchEven		16
+
+/* 
+ * REG_3D_TextureWidthHeightMix		(0x8A80 - 0x8A83)
+ */
+#define MASK_TextureWidthLog2		0xf0000000
+#define MASK_TextureHeightLog2		0x0f000000
+
+/* 
+ * REG_3D_TextureBorderColor		(0x8A90 - 0x8A93)
+ */
+#define MASK_TextureBorderColorB	0x000000FF
+#define MASK_TextureBorderColorG	0x0000FF00
+#define MASK_TextureBorderColorR	0x00FF0000
+#define MASK_TextureBorderColorA	0xFF000000
+
+#endif /* _sis6326_reg_h_ */
diff --git a/src/mesa/drivers/dri/sis/sis6326_state.c b/src/mesa/drivers/dri/sis/sis6326_state.c
new file mode 100644
index 0000000000..52008c7ea3
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis6326_state.c
@@ -0,0 +1,735 @@
+/*
+ * Copyright 2005 Eric Anholt
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <anholt@FreeBSD.org>
+ *
+ */
+
+#include "sis_context.h"
+#include "sis_state.h"
+#include "sis_tris.h"
+#include "sis_lock.h"
+#include "sis_tex.h"
+#include "sis_reg.h"
+
+#include "main/context.h"
+#include "main/colormac.h"
+#include "swrast/swrast.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "swrast_setup/swrast_setup.h"
+
+
+/* =============================================================
+ * Alpha blending
+ */
+
+static void
+sis6326DDAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *shadow = &smesa->prev;
+   __GLSiSHardware *hw = &smesa->current;
+   GLubyte ref8;
+   GLint passBits = 0;
+
+   CLAMPED_FLOAT_TO_UBYTE(ref8, ref);
+
+   /* Select the pass condition; an unknown func leaves the bits at 0. */
+   switch (func) {
+   case GL_NEVER:
+      passBits = S_ASET_PASS_NEVER;
+      break;
+   case GL_LESS:
+      passBits = S_ASET_PASS_LESS;
+      break;
+   case GL_EQUAL:
+      passBits = S_ASET_PASS_EQUAL;
+      break;
+   case GL_LEQUAL:
+      passBits = S_ASET_PASS_LEQUAL;
+      break;
+   case GL_GREATER:
+      passBits = S_ASET_PASS_GREATER;
+      break;
+   case GL_NOTEQUAL:
+      passBits = S_ASET_PASS_NOTEQUAL;
+      break;
+   case GL_GEQUAL:
+      passBits = S_ASET_PASS_GEQUAL;
+      break;
+   case GL_ALWAYS:
+      passBits = S_ASET_PASS_ALWAYS;
+      break;
+   }
+
+   /* hwAlpha is rebuilt whole and flagged dirty without comparing to prev. */
+   hw->hwAlpha = (ref8 << 16) | passBits;
+   shadow->hwAlpha = hw->hwAlpha;
+   smesa->GlobalFlag |= GFLAG_ALPHASETTING;
+}
+
+static void
+sis6326DDBlendFuncSeparate( GLcontext *ctx, 
+			    GLenum sfactorRGB, GLenum dfactorRGB,
+			    GLenum sfactorA,   GLenum dfactorA )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   /* Only the RGB factors are used; sfactorA and dfactorA are ignored. */
+   __GLSiSHardware *prev = &smesa->prev;
+   __GLSiSHardware *current = &smesa->current;
+
+   current->hwDstSrcBlend = 0;
+   /* A factor with no case below adds no bits, same as the ZERO encoding. */
+   switch (dfactorRGB)
+   {
+   case GL_ZERO:
+      current->hwDstSrcBlend |= S_DBLEND_ZERO;
+      break;
+   case GL_ONE:
+      current->hwDstSrcBlend |= S_DBLEND_ONE;
+      break;
+   case GL_SRC_COLOR:
+      current->hwDstSrcBlend |= S_DBLEND_SRC_COLOR;
+      break;
+   case GL_ONE_MINUS_SRC_COLOR:
+      current->hwDstSrcBlend |= S_DBLEND_INV_SRC_COLOR;
+      break;
+   case GL_SRC_ALPHA:
+      current->hwDstSrcBlend |= S_DBLEND_SRC_ALPHA;
+      break;
+   case GL_ONE_MINUS_SRC_ALPHA:
+      current->hwDstSrcBlend |= S_DBLEND_INV_SRC_ALPHA;
+      break;
+   case GL_DST_ALPHA:
+      current->hwDstSrcBlend |= S_DBLEND_DST_ALPHA;
+      break;
+   case GL_ONE_MINUS_DST_ALPHA:
+      current->hwDstSrcBlend |= S_DBLEND_INV_DST_ALPHA;
+      break;
+   }
+
+   switch (sfactorRGB)
+   {
+   case GL_ZERO:
+      current->hwDstSrcBlend |= S_SBLEND_ZERO;
+      break;
+   case GL_ONE:
+      current->hwDstSrcBlend |= S_SBLEND_ONE;
+      break;
+   case GL_SRC_ALPHA:
+      current->hwDstSrcBlend |= S_SBLEND_SRC_ALPHA;
+      break;
+   case GL_ONE_MINUS_SRC_ALPHA:
+      current->hwDstSrcBlend |= S_SBLEND_INV_SRC_ALPHA;
+      break;
+   case GL_DST_ALPHA:
+      current->hwDstSrcBlend |= S_SBLEND_DST_ALPHA;
+      break;
+   case GL_ONE_MINUS_DST_ALPHA:
+      current->hwDstSrcBlend |= S_SBLEND_INV_DST_ALPHA;
+      break;
+   case GL_DST_COLOR:
+      current->hwDstSrcBlend |= S_SBLEND_DST_COLOR;
+      break;
+   case GL_ONE_MINUS_DST_COLOR:
+      current->hwDstSrcBlend |= S_SBLEND_INV_DST_COLOR;
+      break;
+   case GL_SRC_ALPHA_SATURATE:
+      current->hwDstSrcBlend |= S_SBLEND_SRC_ALPHA_SAT;
+      break;
+   }
+   /* Mirror into prev and flag GFLAG_DSTBLEND only when the word changed. */
+   if (current->hwDstSrcBlend != prev->hwDstSrcBlend) {
+      prev->hwDstSrcBlend = current->hwDstSrcBlend;
+      smesa->GlobalFlag |= GFLAG_DSTBLEND;
+   }
+}
+
+/* =============================================================
+ * Depth testing
+ */
+
+static void
+sis6326DDDepthFunc( GLcontext *ctx, GLenum func )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *shadow = &smesa->prev;
+   __GLSiSHardware *hw = &smesa->current;
+   GLint zTest = 0;
+
+   /* Map the GL compare func; an unknown func leaves the field at 0. */
+   switch (func) {
+   case GL_NEVER:
+      zTest = S_ZSET_PASS_NEVER;
+      break;
+   case GL_LESS:
+      zTest = S_ZSET_PASS_LESS;
+      break;
+   case GL_EQUAL:
+      zTest = S_ZSET_PASS_EQUAL;
+      break;
+   case GL_LEQUAL:
+      zTest = S_ZSET_PASS_LEQUAL;
+      break;
+   case GL_GREATER:
+      zTest = S_ZSET_PASS_GREATER;
+      break;
+   case GL_NOTEQUAL:
+      zTest = S_ZSET_PASS_NOTEQUAL;
+      break;
+   case GL_GEQUAL:
+      zTest = S_ZSET_PASS_GEQUAL;
+      break;
+   case GL_ALWAYS:
+      zTest = S_ZSET_PASS_ALWAYS;
+      break;
+   }
+   hw->hwZ = (hw->hwZ & ~MASK_6326_ZTestMode) | zTest;
+   if (hw->hwZ != shadow->hwZ) {
+      shadow->hwZ = hw->hwZ;
+      smesa->GlobalFlag |= GFLAG_ZSETTING;
+   }
+}
+
+static void
+sis6326DDDepthMask( GLcontext *ctx, GLboolean flag )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *current = &smesa->current;
+   /* Honour flag: Z writes need the depth test on AND a writable mask. */
+   if (ctx->Depth.Test && flag)
+      current->hwCapEnable |= S_ENABLE_ZWrite;
+   else
+      current->hwCapEnable &= ~S_ENABLE_ZWrite;
+}
+
+/* =============================================================
+ * Fog
+ */
+
+static void
+sis6326DDFogfv( GLcontext *ctx, GLenum pname, const GLfloat *params )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hw = &smesa->current;
+   __GLSiSHardware *shadow = &smesa->prev;
+   GLint rgb;
+
+   /* Only GL_FOG_COLOR is acted on.  The color is taken from
+    * ctx->Fog.Color, not from params. */
+   if (pname != GL_FOG_COLOR)
+      return;
+
+   rgb  = FLOAT_TO_UBYTE( ctx->Fog.Color[0] ) << 16;
+   rgb |= FLOAT_TO_UBYTE( ctx->Fog.Color[1] ) << 8;
+   rgb |= FLOAT_TO_UBYTE( ctx->Fog.Color[2] );
+   hw->hwFog = 0x01000000 | rgb; /* 0x01000000 == FOGMODE_6326_LINEAR */
+
+   if (hw->hwFog == shadow->hwFog)
+      return;
+   shadow->hwFog = hw->hwFog;
+   smesa->GlobalFlag |= GFLAG_FOGSETTING;
+}
+
+/* =============================================================
+ * Clipping
+ */
+
+void
+sis6326UpdateClipping(GLcontext *ctx)
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   /* Recomputes the clip register values and sets GFLAG_CLIPPING when they change. */
+   __GLSiSHardware *prev = &smesa->prev;
+   __GLSiSHardware *current = &smesa->current;
+
+   GLint x1, y1, x2, y2;
+
+   x1 = 0;
+   y1 = 0;
+   x2 = smesa->width - 1;
+   y2 = smesa->height - 1;
+
+   if (ctx->Scissor.Enabled) {
+      if (ctx->Scissor.X > x1)
+	 x1 = ctx->Scissor.X;
+      if (ctx->Scissor.Y > y1)
+	 y1 = ctx->Scissor.Y;
+      if (ctx->Scissor.X + ctx->Scissor.Width - 1 < x2)
+	 x2 = ctx->Scissor.X + ctx->Scissor.Width - 1;
+      if (ctx->Scissor.Y + ctx->Scissor.Height - 1 < y2)
+	 y2 = ctx->Scissor.Y + ctx->Scissor.Height - 1;
+   }
+
+   y1 = Y_FLIP(y1);
+   y2 = Y_FLIP(y2);
+   /* NOTE: x1/y1/x2/y2 are dead for now - the scissored values below are commented out and the full size is always written. */
+   /*current->clipTopBottom = (y2 << 13) | y1;
+   current->clipLeftRight = (x1 << 13) | x2;*/ /* XXX */
+   current->clipTopBottom = (0 << 13) | smesa->height;
+   current->clipLeftRight = (0 << 13) | smesa->width;
+
+   if ((current->clipTopBottom != prev->clipTopBottom) ||
+       (current->clipLeftRight != prev->clipLeftRight)) {
+      prev->clipTopBottom = current->clipTopBottom;
+      prev->clipLeftRight = current->clipLeftRight;
+      smesa->GlobalFlag |= GFLAG_CLIPPING;
+   }
+}
+
+static void
+sis6326DDScissor( GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h )
+{
+   if (ctx->Scissor.Enabled) /* x, y, w, h are unused; the callee reads ctx->Scissor itself */
+      sis6326UpdateClipping( ctx );
+}
+
+/* =============================================================
+ * Culling
+ */
+
+static void
+sis6326UpdateCull( GLcontext *ctx )
+{
+   /* XXX culling: not implemented yet, this function is empty */
+}
+
+
+static void
+sis6326DDCullFace( GLcontext *ctx, GLenum mode )
+{
+   sis6326UpdateCull( ctx ); /* mode is unused; the callee is currently empty */
+}
+
+static void
+sis6326DDFrontFace( GLcontext *ctx, GLenum mode )
+{
+   sis6326UpdateCull( ctx ); /* mode is unused; the callee is currently empty */
+}
+
+/* =============================================================
+ * Masks
+ */
+
+static void sis6326DDColorMask( GLcontext *ctx,
+				GLboolean r, GLboolean g,
+				GLboolean b, GLboolean a )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   /* The alpha flag only matters when the visual has alpha bits. */
+   const GLboolean alphaOk = (ctx->Visual.alphaBits == 0) || a;
+   const GLboolean allOn = r && g && b && alphaOk;
+
+   /* Request the WRITEMASK fallback whenever some channel is masked off. */
+   FALLBACK(smesa, SIS_FALLBACK_WRITEMASK, allOn ? 0 : 1);
+}
+
+/* =============================================================
+ * Rendering attributes
+ */
+
+static void sis6326UpdateSpecular(GLcontext *ctx)
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hw = &smesa->current;
+
+   /* S_ENABLE_Specular tracks NEED_SECONDARY_COLOR(ctx): clear, then set. */
+   hw->hwCapEnable &= ~S_ENABLE_Specular;
+   if (NEED_SECONDARY_COLOR(ctx))
+      hw->hwCapEnable |= S_ENABLE_Specular;
+}
+
+static void sis6326DDLightModelfv(GLcontext *ctx, GLenum pname,
+			      const GLfloat *param)
+{
+   if (pname == GL_LIGHT_MODEL_COLOR_CONTROL) { /* the only pname handled; param is unused */
+      sis6326UpdateSpecular(ctx);
+   }
+}
+static void sis6326DDShadeModel( GLcontext *ctx, GLenum mode )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   /* mode is unused here; only the cached hw_primitive is invalidated. */
+   /* Signal to sisRasterPrimitive to recalculate dwPrimitiveSet */
+   smesa->hw_primitive = -1;
+}
+
+/* =============================================================
+ * Window position
+ */
+
+/* =============================================================
+ * Viewport
+ */
+
+static void sis6326CalcViewport( GLcontext *ctx )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   GLfloat *m = smesa->hw_viewport;
+   /* Derive smesa->hw_viewport from ctx->Viewport._WindowMap: X/Y get the
+    * SUBPIXEL offsets, Y is negated and rebased on the drawable height,
+    * Z terms are multiplied by depth_scale.  See also sis_translate_vertex. */
+   m[MAT_SX] =   v[MAT_SX];
+   m[MAT_TX] =   v[MAT_TX] + SUBPIXEL_X;
+   m[MAT_SY] = - v[MAT_SY];
+   m[MAT_TY] = - v[MAT_TY] + smesa->driDrawable->h + SUBPIXEL_Y;
+   m[MAT_SZ] =   v[MAT_SZ] * smesa->depth_scale;
+   m[MAT_TZ] =   v[MAT_TZ] * smesa->depth_scale;
+}
+
+static void sis6326DDViewport( GLcontext *ctx,
+			   GLint x, GLint y,
+			   GLsizei width, GLsizei height )
+{
+   sis6326CalcViewport( ctx ); /* x/y/width/height unused; the callee reads ctx->Viewport */
+}
+
+/* glDepthRange hook.  nearval/farval are not used directly; the hardware
+ * viewport is recomputed from the context by sis6326CalcViewport.
+ */
+static void sis6326DDDepthRange( GLcontext *ctx,
+			     GLclampd nearval, GLclampd farval )
+{
+   (void) nearval;
+   (void) farval;
+
+   sis6326CalcViewport( ctx );
+}
+
+/* =============================================================
+ * Miscellaneous
+ */
+
+/* glLogicOp hook.  Translates the GL logic opcode into the ROP2 field of
+ * the destination-setting word and marks GFLAG_DESTSETTING when the word
+ * changed.  Nothing happens while colour logic op is disabled in the
+ * context.
+ *
+ * NOTE(review): sis6326DDEnable calls this with GL_COPY when
+ * GL_COLOR_LOGIC_OP is being disabled; if ctx->Color.ColorLogicOpEnabled
+ * is already GL_FALSE at that point, the early return below makes that
+ * call a no-op -- confirm against the core's enable ordering.
+ */
+static void
+sis6326DDLogicOpCode( GLcontext *ctx, GLenum opcode )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hwPrev = &smesa->prev;
+   __GLSiSHardware *hwCur = &smesa->current;
+   GLuint rop = 0;
+
+   if (!ctx->Color.ColorLogicOpEnabled)
+      return;
+
+   switch (opcode)
+   {
+   case GL_CLEAR:         rop = LOP_CLEAR;         break;
+   case GL_SET:           rop = LOP_SET;           break;
+   case GL_COPY:          rop = LOP_COPY;          break;
+   case GL_COPY_INVERTED: rop = LOP_COPY_INVERTED; break;
+   case GL_NOOP:          rop = LOP_NOOP;          break;
+   case GL_INVERT:        rop = LOP_INVERT;        break;
+   case GL_AND:           rop = LOP_AND;           break;
+   case GL_NAND:          rop = LOP_NAND;          break;
+   case GL_OR:            rop = LOP_OR;            break;
+   case GL_NOR:           rop = LOP_NOR;           break;
+   case GL_XOR:           rop = LOP_XOR;           break;
+   case GL_EQUIV:         rop = LOP_EQUIV;         break;
+   case GL_AND_REVERSE:   rop = LOP_AND_REVERSE;   break;
+   case GL_AND_INVERTED:  rop = LOP_AND_INVERTED;  break;
+   case GL_OR_REVERSE:    rop = LOP_OR_REVERSE;    break;
+   case GL_OR_INVERTED:   rop = LOP_OR_INVERTED;   break;
+   default:
+      /* Unrecognised opcode: the ROP2 field is left cleared. */
+      break;
+   }
+
+   /* Replace the ROP2 field with the selected raster op. */
+   hwCur->hwDstSet &= ~MASK_ROP2;
+   hwCur->hwDstSet |= rop;
+
+   if (hwCur->hwDstSet != hwPrev->hwDstSet) {
+      hwPrev->hwDstSet = hwCur->hwDstSet;
+      smesa->GlobalFlag |= GFLAG_DESTSETTING;
+   }
+}
+
+/* glDrawBuffer hook.  Points the destination offset and pitch at the front
+ * or back colour buffer.  Multiple colour draw buffers, or any other single
+ * target, raise SIS_FALLBACK_DRAW_BUFFER instead.  The mode argument is not
+ * read; the target comes from ctx->DrawBuffer.
+ */
+void sis6326DDDrawBuffer( GLcontext *ctx, GLenum mode )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hwPrev = &smesa->prev;
+   __GLSiSHardware *hwCur = &smesa->current;
+
+   (void) mode;
+
+   /* Setting SIS_DRAW_FRONT in the environment forces the front buffer. */
+   if (getenv("SIS_DRAW_FRONT") != NULL)
+      ctx->DrawBuffer->_ColorDrawBufferIndexes[0] = BUFFER_FRONT_LEFT;
+
+   if (ctx->DrawBuffer->_NumColorDrawBuffers > 1) {
+      FALLBACK( smesa, SIS_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   hwCur->hwDstSet &= ~MASK_DstBufferPitch;
+
+   if (ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
+      hwCur->hwOffsetDest = smesa->front.offset;
+      hwCur->hwDstSet |= smesa->front.pitch;
+   } else if (ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) {
+      hwCur->hwOffsetDest = smesa->back.offset;
+      hwCur->hwDstSet |= smesa->back.pitch;
+   } else {
+      FALLBACK( smesa, SIS_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   /* A buffer the hardware can render to was selected. */
+   FALLBACK( smesa, SIS_FALLBACK_DRAW_BUFFER, GL_FALSE );
+
+   /* Either change dirties the destination setting. */
+   if (hwCur->hwDstSet != hwPrev->hwDstSet) {
+      hwPrev->hwDstSet = hwCur->hwDstSet;
+      smesa->GlobalFlag |= GFLAG_DESTSETTING;
+   }
+
+   if (hwCur->hwOffsetDest != hwPrev->hwOffsetDest) {
+      hwPrev->hwOffsetDest = hwCur->hwOffsetDest;
+      smesa->GlobalFlag |= GFLAG_DESTSETTING;
+   }
+}
+
+/* =============================================================
+ * Polygon stipple
+ */
+
+/* =============================================================
+ * Render mode
+ */
+
+/* =============================================================
+ * State enable/disable
+ */
+
+/* glEnable/glDisable hook.  Folds the capability `cap' (being switched to
+ * `state') into the pending hwCapEnable word, forwards it to another state
+ * hook, or toggles a software fallback.  Capabilities not listed in the
+ * switch are ignored here.
+ */
+static void
+sis6326DDEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+
+   __GLSiSHardware *current = &smesa->current;
+
+   switch (cap)
+   {
+   case GL_ALPHA_TEST:
+      if (state)
+         current->hwCapEnable |= S_ENABLE_AlphaTest;
+      else
+         current->hwCapEnable &= ~S_ENABLE_AlphaTest;
+      break;
+   case GL_BLEND:
+      /* TODO: */
+      if (state)
+      /* if (state & !ctx->Color.ColorLogicOpEnabled) */
+         current->hwCapEnable |= S_ENABLE_Blend;
+      else
+         current->hwCapEnable &= ~S_ENABLE_Blend;
+      break;
+   case GL_CULL_FACE:
+      /* XXX culling */
+      break;
+   case GL_DEPTH_TEST:
+      /* The Z test is only switched on when a depth buffer offset is set. */
+      if (state && smesa->depth.offset != 0)
+         current->hwCapEnable |= S_ENABLE_ZTest;
+      else
+         current->hwCapEnable &= ~S_ENABLE_ZTest;
+      /* Re-run the depth-mask hook with the context's current mask. */
+      sis6326DDDepthMask( ctx, ctx->Depth.Mask );
+      break;
+   case GL_DITHER:
+      if (state)
+         current->hwCapEnable |= S_ENABLE_Dither;
+      else
+         current->hwCapEnable &= ~S_ENABLE_Dither;
+      break;
+   case GL_FOG:
+      if (state)
+         current->hwCapEnable |= S_ENABLE_Fog;
+      else
+         current->hwCapEnable &= ~S_ENABLE_Fog;
+      break;
+   case GL_COLOR_LOGIC_OP:
+      /* NOTE(review): sis6326DDLogicOpCode returns early when
+       * ctx->Color.ColorLogicOpEnabled is false, so the GL_COPY reset below
+       * only takes effect if that flag is still set when this hook runs --
+       * confirm against the core's enable ordering.
+       */
+      if (state)
+         sis6326DDLogicOpCode( ctx, ctx->Color.LogicOp );
+      else
+         sis6326DDLogicOpCode( ctx, GL_COPY );
+      break;
+   case GL_SCISSOR_TEST:
+      sis6326UpdateClipping( ctx );
+      break;
+   case GL_STENCIL_TEST:
+      /* Stencil testing is handled through the SIS_FALLBACK_STENCIL
+       * fallback rather than a hardware enable bit.
+       */
+      if (state) {
+         FALLBACK(smesa, SIS_FALLBACK_STENCIL, 1);
+      } else {
+         FALLBACK(smesa, SIS_FALLBACK_STENCIL, 0);
+      }
+      break;
+   case GL_LIGHTING:
+   case GL_COLOR_SUM_EXT:
+      /* Both caps feed the specular enable decision. */
+      sis6326UpdateSpecular(ctx);
+      break;
+    }
+}
+
+/* =============================================================
+ * State initialization, management
+ */
+
+/* Called before beginning of rendering.  Refreshes texture state when
+ * _NEW_TEXTURE is pending, latches a changed capability-enable word, then
+ * runs the render-state and texture-state updates whose GlobalFlag bits
+ * are set.
+ */
+void
+sis6326UpdateHWState( GLcontext *ctx )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hwPrev = &smesa->prev;
+   __GLSiSHardware *hwCur = &smesa->current;
+
+   if ((smesa->NewGLState & _NEW_TEXTURE) != 0)
+      sisUpdateTextureState( ctx );
+
+   /* Any differing bit in the enable word makes it dirty. */
+   if (hwCur->hwCapEnable != hwPrev->hwCapEnable) {
+      hwPrev->hwCapEnable = hwCur->hwCapEnable;
+      smesa->GlobalFlag |= GFLAG_ENABLESETTING;
+   }
+
+   if ((smesa->GlobalFlag & GFLAG_RENDER_STATES) != 0)
+      sis_update_render_state( smesa );
+
+   if ((smesa->GlobalFlag & GFLAG_TEXTURE_STATES) != 0)
+      sis_update_texture_state( smesa );
+}
+
+/* UpdateState hook.  Passes the dirty-state bits on to the swrast,
+ * swrast_setup, vbo and tnl modules, then accumulates them in
+ * smesa->NewGLState.
+ */
+static void
+sis6326DDInvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+
+   smesa->NewGLState |= new_state;
+}
+
+/* Initialize the context's hardware state.  Fills in smesa->current with
+ * the defaults below, seeds the Z clear pattern, culling and fog state via
+ * their hooks, and finally copies `current' over `prev'.
+ */
+void sis6326DDInitState( sisContextPtr smesa )
+{
+   GLcontext *ctx = smesa->glCtx;
+   __GLSiSHardware *hwCur = &smesa->current;
+
+   /* add Texture Perspective Enable */
+   hwCur->hwCapEnable = S_ENABLE_TextureCache |
+       S_ENABLE_TexturePerspective | S_ENABLE_Dither;
+   /* Z writes are enabled only when the visual has depth bits. */
+   if (ctx->Visual.depthBits > 0)
+      hwCur->hwCapEnable |= S_ENABLE_ZWrite;
+
+   /* Z test mode is LESS */
+   hwCur->hwZ = S_ZSET_PASS_LESS | S_ZSET_FORMAT_16;
+
+   /* Alpha test mode is ALWAYS, alpha ref value is 0 */
+   hwCur->hwAlpha = S_ASET_PASS_ALWAYS;
+
+   /* ROP2 is COPYPEN; the destination format follows the pixel size. */
+   hwCur->hwDstSet = LOP_COPY;
+   if (smesa->bytesPerPixel == 2)
+      hwCur->hwDstSet |= DST_FORMAT_RGB_565;
+   else if (smesa->bytesPerPixel == 4)
+      hwCur->hwDstSet |= DST_FORMAT_ARGB_8888;
+
+   /* LinePattern is 0, Repeat Factor is 0 */
+   hwCur->hwLinePattern = 0x00008000;
+
+   /* Src blend is BLEND_ONE, Dst blend is D3DBLEND_ZERO */
+   hwCur->hwDstSrcBlend = S_SBLEND_ONE | S_DBLEND_ZERO;
+
+   smesa->depth_scale = 1.0 / (GLfloat)0xffff;
+   smesa->clearTexCache = GL_TRUE;
+   smesa->clearColorPattern = 0;
+
+   sis6326UpdateZPattern(smesa, 1.0);
+   sis6326UpdateCull(ctx);
+
+   /* Set initial fog settings. Start and end are the same case.  */
+   sis6326DDFogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
+   sis6326DDFogfv( ctx, GL_FOG_END, &ctx->Fog.End );
+   sis6326DDFogfv( ctx, GL_FOG_MODE, NULL );
+
+   /* Start with `prev' identical to `current'. */
+   memcpy(&smesa->prev, hwCur, sizeof(smesa->prev));
+}
+
+/* Initialize the driver's state functions: installs the sis6326 state and
+ * clear hooks into ctx->Driver.
+ */
+void sis6326DDInitStateFuncs( GLcontext *ctx )
+{
+   /* State invalidation */
+   ctx->Driver.UpdateState		= sis6326DDInvalidateState;
+
+   /* Clears */
+   ctx->Driver.Clear			= sis6326DDClear;
+   ctx->Driver.ClearColor		= sis6326DDClearColor;
+   ctx->Driver.ClearDepth		= sis6326DDClearDepth;
+
+   /* Enables and per-fragment state */
+   ctx->Driver.Enable			= sis6326DDEnable;
+   ctx->Driver.AlphaFunc		= sis6326DDAlphaFunc;
+   ctx->Driver.BlendFuncSeparate	= sis6326DDBlendFuncSeparate;
+   ctx->Driver.ColorMask		= sis6326DDColorMask;
+   ctx->Driver.LogicOpcode	 	= sis6326DDLogicOpCode;
+   ctx->Driver.Fogfv			= sis6326DDFogfv;
+   ctx->Driver.Scissor			= sis6326DDScissor;
+   ctx->Driver.DrawBuffer		= sis6326DDDrawBuffer;
+
+   /* Depth */
+   ctx->Driver.DepthFunc		= sis6326DDDepthFunc;
+   ctx->Driver.DepthMask		= sis6326DDDepthMask;
+   ctx->Driver.DepthRange		= sis6326DDDepthRange;
+
+   /* Geometry, shading and viewport */
+   ctx->Driver.CullFace			= sis6326DDCullFace;
+   ctx->Driver.FrontFace	 	= sis6326DDFrontFace;
+   ctx->Driver.ShadeModel		= sis6326DDShadeModel;
+   ctx->Driver.LightModelfv		= sis6326DDLightModelfv;
+   ctx->Driver.Viewport			= sis6326DDViewport;
+}
diff --git a/src/mesa/drivers/dri/sis/sis_alloc.c b/src/mesa/drivers/dri/sis/sis_alloc.c
new file mode 100644
index 0000000000..ce34e44da2
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_alloc.c
@@ -0,0 +1,198 @@
+/**************************************************************************
+
+Copyright 2000 Silicon Integrated Systems Corp, Inc., HsinChu, Taiwan.
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Sung-Ching Lin <sclin@sis.com.tw>
+ *   Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#include "sis_context.h"
+#include "sis_alloc.h"
+
+#include "sis_common.h"
+
+#include <unistd.h>
+
+/* Byte alignment applied to the start of the Z buffer, and the slack added
+ * to its allocation size (both used by sisAllocZStencilBuffer).
+ */
+#define Z_BUFFER_HW_ALIGNMENT 16
+#define Z_BUFFER_HW_PLUS (16 + 4)
+
+/* 3D engine uses 2, and bitblt uses 4 */
+/* Same pair of values for the back colour buffer (sisAllocBackbuffer). */
+#define DRAW_BUFFER_HW_ALIGNMENT 16
+#define DRAW_BUFFER_HW_PLUS (16 + 4)
+
+/* Round `value' up to the next multiple of `align' (integer arithmetic). */
+#define ALIGNMENT(value, align) (((value) + (align) - 1) / (align) * (align))
+
+/* Byte total accumulated by sisAllocFB. */
+static int _total_video_memory_used = 0;
+/* Live-allocation counter; only touched inside the VERBOSE_SIS_MEMORY
+ * debug prints of the alloc/free functions below.
+ */
+static int _total_video_memory_count = 0;
+
+/* Allocate `size' bytes of framebuffer memory through the DRM_SIS_FB_ALLOC
+ * command.
+ *
+ * On success, *handle receives the kernel's handle for the block (to be
+ * passed to sisFreeFB) and the return value is smesa->FbBase plus the
+ * offset reported by the kernel.  Returns NULL, leaving *handle untouched,
+ * when the command fails or reports offset 0.
+ */
+void *
+sisAllocFB( sisContextPtr smesa, GLuint size, void **handle )
+{
+   drm_sis_mem_t fb;
+
+   fb.context = smesa->hHWContext;
+   fb.size = size;
+   if (drmCommandWriteRead( smesa->driFd, DRM_SIS_FB_ALLOC, &fb, 
+      sizeof(drm_sis_mem_t) ) || fb.offset == 0)
+   {
+      return NULL;
+   }
+   *handle = (void *)fb.free;
+
+   /* Account only for memory that was actually handed out. */
+   _total_video_memory_used += size;
+
+   if (SIS_VERBOSE & VERBOSE_SIS_MEMORY) {
+      /* size is unsigned, so print it with %u as sisAllocAGP does. */
+      fprintf(stderr, "sisAllocFB: size=%u, offset=%lu, pid=%d, count=%d\n", 
+              size, fb.offset, (GLint)getpid(), 
+              ++_total_video_memory_count);
+   }
+
+   return (void *)(smesa->FbBase + fb.offset);
+}
+
+/* Release a framebuffer block through DRM_SIS_FB_FREE.  `handle' is the
+ * value sisAllocFB stored through its handle argument.  The result of the
+ * DRM command is not checked.
+ */
+void
+sisFreeFB( sisContextPtr smesa, void *handle )
+{
+   drm_sis_mem_t req;
+
+   if (SIS_VERBOSE & VERBOSE_SIS_MEMORY) {
+      fprintf(stderr, "sisFreeFB: free=%p, pid=%d, count=%d\n", 
+              handle, (GLint)getpid(), --_total_video_memory_count);
+   }
+
+   req.free = handle;
+   req.context = smesa->hHWContext;
+   drmCommandWrite( smesa->driFd, DRM_SIS_FB_FREE, &req, sizeof(req) );
+}
+
+/* Allocate `size' bytes of AGP memory through DRM_SIS_AGP_ALLOC.
+ *
+ * Returns NULL when the context has no AGP space (AGPSize == 0), when the
+ * command fails, or when it reports offset 0.  Otherwise *handle receives
+ * the kernel's handle (for sisFreeAGP) and the return value is
+ * smesa->AGPBase plus the reported offset.
+ */
+void *
+sisAllocAGP( sisContextPtr smesa, GLuint size, void **handle )
+{
+   drm_sis_mem_t req;
+
+   if (smesa->AGPSize == 0)
+      return NULL;
+
+   req.size = size;
+   req.context = smesa->hHWContext;
+
+   if (drmCommandWriteRead( smesa->driFd, DRM_SIS_AGP_ALLOC, &req,
+                            sizeof(req) ) != 0)
+      return NULL;
+   if (req.offset == 0)
+      return NULL;
+
+   *handle = (void *)req.free;
+
+   if (SIS_VERBOSE & VERBOSE_SIS_MEMORY) {
+      fprintf(stderr, "sisAllocAGP: size=%u, offset=%lu, pid=%d, count=%d\n", 
+              size, req.offset, (GLint)getpid(), 
+              ++_total_video_memory_count);
+   }
+
+   return (void *)(smesa->AGPBase + req.offset);
+}
+
+/* Release an AGP block through DRM_SIS_AGP_FREE.  `handle' is the value
+ * sisAllocAGP stored through its handle argument.  The result of the DRM
+ * command is not checked.
+ */
+void
+sisFreeAGP( sisContextPtr smesa, void *handle )
+{
+   drm_sis_mem_t req;
+
+   if (SIS_VERBOSE & VERBOSE_SIS_MEMORY) {
+      fprintf(stderr, "sisFreeAGP: free=%p, pid=%d, count=%d\n", 
+              handle, (GLint)getpid(), --_total_video_memory_count);
+   }
+
+   req.free = handle;
+   req.context = smesa->hHWContext;
+   drmCommandWrite( smesa->driFd, DRM_SIS_AGP_FREE, &req, sizeof(req) );
+}
+
+/* Allocate the combined depth/stencil buffer for the current drawable.
+ *
+ * Bytes per pixel come from the visual's depth plus stencil bits; the pitch
+ * is rounded up to 4 bytes and the allocation carries Z_BUFFER_HW_PLUS
+ * extra bytes so the start can be rounded up to Z_BUFFER_HW_ALIGNMENT.
+ * The stencil descriptor is filled in as an alias of the depth one.
+ * A failed allocation is reported through sis_fatal_error.
+ */
+void
+sisAllocZStencilBuffer( sisContextPtr smesa )
+{
+   int bytesPerPixel = ( smesa->glCtx->Visual.depthBits +
+                         smesa->glCtx->Visual.stencilBits ) / 8;
+   char *base;
+
+   smesa->depth.bpp = bytesPerPixel * 8;
+   smesa->depth.pitch = ALIGNMENT(smesa->driDrawable->w * bytesPerPixel, 4);
+   smesa->depth.size = smesa->depth.pitch * smesa->driDrawable->h;
+   smesa->depth.size += Z_BUFFER_HW_PLUS;
+
+   base = sisAllocFB(smesa, smesa->depth.size, &smesa->depth.handle);
+   if (base == NULL)
+      sis_fatal_error("Failure to allocate Z buffer.\n");
+
+   /* Round the mapped address up to the hardware alignment. */
+   base = (char *)ALIGNMENT((unsigned long)base, Z_BUFFER_HW_ALIGNMENT);
+
+   smesa->depth.offset = base - (char *)smesa->FbBase;
+   smesa->depth.map = base;
+
+   /* stencil buffer is same as depth buffer */
+   smesa->stencil.bpp = smesa->depth.bpp;
+   smesa->stencil.pitch = smesa->depth.pitch;
+   smesa->stencil.size = smesa->depth.size;
+   smesa->stencil.handle = smesa->depth.handle;
+   smesa->stencil.offset = smesa->depth.offset;
+   smesa->stencil.map = smesa->depth.map;
+}
+
+/* Free the depth/stencil allocation and clear the depth map and offset.
+ *
+ * NOTE(review): the stencil descriptor filled in by sisAllocZStencilBuffer
+ * aliases this allocation and is not reset here -- confirm nothing reads
+ * smesa->stencil.map/offset after the free.
+ */
+void
+sisFreeZStencilBuffer( sisContextPtr smesa )
+{
+   sisFreeFB(smesa, smesa->depth.handle);
+
+   smesa->depth.offset = 0;
+   smesa->depth.map = NULL;
+}
+
+/* Allocate the back colour buffer for the current drawable.
+ *
+ * The pitch is the drawable width in bytes rounded up to 4; the allocation
+ * carries DRAW_BUFFER_HW_PLUS extra bytes so the start can be rounded up to
+ * DRAW_BUFFER_HW_ALIGNMENT.  A failed allocation is reported through
+ * sis_fatal_error.
+ */
+void
+sisAllocBackbuffer( sisContextPtr smesa )
+{
+   int bytesPerPixel = smesa->bytesPerPixel;
+   char *base;
+
+   smesa->back.bpp = smesa->bytesPerPixel * 8;
+   smesa->back.pitch = ALIGNMENT(smesa->driDrawable->w * bytesPerPixel, 4);
+   smesa->back.size = smesa->back.pitch * smesa->driDrawable->h;
+   smesa->back.size += DRAW_BUFFER_HW_PLUS;
+
+   base = sisAllocFB(smesa, smesa->back.size, &smesa->back.handle);
+   if (base == NULL)
+      sis_fatal_error("Failure to allocate back buffer.\n");
+
+   /* Round the mapped address up to the hardware alignment. */
+   base = (char *)ALIGNMENT((unsigned long)base, DRAW_BUFFER_HW_ALIGNMENT);
+
+   smesa->back.offset = base - (char *)smesa->FbBase;
+   smesa->back.map = base;
+}
+
+/* Free the back colour buffer allocation and clear its map and offset. */
+void
+sisFreeBackbuffer( sisContextPtr smesa )
+{
+   sisFreeFB(smesa, smesa->back.handle);
+
+   smesa->back.offset = 0;
+   smesa->back.map = NULL;
+}
diff --git a/src/mesa/drivers/dri/sis/sis_alloc.h b/src/mesa/drivers/dri/sis/sis_alloc.h
new file mode 100644
index 0000000000..eb784afad9
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_alloc.h
@@ -0,0 +1,43 @@
+/**************************************************************************
+
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *    Eric Anholt <anholt@FreeBSD.org>
+ */
+
+/* Memory kind tags.  NOTE(review): not referenced by the allocator code in
+ * sis_alloc.c -- presumably used by callers to record where a buffer lives.
+ */
+enum {
+   VIDEO_TYPE,
+   AGP_TYPE
+};
+
+/* Depth/stencil and back colour buffers sized from the current drawable. */
+void sisAllocZStencilBuffer( sisContextPtr smesa );
+void sisFreeZStencilBuffer( sisContextPtr smesa );
+void sisAllocBackbuffer( sisContextPtr smesa );
+void sisFreeBackbuffer ( sisContextPtr smesa );
+/* Raw allocators: return a mapped address or NULL, and store the handle
+ * needed by the matching free function through `handle'.
+ */
+void *sisAllocFB( sisContextPtr smesa, GLuint size, void **handle );
+void sisFreeFB( sisContextPtr smesa, void *handle );
+void *sisAllocAGP( sisContextPtr smesa, GLuint size, void **handle );
+void sisFreeAGP( sisContextPtr smesa, void *handle );
diff --git a/src/mesa/drivers/dri/sis/sis_clear.c b/src/mesa/drivers/dri/sis/sis_clear.c
new file mode 100644
index 0000000000..d358ef62dc
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_clear.c
@@ -0,0 +1,408 @@
+/**************************************************************************
+
+Copyright 2000 Silicon Integrated Systems Corp, Inc., HsinChu, Taiwan.
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Sung-Ching Lin <sclin@sis.com.tw>
+ *   Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#include "sis_context.h"
+#include "sis_state.h"
+#include "sis_lock.h"
+
+#include "swrast/swrast.h"
+#include "main/macros.h"
+
+/* Forward declarations of the clear helpers defined later in this file. */
+
+/* Clears through the 3D engine; returns the buffer bits still to clear. */
+static GLbitfield sis_3D_Clear( GLcontext * ctx, GLbitfield mask,
+				GLint x, GLint y, GLint width,
+				GLint height );
+/* Pattern-fill blit of the back and/or front colour buffer. */
+static void sis_clear_color_buffer( GLcontext *ctx, GLenum mask, GLint x,
+				    GLint y, GLint width, GLint height );
+/* Pattern-fill blit of the depth/stencil buffer. */
+static void sis_clear_z_stencil_buffer( GLcontext * ctx,
+					GLbitfield mask, GLint x,
+					GLint y, GLint width,
+					GLint height );
+
+/* Pack an 8-bit-per-channel clear colour into smesa->clearColorPattern
+ * according to smesa->colorFormat:
+ *   - DST_FORMAT_ARGB_8888: one A8R8G8B8 word;
+ *   - DST_FORMAT_RGB_565:   the 16-bit pixel replicated into both halves.
+ * Any other format is reported through sis_fatal_error.
+ *
+ * The packing is done in unsigned arithmetic: GLubyte operands promote to
+ * int, and shifting a value >= 128 left by 24 (or replicating a 565 pixel
+ * with its top bit set) would overflow a signed int.
+ */
+static void
+set_color_pattern( sisContextPtr smesa, GLubyte red, GLubyte green,
+		   GLubyte blue, GLubyte alpha )
+{
+   GLuint pattern;
+
+   /* XXX only RGB565 and ARGB8888 */
+   switch (smesa->colorFormat)
+   {
+   case DST_FORMAT_ARGB_8888:
+      pattern = ((GLuint)alpha << 24) | ((GLuint)red << 16) |
+	 ((GLuint)green << 8) | (GLuint)blue;
+      smesa->clearColorPattern = pattern;
+      break;
+   case DST_FORMAT_RGB_565:
+      pattern = (((GLuint)red >> 3) << 11) |
+	 (((GLuint)green >> 2) << 5) | ((GLuint)blue >> 3);
+      /* Replicate the 16-bit pixel into the upper half of the word. */
+      pattern |= pattern << 16;
+      smesa->clearColorPattern = pattern;
+      break;
+   default:
+      sis_fatal_error("Bad dst color format\n");
+   }
+}
+
+/* Build smesa->clearZStencilPattern from a clear depth `z' and a clear
+ * stencil value, according to smesa->zFormat:
+ *   - SiS_ZFORMAT_Z16:   16-bit depth replicated into both halves;
+ *   - SiS_ZFORMAT_S8Z24: 24-bit depth in the low bits, stencil in the top
+ *                        byte;
+ *   - SiS_ZFORMAT_Z32:   32-bit depth.
+ * Any other format is reported through sis_fatal_error.
+ */
+void
+sisUpdateZStencilPattern( sisContextPtr smesa, GLclampd z, GLint stencil )
+{
+   /* Initialised so a defined value is stored even if the fatal-error
+    * path below were to return.
+    */
+   GLuint zPattern = 0;
+
+   switch (smesa->zFormat)
+   {
+   case SiS_ZFORMAT_Z16:
+      CLAMPED_FLOAT_TO_USHORT(zPattern, z);
+      zPattern |= zPattern << 16;
+      break;
+   case SiS_ZFORMAT_S8Z24:
+      zPattern = FLOAT_TO_UINT(z) >> 8;
+      /* Shift as unsigned: a signed stencil >= 128 shifted left by 24
+       * would overflow int.
+       */
+      zPattern |= (GLuint)stencil << 24;
+      break;
+   case SiS_ZFORMAT_Z32:
+      zPattern = FLOAT_TO_UINT(z);
+      break;
+   default:
+      sis_fatal_error("Bad Z format\n");
+   }
+   smesa->clearZStencilPattern = zPattern;
+}
+
+/* glClear hook.  Clears the buffers selected in `mask' over the draw
+ * buffer's _Xmin.._Xmax / _Ymin.._Ymax rectangle, using the 3D engine for
+ * masked clears and 2D pattern blits otherwise.  Any bits still set after
+ * the hardware paths are handed to _swrast_Clear.
+ */
+void
+sisDDClear( GLcontext * ctx, GLbitfield mask )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+
+   GLint x1, y1, width1, height1;
+
+   /* get region after locking: */
+   /* NOTE(review): despite the comment above, these reads happen before
+    * LOCK_HARDWARE() below -- confirm whether the ordering matters.
+    */
+   x1 = ctx->DrawBuffer->_Xmin;
+   y1 = ctx->DrawBuffer->_Ymin;
+   width1 = ctx->DrawBuffer->_Xmax - x1;
+   height1 = ctx->DrawBuffer->_Ymax - y1;
+   /* Convert the top edge of the rectangle with Y_FLIP. */
+   y1 = Y_FLIP(y1 + height1 - 1);
+
+   /* Mask out any non-existent buffers */
+   /* (depth is also dropped when depth writes are masked off) */
+   if (ctx->Visual.depthBits == 0 || !ctx->Depth.Mask)
+      mask &= ~BUFFER_BIT_DEPTH;
+   if (ctx->Visual.stencilBits == 0)
+      mask &= ~BUFFER_BIT_STENCIL;
+
+   LOCK_HARDWARE();
+
+   /* The 3d clear code is used for masked clears because apparently the SiS
+    * 300-series can't do write masks for 2d blits.  3d isn't used in general
+    * because it's slower, even in the case of clearing multiple buffers.
+    */
+   /* XXX: Appears to be broken with stencil. */
+   /* Taken when a colour buffer is being cleared with a colour/alpha write
+    * mask bit set in hwCapEnable2, or when stencil is being cleared with a
+    * partial stencil write mask.
+    */
+   if ((smesa->current.hwCapEnable2 & (MASK_AlphaMaskWriteEnable |
+      MASK_ColorMaskWriteEnable) &&
+      (mask & (BUFFER_BIT_BACK_LEFT | BUFFER_BIT_FRONT_LEFT)) != 0) ||
+      ((ctx->Stencil.WriteMask[0] & 0xff) != 0xff && 
+       (mask & BUFFER_BIT_STENCIL) != 0) )
+   {
+      mask = sis_3D_Clear( ctx, mask, x1, y1, width1, height1 );
+   }
+
+   if ( mask & BUFFER_BIT_FRONT_LEFT || mask & BUFFER_BIT_BACK_LEFT) {
+      sis_clear_color_buffer( ctx, mask, x1, y1, width1, height1 );
+      mask &= ~(BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT);
+   }
+
+   if (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)) {
+      /* Without a depth buffer offset the bits are dropped without a
+       * clear.
+       */
+      if (smesa->depth.offset != 0)
+         sis_clear_z_stencil_buffer( ctx, mask, x1, y1, width1, height1 );
+      mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL);
+   }
+
+   UNLOCK_HARDWARE();
+
+   /* Whatever is left goes to the software rasterizer. */
+   if (mask != 0)
+      _swrast_Clear( ctx, mask);
+}
+
+
+/* glClearColor hook.  Converts the four float channels to bytes with
+ * CLAMPED_FLOAT_TO_UBYTE and rebuilds the colour clear pattern.
+ */
+void
+sisDDClearColor( GLcontext * ctx, const GLfloat color[4] )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   GLubyte r, g, b, a;
+
+   CLAMPED_FLOAT_TO_UBYTE(r, color[0]);
+   CLAMPED_FLOAT_TO_UBYTE(g, color[1]);
+   CLAMPED_FLOAT_TO_UBYTE(b, color[2]);
+   CLAMPED_FLOAT_TO_UBYTE(a, color[3]);
+
+   set_color_pattern( smesa, r, g, b, a );
+}
+
+/* glClearDepth hook.  Rebuilds the Z/stencil clear pattern from the new
+ * depth `d' and the context's current stencil clear value.
+ */
+void
+sisDDClearDepth( GLcontext * ctx, GLclampd d )
+{
+   sisUpdateZStencilPattern( SIS_CONTEXT(ctx), d, ctx->Stencil.Clear );
+}
+
+/* glClearStencil hook.  Rebuilds the Z/stencil clear pattern from the
+ * context's current depth clear value and the new stencil value `s'.
+ */
+void
+sisDDClearStencil( GLcontext * ctx, GLint s )
+{
+   sisUpdateZStencilPattern( SIS_CONTEXT(ctx), ctx->Depth.Clear, s );
+}
+
+/* Clear the buffers selected in `mask' by drawing a two-triangle rectangle
+ * (x, y, width, height) through the 3D engine, which lets colour and
+ * stencil write masks apply.  sisDDClear calls this between LOCK_HARDWARE()
+ * and UNLOCK_HARDWARE().
+ *
+ * When the front buffer is selected the rectangle is drawn once per
+ * drawable cliprect; if the back buffer is selected as well, the function
+ * calls itself again for the back buffer alone.
+ *
+ * Returns `mask' with the depth, stencil, front-left and back-left bits
+ * removed.  The state groups overwritten here are marked dirty in
+ * smesa->GlobalFlag.
+ */
+static GLbitfield
+sis_3D_Clear( GLcontext * ctx, GLbitfield mask,
+	      GLint x, GLint y, GLint width, GLint height )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+
+   __GLSiSHardware *current = &smesa->current;
+
+   float left, top, right, bottom, zClearVal;
+   GLboolean bClrColor, bClrDepth, bClrStencil;
+   GLint dwPrimitiveSet;
+   GLint dwEnable1 = 0, dwEnable2 = MASK_ColorMaskWriteEnable;
+   GLint dwDepthMask = 0, dwSten1 = 0, dwSten2 = 0;
+   /* State groups overwritten below; OR'ed into GlobalFlag at the end. */
+   GLint dirtyflags = GFLAG_ENABLESETTING | GFLAG_ENABLESETTING2 |
+      GFLAG_CLIPPING | GFLAG_DESTSETTING;
+   int count;
+   drm_clip_rect_t *pExtents;
+
+   bClrColor = (mask & (BUFFER_BIT_BACK_LEFT | BUFFER_BIT_FRONT_LEFT)) != 0;
+   bClrDepth = (mask & BUFFER_BIT_DEPTH) != 0;
+   bClrStencil = (mask & BUFFER_BIT_STENCIL) != 0;
+
+   /* Run any pending render-state update before overriding registers. */
+   if (smesa->GlobalFlag & GFLAG_RENDER_STATES)
+      sis_update_render_state( smesa );
+
+   if (bClrStencil) {
+      /* Stencil test ALWAYS with every op REPLACE, reference value taken
+       * from the stencil clear value.
+       */
+      dwSten1 = STENCIL_FORMAT_8 | SiS_STENCIL_ALWAYS |
+         ((ctx->Stencil.Clear & 0xff) << 8) | 0xff;
+      dwSten2 = SiS_SFAIL_REPLACE | SiS_SPASS_ZFAIL_REPLACE |
+         SiS_SPASS_ZPASS_REPLACE;
+      dwEnable1 = MASK_ZWriteEnable | MASK_StencilWriteEnable |
+	MASK_StencilTestEnable;
+      dwEnable2 |= MASK_ZMaskWriteEnable;
+      dwDepthMask |= (ctx->Stencil.WriteMask[0] & 0xff) << 24;
+   } else if (bClrDepth) {
+      dwEnable1 = MASK_ZWriteEnable;
+      dwEnable2 |= MASK_ZMaskWriteEnable;
+   }
+
+   /* NOTE(review): dwDepthMask is computed here but not written to any
+    * register in this function -- confirm whether a write is missing.
+    */
+   if (bClrDepth) {
+      zClearVal = ctx->Depth.Clear;
+      if (ctx->Visual.depthBits != 32)
+         dwDepthMask |= 0x00ffffff;
+      else
+         dwDepthMask = 0xffffffff;
+   } else
+      zClearVal = 0.0;
+
+   mWait3DCmdQueue(9);
+   MMIO(REG_3D_TEnable, dwEnable1);
+   MMIO(REG_3D_TEnable2, dwEnable2);
+   if (bClrDepth || bClrStencil) {
+      /* Force the Z compare to ALWAYS for the duration of the clear. */
+      MMIO(REG_3D_ZSet, (current->hwZ & ~MASK_ZTestMode) | SiS_Z_COMP_ALWAYS);
+      dirtyflags |= GFLAG_ZSETTING;
+   }
+   if (bClrColor) {
+      /* Plain copy ROP for the colour write. */
+      MMIO(REG_3D_DstSet, (current->hwDstSet & ~MASK_ROP2) | LOP_COPY);
+   } else {
+      MMIO(REG_3D_DstAlphaWriteMask, 0L);
+   }
+   if (bClrStencil) {
+      MMIO(REG_3D_StencilSet, dwSten1);
+      MMIO(REG_3D_StencilSet2, dwSten2);
+      dirtyflags |= GFLAG_STENCILSETTING;
+   }
+
+   /* Front buffer: one pass per cliprect.  Otherwise a single unclipped
+    * pass.
+    */
+   if (mask & BUFFER_BIT_FRONT_LEFT) {
+      pExtents = smesa->driDrawable->pClipRects;
+      count = smesa->driDrawable->numClipRects;
+   } else {
+      pExtents = NULL;
+      count = 1;
+   }
+
+   while(count--) {
+      left = x;
+      right = x + width;
+      top = y;
+      bottom = y + height;
+
+      if (pExtents != NULL) {
+         /* Cliprect in drawable-relative coordinates; x2/y2 are made
+          * inclusive by the -1.
+          * NOTE(review): these are GLuint, so a cliprect edge left of or
+          * above the drawable origin would wrap -- confirm cliprects
+          * always lie inside the drawable.
+          */
+         GLuint x1, y1, x2, y2;
+
+         x1 = pExtents->x1 - smesa->driDrawable->x;
+         y1 = pExtents->y1 - smesa->driDrawable->y;
+         x2 = pExtents->x2 - smesa->driDrawable->x - 1;
+         y2 = pExtents->y2 - smesa->driDrawable->y - 1;
+
+         /* Intersect the clear rectangle with the cliprect. */
+         left = (left > x1) ? left : x1;
+         right = (right > x2) ? x2 : right;
+         top = (top > y1) ? top : y1;
+         bottom = (bottom > y2) ? y2 : bottom;
+         pExtents++;
+         if (left > right || top > bottom)
+            continue;
+      }
+
+      mWait3DCmdQueue(20);
+
+      MMIO(REG_3D_ClipTopBottom, ((GLint)top << 13) | (GLint)bottom);
+      MMIO(REG_3D_ClipLeftRight, ((GLint)left << 13) | (GLint)right);
+
+      /* Vertex coordinates are passed as the raw bit patterns of the
+       * float values (read back through a GLint pointer).
+       */
+
+      /* the first triangle */
+      dwPrimitiveSet = OP_3D_TRIANGLE_DRAW | OP_3D_FIRE_TSARGBc | 
+                        SHADE_FLAT_VertexC;
+      MMIO(REG_3D_PrimitiveSet, dwPrimitiveSet);
+
+      MMIO(REG_3D_TSZa, *(GLint *) &zClearVal);
+      MMIO(REG_3D_TSXa, *(GLint *) &right);
+      MMIO(REG_3D_TSYa, *(GLint *) &top);
+      MMIO(REG_3D_TSARGBa, smesa->clearColorPattern);
+
+      MMIO(REG_3D_TSZb, *(GLint *) &zClearVal);
+      MMIO(REG_3D_TSXb, *(GLint *) &left);
+      MMIO(REG_3D_TSYb, *(GLint *) &top);
+      MMIO(REG_3D_TSARGBb, smesa->clearColorPattern);
+
+      MMIO(REG_3D_TSZc, *(GLint *) &zClearVal);
+      MMIO(REG_3D_TSXc, *(GLint *) &left);
+      MMIO(REG_3D_TSYc, *(GLint *) &bottom);
+      MMIO(REG_3D_TSARGBc, smesa->clearColorPattern);
+
+      /* second triangle */
+      /* Only vertex B is rewritten; presumably vertices A and C written
+       * above are reused by the hardware -- TODO confirm.
+       */
+      dwPrimitiveSet = OP_3D_TRIANGLE_DRAW | OP_3D_FIRE_TSARGBb |
+                        SHADE_FLAT_VertexB;
+      MMIO(REG_3D_PrimitiveSet, dwPrimitiveSet);
+
+      MMIO(REG_3D_TSZb, *(GLint *) &zClearVal);
+      MMIO(REG_3D_TSXb, *(GLint *) &right);
+      MMIO(REG_3D_TSYb, *(GLint *) &bottom);
+      MMIO(REG_3D_TSARGBb, smesa->clearColorPattern);
+   }
+
+   mEndPrimitive();
+
+   /* If BUFFER_BIT_FRONT_LEFT is set, we've only cleared the front buffer so far */
+   if ((mask & BUFFER_BIT_FRONT_LEFT) != 0 && (mask & BUFFER_BIT_BACK_LEFT) != 0)
+      sis_3D_Clear( ctx, BUFFER_BIT_BACK_LEFT, x, y, width, height );
+
+   smesa->GlobalFlag |= dirtyflags;
+
+   return mask & ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL | BUFFER_BIT_BACK_LEFT |
+      BUFFER_BIT_FRONT_LEFT);
+}
+
+/* Clear the colour buffers selected in `mask' with 2D pattern-fill blits
+ * of smesa->clearColorPattern.
+ *
+ * The back buffer is filled with one blit over (x, y, width, height).  The
+ * front buffer is filled with one blit per drawable cliprect, each limited
+ * to the intersection of the cliprect and the requested rectangle.
+ */
+static void
+sis_clear_color_buffer( GLcontext *ctx, GLenum mask, GLint x, GLint y,
+			GLint width, GLint height )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   int count;
+   drm_clip_rect_t *pExtents = NULL;
+   GLint xx, yy;
+   GLint x0, y0, width0, height0;
+
+   /* Clear back buffer */
+   if (mask & BUFFER_BIT_BACK_LEFT) {
+      mWait3DCmdQueue (8);
+      MMIO(REG_SRC_PITCH, (smesa->bytesPerPixel == 4) ? 
+			   BLIT_DEPTH_32 : BLIT_DEPTH_16);
+      MMIO(REG_DST_X_Y, (x << 16) | y);
+      MMIO(REG_DST_ADDR, smesa->back.offset);
+      MMIO(REG_DST_PITCH_HEIGHT, (smesa->virtualY << 16) | smesa->back.pitch);
+      MMIO(REG_WIDTH_HEIGHT, (height << 16) | width);
+      MMIO(REG_PATFG, smesa->clearColorPattern);
+      MMIO(REG_BLIT_CMD, CMD_DIR_X_INC | CMD_DIR_Y_INC | CMD_ROP_PAT);
+      MMIO(REG_CommandQueue, -1);
+   }
+  
+   if ((mask & BUFFER_BIT_FRONT_LEFT) == 0)
+      return;
+
+   /* Clear front buffer */
+   /* Keep the requested rectangle; x/y/width/height are reused below for
+    * each clipped sub-rectangle.
+    */
+   x0 = x;
+   y0 = y;
+   width0 = width;
+   height0 = height;
+
+   pExtents = smesa->driDrawable->pClipRects;
+   count = smesa->driDrawable->numClipRects;
+
+   while (count--) {
+      /* Cliprect corners in drawable-relative coordinates. */
+      GLint x2 = pExtents->x1 - smesa->driDrawable->x;
+      GLint y2 = pExtents->y1 - smesa->driDrawable->y;
+      GLint xx2 = pExtents->x2 - smesa->driDrawable->x;
+      GLint yy2 = pExtents->y2 - smesa->driDrawable->y;
+
+      /* Intersect with the requested rectangle. */
+      x = (x0 > x2) ? x0 : x2;
+      y = (y0 > y2) ? y0 : y2;
+      xx = ((x0 + width0) > (xx2)) ? xx2 : x0 + width0;
+      yy = ((y0 + height0) > (yy2)) ? yy2 : y0 + height0;
+      width = xx - x;
+      height = yy - y;
+      pExtents++;
+
+      /* Skip cliprects that do not overlap the rectangle. */
+      if (width <= 0 || height <= 0)
+	continue;
+
+      mWait3DCmdQueue (8);
+      MMIO(REG_SRC_PITCH, (smesa->bytesPerPixel == 4) ? 
+			   BLIT_DEPTH_32 : BLIT_DEPTH_16);
+      MMIO(REG_DST_X_Y, (x << 16) | y);
+      MMIO(REG_DST_ADDR, smesa->front.offset);
+      MMIO(REG_DST_PITCH_HEIGHT, (smesa->virtualY << 16) | smesa->front.pitch);
+      MMIO(REG_WIDTH_HEIGHT, (height << 16) | width);
+      MMIO(REG_PATFG, smesa->clearColorPattern);
+      MMIO(REG_BLIT_CMD, CMD_DIR_X_INC | CMD_DIR_Y_INC | CMD_ROP_PAT);
+      MMIO(REG_CommandQueue, -1);
+   }
+}
+
+/* Clear the depth/stencil buffer over (x, y, width, height) with a single
+ * 2D pattern-fill blit of smesa->clearZStencilPattern.  The blit depth is
+ * 16 bits for SiS_ZFORMAT_Z16 and 32 bits otherwise.  `mask' is not read:
+ * depth and stencil are always written together.
+ */
+static void
+sis_clear_z_stencil_buffer( GLcontext * ctx, GLbitfield mask,
+			    GLint x, GLint y, GLint width, GLint height )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+
+   mWait3DCmdQueue (8);
+   MMIO(REG_SRC_PITCH, (smesa->zFormat == SiS_ZFORMAT_Z16) ?
+			BLIT_DEPTH_16 : BLIT_DEPTH_32);
+   MMIO(REG_DST_X_Y, (x << 16) | y);
+   MMIO(REG_DST_ADDR, smesa->depth.offset);
+   MMIO(REG_DST_PITCH_HEIGHT, (smesa->virtualY << 16) | smesa->depth.pitch);
+   MMIO(REG_WIDTH_HEIGHT, (height << 16) | width);
+   MMIO(REG_PATFG, smesa->clearZStencilPattern);
+   MMIO(REG_BLIT_CMD, CMD_DIR_X_INC | CMD_DIR_Y_INC | CMD_ROP_PAT);
+   MMIO(REG_CommandQueue, -1);
+}
+
diff --git a/src/mesa/drivers/dri/sis/sis_context.c b/src/mesa/drivers/dri/sis/sis_context.c
new file mode 100644
index 0000000000..85f26a08b7
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_context.c
@@ -0,0 +1,719 @@
+/**************************************************************************
+
+Copyright 2000 Silicon Integrated Systems Corp, Inc., HsinChu, Taiwan.
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Sung-Ching Lin <sclin@sis.com.tw>
+ *   Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#include "sis_dri.h"
+
+#include "sis_context.h"
+#include "sis_state.h"
+#include "sis_dd.h"
+#include "sis_span.h"
+#include "sis_stencil.h"
+#include "sis_tex.h"
+#include "sis_tris.h"
+#include "sis_alloc.h"
+
+#include "main/imports.h"
+#include "utils.h"
+#include "main/framebuffer.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_secondary_color
+#include "main/remap_helper.h"
+
+#ifndef SIS_DEBUG   /* skipped when SIS_DEBUG is already defined as a macro */
+int SIS_DEBUG = 0;  /* assigned from the SIS_DEBUG environment string under DO_DEBUG */
+#endif
+/* NOTE(review): the three globals below have no reader or writer in this file. */
+int GlobalCurrentHwcx = -1;
+int GlobalHwcxCountBase = 1;
+int GlobalCmdQueueLen = 0;
+
+static struct dri_extension card_extensions[] =   /* passed to driInitExtensions() */
+{
+    { "GL_ARB_multitexture",               NULL },
+    { "GL_ARB_texture_border_clamp",       NULL },
+    { "GL_ARB_texture_mirrored_repeat",    NULL },
+    /*{ "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },*/ /* disabled */
+    { "GL_EXT_texture_lod_bias",           NULL },
+    { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
+    { "GL_EXT_stencil_wrap",               NULL },
+    { "GL_MESA_ycbcr_texture",             NULL },
+    { "GL_NV_blend_square",                NULL },
+    { NULL,                                NULL }   /* presumably the list terminator */
+};
+
+#if 0   /* compiled out: 6326 extension list whose entries are all commented out */
+static struct dri_extension card_extensions_6326[] =
+{
+    /*{ "GL_ARB_texture_border_clamp",       NULL },*/
+    /*{ "GL_ARB_texture_mirrored_repeat",    NULL },*/
+    /*{ "GL_MESA_ycbcr_texture",             NULL },*/
+    { NULL,                                NULL }
+};
+#endif
+
+static const struct dri_debug_control debug_control[] =   /* for driParseDebugString() */
+{
+    { "fall",  DEBUG_FALLBACKS },   /* keyword "fall" maps to DEBUG_FALLBACKS */
+    { NULL,    0 }
+};
+
+/* Spin until the hardware status register reports the engine idle. */
+void WaitEngIdle (sisContextPtr smesa)
+{
+   GLuint status;
+
+   if (!smesa->is6326) {
+      do {  /* every SiS_EngIdle bit must read back set */
+	 status = MMIO_READ(REG_CommandQueue);
+      } while ((status & SiS_EngIdle) != SiS_EngIdle);
+      return;
+   }
+   do {  /* 6326: busy while ENG_3DIDLEQE is set; XXX right reg? */
+      status = MMIO_READ(REG_3D_EngineFire);
+   } while (status & ENG_3DIDLEQE);
+}
+
+/* Spin until the hardware status register reports the 2D engine idle. */
+void Wait2DEngIdle (sisContextPtr smesa)
+{
+   GLuint status;
+
+   if (!smesa->is6326) {
+      do {  /* every SiS_EngIdle2d bit must read back set */
+	 status = MMIO_READ(REG_CommandQueue);
+      } while ((status & SiS_EngIdle2d) != SiS_EngIdle2d);
+      return;
+   }
+   do {  /* 6326: busy while BLT_BUSY is set in the BitBlt command register */
+      status = MMIO_READ(REG_6326_BitBlt_Cmd);
+   } while (status & BLT_BUSY);
+}
+
+/* To be called from mWait3DCmdQueue.  Kept as a separate function for
+ * profiling; its speed is irrelevant since it only spins, refreshing the
+ * queue length behind CurrentQueueLenPtr until it is at least wLen.
+ */
+void WaitingFor3dIdle(sisContextPtr smesa, int wLen)
+{
+   int *queueLen = smesa->CurrentQueueLenPtr;
+
+   while (*queueLen < wLen) {
+      if (smesa->is6326) {
+	 const GLuint fire = (GLuint)MMIO_READ(REG_3D_EngineFire);
+
+	 *queueLen = (fire >> 16) * 2;
+      } else {
+	 *queueLen = (MMIO_READ(REG_CommandQueue) & MASK_QueueLen) - 20;
+      }
+   }
+}
+
+/* Calls sisUpdateBufferSize() for this context, then lets
+ * _mesa_resize_framebuffer() resize drawbuffer to width x height.
+ */
+void sisReAllocateBuffers(GLcontext *ctx, GLframebuffer *drawbuffer,
+                          GLuint width, GLuint height)
+{
+   sisUpdateBufferSize(SIS_CONTEXT(ctx));
+   _mesa_resize_framebuffer(ctx, drawbuffer, width, height);
+}
+
+/* Create the SiS driver context for driContextPriv.
+ * Allocates the sisContext and its Mesa context (sharing with
+ * sharedContextPrivate if given), copies screen parameters, sets up the
+ * vertex buffer (AGP when available and not disabled, else malloc) and
+ * initializes the swrast/vbo/tnl/swsetup modules and driver state functions.
+ * 'api' is not used.  Returns GL_FALSE if creation or allocation fails.
+ */
+GLboolean
+sisCreateContext( gl_api api, const __GLcontextModes *glVisual,
+		  __DRIcontext *driContextPriv, void *sharedContextPrivate )
+{
+   GLcontext *ctx, *shareCtx;
+   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+   sisContextPtr smesa;
+   sisScreenPtr sisScreen;
+   int i;
+   struct dd_function_table functions;
+
+   smesa = (sisContextPtr)CALLOC( sizeof(*smesa) );
+   if (smesa == NULL)
+      return GL_FALSE;
+
+   /* Init default driver functions then plug in our SIS-specific functions
+    * (the texture functions are especially important)
+    */
+   _mesa_init_driver_functions(&functions);
+   sisInitDriverFuncs(&functions);
+   sisInitTextureFuncs(&functions);
+
+   /* Allocate the Mesa context */
+   shareCtx = sharedContextPrivate ?
+      ((sisContextPtr)sharedContextPrivate)->glCtx : NULL;
+   smesa->glCtx = _mesa_create_context( glVisual, shareCtx,
+                                        &functions, (void *) smesa);
+   if (!smesa->glCtx) {
+      FREE(smesa);
+      return GL_FALSE;
+   }
+   driContextPriv->driverPrivate = smesa;
+   ctx = smesa->glCtx;
+
+   sisScreen = smesa->sisScreen = (sisScreenPtr)(sPriv->private);
+
+   smesa->is6326 = GL_FALSE; /* XXX */
+   smesa->driContext = driContextPriv;
+   smesa->driScreen = sPriv;
+   smesa->driDrawable = NULL;
+   smesa->hHWContext = driContextPriv->hHWContext;
+   smesa->driHwLock = &sPriv->pSAREA->lock;
+   smesa->driFd = sPriv->fd;
+
+   smesa->virtualX = sisScreen->screenX;
+   smesa->virtualY = sisScreen->screenY;
+   smesa->bytesPerPixel = sisScreen->cpp;
+   smesa->IOBase = sisScreen->mmio.map;
+   smesa->Chipset = sisScreen->deviceID;
+
+   smesa->FbBase = sPriv->pFB;
+   smesa->displayWidth = sPriv->fbWidth;
+   smesa->front.pitch = sPriv->fbStride;
+
+   smesa->sarea = (SISSAREAPriv *)((char *)sPriv->pSAREA +
+				   sisScreen->sarea_priv_offset);
+
+   /* support ARGB8888 and RGB565 */
+   switch (smesa->bytesPerPixel) {
+   case 4:
+      smesa->redMask = 0x00ff0000;
+      smesa->greenMask = 0x0000ff00;
+      smesa->blueMask = 0x000000ff;
+      smesa->alphaMask = 0xff000000;
+      smesa->colorFormat = DST_FORMAT_ARGB_8888;
+      break;
+   case 2:
+      smesa->redMask = 0xf800;
+      smesa->greenMask = 0x07e0;
+      smesa->blueMask = 0x001f;
+      smesa->alphaMask = 0;
+      smesa->colorFormat = DST_FORMAT_RGB_565;
+      break;
+   default:
+      sis_fatal_error("Bad bytesPerPixel %d.\n", smesa->bytesPerPixel);
+   }
+
+   if (smesa->is6326) {
+      ctx->Const.MaxTextureUnits = 1;
+      ctx->Const.MaxTextureLevels = 9;
+   } else {
+      ctx->Const.MaxTextureUnits = 2;
+      ctx->Const.MaxTextureLevels = 11;
+   }
+   ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
+   ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
+
+   /* Parse configuration files */
+   driParseConfigFiles (&smesa->optionCache, &sisScreen->optionCache,
+			sisScreen->driScreen->myNum, "sis");
+
+#if DO_DEBUG
+   SIS_DEBUG = driParseDebugString(getenv("SIS_DEBUG"), debug_control);
+#endif
+
+   /* TODO: index mode */
+
+   smesa->CurrentQueueLenPtr = &(smesa->sarea->QueueLength);
+   smesa->FrameCountPtr = &(smesa->sarea->FrameCount);
+
+   /* set AGP */
+   smesa->AGPSize = sisScreen->agp.size;
+   smesa->AGPBase = sisScreen->agp.map;
+   smesa->AGPAddr = sisScreen->agpBaseOffset;
+
+   /* Create AGP command buffer */
+   if (smesa->AGPSize != 0 &&
+       !driQueryOptionb(&smesa->optionCache, "agp_disable")) {
+      smesa->vb = sisAllocAGP(smesa, 64 * 1024, &smesa->vb_agp_handle);
+      if (smesa->vb != NULL) {
+	 smesa->using_agp = GL_TRUE;
+	 smesa->vb_agp_offset = ((long)smesa->vb - (long)smesa->AGPBase +
+	    (long)smesa->AGPAddr);
+      }
+   }
+   if (!smesa->using_agp) {
+      /* No AGP vertex buffer: fall back to system memory. */
+      smesa->vb = malloc(64 * 1024);
+      if (smesa->vb == NULL) {
+	 /* Release the Mesa context and clear the dangling driverPrivate. */
+	 driContextPriv->driverPrivate = NULL;
+	 _mesa_destroy_context(smesa->glCtx);
+	 FREE(smesa);
+	 return GL_FALSE;
+      }
+   }
+   smesa->vb_cur = smesa->vb;
+   smesa->vb_last = smesa->vb;
+   smesa->vb_end = smesa->vb + 64 * 1024;
+
+   smesa->GlobalFlag = 0L;
+   smesa->Fallback = 0;
+
+   /* Initialize the software rasterizer and helper modules. */
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+
+   _swrast_allow_pixel_fog( ctx, GL_TRUE );
+   _swrast_allow_vertex_fog( ctx, GL_FALSE );
+   _tnl_allow_pixel_fog( ctx, GL_TRUE );
+   _tnl_allow_vertex_fog( ctx, GL_FALSE );
+
+   /* XXX these should really go right after _mesa_init_driver_functions() */
+   if (smesa->is6326) {
+      sis6326DDInitStateFuncs( ctx );
+      sis6326DDInitState( smesa ); /* Initializes smesa->zFormat, important */
+   } else {
+      sisDDInitStateFuncs( ctx );
+      sisDDInitState( smesa );	/* Initializes smesa->zFormat, important */
+      sisDDInitStencilFuncs( ctx );
+   }
+   sisInitTriFuncs( ctx );
+   sisDDInitSpanFuncs( ctx );
+
+   driInitExtensions( ctx, card_extensions, GL_FALSE );
+
+   for (i = 0; i < SIS_MAX_TEXTURES; i++) {
+      smesa->TexStates[i] = 0;
+      smesa->PrevTexFormat[i] = 0;
+   }
+
+   if (driQueryOptionb(&smesa->optionCache, "no_rast")) {
+      fprintf(stderr, "disabling 3D acceleration\n");
+      FALLBACK(smesa, SIS_FALLBACK_DISABLE, 1);
+   }
+   smesa->texture_depth = driQueryOptioni(&smesa->optionCache, "texture_depth");
+
+   return GL_TRUE;
+}
+
+/* Tear down a context built by sisCreateContext(): the swsetup/tnl/vbo/swrast
+ * modules, the vertex buffer, the Mesa context and finally the sisContext.
+ */
+void
+sisDestroyContext ( __DRIcontext *driContextPriv )
+{
+   sisContextPtr smesa = (sisContextPtr)driContextPriv->driverPrivate;
+
+   assert( smesa != NULL );
+   if ( smesa != NULL ) {
+      _swsetup_DestroyContext( smesa->glCtx );
+      _tnl_DestroyContext( smesa->glCtx );
+      _vbo_DestroyContext( smesa->glCtx );
+      _swrast_DestroyContext( smesa->glCtx );
+
+      /* The vertex buffer is AGP memory or, failing that, malloc()ed. */
+      if (smesa->using_agp)
+	 sisFreeAGP(smesa, smesa->vb_agp_handle);
+      else
+	 free(smesa->vb);
+
+      /* XXX: DriverCtx is left pointing at smesa; sisDDDeleteTexture is said
+       * to need it to call the FB/AGP free function during destruction. */
+      _mesa_destroy_context(smesa->glCtx);
+   }
+   FREE( smesa );
+}
+
+/* Make driContextPriv current on the given draw/read drawables, or release
+ * the current Mesa context when driContextPriv is NULL.  Always GL_TRUE.
+ */
+GLboolean
+sisMakeCurrent( __DRIcontext *driContextPriv,
+                __DRIdrawable *driDrawPriv,
+                __DRIdrawable *driReadPriv )
+{
+   if ( driContextPriv == NULL ) {
+      _mesa_make_current( NULL, NULL, NULL );
+   } else {
+      GET_CURRENT_CONTEXT(ctx);
+      sisContextPtr incoming = (sisContextPtr) driContextPriv->driverPrivate;
+      sisContextPtr outgoing = NULL;
+
+      if ( ctx ) {
+         outgoing = SIS_CONTEXT(ctx);
+      }
+      if ( incoming != outgoing ) {
+         incoming->GlobalFlag = GFLAG_ALL;   /* flag every HW state group */
+      }
+      incoming->driDrawable = driDrawPriv;
+      _mesa_make_current( incoming->glCtx,
+                          (GLframebuffer *)driDrawPriv->driverPrivate,
+                          (GLframebuffer *)driReadPriv->driverPrivate );
+      sisUpdateBufferSize( incoming );
+      sisUpdateClipping( incoming->glCtx );
+   }
+   return GL_TRUE;
+}
+
+GLboolean
+sisUnbindContext( __DRIcontext *driContextPriv )
+{
+   return GL_TRUE;   /* no driver-side work is done; always reports success */
+}
+
+void
+sis_update_render_state( sisContextPtr smesa )
+{
+   __GLSiSHardware *prev = &smesa->prev;
+   /* Write smesa->prev to the registers of every render group flagged in GlobalFlag. */
+   mWait3DCmdQueue (45);
+
+   if (smesa->GlobalFlag & GFLAG_ENABLESETTING) {
+      if (!smesa->clearTexCache) {
+	 MMIO(REG_3D_TEnable, prev->hwCapEnable);
+      } else {
+	 MMIO(REG_3D_TEnable, prev->hwCapEnable | MASK_TextureCacheClear);   /* once with the clear bit */
+	 MMIO(REG_3D_TEnable, prev->hwCapEnable);   /* then again without it */
+	 smesa->clearTexCache = GL_FALSE;
+      }
+   }
+   /* Second enable register */
+   if (smesa->GlobalFlag & GFLAG_ENABLESETTING2)
+      MMIO(REG_3D_TEnable2, prev->hwCapEnable2);
+
+   /* Z Setting */
+   if (smesa->GlobalFlag & GFLAG_ZSETTING)
+   {
+      MMIO(REG_3D_ZSet, prev->hwZ);
+      MMIO(REG_3D_ZStWriteMask, prev->hwZMask);
+      MMIO(REG_3D_ZAddress, prev->hwOffsetZ);
+   }
+
+   /* Alpha Setting */
+   if (smesa->GlobalFlag & GFLAG_ALPHASETTING)
+      MMIO(REG_3D_AlphaSet, prev->hwAlpha);
+   /* Destination Setting */
+   if (smesa->GlobalFlag & GFLAG_DESTSETTING) {
+      MMIO(REG_3D_DstSet, prev->hwDstSet);
+      MMIO(REG_3D_DstAlphaWriteMask, prev->hwDstMask);
+      MMIO(REG_3D_DstAddress, prev->hwOffsetDest);
+   }
+
+   /* Line Setting (compiled out; GFLAG_LINESETTING is not in GFLAG_RENDER_STATES either) */
+#if 0
+   if (smesa->GlobalFlag & GFLAG_LINESETTING) 
+      MMIO(REG_3D_LinePattern, prev->hwLinePattern);
+#endif
+
+   /* Fog Setting */
+   if (smesa->GlobalFlag & GFLAG_FOGSETTING)
+   {
+      MMIO(REG_3D_FogSet, prev->hwFog);
+      MMIO(REG_3D_FogInverseDistance, prev->hwFogInverse);
+      MMIO(REG_3D_FogFarDistance, prev->hwFogFar);
+      MMIO(REG_3D_FogFactorDensity, prev->hwFogDensity);
+   }
+
+   /* Stencil Setting */
+   if (smesa->GlobalFlag & GFLAG_STENCILSETTING) {
+      MMIO(REG_3D_StencilSet, prev->hwStSetting);
+      MMIO(REG_3D_StencilSet2, prev->hwStSetting2);
+   }
+
+   /* Miscellaneous Setting */
+   if (smesa->GlobalFlag & GFLAG_DSTBLEND)
+      MMIO(REG_3D_DstBlendMode, prev->hwDstSrcBlend);
+   if (smesa->GlobalFlag & GFLAG_CLIPPING) {
+      MMIO(REG_3D_ClipTopBottom, prev->clipTopBottom);
+      MMIO(REG_3D_ClipLeftRight, prev->clipLeftRight);
+   }
+   /* Every render-state flag has been handled: clear them all. */
+  smesa->GlobalFlag &= ~GFLAG_RENDER_STATES;
+}
+
+void
+sis_update_texture_state (sisContextPtr smesa)
+{
+   __GLSiSHardware *prev = &smesa->prev;
+   /* Write smesa->prev to the texture registers of every group flagged in GlobalFlag. */
+   mWait3DCmdQueue (55);
+   if (smesa->clearTexCache || (smesa->GlobalFlag & GFLAG_TEXTUREADDRESS)) {
+      MMIO(REG_3D_TEnable, prev->hwCapEnable | MASK_TextureCacheClear);
+      MMIO(REG_3D_TEnable, prev->hwCapEnable);
+      smesa->clearTexCache = GL_FALSE;
+   }
+   /* NOTE(review): GFLAG_TEXTUREADDRESS_1 does not trigger the clear above -- confirm. */
+   /* Texture Setting (unit 0) */
+   if (smesa->GlobalFlag & CFLAG_TEXTURERESET)
+      MMIO(REG_3D_TextureSet, prev->texture[0].hwTextureSet);
+
+   if (smesa->GlobalFlag & GFLAG_TEXTUREMIPMAP)
+      MMIO(REG_3D_TextureMip, prev->texture[0].hwTextureMip);
+
+  /*
+  MMIO(REG_3D_TextureTransparencyColorHigh, prev->texture[0].hwTextureClrHigh);
+  MMIO(REG_3D_TextureTransparencyColorLow, prev->texture[0].hwTextureClrLow);
+  */
+
+   if (smesa->GlobalFlag & GFLAG_TEXBORDERCOLOR)
+      MMIO(REG_3D_TextureBorderColor, prev->texture[0].hwTextureBorderColor);
+
+   if (smesa->GlobalFlag & GFLAG_TEXTUREADDRESS) {
+      switch ((prev->texture[0].hwTextureSet & MASK_TextureLevel) >> 8)   /* cases fall through down to 0 */
+      {
+      case 11:
+         MMIO(REG_3D_TextureAddress11, prev->texture[0].texOffset11);   /* FALLTHROUGH */
+      case 10:
+         MMIO(REG_3D_TextureAddress10, prev->texture[0].texOffset10);
+         MMIO(REG_3D_TexturePitch10, prev->texture[0].texPitch10);   /* FALLTHROUGH */
+      case 9:
+         MMIO(REG_3D_TextureAddress9, prev->texture[0].texOffset9);   /* FALLTHROUGH */
+      case 8:
+         MMIO(REG_3D_TextureAddress8, prev->texture[0].texOffset8);
+         MMIO(REG_3D_TexturePitch8, prev->texture[0].texPitch89);   /* FALLTHROUGH */
+      case 7:
+         MMIO(REG_3D_TextureAddress7, prev->texture[0].texOffset7);   /* FALLTHROUGH */
+      case 6:
+         MMIO(REG_3D_TextureAddress6, prev->texture[0].texOffset6);
+         MMIO(REG_3D_TexturePitch6, prev->texture[0].texPitch67);   /* FALLTHROUGH */
+      case 5:
+         MMIO(REG_3D_TextureAddress5, prev->texture[0].texOffset5);   /* FALLTHROUGH */
+      case 4:
+         MMIO(REG_3D_TextureAddress4, prev->texture[0].texOffset4);
+         MMIO(REG_3D_TexturePitch4, prev->texture[0].texPitch45);   /* FALLTHROUGH */
+      case 3:
+         MMIO(REG_3D_TextureAddress3, prev->texture[0].texOffset3);   /* FALLTHROUGH */
+      case 2:
+         MMIO(REG_3D_TextureAddress2, prev->texture[0].texOffset2);
+         MMIO(REG_3D_TexturePitch2, prev->texture[0].texPitch23);   /* FALLTHROUGH */
+      case 1:
+         MMIO(REG_3D_TextureAddress1, prev->texture[0].texOffset1);   /* FALLTHROUGH */
+      case 0:
+	  MMIO(REG_3D_TextureAddress0, prev->texture[0].texOffset0);
+	  MMIO(REG_3D_TexturePitch0, prev->texture[0].texPitch01);
+      }
+   }
+   if (smesa->GlobalFlag & CFLAG_TEXTURERESET_1)   /* texture unit 1 from here on */
+      MMIO(REG_3D_Texture1Set, prev->texture[1].hwTextureSet);
+   if (smesa->GlobalFlag & GFLAG_TEXTUREMIPMAP_1)
+      MMIO(REG_3D_Texture1Mip, prev->texture[1].hwTextureMip);
+
+   if (smesa->GlobalFlag & GFLAG_TEXBORDERCOLOR_1) {
+      MMIO(REG_3D_Texture1BorderColor,
+	    prev->texture[1].hwTextureBorderColor);
+   }
+   if (smesa->GlobalFlag & GFLAG_TEXTUREADDRESS_1) {
+      switch ((prev->texture[1].hwTextureSet & MASK_TextureLevel) >> 8)   /* cases fall through down to 0 */
+      {
+      case 11:
+         MMIO(REG_3D_Texture1Address11, prev->texture[1].texOffset11);   /* FALLTHROUGH */
+      case 10:
+         MMIO(REG_3D_Texture1Address10, prev->texture[1].texOffset10);
+         MMIO(REG_3D_Texture1Pitch10, prev->texture[1].texPitch10);   /* FALLTHROUGH */
+      case 9:
+         MMIO(REG_3D_Texture1Address9, prev->texture[1].texOffset9);   /* FALLTHROUGH */
+      case 8:
+         MMIO(REG_3D_Texture1Address8, prev->texture[1].texOffset8);
+         MMIO(REG_3D_Texture1Pitch8, prev->texture[1].texPitch89);   /* FALLTHROUGH */
+      case 7:
+         MMIO(REG_3D_Texture1Address7, prev->texture[1].texOffset7);   /* FALLTHROUGH */
+      case 6:
+         MMIO(REG_3D_Texture1Address6, prev->texture[1].texOffset6);
+         MMIO(REG_3D_Texture1Pitch6, prev->texture[1].texPitch67);   /* FALLTHROUGH */
+      case 5:
+         MMIO(REG_3D_Texture1Address5, prev->texture[1].texOffset5);   /* FALLTHROUGH */
+      case 4:
+         MMIO(REG_3D_Texture1Address4, prev->texture[1].texOffset4);
+         MMIO(REG_3D_Texture1Pitch4, prev->texture[1].texPitch45);   /* FALLTHROUGH */
+      case 3:
+         MMIO(REG_3D_Texture1Address3, prev->texture[1].texOffset3);   /* FALLTHROUGH */
+      case 2:
+         MMIO(REG_3D_Texture1Address2, prev->texture[1].texOffset2);
+         MMIO(REG_3D_Texture1Pitch2, prev->texture[1].texPitch23);   /* FALLTHROUGH */
+      case 1:
+         MMIO(REG_3D_Texture1Address1, prev->texture[1].texOffset1);   /* FALLTHROUGH */
+      case 0:
+         MMIO(REG_3D_Texture1Address0, prev->texture[1].texOffset0);
+         MMIO(REG_3D_Texture1Pitch0, prev->texture[1].texPitch01);
+      }
+   }
+
+   /* texture environment: both units write the same hwTexEnvColor blend factor */
+   if (smesa->GlobalFlag & GFLAG_TEXTUREENV) {
+      MMIO(REG_3D_TextureBlendFactor, prev->hwTexEnvColor);
+      MMIO(REG_3D_TextureColorBlendSet0, prev->hwTexBlendColor0);
+      MMIO(REG_3D_TextureAlphaBlendSet0, prev->hwTexBlendAlpha0);
+   }
+   if (smesa->GlobalFlag & GFLAG_TEXTUREENV_1) {
+      MMIO(REG_3D_TextureBlendFactor, prev->hwTexEnvColor);
+      MMIO(REG_3D_TextureColorBlendSet1, prev->hwTexBlendColor1);
+      MMIO(REG_3D_TextureAlphaBlendSet1, prev->hwTexBlendAlpha1);
+   }
+   /* Every texture-state flag has been handled: clear them all. */
+   smesa->GlobalFlag &= ~GFLAG_TEXTURE_STATES;
+}
+
+void
+sis6326_update_render_state( sisContextPtr smesa )
+{
+   __GLSiSHardware *prev = &smesa->prev;
+   /* 6326 variant: write smesa->prev to the REG_6326_* render registers flagged in GlobalFlag. */
+   mWait3DCmdQueue (45);
+
+   if (smesa->GlobalFlag & GFLAG_ENABLESETTING) {
+      if (!smesa->clearTexCache) {
+	 MMIO(REG_6326_3D_TEnable, prev->hwCapEnable);
+      } else {
+	 MMIO(REG_6326_3D_TEnable, prev->hwCapEnable & ~S_ENABLE_TextureCache);   /* once with the cache bit off */
+	 MMIO(REG_6326_3D_TEnable, prev->hwCapEnable);   /* then the real value */
+	 smesa->clearTexCache = GL_FALSE;
+      }
+   }
+
+   /* Z Setting */
+   if (smesa->GlobalFlag & GFLAG_ZSETTING) {
+      MMIO(REG_6326_3D_ZSet, prev->hwZ);
+      MMIO(REG_6326_3D_ZAddress, prev->hwOffsetZ);
+   }
+
+   /* Alpha Setting */
+   if (smesa->GlobalFlag & GFLAG_ALPHASETTING)
+      MMIO(REG_6326_3D_AlphaSet, prev->hwAlpha);
+   /* Destination Setting */
+   if (smesa->GlobalFlag & GFLAG_DESTSETTING) {
+      MMIO(REG_6326_3D_DstSet, prev->hwDstSet);
+      MMIO(REG_6326_3D_DstAddress, prev->hwOffsetDest);
+   }
+
+   /* Fog Setting */
+   if (smesa->GlobalFlag & GFLAG_FOGSETTING) {
+      MMIO(REG_6326_3D_FogSet, prev->hwFog);
+   }
+
+   /* Miscellaneous Setting */
+   if (smesa->GlobalFlag & GFLAG_DSTBLEND)
+      MMIO(REG_6326_3D_DstSrcBlendMode, prev->hwDstSrcBlend);
+
+   if (smesa->GlobalFlag & GFLAG_CLIPPING) {
+      MMIO(REG_6326_3D_ClipTopBottom, prev->clipTopBottom);
+      MMIO(REG_6326_3D_ClipLeftRight, prev->clipLeftRight);
+   }
+   /* All of GFLAG_RENDER_STATES is cleared, including bits this variant never tests. */
+  smesa->GlobalFlag &= ~GFLAG_RENDER_STATES;
+}
+
+void
+sis6326_update_texture_state (sisContextPtr smesa)
+{
+   __GLSiSHardware *prev = &smesa->prev;
+   /* 6326 variant: only prev->texture[0] is written; flags come from GlobalFlag. */
+   mWait3DCmdQueue (55);
+   if (smesa->clearTexCache || (smesa->GlobalFlag & GFLAG_TEXTUREADDRESS)) {
+      MMIO(REG_6326_3D_TEnable, prev->hwCapEnable & ~S_ENABLE_TextureCache);
+      MMIO(REG_6326_3D_TEnable, prev->hwCapEnable);
+      smesa->clearTexCache = GL_FALSE;
+   }
+
+   /* Texture Setting */
+   if (smesa->GlobalFlag & CFLAG_TEXTURERESET)
+      MMIO(REG_6326_3D_TextureSet, prev->texture[0].hwTextureSet);
+   /* Here the mipmap flag drives the width/height register. */
+   if (smesa->GlobalFlag & GFLAG_TEXTUREMIPMAP)
+      MMIO(REG_6326_3D_TextureWidthHeight, prev->texture[0].hwTexWidthHeight);
+
+  /*
+  MMIO(REG_3D_TextureTransparencyColorHigh, prev->texture[0].hwTextureClrHigh);
+  MMIO(REG_3D_TextureTransparencyColorLow, prev->texture[0].hwTextureClrLow);
+  */
+
+   if (smesa->GlobalFlag & GFLAG_TEXBORDERCOLOR)
+      MMIO(REG_6326_3D_TextureBorderColor, prev->texture[0].hwTextureBorderColor);
+
+   if (smesa->GlobalFlag & GFLAG_TEXTUREADDRESS) {
+      switch ((prev->texture[0].hwTextureSet & MASK_6326_TextureLevel) >> 8)   /* starting case; falls through to 0 */
+      {
+      case 9:
+         MMIO(REG_6326_3D_TextureAddress9, prev->texture[0].texOffset9);
+         /* FALLTHROUGH */
+      case 8:
+         MMIO(REG_6326_3D_TextureAddress8, prev->texture[0].texOffset8);
+         MMIO(REG_6326_3D_TexturePitch89, prev->texture[0].texPitch89);
+         /* FALLTHROUGH */
+      case 7:
+         MMIO(REG_6326_3D_TextureAddress7, prev->texture[0].texOffset7);
+         /* FALLTHROUGH */
+      case 6:
+         MMIO(REG_6326_3D_TextureAddress6, prev->texture[0].texOffset6);
+         MMIO(REG_6326_3D_TexturePitch67, prev->texture[0].texPitch67);
+         /* FALLTHROUGH */
+      case 5:
+         MMIO(REG_6326_3D_TextureAddress5, prev->texture[0].texOffset5);
+         /* FALLTHROUGH */
+      case 4:
+         MMIO(REG_6326_3D_TextureAddress4, prev->texture[0].texOffset4);
+         MMIO(REG_6326_3D_TexturePitch45, prev->texture[0].texPitch45);
+         /* FALLTHROUGH */
+      case 3:
+         MMIO(REG_6326_3D_TextureAddress3, prev->texture[0].texOffset3);
+         /* FALLTHROUGH */
+      case 2:
+         MMIO(REG_6326_3D_TextureAddress2, prev->texture[0].texOffset2);
+         MMIO(REG_6326_3D_TexturePitch23, prev->texture[0].texPitch23);
+         /* FALLTHROUGH */
+      case 1:
+         MMIO(REG_6326_3D_TextureAddress1, prev->texture[0].texOffset1);
+         /* FALLTHROUGH */
+      case 0:
+	 MMIO(REG_6326_3D_TextureAddress0, prev->texture[0].texOffset0);
+	 MMIO(REG_6326_3D_TexturePitch01, prev->texture[0].texPitch01);
+	 break;
+      }
+   }
+
+   /* texture environment */
+   if (smesa->GlobalFlag & GFLAG_TEXTUREENV) {
+      MMIO(REG_6326_3D_TextureBlendSet, prev->hwTexBlendSet);
+   }
+   /* All of GFLAG_TEXTURE_STATES is cleared, including the unit-1 bits not tested here. */
+   smesa->GlobalFlag &= ~GFLAG_TEXTURE_STATES;
+}
diff --git a/src/mesa/drivers/dri/sis/sis_context.h b/src/mesa/drivers/dri/sis/sis_context.h
new file mode 100644
index 0000000000..132cee33ee
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_context.h
@@ -0,0 +1,479 @@
+/**************************************************************************
+
+Copyright 2000 Silicon Integrated Systems Corp, Inc., HsinChu, Taiwan.
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Sung-Ching Lin <sclin@sis.com.tw>
+ *   Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#ifndef _sis_ctx_h_
+#define _sis_ctx_h_
+
+#include "main/context.h"
+#include "dri_util.h"
+#include "drm.h"
+#include "drm_sarea.h"
+#include "xmlconfig.h"
+#include "tnl/t_vertex.h"
+
+#include "sis_screen.h"
+#include "sis_reg.h"
+#include "sis6326_reg.h"
+#include "sis_dri.h"
+
+/* for GLboolean */
+#include <GL/gl.h>
+
+#define PCI_CHIP_SIS300		0x0300
+#define PCI_CHIP_SIS630		0x6300
+#define PCI_CHIP_SIS540		0x5300
+/* NOTE(review): no user of the two NEW_* bits is visible near here -- confirm they are live. */
+#define NEW_TEXTURING		0x1
+#define NEW_TEXTURE_ENV		0x2
+
+/* Flags for software fallback cases, one bit each.  sisCreateContext() raises
+ * SIS_FALLBACK_DISABLE through FALLBACK() when the "no_rast" option is set. */
+#define SIS_FALLBACK_TEXTURE		0x0001
+#define SIS_FALLBACK_TEXTURE0		0x0002
+#define SIS_FALLBACK_TEXTURE1		0x0004
+#define SIS_FALLBACK_TEXENV0		0x0008
+#define SIS_FALLBACK_TEXENV1		0x0010
+#define SIS_FALLBACK_DRAW_BUFFER	0x0020
+#define SIS_FALLBACK_STENCIL		0x0040
+#define SIS_FALLBACK_WRITEMASK		0x0080
+#define SIS_FALLBACK_DISABLE		0x0100
+
+/* Flags for hardware state that needs to be updated (bits of sis_context.GlobalFlag) */
+#define GFLAG_ENABLESETTING		0x00000001
+#define GFLAG_ENABLESETTING2		0x00000002
+#define GFLAG_ZSETTING			0x00000004
+#define GFLAG_ALPHASETTING		0x00000008
+#define GFLAG_DESTSETTING		0x00000010
+#define GFLAG_LINESETTING		0x00000020
+#define GFLAG_STENCILSETTING		0x00000040
+#define GFLAG_FOGSETTING		0x00000080
+#define GFLAG_DSTBLEND			0x00000100
+#define GFLAG_CLIPPING			0x00000200
+#define CFLAG_TEXTURERESET		0x00000400
+#define GFLAG_TEXTUREMIPMAP		0x00000800
+#define GFLAG_TEXBORDERCOLOR		0x00001000
+#define GFLAG_TEXTUREADDRESS		0x00002000
+#define GFLAG_TEXTUREENV		0x00004000
+#define CFLAG_TEXTURERESET_1		0x00008000
+#define GFLAG_TEXTUREMIPMAP_1		0x00010000
+#define GFLAG_TEXBORDERCOLOR_1		0x00020000
+#define GFLAG_TEXTUREADDRESS_1		0x00040000
+#define GFLAG_TEXTUREENV_1		0x00080000
+#define GFLAG_ALL			0x000fffff   /* all twenty bits above */
+/* Texture bits for both units; cleared together by the *_update_texture_state() functions. */
+#define GFLAG_TEXTURE_STATES (CFLAG_TEXTURERESET | GFLAG_TEXTUREMIPMAP | \
+			      GFLAG_TEXBORDERCOLOR | GFLAG_TEXTUREADDRESS | \
+			      CFLAG_TEXTURERESET_1 | GFLAG_TEXTUREMIPMAP_1 | \
+			      GFLAG_TEXBORDERCOLOR_1 | \
+			      GFLAG_TEXTUREADDRESS_1 | \
+			      GFLAG_TEXTUREENV | GFLAG_TEXTUREENV_1)
+
+/* Render bits cleared by the *_update_render_state() functions; GFLAG_LINESETTING is not included. */
+#define GFLAG_RENDER_STATES  (GFLAG_ENABLESETTING | GFLAG_ENABLESETTING2 | \
+			      GFLAG_ZSETTING | GFLAG_ALPHASETTING | \
+			      GFLAG_DESTSETTING | GFLAG_FOGSETTING | \
+			      GFLAG_STENCILSETTING | GFLAG_DSTBLEND | \
+			      GFLAG_CLIPPING)
+
+/* Use the templated vertex format: TAG() prefixes the template's names with
+ * "sis" (presumably where the sisVertex type used below comes from -- confirm). */
+#define TAG(x) sis##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+/* Subpixel offsets for window coordinates (triangles):
+ */
+#define SUBPIXEL_X  (-0.5F)
+#define SUBPIXEL_Y  (-0.5F)
+
+#define SIS_MAX_TEXTURE_SIZE 2048
+#define SIS_MAX_TEXTURES 2   /* sizes TexStates[] and PrevTexFormat[] in sis_context */
+#define SIS_MAX_TEXTURE_LEVELS		11   /* sizes sisTexObj.image[] */
+#define SIS_MAX_FRAME_LENGTH 3
+
+typedef struct {		/* one stored image; sisTexObj holds one per mipmap level */
+   GLubyte *Data;		/* Pointer to texture in offscreen */
+   GLuint memType;		/* VIDEO_TYPE or AGP_TYPE */
+   void *handle;		/* Handle for sisFree*() */
+   GLuint pitch;		/* NOTE(review): units not visible here -- confirm */
+   GLuint size;			/* NOTE(review): presumably bytes -- confirm */
+} sisTexImage;
+
+typedef struct sis_tex_obj {
+   sisTexImage image[SIS_MAX_TEXTURE_LEVELS];	/* Image data for each mipmap
+						 * level */
+   GLenum format;		/* One of GL_ALPHA, GL_INTENSITY, GL_LUMINANCE,
+				 * GL_LUMINANCE_ALPHA, GL_RGB, GL_RGBA,
+				 * MESA_YCBCR */
+   GLint hwformat;              /* One of the TEXEL_ defines */
+   GLint numImages;             /* Number of images loaded into .image */
+} sisTexObj, *sisTexObjPtr;
+
+/*
+ ** Per-texture-unit register values (__GLSiSHardware holds two of these)
+ */
+typedef struct __GLSiSTextureRec
+{
+  GLint hwTextureSet;		/* also supplies the level field that picks the texOffset/texPitch writes */
+  GLint hwTextureMip;
+  GLint hwTextureClrHigh;	/* only referenced from commented-out register writes */
+  GLint hwTextureClrLow;	/* only referenced from commented-out register writes */
+  GLint hwTexWidthHeight;	/* 6326: value written to REG_6326_3D_TextureWidthHeight */
+  GLint hwTextureBorderColor;
+  /* Address register value for each level 0..11 */
+  GLint texOffset0;
+  GLint texOffset1;
+  GLint texOffset2;
+  GLint texOffset3;
+  GLint texOffset4;
+  GLint texOffset5;
+  GLint texOffset6;
+  GLint texOffset7;
+  GLint texOffset8;
+  GLint texOffset9;
+  GLint texOffset10;
+  GLint texOffset11;
+  /* Pitch register values; each is written alongside the even-numbered level's address */
+  GLint texPitch01;
+  GLint texPitch23;
+  GLint texPitch45;
+  GLint texPitch67;
+  GLint texPitch89;
+  GLint texPitch10;
+} __GLSiSTexture;
+
+typedef struct __GLSiSHardwareRec
+{
+  GLint hwCapEnable, hwCapEnable2;	/*  Enable Setting */
+  /* Register images; sis_context keeps two copies of this struct, 'prev' and 'current'. */
+  GLint hwOffsetZ, hwZ;		/* Z Setting */
+
+  GLint hwZBias, hwZMask;	/* Z Setting */
+
+  GLint hwAlpha;		/* Alpha Setting */
+
+  GLint hwDstSet, hwDstMask;	/* Destination Setting */
+
+  GLint hwOffsetDest;		/* Destination Setting */
+
+  GLint hwLinePattern;		/* Line Setting */
+
+  GLint hwFog;			/* Fog Setting */
+
+  GLint hwFogFar, hwFogInverse;	/* Fog Distance setting */
+
+  GLint hwFogDensity;		/* Fog factor & density */
+
+  GLint hwStSetting, hwStSetting2;	/* Stencil Setting */
+
+  GLint hwStOffset;		/* Stencil Setting */
+
+  GLint hwDstSrcBlend;		/* Blending mode Setting */
+
+  GLint clipTopBottom;		/* Clip for Top & Bottom */
+
+  GLint clipLeftRight;		/* Clip for Left & Right */
+
+  struct __GLSiSTextureRec texture[2];	/* one entry per texture unit */
+
+  GLint hwTexEnvColor;		/* Texture Blending Setting (REG_3D_TextureBlendFactor) */
+
+  GLint hwTexBlendSet;		/* 6326: value for REG_6326_3D_TextureBlendSet */
+  GLint hwTexBlendColor0;	/* REG_3D_TextureColorBlendSet0 */
+  GLint hwTexBlendColor1;	/* REG_3D_TextureColorBlendSet1 */
+  GLint hwTexBlendAlpha0;	/* REG_3D_TextureAlphaBlendSet0 */
+  GLint hwTexBlendAlpha1;	/* REG_3D_TextureAlphaBlendSet1 */
+
+}
+__GLSiSHardware;
+
+typedef struct sis_context sisContextRec;
+typedef struct sis_context *sisContextPtr;
+/* Draw callbacks: the context plus 4 (quad), 3 (tri), 2 (line) or 1 (point) vertices. */
+typedef void (*sis_quad_func)( sisContextPtr, 
+			       sisVertex *,
+			       sisVertex *,
+			       sisVertex *,
+			       sisVertex * );
+
+typedef void (*sis_tri_func)( sisContextPtr, 
+			      sisVertex *,
+			      sisVertex *,
+			      sisVertex * );
+
+typedef void (*sis_line_func)( sisContextPtr, 
+			       sisVertex *,
+			       sisVertex * );
+
+typedef void (*sis_point_func)( sisContextPtr,
+				sisVertex * );
+
+/**
+ * Derived from gl_renderbuffer.
+ */
+struct sis_renderbuffer {
+   struct gl_renderbuffer Base;  /* must be first! */
+   drmSize size;
+   GLuint offset;   /* written to REG_DST_ADDR by the clear blits */
+   void *handle;
+   GLuint pitch;    /* OR-ed into the low half of REG_DST_PITCH_HEIGHT by the clear blits */
+   GLuint bpp;
+   char *map;
+};
+
+/* Device dependent context state, reached from a GLcontext through
+ * SIS_CONTEXT(), i.e. ctx->DriverCtx. */
+struct sis_context
+{
+  /* This must be first in this structure */
+  GLcontext *glCtx;
+
+  /* Vertex state */
+  GLuint vertex_size;
+  struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+  GLuint vertex_attr_count;
+  char *verts;			/* points to tnl->clipspace.vertex_buf */
+
+  /* Vertex buffer (in system memory or AGP) state. */
+  unsigned char *vb;		/* Beginning of vertex buffer */
+  unsigned char *vb_cur;	/* Current write location in vertex buffer */
+  unsigned char *vb_last;	/* Last written location in vertex buffer */
+  unsigned char *vb_end;	/* End of vertex buffer */
+  void *vb_agp_handle;
+  GLuint vb_agp_offset;
+  GLboolean using_agp;
+  GLint coloroffset;		/* Offset in vertex format of current color */
+  GLint specoffset;		/* Offset in vertex format of specular color */
+
+  GLuint NewGLState;
+  GLuint Fallback;
+  GLuint RenderIndex;
+  GLfloat hw_viewport[16];
+  GLfloat depth_scale;
+
+  unsigned int virtualX, virtualY;
+  unsigned int bytesPerPixel;
+  unsigned char *IOBase;
+  unsigned char *FbBase;
+  unsigned int displayWidth;
+
+  /* HW RGBA layout */
+  unsigned int redMask, greenMask, blueMask, alphaMask;
+  unsigned int colorFormat;
+
+  /* Z format */
+  unsigned int zFormat;
+
+  /* Clear patterns, 4 bytes */
+  unsigned int clearColorPattern;
+  unsigned int clearZStencilPattern;
+
+  /* Fallback rasterization functions 
+   */
+  sis_point_func draw_point;
+  sis_line_func draw_line;
+  sis_tri_func draw_tri;
+  sis_quad_func draw_quad;
+
+  GLuint hw_primitive;
+  GLenum raster_primitive;
+  GLenum render_primitive;
+
+  /* DRM fd */
+  int driFd;
+  
+  /* AGP Memory */
+  unsigned int AGPSize;
+  unsigned char *AGPBase;
+  unsigned int AGPAddr;
+  
+  /* register 0x89F4 */
+  GLint AGPParseSet;
+
+  /* register 0x89F8 */
+  GLint dwPrimitiveSet;
+
+  __GLSiSHardware prev, current;
+
+  int Chipset;
+  GLboolean is6326;
+
+  GLint drawableID;
+
+  GLint GlobalFlag;
+  DECLARE_RENDERINPUTS(last_tcl_state_bitset);
+
+  /* Stereo */
+  GLboolean useStereo;
+  GLboolean stereoEnabled;
+  int stereo_drawIndex;
+  int stereo_drawSide;
+  GLboolean irqEnabled;
+
+  GLboolean clearTexCache;
+
+  GLuint TexStates[SIS_MAX_TEXTURES];
+  GLuint PrevTexFormat[SIS_MAX_TEXTURES];
+
+  int *CurrentQueueLenPtr;	/* mirrored queue length used by mWait3DCmdQueue */
+  unsigned int *FrameCountPtr;
+
+  /* Front/back/depth buffer info */
+  GLuint width, height;			/* size of buffers */
+  GLint bottom;				/* height - 1; used by the Y_FLIP macro */
+  /* XXX These don't belong here.  They should be per-drawable state. */
+  struct sis_renderbuffer front;
+  struct sis_renderbuffer back;
+  struct sis_renderbuffer depth;
+  struct sis_renderbuffer stencil; /* mirrors depth */
+
+  /* Mirrors of some DRI state
+   */
+  __DRIcontext	*driContext;	/* DRI context */
+  __DRIscreen	*driScreen;	/* DRI screen */
+  __DRIdrawable	*driDrawable;	/* DRI drawable bound to this ctx */
+
+  unsigned int lastStamp;	        /* mirror driDrawable->lastStamp */
+
+  drm_context_t hHWContext;
+  drm_hw_lock_t *driHwLock;
+
+  sisScreenPtr sisScreen;		/* Screen private DRI data */
+  SISSAREAPrivPtr sarea;		/* Private SAREA data */
+
+   /* Configuration cache */
+   driOptionCache optionCache;
+    GLint texture_depth;
+};
+
+#define SIS_CONTEXT(ctx)		((sisContextPtr)((ctx)->DriverCtx))
+
+/* Macros */
+#define GET_IOBase(x) ((x)->IOBase)
+/* Mirror a row index about smesa->bottom; needs a local 'smesa' in scope. */
+#define Y_FLIP(Y)  (smesa->bottom - (Y))
+/* Pack 8-bit channels as 5:6:5, keeping the high bits of each channel. */
+#define SISPACKCOLOR565( r, g, b )					\
+   ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
+/* Pack as A:R:G:B bytes; alpha is widened to GLuint so a >= 0x80 never shifts into a sign bit. */
+#define SISPACKCOLOR8888( r, g, b, a )					\
+   (((GLuint)(a) << 24) | ((r) << 16) | ((g) << 8) | (b))
+
+#define SIS_VERBOSE 0
+
+/* Store 'value' to the register at byte offset 'reg' from smesa->IOBase; bare { } block, needs 'smesa'. */
+#define MMIO(reg, value) \
+{\
+   *(volatile GLint *)(smesa->IOBase + (reg)) = value;			\
+}
+/* Volatile GLint / GLfloat reads of the register at byte offset 'reg'; need 'smesa' in scope. */
+#define MMIO_READ(reg) *(volatile GLint *)(smesa->IOBase + (reg))
+#define MMIO_READf(reg) *(volatile GLfloat *)(smesa->IOBase + (reg))
+/* Write barrier: compiler-only barrier on x86/x86-64, "mf" on ia64; any other target fails the build. */
+#if defined(__i386__) || defined(__x86_64__)
+#define MMIO_WMB()	__asm __volatile("" : : : "memory")
+#elif defined(__ia64__)
+#define MMIO_WMB()	__asm __volatile("mf" : : : "memory")
+#else
+#error platform needs WMB
+#endif
+/* Writes 0xff to REG_3D_EndPrimitiveList and 0xffffffff to offset 0x8b60; needs 'smesa' in scope. */
+#define mEndPrimitive()  \
+{       \
+   *(volatile GLubyte *)(smesa->IOBase + REG_3D_EndPrimitiveList) = 0xff; \
+   *(volatile GLuint *)(smesa->IOBase + 0x8b60) = 0xffffffff;		\
+}
+/* Print "[file:line]:" followed by a printf-style message to stderr, then exit(-1). */
+#define sis_fatal_error(...)						\
+do {									\
+	fprintf(stderr, "[%s:%d]:", __FILE__, __LINE__);		\
+	fprintf(stderr, __VA_ARGS__);					\
+	exit(-1);							\
+} while (0)
+/* Make sure the command queue has room for wLen.  Lock required; needs a local 'smesa' in scope.
+ * NOTE(review): the mirror is only decremented when the first check fails, and the expansion is a bare 'if' -- confirm both. */
+#define mWait3DCmdQueue(wLen)						\
+/* Update the mirrored queue pointer if it doesn't indicate enough space */ \
+if (*(smesa->CurrentQueueLenPtr) < (wLen)) {				\
+   *(smesa->CurrentQueueLenPtr) =					\
+      (*(GLint *)(GET_IOBase(smesa) + REG_CommandQueue) & MASK_QueueLen) - 20; \
+   /* Spin and wait if the queue is actually too full */		\
+   if (*(smesa->CurrentQueueLenPtr) < (wLen))				\
+      WaitingFor3dIdle(smesa, wLen);					\
+   *(smesa->CurrentQueueLenPtr) -= wLen;				\
+}
+/* Bit flags for verbose output.  NOTE(review): presumably tested against SIS_VERBOSE -- confirm. */
+enum _sis_verbose {
+	VERBOSE_SIS_BUFFER  = 0x1,
+	VERBOSE_SIS_MEMORY  = 0x2
+};
+
+extern GLboolean sisCreateContext( gl_api api,
+				   const __GLcontextModes *glVisual,
+				   __DRIcontext *driContextPriv,
+                                   void *sharedContextPrivate );
+extern void sisDestroyContext( __DRIcontext * );
+
+void sisReAllocateBuffers(GLcontext *ctx, GLframebuffer *drawbuffer,
+                          GLuint width, GLuint height);
+
+extern GLboolean sisMakeCurrent( __DRIcontext *driContextPriv,
+                                  __DRIdrawable *driDrawPriv,
+                                  __DRIdrawable *driReadPriv );
+
+extern GLboolean sisUnbindContext( __DRIcontext *driContextPriv );
+
+void WaitEngIdle (sisContextPtr smesa);
+void Wait2DEngIdle (sisContextPtr smesa);
+void WaitingFor3dIdle(sisContextPtr smesa, int wLen);
+
+/* update to hw */
+extern void sis_update_texture_state( sisContextPtr smesa );
+extern void sis_update_render_state( sisContextPtr smesa );
+extern void sis6326_update_texture_state( sisContextPtr smesa );
+extern void sis6326_update_render_state( sisContextPtr smesa );
+
+/* ================================================================
+ * Debugging:
+ */
+#define DO_DEBUG		1
+
+#if DO_DEBUG
+extern int SIS_DEBUG;
+#else
+#define SIS_DEBUG		0
+#endif
+
+#define DEBUG_FALLBACKS		0x01
+
+#endif /* _sis_ctx_h_ */
diff --git a/src/mesa/drivers/dri/sis/sis_dd.c b/src/mesa/drivers/dri/sis/sis_dd.c
new file mode 100644
index 0000000000..fe4ade8592
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_dd.c
@@ -0,0 +1,267 @@
+/**************************************************************************
+
+Copyright 2000 Silicon Integrated Systems Corp, Inc., HsinChu, Taiwan.
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *    Sung-Ching Lin <sclin@sis.com.tw>
+ *    Eric Anholt <anholt@FreeBSD.org>
+ *
+ */
+
+#include "sis_context.h"
+#include "sis_dd.h"
+#include "sis_lock.h"
+#include "sis_alloc.h"
+#include "sis_span.h"
+#include "sis_state.h"
+#include "sis_tris.h"
+
+#include "main/formats.h"
+#include "main/renderbuffer.h"
+
+#include "utils.h"
+
+#define DRIVER_DATE	"20060710"
+
+/* Report the size of the drawable bound to the current GL context.  The
+ * 'buffer' argument itself is not consulted. */
+static void
+sisGetBufferSize( GLframebuffer *buffer, GLuint *width, GLuint *height )
+{
+   GET_CURRENT_CONTEXT(glctx);
+   sisContextPtr smesa = SIS_CONTEXT(glctx);
+   const __DRIdrawable *draw;
+   LOCK_HARDWARE();   /* both dimensions are read under the hardware lock */
+   draw = smesa->driDrawable;
+   *width  = draw->w;
+   *height = draw->h;
+   UNLOCK_HARDWARE();
+}
+
+/* Return the driver-specific strings for glGetString(); NULL for any name
+ * not handled here.  GL_RENDERER is formatted into a static buffer. */
+static const GLubyte *
+sisGetString( GLcontext *ctx, GLenum name )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   static char buffer[128];
+   /* Non-zero when the context reports a non-empty AGP area. */
+   GLuint agp_mode = (smesa->AGPSize > 0);
+
+   switch ( name )
+   {
+   case GL_VENDOR:
+      return (const GLubyte *)"Eric Anholt";
+
+   case GL_RENDERER:
+      /* The return value is not needed: nothing is appended afterwards. */
+      (void) driGetRendererString( buffer, "SiS", DRIVER_DATE, agp_mode );
+      return (const GLubyte *)buffer;
+
+   default:
+      return NULL;
+   }
+}
+
+/* Flush hook: send pending vertices to the hardware via SIS_FIREVERTICES.
+ * Unlike sisFinish(), no wait for engine idle is issued here. */
+static void
+sisFlush( GLcontext *ctx )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+
+   SIS_FIREVERTICES(smesa);
+}
+
+/* Finish hook: fire pending vertices, then take the hardware lock and
+ * call WaitEngIdle() before releasing it again.
+ */
+static void
+sisFinish( GLcontext *ctx )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+
+   SIS_FIREVERTICES(smesa);
+   LOCK_HARDWARE();
+   WaitEngIdle( smesa );
+   UNLOCK_HARDWARE();
+}
+
+static void
+sisDeleteRenderbuffer(struct gl_renderbuffer *rb)
+{
+   /* Intentionally empty: don't free() since we're contained in sis_context struct. */
+}
+
+static GLboolean   /* AllocStorage hook: only records the requested size and format */
+sisRenderbufferStorage(GLcontext *ctx, struct gl_renderbuffer *rb,
+                       GLenum internalFormat, GLuint width, GLuint height)
+{
+   rb->InternalFormat = internalFormat;
+   rb->Height = height;
+   rb->Width = width;
+   return GL_TRUE;   /* nothing is allocated here, so there is no failure path */
+}
+
+/* Set up one of the context's renderbuffers: base initialization, then
+ * the Mesa format and data type that match 'format', then the driver's
+ * Delete/AllocStorage hooks. */
+static void
+sisInitRenderbuffer(struct gl_renderbuffer *rb, GLenum format)
+{
+   /* These renderbuffers are initialized with name 0. */
+   _mesa_init_renderbuffer(rb, 0);
+
+   /* Make sure we're using a null-valued GetPointer routine */
+   assert(rb->GetPointer(NULL, rb, 0, 0) == NULL);
+
+   rb->InternalFormat = format;
+
+   /* Pick the storage format and data type for the requested format. */
+   switch (format) {
+   case GL_RGBA:                       /* color */
+      rb->Format = MESA_FORMAT_ARGB8888;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      break;
+   case GL_DEPTH_COMPONENT16:          /* 16-bit depth */
+      rb->Format = MESA_FORMAT_Z16;
+      rb->DataType = GL_UNSIGNED_INT;
+      break;
+   case GL_DEPTH_COMPONENT24:          /* 24-bit depth */
+      rb->Format = MESA_FORMAT_Z32;
+      rb->DataType = GL_UNSIGNED_INT;
+      break;
+   default:                            /* anything else must be stencil */
+      ASSERT(format == GL_STENCIL_INDEX8_EXT);
+      rb->Format = MESA_FORMAT_S8;
+      rb->DataType = GL_UNSIGNED_BYTE;
+      break;
+   }
+
+   /* Install the driver's delete and storage hooks. */
+   rb->Delete = sisDeleteRenderbuffer;
+   rb->AllocStorage = sisRenderbufferStorage;
+}
+/* Sync buffer bookkeeping with the drawable: one-time renderbuffer setup, front offset/map, realloc on resize. */
+void
+sisUpdateBufferSize(sisContextPtr smesa)
+{
+   __GLSiSHardware *current = &smesa->current;
+   __GLSiSHardware *prev = &smesa->prev;
+   struct gl_framebuffer *fb = smesa->glCtx->DrawBuffer;
+
+   if (!smesa->front.Base.InternalFormat) {
+      /* do one-time init for the renderbuffers */
+      sisInitRenderbuffer(&smesa->front.Base, GL_RGBA);
+      sisSetSpanFunctions(&smesa->front, &fb->Visual);
+      _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &smesa->front.Base);
+
+      if (fb->Visual.doubleBufferMode) {
+         sisInitRenderbuffer(&smesa->back.Base, GL_RGBA);
+         sisSetSpanFunctions(&smesa->back, &fb->Visual);
+         _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &smesa->back.Base);
+      }
+
+      if (smesa->glCtx->Visual.depthBits > 0) {
+         sisInitRenderbuffer(&smesa->depth.Base, 
+                             (smesa->glCtx->Visual.depthBits == 16
+                              ? GL_DEPTH_COMPONENT16 : GL_DEPTH_COMPONENT24));
+         sisSetSpanFunctions(&smesa->depth, &fb->Visual);
+         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &smesa->depth.Base);
+      }
+
+      if (smesa->glCtx->Visual.stencilBits > 0) {
+         sisInitRenderbuffer(&smesa->stencil.Base, GL_STENCIL_INDEX8_EXT);
+         sisSetSpanFunctions(&smesa->stencil, &fb->Visual);
+         _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &smesa->stencil.Base);
+      }
+   }
+
+   /* Make sure initialization did what we think it should */
+   assert(smesa->front.Base.InternalFormat);
+   assert(smesa->front.Base.AllocStorage);
+   if (fb->Visual.doubleBufferMode) {
+      assert(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer);
+      assert(smesa->back.Base.AllocStorage);   /* the back buffer's own hook */
+   }
+   if (fb->Visual.depthBits) {
+      assert(fb->Attachment[BUFFER_DEPTH].Renderbuffer);
+      assert(smesa->depth.Base.AllocStorage);
+   }
+
+   /* XXX Should get the base offset of the frontbuffer from the X Server */
+   smesa->front.offset = smesa->driDrawable->x * smesa->bytesPerPixel +
+			 smesa->driDrawable->y * smesa->front.pitch;
+   smesa->front.map = (char *) smesa->driScreen->pFB + smesa->front.offset;
+
+   if ( smesa->width == smesa->driDrawable->w &&
+	smesa->height == smesa->driDrawable->h )
+   {
+      return;   /* size unchanged: the existing back/depth buffers are kept */
+   }
+
+   smesa->front.bpp = smesa->bytesPerPixel * 8;
+   /* Front pitch set on context create */
+   smesa->front.size = smesa->front.pitch * smesa->driDrawable->h;
+
+   smesa->width = smesa->driDrawable->w;
+   smesa->height = smesa->driDrawable->h;
+   smesa->bottom = smesa->height - 1;
+
+   if (smesa->back.offset)
+      sisFreeBackbuffer( smesa );
+   if (smesa->depth.offset)
+      sisFreeZStencilBuffer( smesa );
+
+   if ( smesa->glCtx->Visual.depthBits > 0 )
+      sisAllocZStencilBuffer( smesa );
+   if ( smesa->glCtx->Visual.doubleBufferMode )
+      sisAllocBackbuffer( smesa );
+
+   current->hwZ &= ~MASK_ZBufferPitch;
+   current->hwZ |= smesa->depth.pitch >> 2;
+   current->hwOffsetZ = smesa->depth.offset >> 2;
+
+   if ((current->hwOffsetZ != prev->hwOffsetZ) || (current->hwZ != prev->hwZ)) {
+      prev->hwOffsetZ = current->hwOffsetZ;
+      prev->hwZ = current->hwZ;
+      smesa->GlobalFlag |= GFLAG_ZSETTING;
+   }
+  
+   sisUpdateClipping( smesa->glCtx );
+}
+
+/* Plug this file's buffer-size, string, flush and finish handlers into
+ * the driver function table. */
+void
+sisInitDriverFuncs( struct dd_function_table *functions )
+{
+   functions->Flush         = sisFlush;
+   functions->Finish        = sisFinish;
+   functions->GetString     = sisGetString;
+   functions->GetBufferSize = sisGetBufferSize;
+}
diff --git a/src/mesa/drivers/dri/sis/sis_dd.h b/src/mesa/drivers/dri/sis/sis_dd.h
new file mode 100644
index 0000000000..b141243a59
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_dd.h
@@ -0,0 +1,38 @@
+/**************************************************************************
+
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *    Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#ifndef __SIS_DD_H__
+#define __SIS_DD_H__
+
+extern void sisUpdateBufferSize( sisContextPtr smesa );
+
+extern void sisInitDriverFuncs( struct dd_function_table *functions );
+
+#endif
diff --git a/src/mesa/drivers/dri/sis/sis_fog.c b/src/mesa/drivers/dri/sis/sis_fog.c
new file mode 100644
index 0000000000..6c774e010e
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_fog.c
@@ -0,0 +1,205 @@
+/**************************************************************************
+
+Copyright 2000 Silicon Integrated Systems Corp, Inc., HsinChu, Taiwan.
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Sung-Ching Lin <sclin@sis.com.tw>
+ *   Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#include "sis_context.h"
+#include "sis_state.h"
+
+#include "main/macros.h"
+
+static GLint convertFtToFogFt( GLfloat dwInValue );
+static GLint doFPtoFixedNoRound( GLfloat dwInValue, int nFraction );
+/* Fogfv hook: mirror the ctx->Fog value named by 'pname' into 'current'; set GFLAG_FOGSETTING when it differs from 'prev'. */
+void
+sisDDFogfv( GLcontext *ctx, GLenum pname, const GLfloat *params )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *prev = &smesa->prev;
+   __GLSiSHardware *current = &smesa->current;
+
+   float fArg;
+   GLint fogColor;
+
+   switch (pname)
+   {
+   case GL_FOG_COORDINATE_SOURCE_EXT:
+      current->hwFog &= ~MASK_FogMode;   /* NOTE(review): clears the fog mode bits and nothing below restores them -- confirm intended */
+      switch (ctx->Fog.FogCoordinateSource)
+      {
+      case GL_FOG_COORDINATE_EXT:
+         current->hwFog &= ~MASK_FogZLookup;
+         break;
+      case GL_FRAGMENT_DEPTH_EXT:
+         current->hwFog |= MASK_FogZLookup;
+         break;
+      }
+      if (current->hwFog != prev->hwFog) {
+         prev->hwFog = current->hwFog;
+         smesa->GlobalFlag |= GFLAG_FOGSETTING;
+      }
+      break;
+   case GL_FOG_MODE:
+      current->hwFog &= ~MASK_FogMode;   /* replaced by one of the FOGMODE_* values below */
+      switch (ctx->Fog.Mode)
+      {
+      case GL_LINEAR:
+         current->hwFog |= FOGMODE_LINEAR;
+         break;
+      case GL_EXP:
+         current->hwFog |= FOGMODE_EXP;
+         break;
+      case GL_EXP2:
+         current->hwFog |= FOGMODE_EXP2;
+         break;
+      }
+      if (current->hwFog != prev->hwFog) {
+         prev->hwFog = current->hwFog;
+         smesa->GlobalFlag |= GFLAG_FOGSETTING;
+      }
+      break;
+   case GL_FOG_DENSITY:
+      current->hwFogDensity = convertFtToFogFt( ctx->Fog.Density );
+      if (current->hwFogDensity != prev->hwFogDensity) {
+         prev->hwFogDensity = current->hwFogDensity;
+         smesa->GlobalFlag |= GFLAG_FOGSETTING;
+      }
+      break;
+   case GL_FOG_START:
+   case GL_FOG_END:
+      fArg = 1.0 / (ctx->Fog.End - ctx->Fog.Start);   /* not guarded against End == Start */
+      current->hwFogInverse = doFPtoFixedNoRound( fArg, 10 );
+      if (pname == GL_FOG_END)   /* hwFogFar is only refreshed when the end value is set */
+      {
+         if (smesa->Chipset == PCI_CHIP_SIS300)
+            current->hwFogFar = doFPtoFixedNoRound( ctx->Fog.End, 10 );
+         else
+            current->hwFogFar = doFPtoFixedNoRound( ctx->Fog.End, 6 );
+      }
+      if (current->hwFogFar != prev->hwFogFar ||
+          current->hwFogInverse != prev->hwFogInverse)
+      {
+         prev->hwFogFar = current->hwFogFar;
+         prev->hwFogInverse = current->hwFogInverse;
+         smesa->GlobalFlag |= GFLAG_FOGSETTING;
+      }
+      break;
+   case GL_FOG_INDEX:
+      /* TODO */
+      break;
+   case GL_FOG_COLOR:
+      fogColor  = FLOAT_TO_UBYTE( ctx->Fog.Color[0] ) << 16;
+      fogColor |= FLOAT_TO_UBYTE( ctx->Fog.Color[1] ) << 8;
+      fogColor |= FLOAT_TO_UBYTE( ctx->Fog.Color[2] );
+      current->hwFog &= 0xff000000;   /* keep the top byte, replace the low 24 bits with the color */
+      current->hwFog |= fogColor;
+      if (current->hwFog != prev->hwFog) {
+          prev->hwFog = current->hwFog;
+         smesa->GlobalFlag |= GFLAG_FOGSETTING;
+      }
+      break;
+   }
+}
+
+/* Truncate a float to signed fixed point with nFraction fractional bits;
+ * magnitudes too large for a GLint (including infinity) saturate. */
+static GLint
+doFPtoFixedNoRound( GLfloat dwInValue, int nFraction )
+{
+   GLint dwMantissa;
+   int nTemp;
+   union { int i; float f; } u;
+   GLint val;
+
+   u.f = dwInValue;
+   val = u.i;
+   if (val == 0)
+      return 0;
+   nTemp = (int) (val & 0x7F800000) >> 23;
+   nTemp = nTemp - 127 + nFraction - 23;   /* shift to apply to the 24-bit mantissa */
+   dwMantissa = (val & 0x007FFFFF) | 0x00800000;
+   if (nTemp < -25)
+       return 0;
+   if (nTemp > 7)   /* bit 23 would be shifted into or past the sign bit */
+      dwMantissa = 0x7FFFFFFF;
+   else if (nTemp > 0)
+      dwMantissa <<= nTemp;
+   else
+      dwMantissa >>= -nTemp;
+   if (val & 0x80000000)
+      dwMantissa = ~dwMantissa + 1;
+   return dwMantissa;
+}
+
+/* s[8].23->s[7].10
+ *
+ * Repack an IEEE single as the fog density float:
+ *   (-1)^S * 2^(E - 127)       * (1 + M / 2^23)
+ * = (-1)^S * 2^((E - 63) - 64) * (1 + (M / 2^13) / 2^10)
+ * so the result keeps the sign, stores E - 63 in 7 exponent bits and
+ * the top 10 bits of M as mantissa.
+ */
+static GLint
+convertFtToFogFt( GLfloat dwInValue )
+{
+   union { int i; float f; } pun;
+   GLint bits, exponent, mantissa, packed;
+
+   pun.f = dwInValue;
+   bits = pun.i;
+
+   /* An all-zero bit pattern maps straight to zero. */
+   if (bits == 0)
+      return 0;
+
+   /* Re-base the biased exponent by 63. */
+   exponent = ((bits & 0x7F800000) >> 23) - 63;
+
+   /* Too small for the target format: return zero. */
+   if (exponent < 0)
+      return 0;
+
+   if (exponent > 0x7F) {
+      /* Too large: return +Max (or -Max once the sign is added). */
+      exponent = 0x7F;
+      mantissa = 0x3FF;
+   }
+   else {
+      /* Drop the 13 low mantissa bits without rounding. */
+      mantissa = (bits & 0x007FFFFF) >> (23 - 10);
+   }
+
+   /* Move the sign from bit 31 down to bit 17, above exponent and mantissa. */
+   packed = (bits & 0x80000000) >> (31 - 17);
+   packed |= (exponent << 10) | mantissa;
+
+   return packed;
+}
diff --git a/src/mesa/drivers/dri/sis/sis_lock.c b/src/mesa/drivers/dri/sis/sis_lock.c
new file mode 100644
index 0000000000..b8ff4e31e2
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_lock.c
@@ -0,0 +1,80 @@
+/**************************************************************************
+
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#include "main/context.h"
+#include "sis_context.h"
+#include "sis_lock.h"
+#include "sis_dd.h"
+#include "sis_state.h"
+#include "drirenderbuffer.h"
+
+/* Update the hardware state.  This is called if another context has
+ * grabbed the hardware lock, which includes the X server.  This
+ * function also updates the driver's window state after the X server
+ * moves, resizes or restacks a window -- the change will be reflected
+ * in the drawable position and clip rects.  Since the X server grabs
+ * the hardware lock when it changes the window state, this routine will
+ * automatically be called after such a change.
+ */
+void
+sisGetLock( sisContextPtr smesa, GLuint flags )
+{
+   SISSAREAPrivPtr saPriv = smesa->sarea;
+   __DRIscreen *sPriv = smesa->driScreen;
+   __DRIdrawable *dPriv = smesa->driDrawable;
+   GLcontext *ctx = smesa->glCtx;
+
+   drmGetLock( smesa->driFd, smesa->hHWContext, flags );
+
+   /* Refresh the drawable info first: the window may have moved, so
+    * the clip rects may be stale.  This drops and retakes the hw lock
+    * so the X server can answer the DRI request, which is why every
+    * state check below has to come after it.
+    */
+   DRI_VALIDATE_DRAWABLE_INFO( sPriv, dPriv );
+
+   /* New stamp: resync buffers, clipping and draw buffer, then record it. */
+   if ( dPriv->lastStamp != smesa->lastStamp ) {
+      sisUpdateBufferSize( smesa );
+      sisUpdateClipping( ctx );
+      if (!smesa->is6326)
+	 sisDDDrawBuffer( ctx, ctx->Color.DrawBuffer[0] );
+      else
+	 sis6326DDDrawBuffer( ctx, ctx->Color.DrawBuffer[0] );
+      driUpdateFramebufferSize( ctx, dPriv );
+      smesa->lastStamp = dPriv->lastStamp;
+   }
+
+   /* A different context was the last owner: flag everything (GFLAG_ALL). */
+   if ( saPriv->CtxOwner != smesa->hHWContext ) {
+      saPriv->CtxOwner = smesa->hHWContext;
+      smesa->GlobalFlag = GFLAG_ALL;
+   }
+}
diff --git a/src/mesa/drivers/dri/sis/sis_lock.h b/src/mesa/drivers/dri/sis/sis_lock.h
new file mode 100644
index 0000000000..54844e9b09
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_lock.h
@@ -0,0 +1,86 @@
+/**************************************************************************
+
+Copyright 2000 Silicon Integrated Systems Corp, Inc., HsinChu, Taiwan.
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Sung-Ching Lin <sclin@sis.com.tw>
+ *   Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#ifndef __SIS_LOCK_H
+#define __SIS_LOCK_H
+
+extern void sisGetLock( sisContextPtr smesa, GLuint flags );
+/* Lock tracing hooks used by LOCK_HARDWARE/UNLOCK_HARDWARE; active only when DEBUG_LOCKING is defined. */
+#ifdef DEBUG_LOCKING
+extern char *prevLockFile;	/* file of the most recent DEBUG_LOCK(), NULL after DEBUG_RESET() */
+extern int prevLockLine;	/* line of the most recent DEBUG_LOCK(), 0 after DEBUG_RESET() */
+#define DEBUG_LOCK() \
+  do { \
+    prevLockFile=(__FILE__); \
+    prevLockLine=(__LINE__); \
+  } while (0)
+#define DEBUG_RESET() \
+  do { \
+    prevLockFile=NULL; \
+    prevLockLine=0; \
+  } while (0)
+#define DEBUG_CHECK_LOCK() /* NOTE(review): prints the current location, not prevLockFile/prevLockLine -- confirm */ \
+  do { \
+      if(prevLockFile){ \
+        fprintf(stderr, "LOCK SET : %s:%d\n", __FILE__, __LINE__); \
+      } \
+  } while (0)
+#else /* tracing disabled: the hooks expand to nothing */
+#define DEBUG_LOCK()
+#define DEBUG_RESET()
+#define DEBUG_CHECK_LOCK()
+#endif
+
+/* Lock the hardware for the local 'smesa': mEndPrimitive(), then DRM_CAS; sisGetLock() runs when __ret is non-zero. */
+#define LOCK_HARDWARE()							\
+  do {									\
+    char __ret=0;							\
+    mEndPrimitive();							\
+    DEBUG_CHECK_LOCK();							\
+    DRM_CAS( smesa->driHwLock, smesa->hHWContext,			\
+	     (DRM_LOCK_HELD | smesa->hHWContext), __ret );		\
+    if ( __ret != 0 )							\
+        sisGetLock( smesa, 0 );             					\
+    DEBUG_LOCK();							\
+  } while (0)
+
+/* Unlock the hardware for the local 'smesa'; mEndPrimitive() is issued before DRM_UNLOCK. */
+#define UNLOCK_HARDWARE()						\
+  do {									\
+    mEndPrimitive(); 							\
+    DRM_UNLOCK(smesa->driFd, smesa->driHwLock, 				\
+	       smesa->hHWContext);					\
+    DEBUG_RESET(); 							\
+  } while (0)
+
+#endif
diff --git a/src/mesa/drivers/dri/sis/sis_reg.h b/src/mesa/drivers/dri/sis/sis_reg.h
new file mode 100644
index 0000000000..e40c4371bf
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_reg.h
@@ -0,0 +1,902 @@
+/**************************************************************************
+
+Copyright 2000 Silicon Integrated Systems Corp, Inc., HsinChu, Taiwan.
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Sung-Ching Lin <sclin@sis.com.tw>
+ *   Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#ifndef _sis_reg_h_
+#define _sis_reg_h_
+
+/*
+ * Define All the Register Address of 6327
+ */
+#define REG_SRC_ADDR			0x8200
+#define REG_SRC_PITCH			0x8204
+#	define BLIT_DEPTH_8		0x00000000
+#	define BLIT_DEPTH_15		0x40000000
+#	define BLIT_DEPTH_16		0x80000000
+#	define BLIT_DEPTH_32		0xc0000000
+#define REG_SRC_X_Y			0x8208
+#define REG_DST_X_Y			0x820c
+#define REG_DST_ADDR			0x8210
+#define REG_DST_PITCH_HEIGHT		0x8214
+#define REG_WIDTH_HEIGHT		0x8218
+#define REG_PATFG			0x821c
+#define REG_PATBG			0x8220
+#define REG_SRCFG			0x8224
+#define REG_SRCBG			0x8228
+#define REG_MONOPAT0			0x822c
+#define REG_MONOPAT1			0x8230
+#define REG_CLIPLT			0x8234
+#define REG_CLIPRB			0x8238
+#define REG_BLIT_CMD			0x823c
+#	define CMD_ROP_PAT		0x0000f000
+#	define CMD_ROP_SRC		0x0000cc00
+#	define CMD_DD_ENABLE		0x00000006
+#	define CMD_SRC_VIDEO		0x00000000
+#	define CMD_SRC_CPU		0x00000010
+#	define CMD_DIR_X_DEC		0x00000000
+#	define CMD_DIR_X_INC		0x00010000
+#	define CMD_DIR_Y_DEC		0x00000000
+#	define CMD_DIR_Y_INC		0x00020000
+
+#define REG_CommandQueue		0x8240
+ 
+#define REG_3D_TSFSa                    0x8800
+#define REG_3D_TSZa                     0x8804
+#define REG_3D_TSXa                     0x8808
+#define REG_3D_TSYa                     0x880C
+#define REG_3D_TSARGBa                  0x8810
+#define REG_3D_TSWGa                    0x8814
+#define REG_3D_TSUAa                    0x8818
+#define REG_3D_TSVAa                    0x881C
+#define REG_3D_TSUBa                    0x8820
+#define REG_3D_TSVBa                    0x8824
+#define REG_3D_TSUCa                    0x8828
+#define REG_3D_TSVCa                    0x882C
+
+#define REG_3D_TSFSb                    0x8830
+#define REG_3D_TSZb                     0x8834
+#define REG_3D_TSXb                     0x8838
+#define REG_3D_TSYb                     0x883C
+#define REG_3D_TSARGBb                  0x8840
+#define REG_3D_TSWGb                    0x8844
+#define REG_3D_TSUAb                    0x8848
+#define REG_3D_TSVAb                    0x884C
+#define REG_3D_TSUBb                    0x8850
+#define REG_3D_TSVBb                    0x8854
+#define REG_3D_TSUCb                    0x8858
+#define REG_3D_TSVCb                    0x885C
+
+#define REG_3D_TSFSc                    0x8860
+#define REG_3D_TSZc                     0x8864
+#define REG_3D_TSXc                     0x8868
+#define REG_3D_TSYc                     0x886C
+#define REG_3D_TSARGBc                  0x8870
+#define REG_3D_TSWGc                    0x8874
+#define REG_3D_TSUAc                    0x8878
+#define REG_3D_TSVAc                    0x887C
+#define REG_3D_TSUBc                    0x8880
+#define REG_3D_TSVBc                    0x8884
+#define REG_3D_TSUCc                    0x8888
+#define REG_3D_TSVCc                    0x888C
+
+/*
+ * REG_3D_AGPCmdSetting (89e4h-89f3h)
+ */
+#define REG_3D_AGPCmBase                0x89E4
+#define REG_3D_AGPRmDwNum               0x89E8
+#define REG_3D_AGPTtDwNum               0x89EC
+#define REG_3D_AGPCmFire                0x89F0
+
+#define REG_3D_ParsingSet               0x89F4
+#define REG_3D_PrimitiveSet             0x89F8
+#define REG_3D_ShadeMode                0x89F8
+#define REG_3D_EngineFire               0x89FC
+#define REG_3D_EngineStatus             0x89FC
+#define REG_3D_TEnable                  0x8A00
+#define REG_3D_TEnable2                 0x8A04
+
+#define REG_3D_ZSet                     0x8A08
+#define REG_3D_ZBias                    0x8A0C
+#define REG_3D_ZStWriteMask             0x8A10
+
+#define REG_3D_ZAddress                 0x8A14
+#define REG_3D_AlphaSet                 0x8A18
+#define REG_3D_AlphaAddress             0x8A1C
+#define REG_3D_DstSet                   0x8A20
+#define REG_3D_DstAlphaWriteMask        0x8A24
+
+#define REG_3D_DstAddress               0x8A28
+
+#define REG_3D_LinePattern              0x8A2C
+
+#define REG_3D_FogSet                   0x8A30
+
+#define REG_3D_FogFarDistance           0x8A34
+#define REG_3D_FogInverseDistance       0x8A38
+#define REG_3D_FogFactorDensity         0x8A3C
+
+#define REG_3D_StencilSet               0x8A44
+#define REG_3D_StencilSet2              0x8A48
+#define REG_3D_StencilAddress           0x8A4C
+
+#define REG_3D_DstBlendMode             0x8A50
+#define REG_3D_SrcBlendMode             0x8A50
+#define REG_3D_ClipTopBottom            0x8A54
+#define REG_3D_ClipLeftRight            0x8A58
+
+#define REG_3D_Brightness               0x8A5C
+
+#define REG_3D_BumpMapSet               0x8A68
+#define REG_3D_BumpMapAddress           0x8A6C
+#define REG_3D_BumpMapPitch             0x8A70
+#define REG_3D_BumpMapMatrix0           0x8A74
+#define REG_3D_BumpMapMatrix1           0x8A78
+
+/*
+ * Define the Texture Register Address of 6326
+ */
+#define REG_3D_TextureSet                     0x8A7C
+#define REG_3D_TextureWidthHeight             0x8A7C
+#define REG_3D_TextureMip                     0x8A80
+
+#define REG_3D_TextureTransparencyColorHigh    0x8A84
+#define REG_3D_TextureTransparencyColorLow     0x8A88
+#define REG_3D_TextureBorderColor              0x8A8C
+#define REG_3D_TextureAddress0                 0x8A90
+#define REG_3D_TextureAddress1                 0x8A94
+#define REG_3D_TextureAddress2                 0x8A98
+#define REG_3D_TextureAddress3                 0x8A9C
+#define REG_3D_TextureAddress4                 0x8AA0
+#define REG_3D_TextureAddress5                 0x8AA4
+#define REG_3D_TextureAddress6                 0x8AA8
+#define REG_3D_TextureAddress7                 0x8AAC
+#define REG_3D_TextureAddress8                 0x8AB0
+#define REG_3D_TextureAddress9                 0x8AB4
+#define REG_3D_TextureAddress10                0x8AB8
+#define REG_3D_TextureAddress11                0x8ABC
+#define REG_3D_TexturePitch0                   0x8AC0
+#define REG_3D_TexturePitch1                   0x8AC0
+#define REG_3D_TexturePitch2                   0x8AC4
+#define REG_3D_TexturePitch3                   0x8AC4
+#define REG_3D_TexturePitch4                   0x8AC8
+#define REG_3D_TexturePitch5                   0x8AC8
+#define REG_3D_TexturePitch6                   0x8ACC
+#define REG_3D_TexturePitch7                   0x8ACC
+#define REG_3D_TexturePitch8                   0x8AD0
+#define REG_3D_TexturePitch9                   0x8AD0
+#define REG_3D_TexturePitch10                  0x8AD4
+
+#define REG_3D_Texture1Set                     0x8ADC
+#define REG_3D_Texture1WidthHeight             0x8ADC
+#define REG_3D_Texture1Mip                     0x8AE0
+
+#define REG_3D_Texture1TransparencyColorHigh   0x8AE4
+#define REG_3D_Texture1TransparencyColorLow    0x8AE8
+#define REG_3D_Texture1BorderColor             0x8AEC
+#define REG_3D_Texture1Address0                0x8AF0
+#define REG_3D_Texture1Address1                0x8AF4
+#define REG_3D_Texture1Address2                0x8AF8
+#define REG_3D_Texture1Address3                0x8AFC
+#define REG_3D_Texture1Address4                0x8B00
+#define REG_3D_Texture1Address5                0x8B04
+#define REG_3D_Texture1Address6                0x8B08
+#define REG_3D_Texture1Address7                0x8B0C
+#define REG_3D_Texture1Address8                0x8B10
+#define REG_3D_Texture1Address9                0x8B14
+#define REG_3D_Texture1Address10               0x8B18
+#define REG_3D_Texture1Address11               0x8B1C
+#define REG_3D_Texture1Pitch0                  0x8B20
+#define REG_3D_Texture1Pitch1                  0x8B20
+#define REG_3D_Texture1Pitch2                  0x8B24
+#define REG_3D_Texture1Pitch3                  0x8B24
+#define REG_3D_Texture1Pitch4                  0x8B28
+#define REG_3D_Texture1Pitch5                  0x8B28
+#define REG_3D_Texture1Pitch6                  0x8B2C
+#define REG_3D_Texture1Pitch7                  0x8B2C
+#define REG_3D_Texture1Pitch8                  0x8B30
+#define REG_3D_Texture1Pitch9                  0x8B30
+#define REG_3D_Texture1Pitch10                 0x8B34
+
+#define REG_3D_TextureBlendFactor              0x8B3C
+#define REG_3D_TextureColorBlendSet0           0x8B40
+#define REG_3D_TextureColorBlendSet1           0x8B44
+#define REG_3D_TextureAlphaBlendSet0           0x8B48
+#define REG_3D_TextureAlphaBlendSet1           0x8B4C
+/*
+ * Define the End of Primitive List of 6326
+ */
+#define REG_3D_EndPrimitiveList                0X8B50
+
+
+/*
+ * Define the Stipple Register Address of 6326
+ */
+#define REG_3D_Stipple0                        0X8B60
+
+#define REG_3D_TexturePalette                  0x8C00
+
+/*
+ * REG_CommandQueue -- (8240h-8243h)
+ */
+#define MASK_QueueLen				0x0000ffff
+#define SiS_EngIdle2d				0x80000000
+#define SiS_EngIdle				0xe0000000
+#define MASK_EngState				0xf0000000
+
+/*
+ * REG_3D_ParsingSet -- Define Parsing Mask (89F4h-89F7h)
+ */
+#define MASK_VertexDWSize			0xf0000000
+#define MASK_VertexDataFormat			0x0fff0000
+/* Because the original MASK_PsVertex_* names of these bits appeared to be
+ * wrong, new names SiS_PS_* based off of the 4.3.0 driver and research are
+ * below.
+ */
+#define SiS_PS_HAS_XYZ				MASK_PsVertex_HAS_RHW
+#define SiS_PS_HAS_W				MASK_PsVertex_HAS_NORMALXYZ
+#define SiS_PS_HAS_DIFFUSE			MASK_PsVertex_HAS_SPECULAR
+#define SiS_PS_HAS_SPECULAR			MASK_PsVertex_HAS_DIFFUSE
+#define SiS_PS_HAS_UV0				MASK_PsVertex_HAS_UVSet2
+#define SiS_PS_HAS_UV1				MASK_PsVertex_HAS_UVSet3
+#define MASK_PsVertex_HAS_RHW			0x08000000
+#define MASK_PsVertex_HAS_NORMALXYZ		0x04000000
+#define MASK_PsVertex_HAS_DIFFUSE		0x02000000
+#define MASK_PsVertex_HAS_SPECULAR		0x01000000
+#define MASK_PsUVSet				0x00ff0000
+#define MASK_PsVertex_HAS_1SetUV		0x00800000
+#define MASK_PsVertex_HAS_2SetUV		0x00c00000
+#define MASK_PsVertex_HAS_3SetUV		0x00e00000
+#define MASK_PsVertex_HAS_UVSet1		0x00800000
+#define MASK_PsVertex_HAS_UVSet2		0x00400000
+#define MASK_PsVertex_HAS_UVSet3		0x00200000
+#define MASK_PsCullDirection_CCW		0x00008000
+#define MASK_PsShadingMode			0x00007000
+/* XXX Shading modes just a guess, but seem to work*/
+#define MASK_PsShadingFlatA			0x00001000
+#define MASK_PsShadingFlatB			0x00002000
+#define MASK_PsShadingFlatC			0x00003000
+#define MASK_PsShadingSmooth			0x00004000
+#define MASK_PsTextureFrom			0x000003f0
+#define MASK_PsTexture0FromA			0x00000000
+#define MASK_PsTexture1FromA			0x00000000
+#define MASK_PsTexture1FromB			0x00000040
+#define MASK_PsBumpTextureFromA			0x00000000
+#define MASK_PsBumpTextureFromB			0x00000010
+#define MASK_PsBumpTextureFromC			0x00000020
+#define MASK_PsDataType				0x0000000f
+#define MASK_PsPointList			0x00000000
+#define MASK_PsLineList				0x00000004
+#define MASK_PsLineStrip			0x00000005
+#define MASK_PsTriangleList			0x00000008
+#define MASK_PsTriangleStrip			0x00000009
+#define MASK_PsTriangleFan			0x0000000a
+
+/*
+ * REG_3D_PrimitiveSet -- Define Fire Primitive Mask (89F8h-89FBh)
+ */
+#define MASK_DrawPrimitiveCommand       0x00000007
+#define MASK_SetFirePosition            0x00001F00
+#define MASK_BumpTextureFrom            0x00030000
+#define MASK_Texture1From               0x000C0000
+#define MASK_Texture0From               0x00300000
+#define MASK_ShadingMode                0x07000000
+#define MASK_CullDirection              0x08000000
+
+#define OP_3D_POINT_DRAW		0x00000000
+#define OP_3D_LINE_DRAW			0x00000001
+#define OP_3D_TRIANGLE_DRAW		0x00000002
+
+#define OP_3D_DIRECTION_RIGHT		0x00000000
+#define OP_3D_DIRECTION_LEFT		0x00000100
+#define OP_3D_DIRECTION_HORIZONTAL	0x00000000
+#define OP_3D_DIRECTION_VERTICAL	0x00000100
+
+#define OP_3D_FIRE_TFIRE		0x00000000
+#define OP_3D_FIRE_TSARGBa		0x00000100
+#define OP_3D_FIRE_TSWa			0x00000200
+#define OP_3D_FIRE_TSVAa		0x00000300
+#define OP_3D_FIRE_TSVBa		0x00000400
+#define OP_3D_FIRE_TSVCa		0x00000500
+
+#define OP_3D_FIRE_TSARGBb		0x00000900
+#define OP_3D_FIRE_TSWb			0x00000a00
+#define OP_3D_FIRE_TSVAb		0x00000b00
+#define OP_3D_FIRE_TSVBb		0x00000c00
+#define OP_3D_FIRE_TSVCb		0x00000d00
+
+#define OP_3D_FIRE_TSARGBc		0x00001100
+#define OP_3D_FIRE_TSWc			0x00001200
+#define OP_3D_FIRE_TSVAc		0x00001300
+#define OP_3D_FIRE_TSVBc		0x00001400
+#define OP_3D_FIRE_TSVCc		0x00001500
+
+#define OP_3D_Texture0FromA		0x00000000
+#define OP_3D_Texture0FromB		0x00100000
+#define OP_3D_Texture0FromC		0x00200000
+#define OP_3D_Texture1FromA             0x00000000
+#define OP_3D_Texture1FromB             0x00040000
+#define OP_3D_Texture1FromC             0x00080000
+#define OP_3D_TextureBumpFromA          0x00000000
+#define OP_3D_TextureBumpFromB          0x00010000
+#define OP_3D_TextureBumpFromC          0x00020000
+
+#define OP_3D_CullDirection_CCW		0x08000000
+
+#define SHADE_FLAT_VertexA		0x01000000
+#define SHADE_FLAT_VertexB		0x02000000
+#define SHADE_FLAT_VertexC		0x03000000
+#define SHADE_GOURAUD			0x04000000
+
+/*
+ *           Define Command Queue Length Mask (89FCh-89FF)
+ */
+#define MASK_CmdQueueLen                0x0FFF0000
+
+/*
+ * REG_3D_TEnable -- Define Capability Enable Mask (8A00h-8A03h)
+ */
+#define MASK_DitherEnable               0x00000001
+#define MASK_BlendEnable                0x00000002
+#define MASK_FogTestEnable              0x00000004
+#define MASK_FogEnable                  0x00000008
+#define MASK_SpecularEnable             0x00000010
+#define MASK_FogPerspectiveEnable      0x00000020
+#define MASK_TextureCacheClear          0x00000040
+#define MASK_TextureCacheEnable         0x00000080
+#define MASK_BumpMapEnable              0x00000100
+#define MASK_TexturePerspectiveEnable   0x00000200
+#define MASK_TextureEnable              0x00000400
+#define MASK_CullEnable                 0x00000800
+#define MASK_TextureNumUsed             0x0000F000
+#define MASK_AlphaBufferEnable          0x00010000
+#define MASK_AlphaTestEnable            0x00020000
+#define MASK_AlphaWriteEnable           0x00040000
+#define MASK_ZTestEnable                0x00080000
+#define MASK_ZWriteEnable               0x00100000
+#define MASK_StencilBufferEnable        0x00200000
+#define MASK_StencilTestEnable          0x00400000
+#define MASK_StencilWriteEnable         0x00800000
+#define MASK_Texture0TransparencyEnable 0x01000000
+#define MASK_Texture1TransparencyEnable 0x02000000
+#define MASK_TextureAWrapUCorrection    0x04000000
+#define MASK_TextureAWrapVCorrection    0x08000000
+#define MASK_TextureBWrapUCorrection    0x10000000
+#define MASK_TextureBWrapVCorrection    0x20000000
+#define MASK_TextureCWrapUCorrection    0x40000000
+#define MASK_TextureCWrapVCorrection    0x80000000
+
+/*
+ * REG_3D_TEnable2 -- Define Capability Enable Mask2 (8A04h-8A07h)
+ */
+#define MASK_Texture0BlockTextureEnable 0x00000001
+#define MASK_Texture1BlockTextureEnable 0x00000002
+#define MASK_Texture0AnisotropicEnable  0x00000010
+#define MASK_Texture1AnisotropicEnable  0x00000020
+#define MASK_TextureMipmapBiasEnable    0x00000040
+#define MASK_LinePatternEnable          0x00000100
+#define MASK_StippleAlphaEnable         0x00000200
+#define MASK_StippleEnable              0x00000400
+#define MASK_AntiAliasEnable            0x00000800
+#define MASK_ZMaskWriteEnable           0x00001000
+#define MASK_StencilMaskWriteEnable     0x00002000
+#define MASK_AlphaMaskWriteEnable       0x00004000
+#define MASK_ColorMaskWriteEnable       0x00008000
+#define MASK_ZCacheClear                0x00010000
+#define MASK_ZCacheEnable               0x00020000
+#define MASK_StencilCacheClear          0x00040000
+#define MASK_StencilCacheEnable         0x00080000
+#define MASK_AlphaCacheClear            0x00100000
+#define MASK_AlphaCacheEnable           0x00200000
+#define MASK_ColorCacheClear            0x00400000
+#define MASK_ColorCacheEnable           0x00800000
+
+/*
+ * REG_3D_ZSet -- Define Z Buffer Setting Mask (8A08h-8A0Bh)
+ */
+#define MASK_ZBufferPitch               0x00000FFF
+#define MASK_ZTestMode                  0x00070000
+#define MASK_ZBufferInSystem            0x00080000
+#define MASK_ZBufferFormat              0x01F00000
+
+#define SiS_Z_COMP_NEVER		0x00000000
+#define SiS_Z_COMP_S_LT_B		0x00010000
+#define SiS_Z_COMP_S_EQ_B		0x00020000
+#define SiS_Z_COMP_S_LE_B		0x00030000
+#define SiS_Z_COMP_S_GT_B		0x00040000
+#define SiS_Z_COMP_S_NE_B		0x00050000
+#define SiS_Z_COMP_S_GE_B		0x00060000
+#define SiS_Z_COMP_ALWAYS		0x00070000
+
+#define SiS_ZFORMAT_Z16			0x00000000
+#define SiS_ZFORMAT_Z16_INT		0x00100000
+#define SiS_ZFORMAT_S1Z15		0x00400000
+#define SiS_ZFORMAT_S1Z15_INT		0x00500000
+#define SiS_ZFORMAT_Z32			0x00800000
+#define SiS_ZFORMAT_S1Z31		0x00C00000
+#define SiS_ZFORMAT_S2Z30		0x00D00000
+#define SiS_ZFORMAT_S4Z28		0x00E00000
+#define SiS_ZFORMAT_S8Z24		0x00F00000
+#define SiS_ZFORMAT_FZ30		0x01800000
+#define SiS_ZFORMAT_FS1Z30		0x01C00000
+#define SiS_ZFORMAT_FS2Z30		0x01D00000
+
+/*
+ * REG_3D_ZBias -- Define Z Bias Mask (8A0Ch-8A0Fh)
+ */
+#define MASK_ZBias                      0xFFFFFFFF
+
+/*
+ * REG_3D_ZStWriteMask -- Define Z and Stencil Buffer Mask (8A10h-8A13h)
+ */
+#define MASK_ZWriteMask                 0x00FFFFFF
+
+/*
+ * REG_3D_ZAddress -- Define Z Buffer Base Address(8A14h-8A17h)
+ */
+#define MASK_ZAddress                   0xFFFFFFFF
+
+/*
+ * REG_3D_AlphaSet -- Define Alpha Buffer Setting Mask (8A18h-8A1Bh)
+ */
+#define MASK_AlphaBufferPitch           0x000003FF
+#define MASK_AlphaRefValue              0x00FF0000
+#define MASK_AlphaTestMode              0x07000000
+#define MASK_AlphaBufferInSystem        0x08000000
+#define MASK_AlphaBufferFormat          0x30000000
+
+#define SiS_ALPHA_NEVER			0x00000000
+#define SiS_ALPHA_LESS			0x01000000
+#define SiS_ALPHA_EQUAL			0x02000000
+#define SiS_ALPHA_LEQUAL		0x03000000
+#define SiS_ALPHA_GREATER		0x04000000
+#define SiS_ALPHA_NOTEQUAL		0x05000000
+#define SiS_ALPHA_GEQUAL		0x06000000
+#define SiS_ALPHA_ALWAYS		0x07000000
+
+/*
+ * REG_3D_AlphaAddress -- Define Alpha Buffer Base Address(8A1Ch-8A1Fh)
+ */
+#define MASK_AlphaAddress               0xFFFFFFFF
+
+/*
+ * REG_3D_DstSet -- Define Destination Buffer Setting Mask (8A20h-8A23h)
+ */
+#define MASK_DstBufferPitch             0x00000FFF
+#define MASK_DstBufferFormat            0x000F0000
+#define MASK_DstBufferBitDepth          0x00300000
+#define MASK_DstBufferRgbOrder          0x00400000
+#define MASK_DstBufferInSystem          0x00800000
+#define MASK_Dst7BitFormat              0x007F0000
+#define MASK_ROP2                       0x0F000000
+
+#define DST_FORMAT_RGB_555		0x00100000
+#define DST_FORMAT_RGB_565		0x00110000
+#define DST_FORMAT_ARGB_1555		0x00120000
+#define DST_FORMAT_ARGB_4444		0x00130000
+#define DST_FORMAT_ARGB_1888		0x00300000
+#define DST_FORMAT_ARGB_2888		0x00310000
+#define DST_FORMAT_ARGB_4888		0x00320000
+#define DST_FORMAT_ARGB_8888		0x00330000
+#define DST_FORMAT_ARGB_0888		0x00340000
+
+#define DST_FORMAT_BGR_555		0x00500000
+#define DST_FORMAT_BGR_565		0x00510000
+#define DST_FORMAT_ABGR_1555		0x00520000
+#define DST_FORMAT_ABGR_4444		0x00530000
+#define DST_FORMAT_ABGR_1888		0x00700000
+#define DST_FORMAT_ABGR_2888		0x00710000
+#define DST_FORMAT_ABGR_4888		0x00720000
+#define DST_FORMAT_ABGR_8888		0x00730000
+#define DST_FORMAT_ABGR_0888		0x00740000
+
+#define LOP_CLEAR			0x00000000
+#define LOP_NOR				0x01000000
+#define LOP_AND_INVERTED		0x02000000
+#define LOP_COPY_INVERTED		0x03000000
+#define LOP_AND_REVERSE			0x04000000
+#define LOP_INVERT			0x05000000
+#define LOP_XOR				0x06000000
+#define LOP_NAND			0x07000000
+#define LOP_AND				0x08000000
+#define LOP_EQUIV			0x09000000
+#define LOP_NOOP			0x0a000000
+#define LOP_OR_INVERTED			0x0b000000
+#define LOP_COPY			0x0c000000
+#define LOP_OR_REVERSE			0x0d000000
+#define LOP_OR				0x0e000000
+#define LOP_SET				0x0f000000
+
+/*
+ * REG_3D_DstAlphaWriteMask -- Define Destination/Alpha  Buffer Write Mask (8A24h-8A27h)
+ */
+#define MASK_ColorWriteMask             0x00FFFFFF
+#define MASK_AlphaWriteMask             0xFF000000
+
+/*
+ * REG_3D_DstAddress -- Define Destination Buffer Base Address (8A28h-8A2Bh)
+ */
+#define MASK_DstAddress                 0xFFFFFFFF
+
+/*
+ * REG_3D_LinePattern -- Define Line Pattern (8A2Ch-8A2Fh)
+ */
+#define MASK_LinePatternRepeatFactor    0x00007FFF
+#define MASK_LinePatternLastPixelFlag   0x00008000
+#define MASK_LinePattern                0xFFFF0000
+
+/*
+ * REG_3D_FogSet -- Define Fog Mask (8A30h-8A33h)
+ */
+#define MASK_FogColor                   0x00FFFFFF
+#define MASK_FogMode                    0x07000000
+#define MASK_FogZLookup		        0x08000000
+
+#define FOGMODE_CHEAP			0x04000000
+#define FOGMODE_LINEAR			0x05000000
+#define FOGMODE_EXP			0x06000000
+#define FOGMODE_EXP2			0x07000000
+
+/*
+ * REG_3D_FogFarDistance -- Define Fog Far Distance	(0x8A34 - 0x8A37)
+ */
+#define MASK_FogFarDistance		0x0007FFFF
+
+/*
+ * REG_3D_FogInverseDistance -- Define Fog Inverse Far Distance	(0x8A38 - 0x8A3B)
+ */
+#define MASK_FogInvFarDistance		0x0007FFFF
+
+/*
+ * REG_3D_FogFactorDensity		(0x8A3C - 0x8A3F)
+ */
+#define MASK_FogDensity                 0x0003FFFF
+#define MASK_FogFactor                  0xFF000000
+
+/*
+ * REG_3D_StencilSet -- Define stencil test (8A44h-8A47h)
+ */
+#define MASK_StencilValueMask		0x000000ff
+#define MASK_StencilRefMask		0x0000ff00
+#define MASK_StencilTestMode		0x07000000
+#define MASK_StencilBufferInSystem	0x08000000
+#define MASK_StencilFormat		0x30000000
+
+#define SiS_STENCIL_NEVER		0x00000000
+#define SiS_STENCIL_LESS		0x01000000
+#define SiS_STENCIL_EQUAL		0x02000000
+#define SiS_STENCIL_LEQUAL		0x03000000
+#define SiS_STENCIL_GREATER		0x04000000
+#define SiS_STENCIL_NOTEQUAL		0x05000000
+#define SiS_STENCIL_GEQUAL		0x06000000
+#define SiS_STENCIL_ALWAYS		0x07000000
+
+#define STENCIL_FORMAT_1		0x00000000
+#define STENCIL_FORMAT_2		0x10000000
+#define STENCIL_FORMAT_4		0x20000000
+#define STENCIL_FORMAT_8		0x30000000
+
+/*
+ * REG_3D_StencilSet2 -- Define stencil pitch and operations (8A48h-8A4Bh)
+ */
+#define MASK_StencilBufferPitch         0x00000FFF
+#define MASK_StencilZPassOp             0x00007000
+#define MASK_StencilZFailOp             0x00070000
+#define MASK_StencilFailOp              0x00700000
+#define MASK_StencilWriteMask           0xFF000000
+
+#define SiS_SFAIL_KEEP			0x00000000
+#define SiS_SFAIL_ZERO			0x00100000
+#define SiS_SFAIL_REPLACE		0x00200000
+#define SiS_SFAIL_INCR			0x00300000	/* guess -- was _WRAP */
+#define SiS_SFAIL_DECR			0x00400000	/* guess -- was _WRAP */
+#define SiS_SFAIL_INVERT		0x00500000
+#define SiS_SFAIL_INCR_WRAP		0x00600000	/* guess */
+#define SiS_SFAIL_DECR_WRAP		0x00700000	/* guess */
+
+#define SiS_SPASS_ZFAIL_KEEP		0x00000000
+#define SiS_SPASS_ZFAIL_ZERO		0x00010000
+#define SiS_SPASS_ZFAIL_REPLACE		0x00020000
+#define SiS_SPASS_ZFAIL_INCR		0x00030000	/* guess -- was _WRAP */
+#define SiS_SPASS_ZFAIL_DECR		0x00040000	/* guess -- was _WRAP */
+#define SiS_SPASS_ZFAIL_INVERT		0x00050000
+#define SiS_SPASS_ZFAIL_INCR_WRAP	0x00060000	/* guess */
+#define SiS_SPASS_ZFAIL_DECR_WRAP	0x00070000	/* guess */
+
+#define SiS_SPASS_ZPASS_KEEP		0x00000000
+#define SiS_SPASS_ZPASS_ZERO		0x00001000
+#define SiS_SPASS_ZPASS_REPLACE		0x00002000
+#define SiS_SPASS_ZPASS_INCR		0x00003000	/* guess -- was _WRAP */
+#define SiS_SPASS_ZPASS_DECR		0x00004000	/* guess -- was _WRAP */
+#define SiS_SPASS_ZPASS_INVERT		0x00005000
+#define SiS_SPASS_ZPASS_INCR_WRAP	0x00006000	/* guess */
+#define SiS_SPASS_ZPASS_DECR_WRAP	0x00007000	/* guess */
+
+/*
+ * REG_3D_DstBlendMode			(0x8A50 - 0x8A53)
+ */
+#define MASK_SrcBlendMode		0x0000000F
+#define MASK_DstBlendMode		0x000000F0
+
+#define SiS_D_ZERO			0x00000000
+#define SiS_D_ONE			0x00000010
+#define SiS_D_SRC_COLOR			0x00000020
+#define SiS_D_ONE_MINUS_SRC_COLOR	0x00000030
+#define SiS_D_SRC_ALPHA			0x00000040
+#define SiS_D_ONE_MINUS_SRC_ALPHA	0x00000050
+#define SiS_D_DST_ALPHA			0x00000060
+#define SiS_D_ONE_MINUS_DST_ALPHA	0x00000070
+#define SiS_D_DST_COLOR			0x00000080
+#define SiS_D_ONE_MINUS_DST_COLOR	0x00000090
+#define SiS_D_SRC_ALPHA_SAT		0x000000a0
+
+#define SiS_S_ZERO			0x00000000
+#define SiS_S_ONE			0x00000001
+#define SiS_S_SRC_COLOR			0x00000002
+#define SiS_S_ONE_MINUS_SRC_COLOR	0x00000003
+#define SiS_S_SRC_ALPHA			0x00000004
+#define SiS_S_ONE_MINUS_SRC_ALPHA	0x00000005
+#define SiS_S_DST_ALPHA			0x00000006
+#define SiS_S_ONE_MINUS_DST_ALPHA	0x00000007
+#define SiS_S_DST_COLOR			0x00000008
+#define SiS_S_ONE_MINUS_DST_COLOR	0x00000009
+#define SiS_S_SRC_ALPHA_SATURATE	0x0000000a
+#define SiS_S_BOTH_SRC_ALPHA		0x0000000b
+#define SiS_S_BOTH_ONE_MINUS_SRC_ALPHA	0x0000000c
+
+/*
+ * REG_3D_ClipTopBottom			(0x8A54 - 0x8A57)
+ */
+#define MASK_BottomClip			0x00001FFF
+#define MASK_TopClip			0x03FFE000
+
+/*
+ * REG_3D_ClipLeftRight			(0x8A58 - 0x8A5B)
+ */
+#define MASK_RightClip			0x00001FFF
+#define MASK_LeftClip			0x03FFE000
+
+/* 
+ * REG_3D_TextureSet			(0x8A7C - 0x8A7F)
+ * REG_3D_Texture1Set			(0x8ADC - 0x8ADF)
+ */
+#define MASK_TextureHeight		0x0000000F
+#define MASK_TextureWidth		0x000000F0
+#define MASK_TextureLevel		0x00000F00
+#define MASK_TextureSignYUVFormat	0x00001000
+#define MASK_TextureMappingMode		0x00FF0000
+#define MASK_TextureWrapU		0x00010000
+#define MASK_TextureWrapV		0x00020000
+#define MASK_TextureMirrorU		0x00040000
+#define MASK_TextureMirrorV		0x00080000
+#define MASK_TextureClampU		0x00100000
+#define MASK_TextureClampV		0x00200000
+#define MASK_TextureBorderU		0x00400000
+#define MASK_TextureBorderV		0x00800000
+#define MASK_TextureFormat		0xFF000000
+#define MASK_TextureBitDepth		0x70000000
+#define MASK_TextureRgbOrder		0x80000000
+
+#define TEXEL_INDEX1			0x00000000
+#define TEXEL_INDEX2			0x01000000
+#define TEXEL_INDEX4			0x02000000
+#define TEXEL_INDEX8			0x03000000
+
+#define TEXEL_INDEX1WithAlpha		0x04000000
+#define TEXEL_INDEX2WithAlpha		0x05000000
+#define TEXEL_INDEX4WithAlpha		0x06000000
+#define TEXEL_INDEX8WithAlpha		0x07000000
+
+#define TEXEL_I1			0x10000000
+#define TEXEL_I2			0x11000000
+#define TEXEL_I4			0x12000000
+#define TEXEL_I8			0x13000000
+
+#define TEXEL_DXT1			0x19000000
+#define TEXEL_DXT2			0x1A000000
+#define TEXEL_DXT3			0x1B000000
+
+#define TEXEL_YUV422			0x20000000
+#define TEXEL_YVU422			0x21000000
+#define TEXEL_UVY422			0x22000000
+#define TEXEL_VUY422			0x23000000
+#define TEXEL_YUV411			0x24000000
+
+#define TEXEL_L1			0x30000000
+#define TEXEL_L2			0x31000000
+#define TEXEL_L4			0x32000000
+#define TEXEL_L8			0x33000000
+
+#define TEXEL_AL11			0x34000000
+#define TEXEL_AL44			0x35000000
+#define TEXEL_AL26			0x37000000
+#define TEXEL_AL88			0x38000000
+
+#define TEXEL_A1			0x40000000
+#define TEXEL_A2			0x41000000
+#define TEXEL_A4			0x42000000
+#define TEXEL_A8			0x43000000
+
+#define TEXEL_RGB_332_8			0x50000000
+#define TEXEL_RGB_233_8			0x51000000
+#define TEXEL_RGB_232_8			0x52000000
+#define TEXEL_ARGB_1232_8		0x53000000
+#define TEXEL_ARGB_2222_8		0x54000000
+
+#define TEXEL_RGB_555_16		0x60000000
+#define TEXEL_RGB_565_16		0x61000000
+#define TEXEL_ARGB_1555_16		0x62000000
+#define TEXEL_ARGB_4444_16		0x63000000
+
+#define TEXEL_ARGB_1888_32		0x70000000
+#define TEXEL_ARGB_2888_32		0x71000000
+#define TEXEL_ARGB_4888_32		0x72000000
+#define TEXEL_ARGB_8888_32		0x73000000
+#define TEXEL_ARGB_0888_32		0x74000000
+
+#define TEXEL_BGR_332_8			0xD0000000
+#define TEXEL_BGR_233_8			0xD1000000
+#define TEXEL_BGR_232_8			0xD2000000
+#define TEXEL_ABGR_1232_8		0xD3000000
+#define TEXEL_ABGR_2222_8		0xD4000000
+
+#define TEXEL_BGR_555_16		0xE0000000
+#define TEXEL_BGR_565_16		0xE1000000
+#define TEXEL_ABGR_1555_16		0xE2000000
+#define TEXEL_ABGR_4444_16		0xE3000000
+
+#define TEXEL_ABGR_1888_32		0xF0000000
+#define TEXEL_ABGR_2888_32		0xF1000000
+#define TEXEL_ABGR_4888_32		0xF2000000
+#define TEXEL_ABGR_8888_32		0xF3000000
+#define TEXEL_ABGR_0888_32		0xF4000000
+
+#define TEXEL_VU88			0x00000000
+#define TEXEL_LVU655			0x00800000
+#define TEXEL_LVU888			0x01000000
+#define TEXEL_UV88			0x02000000
+#define TEXEL_LUV655			0x02800000
+#define TEXEL_LUV888			0x03000000
+
+/* 
+ * REG_3D_TextureMip			(0x8A80 - 0x8A83)
+ * REG_3D_Texture1Mip			(0x8AE0 - 0x8AE3)
+ */
+#define MASK_TextureAnisotropyRatio	0x0000000F
+#define MASK_TextureMipmapLodBias	0x00003FF0
+#define MASK_TextureFilterMin		0x0001C000
+#define MASK_TextureFilterMag		0x00020000
+#define MASK_TextureFilter		0x0003C000
+#define MASK_TextureLevelInSystem	0x3FFC0000
+#define MASK_TextureLevel0InSystem	0x00040000
+#define MASK_TextureBlockLength		0xF0000000
+
+#define TEXTURE_FILTER_NEAREST			0x00000000
+#define TEXTURE_FILTER_LINEAR			0x00004000
+#define TEXTURE_FILTER_NEAREST_MIP_NEAREST	0x00008000
+#define TEXTURE_FILTER_NEAREST_MIP_LINEAR	0x00010000
+#define TEXTURE_FILTER_LINEAR_MIP_NEAREST	0x0000c000
+#define TEXTURE_FILTER_LINEAR_MIP_LINEAR	0x00014000
+
+/* 
+ * REG_3D_TextureTransparencyColorHigh	(0x8A84 - 0x8A87)
+ * REG_3D_Texture1TransparencyColorHigh	(0x8AE4 - 0x8AE7)
+ */
+#define MASK_TextureTransparencyColorHighB	0x000000FF
+#define MASK_TextureTransparencyColorHighG	0x0000FF00
+#define MASK_TextureTransparencyColorHighR	0x00FF0000
+#define MASK_TextureAlphaTransparencyMode	0x08000000
+
+/* 
+ * REG_3D_TextureTransparencyColorLow	(0x8A88 - 0x8A8B)
+ * REG_3D_Texture1TransparencyColorLow	(0x8AE8 - 0x8AEB)
+ */
+#define MASK_TextureTransparencyColorLowB	0x000000FF
+#define MASK_TextureTransparencyColorLowG	0x0000FF00
+#define MASK_TextureTransparencyColorLowR	0x00FF0000
+#define MASK_TextureBlockHeight			0x07000000
+#define MASK_TextureBlockWidth			0x70000000
+
+/* 
+ * REG_3D_TextureBorderColor		(0x8A8C - 0x8A8F)
+ * REG_3D_Texture1BorderColor		(0x8AEC - 0x8AEF)
+ */
+#define MASK_TextureBorderColorB       0x000000FF
+#define MASK_TextureBorderColorG       0x0000FF00
+#define MASK_TextureBorderColorR       0x00FF0000
+#define MASK_TextureBorderColorA       0xFF000000
+
+/*
+ * REG_3D_TexturePitch0-10		(0x8AC0 - 0x8AD7)
+ * REG_3D_Texture1Pitch0-10		(0x8B20 - 0x8B37)
+ */
+#define MASK_TexturePitchOdd		0x000003FF
+#define MASK_TexturePitchEven		0x03FF0000
+#define SHIFT_TexturePitchEven		16
+
+/*
+ * REG_3D_TextureColorBlendSet0		(0x8B40 - 0x8B43)
+ * REG_3D_TextureColorBlendSet1		(0x8B44 - 0x8B47)
+ * REG_3D_TextureAlphaBlendSet0		(0x8B48 - 0x8B4B)
+ * REG_3D_TextureAlphaBlendSet1		(0x8B4C - 0x8B4F)
+ */
+#define STAGE0_C_CF			0xa1485000
+#define STAGE0_C_CS			0xc1485000
+#define STAGE0_C_CFCS			0xa1705000
+#define STAGE0_C_CFOMAS_CSAS		0xc534c001
+#define STAGE0_C_CFOMCS_CCCS		0x4530c001
+
+#define STAGE0_A_AF			0x63230000
+#define STAGE0_A_AS			0xc3230000
+#define STAGE0_A_AFAS			0x63c30000
+#define STAGE0_A_AFOMAS_ACAS		0x46c60001
+
+#define STAGE1_C_CF			0xa1485000
+#define STAGE1_C_CS			0xe1485000
+#define STAGE1_C_CFCS			0xa1785000
+#define STAGE1_C_CFOMAS_CSAS		0xe5394001
+#define STAGE1_C_CFOMCS_CCCS		0x45394001
+
+#define STAGE1_A_AF			0xa3230000
+#define STAGE1_A_AS			0xe3230000
+#define STAGE1_A_AFAS			0xa3e30000
+#define STAGE1_A_AFOMAS_ACAS		0x4aea0001
+
+/* What registers are these associated with? */
+#define MASK_BMMemoryInSystem           0x00000080
+#define MASK_BMHeight                   0x00000F00
+#define MASK_BMWidth                    0x0000F000
+#define MASK_BMFilter                   0x00010000
+#define MASK_BMMappingMode              0x007E0000
+#define MASK_BMFormat                   0x07800000
+#define MASK_BMTxBumpmap                0x08000000
+
+#define MASK_BMAddress                  0xFFFFFFFC
+
+#define MASK_BMOffset                   0xFF800000
+#define MASK_BMScale                    0x007FE000
+#define MASK_BMPitch                    0x00001FFF
+
+#define MASK_BMMatrix00                 0x000007FF
+#define MASK_BMMatrix01                 0x07FF0000
+#define MASK_BMMatrix10                 0x000007FF
+#define MASK_BMMatrix11                 0x07FF0000
+
+#define MASK_TextureRealInSystem        0x00000001
+#define MASK_TextureDowngrade           0x00000002
+
+#define ALPHA_BUFFER_FORMAT_1           0x00000000
+#define ALPHA_BUFFER_FORMAT_2           0x10000000
+#define ALPHA_BUFFER_FORMAT_4           0x20000000
+#define ALPHA_BUFFER_FORMAT_8           0x30000000
+
+#endif
diff --git a/src/mesa/drivers/dri/sis/sis_screen.c b/src/mesa/drivers/dri/sis/sis_screen.c
new file mode 100644
index 0000000000..80fb455ec7
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_screen.c
@@ -0,0 +1,333 @@
+/**************************************************************************
+
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#include "dri_util.h"
+
+#include "main/context.h"
+#include "utils.h"
+#include "main/imports.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+
+#include "sis_context.h"
+#include "sis_dri.h"
+#include "sis_lock.h"
+
+#include "xmlpool.h"
+
+#include "GL/internal/dri_interface.h"
+
+#define SIS_AGP_DISABLE(def) \
+DRI_CONF_OPT_BEGIN(agp_disable,bool,def)				\
+	DRI_CONF_DESC(en,"Disable AGP vertex dispatch")			\
+DRI_CONF_OPT_END
+
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+	DRI_CONF_SECTION_QUALITY
+		DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB)
+	DRI_CONF_SECTION_END
+	DRI_CONF_SECTION_DEBUG
+		SIS_AGP_DISABLE(true)
+		DRI_CONF_NO_RAST(false)
+	DRI_CONF_SECTION_END
+DRI_CONF_END;
+static const GLuint __driNConfigOptions = 3;
+
+extern const struct dri_extension card_extensions[];
+
+static const __DRIconfig **
+sisFillInModes(__DRIscreen *psp, int bpp)   /* psp is never read in the body */
+{
+   __DRIconfig **configs;
+   unsigned depth_buffer_factor;
+   unsigned back_buffer_factor;
+   GLenum fb_format;
+   GLenum fb_type;
+   static const GLenum back_buffer_modes[] = {
+      GLX_NONE, GLX_SWAP_UNDEFINED_OML
+   };
+   uint8_t depth_bits_array[4];
+   uint8_t stencil_bits_array[4];
+   uint8_t msaa_samples_array[1];
+   /* Build the config list: depth/stencil pairs 0/0, 16/0, 24/8 and 32/0. */
+   depth_bits_array[0] = 0;
+   stencil_bits_array[0] = 0;
+   depth_bits_array[1] = 16;
+   stencil_bits_array[1] = 0;
+   depth_bits_array[2] = 24;
+   stencil_bits_array[2] = 8;
+   depth_bits_array[3] = 32;
+   stencil_bits_array[3] = 0;
+
+   msaa_samples_array[0] = 0;   /* single entry; its count is passed as 1 below */
+
+   depth_buffer_factor = 4;     /* number of depth/stencil pairs filled in above */
+   back_buffer_factor = 2;      /* number of entries in back_buffer_modes */
+
+   if (bpp == 16) {             /* 16 bpp: RGB 5-6-5; any other bpp: BGRA 8-8-8-8 */
+      fb_format = GL_RGB;
+      fb_type = GL_UNSIGNED_SHORT_5_6_5;
+   } else {
+      fb_format = GL_BGRA;
+      fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+   }
+
+   configs = driCreateConfigs(fb_format, fb_type, depth_bits_array,
+			      stencil_bits_array, depth_buffer_factor,
+			      back_buffer_modes, back_buffer_factor,
+                              msaa_samples_array, 1, GL_TRUE);
+   if (configs == NULL) {       /* report the failure and hand NULL to the caller */
+      fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, __LINE__);
+      return NULL;
+   }
+
+   return (const __DRIconfig **) configs;
+}
+
+
+/* Create the device specific screen private data struct from the SISDRIRec in
+ * sPriv->pDevPriv. Returns NULL on size mismatch, allocation or MMIO map failure. */
+static sisScreenPtr
+sisCreateScreen( __DRIscreen *sPriv )
+{
+   sisScreenPtr sisScreen;
+   SISDRIPtr sisDRIPriv = (SISDRIPtr)sPriv->pDevPriv;
+
+   if (sPriv->devPrivSize != sizeof(SISDRIRec)) {
+      fprintf(stderr,"\nERROR!  sizeof(SISDRIRec) does not match passed size from device driver\n");
+      return NULL;   /* pointer return type: NULL rather than GL_FALSE */
+   }
+
+   /* Allocate the private area */
+   sisScreen = (sisScreenPtr)CALLOC( sizeof(*sisScreen) );
+   if ( sisScreen == NULL )
+      return NULL;
+   /* Copy screen geometry, device id and AGP command buffer layout. */
+   sisScreen->screenX = sisDRIPriv->width;
+   sisScreen->screenY = sisDRIPriv->height;
+   sisScreen->cpp = sisDRIPriv->bytesPerPixel;
+   sisScreen->deviceID = sisDRIPriv->deviceID;
+   sisScreen->AGPCmdBufOffset = sisDRIPriv->AGPCmdBufOffset;
+   sisScreen->AGPCmdBufSize = sisDRIPriv->AGPCmdBufSize;
+   sisScreen->sarea_priv_offset = sizeof(drm_sarea_t);
+
+   sisScreen->mmio.handle = sisDRIPriv->regs.handle;
+   sisScreen->mmio.size   = sisDRIPriv->regs.size;
+   if ( drmMap( sPriv->fd, sisScreen->mmio.handle, sisScreen->mmio.size,
+	       &sisScreen->mmio.map ) )
+   {
+      FREE( sisScreen );   /* the MMIO mapping is mandatory */
+      return NULL;
+   }
+
+   if (sisDRIPriv->agp.size) {
+      sisScreen->agp.handle = sisDRIPriv->agp.handle;
+      sisScreen->agpBaseOffset = drmAgpBase(sPriv->fd);
+      sisScreen->agp.size   = sisDRIPriv->agp.size;
+      if ( drmMap( sPriv->fd, sisScreen->agp.handle, sisScreen->agp.size,
+                   &sisScreen->agp.map ) )
+      {
+         sisScreen->agp.size = 0;   /* AGP is optional: size 0 marks it unmapped */
+      }
+   }
+
+   sisScreen->driScreen = sPriv;
+
+   /* parse information in __driConfigOptions */
+   driParseOptionInfo(&sisScreen->optionCache,
+		      __driConfigOptions, __driNConfigOptions);
+
+   return sisScreen;
+}
+
+/* Destroy the device specific screen private data struct: unmap the AGP (if
+ * mapped) and MMIO regions, free the struct and clear sPriv->private. */
+static void
+sisDestroyScreen( __DRIscreen *sPriv )
+{
+   sisScreenPtr sisScreen = (sisScreenPtr)sPriv->private;
+
+   if ( sisScreen == NULL )
+      return;   /* never created, or already destroyed */
+
+   if (sisScreen->agp.size != 0)   /* sisCreateScreen leaves size 0 when AGP is not mapped */
+      drmUnmap( sisScreen->agp.map, sisScreen->agp.size );
+   drmUnmap( sisScreen->mmio.map, sisScreen->mmio.size );
+
+   FREE( sisScreen );
+   sPriv->private = NULL;
+}
+
+
+/* Create the Mesa framebuffer for a window drawable and store it in
+ * driDrawPriv->driverPrivate. Returns GL_FALSE for pixmaps (not implemented)
+ * and when the framebuffer cannot be created. driScrnPriv is unused. */
+static GLboolean
+sisCreateBuffer( __DRIscreen *driScrnPriv,
+                 __DRIdrawable *driDrawPriv,
+                 const __GLcontextModes *mesaVis,
+                 GLboolean isPixmap )
+{
+   struct gl_framebuffer *fb;
+
+   if (isPixmap)
+      return GL_FALSE; /* not implemented */
+
+   fb = _mesa_create_framebuffer(mesaVis);
+   if (fb != NULL) { /* do not attach renderbuffers to a failed allocation */
+      _mesa_add_soft_renderbuffers(fb,
+				GL_FALSE, /* color */
+				GL_FALSE, /* depth */
+				mesaVis->stencilBits > 0,
+				mesaVis->accumRedBits > 0,
+				GL_FALSE, /* alpha */
+				GL_FALSE /* aux */);
+   }
+   driDrawPriv->driverPrivate = (void *) fb;
+
+   return (driDrawPriv->driverPrivate != NULL);
+}
+
+
+static void
+sisDestroyBuffer(__DRIdrawable *driDrawPriv)
+{  /* Re-point the framebuffer reference held in driverPrivate at NULL. */
+   _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL);
+}
+
+static void sisCopyBuffer( __DRIdrawable *dPriv )
+{
+   sisContextPtr smesa = (sisContextPtr)dPriv->driContextPriv->driverPrivate;
+   int i;
+   /* Busy-wait until our frame count is within SIS_MAX_FRAME_LENGTH of the value read from 0x8a2c. */
+   while ((*smesa->FrameCountPtr) - MMIO_READ(0x8a2c) > SIS_MAX_FRAME_LENGTH)
+      ;
+
+   LOCK_HARDWARE();
+
+   for (i = 0; i < dPriv->numClipRects; i++) {
+      drm_clip_rect_t *box = &dPriv->pClipRects[i];
+      /* One blit per clip rect: back buffer -> front buffer, same drawable-relative x/y. */
+      mWait3DCmdQueue(10);
+      MMIO(REG_SRC_ADDR, smesa->back.offset);
+      MMIO(REG_SRC_PITCH, smesa->back.pitch | ((smesa->bytesPerPixel == 4) ? 
+			   BLIT_DEPTH_32 : BLIT_DEPTH_16));
+      MMIO(REG_SRC_X_Y, ((box->x1 - dPriv->x) << 16) | (box->y1 - dPriv->y));
+      MMIO(REG_DST_X_Y, ((box->x1 - dPriv->x) << 16) | (box->y1 - dPriv->y));
+      MMIO(REG_DST_ADDR, smesa->front.offset);
+      MMIO(REG_DST_PITCH_HEIGHT, (smesa->virtualY << 16) | smesa->front.pitch);
+      MMIO(REG_WIDTH_HEIGHT, ((box->y2 - box->y1) << 16) | (box->x2 - box->x1));
+      MMIO(REG_BLIT_CMD, CMD_DIR_X_INC | CMD_DIR_Y_INC | CMD_ROP_SRC);
+      MMIO(REG_CommandQueue, -1);
+   }
+   /* Store the current frame count at IOBase+0x8a2c, then advance our counter. */
+   *(GLint *)(smesa->IOBase+0x8a2c) = *smesa->FrameCountPtr;
+   (*smesa->FrameCountPtr)++;  
+
+   UNLOCK_HARDWARE ();
+}
+
+
+/* Copy the back color buffer to the front color buffer (double-buffered visuals only) */
+static void
+sisSwapBuffers(__DRIdrawable *dPriv)
+{
+   if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) {
+         sisContextPtr smesa = (sisContextPtr) dPriv->driContextPriv->driverPrivate;
+         GLcontext *ctx = smesa->glCtx;
+
+      if (ctx->Visual.doubleBufferMode) {
+         _mesa_notifySwapBuffers( ctx );  /* flush pending rendering commands */
+         sisCopyBuffer( dPriv );
+      }
+   } else {
+      /* XXX this shouldn't be an error but we can't handle it for now */
+      _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__);
+   }
+}
+
+
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ * 
+ * \todo maybe fold this into intelInitDriver (NOTE(review): name looks inherited from another driver; confirm)
+ *
+ * \return the __DRIconfig list built by sisFillInModes, or NULL on failure
+ */
+static const __DRIconfig **
+sisInitScreen(__DRIscreen *psp)
+{
+   static const __DRIversion ddx_expected = {0, 8, 0};
+   static const __DRIversion dri_expected = {4, 0, 0};
+   static const __DRIversion drm_expected = {1, 0, 0};
+   static const char *driver_name = "SiS";
+   SISDRIPtr dri_priv = (SISDRIPtr)psp->pDevPriv;
+
+   if (!driCheckDriDdxDrmVersions2(driver_name,
+				   &psp->dri_version, &dri_expected,
+				   &psp->ddx_version, &ddx_expected,
+				   &psp->drm_version, &drm_expected))
+      return NULL;
+
+   psp->private = sisCreateScreen(psp);
+
+   if (!psp->private) {
+      sisDestroyScreen(psp);   /* returns immediately: private is NULL here */
+      return NULL;
+   }
+
+   return sisFillInModes(psp, dri_priv->bytesPerPixel * 8);   /* bytes -> bits per pixel */
+}
+
+const struct __DriverAPIRec driDriverAPI = {   /* driver entry points; hooks left NULL are not provided */
+   .InitScreen      = sisInitScreen,
+   .DestroyScreen   = sisDestroyScreen,
+   .CreateContext   = sisCreateContext,
+   .DestroyContext  = sisDestroyContext,
+   .CreateBuffer    = sisCreateBuffer,
+   .DestroyBuffer   = sisDestroyBuffer,
+   .SwapBuffers     = sisSwapBuffers,
+   .MakeCurrent     = sisMakeCurrent,
+   .UnbindContext   = sisUnbindContext,
+   .GetSwapInfo     = NULL,
+   .GetDrawableMSC  = NULL,
+   .WaitForMSC      = NULL,
+   .WaitForSBC      = NULL,
+   .SwapBuffersMSC  = NULL
+
+};
+
+/* This is the NULL-terminated table of extensions that the loader will dlsym() for. */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+    &driCoreExtension.base,
+    &driLegacyExtension.base,
+    NULL
+};
diff --git a/src/mesa/drivers/dri/sis/sis_screen.h b/src/mesa/drivers/dri/sis/sis_screen.h
new file mode 100644
index 0000000000..8009fecc31
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_screen.h
@@ -0,0 +1,61 @@
+/**************************************************************************
+
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *    Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#ifndef __SIS_SCREEN_H
+#define __SIS_SCREEN_H
+
+typedef struct {			/* One DRM region; sisScreenRec uses it for MMIO and AGP */
+   drm_handle_t handle;			/* Handle to the DRM region */
+   drmSize size;			/* Size of the DRM region */
+   drmAddress map;			/* Mapping of the DRM region */
+} sisRegionRec2, *sisRegionPtr2;
+
+typedef struct {                     /* Per-screen driver data filled in by sisCreateScreen */
+   sisRegionRec2 mmio;               /* register region; mapping it is mandatory */
+   sisRegionRec2 agp;                /* AGP region; size 0 when absent or unmapped */
+   unsigned long agpBaseOffset;      /* value returned by drmAgpBase() */
+
+   unsigned int AGPCmdBufOffset;     /* copied from the SISDRIRec */
+   unsigned int AGPCmdBufSize;       /* copied from the SISDRIRec */
+
+   int deviceID;                     /* copied from the SISDRIRec */
+
+   int cpp;                          /* bytes per pixel (SISDRIRec bytesPerPixel) */
+   unsigned int screenX, screenY;    /* SISDRIRec width and height */
+
+   __DRIscreen *driScreen;           /* back pointer to the owning DRI screen */
+   unsigned int sarea_priv_offset;   /* set to sizeof(drm_sarea_t) */
+
+   /* Configuration cache with default values for all contexts */
+   driOptionCache optionCache;
+
+} sisScreenRec, *sisScreenPtr;
+
+#endif
diff --git a/src/mesa/drivers/dri/sis/sis_span.c b/src/mesa/drivers/dri/sis/sis_span.c
new file mode 100644
index 0000000000..008b00160e
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_span.c
@@ -0,0 +1,197 @@
+/**************************************************************************
+
+Copyright 2000 Silicon Integrated Systems Corp, Inc., HsinChu, Taiwan.
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Sung-Ching Lin <sclin@sis.com.tw>
+ *   Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#include "sis_context.h"
+#include "sis_span.h"
+#include "sis_lock.h"
+#include "sis_tris.h"
+
+#include "swrast/swrast.h"
+
+#define DBG 0
+
+#define LOCAL_VARS							\
+   sisContextPtr smesa = SIS_CONTEXT(ctx);				\
+   __DRIdrawable *dPriv = smesa->driDrawable;			\
+   struct sis_renderbuffer *srb = (struct sis_renderbuffer *) rb;	\
+   GLuint pitch = srb->pitch;						\
+   char *buf = srb->map;						\
+   GLuint p;								\
+   (void) buf; (void) p;
+   
+
+#define LOCAL_DEPTH_VARS						\
+   sisContextPtr smesa = SIS_CONTEXT(ctx);				\
+   __DRIdrawable *dPriv = smesa->driDrawable;			\
+   struct sis_renderbuffer *srb = (struct sis_renderbuffer *) rb;	\
+   char *buf = srb->map;
+
+#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS 
+
+#define HW_LOCK() do {} while(0);
+
+#define HW_UNLOCK() do {} while(0);
+
+/* RGB565 */
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x)    sis##x##_RGB565
+#define TAG2(x,y) sis##x##_RGB565##y
+#include "spantmp2.h"
+
+
+/* ARGB8888 */
+/* FIXME: the old code always read back alpha as 0xff, i.e. fully opaque.
+   Was there a reason to do so? If so, that won't work with the spantmp2.h template... */
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    sis##x##_ARGB8888
+#define TAG2(x,y) sis##x##_ARGB8888##y
+#include "spantmp2.h"
+
+
+/* 16 bit depthbuffer functions.
+ */
+#define VALUE_TYPE GLushort
+
+#define WRITE_DEPTH( _x, _y, d )	\
+   *(GLushort *)(buf + (_x)*2 + (_y)*srb->pitch) = d;
+
+#define READ_DEPTH( d, _x, _y )		\
+   d = *(GLushort *)(buf + (_x)*2 + (_y)*srb->pitch);
+
+#define TAG(x) sis##x##_z16
+#include "depthtmp.h"
+
+
+/* 32 bit depthbuffer functions.
+ */
+#define VALUE_TYPE GLuint
+
+#define WRITE_DEPTH( _x, _y, d )	\
+   *(GLuint *)(buf + (_x)*4 + (_y)*srb->pitch) = d;
+
+#define READ_DEPTH( d, _x, _y )		\
+   d = *(GLuint *)(buf + (_x)*4 + (_y)*srb->pitch);
+
+#define TAG(x) sis##x##_z32
+#include "depthtmp.h"
+
+
+/* 8/24 bit interleaved depth/stencil functions
+ */
+#define VALUE_TYPE GLuint
+
+#define WRITE_DEPTH( _x, _y, d ) {				\
+   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*srb->pitch);	\
+   tmp &= 0xff000000;						\
+   tmp |= (d & 0x00ffffff);					\
+   *(GLuint *)(buf + (_x)*4 + (_y)*srb->pitch) = tmp;		\
+}
+
+#define READ_DEPTH( d, _x, _y )	{				\
+   d = *(GLuint *)(buf + (_x)*4 + (_y)*srb->pitch) & 0x00ffffff; \
+}
+
+#define TAG(x) sis##x##_z24_s8
+#include "depthtmp.h"
+
+#define WRITE_STENCIL( _x, _y, d ) { /* read-modify-write of the top byte */ \
+   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*srb->pitch);	\
+   tmp &= 0x00ffffff;	/* keep the low 24 bits untouched */	\
+   tmp |= ((GLuint)(d) << 24);					\
+   *(GLuint *)(buf + (_x)*4 + (_y)*srb->pitch) = tmp;	\
+}
+
+#define READ_STENCIL( d, _x, _y )			\
+   d = (*(GLuint *)(buf + (_x)*4 + (_y)*srb->pitch) & 0xff000000) >> 24;
+
+#define TAG(x) sis##x##_z24_s8
+#include "stenciltmp.h"
+
+
+
+void sisSpanRenderStart( GLcontext *ctx )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   /* Fire pending vertices, take the hardware lock, then wait for engine idle. */
+   SIS_FIREVERTICES(smesa);
+   LOCK_HARDWARE();   /* released again in sisSpanRenderFinish */
+   WaitEngIdle( smesa );
+}
+
+void sisSpanRenderFinish( GLcontext *ctx )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);   /* presumably referenced by UNLOCK_HARDWARE() -- confirm in sis_lock.h */
+
+   _swrast_flush( ctx );   /* flush swrast before dropping the lock */
+   UNLOCK_HARDWARE();
+}
+
+void
+sisDDInitSpanFuncs( GLcontext *ctx )
+{  /* Install this driver's span start/finish callbacks in the swrast driver table. */
+   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
+   swdd->SpanRenderStart   = sisSpanRenderStart;
+   swdd->SpanRenderFinish  = sisSpanRenderFinish; 
+}
+
+
+
+/**
+ * Plug in the Get/Put routines for srb, chosen by srb->Base.Format; vis is unused.
+ */
+void
+sisSetSpanFunctions(struct sis_renderbuffer *srb, const GLvisual *vis)
+{
+   if (srb->Base.Format == MESA_FORMAT_RGB565) {
+      sisInitPointers_RGB565( &srb->Base );
+   }
+   else if (srb->Base.Format == MESA_FORMAT_ARGB8888) {
+      sisInitPointers_ARGB8888( &srb->Base );
+   }
+   else if (srb->Base.Format == MESA_FORMAT_Z16) {
+      sisInitDepthPointers_z16(&srb->Base);
+   }
+   else if (srb->Base.Format == MESA_FORMAT_S8_Z24) {
+      sisInitDepthPointers_z24_s8(&srb->Base);
+   }
+   else if (srb->Base.Format == MESA_FORMAT_Z32) {
+      sisInitDepthPointers_z32(&srb->Base);
+   }
+   else if (srb->Base.Format == MESA_FORMAT_S8) {   /* S8 uses the z24_s8 stencil accessors */
+      sisInitStencilPointers_z24_s8(&srb->Base);
+   }   /* any other format: no accessors are installed */
+}
diff --git a/src/mesa/drivers/dri/sis/sis_span.h b/src/mesa/drivers/dri/sis/sis_span.h
new file mode 100644
index 0000000000..a1f817c44c
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_span.h
@@ -0,0 +1,45 @@
+/**************************************************************************
+
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *    Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#ifndef __SIS_SPAN_H__
+#define __SIS_SPAN_H__
+ 
+#include "drirenderbuffer.h"
+
+
+extern void sisSpanRenderStart( GLcontext *ctx );
+extern void sisSpanRenderFinish( GLcontext *ctx );
+
+extern void sisDDInitSpanFuncs( GLcontext *ctx );
+
+extern void
+sisSetSpanFunctions(struct sis_renderbuffer *srb, const GLvisual *vis);
+
+#endif
diff --git a/src/mesa/drivers/dri/sis/sis_state.c b/src/mesa/drivers/dri/sis/sis_state.c
new file mode 100644
index 0000000000..a22195ccce
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_state.c
@@ -0,0 +1,862 @@
+/**************************************************************************
+
+Copyright 2000 Silicon Integrated Systems Corp, Inc., HsinChu, Taiwan.
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *    Sung-Ching Lin <sclin@sis.com.tw>
+ *    Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#include "sis_context.h"
+#include "sis_state.h"
+#include "sis_tris.h"
+#include "sis_lock.h"
+
+#include "main/context.h"
+#include "swrast/swrast.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "swrast_setup/swrast_setup.h"
+
+
+/* =============================================================
+ * Alpha blending
+ */
+
+static void
+sisDDAlphaFunc( GLcontext * ctx, GLenum func, GLfloat ref )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hw_cur = &smesa->current;
+   __GLSiSHardware *hw_prev = &smesa->prev;
+   GLubyte ref_ubyte;
+
+   /* The clamped 8-bit reference value is placed at bit 16 of hwAlpha. */
+   CLAMPED_FLOAT_TO_UBYTE(ref_ubyte, ref);
+   hw_cur->hwAlpha = ref_ubyte << 16;
+
+   /* OR in the comparison; an unrecognized func leaves only the reference. */
+   switch (func) {
+   case GL_ALWAYS:
+      hw_cur->hwAlpha |= SiS_ALPHA_ALWAYS;
+      break;
+   case GL_NEVER:
+      hw_cur->hwAlpha |= SiS_ALPHA_NEVER;
+      break;
+   case GL_EQUAL:
+      hw_cur->hwAlpha |= SiS_ALPHA_EQUAL;
+      break;
+   case GL_NOTEQUAL:
+      hw_cur->hwAlpha |= SiS_ALPHA_NOTEQUAL;
+      break;
+   case GL_LESS:
+      hw_cur->hwAlpha |= SiS_ALPHA_LESS;
+      break;
+   case GL_LEQUAL:
+      hw_cur->hwAlpha |= SiS_ALPHA_LEQUAL;
+      break;
+   case GL_GREATER:
+      hw_cur->hwAlpha |= SiS_ALPHA_GREATER;
+      break;
+   case GL_GEQUAL:
+      hw_cur->hwAlpha |= SiS_ALPHA_GEQUAL;
+      break;
+   }
+
+   /* Unconditionally mirror into prev and mark the alpha setting dirty. */
+   hw_prev->hwAlpha = hw_cur->hwAlpha;
+   smesa->GlobalFlag |= GFLAG_ALPHASETTING;
+}
+
+static void
+sisDDBlendFuncSeparate( GLcontext *ctx, 
+			GLenum sfactorRGB, GLenum dfactorRGB,
+			GLenum sfactorA,   GLenum dfactorA )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   /* Only the RGB factors are programmed; sfactorA and dfactorA are never read. */
+   __GLSiSHardware *prev = &smesa->prev;
+   __GLSiSHardware *current = &smesa->current;
+
+   current->hwDstSrcBlend = 0;   /* rebuilt from scratch: destination bits, then source bits */
+
+   switch (dfactorRGB)
+   {
+   case GL_ZERO:
+      current->hwDstSrcBlend |= SiS_D_ZERO;
+      break;
+   case GL_ONE:
+      current->hwDstSrcBlend |= SiS_D_ONE;
+      break;
+   case GL_SRC_COLOR:
+      current->hwDstSrcBlend |= SiS_D_SRC_COLOR;
+      break;
+   case GL_ONE_MINUS_SRC_COLOR:
+      current->hwDstSrcBlend |= SiS_D_ONE_MINUS_SRC_COLOR;
+      break;
+   case GL_SRC_ALPHA:
+      current->hwDstSrcBlend |= SiS_D_SRC_ALPHA;
+      break;
+   case GL_ONE_MINUS_SRC_ALPHA:
+      current->hwDstSrcBlend |= SiS_D_ONE_MINUS_SRC_ALPHA;
+      break;
+   case GL_DST_COLOR:
+      current->hwDstSrcBlend |= SiS_D_DST_COLOR;
+      break;
+   case GL_ONE_MINUS_DST_COLOR:
+      current->hwDstSrcBlend |= SiS_D_ONE_MINUS_DST_COLOR;
+      break;
+   case GL_DST_ALPHA:
+      current->hwDstSrcBlend |= SiS_D_DST_ALPHA;
+      break;
+   case GL_ONE_MINUS_DST_ALPHA:
+      current->hwDstSrcBlend |= SiS_D_ONE_MINUS_DST_ALPHA;
+      break;
+   default:   /* any other factor: log it and leave the destination bits at 0 */
+      fprintf(stderr, "Unknown dst blend function 0x%x\n", dfactorRGB);
+      break;
+   }
+
+   switch (sfactorRGB)
+   {
+   case GL_ZERO:
+      current->hwDstSrcBlend |= SiS_S_ZERO;
+      break;
+   case GL_ONE:
+      current->hwDstSrcBlend |= SiS_S_ONE;
+      break;
+   case GL_SRC_COLOR:
+      current->hwDstSrcBlend |= SiS_S_SRC_COLOR;
+      break;
+   case GL_ONE_MINUS_SRC_COLOR:
+      current->hwDstSrcBlend |= SiS_S_ONE_MINUS_SRC_COLOR;
+      break;
+   case GL_SRC_ALPHA:
+      current->hwDstSrcBlend |= SiS_S_SRC_ALPHA;
+      break;
+   case GL_ONE_MINUS_SRC_ALPHA:
+      current->hwDstSrcBlend |= SiS_S_ONE_MINUS_SRC_ALPHA;
+      break;
+   case GL_DST_COLOR:
+      current->hwDstSrcBlend |= SiS_S_DST_COLOR;
+      break;
+   case GL_ONE_MINUS_DST_COLOR:
+      current->hwDstSrcBlend |= SiS_S_ONE_MINUS_DST_COLOR;
+      break;
+   case GL_DST_ALPHA:
+      current->hwDstSrcBlend |= SiS_S_DST_ALPHA;
+      break;
+   case GL_ONE_MINUS_DST_ALPHA:
+      current->hwDstSrcBlend |= SiS_S_ONE_MINUS_DST_ALPHA;
+      break;
+   case GL_SRC_ALPHA_SATURATE:   /* handled for the source factor only */
+      current->hwDstSrcBlend |= SiS_S_SRC_ALPHA_SATURATE;
+      break;
+   default:   /* any other factor: log it and add no source bits */
+      fprintf(stderr, "Unknown src blend function 0x%x\n", sfactorRGB);
+      break;
+   }
+
+   if (current->hwDstSrcBlend != prev->hwDstSrcBlend) {   /* flag only real changes */
+      prev->hwDstSrcBlend = current->hwDstSrcBlend;
+      smesa->GlobalFlag |= GFLAG_DSTBLEND;
+   }
+}
+
+/* =============================================================
+ * Depth testing
+ */
+
+static void
+sisDDDepthFunc( GLcontext * ctx, GLenum func )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hw_cur = &smesa->current;
+   __GLSiSHardware *hw_prev = &smesa->prev;
+
+   /* Replace the Z test mode bits; an unrecognized func leaves them cleared. */
+   hw_cur->hwZ &= ~MASK_ZTestMode;
+   switch (func) {
+   case GL_NEVER:
+      hw_cur->hwZ |= SiS_Z_COMP_NEVER;
+      break;
+   case GL_LESS:
+      hw_cur->hwZ |= SiS_Z_COMP_S_LT_B;
+      break;
+   case GL_EQUAL:
+      hw_cur->hwZ |= SiS_Z_COMP_S_EQ_B;
+      break;
+   case GL_LEQUAL:
+      hw_cur->hwZ |= SiS_Z_COMP_S_LE_B;
+      break;
+   case GL_GREATER:
+      hw_cur->hwZ |= SiS_Z_COMP_S_GT_B;
+      break;
+   case GL_NOTEQUAL:
+      hw_cur->hwZ |= SiS_Z_COMP_S_NE_B;
+      break;
+   case GL_GEQUAL:
+      hw_cur->hwZ |= SiS_Z_COMP_S_GE_B;
+      break;
+   case GL_ALWAYS:
+      hw_cur->hwZ |= SiS_Z_COMP_ALWAYS;
+      break;
+   }
+
+   if (hw_cur->hwZ == hw_prev->hwZ)
+      return;   /* unchanged: nothing to flag */
+   hw_prev->hwZ = hw_cur->hwZ;
+   smesa->GlobalFlag |= GFLAG_ZSETTING;
+}
+
+void
+sisDDDepthMask( GLcontext * ctx, GLboolean flag )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *prev = &smesa->prev;
+   __GLSiSHardware *current = &smesa->current;
+   /* Program the Z write enable and, for stencil visuals, the combined write mask. */
+   if (!ctx->Depth.Test)
+      flag = GL_FALSE;   /* depth writes are treated as off while the depth test is disabled */
+
+   if (ctx->Visual.stencilBits) {
+      if (flag || (ctx->Stencil.WriteMask[0] != 0)) {   /* either depth or stencil is written */
+         current->hwCapEnable |= MASK_ZWriteEnable;
+         if (flag && ((ctx->Stencil.WriteMask[0] & 0xff) == 0xff)) {
+	      current->hwCapEnable2 &= ~MASK_ZMaskWriteEnable;   /* everything writable: no mask */
+         } else {
+            current->hwCapEnable2 |= MASK_ZMaskWriteEnable;
+            current->hwZMask = (ctx->Stencil.WriteMask[0] << 24) |   /* stencil mask in the top byte */
+               ((flag) ? 0x00ffffff : 0);                          /* depth bits in the low 24 */
+
+            if (current->hwZMask ^ prev->hwZMask) {   /* non-zero XOR: the mask changed */
+               prev->hwZMask = current->hwZMask;
+               smesa->GlobalFlag |= GFLAG_ZSETTING;
+            }
+         }
+      } else {
+         current->hwCapEnable &= ~MASK_ZWriteEnable;
+      }
+   } else {   /* no stencil bits: a plain depth write enable, never masked */
+      if (flag) {
+         current->hwCapEnable |= MASK_ZWriteEnable;
+         current->hwCapEnable2 &= ~MASK_ZMaskWriteEnable;
+      } else {
+         current->hwCapEnable &= ~MASK_ZWriteEnable;
+      }
+   }
+}
+
+/* =============================================================
+ * Clipping
+ */
+
+void
+sisUpdateClipping( GLcontext *ctx )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   /* Recompute the clip rectangle from smesa->width/height and, if enabled, the scissor box. */
+   __GLSiSHardware *prev = &smesa->prev;
+   __GLSiSHardware *current = &smesa->current;
+
+   GLint x1, y1, x2, y2;
+
+   if (smesa->is6326) {
+      /* XXX: 6326 has its own clipping for now. Should be fixed */
+      sis6326UpdateClipping(ctx);
+      return;
+   }
+   /* Start from the full width x height area; corners are inclusive. */
+   x1 = 0;
+   y1 = 0;
+   x2 = smesa->width - 1;
+   y2 = smesa->height - 1;
+
+   if (ctx->Scissor.Enabled) {   /* shrink to the intersection with the scissor box */
+      if (ctx->Scissor.X > x1)
+         x1 = ctx->Scissor.X;
+      if (ctx->Scissor.Y > y1)
+         y1 = ctx->Scissor.Y;
+      if (ctx->Scissor.X + ctx->Scissor.Width - 1 < x2)
+         x2 = ctx->Scissor.X + ctx->Scissor.Width - 1;
+      if (ctx->Scissor.Y + ctx->Scissor.Height - 1 < y2)
+         y2 = ctx->Scissor.Y + ctx->Scissor.Height - 1;
+   }
+   /* Both Y bounds go through Y_FLIP before being packed. */
+   y1 = Y_FLIP(y1);
+   y2 = Y_FLIP(y2);
+
+   current->clipTopBottom = (y2 << 13) | y1;   /* first value shifted up by 13 bits */
+   current->clipLeftRight = (x1 << 13) | x2;
+
+   if ((current->clipTopBottom ^ prev->clipTopBottom) ||
+       (current->clipLeftRight ^ prev->clipLeftRight))
+   {
+      prev->clipTopBottom = current->clipTopBottom;
+      prev->clipLeftRight = current->clipLeftRight;
+      smesa->GlobalFlag |= GFLAG_CLIPPING;
+   }
+}
+
+static void
+sisDDScissor( GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h )
+{  /* x, y, w and h are unused: sisUpdateClipping reads the box from ctx->Scissor. */
+   if (ctx->Scissor.Enabled)   /* nothing is recomputed here while scissoring is disabled */
+      sisUpdateClipping( ctx );
+}
+
+/* =============================================================
+ * Culling
+ */
+
+static void
+sisUpdateCull( GLcontext *ctx )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   const GLint cull_mode = ctx->Polygon.CullFaceMode;
+   const GLint front_face = ctx->Polygon.FrontFace;
+   const GLboolean cull_ccw =
+      (cull_mode == GL_FRONT && front_face == GL_CCW) ||
+      (cull_mode == GL_BACK && front_face == GL_CW);
+
+   /* Clear the cull-direction bits, then set the CCW bits for the two matching cases. */
+   smesa->AGPParseSet &= ~(MASK_PsCullDirection_CCW);
+   smesa->dwPrimitiveSet &= ~(MASK_CullDirection);
+
+   if (cull_ccw) {
+      smesa->AGPParseSet |= MASK_PsCullDirection_CCW;
+      smesa->dwPrimitiveSet |= OP_3D_CullDirection_CCW;
+   }
+}
+
+
+static void
+sisDDCullFace( GLcontext *ctx, GLenum mode )
+{  /* mode is unused: sisUpdateCull reads the cull state from ctx->Polygon. */
+   sisUpdateCull( ctx );
+}
+
+static void
+sisDDFrontFace( GLcontext *ctx, GLenum mode )
+{  /* mode is unused: sisUpdateCull reads the winding from ctx->Polygon. */
+   sisUpdateCull( ctx );
+}
+
+/* =============================================================
+ * Masks
+ */
+
+static void sisDDColorMask( GLcontext *ctx,
+			    GLboolean r, GLboolean g,
+			    GLboolean b, GLboolean a )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *prev = &smesa->prev;
+   __GLSiSHardware *current = &smesa->current;
+   /* Program the color/alpha write mask from the four per-channel enables. */
+   if (r && g && b && ((ctx->Visual.alphaBits == 0) || a)) {
+      current->hwCapEnable2 &= ~(MASK_AlphaMaskWriteEnable |
+				 MASK_ColorMaskWriteEnable);
+   } else {
+      current->hwCapEnable2 |= (MASK_AlphaMaskWriteEnable |
+                             MASK_ColorMaskWriteEnable);
+      /* Each term is parenthesized: ?: binds looser than |, so without this only one mask was used. */
+      current->hwDstMask = ((r) ? smesa->redMask : 0) |
+			   ((g) ? smesa->greenMask : 0) |
+			   ((b) ? smesa->blueMask : 0) |
+			   ((a) ? smesa->alphaMask : 0);
+   }
+
+   if (current->hwDstMask != prev->hwDstMask) {   /* flag only real changes */
+      prev->hwDstMask = current->hwDstMask;
+      smesa->GlobalFlag |= GFLAG_DESTSETTING;
+   }
+}
+
+/* =============================================================
+ * Rendering attributes
+ */
+
+static void sisUpdateSpecular(GLcontext *ctx)
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *current = &smesa->current;
+   /* Set or clear the specular enable bit according to NEED_SECONDARY_COLOR(ctx). */
+   if (NEED_SECONDARY_COLOR(ctx))
+      current->hwCapEnable |= MASK_SpecularEnable;
+   else
+      current->hwCapEnable &= ~MASK_SpecularEnable;
+}
+
+/* Driver LightModelfv hook.  Only GL_LIGHT_MODEL_COLOR_CONTROL is acted on:
+ * it triggers a refresh of the specular enable bit.  param is not read.
+ */
+static void sisDDLightModelfv(GLcontext *ctx, GLenum pname,
+			      const GLfloat *param)
+{
+   (void) param;
+
+   if (pname != GL_LIGHT_MODEL_COLOR_CONTROL)
+      return;
+
+   sisUpdateSpecular(ctx);
+}
+
+/* Driver ShadeModel hook.  The mode argument is not read here. */
+static void sisDDShadeModel( GLcontext *ctx, GLenum mode )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+
+   (void) mode;
+
+   /* Invalidate the cached primitive so that sisRasterPrimitive
+    * recalculates dwPrimitiveSet.
+    */
+   smesa->hw_primitive = -1;
+}
+
+/* =============================================================
+ * Window position
+ */
+
+/* =============================================================
+ * Viewport
+ */
+
+/* Derive smesa->hw_viewport from the context's window-map matrix.
+ *
+ * X is copied with a SUBPIXEL_X offset, Y is negated and offset by the
+ * drawable height plus SUBPIXEL_Y, and Z scale/translate are multiplied by
+ * depth_scale.  See also sis_translate_vertex.
+ */
+static void sisCalcViewport( GLcontext *ctx )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   const GLfloat *src = ctx->Viewport._WindowMap.m;
+   GLfloat *dst = smesa->hw_viewport;
+
+   /* Scale terms. */
+   dst[MAT_SX] =   src[MAT_SX];
+   dst[MAT_SY] = - src[MAT_SY];
+   dst[MAT_SZ] =   src[MAT_SZ] * smesa->depth_scale;
+
+   /* Translation terms. */
+   dst[MAT_TX] =   src[MAT_TX] + SUBPIXEL_X;
+   dst[MAT_TY] = - src[MAT_TY] + smesa->driDrawable->h + SUBPIXEL_Y;
+   dst[MAT_TZ] =   src[MAT_TZ] * smesa->depth_scale;
+}
+
+/* Driver Viewport hook.  The rectangle arguments are not read; the hardware
+ * viewport is recomputed from the context by sisCalcViewport().
+ */
+static void sisDDViewport( GLcontext *ctx,
+                           GLint x, GLint y,
+                           GLsizei width, GLsizei height )
+{
+   (void) x;
+   (void) y;
+   (void) width;
+   (void) height;
+
+   sisCalcViewport( ctx );
+}
+
+/* Driver DepthRange hook.  The near/far arguments are not read; the
+ * hardware viewport is recomputed from the context by sisCalcViewport().
+ */
+static void sisDDDepthRange( GLcontext *ctx,
+                             GLclampd nearval, GLclampd farval )
+{
+   (void) nearval;
+   (void) farval;
+
+   sisCalcViewport( ctx );
+}
+
+/* =============================================================
+ * Miscellaneous
+ */
+
+/* Driver LogicOpcode hook.
+ *
+ * Replaces the ROP2 field of hwDstSet with the LOP_* code matching the GL
+ * logic op.  An opcode not listed below leaves the field cleared.
+ * GFLAG_DESTSETTING is raised when hwDstSet differs from the recorded value.
+ */
+static void
+sisDDLogicOpCode( GLcontext *ctx, GLenum opcode )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hw = &smesa->current;
+   __GLSiSHardware *shadow = &smesa->prev;
+
+   hw->hwDstSet &= ~MASK_ROP2;
+
+   switch (opcode) {
+   case GL_CLEAR:         hw->hwDstSet |= LOP_CLEAR;         break;
+   case GL_SET:           hw->hwDstSet |= LOP_SET;           break;
+   case GL_COPY:          hw->hwDstSet |= LOP_COPY;          break;
+   case GL_COPY_INVERTED: hw->hwDstSet |= LOP_COPY_INVERTED; break;
+   case GL_NOOP:          hw->hwDstSet |= LOP_NOOP;          break;
+   case GL_INVERT:        hw->hwDstSet |= LOP_INVERT;        break;
+   case GL_AND:           hw->hwDstSet |= LOP_AND;           break;
+   case GL_NAND:          hw->hwDstSet |= LOP_NAND;          break;
+   case GL_OR:            hw->hwDstSet |= LOP_OR;            break;
+   case GL_NOR:           hw->hwDstSet |= LOP_NOR;           break;
+   case GL_XOR:           hw->hwDstSet |= LOP_XOR;           break;
+   case GL_EQUIV:         hw->hwDstSet |= LOP_EQUIV;         break;
+   case GL_AND_REVERSE:   hw->hwDstSet |= LOP_AND_REVERSE;   break;
+   case GL_AND_INVERTED:  hw->hwDstSet |= LOP_AND_INVERTED;  break;
+   case GL_OR_REVERSE:    hw->hwDstSet |= LOP_OR_REVERSE;    break;
+   case GL_OR_INVERTED:   hw->hwDstSet |= LOP_OR_INVERTED;   break;
+   }
+
+   if (hw->hwDstSet != shadow->hwDstSet) {
+      shadow->hwDstSet = hw->hwDstSet;
+      smesa->GlobalFlag |= GFLAG_DESTSETTING;
+   }
+}
+
+/* Driver DrawBuffer hook.
+ *
+ * Exactly one color draw buffer, front-left or back-left, is handled here;
+ * anything else raises SIS_FALLBACK_DRAW_BUFFER and returns.  For a handled
+ * buffer the destination offset and pitch field are programmed from that
+ * buffer, and GFLAG_DESTSETTING is raised if either value changed.  The
+ * mode argument is not read.
+ */
+void sisDDDrawBuffer( GLcontext *ctx, GLenum mode )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hw = &smesa->current;
+   __GLSiSHardware *shadow = &smesa->prev;
+   GLboolean dirty = GL_FALSE;
+
+   if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
+      FALLBACK( smesa, SIS_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   hw->hwDstSet &= ~MASK_DstBufferPitch;
+
+   switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) {
+   case BUFFER_FRONT_LEFT:
+      FALLBACK( smesa, SIS_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      hw->hwOffsetDest = smesa->front.offset >> 1;
+      hw->hwDstSet |= smesa->front.pitch >> 2;
+      break;
+   case BUFFER_BACK_LEFT:
+      FALLBACK( smesa, SIS_FALLBACK_DRAW_BUFFER, GL_FALSE );
+      hw->hwOffsetDest = smesa->back.offset >> 1;
+      hw->hwDstSet |= smesa->back.pitch >> 2;
+      break;
+   default:
+      FALLBACK( smesa, SIS_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   /* Record whichever destination values changed. */
+   if (hw->hwDstSet != shadow->hwDstSet) {
+      shadow->hwDstSet = hw->hwDstSet;
+      dirty = GL_TRUE;
+   }
+   if (hw->hwOffsetDest != shadow->hwOffsetDest) {
+      shadow->hwOffsetDest = hw->hwOffsetDest;
+      dirty = GL_TRUE;
+   }
+
+   if (dirty)
+      smesa->GlobalFlag |= GFLAG_DESTSETTING;
+}
+
+/* =============================================================
+ * Polygon stipple
+ */
+
+/* =============================================================
+ * Render mode
+ */
+
+/* =============================================================
+ * State enable/disable
+ */
+
+/* Driver Enable hook: mirrors a GL capability toggle into the current
+ * hardware state.  Capabilities not listed below are ignored.
+ */
+static void
+sisDDEnable( GLcontext * ctx, GLenum cap, GLboolean state )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hw = &smesa->current;
+
+   switch (cap) {
+   case GL_ALPHA_TEST:
+      if (!state)
+         hw->hwCapEnable &= ~MASK_AlphaTestEnable;
+      else
+         hw->hwCapEnable |= MASK_AlphaTestEnable;
+      break;
+
+   case GL_BLEND:
+      /* TODO: */
+      /* if (state & !ctx->Color.ColorLogicOpEnabled) */
+      if (!state)
+         hw->hwCapEnable &= ~MASK_BlendEnable;
+      else
+         hw->hwCapEnable |= MASK_BlendEnable;
+      break;
+
+   case GL_CULL_FACE:
+      if (!state)
+         hw->hwCapEnable &= ~MASK_CullEnable;
+      else
+         hw->hwCapEnable |= MASK_CullEnable;
+      break;
+
+   case GL_DEPTH_TEST:
+      /* The Z test is only switched on when depth.offset is non-zero. */
+      if (!state || smesa->depth.offset == 0)
+         hw->hwCapEnable &= ~MASK_ZTestEnable;
+      else
+         hw->hwCapEnable |= MASK_ZTestEnable;
+      sisDDDepthMask( ctx, ctx->Depth.Mask );
+      break;
+
+   case GL_DITHER:
+      if (!state)
+         hw->hwCapEnable &= ~MASK_DitherEnable;
+      else
+         hw->hwCapEnable |= MASK_DitherEnable;
+      break;
+
+   case GL_FOG:
+      if (!state)
+         hw->hwCapEnable &= ~MASK_FogEnable;
+      else
+         hw->hwCapEnable |= MASK_FogEnable;
+      break;
+
+   case GL_COLOR_LOGIC_OP:
+      /* Disabling the logic op is expressed as a plain GL_COPY. */
+      if (!state)
+         sisDDLogicOpCode( ctx, GL_COPY );
+      else
+         sisDDLogicOpCode( ctx, ctx->Color.LogicOp );
+      break;
+
+   case GL_SCISSOR_TEST:
+      sisUpdateClipping( ctx );
+      break;
+
+   case GL_STENCIL_TEST:
+      if (!state) {
+         FALLBACK(smesa, SIS_FALLBACK_STENCIL, 0);
+         hw->hwCapEnable &= ~(MASK_StencilTestEnable |
+                              MASK_StencilWriteEnable);
+      } else if (smesa->zFormat != SiS_ZFORMAT_S8Z24) {
+         /* Any Z format other than S8Z24 takes the stencil fallback. */
+         FALLBACK(smesa, SIS_FALLBACK_STENCIL, 1);
+      } else {
+         hw->hwCapEnable |= (MASK_StencilTestEnable |
+                             MASK_StencilWriteEnable);
+      }
+      break;
+
+   case GL_LIGHTING:
+   case GL_COLOR_SUM_EXT:
+      sisUpdateSpecular(ctx);
+      break;
+   }
+}
+
+
+/* =============================================================
+ * State initialization, management
+ */
+
+/* Called before beginning of rendering.
+ *
+ * Flags the two capability-enable words as dirty when they differ from the
+ * recorded copies, then runs the render-state and texture-state updaters
+ * if any of their GlobalFlag bits are set.
+ */
+void
+sisUpdateHWState( GLcontext *ctx )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hw = &smesa->current;
+   __GLSiSHardware *shadow = &smesa->prev;
+
+   /* enable setting 1 */
+   if (hw->hwCapEnable != shadow->hwCapEnable) {
+      shadow->hwCapEnable = hw->hwCapEnable;
+      smesa->GlobalFlag |= GFLAG_ENABLESETTING;
+   }
+
+   /* enable setting 2 */
+   if (hw->hwCapEnable2 != shadow->hwCapEnable2) {
+      shadow->hwCapEnable2 = hw->hwCapEnable2;
+      smesa->GlobalFlag |= GFLAG_ENABLESETTING2;
+   }
+
+   if (smesa->GlobalFlag & GFLAG_RENDER_STATES) {
+      sis_update_render_state( smesa );
+   }
+
+   if (smesa->GlobalFlag & GFLAG_TEXTURE_STATES) {
+      sis_update_texture_state( smesa );
+   }
+}
+
+/* Driver UpdateState hook: forwards the dirty-state bits to the swrast,
+ * swsetup, vbo and tnl modules, then accumulates them in NewGLState.
+ */
+static void
+sisDDInvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+
+   /* Helper modules first, in the same order as before. */
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+
+   /* Then remember the bits for the driver itself. */
+   smesa->NewGLState |= new_state;
+}
+
+/* Initialize the context's hardware state.
+ *
+ * Fills smesa->prev with the initial register values, derives the Z format
+ * and depth scale from the visual's depth bits, initializes the clear
+ * pattern and cull state, copies prev into current, and finally pushes the
+ * initial fog parameters through sisDDFogfv.
+ */
+void sisDDInitState( sisContextPtr smesa )
+{
+   __GLSiSHardware *current = &smesa->current;
+   __GLSiSHardware *prev = &(smesa->prev);
+   GLcontext *ctx = smesa->glCtx;
+
+   /* add Texture Perspective Enable */
+   prev->hwCapEnable = MASK_FogPerspectiveEnable | MASK_TextureCacheEnable |
+      MASK_TexturePerspectiveEnable | MASK_DitherEnable;
+
+   /*
+   prev->hwCapEnable2 = 0x00aa0080;
+   */
+   /* if multi-texture enabled, disable Z pre-test */
+   prev->hwCapEnable2 = MASK_TextureMipmapBiasEnable;
+
+   /* Z test mode is LESS */
+   prev->hwZ = SiS_Z_COMP_S_LT_B;
+
+   /* Depth mask */
+   prev->hwZMask = 0xffffffff;
+
+   /* Alpha test mode is ALWAYS, alpha ref value is 0 */
+   prev->hwAlpha = SiS_ALPHA_ALWAYS;
+
+   /* ROP2 is COPYPEN */
+   prev->hwDstSet = LOP_COPY;
+
+   /* color mask */
+   prev->hwDstMask = 0xffffffff;
+
+   /* LinePattern is 0, Repeat Factor is 0 */
+   prev->hwLinePattern = 0x00008000;
+
+   /* Src blend is BLEND_ONE, Dst blend is D3DBLEND_ZERO */
+   prev->hwDstSrcBlend = SiS_S_ONE | SiS_D_ZERO;
+
+   /* Stenciling disabled, function ALWAYS, ref value zero, mask all ones */
+   prev->hwStSetting = STENCIL_FORMAT_8 | SiS_STENCIL_ALWAYS | 0xff;
+   /* Op is KEEP for all three operations */
+   prev->hwStSetting2 = SiS_SFAIL_KEEP | SiS_SPASS_ZFAIL_KEEP |
+      SiS_SPASS_ZPASS_KEEP;
+
+   /* Texture mapping mode is Tile */
+#if 0
+   prev->texture[0].hwTextureSet = 0x00030000;
+#endif
+   /* Magnified & minified texture filter is NEAREST */
+#if 0
+   prev->texture[0].hwTextureMip = 0;
+#endif
+
+   /* Texture Blending setting -- use fragment color/alpha*/
+   prev->hwTexBlendColor0 = STAGE0_C_CF;
+   prev->hwTexBlendColor1 = STAGE1_C_CF;
+   prev->hwTexBlendAlpha0 = STAGE0_A_AF;
+   prev->hwTexBlendAlpha1 = STAGE1_A_AF;
+
+   /* Destination pixel format follows the color buffer depth. */
+   switch (smesa->bytesPerPixel)
+   {
+   case 2:
+      prev->hwDstSet |= DST_FORMAT_RGB_565;
+      break;
+   case 4:
+      prev->hwDstSet |= DST_FORMAT_ARGB_8888;
+      break;
+   }
+
+   switch (ctx->Visual.depthBits)
+   {
+   case 0:
+      /* No depth bits: leave Z writes disabled.  This case used to fall
+       * through into the 16-bit case, which turned MASK_ZWriteEnable back
+       * on and so cancelled the clear.  The Z16 format and scale are still
+       * programmed so that hwZ and depth_scale keep the values they had.
+       */
+      smesa->zFormat = SiS_ZFORMAT_Z16;
+      prev->hwCapEnable &= ~MASK_ZWriteEnable;
+      smesa->depth_scale = 1.0 / (GLfloat)0xffff;
+      break;
+   case 16:
+      smesa->zFormat = SiS_ZFORMAT_Z16;
+      prev->hwCapEnable |= MASK_ZWriteEnable;
+      smesa->depth_scale = 1.0 / (GLfloat)0xffff;
+      break;
+   case 32:
+      smesa->zFormat = SiS_ZFORMAT_Z32;
+      prev->hwCapEnable |= MASK_ZWriteEnable;
+      smesa->depth_scale = 1.0 / (GLfloat)0xffffffff;
+      break;
+   case 24:
+      /* 24-bit depth is only handled together with a stencil buffer. */
+      assert (ctx->Visual.stencilBits);
+      smesa->zFormat = SiS_ZFORMAT_S8Z24;
+      prev->hwCapEnable |= MASK_StencilBufferEnable;
+      prev->hwCapEnable |= MASK_ZWriteEnable;
+      smesa->depth_scale = 1.0 / (GLfloat)0xffffff;
+      break;
+   }
+
+   prev->hwZ |= smesa->zFormat;
+
+   /* TODO: need to clear cache? */
+   smesa->clearTexCache = GL_TRUE;
+
+   smesa->clearColorPattern = 0;
+
+   smesa->AGPParseSet = MASK_PsTexture1FromB | MASK_PsBumpTextureFromC;
+   smesa->dwPrimitiveSet = OP_3D_Texture1FromB | OP_3D_TextureBumpFromC;
+
+   sisUpdateZStencilPattern( smesa, 1.0, 0 );
+   sisUpdateCull( ctx );
+
+   /* From here on "current" starts out identical to "prev". */
+   memcpy( current, prev, sizeof (__GLSiSHardware) );
+
+   /* Set initial fog settings. Start and end are the same case.  */
+   sisDDFogfv( ctx, GL_FOG_DENSITY, &ctx->Fog.Density );
+   sisDDFogfv( ctx, GL_FOG_END, &ctx->Fog.End );
+   sisDDFogfv( ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL );
+   sisDDFogfv( ctx, GL_FOG_MODE, NULL );
+}
+
+/* Initialize the driver's state functions.
+ *
+ * Installs this file's hooks (plus the clear, fog, scissor and depth hooks
+ * declared elsewhere) into ctx->Driver and explicitly sets the hooks this
+ * driver does not provide to NULL.
+ */
+void sisDDInitStateFuncs( GLcontext *ctx )
+{
+   ctx->Driver.UpdateState       = sisDDInvalidateState;
+
+   /* Buffer clears. */
+   ctx->Driver.Clear             = sisDDClear;
+   ctx->Driver.ClearColor        = sisDDClearColor;
+   ctx->Driver.ClearDepth        = sisDDClearDepth;
+   ctx->Driver.ClearStencil      = sisDDClearStencil;
+
+   /* State hooks implemented by the driver. */
+   ctx->Driver.AlphaFunc         = sisDDAlphaFunc;
+   ctx->Driver.BlendFuncSeparate = sisDDBlendFuncSeparate;
+   ctx->Driver.ColorMask         = sisDDColorMask;
+   ctx->Driver.CullFace          = sisDDCullFace;
+   ctx->Driver.DepthMask         = sisDDDepthMask;
+   ctx->Driver.DepthFunc         = sisDDDepthFunc;
+   ctx->Driver.DepthRange        = sisDDDepthRange;
+   ctx->Driver.DrawBuffer        = sisDDDrawBuffer;
+   ctx->Driver.Enable            = sisDDEnable;
+   ctx->Driver.FrontFace         = sisDDFrontFace;
+   ctx->Driver.Fogfv             = sisDDFogfv;
+   ctx->Driver.LogicOpcode       = sisDDLogicOpCode;
+   ctx->Driver.Scissor           = sisDDScissor;
+   ctx->Driver.ShadeModel        = sisDDShadeModel;
+   ctx->Driver.LightModelfv      = sisDDLightModelfv;
+   ctx->Driver.Viewport          = sisDDViewport;
+
+   /* Hooks deliberately left unset. */
+   ctx->Driver.Hint              = NULL;
+   ctx->Driver.Lightfv           = NULL;
+   ctx->Driver.PolygonMode       = NULL;
+   ctx->Driver.PolygonStipple    = NULL;
+   ctx->Driver.ReadBuffer        = NULL;
+   ctx->Driver.RenderMode        = NULL;
+
+   /* XXX this should go away */
+   ctx->Driver.ResizeBuffers     = sisReAllocateBuffers;
+}
diff --git a/src/mesa/drivers/dri/sis/sis_state.h b/src/mesa/drivers/dri/sis/sis_state.h
new file mode 100644
index 0000000000..2d0ea9c5fb
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_state.h
@@ -0,0 +1,68 @@
+/**************************************************************************
+
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *    Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#ifndef __SIS_STATE_H__
+#define __SIS_STATE_H__
+
+#include "sis_context.h"
+
+/* sis6326_clear.c -- clear entry points carrying the sis6326 prefix */
+extern void sis6326DDClear( GLcontext *ctx, GLbitfield mask );
+extern void sis6326DDClearColor( GLcontext * ctx, const GLfloat color[4] );
+extern void sis6326DDClearDepth( GLcontext * ctx, GLclampd d );
+extern void sis6326UpdateZPattern(sisContextPtr smesa, GLclampd z);
+
+/* sis_clear.c -- clear driver hooks and the Z/stencil pattern update */
+extern void sisDDClear( GLcontext *ctx, GLbitfield mask );
+extern void sisDDClearColor( GLcontext * ctx, const GLfloat color[4] );
+extern void sisDDClearDepth( GLcontext * ctx, GLclampd d );
+extern void sisDDClearStencil( GLcontext * ctx, GLint s );
+extern void sisUpdateZStencilPattern( sisContextPtr smesa, GLclampd z,
+				      int stencil );
+
+/* sis_fog.c -- fog parameter driver hook */
+extern void sisDDFogfv( GLcontext * ctx, GLenum pname, const GLfloat * params );
+
+/* sis6326_state.c -- state entry points carrying the sis6326 prefix */
+extern void sis6326DDInitState( sisContextPtr smesa );
+extern void sis6326DDInitStateFuncs( GLcontext *ctx );
+extern void sis6326UpdateClipping( GLcontext * gc );
+extern void sis6326DDDrawBuffer( GLcontext *ctx, GLenum mode );
+extern void sis6326UpdateHWState( GLcontext *ctx );
+
+/* sis_state.c -- state init, driver hook installation, HW state update */
+extern void sisDDInitState( sisContextPtr smesa );
+extern void sisDDInitStateFuncs( GLcontext *ctx );
+extern void sisDDDepthMask( GLcontext * ctx, GLboolean flag );
+extern void sisUpdateClipping( GLcontext * gc );
+extern void sisDDDrawBuffer( GLcontext *ctx, GLenum mode );
+extern void sisUpdateHWState( GLcontext *ctx );
+
+#endif
diff --git a/src/mesa/drivers/dri/sis/sis_stencil.c b/src/mesa/drivers/dri/sis/sis_stencil.c
new file mode 100644
index 0000000000..55c0440eba
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_stencil.c
@@ -0,0 +1,205 @@
+/**************************************************************************
+
+Copyright 2000 Silicon Integrated Systems Corp, Inc., HsinChu, Taiwan.
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Sung-Ching Lin <sclin@sis.com.tw>
+ *   Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#include "sis_context.h"
+#include "sis_state.h"
+#include "sis_stencil.h"
+
+/* Driver StencilFuncSeparate hook.
+ *
+ * Rebuilds hwStSetting from the context's stencil reference and value mask
+ * for index 0 (low 8 bits of each) plus the compare function, and raises
+ * GFLAG_STENCILSETTING when the word changed.  The face, ref and mask
+ * arguments are not read; the values come from ctx->Stencil.
+ */
+static void
+sisDDStencilFuncSeparate( GLcontext * ctx, GLenum face,
+                          GLenum func, GLint ref, GLuint mask )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hw = &smesa->current;
+   __GLSiSHardware *shadow = &smesa->prev;
+
+   /* set reference */
+   hw->hwStSetting = (STENCIL_FORMAT_8 |
+                      ((ctx->Stencil.Ref[0] & 0xff) << 8) |
+                      (ctx->Stencil.ValueMask[0] & 0xff));
+
+   switch (func) {
+   case GL_NEVER:    hw->hwStSetting |= SiS_STENCIL_NEVER;    break;
+   case GL_LESS:     hw->hwStSetting |= SiS_STENCIL_LESS;     break;
+   case GL_EQUAL:    hw->hwStSetting |= SiS_STENCIL_EQUAL;    break;
+   case GL_LEQUAL:   hw->hwStSetting |= SiS_STENCIL_LEQUAL;   break;
+   case GL_GREATER:  hw->hwStSetting |= SiS_STENCIL_GREATER;  break;
+   case GL_NOTEQUAL: hw->hwStSetting |= SiS_STENCIL_NOTEQUAL; break;
+   case GL_GEQUAL:   hw->hwStSetting |= SiS_STENCIL_GEQUAL;   break;
+   case GL_ALWAYS:   hw->hwStSetting |= SiS_STENCIL_ALWAYS;   break;
+   }
+
+   if (hw->hwStSetting == shadow->hwStSetting)
+      return;
+
+   shadow->hwStSetting = hw->hwStSetting;
+   smesa->GlobalFlag |= GFLAG_STENCILSETTING;
+}
+
+/* Driver StencilMaskSeparate hook.  When the visual has stencil bits, the
+ * depth-mask hook is re-run with the context's current depth mask.  The
+ * face and mask arguments are not read.
+ */
+static void
+sisDDStencilMaskSeparate( GLcontext * ctx, GLenum face, GLuint mask )
+{
+   (void) face;
+   (void) mask;
+
+   if (ctx->Visual.stencilBits) {
+      /* set Z buffer Write Enable */
+      sisDDDepthMask (ctx, ctx->Depth.Mask);
+   }
+}
+
+/* Driver StencilOpSeparate hook.
+ *
+ * Clears the three stencil-op fields of hwStSetting2 and refills them from
+ * the fail / zfail / zpass operations.  An operation not listed in a switch
+ * leaves its field cleared.  GFLAG_STENCILSETTING is raised when the word
+ * changed.  The face argument is not read.
+ */
+static void
+sisDDStencilOpSeparate( GLcontext * ctx, GLenum face, GLenum fail,
+                        GLenum zfail, GLenum zpass )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hw = &smesa->current;
+   __GLSiSHardware *shadow = &smesa->prev;
+
+   hw->hwStSetting2 &= ~(MASK_StencilZPassOp | MASK_StencilZFailOp |
+                         MASK_StencilFailOp);
+
+   /* Stencil test fails. */
+   switch (fail) {
+   case GL_KEEP:      hw->hwStSetting2 |= SiS_SFAIL_KEEP;      break;
+   case GL_ZERO:      hw->hwStSetting2 |= SiS_SFAIL_ZERO;      break;
+   case GL_REPLACE:   hw->hwStSetting2 |= SiS_SFAIL_REPLACE;   break;
+   case GL_INVERT:    hw->hwStSetting2 |= SiS_SFAIL_INVERT;    break;
+   case GL_INCR:      hw->hwStSetting2 |= SiS_SFAIL_INCR;      break;
+   case GL_DECR:      hw->hwStSetting2 |= SiS_SFAIL_DECR;      break;
+   case GL_INCR_WRAP: hw->hwStSetting2 |= SiS_SFAIL_INCR_WRAP; break;
+   case GL_DECR_WRAP: hw->hwStSetting2 |= SiS_SFAIL_DECR_WRAP; break;
+   }
+
+   /* Stencil test passes, depth test fails. */
+   switch (zfail) {
+   case GL_KEEP:      hw->hwStSetting2 |= SiS_SPASS_ZFAIL_KEEP;      break;
+   case GL_ZERO:      hw->hwStSetting2 |= SiS_SPASS_ZFAIL_ZERO;      break;
+   case GL_REPLACE:   hw->hwStSetting2 |= SiS_SPASS_ZFAIL_REPLACE;   break;
+   case GL_INVERT:    hw->hwStSetting2 |= SiS_SPASS_ZFAIL_INVERT;    break;
+   case GL_INCR:      hw->hwStSetting2 |= SiS_SPASS_ZFAIL_INCR;      break;
+   case GL_DECR:      hw->hwStSetting2 |= SiS_SPASS_ZFAIL_DECR;      break;
+   case GL_INCR_WRAP: hw->hwStSetting2 |= SiS_SPASS_ZFAIL_INCR_WRAP; break;
+   case GL_DECR_WRAP: hw->hwStSetting2 |= SiS_SPASS_ZFAIL_DECR_WRAP; break;
+   }
+
+   /* Stencil and depth tests both pass. */
+   switch (zpass) {
+   case GL_KEEP:      hw->hwStSetting2 |= SiS_SPASS_ZPASS_KEEP;      break;
+   case GL_ZERO:      hw->hwStSetting2 |= SiS_SPASS_ZPASS_ZERO;      break;
+   case GL_REPLACE:   hw->hwStSetting2 |= SiS_SPASS_ZPASS_REPLACE;   break;
+   case GL_INVERT:    hw->hwStSetting2 |= SiS_SPASS_ZPASS_INVERT;    break;
+   case GL_INCR:      hw->hwStSetting2 |= SiS_SPASS_ZPASS_INCR;      break;
+   case GL_DECR:      hw->hwStSetting2 |= SiS_SPASS_ZPASS_DECR;      break;
+   case GL_INCR_WRAP: hw->hwStSetting2 |= SiS_SPASS_ZPASS_INCR_WRAP; break;
+   case GL_DECR_WRAP: hw->hwStSetting2 |= SiS_SPASS_ZPASS_DECR_WRAP; break;
+   }
+
+   if (hw->hwStSetting2 == shadow->hwStSetting2)
+      return;
+
+   shadow->hwStSetting2 = hw->hwStSetting2;
+   smesa->GlobalFlag |= GFLAG_STENCILSETTING;
+}
+
+/* Install this file's stencil hooks into ctx->Driver. */
+void
+sisDDInitStencilFuncs( GLcontext *ctx )
+{
+   ctx->Driver.StencilOpSeparate   = sisDDStencilOpSeparate;
+   ctx->Driver.StencilMaskSeparate = sisDDStencilMaskSeparate;
+   ctx->Driver.StencilFuncSeparate = sisDDStencilFuncSeparate;
+}
diff --git a/src/mesa/drivers/dri/sis/sis_stencil.h b/src/mesa/drivers/dri/sis/sis_stencil.h
new file mode 100644
index 0000000000..6b556c4378
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_stencil.h
@@ -0,0 +1,36 @@
+/**************************************************************************
+
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *    Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#ifndef __SIS_STENCIL_H__
+#define __SIS_STENCIL_H__
+
+extern void sisDDInitStencilFuncs( GLcontext *ctx ); /* sis_stencil.c */
+
+#endif
diff --git a/src/mesa/drivers/dri/sis/sis_tex.c b/src/mesa/drivers/dri/sis/sis_tex.c
new file mode 100644
index 0000000000..31709c3af6
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_tex.c
@@ -0,0 +1,568 @@
+/**************************************************************************
+
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *    Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#include "swrast/swrast.h"
+#include "main/imports.h"
+#include "main/texstore.h"
+#include "main/texobj.h"
+
+#include "sis_context.h"
+#include "sis_alloc.h"
+#include "sis_tex.h"
+#include "xmlpool.h"
+
+/* Round the address `value` up to the next multiple of `align` (which must
+ * be a power of two) and yield it as a GLubyte pointer.  Both arguments and
+ * the whole expansion are parenthesized so the macro stays correct with
+ * arbitrary argument expressions and inside larger expressions.
+ */
+#define ALIGN(value, align) \
+   ((GLubyte *)(((long)(value) + ((long)(align) - 1)) & ~((long)(align) - 1)))
+
+/* Alignment passed to ALIGN() for the start of each texture image. */
+#define TEXTURE_HW_ALIGNMENT 4
+/* Extra bytes added to the size of every texture image allocation. */
+#define TEXTURE_HW_PLUS (4 + 4)
+
+/* Allocate a zeroed driver texture object, attach it to texObj->DriverData
+ * and return it.  The result of CALLOC_STRUCT is stored and returned
+ * unchecked.
+ */
+static sisTexObjPtr
+sisAllocTexObj( struct gl_texture_object *texObj )
+{
+   sisTexObjPtr driver_tex = (sisTexObjPtr) CALLOC_STRUCT( sis_tex_obj );
+
+   texObj->DriverData = driver_tex;
+
+   return driver_tex;
+}
+
+/* Allocate storage for one mipmap level of a driver texture object.
+ *
+ * On the first image of an object the base format is recorded and the
+ * matching hardware texel format chosen; an unsupported Mesa format is
+ * reported through sis_fatal_error.  Memory is taken from sisAllocFB, and
+ * from sisAllocAGP if that returns NULL; failure of both is reported
+ * through sis_fatal_error.  The level's data pointer, pitch and size are
+ * then filled in and the object's image count is incremented.
+ */
+static void
+sisAllocTexImage( sisContextPtr smesa, sisTexObjPtr t, int level,
+                  const struct gl_texture_image *image )
+{
+   char *base;
+   int bytes_per_texel, alloc_size;
+
+   if (t->format == 0) {
+      t->format = image->_BaseFormat;
+
+      switch (image->TexFormat) {
+      case MESA_FORMAT_ARGB8888:  t->hwformat = TEXEL_ARGB_8888_32; break;
+      case MESA_FORMAT_ARGB4444:  t->hwformat = TEXEL_ARGB_4444_16; break;
+      case MESA_FORMAT_ARGB1555:  t->hwformat = TEXEL_ARGB_1555_16; break;
+      case MESA_FORMAT_RGB565:    t->hwformat = TEXEL_RGB_565_16;   break;
+      case MESA_FORMAT_RGB332:    t->hwformat = TEXEL_RGB_332_8;    break;
+      case MESA_FORMAT_I8:        t->hwformat = TEXEL_I8;           break;
+      case MESA_FORMAT_A8:        t->hwformat = TEXEL_A8;           break;
+      case MESA_FORMAT_L8:        t->hwformat = TEXEL_L8;           break;
+      case MESA_FORMAT_AL88:      t->hwformat = TEXEL_AL88;         break;
+      case MESA_FORMAT_YCBCR:     t->hwformat = TEXEL_YUV422;       break;
+      case MESA_FORMAT_YCBCR_REV: t->hwformat = TEXEL_VUY422;       break;
+      default:
+         sis_fatal_error("Bad texture format 0x%x.\n", image->TexFormat);
+      }
+   }
+   /* Every level of one object must share the same base format. */
+   assert(t->format == image->_BaseFormat);
+
+   bytes_per_texel = _mesa_get_format_bytes(image->TexFormat);
+   alloc_size = image->Width * image->Height * bytes_per_texel + TEXTURE_HW_PLUS;
+
+   /* Try the framebuffer allocator first, then fall back to AGP. */
+   base = sisAllocFB( smesa, alloc_size, &t->image[level].handle );
+   if (base != NULL) {
+      t->image[level].memType = VIDEO_TYPE;
+   } else {
+      base = sisAllocAGP( smesa, alloc_size, &t->image[level].handle );
+      if (base == NULL)
+         sis_fatal_error("Failure to allocate texture memory.\n");
+      t->image[level].memType = AGP_TYPE;
+   }
+
+   t->image[level].Data = ALIGN(base, TEXTURE_HW_ALIGNMENT);
+   t->image[level].pitch = image->Width * bytes_per_texel;
+   t->image[level].size = image->Width * image->Height * bytes_per_texel;
+   t->numImages++;
+}
+
+/* Release the storage of one mipmap level, if it has any.
+ *
+ * The memory is returned through sisFreeFB or sisFreeAGP according to the
+ * level's memType.  When the last image of the object goes away, the
+ * recorded formats are reset so the object can be reused for new formats.
+ */
+static void
+sisFreeTexImage( sisContextPtr smesa, sisTexObjPtr t, int level )
+{
+   assert(level >= 0);
+   assert(level < SIS_MAX_TEXTURE_LEVELS);
+
+   /* Nothing was ever allocated for this level. */
+   if (t->image[level].Data == NULL)
+      return;
+
+   if (t->image[level].memType == VIDEO_TYPE) {
+      sisFreeFB( smesa, t->image[level].handle );
+   } else if (t->image[level].memType == AGP_TYPE) {
+      sisFreeAGP( smesa, t->image[level].handle );
+   }
+
+   t->image[level].Data = NULL;
+   t->image[level].handle = NULL;
+
+   t->numImages -= 1;
+   if (t->numImages == 0) {
+      t->format = 0;
+      t->hwformat = 0;
+   }
+}
+
+/* TexEnv callback: marks the texture environment of the current texture
+ * unit as changed.  target, pname and param are not read.
+ */
+static void
+sisTexEnv( GLcontext *ctx, GLenum target, GLenum pname, const GLfloat *param )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+
+   (void) target;
+   (void) pname;
+   (void) param;
+
+   smesa->TexStates[ctx->Texture.CurrentUnit] |= NEW_TEXTURE_ENV;
+}
+
+/* TexParameter callback: marks the texturing state of the current texture
+ * unit as changed.  target, texObj, pname and params are not read.
+ */
+static void
+sisTexParameter( GLcontext *ctx, GLenum target,
+                 struct gl_texture_object *texObj, GLenum pname,
+                 const GLfloat *params )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+
+   (void) target;
+   (void) texObj;
+   (void) pname;
+   (void) params;
+
+   smesa->TexStates[ctx->Texture.CurrentUnit] |= NEW_TEXTURING;
+}
+
+/* BindTexture callback.
+ *
+ * For 1D and 2D targets a driver texture object is created on first bind.
+ * If the object has driver data, the current unit is flagged for
+ * re-texturing, and additionally for a texture-environment update when the
+ * bound format differs from the one last recorded for that unit.
+ */
+static void
+sisBindTexture( GLcontext *ctx, GLenum target,
+                struct gl_texture_object *texObj )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   sisTexObjPtr t;
+
+   if ((target == GL_TEXTURE_2D || target == GL_TEXTURE_1D) &&
+       texObj->DriverData == NULL) {
+      sisAllocTexObj( texObj );
+   }
+
+   /* Objects without driver data are left alone. */
+   t = texObj->DriverData;
+   if (t == NULL)
+      return;
+
+   if (smesa->PrevTexFormat[ctx->Texture.CurrentUnit] != t->format) {
+      smesa->PrevTexFormat[ctx->Texture.CurrentUnit] = t->format;
+      smesa->TexStates[ctx->Texture.CurrentUnit] |= NEW_TEXTURE_ENV;
+   }
+
+   smesa->TexStates[ctx->Texture.CurrentUnit] |= NEW_TEXTURING;
+}
+
+/* DeleteTexture callback.
+ *
+ * Requests a texture cache clear, frees every mipmap level and the driver
+ * texture object if one is attached, and then always hands the object to
+ * _mesa_delete_texture_object().  Previously an object without driver data
+ * returned early and was never passed on, which leaked the core texture
+ * object.
+ */
+static void
+sisDeleteTexture( GLcontext * ctx, struct gl_texture_object *texObj )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   sisTexObjPtr t;
+   int i;
+
+   smesa->clearTexCache = GL_TRUE;
+
+   /* DriverData is NULL for objects that never had a driver texture object
+    * attached; there is no driver memory to release for those.
+    */
+   t = texObj->DriverData;
+   if (t != NULL) {
+      for (i = 0; i < SIS_MAX_TEXTURE_LEVELS; i++) {
+         sisFreeTexImage( smesa, t, i );
+      }
+
+      FREE(t);
+      texObj->DriverData = NULL;
+   }
+
+   /* Free mipmap images and the texture object itself */
+   _mesa_delete_texture_object(ctx, texObj);
+}
+
+/* IsTextureResident callback: a texture is reported resident exactly when
+ * it has driver data attached.  ctx is not read.
+ */
+static GLboolean sisIsTextureResident( GLcontext * ctx,
+                                       struct gl_texture_object *texObj )
+{
+   (void) ctx;
+
+   if (texObj->DriverData != NULL)
+      return GL_TRUE;
+
+   return GL_FALSE;
+}
+
+/**
+ * Pick the Mesa texture format used to store an image.
+ * Installed as functions->ChooseTextureFormat.
+ *
+ * The result depends on the requested internal format, on the user's pixel
+ * type for the unsized RGB/RGBA formats, and on the context's texture_depth
+ * setting (see do32bpt / force16bpt below).  The 'format' argument is not
+ * consulted.
+ *
+ * Returns MESA_FORMAT_NONE, after reporting the problem, when the internal
+ * format is not one of those handled here.
+ */
+static gl_format
+sisChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+			  GLenum format, GLenum type )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+
+   /* do32bpt: unsized formats may use 32 bits per texel.
+    * force16bpt: even explicitly sized 8-bit-or-wider formats drop to 16.
+    */
+   const GLboolean do32bpt =
+       (smesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32);
+   const GLboolean force16bpt =
+       (smesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16);
+
+   switch ( internalFormat ) {
+   /* Unsized RGBA: follow the layout of the incoming pixel type */
+   case 4:
+   case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
+      switch ( type ) {
+      case GL_UNSIGNED_INT_10_10_10_2:
+      case GL_UNSIGNED_INT_2_10_10_10_REV:
+         return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB1555;
+      case GL_UNSIGNED_SHORT_4_4_4_4:
+      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+         return MESA_FORMAT_ARGB4444;
+      case GL_UNSIGNED_SHORT_5_5_5_1:
+      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+         return MESA_FORMAT_ARGB1555;
+      default:
+         return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+      }
+
+   /* Unsized RGB: follow the layout of the incoming pixel type */
+   case 3:
+   case GL_RGB:
+   case GL_COMPRESSED_RGB:
+      switch ( type ) {
+      case GL_UNSIGNED_SHORT_4_4_4_4:
+      case GL_UNSIGNED_SHORT_4_4_4_4_REV:
+         return MESA_FORMAT_ARGB4444;
+      case GL_UNSIGNED_SHORT_5_5_5_1:
+      case GL_UNSIGNED_SHORT_1_5_5_5_REV:
+         return MESA_FORMAT_ARGB1555;
+      case GL_UNSIGNED_SHORT_5_6_5:
+      case GL_UNSIGNED_SHORT_5_6_5_REV:
+         return MESA_FORMAT_RGB565;
+      default:
+         return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_RGB565;
+      }
+
+   /* Sized formats: honoured unless 16 bpt is being forced */
+   case GL_RGBA8:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return !force16bpt ?
+          MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+
+   case GL_RGB10_A2:
+      return !force16bpt ?
+          MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB1555;
+
+   case GL_RGBA4:
+   case GL_RGBA2:
+      return MESA_FORMAT_ARGB4444;
+
+   case GL_RGB5_A1:
+      return MESA_FORMAT_ARGB1555;
+
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return !force16bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_RGB565;
+
+   case GL_RGB5:
+   case GL_RGB4:
+      return MESA_FORMAT_RGB565;
+
+   case GL_R3_G3_B2:
+      return MESA_FORMAT_RGB332;
+
+   case GL_ALPHA:
+   case GL_ALPHA4:		/* FIXME: This could use its own texstore */
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+      return MESA_FORMAT_A8;
+
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:		/* FIXME: This could use its own texstore */
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+      return MESA_FORMAT_L8;
+
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:	/* FIXME: This could use its own texstore */
+   case GL_LUMINANCE6_ALPHA2:	/* FIXME: This could use its own texstore */
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:	/* FIXME: This could use its own texstore */
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return MESA_FORMAT_AL88;
+
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      return MESA_FORMAT_I8;
+
+   case GL_YCBCR_MESA:
+      if (type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+          type == GL_UNSIGNED_BYTE)
+         return MESA_FORMAT_YCBCR;
+      else
+         return MESA_FORMAT_YCBCR_REV;
+
+   default:
+      /* Name this function in the report so the message can be traced */
+      _mesa_problem(ctx, "unexpected format in sisChooseTextureFormat: %d",
+         internalFormat);
+      return MESA_FORMAT_NONE;
+   }
+}
+
+/*
+ * glTexImage1D hook (functions->TexImage1D): lets core Mesa store the image
+ * in texImage->Data, then copies the whole level into newly allocated
+ * offscreen space and flags the current texture unit for re-validation.
+ */
+static void sisTexImage1D( GLcontext *ctx, GLenum target, GLint level,
+			     GLint internalFormat,
+			     GLint width, GLint border,
+			     GLenum format, GLenum type, const GLvoid *pixels,
+			     const struct gl_pixelstore_attrib *packing,
+			     struct gl_texture_object *texObj,
+			     struct gl_texture_image *texImage )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   sisTexObjPtr sisTex = texObj->DriverData;
+
+   /* Create the driver-side texture object on first use */
+   if ( sisTex == NULL ) {
+      sisAllocTexObj( texObj );
+      sisTex = texObj->DriverData;
+   }
+
+   /* Note, this will call sisChooseTextureFormat */
+   _mesa_store_teximage1d( ctx, target, level, internalFormat,
+                           width, border, format, type,
+                           pixels, packing, texObj, texImage );
+
+   /* Drop the level's old offscreen space and allocate it afresh */
+   sisFreeTexImage(smesa, sisTex, level);
+   sisAllocTexImage(smesa, sisTex, level, texImage);
+
+   /* Upload the texture once the engine is idle */
+   WaitEngIdle(smesa);
+   memcpy(sisTex->image[level].Data, texImage->Data,
+          sisTex->image[level].size);
+
+   /* A change of base format also invalidates the texture environment */
+   if (smesa->PrevTexFormat[ctx->Texture.CurrentUnit] != sisTex->format) {
+      smesa->TexStates[ctx->Texture.CurrentUnit] |= NEW_TEXTURE_ENV;
+      smesa->PrevTexFormat[ctx->Texture.CurrentUnit] = sisTex->format;
+   }
+   smesa->TexStates[ctx->Texture.CurrentUnit] |= NEW_TEXTURING;
+}
+
+
+/*
+ * glTexSubImage1D hook (functions->TexSubImage1D).
+ *
+ * Core Mesa merges the new texels into texImage->Data; the driver then
+ * refreshes its offscreen copy of the level and flags the current texture
+ * unit for re-validation.
+ */
+static void sisTexSubImage1D( GLcontext *ctx,
+				GLenum target,
+				GLint level,
+				GLint xoffset,
+				GLsizei width,
+				GLenum format, GLenum type,
+				const GLvoid *pixels,
+				const struct gl_pixelstore_attrib *packing,
+				struct gl_texture_object *texObj,
+				struct gl_texture_image *texImage )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   sisTexObjPtr t;
+
+   if ( texObj->DriverData == NULL )
+      sisAllocTexObj( texObj );
+   t = texObj->DriverData;
+
+   _mesa_store_texsubimage1d(ctx, target, level, xoffset, width,
+			     format, type, pixels, packing, texObj,
+			     texImage);
+
+   /* Allocate offscreen space for the texture */
+   sisFreeTexImage(smesa, t, level);
+   sisAllocTexImage(smesa, t, level, texImage);
+
+   /*
+    * Upload the texture.  The level's offscreen storage has just been
+    * released and re-allocated, so nothing may be assumed about what it
+    * holds: copy the complete image (which already contains the updated
+    * texels), exactly as sisTexImage1D does, rather than only the
+    * modified range.
+    */
+   WaitEngIdle(smesa);
+   memcpy(t->image[level].Data, texImage->Data, t->image[level].size);
+
+   smesa->clearTexCache = GL_TRUE;
+
+   if (smesa->PrevTexFormat[ctx->Texture.CurrentUnit] != t->format)
+   {
+      smesa->TexStates[ctx->Texture.CurrentUnit] |= NEW_TEXTURE_ENV;
+      smesa->PrevTexFormat[ctx->Texture.CurrentUnit] = t->format;
+   }
+   smesa->TexStates[ctx->Texture.CurrentUnit] |= NEW_TEXTURING;
+}
+
+/*
+ * glTexImage2D hook (functions->TexImage2D): lets core Mesa store the image
+ * in texImage->Data, then copies the whole level into newly allocated
+ * offscreen space and flags the current texture unit for re-validation.
+ */
+static void sisTexImage2D( GLcontext *ctx, GLenum target, GLint level,
+			     GLint internalFormat,
+			     GLint width, GLint height, GLint border,
+			     GLenum format, GLenum type, const GLvoid *pixels,
+			     const struct gl_pixelstore_attrib *packing,
+			     struct gl_texture_object *texObj,
+			     struct gl_texture_image *texImage )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   sisTexObjPtr t;
+
+   if ( texObj->DriverData == NULL )
+      sisAllocTexObj( texObj );
+   t = texObj->DriverData;
+
+   /* Note, this will call sisChooseTextureFormat.
+    * Use the unpack state handed in by the caller, as the 1D and
+    * sub-image hooks do, instead of reaching into ctx->Unpack.
+    */
+   _mesa_store_teximage2d(ctx, target, level, internalFormat,
+                          width, height, border, format, type, pixels,
+                          packing, texObj, texImage);
+
+   /* Allocate offscreen space for the texture */
+   sisFreeTexImage(smesa, t, level);
+   sisAllocTexImage(smesa, t, level, texImage);
+
+   /* Upload the texture */
+   WaitEngIdle(smesa);
+   memcpy(t->image[level].Data, texImage->Data, t->image[level].size);
+   
+   /* A change of base format also invalidates the texture environment */
+   if (smesa->PrevTexFormat[ctx->Texture.CurrentUnit] != t->format)
+   {
+      smesa->TexStates[ctx->Texture.CurrentUnit] |= NEW_TEXTURE_ENV;
+      smesa->PrevTexFormat[ctx->Texture.CurrentUnit] = t->format;
+   }
+   smesa->TexStates[ctx->Texture.CurrentUnit] |= NEW_TEXTURING;
+}
+
+/*
+ * glTexSubImage2D hook (functions->TexSubImage2D).
+ *
+ * Core Mesa merges the new texels into texImage->Data; the driver then
+ * refreshes its offscreen copy of the level and flags the current texture
+ * unit for re-validation.
+ */
+static void sisTexSubImage2D( GLcontext *ctx,
+				GLenum target,
+				GLint level,
+				GLint xoffset, GLint yoffset,
+				GLsizei width, GLsizei height,
+				GLenum format, GLenum type,
+				const GLvoid *pixels,
+				const struct gl_pixelstore_attrib *packing,
+				struct gl_texture_object *texObj,
+				struct gl_texture_image *texImage )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   sisTexObjPtr t;
+
+   if ( texObj->DriverData == NULL )
+      sisAllocTexObj( texObj );
+   t = texObj->DriverData;
+
+   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width,
+			     height, format, type, pixels, packing, texObj,
+			     texImage);
+
+   /* Allocate offscreen space for the texture */
+   sisFreeTexImage(smesa, t, level);
+   sisAllocTexImage(smesa, t, level, texImage);
+
+   /*
+    * Upload the texture.  The level's offscreen storage has just been
+    * released and re-allocated, so nothing may be assumed about what it
+    * holds: copy the complete image (which already contains the updated
+    * rectangle), exactly as sisTexImage2D does, rather than only the
+    * modified rows.
+    */
+   WaitEngIdle(smesa);
+   memcpy(t->image[level].Data, texImage->Data, t->image[level].size);
+
+   smesa->clearTexCache = GL_TRUE;
+
+   if (smesa->PrevTexFormat[ctx->Texture.CurrentUnit] != t->format)
+   {
+      smesa->TexStates[ctx->Texture.CurrentUnit] |= NEW_TEXTURE_ENV;
+      smesa->PrevTexFormat[ctx->Texture.CurrentUnit] = t->format;
+   }
+   smesa->TexStates[ctx->Texture.CurrentUnit] |= NEW_TEXTURING;
+}
+
+
+/**
+ * Create a texture object; reached through ctx->Driver.NewTextureObject.
+ *
+ * Context creation also comes through here when it allocates the default
+ * texture objects.  The driver keeps no derived ("contained") object type
+ * of its own at this time, so the request is handed straight to core Mesa
+ * and the resulting gl_texture_object is returned unchanged.
+ */
+static struct gl_texture_object *
+sisNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
+{
+   return _mesa_new_texture_object(ctx, name, target);
+}
+
+
+/* Plug this file's texture hooks into the driver function table. */
+void sisInitTextureFuncs( struct dd_function_table *functions )
+{
+   /* Texture object lifetime and binding */
+   functions->NewTextureObject		= sisNewTextureObject;
+   functions->DeleteTexture		= sisDeleteTexture;
+   functions->BindTexture		= sisBindTexture;
+   functions->IsTextureResident	= sisIsTextureResident;
+
+   /* Image specification */
+   functions->ChooseTextureFormat	= sisChooseTextureFormat;
+   functions->TexImage1D		= sisTexImage1D;
+   functions->TexImage2D		= sisTexImage2D;
+   functions->TexSubImage1D		= sisTexSubImage1D;
+   functions->TexSubImage2D		= sisTexSubImage2D;
+
+   /* Sampling and environment state */
+   functions->TexParameter		= sisTexParameter;
+   functions->TexEnv			= sisTexEnv;
+}
diff --git a/src/mesa/drivers/dri/sis/sis_tex.h b/src/mesa/drivers/dri/sis/sis_tex.h
new file mode 100644
index 0000000000..c499e80e86
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_tex.h
@@ -0,0 +1,37 @@
+/**************************************************************************
+
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *    Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#ifndef __SIS_TEX_H__
+#define __SIS_TEX_H__
+
+/* Fills in the texture-related entries of the driver function table. */
+extern void sisInitTextureFuncs( struct dd_function_table *table );
+/* Re-validates the hardware texture state of the context's texture units. */
+extern void sisUpdateTextureState( GLcontext *ctx );
+
+#endif /* __SIS_TEX_H__ */
diff --git a/src/mesa/drivers/dri/sis/sis_texstate.c b/src/mesa/drivers/dri/sis/sis_texstate.c
new file mode 100644
index 0000000000..7b0eebd066
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_texstate.c
@@ -0,0 +1,712 @@
+/**************************************************************************
+
+Copyright 2000 Silicon Integrated Systems Corp, Inc., HsinChu, Taiwan.
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Sung-Ching Lin <sclin@sis.com.tw>
+ *   Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/colormac.h"
+#include "main/context.h"
+#include "main/macros.h"
+
+#include "sis_context.h"
+#include "sis_tex.h"
+#include "sis_tris.h"
+#include "sis_alloc.h"
+
+/* Defined at the end of this file; declared here for sis_set_texobj_parm(). */
+static GLint TransferTexturePitch (GLint dwPitch);
+
+/*
+ * Handle texenv stuff for blend stage 0, called from validate_texture
+ * (renderstart).
+ *
+ * Chooses the stage-0 colour and alpha blend settings from the unit's
+ * EnvMode and the texture's base format (t->format), loads the environment
+ * colour for GL_BLEND, and raises GFLAG_TEXTUREENV when any of the three
+ * values differs from the previously recorded state.  Unknown env modes or
+ * base formats are reported through sis_fatal_error().
+ */
+static void
+sis_set_texture_env0( GLcontext *ctx, struct gl_texture_object *texObj,
+   int unit )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   GLubyte c[4];
+
+   __GLSiSHardware *prev = &smesa->prev;
+   __GLSiSHardware *current = &smesa->current;
+
+   struct gl_texture_unit *texture_unit = &ctx->Texture.Unit[unit];
+
+   sisTexObjPtr t = texObj->DriverData;
+
+   switch (texture_unit->EnvMode)
+   {
+   case GL_REPLACE:
+      switch (t->format)
+      {
+      case GL_ALPHA:
+         current->hwTexBlendColor0 = STAGE0_C_CF;
+         current->hwTexBlendAlpha0 = STAGE0_A_AS;
+         break;
+      case GL_LUMINANCE:
+      case GL_RGB:
+      case GL_YCBCR_MESA:
+         current->hwTexBlendColor0 = STAGE0_C_CS;
+         current->hwTexBlendAlpha0 = STAGE0_A_AF;
+         break;
+      case GL_INTENSITY:
+      case GL_LUMINANCE_ALPHA:
+      case GL_RGBA:
+         current->hwTexBlendColor0 = STAGE0_C_CS;
+         current->hwTexBlendAlpha0 = STAGE0_A_AS;
+         break;
+      default:
+         sis_fatal_error("unknown base format 0x%x\n", t->format);
+      }
+      break;
+
+   case GL_MODULATE:
+      switch (t->format)
+      {
+      case GL_ALPHA:
+         current->hwTexBlendColor0 = STAGE0_C_CF;
+         current->hwTexBlendAlpha0 = STAGE0_A_AFAS;
+         break;
+      case GL_LUMINANCE:
+      case GL_RGB:
+      case GL_YCBCR_MESA:
+         current->hwTexBlendColor0 = STAGE0_C_CFCS;
+         current->hwTexBlendAlpha0 = STAGE0_A_AF;
+         break;
+      case GL_INTENSITY:
+      case GL_LUMINANCE_ALPHA:
+      case GL_RGBA:
+         current->hwTexBlendColor0 = STAGE0_C_CFCS;
+         current->hwTexBlendAlpha0 = STAGE0_A_AFAS;
+         break;
+      default:
+         sis_fatal_error("unknown base format 0x%x\n", t->format);
+      }
+      break;
+
+   case GL_DECAL:
+      switch (t->format)
+      {
+      case GL_RGB:
+      case GL_YCBCR_MESA:
+         current->hwTexBlendColor0 = STAGE0_C_CS;
+         current->hwTexBlendAlpha0 = STAGE0_A_AF;
+         break;
+      case GL_RGBA:
+         current->hwTexBlendColor0 = STAGE0_C_CFOMAS_CSAS;
+         current->hwTexBlendAlpha0 = STAGE0_A_AF;
+         break;
+      case GL_ALPHA:
+      case GL_LUMINANCE:
+      case GL_INTENSITY:
+      case GL_LUMINANCE_ALPHA:
+         current->hwTexBlendColor0 = STAGE0_C_CF;
+         current->hwTexBlendAlpha0 = STAGE0_A_AF;
+         break;
+      default:
+         sis_fatal_error("unknown base format 0x%x\n", t->format);
+      }
+      break;
+
+   case GL_BLEND:
+      UNCLAMPED_FLOAT_TO_RGBA_CHAN(c, texture_unit->EnvColor);
+      /* Pack as A,R,G,B bytes.  The operands are widened to unsigned so
+       * that an alpha of 128 or more is not shifted into the sign bit of
+       * a signed int, which would be undefined behaviour.
+       */
+      current->hwTexEnvColor = ((GLuint) c[3]) << 24 |
+                               ((GLuint) c[0]) << 16 |
+                               ((GLuint) c[1]) << 8 |
+                               ((GLuint) c[2]);
+      switch (t->format)
+      {
+      case GL_ALPHA:
+         current->hwTexBlendColor0 = STAGE0_C_CF;
+         current->hwTexBlendAlpha0 = STAGE0_A_AFAS;
+         break;
+      case GL_LUMINANCE:
+      case GL_RGB:
+      case GL_YCBCR_MESA:
+         current->hwTexBlendColor0 = STAGE0_C_CFOMCS_CCCS;
+         current->hwTexBlendAlpha0 = STAGE0_A_AF;
+         break;
+      case GL_INTENSITY:
+         current->hwTexBlendColor0 = STAGE0_C_CFOMCS_CCCS;
+         current->hwTexBlendAlpha0 = STAGE0_A_AFOMAS_ACAS;
+         break;
+      case GL_LUMINANCE_ALPHA:
+      case GL_RGBA:
+         current->hwTexBlendColor0 = STAGE0_C_CFOMCS_CCCS;
+         current->hwTexBlendAlpha0 = STAGE0_A_AFAS;
+         break;
+      default:
+         sis_fatal_error("unknown base format 0x%x\n", t->format);
+      }
+      break;
+
+   default:
+      sis_fatal_error("unknown env mode 0x%x\n", texture_unit->EnvMode);
+   }
+
+   /* Record the new values and flag them only when something changed */
+   if ((current->hwTexBlendColor0 != prev->hwTexBlendColor0) ||
+       (current->hwTexBlendAlpha0 != prev->hwTexBlendAlpha0) ||
+       (current->hwTexEnvColor != prev->hwTexEnvColor))
+   {
+      prev->hwTexEnvColor = current->hwTexEnvColor;
+      prev->hwTexBlendColor0 = current->hwTexBlendColor0;
+      prev->hwTexBlendAlpha0 = current->hwTexBlendAlpha0;
+      smesa->GlobalFlag |= GFLAG_TEXTUREENV;
+   }
+}
+
+/*
+ * Handle texenv stuff for blend stage 1, called from validate_texture
+ * (renderstart).
+ *
+ * Chooses the stage-1 colour and alpha blend settings from the unit's
+ * EnvMode and the texture's base format (t->format), loads the environment
+ * colour for GL_BLEND, and raises GFLAG_TEXTUREENV_1 when any of the three
+ * values differs from the previously recorded state.  Unknown env modes or
+ * base formats are reported through sis_fatal_error().
+ */
+static void
+sis_set_texture_env1( GLcontext *ctx, struct gl_texture_object *texObj,
+   int unit)
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   GLubyte c[4];
+
+   __GLSiSHardware *prev = &smesa->prev;
+   __GLSiSHardware *current = &smesa->current;
+
+   struct gl_texture_unit *texture_unit = &ctx->Texture.Unit[unit];
+
+   sisTexObjPtr t = texObj->DriverData;
+
+   switch (texture_unit->EnvMode)
+   {
+   case GL_REPLACE:
+      switch (t->format)
+      {
+      case GL_ALPHA:
+         current->hwTexBlendColor1 = STAGE1_C_CF;
+         current->hwTexBlendAlpha1 = STAGE1_A_AS;
+         break;
+      case GL_LUMINANCE:
+      case GL_RGB:
+      case GL_YCBCR_MESA:
+         current->hwTexBlendColor1 = STAGE1_C_CS;
+         current->hwTexBlendAlpha1 = STAGE1_A_AF;
+         break;
+      case GL_INTENSITY:
+      case GL_LUMINANCE_ALPHA:
+      case GL_RGBA:
+         current->hwTexBlendColor1 = STAGE1_C_CS;
+         current->hwTexBlendAlpha1 = STAGE1_A_AS;
+         break;
+      default:
+         sis_fatal_error("unknown base format 0x%x\n", t->format);
+      }
+      break;
+
+   case GL_MODULATE:
+      switch (t->format)
+      {
+      case GL_ALPHA:
+         current->hwTexBlendColor1 = STAGE1_C_CF;
+         current->hwTexBlendAlpha1 = STAGE1_A_AFAS;
+         break;
+      case GL_LUMINANCE:
+      case GL_RGB:
+      case GL_YCBCR_MESA:
+         current->hwTexBlendColor1 = STAGE1_C_CFCS;
+         current->hwTexBlendAlpha1 = STAGE1_A_AF;
+         break;
+      case GL_INTENSITY:
+      case GL_LUMINANCE_ALPHA:
+      case GL_RGBA:
+         current->hwTexBlendColor1 = STAGE1_C_CFCS;
+         current->hwTexBlendAlpha1 = STAGE1_A_AFAS;
+         break;
+      default:
+         sis_fatal_error("unknown base format 0x%x\n", t->format);
+      }
+      break;
+
+   case GL_DECAL:
+      switch (t->format)
+      {
+      case GL_RGB:
+      case GL_YCBCR_MESA:
+         current->hwTexBlendColor1 = STAGE1_C_CS;
+         current->hwTexBlendAlpha1 = STAGE1_A_AF;
+         break;
+      case GL_RGBA:
+         current->hwTexBlendColor1 = STAGE1_C_CFOMAS_CSAS;
+         current->hwTexBlendAlpha1 = STAGE1_A_AF;
+         break;
+      case GL_ALPHA:
+      case GL_LUMINANCE:
+      case GL_INTENSITY:
+      case GL_LUMINANCE_ALPHA:
+         current->hwTexBlendColor1 = STAGE1_C_CF;
+         current->hwTexBlendAlpha1 = STAGE1_A_AF;
+         break;
+      default:
+         sis_fatal_error("unknown base format 0x%x\n", t->format);
+      }
+      break;
+
+   case GL_BLEND:
+      UNCLAMPED_FLOAT_TO_RGBA_CHAN(c, texture_unit->EnvColor);
+      /* Pack as A,R,G,B bytes.  The operands are widened to unsigned so
+       * that an alpha of 128 or more is not shifted into the sign bit of
+       * a signed int, which would be undefined behaviour.
+       */
+      current->hwTexEnvColor = ((GLuint) c[3]) << 24 |
+                               ((GLuint) c[0]) << 16 |
+                               ((GLuint) c[1]) << 8 |
+                               ((GLuint) c[2]);
+      switch (t->format)
+      {
+      case GL_ALPHA:
+         current->hwTexBlendColor1 = STAGE1_C_CF;
+         current->hwTexBlendAlpha1 = STAGE1_A_AFAS;
+         break;
+      case GL_LUMINANCE:
+      case GL_RGB:
+      case GL_YCBCR_MESA:
+         current->hwTexBlendColor1 = STAGE1_C_CFOMCS_CCCS;
+         current->hwTexBlendAlpha1 = STAGE1_A_AF;
+         break;
+      case GL_INTENSITY:
+         current->hwTexBlendColor1 = STAGE1_C_CFOMCS_CCCS;
+         current->hwTexBlendAlpha1 = STAGE1_A_AFOMAS_ACAS;
+         break;
+      case GL_LUMINANCE_ALPHA:
+      case GL_RGBA:
+         current->hwTexBlendColor1 = STAGE1_C_CFOMCS_CCCS;
+         current->hwTexBlendAlpha1 = STAGE1_A_AFAS;
+         break;
+      default:
+         sis_fatal_error("unknown base format 0x%x\n", t->format);
+      }
+      break;
+
+   default:
+      sis_fatal_error("unknown env mode 0x%x\n", texture_unit->EnvMode);
+   }
+
+   /* Record the new values and flag them only when something changed */
+   if ((current->hwTexBlendColor1 != prev->hwTexBlendColor1) ||
+       (current->hwTexBlendAlpha1 != prev->hwTexBlendAlpha1) ||
+       (current->hwTexEnvColor != prev->hwTexEnvColor))
+   {
+      prev->hwTexBlendColor1 = current->hwTexBlendColor1;
+      prev->hwTexBlendAlpha1 = current->hwTexBlendAlpha1;
+      prev->hwTexEnvColor = current->hwTexEnvColor;
+      smesa->GlobalFlag |= GFLAG_TEXTUREENV_1;
+   }
+}
+
+/*
+ * Translate a texture object's parameters (filters, LOD bias, wrap modes,
+ * border colour, level dimensions and level addresses/pitches) into the
+ * state of hardware texture unit hw_unit, raising the matching GlobalFlag
+ * bits along the way.
+ *
+ * Returns 0 if a software fallback is necessary.  Note that the only
+ * statements that would clear 'ok' are currently commented out (GL_CLAMP),
+ * so as written the function always returns 1.
+ */
+static GLboolean
+sis_set_texobj_parm( GLcontext *ctx, struct gl_texture_object *texObj,
+   int hw_unit )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   int ok = 1;
+
+   __GLSiSHardware *prev = &smesa->prev;
+   __GLSiSHardware *current = &smesa->current;
+
+   sisTexObjPtr t = texObj->DriverData;
+
+   GLint firstLevel, lastLevel;
+   GLint i;
+
+   /* Both words are rebuilt from scratch on every call */
+   current->texture[hw_unit].hwTextureMip = 0UL;
+   current->texture[hw_unit].hwTextureSet = t->hwformat;
+
+   if ((texObj->MinFilter == GL_NEAREST) || (texObj->MinFilter == GL_LINEAR)) {
+      /* No mipmapping: only the base level is used */
+      firstLevel = lastLevel = texObj->BaseLevel;
+   } else {
+      /* Compute which mipmap levels we really want to send to the hardware.
+       * This depends on the base image size, GL_TEXTURE_MIN_LOD,
+       * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL and GL_TEXTURE_MAX_LEVEL.
+       * Yes, this looks overly complicated, but it's all needed.
+       */
+
+      firstLevel = texObj->BaseLevel + (GLint)(texObj->MinLod + 0.5);
+      firstLevel = MAX2(firstLevel, texObj->BaseLevel);
+      lastLevel = texObj->BaseLevel + (GLint)(texObj->MaxLod + 0.5);
+      lastLevel = MAX2(lastLevel, texObj->BaseLevel);
+      lastLevel = MIN2(lastLevel, texObj->BaseLevel +
+         texObj->Image[0][texObj->BaseLevel]->MaxLog2);
+      lastLevel = MIN2(lastLevel, texObj->MaxLevel);
+      lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */
+   }
+
+   current->texture[hw_unit].hwTextureSet |= (lastLevel << 8);
+
+   /* Magnification filter */
+   switch (texObj->MagFilter)
+   {
+   case GL_NEAREST:
+      current->texture[hw_unit].hwTextureMip |= TEXTURE_FILTER_NEAREST;
+      break;
+   case GL_LINEAR:
+      current->texture[hw_unit].hwTextureMip |= (TEXTURE_FILTER_LINEAR << 3);
+      break;
+   }
+
+   {
+      GLint b;
+
+      /* The mipmap lod biasing is based on experiment.  It seems there's a
+       * limit of around +4/-4 to the bias value; we're being conservative.
+       */
+      b = (GLint) (ctx->Texture.Unit[hw_unit].LodBias * 32.0);
+      if (b > 127)
+         b = 127;
+      else if (b < -128)
+         b = -128;
+
+      /* b may be negative; shift it as an unsigned value, because
+       * left-shifting a negative signed integer is undefined behaviour.
+       * The mask then keeps only the bias field.
+       */
+      current->texture[hw_unit].hwTextureMip |= ((((GLuint) b) << 4) &
+         MASK_TextureMipmapLodBias);
+   }
+
+   /* Minification filter */
+   switch (texObj->MinFilter)
+   {
+   case GL_NEAREST:
+      current->texture[hw_unit].hwTextureMip |= TEXTURE_FILTER_NEAREST;
+      break;
+   case GL_LINEAR:
+      current->texture[hw_unit].hwTextureMip |= TEXTURE_FILTER_LINEAR;
+      break;
+   case GL_NEAREST_MIPMAP_NEAREST:
+      current->texture[hw_unit].hwTextureMip |=
+         TEXTURE_FILTER_NEAREST_MIP_NEAREST;
+      break;
+   case GL_NEAREST_MIPMAP_LINEAR:
+      current->texture[hw_unit].hwTextureMip |=
+         TEXTURE_FILTER_NEAREST_MIP_LINEAR;
+      break;
+   case GL_LINEAR_MIPMAP_NEAREST:
+      current->texture[hw_unit].hwTextureMip |=
+         TEXTURE_FILTER_LINEAR_MIP_NEAREST;
+      break;
+   case GL_LINEAR_MIPMAP_LINEAR:
+      current->texture[hw_unit].hwTextureMip |=
+         TEXTURE_FILTER_LINEAR_MIP_LINEAR;
+      break;
+   }
+
+   /* Wrap mode along S (hardware U) */
+   switch (texObj->WrapS)
+   {
+   case GL_REPEAT:
+      current->texture[hw_unit].hwTextureSet |= MASK_TextureWrapU;
+      break;
+   case GL_MIRRORED_REPEAT:
+      current->texture[hw_unit].hwTextureSet |= MASK_TextureMirrorU;
+      break;
+   case GL_CLAMP:
+      current->texture[hw_unit].hwTextureSet |= MASK_TextureClampU;
+       /* XXX: GL_CLAMP isn't conformant, but falling back makes the situation
+        * worse in other programs at the moment.
+        */
+      /*ok = 0;*/
+      break;
+   case GL_CLAMP_TO_EDGE:
+      current->texture[hw_unit].hwTextureSet |= MASK_TextureClampU;
+      break;
+   case GL_CLAMP_TO_BORDER:
+      current->texture[hw_unit].hwTextureSet |= MASK_TextureBorderU;
+      break;
+   }
+
+   /* Wrap mode along T (hardware V) */
+   switch (texObj->WrapT)
+   {
+   case GL_REPEAT:
+      current->texture[hw_unit].hwTextureSet |= MASK_TextureWrapV;
+      break;
+   case GL_MIRRORED_REPEAT:
+      current->texture[hw_unit].hwTextureSet |= MASK_TextureMirrorV;
+      break;
+   case GL_CLAMP:
+      current->texture[hw_unit].hwTextureSet |= MASK_TextureClampV;
+       /* XXX: GL_CLAMP isn't conformant, but falling back makes the situation
+        * worse in other programs at the moment.
+        */
+      /*ok = 0;*/
+      break;
+   case GL_CLAMP_TO_EDGE:
+      current->texture[hw_unit].hwTextureSet |= MASK_TextureClampV;
+      break;
+   case GL_CLAMP_TO_BORDER:
+      current->texture[hw_unit].hwTextureSet |= MASK_TextureBorderV;
+      break;
+   }
+
+   /* Border colour, packed in A,R,G,B order */
+   {
+      GLubyte c[4];
+      CLAMPED_FLOAT_TO_UBYTE(c[0], texObj->BorderColor.f[0]);
+      CLAMPED_FLOAT_TO_UBYTE(c[1], texObj->BorderColor.f[1]);
+      CLAMPED_FLOAT_TO_UBYTE(c[2], texObj->BorderColor.f[2]);
+      CLAMPED_FLOAT_TO_UBYTE(c[3], texObj->BorderColor.f[3]);
+
+      current->texture[hw_unit].hwTextureBorderColor = 
+         PACK_COLOR_8888(c[3], c[0], c[1], c[2]);
+   }
+
+   if (current->texture[hw_unit].hwTextureBorderColor !=
+       prev->texture[hw_unit].hwTextureBorderColor) 
+   {
+      prev->texture[hw_unit].hwTextureBorderColor =
+         current->texture[hw_unit].hwTextureBorderColor; 
+      if (hw_unit == 1)
+         smesa->GlobalFlag |= GFLAG_TEXBORDERCOLOR_1; 
+      else
+         smesa->GlobalFlag |= GFLAG_TEXBORDERCOLOR;
+   }
+
+   /* log2 dimensions of the first level that is used */
+   current->texture[hw_unit].hwTextureSet |=
+      texObj->Image[0][firstLevel]->WidthLog2 << 4;
+   current->texture[hw_unit].hwTextureSet |=
+      texObj->Image[0][firstLevel]->HeightLog2;
+
+   /* The level addresses below are always flagged as changed */
+   if (hw_unit == 0)
+      smesa->GlobalFlag |= GFLAG_TEXTUREADDRESS;
+   else
+      smesa->GlobalFlag |= GFLAG_TEXTUREADDRESS_1;
+
+   /* Per-level offset and pitch.  These are stored directly in 'prev'.
+    * Two levels share one pitch word: the even level is written to the
+    * upper 16 bits, the odd level is OR'ed into the lower bits.
+    */
+   for (i = firstLevel; i <= lastLevel; i++)
+   {
+      GLuint texOffset = 0;
+      GLuint texPitch = TransferTexturePitch( t->image[i].pitch );
+
+      switch (t->image[i].memType)
+      {
+      case VIDEO_TYPE:
+         /* Offset relative to the framebuffer base */
+         texOffset = ((unsigned long)t->image[i].Data - (unsigned long)smesa->FbBase);
+         break;
+      case AGP_TYPE:
+         /* Offset within the AGP mapping plus AGPAddr; the level is also
+          * marked via MASK_TextureLevel0InSystem shifted by its index.
+          */
+         texOffset = ((unsigned long)t->image[i].Data - (unsigned long)smesa->AGPBase) +
+            (unsigned long) smesa->AGPAddr;
+         current->texture[hw_unit].hwTextureMip |=
+            (MASK_TextureLevel0InSystem << i);
+         break;
+      }
+
+      switch (i)
+      {
+      case 0:
+         prev->texture[hw_unit].texOffset0 = texOffset;
+         prev->texture[hw_unit].texPitch01 = texPitch << 16;
+         break;
+      case 1:
+         prev->texture[hw_unit].texOffset1 = texOffset;
+         prev->texture[hw_unit].texPitch01 |= texPitch;
+         break;
+      case 2:
+         prev->texture[hw_unit].texOffset2 = texOffset;
+         prev->texture[hw_unit].texPitch23 = texPitch << 16;
+         break;
+      case 3:
+         prev->texture[hw_unit].texOffset3 = texOffset;
+         prev->texture[hw_unit].texPitch23 |= texPitch;
+         break;
+      case 4:
+         prev->texture[hw_unit].texOffset4 = texOffset;
+         prev->texture[hw_unit].texPitch45 = texPitch << 16;
+         break;
+      case 5:
+         prev->texture[hw_unit].texOffset5 = texOffset;
+         prev->texture[hw_unit].texPitch45 |= texPitch;
+         break;
+      case 6:
+         prev->texture[hw_unit].texOffset6 = texOffset;
+         prev->texture[hw_unit].texPitch67 = texPitch << 16;
+         break;
+      case 7:
+         prev->texture[hw_unit].texOffset7 = texOffset;
+         prev->texture[hw_unit].texPitch67 |= texPitch;
+         break;
+      case 8:
+         prev->texture[hw_unit].texOffset8 = texOffset;
+         prev->texture[hw_unit].texPitch89 = texPitch << 16;
+         break;
+      case 9:
+         prev->texture[hw_unit].texOffset9 = texOffset;
+         prev->texture[hw_unit].texPitch89 |= texPitch;
+         break;
+      case 10:
+         prev->texture[hw_unit].texOffset10 = texOffset;
+         prev->texture[hw_unit].texPitch10 = texPitch << 16;
+         break;
+      case 11:
+         prev->texture[hw_unit].texOffset11 = texOffset;
+         prev->texture[hw_unit].texPitch10 |= texPitch;
+         break;
+      }
+   }
+
+   /* Record and flag the two rebuilt words only if they changed */
+   if (current->texture[hw_unit].hwTextureSet != 
+      prev->texture[hw_unit].hwTextureSet)
+   {
+      prev->texture[hw_unit].hwTextureSet =
+         current->texture[hw_unit].hwTextureSet;
+      if (hw_unit == 1)
+         smesa->GlobalFlag |= CFLAG_TEXTURERESET_1;
+      else
+         smesa->GlobalFlag |= CFLAG_TEXTURERESET;
+   }
+   if (current->texture[hw_unit].hwTextureMip != 
+      prev->texture[hw_unit].hwTextureMip)
+   {
+      prev->texture[hw_unit].hwTextureMip =
+         current->texture[hw_unit].hwTextureMip;
+      if (hw_unit == 1)
+         smesa->GlobalFlag |= GFLAG_TEXTUREMIPMAP_1;
+      else
+         smesa->GlobalFlag |= GFLAG_TEXTUREMIPMAP;
+   }
+
+   return ok;
+}
+
+/*
+ * Disable a texture unit, called from validate_texture.
+ *
+ * The unit's blend stage is set to the STAGEn_C_CF / STAGEn_A_AF values.
+ * The matching texture-env flag is raised only if the stage values or the
+ * environment colour differ from the previously recorded state.
+ */
+static void
+sis_reset_texture_env (GLcontext *ctx, int hw_unit)
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   __GLSiSHardware *hwPrev = &smesa->prev;
+   __GLSiSHardware *hwCur = &smesa->current;
+
+   if (hw_unit != 1) {
+      /* Stage 0 */
+      hwCur->hwTexBlendColor0 = STAGE0_C_CF;
+      hwCur->hwTexBlendAlpha0 = STAGE0_A_AF;
+
+      if (hwCur->hwTexBlendColor0 == hwPrev->hwTexBlendColor0 &&
+          hwCur->hwTexBlendAlpha0 == hwPrev->hwTexBlendAlpha0 &&
+          hwCur->hwTexEnvColor == hwPrev->hwTexEnvColor)
+         return;
+
+      hwPrev->hwTexBlendColor0 = hwCur->hwTexBlendColor0;
+      hwPrev->hwTexBlendAlpha0 = hwCur->hwTexBlendAlpha0;
+      hwPrev->hwTexEnvColor = hwCur->hwTexEnvColor;
+      smesa->GlobalFlag |= GFLAG_TEXTUREENV;
+      return;
+   }
+
+   /* Stage 1 */
+   hwCur->hwTexBlendColor1 = STAGE1_C_CF;
+   hwCur->hwTexBlendAlpha1 = STAGE1_A_AF;
+
+   if (hwCur->hwTexBlendColor1 == hwPrev->hwTexBlendColor1 &&
+       hwCur->hwTexBlendAlpha1 == hwPrev->hwTexBlendAlpha1 &&
+       hwCur->hwTexEnvColor == hwPrev->hwTexEnvColor)
+      return;
+
+   hwPrev->hwTexBlendColor1 = hwCur->hwTexBlendColor1;
+   hwPrev->hwTexBlendAlpha1 = hwCur->hwTexBlendAlpha1;
+   hwPrev->hwTexEnvColor = hwCur->hwTexEnvColor;
+   smesa->GlobalFlag |= GFLAG_TEXTUREENV_1;
+}
+
+/*
+ * Bring one texture unit's hardware state up to date.
+ *
+ *  - unit disabled: reset its blend stage and clear its fallback bit;
+ *  - unit enabled for a target other than 1D/2D: set its fallback bit;
+ *  - otherwise: re-validate whatever TexStates[unit] marks as new, then
+ *    clear those marks.
+ */
+static void updateTextureUnit( GLcontext *ctx, int unit )
+{
+   sisContextPtr smesa = SIS_CONTEXT( ctx );
+   const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   struct gl_texture_object *texObj = texUnit->_Current;
+   GLint fallbackbit =
+      (unit == 0) ? SIS_FALLBACK_TEXTURE0 : SIS_FALLBACK_TEXTURE1;
+
+   if (!texUnit->_ReallyEnabled) {
+      sis_reset_texture_env( ctx, unit );
+      FALLBACK( smesa, fallbackbit, 0 );
+      return;
+   }
+
+   if (!(texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT))) {
+      /* fallback */
+      FALLBACK( smesa, fallbackbit, 1 );
+      return;
+   }
+
+   if (smesa->TexStates[unit] & NEW_TEXTURING) {
+      GLboolean ok = sis_set_texobj_parm (ctx, texObj, unit);
+
+      FALLBACK( smesa, fallbackbit, !ok );
+   }
+
+   if (smesa->TexStates[unit] & NEW_TEXTURE_ENV) {
+      if (unit != 0) {
+         sis_set_texture_env1( ctx, texObj, unit );
+      } else {
+         sis_set_texture_env0( ctx, texObj, unit );
+      }
+   }
+
+   smesa->TexStates[unit] = 0;
+}
+
+
+/*
+ * Re-validate texture units 0 and 1, then update the texture-enable and
+ * texture-count bits of hwCapEnable to match.
+ */
+void sisUpdateTextureState( GLcontext *ctx )
+{
+   sisContextPtr smesa = SIS_CONTEXT( ctx );
+   __GLSiSHardware *hw = &smesa->current;
+   GLboolean unit0Hw, unit1Hw;
+   int unit;
+
+#if 1
+   /* TODO : if unmark these, error in multitexture */ /* XXX */
+   for (unit = 0; unit < SIS_MAX_TEXTURES; unit++)
+      smesa->TexStates[unit] |= (NEW_TEXTURING | NEW_TEXTURE_ENV);
+#endif
+
+   updateTextureUnit( ctx, 0 );
+   updateTextureUnit( ctx, 1 );
+
+   /* Does either unit have a 1D or 2D target really enabled? */
+   unit0Hw = (ctx->Texture.Unit[0]._ReallyEnabled &
+              (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) != 0;
+   unit1Hw = (ctx->Texture.Unit[1]._ReallyEnabled &
+              (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) != 0;
+
+   if (!unit0Hw && !unit1Hw) {
+      hw->hwCapEnable &= ~MASK_TextureEnable;
+      return;
+   }
+
+   /* XXX Issues with the 2nd unit but not the first being enabled? */
+   hw->hwCapEnable |= MASK_TextureEnable;
+   hw->hwCapEnable &= ~MASK_TextureNumUsed;
+   /* Value written into the just-cleared MASK_TextureNumUsed field */
+   if (!ctx->Texture.Unit[1]._ReallyEnabled)
+      hw->hwCapEnable |= 0x00001000;
+   else
+      hw->hwCapEnable |= 0x00002000;
+}
+
+/*
+ * Return the index of the lowest set bit among bits 0..15 of w,
+ * or 16 when none of them is set.
+ */
+static GLint
+BitScanForward( GLshort w )
+{
+   GLint bit = 0;
+
+   while (bit < 16 && !(w & (1 << bit)))
+      bit++;
+
+   return bit;
+}
+
+/*
+ * Encode a pitch as (pitch >> n) | (n << 9), where n is the position of
+ * the lowest set bit in the low 16 bits of the pitch (16 if there is none).
+ */
+static GLint
+TransferTexturePitch( GLint dwPitch )
+{
+   const GLint shift = BitScanForward( (GLshort)dwPitch );
+
+   return (dwPitch >> shift) | (shift << 9);
+}
diff --git a/src/mesa/drivers/dri/sis/sis_tris.c b/src/mesa/drivers/dri/sis/sis_tris.c
new file mode 100644
index 0000000000..d109a8c41e
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_tris.c
@@ -0,0 +1,1154 @@
+/* $XFree86*/ /* -*- c-basic-offset: 3 -*- */
+/**************************************************************************
+
+Copyright 2000 Silicon Integrated Systems Corp, Inc., HsinChu, Taiwan.
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT OR SILICON INTEGRATED SYSTEMS CORP BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Sung-Ching Lin <sclin@sis.com.tw>
+ *   Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/macros.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "sis_context.h"
+#include "sis_tris.h"
+#include "sis_state.h"
+#include "sis_lock.h"
+#include "sis_span.h"
+#include "sis_tex.h"
+
+/* 6326 and 300-series shared */
+/* Maps a GL primitive enum (GL_POINTS .. GL_POLYGON) to the hardware draw
+ * opcode used to rasterize it.  Every polygonal GL primitive maps to the
+ * triangle opcode.
+ */
+static const GLuint hw_prim[GL_POLYGON+1] = {
+   OP_3D_POINT_DRAW,		/* GL_POINTS */
+   OP_3D_LINE_DRAW,		/* GL_LINES */
+   OP_3D_LINE_DRAW,		/* GL_LINE_LOOP */
+   OP_3D_LINE_DRAW,		/* GL_LINE_STRIP */
+   OP_3D_TRIANGLE_DRAW,		/* GL_TRIANGLES */
+   OP_3D_TRIANGLE_DRAW,		/* GL_TRIANGLE_STRIP */
+   OP_3D_TRIANGLE_DRAW,		/* GL_TRIANGLE_FAN */
+   OP_3D_TRIANGLE_DRAW,		/* GL_QUADS */
+   OP_3D_TRIANGLE_DRAW,		/* GL_QUAD_STRIP */
+   OP_3D_TRIANGLE_DRAW		/* GL_POLYGON */
+};
+
+/* The tables below are indexed by the hardware draw opcode (a value taken
+ * from hw_prim[]), in the order point, line, triangle.
+ * NOTE(review): this assumes OP_3D_POINT_DRAW, OP_3D_LINE_DRAW and
+ * OP_3D_TRIANGLE_DRAW are 0, 1 and 2 -- confirm against the register header.
+ */
+
+/* Fire-position bits OR-ed into dwPrimitiveSet on non-6326 chips. */
+static const GLuint hw_prim_mmio_fire[OP_3D_TRIANGLE_DRAW+1] = {
+   OP_3D_FIRE_TSARGBa,
+   OP_3D_FIRE_TSARGBb,
+   OP_3D_FIRE_TSARGBc
+};
+/* Fire-position bits OR-ed into dwPrimitiveSet on the 6326. */
+static const GLuint hw_prim_6326_mmio_fire[OP_3D_TRIANGLE_DRAW+1] = {
+   OP_6326_3D_FIRE_TSARGBa,
+   OP_6326_3D_FIRE_TSARGBb,
+   OP_6326_3D_FIRE_TSARGBc
+};
+
+/* Flat-shading bits OR-ed into dwPrimitiveSet when the shade model is
+ * GL_FLAT.
+ */
+static const GLuint hw_prim_mmio_shade[OP_3D_TRIANGLE_DRAW+1] = {
+   SHADE_FLAT_VertexA,
+   SHADE_FLAT_VertexB,
+   SHADE_FLAT_VertexC
+};
+
+/* Primitive list type OR-ed into AGPParseSet. */
+static const GLuint hw_prim_agp_type[OP_3D_TRIANGLE_DRAW+1] = {
+   MASK_PsPointList,
+   MASK_PsLineList,
+   MASK_PsTriangleList
+};
+
+/* Flat-shading bits OR-ed into AGPParseSet when the shade model is
+ * GL_FLAT.
+ */
+static const GLuint hw_prim_agp_shade[OP_3D_TRIANGLE_DRAW+1] = {
+   MASK_PsShadingFlatA,
+   MASK_PsShadingFlatB,
+   MASK_PsShadingFlatC
+};
+
+/* Forward declarations: both functions are defined further down but are
+ * referenced earlier through the RASTERIZE and INIT template macros.
+ */
+static void sisRasterPrimitive( GLcontext *ctx, GLuint hwprim );
+static void sisRenderPrimitive( GLcontext *ctx, GLenum prim );
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+
+/* Parameters consumed by the tnl_dd/t_dd_triemit.h template included below.
+ * With TAG(x) defined as sis_##x the template's functions get a sis_ prefix
+ * (sis_triangle, sis_quad, sis_line and sis_point are used later in this
+ * file).  Vertex storage is obtained from sisAllocDmaLow(), sized in
+ * dwords times sizeof(int).
+ * NOTE(review): VERT() refers to a 'vertsize' identifier that LOCAL_VARS
+ * here does not declare; presumably the template supplies it -- confirm.
+ */
+#define HAVE_QUADS 0
+#define HAVE_LINES 1
+#define HAVE_POINTS 1
+#define CTX_ARG sisContextPtr smesa
+#define GET_VERTEX_DWORDS() smesa->vertex_size
+#define ALLOC_VERTS( n, size ) sisAllocDmaLow( smesa, n * size * sizeof(int) )
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+   sisContextPtr smesa = SIS_CONTEXT(ctx);			\
+   const char *vertptr = smesa->verts;
+#define VERT(x) (sisVertex *)(vertptr + (x * vertsize * sizeof(int)))
+#define VERTEX sisVertex 
+#undef TAG
+#define TAG(x) sis_##x
+#include "tnl_dd/t_dd_triemit.h"
+#undef TAG
+#undef LOCAL_VARS
+
+/***********************************************************************
+ *             Dispatch vertices to hardware through MMIO              *
+ ***********************************************************************/
+
+/* The ARGB write of the last vertex of the primitive fires the 3d engine, so
+ * save it until the end.
+ *
+ * _v       vertex pointer; its ui[] dwords are consumed in order: X, Y, Z,
+ *          then W, color, specular, UV0 and UV1 as selected by the
+ *          compile-time SIS_STATES mask (color is always read).
+ * i        vertex slot; each slot's registers are 0x30 bytes apart.
+ * lastvert nonzero for the final vertex of the primitive.
+ *
+ * The color register is written only for the last vertex, or for every
+ * vertex when VERT_SMOOTH is set in SIS_STATES.
+ */
+#define SIS_MMIO_WRITE_VERTEX(_v, i, lastvert)			\
+do {								\
+   GLuint __color, __i = 0;					\
+   MMIO(REG_3D_TSXa+(i)*0x30, _v->ui[__i++]);			\
+   MMIO(REG_3D_TSYa+(i)*0x30, _v->ui[__i++]);			\
+   MMIO(REG_3D_TSZa+(i)*0x30, _v->ui[__i++]);			\
+   if (SIS_STATES & VERT_W)					\
+      MMIO(REG_3D_TSWGa+(i)*0x30, _v->ui[__i++]);		\
+   __color = _v->ui[__i++];					\
+   if (SIS_STATES & VERT_SPEC)					\
+      MMIO(REG_3D_TSFSa+(i)*0x30, _v->ui[__i++]);		\
+   if (SIS_STATES & VERT_UV0) {					\
+      MMIO(REG_3D_TSUAa+(i)*0x30, _v->ui[__i++]);		\
+      MMIO(REG_3D_TSVAa+(i)*0x30, _v->ui[__i++]);		\
+   }								\
+   if (SIS_STATES & VERT_UV1) {					\
+      MMIO(REG_3D_TSUBa+(i)*0x30, _v->ui[__i++]);		\
+      MMIO(REG_3D_TSVBa+(i)*0x30, _v->ui[__i++]);		\
+   }								\
+   if (lastvert || (SIS_STATES & VERT_SMOOTH))			\
+      MMIO(REG_3D_TSARGBa+(i)*0x30, __color);			\
+} while (0)
+
+/* 6326 variant of the MMIO vertex writer.
+ *
+ * _v       vertex pointer; its ui[] dwords are consumed in order: X, Y, Z,
+ *          then W, color, specular and UV0 as selected by the compile-time
+ *          SIS_STATES mask (there is no second texture coordinate set).
+ * i        vertex slot; each slot's registers are 0x20 bytes apart.
+ * lastvert nonzero for the final vertex of the primitive.
+ *
+ * The color register is written last, and only for the final vertex or
+ * when VERT_SMOOTH is set.  It uses the same 0x20 slot stride as every
+ * other 6326 vertex register here; the previous 0x30 stride sent the
+ * color of slots 1 and 2 to the wrong address.
+ * NOTE(review): confirm the 0x20 spacing against the 6326 register header.
+ */
+#define SIS6326_MMIO_WRITE_VERTEX(_v, i, lastvert)		\
+do {								\
+   GLuint __color, __i = 0;					\
+   MMIO(REG_6326_3D_TSXa+(i)*0x20, (_v)->ui[__i++]);		\
+   MMIO(REG_6326_3D_TSYa+(i)*0x20, (_v)->ui[__i++]);		\
+   MMIO(REG_6326_3D_TSZa+(i)*0x20, (_v)->ui[__i++]);		\
+   if (SIS_STATES & VERT_W)					\
+      MMIO(REG_6326_3D_TSWa+(i)*0x20, (_v)->ui[__i++]);	\
+   __color = (_v)->ui[__i++];					\
+   if (SIS_STATES & VERT_SPEC)					\
+      MMIO(REG_6326_3D_TSFSa+(i)*0x20, (_v)->ui[__i++]);	\
+   if (SIS_STATES & VERT_UV0) {					\
+      MMIO(REG_6326_3D_TSUa+(i)*0x20, (_v)->ui[__i++]);	\
+      MMIO(REG_6326_3D_TSVa+(i)*0x20, (_v)->ui[__i++]);	\
+   }								\
+   if ((lastvert) || (SIS_STATES & VERT_SMOOTH))		\
+      MMIO(REG_6326_3D_TSARGBa+(i)*0x20, __color);		\
+} while (0)
+
+/* NOTE(review): presumably the maximum number of register writes per
+ * vertex, used by sis_tritmp.h to size command-queue waits -- confirm.
+ */
+#define MMIO_VERT_REG_COUNT 10
+
+/* Bits of the SIS_STATES mask.  The same bits form the index into the
+ * sis_*_func_mmio tables below (see sisFlushPrimsLocked).
+ */
+#define VERT_SMOOTH	0x01
+#define VERT_W		0x02
+#define VERT_SPEC	0x04
+#define VERT_UV0	0x08
+#define VERT_UV1	0x10
+#define VERT_6326	0x20	/* Right after UV1, but won't have a UV1 set */
+
+/* MMIO emit functions, one table per primitive type, indexed by a VERT_*
+ * mask.  48 entries cover masks 0x00-0x1f and VERT_6326 combined with
+ * everything except VERT_UV1 (0x20-0x2f).
+ */
+typedef void (*mmio_draw_func)(sisContextPtr smesa, char *verts);
+static mmio_draw_func sis_tri_func_mmio[48];
+static mmio_draw_func sis_line_func_mmio[48];
+static mmio_draw_func sis_point_func_mmio[48];
+
+/* Instantiate sis_tritmp.h once for each of the 32 combinations of
+ * VERT_SMOOTH (g), VERT_W (w), VERT_SPEC (s), VERT_UV0 (t0) and
+ * VERT_UV1 (t1).  SIS_STATES selects the combination and TAG() appends the
+ * matching suffix to the generated names.
+ * NOTE(review): SIS_STATES and TAG are redefined before each include without
+ * an #undef here; presumably sis_tritmp.h undefines them at its end --
+ * confirm.
+ */
+
+/* No texture coordinates. */
+#define SIS_STATES (0)
+#define TAG(x) x##_none
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH)
+#define TAG(x) x##_g
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_W)
+#define TAG(x) x##_w
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_W)
+#define TAG(x) x##_gw
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SPEC)
+#define TAG(x) x##_s
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_SPEC)
+#define TAG(x) x##_gs
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_W | VERT_SPEC)
+#define TAG(x) x##_ws
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_W | VERT_SPEC)
+#define TAG(x) x##_gws
+#include "sis_tritmp.h"
+
+/* First texture coordinate set only. */
+#define SIS_STATES (VERT_UV0)
+#define TAG(x) x##_t0
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_UV0)
+#define TAG(x) x##_gt0
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_W | VERT_UV0)
+#define TAG(x) x##_wt0
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_W | VERT_UV0)
+#define TAG(x) x##_gwt0
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SPEC | VERT_UV0)
+#define TAG(x) x##_st0
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_SPEC | VERT_UV0)
+#define TAG(x) x##_gst0
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_W | VERT_SPEC | VERT_UV0)
+#define TAG(x) x##_wst0
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_W | VERT_SPEC | VERT_UV0)
+#define TAG(x) x##_gwst0
+#include "sis_tritmp.h"
+
+/* Second texture coordinate set only. */
+#define SIS_STATES (VERT_UV1)
+#define TAG(x) x##_t1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_UV1)
+#define TAG(x) x##_gt1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_W | VERT_UV1)
+#define TAG(x) x##_wt1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_W | VERT_UV1)
+#define TAG(x) x##_gwt1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SPEC | VERT_UV1)
+#define TAG(x) x##_st1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_SPEC | VERT_UV1)
+#define TAG(x) x##_gst1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_W | VERT_SPEC | VERT_UV1)
+#define TAG(x) x##_wst1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_W | VERT_SPEC | VERT_UV1)
+#define TAG(x) x##_gwst1
+#include "sis_tritmp.h"
+
+/* Both texture coordinate sets. */
+#define SIS_STATES (VERT_UV0 | VERT_UV1)
+#define TAG(x) x##_t0t1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_UV0 | VERT_UV1)
+#define TAG(x) x##_gt0t1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_W | VERT_UV0 | VERT_UV1)
+#define TAG(x) x##_wt0t1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_W | VERT_UV0 | VERT_UV1)
+#define TAG(x) x##_gwt0t1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SPEC | VERT_UV0 | VERT_UV1)
+#define TAG(x) x##_st0t1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_SPEC | VERT_UV0 | VERT_UV1)
+#define TAG(x) x##_gst0t1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_W | VERT_SPEC | VERT_UV0 | VERT_UV1)
+#define TAG(x) x##_wst0t1
+#include "sis_tritmp.h"
+
+#define SIS_STATES (VERT_SMOOTH | VERT_W | VERT_SPEC | VERT_UV0 | VERT_UV1)
+#define TAG(x) x##_gwst0t1
+#include "sis_tritmp.h"
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+
+/* Each macro below draws one primitive.  When the instantiating template
+ * has DO_FALLBACK set, it goes through the per-context draw_* function
+ * pointer (chosen in sisChooseRenderState); otherwise it calls the
+ * hardware emit function directly.  All of them expect a local 'smesa'.
+ */
+
+/* Draw one triangle. */
+#define TRI( a, b, c )				\
+do { 						\
+   if (DO_FALLBACK)				\
+      smesa->draw_tri( smesa, a, b, c );	\
+   else						\
+      sis_triangle( smesa, a, b, c );		\
+} while (0)
+
+/* Draw one quad; the fallback path splits it into triangles (a,b,d) and
+ * (b,c,d).
+ */
+#define QUAD( a, b, c, d )			\
+do { 						\
+   if (DO_FALLBACK) {				\
+      smesa->draw_tri( smesa, a, b, d );	\
+      smesa->draw_tri( smesa, b, c, d );	\
+   } else					\
+      sis_quad( smesa, a, b, c, d );		\
+} while (0)
+
+/* Draw one line segment. */
+#define LINE( v0, v1 )				\
+do { 						\
+   if (DO_FALLBACK)				\
+      smesa->draw_line( smesa, v0, v1 );	\
+   else						\
+      sis_line( smesa, v0, v1 );		\
+} while (0)
+
+/* Draw one point. */
+#define POINT( v0 )				\
+do { 						\
+   if (DO_FALLBACK)				\
+      smesa->draw_point( smesa, v0 );		\
+   else						\
+      sis_point( smesa, v0 );			\
+} while (0)
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+/* Bits combined into the render-state index (IND / smesa->RenderIndex).
+ * SIS_MAX_TRIFUNC is the number of possible combinations.
+ */
+#define SIS_OFFSET_BIT 		0x01
+#define SIS_TWOSIDE_BIT		0x02
+#define SIS_UNFILLED_BIT	0x04
+#define SIS_FALLBACK_BIT	0x08
+#define SIS_MAX_TRIFUNC		0x10
+
+
+/* One set of tnl rasterization entry points per render-state index.
+ * sisChooseRenderState installs the entry for the current index.
+ */
+static struct {
+   tnl_points_func	        points;
+   tnl_line_func		line;
+   tnl_triangle_func	triangle;
+   tnl_quad_func		quad;
+} rast_tab[SIS_MAX_TRIFUNC];
+
+
+/* Parameters for the tnl_dd/t_dd_unfilled.h and tnl_dd/t_dd_tritmp.h
+ * templates included below.  IND is defined separately before each
+ * inclusion; the DO_* tests on it select the variant being generated.
+ */
+#define DO_FALLBACK (IND & SIS_FALLBACK_BIT)
+#define DO_OFFSET   (IND & SIS_OFFSET_BIT)
+#define DO_UNFILLED (IND & SIS_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & SIS_TWOSIDE_BIT)
+#define DO_FLAT      0
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_SPEC   1
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define VERTEX sisVertex
+#define TAB rast_tab
+
+/* Vertex accessors.  GET_VERTEX computes the address of vertex e in
+ * smesa->verts, with vertex_size counted in int-sized dwords.
+ */
+#define DEPTH_SCALE smesa->depth_scale
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a > 0)
+#define GET_VERTEX(e) (smesa->verts + (e * smesa->vertex_size * sizeof(int)))
+
+/* Store float color c[0..3] into the vertex's packed color dword, which
+ * lives at dword index 'coloroffset'.
+ */
+#define VERT_SET_RGBA( v, c )  					\
+do {								\
+   sis_color_t *color = (sis_color_t *)&((v)->ui[coloroffset]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]);		\
+} while (0)
+
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+
+/* Specular accessors.  A specoffset of 0 means the vertex carries no
+ * specular color, in which case these do nothing.  Only red, green and
+ * blue are written; the fourth byte of the dword is left untouched.
+ */
+#define VERT_SET_SPEC( v, c )					\
+do {								\
+   if (specoffset != 0) {					\
+      sis_color_t *spec = (sis_color_t *)&((v)->ui[specoffset]); \
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]);		\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]);		\
+      UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]);		\
+   }								\
+} while (0)
+#define VERT_COPY_SPEC( v0, v1 )				\
+do {								\
+   if (specoffset != 0) {					\
+      sis_color_t *spec0 = (sis_color_t *)&((v0)->ui[specoffset]); \
+      sis_color_t *spec1 = (sis_color_t *)&((v1)->ui[specoffset]); \
+      spec0->red   = spec1->red;				\
+      spec0->green = spec1->green;				\
+      spec0->blue  = spec1->blue; 				\
+   }								\
+} while (0)
+
+/* Save/restore whole color and specular dwords through the color[] and
+ * spec[] arrays declared by LOCAL_VARS.
+ * NOTE(review): the SPEC variants expand to a bare 'if' statement, so they
+ * are only safe where a following 'else' cannot bind to them.
+ */
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+#define VERT_SAVE_SPEC( idx )    if (specoffset != 0) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx ) if (specoffset != 0) v[idx]->ui[specoffset] = spec[idx]
+
+/* Locals declared in each generated function; the (void) casts silence
+ * unused-variable warnings in variants that do not need them all.
+ */
+#define LOCAL_VARS(n)						\
+   sisContextPtr smesa = SIS_CONTEXT(ctx);			\
+   GLuint color[n] = { 0 };					\
+   GLuint spec[n] = { 0 };					\
+   GLuint coloroffset = smesa->coloroffset;			\
+   GLuint specoffset = smesa->specoffset;			\
+   (void) color; (void) spec; (void) coloroffset; (void) specoffset;
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+/* RASTERIZE switches the hardware primitive to the one matching GL
+ * primitive x, skipping the call when it is already current.  The unfilled
+ * helpers are generated once, with IND set to the fallback bit and
+ * unsuffixed names.
+ */
+#define RASTERIZE(x) if (smesa->hw_primitive != hw_prim[x]) \
+                        sisRasterPrimitive( ctx, hw_prim[x] )
+#define RENDER_PRIMITIVE smesa->render_primitive
+#define IND SIS_FALLBACK_BIT
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+
+
+/* Instantiate tnl_dd/t_dd_tritmp.h once per combination of the offset,
+ * twoside, unfilled and fallback bits (16 in total).  TAG() appends the
+ * matching suffix; init_rast_tab() calls the resulting init*() functions.
+ * NOTE(review): IND and TAG are redefined before each include without an
+ * #undef here; presumably the template undefines them -- confirm.
+ */
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_TWOSIDE_BIT|SIS_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_OFFSET_BIT|SIS_UNFILLED_BIT)
+#define TAG(x) x##_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_TWOSIDE_BIT|SIS_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_TWOSIDE_BIT|SIS_OFFSET_BIT|SIS_UNFILLED_BIT)
+#define TAG(x) x##_twoside_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+/* Same eight combinations again with the fallback bit set. */
+#define IND (SIS_FALLBACK_BIT)
+#define TAG(x) x##_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_OFFSET_BIT|SIS_FALLBACK_BIT)
+#define TAG(x) x##_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_TWOSIDE_BIT|SIS_FALLBACK_BIT)
+#define TAG(x) x##_twoside_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_TWOSIDE_BIT|SIS_OFFSET_BIT|SIS_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_UNFILLED_BIT|SIS_FALLBACK_BIT)
+#define TAG(x) x##_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_OFFSET_BIT|SIS_UNFILLED_BIT|SIS_FALLBACK_BIT)
+#define TAG(x) x##_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_TWOSIDE_BIT|SIS_UNFILLED_BIT|SIS_FALLBACK_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (SIS_TWOSIDE_BIT|SIS_OFFSET_BIT|SIS_UNFILLED_BIT| \
+	     SIS_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+/**
+ * Run the init function of every t_dd_tritmp.h instantiation, in the
+ * order the variants were generated.
+ */
+static void init_rast_tab( void )
+{
+   static void (* const init_funcs[])( void ) = {
+      init,
+      init_offset,
+      init_twoside,
+      init_twoside_offset,
+      init_unfilled,
+      init_offset_unfilled,
+      init_twoside_unfilled,
+      init_twoside_offset_unfilled,
+      init_fallback,
+      init_offset_fallback,
+      init_twoside_fallback,
+      init_twoside_offset_fallback,
+      init_unfilled_fallback,
+      init_offset_unfilled_fallback,
+      init_twoside_unfilled_fallback,
+      init_twoside_offset_unfilled_fallback
+   };
+   GLuint n;
+
+   for (n = 0; n < sizeof(init_funcs) / sizeof(init_funcs[0]); n++)
+      init_funcs[n]();
+}
+
+
+
+/***********************************************************************
+ *                    Rasterization fallback helpers                   *
+ ***********************************************************************/
+
+
+/* This code is hit only when a mix of accelerated and unaccelerated
+ * primitives is being drawn, and only for the unaccelerated
+ * primitives.
+ */
+
+/**
+ * Draw one triangle with the software rasterizer: translate the three
+ * hardware vertices to SWvertex form, rasterize between the span
+ * start/finish calls, then flush swrast.
+ */
+static void
+sis_fallback_tri( sisContextPtr smesa,
+		  sisVertex *v0,
+		  sisVertex *v1,
+		  sisVertex *v2 )
+{
+   GLcontext *ctx = smesa->glCtx;
+   SWvertex sw0, sw1, sw2;
+
+   _swsetup_Translate( ctx, v0, &sw0 );
+   _swsetup_Translate( ctx, v1, &sw1 );
+   _swsetup_Translate( ctx, v2, &sw2 );
+
+   sisSpanRenderStart( ctx );
+   _swrast_Triangle( ctx, &sw0, &sw1, &sw2 );
+   sisSpanRenderFinish( ctx );
+
+   _swrast_flush( ctx );
+}
+
+
+/**
+ * Draw one line with the software rasterizer: translate both hardware
+ * vertices to SWvertex form, rasterize between the span start/finish
+ * calls, then flush swrast.
+ */
+static void
+sis_fallback_line( sisContextPtr smesa,
+		   sisVertex *v0,
+		   sisVertex *v1 )
+{
+   GLcontext *ctx = smesa->glCtx;
+   SWvertex sw0, sw1;
+
+   _swsetup_Translate( ctx, v0, &sw0 );
+   _swsetup_Translate( ctx, v1, &sw1 );
+
+   sisSpanRenderStart( ctx );
+   _swrast_Line( ctx, &sw0, &sw1 );
+   sisSpanRenderFinish( ctx );
+
+   _swrast_flush( ctx );
+}
+
+
+/**
+ * Draw one point with the software rasterizer: translate the hardware
+ * vertex to SWvertex form, rasterize between the span start/finish
+ * calls, then flush swrast.
+ */
+static void
+sis_fallback_point( sisContextPtr smesa,
+		    sisVertex *v0 )
+{
+   GLcontext *ctx = smesa->glCtx;
+   SWvertex sw0;
+
+   _swsetup_Translate( ctx, v0, &sw0 );
+
+   sisSpanRenderStart( ctx );
+   _swrast_Point( ctx, &sw0 );
+   sisSpanRenderFinish( ctx );
+
+   _swrast_flush( ctx );
+}
+
+
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+
+/* Parameters for tnl/t_vb_rendertmp.h, which is included twice below: once
+ * with ELT(x) as the identity (names ending in _verts) and once with ELT(x)
+ * indexing the element array (names ending in _elts).  The RENDER_* macros
+ * turn vertex indices into sisVertex pointers and hand them to the
+ * POINT/LINE/TRI/QUAD macros with IND fixed at 0 (no fallback).
+ * PRESERVE_VB_DEFS is defined so the settings survive the first include.
+ */
+#define IND 0
+#define V(x) (sisVertex *)(vertptr + (x * vertsize * sizeof(int)))
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		\
+      POINT( V(ELT(start)) )
+#define RENDER_LINE( v0, v1 )         LINE( V(v0), V(v1) )
+#define RENDER_TRI(  v0, v1, v2 )     TRI(  V(v0), V(v1), V(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) QUAD( V(v0), V(v1), V(v2), V(v3) )
+#define INIT(x) sisRenderPrimitive( ctx, x )
+#undef LOCAL_VARS
+#define LOCAL_VARS				\
+    sisContextPtr smesa = SIS_CONTEXT(ctx);	\
+    const GLuint vertsize = smesa->vertex_size;		\
+    const char *vertptr = (char *)smesa->verts;		\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    (void) elt;
+#define RESET_STIPPLE
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS
+#define ELT(x) (x)
+#define TAG(x) sis_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) sis_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+/* _TriangleCaps bits that send a primitive type through the software
+ * fallback functions (see sisChooseRenderState).
+ */
+#define POINT_FALLBACK (DD_POINT_SMOOTH)
+#define LINE_FALLBACK (DD_LINE_STIPPLE|DD_LINE_SMOOTH)
+#define TRI_FALLBACK (DD_TRI_STIPPLE|DD_TRI_SMOOTH)
+#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK)
+/* _TriangleCaps bits that select a non-default rast_tab variant. */
+#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED)
+/* State that makes sisRunPipeline call sisChooseRenderState again. */
+#define _SIS_NEW_RENDER_STATE (ANY_RASTER_FLAGS | ANY_FALLBACK_FLAGS)
+
+/**
+ * Pick the rast_tab variant and per-primitive draw functions that match
+ * the current _TriangleCaps, and install them in the tnl render hooks
+ * when the resulting index differs from the cached RenderIndex.
+ * Does nothing while a full rasterization fallback is active.
+ */
+static void sisChooseRenderState(GLcontext *ctx)
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   sisContextPtr smesa = SIS_CONTEXT( ctx );
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint index = 0;
+
+   if (smesa->Fallback)
+      return;
+
+   if (caps & (ANY_RASTER_FLAGS|ANY_FALLBACK_FLAGS)) {
+      /* Raster variants. */
+      if (caps & DD_TRI_LIGHT_TWOSIDE)
+         index |= SIS_TWOSIDE_BIT;
+      if (caps & DD_TRI_OFFSET)
+         index |= SIS_OFFSET_BIT;
+      if (caps & DD_TRI_UNFILLED)
+         index |= SIS_UNFILLED_BIT;
+
+      /* Per-primitive draw functions: hardware emit unless that
+       * primitive type needs the software fallback.
+       */
+      if (caps & POINT_FALLBACK)
+         smesa->draw_point = sis_fallback_point;
+      else
+         smesa->draw_point = sis_point;
+
+      if (caps & LINE_FALLBACK)
+         smesa->draw_line = sis_fallback_line;
+      else
+         smesa->draw_line = sis_line;
+
+      if (caps & TRI_FALLBACK)
+         smesa->draw_tri = sis_fallback_tri;
+      else
+         smesa->draw_tri = sis_triangle;
+
+      if (caps & ANY_FALLBACK_FLAGS)
+         index |= SIS_FALLBACK_BIT;
+   }
+
+   if (index == smesa->RenderIndex)
+      return;
+
+   smesa->RenderIndex = index;
+
+   tnl->Driver.Render.Points = rast_tab[index].points;
+   tnl->Driver.Render.Line = rast_tab[index].line;
+   tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+   tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+   tnl->Driver.Render.Quad = rast_tab[index].quad;
+
+   if (index != 0) {
+      /* Non-default state: use the generic tnl render tables. */
+      tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+      tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+   } else {
+      /* Default state: use the driver's own render tables. */
+      tnl->Driver.Render.PrimTabVerts = sis_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = sis_render_tab_elts;
+      tnl->Driver.Render.ClippedPolygon = sis_fast_clipped_poly;
+   }
+}
+
+/**********************************************************************/
+/*                Multipass rendering for front buffering             */
+/**********************************************************************/
+/**
+ * Program the hardware clip registers for cliprect number \p pass of the
+ * drawable, intersected with the scissor box when scissoring is enabled.
+ *
+ * \return GL_FALSE when \p pass is past the last cliprect (nothing is
+ *         programmed), GL_TRUE otherwise.
+ */
+static GLboolean multipass_cliprect( GLcontext *ctx, GLuint pass )
+{
+   sisContextPtr smesa = SIS_CONTEXT( ctx );
+   GLint left, top, right, bottom;
+
+   if (pass >= smesa->driDrawable->numClipRects)
+      return GL_FALSE;
+
+   /* Cliprect relative to the drawable origin. */
+   left   = smesa->driDrawable->pClipRects[pass].x1 - smesa->driDrawable->x;
+   top    = smesa->driDrawable->pClipRects[pass].y1 - smesa->driDrawable->y;
+   right  = smesa->driDrawable->pClipRects[pass].x2 - smesa->driDrawable->x;
+   bottom = smesa->driDrawable->pClipRects[pass].y2 - smesa->driDrawable->y;
+
+   if (ctx->Scissor.Enabled) {
+      const GLint scis_left   = ctx->Scissor.X;
+      const GLint scis_right  = ctx->Scissor.X + ctx->Scissor.Width - 1;
+      const GLint scis_top    = Y_FLIP(ctx->Scissor.Y + ctx->Scissor.Height - 1);
+      const GLint scis_bottom = Y_FLIP(ctx->Scissor.Y);
+
+      /* Shrink the rectangle to its intersection with the scissor box. */
+      if (scis_left > left)
+         left = scis_left;
+      if (scis_top > top)
+         top = scis_top;
+      if (scis_right < right)
+         right = scis_right;
+      if (scis_bottom < bottom)
+         bottom = scis_bottom;
+   }
+
+   MMIO(REG_3D_ClipTopBottom, (top << 13) | bottom);
+   MMIO(REG_3D_ClipLeftRight, (left << 13) | right);
+   /* Mark that we clobbered these registers */
+   smesa->GlobalFlag |= GFLAG_CLIPPING;
+
+   return GL_TRUE;
+}
+
+
+
+/**********************************************************************/
+/*                 Validate state at pipeline start                   */
+/**********************************************************************/
+
+/* Validate driver state that depends on changed GL state, run the tnl
+ * pipeline, then flush any vertices it queued.
+ */
+static void sisRunPipeline( GLcontext *ctx )
+{
+   sisContextPtr smesa = SIS_CONTEXT( ctx );
+
+   if (smesa->NewGLState) {
+      /* Flush vertices queued under the old state before changing it. */
+      SIS_FIREVERTICES(smesa);
+      if (smesa->NewGLState & _NEW_TEXTURE) {
+	 sisUpdateTextureState(ctx);
+      }
+
+      /* NewGLState is deliberately re-read here rather than cached. */
+      if (smesa->NewGLState & _SIS_NEW_RENDER_STATE)
+	 sisChooseRenderState( ctx );
+
+      smesa->NewGLState = 0;
+   }
+
+   _tnl_run_pipeline( ctx );
+
+   /* XXX: If we put flushing in sis_state.c and friends, we can avoid this.
+    * Is it worth it?
+    */
+   SIS_FIREVERTICES(smesa);
+}
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+/* This is called when Mesa switches between rendering triangle
+ * primitives (such as GL_POLYGON, GL_QUADS, GL_TRIANGLE_STRIP, etc),
+ * and lines, points and bitmaps.
+ */
+
+/**
+ * Switch the hardware primitive type to \p hwprim (a value from hw_prim[]).
+ *
+ * When the type actually changes, pending vertices are flushed first and
+ * the primitive-type and shading bits of both AGPParseSet and
+ * dwPrimitiveSet are rebuilt from the hwprim-indexed tables and the
+ * current shade model.
+ */
+static void sisRasterPrimitive( GLcontext *ctx, GLuint hwprim )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   const GLboolean flat = (ctx->Light.ShadeModel == GL_FLAT);
+
+   if (smesa->hw_primitive == hwprim)
+      return;
+
+   SIS_FIREVERTICES(smesa);
+   smesa->hw_primitive = hwprim;
+
+   /* Parse word: list type plus shading mode. */
+   smesa->AGPParseSet &= ~(MASK_PsDataType | MASK_PsShadingMode);
+   smesa->AGPParseSet |= hw_prim_agp_type[hwprim];
+   if (flat)
+      smesa->AGPParseSet |= hw_prim_agp_shade[hwprim];
+   else
+      smesa->AGPParseSet |= MASK_PsShadingSmooth;
+
+   /* Primitive-set word: draw command, fire position and shading mode,
+    * with chip-specific masks and encodings.
+    */
+   if (smesa->is6326) {
+      smesa->dwPrimitiveSet &= ~(MASK_6326_DrawPrimitiveCommand |
+         MASK_6326_SetFirePosition | MASK_6326_ShadingMode);
+      smesa->dwPrimitiveSet |= hwprim | hw_prim_6326_mmio_fire[hwprim];
+      if (flat)
+         smesa->dwPrimitiveSet |= hw_prim_mmio_shade[hwprim];
+      else
+         smesa->dwPrimitiveSet |= OP_6326_3D_SHADE_FLAT_GOURAUD;
+   } else {
+      smesa->dwPrimitiveSet &= ~(MASK_DrawPrimitiveCommand |
+         MASK_SetFirePosition | MASK_ShadingMode);
+      smesa->dwPrimitiveSet |= hwprim | hw_prim_mmio_fire[hwprim];
+      if (flat)
+         smesa->dwPrimitiveSet |= hw_prim_mmio_shade[hwprim];
+      else
+         smesa->dwPrimitiveSet |= SHADE_GOURAUD;
+   }
+}
+
+/**
+ * Record the GL primitive about to be rendered and select the matching
+ * hardware primitive.  For triangle-class primitives drawn unfilled the
+ * hardware primitive is left alone here.
+ */
+static void sisRenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   const GLboolean unfilled_tris =
+      (prim >= GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED);
+
+   smesa->render_primitive = prim;
+
+   if (!unfilled_tris)
+      sisRasterPrimitive( ctx, hw_prim[prim] );
+}
+
+/* Append one attribute (tnl attribute ATTR emitted with format STYLE) to
+ * smesa->vertex_attrs.  Expects a local 'smesa'.
+ */
+#define EMIT_ATTR( ATTR, STYLE)						\
+do {									\
+   smesa->vertex_attrs[smesa->vertex_attr_count].attrib = (ATTR);	\
+   smesa->vertex_attrs[smesa->vertex_attr_count].format = (STYLE);	\
+   smesa->vertex_attr_count++;						\
+} while (0)
+
+/* Append a padding entry to smesa->vertex_attrs; N is stored in the
+ * entry's offset field.  The EMIT_PAD in the body is the format constant,
+ * not a recursive use of this macro.
+ */
+#define EMIT_PAD( N )							\
+do {									\
+   smesa->vertex_attrs[smesa->vertex_attr_count].attrib = 0;		\
+   smesa->vertex_attrs[smesa->vertex_attr_count].format = EMIT_PAD;	\
+   smesa->vertex_attrs[smesa->vertex_attr_count].offset = (N);		\
+   smesa->vertex_attr_count++;						\
+} while (0)
+				
+/* Render-start hook: set up front-buffer cliprect multipass, build the
+ * hardware vertex layout from the active tnl render inputs, update the
+ * texture fallback, and install the layout with _tnl_install_attrs().
+ */
+static void sisRenderStart( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   DECLARE_RENDERINPUTS(index_bitset);
+   GLuint AGPParseSet = smesa->AGPParseSet;
+   GLboolean tex_fallback = GL_FALSE;
+
+   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+
+   /* When drawing to the front buffer, program the first cliprect now and
+    * let tnl replay the primitives once per remaining cliprect.
+    */
+   if (ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT && 
+      smesa->driDrawable->numClipRects != 0)
+   {
+      multipass_cliprect(ctx, 0);
+      if (smesa->driDrawable->numClipRects > 1)
+         tnl->Driver.Render.Multipass = multipass_cliprect;
+      else
+         tnl->Driver.Render.Multipass = NULL;
+   } else {
+      tnl->Driver.Render.Multipass = NULL;
+   }
+
+   /* Important:
+    * point the position attribute at the NDC coordinates.
+    */
+   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+   smesa->vertex_attr_count = 0;
+
+   /* EMIT_ATTR's must be in order as they tell t_vertex.c how to build up a
+    * hardware vertex.
+    */
+
+   AGPParseSet &= ~(MASK_VertexDWSize | MASK_VertexDataFormat);
+   AGPParseSet |= SiS_PS_HAS_XYZ | SiS_PS_HAS_DIFFUSE;
+   /* W is emitted only when some texture unit is in use; the color dword
+    * follows the position, so its index depends on that choice.
+    */
+   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+      EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT);
+      AGPParseSet |= SiS_PS_HAS_W;
+      smesa->coloroffset = 4;
+   } else {
+      EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT);
+      smesa->coloroffset = 3;
+   }
+
+   EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA);
+
+   /* Specular (3 bytes) and fog (1 byte) share a single dword; whichever
+    * one is absent is replaced by padding.  specoffset stays 0 when there
+    * is no specular color.
+    */
+   smesa->specoffset = 0;
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) ||
+       RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
+      AGPParseSet |= SiS_PS_HAS_SPECULAR;
+
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+	 EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR);
+	 smesa->specoffset = smesa->coloroffset + 1;
+      } else {
+	 EMIT_PAD(3);
+      }
+
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
+	 EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1UB_1F);
+      } else {
+	 EMIT_PAD(1);
+      }
+   }
+
+   /* projective textures are not supported by the hardware */
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX0 )) {
+      if (VB->AttribPtr[_TNL_ATTRIB_TEX0]->size > 2)
+	 tex_fallback = GL_TRUE;
+      EMIT_ATTR(_TNL_ATTRIB_TEX0, EMIT_2F);
+      AGPParseSet |= SiS_PS_HAS_UV0;
+   }
+   /* Will only hit tex1 on SiS300 */
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX1 )) {
+      if (VB->AttribPtr[_TNL_ATTRIB_TEX1]->size > 2)
+	 tex_fallback = GL_TRUE;
+      EMIT_ATTR(_TNL_ATTRIB_TEX1, EMIT_2F);
+      AGPParseSet |= SiS_PS_HAS_UV1;
+   }
+   FALLBACK(smesa, SIS_FALLBACK_TEXTURE, tex_fallback);
+
+   /* NOTE(review): last_tcl_state_bitset is compared here but not updated
+    * anywhere in this function; unless it is refreshed elsewhere, the
+    * layout is reinstalled on every call -- confirm.
+    */
+   if (!RENDERINPUTS_EQUAL( smesa->last_tcl_state_bitset, index_bitset )) {
+      smesa->AGPParseSet = AGPParseSet;
+
+      smesa->vertex_size =  _tnl_install_attrs( ctx, smesa->vertex_attrs, 
+	 smesa->vertex_attr_count, smesa->hw_viewport, 0 );
+
+      /* Convert the installed size to dwords and store it in the top bits
+       * of the parse word.
+       */
+      smesa->vertex_size >>= 2;
+      smesa->AGPParseSet |= smesa->vertex_size << 28;
+   }
+}
+
+/* Render-finish hook; intentionally empty. */
+static void sisRenderFinish( GLcontext *ctx )
+{
+}
+
+/**********************************************************************/
+/*                    AGP/PCI vertex submission                       */
+/**********************************************************************/
+
+/**
+ * Submit the vertices queued between vb_last and vb_cur to the hardware.
+ * As the name indicates, the caller is expected to hold the hardware lock
+ * (see sisFlushPrims).
+ *
+ * Hardware state is updated first.  With AGP the vertex range is handed
+ * to the command parser; otherwise each primitive is written through MMIO
+ * by the emit function matching the current vertex format, and the
+ * vertex buffer is rewound to its start.
+ *
+ * \param smesa  driver context; does nothing when no vertices are queued.
+ */
+void
+sisFlushPrimsLocked(sisContextPtr smesa)
+{
+   if (smesa->vb_cur == smesa->vb_last)
+      return;
+
+   if (smesa->is6326)
+      sis6326UpdateHWState(smesa->glCtx);
+   else
+      sisUpdateHWState(smesa->glCtx);
+
+   if (smesa->using_agp) {
+      mWait3DCmdQueue(8);
+      mEndPrimitive();
+      MMIO(REG_3D_AGPCmBase, (smesa->vb_last - smesa->vb) +
+         smesa->vb_agp_offset);
+      MMIO(REG_3D_AGPTtDwNum, ((smesa->vb_cur - smesa->vb_last) / 4) |
+	 0x50000000);
+      MMIO(REG_3D_ParsingSet, smesa->AGPParseSet);
+      MMIO(REG_3D_AGPCmFire, (GLint)(-1));
+      mEndPrimitive();
+   } else {
+      int mmio_index = 0, incr = 0;
+      void (*sis_emit_func)(sisContextPtr smesa, char *verts) = NULL;
+
+      /* Translate the vertex format bits into a VERT_* table index. */
+      if (smesa->AGPParseSet & MASK_PsShadingSmooth)
+	 mmio_index |= VERT_SMOOTH;
+      if (smesa->AGPParseSet & SiS_PS_HAS_SPECULAR)
+	 mmio_index |= VERT_SPEC;
+      if (smesa->AGPParseSet & SiS_PS_HAS_W)
+	 mmio_index |= VERT_W;
+      if (smesa->AGPParseSet & SiS_PS_HAS_UV0)
+	 mmio_index |= VERT_UV0;
+      if (smesa->AGPParseSet & SiS_PS_HAS_UV1)
+	 mmio_index |= VERT_UV1;
+      if (smesa->is6326)
+	 mmio_index |= VERT_6326;
+
+      /* The dispatch tables are smaller than the full VERT_* index space
+       * because VERT_6326 is never expected together with VERT_UV1.
+       * Catch a violation instead of reading past the tables.
+       */
+      assert(mmio_index < (int)(sizeof(sis_tri_func_mmio) /
+				sizeof(sis_tri_func_mmio[0])));
+
+      /* incr is the size in bytes of one whole primitive. */
+      switch (smesa->AGPParseSet & MASK_PsDataType) {
+      case MASK_PsPointList:
+         incr = smesa->vertex_size * 4;
+	 sis_emit_func = sis_point_func_mmio[mmio_index];
+	 break;
+      case MASK_PsLineList:
+         incr = smesa->vertex_size * 4 * 2;
+	 sis_emit_func = sis_line_func_mmio[mmio_index];
+	 break;
+      case MASK_PsTriangleList:
+         incr = smesa->vertex_size * 4 * 3;
+	 sis_emit_func = sis_tri_func_mmio[mmio_index];
+	 break;
+      default:
+	 /* Unknown list type: sis_emit_func stays NULL and is caught by
+	  * the assert in the loop below.
+	  */
+	 break;
+      }
+
+      if (!smesa->is6326) {
+	 mWait3DCmdQueue(1);
+	 MMIO(REG_3D_PrimitiveSet, smesa->dwPrimitiveSet);
+      }
+      while (smesa->vb_last < smesa->vb_cur) {
+	 assert(sis_emit_func);
+	 sis_emit_func(smesa, (char *)smesa->vb_last);
+	 smesa->vb_last += incr;
+      }
+      mWait3DCmdQueue(1);
+      mEndPrimitive();
+
+      /* With PCI, we can just start writing to the start of the VB again. */
+      smesa->vb_cur = smesa->vb;
+   }
+   smesa->vb_last = smesa->vb_cur;
+}
+/* Locked entry point: takes the hardware lock around sisFlushPrimsLocked(). */
+void sisFlushPrims(sisContextPtr smesa)
+{
+   LOCK_HARDWARE();
+   sisFlushPrimsLocked(smesa);
+   UNLOCK_HARDWARE();
+}
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+
+/* Debug names for fallback reasons; entry i is reported for fallback bit (1 << i). */
+static const char * const fallbackStrings[] = {
+   "Texture mode",
+   "Texture 0 mode",
+   "Texture 1 mode",
+   "Texture 0 env",	/* Note: unused */
+   "Texture 1 env",	/* Note: unused */
+   "glDrawBuffer(GL_FRONT_AND_BACK)",
+   "glEnable(GL_STENCIL) without hw stencil buffer",
+   "write mask",
+   "no_rast",
+};
+/* Name the highest bit set in `bit`; "unknown" when the table has no entry. */
+static const char *getFallbackString(GLuint bit)
+{
+   GLuint i = 0;
+   while ((bit >>= 1) != 0)
+      i++;
+   /* The table is shorter than 32 entries: never index past its end. */
+   return i < sizeof(fallbackStrings) / sizeof(fallbackStrings[0])
+      ? fallbackStrings[i] : "unknown";
+}
+
+/* Enter (mode != 0) or leave the software-rasterization fallback named by `bit`. */
+void sisFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   const GLuint before = smesa->Fallback;
+
+   if (mode) {
+      smesa->Fallback = before | bit;
+      /* Only the first fallback reason has to switch rendering over. */
+      if (before != 0)
+         return;
+
+      SIS_FIREVERTICES(smesa);
+      _swsetup_Wakeup( ctx );
+      smesa->RenderIndex = ~0;
+      if (SIS_DEBUG & DEBUG_FALLBACKS)
+         fprintf(stderr, "SiS begin rasterization fallback: 0x%x %s\n",
+                 bit, getFallbackString(bit));
+      return;
+   }
+
+   smesa->Fallback = before & ~bit;
+   /* The driver hooks are reinstalled only when `bit` was the sole reason set. */
+   if (before != bit)
+      return;
+
+   _swrast_flush( ctx );
+   tnl->Driver.Render.Start = sisRenderStart;
+   tnl->Driver.Render.PrimitiveNotify = sisRenderPrimitive;
+   tnl->Driver.Render.Finish = sisRenderFinish;
+
+   tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+   tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+   tnl->Driver.Render.Interp = _tnl_interp;
+
+   _tnl_invalidate_vertex_state( ctx, ~0 );
+   _tnl_invalidate_vertices( ctx, ~0 );
+   _tnl_install_attrs( ctx, smesa->vertex_attrs, smesa->vertex_attr_count,
+                       smesa->hw_viewport, 0 );
+   smesa->NewGLState |= _SIS_NEW_RENDER_STATE;
+   if (SIS_DEBUG & DEBUG_FALLBACKS)
+      fprintf(stderr, "SiS end rasterization fallback: 0x%x %s\n",
+              bit, getFallbackString(bit));
+}
+
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+/* Install the SiS render hooks in the TNL module; the shared tables are filled on the first call only (unsynchronized static flag). */
+void sisInitTriFuncs( GLcontext *ctx )
+{
+   sisContextPtr smesa = SIS_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   static int firsttime = 1;
+
+   if (firsttime) {
+      init_rast_tab();
+      firsttime = 0;
+      /* Presumably the TAG(sis_vert_init) instantiations of sis_tritmp.h, one per vertex-format combination -- confirm. */
+      sis_vert_init_none();
+      sis_vert_init_g();
+      sis_vert_init_w();
+      sis_vert_init_gw();
+      sis_vert_init_s();
+      sis_vert_init_gs();
+      sis_vert_init_ws();
+      sis_vert_init_gws();
+      sis_vert_init_t0();
+      sis_vert_init_gt0();
+      sis_vert_init_wt0();
+      sis_vert_init_gwt0();
+      sis_vert_init_st0();
+      sis_vert_init_gst0();
+      sis_vert_init_wst0();
+      sis_vert_init_gwst0();
+      sis_vert_init_t1();
+      sis_vert_init_gt1();
+      sis_vert_init_wt1();
+      sis_vert_init_gwt1();
+      sis_vert_init_st1();
+      sis_vert_init_gst1();
+      sis_vert_init_wst1();
+      sis_vert_init_gwst1();
+      sis_vert_init_t0t1();
+      sis_vert_init_gt0t1();
+      sis_vert_init_wt0t1();
+      sis_vert_init_gwt0t1();
+      sis_vert_init_st0t1();
+      sis_vert_init_gst0t1();
+      sis_vert_init_wst0t1();
+      sis_vert_init_gwst0t1();
+   }
+   /* Reset the cached render index and flag the render state as new for this context. */
+   smesa->RenderIndex = ~0;
+   smesa->NewGLState |= _SIS_NEW_RENDER_STATE;
+   /* Driver-specific pipeline/render hooks; vertex build, copy-PV and interp use the stock _tnl_* helpers. */
+   tnl->Driver.RunPipeline = sisRunPipeline;
+   tnl->Driver.Render.Start = sisRenderStart;
+   tnl->Driver.Render.Finish = sisRenderFinish;
+   tnl->Driver.Render.PrimitiveNotify = sisRenderPrimitive;
+   tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple;
+
+   tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+   tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+   tnl->Driver.Render.Interp = _tnl_interp;
+   /* NOTE(review): presumably a vertex count and a per-vertex byte size -- confirm against _tnl_init_vertices. */
+   _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 
+		       (6 + 2*ctx->Const.MaxTextureUnits) * sizeof(GLfloat) );
+
+   smesa->verts = (char *)tnl->clipspace.vertex_buf;
+}
diff --git a/src/mesa/drivers/dri/sis/sis_tris.h b/src/mesa/drivers/dri/sis/sis_tris.h
new file mode 100644
index 0000000000..b34fe8c7c9
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_tris.h
@@ -0,0 +1,70 @@
+/**************************************************************************
+
+Copyright 2003 Eric Anholt
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ERIC ANHOLT BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *    Eric Anholt <anholt@FreeBSD.org>
+ */
+
+#ifndef __SIS_TRIS_H__
+#define __SIS_TRIS_H__
+
+#include "sis_lock.h"
+#include "main/mtypes.h"
+
+extern void sisInitTriFuncs( GLcontext *ctx );
+extern void sisFlushPrims( sisContextPtr smesa );
+extern void sisFlushPrimsLocked( sisContextPtr smesa );
+extern void sisFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+
+#define FALLBACK( smesa, bit, mode ) sisFallback( (smesa)->glCtx, bit, mode )
+/* Flush queued vertices when any are pending; note that (smesa) is evaluated more than once. */
+#define SIS_FIREVERTICES(smesa)				\
+do {							\
+   if ((smesa)->vb_cur != (smesa)->vb_last)		\
+      sisFlushPrims(smesa);				\
+} while (0)
+/* Reserve `bytes` in the vertex buffer, flushing first when they would reach vb_end; returns the start of the reserved space. */
+static INLINE GLuint *sisAllocDmaLow(sisContextPtr smesa, int bytes)
+{
+   GLuint *start;
+   /* Not enough room left: flush what is queued so the buffer can be reused from its start. */
+   if (smesa->vb_cur + bytes >= smesa->vb_end) {
+      LOCK_HARDWARE();
+      sisFlushPrimsLocked(smesa);
+      if (smesa->using_agp) {	/* the non-AGP flush path already rewinds vb_cur itself */
+	 WaitEngIdle(smesa);
+	 smesa->vb_cur = smesa->vb;
+	 smesa->vb_last = smesa->vb_cur;
+      }
+      UNLOCK_HARDWARE();
+   }
+   /* NOTE(review): nothing here checks that `bytes` fits in an empty buffer -- confirm callers bound it. */
+   start = (GLuint *)smesa->vb_cur;
+   smesa->vb_cur += bytes;
+   return start;
+}
+
+#endif /* __SIS_TRIS_H__ */
diff --git a/src/mesa/drivers/dri/sis/sis_tritmp.h b/src/mesa/drivers/dri/sis/sis_tritmp.h
new file mode 100644
index 0000000000..f75e17318f
--- /dev/null
+++ b/src/mesa/drivers/dri/sis/sis_tritmp.h
@@ -0,0 +1,250 @@
+/* $XFree86*/ /* -*- c-basic-offset: 3 -*- */
+/*
+ * Copyright 2005 Eric Anholt
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <anholt@FreeBSD.org>
+ *    Jim Duchek <jim@linuxpimps.com>	-- Utah GLX 6326 code
+ *    Alan Cox <alan@redhat.com>	-- 6326 Debugging
+ *
+ */
+/* Emit one triangle (three consecutive vertices starting at verts) through MMIO. */
+static void TAG(sis_draw_tri_mmio)(sisContextPtr smesa, char *verts)
+{
+   const int stride = smesa->vertex_size * 4;	/* byte distance between vertices */
+   sisVertexPtr v0 = (sisVertexPtr)verts;
+   sisVertexPtr v1 = (sisVertexPtr)(verts + stride);
+   sisVertexPtr v2 = (sisVertexPtr)(verts + stride * 2);
+   mWait3DCmdQueue (MMIO_VERT_REG_COUNT * 3);
+   SIS_MMIO_WRITE_VERTEX(v0, 0, 0);
+   SIS_MMIO_WRITE_VERTEX(v1, 1, 0);
+   SIS_MMIO_WRITE_VERTEX(v2, 2, 1);
+}
+/* Emit one line (two consecutive vertices starting at verts) through MMIO. */
+static void TAG(sis_draw_line_mmio)(sisContextPtr smesa, char *verts)
+{
+   const int stride = smesa->vertex_size * 4;	/* byte distance between vertices */
+   sisVertexPtr v0 = (sisVertexPtr)verts;
+   sisVertexPtr v1 = (sisVertexPtr)(verts + stride);
+   mWait3DCmdQueue (MMIO_VERT_REG_COUNT * 2);
+   SIS_MMIO_WRITE_VERTEX(v0, 0, 0);
+   SIS_MMIO_WRITE_VERTEX(v1, 1, 1);
+}
+/* Emit a single point (the vertex at verts) through MMIO. */
+static void TAG(sis_draw_point_mmio)(sisContextPtr smesa, char *verts)
+{
+   sisVertexPtr v0 = (sisVertexPtr)verts;
+   /* The second macro argument is 1 here, whereas the tri/line emitters start at 0. */
+   mWait3DCmdQueue (MMIO_VERT_REG_COUNT * 1);
+   SIS_MMIO_WRITE_VERTEX(v0, 1, 1);
+}
+
+#if !(SIS_STATES & VERT_UV1)
+static void TAG(sis6326_draw_tri_mmio)(sisContextPtr smesa, char *verts)
+{
+   sisVertexPtr v0 = (sisVertexPtr)verts;
+   sisVertexPtr v1 = (sisVertexPtr)(verts + smesa->vertex_size * 4);
+   sisVertexPtr v2 = (sisVertexPtr)(verts + smesa->vertex_size * 4 * 2);
+   GLfloat x0, x1, x2;
+   GLfloat y0, y1, y2;
+   GLfloat delt02, diffx02, diffy02, diffy12;
+   GLint dwPrimitiveSet = smesa->dwPrimitiveSet;
+   sisVertex tv0, tv1, tv2;
+   /* 6326 triangle: copy the vertices, classify them top/mid/bottom by y, pick the edge direction, then emit via MMIO. */
+   /* XXX Culling? */
+   /* Work on local copies whose y went through Y_FLIP; v0..v2 are repointed at those copies. */
+   tv0 = *v0;
+   tv1 = *v1;
+   tv2 = *v2;
+   tv0.v.y = Y_FLIP(tv0.v.y);
+   tv1.v.y = Y_FLIP(tv1.v.y);
+   tv2.v.y = Y_FLIP(tv2.v.y);
+   v0 = &tv0;
+   v1 = &tv1;
+   v2 = &tv2;
+
+   /* Cull polygons we won't draw. The hardware draws funky things if it
+      is fed these */
+   if((((v1->v.x - v0->v.x) * (v0->v.y - v2->v.y)) +
+       ((v1->v.y - v0->v.y) * (v2->v.x - v0->v.x))) < 0)
+      return;
+   y0 = v0->v.y;
+   y1 = v1->v.y;
+   y2 = v2->v.y;
+   /* Order by y: (x0,y0) takes the largest y, (x1,y1) the middle one, (x2,y2) the smallest.  The matching */
+   /* OP_6326_3D_{A,B,C}{TOP,MID,BOT} bits are set, plus a flat-shade selector when VERT_SMOOTH is not in SIS_STATES. */
+   if (y0 > y1) {
+      if (y1 > y2) {
+         x0 = v0->v.x;
+         x1 = v1->v.x;
+         x2 = v2->v.x;
+         dwPrimitiveSet |= OP_6326_3D_ATOP | OP_6326_3D_BMID | OP_6326_3D_CBOT;
+         if ((SIS_STATES & VERT_SMOOTH) == 0)
+            dwPrimitiveSet |= OP_6326_3D_SHADE_FLAT_BOT;
+      } else {
+         if (y0 > y2) {
+            x0 = v0->v.x;
+            x1 = v2->v.x;
+            y1 = v2->v.y;
+            dwPrimitiveSet |= OP_6326_3D_ATOP | OP_6326_3D_CMID |
+                OP_6326_3D_BBOT;
+            if ((SIS_STATES & VERT_SMOOTH) == 0)
+               dwPrimitiveSet |= OP_6326_3D_SHADE_FLAT_MID;
+         } else {
+            x0 = v2->v.x;
+            y0 = v2->v.y;
+            x1 = v0->v.x;
+            y1 = v0->v.y;
+            dwPrimitiveSet |= OP_6326_3D_CTOP | OP_6326_3D_AMID |
+                OP_6326_3D_BBOT;
+            if ((SIS_STATES & VERT_SMOOTH) == 0)
+               dwPrimitiveSet |= OP_6326_3D_SHADE_FLAT_TOP;
+         }
+         x2 = v1->v.x;
+         y2 = v1->v.y;
+      }
+   } else {
+      if (y0 > y2) {
+         x0 = v1->v.x;
+         y0 = v1->v.y;
+         x1 = v0->v.x;
+         y1 = v0->v.y;
+         x2 = v2->v.x;
+         dwPrimitiveSet |= OP_6326_3D_BTOP | OP_6326_3D_AMID | OP_6326_3D_CBOT;
+         if ((SIS_STATES & VERT_SMOOTH) == 0)
+            dwPrimitiveSet |= OP_6326_3D_SHADE_FLAT_BOT;
+      } else {
+         if (y1 > y2) {
+            x0 = v1->v.x;
+            y0 = v1->v.y;
+            x1 = v2->v.x;
+            y1 = v2->v.y;
+            dwPrimitiveSet |= OP_6326_3D_BTOP | OP_6326_3D_CMID |
+                OP_6326_3D_ABOT;
+            if ((SIS_STATES & VERT_SMOOTH) == 0)
+               dwPrimitiveSet |= OP_6326_3D_SHADE_FLAT_MID;
+         } else {
+            x0 = v2->v.x;
+            y0 = v2->v.y;
+            x1 = v1->v.x;
+            dwPrimitiveSet |= OP_6326_3D_CTOP | OP_6326_3D_BMID |
+                OP_6326_3D_ABOT;
+            if ((SIS_STATES & VERT_SMOOTH) == 0)
+               dwPrimitiveSet |= OP_6326_3D_SHADE_FLAT_TOP;
+         }
+         x2 = v0->v.x;
+         y2 = v0->v.y;
+      }
+   }
+   /* OP_3D_DIRECTION_LEFT is set when the middle vertex lies on or left of (smaller x than) the top-to-bottom edge. */
+   if (x1 <= x0 && x1 <= x2) {
+      dwPrimitiveSet |= OP_3D_DIRECTION_LEFT;
+   } else if (x1 < x0 || x1 < x2) {
+      GLfloat tmp;
+
+      diffx02 = x0 - x2;
+      diffy02 = y0 - y2;
+      diffy12 = y1 - y2;
+
+      delt02 = diffx02 / diffy02;
+      tmp = x1 - (diffy12 * delt02 + x2);
+
+      if (tmp <= 0.0)
+         dwPrimitiveSet |= OP_3D_DIRECTION_LEFT;
+   }
+   /* NOTE(review): v0..v2 already point at tv0..tv2, so these are self-copies and Y_FLIP is applied a second time -- confirm intent. */
+   tv0 = *v0;
+   tv1 = *v1;
+   tv2 = *v2;
+   tv0.v.y = Y_FLIP(tv0.v.y);
+   tv1.v.y = Y_FLIP(tv1.v.y);
+   tv2.v.y = Y_FLIP(tv2.v.y);
+   v0 = &tv0;
+   v1 = &tv1;
+   v2 = &tv2;
+   /* NOTE(review): y0..y2 are not read again after the three stores below. */
+   y0 = v0->v.y;
+   y1 = v1->v.y;
+   y2 = v2->v.y;
+
+/*   fprintf(stderr, "Vertex0 %f %f %f\n", v0->v.x, v0->v.y, v0->v.z);
+   fprintf(stderr, "Vertex1 %f %f %f\n", v1->v.x, v1->v.y, v1->v.z);
+   fprintf(stderr, "Vertex2 %f %f %f\n", v2->v.x, v2->v.y, v2->v.z);*/
+   mWait3DCmdQueue(MMIO_VERT_REG_COUNT * 3 + 1);
+   MMIO(REG_3D_PrimitiveSet, dwPrimitiveSet); 
+   SIS_MMIO_WRITE_VERTEX(v0, 0, 0);
+   SIS_MMIO_WRITE_VERTEX(v1, 1, 0);
+   SIS_MMIO_WRITE_VERTEX(v2, 2, 1);
+   mEndPrimitive();
+}
+/* 6326 line: choose the major axis and the endpoint order, then emit through MMIO. */
+static void TAG(sis6326_draw_line_mmio)(sisContextPtr smesa, char *verts)
+{
+   sisVertexPtr v0 = (sisVertexPtr)verts;
+   sisVertexPtr v1 = (sisVertexPtr)(verts + smesa->vertex_size * 4);
+   GLint dwPrimitiveSet = smesa->dwPrimitiveSet;
+   const GLfloat dx = v0->v.x - v1->v.x, dy = v0->v.y - v1->v.y;
+
+   /* Compare magnitudes in floating point: abs() takes an int, so it would
+    * truncate the deltas and every |delta| < 1 would compare as 0. */
+   if ((dy < 0 ? -dy : dy) > (dx < 0 ? -dx : dx)) {
+      dwPrimitiveSet |= OP_3D_DIRECTION_VERTICAL;
+      if (v0->v.y > v1->v.y)
+         dwPrimitiveSet |= OP_6326_3D_ATOP | OP_6326_3D_BBOT;
+      else
+         dwPrimitiveSet |= OP_6326_3D_BTOP | OP_6326_3D_ABOT;
+   } else if (v0->v.y > v1->v.y) {
+      dwPrimitiveSet |= OP_6326_3D_BTOP | OP_6326_3D_ABOT;
+   } else {
+      dwPrimitiveSet |= OP_6326_3D_ATOP | OP_6326_3D_BBOT;
+   }
+   mWait3DCmdQueue (MMIO_VERT_REG_COUNT * 2 + 1);
+   MMIO(REG_3D_PrimitiveSet, dwPrimitiveSet); 
+   SIS_MMIO_WRITE_VERTEX(v0, 0, 0);
+   SIS_MMIO_WRITE_VERTEX(v1, 1, 1);
+}
+/* 6326 point: write the primitive word with OP_6326_3D_ATOP or'ed in, then the single vertex. */
+static void TAG(sis6326_draw_point_mmio)(sisContextPtr smesa, char *verts)
+{
+   sisVertexPtr v0 = (sisVertexPtr)verts;
+   /* The "+ 1" presumably reserves the queue slot for the REG_3D_PrimitiveSet write -- confirm. */
+   mWait3DCmdQueue (MMIO_VERT_REG_COUNT * 1 + 1);
+   MMIO(REG_3D_PrimitiveSet, smesa->dwPrimitiveSet | OP_6326_3D_ATOP); 
+   SIS_MMIO_WRITE_VERTEX(v0, 1, 1);
+}
+#endif
+/* Register this instantiation's emitters in the MMIO dispatch tables at index SIS_STATES (and SIS_STATES | VERT_6326 when UV1 is not used). */
+static INLINE void TAG(sis_vert_init)( void )
+{
+   sis_tri_func_mmio[SIS_STATES] = TAG(sis_draw_tri_mmio);
+   sis_line_func_mmio[SIS_STATES] = TAG(sis_draw_line_mmio);
+   sis_point_func_mmio[SIS_STATES] = TAG(sis_draw_point_mmio);
+#if !(SIS_STATES & VERT_UV1)
+   sis_tri_func_mmio[SIS_STATES | VERT_6326] = TAG(sis6326_draw_tri_mmio);
+   sis_line_func_mmio[SIS_STATES | VERT_6326] = TAG(sis6326_draw_line_mmio);
+   sis_point_func_mmio[SIS_STATES | VERT_6326] = TAG(sis6326_draw_point_mmio);
+#endif
+}
+
+#undef TAG
+#undef SIS_STATES
diff --git a/src/mesa/drivers/dri/swrast/Makefile b/src/mesa/drivers/dri/swrast/Makefile
new file mode 100644
index 0000000000..d2cf6dbc55
--- /dev/null
+++ b/src/mesa/drivers/dri/swrast/Makefile
@@ -0,0 +1,26 @@
+# src/mesa/drivers/dri/swrast/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = swrast_dri.so
+
+DRIVER_DEFINES = -D__NOT_HAVE_DRM_H
+
+DRIVER_SOURCES = \
+	swrast.c \
+	swrast_span.c
+
+C_SOURCES = \
+	$(SWRAST_COMMON_SOURCES) \
+	$(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+SWRAST_COMMON_SOURCES = \
+	../../common/driverfuncs.c \
+	../common/utils.c \
+	../common/drisw_util.c
+
+include ../Makefile.template
+
diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c
new file mode 100644
index 0000000000..f3903c2e38
--- /dev/null
+++ b/src/mesa/drivers/dri/swrast/swrast.c
@@ -0,0 +1,737 @@
+/*
+ * Copyright 2008, 2010 George Sapountzis <gsapountzis@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * DRI software rasterizer
+ *
+ * This is the mesa swrast module packaged into a DRI driver structure.
+ *
+ * The front-buffer is allocated by the loader. The loader provides read/write
+ * callbacks for access to the front-buffer. The driver uses a scratch row for
+ * front-buffer rendering to avoid repeated calls to the loader.
+ *
+ * The back-buffer is allocated by the driver and is private.
+ */
+
+#include "main/context.h"
+#include "main/extensions.h"
+#include "main/formats.h"
+#include "main/framebuffer.h"
+#include "main/imports.h"
+#include "main/renderbuffer.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+#include "vbo/vbo.h"
+#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
+#include "utils.h"
+
+#include "main/teximage.h"
+#include "main/texfetch.h"
+#include "main/texformat.h"
+#include "main/texstate.h"
+
+#include "swrast_priv.h"
+
+
+/**
+ * Screen and config-related functions
+ */
+
+/* Fill level 0 of the texture currently selected for `target` with the drawable's image. */
+static void swrastSetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
+				GLint texture_format, __DRIdrawable *dPriv)
+{
+    struct dri_context *dri_ctx = pDRICtx->driverPrivate;
+    __DRIscreen *sPriv = dPriv->driScreenPriv;
+    struct gl_texture_unit *texUnit;
+    struct gl_texture_object *texObj;
+    struct gl_texture_image *texImage;
+    uint32_t internalFormat;
+    int x, y, w, h;
+
+    internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
+
+    texUnit = _mesa_get_current_tex_unit(&dri_ctx->Base);
+    texObj = _mesa_select_tex_object(&dri_ctx->Base, texUnit, target);
+    if (texObj == NULL)
+	return;			/* no texture object for this target */
+    texImage = _mesa_get_tex_image(&dri_ctx->Base, texObj, target, 0);
+    if (texImage == NULL)
+	return;			/* image lookup/allocation failed */
+
+    _mesa_lock_texture(&dri_ctx->Base, texObj);
+    sPriv->swrast_loader->getDrawableInfo(dPriv, &x, &y, &w, &h, dPriv->loaderPrivate);
+
+    _mesa_init_teximage_fields(&dri_ctx->Base, target, texImage,
+			       w, h, 1, 0, internalFormat);
+    if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
+	texImage->TexFormat = MESA_FORMAT_XRGB8888;
+    else
+	texImage->TexFormat = MESA_FORMAT_ARGB8888;
+    _mesa_set_fetch_functions(texImage, 2);
+
+    /* NOTE(review): nothing visible here allocates texImage->Data before the loader writes into it -- confirm. */
+    sPriv->swrast_loader->getImage(dPriv, x, y, w, h, (char *)texImage->Data,
+				   dPriv->loaderPrivate);
+    _mesa_unlock_texture(&dri_ctx->Base, texObj);
+}
+/* Version of swrastSetTexBuffer2() without a format argument: always uses __DRI_TEXTURE_FORMAT_RGBA. */
+static void swrastSetTexBuffer(__DRIcontext *pDRICtx, GLint target,
+			       __DRIdrawable *dPriv)
+{
+    swrastSetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
+}
+/* Tex-buffer extension record: name/version header followed by the two setTexBuffer entry points. */
+static const __DRItexBufferExtension swrastTexBufferExtension = {
+    { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
+    swrastSetTexBuffer,
+    swrastSetTexBuffer2,
+};
+/* NULL-terminated extension list; dri_init_screen() stores it in psp->extensions. */
+static const __DRIextension *dri_screen_extensions[] = {
+    &swrastTexBufferExtension.base,
+    NULL
+};
+/* Build the config list for one pixel size; returns NULL (after logging to stderr) on a bad size or creation failure. */
+static __DRIconfig **
+swrastFillInModes(__DRIscreen *psp,
+		  unsigned pixel_bits, unsigned depth_bits,
+		  unsigned stencil_bits, GLboolean have_back_buffer)
+{
+    __DRIconfig **configs;
+    unsigned depth_buffer_factor;
+    unsigned back_buffer_factor;
+    GLenum fb_format;
+    GLenum fb_type;
+
+    /* Swap methods handed to driCreateConfigs(): GLX_NONE and
+     * GLX_SWAP_UNDEFINED_OML (GLX_SWAP_COPY_OML is not offered).
+     */
+    static const GLenum back_buffer_modes[] = {
+	GLX_NONE, GLX_SWAP_UNDEFINED_OML
+    };
+
+    uint8_t depth_bits_array[4];
+    uint8_t stencil_bits_array[4];
+    uint8_t msaa_samples_array[1];
+    (void) psp; (void) have_back_buffer;	/* accepted but not consulted */
+    depth_bits_array[0] = 0;
+    depth_bits_array[1] = 0;
+    depth_bits_array[2] = depth_bits;
+    depth_bits_array[3] = depth_bits;
+
+    /* Just like with the accumulation buffer, always provide some modes
+     * with a stencil buffer.
+     */
+    stencil_bits_array[0] = 0;
+    stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits;
+    stencil_bits_array[2] = 0;
+    stencil_bits_array[3] = (stencil_bits == 0) ? 8 : stencil_bits;
+
+    msaa_samples_array[0] = 0;
+
+    depth_buffer_factor = 4;
+    back_buffer_factor = 2;
+
+    switch (pixel_bits) {
+    case 8:
+	fb_format = GL_RGB;
+	fb_type = GL_UNSIGNED_BYTE_2_3_3_REV;
+	break;
+    case 16:
+	fb_format = GL_RGB;
+	fb_type = GL_UNSIGNED_SHORT_5_6_5;
+	break;
+    case 24:
+	fb_format = GL_BGR;
+	fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+	break;
+    case 32:
+	fb_format = GL_BGRA;
+	fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+	break;
+    default:
+	fprintf(stderr, "[%s:%u] bad depth %u\n", __func__, __LINE__,
+		pixel_bits);	/* pixel_bits is unsigned, hence %u */
+	return NULL;
+    }
+
+    configs = driCreateConfigs(fb_format, fb_type,
+			       depth_bits_array, stencil_bits_array,
+			       depth_buffer_factor, back_buffer_modes,
+			       back_buffer_factor, msaa_samples_array, 1,
+			       GL_TRUE);
+    if (configs == NULL) {
+	fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
+		__LINE__);
+	return NULL;
+    }
+
+    return configs;
+}
+
+static const __DRIconfig **
+dri_init_screen(__DRIscreen * psp)
+{
+    /* { pixel bits, depth bits, stencil bits } of each config family */
+    static const unsigned family[4][3] = {
+	{ 8, 8, 0 }, { 16, 16, 0 }, { 24, 24, 8 }, { 32, 24, 8 }
+    };
+    __DRIconfig **part[4], **all;
+    int i;
+
+    TRACE;
+    psp->extensions = dri_screen_extensions;
+
+    /* Build every family first, then chain them in ascending pixel size. */
+    for (i = 0; i < 4; i++)
+	part[i] = swrastFillInModes(psp, family[i][0], family[i][1],
+				    family[i][2], 1);
+    for (all = part[0], i = 1; i < 4; i++)
+	all = driConcatConfigs(all, part[i]);
+    return (const __DRIconfig **)all;
+}
+/* DestroyScreen hook: nothing to release; only emits the optional TRACE output. */
+static void
+dri_destroy_screen(__DRIscreen * sPriv)
+{
+    TRACE;
+}
+
+
+/**
+ * Framebuffer and renderbuffer-related functions.
+ */
+
+static GLuint
+choose_pixel_format(const GLvisual *v)
+{
+    /* Layouts this driver recognizes: colour depth plus the RGB masks. */
+    static const struct {
+	int depth;
+	GLuint red, green, blue;
+	GLuint format;
+    } known[] = {
+	{ 32, 0xff0000, 0x00ff00, 0x0000ff, PF_A8R8G8B8 },
+	{ 24, 0xff0000, 0x00ff00, 0x0000ff, PF_X8R8G8B8 },
+	{ 16, 0xf800,   0x07e0,   0x001f,   PF_R5G6B5 },
+	{  8, 0x07,     0x38,     0xc0,     PF_R3G3B2 },
+    };
+    const int depth = v->rgbBits;
+    unsigned i;
+
+    for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+	if (depth == known[i].depth
+	    && v->redMask == known[i].red
+	    && v->greenMask == known[i].green
+	    && v->blueMask == known[i].blue)
+	    return known[i].format;
+    }
+
+    /* No table entry matched the visual. */
+    _mesa_problem( NULL, "unexpected format in %s", __FUNCTION__ );
+    return 0;
+}
+/* Delete hook installed by swrast_new_renderbuffer(): frees the pixel store (if any) and the renderbuffer itself. */
+static void
+swrast_delete_renderbuffer(struct gl_renderbuffer *rb)
+{
+    TRACE;
+
+    free(rb->Data);
+    free(rb);
+}
+
+/* Bytes in a row of pitch_bits bits once padded up to a multiple of mul bits
+ * (exact only for power-of-two mul); see bytes_per_line in libGL. */
+static INLINE int
+bytes_per_line(unsigned pitch_bits, unsigned mul)
+{
+   const unsigned padded_bits = (pitch_bits + mul - 1) & ~(mul - 1);
+   return padded_bits >> 3;
+}
+
+static GLboolean
+swrast_alloc_front_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
+			   GLenum internalFormat, GLuint width, GLuint height)
+{
+    struct swrast_renderbuffer *srb = swrast_renderbuffer(rb);
+
+    TRACE;
+
+    /* The front buffer lives in the loader: record the size, own no pixels. */
+    rb->Width = width;
+    rb->Height = height;
+    rb->Data = NULL;
+    /* Row pitch in bytes, padded to a 32-bit boundary. */
+    srb->pitch = bytes_per_line(width * srb->bpp, 32);
+    return GL_TRUE;
+}
+
+/* AllocStorage hook for the private back buffer: (re)allocates height * pitch bytes. */
+static GLboolean
+swrast_alloc_back_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
+			  GLenum internalFormat, GLuint width, GLuint height)
+{
+    struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb);
+    size_t size;
+
+    TRACE;
+    free(rb->Data);
+    swrast_alloc_front_storage(ctx, rb, internalFormat, width, height);
+    size = (size_t) height * xrb->pitch;
+    rb->Data = malloc(size);
+    /* Report failure instead of GL_TRUE with no pixels (malloc(0) may be NULL). */
+    return (rb->Data != NULL || size == 0) ? GL_TRUE : GL_FALSE;
+}
+
+/* Create a front or back colour renderbuffer for `visual`; NULL on allocation failure or unknown pixel format. */
+static struct swrast_renderbuffer *
+swrast_new_renderbuffer(const GLvisual *visual, GLboolean front)
+{
+    struct swrast_renderbuffer *xrb = calloc(1, sizeof *xrb);
+    GLuint pixel_format;
+
+    TRACE;
+    if (!xrb)
+	return NULL;
+
+    _mesa_init_renderbuffer(&xrb->Base, 0);
+    pixel_format = choose_pixel_format(visual);
+
+    xrb->Base.Delete = swrast_delete_renderbuffer;
+    if (front) {
+	xrb->Base.AllocStorage = swrast_alloc_front_storage;
+	swrast_set_span_funcs_front(xrb, pixel_format);
+    } else {
+	xrb->Base.AllocStorage = swrast_alloc_back_storage;
+	swrast_set_span_funcs_back(xrb, pixel_format);
+    }
+
+    switch (pixel_format) {
+    case PF_A8R8G8B8:
+	xrb->Base.Format = MESA_FORMAT_ARGB8888;
+	xrb->Base.InternalFormat = GL_RGBA;
+	xrb->Base._BaseFormat = GL_RGBA;
+	xrb->Base.DataType = GL_UNSIGNED_BYTE;
+	xrb->bpp = 32;
+	break;
+    case PF_X8R8G8B8:
+	xrb->Base.Format = MESA_FORMAT_ARGB8888; /* XXX */
+	xrb->Base.InternalFormat = GL_RGB;
+	xrb->Base._BaseFormat = GL_RGB;
+	xrb->Base.DataType = GL_UNSIGNED_BYTE;
+	xrb->bpp = 32;
+	break;
+    case PF_R5G6B5:
+	xrb->Base.Format = MESA_FORMAT_RGB565;
+	xrb->Base.InternalFormat = GL_RGB;
+	xrb->Base._BaseFormat = GL_RGB;
+	xrb->Base.DataType = GL_UNSIGNED_BYTE;
+	xrb->bpp = 16;
+	break;
+    case PF_R3G3B2:
+	xrb->Base.Format = MESA_FORMAT_RGB332;
+	xrb->Base.InternalFormat = GL_RGB;
+	xrb->Base._BaseFormat = GL_RGB;
+	xrb->Base.DataType = GL_UNSIGNED_BYTE;
+	xrb->bpp = 8;
+	break;
+    default:
+	/* Unknown format: release the object instead of leaking it. */
+	free(xrb);
+	return NULL;
+    }
+
+    return xrb;
+}
+/* CreateBuffer hook: allocate the drawable, its scratch row and its renderbuffers; GL_FALSE when any allocation fails. */
+static GLboolean
+dri_create_buffer(__DRIscreen * sPriv,
+		  __DRIdrawable * dPriv,
+		  const __GLcontextModes * visual, GLboolean isPixmap)
+{
+    struct dri_drawable *drawable = NULL;
+    GLframebuffer *fb;
+    struct swrast_renderbuffer *frontrb = NULL, *backrb = NULL;
+
+    TRACE;
+    drawable = CALLOC_STRUCT(dri_drawable);
+    if (drawable == NULL)
+	goto drawable_fail;
+    dPriv->driverPrivate = drawable;
+    drawable->dPriv = dPriv;
+    drawable->row = malloc(MAX_WIDTH * 4);
+    if (drawable->row == NULL)
+	goto drawable_fail;
+
+    /* Create the colour buffers before touching the framebuffer so that a
+     * failed allocation can be unwound without a half-built framebuffer. */
+    frontrb = swrast_new_renderbuffer(visual, GL_TRUE);
+    if (frontrb == NULL)
+	goto drawable_fail;
+    if (visual->doubleBufferMode) {
+	backrb = swrast_new_renderbuffer(visual, GL_FALSE);
+	if (backrb == NULL)
+	    goto drawable_fail;
+    }
+
+    /* basic framebuffer setup, then attach front and (optional) back */
+    fb = &drawable->Base;
+    _mesa_initialize_window_framebuffer(fb, visual);
+    _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontrb->Base);
+    if (backrb)
+	_mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backrb->Base);
+
+    /* add software renderbuffers */
+    _mesa_add_soft_renderbuffers(fb,
+				 GL_FALSE, /* color */
+				 visual->haveDepthBuffer,
+				 visual->haveStencilBuffer,
+				 visual->haveAccumBuffer,
+				 GL_FALSE, /* alpha */
+				 GL_FALSE /* aux bufs */);
+    return GL_TRUE;
+
+drawable_fail:
+    if (frontrb)
+	swrast_delete_renderbuffer(&frontrb->Base);
+    if (drawable) {
+	free(drawable->row);
+	dPriv->driverPrivate = NULL;	/* do not leave a dangling pointer */
+    }
+    FREE(drawable);
+    return GL_FALSE;
+}
+
+/* DestroyBuffer hook: frees the scratch row and drops the framebuffer reference. */
+static void
+dri_destroy_buffer(__DRIdrawable * dPriv)
+{
+    struct dri_drawable *drawable;
+    GLframebuffer *fb;
+    TRACE;
+
+    if (!dPriv)
+	return;
+    drawable = dri_drawable(dPriv);
+    free(drawable->row);
+    /* Flag the framebuffer for deletion, then release this reference to it. */
+    fb = &drawable->Base;
+    fb->DeletePending = GL_TRUE;
+    _mesa_reference_framebuffer(&fb, NULL);
+}
+/* SwapBuffers hook: hands the back buffer's pixels to the loader; does nothing for single-buffered drawables. */
+static void
+dri_swap_buffers(__DRIdrawable * dPriv)
+{
+    __DRIscreen *sPriv = dPriv->driScreenPriv;
+
+    GET_CURRENT_CONTEXT(ctx);
+
+    struct dri_drawable *drawable = dri_drawable(dPriv);
+    GLframebuffer *fb;
+    struct swrast_renderbuffer *frontrb, *backrb;
+
+    TRACE;
+
+    fb = &drawable->Base;
+
+    frontrb =
+	swrast_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
+    backrb =
+	swrast_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer);
+
+    /* check for single-buffered */
+    if (backrb == NULL)
+	return;
+
+    /* check if swapping currently bound buffer */
+    if (ctx && ctx->DrawBuffer == fb) {
+	/* flush pending rendering */
+	_mesa_notifySwapBuffers(ctx);
+    }
+    /* The image comes from the back buffer; its extent is taken from the front renderbuffer's size. */
+    sPriv->swrast_loader->putImage(dPriv, __DRI_SWRAST_IMAGE_OP_SWAP,
+				   0, 0,
+				   frontrb->Base.Width,
+				   frontrb->Base.Height,
+				   backrb->Base.Data,
+				   dPriv->loaderPrivate);
+}
+
+
+/**
+ * General device driver functions.
+ */
+
+/* Ask the loader for the drawable's current size; its position is discarded. */
+static void
+get_window_size( GLframebuffer *fb, GLsizei *w, GLsizei *h )
+{
+    __DRIdrawable *drawable = swrast_drawable(fb)->dPriv;
+    __DRIscreen *screen = drawable->driScreenPriv;
+    int ignored_x, ignored_y;
+
+    screen->swrast_loader->getDrawableInfo(drawable, &ignored_x, &ignored_y,
+					   w, h, drawable->loaderPrivate);
+}
+
+/* Resize fb when the loader reports a size different from the recorded one. */
+static void
+swrast_check_and_update_window_size( GLcontext *ctx, GLframebuffer *fb )
+{
+    GLsizei w, h;
+    get_window_size(fb, &w, &h);
+    if (fb->Width == w && fb->Height == h)
+	return;
+    _mesa_resize_framebuffer(ctx, fb, w, h);
+}
+
+/* GetString driver hook: vendor and renderer names; NULL for any other pname. */
+static const GLubyte *
+get_string(GLcontext *ctx, GLenum pname)
+{
+    (void) ctx;
+
+    if (pname == GL_VENDOR)
+	return (const GLubyte *) "Mesa Project";
+    if (pname == GL_RENDERER)
+	return (const GLubyte *) "Software Rasterizer";
+
+    return NULL;
+}
+/* UpdateState driver hook: forwards the new_state bits to the swrast, swsetup, vbo and tnl modules. */
+static void
+update_state( GLcontext *ctx, GLuint new_state )
+{
+    /* not much to do here - pass it on */
+    _swrast_InvalidateState( ctx, new_state );
+    _swsetup_InvalidateState( ctx, new_state );
+    _vbo_InvalidateState( ctx, new_state );
+    _tnl_InvalidateState( ctx, new_state );
+}
+
+static void
+viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+    GLframebuffer *const draw_fb = ctx->WinSysDrawBuffer;
+    GLframebuffer *const read_fb = ctx->WinSysReadBuffer;
+    /* x/y/w/h are not used: the call only triggers a window-size re-check. */
+    swrast_check_and_update_window_size(ctx, draw_fb);
+    swrast_check_and_update_window_size(ctx, read_fb);
+}
+
+/* XRGB8888 for a plain GL_RGB request; everything else is left to core Mesa. */
+static gl_format swrastChooseTextureFormat(GLcontext * ctx,
+					   GLint internalFormat,
+					   GLenum format,
+					   GLenum type)
+{
+    return (internalFormat == GL_RGB) ? MESA_FORMAT_XRGB8888
+	: _mesa_choose_tex_format(ctx, internalFormat, format, type);
+}
+/* Plug the swrast-specific hooks into an already initialized driver function table. */
+static void
+swrast_init_driver_functions(struct dd_function_table *driver)
+{
+    driver->ChooseTextureFormat = swrastChooseTextureFormat;
+    driver->Viewport = viewport;
+    driver->GetBufferSize = NULL;
+    driver->UpdateState = update_state;
+    driver->GetString = get_string;
+}
+
+
+/**
+ * Context-related functions.
+ */
+/* CreateContext hook: allocate the driver context and bring up Mesa plus the software modules; GL_FALSE on failure. */
+static GLboolean
+dri_create_context(gl_api api,
+		   const __GLcontextModes * visual,
+		   __DRIcontext * cPriv, void *sharedContextPrivate)
+{
+    struct dri_context *ctx = NULL;
+    struct dri_context *share = (struct dri_context *)sharedContextPrivate;
+    GLcontext *mesaCtx = NULL;
+    GLcontext *sharedCtx = NULL;
+    struct dd_function_table functions;
+
+    TRACE;
+    (void) api;			/* the API selector is not consulted here */
+    ctx = CALLOC_STRUCT(dri_context);
+    if (ctx == NULL)
+	goto context_fail;
+
+    cPriv->driverPrivate = ctx;
+    ctx->cPriv = cPriv;
+
+    /* build table of device driver functions */
+    _mesa_init_driver_functions(&functions);
+    swrast_init_driver_functions(&functions);
+
+    if (share)
+	sharedCtx = &share->Base;
+
+    mesaCtx = &ctx->Base;
+
+    /* basic context setup */
+    if (!_mesa_initialize_context(mesaCtx, visual, sharedCtx, &functions, (void *) cPriv)) {
+	goto context_fail;
+    }
+
+    /* do bounds checking to prevent segfaults and server crashes! */
+    mesaCtx->Const.CheckArrayBounds = GL_TRUE;
+
+    /* create module contexts */
+    _swrast_CreateContext( mesaCtx );
+    _vbo_CreateContext( mesaCtx );
+    _tnl_CreateContext( mesaCtx );
+    _swsetup_CreateContext( mesaCtx );
+    _swsetup_Wakeup( mesaCtx );
+
+    /* use default TCL pipeline */
+    {
+       TNLcontext *tnl = TNL_CONTEXT(mesaCtx);
+       tnl->Driver.RunPipeline = _tnl_run_pipeline;
+    }
+
+    _mesa_enable_sw_extensions(mesaCtx);
+    _mesa_enable_1_3_extensions(mesaCtx);
+    _mesa_enable_1_4_extensions(mesaCtx);
+    _mesa_enable_1_5_extensions(mesaCtx);
+    _mesa_enable_2_0_extensions(mesaCtx);
+    _mesa_enable_2_1_extensions(mesaCtx);
+
+    _mesa_meta_init(mesaCtx);
+
+    driInitExtensions( mesaCtx, NULL, GL_FALSE );
+
+    return GL_TRUE;
+
+context_fail:
+    /* Never leave cPriv pointing at the context that is freed below. */
+    if (ctx != NULL)
+	cPriv->driverPrivate = NULL;
+    FREE(ctx);
+    return GL_FALSE;
+}
+
+/* DestroyContext hook: tear down the module contexts, then the Mesa context. */
+static void
+dri_destroy_context(__DRIcontext * cPriv)
+{
+    GLcontext *mesaCtx;
+
+    TRACE;
+
+    if (cPriv == NULL)
+	return;
+    mesaCtx = &dri_context(cPriv)->Base;
+
+    _mesa_meta_free(mesaCtx);
+    _swsetup_DestroyContext( mesaCtx );
+    _swrast_DestroyContext( mesaCtx );
+    _tnl_DestroyContext( mesaCtx );
+    _vbo_DestroyContext( mesaCtx );
+    _mesa_destroy_context( mesaCtx );
+}
+
+/* MakeCurrent hook: bind cPriv with its draw/read drawables, or unbind when cPriv is NULL. */
+static GLboolean
+dri_make_current(__DRIcontext * cPriv,
+		 __DRIdrawable * driDrawPriv,
+		 __DRIdrawable * driReadPriv)
+{
+    struct dri_drawable *draw, *read;
+    GLcontext *mesaCtx;
+    GLframebuffer *mesaDraw;
+    GLframebuffer *mesaRead;
+
+    TRACE;
+
+    if (!cPriv) {
+	/* unbind */
+	_mesa_make_current( NULL, NULL, NULL );
+	return GL_TRUE;
+    }
+
+    /* Binding a context needs both drawables. */
+    if (!driDrawPriv || !driReadPriv)
+	return GL_FALSE;
+
+    draw = dri_drawable(driDrawPriv);
+    read = dri_drawable(driReadPriv);
+    mesaCtx = &dri_context(cPriv)->Base;
+    mesaDraw = &draw->Base;
+    mesaRead = &read->Base;
+
+    /* Nothing to do when exactly this binding is already current. */
+    if (mesaCtx == _mesa_get_current_context()
+	&& mesaCtx->DrawBuffer == mesaDraw
+	&& mesaCtx->ReadBuffer == mesaRead)
+	return GL_TRUE;
+
+    _glapi_check_multithread();
+
+    /* Pick up any size change before the buffers become current. */
+    swrast_check_and_update_window_size(mesaCtx, mesaDraw);
+    if (mesaRead != mesaDraw)
+	swrast_check_and_update_window_size(mesaCtx, mesaRead);
+
+    _mesa_make_current( mesaCtx,
+			mesaDraw,
+			mesaRead );
+
+    return GL_TRUE;
+}
+/* UnbindContext hook: nothing to release here, so it always returns GL_TRUE. */
+static GLboolean
+dri_unbind_context(__DRIcontext * cPriv)
+{
+    TRACE;
+    (void) cPriv;
+    return GL_TRUE;
+}
+
+/* Driver entry-point table: wires the dri_* functions defined in this file to the __DriverAPIRec slots. */
+const struct __DriverAPIRec driDriverAPI = {
+    .InitScreen = dri_init_screen,
+    .DestroyScreen = dri_destroy_screen,
+    .CreateContext = dri_create_context,
+    .DestroyContext = dri_destroy_context,
+    .CreateBuffer = dri_create_buffer,
+    .DestroyBuffer = dri_destroy_buffer,
+    .SwapBuffers = dri_swap_buffers,
+    .MakeCurrent = dri_make_current,
+    .UnbindContext = dri_unbind_context,
+};
+
+/* NULL-terminated table of extensions that the loader will dlsym() for. */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+    &driCoreExtension.base,
+    &driSWRastExtension.base,
+    NULL
+};
diff --git a/src/mesa/drivers/dri/swrast/swrast_priv.h b/src/mesa/drivers/dri/swrast/swrast_priv.h
new file mode 100644
index 0000000000..6679061a98
--- /dev/null
+++ b/src/mesa/drivers/dri/swrast/swrast_priv.h
@@ -0,0 +1,138 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ * Copyright 2008, 2010 George Sapountzis <gsapountzis@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _SWRAST_PRIV_H
+#define _SWRAST_PRIV_H
+
+#include <GL/gl.h>
+#include <GL/internal/dri_interface.h>
+#include "main/mtypes.h"
+#include "drisw_util.h"
+
+
+/**
+ * Debugging: set DEBUG_CORE / DEBUG_SPAN to 1 to enable TRACE / TRACE_SPAN.
+ */
+#define DEBUG_CORE	0
+#define DEBUG_SPAN	0
+
+#if DEBUG_CORE
+#define TRACE printf("--> %s\n", __FUNCTION__)	/* print the current function */
+#else
+#define TRACE					/* expands to nothing */
+#endif
+
+#if DEBUG_SPAN
+#define TRACE_SPAN printf("--> %s\n", __FUNCTION__)	/* same, for span code */
+#else
+#define TRACE_SPAN				/* expands to nothing */
+#endif
+
+
+/**
+ * Data types: driver wrappers with the Mesa object embedded as first member
+ */
+struct dri_context
+{
+    /* mesa, base class, must be first */
+    GLcontext Base;
+
+    /* dri: the __DRIcontext handle associated with this wrapper */
+    __DRIcontext *cPriv;
+};
+
+static INLINE struct dri_context *
+dri_context(__DRIcontext * driContextPriv)
+{
+    return (struct dri_context *)driContextPriv->driverPrivate; /* DRI -> wrapper */
+}
+
+static INLINE struct dri_context *
+swrast_context(GLcontext *ctx)
+{
+    return (struct dri_context *) ctx; /* relies on Base being the first member */
+}
+
+struct dri_drawable
+{
+    /* mesa, base class, must be first */
+    GLframebuffer Base;
+
+    /* dri: the __DRIdrawable handle associated with this wrapper */
+    __DRIdrawable *dPriv;
+
+    /* scratch row for optimized front-buffer rendering */
+    char *row;
+};
+
+static INLINE struct dri_drawable *
+dri_drawable(__DRIdrawable * driDrawPriv)
+{
+    return (struct dri_drawable *)driDrawPriv->driverPrivate; /* DRI -> wrapper */
+}
+
+static INLINE struct dri_drawable *
+swrast_drawable(GLframebuffer *fb)
+{
+    return (struct dri_drawable *) fb; /* relies on Base being the first member */
+}
+
+struct swrast_renderbuffer {
+    struct gl_renderbuffer Base;	/* kept first: the helper below casts to it */
+
+    /* renderbuffer pitch (in bytes) */
+    GLuint pitch;
+    /* bits per pixel of storage */
+    GLuint bpp;
+};
+
+static INLINE struct swrast_renderbuffer *
+swrast_renderbuffer(struct gl_renderbuffer *rb)
+{
+    return (struct swrast_renderbuffer *) rb; /* relies on Base being first */
+}
+
+
+/**
+ * Pixel formats we support
+ */
+#define PF_A8R8G8B8   1		/**< 32bpp TrueColor:  8-A, 8-R, 8-G, 8-B bits */
+#define PF_R5G6B5     2		/**< 16bpp TrueColor:  5-R, 6-G, 5-B bits */
+#define PF_R3G3B2     3		/**<  8bpp TrueColor:  3-R, 3-G, 2-B bits */
+#define PF_X8R8G8B8   4		/**< 32bpp TrueColor:  8-X, 8-R, 8-G, 8-B bits */
+
+
+/* swrast_span.c: install the span functions matching a PF_* pixel format */
+
+extern void
+swrast_set_span_funcs_back(struct swrast_renderbuffer *xrb,
+			   GLuint pixel_format);
+
+extern void
+swrast_set_span_funcs_front(struct swrast_renderbuffer *xrb,
+			    GLuint pixel_format);
+
+#endif /* _SWRAST_PRIV_H */
diff --git a/src/mesa/drivers/dri/swrast/swrast_span.c b/src/mesa/drivers/dri/swrast/swrast_span.c
new file mode 100644
index 0000000000..c7d0bfdac7
--- /dev/null
+++ b/src/mesa/drivers/dri/swrast/swrast_span.c
@@ -0,0 +1,371 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ * Copyright 2008, 2010 George Sapountzis <gsapountzis@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "swrast_priv.h"
+
+#define YFLIP(_xrb, Y) ((_xrb)->Base.Height - (Y) - 1)
+
+/*
+ * Dithering support takes the "computation" extreme in the "computation vs.
+ * storage" trade-off. This approach is very simple to implement and any
+ * computational overhead should be acceptable. XMesa uses table lookups for
+ * around 8KB of storage overhead per visual.
+ */
+#define DITHER 1
+
+#if DITHER
+/* 4x4 ordered-dither thresholds (0..15, scaled by 16), indexed by (x&3, y&3) */
+static const GLubyte kernel[16] = {
+    0*16,  8*16,  2*16, 10*16,
+   12*16,  4*16, 14*16,  6*16,
+    3*16, 11*16,  1*16,  9*16,
+   15*16,  7*16, 13*16,  5*16,
+};
+#define DITHER_COMP(X, Y) kernel[((X) & 0x3) | (((Y) & 0x3) << 2)]
+/* saturate a dithered channel value at CHAN_MAX */
+#define DITHER_CLAMP(X) (((X) < CHAN_MAX) ? (X) : CHAN_MAX)
+#else
+#define DITHER_COMP(X, Y) 0
+/* no dither offset is added, so no clamping is needed */
+#define DITHER_CLAMP(X) (X)
+#endif
+
+
+/*
+ * Pixel macros shared across front/back buffer span functions.
+ */
+
+/* 32-bit BGRA */
+#define STORE_PIXEL_A8R8G8B8(DST, X, Y, VALUE) \
+   *DST = VALUE[ACOMP] << 24 | VALUE[RCOMP] << 16 | VALUE[GCOMP] << 8 | VALUE[BCOMP]
+#define STORE_PIXEL_RGB_A8R8G8B8(DST, X, Y, VALUE) \
+   *DST = 0xff << 24 | VALUE[RCOMP] << 16 | VALUE[GCOMP] << 8 | VALUE[BCOMP]
+#define FETCH_PIXEL_A8R8G8B8(DST, SRC) \
+   DST[ACOMP] = *SRC >> 24;            \
+   DST[RCOMP] = (*SRC >> 16) & 0xff;   \
+   DST[GCOMP] = (*SRC >> 8) & 0xff;    \
+   DST[BCOMP] = *SRC & 0xff
+
+
+/* 32-bit BGRX */
+#define STORE_PIXEL_X8R8G8B8(DST, X, Y, VALUE) \
+   *DST = 0xff << 24 | VALUE[RCOMP] << 16 | VALUE[GCOMP] << 8 | VALUE[BCOMP]
+#define STORE_PIXEL_RGB_X8R8G8B8(DST, X, Y, VALUE) \
+   *DST = 0xff << 24 | VALUE[RCOMP] << 16 | VALUE[GCOMP] << 8 | VALUE[BCOMP]
+#define FETCH_PIXEL_X8R8G8B8(DST, SRC) \
+   DST[ACOMP] = 0xff;                  \
+   DST[RCOMP] = (*SRC >> 16) & 0xff;   \
+   DST[GCOMP] = (*SRC >> 8) & 0xff;    \
+   DST[BCOMP] = *SRC & 0xff
+
+
+/* 16-bit: R in bits 15-11, G in bits 10-5, B in bits 4-0; dithered on store */
+#define STORE_PIXEL_R5G6B5(DST, X, Y, VALUE) \
+   do { \
+   int d = DITHER_COMP(X, Y) >> 6; \
+   *DST = ( ((DITHER_CLAMP((VALUE[RCOMP]) + d) & 0xf8) << 8) | \
+            ((DITHER_CLAMP((VALUE[GCOMP]) + d) & 0xfc) << 3) | \
+            ((DITHER_CLAMP((VALUE[BCOMP]) + d) & 0xf8) >> 3) ); \
+   } while(0)
+#define FETCH_PIXEL_R5G6B5(DST, SRC) \
+   do { \
+   DST[ACOMP] = 0xff; \
+   DST[RCOMP] = ((*SRC >> 8) & 0xf8) * 255 / 0xf8; \
+   DST[GCOMP] = ((*SRC >> 3) & 0xfc) * 255 / 0xfc; \
+   DST[BCOMP] = ((*SRC << 3) & 0xf8) * 255 / 0xf8; \
+   } while(0)
+
+
+/* 8-bit: R in bits 2-0, G in bits 5-3, B in bits 7-6; dithered on store */
+#define STORE_PIXEL_R3G3B2(DST, X, Y, VALUE) \
+   do { \
+   int d = DITHER_COMP(X, Y) >> 3; \
+   GLubyte *p = (GLubyte *)DST; \
+   *p = ( ((DITHER_CLAMP((VALUE[RCOMP]) + d) & 0xe0) >> 5) | \
+	  ((DITHER_CLAMP((VALUE[GCOMP]) + d) & 0xe0) >> 2) | \
+	  ((DITHER_CLAMP((VALUE[BCOMP]) + d) & 0xc0) >> 0) ); \
+   } while(0)
+#define FETCH_PIXEL_R3G3B2(DST, SRC) \
+   do { \
+   GLubyte p = *(GLubyte *)SRC; \
+   DST[ACOMP] = 0xff; \
+   DST[RCOMP] = ((p << 5) & 0xe0) * 255 / 0xe0; \
+   DST[GCOMP] = ((p << 2) & 0xe0) * 255 / 0xe0; \
+   DST[BCOMP] = ((p << 0) & 0xc0) * 255 / 0xc0; \
+   } while(0)
+
+
+/*
+ * Back-buffer span functions: pixels are addressed directly in Base.Data.
+ */
+
+/* 32-bit BGRA */
+#define NAME(FUNC) FUNC##_A8R8G8B8
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLuint *P = (GLuint *)xrb->Base.Data + YFLIP(xrb, Y) * xrb->pitch / 4 + (X)
+#define INC_PIXEL_PTR(P) P++
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   STORE_PIXEL_A8R8G8B8(DST, X, Y, VALUE)
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   STORE_PIXEL_RGB_A8R8G8B8(DST, X, Y, VALUE)
+#define FETCH_PIXEL(DST, SRC) \
+   FETCH_PIXEL_A8R8G8B8(DST, SRC)
+
+#include "swrast/s_spantemp.h"
+
+
+/* 32-bit BGRX */
+#define NAME(FUNC) FUNC##_X8R8G8B8
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLuint *P = (GLuint *)xrb->Base.Data + YFLIP(xrb, Y) * xrb->pitch / 4 + (X)
+#define INC_PIXEL_PTR(P) P++
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   STORE_PIXEL_X8R8G8B8(DST, X, Y, VALUE)
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   STORE_PIXEL_RGB_X8R8G8B8(DST, X, Y, VALUE)
+#define FETCH_PIXEL(DST, SRC) \
+   FETCH_PIXEL_X8R8G8B8(DST, SRC)
+
+#include "swrast/s_spantemp.h"
+
+
+/* 16-bit BGR */
+#define NAME(FUNC) FUNC##_R5G6B5
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLushort *P = (GLushort *)xrb->Base.Data + YFLIP(xrb, Y) * xrb->pitch / 2 + (X)
+#define INC_PIXEL_PTR(P) P++
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   STORE_PIXEL_R5G6B5(DST, X, Y, VALUE)
+#define FETCH_PIXEL(DST, SRC) \
+   FETCH_PIXEL_R5G6B5(DST, SRC)
+
+#include "swrast/s_spantemp.h"
+
+
+/* 8-bit BGR */
+#define NAME(FUNC) FUNC##_R3G3B2
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLubyte *P = (GLubyte *)xrb->Base.Data + YFLIP(xrb, Y) * xrb->pitch + (X) * 1
+#define INC_PIXEL_PTR(P) P += 1
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   STORE_PIXEL_R3G3B2(DST, X, Y, VALUE)
+#define FETCH_PIXEL(DST, SRC) \
+   FETCH_PIXEL_R3G3B2(DST, SRC)
+
+#include "swrast/s_spantemp.h"
+
+
+/*
+ * Front-buffer span functions: INIT_PIXEL_PTR aliases the `row` buffer in scope.
+ */
+
+/* 32-bit BGRA */
+#define NAME(FUNC) FUNC##_A8R8G8B8_front
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLuint *P = (GLuint *)row
+#define INC_PIXEL_PTR(P) P++
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   STORE_PIXEL_A8R8G8B8(DST, X, Y, VALUE)
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   STORE_PIXEL_RGB_A8R8G8B8(DST, X, Y, VALUE)
+#define FETCH_PIXEL(DST, SRC) \
+   FETCH_PIXEL_A8R8G8B8(DST, SRC)
+
+#include "swrast_spantemp.h"
+
+
+/* 32-bit BGRX */
+#define NAME(FUNC) FUNC##_X8R8G8B8_front
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLuint *P = (GLuint *)row
+#define INC_PIXEL_PTR(P) P++
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   STORE_PIXEL_X8R8G8B8(DST, X, Y, VALUE)
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   STORE_PIXEL_RGB_X8R8G8B8(DST, X, Y, VALUE)
+#define FETCH_PIXEL(DST, SRC) \
+   FETCH_PIXEL_X8R8G8B8(DST, SRC)
+
+#include "swrast_spantemp.h"
+
+
+/* 16-bit BGR */
+#define NAME(FUNC) FUNC##_R5G6B5_front
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLushort *P = (GLushort *)row
+#define INC_PIXEL_PTR(P) P++
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   STORE_PIXEL_R5G6B5(DST, X, Y, VALUE)
+#define FETCH_PIXEL(DST, SRC) \
+   FETCH_PIXEL_R5G6B5(DST, SRC)
+
+#include "swrast_spantemp.h"
+
+
+/* 8-bit BGR */
+#define NAME(FUNC) FUNC##_R3G3B2_front
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   struct swrast_renderbuffer *xrb = swrast_renderbuffer(rb);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLubyte *P = (GLubyte *)row
+#define INC_PIXEL_PTR(P) P += 1
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   STORE_PIXEL_R3G3B2(DST, X, Y, VALUE)
+#define FETCH_PIXEL(DST, SRC) \
+   FETCH_PIXEL_R3G3B2(DST, SRC)
+
+#include "swrast_spantemp.h"
+
+
+/*
+ * Back-buffers are malloced memory and always private.
+ *
+ * BACK_PIXMAP (not supported)
+ * BACK_XIMAGE
+ */
+void
+swrast_set_span_funcs_back(struct swrast_renderbuffer *xrb,
+			   GLuint pixel_format)
+{
+    struct gl_renderbuffer *rb = &xrb->Base;	/* span hooks live in the base */
+    switch (pixel_format) {
+    case PF_A8R8G8B8:
+	rb->GetRow = get_row_A8R8G8B8;
+	rb->GetValues = get_values_A8R8G8B8;
+	rb->PutRow = put_row_A8R8G8B8;
+	rb->PutRowRGB = put_row_rgb_A8R8G8B8;
+	rb->PutMonoRow = put_mono_row_A8R8G8B8;
+	rb->PutValues = put_values_A8R8G8B8;
+	rb->PutMonoValues = put_mono_values_A8R8G8B8;
+	break;
+    case PF_X8R8G8B8:
+	rb->GetRow = get_row_X8R8G8B8;
+	rb->GetValues = get_values_X8R8G8B8;
+	rb->PutRow = put_row_X8R8G8B8;
+	rb->PutRowRGB = put_row_rgb_X8R8G8B8;
+	rb->PutMonoRow = put_mono_row_X8R8G8B8;
+	rb->PutValues = put_values_X8R8G8B8;
+	rb->PutMonoValues = put_mono_values_X8R8G8B8;
+	break;
+    case PF_R5G6B5:
+	rb->GetRow = get_row_R5G6B5;
+	rb->GetValues = get_values_R5G6B5;
+	rb->PutRow = put_row_R5G6B5;
+	rb->PutRowRGB = put_row_rgb_R5G6B5;
+	rb->PutMonoRow = put_mono_row_R5G6B5;
+	rb->PutValues = put_values_R5G6B5;
+	rb->PutMonoValues = put_mono_values_R5G6B5;
+	break;
+    case PF_R3G3B2:
+	rb->GetRow = get_row_R3G3B2;
+	rb->GetValues = get_values_R3G3B2;
+	rb->PutRow = put_row_R3G3B2;
+	rb->PutRowRGB = put_row_rgb_R3G3B2;
+	rb->PutMonoRow = put_mono_row_R3G3B2;
+	rb->PutValues = put_values_R3G3B2;
+	rb->PutMonoValues = put_mono_values_R3G3B2;
+	break;
+    default:
+	assert(0);	/* unknown format: the hooks are left untouched */
+    }
+}
+
+
+/*
+ * Front-buffers are provided by the loader, the xorg loader uses pixmaps.
+ *
+ * WINDOW,          An X window
+ * GLXWINDOW,       GLX window
+ * PIXMAP,          GLX pixmap
+ * PBUFFER          GLX Pbuffer
+ */
+void
+swrast_set_span_funcs_front(struct swrast_renderbuffer *xrb,
+			    GLuint pixel_format)
+{
+    struct gl_renderbuffer *rb = &xrb->Base;	/* span hooks live in the base */
+    switch (pixel_format) {
+    case PF_A8R8G8B8:
+	rb->GetRow = get_row_A8R8G8B8_front;
+	rb->GetValues = get_values_A8R8G8B8_front;
+	rb->PutRow = put_row_A8R8G8B8_front;
+	rb->PutRowRGB = put_row_rgb_A8R8G8B8_front;
+	rb->PutMonoRow = put_mono_row_A8R8G8B8_front;
+	rb->PutValues = put_values_A8R8G8B8_front;
+	rb->PutMonoValues = put_mono_values_A8R8G8B8_front;
+	break;
+    case PF_X8R8G8B8:
+	rb->GetRow = get_row_X8R8G8B8_front;
+	rb->GetValues = get_values_X8R8G8B8_front;
+	rb->PutRow = put_row_X8R8G8B8_front;
+	rb->PutRowRGB = put_row_rgb_X8R8G8B8_front;
+	rb->PutMonoRow = put_mono_row_X8R8G8B8_front;
+	rb->PutValues = put_values_X8R8G8B8_front;
+	rb->PutMonoValues = put_mono_values_X8R8G8B8_front;
+	break;
+    case PF_R5G6B5:
+	rb->GetRow = get_row_R5G6B5_front;
+	rb->GetValues = get_values_R5G6B5_front;
+	rb->PutRow = put_row_R5G6B5_front;
+	rb->PutRowRGB = put_row_rgb_R5G6B5_front;
+	rb->PutMonoRow = put_mono_row_R5G6B5_front;
+	rb->PutValues = put_values_R5G6B5_front;
+	rb->PutMonoValues = put_mono_values_R5G6B5_front;
+	break;
+    case PF_R3G3B2:
+	rb->GetRow = get_row_R3G3B2_front;
+	rb->GetValues = get_values_R3G3B2_front;
+	rb->PutRow = put_row_R3G3B2_front;
+	rb->PutRowRGB = put_row_rgb_R3G3B2_front;
+	rb->PutMonoRow = put_mono_row_R3G3B2_front;
+	rb->PutValues = put_values_R3G3B2_front;
+	rb->PutMonoValues = put_mono_values_R3G3B2_front;
+	break;
+    default:
+	assert(0);	/* unknown format: the hooks are left untouched */
+    }
+}
diff --git a/src/mesa/drivers/dri/swrast/swrast_spantemp.h b/src/mesa/drivers/dri/swrast/swrast_spantemp.h
new file mode 100644
index 0000000000..1e9405eebf
--- /dev/null
+++ b/src/mesa/drivers/dri/swrast/swrast_spantemp.h
@@ -0,0 +1,319 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.1
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * Modified version of swrast/s_spantemp.h for front-buffer rendering. The
+ * no-mask paths use a scratch row to avoid repeated calls to the loader.
+ *
+ * For the mask paths we always use an array of 4 elements of RB_TYPE. This is
+ * to satisfy the xorg loader requirement of an image pitch of 32 bits and
+ * should be ok for other loaders also.
+ */
+
+
+#ifndef _SWRAST_SPANTEMP_ONCE
+#define _SWRAST_SPANTEMP_ONCE
+
+static INLINE void
+PUT_PIXEL( GLcontext *glCtx, GLint x, GLint y, GLvoid *p )
+{
+    /* hand one encoded pixel at (x, y) to the loader's putImage hook */
+    __DRIdrawable *dPriv = swrast_drawable(glCtx->DrawBuffer)->dPriv;
+    __DRIscreen *sPriv = swrast_context(glCtx)->cPriv->driScreenPriv;
+    const int width = 1, height = 1;
+
+    sPriv->swrast_loader->putImage(dPriv, __DRI_SWRAST_IMAGE_OP_DRAW,
+				   x, y, width, height, (char *)p,
+				   dPriv->loaderPrivate);
+}
+
+
+static INLINE void
+GET_PIXEL( GLcontext *glCtx, GLint x, GLint y, GLubyte *p )
+{
+    /* read back the single pixel at (x, y) through the loader's getImage */
+    __DRIdrawable *dPriv = swrast_drawable(glCtx->ReadBuffer)->dPriv;
+    __DRIscreen *sPriv = swrast_context(glCtx)->cPriv->driScreenPriv;
+    const int width = 1, height = 1;
+
+    sPriv->swrast_loader->getImage(dPriv, x, y, width, height, (char *)p,
+				   dPriv->loaderPrivate);
+}
+
+static INLINE void
+PUT_ROW( GLcontext *glCtx, GLint x, GLint y, GLuint n, char *row )
+{
+    /* send n encoded pixels starting at (x, y) as one 1-high putImage */
+    __DRIdrawable *dPriv = swrast_drawable(glCtx->DrawBuffer)->dPriv;
+    __DRIscreen *sPriv = swrast_context(glCtx)->cPriv->driScreenPriv;
+    const int height = 1;
+
+    sPriv->swrast_loader->putImage(dPriv, __DRI_SWRAST_IMAGE_OP_DRAW,
+				   x, y, n, height, row,
+				   dPriv->loaderPrivate);
+}
+
+static INLINE void
+GET_ROW( GLcontext *glCtx, GLint x, GLint y, GLuint n, char *row )
+{
+    /* fetch n pixels starting at (x, y) into row with one getImage call */
+    __DRIdrawable *dPriv = swrast_drawable(glCtx->ReadBuffer)->dPriv;
+    __DRIscreen *sPriv = swrast_context(glCtx)->cPriv->driScreenPriv;
+    const int height = 1;
+
+    sPriv->swrast_loader->getImage(dPriv, x, y, n, height, row,
+				   dPriv->loaderPrivate);
+}
+
+#endif /* _SWRAST_SPANTEMP_ONCE */
+
+
+/*
+ * Templates for the span/pixel-array write/read functions called via
+ * the gl_renderbuffer's GetRow, GetValues, PutRow, PutMonoRow, PutValues
+ * and PutMonoValues functions.
+ *
+ * Define the following macros before including this file:
+ *   NAME(BASE)  to generate the function name (i.e. add prefix or suffix)
+ *   RB_TYPE  the renderbuffer DataType
+ *   SPAN_VARS  to declare any local variables
+ *   INIT_PIXEL_PTR(P, X, Y)  to initialize a pointer to a pixel
+ *   INC_PIXEL_PTR(P)  to increment a pixel pointer by one pixel
+ *   STORE_PIXEL(DST, X, Y, VALUE)  to store pixel values in buffer
+ *   FETCH_PIXEL(DST, SRC)  to fetch pixel values from buffer
+ *
+ * Note that in the STORE_PIXEL macros, we also pass in the (X,Y) coordinates
+ * for the pixels to be stored.  This is useful when dithering and probably
+ * ignored otherwise.
+ */
+
+#include "main/macros.h"
+
+
+#if !defined(RB_COMPONENTS)
+#define RB_COMPONENTS 4
+#endif
+
+
+static void
+NAME(get_row)( GLcontext *ctx, struct gl_renderbuffer *rb,
+               GLuint count, GLint x, GLint y, void *values )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+   char *row = swrast_drawable(ctx->ReadBuffer)->row;	/* read-side scratch row */
+   RB_TYPE (*rgba)[RB_COMPONENTS] = (RB_TYPE (*)[RB_COMPONENTS]) values;
+   GLuint k;
+   INIT_PIXEL_PTR(pix, x, y);
+   GET_ROW( ctx, x, YFLIP(xrb, y), count, row );	/* one loader round-trip */
+   for (k = 0; k < count; k++) {
+      FETCH_PIXEL(rgba[k], pix);
+      INC_PIXEL_PTR(pix);
+   }
+   (void) rb;
+}
+
+static void
+NAME(get_values)( GLcontext *ctx, struct gl_renderbuffer *rb,
+                  GLuint count, const GLint x[], const GLint y[], void *values )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+   RB_TYPE (*dest)[RB_COMPONENTS] = (RB_TYPE (*)[RB_COMPONENTS]) values;
+   GLuint i;
+   for (i = 0; i < count; i++) {
+      RB_TYPE row[4];               /* one pixel fetched from the loader */
+      INIT_PIXEL_PTR(pixel, x, y);  /* typed view of row, as FETCH_PIXEL expects */
+      GET_PIXEL(ctx, x[i], YFLIP(xrb, y[i]), row);
+      FETCH_PIXEL(dest[i], pixel);
+   }
+   (void) rb;
+}
+
+
+static void
+NAME(put_row)( GLcontext *ctx, struct gl_renderbuffer *rb,
+               GLuint count, GLint x, GLint y,
+               const void *values, const GLubyte mask[] )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+   const RB_TYPE (*rgba)[RB_COMPONENTS] = (const RB_TYPE (*)[RB_COMPONENTS]) values;
+   GLuint k;
+   if (!mask) {
+      /* no mask: encode the whole span into the scratch row, send it once */
+      char *row = swrast_drawable(ctx->DrawBuffer)->row;
+      INIT_PIXEL_PTR(pix, x, y);
+      for (k = 0; k < count; k++) {
+         STORE_PIXEL(pix, x + k, y, rgba[k]);
+         INC_PIXEL_PTR(pix);
+      }
+      PUT_ROW( ctx, x, YFLIP(xrb, y), count, row );
+   } else {
+      for (k = 0; k < count; k++) {
+         if (mask[k]) {
+            RB_TYPE row[4];	/* single-pixel buffer for PUT_PIXEL */
+            INIT_PIXEL_PTR(pix, x, y);
+            STORE_PIXEL(pix, x + k, y, rgba[k]);
+            PUT_PIXEL(ctx, x + k, YFLIP(xrb, y), pix);
+         }
+      }
+   }
+   (void) rb;
+}
+
+
+static void
+NAME(put_row_rgb)( GLcontext *ctx, struct gl_renderbuffer *rb,
+                   GLuint count, GLint x, GLint y,
+                   const void *values, const GLubyte mask[] )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+   const RB_TYPE (*rgb)[3] = (const RB_TYPE (*)[3]) values;
+   GLuint k;
+   if (!mask) {
+      /* no mask: encode the whole span into the scratch row, send it once */
+      char *row = swrast_drawable(ctx->DrawBuffer)->row;
+      INIT_PIXEL_PTR(pix, x, y);
+      for (k = 0; k < count; k++) {
+#ifdef STORE_PIXEL_RGB
+         STORE_PIXEL_RGB(pix, x + k, y, rgb[k]);
+#else
+         STORE_PIXEL(pix, x + k, y, rgb[k]);
+#endif
+         INC_PIXEL_PTR(pix);
+      }
+      PUT_ROW( ctx, x, YFLIP(xrb, y), count, row );
+   } else {
+      for (k = 0; k < count; k++) {
+         if (mask[k]) {
+            RB_TYPE row[4];	/* single-pixel buffer for PUT_PIXEL */
+            INIT_PIXEL_PTR(pix, x, y);
+#ifdef STORE_PIXEL_RGB
+            STORE_PIXEL_RGB(pix, x + k, y, rgb[k]);
+#else
+            STORE_PIXEL(pix, x + k, y, rgb[k]);
+#endif
+            PUT_PIXEL(ctx, x + k, YFLIP(xrb, y), pix);
+         }
+      }
+   }
+   (void) rb;
+}
+
+
+static void
+NAME(put_mono_row)( GLcontext *ctx, struct gl_renderbuffer *rb,
+                    GLuint count, GLint x, GLint y,
+                    const void *value, const GLubyte mask[] )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+   const RB_TYPE *color = (const RB_TYPE *) value;
+   GLuint k;
+   if (!mask) {
+      /* no mask: fill the scratch row with the one color, send it once */
+      char *row = swrast_drawable(ctx->DrawBuffer)->row;
+      INIT_PIXEL_PTR(pix, x, y);
+      for (k = 0; k < count; k++) {
+         STORE_PIXEL(pix, x + k, y, color);
+         INC_PIXEL_PTR(pix);
+      }
+      PUT_ROW( ctx, x, YFLIP(xrb, y), count, row );
+   } else {
+      for (k = 0; k < count; k++) {
+         if (mask[k]) {
+            RB_TYPE row[4];	/* single-pixel buffer for PUT_PIXEL */
+            INIT_PIXEL_PTR(pix, x, y);
+            STORE_PIXEL(pix, x + k, y, color);
+            PUT_PIXEL(ctx, x + k, YFLIP(xrb, y), pix);
+         }
+      }
+   }
+   (void) rb;
+}
+
+
+static void
+NAME(put_values)( GLcontext *ctx, struct gl_renderbuffer *rb,
+                  GLuint count, const GLint x[], const GLint y[],
+                  const void *values, const GLubyte mask[] )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+   const RB_TYPE (*rgba)[RB_COMPONENTS] = (const RB_TYPE (*)[RB_COMPONENTS]) values;
+   GLuint k;
+   ASSERT(mask);
+   for (k = 0; k < count; k++) {
+      RB_TYPE row[4];		/* single-pixel buffer for PUT_PIXEL */
+      INIT_PIXEL_PTR(pix, x, y);
+      if (!mask[k])
+         continue;		/* masked out: leave this pixel untouched */
+      STORE_PIXEL(pix, x[k], y[k], rgba[k]);
+      PUT_PIXEL(ctx, x[k], YFLIP(xrb, y[k]), pix);
+   }
+   (void) rb;
+}
+
+
+static void
+NAME(put_mono_values)( GLcontext *ctx, struct gl_renderbuffer *rb,
+                       GLuint count, const GLint x[], const GLint y[],
+                       const void *value, const GLubyte mask[] )
+{
+#ifdef SPAN_VARS
+   SPAN_VARS
+#endif
+   const RB_TYPE *color = (const RB_TYPE *) value;
+   GLuint k;
+   ASSERT(mask);
+   for (k = 0; k < count; k++) {
+      RB_TYPE row[4];		/* single-pixel buffer for PUT_PIXEL */
+      INIT_PIXEL_PTR(pix, x, y);
+      if (!mask[k])
+         continue;		/* masked out: leave this pixel untouched */
+      STORE_PIXEL(pix, x[k], y[k], color);
+      PUT_PIXEL(ctx, x[k], YFLIP(xrb, y[k]), pix);
+   }
+   (void) rb;
+}
+
+
+#undef NAME
+#undef RB_TYPE
+#undef RB_COMPONENTS
+#undef SPAN_VARS
+#undef INIT_PIXEL_PTR
+#undef INC_PIXEL_PTR
+#undef STORE_PIXEL
+#undef STORE_PIXEL_RGB
+#undef FETCH_PIXEL
diff --git a/src/mesa/drivers/dri/tdfx/BUGS b/src/mesa/drivers/dri/tdfx/BUGS
new file mode 100644
index 0000000000..b15f6a91ed
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/BUGS
@@ -0,0 +1,64 @@
+REMOVE THIS FILE BEFORE MERGING WITH TRUNK
+------------------------------------------
+
+OUTSTANDING BUGS
+
+demos/reflect - reading back Z on Voodoo3, image offset to right
+	Fixed in latest Glide.
+
+Q3 - some polygons drawn as vertical strips, similar to bug that was
+	seen in demos/fire.  Voodoo3 only.  May be related to glDepthMask
+	or glColorMask.
+
+book/fog - not fogging
+	Fog in orthographic mode is still not implemented.  Checking with
+	3dfx engineers for ideas.
+
+Q3 demo crashes after changing display settings
+	but the full Q3 game version seems OK.
+
+
+
+MORE OUTSTANDING BUGS
+
+private context was NULL, causing immediate failure of any GLX program. Can't
+reproduce after restarting the X server; putting it down as a hallucination.
+
+texture object image was NULL, causing a segmentation fault. Happens with
+prboom. I've put a check in tdfx_texstate.c, but this isn't a fix.
+
+prboom, wall textures near first chainsaw aren't bound properly. sideways
+movements causes the wall textures to move with you. prboom busted?
+
+16bpp mode, quake3, windowed, q3dm1, floor under rocketlauncher bands. it
+looks like multitexturing gone wrong. i'll disable a tmu and test.
+
+sof, polygons appear at wrong x,y,z positions, intermittent, have not yet
+found reliable way of reproducing. culling? sometimes polys disappear.
+
+descent3 is all black in 16bpp mode - FIXED (palette problems)
+
+smeared pixels in quake3 - FIXED (texture memory overlapped FB)
+
+
+
+PERFORMANCE COMPARISON  (Brian / Alan)
+
+  V3/16  is Voodoo3 in 16bpp on a P3/500
+  V5/16  is Voodoo5 in 16bpp on a P3/600
+  V5/32  is Voodoo5 in 32bpp on a P3/600
+  V5A/16 is Voodoo5 in 16bpp on an Alpha AXP/600
+  V5A/32 is Voodoo5 in 32bpp on an Alpha AXP/600
+
+                  tdfx-2-1-branch               tdfx-3-0-0-branch
+demo             V3/16 V5/16 V5/32       V3/16 V5/16 V5/32 V5A/16 V5A/32
+------------------------------------------------------------------------
+gloss             257   183   174         320    308  177   313    167
+fire               42                      39                52     41
+fire (no help)     98    80    50         106    113   73   124     80
+tunnel             61                      50                70     58
+tunnel (no help)  167   142    57         138    152  113   171    122
+gears             663   554   540         881   1232  776  1484    830
+teapot             20                      21                37     36
+teapot (no help)   22    14    14          24     30   30    43     42
+
diff --git a/src/mesa/drivers/dri/tdfx/Makefile b/src/mesa/drivers/dri/tdfx/Makefile
new file mode 100644
index 0000000000..96bd8f8202
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/Makefile
@@ -0,0 +1,32 @@
+# src/mesa/drivers/dri/tdfx/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = tdfx_dri.so
+
+DRIVER_SOURCES = \
+	tdfx_context.c \
+	tdfx_dd.c \
+	tdfx_lock.c \
+	tdfx_pixels.c \
+	tdfx_render.c \
+	tdfx_screen.c \
+	tdfx_span.c \
+	tdfx_state.c \
+	tdfx_tex.c \
+	tdfx_texman.c \
+	tdfx_texstate.c \
+	tdfx_tris.c \
+	tdfx_vb.c
+
+C_SOURCES = \
+	$(COMMON_SOURCES) \
+	$(DRIVER_SOURCES)
+
+
+ASM_SOURCES = 
+
+include ../Makefile.template
+
+
diff --git a/src/mesa/drivers/dri/tdfx/X86/fx_3dnow_fastpath.S b/src/mesa/drivers/dri/tdfx/X86/fx_3dnow_fastpath.S
new file mode 100644
index 0000000000..500c97c536
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/X86/fx_3dnow_fastpath.S
@@ -0,0 +1,83 @@
+
+#include "../../X86/assyntax.h"
+
+#define SETUP_RGBA  0x1
+#define SETUP_TMU0  0x2
+#define SETUP_TMU1  0x4
+
+
+/* Pack either rgba or texture into the remaining half of a 32 byte vertex.
+ */
+#define CLIP_R  24
+#define CLIP_G  16
+#define CLIP_B  20
+#define CLIP_A  28                      /*  defined in fxdrv.h               */
+
+#define CLIP_S0 16
+#define CLIP_T0 20
+#define CLIP_S1 24
+#define CLIP_T1 28
+
+#define SIZE 4
+#define TYPE (0)
+#define TAG(x) x
+#include "fx_3dnow_fasttmp.h"
+
+#define SIZE 8
+#define TYPE (SETUP_RGBA)
+#define TAG(x) x##_RGBA
+#include "fx_3dnow_fasttmp.h"
+
+#define SIZE 6
+#define TYPE (SETUP_TMU0)
+#define TAG(x) x##_TMU0
+#include "fx_3dnow_fasttmp.h"
+
+#define SIZE 8
+#define TYPE (SETUP_TMU0|SETUP_TMU1)
+#define TAG(x) x##_TMU0_TMU1
+#include "fx_3dnow_fasttmp.h"
+
+#undef CLIP_S1
+#undef CLIP_T1
+#define CLIP_S1 16
+#define CLIP_T1 20
+
+#define SIZE 6
+#define TYPE (SETUP_TMU1)
+#define TAG(x) x##_TMU1
+#include "fx_3dnow_fasttmp.h"
+
+/* These three need to use a full 64 byte clip-space vertex.
+ */
+#undef CLIP_S0
+#undef CLIP_T0
+#undef CLIP_S1
+#undef CLIP_T1
+
+#define CLIP_S0 32
+#define CLIP_T0 36
+#define CLIP_S1 40
+#define CLIP_T1 44
+
+#define SIZE 10
+#define TYPE (SETUP_RGBA|SETUP_TMU0)
+#define TAG(x) x##_RGBA_TMU0
+#include "fx_3dnow_fasttmp.h"
+
+#define SIZE 12
+#define TYPE (SETUP_RGBA|SETUP_TMU0|SETUP_TMU1)
+#define TAG(x) x##_RGBA_TMU0_TMU1
+#include "fx_3dnow_fasttmp.h"
+
+#undef CLIP_S1
+#undef CLIP_T1
+#define CLIP_S1 32
+#define CLIP_T1 36
+
+#define SIZE 10
+#define TYPE (SETUP_RGBA|SETUP_TMU1)
+#define TAG(x) x##_RGBA_TMU1
+#include "fx_3dnow_fasttmp.h"
+
+
diff --git a/src/mesa/drivers/dri/tdfx/X86/fx_3dnow_fasttmp.h b/src/mesa/drivers/dri/tdfx/X86/fx_3dnow_fasttmp.h
new file mode 100644
index 0000000000..78c5fef746
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/X86/fx_3dnow_fasttmp.h
@@ -0,0 +1,313 @@
+
+#if !defined(NASM_ASSEMBLER) && !defined(MASM_ASSEMBLER)
+#define TAGLLBL(a) TAG(.L##a)           /* label with a .L prefix, renamed per instantiation by TAG */
+#else
+#define TAGLLBL(a) TAG(a)               /* same label without the .L prefix */
+#endif
+
+#if !GLIDE3                             /* this branch is also taken when GLIDE3 is not defined */
+/* GR_VERTEX_*: byte offsets, relative to ECX, that the routines below store to. */
+#define GR_VERTEX_X_OFFSET              0
+#define GR_VERTEX_Y_OFFSET              4
+#define GR_VERTEX_Z_OFFSET              8
+#define GR_VERTEX_R_OFFSET              12
+#define GR_VERTEX_G_OFFSET              16
+#define GR_VERTEX_B_OFFSET              20
+#define GR_VERTEX_OOZ_OFFSET            24
+#define GR_VERTEX_A_OFFSET              28
+#define GR_VERTEX_OOW_OFFSET            32
+
+#else /* GLIDE3 */
+
+#define GR_VERTEX_X_OFFSET              0
+#define GR_VERTEX_Y_OFFSET              4
+#define GR_VERTEX_OOZ_OFFSET            8
+#define GR_VERTEX_OOW_OFFSET            12
+#define GR_VERTEX_R_OFFSET              16
+#define GR_VERTEX_G_OFFSET              20
+#define GR_VERTEX_B_OFFSET              24
+#define GR_VERTEX_A_OFFSET              28
+#define GR_VERTEX_Z_OFFSET              32
+
+#endif /* GLIDE3 */
+
+#define GR_VERTEX_SOW_TMU0_OFFSET       36
+#define GR_VERTEX_TOW_TMU0_OFFSET       40
+#define GR_VERTEX_OOW_TMU0_OFFSET       44
+#define GR_VERTEX_SOW_TMU1_OFFSET       48
+#define GR_VERTEX_TOW_TMU1_OFFSET       52
+#define GR_VERTEX_OOW_TMU1_OFFSET       56
+
+
+
+/* MAT_*: byte offsets into the matrix argument m (4-byte elements 5, 10, 12, 13 and 14). */
+/* #define MAT_SX 0            element 0 is accessed by REGIND !! */
+#define MAT_SY 20
+#define MAT_SZ 40
+#define MAT_TX 48
+#define MAT_TY 52
+#define MAT_TZ 56
+
+
+
+
+/* Do viewport map, device scale and perspective projection.
+ *
+ * void TAG(fx_3dnow_project_vertices)( GLfloat *first,
+ *		       GLfloat *last,
+ *		       const GLfloat *m,
+ *		       GLuint stride )
+ *
+ * Works in place on every vertex from first up to (excluding) last.
+ * Rearrange fxVertices to look like grVertices.
+ */
+/* Register use: ECX = current vertex, EDX = last, EBP = m, EAX = stride (added to the byte address). */
+GLOBL GLNAME( TAG(fx_3dnow_project_vertices) )
+GLNAME( TAG(fx_3dnow_project_vertices) ):
+
+    PUSH_L    ( EBP )
+
+    MOV_L     ( REGOFF(8, ESP), ECX )    /* first_vert */
+    MOV_L     ( REGOFF(12, ESP), EDX )     /* last_vert */
+
+    CMP_L     ( ECX, EDX )
+    JE        ( TAGLLBL(FXPV_end) )             /* first == last: nothing to do */
+
+    FEMMS
+
+    PREFETCH  ( REGIND(ECX) )         /* fetch the first vertex */
+
+    MOV_L     ( REGOFF(16, ESP), EBP )     /* matrix */
+    MOV_L     ( REGOFF(20, ESP), EAX )     /* stride */
+
+    MOVD      ( REGOFF(MAT_TX, EBP), MM6 )      /*             | tx           */
+    PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 )      /*  ty         | tx           */
+
+#if !defined(FX_V2)
+    MOV_L     ( CONST(0x49400000), REGOFF(-8, ESP) )    /*  snapper = 786432.0f, stored below ESP */
+    MOV_L     ( CONST(0x49400000), REGOFF(-4, ESP) )    /*  snapper           */
+#endif
+/* NOTE(review): with FX_V2 defined the stores above are omitted, yet MM4 is still loaded from -8(ESP) and added into MM6 - confirm. */
+    MOVQ      ( REGOFF(-8, ESP), MM4 )          /*  snapper    | snapper      */
+    PFADD     ( MM4, MM6 )                      /*  ty+snapper | tx+snapper   */
+
+    MOVD      ( REGIND(EBP), MM5 )              /*             | vsx (m[0])   */
+    PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 )      /*  vsy        | vsx          */
+
+    MOVD      ( REGOFF(MAT_SZ, EBP), MM1 )      /*             | vsz          */
+
+
+ALIGNTEXT32
+TAGLLBL(FXPV_loop_start):
+
+    PREFETCH  ( REGOFF(64, ECX) )               /* fetch the next-ish vertex */
+
+/* oow = 1/f[3]: PFRCP estimate in MM0, refined into MM7 by PFRCPIT1/PFRCPIT2. */
+    MOVD      ( REGOFF(12, ECX), MM0 )          /*              | f[3]        */
+    PFRCP     ( MM0, MM0 )                      /*  oow = 1/f[3]              */
+
+    MOVD      ( REGOFF(12, ECX), MM7 )          /*              | f[3]        */
+    PFRCPIT1  ( MM0, MM7 )
+    PFRCPIT2  ( MM0, MM7 )                      /*  oow         | oow         */
+
+    PUNPCKLDQ ( MM7, MM7 )                      /*  low dword copied to both halves */
+
+/* NOTE(review): input and output share the vertex; the copy order below presumably avoids overwriting unread data - confirm before reordering. */
+#if (TYPE & SETUP_RGBA)
+    MOVD      ( REGOFF(CLIP_R, ECX ), MM0 )     /*  f[RCOORD] = f[CLIP_R];    */
+    MOVD      ( MM0, REGOFF(GR_VERTEX_R_OFFSET, ECX) )
+#endif
+
+#if (TYPE & SETUP_TMU1)
+    MOVQ      ( REGOFF(CLIP_S1, ECX), MM0 ) /* f[S1COORD] = f[CLIP_S1] * oow  */
+    PFMUL     ( MM7, MM0 )                  /* f[T1COORD] = f[CLIP_T1] * oow  */
+    MOVQ      ( MM0, REGOFF(GR_VERTEX_SOW_TMU1_OFFSET, ECX) )
+#endif
+
+
+#if (TYPE & SETUP_TMU0)
+    MOVQ      ( REGOFF(CLIP_S0, ECX), MM0 ) /* f[S0COORD] = f[CLIP_S0] * oow  */
+    PFMUL     ( MM7, MM0 )                  /* f[T0COORD] = f[CLIP_T0] * oow  */
+    MOVQ      ( MM0, REGOFF(GR_VERTEX_SOW_TMU0_OFFSET, ECX) )
+#endif
+
+
+
+
+
+/*  DO_SETUP_XYZ */
+
+    MOVQ      ( REGIND(ECX), MM2 )              /*  f[1]        | f[0]        */
+    PFMUL     ( MM7, MM2 )                      /*  f[1] * oow  | f[0] * oow  */
+
+    MOVD      ( REGOFF(8, ECX), MM3 )           /*              | f[2]        */
+    PFMUL     ( MM7, MM3 )                      /*              | f[2] * oow  */
+
+    MOVD      ( REGOFF(MAT_TZ, EBP), MM0 )      /*              | vtz         */
+    PFMUL     ( MM1, MM3 )                      /*              | f[2] *= vsz */
+
+    PFADD     ( MM0, MM3 )                      /*              | f[2] += vtz */
+    PFMUL     ( MM5, MM2 )                      /*  f[1] *= vsy | f[0] *= vsx */
+
+    PFADD     ( MM6, MM2 )                      /*  f[1] += vty | f[0] += vtx (MM6 also carries the snapper) */
+
+#if !defined(FX_V2)
+    PFSUB     ( MM4, MM2 )                      /*  f[0,1] -= snapper         */
+#endif
+
+    MOVQ      ( MM2, REGOFF(GR_VERTEX_X_OFFSET, ECX) )
+    MOVD      ( MM3, REGOFF(GR_VERTEX_OOZ_OFFSET, ECX) )
+
+
+/* end of DO_SETUP_XYZ   */
+
+    MOVD      ( MM7, REGOFF(GR_VERTEX_OOW_OFFSET, ECX) ) /* f[OOWCOORD] = oow */
+    ADD_L     ( EAX, ECX )        /* f += stride */
+
+    CMP_L     ( ECX, EDX )	/* stall??? */
+    JA        ( TAGLLBL(FXPV_loop_start) )      /* repeat while last is above f (unsigned) */
+
+TAGLLBL(FXPV_end):
+    FEMMS
+    POP_L     ( EBP )
+    RET
+
+
+
+
+
+
+
+/* void TAG(fx_3dnow_project_clipped_vertices)( GLfloat *first,
+ *		       GLfloat *last,
+ *		       const GLfloat *m,
+ *		       GLuint stride,
+ *                     const GLubyte *mask )
+ * Same projection as TAG(fx_3dnow_project_vertices); a vertex whose mask byte is non-zero is skipped.
+ */
+/* There is no first == last check before the loop here, so one vertex slot is always visited. */
+GLOBL GLNAME( TAG(fx_3dnow_project_clipped_vertices) )
+GLNAME( TAG(fx_3dnow_project_clipped_vertices) ):
+
+    PUSH_L    ( EBP )
+
+    MOV_L     ( REGOFF(8, ESP), ECX ) /* first FXDRIVER(VB)->verts*/
+    MOV_L     ( REGOFF(12, ESP), EDX ) /* last FXDRIVER(VB)->last_vert  */
+
+    FEMMS
+
+    PUSH_L    ( EDI )                 /* saved and restored, but not otherwise used in this routine */
+    PUSH_L    ( ESI )
+
+    PREFETCH  ( REGIND(ECX) )         /* fetch the first vertex */
+/* Argument offsets below account for the three registers pushed so far. */
+    MOV_L     ( REGOFF(24, ESP), EBP ) /* mat ctx->Viewport.WindowMap.M */
+    MOV_L     ( REGOFF(28, ESP), EAX )     /* stride */
+    MOV_L     ( REGOFF(32, ESP), ESI ) /* VB->ClipMask       */
+
+    MOVD      ( REGOFF(MAT_TX, EBP), MM6 )      /*             | tx           */
+    PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 )      /*  ty         | tx           */
+
+#if !defined(FX_V2)
+    MOV_L     ( CONST(0x49400000), REGOFF(-8, ESP) )    /*  snapper = 786432.0f, stored below ESP */
+    MOV_L     ( CONST(0x49400000), REGOFF(-4, ESP) )    /*  snapper           */
+#endif
+/* NOTE(review): with FX_V2 defined the stores above are omitted, yet MM4 is still loaded from -8(ESP) and added into MM6 - confirm. */
+    MOVQ      ( REGOFF(-8, ESP), MM4 )          /*  snapper    | snapper      */
+    PFADD     ( MM4, MM6 )                      /*  ty+snapper | tx+snapper   */
+
+    MOVD      ( REGIND(EBP), MM5 )              /*             | vsx (m[0])   */
+    PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 )      /*  vsy        | vsx          */
+
+    MOVD      ( REGOFF(MAT_SZ, EBP), MM1 )      /*             | vsz          */
+
+
+
+ALIGNTEXT32
+TAGLLBL(FXPCV_loop_start):
+
+    PREFETCH  ( REGOFF(64, ECX) )         /* fetch the next-ish vertex */
+
+    CMP_B     ( CONST(0), REGIND(ESI) )
+    JNE       ( TAGLLBL(FXPCV_skip) )           /* non-zero mask byte: leave this vertex alone */
+
+    MOVD      ( REGOFF(12, ECX), MM0)           /*              | f[3]        */
+    PFRCP     ( MM0, MM0 )                      /*  oow = 1/f[3]              */
+
+    MOVD      ( REGOFF(12, ECX), MM7)           /*              | f[3]        */
+    PFRCPIT1  ( MM0, MM7 )
+    PFRCPIT2  ( MM0, MM7 )                      /*  oow         | oow         */
+
+    PUNPCKLDQ ( MM7, MM7 )                      /*  low dword copied to both halves */
+
+/* NOTE(review): input and output share the vertex; the copy order below presumably avoids overwriting unread data - confirm before reordering. */
+#if (TYPE & SETUP_RGBA)
+    MOVD      ( REGOFF(CLIP_R, ECX ), MM0 )     /*  f[RCOORD] = f[CLIP_R];    */
+    MOVD      ( MM0, REGOFF(GR_VERTEX_R_OFFSET, ECX) )
+#endif
+
+#if (TYPE & SETUP_TMU1)
+    MOVQ      ( REGOFF(CLIP_S1, ECX), MM0 ) /* f[S1COORD] = f[CLIP_S1] * oow  */
+    PFMUL     ( MM7, MM0 )                  /* f[T1COORD] = f[CLIP_T1] * oow  */
+    MOVQ      ( MM0, REGOFF(GR_VERTEX_SOW_TMU1_OFFSET, ECX) )
+#endif
+
+
+#if (TYPE & SETUP_TMU0)
+    MOVQ      ( REGOFF(CLIP_S0, ECX), MM0 ) /* f[S0COORD] = f[CLIP_S0] * oow  */
+    PFMUL     ( MM7, MM0 )                  /* f[T0COORD] = f[CLIP_T0] * oow  */
+    MOVQ      ( MM0, REGOFF(GR_VERTEX_SOW_TMU0_OFFSET, ECX) )
+#endif
+
+
+
+
+/*  DO_SETUP_XYZ */
+
+    MOVQ      ( REGIND(ECX), MM2 )              /*  f[1]        | f[0]        */
+    PFMUL     ( MM7, MM2 )                      /*  f[1] * oow  | f[0] * oow  */
+
+    MOVD      ( REGOFF(8, ECX), MM3 )           /*              | f[2]        */
+    PFMUL     ( MM7, MM3 )                      /*              | f[2] * oow  */
+
+    MOVD      ( REGOFF(MAT_TZ, EBP), MM0 )      /*              | vtz         */
+    PFMUL     ( MM1, MM3 )                      /*              | f[2] *= vsz */
+
+    PFADD     ( MM0, MM3 )                      /*              | f[2] += vtz */
+    PFMUL     ( MM5, MM2 )                      /*  f[1] *= vsy | f[0] *= vsx */
+
+    PFADD     ( MM6, MM2 )                      /*  f[1] += vty | f[0] += vtx (MM6 also carries the snapper) */
+
+#if !defined(FX_V2)
+    PFSUB     ( MM4, MM2 )                      /*  f[0,1] -= snapper         */
+#endif
+
+    MOVQ      ( MM2, REGOFF(GR_VERTEX_X_OFFSET, ECX) )
+    MOVD      ( MM3, REGOFF(GR_VERTEX_OOZ_OFFSET, ECX) )
+
+
+/* end of DO_SETUP_XYZ   */
+
+    MOVD      ( MM7, REGOFF(GR_VERTEX_OOW_OFFSET, ECX) ) /* f[OOWCOORD] = oow */
+
+TAGLLBL(FXPCV_skip):
+    ADD_L     ( EAX, ECX )    /* f += stride     */
+
+    INC_L     ( ESI )                           /*  next ClipMask             */
+    CMP_L     ( ECX, EDX )
+    JA        ( TAGLLBL(FXPCV_loop_start) )     /* repeat while last is above f (unsigned) */
+
+    POP_L     ( ESI )
+    POP_L     ( EDI )
+/* Nothing in this routine jumps to the FXPCV_end label below. */
+TAGLLBL(FXPCV_end):
+    FEMMS
+    POP_L     ( EBP )
+    RET
+
+
+/* Clear the per-instantiation parameters so the includer can define the next set. */
+#undef TYPE
+#undef TAG
+#undef SIZE
+
diff --git a/src/mesa/drivers/dri/tdfx/dri_glide.h b/src/mesa/drivers/dri/tdfx/dri_glide.h
new file mode 100644
index 0000000000..3ad2bf68c6
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/dri_glide.h
@@ -0,0 +1,59 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __DRI_GLIDE_H__
+#define __DRI_GLIDE_H__
+
+#include <glide.h>
+#include "dri_mesaint.h"
+
+/*
+ * This is the private interface between Glide and the DRI.
+ */
+extern void grDRIOpen( char *pFB, char *pRegs, int deviceID,   /* tdfx_context.c passes sPriv->pFB and fxScreen->regs.map */
+		       int width, int height,
+		       int mem, int cpp, int stride,
+		       int fifoOffset, int fifoSize,
+		       int fbOffset, int backOffset, int depthOffset,
+		       int textureOffset, int textureSize,
+		       volatile int *fifoPtr, volatile int *fifoRead );  /* tdfx_context.c passes the SAREA's fifoPtr/fifoRead */
+extern void grDRIPosition( int x, int y, int w, int h,
+			   int numClip, drm_clip_rect_t *pClip );
+extern void grDRILostContext( void );
+extern void grDRIImportFifo( int fifoPtr, int fifoRead );
+extern void grDRIInvalidateAll( void );
+extern void grDRIResetSAREA( void );           /* called by tdfxInitContext() right after grSstWinOpen() */
+extern void grDRIBufferSwap( FxU32 swapInterval );
+#endif /* __DRI_GLIDE_H__ */
diff --git a/src/mesa/drivers/dri/tdfx/server/tdfx_dri.h b/src/mesa/drivers/dri/tdfx/server/tdfx_dri.h
new file mode 100644
index 0000000000..dc29984a27
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/server/tdfx_dri.h
@@ -0,0 +1,27 @@
+
+#ifndef _TDFX_DRI_
+#define _TDFX_DRI_
+
+#include "xf86drm.h"
+#include "drm.h"
+
+typedef struct {                 /* deviceID .. textureSize carry the same names as grDRIOpen()'s parameters */
+  drm_handle_t regs;             /* presumably the DRM handle of the register region - confirm */
+  drmSize regsSize;              /* presumably the size of that region - confirm */
+  int deviceID;
+  int width;
+  int height;
+  int mem;
+  int cpp;
+  int stride;
+  int fifoOffset;
+  int fifoSize;
+  int fbOffset;
+  int backOffset;
+  int depthOffset;
+  int textureOffset;
+  int textureSize;
+  unsigned int sarea_priv_offset;  /* NOTE(review): tdfxCreateContext() locates the private SAREA at sizeof(drm_sarea_t) instead - confirm */
+} TDFXDRIRec, *TDFXDRIPtr;
+
+#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_context.c b/src/mesa/drivers/dri/tdfx/tdfx_context.c
new file mode 100644
index 0000000000..c30fcf3a6f
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_context.c
@@ -0,0 +1,1015 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * \file tdfx_context.c
+ * Context management functions for 3Dfx hardware.
+ *
+ * \author Gareth Hughes <gareth@valinux.com> (original rewrite 29 Sep - 1 Oct 2000)
+ * \author Brian Paul <brianp@valinux.com>
+ * \author Daniel Borca <dborca@users.sourceforge.net> (new fixes 19 Jul 2004)
+ */
+
+#include <dlfcn.h>
+#include "tdfx_context.h"
+#include "tdfx_dd.h"
+#include "tdfx_state.h"
+#include "tdfx_vb.h"
+#include "tdfx_tex.h"
+#include "tdfx_tris.h"
+#include "tdfx_render.h"
+#include "tdfx_span.h"
+#include "tdfx_texman.h"
+#include "main/extensions.h"
+#include "main/hash.h"
+#include "main/texobj.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "utils.h"
+
+/* #define need_GL_ARB_point_parameters */
+#define need_GL_ARB_occlusion_query
+/* #define need_GL_ARB_vertex_program */
+#define need_GL_EXT_blend_equation_separate
+#define need_GL_EXT_blend_func_separate
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_paletted_texture
+/* #define need_GL_EXT_secondary_color */
+/* #define need_GL_NV_vertex_program */
+#include "main/remap_helper.h"
+
+
+/**
+ * Common extension strings exported by all cards; the { NULL, NULL } entry ends the list.
+ */
+static const struct dri_extension card_extensions[] =
+{
+    { "GL_ARB_occlusion_query",            GL_ARB_occlusion_query_functions },
+    { "GL_ARB_texture_mirrored_repeat",    NULL },
+
+    { "GL_EXT_blend_func_separate",        GL_EXT_blend_func_separate_functions },
+    { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
+    { "GL_EXT_paletted_texture",           GL_EXT_paletted_texture_functions },
+    { "GL_EXT_shared_texture_palette",     NULL },
+    { "GL_EXT_stencil_wrap",               NULL },
+    { "GL_EXT_texture_env_add",            NULL },
+    { "GL_EXT_texture_lod_bias",           NULL },
+
+#ifdef need_GL_ARB_point_parameters
+    { "GL_ARB_point_parameters",           GL_ARB_point_parameters_functions },
+    { "GL_ARB_point_sprite",               NULL },
+#endif
+#ifdef need_GL_EXT_secondary_color
+    { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
+#endif
+#ifdef need_GL_ARB_vertex_program
+    { "GL_ARB_vertex_program",             GL_ARB_vertex_program_functions },  /* comma was missing */
+#endif
+#ifdef need_GL_NV_vertex_program
+    { "GL_NV_vertex_program",              GL_NV_vertex_program_functions },   /* comma was missing */
+    { "GL_NV_vertex_program1_1",           NULL },
+#endif
+    { NULL,                                NULL }
+};
+
+/**
+ * Extension strings exported only by Napalm (e.g., Voodoo4 & Voodoo5) cards.
+ */
+static const struct dri_extension napalm_extensions[] =
+{
+    { "GL_ARB_texture_env_combine",        NULL },
+    { "GL_EXT_blend_equation_separate",    GL_EXT_blend_equation_separate_functions },
+    { "GL_EXT_blend_subtract",             GL_EXT_blend_minmax_functions },  /* uses the blend_minmax function table */
+    { "GL_EXT_texture_compression_s3tc",   NULL },
+    { "GL_EXT_texture_env_combine",        NULL },
+
+    { "GL_3DFX_texture_compression_FXT1",  NULL },
+    { "GL_NV_blend_square",                NULL },
+    { "GL_S3_s3tc",                        NULL },
+    { NULL,                                NULL }   /* end of list */
+};
+
+/*
+ * Enable the extensions supported by this context's hardware.
+ */
+static void tdfxDDInitExtensions( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+
+   driInitExtensions( ctx, card_extensions, GL_FALSE );   /* common set */
+
+   if ( fxMesa->haveTwoTMUs ) {   /* multitexture needs the second TMU */
+      _mesa_enable_extension( ctx, "GL_ARB_multitexture" );
+   }
+
+   if ( !( TDFX_IS_NAPALM( fxMesa ) ) ) {
+      _mesa_enable_extension( ctx, "GL_SGIS_generate_mipmap" );
+   } else {
+      driInitExtensions( ctx, napalm_extensions, GL_FALSE );   /* Voodoo4/5 extras */
+   }
+}
+
+
+
+static const struct tnl_pipeline_stage *tdfx_pipeline[] = {   /* installed by tdfxCreateContext() */
+   &_tnl_vertex_transform_stage,
+   &_tnl_normal_transform_stage,
+   &_tnl_lighting_stage,
+   &_tnl_fog_coordinate_stage,
+   &_tnl_texgen_stage,
+   &_tnl_texture_transform_stage,
+   &_tnl_point_attenuation_stage,
+   &_tnl_render_stage,
+   NULL,   /* end of list */
+};
+
+static const struct dri_debug_control debug_control[] =   /* keywords for the TDFX_DEBUG environment variable */
+{
+    { "dri",   DEBUG_VERBOSE_DRI },
+    { "sync",  DEBUG_ALWAYS_SYNC },
+    { "api",   DEBUG_VERBOSE_API },
+    { "fall",  DEBUG_VERBOSE_FALL },
+    { NULL,    0 }   /* end of list */
+};
+/* Create the driver-private context for driContextPriv; returns GL_TRUE on success, GL_FALSE on failure. */
+GLboolean tdfxCreateContext( gl_api api,                      /* not referenced in this function */
+			     const __GLcontextModes *mesaVis,  /* visual the context is created for */
+			     __DRIcontext *driContextPriv,     /* DRI context; its driverPrivate is set here */
+                             void *sharedContextPrivate )      /* tdfx context to share with, or NULL */
+{
+   tdfxContextPtr fxMesa;
+   GLcontext *ctx, *shareCtx;
+   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+   tdfxScreenPrivate *fxScreen = (tdfxScreenPrivate *) sPriv->private;
+   TDFXSAREAPriv *saPriv = (TDFXSAREAPriv *) ((char *) sPriv->pSAREA +
+					      sizeof(drm_sarea_t));
+   struct dd_function_table functions;
+
+   /* Allocate tdfx context */
+   fxMesa = (tdfxContextPtr) CALLOC( sizeof(tdfxContextRec) );
+   if (!fxMesa)
+      return GL_FALSE;
+
+   /* Init default driver functions then plug in our tdfx-specific functions
+    * (the texture functions are especially important)
+    */
+   _mesa_init_driver_functions(&functions);
+   tdfxDDInitDriverFuncs(mesaVis, &functions);
+   tdfxInitTextureFuncs(&functions);
+   tdfxInitRenderFuncs(&functions);
+
+   /* Allocate the Mesa context */
+   if (sharedContextPrivate)
+      shareCtx = ((tdfxContextPtr) sharedContextPrivate)->glCtx;
+   else 
+      shareCtx = NULL;
+
+   fxMesa->glCtx = _mesa_create_context(mesaVis, shareCtx,
+                                        &functions, (void *) fxMesa);
+   if (!fxMesa->glCtx) {
+      FREE(fxMesa);
+      return GL_FALSE;
+   }
+   driContextPriv->driverPrivate = fxMesa;
+
+   /* Mirror some important DRI state */
+   fxMesa->hHWContext = driContextPriv->hHWContext;
+   fxMesa->driHwLock = &sPriv->pSAREA->lock;
+   fxMesa->driFd = sPriv->fd;
+
+   fxMesa->driScreen = sPriv;
+   fxMesa->driContext = driContextPriv;
+   fxMesa->fxScreen = fxScreen;
+   fxMesa->sarea = saPriv;
+
+   /*JJJ - really?*/
+   fxMesa->haveHwAlpha = ( mesaVis->alphaBits &&
+                           ((mesaVis->greenBits == 8) ||
+                            (mesaVis->depthBits == 0)) );
+   fxMesa->haveHwStencil = ( TDFX_IS_NAPALM( fxMesa ) &&
+			     mesaVis->stencilBits &&
+			     mesaVis->depthBits == 24 );
+
+   fxMesa->screen_width = fxScreen->width;
+   fxMesa->screen_height = fxScreen->height;
+
+   fxMesa->new_gl_state = ~0;
+   fxMesa->new_state = ~0;
+   fxMesa->dirty = ~0;
+
+   /* Parse configuration files */
+   driParseConfigFiles (&fxMesa->optionCache, &fxScreen->optionCache,
+                        fxMesa->driScreen->myNum, "tdfx");
+
+   /* NOTE: This must be here before any Glide calls! */
+   if (!tdfxInitGlide( fxMesa )) {
+      driContextPriv->driverPrivate = NULL;    /* fxMesa is freed below; don't leave it reachable */
+      _mesa_destroy_context( fxMesa->glCtx );  /* previously leaked on this path */
+      FREE(fxMesa);
+      return GL_FALSE;
+   }
+
+   fxMesa->Glide.grDRIOpen( (char*) sPriv->pFB, fxScreen->regs.map, fxScreen->deviceID,
+	      fxScreen->width, fxScreen->height, fxScreen->mem, fxScreen->cpp,
+	      fxScreen->stride, fxScreen->fifoOffset, fxScreen->fifoSize,
+	      fxScreen->fbOffset, fxScreen->backOffset, fxScreen->depthOffset,
+	      fxScreen->textureOffset, fxScreen->textureSize, &saPriv->fifoPtr,
+	      &saPriv->fifoRead );
+
+   if ( getenv( "FX_GLIDE_SWAPINTERVAL" ) ) {
+      fxMesa->Glide.SwapInterval = atoi( getenv( "FX_GLIDE_SWAPINTERVAL" ) );
+   } else {
+      fxMesa->Glide.SwapInterval = 0;
+   }
+   if ( getenv( "FX_MAX_PENDING_SWAPS" ) ) {
+      fxMesa->Glide.MaxPendingSwaps = atoi( getenv( "FX_MAX_PENDING_SWAPS" ) );
+   } else {
+      fxMesa->Glide.MaxPendingSwaps = 2;
+   }
+
+   fxMesa->Glide.Initialized = GL_FALSE;   /* tdfxInitContext() sets this once Glide is up */
+   fxMesa->Glide.Board = 0;
+
+   if (getenv("FX_EMULATE_SINGLE_TMU")) {
+      fxMesa->haveTwoTMUs = GL_FALSE;
+   }
+   else {
+      if ( TDFX_IS_BANSHEE( fxMesa ) ) {
+         fxMesa->haveTwoTMUs = GL_FALSE;
+      } else {
+         fxMesa->haveTwoTMUs = GL_TRUE;
+      }
+   }
+
+   fxMesa->stats.swapBuffer = 0;
+   fxMesa->stats.reqTexUpload = 0;
+   fxMesa->stats.texUpload = 0;
+   fxMesa->stats.memTexUpload = 0;
+
+   fxMesa->tmuSrc = TDFX_TMU_NONE;
+
+   ctx = fxMesa->glCtx;
+   if ( TDFX_IS_NAPALM( fxMesa ) ) {
+      ctx->Const.MaxTextureLevels = 12;
+   } else {
+      ctx->Const.MaxTextureLevels = 9;
+   }
+   ctx->Const.MaxTextureUnits = TDFX_IS_BANSHEE( fxMesa ) ? 1 : 2;
+   ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
+   ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
+
+   /* No wide points.
+    */
+   ctx->Const.MinPointSize = 1.0;
+   ctx->Const.MinPointSizeAA = 1.0;
+   ctx->Const.MaxPointSize = 1.0;
+   ctx->Const.MaxPointSizeAA = 1.0;
+
+   /* Disable wide lines as we can't antialias them correctly in
+    * hardware.
+    */
+   ctx->Const.MinLineWidth = 1.0;
+   ctx->Const.MinLineWidthAA = 1.0;
+   ctx->Const.MaxLineWidth = 1.0;
+   ctx->Const.MaxLineWidthAA = 1.0;
+   ctx->Const.LineWidthGranularity = 1.0;
+
+   ctx->Const.MaxDrawBuffers = 1;
+
+   /* Initialize the software rasterizer and helper modules.
+    */
+   _swrast_CreateContext( ctx );
+   _vbo_CreateContext( ctx );
+   _tnl_CreateContext( ctx );
+   _swsetup_CreateContext( ctx );
+
+   /* Install the customized pipeline:
+    */
+   _tnl_destroy_pipeline( ctx );
+   _tnl_install_pipeline( ctx, tdfx_pipeline );
+
+   /* Configure swrast and T&L to match hardware characteristics:
+    */
+   _swrast_allow_pixel_fog( ctx, GL_TRUE );
+   _swrast_allow_vertex_fog( ctx, GL_FALSE );
+   _tnl_allow_pixel_fog( ctx, GL_TRUE );
+   _tnl_allow_vertex_fog( ctx, GL_FALSE );
+
+   tdfxDDInitExtensions( ctx );
+   /* XXX these should really go right after _mesa_init_driver_functions() */
+   tdfxDDInitSpanFuncs( ctx ); 
+   tdfxDDInitStateFuncs( ctx );
+   tdfxDDInitTriFuncs( ctx );
+   tdfxInitVB( ctx );
+   tdfxInitState( fxMesa );
+
+#if DO_DEBUG
+   TDFX_DEBUG = driParseDebugString( getenv( "TDFX_DEBUG" ), debug_control );
+#endif
+
+   if (driQueryOptionb(&fxMesa->optionCache, "no_rast")) {
+      fprintf(stderr, "disabling 3D acceleration\n");
+      FALLBACK(fxMesa, TDFX_FALLBACK_DISABLE, 1);
+   }
+
+   return GL_TRUE;
+}
+
+/* Allocate and fill one Glide vertex layout per TDFX_LAYOUT_* format; GL_FALSE (nothing left allocated) on failure. */
+static GLboolean tdfxInitVertexFormats( tdfxContextPtr fxMesa )
+{
+   FxI32 result;
+   int i;
+
+   LOCK_HARDWARE( fxMesa );
+
+   /* Query the size of one layout record, then allocate a record per format. */
+   fxMesa->Glide.grGet( GR_GLIDE_VERTEXLAYOUT_SIZE, sizeof(FxI32), &result );
+   for ( i = 0 ; i < TDFX_NUM_LAYOUTS ; i++ ) {
+      fxMesa->layout[i] = MALLOC( result );
+      if ( !fxMesa->layout[i] ) {
+	 /* Don't leak the layouts obtained on earlier iterations. */
+	 while ( i-- > 0 ) {
+	    FREE( fxMesa->layout[i] );
+	    fxMesa->layout[i] = NULL;
+	 }
+	 UNLOCK_HARDWARE( fxMesa );
+	 return GL_FALSE;
+      }
+   }
+
+   /* Tiny vertex format - 16 bytes. */
+   fxMesa->Glide.grReset( GR_VERTEX_PARAMETER );
+   fxMesa->Glide.grCoordinateSpace( GR_WINDOW_COORDS );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_XY,	TDFX_XY_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Z, TDFX_Z_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_PARGB, TDFX_ARGB_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grGlideGetVertexLayout( fxMesa->layout[TDFX_LAYOUT_TINY] );
+
+   /* Non textured vertex format - 24 bytes (Need w for table fog) */
+   fxMesa->Glide.grReset( GR_VERTEX_PARAMETER );
+   fxMesa->Glide.grCoordinateSpace( GR_WINDOW_COORDS );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_XY,	TDFX_XY_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Z, TDFX_Z_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Q, TDFX_Q_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_PARGB, TDFX_ARGB_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grGlideGetVertexLayout( fxMesa->layout[TDFX_LAYOUT_NOTEX] );
+
+   /* Single textured vertex format - 32 bytes. */
+   fxMesa->Glide.grReset( GR_VERTEX_PARAMETER );
+   fxMesa->Glide.grCoordinateSpace( GR_WINDOW_COORDS );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_XY,	TDFX_XY_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Z, TDFX_Z_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Q, TDFX_Q_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_PARGB, TDFX_ARGB_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_ST0, TDFX_ST0_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grGlideGetVertexLayout( fxMesa->layout[TDFX_LAYOUT_SINGLE] );
+
+   /* Multitextured vertex format - 40 bytes. */
+   fxMesa->Glide.grReset( GR_VERTEX_PARAMETER );
+   fxMesa->Glide.grCoordinateSpace( GR_WINDOW_COORDS );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_XY, TDFX_XY_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Z, TDFX_Z_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Q, TDFX_Q_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_PARGB, TDFX_ARGB_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_ST0, TDFX_ST0_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_ST1, TDFX_ST1_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grGlideGetVertexLayout( fxMesa->layout[TDFX_LAYOUT_MULTI] );
+
+   /* Projected texture vertex format - 36 bytes. */
+   fxMesa->Glide.grReset( GR_VERTEX_PARAMETER );
+   fxMesa->Glide.grCoordinateSpace( GR_WINDOW_COORDS );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_XY, TDFX_XY_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Z, TDFX_Z_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Q, TDFX_Q_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_PARGB, TDFX_ARGB_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_ST0, TDFX_ST0_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Q0, TDFX_Q0_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grGlideGetVertexLayout( fxMesa->layout[TDFX_LAYOUT_PROJ1] );
+
+   /* Projected multitexture vertex format - 48 bytes. */
+   fxMesa->Glide.grReset( GR_VERTEX_PARAMETER );
+   fxMesa->Glide.grCoordinateSpace( GR_WINDOW_COORDS );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_XY, TDFX_XY_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Z, TDFX_Z_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Q, TDFX_Q_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_PARGB, TDFX_ARGB_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_ST0, TDFX_ST0_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Q0, TDFX_Q0_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_ST1, TDFX_ST1_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grVertexLayout( GR_PARAM_Q1, TDFX_Q1_OFFSET, GR_PARAM_ENABLE );
+   fxMesa->Glide.grGlideGetVertexLayout( fxMesa->layout[TDFX_LAYOUT_PROJ2] );
+
+   UNLOCK_HARDWARE( fxMesa );
+
+   return GL_TRUE;
+}
+
+
+/*
+ * Glide bring-up for fxMesa; returns GL_TRUE once (or if already) initialized, GL_FALSE on failure.
+ */
+static GLboolean
+tdfxInitContext( __DRIdrawable *driDrawPriv, tdfxContextPtr fxMesa )
+{
+   /* KW: Would be nice to make one of these a member of the other.
+    */
+   FxI32 result[2];
+   const char *gext;
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_DRI ) {
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, (void *)fxMesa );
+   }
+
+#if DEBUG_LOCKING
+   fprintf(stderr, "Debug locking enabled\n");
+#endif
+
+   if ( fxMesa->Glide.Initialized )
+      return GL_TRUE;
+
+   fxMesa->width = driDrawPriv->w;
+   fxMesa->height = driDrawPriv->h;
+
+   /* We have to use a light lock here, because we can't do any glide
+    * operations yet. No use of FX_* functions in this function.
+    */
+   DRM_LIGHT_LOCK( fxMesa->driFd, fxMesa->driHwLock, fxMesa->hHWContext );
+
+   fxMesa->Glide.grGlideInit();
+   fxMesa->Glide.grSstSelect( fxMesa->Glide.Board );
+
+   fxMesa->Glide.Context = fxMesa->Glide.grSstWinOpen( (FxU32) -1,
+					 GR_RESOLUTION_NONE,
+					 GR_REFRESH_NONE,
+					 fxMesa->Glide.ColorFormat,
+					 fxMesa->Glide.Origin,
+					 2, 1 );
+
+   fxMesa->Glide.grDRIResetSAREA();
+
+   DRM_UNLOCK( fxMesa->driFd, fxMesa->driHwLock, fxMesa->hHWContext );
+
+   if ( !fxMesa->Glide.Context )
+      return GL_FALSE;
+
+
+   /* Perform the Glide-dependent part of the context initialization.
+    */
+   FX_grColorMaskv( fxMesa->glCtx, true4 );
+
+   tdfxTMInit( fxMesa );
+
+   LOCK_HARDWARE( fxMesa );
+
+   /* JJJ - COMMAND_TRANSPORT, PALETTE6666 */
+   gext = fxMesa->Glide.grGetString( GR_EXTENSION );
+   fxMesa->Glide.HaveCombineExt = strstr(gext, "COMBINE") && !getenv("MESA_FX_IGNORE_CMBEXT");
+   fxMesa->Glide.HaveCommandTransportExt = GL_FALSE;
+   fxMesa->Glide.HaveFogCoordExt = GL_TRUE;
+   fxMesa->Glide.HavePixelExt = strstr(gext, "PIXEXT") && !getenv("MESA_FX_IGNORE_PIXEXT");
+   fxMesa->Glide.HaveTextureBufferExt = GL_TRUE;
+   fxMesa->Glide.HaveTexFmtExt = strstr(gext, "TEXFMT") && !getenv("MESA_FX_IGNORE_TEXFMT");
+   fxMesa->Glide.HaveTexUMAExt = strstr(gext, "TEXUMA") && !getenv("MESA_FX_IGNORE_TEXUMA");
+   fxMesa->Glide.HaveMirrorExt = strstr(gext, "TEXMIRROR") && !getenv("MESA_FX_IGNORE_MIREXT");
+   fxMesa->Glide.HaveTexus2 = GL_FALSE;
+
+   if ( fxMesa->glCtx->Visual.depthBits > 0 ) {
+      fxMesa->Glide.grDepthBufferMode(GR_DEPTHBUFFER_ZBUFFER);
+   } else {
+      fxMesa->Glide.grDepthBufferMode(GR_DEPTHBUFFER_DISABLE);
+   }
+
+   fxMesa->Glide.grLfbWriteColorFormat( GR_COLORFORMAT_ABGR );
+
+   fxMesa->Glide.grGet( GR_TEXTURE_ALIGN, sizeof(FxI32), result );
+   fxMesa->Glide.TextureAlign = result[0];
+
+   fxMesa->Glide.State = NULL;
+   fxMesa->Glide.grGet( GR_GLIDE_STATE_SIZE, sizeof(FxI32), result );
+   fxMesa->Glide.State = MALLOC( result[0] );
+
+   fxMesa->Fog.Table = NULL;
+   fxMesa->Glide.grGet( GR_FOG_TABLE_ENTRIES, sizeof(FxI32), result );
+   fxMesa->Fog.Table = MALLOC( result[0] * sizeof(GrFog_t) );
+
+   UNLOCK_HARDWARE( fxMesa );
+
+   /* On any failure release both buffers and clear the now-stale pointers. */
+   if ( !fxMesa->Glide.State || !fxMesa->Fog.Table ||
+        !tdfxInitVertexFormats( fxMesa ) ) {
+      if ( fxMesa->Glide.State )
+	 FREE( fxMesa->Glide.State );
+      if ( fxMesa->Fog.Table )
+	 FREE( fxMesa->Fog.Table );
+      fxMesa->Glide.State = NULL;
+      fxMesa->Fog.Table = NULL;
+      return GL_FALSE;
+   }
+
+   LOCK_HARDWARE( fxMesa );
+
+   fxMesa->Glide.grGlideGetState( fxMesa->Glide.State );
+
+   if ( getenv( "FX_GLIDE_INFO" ) ) {
+      printf( "GR_RENDERER  = %s\n", (char *) fxMesa->Glide.grGetString( GR_RENDERER ) );
+      printf( "GR_VERSION   = %s\n", (char *) fxMesa->Glide.grGetString( GR_VERSION ) );
+      printf( "GR_VENDOR    = %s\n", (char *) fxMesa->Glide.grGetString( GR_VENDOR ) );
+      printf( "GR_HARDWARE  = %s\n", (char *) fxMesa->Glide.grGetString( GR_HARDWARE ) );
+      printf( "GR_EXTENSION = %s\n", (char *) gext );
+   }
+
+   UNLOCK_HARDWARE( fxMesa );
+
+   fxMesa->numClipRects = 0;
+   fxMesa->pClipRects = NULL;
+   fxMesa->scissoredClipRects = GL_FALSE;
+
+   fxMesa->Glide.Initialized = GL_TRUE;
+
+   return GL_TRUE;
+}
+
+
+/* Destroy a tdfx context: free per-texture driver data, texture memory, the
+ * Mesa helper modules, the Mesa context and finally the tdfx context itself.
+ */
+void
+tdfxDestroyContext( __DRIcontext *driContextPriv )
+{
+   tdfxContextPtr fxMesa = (tdfxContextPtr) driContextPriv->driverPrivate;
+   GLcontext *ctx;
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_DRI ) {
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, (void *)fxMesa );
+   }
+
+   if ( !fxMesa )
+      return;
+
+   ctx = fxMesa->glCtx;
+   if ( ctx->Shared->RefCount == 1 && fxMesa->driDrawable ) {
+      /* Last user of this share group: free our private texture data. */
+      struct _mesa_HashTable *texTable = ctx->Shared->TexObjects;
+      GLuint texId = _mesa_HashFirstEntry( texTable );
+
+      while ( texId ) {
+         tdfxTMFreeTexture( fxMesa, _mesa_lookup_texture( ctx, texId ) );
+         texId = _mesa_HashNextEntry( texTable, texId );
+      }
+   }
+
+   tdfxTMClose( fxMesa );  /* free texture memory */
+
+   _swsetup_DestroyContext( ctx );
+   _tnl_DestroyContext( ctx );
+   _vbo_DestroyContext( ctx );
+   _swrast_DestroyContext( ctx );
+
+   tdfxFreeVB( ctx );
+
+   /* Detach the driver data, then free the Mesa and tdfx contexts. */
+   ctx->DriverCtx = NULL;
+   _mesa_destroy_context( ctx );
+   FREE( fxMesa );
+}
+
+/* Save the Glide state of the current context if it is the one being
+ * unbound.  Always returns GL_TRUE. */
+GLboolean tdfxUnbindContext( __DRIcontext *driContextPriv )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   tdfxContextPtr fxMesa = ctx ? TDFX_CONTEXT(ctx) : NULL;  /* no current ctx */
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_DRI ) {
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, (void *)driContextPriv );
+   }
+   /* Match on driverPrivate: the DRI handle itself is never a tdfx context. */
+   if ( fxMesa && driContextPriv && driContextPriv->driverPrivate == fxMesa ) {
+      LOCK_HARDWARE(fxMesa);
+      fxMesa->Glide.grGlideGetState(fxMesa->Glide.State);
+      UNLOCK_HARDWARE(fxMesa);
+   }
+   return GL_TRUE;
+}
+
+
+/* Make a context current on the given draw/read drawables, or release the
+ * current context when driContextPriv is NULL.  Returns GL_FALSE only when
+ * tdfxInitContext() fails on the first bind, GL_TRUE otherwise.
+ */
+GLboolean
+tdfxMakeCurrent( __DRIcontext *driContextPriv,
+                 __DRIdrawable *driDrawPriv,
+                 __DRIdrawable *driReadPriv )
+{
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_DRI ) {
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, (void *)driContextPriv );
+   }
+
+   if ( driContextPriv ) {
+      tdfxContextPtr newFx = (tdfxContextPtr) driContextPriv->driverPrivate;
+      GLcontext *newCtx = newFx->glCtx;
+      GLboolean firstBind;
+      GET_CURRENT_CONTEXT(curCtx);
+
+      if ( newFx->driDrawable == driDrawPriv &&
+           newFx->driReadable == driReadPriv ) {
+         if ( curCtx == newCtx ) {
+            /* Same context on the same drawables: no state to reload, but
+             * _mesa_make_current() is still called so that API dispatch is
+             * set correctly.
+             */
+            _mesa_make_current( newCtx,
+                                (GLframebuffer *) driDrawPriv->driverPrivate,
+                                (GLframebuffer *) driReadPriv->driverPrivate );
+            return GL_TRUE;
+         }
+      } else {
+         newFx->driDrawable = driDrawPriv;
+         newFx->driReadable = driReadPriv;
+      }
+
+      /* New drawables or a different context: mark all state dirty.
+       * [dBorca] tunnel2 requires this even when only the context changed.
+       */
+      newFx->dirty = ~0;
+
+      driUpdateFramebufferSize( newCtx, driDrawPriv );
+      if ( driDrawPriv != driReadPriv ) {
+         driUpdateFramebufferSize( newCtx, driReadPriv );
+      }
+
+      firstBind = !newFx->Glide.Initialized;
+      if ( firstBind && !tdfxInitContext( driDrawPriv, newFx ) ) {
+         return GL_FALSE;
+      }
+
+      LOCK_HARDWARE( newFx );
+      if ( firstBind ) {
+         /* FIXME: Force loading of window information */
+         newFx->width = 0;
+      } else {
+         /* Re-select the board and reload this context's Glide state. */
+         newFx->Glide.grSstSelect( newFx->Glide.Board );
+         newFx->Glide.grGlideSetState( newFx->Glide.State );
+      }
+      tdfxUpdateClipping( newCtx );
+      tdfxUploadClipping( newFx );
+      UNLOCK_HARDWARE( newFx );
+
+      _mesa_make_current( newCtx,
+                          (GLframebuffer *) driDrawPriv->driverPrivate,
+                          (GLframebuffer *) driReadPriv->driverPrivate );
+   } else {
+      _mesa_make_current( NULL, NULL, NULL );
+   }
+
+   return GL_TRUE;
+}
+
+
+/*
+ * Enable this to trace calls to various Glide functions.
+ */
+/*#define DEBUG_TRAP*/
+#ifdef DEBUG_TRAP
+/* Saved drawing entry points; the debug_* wrappers below forward to these. */
+static void (*real_grDrawTriangle)( const void *a, const void *b, const void *c );
+static void (*real_grDrawPoint)( const void *a );
+static void (*real_grDrawVertexArray)( FxU32 mode, FxU32 Count, void *pointers );
+static void (*real_grDrawVertexArrayContiguous)( FxU32 mode, FxU32 Count,
+                                                 void *pointers, FxU32 stride );
+static void (*real_grClipWindow)( FxU32 minx, FxU32 miny, FxU32 maxx, FxU32 maxy );
+/* Saved vertex layout entry points. */
+static void (*real_grVertexLayout)( FxU32 param, FxI32 offset, FxU32 mode );
+static void (*real_grGlideGetVertexLayout)( void *layout );
+static void (*real_grGlideSetVertexLayout)( const void *layout );
+/* Saved texture download entry point. */
+static void (*real_grTexDownloadMipMapLevel)( GrChipID_t        tmu,
+                                              FxU32             startAddress,
+                                              GrLOD_t           thisLod,
+                                              GrLOD_t           largeLod,
+                                              GrAspectRatio_t   aspectRatio,
+                                              GrTextureFormat_t format,
+                                              FxU32             evenOdd,
+                                              void              *data );
+
+static void debug_grDrawTriangle( const void *a, const void *b, const void *c )
+{
+   printf("%s\n", __FUNCTION__);
+   real_grDrawTriangle(a, b, c);
+}
+
+static void debug_grDrawPoint( const void *a )
+{
+   const float *xy = (const float *) a;   /* print the first two floats */
+   printf("%s %g %g\n", __FUNCTION__, xy[0], xy[1]);
+   real_grDrawPoint(a);
+}
+
+static void debug_grDrawVertexArray(FxU32 mode, FxU32 Count, void *pointers)
+{
+   printf("%s count=%d\n", __FUNCTION__, (int) Count);
+   real_grDrawVertexArray(mode, Count, pointers);
+}
+
+static void debug_grDrawVertexArrayContiguous(FxU32 mode, FxU32 Count,
+                                              void *pointers, FxU32 stride)
+{
+   printf("%s mode=0x%x count=%d\n", __FUNCTION__, (int) mode, (int) Count);
+   real_grDrawVertexArrayContiguous(mode, Count, pointers, stride);
+}
+
+static void debug_grClipWindow( FxU32 minx, FxU32 miny, FxU32 maxx, FxU32 maxy )
+{
+   printf("%s %d,%d .. %d,%d\n", __FUNCTION__,
+          (int) minx, (int) miny, (int) maxx, (int) maxy);
+   real_grClipWindow(minx, miny, maxx, maxy);
+}
+
+static void debug_grVertexLayout(FxU32 param, FxI32 offset, FxU32 mode)
+{
+   real_grVertexLayout(param, offset, mode);   /* untraced pass-through */
+}
+
+static void debug_grGlideGetVertexLayout( void *layout )
+{
+   real_grGlideGetVertexLayout(layout);   /* untraced pass-through */
+}
+
+static void debug_grGlideSetVertexLayout( const void *layout )
+{
+   real_grGlideSetVertexLayout(layout);   /* untraced pass-through */
+}
+
+static void debug_grTexDownloadMipMapLevel( GrChipID_t        tmu,
+                                            FxU32             startAddress,
+                                            GrLOD_t           thisLod,
+                                            GrLOD_t           largeLod,
+                                            GrAspectRatio_t   aspectRatio,
+                                            GrTextureFormat_t format,
+                                            FxU32             evenOdd,
+                                            void              *data )
+{
+   /* untraced pass-through */
+   real_grTexDownloadMipMapLevel(tmu, startAddress, thisLod, largeLod,
+                                 aspectRatio, format, evenOdd, data);
+}
+
+#endif
+
+
+/*
+ * Examine the context's deviceID to determine what kind of 3dfx hardware
+ * is installed.  dlopen() the appropriate Glide library (falling back to
+ * the generic libglide3.so) and initialize this context's Glide function
+ * pointers with dlsym().
+ *
+ * A missing core function is only reported through __driUtilMessage(); the
+ * lookup continues and that pointer stays NULL.  Extension and Texus
+ * functions are looked up silently.
+ *
+ * Return:  GL_TRUE on success, GL_FALSE if the deviceID is unrecognized or
+ *          no Glide library could be opened.
+ */
+GLboolean tdfxInitGlide(tdfxContextPtr tmesa)
+{
+   static const char *defaultGlide = "libglide3.so";
+   const char *libName;
+   const char *debugEnv;
+   void *libHandle;
+
+   /*
+    * XXX this code which selects a Glide library filename given the
+    * deviceID may need to be cleaned up a bit.
+    * Non-Linux systems may have different filenames, for example.
+    */
+   if (tmesa->fxScreen->deviceID == PCI_CHIP_BANSHEE ||
+       tmesa->fxScreen->deviceID == PCI_CHIP_VOODOO3) {
+      libName = "libglide3-v3.so";
+   }
+   else if (tmesa->fxScreen->deviceID == PCI_CHIP_VOODOO5) {
+      /* same as PCI_CHIP_VOODOO4 */
+      libName = "libglide3-v5.so";
+   }
+   else {
+      __driUtilMessage("unrecognized 3dfx deviceID: 0x%x",
+                       tmesa->fxScreen->deviceID);
+      return GL_FALSE;
+   }
+
+   libHandle = dlopen(libName, RTLD_NOW);
+   if (libHandle == NULL) {
+      /* The device-specific Glide library filename didn't work, try the
+       * old, generic libglide3.so library.
+       */
+      const char *deviceLib = libName;
+
+      libName = defaultGlide;
+      libHandle = dlopen(libName, RTLD_NOW);
+      if (libHandle == NULL) {
+         __driUtilMessage(
+            "can't find Glide library, dlopen(%s) and dlopen(%s) both failed.",
+            deviceLib, defaultGlide);
+         __driUtilMessage("dlerror() message: %s", dlerror());
+         return GL_FALSE;
+      }
+   }
+
+   debugEnv = getenv("LIBGL_DEBUG");
+   if (debugEnv != NULL && strstr(debugEnv, "verbose") != NULL) {
+      fprintf(stderr, "libGL: using Glide library %s\n", libName);
+   }
+
+/* Look up a required Glide entry point and complain if it is missing. */
+#define GET_FUNCTION(PTR, NAME)						\
+   do {									\
+      tmesa->Glide.PTR = dlsym(libHandle, NAME);			\
+      if (!tmesa->Glide.PTR) {						\
+         __driUtilMessage("couldn't find Glide function %s in %s.",	\
+                 NAME, libName);					\
+      }									\
+   } while (0)
+
+   GET_FUNCTION(grDrawPoint, "grDrawPoint");
+   GET_FUNCTION(grDrawLine, "grDrawLine");
+   GET_FUNCTION(grDrawTriangle, "grDrawTriangle");
+   GET_FUNCTION(grVertexLayout, "grVertexLayout");
+   GET_FUNCTION(grDrawVertexArray, "grDrawVertexArray");
+   GET_FUNCTION(grDrawVertexArrayContiguous, "grDrawVertexArrayContiguous");
+   GET_FUNCTION(grBufferClear, "grBufferClear");
+   /*GET_FUNCTION(grBufferSwap, "grBufferSwap");*/
+   GET_FUNCTION(grRenderBuffer, "grRenderBuffer");
+   GET_FUNCTION(grErrorSetCallback, "grErrorSetCallback");
+   GET_FUNCTION(grFinish, "grFinish");
+   GET_FUNCTION(grFlush, "grFlush");
+   GET_FUNCTION(grSstWinOpen, "grSstWinOpen");
+   GET_FUNCTION(grSstWinClose, "grSstWinClose");
+#if 0
+   /* Not in V3 lib, and not used anyway. */
+   GET_FUNCTION(grSetNumPendingBuffers, "grSetNumPendingBuffers");
+#endif
+   GET_FUNCTION(grSelectContext, "grSelectContext");
+   GET_FUNCTION(grSstOrigin, "grSstOrigin");
+   GET_FUNCTION(grSstSelect, "grSstSelect");
+   GET_FUNCTION(grAlphaBlendFunction, "grAlphaBlendFunction");
+   GET_FUNCTION(grAlphaCombine, "grAlphaCombine");
+   GET_FUNCTION(grAlphaControlsITRGBLighting, "grAlphaControlsITRGBLighting");
+   GET_FUNCTION(grAlphaTestFunction, "grAlphaTestFunction");
+   GET_FUNCTION(grAlphaTestReferenceValue, "grAlphaTestReferenceValue");
+   GET_FUNCTION(grChromakeyMode, "grChromakeyMode");
+   GET_FUNCTION(grChromakeyValue, "grChromakeyValue");
+   GET_FUNCTION(grClipWindow, "grClipWindow");
+   GET_FUNCTION(grColorCombine, "grColorCombine");
+   GET_FUNCTION(grColorMask, "grColorMask");
+   GET_FUNCTION(grCullMode, "grCullMode");
+   GET_FUNCTION(grConstantColorValue, "grConstantColorValue");
+   GET_FUNCTION(grDepthBiasLevel, "grDepthBiasLevel");
+   GET_FUNCTION(grDepthBufferFunction, "grDepthBufferFunction");
+   GET_FUNCTION(grDepthBufferMode, "grDepthBufferMode");
+   GET_FUNCTION(grDepthMask, "grDepthMask");
+   GET_FUNCTION(grDisableAllEffects, "grDisableAllEffects");
+   GET_FUNCTION(grDitherMode, "grDitherMode");
+   GET_FUNCTION(grFogColorValue, "grFogColorValue");
+   GET_FUNCTION(grFogMode, "grFogMode");
+   GET_FUNCTION(grFogTable, "grFogTable");
+   GET_FUNCTION(grLoadGammaTable, "grLoadGammaTable");
+   GET_FUNCTION(grSplash, "grSplash");
+   GET_FUNCTION(grGet, "grGet");
+   GET_FUNCTION(grGetString, "grGetString");
+   GET_FUNCTION(grQueryResolutions, "grQueryResolutions");
+   GET_FUNCTION(grReset, "grReset");
+   GET_FUNCTION(grGetProcAddress, "grGetProcAddress");
+   GET_FUNCTION(grEnable, "grEnable");
+   GET_FUNCTION(grDisable, "grDisable");
+   GET_FUNCTION(grCoordinateSpace, "grCoordinateSpace");
+   GET_FUNCTION(grDepthRange, "grDepthRange");
+   GET_FUNCTION(grStippleMode, "grStippleMode");
+   GET_FUNCTION(grStipplePattern, "grStipplePattern");
+   GET_FUNCTION(grViewport, "grViewport");
+   GET_FUNCTION(grTexCalcMemRequired, "grTexCalcMemRequired");
+   GET_FUNCTION(grTexTextureMemRequired, "grTexTextureMemRequired");
+   GET_FUNCTION(grTexMinAddress, "grTexMinAddress");
+   GET_FUNCTION(grTexMaxAddress, "grTexMaxAddress");
+   GET_FUNCTION(grTexNCCTable, "grTexNCCTable");
+   GET_FUNCTION(grTexSource, "grTexSource");
+   GET_FUNCTION(grTexClampMode, "grTexClampMode");
+   GET_FUNCTION(grTexCombine, "grTexCombine");
+   GET_FUNCTION(grTexDetailControl, "grTexDetailControl");
+   GET_FUNCTION(grTexFilterMode, "grTexFilterMode");
+   GET_FUNCTION(grTexLodBiasValue, "grTexLodBiasValue");
+   GET_FUNCTION(grTexDownloadMipMap, "grTexDownloadMipMap");
+   GET_FUNCTION(grTexDownloadMipMapLevel, "grTexDownloadMipMapLevel");
+   GET_FUNCTION(grTexDownloadMipMapLevelPartial, "grTexDownloadMipMapLevelPartial");
+   GET_FUNCTION(grTexDownloadTable, "grTexDownloadTable");
+   GET_FUNCTION(grTexDownloadTablePartial, "grTexDownloadTablePartial");
+   GET_FUNCTION(grTexMipMapMode, "grTexMipMapMode");
+   GET_FUNCTION(grTexMultibase, "grTexMultibase");
+   GET_FUNCTION(grTexMultibaseAddress, "grTexMultibaseAddress");
+   GET_FUNCTION(grLfbLock, "grLfbLock");
+   GET_FUNCTION(grLfbUnlock, "grLfbUnlock");
+   GET_FUNCTION(grLfbConstantAlpha, "grLfbConstantAlpha");
+   GET_FUNCTION(grLfbConstantDepth, "grLfbConstantDepth");
+   GET_FUNCTION(grLfbWriteColorSwizzle, "grLfbWriteColorSwizzle");
+   GET_FUNCTION(grLfbWriteColorFormat, "grLfbWriteColorFormat");
+   GET_FUNCTION(grLfbWriteRegion, "grLfbWriteRegion");
+   GET_FUNCTION(grLfbReadRegion, "grLfbReadRegion");
+   GET_FUNCTION(grGlideInit, "grGlideInit");
+   GET_FUNCTION(grGlideShutdown, "grGlideShutdown");
+   GET_FUNCTION(grGlideGetState, "grGlideGetState");
+   GET_FUNCTION(grGlideSetState, "grGlideSetState");
+   GET_FUNCTION(grGlideGetVertexLayout, "grGlideGetVertexLayout");
+   GET_FUNCTION(grGlideSetVertexLayout, "grGlideSetVertexLayout");
+
+   /* Glide utility functions */
+   GET_FUNCTION(guFogGenerateExp, "guFogGenerateExp");
+   GET_FUNCTION(guFogGenerateExp2, "guFogGenerateExp2");
+   GET_FUNCTION(guFogGenerateLinear, "guFogGenerateLinear");
+
+   /* DRI functions */
+   GET_FUNCTION(grDRIOpen, "grDRIOpen");
+   GET_FUNCTION(grDRIPosition, "grDRIPosition");
+   /*GET_FUNCTION(grDRILostContext, "grDRILostContext");*/
+   GET_FUNCTION(grDRIImportFifo, "grDRIImportFifo");
+   GET_FUNCTION(grDRIInvalidateAll, "grDRIInvalidateAll");
+   GET_FUNCTION(grDRIResetSAREA, "grDRIResetSAREA");
+   GET_FUNCTION(grDRIBufferSwap, "grDRIBufferSwap");
+#undef GET_FUNCTION
+
+   /*
+    * Extension functions:
+    * Just use dlsym() because we want a NULL pointer if the function is
+    * not found.
+    */
+   /* PIXEXT extension */
+   tmesa->Glide.grStencilFunc = dlsym(libHandle, "grStencilFunc");
+   tmesa->Glide.grStencilMask = dlsym(libHandle, "grStencilMask");
+   tmesa->Glide.grStencilOp = dlsym(libHandle, "grStencilOp");
+   tmesa->Glide.grBufferClearExt = dlsym(libHandle, "grBufferClearExt");
+   tmesa->Glide.grColorMaskExt = dlsym(libHandle, "grColorMaskExt");
+   /* COMBINE extension */
+   tmesa->Glide.grColorCombineExt = dlsym(libHandle, "grColorCombineExt");
+   tmesa->Glide.grTexColorCombineExt = dlsym(libHandle, "grTexColorCombineExt");
+   tmesa->Glide.grAlphaCombineExt = dlsym(libHandle, "grAlphaCombineExt");
+   tmesa->Glide.grTexAlphaCombineExt = dlsym(libHandle, "grTexAlphaCombineExt");
+   tmesa->Glide.grAlphaBlendFunctionExt = dlsym(libHandle, "grAlphaBlendFunctionExt");
+   tmesa->Glide.grConstantColorValueExt = dlsym(libHandle, "grConstantColorValueExt");
+   /* Texus 2 */
+   tmesa->Glide.txImgQuantize = dlsym(libHandle, "txImgQuantize");
+   tmesa->Glide.txImgDequantizeFXT1 = dlsym(libHandle, "_txImgDequantizeFXT1");
+   tmesa->Glide.txErrorSetCallback = dlsym(libHandle, "txErrorSetCallback");
+
+#ifdef DEBUG_TRAP
+   /* wrap the drawing functions so we can trap them */
+#define TRAP_FUNCTION(FN)						\
+   do {									\
+      real_##FN = tmesa->Glide.FN;					\
+      tmesa->Glide.FN = debug_##FN;					\
+   } while (0)
+
+   TRAP_FUNCTION(grDrawTriangle);
+   TRAP_FUNCTION(grDrawPoint);
+   TRAP_FUNCTION(grDrawVertexArray);
+   TRAP_FUNCTION(grDrawVertexArrayContiguous);
+   TRAP_FUNCTION(grClipWindow);
+   TRAP_FUNCTION(grVertexLayout);
+   TRAP_FUNCTION(grGlideGetVertexLayout);
+   TRAP_FUNCTION(grGlideSetVertexLayout);
+   TRAP_FUNCTION(grTexDownloadMipMapLevel);
+
+#undef TRAP_FUNCTION
+#endif
+   return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_context.h b/src/mesa/drivers/dri/tdfx/tdfx_context.h
new file mode 100644
index 0000000000..29b0876f9f
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_context.h
@@ -0,0 +1,1017 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * New fixes:
+ *	Daniel Borca <dborca@users.sourceforge.net>, 19 Jul 2004
+ *
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __TDFX_CONTEXT_H__
+#define __TDFX_CONTEXT_H__
+
+#include <sys/time.h>
+#include "dri_util.h"
+#ifdef XFree86Server
+#include "GL/xf86glx.h"
+#else
+#include "main/glheader.h"
+#endif
+#if defined(__linux__)
+#include <signal.h>
+#endif
+
+#include "drm.h"
+#include "drm_sarea.h"
+#include "tdfx_glide.h"
+#include "xmlconfig.h"
+
+#include "main/clip.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/matrix.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+
+#include "tdfx_screen.h"
+
+
+
+/* Texture unit selectors: the Glide TMU ids plus driver-defined values. */
+#define TDFX_TMU0		GR_TMU0
+#define TDFX_TMU1		GR_TMU1
+#define TDFX_TMU_SPLIT		98
+#define TDFX_TMU_BOTH		99
+#define TDFX_TMU_NONE		100
+
+
+
+/* State-change bits accumulated in fxMesa->new_state (one bit per flag).
+ */
+#define TDFX_NEW_COLOR		(1 << 0)
+#define TDFX_NEW_ALPHA		(1 << 1)
+#define TDFX_NEW_DEPTH		(1 << 2)
+#define TDFX_NEW_FOG		(1 << 3)
+#define TDFX_NEW_STENCIL	(1 << 4)
+#define TDFX_NEW_CLIP		(1 << 5)
+#define TDFX_NEW_VIEWPORT	(1 << 6)
+#define TDFX_NEW_CULL		(1 << 7)
+#define TDFX_NEW_GLIDE		(1 << 8)
+#define TDFX_NEW_TEXTURE	(1 << 9)
+#define TDFX_NEW_CONTEXT	(1 << 10)
+#define TDFX_NEW_LINE		(1 << 11)
+#define TDFX_NEW_RENDER		(1 << 12)
+#define TDFX_NEW_STIPPLE	(1 << 13)
+#define TDFX_NEW_TEXTURE_BIND	(1 << 14) /* experimental */
+
+
+/* Flags for fxMesa->dirty.  Each flag must occupy its own bit.
+ */
+#define TDFX_UPLOAD_COLOR_COMBINE	0x00000001
+#define TDFX_UPLOAD_ALPHA_COMBINE	0x00000002
+#define TDFX_UPLOAD_RENDER_BUFFER	0x00000004
+#define TDFX_UPLOAD_ALPHA_TEST		0x00000008
+#define TDFX_UPLOAD_ALPHA_REF		0x00000010
+#define TDFX_UPLOAD_BLEND_FUNC		0x00000020
+#define TDFX_UPLOAD_DEPTH_MODE		0x00000040
+#define TDFX_UPLOAD_DEPTH_BIAS		0x00000080
+#define TDFX_UPLOAD_DEPTH_FUNC		0x00000100
+#define TDFX_UPLOAD_DEPTH_MASK		0x00000200
+#define TDFX_UPLOAD_FOG_MODE		0x00000400
+#define TDFX_UPLOAD_FOG_COLOR		0x00000800
+#define TDFX_UPLOAD_FOG_TABLE		0x00001000
+#define TDFX_UPLOAD_CONSTANT_COLOR	0x00020000 /* not 0x2000: that bit is TDFX_UPLOAD_CLIP */
+
+#define TDFX_UPLOAD_CLIP		0x00002000
+#define TDFX_UPLOAD_CULL		0x00004000
+#define TDFX_UPLOAD_VERTEX_LAYOUT	0x00008000
+#define TDFX_UPLOAD_COLOR_MASK		0x00010000
+#define TDFX_UPLOAD_DITHER		0x00040000
+#define TDFX_UPLOAD_STENCIL		0x00080000
+
+#define TDFX_UPLOAD_TEXTURE_SOURCE	0x00100000
+#define TDFX_UPLOAD_TEXTURE_PARAMS	0x00200000
+#define TDFX_UPLOAD_TEXTURE_PALETTE	0x00400000
+#define TDFX_UPLOAD_TEXTURE_ENV		0x00800000
+#define TDFX_UPLOAD_TEXTURE_IMAGES	0x01000000
+
+#define TDFX_UPLOAD_LINE		0x02000000
+
+#define TDFX_UPLOAD_STIPPLE		0x04000000
+
+/* Flags for software fallback cases */
+/* See corresponding strings in tdfx_tris.c */
+#define TDFX_FALLBACK_TEXTURE_MAP	(1 << 0)
+#define TDFX_FALLBACK_DRAW_BUFFER	(1 << 1)
+#define TDFX_FALLBACK_SPECULAR		(1 << 2)
+#define TDFX_FALLBACK_STENCIL		(1 << 3)
+#define TDFX_FALLBACK_RENDER_MODE	(1 << 4)
+#define TDFX_FALLBACK_LOGICOP		(1 << 5)
+#define TDFX_FALLBACK_TEXTURE_ENV	(1 << 6)
+#define TDFX_FALLBACK_TEXTURE_BORDER	(1 << 7)
+#define TDFX_FALLBACK_COLORMASK		(1 << 8)
+#define TDFX_FALLBACK_BLEND		(1 << 9)
+#define TDFX_FALLBACK_LINE_STIPPLE	(1 << 10)
+#define TDFX_FALLBACK_DISABLE		(1 << 11)
+
+/* Different Glide vertex layouts */
+#define TDFX_LAYOUT_TINY	0
+#define TDFX_LAYOUT_NOTEX	1
+#define TDFX_LAYOUT_SINGLE	2
+#define TDFX_LAYOUT_MULTI	3
+#define TDFX_LAYOUT_PROJ1	4
+#define TDFX_LAYOUT_PROJ2	5
+#define TDFX_NUM_LAYOUTS	6
+
+/* Byte offsets of vertex components (they appear to line up with tdfxVertex) */
+#define TDFX_XY_OFFSET		0
+#define TDFX_Z_OFFSET		8
+#define TDFX_Q_OFFSET		12
+#define TDFX_ARGB_OFFSET	16
+#define TDFX_FOG_OFFSET         20
+#define TDFX_ST0_OFFSET		24
+#define TDFX_ST1_OFFSET		32
+#define TDFX_Q0_OFFSET		40
+#define TDFX_Q1_OFFSET		44
+
+
+/* Bit flags selecting which buffers to clear
+ */
+#define TDFX_FRONT		(1 << 0)
+#define TDFX_BACK		(1 << 1)
+#define TDFX_DEPTH		(1 << 2)
+#define TDFX_STENCIL		(1 << 3)
+
+/*
+ * Subpixel offsets to adjust Mesa's (true) window coordinates to
+ * Glide coordinates.  We need these to ensure precise rasterization.
+ * Otherwise, we'll fail a bunch of conformance tests.
+ */
+#define TRI_X_OFFSET    ( 0.0F)
+#define TRI_Y_OFFSET    ( 0.0F)
+#define LINE_X_OFFSET   ( 0.0F)
+#define LINE_Y_OFFSET   ( 0.125F)
+#define PNT_X_OFFSET    ( 0.375F)
+#define PNT_Y_OFFSET    ( 0.375F)
+
+/* NOTE(review): scale factor for depth bias, going by the name -- confirm. */
+#define TDFX_DEPTH_BIAS_SCALE	128
+
+/* Including xf86PciInfo.h causes a bunch of errors, so the IDs live here.
+ * Note: VOODOO4 and VOODOO5 are given the same value. */
+#ifndef PCI_CHIP_BANSHEE
+#define PCI_CHIP_BANSHEE	0x0003
+#define PCI_CHIP_VOODOO3	0x0005
+#define PCI_CHIP_VOODOO4	0x0009
+#define PCI_CHIP_VOODOO5	0x0009
+#endif
+
+/* Chip tests; the argument is parenthesized so any pointer expression works. */
+#define TDFX_IS_BANSHEE( fxMesa ) \
+		( (fxMesa)->fxScreen->deviceID == PCI_CHIP_BANSHEE )
+#define TDFX_IS_VOODOO3( fxMesa ) \
+		( (fxMesa)->fxScreen->deviceID == PCI_CHIP_VOODOO3 )
+#define TDFX_IS_VOODOO4( fxMesa ) \
+		( (fxMesa)->fxScreen->deviceID == PCI_CHIP_VOODOO4 )
+#define TDFX_IS_VOODOO5( fxMesa ) \
+		( (fxMesa)->fxScreen->deviceID == PCI_CHIP_VOODOO5 )
+#define TDFX_IS_NAPALM( fxMesa ) \
+		( TDFX_IS_VOODOO4( fxMesa ) || TDFX_IS_VOODOO5( fxMesa ) )
+
+
+/* Pack 8 bit color channel values into a 32-bit word, alpha in bits 24-31. */
+#define PACK_BGRA32(R, G, B, A)  \
+    ( (((GLuint) (A)) << 24) | \
+      (((GLuint) (R)) << 16) | \
+      (((GLuint) (G)) <<  8) | \
+      (((GLuint) (B))      ) )
+
+#define PACK_RGBA32(R, G, B, A)  \
+    ( (((GLuint) (A)) << 24) | \
+      (((GLuint) (B)) << 16) | \
+      (((GLuint) (G)) <<  8) | \
+      (((GLuint) (R))      ) )
+
+/* The next two macros pack 8 bit color channel values into a 565 format,
+ * dropping the low bits of each channel.
+ */
+#define PACK_RGB16(R, G, B)         \
+    ( (((GLuint) (R) & 0xF8) << 8) | \
+      (((GLuint) (G) & 0xFC) << 3) | \
+      (((GLuint) (B) & 0xF8) >> 3) )
+#define PACK_BGR16(R, G, B)         \
+    ( (((GLuint) (B) & 0xF8) << 8) | \
+      (((GLuint) (G) & 0xFC) << 3) | \
+      (((GLuint) (R) & 0xF8) >> 3) )
+/*
+ * The last two macros pack 8 bit color channel values into 1555 values;
+ * the top bit is set for any non-zero alpha.
+ */
+#define PACK_RGBA16(R, G, B, A)       \
+    ( ((((GLuint) (A) & 0xFF) != 0) << 15) | \
+      (((GLuint) (R) & 0xF8)        <<  7) | \
+      (((GLuint) (G) & 0xF8)        <<  2) | \
+      (((GLuint) (B) & 0xF8)        >>  3) )
+#define PACK_BGRA16(R, G, B, A) \
+    ( ((((GLuint) (A) & 0xFF) != 0) << 15) | \
+      (((GLuint) (B) & 0xF8)        <<  7) | \
+      (((GLuint) (G) & 0xF8)        <<  2) | \
+      (((GLuint) (R) & 0xF8)        >>  3) )
+
+/* Used in calls to grColorMaskv()...
+ */
+extern const GLboolean false4[4];
+extern const GLboolean true4[4];
+
+/* Record and pointer types for the driver context (struct tdfx_context). */
+typedef struct tdfx_context tdfxContextRec;
+typedef struct tdfx_context *tdfxContextPtr;
+
+/* Driver-private SAREA fields -- presumably shared across processes; confirm. */
+typedef struct {
+   volatile int fifoPtr;
+   volatile int fifoRead;
+   volatile int fifoOwner;
+   volatile int ctxOwner;
+   volatile int texOwner;
+}
+TDFXSAREAPriv;
+
+/* Driver counters -- presumably swap/texture-upload statistics; verify usage. */
+typedef struct {
+   GLuint swapBuffer;
+   GLuint reqTexUpload;
+   GLuint texUpload;
+   GLuint memTexUpload;
+   GLuint texSwaps;
+} tdfxStats;
+
+
+
+/*
+ *  Memory range from startAddr to endAddr-1
+ */
+typedef struct mem_range {
+   struct mem_range *next;	/* next range in the list */
+   FxU32 startAddr, endAddr;	/* endAddr itself is not part of the range */
+}
+tdfxMemRange;
+
+/* Per-image driver data (see TDFX_TEXIMAGE_DATA): size, scale, Glide format. */
+typedef struct {
+    GLsizei		width, height;	/* image size */
+    GLint		wScale, hScale; /* scale factors */
+    GrTextureFormat_t	glideFormat;	/* Glide image format */
+}
+tdfxMipMapLevel;
+
+
+#define TDFX_NUM_TMU		2	/* dimension of the per-TMU arrays in this header */
+
+/* Driver data attached to a texture object (see TDFX_TEXTURE_DATA) */
+typedef struct tdfxTexInfo_t
+{
+   GLboolean isInTM;        /* presumably: resident in texture memory -- confirm */
+   GLboolean reloadImages;  /* if true, resend images to Glide */
+   GLuint lastTimeUsed;
+   FxU32 whichTMU;          /* presumably one of the TDFX_TMU* values -- confirm */
+
+   GrTexInfo info;
+   GrAspectRatio_t aspectRatio;
+   tdfxMemRange *tm[TDFX_NUM_TMU];  /* one memory range pointer per TMU */
+
+   GLint minLevel, maxLevel;
+   GrTextureFilterMode_t minFilt;
+   GrTextureFilterMode_t magFilt;
+   GrTextureClampMode_t sClamp;
+   GrTextureClampMode_t tClamp;
+   FxBool LODblend;
+   GrMipMapMode_t mmMode;
+
+   GLfloat sScale, tScale;  /* texcoord scale factor */
+
+   GrTexTable_t paltype;
+   GuTexPalette palette;
+
+   GLboolean padded;
+}
+tdfxTexInfo;
+
+/* Accessors for the driver data hung off Mesa texture objects and images */
+#define TDFX_TEXTURE_DATA(mesaObj) ((tdfxTexInfo *)((mesaObj)->DriverData))
+
+#define TDFX_TEXIMAGE_DATA(mesaImg) ((tdfxMipMapLevel *)((mesaImg)->DriverData))
+
+
+
+/*
+ * This is state which may be shared by several tdfx contexts.
+ * It hangs off of Mesa's gl_shared_state object (ctx->Shared->DriverData).
+ */
+struct tdfxSharedState {
+   GLboolean umaTexMemory;	/* presumably: unified texture memory (TEXUMA) -- confirm */
+   GLuint totalTexMem[TDFX_NUM_TMU]; /* constant */
+   GLuint freeTexMem[TDFX_NUM_TMU]; /* changes as we go */
+   tdfxMemRange *tmPool;	/* NOTE(review): looks like a pool of spare range nodes */
+   tdfxMemRange *tmFree[TDFX_NUM_TMU];	/* per-TMU free ranges, judging by the name */
+};
+
+
+
+/* ================================================================
+ * The vertex structures.
+ */
+/* The size of this struct is not of relevance; with 4-byte GLfloat the
+ * offsets of x through tq1 equal the TDFX_*_OFFSET values. */
+typedef struct tdfx_vertex_t {
+   GLfloat x, y, z;			/* Coordinates in screen space */
+   GLfloat rhw;				/* Reciprocal homogeneous w */
+   GLubyte color[4];		/* Diffuse color */
+   GLfloat fog;
+   GLfloat tu0, tv0;		/* Texture 0 coordinates */
+   GLfloat tu1, tv1;		/* Texture 1 coordinates */
+   GLfloat tq0, tq1;		/* Texture 0/1 q coords */
+   unsigned char pspec[4];	/* B, G, R, A [0..255] */
+   float psize;		/* point size */
+   long pad[16 - 14];	/* 64 bytes total with 4-byte long; NOTE(review): 72 on LP64 */
+} tdfxVertex, *tdfxVertexPtr;
+
+
+/* ================================================================
+ *
+ * We want to keep a mirror of the Glide function call parameters so we
+ * can avoid updating our state too often.
+ *
+ * Each of these broad groups will typically have a new state flag
+ * associated with it, and will be updated together.  The individual
+ * Glide function calls each have a dirty flag and will only be called
+ * when absolutely necessary.
+ */
+
+/* for grTexSource(): mirror of the arguments passed for one TMU */
+struct tdfx_texsource {
+   FxU32 StartAddress;		/* startAddress argument */
+   FxU32 EvenOdd;		/* evenOdd argument */
+   GrTexInfo *Info;		/* info argument */
+};
+
+/* Texture object params: clamp, filter, mipmap and LOD settings */
+struct tdfx_texparams {
+   GrTextureClampMode_t sClamp;
+   GrTextureClampMode_t tClamp;
+   GrTextureFilterMode_t minFilt;
+   GrTextureFilterMode_t magFilt;
+   GrMipMapMode_t mmMode;
+   FxBool LODblend;
+   GLfloat LodBias;
+};
+
+/* for grTexDownloadTable() texture palettes */
+struct tdfx_texpalette {
+   GrTexTable_t Type;		/* table type */
+   void *Data;			/* table data */
+};
+
+/* Mirror of the grColorCombine()/grAlphaCombine() arguments (Voodoo3/Banshee) */
+struct tdfx_combine {
+   GrCombineFunction_t Function;	/* Combine function */
+   GrCombineFactor_t Factor;		/* Combine scale factor */
+   GrCombineLocal_t Local;		/* Local combine source */
+   GrCombineOther_t Other;		/* Other combine source */
+   FxBool Invert;			/* Combine result inversion flag */
+};
+
+/* for Voodoo3's grTexCombine(): separate RGB and alpha settings */
+struct tdfx_texcombine {
+   GrCombineFunction_t FunctionRGB;
+   GrCombineFactor_t FactorRGB;
+   GrCombineFunction_t FunctionAlpha;
+   GrCombineFactor_t FactorAlpha;
+   FxBool InvertRGB;
+   FxBool InvertAlpha;
+};
+
+
+/* for Voodoo5's grColorCombineExt(); fields appear to follow its argument order */
+struct tdfx_combine_color_ext {
+   GrCCUColor_t SourceA;
+   GrCombineMode_t ModeA;
+   GrCCUColor_t SourceB;
+   GrCombineMode_t ModeB;
+   GrCCUColor_t SourceC;
+   FxBool InvertC;
+   GrCCUColor_t SourceD;
+   FxBool InvertD;
+   FxU32 Shift;
+   FxBool Invert;
+};
+
+/* for Voodoo5's grAlphaCombineExt(); fields appear to follow its argument order */
+struct tdfx_combine_alpha_ext {
+   GrACUColor_t SourceA;
+   GrCombineMode_t ModeA;
+   GrACUColor_t SourceB;
+   GrCombineMode_t ModeB;
+   GrACUColor_t SourceC;
+   FxBool InvertC;
+   GrACUColor_t SourceD;
+   FxBool InvertD;
+   FxU32 Shift;
+   FxBool Invert;
+};
+
+/* for Voodoo5's grTexColorCombineExt(); the TMU itself is not stored here */
+struct tdfx_color_texenv {
+   GrTCCUColor_t SourceA;
+   GrCombineMode_t ModeA;
+   GrTCCUColor_t SourceB;
+   GrCombineMode_t ModeB;
+   GrTCCUColor_t SourceC;
+   FxBool InvertC;
+   GrTCCUColor_t SourceD;
+   FxBool InvertD;
+   FxU32 Shift;
+   FxBool Invert;
+};
+
+/* for Voodoo5's grTexAlphaCombineExt(); the TMU itself is not stored here */
+struct tdfx_alpha_texenv {
+   GrTACUColor_t SourceA;
+   GrCombineMode_t ModeA;
+   GrTACUColor_t SourceB;
+   GrCombineMode_t ModeB;
+   GrTACUColor_t SourceC;
+   FxBool InvertC;
+   GrTACUColor_t SourceD;	/* alpha (TACU) source like A..C, not GrTCCUColor_t */
+   FxBool InvertD;
+   FxU32 Shift;
+   FxBool Invert;
+};
+
+/* Voodoo5's texture combine environment.
+ *
+ * Bundles the alpha and color texture-combine settings for one TMU
+ * together with an environment color.
+ */
+struct tdfx_texcombine_ext {
+   struct tdfx_alpha_texenv Alpha;
+   struct tdfx_color_texenv Color;
+   GrColor_t EnvColor;
+};
+
+/* Used to track changes between Glide's state and Mesa's.
+ *
+ * NOTE(review): Enabled[] is sized with a literal 2 while the other
+ * arrays use TDFX_NUM_TMU -- confirm the two are meant to stay equal.
+ */
+struct tdfx_texstate {
+   GLuint Enabled[2];              /* values ala ctx->Texture.Unit[i]._ReallyEnabled */
+   GLenum EnvMode[TDFX_NUM_TMU];   /* index is Glide index, not OpenGL */
+   GLenum TexFormat[TDFX_NUM_TMU]; /* index is Glide index, not OpenGL */
+};
+
+/* Color buffer state (clear values, write masks, alpha test, blending
+ * and dithering) expressed as Glide types.
+ */
+struct tdfx_color {
+   GrColor_t ClearColor;		/* Buffer clear color value */
+   GrAlpha_t ClearAlpha;		/* Buffer clear alpha value */
+   FxBool ColorMask[4];			/* Per-channel write enable flags */
+
+   GrColor_t MonoColor;			/* Constant color value */
+
+   /* Alpha testing */
+   GrCmpFnc_t AlphaFunc;		/* Alpha test function */
+   GrAlpha_t AlphaRef;			/* Alpha ref value in range [0,255] */
+
+   /* Blending */
+   GrAlphaBlendFnc_t BlendSrcRGB;	/* Blend source RGB factor */
+   GrAlphaBlendFnc_t BlendDstRGB;	/* Blend destination RGB factor */
+   GrAlphaBlendOp_t BlendEqRGB;		/* RGB blend equation (op) */
+   GrAlphaBlendFnc_t BlendSrcA;		/* Blend source alpha factor */
+   GrAlphaBlendFnc_t BlendDstA;		/* Blend destination alpha factor */
+   GrAlphaBlendOp_t BlendEqA;		/* Alpha blend equation (op) */
+
+   GrDitherMode_t Dither;		/* Dither enable */
+};
+
+/* Depth buffer state expressed as Glide types. */
+struct tdfx_depth {
+   GrDepthBufferMode_t Mode;		/* Fixed-point Z or floating-point W */
+   FxI32 Bias;				/* Polygon offset factor */
+   GrCmpFnc_t Func;			/* Depth test function */
+   FxU32 Clear;				/* Buffer clear value */
+   FxBool Mask;				/* Write enable flag */
+};
+
+/* Stipple state: the mode plus a single 32-bit pattern word. */
+struct tdfx_stipple {
+   GrStippleMode_t Mode;		/* Stipple enable/disable */
+   FxU32 Pattern;			/* 8x4 Stipple Pattern */
+};
+
+/* Fog state.  TableMode records which GL fog mode the values currently
+ * held in Table were generated for.
+ */
+struct tdfx_fog {
+   GrFogMode_t Mode;			/* Glide fog mode */
+   GrColor_t Color;			/* Fog color value */
+   GLenum TableMode;			/* GL fog mode currently in table */
+   GrFog_t *Table;			/* Fog value table */
+   FxFloat Density;			/* Density >= 0 */
+   FxFloat Near;			/* Start distance in eye coords */
+   FxFloat Far;				/* End distance in eye coords */
+};
+
+/* Stencil buffer state.
+ *
+ * NOTE(review): FailFunc/ZFailFunc/ZPassFunc are declared GrStencil_t,
+ * whereas grStencilOp() in struct tdfx_glide takes GrStencilOp_t for the
+ * corresponding arguments -- confirm whether the field type is intended.
+ */
+struct tdfx_stencil {
+   GrCmpFnc_t Function;			/* Stencil function */
+   GrStencil_t RefValue;		/* Stencil reference value */
+   GrStencil_t ValueMask;		/* Value mask */
+   GrStencil_t WriteMask;		/* Write mask */
+   GrStencil_t FailFunc;		/* Stencil fail function */
+   GrStencil_t ZFailFunc;		/* Stencil pass, depth fail function */
+   GrStencil_t ZPassFunc;		/* Stencil pass, depth pass function */
+   GrStencil_t Clear;			/* Buffer clear value */
+};
+
+/* Scissor rectangle; the unsigned corner values have the same types as
+ * the arguments of grClipWindow().
+ */
+struct tdfx_scissor {
+   FxU32 minX, minY;			/* Lower left corner */
+   FxU32 maxX, maxY;			/* Upper right corner */
+};
+
+/* Viewport state: coordinate space mode, window rectangle and depth
+ * range, typed like the arguments of grCoordinateSpace(), grViewport()
+ * and grDepthRange().
+ */
+struct tdfx_viewport {
+   GrCoordinateSpaceMode_t Mode;	/* Coordinate space */
+   FxI32 X, Y;				/* Position */
+   FxI32 Width, Height;			/* Size */
+   FxFloat Near, Far;			/* Depth buffer range */
+};
+
+/* Per-context Glide binding.
+ *
+ * Holds the Glide context handle and a few cached settings, one flag per
+ * optional Glide extension, and a table of function pointers for the
+ * Glide entry points the driver calls (invoked as
+ * fxMesa->Glide.grXxx(...)).
+ * NOTE(review): the pointers are presumably filled in by tdfxInitGlide();
+ * extension pointers may be NULL when the matching Have*Ext flag is
+ * false -- confirm against the initialization code.
+ */
+struct tdfx_glide {
+   void *State;				/* Mirror of internal Glide state */
+   GrContext_t Context;			/* Glide context identifier */
+   FxI32 Board;				/* Current graphics subsystem */
+   GrColorFormat_t ColorFormat;		/* Framebuffer format */
+   GrOriginLocation_t Origin;		/* Location of screen space origin */
+
+   FxBool Initialized;			/* Glide initialization done? */
+
+   FxI32 SwapInterval;			/* SwapBuffers interval */
+   FxI32 MaxPendingSwaps;		/* Maximum outstanding SwapBuffers */
+   FxI32 TextureAlign;
+
+   /* Extensions */
+   FxBool HaveCombineExt;		/* COMBINE */
+   FxBool HaveCommandTransportExt;	/* COMMAND_TRANSPORT */
+   FxBool HaveFogCoordExt;		/* FOGCOORD */
+   FxBool HavePixelExt;			/* PIXEXT */
+   FxBool HaveTextureBufferExt;		/* TEXTUREBUFFER */
+   FxBool HaveTexFmtExt;		/* TEXFMT */
+   FxBool HaveTexUMAExt;		/* TEXUMA */
+   FxBool HaveMirrorExt;		/* MIRROR */
+   FxBool HaveTexus2;			/* Texus 2 - FXT1 */
+
+   /* Glide library function pointers */
+   void (*grDrawPoint)( const void *pt );
+   void (*grDrawLine)( const void *v1, const void *v2 );
+   void (*grDrawTriangle)( const void *a, const void *b, const void *c );
+   void (*grVertexLayout)(FxU32 param, FxI32 offset, FxU32 mode);
+   void (*grDrawVertexArray)(FxU32 mode, FxU32 Count, void *pointers);
+   void (*grDrawVertexArrayContiguous)(FxU32 mode, FxU32 Count,
+                                       void *pointers, FxU32 stride);
+   void (*grBufferClear)( GrColor_t color, GrAlpha_t alpha, FxU32 depth );
+   void (*grBufferSwap)( FxU32 swap_interval );
+   void (*grRenderBuffer)( GrBuffer_t buffer );
+   void (*grErrorSetCallback)( GrErrorCallbackFnc_t fnc );
+   void (*grFinish)(void);
+   void (*grFlush)(void);
+   GrContext_t (*grSstWinOpen)(FxU32                hWnd,
+                               GrScreenResolution_t screen_resolution,
+                               GrScreenRefresh_t    refresh_rate,
+                               GrColorFormat_t      color_format,
+                               GrOriginLocation_t   origin_location,
+                               int                  nColBuffers,
+                               int                  nAuxBuffers);
+   void (*grSstWinClose)( GrContext_t context );
+/* Not used */
+#if 0
+   void (*grSetNumPendingBuffers)(FxI32 NumPendingBuffers);
+#endif
+   void (*grSelectContext)( GrContext_t context );
+   void (*grSstOrigin)(GrOriginLocation_t  origin);
+   void (*grSstSelect)( int which_sst );
+   void (*grAlphaBlendFunction)(GrAlphaBlendFnc_t rgb_sf,
+                                GrAlphaBlendFnc_t rgb_df,
+                                GrAlphaBlendFnc_t alpha_sf,
+                                GrAlphaBlendFnc_t alpha_df);
+   void (*grAlphaCombine)(GrCombineFunction_t function,
+                          GrCombineFactor_t factor,
+                          GrCombineLocal_t local, GrCombineOther_t other,
+                          FxBool invert);
+   void (*grAlphaControlsITRGBLighting)( FxBool enable );
+   void (*grAlphaTestFunction)( GrCmpFnc_t function );
+   void (*grAlphaTestReferenceValue)( GrAlpha_t value );
+   void (*grChromakeyMode)( GrChromakeyMode_t mode );
+   void (*grChromakeyValue)( GrColor_t value );
+   void (*grClipWindow)( FxU32 minx, FxU32 miny, FxU32 maxx, FxU32 maxy );
+   void (*grColorCombine)( GrCombineFunction_t function,
+                           GrCombineFactor_t factor,
+                           GrCombineLocal_t local,
+                           GrCombineOther_t other,
+                           FxBool invert );
+   void (*grColorMask)( FxBool rgb, FxBool a );
+   void (*grCullMode)( GrCullMode_t mode );
+   void (*grConstantColorValue)( GrColor_t value );
+   void (*grDepthBiasLevel)( FxI32 level );
+   void (*grDepthBufferFunction)( GrCmpFnc_t function );
+   void (*grDepthBufferMode)( GrDepthBufferMode_t mode );
+   void (*grDepthMask)( FxBool mask );
+   void (*grDisableAllEffects)( void );
+   void (*grDitherMode)( GrDitherMode_t mode );
+   void (*grFogColorValue)( GrColor_t fogcolor );
+   void (*grFogMode)( GrFogMode_t mode );
+   void (*grFogTable)( const GrFog_t ft[] );
+   void (*grLoadGammaTable)( FxU32 nentries, FxU32 *red, FxU32 *green, FxU32 *blue);
+   void (*grSplash)(float x, float y, float width, float height, FxU32 frame);
+   FxU32 (*grGet)( FxU32 pname, FxU32 plength, FxI32 *params );
+   const char * (*grGetString)( FxU32 pname );
+   FxI32 (*grQueryResolutions)( const GrResolution *resTemplate,
+                                GrResolution *output );
+   FxBool (*grReset)( FxU32 what );
+   GrProc (*grGetProcAddress)( char *procName );
+   void (*grEnable)( GrEnableMode_t mode );
+   void (*grDisable)( GrEnableMode_t mode );
+   void (*grCoordinateSpace)( GrCoordinateSpaceMode_t mode );
+   void (*grDepthRange)( FxFloat n, FxFloat f );
+   void (*grStippleMode)( GrStippleMode_t mode );
+   void (*grStipplePattern)( GrStipplePattern_t mode );
+   void (*grViewport)( FxI32 x, FxI32 y, FxI32 width, FxI32 height );
+   FxU32 (*grTexCalcMemRequired)(GrLOD_t lodmin, GrLOD_t lodmax,
+                                GrAspectRatio_t aspect, GrTextureFormat_t fmt);
+   FxU32 (*grTexTextureMemRequired)( FxU32 evenOdd, GrTexInfo *info );
+   FxU32 (*grTexMinAddress)( GrChipID_t tmu );
+   FxU32 (*grTexMaxAddress)( GrChipID_t tmu );
+   void (*grTexNCCTable)( GrNCCTable_t table );
+   void (*grTexSource)( GrChipID_t tmu, FxU32 startAddress,
+                        FxU32 evenOdd, GrTexInfo *info );
+   void (*grTexClampMode)( GrChipID_t tmu,
+                           GrTextureClampMode_t s_clampmode,
+                           GrTextureClampMode_t t_clampmode );
+   void (*grTexCombine)( GrChipID_t tmu,
+                         GrCombineFunction_t rgb_function,
+                         GrCombineFactor_t rgb_factor, 
+                         GrCombineFunction_t alpha_function,
+                         GrCombineFactor_t alpha_factor,
+                         FxBool rgb_invert,
+                         FxBool alpha_invert);
+   void (*grTexDetailControl)( GrChipID_t tmu, int lod_bias,
+                               FxU8 detail_scale, float detail_max );
+   void (*grTexFilterMode)( GrChipID_t tmu,
+                            GrTextureFilterMode_t minfilter_mode,
+                            GrTextureFilterMode_t magfilter_mode );
+   void (*grTexLodBiasValue)(GrChipID_t tmu, float bias );
+   void (*grTexDownloadMipMap)( GrChipID_t tmu, FxU32 startAddress,
+                                FxU32 evenOdd, GrTexInfo *info );
+   void (*grTexDownloadMipMapLevel)( GrChipID_t        tmu,
+                                     FxU32             startAddress,
+                                     GrLOD_t           thisLod,
+                                     GrLOD_t           largeLod,
+                                     GrAspectRatio_t   aspectRatio,
+                                     GrTextureFormat_t format,
+                                     FxU32             evenOdd,
+                                     void              *data );
+   FxBool (*grTexDownloadMipMapLevelPartial)( GrChipID_t        tmu,
+                                              FxU32             startAddress,
+                                              GrLOD_t           thisLod,
+                                              GrLOD_t           largeLod,
+                                              GrAspectRatio_t   aspectRatio,
+                                              GrTextureFormat_t format,
+                                              FxU32             evenOdd,
+                                              void              *data,
+                                              int               start,
+                                              int               end );
+   void (*grTexDownloadTable)( GrTexTable_t type, void *data );
+   void (*grTexDownloadTablePartial)( GrTexTable_t type, 
+                                      void *data, int start, int end );
+   void (*grTexMipMapMode)( GrChipID_t tmu, GrMipMapMode_t mode,
+                            FxBool lodBlend );
+   void (*grTexMultibase)( GrChipID_t tmu, FxBool enable );
+   void (*grTexMultibaseAddress)( GrChipID_t       tmu,
+                                  GrTexBaseRange_t range,
+                                  FxU32            startAddress,
+                                  FxU32            evenOdd,
+                                  GrTexInfo        *info );
+   FxBool (*grLfbLock)( GrLock_t type, GrBuffer_t buffer,
+                        GrLfbWriteMode_t writeMode,
+                        GrOriginLocation_t origin, FxBool pixelPipeline, 
+                        GrLfbInfo_t *info );
+   FxBool (*grLfbUnlock)( GrLock_t type, GrBuffer_t buffer );
+   void (*grLfbConstantAlpha)( GrAlpha_t alpha );
+   void (*grLfbConstantDepth)( FxU32 depth );
+   void (*grLfbWriteColorSwizzle)(FxBool swizzleBytes, FxBool swapWords);
+   void (*grLfbWriteColorFormat)(GrColorFormat_t colorFormat);
+   FxBool (*grLfbWriteRegion)( GrBuffer_t dst_buffer, 
+                               FxU32 dst_x, FxU32 dst_y, 
+                               GrLfbSrcFmt_t src_format, 
+                               FxU32 src_width, FxU32 src_height, 
+                               FxBool pixelPipeline,
+                               FxI32 src_stride, void *src_data );
+   FxBool (*grLfbReadRegion)( GrBuffer_t src_buffer,
+                              FxU32 src_x, FxU32 src_y,
+                              FxU32 src_width, FxU32 src_height,
+                              FxU32 dst_stride, void *dst_data );
+   void (*grGlideInit)( void );
+   void (*grGlideShutdown)( void );
+   void (*grGlideGetState)( void *state );
+   void (*grGlideSetState)( const void *state );
+   void (*grGlideGetVertexLayout)( void *layout );
+   void (*grGlideSetVertexLayout)( const void *layout );
+   /* Glide utility functions */
+   void (*guFogGenerateExp)( GrFog_t *fogtable, float density );
+   void (*guFogGenerateExp2)( GrFog_t *fogtable, float density );
+   void (*guFogGenerateLinear)(GrFog_t *fogtable, float nearZ, float farZ );
+   /* DRI functions */
+   void (*grDRIOpen)( char *pFB, char *pRegs, int deviceID,
+                      int width, int height,
+                      int mem, int cpp, int stride,
+                      int fifoOffset, int fifoSize,
+                      int fbOffset, int backOffset, int depthOffset,
+                      int textureOffset, int textureSize,
+                      volatile int *fifoPtr, volatile int *fifoRead );
+   void (*grDRIPosition)( int x, int y, int w, int h,
+                          int numClip, drm_clip_rect_t *pClip );
+   void (*grDRILostContext)( void );
+   void (*grDRIImportFifo)( int fifoPtr, int fifoRead );
+   void (*grDRIInvalidateAll)( void );
+   void (*grDRIResetSAREA)( void );
+   void (*grDRIBufferSwap)( FxU32 swapInterval );
+   /* Glide extensions */
+   /* PIXEXT extension */
+   void (*grStencilFunc)( GrCmpFnc_t func, GrStencil_t ref, GrStencil_t mask );
+   void (*grStencilMask)( GrStencil_t mask );
+   void (*grStencilOp)( GrStencilOp_t fail, GrStencilOp_t zfail,
+                        GrStencilOp_t zpass );
+   void (*grBufferClearExt)( GrColor_t color, GrAlpha_t alpha,
+                             FxU32 depth, GrStencil_t stencil );
+   void (*grColorMaskExt)( FxBool r, FxBool g, FxBool b, FxBool a );
+   /* COMBINE extension */
+   void (*grColorCombineExt)( GrCCUColor_t a, GrCombineMode_t a_mode,
+                              GrCCUColor_t b, GrCombineMode_t b_mode,
+                              GrCCUColor_t c, FxBool c_invert,
+                              GrCCUColor_t d, FxBool d_invert,
+                              FxU32 shift, FxBool invert );
+   void (*grTexColorCombineExt)( FxU32 tmu,
+                                 GrTCCUColor_t a, GrCombineMode_t a_mode,
+                                 GrTCCUColor_t b, GrCombineMode_t b_mode,
+                                 GrTCCUColor_t c, FxBool c_invert,
+                                 GrTCCUColor_t d, FxBool d_invert,
+                                 FxU32 shift, FxBool invert );
+   void (*grAlphaCombineExt)( GrACUColor_t a, GrCombineMode_t a_mode,
+                              GrACUColor_t b, GrCombineMode_t b_mode,
+                              GrACUColor_t c, FxBool c_invert,
+                              GrACUColor_t d, FxBool d_invert,
+                              FxU32 shift, FxBool invert );
+   void (*grTexAlphaCombineExt)( FxU32 tmu,
+                                 GrTACUColor_t a, GrCombineMode_t a_mode,
+                                 GrTACUColor_t b, GrCombineMode_t b_mode,
+                                 GrTACUColor_t c, FxBool c_invert,
+                                 GrTACUColor_t d, FxBool d_invert,
+                                 FxU32 shift, FxBool invert );
+   void (*grAlphaBlendFunctionExt)( GrAlphaBlendFnc_t rgb_sf,
+                                    GrAlphaBlendFnc_t rgb_df,
+                                    GrAlphaBlendOp_t rgb_op,
+                                    GrAlphaBlendFnc_t alpha_sf,
+                                    GrAlphaBlendFnc_t alpha_df,
+                                    GrAlphaBlendOp_t alpha_op );
+   void (*grConstantColorValueExt)( FxU32 tmu, GrColor_t value );
+   /* Texus 2 */
+   void (*txImgQuantize)( void *xxx_unknown_arguments );
+   void (*txImgDequantizeFXT1)( void *txMip, void *pxMip );
+   void (*txErrorSetCallback)( void *fnc );
+};
+
+/* Signatures of the per-primitive rasterization callbacks stored in
+ * struct tdfx_context (draw_triangle, draw_line, draw_point): each takes
+ * the driver context followed by one vertex pointer per primitive vertex.
+ */
+typedef void (*tdfx_tri_func)( tdfxContextPtr, tdfxVertex *, tdfxVertex *,
+			       tdfxVertex * );
+typedef void (*tdfx_line_func)( tdfxContextPtr, tdfxVertex *, tdfxVertex * );
+typedef void (*tdfx_point_func)( tdfxContextPtr, tdfxVertex * );
+
+/* The tdfx driver context.
+ *
+ * Reached from a core Mesa context through TDFX_CONTEXT(ctx).  It groups
+ * the driver's mirror of Glide/hardware state, the Glide function table,
+ * vertex setup/render bookkeeping, DRI handles and the current window
+ * geometry and clip rectangles.
+ */
+struct tdfx_context {
+   /* Set once and never changed:
+    */
+   GLcontext *glCtx;			/* The core Mesa context */
+
+   GLuint new_gl_state;
+   GLuint new_state;
+   GLuint dirty;
+
+   /* Mirror of hardware state, Glide parameters
+    */
+   GLuint tmu_source[TDFX_NUM_TMU];
+   struct tdfx_texsource	TexSource[TDFX_NUM_TMU];
+   struct tdfx_texparams	TexParams[TDFX_NUM_TMU];
+   struct tdfx_texpalette	TexPalette;
+
+   /* Voodoo3 texture/color combine state */
+   struct tdfx_combine		ColorCombine;
+   struct tdfx_combine		AlphaCombine;
+   struct tdfx_texcombine	TexCombine[TDFX_NUM_TMU];
+
+   /* Voodoo5 texture/color combine state */
+   struct tdfx_combine_color_ext	ColorCombineExt;
+   struct tdfx_combine_alpha_ext	AlphaCombineExt;
+   struct tdfx_texcombine_ext		TexCombineExt[TDFX_NUM_TMU];
+
+   /* Tracks tex state difference between Glide and Mesa */
+   struct tdfx_texstate		TexState;
+
+   GrBuffer_t		DrawBuffer;	/* Current draw buffer */
+   GrBuffer_t		ReadBuffer;	/* Current read buffer */
+
+   struct tdfx_color	Color;
+   struct tdfx_depth	Depth;
+   struct tdfx_fog	Fog;
+   struct tdfx_stencil	Stencil;
+   struct tdfx_scissor	Scissor;
+   struct tdfx_viewport	Viewport;
+   struct tdfx_stipple	Stipple;
+
+   GrCullMode_t		CullMode;
+
+   struct tdfx_glide	Glide;
+
+   /* Fallback rasterization functions 
+    */
+   tdfx_point_func draw_point;
+   tdfx_line_func draw_line;
+   tdfx_tri_func draw_triangle;
+
+
+   /* Variable-size Glide vertex formats
+    */
+   GLuint vertexFormat;		/* the current format */
+   void *layout[TDFX_NUM_LAYOUTS];
+   tdfxVertex *verts;
+   
+   GLfloat hw_viewport[16];
+   
+   GLuint SetupIndex;
+   GLuint SetupNewInputs;
+   GLuint RenderIndex;
+   GLuint Fallback;
+   GLenum render_primitive;	/* what GL thinks */
+   GLenum raster_primitive;	/* what the hardware thinks */
+
+   GLfloat sScale0, tScale0;
+   GLfloat sScale1, tScale1;
+
+   GLuint texBindNumber;
+   GLint tmuSrc;
+
+   int screen_width;
+   int screen_height;
+
+   GLboolean haveTwoTMUs;      /* True if we have 2 tmu's  */
+   GLboolean haveHwAlpha;
+   GLboolean haveHwStencil;
+   GLboolean haveHwStipple;
+
+   GLint maxPendingSwapBuffers;
+
+   char rendererString[100];	/* per-context GL_RENDERER string storage */
+
+   /* stuff added for DRI */
+   __DRIscreen *driScreen;
+   __DRIcontext *driContext;
+
+   /**
+    * DRI drawable bound to this context for drawing.
+    */
+   __DRIdrawable	*driDrawable;
+
+   /**
+    * DRI drawable bound to this context for reading.
+    */
+   __DRIdrawable	*driReadable;
+
+   drm_context_t hHWContext;
+   drm_hw_lock_t *driHwLock;
+   int driFd;
+   tdfxScreenPrivate *fxScreen;
+   TDFXSAREAPriv *sarea;
+
+
+   /*
+    * Changes during execution:
+    */
+   int width, height;   /* size of window */
+   int x_offset;        /* distance from window left to screen left */
+   int y_offset;        /* distance from window top to screen top */
+   int y_delta;         /* distance from window bottom to screen bottom */
+
+   int numClipRects;
+   drm_clip_rect_t *pClipRects;
+   GLboolean scissoredClipRects;  /* if true, pClipRects is private storage */
+
+   GuTexPalette glbPalette;         /* global texture palette */
+
+   tdfxStats stats;
+
+   /* Configuration cache
+    */
+   driOptionCache optionCache;
+};
+
+/* Fetch the tdfx driver context stored in a core Mesa context's
+ * DriverCtx field.
+ */
+#define TDFX_CONTEXT(ctx)	((tdfxContextPtr)((ctx)->DriverCtx))
+
+
+/* Context lifecycle entry points (defined outside this header). */
+extern GLboolean
+tdfxCreateContext( gl_api api,
+		   const __GLcontextModes *mesaVis,
+                   __DRIcontext *driContextPriv,
+                   void *sharedContextPrivate );
+
+extern void
+tdfxDestroyContext( __DRIcontext *driContextPriv );
+
+extern GLboolean
+tdfxUnbindContext( __DRIcontext *driContextPriv );
+
+extern GLboolean
+tdfxMakeCurrent( __DRIcontext *driContextPriv,
+                 __DRIdrawable *driDrawPriv,
+                 __DRIdrawable *driReadPriv );
+
+extern GLboolean
+tdfxInitGlide( tdfxContextPtr tmesa );
+
+/* Set the Glide color write mask from four GL booleans indexed by
+ * RCOMP/GCOMP/BCOMP/ACOMP.  This variant takes the hardware lock itself.
+ */
+extern void
+FX_grColorMaskv(GLcontext *ctx, const GLboolean rgba[4]);
+
+/* Same as FX_grColorMaskv() but does no locking of its own. */
+extern void
+FX_grColorMaskv_NoLock(GLcontext *ctx, const GLboolean rgba[4]);
+
+
+/* Color packing utilities
+ *
+ * Each macro takes 8-bit components and packs them into one word.  Bit
+ * layouts produced (most significant first):
+ *   332:   b[7:5] g[7:5] r[7:6]
+ *   1555:  a!=0, r[7:3] g[7:3] b[7:3]
+ *   565:   r[7:3] g[7:2] b[7:3]
+ *   888:   b g r
+ *   8888:  a r g b
+ *   4444:  a[7:4] b[7:4] g[7:4] r[7:4]
+ *
+ * NOTE(review): 332, 888 and 4444 put blue above red while 1555, 565 and
+ * 8888 put red above blue -- confirm this is intended for each format.
+ * NOTE(review): the unmasked shifts in 888/8888 operate on the promoted
+ * argument type; callers passing 8-bit values >= 0x80 to the "<< 24"
+ * should widen them to an unsigned 32-bit type first.
+ */
+#define TDFXPACKCOLOR332( r, g, b )					   \
+   (((b) & 0xe0) | (((g) & 0xe0) >> 3) | (((r) & 0xc0) >> 6))
+
+#define TDFXPACKCOLOR1555( r, g, b, a )					   \
+   ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) |	   \
+    ((a) ? 0x8000 : 0))
+
+#define TDFXPACKCOLOR565( r, g, b )					   \
+   ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))
+
+#define TDFXPACKCOLOR888( r, g, b )					   \
+   (((b) << 16) | ((g) << 8) | (r))
+
+#define TDFXPACKCOLOR8888( r, g, b, a )					   \
+   (((a) << 24) | ((r) << 16) | ((g) << 8) | (b))
+
+#define TDFXPACKCOLOR4444( r, g, b, a )					   \
+   ((((a) & 0xf0) << 8) | (((b) & 0xf0) << 4) | ((g) & 0xf0) | ((r) >> 4))
+
+/**
+ * Pack 8-bit RGBA components into a Glide color word.
+ *
+ * \param cpp  bytes per pixel of the target format: 2 selects 565 packing,
+ *             4 selects 8888 packing.
+ * \param r,g,b,a  color components.
+ * \return the packed color, or 0 for any other \p cpp value.
+ */
+static INLINE GrColor_t tdfxPackColor( GLuint cpp,
+                                       GLubyte r, GLubyte g,
+                                       GLubyte b, GLubyte a )
+{
+   switch ( cpp ) {
+   case 2:
+      return TDFXPACKCOLOR565( r, g, b );
+   case 4:
+      /* Widen before packing: a narrow unsigned component promotes to
+       * (signed) int, and shifting a value >= 0x80 left by 24 would
+       * overflow it.  The packed bit pattern is unchanged.
+       */
+      return TDFXPACKCOLOR8888( (GrColor_t) r, (GrColor_t) g,
+                                (GrColor_t) b, (GrColor_t) a );
+   default:
+      return 0;
+   }
+}
+
+/* Debug switch: with DO_DEBUG non-zero, TDFX_DEBUG is a variable defined
+ * elsewhere; otherwise it is the constant 0.
+ */
+#define DO_DEBUG		1
+#if DO_DEBUG
+extern int TDFX_DEBUG;
+#else
+#define TDFX_DEBUG		0
+#endif
+
+/* Single-bit flag values -- presumably tested against TDFX_DEBUG;
+ * verify at the use sites.
+ */
+#define DEBUG_ALWAYS_SYNC	0x01
+#define DEBUG_VERBOSE_API	0x02
+#define DEBUG_VERBOSE_DRI	0x04
+#define DEBUG_VERBOSE_FALL	0x08
+
+/* conf */
+#define FX_COMPRESS_S3TC_AS_FXT1_HACK 1
+#define FX_TC_NAPALM 0
+
+#endif /* __TDFX_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_dd.c b/src/mesa/drivers/dri/tdfx/tdfx_dd.c
new file mode 100644
index 0000000000..2cbbeb8114
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_dd.c
@@ -0,0 +1,225 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * \file tdfx_dd.c
+ * Device driver interface functions for 3Dfx based cards.
+ * 
+ * \author Gareth Hughes <gareth@valinux.com> (Original rewrite 29 Sep - 1 Oct 2000)
+ * \author Brian Paul <brianp@valinux.com>
+ */
+
+#include "tdfx_context.h"
+#include "tdfx_dd.h"
+#include "tdfx_lock.h"
+#include "tdfx_pixels.h"
+
+#include "utils.h"
+#include "main/context.h"
+
+
+#define DRIVER_DATE	"20061113"
+
+
+/* These are used in calls to FX_grColorMaskv(): ready-made "all channels
+ * off" and "all channels on" RGBA masks.  They have external linkage.
+ */
+const GLboolean false4[4] = { GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE };
+const GLboolean true4[4] = { GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE };
+
+
+
+/* KW: Put the word Mesa in the render string because quakeworld
+ * checks for this rather than doing a glGet(GL_MAX_TEXTURE_SIZE).
+ * Why?
+ *
+ * Driver GetString hook.  Handles GL_RENDERER (built from the Glide
+ * hardware name and DRIVER_DATE into the context's rendererString
+ * buffer) and GL_VENDOR; returns NULL for every other name.
+ */
+static const GLubyte *tdfxDDGetString( GLcontext *ctx, GLenum name )
+{
+   tdfxContextPtr fxMesa = (tdfxContextPtr) ctx->DriverCtx;
+
+   switch (name) {
+   case GL_RENDERER:
+   {
+      /* The renderer string must be per-context state to handle
+       * multihead correctly.
+       */
+      char *const buffer = fxMesa->rendererString;
+      char hardware[64];
+      const char *glideName;
+
+      /* Copy the name while the lock is held; the string belongs to
+       * Glide.  Treat a NULL result as an empty name instead of handing
+       * NULL to strncpy().
+       */
+      LOCK_HARDWARE(fxMesa);
+      glideName = fxMesa->Glide.grGetString(GR_HARDWARE);
+      if (glideName == NULL) {
+	 glideName = "";
+      }
+      strncpy(hardware, glideName, sizeof(hardware));
+      hardware[sizeof(hardware) - 1] = '\0';
+      UNLOCK_HARDWARE(fxMesa);
+
+      if ((strncmp(hardware, "Voodoo3", 7) == 0)
+	  || (strncmp(hardware, "Voodoo4", 7) == 0)
+	  || (strncmp(hardware, "Voodoo5", 7) == 0)) {
+	 /* keep only the family name */
+	 hardware[7] = '\0';
+      }
+      else if (strncmp(hardware, "Voodoo Banshee", 14) == 0) {
+	 /* overwrite from the space onwards: "VoodooBanshee" */
+	 strcpy(&hardware[6], "Banshee");
+      }
+      else {
+	 /* unexpected result: replace spaces with hyphens */
+	 unsigned int i;
+	 for (i = 0; i < sizeof(hardware) && hardware[i]; i++) {
+	    if (hardware[i] == ' ' || hardware[i] == '\t') {
+	       hardware[i] = '-';
+	    }
+	 }
+      }
+
+      (void) driGetRendererString(buffer, hardware, DRIVER_DATE, 0);
+      return (const GLubyte *) buffer;
+   }
+   case GL_VENDOR:
+      return (const GLubyte *)"VA Linux Systems, Inc.";
+   default:
+      return NULL;
+   }
+}
+
+
+/**
+ * Driver BeginQuery hook.
+ *
+ * For GL_SAMPLES_PASSED_ARB queries, waits for outstanding rendering
+ * (grFinish) and resets Glide's pixel statistics while holding the
+ * hardware lock.  Other query targets are ignored.
+ */
+static void
+tdfxBeginQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+
+   if (q->Target == GL_SAMPLES_PASSED_ARB) {
+      LOCK_HARDWARE(fxMesa);
+      fxMesa->Glide.grFinish();
+      fxMesa->Glide.grReset(GR_STATS_PIXELS);
+      UNLOCK_HARDWARE(fxMesa);
+   }
+}
+
+
+/**
+ * Driver EndQuery hook.
+ *
+ * For GL_SAMPLES_PASSED_ARB queries, waits for rendering to finish, reads
+ * Glide's "pixels in" and "depth-func failed" counters and stores their
+ * difference, clamped to zero, in q->Result, then marks the query ready.
+ * Other query targets are ignored.
+ */
+static void
+tdfxEndQuery(GLcontext *ctx, struct gl_query_object *q)
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   /* Zero-initialised so a grGet() that writes nothing yields a result
+    * of 0 rather than an indeterminate value.
+    */
+   FxI32 total_pixels = 0;
+   FxI32 z_fail_pixels = 0;
+   FxI32 passed;
+
+   if (q->Target == GL_SAMPLES_PASSED_ARB) {
+      LOCK_HARDWARE(fxMesa);
+      fxMesa->Glide.grFinish();
+
+      fxMesa->Glide.grGet(GR_STATS_PIXELS_DEPTHFUNC_FAIL, sizeof(FxI32),
+			  &z_fail_pixels);
+      fxMesa->Glide.grGet(GR_STATS_PIXELS_IN, sizeof(FxI32), &total_pixels);
+
+      /* Apparently, people have seen z_fail_pixels > total_pixels under
+       * some conditions on some 3Dfx hardware.  The occlusion query spec
+       * requires that we clamp to 0.
+       *
+       * Clamp in a signed local before storing: testing q->Result itself
+       * for "< 0" can never succeed if that field is unsigned.
+       */
+      passed = total_pixels - z_fail_pixels;
+      if (passed < 0) {
+	 passed = 0;
+      }
+
+      q->Result = passed;
+      q->Ready = GL_TRUE;
+
+      UNLOCK_HARDWARE(fxMesa);
+   }
+}
+
+
+/* True when the visual pointed to by vis has exactly the given red,
+ * green, blue and alpha bit counts.  Arguments are parenthesized so that
+ * arbitrary expressions may be passed.
+ */
+#define VISUAL_EQUALS_RGBA(vis, r, g, b, a)        \
+   (((vis)->redBits == (r)) &&                     \
+    ((vis)->greenBits == (g)) &&                   \
+    ((vis)->blueBits == (b)) &&                    \
+    ((vis)->alphaBits == (a)))
+
+/**
+ * Plug the tdfx implementations into a driver function table.
+ *
+ * Always installs GetString, BeginQuery and EndQuery.  Accelerated pixel
+ * paths depend on the visual: an 8/8/8/8 visual gets both DrawPixels and
+ * ReadPixels, a 5/6/5/0 visual gets ReadPixels only, and any other
+ * visual leaves those entries untouched.
+ *
+ * \param visual     visual whose channel sizes select the pixel paths.
+ * \param functions  table to fill in.
+ */
+void tdfxDDInitDriverFuncs( const __GLcontextModes *visual,
+                            struct dd_function_table *functions )
+{
+   if ( MESA_VERBOSE & VERBOSE_DRIVER ) {
+      fprintf( stderr, "tdfx: %s()\n", __FUNCTION__ );
+   }
+
+   functions->GetString         = tdfxDDGetString;
+   functions->BeginQuery        = tdfxBeginQuery;
+   functions->EndQuery          = tdfxEndQuery;
+
+   /* Accelerated paths
+    */
+   if ( VISUAL_EQUALS_RGBA(visual, 8, 8, 8, 8) )
+   {
+      functions->DrawPixels	= tdfx_drawpixels_R8G8B8A8;
+      functions->ReadPixels	= tdfx_readpixels_R8G8B8A8;
+   }
+   else if ( VISUAL_EQUALS_RGBA(visual, 5, 6, 5, 0) )
+   {
+      functions->ReadPixels	= tdfx_readpixels_R5G6B5;
+   }
+}
+
+
+/*
+ * These are here for lack of a better place.
+ */
+
+/**
+ * Set the Glide color write mask, taking the hardware lock around the
+ * call.
+ *
+ * \param ctx   core Mesa context.
+ * \param rgba  per-channel write enables, indexed by RCOMP/GCOMP/BCOMP/
+ *              ACOMP.
+ *
+ * The actual work is shared with FX_grColorMaskv_NoLock() so the two
+ * variants cannot drift apart.
+ */
+void
+FX_grColorMaskv(GLcontext *ctx, const GLboolean rgba[4])
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+
+   LOCK_HARDWARE(fxMesa);
+   FX_grColorMaskv_NoLock(ctx, rgba);
+   UNLOCK_HARDWARE(fxMesa);
+}
+
+/**
+ * Set the Glide color write mask without touching the hardware lock.
+ *
+ * With 8 red bits in the visual (32bpp) each channel is masked
+ * individually through grColorMaskExt(); otherwise (16bpp) RGB is
+ * enabled as a group when any of R, G or B is enabled, and alpha writes
+ * are always off.
+ */
+void
+FX_grColorMaskv_NoLock(GLcontext *ctx, const GLboolean rgba[4])
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+
+   if (ctx->Visual.redBits != 8) {
+      /* 16 bpp mode */
+      /* we never have an alpha buffer */
+      fxMesa->Glide.grColorMask(rgba[RCOMP] || rgba[GCOMP] || rgba[BCOMP],
+                                GL_FALSE);
+      return;
+   }
+
+   /* 32bpp mode */
+   ASSERT( fxMesa->Glide.grColorMaskExt );
+   fxMesa->Glide.grColorMaskExt(rgba[RCOMP], rgba[GCOMP],
+                                rgba[BCOMP], rgba[ACOMP]);
+}
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_dd.h b/src/mesa/drivers/dri/tdfx/tdfx_dd.h
new file mode 100644
index 0000000000..f419c8426a
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_dd.h
@@ -0,0 +1,44 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __TDFX_DD_H__
+#define __TDFX_DD_H__
+
+#include "main/context.h"
+
+/* Install the tdfx driver callbacks into *functions; which pixel paths
+ * are installed depends on the channel sizes of *visual.
+ */
+extern void tdfxDDInitDriverFuncs( const __GLcontextModes *visual,
+                                   struct dd_function_table *functions );
+
+#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_glide.h b/src/mesa/drivers/dri/tdfx/tdfx_glide.h
new file mode 100644
index 0000000000..69e5399e72
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_glide.h
@@ -0,0 +1,605 @@
+/*
+ * This file defines macros and types necessary for accessing glide3.
+ */
+
+
+#ifndef NEWGLIDE_H
+#define NEWGLIDE_H
+
+#define FX_CALL
+
+typedef unsigned char FxU8;
+typedef signed char FxI8;
+typedef unsigned short FxU16;
+typedef signed short FxI16;
+#if defined(__alpha__) || defined (__LP64__)   /* presumably targets with a 64-bit long: build the 32-bit aliases on int */
+typedef signed int FxI32;
+typedef unsigned int FxU32;
+#else   /* NOTE(review): assumes long is 32 bits wide on every other target -- confirm */
+typedef signed long FxI32;
+typedef unsigned long FxU32;
+#endif
+typedef unsigned long AnyPtr;   /* presumably an integer that holds a pointer; verify on targets where long is narrower than a pointer */
+typedef int FxBool;
+typedef float FxFloat;
+typedef double FxDouble;
+
+typedef unsigned long FxColor_t;
+/* Colour as four floats, declared in r, g, b, a order. */
+typedef struct {
+   float r, g;
+   float b, a;
+} FxColor4;
+
+typedef FxU32 GrColor_t;
+typedef FxU8 GrAlpha_t;
+typedef FxU32 GrMipMapId_t;
+typedef FxU32 GrStipplePattern_t;
+typedef FxU8 GrFog_t;
+typedef FxU32 GrContext_t;
+typedef int (FX_CALL * GrProc) (void);
+
+#define FXTRUE 1
+#define FXFALSE 0
+
+#define FXBIT(i) (1L << (i))
+
+#define GR_NULL_MIPMAP_HANDLE  ((GrMipMapId_t) -1)
+
+#define GR_MIPMAPLEVELMASK_EVEN FXBIT(0)
+#define GR_MIPMAPLEVELMASK_ODD FXBIT(1)
+#define GR_MIPMAPLEVELMASK_BOTH (GR_MIPMAPLEVELMASK_EVEN | GR_MIPMAPLEVELMASK_ODD )
+
+typedef FxI32 GrChipID_t;
+#define GR_TMU0 0x0
+#define GR_TMU1 0x1
+#define GR_TMU2 0x2
+
+#define GR_FBI  0x0
+
+typedef FxI32 GrCombineFunction_t;
+#define GR_COMBINE_FUNCTION_ZERO        0x0
+#define GR_COMBINE_FUNCTION_NONE        GR_COMBINE_FUNCTION_ZERO
+#define GR_COMBINE_FUNCTION_LOCAL       0x1
+#define GR_COMBINE_FUNCTION_LOCAL_ALPHA 0x2
+#define GR_COMBINE_FUNCTION_SCALE_OTHER 0x3
+#define GR_COMBINE_FUNCTION_BLEND_OTHER GR_COMBINE_FUNCTION_SCALE_OTHER
+#define GR_COMBINE_FUNCTION_SCALE_OTHER_ADD_LOCAL 0x4
+#define GR_COMBINE_FUNCTION_SCALE_OTHER_ADD_LOCAL_ALPHA 0x5
+#define GR_COMBINE_FUNCTION_SCALE_OTHER_MINUS_LOCAL 0x6
+#define GR_COMBINE_FUNCTION_SCALE_OTHER_MINUS_LOCAL_ADD_LOCAL 0x7
+#define GR_COMBINE_FUNCTION_BLEND GR_COMBINE_FUNCTION_SCALE_OTHER_MINUS_LOCAL_ADD_LOCAL
+#define GR_COMBINE_FUNCTION_SCALE_OTHER_MINUS_LOCAL_ADD_LOCAL_ALPHA 0x8
+#define GR_COMBINE_FUNCTION_SCALE_MINUS_LOCAL_ADD_LOCAL 0x9
+#define GR_COMBINE_FUNCTION_BLEND_LOCAL GR_COMBINE_FUNCTION_SCALE_MINUS_LOCAL_ADD_LOCAL
+#define GR_COMBINE_FUNCTION_SCALE_MINUS_LOCAL_ADD_LOCAL_ALPHA 0x10
+
+typedef FxI32 GrCombineFactor_t;
+#define GR_COMBINE_FACTOR_ZERO          0x0
+#define GR_COMBINE_FACTOR_NONE          GR_COMBINE_FACTOR_ZERO
+#define GR_COMBINE_FACTOR_LOCAL         0x1
+#define GR_COMBINE_FACTOR_OTHER_ALPHA   0x2
+#define GR_COMBINE_FACTOR_LOCAL_ALPHA   0x3
+#define GR_COMBINE_FACTOR_TEXTURE_ALPHA 0x4
+#define GR_COMBINE_FACTOR_TEXTURE_RGB   0x5
+#define GR_COMBINE_FACTOR_DETAIL_FACTOR GR_COMBINE_FACTOR_TEXTURE_ALPHA
+#define GR_COMBINE_FACTOR_LOD_FRACTION  0x5
+#define GR_COMBINE_FACTOR_ONE           0x8
+#define GR_COMBINE_FACTOR_ONE_MINUS_LOCAL 0x9
+#define GR_COMBINE_FACTOR_ONE_MINUS_OTHER_ALPHA 0xa
+#define GR_COMBINE_FACTOR_ONE_MINUS_LOCAL_ALPHA 0xb
+#define GR_COMBINE_FACTOR_ONE_MINUS_TEXTURE_ALPHA 0xc
+#define GR_COMBINE_FACTOR_ONE_MINUS_DETAIL_FACTOR GR_COMBINE_FACTOR_ONE_MINUS_TEXTURE_ALPHA
+#define GR_COMBINE_FACTOR_ONE_MINUS_LOD_FRACTION 0xd
+
+typedef FxI32 GrCombineLocal_t;
+#define GR_COMBINE_LOCAL_ITERATED 0x0
+#define GR_COMBINE_LOCAL_CONSTANT 0x1
+#define GR_COMBINE_LOCAL_NONE GR_COMBINE_LOCAL_CONSTANT
+#define GR_COMBINE_LOCAL_DEPTH  0x2
+
+typedef FxI32 GrCombineOther_t;
+#define GR_COMBINE_OTHER_ITERATED 0x0
+#define GR_COMBINE_OTHER_TEXTURE 0x1
+#define GR_COMBINE_OTHER_CONSTANT 0x2
+#define GR_COMBINE_OTHER_NONE GR_COMBINE_OTHER_CONSTANT
+
+typedef FxI32 GrAlphaSource_t;
+#define GR_ALPHASOURCE_CC_ALPHA 0x0
+#define GR_ALPHASOURCE_ITERATED_ALPHA 0x1
+#define GR_ALPHASOURCE_TEXTURE_ALPHA 0x2
+#define GR_ALPHASOURCE_TEXTURE_ALPHA_TIMES_ITERATED_ALPHA 0x3
+
+typedef FxI32 GrColorCombineFnc_t;
+#define GR_COLORCOMBINE_ZERO 0x0
+#define GR_COLORCOMBINE_CCRGB 0x1
+#define GR_COLORCOMBINE_ITRGB 0x2
+#define GR_COLORCOMBINE_ITRGB_DELTA0 0x3
+#define GR_COLORCOMBINE_DECAL_TEXTURE 0x4
+#define GR_COLORCOMBINE_TEXTURE_TIMES_CCRGB 0x5
+#define GR_COLORCOMBINE_TEXTURE_TIMES_ITRGB 0x6
+#define GR_COLORCOMBINE_TEXTURE_TIMES_ITRGB_DELTA0 0x7
+#define GR_COLORCOMBINE_TEXTURE_TIMES_ITRGB_ADD_ALPHA 0x8
+#define GR_COLORCOMBINE_TEXTURE_TIMES_ALPHA 0x9
+#define GR_COLORCOMBINE_TEXTURE_TIMES_ALPHA_ADD_ITRGB 0xa
+#define GR_COLORCOMBINE_TEXTURE_ADD_ITRGB 0xb
+#define GR_COLORCOMBINE_TEXTURE_SUB_ITRGB 0xc
+#define GR_COLORCOMBINE_CCRGB_BLEND_ITRGB_ON_TEXALPHA 0xd
+#define GR_COLORCOMBINE_DIFF_SPEC_A 0xe
+#define GR_COLORCOMBINE_DIFF_SPEC_B 0xf
+#define GR_COLORCOMBINE_ONE 0x10
+
+typedef FxI32 GrAlphaBlendFnc_t;
+#define GR_BLEND_ZERO 0x0
+#define GR_BLEND_SRC_ALPHA 0x1
+#define GR_BLEND_SRC_COLOR 0x2
+#define GR_BLEND_DST_COLOR GR_BLEND_SRC_COLOR
+#define GR_BLEND_DST_ALPHA 0x3
+#define GR_BLEND_ONE 0x4
+#define GR_BLEND_ONE_MINUS_SRC_ALPHA 0x5
+#define GR_BLEND_ONE_MINUS_SRC_COLOR 0x6
+#define GR_BLEND_ONE_MINUS_DST_COLOR GR_BLEND_ONE_MINUS_SRC_COLOR
+#define GR_BLEND_ONE_MINUS_DST_ALPHA 0x7
+#define GR_BLEND_RESERVED_8 0x8
+#define GR_BLEND_RESERVED_9 0x9
+#define GR_BLEND_RESERVED_A 0xa
+#define GR_BLEND_RESERVED_B 0xb
+#define GR_BLEND_RESERVED_C 0xc
+#define GR_BLEND_RESERVED_D 0xd
+#define GR_BLEND_RESERVED_E 0xe
+#define GR_BLEND_ALPHA_SATURATE 0xf
+#define GR_BLEND_PREFOG_COLOR GR_BLEND_ALPHA_SATURATE
+#define GR_BLEND_SAME_COLOR_EXT           0x08
+#define GR_BLEND_ONE_MINUS_SAME_COLOR_EXT 0x09
+
+typedef FxI32 GrAspectRatio_t;
+#define GR_ASPECT_LOG2_8x1        3
+#define GR_ASPECT_LOG2_4x1        2
+#define GR_ASPECT_LOG2_2x1        1
+#define GR_ASPECT_LOG2_1x1        0
+#define GR_ASPECT_LOG2_1x2       -1
+#define GR_ASPECT_LOG2_1x4       -2
+#define GR_ASPECT_LOG2_1x8       -3
+
+typedef FxI32 GrBuffer_t;
+#define GR_BUFFER_FRONTBUFFER   0x0
+#define GR_BUFFER_BACKBUFFER    0x1
+#define GR_BUFFER_AUXBUFFER     0x2
+#define GR_BUFFER_DEPTHBUFFER   0x3
+#define GR_BUFFER_ALPHABUFFER   0x4
+#define GR_BUFFER_TRIPLEBUFFER  0x5
+
+typedef FxI32 GrChromakeyMode_t;
+#define GR_CHROMAKEY_DISABLE    0x0
+#define GR_CHROMAKEY_ENABLE     0x1
+
+typedef FxI32 GrChromaRangeMode_t;
+#define GR_CHROMARANGE_RGB_ALL_EXT  0x0
+
+#define GR_CHROMARANGE_DISABLE_EXT  0x00
+#define GR_CHROMARANGE_ENABLE_EXT   0x01
+
+typedef FxI32 GrTexChromakeyMode_t;
+#define GR_TEXCHROMA_DISABLE_EXT               0x0
+#define GR_TEXCHROMA_ENABLE_EXT                0x1
+
+#define GR_TEXCHROMARANGE_RGB_ALL_EXT  0x0
+
+typedef FxI32 GrCmpFnc_t;
+#define GR_CMP_NEVER    0x0
+#define GR_CMP_LESS     0x1
+#define GR_CMP_EQUAL    0x2
+#define GR_CMP_LEQUAL   0x3
+#define GR_CMP_GREATER  0x4
+#define GR_CMP_NOTEQUAL 0x5
+#define GR_CMP_GEQUAL   0x6
+#define GR_CMP_ALWAYS   0x7
+
+typedef FxI32 GrColorFormat_t;
+#define GR_COLORFORMAT_ARGB     0x0
+#define GR_COLORFORMAT_ABGR     0x1
+
+#define GR_COLORFORMAT_RGBA     0x2
+#define GR_COLORFORMAT_BGRA     0x3
+
+typedef FxI32 GrCullMode_t;
+#define GR_CULL_DISABLE         0x0
+#define GR_CULL_NEGATIVE        0x1
+#define GR_CULL_POSITIVE        0x2
+
+typedef FxI32 GrDepthBufferMode_t;
+#define GR_DEPTHBUFFER_DISABLE                  0x0
+#define GR_DEPTHBUFFER_ZBUFFER                  0x1
+#define GR_DEPTHBUFFER_WBUFFER                  0x2
+#define GR_DEPTHBUFFER_ZBUFFER_COMPARE_TO_BIAS  0x3
+#define GR_DEPTHBUFFER_WBUFFER_COMPARE_TO_BIAS  0x4
+
+typedef FxI32 GrDitherMode_t;
+#define GR_DITHER_DISABLE       0x0
+#define GR_DITHER_2x2           0x1
+#define GR_DITHER_4x4           0x2
+
+typedef FxI32 GrStippleMode_t;
+#define GR_STIPPLE_DISABLE	0x0
+#define GR_STIPPLE_PATTERN	0x1
+#define GR_STIPPLE_ROTATE	0x2
+
+typedef FxI32 GrFogMode_t;
+#define GR_FOG_DISABLE                     0x0
+#define GR_FOG_WITH_TABLE_ON_FOGCOORD_EXT  0x1
+#define GR_FOG_WITH_TABLE_ON_Q             0x2
+#define GR_FOG_WITH_TABLE_ON_W             GR_FOG_WITH_TABLE_ON_Q
+#define GR_FOG_WITH_ITERATED_Z             0x3
+#define GR_FOG_WITH_ITERATED_ALPHA_EXT     0x4
+#define GR_FOG_MULT2                       0x100
+#define GR_FOG_ADD2                        0x200
+
+typedef FxU32 GrLock_t;
+#define GR_LFB_READ_ONLY  0x00
+#define GR_LFB_WRITE_ONLY 0x01
+#define GR_LFB_IDLE       0x00
+#define GR_LFB_NOIDLE     0x10
+
+typedef FxI32 GrLfbBypassMode_t;
+#define GR_LFBBYPASS_DISABLE    0x0
+#define GR_LFBBYPASS_ENABLE     0x1
+
+typedef FxI32 GrLfbWriteMode_t;
+#define GR_LFBWRITEMODE_565        0x0
+#define GR_LFBWRITEMODE_555        0x1
+#define GR_LFBWRITEMODE_1555       0x2
+#define GR_LFBWRITEMODE_RESERVED1  0x3
+#define GR_LFBWRITEMODE_888        0x4
+#define GR_LFBWRITEMODE_8888       0x5
+#define GR_LFBWRITEMODE_RESERVED2  0x6
+#define GR_LFBWRITEMODE_RESERVED3  0x7
+#define GR_LFBWRITEMODE_RESERVED4  0x8
+#define GR_LFBWRITEMODE_RESERVED5  0x9
+#define GR_LFBWRITEMODE_RESERVED6  0xa
+#define GR_LFBWRITEMODE_RESERVED7  0xb
+#define GR_LFBWRITEMODE_565_DEPTH  0xc
+#define GR_LFBWRITEMODE_555_DEPTH  0xd
+#define GR_LFBWRITEMODE_1555_DEPTH 0xe
+#define GR_LFBWRITEMODE_ZA16       0xf
+#define GR_LFBWRITEMODE_ANY        0xFF
+
+typedef FxI32 GrOriginLocation_t;
+#define GR_ORIGIN_UPPER_LEFT    0x0
+#define GR_ORIGIN_LOWER_LEFT    0x1
+#define GR_ORIGIN_ANY           0xFF
+
+/* LFB info record: a size field, the buffer pointer, its stride in bytes,
+ * a write mode and an origin.  NOTE(review): who fills it is not shown here. */
+typedef struct {
+   int size;
+   void *lfbPtr;
+   FxU32 strideInBytes;
+   GrLfbWriteMode_t writeMode;
+   GrOriginLocation_t origin;
+} GrLfbInfo_t;
+
+typedef FxI32 GrLOD_t;
+#define GR_LOD_LOG2_2048        0xb
+#define GR_LOD_LOG2_1024        0xa
+#define GR_LOD_LOG2_512         0x9
+#define GR_LOD_LOG2_256         0x8
+#define GR_LOD_LOG2_128         0x7
+#define GR_LOD_LOG2_64          0x6
+#define GR_LOD_LOG2_32          0x5
+#define GR_LOD_LOG2_16          0x4
+#define GR_LOD_LOG2_8           0x3
+#define GR_LOD_LOG2_4           0x2
+#define GR_LOD_LOG2_2           0x1
+#define GR_LOD_LOG2_1           0x0
+
+typedef FxI32 GrMipMapMode_t;
+#define GR_MIPMAP_DISABLE               0x0
+#define GR_MIPMAP_NEAREST               0x1
+#define GR_MIPMAP_NEAREST_DITHER        0x2
+
+typedef FxI32 GrSmoothingMode_t;
+#define GR_SMOOTHING_DISABLE    0x0
+#define GR_SMOOTHING_ENABLE     0x1
+
+typedef FxI32 GrTextureClampMode_t;
+#define GR_TEXTURECLAMP_WRAP        0x0
+#define GR_TEXTURECLAMP_CLAMP       0x1
+#define GR_TEXTURECLAMP_MIRROR_EXT  0x2
+
+typedef FxI32 GrTextureCombineFnc_t;
+#define GR_TEXTURECOMBINE_ZERO          0x0
+#define GR_TEXTURECOMBINE_DECAL         0x1
+#define GR_TEXTURECOMBINE_OTHER         0x2
+#define GR_TEXTURECOMBINE_ADD           0x3
+#define GR_TEXTURECOMBINE_MULTIPLY      0x4
+#define GR_TEXTURECOMBINE_SUBTRACT      0x5
+#define GR_TEXTURECOMBINE_DETAIL        0x6
+#define GR_TEXTURECOMBINE_DETAIL_OTHER  0x7
+#define GR_TEXTURECOMBINE_TRILINEAR_ODD 0x8
+#define GR_TEXTURECOMBINE_TRILINEAR_EVEN 0x9
+#define GR_TEXTURECOMBINE_ONE           0xa
+
+typedef FxI32 GrTextureFilterMode_t;
+#define GR_TEXTUREFILTER_POINT_SAMPLED  0x0
+#define GR_TEXTUREFILTER_BILINEAR       0x1
+
+typedef FxI32 GrTextureFormat_t;
+#define GR_TEXFMT_8BIT                  0x0
+#define GR_TEXFMT_RGB_332 GR_TEXFMT_8BIT
+#define GR_TEXFMT_YIQ_422               0x1
+#define GR_TEXFMT_ALPHA_8               0x2
+#define GR_TEXFMT_INTENSITY_8           0x3
+#define GR_TEXFMT_ALPHA_INTENSITY_44    0x4
+#define GR_TEXFMT_P_8                   0x5
+#define GR_TEXFMT_RSVD0                 0x6
+#define GR_TEXFMT_RSVD1                 0x7
+#define GR_TEXFMT_16BIT                 0x8
+#define GR_TEXFMT_ARGB_8332 GR_TEXFMT_16BIT
+#define GR_TEXFMT_AYIQ_8422             0x9
+#define GR_TEXFMT_RGB_565               0xa
+#define GR_TEXFMT_ARGB_1555             0xb
+#define GR_TEXFMT_ARGB_4444             0xc
+#define GR_TEXFMT_ALPHA_INTENSITY_88    0xd
+#define GR_TEXFMT_AP_88                 0xe
+#define GR_TEXFMT_RSVD2                 0xf
+#define GR_TEXFMT_ARGB_CMP_FXT1           0x11
+#define GR_TEXFMT_ARGB_8888               0x12
+#define GR_TEXFMT_YUYV_422                0x13
+#define GR_TEXFMT_UYVY_422                0x14
+#define GR_TEXFMT_AYUV_444                0x15
+#define GR_TEXFMT_ARGB_CMP_DXT1           0x16
+#define GR_TEXFMT_ARGB_CMP_DXT2           0x17
+#define GR_TEXFMT_ARGB_CMP_DXT3           0x18
+#define GR_TEXFMT_ARGB_CMP_DXT4           0x19
+#define GR_TEXFMT_ARGB_CMP_DXT5           0x1A
+
+typedef FxU32 GrTexTable_t;
+#define GR_TEXTABLE_NCC0                 0x0
+#define GR_TEXTABLE_NCC1                 0x1
+#define GR_TEXTABLE_PALETTE              0x2
+#define GR_TEXTABLE_PALETTE_6666_EXT     0x3
+
+typedef FxU32 GrNCCTable_t;
+#define GR_NCCTABLE_NCC0    0x0
+#define GR_NCCTABLE_NCC1    0x1
+
+typedef FxU32 GrTexBaseRange_t;
+#define GR_TEXBASE_256      0x3
+#define GR_TEXBASE_128      0x2
+#define GR_TEXBASE_64       0x1
+#define GR_TEXBASE_32_TO_1  0x0
+#define GR_TEXBASE_2048     0x7
+#define GR_TEXBASE_1024     0x6
+#define GR_TEXBASE_512      0x5
+#define GR_TEXBASE_256_TO_1 0x4
+
+typedef FxU32 GrEnableMode_t;
+#define GR_MODE_DISABLE     0x0
+#define GR_MODE_ENABLE      0x1
+
+#define GR_AA_ORDERED            0x01
+#define GR_ALLOW_MIPMAP_DITHER   0x02
+#define GR_PASSTHRU              0x03
+#define GR_SHAMELESS_PLUG        0x04
+#define GR_VIDEO_SMOOTHING       0x05
+
+typedef FxU32 GrCoordinateSpaceMode_t;
+#define GR_WINDOW_COORDS    0x00
+#define GR_CLIP_COORDS      0x01
+
+/* Parameters for strips */
+#define GR_PARAM_XY       0x01
+#define GR_PARAM_Z        0x02
+#define GR_PARAM_W        0x03
+#define GR_PARAM_Q        0x04
+#define GR_PARAM_FOG_EXT  0x05
+
+#define GR_PARAM_A        0x10
+
+#define GR_PARAM_RGB      0x20
+
+#define GR_PARAM_PARGB    0x30
+
+#define GR_PARAM_ST0      0x40
+#define GR_PARAM_ST1      (GR_PARAM_ST0+1)   /* parenthesized so the sum stays one value inside larger expressions */
+#define GR_PARAM_ST2      (GR_PARAM_ST0+2)
+
+#define GR_PARAM_Q0       0x50
+#define GR_PARAM_Q1       (GR_PARAM_Q0+1)    /* same reason as GR_PARAM_ST1 */
+#define GR_PARAM_Q2       (GR_PARAM_Q0+2)
+
+#define GR_PARAM_DISABLE  0x00
+#define GR_PARAM_ENABLE   0x01
+
+/* grDrawVertexArray/grDrawVertexArrayContiguous */
+#define GR_POINTS                        0
+#define GR_LINE_STRIP                    1
+#define GR_LINES                         2
+#define GR_POLYGON                       3
+#define GR_TRIANGLE_STRIP                4
+#define GR_TRIANGLE_FAN                  5
+#define GR_TRIANGLES                     6
+#define GR_TRIANGLE_STRIP_CONTINUE       7
+#define GR_TRIANGLE_FAN_CONTINUE         8
+
+/* grGet/grReset */
+#define GR_BITS_DEPTH                   0x01
+#define GR_BITS_RGBA                    0x02
+#define GR_FIFO_FULLNESS                0x03
+#define GR_FOG_TABLE_ENTRIES            0x04
+#define GR_GAMMA_TABLE_ENTRIES          0x05
+#define GR_GLIDE_STATE_SIZE             0x06
+#define GR_GLIDE_VERTEXLAYOUT_SIZE      0x07
+#define GR_IS_BUSY                      0x08
+#define GR_LFB_PIXEL_PIPE               0x09
+#define GR_MAX_TEXTURE_SIZE             0x0a
+#define GR_MAX_TEXTURE_ASPECT_RATIO     0x0b
+#define GR_MEMORY_FB                    0x0c
+#define GR_MEMORY_TMU                   0x0d
+#define GR_MEMORY_UMA                   0x0e
+#define GR_NUM_BOARDS                   0x0f
+#define GR_NON_POWER_OF_TWO_TEXTURES    0x10
+#define GR_NUM_FB                       0x11
+#define GR_NUM_SWAP_HISTORY_BUFFER      0x12
+#define GR_NUM_TMU                      0x13
+#define GR_PENDING_BUFFERSWAPS          0x14
+#define GR_REVISION_FB                  0x15
+#define GR_REVISION_TMU                 0x16
+#define GR_STATS_LINES                  0x17
+#define GR_STATS_PIXELS_AFUNC_FAIL      0x18
+#define GR_STATS_PIXELS_CHROMA_FAIL     0x19
+#define GR_STATS_PIXELS_DEPTHFUNC_FAIL  0x1a
+#define GR_STATS_PIXELS_IN              0x1b
+#define GR_STATS_PIXELS_OUT             0x1c
+#define GR_STATS_PIXELS                 0x1d
+#define GR_STATS_POINTS                 0x1e
+#define GR_STATS_TRIANGLES_IN           0x1f
+#define GR_STATS_TRIANGLES_OUT          0x20
+#define GR_STATS_TRIANGLES              0x21
+#define GR_SWAP_HISTORY                 0x22
+#define GR_SUPPORTS_PASSTHRU            0x23
+#define GR_TEXTURE_ALIGN                0x24
+#define GR_VIDEO_POSITION               0x25
+#define GR_VIEWPORT                     0x26
+#define GR_WDEPTH_MIN_MAX               0x27
+#define GR_ZDEPTH_MIN_MAX               0x28
+#define GR_VERTEX_PARAMETER             0x29
+#define GR_BITS_GAMMA                   0x2a
+#define GR_GET_RESERVED_1               0x1000
+
+/* grGetString types */
+#define GR_EXTENSION                    0xa0
+#define GR_HARDWARE                     0xa1
+#define GR_RENDERER                     0xa2
+#define GR_VENDOR                       0xa3
+#define GR_VERSION                      0xa4
+
+typedef FxI32 GrScreenRefresh_t;
+#define GR_REFRESH_NONE   0xff
+
+typedef FxI32 GrScreenResolution_t;
+#define GR_RESOLUTION_NONE      0xff
+
+/* Texture record: small and large LOD (log2), aspect ratio (log2), a
+ * texture format and a pointer to the data. */
+typedef struct {
+   GrLOD_t smallLodLog2;
+   GrLOD_t largeLodLog2;
+   GrAspectRatio_t aspectRatioLog2;
+   GrTextureFormat_t format;
+   void *data;
+} GrTexInfo;
+
+/* Five FxU32 counters: pixels in, chroma failures, z-function failures,
+ * alpha-function failures and pixels out (per the field names). */
+typedef struct GrSstPerfStats_s {
+   FxU32 pixelsIn;
+   FxU32 chromaFail;
+   FxU32 zFuncFail;
+   FxU32 aFuncFail;
+   FxU32 pixelsOut;
+} GrSstPerfStats_t;
+
+/* Screen mode record: resolution and refresh selectors plus the number
+ * of colour buffers and aux buffers. */
+typedef struct {
+   GrScreenResolution_t resolution;
+   GrScreenRefresh_t refresh;
+   int numColorBuffers;
+   int numAuxBuffers;
+} GrResolution;
+
+typedef GrResolution GlideResolution;
+#define GR_QUERY_ANY  ((FxU32)(~0))
+
+typedef FxU32 GrLfbSrcFmt_t;
+#define GR_LFB_SRC_FMT_565          0x00
+#define GR_LFB_SRC_FMT_555          0x01
+#define GR_LFB_SRC_FMT_1555         0x02
+#define GR_LFB_SRC_FMT_888          0x04
+#define GR_LFB_SRC_FMT_8888         0x05
+#define GR_LFB_SRC_FMT_565_DEPTH    0x0c
+#define GR_LFB_SRC_FMT_555_DEPTH    0x0d
+#define GR_LFB_SRC_FMT_1555_DEPTH   0x0e
+#define GR_LFB_SRC_FMT_ZA16         0x0f
+#define GR_LFB_SRC_FMT_RLE16        0x80
+
+typedef FxU32 GrPixelFormat_t;
+#define GR_PIXFMT_I_8                           0x0001
+#define GR_PIXFMT_AI_88                         0x0002
+#define GR_PIXFMT_RGB_565                       0x0003
+#define GR_PIXFMT_ARGB_1555                     0x0004
+#define GR_PIXFMT_ARGB_8888                     0x0005
+#define GR_PIXFMT_AA_2_RGB_565                  0x0006
+#define GR_PIXFMT_AA_2_ARGB_1555                0x0007
+#define GR_PIXFMT_AA_2_ARGB_8888                0x0008
+#define GR_PIXFMT_AA_4_RGB_565                  0x0009
+#define GR_PIXFMT_AA_4_ARGB_1555                0x000a
+#define GR_PIXFMT_AA_4_ARGB_8888                0x000b
+
+#define GR_LFBWRITEMODE_Z32                     0x0008
+
+typedef FxU32 GrAAMode_t;
+#define GR_AA_NONE                              0x0000
+#define GR_AA_4SAMPLES                          0x0001
+
+typedef FxU8 GrStencil_t;
+
+typedef FxU32 GrStencilOp_t;
+#define GR_STENCILOP_KEEP        0x00
+#define GR_STENCILOP_ZERO        0x01
+#define GR_STENCILOP_REPLACE     0x02
+#define GR_STENCILOP_INCR_CLAMP  0x03
+#define GR_STENCILOP_DECR_CLAMP  0x04
+#define GR_STENCILOP_INVERT      0x05
+#define GR_STENCILOP_INCR_WRAP   0x06
+#define GR_STENCILOP_DECR_WRAP   0x07
+
+#define GR_TEXTURE_UMA_EXT       0x06
+#define GR_STENCIL_MODE_EXT      0x07
+#define GR_OPENGL_MODE_EXT       0x08
+
+typedef FxU32 GrCCUColor_t;
+typedef FxU32 GrACUColor_t;
+typedef FxU32 GrTCCUColor_t;
+typedef FxU32 GrTACUColor_t;
+#define GR_CMBX_ZERO                      0x00
+#define GR_CMBX_TEXTURE_ALPHA             0x01
+#define GR_CMBX_ALOCAL                    0x02
+#define GR_CMBX_AOTHER                    0x03
+#define GR_CMBX_B                         0x04
+#define GR_CMBX_CONSTANT_ALPHA            0x05
+#define GR_CMBX_CONSTANT_COLOR            0x06
+#define GR_CMBX_DETAIL_FACTOR             0x07
+#define GR_CMBX_ITALPHA                   0x08
+#define GR_CMBX_ITRGB                     0x09
+#define GR_CMBX_LOCAL_TEXTURE_ALPHA       0x0a
+#define GR_CMBX_LOCAL_TEXTURE_RGB         0x0b
+#define GR_CMBX_LOD_FRAC                  0x0c
+#define GR_CMBX_OTHER_TEXTURE_ALPHA       0x0d
+#define GR_CMBX_OTHER_TEXTURE_RGB         0x0e
+#define GR_CMBX_TEXTURE_RGB               0x0f
+#define GR_CMBX_TMU_CALPHA                0x10
+#define GR_CMBX_TMU_CCOLOR                0x11
+
+typedef FxU32 GrCombineMode_t;
+#define GR_FUNC_MODE_ZERO                 0x00
+#define GR_FUNC_MODE_X                    0x01
+#define GR_FUNC_MODE_ONE_MINUS_X          0x02
+#define GR_FUNC_MODE_NEGATIVE_X           0x03
+#define GR_FUNC_MODE_X_MINUS_HALF         0x04
+
+typedef FxU32 GrAlphaBlendOp_t;
+#define GR_BLEND_OP_ADD                   0x00
+#define GR_BLEND_OP_SUB                   0x01
+#define GR_BLEND_OP_REVSUB                0x02
+
+/* Palette table: 256 FxU32 entries.
+ * NOTE(review): the per-entry colour layout is not shown in this header. */
+typedef struct {
+   FxU32 data[256];
+} GuTexPalette;
+
+typedef void (*GrErrorCallbackFnc_t) (const char *string, FxBool fatal);
+
+#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_lock.c b/src/mesa/drivers/dri/tdfx/tdfx_lock.c
new file mode 100644
index 0000000000..f218e4ee57
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_lock.c
@@ -0,0 +1,100 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#include "tdfx_context.h"
+#include "tdfx_lock.h"
+#include "tdfx_state.h"
+#include "tdfx_render.h"
+#include "tdfx_texman.h"
+
+#include "drirenderbuffer.h"
+
+/* Contended-lock path: LOCK_HARDWARE calls this when DRM_CAS leaves __ret set.
+ * Takes the DRM lock, then refreshes FIFO, Glide state and clipping as needed. */
+void tdfxGetLock( tdfxContextPtr fxMesa )
+{
+    __DRIcontext *cPriv = fxMesa->driContext;
+    __DRIdrawable *const drawable = cPriv->driDrawablePriv;
+    __DRIdrawable *const readable = cPriv->driReadablePriv;
+    __DRIscreen *sPriv = drawable->driScreenPriv;
+    TDFXSAREAPriv *saPriv = (TDFXSAREAPriv *) (((char *) sPriv->pSAREA) +
+					fxMesa->fxScreen->sarea_priv_offset);
+    unsigned int stamp = drawable->lastStamp;   /* sampled before locking */
+
+    drmGetLock( fxMesa->driFd, fxMesa->hHWContext, 0 );
+
+    /* This macro will update drawable's cliprects if needed */
+    DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable);
+    if (drawable != readable) {
+	DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable);
+    }
+
+    if ( saPriv->fifoOwner != fxMesa->hHWContext ) {
+        fxMesa->Glide.grDRIImportFifo( saPriv->fifoPtr, saPriv->fifoRead );
+    }
+
+    if ( saPriv->ctxOwner != fxMesa->hHWContext ) {
+        /* Glide mirrors the state: getting it brings the mirror up to date,
+	 * setting it then forces that copy back onto the hardware. */
+        void *state;
+        FxI32 stateSize = 0;   /* stays 0 if grGet() writes nothing */
+        fxMesa->Glide.grGet(GR_GLIDE_STATE_SIZE, 4, &stateSize);
+        state = (stateSize > 0) ? malloc(stateSize) : NULL;
+        if ( state ) {   /* no buffer: skip the refresh rather than hand Glide NULL */
+           fxMesa->Glide.grGlideGetState( state );
+           fxMesa->Glide.grGlideSetState( state );
+        }
+        free( state );   /* free(NULL) is a no-op */
+    }
+
+#if 0
+    if ( saPriv->texOwner != fxMesa->hHWContext ) {
+        tdfxTMRestoreTextures_NoLock( fxMesa );
+    }
+#endif
+
+    if ((*drawable->pStamp != stamp)
+	|| (saPriv->ctxOwner != fxMesa->hHWContext)) {
+       driUpdateFramebufferSize(fxMesa->glCtx, drawable);
+       if (drawable != readable) {
+	   driUpdateFramebufferSize(fxMesa->glCtx, readable);
+       }
+
+       tdfxUpdateClipping(fxMesa->glCtx);
+       tdfxUploadClipping(fxMesa);
+    }
+
+    DEBUG_LOCK();
+}
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_lock.h b/src/mesa/drivers/dri/tdfx/tdfx_lock.h
new file mode 100644
index 0000000000..74e3f5c9cc
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_lock.h
@@ -0,0 +1,143 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __TDFX_LOCK_H__
+#define __TDFX_LOCK_H__
+
+/* Set to 1 to find locking conflicts: DEBUG_CHECK_LOCK() then prints the
+ * recorded and current lock sites and calls exit(1) if a site is still set. */
+#define DEBUG_LOCKING		0
+
+#if DEBUG_LOCKING
+extern char *prevLockFile;
+extern int prevLockLine;
+
+#define DEBUG_LOCK()							\
+   do {									\
+      prevLockFile = (__FILE__);					\
+      prevLockLine = (__LINE__);					\
+   } while (0)
+
+#define DEBUG_RESET()							\
+   do {									\
+      prevLockFile = 0;							\
+      prevLockLine = 0;							\
+   } while (0)
+
+#define DEBUG_CHECK_LOCK()						\
+   do {									\
+      if ( prevLockFile ) {						\
+	 fprintf( stderr,						\
+		  "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n",	\
+		  prevLockFile, prevLockLine, __FILE__, __LINE__ );	\
+	 exit( 1 );							\
+      }									\
+   } while (0)
+
+#else
+
+#define DEBUG_LOCK()
+#define DEBUG_RESET()
+#define DEBUG_CHECK_LOCK()
+
+#endif /* DEBUG_LOCKING */
+
+
+extern void tdfxGetLock( tdfxContextPtr fxMesa );
+
+
+/* !!! We may want to separate locks from locks with validation.
+   This could be used to improve performance for those
+   commands that do not do any drawing !!! */
+
+#define DRM_LIGHT_LOCK_RETURN(fd,lock,context,__ret)                   \
+	do { /* one CAS; drmGetLock() only when __ret comes back set */ \
+		DRM_CAS(lock,context,DRM_LOCK_HELD|(context),__ret);   \
+                if (__ret) drmGetLock(fd,context,0);                   \
+        } while(0)
+
+#define LOCK_HARDWARE( fxMesa )						\
+   do { /* one CAS; tdfxGetLock() only when __ret comes back set */	\
+      char __ret = 0;							\
+      /* (fxMesa) is parenthesized; note it is evaluated several times */ \
+      DEBUG_CHECK_LOCK();						\
+      DRM_CAS( (fxMesa)->driHwLock, (fxMesa)->hHWContext,		\
+	      DRM_LOCK_HELD | (fxMesa)->hHWContext, __ret );		\
+      if ( __ret ) {							\
+	 tdfxGetLock( fxMesa );						\
+      }									\
+      DEBUG_LOCK();							\
+   } while (0)
+
+/* Release the DRM lock held by fxMesa (the argument is evaluated three times) */
+#define UNLOCK_HARDWARE( fxMesa )					\
+  do {									\
+    DRM_UNLOCK( (fxMesa)->driFd, (fxMesa)->driHwLock, (fxMesa)->hHWContext ); \
+    DEBUG_RESET();							\
+  } while (0)
+
+/*
+ * BEGIN_CLIP_LOOP locks and opens a do-block plus the per-cliprect loop;
+ * END_CLIP_LOOP closes both and unlocks.  Neither is a statement alone,
+ * and a return/goto out of the body would skip the unlock.
+ */
+#define BEGIN_CLIP_LOOP(fxMesa)			\
+  do {						\
+    LOCK_HARDWARE( fxMesa );			\
+    BEGIN_CLIP_LOOP_LOCKED( fxMesa )
+
+#define BEGIN_CLIP_LOOP_LOCKED(fxMesa)				\
+  do { /* left open on purpose: END_CLIP_LOOP_LOCKED closes it */	\
+    int _nc = (fxMesa)->numClipRects;				\
+    while (_nc--) { /* visits cliprects from last to first */	\
+      if ((fxMesa)->numClipRects > 1) { /* one rect: no grClipWindow call */ \
+        int _height = (fxMesa)->screen_height;			\
+        (fxMesa)->Glide.grClipWindow((fxMesa)->pClipRects[_nc].x1,	\
+                     _height - (fxMesa)->pClipRects[_nc].y2,	\
+                     (fxMesa)->pClipRects[_nc].x2,		\
+                     _height - (fxMesa)->pClipRects[_nc].y1);	\
+      }
+
+
+#define END_CLIP_LOOP_LOCKED( fxMesa )		\
+    } /* ends the while (_nc--) opened by BEGIN_CLIP_LOOP_LOCKED */ \
+  } while (0)
+
+#define END_CLIP_LOOP( fxMesa )			\
+    END_CLIP_LOOP_LOCKED( fxMesa );		\
+    UNLOCK_HARDWARE( fxMesa ); /* pairs with the LOCK_HARDWARE in BEGIN_CLIP_LOOP */ \
+  } while (0)
+
+#endif /* __TDFX_LOCK_H__ */
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_pixels.c b/src/mesa/drivers/dri/tdfx/tdfx_pixels.c
new file mode 100644
index 0000000000..5a7184056d
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_pixels.c
@@ -0,0 +1,686 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Brian Paul <brianp@valinux.com>
+ *	Nathan Hand <nhand@valinux.com>
+ *
+ */
+
+#include "tdfx_context.h"
+#include "tdfx_dd.h"
+#include "tdfx_lock.h"
+#include "tdfx_pixels.h"
+#include "tdfx_render.h"
+
+#include "swrast/swrast.h"
+
+#include "main/image.h"
+
+
+#define FX_grLfbWriteRegion(fxMesa,dst_buffer,dst_x,dst_y,src_format,src_width,src_height,src_stride,src_data)		\
+  do {	/* Glide grLfbWriteRegion (7th argument fixed to FXFALSE) under the hardware lock */	\
+    LOCK_HARDWARE(fxMesa);		\
+    (fxMesa)->Glide.grLfbWriteRegion(dst_buffer,dst_x,dst_y,src_format,src_width,src_height,FXFALSE,src_stride,src_data);	\
+    UNLOCK_HARDWARE(fxMesa);		\
+  } while(0)
+
+
+#define FX_grLfbReadRegion(fxMesa,src_buffer,src_x,src_y,src_width,src_height,dst_stride,dst_data)			\
+  do {	/* Glide grLfbReadRegion under the hardware lock */	\
+    LOCK_HARDWARE(fxMesa);		\
+    (fxMesa)->Glide.grLfbReadRegion(src_buffer,src_x,src_y,src_width,src_height,dst_stride,dst_data);				\
+    UNLOCK_HARDWARE(fxMesa);		\
+  } while (0)	/* no trailing ';' here: the caller's own ';' ends the statement */
+
+
+#if 0
+static FxBool
+FX_grLfbLock(tdfxContextPtr fxMesa, GrLock_t type, GrBuffer_t buffer,
+             GrLfbWriteMode_t writeMode, GrOriginLocation_t origin,
+             FxBool pixelPipeline, GrLfbInfo_t * info)
+{
+   FxBool result;
+
+   LOCK_HARDWARE(fxMesa);
+   result = fxMesa->Glide.grLfbLock(type, buffer, writeMode, origin, pixelPipeline, info);
+   UNLOCK_HARDWARE(fxMesa);
+   return result;
+}
+#endif
+
+
+#define FX_grLfbUnlock(fxMesa, t, b)	\
+  do {	/* Glide grLfbUnlock(t, b) under the hardware lock */	\
+    LOCK_HARDWARE(fxMesa);		\
+    (fxMesa)->Glide.grLfbUnlock(t, b);	\
+    UNLOCK_HARDWARE(fxMesa);		\
+  } while (0)
+
+
+
+#if 0
+/* test if window coord (px,py) is visible */
+static GLboolean
+inClipRects(tdfxContextPtr fxMesa, int px, int py)
+{
+    int i;
+    for (i = 0; i < fxMesa->numClipRects; i++) {
+        if ((px >= fxMesa->pClipRects[i].x1) &&
+            (px < fxMesa->pClipRects[i].x2) &&
+            (py >= fxMesa->pClipRects[i].y1) &&
+            (py < fxMesa->pClipRects[i].y2)) return GL_TRUE;
+    }
+    return GL_FALSE;
+}
+#endif
+
+/* Test whether a block of pixels is completely visible.  The block covers
+ * columns x .. x+width-1 and rows y-height+1 .. y.  Returns GL_TRUE when the
+ * cliprects account for every pixel of it.  Coverage is tallied rect by rect,
+ * so the result is only meaningful when the cliprects do not overlap.
+ */
+static GLboolean
+inClipRects_Region(tdfxContextPtr fxMesa, int x, int y, int width, int height)
+{
+    const int col_lo = x;
+    const int col_hi = x + width - 1;
+    const int row_lo = y - height + 1;
+    const int row_hi = y;
+    int uncovered = width * height;
+    int n;
+
+    for (n = 0; n < fxMesa->numClipRects; n++) {
+        int cx_lo, cx_hi, cy_lo, cy_hi;
+
+        /* Put each rect's edges in ascending order.  The larger edge is
+         * treated as exclusive, hence the -1 on the upper bound. */
+        cx_lo = (fxMesa->pClipRects[n].x1 < fxMesa->pClipRects[n].x2)
+            ? fxMesa->pClipRects[n].x1 : fxMesa->pClipRects[n].x2;
+        cx_hi = ((fxMesa->pClipRects[n].x1 < fxMesa->pClipRects[n].x2)
+            ? fxMesa->pClipRects[n].x2 : fxMesa->pClipRects[n].x1) - 1;
+        cy_lo = (fxMesa->pClipRects[n].y1 < fxMesa->pClipRects[n].y2)
+            ? fxMesa->pClipRects[n].y1 : fxMesa->pClipRects[n].y2;
+        cy_hi = ((fxMesa->pClipRects[n].y1 < fxMesa->pClipRects[n].y2)
+            ? fxMesa->pClipRects[n].y2 : fxMesa->pClipRects[n].y1) - 1;
+
+        /* A rect that misses the block contributes nothing. */
+        if (cx_hi < col_lo || cy_hi < row_lo ||
+            cx_lo > col_hi || cy_lo > row_hi) {
+            continue;
+        }
+
+        /* Clamp the rect to the block ... */
+        if (cx_lo < col_lo) cx_lo = col_lo;
+        if (cy_lo < row_lo) cy_lo = row_lo;
+        if (cx_hi > col_hi) cx_hi = col_hi;
+        if (cy_hi > row_hi) cy_hi = row_hi;
+
+        /* ... and deduct the overlap from the pixels still unaccounted for. */
+        uncovered -= (cx_hi - cx_lo + 1) * (cy_hi - cy_lo + 1);
+    }
+
+    return uncovered == 0;
+}
+
+#if 0
+GLboolean
+tdfx_bitmap_R5G6B5(GLcontext * ctx, GLint px, GLint py,
+		   GLsizei width, GLsizei height,
+		   const struct gl_pixelstore_attrib *unpack,
+		   const GLubyte * bitmap)
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   GrLfbInfo_t info;
+   TdfxU16 color;
+   const struct gl_pixelstore_attrib *finalUnpack;
+   struct gl_pixelstore_attrib scissoredUnpack;
+
+   /* check if there's any raster operations enabled which we can't handle */
+   if (ctx->RasterMask & (ALPHATEST_BIT |
+			  BLEND_BIT |
+			  DEPTH_BIT |
+			  FOG_BIT |
+			  LOGIC_OP_BIT |
+			  SCISSOR_BIT |
+			  STENCIL_BIT |
+			  MASKING_BIT |
+			  MULTI_DRAW_BIT)) return GL_FALSE;
+
+   if (ctx->Scissor.Enabled) {
+      /* This is a bit tricky, but by carefully adjusting the px, py,
+       * width, height, skipPixels and skipRows values we can do
+       * scissoring without special code in the rendering loop.
+       */
+
+      /* we'll construct a new pixelstore struct */
+      finalUnpack = &scissoredUnpack;
+      scissoredUnpack = *unpack;
+      if (scissoredUnpack.RowLength == 0)
+	 scissoredUnpack.RowLength = width;
+
+      /* clip left */
+      if (px < ctx->Scissor.X) {
+	 scissoredUnpack.SkipPixels += (ctx->Scissor.X - px);
+	 width -= (ctx->Scissor.X - px);
+	 px = ctx->Scissor.X;
+      }
+      /* clip right */
+      if (px + width >= ctx->Scissor.X + ctx->Scissor.Width) {
+	 width -= (px + width - (ctx->Scissor.X + ctx->Scissor.Width));
+      }
+      /* clip bottom */
+      if (py < ctx->Scissor.Y) {
+	 scissoredUnpack.SkipRows += (ctx->Scissor.Y - py);
+	 height -= (ctx->Scissor.Y - py);
+	 py = ctx->Scissor.Y;
+      }
+      /* clip top */
+      if (py + height >= ctx->Scissor.Y + ctx->Scissor.Height) {
+	 height -= (py + height - (ctx->Scissor.Y + ctx->Scissor.Height));
+      }
+
+      if (width <= 0 || height <= 0)
+	 return GL_TRUE;     /* totally scissored away */
+   }
+   else {
+      finalUnpack = unpack;
+   }
+
+   /* compute pixel value */
+   {
+      GLint r = (GLint) (ctx->Current.RasterColor[0] * 255.0f);
+      GLint g = (GLint) (ctx->Current.RasterColor[1] * 255.0f);
+      GLint b = (GLint) (ctx->Current.RasterColor[2] * 255.0f);
+      /*GLint a = (GLint)(ctx->Current.RasterColor[3]*255.0f); */
+      if (fxMesa->bgrOrder) {
+	 color = (TdfxU16)
+	    (((TdfxU16) 0xf8 & b) << (11 - 3)) |
+	    (((TdfxU16) 0xfc & g) << (5 - 3 + 1)) |
+	    (((TdfxU16) 0xf8 & r) >> 3);
+      }
+      else
+	 color = (TdfxU16)
+	    (((TdfxU16) 0xf8 & r) << (11 - 3)) |
+	    (((TdfxU16) 0xfc & g) << (5 - 3 + 1)) |
+	    (((TdfxU16) 0xf8 & b) >> 3);
+   }
+
+   info.size = sizeof(info);
+   if (!TDFX_grLfbLock(fxMesa,
+		     GR_LFB_WRITE_ONLY,
+		     fxMesa->currentFB,
+		     GR_LFBWRITEMODE_565,
+		     GR_ORIGIN_UPPER_LEFT, FXFALSE, &info)) {
+#ifndef TDFX_SILENT
+      fprintf(stderr, "tdfx Driver: error locking the linear frame buffer\n");
+#endif
+      return GL_TRUE;
+   }
+
+   {
+      const GLint winX = fxMesa->x_offset;
+      const GLint winY = fxMesa->y_offset + fxMesa->height - 1;
+      /* The dest stride depends on the hardware and whether we're drawing
+       * to the front or back buffer.  This run-time test of the draw buffer
+       * seems to do the job for now.
+       */
+      const GLint dstStride = (fxMesa->glCtx->Color.DrawBuffer[0] == GL_FRONT)
+	 ? (fxMesa->screen_width) : (info.strideInBytes / 2);
+      GLint row;
+      /* compute dest address of bottom-left pixel in bitmap */
+      GLushort *dst = (GLushort *) info.lfbPtr
+	 + (winY - py) * dstStride + (winX + px);
+
+      for (row = 0; row < height; row++) {
+	 const GLubyte *src =
+	    (const GLubyte *) _mesa_image_address2d(finalUnpack,
+                                                    bitmap, width, height,
+                                                    GL_COLOR_INDEX,
+                                                    GL_BITMAP, row, 0);
+	 if (finalUnpack->LsbFirst) {
+	    /* least significant bit first */
+	    GLubyte mask = 1U << (finalUnpack->SkipPixels & 0x7);
+	    GLint col;
+	    for (col = 0; col < width; col++) {
+	       if (*src & mask) {
+		  if (inClipRects(fxMesa, winX + px + col, winY - py - row))
+		     dst[col] = color;
+	       }
+	       if (mask == 128U) {
+		  src++;
+		  mask = 1U;
+	       }
+	       else {
+		  mask = mask << 1;
+	       }
+	    }
+	    if (mask != 1)
+	       src++;
+	 }
+	 else {
+	    /* most significant bit first */
+	    GLubyte mask = 128U >> (finalUnpack->SkipPixels & 0x7);
+	    GLint col;
+	    for (col = 0; col < width; col++) {
+	       if (*src & mask) {
+		  if (inClipRects(fxMesa, winX + px + col, winY - py - row))
+		     dst[col] = color;
+	       }
+	       if (mask == 1U) {
+		  src++;
+		  mask = 128U;
+	       }
+	       else {
+		  mask = mask >> 1;
+	       }
+	    }
+	    if (mask != 128)
+	       src++;
+	 }
+	 dst -= dstStride;
+      }
+   }
+
+   TDFX_grLfbUnlock(fxMesa, GR_LFB_WRITE_ONLY, fxMesa->currentFB);
+   return GL_TRUE;
+}
+#endif
+
+#if 0
+GLboolean
+tdfx_bitmap_R8G8B8A8(GLcontext * ctx, GLint px, GLint py,
+		     GLsizei width, GLsizei height,
+		     const struct gl_pixelstore_attrib *unpack,
+		     const GLubyte * bitmap)
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   GrLfbInfo_t info;
+   GLuint color;
+   const struct gl_pixelstore_attrib *finalUnpack;
+   struct gl_pixelstore_attrib scissoredUnpack;
+
+   /* check if there's any raster operations enabled which we can't handle */
+   if (ctx->RasterMask & (ALPHATEST_BIT |
+			  BLEND_BIT |
+			  DEPTH_BIT |
+			  FOG_BIT |
+			  LOGIC_OP_BIT |
+			  SCISSOR_BIT |
+			  STENCIL_BIT |
+			  MASKING_BIT |
+			  MULTI_DRAW_BIT)) return GL_FALSE;
+
+   if (ctx->Scissor.Enabled) {
+      /* This is a bit tricky, but by carefully adjusting the px, py,
+       * width, height, skipPixels and skipRows values we can do
+       * scissoring without special code in the rendering loop.
+       */
+
+      /* we'll construct a new pixelstore struct */
+      finalUnpack = &scissoredUnpack;
+      scissoredUnpack = *unpack;
+      if (scissoredUnpack.RowLength == 0)
+	 scissoredUnpack.RowLength = width;
+
+      /* clip left */
+      if (px < ctx->Scissor.X) {
+	 scissoredUnpack.SkipPixels += (ctx->Scissor.X - px);
+	 width -= (ctx->Scissor.X - px);
+	 px = ctx->Scissor.X;
+      }
+      /* clip right */
+      if (px + width >= ctx->Scissor.X + ctx->Scissor.Width) {
+	 width -= (px + width - (ctx->Scissor.X + ctx->Scissor.Width));
+      }
+      /* clip bottom */
+      if (py < ctx->Scissor.Y) {
+	 scissoredUnpack.SkipRows += (ctx->Scissor.Y - py);
+	 height -= (ctx->Scissor.Y - py);
+	 py = ctx->Scissor.Y;
+      }
+      /* clip top */
+      if (py + height >= ctx->Scissor.Y + ctx->Scissor.Height) {
+	 height -= (py + height - (ctx->Scissor.Y + ctx->Scissor.Height));
+      }
+
+      if (width <= 0 || height <= 0)
+	 return GL_TRUE;     /* totally scissored away */
+   }
+   else {
+      finalUnpack = unpack;
+   }
+
+   /* compute pixel value */
+   {
+      GLint r = (GLint) (ctx->Current.RasterColor[0] * 255.0f);
+      GLint g = (GLint) (ctx->Current.RasterColor[1] * 255.0f);
+      GLint b = (GLint) (ctx->Current.RasterColor[2] * 255.0f);
+      GLint a = (GLint) (ctx->Current.RasterColor[3] * 255.0f);
+      color = PACK_BGRA32(r, g, b, a);
+   }
+
+   info.size = sizeof(info);
+   if (!TDFX_grLfbLock(fxMesa, GR_LFB_WRITE_ONLY,
+		     fxMesa->currentFB, GR_LFBWRITEMODE_8888,
+		     GR_ORIGIN_UPPER_LEFT, FXFALSE, &info)) {
+#ifndef TDFX_SILENT
+      fprintf(stderr, "tdfx Driver: error locking the linear frame buffer\n");
+#endif
+      return GL_TRUE;
+   }
+
+   {
+      const GLint winX = fxMesa->x_offset;
+      const GLint winY = fxMesa->y_offset + fxMesa->height - 1;
+      GLint dstStride;
+      GLuint *dst;
+      GLint row;
+
+      if (fxMesa->glCtx->Color.DrawBuffer[0] == GL_FRONT) {
+	 dstStride = fxMesa->screen_width;
+	 dst =
+	    (GLuint *) info.lfbPtr + (winY - py) * dstStride + (winX +
+								px);
+      }
+      else {
+	 dstStride = info.strideInBytes / 4;
+	 dst =
+	    (GLuint *) info.lfbPtr + (winY - py) * dstStride + (winX +
+								px);
+      }
+
+      /* compute dest address of bottom-left pixel in bitmap */
+      for (row = 0; row < height; row++) {
+	 const GLubyte *src =
+	    (const GLubyte *) _mesa_image_address2d(finalUnpack,
+                                                    bitmap, width, height,
+                                                    GL_COLOR_INDEX,
+                                                    GL_BITMAP, row, 0);
+	 if (finalUnpack->LsbFirst) {
+	    /* least significant bit first */
+	    GLubyte mask = 1U << (finalUnpack->SkipPixels & 0x7);
+	    GLint col;
+	    for (col = 0; col < width; col++) {
+	       if (*src & mask) {
+		  if (inClipRects(fxMesa, winX + px + col, winY - py - row))
+		     dst[col] = color;
+	       }
+	       if (mask == 128U) {
+		  src++;
+		  mask = 1U;
+	       }
+	       else {
+		  mask = mask << 1;
+	       }
+	    }
+	    if (mask != 1)
+	       src++;
+	 }
+	 else {
+	    /* most significant bit first */
+	    GLubyte mask = 128U >> (finalUnpack->SkipPixels & 0x7);
+	    GLint col;
+	    for (col = 0; col < width; col++) {
+	       if (*src & mask) {
+		  if (inClipRects(fxMesa, winX + px + col, winY - py - row))
+		     dst[col] = color;
+	       }
+	       if (mask == 1U) {
+		  src++;
+		  mask = 128U;
+	       }
+	       else {
+		  mask = mask >> 1;
+	       }
+	    }
+	    if (mask != 128)
+	       src++;
+	 }
+	 dst -= dstStride;
+      }
+   }
+
+   TDFX_grLfbUnlock(fxMesa, GR_LFB_WRITE_ONLY, fxMesa->currentFB);
+   return GL_TRUE;
+}
+#endif
+
+/* glReadPixels fast path for 16-bit buffers: GL_RGB/GL_UNSIGNED_SHORT_5_6_5
+ * with no scale/bias or colour-map transfer op is copied row by row from the
+ * Glide LFB; anything else goes to _swrast_ReadPixels.
+ * NOTE(review): stride is picked from Color.DrawBuffer[0] although pixels come
+ * from fxMesa->ReadBuffer, and the request is not clipped here -- confirm. */
+void
+tdfx_readpixels_R5G6B5(GLcontext * ctx, GLint x, GLint y,
+		       GLsizei width, GLsizei height,
+		       GLenum format, GLenum type,
+		       const struct gl_pixelstore_attrib *packing,
+		       GLvoid * dstImage)
+{
+   const GLboolean fastPath = format == GL_RGB &&
+      type == GL_UNSIGNED_SHORT_5_6_5 &&
+      !(ctx->_ImageTransferState & (IMAGE_SCALE_BIAS_BIT | IMAGE_MAP_COLOR_BIT));
+
+   if (!fastPath) {
+      _swrast_ReadPixels(ctx, x, y, width, height, format, type, packing,
+                         dstImage);
+      return;
+   }
+   {
+      tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+      __DRIdrawable *const readable = fxMesa->driReadable;
+      /* screen position of window pixel (x, y); LFB rows count from the top */
+      const GLint scrX = readable->x + x;
+      const GLint scrY = (readable->y + readable->h - 1) - y;
+      GrLfbInfo_t info;
+      LOCK_HARDWARE(fxMesa);
+      info.size = sizeof(info);
+      if (fxMesa->Glide.grLfbLock(GR_LFB_READ_ONLY, fxMesa->ReadBuffer,
+                                  GR_LFBWRITEMODE_ANY, GR_ORIGIN_UPPER_LEFT,
+                                  FXFALSE, &info)) {
+         /* source stride is in GLushort pixels, destination stride in bytes */
+         const GLint srcStride =
+            (fxMesa->glCtx->Color.DrawBuffer[0] == GL_FRONT)
+            ? (fxMesa->screen_width) : (info.strideInBytes / 2);
+         const GLushort *src =
+            (const GLushort *) info.lfbPtr + scrY * srcStride + scrX;
+         GLubyte *dst = (GLubyte *) _mesa_image_address2d(packing, dstImage,
+            width, height, format, type, 0, 0);
+         const GLint dstStride =
+            _mesa_image_row_stride(packing, width, format, type);
+         const GLint rowBytes = width * 2;
+         GLint rowsLeft;
+         /* client rows advance while frame-buffer rows step backwards */
+         for (rowsLeft = height; rowsLeft > 0; rowsLeft--) {
+            memcpy(dst, src, rowBytes);
+            dst += dstStride;
+            src -= srcStride;
+         }
+         fxMesa->Glide.grLfbUnlock(GR_LFB_READ_ONLY, fxMesa->ReadBuffer);
+      }
+      UNLOCK_HARDWARE(fxMesa);
+   }
+}
+
+/* glReadPixels fast path for 32-bit buffers: GL_BGRA with GL_UNSIGNED_BYTE or
+ * GL_UNSIGNED_INT_8_8_8_8 and no scale/bias or colour-map transfer op is copied
+ * row by row from the Glide LFB; anything else goes to _swrast_ReadPixels.
+ * NOTE(review): stride is picked from Color.DrawBuffer[0] although pixels come
+ * from fxMesa->ReadBuffer, and the request is not clipped here -- confirm. */
+void
+tdfx_readpixels_R8G8B8A8(GLcontext * ctx, GLint x, GLint y,
+                         GLsizei width, GLsizei height,
+                         GLenum format, GLenum type,
+                         const struct gl_pixelstore_attrib *packing,
+                         GLvoid * dstImage)
+{
+   const GLboolean formatOk = format == GL_BGRA &&
+      (type == GL_UNSIGNED_INT_8_8_8_8 || type == GL_UNSIGNED_BYTE);
+
+   if (!formatOk ||
+       (ctx->_ImageTransferState & (IMAGE_SCALE_BIAS_BIT | IMAGE_MAP_COLOR_BIT))) {
+      _swrast_ReadPixels(ctx, x, y, width, height, format, type, packing,
+                         dstImage);
+      return;
+   }
+
+   {
+      tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+      __DRIdrawable *const readable = fxMesa->driReadable;
+      /* screen position of window pixel (x, y); LFB rows count from the top */
+      const GLint scrX = readable->x + x;
+      const GLint scrY = (readable->y + readable->h - 1) - y;
+      GrLfbInfo_t info;
+
+      LOCK_HARDWARE(fxMesa);
+      info.size = sizeof(info);
+      if (fxMesa->Glide.grLfbLock(GR_LFB_READ_ONLY, fxMesa->ReadBuffer,
+                                  GR_LFBWRITEMODE_ANY, GR_ORIGIN_UPPER_LEFT,
+                                  FXFALSE, &info)) {
+         /* source stride is in GLuint pixels, destination stride in bytes */
+         const GLint srcStride =
+            (fxMesa->glCtx->Color.DrawBuffer[0] == GL_FRONT)
+            ? (fxMesa->screen_width) : (info.strideInBytes / 4);
+         const GLuint *src =
+            (const GLuint *) info.lfbPtr + scrY * srcStride + scrX;
+         const GLint dstStride =
+            _mesa_image_row_stride(packing, width, format, type);
+         GLubyte *dst = (GLubyte *) _mesa_image_address2d(packing, dstImage,
+            width, height, format, type, 0, 0);
+         const GLint rowBytes = width * 4;
+         GLint rowsLeft;
+         /* client rows advance while frame-buffer rows step backwards */
+         for (rowsLeft = height; rowsLeft > 0; rowsLeft--) {
+            memcpy(dst, src, rowBytes);
+            dst += dstStride;
+            src -= srcStride;
+         }
+         fxMesa->Glide.grLfbUnlock(GR_LFB_READ_ONLY, fxMesa->ReadBuffer);
+      }
+      UNLOCK_HARDWARE(fxMesa);
+   }
+}
+
+/*
+ * glDrawPixels fast path for 32-bit buffers: GL_BGRA pixels of type
+ * GL_UNSIGNED_INT_8_8_8_8 or GL_UNSIGNED_BYTE are copied row by row into the
+ * Glide LFB, locked with the pixel pipeline enabled (FXTRUE).  Any state named
+ * in the guard below, or a front-buffer draw whose region is not fully covered
+ * by the cliprects, sends the request to _swrast_DrawPixels instead.
+ * NOTE(review): nothing here clips or bounds-checks a back-buffer draw.
+ */
+void
+tdfx_drawpixels_R8G8B8A8(GLcontext * ctx, GLint x, GLint y,
+                         GLsizei width, GLsizei height,
+                         GLenum format, GLenum type,
+                         const struct gl_pixelstore_attrib *unpack,
+                         const GLvoid * pixels)
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+
+   if ((!(format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8) &&
+	!(format == GL_BGRA && type == GL_UNSIGNED_BYTE)) ||
+       ctx->Pixel.ZoomX != 1.0F ||
+       ctx->Pixel.ZoomY != 1.0F ||
+       (ctx->_ImageTransferState & (IMAGE_SCALE_BIAS_BIT|
+				    IMAGE_MAP_COLOR_BIT)) ||
+       ctx->Color.AlphaEnabled ||
+       ctx->Depth.Test ||
+       ctx->Fog.Enabled ||
+       ctx->Scissor.Enabled ||
+       ctx->Stencil._Enabled ||
+       !ctx->Color.ColorMask[0][0] ||
+       !ctx->Color.ColorMask[0][1] ||
+       !ctx->Color.ColorMask[0][2] ||
+       !ctx->Color.ColorMask[0][3] ||
+       ctx->Color.ColorLogicOpEnabled ||
+       ctx->Texture._EnabledUnits ||
+       fxMesa->Fallback)
+   {
+      _swrast_DrawPixels( ctx, x, y, width, height, format, type,
+			  unpack, pixels );
+      return;
+   }
+
+   {
+      GrLfbInfo_t info;
+      /* screen position of window pixel (x, y) in a top-left-origin buffer */
+      const GLint winX = fxMesa->x_offset;
+      const GLint winY = fxMesa->y_offset + fxMesa->height - 1;
+      const GLint scrX = winX + x;
+      const GLint scrY = winY - y;
+
+      /* lock early to make sure cliprects are right */
+      LOCK_HARDWARE(fxMesa);
+
+      /* make sure hardware has latest blend funcs */
+      if (ctx->Color.BlendEnabled) {
+         fxMesa->dirty |= TDFX_UPLOAD_BLEND_FUNC;
+         tdfxEmitHwStateLocked( fxMesa );
+      }
+
+      /* front buffer only: give up unless the whole region is visible */
+      if (fxMesa->glCtx->Color.DrawBuffer[0] == GL_FRONT &&
+          !inClipRects_Region(fxMesa, scrX, scrY, width, height)) {
+         UNLOCK_HARDWARE(fxMesa);
+         _swrast_DrawPixels( ctx, x, y, width, height, format, type,
+                             unpack, pixels );
+         return;
+      }
+
+      info.size = sizeof(info);
+      if (fxMesa->Glide.grLfbLock(GR_LFB_WRITE_ONLY, fxMesa->DrawBuffer,
+                                  GR_LFBWRITEMODE_8888, GR_ORIGIN_UPPER_LEFT,
+                                  FXTRUE, &info))
+      {
+         const GLint dstStride = (fxMesa->glCtx->Color.DrawBuffer[0] == GL_FRONT)
+            ? (fxMesa->screen_width * 4) : (info.strideInBytes);
+         GLubyte *dst = (GLubyte *) info.lfbPtr
+            + scrY * dstStride + scrX * 4;
+         const GLint srcStride =
+            _mesa_image_row_stride(unpack, width, format, type);
+         const GLubyte *src = (GLubyte *) _mesa_image_address2d(unpack,
+            pixels, width, height, format, type, 0, 0);
+         const GLint widthInBytes = width * 4;
+         GLint row;
+         /* the format/type guard above already limits us to the two BGRA
+          * layouts, so every row is a straight copy; dst steps backwards */
+         for (row = 0; row < height; row++) {
+            memcpy(dst, src, widthInBytes);
+            dst -= dstStride;
+            src += srcStride;
+         }
+         fxMesa->Glide.grLfbUnlock(GR_LFB_WRITE_ONLY, fxMesa->DrawBuffer);
+      }
+      UNLOCK_HARDWARE(fxMesa);
+   }
+}
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_pixels.h b/src/mesa/drivers/dri/tdfx/tdfx_pixels.h
new file mode 100644
index 0000000000..f5e5427653
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_pixels.h
@@ -0,0 +1,76 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Brian Paul <brianp@valinux.com>
+ *	Nathan Hand <nhand@valinux.com>
+ *
+ */
+
+#ifndef __TDFX_PIXELS_H__
+#define __TDFX_PIXELS_H__
+
+#include "main/context.h"
+
+extern void
+tdfx_bitmap_R5G6B5( GLcontext *ctx, GLint px, GLint py,
+		    GLsizei width, GLsizei height,
+		    const struct gl_pixelstore_attrib *unpack,
+		    const GLubyte *bitmap ); /* NOTE(review): body in tdfx_pixels.c is under #if 0 and returns GLboolean */
+
+extern void
+tdfx_bitmap_R8G8B8A8( GLcontext *ctx, GLint px, GLint py,
+		      GLsizei width, GLsizei height,
+		      const struct gl_pixelstore_attrib *unpack,
+		      const GLubyte *bitmap ); /* NOTE(review): body in tdfx_pixels.c is under #if 0 and returns GLboolean */
+
+extern void
+tdfx_readpixels_R5G6B5( GLcontext *ctx, GLint x, GLint y,
+			GLsizei width, GLsizei height,
+			GLenum format, GLenum type,
+			const struct gl_pixelstore_attrib *packing,
+			GLvoid *dstImage ); /* GL_RGB/GL_UNSIGNED_SHORT_5_6_5 fast path, else _swrast_ReadPixels */
+
+extern void
+tdfx_readpixels_R8G8B8A8( GLcontext *ctx, GLint x, GLint y,
+			  GLsizei width, GLsizei height,
+			  GLenum format, GLenum type,
+			  const struct gl_pixelstore_attrib *packing,
+			  GLvoid *dstImage ); /* GL_BGRA 8888/ubyte fast path, else _swrast_ReadPixels */
+
+extern void
+tdfx_drawpixels_R8G8B8A8( GLcontext *ctx, GLint x, GLint y,
+			  GLsizei width, GLsizei height,
+			  GLenum format, GLenum type,
+			  const struct gl_pixelstore_attrib *unpack,
+			  const GLvoid *pixels ); /* GL_BGRA 8888/ubyte fast path, else _swrast_DrawPixels */
+
+#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_render.c b/src/mesa/drivers/dri/tdfx/tdfx_render.c
new file mode 100644
index 0000000000..979bcd4514
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_render.c
@@ -0,0 +1,804 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * New fixes:
+ *	Daniel Borca <dborca@users.sourceforge.net>, 19 Jul 2004
+ *
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Brian Paul <brianp@valinux.com>
+ *
+ */
+
+#include "tdfx_context.h"
+#include "tdfx_render.h"
+#include "tdfx_state.h"
+#include "tdfx_texman.h"
+#include "swrast/swrast.h"
+
+/* Clear the buffers selected by 'mask'.  Color, depth and stencil go through
+ * Glide where the hardware can do it; accum and the rest go to _swrast_Clear. */
+static void tdfxClear( GLcontext *ctx, GLbitfield mask )
+{
+   tdfxContextPtr fxMesa = (tdfxContextPtr) ctx->DriverCtx;
+   GLbitfield softwareMask = mask & (BUFFER_BIT_ACCUM);
+   const GLuint stencil_size =
+      fxMesa->haveHwStencil ? fxMesa->glCtx->Visual.stencilBits : 0;
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "tdfxClear(0x%x)\n", mask);
+   }
+
+   /* Need this check to respond to glScissor and clipping updates */
+   if ((fxMesa->new_state & (TDFX_NEW_CLIP | TDFX_NEW_DEPTH)) ||
+       (fxMesa->dirty & TDFX_UPLOAD_COLOR_MASK)) {
+      tdfxDDUpdateHwState(ctx);
+   }
+
+   /* we can't clear accum buffers in hardware (already in softwareMask) */
+   mask &= ~(BUFFER_BIT_ACCUM);
+
+   if (mask & BUFFER_BIT_STENCIL) {
+      if (!fxMesa->haveHwStencil || (ctx->Stencil.WriteMask[0] & 0xff) != 0xff) {
+         /* Napalm seems to have trouble with stencil write masks != 0xff */
+         /* do stencil clear in software */
+         mask &= ~(BUFFER_BIT_STENCIL);
+         softwareMask |= BUFFER_BIT_STENCIL;
+      }
+   }
+
+   if (fxMesa->glCtx->Visual.redBits != 8) {
+      /* can only do color masking if running in 24/32bpp on Napalm */
+      if (ctx->Color.ColorMask[0][RCOMP] != ctx->Color.ColorMask[0][GCOMP] ||
+          ctx->Color.ColorMask[0][GCOMP] != ctx->Color.ColorMask[0][BCOMP]) {
+         softwareMask |= (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT));
+         mask &= ~(BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT);
+      }
+   }
+
+   if (fxMesa->haveHwStencil) {
+      /*
+       * If we want to clear stencil, it must be enabled
+       * in the HW, even if the stencil test is not enabled
+       * in the OGL state.
+       */
+      LOCK_HARDWARE(fxMesa);
+      if (mask & BUFFER_BIT_STENCIL) {
+         fxMesa->Glide.grStencilMask(/*ctx->Stencil.WriteMask*/ 0xff);
+         /* set stencil ref value = desired clear value */
+         fxMesa->Glide.grStencilFunc(GR_CMP_ALWAYS,
+                                     (fxMesa->Stencil.Clear & 0xff), 0xff);
+         fxMesa->Glide.grStencilOp(GR_STENCILOP_REPLACE,
+                                   GR_STENCILOP_REPLACE, GR_STENCILOP_REPLACE);
+         fxMesa->Glide.grEnable(GR_STENCIL_MODE_EXT);
+      }
+      else {
+         fxMesa->Glide.grDisable(GR_STENCIL_MODE_EXT);
+      }
+      UNLOCK_HARDWARE(fxMesa);
+   }
+
+   /*
+    * This may be ugly, but it's needed in order to work around a number
+    * of Glide bugs.
+    */
+   BEGIN_CLIP_LOOP(fxMesa);
+   {
+      /*
+       * This could probably be done fancier but doing each possible case
+       * explicitly is less error prone.
+       */
+      switch (mask & ~BUFFER_BIT_STENCIL) {
+      case BUFFER_BIT_BACK_LEFT | BUFFER_BIT_DEPTH:
+         /* back buffer & depth */
+         FX_grColorMaskv_NoLock(ctx, true4); /* work around Voodoo3 bug */
+         fxMesa->Glide.grDepthMask(FXTRUE);
+         fxMesa->Glide.grRenderBuffer(GR_BUFFER_BACKBUFFER);
+         if (stencil_size > 0) {
+            fxMesa->Glide.grBufferClearExt(fxMesa->Color.ClearColor,
+                                           fxMesa->Color.ClearAlpha,
+                                           fxMesa->Depth.Clear,
+                                           (FxU32) (ctx->Stencil.Clear & 0xff));
+         }
+         else
+            fxMesa->Glide.grBufferClear(fxMesa->Color.ClearColor,
+                                        fxMesa->Color.ClearAlpha,
+                                        fxMesa->Depth.Clear);
+         if (!ctx->Depth.Mask || !ctx->Depth.Test) {
+            fxMesa->Glide.grDepthMask(FXFALSE);
+         }
+         break;
+      case BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_DEPTH:
+         /* XXX it appears that the depth buffer isn't cleared when
+          * grRenderBuffer(GR_BUFFER_FRONTBUFFER) is set.
+          * This is a work-around.
+          */
+         /* clear depth: color writes off, full (unmasked) depth clear value */
+         fxMesa->Glide.grDepthMask(FXTRUE);
+         fxMesa->Glide.grRenderBuffer(GR_BUFFER_BACKBUFFER);
+         FX_grColorMaskv_NoLock(ctx, false4);
+         if (stencil_size > 0)
+            fxMesa->Glide.grBufferClearExt(fxMesa->Color.ClearColor,
+                                           fxMesa->Color.ClearAlpha,
+                                           fxMesa->Depth.Clear,
+                                           (FxU32) (ctx->Stencil.Clear & 0xff));
+         else
+            fxMesa->Glide.grBufferClear(fxMesa->Color.ClearColor,
+                                        fxMesa->Color.ClearAlpha,
+                                        fxMesa->Depth.Clear);
+         /* clear front */
+         FX_grColorMaskv_NoLock(ctx, true4);
+         fxMesa->Glide.grRenderBuffer(GR_BUFFER_FRONTBUFFER);
+         if (stencil_size > 0)
+            fxMesa->Glide.grBufferClearExt(fxMesa->Color.ClearColor,
+                                           fxMesa->Color.ClearAlpha,
+                                           fxMesa->Depth.Clear,
+                                           (FxU32) (ctx->Stencil.Clear & 0xff));
+         else
+            fxMesa->Glide.grBufferClear(fxMesa->Color.ClearColor,
+                                        fxMesa->Color.ClearAlpha,
+                                        fxMesa->Depth.Clear);
+         if (!ctx->Depth.Mask || !ctx->Depth.Test) {
+            fxMesa->Glide.grDepthMask(FXFALSE);
+         }
+         break;
+      case BUFFER_BIT_BACK_LEFT:
+         /* back buffer only */
+         fxMesa->Glide.grDepthMask(FXFALSE);
+         fxMesa->Glide.grRenderBuffer(GR_BUFFER_BACKBUFFER);
+         if (stencil_size > 0)
+            fxMesa->Glide.grBufferClearExt(fxMesa->Color.ClearColor,
+                                           fxMesa->Color.ClearAlpha,
+                                           fxMesa->Depth.Clear,
+                                           (FxU32) (ctx->Stencil.Clear & 0xff));
+         else
+            fxMesa->Glide.grBufferClear(fxMesa->Color.ClearColor,
+                                        fxMesa->Color.ClearAlpha,
+                                        fxMesa->Depth.Clear);
+         if (ctx->Depth.Mask && ctx->Depth.Test) {
+            fxMesa->Glide.grDepthMask(FXTRUE);
+         }
+         break;
+      case BUFFER_BIT_FRONT_LEFT:
+         /* front buffer only */
+         fxMesa->Glide.grDepthMask(FXFALSE);
+         fxMesa->Glide.grRenderBuffer(GR_BUFFER_FRONTBUFFER);
+         if (stencil_size > 0)
+            fxMesa->Glide.grBufferClearExt(fxMesa->Color.ClearColor,
+                                           fxMesa->Color.ClearAlpha,
+                                           fxMesa->Depth.Clear,
+                                           (FxU32) (ctx->Stencil.Clear & 0xff));
+         else
+            fxMesa->Glide.grBufferClear(fxMesa->Color.ClearColor,
+                                        fxMesa->Color.ClearAlpha,
+                                        fxMesa->Depth.Clear);
+         if (ctx->Depth.Mask && ctx->Depth.Test) {
+            fxMesa->Glide.grDepthMask(FXTRUE);
+         }
+         break;
+      case BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT:
+         /* front and back */
+         fxMesa->Glide.grDepthMask(FXFALSE);
+         fxMesa->Glide.grRenderBuffer(GR_BUFFER_BACKBUFFER);
+         if (stencil_size > 0)
+            fxMesa->Glide.grBufferClearExt(fxMesa->Color.ClearColor,
+                                           fxMesa->Color.ClearAlpha,
+                                           fxMesa->Depth.Clear,
+                                           (FxU32) (ctx->Stencil.Clear & 0xff));
+         else
+            fxMesa->Glide.grBufferClear(fxMesa->Color.ClearColor,
+                                        fxMesa->Color.ClearAlpha,
+                                        fxMesa->Depth.Clear);
+         fxMesa->Glide.grRenderBuffer(GR_BUFFER_FRONTBUFFER);
+         if (stencil_size > 0)
+            fxMesa->Glide.grBufferClearExt(fxMesa->Color.ClearColor,
+                                           fxMesa->Color.ClearAlpha,
+                                           fxMesa->Depth.Clear,
+                                           (FxU32) (ctx->Stencil.Clear & 0xff));
+         else
+            fxMesa->Glide.grBufferClear(fxMesa->Color.ClearColor,
+                                        fxMesa->Color.ClearAlpha,
+                                        fxMesa->Depth.Clear);
+         if (ctx->Depth.Mask && ctx->Depth.Test) {
+            fxMesa->Glide.grDepthMask(FXTRUE);
+         }
+         break;
+      case BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_BACK_LEFT | BUFFER_BIT_DEPTH:
+         /* clear front */
+         fxMesa->Glide.grDepthMask(FXFALSE);
+         fxMesa->Glide.grRenderBuffer(GR_BUFFER_FRONTBUFFER);
+         if (stencil_size > 0)
+            fxMesa->Glide.grBufferClearExt(fxMesa->Color.ClearColor,
+                                           fxMesa->Color.ClearAlpha,
+                                           fxMesa->Depth.Clear,
+                                           (FxU32) (ctx->Stencil.Clear & 0xff));
+         else
+            fxMesa->Glide.grBufferClear(fxMesa->Color.ClearColor,
+                                        fxMesa->Color.ClearAlpha,
+                                        fxMesa->Depth.Clear);
+         /* clear back and depth */
+         fxMesa->Glide.grDepthMask(FXTRUE);
+         fxMesa->Glide.grRenderBuffer(GR_BUFFER_BACKBUFFER);
+         if (stencil_size > 0)
+            fxMesa->Glide.grBufferClearExt(fxMesa->Color.ClearColor,
+                                           fxMesa->Color.ClearAlpha,
+                                           fxMesa->Depth.Clear,
+                                           (FxU32) (ctx->Stencil.Clear & 0xff));
+         else
+            fxMesa->Glide.grBufferClear(fxMesa->Color.ClearColor,
+                                        fxMesa->Color.ClearAlpha,
+                                        fxMesa->Depth.Clear);
+         if (!ctx->Depth.Mask || !ctx->Depth.Test) {
+            fxMesa->Glide.grDepthMask(FXFALSE);
+         }
+         break;
+      case BUFFER_BIT_DEPTH:
+         /* just the depth buffer */
+         fxMesa->Glide.grRenderBuffer(GR_BUFFER_BACKBUFFER);
+         FX_grColorMaskv_NoLock(ctx, false4);
+         fxMesa->Glide.grDepthMask(FXTRUE);
+         if (stencil_size > 0)
+            fxMesa->Glide.grBufferClearExt(fxMesa->Color.ClearColor,
+                                           fxMesa->Color.ClearAlpha,
+                                           fxMesa->Depth.Clear,
+                                           (FxU32) (ctx->Stencil.Clear & 0xff));
+         else
+            fxMesa->Glide.grBufferClear(fxMesa->Color.ClearColor,
+                                        fxMesa->Color.ClearAlpha,
+                                        fxMesa->Depth.Clear);
+         FX_grColorMaskv_NoLock(ctx, true4);
+         if (ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT)
+            fxMesa->Glide.grRenderBuffer(GR_BUFFER_FRONTBUFFER);
+         if (!ctx->Depth.Test || !ctx->Depth.Mask)
+            fxMesa->Glide.grDepthMask(FXFALSE);
+         break;
+      default:
+         /* clear no color buffers or depth buffer but might clear stencil */
+         if (stencil_size > 0 && (mask & BUFFER_BIT_STENCIL)) {
+            /* XXX need this RenderBuffer call to work around Glide bug */
+            fxMesa->Glide.grRenderBuffer(GR_BUFFER_BACKBUFFER);
+            fxMesa->Glide.grDepthMask(FXFALSE);
+            FX_grColorMaskv_NoLock(ctx, false4);
+            fxMesa->Glide.grBufferClearExt(fxMesa->Color.ClearColor,
+                                           fxMesa->Color.ClearAlpha,
+                                           fxMesa->Depth.Clear,
+                                           (FxU32) (ctx->Stencil.Clear & 0xff));
+            if (ctx->Depth.Mask && ctx->Depth.Test) {
+               fxMesa->Glide.grDepthMask(FXTRUE);
+            }
+            FX_grColorMaskv_NoLock(ctx, true4);
+            if (ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT)
+               fxMesa->Glide.grRenderBuffer(GR_BUFFER_FRONTBUFFER);
+         }
+      }
+   }
+   END_CLIP_LOOP(fxMesa);
+
+   if (fxMesa->haveHwStencil) {
+      /* Stencil mode was either enabled or disabled above, independent of
+       * the GL stencil state, so always ask for it to be uploaded again.
+       */
+      fxMesa->dirty |= TDFX_UPLOAD_STENCIL;
+   }
+
+   if (softwareMask)
+      _swrast_Clear(ctx, softwareMask);
+}
+
+
+
+static void tdfxFinish( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   /* Finish hook (installed by tdfxInitRenderFuncs): grFinish() under the HW lock. */
+   FLUSH_BATCH( fxMesa );
+   /* FLUSH_BATCH above is defined empty in tdfx_render.h. */
+   LOCK_HARDWARE( fxMesa );
+   fxMesa->Glide.grFinish();
+   UNLOCK_HARDWARE( fxMesa );
+}
+
+static void tdfxFlush( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   /* Flush hook (installed by tdfxInitRenderFuncs): grFlush() under the HW lock. */
+   FLUSH_BATCH( fxMesa );
+   /* FLUSH_BATCH above is defined empty in tdfx_render.h. */
+   LOCK_HARDWARE( fxMesa );
+   fxMesa->Glide.grFlush();
+   UNLOCK_HARDWARE( fxMesa );
+}
+
+
+#if 0
+/* Debug-only helper (compiled out): translate a GR_CMBX_* combine source
+ * into its symbolic name, for the disabled printf dump in
+ * uploadTextureEnv().  Values that are not in the table yield "".
+ */
+static const char *texSource(int k)
+{
+   static const struct {
+      int value;
+      const char *name;
+   } sources[] = {
+      { GR_CMBX_ZERO,                "GR_CMBX_ZERO" },
+      { GR_CMBX_TEXTURE_ALPHA,       "GR_CMBX_TEXTURE_ALPHA" },
+      { GR_CMBX_ALOCAL,              "GR_CMBX_ALOCAL" },
+      { GR_CMBX_AOTHER,              "GR_CMBX_AOTHER" },
+      { GR_CMBX_B,                   "GR_CMBX_B" },
+      { GR_CMBX_CONSTANT_ALPHA,      "GR_CMBX_CONSTANT_ALPHA" },
+      { GR_CMBX_CONSTANT_COLOR,      "GR_CMBX_CONSTANT_COLOR" },
+      { GR_CMBX_DETAIL_FACTOR,       "GR_CMBX_DETAIL_FACTOR" },
+      { GR_CMBX_ITALPHA,             "GR_CMBX_ITALPHA" },
+      { GR_CMBX_ITRGB,               "GR_CMBX_ITRGB" },
+      { GR_CMBX_LOCAL_TEXTURE_ALPHA, "GR_CMBX_LOCAL_TEXTURE_ALPHA" },
+      { GR_CMBX_LOCAL_TEXTURE_RGB,   "GR_CMBX_LOCAL_TEXTURE_RGB" },
+      { GR_CMBX_LOD_FRAC,            "GR_CMBX_LOD_FRAC" },
+      { GR_CMBX_OTHER_TEXTURE_ALPHA, "GR_CMBX_OTHER_TEXTURE_ALPHA" },
+      { GR_CMBX_OTHER_TEXTURE_RGB,   "GR_CMBX_OTHER_TEXTURE_RGB" },
+      { GR_CMBX_TEXTURE_RGB,         "GR_CMBX_TEXTURE_RGB" },
+      { GR_CMBX_TMU_CALPHA,          "GR_CMBX_TMU_CALPHA" },
+      { GR_CMBX_TMU_CCOLOR,          "GR_CMBX_TMU_CCOLOR" }
+   };
+
+   const unsigned int count = sizeof(sources) / sizeof(sources[0]);
+   unsigned int i;
+
+   /* Linear scan; the table is tiny and only used for debug output. */
+   for (i = 0; i < count; i++) {
+      if (sources[i].value == k) {
+         return sources[i].name;
+      }
+   }
+
+   /* unknown source */
+   return "";
+}
+#endif
+
+#if 0
+/* Debug-only helper (compiled out): symbolic name of a GR_FUNC_MODE_*
+ * value, or "" when the value is not one of the five listed.
+ */
+static const char *texMode(int k)
+{
+   if (k == GR_FUNC_MODE_ZERO)
+      return "GR_FUNC_MODE_ZERO";
+   if (k == GR_FUNC_MODE_X)
+      return "GR_FUNC_MODE_X";
+   if (k == GR_FUNC_MODE_ONE_MINUS_X)
+      return "GR_FUNC_MODE_ONE_MINUS_X";
+   if (k == GR_FUNC_MODE_NEGATIVE_X)
+      return "GR_FUNC_MODE_NEGATIVE_X";
+   if (k == GR_FUNC_MODE_X_MINUS_HALF)
+      return "GR_FUNC_MODE_X_MINUS_HALF";
+   return "";
+}
+#endif
+
+#if 0
+static const char *texInvert(int k)
+{  /* Debug-only helper (compiled out): names a boolean invert flag. */
+   return (k == 0) ? "FXFALSE" : "FXTRUE";
+}
+#endif
+
+/* Send the cached per-TMU texture combine setup to Glide: the *Ext combiner
+ * calls when TDFX_IS_NAPALM() is true, plain grTexCombine otherwise. */
+static void uploadTextureEnv( tdfxContextPtr fxMesa )
+{
+   int unit;
+
+   if (TDFX_IS_NAPALM(fxMesa)) {
+      for (unit = 0; unit < TDFX_NUM_TMU; unit++) {
+#if 0
+         printf("upload env %d\n", unit);
+         printf("   cSourceA = %s\t", texSource(fxMesa->TexCombineExt[unit].Color.SourceA));
+         printf("     cModeA = %s\n", texMode(fxMesa->TexCombineExt[unit].Color.ModeA));
+         printf("   cSourceB = %s\t", texSource(fxMesa->TexCombineExt[unit].Color.SourceB));
+         printf("     cModeB = %s\n", texMode(fxMesa->TexCombineExt[unit].Color.ModeB));
+         printf("   cSourceC = %s\t", texSource(fxMesa->TexCombineExt[unit].Color.SourceC));
+         printf("   cInvertC = %s\n", texInvert(fxMesa->TexCombineExt[unit].Color.InvertC));
+         printf("   cSourceD = %s\t", texSource(fxMesa->TexCombineExt[unit].Color.SourceD));
+         printf("   cInvertD = %s\n", texInvert(fxMesa->TexCombineExt[unit].Color.InvertD));
+         printf("     cShift = %d\t", fxMesa->TexCombineExt[unit].Color.Shift);
+         printf("    cInvert = %d\n", fxMesa->TexCombineExt[unit].Color.Invert);
+         printf("   aSourceA = %s\t", texSource(fxMesa->TexCombineExt[unit].Alpha.SourceA));
+         printf("     aModeA = %s\n", texMode(fxMesa->TexCombineExt[unit].Alpha.ModeA));
+         printf("   aSourceB = %s\t", texSource(fxMesa->TexCombineExt[unit].Alpha.SourceB));
+         printf("     aModeB = %s\n", texMode(fxMesa->TexCombineExt[unit].Alpha.ModeB));
+         printf("   aSourceC = %s\t", texSource(fxMesa->TexCombineExt[unit].Alpha.SourceC));
+         printf("   aInvertC = %s\n", texInvert(fxMesa->TexCombineExt[unit].Alpha.InvertC));
+         printf("   aSourceD = %s\t", texSource(fxMesa->TexCombineExt[unit].Alpha.SourceD));
+         printf("   aInvertD = %s\n", texInvert(fxMesa->TexCombineExt[unit].Alpha.InvertD));
+         printf("     aShift = %d\t", fxMesa->TexCombineExt[unit].Alpha.Shift);
+         printf("    aInvert = %d\n", fxMesa->TexCombineExt[unit].Alpha.Invert);
+         printf("      Color = 0x%08x\n", fxMesa->TexCombineExt[unit].EnvColor);
+#endif
+         fxMesa->Glide.grTexColorCombineExt(TDFX_TMU0 + unit,
+            fxMesa->TexCombineExt[unit].Color.SourceA,
+            fxMesa->TexCombineExt[unit].Color.ModeA,
+            fxMesa->TexCombineExt[unit].Color.SourceB,
+            fxMesa->TexCombineExt[unit].Color.ModeB,
+            fxMesa->TexCombineExt[unit].Color.SourceC,
+            fxMesa->TexCombineExt[unit].Color.InvertC,
+            fxMesa->TexCombineExt[unit].Color.SourceD,
+            fxMesa->TexCombineExt[unit].Color.InvertD,
+            fxMesa->TexCombineExt[unit].Color.Shift,
+            fxMesa->TexCombineExt[unit].Color.Invert);
+         fxMesa->Glide.grTexAlphaCombineExt(TDFX_TMU0 + unit,
+            fxMesa->TexCombineExt[unit].Alpha.SourceA,
+            fxMesa->TexCombineExt[unit].Alpha.ModeA,
+            fxMesa->TexCombineExt[unit].Alpha.SourceB,
+            fxMesa->TexCombineExt[unit].Alpha.ModeB,
+            fxMesa->TexCombineExt[unit].Alpha.SourceC,
+            fxMesa->TexCombineExt[unit].Alpha.InvertC,
+            fxMesa->TexCombineExt[unit].Alpha.SourceD,
+            fxMesa->TexCombineExt[unit].Alpha.InvertD,
+            fxMesa->TexCombineExt[unit].Alpha.Shift,
+            fxMesa->TexCombineExt[unit].Alpha.Invert);
+         fxMesa->Glide.grConstantColorValueExt(TDFX_TMU0 + unit,
+            fxMesa->TexCombineExt[unit].EnvColor);
+      }
+      return;
+   }
+   /* Voodoo3 */
+   for (unit = 0; unit < TDFX_NUM_TMU; unit++) {
+      fxMesa->Glide.grTexCombine(TDFX_TMU0 + unit,
+                                 fxMesa->TexCombine[unit].FunctionRGB,
+                                 fxMesa->TexCombine[unit].FactorRGB,
+                                 fxMesa->TexCombine[unit].FunctionAlpha,
+                                 fxMesa->TexCombine[unit].FactorAlpha,
+                                 fxMesa->TexCombine[unit].InvertRGB,
+                                 fxMesa->TexCombine[unit].InvertAlpha);
+   }
+}
+
+
+static void uploadTextureParams( tdfxContextPtr fxMesa )
+{
+   int tmu;
+   for (tmu = 0; tmu < TDFX_NUM_TMU; tmu++) {
+      const struct tdfx_texparams *params = &fxMesa->TexParams[tmu];
+      /* Disabled debug dump of what is about to be sent for this TMU:
+      printf("upload params %d\n", tmu);
+      printf("   clamp %x %x\n", params->sClamp, params->tClamp);
+      printf("   filter %x %x\n", params->minFilt, params->magFilt);
+      printf("   mipmap %x %x\n", params->mmMode, params->LODblend);
+      printf("   lod bias %f\n", params->LodBias);
+      */
+      fxMesa->Glide.grTexClampMode(GR_TMU0 + tmu, params->sClamp, params->tClamp);
+      fxMesa->Glide.grTexFilterMode(GR_TMU0 + tmu, params->minFilt, params->magFilt);
+      fxMesa->Glide.grTexMipMapMode(GR_TMU0 + tmu, params->mmMode, params->LODblend);
+      fxMesa->Glide.grTexLodBiasValue(GR_TMU0 + tmu, CLAMP(params->LodBias, -8, 7.75));
+   }
+}
+
+
+static void uploadTextureSource( tdfxContextPtr fxMesa )
+{
+   int tmu;
+   for (tmu = 0; tmu < TDFX_NUM_TMU; tmu++) {
+      const struct tdfx_texsource *source = &fxMesa->TexSource[tmu];
+      /*
+      printf("upload source %d @ %d %p\n", tmu, source->StartAddress, source->Info);
+      */
+      if (!source->Info)
+         continue;   /* no texture info recorded for this TMU: nothing to send */
+      /*
+      printf("  smallLodLog2=%d largeLodLog2=%d ar=%d format=%d data=%p\n",
+             source->Info->smallLodLog2, source->Info->largeLodLog2,
+             source->Info->aspectRatioLog2, source->Info->format,
+             source->Info->data);
+      */
+      fxMesa->Glide.grTexSource(GR_TMU0 + tmu,
+                                source->StartAddress,
+                                source->EvenOdd,
+                                source->Info);
+   }
+}
+
+
+static void uploadTextureImages( tdfxContextPtr fxMesa )
+{
+   GLcontext *ctx = fxMesa->glCtx;
+   int unit;
+   for (unit = 0; unit < TDFX_NUM_TMU; unit++) {
+      struct gl_texture_object *tObj;
+      tdfxTexInfo *texInfo;
+      if (!(ctx->Texture.Unit[unit]._ReallyEnabled & (TEXTURE_1D_BIT|TEXTURE_2D_BIT)))
+         continue;
+      tObj = ctx->Texture.Unit[unit]._Current;
+      texInfo = TDFX_TEXTURE_DATA(tObj);
+      /* only textures flagged for reload and assigned to a TMU are downloaded */
+      if (!texInfo || !texInfo->reloadImages || texInfo->whichTMU == TDFX_TMU_NONE)
+         continue;
+      tdfxTMDownloadTexture(fxMesa, tObj);
+      texInfo->reloadImages = GL_FALSE;
+   }
+}
+
+
+
+/*
+ * Set Glide's clip window from the cliprect list (none: empty window; one:
+ * that rect, Y flipped against screen_height; more: left to the cliprect
+ * loop around drawing), then hand the drawable geometry to grDRIPosition.
+ */
+void tdfxUploadClipping( tdfxContextPtr fxMesa )
+{
+   __DRIdrawable *dPriv = fxMesa->driDrawable;
+   assert(dPriv);
+
+   switch (fxMesa->numClipRects) {
+   case 0:
+      /* all drawing clipped away */
+      fxMesa->Glide.grClipWindow(0, 0, 0, 0);
+      break;
+   case 1:
+      fxMesa->Glide.grClipWindow(fxMesa->pClipRects[0].x1,
+                                 fxMesa->screen_height - fxMesa->pClipRects[0].y2,
+                                 fxMesa->pClipRects[0].x2,
+                                 fxMesa->screen_height - fxMesa->pClipRects[0].y1);
+      break;
+   default:   /* else, we'll do a cliprect loop around all drawing */
+      break;
+   }
+
+   fxMesa->Glide.grDRIPosition( dPriv->x, dPriv->y, dPriv->w, dPriv->h,
+                                fxMesa->numClipRects, fxMesa->pClipRects );
+}
+
+/* Emit every state group flagged in fxMesa->dirty to Glide, then reset dirty to 0. */
+void tdfxEmitHwStateLocked( tdfxContextPtr fxMesa )
+{
+   if ( !fxMesa->dirty )
+      return;
+   /* NOTE(review): no LOCK_HARDWARE here; the "Locked" suffix suggests callers hold it - confirm. */
+   if ( fxMesa->dirty & TDFX_UPLOAD_COLOR_COMBINE ) {
+      if (TDFX_IS_NAPALM(fxMesa)) {
+         fxMesa->Glide.grColorCombineExt(fxMesa->ColorCombineExt.SourceA,
+                                         fxMesa->ColorCombineExt.ModeA,
+                                         fxMesa->ColorCombineExt.SourceB,
+                                         fxMesa->ColorCombineExt.ModeB,
+                                         fxMesa->ColorCombineExt.SourceC,
+                                         fxMesa->ColorCombineExt.InvertC,
+                                         fxMesa->ColorCombineExt.SourceD,
+                                         fxMesa->ColorCombineExt.InvertD,
+                                         fxMesa->ColorCombineExt.Shift,
+                                         fxMesa->ColorCombineExt.Invert);
+      }
+      else {
+         /* Voodoo 3 */
+         fxMesa->Glide.grColorCombine( fxMesa->ColorCombine.Function,
+                                       fxMesa->ColorCombine.Factor,
+                                       fxMesa->ColorCombine.Local,
+                                       fxMesa->ColorCombine.Other,
+                                       fxMesa->ColorCombine.Invert );
+      }
+      fxMesa->dirty &= ~TDFX_UPLOAD_COLOR_COMBINE;
+   }
+   if ( fxMesa->dirty & TDFX_UPLOAD_ALPHA_COMBINE ) {
+      if (TDFX_IS_NAPALM(fxMesa)) {
+         fxMesa->Glide.grAlphaCombineExt(fxMesa->AlphaCombineExt.SourceA,
+                                         fxMesa->AlphaCombineExt.ModeA,
+                                         fxMesa->AlphaCombineExt.SourceB,
+                                         fxMesa->AlphaCombineExt.ModeB,
+                                         fxMesa->AlphaCombineExt.SourceC,
+                                         fxMesa->AlphaCombineExt.InvertC,
+                                         fxMesa->AlphaCombineExt.SourceD,
+                                         fxMesa->AlphaCombineExt.InvertD,
+                                         fxMesa->AlphaCombineExt.Shift,
+                                         fxMesa->AlphaCombineExt.Invert);
+      }
+      else {
+         /* Voodoo 3 */
+         fxMesa->Glide.grAlphaCombine( fxMesa->AlphaCombine.Function,
+                                       fxMesa->AlphaCombine.Factor,
+                                       fxMesa->AlphaCombine.Local,
+                                       fxMesa->AlphaCombine.Other,
+                                       fxMesa->AlphaCombine.Invert );
+      }
+      fxMesa->dirty &= ~TDFX_UPLOAD_ALPHA_COMBINE;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_RENDER_BUFFER ) {
+      fxMesa->Glide.grRenderBuffer( fxMesa->DrawBuffer );
+      fxMesa->dirty &= ~TDFX_UPLOAD_RENDER_BUFFER;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_STIPPLE) {
+      fxMesa->Glide.grStipplePattern( fxMesa->Stipple.Pattern );
+      fxMesa->Glide.grStippleMode( fxMesa->Stipple.Mode );
+      fxMesa->dirty &= ~TDFX_UPLOAD_STIPPLE;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_ALPHA_TEST ) {
+      fxMesa->Glide.grAlphaTestFunction( fxMesa->Color.AlphaFunc );
+      fxMesa->dirty &= ~TDFX_UPLOAD_ALPHA_TEST;
+   }
+   if ( fxMesa->dirty & TDFX_UPLOAD_ALPHA_REF ) {
+      fxMesa->Glide.grAlphaTestReferenceValue( fxMesa->Color.AlphaRef );
+      fxMesa->dirty &= ~TDFX_UPLOAD_ALPHA_REF;
+   }
+   if ( fxMesa->dirty & TDFX_UPLOAD_BLEND_FUNC ) {
+      if (fxMesa->Glide.grAlphaBlendFunctionExt) {
+         fxMesa->Glide.grAlphaBlendFunctionExt( fxMesa->Color.BlendSrcRGB,
+                                                fxMesa->Color.BlendDstRGB,
+                                                fxMesa->Color.BlendEqRGB,
+                                                fxMesa->Color.BlendSrcA,
+                                                fxMesa->Color.BlendDstA,
+                                                fxMesa->Color.BlendEqA );
+      }
+      else {
+         fxMesa->Glide.grAlphaBlendFunction( fxMesa->Color.BlendSrcRGB,
+                                             fxMesa->Color.BlendDstRGB,
+                                             fxMesa->Color.BlendSrcA,
+                                             fxMesa->Color.BlendDstA );
+      }
+      fxMesa->dirty &= ~TDFX_UPLOAD_BLEND_FUNC;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_DEPTH_MODE ) {
+      fxMesa->Glide.grDepthBufferMode( fxMesa->Depth.Mode );
+      fxMesa->dirty &= ~TDFX_UPLOAD_DEPTH_MODE;
+   }
+   if ( fxMesa->dirty & TDFX_UPLOAD_DEPTH_BIAS ) {
+      fxMesa->Glide.grDepthBiasLevel( fxMesa->Depth.Bias );
+      fxMesa->dirty &= ~TDFX_UPLOAD_DEPTH_BIAS;
+   }
+   if ( fxMesa->dirty & TDFX_UPLOAD_DEPTH_FUNC ) {
+      fxMesa->Glide.grDepthBufferFunction( fxMesa->Depth.Func );
+      fxMesa->dirty &= ~TDFX_UPLOAD_DEPTH_FUNC;
+   }
+   if ( fxMesa->dirty & TDFX_UPLOAD_DEPTH_MASK ) {
+      fxMesa->Glide.grDepthMask( fxMesa->Depth.Mask );
+      fxMesa->dirty &= ~TDFX_UPLOAD_DEPTH_MASK;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_DITHER) {
+      fxMesa->Glide.grDitherMode( fxMesa->Color.Dither );
+   } /* TDFX_UPLOAD_DITHER is not cleared here; the final dirty = 0 below does it */
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_FOG_MODE ) {
+      fxMesa->Glide.grFogMode( fxMesa->Fog.Mode );
+      fxMesa->dirty &= ~TDFX_UPLOAD_FOG_MODE;
+   }
+   if ( fxMesa->dirty & TDFX_UPLOAD_FOG_COLOR ) {
+      fxMesa->Glide.grFogColorValue( fxMesa->Fog.Color );
+      fxMesa->dirty &= ~TDFX_UPLOAD_FOG_COLOR;
+   }
+   if ( fxMesa->dirty & TDFX_UPLOAD_FOG_TABLE ) {
+      fxMesa->Glide.grFogTable( fxMesa->Fog.Table );
+      fxMesa->dirty &= ~TDFX_UPLOAD_FOG_TABLE;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_CULL ) {
+      fxMesa->Glide.grCullMode( fxMesa->CullMode );
+      fxMesa->dirty &= ~TDFX_UPLOAD_CULL;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_CLIP ) {
+      tdfxUploadClipping( fxMesa );
+      fxMesa->dirty &= ~TDFX_UPLOAD_CLIP;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_COLOR_MASK ) {
+      if ( fxMesa->Glide.grColorMaskExt
+           && fxMesa->glCtx->Visual.redBits == 8) {
+	 fxMesa->Glide.grColorMaskExt( fxMesa->Color.ColorMask[RCOMP],
+                                       fxMesa->Color.ColorMask[GCOMP],
+                                       fxMesa->Color.ColorMask[BCOMP],
+                                       fxMesa->Color.ColorMask[ACOMP] );
+      } else {
+	 fxMesa->Glide.grColorMask( fxMesa->Color.ColorMask[RCOMP] ||
+                                    fxMesa->Color.ColorMask[GCOMP] ||
+                                    fxMesa->Color.ColorMask[BCOMP],
+                                    /*fxMesa->Color.ColorMask[0][ACOMP]*/GL_FALSE/*[dBorca] no-no*/ );
+      }
+      fxMesa->dirty &= ~TDFX_UPLOAD_COLOR_MASK;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_CONSTANT_COLOR ) {
+      fxMesa->Glide.grConstantColorValue( fxMesa->Color.MonoColor );
+      fxMesa->dirty &= ~TDFX_UPLOAD_CONSTANT_COLOR;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_LINE ) {
+      if (fxMesa->glCtx->Line.SmoothFlag && fxMesa->glCtx->Line.Width == 1.0)
+         fxMesa->Glide.grEnable(GR_AA_ORDERED);
+      else
+         fxMesa->Glide.grDisable(GR_AA_ORDERED);
+      fxMesa->dirty &= ~TDFX_UPLOAD_LINE;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_STENCIL ) {
+      if (fxMesa->glCtx->Stencil._Enabled) {
+         fxMesa->Glide.grEnable(GR_STENCIL_MODE_EXT);
+         fxMesa->Glide.grStencilOp(fxMesa->Stencil.FailFunc,
+                                   fxMesa->Stencil.ZFailFunc,
+                                   fxMesa->Stencil.ZPassFunc);
+         fxMesa->Glide.grStencilFunc(fxMesa->Stencil.Function,
+                                     fxMesa->Stencil.RefValue,
+                                     fxMesa->Stencil.ValueMask);
+         fxMesa->Glide.grStencilMask(fxMesa->Stencil.WriteMask);
+      }
+      else {
+         fxMesa->Glide.grDisable(GR_STENCIL_MODE_EXT);
+      }
+      fxMesa->dirty &= ~TDFX_UPLOAD_STENCIL;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_VERTEX_LAYOUT ) {
+      fxMesa->Glide.grGlideSetVertexLayout( fxMesa->layout[fxMesa->vertexFormat] );
+      /* [dborca] enable fogcoord */
+      fxMesa->Glide.grVertexLayout(GR_PARAM_FOG_EXT, TDFX_FOG_OFFSET,
+	 fxMesa->Fog.Mode == GR_FOG_WITH_TABLE_ON_FOGCOORD_EXT);
+      fxMesa->dirty &= ~TDFX_UPLOAD_VERTEX_LAYOUT;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_TEXTURE_ENV ) {
+      uploadTextureEnv(fxMesa);
+      fxMesa->dirty &= ~TDFX_UPLOAD_TEXTURE_ENV;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_TEXTURE_PARAMS ) {
+      uploadTextureParams(fxMesa);
+      fxMesa->dirty &= ~TDFX_UPLOAD_TEXTURE_PARAMS;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_TEXTURE_PALETTE ) {
+      if (fxMesa->TexPalette.Data) {
+         fxMesa->Glide.grTexDownloadTable(fxMesa->TexPalette.Type, fxMesa->TexPalette.Data);
+      }
+      fxMesa->dirty &= ~TDFX_UPLOAD_TEXTURE_PALETTE;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_TEXTURE_SOURCE ) {
+      uploadTextureSource(fxMesa);
+      fxMesa->dirty &= ~TDFX_UPLOAD_TEXTURE_SOURCE;
+   }
+
+   if ( fxMesa->dirty & TDFX_UPLOAD_TEXTURE_IMAGES ) {
+      uploadTextureImages(fxMesa);
+      fxMesa->dirty &= ~TDFX_UPLOAD_TEXTURE_IMAGES;
+   }
+
+   fxMesa->dirty = 0; /* catch-all: clears any bit still set after the groups above */
+}
+
+
+
+void tdfxInitRenderFuncs( struct dd_function_table *functions )
+{  /* Plug this file's Flush/Finish/Clear implementations into the driver table. */
+   functions->Flush = tdfxFlush;
+   functions->Finish = tdfxFinish;
+   functions->Clear = tdfxClear;
+}
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_render.h b/src/mesa/drivers/dri/tdfx/tdfx_render.h
new file mode 100644
index 0000000000..18c6168333
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_render.h
@@ -0,0 +1,49 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __TDFX_RENDER_H__
+#define __TDFX_RENDER_H__
+/* Declarations for the tdfx render entry points. */
+#include "tdfx_context.h"
+/* Fills in the Clear, Finish and Flush slots of the driver function table. */
+extern void tdfxInitRenderFuncs( struct dd_function_table *functions );
+/* NOTE(review): presumably emits pending hardware state with the lock held (per its name) -- confirm. */
+extern void tdfxEmitHwStateLocked( tdfxContextPtr fxMesa );
+/* NOTE(review): defined elsewhere; presumably pushes the clip state to the hardware -- confirm. */
+extern void tdfxUploadClipping( tdfxContextPtr fxMesa );
+/* Expands to nothing, so FLUSH_BATCH call sites compile away entirely. */
+#define FLUSH_BATCH( fxMesa )
+
+#endif /* __TDFX_RENDER_H__ */
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_screen.c b/src/mesa/drivers/dri/tdfx/tdfx_screen.c
new file mode 100644
index 0000000000..26de09503a
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_screen.c
@@ -0,0 +1,450 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#include "tdfx_dri.h"
+#include "tdfx_context.h"
+#include "tdfx_lock.h"
+#include "tdfx_span.h"
+
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "xmlpool.h"
+
+#include "utils.h"
+/* Lock-debugging bookkeeping, only compiled when DEBUG_LOCKING is defined. */
+#ifdef DEBUG_LOCKING
+char *prevLockFile = 0;
+int prevLockLine = 0;
+#endif
+/* Debug flag word tested against DEBUG_* bits; defined here unless TDFX_DEBUG is already a macro. */
+#ifndef TDFX_DEBUG
+int TDFX_DEBUG = 0;
+#endif
+/* Option description handed to driParseOptionInfo(): one debug-section entry (DRI_CONF_NO_RAST). */
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+    DRI_CONF_SECTION_DEBUG
+        DRI_CONF_NO_RAST(false)
+    DRI_CONF_SECTION_END
+DRI_CONF_END;
+/* NULL-terminated extension list that tdfxCreateScreen() stores in sPriv->extensions. */
+static const __DRIextension *tdfxExtensions[] = {
+    &driReadDrawableExtension,
+    NULL
+};
+/* Passed to driParseOptionInfo() together with __driConfigOptions; keep in step with the entries above. */
+static const GLuint __driNConfigOptions = 1;
+/* Allocate the tdfx screen record, copy the DDX layout into it and map the registers. */
+static GLboolean
+tdfxCreateScreen( __DRIscreen *sPriv )
+{
+   TDFXDRIPtr ddxInfo = (TDFXDRIPtr) sPriv->pDevPriv;
+   tdfxScreenPrivate *screen;
+
+   /* The DDX and this driver must agree on the size of the private record. */
+   if (sizeof(TDFXDRIRec) != sPriv->devPrivSize) {
+      fprintf(stderr,"\nERROR!  sizeof(TDFXDRIRec) does not match passed size from device driver\n");
+      return GL_FALSE;
+   }
+
+   screen = (tdfxScreenPrivate *) CALLOC( sizeof(tdfxScreenPrivate) );
+   if ( screen == NULL )
+      return GL_FALSE;
+
+   /* parse information in __driConfigOptions into the screen's option cache */
+   driParseOptionInfo (&screen->optionCache,
+		       __driConfigOptions, __driNConfigOptions);
+
+   screen->driScrnPriv = sPriv;
+   sPriv->private = (void *) screen;
+
+   /* Copy the register region and memory layout reported by the DDX. */
+   screen->regs.handle = ddxInfo->regs;
+   screen->regs.size = ddxInfo->regsSize;
+   screen->deviceID = ddxInfo->deviceID;
+   screen->width = ddxInfo->width;
+   screen->height = ddxInfo->height;
+   screen->mem = ddxInfo->mem;
+   screen->cpp = ddxInfo->cpp;
+   screen->stride = ddxInfo->stride;
+   screen->fifoOffset = ddxInfo->fifoOffset;
+   screen->fifoSize = ddxInfo->fifoSize;
+   screen->fbOffset = ddxInfo->fbOffset;
+   screen->backOffset = ddxInfo->backOffset;
+   screen->depthOffset = ddxInfo->depthOffset;
+   screen->textureOffset = ddxInfo->textureOffset;
+   screen->textureSize = ddxInfo->textureSize;
+   screen->sarea_priv_offset = ddxInfo->sarea_priv_offset;
+
+   if ( drmMap( sPriv->fd, screen->regs.handle,
+		screen->regs.size, &screen->regs.map ) != 0 )
+      return GL_FALSE; /* record stays attached to sPriv for the caller to destroy */
+
+   sPriv->extensions = tdfxExtensions;
+
+   return GL_TRUE;
+}
+
+/* Undo tdfxCreateScreen: unmap the registers and free the private screen record, if any. */
+static void
+tdfxDestroyScreen( __DRIscreen *sPriv )
+{
+   tdfxScreenPrivate *screen = (tdfxScreenPrivate *) sPriv->private;
+
+   if (screen != NULL) {
+      /* release the register mapping requested with drmMap */
+      drmUnmap( screen->regs.map, screen->regs.size );
+
+      /* free all option information */
+      driDestroyOptionInfo (&screen->optionCache);
+
+      FREE( screen );
+      sPriv->private = NULL;
+   }
+}
+
+/* Create the screen record; on failure tear down whatever was set up and return GL_FALSE. */
+static GLboolean
+tdfxInitDriver( __DRIscreen *sPriv )
+{
+   GLboolean created;
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_DRI )
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, (void *)sPriv );
+
+   created = tdfxCreateScreen( sPriv );
+   if ( !created )
+      tdfxDestroyScreen( sPriv ); /* frees a partially built record, if one exists */
+
+   return created ? GL_TRUE : GL_FALSE;
+}
+
+/* Create the Mesa framebuffer and the hardware renderbuffers for a window drawable. */
+static GLboolean
+tdfxCreateBuffer( __DRIscreen *driScrnPriv,
+                  __DRIdrawable *driDrawPriv,
+                  const __GLcontextModes *mesaVis,
+                  GLboolean isPixmap )
+{
+   tdfxScreenPrivate *screen = (tdfxScreenPrivate *) driScrnPriv->private;
+   struct gl_framebuffer *fb;
+
+   if (isPixmap)
+      return GL_FALSE; /* not implemented */
+
+   fb = _mesa_create_framebuffer(mesaVis);
+   if (!fb)
+      return GL_FALSE; /* creation failed; it used to be passed on unchecked */
+
+   { /* NOTE(review): the driNewRenderbuffer results below are still unchecked */
+      driRenderbuffer *frontRb
+         = driNewRenderbuffer(MESA_FORMAT_ARGB8888, NULL, screen->cpp,
+                              screen->fbOffset, screen->width, driDrawPriv);
+      tdfxSetSpanFunctions(frontRb, mesaVis);
+      _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
+   }
+
+   if (mesaVis->doubleBufferMode) {
+      driRenderbuffer *backRb
+         = driNewRenderbuffer(MESA_FORMAT_ARGB8888, NULL, screen->cpp,
+                              screen->backOffset, screen->width,
+                              driDrawPriv);
+      tdfxSetSpanFunctions(backRb, mesaVis);
+      _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
+      backRb->backBuffer = GL_TRUE;
+   }
+
+   if (mesaVis->depthBits == 16) {
+      driRenderbuffer *depthRb
+         = driNewRenderbuffer(MESA_FORMAT_Z16, NULL, screen->cpp,
+                              screen->depthOffset, screen->width,
+                              driDrawPriv);
+      tdfxSetSpanFunctions(depthRb, mesaVis);
+      _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+   }
+   else if (mesaVis->depthBits == 24) { /* combined depth/stencil format */
+      driRenderbuffer *depthRb
+         = driNewRenderbuffer(MESA_FORMAT_Z24_S8, NULL, screen->cpp,
+                              screen->depthOffset, screen->width,
+                              driDrawPriv);
+      tdfxSetSpanFunctions(depthRb, mesaVis);
+      _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+   }
+
+   if (mesaVis->stencilBits > 0) { /* created at the depth buffer's offset */
+      driRenderbuffer *stencilRb
+         = driNewRenderbuffer(MESA_FORMAT_S8, NULL, screen->cpp,
+                              screen->depthOffset, screen->width,
+                              driDrawPriv);
+      tdfxSetSpanFunctions(stencilRb, mesaVis);
+      _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
+   }
+
+   _mesa_add_soft_renderbuffers(fb,
+                                GL_FALSE, /* color */
+                                GL_FALSE, /* depth */
+                                GL_FALSE, /*swStencil,*/
+                                mesaVis->accumRedBits > 0,
+                                GL_FALSE, /* alpha */
+                                GL_FALSE /* aux */);
+   driDrawPriv->driverPrivate = (void *) fb;
+   return GL_TRUE;
+}
+
+/* Release the drawable's framebuffer reference by re-pointing its slot at NULL. */
+static void tdfxDestroyBuffer(__DRIdrawable *driDrawPriv)
+{
+   GLframebuffer **fbSlot = (GLframebuffer **)(&(driDrawPriv->driverPrivate));
+   _mesa_reference_framebuffer(fbSlot, NULL);
+}
+
+/* SwapBuffers hook: present the back buffer of a double-buffered drawable through Glide. */
+static void
+tdfxSwapBuffers( __DRIdrawable *driDrawPriv )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   tdfxContextPtr fxMesa;
+   GLframebuffer *mesaBuffer;
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_DRI ) {
+      fprintf( stderr, "%s( %p )\n", __FUNCTION__, (void *)driDrawPriv );
+   }
+
+   mesaBuffer = (GLframebuffer *) driDrawPriv->driverPrivate;
+   if ( !mesaBuffer->Visual.doubleBufferMode )
+      return; /* can't swap a single-buffered window */
+
+   /* No current context: nothing gets locked or selected below, so bail out
+    * instead of dereferencing a NULL fxMesa as the old code did. */
+   if ( !ctx )
+      return;
+
+   /* If the current context's drawable matches the given drawable
+    * we have to do a glFinish (per the GLX spec).
+    */
+   fxMesa = TDFX_CONTEXT(ctx);
+   if ( fxMesa->driContext->driDrawablePriv == driDrawPriv ) {
+      /* swapping window bound to current context, flush first */
+      _mesa_notifySwapBuffers( ctx );
+      LOCK_HARDWARE( fxMesa );
+   }
+   else {
+      /* find the fxMesa context previously bound to the window, if any */
+      if ( !driDrawPriv->driContextPriv )
+         return;
+      fxMesa = (tdfxContextPtr) driDrawPriv->driContextPriv->driverPrivate;
+      if (!fxMesa)
+         return;
+      LOCK_HARDWARE( fxMesa );
+      fxMesa->Glide.grSstSelect( fxMesa->Glide.Board );
+#ifdef DEBUG
+      printf("SwapBuf SetState 1\n");
+#endif
+      fxMesa->Glide.grGlideSetState(fxMesa->Glide.State );
+   }
+
+#ifdef STATS
+   {
+      int stalls;
+      static int prevStalls = 0;
+
+      stalls = fxMesa->Glide.grFifoGetStalls();
+
+      fprintf( stderr, "%s:\n", __FUNCTION__ );
+      if ( stalls != prevStalls ) {
+         fprintf( stderr, "    %d stalls occurred\n",
+                  stalls - prevStalls );
+         prevStalls = stalls;
+      }
+      if ( fxMesa->texSwaps ) {
+         fprintf( stderr, "    %d texture swaps occurred\n",
+                  fxMesa->texSwaps );
+         fxMesa->texSwaps = 0;
+      }
+   }
+#endif
+
+   if (fxMesa->scissoredClipRects) {
+      /* restore clip rects without scissor box */
+      fxMesa->Glide.grDRIPosition( driDrawPriv->x, driDrawPriv->y,
+                                   driDrawPriv->w, driDrawPriv->h,
+                                   driDrawPriv->numClipRects,
+                                   driDrawPriv->pClipRects );
+   }
+
+   fxMesa->Glide.grDRIBufferSwap( fxMesa->Glide.SwapInterval );
+
+   if (fxMesa->scissoredClipRects) {
+      /* restore clip rects WITH scissor box */
+      fxMesa->Glide.grDRIPosition( driDrawPriv->x, driDrawPriv->y,
+                                   driDrawPriv->w, driDrawPriv->h,
+                                   fxMesa->numClipRects, fxMesa->pClipRects );
+   }
+
+#if 0
+   {
+      FxI32 result; /* declared once; an inner duplicate used to shadow it */
+      do {
+         fxMesa->Glide.grGet(GR_PENDING_BUFFERSWAPS, 4, &result);
+      } while ( result > fxMesa->maxPendingSwapBuffers );
+   }
+#endif
+
+   fxMesa->stats.swapBuffer++;
+
+   /* If another context's Glide state was selected for the swap, switch back
+    * to the current context's state before dropping the lock.
+    * NOTE(review): the unlock then names the current context, not the one
+    * passed to LOCK_HARDWARE above -- confirm the lock macros expect this.
+    */
+   if (ctx->DriverCtx != fxMesa) {
+      fxMesa = TDFX_CONTEXT(ctx);
+      fxMesa->Glide.grSstSelect( fxMesa->Glide.Board );
+#ifdef DEBUG
+      printf("SwapBuf SetState 2\n");
+#endif
+      fxMesa->Glide.grGlideSetState(fxMesa->Glide.State );
+   }
+   UNLOCK_HARDWARE( fxMesa );
+}
+/* Build the framebuffer configs to advertise: depth/stencil variants x single/double buffering. */
+static const __DRIconfig **
+tdfxFillInModes(__DRIscreen *psp,
+		unsigned pixel_bits,
+		unsigned depth_bits,
+		unsigned stencil_bits,
+		GLboolean have_back_buffer)
+{
+	/* Right now GLX_SWAP_COPY_OML isn't supported, but it would be easy
+	 * enough to add support.  Basically, if a context is created with an
+	 * fbconfig where the swap method is GLX_SWAP_COPY_OML, pageflipping
+	 * will never be used.
+	 */
+	/* psp, pixel_bits, stencil_bits and have_back_buffer are not consulted. */
+	static const GLenum db_modes[2] = { GLX_NONE, GLX_SWAP_UNDEFINED_OML };
+	const unsigned deep_z = (depth_bits > 17);
+	uint8_t depth_bits_array[4];
+	uint8_t stencil_bits_array[4];
+	uint8_t msaa_samples_array[1] = { 0 };
+	GLenum fb_format, fb_type;
+	unsigned num_depth_stencil;
+
+	if (!deep_z) {
+		/* four pairs: depth {depth_bits, 0} crossed with stencil {0, 8} */
+		num_depth_stencil = 4;
+		fb_format = GL_RGB;
+		fb_type = GL_UNSIGNED_SHORT_5_6_5;
+		depth_bits_array[0] = depth_bits_array[2] = depth_bits;
+		depth_bits_array[1] = depth_bits_array[3] = 0;
+		stencil_bits_array[0] = stencil_bits_array[1] = 0;
+		stencil_bits_array[2] = stencil_bits_array[3] = 8;
+	} else {
+		/* two pairs: no depth/stencil at all, or 24-bit depth + 8-bit stencil */
+		num_depth_stencil = 2;
+		fb_format = GL_RGBA;
+		fb_type = GL_UNSIGNED_INT_8_8_8_8;
+		depth_bits_array[0] = stencil_bits_array[0] = 0;
+		depth_bits_array[1] = 24;
+		stencil_bits_array[1] = 8;
+	}
+
+	return (const __DRIconfig **)
+	   driCreateConfigs(fb_format, fb_type,
+			    depth_bits_array, stencil_bits_array,
+			    num_depth_stencil,
+			    db_modes, 2,
+			    msaa_samples_array, 1,
+			    GL_TRUE);
+}
+
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ *
+ * Checks the DRI/DDX/DRM versions, creates the screen record, then builds
+ * the config list.  \todo maybe fold this into tdfxInitDriver
+ *
+ * \return the configs supported by this driver, or NULL on failure
+ */
+static const __DRIconfig **
+tdfxInitScreen(__DRIscreen *psp)
+{
+   static const __DRIversion ddx_expected = { 1, 1, 0 };
+   static const __DRIversion dri_expected = { 4, 0, 0 };
+   static const __DRIversion drm_expected = { 1, 0, 0 };
+
+   /* divined from tdfx_dri.c, sketchy */
+   TDFXDRIPtr dri_priv = (TDFXDRIPtr) psp->pDevPriv;
+   int bpp;
+
+   if ( ! driCheckDriDdxDrmVersions2( "tdfx",
+				      &psp->dri_version, & dri_expected,
+				      &psp->ddx_version, & ddx_expected,
+				      &psp->drm_version, & drm_expected ) )
+      return NULL;
+
+   if (!tdfxInitDriver(psp))
+      return NULL;
+
+   /* Read the DDX record only after its size was validated; XXX no bpp field, so derive it from cpp. */
+   bpp = (dri_priv->cpp > 2) ? 24 : 16;
+   return tdfxFillInModes(psp,
+			  bpp, (bpp == 16) ? 16 : 24,
+			  (bpp == 16) ? 0 : 8,
+			  (dri_priv->backOffset!=dri_priv->depthOffset));
+}
+/* This driver's screen/context/buffer entry points; the swap-info and MSC/SBC hooks are left NULL. */
+const struct __DriverAPIRec driDriverAPI = {
+   .InitScreen      = tdfxInitScreen,
+   .DestroyScreen   = tdfxDestroyScreen,
+   .CreateContext   = tdfxCreateContext,
+   .DestroyContext  = tdfxDestroyContext,
+   .CreateBuffer    = tdfxCreateBuffer,
+   .DestroyBuffer   = tdfxDestroyBuffer,
+   .SwapBuffers     = tdfxSwapBuffers,
+   .MakeCurrent     = tdfxMakeCurrent,
+   .UnbindContext   = tdfxUnbindContext,
+   .GetSwapInfo     = NULL,
+   .GetDrawableMSC  = NULL,
+   .WaitForMSC      = NULL,
+   .WaitForSBC      = NULL,
+   .SwapBuffersMSC  = NULL
+};
+
+/* This is the table of extensions that the loader will dlsym() for (NULL-terminated). */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+    &driCoreExtension.base,
+    &driLegacyExtension.base,
+    NULL
+};
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_screen.h b/src/mesa/drivers/dri/tdfx/tdfx_screen.h
new file mode 100644
index 0000000000..6aa42e8667
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_screen.h
@@ -0,0 +1,72 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *
+ */
+
+#ifndef __TDFX_SCREEN_H__
+#define __TDFX_SCREEN_H__
+/* A DRM region: the handle and size given to drmMap() plus the mapping it returns. */
+typedef struct {
+   drm_handle_t handle;
+   drmSize size;
+   drmAddress map;
+} tdfxRegion, *tdfxRegionPtr;
+/* Per-screen driver state, filled in by tdfxCreateScreen() mostly from the DDX's TDFXDRIRec. */
+typedef struct {
+   tdfxRegion regs;	/* register region; regs.map is set by drmMap() */
+   /* Copied verbatim from the DDX record. */
+   int deviceID;
+   int width;		/* also passed as the pitch argument of driNewRenderbuffer() */
+   int height;
+   int mem;
+   int cpp;		/* passed as the cpp argument of driNewRenderbuffer() */
+   int stride;
+
+   int fifoOffset;
+   int fifoSize;
+   /* Offsets handed to driNewRenderbuffer() for the front, back and depth/stencil buffers. */
+   int fbOffset;
+   int backOffset;
+   int depthOffset;
+   int textureOffset;
+   int textureSize;
+   /* Back pointer to the DRI screen this record is attached to. */
+   __DRIscreen *driScrnPriv;
+   unsigned int sarea_priv_offset;
+
+   /* Configuration cache with default values for all contexts */
+   driOptionCache optionCache;
+} tdfxScreenPrivate;
+
+
+#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_span.c b/src/mesa/drivers/dri/tdfx/tdfx_span.c
new file mode 100644
index 0000000000..3879d506ee
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_span.c
@@ -0,0 +1,1386 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Brian Paul <brianp@valinux.com>
+ *	Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#include "tdfx_context.h"
+#include "tdfx_lock.h"
+#include "tdfx_span.h"
+#include "tdfx_render.h"
+#include "swrast/swrast.h"
+
+
+#define DBG 0
+
+/* Locals for the spantmp.h template: 'buf' is the drawable's origin inside the locked LFB ('info'). */
+#define LOCAL_VARS							\
+   driRenderbuffer *drb = (driRenderbuffer *) rb;			\
+   __DRIdrawable *const dPriv = drb->dPriv;			\
+   GLuint pitch = drb->backBuffer ? info.strideInBytes			\
+     : (drb->pitch * drb->cpp);						\
+   const GLuint bottom = dPriv->h - 1;					\
+   char *buf = (char *)((char *)info.lfbPtr +				\
+			 (dPriv->x * drb->cpp) +			\
+			 (dPriv->y * pitch));				\
+   GLuint p;								\
+   (void) buf; (void) p;
+
+/* Mirror a row index about the drawable height: bottom - _y, with bottom = h - 1 (needs LOCAL_VARS). */
+#define Y_FLIP(_y)		(bottom - _y)
+
+/* Drop and re-take the hardware lock, then lock fxMesa->DrawBuffer for LFB writes; opens a block. */
+#define HW_WRITE_LOCK()							\
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);				\
+   GrLfbInfo_t info;							\
+   FLUSH_BATCH( fxMesa );						\
+   UNLOCK_HARDWARE( fxMesa );						\
+   LOCK_HARDWARE( fxMesa );						\
+   info.size = sizeof(GrLfbInfo_t);					\
+   if (fxMesa->Glide.grLfbLock(GR_LFB_WRITE_ONLY, fxMesa->DrawBuffer,	\
+			       LFB_MODE, GR_ORIGIN_UPPER_LEFT, FXFALSE,	\
+			       &info)) {
+/* Release the write LFB lock and close the block opened by HW_WRITE_LOCK. */
+#define HW_WRITE_UNLOCK()						\
+      fxMesa->Glide.grLfbUnlock( GR_LFB_WRITE_ONLY, fxMesa->DrawBuffer );\
+   }
+
+/* Same as HW_WRITE_LOCK but takes a read-only LFB lock on fxMesa->ReadBuffer; opens a block. */
+#define HW_READ_LOCK()							\
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);				\
+   GrLfbInfo_t info;							\
+   FLUSH_BATCH( fxMesa );						\
+   UNLOCK_HARDWARE( fxMesa );						\
+   LOCK_HARDWARE( fxMesa );						\
+   info.size = sizeof(GrLfbInfo_t);					\
+   if ( fxMesa->Glide.grLfbLock( GR_LFB_READ_ONLY, fxMesa->ReadBuffer,	\
+                   LFB_MODE, GR_ORIGIN_UPPER_LEFT, FXFALSE, &info ) )	\
+   {
+/* Release the read LFB lock and close the block opened by HW_READ_LOCK. */
+#define HW_READ_UNLOCK()						\
+      fxMesa->Glide.grLfbUnlock( GR_LFB_READ_ONLY, fxMesa->ReadBuffer );\
+   }
+
+/* Walk the context's cliprects from last to first, exposing min/max bounds minus x_offset/y_offset. */
+#define HW_WRITE_CLIPLOOP()						\
+      do {								\
+         int _nc = fxMesa->numClipRects;				\
+         while (_nc--) {						\
+            int minx = fxMesa->pClipRects[_nc].x1 - fxMesa->x_offset;	\
+	    int miny = fxMesa->pClipRects[_nc].y1 - fxMesa->y_offset;	\
+	    int maxx = fxMesa->pClipRects[_nc].x2 - fxMesa->x_offset;	\
+	    int maxy = fxMesa->pClipRects[_nc].y2 - fxMesa->y_offset;
+/* Read variant: walks the drawable's own cliprect list (fxMesa->driDrawable) from first to last. */
+#define HW_READ_CLIPLOOP()						\
+      do {								\
+         const __DRIdrawable *dPriv = fxMesa->driDrawable;	\
+         drm_clip_rect_t *rect = dPriv->pClipRects;			\
+         int _nc = dPriv->numClipRects;					\
+         while (_nc--) {						\
+            const int minx = rect->x1 - fxMesa->x_offset;		\
+            const int miny = rect->y1 - fxMesa->y_offset;		\
+            const int maxx = rect->x2 - fxMesa->x_offset;		\
+            const int maxy = rect->y2 - fxMesa->y_offset;		\
+            rect++;
+/* Closes the while loop and the do-block opened by either CLIPLOOP macro. */
+#define HW_ENDCLIPLOOP()						\
+	 }								\
+      } while (0)
+
+
+
+#define LFB_MODE	GR_LFBWRITEMODE_565
+/* LFB_MODE is the write mode named by HW_WRITE_LOCK/HW_READ_LOCK when they are expanded below. */
+
+/* 16 bit, RGB565 color spanline and pixel functions */			\
+
+#undef INIT_MONO_PIXEL
+#define INIT_MONO_PIXEL(p, color) \
+  p = TDFXPACKCOLOR565( color[0], color[1], color[2] )
+/* Pack the top 5/6/5 bits of r,g,b into one 16-bit pixel and store it at (_x,_y) relative to buf. */
+
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   *(GLushort *)(buf + _x*2 + _y*pitch) = ((((int)r & 0xf8) << 8) |	\
+					   (((int)g & 0xfc) << 3) |	\
+					   (((int)b & 0xf8) >> 3))
+/* Store an already packed 16-bit pixel. */
+#define WRITE_PIXEL( _x, _y, p )					\
+    *(GLushort *)(buf + _x*2 + _y*pitch) = p
+/* Load a 5-6-5 pixel and rescale each field to 0..255; alpha is forced to 0xff. */
+#define READ_RGBA( rgba, _x, _y )					\
+    do {								\
+	GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch);		\
+	rgba[0] = (((p >> 11) & 0x1f) * 255) / 31;			\
+	rgba[1] = (((p >>  5) & 0x3f) * 255) / 63;			\
+	rgba[2] = (((p >>  0) & 0x1f) * 255) / 31;			\
+	rgba[3] = 0xff;							\
+    } while (0)
+/* spantmp.h is included with names of the form tdfx<name>_RGB565 (see TAG). */
+#define TAG(x) tdfx##x##_RGB565
+#define BYTESPERPIXEL 2
+#include "spantmp.h"
+#undef BYTESPERPIXEL
+
+
+/* 16 bit, BGR565 color spanline and pixel functions (compiled out by the #if 0 below) */
+#if 0
+
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   *(GLushort *)(buf + _x*2 + _y*pitch) = ((((int)b & 0xf8) << 8) |	\
+					   (((int)g & 0xfc) << 3) |	\
+					   (((int)r & 0xf8) >> 3))
+
+#define WRITE_PIXEL( _x, _y, p )					\
+    *(GLushort *)(buf + _x*2 + _y*pitch) = p
+
+#define READ_RGBA( rgba, _x, _y )					\
+    do {								\
+	GLushort p = *(GLushort *)(buf + _x*2 + _y*pitch);		\
+	rgba[0] = (p << 3) & 0xf8;					\
+	rgba[1] = (p >> 3) & 0xfc;					\
+	rgba[2] = (p >> 8) & 0xf8;					\
+	rgba[3] = 0xff;							\
+    } while (0)
+
+#define TAG(x) tdfx##x##_BGR565
+#define BYTESPERPIXEL 2
+#include "spantmp.h"
+#undef BYTESPERPIXEL
+#endif
+
+
+#undef LFB_MODE
+#define LFB_MODE	GR_LFBWRITEMODE_888
+
+/* NOTE(review): pixels sit at a 3-byte stride (_x*3) but are accessed as 4-byte GLuint, and BYTESPERPIXEL is 4 -- confirm. */
+/* 24 bit, RGB888 color spanline and pixel functions */
+#undef INIT_MONO_PIXEL
+#define INIT_MONO_PIXEL(p, color) \
+  p = TDFXPACKCOLOR888( color[0], color[1], color[2] )
+/* Store r,g,b as (r << 16 | g << 8 | b) in the word at byte offset _x*3 of row _y. */
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   *(GLuint *)(buf + _x*3 + _y*pitch) = ((b << 0) |			\
+					 (g << 8) |			\
+					 (r << 16))
+/* Store an already packed pixel word. */
+#define WRITE_PIXEL( _x, _y, p )					\
+   *(GLuint *)(buf + _x*3 + _y*pitch) = p
+/* Load the word and split it into r (bits 16-23), g (8-15), b (0-7); alpha is forced to 0xff. */
+#define READ_RGBA( rgba, _x, _y )					\
+do {									\
+   GLuint p = *(GLuint *)(buf + _x*3 + _y*pitch);			\
+   rgba[0] = (p >> 16) & 0xff;						\
+   rgba[1] = (p >> 8)  & 0xff;						\
+   rgba[2] = (p >> 0)  & 0xff;						\
+   rgba[3] = 0xff;							\
+} while (0)
+/* spantmp.h is included with names of the form tdfx<name>_RGB888 (see TAG). */
+#define TAG(x) tdfx##x##_RGB888
+#define BYTESPERPIXEL 4
+#include "spantmp.h"
+#undef BYTESPERPIXEL
+
+
+#undef LFB_MODE
+#define LFB_MODE	GR_LFBWRITEMODE_8888
+
+/* Pixels are 4-byte words laid out as a << 24 | r << 16 | g << 8 | b. */
+/* 32 bit, ARGB8888 color spanline and pixel functions */
+#undef INIT_MONO_PIXEL
+#define INIT_MONO_PIXEL(p, color) \
+  p = TDFXPACKCOLOR8888( color[0], color[1], color[2], color[3] )
+/* Pack r,g,b,a into one word and store it at byte offset _x*4 of row _y. */
+#define WRITE_RGBA( _x, _y, r, g, b, a )				\
+   *(GLuint *)(buf + _x*4 + _y*pitch) = ((b <<  0) |			\
+					 (g <<  8) |			\
+					 (r << 16) |			\
+					 (a << 24) )
+/* Store an already packed pixel word. */
+#define WRITE_PIXEL( _x, _y, p )					\
+   *(GLuint *)(buf + _x*4 + _y*pitch) = p
+/* Load the word and split it into r, g, b and a (bits 24-31). */
+#define READ_RGBA( rgba, _x, _y )					\
+do {									\
+   GLuint p = *(GLuint *)(buf + _x*4 + _y*pitch);			\
+   rgba[0] = (p >> 16) & 0xff;						\
+   rgba[1] = (p >>  8) & 0xff;						\
+   rgba[2] = (p >>  0) & 0xff;						\
+   rgba[3] = (p >> 24) & 0xff;						\
+} while (0)
+/* spantmp.h is included with names of the form tdfx<name>_ARGB8888 (see TAG). */
+#define TAG(x) tdfx##x##_ARGB8888
+#define BYTESPERPIXEL 4
+#include "spantmp.h"
+#undef BYTESPERPIXEL
+
+
+
+/* ================================================================
+ * Old span functions below...
+ */
+
+
+/*
+ * Examine the cliprects to generate an array of flags to indicate
+ * which pixels in a span are visible.  Note: (x,y) is a screen
+ * coordinate.
+ *
+ * fxMesa  - context whose pClipRects/numClipRects are tested
+ * x, y    - screen position of the first pixel of the span
+ * n       - span length in pixels; vismask must hold n bytes.
+ *           A length <= 0 leaves vismask untouched.
+ * vismask - out: vismask[i] is 1 if pixel (x+i, y) lies inside a
+ *           cliprect and 0 otherwise
+ */
+static void
+generate_vismask(const tdfxContextPtr fxMesa, GLint x, GLint y, GLint n,
+                 GLubyte vismask[])
+{
+   GLint i;
+
+   /* A negative length would reach memset as a huge size_t. */
+   if (n <= 0)
+      return;
+
+   /* Start with every pixel hidden. */
+   memset(vismask, 0, n);
+
+   /* turn on flags for all visible pixels */
+   for (i = 0; i < fxMesa->numClipRects; i++) {
+      const drm_clip_rect_t *rect = &fxMesa->pClipRects[i];
+      GLint start, end;
+
+      if (y < rect->y1 || y >= rect->y2)
+         continue; /* the span's row is outside this rect */
+
+      if (x >= rect->x1 && x + n <= rect->x2) {
+         /* common case, whole span inside cliprect */
+         memset(vismask, 1, n);
+         return;
+      }
+
+      /* Clamp the rect's x range to the span, as span-relative indices.
+       * An empty or inverted range means the rect misses the span.
+       */
+      start = (x < rect->x1) ? rect->x1 - x : 0;
+      end = (x + n > rect->x2) ? rect->x2 - x : n;
+      if (start < end)
+         memset(vismask + start, 1, end - start);
+   }
+}
+
+/*
+ * Return GL_TRUE if screen pixel (scrX, scrY) falls inside any cliprect of fxMesa.
+ */
+static GLboolean
+visible_pixel(const tdfxContextPtr fxMesa, int scrX, int scrY)
+{
+   const drm_clip_rect_t *clip = fxMesa->pClipRects;
+   int left;
+   for (left = fxMesa->numClipRects; left > 0; left--, clip++) {
+      const int inColumns = (scrX >= clip->x1) && (scrX < clip->x2);
+      if (inColumns && scrY >= clip->y1 && scrY < clip->y2)
+         return GL_TRUE;
+   }
+   return GL_FALSE;
+}
+
+
+
+/*
+ * Depth buffer read/write functions.
+ */
+/*
+ * To read the frame buffer, we need to lock and unlock it.  The
+ * four macros {READ,WRITE}_FB_SPAN_{LOCK,UNLOCK}
+ * do this for us.
+ *
+ * Note that the lock must be matched with an unlock.  These
+ * macros include a spare curly brace, so they must
+ * be syntactically matched.
+ *
+ * Note, also, that you can't lock a buffer twice with different
+ * modes.  That is to say, you can't lock a buffer in both read
+ * and write modes.  The strideInBytes and LFB pointer will be
+ * the same with read and write locks, so you can use either.
+ * o The HW has different state for reads and writes, so
+ *   locking it twice may give screwy results.
+ * o The DRM won't let you lock twice.  It hangs.  This is probably
+ *   because of the LOCK_HARDWARE in the *_FB_SPAN_LOCK macros,
+ *   and could be eliminated with nonlocking lock routines.  But
+ *   what's the point after all.
+ */
+#define READ_FB_SPAN_LOCK(fxMesa, info, target_buffer)              \
+  UNLOCK_HARDWARE(fxMesa);                                          \
+  LOCK_HARDWARE(fxMesa);                                            \
+  (info).size=sizeof(info);                                         \
+  if (fxMesa->Glide.grLfbLock(GR_LFB_READ_ONLY,                     \
+                target_buffer,                                      \
+                GR_LFBWRITEMODE_ANY,                                \
+                GR_ORIGIN_UPPER_LEFT,                               \
+                FXFALSE,                                            \
+                &(info))) {
+/* Closes the block opened by READ_FB_SPAN_LOCK; the else arm reports a failed lock. */
+#define READ_FB_SPAN_UNLOCK(fxMesa, target_buffer)                  \
+    fxMesa->Glide.grLfbUnlock(GR_LFB_READ_ONLY, target_buffer);     \
+  } else {                                                          \
+    fprintf(stderr, "tdfxDriver: Can't get %s (%d) read lock\n",    \
+            (target_buffer == GR_BUFFER_BACKBUFFER)                 \
+                ? "back buffer"                                     \
+            : ((target_buffer == GR_BUFFER_AUXBUFFER)               \
+                ? "depth buffer"                                    \
+               : "unknown buffer"),                                 \
+            target_buffer);                                         \
+  }
+/* Write-side counterpart; write_mode is passed to grLfbLock in place of GR_LFBWRITEMODE_ANY. */
+#define WRITE_FB_SPAN_LOCK(fxMesa, info, target_buffer, write_mode) \
+  UNLOCK_HARDWARE(fxMesa);                                          \
+  LOCK_HARDWARE(fxMesa);                                            \
+  (info).size=sizeof(info);                                         \
+  if (fxMesa->Glide.grLfbLock(GR_LFB_WRITE_ONLY,                    \
+                target_buffer,                                      \
+                write_mode,                                         \
+                GR_ORIGIN_UPPER_LEFT,                               \
+                FXFALSE,                                            \
+                &(info))) {
+/* Closes the block opened by WRITE_FB_SPAN_LOCK; the else arm reports a failed lock. */
+#define WRITE_FB_SPAN_UNLOCK(fxMesa, target_buffer)                 \
+    fxMesa->Glide.grLfbUnlock(GR_LFB_WRITE_ONLY, target_buffer);    \
+  } else {                                                          \
+    fprintf(stderr, "tdfxDriver: Can't get %s (%d) write lock\n",   \
+            (target_buffer == GR_BUFFER_BACKBUFFER)                 \
+                ? "back buffer"                                     \
+            : ((target_buffer == GR_BUFFER_AUXBUFFER)               \
+                ? "depth buffer"                                    \
+               : "unknown buffer"),                                 \
+            target_buffer);                                         \
+  }
+
+/*
+ * Because the Linear Frame Buffer is not necessarily aligned
+ * with the depth buffer, we have to do some fiddling
+ * around to get the right addresses.
+ *
+ * Perhaps a picture is in order.  The Linear Frame Buffer
+ * looks like this:
+ *
+ *   |<----------------------info.strideInBytes------------->|
+ *   |<-----physicalStrideInBytes------->|
+ *   +-----------------------------------+xxxxxxxxxxxxxxxxxxx+
+ *   |                                   |                   |
+ *   |          Legal Memory             |  Forbidden Zone   |
+ *   |                                   |                   |
+ *   +-----------------------------------+xxxxxxxxxxxxxxxxxxx+
+ *
+ * You can only reliably read and write legal locations.  Reads
+ * and writes from the Forbidden Zone will return undefined values,
+ * and may cause segmentation faults.
+ *
+ * Now, the depth buffer may not end up in a location such that each
+ * scan line is an LFB line.  For example, the depth buffer may
+ * look like this:
+ *
+ *    wrapped               ordinary.
+ *   +-----------------------------------+xxxxxxxxxxxxxxxxxxx+
+ *   |0000000000000000000000             |                   | back
+ *   |1111111111111111111111             |                   | buffer
+ *   |2222222222222222222222             |                   |
+ *   |4096b align. padxx00000000000000000|  Forbidden Zone   | depth
+ *   |0000              11111111111111111|                   | buffer
+ *   |1111              22222222222222222|                   |
+ *   |2222                               |                   |
+ *   +-----------------------------------+xxxxxxxxxxxxxxxxxxx+
+ * where each number is the scan line number.  We know it will
+ * be aligned on 128 byte boundaries, at least.  Aligning this
+ * on a scanline boundary causes the back and depth buffers to
+ * thrash in the SST1 cache.  (Note that the back buffer is always
+ * allocated at the beginning of LFB memory, and so it is always
+ * properly aligned with the LFB stride.)
+ *
+ * We call the beginning of the line (which is the rightmost
+ * part of the depth line in the picture above) the *ordinary* part
+ * of the scanline, and the end of the line (which is the
+ * leftmost part, one line below) the *wrapped* part of the scanline.
+ * a.) We need to know what x value to subtract from the screen
+ *     x coordinate to index into the wrapped part.
+ * b.) We also need to figure out if we need to read from the ordinary
+ *     part scan line, or from the wrapped part of the scan line.
+ *
+ * [ad a]
+ * The first wrapped x coordinate is that coordinate such that
+ *           (depthBufferOffset&(info.strideInBytes-1)) + x*elementSize  {*}
+ *                            > physicalStrideInBytes
+ *     where depthBufferOffset is the LFB distance in bytes
+ *     from the back buffer to the depth buffer.  The expression
+ *           depthBufferOffset&(info.strideInBytes-1)
+ *     is then the offset (in bytes) from the beginning of (any)
+ *     depth buffer line to the first element in the line.
+ * Simplifying inequality {*} above we see that x is the smallest
+ * value such that
+ *         x*elementSize > physicalStrideInBytes                      {**}
+ *                            - (depthBufferOffset&(info.strideInBytes-1))
+ * Now, we know that both the summands on the right are multiples of
+ * 128, and elementSize <= 4, so if equality holds in {**}, x would
+ * be a multiple of 32.  Thus we can set x to
+ *         xwrapped = (physicalStrideInBytes
+ *                      - (depthBufferOffset&(info.strideInBytes-1)))/elementSize
+ *                      + 1
+ * (Note: GetFbParams() below computes firstWrappedX without the "+ 1",
+ * and the accessor macros treat every x >= firstWrappedX as wrapped.)
+ *
+ * [ad b]
+ * Question b is now simple.  We read from the wrapped scan line if
+ * x is greater than xwrapped.
+ */
+/* Width of a tile in bytes; physical strides are rounded up to this. */
+#define TILE_WIDTH_IN_BYTES		128
+/* Width of a tile in depth elements, given bpz bytes per element. */
+#define TILE_WIDTH_IN_ZOXELS(bpz)	(TILE_WIDTH_IN_BYTES/(bpz))
+/* Number of LFB lines added to reach the wrapped part of a scan line. */
+#define TILE_HEIGHT_IN_LINES		32
+/*
+ * Addressing information for one locked buffer, filled in by
+ * GetFbParams() and consumed by the GET_/PUT_..._FB_DATA macros.
+ */
+typedef struct
+{
+   /* Base address for the ordinary part of each scan line. */
+   void *lfbPtr;
+   /* Base address for the wrapped part of each scan line. */
+   void *lfbWrapPtr;
+   /* LFB stride expressed in elements rather than bytes. */
+   FxU32 LFBStrideInElts;
+   /* First screen x coordinate that lies in the wrapped part. */
+   GLint firstWrappedX;
+}
+LFBParameters;
+
+/*
+ * We need information about the back buffer.  Note that
+ * this function *cannot be called* while the aux buffer
+ * is locked, or the caller will hang.
+ *
+ * Only Glide knows the LFB address of the back and depth
+ * offsets.  The upper levels of Mesa know the depth offset,
+ * but that is not in LFB space, it is tiled memory space,
+ * and is not useable for us.
+ *
+ * fxMesa         - driver context.
+ * backBufferInfo - receives the lock description of the back buffer;
+ *                  callers use its lfbPtr as the origin of LFB space.
+ *
+ * The back buffer is locked and immediately unlocked again; nothing is
+ * read from or written to it here.
+ * NOTE(review): READ_FB_SPAN_LOCK is defined outside this part of the
+ * file; presumably it mirrors WRITE_FB_SPAN_LOCK and fills
+ * *backBufferInfo through grLfbLock() - confirm.
+ */
+static void
+GetBackBufferInfo(tdfxContextPtr fxMesa, GrLfbInfo_t * backBufferInfo)
+{
+   READ_FB_SPAN_LOCK(fxMesa, *backBufferInfo, GR_BUFFER_BACKBUFFER);
+   READ_FB_SPAN_UNLOCK(fxMesa, GR_BUFFER_BACKBUFFER);
+}
+
+/*
+ * Fill in *ReadParamsp with the addressing parameters of a locked
+ * buffer.
+ *
+ * fxMesa         - driver context (supplies the screen width).
+ * info           - lock description of the buffer being accessed.
+ * backBufferInfo - lock description of the back buffer, whose lfbPtr
+ *                  marks the start of LFB space.
+ * ReadParamsp    - result structure.
+ * elementSize    - size in bytes of one buffer element.
+ */
+static void
+GetFbParams(tdfxContextPtr fxMesa,
+            GrLfbInfo_t * info,
+            GrLfbInfo_t * backBufferInfo,
+            LFBParameters * ReadParamsp, FxU32 elementSize)
+{
+   const FxU32 strideInBytes = info->strideInBytes;
+   char *const bufferBase = (char *) (info->lfbPtr);
+   char *const lfbBase = (char *) backBufferInfo->lfbPtr;
+
+   /*
+    * Distance in bytes from the start of LFB space (the back buffer)
+    * to the buffer described by info.
+    */
+   const FxU32 bufferOffset = (FxU32) (bufferBase - lfbBase);
+
+   /*
+    * Screen width in bytes rounded up to a whole number of tiles.
+    * The rounding trick requires TILE_WIDTH_IN_BYTES to be a power
+    * of two.
+    */
+   const FxU32 physicalStrideInBytes
+      = (fxMesa->screen_width * elementSize + TILE_WIDTH_IN_BYTES - 1)
+      & ~(TILE_WIDTH_IN_BYTES - 1);
+
+   /* Taken straight from the lock description. */
+   ReadParamsp->lfbPtr = (void *) bufferBase;
+   ReadParamsp->LFBStrideInElts = strideInBytes / elementSize;
+
+   /*
+    * The buffer must not start inside the forbidden zone, i.e. its
+    * offset within an LFB line has to be below the physical stride.
+    */
+   assert(physicalStrideInBytes > (bufferOffset & (strideInBytes - 1)));
+
+   /*
+    * Number of elements that fit between the buffer's offset within
+    * the LFB line and the end of legal memory.  For bufferOffset == 0
+    * this lands in the forbidden zone and is never reached.
+    */
+   ReadParamsp->firstWrappedX
+      = (physicalStrideInBytes
+         - (bufferOffset & (strideInBytes - 1))) / elementSize;
+
+   /*
+    * The wrapped part starts at the beginning of the buffer's LFB
+    * line, TILE_HEIGHT_IN_LINES lines further down.
+    */
+   ReadParamsp->lfbWrapPtr
+      = (void *) (lfbBase
+                  + (bufferOffset & ~(strideInBytes - 1))
+                  + (TILE_HEIGHT_IN_LINES) * strideInBytes);
+}
+
+/*
+ * These macros fetch data from the frame buffer.  The type is
+ * the type of data we want to fetch.  It should match the type
+ * whose size was used with GetFbParams to fill in the structure
+ * in *ReadParamsp.  We have a macro to read the ordinary
+ * part, a second macro to read the wrapped part, and one which
+ * will do either.  When we are reading a span, we will know
+ * when the ordinary part ends, so there's no need to test for
+ * it.  However, when reading and writing pixels, we don't
+ * necessarily know.  I suppose it's a matter of taste whether
+ * it's better in the macro or in the call.
+ *
+ * Recall that x and y are screen coordinates.
+ *
+ * GET_ORDINARY_FB_DATA and GET_WRAPPED_FB_DATA expand to array
+ * element expressions, which is what allows the PUT_ORDINARY_ and
+ * PUT_WRAPPED_ macros to assign through them.  GET_FB_DATA and
+ * PUT_FB_DATA compare x with firstWrappedX to pick the part and so
+ * evaluate x more than once; do not pass expressions with side
+ * effects.
+ */
+#define GET_ORDINARY_FB_DATA(ReadParamsp, type, x, y)               \
+    (((type *)((ReadParamsp)->lfbPtr))                              \
+                 [(y) * ((ReadParamsp)->LFBStrideInElts)            \
+                   + (x)])
+#define GET_WRAPPED_FB_DATA(ReadParamsp, type, x, y)                \
+    (((type *)((ReadParamsp)->lfbWrapPtr))                          \
+                 [((y)) * ((ReadParamsp)->LFBStrideInElts)          \
+                   + ((x) - (ReadParamsp)->firstWrappedX)])
+#define GET_FB_DATA(ReadParamsp, type, x, y)                        \
+   (((x) < (ReadParamsp)->firstWrappedX)                            \
+        ? GET_ORDINARY_FB_DATA(ReadParamsp, type, x, y)             \
+        : GET_WRAPPED_FB_DATA(ReadParamsp, type, x, y))
+#define PUT_ORDINARY_FB_DATA(ReadParamsp, type, x, y, value)              \
+    (GET_ORDINARY_FB_DATA(ReadParamsp, type, x, y) = (type)(value))
+#define PUT_WRAPPED_FB_DATA(ReadParamsp, type, x, y, value)                \
+    (GET_WRAPPED_FB_DATA(ReadParamsp, type, x, y) = (type)(value))
+#define PUT_FB_DATA(ReadParamsp, type, x, y, value)                 \
+    do {                                                            \
+        if ((x) < (ReadParamsp)->firstWrappedX)                     \
+            PUT_ORDINARY_FB_DATA(ReadParamsp, type, x, y, value);   \
+        else                                                        \
+            PUT_WRAPPED_FB_DATA(ReadParamsp, type, x, y, value);    \
+    } while (0)
+
+
+/*
+ * Write a horizontal span of depth values to the aux (depth) buffer.
+ *
+ * ctx    - GL context; the driver context is taken from ctx->DriverCtx.
+ * rb     - renderbuffer being written (not used here).
+ * n      - number of values in the span.
+ * x, y   - window-relative position of the first value; converted to
+ *          screen coordinates below (y is flipped).
+ * values - n depth values, read as GLuint.
+ * mask   - optional per-value write mask; NULL means "write all".
+ *
+ * A value is written only where the mask (when given) and the
+ * visibility mask produced by generate_vismask() are both non-zero.
+ * With 16 depth bits the value is truncated to GLushort.  With 24 or
+ * 32 depth bits and stencil bits present, only the low 24 bits of the
+ * element are replaced and its top byte is kept.
+ *
+ * The masked and unmasked cases used to be two copies of the same
+ * code; they are handled together here by testing "!mask || mask[i]".
+ */
+static void
+tdfxDDWriteDepthSpan(GLcontext * ctx, struct gl_renderbuffer *rb,
+		     GLuint n, GLint x, GLint y, const void *values,
+		     const GLubyte mask[])
+{
+   const GLuint *depth = (const GLuint *) values;
+   tdfxContextPtr fxMesa = (tdfxContextPtr) ctx->DriverCtx;
+   GLint bottom = fxMesa->y_offset + fxMesa->height - 1;
+   GLuint depth_size = fxMesa->glCtx->Visual.depthBits;
+   GLuint stencil_size = fxMesa->glCtx->Visual.stencilBits;
+   GrLfbInfo_t info;
+   GrLfbInfo_t backBufferInfo;
+   GLubyte visMask[MAX_WIDTH];
+   GLint i;
+
+   if (MESA_VERBOSE & VERBOSE_DRIVER) {
+      fprintf(stderr, "tdfxmesa: tdfxDDWriteDepthSpan(...)\n");
+   }
+
+   assert((depth_size == 16) || (depth_size == 24) || (depth_size == 32));
+   /*
+    * Convert x and y to screen coordinates.
+    */
+   x += fxMesa->x_offset;
+   y = bottom - y;
+
+   switch (depth_size) {
+   case 16:
+      /* Must happen before the aux buffer is locked. */
+      GetBackBufferInfo(fxMesa, &backBufferInfo);
+      /*
+       * Note that the _LOCK macro adds a curly brace,
+       * and the UNLOCK macro removes it.
+       */
+      WRITE_FB_SPAN_LOCK(fxMesa, info, GR_BUFFER_AUXBUFFER,
+                         GR_LFBWRITEMODE_ANY);
+      generate_vismask(fxMesa, x, y, n, visMask);
+      {
+         LFBParameters ReadParams;
+         GLint wrappedPartStart;
+
+         GetFbParams(fxMesa, &info, &backBufferInfo,
+                     &ReadParams, sizeof(GLushort));
+         /* Index of the first span element in the wrapped part. */
+         if (ReadParams.firstWrappedX <= x) {
+            wrappedPartStart = 0;
+         }
+         else if (n <= (ReadParams.firstWrappedX - x)) {
+            wrappedPartStart = n;
+         }
+         else {
+            wrappedPartStart = (ReadParams.firstWrappedX - x);
+         }
+         for (i = 0; i < wrappedPartStart; i++) {
+            if ((!mask || mask[i]) && visMask[i]) {
+               PUT_ORDINARY_FB_DATA(&ReadParams, GLushort,
+                                    x + i, y, depth[i]);
+            }
+         }
+         for (; i < n; i++) {
+            if ((!mask || mask[i]) && visMask[i]) {
+               PUT_WRAPPED_FB_DATA(&ReadParams, GLushort,
+                                   x + i, y, depth[i]);
+            }
+         }
+      }
+      WRITE_FB_SPAN_UNLOCK(fxMesa, GR_BUFFER_AUXBUFFER);
+      break;
+   case 24:
+   case 32:
+      /* Must happen before the aux buffer is locked. */
+      GetBackBufferInfo(fxMesa, &backBufferInfo);
+      /*
+       * Note that the _LOCK macro adds a curly brace,
+       * and the UNLOCK macro removes it.
+       */
+      WRITE_FB_SPAN_LOCK(fxMesa, info, GR_BUFFER_AUXBUFFER,
+                         GR_LFBWRITEMODE_ANY);
+      generate_vismask(fxMesa, x, y, n, visMask);
+      {
+         LFBParameters ReadParams;
+         GLint wrappedPartStart;
+
+         GetFbParams(fxMesa, &info, &backBufferInfo,
+                     &ReadParams, sizeof(GLuint));
+         /* Index of the first span element in the wrapped part. */
+         if (ReadParams.firstWrappedX <= x) {
+            wrappedPartStart = 0;
+         }
+         else if (n <= (ReadParams.firstWrappedX - x)) {
+            wrappedPartStart = n;
+         }
+         else {
+            wrappedPartStart = (ReadParams.firstWrappedX - x);
+         }
+         for (i = 0; i < wrappedPartStart; i++) {
+            if ((!mask || mask[i]) && visMask[i]) {
+               GLuint d32 = depth[i];
+               if (stencil_size > 0) {
+                  /* Keep the top byte already in the buffer. */
+                  const GLuint old =
+                     GET_ORDINARY_FB_DATA(&ReadParams, GLuint, x + i, y);
+                  d32 = (old & 0xFF000000) | (depth[i] & 0x00FFFFFF);
+               }
+               PUT_ORDINARY_FB_DATA(&ReadParams, GLuint, x + i, y, d32);
+            }
+         }
+         for (; i < n; i++) {
+            if ((!mask || mask[i]) && visMask[i]) {
+               GLuint d32 = depth[i];
+               if (stencil_size > 0) {
+                  /* Keep the top byte already in the buffer. */
+                  const GLuint old =
+                     GET_WRAPPED_FB_DATA(&ReadParams, GLuint, x + i, y);
+                  d32 = (old & 0xFF000000) | (depth[i] & 0x00FFFFFF);
+               }
+               PUT_WRAPPED_FB_DATA(&ReadParams, GLuint, x + i, y, d32);
+            }
+         }
+      }
+      WRITE_FB_SPAN_UNLOCK(fxMesa, GR_BUFFER_AUXBUFFER);
+      break;
+   default:
+      /* Unsupported depth size: nothing is written. */
+      break;
+   }
+}
+
+/*
+ * Write the same depth value to every element of a span.
+ *
+ * The single value pointed to by "value" is replicated n times into a
+ * temporary array of MAX_WIDTH entries, which is then handed to
+ * tdfxDDWriteDepthSpan() together with the unchanged span position
+ * and mask.
+ */
+static void
+tdfxDDWriteMonoDepthSpan(GLcontext * ctx, struct gl_renderbuffer *rb,
+                         GLuint n, GLint x, GLint y, const void *value,
+                         const GLubyte mask[])
+{
+   GLuint replicated[MAX_WIDTH];
+   const GLuint fill = *((const GLuint *) value);
+   GLuint *out = replicated;
+   GLuint remaining = n;
+
+   while (remaining > 0) {
+      *out++ = fill;
+      remaining--;
+   }
+   tdfxDDWriteDepthSpan(ctx, rb, n, x, y, replicated, mask);
+}
+
+
+/*
+ * Read a horizontal span of depth values from the aux (depth) buffer.
+ *
+ * ctx    - GL context; the driver context is taken from ctx->DriverCtx.
+ * rb     - renderbuffer being read (not used here).
+ * n      - number of values to read.
+ * x, y   - window-relative position of the first value.
+ * values - receives n values, stored as GLuint.
+ *
+ * With 16 depth bits each GLushort element is widened to GLuint.  With
+ * 24 or 32 depth bits the element is returned with its top byte
+ * cleared when stencil bits are present, unchanged otherwise.  Any
+ * other depth size leaves "values" untouched.
+ */
+static void
+tdfxDDReadDepthSpan(GLcontext * ctx, struct gl_renderbuffer *rb,
+		    GLuint n, GLint x, GLint y, void *values)
+{
+   tdfxContextPtr fxMesa = (tdfxContextPtr) ctx->DriverCtx;
+   GLuint *out = (GLuint *) values;
+   const GLuint depth_size = fxMesa->glCtx->Visual.depthBits;
+   /* Screen coordinates of the first element (y is flipped). */
+   const GLint scrX = x + fxMesa->x_offset;
+   const GLint scrY = (fxMesa->height + fxMesa->y_offset - 1) - y;
+   GrLfbInfo_t info;
+   GLuint k;
+
+   if (MESA_VERBOSE & VERBOSE_DRIVER) {
+      fprintf(stderr, "tdfxmesa: tdfxDDReadDepthSpan(...)\n");
+   }
+
+   switch (depth_size) {
+   case 16:
+   {
+      LFBParameters fb;
+      GrLfbInfo_t backInfo;
+      GLuint ordinaryCount;
+      GetBackBufferInfo(fxMesa, &backInfo);
+      /*
+       * The _LOCK macro opens a brace that the _UNLOCK macro closes.
+       */
+      READ_FB_SPAN_LOCK(fxMesa, info, GR_BUFFER_AUXBUFFER);
+      GetFbParams(fxMesa, &info, &backInfo, &fb, sizeof(GLushort));
+      /* How many leading elements lie in the ordinary part. */
+      if (fb.firstWrappedX <= scrX) {
+         ordinaryCount = 0;
+      }
+      else if (n <= (fb.firstWrappedX - scrX)) {
+         ordinaryCount = n;
+      }
+      else {
+         ordinaryCount = (fb.firstWrappedX - scrX);
+      }
+      for (k = 0; k < n; k++) {
+         if (k < ordinaryCount) {
+            out[k] = GET_ORDINARY_FB_DATA(&fb, GLushort, scrX + k, scrY);
+         }
+         else {
+            out[k] = GET_WRAPPED_FB_DATA(&fb, GLushort, scrX + k, scrY);
+         }
+      }
+      READ_FB_SPAN_UNLOCK(fxMesa, GR_BUFFER_AUXBUFFER);
+      break;
+   }
+   case 24:
+   case 32:
+   {
+      LFBParameters fb;
+      GrLfbInfo_t backInfo;
+      GLuint ordinaryCount;
+      /* Bits of each element returned to the caller. */
+      const GLuint keep =
+         (fxMesa->glCtx->Visual.stencilBits > 0) ? 0x00FFFFFF : 0xFFFFFFFF;
+      GetBackBufferInfo(fxMesa, &backInfo);
+      /*
+       * The _LOCK macro opens a brace that the _UNLOCK macro closes.
+       */
+      READ_FB_SPAN_LOCK(fxMesa, info, GR_BUFFER_AUXBUFFER);
+      GetFbParams(fxMesa, &info, &backInfo, &fb, sizeof(GLuint));
+      /* How many leading elements lie in the ordinary part. */
+      if (fb.firstWrappedX <= scrX) {
+         ordinaryCount = 0;
+      }
+      else if (n <= (fb.firstWrappedX - scrX)) {
+         ordinaryCount = n;
+      }
+      else {
+         ordinaryCount = (fb.firstWrappedX - scrX);
+      }
+      for (k = 0; k < n; k++) {
+         GLuint raw;
+         if (k < ordinaryCount) {
+            raw = GET_ORDINARY_FB_DATA(&fb, GLuint, scrX + k, scrY);
+         }
+         else {
+            raw = GET_WRAPPED_FB_DATA(&fb, GLuint, scrX + k, scrY);
+         }
+         out[k] = raw & keep;
+      }
+      READ_FB_SPAN_UNLOCK(fxMesa, GR_BUFFER_AUXBUFFER);
+      break;
+   }
+   }
+}
+
+
+/*
+ * Write depth values to scattered pixels of the aux (depth) buffer.
+ *
+ * ctx    - GL context; the driver context is taken from ctx->DriverCtx.
+ * rb     - renderbuffer being written (not used here).
+ * n      - number of pixels.
+ * x, y   - window-relative coordinates of each pixel.
+ * values - n depth values, read as GLuint.
+ * mask   - optional per-pixel write mask; NULL means "write all".
+ *
+ * With 16 depth bits the value is truncated to GLushort.  With 24 or
+ * 32 depth bits and stencil bits present, only the low 24 bits of the
+ * element are replaced and its top byte is kept.
+ *
+ * The visibility test is made with screen coordinates, in agreement
+ * with write_stencil_pixels() and with the generate_vismask() calls of
+ * the span functions; this function previously passed the
+ * window-relative x[i], y[i].
+ * NOTE(review): visible_pixel() is defined outside this part of the
+ * file - confirm that it expects screen coordinates.
+ */
+static void
+tdfxDDWriteDepthPixels(GLcontext * ctx, struct gl_renderbuffer *rb,
+		       GLuint n, const GLint x[], const GLint y[],
+		       const void *values, const GLubyte mask[])
+{
+   const GLuint *depth = (const GLuint *) values;
+   tdfxContextPtr fxMesa = (tdfxContextPtr) ctx->DriverCtx;
+   GLint bottom = fxMesa->height + fxMesa->y_offset - 1;
+   GLuint depth_size = fxMesa->glCtx->Visual.depthBits;
+   GLuint stencil_size = fxMesa->glCtx->Visual.stencilBits;
+   GrLfbInfo_t info;
+   GrLfbInfo_t backBufferInfo;
+   GLuint i;
+
+   if (MESA_VERBOSE & VERBOSE_DRIVER) {
+      fprintf(stderr, "tdfxmesa: tdfxDDWriteDepthPixels(...)\n");
+   }
+
+   switch (depth_size) {
+   case 16:
+      /* Must happen before the aux buffer is locked. */
+      GetBackBufferInfo(fxMesa, &backBufferInfo);
+      /*
+       * Note that the _LOCK macro adds a curly brace,
+       * and the UNLOCK macro removes it.
+       */
+      WRITE_FB_SPAN_LOCK(fxMesa, info,
+                         GR_BUFFER_AUXBUFFER, GR_LFBWRITEMODE_ANY);
+      {
+         LFBParameters ReadParams;
+         GetFbParams(fxMesa, &info, &backBufferInfo,
+                     &ReadParams, sizeof(GLushort));
+         for (i = 0; i < n; i++) {
+            /* Convert to screen coordinates (y is flipped). */
+            const int xpos = x[i] + fxMesa->x_offset;
+            const int ypos = bottom - y[i];
+            if ((!mask || mask[i]) && visible_pixel(fxMesa, xpos, ypos)) {
+               PUT_FB_DATA(&ReadParams, GLushort, xpos, ypos, depth[i]);
+            }
+         }
+      }
+      WRITE_FB_SPAN_UNLOCK(fxMesa, GR_BUFFER_AUXBUFFER);
+      break;
+   case 24:
+   case 32:
+      /* Must happen before the aux buffer is locked. */
+      GetBackBufferInfo(fxMesa, &backBufferInfo);
+      /*
+       * Note that the _LOCK macro adds a curly brace,
+       * and the UNLOCK macro removes it.
+       */
+      WRITE_FB_SPAN_LOCK(fxMesa, info,
+                         GR_BUFFER_AUXBUFFER, GR_LFBWRITEMODE_ANY);
+      {
+         LFBParameters ReadParams;
+         GetFbParams(fxMesa, &info, &backBufferInfo,
+                     &ReadParams, sizeof(GLuint));
+         for (i = 0; i < n; i++) {
+            /* Convert to screen coordinates (y is flipped). */
+            const int xpos = x[i] + fxMesa->x_offset;
+            const int ypos = bottom - y[i];
+            if ((!mask || mask[i]) && visible_pixel(fxMesa, xpos, ypos)) {
+               GLuint d32 = depth[i];
+               if (stencil_size > 0) {
+                  /* Keep the top byte already in the buffer. */
+                  const GLuint old =
+                     GET_FB_DATA(&ReadParams, GLuint, xpos, ypos);
+                  d32 = (old & 0xFF000000) | (depth[i] & 0xFFFFFF);
+               }
+               PUT_FB_DATA(&ReadParams, GLuint, xpos, ypos, d32);
+            }
+         }
+      }
+      WRITE_FB_SPAN_UNLOCK(fxMesa, GR_BUFFER_AUXBUFFER);
+      break;
+   default:
+      /* Unsupported depth size: nothing is written. */
+      break;
+   }
+}
+
+
+/*
+ * Read depth values from scattered pixels of the aux (depth) buffer.
+ *
+ * ctx    - GL context; the driver context is taken from ctx->DriverCtx.
+ * rb     - renderbuffer being read (not used here).
+ * n      - number of pixels.
+ * x, y   - window-relative coordinates of each pixel.
+ * values - receives n values, stored as GLuint.
+ *
+ * With 16 depth bits each GLushort element is widened to GLuint.  With
+ * 24 or 32 depth bits the element is returned with its top byte
+ * cleared when stencil bits are present, unchanged otherwise.
+ */
+static void
+tdfxDDReadDepthPixels(GLcontext * ctx, struct gl_renderbuffer *rb, GLuint n,
+		      const GLint x[], const GLint y[], void *values)
+{
+   tdfxContextPtr fxMesa = (tdfxContextPtr) ctx->DriverCtx;
+   GLuint *out = (GLuint *) values;
+   const GLint flipY = fxMesa->height + fxMesa->y_offset - 1;
+   const GLuint depth_size = fxMesa->glCtx->Visual.depthBits;
+   GrLfbInfo_t info;
+   GrLfbInfo_t backInfo;
+   GLuint k;
+
+   if (MESA_VERBOSE & VERBOSE_DRIVER) {
+      fprintf(stderr, "tdfxmesa: tdfxDDReadDepthPixels(...)\n");
+   }
+
+   assert((depth_size == 16) || (depth_size == 24) || (depth_size == 32));
+   switch (depth_size) {
+   case 16:
+      GetBackBufferInfo(fxMesa, &backInfo);
+      /*
+       * The _LOCK macro opens a brace that the _UNLOCK macro closes.
+       */
+      READ_FB_SPAN_LOCK(fxMesa, info, GR_BUFFER_AUXBUFFER);
+      {
+         LFBParameters fb;
+         GetFbParams(fxMesa, &info, &backInfo, &fb, sizeof(GLushort));
+         for (k = 0; k < n; k++) {
+            /* Screen coordinates of this pixel. */
+            const int sx = x[k] + fxMesa->x_offset;
+            const int sy = flipY - y[k];
+            out[k] = GET_FB_DATA(&fb, GLushort, sx, sy);
+         }
+      }
+      READ_FB_SPAN_UNLOCK(fxMesa, GR_BUFFER_AUXBUFFER);
+      break;
+   case 24:
+   case 32:
+      GetBackBufferInfo(fxMesa, &backInfo);
+      /*
+       * The _LOCK macro opens a brace that the _UNLOCK macro closes.
+       */
+      READ_FB_SPAN_LOCK(fxMesa, info, GR_BUFFER_AUXBUFFER);
+      {
+         LFBParameters fb;
+         /* Bits of each element returned to the caller. */
+         const GLuint keep =
+            (fxMesa->glCtx->Visual.stencilBits > 0)
+            ? 0x00FFFFFF : 0xFFFFFFFF;
+         GetFbParams(fxMesa, &info, &backInfo, &fb, sizeof(GLuint));
+         for (k = 0; k < n; k++) {
+            /* Screen coordinates of this pixel. */
+            const int sx = x[k] + fxMesa->x_offset;
+            const int sy = flipY - y[k];
+            out[k] = GET_FB_DATA(&fb, GLuint, sx, sy) & keep;
+         }
+      }
+      READ_FB_SPAN_UNLOCK(fxMesa, GR_BUFFER_AUXBUFFER);
+      break;
+   default:
+      assert(0);
+   }
+}
+
+/*
+ * Stencil buffer read/write functions.
+ *
+ * The stencil value shares a 32-bit element with the depth value:
+ * the stencil occupies the top 8 bits and the depth the low 24 bits.
+ *
+ * EXTRACT_S_FROM_ZS - stencil byte of a combined element.
+ * EXTRACT_Z_FROM_ZS - 24-bit depth part of a combined element.
+ * BUILD_ZS          - combine depth z and stencil s; z is not masked,
+ *                     so it must already fit in 24 bits.
+ *
+ * NOTE(review): none of these macros is referenced by the functions
+ * visible in this part of the file, which use open-coded shifts and
+ * masks instead.
+ */
+#define EXTRACT_S_FROM_ZS(zs) (((zs) >> 24) & 0xFF)
+#define EXTRACT_Z_FROM_ZS(zs) ((zs) & 0xffffff)
+#define BUILD_ZS(z, s)  (((s) << 24) | (z))
+
+/*
+ * Write a horizontal span of stencil values.
+ *
+ * ctx    - GL context.
+ * rb     - renderbuffer being written (not used here).
+ * n      - number of values in the span.
+ * x, y   - window-relative position of the first value.
+ * values - n stencil values, read as GLubyte.
+ * mask   - optional per-value write mask; NULL means "write all".
+ *
+ * Each 32-bit element of the aux buffer is read, its low 24 bits are
+ * kept and its top byte is replaced by the stencil value.  A value is
+ * written only where the mask (when given) and the visibility mask
+ * produced by generate_vismask() are both non-zero.
+ *
+ * The split between the ordinary and the wrapped part of the line is
+ * computed from the screen coordinate scrX, the same coordinate the
+ * accessor macros are given.  It used to be computed from the
+ * window-relative x, which selected the wrong part of the line
+ * whenever the window's x offset was not zero.
+ */
+static void
+write_stencil_span(GLcontext * ctx, struct gl_renderbuffer *rb,
+                   GLuint n, GLint x, GLint y,
+                   const void *values, const GLubyte mask[])
+{
+   const GLubyte *stencil = (const GLubyte *) values;
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   GrLfbInfo_t info;
+   GrLfbInfo_t backBufferInfo;
+
+   /* Must happen before the aux buffer is locked. */
+   GetBackBufferInfo(fxMesa, &backBufferInfo);
+   /*
+    * Note that the _LOCK macro adds a curly brace,
+    * and the UNLOCK macro removes it.
+    */
+   WRITE_FB_SPAN_LOCK(fxMesa, info, GR_BUFFER_AUXBUFFER, GR_LFBWRITEMODE_ANY);
+   {
+      const GLint winY = fxMesa->y_offset + fxMesa->height - 1;
+      const GLint winX = fxMesa->x_offset;
+      /* Screen coordinates of the first element (y is flipped). */
+      const GLint scrX = winX + x;
+      const GLint scrY = winY - y;
+      LFBParameters ReadParams;
+      GLubyte visMask[MAX_WIDTH];
+      GLuint i;
+      int wrappedPartStart;
+
+      GetFbParams(fxMesa, &info, &backBufferInfo, &ReadParams,
+                  sizeof(GLuint));
+      /* Index of the first span element in the wrapped part. */
+      if (ReadParams.firstWrappedX <= scrX) {
+         wrappedPartStart = 0;
+      }
+      else if (n <= (ReadParams.firstWrappedX - scrX)) {
+         wrappedPartStart = n;
+      }
+      else {
+         wrappedPartStart = (ReadParams.firstWrappedX - scrX);
+      }
+      generate_vismask(fxMesa, scrX, scrY, n, visMask);
+      for (i = 0; i < wrappedPartStart; i++) {
+         if (visMask[i] && (!mask || mask[i])) {
+            GLuint z = GET_ORDINARY_FB_DATA(&ReadParams, GLuint,
+                                            scrX + i, scrY) & 0x00FFFFFF;
+            z |= (stencil[i] & 0xFF) << 24;
+            PUT_ORDINARY_FB_DATA(&ReadParams, GLuint, scrX + i, scrY, z);
+         }
+      }
+      for (; i < n; i++) {
+         if (visMask[i] && (!mask || mask[i])) {
+            GLuint z = GET_WRAPPED_FB_DATA(&ReadParams, GLuint,
+                                           scrX + i, scrY) & 0x00FFFFFF;
+            z |= (stencil[i] & 0xFF) << 24;
+            PUT_WRAPPED_FB_DATA(&ReadParams, GLuint, scrX + i, scrY, z);
+         }
+      }
+   }
+   WRITE_FB_SPAN_UNLOCK(fxMesa, GR_BUFFER_AUXBUFFER);
+}
+
+
+/*
+ * Write the same stencil value to every element of a span.
+ *
+ * The single byte pointed to by "value" is replicated n times into a
+ * temporary array of MAX_WIDTH entries, which is then handed to
+ * write_stencil_span() together with the unchanged span position and
+ * mask.  The temporary uses GLubyte, the type write_stencil_span()
+ * reads its values as, and "value" is read without discarding const.
+ */
+static void
+write_mono_stencil_span(GLcontext * ctx, struct gl_renderbuffer *rb,
+                        GLuint n, GLint x, GLint y,
+                        const void *value, const GLubyte mask[])
+{
+   const GLubyte stencilVal = *((const GLubyte *) value);
+   GLubyte stencils[MAX_WIDTH];
+   GLuint i;
+   for (i = 0; i < n; i++)
+      stencils[i] = stencilVal;
+   write_stencil_span(ctx, rb, n, x, y, stencils, mask);
+}
+
+
+/*
+ * Read a horizontal span of stencil values.
+ *
+ * ctx    - GL context.
+ * rb     - renderbuffer being read (not used here).
+ * n      - number of values to read.
+ * x, y   - window-relative position of the first value.
+ * values - receives n stencil values, stored as GLubyte.
+ *
+ * The stencil value is the top byte of each 32-bit aux buffer element.
+ */
+static void
+read_stencil_span(GLcontext * ctx, struct gl_renderbuffer *rb,
+                  GLuint n, GLint x, GLint y,
+                  void *values)
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   GLubyte *out = (GLubyte *) values;
+   GrLfbInfo_t info;
+   GrLfbInfo_t backInfo;
+
+   GetBackBufferInfo(fxMesa, &backInfo);
+   /*
+    * The _LOCK macro opens a brace that the _UNLOCK macro closes.
+    */
+   READ_FB_SPAN_LOCK(fxMesa, info, GR_BUFFER_AUXBUFFER);
+   {
+      /* Screen coordinates of the first element (y is flipped). */
+      const GLint scrX = x + fxMesa->x_offset;
+      const GLint scrY = (fxMesa->y_offset + fxMesa->height - 1) - y;
+      LFBParameters fb;
+      GLuint ordinaryCount;
+      GLuint k;
+
+      GetFbParams(fxMesa, &info, &backInfo, &fb, sizeof(GLuint));
+      /* How many leading elements lie in the ordinary part. */
+      if (fb.firstWrappedX <= scrX) {
+         ordinaryCount = 0;
+      }
+      else if (n <= (fb.firstWrappedX - scrX)) {
+         ordinaryCount = n;
+      }
+      else {
+         ordinaryCount = (fb.firstWrappedX - scrX);
+      }
+      for (k = 0; k < n; k++) {
+         GLuint zs;
+         if (k < ordinaryCount) {
+            zs = GET_ORDINARY_FB_DATA(&fb, GLuint, scrX + k, scrY);
+         }
+         else {
+            zs = GET_WRAPPED_FB_DATA(&fb, GLuint, scrX + k, scrY);
+         }
+         out[k] = (zs >> 24) & 0xFF;
+      }
+   }
+   READ_FB_SPAN_UNLOCK(fxMesa, GR_BUFFER_AUXBUFFER);
+}
+
+
+/*
+ * Write stencil values to scattered pixels.
+ *
+ * ctx    - GL context.
+ * rb     - renderbuffer being written (not used here).
+ * n      - number of pixels.
+ * x, y   - window-relative coordinates of each pixel.
+ * values - n stencil values, read as GLubyte.
+ * mask   - optional per-pixel write mask; NULL means "write all".
+ *
+ * For every pixel that passes the mask and visible_pixel(), the 32-bit
+ * aux buffer element is rewritten with its low 24 bits kept and its
+ * top byte replaced by the stencil value.
+ */
+static void
+write_stencil_pixels(GLcontext * ctx, struct gl_renderbuffer *rb,
+                     GLuint n, const GLint x[], const GLint y[],
+                     const void *values, const GLubyte mask[])
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   const GLubyte *src = (const GLubyte *) values;
+   GrLfbInfo_t info;
+   GrLfbInfo_t backInfo;
+
+   GetBackBufferInfo(fxMesa, &backInfo);
+   /*
+    * The _LOCK macro opens a brace that the _UNLOCK macro closes.
+    */
+   WRITE_FB_SPAN_LOCK(fxMesa, info, GR_BUFFER_AUXBUFFER, GR_LFBWRITEMODE_ANY);
+   {
+      const GLint originX = fxMesa->x_offset;
+      const GLint flipY = fxMesa->y_offset + fxMesa->height - 1;
+      LFBParameters fb;
+      GLuint k;
+
+      GetFbParams(fxMesa, &info, &backInfo, &fb, sizeof(GLuint));
+      for (k = 0; k < n; k++) {
+         /* Screen coordinates of this pixel. */
+         const GLint sx = originX + x[k];
+         const GLint sy = flipY - y[k];
+         GLuint zs;
+
+         if (mask && !mask[k]) {
+            continue;
+         }
+         if (!visible_pixel(fxMesa, sx, sy)) {
+            continue;
+         }
+         zs = GET_FB_DATA(&fb, GLuint, sx, sy) & 0x00FFFFFF;
+         zs |= (src[k] & 0xFF) << 24;
+         PUT_FB_DATA(&fb, GLuint, sx, sy, zs);
+      }
+   }
+   WRITE_FB_SPAN_UNLOCK(fxMesa, GR_BUFFER_AUXBUFFER);
+}
+
+
+/*
+ * Read stencil values from scattered pixels.
+ *
+ * ctx    - GL context.
+ * rb     - renderbuffer being read (not used here).
+ * n      - number of pixels.
+ * x, y   - window-relative coordinates of each pixel.
+ * values - receives n stencil values, stored as GLubyte.
+ *
+ * The stencil value is the top byte of each 32-bit aux buffer element.
+ */
+static void
+read_stencil_pixels(GLcontext * ctx, struct gl_renderbuffer *rb,
+                    GLuint n, const GLint x[], const GLint y[],
+                    void *values)
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   GLubyte *out = (GLubyte *) values;
+   GrLfbInfo_t info;
+   GrLfbInfo_t backInfo;
+
+   GetBackBufferInfo(fxMesa, &backInfo);
+   /*
+    * The _LOCK macro opens a brace that the _UNLOCK macro closes.
+    */
+   READ_FB_SPAN_LOCK(fxMesa, info, GR_BUFFER_AUXBUFFER);
+   {
+      const GLint originX = fxMesa->x_offset;
+      const GLint flipY = fxMesa->y_offset + fxMesa->height - 1;
+      LFBParameters fb;
+      GLuint k;
+
+      GetFbParams(fxMesa, &info, &backInfo, &fb, sizeof(GLuint));
+      for (k = 0; k < n; k++) {
+         /* Screen coordinates of this pixel. */
+         const GLint sx = originX + x[k];
+         const GLint sy = flipY - y[k];
+         const GLuint zs = GET_FB_DATA(&fb, GLuint, sx, sy);
+         out[k] = (zs >> 24) & 0xFF;
+      }
+   }
+   READ_FB_SPAN_UNLOCK(fxMesa, GR_BUFFER_AUXBUFFER);
+}
+
+/*
+ * Non-zero when the visual "vis" (a structure, not a pointer) has
+ * exactly r, g, b and a bits of red, green, blue and alpha.
+ * Every argument is parenthesised so that expressions such as
+ * "*visPtr" or "a | b" can be passed safely.
+ */
+#define VISUAL_EQUALS_RGBA(vis, r, g, b, a)        \
+   (((vis).redBits == (r)) &&                     \
+    ((vis).greenBits == (g)) &&                   \
+    ((vis).blueBits == (b)) &&                    \
+    ((vis).alphaBits == (a)))
+
+
+
+
+/**********************************************************************/
+/*                    Locking for swrast                              */
+/**********************************************************************/
+
+
+/*
+ * swrast SpanRenderStart hook (installed by tdfxDDInitSpanFuncs):
+ * invokes LOCK_HARDWARE on the driver context of ctx.  The matching
+ * UNLOCK_HARDWARE is issued by tdfxSpanRenderFinish().
+ */
+static void tdfxSpanRenderStart( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   LOCK_HARDWARE(fxMesa);
+}
+
+/*
+ * swrast SpanRenderFinish hook (installed by tdfxDDInitSpanFuncs):
+ * calls _swrast_flush() and then invokes UNLOCK_HARDWARE on the driver
+ * context of ctx, in that order.
+ */
+static void tdfxSpanRenderFinish( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   _swrast_flush( ctx );
+   UNLOCK_HARDWARE(fxMesa);
+}
+
+/**********************************************************************/
+/*                    Initialize swrast device driver                 */
+/**********************************************************************/
+
+/*
+ * Register this driver's span-rendering start/finish callbacks in the
+ * swrast device driver table of ctx.
+ */
+void tdfxDDInitSpanFuncs( GLcontext *ctx )
+{
+   struct swrast_device_driver *hooks;
+
+   hooks = _swrast_GetDeviceDriverReference( ctx );
+   hooks->SpanRenderFinish = tdfxSpanRenderFinish;
+   hooks->SpanRenderStart = tdfxSpanRenderStart;
+}
+
+
+
+/**
+ * Install the row/pixel access routines that match a renderbuffer.
+ *
+ * \param drb  renderbuffer whose Base function pointers are set;
+ *             the choice is made from drb->Base.InternalFormat.
+ * \param vis  visual; its colour bit counts select the routine set
+ *             for GL_RGBA buffers.
+ *
+ * GL_RGBA buffers with an unrecognised bit layout are reported via
+ * _mesa_problem().  Buffers of any other unlisted format are left
+ * unchanged.
+ */
+void
+tdfxSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
+{
+   switch (drb->Base.InternalFormat) {
+   case GL_RGBA:
+   {
+      const int rgb8 = (vis->redBits == 8 && vis->greenBits == 8
+                        && vis->blueBits == 8);
+
+      if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) {
+         tdfxInitPointers_RGB565(&drb->Base);
+      }
+      else if (rgb8 && vis->alphaBits == 0) {
+         tdfxInitPointers_RGB888(&drb->Base);
+      }
+      else if (rgb8 && vis->alphaBits == 8) {
+         tdfxInitPointers_ARGB8888(&drb->Base);
+      }
+      else {
+         _mesa_problem(NULL, "problem in tdfxSetSpanFunctions");
+      }
+      break;
+   }
+   case GL_DEPTH_COMPONENT16:
+   case GL_DEPTH_COMPONENT24:
+      /* Depth: no mono-value pixel writer is provided. */
+      drb->Base.GetRow        = tdfxDDReadDepthSpan;
+      drb->Base.PutRow        = tdfxDDWriteDepthSpan;
+      drb->Base.PutMonoRow    = tdfxDDWriteMonoDepthSpan;
+      drb->Base.GetValues     = tdfxDDReadDepthPixels;
+      drb->Base.PutValues     = tdfxDDWriteDepthPixels;
+      drb->Base.PutMonoValues = NULL;
+      break;
+   case GL_STENCIL_INDEX8_EXT:
+      /* Stencil: no mono-value pixel writer is provided. */
+      drb->Base.GetRow        = read_stencil_span;
+      drb->Base.PutRow        = write_stencil_span;
+      drb->Base.PutMonoRow    = write_mono_stencil_span;
+      drb->Base.GetValues     = read_stencil_pixels;
+      drb->Base.PutValues     = write_stencil_pixels;
+      drb->Base.PutMonoValues = NULL;
+      break;
+   default:
+      break;
+   }
+}
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_span.h b/src/mesa/drivers/dri/tdfx/tdfx_span.h
new file mode 100644
index 0000000000..6973f8d140
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_span.h
@@ -0,0 +1,48 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Brian Paul <brianp@valinux.com>
+ *
+ */
+
+#ifndef __TDFX_SPAN_H__
+#define __TDFX_SPAN_H__
+
+#include "main/context.h"
+#include "drirenderbuffer.h"
+
+extern void tdfxDDInitSpanFuncs( GLcontext *ctx );
+
+extern void
+tdfxSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis);
+
+#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_state.c b/src/mesa/drivers/dri/tdfx/tdfx_state.c
new file mode 100644
index 0000000000..dcbc7647f2
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_state.c
@@ -0,0 +1,1429 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * New fixes:
+ *	Daniel Borca <dborca@users.sourceforge.net>, 19 Jul 2004
+ *
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Brian Paul <brianp@valinux.com>
+ *      Keith Whitwell <keith@tungstengraphics.com> (port to 3.5)
+ *
+ */
+
+#include "main/mtypes.h"
+#include "main/colormac.h"
+
+#include "swrast/swrast.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "tdfx_context.h"
+#include "tdfx_state.h"
+#include "tdfx_tex.h"
+#include "tdfx_texman.h"
+#include "tdfx_texstate.h"
+#include "tdfx_render.h"
+
+
+
+/* =============================================================
+ * Alpha blending
+ */
+
+static void tdfxUpdateAlphaMode( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   GrCmpFnc_t func;
+   GrAlphaBlendFnc_t srcRGB, dstRGB, srcA, dstA;
+   GrAlphaBlendOp_t eqRGB, eqA;
+   GrAlpha_t ref = (GLint) (ctx->Color.AlphaRef * 255.0);   /* GL reference scaled by 255 */
+   /* Derives Glide alpha-test and blend values from ctx->Color and caches them in fxMesa->Color. */
+   GLboolean isNapalm = TDFX_IS_NAPALM(fxMesa);
+   GLboolean have32bpp = (ctx->Visual.greenBits == 8);   /* 8 green bits is taken to mean 32bpp */
+   GLboolean haveAlpha = fxMesa->haveHwAlpha;
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s()\n", __FUNCTION__ );
+   }
+
+   if ( ctx->Color.AlphaEnabled ) {
+      func = ctx->Color.AlphaFunc - GL_NEVER + GR_CMP_NEVER;   /* offset GL enum onto the Glide one */
+   } else {
+      func = GR_CMP_ALWAYS;
+   }
+   /* Blend factors are translated only when blending is on and no blend fallback is active. */
+   if ( ctx->Color.BlendEnabled
+        && (fxMesa->Fallback & TDFX_FALLBACK_BLEND) == 0 ) {
+      switch ( ctx->Color.BlendSrcRGB ) {
+      case GL_ZERO:
+	 srcRGB = GR_BLEND_ZERO;
+	 break;
+      case GL_ONE:
+	 srcRGB = GR_BLEND_ONE;
+	 break;
+      case GL_DST_COLOR:
+	 srcRGB = GR_BLEND_DST_COLOR;
+	 break;
+      case GL_ONE_MINUS_DST_COLOR:
+	 srcRGB = GR_BLEND_ONE_MINUS_DST_COLOR;
+	 break;
+      case GL_SRC_ALPHA:
+	 srcRGB = GR_BLEND_SRC_ALPHA;
+	 break;
+      case GL_ONE_MINUS_SRC_ALPHA:
+	 srcRGB = GR_BLEND_ONE_MINUS_SRC_ALPHA;
+	 break;
+      case GL_DST_ALPHA:
+	 srcRGB = haveAlpha ? GR_BLEND_DST_ALPHA : GR_BLEND_ONE/*JJJ*/;
+	 break;
+      case GL_ONE_MINUS_DST_ALPHA:
+	 srcRGB = haveAlpha ? GR_BLEND_ONE_MINUS_DST_ALPHA : GR_BLEND_ZERO/*JJJ*/;
+	 break;
+      case GL_SRC_ALPHA_SATURATE:
+	 srcRGB = GR_BLEND_ALPHA_SATURATE;
+	 break;
+      case GL_SRC_COLOR:
+         if (isNapalm) {
+	    srcRGB = GR_BLEND_SAME_COLOR_EXT;
+	    break;
+         }   /* not Napalm: falls through the next case to default */
+      case GL_ONE_MINUS_SRC_COLOR:
+         if (isNapalm) {
+	    srcRGB = GR_BLEND_ONE_MINUS_SAME_COLOR_EXT;
+	    break;
+         }   /* not Napalm: falls through to default */
+      default:
+	 srcRGB = GR_BLEND_ONE;
+      }
+
+      switch ( ctx->Color.BlendSrcA ) {
+      case GL_ZERO:
+	 srcA = GR_BLEND_ZERO;
+	 break;
+      case GL_ONE:
+	 srcA = GR_BLEND_ONE;
+	 break;
+      case GL_SRC_COLOR:
+      case GL_SRC_ALPHA:
+	 srcA = have32bpp ? GR_BLEND_SRC_ALPHA : GR_BLEND_ONE/*JJJ*/;
+	 break;
+      case GL_ONE_MINUS_SRC_COLOR:
+      case GL_ONE_MINUS_SRC_ALPHA:
+	 srcA = have32bpp ? GR_BLEND_ONE_MINUS_SRC_ALPHA : GR_BLEND_ONE/*JJJ*/;
+	 break;
+      case GL_DST_COLOR:
+      case GL_DST_ALPHA:
+	 srcA = (have32bpp && haveAlpha) ? GR_BLEND_DST_ALPHA : GR_BLEND_ONE/*JJJ*/;
+	 break;
+      case GL_ONE_MINUS_DST_COLOR:
+      case GL_ONE_MINUS_DST_ALPHA:
+	 srcA = (have32bpp && haveAlpha) ? GR_BLEND_ONE_MINUS_DST_ALPHA : GR_BLEND_ZERO/*JJJ*/;
+	 break;
+      case GL_SRC_ALPHA_SATURATE:
+         srcA = GR_BLEND_ONE;
+	 break;
+      default:
+	 srcA = GR_BLEND_ONE;
+      }
+
+      switch ( ctx->Color.BlendDstRGB ) {
+      case GL_ZERO:
+	 dstRGB = GR_BLEND_ZERO;
+	 break;
+      case GL_ONE:
+	 dstRGB = GR_BLEND_ONE;
+	 break;
+      case GL_SRC_COLOR:
+	 dstRGB = GR_BLEND_SRC_COLOR;
+	 break;
+      case GL_ONE_MINUS_SRC_COLOR:
+	 dstRGB = GR_BLEND_ONE_MINUS_SRC_COLOR;
+	 break;
+      case GL_SRC_ALPHA:
+	 dstRGB = GR_BLEND_SRC_ALPHA;
+	 break;
+      case GL_ONE_MINUS_SRC_ALPHA:
+	 dstRGB = GR_BLEND_ONE_MINUS_SRC_ALPHA;
+	 break;
+      case GL_DST_ALPHA:
+	 dstRGB = haveAlpha ? GR_BLEND_DST_ALPHA : GR_BLEND_ONE/*JJJ*/;
+	 break;
+      case GL_ONE_MINUS_DST_ALPHA:
+	 dstRGB = haveAlpha ? GR_BLEND_ONE_MINUS_DST_ALPHA : GR_BLEND_ZERO/*JJJ*/;
+	 break;
+      case GL_DST_COLOR:
+         if (isNapalm) {
+	    dstRGB = GR_BLEND_SAME_COLOR_EXT;
+	    break;
+         }   /* not Napalm: falls through the next case to default */
+      case GL_ONE_MINUS_DST_COLOR:
+         if (isNapalm) {
+	    dstRGB = GR_BLEND_ONE_MINUS_SAME_COLOR_EXT;
+	    break;
+         }   /* not Napalm: falls through to default */
+      default:
+	 dstRGB = GR_BLEND_ZERO;
+      }
+
+      switch ( ctx->Color.BlendDstA ) {
+      case GL_ZERO:
+	 dstA = GR_BLEND_ZERO;
+	 break;
+      case GL_ONE:
+	 dstA = GR_BLEND_ONE;
+	 break;
+      case GL_SRC_COLOR:
+      case GL_SRC_ALPHA:
+	 dstA = have32bpp ? GR_BLEND_SRC_ALPHA : GR_BLEND_ZERO/*JJJ*/;
+	 break;
+      case GL_ONE_MINUS_SRC_COLOR:
+      case GL_ONE_MINUS_SRC_ALPHA:
+	 dstA = have32bpp ? GR_BLEND_ONE_MINUS_SRC_ALPHA : GR_BLEND_ZERO/*JJJ*/;
+	 break;
+      case GL_DST_COLOR:
+      case GL_DST_ALPHA:
+	 dstA = have32bpp ? GR_BLEND_DST_ALPHA : GR_BLEND_ONE/*JJJ*/;
+	 break;
+      case GL_ONE_MINUS_DST_COLOR:
+      case GL_ONE_MINUS_DST_ALPHA:
+	 dstA = have32bpp ? GR_BLEND_ONE_MINUS_DST_ALPHA : GR_BLEND_ZERO/*JJJ*/;
+	 break;
+      default:
+	 dstA = GR_BLEND_ZERO;
+      }
+
+      switch ( ctx->Color.BlendEquationRGB ) {
+      case GL_FUNC_SUBTRACT:
+	 eqRGB = GR_BLEND_OP_SUB;
+	 break;
+      case GL_FUNC_REVERSE_SUBTRACT:
+	 eqRGB = GR_BLEND_OP_REVSUB;
+	 break;
+      case GL_FUNC_ADD:
+      default:
+	 eqRGB = GR_BLEND_OP_ADD;
+	 break;
+      }
+
+      switch ( ctx->Color.BlendEquationA ) {
+      case GL_FUNC_SUBTRACT:
+	 eqA = GR_BLEND_OP_SUB;
+	 break;
+      case GL_FUNC_REVERSE_SUBTRACT:
+	 eqA = GR_BLEND_OP_REVSUB;
+	 break;
+      case GL_FUNC_ADD:
+      default:
+	 eqA = GR_BLEND_OP_ADD;
+	 break;
+      }
+   } else {
+      /* blend disabled */
+      srcRGB = GR_BLEND_ONE;
+      dstRGB = GR_BLEND_ZERO;
+      eqRGB = GR_BLEND_OP_ADD;
+      srcA = GR_BLEND_ONE;
+      dstA = GR_BLEND_ZERO;
+      eqA = GR_BLEND_OP_ADD;
+   }
+   /* Store the results; a dirty bit is raised only for a value that actually changed. */
+   if ( fxMesa->Color.AlphaFunc != func ) {
+      fxMesa->Color.AlphaFunc = func;
+      fxMesa->dirty |= TDFX_UPLOAD_ALPHA_TEST;
+   }
+   if ( fxMesa->Color.AlphaRef != ref ) {
+      fxMesa->Color.AlphaRef = ref;
+      fxMesa->dirty |= TDFX_UPLOAD_ALPHA_REF;
+   }
+
+   if ( fxMesa->Color.BlendSrcRGB != srcRGB ||
+	fxMesa->Color.BlendDstRGB != dstRGB ||
+	fxMesa->Color.BlendEqRGB != eqRGB ||
+	fxMesa->Color.BlendSrcA != srcA ||
+	fxMesa->Color.BlendDstA != dstA ||
+	fxMesa->Color.BlendEqA != eqA )
+   {
+      fxMesa->Color.BlendSrcRGB = srcRGB;
+      fxMesa->Color.BlendDstRGB = dstRGB;
+      fxMesa->Color.BlendEqRGB = eqRGB;
+      fxMesa->Color.BlendSrcA = srcA;
+      fxMesa->Color.BlendDstA = dstA;
+      fxMesa->Color.BlendEqA = eqA;
+      fxMesa->dirty |= TDFX_UPLOAD_BLEND_FUNC;   /* one bit covers all six blend values */
+   }
+}
+
+static void tdfxDDAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   /* func and ref are not read here; the change is only recorded as TDFX_NEW_ALPHA. */
+   FLUSH_BATCH( fxMesa );
+   fxMesa->new_state |= TDFX_NEW_ALPHA;
+}
+
+static void tdfxDDBlendEquationSeparate( GLcontext *ctx, 
+					 GLenum modeRGB, GLenum modeA )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   /* The modes are only sanity-checked, not stored; the change is recorded as TDFX_NEW_ALPHA. */
+   assert( modeRGB == modeA );   /* differing RGB/alpha equations are not expected here */
+   FLUSH_BATCH( fxMesa );
+   fxMesa->new_state |= TDFX_NEW_ALPHA;
+}
+
+static void tdfxDDBlendFuncSeparate( GLcontext *ctx,
+				     GLenum sfactorRGB, GLenum dfactorRGB,
+				     GLenum sfactorA, GLenum dfactorA )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   /* The four factors are not read here; the change is only recorded as TDFX_NEW_ALPHA. */
+   FLUSH_BATCH( fxMesa );
+   fxMesa->new_state |= TDFX_NEW_ALPHA;
+
+   /*
+    * XXX - Voodoo5 seems to suffer from precision problems in some
+    * blend modes.  To pass all the conformance tests we'd have to
+    * fall back to software for many modes.  Revisit someday.
+    */
+}
+
+/* =============================================================
+ * Stipple
+ */
+
+void tdfxUpdateStipple( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   GrStippleMode_t wanted;
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s()\n", __FUNCTION__ );
+   }
+
+   FLUSH_BATCH( fxMesa );
+
+   /* The Glide stipple mode follows the GL polygon-stipple flag. */
+   wanted = ctx->Polygon.StippleFlag ? GR_STIPPLE_PATTERN : GR_STIPPLE_DISABLE;
+   if ( fxMesa->Stipple.Mode == wanted ) {
+      return;   /* cached mode already matches: no upload requested */
+   }
+
+   fxMesa->Stipple.Mode = wanted;
+   fxMesa->dirty |= TDFX_UPLOAD_STIPPLE;
+}
+
+
+/* =============================================================
+ * Depth testing
+ */
+
+static void tdfxUpdateZMode( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   GrCmpFnc_t zFunc;
+   FxI32 zBias;
+   FxBool zWrite;
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s()\n", __FUNCTION__ );
+   }
+
+   /* Depth test off: fragments always pass and the z-buffer is not
+    * touched.  Otherwise offset the GL compare enum onto the Glide one. */
+   if ( !ctx->Depth.Test ) {
+      zFunc = GR_CMP_ALWAYS;
+      zWrite = FXFALSE;
+   } else {
+      zFunc = ctx->Depth.Func - GL_NEVER + GR_CMP_NEVER;
+      zWrite = ctx->Depth.Mask;
+   }
+   zBias = (FxI32) (ctx->Polygon.OffsetUnits * TDFX_DEPTH_BIAS_SCALE);
+   /* The clear value is refreshed unconditionally; no dirty bit is set for it. */
+   fxMesa->Depth.Clear = (FxU32) (ctx->DrawBuffer->_DepthMaxF * ctx->Depth.Clear);
+
+   /* Flag only the pieces of depth state that actually changed. */
+   if ( fxMesa->Depth.Func != zFunc ) {
+      fxMesa->Depth.Func = zFunc;
+      fxMesa->dirty |= TDFX_UPLOAD_DEPTH_FUNC | TDFX_UPLOAD_DEPTH_MASK;
+   }
+   if ( fxMesa->Depth.Mask != zWrite ) {
+      fxMesa->Depth.Mask = zWrite;
+      fxMesa->dirty |= TDFX_UPLOAD_DEPTH_MASK;
+   }
+   if ( fxMesa->Depth.Bias != zBias ) {
+      fxMesa->Depth.Bias = zBias;
+      fxMesa->dirty |= TDFX_UPLOAD_DEPTH_BIAS;
+   }
+}
+
+static void tdfxDDDepthFunc( GLcontext *ctx, GLenum func )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   /* func is not read here; the change is only recorded as TDFX_NEW_DEPTH. */
+   FLUSH_BATCH( fxMesa );
+   fxMesa->new_state |= TDFX_NEW_DEPTH;
+}
+
+static void tdfxDDDepthMask( GLcontext *ctx, GLboolean flag )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   /* flag is not read here; the change is only recorded as TDFX_NEW_DEPTH. */
+   FLUSH_BATCH( fxMesa );
+   fxMesa->new_state |= TDFX_NEW_DEPTH;
+}
+
+static void tdfxDDClearDepth( GLcontext *ctx, GLclampd d )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   /* d is not read here; tdfxUpdateZMode() derives the clear value from ctx->Depth.Clear. */
+   FLUSH_BATCH( fxMesa );
+   fxMesa->new_state |= TDFX_NEW_DEPTH;
+}
+
+
+
+/* =============================================================
+ * Stencil
+ */
+
+
+/* Translate a GL stencil operation enum into the matching Glide stencil op.
+ */
+static GrStencil_t convertGLStencilOp( GLenum op )
+{
+   /* Value handed back for an unrecognized op, after the problem has
+    * been reported in the default case below.
+    */
+   GrStencil_t glideOp = GR_STENCILOP_KEEP;
+
+   /* GL_INCR/GL_DECR use the clamping Glide ops, the _WRAP_EXT forms the wrapping ones. */
+   switch ( op ) {
+   case GL_KEEP:           glideOp = GR_STENCILOP_KEEP;        break;
+   case GL_ZERO:           glideOp = GR_STENCILOP_ZERO;        break;
+   case GL_REPLACE:        glideOp = GR_STENCILOP_REPLACE;     break;
+   case GL_INCR:           glideOp = GR_STENCILOP_INCR_CLAMP;  break;
+   case GL_DECR:           glideOp = GR_STENCILOP_DECR_CLAMP;  break;
+   case GL_INVERT:         glideOp = GR_STENCILOP_INVERT;      break;
+   case GL_INCR_WRAP_EXT:  glideOp = GR_STENCILOP_INCR_WRAP;   break;
+   case GL_DECR_WRAP_EXT:  glideOp = GR_STENCILOP_DECR_WRAP;   break;
+   default:
+      _mesa_problem( NULL, "bad stencil op in convertGLStencilOp" );
+      break;
+   }
+
+   return glideOp;
+}
+
+
+static void tdfxUpdateStencil( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s()\n", __FUNCTION__ );
+   }
+   if ( !fxMesa->haveHwStencil ) {
+      return;   /* without hardware stencil neither state nor dirty bits change */
+   }
+   if ( ctx->Stencil._Enabled ) {   /* mirror the index-0 GL stencil state */
+      fxMesa->Stencil.Function = ctx->Stencil.Function[0] - GL_NEVER + GR_CMP_NEVER;
+      fxMesa->Stencil.FailFunc = convertGLStencilOp(ctx->Stencil.FailFunc[0]);
+      fxMesa->Stencil.ZFailFunc = convertGLStencilOp(ctx->Stencil.ZFailFunc[0]);
+      fxMesa->Stencil.ZPassFunc = convertGLStencilOp(ctx->Stencil.ZPassFunc[0]);
+      fxMesa->Stencil.RefValue = ctx->Stencil.Ref[0] & 0xff;
+      fxMesa->Stencil.ValueMask = ctx->Stencil.ValueMask[0] & 0xff;
+      fxMesa->Stencil.WriteMask = ctx->Stencil.WriteMask[0] & 0xff;
+      fxMesa->Stencil.Clear = ctx->Stencil.Clear & 0xff;
+   }
+   fxMesa->dirty |= TDFX_UPLOAD_STENCIL;   /* raised even when stencil is disabled */
+}
+
+
+static void
+tdfxDDStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func,
+                           GLint ref, GLuint mask )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   /* face, func, ref and mask are not read here; only TDFX_NEW_STENCIL is recorded. */
+   FLUSH_BATCH( fxMesa );
+   fxMesa->new_state |= TDFX_NEW_STENCIL;
+}
+
+static void
+tdfxDDStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   /* face and mask are not read here; only TDFX_NEW_STENCIL is recorded. */
+   FLUSH_BATCH( fxMesa );
+   fxMesa->new_state |= TDFX_NEW_STENCIL;
+}
+
+static void
+tdfxDDStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum sfail,
+                         GLenum zfail, GLenum zpass )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   /* face and the three ops are not read here; only TDFX_NEW_STENCIL is recorded. */
+   FLUSH_BATCH( fxMesa );
+   fxMesa->new_state |= TDFX_NEW_STENCIL;
+}
+
+
+/* =============================================================
+ * Fog - orthographic fog still not working
+ */
+
+static void tdfxUpdateFogAttrib( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   GrFogMode_t fogMode = GR_FOG_DISABLE;
+   GrColor_t fogColor;
+   GLboolean tableStale;
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s()\n", __FUNCTION__ );
+   }
+
+   /* Enabled fog always uses the fog table, indexed by the fog
+    * coordinate when that is the selected source and by Q otherwise.
+    */
+   if ( ctx->Fog.Enabled ) {
+      fogMode = (ctx->Fog.FogCoordinateSource == GL_FOG_COORDINATE_EXT)
+         ? GR_FOG_WITH_TABLE_ON_FOGCOORD_EXT : GR_FOG_WITH_TABLE_ON_Q;
+   }
+
+   fogColor = TDFXPACKCOLOR888((GLubyte)(ctx->Fog.Color[0]*255.0F),
+                               (GLubyte)(ctx->Fog.Color[1]*255.0F),
+                               (GLubyte)(ctx->Fog.Color[2]*255.0F));
+
+   if ( fxMesa->Fog.Mode != fogMode ) {
+      /* A mode change also requests a vertex-layout upload. */
+      fxMesa->Fog.Mode = fogMode;
+      fxMesa->dirty |= TDFX_UPLOAD_FOG_MODE | TDFX_UPLOAD_VERTEX_LAYOUT;
+   }
+   if ( fxMesa->Fog.Color != fogColor ) {
+      fxMesa->Fog.Color = fogColor;
+      fxMesa->dirty |= TDFX_UPLOAD_FOG_COLOR;
+   }
+
+   /* The fog table is regenerated whenever any of its inputs changed. */
+   tableStale = ( fxMesa->Fog.TableMode != ctx->Fog.Mode ||
+                  fxMesa->Fog.Density != ctx->Fog.Density ||
+                  fxMesa->Fog.Near != ctx->Fog.Start ||
+                  fxMesa->Fog.Far != ctx->Fog.End );
+   if ( !tableStale ) {
+      return;
+   }
+
+   if ( ctx->Fog.Mode == GL_EXP ) {
+      fxMesa->Glide.guFogGenerateExp( fxMesa->Fog.Table, ctx->Fog.Density );
+   } else if ( ctx->Fog.Mode == GL_EXP2 ) {
+      fxMesa->Glide.guFogGenerateExp2( fxMesa->Fog.Table, ctx->Fog.Density );
+   } else if ( ctx->Fog.Mode == GL_LINEAR ) {
+      fxMesa->Glide.guFogGenerateLinear( fxMesa->Fog.Table,
+                                         ctx->Fog.Start, ctx->Fog.End );
+   }
+
+   /* Remember the inputs the table was built from. */
+   fxMesa->Fog.TableMode = ctx->Fog.Mode;
+   fxMesa->Fog.Density = ctx->Fog.Density;
+   fxMesa->Fog.Near = ctx->Fog.Start;
+   fxMesa->Fog.Far = ctx->Fog.End;
+   fxMesa->dirty |= TDFX_UPLOAD_FOG_TABLE;
+}
+
+static void tdfxDDFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   GLboolean perVertex;
+
+   FLUSH_BATCH( fxMesa );
+   fxMesa->new_state |= TDFX_NEW_FOG;
+
+   /* Only a change of the fog-coordinate source needs extra work in
+    * this function; for every other fog parameter the TDFX_NEW_FOG
+    * flag raised above is all that happens here.
+    */
+   if ( pname != GL_FOG_COORDINATE_SOURCE_EXT ) {
+      return;
+   }
+
+   /* An explicit fog coordinate allows vertex fog and disallows pixel
+    * fog in both swrast and tnl; any other source does the opposite.
+    * The two settings are always exact complements of each other.
+    */
+   perVertex = ( (GLenum)*param == GL_FOG_COORDINATE_EXT ) ? GL_TRUE : GL_FALSE;
+
+   _swrast_allow_vertex_fog( ctx, perVertex );
+   _swrast_allow_pixel_fog( ctx, !perVertex );
+   _tnl_allow_vertex_fog( ctx, perVertex );
+   _tnl_allow_pixel_fog( ctx, !perVertex );
+}
+
+
+/* =============================================================
+ * Clipping
+ */
+
+static int intersect_rect( drm_clip_rect_t *out,
+			   const drm_clip_rect_t *a,
+			   const drm_clip_rect_t *b)
+{
+   /* Start from a, then pull each edge inwards wherever b is tighter. */
+   *out = *a;
+   out->x1 = (b->x1 > out->x1) ? b->x1 : out->x1;
+   out->y1 = (b->y1 > out->y1) ? b->y1 : out->y1;
+   out->x2 = (b->x2 < out->x2) ? b->x2 : out->x2;
+   out->y2 = (b->y2 < out->y2) ? b->y2 : out->y2;
+   /* 1 only when the overlap has positive width and height, else 0. */
+   return (out->x1 < out->x2) && (out->y1 < out->y2);
+}
+
+
+/*
+ * Examine XF86 cliprect list and scissor state to recompute our
+ * cliprect list.
+ */
+void tdfxUpdateClipping( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa;
+   __DRIdrawable *dPriv;
+
+   /* Check each pointer before it is dereferenced. */
+   assert(ctx);
+   fxMesa = TDFX_CONTEXT(ctx);
+   assert(fxMesa);
+   dPriv = fxMesa->driDrawable;
+   assert(dPriv);
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s()\n", __FUNCTION__ );
+   }
+
+   /* Track the drawable geometry; a change also refreshes the viewport. */
+   if ( dPriv->x != fxMesa->x_offset || dPriv->y != fxMesa->y_offset ||
+	dPriv->w != fxMesa->width || dPriv->h != fxMesa->height ) {
+      fxMesa->x_offset = dPriv->x;
+      fxMesa->y_offset = dPriv->y;
+      fxMesa->width = dPriv->w;
+      fxMesa->height = dPriv->h;
+      fxMesa->y_delta =
+	 fxMesa->screen_height - fxMesa->y_offset - fxMesa->height;
+      tdfxUpdateViewport( ctx );
+   }
+
+   /* Only a list built by an earlier scissored update was allocated
+    * here; otherwise pClipRects just points at dPriv->pClipRects. */
+   if (fxMesa->scissoredClipRects) {
+      free(fxMesa->pClipRects);
+   }
+
+   /* Default to the drawable's own cliprects; this also covers the case
+    * where the scissored list below cannot be allocated. */
+   fxMesa->numClipRects = dPriv->numClipRects;
+   fxMesa->pClipRects = dPriv->pClipRects;
+   fxMesa->scissoredClipRects = GL_FALSE;
+
+   if (ctx->Scissor.Enabled) {
+      /* intersect OpenGL scissor box with all cliprects to make a new
+       * list of cliprects.
+       */
+      drm_clip_rect_t scissor;
+      drm_clip_rect_t *rects;
+      int x1 = ctx->Scissor.X + fxMesa->x_offset;
+      int y1 = fxMesa->screen_height - fxMesa->y_delta
+             - ctx->Scissor.Y - ctx->Scissor.Height;
+      int x2 = x1 + ctx->Scissor.Width;
+      int y2 = y1 + ctx->Scissor.Height;
+      scissor.x1 = MAX2(x1, 0);
+      scissor.y1 = MAX2(y1, 0);
+      scissor.x2 = MAX2(x2, 0);
+      scissor.y2 = MAX2(y2, 0);
+      assert(scissor.x2 >= scissor.x1);
+      assert(scissor.y2 >= scissor.y1);
+
+      rects = malloc(dPriv->numClipRects * sizeof *rects);
+      if (rects) {
+         int i, kept = 0;
+         for (i = 0; i < dPriv->numClipRects; i++) {
+            if (intersect_rect(&rects[kept], &scissor, &dPriv->pClipRects[i])) {
+               kept++;
+            }
+         }
+         fxMesa->numClipRects = kept;
+         fxMesa->pClipRects = rects;
+         fxMesa->scissoredClipRects = GL_TRUE;
+      }
+   }
+
+   fxMesa->dirty |= TDFX_UPLOAD_CLIP;
+}
+
+
+
+/* =============================================================
+ * Culling
+ */
+
+void tdfxUpdateCull( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   GrCullMode_t cull = GR_CULL_DISABLE;
+
+   /* KW: don't need to check raster_primitive here as we don't
+    * attempt to draw lines or points with triangles.
+    */
+   if ( ctx->Polygon.CullFlag ) {
+      const GLenum face = ctx->Polygon.CullFaceMode;
+      const GLboolean ccw = ( ctx->Polygon.FrontFace == GL_CCW );
+
+      if ( face == GL_FRONT_AND_BACK ) {
+	 /* Handled as a fallback on triangles in tdfx_tris.c */
+	 return;
+      }
+
+      /* Glide takes GR_CULL_POSITIVE or GR_CULL_NEGATIVE; which one
+       * applies depends on both the face being culled and the winding
+       * GL currently treats as front-facing.
+       */
+      if ( face == GL_FRONT ) {
+	 cull = ccw ? GR_CULL_POSITIVE : GR_CULL_NEGATIVE;
+      }
+      else if ( face == GL_BACK ) {
+	 cull = ccw ? GR_CULL_NEGATIVE : GR_CULL_POSITIVE;
+      }
+      else {
+	 /* Unknown face: if ASSERT does not abort, culling stays disabled. */
+	 ASSERT(0);
+      }
+   }
+
+   /* Cache the mode and request an upload only when it changed. */
+   if ( fxMesa->CullMode == cull ) {
+      return;
+   }
+
+   fxMesa->CullMode = cull;
+   fxMesa->dirty |= TDFX_UPLOAD_CULL;
+}
+
+static void tdfxDDCullFace( GLcontext *ctx, GLenum mode )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   /* mode is not read here; the change is only recorded as TDFX_NEW_CULL. */
+   FLUSH_BATCH( fxMesa );
+   fxMesa->new_state |= TDFX_NEW_CULL;
+}
+
+static void tdfxDDFrontFace( GLcontext *ctx, GLenum mode )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   /* mode is not read here; winding affects culling, so TDFX_NEW_CULL is recorded. */
+   FLUSH_BATCH( fxMesa );
+   fxMesa->new_state |= TDFX_NEW_CULL;
+}
+
+
+/* =============================================================
+ * Line drawing.
+ */
+
+static void tdfxUpdateLine( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   /* No line state is computed here; the function only requests a line-state upload. */
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s()\n", __FUNCTION__ );
+   }
+
+   FLUSH_BATCH( fxMesa );
+   fxMesa->dirty |= TDFX_UPLOAD_LINE;
+}
+
+
+static void tdfxDDLineWidth( GLcontext *ctx, GLfloat width )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   FLUSH_BATCH( fxMesa );   /* width itself is not read here */
+   fxMesa->new_state |= TDFX_NEW_LINE;   /* only record that line state changed */
+}
+
+
+/* =============================================================
+ * Color Attributes
+ */
+
+static void tdfxDDColorMask( GLcontext *ctx,
+			     GLboolean r, GLboolean g,
+			     GLboolean b, GLboolean a )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+
+   FLUSH_BATCH( fxMesa );
+
+   if ( fxMesa->Color.ColorMask[RCOMP] == r &&
+	fxMesa->Color.ColorMask[GCOMP] == g &&
+	fxMesa->Color.ColorMask[BCOMP] == b &&
+	fxMesa->Color.ColorMask[ACOMP] == a ) {
+      return;   /* cached mask already equals the request */
+   }
+   fxMesa->Color.ColorMask[ACOMP] = a;
+   fxMesa->Color.ColorMask[BCOMP] = b;
+   fxMesa->Color.ColorMask[GCOMP] = g;
+   fxMesa->Color.ColorMask[RCOMP] = r;
+   fxMesa->dirty |= TDFX_UPLOAD_COLOR_MASK;
+   if (ctx->Visual.redBits < 8) {
+      /* Can't do RGB colormasking in 16bpp mode. */
+      /* We can completely ignore the alpha mask. */
+      FALLBACK( fxMesa, TDFX_FALLBACK_COLORMASK, (r != g || g != b) );
+   }
+}
+
+
+static void tdfxDDClearColor( GLcontext *ctx,
+			      const GLfloat color[4] )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   GLubyte rgba[4];
+   int i;
+   FLUSH_BATCH( fxMesa );
+   for (i = 0; i < 4; i++) {   /* convert each float channel to a byte */
+      CLAMPED_FLOAT_TO_UBYTE(rgba[i], color[i]);
+   }
+   fxMesa->Color.ClearColor = TDFXPACKCOLOR888( rgba[0], rgba[1], rgba[2] );
+   fxMesa->Color.ClearAlpha = rgba[3];   /* alpha is kept apart from the packed RGB */
+}
+
+
+/* =============================================================
+ * Light Model
+ */
+
+static void tdfxDDLightModelfv( GLcontext *ctx, GLenum pname,
+				const GLfloat *param )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   if ( pname != GL_LIGHT_MODEL_COLOR_CONTROL ) {
+      return;   /* no other light-model parameter is acted on here */
+   }
+   FALLBACK( fxMesa, TDFX_FALLBACK_SPECULAR,
+	     (ctx->Light.Enabled &&
+	      ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR ));
+}
+
+static void tdfxDDShadeModel( GLcontext *ctx, GLenum mode )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   /* mode is not read here; note that the flag raised below is TDFX_NEW_TEXTURE. */
+   /* FIXME: Can we implement native flat shading? */
+   FLUSH_BATCH( fxMesa );
+   fxMesa->new_state |= TDFX_NEW_TEXTURE;
+}
+
+
+/* =============================================================
+ * Scissor
+ */
+
+static void
+tdfxDDScissor(GLcontext * ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   FLUSH_BATCH( fxMesa );   /* x, y, w and h are not read here */
+   fxMesa->new_state |= TDFX_NEW_CLIP;   /* tdfxUpdateClipping() reads the box from ctx->Scissor */
+}
+
+/* =============================================================
+ * Render
+ */
+
+static void tdfxUpdateRenderAttrib( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   FLUSH_BATCH( fxMesa );
+   fxMesa->dirty |= TDFX_UPLOAD_RENDER_BUFFER;   /* only requests a render-buffer upload */
+}
+
+/* =============================================================
+ * Viewport
+ */
+
+void tdfxUpdateViewport( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   const GLfloat *win = ctx->Viewport._WindowMap.m;
+   GLfloat *hw = fxMesa->hw_viewport;
+   /* Scale factors and the Z translation are copied unchanged. */
+   hw[MAT_SX] = win[MAT_SX];
+   hw[MAT_SY] = win[MAT_SY];
+   hw[MAT_SZ] = win[MAT_SZ];
+   hw[MAT_TZ] = win[MAT_TZ];
+   /* X/Y translations add fxMesa->x_offset / y_delta and the TRI_*_OFFSET constants. */
+   hw[MAT_TX] = win[MAT_TX] + fxMesa->x_offset + TRI_X_OFFSET;
+   hw[MAT_TY] = win[MAT_TY] + fxMesa->y_delta + TRI_Y_OFFSET;
+   fxMesa->SetupNewInputs |= VERT_BIT_POS;
+}
+
+
+static void tdfxDDViewport( GLcontext *ctx, GLint x, GLint y,
+			    GLsizei w, GLsizei h )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   FLUSH_BATCH( fxMesa );   /* x, y, w and h are not read here */
+   fxMesa->new_state |= TDFX_NEW_VIEWPORT;   /* only record that the viewport changed */
+}
+
+
+static void tdfxDDDepthRange( GLcontext *ctx, GLclampd nearVal, GLclampd farVal )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   FLUSH_BATCH( fxMesa );   /* nearVal and farVal are not read here */
+   fxMesa->new_state |= TDFX_NEW_VIEWPORT;   /* depth range shares the viewport flag */
+}
+
+
+/* =============================================================
+ * State enable/disable
+ */
+
+static void tdfxDDEnable( GLcontext *ctx, GLenum cap, GLboolean state )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   /* Per-capability enable/disable handling; caps not listed below are ignored. */
+   switch ( cap ) {
+   case GL_ALPHA_TEST:
+      FLUSH_BATCH( fxMesa );
+      fxMesa->new_state |= TDFX_NEW_ALPHA;
+      break;
+
+   case GL_BLEND:   /* also re-evaluates the logic-op fallback */
+      FLUSH_BATCH( fxMesa );
+      fxMesa->new_state |= TDFX_NEW_ALPHA;
+      FALLBACK( fxMesa, TDFX_FALLBACK_LOGICOP,
+		(ctx->Color.ColorLogicOpEnabled &&
+		 ctx->Color.LogicOp != GL_COPY)/*JJJ - more blending*/);
+      break;
+
+   case GL_CULL_FACE:
+      FLUSH_BATCH( fxMesa );
+      fxMesa->new_state |= TDFX_NEW_CULL;
+      break;
+
+   case GL_DEPTH_TEST:
+      FLUSH_BATCH( fxMesa );
+      fxMesa->new_state |= TDFX_NEW_DEPTH;
+      break;
+
+   case GL_DITHER:   /* handled immediately: sets the dither mode and a dirty bit */
+      FLUSH_BATCH( fxMesa );
+      if ( state ) {
+	 fxMesa->Color.Dither = GR_DITHER_2x2;
+      } else {
+	 fxMesa->Color.Dither = GR_DITHER_DISABLE;
+      }
+      fxMesa->dirty |= TDFX_UPLOAD_DITHER;
+      break;
+
+   case GL_FOG:
+      FLUSH_BATCH( fxMesa );
+      fxMesa->new_state |= TDFX_NEW_FOG;
+      break;
+
+   case GL_COLOR_LOGIC_OP:   /* any enabled logic op other than GL_COPY is a fallback */
+      FALLBACK( fxMesa, TDFX_FALLBACK_LOGICOP,
+		(ctx->Color.ColorLogicOpEnabled &&
+		 ctx->Color.LogicOp != GL_COPY));
+      break;
+
+   case GL_LIGHTING:   /* separate specular colour with lighting on is a fallback */
+      FALLBACK( fxMesa, TDFX_FALLBACK_SPECULAR,
+		(ctx->Light.Enabled &&
+		 ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR ));
+      break;
+
+   case GL_LINE_SMOOTH:
+      FLUSH_BATCH( fxMesa );
+      fxMesa->new_state |= TDFX_NEW_LINE;
+      break;
+
+   case GL_LINE_STIPPLE:   /* enabling line stipple is always a fallback */
+      FALLBACK(fxMesa, TDFX_FALLBACK_LINE_STIPPLE, state);
+      break;
+
+   case GL_POLYGON_STIPPLE:
+      FLUSH_BATCH(fxMesa);
+      fxMesa->new_state |= TDFX_NEW_STIPPLE;
+      break;
+
+   case GL_SCISSOR_TEST:
+      FLUSH_BATCH( fxMesa );
+      fxMesa->new_state |= TDFX_NEW_CLIP;
+      break;
+
+   case GL_STENCIL_TEST:   /* fallback only when there is no hardware stencil */
+      FLUSH_BATCH( fxMesa );
+      FALLBACK( fxMesa, TDFX_FALLBACK_STENCIL, state && !fxMesa->haveHwStencil);
+      fxMesa->new_state |= TDFX_NEW_STENCIL;
+      break;
+
+   case GL_TEXTURE_3D:   /* enabling 3D textures forces the texture-map fallback */
+      FLUSH_BATCH( fxMesa );
+      FALLBACK( fxMesa, TDFX_FALLBACK_TEXTURE_MAP, state); /* wrong */
+      fxMesa->new_state |= TDFX_NEW_TEXTURE;
+      break;
+
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_2D:
+      FLUSH_BATCH( fxMesa );
+      fxMesa->new_state |= TDFX_NEW_TEXTURE;
+      break;
+
+   default:
+      return;
+   }
+}
+
+
+
+/* Set the buffer used for drawing */
+/* XXX support for separate read/draw buffers hasn't been tested */
+static void tdfxDDDrawBuffer( GLcontext *ctx, GLenum mode )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   GLboolean needSoftware = GL_FALSE;
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s()\n", __FUNCTION__ );
+   }
+
+   FLUSH_BATCH( fxMesa );
+
+   /* Drawing to more than one colour buffer at once is always a fallback. */
+   if (ctx->DrawBuffer->_NumColorDrawBuffers > 1) {
+      FALLBACK( fxMesa, TDFX_FALLBACK_DRAW_BUFFER, GL_TRUE );
+      return;
+   }
+
+   switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) {
+   case BUFFER_FRONT_LEFT:
+      fxMesa->DrawBuffer = fxMesa->ReadBuffer = GR_BUFFER_FRONTBUFFER;
+      fxMesa->new_state |= TDFX_NEW_RENDER;
+      break;
+   case BUFFER_BACK_LEFT:
+      fxMesa->DrawBuffer = fxMesa->ReadBuffer = GR_BUFFER_BACKBUFFER;
+      fxMesa->new_state |= TDFX_NEW_RENDER;
+      break;
+   case -1:   /* presumably "no colour buffer": the colour mask is set from false4 */
+      FX_grColorMaskv( ctx, false4 );
+      break;
+   default:   /* any other buffer is rendered through the fallback path */
+      needSoftware = GL_TRUE;
+      break;
+   }
+   FALLBACK( fxMesa, TDFX_FALLBACK_DRAW_BUFFER, needSoftware );
+}
+
+
+static void tdfxDDReadBuffer( GLcontext *ctx, GLenum mode )
+{
+   /* XXX ??? - intentionally empty; fxMesa->ReadBuffer is set in tdfxDDDrawBuffer() */
+}
+
+
+/* =============================================================
+ * Polygon stipple
+ */
+
+static void tdfxDDPolygonStipple( GLcontext *ctx, const GLubyte *mask )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   const GLubyte *m = mask;
+   GLubyte q[4];
+   int i,j,k;
+   GLboolean allBitsSet;
+
+   FLUSH_BATCH(fxMesa);
+   fxMesa->Stipple.Pattern = 0xffffffff;
+   fxMesa->dirty |= TDFX_UPLOAD_STIPPLE;
+   fxMesa->new_state |= TDFX_NEW_STIPPLE;
+
+   /* Check if the stipple pattern is fully opaque.  If so, use software
+    * rendering.  This is basically a trick to make sure the OpenGL
+    * conformance test passes.  The 128 mask bytes are scanned one at a
+    * time, so nothing is assumed about the alignment of the pointer. */
+   allBitsSet = GL_TRUE;
+   for (i = 0; i < 32 * 4; i++) {
+      if (mask[i] != 0xff) {
+         allBitsSet = GL_FALSE;
+         break;
+      }
+   }
+   if (allBitsSet) {
+      fxMesa->haveHwStipple = GL_FALSE;
+      return;
+   }
+
+   /* The hardware pattern is one 32-bit word: usable only when each group
+    * of 4 bytes repeats one byte and the groups cycle with period 4. */
+   q[0] = mask[0];
+   q[1] = mask[4];
+   q[2] = mask[8];
+   q[3] = mask[12];
+
+   for (k = 0 ; k < 8 ; k++)
+      for (j = 0 ; j < 4; j++)
+	 for (i = 0 ; i < 4 ; i++,m++) {
+	    if (*m != q[j]) {
+	       fxMesa->haveHwStipple = GL_FALSE;
+	       return;
+	    }
+         }
+   /* Widen to GLuint before shifting: a GLubyte promotes to signed int,
+    * and shifting a value >= 0x80 left by 24 would overflow it. */
+   fxMesa->haveHwStipple = GL_TRUE;
+   fxMesa->Stipple.Pattern = ( ((GLuint) q[0] << 0) |
+                               ((GLuint) q[1] << 8) |
+                               ((GLuint) q[2] << 16) |
+                               ((GLuint) q[3] << 24) );
+}
+
+
+/* Installed as ctx->Driver.RenderMode: the fallback flag is true unless mode is GL_RENDER. */
+static void tdfxDDRenderMode( GLcontext *ctx, GLenum mode )
+{
+   tdfxContextPtr tdfx = TDFX_CONTEXT(ctx);
+   FALLBACK( tdfx, TDFX_FALLBACK_RENDER_MODE, (GL_RENDER != mode) );
+}
+
+
+
+static void tdfxDDPrintState( const char *msg, GLuint flags )
+{
+   fprintf( stderr,
+	    "%s: (0x%x) %s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+	    msg,
+	    flags,
+	    (flags & TDFX_NEW_COLOR) ? "color, " : "",
+	    (flags & TDFX_NEW_ALPHA) ? "alpha, " : "",
+	    (flags & TDFX_NEW_DEPTH) ? "depth, " : "",
+	    (flags & TDFX_NEW_RENDER) ? "render, " : "",
+	    (flags & TDFX_NEW_FOG) ? "fog, " : "",
+	    (flags & TDFX_NEW_STENCIL) ? "stencil, " : "",
+	    (flags & TDFX_NEW_STIPPLE) ? "stipple, " : "",
+	    (flags & TDFX_NEW_CLIP) ? "clip, " : "",
+	    (flags & TDFX_NEW_VIEWPORT) ? "viewport, " : "",
+	    (flags & TDFX_NEW_CULL) ? "cull, " : "",
+	    (flags & TDFX_NEW_GLIDE) ? "glide, " : "",
+	    (flags & TDFX_NEW_TEXTURE) ? "texture, " : "",
+	    (flags & TDFX_NEW_CONTEXT) ? "context, " : "");
+}
+
+
+
+/* Push pending driver state to the hardware: run the update routine for
+ * each TDFX_NEW_* bit found in fxMesa->new_state, then emit whatever is
+ * flagged in fxMesa->dirty while the hardware lock is held. */
+void tdfxDDUpdateHwState( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   int pending = fxMesa->new_state;
+
+   if ( TDFX_DEBUG & DEBUG_VERBOSE_API ) {
+      fprintf( stderr, "%s()\n", __FUNCTION__ );
+   }
+
+   if ( pending ) {
+      FLUSH_BATCH( fxMesa );
+      fxMesa->new_state = 0;
+
+      if ( 0 )
+         tdfxDDPrintState( "tdfxUpdateHwState", pending );
+
+      /* Refresh every part of the context state that was flagged. */
+      if ( pending & TDFX_NEW_ALPHA )
+         tdfxUpdateAlphaMode( ctx );
+
+      if ( pending & TDFX_NEW_DEPTH )
+         tdfxUpdateZMode( ctx );
+
+      if ( pending & TDFX_NEW_FOG )
+         tdfxUpdateFogAttrib( ctx );
+
+      if ( pending & TDFX_NEW_CLIP )
+         tdfxUpdateClipping( ctx );
+
+      if ( pending & TDFX_NEW_STIPPLE )
+         tdfxUpdateStipple( ctx );
+
+      if ( pending & TDFX_NEW_CULL )
+         tdfxUpdateCull( ctx );
+
+      if ( pending & TDFX_NEW_LINE )
+         tdfxUpdateLine( ctx );
+
+      if ( pending & TDFX_NEW_VIEWPORT )
+         tdfxUpdateViewport( ctx );
+
+      if ( pending & TDFX_NEW_RENDER )
+         tdfxUpdateRenderAttrib( ctx );
+
+      if ( pending & TDFX_NEW_STENCIL )
+         tdfxUpdateStencil( ctx );
+
+      /* A full texture-state update supersedes a binding-only update. */
+      if ( pending & TDFX_NEW_TEXTURE ) {
+         tdfxUpdateTextureState( ctx );
+      } else if ( pending & TDFX_NEW_TEXTURE_BIND ) {
+         tdfxUpdateTextureBinding( ctx );
+      }
+   }
+
+   /* Disabled: would track a depth bias derived from the polygon offset units. */
+   if ( 0 ) {
+      FxI32 bias = (FxI32) (ctx->Polygon.OffsetUnits * TDFX_DEPTH_BIAS_SCALE);
+
+      if ( fxMesa->Depth.Bias != bias ) {
+         fxMesa->Depth.Bias = bias;
+         fxMesa->dirty |= TDFX_UPLOAD_DEPTH_BIAS;
+      }
+   }
+
+   if ( fxMesa->dirty ) {
+      LOCK_HARDWARE( fxMesa );
+      tdfxEmitHwStateLocked( fxMesa );
+      UNLOCK_HARDWARE( fxMesa );
+   }
+}
+
+/* Installed as ctx->Driver.UpdateState: forwards new_state to the swrast, swsetup, vbo and tnl helpers and ORs it into new_gl_state. */
+static void tdfxDDInvalidateState( GLcontext *ctx, GLuint new_state )
+{
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   TDFX_CONTEXT(ctx)->new_gl_state |= new_state;
+}
+
+
+
+/* Initialize the context's Glide state mirror with the default values that
+ * are later passed to Glide calls.  Blending starts as ONE/ZERO (src/dst).
+ */
+void tdfxInitState( tdfxContextPtr fxMesa )
+{
+   GLcontext *ctx = fxMesa->glCtx;
+   GLint i;
+
+   fxMesa->ColorCombine.Function	= GR_COMBINE_FUNCTION_LOCAL;
+   fxMesa->ColorCombine.Factor		= GR_COMBINE_FACTOR_NONE;
+   fxMesa->ColorCombine.Local		= GR_COMBINE_LOCAL_ITERATED;
+   fxMesa->ColorCombine.Other		= GR_COMBINE_OTHER_NONE;
+   fxMesa->ColorCombine.Invert		= FXFALSE;
+   fxMesa->AlphaCombine.Function	= GR_COMBINE_FUNCTION_LOCAL;
+   fxMesa->AlphaCombine.Factor		= GR_COMBINE_FACTOR_NONE;
+   fxMesa->AlphaCombine.Local		= GR_COMBINE_LOCAL_ITERATED;
+   fxMesa->AlphaCombine.Other		= GR_COMBINE_OTHER_NONE;
+   fxMesa->AlphaCombine.Invert		= FXFALSE;
+
+   fxMesa->ColorCombineExt.SourceA	= GR_CMBX_ITRGB;
+   fxMesa->ColorCombineExt.ModeA	= GR_FUNC_MODE_X;
+   fxMesa->ColorCombineExt.SourceB	= GR_CMBX_ZERO;
+   fxMesa->ColorCombineExt.ModeB	= GR_FUNC_MODE_ZERO;
+   fxMesa->ColorCombineExt.SourceC	= GR_CMBX_ZERO;
+   fxMesa->ColorCombineExt.InvertC	= FXTRUE;
+   fxMesa->ColorCombineExt.SourceD	= GR_CMBX_ZERO;
+   fxMesa->ColorCombineExt.InvertD	= FXFALSE;
+   fxMesa->ColorCombineExt.Shift	= 0;
+   fxMesa->ColorCombineExt.Invert	= FXFALSE;
+   fxMesa->AlphaCombineExt.SourceA	= GR_CMBX_ITALPHA;
+   fxMesa->AlphaCombineExt.ModeA	= GR_FUNC_MODE_X;
+   fxMesa->AlphaCombineExt.SourceB	= GR_CMBX_ZERO;
+   fxMesa->AlphaCombineExt.ModeB	= GR_FUNC_MODE_ZERO;
+   fxMesa->AlphaCombineExt.SourceC	= GR_CMBX_ZERO;
+   fxMesa->AlphaCombineExt.InvertC	= FXTRUE;
+   fxMesa->AlphaCombineExt.SourceD	= GR_CMBX_ZERO;
+   fxMesa->AlphaCombineExt.InvertD	= FXFALSE;
+   fxMesa->AlphaCombineExt.Shift	= 0;
+   fxMesa->AlphaCombineExt.Invert	= FXFALSE;
+
+   fxMesa->sScale0 = fxMesa->tScale0 = 1.0;
+   fxMesa->sScale1 = fxMesa->tScale1 = 1.0;
+
+   fxMesa->TexPalette.Type = 0;
+   fxMesa->TexPalette.Data = NULL;
+
+   for ( i = 0 ; i < TDFX_NUM_TMU ; i++ ) {
+      fxMesa->TexSource[i].StartAddress	= 0;
+      fxMesa->TexSource[i].EvenOdd	= GR_MIPMAPLEVELMASK_EVEN;
+      fxMesa->TexSource[i].Info		= NULL;
+
+      fxMesa->TexCombine[i].FunctionRGB		= 0;
+      fxMesa->TexCombine[i].FactorRGB		= 0;
+      fxMesa->TexCombine[i].FunctionAlpha	= 0;
+      fxMesa->TexCombine[i].FactorAlpha		= 0;
+      fxMesa->TexCombine[i].InvertRGB		= FXFALSE;
+      fxMesa->TexCombine[i].InvertAlpha		= FXFALSE;
+
+      fxMesa->TexCombineExt[i].Alpha.SourceA	= 0;
+      /* XXX more state to init here */
+      fxMesa->TexCombineExt[i].Color.SourceA	= 0;
+      fxMesa->TexCombineExt[i].EnvColor        = 0x0;
+
+      fxMesa->TexParams[i].sClamp 	= GR_TEXTURECLAMP_WRAP;
+      fxMesa->TexParams[i].tClamp	= GR_TEXTURECLAMP_WRAP;
+      fxMesa->TexParams[i].minFilt	= GR_TEXTUREFILTER_POINT_SAMPLED;
+      fxMesa->TexParams[i].magFilt	= GR_TEXTUREFILTER_BILINEAR;
+      fxMesa->TexParams[i].mmMode	= GR_MIPMAP_DISABLE;
+      fxMesa->TexParams[i].LODblend	= FXFALSE;
+      fxMesa->TexParams[i].LodBias	= 0.0;
+
+      fxMesa->TexState.EnvMode[i]	= ~0;
+      fxMesa->TexState.TexFormat[i]	= ~0;
+      fxMesa->TexState.Enabled[i]	= 0;
+   }
+
+   if ( ctx->Visual.doubleBufferMode) {
+      fxMesa->DrawBuffer		= GR_BUFFER_BACKBUFFER;
+      fxMesa->ReadBuffer		= GR_BUFFER_BACKBUFFER;
+   } else {
+      fxMesa->DrawBuffer		= GR_BUFFER_FRONTBUFFER;
+      fxMesa->ReadBuffer		= GR_BUFFER_FRONTBUFFER;
+   }
+
+   fxMesa->Color.ClearColor		= 0x00000000;
+   fxMesa->Color.ClearAlpha		= 0x00;
+   fxMesa->Color.ColorMask[RCOMP]	= FXTRUE;
+   fxMesa->Color.ColorMask[BCOMP]	= FXTRUE;
+   fxMesa->Color.ColorMask[GCOMP]	= FXTRUE;
+   fxMesa->Color.ColorMask[ACOMP]	= FXTRUE;
+   fxMesa->Color.MonoColor		= 0xffffffff;
+
+   fxMesa->Color.AlphaFunc		= GR_CMP_ALWAYS;
+   fxMesa->Color.AlphaRef		= 0x00;
+   fxMesa->Color.BlendSrcRGB		= GR_BLEND_ONE;
+   fxMesa->Color.BlendDstRGB		= GR_BLEND_ZERO;
+   fxMesa->Color.BlendSrcA		= GR_BLEND_ONE;
+   fxMesa->Color.BlendDstA		= GR_BLEND_ZERO;
+
+   fxMesa->Color.Dither			= GR_DITHER_2x2;
+
+   if ( fxMesa->glCtx->Visual.depthBits > 0 ) {
+      fxMesa->Depth.Mode		= GR_DEPTHBUFFER_ZBUFFER;
+   } else {
+      fxMesa->Depth.Mode		= GR_DEPTHBUFFER_DISABLE;
+   }
+   fxMesa->Depth.Bias			= 0;
+   fxMesa->Depth.Func			= GR_CMP_LESS;
+   fxMesa->Depth.Clear			= 0; /* computed later */
+   fxMesa->Depth.Mask			= FXTRUE;
+
+
+   fxMesa->Fog.Mode			= GR_FOG_DISABLE;
+   fxMesa->Fog.Color			= 0x00000000;
+   fxMesa->Fog.Table			= NULL;
+   fxMesa->Fog.Density			= 1.0;
+   fxMesa->Fog.Near			= 1.0;
+   fxMesa->Fog.Far			= 1.0;
+
+   fxMesa->Stencil.Function		= GR_CMP_ALWAYS;
+   fxMesa->Stencil.RefValue		= 0;
+   fxMesa->Stencil.ValueMask		= 0xff;
+   fxMesa->Stencil.WriteMask		= 0xff;
+   fxMesa->Stencil.FailFunc		= 0;
+   fxMesa->Stencil.ZFailFunc		= 0;
+   fxMesa->Stencil.ZPassFunc		= 0;
+   fxMesa->Stencil.Clear		= 0;
+
+   fxMesa->Stipple.Mode                 = GR_STIPPLE_DISABLE;
+   fxMesa->Stipple.Pattern              = 0xffffffff;
+
+   fxMesa->Scissor.minX			= 0;
+   fxMesa->Scissor.minY			= 0;
+   fxMesa->Scissor.maxX			= 0;
+   fxMesa->Scissor.maxY			= 0;
+
+   fxMesa->Viewport.Mode		= GR_WINDOW_COORDS;
+   fxMesa->Viewport.X			= 0;
+   fxMesa->Viewport.Y			= 0;
+   fxMesa->Viewport.Width		= 0;
+   fxMesa->Viewport.Height		= 0;
+   fxMesa->Viewport.Near		= 0.0;
+   fxMesa->Viewport.Far			= 0.0;
+
+   fxMesa->CullMode			= GR_CULL_DISABLE;
+
+   fxMesa->Glide.ColorFormat		= GR_COLORFORMAT_ABGR;
+   fxMesa->Glide.Origin			= GR_ORIGIN_LOWER_LEFT;
+   fxMesa->Glide.Initialized		= FXFALSE;
+}
+
+
+
+/* Plug the tdfx state-change callbacks into ctx->Driver.  The stencil
+ * hooks are installed only when the context reports haveHwStencil.
+ */
+void tdfxDDInitStateFuncs( GLcontext *ctx )
+{
+   tdfxContextPtr tdfx = TDFX_CONTEXT(ctx);
+
+   ctx->Driver.AlphaFunc             = tdfxDDAlphaFunc;
+   ctx->Driver.BlendEquationSeparate = tdfxDDBlendEquationSeparate;
+   ctx->Driver.BlendFuncSeparate     = tdfxDDBlendFuncSeparate;
+   ctx->Driver.ClearColor            = tdfxDDClearColor;
+   ctx->Driver.ClearDepth            = tdfxDDClearDepth;
+   ctx->Driver.ColorMask             = tdfxDDColorMask;
+   ctx->Driver.CullFace              = tdfxDDCullFace;
+   ctx->Driver.DepthFunc             = tdfxDDDepthFunc;
+   ctx->Driver.DepthMask             = tdfxDDDepthMask;
+   ctx->Driver.DepthRange            = tdfxDDDepthRange;
+   ctx->Driver.DrawBuffer            = tdfxDDDrawBuffer;
+   ctx->Driver.Enable                = tdfxDDEnable;
+   ctx->Driver.Fogfv                 = tdfxDDFogfv;
+   ctx->Driver.FrontFace             = tdfxDDFrontFace;
+   ctx->Driver.LightModelfv          = tdfxDDLightModelfv;
+   ctx->Driver.LineWidth             = tdfxDDLineWidth;
+   ctx->Driver.PolygonStipple        = tdfxDDPolygonStipple;
+   ctx->Driver.ReadBuffer            = tdfxDDReadBuffer;
+   ctx->Driver.RenderMode            = tdfxDDRenderMode;
+   ctx->Driver.Scissor               = tdfxDDScissor;
+   ctx->Driver.ShadeModel            = tdfxDDShadeModel;
+   ctx->Driver.UpdateState           = tdfxDDInvalidateState;
+   ctx->Driver.Viewport              = tdfxDDViewport;
+
+   if ( tdfx->haveHwStencil ) {
+      ctx->Driver.StencilFuncSeparate = tdfxDDStencilFuncSeparate;
+      ctx->Driver.StencilMaskSeparate = tdfxDDStencilMaskSeparate;
+      ctx->Driver.StencilOpSeparate   = tdfxDDStencilOpSeparate;
+   }
+}
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_state.h b/src/mesa/drivers/dri/tdfx/tdfx_state.h
new file mode 100644
index 0000000000..4880b990fc
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_state.h
@@ -0,0 +1,60 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Brian Paul <brianp@valinux.com>
+ *
+ */
+
+#ifndef __TDFX_STATE_H__
+#define __TDFX_STATE_H__
+
+#include "main/context.h"
+#include "tdfx_context.h"
+
+extern void tdfxDDInitStateFuncs( GLcontext *ctx );
+extern void tdfxDDUpdateHwState( GLcontext *ctx );
+extern void tdfxInitState( tdfxContextPtr fxMesa );
+
+extern void tdfxUpdateClipping( GLcontext *ctx );
+
+/* FALLBACK() passes the GLcontext stored in rmesa->glCtx to tdfxFallback().
+ * The rmesa argument is parenthesized so that any pointer expression works.
+ */
+extern void tdfxFallback( GLcontext *ctx, GLuint bit, GLboolean mode );
+#define FALLBACK( rmesa, bit, mode ) tdfxFallback( (rmesa)->glCtx, bit, mode )
+
+extern void tdfxUpdateCull( GLcontext *ctx );
+extern void tdfxUpdateStipple( GLcontext *ctx );
+extern void tdfxUpdateViewport( GLcontext *ctx );
+
+
+#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_tex.c b/src/mesa/drivers/dri/tdfx/tdfx_tex.c
new file mode 100644
index 0000000000..1c51452c10
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_tex.c
@@ -0,0 +1,1868 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * New fixes:
+ *	Daniel Borca <dborca@users.sourceforge.net>, 19 Jul 2004
+ *
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Brian Paul <brianp@valinux.com>
+ *
+ */
+
+
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/mipmap.h"
+#include "main/texcompress.h"
+#include "main/teximage.h"
+#include "main/texstore.h"
+#include "main/texobj.h"
+#include "tdfx_context.h"
+#include "tdfx_tex.h"
+#include "tdfx_texman.h"
+
+
+/* Box-filter srcImage to half size and store the result in dstImage.
+ * RGB565/ARGB4444/ARGB1555 images are unpacked to 4 bytes per pixel first.
+ * no borders! can't halve 1x1! (stride > width * comp) not allowed */
+static void
+_mesa_halve2x2_teximage2d ( GLcontext *ctx,
+			    struct gl_texture_image *texImage,
+			    GLuint bytesPerPixel,
+			    GLint srcWidth, GLint srcHeight,
+			    const GLvoid *srcImage, GLvoid *dstImage )
+{
+   GLint i, j, k;
+   GLint dstWidth = srcWidth / 2;
+   GLint dstHeight = srcHeight / 2;
+   GLint srcRowStride = srcWidth * bytesPerPixel;
+   GLubyte *src = (GLubyte *)srcImage;
+   GLubyte *dst = dstImage;
+   GLuint dstImageOffsets = 0;
+   GLuint bpt = 0;       /* packed texel size; non-zero selects the unpack path */
+   GLubyte *_s = NULL, *_d = NULL;   /* temporary unpacked source / destination */
+   GLenum _t = 0;        /* pixel type of the packed texels */
+
+   if (texImage->TexFormat == MESA_FORMAT_RGB565) {
+      _t = GL_UNSIGNED_SHORT_5_6_5_REV;
+      bpt = bytesPerPixel;
+   } else if (texImage->TexFormat == MESA_FORMAT_ARGB4444) {
+      _t = GL_UNSIGNED_SHORT_4_4_4_4_REV;
+      bpt = bytesPerPixel;
+   } else if (texImage->TexFormat == MESA_FORMAT_ARGB1555) {
+      _t = GL_UNSIGNED_SHORT_1_5_5_5_REV;
+      bpt = bytesPerPixel;
+   }
+   if (bpt) {
+      bytesPerPixel = 4;
+      srcRowStride = srcWidth * bytesPerPixel;
+      dstWidth = dstWidth ? dstWidth : 1;
+      dstHeight = dstHeight ? dstHeight : 1;
+      _s = src = MALLOC(srcRowStride * srcHeight);
+      _d = dst = MALLOC(dstWidth * bytesPerPixel * dstHeight);
+      if (!_s || !_d) {
+         /* Out of memory: give up rather than dereference NULL. */
+         if (_s) FREE(_s);
+         if (_d) FREE(_d);
+         return;
+      }
+      _mesa_texstore(ctx, 2, GL_RGBA,
+                     MESA_FORMAT_RGBA8888_REV, src,
+                     0, 0, 0, /* dstX/Y/Zoffset */
+                     srcRowStride, /* dstRowStride */
+                     &dstImageOffsets,
+                     srcWidth, srcHeight, 1,
+                     texImage->_BaseFormat, _t, srcImage, &ctx->DefaultPacking);
+   }
+
+   if (srcHeight == 1) {
+      for (i = 0; i < dstWidth; i++) {
+         for (k = 0; k < bytesPerPixel; k++) {
+            dst[0] = (src[0] + src[bytesPerPixel] + 1) / 2;
+            src++;
+            dst++;
+         }
+         src += bytesPerPixel;
+      }
+   } else if (srcWidth == 1) {
+      for (j = 0; j < dstHeight; j++) {
+         for (k = 0; k < bytesPerPixel; k++) {
+            dst[0] = (src[0] + src[srcRowStride] + 1) / 2;
+            src++;
+            dst++;
+         }
+         src += srcRowStride;
+      }
+   } else {
+      for (j = 0; j < dstHeight; j++) {
+         for (i = 0; i < dstWidth; i++) {
+            for (k = 0; k < bytesPerPixel; k++) {
+               dst[0] = (src[0] +
+                         src[bytesPerPixel] +
+                         src[srcRowStride] +
+                         src[srcRowStride + bytesPerPixel] + 2) / 4;
+               src++;
+               dst++;
+            }
+            src += bytesPerPixel;
+         }
+         src += srcRowStride;
+      }
+   }
+
+   if (bpt) {
+      _mesa_texstore(ctx, 2, texImage->_BaseFormat,
+                     texImage->TexFormat, dstImage,
+                     0, 0, 0, /* dstX/Y/Zoffset */
+                     dstWidth * bpt,
+                     &dstImageOffsets,
+                     dstWidth, dstHeight, 1,
+                     GL_BGRA, CHAN_TYPE, _d, &ctx->DefaultPacking);
+      FREE(_d);
+      FREE(_s);
+   }
+}
+
+
+/* Return log2(n) when n is an exact power of two (n >= 1).
+ * Returns -1 for every other value, including zero and negatives. */
+static int
+logbase2(int n)
+{
+    unsigned int pow2 = 1;  /* unsigned, so doubling past 2^30 cannot overflow */
+    int exponent = 0;
+
+    if (n < 0) {
+        return -1;
+    }
+
+    /* Find the smallest power of two that is >= n. */
+    while ((unsigned int) n > pow2) {
+        pow2 *= 2;
+        exponent++;
+    }
+
+    /* n == 0 also arrives here with pow2 still 1 and is rejected. */
+    return (pow2 == (unsigned int) n) ? exponent : -1;
+}
+
+
+/* Fill in the mipmap levels above texObj->BaseLevel by repeatedly halving
+ * the previous level with _mesa_halve2x2_teximage2d().  Stops at
+ * texObj->MaxLevel, at the context's level limit, once the size no longer
+ * shrinks, or when a level has no storage to read from or write to.
+ */
+static void
+tdfxGenerateMipmap(GLcontext *ctx, GLenum target,
+                   struct gl_texture_object *texObj)
+{
+   GLint mipWidth, mipHeight;
+   struct gl_texture_image *mipImage; /* the new/next image */
+   struct gl_texture_image *texImage;
+   const GLint maxLevels = _mesa_max_texture_levels(ctx, texObj->Target);
+   GLint level = texObj->BaseLevel;
+   GLsizei width, height, texelBytes;
+   const tdfxMipMapLevel *mml;
+
+   texImage = _mesa_get_tex_image(ctx, texObj, target, level);
+   if (!texImage)
+      return;
+   texelBytes = _mesa_get_format_bytes(texImage->TexFormat);
+   mml = TDFX_TEXIMAGE_DATA(texImage);
+
+   width = texImage->Width;
+   height = texImage->Height;
+   while (level < texObj->MaxLevel && level < maxLevels - 1) {
+      mipWidth = (width / 2) ? (width / 2) : 1;
+      mipHeight = (height / 2) ? (height / 2) : 1;
+      if ((mipWidth == width) && (mipHeight == height)) {
+         break;
+      }
+      ++level;
+      mipImage = _mesa_select_tex_image(ctx, texObj, target, level);
+      /* A level without driver data or pixel storage cannot be processed. */
+      if (!mml || !mipImage || !texImage->Data || !mipImage->Data) {
+         break;
+      }
+      _mesa_halve2x2_teximage2d(ctx, texImage, texelBytes,
+                                mml->width, mml->height,
+                                texImage->Data, mipImage->Data);
+      texImage = mipImage;
+      mml = TDFX_TEXIMAGE_DATA(mipImage);
+      width = mipWidth;
+      height = mipHeight;
+   }
+}
+
+
+/*
+ * Derive the Glide parameters for a w x h texture image.
+ * Input:  w, h - source texture width and height
+ * Output:  lodlevel - Glide lod level token for the larger texture dimension
+ *          aspectratio - Glide aspect ratio token
+ *          sscale - S scale factor used during triangle setup
+ *          tscale - T scale factor used during triangle setup
+ *          wscale - OpenGL -> Glide image width scale factor
+ *          hscale - OpenGL -> Glide image height scale factor
+ *          (any output pointer may be NULL, in which case it is skipped)
+ *
+ * Sample results:
+ *      w    h       lodlevel               aspectRatio
+ *     128  128  GR_LOD_LOG2_128 (=7)  GR_ASPECT_LOG2_1x1 (=0)
+ *      64   32  GR_LOD_LOG2_64 (=6)   GR_ASPECT_LOG2_2x1 (=1)
+ *      32   64  GR_LOD_LOG2_64 (=6)   GR_ASPECT_LOG2_1x2 (=-1)
+ *      32   32  GR_LOD_LOG2_32 (=5)   GR_ASPECT_LOG2_1x1 (=0)
+ */
+static void
+tdfxTexGetInfo(const GLcontext *ctx, int w, int h,
+               GrLOD_t *lodlevel, GrAspectRatio_t *aspectratio,
+               float *sscale, float *tscale,
+               int *wscale, int *hscale)
+{
+    int log2w, log2h, aspect, lod;
+    int stretchW = 1, stretchH = 1;
+    float sCoord = 256.0, tCoord = 256.0;
+
+    ASSERT(w >= 1);
+    ASSERT(h >= 1);
+
+    log2w = logbase2(w);
+    log2h = logbase2(h);
+    aspect = log2w - log2h;  /* aspect ratio = difference in log dimensions */
+
+    /* Hardware only allows a maximum aspect ratio of 8x1, so handle
+       |aspect| > 3 by scaling the image and using an 8x1 aspect ratio */
+    if (aspect < 0) {
+        ASSERT(w < h);
+        lod = log2h;
+        if (aspect < GR_ASPECT_LOG2_1x8) {
+            /* have to stretch image width */
+            sCoord = 32.0;
+            stretchW = 1 << (-aspect - 3);
+            aspect = GR_ASPECT_LOG2_1x8;
+        }
+        else {
+            sCoord = 256 >> -aspect;
+        }
+    }
+    else {
+        ASSERT(w >= h);
+        lod = log2w;
+        if (aspect > GR_ASPECT_LOG2_8x1) {
+            /* have to stretch image height */
+            tCoord = 32.0;
+            stretchH = 1 << (aspect - 3);
+            aspect = GR_ASPECT_LOG2_8x1;
+        }
+        else {
+            tCoord = 256 >> aspect;
+        }
+    }
+
+    /* store only the results the caller asked for */
+    if (lodlevel)
+        *lodlevel = (GrLOD_t) lod;
+    if (aspectratio)
+        *aspectratio = (GrAspectRatio_t) aspect;
+    if (sscale)
+        *sscale = sCoord;
+    if (tscale)
+        *tscale = tCoord;
+    if (wscale)
+        *wscale = stretchW;
+    if (hscale)
+        *hscale = stretchH;
+}
+
+
+/*
+ * Recompute the level range, LOD tokens and texcoord scales kept in the
+ * texture's tdfxTexInfo.  Called when the min filter or level range changes.
+ */
+static void RevalidateTexture(GLcontext *ctx, struct gl_texture_object *tObj)
+{
+    tdfxTexInfo *ti = TDFX_TEXTURE_DATA(tObj);
+    GLint minl, maxl;
+
+    if (!ti)
+       return;
+
+    minl = maxl = tObj->BaseLevel;
+
+    if (tObj->Image[0][minl]) {
+       maxl = MIN2(tObj->MaxLevel, tObj->Image[0][minl]->MaxLog2);
+
+       /* compute largeLodLog2, aspect ratio and texcoord scale factors */
+       tdfxTexGetInfo(ctx, tObj->Image[0][minl]->Width, tObj->Image[0][minl]->Height,
+                      &ti->info.largeLodLog2,
+                      &ti->info.aspectRatioLog2,
+                      &(ti->sScale), &(ti->tScale), NULL, NULL);
+    }
+
+    if (tObj->Image[0][maxl] && (tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR)) {
+        /* mipmapping: need to compute smallLodLog2 */
+        tdfxTexGetInfo(ctx, tObj->Image[0][maxl]->Width,
+                       tObj->Image[0][maxl]->Height,
+                       &ti->info.smallLodLog2, NULL,
+                       NULL, NULL, NULL, NULL);
+    }
+    else {
+        /* not mipmapping: smallLodLog2 = largeLodLog2 */
+        ti->info.smallLodLog2 = ti->info.largeLodLog2;
+        maxl = minl;
+    }
+
+    ti->minLevel = minl;
+    ti->maxLevel = maxl;
+    ti->info.data = NULL;
+
+   /* this is necessary because of fxDDCompressedTexImage2D */
+   if (ti->padded) {
+      struct gl_texture_image *texImage = tObj->Image[0][minl];
+      tdfxMipMapLevel *mml = texImage ? TDFX_TEXIMAGE_DATA(texImage) : NULL;
+      if (mml && (mml->wScale != 1 || mml->hScale != 1)) {
+         ti->sScale /= mml->wScale;
+         ti->tScale /= mml->hScale;
+      }
+   }
+}
+
+
+/* Allocate a zero-filled tdfxTexInfo and set its non-zero defaults.
+ * Returns NULL (after reporting via _mesa_problem) when CALLOC fails.
+ */
+static tdfxTexInfo *
+fxAllocTexObjData(tdfxContextPtr fxMesa)
+{
+    tdfxTexInfo *info = CALLOC(sizeof(tdfxTexInfo));
+
+    if (info == NULL) {
+        _mesa_problem(NULL, "tdfx driver: out of memory");
+        return NULL;
+    }
+
+    /* residency flag off, no TMU selected, no per-TMU 'tm' entries */
+    info->isInTM = GL_FALSE;
+    info->whichTMU = TDFX_TMU_NONE;
+    info->tm[TDFX_TMU0] = NULL;
+    info->tm[TDFX_TMU1] = NULL;
+
+    info->minFilt = GR_TEXTUREFILTER_POINT_SAMPLED;
+    info->magFilt = GR_TEXTUREFILTER_BILINEAR;
+    info->sClamp = GR_TEXTURECLAMP_WRAP;
+    info->tClamp = GR_TEXTURECLAMP_WRAP;
+
+    info->mmMode = GR_MIPMAP_NEAREST;
+    info->LODblend = FXFALSE;
+    return info;
+}
+
+
+/*
+ * Called via glBindTexture: stamp the object with the bind counter.
+ */
+static void
+tdfxBindTexture(GLcontext * ctx, GLenum target,
+                  struct gl_texture_object *tObj)
+{
+    tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+    tdfxTexInfo *ti;
+
+    if (MESA_VERBOSE & VERBOSE_DRIVER) {
+        fprintf(stderr, "fxmesa: fxDDTexBind(%d,%p)\n", tObj->Name,
+                tObj->DriverData);
+    }
+
+    if ((target != GL_TEXTURE_1D) && (target != GL_TEXTURE_2D))
+        return;
+
+    if (!tObj->DriverData)
+        tObj->DriverData = fxAllocTexObjData(fxMesa);
+
+    ti = TDFX_TEXTURE_DATA(tObj);
+    if (!ti)
+        return; /* allocation failed; fxAllocTexObjData already reported it */
+    ti->lastTimeUsed = fxMesa->texBindNumber++;
+    fxMesa->new_state |= TDFX_NEW_TEXTURE;
+}
+
+
+/*
+ * Called via glTexEnv.  The parameter value itself is not examined here
+ * (apart from debug output); the texture state is simply marked stale.
+ */
+static void
+tdfxTexEnv(GLcontext * ctx, GLenum target, GLenum pname,
+             const GLfloat * param)
+{
+    tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+
+    if ( TDFX_DEBUG & DEBUG_VERBOSE_API ) {
+        if (!param)
+            fprintf(stderr, "fxmesa: texenv(%x)\n", pname);
+        else
+            fprintf(stderr, "fxmesa: texenv(%x,%x)\n", pname,
+                    (GLint) (*param));
+    }
+
+    /* XXX hack: clearing the cached env mode of the current unit forces
+     * the Glide texture state to be updated.
+     */
+    fxMesa->new_state |= TDFX_NEW_TEXTURE;
+    fxMesa->TexState.EnvMode[ctx->Texture.CurrentUnit] = 0;
+}
+
+
+/*
+ * Called via glTexParameter.  Only 1D and 2D targets are handled.  Filter
+ * and wrap settings are translated to Glide tokens in the texture's
+ * tdfxTexInfo; level changes just revalidate the texture.
+ */
+static void
+tdfxTexParameter(GLcontext * ctx, GLenum target,
+                   struct gl_texture_object *tObj,
+                   GLenum pname, const GLfloat * params)
+{
+    tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+    GLenum param = (GLenum) (GLint) params[0];
+    tdfxTexInfo *ti;
+
+    if (MESA_VERBOSE & VERBOSE_DRIVER) {
+        fprintf(stderr, "fxmesa: fxDDTexParam(%d,%p,%x,%x)\n", tObj->Name,
+                tObj->DriverData, pname, param);
+    }
+
+    if ((target != GL_TEXTURE_1D) && (target != GL_TEXTURE_2D))
+        return;
+
+    if (!tObj->DriverData)
+        tObj->DriverData = fxAllocTexObjData(fxMesa);
+
+    ti = TDFX_TEXTURE_DATA(tObj);
+    if (!ti)
+        return; /* allocation failed; nothing to record the setting in */
+
+    switch (pname) {
+    case GL_TEXTURE_MIN_FILTER:
+        switch (param) {
+        case GL_NEAREST:
+            ti->mmMode = GR_MIPMAP_DISABLE;
+            ti->minFilt = GR_TEXTUREFILTER_POINT_SAMPLED;
+            ti->LODblend = FXFALSE;
+            break;
+        case GL_LINEAR:
+            ti->mmMode = GR_MIPMAP_DISABLE;
+            ti->minFilt = GR_TEXTUREFILTER_BILINEAR;
+            ti->LODblend = FXFALSE;
+            break;
+        case GL_NEAREST_MIPMAP_LINEAR:
+            if (!fxMesa->Glide.HaveCombineExt) {
+                 if (fxMesa->haveTwoTMUs) {
+                     ti->mmMode = GR_MIPMAP_NEAREST;
+                     ti->LODblend = FXTRUE;
+                 }
+                 else {
+                     ti->mmMode = GR_MIPMAP_NEAREST_DITHER;
+                     ti->LODblend = FXFALSE;
+                 }
+                 ti->minFilt = GR_TEXTUREFILTER_POINT_SAMPLED;
+                 break;
+            }
+            /* XXX Voodoo3/Banshee mipmap blending seems to produce
+             * incorrectly filtered colors for the smallest mipmap levels.
+             * To work-around we fall-through here and use a different filter.
+             */
+        case GL_NEAREST_MIPMAP_NEAREST:
+            ti->mmMode = GR_MIPMAP_NEAREST;
+            ti->minFilt = GR_TEXTUREFILTER_POINT_SAMPLED;
+            ti->LODblend = FXFALSE;
+            break;
+        case GL_LINEAR_MIPMAP_LINEAR:
+            if (!fxMesa->Glide.HaveCombineExt) {
+                if (fxMesa->haveTwoTMUs) {
+                    ti->mmMode = GR_MIPMAP_NEAREST;
+                    ti->LODblend = FXTRUE;
+                }
+                else {
+                    ti->mmMode = GR_MIPMAP_NEAREST_DITHER;
+                    ti->LODblend = FXFALSE;
+                }
+                ti->minFilt = GR_TEXTUREFILTER_BILINEAR;
+                break;
+            }
+            /* XXX Voodoo3/Banshee mipmap blending seems to produce
+             * incorrectly filtered colors for the smallest mipmap levels.
+             * To work-around we fall-through here and use a different filter.
+             */
+        case GL_LINEAR_MIPMAP_NEAREST:
+            ti->mmMode = GR_MIPMAP_NEAREST;
+            ti->minFilt = GR_TEXTUREFILTER_BILINEAR;
+            ti->LODblend = FXFALSE;
+            break;
+        default:
+            break;
+        }
+        ti->reloadImages = GL_TRUE;
+        RevalidateTexture(ctx, tObj);
+        fxMesa->new_state |= TDFX_NEW_TEXTURE;
+        break;
+
+    case GL_TEXTURE_MAG_FILTER:
+        switch (param) {
+        case GL_NEAREST:
+            ti->magFilt = GR_TEXTUREFILTER_POINT_SAMPLED;
+            break;
+        case GL_LINEAR:
+            ti->magFilt = GR_TEXTUREFILTER_BILINEAR;
+            break;
+        default:
+            break;
+        }
+        fxMesa->new_state |= TDFX_NEW_TEXTURE;
+        break;
+
+    case GL_TEXTURE_WRAP_S:
+        switch (param) {
+        case GL_CLAMP_TO_BORDER:
+        case GL_CLAMP_TO_EDGE:
+        case GL_CLAMP:
+            ti->sClamp = GR_TEXTURECLAMP_CLAMP;
+            break;
+        case GL_REPEAT:
+            ti->sClamp = GR_TEXTURECLAMP_WRAP;
+            break;
+        case GL_MIRRORED_REPEAT:
+            ti->sClamp = GR_TEXTURECLAMP_MIRROR_EXT;
+            break;
+        default:
+            break;
+        }
+        fxMesa->new_state |= TDFX_NEW_TEXTURE;
+        break;
+
+    case GL_TEXTURE_WRAP_T:
+        switch (param) {
+        case GL_CLAMP_TO_BORDER:
+        case GL_CLAMP_TO_EDGE:
+        case GL_CLAMP:
+            ti->tClamp = GR_TEXTURECLAMP_CLAMP;
+            break;
+        case GL_REPEAT:
+            ti->tClamp = GR_TEXTURECLAMP_WRAP;
+            break;
+        case GL_MIRRORED_REPEAT:
+            ti->tClamp = GR_TEXTURECLAMP_MIRROR_EXT;
+            break;
+        default:
+            break;
+        }
+        fxMesa->new_state |= TDFX_NEW_TEXTURE;
+        break;
+
+    case GL_TEXTURE_BORDER_COLOR:
+    case GL_TEXTURE_MIN_LOD:
+    case GL_TEXTURE_MAX_LOD:
+        /* TO DO */
+        break;
+    case GL_TEXTURE_BASE_LEVEL:
+    case GL_TEXTURE_MAX_LEVEL:
+        /* NOTE(review): unlike the filter and wrap cases, this path does not
+         * set TDFX_NEW_TEXTURE in new_state - confirm that is intended. */
+        RevalidateTexture(ctx, tObj);
+        break;
+
+    default:
+        break;
+    }
+}
+
+
+/*
+ * Called via glDeleteTextures: free the Glide data, then the texture object.
+ */
+static void
+tdfxDeleteTexture(GLcontext * ctx, struct gl_texture_object *tObj)
+{
+    tdfxContextPtr fxMesa;
+
+    if (!ctx || !ctx->DriverCtx)
+        return;
+    fxMesa = TDFX_CONTEXT(ctx);
+    tdfxTMFreeTexture(fxMesa, tObj);
+    fxMesa->new_state |= TDFX_NEW_TEXTURE;
+    _mesa_delete_texture_object(ctx, tObj);  /* mipmap images and the object itself */
+}
+
+
+/*
+ * Residency query: a texture counts as resident when it has driver data
+ * attached and that data's isInTM flag is set.  ctx is not consulted.
+ */
+static GLboolean
+tdfxIsTextureResident(GLcontext *ctx, struct gl_texture_object *tObj)
+{
+    const tdfxTexInfo *info = TDFX_TEXTURE_DATA(tObj);
+
+    if (!info)
+        return (GLboolean) 0;
+
+    return (GLboolean) (info->isInTM ? 1 : 0);
+}
+
+
+
+/*
+ * Convert a gl_color_table texture palette to Glide's format.
+ *
+ * data  - receives one packed 0xAARRGGBB word per palette entry
+ * table - source color table; entries are read from table->TableUB and
+ *         interpreted according to table->_BaseFormat
+ *
+ * Returns GR_TEXTABLE_PALETTE for the formats whose alpha is forced to 255
+ * (GL_LUMINANCE, GL_RGB) and GR_TEXTABLE_PALETTE_6666_EXT for the formats
+ * that carry an alpha value.  For an unrecognized base format an error is
+ * recorded, data is left untouched and GR_TEXTABLE_PALETTE is returned.
+ */
+static GrTexTable_t
+convertPalette(FxU32 data[256], const struct gl_color_table *table)
+{
+    const GLubyte *tableUB = table->TableUB;
+    GLint width = table->Size;
+    FxU32 r, g, b, a;
+    GLint i;
+
+    /* The destination holds exactly 256 entries; never write past it. */
+    if (width > 256)
+        width = 256;
+
+    switch (table->_BaseFormat) {
+    case GL_INTENSITY:
+        for (i = 0; i < width; i++) {
+            r = tableUB[i];
+            g = tableUB[i];
+            b = tableUB[i];
+            a = tableUB[i];
+            data[i] = (a << 24) | (r << 16) | (g << 8) | b;
+        }
+        return GR_TEXTABLE_PALETTE_6666_EXT;
+    case GL_LUMINANCE:
+        for (i = 0; i < width; i++) {
+            r = tableUB[i];
+            g = tableUB[i];
+            b = tableUB[i];
+            a = 255;
+            data[i] = (a << 24) | (r << 16) | (g << 8) | b;
+        }
+        return GR_TEXTABLE_PALETTE;
+    case GL_ALPHA:
+        for (i = 0; i < width; i++) {
+            r = g = b = 255;
+            a = tableUB[i];
+            data[i] = (a << 24) | (r << 16) | (g << 8) | b;
+        }
+        return GR_TEXTABLE_PALETTE_6666_EXT;
+    case GL_LUMINANCE_ALPHA:
+        for (i = 0; i < width; i++) {
+            r = g = b = tableUB[i * 2 + 0];
+            a = tableUB[i * 2 + 1];
+            data[i] = (a << 24) | (r << 16) | (g << 8) | b;
+        }
+        return GR_TEXTABLE_PALETTE_6666_EXT;
+    case GL_RGB:
+        for (i = 0; i < width; i++) {
+            r = tableUB[i * 3 + 0];
+            g = tableUB[i * 3 + 1];
+            b = tableUB[i * 3 + 2];
+            a = 255;
+            data[i] = (a << 24) | (r << 16) | (g << 8) | b;
+        }
+        return GR_TEXTABLE_PALETTE;
+    case GL_RGBA:
+        for (i = 0; i < width; i++) {
+            r = tableUB[i * 4 + 0];
+            g = tableUB[i * 4 + 1];
+            b = tableUB[i * 4 + 2];
+            a = tableUB[i * 4 + 3];
+            data[i] = (a << 24) | (r << 16) | (g << 8) | b;
+        }
+        return GR_TEXTABLE_PALETTE_6666_EXT;
+    default:
+        /* XXX fixme: how can this happen? */
+        /* Report the field the switch actually tested. */
+        _mesa_error(NULL, GL_INVALID_ENUM,
+                    "convertPalette: table->_BaseFormat == %s",
+                    _mesa_lookup_enum_by_nr(table->_BaseFormat));
+        return GR_TEXTABLE_PALETTE;
+    }
+}
+
+
+
+/*
+ * Refresh the Glide copy of a texture palette.
+ *
+ * tObj != NULL: convert that texture object's own palette into its driver
+ *               data, allocating the driver data on first use.  Returns
+ *               early when the object has no TableUB (e.g. a proxy
+ *               texture) or, with GL_OUT_OF_MEMORY recorded, when the
+ *               driver data cannot be allocated.
+ * tObj == NULL: convert the context's global texture palette and mark it
+ *               for upload.
+ *
+ * On success TDFX_NEW_TEXTURE is set in fxMesa->new_state.
+ */
+static void
+tdfxUpdateTexturePalette(GLcontext * ctx, struct gl_texture_object *tObj)
+{
+    tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+
+    if (tObj) {
+        /* per-texture palette */
+        tdfxTexInfo *ti;
+
+        /* This might be a proxy texture. */
+        if (!tObj->Palette.TableUB)
+            return;
+
+        if (!tObj->DriverData) {
+            tObj->DriverData = fxAllocTexObjData(fxMesa);
+            if (!tObj->DriverData) {
+                /* Same policy as the texture image entry points: report
+                 * the failure instead of dereferencing NULL below.
+                 */
+                _mesa_error(ctx, GL_OUT_OF_MEMORY, "glColorTable");
+                return;
+            }
+        }
+        ti = TDFX_TEXTURE_DATA(tObj);
+        assert(ti);
+        ti->paltype = convertPalette(ti->palette.data, &tObj->Palette);
+        /*tdfxTexInvalidate(ctx, tObj);*/
+    }
+    else {
+        /* global texture palette */
+        fxMesa->TexPalette.Type = convertPalette(fxMesa->glbPalette.data, &ctx->Texture.Palette);
+        fxMesa->TexPalette.Data = &(fxMesa->glbPalette.data);
+        fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_PALETTE;
+    }
+    fxMesa->new_state |= TDFX_NEW_TEXTURE; /* XXX too heavy-handed */
+}
+
+
+/**********************************************************************/
+/**** NEW TEXTURE IMAGE FUNCTIONS                                  ****/
+/**********************************************************************/
+
+/* Disabled code (#if 000 is always false): an error callback for the Texus
+ * library together with the two flags it records into.  Nothing between
+ * the #if and the #endif is compiled.
+ */
+#if 000
+static FxBool TexusFatalError = FXFALSE;
+static FxBool TexusError = FXFALSE;
+
+#define TX_DITHER_NONE                                  0x00000000
+
+/* Logs the message through _mesa_problem(), then records that an error
+ * occurred and whether it was reported as fatal.
+ * NOTE(review): string is passed as the format argument itself; if this
+ * block is ever re-enabled, confirm it cannot contain '%' sequences.
+ */
+static void
+fxTexusError(const char *string, FxBool fatal)
+{
+    _mesa_problem(NULL, string);
+   /*
+    * Just propagate the fatal value up.
+    */
+    TexusError = FXTRUE;
+    TexusFatalError = fatal;
+}
+#endif
+
+
+/*
+ * Pick the gl_format in which a texture image will be stored.
+ *
+ * internalFormat     - requested GL internal format, or a legacy component
+ *                      count (1..4)
+ * srcFormat, srcType - format/type of the incoming pixel data.  Only
+ *                      consulted for the generic RGB requests (GL_RGB, 3,
+ *                      GL_COMPRESSED_RGB) and the generic RGBA requests
+ *                      (GL_RGBA, 4, GL_COMPRESSED_RGBA), where a matching
+ *                      packed source type selects the same packed format.
+ *
+ * GL_RGB / GL_RGB8..16 and GL_RGBA / GL_RGBA8..16 / GL_RGB10_A2 are stored
+ * as ARGB8888 only when TDFX_IS_NAPALM(fxMesa) is true; otherwise they fall
+ * back to RGB565 and ARGB4444 respectively.
+ *
+ * Returns MESA_FORMAT_NONE, after calling _mesa_problem(), for a request
+ * not listed below.
+ */
+static gl_format
+tdfxChooseTextureFormat( GLcontext *ctx, GLint internalFormat,
+                           GLenum srcFormat, GLenum srcType )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   const GLboolean allow32bpt = TDFX_IS_NAPALM(fxMesa);
+
+   switch (internalFormat) {
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+      return MESA_FORMAT_A8;
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+      return MESA_FORMAT_L8;
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return MESA_FORMAT_AL88;
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      return MESA_FORMAT_I8;
+   case GL_R3_G3_B2:
+   case GL_RGB4:
+   case GL_RGB5:
+      return MESA_FORMAT_RGB565;
+   case GL_COMPRESSED_RGB:
+      /* intentional fall-through */
+   case 3:
+   case GL_RGB:
+     /* A 565 source can be stored as-is, whatever the hardware. */
+     if ( srcFormat == GL_RGB && srcType == GL_UNSIGNED_SHORT_5_6_5 ) {
+       return MESA_FORMAT_RGB565;
+     }
+     /* intentional fall through */
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return (allow32bpt) ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_RGB565;
+   case GL_RGBA2:
+   case GL_RGBA4:
+      return MESA_FORMAT_ARGB4444;
+   case GL_COMPRESSED_RGBA:
+      /* intentional fall-through */
+   case 4:
+   case GL_RGBA:
+     /* A packed BGRA source selects the storage format with the same bits. */
+     if ( srcFormat == GL_BGRA ) {
+       if ( srcType == GL_UNSIGNED_INT_8_8_8_8_REV ) {
+         return MESA_FORMAT_ARGB8888;
+       }
+       else if ( srcType == GL_UNSIGNED_SHORT_4_4_4_4_REV ) {
+         return MESA_FORMAT_ARGB4444;
+       }
+       else if ( srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV ) {
+         return MESA_FORMAT_ARGB1555;
+       }
+     }
+     /* intentional fall through */
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return allow32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+   case GL_RGB5_A1:
+      return MESA_FORMAT_ARGB1555;
+   case GL_COLOR_INDEX:
+   case GL_COLOR_INDEX1_EXT:
+   case GL_COLOR_INDEX2_EXT:
+   case GL_COLOR_INDEX4_EXT:
+   case GL_COLOR_INDEX8_EXT:
+   case GL_COLOR_INDEX12_EXT:
+   case GL_COLOR_INDEX16_EXT:
+      return MESA_FORMAT_CI8;
+   /* GL_EXT_texture_compression_s3tc */
+   /* GL_S3_s3tc */
+   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+   case GL_RGB_S3TC:
+   case GL_RGB4_S3TC:
+      return MESA_FORMAT_RGB_DXT1;
+   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+      return MESA_FORMAT_RGBA_DXT1;
+   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+   case GL_RGBA_S3TC:
+   case GL_RGBA4_S3TC:
+      return MESA_FORMAT_RGBA_DXT3;
+   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+      return MESA_FORMAT_RGBA_DXT5;
+   /* GL_3DFX_texture_compression_FXT1 */
+   case GL_COMPRESSED_RGB_FXT1_3DFX:
+      return MESA_FORMAT_RGB_FXT1;
+   case GL_COMPRESSED_RGBA_FXT1_3DFX:
+      return MESA_FORMAT_RGBA_FXT1;
+   default:
+      _mesa_problem(ctx, "unexpected format in tdfxChooseTextureFormat");
+      return MESA_FORMAT_NONE;
+   }
+}
+
+
+/*
+ * Map a Mesa texture format to the Glide texel format used to store it.
+ * Intensity and alpha images share GR_TEXFMT_ALPHA_8, both FXT1 variants
+ * share GR_TEXFMT_ARGB_CMP_FXT1 and both DXT1 variants share
+ * GR_TEXFMT_ARGB_CMP_DXT1.  An unlisted format is reported through
+ * _mesa_problem() and yields 0.
+ */
+static GrTextureFormat_t
+fxGlideFormat(GLint mesaFormat)
+{
+   switch (mesaFormat) {
+   /* 8 bits per texel */
+   case MESA_FORMAT_I8:
+   case MESA_FORMAT_A8:
+      return GR_TEXFMT_ALPHA_8;
+   case MESA_FORMAT_L8:
+      return GR_TEXFMT_INTENSITY_8;
+   case MESA_FORMAT_CI8:
+      return GR_TEXFMT_P_8;
+
+   /* 16 bits per texel */
+   case MESA_FORMAT_AL88:
+      return GR_TEXFMT_ALPHA_INTENSITY_88;
+   case MESA_FORMAT_RGB565:
+      return GR_TEXFMT_RGB_565;
+   case MESA_FORMAT_ARGB4444:
+      return GR_TEXFMT_ARGB_4444;
+   case MESA_FORMAT_ARGB1555:
+      return GR_TEXFMT_ARGB_1555;
+
+   /* 32 bits per texel */
+   case MESA_FORMAT_ARGB8888:
+      return GR_TEXFMT_ARGB_8888;
+
+   /* compressed */
+   case MESA_FORMAT_RGB_FXT1:
+   case MESA_FORMAT_RGBA_FXT1:
+      return GR_TEXFMT_ARGB_CMP_FXT1;
+   case MESA_FORMAT_RGB_DXT1:
+   case MESA_FORMAT_RGBA_DXT1:
+      return GR_TEXFMT_ARGB_CMP_DXT1;
+   case MESA_FORMAT_RGBA_DXT3:
+      return GR_TEXFMT_ARGB_CMP_DXT3;
+   case MESA_FORMAT_RGBA_DXT5:
+      return GR_TEXFMT_ARGB_CMP_DXT5;
+
+   default:
+      break;
+   }
+
+   _mesa_problem(NULL, "Unexpected format in fxGlideFormat");
+   return 0;
+}
+
+
+/* Texel-fetch functions for software texturing and glGetTexImage().
+ * We should have been able to use some "standard" fetch functions (which
+ * may get defined in texutil.c) but we have to account for scaled texture
+ * images on tdfx hardware (the 8:1 aspect ratio limit).
+ * Hence, we need special functions here.
+ */
+/* FXT1 decoder defined outside this file; fetch_rgb_fxt1() and
+ * fetch_rgba_fxt1() below call it with the stored image, its width and the
+ * scaled (i, j) position.  Presumably it writes the decoded texel to rgba --
+ * confirm against its definition.
+ */
+extern void
+fxt1_decode_1 (const void *texture, int width,
+               int i, int j, unsigned char *rgba);
+
+/*
+ * Texel fetch for 8-bit intensity images: the stored byte is copied to
+ * R, G, B and A.  (i, j) are multiplied by the level's wScale/hScale before
+ * indexing the stored image; k is unused.
+ */
+static void
+fetch_intensity8(const struct gl_texture_image *texImage,
+                 GLint i, GLint j, GLint k, GLchan * rgba)
+{
+    const tdfxMipMapLevel *level = TDFX_TEXIMAGE_DATA(texImage);
+    const GLint col = i * level->wScale;
+    const GLint row = j * level->hScale;
+    const GLubyte *src = (GLubyte *) texImage->Data;
+    const GLubyte value = src[row * level->width + col];
+
+    rgba[RCOMP] = value;
+    rgba[GCOMP] = value;
+    rgba[BCOMP] = value;
+    rgba[ACOMP] = value;
+}
+
+
+/*
+ * Texel fetch for 8-bit luminance images: the stored byte is copied to
+ * R, G and B, and alpha is set to 255.  (i, j) are multiplied by the level's
+ * wScale/hScale before indexing the stored image; k is unused.
+ */
+static void
+fetch_luminance8(const struct gl_texture_image *texImage,
+                 GLint i, GLint j, GLint k, GLchan * rgba)
+{
+    const tdfxMipMapLevel *level = TDFX_TEXIMAGE_DATA(texImage);
+    const GLint col = i * level->wScale;
+    const GLint row = j * level->hScale;
+    const GLubyte *src = (GLubyte *) texImage->Data;
+    const GLubyte lum = src[row * level->width + col];
+
+    rgba[RCOMP] = lum;
+    rgba[GCOMP] = lum;
+    rgba[BCOMP] = lum;
+    rgba[ACOMP] = 255;
+}
+
+
+/*
+ * Texel fetch for 8-bit alpha images: R, G and B are set to 255 and the
+ * stored byte becomes alpha.  (i, j) are multiplied by the level's
+ * wScale/hScale before indexing the stored image; k is unused.
+ */
+static void
+fetch_alpha8(const struct gl_texture_image *texImage,
+             GLint i, GLint j, GLint k, GLchan * rgba)
+{
+    const tdfxMipMapLevel *level = TDFX_TEXIMAGE_DATA(texImage);
+    const GLint col = i * level->wScale;
+    const GLint row = j * level->hScale;
+    const GLubyte *src = (GLubyte *) texImage->Data;
+    const GLubyte alpha = src[row * level->width + col];
+
+    rgba[RCOMP] = 255;
+    rgba[GCOMP] = 255;
+    rgba[BCOMP] = 255;
+    rgba[ACOMP] = alpha;
+}
+
+
+/*
+ * Texel fetch for 8-bit color-index images: writes the stored index to the
+ * single output slot *indexOut.  (i, j) are multiplied by the level's
+ * wScale/hScale before indexing the stored image; k is unused.
+ */
+static void
+fetch_index8(const struct gl_texture_image *texImage,
+             GLint i, GLint j, GLint k, GLchan * indexOut)
+{
+    const tdfxMipMapLevel *level = TDFX_TEXIMAGE_DATA(texImage);
+    const GLint col = i * level->wScale;
+    const GLint row = j * level->hScale;
+    const GLubyte *src = (GLubyte *) texImage->Data;
+
+    indexOut[0] = src[row * level->width + col];
+}
+
+
+/*
+ * Texel fetch for two-byte luminance/alpha images: byte 0 of the texel is
+ * copied to R, G and B, byte 1 becomes alpha.  (i, j) are multiplied by the
+ * level's wScale/hScale before indexing the stored image; k is unused.
+ */
+static void
+fetch_luminance8_alpha8(const struct gl_texture_image *texImage,
+                        GLint i, GLint j, GLint k, GLchan * rgba)
+{
+    const tdfxMipMapLevel *level = TDFX_TEXIMAGE_DATA(texImage);
+    const GLint col = i * level->wScale;
+    const GLint row = j * level->hScale;
+    const GLubyte *pair =
+        ((GLubyte *) texImage->Data) + (row * level->width + col) * 2;
+    const GLubyte lum = pair[0];
+    const GLubyte alpha = pair[1];
+
+    rgba[RCOMP] = lum;
+    rgba[GCOMP] = lum;
+    rgba[BCOMP] = lum;
+    rgba[ACOMP] = alpha;
+}
+
+
+/*
+ * Texel fetch for 16-bit 5-6-5 images: red is taken from bits 15..11,
+ * green from bits 10..5 and blue from bits 4..0, each expanded to 0..255;
+ * alpha is set to 255.  (i, j) are multiplied by the level's wScale/hScale
+ * before indexing the stored image; k is unused.
+ */
+static void
+fetch_r5g6b5(const struct gl_texture_image *texImage,
+             GLint i, GLint j, GLint k, GLchan * rgba)
+{
+    const tdfxMipMapLevel *level = TDFX_TEXIMAGE_DATA(texImage);
+    const GLint col = i * level->wScale;
+    const GLint row = j * level->hScale;
+    const GLushort *src = (GLushort *) texImage->Data;
+    const GLushort packed = src[row * level->width + col];
+
+    rgba[RCOMP] = ((packed >> 11) & 0x1f) * 255 / 31;
+    rgba[GCOMP] = ((packed >> 5) & 0x3f) * 255 / 63;
+    rgba[BCOMP] = ((packed >> 0) & 0x1f) * 255 / 31;
+    rgba[ACOMP] = 255;
+}
+
+
+/*
+ * Texel fetch for the images fxFetchFunction() registers under
+ * MESA_FORMAT_ARGB4444 (uploaded as GR_TEXFMT_ARGB_4444).
+ *
+ * The 16-bit texel is decoded with alpha in the top nibble followed by red,
+ * green and blue, i.e. the same A-R-G-B ordering fetch_a8r8g8b8() uses for
+ * the 32-bit ARGB format.  The previous code read the nibbles in R-G-B-A
+ * order, which rotated all four channels.  Each 4-bit value is expanded to
+ * 0..255.
+ *
+ * (i, j) are multiplied by the level's wScale/hScale before indexing the
+ * stored image; k is unused.
+ */
+static void
+fetch_r4g4b4a4(const struct gl_texture_image *texImage,
+	       GLint i, GLint j, GLint k, GLchan * rgba)
+{
+    const tdfxMipMapLevel *mml = TDFX_TEXIMAGE_DATA(texImage);
+    const GLushort *texel;
+
+    i = i * mml->wScale;
+    j = j * mml->hScale;
+
+    texel = ((GLushort *) texImage->Data) + j * mml->width + i;
+    rgba[RCOMP] = (((*texel) >> 8) & 0xf) * 255 / 15;
+    rgba[GCOMP] = (((*texel) >> 4) & 0xf) * 255 / 15;
+    rgba[BCOMP] = (((*texel) >> 0) & 0xf) * 255 / 15;
+    rgba[ACOMP] = (((*texel) >> 12) & 0xf) * 255 / 15;
+}
+
+
+/*
+ * Texel fetch for the images fxFetchFunction() registers under
+ * MESA_FORMAT_ARGB1555 (uploaded as GR_TEXFMT_ARGB_1555).
+ *
+ * The 16-bit texel is decoded with the 1-bit alpha in bit 15, then 5 bits
+ * each of red (14..10), green (9..5) and blue (4..0).  The previous code
+ * decoded an R5G5B5A1 layout (alpha in bit 0), which shifted every color
+ * channel by one bit and took alpha from the lowest blue bit.
+ *
+ * (i, j) are multiplied by the level's wScale/hScale before indexing the
+ * stored image; k is unused.
+ */
+static void
+fetch_r5g5b5a1(const struct gl_texture_image *texImage,
+	       GLint i, GLint j, GLint k, GLchan * rgba)
+{
+    const tdfxMipMapLevel *mml = TDFX_TEXIMAGE_DATA(texImage);
+    const GLushort *texel;
+
+    i = i * mml->wScale;
+    j = j * mml->hScale;
+
+    texel = ((GLushort *) texImage->Data) + j * mml->width + i;
+    rgba[RCOMP] = (((*texel) >> 10) & 0x1f) * 255 / 31;
+    rgba[GCOMP] = (((*texel) >> 5) & 0x1f) * 255 / 31;
+    rgba[BCOMP] = (((*texel) >> 0) & 0x1f) * 255 / 31;
+    rgba[ACOMP] = (((*texel) >> 15) & 0x01) * 255;
+}
+
+
+/*
+ * Texel fetch for 32-bit images: alpha is taken from bits 31..24, red from
+ * 23..16, green from 15..8 and blue from 7..0.  (i, j) are multiplied by the
+ * level's wScale/hScale before indexing the stored image; k is unused.
+ */
+static void
+fetch_a8r8g8b8(const struct gl_texture_image *texImage,
+               GLint i, GLint j, GLint k, GLchan * rgba)
+{
+    const tdfxMipMapLevel *level = TDFX_TEXIMAGE_DATA(texImage);
+    const GLint col = i * level->wScale;
+    const GLint row = j * level->hScale;
+    const GLuint *src = (GLuint *) texImage->Data;
+    const GLuint packed = src[row * level->width + col];
+
+    rgba[ACOMP] = (packed >> 24) & 0xff;
+    rgba[RCOMP] = (packed >> 16) & 0xff;
+    rgba[GCOMP] = (packed >>  8) & 0xff;
+    rgba[BCOMP] = packed & 0xff;
+}
+
+
+/*
+ * Texel fetch for RGB FXT1 images: hands the scaled (i, j) position to
+ * fxt1_decode_1() and then forces alpha to 255.  k is unused.
+ */
+static void
+fetch_rgb_fxt1(const struct gl_texture_image *texImage,
+               GLint i, GLint j, GLint k, GLchan *rgba)
+{
+    const tdfxMipMapLevel *level = TDFX_TEXIMAGE_DATA(texImage);
+    const GLint col = i * level->wScale;
+    const GLint row = j * level->hScale;
+
+    fxt1_decode_1(texImage->Data, level->width, col, row, rgba);
+
+    /* RGB-only format: overwrite whatever alpha the decoder produced. */
+    rgba[ACOMP] = 255;
+}
+
+
+/*
+ * Texel fetch for RGBA FXT1 images: hands the scaled (i, j) position to
+ * fxt1_decode_1() and leaves its output untouched.  k is unused.
+ */
+static void
+fetch_rgba_fxt1(const struct gl_texture_image *texImage,
+                GLint i, GLint j, GLint k, GLchan *rgba)
+{
+    const tdfxMipMapLevel *level = TDFX_TEXIMAGE_DATA(texImage);
+    const GLint col = i * level->wScale;
+    const GLint row = j * level->hScale;
+
+    fxt1_decode_1(texImage->Data, level->width, col, row, rgba);
+}
+
+
+/*
+ * Placeholder texel fetch for RGB DXT1 images.  Only the coordinate scaling
+ * is performed; no texel is decoded and rgba is NOT written.
+ */
+static void
+fetch_rgb_dxt1(const struct gl_texture_image *texImage,
+               GLint i, GLint j, GLint k, GLchan *rgba)
+{
+    const tdfxMipMapLevel *level = TDFX_TEXIMAGE_DATA(texImage);
+
+    i *= level->wScale;
+    j *= level->hScale;
+
+    /* XXX Get fetch func from _mesa_get_texel_fetch_func(); this used to be
+     * _mesa_texformat_rgb_dxt1.FetchTexel2D(texImage, i, j, k, rgba).
+     */
+}
+
+
+/*
+ * Placeholder texel fetch for RGBA DXT1 images.  Only the coordinate
+ * scaling is performed; no texel is decoded and rgba is NOT written.
+ */
+static void
+fetch_rgba_dxt1(const struct gl_texture_image *texImage,
+                GLint i, GLint j, GLint k, GLchan *rgba)
+{
+    const tdfxMipMapLevel *level = TDFX_TEXIMAGE_DATA(texImage);
+
+    i *= level->wScale;
+    j *= level->hScale;
+
+    /* XXX Get fetch func from _mesa_get_texel_fetch_func(); this used to be
+     * _mesa_texformat_rgba_dxt1.FetchTexel2D(texImage, i, j, k, rgba).
+     */
+}
+
+
+/*
+ * Placeholder texel fetch for RGBA DXT3 images.  Only the coordinate
+ * scaling is performed; no texel is decoded and rgba is NOT written.
+ */
+static void
+fetch_rgba_dxt3(const struct gl_texture_image *texImage,
+                GLint i, GLint j, GLint k, GLchan *rgba)
+{
+    const tdfxMipMapLevel *level = TDFX_TEXIMAGE_DATA(texImage);
+
+    i *= level->wScale;
+    j *= level->hScale;
+
+    /* XXX Get fetch func from _mesa_get_texel_fetch_func(); this used to be
+     * _mesa_texformat_rgba_dxt3.FetchTexel2D(texImage, i, j, k, rgba).
+     */
+}
+
+
+/*
+ * Placeholder texel fetch for RGBA DXT5 images.  Only the coordinate
+ * scaling is performed; no texel is decoded and rgba is NOT written.
+ */
+static void
+fetch_rgba_dxt5(const struct gl_texture_image *texImage,
+                GLint i, GLint j, GLint k, GLchan *rgba)
+{
+    const tdfxMipMapLevel *level = TDFX_TEXIMAGE_DATA(texImage);
+
+    i *= level->wScale;
+    j *= level->hScale;
+
+    /* XXX Get fetch func from _mesa_get_texel_fetch_func(); this used to be
+     * _mesa_texformat_rgba_dxt5.FetchTexel2D(texImage, i, j, k, rgba).
+     */
+}
+
+
+/*
+ * Select the driver's texel-fetch routine for a Mesa texture format.
+ * An unlisted format is reported through _mesa_problem() and yields NULL.
+ */
+static FetchTexelFuncC
+fxFetchFunction(GLint mesaFormat)
+{
+   switch (mesaFormat) {
+   /* 8 bits per texel */
+   case MESA_FORMAT_I8:        return fetch_intensity8;
+   case MESA_FORMAT_A8:        return fetch_alpha8;
+   case MESA_FORMAT_L8:        return fetch_luminance8;
+   case MESA_FORMAT_CI8:       return fetch_index8;
+
+   /* 16 bits per texel */
+   case MESA_FORMAT_AL88:      return fetch_luminance8_alpha8;
+   case MESA_FORMAT_RGB565:    return fetch_r5g6b5;
+   case MESA_FORMAT_ARGB4444:  return fetch_r4g4b4a4;
+   case MESA_FORMAT_ARGB1555:  return fetch_r5g5b5a1;
+
+   /* 32 bits per texel */
+   case MESA_FORMAT_ARGB8888:  return fetch_a8r8g8b8;
+
+   /* compressed */
+   case MESA_FORMAT_RGB_FXT1:  return fetch_rgb_fxt1;
+   case MESA_FORMAT_RGBA_FXT1: return fetch_rgba_fxt1;
+   case MESA_FORMAT_RGB_DXT1:  return fetch_rgb_dxt1;
+   case MESA_FORMAT_RGBA_DXT1: return fetch_rgba_dxt1;
+   case MESA_FORMAT_RGBA_DXT3: return fetch_rgba_dxt3;
+   case MESA_FORMAT_RGBA_DXT5: return fetch_rgba_dxt5;
+
+   default:
+      break;
+   }
+
+   _mesa_problem(NULL, "Unexpected format in fxFetchFunction");
+   return NULL;
+}
+
+
+/*
+ * Store a width x height block of user pixels into texImage->Data after
+ * stretching it by the mipmap level's scale factors (mml->wScale,
+ * mml->hScale).
+ *
+ * xoffset, yoffset  - destination position in unscaled texels; multiplied
+ *                     by the scale factors before use
+ * width, height     - size of the incoming block in unscaled texels
+ * format, type, pixels, packing - description of the user data, passed on
+ *                     to _mesa_texstore()
+ * texelBytes        - bytes per texel of the stored format (used by the
+ *                     uncompressed path only)
+ * dstRowStride      - row stride of texImage->Data in bytes
+ *
+ * Uncompressed formats: the pixels are converted to the stored format in a
+ * temporary buffer, then rescaled straight into the destination.
+ * Compressed formats: the pixels are converted to 4-byte RGBA, rescaled
+ * into a second temporary buffer, and that buffer is stored (and thereby
+ * compressed) into the destination.
+ *
+ * Returns GL_FALSE if a temporary buffer cannot be allocated, GL_TRUE
+ * otherwise.  The results of the _mesa_texstore() calls are not examined.
+ */
+static GLboolean
+adjust2DRatio (GLcontext *ctx,
+	       GLint xoffset, GLint yoffset,
+	       GLint width, GLint height,
+	       GLenum format, GLenum type, const GLvoid *pixels,
+	       const struct gl_pixelstore_attrib *packing,
+	       tdfxMipMapLevel *mml,
+	       struct gl_texture_image *texImage,
+	       GLint texelBytes,
+	       GLint dstRowStride)
+{
+   const GLint newWidth = width * mml->wScale;
+   const GLint newHeight = height * mml->hScale;
+   GLvoid *tempImage;
+   /* single zero image offset handed to every _mesa_texstore() call below */
+   GLuint dstImageOffsets = 0;
+
+   if (!_mesa_is_format_compressed(texImage->TexFormat)) {
+      GLubyte *destAddr;
+
+      tempImage = MALLOC(width * height * texelBytes);
+      if (!tempImage) {
+         return GL_FALSE;
+      }
+
+      /* convert the user pixels to the stored format, still unscaled */
+      _mesa_texstore(ctx, 2, texImage->_BaseFormat,
+                     texImage->TexFormat, tempImage,
+                     0, 0, 0, /* dstX/Y/Zoffset */
+                     width * texelBytes, /* dstRowStride */
+                     &dstImageOffsets,
+                     width, height, 1,
+                     format, type, pixels, packing);
+
+      /* now rescale */
+      /* compute address of dest subimage within the overall tex image */
+      destAddr = (GLubyte *) texImage->Data
+         + (yoffset * mml->hScale * mml->width
+            + xoffset * mml->wScale) * texelBytes;
+
+      _mesa_rescale_teximage2d(texelBytes,
+                               width,
+                               dstRowStride, /* dst stride */
+                               width, height,
+                               newWidth, newHeight,
+                               tempImage, destAddr);
+   } else {
+      /* intermediate images use 4 bytes per texel */
+      const GLint rawBytes = 4;
+      GLvoid *rawImage = MALLOC(width * height * rawBytes);
+
+      if (!rawImage) {
+         return GL_FALSE;
+      }
+      tempImage = MALLOC(newWidth * newHeight * rawBytes);
+      if (!tempImage) {
+	 FREE(rawImage);
+         return GL_FALSE;
+      }
+      /* unpack image, apply transfer ops and store in rawImage */
+      _mesa_texstore(ctx, 2, GL_RGBA,
+                     MESA_FORMAT_RGBA8888_REV, rawImage,
+                     0, 0, 0, /* dstX/Y/Zoffset */
+                     width * rawBytes, /* dstRowStride */
+                     &dstImageOffsets,
+                     width, height, 1,
+                     format, type, pixels, packing);
+      _mesa_rescale_teximage2d(rawBytes,
+                               width,
+                               newWidth * rawBytes, /* dst stride */
+                               width, height, /* src */
+                               newWidth, newHeight, /* dst */
+                               rawImage /*src*/, tempImage /*dst*/ );
+      /* store the rescaled RGBA image into the texture at the scaled offsets */
+      _mesa_texstore(ctx, 2, texImage->_BaseFormat,
+                     texImage->TexFormat, texImage->Data,
+                     xoffset * mml->wScale, yoffset * mml->hScale, 0, /* dstX/Y/Zoffset */
+                     dstRowStride,
+                     &dstImageOffsets,
+                     newWidth, newHeight, 1,
+                     GL_RGBA, CHAN_TYPE, tempImage, &ctx->DefaultPacking);
+      FREE(rawImage);
+   }
+
+   /* tempImage was allocated on both paths */
+   FREE(tempImage);
+
+   return GL_TRUE;
+}
+
+
+/*
+ * Driver hook for glTexImage2D (tdfxTexImage1D forwards here with a height
+ * of 1).
+ *
+ * Attaches driver data to the texture object and image on first use,
+ * computes the scale factors needed to respect Glide's 8:1 aspect limit,
+ * allocates texImage->Data for the (possibly enlarged) image and, when
+ * pixels is non-NULL, stores the user data into it -- through
+ * adjust2DRatio() if any scaling is required, directly via _mesa_texstore()
+ * otherwise.  Finally the texture is revalidated and flagged for reload.
+ *
+ * Every allocation failure records GL_OUT_OF_MEMORY and returns early.
+ */
+static void
+tdfxTexImage2D(GLcontext *ctx, GLenum target, GLint level,
+               GLint internalFormat, GLint width, GLint height, GLint border,
+               GLenum format, GLenum type, const GLvoid *pixels,
+               const struct gl_pixelstore_attrib *packing,
+               struct gl_texture_object *texObj,
+               struct gl_texture_image *texImage)
+{
+    tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+    tdfxTexInfo *ti;
+    tdfxMipMapLevel *mml;
+    GLint texelBytes, dstRowStride;
+    GLuint mesaFormat;
+
+    /*
+    printf("TexImage id=%d int 0x%x  format 0x%x  type 0x%x  %dx%d\n",
+           texObj->Name, texImage->InternalFormat, format, type,
+           texImage->Width, texImage->Height);
+    */
+
+    ti = TDFX_TEXTURE_DATA(texObj);
+    if (!ti) {
+        texObj->DriverData = fxAllocTexObjData(fxMesa);
+        if (!texObj->DriverData) {
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+            return;
+        }
+        ti = TDFX_TEXTURE_DATA(texObj);
+    }
+    assert(ti);
+
+    mml = TDFX_TEXIMAGE_DATA(texImage);
+    if (!mml) {
+        texImage->DriverData = CALLOC(sizeof(tdfxMipMapLevel));
+        if (!texImage->DriverData) {
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+            return;
+        }
+        mml = TDFX_TEXIMAGE_DATA(texImage);
+    }
+
+    /* Determine width and height scale factors for texture.
+     * Remember, Glide is limited to 8:1 aspect ratios.
+     */
+    tdfxTexGetInfo(ctx,
+                   texImage->Width, texImage->Height,
+                   NULL,       /* lod level          */
+                   NULL,       /* aspect ratio       */
+                   NULL, NULL, /* sscale, tscale     */
+                   &mml->wScale, &mml->hScale);
+
+    /* rescaled size: */
+    mml->width = width * mml->wScale;
+    mml->height = height * mml->hScale;
+
+#if FX_COMPRESS_S3TC_AS_FXT1_HACK
+    /* [koolsmoky] substitute FXT1 for DXTn and Legacy S3TC */
+    /* [dBorca] we should update texture's attribute, then,
+     * because if the application asks us to decompress, we
+     * have to know the REAL format! Also, DXT3/5 might not
+     * be correct, since it would mess with "compressedSize".
+     * Ditto for GL_RGBA[4]_S3TC, which is always mapped to DXT3.
+     */
+    if (_mesa_is_format_compressed(texImage->TexFormat)) {
+      switch (internalFormat) {
+      case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+      case GL_RGB_S3TC:
+      case GL_RGB4_S3TC:
+        internalFormat = GL_COMPRESSED_RGB_FXT1_3DFX;
+        break;
+      case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+      case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+      case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+      case GL_RGBA_S3TC:
+      case GL_RGBA4_S3TC:
+        internalFormat = GL_COMPRESSED_RGBA_FXT1_3DFX;
+        break;
+      default:
+        /* every other compressed request keeps its internal format */
+        break;
+      }
+      texImage->InternalFormat = internalFormat;
+    }
+#endif
+#if FX_TC_NAPALM
+    if (fxMesa->type >= GR_SSTTYPE_Voodoo4) {
+       GLenum texNapalm = 0;
+       if (internalFormat == GL_COMPRESSED_RGB) {
+          texNapalm = GL_COMPRESSED_RGB_FXT1_3DFX;
+       } else if (internalFormat == GL_COMPRESSED_RGBA) {
+          texNapalm = GL_COMPRESSED_RGBA_FXT1_3DFX;
+       }
+       if (texNapalm) {
+          texImage->InternalFormat = internalFormat = texNapalm;
+          /* This used to assign GL_TRUE to the result of
+           * _mesa_is_format_compressed(), which is not an lvalue and
+           * cannot compile.  Whether the image is compressed is derived
+           * from texImage->TexFormat, so there is no flag to set here.
+           * NOTE(review): TexFormat itself is not changed by this block.
+           */
+       }
+    }
+#endif
+
+    mesaFormat = texImage->TexFormat;
+    mml->glideFormat = fxGlideFormat(mesaFormat);
+    ti->info.format = mml->glideFormat;
+    texImage->FetchTexelc = fxFetchFunction(mesaFormat);
+    texelBytes = _mesa_get_format_bytes(texImage->TexFormat);
+
+    /* Storage is sized for the rescaled image (mml->width x mml->height). */
+    if (_mesa_is_format_compressed(texImage->TexFormat)) {
+       GLuint compressedSize = _mesa_format_image_size(mesaFormat, mml->width,
+                                                       mml->height, 1);
+       dstRowStride = _mesa_format_row_stride(texImage->TexFormat, mml->width);
+       texImage->Data = _mesa_alloc_texmemory(compressedSize);
+    } else {
+       dstRowStride = mml->width * texelBytes;
+       texImage->Data = _mesa_alloc_texmemory(mml->width * mml->height * texelBytes);
+    }
+    if (!texImage->Data) {
+       _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+       return;
+    }
+
+    if (pixels != NULL) {
+       if (mml->wScale != 1 || mml->hScale != 1) {
+          /* rescale image to overcome 1:8 aspect limitation */
+          if (!adjust2DRatio(ctx,
+                             0, 0,
+                             width, height,
+                             format, type, pixels,
+                             packing,
+                             mml,
+                             texImage,
+                             texelBytes,
+                             dstRowStride)
+             ) {
+             _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+             return;
+          }
+       }
+       else {
+          /* no rescaling needed */
+          /* unpack image, apply transfer ops and store in texImage->Data */
+          _mesa_texstore(ctx, 2, texImage->_BaseFormat,
+                         texImage->TexFormat, texImage->Data,
+                         0, 0, 0, /* dstX/Y/Zoffset */
+                         dstRowStride,
+                         texImage->ImageOffsets,
+                         width, height, 1,
+                         format, type, pixels, packing);
+       }
+    }
+
+    RevalidateTexture(ctx, texObj);
+
+    ti->reloadImages = GL_TRUE;
+    fxMesa->new_state |= TDFX_NEW_TEXTURE;
+}
+
+
+/*
+ * Driver hook for glTexSubImage2D (tdfxTexSubImage1D forwards here with a
+ * height of 1).
+ *
+ * Replaces a rectangle of an existing texture image.  When the mipmap level
+ * was stored with scale factors other than 1 the sub-image goes through
+ * adjust2DRatio(); otherwise it is written directly with _mesa_texstore().
+ * On success the texture is flagged for reload.
+ *
+ * A texture object without driver data is reported through _mesa_problem()
+ * and left untouched; a failed rescale records GL_OUT_OF_MEMORY.
+ */
+static void
+tdfxTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
+                    GLint xoffset, GLint yoffset,
+                    GLsizei width, GLsizei height,
+                    GLenum format, GLenum type,
+                    const GLvoid *pixels,
+                    const struct gl_pixelstore_attrib *packing,
+                    struct gl_texture_object *texObj,
+                    struct gl_texture_image *texImage )
+{
+    tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+    tdfxTexInfo *ti;
+    tdfxMipMapLevel *mml;
+    GLint texelBytes;
+    GLint dstRowStride;
+
+    if (!texObj->DriverData) {
+        _mesa_problem(ctx, "problem in fxDDTexSubImage2D");
+        return;
+    }
+
+    ti = TDFX_TEXTURE_DATA(texObj);
+    assert(ti);
+    mml = TDFX_TEXIMAGE_DATA(texImage);
+    assert(mml);
+
+    /* there must already be an image to update */
+    assert(texImage->Data);
+    assert(texImage->_BaseFormat);
+
+    texelBytes = _mesa_get_format_bytes(texImage->TexFormat);
+    dstRowStride = _mesa_is_format_compressed(texImage->TexFormat)
+        ? _mesa_format_row_stride(texImage->TexFormat, mml->width)
+        : mml->width * texelBytes;
+
+    if (mml->wScale == 1 && mml->hScale == 1) {
+        /* stored at its natural size: write the sub-image in place */
+        _mesa_texstore(ctx, 2, texImage->_BaseFormat,
+                       texImage->TexFormat, texImage->Data,
+                       xoffset, yoffset, 0,
+                       dstRowStride,
+                       texImage->ImageOffsets,
+                       width, height, 1,
+                       format, type, pixels, packing);
+    }
+    else if (!adjust2DRatio(ctx, xoffset, yoffset, width, height,
+                            format, type, pixels, packing,
+                            mml, texImage, texelBytes, dstRowStride)) {
+        /* the sub-image had to be stretched to the level's scale factors */
+        _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D");
+        return;
+    }
+
+    ti->reloadImages = GL_TRUE; /* signal the image needs to be reloaded */
+    fxMesa->new_state |= TDFX_NEW_TEXTURE;  /* XXX this might be a bit much */
+}
+
+
+/*
+ * Driver hook for glTexImage1D: forwarded unchanged to tdfxTexImage2D()
+ * as an image of height 1.
+ */
+static void
+tdfxTexImage1D(GLcontext *ctx, GLenum target, GLint level,
+               GLint internalFormat, GLint width, GLint border,
+               GLenum format, GLenum type, const GLvoid *pixels,
+               const struct gl_pixelstore_attrib *packing,
+               struct gl_texture_object *texObj,
+               struct gl_texture_image *texImage)
+{
+    const GLint height = 1;
+
+    tdfxTexImage2D(ctx, target, level, internalFormat,
+                   width, height, border,
+                   format, type, pixels, packing,
+                   texObj, texImage);
+}
+
+/*
+ * Driver hook for glTexSubImage1D: forwarded unchanged to
+ * tdfxTexSubImage2D() as a one-row update at y offset 0.
+ */
+static void
+tdfxTexSubImage1D(GLcontext *ctx, GLenum target, GLint level,
+                    GLint xoffset,
+                    GLsizei width,
+                    GLenum format, GLenum type,
+                    const GLvoid *pixels,
+                    const struct gl_pixelstore_attrib *packing,
+                    struct gl_texture_object *texObj,
+                    struct gl_texture_image *texImage )
+{
+    const GLint yoffset = 0;
+    const GLsizei height = 1;
+
+    tdfxTexSubImage2D(ctx, target, level,
+                      xoffset, yoffset, width, height,
+                      format, type, pixels, packing,
+                      texObj, texImage);
+}
+
+/**********************************************************************/
+/**** COMPRESSED TEXTURE IMAGE FUNCTIONS                           ****/
+/**********************************************************************/
+
+/*
+ * Driver hook for glCompressedTexImage2D.
+ *
+ * Accepts borderless 1D/2D targets only; anything else is reported through
+ * _mesa_problem() and ignored.  Attaches driver data to the texture object
+ * and image on first use, computes the scale factors needed for Glide's
+ * aspect-ratio limit, allocates texImage->Data if the image has no storage
+ * yet, and stores the caller's compressed data:
+ *   - with scale factors of 1 the data is copied verbatim;
+ *   - otherwise the compressed rows are replicated into the padded image
+ *     with _mesa_upscale_teximage2d() and ti->padded is set.
+ * Finally the texture is revalidated and flagged for reload.
+ *
+ * Every allocation failure records GL_OUT_OF_MEMORY and returns early.
+ */
+static void
+tdfxCompressedTexImage2D (GLcontext *ctx, GLenum target,
+                          GLint level, GLint internalFormat,
+                          GLsizei width, GLsizei height, GLint border,
+                          GLsizei imageSize, const GLvoid *data,
+                          struct gl_texture_object *texObj,
+                          struct gl_texture_image *texImage)
+{
+    tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+    tdfxTexInfo *ti;
+    tdfxMipMapLevel *mml;
+    gl_format mesaFormat;
+    GLuint compressedSize;
+
+    if (TDFX_DEBUG & DEBUG_VERBOSE_DRI) {
+        fprintf(stderr, "tdfxCompressedTexImage2D: id=%d int 0x%x  %dx%d\n",
+                        texObj->Name, internalFormat,
+                        width, height);
+    }
+
+    if ((target != GL_TEXTURE_1D && target != GL_TEXTURE_2D) || texImage->Border > 0) {
+       _mesa_problem(NULL, "tdfx: unsupported texture in tdfxCompressedTexImg()\n");
+       return;
+    }
+
+    ti = TDFX_TEXTURE_DATA(texObj);
+    if (!ti) {
+        texObj->DriverData = fxAllocTexObjData(fxMesa);
+        if (!texObj->DriverData) {
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
+            return;
+        }
+        ti = TDFX_TEXTURE_DATA(texObj);
+    }
+    assert(ti);
+
+    mml = TDFX_TEXIMAGE_DATA(texImage);
+    if (!mml) {
+        texImage->DriverData = CALLOC(sizeof(tdfxMipMapLevel));
+        if (!texImage->DriverData) {
+            _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
+            return;
+        }
+        mml = TDFX_TEXIMAGE_DATA(texImage);
+    }
+
+    tdfxTexGetInfo(ctx, width, height, NULL, NULL, NULL, NULL,
+                   &mml->wScale, &mml->hScale);
+
+    mml->width = width * mml->wScale;
+    mml->height = height * mml->hScale;
+
+
+    /* Determine the appropriate Glide texel format,
+     * given the user's internal texture format hint.
+     */
+    mesaFormat = texImage->TexFormat;
+    mml->glideFormat = fxGlideFormat(mesaFormat);
+    ti->info.format = mml->glideFormat;
+    texImage->FetchTexelc = fxFetchFunction(mesaFormat);
+
+    /* Size of the stored (possibly padded) image.  This must be known even
+     * when the storage already exists: the memcpy() below depends on it,
+     * and it used to be left at zero in that case, copying nothing.
+     */
+    compressedSize = _mesa_format_image_size(mesaFormat, mml->width,
+                                             mml->height, 1);
+
+    /* allocate new storage for texture image, if needed */
+    if (!texImage->Data) {
+       texImage->Data = _mesa_alloc_texmemory(compressedSize);
+       if (!texImage->Data) {
+          _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D");
+          return;
+       }
+    }
+
+    /* save the texture data */
+    if (mml->wScale != 1 || mml->hScale != 1) {
+       /* [dBorca] Hack alert:
+        * now we're screwed. We can't decompress,
+        * unless we do it in HW (via textureBuffer).
+        * We still have some chances:
+        * 1) we got FXT1 textures - we CAN decompress, rescale for
+        *    aspectratio, then compress back.
+        * 2) there is a chance that MIN("s", "t") won't be overflowed.
+        *    Thus, we don't care about textureclamp and we could lower
+        *    MIN("uscale", "vscale") below 32. We still have to have
+        *    our data aligned inside a 8:1 rectangle.
+        * 3) just in case if MIN("s", "t") gets overflowed with GL_REPEAT,
+        *    we replicate the data over the padded area.
+        * For now, we take 2) + 3) but texelfetchers will be wrong!
+        */
+       GLuint srcRowStride = _mesa_format_row_stride(mesaFormat, width);
+       GLuint destRowStride = _mesa_format_row_stride(mesaFormat,
+                                                      mml->width);
+
+       /* rows are counted in blocks of four texel rows, rounded up */
+       _mesa_upscale_teximage2d(srcRowStride, (height+3) / 4,
+                                destRowStride, (mml->height+3) / 4,
+                                1, data, srcRowStride,
+                                texImage->Data);
+       ti->padded = GL_TRUE;
+    } else {
+       /* no padding: the stored image has the size of the caller's image */
+       memcpy(texImage->Data, data, compressedSize);
+    }
+
+    RevalidateTexture(ctx, texObj);
+
+    ti->reloadImages = GL_TRUE;
+    fxMesa->new_state |= TDFX_NEW_TEXTURE;
+}
+
+
+/**
+ * Store a compressed sub-image into an existing texture image.
+ * Installed as ctx->Driver.CompressedTexSubImage2D by tdfxInitTextureFuncs().
+ *
+ * The incoming data is copied, one row of compressed blocks at a time,
+ * into texImage->Data starting at the address computed by
+ * _mesa_compressed_image_address() for (xoffset, yoffset).  When the image
+ * is padded for the hardware (mml->wScale or mml->hScale != 1) the stored
+ * data is then replicated over the padded area.  Finally the texture is
+ * revalidated and flagged for reload.
+ *
+ * xoffset/yoffset and width/height are given in texels; data points to the
+ * compressed source blocks.  target, level, format and imageSize are not
+ * examined here.
+ *
+ * Both the texture object and the texture image must already carry their
+ * driver data (asserted).
+ */
+static void
+tdfxCompressedTexSubImage2D( GLcontext *ctx, GLenum target,
+                             GLint level, GLint xoffset,
+                             GLint yoffset, GLsizei width,
+                             GLint height, GLenum format,
+                             GLsizei imageSize, const GLvoid *data,
+                             struct gl_texture_object *texObj,
+                             struct gl_texture_image *texImage )
+{
+    tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+    tdfxTexInfo *ti;
+    tdfxMipMapLevel *mml;
+    GLint destRowStride, srcRowStride;
+    GLint i, rows;
+    GLubyte *dest;
+    const GLuint mesaFormat = texImage->TexFormat;
+
+    if (TDFX_DEBUG & DEBUG_VERBOSE_DRI) {
+        fprintf(stderr, "tdfxCompressedTexSubImage2D: id=%d\n", texObj->Name);
+    }
+
+    ti = TDFX_TEXTURE_DATA(texObj);
+    assert(ti);
+    mml = TDFX_TEXIMAGE_DATA(texImage);
+    assert(mml);
+
+    srcRowStride = _mesa_format_row_stride(mesaFormat, width);
+
+    destRowStride = _mesa_format_row_stride(mesaFormat, mml->width);
+    dest = _mesa_compressed_image_address(xoffset, yoffset, 0,
+                                          mesaFormat,
+                                          mml->width,
+                               (GLubyte*) texImage->Data);
+
+    /* Number of block rows to copy.  Round up, exactly as
+     * tdfxCompressedTexImage2D does, so that an image shorter than one
+     * block (e.g. a small mipmap level) is still stored instead of being
+     * silently dropped.
+     * [dBorca] hardcoded 4, but works for FXT1/DXTC
+     */
+    rows = (height + 3) / 4;
+
+    for (i = 0; i < rows; i++) {
+       memcpy(dest, data, srcRowStride);
+       dest += destRowStride;
+       data = (GLvoid *)((intptr_t)data + (intptr_t)srcRowStride);
+    }
+
+    /* [dBorca] Hack alert:
+     * see tdfxCompressedTexImage2D for caveats
+     */
+    if (mml->wScale != 1 || mml->hScale != 1) {
+       srcRowStride = _mesa_format_row_stride(mesaFormat, texImage->Width);
+
+       destRowStride = _mesa_format_row_stride(mesaFormat, mml->width);
+       /* Block-row counts are rounded up here too; a truncated source
+        * height of 0 would make the replication meaningless.
+        */
+       _mesa_upscale_teximage2d(srcRowStride, (texImage->Height + 3) / 4,
+                                destRowStride, (mml->height + 3) / 4,
+                                1, texImage->Data, destRowStride,
+                                texImage->Data);
+    }
+
+    RevalidateTexture(ctx, texObj);
+
+    ti->reloadImages = GL_TRUE;
+    fxMesa->new_state |= TDFX_NEW_TEXTURE;
+}
+
+
+#if	0
+/*
+ * Debug helper (compiled out): dump a w x h image to stdout as hex, one
+ * image row per output line.  'c' is the number of bytes per texel; only
+ * c == 2 and c == 3 produce any output, other values just advance through
+ * the data silently.
+ */
+static void
+PrintTexture(int w, int h, int c, const GLubyte * data)
+{
+    int i, j;
+    for (i = 0; i < h; i++) {
+        for (j = 0; j < w; j++) {
+            if (c == 2)
+                printf("%02x %02x  ", data[0], data[1]);
+            else if (c == 3)
+                printf("%02x %02x %02x  ", data[0], data[1], data[2]);
+            /* step to the next texel */
+            data += c;
+        }
+        printf("\n");
+    }
+}
+#endif
+
+
+/**
+ * Test whether a proxy texture with the given dimensions would fit into
+ * texture memory.
+ *
+ * For 1D and 2D proxy targets the 2D proxy texture object is loaded with
+ * width/height/border for the given level, revalidated, and the amount of
+ * memory Glide reports for it is compared against the total texture
+ * memory of TMU 0.  All other targets are reported as acceptable.
+ *
+ * internalFormat, format, type and depth are not examined.
+ *
+ * \return GL_TRUE if the texture is acceptable, GL_FALSE if it needs more
+ *         memory than is available or if the driver's per-texture data
+ *         could not be allocated.
+ */
+GLboolean
+tdfxTestProxyTexImage(GLcontext *ctx, GLenum target,
+                        GLint level, GLint internalFormat,
+                        GLenum format, GLenum type,
+                        GLint width, GLint height,
+                        GLint depth, GLint border)
+{
+    tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+    struct gl_shared_state *mesaShared = fxMesa->glCtx->Shared;
+    struct tdfxSharedState *shared = (struct tdfxSharedState *) mesaShared->DriverData;
+
+    switch (target) {
+    case GL_PROXY_TEXTURE_1D:
+        /* falls through to the 2D test (marked "JJJ wrong" by the
+         * original author)
+         */
+    case GL_PROXY_TEXTURE_2D:
+        {
+            struct gl_texture_object *tObj;
+            tdfxTexInfo *ti;
+            int memNeeded;
+
+            tObj = ctx->Texture.ProxyTex[TEXTURE_2D_INDEX];
+            if (!tObj->DriverData) {
+                tObj->DriverData = fxAllocTexObjData(fxMesa);
+                if (!tObj->DriverData) {
+                    /* Without the per-texture data nothing can be
+                     * measured; report the texture as unsupported rather
+                     * than dereferencing NULL below.
+                     */
+                    return GL_FALSE;
+                }
+            }
+            ti = TDFX_TEXTURE_DATA(tObj);
+            assert(ti);
+
+            /* assign the parameters to test against */
+            tObj->Image[0][level]->Width = width;
+            tObj->Image[0][level]->Height = height;
+            tObj->Image[0][level]->Border = border;
+#if 0
+            tObj->Image[0][level]->InternalFormat = internalFormat;
+#endif
+            if (level == 0) {
+               /* don't use mipmap levels > 0 */
+               tObj->MinFilter = tObj->MagFilter = GL_NEAREST;
+            }
+            else {
+               /* test with all mipmap levels */
+               tObj->MinFilter = GL_LINEAR_MIPMAP_LINEAR;
+               tObj->MagFilter = GL_NEAREST;
+            }
+            RevalidateTexture(ctx, tObj);
+
+            /*
+            printf("small lodlog2 0x%x\n", ti->info.smallLodLog2);
+            printf("large lodlog2 0x%x\n", ti->info.largeLodLog2);
+            printf("aspect ratio 0x%x\n", ti->info.aspectRatioLog2);
+            printf("glide format 0x%x\n", ti->info.format);
+            printf("data %p\n", ti->info.data);
+            printf("lodblend %d\n", (int) ti->LODblend);
+            */
+
+            /* Determine how much memory the texture needs.  The LOD-blend
+             * (split across TMUs) case and the single-TMU case used to be
+             * separate but identical branches; both ask for the full
+             * mipmap set.
+             * XXX GR_MIPMAPLEVELMASK_BOTH might not be right, but works
+             */
+            memNeeded = fxMesa->Glide.grTexTextureMemRequired(
+                                    GR_MIPMAPLEVELMASK_BOTH, &(ti->info));
+            /*
+            printf("Proxy test %d > %d\n", memNeeded, shared->totalTexMem[0]);
+            */
+            if (memNeeded > shared->totalTexMem[0])
+                return GL_FALSE;
+            else
+                return GL_TRUE;
+        }
+    case GL_PROXY_TEXTURE_3D:
+        return GL_TRUE;  /* software rendering */
+    default:
+        return GL_TRUE;  /* never happens, silence compiler */
+    }
+}
+
+
+/**
+ * Driver hook for ctx->Driver.NewTextureObject: create a texture object.
+ *
+ * This simply forwards to core Mesa's _mesa_new_texture_object(); no
+ * driver-private data is attached at this point.
+ * Note: this hook also runs during context creation, when the default
+ * texture objects are allocated.
+ * Note: the driver-specific texture object could be 'derived' from the
+ * core gl_texture_object by containment.  Not done at this time.
+ */
+static struct gl_texture_object *
+tdfxNewTextureObject( GLcontext *ctx, GLuint name, GLenum target )
+{
+   return _mesa_new_texture_object(ctx, name, target);
+}
+
+
+/**
+ * Plug the tdfx texture entry points into the device-driver function table.
+ */
+void tdfxInitTextureFuncs( struct dd_function_table *functions )
+{
+   /* texture object lifetime and binding */
+   functions->NewTextureObject = tdfxNewTextureObject;
+   functions->BindTexture = tdfxBindTexture;
+   functions->DeleteTexture = tdfxDeleteTexture;
+   functions->IsTextureResident = tdfxIsTextureResident;
+
+   /* texture state */
+   functions->TexEnv = tdfxTexEnv;
+   functions->TexParameter = tdfxTexParameter;
+   functions->UpdateTexturePalette = tdfxUpdateTexturePalette;
+
+   /* image specification */
+   functions->ChooseTextureFormat = tdfxChooseTextureFormat;
+   functions->TexImage1D = tdfxTexImage1D;
+   functions->TexSubImage1D = tdfxTexSubImage1D;
+   functions->TexImage2D = tdfxTexImage2D;
+   functions->TexSubImage2D = tdfxTexSubImage2D;
+   functions->CompressedTexImage2D = tdfxCompressedTexImage2D;
+   functions->CompressedTexSubImage2D = tdfxCompressedTexSubImage2D;
+   functions->GenerateMipmap = tdfxGenerateMipmap;
+}
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_tex.h b/src/mesa/drivers/dri/tdfx/tdfx_tex.h
new file mode 100644
index 0000000000..a445935a01
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_tex.h
@@ -0,0 +1,101 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Brian Paul <brianp@valinux.com>
+ *
+ */
+
+#ifndef _TDFX_TEX_H_
+#define _TDFX_TEX_H_
+
+
+/*
+ * True if the GL internal format is one of the two FXT1 compressed
+ * formats.  Note: the argument is evaluated twice.
+ */
+#define tdfxDDIsCompressedFormatMacro(internalFormat) \
+    (((internalFormat) == GL_COMPRESSED_RGB_FXT1_3DFX) || \
+     ((internalFormat) == GL_COMPRESSED_RGBA_FXT1_3DFX))
+/*
+ * True if the value equals the Glide FXT1 texel format.  Despite the
+ * parameter name, the comparison is against a GR_TEXFMT_* constant, not a
+ * GL internal format.
+ */
+#define tdfxDDIsCompressedGlideFormatMacro(internalFormat) \
+    ((internalFormat) == GR_TEXFMT_ARGB_CMP_FXT1)
+
+
+
+/*
+ * NOTE(review): the definitions of most functions declared below are not
+ * part of this header; the remarks are limited to what the declarations
+ * and names show and should be confirmed against the implementations.
+ */
+
+/* Presumably (re)validates the driver state of a texture object. */
+extern void
+tdfxTexValidate(GLcontext * ctx, struct gl_texture_object *tObj);
+
+
+#if 000 /* DEAD? */
+extern void
+fxDDTexUseGlobalPalette(GLcontext * ctx, GLboolean state);
+#endif
+
+/*
+ * Proxy texture test: returns GL_TRUE if a texture with the given
+ * parameters is acceptable, GL_FALSE otherwise.
+ */
+extern GLboolean
+tdfxTestProxyTexImage(GLcontext *ctx, GLenum target,
+                        GLint level, GLint internalFormat,
+                        GLenum format, GLenum type,
+                        GLint width, GLint height,
+                        GLint depth, GLint border);
+
+/* Texture image read-back; format, type and ownership are returned
+ * through the three out-parameters. */
+extern GLvoid *
+tdfxDDGetTexImage(GLcontext * ctx, GLenum target, GLint level,
+                  const struct gl_texture_object *texObj,
+                  GLenum * formatOut, GLenum * typeOut,
+                  GLboolean * freeImageOut);
+
+/* Compressed texture image read-back into 'image'. */
+extern void
+tdfxDDGetCompressedTexImage( GLcontext *ctx, GLenum target,
+                             GLint lod, void *image,
+                             const struct gl_texture_object *texObj,
+                             struct gl_texture_image *texImage );
+
+/* Compressed-format queries. */
+extern GLint
+tdfxSpecificCompressedTexFormat(GLcontext *ctx,
+                                  GLint internalFormat,
+                                  GLint numDimensions);
+
+extern GLint
+tdfxBaseCompressedTexFormat(GLcontext *ctx,
+                              GLint internalFormat);
+
+extern GLboolean
+tdfxDDIsCompressedFormat(GLcontext *ctx, GLint internalFormat);
+
+extern GLsizei
+tdfxDDCompressedImageSize(GLcontext *ctx,
+                          GLenum intFormat,
+                          GLuint numDimensions,
+                          GLuint width,
+                          GLuint height,
+                          GLuint depth);
+
+
+/* Fill in the texture-related entries of the driver function table. */
+extern void
+tdfxInitTextureFuncs( struct dd_function_table *functions );
+
+#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_texman.c b/src/mesa/drivers/dri/tdfx/tdfx_texman.c
new file mode 100644
index 0000000000..726cc58a10
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_texman.c
@@ -0,0 +1,993 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Brian Paul <brianp@valinux.com>
+ *
+ */
+
+#include "tdfx_context.h"
+#include "tdfx_texman.h"
+#include "main/texobj.h"
+#include "main/hash.h"
+
+
+/* Sentinel returned by FindStartAddr() when no texture memory was found. */
+#define BAD_ADDRESS ((FxU32) -1)
+
+
+#if 0 /* DEBUG use */
+/*
+ * Verify the consistency of the texture memory manager for one TMU.
+ * This involves:
+ *    Traversing the free block list and computing the total memory free.
+ *    Traversing all texture objects and computing the total memory used.
+ *    Comparing the total free and total used amounts to the total memory size.
+ *    Making various assertions about the results.
+ *
+ * NOTE(review): numObj is printed but never incremented, so "#objs" is
+ * always reported as 0.
+ */
+static void
+VerifyFreeList(tdfxContextPtr fxMesa, FxU32 tmu)
+{
+    struct gl_shared_state *mesaShared = fxMesa->glCtx->Shared;
+    struct tdfxSharedState *shared = (struct tdfxSharedState *) mesaShared->DriverData;
+    tdfxMemRange *block;
+    int prevStart = -1, prevEnd = -1;
+    int totalFree = 0;
+    int numObj = 0, numRes = 0;
+    int totalUsed = 0;
+
+    /* free list: blocks must lie inside texture memory, be sorted by
+     * start address and must not overlap their predecessor */
+    for (block = shared->tmFree[tmu]; block; block = block->next) {
+       assert( block->endAddr > 0 );
+       assert( block->startAddr <= shared->totalTexMem[tmu] );
+       assert( block->endAddr <= shared->totalTexMem[tmu] );
+       assert( (int) block->startAddr > prevStart );
+       assert( (int) block->startAddr >= prevEnd );
+       prevStart = (int) block->startAddr;
+       prevEnd = (int) block->endAddr;
+       totalFree += (block->endAddr - block->startAddr);
+    }
+    assert(totalFree == shared->freeTexMem[tmu]);
+
+    /* texture objects: sum up the memory held on this TMU by resident
+     * textures */
+    {
+       struct _mesa_HashTable *textures = fxMesa->glCtx->Shared->TexObjects;
+       GLuint id;
+       for (id = _mesa_HashFirstEntry(textures);
+            id;
+            id = _mesa_HashNextEntry(textures, id)) {
+          struct gl_texture_object *tObj
+             = _mesa_lookup_texture(fxMesa->glCtx, id);
+          tdfxTexInfo *ti = TDFX_TEXTURE_DATA(tObj);
+          if (ti) {
+             if (ti->isInTM) {
+                numRes++;
+                assert(ti->tm[0]);
+                if (ti->tm[tmu])
+                   totalUsed += (ti->tm[tmu]->endAddr - ti->tm[tmu]->startAddr);
+             }
+             else {
+                assert(!ti->tm[0]);
+             }
+          }
+       }
+    }
+
+    printf("totalFree: %d  totalUsed: %d  totalMem: %d #objs=%d  #res=%d\n",
+           shared->freeTexMem[tmu], totalUsed, shared->totalTexMem[tmu],
+           numObj, numRes);
+
+    /* every byte must be accounted for as either free or used */
+    assert(totalUsed + totalFree == shared->totalTexMem[tmu]);
+}
+
+
+/*
+ * Debug helper (compiled out): print every texture object that is
+ * resident in texture memory together with its tm[0]/tm[1] ranges, run
+ * VerifyFreeList() on both TMUs, then print the free list of each TMU
+ * including the gap between consecutive free blocks.
+ */
+static void
+dump_texmem(tdfxContextPtr fxMesa)
+{
+    struct gl_shared_state *mesaShared = fxMesa->glCtx->Shared;
+    struct _mesa_HashTable *textures = mesaShared->TexObjects;
+    struct tdfxSharedState *shared = (struct tdfxSharedState *) mesaShared->DriverData;
+    tdfxMemRange *r;
+    FxU32 prev;
+    GLuint id;
+
+    printf("DUMP Objects:\n");
+    for (id = _mesa_HashFirstEntry(textures);
+         id;
+         id = _mesa_HashNextEntry(textures, id)) {
+        struct gl_texture_object *obj
+           = _mesa_lookup_texture(fxMesa->glCtx, id);
+        tdfxTexInfo *info = TDFX_TEXTURE_DATA(obj);
+
+        /* only textures currently in texture memory are listed */
+        if (info && info->isInTM) {
+        printf("Obj %8p: %4d  info = %p\n", obj, obj->Name, info);
+
+           printf("  isInTM=%d  whichTMU=%d  lastTimeUsed=%d\n",
+                  info->isInTM, info->whichTMU, info->lastTimeUsed);
+           printf("    tm[0] = %p", info->tm[0]);
+           assert(info->tm[0]);
+           if (info->tm[0]) {
+              printf("  tm startAddr = %d  endAddr = %d",
+                     info->tm[0]->startAddr,
+                     info->tm[0]->endAddr);
+           }
+           printf("\n");
+           printf("    tm[1] = %p", info->tm[1]);
+           if (info->tm[1]) {
+              printf("  tm startAddr = %d  endAddr = %d",
+                     info->tm[1]->startAddr,
+                     info->tm[1]->endAddr);
+           }
+           printf("\n");
+        }
+    }
+
+    VerifyFreeList(fxMesa, 0);
+    VerifyFreeList(fxMesa, 1);
+
+    printf("Free memory unit 0:  %d bytes\n", shared->freeTexMem[0]);
+    /* prev tracks the end of the previous free block to show the gap */
+    prev = 0;
+    for (r = shared->tmFree[0]; r; r = r->next) {
+       printf("%8p:  start %8d  end %8d  size %8d  gap %8d\n", r, r->startAddr, r->endAddr, r->endAddr - r->startAddr, r->startAddr - prev);
+       prev = r->endAddr;
+    }
+
+    printf("Free memory unit 1:  %d bytes\n", shared->freeTexMem[1]);
+    prev = 0;
+    for (r = shared->tmFree[1]; r; r = r->next) {
+       printf("%8p:  start %8d  end %8d  size %8d  gap %8d\n", r, r->startAddr, r->endAddr, r->endAddr - r->startAddr, r->startAddr - prev);
+       prev = r->endAddr;
+    }
+
+}
+#endif
+
+
+
+#ifdef TEXSANITY
+/*
+ * Intentionally empty.  Called by sanity() whenever an inconsistency is
+ * detected; presumably meant as a convenient place for a debugger
+ * breakpoint.
+ */
+static void
+fubar(void)
+{
+}
+
+/*
+ * Check one free list: report (on stderr) and call fubar() for every node
+ * that is all-zero, empty/inverted, or out of order with respect to or
+ * overlapping its predecessor.
+ */
+static void
+sanity_check_list(const tdfxMemRange *list)
+{
+    const tdfxMemRange *prev = NULL;
+    const tdfxMemRange *node;
+
+    for (node = list; node; node = node->next) {
+        if (!node->startAddr && !node->endAddr) {
+            fprintf(stderr, "Textures fubar\n");
+            fubar();
+        }
+        if (node->startAddr >= node->endAddr) {
+            fprintf(stderr, "Node fubar\n");
+            fubar();
+        }
+        if (prev && (prev->startAddr >= node->startAddr ||
+                     prev->endAddr > node->startAddr)) {
+            fprintf(stderr, "Sorting fubar\n");
+            fubar();
+        }
+        prev = node;
+    }
+}
+
+/*
+ * Sanity Check
+ *
+ * Runs the free-list checks on the lists of both TMUs.
+ * NOTE(review): the lists are read from fxMesa->tmFree, while the rest of
+ * this file keeps them in the shared state (shared->tmFree) -- confirm
+ * this still compiles when TEXSANITY is defined.
+ */
+static void
+sanity(tdfxContextPtr fxMesa)
+{
+    sanity_check_list(fxMesa->tmFree[0]);
+    sanity_check_list(fxMesa->tmFree[1]);
+}
+#endif
+
+
+
+
+
+/*
+ * Obtain a tdfxMemRange node describing [start, end).
+ * A recycled node is taken from the shared pool when one is available,
+ * otherwise a fresh one is malloc'ed.  The shared state may still be NULL
+ * here: tdfxTMInit() calls this before it publishes DriverData.
+ * Returns NULL if no node could be allocated.
+ */
+static tdfxMemRange *
+NewRangeNode(tdfxContextPtr fxMesa, FxU32 start, FxU32 end)
+{
+    struct gl_shared_state *mesaShared = fxMesa->glCtx->Shared;
+    struct tdfxSharedState *shared = (struct tdfxSharedState *) mesaShared->DriverData;
+    tdfxMemRange *node;
+
+    _glthread_LOCK_MUTEX(mesaShared->Mutex);
+    if (!shared || !shared->tmPool) {
+        /* pool unavailable or empty */
+        node = MALLOC(sizeof(tdfxMemRange));
+    }
+    else {
+        /* pop the head of the pool */
+        node = shared->tmPool;
+        shared->tmPool = node->next;
+    }
+    _glthread_UNLOCK_MUTEX(mesaShared->Mutex);
+
+    if (node) {
+        node->startAddr = start;
+        node->endAddr = end;
+        node->next = NULL;
+    }
+    /* else: out of memory, the caller sees NULL */
+
+    return node;
+}
+
+
+/*
+ * Create the shared texture memory bookkeeping, unless it already exists.
+ * With the Glide "TEXUMA" extension a single unified memory range is
+ * managed through TMU 0; otherwise each TMU (one or two) gets its own
+ * range.  Nothing is set up if the shared state cannot be allocated.
+ */
+void
+tdfxTMInit(tdfxContextPtr fxMesa)
+{
+    struct tdfxSharedState *shared;
+    const char *extensions;
+
+    if (fxMesa->glCtx->Shared->DriverData) {
+        /* another context sharing this state got here first */
+        return;
+    }
+
+    shared = CALLOC_STRUCT(tdfxSharedState);
+    if (!shared)
+       return;
+
+    LOCK_HARDWARE(fxMesa);
+    extensions = fxMesa->Glide.grGetString(GR_EXTENSION);
+    UNLOCK_HARDWARE(fxMesa);
+
+    if (!strstr(extensions, "TEXUMA")) {
+        /* split memory: one free range per texture unit */
+        const int numTMUs = fxMesa->haveTwoTMUs ? 2 : 1;
+        int tmu;
+
+        shared->umaTexMemory = GL_FALSE;
+        LOCK_HARDWARE(fxMesa);
+        for (tmu = 0; tmu < numTMUs; tmu++) {
+            FxU32 lo = fxMesa->Glide.grTexMinAddress(tmu);
+            FxU32 hi = fxMesa->Glide.grTexMaxAddress(tmu);
+            shared->totalTexMem[tmu] = hi - lo;
+            shared->freeTexMem[tmu] = hi - lo;
+            shared->tmFree[tmu] = NewRangeNode(fxMesa, lo, hi);
+        }
+        UNLOCK_HARDWARE(fxMesa);
+    }
+    else {
+        /* unified memory: everything is accounted to TMU 0 */
+        FxU32 lo, hi;
+
+        shared->umaTexMemory = GL_TRUE;
+        LOCK_HARDWARE(fxMesa);
+        fxMesa->Glide.grEnable(GR_TEXTURE_UMA_EXT);
+        lo = fxMesa->Glide.grTexMinAddress(0);
+        hi = fxMesa->Glide.grTexMaxAddress(0);
+        UNLOCK_HARDWARE(fxMesa);
+
+        shared->totalTexMem[0] = hi - lo;
+        shared->totalTexMem[1] = 0;
+        shared->freeTexMem[0] = hi - lo;
+        shared->freeTexMem[1] = 0;
+        shared->tmFree[0] = NewRangeNode(fxMesa, lo, hi);
+        shared->tmFree[1] = NULL;
+    }
+
+    shared->tmPool = NULL;
+    fxMesa->glCtx->Shared->DriverData = shared;
+}
+
+
+/*
+ * Clean-up texture memory before destroying context.
+ *
+ * Only acts when this context holds the last reference to the shared
+ * state and has a drawable: frees the pool of spare tdfxMemRange nodes,
+ * the free-list nodes of every TMU, and the shared state itself.
+ */
+void
+tdfxTMClose(tdfxContextPtr fxMesa)
+{
+    if (fxMesa->glCtx->Shared->RefCount == 1 && fxMesa->driDrawable) {
+        /* refcount will soon go to zero, free our 3dfx stuff */
+        struct tdfxSharedState *shared = (struct tdfxSharedState *) fxMesa->glCtx->Shared->DriverData;
+
+        const int numTMUs = fxMesa->haveTwoTMUs ? 2 : 1;
+        int tmu;
+        tdfxMemRange *tmp, *next;
+
+        if (!shared) {
+            /* tdfxTMInit() leaves DriverData unset when its allocation
+             * fails, so there may be nothing to release.
+             */
+            return;
+        }
+
+        /* Deallocate the pool of free tdfxMemRange nodes */
+        tmp = shared->tmPool;
+        while (tmp) {
+            next = tmp->next;
+            FREE(tmp);
+            tmp = next;
+        }
+
+        /* Delete the texture memory block tdfxMemRange nodes */
+        for (tmu = 0; tmu < numTMUs; tmu++) {
+            tmp = shared->tmFree[tmu];
+            while (tmp) {
+                next = tmp->next;
+                FREE(tmp);
+                tmp = next;
+            }
+        }
+
+        FREE(shared);
+        fxMesa->glCtx->Shared->DriverData = NULL;
+    }
+}
+
+
+
+/*
+ * Delete a tdfxMemRange struct.
+ * We keep a linked list of free/available tdfxMemRange structs to
+ * avoid extra malloc/free calls.
+ *
+ * This function form is compiled out; the DELETE_RANGE_NODE macro below
+ * is what the code actually uses.
+ * NOTE(review): the parameter type is spelled "struct TdfxSharedState",
+ * whereas the rest of the file uses "struct tdfxSharedState".
+ */
+#if 0
+static void
+DeleteRangeNode_NoLock(struct TdfxSharedState *shared, tdfxMemRange *range)
+{
+    /* insert at head of list */
+    range->next = shared->tmPool;
+    shared->tmPool = range;
+}
+#endif
+
+#define DELETE_RANGE_NODE(shared, range) \
+    (range)->next = (shared)->tmPool;    \
+    (shared)->tmPool = (range)
+
+
+
+/*
+ * When we've run out of texture memory we have to throw out an
+ * existing texture to make room for the new one.  This function
+ * determines the texture to throw out.
+ *
+ * Only textures resident on the given TMU (or on both / split across
+ * both) are candidates.  A candidate with priority below 1.0 wins (the
+ * lowest one); otherwise the least recently used candidate is returned.
+ * Returns NULL when there is no candidate at all.
+ */
+static struct gl_texture_object *
+FindOldestObject(tdfxContextPtr fxMesa, FxU32 tmu)
+{
+    struct _mesa_HashTable *texTable = fxMesa->glCtx->Shared->TexObjects;
+    const GLuint now = fxMesa->texBindNumber;
+    struct gl_texture_object *victimByAge = NULL;
+    struct gl_texture_object *victimByPriority = NULL;
+    GLuint maxAge = 0;
+    GLfloat minPriority = 1.0F;
+    GLuint key;
+
+    for (key = _mesa_HashFirstEntry(texTable);
+         key;
+         key = _mesa_HashNextEntry(texTable, key)) {
+        struct gl_texture_object *texObj
+           = _mesa_lookup_texture(fxMesa->glCtx, key);
+        tdfxTexInfo *ti = TDFX_TEXTURE_DATA(texObj);
+        GLuint used, age;
+
+        /* skip textures that are not resident at all ... */
+        if (!ti || !ti->isInTM)
+            continue;
+        /* ... or that do not occupy this TMU */
+        if (ti->whichTMU != tmu &&
+            ti->whichTMU != TDFX_TMU_BOTH &&
+            ti->whichTMU != TDFX_TMU_SPLIT)
+            continue;
+
+        assert(ti->tm[0]);
+        used = ti->lastTimeUsed;
+
+        /* age in bind-counter ticks, allowing for counter wrap-around */
+        age = (used > now) ? now + (UINT_MAX - used + 1)  /* TO DO: check wrap around */
+                           : now - used;
+
+        if (age >= maxAge) {
+            maxAge = age;
+            victimByAge = texObj;
+        }
+
+        /* examine priority */
+        if (texObj->Priority < minPriority) {
+            minPriority = texObj->Priority;
+            victimByPriority = texObj;
+        }
+    }
+
+    if (minPriority < 1.0) {
+        /* some texture was explicitly de-prioritized: prefer it */
+        ASSERT(victimByPriority);
+        return victimByPriority;
+    }
+
+    return victimByAge;
+}
+
+
+#if 0
+/*
+ * Compiled out: move every texture object whose reference count is below
+ * two out of texture memory.  Only referenced from the (also disabled)
+ * fragmentation fallback in FindStartAddr().
+ */
+static void
+FlushTexMemory(tdfxContextPtr fxMesa)
+{
+    struct _mesa_HashTable *textures = fxMesa->glCtx->Shared->TexObjects;
+    GLuint id;
+
+    for (id = _mesa_HashFirstEntry(textures);
+         id;
+         id = _mesa_HashNextEntry(textures, id)) {
+       struct gl_texture_object *obj
+          = _mesa_lookup_texture(fxMesa->glCtx, id);
+       if (obj->RefCount < 2) {
+          /* don't flush currently bound textures */
+          tdfxTMMoveOutTM_NoLock(fxMesa, obj);
+       }
+    }
+}
+#endif
+
+
+/*
+ * Find the address (offset?) at which we can store a new texture.
+ * <tmu> is the texture unit.
+ * <size> is the texture size in bytes.
+ *
+ * First-fit search of the TMU's free list: the first block that is large
+ * enough is shrunk from its start (and recycled if it becomes empty).
+ * If nothing fits, the texture chosen by FindOldestObject() is moved out
+ * of texture memory and the search is repeated.
+ * Returns the start address, or BAD_ADDRESS when nothing fits and there
+ * is no texture left to evict.
+ * The shared-state mutex is held for the whole search and released on
+ * every return path.
+ */
+static FxU32
+FindStartAddr(tdfxContextPtr fxMesa, FxU32 tmu, FxU32 size)
+{
+    struct gl_shared_state *mesaShared = fxMesa->glCtx->Shared;
+    struct tdfxSharedState *shared = (struct tdfxSharedState *) mesaShared->DriverData;
+    tdfxMemRange *prev, *block;
+    FxU32 result;
+#if 0
+    int discardedCount = 0;
+#define MAX_DISCARDS 10
+#endif
+
+    /* unified memory is managed entirely through TMU 0 */
+    if (shared->umaTexMemory) {
+        assert(tmu == TDFX_TMU0);
+    }
+
+    _glthread_LOCK_MUTEX(mesaShared->Mutex);
+    while (1) {
+        prev = NULL;
+        block = shared->tmFree[tmu];
+        while (block) {
+            if (block->endAddr - block->startAddr >= size) {
+                /* The texture will fit here */
+                result = block->startAddr;
+                block->startAddr += size;
+                if (block->startAddr == block->endAddr) {
+                    /* Remove this node since it's empty */
+                    if (prev) {
+                        prev->next = block->next;
+                    }
+                    else {
+                        shared->tmFree[tmu] = block->next;
+                    }
+                    DELETE_RANGE_NODE(shared, block);
+                }
+                shared->freeTexMem[tmu] -= size;
+                _glthread_UNLOCK_MUTEX(mesaShared->Mutex);
+                return result;
+            }
+            prev = block;
+            block = block->next;
+        }
+        /* We failed to find a block large enough to accommodate <size> bytes.
+         * Find the oldest texObject and free it.
+         */
+#if 0
+        discardedCount++;
+        if (discardedCount > MAX_DISCARDS + 1) {
+            _mesa_problem(NULL, "%s: extreme texmem fragmentation", __FUNCTION__);
+            _glthread_UNLOCK_MUTEX(mesaShared->Mutex);
+            return BAD_ADDRESS;
+        }
+        else if (discardedCount > MAX_DISCARDS) {
+            /* texture memory is probably really fragmented, flush it */
+            FlushTexMemory(fxMesa);
+        }
+        else
+#endif
+        {
+            struct gl_texture_object *obj = FindOldestObject(fxMesa, tmu);
+            if (obj) {
+                /* evict it (the _NoLock variant is used because the mutex
+                 * is already held here) and retry the search */
+                tdfxTMMoveOutTM_NoLock(fxMesa, obj);
+                fxMesa->stats.texSwaps++;
+            }
+            else {
+                /* nothing resident on this TMU is left to evict */
+                _mesa_problem(NULL, "%s: extreme texmem fragmentation", __FUNCTION__);
+                _glthread_UNLOCK_MUTEX(mesaShared->Mutex);
+                return BAD_ADDRESS;
+            }
+        }
+    }
+
+    /* never get here, but play it safe */
+    _glthread_UNLOCK_MUTEX(mesaShared->Mutex);
+    return BAD_ADDRESS;
+}
+
+
+/*
+ * Remove the given tdfxMemRange node from hardware texture memory.
+ *
+ * The range is handed back to the free list of <tmu>: it is inserted in
+ * start-address order and merged with the following and/or preceding
+ * free block when they are directly adjacent.  Nodes made redundant by a
+ * merge, and empty ranges, are recycled through DELETE_RANGE_NODE.
+ * A NULL range is ignored.  No locking is done here, as the name says.
+ */
+static void
+RemoveRange_NoLock(tdfxContextPtr fxMesa, FxU32 tmu, tdfxMemRange *range)
+{
+    struct gl_shared_state *mesaShared = fxMesa->glCtx->Shared;
+    struct tdfxSharedState *shared = (struct tdfxSharedState *) mesaShared->DriverData;
+    tdfxMemRange *block, *prev;
+
+    /* unified memory is managed entirely through TMU 0 */
+    if (shared->umaTexMemory) {
+       assert(tmu == TDFX_TMU0);
+    }
+
+    if (!range)
+        return;
+
+    /* an empty range owns no memory: just recycle the node */
+    if (range->startAddr == range->endAddr) {
+        DELETE_RANGE_NODE(shared, range);
+        return;
+    }
+    shared->freeTexMem[tmu] += range->endAddr - range->startAddr;
+
+    /* find position in linked list to insert this tdfxMemRange node */
+    prev = NULL;
+    block = shared->tmFree[tmu];
+    while (block) {
+        assert(range->startAddr != block->startAddr);
+        if (range->startAddr > block->startAddr) {
+            prev = block;
+            block = block->next;
+        }
+        else {
+            break;
+        }
+    }
+
+    /* Insert the free block, combine with adjacent blocks when possible */
+    range->next = block;
+    if (block) {
+        if (range->endAddr == block->startAddr) {
+            /* Combine: grow the following block downwards and continue
+             * with it in place of the released node */
+            block->startAddr = range->startAddr;
+            DELETE_RANGE_NODE(shared, range);
+            range = block;
+        }
+    }
+    if (prev) {
+        if (prev->endAddr == range->startAddr) {
+            /* Combine: grow the preceding block upwards and unlink range */
+            prev->endAddr = range->endAddr;
+            prev->next = range->next;
+            DELETE_RANGE_NODE(shared, range);
+        }
+        else {
+            prev->next = range;
+        }
+    }
+    else {
+        /* range is the new head of the free list */
+        shared->tmFree[tmu] = range;
+    }
+}
+
+
+#if 0 /* NOT USED */
+/*
+ * Locking wrapper: takes the shared-state mutex around
+ * RemoveRange_NoLock().
+ */
+static void
+RemoveRange(tdfxContextPtr fxMesa, FxU32 tmu, tdfxMemRange *range)
+{
+    struct gl_shared_state *mesaShared = fxMesa->glCtx->Shared;
+    _glthread_LOCK_MUTEX(mesaShared->Mutex);
+    RemoveRange_NoLock(fxMesa, tmu, range);
+    _glthread_UNLOCK_MUTEX(mesaShared->Mutex);
+}
+#endif
+
+
+/*
+ * Reserve texture memory for a texture image.
+ * <tmu> is the texture unit
+ * <texmemsize> is the number of bytes to allocate
+ *
+ * Returns a range node describing the reserved span, or NULL if no
+ * address could be found (a problem is reported in that case) or if the
+ * node itself could not be allocated.
+ */
+static tdfxMemRange *
+AllocTexMem(tdfxContextPtr fxMesa, FxU32 tmu, FxU32 texmemsize)
+{
+    const FxU32 base = FindStartAddr(fxMesa, tmu, texmemsize);
+
+    if (base != BAD_ADDRESS) {
+        /* wrap the reserved span in a range node for the caller */
+        return NewRangeNode(fxMesa, base, base + texmemsize);
+    }
+
+    _mesa_problem(fxMesa->glCtx, "%s returned NULL!  tmu=%d texmemsize=%d",
+           __FUNCTION__, (int) tmu, (int) texmemsize);
+    return NULL;
+}
+
+
+/*
+ * Copy the mipmap levels of <tObj> into the texture memory that was
+ * previously reserved for it.
+ *
+ * The destination depends on ti->whichTMU:
+ *   TDFX_TMU0 / TDFX_TMU1 - every level goes to that one unit.
+ *   TDFX_TMU_SPLIT        - every level is sent to TMU0 with the ODD mask
+ *                           and to TMU1 with the EVEN mask.
+ *   TDFX_TMU_BOTH         - every level is sent in full to both units.
+ *
+ * Levels are walked from ti->minLevel up to ti->maxLevel and the walk stops
+ * at the first level whose image carries no Data.  Nothing is downloaded
+ * when a required memory range is missing.  This function does not take
+ * the hardware lock itself.
+ */
+void
+tdfxTMDownloadTexture(tdfxContextPtr fxMesa, struct gl_texture_object *tObj)
+{
+    tdfxTexInfo *ti;
+    FxU32 where;
+    GLint level;
+
+    assert(tObj);
+    ti = TDFX_TEXTURE_DATA(tObj);
+    assert(ti);
+    where = ti->whichTMU;
+
+    switch (where) {
+    case TDFX_TMU0:
+    case TDFX_TMU1:
+        /* single unit: needs that unit's range */
+        if (!ti->tm[where])
+            break;
+        for (level = ti->minLevel; level <= ti->maxLevel; level++) {
+            GrLOD_t lod;
+
+            if (!tObj->Image[0][level]->Data)
+                break;
+            lod = ti->info.largeLodLog2 - level + tObj->BaseLevel;
+            fxMesa->Glide.grTexDownloadMipMapLevel(where,
+                                                   ti->tm[where]->startAddr,
+                                                   lod,
+                                                   ti->info.largeLodLog2,
+                                                   ti->info.aspectRatioLog2,
+                                                   ti->info.format,
+                                                   GR_MIPMAPLEVELMASK_BOTH,
+                                                   tObj->Image[0][level]->Data);
+        }
+        break;
+
+    case TDFX_TMU_SPLIT:
+        /* odd levels live in TMU0, even levels in TMU1 */
+        if (!ti->tm[TDFX_TMU0] || !ti->tm[TDFX_TMU1])
+            break;
+        for (level = ti->minLevel; level <= ti->maxLevel; level++) {
+            GrLOD_t lod;
+
+            if (!tObj->Image[0][level]->Data)
+                break;
+            lod = ti->info.largeLodLog2 - level + tObj->BaseLevel;
+            fxMesa->Glide.grTexDownloadMipMapLevel(GR_TMU0,
+                                                   ti->tm[TDFX_TMU0]->startAddr,
+                                                   lod,
+                                                   ti->info.largeLodLog2,
+                                                   ti->info.aspectRatioLog2,
+                                                   ti->info.format,
+                                                   GR_MIPMAPLEVELMASK_ODD,
+                                                   tObj->Image[0][level]->Data);
+            fxMesa->Glide.grTexDownloadMipMapLevel(GR_TMU1,
+                                                   ti->tm[TDFX_TMU1]->startAddr,
+                                                   lod,
+                                                   ti->info.largeLodLog2,
+                                                   ti->info.aspectRatioLog2,
+                                                   ti->info.format,
+                                                   GR_MIPMAPLEVELMASK_EVEN,
+                                                   tObj->Image[0][level]->Data);
+        }
+        break;
+
+    case TDFX_TMU_BOTH:
+        /* full copy of every level in each unit */
+        if (!ti->tm[TDFX_TMU0] || !ti->tm[TDFX_TMU1])
+            break;
+        for (level = ti->minLevel; level <= ti->maxLevel; level++) {
+            GrLOD_t lod;
+
+            if (!tObj->Image[0][level]->Data)
+                break;
+            lod = ti->info.largeLodLog2 - level + tObj->BaseLevel;
+            fxMesa->Glide.grTexDownloadMipMapLevel(GR_TMU0,
+                                                   ti->tm[TDFX_TMU0]->startAddr,
+                                                   lod,
+                                                   ti->info.largeLodLog2,
+                                                   ti->info.aspectRatioLog2,
+                                                   ti->info.format,
+                                                   GR_MIPMAPLEVELMASK_BOTH,
+                                                   tObj->Image[0][level]->Data);
+            fxMesa->Glide.grTexDownloadMipMapLevel(GR_TMU1,
+                                                   ti->tm[TDFX_TMU1]->startAddr,
+                                                   lod,
+                                                   ti->info.largeLodLog2,
+                                                   ti->info.aspectRatioLog2,
+                                                   ti->info.format,
+                                                   GR_MIPMAPLEVELMASK_BOTH,
+                                                   tObj->Image[0][level]->Data);
+        }
+        break;
+
+    default:
+        _mesa_problem(NULL, "%s: bad tmu (%d)", __FUNCTION__, (int)where);
+        return;
+    }
+}
+
+
+/*
+ * Re-download one mipmap level of a texture that is already resident in
+ * texture memory.
+ *
+ * <ctx>   GL context; the driver context is derived from it
+ * <tObj>  texture object whose driver data records where it lives
+ * <level> mipmap level to upload again
+ *
+ * The hardware lock is taken around the Glide calls.  The level is sent to
+ * the unit(s) named by ti->whichTMU using the same masks as
+ * tdfxTMDownloadTexture().
+ *
+ * The upload is skipped when the level has no image data or when a needed
+ * texture memory range is missing.  A missing range is possible because
+ * tdfxTMMoveInTM_NoLock() marks the texture as resident even if
+ * AllocTexMem() returned NULL; tdfxTMDownloadTexture() applies the same
+ * guards.
+ */
+void
+tdfxTMReloadMipMapLevel(GLcontext *ctx, struct gl_texture_object *tObj,
+                        GLint level)
+{
+    tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+    tdfxTexInfo *ti = TDFX_TEXTURE_DATA(tObj);
+    GrLOD_t glideLod;
+    FxU32 tmu;
+
+    tmu = ti->whichTMU;
+    glideLod =  ti->info.largeLodLog2 - level + tObj->BaseLevel;
+    ASSERT(ti->isInTM);
+
+    /* No source texels for this level: nothing to hand to Glide. */
+    if (!tObj->Image[0][level] || !tObj->Image[0][level]->Data)
+        return;
+
+    LOCK_HARDWARE(fxMesa);
+
+    switch (tmu) {
+    case TDFX_TMU0:
+    case TDFX_TMU1:
+        if (ti->tm[tmu]) {
+            fxMesa->Glide.grTexDownloadMipMapLevel(tmu,
+                                        ti->tm[tmu]->startAddr,
+                                        glideLod,
+                                        ti->info.largeLodLog2,
+                                        ti->info.aspectRatioLog2,
+                                        ti->info.format,
+                                        GR_MIPMAPLEVELMASK_BOTH,
+                                        tObj->Image[0][level]->Data);
+        }
+        break;
+    case TDFX_TMU_SPLIT:
+        if (ti->tm[GR_TMU0] && ti->tm[GR_TMU1]) {
+            fxMesa->Glide.grTexDownloadMipMapLevel(GR_TMU0,
+                                        ti->tm[GR_TMU0]->startAddr,
+                                        glideLod,
+                                        ti->info.largeLodLog2,
+                                        ti->info.aspectRatioLog2,
+                                        ti->info.format,
+                                        GR_MIPMAPLEVELMASK_ODD,
+                                        tObj->Image[0][level]->Data);
+
+            fxMesa->Glide.grTexDownloadMipMapLevel(GR_TMU1,
+                                        ti->tm[GR_TMU1]->startAddr,
+                                        glideLod,
+                                        ti->info.largeLodLog2,
+                                        ti->info.aspectRatioLog2,
+                                        ti->info.format,
+                                        GR_MIPMAPLEVELMASK_EVEN,
+                                        tObj->Image[0][level]->Data);
+        }
+        break;
+    case TDFX_TMU_BOTH:
+        if (ti->tm[GR_TMU0] && ti->tm[GR_TMU1]) {
+            fxMesa->Glide.grTexDownloadMipMapLevel(GR_TMU0,
+                                        ti->tm[GR_TMU0]->startAddr,
+                                        glideLod,
+                                        ti->info.largeLodLog2,
+                                        ti->info.aspectRatioLog2,
+                                        ti->info.format,
+                                        GR_MIPMAPLEVELMASK_BOTH,
+                                        tObj->Image[0][level]->Data);
+
+            fxMesa->Glide.grTexDownloadMipMapLevel(GR_TMU1,
+                                        ti->tm[GR_TMU1]->startAddr,
+                                        glideLod,
+                                        ti->info.largeLodLog2,
+                                        ti->info.aspectRatioLog2,
+                                        ti->info.format,
+                                        GR_MIPMAPLEVELMASK_BOTH,
+                                        tObj->Image[0][level]->Data);
+        }
+        break;
+
+    default:
+        _mesa_problem(ctx, "%s: bad tmu (%d)", __FUNCTION__, (int)tmu);
+        break;
+    }
+    UNLOCK_HARDWARE(fxMesa);
+}
+
+
+/*
+ * Reserve texture memory for <tObj> on <targetTMU> and flag its images
+ * for reloading.  The caller is expected to hold the hardware lock (see
+ * the tdfxTMMoveInTM() wrapper macro).
+ *
+ * If the texture is already resident:
+ *   - same placement requested: nothing to do;
+ *   - either the old or the new placement is TDFX_TMU_SPLIT: the texture
+ *     is moved out and re-allocated at the new placement;
+ *   - it is already in TDFX_TMU_BOTH: nothing to do;
+ *   - otherwise (resident in one unit, wanted in another) it is promoted
+ *     to TDFX_TMU_BOTH.  The existing range is released first, because the
+ *     TDFX_TMU_BOTH case below allocates fresh ranges for both units and
+ *     would otherwise overwrite, and thereby leak, the old one.
+ *
+ * Statistics: reqTexUpload counts every call, texUpload counts calls that
+ * reach the allocation step, memTexUpload is only increased for the TMU0
+ * part of SPLIT/BOTH placements.
+ *
+ * NOTE(review): isInTM is set to GL_TRUE even when AllocTexMem() returned
+ * NULL, so users of ti->tm[] must check the pointers.
+ */
+void
+tdfxTMMoveInTM_NoLock( tdfxContextPtr fxMesa, struct gl_texture_object *tObj,
+                       FxU32 targetTMU )
+{
+    tdfxTexInfo *ti = TDFX_TEXTURE_DATA(tObj);
+    FxU32 texmemsize;
+
+    fxMesa->stats.reqTexUpload++;
+
+    if (ti->isInTM) {
+        if (ti->whichTMU == targetTMU)
+            return;
+        if (targetTMU == TDFX_TMU_SPLIT || ti->whichTMU == TDFX_TMU_SPLIT) {
+            tdfxTMMoveOutTM_NoLock(fxMesa, tObj);
+        }
+        else {
+            if (ti->whichTMU == TDFX_TMU_BOTH)
+                return;
+            targetTMU = TDFX_TMU_BOTH;
+            /* Return the single-unit range to the free list before both
+             * ti->tm[] slots are overwritten by the allocations below.
+             */
+            tdfxTMMoveOutTM_NoLock(fxMesa, tObj);
+        }
+    }
+
+    ti->whichTMU = targetTMU;
+
+    switch (targetTMU) {
+    case TDFX_TMU0:
+    case TDFX_TMU1:
+        texmemsize = fxMesa->Glide.grTexTextureMemRequired(GR_MIPMAPLEVELMASK_BOTH,
+                                                       &(ti->info));
+        ti->tm[targetTMU] = AllocTexMem(fxMesa, targetTMU, texmemsize);
+        break;
+    case TDFX_TMU_SPLIT:
+        /* odd levels in TMU0 ... */
+        texmemsize = fxMesa->Glide.grTexTextureMemRequired(GR_MIPMAPLEVELMASK_ODD,
+                                                       &(ti->info));
+        ti->tm[TDFX_TMU0] = AllocTexMem(fxMesa, TDFX_TMU0, texmemsize);
+        if (ti->tm[TDFX_TMU0])
+           fxMesa->stats.memTexUpload += texmemsize;
+
+        /* ... even levels in TMU1 */
+        texmemsize = fxMesa->Glide.grTexTextureMemRequired(GR_MIPMAPLEVELMASK_EVEN,
+                                                       &(ti->info));
+        ti->tm[TDFX_TMU1] = AllocTexMem(fxMesa, TDFX_TMU1, texmemsize);
+        break;
+    case TDFX_TMU_BOTH:
+        /* the same size is needed in each unit */
+        texmemsize = fxMesa->Glide.grTexTextureMemRequired(GR_MIPMAPLEVELMASK_BOTH,
+                                                       &(ti->info));
+        ti->tm[TDFX_TMU0] = AllocTexMem(fxMesa, TDFX_TMU0, texmemsize);
+        if (ti->tm[TDFX_TMU0])
+           fxMesa->stats.memTexUpload += texmemsize;
+
+        ti->tm[TDFX_TMU1] = AllocTexMem(fxMesa, TDFX_TMU1, texmemsize);
+        break;
+    default:
+        _mesa_problem(NULL, "%s: bad tmu (%d)", __FUNCTION__, (int)targetTMU);
+        return;
+    }
+
+    ti->reloadImages = GL_TRUE;
+    ti->isInTM = GL_TRUE;
+
+    fxMesa->stats.texUpload++;
+}
+
+
+/*
+ * Evict <tObj> from hardware texture memory.
+ *
+ * The range(s) recorded in the texture's driver data are passed to
+ * RemoveRange_NoLock() and the residency bookkeeping is reset (isInTM
+ * cleared, both tm[] slots NULL, whichTMU = TDFX_TMU_NONE).  Textures with
+ * no driver data, or not currently resident, are left untouched.  With an
+ * unrecognised whichTMU value the problem is reported and the bookkeeping
+ * is left as it was.
+ */
+void
+tdfxTMMoveOutTM_NoLock( tdfxContextPtr fxMesa, struct gl_texture_object *tObj )
+{
+    struct tdfxSharedState *shared =
+        (struct tdfxSharedState *) fxMesa->glCtx->Shared->DriverData;
+    tdfxTexInfo *ti = TDFX_TEXTURE_DATA(tObj);
+
+    if (MESA_VERBOSE & VERBOSE_DRIVER) {
+        fprintf(stderr, "fxmesa: %s(%p (%d))\n", __FUNCTION__, (void *)tObj, tObj->Name);
+    }
+
+    /* debugging aid: VerifyFreeList(fxMesa, 0); VerifyFreeList(fxMesa, 1); */
+
+    if (!ti)
+        return;
+    if (!ti->isInTM)
+        return;
+
+    if (ti->whichTMU == TDFX_TMU0 || ti->whichTMU == TDFX_TMU1) {
+        RemoveRange_NoLock(fxMesa, ti->whichTMU, ti->tm[ti->whichTMU]);
+    }
+    else if (ti->whichTMU == TDFX_TMU_SPLIT || ti->whichTMU == TDFX_TMU_BOTH) {
+        /* two-unit placements are asserted not to occur with UMA memory */
+        assert(!shared->umaTexMemory);
+        RemoveRange_NoLock(fxMesa, TDFX_TMU0, ti->tm[TDFX_TMU0]);
+        RemoveRange_NoLock(fxMesa, TDFX_TMU1, ti->tm[TDFX_TMU1]);
+    }
+    else {
+        _mesa_problem(NULL, "%s: bad tmu (%d)", __FUNCTION__, (int)ti->whichTMU);
+        return;
+    }
+
+    ti->tm[0] = NULL;
+    ti->tm[1] = NULL;
+    ti->whichTMU = TDFX_TMU_NONE;
+    ti->isInTM = GL_FALSE;
+
+    /* debugging aid: VerifyFreeList(fxMesa, 0); VerifyFreeList(fxMesa, 1); */
+}
+
+
+/*
+ * Release the driver-private data of a texture object (called via
+ * glDeleteTexture).
+ *
+ * If the object carries driver data, the texture is first moved out of
+ * texture memory through the locking tdfxTMMoveOutTM() wrapper, then the
+ * data is freed and tObj->DriverData is reset to NULL.  Objects without
+ * driver data are ignored.
+ */
+void
+tdfxTMFreeTexture(tdfxContextPtr fxMesa, struct gl_texture_object *tObj)
+{
+    tdfxTexInfo *texInfo = TDFX_TEXTURE_DATA(tObj);
+
+    if (!texInfo)
+        return;
+
+    tdfxTMMoveOutTM(fxMesa, tObj);
+    FREE(texInfo);
+    tObj->DriverData = NULL;
+
+    /* debugging aid: VerifyFreeList(fxMesa, 0); VerifyFreeList(fxMesa, 1); */
+}
+
+
+
+/*
+ * Called after a context switch to bring texture memory back in line
+ * with this context.
+ *
+ * Every texture object in the shared texture hash that is marked as
+ * resident is examined: if it is the current texture of any texture unit
+ * its images are downloaded again, otherwise it is moved out of texture
+ * memory.
+ */
+void tdfxTMRestoreTextures_NoLock( tdfxContextPtr fxMesa )
+{
+   GLcontext *ctx = fxMesa->glCtx;
+   struct _mesa_HashTable *texHash = ctx->Shared->TexObjects;
+   GLuint name = _mesa_HashFirstEntry(texHash);
+
+   while (name) {
+      struct gl_texture_object *tObj = _mesa_lookup_texture(ctx, name);
+      tdfxTexInfo *ti = TDFX_TEXTURE_DATA( tObj );
+
+      if ( ti && ti->isInTM ) {
+         GLboolean bound = GL_FALSE;
+         int unit;
+
+         /* is this texture current on any unit? */
+         for ( unit = 0 ; unit < MAX_TEXTURE_UNITS && !bound ; unit++ ) {
+            if ( ctx->Texture.Unit[unit]._Current == tObj )
+               bound = GL_TRUE;
+         }
+
+         if ( bound )
+            tdfxTMDownloadTexture( fxMesa, tObj );
+         else
+            tdfxTMMoveOutTM_NoLock( fxMesa, tObj );
+      }
+
+      name = _mesa_HashNextEntry(texHash, name);
+   }
+
+   /* debugging aid: VerifyFreeList(fxMesa, 0); VerifyFreeList(fxMesa, 1); */
+}
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_texman.h b/src/mesa/drivers/dri/tdfx/tdfx_texman.h
new file mode 100644
index 0000000000..a9af4cb7c5
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_texman.h
@@ -0,0 +1,83 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Brian Paul <brianp@valinux.com>
+ *
+ */
+
+#ifndef __TDFX_TEXMAN_H__
+#define __TDFX_TEXMAN_H__
+
+
+#include "tdfx_lock.h"
+
+
+/* Texture manager set-up for a context (defined in tdfx_texman.c). */
+extern void tdfxTMInit( tdfxContextPtr fxMesa );
+
+/* Counterpart of tdfxTMInit() (defined in tdfx_texman.c). */
+extern void tdfxTMClose( tdfxContextPtr fxMesa );
+
+/*
+ * Download all mipmap levels of tObj into its already-allocated texture
+ * memory.  Does not take the hardware lock itself.
+ */
+extern void tdfxTMDownloadTexture(tdfxContextPtr fxMesa,
+                                  struct gl_texture_object *tObj);
+
+/*
+ * Download a single mipmap level of a resident texture again.  Takes and
+ * releases the hardware lock internally.
+ */
+extern void tdfxTMReloadMipMapLevel( GLcontext *ctx,
+				     struct gl_texture_object *tObj,
+				     GLint level );
+
+/*
+ * Allocate texture memory for tObj on targetTMU (TDFX_TMU0, TDFX_TMU1,
+ * TDFX_TMU_SPLIT or TDFX_TMU_BOTH) and mark its images for reload.
+ * Performs no locking; see tdfxTMMoveInTM() below.
+ */
+extern void tdfxTMMoveInTM_NoLock( tdfxContextPtr fxMesa,
+                                   struct gl_texture_object *tObj,
+                                   FxU32 targetTMU );
+
+/*
+ * Give the texture memory of tObj back and mark it as not resident.
+ * Performs no locking; see tdfxTMMoveOutTM() below.
+ */
+extern void tdfxTMMoveOutTM_NoLock( tdfxContextPtr fxMesa,
+                                    struct gl_texture_object *tObj );
+
+/*
+ * Move tObj out of texture memory (with locking) and free its driver
+ * data.
+ */
+extern void tdfxTMFreeTexture( tdfxContextPtr fxMesa,
+			       struct gl_texture_object *tObj );
+
+/*
+ * After a context switch: re-download resident textures that are current
+ * on a texture unit and move the other resident textures out.
+ */
+extern void tdfxTMRestoreTextures_NoLock( tdfxContextPtr fxMesa );
+
+
+/*
+ * Locking wrapper: runs tdfxTMMoveInTM_NoLock() between LOCK_HARDWARE and
+ * UNLOCK_HARDWARE.  Expands to a single do/while(0) statement.
+ */
+#define tdfxTMMoveInTM( fxMesa, tObj, targetTMU )		\
+   do {								\
+      LOCK_HARDWARE( fxMesa );					\
+      tdfxTMMoveInTM_NoLock( fxMesa, tObj, targetTMU );		\
+      UNLOCK_HARDWARE( fxMesa );				\
+   } while (0)
+
+/*
+ * Locking wrapper: runs tdfxTMMoveOutTM_NoLock() between LOCK_HARDWARE and
+ * UNLOCK_HARDWARE.  Expands to a single do/while(0) statement.
+ */
+#define tdfxTMMoveOutTM( fxMesa, tObj )				\
+   do {								\
+      LOCK_HARDWARE( fxMesa );					\
+      tdfxTMMoveOutTM_NoLock( fxMesa, tObj );			\
+      UNLOCK_HARDWARE( fxMesa );				\
+   } while (0)
+
+
+#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_texstate.c b/src/mesa/drivers/dri/tdfx/tdfx_texstate.c
new file mode 100644
index 0000000000..b04f48c7a7
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_texstate.c
@@ -0,0 +1,2218 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * New fixes:
+ *	Daniel Borca <dborca@users.sourceforge.net>, 19 Jul 2004
+ *
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Brian Paul <brianp@valinux.com>
+ *
+ */
+
+#include "tdfx_state.h"
+#include "tdfx_texman.h"
+#include "tdfx_texstate.h"
+
+
+/* =============================================================
+ * Texture
+ */
+
+/*
+ * Helper macros for the COMBINE_EXT texture environment code below.
+ *
+ * TEXENV_OPERAND_INVERTED: true for the two GL_ONE_MINUS_SRC_* operands.
+ * TEXENV_OPERAND_ALPHA:    true for the two *_SRC_ALPHA operands.
+ */
+#define TEXENV_OPERAND_INVERTED(operand)                            \
+  ((GL_ONE_MINUS_SRC_COLOR == (operand))                            \
+   || (GL_ONE_MINUS_SRC_ALPHA == (operand)))
+#define TEXENV_OPERAND_ALPHA(operand)                               \
+  ((GL_ONE_MINUS_SRC_ALPHA == (operand)) || (GL_SRC_ALPHA == (operand)))
+/*
+ * Assign to <param> the GR_CMBX_* alpha source matching the GL combine
+ * source <source>:
+ *   GL_TEXTURE           -> GR_CMBX_LOCAL_TEXTURE_ALPHA
+ *   GL_CONSTANT_EXT      -> GR_CMBX_TMU_CALPHA
+ *   GL_PRIMARY_COLOR_EXT -> GR_CMBX_ITALPHA
+ *   GL_PREVIOUS_EXT      -> <iteratedAlpha>
+ *   anything else        -> GR_CMBX_ZERO
+ * <operand> is accepted but not referenced by the expansion.
+ * Expands to a bare switch statement (not wrapped in do/while).
+ */
+#define TEXENV_SETUP_ARG_A(param, source, operand, iteratedAlpha)   \
+    switch (source) {                                               \
+    case GL_TEXTURE:                                                \
+        param = GR_CMBX_LOCAL_TEXTURE_ALPHA;                        \
+        break;                                                      \
+    case GL_CONSTANT_EXT:                                           \
+        param = GR_CMBX_TMU_CALPHA;                                 \
+        break;                                                      \
+    case GL_PRIMARY_COLOR_EXT:                                      \
+        param = GR_CMBX_ITALPHA;                                    \
+        break;                                                      \
+    case GL_PREVIOUS_EXT:                                           \
+        param = iteratedAlpha;                                      \
+        break;                                                      \
+    default:                                                        \
+       /*                                                           \
+        * This is here just to keep from getting                    \
+        * compiler warnings.                                        \
+        */                                                          \
+        param = GR_CMBX_ZERO;                                       \
+        break;                                                      \
+    }
+
+/*
+ * Assign to <param> the GR_CMBX_* source for an RGB combiner argument.
+ *
+ * When <operand> is not one of the *_SRC_ALPHA operands (see
+ * TEXENV_OPERAND_ALPHA) the colour sources are used:
+ *   GL_TEXTURE           -> GR_CMBX_LOCAL_TEXTURE_RGB
+ *   GL_CONSTANT_EXT      -> GR_CMBX_TMU_CCOLOR
+ *   GL_PRIMARY_COLOR_EXT -> GR_CMBX_ITRGB
+ *   GL_PREVIOUS_EXT      -> <iteratedColor>
+ * otherwise the alpha sources are used, with the same mapping as
+ * TEXENV_SETUP_ARG_A (GL_PREVIOUS_EXT -> <iteratedAlpha>).
+ * Unknown sources yield GR_CMBX_ZERO in both branches.
+ * Expands to a bare if/else statement (not wrapped in do/while).
+ */
+#define TEXENV_SETUP_ARG_RGB(param, source, operand, iteratedColor, iteratedAlpha) \
+    if (!TEXENV_OPERAND_ALPHA(operand)) {                           \
+        switch (source) {                                           \
+        case GL_TEXTURE:                                            \
+            param = GR_CMBX_LOCAL_TEXTURE_RGB;                      \
+            break;                                                  \
+        case GL_CONSTANT_EXT:                                       \
+            param = GR_CMBX_TMU_CCOLOR;                             \
+            break;                                                  \
+        case GL_PRIMARY_COLOR_EXT:                                  \
+            param = GR_CMBX_ITRGB;                                  \
+            break;                                                  \
+        case GL_PREVIOUS_EXT:                                       \
+            param = iteratedColor;                                  \
+            break;                                                  \
+        default:                                                    \
+           /*                                                       \
+            * This is here just to keep from getting                \
+            * compiler warnings.                                    \
+            */                                                      \
+            param = GR_CMBX_ZERO;                                   \
+            break;                                                  \
+        }                                                           \
+    } else {                                                        \
+        switch (source) {                                           \
+        case GL_TEXTURE:                                            \
+            param = GR_CMBX_LOCAL_TEXTURE_ALPHA;                    \
+            break;                                                  \
+        case GL_CONSTANT_EXT:                                       \
+            param = GR_CMBX_TMU_CALPHA;                             \
+            break;                                                  \
+        case GL_PRIMARY_COLOR_EXT:                                  \
+            param = GR_CMBX_ITALPHA;                                \
+            break;                                                  \
+        case GL_PREVIOUS_EXT:                                       \
+            param = iteratedAlpha;                                  \
+            break;                                                  \
+        default:                                                    \
+           /*                                                       \
+            * This is here just to keep from getting                \
+            * compiler warnings.                                    \
+            */                                                      \
+            param = GR_CMBX_ZERO;                                   \
+            break;                                                  \
+        }                                                           \
+    }
+
+/*
+ * Assign to <param> the GR_FUNC_MODE_* value for an RGB combiner operand:
+ *   GL_SRC_COLOR, GL_SRC_ALPHA                     -> GR_FUNC_MODE_X
+ *   GL_ONE_MINUS_SRC_ALPHA, GL_ONE_MINUS_SRC_COLOR -> GR_FUNC_MODE_ONE_MINUS_X
+ *   anything else                                  -> GR_FUNC_MODE_ZERO
+ * Expands to a bare switch statement (not wrapped in do/while).
+ */
+#define TEXENV_SETUP_MODE_RGB(param, operand)                       \
+    switch (operand) {                                              \
+    case GL_SRC_COLOR:                                              \
+    case GL_SRC_ALPHA:                                              \
+        param = GR_FUNC_MODE_X;                                     \
+        break;                                                      \
+    case GL_ONE_MINUS_SRC_ALPHA:                                    \
+    case GL_ONE_MINUS_SRC_COLOR:                                    \
+        param = GR_FUNC_MODE_ONE_MINUS_X;                           \
+        break;                                                      \
+    default:                                                        \
+        param = GR_FUNC_MODE_ZERO;                                  \
+        break;                                                      \
+    }
+
+/*
+ * Assign to <param> the GR_FUNC_MODE_* value for an alpha combiner operand:
+ *   GL_SRC_ALPHA           -> GR_FUNC_MODE_X
+ *   GL_ONE_MINUS_SRC_ALPHA -> GR_FUNC_MODE_ONE_MINUS_X
+ *   anything else          -> GR_FUNC_MODE_ZERO
+ * Expands to a bare switch statement (not wrapped in do/while).
+ */
+#define TEXENV_SETUP_MODE_A(param, operand)                         \
+    switch (operand) {                                              \
+    case GL_SRC_ALPHA:                                              \
+        param = GR_FUNC_MODE_X;                                     \
+        break;                                                      \
+    case GL_ONE_MINUS_SRC_ALPHA:                                    \
+        param = GR_FUNC_MODE_ONE_MINUS_X;                           \
+        break;                                                      \
+    default:                                                        \
+        param = GR_FUNC_MODE_ZERO;                                  \
+        break;                                                      \
+    }
+
+
+
+/*
+ * Setup a texture environment on Voodoo5.
+ * Return GL_TRUE for success, GL_FALSE for failure.
+ * If we fail, we'll have to use software rendering.
+ */
+static GLboolean
+SetupTexEnvNapalm(GLcontext *ctx, GLboolean useIteratedRGBA,
+                  const struct gl_texture_unit *texUnit, GLenum baseFormat,
+                  struct tdfx_texcombine_ext *env)
+{
+    tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+    GrTCCUColor_t incomingRGB, incomingAlpha;
+    const GLenum envMode = texUnit->EnvMode;
+
+    if (useIteratedRGBA) {
+        incomingRGB = GR_CMBX_ITRGB;
+        incomingAlpha = GR_CMBX_ITALPHA;
+    }
+    else {
+        incomingRGB = GR_CMBX_OTHER_TEXTURE_RGB;
+        incomingAlpha = GR_CMBX_OTHER_TEXTURE_ALPHA;
+    }
+
+    /* invariant: */
+    env->Color.Shift = 0;
+    env->Color.Invert = FXFALSE;
+    env->Alpha.Shift = 0;
+    env->Alpha.Invert = FXFALSE;
+
+    switch (envMode) {
+    case GL_REPLACE:
+        /* -- Setup RGB combiner */
+        if (baseFormat == GL_ALPHA) {
+            /* Rv = Rf */
+            env->Color.SourceA = incomingRGB;
+        }
+        else {
+            /* Rv = Rt */
+            env->Color.SourceA = GR_CMBX_LOCAL_TEXTURE_RGB;
+        }
+        env->Color.ModeA = GR_FUNC_MODE_X;
+        env->Color.SourceB = GR_CMBX_ZERO;
+        env->Color.ModeB = GR_FUNC_MODE_ZERO;
+        env->Color.SourceC = GR_CMBX_ZERO;
+        env->Color.InvertC = FXTRUE;
+        env->Color.SourceD = GR_CMBX_ZERO;
+        env->Color.InvertD = FXFALSE;
+        /* -- Setup Alpha combiner */
+        if (baseFormat == GL_LUMINANCE || baseFormat == GL_RGB) {
+            /* Av = Af */
+           env->Alpha.SourceD = incomingAlpha;
+        }
+        else {
+            /* Av = At */
+           env->Alpha.SourceD = GR_CMBX_LOCAL_TEXTURE_ALPHA;
+        }
+        env->Alpha.SourceA = GR_CMBX_ITALPHA;
+        env->Alpha.ModeA = GR_FUNC_MODE_ZERO;
+        env->Alpha.SourceB = GR_CMBX_ITALPHA;
+        env->Alpha.ModeB = GR_FUNC_MODE_ZERO;
+        env->Alpha.SourceC = GR_CMBX_ZERO;
+        env->Alpha.InvertC = FXFALSE;
+        env->Alpha.InvertD = FXFALSE;
+        break;
+
+    case GL_MODULATE:
+        /* -- Setup RGB combiner */
+        if (baseFormat == GL_ALPHA) {
+            /* Rv = Rf */
+           env->Color.SourceC = GR_CMBX_ZERO;
+           env->Color.InvertC = FXTRUE;
+        }
+        else {
+            /* Result = Frag * Tex */
+           env->Color.SourceC = GR_CMBX_LOCAL_TEXTURE_RGB;
+           env->Color.InvertC = FXFALSE;
+        }
+        env->Color.SourceA = incomingRGB;
+        env->Color.ModeA = GR_FUNC_MODE_X;
+        env->Color.SourceB = GR_CMBX_ZERO;
+        env->Color.ModeB = GR_FUNC_MODE_ZERO;
+        env->Color.SourceD = GR_CMBX_ZERO;
+        env->Color.InvertD = FXFALSE;
+        /* -- Setup Alpha combiner */
+        if (baseFormat == GL_LUMINANCE || baseFormat == GL_RGB) {
+            /* Av = Af */
+           env->Alpha.SourceA = incomingAlpha;
+           env->Alpha.SourceC = GR_CMBX_ZERO;
+           env->Alpha.InvertC = FXTRUE;
+        }
+        else {
+            /* Av = Af * At */
+           env->Alpha.SourceA = GR_CMBX_LOCAL_TEXTURE_ALPHA;
+           env->Alpha.SourceC = incomingAlpha;
+           env->Alpha.InvertC = FXFALSE;
+        }
+        env->Alpha.ModeA = GR_FUNC_MODE_X;
+        env->Alpha.SourceB = GR_CMBX_ITALPHA;
+        env->Alpha.ModeB = GR_FUNC_MODE_ZERO;
+        env->Alpha.SourceD = GR_CMBX_ZERO;
+        env->Alpha.InvertD = FXFALSE;
+        break;
+
+    case GL_DECAL:
+        /* -- Setup RGB combiner */
+        if (baseFormat == GL_RGB) {
+            /* Rv = Rt */
+           env->Color.SourceB = GR_CMBX_ZERO;
+           env->Color.ModeB = GR_FUNC_MODE_X;
+           env->Color.SourceC = GR_CMBX_ZERO;
+           env->Color.InvertC = FXTRUE;
+           env->Color.SourceD = GR_CMBX_ZERO;
+           env->Color.InvertD = FXFALSE;
+        }
+        else {
+            /* Rv = Rf * (1 - At) + Rt * At */
+           env->Color.SourceB = incomingRGB;
+           env->Color.ModeB = GR_FUNC_MODE_NEGATIVE_X;
+           env->Color.SourceC = GR_CMBX_LOCAL_TEXTURE_ALPHA;
+           env->Color.InvertC = FXFALSE;
+           env->Color.SourceD = GR_CMBX_B;
+           env->Color.InvertD = FXFALSE;
+        }
+        env->Color.SourceA = GR_CMBX_LOCAL_TEXTURE_RGB;
+        env->Color.ModeA = GR_FUNC_MODE_X;
+        /* -- Setup Alpha combiner */
+        /* Av = Af */
+        env->Alpha.SourceA = incomingAlpha;
+        env->Alpha.ModeA = GR_FUNC_MODE_X;
+        env->Alpha.SourceB = GR_CMBX_ITALPHA;
+        env->Alpha.ModeB = GR_FUNC_MODE_ZERO;
+        env->Alpha.SourceC = GR_CMBX_ZERO;
+        env->Alpha.InvertC = FXTRUE;
+        env->Alpha.SourceD = GR_CMBX_ZERO;
+        env->Alpha.InvertD = FXFALSE;
+        break;
+
+    case GL_BLEND:
+        /* -- Setup RGB combiner */
+        if (baseFormat == GL_ALPHA) {
+            /* Rv = Rf */
+            env->Color.SourceA = incomingRGB;
+            env->Color.ModeA = GR_FUNC_MODE_X;
+            env->Color.SourceB = GR_CMBX_ZERO;
+            env->Color.ModeB = GR_FUNC_MODE_ZERO;
+            env->Color.SourceC = GR_CMBX_ZERO;
+            env->Color.InvertC = FXTRUE;
+            env->Color.SourceD = GR_CMBX_ZERO;
+            env->Color.InvertD = FXFALSE;
+        }
+        else {
+            /* Rv = Rf * (1 - Rt) + Rc * Rt */
+            env->Color.SourceA = GR_CMBX_TMU_CCOLOR;
+            env->Color.ModeA = GR_FUNC_MODE_X;
+            env->Color.SourceB = incomingRGB;
+            env->Color.ModeB = GR_FUNC_MODE_NEGATIVE_X;
+            env->Color.SourceC = GR_CMBX_LOCAL_TEXTURE_RGB;
+            env->Color.InvertC = FXFALSE;
+            env->Color.SourceD = GR_CMBX_B;
+            env->Color.InvertD = FXFALSE;
+        }
+        /* -- Setup Alpha combiner */
+        if (baseFormat == GL_LUMINANCE || baseFormat == GL_RGB) {
+            /* Av = Af */
+            env->Alpha.SourceA = incomingAlpha;
+            env->Alpha.ModeA = GR_FUNC_MODE_X;
+            env->Alpha.SourceB = GR_CMBX_ZERO;
+            env->Alpha.ModeB = GR_FUNC_MODE_ZERO;
+            env->Alpha.SourceC = GR_CMBX_ZERO;
+            env->Alpha.InvertC = FXTRUE;
+            env->Alpha.SourceD = GR_CMBX_ZERO;
+            env->Alpha.InvertD = FXFALSE;
+        }
+        else if (baseFormat == GL_INTENSITY) {
+            /* Av = Af * (1 - It) + Ac * It */
+            env->Alpha.SourceA = GR_CMBX_TMU_CALPHA;
+            env->Alpha.ModeA = GR_FUNC_MODE_X;
+            env->Alpha.SourceB = incomingAlpha;
+            env->Alpha.ModeB = GR_FUNC_MODE_NEGATIVE_X;
+            env->Alpha.SourceC = GR_CMBX_LOCAL_TEXTURE_ALPHA;
+            env->Alpha.InvertC = FXFALSE;
+            env->Alpha.SourceD = GR_CMBX_B;
+            env->Alpha.InvertD = FXFALSE;
+        }
+        else {
+            /* Av = Af * At */
+            env->Alpha.SourceA = GR_CMBX_LOCAL_TEXTURE_ALPHA;
+            env->Alpha.ModeA = GR_FUNC_MODE_X;
+            env->Alpha.SourceB = GR_CMBX_ITALPHA;
+            env->Alpha.ModeB = GR_FUNC_MODE_ZERO;
+            env->Alpha.SourceC = incomingAlpha;
+            env->Alpha.InvertC = FXFALSE;
+            env->Alpha.SourceD = GR_CMBX_ZERO;
+            env->Alpha.InvertD = FXFALSE;
+        }
+        /* Also have to set up the tex env constant color */
+        env->EnvColor = PACK_RGBA32(texUnit->EnvColor[0] * 255.0F,
+                                    texUnit->EnvColor[1] * 255.0F,
+                                    texUnit->EnvColor[2] * 255.0F,
+                                    texUnit->EnvColor[3] * 255.0F);
+        break;
+    case GL_ADD:
+        /* -- Setup RGB combiner */
+        if (baseFormat == GL_ALPHA) {
+            /* Rv = Rf */
+           env->Color.SourceB = GR_CMBX_ZERO;
+           env->Color.ModeB = GR_FUNC_MODE_ZERO;
+        }
+        else {
+            /* Rv = Rf + Tt */
+           env->Color.SourceB = GR_CMBX_LOCAL_TEXTURE_RGB;
+           env->Color.ModeB = GR_FUNC_MODE_X;
+        }
+        env->Color.SourceA = incomingRGB;
+        env->Color.ModeA = GR_FUNC_MODE_X;
+        env->Color.SourceC = GR_CMBX_ZERO;
+        env->Color.InvertC = FXTRUE;
+        env->Color.SourceD = GR_CMBX_ZERO;
+        env->Color.InvertD = FXFALSE;
+        /* -- Setup Alpha combiner */
+        if (baseFormat == GL_LUMINANCE || baseFormat == GL_RGB) {
+            /* Av = Af */
+           env->Alpha.SourceA = incomingAlpha;
+           env->Alpha.SourceB = GR_CMBX_ITALPHA;
+           env->Alpha.ModeB = GR_FUNC_MODE_ZERO;
+           env->Alpha.SourceC = GR_CMBX_ZERO;
+           env->Alpha.InvertC = FXTRUE;
+
+        }
+        else if (baseFormat == GL_INTENSITY) {
+            /* Av = Af + It */
+           env->Alpha.SourceA = incomingAlpha;
+           env->Alpha.SourceB = GR_CMBX_LOCAL_TEXTURE_ALPHA;
+           env->Alpha.ModeB = GR_FUNC_MODE_X;
+           env->Alpha.SourceC = GR_CMBX_ZERO;
+           env->Alpha.InvertC = FXTRUE;
+        }
+        else {
+            /* Av = Af * At */
+           env->Alpha.SourceA = GR_CMBX_LOCAL_TEXTURE_ALPHA;
+           env->Alpha.SourceB = GR_CMBX_ITALPHA;
+           env->Alpha.ModeB = GR_FUNC_MODE_ZERO;
+           env->Alpha.SourceC = incomingAlpha;
+           env->Alpha.InvertC = FXFALSE;
+        }
+        env->Alpha.ModeA = GR_FUNC_MODE_X;
+        env->Alpha.SourceD = GR_CMBX_ZERO;
+        env->Alpha.InvertD = FXFALSE;
+        break;
+
+    case GL_COMBINE_EXT:
+        {
+            FxU32 A_RGB, B_RGB, C_RGB, D_RGB;
+            FxU32 Amode_RGB, Bmode_RGB;
+            FxBool Cinv_RGB, Dinv_RGB, Ginv_RGB;
+            FxU32 Shift_RGB;
+            FxU32 A_A, B_A, C_A, D_A;
+            FxU32 Amode_A, Bmode_A;
+            FxBool Cinv_A, Dinv_A, Ginv_A;
+            FxU32 Shift_A;
+
+           /*
+            *
+            * In the formulas below, we write:
+            *  o "1(x)" for the identity function applied to x,
+            *    so 1(x) = x.
+            *  o "0(x)" for the constant function 0, so
+            *    0(x) = 0 for all values of x.
+            *
+            * Calculate the color combination.
+            */
+            Shift_RGB = texUnit->Combine.ScaleShiftRGB;
+            Shift_A = texUnit->Combine.ScaleShiftA;
+            switch (texUnit->Combine.ModeRGB) {
+            case GL_REPLACE:
+               /*
+                * The formula is: Arg0
+                * We implement this by the formula:
+                *   (Arg0 + 0(0))*(1-0) + 0
+                */
+                TEXENV_SETUP_ARG_RGB(A_RGB,
+                                     texUnit->Combine.SourceRGB[0],
+                                     texUnit->Combine.OperandRGB[0],
+                                     incomingRGB, incomingAlpha);
+                TEXENV_SETUP_MODE_RGB(Amode_RGB,
+                                      texUnit->Combine.OperandRGB[0]);
+                B_RGB = C_RGB = D_RGB = GR_CMBX_ZERO;
+                Bmode_RGB = GR_FUNC_MODE_ZERO;
+                Cinv_RGB = FXTRUE;
+                Dinv_RGB = Ginv_RGB = FXFALSE;
+                break;
+            case GL_MODULATE:
+               /*
+                * The formula is: Arg0 * Arg1
+                *
+                * We implement this by the formula
+                *   (Arg0 + 0(0)) * Arg1 + 0(0)
+                */
+                TEXENV_SETUP_ARG_RGB(A_RGB,
+                                     texUnit->Combine.SourceRGB[0],
+                                     texUnit->Combine.OperandRGB[0],
+                                     incomingRGB, incomingAlpha);
+                TEXENV_SETUP_MODE_RGB(Amode_RGB,
+                                      texUnit->Combine.OperandRGB[0]);
+                B_RGB = GR_CMBX_ZERO;
+                Bmode_RGB = GR_CMBX_ZERO;
+                TEXENV_SETUP_ARG_RGB(C_RGB,
+                                     texUnit->Combine.SourceRGB[1],
+                                     texUnit->Combine.OperandRGB[1],
+                                     incomingRGB, incomingAlpha);
+                Cinv_RGB = TEXENV_OPERAND_INVERTED
+                               (texUnit->Combine.OperandRGB[1]);
+                D_RGB = GR_CMBX_ZERO;
+                Dinv_RGB = Ginv_RGB = FXFALSE;
+                break;
+            case GL_ADD:
+               /*
+                * The formula is Arg0 + Arg1
+                */
+                TEXENV_SETUP_ARG_RGB(A_RGB,
+                                     texUnit->Combine.SourceRGB[0],
+                                     texUnit->Combine.OperandRGB[0],
+                                     incomingRGB, incomingAlpha);
+                TEXENV_SETUP_MODE_RGB(Amode_RGB,
+                                      texUnit->Combine.OperandRGB[0]);
+                TEXENV_SETUP_ARG_RGB(B_RGB,
+                                     texUnit->Combine.SourceRGB[1],
+                                     texUnit->Combine.OperandRGB[1],
+                                     incomingRGB, incomingAlpha);
+                TEXENV_SETUP_MODE_RGB(Bmode_RGB,
+                                      texUnit->Combine.OperandRGB[1]);
+                C_RGB = D_RGB = GR_CMBX_ZERO;
+                Cinv_RGB = FXTRUE;
+                Dinv_RGB = Ginv_RGB = FXFALSE;
+                break;
+            case GL_ADD_SIGNED_EXT:
+               /*
+                * The formula is: Arg0 + Arg1 - 0.5.
+                * We compute this by calculating:
+                *      (Arg0 - 1/2) + Arg1         if op0 is SRC_{COLOR,ALPHA}
+                *      Arg0 + (Arg1 - 1/2)         if op1 is SRC_{COLOR,ALPHA}
+                * If both op0 and op1 are ONE_MINUS_SRC_{COLOR,ALPHA}
+                * we cannot implement the formula properly.
+                */
+                TEXENV_SETUP_ARG_RGB(A_RGB,
+                                     texUnit->Combine.SourceRGB[0],
+                                     texUnit->Combine.OperandRGB[0],
+                                     incomingRGB, incomingAlpha);
+                TEXENV_SETUP_ARG_RGB(B_RGB,
+                                     texUnit->Combine.SourceRGB[1],
+                                     texUnit->Combine.OperandRGB[1],
+                                     incomingRGB, incomingAlpha);
+                if (!TEXENV_OPERAND_INVERTED(texUnit->Combine.OperandRGB[0])) {
+                   /*
+                    * A is not inverted.  So, choose it.
+                    */
+                    Amode_RGB = GR_FUNC_MODE_X_MINUS_HALF;
+                    if (!TEXENV_OPERAND_INVERTED
+                            (texUnit->Combine.OperandRGB[1])) {
+                        Bmode_RGB = GR_FUNC_MODE_X;
+                    }
+                    else {
+                        Bmode_RGB = GR_FUNC_MODE_ONE_MINUS_X;
+                    }
+                }
+                else {
+                   /*
+                    * A is inverted, so try to subtract 1/2
+                    * from B.
+                    */
+                    Amode_RGB = GR_FUNC_MODE_ONE_MINUS_X;
+                    if (!TEXENV_OPERAND_INVERTED
+                            (texUnit->Combine.OperandRGB[1])) {
+                        Bmode_RGB = GR_FUNC_MODE_X_MINUS_HALF;
+                    }
+                    else {
+                       /*
+                        * Both are inverted.  This is the case
+                        * we cannot handle properly.  We just
+                        * choose to not add the - 1/2.
+                        */
+                        Bmode_RGB = GR_FUNC_MODE_ONE_MINUS_X;
+                        return GL_FALSE;
+                    }
+                }
+                C_RGB = D_RGB = GR_CMBX_ZERO;
+                Cinv_RGB = FXTRUE;
+                Dinv_RGB = Ginv_RGB = FXFALSE;
+                break;
+            case GL_INTERPOLATE_EXT:
+               /*
+                * The formula is: Arg0 * Arg2 + Arg1 * (1 - Arg2).
+                * We compute this by the formula:
+                *            (Arg0 - Arg1) * Arg2 + Arg1
+                *               == Arg0 * Arg2 - Arg1 * Arg2 + Arg1
+                *               == Arg0 * Arg2 + Arg1 * (1 - Arg2)
+                * However, if Arg1 is ONE_MINUS_X, the HW does
+                * not support it properly.
+                */
+                TEXENV_SETUP_ARG_RGB(A_RGB,
+                                     texUnit->Combine.SourceRGB[0],
+                                     texUnit->Combine.OperandRGB[0],
+                                     incomingRGB, incomingAlpha);
+                TEXENV_SETUP_MODE_RGB(Amode_RGB,
+                                      texUnit->Combine.OperandRGB[0]);
+                TEXENV_SETUP_ARG_RGB(B_RGB,
+                                     texUnit->Combine.SourceRGB[1],
+                                     texUnit->Combine.OperandRGB[1],
+                                     incomingRGB, incomingAlpha);
+                if (TEXENV_OPERAND_INVERTED(texUnit->Combine.OperandRGB[1])) {
+                   /*
+                    * This case is wrong.
+                    */
+                   Bmode_RGB = GR_FUNC_MODE_NEGATIVE_X;
+                   return GL_FALSE;
+                }
+                else {
+                    Bmode_RGB = GR_FUNC_MODE_NEGATIVE_X;
+                }
+               /*
+                * The Source/Operand for the C value must
+                * specify some kind of alpha value.
+                */
+                TEXENV_SETUP_ARG_A(C_RGB,
+                                   texUnit->Combine.SourceRGB[2],
+                                   texUnit->Combine.OperandRGB[2],
+                                   incomingAlpha);
+                Cinv_RGB = FXFALSE;
+                D_RGB = GR_CMBX_B;
+                Dinv_RGB = Ginv_RGB = FXFALSE;
+                break;
+            default:
+               /*
+                * This is here mostly to keep from getting
+                * a compiler warning about these not being set.
+                * However, this should set all the texture values
+                * to zero.
+                */
+                A_RGB = B_RGB = C_RGB = D_RGB = GR_CMBX_ZERO;
+                Amode_RGB = Bmode_RGB = GR_FUNC_MODE_X;
+                Cinv_RGB = Dinv_RGB = Ginv_RGB = FXFALSE;
+                break;
+            }
+           /*
+            * Calculate the alpha combination.
+            */
+            switch (texUnit->Combine.ModeA) {
+            case GL_REPLACE:
+               /*
+                * The formula is: Arg0
+                * We implement this by the formula:
+                *   (Arg0 + 0(0))*(1-0) + 0
+                */
+                TEXENV_SETUP_ARG_A(A_A,
+                                   texUnit->Combine.SourceA[0],
+                                   texUnit->Combine.OperandA[0],
+                                   incomingAlpha);
+                TEXENV_SETUP_MODE_A(Amode_A,
+                                    texUnit->Combine.OperandA[0]);
+                B_A = GR_CMBX_ITALPHA;
+                Bmode_A = GR_FUNC_MODE_ZERO;
+                C_A = D_A = GR_CMBX_ZERO;
+                Cinv_A = FXTRUE;
+                Dinv_A = Ginv_A = FXFALSE;
+                break;
+            case GL_MODULATE:
+               /*
+                * The formula is: Arg0 * Arg1
+                *
+                * We implement this by the formula
+                *   (Arg0 + 0(0)) * Arg1 + 0(0)
+                */
+                TEXENV_SETUP_ARG_A(A_A,
+                                   texUnit->Combine.SourceA[0],
+                                   texUnit->Combine.OperandA[0],
+                                   incomingAlpha);
+                TEXENV_SETUP_MODE_A(Amode_A,
+                                    texUnit->Combine.OperandA[0]);
+                B_A = GR_CMBX_ZERO;
+                Bmode_A = GR_CMBX_ZERO;
+                TEXENV_SETUP_ARG_A(C_A,
+                                   texUnit->Combine.SourceA[1],
+                                   texUnit->Combine.OperandA[1],
+                                   incomingAlpha);
+                Cinv_A = TEXENV_OPERAND_INVERTED
+                               (texUnit->Combine.OperandA[1]);
+                D_A = GR_CMBX_ZERO;
+                Dinv_A = Ginv_A = FXFALSE;
+                break;
+            case GL_ADD:
+               /*
+                * The formula is Arg0 + Arg1
+                */
+                TEXENV_SETUP_ARG_A(A_A,
+                                   texUnit->Combine.SourceA[0],
+                                   texUnit->Combine.OperandA[0],
+                                   incomingAlpha);
+                TEXENV_SETUP_MODE_A(Amode_A,
+                                    texUnit->Combine.OperandA[0]);
+                TEXENV_SETUP_ARG_A(B_A,
+                                   texUnit->Combine.SourceA[1],
+                                   texUnit->Combine.OperandA[1],
+                                   incomingAlpha);
+                TEXENV_SETUP_MODE_A(Bmode_A,
+                                    texUnit->Combine.OperandA[1]);
+                C_A = D_A = GR_CMBX_ZERO;
+                Cinv_A = FXTRUE;
+                Dinv_A = Ginv_A = FXFALSE;
+                break;
+            case GL_ADD_SIGNED_EXT:
+               /*
+                * The formula is: Arg0 + Arg1 - 0.5.
+                * We compute this by calculating:
+                *      (Arg0 - 1/2) + Arg1         if op0 is SRC_{COLOR,ALPHA}
+                *      Arg0 + (Arg1 - 1/2)         if op1 is SRC_{COLOR,ALPHA}
+                * If both op0 and op1 are ONE_MINUS_SRC_{COLOR,ALPHA}
+                * we cannot implement the formula properly.
+                */
+                TEXENV_SETUP_ARG_A(A_A,
+                                   texUnit->Combine.SourceA[0],
+                                   texUnit->Combine.OperandA[0],
+                                   incomingAlpha);
+                TEXENV_SETUP_ARG_A(B_A,
+                                   texUnit->Combine.SourceA[1],
+                                   texUnit->Combine.OperandA[1],
+                                   incomingAlpha);
+                if (!TEXENV_OPERAND_INVERTED(texUnit->Combine.OperandA[0])) {
+                   /*
+                    * A is not inverted.  So, choose it.
+                    */
+                    Amode_A = GR_FUNC_MODE_X_MINUS_HALF;
+                    if (!TEXENV_OPERAND_INVERTED
+                            (texUnit->Combine.OperandA[1])) {
+                        Bmode_A = GR_FUNC_MODE_X;
+                    } else {
+                        Bmode_A = GR_FUNC_MODE_ONE_MINUS_X;
+                    }
+                } else {
+                   /*
+                    * A is inverted, so try to subtract 1/2
+                    * from B.
+                    */
+                    Amode_A = GR_FUNC_MODE_ONE_MINUS_X;
+                    if (!TEXENV_OPERAND_INVERTED
+                            (texUnit->Combine.OperandA[1])) {
+                        Bmode_A = GR_FUNC_MODE_X_MINUS_HALF;
+                    } else {
+                       /*
+                        * Both are inverted.  This is the case
+                        * we cannot handle properly.  We just
+                        * choose to not add the - 1/2.
+                        */
+                        Bmode_A = GR_FUNC_MODE_ONE_MINUS_X;
+                        return GL_FALSE;
+                    }
+                }
+                C_A = D_A = GR_CMBX_ZERO;
+                Cinv_A = FXTRUE;
+                Dinv_A = Ginv_A = FXFALSE;
+                break;
+            case GL_INTERPOLATE_EXT:
+               /*
+                * The formula is: Arg0 * Arg2 + Arg1 * (1 - Arg2).
+                * We compute this by the formula:
+                *            (Arg0 - Arg1) * Arg2 + Arg1
+                *               == Arg0 * Arg2 - Arg1 * Arg2 + Arg1
+                *               == Arg0 * Arg2 + Arg1 * (1 - Arg2)
+                * However, if Arg1 is ONE_MINUS_X, the HW does
+                * not support it properly.
+                */
+                TEXENV_SETUP_ARG_A(A_A,
+                                   texUnit->Combine.SourceA[0],
+                                   texUnit->Combine.OperandA[0],
+                                   incomingAlpha);
+                TEXENV_SETUP_MODE_A(Amode_A,
+                                    texUnit->Combine.OperandA[0]);
+                TEXENV_SETUP_ARG_A(B_A,
+                                   texUnit->Combine.SourceA[1],
+                                   texUnit->Combine.OperandA[1],
+                                   incomingAlpha);
+                if (!TEXENV_OPERAND_INVERTED(texUnit->Combine.OperandA[1])) {
+                    Bmode_A = GR_FUNC_MODE_NEGATIVE_X;
+                }
+                else {
+                   /*
+                    * This case is wrong.
+                    */
+                    Bmode_A = GR_FUNC_MODE_NEGATIVE_X;
+                    return GL_FALSE;
+                }
+               /*
+                * The Source/Operand for the C value must
+                * specify some kind of alpha value.
+                */
+                TEXENV_SETUP_ARG_A(C_A,
+                                   texUnit->Combine.SourceA[2],
+                                   texUnit->Combine.OperandA[2],
+                                   incomingAlpha);
+                Cinv_A = FXFALSE;
+                D_A = GR_CMBX_B;
+                Dinv_A = Ginv_A = FXFALSE;
+                break;
+            default:
+               /*
+                * This is here mostly to keep from getting
+                * a compiler warning about these not being set.
+                * However, this should set all the alpha values
+                * to one.
+                */
+                A_A = B_A = C_A = D_A = GR_CMBX_ZERO;
+                Amode_A = Bmode_A = GR_FUNC_MODE_X;
+                Cinv_A = Dinv_A = FXFALSE;
+                Ginv_A = FXTRUE;
+                break;
+            }
+           /*
+            * Save the parameters.
+            */
+            env->Color.SourceA = A_RGB;
+            env->Color.ModeA = Amode_RGB;
+            env->Color.SourceB = B_RGB;
+            env->Color.ModeB = Bmode_RGB;
+            env->Color.SourceC = C_RGB;
+            env->Color.InvertC = Cinv_RGB;
+            env->Color.SourceD = D_RGB;
+            env->Color.InvertD = Dinv_RGB;
+            env->Color.Shift = Shift_RGB;
+            env->Color.Invert = Ginv_RGB;
+            env->Alpha.SourceA = A_A;
+            env->Alpha.ModeA = Amode_A;
+            env->Alpha.SourceB = B_A;
+            env->Alpha.ModeB = Bmode_A;
+            env->Alpha.SourceC = C_A;
+            env->Alpha.InvertC = Cinv_A;
+            env->Alpha.SourceD = D_A;
+            env->Alpha.InvertD = Dinv_A;
+            env->Alpha.Shift = Shift_A;
+            env->Alpha.Invert = Ginv_A;
+            env->EnvColor = PACK_RGBA32(texUnit->EnvColor[0] * 255.0F,
+                                        texUnit->EnvColor[1] * 255.0F,
+                                        texUnit->EnvColor[2] * 255.0F,
+                                        texUnit->EnvColor[3] * 255.0F);
+        }
+        break;
+
+    default:
+        _mesa_problem(ctx, "%s: Bad envMode", __FUNCTION__);
+    }
+
+    fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_ENV;
+
+    fxMesa->ColorCombineExt.SourceA = GR_CMBX_TEXTURE_RGB;
+    fxMesa->ColorCombineExt.ModeA = GR_FUNC_MODE_X,
+    fxMesa->ColorCombineExt.SourceB = GR_CMBX_ZERO;
+    fxMesa->ColorCombineExt.ModeB = GR_FUNC_MODE_X;
+    fxMesa->ColorCombineExt.SourceC = GR_CMBX_ZERO;
+    fxMesa->ColorCombineExt.InvertC = FXTRUE;
+    fxMesa->ColorCombineExt.SourceD = GR_CMBX_ZERO;
+    fxMesa->ColorCombineExt.InvertD = FXFALSE;
+    fxMesa->ColorCombineExt.Shift = 0;
+    fxMesa->ColorCombineExt.Invert = FXFALSE;
+    fxMesa->dirty |= TDFX_UPLOAD_COLOR_COMBINE;
+    fxMesa->AlphaCombineExt.SourceA = GR_CMBX_TEXTURE_ALPHA;
+    fxMesa->AlphaCombineExt.ModeA = GR_FUNC_MODE_X;
+    fxMesa->AlphaCombineExt.SourceB = GR_CMBX_ZERO;
+    fxMesa->AlphaCombineExt.ModeB = GR_FUNC_MODE_X;
+    fxMesa->AlphaCombineExt.SourceC = GR_CMBX_ZERO;
+    fxMesa->AlphaCombineExt.InvertC = FXTRUE;
+    fxMesa->AlphaCombineExt.SourceD = GR_CMBX_ZERO;
+    fxMesa->AlphaCombineExt.InvertD = FXFALSE;
+    fxMesa->AlphaCombineExt.Shift = 0;
+    fxMesa->AlphaCombineExt.Invert = FXFALSE;
+    fxMesa->dirty |= TDFX_UPLOAD_ALPHA_COMBINE;
+    return GL_TRUE; /* success */
+}
+
+
+
+/*
+ * Setup the Voodoo3 texture environment for a single texture unit:
+ * map envMode/baseFormat to color and alpha combine settings and store
+ * them in fxMesa, flagging the upload bits only when they changed.
+ * Return GL_TRUE for success, GL_FALSE for failure.
+ * If failure, we'll use software rendering.
+ */
+static GLboolean
+SetupSingleTexEnvVoodoo3(GLcontext *ctx, int unit,
+                         GLenum envMode, GLenum baseFormat)
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   /* Local combine input is always the iterated color, never the constant. */
+   const GrCombineLocal_t localc = GR_COMBINE_LOCAL_ITERATED;
+   const GrCombineLocal_t locala = GR_COMBINE_LOCAL_ITERATED;
+   struct tdfx_combine alphaComb, colorComb;
+
+   switch (envMode) {
+   case GL_DECAL:   /* Av = Af; rgb uses BLEND with the texture alpha as factor */
+      alphaComb.Function = GR_COMBINE_FUNCTION_LOCAL;
+      alphaComb.Factor = GR_COMBINE_FACTOR_NONE;
+      alphaComb.Local = locala;
+      alphaComb.Other = GR_COMBINE_OTHER_NONE;
+      alphaComb.Invert = FXFALSE;
+      colorComb.Function = GR_COMBINE_FUNCTION_BLEND;
+      colorComb.Factor = GR_COMBINE_FACTOR_TEXTURE_ALPHA;
+      colorComb.Local = localc;
+      colorComb.Other = GR_COMBINE_OTHER_TEXTURE;
+      colorComb.Invert = FXFALSE;
+      break;
+   case GL_MODULATE:   /* Av = Af * At */
+      alphaComb.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+      alphaComb.Factor = GR_COMBINE_FACTOR_LOCAL;
+      alphaComb.Local = locala;
+      alphaComb.Other = GR_COMBINE_OTHER_TEXTURE;
+      alphaComb.Invert = FXFALSE;
+      if (baseFormat == GL_ALPHA) {   /* rgb unchanged */
+         colorComb.Function = GR_COMBINE_FUNCTION_LOCAL;
+         colorComb.Factor = GR_COMBINE_FACTOR_NONE;
+         colorComb.Local = localc;
+         colorComb.Other = GR_COMBINE_OTHER_NONE;
+         colorComb.Invert = FXFALSE;
+      }
+      else {
+         colorComb.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+         colorComb.Factor = GR_COMBINE_FACTOR_LOCAL;
+         colorComb.Local = localc;
+         colorComb.Other = GR_COMBINE_OTHER_TEXTURE;
+         colorComb.Invert = FXFALSE;
+      }
+      break;
+
+   case GL_BLEND:
+      /*
+       * XXX we can't do real GL_BLEND mode.  These settings assume that
+       * the TexEnv color is black and incoming fragment color is white.
+       */
+      if (baseFormat == GL_LUMINANCE || baseFormat == GL_RGB) {
+         /* Av = Af */
+         alphaComb.Function = GR_COMBINE_FUNCTION_LOCAL;
+         alphaComb.Factor = GR_COMBINE_FACTOR_NONE;
+         alphaComb.Local = locala;
+         alphaComb.Other = GR_COMBINE_OTHER_NONE;
+         alphaComb.Invert = FXFALSE;
+      }
+      else if (baseFormat == GL_INTENSITY) {
+         /* Av = Af * (1 - It) + Ac * It */
+         alphaComb.Function = GR_COMBINE_FUNCTION_BLEND;
+         alphaComb.Factor = GR_COMBINE_FACTOR_TEXTURE_ALPHA;
+         alphaComb.Local = locala;
+         alphaComb.Other = GR_COMBINE_OTHER_CONSTANT;
+         alphaComb.Invert = FXFALSE;
+      }
+      else {
+         /* Av = Af * At */
+         alphaComb.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+         alphaComb.Factor = GR_COMBINE_FACTOR_LOCAL;
+         alphaComb.Local = locala;
+         alphaComb.Other = GR_COMBINE_OTHER_TEXTURE;
+         alphaComb.Invert = FXFALSE;
+      }
+      if (baseFormat == GL_ALPHA) {   /* rgb unchanged */
+         colorComb.Function = GR_COMBINE_FUNCTION_LOCAL;
+         colorComb.Factor = GR_COMBINE_FACTOR_NONE;
+         colorComb.Local = localc;
+         colorComb.Other = GR_COMBINE_OTHER_NONE;
+         colorComb.Invert = FXFALSE;
+      }
+      else {
+         colorComb.Function = GR_COMBINE_FUNCTION_BLEND;
+         colorComb.Factor = GR_COMBINE_FACTOR_TEXTURE_RGB;
+         colorComb.Local = localc;
+         colorComb.Other = GR_COMBINE_OTHER_CONSTANT;
+         colorComb.Invert = FXTRUE;
+      }
+      fxMesa->Color.MonoColor = PACK_RGBA32(
+         ctx->Texture.Unit[unit].EnvColor[0] * 255.0f,
+         ctx->Texture.Unit[unit].EnvColor[1] * 255.0f,
+         ctx->Texture.Unit[unit].EnvColor[2] * 255.0f,
+         ctx->Texture.Unit[unit].EnvColor[3] * 255.0f);
+      fxMesa->dirty |= TDFX_UPLOAD_CONSTANT_COLOR;
+      break;
+
+   case GL_REPLACE:
+      if ((baseFormat == GL_RGB) || (baseFormat == GL_LUMINANCE)) {   /* Av = Af */
+         alphaComb.Function = GR_COMBINE_FUNCTION_LOCAL;
+         alphaComb.Factor = GR_COMBINE_FACTOR_NONE;
+         alphaComb.Local = locala;
+         alphaComb.Other = GR_COMBINE_OTHER_NONE;
+         alphaComb.Invert = FXFALSE;
+      }
+      else {
+         alphaComb.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+         alphaComb.Factor = GR_COMBINE_FACTOR_ONE;
+         alphaComb.Local = locala;
+         alphaComb.Other = GR_COMBINE_OTHER_TEXTURE;
+         alphaComb.Invert = FXFALSE;
+      }
+      if (baseFormat == GL_ALPHA) {   /* rgb unchanged */
+         colorComb.Function = GR_COMBINE_FUNCTION_LOCAL;
+         colorComb.Factor = GR_COMBINE_FACTOR_NONE;
+         colorComb.Local = localc;
+         colorComb.Other = GR_COMBINE_OTHER_NONE;
+         colorComb.Invert = FXFALSE;
+      }
+      else {
+         colorComb.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+         colorComb.Factor = GR_COMBINE_FACTOR_ONE;
+         colorComb.Local = localc;
+         colorComb.Other = GR_COMBINE_OTHER_TEXTURE;
+         colorComb.Invert = FXFALSE;
+      }
+      break;
+
+   case GL_ADD:
+      if (baseFormat == GL_ALPHA ||
+          baseFormat == GL_LUMINANCE_ALPHA ||
+          baseFormat == GL_RGBA) {
+         /* product of texel and fragment alpha */
+         alphaComb.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+         alphaComb.Factor = GR_COMBINE_FACTOR_LOCAL;
+         alphaComb.Local = locala;
+         alphaComb.Other = GR_COMBINE_OTHER_TEXTURE;
+         alphaComb.Invert = FXFALSE;
+      }
+      else if (baseFormat == GL_LUMINANCE || baseFormat == GL_RGB) {
+         /* fragment alpha is unchanged */
+         alphaComb.Function = GR_COMBINE_FUNCTION_LOCAL;
+         alphaComb.Factor = GR_COMBINE_FACTOR_NONE;
+         alphaComb.Local = locala;
+         alphaComb.Other = GR_COMBINE_OTHER_NONE;
+         alphaComb.Invert = FXFALSE;
+      }
+      else {
+         ASSERT(baseFormat == GL_INTENSITY);
+         /* sum of texel and fragment alpha */
+         alphaComb.Function = GR_COMBINE_FUNCTION_SCALE_OTHER_ADD_LOCAL;
+         alphaComb.Factor = GR_COMBINE_FACTOR_ONE;
+         alphaComb.Local = locala;
+         alphaComb.Other = GR_COMBINE_OTHER_TEXTURE;
+         alphaComb.Invert = FXFALSE;
+      }
+      if (baseFormat == GL_ALPHA) {
+         /* rgb unchanged */
+         colorComb.Function = GR_COMBINE_FUNCTION_LOCAL;
+         colorComb.Factor = GR_COMBINE_FACTOR_NONE;
+         colorComb.Local = localc;
+         colorComb.Other = GR_COMBINE_OTHER_NONE;
+         colorComb.Invert = FXFALSE;
+      }
+      else {
+         /* sum of texel and fragment rgb */
+         colorComb.Function = GR_COMBINE_FUNCTION_SCALE_OTHER_ADD_LOCAL;
+         colorComb.Factor = GR_COMBINE_FACTOR_ONE;
+         colorComb.Local = localc;
+         colorComb.Other = GR_COMBINE_OTHER_TEXTURE;
+         colorComb.Invert = FXFALSE;
+      }
+      break;
+
+   default:
+      /* Unknown mode: start from the current combine state and report it. */
+      colorComb = fxMesa->ColorCombine;
+      alphaComb = fxMesa->AlphaCombine;
+      _mesa_problem(ctx, "bad texture env mode in %s", __FUNCTION__);
+      break;
+   }
+
+   if (colorComb.Function != fxMesa->ColorCombine.Function ||
+       colorComb.Factor != fxMesa->ColorCombine.Factor ||
+       colorComb.Local != fxMesa->ColorCombine.Local ||
+       colorComb.Other != fxMesa->ColorCombine.Other ||
+       colorComb.Invert != fxMesa->ColorCombine.Invert) {
+      fxMesa->ColorCombine = colorComb;
+      fxMesa->dirty |= TDFX_UPLOAD_COLOR_COMBINE;
+   }
+
+   if (alphaComb.Function != fxMesa->AlphaCombine.Function ||
+       alphaComb.Factor != fxMesa->AlphaCombine.Factor ||
+       alphaComb.Local != fxMesa->AlphaCombine.Local ||
+       alphaComb.Other != fxMesa->AlphaCombine.Other ||
+       alphaComb.Invert != fxMesa->AlphaCombine.Invert) {
+      fxMesa->AlphaCombine = alphaComb;
+      fxMesa->dirty |= TDFX_UPLOAD_ALPHA_COMBINE;
+   }
+   return GL_TRUE;   /* every mode, even the bad-mode default, reports success */
+}
+
+
+/*
+ * Setup the Voodoo3 texture environment for dual texture units.
+ * Return GL_TRUE for success, GL_FALSE for failure.
+ * If failure, we'll use software rendering.
+ */
+static GLboolean
+SetupDoubleTexEnvVoodoo3(GLcontext *ctx, int tmu0,
+                         GLenum envMode0, GLenum baseFormat0,
+                         GLenum envMode1, GLenum baseFormat1)
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   const GrCombineLocal_t locala = GR_COMBINE_LOCAL_ITERATED;
+   const GrCombineLocal_t localc = GR_COMBINE_LOCAL_ITERATED;
+   const int tmu1 = 1 - tmu0;
+
+   if (envMode0 == GL_MODULATE && envMode1 == GL_MODULATE) {
+      GLboolean isalpha[TDFX_NUM_TMU];
+
+      isalpha[tmu0] = (baseFormat0 == GL_ALPHA);
+      isalpha[tmu1] = (baseFormat1 == GL_ALPHA);
+
+      if (isalpha[TDFX_TMU1]) {
+         fxMesa->TexCombine[1].FunctionRGB = GR_COMBINE_FUNCTION_ZERO;
+         fxMesa->TexCombine[1].FactorRGB = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].FunctionAlpha = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].InvertRGB = FXTRUE;
+         fxMesa->TexCombine[1].InvertAlpha = FXFALSE;
+      }
+      else {
+         fxMesa->TexCombine[1].FunctionRGB = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorRGB = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].FunctionAlpha = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[1].InvertAlpha = FXFALSE;
+      }
+      if (isalpha[TDFX_TMU0]) {
+         fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_BLEND_OTHER;
+         fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_ONE;
+         fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_BLEND_OTHER;
+         fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_LOCAL;
+         fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+      }
+      else {
+         fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_BLEND_OTHER;
+         fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_LOCAL;
+         fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_BLEND_OTHER;
+         fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_LOCAL;
+         fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+      }
+      fxMesa->ColorCombine.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+      fxMesa->ColorCombine.Factor = GR_COMBINE_FACTOR_LOCAL;
+      fxMesa->ColorCombine.Local = localc;
+      fxMesa->ColorCombine.Other = GR_COMBINE_OTHER_TEXTURE;
+      fxMesa->ColorCombine.Invert = FXFALSE;
+      fxMesa->AlphaCombine.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+      fxMesa->AlphaCombine.Factor = GR_COMBINE_FACTOR_LOCAL;
+      fxMesa->AlphaCombine.Local = locala;
+      fxMesa->AlphaCombine.Other = GR_COMBINE_OTHER_TEXTURE;
+      fxMesa->AlphaCombine.Invert = FXFALSE;
+   }
+   else if (envMode0 == GL_REPLACE && envMode1 == GL_BLEND) { /* Quake */
+      if (tmu0 == TDFX_TMU1) {
+         fxMesa->TexCombine[1].FunctionRGB = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorRGB = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].FunctionAlpha = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].InvertRGB = FXTRUE;
+         fxMesa->TexCombine[1].InvertAlpha = FXFALSE;
+         fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_BLEND_OTHER;
+         fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_LOCAL;
+         fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_BLEND_OTHER;
+         fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_LOCAL;
+         fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+      }
+      else {
+         fxMesa->TexCombine[1].FunctionRGB = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorRGB = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].FunctionAlpha = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[1].InvertAlpha = FXFALSE;
+         fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_BLEND_OTHER;
+         fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_ONE_MINUS_LOCAL;
+         fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_BLEND_OTHER;
+         fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_ONE_MINUS_LOCAL;
+         fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+      }
+      fxMesa->ColorCombine.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+      fxMesa->ColorCombine.Factor = GR_COMBINE_FACTOR_ONE;
+      fxMesa->ColorCombine.Local = localc;
+      fxMesa->ColorCombine.Other = GR_COMBINE_OTHER_TEXTURE;
+      fxMesa->ColorCombine.Invert = FXFALSE;
+      fxMesa->AlphaCombine.Function = GR_COMBINE_FUNCTION_LOCAL;
+      fxMesa->AlphaCombine.Factor = GR_COMBINE_FACTOR_NONE;
+      fxMesa->AlphaCombine.Local = locala;
+      fxMesa->AlphaCombine.Other = GR_COMBINE_OTHER_NONE;
+      fxMesa->AlphaCombine.Invert = FXFALSE;
+   }
+   else if (envMode0 == GL_REPLACE && envMode1 == GL_MODULATE) {
+      /* Quake 2/3 */
+      if (tmu1 == TDFX_TMU1) {
+         fxMesa->TexCombine[1].FunctionRGB = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorRGB = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].FunctionAlpha = GR_COMBINE_FUNCTION_ZERO;
+         fxMesa->TexCombine[1].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[1].InvertAlpha = FXTRUE;
+         fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_BLEND_OTHER;
+         fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_LOCAL;
+         fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_BLEND_OTHER;
+         fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_LOCAL;
+         fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+      }
+      else {
+         fxMesa->TexCombine[1].FunctionRGB = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorRGB = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].FunctionAlpha = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[1].InvertAlpha = FXFALSE;
+         fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_BLEND_OTHER;
+         fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_LOCAL;
+         fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_BLEND_OTHER;
+         fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_ONE;
+         fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+      }
+
+      fxMesa->ColorCombine.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+      fxMesa->ColorCombine.Factor = GR_COMBINE_FACTOR_ONE;
+      fxMesa->ColorCombine.Local = localc;
+      fxMesa->ColorCombine.Other = GR_COMBINE_OTHER_TEXTURE;
+      fxMesa->ColorCombine.Invert = FXFALSE;
+      if (baseFormat0 == GL_RGB) {
+         fxMesa->AlphaCombine.Function = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->AlphaCombine.Factor = GR_COMBINE_FACTOR_NONE;
+         fxMesa->AlphaCombine.Local = locala;
+         fxMesa->AlphaCombine.Other = GR_COMBINE_OTHER_NONE;
+         fxMesa->AlphaCombine.Invert = FXFALSE;
+      }
+      else {
+         fxMesa->AlphaCombine.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+         fxMesa->AlphaCombine.Factor = GR_COMBINE_FACTOR_ONE;
+         fxMesa->AlphaCombine.Local = locala;
+         fxMesa->AlphaCombine.Other = GR_COMBINE_OTHER_NONE;
+         fxMesa->AlphaCombine.Invert = FXFALSE;
+      }
+   }
+   else if (envMode0 == GL_MODULATE && envMode1 == GL_ADD) {
+      /* Quake 3 sky */
+      GLboolean isalpha[TDFX_NUM_TMU];
+
+      isalpha[tmu0] = (baseFormat0 == GL_ALPHA);
+      isalpha[tmu1] = (baseFormat1 == GL_ALPHA);
+
+      if (isalpha[TDFX_TMU1]) {
+         fxMesa->TexCombine[1].FunctionRGB = GR_COMBINE_FUNCTION_ZERO;
+         fxMesa->TexCombine[1].FactorRGB = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].FunctionAlpha = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].InvertRGB = FXTRUE;
+         fxMesa->TexCombine[1].InvertAlpha = FXFALSE;
+      }
+      else {
+         fxMesa->TexCombine[1].FunctionRGB = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorRGB = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].FunctionAlpha = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[1].InvertAlpha = FXFALSE;
+      }
+      if (isalpha[TDFX_TMU0]) {
+         fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_SCALE_OTHER;
+         fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_ONE;
+         fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_SCALE_OTHER_ADD_LOCAL;
+         fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_ONE;
+         fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+      }
+      else {
+         fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_SCALE_OTHER_ADD_LOCAL;
+         fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_ONE;
+         fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_SCALE_OTHER_ADD_LOCAL;
+         fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_ONE;
+         fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+      }
+      fxMesa->ColorCombine.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+      fxMesa->ColorCombine.Factor = GR_COMBINE_FACTOR_LOCAL;
+      fxMesa->ColorCombine.Local = localc;
+      fxMesa->ColorCombine.Other = GR_COMBINE_OTHER_TEXTURE;
+      fxMesa->ColorCombine.Invert = FXFALSE;
+      fxMesa->AlphaCombine.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+      fxMesa->AlphaCombine.Factor = GR_COMBINE_FACTOR_LOCAL;
+      fxMesa->AlphaCombine.Local = locala;
+      fxMesa->AlphaCombine.Other = GR_COMBINE_OTHER_TEXTURE;
+      fxMesa->AlphaCombine.Invert = FXFALSE;
+   }
+   else if (envMode0 == GL_REPLACE && envMode1 == GL_ADD) {
+      /* Vulpine sky */
+      GLboolean isalpha[TDFX_NUM_TMU];
+
+      isalpha[tmu0] = (baseFormat0 == GL_ALPHA);
+      isalpha[tmu1] = (baseFormat1 == GL_ALPHA);
+
+      if (isalpha[TDFX_TMU1]) {
+         fxMesa->TexCombine[1].FunctionRGB = GR_COMBINE_FUNCTION_ZERO;
+         fxMesa->TexCombine[1].FactorRGB = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].FunctionAlpha = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].InvertRGB = FXTRUE;
+         fxMesa->TexCombine[1].InvertAlpha = FXFALSE;
+      } else {
+         fxMesa->TexCombine[1].FunctionRGB = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorRGB = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].FunctionAlpha = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[1].InvertAlpha = FXFALSE;
+      }
+
+      if (isalpha[TDFX_TMU0]) {
+         fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_SCALE_OTHER;
+         fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_ONE;
+         fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_SCALE_OTHER_ADD_LOCAL;
+         fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_ONE;
+         fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+      } else {
+         fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_SCALE_OTHER_ADD_LOCAL;
+         fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_ONE;
+         fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_SCALE_OTHER_ADD_LOCAL;
+         fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_ONE;
+         fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+      }
+
+      fxMesa->ColorCombine.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+      fxMesa->ColorCombine.Factor = GR_COMBINE_FACTOR_ONE;
+      fxMesa->ColorCombine.Local = localc;
+      fxMesa->ColorCombine.Other = GR_COMBINE_OTHER_TEXTURE;
+      fxMesa->ColorCombine.Invert = FXFALSE;
+      fxMesa->AlphaCombine.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+      fxMesa->AlphaCombine.Factor = GR_COMBINE_FACTOR_ONE;
+      fxMesa->AlphaCombine.Local = locala;
+      fxMesa->AlphaCombine.Other = GR_COMBINE_OTHER_TEXTURE;
+      fxMesa->AlphaCombine.Invert = FXFALSE;
+   }
+   else if (envMode1 == GL_REPLACE) {
+      /* Homeworld2 */
+
+      fxMesa->TexCombine[1].FunctionRGB = GR_COMBINE_FUNCTION_ZERO;
+      fxMesa->TexCombine[1].FactorRGB = GR_COMBINE_FACTOR_NONE;
+      fxMesa->TexCombine[1].FunctionAlpha = GR_COMBINE_FUNCTION_ZERO;
+      fxMesa->TexCombine[1].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+      fxMesa->TexCombine[1].InvertRGB = FXFALSE;
+      fxMesa->TexCombine[1].InvertAlpha = FXFALSE;
+
+      fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_LOCAL;
+      fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_NONE;
+      fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_LOCAL;
+      fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+      fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+      fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+
+      if ((baseFormat0 == GL_RGB) || (baseFormat0 == GL_LUMINANCE)) {
+         fxMesa->AlphaCombine.Function = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->AlphaCombine.Factor = GR_COMBINE_FACTOR_NONE;
+         fxMesa->AlphaCombine.Local = locala;
+         fxMesa->AlphaCombine.Other = GR_COMBINE_OTHER_NONE;
+         fxMesa->AlphaCombine.Invert = FXFALSE;
+      } else {
+         fxMesa->AlphaCombine.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+         fxMesa->AlphaCombine.Factor = GR_COMBINE_FACTOR_ONE;
+         fxMesa->AlphaCombine.Local = locala;
+         fxMesa->AlphaCombine.Other = GR_COMBINE_OTHER_TEXTURE;
+         fxMesa->AlphaCombine.Invert = FXFALSE;
+      }
+      if (baseFormat0 == GL_ALPHA) {
+         fxMesa->ColorCombine.Function = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->ColorCombine.Factor = GR_COMBINE_FACTOR_NONE;
+         fxMesa->ColorCombine.Local = localc;
+         fxMesa->ColorCombine.Other = GR_COMBINE_OTHER_NONE;
+         fxMesa->ColorCombine.Invert = FXFALSE;
+      } else {
+         fxMesa->ColorCombine.Function = GR_COMBINE_FUNCTION_SCALE_OTHER;
+         fxMesa->ColorCombine.Factor = GR_COMBINE_FACTOR_ONE;
+         fxMesa->ColorCombine.Local = localc;
+         fxMesa->ColorCombine.Other = GR_COMBINE_OTHER_TEXTURE;
+         fxMesa->ColorCombine.Invert = FXFALSE;
+      }
+   }
+   else {
+      _mesa_problem(ctx, "%s: Unexpected dual texture mode encountered", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_ENV;
+   fxMesa->dirty |= TDFX_UPLOAD_COLOR_COMBINE;
+   fxMesa->dirty |= TDFX_UPLOAD_ALPHA_COMBINE;
+   return GL_TRUE;
+}
+
+
+/*
+ * Make sure tObj's mipmap levels are resident in the right texture memory
+ * bank(s), then record its palette, sampling parameters, source addresses
+ * and s/t scale in fxMesa, setting the matching TDFX_UPLOAD_* dirty bits.
+ */
+static void
+setupSingleTMU(tdfxContextPtr fxMesa, struct gl_texture_object *tObj)
+{
+   struct tdfxSharedState *shared = (struct tdfxSharedState *) fxMesa->glCtx->Shared->DriverData;
+   tdfxTexInfo *ti = TDFX_TEXTURE_DATA(tObj);
+   const GLcontext *ctx = fxMesa->glCtx;
+
+   /* Make sure we're not loaded incorrectly */
+   if (ti->isInTM && !shared->umaTexMemory) {
+      /* if doing filtering between mipmap levels, alternate mipmap levels
+       * must be in alternate TMUs.
+       */
+      if (ti->LODblend) {
+         if (ti->whichTMU != TDFX_TMU_SPLIT) /* wrong layout: evict (expected to be reloaded below) */
+            tdfxTMMoveOutTM_NoLock(fxMesa, tObj);
+      }
+      else {
+         if (ti->whichTMU == TDFX_TMU_SPLIT) /* split layout is only wanted with LODblend */
+            tdfxTMMoveOutTM_NoLock(fxMesa, tObj);
+      }
+   }
+
+   /* Make sure we're loaded correctly */
+   if (!ti->isInTM) {
+      /* Have to download the texture */
+      if (shared->umaTexMemory) {
+         tdfxTMMoveInTM_NoLock(fxMesa, tObj, TDFX_TMU0);
+      }
+      else {
+         /* Voodoo3 (split texture memory) */
+         if (ti->LODblend) {
+            tdfxTMMoveInTM_NoLock(fxMesa, tObj, TDFX_TMU_SPLIT);
+         }
+         else {
+#if 0
+            /* XXX putting textures into the second memory bank when the
+             * first bank is full is not working at this time.
+             */
+            if (fxMesa->haveTwoTMUs) {
+               GLint memReq = fxMesa->Glide.grTexTextureMemRequired(
+                                       GR_MIPMAPLEVELMASK_BOTH, &(ti->info));
+               if (shared->freeTexMem[TDFX_TMU0] > memReq) {
+                  tdfxTMMoveInTM_NoLock(fxMesa, tObj, TDFX_TMU0);
+               }
+               else {
+                  tdfxTMMoveInTM_NoLock(fxMesa, tObj, TDFX_TMU1);
+               }
+            }
+            else
+#endif
+            {
+               tdfxTMMoveInTM_NoLock(fxMesa, tObj, TDFX_TMU0);
+            }
+         }
+      }
+   }
+
+   if (ti->LODblend && ti->whichTMU == TDFX_TMU_SPLIT) {
+      /* mipmap levels split between texture banks */
+      GLint u;
+
+      if (ti->info.format == GR_TEXFMT_P_8 && !ctx->Texture.SharedPalette) {
+         fxMesa->TexPalette.Type = ti->paltype;
+         fxMesa->TexPalette.Data = &(ti->palette);
+         fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_PALETTE;
+      }
+
+      for (u = 0; u < 2; u++) { /* same sampling state on both TMUs, set unconditionally */
+         fxMesa->TexParams[u].sClamp = ti->sClamp;
+         fxMesa->TexParams[u].tClamp = ti->tClamp;
+         fxMesa->TexParams[u].minFilt = ti->minFilt;
+         fxMesa->TexParams[u].magFilt = ti->magFilt;
+         fxMesa->TexParams[u].mmMode = ti->mmMode;
+         fxMesa->TexParams[u].LODblend = ti->LODblend;
+         fxMesa->TexParams[u].LodBias = ctx->Texture.Unit[u].LodBias;
+      }
+      fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_PARAMS;
+
+      fxMesa->TexSource[0].StartAddress = ti->tm[TDFX_TMU0]->startAddr;
+      fxMesa->TexSource[0].EvenOdd = GR_MIPMAPLEVELMASK_ODD; /* unit 0 gets the odd-level mask */
+      fxMesa->TexSource[0].Info = &(ti->info);
+      fxMesa->TexSource[1].StartAddress = ti->tm[TDFX_TMU1]->startAddr;
+      fxMesa->TexSource[1].EvenOdd = GR_MIPMAPLEVELMASK_EVEN; /* unit 1 gets the even-level mask */
+      fxMesa->TexSource[1].Info = &(ti->info);
+      fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_SOURCE;
+   }
+   else {
+      FxU32 tmu;
+
+      if (ti->whichTMU == TDFX_TMU_BOTH) /* TDFX_TMU_BOTH is treated as TMU0 */
+         tmu = TDFX_TMU0;
+      else
+         tmu = ti->whichTMU;
+
+      if (shared->umaTexMemory) { /* the UMA path above always loads into TMU0 */
+         assert(ti->whichTMU == TDFX_TMU0);
+         assert(tmu == TDFX_TMU0);
+      }
+
+      if (ti->info.format == GR_TEXFMT_P_8 && !ctx->Texture.SharedPalette) {
+         fxMesa->TexPalette.Type = ti->paltype;
+         fxMesa->TexPalette.Data = &(ti->palette);
+         fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_PALETTE;
+      }
+
+      /* KW: The alternative is to do the download to the other tmu.  If
+       * we get to this point, I think it means we are thrashing the
+       * texture memory, so perhaps it's not a good idea.
+       */
+
+      if (fxMesa->TexParams[tmu].sClamp != ti->sClamp || /* rewrite/flag params only on change */
+          fxMesa->TexParams[tmu].tClamp != ti->tClamp ||
+          fxMesa->TexParams[tmu].minFilt != ti->minFilt ||
+          fxMesa->TexParams[tmu].magFilt != ti->magFilt ||
+          fxMesa->TexParams[tmu].mmMode != ti->mmMode ||
+          fxMesa->TexParams[tmu].LODblend != FXFALSE ||
+          fxMesa->TexParams[tmu].LodBias != ctx->Texture.Unit[tmu].LodBias) {
+         fxMesa->TexParams[tmu].sClamp = ti->sClamp;
+         fxMesa->TexParams[tmu].tClamp = ti->tClamp;
+         fxMesa->TexParams[tmu].minFilt = ti->minFilt;
+         fxMesa->TexParams[tmu].magFilt = ti->magFilt;
+         fxMesa->TexParams[tmu].mmMode = ti->mmMode;
+         fxMesa->TexParams[tmu].LODblend = FXFALSE;
+         fxMesa->TexParams[tmu].LodBias = ctx->Texture.Unit[tmu].LodBias;
+         fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_PARAMS;
+      }
+
+      /* Glide texture source info */
+      fxMesa->TexSource[0].Info = NULL; /* clear both units, then fill in the one in use */
+      fxMesa->TexSource[1].Info = NULL;
+      if (ti->tm[tmu]) { /* source is left cleared if no memory range is recorded for tmu */
+         fxMesa->TexSource[tmu].StartAddress = ti->tm[tmu]->startAddr;
+         fxMesa->TexSource[tmu].EvenOdd = GR_MIPMAPLEVELMASK_BOTH;
+         fxMesa->TexSource[tmu].Info = &(ti->info);
+         fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_SOURCE;
+      }
+   }
+
+   fxMesa->sScale0 = ti->sScale; /* texcoord scale is stored in the unit-0 slots on both paths */
+   fxMesa->tScale0 = ti->tScale;
+}
+
+static void
+selectSingleTMUSrc(tdfxContextPtr fxMesa, GLint tmu, FxBool LODblend)
+{  /* Program TexCombine[] and fxMesa->tmuSrc for single-texture rendering. */
+   if (LODblend) { /* the 'tmu' argument is not read on this path */
+      fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_BLEND;
+      fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_ONE_MINUS_LOD_FRACTION;
+      fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_BLEND;
+      fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_ONE_MINUS_LOD_FRACTION;
+      fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+      fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+
+      if (fxMesa->haveTwoTMUs) {
+         const struct gl_shared_state *mesaShared = fxMesa->glCtx->Shared;
+         const struct tdfxSharedState *shared = (struct tdfxSharedState *) mesaShared->DriverData;
+         int localTmu; /* own name, so the 'tmu' parameter is not shadowed */
+
+         if (shared->umaTexMemory)
+            localTmu = GR_TMU0; /* NOTE(review): if GR_TMU0 == 0 this replaces the BLEND setup above - confirm */
+         else
+            localTmu = GR_TMU1;
+
+         fxMesa->TexCombine[localTmu].FunctionRGB = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[localTmu].FactorRGB = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[localTmu].FunctionAlpha = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[localTmu].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[localTmu].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[localTmu].InvertAlpha = FXFALSE;
+      }
+      fxMesa->tmuSrc = TDFX_TMU_SPLIT;
+   }
+   else {
+      if (tmu != TDFX_TMU1) { /* every value other than TDFX_TMU1 selects TMU0 */
+         fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+         if (fxMesa->haveTwoTMUs) { /* second unit, when present, is set to the ZERO function */
+            fxMesa->TexCombine[1].FunctionRGB = GR_COMBINE_FUNCTION_ZERO;
+            fxMesa->TexCombine[1].FactorRGB = GR_COMBINE_FACTOR_NONE;
+            fxMesa->TexCombine[1].FunctionAlpha = GR_COMBINE_FUNCTION_ZERO;
+            fxMesa->TexCombine[1].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+            fxMesa->TexCombine[1].InvertRGB = FXFALSE;
+            fxMesa->TexCombine[1].InvertAlpha = FXFALSE;
+         }
+         fxMesa->tmuSrc = TDFX_TMU0;
+      }
+      else {
+         fxMesa->TexCombine[1].FunctionRGB = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorRGB = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].FunctionAlpha = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->TexCombine[1].FactorAlpha = GR_COMBINE_FACTOR_NONE;
+         fxMesa->TexCombine[1].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[1].InvertAlpha = FXFALSE;
+         /* GR_COMBINE_FUNCTION_SCALE_OTHER doesn't work ?!? */
+         fxMesa->TexCombine[0].FunctionRGB = GR_COMBINE_FUNCTION_BLEND;
+         fxMesa->TexCombine[0].FactorRGB = GR_COMBINE_FACTOR_ONE;
+         fxMesa->TexCombine[0].FunctionAlpha = GR_COMBINE_FUNCTION_BLEND;
+         fxMesa->TexCombine[0].FactorAlpha = GR_COMBINE_FACTOR_ONE;
+         fxMesa->TexCombine[0].InvertRGB = FXFALSE;
+         fxMesa->TexCombine[0].InvertAlpha = FXFALSE;
+         fxMesa->tmuSrc = TDFX_TMU1;
+      }
+   }
+
+   fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_ENV; /* flagged unconditionally, on every path */
+}
+
+#if 0 /* compiled out: prints GL texture state next to fxMesa->TexState for units 0 and 1 */
+static void print_state(tdfxContextPtr fxMesa)
+{
+   GLcontext *ctx = fxMesa->glCtx;
+   struct gl_texture_object *tObj0 = ctx->Texture.Unit[0]._Current;
+   struct gl_texture_object *tObj1 = ctx->Texture.Unit[1]._Current;
+   GLenum base0 = tObj0->Image[0][tObj0->BaseLevel] ? tObj0->Image[0][tObj0->BaseLevel]->Format : 99; /* 99 = no image */
+   GLenum base1 = tObj1->Image[0][tObj1->BaseLevel] ? tObj1->Image[0][tObj1->BaseLevel]->Format : 99;
+
+   printf("Unit 0: Enabled:  GL=%d   Gr=%d\n", ctx->Texture.Unit[0]._ReallyEnabled,
+          fxMesa->TexState.Enabled[0]);
+   printf("   EnvMode: GL=0x%x  Gr=0x%x\n", ctx->Texture.Unit[0].EnvMode,
+          fxMesa->TexState.EnvMode[0]);
+   printf("   BaseFmt: GL=0x%x  Gr=0x%x\n", base0, fxMesa->TexState.TexFormat[0]);
+
+
+   printf("Unit 1: Enabled:  GL=%d  Gr=%d\n", ctx->Texture.Unit[1]._ReallyEnabled,
+          fxMesa->TexState.Enabled[1]);
+   printf("   EnvMode: GL=0x%x  Gr:0x%x\n", ctx->Texture.Unit[1].EnvMode,
+          fxMesa->TexState.EnvMode[1]);
+   printf("   BaseFmt: GL=0x%x  Gr:0x%x\n", base1, fxMesa->TexState.TexFormat[1]);
+}
+#endif /* 0 */
+
+/*
+ * When we're only using a single texture unit, we always use the 0th
+ * Glide/hardware unit, regardless of whether it's GL_TEXTURE0_ARB or
+ * GL_TEXTURE1_ARB that's enabled.
+ * Input:  ctx - the context
+ *         unit - the OpenGL texture unit to use.
+ */
+static void setupTextureSingleTMU(GLcontext * ctx, GLuint unit)
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   tdfxTexInfo *ti;
+   struct gl_texture_object *tObj;
+   int tmu;
+   GLenum envMode, baseFormat;
+
+   tObj = ctx->Texture.Unit[unit]._Current;
+   if (tObj->Image[0][tObj->BaseLevel]->Border > 0) { /* bordered base image: raise the fallback and bail */
+      FALLBACK(fxMesa, TDFX_FALLBACK_TEXTURE_BORDER, GL_TRUE);
+      return;
+   }
+
+   setupSingleTMU(fxMesa, tObj);
+
+   ti = TDFX_TEXTURE_DATA(tObj);
+   if (ti->whichTMU == TDFX_TMU_BOTH) /* TDFX_TMU_BOTH is treated as TMU0 */
+      tmu = TDFX_TMU0;
+   else
+      tmu = ti->whichTMU;
+
+   if (fxMesa->tmuSrc != tmu) { /* reprogram the combiners only when the source TMU changes */
+      selectSingleTMUSrc(fxMesa, tmu, ti->LODblend);
+   }
+
+   if (ti->reloadImages)
+      fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_IMAGES;
+
+   /* Check if we really need to update the texenv state */
+   envMode = ctx->Texture.Unit[unit].EnvMode;
+   baseFormat = tObj->Image[0][tObj->BaseLevel]->_BaseFormat;
+
+   if (TDFX_IS_NAPALM(fxMesa)) {
+      /* see if we really need to update the unit */
+      if (1/*fxMesa->TexState.Enabled[unit] != ctx->Texture.Unit[unit]._ReallyEnabled ||
+          envMode != fxMesa->TexState.EnvMode[0] ||
+          envMode == GL_COMBINE_EXT ||
+          baseFormat != fxMesa->TexState.TexFormat[0]*/) { /* cache test commented out: always taken */
+         struct tdfx_texcombine_ext *otherEnv;
+         if (!SetupTexEnvNapalm(ctx, GL_TRUE,
+                                &ctx->Texture.Unit[unit], baseFormat,
+                                &fxMesa->TexCombineExt[0])) {
+            /* software fallback */
+            FALLBACK(fxMesa, TDFX_FALLBACK_TEXTURE_ENV, GL_TRUE);
+         }
+         /* disable other unit */
+         otherEnv = &fxMesa->TexCombineExt[1];
+         otherEnv->Color.SourceA = GR_CMBX_ZERO;
+         otherEnv->Color.ModeA = GR_FUNC_MODE_ZERO;
+         otherEnv->Color.SourceB = GR_CMBX_ZERO;
+         otherEnv->Color.ModeB = GR_FUNC_MODE_ZERO;
+         otherEnv->Color.SourceC = GR_CMBX_ZERO;
+         otherEnv->Color.InvertC = FXFALSE;
+         otherEnv->Color.SourceD = GR_CMBX_ZERO;
+         otherEnv->Color.InvertD = FXFALSE;
+         otherEnv->Color.Shift = 0;
+         otherEnv->Color.Invert = FXFALSE;
+         otherEnv->Alpha.SourceA = GR_CMBX_ITALPHA;
+         otherEnv->Alpha.ModeA = GR_FUNC_MODE_ZERO;
+         otherEnv->Alpha.SourceB = GR_CMBX_ITALPHA;
+         otherEnv->Alpha.ModeB = GR_FUNC_MODE_ZERO;
+         otherEnv->Alpha.SourceC = GR_CMBX_ZERO;
+         otherEnv->Alpha.InvertC = FXFALSE;
+         otherEnv->Alpha.SourceD = GR_CMBX_ZERO;
+         otherEnv->Alpha.InvertD = FXFALSE;
+         otherEnv->Alpha.Shift = 0;
+         otherEnv->Alpha.Invert = FXFALSE;
+
+#if 0/*JJJ*/
+         fxMesa->TexState.Enabled[unit] = ctx->Texture.Unit[unit]._ReallyEnabled;
+         fxMesa->TexState.EnvMode[0] = envMode;
+         fxMesa->TexState.TexFormat[0] = baseFormat;
+         fxMesa->TexState.EnvMode[1] = 0;
+         fxMesa->TexState.TexFormat[1] = 0;
+#endif
+      }
+   }
+   else {
+      /* Voodoo3 */
+
+      /* see if we really need to update the unit */
+      if (1/*fxMesa->TexState.Enabled[unit] != ctx->Texture.Unit[unit]._ReallyEnabled ||
+          envMode != fxMesa->TexState.EnvMode[0] ||
+          envMode == GL_COMBINE_EXT ||
+          baseFormat != fxMesa->TexState.TexFormat[0]*/) { /* cache test commented out: always taken */
+         if (!SetupSingleTexEnvVoodoo3(ctx, unit, envMode, baseFormat)) {
+            /* software fallback */
+            FALLBACK(fxMesa, TDFX_FALLBACK_TEXTURE_ENV, GL_TRUE);
+         }
+#if 0/*JJJ*/
+         fxMesa->TexState.Enabled[unit] = ctx->Texture.Unit[unit]._ReallyEnabled;
+         fxMesa->TexState.EnvMode[0] = envMode;
+         fxMesa->TexState.TexFormat[0] = baseFormat;
+         fxMesa->TexState.EnvMode[1] = 0;
+         fxMesa->TexState.TexFormat[1] = 0;
+#endif
+      }
+   }
+}
+
+
+static void
+setupDoubleTMU(tdfxContextPtr fxMesa,
+               struct gl_texture_object *tObj0,
+               struct gl_texture_object *tObj1)
+{
+#define T0_NOT_IN_TMU  0x01
+#define T1_NOT_IN_TMU  0x02
+#define T0_IN_TMU0     0x04
+#define T1_IN_TMU0     0x08
+#define T0_IN_TMU1     0x10
+#define T1_IN_TMU1     0x20
+
+    const struct gl_shared_state *mesaShared = fxMesa->glCtx->Shared;
+    const struct tdfxSharedState *shared = (struct tdfxSharedState *) mesaShared->DriverData;
+    const GLcontext *ctx = fxMesa->glCtx;
+    tdfxTexInfo *ti0 = TDFX_TEXTURE_DATA(tObj0);
+    tdfxTexInfo *ti1 = TDFX_TEXTURE_DATA(tObj1);
+    GLuint tstate = 0;
+    int tmu0 = 0, tmu1 = 1;
+
+    if (shared->umaTexMemory) {
+       if (!ti0->isInTM) {
+          tdfxTMMoveInTM_NoLock(fxMesa, tObj0, TDFX_TMU0);
+          assert(ti0->isInTM);
+       }
+       if (!ti1->isInTM) {
+          tdfxTMMoveInTM_NoLock(fxMesa, tObj1, TDFX_TMU0);
+          assert(ti1->isInTM);
+       }
+    }
+    else {
+       /* We shouldn't need to do this. There is something wrong with
+          multitexturing when the TMUs are swapped. So, we're forcing
+          them to always be loaded correctly. !!! */
+       if (ti0->whichTMU == TDFX_TMU1)
+           tdfxTMMoveOutTM_NoLock(fxMesa, tObj0);
+       if (ti1->whichTMU == TDFX_TMU0)
+           tdfxTMMoveOutTM_NoLock(fxMesa, tObj1);
+
+       if (ti0->isInTM) {
+           switch (ti0->whichTMU) {
+           case TDFX_TMU0:
+               tstate |= T0_IN_TMU0;
+               break;
+           case TDFX_TMU1:
+               tstate |= T0_IN_TMU1;
+               break;
+           case TDFX_TMU_BOTH:
+               tstate |= T0_IN_TMU0 | T0_IN_TMU1;
+               break;
+           case TDFX_TMU_SPLIT:
+               tstate |= T0_NOT_IN_TMU;
+               break;
+           }
+       }
+       else
+           tstate |= T0_NOT_IN_TMU;
+
+       if (ti1->isInTM) {
+           switch (ti1->whichTMU) {
+           case TDFX_TMU0:
+               tstate |= T1_IN_TMU0;
+               break;
+           case TDFX_TMU1:
+               tstate |= T1_IN_TMU1;
+               break;
+           case TDFX_TMU_BOTH:
+               tstate |= T1_IN_TMU0 | T1_IN_TMU1;
+               break;
+           case TDFX_TMU_SPLIT:
+               tstate |= T1_NOT_IN_TMU;
+               break;
+           }
+       }
+       else
+           tstate |= T1_NOT_IN_TMU;
+
+       /* Move texture maps into TMUs */
+
+       if (!(((tstate & T0_IN_TMU0) && (tstate & T1_IN_TMU1)) ||
+             ((tstate & T0_IN_TMU1) && (tstate & T1_IN_TMU0)))) {
+           if (tObj0 == tObj1) {
+              tdfxTMMoveInTM_NoLock(fxMesa, tObj1, TDFX_TMU_BOTH);
+           }
+           else {
+               /* Find the minimal way to correct the situation */
+               if ((tstate & T0_IN_TMU0) || (tstate & T1_IN_TMU1)) {
+                   /* We have one in the standard order, setup the other */
+                   if (tstate & T0_IN_TMU0) {
+                      /* T0 is in TMU0, put T1 in TMU1 */
+                      tdfxTMMoveInTM_NoLock(fxMesa, tObj1, TDFX_TMU1);
+                   }
+                   else {
+                       tdfxTMMoveInTM_NoLock(fxMesa, tObj0, TDFX_TMU0);
+                   }
+                   /* tmu0 and tmu1 are setup */
+               }
+               else if ((tstate & T0_IN_TMU1) || (tstate & T1_IN_TMU0)) {
+                   /* we have one in the reverse order, setup the other */
+                   if (tstate & T1_IN_TMU0) {
+                      /* T1 is in TMU0, put T0 in TMU1 */
+                      tdfxTMMoveInTM_NoLock(fxMesa, tObj0, TDFX_TMU1);
+                   }
+                   else {
+                       tdfxTMMoveInTM_NoLock(fxMesa, tObj1, TDFX_TMU0);
+                   }
+                   tmu0 = 1;
+                   tmu1 = 0;
+               }
+               else {              /* Nothing is loaded */
+                   tdfxTMMoveInTM_NoLock(fxMesa, tObj0, TDFX_TMU0);
+                   tdfxTMMoveInTM_NoLock(fxMesa, tObj1, TDFX_TMU1);
+                   /* tmu0 and tmu1 are setup */
+               }
+           }
+       }
+    }
+
+    ti0->lastTimeUsed = fxMesa->texBindNumber;
+    ti1->lastTimeUsed = fxMesa->texBindNumber;
+
+
+    if (!ctx->Texture.SharedPalette) {
+        if (ti0->info.format == GR_TEXFMT_P_8) {
+            fxMesa->TexPalette.Type = ti0->paltype;
+            fxMesa->TexPalette.Data = &(ti0->palette);
+            fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_PALETTE;
+        }
+        else if (ti1->info.format == GR_TEXFMT_P_8) {
+            fxMesa->TexPalette.Type = ti1->paltype;
+            fxMesa->TexPalette.Data = &(ti1->palette);
+            fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_PALETTE;
+        }
+        else {
+            fxMesa->TexPalette.Data = NULL;
+        }
+    }
+
+    /*
+     * Setup Unit 0
+     */
+    assert(ti0->isInTM);
+    assert(ti0->tm[tmu0]);
+    fxMesa->TexSource[tmu0].StartAddress = ti0->tm[tmu0]->startAddr;
+    fxMesa->TexSource[tmu0].EvenOdd = GR_MIPMAPLEVELMASK_BOTH;
+    fxMesa->TexSource[tmu0].Info = &(ti0->info);
+    fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_SOURCE;
+
+    if (fxMesa->TexParams[tmu0].sClamp != ti0->sClamp ||
+        fxMesa->TexParams[tmu0].tClamp != ti0->tClamp ||
+        fxMesa->TexParams[tmu0].minFilt != ti0->minFilt ||
+        fxMesa->TexParams[tmu0].magFilt != ti0->magFilt ||
+        fxMesa->TexParams[tmu0].mmMode != ti0->mmMode ||
+        fxMesa->TexParams[tmu0].LODblend != FXFALSE ||
+        fxMesa->TexParams[tmu0].LodBias != ctx->Texture.Unit[tmu0].LodBias) {
+       fxMesa->TexParams[tmu0].sClamp = ti0->sClamp;
+       fxMesa->TexParams[tmu0].tClamp = ti0->tClamp;
+       fxMesa->TexParams[tmu0].minFilt = ti0->minFilt;
+       fxMesa->TexParams[tmu0].magFilt = ti0->magFilt;
+       fxMesa->TexParams[tmu0].mmMode = ti0->mmMode;
+       fxMesa->TexParams[tmu0].LODblend = FXFALSE;
+       fxMesa->TexParams[tmu0].LodBias = ctx->Texture.Unit[tmu0].LodBias;
+       fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_PARAMS;
+    }
+
+    /*
+     * Setup Unit 1
+     */
+    if (shared->umaTexMemory) {
+        ASSERT(ti1->isInTM);
+        ASSERT(ti1->tm[0]);
+        fxMesa->TexSource[tmu1].StartAddress = ti1->tm[0]->startAddr;
+        fxMesa->TexSource[tmu1].EvenOdd = GR_MIPMAPLEVELMASK_BOTH;
+        fxMesa->TexSource[tmu1].Info = &(ti1->info);
+    }
+    else {
+        ASSERT(ti1->isInTM);
+        ASSERT(ti1->tm[tmu1]);
+        fxMesa->TexSource[tmu1].StartAddress = ti1->tm[tmu1]->startAddr;
+        fxMesa->TexSource[tmu1].EvenOdd = GR_MIPMAPLEVELMASK_BOTH;
+        fxMesa->TexSource[tmu1].Info = &(ti1->info);
+    }
+
+    if (fxMesa->TexParams[tmu1].sClamp != ti1->sClamp ||
+        fxMesa->TexParams[tmu1].tClamp != ti1->tClamp ||
+        fxMesa->TexParams[tmu1].minFilt != ti1->minFilt ||
+        fxMesa->TexParams[tmu1].magFilt != ti1->magFilt ||
+        fxMesa->TexParams[tmu1].mmMode != ti1->mmMode ||
+        fxMesa->TexParams[tmu1].LODblend != FXFALSE ||
+        fxMesa->TexParams[tmu1].LodBias != ctx->Texture.Unit[tmu1].LodBias) {
+       fxMesa->TexParams[tmu1].sClamp = ti1->sClamp;
+       fxMesa->TexParams[tmu1].tClamp = ti1->tClamp;
+       fxMesa->TexParams[tmu1].minFilt = ti1->minFilt;
+       fxMesa->TexParams[tmu1].magFilt = ti1->magFilt;
+       fxMesa->TexParams[tmu1].mmMode = ti1->mmMode;
+       fxMesa->TexParams[tmu1].LODblend = FXFALSE;
+       fxMesa->TexParams[tmu1].LodBias = ctx->Texture.Unit[tmu1].LodBias;
+       fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_PARAMS;
+    }
+
+    fxMesa->sScale0 = ti0->sScale;
+    fxMesa->tScale0 = ti0->tScale;
+    fxMesa->sScale1 = ti1->sScale;
+    fxMesa->tScale1 = ti1->tScale;
+
+#undef T0_NOT_IN_TMU
+#undef T1_NOT_IN_TMU
+#undef T0_IN_TMU0
+#undef T1_IN_TMU0
+#undef T0_IN_TMU1
+#undef T1_IN_TMU1
+}
+
+static void setupTextureDoubleTMU(GLcontext * ctx)
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   struct gl_texture_object *tObj0 = ctx->Texture.Unit[1]._Current;
+   struct gl_texture_object *tObj1 = ctx->Texture.Unit[0]._Current;
+   tdfxTexInfo *ti0 = TDFX_TEXTURE_DATA(tObj0);
+   tdfxTexInfo *ti1 = TDFX_TEXTURE_DATA(tObj1);
+   struct gl_texture_image *baseImage0 = tObj0->Image[0][tObj0->BaseLevel];
+   struct gl_texture_image *baseImage1 = tObj1->Image[0][tObj1->BaseLevel];
+#if 0/*JJJ*/
+   const GLenum envMode0 = ctx->Texture.Unit[0].EnvMode;
+   const GLenum envMode1 = ctx->Texture.Unit[1].EnvMode;
+#endif
+   /* Dual-texture setup.  NB: the "0" locals come from GL unit 1, the "1" locals from GL unit 0. */
+   if (baseImage0->Border > 0 || baseImage1->Border > 0) {
+      FALLBACK(fxMesa, TDFX_FALLBACK_TEXTURE_BORDER, GL_TRUE);   /* bordered textures are not set up here */
+      return;
+   }
+   /* Make both textures resident and program TexSource/TexParams for the two TMUs. */
+   setupDoubleTMU(fxMesa, tObj0, tObj1);
+
+   if (ti0->reloadImages || ti1->reloadImages)
+      fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_IMAGES;
+
+   fxMesa->tmuSrc = TDFX_TMU_BOTH;
+   /* NOTE(review): below, Unit[0]'s env is paired with baseImage0 (GL unit 1's image) -- confirm intended. */
+   if (TDFX_IS_NAPALM(fxMesa)) {
+      /* Remember, Glide has its texture units numbered in backward
+       * order compared to OpenGL: GL unit 0's env goes to TexCombineExt[1].
+       */
+      GLboolean hw1 = GL_TRUE, hw2 = GL_TRUE;
+
+      /* glide unit 1: always updated, the cached-state test is commented out */
+      if (1/*fxMesa->TexState.Enabled[0] != ctx->Texture.Unit[0]._ReallyEnabled ||
+          envMode0 != fxMesa->TexState.EnvMode[1] ||
+          envMode0 == GL_COMBINE_EXT ||
+          baseImage0->Format != fxMesa->TexState.TexFormat[1] ||
+          (fxMesa->Fallback & TDFX_FALLBACK_TEXTURE_ENV)*/) {
+         hw1 = SetupTexEnvNapalm(ctx, GL_TRUE, &ctx->Texture.Unit[0],
+                                baseImage0->_BaseFormat, &fxMesa->TexCombineExt[1]);
+#if 0/*JJJ*/
+         fxMesa->TexState.EnvMode[1] = envMode0;
+         fxMesa->TexState.TexFormat[1] = baseImage0->_BaseFormat;
+         fxMesa->TexState.Enabled[0] = ctx->Texture.Unit[0]._ReallyEnabled;
+#endif
+      }
+
+      /* glide unit 0: always updated, the cached-state test is commented out */
+      if (1/*fxMesa->TexState.Enabled[1] != ctx->Texture.Unit[1]._ReallyEnabled ||
+          envMode1 != fxMesa->TexState.EnvMode[0] ||
+          envMode1 == GL_COMBINE_EXT ||
+          baseImage1->_BaseFormat != fxMesa->TexState.TexFormat[0] ||
+          (fxMesa->Fallback & TDFX_FALLBACK_TEXTURE_ENV)*/) {
+         hw2 = SetupTexEnvNapalm(ctx, GL_FALSE, &ctx->Texture.Unit[1],
+                                baseImage1->_BaseFormat, &fxMesa->TexCombineExt[0]);
+#if 0/*JJJ*/
+         fxMesa->TexState.EnvMode[0] = envMode1;
+         fxMesa->TexState.TexFormat[0] = baseImage1->_BaseFormat;
+         fxMesa->TexState.Enabled[1] = ctx->Texture.Unit[1]._ReallyEnabled;
+#endif
+      }
+      /* If either env setup returned false, raise the texture-env fallback. */
+
+      if (!hw1 || !hw2) {
+         FALLBACK(fxMesa, TDFX_FALLBACK_TEXTURE_ENV, GL_TRUE);
+      }
+   }
+   else {
+      int unit0, unit1;
+      if ((ti0->whichTMU == TDFX_TMU1) || (ti1->whichTMU == TDFX_TMU0))
+         unit0 = 1;
+      else
+         unit0 = 0;
+      unit1 = 1 - unit0;
+      /* unit0 is 1 when ti0 sits in TMU1 or ti1 in TMU0; unit1 is only read by the disabled cache code. */
+      if (1/*fxMesa->TexState.Enabled[0] != ctx->Texture.Unit[0]._ReallyEnabled ||
+          fxMesa->TexState.Enabled[1] != ctx->Texture.Unit[1]._ReallyEnabled ||
+          envMode0 != fxMesa->TexState.EnvMode[unit0] ||
+          envMode0 == GL_COMBINE_EXT ||
+          envMode1 != fxMesa->TexState.EnvMode[unit1] ||
+          envMode1 == GL_COMBINE_EXT ||
+          baseImage0->_BaseFormat != fxMesa->TexState.TexFormat[unit0] ||
+          baseImage1->_BaseFormat != fxMesa->TexState.TexFormat[unit1] ||
+          (fxMesa->Fallback & TDFX_FALLBACK_TEXTURE_ENV)*/) {
+
+         if (!SetupDoubleTexEnvVoodoo3(ctx, unit0,
+                         ctx->Texture.Unit[0].EnvMode, baseImage0->_BaseFormat,
+                         ctx->Texture.Unit[1].EnvMode, baseImage1->_BaseFormat)) {
+            FALLBACK(fxMesa, TDFX_FALLBACK_TEXTURE_ENV, GL_TRUE);
+         }
+
+#if 0/*JJJ*/
+         fxMesa->TexState.EnvMode[unit0] = envMode0;
+         fxMesa->TexState.TexFormat[unit0] = baseImage0->_BaseFormat;
+         fxMesa->TexState.EnvMode[unit1] = envMode1;
+         fxMesa->TexState.TexFormat[unit1] = baseImage1->_BaseFormat;
+         fxMesa->TexState.Enabled[0] = ctx->Texture.Unit[0]._ReallyEnabled;
+         fxMesa->TexState.Enabled[1] = ctx->Texture.Unit[1]._ReallyEnabled;
+#endif
+      }
+   }
+}
+
+
+void
+tdfxUpdateTextureState( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   /* Recompute texture hardware state from which of GL units 0/1 have a 1D/2D texture enabled. */
+   FALLBACK(fxMesa, TDFX_FALLBACK_TEXTURE_BORDER, GL_FALSE);
+   FALLBACK(fxMesa, TDFX_FALLBACK_TEXTURE_ENV, GL_FALSE);
+   /* Both texture fallbacks were just passed GL_FALSE; the code below may raise them again. */
+   if (ctx->Texture.Unit[0]._ReallyEnabled & (TEXTURE_1D_BIT|TEXTURE_2D_BIT) &&
+       ctx->Texture.Unit[1]._ReallyEnabled == 0) {
+      LOCK_HARDWARE( fxMesa );  /* XXX remove locking eventually */
+      setupTextureSingleTMU(ctx, 0);
+      UNLOCK_HARDWARE( fxMesa );
+   }
+   else if (ctx->Texture.Unit[0]._ReallyEnabled == 0 && 
+            ctx->Texture.Unit[1]._ReallyEnabled & (TEXTURE_1D_BIT|TEXTURE_2D_BIT)) {
+      LOCK_HARDWARE( fxMesa );
+      setupTextureSingleTMU(ctx, 1);
+      UNLOCK_HARDWARE( fxMesa );
+   }
+   else if (ctx->Texture.Unit[0]._ReallyEnabled & (TEXTURE_1D_BIT|TEXTURE_2D_BIT) &&
+            ctx->Texture.Unit[1]._ReallyEnabled & (TEXTURE_1D_BIT|TEXTURE_2D_BIT)) {
+      LOCK_HARDWARE( fxMesa );
+      setupTextureDoubleTMU(ctx);
+      UNLOCK_HARDWARE( fxMesa );
+   }
+   else {
+      /* disable hardware texturing: combiners take the iterated colour/alpha (ITRGB/ITALPHA, LOCAL_ITERATED) */
+      if (TDFX_IS_NAPALM(fxMesa)) {
+         fxMesa->ColorCombineExt.SourceA = GR_CMBX_ITRGB;
+         fxMesa->ColorCombineExt.ModeA = GR_FUNC_MODE_X;
+         fxMesa->ColorCombineExt.SourceB = GR_CMBX_ZERO;
+         fxMesa->ColorCombineExt.ModeB = GR_FUNC_MODE_ZERO;
+         fxMesa->ColorCombineExt.SourceC = GR_CMBX_ZERO;
+         fxMesa->ColorCombineExt.InvertC = FXTRUE;
+         fxMesa->ColorCombineExt.SourceD = GR_CMBX_ZERO;
+         fxMesa->ColorCombineExt.InvertD = FXFALSE;
+         fxMesa->ColorCombineExt.Shift = 0;
+         fxMesa->ColorCombineExt.Invert = FXFALSE;
+         fxMesa->AlphaCombineExt.SourceA = GR_CMBX_ITALPHA;
+         fxMesa->AlphaCombineExt.ModeA = GR_FUNC_MODE_X;
+         fxMesa->AlphaCombineExt.SourceB = GR_CMBX_ZERO;
+         fxMesa->AlphaCombineExt.ModeB = GR_FUNC_MODE_ZERO;
+         fxMesa->AlphaCombineExt.SourceC = GR_CMBX_ZERO;
+         fxMesa->AlphaCombineExt.InvertC = FXTRUE;
+         fxMesa->AlphaCombineExt.SourceD = GR_CMBX_ZERO;
+         fxMesa->AlphaCombineExt.InvertD = FXFALSE;
+         fxMesa->AlphaCombineExt.Shift = 0;
+         fxMesa->AlphaCombineExt.Invert = FXFALSE;
+      }
+      else {
+         /* Voodoo 3*/
+         fxMesa->ColorCombine.Function = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->ColorCombine.Factor = GR_COMBINE_FACTOR_NONE;
+         fxMesa->ColorCombine.Local = GR_COMBINE_LOCAL_ITERATED;
+         fxMesa->ColorCombine.Other = GR_COMBINE_OTHER_NONE;
+         fxMesa->ColorCombine.Invert = FXFALSE;
+         fxMesa->AlphaCombine.Function = GR_COMBINE_FUNCTION_LOCAL;
+         fxMesa->AlphaCombine.Factor = GR_COMBINE_FACTOR_NONE;
+         fxMesa->AlphaCombine.Local = GR_COMBINE_LOCAL_ITERATED;
+         fxMesa->AlphaCombine.Other = GR_COMBINE_OTHER_NONE;
+         fxMesa->AlphaCombine.Invert = FXFALSE;
+      }
+      /* Clear the cached per-unit enable flags and env modes. */
+      fxMesa->TexState.Enabled[0] = 0;
+      fxMesa->TexState.Enabled[1] = 0;
+      fxMesa->TexState.EnvMode[0] = 0;
+      fxMesa->TexState.EnvMode[1] = 0;
+
+      fxMesa->dirty |= TDFX_UPLOAD_COLOR_COMBINE;
+      fxMesa->dirty |= TDFX_UPLOAD_ALPHA_COMBINE;
+
+      if (ctx->Texture.Unit[0]._ReallyEnabled != 0 ||
+          ctx->Texture.Unit[1]._ReallyEnabled != 0) {
+         /* software texture (cube map, rect tex, etc.) */
+         FALLBACK(fxMesa, TDFX_FALLBACK_TEXTURE_ENV, GL_TRUE);
+      }
+   }
+}
+
+
+
+/*
+ * Special case of texture state update, used when a new texture with
+ * the exact same attributes as the previously bound one has simply been
+ * bound to a texture unit (very common in Quake3).  Only the scales, the
+ * palette and TexSource[] are refreshed; each branch tests unit 0 against
+ * unit 1.  NOTE(review): assumes bound textures are resident (tm[] set).
+ */
+void
+tdfxUpdateTextureBinding( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   struct gl_texture_object *tObj0 = ctx->Texture.Unit[0]._Current;
+   struct gl_texture_object *tObj1 = ctx->Texture.Unit[1]._Current;
+   tdfxTexInfo *ti0 = TDFX_TEXTURE_DATA(tObj0);
+   tdfxTexInfo *ti1 = TDFX_TEXTURE_DATA(tObj1);
+
+   const struct gl_shared_state *mesaShared = fxMesa->glCtx->Shared;
+   const struct tdfxSharedState *shared = (struct tdfxSharedState *) mesaShared->DriverData;
+
+   if (ti0) {
+      fxMesa->sScale0 = ti0->sScale;
+      fxMesa->tScale0 = ti0->tScale;
+      if (ti0->info.format == GR_TEXFMT_P_8) {
+         fxMesa->TexPalette.Type = ti0->paltype;
+         fxMesa->TexPalette.Data = &(ti0->palette);
+         fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_PALETTE;
+      }
+      else if (ti1 && ti1->info.format == GR_TEXFMT_P_8) {
+         fxMesa->TexPalette.Type = ti1->paltype;
+         fxMesa->TexPalette.Data = &(ti1->palette);
+         fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_PALETTE;
+      }
+   }
+   if (ti1) {
+      fxMesa->sScale1 = ti1->sScale;
+      fxMesa->tScale1 = ti1->tScale;
+   }
+
+   if (ctx->Texture.Unit[0]._ReallyEnabled & (TEXTURE_1D_BIT|TEXTURE_2D_BIT) &&
+       ctx->Texture.Unit[1]._ReallyEnabled == 0) {
+      /* Only unit 0 2D enabled */
+      if (shared->umaTexMemory) {
+         assert(ti0);
+         fxMesa->TexSource[0].StartAddress = ti0->tm[0]->startAddr;
+         fxMesa->TexSource[0].EvenOdd = GR_MIPMAPLEVELMASK_BOTH;
+         fxMesa->TexSource[0].Info = &(ti0->info);
+      }
+      else {
+         assert(ti0);
+         if (ti0->LODblend && ti0->whichTMU == TDFX_TMU_SPLIT) {
+            fxMesa->TexSource[0].StartAddress = ti0->tm[TDFX_TMU0]->startAddr;
+            fxMesa->TexSource[0].EvenOdd = GR_MIPMAPLEVELMASK_ODD;
+            fxMesa->TexSource[0].Info = &(ti0->info);
+            fxMesa->TexSource[1].StartAddress = ti0->tm[TDFX_TMU1]->startAddr;
+            fxMesa->TexSource[1].EvenOdd = GR_MIPMAPLEVELMASK_EVEN;
+            fxMesa->TexSource[1].Info = &(ti0->info);
+         }
+         else {
+            FxU32 tmu;
+            if (ti0->whichTMU == TDFX_TMU_BOTH)
+               tmu = TDFX_TMU0;
+            else
+               tmu = ti0->whichTMU;
+            fxMesa->TexSource[0].Info = NULL;
+            fxMesa->TexSource[1].Info = NULL;
+            if (ti0->tm[tmu]) {
+               fxMesa->TexSource[tmu].StartAddress = ti0->tm[tmu]->startAddr;
+               fxMesa->TexSource[tmu].EvenOdd = GR_MIPMAPLEVELMASK_BOTH;
+               fxMesa->TexSource[tmu].Info = &(ti0->info);
+            }
+         }
+      }
+   }
+   else if (ctx->Texture.Unit[0]._ReallyEnabled == 0 &&
+            ctx->Texture.Unit[1]._ReallyEnabled & (TEXTURE_1D_BIT|TEXTURE_2D_BIT)) {
+      /* Only unit 1 2D enabled (only the UMA case is handled here) */
+      if (shared->umaTexMemory) {
+         assert(ti1);
+         fxMesa->TexSource[0].StartAddress = ti1->tm[0]->startAddr;
+         fxMesa->TexSource[0].EvenOdd = GR_MIPMAPLEVELMASK_BOTH;
+         fxMesa->TexSource[0].Info = &(ti1->info);
+      }
+   }
+   else if (ctx->Texture.Unit[0]._ReallyEnabled & (TEXTURE_1D_BIT|TEXTURE_2D_BIT) &&
+            ctx->Texture.Unit[1]._ReallyEnabled & (TEXTURE_1D_BIT|TEXTURE_2D_BIT)) {
+      /* Both 2D enabled.  NOTE(review): fixed TMU order 0/1 here, setupDoubleTMU may swap -- confirm */
+      if (shared->umaTexMemory) {
+         const FxU32 tmu0 = 0, tmu1 = 1;
+
+         assert(ti0);
+         fxMesa->TexSource[tmu0].StartAddress = ti0->tm[0]->startAddr;
+         fxMesa->TexSource[tmu0].EvenOdd = GR_MIPMAPLEVELMASK_BOTH;
+         fxMesa->TexSource[tmu0].Info = &(ti0->info);
+
+         assert(ti1);
+         fxMesa->TexSource[tmu1].StartAddress = ti1->tm[0]->startAddr;
+         fxMesa->TexSource[tmu1].EvenOdd = GR_MIPMAPLEVELMASK_BOTH;
+         fxMesa->TexSource[tmu1].Info = &(ti1->info);
+      }
+      else {
+         const FxU32 tmu0 = 0, tmu1 = 1;
+
+         assert(ti0);
+         fxMesa->TexSource[tmu0].StartAddress = ti0->tm[tmu0]->startAddr;
+         fxMesa->TexSource[tmu0].EvenOdd = GR_MIPMAPLEVELMASK_BOTH;
+         fxMesa->TexSource[tmu0].Info = &(ti0->info);
+
+         assert(ti1);
+         fxMesa->TexSource[tmu1].StartAddress = ti1->tm[tmu1]->startAddr;
+         fxMesa->TexSource[tmu1].EvenOdd = GR_MIPMAPLEVELMASK_BOTH;
+         fxMesa->TexSource[tmu1].Info = &(ti1->info);
+      }
+   }
+
+   fxMesa->dirty |= TDFX_UPLOAD_TEXTURE_SOURCE;
+}
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_texstate.h b/src/mesa/drivers/dri/tdfx/tdfx_texstate.h
new file mode 100644
index 0000000000..0c5c4101ca
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_texstate.h
@@ -0,0 +1,43 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Original rewrite:
+ *	Gareth Hughes <gareth@valinux.com>, 29 Sep - 1 Oct 2000
+ *
+ * Authors:
+ *	Gareth Hughes <gareth@valinux.com>
+ *	Brian Paul <brianp@valinux.com>
+ *
+ */
+
+#ifndef __TDFX_TEXSTATE_H__
+#define __TDFX_TEXSTATE_H__
+
+extern void tdfxUpdateTextureState( GLcontext *ctx );    /* full texture state rebuild */
+extern void tdfxUpdateTextureBinding( GLcontext *ctx );  /* rebind-only special case */
+
+#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_tris.c b/src/mesa/drivers/dri/tdfx/tdfx_tris.c
new file mode 100644
index 0000000000..d65833c20b
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_tris.c
@@ -0,0 +1,1291 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* New fixes:
+ *	Daniel Borca <dborca@users.sourceforge.net>, 19 Jul 2004
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/colormac.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "swrast_setup/ss_context.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "tdfx_tris.h"
+#include "tdfx_state.h"
+#include "tdfx_vb.h"
+#include "tdfx_lock.h"
+#include "tdfx_render.h"
+
+
+static void tdfxRasterPrimitive( GLcontext *ctx, GLenum prim );
+static void tdfxRenderPrimitive( GLcontext *ctx, GLenum prim );
+
+static const GLenum reduced_prim[GL_POLYGON+1] = {   /* GL primitive -> point/line/triangle class */
+   GL_POINTS,      /* GL_POINTS */
+   GL_LINES,       /* GL_LINES */
+   GL_LINES,       /* GL_LINE_LOOP */
+   GL_LINES,       /* GL_LINE_STRIP */
+   GL_TRIANGLES,   /* GL_TRIANGLES */
+   GL_TRIANGLES,   /* GL_TRIANGLE_STRIP */
+   GL_TRIANGLES,   /* GL_TRIANGLE_FAN */
+   GL_TRIANGLES,   /* GL_QUADS */
+   GL_TRIANGLES,   /* GL_QUAD_STRIP */
+   GL_TRIANGLES    /* GL_POLYGON */
+};
+
+/***********************************************************************
+ *          Macros for t_dd_tritmp.h to draw basic primitives          *
+ ***********************************************************************/
+/* Draw one triangle via the context's draw_triangle hook (DO_FALLBACK) or Glide. */
+#define TRI( a, b, c )				\
+do {						\
+   if (DO_FALLBACK)				\
+      fxMesa->draw_triangle( fxMesa, a, b, c );	\
+   else						\
+      fxMesa->Glide.grDrawTriangle( a, b, c );	\
+} while (0)
+
+#define QUAD( a, b, c, d )			\
+do {						\
+   if (DO_FALLBACK) {				\
+      fxMesa->draw_triangle( fxMesa, a, b, d );	\
+      fxMesa->draw_triangle( fxMesa, b, c, d );	\
+   } else {					\
+      tdfxVertex *_v_[4];			\
+      _v_[0] = d;				\
+      _v_[1] = a;				\
+      _v_[2] = b;				\
+      _v_[3] = c;				\
+      fxMesa->Glide.grDrawVertexArray(GR_TRIANGLE_FAN, 4, _v_);\
+      /* Fan d,a,b,c: presumably triangles (d,a,b) and (d,b,c), i.e. */\
+      /* the same pair as (a,b,d) + (b,c,d) above -- confirm in Glide. */\
+   }						\
+} while (0)
+/* Draw one line; the hw path shifts both endpoints from TRI to LINE offsets around the call, then back. */
+#define LINE( v0, v1 )				\
+do {						\
+   if (DO_FALLBACK)				\
+      fxMesa->draw_line( fxMesa, v0, v1 );	\
+   else {					\
+      (v0)->x += LINE_X_OFFSET - TRI_X_OFFSET;	\
+      (v0)->y += LINE_Y_OFFSET - TRI_Y_OFFSET;	\
+      (v1)->x += LINE_X_OFFSET - TRI_X_OFFSET;	\
+      (v1)->y += LINE_Y_OFFSET - TRI_Y_OFFSET;	\
+      fxMesa->Glide.grDrawLine( v0, v1 );	\
+      (v0)->x -= LINE_X_OFFSET - TRI_X_OFFSET;	\
+      (v0)->y -= LINE_Y_OFFSET - TRI_Y_OFFSET;	\
+      (v1)->x -= LINE_X_OFFSET - TRI_X_OFFSET;	\
+      (v1)->y -= LINE_Y_OFFSET - TRI_Y_OFFSET;	\
+   }						\
+} while (0)
+/* Draw one point; the hw path shifts the vertex from TRI to PNT offsets around the call, then back. */
+#define POINT( v0 )				\
+do {						\
+   if (DO_FALLBACK)				\
+      fxMesa->draw_point( fxMesa, v0 );		\
+   else {					\
+      (v0)->x += PNT_X_OFFSET - TRI_X_OFFSET;	\
+      (v0)->y += PNT_Y_OFFSET - TRI_Y_OFFSET;	\
+      fxMesa->Glide.grDrawPoint( v0 );		\
+      (v0)->x -= PNT_X_OFFSET - TRI_X_OFFSET;	\
+      (v0)->y -= PNT_Y_OFFSET - TRI_Y_OFFSET;	\
+   }						\
+} while (0)
+
+
+/***********************************************************************
+ *              Fallback to swrast for basic primitives                *
+ ***********************************************************************/
+
+/* Translate a hardware tdfxVertex into the SWvertex layout used by swrast.
+ *
+ * Hit only for unaccelerated primitives when accelerated and unaccelerated
+ * primitives are mixed.  Position x/y/z, colour and point size are always
+ * filled; W and texture coordinates depend on the vertex layout.
+ */
+static void 
+tdfx_translate_vertex( GLcontext *ctx, const tdfxVertex *src, SWvertex *dst)
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   GLfloat w;
+
+   /* Window position: remove the x/y offsets kept in the context. */
+   dst->attrib[FRAG_ATTRIB_WPOS][0] = src->x - fxMesa->x_offset;
+   dst->attrib[FRAG_ATTRIB_WPOS][1] = src->y - (fxMesa->screen_height - fxMesa->height - fxMesa->y_offset);
+   dst->attrib[FRAG_ATTRIB_WPOS][2] = src->z;
+
+   /* Channels 0 and 2 trade places between the two vertex formats. */
+   dst->color[0] = src->color[2];
+   dst->color[1] = src->color[1];
+   dst->color[2] = src->color[0];
+   dst->color[3] = src->color[3];
+
+   dst->pointSize = ctx->Point.Size;
+
+   if (fxMesa->vertexFormat == TDFX_LAYOUT_TINY) {
+      /* TINY layout: W is fixed at 1 and no texcoords are translated. */
+      dst->attrib[FRAG_ATTRIB_WPOS][3] = 1.0;
+      return;
+   }
+
+   w = 1.0 / src->rhw;
+   dst->attrib[FRAG_ATTRIB_WPOS][3] = src->rhw;
+
+   /* Unit 0 texcoords: divide out the texture scale and rhw. */
+   dst->attrib[FRAG_ATTRIB_TEX0][0] = 1.0 / fxMesa->sScale0 * w * src->tu0;
+   dst->attrib[FRAG_ATTRIB_TEX0][1] = 1.0 / fxMesa->tScale0 * w * src->tv0;
+   if (fxMesa->vertexFormat != TDFX_LAYOUT_PROJ1 &&
+       fxMesa->vertexFormat != TDFX_LAYOUT_PROJ2) {
+      dst->attrib[FRAG_ATTRIB_TEX0][3] = 1.0;
+   } else {
+      dst->attrib[FRAG_ATTRIB_TEX0][3] = w * src->tq0;
+   }
+
+   if (!(fxMesa->SetupIndex & TDFX_TEX1_BIT))
+      return;
+
+   /* Unit 1 texcoords, same recipe; q is only present for PROJ2. */
+   dst->attrib[FRAG_ATTRIB_TEX1][0] = 1.0 / fxMesa->sScale1 * w * src->tu1;
+   dst->attrib[FRAG_ATTRIB_TEX1][1] = 1.0 / fxMesa->tScale1 * w * src->tv1;
+   if (fxMesa->vertexFormat != TDFX_LAYOUT_PROJ2) {
+      dst->attrib[FRAG_ATTRIB_TEX1][3] = 1.0;
+   } else {
+      dst->attrib[FRAG_ATTRIB_TEX1][3] = w * src->tq1;
+   }
+}
+
+
+/* Software path for one triangle: translate the three hardware vertices
+ * to SWvertex form and hand them to swrast. */
+static void 
+tdfx_fallback_tri( tdfxContextPtr fxMesa, 
+		   tdfxVertex *v0, tdfxVertex *v1, tdfxVertex *v2 )
+{
+   GLcontext *ctx = fxMesa->glCtx;
+   SWvertex sw0, sw1, sw2;
+   tdfx_translate_vertex( ctx, v0, &sw0 );
+   tdfx_translate_vertex( ctx, v1, &sw1 );
+   tdfx_translate_vertex( ctx, v2, &sw2 );
+   _swrast_Triangle( ctx, &sw0, &sw1, &sw2 );
+}
+
+
+/* Software path for one line segment: both endpoints are translated to
+ * SWvertex form before swrast draws the line. */
+static void 
+tdfx_fallback_line( tdfxContextPtr fxMesa, tdfxVertex *v0, tdfxVertex *v1 )
+{
+   GLcontext *ctx = fxMesa->glCtx;
+   SWvertex sw0, sw1;
+   tdfx_translate_vertex( ctx, v0, &sw0 );
+   tdfx_translate_vertex( ctx, v1, &sw1 );
+   _swrast_Line( ctx, &sw0, &sw1 );
+}
+
+
+/* Software path for one point: translate the vertex, then let swrast draw it. */
+static void 
+tdfx_fallback_point( tdfxContextPtr fxMesa, tdfxVertex *v0 )
+{
+   GLcontext *ctx = fxMesa->glCtx;
+   SWvertex sw;
+   tdfx_translate_vertex( ctx, v0, &sw );
+   _swrast_Point( ctx, &sw );
+}
+
+/***********************************************************************
+ *                 Functions to draw basic primitives                  *
+ ***********************************************************************/
+
+static void tdfx_print_vertex( GLcontext *ctx, const tdfxVertex *v )
+{
+   tdfxContextPtr tmesa = TDFX_CONTEXT( ctx );
+   /* Debug helper: dump position (plus rhw unless TINY layout) and colour to stderr. */
+   fprintf(stderr, "vertex at %p\n", (void *)v);
+
+   if (tmesa->vertexFormat == TDFX_LAYOUT_TINY) {
+      fprintf(stderr, "x %f y %f z %f\n", v->x, v->y, v->z);
+   } 
+   else {
+      fprintf(stderr, "x %f y %f z %f oow %f\n", 
+	      v->x, v->y, v->z, v->rhw);
+   }
+   fprintf(stderr, "r %d g %d b %d a %d\n", 
+	      v->color[2],   /* red: vertex colour is stored B,G,R,A (see VERT_SET_RGBA) */
+	      v->color[1],
+	      v->color[0],   /* blue */
+	      v->color[3]);
+   
+   fprintf(stderr, "\n");
+}
+
+#define DO_FALLBACK 0
+
+/* Hardware triangle path.  The clip loop is run at each triangle because
+ * swrast and hw rendering are being mixed; this function and the line and
+ * point variants that follow are only used when mixed-mode rendering is
+ * occurring.
+ */
+static void tdfx_draw_triangle( tdfxContextPtr fxMesa,
+				tdfxVertexPtr v0,
+				tdfxVertexPtr v1,
+				tdfxVertexPtr v2 )
+{
+   /* DO_FALLBACK is defined as 0 for this section, so TRI() takes its
+    * Glide grDrawTriangle branch instead of calling back through
+    * fxMesa->draw_triangle.  For debugging, tdfx_print_vertex() can be
+    * called here on v0, v1 and v2 with fxMesa->glCtx. */
+   BEGIN_CLIP_LOOP_LOCKED(fxMesa) {
+      TRI( v0, v1, v2 );
+   } END_CLIP_LOOP_LOCKED(fxMesa);
+}
+
+static void tdfx_draw_line( tdfxContextPtr fxMesa,
+			    tdfxVertexPtr v0,
+			    tdfxVertexPtr v1 )
+{
+   /* No support for wide lines (avoid wide/aa line fallback).  With
+    * DO_FALLBACK defined as 0 here, LINE() takes its grDrawLine branch. */
+   BEGIN_CLIP_LOOP_LOCKED(fxMesa) {
+      LINE(v0, v1);
+   } END_CLIP_LOOP_LOCKED(fxMesa);
+}
+
+static void tdfx_draw_point( tdfxContextPtr fxMesa,
+			     tdfxVertexPtr v0 )
+{
+   /* No support for wide points.  With DO_FALLBACK defined as 0 here,
+    * POINT() takes its grDrawPoint branch. */
+   BEGIN_CLIP_LOOP_LOCKED(fxMesa) {
+      POINT( v0 );
+   } END_CLIP_LOOP_LOCKED(fxMesa);
+}
+
+#undef DO_FALLBACK
+
+
+#define TDFX_UNFILLED_BIT    0x1   /* flag bits, OR-ed together to form IND for each template instantiation */
+#define TDFX_OFFSET_BIT	     0x2
+#define TDFX_TWOSIDE_BIT     0x4
+#define TDFX_FLAT_BIT        0x8
+#define TDFX_FALLBACK_BIT    0x10
+#define TDFX_MAX_TRIFUNC     0x20  /* table size: one slot per combination of the five bits above */
+/* Per-combination rasterization functions; presumably filled by the template-generated init*() -- confirm. */
+static struct {
+   tnl_points_func	        points;
+   tnl_line_func		line;
+   tnl_triangle_func	triangle;
+   tnl_quad_func		quad;
+} rast_tab[TDFX_MAX_TRIFUNC];
+
+#define DO_FALLBACK (IND & TDFX_FALLBACK_BIT)   /* per-instantiation switches: each tests one bit of IND */
+#define DO_OFFSET   (IND & TDFX_OFFSET_BIT)
+#define DO_UNFILLED (IND & TDFX_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & TDFX_TWOSIDE_BIT)
+#define DO_FLAT     (IND & TDFX_FLAT_BIT)
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+/* Fixed configuration consumed by the tnl_dd templates included later (exact meaning is defined there). */
+#define HAVE_SPEC   0
+#define HAVE_HW_FLATSHADE 0
+#define HAVE_BACK_COLORS  0
+#define VERTEX tdfxVertex
+#define TAB rast_tab
+
+#define DEPTH_SCALE 1.0
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) ((_v)->x)
+#define VERT_Y(_v) ((_v)->y)
+#define VERT_Z(_v) ((_v)->z)
+#define AREA_IS_CCW( a ) ((a) < 0)
+#define GET_VERTEX(e) (fxMesa->verts + (e))
+/* Store float RGBA into a vertex: red lands in color[2], blue in color[0]. */
+#define VERT_SET_RGBA( dst, f )			\
+do {						\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)->color[2], (f)[0]);\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)->color[1], (f)[1]);\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)->color[0], (f)[2]);\
+   UNCLAMPED_FLOAT_TO_UBYTE((dst)->color[3], (f)[3]);\
+} while (0)
+/* Copy/save/restore move a vertex colour through a single GLuint-sized access. */
+#define VERT_COPY_RGBA( v0, v1 ) 		\
+   *(GLuint *)&(v0)->color = *(GLuint *)&(v1)->color
+
+#define VERT_SAVE_RGBA( idx )  			\
+   *(GLuint *)&color[idx] = *(GLuint *)&v[idx]->color
+
+#define VERT_RESTORE_RGBA( idx )		\
+   *(GLuint *)&v[idx]->color = *(GLuint *)&color[idx]
+
+#define LOCAL_VARS(n)					\
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);		\
+   GLubyte color[n][4];					\
+   (void) color;
+
+
+
+/***********************************************************************
+ *            Functions to draw basic unfilled primitives              *
+ ***********************************************************************/
+
+#define RASTERIZE(x) if (fxMesa->raster_primitive != reduced_prim[x]) \
+                        tdfxRasterPrimitive( ctx, reduced_prim[x] )
+#define RENDER_PRIMITIVE fxMesa->render_primitive
+#define IND TDFX_FALLBACK_BIT
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+/***********************************************************************
+ *                 Functions to draw GL primitives                     *
+ ***********************************************************************/
+/* Each IND/TAG pair instantiates t_dd_tritmp.h once; TAG appends a suffix naming the IND bits that are set. */
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_OFFSET_BIT|TDFX_UNFILLED_BIT)
+#define TAG(x) x##_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_OFFSET_BIT|TDFX_UNFILLED_BIT)
+#define TAG(x) x##_twoside_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_FALLBACK_BIT)
+#define TAG(x) x##_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_OFFSET_BIT|TDFX_FALLBACK_BIT)
+#define TAG(x) x##_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_FALLBACK_BIT)
+#define TAG(x) x##_twoside_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_OFFSET_BIT|TDFX_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_UNFILLED_BIT|TDFX_FALLBACK_BIT)
+#define TAG(x) x##_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_OFFSET_BIT|TDFX_UNFILLED_BIT|TDFX_FALLBACK_BIT)
+#define TAG(x) x##_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_UNFILLED_BIT|TDFX_FALLBACK_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_OFFSET_BIT|TDFX_UNFILLED_BIT| \
+	     TDFX_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+/* Flat-shaded variants (TDFX_FLAT_BIT).  HAVE_HW_FLATSHADE is 0 above:
+ * tdfx apparently lacks provoking-vertex flat shading in hardware. */
+#define IND (TDFX_FLAT_BIT)
+#define TAG(x) x##_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_OFFSET_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_offset_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_twoside_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_OFFSET_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_twoside_offset_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_UNFILLED_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_unfilled_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_OFFSET_BIT|TDFX_UNFILLED_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_offset_unfilled_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_UNFILLED_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_twoside_unfilled_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_OFFSET_BIT|TDFX_UNFILLED_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_FALLBACK_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_OFFSET_BIT|TDFX_FALLBACK_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_offset_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_FALLBACK_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_twoside_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_OFFSET_BIT|TDFX_FALLBACK_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_twoside_offset_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_UNFILLED_BIT|TDFX_FALLBACK_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_unfilled_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_OFFSET_BIT|TDFX_UNFILLED_BIT|TDFX_FALLBACK_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_offset_unfilled_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_UNFILLED_BIT|TDFX_FALLBACK_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (TDFX_TWOSIDE_BIT|TDFX_OFFSET_BIT|TDFX_UNFILLED_BIT| \
+	     TDFX_FALLBACK_BIT|TDFX_FLAT_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback_flat
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+/* Run the init function produced by every t_dd_tritmp.h instantiation
+ * above, in the same order in which the variants were instantiated
+ * (presumably each one fills in its own rast_tab[IND] entry).
+ */
+static void init_rast_tab( void )
+{
+   static void (* const init_funcs[])( void ) = {
+      /* smooth-shaded variants */
+      init,
+      init_offset,
+      init_twoside,
+      init_twoside_offset,
+      init_unfilled,
+      init_offset_unfilled,
+      init_twoside_unfilled,
+      init_twoside_offset_unfilled,
+      init_fallback,
+      init_offset_fallback,
+      init_twoside_fallback,
+      init_twoside_offset_fallback,
+      init_unfilled_fallback,
+      init_offset_unfilled_fallback,
+      init_twoside_unfilled_fallback,
+      init_twoside_offset_unfilled_fallback,
+
+      /* flat-shaded variants */
+      init_flat,
+      init_offset_flat,
+      init_twoside_flat,
+      init_twoside_offset_flat,
+      init_unfilled_flat,
+      init_offset_unfilled_flat,
+      init_twoside_unfilled_flat,
+      init_twoside_offset_unfilled_flat,
+      init_fallback_flat,
+      init_offset_fallback_flat,
+      init_twoside_fallback_flat,
+      init_twoside_offset_fallback_flat,
+      init_unfilled_fallback_flat,
+      init_offset_unfilled_fallback_flat,
+      init_twoside_unfilled_fallback_flat,
+      init_twoside_offset_unfilled_fallback_flat
+   };
+   const GLuint num_funcs = sizeof(init_funcs) / sizeof(init_funcs[0]);
+   GLuint k;
+
+   for (k = 0; k < num_funcs; k++)
+      init_funcs[k]();
+}
+
+
+/**********************************************************************/
+/*                 Render whole begin/end objects                     */
+/**********************************************************************/
+
+
+/* Accelerate vertex buffer rendering when renderindex == 0 and
+ * there is no clipping.
+ */
+#define INIT(x) tdfxRenderPrimitive( ctx, x )
+
+/* Fast path for GL_POINTS: draw vertices [start, count) straight out of
+ * the driver vertex buffer.  The vertices are shifted by the difference
+ * between the point and triangle offsets for the duration of the draw
+ * call and shifted back afterwards.
+ */
+static void tdfx_render_vb_points( GLcontext *ctx,
+                                   GLuint start,
+                                   GLuint count,
+                                   GLuint flags )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   tdfxVertex * const first = fxMesa->verts + start;
+   tdfxVertex * const end = fxMesa->verts + count;
+   tdfxVertex *v;
+   (void) flags;
+
+   INIT(GL_POINTS);
+
+   /* Move the vertices onto the point sample positions. */
+   for (v = first; v < end; v++) {
+      v->x += PNT_X_OFFSET - TRI_X_OFFSET;
+      v->y += PNT_Y_OFFSET - TRI_Y_OFFSET;
+   }
+
+   fxMesa->Glide.grDrawVertexArrayContiguous( GR_POINTS, count - start,
+                                              first, sizeof(tdfxVertex) );
+
+   /* Undo the shift so the buffer is left as we found it. */
+   for (v = first; v < end; v++) {
+      v->x -= PNT_X_OFFSET - TRI_X_OFFSET;
+      v->y -= PNT_Y_OFFSET - TRI_Y_OFFSET;
+   }
+}
+
+/* Fast path for GL_LINE_STRIP: draw vertices [start, count) as a single
+ * Glide line strip.  The vertices are shifted by the difference between
+ * the line and triangle offsets while drawing, then restored.
+ */
+static void tdfx_render_vb_line_strip( GLcontext *ctx,
+                                       GLuint start,
+                                       GLuint count,
+                                       GLuint flags )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   tdfxVertex * const first = fxMesa->verts + start;
+   tdfxVertex * const end = fxMesa->verts + count;
+   tdfxVertex *v;
+   (void) flags;
+
+   INIT(GL_LINE_STRIP);
+
+   /* Move the vertices onto the line sample positions. */
+   for (v = first; v < end; v++) {
+      v->x += LINE_X_OFFSET - TRI_X_OFFSET;
+      v->y += LINE_Y_OFFSET - TRI_Y_OFFSET;
+   }
+
+   fxMesa->Glide.grDrawVertexArrayContiguous( GR_LINE_STRIP, count - start,
+                                              first, sizeof(tdfxVertex) );
+
+   /* Undo the shift so the buffer is left as we found it. */
+   for (v = first; v < end; v++) {
+      v->x -= LINE_X_OFFSET - TRI_X_OFFSET;
+      v->y -= LINE_Y_OFFSET - TRI_Y_OFFSET;
+   }
+}
+
+/* Fast path for GL_LINE_LOOP over vertices [start, count).
+ *
+ * The loop is drawn as a line strip plus, when PRIM_END is set in
+ * flags, a closing segment from the last vertex back to `start`.  When
+ * PRIM_BEGIN is not set the strip begins at start+1 instead of start.
+ * Vertices are shifted by the line/triangle offset difference for the
+ * duration of the draw calls and restored afterwards.
+ *
+ * Ranges holding fewer than two vertices are skipped: they contain no
+ * segment, and without PRIM_BEGIN an empty range would make the strip
+ * length (count - j) wrap around to a huge unsigned value.
+ */
+static void tdfx_render_vb_line_loop( GLcontext *ctx,
+				      GLuint start,
+				      GLuint count,
+				      GLuint flags )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   tdfxVertex *fxVB = fxMesa->verts;
+   GLuint i;
+   GLuint j = start;
+
+   INIT(GL_LINE_LOOP);
+
+   /* Nothing to draw unless there are at least two vertices. */
+   if (start + 1 >= count)
+      return;
+
+   /* Not the start of the loop: skip the first vertex of the strip. */
+   if (!(flags & PRIM_BEGIN)) {
+      j++;
+   }
+
+   /* adjust line coords */
+   for (i = start; i < count; i++) {
+      fxVB[i].x += LINE_X_OFFSET - TRI_X_OFFSET;
+      fxVB[i].y += LINE_Y_OFFSET - TRI_Y_OFFSET;
+   }
+
+   fxMesa->Glide.grDrawVertexArrayContiguous( GR_LINE_STRIP, count-j,
+                                              fxVB + j, sizeof(tdfxVertex));
+
+   /* Close the loop only on the final piece of the primitive. */
+   if (flags & PRIM_END) 
+      fxMesa->Glide.grDrawLine( fxVB + (count - 1), 
+                                fxVB + start );
+
+   /* restore line coords */
+   for (i = start; i < count; i++) {
+      fxVB[i].x -= LINE_X_OFFSET - TRI_X_OFFSET;
+      fxVB[i].y -= LINE_Y_OFFSET - TRI_Y_OFFSET;
+   }
+}
+
+/* Fast path for GL_LINES: draw vertices [start, count) as independent
+ * Glide line segments.  The vertices are shifted by the difference
+ * between the line and triangle offsets while drawing, then restored.
+ */
+static void tdfx_render_vb_lines( GLcontext *ctx,
+                                  GLuint start,
+                                  GLuint count,
+                                  GLuint flags )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   tdfxVertex * const first = fxMesa->verts + start;
+   tdfxVertex * const end = fxMesa->verts + count;
+   tdfxVertex *v;
+   (void) flags;
+
+   INIT(GL_LINES);
+
+   /* Move the vertices onto the line sample positions. */
+   for (v = first; v < end; v++) {
+      v->x += LINE_X_OFFSET - TRI_X_OFFSET;
+      v->y += LINE_Y_OFFSET - TRI_Y_OFFSET;
+   }
+
+   fxMesa->Glide.grDrawVertexArrayContiguous( GR_LINES, count - start,
+                                              first, sizeof(tdfxVertex) );
+
+   /* Undo the shift so the buffer is left as we found it. */
+   for (v = first; v < end; v++) {
+      v->x -= LINE_X_OFFSET - TRI_X_OFFSET;
+      v->y -= LINE_Y_OFFSET - TRI_Y_OFFSET;
+   }
+}
+
+/* Fast path for GL_TRIANGLES: draw each complete group of three
+ * vertices in [start, count) with grDrawTriangle.
+ *
+ * [dBorca]
+ * grDrawVertexArrayContiguous( GR_TRIANGLES, ... ) is deliberately not
+ * used: it apparently causes trouble with some programs (GLExcess) and
+ * might be a bug in Glide.  It eventually calls grDrawTriangle for
+ * GR_TRIANGLES anyway, so we're better off doing it by hand.
+ */
+static void tdfx_render_vb_triangles( GLcontext *ctx,
+                                      GLuint start,
+                                      GLuint count,
+                                      GLuint flags )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   tdfxVertex *fxVB = fxMesa->verts;
+   GLuint base;
+   (void) flags;
+
+   INIT(GL_TRIANGLES);
+
+   /* base is the first vertex of a triangle; stop at a partial group. */
+   for (base = start; base + 2 < count; base += 3) {
+      fxMesa->Glide.grDrawTriangle( fxVB + base,
+                                    fxVB + (base + 1),
+                                    fxVB + (base + 2) );
+   }
+}
+
+
+/* Fast path for GL_TRIANGLE_STRIP: hand vertices [start, count) to Glide
+ * as one contiguous triangle strip.
+ */
+static void tdfx_render_vb_tri_strip( GLcontext *ctx,
+                                      GLuint start,
+                                      GLuint count,
+                                      GLuint flags )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   tdfxVertex *first = fxMesa->verts + start;
+   const GLuint nverts = count - start;
+   (void) flags;
+
+   INIT(GL_TRIANGLE_STRIP);
+
+   fxMesa->Glide.grDrawVertexArrayContiguous( GR_TRIANGLE_STRIP, nverts,
+                                              first, sizeof(tdfxVertex) );
+}
+
+
+/* Fast path for GL_TRIANGLE_FAN: hand vertices [start, count) to Glide
+ * as one contiguous triangle fan.
+ */
+static void tdfx_render_vb_tri_fan( GLcontext *ctx,
+                                    GLuint start,
+                                    GLuint count,
+                                    GLuint flags )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   tdfxVertex *first = fxMesa->verts + start;
+   const GLuint nverts = count - start;
+   (void) flags;
+
+   INIT(GL_TRIANGLE_FAN);
+
+   fxMesa->Glide.grDrawVertexArrayContiguous( GR_TRIANGLE_FAN, nverts,
+                                              first, sizeof(tdfxVertex) );
+}
+
+/* Fast path for GL_QUADS: every complete group of four vertices in
+ * [start, count) is drawn as a four-vertex triangle fan whose first
+ * vertex is the last vertex of the quad.
+ */
+static void tdfx_render_vb_quads( GLcontext *ctx,
+                                  GLuint start,
+                                  GLuint count,
+                                  GLuint flags )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   tdfxVertex *fxVB = fxMesa->verts;
+   GLuint base;
+   (void) flags;
+
+   INIT(GL_QUADS);
+
+   /* base is the first vertex of a quad; stop at a partial group. */
+   for (base = start; base + 3 < count; base += 4) {
+      tdfxVertex *fan[4];
+
+      fan[0] = fxVB + (base + 3);
+      fan[1] = fxVB + base;
+      fan[2] = fxVB + (base + 1);
+      fan[3] = fxVB + (base + 2);
+      fxMesa->Glide.grDrawVertexArray(GR_TRIANGLE_FAN, 4, fan);
+   }
+}
+
+/* Fast path for GL_QUAD_STRIP: the vertices are sent to Glide as a
+ * triangle strip.  An odd trailing vertex is dropped so that only an
+ * even number of vertices is drawn.
+ */
+static void tdfx_render_vb_quad_strip( GLcontext *ctx,
+                                       GLuint start,
+                                       GLuint count,
+                                       GLuint flags )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   tdfxVertex *first = fxMesa->verts + start;
+   /* Clear the low bit: round the vertex count down to an even number. */
+   const GLuint nverts = (count - start) & ~1u;
+   (void) flags;
+
+   INIT(GL_QUAD_STRIP);
+
+   fxMesa->Glide.grDrawVertexArrayContiguous( GR_TRIANGLE_STRIP, nverts,
+                                              first, sizeof(tdfxVertex) );
+}
+
+/* Fast path for GL_POLYGON: hand vertices [start, count) to Glide as
+ * one contiguous GR_POLYGON.
+ */
+static void tdfx_render_vb_poly( GLcontext *ctx,
+                                 GLuint start,
+                                 GLuint count,
+                                 GLuint flags )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   tdfxVertex *first = fxMesa->verts + start;
+   const GLuint nverts = count - start;
+   (void) flags;
+
+   INIT(GL_POLYGON);
+
+   fxMesa->Glide.grDrawVertexArrayContiguous( GR_POLYGON, nverts,
+                                              first, sizeof(tdfxVertex) );
+}
+
+/* Placeholder for the final slot of tdfx_render_tab_verts: draws
+ * nothing and ignores all of its arguments.
+ */
+static void tdfx_render_vb_noop( GLcontext *ctx,
+                                 GLuint start,
+                                 GLuint count,
+                                 GLuint flags )
+{
+   (void) ctx;
+   (void) start;
+   (void) count;
+   (void) flags;
+}
+
+static void (*tdfx_render_tab_verts[GL_POLYGON+2])(GLcontext *,
+						   GLuint,
+						   GLuint,
+						   GLuint) = 
+{  /* Per-primitive render functions for the non-indexed fast path;
+    * installed as PrimTabVerts by tdfxChooseRenderState when the render
+    * index is 0.  Each entry is annotated with the primitive it passes
+    * to INIT(); the last slot is a no-op.
+    */
+   tdfx_render_vb_points,        /* GL_POINTS */
+   tdfx_render_vb_lines,         /* GL_LINES */
+   tdfx_render_vb_line_loop,     /* GL_LINE_LOOP */
+   tdfx_render_vb_line_strip,    /* GL_LINE_STRIP */
+   tdfx_render_vb_triangles,     /* GL_TRIANGLES */
+   tdfx_render_vb_tri_strip,     /* GL_TRIANGLE_STRIP */
+   tdfx_render_vb_tri_fan,       /* GL_TRIANGLE_FAN */
+   tdfx_render_vb_quads,         /* GL_QUADS */
+   tdfx_render_vb_quad_strip,    /* GL_QUAD_STRIP */
+   tdfx_render_vb_poly,          /* GL_POLYGON */
+   tdfx_render_vb_noop,          /* extra slot past GL_POLYGON: nothing to draw */
+};
+#undef INIT
+
+
+/**********************************************************************/
+/*            Render whole (indexed) begin/end objects                */
+/**********************************************************************/
+
+
+#define VERT(x) (tdfxVertex *)(vertptr + (x))
+
+#define RENDER_POINTS( start, count )		\
+   for ( ; start < count ; start++)		\
+      fxMesa->Glide.grDrawPoint( VERT(ELT(start)) );
+
+#define RENDER_LINE( v0, v1 ) \
+   fxMesa->Glide.grDrawLine( VERT(v0), VERT(v1) )
+
+#define RENDER_TRI( v0, v1, v2 )  \
+   fxMesa->Glide.grDrawTriangle( VERT(v0), VERT(v1), VERT(v2) )
+
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+   do {					\
+      tdfxVertex *_v_[4];		\
+      _v_[0] = VERT(v3);		\
+      _v_[1] = VERT(v0);		\
+      _v_[2] = VERT(v1);		\
+      _v_[3] = VERT(v2);		\
+      fxMesa->Glide.grDrawVertexArray(GR_TRIANGLE_FAN, 4, _v_);\
+      /*fxMesa->Glide.grDrawTriangle( VERT(v0), VERT(v1), VERT(v3) );*/\
+      /*fxMesa->Glide.grDrawTriangle( VERT(v1), VERT(v2), VERT(v3) );*/\
+   } while (0)
+
+#define INIT(x) tdfxRenderPrimitive( ctx, x )
+
+#undef LOCAL_VARS
+#define LOCAL_VARS						\
+    tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);			\
+    tdfxVertex *vertptr = fxMesa->verts;			\
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;	\
+    (void) elt;
+
+#define RESET_STIPPLE 
+#define RESET_OCCLUSION 
+#define PRESERVE_VB_DEFS
+
+/* Elts, no clipping.
+ *
+ * Instantiates the render template with ELT() looking vertices up
+ * through the element list `elt` declared in LOCAL_VARS.  With this TAG
+ * the template's names take the form tdfx_<name>_elts; presumably this
+ * is where tdfx_render_tab_elts, used by tdfxChooseRenderState, comes
+ * from -- confirm against t_dd_rendertmp.h.
+ */
+#undef ELT
+#undef TAG
+#define TAG(x) tdfx_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl_dd/t_dd_rendertmp.h"
+
+/* Verts, no clipping.
+ *
+ * The template is NOT instantiated for this case (the include below is
+ * commented out); the hand-written tdfx_render_tab_verts table above is
+ * used for non-indexed rendering instead.  TAG and ELT are still
+ * redefined here.
+ */
+#undef ELT
+#undef TAG
+#define TAG(x) tdfx_##x##_verts
+#define ELT(x) x
+/*#include "tnl_dd/t_dd_rendertmp.h"*/
+
+
+
+/**********************************************************************/
+/*                   Render clipped primitives                        */
+/**********************************************************************/
+
+
+
+/* Render a clipped polygon, given as n vertex indices in elts, by
+ * pushing it through the installed indexed GL_POLYGON render function.
+ * The vertex buffer's element list is swapped for elts during the call
+ * and the previous render primitive is re-announced afterwards.
+ */
+static void tdfxRenderClippedPoly( GLcontext *ctx, const GLuint *elts, 
+                                   GLuint n )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   /* Remember the primitive in effect before the polygon is drawn. */
+   const GLuint saved_prim = TDFX_CONTEXT(ctx)->render_primitive;
+   GLuint * const saved_elts = VB->Elts;
+
+   /* Render the new vertices as an unclipped polygon. */
+   VB->Elts = (GLuint *)elts;
+   tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n,
+                                               PRIM_BEGIN|PRIM_END );
+   VB->Elts = saved_elts;
+
+   /* Restore the render primitive. */
+   if (saved_prim != GL_POLYGON) {
+      tnl->Driver.Render.PrimitiveNotify( ctx, saved_prim );
+   }
+}
+
+/* Render a clipped line between vertices ii and jj by forwarding to the
+ * currently installed TNL line function.
+ */
+static void tdfxRenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj )
+{
+   TNL_CONTEXT(ctx)->Driver.Render.Line( ctx, ii, jj );
+}
+
+/* Render a clipped polygon, given as n vertex indices in elts, directly
+ * through Glide:
+ *   - exactly 3 vertices: a single triangle;
+ *   - up to 32 vertices:  one triangle fan built from a pointer array;
+ *   - more than that:     individual triangles fanned around elts[0].
+ */
+static void tdfxFastRenderClippedPoly( GLcontext *ctx, const GLuint *elts, 
+                                       GLuint n )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   tdfxVertex *vertptr = fxMesa->verts;   /* used by VERT() */
+   tdfxVertex *fan[32];
+   GLuint k;
+
+   if (n == 3) {
+      fxMesa->Glide.grDrawTriangle( VERT(elts[0]), VERT(elts[1]),
+                                    VERT(elts[2]) );
+      return;
+   }
+
+   if (n <= sizeof(fan) / sizeof(fan[0])) {
+      /* Small enough to fit the on-stack pointer array. */
+      for (k = 0; k < n; k++)
+         fan[k] = VERT(elts[k]);
+      fxMesa->Glide.grDrawVertexArray(GR_TRIANGLE_FAN, n, fan);
+      return;
+   }
+
+   /* Too many vertices for the array: emit the fan one triangle at a time. */
+   {
+      const tdfxVertex *hub = VERT(elts[0]);
+      for (k = 2; k < n; k++) {
+         fxMesa->Glide.grDrawTriangle( hub, VERT(elts[k-1]), VERT(elts[k]) );
+      }
+   }
+}
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+
+#define POINT_FALLBACK (DD_POINT_SMOOTH)
+#define LINE_FALLBACK (DD_LINE_STIPPLE)
+#define TRI_FALLBACK (DD_TRI_SMOOTH)
+#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK|DD_TRI_STIPPLE)
+#define ANY_RASTER_FLAGS (DD_FLATSHADE|DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET| \
+			  DD_TRI_UNFILLED)
+
+
+/* All state referenced below:
+ */
+#define _TDFX_NEW_RENDERSTATE (_DD_NEW_POINT_SMOOTH |		\
+                               _DD_NEW_LINE_STIPPLE |		\
+                               _DD_NEW_TRI_SMOOTH |		\
+			       _DD_NEW_FLATSHADE |		\
+			       _DD_NEW_TRI_UNFILLED |		\
+			       _DD_NEW_TRI_LIGHT_TWOSIDE |	\
+			       _DD_NEW_TRI_OFFSET |		\
+			       _DD_NEW_TRI_STIPPLE |		\
+			       _NEW_POLYGONSTIPPLE)
+
+
+/* Derive the render index from ctx->_TriangleCaps, pick the per-context
+ * point/line/triangle draw functions, and, if the index changed,
+ * install the matching rast_tab entry and primitive tables in the TNL
+ * driver hooks.
+ */
+static void tdfxChooseRenderState(GLcontext *ctx)
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   const GLuint caps = ctx->_TriangleCaps;
+   GLuint index = 0;
+
+   if (caps & (ANY_FALLBACK_FLAGS|ANY_RASTER_FLAGS)) {
+      /* Raster-state bits (these four flags make up ANY_RASTER_FLAGS). */
+      if (caps & DD_TRI_LIGHT_TWOSIDE)
+         index |= TDFX_TWOSIDE_BIT;
+      if (caps & DD_TRI_OFFSET)
+         index |= TDFX_OFFSET_BIT;
+      if (caps & DD_TRI_UNFILLED)
+         index |= TDFX_UNFILLED_BIT;
+      if (caps & DD_FLATSHADE)
+         index |= TDFX_FLAT_BIT;
+
+      /* Start from the hardware draw functions... */
+      fxMesa->draw_point = tdfx_draw_point;
+      fxMesa->draw_line = tdfx_draw_line;
+      fxMesa->draw_triangle = tdfx_draw_triangle;
+
+      /* ...then hook in fallbacks for specific primitives.
+       *
+       * DD_TRI_UNFILLED is here because the unfilled_tri functions use
+       * fxMesa->draw_triangle *always*, and thus can't use the multipass
+       * approach to cliprects.
+       */
+      if (caps & (ANY_FALLBACK_FLAGS|DD_TRI_UNFILLED)) {
+         if (caps & POINT_FALLBACK)
+            fxMesa->draw_point = tdfx_fallback_point;
+
+         if (caps & LINE_FALLBACK)
+            fxMesa->draw_line = tdfx_fallback_line;
+
+         /* Smooth triangles, or stippled ones without hardware stipple. */
+         if ((caps & TRI_FALLBACK) ||
+             ((caps & DD_TRI_STIPPLE) && !fxMesa->haveHwStipple))
+            fxMesa->draw_triangle = tdfx_fallback_tri;
+
+         index |= TDFX_FALLBACK_BIT;
+      }
+   }
+
+   /* Nothing to reinstall when the index is unchanged. */
+   if (fxMesa->RenderIndex == index)
+      return;
+
+   fxMesa->RenderIndex = index;
+
+   tnl->Driver.Render.Points = rast_tab[index].points;
+   tnl->Driver.Render.Line = rast_tab[index].line;
+   tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+   tnl->Driver.Render.Quad = rast_tab[index].quad;
+
+   if (index == 0) {
+      /* Plain state: use the driver's own fast paths. */
+      tnl->Driver.Render.PrimTabVerts = tdfx_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = tdfx_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = line; /* from tritmp.h */
+      tnl->Driver.Render.ClippedPolygon = tdfxFastRenderClippedPoly;
+   }
+   else {
+      /* Anything else goes through the generic TNL tables. */
+      tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = tdfxRenderClippedLine;
+      tnl->Driver.Render.ClippedPolygon = tdfxRenderClippedPoly;
+   }
+}
+
+/**********************************************************************/
+/*                Use multipass rendering for cliprects               */
+/**********************************************************************/
+
+
+
+/* Multipass callback: program the Glide clip window for cliprect number
+ * <pass> and return GL_TRUE, or return GL_FALSE once <pass> has reached
+ * the number of cliprects.  The y coordinates are flipped against
+ * screen_height.
+ *
+ * TODO: Benchmark this.
+ * TODO: Use single back-buffer cliprect where possible.  
+ * NOTE: <pass> starts at 1, not zero!
+ */
+static GLboolean multipass_cliprect( GLcontext *ctx, GLuint pass )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+
+   /* All cliprects have been rendered. */
+   if (pass >= fxMesa->numClipRects) {
+      return GL_FALSE;
+   }
+
+   fxMesa->Glide.grClipWindow( fxMesa->pClipRects[pass].x1,
+                               fxMesa->screen_height - fxMesa->pClipRects[pass].y2,
+                               fxMesa->pClipRects[pass].x2,
+                               fxMesa->screen_height - fxMesa->pClipRects[pass].y1 );
+   return GL_TRUE;
+}
+
+
+/**********************************************************************/
+/*                Runtime render state and callbacks                  */
+/**********************************************************************/
+
+/* TNL RunPipeline hook: bring hardware and render state up to date,
+ * then run the TNL pipeline.  The vertex/render state is only rechosen
+ * while no software fallback is active.
+ */
+static void tdfxRunPipeline( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+
+   if (fxMesa->new_state)
+      tdfxDDUpdateHwState( ctx );
+
+   if (fxMesa->new_gl_state && !fxMesa->Fallback) {
+      if (fxMesa->new_gl_state & _TDFX_NEW_RASTERSETUP) {
+         tdfxChooseVertexState( ctx );
+      }
+
+      if (fxMesa->new_gl_state & _TDFX_NEW_RENDERSTATE) {
+         tdfxChooseRenderState( ctx );
+      }
+
+      /* All pending GL state changes have been consumed. */
+      fxMesa->new_gl_state = 0;
+   }
+
+   _tnl_run_pipeline( ctx );
+}
+
+
+/* TNL Render.Start hook: check texture sizes, take the hardware lock,
+ * flush dirty hardware state and set up cliprect handling.  The lock is
+ * still held when this function returns.
+ */
+static void tdfxRenderStart( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   /* Multipass hook to install; stays NULL unless several cliprects
+    * have to be rendered by the hardware path.
+    */
+   GLboolean (*multipass)( GLcontext *, GLuint ) = NULL;
+
+   tdfxCheckTexSizes( ctx );
+
+   LOCK_HARDWARE(fxMesa);
+
+   /* Make sure vertex format changes get uploaded before we start
+    * sending triangles.
+    */
+   if (fxMesa->dirty) {
+      tdfxEmitHwStateLocked( fxMesa );
+   }
+
+   if (fxMesa->numClipRects && !(fxMesa->RenderIndex & TDFX_FALLBACK_BIT)) {
+      /* The first cliprect is set up here; any further ones are
+       * handled by multipass_cliprect.
+       */
+      fxMesa->Glide.grClipWindow( fxMesa->pClipRects[0].x1,
+                                  fxMesa->screen_height - fxMesa->pClipRects[0].y2,
+                                  fxMesa->pClipRects[0].x2,
+                                  fxMesa->screen_height - fxMesa->pClipRects[0].y1 );
+      if (fxMesa->numClipRects > 1) {
+         multipass = multipass_cliprect;
+      }
+   }
+
+   tnl->Driver.Render.Multipass = multipass;
+}
+
+
+
+/* Record <prim> as the current raster primitive and upload any cull or
+ * stipple state that becomes dirty as a result.
+ *
+ * Always called between RenderStart and RenderFinish --> We already
+ * hold the lock.
+ */
+static void tdfxRasterPrimitive( GLcontext *ctx, GLenum prim )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+
+   FLUSH_BATCH( fxMesa );
+
+   fxMesa->raster_primitive = prim;
+
+   /* Cull mode: update, then upload if flagged dirty. */
+   tdfxUpdateCull( ctx );
+   if ((fxMesa->dirty & TDFX_UPLOAD_CULL) != 0) {
+      fxMesa->Glide.grCullMode( fxMesa->CullMode );
+      fxMesa->dirty &= ~TDFX_UPLOAD_CULL;
+   }
+
+   /* Stipple: update, then upload pattern and mode if flagged dirty. */
+   tdfxUpdateStipple( ctx );
+   if ((fxMesa->dirty & TDFX_UPLOAD_STIPPLE) != 0) {
+      fxMesa->Glide.grStipplePattern ( fxMesa->Stipple.Pattern );
+      fxMesa->Glide.grStippleMode ( fxMesa->Stipple.Mode );
+      fxMesa->dirty &= ~TDFX_UPLOAD_STIPPLE;
+   }
+}
+
+
+
+/* Determine the rasterized primitive when not drawing unfilled
+ * polygons.
+ *
+ * Used only for the default render stage which always decomposes
+ * primitives to triangles/lines/points.  For the accelerated stage,
+ * which renders strips as strips, the equivalent calculations are
+ * performed in tdfx_render.c.
+ *
+ * The GL primitive is always recorded in render_primitive; the raster
+ * primitive is only switched when the reduced primitive differs from
+ * the current one and is not an unfilled triangle.
+ */
+static void tdfxRenderPrimitive( GLcontext *ctx, GLenum prim )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   const GLuint rprim = reduced_prim[prim];
+   const GLboolean unfilled_tri =
+      (rprim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED);
+
+   fxMesa->render_primitive = prim;
+
+   if (!unfilled_tri && fxMesa->raster_primitive != rprim) {
+      tdfxRasterPrimitive( ctx, rprim );
+   }
+}
+
+/* TNL Render.Finish hook: flush swrast if the fallback render path was
+ * in use, then release the hardware lock.
+ */
+static void tdfxRenderFinish( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   const GLuint used_fallback = fxMesa->RenderIndex & TDFX_FALLBACK_BIT;
+
+   if (used_fallback) {
+      _swrast_flush( ctx );
+   }
+
+   UNLOCK_HARDWARE(fxMesa);
+}
+
+
+/**********************************************************************/
+/*               Manage total rasterization fallbacks                 */
+/**********************************************************************/
+
+/* Human-readable names for the fallback bits; entry i describes the
+ * bit (1 << i).
+ */
+static char *fallbackStrings[] = {
+   "3D/Rect/Cube Texture map",
+   "glDrawBuffer(GL_FRONT_AND_BACK)",
+   "Separate specular color",
+   "glEnable/Disable(GL_STENCIL_TEST)",
+   "glRenderMode(selection or feedback)",
+   "glLogicOp()",
+   "Texture env mode",
+   "Texture border",
+   "glColorMask",
+   "blend mode",
+   "line stipple",
+   "Rasterization disable"
+};
+
+
+/* Return the description of fallback bit <bit>.
+ *
+ * If several bits are set, the highest one is described.  A value with
+ * no bit set, or a bit that lies beyond the end of fallbackStrings,
+ * yields a generic string instead of reading outside the table.
+ */
+static char *getFallbackString(GLuint bit)
+{
+   const GLuint numStrings =
+      sizeof(fallbackStrings) / sizeof(fallbackStrings[0]);
+   GLuint i = 0;
+
+   if (bit == 0)
+      return "unknown fallback";
+
+   /* Find the position of the highest set bit. */
+   while (bit > 1) {
+      i++;
+      bit >>= 1;
+   }
+
+   if (i >= numStrings)
+      return "unknown fallback";
+
+   return fallbackStrings[i];
+}
+
+
+/* Set (mode != 0) or clear (mode == 0) the total-rasterization fallback
+ * bit <bit>.
+ *
+ * When the first fallback bit gets set, swsetup is woken up and the
+ * render index is invalidated.  When the last remaining bit gets
+ * cleared, the driver's TNL render hooks are reinstalled and the render
+ * and raster-setup state are flagged for re-evaluation.
+ */
+void tdfxFallback( GLcontext *ctx, GLuint bit, GLboolean mode )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   const GLuint previous = fxMesa->Fallback;
+
+   if (mode) {
+      fxMesa->Fallback |= bit;
+
+      /* Already in software rendering: nothing more to do. */
+      if (previous != 0)
+         return;
+
+      FLUSH_BATCH(fxMesa);
+      _swsetup_Wakeup( ctx );
+      fxMesa->RenderIndex = ~0;
+      if (TDFX_DEBUG & DEBUG_VERBOSE_FALL) {
+         fprintf(stderr, "Tdfx begin software fallback: 0x%x %s\n",
+                 bit, getFallbackString(bit));
+      }
+      return;
+   }
+
+   fxMesa->Fallback &= ~bit;
+
+   /* Other fallback bits remain set (or this one was not the only one). */
+   if (previous != bit)
+      return;
+
+   /* Back to hardware rendering. */
+   _swrast_flush( ctx );
+   tnl->Driver.Render.Start = tdfxRenderStart;
+   tnl->Driver.Render.PrimitiveNotify = tdfxRenderPrimitive;
+   tnl->Driver.Render.Finish = tdfxRenderFinish;
+   tnl->Driver.Render.BuildVertices = tdfxBuildVertices;
+   fxMesa->new_gl_state |= (_TDFX_NEW_RENDERSTATE|
+                            _TDFX_NEW_RASTERSETUP);
+   if (TDFX_DEBUG & DEBUG_VERBOSE_FALL) {
+      fprintf(stderr, "Tdfx end software fallback: 0x%x %s\n",
+              bit, getFallbackString(bit));
+   }
+}
+
+
+/* Install the tdfx render callbacks in the context's TNL driver table.
+ * The rasterization table is built on the first call only.
+ */
+void tdfxDDInitTriFuncs( GLcontext *ctx )
+{
+   static int tables_ready = 0;
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+
+   if (!tables_ready) {
+      init_rast_tab();
+      tables_ready = 1;
+   }
+
+   /* Invalid index: forces tdfxChooseRenderState to install its hooks. */
+   fxMesa->RenderIndex = ~0;
+
+   tnl->Driver.RunPipeline = tdfxRunPipeline;
+   tnl->Driver.Render.Start = tdfxRenderStart;
+   tnl->Driver.Render.Finish = tdfxRenderFinish;
+   tnl->Driver.Render.PrimitiveNotify = tdfxRenderPrimitive;
+   tnl->Driver.Render.ResetLineStipple = _swrast_ResetLineStipple;
+   tnl->Driver.Render.BuildVertices = tdfxBuildVertices;
+   tnl->Driver.Render.Multipass = NULL;
+
+   /* Reference the helper so it does not count as unused. */
+   (void) tdfx_print_vertex;
+}
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_tris.h b/src/mesa/drivers/dri/tdfx/tdfx_tris.h
new file mode 100644
index 0000000000..ec48a48692
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_tris.h
@@ -0,0 +1,41 @@
+/* -*- mode: c; c-basic-offset: 3 -*-
+ *
+ * Copyright 2000 VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *	Keith Whitwell <keith@tungstengraphics.com>
+ *
+ */
+
+#ifndef TDFX_TRIS_INC
+#define TDFX_TRIS_INC
+
+#include "main/mtypes.h"
+
+extern void tdfxDDInitTriFuncs( GLcontext *ctx );  /* install the tdfx TNL render callbacks for ctx */
+
+
+#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_vb.c b/src/mesa/drivers/dri/tdfx/tdfx_vb.c
new file mode 100644
index 0000000000..546d89aa84
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_vb.c
@@ -0,0 +1,347 @@
+/*
+ * GLX Hardware Device Driver for Intel i810
+ * Copyright (C) 1999 Keith Whitwell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+ 
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/colormac.h"
+
+#include "tdfx_context.h"
+#include "tdfx_vb.h"
+#include "tdfx_render.h"
+
+/* Copy the color of driver vertex esrc to driver vertex edst; the color
+ * is moved as one GLuint.
+ */
+static void copy_pv( GLcontext *ctx, GLuint edst, GLuint esrc )
+{
+   tdfxVertex *verts = TDFX_CONTEXT( ctx )->verts;
+
+   *(GLuint *)&verts[edst].color = *(GLuint *)&verts[esrc].color;
+}
+
+static struct {  /* Per-vertex-setup function table, indexed by fxMesa->SetupIndex
+                  * (a combination of TDFX_*_BIT flags).
+                  */
+   void                (*emit)( GLcontext *, GLuint, GLuint, void * );  /* called as emit(ctx, start, end, first vertex) */
+   tnl_interp_func		interp;  /* interpolation routine for this vertex format */
+   tnl_copy_pv_func	        copy_pv;  /* copy routine used through tnl Render.CopyPV */
+   GLboolean           (*check_tex_sizes)( GLcontext *ctx );  /* false => tdfxCheckTexSizes switches to the PTEX format */
+   GLuint               vertex_format;  /* compared against / stored in fxMesa->vertexFormat */
+} setup_tab[TDFX_MAX_SETUP];
+
+
+
+
+#define GET_COLOR(ptr, idx) ((ptr)->data[idx])
+
+
+/* Interpolation wrapper: besides the vertex data handled by the
+ * current setup's interp function, also interpolate the back-face color
+ * and derive the edge flag of the new vertex, when those arrays exist.
+ */
+static void interp_extras( GLcontext *ctx,
+                           GLfloat t,
+                           GLuint dst, GLuint out, GLuint in,
+                           GLboolean force_boundary )
+{
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+
+   /* The new vertex is an edge vertex if `out` was, or if forced. */
+   if (VB->EdgeFlag) {
+      VB->EdgeFlag[dst] = VB->EdgeFlag[out] || force_boundary;
+   }
+
+   if (VB->BackfaceColorPtr) {
+      INTERP_4F( t,
+                 GET_COLOR(VB->BackfaceColorPtr, dst),
+                 GET_COLOR(VB->BackfaceColorPtr, out),
+                 GET_COLOR(VB->BackfaceColorPtr, in) );
+   }
+
+   /* Finally interpolate the driver vertex itself. */
+   setup_tab[TDFX_CONTEXT(ctx)->SetupIndex].interp( ctx, t, dst, out, in,
+                                                    force_boundary );
+}
+
+/* Copy wrapper: copy the back-face color from src to dst when that
+ * array exists, then let the current setup's copy_pv function copy the
+ * driver vertex data.
+ */
+static void copy_pv_extras( GLcontext *ctx, GLuint dst, GLuint src )
+{
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+
+   if (VB->BackfaceColorPtr) {
+      COPY_4FV( GET_COLOR(VB->BackfaceColorPtr, dst),
+                GET_COLOR(VB->BackfaceColorPtr, src) );
+   }
+
+   setup_tab[TDFX_CONTEXT(ctx)->SetupIndex].copy_pv( ctx, dst, src );
+}
+
+
+
+#define IND (TDFX_XYZ_BIT|TDFX_RGBA_BIT)
+#define TAG(x) x##_wg
+#include "tdfx_vbtmp.h"
+
+/* Special for tdfx: fog requires w
+ */
+#define IND (TDFX_XYZ_BIT|TDFX_RGBA_BIT|TDFX_W_BIT)
+#define TAG(x) x##_wg_fog
+#include "tdfx_vbtmp.h"
+
+#define IND (TDFX_XYZ_BIT|TDFX_RGBA_BIT|TDFX_W_BIT|TDFX_TEX0_BIT)
+#define TAG(x) x##_wgt0
+#include "tdfx_vbtmp.h"
+
+#define IND (TDFX_XYZ_BIT|TDFX_RGBA_BIT|TDFX_W_BIT|TDFX_TEX0_BIT|TDFX_TEX1_BIT)
+#define TAG(x) x##_wgt0t1
+#include "tdfx_vbtmp.h"
+
+#define IND (TDFX_XYZ_BIT|TDFX_RGBA_BIT|TDFX_W_BIT|TDFX_TEX0_BIT|TDFX_PTEX_BIT)
+#define TAG(x) x##_wgpt0
+#include "tdfx_vbtmp.h"
+
+#define IND (TDFX_XYZ_BIT|TDFX_RGBA_BIT|TDFX_W_BIT|TDFX_TEX0_BIT|TDFX_TEX1_BIT|\
+             TDFX_PTEX_BIT)
+#define TAG(x) x##_wgpt0t1
+#include "tdfx_vbtmp.h"
+
+#define IND (TDFX_RGBA_BIT)
+#define TAG(x) x##_g
+#include "tdfx_vbtmp.h"
+
+#define IND (TDFX_TEX0_BIT)
+#define TAG(x) x##_t0
+#include "tdfx_vbtmp.h"
+
+#define IND (TDFX_TEX0_BIT|TDFX_TEX1_BIT)
+#define TAG(x) x##_t0t1
+#include "tdfx_vbtmp.h"
+
+#define IND (TDFX_RGBA_BIT|TDFX_TEX0_BIT)
+#define TAG(x) x##_gt0
+#include "tdfx_vbtmp.h"
+
+#define IND (TDFX_RGBA_BIT|TDFX_TEX0_BIT|TDFX_TEX1_BIT)
+#define TAG(x) x##_gt0t1
+#include "tdfx_vbtmp.h"
+
+
+/* fogc { */
+#define IND (TDFX_XYZ_BIT|TDFX_RGBA_BIT|TDFX_W_BIT|TDFX_FOGC_BIT)
+#define TAG(x) x##_wgf
+#include "tdfx_vbtmp.h"
+
+#define IND (TDFX_XYZ_BIT|TDFX_RGBA_BIT|TDFX_W_BIT|TDFX_TEX0_BIT|TDFX_FOGC_BIT)
+#define TAG(x) x##_wgt0f
+#include "tdfx_vbtmp.h"
+
+#define IND (TDFX_XYZ_BIT|TDFX_RGBA_BIT|TDFX_W_BIT|TDFX_TEX0_BIT|TDFX_TEX1_BIT|TDFX_FOGC_BIT)
+#define TAG(x) x##_wgt0t1f
+#include "tdfx_vbtmp.h"
+
+#define IND (TDFX_XYZ_BIT|TDFX_RGBA_BIT|TDFX_W_BIT|TDFX_TEX0_BIT|TDFX_PTEX_BIT|TDFX_FOGC_BIT)
+#define TAG(x) x##_wgpt0f
+#include "tdfx_vbtmp.h"
+
+#define IND (TDFX_XYZ_BIT|TDFX_RGBA_BIT|TDFX_W_BIT|TDFX_TEX0_BIT|TDFX_TEX1_BIT|\
+             TDFX_PTEX_BIT|TDFX_FOGC_BIT)
+#define TAG(x) x##_wgpt0t1f
+#include "tdfx_vbtmp.h"
+/* fogc } */
+
+
+/* Run the init function produced by every tdfx_vbtmp.h instantiation
+ * above, in the same order as before (presumably each one fills in its
+ * own setup_tab[IND] entry).
+ */
+static void init_setup_tab( void )
+{
+   static void (* const init_funcs[])( void ) = {
+      init_wg,
+      init_wg_fog,
+      init_wgt0,
+      init_wgt0t1,
+      init_wgpt0,
+      init_wgpt0t1,
+
+      init_g,
+      init_t0,
+      init_t0t1,
+      init_gt0,
+      init_gt0t1,
+
+      /* fogcoord */
+      init_wgf,
+      init_wgt0f,
+      init_wgt0t1f,
+      init_wgpt0f,
+      init_wgpt0t1f
+   };
+   const GLuint num_funcs = sizeof(init_funcs) / sizeof(init_funcs[0]);
+   GLuint k;
+
+   for (k = 0; k < num_funcs; k++)
+      init_funcs[k]();
+}
+
+
+/* Debug helper: print <msg>, the raw value of <flags> in hex, and the
+ * names of the setup bits set in it to stderr.
+ *
+ * The flags value is passed as unsigned int because that is the type
+ * the %x conversion requires.
+ */
+void tdfxPrintSetupFlags(char *msg, GLuint flags )
+{
+   fprintf(stderr, "%s(%x): %s%s%s%s%s%s\n",
+	   msg,
+	   (unsigned int)flags,
+	   (flags & TDFX_XYZ_BIT)     ? " xyz," : "", 
+	   (flags & TDFX_W_BIT)     ? " w," : "", 
+	   (flags & TDFX_RGBA_BIT)     ? " rgba," : "",
+	   (flags & TDFX_TEX0_BIT)     ? " tex-0," : "",
+	   (flags & TDFX_TEX1_BIT)     ? " tex-1," : "",
+	   (flags & TDFX_FOGC_BIT)     ? " fogc," : "");
+}
+
+
+
+/* Ask the current vertex setup whether its texture-size check passes.
+ * If not, switch the setup index to the projective-texture variant and,
+ * when that changes the vertex format, flag the new layout for upload
+ * and update the interp/copy hooks.
+ */
+void tdfxCheckTexSizes( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   GLuint ind;
+
+   if (setup_tab[fxMesa->SetupIndex].check_tex_sizes(ctx))
+      return;
+
+   fxMesa->SetupIndex |= (TDFX_PTEX_BIT|TDFX_RGBA_BIT);
+   ind = fxMesa->SetupIndex;
+
+   /* Tdfx handles projective textures nicely; just have to change
+    * up to the new vertex format.
+    */
+   if (setup_tab[ind].vertex_format == fxMesa->vertexFormat)
+      return;
+
+   FLUSH_BATCH(fxMesa);
+   fxMesa->dirty |= TDFX_UPLOAD_VERTEX_LAYOUT;
+   fxMesa->vertexFormat = setup_tab[ind].vertex_format;
+
+   /* This is required as we have just changed the vertex
+    * format, so the interp and copy routines must also change.
+    * In the unfilled and twosided cases we are using the
+    * swrast_setup ones anyway, so leave them in place.
+    */
+   if (ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))
+      return;
+
+   tnl->Driver.Render.Interp = setup_tab[fxMesa->SetupIndex].interp;
+   tnl->Driver.Render.CopyPV = setup_tab[fxMesa->SetupIndex].copy_pv;
+}
+
+
+/**
+ * (Re)build the hardware vertices for VB elements [start, end).
+ *
+ * \param ctx        GL context
+ * \param start      first vertex to build
+ * \param end        one past the last vertex to build
+ * \param newinputs  VERT_BIT_* mask of attributes that changed; it is
+ *                   merged with (and clears) fxMesa->SetupNewInputs
+ *
+ * When the position changed, the full emitter for the current SetupIndex
+ * is run.  Otherwise a narrower emitter covering only the changed
+ * attributes is selected.  Not every attribute combination has an entry
+ * in setup_tab (none of the init_* routines called by init_setup_tab
+ * covers, e.g., a fog-coordinate-only update), so if the narrowed slot
+ * has no emitter we fall back to the full one instead of calling through
+ * a NULL function pointer.
+ */
+void tdfxBuildVertices( GLcontext *ctx, GLuint start, GLuint end,
+			GLuint newinputs )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   tdfxVertex *v = fxMesa->verts + start;
+   GLuint ind;
+
+   newinputs |= fxMesa->SetupNewInputs;
+   fxMesa->SetupNewInputs = 0;
+
+   if (!newinputs)
+      return;
+
+   if (newinputs & VERT_BIT_POS) {
+      ind = fxMesa->SetupIndex;
+   } else {
+      ind = 0;
+
+      if (newinputs & VERT_BIT_COLOR0)
+	 ind |= TDFX_RGBA_BIT;
+
+      if (newinputs & VERT_BIT_FOG)
+	 ind |= TDFX_FOGC_BIT;
+
+      if (newinputs & VERT_BIT_TEX0)
+	 ind |= TDFX_TEX0_BIT;
+
+      if (newinputs & VERT_BIT_TEX1)
+	 ind |= TDFX_TEX0_BIT|TDFX_TEX1_BIT;
+
+      /* Projective layouts are always rebuilt in full. */
+      if (fxMesa->SetupIndex & TDFX_PTEX_BIT)
+	 ind = ~0;
+
+      ind &= fxMesa->SetupIndex;
+
+      if (!ind)
+	 return;
+
+      /* setup_tab has static storage, so slots no init_* routine filled
+       * hold a NULL emit pointer.  Re-emitting the complete vertex is a
+       * superset of the requested update (the PTEX case above already
+       * does exactly that).
+       */
+      if (!setup_tab[ind].emit)
+	 ind = fxMesa->SetupIndex;
+   }
+
+   setup_tab[ind].emit( ctx, start, end, v );
+}
+
+
+/**
+ * Derive SetupIndex, the TMU source mapping, the interp/copy_pv callbacks
+ * and the hardware vertex format from the enabled texture units, the fog
+ * mode and the triangle caps.
+ */
+void tdfxChooseVertexState( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   const GLboolean unit0 = (ctx->Texture._EnabledUnits & 0x1) ? GL_TRUE : GL_FALSE;
+   const GLboolean unit1 = (ctx->Texture._EnabledUnits & 0x2) ? GL_TRUE : GL_FALSE;
+   GLuint ind = TDFX_XYZ_BIT|TDFX_RGBA_BIT;
+
+   if (unit1) {
+      /* Unit 1 enabled: the two TMU sources are swapped. */
+      fxMesa->tmu_source[0] = 1;
+      fxMesa->tmu_source[1] = 0;
+      ind |= TDFX_W_BIT|TDFX_TEX0_BIT;
+      if (unit0) {
+         ind |= TDFX_TEX1_BIT;
+      }
+   } else {
+      fxMesa->tmu_source[0] = 0;
+      fxMesa->tmu_source[1] = 1;
+      if (unit0) {
+         /* unit 0 enabled */
+         ind |= TDFX_W_BIT|TDFX_TEX0_BIT;
+      } else if (fxMesa->Fog.Mode != GR_FOG_DISABLE) {
+         /* no texturing, but fog is on: W is still required */
+         ind |= TDFX_W_BIT;
+      }
+   }
+
+   if (fxMesa->Fog.Mode == GR_FOG_WITH_TABLE_ON_FOGCOORD_EXT) {
+      ind |= TDFX_FOGC_BIT;
+   }
+
+   fxMesa->SetupIndex = ind;
+
+   if (!(ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE|DD_TRI_UNFILLED))) {
+      tnl->Driver.Render.Interp = setup_tab[ind].interp;
+      tnl->Driver.Render.CopyPV = setup_tab[ind].copy_pv;
+   } else {
+      /* twosided / unfilled triangles use the "extras" routines */
+      tnl->Driver.Render.Interp = interp_extras;
+      tnl->Driver.Render.CopyPV = copy_pv_extras;
+   }
+
+   if (fxMesa->vertexFormat != setup_tab[ind].vertex_format) {
+      FLUSH_BATCH(fxMesa);
+      fxMesa->dirty |= TDFX_UPLOAD_VERTEX_LAYOUT;
+      fxMesa->vertexFormat = setup_tab[ind].vertex_format;
+   }
+}
+
+
+
+/**
+ * Per-context vertex-buffer setup: populate the shared setup_tab on the
+ * first call in the process, allocate a 32-byte aligned array of
+ * tdfxVertex sized to the TNL vertex buffer, and select the initial
+ * layout (TDFX_LAYOUT_TINY, XYZ|RGBA).
+ *
+ * NOTE(review): the allocation result is stored unchecked; callers
+ * appear to assume it succeeded -- confirm.
+ */
+void tdfxInitVB( GLcontext *ctx )
+{
+   static int tab_ready = 0;
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   const GLuint nverts = TNL_CONTEXT(ctx)->vb.Size;
+
+   if (!tab_ready) {
+      init_setup_tab();
+      tab_ready = 1;
+   }
+
+   fxMesa->verts = _mesa_align_malloc(nverts * sizeof(tdfxVertex), 32);
+   fxMesa->SetupIndex = TDFX_XYZ_BIT|TDFX_RGBA_BIT;
+   fxMesa->vertexFormat = TDFX_LAYOUT_TINY;
+}
+
+
+/**
+ * Release the vertex array allocated by tdfxInitVB(), if any, and clear
+ * the pointer so that a second call is a no-op.
+ */
+void tdfxFreeVB( GLcontext *ctx )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+
+   if (!fxMesa->verts)
+      return;
+
+   _mesa_align_free(fxMesa->verts);
+   fxMesa->verts = 0;
+}
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_vb.h b/src/mesa/drivers/dri/tdfx/tdfx_vb.h
new file mode 100644
index 0000000000..1e190e85f6
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_vb.h
@@ -0,0 +1,68 @@
+/*
+ * GLX Hardware Device Driver for 3dfx tdfx
+ * Copyright (C) 1999 Keith Whitwell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, 
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
+ * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+
+#ifndef TDFXVB_INC
+#define TDFXVB_INC
+
+#include "main/mtypes.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "math/m_xform.h"
+
+/* Vertex setup flags.  A bitwise OR of these forms the index into the
+ * driver's setup table (see the IND masks in tdfx_vb.c) and selects which
+ * attributes the generated emit routine writes.
+ */
+#define TDFX_XYZ_BIT        0x1	/* window x/y/z and rhw */
+#define TDFX_W_BIT          0x2	/* selects the NOTEX layout over TINY when untextured */
+#define TDFX_RGBA_BIT       0x4	/* packed colour */
+#define TDFX_TEX1_BIT       0x8	/* second texture coordinate set */
+#define TDFX_TEX0_BIT       0x10	
+#define TDFX_PTEX_BIT       0x20	/* projective texturing: also emit q */
+#define TDFX_FOGC_BIT       0x40	/* per-vertex fog coordinate */
+#define TDFX_MAX_SETUP      0x80	/* one past the largest flag combination */
+
+/* Mesa state bits combined under a single name; presumably the changes
+ * that require tdfxChooseVertexState() to be re-run -- confirm against
+ * the users of this macro.
+ */
+#define _TDFX_NEW_RASTERSETUP (_NEW_TEXTURE |			\
+			       _DD_NEW_SEPARATE_SPECULAR |	\
+			       _DD_NEW_TRI_UNFILLED |		\
+			       _DD_NEW_TRI_LIGHT_TWOSIDE |	\
+			       _NEW_FOG)
+
+
+/* NOTE(review): no definition of this function is visible next to the
+ * others in tdfx_vb.c -- confirm it still exists.
+ */
+extern void tdfxValidateBuildProjVerts(GLcontext *ctx,
+				       GLuint start, GLuint count,
+				       GLuint newinputs );
+
+/* Debug: print a flag mask and the names of its set bits to stderr. */
+extern void tdfxPrintSetupFlags(char *msg, GLuint flags );
+
+/* Allocate the per-context vertex array and set the initial layout. */
+extern void tdfxInitVB( GLcontext *ctx );
+
+/* Free the per-context vertex array. */
+extern void tdfxFreeVB( GLcontext *ctx );
+
+/* Switch to a projective-texture layout if the current one cannot hold
+ * the bound texture coordinates.
+ */
+extern void tdfxCheckTexSizes( GLcontext *ctx );
+
+/* Recompute SetupIndex, vertex format and interp/copy_pv callbacks. */
+extern void tdfxChooseVertexState( GLcontext *ctx );
+
+/* Build hardware vertices [start, end) for the attributes in newinputs. */
+extern void tdfxBuildVertices( GLcontext *ctx, GLuint start, GLuint end,
+                               GLuint newinputs );
+
+#endif
diff --git a/src/mesa/drivers/dri/tdfx/tdfx_vbtmp.h b/src/mesa/drivers/dri/tdfx/tdfx_vbtmp.h
new file mode 100644
index 0000000000..19baf7d0d2
--- /dev/null
+++ b/src/mesa/drivers/dri/tdfx/tdfx_vbtmp.h
@@ -0,0 +1,266 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  4.1
+ *
+ * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ *    Daniel Borca <dborca@users.sourceforge.net>
+ */
+
+
+/* Map one NDC coordinate through the hardware viewport matrix.  These
+ * macros expect a local `const GLfloat *s` pointing at that matrix
+ * (scale in s[0]/s[5]/s[10], translation in s[12]/s[13]/s[14]).
+ *
+ * Both arguments are parenthesised so that an expression argument such
+ * as `clip[0] * oow` or `a + b` is evaluated as a unit before it is
+ * scaled; unparenthesised, `a + b` would expand to `s[0] * a + b + s[12]`.
+ */
+#define VIEWPORT_X(dst,x) ((dst) = s[0]  * (x) + s[12])
+#define VIEWPORT_Y(dst,y) ((dst) = s[5]  * (y) + s[13])
+#define VIEWPORT_Z(dst,z) ((dst) = s[10] * (z) + s[14])
+
+
+/**
+ * Template: build hardware vertices for VB elements [start, end) into
+ * \p dest, writing only the attributes selected by the compile-time mask
+ * IND.
+ *
+ * \param ctx    GL context
+ * \param start  first VB element to convert
+ * \param end    one past the last VB element to convert
+ * \param dest   destination, treated as an array of tdfxVertex; the first
+ *               vertex written is dest[0], not dest[start]
+ *
+ * The attribute pointers/strides/sizes below are only initialised under
+ * the same IND tests that later guard their use, so each instantiation
+ * touches just the locals it needs.
+ */
+static void TAG(emit)( GLcontext *ctx,
+		       GLuint start, GLuint end,
+		       void *dest )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT(ctx);
+   struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
+   GLuint tmu0_source = fxMesa->tmu_source[0];
+   GLuint tmu1_source = fxMesa->tmu_source[1];
+   GLfloat (*tc0)[4], (*tc1)[4];
+   GLfloat (*col)[4];
+   GLuint tc0_stride, tc1_stride, col_stride;
+   GLuint tc0_size, tc1_size, col_size;
+   GLfloat (*proj)[4] = VB->NdcPtr->data; 
+   GLuint proj_stride = VB->NdcPtr->stride;
+   GLfloat (*fog)[4];
+   GLuint fog_stride;
+   tdfxVertex *v = (tdfxVertex *)dest;
+   GLfloat u0scale,v0scale,u1scale,v1scale;
+   const GLubyte *mask = VB->ClipMask;
+   const GLfloat *s = fxMesa->hw_viewport;
+   int i;
+
+/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+
+   /* Texture set 0 is read from the TNL texture attribute chosen by
+    * tmu_source[0]; its size is only needed for projective layouts.
+    */
+   if (IND & TDFX_TEX0_BIT) {
+      tc0_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + tmu0_source]->stride;
+      tc0 = VB->AttribPtr[_TNL_ATTRIB_TEX0 + tmu0_source]->data;
+      u0scale = fxMesa->sScale0;
+      v0scale = fxMesa->tScale0;
+      if (IND & TDFX_PTEX_BIT)
+	 tc0_size = VB->AttribPtr[_TNL_ATTRIB_TEX0 + tmu0_source]->size;
+   }
+
+   if (IND & TDFX_TEX1_BIT) {
+      tc1 = VB->AttribPtr[_TNL_ATTRIB_TEX0 + tmu1_source]->data;
+      tc1_stride = VB->AttribPtr[_TNL_ATTRIB_TEX0 + tmu1_source]->stride;
+      u1scale = fxMesa->sScale1;
+      v1scale = fxMesa->tScale1;
+      if (IND & TDFX_PTEX_BIT)
+	 tc1_size = VB->AttribPtr[_TNL_ATTRIB_TEX0 + tmu1_source]->size;
+   }
+   
+   if (IND & TDFX_RGBA_BIT) {
+      col = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data;
+      col_stride = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->stride;
+      col_size = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
+   }
+   
+   if (IND & TDFX_FOGC_BIT) {
+      fog = VB->AttribPtr[_TNL_ATTRIB_FOG]->data;
+      fog_stride = VB->AttribPtr[_TNL_ATTRIB_FOG]->stride;
+   }
+
+   {
+      /* May have nonstandard strides:
+       */
+      /* Advance every source pointer to element `start` by byte stride. */
+      if (start) {
+	 proj =  (GLfloat (*)[4])((GLubyte *)proj + start * proj_stride);
+	 if (IND & TDFX_TEX0_BIT)
+	    tc0 =  (GLfloat (*)[4])((GLubyte *)tc0 + start * tc0_stride);
+	 if (IND & TDFX_TEX1_BIT) 
+	    tc1 =  (GLfloat (*)[4])((GLubyte *)tc1 + start * tc1_stride);
+	 if (IND & TDFX_RGBA_BIT) 
+	    STRIDE_4F(col, start * col_stride);
+	 if (IND & TDFX_FOGC_BIT) 
+	    STRIDE_4F(fog, start * fog_stride);
+      }
+
+      for (i=start; i < end; i++, v++) {
+	 if (IND & TDFX_XYZ_BIT) {
+	    if (mask[i] == 0) {
+               /* unclipped */
+	       VIEWPORT_X(v->x, proj[0][0]);
+	       VIEWPORT_Y(v->y, proj[0][1]);
+	       VIEWPORT_Z(v->z, proj[0][2]);
+	       v->rhw = proj[0][3];	
+	    } else {
+               /* clipped */
+               /* x/y/z are left untouched; rhw gets a neutral value so
+                * the texture scaling below stays finite.
+                */
+               v->rhw = 1.0;
+	    }
+	    proj =  (GLfloat (*)[4])((GLubyte *)proj +  proj_stride);
+	 }
+	 if (IND & TDFX_RGBA_BIT) {
+	    /* Components 0 and 2 are swapped on the way into the packed
+	     * colour; alpha defaults to opaque for 3-component colours.
+	     */
+	    UNCLAMPED_FLOAT_TO_UBYTE(v->color[0], col[0][2]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(v->color[1], col[0][1]);
+	    UNCLAMPED_FLOAT_TO_UBYTE(v->color[2], col[0][0]);
+	    if (col_size == 4) {
+	       UNCLAMPED_FLOAT_TO_UBYTE(v->color[3], col[0][3]);
+	    } else {
+	       v->color[3] = 255;
+	    }
+	    STRIDE_4F(col, col_stride);
+	 }
+	 if (IND & TDFX_FOGC_BIT) {
+	    v->fog = CLAMP(fog[0][0], 0.0f, 1.0f);
+	    STRIDE_4F(fog, fog_stride);
+	 }
+	 if (IND & TDFX_TEX0_BIT) {
+	    /* Coordinates are pre-multiplied by rhw.  In variants without
+	     * TDFX_XYZ_BIT this is the rhw already stored in the vertex.
+	     */
+	    GLfloat w = v->rhw;
+	    v->tu0 = tc0[0][0] * u0scale * w;
+	    v->tv0 = tc0[0][1] * v0scale * w;
+	    if (IND & TDFX_PTEX_BIT) {
+	       /* q is rhw, scaled by the 4th coordinate when present */
+	       v->tq0 = w;
+	       if (tc0_size == 4) 
+		  v->tq0 = tc0[0][3] * w;
+	    } 
+	    tc0 =  (GLfloat (*)[4])((GLubyte *)tc0 +  tc0_stride);
+	 }
+	 if (IND & TDFX_TEX1_BIT) {
+	    GLfloat w = v->rhw;
+	    v->tu1 = tc1[0][0] * u1scale * w;
+	    v->tv1 = tc1[0][1] * v1scale * w;
+	    if (IND & TDFX_PTEX_BIT) {
+	       v->tq1 = w;
+	       if (tc1_size == 4) 
+		  v->tq1 = tc1[0][3] * w;
+	    }
+	    tc1 =  (GLfloat (*)[4])((GLubyte *)tc1 +  tc1_stride);
+	 } 
+      }
+   }
+}
+
+
+/**
+ * Template: report whether the current texture coordinates fit this
+ * variant's vertex layout.
+ *
+ * Returns GL_FALSE only for a non-projective textured variant when one of
+ * the texture attributes it inspects has 4 components; projective and
+ * untextured variants always return GL_TRUE.
+ *
+ * Side effect (two-texture variants): a NULL TEX0 attribute pointer in
+ * the VB is replaced with the TEX1 pointer.
+ */
+static GLboolean TAG(check_tex_sizes)( GLcontext *ctx )
+{
+   struct vertex_buffer *VB;
+
+/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+
+   if ((IND & TDFX_PTEX_BIT) || !(IND & TDFX_TEX0_BIT))
+      return GL_TRUE;
+
+   VB = &TNL_CONTEXT(ctx)->vb;
+
+   if (IND & TDFX_TEX1_BIT) {
+      if (!VB->AttribPtr[_TNL_ATTRIB_TEX0])
+	 VB->AttribPtr[_TNL_ATTRIB_TEX0] = VB->AttribPtr[_TNL_ATTRIB_TEX1];
+
+      if (VB->AttribPtr[_TNL_ATTRIB_TEX1]->size == 4)
+	 return GL_FALSE;
+   }
+
+   return (VB->AttribPtr[_TNL_ATTRIB_TEX0]->size == 4) ? GL_FALSE : GL_TRUE;
+}
+
+
+/**
+ * Template: build the clip-generated vertex \p edst on the edge between
+ * hardware vertices \p eout and \p ein.
+ *
+ * Position and rhw come from the clip-space coordinate of \p edst in the
+ * VB; colour, fog and texture coordinates are interpolated with factor
+ * \p t.  Texture coordinates are stored pre-multiplied by rhw, so both
+ * endpoints are rescaled to the destination's rhw before interpolating.
+ * \p force_boundary is not used.
+ */
+static void TAG(interp)( GLcontext *ctx,
+			 GLfloat t, 
+			 GLuint edst, GLuint eout, GLuint ein,
+			 GLboolean force_boundary )
+{
+   tdfxContextPtr fxMesa = TDFX_CONTEXT( ctx );
+   const GLfloat *s = fxMesa->hw_viewport;
+   const GLfloat *clip = TNL_CONTEXT(ctx)->vb.ClipPtr->data[edst];
+   tdfxVertex *dst = &fxMesa->verts[edst];
+   const tdfxVertex *out = &fxMesa->verts[eout];
+   const tdfxVertex *in = &fxMesa->verts[ein];
+   GLfloat rhw;
+   GLfloat out_scale;
+   GLfloat in_scale;
+
+   /* 1/w of the new vertex; a zero w falls back to 1 */
+   if (clip[3] == 0.0F)
+      rhw = 1.0F;
+   else
+      rhw = 1.0F / clip[3];
+
+   out_scale = rhw / out->rhw;
+   in_scale = rhw / in->rhw;
+
+   VIEWPORT_X(dst->x, clip[0] * rhw);
+   VIEWPORT_Y(dst->y, clip[1] * rhw);
+   VIEWPORT_Z(dst->z, clip[2] * rhw);
+   dst->rhw = rhw;
+
+   INTERP_UB( t, dst->color[0], out->color[0], in->color[0] );
+   INTERP_UB( t, dst->color[1], out->color[1], in->color[1] );
+   INTERP_UB( t, dst->color[2], out->color[2], in->color[2] );
+   INTERP_UB( t, dst->color[3], out->color[3], in->color[3] );
+
+   if (IND & TDFX_FOGC_BIT) {
+      INTERP_F( t, dst->fog, out->fog, in->fog );
+   }
+
+   if (IND & TDFX_TEX0_BIT) {
+      INTERP_F( t, dst->tu0, out->tu0 * out_scale, in->tu0 * in_scale );
+      INTERP_F( t, dst->tv0, out->tv0 * out_scale, in->tv0 * in_scale );
+      if (IND & TDFX_PTEX_BIT) {
+         INTERP_F( t, dst->tq0, out->tq0 * out_scale, in->tq0 * in_scale );
+      }
+   }
+
+   if (IND & TDFX_TEX1_BIT) {
+      INTERP_F( t, dst->tu1, out->tu1 * out_scale, in->tu1 * in_scale );
+      INTERP_F( t, dst->tv1, out->tv1 * out_scale, in->tv1 * in_scale );
+      if (IND & TDFX_PTEX_BIT) {
+         INTERP_F( t, dst->tq1, out->tq1 * out_scale, in->tq1 * in_scale );
+      }
+   }
+}
+
+
+/**
+ * Template: register this variant's routines in setup_tab[IND] and record
+ * the hardware vertex layout that IND implies:
+ *   TEX1 -> PROJ2 (with PTEX) or MULTI
+ *   TEX0 -> PROJ1 (with PTEX) or SINGLE
+ *   W    -> NOTEX
+ *   else -> TINY
+ */
+static void TAG(init)( void )
+{
+/*     fprintf(stderr, "%s\n", __FUNCTION__); */
+
+   setup_tab[IND].vertex_format =
+      (IND & TDFX_TEX1_BIT)
+	 ? ((IND & TDFX_PTEX_BIT) ? TDFX_LAYOUT_PROJ2 : TDFX_LAYOUT_MULTI)
+      : (IND & TDFX_TEX0_BIT)
+	 ? ((IND & TDFX_PTEX_BIT) ? TDFX_LAYOUT_PROJ1 : TDFX_LAYOUT_SINGLE)
+      : (IND & TDFX_W_BIT)
+	 ? TDFX_LAYOUT_NOTEX
+	 : TDFX_LAYOUT_TINY;
+
+   setup_tab[IND].copy_pv = copy_pv;
+   setup_tab[IND].interp = TAG(interp);
+   setup_tab[IND].check_tex_sizes = TAG(check_tex_sizes);
+   setup_tab[IND].emit = TAG(emit);
+}
+
+
+#undef IND
+#undef TAG
diff --git a/src/mesa/drivers/dri/unichrome/Makefile b/src/mesa/drivers/dri/unichrome/Makefile
new file mode 100644
index 0000000000..14cf9f3038
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/Makefile
@@ -0,0 +1,29 @@
+# src/mesa/drivers/dri/unichrome/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+# Name of the DRI driver shared object built from this directory.
+LIBNAME = unichrome_dri.so
+
+# Driver-specific C sources.
+DRIVER_SOURCES = \
+	via_context.c \
+	via_fb.c \
+	via_tex.c \
+	via_ioctl.c \
+	via_memcpy.c \
+	via_render.c \
+	via_screen.c \
+	via_span.c \
+	via_state.c \
+	via_texcombine.c \
+	via_tris.c 
+
+# Everything compiled as C: the shared sources plus the driver's own.
+# COMMON_SOURCES is not set in this file; presumably it comes from
+# configs/current or ../Makefile.template.
+C_SOURCES = \
+	$(COMMON_SOURCES) \
+	$(DRIVER_SOURCES)
+
+# This driver has no assembly sources.
+ASM_SOURCES = 
+
+
+# Build rules live in the shared DRI driver template.
+include ../Makefile.template
+
diff --git a/src/mesa/drivers/dri/unichrome/server/via.h b/src/mesa/drivers/dri/unichrome/server/via.h
new file mode 100644
index 0000000000..2cfe6631ef
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/server/via.h
@@ -0,0 +1,11 @@
+#ifndef __VIA_H__
+#define __VIA_H__
+
+#include <stddef.h>	/* size_t -- keeps this header self-contained */
+
+/*
+ * Minimal VIA device description: the size of and a handle to the
+ * register region, plus an opaque data pointer.
+ */
+typedef struct VIAInfo
+{
+    size_t registerSize;	/* size of the register region */
+    void * registerHandle;	/* handle for the register region */
+    void * data;		/* opaque driver data */
+} * VIAInfoPtr;
+
+#endif /* __VIA_H__ */
diff --git a/src/mesa/drivers/dri/unichrome/server/via_dri.h b/src/mesa/drivers/dri/unichrome/server/via_dri.h
new file mode 100644
index 0000000000..b47397d572
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/server/via_dri.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * Keep this file in perfect sync between the ddx and dri drivers.
+ * At least bump the VIA_DRIDDX_VERSION defines appropriately.
+ *
+ */
+#ifndef _VIA_DRI_H_
+#define _VIA_DRI_H_ 1
+
+#define VIA_MAX_DRAWABLES 256
+
+#define VIA_DRIDDX_VERSION_MAJOR  5
+#define VIA_DRIDDX_VERSION_MINOR  0
+#define VIA_DRIDDX_VERSION_PATCH  0
+
+#ifndef XFree86Server
+typedef int Bool;
+#endif
+
+/* A DRM region described by its handle and size.  drm_handle_t and
+ * drmSize are not declared by this header, so the includer must provide
+ * them first.
+ */
+typedef struct {
+    drm_handle_t handle;
+    drmSize size;
+} viaRegion, *viaRegionPtr;
+
+/* Device-private record shared between the DDX and the DRI driver.
+ * Per the note at the top of this file the layout must match on both
+ * sides: do not reorder, resize or insert fields without bumping the
+ * VIA_DRIDDX_VERSION_* defines.
+ *
+ * Field meanings below are read off the names -- confirm against the DDX.
+ */
+typedef struct {
+    viaRegion regs, agp;	/* register and AGP regions */
+    int deviceID;
+    int width;
+    int height;
+    int mem;
+    int bytesPerPixel;
+    int priv1;
+    int priv2;
+    int fbOffset;
+    int fbSize;
+    Bool drixinerama;
+    int backOffset;
+    int depthOffset;
+    int textureOffset;
+    int textureSize;
+    int irqEnabled;
+    unsigned int scrnX, scrnY;
+    int sarea_priv_offset;
+    int ringBufActive;
+    unsigned int reg_pause_addr;
+} VIADRIRec, *VIADRIPtr;
+
+/* Placeholder per-visual-config private record; carries no real data. */
+typedef struct {
+    int dummy;
+} VIAConfigPrivRec, *VIAConfigPrivPtr;
+
+/* Placeholder per-context private record; carries no real data. */
+typedef struct {
+    int dummy;
+} VIADRIContextRec, *VIADRIContextPtr;
+
+#endif /* _VIA_DRI_H_ */
diff --git a/src/mesa/drivers/dri/unichrome/server/via_driver.h b/src/mesa/drivers/dri/unichrome/server/via_driver.h
new file mode 100644
index 0000000000..cd3b038bfd
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/server/via_driver.h
@@ -0,0 +1,308 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _VIA_DRIVER_H
+#define _VIA_DRIVER_H
+
+#if 0 /* DEBUG is used in VIA DRI code as a flag */
+/* #define DEBUG_PRINT */
+#ifdef DEBUG_PRINT
+#define DEBUG(x) x
+#else
+#define DEBUG(x)
+#endif
+#endif
+
+#if 0
+#include "vgaHW.h"
+#include "xf86.h"
+#include "xf86Resources.h"
+#include "xf86_ansic.h"
+#include "xf86Pci.h"
+#include "xf86PciInfo.h"
+#include "xf86_OSproc.h"
+#include "compiler.h"
+#include "xf86Cursor.h"
+#include "mipointer.h"
+#include "micmap.h"
+
+#define USE_FB
+#ifdef USE_FB
+#include "fb.h"
+#else
+#include "cfb.h"
+#include "cfb16.h"
+#include "cfb32.h"
+#endif
+
+#include "xf86cmap.h"
+#include "vbe.h"
+#include "xaa.h"
+
+#include "via_regs.h"
+#include "via_bios.h"
+#include "via_gpioi2c.h"
+#include "via_priv.h"
+#include "ginfo.h"
+
+#ifdef XF86DRI
+#define _XF86DRI_SERVER_
+#include "sarea.h"
+#include "dri.h"
+#include "GL/glxint.h"
+#include "via_dri.h"
+#endif
+#else
+#include "via_regs.h"
+
+#include "GL/internal/dri_interface.h"
+#include "via_dri.h"
+#endif
+
+/* _SOLO : copied from via_bios.h */
+/* System Memory CLK */
+#define		VIA_MEM_SDR66					0x00
+#define		VIA_MEM_SDR100					0x01
+#define		VIA_MEM_SDR133					0x02
+#define		VIA_MEM_DDR200					0x03
+#define		VIA_MEM_DDR266					0x04
+#define		VIA_MEM_DDR333					0x05
+#define		VIA_MEM_DDR400					0x06
+
+/* Driver identification.  VIA_VERSION packs major/minor/patchlevel into
+ * one integer (major in bits 24+, minor in bits 16-23, patchlevel below).
+ * NOTE(review): DRIVER_VERSION reads "4.1.0" while PATCHLEVEL is 41 --
+ * confirm which one is intended.
+ */
+#define DRIVER_NAME     "via"
+#define DRIVER_VERSION  "4.1.0"
+#define VERSION_MAJOR   4
+#define VERSION_MINOR   1
+#define PATCHLEVEL      41
+#define VIA_VERSION     ((VERSION_MAJOR<<24) | (VERSION_MINOR<<16) | PATCHLEVEL)
+
+/* MMIO accessors.  All of them expand to a reference to a local variable
+ * named pVia.  The VGA* forms address the block at MapBase + 0x8000.
+ */
+#define VGAIN8(addr)        MMIO_IN8(pVia->MapBase+0x8000, addr)
+#define VGAIN16(addr)       MMIO_IN16(pVia->MapBase+0x8000, addr)
+#define VGAIN(addr)         MMIO_IN32(pVia->MapBase+0x8000, addr)
+
+#define VGAOUT8(addr, val)  MMIO_OUT8(pVia->MapBase+0x8000, addr, val)
+#define VGAOUT16(addr, val) MMIO_OUT16(pVia->MapBase+0x8000, addr, val)
+#define VGAOUT(addr, val)   MMIO_OUT32(pVia->MapBase+0x8000, addr, val)
+
+/* Register accessors relative to MapBase itself. */
+#define INREG(addr)         MMIO_IN32(pVia->MapBase, addr)
+#define OUTREG(addr, val)   MMIO_OUT32(pVia->MapBase, addr, val)
+#define INREG16(addr)       MMIO_IN16(pVia->MapBase, addr)
+#define OUTREG16(addr, val) MMIO_OUT16(pVia->MapBase, addr, val)
+
+/* Sizes in bytes. */
+#define VIA_PIXMAP_CACHE_SIZE   (256 * 1024)
+#define VIA_CURSOR_SIZE         (4 * 1024)
+#define VIA_VQ_SIZE             (256 * 1024)
+
+/* Snapshot of the mode-related hardware registers.  VIARec keeps two of
+ * these: SavedReg and ModeReg.
+ */
+typedef struct {
+    unsigned int    mode, refresh, resMode;
+    int             countWidthByQWord;
+    int             offsetWidthByQWord;
+    unsigned char   SR08, SR0A, SR0F;
+
+    /*   extended Sequencer registers */
+    unsigned char   SR10, SR11, SR12, SR13,SR14,SR15,SR16;
+    unsigned char   SR17, SR18, SR19, SR1A,SR1B,SR1C,SR1D,SR1E;
+    unsigned char   SR1F, SR20, SR21, SR22,SR23,SR24,SR25,SR26;
+    unsigned char   SR27, SR28, SR29, SR2A,SR2B,SR2C,SR2D,SR2E;
+    unsigned char   SR2F, SR30, SR31, SR32,SR33,SR34,SR40,SR41;
+    unsigned char   SR42, SR43, SR44, SR45,SR46,SR47;
+
+    unsigned char   Clock;
+
+    /*   extended CRTC registers */
+    unsigned char   CR13, CR30, CR31, CR32, CR33, CR34, CR35, CR36;
+    unsigned char   CR37, CR38, CR39, CR3A, CR40, CR41, CR42, CR43;
+    unsigned char   CR44, CR45, CR46, CR47, CR48, CR49, CR4A;
+    unsigned char   CRTCRegs[83];
+    unsigned char   TVRegs[0xCF];
+    unsigned char   TVRegs2[0xCF];
+/*    unsigned char   LCDRegs[0x40];*/
+
+} VIARegRec, *VIARegPtr;
+
+
+/* Per-device driver state.
+ *
+ * This is a cut-down copy of the X server driver's record: members whose
+ * types come from X server headers are compiled out with "#if 0" or
+ * commented out.  The DRI-related tail differs depending on XF86DRI; note
+ * that agpAddr is uint32_t in one branch and unsigned long in the other.
+ */
+typedef struct _VIA {
+    VIARegRec           SavedReg;
+    VIARegRec           ModeReg;
+#if 0
+    xf86CursorInfoPtr   CursorInfoRec;
+    int                 stateMode;
+    VIAModeInfoPtr      VIAModeList;
+#endif
+    int                 ModeStructInit;
+    int                 Bpp, Bpl, ScissB;
+    unsigned            PlaneMask;
+
+    unsigned long       videoRambytes;
+    int                 videoRamKbytes;
+    int                 FBFreeStart;
+    int                 FBFreeEnd;
+    int                 CursorStart;
+    int                 VQStart;
+    int                 VQEnd;
+
+    /* These are physical addresses. */
+    unsigned long       FrameBufferBase;
+    unsigned long       MmioBase;
+
+    /* These are linear addresses. */
+    unsigned char*      MapBase;
+    unsigned char*      VidMapBase;
+    unsigned char*      BltBase;
+    unsigned char*      MapBaseDense;
+    unsigned char*      FBBase;
+    unsigned char*      FBStart;
+
+    int                 PrimaryVidMapped;
+    int                 dacSpeedBpp;
+    int                 minClock, maxClock;
+    int                 MCLK, REFCLK, LCDclk;
+    double              refclk_fact;
+
+    /* Here are all the Options */
+    int                 VQEnable;
+    int                 pci_burst;
+    int                 NoPCIRetry;
+    int                 hwcursor;
+    int                 NoAccel;
+    int                 shadowFB;
+    int                 NoDDCValue;
+    int                 rotate;
+
+#if 0
+    CloseScreenProcPtr  CloseScreen;
+    pciVideoPtr         PciInfo;
+    PCITAG              PciTag;
+#endif
+    int                 Chipset;
+    int                 ChipId;
+    int                 ChipRev;
+    /*vbeInfoPtr          pVbe;*/
+    int                 EntityIndex;
+
+    /* Support for shadowFB and rotation */
+    unsigned char*      ShadowPtr;
+    int                 ShadowPitch;
+    void                (*PointerMoved)(int index, int x, int y);
+
+    /* Support for XAA acceleration */
+#if 0
+    XAAInfoRecPtr       AccelInfoRec;
+    xRectangle          Rect;
+#endif
+    uint32_t            SavedCmd;
+    uint32_t            SavedFgColor;
+    uint32_t            SavedBgColor;
+    uint32_t            SavedPattern0;
+    uint32_t            SavedPattern1;
+    uint32_t            SavedPatternAddr;
+
+#if 0
+    /* Support for Int10 processing */
+    xf86Int10InfoPtr    pInt10;
+
+    /* BIOS Info Ptr */
+    VIABIOSInfoPtr      pBIOSInfo;
+    VGABIOSVERPtr       pBIOSVer;
+#endif
+
+    /* Support for DGA */
+    int                 numDGAModes;
+    /*DGAModePtr          DGAModes;*/
+    int                 DGAactive;
+    int                 DGAViewportStatus;
+
+    /* The various wait handlers. */
+    /* Invoked through the WaitIdle() macro with this record as argument. */
+    int                 (*myWaitIdle)(struct _VIA*);
+
+#if 0
+    /* I2C & DDC */
+    I2CBusPtr           I2C_Port1;
+    I2CBusPtr           I2C_Port2;
+    xf86MonPtr          DDC1;
+    xf86MonPtr          DDC2;
+#endif
+
+    /* MHS */
+    int                 IsSecondary;
+    int                 HasSecondary;
+
+#if 0
+    /* Capture information */
+    VIACAPINFO     CapInfo[2];      /* 2 capture information */
+#endif
+
+/*
+    uint32_t            Cap0_Deinterlace;
+    uint32_t            Cap1_Deinterlace;
+
+    int                 Cap0_FieldSwap;
+    int                 NoCap0_HFilter;
+    int                 Capture_OverScanOff;
+    int                 NoMPEGHQV_VFilter;
+*/
+#ifdef XF86DRI
+    int 		directRenderingEnabled;
+    DRIInfoPtr		pDRIInfo;
+    int 		drmFD;
+    int 		numVisualConfigs;
+    __GLXvisualConfig* 	pVisualConfigs;
+    VIAConfigPrivPtr 	pVisualConfigsPriv;
+    unsigned long 	agpHandle;
+    unsigned long 	registerHandle;
+    uint32_t            agpAddr;
+    unsigned char 	*agpBase;
+    unsigned int 	agpSize;
+    int  		IsPCI;
+    int  		drixinerama;
+#else
+    int 		drmFD;
+    unsigned long 	agpHandle;
+    unsigned long 	registerHandle;
+    unsigned long 	agpAddr;
+    unsigned char 	*agpBase;
+    unsigned int 	agpSize;
+    int  		IsPCI;
+#endif
+
+    int     V4LEnabled;
+    uint16_t    ActiveDevice;	/* if SAMM, non-equal pBIOSInfo->ActiveDevice */
+    unsigned char       *CursorImage;
+    uint32_t    CursorFG;
+    uint32_t    CursorBG;
+    uint32_t    CursorMC;
+
+    unsigned char	MemClk;
+    int 		EnableExtendedFIFO;
+    VIADRIPtr		devPrivate;
+} VIARec, *VIAPtr;
+
+
+/* Shortcuts.
+ *
+ * WaitIdle() depends on a local symbol "pVia" and calls its myWaitIdle
+ * handler.  VIAPTR(p) casts p->driverPrivate to a VIAPtr.
+ */
+
+#define WaitIdle()      pVia->myWaitIdle(pVia)
+#define VIAPTR(p)       ((VIAPtr)((p)->driverPrivate))
+
+#endif /* _VIA_DRIVER_H */
+
diff --git a/src/mesa/drivers/dri/unichrome/server/via_priv.h b/src/mesa/drivers/dri/unichrome/server/via_priv.h
new file mode 100644
index 0000000000..352eac0597
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/server/via_priv.h
@@ -0,0 +1,69 @@
+
+#ifndef _VIA_PRIV_H_
+#define _VIA_PRIV_H_ 1
+
+/* #include "ddmpeg.h" */
+#include "via_common.h"
+
+#define MEM_BLOCKS		4
+
+/* Bookkeeping for one allocated block of video memory. */
+typedef struct {
+    unsigned long   base;		/* Offset into fb */
+    int    pool;			/* Pool we drew from */
+    int    drm_fd;			/* Fd in DRM mode */
+    drm_via_mem_t drm;			/* DRM management object */
+    int    slot;			/* Pool 3 slot */
+    void  *pVia;			/* VIA driver pointer */
+    /* FBLinearPtr linear; */		/* X linear pool info ptr */
+} VIAMem;
+
+typedef VIAMem *VIAMemPtr;
+
+
+
+#if 0
+typedef struct  {
+    unsigned long   gdwVideoFlagTV1;
+    unsigned long   gdwVideoFlagSW;
+    unsigned long   gdwVideoFlagMPEG;
+    unsigned long   gdwAlphaEnabled;		/* For Alpha blending use*/
+
+    VIAMem SWOVMem;
+    VIAMem HQVMem;
+    VIAMem SWfbMem;
+
+    DDPIXELFORMAT DPFsrc; 
+    DDUPDATEOVERLAY UpdateOverlayBackup;    /* For HQVcontrol func use
+					    // To save MPEG updateoverlay info.*/
+
+/* device struct */
+    SWDEVICE   SWDevice;
+    SUBDEVICE   SUBDevice;
+    MPGDEVICE   MPGDevice;
+    OVERLAYRECORD   overlayRecordV1;
+    OVERLAYRECORD   overlayRecordV3;
+
+    BoxRec  AvailFBArea;
+    FBLinearPtr   SWOVlinear;
+
+    int MPEG_ON;
+    int SWVideo_ON;
+
+/*To solve the bandwidth issue */
+    unsigned long   gdwUseExtendedFIFO;
+
+/* For panning mode use */
+    int panning_old_x;
+    int panning_old_y;
+    int panning_x;
+    int panning_y;
+
+/*To solve the bandwidth issue */
+    unsigned char Save_3C4_16;
+    unsigned char Save_3C4_17;
+    unsigned char Save_3C4_18;
+
+} swovRec, *swovPtr;
+#endif
+
+#endif /* _VIA_PRIV_H_ */
diff --git a/src/mesa/drivers/dri/unichrome/server/via_regs.h b/src/mesa/drivers/dri/unichrome/server/via_regs.h
new file mode 100644
index 0000000000..87e1e9daa9
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/server/via_regs.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*************************************************************************
+ *
+ *  File:       via_regs.h
+ *  Content:    The defines of Via registers
+ *
+ ************************************************************************/
+
+#ifndef _VIA_REGS_H
+#define _VIA_REGS_H
+
+#include "via_driver.h"
+
+/*#define VIA_SERIES(chip)  (chip == VIA_CLE266)*/
+
+#define PCI_VIA_VENDOR_ID       0x1106
+
+#define PCI_CHIP_CLE3122        0x3122
+#define PCI_CHIP_CLE3022        0x3022
+#define PCI_CHIP_VT3205         0x3205
+#define PCI_CHIP_VT7205         0x7205
+#define PCI_CHIP_VT3204         0x3108
+#define PCI_CHIP_VT3259         0x3118
+#define PCI_CHIP_VT3344         0x3344
+
+
+#define BIOS_BSIZE              1024
+#define BIOS_BASE               0xc0000
+
+
+#define VIA_MMIO_REGSIZE        0x9000
+#define VIA_MMIO_REGBASE        0x0
+#define VIA_MMIO_VGABASE        0x8000
+#define VIA_MMIO_BLTBASE        0x200000
+#define VIA_MMIO_BLTSIZE        0x10000
+
+
+/* defines for VIA 2D registers (several names deliberately share one value) */
+#define VIA_REG_GECMD           0x000
+#define VIA_REG_GEMODE          0x004
+#define VIA_REG_GESTATUS        0x004       /* same value as VIA_REG_GEMODE */
+#define VIA_REG_SRCPOS          0x008
+#define VIA_REG_DSTPOS          0x00C
+#define VIA_REG_LINE_K1K2       0x008       /* same value as VIA_REG_SRCPOS */
+#define VIA_REG_LINE_XY         0x00C       /* same value as VIA_REG_DSTPOS */
+#define VIA_REG_DIMENSION       0x010       /* width and height */
+#define VIA_REG_PATADDR         0x014
+#define VIA_REG_FGCOLOR         0x018
+#define VIA_REG_DSTCOLORKEY     0x018       /* same value as VIA_REG_FGCOLOR */
+#define VIA_REG_BGCOLOR         0x01C
+#define VIA_REG_SRCCOLORKEY     0x01C       /* same value as VIA_REG_BGCOLOR */
+#define VIA_REG_CLIPTL          0x020       /* top and left of clipping */
+#define VIA_REG_CLIPBR          0x024       /* bottom and right of clipping */
+#define VIA_REG_OFFSET          0x028
+#define VIA_REG_LINE_ERROR      0x028       /* same value as VIA_REG_OFFSET */
+#define VIA_REG_KEYCONTROL      0x02C       /* color key control */
+#define VIA_REG_SRCBASE         0x030
+#define VIA_REG_DSTBASE         0x034
+#define VIA_REG_PITCH           0x038       /* pitch of src and dst */
+#define VIA_REG_MONOPAT0        0x03C
+#define VIA_REG_MONOPAT1        0x040
+#define VIA_REG_COLORPAT        0x100       /* from 0x100 to 0x1ff */
+
+
+
+/* defines for VIA video registers */
+#define VIA_REG_INTERRUPT       0x200
+#define VIA_REG_CRTCSTART       0x214
+
+
+/* defines for VIA HW cursor registers */
+#define VIA_REG_CURSOR_MODE     0x2D0
+#define VIA_REG_CURSOR_POS      0x2D4
+#define VIA_REG_CURSOR_ORG      0x2D8
+#define VIA_REG_CURSOR_BG       0x2DC
+#define VIA_REG_CURSOR_FG       0x2E0
+
+
+/* defines for VIA 3D registers */
+#define VIA_REG_STATUS          0x400
+#define VIA_REG_TRANSET         0x43C
+#define VIA_REG_TRANSPACE       0x440
+
+/* VIA_REG_STATUS(0x400): Engine Status */
+#define VIA_CMD_RGTR_BUSY       0x00000080  /* Command Regulator is busy */
+#define VIA_2D_ENG_BUSY         0x00000001  /* 2D Engine is busy */
+#define VIA_3D_ENG_BUSY         0x00000002  /* 3D Engine is busy */
+#define VIA_VR_QUEUE_BUSY       0x00020000 /* Virtual Queue is busy */
+
+
+/* VIA_REG_GECMD(0x00): 2D Engine Command  */
+#define VIA_GEC_NOOP            0x00000000
+#define VIA_GEC_BLT             0x00000001
+#define VIA_GEC_LINE            0x00000005
+
+#define VIA_GEC_SRC_XY          0x00000000
+#define VIA_GEC_SRC_LINEAR      0x00000010
+#define VIA_GEC_DST_XY          0x00000000
+#define VIA_GEC_DST_LINRAT      0x00000020  /* NOTE(review): "LINRAT" is presumably "LINEAR" (cf. VIA_GEC_SRC_LINEAR); name kept for callers */
+
+#define VIA_GEC_SRC_FB          0x00000000
+#define VIA_GEC_SRC_SYS         0x00000040
+#define VIA_GEC_DST_FB          0x00000000
+#define VIA_GEC_DST_SYS         0x00000080
+
+#define VIA_GEC_SRC_MONO        0x00000100  /* source is mono */
+#define VIA_GEC_PAT_MONO        0x00000200  /* pattern is mono */
+
+#define VIA_GEC_MSRC_OPAQUE     0x00000000  /* mono src is opaque */
+#define VIA_GEC_MSRC_TRANS      0x00000400  /* mono src is transparent */
+
+#define VIA_GEC_PAT_FB          0x00000000  /* pattern is in frame buffer */
+#define VIA_GEC_PAT_REG         0x00000800  /* pattern is from reg setting */
+
+#define VIA_GEC_CLIP_DISABLE    0x00000000
+#define VIA_GEC_CLIP_ENABLE     0x00001000
+
+#define VIA_GEC_FIXCOLOR_PAT    0x00002000
+
+#define VIA_GEC_INCX            0x00000000
+#define VIA_GEC_DECY            0x00004000
+#define VIA_GEC_INCY            0x00000000
+#define VIA_GEC_DECX            0x00008000
+
+#define VIA_GEC_MPAT_OPAQUE     0x00000000  /* mono pattern is opaque */
+#define VIA_GEC_MPAT_TRANS      0x00010000  /* mono pattern is transparent */
+
+#define VIA_GEC_MONO_UNPACK     0x00000000
+#define VIA_GEC_MONO_PACK       0x00020000
+#define VIA_GEC_MONO_DWORD      0x00000000
+#define VIA_GEC_MONO_WORD       0x00040000
+#define VIA_GEC_MONO_BYTE       0x00080000
+
+#define VIA_GEC_LASTPIXEL_ON    0x00000000
+#define VIA_GEC_LASTPIXEL_OFF   0x00100000
+#define VIA_GEC_X_MAJOR         0x00000000
+#define VIA_GEC_Y_MAJOR         0x00200000
+#define VIA_GEC_QUICK_START     0x00800000
+
+
+/* VIA_REG_GEMODE(0x04): GE mode */
+#define VIA_GEM_8bpp            0x00000000
+#define VIA_GEM_16bpp           0x00000100
+#define VIA_GEM_32bpp           0x00000300
+
+#define VIA_GEM_640             0x00000000   /* 640*480 */
+#define VIA_GEM_800             0x00000400   /* 800*600 */
+#define VIA_GEM_1024            0x00000800   /* 1024*768 */
+#define VIA_GEM_1280            0x00000C00   /* 1280*1024 */
+#define VIA_GEM_1600            0x00001000   /* 1600*1200 */
+#define VIA_GEM_2048            0x00001400   /* 2048*1536 */
+
+/* VIA_REG_PITCH(0x38): Pitch Setting */
+#define VIA_PITCH_ENABLE        0x80000000
+
+
+#define MAXLOOP                 0xffffff
+
+/* Busy-waits (no timeout) for bit 3 of VGAIN8(vgaIOBase + 0x0a) to go set, clear, set; expands to one statement. */
+#define VerticalRetraceWait() \
+do { \
+    VGAOUT8(vgaCRIndex, 0x17); \
+    if (VGAIN8(vgaCRReg) & 0x80) { /* only wait when bit 7 of the value read back is set */ \
+        while ((VGAIN8(vgaIOBase + 0x0a) & 0x08) == 0x00) ; /* until bit 3 is set */ \
+        while ((VGAIN8(vgaIOBase + 0x0a) & 0x08) == 0x08) ; /* until bit 3 is clear */ \
+        while ((VGAIN8(vgaIOBase + 0x0a) & 0x08) == 0x00) ; /* until bit 3 is set again */ \
+    } \
+} while (0)
+
+/* volatile unsigned int access at pVia->MapBase + (addr); a `pVia` must be in scope at the call site. Fully parenthesized so either macro can be used as an operand. */
+#define VIASETREG(addr, data)   (*(volatile unsigned int *)(pVia->MapBase + (addr)) = (data))
+#define VIAGETREG(addr)         (*(volatile unsigned int *)(pVia->MapBase + (addr)))
+
+
+#endif /* _VIA_REGS_H */
diff --git a/src/mesa/drivers/dri/unichrome/via_3d_reg.h b/src/mesa/drivers/dri/unichrome/via_3d_reg.h
new file mode 100644
index 0000000000..77b24dc615
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_3d_reg.h
@@ -0,0 +1,1652 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef VIA_3D_REG_H
+#define VIA_3D_REG_H
+#define HC_REG_BASE             0x0400
+
+#define HC_ParaN_MASK           0xffffffff
+#define HC_Para_MASK            0x00ffffff
+#define HC_SubA_MASK            0xff000000
+#define HC_SubA_SHIFT           24
+/* Transmission Setting
+ */
+#define HC_REG_TRANS_SET        0x003c
+#define HC_ParaSubType_MASK     0xff000000
+#define HC_ParaType_MASK        0x00ff0000
+#define HC_ParaOS_MASK          0x0000ff00
+#define HC_ParaAdr_MASK         0x000000ff
+#define HC_ParaSubType_SHIFT    24
+#define HC_ParaType_SHIFT       16
+#define HC_ParaOS_SHIFT         8
+#define HC_ParaAdr_SHIFT        0
+
+#define HC_ParaType_CmdVdata    0x0000
+#define HC_ParaType_NotTex      0x0001
+#define HC_ParaType_Tex         0x0002
+#define HC_ParaType_Palette     0x0003
+#define HC_ParaType_PreCR       0x0010
+#define HC_ParaType_Auto        0x00fe
+
+/* Transmission Space
+ * NOTE(review): "HpataAF" is presumably a misspelling of "HparaAF" (cf. HC_REG_Hpara0); name left unchanged. */
+#define HC_REG_Hpara0           0x0040          
+#define HC_REG_HpataAF          0x02fc          
+
+/* Read
+ */
+#define HC_REG_HREngSt          0x0000
+#define HC_REG_HRFIFOempty      0x0004
+#define HC_REG_HRFIFOfull       0x0008
+#define HC_REG_HRErr            0x000c
+#define HC_REG_FIFOstatus       0x0010
+/* HC_REG_HREngSt          0x0000
+ */
+#define HC_HDASZC_MASK          0x00010000
+#define HC_HSGEMI_MASK          0x0000f000
+#define HC_HLGEMISt_MASK        0x00000f00
+#define HC_HCRSt_MASK           0x00000080
+#define HC_HSE0St_MASK          0x00000040
+#define HC_HSE1St_MASK          0x00000020
+#define HC_HPESt_MASK           0x00000010
+#define HC_HXESt_MASK           0x00000008
+#define HC_HBESt_MASK           0x00000004
+#define HC_HE2St_MASK           0x00000002
+#define HC_HE3St_MASK           0x00000001
+/* HC_REG_HRFIFOempty      0x0004
+ */
+#define HC_HRZDempty_MASK       0x00000010
+#define HC_HRTXAempty_MASK      0x00000008
+#define HC_HRTXDempty_MASK      0x00000004
+#define HC_HWZDempty_MASK       0x00000002
+#define HC_HWCDempty_MASK       0x00000001
+/* HC_REG_HRFIFOfull       0x0008
+ */
+#define HC_HRZDfull_MASK        0x00000010
+#define HC_HRTXAfull_MASK       0x00000008
+#define HC_HRTXDfull_MASK       0x00000004
+#define HC_HWZDfull_MASK        0x00000002
+#define HC_HWCDfull_MASK        0x00000001
+/* HC_REG_HRErr            0x000c
+ */
+#define HC_HAGPCMErr_MASK       0x80000000
+#define HC_HAGPCMErrC_MASK      0x70000000
+/* HC_REG_FIFOstatus       0x0010
+ */
+#define HC_HRFIFOATall_MASK     0x80000000
+#define HC_HRFIFOATbusy_MASK    0x40000000
+#define HC_HRATFGMDo_MASK       0x00000100
+#define HC_HRATFGMDi_MASK       0x00000080
+#define HC_HRATFRZD_MASK        0x00000040
+#define HC_HRATFRTXA_MASK       0x00000020
+#define HC_HRATFRTXD_MASK       0x00000010
+#define HC_HRATFWZD_MASK        0x00000008
+#define HC_HRATFWCD_MASK        0x00000004
+#define HC_HRATTXTAG_MASK       0x00000002
+#define HC_HRATTXCH_MASK        0x00000001
+
+/* AGP Command Setting
+ */
+#define HC_SubA_HAGPBstL        0x0060
+#define HC_SubA_HAGPBendL       0x0061
+#define HC_SubA_HAGPCMNT        0x0062
+#define HC_SubA_HAGPBpL         0x0063
+#define HC_SubA_HAGPBpH         0x0064
+/* HC_SubA_HAGPCMNT        0x0062
+ */
+#define HC_HAGPCMNT_MASK        0x00800000
+#define HC_HCmdErrClr_MASK      0x00400000
+#define HC_HAGPBendH_MASK       0x0000ff00
+#define HC_HAGPBstH_MASK        0x000000ff
+#define HC_HAGPBendH_SHIFT      8
+#define HC_HAGPBstH_SHIFT       0
+/* HC_SubA_HAGPBpL         0x0063
+ */
+#define HC_HAGPBpL_MASK         0x00fffffc
+#define HC_HAGPBpID_MASK        0x00000003
+#define HC_HAGPBpID_PAUSE       0x00000000
+#define HC_HAGPBpID_JUMP        0x00000001
+#define HC_HAGPBpID_STOP        0x00000002
+/* HC_SubA_HAGPBpH         0x0064
+ */
+#define HC_HAGPBpH_MASK         0x00ffffff
+
+/* Miscellaneous Settings
+ */
+#define HC_SubA_HClipTB         0x0070
+#define HC_SubA_HClipLR         0x0071
+#define HC_SubA_HFPClipTL       0x0072
+#define HC_SubA_HFPClipBL       0x0073
+#define HC_SubA_HFPClipLL       0x0074
+#define HC_SubA_HFPClipRL       0x0075
+#define HC_SubA_HFPClipTBH      0x0076
+#define HC_SubA_HFPClipLRH      0x0077
+#define HC_SubA_HLP             0x0078
+#define HC_SubA_HLPRF           0x0079
+#define HC_SubA_HSolidCL        0x007a
+#define HC_SubA_HPixGC          0x007b
+#define HC_SubA_HSPXYOS         0x007c
+#define HC_SubA_HVertexCNT      0x007d
+
+#define HC_HClipT_MASK          0x00fff000
+#define HC_HClipT_SHIFT         12
+#define HC_HClipB_MASK          0x00000fff
+#define HC_HClipB_SHIFT         0
+#define HC_HClipL_MASK          0x00fff000
+#define HC_HClipL_SHIFT         12
+#define HC_HClipR_MASK          0x00000fff
+#define HC_HClipR_SHIFT         0
+#define HC_HFPClipBH_MASK       0x0000ff00
+#define HC_HFPClipBH_SHIFT      8
+#define HC_HFPClipTH_MASK       0x000000ff
+#define HC_HFPClipTH_SHIFT      0
+#define HC_HFPClipRH_MASK       0x0000ff00
+#define HC_HFPClipRH_SHIFT      8
+#define HC_HFPClipLH_MASK       0x000000ff
+#define HC_HFPClipLH_SHIFT      0
+#define HC_HSolidCH_MASK        0x000000ff
+#define HC_HPixGC_MASK          0x00800000
+#define HC_HSPXOS_MASK          0x00fff000
+#define HC_HSPXOS_SHIFT         12
+#define HC_HSPYOS_MASK          0x00000fff
+
+/* Command
+ * Command A
+ */
+#define HC_HCmdHeader_MASK      0xfe000000  /*0xffe00000*/
+#define HC_HE3Fire_MASK         0x00100000
+#define HC_HPMType_MASK         0x000f0000
+#define HC_HEFlag_MASK          0x0000e000
+#define HC_HShading_MASK        0x00001c00
+#define HC_HPMValidN_MASK       0x00000200
+#define HC_HPLEND_MASK          0x00000100
+#define HC_HVCycle_MASK         0x000000ff
+#define HC_HVCycle_Style_MASK   0x000000c0
+#define HC_HVCycle_ChgA_MASK    0x00000030
+#define HC_HVCycle_ChgB_MASK    0x0000000c
+#define HC_HVCycle_ChgC_MASK    0x00000003
+#define HC_HPMType_Point        0x00000000
+#define HC_HPMType_Line         0x00010000
+#define HC_HPMType_Tri          0x00020000
+#define HC_HPMType_TriWF        0x00040000
+#define HC_HEFlag_NoAA          0x00000000
+#define HC_HEFlag_ab            0x00008000
+#define HC_HEFlag_bc            0x00004000
+#define HC_HEFlag_ca            0x00002000
+#define HC_HShading_Solid       0x00000000
+#define HC_HShading_FlatA       0x00000400
+#define HC_HShading_FlatB       0x00000800
+#define HC_HShading_FlatC       0x00000c00
+#define HC_HShading_Gouraud     0x00001000
+#define HC_HVCycle_Full         0x00000000
+#define HC_HVCycle_AFP          0x00000040
+#define HC_HVCycle_One          0x000000c0
+#define HC_HVCycle_NewA         0x00000000
+#define HC_HVCycle_AA           0x00000010
+#define HC_HVCycle_AB           0x00000020
+#define HC_HVCycle_AC           0x00000030
+#define HC_HVCycle_NewB         0x00000000
+#define HC_HVCycle_BA           0x00000004
+#define HC_HVCycle_BB           0x00000008
+#define HC_HVCycle_BC           0x0000000c
+#define HC_HVCycle_NewC         0x00000000
+#define HC_HVCycle_CA           0x00000001
+#define HC_HVCycle_CB           0x00000002
+#define HC_HVCycle_CC           0x00000003
+
+/* Command B
+ */
+#define HC_HLPrst_MASK          0x00010000
+#define HC_HLLastP_MASK         0x00008000
+#define HC_HVPMSK_MASK          0x00007f80
+#define HC_HBFace_MASK          0x00000040
+#define HC_H2nd1VT_MASK         0x0000003f
+#define HC_HVPMSK_X             0x00004000
+#define HC_HVPMSK_Y             0x00002000
+#define HC_HVPMSK_Z             0x00001000
+#define HC_HVPMSK_W             0x00000800
+#define HC_HVPMSK_Cd            0x00000400
+#define HC_HVPMSK_Cs            0x00000200
+#define HC_HVPMSK_S             0x00000100
+#define HC_HVPMSK_T             0x00000080
+
+/* Enable Setting
+ */
+#define HC_SubA_HEnable         0x0000
+#define HC_HenTXEnvMap_MASK     0x00200000
+#define HC_HenVertexCNT_MASK    0x00100000
+#define HC_HenCPUDAZ_MASK       0x00080000
+#define HC_HenDASZWC_MASK       0x00040000
+#define HC_HenFBCull_MASK       0x00020000
+#define HC_HenCW_MASK           0x00010000
+#define HC_HenAA_MASK           0x00008000
+#define HC_HenST_MASK           0x00004000
+#define HC_HenZT_MASK           0x00002000
+#define HC_HenZW_MASK           0x00001000
+#define HC_HenAT_MASK           0x00000800
+#define HC_HenAW_MASK           0x00000400
+#define HC_HenSP_MASK           0x00000200
+#define HC_HenLP_MASK           0x00000100
+#define HC_HenTXCH_MASK         0x00000080
+#define HC_HenTXMP_MASK         0x00000040
+#define HC_HenTXPP_MASK         0x00000020
+#define HC_HenTXTR_MASK         0x00000010
+#define HC_HenCS_MASK           0x00000008
+#define HC_HenFOG_MASK          0x00000004
+#define HC_HenABL_MASK          0x00000002
+#define HC_HenDT_MASK           0x00000001
+
+/* Z Setting
+ */
+#define HC_SubA_HZWBBasL        0x0010
+#define HC_SubA_HZWBBasH        0x0011
+#define HC_SubA_HZWBType        0x0012
+#define HC_SubA_HZBiasL         0x0013
+#define HC_SubA_HZWBend         0x0014
+#define HC_SubA_HZWTMD          0x0015
+#define HC_SubA_HZWCDL          0x0016
+#define HC_SubA_HZWCTAGnum      0x0017
+#define HC_SubA_HZCYNum         0x0018
+#define HC_SubA_HZWCFire        0x0019
+/* HC_SubA_HZWBType
+ */
+#define HC_HZWBType_MASK        0x00800000
+#define HC_HZBiasedWB_MASK      0x00400000
+#define HC_HZONEasFF_MASK       0x00200000
+#define HC_HZOONEasFF_MASK      0x00100000
+#define HC_HZWBFM_MASK          0x00030000
+#define HC_HZWBLoc_MASK         0x0000c000
+#define HC_HZWBPit_MASK         0x00003fff
+#define HC_HZWBFM_16            0x00000000
+#define HC_HZWBFM_32            0x00020000
+#define HC_HZWBFM_24            0x00030000
+#define HC_HZWBLoc_Local        0x00000000
+#define HC_HZWBLoc_SyS          0x00004000
+/* HC_SubA_HZWBend
+ * NOTE(review): HC_HZWBend_MASK's lowest set bit is bit 13 but HC_HZWBend_SHIFT is 10 -- confirm this is intended. */
+#define HC_HZWBend_MASK         0x00ffe000
+#define HC_HZBiasH_MASK         0x000000ff
+#define HC_HZWBend_SHIFT        10
+/* HC_SubA_HZWTMD
+ */
+#define HC_HZWTMD_MASK          0x00070000
+#define HC_HEBEBias_MASK        0x00007f00
+#define HC_HZNF_MASK            0x000000ff
+#define HC_HZWTMD_NeverPass     0x00000000
+#define HC_HZWTMD_LT            0x00010000
+#define HC_HZWTMD_EQ            0x00020000
+#define HC_HZWTMD_LE            0x00030000
+#define HC_HZWTMD_GT            0x00040000
+#define HC_HZWTMD_NE            0x00050000
+#define HC_HZWTMD_GE            0x00060000
+#define HC_HZWTMD_AllPass       0x00070000
+#define HC_HEBEBias_SHIFT       8
+/* HC_SubA_HZWCDL          0x0016 
+ */
+#define HC_HZWCDL_MASK          0x00ffffff
+/* HC_SubA_HZWCTAGnum      0x0017 
+ */
+#define HC_HZWCTAGnum_MASK      0x00ff0000
+#define HC_HZWCTAGnum_SHIFT     16
+#define HC_HZWCDH_MASK          0x000000ff
+#define HC_HZWCDH_SHIFT         0
+/* HC_SubA_HZCYNum         0x0018
+ */
+#define HC_HZCYNum_MASK         0x00030000
+#define HC_HZCYNum_SHIFT        16
+#define HC_HZWCQWnum_MASK       0x00003fff
+#define HC_HZWCQWnum_SHIFT      0
+/* HC_SubA_HZWCFire        0x0019
+ */
+#define HC_ZWCFire_MASK         0x00010000
+#define HC_HZWCQWnumLast_MASK   0x00003fff
+#define HC_HZWCQWnumLast_SHIFT  0
+
+/* Stencil Setting
+ */
+#define HC_SubA_HSTREF          0x0023
+#define HC_SubA_HSTMD           0x0024
+/* HC_SubA_HSBFM
+ * NOTE(review): no HC_SubA_HSBFM sub-address is defined next to HC_SubA_HSTREF/HC_SubA_HSTMD -- confirm where these masks apply. */
+#define HC_HSBFM_MASK           0x00030000
+#define HC_HSBLoc_MASK          0x0000c000
+#define HC_HSBPit_MASK          0x00003fff
+/* HC_SubA_HSTREF
+ */
+#define HC_HSTREF_MASK          0x00ff0000
+#define HC_HSTOPMSK_MASK        0x0000ff00
+#define HC_HSTBMSK_MASK         0x000000ff
+#define HC_HSTREF_SHIFT         16
+#define HC_HSTOPMSK_SHIFT       8
+/* HC_SubA_HSTMD
+ */
+#define HC_HSTMD_MASK           0x00070000
+#define HC_HSTOPSF_MASK         0x000001c0
+#define HC_HSTOPSPZF_MASK       0x00000038
+#define HC_HSTOPSPZP_MASK       0x00000007
+#define HC_HSTMD_NeverPass      0x00000000
+#define HC_HSTMD_LT             0x00010000
+#define HC_HSTMD_EQ             0x00020000
+#define HC_HSTMD_LE             0x00030000
+#define HC_HSTMD_GT             0x00040000
+#define HC_HSTMD_NE             0x00050000
+#define HC_HSTMD_GE             0x00060000
+#define HC_HSTMD_AllPass        0x00070000
+#define HC_HSTOPSF_KEEP         0x00000000
+#define HC_HSTOPSF_ZERO         0x00000040
+#define HC_HSTOPSF_REPLACE      0x00000080
+#define HC_HSTOPSF_INCRSAT      0x000000c0
+#define HC_HSTOPSF_DECRSAT      0x00000100
+#define HC_HSTOPSF_INVERT       0x00000140
+#define HC_HSTOPSF_INCR         0x00000180
+#define HC_HSTOPSF_DECR         0x000001c0
+#define HC_HSTOPSPZF_KEEP       0x00000000
+#define HC_HSTOPSPZF_ZERO       0x00000008
+#define HC_HSTOPSPZF_REPLACE    0x00000010
+#define HC_HSTOPSPZF_INCRSAT    0x00000018
+#define HC_HSTOPSPZF_DECRSAT    0x00000020
+#define HC_HSTOPSPZF_INVERT     0x00000028
+#define HC_HSTOPSPZF_INCR       0x00000030
+#define HC_HSTOPSPZF_DECR       0x00000038
+#define HC_HSTOPSPZP_KEEP       0x00000000
+#define HC_HSTOPSPZP_ZERO       0x00000001
+#define HC_HSTOPSPZP_REPLACE    0x00000002
+#define HC_HSTOPSPZP_INCRSAT    0x00000003
+#define HC_HSTOPSPZP_DECRSAT    0x00000004
+#define HC_HSTOPSPZP_INVERT     0x00000005
+#define HC_HSTOPSPZP_INCR       0x00000006
+#define HC_HSTOPSPZP_DECR       0x00000007
+
+/* Alpha Setting
+ */
+#define HC_SubA_HABBasL         0x0030
+#define HC_SubA_HABBasH         0x0031
+#define HC_SubA_HABFM           0x0032
+#define HC_SubA_HATMD           0x0033
+#define HC_SubA_HABLCsat        0x0034
+#define HC_SubA_HABLCop         0x0035
+#define HC_SubA_HABLAsat        0x0036
+#define HC_SubA_HABLAop         0x0037
+#define HC_SubA_HABLRCa         0x0038
+#define HC_SubA_HABLRFCa        0x0039
+#define HC_SubA_HABLRCbias      0x003a
+#define HC_SubA_HABLRCb         0x003b
+#define HC_SubA_HABLRFCb        0x003c
+#define HC_SubA_HABLRAa         0x003d
+#define HC_SubA_HABLRAb         0x003e
+/* HC_SubA_HABFM
+ */
+#define HC_HABFM_MASK           0x00030000
+#define HC_HABLoc_MASK          0x0000c000
+#define HC_HABPit_MASK          0x000007ff
+/* HC_SubA_HATMD
+ */
+#define HC_HATMD_MASK           0x00000700
+#define HC_HATREF_MASK          0x000000ff
+#define HC_HATMD_NeverPass      0x00000000
+#define HC_HATMD_LT             0x00000100
+#define HC_HATMD_EQ             0x00000200
+#define HC_HATMD_LE             0x00000300
+#define HC_HATMD_GT             0x00000400
+#define HC_HATMD_NE             0x00000500
+#define HC_HATMD_GE             0x00000600
+#define HC_HATMD_AllPass        0x00000700
+/* HC_SubA_HABLCsat
+ */
+#define HC_HABLCsat_MASK        0x00010000
+#define HC_HABLCa_MASK          0x0000fc00
+#define HC_HABLCa_C_MASK        0x0000c000
+#define HC_HABLCa_OPC_MASK      0x00003c00
+#define HC_HABLFCa_MASK         0x000003f0
+#define HC_HABLFCa_C_MASK       0x00000300
+#define HC_HABLFCa_OPC_MASK     0x000000f0
+#define HC_HABLCbias_MASK       0x0000000f
+#define HC_HABLCbias_C_MASK     0x00000008
+#define HC_HABLCbias_OPC_MASK   0x00000007
+/*-- Define the input color: selector values occupy the low nibble; OPC/InvOPC/OPCp5 occupy bits 4-5.
+ * NOTE(review): "mim" in HC_XC_mimAsrcInvAdst is presumably "min" (cf. HC_XA_minAsrcInvAdst); name left unchanged. */
+#define HC_XC_Csrc              0x00000000
+#define HC_XC_Cdst              0x00000001
+#define HC_XC_Asrc              0x00000002
+#define HC_XC_Adst              0x00000003
+#define HC_XC_Fog               0x00000004
+#define HC_XC_HABLRC            0x00000005
+#define HC_XC_minSrcDst         0x00000006
+#define HC_XC_maxSrcDst         0x00000007
+#define HC_XC_mimAsrcInvAdst    0x00000008
+#define HC_XC_OPC               0x00000000
+#define HC_XC_InvOPC            0x00000010
+#define HC_XC_OPCp5             0x00000020
+/*-- Define the input Alpha: selector values occupy the low nibble; OPA/InvOPA/OPAp5 occupy bits 4-5.
+ * HC_XA_HABLRA and HC_XA_minAsrcInvAdst share 0x8: the former feeds the HABLAa/HABLAb encodings, the latter HABLFAa/HABLFAb. */
+#define HC_XA_OPA               0x00000000
+#define HC_XA_InvOPA            0x00000010
+#define HC_XA_OPAp5             0x00000020
+#define HC_XA_0                 0x00000000
+#define HC_XA_Asrc              0x00000001
+#define HC_XA_Adst              0x00000002
+#define HC_XA_Fog               0x00000003
+#define HC_XA_minAsrcFog        0x00000004
+#define HC_XA_minAsrcAdst       0x00000005
+#define HC_XA_maxAsrcFog        0x00000006
+#define HC_XA_maxAsrcAdst       0x00000007
+#define HC_XA_HABLRA            0x00000008
+#define HC_XA_minAsrcInvAdst    0x00000008
+#define HC_XA_HABLFRA           0x00000009
+/*--
+ */
+#define HC_HABLCa_OPC           (HC_XC_OPC << 10)
+#define HC_HABLCa_InvOPC        (HC_XC_InvOPC << 10)
+#define HC_HABLCa_OPCp5         (HC_XC_OPCp5 << 10)
+#define HC_HABLCa_Csrc          (HC_XC_Csrc << 10)
+#define HC_HABLCa_Cdst          (HC_XC_Cdst << 10)
+#define HC_HABLCa_Asrc          (HC_XC_Asrc << 10)
+#define HC_HABLCa_Adst          (HC_XC_Adst << 10)
+#define HC_HABLCa_Fog           (HC_XC_Fog << 10)
+#define HC_HABLCa_HABLRCa       (HC_XC_HABLRC << 10)
+#define HC_HABLCa_minSrcDst     (HC_XC_minSrcDst << 10)
+#define HC_HABLCa_maxSrcDst     (HC_XC_maxSrcDst << 10)
+#define HC_HABLFCa_OPC              (HC_XC_OPC << 4)
+#define HC_HABLFCa_InvOPC           (HC_XC_InvOPC << 4)
+#define HC_HABLFCa_OPCp5            (HC_XC_OPCp5 << 4)
+#define HC_HABLFCa_Csrc             (HC_XC_Csrc << 4)
+#define HC_HABLFCa_Cdst             (HC_XC_Cdst << 4)
+#define HC_HABLFCa_Asrc             (HC_XC_Asrc << 4)
+#define HC_HABLFCa_Adst             (HC_XC_Adst << 4)
+#define HC_HABLFCa_Fog              (HC_XC_Fog << 4)
+#define HC_HABLFCa_HABLRCa          (HC_XC_HABLRC << 4)
+#define HC_HABLFCa_minSrcDst        (HC_XC_minSrcDst << 4)
+#define HC_HABLFCa_maxSrcDst        (HC_XC_maxSrcDst << 4)
+#define HC_HABLFCa_mimAsrcInvAdst   (HC_XC_mimAsrcInvAdst << 4)
+#define HC_HABLCbias_HABLRCbias 0x00000000
+#define HC_HABLCbias_Asrc       0x00000001
+#define HC_HABLCbias_Adst       0x00000002
+#define HC_HABLCbias_Fog        0x00000003
+#define HC_HABLCbias_Cin        0x00000004
+/* HC_SubA_HABLCop         0x0035
+ */
+#define HC_HABLdot_MASK         0x00010000
+#define HC_HABLCop_MASK         0x00004000
+#define HC_HABLCb_MASK          0x00003f00
+#define HC_HABLCb_C_MASK        0x00003000
+#define HC_HABLCb_OPC_MASK      0x00000f00
+#define HC_HABLFCb_MASK         0x000000fc
+#define HC_HABLFCb_C_MASK       0x000000c0
+#define HC_HABLFCb_OPC_MASK     0x0000003c
+#define HC_HABLCshift_MASK      0x00000003
+#define HC_HABLCb_OPC           (HC_XC_OPC << 8)
+#define HC_HABLCb_InvOPC        (HC_XC_InvOPC << 8)
+#define HC_HABLCb_OPCp5         (HC_XC_OPCp5 << 8)
+#define HC_HABLCb_Csrc          (HC_XC_Csrc << 8)
+#define HC_HABLCb_Cdst          (HC_XC_Cdst << 8)
+#define HC_HABLCb_Asrc          (HC_XC_Asrc << 8)
+#define HC_HABLCb_Adst          (HC_XC_Adst << 8)
+#define HC_HABLCb_Fog           (HC_XC_Fog << 8)
+#define HC_HABLCb_HABLRCa       (HC_XC_HABLRC << 8)
+#define HC_HABLCb_minSrcDst     (HC_XC_minSrcDst << 8)
+#define HC_HABLCb_maxSrcDst     (HC_XC_maxSrcDst << 8)
+#define HC_HABLFCb_OPC              (HC_XC_OPC << 2)
+#define HC_HABLFCb_InvOPC           (HC_XC_InvOPC << 2)
+#define HC_HABLFCb_OPCp5            (HC_XC_OPCp5 << 2)
+#define HC_HABLFCb_Csrc             (HC_XC_Csrc << 2)
+#define HC_HABLFCb_Cdst             (HC_XC_Cdst << 2)
+#define HC_HABLFCb_Asrc             (HC_XC_Asrc << 2)
+#define HC_HABLFCb_Adst             (HC_XC_Adst << 2)
+#define HC_HABLFCb_Fog              (HC_XC_Fog << 2)
+#define HC_HABLFCb_HABLRCb          (HC_XC_HABLRC << 2)
+#define HC_HABLFCb_minSrcDst        (HC_XC_minSrcDst << 2)
+#define HC_HABLFCb_maxSrcDst        (HC_XC_maxSrcDst << 2)
+#define HC_HABLFCb_mimAsrcInvAdst   (HC_XC_mimAsrcInvAdst << 2)
+/* HC_SubA_HABLAsat        0x0036
+ */
+#define HC_HABLAsat_MASK        0x00010000
+#define HC_HABLAa_MASK          0x0000fc00
+#define HC_HABLAa_A_MASK        0x0000c000
+#define HC_HABLAa_OPA_MASK      0x00003c00
+#define HC_HABLFAa_MASK         0x000003f0
+#define HC_HABLFAa_A_MASK       0x00000300
+#define HC_HABLFAa_OPA_MASK     0x000000f0
+#define HC_HABLAbias_MASK       0x0000000f
+#define HC_HABLAbias_A_MASK     0x00000008
+#define HC_HABLAbias_OPA_MASK   0x00000007
+#define HC_HABLAa_OPA           (HC_XA_OPA << 10)
+#define HC_HABLAa_InvOPA        (HC_XA_InvOPA << 10)
+#define HC_HABLAa_OPAp5         (HC_XA_OPAp5 << 10)
+#define HC_HABLAa_0             (HC_XA_0 << 10)
+#define HC_HABLAa_Asrc          (HC_XA_Asrc << 10)
+#define HC_HABLAa_Adst          (HC_XA_Adst << 10)
+#define HC_HABLAa_Fog           (HC_XA_Fog << 10)
+#define HC_HABLAa_minAsrcFog    (HC_XA_minAsrcFog << 10)
+#define HC_HABLAa_minAsrcAdst   (HC_XA_minAsrcAdst << 10)
+#define HC_HABLAa_maxAsrcFog    (HC_XA_maxAsrcFog << 10)
+#define HC_HABLAa_maxAsrcAdst   (HC_XA_maxAsrcAdst << 10)
+#define HC_HABLAa_HABLRA        (HC_XA_HABLRA << 10)
+#define HC_HABLFAa_OPA          (HC_XA_OPA << 4)
+#define HC_HABLFAa_InvOPA       (HC_XA_InvOPA << 4)
+#define HC_HABLFAa_OPAp5        (HC_XA_OPAp5 << 4)
+#define HC_HABLFAa_0            (HC_XA_0 << 4)
+#define HC_HABLFAa_Asrc         (HC_XA_Asrc << 4)
+#define HC_HABLFAa_Adst         (HC_XA_Adst << 4)
+#define HC_HABLFAa_Fog          (HC_XA_Fog << 4)
+#define HC_HABLFAa_minAsrcFog   (HC_XA_minAsrcFog << 4)
+#define HC_HABLFAa_minAsrcAdst  (HC_XA_minAsrcAdst << 4)
+#define HC_HABLFAa_maxAsrcFog   (HC_XA_maxAsrcFog << 4)
+#define HC_HABLFAa_maxAsrcAdst  (HC_XA_maxAsrcAdst << 4)
+#define HC_HABLFAa_minAsrcInvAdst   (HC_XA_minAsrcInvAdst << 4)
+#define HC_HABLFAa_HABLFRA          (HC_XA_HABLFRA << 4)
+#define HC_HABLAbias_HABLRAbias 0x00000000
+#define HC_HABLAbias_Asrc       0x00000001
+#define HC_HABLAbias_Adst       0x00000002
+#define HC_HABLAbias_Fog        0x00000003
+#define HC_HABLAbias_Aaa        0x00000004
+/* HC_SubA_HABLAop         0x0037
+ */
+#define HC_HABLAop_MASK         0x00004000
+#define HC_HABLAb_MASK          0x00003f00
+#define HC_HABLAb_OPA_MASK      0x00000f00
+#define HC_HABLFAb_MASK         0x000000fc
+#define HC_HABLFAb_OPA_MASK     0x0000003c
+#define HC_HABLAshift_MASK      0x00000003
+#define HC_HABLAb_OPA           (HC_XA_OPA << 8)
+#define HC_HABLAb_InvOPA        (HC_XA_InvOPA << 8)
+#define HC_HABLAb_OPAp5         (HC_XA_OPAp5 << 8)
+#define HC_HABLAb_0             (HC_XA_0 << 8)
+#define HC_HABLAb_Asrc          (HC_XA_Asrc << 8)
+#define HC_HABLAb_Adst          (HC_XA_Adst << 8)
+#define HC_HABLAb_Fog           (HC_XA_Fog << 8)
+#define HC_HABLAb_minAsrcFog    (HC_XA_minAsrcFog << 8)
+#define HC_HABLAb_minAsrcAdst   (HC_XA_minAsrcAdst << 8)
+#define HC_HABLAb_maxAsrcFog    (HC_XA_maxAsrcFog << 8)
+#define HC_HABLAb_maxAsrcAdst   (HC_XA_maxAsrcAdst << 8)
+#define HC_HABLAb_HABLRA        (HC_XA_HABLRA << 8)
+#define HC_HABLFAb_OPA          (HC_XA_OPA << 2)
+#define HC_HABLFAb_InvOPA       (HC_XA_InvOPA << 2)
+#define HC_HABLFAb_OPAp5        (HC_XA_OPAp5 << 2)
+#define HC_HABLFAb_0            (HC_XA_0 << 2)
+#define HC_HABLFAb_Asrc         (HC_XA_Asrc << 2)
+#define HC_HABLFAb_Adst         (HC_XA_Adst << 2)
+#define HC_HABLFAb_Fog          (HC_XA_Fog << 2)
+#define HC_HABLFAb_minAsrcFog   (HC_XA_minAsrcFog << 2)
+#define HC_HABLFAb_minAsrcAdst  (HC_XA_minAsrcAdst << 2)
+#define HC_HABLFAb_maxAsrcFog   (HC_XA_maxAsrcFog << 2)
+#define HC_HABLFAb_maxAsrcAdst  (HC_XA_maxAsrcAdst << 2)
+#define HC_HABLFAb_minAsrcInvAdst   (HC_XA_minAsrcInvAdst << 2)
+#define HC_HABLFAb_HABLFRA          (HC_XA_HABLFRA << 2)
+/* HC_SubA_HABLRAa         0x003d
+ * Fields: RAa bits 16-23, RFAa bits 8-15, RAbias bits 0-7. */
+#define HC_HABLRAa_MASK         0x00ff0000
+#define HC_HABLRFAa_MASK        0x0000ff00
+#define HC_HABLRAbias_MASK      0x000000ff
+#define HC_HABLRAa_SHIFT        16
+#define HC_HABLRFAa_SHIFT       8
+/* HC_SubA_HABLRAb         0x003e
+ * Fields: RAb bits 8-15, RFAb bits 0-7. */
+#define HC_HABLRAb_MASK         0x0000ff00
+#define HC_HABLRFAb_MASK        0x000000ff
+#define HC_HABLRAb_SHIFT        8
+
+/* Destination Setting: HC_SubA_* sub-addresses of the registers in this group.
+ */
+#define HC_SubA_HDBBasL         0x0040
+#define HC_SubA_HDBBasH         0x0041
+#define HC_SubA_HDBFM           0x0042
+#define HC_SubA_HFBBMSKL        0x0043
+#define HC_SubA_HROP            0x0044
+/* HC_SubA_HDBFM           0x0042
+ * Fields: FM (format) bits 16-20, Loc bits 14-15, Pit bits 0-13. */
+#define HC_HDBFM_MASK           0x001f0000
+#define HC_HDBLoc_MASK          0x0000c000
+#define HC_HDBPit_MASK          0x00003fff
+#define HC_HDBFM_RGB555         0x00000000 /* FM values: codes 0x0-0xb placed at bit 16 */
+#define HC_HDBFM_RGB565         0x00010000
+#define HC_HDBFM_ARGB4444       0x00020000
+#define HC_HDBFM_ARGB1555       0x00030000
+#define HC_HDBFM_BGR555         0x00040000
+#define HC_HDBFM_BGR565         0x00050000
+#define HC_HDBFM_ABGR4444       0x00060000
+#define HC_HDBFM_ABGR1555       0x00070000
+#define HC_HDBFM_ARGB0888       0x00080000
+#define HC_HDBFM_ARGB8888       0x00090000
+#define HC_HDBFM_ABGR0888       0x000a0000
+#define HC_HDBFM_ABGR8888       0x000b0000
+#define HC_HDBLoc_Local         0x00000000 /* Loc values: codes placed at bit 14 */
+#define HC_HDBLoc_Sys           0x00004000
+/* HC_SubA_HROP            0x0044
+ * Fields: ROP code bits 8-11 (all 16 codes listed below), HFBBMSKH bits 0-7. */
+#define HC_HROP_MASK            0x00000f00
+#define HC_HFBBMSKH_MASK        0x000000ff
+#define HC_HROP_BLACK           0x00000000 /* NOTE(review): names look like ROP2-style D/P mnemonics -- confirm */
+#define HC_HROP_DPon            0x00000100
+#define HC_HROP_DPna            0x00000200
+#define HC_HROP_Pn              0x00000300
+#define HC_HROP_PDna            0x00000400
+#define HC_HROP_Dn              0x00000500
+#define HC_HROP_DPx             0x00000600
+#define HC_HROP_DPan            0x00000700
+#define HC_HROP_DPa             0x00000800
+#define HC_HROP_DPxn            0x00000900
+#define HC_HROP_D               0x00000a00
+#define HC_HROP_DPno            0x00000b00
+#define HC_HROP_P               0x00000c00
+#define HC_HROP_PDno            0x00000d00
+#define HC_HROP_DPo             0x00000e00
+#define HC_HROP_WHITE           0x00000f00
+
+/* Fog Setting: HC_SubA_* sub-addresses of the registers in this group.
+ */
+#define HC_SubA_HFogLF          0x0050
+#define HC_SubA_HFogCL          0x0051
+#define HC_SubA_HFogCH          0x0052
+#define HC_SubA_HFogStL         0x0053
+#define HC_SubA_HFogStH         0x0054
+#define HC_SubA_HFogOOdMF       0x0055
+#define HC_SubA_HFogOOdEF       0x0056
+#define HC_SubA_HFogEndL        0x0057
+#define HC_SubA_HFogDenst       0x0058
+/* HC_SubA_HFogLF          0x0050
+ * Fields: LF bit 4, Eq bit 3, MD (fog mode) bits 0-2. */
+#define HC_FogLF_MASK           0x00000010
+#define HC_FogEq_MASK           0x00000008
+#define HC_FogMD_MASK           0x00000007
+#define HC_FogMD_LocalFog        0x00000000
+#define HC_FogMD_LinearFog       0x00000002
+#define HC_FogMD_ExponentialFog  0x00000004
+#define HC_FogMD_Exponential2Fog 0x00000005
+/* #define HC_FogMD_FogTable       0x00000003 */
+
+/* HC_SubA_HFogDenst        0x0058
+ * Fields: Denst bits 8-20, EndL bits 0-7. */
+#define HC_FogDenst_MASK        0x001fff00
+#define HC_FogEndL_MASK         0x000000ff
+
+/* Texture subtype definitions: HC_SubType_* values for texture 0, texture 1,
+ * and the "general" texture group. */
+#define HC_SubType_Tex0         0x00000000
+#define HC_SubType_Tex1         0x00000001
+#define HC_SubType_TexGeneral   0x000000fe
+
+/* Attribute of texture n: HC_SubA_* sub-addresses of the per-texture registers.
+ * Level suffixes in names run L0-L9, La-Lf, L10, L11 (18 levels in all). */
+#define HC_SubA_HTXnL0BasL      0x0000
+#define HC_SubA_HTXnL1BasL      0x0001
+#define HC_SubA_HTXnL2BasL      0x0002
+#define HC_SubA_HTXnL3BasL      0x0003
+#define HC_SubA_HTXnL4BasL      0x0004
+#define HC_SubA_HTXnL5BasL      0x0005
+#define HC_SubA_HTXnL6BasL      0x0006
+#define HC_SubA_HTXnL7BasL      0x0007
+#define HC_SubA_HTXnL8BasL      0x0008
+#define HC_SubA_HTXnL9BasL      0x0009
+#define HC_SubA_HTXnLaBasL      0x000a
+#define HC_SubA_HTXnLbBasL      0x000b
+#define HC_SubA_HTXnLcBasL      0x000c
+#define HC_SubA_HTXnLdBasL      0x000d
+#define HC_SubA_HTXnLeBasL      0x000e
+#define HC_SubA_HTXnLfBasL      0x000f
+#define HC_SubA_HTXnL10BasL     0x0010
+#define HC_SubA_HTXnL11BasL     0x0011
+#define HC_SubA_HTXnL012BasH    0x0020
+#define HC_SubA_HTXnL345BasH    0x0021
+#define HC_SubA_HTXnL678BasH    0x0022
+#define HC_SubA_HTXnL9abBasH    0x0023
+#define HC_SubA_HTXnLcdeBasH    0x0024
+#define HC_SubA_HTXnLf1011BasH  0x0025
+#define HC_SubA_HTXnL0Pit       0x002b
+#define HC_SubA_HTXnL1Pit       0x002c
+#define HC_SubA_HTXnL2Pit       0x002d
+#define HC_SubA_HTXnL3Pit       0x002e
+#define HC_SubA_HTXnL4Pit       0x002f
+#define HC_SubA_HTXnL5Pit       0x0030
+#define HC_SubA_HTXnL6Pit       0x0031
+#define HC_SubA_HTXnL7Pit       0x0032
+#define HC_SubA_HTXnL8Pit       0x0033
+#define HC_SubA_HTXnL9Pit       0x0034
+#define HC_SubA_HTXnLaPit       0x0035
+#define HC_SubA_HTXnLbPit       0x0036
+#define HC_SubA_HTXnLcPit       0x0037
+#define HC_SubA_HTXnLdPit       0x0038
+#define HC_SubA_HTXnLePit       0x0039
+#define HC_SubA_HTXnLfPit       0x003a
+#define HC_SubA_HTXnL10Pit      0x003b
+#define HC_SubA_HTXnL11Pit      0x003c
+#define HC_SubA_HTXnL0_5WE      0x004b
+#define HC_SubA_HTXnL6_bWE      0x004c
+#define HC_SubA_HTXnLc_11WE     0x004d
+#define HC_SubA_HTXnL0_5HE      0x0051
+#define HC_SubA_HTXnL6_bHE      0x0052
+#define HC_SubA_HTXnLc_11HE     0x0053
+#define HC_SubA_HTXnL0OS        0x0077
+#define HC_SubA_HTXnTB          0x0078
+#define HC_SubA_HTXnMPMD        0x0079
+#define HC_SubA_HTXnCLODu       0x007a
+#define HC_SubA_HTXnFM          0x007b
+#define HC_SubA_HTXnTRCH        0x007c
+#define HC_SubA_HTXnTRCL        0x007d
+#define HC_SubA_HTXnTBC         0x007e
+#define HC_SubA_HTXnTRAH        0x007f
+#define HC_SubA_HTXnTBLCsat     0x0080
+#define HC_SubA_HTXnTBLCop      0x0081
+#define HC_SubA_HTXnTBLMPfog    0x0082
+#define HC_SubA_HTXnTBLAsat     0x0083
+#define HC_SubA_HTXnTBLRCa      0x0085
+#define HC_SubA_HTXnTBLRCb      0x0086
+#define HC_SubA_HTXnTBLRCc      0x0087
+#define HC_SubA_HTXnTBLRCbias   0x0088
+#define HC_SubA_HTXnTBLRAa      0x0089
+#define HC_SubA_HTXnTBLRFog     0x008a
+#define HC_SubA_HTXnBumpM00     0x0090
+#define HC_SubA_HTXnBumpM01     0x0091
+#define HC_SubA_HTXnBumpM10     0x0092
+#define HC_SubA_HTXnBumpM11     0x0093
+#define HC_SubA_HTXnLScale      0x0094
+#define HC_SubA_HTXSMD          0x0000 /* NOTE(review): same value as HC_SubA_HTXnL0BasL; presumably used under a different sub-type -- confirm */
+/* HC_SubA_HTXnL012BasH    0x0020
+ * Three 8-bit fields: L0 bits 0-7, L1 bits 8-15, L2 bits 16-23. */
+#define HC_HTXnL0BasH_MASK      0x000000ff
+#define HC_HTXnL1BasH_MASK      0x0000ff00
+#define HC_HTXnL2BasH_MASK      0x00ff0000
+#define HC_HTXnL1BasH_SHIFT     8
+#define HC_HTXnL2BasH_SHIFT     16
+/* HC_SubA_HTXnL345BasH    0x0021
+ * Three 8-bit fields: L3 bits 0-7, L4 bits 8-15, L5 bits 16-23. */
+#define HC_HTXnL3BasH_MASK      0x000000ff
+#define HC_HTXnL4BasH_MASK      0x0000ff00
+#define HC_HTXnL5BasH_MASK      0x00ff0000
+#define HC_HTXnL4BasH_SHIFT     8
+#define HC_HTXnL5BasH_SHIFT     16
+/* HC_SubA_HTXnL678BasH    0x0022
+ * Three 8-bit fields: L6 bits 0-7, L7 bits 8-15, L8 bits 16-23. */
+#define HC_HTXnL6BasH_MASK      0x000000ff
+#define HC_HTXnL7BasH_MASK      0x0000ff00
+#define HC_HTXnL8BasH_MASK      0x00ff0000
+#define HC_HTXnL7BasH_SHIFT     8
+#define HC_HTXnL8BasH_SHIFT     16
+/* HC_SubA_HTXnL9abBasH    0x0023
+ * Three 8-bit fields: L9 bits 0-7, La bits 8-15, Lb bits 16-23. */
+#define HC_HTXnL9BasH_MASK      0x000000ff
+#define HC_HTXnLaBasH_MASK      0x0000ff00
+#define HC_HTXnLbBasH_MASK      0x00ff0000
+#define HC_HTXnLaBasH_SHIFT     8
+#define HC_HTXnLbBasH_SHIFT     16
+/* HC_SubA_HTXnLcdeBasH    0x0024
+ * Three 8-bit fields: Lc bits 0-7, Ld bits 8-15, Le bits 16-23. */
+#define HC_HTXnLcBasH_MASK      0x000000ff
+#define HC_HTXnLdBasH_MASK      0x0000ff00
+#define HC_HTXnLeBasH_MASK      0x00ff0000
+#define HC_HTXnLdBasH_SHIFT     8
+#define HC_HTXnLeBasH_SHIFT     16
+/* HC_SubA_HTXnLf1011BasH  0x0025
+ * Three 8-bit fields: Lf bits 0-7, L10 bits 8-15, L11 bits 16-23. */
+#define HC_HTXnLfBasH_MASK      0x000000ff
+#define HC_HTXnL10BasH_MASK      0x0000ff00
+#define HC_HTXnL11BasH_MASK      0x00ff0000
+#define HC_HTXnL10BasH_SHIFT     8
+#define HC_HTXnL11BasH_SHIFT     16
+/* HC_SubA_HTXnL0Pit       0x002b
+ * Fields: LnPit bits 0-13, EnPit bit 19, LnPitE bits 20-23. */
+#define HC_HTXnLnPit_MASK       0x00003fff
+#define HC_HTXnEnPit_MASK       0x00080000
+#define HC_HTXnLnPitE_MASK      0x00f00000
+#define HC_HTXnLnPitE_SHIFT     20
+/* HC_SubA_HTXnL0_5WE      0x004b
+ * Six 4-bit fields, L0 in bits 0-3 up to L5 in bits 20-23. */
+#define HC_HTXnL0WE_MASK        0x0000000f
+#define HC_HTXnL1WE_MASK        0x000000f0
+#define HC_HTXnL2WE_MASK        0x00000f00
+#define HC_HTXnL3WE_MASK        0x0000f000
+#define HC_HTXnL4WE_MASK        0x000f0000
+#define HC_HTXnL5WE_MASK        0x00f00000
+#define HC_HTXnL1WE_SHIFT       4
+#define HC_HTXnL2WE_SHIFT       8
+#define HC_HTXnL3WE_SHIFT       12
+#define HC_HTXnL4WE_SHIFT       16
+#define HC_HTXnL5WE_SHIFT       20
+/* HC_SubA_HTXnL6_bWE      0x004c
+ * Six 4-bit fields, L6 in bits 0-3 up to Lb in bits 20-23. */
+#define HC_HTXnL6WE_MASK        0x0000000f
+#define HC_HTXnL7WE_MASK        0x000000f0
+#define HC_HTXnL8WE_MASK        0x00000f00
+#define HC_HTXnL9WE_MASK        0x0000f000
+#define HC_HTXnLaWE_MASK        0x000f0000
+#define HC_HTXnLbWE_MASK        0x00f00000
+#define HC_HTXnL7WE_SHIFT       4
+#define HC_HTXnL8WE_SHIFT       8
+#define HC_HTXnL9WE_SHIFT       12
+#define HC_HTXnLaWE_SHIFT       16
+#define HC_HTXnLbWE_SHIFT       20
+/* HC_SubA_HTXnLc_11WE      0x004d
+ * Six 4-bit fields, Lc in bits 0-3 up to L11 in bits 20-23. */
+#define HC_HTXnLcWE_MASK        0x0000000f
+#define HC_HTXnLdWE_MASK        0x000000f0
+#define HC_HTXnLeWE_MASK        0x00000f00
+#define HC_HTXnLfWE_MASK        0x0000f000
+#define HC_HTXnL10WE_MASK       0x000f0000
+#define HC_HTXnL11WE_MASK       0x00f00000
+#define HC_HTXnLdWE_SHIFT       4
+#define HC_HTXnLeWE_SHIFT       8
+#define HC_HTXnLfWE_SHIFT       12
+#define HC_HTXnL10WE_SHIFT      16
+#define HC_HTXnL11WE_SHIFT      20
+/* HC_SubA_HTXnL0_5HE      0x0051
+ * Six 4-bit fields, L0 in bits 0-3 up to L5 in bits 20-23. */
+#define HC_HTXnL0HE_MASK        0x0000000f
+#define HC_HTXnL1HE_MASK        0x000000f0
+#define HC_HTXnL2HE_MASK        0x00000f00
+#define HC_HTXnL3HE_MASK        0x0000f000
+#define HC_HTXnL4HE_MASK        0x000f0000
+#define HC_HTXnL5HE_MASK        0x00f00000
+#define HC_HTXnL1HE_SHIFT       4
+#define HC_HTXnL2HE_SHIFT       8
+#define HC_HTXnL3HE_SHIFT       12
+#define HC_HTXnL4HE_SHIFT       16
+#define HC_HTXnL5HE_SHIFT       20
+/* HC_SubA_HTXnL6_bHE      0x0052
+ * Six 4-bit fields, L6 in bits 0-3 up to Lb in bits 20-23. */
+#define HC_HTXnL6HE_MASK        0x0000000f
+#define HC_HTXnL7HE_MASK        0x000000f0
+#define HC_HTXnL8HE_MASK        0x00000f00
+#define HC_HTXnL9HE_MASK        0x0000f000
+#define HC_HTXnLaHE_MASK        0x000f0000
+#define HC_HTXnLbHE_MASK        0x00f00000
+#define HC_HTXnL7HE_SHIFT       4
+#define HC_HTXnL8HE_SHIFT       8
+#define HC_HTXnL9HE_SHIFT       12
+#define HC_HTXnLaHE_SHIFT       16
+#define HC_HTXnLbHE_SHIFT       20
+/* HC_SubA_HTXnLc_11HE      0x0053
+ * Six 4-bit fields, Lc in bits 0-3 up to L11 in bits 20-23. */
+#define HC_HTXnLcHE_MASK        0x0000000f
+#define HC_HTXnLdHE_MASK        0x000000f0
+#define HC_HTXnLeHE_MASK        0x00000f00
+#define HC_HTXnLfHE_MASK        0x0000f000
+#define HC_HTXnL10HE_MASK       0x000f0000
+#define HC_HTXnL11HE_MASK       0x00f00000
+#define HC_HTXnLdHE_SHIFT       4
+#define HC_HTXnLeHE_SHIFT       8
+#define HC_HTXnLfHE_SHIFT       12
+#define HC_HTXnL10HE_SHIFT      16
+#define HC_HTXnL11HE_SHIFT      20
+/* HC_SubA_HTXnL0OS        0x0077
+ * Fields: L0OS bits 12-21, LVmax bits 6-11, LVmin bits 0-5. */
+#define HC_HTXnL0OS_MASK        0x003ff000
+#define HC_HTXnLVmax_MASK       0x00000fc0
+#define HC_HTXnLVmin_MASK       0x0000003f
+#define HC_HTXnL0OS_SHIFT       12
+#define HC_HTXnLVmax_SHIFT      6
+/* HC_SubA_HTXnTB          0x0078
+ * Fields: TB 20-23, FLSe 13-15, FLSs 10-12, FLTe 7-9, FLTs 4-6, FLDs 0-3. */
+#define HC_HTXnTB_MASK          0x00f00000
+#define HC_HTXnFLSe_MASK        0x0000e000
+#define HC_HTXnFLSs_MASK        0x00001c00
+#define HC_HTXnFLTe_MASK        0x00000380
+#define HC_HTXnFLTs_MASK        0x00000070
+#define HC_HTXnFLDs_MASK        0x0000000f
+#define HC_HTXnTB_NoTB          0x00000000 /* TB values: one flag per bit in bits 20-23 */
+#define HC_HTXnTB_TBC_S         0x00100000
+#define HC_HTXnTB_TBC_T         0x00200000
+#define HC_HTXnTB_TB_S          0x00400000
+#define HC_HTXnTB_TB_T          0x00800000
+
+/* The "S" in FLS? means the S texture coordinate, and the "T" in FLT? means
+ * the T texture coordinate.  The "e" in FL?e means the magnification
+ * ("enlarge") mode, and the "s" in FL?s means the minification ("shrink") mode.
+ *
+ * The "D" in FLD? means the inter-mipmap-level mode.  That is, the
+ * GL_*_MIPMAP_LINEAR modes get FLDs_Linear, and the GL_*_MIPMAP_NEAREST modes
+ * get FLDs_Nearest.
+ */
+#define HC_HTXnFLSe_Nearest     0x00000000 /* FLSe values: codes in bits 13-15 */
+#define HC_HTXnFLSe_Linear      0x00002000
+#define HC_HTXnFLSe_NonLinear   0x00004000
+#define HC_HTXnFLSe_Sharp       0x00008000 /* NOTE(review): code 4 here, but FLTe_Sharp is code 3 -- confirm against hardware docs */
+#define HC_HTXnFLSe_Flat_Gaussian_Cubic 0x0000c000
+#define HC_HTXnFLSs_Nearest     0x00000000 /* FLSs values: codes in bits 10-12 */
+#define HC_HTXnFLSs_Linear      0x00000400
+#define HC_HTXnFLSs_NonLinear   0x00000800
+#define HC_HTXnFLSs_Flat_Gaussian_Cubic 0x00001800
+#define HC_HTXnFLTe_Nearest     0x00000000 /* FLTe values: codes in bits 7-9 */
+#define HC_HTXnFLTe_Linear      0x00000080
+#define HC_HTXnFLTe_NonLinear   0x00000100
+#define HC_HTXnFLTe_Sharp       0x00000180 /* code 3; see the review note on HC_HTXnFLSe_Sharp's value */
+#define HC_HTXnFLTe_Flat_Gaussian_Cubic 0x00000300
+#define HC_HTXnFLTs_Nearest     0x00000000 /* FLTs values: codes in bits 4-6 */
+#define HC_HTXnFLTs_Linear      0x00000010
+#define HC_HTXnFLTs_NonLinear   0x00000020
+#define HC_HTXnFLTs_Flat_Gaussian_Cubic 0x00000060
+#define HC_HTXnFLDs_Tex0        0x00000000 /* FLDs values: codes 0-7 in the low bits */
+#define HC_HTXnFLDs_Nearest     0x00000001
+#define HC_HTXnFLDs_Linear      0x00000002
+#define HC_HTXnFLDs_NonLinear   0x00000003
+#define HC_HTXnFLDs_Dither      0x00000004
+#define HC_HTXnFLDs_ConstLOD    0x00000005
+#define HC_HTXnFLDs_Ani         0x00000006
+#define HC_HTXnFLDs_AniDither   0x00000007
+
+/* HC_SubA_HTXnMPMD        0x0079
+ * Fields: T mode bits 19-21, S mode bits 16-18, XY2ST bit 3, LODDTf bits 0-2. */
+#define HC_HTXnMPMD_SMASK       0x00070000
+#define HC_HTXnMPMD_TMASK       0x00380000
+#define HC_HTXnLODDTf_MASK      0x00000007
+#define HC_HTXnXY2ST_MASK       0x00000008
+#define HC_HTXnMPMD_Tsingle     0x00000000 /* T values: codes 0-4 placed at bit 19 */
+#define HC_HTXnMPMD_Tclamp      0x00080000
+#define HC_HTXnMPMD_Trepeat     0x00100000
+#define HC_HTXnMPMD_Tmirror     0x00180000
+#define HC_HTXnMPMD_Twrap       0x00200000
+#define HC_HTXnMPMD_Ssingle     0x00000000 /* S values: codes 0-4 placed at bit 16 */
+#define HC_HTXnMPMD_Sclamp      0x00010000
+#define HC_HTXnMPMD_Srepeat     0x00020000
+#define HC_HTXnMPMD_Smirror     0x00030000
+#define HC_HTXnMPMD_Swrap       0x00040000
+/* HC_SubA_HTXnCLODu       0x007a
+ * Fields: CLODu bits 10-19, CLODd bits 0-9. */
+#define HC_HTXnCLODu_MASK       0x000ffc00
+#define HC_HTXnCLODd_MASK       0x000003ff
+#define HC_HTXnCLODu_SHIFT      10
+/* HC_SubA_HTXnFM          0x007b
+ * Fields: FM bits 16-23 (family code at bit 19, variant in bits 16-18), Loc bits 0-1. */
+#define HC_HTXnFM_MASK          0x00ff0000
+#define HC_HTXnLoc_MASK         0x00000003
+#define HC_HTXnFM_INDEX         0x00000000 /*  0 << 19 */
+#define HC_HTXnFM_Intensity     0x00080000 /*  1 << 19 */
+#define HC_HTXnFM_Lum           0x00100000 /*  2 << 19 */
+#define HC_HTXnFM_Alpha         0x00180000 /*  3 << 19 */
+#define HC_HTXnFM_DX            0x00280000 /*  5 << 19 */
+#define HC_HTXnFM_BUMPMAP       0x00380000 /*  7 << 19 */
+#define HC_HTXnFM_ARGB16        0x00880000 /* 17 << 19 */
+#define HC_HTXnFM_ARGB32        0x00980000 /* 19 << 19 */
+#define HC_HTXnFM_ABGR16        0x00a80000 /* 21 << 19 */
+#define HC_HTXnFM_ABGR32        0x00b80000 /* 23 << 19 */
+#define HC_HTXnFM_RGBA16        0x00c80000 /* 25 << 19 */
+#define HC_HTXnFM_RGBA32        0x00d80000 /* 27 << 19 */
+#define HC_HTXnFM_BGRA16        0x00e80000 /* 29 << 19 */
+#define HC_HTXnFM_BGRA32        0x00f80000 /* 31 << 19 */
+#define HC_HTXnFM_Index1        (HC_HTXnFM_INDEX     | 0x00000000) /* concrete formats: family | variant in bits 16-18 */
+#define HC_HTXnFM_Index2        (HC_HTXnFM_INDEX     | 0x00010000)
+#define HC_HTXnFM_Index4        (HC_HTXnFM_INDEX     | 0x00020000)
+#define HC_HTXnFM_Index8        (HC_HTXnFM_INDEX     | 0x00030000)
+#define HC_HTXnFM_T1            (HC_HTXnFM_Intensity | 0x00000000)
+#define HC_HTXnFM_T2            (HC_HTXnFM_Intensity | 0x00010000)
+#define HC_HTXnFM_T4            (HC_HTXnFM_Intensity | 0x00020000)
+#define HC_HTXnFM_T8            (HC_HTXnFM_Intensity | 0x00030000)
+#define HC_HTXnFM_L1            (HC_HTXnFM_Lum       | 0x00000000)
+#define HC_HTXnFM_L2            (HC_HTXnFM_Lum       | 0x00010000)
+#define HC_HTXnFM_L4            (HC_HTXnFM_Lum       | 0x00020000)
+#define HC_HTXnFM_L8            (HC_HTXnFM_Lum       | 0x00030000)
+#define HC_HTXnFM_AL44          (HC_HTXnFM_Lum       | 0x00040000)
+#define HC_HTXnFM_AL88          (HC_HTXnFM_Lum       | 0x00050000)
+#define HC_HTXnFM_A1            (HC_HTXnFM_Alpha     | 0x00000000)
+#define HC_HTXnFM_A2            (HC_HTXnFM_Alpha     | 0x00010000)
+#define HC_HTXnFM_A4            (HC_HTXnFM_Alpha     | 0x00020000)
+#define HC_HTXnFM_A8            (HC_HTXnFM_Alpha     | 0x00030000)
+#define HC_HTXnFM_DX1           (HC_HTXnFM_DX        | 0x00010000)
+#define HC_HTXnFM_DX23          (HC_HTXnFM_DX        | 0x00020000)
+#define HC_HTXnFM_DX45          (HC_HTXnFM_DX        | 0x00030000)
+#define HC_HTXnFM_RGB555        (HC_HTXnFM_ARGB16    | 0x00000000)
+#define HC_HTXnFM_RGB565        (HC_HTXnFM_ARGB16    | 0x00010000)
+#define HC_HTXnFM_ARGB1555      (HC_HTXnFM_ARGB16    | 0x00020000)
+#define HC_HTXnFM_ARGB4444      (HC_HTXnFM_ARGB16    | 0x00030000)
+#define HC_HTXnFM_ARGB0888      (HC_HTXnFM_ARGB32    | 0x00000000)
+#define HC_HTXnFM_ARGB8888      (HC_HTXnFM_ARGB32    | 0x00010000)
+#define HC_HTXnFM_BGR555        (HC_HTXnFM_ABGR16    | 0x00000000)
+#define HC_HTXnFM_BGR565        (HC_HTXnFM_ABGR16    | 0x00010000)
+#define HC_HTXnFM_ABGR1555      (HC_HTXnFM_ABGR16    | 0x00020000)
+#define HC_HTXnFM_ABGR4444      (HC_HTXnFM_ABGR16    | 0x00030000)
+#define HC_HTXnFM_ABGR0888      (HC_HTXnFM_ABGR32    | 0x00000000)
+#define HC_HTXnFM_ABGR8888      (HC_HTXnFM_ABGR32    | 0x00010000)
+#define HC_HTXnFM_RGBA5550      (HC_HTXnFM_RGBA16    | 0x00000000)
+#define HC_HTXnFM_RGBA5551      (HC_HTXnFM_RGBA16    | 0x00020000)
+#define HC_HTXnFM_RGBA4444      (HC_HTXnFM_RGBA16    | 0x00030000)
+#define HC_HTXnFM_RGBA8880      (HC_HTXnFM_RGBA32    | 0x00000000)
+#define HC_HTXnFM_RGBA8888      (HC_HTXnFM_RGBA32    | 0x00010000)
+#define HC_HTXnFM_BGRA5550      (HC_HTXnFM_BGRA16    | 0x00000000)
+#define HC_HTXnFM_BGRA5551      (HC_HTXnFM_BGRA16    | 0x00020000)
+#define HC_HTXnFM_BGRA4444      (HC_HTXnFM_BGRA16    | 0x00030000)
+#define HC_HTXnFM_BGRA8880      (HC_HTXnFM_BGRA32    | 0x00000000)
+#define HC_HTXnFM_BGRA8888      (HC_HTXnFM_BGRA32    | 0x00010000)
+#define HC_HTXnFM_VU88          (HC_HTXnFM_BUMPMAP   | 0x00000000)
+#define HC_HTXnFM_LVU655        (HC_HTXnFM_BUMPMAP   | 0x00010000)
+#define HC_HTXnFM_LVU888        (HC_HTXnFM_BUMPMAP   | 0x00020000)
+#define HC_HTXnLoc_Local        0x00000000 /* Loc values (bits 0-1); code 1 is not defined here */
+#define HC_HTXnLoc_Sys          0x00000002
+#define HC_HTXnLoc_AGP          0x00000003
+/* HC_SubA_HTXnTRAH        0x007f
+ * Fields: TRAH bits 16-23, TRAL bits 8-15, TBA bits 0-7. */
+#define HC_HTXnTRAH_MASK        0x00ff0000
+#define HC_HTXnTRAL_MASK        0x0000ff00
+#define HC_HTXnTBA_MASK         0x000000ff
+#define HC_HTXnTRAH_SHIFT       16
+#define HC_HTXnTRAL_SHIFT       8
+
+/*-- HC_XTC_*: color input codes, shifted into the HTXnTBLCa/Cb/Cc/Cbias fields below.
+ */
+#define HC_XTC_TOPC             0x00000000 /* 0x10/0x20 sit above the 4-bit source codes; presumably OR-ed onto one -- TODO confirm */
+#define HC_XTC_InvTOPC          0x00000010
+#define HC_XTC_TOPCp5           0x00000020
+#define HC_XTC_Cbias            0x00000000
+#define HC_XTC_InvCbias         0x00000010
+#define HC_XTC_0                0x00000000 /* source codes 0x0-0xa occupy bits 0-3 */
+#define HC_XTC_Dif              0x00000001
+#define HC_XTC_Spec             0x00000002
+#define HC_XTC_Tex              0x00000003
+#define HC_XTC_Cur              0x00000004
+#define HC_XTC_Adif             0x00000005
+#define HC_XTC_Fog              0x00000006
+#define HC_XTC_Atex             0x00000007
+#define HC_XTC_Acur             0x00000008
+#define HC_XTC_HTXnTBLRC        0x00000009
+#define HC_XTC_Ctexnext         0x0000000a 
+/* HC_SubA_HTXnTBLCsat     0x0080
+ * Fields: Csat bit 23, Ca bits 14-19, Cb bits 7-12, Cc bits 0-5. */
+#define HC_HTXnTBLCsat_MASK     0x00800000
+#define HC_HTXnTBLCa_MASK       0x000fc000
+#define HC_HTXnTBLCb_MASK       0x00001f80
+#define HC_HTXnTBLCc_MASK       0x0000003f
+#define HC_HTXnTBLCa_TOPC       (HC_XTC_TOPC << 14) /* Ca values: HC_XTC_* codes placed at bit 14 */
+#define HC_HTXnTBLCa_InvTOPC    (HC_XTC_InvTOPC << 14)
+#define HC_HTXnTBLCa_TOPCp5     (HC_XTC_TOPCp5 << 14)
+#define HC_HTXnTBLCa_0          (HC_XTC_0 << 14)
+#define HC_HTXnTBLCa_Dif        (HC_XTC_Dif << 14)
+#define HC_HTXnTBLCa_Spec       (HC_XTC_Spec << 14)
+#define HC_HTXnTBLCa_Tex        (HC_XTC_Tex << 14)
+#define HC_HTXnTBLCa_Cur        (HC_XTC_Cur << 14)
+#define HC_HTXnTBLCa_Adif       (HC_XTC_Adif << 14)
+#define HC_HTXnTBLCa_Fog        (HC_XTC_Fog << 14)
+#define HC_HTXnTBLCa_Atex       (HC_XTC_Atex << 14)
+#define HC_HTXnTBLCa_Acur       (HC_XTC_Acur << 14)
+#define HC_HTXnTBLCa_HTXnTBLRC  (HC_XTC_HTXnTBLRC << 14)
+#define HC_HTXnTBLCa_Ctexnext   (HC_XTC_Ctexnext << 14) 
+#define HC_HTXnTBLCb_TOPC       (HC_XTC_TOPC << 7) /* Cb values: HC_XTC_* codes placed at bit 7 */
+#define HC_HTXnTBLCb_InvTOPC    (HC_XTC_InvTOPC << 7)
+#define HC_HTXnTBLCb_TOPCp5     (HC_XTC_TOPCp5 << 7)
+#define HC_HTXnTBLCb_0          (HC_XTC_0 << 7)
+#define HC_HTXnTBLCb_Dif        (HC_XTC_Dif << 7)
+#define HC_HTXnTBLCb_Spec       (HC_XTC_Spec << 7)
+#define HC_HTXnTBLCb_Tex        (HC_XTC_Tex << 7)
+#define HC_HTXnTBLCb_Cur        (HC_XTC_Cur << 7)
+#define HC_HTXnTBLCb_Adif       (HC_XTC_Adif << 7)
+#define HC_HTXnTBLCb_Fog        (HC_XTC_Fog << 7)
+#define HC_HTXnTBLCb_Atex       (HC_XTC_Atex << 7)
+#define HC_HTXnTBLCb_Acur       (HC_XTC_Acur << 7)
+#define HC_HTXnTBLCb_HTXnTBLRC  (HC_XTC_HTXnTBLRC << 7)
+#define HC_HTXnTBLCb_Ctexnext   (HC_XTC_Ctexnext << 7) 
+#define HC_HTXnTBLCc_TOPC       (HC_XTC_TOPC << 0) /* Cc values: HC_XTC_* codes at bit 0 */
+#define HC_HTXnTBLCc_InvTOPC    (HC_XTC_InvTOPC << 0)
+#define HC_HTXnTBLCc_TOPCp5     (HC_XTC_TOPCp5 << 0)
+#define HC_HTXnTBLCc_0          (HC_XTC_0 << 0)
+#define HC_HTXnTBLCc_Dif        (HC_XTC_Dif << 0)
+#define HC_HTXnTBLCc_Spec       (HC_XTC_Spec << 0)
+#define HC_HTXnTBLCc_Tex        (HC_XTC_Tex << 0)
+#define HC_HTXnTBLCc_Cur        (HC_XTC_Cur << 0)
+#define HC_HTXnTBLCc_Adif       (HC_XTC_Adif << 0)
+#define HC_HTXnTBLCc_Fog        (HC_XTC_Fog << 0)
+#define HC_HTXnTBLCc_Atex       (HC_XTC_Atex << 0)
+#define HC_HTXnTBLCc_Acur       (HC_XTC_Acur << 0)
+#define HC_HTXnTBLCc_HTXnTBLRC  (HC_XTC_HTXnTBLRC << 0)
+#define HC_HTXnTBLCc_Ctexnext   (HC_XTC_Ctexnext << 0) 
+/* HC_SubA_HTXnTBLCop      0x0081
+ * Fields: dot 22-23, Cop 19-21, Cbias 14-18, Cshift 11-12, Aop 7-9, Abias 3-6, Ashift 0-1. */
+#define HC_HTXnTBLdot_MASK      0x00c00000
+#define HC_HTXnTBLCop_MASK      0x00380000
+#define HC_HTXnTBLCbias_MASK    0x0007c000
+#define HC_HTXnTBLCshift_MASK   0x00001800
+#define HC_HTXnTBLAop_MASK      0x00000380
+#define HC_HTXnTBLAbias_MASK    0x00000078
+#define HC_HTXnTBLAshift_MASK   0x00000003
+#define HC_HTXnTBLDOT3          0x00800000
+#define HC_HTXnTBLDOT4          0x00c00000
+#define HC_HTXnTBLCop_Add       0x00000000
+#define HC_HTXnTBLCop_Sub       0x00080000
+#define HC_HTXnTBLCop_Min       0x00100000
+#define HC_HTXnTBLCop_Max       0x00180000
+#define HC_HTXnTBLCop_Mask      0x00200000
+#define HC_HTXnTBLCbias_Cbias           (HC_XTC_Cbias << 14) /* Cbias values: HC_XTC_* codes placed at bit 14 */
+#define HC_HTXnTBLCbias_InvCbias        (HC_XTC_InvCbias << 14)
+#define HC_HTXnTBLCbias_0               (HC_XTC_0 << 14)
+#define HC_HTXnTBLCbias_Dif             (HC_XTC_Dif << 14)
+#define HC_HTXnTBLCbias_Spec            (HC_XTC_Spec << 14)
+#define HC_HTXnTBLCbias_Tex             (HC_XTC_Tex << 14)
+#define HC_HTXnTBLCbias_Cur             (HC_XTC_Cur << 14)
+#define HC_HTXnTBLCbias_Adif            (HC_XTC_Adif << 14)
+#define HC_HTXnTBLCbias_Fog             (HC_XTC_Fog << 14)
+#define HC_HTXnTBLCbias_Atex            (HC_XTC_Atex << 14)
+#define HC_HTXnTBLCbias_Acur            (HC_XTC_Acur << 14)
+#define HC_HTXnTBLCbias_HTXnTBLRC       (HC_XTC_HTXnTBLRC << 14)
+#define HC_HTXnTBLCshift_1      0x00000000
+#define HC_HTXnTBLCshift_2      0x00000800
+#define HC_HTXnTBLCshift_No     0x00001000
+#define HC_HTXnTBLCshift_DotP   0x00001800
+#define HC_HTXnTBLAop_Add       0x00000000
+#define HC_HTXnTBLAop_Sub       0x00000080
+#define HC_HTXnTBLAop_Min       0x00000100
+#define HC_HTXnTBLAop_Max       0x00000180
+#define HC_HTXnTBLAop_Mask      0x00000200
+#define HC_HTXnTBLAbias_Inv             0x00000040 /* top bit (bit 6) of the Abias field */
+#define HC_HTXnTBLAbias_Adif            0x00000000 /* Abias source codes 0-4 placed at bit 3 */
+#define HC_HTXnTBLAbias_Fog             0x00000008
+#define HC_HTXnTBLAbias_Acur            0x00000010
+#define HC_HTXnTBLAbias_HTXnTBLRAbias   0x00000018
+#define HC_HTXnTBLAbias_Atex            0x00000020
+#define HC_HTXnTBLAshift_1      0x00000000
+#define HC_HTXnTBLAshift_2      0x00000001
+#define HC_HTXnTBLAshift_No     0x00000002
+#define HC_HTXnTBLAshift_DotP   0x00000003
+/* HC_SubA_HTXnTBLMPfog    0x0082
+ * Field: MPfog bits 21-23; the values below are codes 0-5 placed at bit 21. */
+#define HC_HTXnTBLMPfog_MASK    0x00e00000
+#define HC_HTXnTBLMPfog_0       0x00000000
+#define HC_HTXnTBLMPfog_Adif    0x00200000
+#define HC_HTXnTBLMPfog_Fog     0x00400000
+#define HC_HTXnTBLMPfog_Atex    0x00600000
+#define HC_HTXnTBLMPfog_Acur    0x00800000
+#define HC_HTXnTBLMPfog_GHTXnTBLRFog    0x00a00000 /* NOTE(review): leading "G" looks like a typo; name kept as-is */
+/* HC_SubA_HTXnTBLAsat     0x0083
+ *-- HC_XTA_*: texture alpha input codes, shifted into the TBLAa/Ab/Ac fields below.
+ */
+#define HC_XTA_TOPA             0x00000000 /* 0x08/0x10 sit above the 3-bit source codes; presumably OR-ed onto one -- TODO confirm */
+#define HC_XTA_InvTOPA          0x00000008
+#define HC_XTA_TOPAp5           0x00000010
+#define HC_XTA_Adif             0x00000000 /* source codes 0-5 occupy bits 0-2 */
+#define HC_XTA_Fog              0x00000001
+#define HC_XTA_Acur             0x00000002
+#define HC_XTA_HTXnTBLRA        0x00000003
+#define HC_XTA_Atex             0x00000004
+#define HC_XTA_Atexnext         0x00000005 
+/*-- Fields: Asat bit 23, AMB bits 20-22, Aa bits 14-18, Ab bits 7-11, Ac bits 0-4.
+ */
+#define HC_HTXnTBLAsat_MASK     0x00800000
+#define HC_HTXnTBLAMB_MASK      0x00700000
+#define HC_HTXnTBLAa_MASK       0x0007c000
+#define HC_HTXnTBLAb_MASK       0x00000f80
+#define HC_HTXnTBLAc_MASK       0x0000001f
+#define HC_HTXnTBLAMB_SHIFT     20
+#define HC_HTXnTBLAa_TOPA       (HC_XTA_TOPA << 14)
+#define HC_HTXnTBLAa_InvTOPA    (HC_XTA_InvTOPA << 14)
+#define HC_HTXnTBLAa_TOPAp5     (HC_XTA_TOPAp5 << 14)
+#define HC_HTXnTBLAa_Adif       (HC_XTA_Adif << 14)
+#define HC_HTXnTBLAa_Fog        (HC_XTA_Fog << 14)
+#define HC_HTXnTBLAa_Acur       (HC_XTA_Acur << 14)
+#define HC_HTXnTBLAa_HTXnTBLRA  (HC_XTA_HTXnTBLRA << 14)
+#define HC_HTXnTBLAa_Atex       (HC_XTA_Atex << 14)
+#define HC_HTXnTBLAa_Atexnext   (HC_XTA_Atexnext << 14) 
+#define HC_HTXnTBLAb_TOPA       (HC_XTA_TOPA << 7)
+#define HC_HTXnTBLAb_InvTOPA    (HC_XTA_InvTOPA << 7)
+#define HC_HTXnTBLAb_TOPAp5     (HC_XTA_TOPAp5 << 7)
+#define HC_HTXnTBLAb_Adif       (HC_XTA_Adif << 7)
+#define HC_HTXnTBLAb_Fog        (HC_XTA_Fog << 7)
+#define HC_HTXnTBLAb_Acur       (HC_XTA_Acur << 7)
+#define HC_HTXnTBLAb_HTXnTBLRA  (HC_XTA_HTXnTBLRA << 7)
+#define HC_HTXnTBLAb_Atex       (HC_XTA_Atex << 7)
+#define HC_HTXnTBLAb_Atexnext   (HC_XTA_Atexnext << 7) 
+#define HC_HTXnTBLAc_TOPA       (HC_XTA_TOPA << 0)
+#define HC_HTXnTBLAc_InvTOPA    (HC_XTA_InvTOPA << 0)
+#define HC_HTXnTBLAc_TOPAp5     (HC_XTA_TOPAp5 << 0)
+#define HC_HTXnTBLAc_Adif       (HC_XTA_Adif << 0)
+#define HC_HTXnTBLAc_Fog        (HC_XTA_Fog << 0)
+#define HC_HTXnTBLAc_Acur       (HC_XTA_Acur << 0)
+#define HC_HTXnTBLAc_HTXnTBLRA  (HC_XTA_HTXnTBLRA << 0)
+#define HC_HTXnTBLAc_Atex       (HC_XTA_Atex << 0)
+#define HC_HTXnTBLAc_Atexnext   (HC_XTA_Atexnext << 0) 
+/* HC_SubA_HTXnTBLRAa      0x0089
+ * Three 8-bit fields: RAa bits 16-23, RAb bits 8-15, RAc bits 0-7. */
+#define HC_HTXnTBLRAa_MASK      0x00ff0000
+#define HC_HTXnTBLRAb_MASK      0x0000ff00
+#define HC_HTXnTBLRAc_MASK      0x000000ff
+#define HC_HTXnTBLRAa_SHIFT     16
+#define HC_HTXnTBLRAb_SHIFT     8
+#define HC_HTXnTBLRAc_SHIFT     0
+/* HC_SubA_HTXnTBLRFog     0x008a
+ * Fields: RFog bits 8-15, RAbias bits 0-7. */
+#define HC_HTXnTBLRFog_MASK     0x0000ff00
+#define HC_HTXnTBLRAbias_MASK   0x000000ff
+#define HC_HTXnTBLRFog_SHIFT    8
+#define HC_HTXnTBLRAbias_SHIFT  0
+/* HC_SubA_HTXnLScale      0x0094
+ * Fields: LScale bits 10-18, LOff bits 0-8. */
+#define HC_HTXnLScale_MASK      0x0007fc00
+#define HC_HTXnLOff_MASK        0x000001ff
+#define HC_HTXnLScale_SHIFT     10
+/* HC_SubA_HTXSMD          0x0000
+ * Fields: SMD bit 7, TMD bit 6, Num bits 3-5, TRMD bits 1-2, CHCLR bit 0. */
+#define HC_HTXSMD_MASK          0x00000080
+#define HC_HTXTMD_MASK          0x00000040
+#define HC_HTXNum_MASK          0x00000038
+#define HC_HTXTRMD_MASK         0x00000006
+#define HC_HTXCHCLR_MASK        0x00000001
+#define HC_HTXNum_SHIFT         3
+
+/* Texture Palette n: HC_SubType_* values for the two palettes, the fog table
+ * and the stipple pattern. */
+#define HC_SubType_TexPalette0  0x00000000
+#define HC_SubType_TexPalette1  0x00000001
+#define HC_SubType_FogTable     0x00000010
+#define HC_SubType_Stipple      0x00000014
+/* HC_SubA_TexPalette0     0x0000
+ * Four 8-bit channels in a 32-bit entry: A 24-31, R 16-23, G 8-15, B 0-7. */
+#define HC_HTPnA_MASK           0xff000000
+#define HC_HTPnR_MASK           0x00ff0000
+#define HC_HTPnG_MASK           0x0000ff00
+#define HC_HTPnB_MASK           0x000000ff
+/* HC_SubA_FogTable        0x0010
+ * Four 8-bit fields per 32-bit word: HFPn bits 0-7 up to HFPn3 bits 24-31. */
+#define HC_HFPn3_MASK           0xff000000
+#define HC_HFPn2_MASK           0x00ff0000
+#define HC_HFPn1_MASK           0x0000ff00
+#define HC_HFPn_MASK            0x000000ff
+#define HC_HFPn3_SHIFT          24
+#define HC_HFPn2_SHIFT          16
+#define HC_HFPn1_SHIFT          8
+
+/* Auto Testing & Security: HC_SubA_* sub-addresses of the registers in this group.
+ */
+#define HC_SubA_HenFIFOAT       0x0000
+#define HC_SubA_HFBDrawFirst    0x0004
+#define HC_SubA_HFBBasL         0x0005
+#define HC_SubA_HFBDst          0x0006
+/* HC_SubA_HenFIFOAT       0x0000
+ * Six single-bit flags, bit 5 down to bit 0, in the order listed. */
+#define HC_HenFIFOAT_MASK       0x00000020
+#define HC_HenGEMILock_MASK     0x00000010
+#define HC_HenFBASwap_MASK      0x00000008
+#define HC_HenOT_MASK           0x00000004
+#define HC_HenCMDQ_MASK         0x00000002
+#define HC_HenTXCTSU_MASK       0x00000001
+/* HC_SubA_HFBDrawFirst    0x0004
+ * Flags: DrawFirst bit 11, Queue bit 10, Lock bit 9, HEOF bit 8; HFBBasH bits 0-7. */
+#define HC_HFBDrawFirst_MASK    0x00000800
+#define HC_HFBQueue_MASK        0x00000400
+#define HC_HFBLock_MASK         0x00000200
+#define HC_HEOF_MASK            0x00000100
+#define HC_HFBBasH_MASK         0x000000ff
+
+/* GEMI Setting: HC_SubA_* sub-addresses of the registers in this group.
+ */
+#define HC_SubA_HTArbRCM        0x0008
+#define HC_SubA_HTArbRZ         0x000a
+#define HC_SubA_HTArbWZ         0x000b
+#define HC_SubA_HTArbRTX        0x000c
+#define HC_SubA_HTArbRCW        0x000d
+#define HC_SubA_HTArbE2         0x000e
+#define HC_SubA_HArbRQCM        0x0010
+#define HC_SubA_HArbWQCM        0x0011
+#define HC_SubA_HGEMITout       0x0020
+#define HC_SubA_HFthRTXD        0x0040
+#define HC_SubA_HFthRTXA        0x0044
+#define HC_SubA_HCMDQstL        0x0050
+#define HC_SubA_HCMDQendL       0x0051
+#define HC_SubA_HCMDQLen        0x0052
+/* HC_SubA_HTArbRCM        0x0008
+ * Single field in bits 0-15. */
+#define HC_HTArbRCM_MASK        0x0000ffff
+/* HC_SubA_HTArbRZ         0x000a
+ * Single field in bits 0-15. */
+#define HC_HTArbRZ_MASK         0x0000ffff
+/* HC_SubA_HTArbWZ         0x000b
+ * Single field in bits 0-15. */
+#define HC_HTArbWZ_MASK         0x0000ffff
+/* HC_SubA_HTArbRTX        0x000c
+ * Single field in bits 0-15. */
+#define HC_HTArbRTX_MASK        0x0000ffff
+/* HC_SubA_HTArbRCW        0x000d
+ * Single field in bits 0-15. */
+#define HC_HTArbRCW_MASK        0x0000ffff
+/* HC_SubA_HTArbE2         0x000e
+ * Single field in bits 0-15. */
+#define HC_HTArbE2_MASK         0x0000ffff
+/* HC_SubA_HArbRQCM        0x0010
+ * NOTE(review): mask is spelled HC_HTArbRQCM (extra "T"), unlike HC_HArbWQCM. */
+#define HC_HTArbRQCM_MASK       0x0000ffff
+/* HC_SubA_HArbWQCM        0x0011
+ * Single field in bits 0-15. */
+#define HC_HArbWQCM_MASK        0x0000ffff
+/* HC_SubA_HGEMITout       0x0020
+ * Fields: GEMITout bits 16-19, NPArbZC bits 0-15. */
+#define HC_HGEMITout_MASK       0x000f0000
+#define HC_HNPArbZC_MASK        0x0000ffff
+#define HC_HGEMITout_SHIFT      16
+/* HC_SubA_HFthRTXD        0x0040
+ * Three 8-bit fields: RTXD bits 16-23, RZD bits 8-15, WZD bits 0-7. */
+#define HC_HFthRTXD_MASK        0x00ff0000
+#define HC_HFthRZD_MASK         0x0000ff00
+#define HC_HFthWZD_MASK         0x000000ff
+#define HC_HFthRTXD_SHIFT       16
+#define HC_HFthRZD_SHIFT        8
+/* HC_SubA_HFthRTXA        0x0044
+ * Single field in bits 0-7. */
+#define HC_HFthRTXA_MASK        0x000000ff
+
+/******************************************************************************
+** Define the Halcyon Internal register access constants. For simulator only.
+******************************************************************************/
+#define HC_SIMA_HAGPBstL        0x0000
+#define HC_SIMA_HAGPBendL       0x0001
+#define HC_SIMA_HAGPCMNT        0x0002
+#define HC_SIMA_HAGPBpL         0x0003
+#define HC_SIMA_HAGPBpH         0x0004
+#define HC_SIMA_HClipTB         0x0005
+#define HC_SIMA_HClipLR         0x0006
+#define HC_SIMA_HFPClipTL       0x0007
+#define HC_SIMA_HFPClipBL       0x0008
+#define HC_SIMA_HFPClipLL       0x0009
+#define HC_SIMA_HFPClipRL       0x000a
+#define HC_SIMA_HFPClipTBH      0x000b
+#define HC_SIMA_HFPClipLRH      0x000c
+#define HC_SIMA_HLP             0x000d
+#define HC_SIMA_HLPRF           0x000e
+#define HC_SIMA_HSolidCL        0x000f
+#define HC_SIMA_HPixGC          0x0010
+#define HC_SIMA_HSPXYOS         0x0011
+#define HC_SIMA_HCmdA           0x0012
+#define HC_SIMA_HCmdB           0x0013
+#define HC_SIMA_HEnable         0x0014
+#define HC_SIMA_HZWBBasL        0x0015
+#define HC_SIMA_HZWBBasH        0x0016
+#define HC_SIMA_HZWBType        0x0017
+#define HC_SIMA_HZBiasL         0x0018
+#define HC_SIMA_HZWBend         0x0019
+#define HC_SIMA_HZWTMD          0x001a
+#define HC_SIMA_HZWCDL          0x001b
+#define HC_SIMA_HZWCTAGnum      0x001c
+#define HC_SIMA_HZCYNum         0x001d
+#define HC_SIMA_HZWCFire        0x001e
+/* #define HC_SIMA_HSBBasL         0x001d */
+/* #define HC_SIMA_HSBBasH         0x001e */
+/* #define HC_SIMA_HSBFM           0x001f */
+#define HC_SIMA_HSTREF          0x0020
+#define HC_SIMA_HSTMD           0x0021
+#define HC_SIMA_HABBasL         0x0022
+#define HC_SIMA_HABBasH         0x0023
+#define HC_SIMA_HABFM           0x0024
+#define HC_SIMA_HATMD           0x0025
+#define HC_SIMA_HABLCsat        0x0026
+#define HC_SIMA_HABLCop         0x0027
+#define HC_SIMA_HABLAsat        0x0028
+#define HC_SIMA_HABLAop         0x0029
+#define HC_SIMA_HABLRCa         0x002a
+#define HC_SIMA_HABLRFCa        0x002b
+#define HC_SIMA_HABLRCbias      0x002c
+#define HC_SIMA_HABLRCb         0x002d
+#define HC_SIMA_HABLRFCb        0x002e
+#define HC_SIMA_HABLRAa         0x002f
+#define HC_SIMA_HABLRAb         0x0030
+#define HC_SIMA_HDBBasL         0x0031
+#define HC_SIMA_HDBBasH         0x0032
+#define HC_SIMA_HDBFM           0x0033
+#define HC_SIMA_HFBBMSKL        0x0034
+#define HC_SIMA_HROP            0x0035
+#define HC_SIMA_HFogLF          0x0036
+#define HC_SIMA_HFogCL          0x0037
+#define HC_SIMA_HFogCH          0x0038
+#define HC_SIMA_HFogStL         0x0039
+#define HC_SIMA_HFogStH         0x003a
+#define HC_SIMA_HFogOOdMF       0x003b
+#define HC_SIMA_HFogOOdEF       0x003c
+#define HC_SIMA_HFogEndL        0x003d
+#define HC_SIMA_HFogDenst       0x003e
+/*---- start of texture 0 setting ----
+ */
+#define HC_SIMA_HTX0L0BasL      0x0040
+#define HC_SIMA_HTX0L1BasL      0x0041
+#define HC_SIMA_HTX0L2BasL      0x0042
+#define HC_SIMA_HTX0L3BasL      0x0043
+#define HC_SIMA_HTX0L4BasL      0x0044
+#define HC_SIMA_HTX0L5BasL      0x0045
+#define HC_SIMA_HTX0L6BasL      0x0046
+#define HC_SIMA_HTX0L7BasL      0x0047
+#define HC_SIMA_HTX0L8BasL      0x0048
+#define HC_SIMA_HTX0L9BasL      0x0049
+#define HC_SIMA_HTX0LaBasL      0x004a
+#define HC_SIMA_HTX0LbBasL      0x004b
+#define HC_SIMA_HTX0LcBasL      0x004c
+#define HC_SIMA_HTX0LdBasL      0x004d
+#define HC_SIMA_HTX0LeBasL      0x004e
+#define HC_SIMA_HTX0LfBasL      0x004f
+#define HC_SIMA_HTX0L10BasL     0x0050
+#define HC_SIMA_HTX0L11BasL     0x0051
+#define HC_SIMA_HTX0L012BasH    0x0052
+#define HC_SIMA_HTX0L345BasH    0x0053
+#define HC_SIMA_HTX0L678BasH    0x0054
+#define HC_SIMA_HTX0L9abBasH    0x0055
+#define HC_SIMA_HTX0LcdeBasH    0x0056
+#define HC_SIMA_HTX0Lf1011BasH  0x0057
+#define HC_SIMA_HTX0L0Pit       0x0058
+#define HC_SIMA_HTX0L1Pit       0x0059
+#define HC_SIMA_HTX0L2Pit       0x005a
+#define HC_SIMA_HTX0L3Pit       0x005b
+#define HC_SIMA_HTX0L4Pit       0x005c
+#define HC_SIMA_HTX0L5Pit       0x005d
+#define HC_SIMA_HTX0L6Pit       0x005e
+#define HC_SIMA_HTX0L7Pit       0x005f
+#define HC_SIMA_HTX0L8Pit       0x0060
+#define HC_SIMA_HTX0L9Pit       0x0061
+#define HC_SIMA_HTX0LaPit       0x0062
+#define HC_SIMA_HTX0LbPit       0x0063
+#define HC_SIMA_HTX0LcPit       0x0064
+#define HC_SIMA_HTX0LdPit       0x0065
+#define HC_SIMA_HTX0LePit       0x0066
+#define HC_SIMA_HTX0LfPit       0x0067
+#define HC_SIMA_HTX0L10Pit      0x0068
+#define HC_SIMA_HTX0L11Pit      0x0069
+#define HC_SIMA_HTX0L0_5WE      0x006a
+#define HC_SIMA_HTX0L6_bWE      0x006b
+#define HC_SIMA_HTX0Lc_11WE     0x006c
+#define HC_SIMA_HTX0L0_5HE      0x006d
+#define HC_SIMA_HTX0L6_bHE      0x006e
+#define HC_SIMA_HTX0Lc_11HE     0x006f
+#define HC_SIMA_HTX0L0OS        0x0070
+#define HC_SIMA_HTX0TB          0x0071
+#define HC_SIMA_HTX0MPMD        0x0072
+#define HC_SIMA_HTX0CLODu       0x0073
+#define HC_SIMA_HTX0FM          0x0074
+#define HC_SIMA_HTX0TRCH        0x0075
+#define HC_SIMA_HTX0TRCL        0x0076
+#define HC_SIMA_HTX0TBC         0x0077
+#define HC_SIMA_HTX0TRAH        0x0078
+#define HC_SIMA_HTX0TBLCsat     0x0079
+#define HC_SIMA_HTX0TBLCop      0x007a
+#define HC_SIMA_HTX0TBLMPfog    0x007b
+#define HC_SIMA_HTX0TBLAsat     0x007c
+#define HC_SIMA_HTX0TBLRCa      0x007d
+#define HC_SIMA_HTX0TBLRCb      0x007e
+#define HC_SIMA_HTX0TBLRCc      0x007f
+#define HC_SIMA_HTX0TBLRCbias   0x0080
+#define HC_SIMA_HTX0TBLRAa      0x0081
+#define HC_SIMA_HTX0TBLRFog     0x0082
+#define HC_SIMA_HTX0BumpM00     0x0083
+#define HC_SIMA_HTX0BumpM01     0x0084
+#define HC_SIMA_HTX0BumpM10     0x0085
+#define HC_SIMA_HTX0BumpM11     0x0086
+#define HC_SIMA_HTX0LScale      0x0087
+/*---- end of texture 0 setting ----      0x008f
+ */
+#define HC_SIMA_TX0TX1_OFF      0x0050  /* added to an HTX0 index to form the matching HTX1 index */
+/*---- start of texture 1 setting (each value is the texture 0 value + HC_SIMA_TX0TX1_OFF) ----
+ */
+#define HC_SIMA_HTX1L0BasL      (HC_SIMA_HTX0L0BasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L1BasL      (HC_SIMA_HTX0L1BasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L2BasL      (HC_SIMA_HTX0L2BasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L3BasL      (HC_SIMA_HTX0L3BasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L4BasL      (HC_SIMA_HTX0L4BasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L5BasL      (HC_SIMA_HTX0L5BasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L6BasL      (HC_SIMA_HTX0L6BasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L7BasL      (HC_SIMA_HTX0L7BasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L8BasL      (HC_SIMA_HTX0L8BasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L9BasL      (HC_SIMA_HTX0L9BasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LaBasL      (HC_SIMA_HTX0LaBasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LbBasL      (HC_SIMA_HTX0LbBasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LcBasL      (HC_SIMA_HTX0LcBasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LdBasL      (HC_SIMA_HTX0LdBasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LeBasL      (HC_SIMA_HTX0LeBasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LfBasL      (HC_SIMA_HTX0LfBasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L10BasL     (HC_SIMA_HTX0L10BasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L11BasL     (HC_SIMA_HTX0L11BasL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L012BasH    (HC_SIMA_HTX0L012BasH + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L345BasH    (HC_SIMA_HTX0L345BasH + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L678BasH    (HC_SIMA_HTX0L678BasH + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L9abBasH    (HC_SIMA_HTX0L9abBasH + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LcdeBasH    (HC_SIMA_HTX0LcdeBasH + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1Lf1011BasH  (HC_SIMA_HTX0Lf1011BasH + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L0Pit       (HC_SIMA_HTX0L0Pit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L1Pit       (HC_SIMA_HTX0L1Pit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L2Pit       (HC_SIMA_HTX0L2Pit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L3Pit       (HC_SIMA_HTX0L3Pit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L4Pit       (HC_SIMA_HTX0L4Pit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L5Pit       (HC_SIMA_HTX0L5Pit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L6Pit       (HC_SIMA_HTX0L6Pit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L7Pit       (HC_SIMA_HTX0L7Pit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L8Pit       (HC_SIMA_HTX0L8Pit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L9Pit       (HC_SIMA_HTX0L9Pit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LaPit       (HC_SIMA_HTX0LaPit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LbPit       (HC_SIMA_HTX0LbPit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LcPit       (HC_SIMA_HTX0LcPit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LdPit       (HC_SIMA_HTX0LdPit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LePit       (HC_SIMA_HTX0LePit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LfPit       (HC_SIMA_HTX0LfPit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L10Pit      (HC_SIMA_HTX0L10Pit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L11Pit      (HC_SIMA_HTX0L11Pit + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L0_5WE      (HC_SIMA_HTX0L0_5WE + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L6_bWE      (HC_SIMA_HTX0L6_bWE + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1Lc_11WE     (HC_SIMA_HTX0Lc_11WE + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L0_5HE      (HC_SIMA_HTX0L0_5HE + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L6_bHE      (HC_SIMA_HTX0L6_bHE + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1Lc_11HE      (HC_SIMA_HTX0Lc_11HE + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1L0OS        (HC_SIMA_HTX0L0OS + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TB          (HC_SIMA_HTX0TB + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1MPMD        (HC_SIMA_HTX0MPMD + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1CLODu       (HC_SIMA_HTX0CLODu + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1FM          (HC_SIMA_HTX0FM + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TRCH        (HC_SIMA_HTX0TRCH + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TRCL        (HC_SIMA_HTX0TRCL + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TBC         (HC_SIMA_HTX0TBC + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TRAH        (HC_SIMA_HTX0TRAH + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LTC         (HC_SIMA_HTX0LTC + HC_SIMA_TX0TX1_OFF)  /* NOTE(review): no HC_SIMA_HTX0LTC in the texture 0 list above - confirm it is defined */
+#define HC_SIMA_HTX1LTA         (HC_SIMA_HTX0LTA + HC_SIMA_TX0TX1_OFF)  /* NOTE(review): no HC_SIMA_HTX0LTA in the texture 0 list above - confirm it is defined */
+#define HC_SIMA_HTX1TBLCsat     (HC_SIMA_HTX0TBLCsat + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TBLCop      (HC_SIMA_HTX0TBLCop + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TBLMPfog    (HC_SIMA_HTX0TBLMPfog + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TBLAsat     (HC_SIMA_HTX0TBLAsat + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TBLRCa      (HC_SIMA_HTX0TBLRCa + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TBLRCb      (HC_SIMA_HTX0TBLRCb + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TBLRCc      (HC_SIMA_HTX0TBLRCc + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TBLRCbias   (HC_SIMA_HTX0TBLRCbias + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TBLRAa      (HC_SIMA_HTX0TBLRAa + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1TBLRFog     (HC_SIMA_HTX0TBLRFog + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1BumpM00     (HC_SIMA_HTX0BumpM00 + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1BumpM01     (HC_SIMA_HTX0BumpM01 + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1BumpM10     (HC_SIMA_HTX0BumpM10 + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1BumpM11     (HC_SIMA_HTX0BumpM11 + HC_SIMA_TX0TX1_OFF)
+#define HC_SIMA_HTX1LScale      (HC_SIMA_HTX0LScale + HC_SIMA_TX0TX1_OFF)
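+/*---- end of texture 1 setting ---- 0xaf
+ * NOTE(review): HC_SIMA_HTX1LScale above evaluates to 0x0087 + 0x0050 = 0x00d7, which is past 0xaf and past HC_SIMA_HTXSMD (0x00b0) below; confirm the offset. */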
+#define HC_SIMA_HTXSMD          0x00b0
+#define HC_SIMA_HenFIFOAT       0x00b1
+#define HC_SIMA_HFBDrawFirst    0x00b2
+#define HC_SIMA_HFBBasL         0x00b3
+#define HC_SIMA_HTArbRCM        0x00b4
+#define HC_SIMA_HTArbRZ         0x00b5
+#define HC_SIMA_HTArbWZ         0x00b6
+#define HC_SIMA_HTArbRTX        0x00b7
+#define HC_SIMA_HTArbRCW        0x00b8
+#define HC_SIMA_HTArbE2         0x00b9
+#define HC_SIMA_HGEMITout       0x00ba
+#define HC_SIMA_HFthRTXD        0x00bb
+#define HC_SIMA_HFthRTXA        0x00bc
+/* Define the texture palette 0
+ */
+#define HC_SIMA_HTP0            0x0100
+#define HC_SIMA_HTP1            0x0200
+#define HC_SIMA_FOGTABLE        0x0300
+#define HC_SIMA_STIPPLE         0x0400
+#define HC_SIMA_HE3Fire         0x0440
+#define HC_SIMA_TRANS_SET       0x0441
+#define HC_SIMA_HREngSt         0x0442
+#define HC_SIMA_HRFIFOempty     0x0443
+#define HC_SIMA_HRFIFOfull      0x0444
+#define HC_SIMA_HRErr           0x0445
+#define HC_SIMA_FIFOstatus      0x0446
+
+/******************************************************************************
+** Define the AGP command header.
+******************************************************************************/
+#define HC_ACMD_MASK            0xfe000000      /* bits 25-31 */
+#define HC_ACMD_SUB_MASK        0x0c000000      /* bits 26-27 */
+#define HC_ACMD_HCmdA           0xee000000      
+#define HC_ACMD_HCmdB           0xec000000      
+#define HC_ACMD_HCmdC           0xea000000      
+#define HC_ACMD_H1              0xf0000000      
+#define HC_ACMD_H2              0xf2000000      
+#define HC_ACMD_H3              0xf4000000      
+#define HC_ACMD_H4              0xf6000000      
+
+#define HC_ACMD_H1IO_MASK       0x000001ff      /* bits 0-8 */
+#define HC_ACMD_H2IO1_MASK      0x001ff000      /* bits 12-20 */
+#define HC_ACMD_H2IO2_MASK      0x000001ff      /* bits 0-8 */
+#define HC_ACMD_H2IO1_SHIFT     12              
+#define HC_ACMD_H2IO2_SHIFT     0
+#define HC_ACMD_H3IO_MASK       0x000001ff      /* bits 0-8 */
+#define HC_ACMD_H3COUNT_MASK    0x01fff000      /* bits 12-24 */
+#define HC_ACMD_H3COUNT_SHIFT   12              
+#define HC_ACMD_H4ID_MASK       0x000001ff      /* bits 0-8 */
+#define HC_ACMD_H4COUNT_MASK    0x01fffe00      /* bits 9-24 */
+#define HC_ACMD_H4COUNT_SHIFT   9
+
+/********************************************************************************
+** Define Header: equals HC_ACMD_H2 | (0x10f << HC_ACMD_H2IO1_SHIFT) | 0x110
+********************************************************************************/
+#define HC_HEADER2		0xF210F110
+
+/********************************************************************************
+** Define Dummy Value 
+********************************************************************************/
+#define HC_DUMMY		0xCCCCCCCC
+/********************************************************************************
+** Define for DMA use 
+********************************************************************************/
+#define HALCYON_HEADER2     0XF210F110  /* same value as HC_HEADER2 */
+#define HALCYON_FIRECMD     0XEE100000  /* HC_ACMD_HCmdA value with bit 20 set */
+#define HALCYON_FIREMASK    0XFFF00000
+#define HALCYON_CMDB        0XEC000000  /* same value as HC_ACMD_HCmdB */
+#define HALCYON_CMDBMASK    0XFFFE0000
+#define HALCYON_SUB_ADDR0   0X00000000
+#define HALCYON_HEADER1MASK 0XFFFFFF00
+#define HALCYON_HEADER1     0XF0000000  /* same value as HC_ACMD_H1 */
+#define HC_SubA_HAGPBstL        0x0060
+#define HC_SubA_HAGPBendL       0x0061
+#define HC_SubA_HAGPCMNT        0x0062
+#define HC_SubA_HAGPBpL         0x0063
+#define HC_SubA_HAGPBpH         0x0064
+#define HC_HAGPCMNT_MASK        0x00800000  /* bit 23 */
+#define HC_HCmdErrClr_MASK      0x00400000  /* bit 22 */
+#define HC_HAGPBendH_MASK       0x0000ff00  /* bits 8-15 */
+#define HC_HAGPBstH_MASK        0x000000ff  /* bits 0-7 */
+#define HC_HAGPBendH_SHIFT      8
+#define HC_HAGPBstH_SHIFT       0
+#define HC_HAGPBpL_MASK         0x00fffffc  /* bits 2-23 */
+#define HC_HAGPBpID_MASK        0x00000003  /* bits 0-1; values are the HC_HAGPBpID_* constants below */
+#define HC_HAGPBpID_PAUSE       0x00000000
+#define HC_HAGPBpID_JUMP        0x00000001
+#define HC_HAGPBpID_STOP        0x00000002
+#define HC_HAGPBpH_MASK         0x00ffffff  /* bits 0-23 */
+
+#endif
diff --git a/src/mesa/drivers/dri/unichrome/via_context.c b/src/mesa/drivers/dri/unichrome/via_context.c
new file mode 100644
index 0000000000..4298c94855
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_context.c
@@ -0,0 +1,956 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file via_context.c
+ * 
+ * \author John Sheng (presumably of either VIA Technologies or S3 Graphics)
+ * \author Others at VIA Technologies?
+ * \author Others at S3 Graphics?
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/formats.h"
+#include "main/simple_list.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "vbo/vbo.h"
+
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+
+#include "via_screen.h"
+#include "via_dri.h"
+
+#include "via_state.h"
+#include "via_tex.h"
+#include "via_span.h"
+#include "via_tris.h"
+#include "via_ioctl.h"
+#include "via_fb.h"
+
+#include <stdio.h>
+#include "main/macros.h"
+#include "drirenderbuffer.h"
+
+#define need_GL_ARB_point_parameters
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_secondary_color
+#include "main/remap_helper.h"
+
+#define DRIVER_DATE	"20060710"
+
+#include "vblank.h"
+#include "utils.h"
+
+GLuint VIA_DEBUG = 0;
+
+/**
+ * Return various strings for \c glGetString.
+ *
+ * \sa glGetString
+ */
+static const GLubyte *viaGetString(GLcontext *ctx, GLenum name)
+{
+   /* Static so the renderer string stays valid after this call returns. */
+   static char buffer[128];
+
+   switch (name) {
+   case GL_VENDOR:
+      return (const GLubyte *)"VIA Technology";
+
+   case GL_RENDERER: {
+      static const char * const chipset_names[] = {
+	 "UniChrome",
+	 "CastleRock (CLE266)",
+	 "UniChrome (KM400)",
+	 "UniChrome (K8M800)",
+	 "UniChrome (PM8x0/CN400)",
+      };
+      struct via_context *vmesa = VIA_CONTEXT(ctx);
+      unsigned id = vmesa->viaScreen->deviceID;
+
+      /* IDs above VIA_PM800 use entry 0; the returned length is not needed. */
+      (void) driGetRendererString( buffer, 
+				     chipset_names[(id > VIA_PM800) ? 0 : id],
+				     DRIVER_DATE, 0 );
+      return (const GLubyte *)buffer;
+   }
+
+   default:
+      return NULL;
+   }
+}
+
+
+/**
+ * Round a pixel width up to the hardware's scanline alignment; on the
+ * Unichrome hardware each scanline must be a multiple of 16 pixels.
+ *
+ * \param width  Minimum buffer width, in pixels.
+ *
+ * \returns The smallest multiple of 16 that is >= \p width (modulo unsigned wrap).
+ */
+static INLINE unsigned
+buffer_align( unsigned width )
+{
+    const unsigned align_mask = 0x0f;
+    return (width + align_mask) & ~align_mask;
+}
+
+/* Delete hook installed on each renderbuffer by viaInitRenderbuffer; does nothing. */
+static void
+viaDeleteRenderbuffer(struct gl_renderbuffer *rb)
+{
+   /* Don't free() since we're contained in via_context struct. */
+}
+/* AllocStorage hook: only records the requested format and size in \p rb. */
+static GLboolean
+viaRenderbufferStorage(GLcontext *ctx, struct gl_renderbuffer *rb,
+                       GLenum internalFormat, GLuint width, GLuint height)
+{
+   rb->InternalFormat = internalFormat;
+   rb->Height = height;
+   rb->Width = width;
+   return GL_TRUE;
+}
+
+
+/* Initialise \p vrb's embedded gl_renderbuffer (name 0) for one of the formats
+ * handled below and install the driver's Delete/AllocStorage hooks on it. */
+static void
+viaInitRenderbuffer(struct via_renderbuffer *vrb, GLenum format,
+		    __DRIdrawable *dPriv)
+{
+   struct gl_renderbuffer *base = & vrb->Base;
+
+   vrb->dPriv = dPriv;
+   _mesa_init_renderbuffer(base, 0);
+
+   /* Make sure we're using a null-valued GetPointer routine */
+   assert(base->GetPointer(NULL, base, 0, 0) == NULL);
+
+   base->InternalFormat = format;
+   base->Delete = viaDeleteRenderbuffer;
+   base->AllocStorage = viaRenderbufferStorage;
+
+   switch (format) {
+   case GL_RGBA:
+      /* Color */
+      base->_BaseFormat = GL_RGBA;
+      base->Format = MESA_FORMAT_ARGB8888;
+      base->DataType = GL_UNSIGNED_BYTE;
+      break;
+   case GL_DEPTH_COMPONENT16:
+      /* Depth; we always Get/Put 32-bit Z values */
+      base->_BaseFormat = GL_DEPTH_COMPONENT;
+      base->Format = MESA_FORMAT_Z16;
+      base->DataType = GL_UNSIGNED_INT;
+      break;
+   case GL_DEPTH_COMPONENT24:
+      /* Depth; we always Get/Put 32-bit Z values */
+      base->_BaseFormat = GL_DEPTH_COMPONENT;
+      base->Format = MESA_FORMAT_Z32;
+      base->DataType = GL_UNSIGNED_INT;
+      break;
+   default:
+      /* Stencil */
+      ASSERT(format == GL_STENCIL_INDEX8_EXT);
+      base->_BaseFormat = GL_STENCIL_INDEX;
+      base->Format = MESA_FORMAT_S8;
+      base->DataType = GL_UNSIGNED_BYTE;
+      break;
+   }
+}
+
+
+/**
+ * Calculate the framebuffer parameters for all buffers (front, back, depth,
+ * and stencil) associated with the specified context.
+ * 
+ * \warning
+ * This function also calls \c via_alloc_draw_buffer to actually allocate the
+ * buffers, after freeing any buffer of the same kind that is still mapped.
+ * 
+ * \returns GL_FALSE as soon as an allocation fails, GL_TRUE otherwise.
+ */
+static GLboolean
+calculate_buffer_parameters(struct via_context *vmesa,
+			    struct gl_framebuffer *fb,
+			    __DRIdrawable *dPriv)
+{
+   const unsigned shift = vmesa->viaScreen->bitsPerPixel / 16; /* 1 for 16 bpp, 2 for 32 bpp */
+   const unsigned extra = 32; /* bytes added to the back and depth pitch below */
+   unsigned w;
+   unsigned h;
+
+   /* Normally, the renderbuffer would be added to the framebuffer just once
+    * when the framebuffer was created.  The VIA driver is a bit funny
+    * though in that the front/back/depth renderbuffers are in the per-context
+    * state!
+    * That should be fixed someday.
+    */
+
+   if (!vmesa->front.Base.InternalFormat) {
+      /* do one-time init for the renderbuffers */
+      viaInitRenderbuffer(&vmesa->front, GL_RGBA, dPriv);
+      viaSetSpanFunctions(&vmesa->front, &fb->Visual);
+      _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &vmesa->front.Base);
+
+      if (fb->Visual.doubleBufferMode) {
+         viaInitRenderbuffer(&vmesa->back, GL_RGBA, dPriv);
+         viaSetSpanFunctions(&vmesa->back, &fb->Visual);
+         _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &vmesa->back.Base);
+      }
+
+      if (vmesa->glCtx->Visual.depthBits > 0) {
+         viaInitRenderbuffer(&vmesa->depth,
+                             (vmesa->glCtx->Visual.depthBits == 16
+                              ? GL_DEPTH_COMPONENT16 : GL_DEPTH_COMPONENT24),
+			     dPriv);
+         viaSetSpanFunctions(&vmesa->depth, &fb->Visual);
+         _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &vmesa->depth.Base);
+      }
+
+      if (vmesa->glCtx->Visual.stencilBits > 0) {
+         viaInitRenderbuffer(&vmesa->stencil, GL_STENCIL_INDEX8_EXT,
+			     dPriv);
+         viaSetSpanFunctions(&vmesa->stencil, &fb->Visual);
+         _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &vmesa->stencil.Base);
+      }
+   }
+
+   assert(vmesa->front.Base.InternalFormat);
+   assert(vmesa->front.Base.AllocStorage);
+   if (fb->Visual.doubleBufferMode) {
+      assert(vmesa->back.Base.AllocStorage);
+   }
+   if (fb->Visual.depthBits) {
+      assert(vmesa->depth.Base.AllocStorage);
+   }
+
+
+   /* Allocate front-buffer: pbuffers get their own, otherwise the screen framebuffer is used */
+   if (vmesa->drawType == GLX_PBUFFER_BIT) {
+      w = vmesa->driDrawable->w;
+      h = vmesa->driDrawable->h;
+
+      vmesa->front.bpp = vmesa->viaScreen->bitsPerPixel;
+      vmesa->front.pitch = buffer_align( w ) << shift; /* bytes, not pixels */
+      vmesa->front.size = vmesa->front.pitch * h;
+
+      if (vmesa->front.map)
+	 via_free_draw_buffer(vmesa, &vmesa->front);
+      if (!via_alloc_draw_buffer(vmesa, &vmesa->front))
+	 return GL_FALSE;
+
+   } else {
+      w = vmesa->viaScreen->width;
+      h = vmesa->viaScreen->height;
+
+      vmesa->front.bpp = vmesa->viaScreen->bitsPerPixel;
+      vmesa->front.pitch = buffer_align( w ) << shift; /* bytes, not pixels */
+      vmesa->front.size = vmesa->front.pitch * h;
+      if (getenv("ALTERNATE_SCREEN")) 
+        vmesa->front.offset = vmesa->front.size;
+      else
+      	vmesa->front.offset = 0;
+      vmesa->front.map = (char *) vmesa->driScreen->pFB;
+   }
+
+
+   /* Allocate back-buffer, or clear its state when the visual has none */
+   if (vmesa->hasBack) {
+      vmesa->back.bpp = vmesa->viaScreen->bitsPerPixel;
+      vmesa->back.pitch = (buffer_align( vmesa->driDrawable->w ) << shift);
+      vmesa->back.pitch += extra;
+      vmesa->back.pitch = MIN2(vmesa->back.pitch, vmesa->front.pitch);
+      vmesa->back.size = vmesa->back.pitch * vmesa->driDrawable->h;
+      if (vmesa->back.map)
+	 via_free_draw_buffer(vmesa, &vmesa->back);
+      if (!via_alloc_draw_buffer(vmesa, &vmesa->back))
+	 return GL_FALSE;
+   }
+   else {
+      if (vmesa->back.map)
+	 via_free_draw_buffer(vmesa, &vmesa->back);
+      (void) memset( &vmesa->back, 0, sizeof( vmesa->back ) );
+   }
+
+
+   /* Allocate depth-buffer; a 24-bit depth format is stored as 32 bits per pixel */
+   if ( vmesa->hasStencil || vmesa->hasDepth ) {
+      vmesa->depth.bpp = vmesa->depthBits;
+      if (vmesa->depth.bpp == 24)
+	 vmesa->depth.bpp = 32;
+
+      vmesa->depth.pitch = (buffer_align( vmesa->driDrawable->w ) * 
+			    (vmesa->depth.bpp/8)) + extra;
+      vmesa->depth.size = vmesa->depth.pitch * vmesa->driDrawable->h;
+
+      if (vmesa->depth.map)
+	 via_free_draw_buffer(vmesa, &vmesa->depth);
+      if (!via_alloc_draw_buffer(vmesa, &vmesa->depth)) {
+	 return GL_FALSE;
+      }
+   }
+   else {
+      if (vmesa->depth.map)
+   	 via_free_draw_buffer(vmesa, &vmesa->depth);
+      (void) memset( & vmesa->depth, 0, sizeof( vmesa->depth ) );
+   }
+
+   /* stencil buffer is same as depth buffer: copy the depth buffer's location fields */
+   vmesa->stencil.handle = vmesa->depth.handle;
+   vmesa->stencil.size = vmesa->depth.size;
+   vmesa->stencil.offset = vmesa->depth.offset;
+   vmesa->stencil.index = vmesa->depth.index;
+   vmesa->stencil.pitch = vmesa->depth.pitch;
+   vmesa->stencil.bpp = vmesa->depth.bpp;
+   vmesa->stencil.map = vmesa->depth.map;
+   vmesa->stencil.orig = vmesa->depth.orig;
+   vmesa->stencil.origMap = vmesa->depth.origMap;
+   /* Page flipping is only considered when the drawable is exactly screen-sized */
+   if( vmesa->viaScreen->width == vmesa->driDrawable->w && 
+       vmesa->viaScreen->height == vmesa->driDrawable->h ) {
+      vmesa->doPageFlip = vmesa->allowPageFlip;
+      if (vmesa->hasBack) {
+         assert(vmesa->back.pitch == vmesa->front.pitch);
+      }
+   }
+   else
+      vmesa->doPageFlip = GL_FALSE;
+
+   return GL_TRUE;
+}
+
+/* Recompute and reallocate the context's buffers, then resize \p drawbuffer. */
+void viaReAllocateBuffers(GLcontext *ctx, GLframebuffer *drawbuffer,
+                          GLuint width, GLuint height)
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    if (!calculate_buffer_parameters(vmesa, drawbuffer, vmesa->driDrawable)) {
+        fprintf(stderr, "viaReAllocateBuffers: buffer allocation failed\n");
+    }
+    _mesa_resize_framebuffer(ctx, drawbuffer, width, height);
+}
+
+/* Extension strings exported by the Unichrome driver; the table is terminated
+ * by a { NULL, NULL } entry and the two dot3 entries are commented out. */
+static const struct dri_extension card_extensions[] =
+{
+    { "GL_ARB_multitexture",               NULL },
+    { "GL_ARB_point_parameters",           GL_ARB_point_parameters_functions },
+    { "GL_ARB_texture_env_add",            NULL },
+    { "GL_ARB_texture_env_combine",        NULL },
+/*    { "GL_ARB_texture_env_dot3",           NULL }, */
+    { "GL_ARB_texture_mirrored_repeat",    NULL },
+    { "GL_EXT_fog_coord",                  GL_EXT_fog_coord_functions },
+    { "GL_EXT_secondary_color",            GL_EXT_secondary_color_functions },
+    { "GL_EXT_stencil_wrap",               NULL },
+    { "GL_EXT_texture_env_combine",        NULL },
+/*    { "GL_EXT_texture_env_dot3",           NULL }, */
+    { "GL_EXT_texture_lod_bias",           NULL },
+    { "GL_NV_blend_square",                NULL },
+    { NULL,                                NULL }
+};
+
+extern const struct tnl_pipeline_stage _via_fastrender_stage;
+extern const struct tnl_pipeline_stage _via_render_stage;
+
+/* TNL stage list that viaCreateContext installs with _tnl_install_pipeline.
+ * The list is NULL-terminated. */
+static const struct tnl_pipeline_stage *via_pipeline[] = {
+    &_tnl_vertex_transform_stage,
+    &_tnl_normal_transform_stage,
+    &_tnl_lighting_stage,
+    &_tnl_fog_coordinate_stage,
+    &_tnl_texgen_stage,
+    &_tnl_texture_transform_stage,
+    /* REMOVE: point attenuation stage */
+    &_via_fastrender_stage,     /* ADD: unclipped rastersetup-to-dma */
+    &_tnl_render_stage,
+    NULL
+};
+
+/* Keyword -> DEBUG_* flag table, ended by { NULL, 0 }. NOTE(review): its consumer is outside this chunk. */
+static const struct dri_debug_control debug_control[] =
+{
+    { "fall",  DEBUG_FALLBACKS },
+    { "tex",   DEBUG_TEXTURE },
+    { "ioctl", DEBUG_IOCTL },
+    { "prim",  DEBUG_PRIMS },
+    { "vert",  DEBUG_VERTS },
+    { "state", DEBUG_STATE },
+    { "verb",  DEBUG_VERBOSE },
+    { "dri",   DEBUG_DRI },
+    { "dma",   DEBUG_DMA },
+    { "san",   DEBUG_SANITY },
+    { "sync",  DEBUG_SYNC },
+    { "sleep", DEBUG_SLEEP },
+    { "pix",   DEBUG_PIXEL },
+    { "2d",    DEBUG_2D },
+    { NULL,    0 }
+};
+
+/* (Re)allocate the context's DMA buffer and reset dmaLow/dmaCliprectAddr. */
+static GLboolean
+AllocateDmaBuffer(struct via_context *vmesa)
+{
+    if (vmesa->dma) {
+        via_free_dma_buffer(vmesa);   /* drop the previous buffer first */
+    }
+    if (via_alloc_dma_buffer(vmesa)) {
+        vmesa->dmaLow = 0;
+        vmesa->dmaCliprectAddr = ~0;
+        return GL_TRUE;
+    }
+    return GL_FALSE;
+}
+/* Release the draw buffers, breadcrumb and DMA buffer still held by \p vmesa. */
+static void FreeBuffer(struct via_context *vmesa)
+{
+    if (vmesa->drawType == GLX_PBUFFER_BIT && vmesa->front.map) {   /* front is freed only for pbuffers */
+        via_free_draw_buffer(vmesa, &vmesa->front);
+    }
+    if (vmesa->back.map) {
+        via_free_draw_buffer(vmesa, &vmesa->back);
+    }
+    if (vmesa->depth.map) {
+        via_free_draw_buffer(vmesa, &vmesa->depth);
+    }
+    if (vmesa->breadcrumb.map) {
+        via_free_draw_buffer(vmesa, &vmesa->breadcrumb);
+    }
+    if (vmesa->dma) {
+        via_free_dma_buffer(vmesa);
+    }
+}
+
+
+GLboolean
+viaCreateContext(gl_api api,
+		 const __GLcontextModes *visual,
+                 __DRIcontext *driContextPriv,
+                 void *sharedContextPrivate)
+{
+    GLcontext *ctx, *shareCtx;
+    struct via_context *vmesa;
+    __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+    viaScreenPrivate *viaScreen = (viaScreenPrivate *)sPriv->private;
+    drm_via_sarea_t *saPriv = (drm_via_sarea_t *)
+        (((GLubyte *)sPriv->pSAREA) + viaScreen->sareaPrivOffset);
+    struct dd_function_table functions;
+
+    /* Allocate via context */
+    vmesa = (struct via_context *) CALLOC_STRUCT(via_context);
+    if (!vmesa) {
+        return GL_FALSE;
+    }
+
+    /* Parse configuration files.
+     */
+    driParseConfigFiles (&vmesa->optionCache, &viaScreen->optionCache,
+			 sPriv->myNum, "unichrome");
+
+    /* pick back buffer */
+    vmesa->hasBack = visual->doubleBufferMode;
+
+    switch(visual->depthBits) {
+    case 0:			
+       vmesa->hasDepth = GL_FALSE;
+       vmesa->depthBits = 0; 
+       vmesa->depth_max = 1.0;
+       break;
+    case 16:
+       vmesa->hasDepth = GL_TRUE;
+       vmesa->depthBits = visual->depthBits;
+       vmesa->have_hw_stencil = GL_FALSE;
+       vmesa->depth_max = (GLfloat)0xffff;
+       vmesa->depth_clear_mask = 0xf << 28;
+       vmesa->ClearDepth = 0xffff;
+       vmesa->polygon_offset_scale = 1.0 / vmesa->depth_max;
+       break;
+    case 24:
+       vmesa->hasDepth = GL_TRUE;
+       vmesa->depthBits = visual->depthBits;
+       vmesa->depth_max = (GLfloat) 0xffffff;
+       vmesa->depth_clear_mask = 0xe << 28;
+       vmesa->ClearDepth = 0xffffff00;
+
+       assert(visual->haveStencilBuffer);
+       assert(visual->stencilBits == 8);
+
+       vmesa->have_hw_stencil = GL_TRUE;
+       vmesa->stencilBits = visual->stencilBits;
+       vmesa->stencil_clear_mask = 0x1 << 28;
+       vmesa->polygon_offset_scale = 2.0 / vmesa->depth_max;
+       break;
+    case 32:
+       vmesa->hasDepth = GL_TRUE;
+       vmesa->depthBits = visual->depthBits;
+       assert(!visual->haveStencilBuffer);
+       vmesa->have_hw_stencil = GL_FALSE;
+       vmesa->depth_max = (GLfloat)0xffffffff;
+       vmesa->depth_clear_mask = 0xf << 28;
+       vmesa->ClearDepth = 0xffffffff;
+       vmesa->polygon_offset_scale = 2.0 / vmesa->depth_max;
+       break;
+    default:
+       assert(0); 
+       break;
+    }
+
+    make_empty_list(&vmesa->freed_tex_buffers);
+    make_empty_list(&vmesa->tex_image_list[VIA_MEM_VIDEO]);
+    make_empty_list(&vmesa->tex_image_list[VIA_MEM_AGP]);
+    make_empty_list(&vmesa->tex_image_list[VIA_MEM_SYSTEM]);
+
+    _mesa_init_driver_functions(&functions);
+    viaInitTextureFuncs(&functions);
+
+    /* Allocate the Mesa context */
+    if (sharedContextPrivate)
+        shareCtx = ((struct via_context *) sharedContextPrivate)->glCtx;
+    else
+        shareCtx = NULL;
+
+    vmesa->glCtx = _mesa_create_context(visual, shareCtx, &functions,
+					(void*) vmesa);
+    
+    vmesa->shareCtx = shareCtx;
+    
+    if (!vmesa->glCtx) {
+        FREE(vmesa);
+        return GL_FALSE;
+    }
+    driContextPriv->driverPrivate = vmesa;
+
+    ctx = vmesa->glCtx;
+
+    if (driQueryOptionb(&vmesa->optionCache, "excess_mipmap"))
+        ctx->Const.MaxTextureLevels = 11;
+    else
+        ctx->Const.MaxTextureLevels = 10;
+
+    ctx->Const.MaxTextureUnits = 2;
+    ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits;
+    ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits;
+
+    ctx->Const.MinLineWidth = 1.0;
+    ctx->Const.MinLineWidthAA = 1.0;
+    ctx->Const.MaxLineWidth = 1.0;
+    ctx->Const.MaxLineWidthAA = 1.0;
+    ctx->Const.LineWidthGranularity = 1.0;
+
+    ctx->Const.MinPointSize = 1.0;
+    ctx->Const.MinPointSizeAA = 1.0;
+    ctx->Const.MaxPointSize = 1.0;
+    ctx->Const.MaxPointSizeAA = 1.0;
+    ctx->Const.PointSizeGranularity = 1.0;
+
+    ctx->Const.MaxDrawBuffers = 1;
+
+    ctx->Driver.GetString = viaGetString;
+
+    ctx->DriverCtx = (void *)vmesa;
+    vmesa->glCtx = ctx;
+
+    /* Initialize the software rasterizer and helper modules.
+     */
+    _swrast_CreateContext(ctx);
+    _vbo_CreateContext(ctx);
+    _tnl_CreateContext(ctx);
+    _swsetup_CreateContext(ctx);
+
+    /* Install the customized pipeline:
+     */
+    _tnl_destroy_pipeline(ctx);
+    _tnl_install_pipeline(ctx, via_pipeline);
+
+    /* Configure swrast and T&L to match hardware characteristics:
+     */
+    _swrast_allow_pixel_fog(ctx, GL_FALSE);
+    _swrast_allow_vertex_fog(ctx, GL_TRUE);
+    _tnl_allow_pixel_fog(ctx, GL_FALSE);
+    _tnl_allow_vertex_fog(ctx, GL_TRUE);
+
+    vmesa->hHWContext = driContextPriv->hHWContext;
+    vmesa->driFd = sPriv->fd;
+    vmesa->driHwLock = &sPriv->pSAREA->lock;
+
+    vmesa->viaScreen = viaScreen;
+    vmesa->driScreen = sPriv;
+    vmesa->sarea = saPriv;
+
+    vmesa->renderIndex = ~0;
+    vmesa->setupIndex = ~0;
+    vmesa->hwPrimitive = GL_POLYGON+1;
+
+    /* KW: Hardwire this.  Was previously set bogusly in
+     * viaCreateBuffer.  Needs work before PBUFFER can be used:
+     */
+    vmesa->drawType = GLX_WINDOW_BIT;
+
+
+    _math_matrix_ctr(&vmesa->ViewportMatrix);
+
+    /* Do this early, before VIA_FLUSH_DMA can be called:
+     */
+    if (!AllocateDmaBuffer(vmesa)) {
+	fprintf(stderr ,"AllocateDmaBuffer fail\n");
+	FreeBuffer(vmesa);
+        FREE(vmesa);
+        return GL_FALSE;
+    }
+
+    /* Allocate a small piece of fb memory for synchronization:
+     */
+    vmesa->breadcrumb.bpp = 32;
+    vmesa->breadcrumb.pitch = buffer_align( 64 ) << 2;
+    vmesa->breadcrumb.size = vmesa->breadcrumb.pitch;
+
+    if (!via_alloc_draw_buffer(vmesa, &vmesa->breadcrumb)) {
+        fprintf(stderr ,"AllocateDmaBuffer fail\n");
+        FreeBuffer(vmesa);
+        FREE(vmesa);
+        return GL_FALSE;
+    }
+
+    driInitExtensions( ctx, card_extensions, GL_TRUE );
+    viaInitStateFuncs(ctx);
+    viaInitTriFuncs(ctx);
+    viaInitSpanFuncs(ctx);
+    viaInitIoctlFuncs(ctx);
+    viaInitState(ctx);
+        
+    if (getenv("VIA_DEBUG"))
+       VIA_DEBUG = driParseDebugString( getenv( "VIA_DEBUG" ),
+					debug_control );
+
+    if (getenv("VIA_NO_RAST") ||
+        driQueryOptionb(&vmesa->optionCache, "no_rast"))
+       FALLBACK(vmesa, VIA_FALLBACK_USER_DISABLE, 1);
+
+    if (getenv("VIA_PAGEFLIP"))
+       vmesa->allowPageFlip = 1;
+
+    (*sPriv->systemTime->getUST)( &vmesa->swap_ust );
+
+
+    vmesa->regMMIOBase = (GLuint *)((unsigned long)viaScreen->reg);
+    vmesa->pnGEMode = (GLuint *)((unsigned long)viaScreen->reg + 0x4);
+    vmesa->regEngineStatus = (GLuint *)((unsigned long)viaScreen->reg + 0x400);
+    vmesa->regTranSet = (GLuint *)((unsigned long)viaScreen->reg + 0x43C);
+    vmesa->regTranSpace = (GLuint *)((unsigned long)viaScreen->reg + 0x440);
+    vmesa->agpBase = viaScreen->agpBase;
+
+
+    return GL_TRUE;
+}
+
+/* Tear down a VIA context: wait for the hardware, undo any page flip, unbind
+ * the context if it is current, then free the Mesa and driver state. */
+void viaDestroyContext(__DRIcontext *driContextPriv)
+{
+    GET_CURRENT_CONTEXT(ctx);
+    struct via_context *bound = ctx ? VIA_CONTEXT(ctx) : NULL;
+    struct via_context *vmesa =
+       (struct via_context *)driContextPriv->driverPrivate;
+
+    assert(vmesa); /* driverPrivate is expected to be set */
+
+    if (vmesa->driDrawable) {
+       viaWaitIdle(vmesa, GL_FALSE);
+       if (vmesa->doPageFlip) {
+          /* Page-flip state is checked and reset with the lock held. */
+          LOCK_HARDWARE(vmesa);
+          if (vmesa->pfCurrentOffset != 0) {
+             fprintf(stderr, "%s - reset pf\n", __FUNCTION__);
+             viaResetPageFlippingLocked(vmesa);
+          }
+          UNLOCK_HARDWARE(vmesa);
+       }
+    }
+
+    /* Destroying the bound context: flush pending DMA, then unbind. */
+    if (bound == vmesa) {
+       VIA_FLUSH_DMA(vmesa);
+       _mesa_make_current(NULL, NULL, NULL);
+    }
+
+    /* Helper modules go first, then the Mesa context, then our buffers. */
+    _swsetup_DestroyContext(vmesa->glCtx);
+    _tnl_DestroyContext(vmesa->glCtx);
+    _vbo_DestroyContext(vmesa->glCtx);
+    _swrast_DestroyContext(vmesa->glCtx);
+    _mesa_destroy_context(vmesa->glCtx);
+    FreeBuffer(vmesa);
+
+    /* Every texture buffer list is expected to be empty by now. */
+    assert(is_empty_list(&vmesa->tex_image_list[VIA_MEM_AGP]));
+    assert(is_empty_list(&vmesa->tex_image_list[VIA_MEM_VIDEO]));
+    assert(is_empty_list(&vmesa->tex_image_list[VIA_MEM_SYSTEM]));
+    assert(is_empty_list(&vmesa->freed_tex_buffers));
+
+    driDestroyOptionCache(&vmesa->optionCache);
+    FREE(vmesa);
+}
+
+
+/* Refresh the cliprects and the per-buffer drawing origins of the context
+ * from the geometry of its bound drawable (and readable, when distinct). */
+void viaXMesaWindowMoved(struct via_context *vmesa)
+{
+   __DRIdrawable *const drawable = vmesa->driDrawable;
+   __DRIdrawable *const readable = vmesa->driReadable;
+   const GLuint bytePerPixel = vmesa->viaScreen->bitsPerPixel >> 3;
+   struct via_renderbuffer *drawRb;
+   struct via_renderbuffer *readRb;
+   GLuint xBytes;
+   int y;
+
+   if (!drawable)
+      return;
+
+   drawRb = (struct via_renderbuffer *) drawable->driverPrivate;
+   readRb = (struct via_renderbuffer *) readable->driverPrivate;
+
+   /* Select the cliprect list matching the current color draw buffer. */
+   switch (vmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0]) {
+   case BUFFER_BACK_LEFT:
+      if (drawable->numBackClipRects != 0) {
+         vmesa->numClipRects = drawable->numBackClipRects;
+         vmesa->pClipRects = drawable->pBackClipRects;
+         break;
+      }
+      /* No dedicated back cliprects: share the front ones below. */
+      /* fallthrough */
+   case BUFFER_FRONT_LEFT:
+      vmesa->numClipRects = drawable->numClipRects;
+      vmesa->pClipRects = drawable->pClipRects;
+      break;
+   default:
+      /* Any other target gets zero cliprects; pClipRects is left as is. */
+      vmesa->numClipRects = 0;
+      break;
+   }
+
+   /* Recompute buffer parameters only when the drawable changed size. */
+   if (drawRb->drawW != drawable->w || drawRb->drawH != drawable->h) {
+      calculate_buffer_parameters(vmesa, vmesa->glCtx->DrawBuffer,
+                                  drawable);
+   }
+
+   drawRb->drawX = drawable->x;
+   drawRb->drawY = drawable->y;
+   drawRb->drawW = drawable->w;
+   drawRb->drawH = drawable->h;
+
+   if (readable != drawable) {
+      if (readRb->drawW != readable->w || readRb->drawH != readable->h) {
+         calculate_buffer_parameters(vmesa, vmesa->glCtx->ReadBuffer,
+                                     readable);
+      }
+
+      readRb->drawX = readable->x;
+      readRb->drawY = readable->y;
+      readRb->drawW = readable->w;
+      readRb->drawH = readable->h;
+   }
+
+   /* Every buffer origin uses the same row and the same x byte offset
+    * (the screen's bytes per pixel is applied to all three buffers).
+    */
+   y = drawRb->drawY;
+   xBytes = drawRb->drawX * bytePerPixel;
+
+   vmesa->front.orig =
+      vmesa->front.offset + y * vmesa->front.pitch + xBytes;
+   vmesa->front.origMap =
+      vmesa->front.map + y * vmesa->front.pitch + xBytes;
+
+   vmesa->back.orig =
+      vmesa->back.offset + y * vmesa->back.pitch + xBytes;
+   vmesa->back.origMap =
+      vmesa->back.map + y * vmesa->back.pitch + xBytes;
+
+   vmesa->depth.orig =
+      vmesa->depth.offset + y * vmesa->depth.pitch + xBytes;
+   vmesa->depth.origMap =
+      vmesa->depth.map + y * vmesa->depth.pitch + xBytes;
+
+   viaCalcViewport(vmesa->glCtx);
+}
+
+/* Unbinding needs no driver-side work here; always reports success. */
+GLboolean viaUnbindContext(__DRIcontext *driContextPriv)
+{
+    return GL_TRUE;
+}
+
+/* Bind a context together with its draw and read drawables, or unbind the
+ * current context when driContextPriv is NULL.  Returns GL_FALSE only if
+ * calculate_buffer_parameters() fails for a newly bound drawable. */
+GLboolean viaMakeCurrent(__DRIcontext *driContextPriv,
+                         __DRIdrawable *driDrawPriv,
+                         __DRIdrawable *driReadPriv)
+{
+    struct via_context *vmesa;
+    struct gl_framebuffer *drawFb, *readFb;
+    GLcontext *ctx;
+
+    if (VIA_DEBUG & DEBUG_DRI) {
+        fprintf(stderr, "driContextPriv = %016lx\n", (unsigned long)driContextPriv);
+        fprintf(stderr, "driDrawPriv = %016lx\n", (unsigned long)driDrawPriv);
+        fprintf(stderr, "driReadPriv = %016lx\n", (unsigned long)driReadPriv);
+    }
+
+    if (!driContextPriv) {
+        _mesa_make_current(NULL, NULL, NULL);
+        return GL_TRUE;
+    }
+
+    vmesa = (struct via_context *)driContextPriv->driverPrivate;
+    ctx = vmesa->glCtx;
+    drawFb = (GLframebuffer *)driDrawPriv->driverPrivate;
+    readFb = (GLframebuffer *)driReadPriv->driverPrivate;
+
+    /* Only re-derive per-drawable state when the binding actually changes. */
+    if (vmesa->driDrawable != driDrawPriv ||
+        vmesa->driReadable != driReadPriv) {
+        if (driDrawPriv->swap_interval == (unsigned)-1) {
+            /* swap_interval is still (unsigned)-1: set up vblank handling. */
+            if (vmesa->viaScreen->irqEnabled)
+                driDrawPriv->vblFlags =
+                    driGetDefaultVBlankFlags(&vmesa->optionCache);
+            else
+                driDrawPriv->vblFlags = VBLANK_FLAG_NO_IRQ;
+
+            driDrawableInitVBlank(driDrawPriv);
+        }
+
+        vmesa->driDrawable = driDrawPriv;
+        vmesa->driReadable = driReadPriv;
+
+        if (drawFb->Width != driDrawPriv->w ||
+            drawFb->Height != driDrawPriv->h) {
+            _mesa_resize_framebuffer(ctx, drawFb,
+                                     driDrawPriv->w, driDrawPriv->h);
+            drawFb->Initialized = GL_TRUE;
+        }
+
+        if (!calculate_buffer_parameters(vmesa, drawFb, driDrawPriv))
+            return GL_FALSE;
+
+        if (driReadPriv != driDrawPriv) {
+            if (readFb->Width != driReadPriv->w ||
+                readFb->Height != driReadPriv->h) {
+                _mesa_resize_framebuffer(ctx, readFb,
+                                         driReadPriv->w, driReadPriv->h);
+                readFb->Initialized = GL_TRUE;
+            }
+
+            if (!calculate_buffer_parameters(vmesa, readFb, driReadPriv))
+                return GL_FALSE;
+        }
+    }
+
+    _mesa_make_current(ctx, drawFb, readFb);
+    ctx->Driver.DrawBuffer(ctx, ctx->Color.DrawBuffer[0]);
+
+    viaXMesaWindowMoved(vmesa);
+    ctx->Driver.Scissor(ctx, ctx->Scissor.X, ctx->Scissor.Y,
+                        ctx->Scissor.Width, ctx->Scissor.Height);
+
+    return GL_TRUE;
+}
+
+/* Slow path of LOCK_HARDWARE: take the DRM lock, then bring the context
+ * back in sync with drawable, ownership and page-flip changes. */
+void viaGetLock(struct via_context *vmesa, GLuint flags)
+{
+    __DRIscreen *screen = vmesa->driScreen;
+    __DRIdrawable *draw = vmesa->driDrawable;
+
+    drmGetLock(vmesa->driFd, vmesa->hHWContext, flags);
+    DRI_VALIDATE_DRAWABLE_INFO(screen, draw);
+    if (draw != vmesa->driReadable) {
+        DRI_VALIDATE_DRAWABLE_INFO(screen, vmesa->driReadable);
+    }
+
+    if (vmesa->sarea->ctxOwner != vmesa->hHWContext) {
+        /* Another context owned the hardware: flag all state for re-emit. */
+        vmesa->sarea->ctxOwner = vmesa->hHWContext;
+        vmesa->newEmitState = ~0;
+    }
+    if (vmesa->lastStamp != draw->lastStamp) {
+        viaXMesaWindowMoved(vmesa);
+        driUpdateFramebufferSize(vmesa->glCtx, draw);
+        vmesa->newEmitState = ~0;
+        vmesa->lastStamp = draw->lastStamp;
+    }
+    if (vmesa->doPageFlip &&
+        vmesa->pfCurrentOffset != vmesa->sarea->pfCurrentOffset) {
+        fprintf(stderr, "%s - reset pf\n", __FUNCTION__);
+        viaResetPageFlippingLocked(vmesa);
+    }
+}
+
+
+/* Swap buffers for a drawable: page flip or copy for double-buffered
+ * visuals; single-buffered visuals only get a DMA flush. */
+void viaSwapBuffers(__DRIdrawable *drawablePrivate)
+{
+    __DRIdrawable *dPriv = (__DRIdrawable *)drawablePrivate;
+    struct via_context *vmesa;
+    GLcontext *ctx;
+
+    /* A drawable without a bound driver context cannot be swapped. */
+    if (!dPriv || !dPriv->driContextPriv ||
+        !dPriv->driContextPriv->driverPrivate) {
+        _mesa_problem(NULL, "viaSwapBuffers: drawable has no context!\n");
+        return;
+    }
+
+    vmesa = (struct via_context *)dPriv->driContextPriv->driverPrivate;
+    ctx = vmesa->glCtx;
+    _mesa_notifySwapBuffers(ctx);
+
+    if (!ctx->Visual.doubleBufferMode) {
+        VIA_FLUSH_DMA(vmesa);
+    }
+    else if (vmesa->doPageFlip) {
+        viaPageFlip(dPriv);
+    }
+    else {
+        viaCopyBuffer(dPriv);
+    }
+}
diff --git a/src/mesa/drivers/dri/unichrome/via_context.h b/src/mesa/drivers/dri/unichrome/via_context.h
new file mode 100644
index 0000000000..4e1ab3a6ca
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_context.h
@@ -0,0 +1,420 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _VIACONTEXT_H
+#define _VIACONTEXT_H
+
+#include "dri_util.h"
+
+#include "drm.h"
+
+#include "main/mtypes.h"
+#include "main/mm.h"
+#include "tnl/t_vertex.h"
+
+#include "via_screen.h"
+#include "via_tex.h"
+#include "via_drm.h"
+
+struct via_context;
+
+/* Chip tags.  These are used to group the adapters into
+ * related families.
+ */
+enum VIACHIPTAGS {
+    VIA_UNKNOWN = 0,   /* explicit zero; the tags below count up from it */
+    VIA_CLE266,
+    VIA_KM400,
+    VIA_K8M800,
+    VIA_PM800,
+    VIA_LAST           /* presumably an end marker, not a chip family -- confirm */
+};
+
+#define VIA_FALLBACK_TEXTURE           	0x1 /* one bit per fallback reason; passed to FALLBACK() */
+#define VIA_FALLBACK_DRAW_BUFFER       	0x2
+#define VIA_FALLBACK_READ_BUFFER       	0x4
+#define VIA_FALLBACK_COLORMASK         	0x8 /* note: bit 0x10 is not assigned in this list */
+#define VIA_FALLBACK_SPECULAR          	0x20
+#define VIA_FALLBACK_LOGICOP           	0x40
+#define VIA_FALLBACK_RENDERMODE        	0x80
+#define VIA_FALLBACK_STENCIL           	0x100
+#define VIA_FALLBACK_BLEND_EQ          	0x200
+#define VIA_FALLBACK_BLEND_FUNC        	0x400
+#define VIA_FALLBACK_USER_DISABLE      	0x800 /* set for VIA_NO_RAST / the "no_rast" option */
+#define VIA_FALLBACK_PROJ_TEXTURE      	0x1000
+#define VIA_FALLBACK_POLY_STIPPLE	0x2000
+
+#define VIA_DMA_BUFSIZ                  4096
+#define VIA_DMA_HIGHWATER               (VIA_DMA_BUFSIZ - 128)
+
+#define VIA_NO_CLIPRECTS 0x1
+
+
+/* Use the templated vertex formats:
+ */
+#define TAG(x) via##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+typedef void (*via_tri_func)(struct via_context *, viaVertex *, viaVertex *,
+                             viaVertex *);
+typedef void (*via_line_func)(struct via_context *, viaVertex *, viaVertex *);
+typedef void (*via_point_func)(struct via_context *, viaVertex *);
+
+/**
+ * Derived from gl_renderbuffer.
+ */
+struct via_renderbuffer {
+   struct gl_renderbuffer Base;  /* must be first! */
+   drm_handle_t handle;
+   drmSize size;                /* byte size passed to the DRM alloc/free ioctls */
+   unsigned long offset;        /* offset returned by DRM_IOCTL_VIA_ALLOCMEM */
+   unsigned long index;         /* allocation index returned by the same ioctl */
+   GLuint pitch;                /* multiplied by drawY when computing orig/origMap */
+   GLuint bpp;
+   char *map;                   /* CPU address: driScreen->pFB + offset; NULL once freed */
+   GLuint orig;		/* The drawing origin, 
+			 * at (drawX,drawY) in screen space.
+			 */
+   char *origMap;               /* map advanced to the same (drawX,drawY) origin */
+
+   int drawX;                   /* origin of drawable in draw buffer */
+   int drawY;    
+   int drawW;                   /* drawable size last recorded by viaXMesaWindowMoved() */
+   int drawH;    
+
+   __DRIdrawable *dPriv;
+};
+
+
+#define VIA_MAX_TEXLEVELS	10
+
+struct via_tex_buffer {
+   struct via_tex_buffer *next, *prev;  /* links for the simple_list macros */
+   struct via_texture_image *image;
+   unsigned long index;      /* DRM allocation index; stays 0 for system memory */
+   unsigned long offset;     /* offset returned by DRM_IOCTL_VIA_ALLOCMEM */
+   GLuint size;              /* requested size, as passed to via_alloc_texture() */
+   GLuint memType;           /* VIA_MEM_VIDEO, VIA_MEM_AGP or VIA_MEM_SYSTEM */
+   unsigned char *bufAddr;   /* CPU address of the texture storage */
+   GLuint texBase;           /* fb offset, or agpBase + offset for AGP buffers */
+   GLuint lastUsed;          /* breadcrumb value compared before the buffer is released */
+};
+
+
+
+struct via_texture_image {
+   struct gl_texture_image image;   /* embedded Mesa image, kept as first member */
+   struct via_tex_buffer *texMem;   /* presumably the backing storage -- confirm */
+   GLint pitchLog2;
+};
+
+struct via_texture_object {
+   struct gl_texture_object obj; /* The "parent" object */
+
+   GLuint texelBytes;
+   GLuint memType;               /* presumably a VIA_MEM_* value -- confirm */
+   /* The reg* fields look like cached hardware register values -- confirm. */
+   GLuint regTexFM;
+   GLuint regTexWidthLog2[2];
+   GLuint regTexHeightLog2[2];
+   GLuint regTexBaseH[4];
+   struct {
+      GLuint baseL;
+      GLuint pitchLog2;
+   } regTexBaseAndPitch[12];
+
+   GLint firstLevel, lastLevel;  /* upload tObj->Image[first .. lastLevel] */
+};              
+
+
+
+struct via_context {
+   GLint refcount;   
+   GLcontext *glCtx;            /* Mesa context created in viaCreateContext() */
+   GLcontext *shareCtx;         /* context sharing state with this one, or NULL */
+
+   /* XXX These don't belong here.  They should be per-drawable state. */
+   struct via_renderbuffer front;
+   struct via_renderbuffer back;
+   struct via_renderbuffer depth;
+   struct via_renderbuffer stencil; /* mirrors depth */
+   struct via_renderbuffer breadcrumb; /* small fb allocation used for synchronization */
+
+   GLboolean hasBack;
+   GLboolean hasDepth;
+   GLboolean hasStencil;
+   GLboolean hasAccum;
+   GLuint    depthBits;
+   GLuint    stencilBits;
+
+   GLboolean have_hw_stencil;
+   GLuint ClearDepth;
+   GLuint depth_clear_mask;
+   GLuint stencil_clear_mask;
+   GLfloat depth_max;
+   GLfloat polygon_offset_scale;
+
+   GLubyte    *dma;             /* VIA_DMA_BUFSIZ bytes from via_alloc_dma_buffer() */
+   viaRegion tex;
+    
+   /* Bit flags to keep track of fallbacks.
+    */
+   GLuint Fallback;
+
+   /* State for via_tris.c.
+    */
+   GLuint newState;            /* _NEW_* flags */
+   GLuint newEmitState;            /* _NEW_* flags */
+   GLuint newRenderState;            /* _NEW_* flags */
+
+   struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX];
+   GLuint vertex_attr_count;
+
+   GLuint setupIndex;
+   GLuint renderIndex;
+   GLmatrix ViewportMatrix;
+   GLenum renderPrimitive;
+   GLenum hwPrimitive;
+   GLenum hwShadeModel;
+   unsigned char *verts;
+
+   /* drmBufPtr dma_buffer;
+    */
+   GLuint dmaLow;
+   GLuint dmaCliprectAddr;
+   GLuint dmaLastPrim;
+   GLboolean useAgp;            /* true when the VIA_DMA_INITIALIZED query succeeds */
+   
+
+   /* Fallback rasterization functions 
+    */
+   via_point_func drawPoint;
+   via_line_func drawLine;
+   via_tri_func drawTri;
+
+   /* Hardware register
+    */
+   GLuint regCmdA_End;
+   GLuint regCmdB;
+
+   GLuint regEnable;
+   GLuint regHFBBMSKL;
+   GLuint regHROP;
+
+   GLuint regHZWTMD;
+   GLuint regHSTREF;
+   GLuint regHSTMD;
+
+   GLuint regHATMD;
+   GLuint regHABLCsat;
+   GLuint regHABLCop;
+   GLuint regHABLAsat;
+   GLuint regHABLAop;
+   GLuint regHABLRCa;
+   GLuint regHABLRFCa;
+   GLuint regHABLRCbias;
+   GLuint regHABLRCb;
+   GLuint regHABLRFCb;
+   GLuint regHABLRAa;
+   GLuint regHABLRAb;
+   GLuint regHFogLF;
+   GLuint regHFogCL;
+   GLuint regHFogCH;
+
+   GLuint regHLP;
+   GLuint regHLPRF;
+   
+   GLuint regHTXnCLOD[2];
+   GLuint regHTXnTB[2];
+   GLuint regHTXnMPMD[2];
+   GLuint regHTXnTBLCsat[2];
+   GLuint regHTXnTBLCop[2];
+   GLuint regHTXnTBLMPfog[2];
+   GLuint regHTXnTBLAsat[2];
+   GLuint regHTXnTBLRCb[2];
+   GLuint regHTXnTBLRAa[2];
+   GLuint regHTXnTBLRFog[2];
+   GLuint regHTXnTBLRCa[2];
+   GLuint regHTXnTBLRCc[2];
+   GLuint regHTXnTBLRCbias[2];
+   GLuint regHTXnTBC[2];
+   GLuint regHTXnTRAH[2];
+
+   int vertexSize;
+   int hwVertexSize;
+   GLboolean ptexHack;
+   int coloroffset;
+   int specoffset;
+
+   GLint lastStamp;             /* drawable stamp last handled in viaGetLock() */
+
+   GLuint ClearColor;
+   GLuint ClearMask;
+
+   /* DRI stuff
+    */
+   GLboolean doPageFlip;
+
+   struct via_renderbuffer *drawBuffer;
+
+   GLuint numClipRects;         /* cliprects for that buffer */
+   drm_clip_rect_t *pClipRects;
+
+   GLboolean scissor;
+   drm_clip_rect_t drawRect;
+   drm_clip_rect_t scissorRect;
+
+   drm_context_t hHWContext;
+   drm_hw_lock_t *driHwLock;
+   int driFd;
+
+   /**
+    * DRI drawable bound to this context for drawing.
+    */
+   __DRIdrawable	*driDrawable;
+
+   /**
+    * DRI drawable bound to this context for reading.
+    */
+   __DRIdrawable	*driReadable;
+
+   __DRIscreen *driScreen;
+   viaScreenPrivate *viaScreen;
+   drm_via_sarea_t *sarea;
+   volatile GLuint* regMMIOBase;
+   volatile GLuint* pnGEMode;
+   volatile GLuint* regEngineStatus;
+   volatile GLuint* regTranSet;
+   volatile GLuint* regTranSpace;
+   GLuint agpBase;
+   GLuint drawType;             /* hardwired to GLX_WINDOW_BIT at context creation */
+
+   GLuint nDoneFirstFlip;
+   GLuint agpFullCount;
+
+   GLboolean clearTexCache;
+   GLboolean thrashing;
+
+   /* Configuration cache
+    */
+   driOptionCache optionCache;
+
+   int64_t swap_ust;
+   int64_t swap_missed_ust;
+
+   GLuint swap_count;
+   GLuint swap_missed_count;
+
+
+   GLuint pfCurrentOffset;
+   GLboolean allowPageFlip;     /* set when the VIA_PAGEFLIP env var is present */
+
+   GLuint lastBreadcrumbRead;
+   GLuint lastBreadcrumbWrite;
+   GLuint lastSwap[2];
+   GLuint lastDma;
+   
+   GLuint total_alloc[VIA_MEM_SYSTEM+1];   /* bytes allocated, per memory type */
+
+   struct via_tex_buffer tex_image_list[VIA_MEM_SYSTEM+1]; /* list heads, per memory type */
+   struct via_tex_buffer freed_tex_buffers; /* freed buffers whose release is deferred */
+   
+};
+
+
+
+#define VIA_CONTEXT(ctx)   ((struct via_context *)((ctx)->DriverCtx)) /* arg parenthesized so any expression works */
+
+
+
+/* Lock the hardware and validate our state.  If DRM_CAS reports a non-zero
+ * result the slow path viaGetLock() is taken.  vmesa is evaluated repeatedly. */
+#define LOCK_HARDWARE(vmesa)                                	\
+	do {                                                    \
+    	    char __ret = 0;                                     \
+    	    DRM_CAS((vmesa)->driHwLock, (vmesa)->hHWContext,    \
+        	(DRM_LOCK_HELD|(vmesa)->hHWContext), __ret);    \
+    	    if (__ret)                                          \
+        	viaGetLock((vmesa), 0);                         \
+	} while (0)
+
+
+/* Release the kernel lock.  Expands to one expression-statement without a
+ * trailing ';' so that "if (x) UNLOCK_HARDWARE(v); else ..." parses. */
+#define UNLOCK_HARDWARE(vmesa)                                  	\
+	DRM_UNLOCK((vmesa)->driFd, (vmesa)->driHwLock, (vmesa)->hHWContext)
+
+	
+
+extern GLuint VIA_DEBUG;
+
+#define DEBUG_TEXTURE	0x1
+#define DEBUG_STATE	0x2
+#define DEBUG_IOCTL	0x4
+#define DEBUG_PRIMS	0x8
+#define DEBUG_VERTS	0x10
+#define DEBUG_FALLBACKS	0x20
+#define DEBUG_VERBOSE	0x40
+#define DEBUG_DRI       0x80
+#define DEBUG_DMA       0x100
+#define DEBUG_SANITY    0x200
+#define DEBUG_SYNC      0x400
+#define DEBUG_SLEEP     0x800
+#define DEBUG_PIXEL     0x1000
+#define DEBUG_2D        0x2000
+
+
+extern void viaGetLock(struct via_context *vmesa, GLuint flags);
+extern void viaLock(struct via_context *vmesa, GLuint flags);
+extern void viaUnLock(struct via_context *vmesa, GLuint flags);
+extern void viaEmitHwStateLocked(struct via_context *vmesa);
+extern void viaEmitScissorValues(struct via_context *vmesa, int box_nr, int emit);
+extern void viaXMesaSetBackClipRects(struct via_context *vmesa);
+extern void viaXMesaSetFrontClipRects(struct via_context *vmesa);
+extern void viaReAllocateBuffers(GLcontext *ctx, GLframebuffer *drawbuffer, GLuint width, GLuint height);
+extern void viaXMesaWindowMoved(struct via_context *vmesa);
+
+extern GLboolean viaTexCombineState(struct via_context *vmesa,
+				    const struct gl_tex_env_combine_state * combine, 
+				    unsigned unit );
+
+/* Via hw already adjusted for GL pixel centers:
+ */
+#define SUBPIXEL_X 0
+#define SUBPIXEL_Y 0
+
+/* TODO XXX _SOLO temp defines to make code compilable */
+#ifndef GLX_PBUFFER_BIT
+#define GLX_PBUFFER_BIT        0x00000004
+#endif
+#ifndef GLX_WINDOW_BIT
+#define GLX_WINDOW_BIT 0x00000001
+#endif
+#ifndef VERT_BIT_CLIP
+#define VERT_BIT_CLIP       0x1000000
+#endif
+
+#endif
diff --git a/src/mesa/drivers/dri/unichrome/via_drmclient.h b/src/mesa/drivers/dri/unichrome/via_drmclient.h
new file mode 100644
index 0000000000..7beff9a9d9
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_drmclient.h
@@ -0,0 +1,29 @@
+/*
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _VIA_DRMCLIENT_H_
+#define _VIA_DRMCLIENT_H_
+
+#define VIA_DMA_BUF_ORDER		12 /* log2 of VIA_DMA_BUF_SZ */
+#define VIA_DMA_BUF_SZ 		        (1 << VIA_DMA_BUF_ORDER) /* 4096 */
+#define VIA_DMA_BUF_NR                  256 /* presumably the DMA buffer count -- confirm */
+
+#endif				/* _VIA_DRMCLIENT_H_ */
diff --git a/src/mesa/drivers/dri/unichrome/via_fb.c b/src/mesa/drivers/dri/unichrome/via_fb.c
new file mode 100644
index 0000000000..bebf0619d0
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_fb.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+
+#include "via_context.h"
+#include "via_ioctl.h"
+#include "via_fb.h"
+#include "xf86drm.h"
+#include "main/imports.h"
+#include "main/simple_list.h"
+#include <sys/ioctl.h>
+
+/* Ask the DRM for buf->size bytes of video memory and record the resulting
+ * offset, allocation index and CPU mapping in buf.  GL_FALSE on failure. */
+GLboolean
+via_alloc_draw_buffer(struct via_context *vmesa, struct via_renderbuffer *buf)
+{
+   drm_via_mem_t req;
+
+   req.context = vmesa->hHWContext;
+   req.type = VIA_MEM_VIDEO;
+   req.size = buf->size;
+   req.index = 0;
+   req.offset = 0;
+   if (ioctl(vmesa->driFd, DRM_IOCTL_VIA_ALLOCMEM, &req) != 0)
+      return GL_FALSE;
+   buf->index = req.index;
+   buf->offset = req.offset;
+   buf->map = (char *)vmesa->driScreen->pFB + req.offset;
+   return GL_TRUE;
+}
+
+/* Hand a draw buffer's video memory back to the DRM and clear its mapping.
+ * The ioctl result is not checked; a NULL vmesa makes this a no-op. */
+void
+via_free_draw_buffer(struct via_context *vmesa, struct via_renderbuffer *buf)
+{
+   drm_via_mem_t req;
+   if (vmesa == NULL)
+      return;
+   req.context = vmesa->hHWContext;
+   req.type = VIA_MEM_VIDEO;
+   req.index = buf->index;
+   req.offset = buf->offset;
+   req.size = buf->size;
+   ioctl(vmesa->driFd, DRM_IOCTL_VIA_FREEMEM, &req);
+   buf->map = NULL;
+}
+
+
+/* Allocate the VIA_DMA_BUFSIZ-byte command buffer and ask the DRM whether
+ * AGP DMA has been initialized.  Returns GL_TRUE iff the malloc succeeded;
+ * the AGP query runs either way and only sets vmesa->useAgp. */
+GLboolean
+via_alloc_dma_buffer(struct via_context *vmesa)
+{
+   drm_via_dma_init_t query;
+
+   vmesa->dma = (GLubyte *) malloc(VIA_DMA_BUFSIZ);
+
+   memset(&query, 0, sizeof(query));
+   query.func = VIA_DMA_INITIALIZED;
+   /* A zero result from the DMA_INIT query selects the AGP path. */
+   vmesa->useAgp = (drmCommandWrite(vmesa->driFd, DRM_VIA_DMA_INIT,
+                                    &query, sizeof(query)) == 0);
+
+   if (VIA_DEBUG & DEBUG_DMA) {
+      if (!vmesa->useAgp)
+         fprintf(stderr, "unichrome_dri.so: Using PCI.\n");
+      else
+         fprintf(stderr, "unichrome_dri.so: Using AGP.\n");
+   }
+
+   return vmesa->dma ? GL_TRUE : GL_FALSE;
+}
+
+/* Free the buffer from via_alloc_dma_buffer(); a NULL vmesa is a no-op. */
+void via_free_dma_buffer(struct via_context *vmesa)
+{
+    if (vmesa == NULL) return;
+    free(vmesa->dma);
+    vmesa->dma = NULL;
+}
+
+
+/* Allocate `size` bytes of texture memory of kind `memType` plus the
+ * via_tex_buffer tracking it (linked on tex_image_list[memType]).  Returns
+ * NULL for an out-of-range memType or when an allocation fails. */
+struct via_tex_buffer *
+via_alloc_texture(struct via_context *vmesa,
+		  GLuint size,
+		  GLuint memType)
+{
+   struct via_tex_buffer *t;
+
+   /* Both per-type arrays have only VIA_MEM_SYSTEM+1 entries. */
+   if (memType > VIA_MEM_SYSTEM)
+      return NULL;
+
+   t = CALLOC_STRUCT(via_tex_buffer);
+   if (!t)
+      return NULL;
+
+   t->size = size;
+   t->memType = memType;
+   insert_at_tail(&vmesa->tex_image_list[memType], t);
+
+   if (memType == VIA_MEM_AGP || memType == VIA_MEM_VIDEO) {
+      drm_via_mem_t fb;
+
+      fb.context = vmesa->hHWContext;
+      fb.size = t->size;
+      fb.type = t->memType;
+      fb.offset = 0;
+      fb.index = 0;
+
+      /* An index of 0 is treated as a failed allocation. */
+      if (ioctl(vmesa->driFd, DRM_IOCTL_VIA_ALLOCMEM, &fb) != 0 ||
+	  fb.index == 0)
+	 goto cleanup;
+
+      t->offset = fb.offset;
+      t->index = fb.index;
+
+      if (memType == VIA_MEM_AGP) {
+	 t->bufAddr = (GLubyte *)((unsigned long)vmesa->viaScreen->agpLinearStart +
+				  fb.offset);
+	 t->texBase = vmesa->agpBase + fb.offset;
+      }
+      else {
+	 t->bufAddr = (GLubyte *)((unsigned long)vmesa->driScreen->pFB + fb.offset);
+	 t->texBase = fb.offset;
+      }
+   }
+   else if (memType == VIA_MEM_SYSTEM) {
+      t->bufAddr = malloc(t->size);
+      if (!t->bufAddr)
+	 goto cleanup;
+   }
+   else {
+      goto cleanup;
+   }
+
+   vmesa->total_alloc[memType] += t->size;
+   return t;
+
+ cleanup:
+   /* t is always linked at this point: unlink it before freeing. */
+   remove_from_list(t);
+   FREE(t);
+   return NULL;
+}
+
+
+/* Unlink t, return its memory to the DRM and free the bookkeeping struct.
+ * A failing FREEMEM ioctl is only reported on stderr. */
+static void
+via_do_free_texture(struct via_context *vmesa, struct via_tex_buffer *t)
+{
+   drm_via_mem_t req;
+
+   remove_from_list( t );
+   vmesa->total_alloc[t->memType] -= t->size;
+
+   req.context = vmesa->hHWContext;
+   req.type = t->memType;
+   req.size = t->size;
+   req.index = t->index;
+   req.offset = t->offset;
+
+   if (ioctl(vmesa->driFd, DRM_IOCTL_VIA_FREEMEM, &req) != 0)
+      fprintf(stderr, "via_free_texture fail\n");
+
+   FREE(t);
+}
+
+
+/* Release textures that were parked on freed_tex_buffers because the
+ * hardware might still have been using them when they were freed; a buffer
+ * is let go once VIA_GEQ_WRAP(lastUsed, lastBreadcrumbRead) is false. */
+void
+via_release_pending_textures( struct via_context *vmesa )
+{
+   struct via_tex_buffer *s, *tmp;
+
+   foreach_s( s, tmp, &vmesa->freed_tex_buffers ) {
+      if (!VIA_GEQ_WRAP(s->lastUsed, vmesa->lastBreadcrumbRead)) {
+	 if (VIA_DEBUG & DEBUG_TEXTURE)
+	    fprintf(stderr, "%s: release tex sz %u lastUsed %x\n",
+		    __FUNCTION__, s->size, s->lastUsed);
+	 via_do_free_texture(vmesa, s);
+      }
+   }
+}
+      
+
+
+/* Free a texture buffer; NULL is a no-op.  System-memory buffers go at once.
+ * DRM-backed buffers go at once only if the breadcrumb check passes,
+ * otherwise they are parked on freed_tex_buffers for a later release. */
+void
+via_free_texture(struct via_context *vmesa, struct via_tex_buffer *t)
+{
+   if (!t)
+      return;
+   if (t->memType == VIA_MEM_SYSTEM) {
+      remove_from_list(t);
+      vmesa->total_alloc[t->memType] -= t->size;
+      free(t->bufAddr);   /* obtained with malloc() in via_alloc_texture() */
+      FREE(t);            /* struct came from CALLOC_STRUCT: pair with FREE */
+   }
+   else if (t->index && viaCheckBreadcrumb(vmesa, t->lastUsed)) {
+      via_do_free_texture( vmesa, t );
+   }
+   else {
+      /* Close current breadcrumb so that we can free this eventually: */
+      if (t->lastUsed == vmesa->lastBreadcrumbWrite)
+	 viaEmitBreadcrumb(vmesa);
+      move_to_tail( &vmesa->freed_tex_buffers, t );
+   }
+}
diff --git a/src/mesa/drivers/dri/unichrome/via_fb.h b/src/mesa/drivers/dri/unichrome/via_fb.h
new file mode 100644
index 0000000000..2d329ac2a7
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_fb.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _VIAFB_INC
+#define _VIAFB_INC
+
+#include "via_context.h"
+
+extern GLboolean via_alloc_draw_buffer(struct via_context *vmesa, struct via_renderbuffer *buf);
+extern GLboolean via_alloc_dma_buffer(struct via_context *vmesa);
+
+struct via_tex_buffer *
+via_alloc_texture(struct via_context *vmesa,
+		  GLuint size,
+		  GLuint memType);
+
+extern void via_free_draw_buffer(struct via_context *vmesa, struct via_renderbuffer *buf);
+extern void via_free_dma_buffer(struct via_context *vmesa);
+extern void via_free_texture(struct via_context *vmesa, struct via_tex_buffer *t);
+void via_release_pending_textures( struct via_context *vmesa );
+#endif
diff --git a/src/mesa/drivers/dri/unichrome/via_ioctl.c b/src/mesa/drivers/dri/unichrome/via_ioctl.c
new file mode 100644
index 0000000000..25aad1b204
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_ioctl.c
@@ -0,0 +1,981 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <stdio.h>
+#include <unistd.h>
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/dd.h"
+#include "swrast/swrast.h"
+
+#include "main/mm.h"
+#include "via_context.h"
+#include "via_tris.h"
+#include "via_ioctl.h"
+#include "via_fb.h"
+#include "via_3d_reg.h"
+
+#include "vblank.h"
+#include "drm.h"
+#include "xf86drm.h"
+#include <sys/ioctl.h>
+#include <errno.h>
+
+
+#define VIA_REG_STATUS          0x400
+#define VIA_REG_GEMODE          0x004
+#define VIA_REG_SRCBASE         0x030
+#define VIA_REG_DSTBASE         0x034
+#define VIA_REG_PITCH           0x038      
+#define VIA_REG_SRCCOLORKEY     0x01C      
+#define VIA_REG_KEYCONTROL      0x02C       
+#define VIA_REG_SRCPOS          0x008
+#define VIA_REG_DSTPOS          0x00C
+#define VIA_REG_GECMD           0x000
+#define VIA_REG_DIMENSION       0x010       /* width and height */
+#define VIA_REG_FGCOLOR         0x018
+
+#define VIA_GEM_8bpp            0x00000000
+#define VIA_GEM_16bpp           0x00000100
+#define VIA_GEM_32bpp           0x00000300
+#define VIA_GEC_BLT             0x00000001
+#define VIA_PITCH_ENABLE        0x80000000
+#define VIA_GEC_INCX            0x00000000
+#define VIA_GEC_DECY            0x00004000
+#define VIA_GEC_INCY            0x00000000
+#define VIA_GEC_DECX            0x00008000
+#define VIA_GEC_FIXCOLOR_PAT    0x00002000
+
+
+#define VIA_BLIT_CLEAR 0x00
+#define VIA_BLIT_COPY 0xCC
+#define VIA_BLIT_FILL 0xF0
+#define VIA_BLIT_SET 0xFF
+
+static void dump_dma( struct via_context *vmesa )
+{
+   GLuint i;
+   GLuint *data = (GLuint *)vmesa->dma;
+   for (i = 0; i < vmesa->dmaLow; i += 16) {
+      fprintf(stderr, "%04x:   ", i);
+      fprintf(stderr, "%08x  ", *data++);
+      fprintf(stderr, "%08x  ", *data++);
+      fprintf(stderr, "%08x  ", *data++);
+      fprintf(stderr, "%08x\n", *data++);
+   }
+   fprintf(stderr, "******************************************\n");
+}
+
+
+
+void viaCheckDma(struct via_context *vmesa, GLuint bytes)
+{
+    VIA_FINISH_PRIM( vmesa );
+    if (vmesa->dmaLow + bytes > VIA_DMA_HIGHWATER) {
+	viaFlushDma(vmesa);
+    }
+}
+
+
+
+#define SetReg2DAGP(nReg, nData) do {		\
+    OUT_RING( ((nReg) >> 2) | 0xF0000000 );	\
+    OUT_RING( nData );				\
+} while (0)
+
+
+static void viaBlit(struct via_context *vmesa, GLuint bpp,
+		    GLuint srcBase, GLuint srcPitch, 
+		    GLuint dstBase, GLuint dstPitch,
+		    GLuint w, GLuint h, 
+		    GLuint blitMode, 
+		    GLuint color, GLuint nMask ) 
+{
+
+    GLuint dwGEMode, srcX, dstX, cmd;
+    RING_VARS;
+
+    if (VIA_DEBUG & DEBUG_2D)
+       fprintf(stderr, 
+	       "%s bpp %d src %x/%x dst %x/%x w %d h %d "
+	       " mode: %x color: 0x%08x mask 0x%08x\n",
+	       __FUNCTION__, bpp, srcBase, srcPitch, dstBase,
+	       dstPitch, w,h, blitMode, color, nMask);
+
+
+    if (!w || !h)
+        return;
+
+    switch (bpp) {
+    case 16:
+        dwGEMode = VIA_GEM_16bpp;
+	srcX = (srcBase & 0x1f) >> 1;
+	dstX = (dstBase & 0x1f) >> 1;
+        break;
+    case 32:
+        dwGEMode = VIA_GEM_32bpp;
+	srcX = (srcBase & 0x1f) >> 2;
+	dstX = (dstBase & 0x1f) >> 2;
+	break;
+    default:
+        return;
+    }
+
+    switch(blitMode) {
+    case VIA_BLIT_FILL:
+	cmd = VIA_GEC_BLT | VIA_GEC_FIXCOLOR_PAT | (VIA_BLIT_FILL << 24);
+	break;
+    case VIA_BLIT_COPY:
+	cmd = VIA_GEC_BLT | (VIA_BLIT_COPY << 24);
+	break;
+    default:
+        return;
+    }	
+
+    BEGIN_RING(22);
+    SetReg2DAGP( VIA_REG_GEMODE, dwGEMode);
+    SetReg2DAGP( VIA_REG_FGCOLOR, color);
+    SetReg2DAGP( 0x2C, nMask);
+    SetReg2DAGP( VIA_REG_SRCBASE, (srcBase & ~0x1f) >> 3);
+    SetReg2DAGP( VIA_REG_DSTBASE, (dstBase & ~0x1f) >> 3);
+    SetReg2DAGP( VIA_REG_PITCH, VIA_PITCH_ENABLE |
+	       (srcPitch >> 3) | ((dstPitch >> 3) << 16));
+    SetReg2DAGP( VIA_REG_SRCPOS, srcX);
+    SetReg2DAGP( VIA_REG_DSTPOS, dstX);
+    SetReg2DAGP( VIA_REG_DIMENSION, (((h - 1) << 16) | (w - 1)));
+    SetReg2DAGP( VIA_REG_GECMD, cmd);
+    SetReg2DAGP( 0x2C, 0x00000000);
+    ADVANCE_RING();
+}
+
+static void viaFillBuffer(struct via_context *vmesa,
+			  struct via_renderbuffer *buffer,
+			  drm_clip_rect_t *pbox,
+			  int nboxes,
+			  GLuint pixel,
+			  GLuint mask)
+{
+   GLuint bytePerPixel = buffer->bpp >> 3;
+   GLuint i;
+
+   for (i = 0; i < nboxes ; i++) {        
+      int x = pbox[i].x1 - buffer->drawX;
+      int y = pbox[i].y1 - buffer->drawY;
+      int w = pbox[i].x2 - pbox[i].x1;
+      int h = pbox[i].y2 - pbox[i].y1;
+
+      int offset = (buffer->offset + 
+		    y * buffer->pitch + 
+		    x * bytePerPixel);
+
+      viaBlit(vmesa,
+	      buffer->bpp, 
+	      offset, buffer->pitch,
+	      offset, buffer->pitch, 
+	      w, h,
+	      VIA_BLIT_FILL, pixel, mask); 
+   }
+}
+
+
+
+static void viaClear(GLcontext *ctx, GLbitfield mask)
+{
+   struct via_context *vmesa = VIA_CONTEXT(ctx);
+   __DRIdrawable *dPriv = vmesa->driDrawable;
+   struct via_renderbuffer *const vrb = 
+     (struct via_renderbuffer *) dPriv->driverPrivate;
+   int flag = 0;
+   GLuint i = 0;
+   GLuint clear_depth_mask = 0xf << 28;
+   GLuint clear_depth = 0;
+
+   VIA_FLUSH_DMA(vmesa);
+
+   if (mask & BUFFER_BIT_FRONT_LEFT) {
+      flag |= VIA_FRONT;
+      mask &= ~BUFFER_BIT_FRONT_LEFT;
+   }
+
+   if (mask & BUFFER_BIT_BACK_LEFT) {
+      flag |= VIA_BACK;	
+      mask &= ~BUFFER_BIT_BACK_LEFT;
+   }
+
+   if (mask & BUFFER_BIT_DEPTH) {
+      flag |= VIA_DEPTH;
+      clear_depth = (GLuint)(ctx->Depth.Clear * vmesa->ClearDepth);
+      clear_depth_mask &= ~vmesa->depth_clear_mask;
+      mask &= ~BUFFER_BIT_DEPTH;
+   }
+    
+   if (mask & BUFFER_BIT_STENCIL) {
+      if (vmesa->have_hw_stencil) {
+	 if ((ctx->Stencil.WriteMask[0] & 0xff) == 0xff) {
+	    flag |= VIA_DEPTH;
+	    clear_depth &= ~0xff;
+	    clear_depth |= (ctx->Stencil.Clear & 0xff);
+	    clear_depth_mask &= ~vmesa->stencil_clear_mask;
+	    mask &= ~BUFFER_BIT_STENCIL;
+	 }
+	 else {
+	    if (VIA_DEBUG & DEBUG_2D)
+	       fprintf(stderr, "Clear stencil writemask %x\n", 
+		       ctx->Stencil.WriteMask[0]);
+	 }
+      }
+   }
+
+   /* 16bpp doesn't support masked clears */
+   if (vmesa->viaScreen->bytesPerPixel == 2 &&
+       vmesa->ClearMask & 0xf0000000) {
+      if (flag & VIA_FRONT)
+         mask |= BUFFER_BIT_FRONT_LEFT;
+      if (flag & VIA_BACK)
+         mask |= BUFFER_BIT_BACK_LEFT;
+      flag &= ~(VIA_FRONT | VIA_BACK);
+   }
+    
+   if (flag) {
+      drm_clip_rect_t *boxes, *tmp_boxes = 0;
+      int nr = 0;
+      GLint cx, cy, cw, ch;
+      GLboolean all;
+
+      LOCK_HARDWARE(vmesa);
+	    
+      /* get region after locking: */
+      cx = ctx->DrawBuffer->_Xmin;
+      cy = ctx->DrawBuffer->_Ymin;
+      cw = ctx->DrawBuffer->_Xmax - cx;
+      ch = ctx->DrawBuffer->_Ymax - cy;
+      all = (cw == ctx->DrawBuffer->Width && ch == ctx->DrawBuffer->Height);
+
+      /* flip top to bottom */
+      cy = dPriv->h - cy - ch;
+      cx += vrb->drawX;
+      cy += vrb->drawY;
+        
+      if (!all) {
+	 drm_clip_rect_t *b = vmesa->pClipRects;	 
+	 
+	 boxes = tmp_boxes = 
+	    (drm_clip_rect_t *)malloc(vmesa->numClipRects * 
+				      sizeof(drm_clip_rect_t)); 
+	 if (!boxes) {
+	    UNLOCK_HARDWARE(vmesa);
+	    return;
+	 }
+
+	 for (; i < vmesa->numClipRects; i++) {
+	    GLint x = b[i].x1;
+	    GLint y = b[i].y1;
+	    GLint w = b[i].x2 - x;
+	    GLint h = b[i].y2 - y;
+
+	    if (x < cx) w -= cx - x, x = cx;
+	    if (y < cy) h -= cy - y, y = cy;
+	    if (x + w > cx + cw) w = cx + cw - x;
+	    if (y + h > cy + ch) h = cy + ch - y;
+	    if (w <= 0) continue;
+	    if (h <= 0) continue;
+
+	    boxes[nr].x1 = x;
+	    boxes[nr].y1 = y;
+	    boxes[nr].x2 = x + w;
+	    boxes[nr].y2 = y + h;
+	    nr++;
+	 }
+      }
+      else {
+	 boxes = vmesa->pClipRects;
+	 nr = vmesa->numClipRects;
+      }
+	    
+      if (flag & VIA_FRONT) {
+	 viaFillBuffer(vmesa, &vmesa->front, boxes, nr, vmesa->ClearColor,
+		       vmesa->ClearMask);
+      } 
+		
+      if (flag & VIA_BACK) {
+	 viaFillBuffer(vmesa, &vmesa->back, boxes, nr, vmesa->ClearColor, 
+		       vmesa->ClearMask);
+      }
+
+      if (flag & VIA_DEPTH) {
+	 viaFillBuffer(vmesa, &vmesa->depth, boxes, nr, clear_depth,
+		       clear_depth_mask);
+      }		
+
+      viaFlushDmaLocked(vmesa, VIA_NO_CLIPRECTS);
+      UNLOCK_HARDWARE(vmesa);
+
+      if (tmp_boxes)
+	 free(tmp_boxes);
+   }
+   
+   if (mask)
+      _swrast_Clear(ctx, mask);
+}
+
+
+
+
+static void viaDoSwapBuffers(struct via_context *vmesa,
+			     drm_clip_rect_t *b,
+			     GLuint nbox)
+{    
+   GLuint bytePerPixel = vmesa->viaScreen->bitsPerPixel >> 3;
+   struct via_renderbuffer *front = &vmesa->front;
+   struct via_renderbuffer *back = &vmesa->back;
+   GLuint i;
+        
+   for (i = 0; i < nbox; i++, b++) {        
+      GLint x = b->x1 - back->drawX;
+      GLint y = b->y1 - back->drawY;
+      GLint w = b->x2 - b->x1;
+      GLint h = b->y2 - b->y1;
+	
+      GLuint src = back->offset + y * back->pitch + x * bytePerPixel;
+      GLuint dest = front->offset + y * front->pitch + x * bytePerPixel;
+
+      viaBlit(vmesa, 
+	      bytePerPixel << 3, 
+	      src, back->pitch,
+	      dest, front->pitch,
+	      w, h,
+	      VIA_BLIT_COPY, 0, 0); 
+   }
+
+   viaFlushDmaLocked(vmesa, VIA_NO_CLIPRECTS); /* redundant */
+}
+
+
+static void viaEmitBreadcrumbLocked( struct via_context *vmesa )
+{
+   struct via_renderbuffer *buffer = &vmesa->breadcrumb;
+   GLuint value = vmesa->lastBreadcrumbWrite + 1;
+
+   if (VIA_DEBUG & DEBUG_IOCTL) 
+      fprintf(stderr, "%s %d\n", __FUNCTION__, value);
+
+   assert(!vmesa->dmaLow);
+
+   viaBlit(vmesa,
+	   buffer->bpp, 
+	   buffer->offset, buffer->pitch,
+	   buffer->offset, buffer->pitch, 
+	   1, 1,
+	   VIA_BLIT_FILL, value, 0); 
+
+   viaFlushDmaLocked(vmesa, VIA_NO_CLIPRECTS); /* often redundant */
+   vmesa->lastBreadcrumbWrite = value;
+}
+
+void viaEmitBreadcrumb( struct via_context *vmesa )
+{
+   LOCK_HARDWARE(vmesa);
+   if (vmesa->dmaLow) 
+      viaFlushDmaLocked(vmesa, 0);
+
+   viaEmitBreadcrumbLocked( vmesa );
+   UNLOCK_HARDWARE(vmesa);
+}
+
+static GLboolean viaCheckIdle( struct via_context *vmesa )
+{
+   if ((vmesa->regEngineStatus[0] & 0xFFFEFFFF) == 0x00020000) {
+      return GL_TRUE;
+   }
+   return GL_FALSE;
+}
+
+
+GLboolean viaCheckBreadcrumb( struct via_context *vmesa, GLuint value )
+{
+   GLuint *buf = (GLuint *)vmesa->breadcrumb.map; 
+   vmesa->lastBreadcrumbRead = *buf;
+
+   if (VIA_DEBUG & DEBUG_IOCTL) 
+      fprintf(stderr, "%s %d < %d: %d\n", __FUNCTION__, value, 
+	      vmesa->lastBreadcrumbRead,
+	      !VIA_GEQ_WRAP(value, vmesa->lastBreadcrumbRead));
+
+   return !VIA_GEQ_WRAP(value, vmesa->lastBreadcrumbRead);
+}
+
+static void viaWaitBreadcrumb( struct via_context *vmesa, GLuint value )
+{
+   if (VIA_DEBUG & DEBUG_IOCTL) 
+      fprintf(stderr, "%s %d\n", __FUNCTION__, value);
+
+   assert(!VIA_GEQ_WRAP(value, vmesa->lastBreadcrumbWrite));
+
+   while (!viaCheckBreadcrumb( vmesa, value )) {
+      viaSwapOutWork( vmesa );
+      via_release_pending_textures( vmesa );
+   }
+}
+
+
+void viaWaitIdle( struct via_context *vmesa, GLboolean light )
+{
+   VIA_FLUSH_DMA(vmesa);
+
+   if (VIA_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s lastDma %d lastBreadcrumbWrite %d\n",
+	      __FUNCTION__, vmesa->lastDma, vmesa->lastBreadcrumbWrite);
+
+   /* Need to emit a new breadcrumb?
+    */
+   if (vmesa->lastDma == vmesa->lastBreadcrumbWrite) {
+      LOCK_HARDWARE(vmesa);
+      viaEmitBreadcrumbLocked( vmesa );
+      UNLOCK_HARDWARE(vmesa);
+   }
+
+   /* Need to wait?
+    */
+   if (VIA_GEQ_WRAP(vmesa->lastDma, vmesa->lastBreadcrumbRead)) 
+      viaWaitBreadcrumb( vmesa, vmesa->lastDma );
+
+   if (light) return;
+
+   LOCK_HARDWARE(vmesa);
+   while(!viaCheckIdle(vmesa))
+      ;
+   UNLOCK_HARDWARE(vmesa);
+   via_release_pending_textures(vmesa);
+}
+
+/* As viaWaitIdle(), but the caller must already hold the hardware lock. */
+void viaWaitIdleLocked( struct via_context *vmesa, GLboolean light )
+{
+   if (vmesa->dmaLow) 
+      viaFlushDmaLocked(vmesa, 0);
+
+   if (VIA_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "%s lastDma %d lastBreadcrumbWrite %d\n",
+	      __FUNCTION__, vmesa->lastDma, vmesa->lastBreadcrumbWrite);
+
+   /* Need to emit a new breadcrumb?
+    */
+   if (vmesa->lastDma == vmesa->lastBreadcrumbWrite) {
+      viaEmitBreadcrumbLocked( vmesa );
+   }
+
+   /* Need to wait?  Wrap-safe compare, the same test viaWaitIdle() uses.
+    */
+   if (VIA_GEQ_WRAP(vmesa->lastDma, vmesa->lastBreadcrumbRead)) 
+      viaWaitBreadcrumb( vmesa, vmesa->lastDma );
+
+   if (light) return;
+
+   while(!viaCheckIdle(vmesa))
+      ;
+
+   via_release_pending_textures(vmesa);
+}
+
+
+
+/* Wait for command stream to be processed *and* the next vblank to
+ * occur.  Equivalent to calling WAIT_IDLE() and then WaitVBlank,
+ * except that WAIT_IDLE() will spin the CPU polling, while this is
+ * IRQ driven.
+ */
+static void viaWaitIdleVBlank(  __DRIdrawable *dPriv, 
+			       struct via_context *vmesa,
+			       GLuint value )
+{
+   GLboolean missed_target;
+   __DRIscreen *psp = dPriv->driScreenPriv;
+
+   VIA_FLUSH_DMA(vmesa); 
+
+   if (!value)
+      return;
+
+   do {
+      if (value < vmesa->lastBreadcrumbRead ||
+	  vmesa->thrashing)
+	 viaSwapOutWork(vmesa);
+
+      driWaitForVBlank( dPriv, & missed_target );
+      if ( missed_target ) {
+	 vmesa->swap_missed_count++;
+	 (*psp->systemTime->getUST)( &vmesa->swap_missed_ust );
+      }
+   } 
+   while (!viaCheckBreadcrumb(vmesa, value));	 
+
+   vmesa->thrashing = 0;	/* reset flag on swap */
+   vmesa->swap_count++;   
+   via_release_pending_textures( vmesa );
+}
+
+
+
+static void viaDoPageFlipLocked(struct via_context *vmesa, GLuint offset)
+{
+   RING_VARS;
+
+   if (VIA_DEBUG & DEBUG_2D)
+      fprintf(stderr, "%s %x\n", __FUNCTION__, offset);
+
+   if (!vmesa->nDoneFirstFlip) {
+      vmesa->nDoneFirstFlip = GL_TRUE;
+      BEGIN_RING(4);
+      OUT_RING(HALCYON_HEADER2);
+      OUT_RING(0x00fe0000);
+      OUT_RING(0x0000000e);
+      OUT_RING(0x0000000e);
+      ADVANCE_RING();
+   }
+
+   BEGIN_RING(4);
+   OUT_RING( HALCYON_HEADER2 );
+   OUT_RING( 0x00fe0000 );
+   OUT_RING((HC_SubA_HFBBasL << 24) | (offset & 0xFFFFF8) | 0x2);
+   OUT_RING((HC_SubA_HFBDrawFirst << 24) |
+	    ((offset & 0xFF000000) >> 24) | 0x0100);
+   ADVANCE_RING();
+
+   vmesa->pfCurrentOffset = vmesa->sarea->pfCurrentOffset = offset;
+
+   viaFlushDmaLocked(vmesa, VIA_NO_CLIPRECTS); /* often redundant */
+}
+
+void viaResetPageFlippingLocked(struct via_context *vmesa)
+{
+   if (VIA_DEBUG & DEBUG_2D)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   viaDoPageFlipLocked( vmesa, 0 );
+
+   if (vmesa->front.offset != 0) {
+      struct via_renderbuffer buffer_tmp;
+      memcpy(&buffer_tmp, &vmesa->back, sizeof(struct via_renderbuffer));
+      memcpy(&vmesa->back, &vmesa->front, sizeof(struct via_renderbuffer));
+      memcpy(&vmesa->front, &buffer_tmp, sizeof(struct via_renderbuffer));
+   }
+
+   assert(vmesa->front.offset == 0);
+   vmesa->doPageFlip = vmesa->allowPageFlip = 0;
+}
+
+
+/*
+ * Copy the back buffer to the front buffer. 
+ */
+void viaCopyBuffer(__DRIdrawable *dPriv)
+{
+   struct via_context *vmesa = 
+      (struct via_context *)dPriv->driContextPriv->driverPrivate;
+   __DRIscreen *psp = dPriv->driScreenPriv;
+
+   if (VIA_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, 
+	      "%s: lastSwap[1] %d lastSwap[0] %d lastWrite %d lastRead %d\n",
+	      __FUNCTION__,
+	      vmesa->lastSwap[1], 
+	      vmesa->lastSwap[0], 
+	      vmesa->lastBreadcrumbWrite,
+	      vmesa->lastBreadcrumbRead);
+
+   VIA_FLUSH_DMA(vmesa);
+
+   if (dPriv->vblFlags == VBLANK_FLAG_SYNC &&
+       vmesa->lastBreadcrumbWrite > 1)
+      viaWaitIdleVBlank(dPriv, vmesa, vmesa->lastBreadcrumbWrite-1);
+   else
+      viaWaitIdleVBlank(dPriv, vmesa, vmesa->lastSwap[1]);
+
+   LOCK_HARDWARE(vmesa);
+
+   /* Catch and cleanup situation where we were pageflipping but have
+    * stopped.
+    */
+   if (dPriv->numClipRects && vmesa->sarea->pfCurrentOffset != 0) {
+      viaResetPageFlippingLocked(vmesa);
+      UNLOCK_HARDWARE(vmesa);
+      return;
+   }
+
+   viaDoSwapBuffers(vmesa, dPriv->pClipRects, dPriv->numClipRects);
+   vmesa->lastSwap[1] = vmesa->lastSwap[0];
+   vmesa->lastSwap[0] = vmesa->lastBreadcrumbWrite;
+   viaEmitBreadcrumbLocked(vmesa);
+   UNLOCK_HARDWARE(vmesa);
+
+   (*psp->systemTime->getUST)( &vmesa->swap_ust );
+}
+
+
+void viaPageFlip(__DRIdrawable *dPriv)
+{
+    struct via_context *vmesa = 
+       (struct via_context *)dPriv->driContextPriv->driverPrivate;
+    struct via_renderbuffer buffer_tmp;
+    __DRIscreen *psp = dPriv->driScreenPriv;
+
+    VIA_FLUSH_DMA(vmesa);
+   if (dPriv->vblFlags == VBLANK_FLAG_SYNC &&
+       vmesa->lastBreadcrumbWrite > 1)
+      viaWaitIdleVBlank(dPriv, vmesa, vmesa->lastBreadcrumbWrite - 1);
+   else
+      viaWaitIdleVBlank(dPriv, vmesa, vmesa->lastSwap[0]);
+
+    LOCK_HARDWARE(vmesa);
+    viaDoPageFlipLocked(vmesa, vmesa->back.offset);
+    vmesa->lastSwap[1] = vmesa->lastSwap[0];
+    vmesa->lastSwap[0] = vmesa->lastBreadcrumbWrite;
+    viaEmitBreadcrumbLocked(vmesa);
+    UNLOCK_HARDWARE(vmesa);
+
+    (*psp->systemTime->getUST)( &vmesa->swap_ust );
+
+
+    /* KW: FIXME: When buffers are freed, could free frontbuffer by
+     * accident:
+     */
+    memcpy(&buffer_tmp, &vmesa->back, sizeof(struct via_renderbuffer));
+    memcpy(&vmesa->back, &vmesa->front, sizeof(struct via_renderbuffer));
+    memcpy(&vmesa->front, &buffer_tmp, sizeof(struct via_renderbuffer));
+}
+
+
+
+
+#define VIA_CMDBUF_MAX_LAG 50000
+
+static int fire_buffer(struct via_context *vmesa)
+{
+   drm_via_cmdbuffer_t bufI;
+   int ret;
+
+   bufI.buf = (char *)vmesa->dma;
+   bufI.size = vmesa->dmaLow;
+
+   if (vmesa->useAgp) {
+      drm_via_cmdbuf_size_t bSiz;
+
+      /* Do the CMDBUF_SIZE ioctl:
+       */
+      bSiz.func = VIA_CMDBUF_LAG;
+      bSiz.wait = 1;
+      bSiz.size = VIA_CMDBUF_MAX_LAG;
+      do {
+	 ret = drmCommandWriteRead(vmesa->driFd, DRM_VIA_CMDBUF_SIZE, 
+				   &bSiz, sizeof(bSiz));
+      } while (ret == -EAGAIN);
+      if (ret) {
+	 UNLOCK_HARDWARE(vmesa);
+	 fprintf(stderr, "%s: DRM_VIA_CMDBUF_SIZE returned %d\n",
+		 __FUNCTION__, ret);
+	 abort();
+	 return ret;
+      }
+
+      /* Actually fire the buffer:
+       */
+      do {
+	 ret = drmCommandWrite(vmesa->driFd, DRM_VIA_CMDBUFFER, 
+			       &bufI, sizeof(bufI));
+      } while (ret == -EAGAIN);
+      if (ret) {
+	 UNLOCK_HARDWARE(vmesa);
+	 fprintf(stderr, "%s: DRM_VIA_CMDBUFFER returned %d\n",
+		 __FUNCTION__, ret);
+	 abort();
+	 /* If this fails, the original code fell back to the PCI path. 
+	  */
+      }
+      else 
+	 return 0;
+
+      /* Fall through to PCI handling?!?
+       */
+      viaWaitIdleLocked(vmesa, GL_FALSE);
+   }
+	    
+   ret = drmCommandWrite(vmesa->driFd, DRM_VIA_PCICMD, &bufI, sizeof(bufI));
+   if (ret) {
+      UNLOCK_HARDWARE(vmesa);
+      dump_dma(vmesa);
+      fprintf(stderr, "%s: DRM_VIA_PCICMD returned %d\n", __FUNCTION__, ret); 
+      abort();
+   }
+
+   return ret;
+}
+
+
+/* Inserts the surface address and active cliprects one at a time
+ * into the head of the DMA buffer being flushed.  Fires the buffer
+ * for each cliprect.
+ */
+static void via_emit_cliprect(struct via_context *vmesa,
+			      drm_clip_rect_t *b) 
+{
+   struct via_renderbuffer *buffer = vmesa->drawBuffer;
+   GLuint *vb = (GLuint *)(vmesa->dma + vmesa->dmaCliprectAddr);
+
+   GLuint format = (vmesa->viaScreen->bitsPerPixel == 0x20 
+		    ? HC_HDBFM_ARGB8888 
+		    : HC_HDBFM_RGB565);
+
+   GLuint pitch = buffer->pitch;
+   GLuint offset = buffer->offset;
+
+   if (0)
+      fprintf(stderr, "emit cliprect for box %d,%d %d,%d\n", 
+	      b->x1, b->y1, b->x2, b->y2);
+
+   vb[0] = HC_HEADER2;
+   vb[1] = (HC_ParaType_NotTex << 16);
+
+   assert(vmesa->driDrawable);
+
+   if (vmesa->driDrawable->w == 0 || vmesa->driDrawable->h == 0) {
+      vb[2] = (HC_SubA_HClipTB << 24) | 0x0;
+      vb[3] = (HC_SubA_HClipLR << 24) | 0x0;
+   }
+   else {
+      vb[2] = (HC_SubA_HClipTB << 24) | (b->y1 << 12) | b->y2;
+      vb[3] = (HC_SubA_HClipLR << 24) | (b->x1 << 12) | b->x2;
+   }
+	    
+   vb[4] = (HC_SubA_HDBBasL << 24) | (offset & 0xFFFFFF);
+   vb[5] = (HC_SubA_HDBBasH << 24) | ((offset & 0xFF000000) >> 24); 
+
+   vb[6] = (HC_SubA_HSPXYOS << 24);
+   vb[7] = (HC_SubA_HDBFM << 24) | HC_HDBLoc_Local | format | pitch;
+}
+
+
+
+static int intersect_rect(drm_clip_rect_t *out,
+                          drm_clip_rect_t *a,
+                          drm_clip_rect_t *b)
+{
+    *out = *a;
+    
+    if (0)
+       fprintf(stderr, "intersect %d,%d %d,%d and %d,%d %d,%d\n", 
+	       a->x1, a->y1, a->x2, a->y2,
+	       b->x1, b->y1, b->x2, b->y2);
+
+    if (b->x1 > out->x1) out->x1 = b->x1;
+    if (b->x2 < out->x2) out->x2 = b->x2;
+    if (out->x1 >= out->x2) return 0;
+
+    if (b->y1 > out->y1) out->y1 = b->y1;
+    if (b->y2 < out->y2) out->y2 = b->y2;
+    if (out->y1 >= out->y2) return 0;
+
+    return 1;
+}
+
+void viaFlushDmaLocked(struct via_context *vmesa, GLuint flags)
+{
+   int i;
+   RING_VARS;
+
+   if (VIA_DEBUG & (DEBUG_IOCTL|DEBUG_DMA))
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (*(GLuint *)vmesa->driHwLock != (DRM_LOCK_HELD|vmesa->hHWContext) &&
+       *(GLuint *)vmesa->driHwLock != 
+       (DRM_LOCK_HELD|DRM_LOCK_CONT|vmesa->hHWContext)) {
+      fprintf(stderr, "%s called without lock held\n", __FUNCTION__);
+      abort();
+   }
+
+   if (vmesa->dmaLow == 0) {
+      return;
+   }
+
+   assert(vmesa->dmaLastPrim == 0);
+
+   /* viaFinishPrimitive can add up to 8 bytes beyond VIA_DMA_HIGHWATER:
+    */
+   if (vmesa->dmaLow > VIA_DMA_HIGHWATER + 8) {
+      fprintf(stderr, "buffer overflow in Flush Prims = %d\n",vmesa->dmaLow);
+      abort();
+   }
+
+   switch (vmesa->dmaLow & 0x1F) {	
+   case 8:
+      BEGIN_RING_NOCHECK( 6 );
+      OUT_RING( HC_HEADER2 );
+      OUT_RING( (HC_ParaType_NotTex << 16) );
+      OUT_RING( HC_DUMMY );
+      OUT_RING( HC_DUMMY );
+      OUT_RING( HC_DUMMY );
+      OUT_RING( HC_DUMMY );
+      ADVANCE_RING();
+      break;
+   case 16:
+      BEGIN_RING_NOCHECK( 4 );
+      OUT_RING( HC_HEADER2 );
+      OUT_RING( (HC_ParaType_NotTex << 16) );
+      OUT_RING( HC_DUMMY );
+      OUT_RING( HC_DUMMY );
+      ADVANCE_RING();
+      break;    
+   case 24:
+      BEGIN_RING_NOCHECK( 10 );
+      OUT_RING( HC_HEADER2 );
+      OUT_RING( (HC_ParaType_NotTex << 16) );
+      OUT_RING( HC_DUMMY );
+      OUT_RING( HC_DUMMY );
+      OUT_RING( HC_DUMMY );
+      OUT_RING( HC_DUMMY );
+      OUT_RING( HC_DUMMY );
+      OUT_RING( HC_DUMMY );
+      OUT_RING( HC_DUMMY );
+      OUT_RING( HC_DUMMY );	
+      ADVANCE_RING();
+      break;    
+   case 0:
+      break;
+   default:
+      if (VIA_DEBUG & DEBUG_IOCTL)
+	 fprintf(stderr, "%s: unaligned value for vmesa->dmaLow: %x\n",
+		 __FUNCTION__, vmesa->dmaLow);
+   }
+
+   vmesa->lastDma = vmesa->lastBreadcrumbWrite;
+
+   if (VIA_DEBUG & DEBUG_DMA)
+      dump_dma( vmesa );
+
+   if (flags & VIA_NO_CLIPRECTS) {
+      if (0) fprintf(stderr, "%s VIA_NO_CLIPRECTS\n", __FUNCTION__);
+      assert(vmesa->dmaCliprectAddr == ~0);
+      fire_buffer( vmesa );
+   }
+   else if (vmesa->dmaCliprectAddr == ~0) {
+      /* Contains only state.  Could just dump the packet?
+       */
+      if (0) fprintf(stderr, "%s: no dmaCliprectAddr\n", __FUNCTION__);
+      if (0) fire_buffer( vmesa );
+   }
+   else if (vmesa->numClipRects) {
+      drm_clip_rect_t *pbox = vmesa->pClipRects;
+
+      for (i = 0; i < vmesa->numClipRects; i++) {
+	 drm_clip_rect_t b;
+
+	 b.x1 = pbox[i].x1;
+	 b.x2 = pbox[i].x2;
+	 b.y1 = pbox[i].y1;
+	 b.y2 = pbox[i].y2;
+
+	 if (vmesa->scissor &&
+	     !intersect_rect(&b, &b, &vmesa->scissorRect)) 
+	    continue;
+
+	 via_emit_cliprect(vmesa, &b);
+
+	 if (fire_buffer(vmesa) != 0) {
+	    dump_dma( vmesa );
+	    goto done;
+	 }
+      }
+   } else {
+      if (0) fprintf(stderr, "%s: no cliprects\n", __FUNCTION__);
+      UNLOCK_HARDWARE(vmesa);
+      sched_yield();
+      LOCK_HARDWARE(vmesa);
+   }
+
+ done:
+   /* Reset vmesa vars:
+    */
+   vmesa->dmaLow = 0;
+   vmesa->dmaCliprectAddr = ~0;
+   vmesa->newEmitState = ~0;
+}
+
+void viaWrapPrimitive( struct via_context *vmesa )
+{
+   GLenum renderPrimitive = vmesa->renderPrimitive;
+   GLenum hwPrimitive = vmesa->hwPrimitive;
+
+   if (VIA_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__);
+   
+   if (vmesa->dmaLastPrim)
+      viaFinishPrimitive( vmesa );
+   
+   viaFlushDma(vmesa);
+
+   if (renderPrimitive != GL_POLYGON + 1)
+      viaRasterPrimitive( vmesa->glCtx,
+			  renderPrimitive,
+			  hwPrimitive );
+
+}
+
+void viaFlushDma(struct via_context *vmesa)
+{
+   if (vmesa->dmaLow) {
+      assert(!vmesa->dmaLastPrim);
+
+      LOCK_HARDWARE(vmesa); 
+      viaFlushDmaLocked(vmesa, 0);
+      UNLOCK_HARDWARE(vmesa);
+   }
+}
+
+static void viaFlush(GLcontext *ctx)
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    VIA_FLUSH_DMA(vmesa);
+}
+
+static void viaFinish(GLcontext *ctx)
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    VIA_FLUSH_DMA(vmesa);
+    viaWaitIdle(vmesa, GL_FALSE);
+}
+
+static void viaClearStencil(GLcontext *ctx,  int s)
+{
+    return;
+}
+
+void viaInitIoctlFuncs(GLcontext *ctx)
+{
+    ctx->Driver.Flush = viaFlush;
+    ctx->Driver.Clear = viaClear;
+    ctx->Driver.Finish = viaFinish;
+    ctx->Driver.ClearStencil = viaClearStencil;
+}
+
+
+
diff --git a/src/mesa/drivers/dri/unichrome/via_ioctl.h b/src/mesa/drivers/dri/unichrome/via_ioctl.h
new file mode 100644
index 0000000000..c6b32cf085
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_ioctl.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _VIAIOCTL_H
+#define _VIAIOCTL_H
+
+#include "via_context.h"
+
+
+void viaFinishPrimitive(struct via_context *vmesa);
+void viaFlushDma(struct via_context *vmesa);
+void viaFlushDmaLocked(struct via_context *vmesa, GLuint flags);
+
+void viaInitIoctlFuncs(GLcontext *ctx);
+void viaCopyBuffer(__DRIdrawable *dpriv);
+void viaPageFlip(__DRIdrawable *dpriv);
+void viaCheckDma(struct via_context *vmesa, GLuint bytes);
+void viaResetPageFlippingLocked(struct via_context *vmesa);
+void viaWaitIdle(struct via_context *vmesa, GLboolean light);
+void viaWaitIdleLocked(struct via_context *vmesa, GLboolean light);
+
+GLboolean viaCheckBreadcrumb( struct via_context *vmesa, GLuint value );
+void viaEmitBreadcrumb( struct via_context *vmesa );
+
+
+#define VIA_FINISH_PRIM(vmesa) do {		\
+   if (vmesa->dmaLastPrim)			\
+      viaFinishPrimitive( vmesa );		\
+} while (0)
+
+#define VIA_FLUSH_DMA(vmesa) do {		\
+   VIA_FINISH_PRIM(vmesa);			\
+   if (vmesa->dmaLow) 		\
+      viaFlushDma(vmesa);			\
+} while (0)
+    
+
+void viaWrapPrimitive( struct via_context *vmesa );
+
+static INLINE GLuint *viaAllocDma(struct via_context *vmesa, int bytes)
+{
+   if (vmesa->dmaLow + bytes > VIA_DMA_HIGHWATER) {
+      viaFlushDma(vmesa);
+   }
+
+   {
+      GLuint *start = (GLuint *)(vmesa->dma + vmesa->dmaLow);
+      vmesa->dmaLow += bytes;
+      return start;
+   }
+}
+
+/* As viaAllocDma(), but on overflow calls viaWrapPrimitive() to make room. */
+static INLINE GLuint *viaExtendPrimitive(struct via_context *vmesa, int bytes)
+{
+   if (vmesa->dmaLow + bytes > VIA_DMA_HIGHWATER) {
+      viaWrapPrimitive(vmesa);
+   }
+
+   {
+      GLuint *start = (GLuint *)(vmesa->dma + vmesa->dmaLow);
+      vmesa->dmaLow += bytes;
+      return start;
+   }
+}
+
+
+
+
+#define RING_VARS GLuint *_vb = 0, _nr, _x;
+
+#define BEGIN_RING(n) do {				\
+   if (_vb != 0) abort();				\
+   _vb = viaAllocDma(vmesa, (n) * sizeof(GLuint));	\
+   _nr = (n);						\
+   _x = 0;						\
+} while (0)
+
+#define BEGIN_RING_NOCHECK(n) do {			\
+   if (_vb != 0) abort();				\
+   _vb = (GLuint *)(vmesa->dma + vmesa->dmaLow);	\
+   vmesa->dmaLow += (n) * sizeof(GLuint);		\
+   _nr = (n);						\
+   _x = 0;						\
+} while (0)
+
+#define OUT_RING(n) _vb[_x++] = (n)
+
+#define ADVANCE_RING() do {			\
+   if (_x != _nr) abort(); 			\
+   _vb = 0;						\
+} while (0)
+
+#define ADVANCE_RING_VARIABLE() do {			\
+   if (_x > _nr) abort();				\
+   vmesa->dmaLow -= (_nr - _x) * sizeof(GLuint);	\
+   _vb = 0;						\
+} while (0)
+
+
+#define QWORD_PAD_RING() do {			\
+   if (vmesa->dmaLow & 0x4) {			\
+      BEGIN_RING(1);				\
+      OUT_RING(HC_DUMMY);			\
+      ADVANCE_RING();				\
+   }						\
+} while (0)
+
+#define VIA_GEQ_WRAP(left, right) \
+    (((left) - (right)) < ( 1 << 23))
+      
+#endif
diff --git a/src/mesa/drivers/dri/unichrome/via_memcpy.c b/src/mesa/drivers/dri/unichrome/via_memcpy.c
new file mode 100644
index 0000000000..d7b05bcaaf
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_memcpy.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2004 Thomas Hellstrom, All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE CODE SUPPLIER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Thomas' original, gutted for Mesa by Keith Whitwell
+ */
+
+#include "via_tex.h"
+
+#if defined( USE_SSE_ASM )
+
+#define SSE_PREFETCH "  prefetchnta "
+#define FENCE __asm__ __volatile__ ("sfence":::"memory");
+/* Issue arch_prefetch hints for a run of 32-byte-spaced offsets from `from`.
+ * NOTE(review): offset 224 is skipped (192 -> 256) -- confirm this is intended. */
+#define PREFETCH1(arch_prefetch,from)			\
+    __asm__ __volatile__ (				\
+			  "1:  " arch_prefetch "(%0)\n"	\
+			  arch_prefetch "32(%0)\n"	\
+			  arch_prefetch "64(%0)\n"	\
+			  arch_prefetch "96(%0)\n"	\
+			  arch_prefetch "128(%0)\n"	\
+			  arch_prefetch "160(%0)\n"	\
+			  arch_prefetch "192(%0)\n"	\
+			  arch_prefetch "256(%0)\n"	\
+			  arch_prefetch "288(%0)\n"	\
+			  "2:\n"			\
+			  : : "r" (from) );
+/* Copy n bytes with string moves: n/4 dwords via "rep movsl", then one word
+ * and one byte if bits 1 and 0 of n are set.  `to` and `from` are asm
+ * outputs (EDI/ESI), so both variables are updated by the copy. */
+#define small_memcpy(to,from,n)						\
+    {									\
+	__asm__ __volatile__(						\
+			     "movl %2,%%ecx\n\t"			\
+                             "sarl $2,%%ecx\n\t"			\
+			     "rep ; movsl\n\t"				\
+			     "testb $2,%b2\n\t"				\
+			     "je 1f\n\t"				\
+			     "movsw\n"					\
+			     "1:\ttestb $1,%b2\n\t"			\
+			     "je 2f\n\t"				\
+			     "movsb\n"					\
+			     "2:"					\
+			     :"=&D" (to), "=&S" (from)			\
+			     :"q" (n),"0" ((long) to),"1" ((long) from) \
+			     : "%ecx","memory");			\
+    }
+/* Copy lcnt 64-byte chunks with non-temporal stores (movntps), loading with
+ * movups when `from` is not 16-byte aligned and with movaps otherwise. */
+#define SSE_CPY(prefetch,from,to,dummy,lcnt)				\
+    if ((unsigned long) from & 15)			 {		\
+	__asm__ __volatile__ (						\
+			      "1:\n"					\
+                              prefetch "320(%1)\n"			\
+			      "  movups (%1), %%xmm0\n"			\
+			      "  movups 16(%1), %%xmm1\n"		\
+			      "  movntps %%xmm0, (%0)\n"		\
+			      "  movntps %%xmm1, 16(%0)\n"		\
+                              prefetch "352(%1)\n"			\
+			      "  movups 32(%1), %%xmm2\n"		\
+			      "  movups 48(%1), %%xmm3\n"		\
+			      "  movntps %%xmm2, 32(%0)\n"		\
+			      "  movntps %%xmm3, 48(%0)\n"		\
+			      "  addl $64,%0\n"				\
+			      "  addl $64,%1\n"				\
+			      "  decl %2\n"				\
+			      "  jne 1b\n"				\
+			      :"=&D"(to), "=&S"(from), "=&r"(dummy)	\
+			      :"0" (to), "1" (from), "2" (lcnt): "memory"); \
+    } else {								\
+	__asm__ __volatile__ (						\
+			      "2:\n"					\
+			      prefetch "320(%1)\n"			\
+			      "  movaps (%1), %%xmm0\n"			\
+			      "  movaps 16(%1), %%xmm1\n"		\
+			      "  movntps %%xmm0, (%0)\n"		\
+			      "  movntps %%xmm1, 16(%0)\n"		\
+                              prefetch "352(%1)\n"			\
+			      "  movaps 32(%1), %%xmm2\n"		\
+			      "  movaps 48(%1), %%xmm3\n"		\
+			      "  movntps %%xmm2, 32(%0)\n"		\
+			      "  movntps %%xmm3, 48(%0)\n"		\
+			      "  addl $64,%0\n"				\
+			      "  addl $64,%1\n"				\
+			      "  decl %2\n"				\
+			      "  jne 2b\n"				\
+			      :"=&D"(to), "=&S"(from), "=&r"(dummy)	\
+			      :"0" (to), "1" (from), "2" (lcnt): "memory"); \
+    }
+/* Copy sz bytes: whole 64-byte chunks go through SSE_CPY (all but the last
+ * five with prefetching), the remaining sz & 63 bytes through small_memcpy,
+ * and an sfence ends the copy.
+ * NOTE(review): movntps needs a 16-byte aligned `to`; not checked here.
+ */
+void via_sse_memcpy(void *to,
+		    const void *from,
+		    size_t sz)
+
+{
+   int dummy;
+   int lcnt = sz >> 6;
+   int rest = sz & 63;
+
+   PREFETCH1(SSE_PREFETCH,from);
+
+   if (lcnt > 5) {
+      lcnt -= 5;
+      SSE_CPY(SSE_PREFETCH,from,to,dummy,lcnt);
+      lcnt = 5;
+   }
+   if (lcnt) {
+      SSE_CPY("#",from,to,dummy,lcnt);
+   }
+   if (rest) small_memcpy(to, from, rest);
+   FENCE;
+}
+
+#endif /* defined( USE_SSE_ASM ) */
diff --git a/src/mesa/drivers/dri/unichrome/via_render.c b/src/mesa/drivers/dri/unichrome/via_render.c
new file mode 100644
index 0000000000..896c43db1b
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_render.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware acceleration where possible.
+ *
+ */
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+
+#include "tnl/t_context.h"
+
+#include "via_context.h"
+#include "via_tris.h"
+#include "via_ioctl.h"
+
+/*
+ * Render unclipped vertex buffers by emitting vertices directly to
+ * dma buffers.  Use strip/fan hardware primitives where possible.
+ * Try to simulate missing primitives with indexed vertices.
+ */
+#define HAVE_POINTS      1
+#define HAVE_LINES       1
+#define HAVE_LINE_STRIPS 1
+#define HAVE_LINE_LOOP   1
+#define HAVE_TRIANGLES   1
+#define HAVE_TRI_STRIPS  1
+#define HAVE_TRI_STRIP_1 0  
+#define HAVE_TRI_FANS    1
+#define HAVE_POLYGONS    1
+#define HAVE_QUADS       0
+#define HAVE_QUAD_STRIPS 0
+/* Indexed (element) rendering is not offered to the template. */
+#define HAVE_ELTS        0
+/* Template hooks: per-function locals and primitive setup via viaRasterPrimitive(). */
+#define LOCAL_VARS struct via_context *vmesa = VIA_CONTEXT(ctx)
+#define INIT(prim) do {					\
+   viaRasterPrimitive(ctx, prim, prim);	\
+} while (0)
+/* Vertex capacity of the current / a fresh DMA buffer, keeping 512 bytes spare; fully parenthesised. */
+#define GET_CURRENT_VB_MAX_VERTS() \
+    ((VIA_DMA_BUF_SZ - (512 + (int)vmesa->dmaLow)) / (vmesa->vertexSize * 4))
+#define GET_SUBSEQUENT_VB_MAX_VERTS() \
+    ((VIA_DMA_BUF_SZ - 512) / (vmesa->vertexSize * 4))
+#define ALLOC_VERTS( nr ) \
+    viaExtendPrimitive( vmesa, (nr) * vmesa->vertexSize * 4)
+/* ALLOC_VERTS reserves nr * vertexSize * 4 bytes; EMIT_VERTS writes vertices [j, j+nr) into buf. */
+#define EMIT_VERTS(ctx, j, nr, buf) \
+    _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf )  
+/* FLUSH maps to VIA_FINISH_PRIM on the current context. */
+#define FLUSH() VIA_FINISH_PRIM( vmesa )
+/* Instantiate the t_dd_dmatmp.h template with the via_fast prefix. */
+#define TAG(x) via_fast##x
+#include "tnl_dd/t_dd_dmatmp.h"
+#undef TAG
+#undef LOCAL_VARS
+#undef INIT
+
+/**********************************************************************/
+/*                          Fast Render pipeline stage                */
+/**********************************************************************/
+static GLboolean via_run_fastrender(GLcontext *ctx,
+                                    struct tnl_pipeline_stage *stage)
+{
+    /* Draw every primitive of the TNL vertex buffer through the via_fast*
+     * emitters.  Returns GL_FALSE once the buffer has been rendered here
+     * (pipe finished) and GL_TRUE otherwise. */
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+    struct vertex_buffer *VB = &tnl->vb;
+    GLboolean rendered;
+    GLuint prim;
+
+    tnl->Driver.Render.Start(ctx);
+
+    /* Validation only runs for unclipped buffers with renderIndex 0. */
+    rendered = !VB->ClipOrMask &&
+               vmesa->renderIndex == 0 &&
+               via_fastvalidate_render( ctx, VB );
+    if (rendered) {
+        tnl->clipspace.new_inputs |= VERT_BIT_POS;
+        for (prim = 0; prim < VB->PrimitiveCount; prim++) {
+            const GLuint mode = _tnl_translate_prim(&VB->Primitive[prim]);
+            const GLuint first = VB->Primitive[prim].start;
+            const GLuint count = VB->Primitive[prim].count;
+            if (count != 0)
+                via_fastrender_tab_verts[mode & PRIM_MODE_MASK](ctx, first, first + count, mode);
+        }
+    }
+
+    tnl->Driver.Render.Finish(ctx);
+    return rendered ? GL_FALSE : GL_TRUE;
+}
+/* TNL pipeline stage descriptor whose run hook is via_run_fastrender. */
+const struct tnl_pipeline_stage _via_fastrender_stage =
+{
+    "via fast render",
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    via_run_fastrender           /* run */
+};
+
+
diff --git a/src/mesa/drivers/dri/unichrome/via_screen.c b/src/mesa/drivers/dri/unichrome/via_screen.c
new file mode 100644
index 0000000000..ee10b569bf
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_screen.c
@@ -0,0 +1,448 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+
+#include "dri_util.h"
+#include "utils.h"
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+#include "main/simple_list.h"
+#include "vblank.h"
+
+#include "via_state.h"
+#include "via_tex.h"
+#include "via_span.h"
+#include "via_screen.h"
+#include "via_dri.h"
+
+#include "GL/internal/dri_interface.h"
+#include "drirenderbuffer.h"
+
+#include "xmlpool.h"
+/* driconf option table; __driNConfigOptions is the option count passed to driParseOptionInfo(). */
+PUBLIC const char __driConfigOptions[] =
+DRI_CONF_BEGIN
+    DRI_CONF_SECTION_PERFORMANCE
+        DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_QUALITY
+        DRI_CONF_EXCESS_MIPMAP(false)
+    DRI_CONF_SECTION_END
+    DRI_CONF_SECTION_DEBUG
+        DRI_CONF_NO_RAST(false)
+    DRI_CONF_SECTION_END
+DRI_CONF_END;
+static const GLuint __driNConfigOptions = 3;
+
+static drmBufMapPtr via_create_empty_buffers(void)
+{
+    /* Zeroed map plus zeroed VIA_DMA_BUF_NR-entry list, or NULL on failure. */
+    const size_t list_bytes = sizeof(drmBuf) * VIA_DMA_BUF_NR;
+    drmBufMapPtr map = (drmBufMapPtr)MALLOC(sizeof(drmBufMap));
+    if (map != NULL) {
+        memset(map, 0, sizeof(drmBufMap));
+        map->list = (drmBufPtr)MALLOC(list_bytes);
+        if (map->list != NULL) {
+            memset(map->list, 0, list_bytes);
+            return map;
+        }
+        FREE(map);
+    }
+    return NULL;
+}
+/* Release a map from via_create_empty_buffers() and its list; NULL is ignored. */
+static void via_free_empty_buffers( drmBufMapPtr bufs )
+{
+   if (bufs != NULL) {
+      if (bufs->list != NULL)
+         FREE(bufs->list);
+      FREE(bufs);
+   }
+}
+
+
+/* Build the per-screen private state from the DDX-supplied VIADRIRec: parse
+ * driconf defaults, copy screen geometry, map the register and optional AGP
+ * regions and publish the extension list.  Returns GL_TRUE on success; on
+ * failure everything acquired so far is released and GL_FALSE is returned. */
+static GLboolean
+viaInitDriver(__DRIscreen *sPriv)
+{
+    viaScreenPrivate *viaScreen;
+    VIADRIPtr gDRIPriv = (VIADRIPtr)sPriv->pDevPriv;
+    int i;
+
+    if (sPriv->devPrivSize != sizeof(VIADRIRec)) {
+      fprintf(stderr,"\nERROR!  sizeof(VIADRIRec) does not match passed size from device driver\n");
+      return GL_FALSE;
+    }
+
+    /* Allocate the private area */
+    viaScreen = (viaScreenPrivate *) CALLOC(sizeof(viaScreenPrivate));
+    if (!viaScreen) {
+        __driUtilMessage("viaInitDriver: alloc viaScreenPrivate struct failed");
+        return GL_FALSE;
+    }
+
+    /* parse information in __driConfigOptions */
+    driParseOptionInfo (&viaScreen->optionCache,
+			__driConfigOptions, __driNConfigOptions);
+
+    viaScreen->driScrnPriv = sPriv;
+    sPriv->private = (void *)viaScreen;
+
+    viaScreen->deviceID = gDRIPriv->deviceID;
+    viaScreen->width = gDRIPriv->width;
+    viaScreen->height = gDRIPriv->height;
+    viaScreen->mem = gDRIPriv->mem;
+    viaScreen->bitsPerPixel = gDRIPriv->bytesPerPixel * 8;
+    viaScreen->bytesPerPixel = gDRIPriv->bytesPerPixel;
+    viaScreen->fbOffset = 0;
+    viaScreen->fbSize = gDRIPriv->fbSize;
+    viaScreen->irqEnabled = gDRIPriv->irqEnabled;
+
+    /* NOTE(review): cpp is never assigned in this function, so it prints as 0. */
+    if (VIA_DEBUG & DEBUG_DRI) {
+	fprintf(stderr, "deviceID = %08x\n", viaScreen->deviceID);
+	fprintf(stderr, "width = %08x\n", viaScreen->width);
+	fprintf(stderr, "height = %08x\n", viaScreen->height);
+	fprintf(stderr, "cpp = %08x\n", viaScreen->cpp);
+	fprintf(stderr, "fbOffset = %08x\n", viaScreen->fbOffset);
+    }
+
+    viaScreen->bufs = via_create_empty_buffers();
+    if (viaScreen->bufs == NULL) {
+        __driUtilMessage("viaInitDriver: via_create_empty_buffers() failed");
+        goto fail;
+    }
+
+    if (drmMap(sPriv->fd, gDRIPriv->regs.handle, gDRIPriv->regs.size,
+               &viaScreen->reg) != 0) {
+        __driUtilMessage("viaInitDriver: drmMap regs failed");
+        goto fail;
+    }
+
+    if (gDRIPriv->agp.size) {
+        if (drmMap(sPriv->fd, gDRIPriv->agp.handle, gDRIPriv->agp.size,
+                   (drmAddress *)&viaScreen->agpLinearStart) != 0) {
+	    __driUtilMessage("viaInitDriver: drmMap agp failed");
+	    goto fail_unmap_regs;
+	}
+	viaScreen->agpBase = drmAgpBase(sPriv->fd);
+    } else
+	viaScreen->agpLinearStart = 0;
+
+    viaScreen->sareaPrivOffset = gDRIPriv->sarea_priv_offset;
+    /* At most four entries plus the NULL terminator are stored below. */
+    i = 0;
+    viaScreen->extensions[i++] = &driFrameTrackingExtension.base;
+    viaScreen->extensions[i++] = &driReadDrawableExtension;
+    if ( viaScreen->irqEnabled ) {
+	viaScreen->extensions[i++] = &driSwapControlExtension.base;
+	viaScreen->extensions[i++] = &driMediaStreamCounterExtension.base;
+    }
+    viaScreen->extensions[i++] = NULL;
+    sPriv->extensions = viaScreen->extensions;
+    return GL_TRUE;
+
+fail_unmap_regs:
+    drmUnmap(viaScreen->reg, gDRIPriv->regs.size);
+fail:
+    via_free_empty_buffers(viaScreen->bufs);   /* accepts NULL */
+    driDestroyOptionInfo(&viaScreen->optionCache);
+    FREE(viaScreen);
+    sPriv->private = NULL;
+    return GL_FALSE;
+}
+
+static void
+viaDestroyScreen(__DRIscreen *sPriv)
+{
+    /* Undo viaInitDriver: drop both mappings, then the buffer map, the
+     * option cache and finally the private record itself. */
+    VIADRIPtr devPriv = (VIADRIPtr)sPriv->pDevPriv;
+    viaScreenPrivate *screen = (viaScreenPrivate *)sPriv->private;
+
+    drmUnmap(screen->reg, devPriv->regs.size);
+    if (devPriv->agp.size) {
+        drmUnmap(screen->agpLinearStart, devPriv->agp.size);
+    }
+    via_free_empty_buffers(screen->bufs);
+    driDestroyOptionInfo(&screen->optionCache);
+    FREE(screen);
+    sPriv->private = NULL;
+}
+
+
+/* Create the Mesa framebuffer for a window drawable and attach the software
+ * stencil/accum renderbuffers it needs.  Returns GL_FALSE for pixmaps
+ * (unsupported) or when the framebuffer cannot be allocated. */
+static GLboolean
+viaCreateBuffer(__DRIscreen *driScrnPriv,
+                __DRIdrawable *driDrawPriv,
+                const __GLcontextModes *mesaVis,
+                GLboolean isPixmap)
+{
+#if 0
+    viaScreenPrivate *screen = (viaScreenPrivate *) driScrnPriv->private;
+#endif
+
+    GLboolean swStencil = (mesaVis->stencilBits > 0 &&
+			   mesaVis->depthBits != 24);
+    GLboolean swAccum = mesaVis->accumRedBits > 0;
+    struct gl_framebuffer *fb;
+
+    if (isPixmap) {
+       /* KW: This needs work, disabled for now:
+	*/
+#if 0
+	driDrawPriv->driverPrivate = (void *)
+            _mesa_create_framebuffer(mesaVis,
+                                     GL_FALSE,	/* software depth buffer? */
+                                     swStencil,
+                                     mesaVis->accumRedBits > 0,
+                                     GL_FALSE 	/* s/w alpha planes */);
+        return (driDrawPriv->driverPrivate != NULL);
+#endif
+	return GL_FALSE;
+    }
+
+    fb = _mesa_create_framebuffer(mesaVis);
+    if (fb == NULL)
+        return GL_FALSE;   /* nothing to attach renderbuffers to */
+
+    /* The front color, back color and depth renderbuffers are
+     * set up later in calculate_buffer_parameters().
+     * Only create/connect software-based buffers here.
+     */
+#if 000
+    /* This code _should_ be put to use.  We have to move the
+     * viaRenderbuffer members out of the via_context structure.
+     * Those members should just be the renderbuffers hanging off the
+     * gl_framebuffer object.
+     */
+    /* XXX check/fix the offset/pitch parameters! */
+    {
+       driRenderbuffer *frontRb
+          = driNewRenderbuffer(MESA_FORMAT_ARGB8888, NULL,
+                               screen->bytesPerPixel,
+                               0, screen->width, driDrawPriv);
+       viaSetSpanFunctions(frontRb, mesaVis);
+       _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base);
+    }
+    if (mesaVis->doubleBufferMode) {
+       driRenderbuffer *backRb
+          = driNewRenderbuffer(MESA_FORMAT_ARGB8888, NULL,
+                               screen->bytesPerPixel,
+                               0, screen->width, driDrawPriv);
+       viaSetSpanFunctions(backRb, mesaVis);
+       _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base);
+    }
+    if (mesaVis->depthBits == 16) {
+       driRenderbuffer *depthRb
+          = driNewRenderbuffer(MESA_FORMAT_Z16, NULL,
+                               screen->bytesPerPixel,
+                               0, screen->width, driDrawPriv);
+       viaSetSpanFunctions(depthRb, mesaVis);
+       _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+    }
+    else if (mesaVis->depthBits == 24) {
+       driRenderbuffer *depthRb
+          = driNewRenderbuffer(MESA_FORMAT_Z24_S8, NULL,
+                               screen->bytesPerPixel,
+                               0, screen->width, driDrawPriv);
+       viaSetSpanFunctions(depthRb, mesaVis);
+       _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+    }
+    else if (mesaVis->depthBits == 32) {
+       driRenderbuffer *depthRb
+          = driNewRenderbuffer(MESA_FORMAT_Z32, NULL,
+                               screen->bytesPerPixel,
+                               0, screen->width, driDrawPriv);
+       viaSetSpanFunctions(depthRb, mesaVis);
+       _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base);
+    }
+    if (mesaVis->stencilBits > 0 && !swStencil) {
+       driRenderbuffer *stencilRb
+          = driNewRenderbuffer(MESA_FORMAT_S8, NULL,
+                               screen->bytesPerPixel,
+                               0, screen->width, driDrawPriv);
+       viaSetSpanFunctions(stencilRb, mesaVis);
+       _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base);
+    }
+#endif
+
+    _mesa_add_soft_renderbuffers(fb,
+                                 GL_FALSE, /* color */
+                                 GL_FALSE, /* depth */
+                                 swStencil,
+                                 swAccum,
+                                 GL_FALSE, /* alpha */
+                                 GL_FALSE /* aux */);
+    driDrawPriv->driverPrivate = (void *) fb;
+
+    return GL_TRUE;
+}
+/* Hand the drawable's framebuffer slot to _mesa_reference_framebuffer() with a NULL target. */
+static void
+viaDestroyBuffer(__DRIdrawable *driDrawPriv)
+{
+   GLframebuffer **fbSlot = (GLframebuffer **)(&(driDrawPriv->driverPrivate));
+   _mesa_reference_framebuffer(fbSlot, NULL);
+}
+
+static const __DRIconfig **
+viaFillInModes( __DRIscreen *psp,
+		unsigned pixel_bits, GLboolean have_back_buffer )
+{
+    /* Enumerate the framebuffer configs for this screen: 16 bpp screens use
+     * RGB565, everything else BGRA8888, combined with the depth/stencil
+     * pairs and swap methods listed below.  Returns NULL (after logging) if
+     * driCreateConfigs fails.
+     */
+
+    /* GLX_SWAP_COPY_OML is not offered yet.  It would be easy to add: a
+     * context created on an fbconfig with that swap method would simply
+     * never page-flip.
+     */
+    static const GLenum back_buffer_modes[] = {
+	GLX_NONE, GLX_SWAP_UNDEFINED_OML /*, GLX_SWAP_COPY_OML */
+    };
+
+    /* The 32-bit depth-buffer mode isn't supported yet: depth_buffer_factor
+     * is 3, so only the first three depth/stencil pairs are handed to
+     * driCreateConfigs.
+     */
+    static const uint8_t depth_bits_array[4]   = { 0, 16, 24, 32 };
+    static const uint8_t stencil_bits_array[4] = { 0,  0,  8,  0 };
+    const unsigned depth_buffer_factor = 3;
+    uint8_t msaa_samples_array[1] = { 0 };
+
+    const unsigned back_buffer_factor = (have_back_buffer) ? 2 : 1;
+    const GLboolean is_16bpp = (pixel_bits == 16);
+    const GLenum fb_format = is_16bpp ? GL_RGB
+                                      : GL_BGRA;
+    const GLenum fb_type = is_16bpp ? GL_UNSIGNED_SHORT_5_6_5
+                                    : GL_UNSIGNED_INT_8_8_8_8_REV;
+    __DRIconfig **configs;
+
+    configs = driCreateConfigs(fb_format, fb_type,
+			       depth_bits_array, stencil_bits_array,
+			       depth_buffer_factor, back_buffer_modes,
+			       back_buffer_factor,
+                               msaa_samples_array, 1, GL_TRUE);
+    if (configs == NULL) {
+	fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
+		__LINE__);
+	return NULL;
+    }
+
+    return (const __DRIconfig **) configs;
+}
+
+
+/**
+ * This is the driver specific part of the createNewScreen entry point.
+ *
+ * \todo maybe fold this into viaInitDriver
+ *
+ * \return the __DRIconfig list for this driver, or NULL on any failure
+ */
+static const __DRIconfig **
+viaInitScreen(__DRIscreen *psp)
+{
+   static const __DRIversion ddx_expected = {
+      VIA_DRIDDX_VERSION_MAJOR, VIA_DRIDDX_VERSION_MINOR, VIA_DRIDDX_VERSION_PATCH };
+   static const __DRIversion dri_expected = { 4, 0, 0 };
+   static const __DRIversion drm_expected = { 2, 3, 0 };
+   static const char *driver_name = "Unichrome";
+   VIADRIPtr dri_priv = (VIADRIPtr) psp->pDevPriv;
+   const __DRIconfig **configs;
+
+   if ( ! driCheckDriDdxDrmVersions2( driver_name,
+				      &psp->dri_version, & dri_expected,
+				      &psp->ddx_version, & ddx_expected,
+				      &psp->drm_version, & drm_expected) )
+      return NULL;
+   if (!viaInitDriver(psp))
+       return NULL;
+   configs = viaFillInModes( psp, dri_priv->bytesPerPixel * 8, GL_TRUE );
+   if (configs == NULL)
+      viaDestroyScreen(psp);   /* release what viaInitDriver set up */
+   return configs;
+}
+
+
+/**
+ * Report the bound context's swap counters; -1 if any pointer is NULL, else 0.
+ */
+static int
+getSwapInfo( __DRIdrawable *dPriv, __DRIswapInfo * sInfo )
+{
+   struct via_context *vmesa;
+
+   if (dPriv == NULL || sInfo == NULL)
+      return -1;
+   if (dPriv->driContextPriv == NULL ||
+       dPriv->driContextPriv->driverPrivate == NULL)
+      return -1;
+
+   vmesa = (struct via_context *) dPriv->driContextPriv->driverPrivate;
+   sInfo->swap_count = vmesa->swap_count;
+   sInfo->swap_ust = vmesa->swap_ust;
+   sInfo->swap_missed_count = vmesa->swap_missed_count;
+   if (sInfo->swap_missed_count != 0)
+      sInfo->swap_missed_usage = driCalculateSwapUsage( dPriv, 0, vmesa->swap_missed_ust );
+   else
+      sInfo->swap_missed_usage = 0.0;
+
+   return 0;
+}
+/* Driver entry-point table; the WaitForSBC and SwapBuffersMSC hooks are left NULL. */
+const struct __DriverAPIRec driDriverAPI = {
+   .InitScreen      = viaInitScreen,
+   .DestroyScreen   = viaDestroyScreen,
+   .CreateContext   = viaCreateContext,
+   .DestroyContext  = viaDestroyContext,
+   .CreateBuffer    = viaCreateBuffer,
+   .DestroyBuffer   = viaDestroyBuffer,
+   .SwapBuffers     = viaSwapBuffers,
+   .MakeCurrent     = viaMakeCurrent,
+   .UnbindContext   = viaUnbindContext,
+   .GetSwapInfo     = getSwapInfo,
+   .GetDrawableMSC  = driDrawableGetMSC32,
+   .WaitForMSC      = driWaitForMSC32,
+   .WaitForSBC      = NULL,
+   .SwapBuffersMSC  = NULL
+};
+
+/* This is the table of extensions that the loader will dlsym() for. */
+PUBLIC const __DRIextension *__driDriverExtensions[] = {
+    &driCoreExtension.base,
+    &driLegacyExtension.base,
+    NULL
+};
diff --git a/src/mesa/drivers/dri/unichrome/via_screen.h b/src/mesa/drivers/dri/unichrome/via_screen.h
new file mode 100644
index 0000000000..51df0ce4eb
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_screen.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _VIAINIT_H
+#define _VIAINIT_H
+
+#include <sys/time.h>
+#include "dri_util.h"
+#include "via_dri.h"
+#include "xmlconfig.h"
+/* Per-screen driver state, created by viaInitDriver() and freed by viaDestroyScreen(). */
+typedef struct {
+    viaRegion regs;
+    viaRegion agp;
+    int deviceID;
+    int width;
+    int height;
+    int mem;
+
+    int cpp;                    /* NOTE(review): not assigned by viaInitDriver() */
+    int bitsPerPixel;           /* bytesPerPixel * 8 */
+    int bytesPerPixel;
+    int fbFormat;
+    int fbOffset;               /* set to 0 by viaInitDriver() */
+    int fbSize;
+    
+    int fbStride;
+
+    int backOffset;
+    int depthOffset;
+
+    int backPitch;
+    int backPitchBits;
+
+    int textureOffset;
+    int textureSize;
+    int logTextureGranularity;
+    
+    drmAddress reg;             /* drmMap() of the register region */
+    drmAddress agpLinearStart;  /* drmMap() of the AGP region, 0 when there is none */
+    GLuint agpBase;             /* drmAgpBase() result, only set with an AGP region */
+
+    __DRIscreen *driScrnPriv;
+    drmBufMapPtr bufs;          /* from via_create_empty_buffers() */
+    unsigned int sareaPrivOffset;
+    /*=* John Sheng [2003.12.9] Tuxracer & VQ *=*/
+    int VQEnable;
+    int irqEnabled;             /* gates the swap-control and MSC extensions */
+
+    /* Configuration cache with default values for all contexts */
+    driOptionCache optionCache;
+
+    const __DRIextension *extensions[5]; /* NULL-terminated; viaInitDriver() stores at most 4 + NULL */
+} viaScreenPrivate;
+
+
+extern GLboolean
+viaCreateContext(gl_api api,
+		 const __GLcontextModes *mesaVis,
+                 __DRIcontext *driContextPriv,
+                 void *sharedContextPrivate);
+
+extern void
+viaDestroyContext(__DRIcontext *driContextPriv);
+
+extern GLboolean
+viaUnbindContext(__DRIcontext *driContextPriv);
+
+extern GLboolean
+viaMakeCurrent(__DRIcontext *driContextPriv,
+               __DRIdrawable *driDrawPriv,
+               __DRIdrawable *driReadPriv);
+
+extern void
+viaSwapBuffers(__DRIdrawable *drawablePrivate);
+
+#endif
diff --git a/src/mesa/drivers/dri/unichrome/via_span.c b/src/mesa/drivers/dri/unichrome/via_span.c
new file mode 100644
index 0000000000..fa3cbf7a79
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_span.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/glheader.h"
+#include "main/formats.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "via_context.h"
+#include "via_span.h"
+#include "via_ioctl.h"
+#include "swrast/swrast.h"
+
+#define DBG 0
+/* Mirror row _y within `height` rows; the argument is parenthesised so expressions expand safely. */
+#define Y_FLIP(_y) (height - (_y) - 1)
+
+#define HW_LOCK() 
+
+#define HW_UNLOCK()
+/* Locals for the color span templates: pitch from the renderbuffer, height from its drawable, buf at vrb->origMap. */
+#undef LOCAL_VARS
+#define LOCAL_VARS                                                   	\
+    struct via_renderbuffer *vrb = (struct via_renderbuffer *) rb;   	\
+    __DRIdrawable *dPriv = vrb->dPriv;                           \
+    GLuint pitch = vrb->pitch;                                          \
+    GLuint height = dPriv->h;                                        	\
+    GLint p = 0;							\
+    char *buf = (char *)(vrb->origMap);					\
+    (void) p;
+
+/* ================================================================
+ * Color buffer
+ */
+
+/* 16 bit, RGB565 color spanline and pixel functions
+ */
+#define GET_PTR(_x, _y) (buf + (_x) * 2 + (_y) * pitch)
+#define SPANTMP_PIXEL_FMT GL_RGB
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
+
+#define TAG(x)    via##x##_565
+#define TAG2(x,y) via##x##_565##y
+#include "spantmp2.h"
+
+
+/* 32 bit, ARGB8888 color spanline and pixel functions
+ */
+#define GET_PTR(_x, _y) (buf + (_x) * 4 + (_y) * pitch)
+#define SPANTMP_PIXEL_FMT GL_BGRA
+#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
+
+#define TAG(x)    via##x##_8888
+#define TAG2(x,y) via##x##_8888##y
+#include "spantmp2.h"
+
+
+/* 16 bit depthbuffer functions.
+ */
+#define LOCAL_DEPTH_VARS                                            \
+    struct via_renderbuffer *vrb = (struct via_renderbuffer *) rb;  \
+    __DRIdrawable *dPriv = vrb->dPriv;                       \
+    GLuint depth_pitch = vrb->pitch;                                \
+    GLuint height = dPriv->h;                                       \
+    char *buf = (char *)(vrb->map)
+
+#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS 
+
+#define VALUE_TYPE GLushort
+
+#define WRITE_DEPTH(_x, _y, d)                      \
+    *(GLushort *)(buf + (_x) * 2 + (_y) * depth_pitch) = d;
+
+#define READ_DEPTH(d, _x, _y)                       \
+    d = *(volatile GLushort *)(buf + (_x) * 2 + (_y) * depth_pitch);
+
+#define TAG(x) via##x##_z16
+#include "depthtmp.h"
+
+/* 32 bit depthbuffer functions.
+ */
+#define VALUE_TYPE GLuint
+
+#define WRITE_DEPTH(_x, _y, d)                      \
+    *(GLuint *)(buf + (_x) * 4 + (_y) * depth_pitch) = d;
+
+#define READ_DEPTH(d, _x, _y)                       \
+    d = *(volatile GLuint *)(buf + (_x) * 4 + (_y) * depth_pitch);
+
+#define TAG(x) via##x##_z32
+#include "depthtmp.h"
+
+
+/* 24/8 bit interleaved depth/stencil functions: each 32-bit word holds
+ * depth in its upper 24 bits and stencil in its low 8 bits.
+ */
+#define VALUE_TYPE GLuint
+
+#define WRITE_DEPTH( _x, _y, d ) {			\
+   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*depth_pitch);	\
+   tmp &= 0x000000ff;					\
+   tmp |= ((d)<<8);				\
+   *(GLuint *)(buf + (_x)*4 + (_y)*depth_pitch) = tmp;		\
+}
+
+#define READ_DEPTH( d, _x, _y )		\
+   d = (*(GLuint *)(buf + (_x)*4 + (_y)*depth_pitch)) >> 8;
+
+/* Instantiate the depth span functions with the _z24_s8 suffix. */
+#define TAG(x) via##x##_z24_s8
+#include "depthtmp.h"
+/* Stencil writes replace only the low byte, leaving the depth bits intact. */
+#define WRITE_STENCIL( _x, _y, d ) {			\
+   GLuint tmp = *(GLuint *)(buf + (_x)*4 + (_y)*depth_pitch);	\
+   tmp &= 0xffffff00;					\
+   tmp |= (d);					\
+   *(GLuint *)(buf + (_x)*4 + (_y)*depth_pitch) = tmp;		\
+}
+
+#define READ_STENCIL( d, _x, _y )			\
+   d = *(GLuint *)(buf + (_x)*4 + (_y)*depth_pitch) & 0xff;
+/* Instantiate the stencil span functions with the _z24_s8 suffix. */
+#define TAG(x) via##x##_z24_s8
+#include "stenciltmp.h"
+
+
+
+
+/* Move locking out to get reasonable span performance: viaWaitIdle() is called
+ * first, then the hardware lock is taken and held until viaSpanRenderFinish(). */
+void viaSpanRenderStart( GLcontext *ctx )
+{
+   struct via_context *vmesa = VIA_CONTEXT(ctx);     
+   viaWaitIdle(vmesa, GL_FALSE);
+   LOCK_HARDWARE(vmesa);
+}
+/* Flush swrast, then drop the hardware lock taken in viaSpanRenderStart(). */
+void viaSpanRenderFinish( GLcontext *ctx )
+{
+   struct via_context *vmesa = VIA_CONTEXT(ctx);
+   _swrast_flush( ctx );
+   UNLOCK_HARDWARE( vmesa );
+}
+/* Register viaSpanRenderStart/viaSpanRenderFinish as the swrast span-render hooks. */
+void viaInitSpanFuncs(GLcontext *ctx)
+{
+    struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
+    swdd->SpanRenderStart = viaSpanRenderStart;
+    swdd->SpanRenderFinish = viaSpanRenderFinish; 
+}
+
+
+
+/**
+ * Install the span accessors matching the renderbuffer's format.
+ * Formats without an entry below leave the renderbuffer untouched;
+ * MESA_FORMAT_S8 uses the stencil accessors of the z24_s8 layout.
+ */
+void
+viaSetSpanFunctions(struct via_renderbuffer *vrb, const GLvisual *vis)
+{
+   switch (vrb->Base.Format) {
+   case MESA_FORMAT_RGB565:
+      viaInitPointers_565(&vrb->Base); break;
+   case MESA_FORMAT_ARGB8888:
+      viaInitPointers_8888(&vrb->Base); break;
+   case MESA_FORMAT_Z16:
+      viaInitDepthPointers_z16(&vrb->Base); break;
+   case MESA_FORMAT_Z24_S8:
+      viaInitDepthPointers_z24_s8(&vrb->Base); break;
+   case MESA_FORMAT_Z32:
+      viaInitDepthPointers_z32(&vrb->Base); break;
+   case MESA_FORMAT_S8:
+      viaInitStencilPointers_z24_s8(&vrb->Base); break;
+   default:
+      break;
+   }
+}
diff --git a/src/mesa/drivers/dri/unichrome/via_span.h b/src/mesa/drivers/dri/unichrome/via_span.h
new file mode 100644
index 0000000000..3dca0d5661
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_span.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _VIA_SPAN_H
+#define _VIA_SPAN_H
+
+/* Install viaSpanRenderStart/viaSpanRenderFinish as the swrast span hooks. */
+extern void viaInitSpanFuncs(GLcontext *ctx);
+/* Span hook: waits for idle and takes the hardware lock. */
+extern void viaSpanRenderStart( GLcontext *ctx );
+/* Span hook: flushes swrast and releases the hardware lock. */
+extern void viaSpanRenderFinish( GLcontext *ctx );
+
+/* Plug in the Get/Put routines matching vrb's format. */
+extern void
+viaSetSpanFunctions(struct via_renderbuffer *vrb, const GLvisual *vis);
+
+#endif
diff --git a/src/mesa/drivers/dri/unichrome/via_state.c b/src/mesa/drivers/dri/unichrome/via_state.c
new file mode 100644
index 0000000000..f7029b9492
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_state.c
@@ -0,0 +1,1529 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/colormac.h"
+#include "main/enums.h"
+#include "main/dd.h"
+#include "main/mm.h"
+
+#include "via_context.h"
+#include "via_state.h"
+#include "via_tex.h"
+#include "via_ioctl.h"
+#include "via_3d_reg.h"
+
+#include "swrast/swrast.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "swrast_setup/swrast_setup.h"
+
+
+/* Hardware raster-operation codes, one per GL logic op, listed in the order
+ * GL_CLEAR .. GL_SET as annotated on each entry (s = source, d = destination).
+ * The lookup site is not in this part of the file; presumably the table is
+ * indexed by the logic op's offset from GL_CLEAR -- confirm at the caller.
+ */
+static GLuint ROP[16] = {
+    HC_HROP_BLACK,    /* GL_CLEAR           0                      	*/
+    HC_HROP_DPa,      /* GL_AND             s & d                  	*/
+    HC_HROP_PDna,     /* GL_AND_REVERSE     s & ~d  			*/
+    HC_HROP_P,        /* GL_COPY            s                       	*/
+    HC_HROP_DPna,     /* GL_AND_INVERTED    ~s & d                      */
+    HC_HROP_D,        /* GL_NOOP            d  		                */
+    HC_HROP_DPx,      /* GL_XOR             s ^ d                       */
+    HC_HROP_DPo,      /* GL_OR              s | d                       */
+    HC_HROP_DPon,     /* GL_NOR             ~(s | d)                    */
+    HC_HROP_DPxn,     /* GL_EQUIV           ~(s ^ d)                    */
+    HC_HROP_Dn,       /* GL_INVERT          ~d                       	*/
+    HC_HROP_PDno,     /* GL_OR_REVERSE      s | ~d                      */
+    HC_HROP_Pn,       /* GL_COPY_INVERTED   ~s                       	*/
+    HC_HROP_DPno,     /* GL_OR_INVERTED     ~s | d                      */
+    HC_HROP_DPan,     /* GL_NAND            ~(s & d)                    */
+    HC_HROP_WHITE     /* GL_SET             1                       	*/
+};
+
+/*
+ * Compute the 'S5.5' lod bias factor from the floating point OpenGL bias.
+ *
+ * The bias is scaled by 32, truncated toward zero and clamped to the range
+ * [-512, 511].  The (possibly negative) result is returned converted to
+ * GLuint, so callers see negative values in two's-complement form.
+ *
+ * The clamp is applied before the float-to-int conversion: converting a
+ * value that does not fit in an int is undefined behaviour in C, and the
+ * bias comes straight from the application.  A NaN bias yields 0.
+ */
+static GLuint viaComputeLodBias(GLfloat bias)
+{
+   const double scaled = bias * 32.0;
+   int b;
+
+   if (scaled >= 511.0)
+      b = 511;
+   else if (scaled <= -512.0)
+      b = -512;
+   else if (scaled != scaled)
+      b = 0;                    /* NaN: no meaningful bias */
+   else
+      b = (int) scaled;         /* now guaranteed to be in range */
+
+   return (GLuint) b;
+}
+
+/*
+ * Write the driver's cached hardware register values (the vmesa->reg*
+ * fields and the per-texture-object reg* fields) into the command ring.
+ *
+ * Sections, in emission order:
+ *   - enable / framebuffer mask / ROP registers (always)
+ *   - depth (and stencil) buffer setup, if the context has one
+ *   - alpha test, blend, fog and line stipple registers, each only when the
+ *     corresponding GL state is enabled
+ *   - texture unit 0 and 1 descriptors, when any texture unit is enabled
+ *   - polygon stipple (compiled out, see the #if 0 note)
+ *
+ * Clears vmesa->newEmitState (and vmesa->clearTexCache if it was set).
+ *
+ * NOTE(review): the local `i` is only ever incremented in this function and
+ * never read; the `i` inside the #if 0 block is a separate declaration.
+ */
+void viaEmitState(struct via_context *vmesa)
+{
+   GLcontext *ctx = vmesa->glCtx;
+   GLuint i = 0;
+   GLuint j = 0;
+   RING_VARS;
+
+   /* Presumably reserves 0x110 bytes of DMA space up front -- confirm
+    * against viaCheckDma().
+    */
+   viaCheckDma(vmesa, 0x110);
+    
+   /* Global enable bits, framebuffer write mask and raster op. */
+   BEGIN_RING(5);
+   OUT_RING( HC_HEADER2 );
+   OUT_RING( (HC_ParaType_NotTex << 16) );
+   OUT_RING( ((HC_SubA_HEnable << 24) | vmesa->regEnable) );
+   OUT_RING( ((HC_SubA_HFBBMSKL << 24) | vmesa->regHFBBMSKL) );    
+   OUT_RING( ((HC_SubA_HROP << 24) | vmesa->regHROP) );        
+   ADVANCE_RING();
+    
+   /* Depth buffer location/format.  With hardware stencil the format is
+    * fixed to HC_HZWBFM_24 and the two stencil registers are emitted too;
+    * otherwise the format follows depthBits (16, else 32).
+    */
+   if (vmesa->have_hw_stencil) {
+      GLuint pitch, format, offset;
+	
+      format = HC_HZWBFM_24;	    	
+      offset = vmesa->depth.offset;
+      pitch = vmesa->depth.pitch;
+	
+      BEGIN_RING(6);
+      /* The 32-bit buffer offset is split into low 24 / high 8 bits. */
+      OUT_RING( (HC_SubA_HZWBBasL << 24) | (offset & 0xFFFFFF) );
+      OUT_RING( (HC_SubA_HZWBBasH << 24) | ((offset & 0xFF000000) >> 24) );	
+      OUT_RING( (HC_SubA_HZWBType << 24) | HC_HDBLoc_Local | HC_HZONEasFF_MASK |
+	         format | pitch );            
+      OUT_RING( (HC_SubA_HZWTMD << 24) | vmesa->regHZWTMD );
+      OUT_RING( (HC_SubA_HSTREF << 24) | vmesa->regHSTREF );
+      OUT_RING( (HC_SubA_HSTMD << 24) | vmesa->regHSTMD );
+      ADVANCE_RING();
+   }
+   else if (vmesa->hasDepth) {
+      GLuint pitch, format, offset;
+	
+      if (vmesa->depthBits == 16) {
+	 format = HC_HZWBFM_16;
+      }	    
+      else {
+	 format = HC_HZWBFM_32;
+      }
+	    
+	    
+      offset = vmesa->depth.offset;
+      pitch = vmesa->depth.pitch;
+	
+      BEGIN_RING(4);
+      OUT_RING( (HC_SubA_HZWBBasL << 24) | (offset & 0xFFFFFF) );
+      OUT_RING( (HC_SubA_HZWBBasH << 24) | ((offset & 0xFF000000) >> 24) );
+      OUT_RING( (HC_SubA_HZWBType << 24) | HC_HDBLoc_Local | HC_HZONEasFF_MASK |
+	         format | pitch );
+      OUT_RING( (HC_SubA_HZWTMD << 24) | vmesa->regHZWTMD );
+      ADVANCE_RING();
+   }
+    
+   /* Alpha test register, only when alpha testing is enabled. */
+   if (ctx->Color.AlphaEnabled) {
+      BEGIN_RING(1);
+      OUT_RING( (HC_SubA_HATMD << 24) | vmesa->regHATMD );
+      ADVANCE_RING();
+      i++;
+   }   
+
+   /* Blend unit registers, only when blending is enabled. */
+   if (ctx->Color.BlendEnabled) {
+      BEGIN_RING(11);
+      OUT_RING( (HC_SubA_HABLCsat << 24) | vmesa->regHABLCsat );
+      OUT_RING( (HC_SubA_HABLCop  << 24) | vmesa->regHABLCop ); 
+      OUT_RING( (HC_SubA_HABLAsat << 24) | vmesa->regHABLAsat );        
+      OUT_RING( (HC_SubA_HABLAop  << 24) | vmesa->regHABLAop ); 
+      OUT_RING( (HC_SubA_HABLRCa  << 24) | vmesa->regHABLRCa ); 
+      OUT_RING( (HC_SubA_HABLRFCa << 24) | vmesa->regHABLRFCa );        
+      OUT_RING( (HC_SubA_HABLRCbias << 24) | vmesa->regHABLRCbias ); 
+      OUT_RING( (HC_SubA_HABLRCb  << 24) | vmesa->regHABLRCb ); 
+      OUT_RING( (HC_SubA_HABLRFCb << 24) | vmesa->regHABLRFCb );        
+      OUT_RING( (HC_SubA_HABLRAa  << 24) | vmesa->regHABLRAa ); 
+      OUT_RING( (HC_SubA_HABLRAb  << 24) | vmesa->regHABLRAb ); 
+      ADVANCE_RING();
+   }
+    
+   /* Fog registers, only when fog is enabled. */
+   if (ctx->Fog.Enabled) {
+      BEGIN_RING(3);
+      OUT_RING( (HC_SubA_HFogLF << 24) | vmesa->regHFogLF ); 
+      OUT_RING( (HC_SubA_HFogCL << 24) | vmesa->regHFogCL ); 
+      OUT_RING( (HC_SubA_HFogCH << 24) | vmesa->regHFogCH ); 
+      ADVANCE_RING();
+   }
+    
+   /* Line stipple pattern and repeat factor come straight from GL state. */
+   if (ctx->Line.StippleFlag) {
+      BEGIN_RING(2);
+      OUT_RING( (HC_SubA_HLP << 24) | ctx->Line.StipplePattern ); 
+      OUT_RING( (HC_SubA_HLPRF << 24) | ctx->Line.StippleFactor );
+      ADVANCE_RING();
+   }
+
+   /* HPixGC is always written as zero. */
+   BEGIN_RING(1);
+   OUT_RING( (HC_SubA_HPixGC << 24) | 0x0 ); 
+   ADVANCE_RING();
+    
+   QWORD_PAD_RING();
+
+
+   if (ctx->Texture._EnabledUnits) {
+    
+      struct gl_texture_unit *texUnit0 = &ctx->Texture.Unit[0];
+      struct gl_texture_unit *texUnit1 = &ctx->Texture.Unit[1];
+
+      /* General texture header: HTXSMD gets bit 3 set when both units are
+       * enabled, and the texture cache clear bit is added once if
+       * clearTexCache was requested.  The enable register is then re-sent.
+       */
+      {
+	 GLuint nDummyValue = 0;
+
+	 BEGIN_RING( 8 );
+	 OUT_RING( HC_HEADER2 );
+	 OUT_RING( (HC_ParaType_Tex << 16) | (HC_SubType_TexGeneral << 24) );
+
+	 if (texUnit0->Enabled && texUnit1->Enabled) {
+	    nDummyValue = (HC_SubA_HTXSMD << 24) | (1 << 3);                
+	 }
+	 else {
+	    nDummyValue = (HC_SubA_HTXSMD << 24) | 0;
+	 }
+
+	 if (vmesa->clearTexCache) {
+	    vmesa->clearTexCache = 0;
+	    OUT_RING( nDummyValue | HC_HTXCHCLR_MASK );
+	    OUT_RING( nDummyValue );
+	 }
+	 else {
+	    OUT_RING( nDummyValue );
+	    OUT_RING( nDummyValue );
+	 }
+
+	 OUT_RING( HC_HEADER2 );
+	 OUT_RING( HC_ParaType_NotTex << 16 );
+	 OUT_RING( (HC_SubA_HEnable << 24) | vmesa->regEnable );
+	 OUT_RING( (HC_SubA_HEnable << 24) | vmesa->regEnable );
+	 ADVANCE_RING();
+      }
+
+      /* Texture unit 0: per-object descriptor (format, level range, sizes,
+       * base addresses and pitches), laid out differently for exactly 8
+       * levels, for 2..N levels, and for a single level; followed by the
+       * per-unit blend/filter registers from vmesa->regHTXn*[0].
+       */
+      if (texUnit0->Enabled) {
+	 struct gl_texture_object *texObj = texUnit0->_Current;
+	 struct via_texture_object *t = (struct via_texture_object *)texObj;
+	 GLuint numLevels = t->lastLevel - t->firstLevel + 1;
+	 if (VIA_DEBUG & DEBUG_STATE) {
+	    fprintf(stderr, "texture0 enabled\n");
+	 }		
+	 if (numLevels == 8) {
+	    BEGIN_RING(27);
+	    OUT_RING( HC_HEADER2 );
+	    OUT_RING( (HC_ParaType_Tex << 16) |  (0 << 24) );
+	    OUT_RING( t->regTexFM );
+	    OUT_RING( (HC_SubA_HTXnL0OS << 24) |
+	       ((t->lastLevel) << HC_HTXnLVmax_SHIFT) | t->firstLevel );
+	    OUT_RING( t->regTexWidthLog2[0] );
+	    OUT_RING( t->regTexWidthLog2[1] );
+	    OUT_RING( t->regTexHeightLog2[0] );
+	    OUT_RING( t->regTexHeightLog2[1] );
+	    OUT_RING( t->regTexBaseH[0] );
+	    OUT_RING( t->regTexBaseH[1] );
+	    OUT_RING( t->regTexBaseH[2] );
+	    OUT_RING( t->regTexBaseAndPitch[0].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[0].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[1].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[1].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[2].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[2].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[3].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[3].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[4].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[4].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[5].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[5].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[6].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[6].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[7].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[7].pitchLog2 );
+	    ADVANCE_RING();
+	 }
+	 else if (numLevels > 1) {
+
+	    /* 12 + 2*numLevels is an upper bound on the dwords written here;
+	     * the actual count depends on numLevels, hence the VARIABLE
+	     * advance at the end.
+	     */
+	    BEGIN_RING(12 + numLevels * 2);
+	    OUT_RING( HC_HEADER2 );
+	    OUT_RING( (HC_ParaType_Tex << 16) |  (0 << 24) );
+	    OUT_RING( t->regTexFM );
+	    OUT_RING( (HC_SubA_HTXnL0OS << 24) |
+	       ((t->lastLevel) << HC_HTXnLVmax_SHIFT) | t->firstLevel );
+	    OUT_RING( t->regTexWidthLog2[0] );
+	    OUT_RING( t->regTexHeightLog2[0] );
+		
+	    if (numLevels > 6) {
+	       OUT_RING( t->regTexWidthLog2[1] );
+	       OUT_RING( t->regTexHeightLog2[1] );
+	    }
+                
+	    OUT_RING( t->regTexBaseH[0] );
+		
+	    if (numLevels > 3) {
+	       OUT_RING( t->regTexBaseH[1] );
+	    }
+	    if (numLevels > 6) {
+	       OUT_RING( t->regTexBaseH[2] );
+	    }
+	    if (numLevels > 9)  {
+	       OUT_RING( t->regTexBaseH[3] );
+	    }
+
+	    for (j = 0; j < numLevels; j++) {
+	       OUT_RING( t->regTexBaseAndPitch[j].baseL );
+	       OUT_RING( t->regTexBaseAndPitch[j].pitchLog2 );
+	    }
+
+	    ADVANCE_RING_VARIABLE();
+	 }
+	 else {
+
+	    BEGIN_RING(9);
+	    OUT_RING( HC_HEADER2 );
+	    OUT_RING( (HC_ParaType_Tex << 16) |  (0 << 24) );
+	    OUT_RING( t->regTexFM );
+	    OUT_RING( (HC_SubA_HTXnL0OS << 24) |
+	       ((t->lastLevel) << HC_HTXnLVmax_SHIFT) | t->firstLevel );
+	    OUT_RING( t->regTexWidthLog2[0] );
+	    OUT_RING( t->regTexHeightLog2[0] );
+	    OUT_RING( t->regTexBaseH[0] );
+	    OUT_RING( t->regTexBaseAndPitch[0].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[0].pitchLog2 );
+	    ADVANCE_RING();
+	 }
+
+	 BEGIN_RING(14);
+	 OUT_RING( (HC_SubA_HTXnTB << 24) | vmesa->regHTXnTB[0] );
+	 OUT_RING( (HC_SubA_HTXnMPMD << 24) | vmesa->regHTXnMPMD[0] );
+	 OUT_RING( (HC_SubA_HTXnTBLCsat << 24) | vmesa->regHTXnTBLCsat[0] );
+	 OUT_RING( (HC_SubA_HTXnTBLCop << 24) | vmesa->regHTXnTBLCop[0] );
+	 OUT_RING( (HC_SubA_HTXnTBLMPfog << 24) | vmesa->regHTXnTBLMPfog[0] );
+	 OUT_RING( (HC_SubA_HTXnTBLAsat << 24) | vmesa->regHTXnTBLAsat[0] );
+	 OUT_RING( (HC_SubA_HTXnTBLRCb << 24) | vmesa->regHTXnTBLRCb[0] );
+	 OUT_RING( (HC_SubA_HTXnTBLRAa << 24) | vmesa->regHTXnTBLRAa[0] );
+	 OUT_RING( (HC_SubA_HTXnTBLRFog << 24) | vmesa->regHTXnTBLRFog[0] );
+	 OUT_RING( (HC_SubA_HTXnTBLRCa << 24) | vmesa->regHTXnTBLRCa[0] );
+	 OUT_RING( (HC_SubA_HTXnTBLRCc << 24) | vmesa->regHTXnTBLRCc[0] );
+	 OUT_RING( (HC_SubA_HTXnTBLRCbias << 24) | vmesa->regHTXnTBLRCbias[0] );
+	 OUT_RING( (HC_SubA_HTXnTBC << 24) | vmesa->regHTXnTBC[0] );
+	 OUT_RING( (HC_SubA_HTXnTRAH << 24) | vmesa->regHTXnTRAH[0] );
+/* 	 OUT_RING( (HC_SubA_HTXnCLODu << 24) | vmesa->regHTXnCLOD[0] ); */
+	 ADVANCE_RING();
+
+	 /* KW:  This test never succeeds:
+	  */
+	 if (t->regTexFM == HC_HTXnFM_Index8) {
+	    const struct gl_color_table *table = &texObj->Palette;
+	    const GLfloat *tableF = table->TableF;
+
+	    BEGIN_RING(2 + table->Size);
+	    OUT_RING( HC_HEADER2 );
+	    OUT_RING( (HC_ParaType_Palette << 16) | (0 << 24) );
+	    for (j = 0; j < table->Size; j++) 
+	       OUT_RING( tableF[j] );
+	    ADVANCE_RING();
+	       
+	 }
+
+	 QWORD_PAD_RING();
+      }
+	
+      /* Texture unit 1: same layout as unit 0, but the hardware stage index
+       * written into the header is 1 only when unit 0 is enabled as well;
+       * a lone unit 1 is emitted as stage 0.  The per-unit registers are
+       * still taken from vmesa->regHTXn*[1].
+       */
+      if (texUnit1->Enabled) {
+	 struct gl_texture_object *texObj = texUnit1->_Current;
+	 struct via_texture_object *t = (struct via_texture_object *)texObj;
+	 GLuint numLevels = t->lastLevel - t->firstLevel + 1;
+	 int texunit = (texUnit0->Enabled ? 1 : 0);
+	 if (VIA_DEBUG & DEBUG_STATE) {
+	    fprintf(stderr, "texture1 enabled\n");
+	 }		
+	 if (numLevels == 8) {
+	    BEGIN_RING(27);
+	    OUT_RING( HC_HEADER2 );
+	    OUT_RING( (HC_ParaType_Tex << 16) |  (texunit << 24) );
+	    OUT_RING( t->regTexFM );
+	    OUT_RING( (HC_SubA_HTXnL0OS << 24) |
+	       ((t->lastLevel) << HC_HTXnLVmax_SHIFT) | t->firstLevel );
+	    OUT_RING( t->regTexWidthLog2[0] );
+	    OUT_RING( t->regTexWidthLog2[1] );
+	    OUT_RING( t->regTexHeightLog2[0] );
+	    OUT_RING( t->regTexHeightLog2[1] );
+	    OUT_RING( t->regTexBaseH[0] );
+	    OUT_RING( t->regTexBaseH[1] );
+	    OUT_RING( t->regTexBaseH[2] );
+	    OUT_RING( t->regTexBaseAndPitch[0].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[0].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[1].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[1].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[2].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[2].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[3].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[3].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[4].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[4].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[5].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[5].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[6].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[6].pitchLog2 );
+	    OUT_RING( t->regTexBaseAndPitch[7].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[7].pitchLog2 );
+	    ADVANCE_RING();
+	 }
+	 else if (numLevels > 1) {
+	    BEGIN_RING(12 + numLevels * 2);
+	    OUT_RING( HC_HEADER2 );
+	    OUT_RING( (HC_ParaType_Tex << 16) |  (texunit << 24) );
+	    OUT_RING( t->regTexFM );
+	    OUT_RING( (HC_SubA_HTXnL0OS << 24) |
+	       ((t->lastLevel) << HC_HTXnLVmax_SHIFT) | t->firstLevel );
+	    OUT_RING( t->regTexWidthLog2[0] );
+	    OUT_RING( t->regTexHeightLog2[0] );
+		
+	    if (numLevels > 6) {
+	       OUT_RING( t->regTexWidthLog2[1] );
+	       OUT_RING( t->regTexHeightLog2[1] );
+	       i += 2;
+	    }
+                
+	    OUT_RING( t->regTexBaseH[0] );
+		
+	    if (numLevels > 3) { 
+	       OUT_RING( t->regTexBaseH[1] );
+	    }
+	    if (numLevels > 6) {
+	       OUT_RING( t->regTexBaseH[2] );
+	    }
+	    if (numLevels > 9)  {
+	       OUT_RING( t->regTexBaseH[3] );
+	    }
+		
+	    for (j = 0; j < numLevels; j++) {
+	       OUT_RING( t->regTexBaseAndPitch[j].baseL );
+	       OUT_RING( t->regTexBaseAndPitch[j].pitchLog2 );
+	    }
+	    ADVANCE_RING_VARIABLE();
+	 }
+	 else {
+	    BEGIN_RING(9);
+	    OUT_RING( HC_HEADER2 );
+	    OUT_RING( (HC_ParaType_Tex << 16) |  (texunit << 24) );
+	    OUT_RING( t->regTexFM );
+	    OUT_RING( (HC_SubA_HTXnL0OS << 24) |
+	       ((t->lastLevel) << HC_HTXnLVmax_SHIFT) | t->firstLevel );
+	    OUT_RING( t->regTexWidthLog2[0] );
+	    OUT_RING( t->regTexHeightLog2[0] );
+	    OUT_RING( t->regTexBaseH[0] );
+	    OUT_RING( t->regTexBaseAndPitch[0].baseL );
+	    OUT_RING( t->regTexBaseAndPitch[0].pitchLog2 );
+	    ADVANCE_RING();
+	 }
+
+	 BEGIN_RING(14);
+	 OUT_RING( (HC_SubA_HTXnTB << 24) | vmesa->regHTXnTB[1] );
+	 OUT_RING( (HC_SubA_HTXnMPMD << 24) | vmesa->regHTXnMPMD[1] );
+	 OUT_RING( (HC_SubA_HTXnTBLCsat << 24) | vmesa->regHTXnTBLCsat[1] );
+	 OUT_RING( (HC_SubA_HTXnTBLCop << 24) | vmesa->regHTXnTBLCop[1] );
+	 OUT_RING( (HC_SubA_HTXnTBLMPfog << 24) | vmesa->regHTXnTBLMPfog[1] );
+	 OUT_RING( (HC_SubA_HTXnTBLAsat << 24) | vmesa->regHTXnTBLAsat[1] );
+	 OUT_RING( (HC_SubA_HTXnTBLRCb << 24) | vmesa->regHTXnTBLRCb[1] );
+	 OUT_RING( (HC_SubA_HTXnTBLRAa << 24) | vmesa->regHTXnTBLRAa[1] );
+	 OUT_RING( (HC_SubA_HTXnTBLRFog << 24) | vmesa->regHTXnTBLRFog[1] );
+	 OUT_RING( (HC_SubA_HTXnTBLRCa << 24) | vmesa->regHTXnTBLRCa[1] );
+	 OUT_RING( (HC_SubA_HTXnTBLRCc << 24) | vmesa->regHTXnTBLRCc[1] );
+	 OUT_RING( (HC_SubA_HTXnTBLRCbias << 24) | vmesa->regHTXnTBLRCbias[1] );
+	 OUT_RING( (HC_SubA_HTXnTBC << 24) | vmesa->regHTXnTBC[1] );
+	 OUT_RING( (HC_SubA_HTXnTRAH << 24) | vmesa->regHTXnTRAH[1] );
+/* 	 OUT_RING( (HC_SubA_HTXnCLODu << 24) | vmesa->regHTXnCLOD[1] ); */
+	 ADVANCE_RING();
+
+	 /* KW:  This test never succeeds:
+	  */
+	 if (t->regTexFM == HC_HTXnFM_Index8) {
+	    const struct gl_color_table *table = &texObj->Palette;
+	    const GLfloat *tableF = table->TableF;
+
+	    BEGIN_RING(2 + table->Size);
+	    OUT_RING( HC_HEADER2 );
+	    OUT_RING( (HC_ParaType_Palette << 16) | (texunit << 24) );
+	    for (j = 0; j < table->Size; j++) {
+	       OUT_RING( tableF[j] );
+	    }
+	    ADVANCE_RING();
+	 }
+
+	 QWORD_PAD_RING();
+      }
+   }
+    
+#if 0
+   /* Polygon stipple is broken - for certain stipple values,
+    * eg. 0xf0f0f0f0, the hardware will refuse to accept the stipple.
+    * Coincidentally, conform generates just such a stipple.
+    */
+   if (ctx->Polygon.StippleFlag) {
+      GLuint *stipple = &ctx->PolygonStipple[0];
+      __DRIdrawable *dPriv = vmesa->driDrawable;
+      struct via_renderbuffer *const vrb = 
+	(struct via_renderbuffer *) dPriv->driverPrivate;
+      GLint i;
+        
+      BEGIN_RING(38);
+      OUT_RING( HC_HEADER2 );             
+
+      OUT_RING( ((HC_ParaType_Palette << 16) | (HC_SubType_Stipple << 24)) );
+      for (i = 31; i >= 0; i--) {
+	 GLint j;
+	 GLuint k = 0;
+
+	 /* Need to flip bits left to right:
+	  */
+	 for (j = 0 ; j < 32; j++)
+	    if (stipple[i] & (1<<j))
+	       k |= 1 << (31-j);
+
+	 OUT_RING( k );     
+      }
+
+      OUT_RING( HC_HEADER2 );                     
+      OUT_RING( (HC_ParaType_NotTex << 16) );
+      OUT_RING( (HC_SubA_HSPXYOS << 24) );
+      OUT_RING( (HC_SubA_HSPXYOS << 24) );
+
+      ADVANCE_RING();
+   }
+#endif
+   
+   /* Everything pending has been written to the ring. */
+   vmesa->newEmitState = 0;
+}
+
+
+/* Pack an 8-bit-per-channel colour into the pixel layout used for the given
+ * screen depth: 565 for 16 bpp, 8888 (alpha first) for 32 bpp.  Any other
+ * depth trips an assertion and yields 0.
+ */
+static INLINE GLuint viaPackColor(GLuint bpp,
+                                  GLubyte r, GLubyte g,
+                                  GLubyte b, GLubyte a)
+{
+    if (bpp == 16)
+        return PACK_COLOR_565(r, g, b);
+
+    if (bpp == 32)
+        return PACK_COLOR_8888(a, r, g, b);
+
+    /* Unsupported depth. */
+    assert(0);
+    return 0;
+}
+
+/* Driver hook for glBlendEquation[Separate].  Separate RGB/alpha equations
+ * are not supported, so both modes are expected to match.  Anything other
+ * than GL_FUNC_ADD_EXT is handed to the software fallback, and the logic-op
+ * fallback is re-evaluated as well.
+ */
+static void viaBlendEquationSeparate(GLcontext *ctx,
+				     GLenum rgbMode, 
+				     GLenum aMode)
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    GLboolean swBlendEq;
+    GLboolean swLogicOp;
+
+    if (VIA_DEBUG & DEBUG_STATE) {
+       fprintf(stderr, "%s in\n", __FUNCTION__);
+    }
+
+    /* GL_EXT_blend_equation_separate not supported */
+    ASSERT(rgbMode == aMode);
+
+    /* The hardware only implements the ADD equation. */
+    swBlendEq = (rgbMode != GL_FUNC_ADD_EXT);
+    FALLBACK(vmesa, VIA_FALLBACK_BLEND_EQ, swBlendEq);
+
+    /* BlendEquation can change ColorLogicOpEnabled behind our back, so
+     * refresh the logic-op fallback here too.
+     */
+    swLogicOp = (ctx->Color.ColorLogicOpEnabled &&
+                 ctx->Color.LogicOp != GL_COPY);
+    FALLBACK(vmesa, VIA_FALLBACK_LOGICOP, swLogicOp);
+}
+
+/* Decide whether the current RGB blend factors can be done in hardware.
+ * The factors are read from ctx->Color rather than from the sfactor/dfactor
+ * arguments.  Constant-colour/alpha factors (source or destination) and
+ * GL_SRC_ALPHA_SATURATE as a source factor select the software fallback.
+ */
+static void viaBlendFunc(GLcontext *ctx, GLenum sfactor, GLenum dfactor)
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    const GLenum src = ctx->Color.BlendSrcRGB;
+    const GLenum dst = ctx->Color.BlendDstRGB;
+    GLboolean unsupported;
+
+    if (VIA_DEBUG & DEBUG_STATE) {
+       fprintf(stderr, "%s in\n", __FUNCTION__);
+    }
+
+    unsupported = (src == GL_SRC_ALPHA_SATURATE ||
+                   src == GL_CONSTANT_COLOR ||
+                   src == GL_ONE_MINUS_CONSTANT_COLOR ||
+                   src == GL_CONSTANT_ALPHA ||
+                   src == GL_ONE_MINUS_CONSTANT_ALPHA) ? GL_TRUE : GL_FALSE;
+
+    if (dst == GL_CONSTANT_COLOR ||
+        dst == GL_ONE_MINUS_CONSTANT_COLOR ||
+        dst == GL_CONSTANT_ALPHA ||
+        dst == GL_ONE_MINUS_CONSTANT_ALPHA) {
+        unsupported = GL_TRUE;
+    }
+
+    FALLBACK(vmesa, VIA_FALLBACK_BLEND_FUNC, unsupported);
+}
+
+/* Driver hook for glBlendFuncSeparate.
+ *
+ * Shouldn't be called with differing RGB and alpha factors, as the
+ * extension is disabled.  If it is, GL_INVALID_OPERATION is recorded; in
+ * either case the RGB factors are then forwarded to viaBlendFunc().
+ *
+ * The error text names glBlendFuncSeparate, the entry point that actually
+ * failed (it previously said "glBlendEquation").
+ */
+static void viaBlendFuncSeparate(GLcontext *ctx, GLenum sfactorRGB,
+                                 GLenum dfactorRGB, GLenum sfactorA,
+                                 GLenum dfactorA)
+{
+    if (dfactorRGB != dfactorA || sfactorRGB != sfactorA) {
+        _mesa_error(ctx, GL_INVALID_OPERATION,
+                    "glBlendFuncSeparate (disabled)");
+    }
+
+    viaBlendFunc(ctx, sfactorRGB, dfactorRGB);
+}
+
+
+
+
+/* =============================================================
+ * Hardware clipping
+ */
+/* Driver hook for glScissor: store the scissor box in vmesa->scissorRect,
+ * flipping Y against the drawable height (GL's origin is bottom-left).
+ * Does nothing when no drawable is bound yet.
+ */
+static void viaScissor(GLcontext *ctx, GLint x, GLint y,
+                       GLsizei w, GLsizei h)
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+
+    if (vmesa->driDrawable == NULL) {
+       return;
+    }
+
+    if (VIA_DEBUG & DEBUG_STATE) {
+       fprintf(stderr, "%s %d,%d %dx%d, drawH %d\n", __FUNCTION__, 
+	       x,y,w,h, vmesa->driDrawable->h);
+    }
+
+    /* don't pipeline cliprect changes */
+    if (vmesa->scissor) {
+        VIA_FLUSH_DMA(vmesa);
+    }
+
+    /* Horizontal extent. */
+    vmesa->scissorRect.x1 = x;
+    vmesa->scissorRect.x2 = x + w;
+
+    /* Vertical extent, measured from the top of the drawable. */
+    vmesa->scissorRect.y1 = vmesa->driDrawable->h - y - h;
+    vmesa->scissorRect.y2 = vmesa->driDrawable->h - y;
+}
+
+/* Driver hook for glEnable/glDisable.  Only GL_SCISSOR_TEST is tracked
+ * here: pending DMA is flushed and the new state is recorded in
+ * vmesa->scissor.  All other capabilities are ignored.
+ */
+static void viaEnable(GLcontext *ctx, GLenum cap, GLboolean state)
+{
+   struct via_context *vmesa = VIA_CONTEXT(ctx);
+
+   if (cap == GL_SCISSOR_TEST) {
+      VIA_FLUSH_DMA(vmesa);
+      vmesa->scissor = state;
+   }
+}
+
+
+
+/* Driver hook for glRenderMode: select and feedback modes are handled by
+ * swrast, so any mode other than GL_RENDER raises the render-mode fallback.
+ */
+static void viaRenderMode(GLcontext *ctx, GLenum mode)
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    const GLboolean notRendering = (mode != GL_RENDER);
+
+    FALLBACK(vmesa, VIA_FALLBACK_RENDERMODE, notRendering);
+}
+
+
+/* Driver hook for glDrawBuffer.  Hardware rendering is only possible to a
+ * single front-left or back-left colour buffer; every other configuration
+ * raises the draw-buffer fallback.  On success pending DMA is flushed, the
+ * target buffer is switched, the fallback is cleared and
+ * viaXMesaWindowMoved() is called.
+ */
+static void viaDrawBuffer(GLcontext *ctx, GLenum mode)
+{
+   struct via_context *vmesa = VIA_CONTEXT(ctx);
+   GLboolean toFront;
+
+   if (VIA_DEBUG & (DEBUG_DRI|DEBUG_STATE)) {
+      fprintf(stderr, "%s in\n", __FUNCTION__);
+   }
+
+   if (!ctx->DrawBuffer) {
+      return;
+   }
+
+   /* Multiple (or zero) colour draw buffers: software only. */
+   if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) {
+      FALLBACK(vmesa, VIA_FALLBACK_DRAW_BUFFER, GL_TRUE);
+      return;
+   }
+
+   switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) {
+   case BUFFER_FRONT_LEFT:
+      toFront = GL_TRUE;
+      break;
+   case BUFFER_BACK_LEFT:
+      toFront = GL_FALSE;
+      break;
+   default:
+      FALLBACK(vmesa, VIA_FALLBACK_DRAW_BUFFER, GL_TRUE);
+      return;
+   }
+
+   /* Finish rendering to the old target before switching. */
+   VIA_FLUSH_DMA(vmesa);
+   if (toFront) {
+      vmesa->drawBuffer = &vmesa->front;
+   }
+   else {
+      vmesa->drawBuffer = &vmesa->back;
+   }
+   FALLBACK(vmesa, VIA_FALLBACK_DRAW_BUFFER, GL_FALSE);
+
+   viaXMesaWindowMoved(vmesa);
+}
+
+/* Driver hook for glClearColor: convert the float RGBA colour to bytes and
+ * cache it in vmesa->ClearColor, packed for the screen's pixel depth.
+ */
+static void viaClearColor(GLcontext *ctx, const GLfloat color[4])
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    GLubyte rgba[4];
+    GLuint c;
+
+    for (c = 0; c < 4; c++) {
+        CLAMPED_FLOAT_TO_UBYTE(rgba[c], color[c]);
+    }
+
+    vmesa->ClearColor = viaPackColor(vmesa->viaScreen->bitsPerPixel,
+                                     rgba[0], rgba[1],
+                                     rgba[2], rgba[3]);
+}
+
+/* Bit positions of the per-channel "write disabled" flags in ClearMask. */
+#define WRITEMASK_ALPHA_SHIFT 31
+#define WRITEMASK_RED_SHIFT   30
+#define WRITEMASK_GREEN_SHIFT 29
+#define WRITEMASK_BLUE_SHIFT  28
+
+/* Driver hook for glColorMask: cache the channel write mask in
+ * vmesa->ClearMask.  A bit is set for every channel whose writes are
+ * DISABLED (note the negation of each argument).
+ *
+ * The flags are converted to GLuint before shifting: `!a` has type int, and
+ * shifting a signed 1 left by 31 into the sign bit is undefined behaviour.
+ */
+static void viaColorMask(GLcontext *ctx,
+			 GLboolean r, GLboolean g,
+			 GLboolean b, GLboolean a)
+{
+   struct via_context *vmesa = VIA_CONTEXT( ctx );
+
+   if (VIA_DEBUG & DEBUG_STATE)
+      fprintf(stderr, "%s r(%d) g(%d) b(%d) a(%d)\n", __FUNCTION__, r, g, b, a);
+
+   vmesa->ClearMask = ((((GLuint) !r) << WRITEMASK_RED_SHIFT) |
+		       (((GLuint) !g) << WRITEMASK_GREEN_SHIFT) |
+		       (((GLuint) !b) << WRITEMASK_BLUE_SHIFT) |
+		       (((GLuint) !a) << WRITEMASK_ALPHA_SHIFT));
+}
+
+
+
+/* Rebuild vmesa->ViewportMatrix from Mesa's window map.
+ *
+ * X is translated by the drawable's X origin; Y is negated and translated by
+ * the drawable's Y origin plus its height; Z scale and offset are divided by
+ * depth_max.  The drawable origin is folded in because this hardware just
+ * isn't capable of private back buffers without glitches and/or a hefty
+ * locking scheme.
+ */
+void viaCalcViewport(GLcontext *ctx)
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    struct via_renderbuffer *const vrb =
+      (struct via_renderbuffer *) vmesa->driDrawable->driverPrivate;
+    const GLfloat *win = ctx->Viewport._WindowMap.m;
+    GLfloat *hw = vmesa->ViewportMatrix.m;
+
+    /* X axis */
+    hw[MAT_SX] =   win[MAT_SX];
+    hw[MAT_TX] =   win[MAT_TX] + vrb->drawX + SUBPIXEL_X;
+
+    /* Y axis, flipped */
+    hw[MAT_SY] = - win[MAT_SY];
+    hw[MAT_TY] = - win[MAT_TY] + vrb->drawY + SUBPIXEL_Y + vrb->drawH;
+
+    /* Z axis, scaled by 1/depth_max */
+    hw[MAT_SZ] =   win[MAT_SZ] * (1.0 / vmesa->depth_max);
+    hw[MAT_TZ] =   win[MAT_TZ] * (1.0 / vmesa->depth_max);
+}
+
+/* Driver hook for glViewport.  The arguments are ignored; the hardware
+ * viewport matrix is recomputed from the context's current window map.
+ */
+static void viaViewport(GLcontext *ctx,
+                        GLint x, GLint y,
+                        GLsizei width, GLsizei height)
+{
+    (void) x;
+    (void) y;
+    (void) width;
+    (void) height;
+
+    viaCalcViewport(ctx);
+}
+
+/* Driver hook for glDepthRange.  The arguments are ignored; the hardware
+ * viewport matrix is recomputed from the context's current window map.
+ */
+static void viaDepthRange(GLcontext *ctx,
+                          GLclampd nearval, GLclampd farval)
+{
+    (void) nearval;
+    (void) farval;
+
+    viaCalcViewport(ctx);
+}
+
+/* Set the initial values of the cached command and enable registers, then
+ * replay the context's current blend equation, blend function, scissor box
+ * and draw buffer through the ctx->Driver hooks so the driver-side state
+ * matches the GL context.
+ */
+void viaInitState(GLcontext *ctx)
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+
+    vmesa->regCmdB = HC_ACMD_HCmdB;
+    vmesa->regEnable = HC_HenCW_MASK;
+
+   /* Mesa should do this for us:
+    */
+
+   ctx->Driver.BlendEquationSeparate( ctx, 
+				      ctx->Color.BlendEquationRGB,
+				      ctx->Color.BlendEquationA);
+
+   ctx->Driver.BlendFuncSeparate( ctx,
+				  ctx->Color.BlendSrcRGB,
+				  ctx->Color.BlendDstRGB,
+				  ctx->Color.BlendSrcA,
+				  ctx->Color.BlendDstA);
+
+   ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y,
+			ctx->Scissor.Width, ctx->Scissor.Height );
+
+   /* Only the first draw buffer is passed on. */
+   ctx->Driver.DrawBuffer( ctx, ctx->Color.DrawBuffer[0] );
+}
+
+/**
+ * Translate GL S/T wrap modes into HTXnMPMD register bits.
+ *
+ * GL_CLAMP and GL_CLAMP_TO_EDGE both map to the hardware clamp mode.  A
+ * wrap mode that is not recognised contributes no bits for its axis.
+ *
+ * \return the S bits OR'ed with the T bits
+ */
+static uint32_t
+get_wrap_mode( GLenum sWrap, GLenum tWrap )
+{
+    uint32_t sBits = 0;
+    uint32_t tBits = 0;
+
+    if (sWrap == GL_REPEAT) {
+	sBits = HC_HTXnMPMD_Srepeat;
+    }
+    else if (sWrap == GL_CLAMP || sWrap == GL_CLAMP_TO_EDGE) {
+	sBits = HC_HTXnMPMD_Sclamp;
+    }
+    else if (sWrap == GL_MIRRORED_REPEAT) {
+	sBits = HC_HTXnMPMD_Smirror;
+    }
+
+    if (tWrap == GL_REPEAT) {
+	tBits = HC_HTXnMPMD_Trepeat;
+    }
+    else if (tWrap == GL_CLAMP || tWrap == GL_CLAMP_TO_EDGE) {
+	tBits = HC_HTXnMPMD_Tclamp;
+    }
+    else if (tWrap == GL_MIRRORED_REPEAT) {
+	tBits = HC_HTXnMPMD_Tmirror;
+    }
+
+    return sBits | tBits;
+}
+
+/**
+ * Translate GL minification/magnification filters into hardware filter bits.
+ *
+ * The minification filter selects the FLSs/FLTs bits (nearest or linear)
+ * and, for the *_MIPMAP_* modes only, the FLDs bits.  The magnification
+ * filter selects the FLSe/FLTe bits.  Unrecognised values contribute nothing.
+ */
+static uint32_t
+get_minmag_filter( GLenum min, GLenum mag )
+{
+    uint32_t bits = 0;
+
+    /* FLSs/FLTs part of the minification filter. */
+    switch (min) {
+    case GL_NEAREST:
+    case GL_NEAREST_MIPMAP_NEAREST:
+    case GL_NEAREST_MIPMAP_LINEAR:
+        bits = HC_HTXnFLSs_Nearest | HC_HTXnFLTs_Nearest;
+        break;
+    case GL_LINEAR:
+    case GL_LINEAR_MIPMAP_NEAREST:
+    case GL_LINEAR_MIPMAP_LINEAR:
+        bits = HC_HTXnFLSs_Linear | HC_HTXnFLTs_Linear;
+        break;
+    default:
+        break;
+    }
+
+    /* FLDs part, present only for the mipmapped modes. */
+    switch (min) {
+    case GL_NEAREST_MIPMAP_NEAREST:
+    case GL_LINEAR_MIPMAP_NEAREST:
+        bits |= HC_HTXnFLDs_Nearest;
+        break;
+    case GL_NEAREST_MIPMAP_LINEAR:
+    case GL_LINEAR_MIPMAP_LINEAR:
+        bits |= HC_HTXnFLDs_Linear;
+        break;
+    default:
+        break;
+    }
+
+    /* Magnification filter. */
+    if (mag == GL_LINEAR) {
+        bits |= HC_HTXnFLSe_Linear | HC_HTXnFLTe_Linear;
+    }
+    else if (mag == GL_NEAREST) {
+        bits |= HC_HTXnFLSe_Nearest | HC_HTXnFLTe_Nearest;
+    }
+
+    return bits;
+}
+
+
+/**
+ * Compute the cached texture registers for one enabled texture unit.
+ *
+ * Fills vmesa->regHTXnTB[unit] and regHTXnMPMD[unit] from the bound texture
+ * object's filter and wrap modes, sets up the border colour registers when
+ * the base level image has a border, switches to constant-LOD mode when the
+ * unit has a non-zero LOD bias, and finally programs the combiner via
+ * viaTexCombineState().
+ *
+ * \param vmesa    driver context whose register cache is updated
+ * \param texUnit  the GL texture unit to translate
+ * \param unit     index of that unit (0 or 1) into the per-unit registers
+ * \return GL_FALSE if viaTexCombineState() rejects the combine state,
+ *         GL_TRUE otherwise
+ */
+static GLboolean via_choose_texture_unit_state(struct via_context *vmesa,
+                                               struct gl_texture_unit *texUnit,
+                                               GLuint unit)
+{
+    struct gl_texture_object *texObj = texUnit->_Current;
+
+    vmesa->regHTXnTB[unit] = get_minmag_filter( texObj->MinFilter,
+                                                texObj->MagFilter );
+
+    vmesa->regHTXnMPMD[unit] &= ~(HC_HTXnMPMD_SMASK | HC_HTXnMPMD_TMASK);
+    vmesa->regHTXnMPMD[unit] |= get_wrap_mode( texObj->WrapS,
+                                               texObj->WrapT );
+
+    /* Border colour is only enabled when the base image has a border. */
+    vmesa->regHTXnTB[unit] &= ~(HC_HTXnTB_TBC_S | HC_HTXnTB_TBC_T);
+    if (texObj->Image[0][texObj->BaseLevel]->Border > 0) {
+        vmesa->regHTXnTB[unit] |= (HC_HTXnTB_TBC_S | HC_HTXnTB_TBC_T);
+        vmesa->regHTXnTBC[unit] =
+            PACK_COLOR_888(FLOAT_TO_UBYTE(texObj->BorderColor.f[0]),
+                           FLOAT_TO_UBYTE(texObj->BorderColor.f[1]),
+                           FLOAT_TO_UBYTE(texObj->BorderColor.f[2]));
+        vmesa->regHTXnTRAH[unit] = FLOAT_TO_UBYTE(texObj->BorderColor.f[3]);
+    }
+
+    if (texUnit->LodBias != 0.0f) {
+        GLuint b = viaComputeLodBias(texUnit->LodBias);
+        vmesa->regHTXnTB[unit] &= ~HC_HTXnFLDs_MASK;
+        vmesa->regHTXnTB[unit] |= HC_HTXnFLDs_ConstLOD;
+        vmesa->regHTXnCLOD[unit] = (b&0x1f) | (((~b)&0x1f)<<10); /* FIXME */
+    }
+
+    if (!viaTexCombineState( vmesa, texUnit->_CurrentCombine, unit )) {
+        if (VIA_DEBUG & DEBUG_TEXTURE)
+            fprintf(stderr, "viaTexCombineState failed for unit %u\n", unit);
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/**
+ * Update the texture-related enable bits and per-unit texture registers for
+ * texture units 0 and 1.
+ *
+ * When neither unit is really enabled the texture enable bits are cleared
+ * and nothing else changes.  Otherwise the bits are set and each enabled
+ * unit is translated in order; translation stops at the first unit whose
+ * combine state the hardware cannot express.
+ *
+ * \return GL_FALSE if a unit's combine state is unsupported, else GL_TRUE
+ */
+static GLboolean viaChooseTextureState(GLcontext *ctx) 
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    struct gl_texture_unit *texUnit0 = &ctx->Texture.Unit[0];
+    struct gl_texture_unit *texUnit1 = &ctx->Texture.Unit[1];
+    GLuint unit;
+
+    if (!texUnit0->_ReallyEnabled && !texUnit1->_ReallyEnabled) {
+        vmesa->regEnable &= ~(HC_HenTXMP_MASK | HC_HenTXCH_MASK |
+                              HC_HenTXPP_MASK);
+        return GL_TRUE;
+    }
+
+    vmesa->regEnable |= HC_HenTXMP_MASK | HC_HenTXCH_MASK | HC_HenTXPP_MASK;
+
+    for (unit = 0; unit < 2; unit++) {
+        struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+
+        if (texUnit->_ReallyEnabled &&
+            !via_choose_texture_unit_state(vmesa, texUnit, unit)) {
+            return GL_FALSE;
+        }
+    }
+
+    return GL_TRUE;
+}
+
+static void viaChooseColorState(GLcontext *ctx) 
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    GLenum s = ctx->Color.BlendSrcRGB;
+    GLenum d = ctx->Color.BlendDstRGB;
+    /* Fill the blend, alpha-test, logic-op and color-mask register shadows from ctx->Color. */
+    /* The HW's blending equation is:
+     * (Ca * FCa + Cbias + Cb * FCb) << Cshift
+     */
+
+    if (ctx->Color.BlendEnabled) {
+        vmesa->regEnable |= HC_HenABL_MASK;
+        /* Ca  -- always from source color.
+         */
+        vmesa->regHABLCsat = HC_HABLCsat_MASK | HC_HABLCa_OPC | HC_HABLCa_Csrc;
+        /* Aa  -- always from source alpha.
+         */
+        vmesa->regHABLAsat = HC_HABLAsat_MASK | HC_HABLAa_OPA | HC_HABLAa_Asrc;
+        /* FCa -- depend on following condition.
+         * FAa -- depend on following condition.
+         */
+        switch (s) {
+        case GL_ZERO:
+            /* (0, 0, 0, 0)
+             */
+            vmesa->regHABLCsat |= HC_HABLFCa_OPC | HC_HABLFCa_HABLRCa;
+            vmesa->regHABLAsat |= HC_HABLFAa_OPA | HC_HABLFAa_HABLFRA;
+            vmesa->regHABLRFCa = 0x0;
+            vmesa->regHABLRAa = 0x0;
+            break;
+        case GL_ONE:
+            /* (1, 1, 1, 1)
+             */
+            vmesa->regHABLCsat |= HC_HABLFCa_InvOPC | HC_HABLFCa_HABLRCa;
+            vmesa->regHABLAsat |= HC_HABLFAa_InvOPA | HC_HABLFAa_HABLFRA;
+            vmesa->regHABLRFCa = 0x0;
+            vmesa->regHABLRAa = 0x0;
+            break;
+        case GL_SRC_COLOR:
+            /* (Rs, Gs, Bs, As)
+             */
+            vmesa->regHABLCsat |= HC_HABLFCa_OPC | HC_HABLFCa_Csrc;
+            vmesa->regHABLAsat |= HC_HABLFAa_OPA | HC_HABLFAa_Asrc;
+            break;
+        case GL_ONE_MINUS_SRC_COLOR:
+            /* (1, 1, 1, 1) - (Rs, Gs, Bs, As)
+             */
+            vmesa->regHABLCsat |= HC_HABLFCa_InvOPC | HC_HABLFCa_Csrc;
+            vmesa->regHABLAsat |= HC_HABLFAa_InvOPA | HC_HABLFAa_Asrc;
+            break;
+        case GL_DST_COLOR:
+            /* (Rd, Gd, Bd, Ad)
+             */
+            vmesa->regHABLCsat |= HC_HABLFCa_OPC | HC_HABLFCa_Cdst;
+            vmesa->regHABLAsat |= HC_HABLFAa_OPA | HC_HABLFAa_Adst;
+            break;
+        case GL_ONE_MINUS_DST_COLOR:
+            /* (1, 1, 1, 1) - (Rd, Gd, Bd, Ad)
+             */
+            vmesa->regHABLCsat |= HC_HABLFCa_InvOPC | HC_HABLFCa_Cdst;
+            vmesa->regHABLAsat |= HC_HABLFAa_InvOPA | HC_HABLFAa_Adst;
+            break;
+        case GL_SRC_ALPHA:
+            /* (As, As, As, As)
+             */
+            vmesa->regHABLCsat |= HC_HABLFCa_OPC | HC_HABLFCa_Asrc;
+            vmesa->regHABLAsat |= HC_HABLFAa_OPA | HC_HABLFAa_Asrc;
+            break;
+        case GL_ONE_MINUS_SRC_ALPHA:
+            /* (1, 1, 1, 1) - (As, As, As, As)
+             */
+            vmesa->regHABLCsat |= HC_HABLFCa_InvOPC | HC_HABLFCa_Asrc;
+            vmesa->regHABLAsat |= HC_HABLFAa_InvOPA | HC_HABLFAa_Asrc;
+            break;
+        case GL_DST_ALPHA:
+            {
+                if (vmesa->viaScreen->bitsPerPixel == 16) {
+                    /* (1, 1, 1, 1)
+                     */
+                    vmesa->regHABLCsat |= (HC_HABLFCa_InvOPC | 
+					   HC_HABLFCa_HABLRCa);
+                    vmesa->regHABLAsat |= (HC_HABLFAa_InvOPA | 
+					   HC_HABLFAa_HABLFRA);
+                    vmesa->regHABLRFCa = 0x0;
+                    vmesa->regHABLRAa = 0x0;
+                }
+                else {
+                    /* (Ad, Ad, Ad, Ad)
+                     */
+                    vmesa->regHABLCsat |= HC_HABLFCa_OPC | HC_HABLFCa_Adst;
+                    vmesa->regHABLAsat |= HC_HABLFAa_OPA | HC_HABLFAa_Adst;
+                }
+            }
+            break;
+        case GL_ONE_MINUS_DST_ALPHA:
+            {
+                if (vmesa->viaScreen->bitsPerPixel == 16) {
+                    /* (1, 1, 1, 1) - (1, 1, 1, 1) = (0, 0, 0, 0)
+                     */
+                    vmesa->regHABLCsat |= HC_HABLFCa_OPC | HC_HABLFCa_HABLRCa;
+                    vmesa->regHABLAsat |= HC_HABLFAa_OPA | HC_HABLFAa_HABLFRA;
+                    vmesa->regHABLRFCa = 0x0;
+                    vmesa->regHABLRAa = 0x0;
+                }
+                else {
+                    /* (1, 1, 1, 1) - (Ad, Ad, Ad, Ad)
+                     */
+                    vmesa->regHABLCsat |= HC_HABLFCa_InvOPC | HC_HABLFCa_Adst;
+                    vmesa->regHABLAsat |= HC_HABLFAa_InvOPA | HC_HABLFAa_Adst;
+                }
+            }
+            break;
+        case GL_SRC_ALPHA_SATURATE:
+            {
+                if (vmesa->viaScreen->bitsPerPixel == 16) {
+                    /* (f, f, f, 1), f = min(As, 1 - Ad) = min(As, 1 - 1) = 0
+                     * So (f, f, f, 1) = (0, 0, 0, 1)
+                     */
+                    vmesa->regHABLCsat |= HC_HABLFCa_OPC | HC_HABLFCa_HABLRCa;
+                    vmesa->regHABLAsat |= (HC_HABLFAa_InvOPA | 
+					   HC_HABLFAa_HABLFRA);
+                    vmesa->regHABLRFCa = 0x0;
+                    vmesa->regHABLRAa = 0x0;
+                }
+                else {
+                    /* (f, f, f, 1), f = min(As, 1 - Ad)
+                     */
+                    vmesa->regHABLCsat |= (HC_HABLFCa_OPC | 
+					   HC_HABLFCa_mimAsrcInvAdst);
+                    vmesa->regHABLAsat |= (HC_HABLFAa_InvOPA | 
+					   HC_HABLFAa_HABLFRA);
+                    vmesa->regHABLRFCa = 0x0;
+                    vmesa->regHABLRAa = 0x0;
+                }
+            }
+            break;
+        }
+        /* NOTE(review): the switch above has no default; any other source factor adds no FCa/FAa bits. */
+        /* Op is add.
+         */
+
+        /* bias is 0.
+         */
+        vmesa->regHABLCsat |= HC_HABLCbias_HABLRCbias;
+        vmesa->regHABLAsat |= HC_HABLAbias_HABLRAbias;
+
+        /* Cb  -- always from destination color.
+         */
+        vmesa->regHABLCop = HC_HABLCb_OPC | HC_HABLCb_Cdst;
+        /* Ab  -- always from destination alpha.
+         */
+        vmesa->regHABLAop = HC_HABLAb_OPA | HC_HABLAb_Adst;
+        /* FCb -- depend on following condition.
+         */
+        switch (d) {
+        case GL_ZERO:
+            /* (0, 0, 0, 0)
+             */
+            vmesa->regHABLCop |= HC_HABLFCb_OPC | HC_HABLFCb_HABLRCb;
+            vmesa->regHABLAop |= HC_HABLFAb_OPA | HC_HABLFAb_HABLFRA;
+            vmesa->regHABLRFCb = 0x0;
+            vmesa->regHABLRAb = 0x0;
+            break;
+        case GL_ONE:
+            /* (1, 1, 1, 1)
+             */
+            vmesa->regHABLCop |= HC_HABLFCb_InvOPC | HC_HABLFCb_HABLRCb;
+            vmesa->regHABLAop |= HC_HABLFAb_InvOPA | HC_HABLFAb_HABLFRA;
+            vmesa->regHABLRFCb = 0x0;
+            vmesa->regHABLRAb = 0x0;
+            break;
+        case GL_SRC_COLOR:
+            /* (Rs, Gs, Bs, As)
+             */
+            vmesa->regHABLCop |= HC_HABLFCb_OPC | HC_HABLFCb_Csrc;
+            vmesa->regHABLAop |= HC_HABLFAb_OPA | HC_HABLFAb_Asrc;
+            break;
+        case GL_ONE_MINUS_SRC_COLOR:
+            /* (1, 1, 1, 1) - (Rs, Gs, Bs, As)
+             */
+            vmesa->regHABLCop |= HC_HABLFCb_InvOPC | HC_HABLFCb_Csrc;
+            vmesa->regHABLAop |= HC_HABLFAb_InvOPA | HC_HABLFAb_Asrc;
+            break;
+        case GL_DST_COLOR:
+            /* (Rd, Gd, Bd, Ad)
+             */
+            vmesa->regHABLCop |= HC_HABLFCb_OPC | HC_HABLFCb_Cdst;
+            vmesa->regHABLAop |= HC_HABLFAb_OPA | HC_HABLFAb_Adst;
+            break;
+        case GL_ONE_MINUS_DST_COLOR:
+            /* (1, 1, 1, 1) - (Rd, Gd, Bd, Ad)
+             */
+            vmesa->regHABLCop |= HC_HABLFCb_InvOPC | HC_HABLFCb_Cdst;
+            vmesa->regHABLAop |= HC_HABLFAb_InvOPA | HC_HABLFAb_Adst;
+            break;
+        case GL_SRC_ALPHA:
+            /* (As, As, As, As)
+             */
+            vmesa->regHABLCop |= HC_HABLFCb_OPC | HC_HABLFCb_Asrc;
+            vmesa->regHABLAop |= HC_HABLFAb_OPA | HC_HABLFAb_Asrc;
+            break;
+        case GL_ONE_MINUS_SRC_ALPHA:
+            /* (1, 1, 1, 1) - (As, As, As, As)
+             */
+            vmesa->regHABLCop |= HC_HABLFCb_InvOPC | HC_HABLFCb_Asrc;
+            vmesa->regHABLAop |= HC_HABLFAb_InvOPA | HC_HABLFAb_Asrc;
+            break;
+        case GL_DST_ALPHA:
+            {
+                if (vmesa->viaScreen->bitsPerPixel == 16) {
+                    /* (1, 1, 1, 1)
+                     */
+                    vmesa->regHABLCop |= HC_HABLFCb_InvOPC | HC_HABLFCb_HABLRCb;
+                    vmesa->regHABLAop |= HC_HABLFAb_InvOPA | HC_HABLFAb_HABLFRA;
+                    vmesa->regHABLRFCb = 0x0;
+                    vmesa->regHABLRAb = 0x0;
+                }
+                else {
+                    /* (Ad, Ad, Ad, Ad)
+                     */
+                    vmesa->regHABLCop |= HC_HABLFCb_OPC | HC_HABLFCb_Adst;
+                    vmesa->regHABLAop |= HC_HABLFAb_OPA | HC_HABLFAb_Adst;
+                }
+            }
+            break;
+        case GL_ONE_MINUS_DST_ALPHA:
+            {
+                if (vmesa->viaScreen->bitsPerPixel == 16) {
+                    /* (1, 1, 1, 1) - (1, 1, 1, 1) = (0, 0, 0, 0)
+                     */
+                    vmesa->regHABLCop |= HC_HABLFCb_OPC | HC_HABLFCb_HABLRCb;
+                    vmesa->regHABLAop |= HC_HABLFAb_OPA | HC_HABLFAb_HABLFRA;
+                    vmesa->regHABLRFCb = 0x0;
+                    vmesa->regHABLRAb = 0x0;
+                }
+                else {
+                    /* (1, 1, 1, 1) - (Ad, Ad, Ad, Ad)
+                     */
+                    vmesa->regHABLCop |= HC_HABLFCb_InvOPC | HC_HABLFCb_Adst;
+                    vmesa->regHABLAop |= HC_HABLFAb_InvOPA | HC_HABLFAb_Adst;
+                }
+            }
+            break;
+        default:
+            vmesa->regHABLCop |= HC_HABLFCb_OPC | HC_HABLFCb_HABLRCb;
+            vmesa->regHABLAop |= HC_HABLFAb_OPA | HC_HABLFAb_HABLFRA;
+            vmesa->regHABLRFCb = 0x0;
+            vmesa->regHABLRAb = 0x0;
+            break;
+        }
+
+        if (vmesa->viaScreen->bitsPerPixel <= 16)
+            vmesa->regEnable &= ~HC_HenDT_MASK;
+
+    }
+    else {
+        vmesa->regEnable &= (~HC_HenABL_MASK);
+    }
+    /* Alpha test: reference value in the low byte, (AlphaFunc - GL_NEVER) shifted up by 8. */
+    if (ctx->Color.AlphaEnabled) {
+        vmesa->regEnable |= HC_HenAT_MASK;
+        vmesa->regHATMD = FLOAT_TO_UBYTE(ctx->Color.AlphaRef) |
+            ((ctx->Color.AlphaFunc - GL_NEVER) << 8);
+    }
+    else {
+        vmesa->regEnable &= (~HC_HenAT_MASK);
+    }
+
+    if (ctx->Color.DitherFlag && (vmesa->viaScreen->bitsPerPixel < 32)) {
+        if (ctx->Color.BlendEnabled) {
+            vmesa->regEnable &= ~HC_HenDT_MASK;
+        }
+        else {
+            vmesa->regEnable |= HC_HenDT_MASK;
+        }
+    }
+
+    /* NOTE(review): the clear below is unconditional, so the dither choice made above never survives. */
+    vmesa->regEnable &= ~HC_HenDT_MASK;
+
+    if (ctx->Color.ColorLogicOpEnabled) 
+        vmesa->regHROP = ROP[ctx->Color.LogicOp & 0xF];
+    else
+        vmesa->regHROP = HC_HROP_P;
+    /* RGB write mask goes to HFBBMSKL; the alpha mask value is OR'ed into HROP and gates HC_HenAW. */
+    vmesa->regHFBBMSKL = PACK_COLOR_888(ctx->Color.ColorMask[0][0],
+					ctx->Color.ColorMask[0][1],
+					ctx->Color.ColorMask[0][2]);
+    vmesa->regHROP |= ctx->Color.ColorMask[0][3];
+
+    if (ctx->Color.ColorMask[0][3])
+        vmesa->regEnable |= HC_HenAW_MASK;
+    else
+        vmesa->regEnable &= ~HC_HenAW_MASK;
+}
+
+static void viaChooseFogState(GLcontext *ctx)
+{
+    /* Mirror the GL fog enable and fog color into the fog register shadows. */
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    GLubyte red, green, blue, alpha;
+
+    if (!ctx->Fog.Enabled) {
+        vmesa->regEnable &= ~HC_HenFOG_MASK;
+        return;
+    }
+
+    vmesa->regEnable |= HC_HenFOG_MASK;
+
+    /* Fog equation 0 (OpenGL's default) & local fog. */
+    vmesa->regHFogLF = 0x0;
+
+    /* Each channel is multiplied by 255 and narrowed to GLubyte. */
+    red   = (GLubyte)(ctx->Fog.Color[0] * 255.0F);
+    green = (GLubyte)(ctx->Fog.Color[1] * 255.0F);
+    blue  = (GLubyte)(ctx->Fog.Color[2] * 255.0F);
+    alpha = (GLubyte)(ctx->Fog.Color[3] * 255.0F);
+    vmesa->regHFogCL = (red << 16) | (green << 8) | blue;
+    vmesa->regHFogCH = alpha;
+}
+
+static void viaChooseDepthState(GLcontext *ctx)
+{
+    /* Program the depth-test and depth-write enables and the compare
+     * function from ctx->Depth. */
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+
+    if (!ctx->Depth.Test) {
+        /* Depth writes are forced off together with the test, whatever
+         * ctx->Depth.Mask says.  Note kept from the original code:
+         * "[DBG] racer : can't display cars in car selection menu".
+         */
+        vmesa->regEnable &= ~HC_HenZT_MASK;
+        vmesa->regEnable &= (~HC_HenZW_MASK);
+        return;
+    }
+
+    vmesa->regEnable |= HC_HenZT_MASK;
+    if (ctx->Depth.Mask)
+        vmesa->regEnable |= HC_HenZW_MASK;
+    else
+        vmesa->regEnable &= (~HC_HenZW_MASK);
+    vmesa->regHZWTMD = (ctx->Depth.Func - GL_NEVER) << 16;
+}
+
+static void viaChooseLineState(GLcontext *ctx)
+{
+    /* Line stipple: enable bit plus the pattern and factor registers. */
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+
+    if (!ctx->Line.StippleFlag) {
+        vmesa->regEnable &= ~HC_HenLP_MASK;
+        return;
+    }
+    vmesa->regEnable |= HC_HenLP_MASK;
+    vmesa->regHLP = ctx->Line.StipplePattern;
+    vmesa->regHLPRF = ctx->Line.StippleFactor;
+}
+
+static void viaChoosePolygonState(GLcontext *ctx)
+{
+    /* Polygon stipple is reported through FALLBACK(); face culling maps
+     * to the HC_HenFBCull_MASK enable bit.
+     */
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+
+#if 0
+    /* Polygon stipple is broken - see via_state.c
+     */
+    if (ctx->Polygon.StippleFlag)
+        vmesa->regEnable |= HC_HenSP_MASK;
+    else
+        vmesa->regEnable &= ~HC_HenSP_MASK;
+#else
+    /* The fallback bit simply tracks the stipple flag. */
+    FALLBACK(vmesa, VIA_FALLBACK_POLY_STIPPLE, ctx->Polygon.StippleFlag);
+#endif
+
+    if (!ctx->Polygon.CullFlag) {
+        vmesa->regEnable &= ~HC_HenFBCull_MASK;
+        return;
+    }
+    vmesa->regEnable |= HC_HenFBCull_MASK;
+}
+
+static void viaChooseStencilState(GLcontext *ctx)
+{
+    /* Translate the index-0 stencil state into the HSTREF / HSTMD shadows. */
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    GLuint sfail = 0, zfail = 0, zpass = 0;
+
+    if (!ctx->Stencil._Enabled) {
+        vmesa->regEnable &= ~HC_HenST_MASK;
+        return;
+    }
+    vmesa->regEnable |= HC_HenST_MASK;
+    vmesa->regHSTREF = ((ctx->Stencil.Ref[0] & 0xFF) << HC_HSTREF_SHIFT) |
+                       (0xFF << HC_HSTOPMSK_SHIFT) |
+                       (ctx->Stencil.ValueMask[0] & 0xFF);
+
+    /* Operation when the stencil test fails. */
+    switch (ctx->Stencil.FailFunc[0]) {
+    case GL_KEEP:
+        sfail = HC_HSTOPSF_KEEP;
+        break;
+    case GL_ZERO:
+        sfail = HC_HSTOPSF_ZERO;
+        break;
+    case GL_REPLACE:
+        sfail = HC_HSTOPSF_REPLACE;
+        break;
+    case GL_INVERT:
+        sfail = HC_HSTOPSF_INVERT;
+        break;
+    case GL_INCR:
+        sfail = HC_HSTOPSF_INCR;
+        break;
+    case GL_DECR:
+        sfail = HC_HSTOPSF_DECR;
+        break;
+    }
+
+    /* Operation when stencil passes but the depth test fails. */
+    switch (ctx->Stencil.ZFailFunc[0]) {
+    case GL_KEEP:
+        zfail = HC_HSTOPSPZF_KEEP;
+        break;
+    case GL_ZERO:
+        zfail = HC_HSTOPSPZF_ZERO;
+        break;
+    case GL_REPLACE:
+        zfail = HC_HSTOPSPZF_REPLACE;
+        break;
+    case GL_INVERT:
+        zfail = HC_HSTOPSPZF_INVERT;
+        break;
+    case GL_INCR:
+        zfail = HC_HSTOPSPZF_INCR;
+        break;
+    case GL_DECR:
+        zfail = HC_HSTOPSPZF_DECR;
+        break;
+    }
+
+    /* Operation when both stencil and depth tests pass. */
+    switch (ctx->Stencil.ZPassFunc[0]) {
+    case GL_KEEP:
+        zpass = HC_HSTOPSPZP_KEEP;
+        break;
+    case GL_ZERO:
+        zpass = HC_HSTOPSPZP_ZERO;
+        break;
+    case GL_REPLACE:
+        zpass = HC_HSTOPSPZP_REPLACE;
+        break;
+    case GL_INVERT:
+        zpass = HC_HSTOPSPZP_INVERT;
+        break;
+    case GL_INCR:
+        zpass = HC_HSTOPSPZP_INCR;
+        break;
+    case GL_DECR:
+        zpass = HC_HSTOPSPZP_DECR;
+        break;
+    }
+    vmesa->regHSTMD = ((ctx->Stencil.Function[0] - GL_NEVER) << 16) |
+                      sfail | zfail | zpass;
+}
+
+
+
+static void viaChooseTriangle(GLcontext *ctx)
+{
+    /* Set or clear HC_HBFace_MASK in regCmdB from cull mode and winding;
+     * regCmdB is left alone when culling is off or the mode is neither
+     * GL_FRONT nor GL_BACK. */
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    GLenum setWhen;
+
+    if (ctx->Polygon.CullFlag != GL_TRUE)
+        return;
+
+    if (ctx->Polygon.CullFaceMode == GL_FRONT)
+        setWhen = GL_CCW;
+    else if (ctx->Polygon.CullFaceMode == GL_BACK)
+        setWhen = GL_CW;
+    else
+        return;
+
+    if (ctx->Polygon.FrontFace == setWhen)
+        vmesa->regCmdB |= HC_HBFace_MASK;
+    else
+        vmesa->regCmdB &= ~HC_HBFace_MASK;
+}
+
+void viaValidateState( GLcontext *ctx )
+{
+    /* Recompute the register shadows for every state group flagged in
+     * vmesa->newState, then transfer those flags to newEmitState.
+     */
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+
+    if (vmesa->newState & _NEW_TEXTURE) {
+        /* FALLBACK receives true when either texture step failed. */
+        GLboolean failed = !(viaChooseTextureState(ctx) &&
+                             viaUpdateTextureState(ctx));
+        FALLBACK(vmesa, VIA_FALLBACK_TEXTURE, failed);
+    }
+
+    if (vmesa->newState & _NEW_COLOR)
+        viaChooseColorState(ctx);
+    if (vmesa->newState & _NEW_DEPTH)
+        viaChooseDepthState(ctx);
+    if (vmesa->newState & _NEW_FOG)
+        viaChooseFogState(ctx);
+    if (vmesa->newState & _NEW_LINE)
+        viaChooseLineState(ctx);
+    if (vmesa->newState & (_NEW_POLYGON | _NEW_POLYGONSTIPPLE)) {
+        viaChoosePolygonState(ctx);
+        viaChooseTriangle(ctx);
+    }
+    /* Stencil registers are only programmed when have_hw_stencil is set. */
+    if (vmesa->have_hw_stencil && (vmesa->newState & _NEW_STENCIL))
+        viaChooseStencilState(ctx);
+
+    /* The two enables below are refreshed on every call. */
+    if (!(ctx->_TriangleCaps & DD_SEPARATE_SPECULAR))
+        vmesa->regEnable &= ~HC_HenCS_MASK;
+    else
+        vmesa->regEnable |= HC_HenCS_MASK;
+
+    if (!ctx->Point.SmoothFlag &&
+        !ctx->Line.SmoothFlag &&
+        !ctx->Polygon.SmoothFlag)
+        vmesa->regEnable &= ~HC_HenAA_MASK;
+    else
+        vmesa->regEnable |= HC_HenAA_MASK;
+
+    vmesa->newEmitState |= vmesa->newState;
+    vmesa->newState = 0;
+}
+
+static void viaInvalidateState(GLcontext *ctx, GLuint newState)
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    /* Installed as ctx->Driver.UpdateState; accumulates dirty bits that viaValidateState() consumes. */
+    VIA_FINISH_PRIM( vmesa );
+    vmesa->newState |= newState;
+    /* Forward the same dirty bits to the swrast, swsetup, vbo and tnl modules. */
+    _swrast_InvalidateState(ctx, newState);
+    _swsetup_InvalidateState(ctx, newState);
+    _vbo_InvalidateState(ctx, newState);
+    _tnl_InvalidateState(ctx, newState);
+}
+
+void viaInitStateFuncs(GLcontext *ctx)
+{
+    /* Install this driver's state hooks; first the Mesa-internal callback. */
+    ctx->Driver.UpdateState = viaInvalidateState;
+
+    /* Blending, color mask and clear color. */
+    ctx->Driver.BlendEquationSeparate = viaBlendEquationSeparate;
+    ctx->Driver.BlendFuncSeparate = viaBlendFuncSeparate;
+    ctx->Driver.ColorMask = viaColorMask;
+    ctx->Driver.ClearColor = viaClearColor;
+
+    /* Buffers, viewport transform and scissor. */
+    ctx->Driver.DrawBuffer = viaDrawBuffer;
+    ctx->Driver.ResizeBuffers = viaReAllocateBuffers; /* XXX this should go away */
+    ctx->Driver.Viewport = viaViewport;
+    ctx->Driver.DepthRange = viaDepthRange;
+    ctx->Driver.Scissor = viaScissor;
+
+    /* Generic enables and render mode. */
+    ctx->Driver.Enable = viaEnable;
+    ctx->Driver.RenderMode = viaRenderMode;
+}
diff --git a/src/mesa/drivers/dri/unichrome/via_state.h b/src/mesa/drivers/dri/unichrome/via_state.h
new file mode 100644
index 0000000000..065ec57d33
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_state.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _VIA_STATE_H
+#define _VIA_STATE_H
+
+#include "via_context.h"
+
+extern void viaInitState(GLcontext *ctx);
+extern void viaInitStateFuncs(GLcontext *ctx);
+extern void viaCalcViewport(GLcontext *ctx);
+extern void viaValidateState(GLcontext *ctx);
+extern void viaEmitState(struct via_context *vmesa);
+/* FALLBACK() is a plain alias: all three arguments go straight to viaFallback(). */
+extern void viaFallback(struct via_context *vmesa, GLuint bit, GLboolean mode);
+#define FALLBACK(vmesa, bit, mode) viaFallback(vmesa, bit, mode)
+
+#endif
diff --git a/src/mesa/drivers/dri/unichrome/via_tex.c b/src/mesa/drivers/dri/unichrome/via_tex.c
new file mode 100644
index 0000000000..a64f093326
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_tex.c
@@ -0,0 +1,965 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "main/enums.h"
+#include "main/formats.h"
+#include "main/colortab.h"
+#include "main/convolve.h"
+#include "main/context.h"
+#include "main/mipmap.h"
+#include "main/mm.h"
+#include "main/simple_list.h"
+#include "main/texobj.h"
+#include "main/texstore.h"
+
+#include "via_context.h"
+#include "via_fb.h"
+#include "via_tex.h"
+#include "via_ioctl.h"
+#include "via_3d_reg.h"
+
+static gl_format
+viaChooseTexFormat( GLcontext *ctx, GLint internalFormat,
+		    GLenum format, GLenum type )
+{
+   struct via_context *vmesa = VIA_CONTEXT(ctx);
+   const GLboolean do32bpt = ( vmesa->viaScreen->bitsPerPixel == 32
+/* 			       && vmesa->viaScreen->textureSize > 4*1024*1024 */
+      );
+   /* Map internalFormat (and, for generic RGB/RGBA, the client format/type)
+    * to a gl_format; unknown values are logged and yield MESA_FORMAT_NONE. */
+   switch ( internalFormat ) {
+   case 4:
+   case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
+      if ( format == GL_BGRA ) {
+	 if ( type == GL_UNSIGNED_INT_8_8_8_8_REV ||
+	      type == GL_UNSIGNED_BYTE ) {
+	    return MESA_FORMAT_ARGB8888;
+	 }
+         else if ( type == GL_UNSIGNED_SHORT_4_4_4_4_REV ) {
+            return MESA_FORMAT_ARGB4444;
+	 }
+         else if ( type == GL_UNSIGNED_SHORT_1_5_5_5_REV ) {
+	    return MESA_FORMAT_ARGB1555;
+	 }
+      }
+      else if ( type == GL_UNSIGNED_BYTE ||
+		type == GL_UNSIGNED_INT_8_8_8_8_REV ||
+		type == GL_UNSIGNED_INT_8_8_8_8 ) {
+	 return MESA_FORMAT_ARGB8888;
+      }
+      return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+
+   case 3:
+   case GL_RGB:
+   case GL_COMPRESSED_RGB:
+      if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) {
+	 return MESA_FORMAT_RGB565;
+      }
+      else if ( type == GL_UNSIGNED_BYTE ) {
+	 return MESA_FORMAT_ARGB8888;
+      }
+      return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_RGB565;
+
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return MESA_FORMAT_ARGB8888;
+
+   case GL_RGBA4:
+   case GL_RGBA2:
+      return MESA_FORMAT_ARGB4444;
+
+   case GL_RGB5_A1:
+      return MESA_FORMAT_ARGB1555;
+
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return MESA_FORMAT_ARGB8888;
+
+   case GL_RGB5:
+   case GL_RGB4:
+   case GL_R3_G3_B2:
+      return MESA_FORMAT_RGB565;
+
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+      return MESA_FORMAT_A8;
+
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+      return MESA_FORMAT_L8;
+
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return MESA_FORMAT_AL88;
+
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      return MESA_FORMAT_I8;
+
+   case GL_YCBCR_MESA:
+      if (type == GL_UNSIGNED_SHORT_8_8_MESA ||
+	  type == GL_UNSIGNED_BYTE)
+         return MESA_FORMAT_YCBCR;
+      else
+         return MESA_FORMAT_YCBCR_REV;
+
+   case GL_COMPRESSED_RGB_FXT1_3DFX:
+      return MESA_FORMAT_RGB_FXT1;
+   case GL_COMPRESSED_RGBA_FXT1_3DFX:
+      return MESA_FORMAT_RGBA_FXT1;
+
+   case GL_RGB_S3TC:
+   case GL_RGB4_S3TC:
+   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+      return MESA_FORMAT_RGB_DXT1;
+
+   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+      return MESA_FORMAT_RGBA_DXT1;
+
+   case GL_RGBA_S3TC:
+   case GL_RGBA4_S3TC:
+   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+      return MESA_FORMAT_RGBA_DXT3;
+
+   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+      return MESA_FORMAT_RGBA_DXT5;
+
+   case GL_COLOR_INDEX:	
+   case GL_COLOR_INDEX1_EXT:	
+   case GL_COLOR_INDEX2_EXT:	
+   case GL_COLOR_INDEX4_EXT:	
+   case GL_COLOR_INDEX8_EXT:	
+   case GL_COLOR_INDEX12_EXT:	    
+   case GL_COLOR_INDEX16_EXT:
+      return MESA_FORMAT_CI8;
+
+   default:
+      fprintf(stderr, "unexpected texture format %s in %s\n", 
+	      _mesa_lookup_enum_by_nr(internalFormat),
+	      __FUNCTION__);
+      return MESA_FORMAT_NONE;
+   }
+
+   return MESA_FORMAT_NONE; /* never get here */
+}
+
+static int logbase2(int n)
+{
+   /* Returns the smallest k with (1 << k) >= n, and 0 for n <= 1.  The
+    * running power is unsigned so it cannot overflow for n > 2^30. */
+   unsigned int pow2 = 1;
+   int k = 0;
+   while (n > 0 && pow2 < (unsigned int) n) {
+      pow2 <<= 1;
+      k++;
+   }
+   return k;
+}
+
+static const char *get_memtype_name( GLint memType )
+{
+   /* Debug name for a memory-type index.  Out-of-range values (including
+    * negatives) map to the final "VIA_MEM_UNKNOWN" entry instead of
+    * reading outside the table. */
+   static const char *names[] = {
+      "VIA_MEM_VIDEO", "VIA_MEM_AGP", "VIA_MEM_SYSTEM",
+      "VIA_MEM_MIXED", "VIA_MEM_UNKNOWN"
+   };
+   const GLint last = (GLint) (sizeof(names) / sizeof(names[0])) - 1;
+   return names[(memType >= 0 && memType <= last) ? memType : last];
+}
+
+
+static GLboolean viaMoveTexBuffers( struct via_context *vmesa,
+				    struct via_tex_buffer **buffers,
+				    GLuint nr,
+				    GLint newMemType )
+{
+   /* Relocate nr texture buffers into newMemType.  Every replacement
+    * buffer is allocated before anything is copied, so a failure leaves
+    * buffers[] untouched and returns GL_FALSE.  On success each image is
+    * re-pointed at its new buffer and the old one is handed to
+    * via_free_texture(): callers must not dereference moved entries of
+    * buffers[] afterwards.
+    */
+   struct via_tex_buffer *newTexBuf[VIA_MAX_TEXLEVELS];
+   GLuint i;   /* unsigned, like nr */
+
+   if (VIA_DEBUG & DEBUG_TEXTURE)
+      fprintf(stderr, "%s to %s\n", __FUNCTION__,
+              get_memtype_name(newMemType));
+
+   /* newTexBuf[] has VIA_MAX_TEXLEVELS slots; refuse to index past it. */
+   if (nr > VIA_MAX_TEXLEVELS)
+      return GL_FALSE;
+
+   memset(newTexBuf, 0, sizeof(newTexBuf));
+
+   /* First do all the allocations (or fail): */
+   for (i = 0; i < nr; i++) {
+      if (buffers[i]->memType == newMemType)
+         continue;
+
+      /* Don't allow uploads in a thrash state.  Should try and
+       * catch this earlier. */
+      if (vmesa->thrashing && newMemType != VIA_MEM_SYSTEM)
+         goto cleanup;
+
+      newTexBuf[i] = via_alloc_texture(vmesa, buffers[i]->size, newMemType);
+      if (!newTexBuf[i])
+         goto cleanup;
+   }
+
+   /* Now copy all the image data and free the old texture memory. */
+   for (i = 0; i < nr; i++) {
+      if (!newTexBuf[i])
+         continue;
+
+      memcpy(newTexBuf[i]->bufAddr, buffers[i]->bufAddr, buffers[i]->size);
+      newTexBuf[i]->image = buffers[i]->image;
+      newTexBuf[i]->image->texMem = newTexBuf[i];
+      newTexBuf[i]->image->image.Data = newTexBuf[i]->bufAddr;
+      via_free_texture(vmesa, buffers[i]);
+   }
+
+   if (VIA_DEBUG & DEBUG_TEXTURE)
+      fprintf(stderr, "%s - success\n", __FUNCTION__);
+
+   return GL_TRUE;
+
+ cleanup:
+   /* Release any allocations made prior to failure: */
+   if (VIA_DEBUG & DEBUG_TEXTURE)
+      fprintf(stderr, "%s - failed\n", __FUNCTION__);
+
+   for (i = 0; i < nr; i++) {
+      if (newTexBuf[i])
+         via_free_texture(vmesa, newTexBuf[i]);
+   }
+
+   return GL_FALSE;
+}
+
+
+static GLboolean viaMoveTexObject( struct via_context *vmesa,
+				   struct via_texture_object *viaObj,
+				   GLint newMemType )
+{
+   /* Gather the buffers of levels firstLevel..lastLevel and move them as
+    * one batch; viaObj->memType changes only if the whole move succeeds. */
+   struct via_texture_image **levels =
+      (struct via_texture_image **)&viaObj->obj.Image[0][0];
+   struct via_tex_buffer *batch[VIA_MAX_TEXLEVELS];
+   GLuint level, count = 0;
+
+   for (level = viaObj->firstLevel; level <= viaObj->lastLevel; level++)
+      batch[count++] = levels[level]->texMem;
+
+   if (!viaMoveTexBuffers( vmesa, &batch[0], count, newMemType ))
+      return GL_FALSE;
+   viaObj->memType = newMemType;
+   return GL_TRUE;
+}
+
+
+
+static GLboolean viaSwapInTexObject( struct via_context *vmesa,
+				     struct via_texture_object *viaObj )
+{
+   /* Base level already outside system memory: move every level to that
+    * same memory type.  Otherwise try AGP first, then video memory. */
+   const struct via_texture_image *base =
+      (struct via_texture_image *)viaObj->obj.Image[0][viaObj->obj.BaseLevel];
+
+   if (VIA_DEBUG & DEBUG_TEXTURE)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+   if (base->texMem->memType == VIA_MEM_SYSTEM)
+      return (viaMoveTexObject( vmesa, viaObj, VIA_MEM_AGP ) ||
+              viaMoveTexObject( vmesa, viaObj, VIA_MEM_VIDEO ));
+   return viaMoveTexObject( vmesa, viaObj, base->texMem->memType );
+}
+
+
+/* Report whether 'heap' is low on texture memory by attempting a trial
+ * allocation of 512 KiB: failure means low, success is undone at once.
+ */
+static GLboolean viaIsTexMemLow( struct via_context *vmesa,
+				 GLuint heap )
+{
+   struct via_tex_buffer *probe = via_alloc_texture(vmesa, 512 * 1024, heap );
+   if (probe) {
+      via_free_texture(vmesa, probe);
+      return GL_FALSE;
+   }
+   return GL_TRUE;
+}
+
+
+/* Speculatively move texture images which haven't been used in a
+ * while back to system memory. 
+ * 
+ * TODO: only do this when texture memory is low.
+ * 
+ * TODO: use dma.
+ *
+ * TODO: keep the fb/agp version hanging around and use the local
+ * version as backing store, so re-upload might be avoided.
+ *
+ * TODO: do this properly in the kernel...
+ */
+GLboolean viaSwapOutWork( struct via_context *vmesa )
+{
+   /* For heaps VIA_MEM_VIDEO..VIA_MEM_AGP, copy buffers last used before
+    * vmesa->lastSwap[1] back to system memory, but only while thrashing or
+    * when viaIsTexMemLow() reports the heap low.  Returns GL_TRUE if any
+    * bytes were moved. */
+   struct via_tex_buffer *s, *tmp;
+   GLuint done = 0;
+   GLuint heap, target;
+
+   if (VIA_DEBUG & DEBUG_TEXTURE)
+      fprintf(stderr, "%s VID %d AGP %d SYS %d\n", __FUNCTION__,
+              vmesa->total_alloc[VIA_MEM_VIDEO], vmesa->total_alloc[VIA_MEM_AGP],
+              vmesa->total_alloc[VIA_MEM_SYSTEM]);
+
+   for (heap = VIA_MEM_VIDEO; heap <= VIA_MEM_AGP; heap++) {
+      GLuint nr = 0, sz = 0;
+
+      if (vmesa->thrashing) {
+         if (VIA_DEBUG & DEBUG_TEXTURE)
+            fprintf(stderr, "Heap %d: trash flag\n", heap);
+         target = 1*1024*1024;
+      }
+      else if (viaIsTexMemLow(vmesa, heap)) {
+         if (VIA_DEBUG & DEBUG_TEXTURE)
+            fprintf(stderr, "Heap %d: low memory\n", heap);
+         target = 64*1024;
+      }
+      else {
+         if (VIA_DEBUG & DEBUG_TEXTURE)
+            fprintf(stderr, "Heap %d: nothing to do\n", heap);
+         continue;
+      }
+
+      foreach_s( s, tmp, &vmesa->tex_image_list[heap] ) {
+         /* Read the size now: a successful viaMoveTexBuffers() passes 's'
+          * to via_free_texture(), so it must not be read afterwards. */
+         const GLuint size = s->size;
+         if (s->lastUsed < vmesa->lastSwap[1]) {
+            struct via_texture_object *viaObj =
+               (struct via_texture_object *) s->image->image.TexObject;
+            if (VIA_DEBUG & DEBUG_TEXTURE)
+               fprintf(stderr,
+                       "back copy tex sz %d, lastUsed %d lastSwap %d\n",
+                       s->size, s->lastUsed, vmesa->lastSwap[1]);
+            if (viaMoveTexBuffers( vmesa, &s, 1, VIA_MEM_SYSTEM )) {
+               viaObj->memType = VIA_MEM_MIXED;
+               done += size;
+            }
+            else {
+               if (VIA_DEBUG & DEBUG_TEXTURE)
+                  fprintf(stderr, "Failed to back copy texture!\n");
+               sz += size;
+            }
+         }
+         else {
+            nr ++;
+            sz += size;
+         }
+         if (done > target) {
+            vmesa->thrashing = GL_FALSE; /* might not get set otherwise? */
+            return GL_TRUE;
+         }
+      }
+
+      assert(sz == vmesa->total_alloc[heap]);
+      if (VIA_DEBUG & DEBUG_TEXTURE)
+         fprintf(stderr, "Heap %d: nr %d tot sz %d\n", heap, nr, sz);
+   }
+
+   return done != 0;
+}
+
+/* Collect the dimensions and addresses of each mipmap image that will be
+ * used and store the packed register values in the texture object.  The
+ * object is swapped in first if its memType is MIXED or SYSTEM.  Returns
+ * GL_FALSE on an unhandled format, a failed swap-in or a zero texBase.
+ */
+static GLboolean viaSetTexImages(GLcontext *ctx,
+				 struct gl_texture_object *texObj)
+{
+   struct via_context *vmesa = VIA_CONTEXT(ctx);
+   struct via_texture_object *viaObj = (struct via_texture_object *)texObj;
+   const struct via_texture_image *baseImage = 
+      (struct via_texture_image *)texObj->Image[0][texObj->BaseLevel];
+   GLint firstLevel, lastLevel, numLevels;
+   GLuint texFormat;
+   GLint w, h, p;
+   GLint i, j = 0, k = 0, l = 0, m = 0;
+   GLuint texBase;
+   GLuint basH = 0;
+   GLuint widthExp = 0;
+   GLuint heightExp = 0;    
+
+   switch (baseImage->image.TexFormat) {
+   case MESA_FORMAT_ARGB8888:
+      texFormat = HC_HTXnFM_ARGB8888;
+      break;
+   case MESA_FORMAT_ARGB4444:
+      texFormat = HC_HTXnFM_ARGB4444; 
+      break;
+   case MESA_FORMAT_RGB565:
+      texFormat = HC_HTXnFM_RGB565;   
+      break;
+   case MESA_FORMAT_ARGB1555:
+      texFormat = HC_HTXnFM_ARGB1555;   
+      break;
+   case MESA_FORMAT_RGB888:
+      texFormat = HC_HTXnFM_ARGB0888;
+      break;
+   case MESA_FORMAT_L8:
+      texFormat = HC_HTXnFM_L8;       
+      break;
+   case MESA_FORMAT_I8:
+      texFormat = HC_HTXnFM_T8;       
+      break;
+   case MESA_FORMAT_CI8:
+      texFormat = HC_HTXnFM_Index8;   
+      break;
+   case MESA_FORMAT_AL88:
+      texFormat = HC_HTXnFM_AL88;     
+      break;
+   case MESA_FORMAT_A8:
+      texFormat = HC_HTXnFM_A8;     
+      break;
+   default:
+      _mesa_problem(vmesa->glCtx, "Bad texture format in viaSetTexImages");
+      return GL_FALSE;
+   }
+
+   /* Compute which mipmap levels we really want to send to the hardware.
+    * This depends on the base image size, GL_TEXTURE_MIN_LOD,
+    * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL.
+    * Yes, this looks overly complicated, but it's all needed.
+    */
+   if (texObj->MinFilter == GL_LINEAR || texObj->MinFilter == GL_NEAREST) {
+      firstLevel = lastLevel = texObj->BaseLevel;
+   }
+   else {
+      firstLevel = texObj->BaseLevel + (GLint)(texObj->MinLod + 0.5);
+      firstLevel = MAX2(firstLevel, texObj->BaseLevel);
+      lastLevel = texObj->BaseLevel + (GLint)(texObj->MaxLod + 0.5);
+      lastLevel = MAX2(lastLevel, texObj->BaseLevel);
+      lastLevel = MIN2(lastLevel, texObj->BaseLevel + baseImage->image.MaxLog2);
+      lastLevel = MIN2(lastLevel, texObj->MaxLevel);
+      lastLevel = MAX2(firstLevel, lastLevel);     /* need at least one level */
+   }
+
+   numLevels = lastLevel - firstLevel + 1;
+
+   /* The hardware supports only 10 mipmap levels; ignore higher levels.
+    */
+   if ((numLevels > 10) && (ctx->Const.MaxTextureLevels > 10)) {
+       lastLevel -= numLevels - 10;
+       numLevels = 10;
+   }
+
+   /* save these values, check if they effect the residency of the
+    * texture:
+    */
+   if (viaObj->firstLevel != firstLevel ||
+       viaObj->lastLevel != lastLevel) {
+      viaObj->firstLevel = firstLevel;
+      viaObj->lastLevel = lastLevel;
+      viaObj->memType = VIA_MEM_MIXED;
+   }
+
+   if (VIA_DEBUG & DEBUG_TEXTURE & 0)	/* "& 0" keeps this message disabled */
+      fprintf(stderr, "%s, current memType: %s\n",
+	      __FUNCTION__,
+	      get_memtype_name(viaObj->memType));
+
+   
+   if (viaObj->memType == VIA_MEM_MIXED ||
+       viaObj->memType == VIA_MEM_SYSTEM) {
+      if (!viaSwapInTexObject(vmesa, viaObj)) {
+ 	 if (VIA_DEBUG & DEBUG_TEXTURE) 
+	    if (!vmesa->thrashing)
+	       fprintf(stderr, "Thrashing flag set for frame %d\n", 
+		       vmesa->swap_count);
+	 vmesa->thrashing = GL_TRUE;
+	 return GL_FALSE;
+      }
+   }
+
+   if (viaObj->memType == VIA_MEM_AGP)
+      viaObj->regTexFM = (HC_SubA_HTXnFM << 24) | HC_HTXnLoc_AGP | texFormat;
+   else
+      viaObj->regTexFM = (HC_SubA_HTXnFM << 24) | HC_HTXnLoc_Local | texFormat;
+
+   /* Build the per-level base, pitch and log2 size register values. */
+   for (i = 0; i < numLevels; i++) {    
+      struct via_texture_image *viaImage = 
+	 (struct via_texture_image *)texObj->Image[0][firstLevel + i];
+
+      w = viaImage->image.WidthLog2;
+      h = viaImage->image.HeightLog2;
+      p = viaImage->pitchLog2;
+
+      assert(viaImage->texMem->memType == viaObj->memType);
+
+      texBase = viaImage->texMem->texBase;
+      if (!texBase) {
+	 if (VIA_DEBUG & DEBUG_TEXTURE)
+	    fprintf(stderr, "%s: no texBase[%d]\n", __FUNCTION__, i); 
+	 return GL_FALSE;
+      }
+
+      /* Image has to remain resident until the coming fence is retired.
+       */
+      move_to_head( &vmesa->tex_image_list[viaImage->texMem->memType],
+		    viaImage->texMem );
+      viaImage->texMem->lastUsed = vmesa->lastBreadcrumbWrite;
+
+
+      viaObj->regTexBaseAndPitch[i].baseL = 
+	 ((HC_SubA_HTXnL0BasL + i) << 24) | (texBase & 0xFFFFFF);
+
+      viaObj->regTexBaseAndPitch[i].pitchLog2 = 
+	 ((HC_SubA_HTXnL0Pit + i) << 24) | (p << 20);
+					      
+					      
+      /* The base high bytes for each 3 levels are packed
+       * together into one register:
+       */
+      j = i / 3;
+      k = 3 - (i % 3);
+      basH |= ((texBase & 0xFF000000) >> (k << 3));
+      if (k == 1) {
+	 viaObj->regTexBaseH[j] = ((j + HC_SubA_HTXnL012BasH) << 24) | basH;
+	 basH = 0;
+      }
+            
+      /* Likewise, sets of 6 log2width and log2height values are
+       * packed into individual registers:
+       */
+      l = i / 6;
+      m = i % 6;
+      widthExp |= (((GLuint)w & 0xF) << (m << 2));
+      heightExp |= (((GLuint)h & 0xF) << (m << 2));
+      if (m == 5) {
+	 viaObj->regTexWidthLog2[l] = 
+	    (l + HC_SubA_HTXnL0_5WE) << 24 | widthExp;
+	 viaObj->regTexHeightLog2[l] = 
+	    (l + HC_SubA_HTXnL0_5HE) << 24 | heightExp;
+	 widthExp = 0;
+	 heightExp = 0;
+      }
+      if (w) w--;	/* NOTE(review): w, h and p are reloaded at the top */
+      if (h) h--;	/* of every iteration, so these three decrements */
+      if (p) p--;                                           /* appear to have no effect. */
+   }
+   /* Write out any partially filled base-high and log2 size registers. */
+   if (k != 1) {
+      viaObj->regTexBaseH[j] = ((j + HC_SubA_HTXnL012BasH) << 24) | basH;      
+   }
+   if (m != 5) {
+      viaObj->regTexWidthLog2[l] = (l + HC_SubA_HTXnL0_5WE) << 24 | widthExp;
+      viaObj->regTexHeightLog2[l] = (l + HC_SubA_HTXnL0_5HE) << 24 | heightExp;
+   }
+
+   return GL_TRUE;
+}
+
+/* Set up units 0 and 1; GL_FALSE on a non-1D/2D target or a failed setup. */
+GLboolean viaUpdateTextureState( GLcontext *ctx )
+{
+   GLuint unit;
+
+   for (unit = 0; unit < 2; unit++) {
+      struct gl_texture_unit *tu = &ctx->Texture.Unit[unit];
+
+      if (!tu->_ReallyEnabled)
+	 continue;		/* unit disabled: nothing to set up */
+      /* Only a plain 1D or 2D target is handled here. */
+      if (tu->_ReallyEnabled != TEXTURE_2D_BIT &&
+	  tu->_ReallyEnabled != TEXTURE_1D_BIT)
+	 return GL_FALSE;
+
+      if (!viaSetTexImages(ctx, tu->_Current))
+	 return GL_FALSE;
+   }
+   return GL_TRUE;
+}
+
+
+
+
+
+/* Shared implementation of the TexImage1D/2D hooks: choose a format,
+ * allocate a texture buffer from one of the heaps and store the image.
+ */
+static void viaTexImage(GLcontext *ctx, 
+			GLint dims,
+			GLenum target, GLint level,
+			GLint internalFormat,
+			GLint width, GLint height, GLint border,
+			GLenum format, GLenum type, const void *pixels,
+			const struct gl_pixelstore_attrib *packing,
+			struct gl_texture_object *texObj,
+			struct gl_texture_image *texImage)
+{
+   struct via_context *vmesa = VIA_CONTEXT(ctx);
+   GLint postConvWidth = width;
+   GLint postConvHeight = height;
+   GLint texelBytes, sizeInBytes;
+   struct via_texture_object *viaObj = (struct via_texture_object *)texObj;
+   struct via_texture_image *viaImage = (struct via_texture_image *)texImage;
+   int heaps[3], nheaps, i;
+
+   if (!is_empty_list(&vmesa->freed_tex_buffers)) {
+      viaCheckBreadcrumb(vmesa, 0);
+      via_release_pending_textures(vmesa);
+   }
+
+   if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) {
+      _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth,
+                                         &postConvHeight);
+   }
+
+   /* choose the texture format */
+   texImage->TexFormat = viaChooseTexFormat(ctx, internalFormat, 
+					    format, type);
+
+   assert(texImage->TexFormat);
+
+   texelBytes = _mesa_get_format_bytes(texImage->TexFormat);
+
+   /* Minimum pitch of 32 bytes */
+   if (postConvWidth * texelBytes < 32) {
+      postConvWidth = 32 / texelBytes;
+      texImage->RowStride = postConvWidth;
+   }
+
+   assert(texImage->RowStride == postConvWidth);
+   viaImage->pitchLog2 = logbase2(postConvWidth * texelBytes);
+
+   /* allocate memory */
+   if (_mesa_is_format_compressed(texImage->TexFormat))
+      sizeInBytes = _mesa_format_image_size(texImage->TexFormat,
+                                            texImage->Width,
+                                            texImage->Height,
+                                            texImage->Depth);
+   else
+      sizeInBytes = postConvWidth * postConvHeight * texelBytes;
+
+
+   /* Attempt to allocate texture memory directly, otherwise use main
+    * memory and this texture will always be a fallback.   FIXME!
+    *
+    * TODO: make room in agp if this fails.
+    * TODO: use fb ram for textures as well.
+    */
+   
+   /* Pick the heaps to try, in order, from the object's current memType. */
+   switch (viaObj->memType) {
+   case VIA_MEM_UNKNOWN:
+      heaps[0] = VIA_MEM_AGP;
+      heaps[1] = VIA_MEM_VIDEO;
+      heaps[2] = VIA_MEM_SYSTEM;
+      nheaps = 3;
+      break;
+   case VIA_MEM_AGP:
+   case VIA_MEM_VIDEO:
+      heaps[0] = viaObj->memType;
+      heaps[1] = VIA_MEM_SYSTEM;
+      nheaps = 2;
+      break;
+   case VIA_MEM_MIXED:
+   case VIA_MEM_SYSTEM:
+   default:
+      heaps[0] = VIA_MEM_SYSTEM;
+      nheaps = 1;
+      break;
+   }
+	
+   for (i = 0; i < nheaps && !viaImage->texMem; i++) {
+      if (VIA_DEBUG & DEBUG_TEXTURE) 
+	 fprintf(stderr, "try %s (obj %s)\n", get_memtype_name(heaps[i]),
+		 get_memtype_name(viaObj->memType));
+      viaImage->texMem = via_alloc_texture(vmesa, sizeInBytes, heaps[i]);
+   }
+
+   if (!viaImage->texMem) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+      return;
+   }
+
+   if (VIA_DEBUG & DEBUG_TEXTURE)
+      fprintf(stderr, "upload %d bytes to %s\n", sizeInBytes, 
+	      get_memtype_name(viaImage->texMem->memType));
+
+   viaImage->texMem->image = viaImage;
+   texImage->Data = viaImage->texMem->bufAddr;	/* Data aliases the buffer */
+
+   if (viaObj->memType == VIA_MEM_UNKNOWN)
+      viaObj->memType = viaImage->texMem->memType;
+   else if (viaObj->memType != viaImage->texMem->memType)
+      viaObj->memType = VIA_MEM_MIXED;
+
+   if (VIA_DEBUG & DEBUG_TEXTURE)
+      fprintf(stderr, "%s, obj %s, image : %s\n",
+	      __FUNCTION__,	      
+	      get_memtype_name(viaObj->memType),
+	      get_memtype_name(viaImage->texMem->memType));
+
+   vmesa->clearTexCache = 1;
+
+   pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, 1, 
+					format, type,
+					pixels, packing, "glTexImage");
+   if (!pixels) {
+      /* Note: we check for a NULL image pointer here, _after_ we allocated
+       * memory for the texture.  That's what the GL spec calls for.
+       */
+      return;
+   }
+   else {
+      GLint dstRowStride;
+      GLboolean success;
+
+      if (_mesa_is_format_compressed(texImage->TexFormat)) {
+         dstRowStride = _mesa_format_row_stride(texImage->TexFormat, width);
+      }
+      else {
+         dstRowStride = postConvWidth * _mesa_get_format_bytes(texImage->TexFormat);
+      }
+      success = _mesa_texstore(ctx, dims,
+                               texImage->_BaseFormat,
+                               texImage->TexFormat,
+                               texImage->Data,
+                               0, 0, 0,  /* dstX/Y/Zoffset */
+                               dstRowStride,
+                               texImage->ImageOffsets,
+                               width, height, 1,
+                               format, type, pixels, packing);
+      if (!success) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage");
+      }
+   }
+
+   _mesa_unmap_teximage_pbo(ctx, packing);
+}
+
+static void viaTexImage2D(GLcontext *ctx, 
+			  GLenum target, GLint level,
+			  GLint internalFormat,
+			  GLint width, GLint height, GLint border,
+			  GLenum format, GLenum type, const void *pixels,
+			  const struct gl_pixelstore_attrib *packing,
+			  struct gl_texture_object *texObj,
+			  struct gl_texture_image *texImage)
+{
+   const GLint dims = 2;	/* forwarded to viaTexImage() as 'dims' */
+   viaTexImage(ctx, dims, target, level, internalFormat,
+	       width, height, border, format, type, pixels,
+	       packing, texObj, texImage);
+}
+
+static void viaTexSubImage2D(GLcontext *ctx,
+                             GLenum target,
+                             GLint level,
+                             GLint xoffset, GLint yoffset,
+                             GLsizei width, GLsizei height,
+                             GLenum format, GLenum type,
+                             const GLvoid *pixels,
+                             const struct gl_pixelstore_attrib *packing,
+                             struct gl_texture_object *texObj,
+                             struct gl_texture_image *texImage)
+{
+   struct via_context *via = VIA_CONTEXT(ctx);
+
+   /* viaWaitIdle() first, then set clearTexCache, then store the pixels. */
+   viaWaitIdle(via, GL_TRUE);
+   via->clearTexCache = 1;
+   _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset,
+			     width, height, format, type, pixels,
+			     packing, texObj, texImage);
+}
+
+static void viaTexImage1D(GLcontext *ctx, 
+			  GLenum target, GLint level,
+			  GLint internalFormat,
+			  GLint width, GLint border,
+			  GLenum format, GLenum type, const void *pixels,
+			  const struct gl_pixelstore_attrib *packing,
+			  struct gl_texture_object *texObj,
+			  struct gl_texture_image *texImage)
+{
+   const GLint height = 1;	/* 1D images are passed on with height 1 */
+   viaTexImage(ctx, 1, target, level, internalFormat,
+	       width, height, border, format, type, pixels,
+	       packing, texObj, texImage);
+}
+
+static void viaTexSubImage1D(GLcontext *ctx,
+                             GLenum target,
+                             GLint level,
+                             GLint xoffset,
+                             GLsizei width,
+                             GLenum format, GLenum type,
+                             const GLvoid *pixels,
+                             const struct gl_pixelstore_attrib *packing,
+                             struct gl_texture_object *texObj,
+                             struct gl_texture_image *texImage)
+{
+   struct via_context *via = VIA_CONTEXT(ctx);
+
+   /* viaWaitIdle() first, then set clearTexCache, then store the pixels. */
+   viaWaitIdle(via, GL_TRUE);
+   via->clearTexCache = 1;
+   _mesa_store_texsubimage1d(ctx, target, level, xoffset,
+			     width, format, type, pixels,
+			     packing, texObj, texImage);
+}
+
+
+/* IsTextureResident hook: GL_TRUE only for objects in AGP or video memory. */
+static GLboolean viaIsTextureResident(GLcontext *ctx,
+                                      struct gl_texture_object *texObj)
+{
+   struct via_texture_object *viaObj = (struct via_texture_object *)texObj;
+
+   if (viaObj->memType == VIA_MEM_AGP)
+      return GL_TRUE;
+   return (viaObj->memType == VIA_MEM_VIDEO) ? GL_TRUE : GL_FALSE;
+}
+
+/* NewTextureImage hook: allocate the driver's via_texture_image wrapper. */
+static struct gl_texture_image *viaNewTextureImage( GLcontext *ctx )
+{
+   struct via_texture_image *img = CALLOC_STRUCT(via_texture_image);
+   (void) ctx;
+   return (struct gl_texture_image *)img;
+}
+
+/* NewTextureObject hook: create a texture object; NULL if allocation fails. */
+static struct gl_texture_object *viaNewTextureObject( GLcontext *ctx, 
+						      GLuint name, 
+						      GLenum target )
+{
+   struct via_texture_object *obj = CALLOC_STRUCT(via_texture_object);
+   (void) ctx;
+
+   if (!obj)
+      return NULL;	/* out of memory: do not touch obj->obj below */
+   _mesa_initialize_texture_object(&obj->obj, name, target);
+   obj->memType = VIA_MEM_UNKNOWN;	/* no image storage allocated yet */
+   return &obj->obj;
+}
+
+/* FreeTexImageData hook: release the buffer backing an image, if any. */
+static void viaFreeTextureImageData( GLcontext *ctx, 
+				     struct gl_texture_image *texImage )
+{
+   struct via_context *vmesa = VIA_CONTEXT(ctx);
+   struct via_texture_image *viaImage = (struct via_texture_image *)texImage;
+   struct via_tex_buffer *mem = viaImage->texMem;
+
+   if (mem) {
+      via_free_texture(vmesa, mem);
+      viaImage->texMem = NULL;
+   }
+   texImage->Data = NULL;	/* cleared whether or not a buffer was freed */
+}
+
+
+
+/* Install this file's texture hooks into the given driver function table. */
+void viaInitTextureFuncs(struct dd_function_table * functions)
+{
+   /* Object and image lifetime, residency query. */
+   functions->NewTextureObject = viaNewTextureObject;
+   functions->DeleteTexture = _mesa_delete_texture_object;
+   functions->NewTextureImage = viaNewTextureImage;
+   functions->FreeTexImageData = viaFreeTextureImageData;
+   functions->IsTextureResident = viaIsTextureResident;
+   functions->UpdateTexturePalette = 0;
+
+   /* Format selection and image upload. */
+   functions->ChooseTextureFormat = viaChooseTexFormat;
+   functions->TexImage1D = viaTexImage1D;
+   functions->TexSubImage1D = viaTexSubImage1D;
+   functions->TexImage2D = viaTexImage2D;
+   functions->TexSubImage2D = viaTexSubImage2D;
+#if 0 && defined( USE_SSE_ASM )
+   /*
+    * XXX this code is disabled for now because the via_sse_memcpy()
+    * routine causes segfaults with flightgear.
+    * See Mesa3d-dev mail list messages from 7/15/2005 for details.
+    * Note that this function is currently disabled in via_tris.c too.
+    */
+   if (getenv("VIA_NO_SSE"))
+      functions->TextureMemCpy = memcpy;
+   else
+      functions->TextureMemCpy = via_sse_memcpy;
+#else
+   functions->TextureMemCpy = memcpy;
+#endif
+}
+
+
diff --git a/src/mesa/drivers/dri/unichrome/via_tex.h b/src/mesa/drivers/dri/unichrome/via_tex.h
new file mode 100644
index 0000000000..25eeee32f3
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_tex.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _VIATEX_H
+#define _VIATEX_H
+
+#include "main/mtypes.h"
+
+struct via_context;	/* forward declaration; only used via pointer here */
+
+GLboolean viaUpdateTextureState(GLcontext *ctx);  /* GL_FALSE: cannot set up */
+void viaInitTextureFuncs(struct dd_function_table * functions);
+GLboolean viaSwapOutWork( struct via_context *vmesa ); /* GL_TRUE: moved data */
+
+#if defined( USE_SSE_ASM )
+void via_sse_memcpy( void *to, const void *from, size_t sz );
+#endif /* defined( USE_SSE_ASM ) */
+
+#endif /* _VIATEX_H */
diff --git a/src/mesa/drivers/dri/unichrome/via_texcombine.c b/src/mesa/drivers/dri/unichrome/via_texcombine.c
new file mode 100644
index 0000000000..f87ba071f3
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_texcombine.c
@@ -0,0 +1,423 @@
+/*
+ * (C) Copyright IBM Corporation 2004
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file via_texcombine.c
+ * Calculate texture combine hardware state.
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+#include <stdio.h>
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/colormac.h"
+#include "main/enums.h"
+
+#include "via_context.h"
+#include "via_tex.h"
+#include "via_3d_reg.h"
+
+
+#define VIA_USE_ALPHA (HC_XTC_Adif - HC_XTC_Dif) /* Dif + this == Adif */
+/* Shifts applied to the A, B, C and color-bias input selectors. */
+#define INPUT_A_SHIFT     14
+#define INPUT_B_SHIFT     7
+#define INPUT_C_SHIFT     0
+#define INPUT_CBias_SHIFT 14
+/* Used as the A input wherever the mode comments below say "Ca = 1.0". */
+#define CONST_ONE         (HC_XTC_0 | HC_XTC_InvTOPC)
+/* Indexed by OperandRGB[i] - GL_SRC_COLOR. */
+static const unsigned color_operand_modifier[4] = {
+   0,					/* GL_SRC_COLOR */
+   HC_XTC_InvTOPC,			/* GL_ONE_MINUS_SRC_COLOR */
+   VIA_USE_ALPHA,			/* GL_SRC_ALPHA */
+   VIA_USE_ALPHA | HC_XTC_InvTOPC,	/* GL_ONE_MINUS_SRC_ALPHA */
+};
+/* Indexed by OperandA[i] - GL_SRC_ALPHA. */
+static const unsigned alpha_operand_modifier[2] = {
+   0, HC_XTA_InvTOPA
+};
+/* Same index, added to the bias_alpha_arg[] selectors. */
+static const unsigned bias_alpha_operand_modifier[2] = {
+   0, HC_HTXnTBLAbias_Inv
+};
+
+/* Both shift tables are indexed by ScaleShiftRGB / ScaleShiftA. */
+static const unsigned c_shift_table[3] = {
+   HC_HTXnTBLCshift_No, HC_HTXnTBLCshift_1, HC_HTXnTBLCshift_2
+};
+
+static const unsigned  a_shift_table[3] = {
+   HC_HTXnTBLAshift_No, HC_HTXnTBLAshift_1, HC_HTXnTBLAshift_2
+};
+
+
+/**
+ * Calculate the hardware state for the specified texture combine mode
+ * and store it in the per-unit combiner registers of \p vmesa.
+ *
+ * \return GL_FALSE when the state cannot be programmed (4x scale, or a
+ * source/mode this function does not handle); GL_TRUE otherwise.
+ *
+ * \bug
+ * All forms of DOT3 bumpmapping are completely untested, and are most
+ * likely wrong.  KW: Looks like it will never be quite right as the
+ * hardware seems to experience overflow in color calculation at the
+ * 4x shift levels, which need to be programmed for DOT3.  Maybe newer
+ * hardware fixes these issues.
+ *
+ * \bug 
+ * KW: needs attention to the case where texunit 1 is enabled but
+ * texunit 0 is not.
+ */
+GLboolean
+viaTexCombineState( struct via_context *vmesa,
+		    const struct gl_tex_env_combine_state * combine,
+		    unsigned unit )
+{
+   unsigned color_arg[3];
+   unsigned alpha_arg[3];
+   unsigned bias_alpha_arg[3];
+   unsigned color = HC_HTXnTBLCsat_MASK;
+   unsigned alpha = HC_HTXnTBLAsat_MASK;
+   unsigned bias = 0;
+   unsigned op = 0;
+   unsigned a_shift = combine->ScaleShiftA;
+   unsigned c_shift = combine->ScaleShiftRGB;
+   unsigned i;
+   unsigned constant_color[3];
+   unsigned ordered_constant_color[4];
+   unsigned constant_alpha[3];
+   unsigned bias_alpha = 0;
+   unsigned abc_alpha = 0;
+   const struct gl_texture_unit * texUnit = 
+      &vmesa->glCtx->Texture.Unit[unit];
+   unsigned env_color[4];
+
+   /* It seems that the color clamping can be overwhelmed at the 4x
+    * scale settings, necessitating this fallback:
+    */
+   if (c_shift == 2 || a_shift == 2) {
+      return GL_FALSE;
+   }
+
+   CLAMPED_FLOAT_TO_UBYTE(env_color[0], texUnit->EnvColor[0]);
+   CLAMPED_FLOAT_TO_UBYTE(env_color[1], texUnit->EnvColor[1]);
+   CLAMPED_FLOAT_TO_UBYTE(env_color[2], texUnit->EnvColor[2]);
+   CLAMPED_FLOAT_TO_UBYTE(env_color[3], texUnit->EnvColor[3]);
+
+   (void) memset( constant_color, 0, sizeof( constant_color ) );
+   (void) memset( ordered_constant_color, 0, sizeof( ordered_constant_color ) );
+   (void) memset( constant_alpha, 0, sizeof( constant_alpha ) );
+
+   for ( i = 0 ; i < combine->_NumArgsRGB ; i++ ) {
+      const GLint mod = combine->OperandRGB[i] - GL_SRC_COLOR;
+
+      switch ( combine->SourceRGB[i] ) {
+      case GL_TEXTURE:
+	 color_arg[i] = HC_XTC_Tex;
+	 color_arg[i] += color_operand_modifier[mod];
+	 break;
+      case GL_CONSTANT:
+	 color_arg[i] = HC_XTC_HTXnTBLRC;
+	 switch( mod ) {
+	 case 0:		/* GL_SRC_COLOR */
+	    constant_color[i] = ((env_color[0] << 16) | 
+				 (env_color[1] << 8) | 
+				 env_color[2]);
+	    break;
+	 case 1:		/* GL_ONE_MINUS_SRC_COLOR */
+	    constant_color[i] = ~((env_color[0] << 16) | 
+				  (env_color[1] << 8) | 
+				  env_color[2]) & 0x00ffffff;
+	    break;
+	 case 2:		/* GL_SRC_ALPHA */
+	    constant_color[i] = ((env_color[3] << 16) | 
+				 (env_color[3] << 8) | 
+				 env_color[3]);
+	    break;
+	 case 3:		/* GL_ONE_MINUS_SRC_ALPHA */
+	    constant_color[i] = ~((env_color[3] << 16) | 
+				  (env_color[3] << 8) | 
+				  env_color[3]) & 0x00ffffff;
+	    break;
+	 }
+	 break;
+      case GL_PRIMARY_COLOR:
+	 color_arg[i] = HC_XTC_Dif;
+	 color_arg[i] += color_operand_modifier[mod];
+	 break;
+      case GL_PREVIOUS:
+	 color_arg[i] = (unit == 0) ? HC_XTC_Dif : HC_XTC_Cur;
+	 color_arg[i] += color_operand_modifier[mod];
+	 break;
+      default:		/* unhandled source: color_arg[i] would be unset */
+	 return GL_FALSE;
+      }
+   }
+
+   /* On the Unichrome, all combine operations take on some form of:
+    *
+    *     (xA * (xB op xC) + xBias) << xShift
+    * 
+    * 'op' can be selected as add, subtract, min, max, or mask.  The min, max
+    * and mask modes are currently unused.  With the exception of DOT3, all
+    * standard GL_COMBINE modes can be implemented simply by selecting the
+    * correct inputs for A, B, C, and Bias and the correct operation for op.
+    *
+    * NOTE: xBias (when read from the constant registers) is signed,
+    * and scaled to fit -255..255 in 8 bits, ie 0x1 == 2.
+    */
+   switch( combine->ModeRGB ) {
+   /* Ca = 1.0, Cb = arg0, Cc = 0, Cbias = 0
+    */
+   case GL_REPLACE:
+      color |= ((CONST_ONE << INPUT_A_SHIFT) |
+		(color_arg[0] << INPUT_B_SHIFT));
+      ordered_constant_color[1] = constant_color[0];
+      break;
+      
+   /* Ca = arg[0], Cb = arg[1], Cc = 0, Cbias = 0
+    */
+   case GL_MODULATE:
+      color |= ((color_arg[0] << INPUT_A_SHIFT) | 
+		(color_arg[1] << INPUT_B_SHIFT));
+
+      ordered_constant_color[0] = constant_color[0];
+      ordered_constant_color[1] = constant_color[1];
+      break;
+
+   /* Ca = 1.0, Cb = arg[0], Cc = arg[1], Cbias = 0
+    */
+   case GL_ADD:
+   case GL_SUBTRACT:
+      if ( combine->ModeRGB == GL_SUBTRACT ) {
+	 op |= HC_HTXnTBLCop_Sub;
+      }
+
+      color |= ((CONST_ONE << INPUT_A_SHIFT) |
+		(color_arg[0] << INPUT_B_SHIFT) |
+		(color_arg[1] << INPUT_C_SHIFT));
+
+      ordered_constant_color[1] = constant_color[0];
+      ordered_constant_color[2] = constant_color[1];
+      break;
+
+   /* Ca = 1.0, Cb = arg[0], Cc = arg[1], Cbias = -0.5
+    */
+   case GL_ADD_SIGNED:
+      color |= ((CONST_ONE << INPUT_A_SHIFT) |
+		(color_arg[0] << INPUT_B_SHIFT) | 
+		(color_arg[1] << INPUT_C_SHIFT));
+
+      bias |= HC_HTXnTBLCbias_HTXnTBLRC;
+
+      ordered_constant_color[1] = constant_color[0];
+      ordered_constant_color[2] = constant_color[1];
+      ordered_constant_color[3] = 0x00bfbfbf; /* -.5 */
+      break;
+
+   /* Ca = arg[2], Cb = arg[0], Cc = arg[1], Cbias = arg[1]
+    */
+   case GL_INTERPOLATE:
+      op |= HC_HTXnTBLCop_Sub;
+
+      color |= ((color_arg[2] << INPUT_A_SHIFT) |
+		(color_arg[0] << INPUT_B_SHIFT) |
+		(color_arg[1] << INPUT_C_SHIFT));
+
+      bias |= (color_arg[1] << INPUT_CBias_SHIFT);
+
+      ordered_constant_color[0] = constant_color[2];
+      ordered_constant_color[1] = constant_color[0];
+      ordered_constant_color[2] = constant_color[1];
+      ordered_constant_color[3] = (constant_color[1] >> 1) & 0x7f7f7f;
+      break;
+
+#if 0
+   /* At this point this code is completely untested.  It appears that the
+    * Unichrome has the same limitation as the Radeon R100.  The only
+    * supported post-scale when doing DOT3 bumpmapping is 1x.
+    */
+   case GL_DOT3_RGB_EXT:
+   case GL_DOT3_RGBA_EXT:
+   case GL_DOT3_RGB:
+   case GL_DOT3_RGBA:
+      c_shift = 2;
+      a_shift = 2;
+      color |= ((color_arg[0] << INPUT_A_SHIFT) |
+		(color_arg[1] << INPUT_B_SHIFT));
+      op |= HC_HTXnTBLDOT4;
+      break;
+#endif
+
+   default:
+      assert(0);
+      return GL_FALSE;	/* with NDEBUG: fall back rather than continue */
+   }
+
+   /* The alpha blend stage has the annoying quirk of not having a
+    * hard-wired 0 input, like the color stage.  As a result, we have
+    * to program the constant register with 0 and use that as our
+    * 0 input.
+    *
+    *     (xA * (xB op xC) + xBias) << xShift
+    *
+    */
+   for ( i = 0 ; i < combine->_NumArgsA ; i++ ) {
+      const GLint mod = combine->OperandA[i] - GL_SRC_ALPHA;
+
+      switch ( combine->SourceA[i] ) {
+      case GL_TEXTURE:
+	 alpha_arg[i] = HC_XTA_Atex;
+	 alpha_arg[i] += alpha_operand_modifier[mod];
+	 bias_alpha_arg[i] = HC_HTXnTBLAbias_Atex;
+	 bias_alpha_arg[i] += bias_alpha_operand_modifier[mod];
+	 break;
+      case GL_CONSTANT:
+	 alpha_arg[i] = HC_XTA_HTXnTBLRA;
+	 bias_alpha_arg[i] = HC_HTXnTBLAbias_HTXnTBLRAbias;
+	 constant_alpha[i] = (mod == 0) ? env_color[3] : (~env_color[3] & 0xff);
+	 break;
+      case GL_PRIMARY_COLOR:
+	 alpha_arg[i] = HC_XTA_Adif;
+	 alpha_arg[i] += alpha_operand_modifier[mod];
+	 bias_alpha_arg[i] = HC_HTXnTBLAbias_Adif;
+	 bias_alpha_arg[i] += bias_alpha_operand_modifier[mod];
+	 break;
+      case GL_PREVIOUS:
+	 alpha_arg[i] = (unit == 0) ? HC_XTA_Adif : HC_XTA_Acur;
+	 alpha_arg[i] += alpha_operand_modifier[mod];
+	 bias_alpha_arg[i] = (unit == 0 ? 
+			      HC_HTXnTBLAbias_Adif : 
+			      HC_HTXnTBLAbias_Acur);
+	 bias_alpha_arg[i] += bias_alpha_operand_modifier[mod];
+	 break;
+      default:		/* unhandled source: alpha_arg[i] would be unset */
+	 return GL_FALSE;
+      }
+   }
+
+   switch( combine->ModeA ) {
+   /* Aa = 0, Ab = 0, Ac = 0, Abias = arg0
+    */
+   case GL_REPLACE:
+      alpha |= ((HC_XTA_HTXnTBLRA << INPUT_A_SHIFT) |
+		(HC_XTA_HTXnTBLRA << INPUT_B_SHIFT) |
+		(HC_XTA_HTXnTBLRA << INPUT_C_SHIFT));
+      abc_alpha = 0;
+
+      bias |= bias_alpha_arg[0];
+      bias_alpha = constant_alpha[0] >> 1;
+      break;
+      
+   /* Aa = arg[1], Ab = arg[0], Ac = 0, Abias = 0
+    */
+   case GL_MODULATE:
+      alpha |= ((alpha_arg[1] << INPUT_A_SHIFT) | 
+		(alpha_arg[0] << INPUT_B_SHIFT) | 
+		(HC_XTA_HTXnTBLRA << INPUT_C_SHIFT));
+
+      abc_alpha = ((constant_alpha[1] << HC_HTXnTBLRAa_SHIFT) |
+		   (constant_alpha[0] << HC_HTXnTBLRAb_SHIFT) |
+		   (0 << HC_HTXnTBLRAc_SHIFT));
+
+      bias |= HC_HTXnTBLAbias_HTXnTBLRAbias;
+      bias_alpha = 0;
+      break;
+
+   /* Aa = 1.0, Ab = arg[0], Ac = arg[1], Abias = 0
+    */
+   case GL_ADD:
+   case GL_SUBTRACT:
+      if ( combine->ModeA == GL_SUBTRACT ) {
+	 op |= HC_HTXnTBLAop_Sub;
+      }
+
+      alpha |= ((HC_XTA_HTXnTBLRA << INPUT_A_SHIFT) |
+		(alpha_arg[0] << INPUT_B_SHIFT) |
+		(alpha_arg[1] << INPUT_C_SHIFT));
+
+      abc_alpha = ((0xff << HC_HTXnTBLRAa_SHIFT) |
+		   (constant_alpha[0] << HC_HTXnTBLRAb_SHIFT) |
+		   (constant_alpha[1] << HC_HTXnTBLRAc_SHIFT));
+
+      bias |= HC_HTXnTBLAbias_HTXnTBLRAbias;
+      bias_alpha = 0;
+      break;
+
+   /* Aa = 1.0, Ab = arg[0], Ac = arg[1], Abias = -0.5
+    */
+   case GL_ADD_SIGNED:
+      alpha |= ((HC_XTA_HTXnTBLRA << INPUT_A_SHIFT) |
+		(alpha_arg[0] << INPUT_B_SHIFT) | 
+		(alpha_arg[1] << INPUT_C_SHIFT));
+      abc_alpha = ((0xff << HC_HTXnTBLRAa_SHIFT) |
+		   (constant_alpha[0] << HC_HTXnTBLRAb_SHIFT) |
+		   (constant_alpha[1] << HC_HTXnTBLRAc_SHIFT));
+
+      bias |= HC_HTXnTBLAbias_HTXnTBLRAbias;
+      bias_alpha = 0xbf;
+      break;
+
+   /* Aa = arg[2], Ab = arg[0], Ac = arg[1], Abias = arg[1]
+    */
+   case GL_INTERPOLATE:
+      op |= HC_HTXnTBLAop_Sub;
+
+      alpha |= ((alpha_arg[2] << INPUT_A_SHIFT) |
+		(alpha_arg[0] << INPUT_B_SHIFT) |
+		(alpha_arg[1] << INPUT_C_SHIFT));
+      abc_alpha = ((constant_alpha[2] << HC_HTXnTBLRAa_SHIFT) |
+		   (constant_alpha[0] << HC_HTXnTBLRAb_SHIFT) |
+		   (constant_alpha[1] << HC_HTXnTBLRAc_SHIFT));
+
+      bias |= bias_alpha_arg[1];
+      bias_alpha = constant_alpha[1] >> 1;
+      break;
+   default:		/* alpha mode not handled above: fall back */
+      return GL_FALSE;
+   }
+
+   op |= c_shift_table[ c_shift ] | a_shift_table[ a_shift ];
+
+   vmesa->regHTXnTBLMPfog[unit] = HC_HTXnTBLMPfog_Fog;
+
+   vmesa->regHTXnTBLCsat[unit] = color;
+   vmesa->regHTXnTBLAsat[unit] = alpha;
+   vmesa->regHTXnTBLCop[unit] = op | bias;
+   vmesa->regHTXnTBLRAa[unit] = abc_alpha;
+   vmesa->regHTXnTBLRFog[unit] = bias_alpha;
+
+   vmesa->regHTXnTBLRCa[unit] = ordered_constant_color[0];
+   vmesa->regHTXnTBLRCb[unit] = ordered_constant_color[1];
+   vmesa->regHTXnTBLRCc[unit] = ordered_constant_color[2];
+   vmesa->regHTXnTBLRCbias[unit] = ordered_constant_color[3];
+
+   return GL_TRUE;
+}
+
diff --git a/src/mesa/drivers/dri/unichrome/via_tris.c b/src/mesa/drivers/dri/unichrome/via_tris.c
new file mode 100644
index 0000000000..be3c9a770f
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_tris.c
@@ -0,0 +1,1194 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <math.h>
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "main/colormac.h"
+#include "main/enums.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "via_context.h"
+#include "via_tris.h"
+#include "via_state.h"
+#include "via_span.h"
+#include "via_ioctl.h"
+#include "via_3d_reg.h"
+#include "via_tex.h"
+
+/***********************************************************************
+ *                    Emit primitives as inline vertices               *
+ ***********************************************************************/
+#define LINE_FALLBACK (0)	/* _TriangleCaps bits that would select via_fallback_line (none) */
+#define POINT_FALLBACK (0)	/* _TriangleCaps bits that would select via_fallback_point (none) */
+#define TRI_FALLBACK (0)	/* _TriangleCaps bits that would select via_fallback_tri (none) */
+#define ANY_FALLBACK_FLAGS (POINT_FALLBACK|LINE_FALLBACK|TRI_FALLBACK)
+#define ANY_RASTER_FLAGS (DD_TRI_LIGHT_TWOSIDE|DD_TRI_OFFSET|DD_TRI_UNFILLED)	/* caps that make viaChooseRenderState build a non-zero index */
+
+
+#if 0	/* disabled variant: via_sse_memcpy() of vertsize * 4 bytes, then advance vb */
+#define COPY_DWORDS(vb, vertsize, v) 		\
+do {						\
+   via_sse_memcpy(vb, v, vertsize * 4);		\
+   vb += vertsize;				\
+} while (0)
+#else	/* active variants: copy vertsize dwords from v to vb; vb must be an lvalue */
+#if defined( USE_X86_ASM )	/* "rep ; movsl" with vb as the "=D" output operand */
+#define COPY_DWORDS(vb, vertsize, v)					\
+    do {								\
+        int j;								\
+        int __tmp;							\
+        __asm__ __volatile__("rep ; movsl"				\
+                              : "=%c" (j), "=D" (vb), "=S" (__tmp)	\
+                              : "0" (vertsize),				\
+                                "D" ((long)vb),				\
+                                "S" ((long)v));				\
+    } while (0)
+#else	/* portable C loop; arguments are expanded unparenthesized */
+#define COPY_DWORDS(vb, vertsize, v)		\
+    do {					\
+        int j;					\
+        for (j = 0; j < vertsize; j++)		\
+            vb[j] = ((GLuint *)v)[j];		\
+        vb += vertsize;				\
+    } while (0)
+#endif	/* USE_X86_ASM */
+#endif	/* 0 */
+
+/* Copy triangle v0,v1,v2 as inline vertices (vmesa->vertexSize dwords
+ * each) into the space returned by viaExtendPrimitive(). */
+static void via_draw_triangle(struct via_context *vmesa, viaVertexPtr v0,
+			      viaVertexPtr v1, viaVertexPtr v2)
+{
+   const GLuint dwords = vmesa->vertexSize;
+   GLuint *dst = viaExtendPrimitive(vmesa, dwords * 4 * 3);
+
+   COPY_DWORDS(dst, dwords, v0);
+   COPY_DWORDS(dst, dwords, v1);
+   COPY_DWORDS(dst, dwords, v2);
+}
+
+
+/* Copy quad v0..v3 as two inline triangles, (v0, v1, v3) followed by
+ * (v1, v2, v3), into the space returned by viaExtendPrimitive(). */
+static void via_draw_quad(struct via_context *vmesa, viaVertexPtr v0,
+			  viaVertexPtr v1, viaVertexPtr v2, viaVertexPtr v3)
+{
+   const GLuint dwords = vmesa->vertexSize;
+   GLuint *dst = viaExtendPrimitive(vmesa, dwords * 4 * 6);
+   /* first triangle */
+   COPY_DWORDS(dst, dwords, v0);
+   COPY_DWORDS(dst, dwords, v1);
+   COPY_DWORDS(dst, dwords, v3);
+   /* second triangle */
+   COPY_DWORDS(dst, dwords, v1);
+   COPY_DWORDS(dst, dwords, v2);
+   COPY_DWORDS(dst, dwords, v3);
+}
+
+/* Copy line v0-v1 as two inline vertices of vmesa->vertexSize dwords. */
+static void via_draw_line(struct via_context *vmesa, viaVertexPtr v0,
+			  viaVertexPtr v1)
+{
+   const GLuint dwords = vmesa->vertexSize;
+   GLuint *dst = viaExtendPrimitive(vmesa, dwords * 4 * 2);
+   COPY_DWORDS(dst, dwords, v0);
+   COPY_DWORDS(dst, dwords, v1);
+}
+
+
+/* Copy point v0 as one inline vertex of vmesa->vertexSize dwords. */
+static void via_draw_point(struct via_context *vmesa, viaVertexPtr v0)
+{
+   const GLuint dwords = vmesa->vertexSize;
+   GLuint *dst = viaExtendPrimitive(vmesa, dwords * 4);
+   COPY_DWORDS(dst, dwords, v0);
+}
+
+
+/* Fallback drawing functions for the ptex hack.  PTEX_VERTEX copies vertex_size floats of v into
+ * tmp, multiplies f[3] by f[vertex_size] and divides the last two copied floats by f[vertex_size]. */
+#define PTEX_VERTEX( tmp, vertex_size, v)	\
+do {							\
+   GLuint j;						\
+   GLfloat rhw = 1.0 / v->f[vertex_size];		\
+   for ( j = 0 ; j < vertex_size ; j++ )		\
+      tmp.f[j] = v->f[j];				\
+   tmp.f[3] *= v->f[vertex_size];			\
+   tmp.f[vertex_size-2] *= rhw;				\
+   tmp.f[vertex_size-1] *= rhw;				\
+} while (0)
+
+/* ptex-hack triangle: PTEX_VERTEX rewrites each vertex into a temporary,
+ * which is then copied out with vmesa->hwVertexSize dwords. */
+static void via_ptex_tri (struct via_context *vmesa, viaVertexPtr v0,
+			  viaVertexPtr v1, viaVertexPtr v2)
+{
+   const GLuint hwsize = vmesa->hwVertexSize;
+   GLuint *dst = viaExtendPrimitive(vmesa, hwsize * 4 * 3);
+   viaVertex scratch;
+
+   PTEX_VERTEX(scratch, hwsize, v0); COPY_DWORDS(dst, hwsize, &scratch);
+   PTEX_VERTEX(scratch, hwsize, v1); COPY_DWORDS(dst, hwsize, &scratch);
+   PTEX_VERTEX(scratch, hwsize, v2); COPY_DWORDS(dst, hwsize, &scratch);
+}
+
+/* ptex-hack line: both vertices go through PTEX_VERTEX before the copy. */
+static void via_ptex_line (struct via_context *vmesa, viaVertexPtr v0,
+			   viaVertexPtr v1)
+{
+   const GLuint hwsize = vmesa->hwVertexSize;
+   GLuint *dst = viaExtendPrimitive(vmesa, hwsize * 4 * 2);
+   viaVertex scratch;
+
+   PTEX_VERTEX(scratch, hwsize, v0); COPY_DWORDS(dst, hwsize, &scratch);
+   PTEX_VERTEX(scratch, hwsize, v1); COPY_DWORDS(dst, hwsize, &scratch);
+}
+
+/* ptex-hack point: one vertex rewritten by PTEX_VERTEX and copied out. */
+static void via_ptex_point (struct via_context *vmesa, viaVertexPtr v0)
+{
+   const GLuint hwsize = vmesa->hwVertexSize;
+   GLuint *dst = viaExtendPrimitive(vmesa, hwsize * 4);
+   viaVertex scratch;
+
+   PTEX_VERTEX(scratch, hwsize, v0); COPY_DWORDS(dst, hwsize, &scratch);
+}
+
+
+
+
+
+/***********************************************************************
+ *      Macros for tnl_dd/t_dd_tritmp.h to draw basic primitives       *
+ * DO_FALLBACK draws via the vmesa->draw* hooks, otherwise via_draw_*. *
+ ***********************************************************************/
+#define TRI(a, b, c)                                \
+    do {                                            \
+        if (DO_FALLBACK)                            \
+            vmesa->drawTri(vmesa, a, b, c);         \
+        else                                        \
+            via_draw_triangle(vmesa, a, b, c);      \
+    } while (0)
+
+#define QUAD(a, b, c, d)                            \
+    do {                                            \
+        if (DO_FALLBACK) {                          \
+            vmesa->drawTri(vmesa, a, b, d);         \
+            vmesa->drawTri(vmesa, b, c, d);         \
+        }                                           \
+        else                                        \
+            via_draw_quad(vmesa, a, b, c, d);       \
+    } while (0)
+
+#define LINE(v0, v1)                                \
+    do {                                            \
+        if (DO_FALLBACK)                            \
+            vmesa->drawLine(vmesa, v0, v1);         \
+        else                                        \
+            via_draw_line(vmesa, v0, v1);           \
+    } while (0)
+
+#define POINT(v0)                                    \
+    do {                                             \
+        if (DO_FALLBACK)                             \
+            vmesa->drawPoint(vmesa, v0);             \
+        else                                         \
+            via_draw_point(vmesa, v0);               \
+    } while (0)
+
+
+/***********************************************************************
+ *              Build render functions from dd templates               *
+ ***********************************************************************/
+
+#define VIA_OFFSET_BIT         0x01	/* set when _TriangleCaps has DD_TRI_OFFSET */
+#define VIA_TWOSIDE_BIT        0x02	/* set when lighting is enabled with the two-side model */
+#define VIA_UNFILLED_BIT       0x04	/* set when front or back polygon mode is not GL_FILL */
+#define VIA_FALLBACK_BIT       0x08	/* draw through the vmesa->drawPoint/drawLine/drawTri hooks */
+#define VIA_MAX_TRIFUNC        0x10	/* extra slot used by the _flat_specular instance */
+
+
+static struct {
+    tnl_points_func          points;
+    tnl_line_func            line;
+    tnl_triangle_func        triangle;
+    tnl_quad_func            quad;
+} rast_tab[VIA_MAX_TRIFUNC + 1];	/* indexed by vmesa->renderIndex in viaChooseRenderState */
+
+
+#define DO_FALLBACK (IND & VIA_FALLBACK_BIT)	/* IND is defined before each template include */
+#define DO_OFFSET   (IND & VIA_OFFSET_BIT)
+#define DO_UNFILLED (IND & VIA_UNFILLED_BIT)
+#define DO_TWOSIDE  (IND & VIA_TWOSIDE_BIT)
+#define DO_FLAT      0	/* redefined to 1 for the _flat_specular instance */
+#define DO_TRI       1
+#define DO_QUAD      1
+#define DO_LINE      1
+#define DO_POINTS    1
+#define DO_FULL_QUAD 1
+
+#define HAVE_SPEC         1
+#define HAVE_BACK_COLORS  0
+#define HAVE_HW_FLATSHADE 1
+#define VERTEX            viaVertex
+#define TAB               rast_tab	/* presumably the table the generated init functions fill */
+
+/* Only used to pull back colors into vertices (ie, we know color is
+ * floating point).  Both swap elements 0 and 2; VIA_SPEC skips element 3.
+ */
+#define VIA_COLOR(dst, src)                     \
+    do {                                        \
+        dst[0] = src[2];                        \
+        dst[1] = src[1];                        \
+        dst[2] = src[0];                        \
+        dst[3] = src[3];                        \
+    } while (0)
+
+#define VIA_SPEC(dst, src)                      \
+    do {                                        \
+        dst[0] = src[2];                        \
+        dst[1] = src[1];                        \
+        dst[2] = src[0];                        \
+    } while (0)
+
+
+#define DEPTH_SCALE vmesa->polygon_offset_scale
+#define UNFILLED_TRI unfilled_tri	/* presumably generated by the t_dd_unfilled.h include below */
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW(a) (a > 0)
+#define GET_VERTEX(e) (vmesa->verts + (e * vmesa->vertexSize * sizeof(int)))	/* offset is e * vertexSize ints; assumes verts is a byte pointer - TODO confirm */
+/* Color helpers for the templates; coloroffset and specoffset are the LOCAL_VARS locals, and specoffset == 0 disables the specular ones. */
+#define VERT_SET_RGBA( v, c )  					\
+do {								\
+   via_color_t *color = (via_color_t *)&((v)->ui[coloroffset]);	\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);		\
+   UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]);		\
+} while (0)
+
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+
+#define VERT_SET_SPEC( v, c )					\
+do {								\
+   if (specoffset) {						\
+     via_color_t *color = (via_color_t *)&((v)->ui[specoffset]);	\
+     UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]);		\
+     UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]);		\
+     UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]);		\
+   }								\
+} while (0)
+#define VERT_COPY_SPEC( v0, v1 )			\
+do {							\
+   if (specoffset) {					\
+      v0->ub4[specoffset][0] = v1->ub4[specoffset][0];	\
+      v0->ub4[specoffset][1] = v1->ub4[specoffset][1];	\
+      v0->ub4[specoffset][2] = v1->ub4[specoffset][2];	\
+   }							\
+} while (0)
+
+
+#define VERT_SAVE_RGBA( idx )    color[idx] = v[idx]->ui[coloroffset]	/* color[] and spec[] are the LOCAL_VARS arrays */
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+#define VERT_SAVE_SPEC( idx )    if (specoffset) spec[idx] = v[idx]->ui[specoffset]	/* bare if: use only as a full statement */
+#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
+
+/* Locals the VERT_* macros above rely on; n sizes the color[] and spec[] save arrays. */
+#define LOCAL_VARS(n)                                                   \
+    struct via_context *vmesa = VIA_CONTEXT(ctx);                             \
+    GLuint color[n] = { 0 };                                          \
+    GLuint spec[n] = { 0 };                                           \
+    GLuint coloroffset = vmesa->coloroffset;              \
+    GLuint specoffset = vmesa->specoffset;                       \
+    (void)color; (void)spec; (void)coloroffset; (void)specoffset;
+
+
+/***********************************************************************
+ *                Helpers for rendering unfilled primitives            *
+ ***********************************************************************/
+
+static const GLenum hwPrim[GL_POLYGON + 2] = {	/* indexed by GL primitive; trailing comments name the index */
+    GL_POINTS,		/* GL_POINTS */
+    GL_LINES,		/* GL_LINES */
+    GL_LINES,		/* GL_LINE_LOOP */
+    GL_LINES,		/* GL_LINE_STRIP */
+    GL_TRIANGLES,	/* GL_TRIANGLES */
+    GL_TRIANGLES,	/* GL_TRIANGLE_STRIP */
+    GL_TRIANGLES,	/* GL_TRIANGLE_FAN */
+    GL_TRIANGLES,	/* GL_QUADS */
+    GL_TRIANGLES,	/* GL_QUAD_STRIP */
+    GL_TRIANGLES,	/* GL_POLYGON */
+    GL_POLYGON+1	/* GL_POLYGON+1 maps to itself */
+};
+
+
+#define RASTERIZE(x) viaRasterPrimitive( ctx, x, hwPrim[x] )	/* only for the unfilled helpers; #undef'd below */
+#define RENDER_PRIMITIVE vmesa->renderPrimitive
+#define TAG(x) x
+#define IND VIA_FALLBACK_BIT	/* makes DO_FALLBACK non-zero inside t_dd_unfilled.h */
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+#undef RASTERIZE
+
+/***********************************************************************
+ *                      Generate GL render functions                   *
+ ***********************************************************************/
+#define RASTERIZE(x)	/* expands to nothing for the t_dd_tritmp.h instances */
+/* One t_dd_tritmp.h instantiation per IND combination; TAG() gives each its own init<suffix>() called from init_rast_tab(). */
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_OFFSET_BIT)
+#define TAG(x) x##_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_TWOSIDE_BIT)
+#define TAG(x) x##_twoside
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_TWOSIDE_BIT|VIA_OFFSET_BIT)
+#define TAG(x) x##_twoside_offset
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_OFFSET_BIT|VIA_UNFILLED_BIT)
+#define TAG(x) x##_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_TWOSIDE_BIT|VIA_UNFILLED_BIT)
+#define TAG(x) x##_twoside_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_TWOSIDE_BIT|VIA_OFFSET_BIT|VIA_UNFILLED_BIT)
+#define TAG(x) x##_twoside_offset_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_FALLBACK_BIT)
+#define TAG(x) x##_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_OFFSET_BIT|VIA_FALLBACK_BIT)
+#define TAG(x) x##_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_TWOSIDE_BIT|VIA_FALLBACK_BIT)
+#define TAG(x) x##_twoside_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_TWOSIDE_BIT|VIA_OFFSET_BIT|VIA_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_UNFILLED_BIT|VIA_FALLBACK_BIT)
+#define TAG(x) x##_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_OFFSET_BIT|VIA_UNFILLED_BIT|VIA_FALLBACK_BIT)
+#define TAG(x) x##_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_TWOSIDE_BIT|VIA_UNFILLED_BIT|VIA_FALLBACK_BIT)
+#define TAG(x) x##_twoside_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (VIA_TWOSIDE_BIT|VIA_OFFSET_BIT|VIA_UNFILLED_BIT| \
+             VIA_FALLBACK_BIT)
+#define TAG(x) x##_twoside_offset_unfilled_fallback
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+/* Catchall case for flat, separate specular triangles (via has flat
+ * diffuse shading, but always does specular color with gouraud).  The
+ * DO_* tests below read ctx->_TriangleCaps at run time instead of IND. */
+#undef  DO_FALLBACK
+#undef  DO_OFFSET
+#undef  DO_UNFILLED
+#undef  DO_TWOSIDE
+#undef  DO_FLAT
+#define DO_FALLBACK (0)	/* TRI/QUAD/LINE/POINT then always call via_draw_* */
+#define DO_OFFSET   (ctx->_TriangleCaps & DD_TRI_OFFSET)
+#define DO_UNFILLED (ctx->_TriangleCaps & DD_TRI_UNFILLED)
+#define DO_TWOSIDE  (ctx->_TriangleCaps & DD_TRI_LIGHT_TWOSIDE)
+#define DO_FLAT     1
+#define TAG(x) x##_flat_specular
+#define IND VIA_MAX_TRIFUNC	/* the slot past the 16 bit combinations */
+#include "tnl_dd/t_dd_tritmp.h"
+
+/* Call every generated init function (presumably each fills its own rast_tab[IND] slot). */
+static void init_rast_tab(void)
+{
+    init();
+    init_offset();
+    init_twoside();
+    init_twoside_offset();
+    init_unfilled();
+    init_offset_unfilled();
+    init_twoside_unfilled();
+    init_twoside_offset_unfilled();
+    init_fallback();	/* the VIA_FALLBACK_BIT instances start here */
+    init_offset_fallback();
+    init_twoside_fallback();
+    init_twoside_offset_fallback();
+    init_unfilled_fallback();
+    init_offset_unfilled_fallback();
+    init_twoside_unfilled_fallback();
+    init_twoside_offset_unfilled_fallback();
+
+    init_flat_specular();	/* special! */
+}
+
+
+/***********************************************************************
+ *                    Rasterization fallback helpers                   *
+ ***********************************************************************/
+
+
+/* This code is hit only when a mix of accelerated and unaccelerated
+ * primitives are being drawn, and only for the unaccelerated
+ * primitives.
+ */
+/* Software triangle: translate the three hardware vertices to SWvertex
+ * and hand them to swrast between viaSpanRenderStart/Finish. */
+static void
+via_fallback_tri(struct via_context *vmesa, viaVertex *v0, viaVertex *v1,
+                 viaVertex *v2)
+{
+    GLcontext *ctx = vmesa->glCtx;
+    SWvertex sw[3];
+    _swsetup_Translate(ctx, v0, sw + 0);
+    _swsetup_Translate(ctx, v1, sw + 1);
+    _swsetup_Translate(ctx, v2, sw + 2);
+    viaSpanRenderStart( ctx );
+    _swrast_Triangle(ctx, sw + 0, sw + 1, sw + 2);
+    viaSpanRenderFinish( ctx );
+}
+
+
+/* Software line: translate both vertices and rasterize with swrast. */
+static void
+via_fallback_line(struct via_context *vmesa, viaVertex *v0, viaVertex *v1)
+{
+    GLcontext *ctx = vmesa->glCtx;
+    SWvertex sw[2];
+
+    _swsetup_Translate(ctx, v0, sw + 0);
+    _swsetup_Translate(ctx, v1, sw + 1);
+    viaSpanRenderStart( ctx );
+    _swrast_Line(ctx, sw + 0, sw + 1);
+    viaSpanRenderFinish( ctx );
+}
+
+
+/* Software point: translate the vertex and rasterize with swrast. */
+static void
+via_fallback_point(struct via_context *vmesa, viaVertex *v0)
+{
+    GLcontext *ctx = vmesa->glCtx;
+    SWvertex sw;
+    _swsetup_Translate(ctx, v0, &sw);
+    viaSpanRenderStart( ctx );
+    _swrast_Point(ctx, &sw);
+    viaSpanRenderFinish( ctx );
+}
+
+/* Set HC_HLPrst_MASK in the cached regCmdB (presumably the line-pattern reset bit). */
+static void viaResetLineStipple( GLcontext *ctx )
+{
+   VIA_CONTEXT(ctx)->regCmdB |= HC_HLPrst_MASK;
+}
+
+/**********************************************************************/
+/*               Render unclipped begin/end objects                   */
+/**********************************************************************/
+#define IND 0
+#define V(x) (viaVertex *)(vertptr + ((x) * vertsize * sizeof(int)))	/* needs locals named vertptr and vertsize */
+#define RENDER_POINTS(start, count)   \
+    for (; start < count; start++) POINT(V(ELT(start)));
+#define RENDER_LINE(v0, v1)         LINE(V(v0), V(v1))
+#define RENDER_TRI( v0, v1, v2)     TRI( V(v0), V(v1), V(v2))
+#define RENDER_QUAD(v0, v1, v2, v3) QUAD(V(v0), V(v1), V(v2), V(v3))
+#define INIT(x) viaRasterPrimitive(ctx, x, hwPrim[x])
+#undef LOCAL_VARS	/* replace the LOCAL_VARS(n) used by the tritmp instances */
+#define LOCAL_VARS                                              \
+    struct via_context *vmesa = VIA_CONTEXT(ctx);                     \
+    GLubyte *vertptr = (GLubyte *)vmesa->verts;                 \
+    const GLuint vertsize = vmesa->vertexSize;          \
+    const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts;       \
+   const GLboolean stipple = ctx->Line.StippleFlag;		\
+   (void) elt; (void) stipple;
+#define RESET_STIPPLE	if ( stipple ) viaResetLineStipple( ctx );
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS	/* presumably keeps these macros defined after the first include - TODO confirm */
+#define ELT(x) x	/* _verts instance: vertex numbers are used directly */
+#define TAG(x) via_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) via_##x##_elts
+#define ELT(x) elt[x]	/* _elts instance: vertex numbers come from vb.Elts */
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#undef NEED_EDGEFLAG_SETUP
+#undef EDGEFLAG_GET
+#undef EDGEFLAG_SET
+#undef RESET_OCCLUSION
+
+
+/**********************************************************************/
+/*                   Render clipped primitives                        */
+/**********************************************************************/
+
+
+
+/* Render a clipped polygon through the GL_POLYGON entry of the current
+ * elts render table, then re-notify the driver of the primitive that was
+ * active on entry.
+ */
+static void viaRenderClippedPoly(GLcontext *ctx, const GLuint *elts,
+                                 GLuint n)
+{
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+    struct vertex_buffer *VB = &tnl->vb;
+    const GLuint savedPrim = VIA_CONTEXT(ctx)->renderPrimitive;
+    GLuint *savedElts = VB->Elts;
+
+    /* Temporarily point the vertex buffer at the clipper's index list. */
+    VB->Elts = (GLuint *)elts;
+    tnl->Driver.Render.PrimTabElts[GL_POLYGON](ctx, 0, n,
+                                               PRIM_BEGIN|PRIM_END);
+    VB->Elts = savedElts;
+
+    /* GL_POLYGON and GL_POLYGON+1 need no restore. */
+    if (savedPrim == GL_POLYGON || savedPrim == GL_POLYGON + 1)
+        return;
+    tnl->Driver.Render.PrimitiveNotify( ctx, savedPrim );
+}
+
+/* Clipped lines go through whichever Render.Line is currently installed. */
+static void viaRenderClippedLine(GLcontext *ctx, GLuint ii, GLuint jj)
+{
+    TNL_CONTEXT(ctx)->Driver.Render.Line(ctx, ii, jj);
+}
+
+/* Emit clipped polygon elts[0..n-1] as inline triangles (i-1, i, 0).  V()
+ * expands to code that names the locals vertptr and vertsize. */
+static void viaFastRenderClippedPoly(GLcontext *ctx, const GLuint *elts,
+                                     GLuint n)
+{
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+    GLuint vertsize = vmesa->vertexSize;
+    GLubyte *vertptr = (GLubyte *)vmesa->verts;
+    GLuint *vb, i;
+    if (n < 3) return;	/* no triangle to emit; unsigned (n - 2) below would wrap */
+    vb = viaExtendPrimitive(vmesa, (n - 2) * 3 * 4 * vertsize);
+    for (i = 2; i < n; i++) {
+        COPY_DWORDS(vb, vertsize, V(elts[i - 1]));
+        COPY_DWORDS(vb, vertsize, V(elts[i]));
+        COPY_DWORDS(vb, vertsize, V(elts[0]));
+    }
+}
+
+/**********************************************************************/
+/*                    Choose render functions                         */
+/**********************************************************************/
+
+/* Mesa state bits grouped for this file; _VIA_NEW_RENDERSTATE is OR'd into vmesa->newRenderState when the ptex hack toggles. */
+#define _VIA_NEW_VERTEX (_NEW_TEXTURE |                         \
+                         _DD_NEW_SEPARATE_SPECULAR |            \
+                         _DD_NEW_TRI_UNFILLED |                 \
+                         _DD_NEW_TRI_LIGHT_TWOSIDE |            \
+                         _NEW_FOG)
+
+#define _VIA_NEW_RENDERSTATE (_DD_NEW_LINE_STIPPLE |            \
+                              _DD_NEW_TRI_UNFILLED |            \
+                              _DD_NEW_TRI_LIGHT_TWOSIDE |       \
+                              _DD_NEW_TRI_OFFSET |              \
+                              _DD_NEW_TRI_STIPPLE |             \
+                              _NEW_POLYGONSTIPPLE)
+
+
+/* Pick the rast_tab slot for the current GL state and install the matching
+ * vmesa->drawPoint/drawLine/drawTri hooks.  The tnl Render callbacks are
+ * only rewritten when the slot differs from vmesa->renderIndex; slot 0 gets
+ * the direct via_render_tab_* paths, every other slot the generic tnl tables.
+ */
+static void viaChooseRenderState(GLcontext *ctx)
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct via_context *vmesa = VIA_CONTEXT(ctx);
+   const GLuint caps = ctx->_TriangleCaps;
+   const GLboolean ptex = vmesa->ptexHack;
+   GLuint idx = ptex ? VIA_FALLBACK_BIT : 0;
+
+   /* The ptex hack draws through the vmesa->draw* hooks. */
+   vmesa->drawPoint = ptex ? via_ptex_point : via_draw_point;
+   vmesa->drawLine = ptex ? via_ptex_line : via_draw_line;
+   vmesa->drawTri = ptex ? via_ptex_tri : via_draw_triangle;
+
+   if (caps & (ANY_FALLBACK_FLAGS | ANY_RASTER_FLAGS)) {
+      if (ctx->Light.Enabled && ctx->Light.Model.TwoSide)
+         idx |= VIA_TWOSIDE_BIT;
+      if (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL)
+         idx |= VIA_UNFILLED_BIT;
+      if (caps & DD_TRI_OFFSET)
+         idx |= VIA_OFFSET_BIT;
+      if (caps & ANY_FALLBACK_FLAGS)
+         idx |= VIA_FALLBACK_BIT;
+
+      /* Per-primitive swrast hooks (the *_FALLBACK masks are 0 here). */
+      if (caps & POINT_FALLBACK)
+         vmesa->drawPoint = via_fallback_point;
+      if (caps & LINE_FALLBACK)
+         vmesa->drawLine = via_fallback_line;
+      if (caps & TRI_FALLBACK)
+         vmesa->drawTri = via_fallback_tri;
+   }
+
+   /* Flat shading with separate specular replaces the index built above. */
+   if ((caps & DD_SEPARATE_SPECULAR) && ctx->Light.ShadeModel == GL_FLAT)
+      idx = VIA_MAX_TRIFUNC;
+
+   if (vmesa->renderIndex == idx)
+      return;
+   vmesa->renderIndex = idx;
+
+   tnl->Driver.Render.Points = rast_tab[idx].points;
+   tnl->Driver.Render.Line = rast_tab[idx].line;
+   tnl->Driver.Render.Triangle = rast_tab[idx].triangle;
+   tnl->Driver.Render.Quad = rast_tab[idx].quad;
+
+   if (idx != 0) {
+      /* Generic tnl render tables plus the non-fast clipped wrappers. */
+      tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = viaRenderClippedLine;
+      tnl->Driver.Render.ClippedPolygon = viaRenderClippedPoly;
+   }
+   else {
+      /* No special cases: use the direct inline-vertex paths. */
+      tnl->Driver.Render.PrimTabVerts = via_render_tab_verts;
+      tnl->Driver.Render.PrimTabElts = via_render_tab_elts;
+      tnl->Driver.Render.ClippedLine = line; /* from tritmp.h */
+      tnl->Driver.Render.ClippedPolygon = viaFastRenderClippedPoly;
+   }
+}
+
+
+#define VIA_EMIT_TEX1	0x01	/* VIA_EMIT_*: bits OR'd into setupIndex by EMIT_ATTR */
+#define VIA_EMIT_TEX0	0x02
+#define VIA_EMIT_PTEX0	0x04
+#define VIA_EMIT_RGBA	0x08
+#define VIA_EMIT_SPEC	0x10
+#define VIA_EMIT_FOG	0x20
+#define VIA_EMIT_W	0x40
+
+#define EMIT_ATTR( ATTR, STYLE, INDEX, REGB )				\
+do {									\
+   vmesa->vertex_attrs[vmesa->vertex_attr_count].attrib = (ATTR);	\
+   vmesa->vertex_attrs[vmesa->vertex_attr_count].format = (STYLE);	\
+   vmesa->vertex_attr_count++;						\
+   setupIndex |= (INDEX);						\
+   regCmdB |= (REGB);							\
+} while (0)
+/* The EMIT_PAD written on the format line below is not re-expanded, so it names the non-macro EMIT_PAD (presumably the tnl format value). */
+#define EMIT_PAD( N )							\
+do {									\
+   vmesa->vertex_attrs[vmesa->vertex_attr_count].attrib = 0;		\
+   vmesa->vertex_attrs[vmesa->vertex_attr_count].format = EMIT_PAD;	\
+   vmesa->vertex_attrs[vmesa->vertex_attr_count].offset = (N);		\
+   vmesa->vertex_attr_count++;						\
+} while (0)
+
+
+/* Build vmesa->vertex_attrs from the tnl render inputs and reinstall them when the resulting setupIndex changes. */
+static void viaChooseVertexState( GLcontext *ctx )
+{
+   struct via_context *vmesa = VIA_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   DECLARE_RENDERINPUTS(index_bitset);
+   GLuint regCmdB = HC_HVPMSK_X | HC_HVPMSK_Y | HC_HVPMSK_Z;
+   GLuint setupIndex = 0;
+
+   RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset );
+   vmesa->vertex_attr_count = 0;
+ 
+   /* EMIT_ATTR's must be in order as they tell t_vertex.c how to
+    * build up a hardware vertex.
+    */
+   if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX ) ||
+       RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, VIA_EMIT_W, HC_HVPMSK_W );
+      vmesa->coloroffset = 4;	/* color follows the 4-float position */
+   }
+   else {
+      EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, 0, 0 );
+      vmesa->coloroffset = 3;	/* color follows the 3-float position */
+   }
+
+   /* t_context.c always includes a diffuse color */
+   EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, VIA_EMIT_RGBA, 
+	      HC_HVPMSK_Cd );
+      
+   vmesa->specoffset = 0;	/* 0 = no specular slot (tested by the VERT_*_SPEC macros) */
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) ||
+       RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) {
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) {
+	 vmesa->specoffset = vmesa->coloroffset + 1;
+	 EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_BGR, VIA_EMIT_SPEC, 
+		    HC_HVPMSK_Cs );
+      }
+      else
+	 EMIT_PAD( 3 );	/* stands in for the three specular bytes */
+
+      if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG ))
+	 EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1UB_1F, VIA_EMIT_FOG, HC_HVPMSK_Cs );
+      else
+	 EMIT_PAD( 1 );	/* stands in for the fog byte */
+   }
+
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX0 )) {
+      if (vmesa->ptexHack)
+	 EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_3F_XYW, VIA_EMIT_PTEX0, 
+		    (HC_HVPMSK_S | HC_HVPMSK_T) );
+      else 
+	 EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_2F, VIA_EMIT_TEX0, 
+		    (HC_HVPMSK_S | HC_HVPMSK_T) );
+   }
+
+   if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX1 )) {
+      EMIT_ATTR( _TNL_ATTRIB_TEX1, EMIT_2F, VIA_EMIT_TEX1, 
+		 (HC_HVPMSK_S | HC_HVPMSK_T) );
+   }
+
+   if (setupIndex != vmesa->setupIndex) {
+      vmesa->vertexSize = _tnl_install_attrs( ctx, 
+					       vmesa->vertex_attrs, 
+					       vmesa->vertex_attr_count,
+					       vmesa->ViewportMatrix.m, 0 );
+      vmesa->vertexSize >>= 2;	/* presumably bytes -> dwords; TODO confirm */
+      vmesa->setupIndex = setupIndex;
+      vmesa->regCmdB &= ~HC_HVPMSK_MASK;
+      vmesa->regCmdB |= regCmdB;
+
+      if (vmesa->ptexHack) 
+	 vmesa->hwVertexSize = vmesa->vertexSize - 1;	/* the via_ptex_* functions copy this many dwords */
+      else
+	 vmesa->hwVertexSize = vmesa->vertexSize;
+   }
+}
+
+
+
+
+/* Check if projective texture coordinates are used and if we can fake
+ * them. Fallback to swrast if we can't. Returns GL_TRUE if projective
+ * texture coordinates must be faked, GL_FALSE otherwise.
+ */
+static GLboolean viaCheckPTexHack( GLcontext *ctx )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   DECLARE_RENDERINPUTS(inputs);
+   GLboolean useHack = GL_FALSE;
+   GLboolean needSwrast = GL_FALSE;
+
+   RENDERINPUTS_COPY( inputs, tnl->render_inputs_bitset );
+   /* Size-4 coords on unit 0: fake them only when no later unit is in use. */
+   if (RENDERINPUTS_TEST( inputs, _TNL_ATTRIB_TEX0 ) && VB->AttribPtr[_TNL_ATTRIB_TEX0]->size == 4) {
+      if (RENDERINPUTS_TEST_RANGE( inputs, _TNL_ATTRIB_TEX1, _TNL_LAST_TEX ))
+         needSwrast = GL_TRUE;
+      else
+         useHack = GL_TRUE;
+   }
+   /* Size-4 coords on unit 1 always force the fallback. */
+   if (RENDERINPUTS_TEST( inputs, _TNL_ATTRIB_TEX1 ) && VB->AttribPtr[_TNL_ATTRIB_TEX1]->size == 4)
+      needSwrast = GL_TRUE;
+   FALLBACK(VIA_CONTEXT(ctx), VIA_FALLBACK_PROJ_TEXTURE, needSwrast);
+   return useHack;
+}
+
+
+
+
+/**********************************************************************/
+/*                 High level hooks for t_vb_render.c                 */
+/**********************************************************************/
+
+
+/* Render-start hook: refresh the ptex-hack decision, validate pending state
+ * and, unless a fallback is active, re-pick vertex and render state. */
+static void viaRenderStart(GLcontext *ctx)
+{
+   struct via_context *vmesa = VIA_CONTEXT(ctx);
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   const GLboolean wantHack = viaCheckPTexHack( ctx );
+
+   /* Toggling the hack changes both the vertex layout and the draw hooks. */
+   if (wantHack != vmesa->ptexHack) {
+      vmesa->ptexHack = wantHack;
+      vmesa->newRenderState |= _VIA_NEW_RENDERSTATE;
+   }
+   if (vmesa->newState) {
+      vmesa->newRenderState |= vmesa->newState;
+      viaValidateState( ctx );
+   }
+
+   /* With a fallback active, defer to the Start hook installed in tnl. */
+   if (vmesa->Fallback) {
+      tnl->Driver.Render.Start(ctx);
+      return;
+   }
+
+   if (vmesa->newRenderState) {
+      viaChooseVertexState(ctx);
+      viaChooseRenderState(ctx);
+      vmesa->newRenderState = 0;
+   }
+
+   /* Important: the position attribute is redirected to VB->NdcPtr. */
+   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
+}
+/* Render-finish hook: runs VIA_FINISH_PRIM on this context (presumably closes the open primitive). */
+static void viaRenderFinish(GLcontext *ctx)
+{
+   VIA_FINISH_PRIM(VIA_CONTEXT(ctx));
+}
+
+
+/* System to flush dma and emit state changes based on the rasterized
+ * primitive.  Records glprim; a new command header is emitted only when
+ * hwprim or the shade model differs from the cached hardware state. */
+void viaRasterPrimitive(GLcontext *ctx,
+			GLenum glprim,
+			GLenum hwprim)
+{
+   struct via_context *vmesa = VIA_CONTEXT(ctx);
+   GLuint regCmdB;
+   RING_VARS;
+
+   if (VIA_DEBUG & DEBUG_PRIMS) 
+      fprintf(stderr, "%s: %s/%s/%s\n", 
+	      __FUNCTION__, _mesa_lookup_enum_by_nr(glprim),
+	      _mesa_lookup_enum_by_nr(hwprim),
+	      _mesa_lookup_enum_by_nr(ctx->Light.ShadeModel));
+
+   assert (!vmesa->newState);
+
+   vmesa->renderPrimitive = glprim;
+
+   if (hwprim != vmesa->hwPrimitive ||
+       ctx->Light.ShadeModel != vmesa->hwShadeModel) {
+
+      VIA_FINISH_PRIM(vmesa);
+
+      /* Ensure no wrapping inside this function  */    
+      viaCheckDma( vmesa, 1024 );	
+
+      if (vmesa->newEmitState) {
+	 viaEmitState(vmesa);
+      }
+       
+      vmesa->regCmdA_End = HC_ACMD_HCmdA;
+
+      if (ctx->Light.ShadeModel == GL_SMOOTH) {
+	 vmesa->regCmdA_End |= HC_HShading_Gouraud;
+      }
+      
+      vmesa->hwShadeModel = ctx->Light.ShadeModel;
+      regCmdB = vmesa->regCmdB;	/* local copy: per-primitive bits are not written back to vmesa */
+
+      switch (hwprim) {
+      case GL_POINTS:
+	 vmesa->regCmdA_End |= HC_HPMType_Point | HC_HVCycle_Full;
+	 vmesa->regCmdA_End |= HC_HShading_Gouraud; /* always Gouraud 
+						       shade points?!? */
+	 break;
+      case GL_LINES:
+	 vmesa->regCmdA_End |= HC_HPMType_Line | HC_HVCycle_Full;
+         regCmdB |= HC_HLPrst_MASK;
+	 if (ctx->Light.ShadeModel == GL_FLAT)
+            vmesa->regCmdA_End |= HC_HShading_FlatB; 
+	 break;
+      case GL_LINE_LOOP:
+      case GL_LINE_STRIP:
+	 vmesa->regCmdA_End |= HC_HPMType_Line | HC_HVCycle_AFP |
+	    HC_HVCycle_AB | HC_HVCycle_NewB;
+	 regCmdB |= HC_HVCycle_AB | HC_HVCycle_NewB | HC_HLPrst_MASK;
+	 if (ctx->Light.ShadeModel == GL_FLAT)
+            vmesa->regCmdA_End |= HC_HShading_FlatB; 
+	 break;
+      case GL_TRIANGLES:
+	 vmesa->regCmdA_End |= HC_HPMType_Tri | HC_HVCycle_Full;
+	 if (ctx->Light.ShadeModel == GL_FLAT)
+            vmesa->regCmdA_End |= HC_HShading_FlatC; 
+	 break;
+      case GL_TRIANGLE_STRIP:
+	 vmesa->regCmdA_End |= HC_HPMType_Tri | HC_HVCycle_AFP |
+	    HC_HVCycle_AC | HC_HVCycle_BB | HC_HVCycle_NewC;
+	 regCmdB |= HC_HVCycle_AA | HC_HVCycle_BC | HC_HVCycle_NewC;
+	 if (ctx->Light.ShadeModel == GL_FLAT)
+            vmesa->regCmdA_End |= HC_HShading_FlatC; 
+	 break;
+      case GL_TRIANGLE_FAN:
+	 vmesa->regCmdA_End |= HC_HPMType_Tri | HC_HVCycle_AFP |
+	    HC_HVCycle_AA | HC_HVCycle_BC | HC_HVCycle_NewC;
+	 regCmdB |= HC_HVCycle_AA | HC_HVCycle_BC | HC_HVCycle_NewC;
+	 if (ctx->Light.ShadeModel == GL_FLAT)
+            vmesa->regCmdA_End |= HC_HShading_FlatC; 
+	 break;
+      case GL_QUADS:
+	 abort();	/* not accepted as hwprim; hwPrim[] maps GL_QUADS to GL_TRIANGLES */
+	 return;
+      case GL_QUAD_STRIP:
+	 abort();	/* not accepted as hwprim; hwPrim[] maps GL_QUAD_STRIP to GL_TRIANGLES */
+	 return;
+      case GL_POLYGON:
+	 vmesa->regCmdA_End |= HC_HPMType_Tri | HC_HVCycle_AFP |
+	    HC_HVCycle_AA | HC_HVCycle_BC | HC_HVCycle_NewC;
+	 regCmdB |= HC_HVCycle_AA | HC_HVCycle_BC | HC_HVCycle_NewC;
+	 if (ctx->Light.ShadeModel == GL_FLAT)
+            vmesa->regCmdA_End |= HC_HShading_FlatC; 
+	 break;                          
+      default:
+	 abort();
+	 return;
+      }
+    
+/*     assert((vmesa->dmaLow & 0x4) == 0); */
+
+      if (vmesa->dmaCliprectAddr == ~0) {
+	 if (VIA_DEBUG & DEBUG_DMA) 
+	    fprintf(stderr, "reserve cliprect space at %x\n", vmesa->dmaLow);
+	 vmesa->dmaCliprectAddr = vmesa->dmaLow;
+	 BEGIN_RING(8);	/* eight placeholder dwords starting at dmaCliprectAddr */
+	 OUT_RING( HC_HEADER2 );    
+	 OUT_RING( (HC_ParaType_NotTex << 16) );
+	 OUT_RING( 0xCCCCCCCC );
+	 OUT_RING( 0xCCCCCCCC );
+	 OUT_RING( 0xCCCCCCCC );
+	 OUT_RING( 0xCCCCCCCC );
+	 OUT_RING( 0xCCCCCCCC );
+	 OUT_RING( 0xCCCCCCCC );
+	 ADVANCE_RING();
+      }
+
+      assert(vmesa->dmaLastPrim == 0);
+
+      BEGIN_RING(8);
+      OUT_RING( HC_HEADER2 );    
+      OUT_RING( (HC_ParaType_NotTex << 16) );
+      OUT_RING( 0xCCCCCCCC );
+      OUT_RING( 0xDDDDDDDD );
+
+      OUT_RING( HC_HEADER2 );    
+      OUT_RING( (HC_ParaType_CmdVdata << 16) );
+      OUT_RING( regCmdB );
+      OUT_RING( vmesa->regCmdA_End );
+      ADVANCE_RING();
+
+      vmesa->hwPrimitive = hwprim;        
+      vmesa->dmaLastPrim = vmesa->dmaLow;	/* viaFinishPrimitive compares dmaLow against this */
+   }
+   else {
+      assert(!vmesa->newEmitState);
+   }
+}
+
+/* Callback for mesa (installed as tnl->Driver.Render.PrimitiveNotify):
+ * passes the GL primitive on together with its hwPrim[] table entry. */
+static void viaRenderPrimitive( GLcontext *ctx, GLuint prim )
+{
+   viaRasterPrimitive( ctx, prim, hwPrim[prim] );
+}
+
+/* Close the primitive left open in the DMA buffer, or drop it if empty. */
+void viaFinishPrimitive(struct via_context *vmesa)
+{
+   if (VIA_DEBUG & (DEBUG_DMA|DEBUG_PRIMS)) 
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (!vmesa->dmaLastPrim || vmesa->dmaCliprectAddr == ~0) {
+      assert(0);   /* called without an open primitive or cliprect slot */
+   }
+   else if (vmesa->dmaLow != vmesa->dmaLastPrim) {   /* data follows header */
+      GLuint cmdA = (vmesa->regCmdA_End | HC_HPLEND_MASK | 
+		     HC_HPMValidN_MASK | HC_HE3Fire_MASK); 
+      RING_VARS;
+
+      vmesa->dmaLastPrim = 0;
+
+      /* KW: modified 0x1 to 0x4 below.  cmdA is emitted once or twice,
+       * presumably to keep dmaLow 8-byte aligned on AGP -- TODO confirm. */
+      if ((vmesa->dmaLow & 0x4) || !vmesa->useAgp) {
+	 BEGIN_RING_NOCHECK( 1 );
+	 OUT_RING( cmdA );
+	 ADVANCE_RING();
+      }   
+      else {      
+	 BEGIN_RING_NOCHECK( 2 );
+	 OUT_RING( cmdA );
+	 OUT_RING( cmdA );
+	 ADVANCE_RING();
+      }   
+
+      if (vmesa->dmaLow > VIA_DMA_HIGHWATER)
+	 viaFlushDma( vmesa );
+   }
+   else {
+      if (VIA_DEBUG & (DEBUG_DMA|DEBUG_PRIMS)) 
+	 fprintf(stderr, "remove empty primitive\n");
+
+      /* Remove the primitive header (the 8 dwords emitted when the
+       * primitive was started; nothing was written after them): */
+      vmesa->dmaLastPrim = 0;
+      vmesa->dmaLow -= 8 * sizeof(GLuint);
+
+      /* Maybe remove the cliprect as well (only if its 8-dword slot is
+       * now the last thing in the buffer): */
+      if (vmesa->dmaCliprectAddr == vmesa->dmaLow - 8 * sizeof(GLuint)) {
+	 vmesa->dmaLow -= 8 * sizeof(GLuint);
+	 vmesa->dmaCliprectAddr = ~0;
+      }
+   }
+
+   vmesa->renderPrimitive = GL_POLYGON + 1;   /* presumably "none" -- confirm */
+   vmesa->hwPrimitive = GL_POLYGON + 1;
+   vmesa->dmaLastPrim = 0;
+}
+
+
+/**********************************************************************/
+/*           Transition to/from hardware rasterization.               */
+/**********************************************************************/
+
+/* Set (mode != 0) or clear one fallback reason bit in vmesa->Fallback. */
+void viaFallback(struct via_context *vmesa, GLuint bit, GLboolean mode)
+{
+    GLcontext *ctx = vmesa->glCtx;
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+    GLuint oldfallback = vmesa->Fallback;
+    
+    if (mode) {
+        vmesa->Fallback |= bit;
+        if (oldfallback == 0) {   /* first reason set: leave the hw path */
+	    VIA_FLUSH_DMA(vmesa);
+
+ 	    if (VIA_DEBUG & DEBUG_FALLBACKS) 
+	       fprintf(stderr, "ENTER FALLBACK %x\n", bit);
+
+            _swsetup_Wakeup(ctx);   /* presumably hands rendering to swsetup */
+            vmesa->renderIndex = ~0;
+        }
+    }
+    else {
+        vmesa->Fallback &= ~bit;
+        if (oldfallback == bit) {   /* it was the only reason: back to hw */
+	    _swrast_flush( ctx );
+
+ 	    if (VIA_DEBUG & DEBUG_FALLBACKS) 
+	       fprintf(stderr, "LEAVE FALLBACK %x\n", bit);
+
+	    tnl->Driver.Render.Start = viaRenderStart;
+            tnl->Driver.Render.PrimitiveNotify = viaRenderPrimitive;
+            tnl->Driver.Render.Finish = viaRenderFinish;
+
+	    tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+	    tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+	    tnl->Driver.Render.Interp = _tnl_interp;
+    	    tnl->Driver.Render.ResetLineStipple = viaResetLineStipple;
+
+	    _tnl_invalidate_vertex_state( ctx, ~0 );
+	    _tnl_invalidate_vertices( ctx, ~0 );
+	    _tnl_install_attrs( ctx, 
+				vmesa->vertex_attrs, 
+				vmesa->vertex_attr_count,
+				vmesa->ViewportMatrix.m, 0 ); 
+	    /* these dirty bits are picked up by viaRunPipeline via newState */
+            vmesa->newState |= (_VIA_NEW_RENDERSTATE|_VIA_NEW_VERTEX);
+        }
+    }    
+}
+
+static void viaRunPipeline( GLcontext *ctx )
+{
+   struct via_context *vmesa = VIA_CONTEXT(ctx);
+
+   if (vmesa->newState) {
+      vmesa->newRenderState |= vmesa->newState;   /* carry dirty bits over */
+      viaValidateState( ctx );
+   }
+   /* RunPipeline hook: pending state handled above, now run the TNL pipeline */
+   _tnl_run_pipeline( ctx );
+}
+
+
+/**********************************************************************/
+/*                            Initialization.                         */
+/**********************************************************************/
+
+/* Install the driver's TNL render hooks and set up TNL vertex storage. */
+void viaInitTriFuncs(GLcontext *ctx)
+{
+    static int rast_tab_built = 0;
+    TNLcontext *tnl = TNL_CONTEXT(ctx);
+    struct via_context *vmesa = VIA_CONTEXT(ctx);
+
+    /* one-time setup, guarded by a function-local static flag */
+    if (!rast_tab_built) {
+        init_rast_tab();
+        rast_tab_built = 1;
+    }
+
+    tnl->Driver.RunPipeline = viaRunPipeline;
+    tnl->Driver.Render.Start = viaRenderStart;
+    tnl->Driver.Render.PrimitiveNotify = viaRenderPrimitive;
+    tnl->Driver.Render.Finish = viaRenderFinish;
+    tnl->Driver.Render.ResetLineStipple = viaResetLineStipple;
+
+    /* the remaining hooks use the generic _tnl_* implementations */
+    tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+    tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+    tnl->Driver.Render.Interp = _tnl_interp;
+    _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
+			(6 + 2*ctx->Const.MaxTextureUnits) * sizeof(GLfloat) );
+    vmesa->verts = (GLubyte *)tnl->clipspace.vertex_buf;
+}
diff --git a/src/mesa/drivers/dri/unichrome/via_tris.h b/src/mesa/drivers/dri/unichrome/via_tris.h
new file mode 100644
index 0000000000..bc6ef4e4eb
--- /dev/null
+++ b/src/mesa/drivers/dri/unichrome/via_tris.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * VIA, S3 GRAPHICS, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _VIATRIS_H
+#define _VIATRIS_H
+
+#include "main/mtypes.h"
+
+extern void viaPrintRenderState(const char *msg, GLuint state);
+extern void viaInitTriFuncs(GLcontext *ctx);
+extern void viaRasterPrimitive(GLcontext *ctx, GLenum rPrim, GLuint hwPrim);
+extern void viaRasterPrimitiveFinish(GLcontext *ctx);
+
+#endif
diff --git a/src/mesa/drivers/fbdev/Makefile b/src/mesa/drivers/fbdev/Makefile
new file mode 100644
index 0000000000..5120e1ac9e
--- /dev/null
+++ b/src/mesa/drivers/fbdev/Makefile
@@ -0,0 +1,36 @@
+# src/mesa/drivers/fbdev/Makefile for libGL.so
+
+TOP = ../../../..
+
+include $(TOP)/configs/current
+
+
+SOURCES = glfbdev.c
+
+OBJECTS = $(SOURCES:.c=.o)
+
+INCLUDE_DIRS = \
+	-I$(TOP)/include \
+	-I$(TOP)/src/mapi \
+	-I$(TOP)/src/mesa \
+	-I$(TOP)/src/mesa/main
+
+CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mapi/glapi/libglapi.a
+
+
+.c.o:
+	$(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
+
+
+default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME)
+
+
+$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(CORE_MESA) $(OBJECTS)
+	@ $(MKLIB) -o $(GL_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+		-major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \
+		-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
+		$(CORE_MESA) $(OBJECTS) $(GL_LIB_DEPS)
+
+
+clean:
+	-rm -f $(OBJECTS)
diff --git a/src/mesa/drivers/fbdev/glfbdev.c b/src/mesa/drivers/fbdev/glfbdev.c
new file mode 100644
index 0000000000..2ad52d89fc
--- /dev/null
+++ b/src/mesa/drivers/fbdev/glfbdev.c
@@ -0,0 +1,828 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * OpenGL (Mesa) interface for fbdev.
+ * For info about fbdev:
+ * http://www.tldp.org/HOWTO/Framebuffer-HOWTO.html
+ *
+ * known VGA modes
+ * Colours   640x400 640x480 800x600 1024x768 1152x864 1280x1024 1600x1200
+ * --------+--------------------------------------------------------------
+ *  4 bits |    ?       ?     0x302      ?        ?        ?         ?
+ *  8 bits |  0x300   0x301   0x303    0x305    0x161    0x307     0x31C
+ * 15 bits |    ?     0x310   0x313    0x316    0x162    0x319     0x31D
+ * 16 bits |    ?     0x311   0x314    0x317    0x163    0x31A     0x31E
+ * 24 bits |    ?     0x312   0x315    0x318      ?      0x31B     0x31F
+ * 32 bits |    ?       ?       ?        ?      0x164      ?
+ */
+
+#ifdef USE_GLFBDEV_DRIVER
+
+#include "GL/glfbdev.h"
+#include <linux/fb.h>
+#include "main/glheader.h"
+#include "main/buffers.h"
+#include "main/context.h"
+#include "main/extensions.h"
+#include "main/fbobject.h"
+#include "main/framebuffer.h"
+#include "main/imports.h"
+#include "main/renderbuffer.h"
+#include "main/texformat.h"
+#include "main/teximage.h"
+#include "main/texstore.h"
+#include "vbo/vbo.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+#include "drivers/common/driverfuncs.h"
+
+
+/**
+ * Pixel formats we support:
+ */
+#define PF_B8G8R8     1
+#define PF_B8G8R8A8   2
+#define PF_B5G6R5     3
+#define PF_B5G5R5     4
+
+
+/**
+ * Derived from Mesa's GLvisual class.
+ */
+struct GLFBDevVisualRec {
+   GLvisual glvisual;              /* base class */
+   struct fb_fix_screeninfo fix;   /* copy of the fbdev fixed screen info */
+   struct fb_var_screeninfo var;   /* copy of the fbdev variable screen info */
+   int pixelFormat;                /* one of the PF_* codes above */
+};
+
+/**
+ * Derived from Mesa's GLframebuffer class.
+ */
+struct GLFBDevBufferRec {
+   GLframebuffer glframebuffer;    /* base class */
+   GLFBDevVisualPtr visual;        /* visual passed to glFBDevCreateBuffer */
+   struct fb_fix_screeninfo fix;   /* copy of the fbdev fixed screen info */
+   struct fb_var_screeninfo var;   /* copy; xres/yres give the buffer size */
+   size_t size;                    /* color buffer size in bytes */
+   GLuint bytesPerPixel;           /* var.bits_per_pixel / 8 */
+};
+
+/**
+ * Derived from Mesa's GLcontext class.
+ */
+struct GLFBDevContextRec {
+   GLcontext glcontext;            /* base class */
+   GLFBDevVisualPtr visual;        /* visual the context was created with */
+   GLFBDevBufferPtr drawBuffer;    /* set by glFBDevMakeCurrent */
+   GLFBDevBufferPtr readBuffer;    /* set by glFBDevMakeCurrent */
+   GLFBDevBufferPtr curBuffer;     /* set to drawBuffer by glFBDevMakeCurrent */
+};
+
+/**
+ * Derived from Mesa's gl_renderbuffer class.
+ */
+struct GLFBDevRenderbufferRec {
+   struct gl_renderbuffer Base;    /* base class; Base.Data is the pixel memory */
+   GLubyte *bottom;                /* pointer to last row */
+   GLuint rowStride;               /* in bytes */
+   GLboolean mallocedBuffer;       /* true: Base.Data is freed on delete */
+};
+
+
+/**********************************************************************/
+/* Internal device driver functions                                   */
+/**********************************************************************/
+
+
+/* GetString hook: only GL_RENDERER is answered, anything else gets NULL. */
+static const GLubyte *
+get_string(GLcontext *ctx, GLenum pname)
+{
+   (void) ctx;
+
+   if (pname == GL_RENDERER)
+      return (const GLubyte *) "Mesa glfbdev";
+
+   return NULL;
+}
+
+
+static void
+update_state( GLcontext *ctx, GLuint new_state )
+{
+   /* UpdateState hook: just pass the dirty-state bits on to each module */
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+}
+
+/* GetBufferSize hook: report the buffer's fbdev xres/yres. */
+static void
+get_buffer_size( GLframebuffer *buffer, GLuint *width, GLuint *height )
+{
+   const struct fb_var_screeninfo *mode = &((GLFBDevBufferPtr) buffer)->var;
+   *width = mode->xres;
+   *height = mode->yres;
+}
+
+
+/**
+ * Viewport hook.  The viewport rectangle itself is not used; the call only
+ * serves to detect an fbdev resolution change and to resize the
+ * window-system draw and read framebuffers accordingly.
+ */
+static void
+viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+   int pass;
+
+   /* pass 0 checks the draw buffer, pass 1 the read buffer */
+   for (pass = 0; pass < 2; pass++) {
+      GLframebuffer *fb = pass ? ctx->WinSysReadBuffer : ctx->WinSysDrawBuffer;
+      GLuint curWidth, curHeight;
+
+      get_buffer_size( fb, &curWidth, &curHeight );
+      if (fb->Width == curWidth && fb->Height == curHeight)
+         continue;
+
+      _mesa_resize_framebuffer(ctx, fb, curWidth, curHeight );
+   }
+}
+
+
+/*
+ * Generate code for span functions.
+ */
+
+/* 24-bit BGR */
+#define NAME(PREFIX) PREFIX##_B8G8R8
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   struct GLFBDevRenderbufferRec *frb = (struct GLFBDevRenderbufferRec *) rb;
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLubyte *P = frb->bottom - (Y) * frb->rowStride + (X) * 3
+#define INC_PIXEL_PTR(P) P += 3
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[0] = VALUE[BCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[2] = VALUE[RCOMP]
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[2];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[0];  \
+   DST[ACOMP] = CHAN_MAX
+
+#include "swrast/s_spantemp.h"
+
+
+/* 32-bit BGRA */
+#define NAME(PREFIX) PREFIX##_B8G8R8A8
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   struct GLFBDevRenderbufferRec *frb = (struct GLFBDevRenderbufferRec *) rb;
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLubyte *P = frb->bottom - (Y) * frb->rowStride + (X) * 4
+#define INC_PIXEL_PTR(P) P += 4
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[0] = VALUE[BCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[2] = VALUE[RCOMP];  \
+   DST[3] = VALUE[ACOMP]
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   DST[0] = VALUE[BCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[2] = VALUE[RCOMP];
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[2];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[0];  \
+   DST[ACOMP] = SRC[3]
+
+#include "swrast/s_spantemp.h"
+
+
+/* 16-bit BGR (XXX implement dithering someday) */
+#define NAME(PREFIX) PREFIX##_B5G6R5
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   struct GLFBDevRenderbufferRec *frb = (struct GLFBDevRenderbufferRec *) rb;
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLushort *P = (GLushort *) (frb->bottom - (Y) * frb->rowStride + (X) * 2)
+#define INC_PIXEL_PTR(P) P += 1
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[0] = ( (((VALUE[RCOMP]) & 0xf8) << 8) | (((VALUE[GCOMP]) & 0xfc) << 3) | ((VALUE[BCOMP]) >> 3) )
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = ( (((SRC[0]) >> 8) & 0xf8) | (((SRC[0]) >> 11) & 0x7) ); \
+   DST[GCOMP] = ( (((SRC[0]) >> 3) & 0xfc) | (((SRC[0]) >>  5) & 0x3) ); \
+   DST[BCOMP] = ( (((SRC[0]) << 3) & 0xf8) | (((SRC[0])      ) & 0x7) ); \
+   DST[ACOMP] = CHAN_MAX
+
+#include "swrast/s_spantemp.h"
+
+
+/* 15-bit BGR (XXX implement dithering someday) */
+#define NAME(PREFIX) PREFIX##_B5G5R5
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   struct GLFBDevRenderbufferRec *frb = (struct GLFBDevRenderbufferRec *) rb;
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLushort *P = (GLushort *) (frb->bottom - (Y) * frb->rowStride + (X) * 2)
+#define INC_PIXEL_PTR(P) P += 1
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[0] = ( (((VALUE[RCOMP]) & 0xf8) << 7) | (((VALUE[GCOMP]) & 0xf8) << 2) | ((VALUE[BCOMP]) >> 3) )
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = ( (((SRC[0]) >> 7) & 0xf8) | (((SRC[0]) >> 10) & 0x7) ); \
+   DST[GCOMP] = ( (((SRC[0]) >> 2) & 0xf8) | (((SRC[0]) >>  5) & 0x7) ); \
+   DST[BCOMP] = ( (((SRC[0]) << 3) & 0xf8) | (((SRC[0])      ) & 0x7) ); \
+   DST[ACOMP] = CHAN_MAX
+
+#include "swrast/s_spantemp.h"
+
+
+/**********************************************************************/
+/* Public API functions                                               */
+/**********************************************************************/
+
+
+/* Return the GLFBDEV_VENDOR or GLFBDEV_VERSION string, NULL otherwise. */
+const char *
+glFBDevGetString( int str )
+{
+   const char *answer = NULL;
+
+   if (str == GLFBDEV_VENDOR)
+      answer = "Mesa Project";
+   else if (str == GLFBDEV_VERSION)
+      answer = "1.0.1";
+   return answer;
+}
+
+/* Look up an entry point by name: GLFBDev functions first, then the GL API. */
+GLFBDevProc
+glFBDevGetProcAddress( const char *procName )
+{
+   struct name_address {
+      const char *name;
+      const GLFBDevProc func;
+   };
+   static const struct name_address functions[] = {
+      { "glFBDevGetString", (GLFBDevProc) glFBDevGetString },
+      { "glFBDevGetProcAddress", (GLFBDevProc) glFBDevGetProcAddress },
+      { "glFBDevCreateVisual", (GLFBDevProc) glFBDevCreateVisual },
+      { "glFBDevDestroyVisual", (GLFBDevProc) glFBDevDestroyVisual },
+      { "glFBDevGetVisualAttrib", (GLFBDevProc) glFBDevGetVisualAttrib },
+      { "glFBDevCreateBuffer", (GLFBDevProc) glFBDevCreateBuffer },
+      { "glFBDevDestroyBuffer", (GLFBDevProc) glFBDevDestroyBuffer },
+      { "glFBDevGetBufferAttrib", (GLFBDevProc) glFBDevGetBufferAttrib },
+      { "glFBDevGetCurrentDrawBuffer", (GLFBDevProc) glFBDevGetCurrentDrawBuffer },
+      { "glFBDevGetCurrentReadBuffer", (GLFBDevProc) glFBDevGetCurrentReadBuffer },
+      { "glFBDevSwapBuffers", (GLFBDevProc) glFBDevSwapBuffers },
+      { "glFBDevCreateContext", (GLFBDevProc) glFBDevCreateContext },
+      { "glFBDevDestroyContext", (GLFBDevProc) glFBDevDestroyContext },
+      { "glFBDevGetContextAttrib", (GLFBDevProc) glFBDevGetContextAttrib },
+      { "glFBDevGetCurrentContext", (GLFBDevProc) glFBDevGetCurrentContext },
+      { "glFBDevMakeCurrent", (GLFBDevProc) glFBDevMakeCurrent },
+      { NULL, NULL }   /* end-of-table sentinel */
+   };
+   const struct name_address *cur = functions;
+   while (cur->name) {
+      if (strcmp(cur->name, procName) == 0)
+         return cur->func;
+      cur++;
+   }
+   return _glapi_get_proc_address(procName);
+}
+
+/* Create a visual from fbdev screen info + attribute list; NULL on failure. */
+GLFBDevVisualPtr
+glFBDevCreateVisual( const struct fb_fix_screeninfo *fixInfo,
+                     const struct fb_var_screeninfo *varInfo,
+                     const int *attribs )
+{
+   GLFBDevVisualPtr vis;
+   const int *attrib;
+   GLboolean dbFlag = GL_FALSE, stereoFlag = GL_FALSE;
+   GLint redBits = 0, greenBits = 0, blueBits = 0, alphaBits = 0;
+   GLint depthBits = 0, stencilBits = 0;
+   GLint accumRedBits = 0, accumGreenBits = 0;
+   GLint accumBlueBits = 0, accumAlphaBits = 0;
+   GLint numSamples = 0;
+
+   ASSERT(fixInfo);
+   ASSERT(varInfo);
+
+   vis = CALLOC_STRUCT(GLFBDevVisualRec);
+   if (!vis)
+      return NULL;
+
+   vis->fix = *fixInfo;  /* struct assignment */
+   vis->var = *varInfo;  /* struct assignment */
+   /* attribs may be NULL; tokens with a value also consume the next int */
+   for (attrib = attribs; attrib && *attrib != GLFBDEV_NONE; attrib++) {
+      switch (*attrib) {
+      case GLFBDEV_DOUBLE_BUFFER:
+         dbFlag = GL_TRUE;
+         break;
+      case GLFBDEV_DEPTH_SIZE:
+         depthBits = attrib[1];
+         attrib++;
+         break;
+      case GLFBDEV_STENCIL_SIZE:
+         stencilBits = attrib[1];
+         attrib++;
+         break;
+      case GLFBDEV_ACCUM_SIZE:
+         accumRedBits = accumGreenBits = accumBlueBits = accumAlphaBits
+            = attrib[1];
+         attrib++;
+         break;
+      case GLFBDEV_LEVEL:
+         /* ignored for now (NOTE(review): no value is skipped here -- confirm) */
+         break;
+      case GLFBDEV_MULTISAMPLE:
+         numSamples = attrib[1];
+         attrib++;
+         break;
+      case GLFBDEV_COLOR_INDEX:
+         /* Mesa no longer supports color-index rendering: falls through. */
+      default:
+         /* unexpected token */
+         free(vis);
+         return NULL;
+      }
+   }
+   /* color channel sizes always come from the fbdev mode, not from attribs */
+   redBits   = varInfo->red.length;
+   greenBits = varInfo->green.length;
+   blueBits  = varInfo->blue.length;
+   alphaBits = varInfo->transp.length;
+   /* choose the PF_* span format from bits per pixel and channel offsets */
+   if (fixInfo->visual == FB_VISUAL_TRUECOLOR ||
+       fixInfo->visual == FB_VISUAL_DIRECTCOLOR) {
+      if (varInfo->bits_per_pixel == 24
+          && varInfo->red.offset == 16
+          && varInfo->green.offset == 8
+          && varInfo->blue.offset == 0) {
+         vis->pixelFormat = PF_B8G8R8;
+      }
+      else if (varInfo->bits_per_pixel == 32
+               && varInfo->red.offset == 16
+               && varInfo->green.offset == 8
+               && varInfo->blue.offset == 0) {
+         vis->pixelFormat = PF_B8G8R8A8;
+      }
+      else if (varInfo->bits_per_pixel == 16
+               && varInfo->red.offset == 11
+               && varInfo->green.offset == 5
+               && varInfo->blue.offset == 0) {
+         vis->pixelFormat = PF_B5G6R5;
+      }
+      else if (varInfo->bits_per_pixel == 16
+               && varInfo->red.offset == 10
+               && varInfo->green.offset == 5
+               && varInfo->blue.offset == 0) {
+         vis->pixelFormat = PF_B5G5R5;
+      }
+      else {
+         _mesa_problem(NULL, "Unsupported fbdev RGB visual/bitdepth!\n");
+         free(vis);
+         return NULL;
+      }
+   }
+   /* NOTE(review): other fbdev visual types skip the block above unchecked */
+   if (!_mesa_initialize_visual(&vis->glvisual, dbFlag, stereoFlag,
+                                redBits, greenBits, blueBits, alphaBits,
+                                depthBits, stencilBits,
+                                accumRedBits, accumGreenBits,
+                                accumBlueBits, accumAlphaBits,
+                                numSamples)) {
+      /* something was invalid */
+      free(vis);
+      return NULL;
+   }
+
+   return vis;
+}
+
+
+/* Free a visual made by glFBDevCreateVisual(); a NULL visual is accepted. */
+void
+glFBDevDestroyVisual( GLFBDevVisualPtr visual )
+{
+   free(visual);   /* free(NULL) does nothing, so no guard is needed */
+}
+
+
+int
+glFBDevGetVisualAttrib( const GLFBDevVisualPtr visual, int attrib)
+{
+   /* XXX unfinished: no attribute is implemented, -1 is always returned */
+   (void) visual;
+   (void) attrib;
+   return -1;
+}
+
+/* Delete hook: frees the wrapper, and the pixels only if we malloc'ed them. */
+static void
+delete_renderbuffer(struct gl_renderbuffer *rb)
+{
+   struct GLFBDevRenderbufferRec *frb = (struct GLFBDevRenderbufferRec *) rb;
+   if (frb->mallocedBuffer) {
+      free(frb->Base.Data);
+   }
+   free(frb);
+}
+
+
+static GLboolean
+renderbuffer_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
+                     GLenum internalFormat, GLuint width, GLuint height)
+{
+   /* AllocStorage hook, a no-op that ignores all arguments: the storage is
+    * set up just once when the renderbuffer is created.  Never resized or
+    * reallocated. */
+   return GL_TRUE;
+}
+
+/* Create a renderbuffer wrapping caller-provided pixels (NULL if no memory). */
+static struct GLFBDevRenderbufferRec *
+new_glfbdev_renderbuffer(void *bufferStart, const GLFBDevVisualPtr visual)
+{
+   struct GLFBDevRenderbufferRec *rb = CALLOC_STRUCT(GLFBDevRenderbufferRec);
+   if (rb) {
+      GLuint name = 0;
+      int pixelFormat = visual->pixelFormat;
+
+      _mesa_init_renderbuffer(&rb->Base, name);
+
+      rb->Base.Delete = delete_renderbuffer;
+      rb->Base.AllocStorage = renderbuffer_storage;
+      /* span accessors: one set per PF_* format, named with that suffix */
+      if (pixelFormat == PF_B8G8R8) {
+         rb->Base.GetRow = get_row_B8G8R8;
+         rb->Base.GetValues = get_values_B8G8R8;
+         rb->Base.PutRow = put_row_B8G8R8;
+         rb->Base.PutRowRGB = put_row_rgb_B8G8R8;
+         rb->Base.PutMonoRow = put_mono_row_B8G8R8;
+         rb->Base.PutValues = put_values_B8G8R8;
+         rb->Base.PutMonoValues = put_mono_values_B8G8R8;
+      }
+      else if (pixelFormat == PF_B8G8R8A8) {
+         rb->Base.GetRow = get_row_B8G8R8A8;
+         rb->Base.GetValues = get_values_B8G8R8A8;
+         rb->Base.PutRow = put_row_B8G8R8A8;
+         rb->Base.PutRowRGB = put_row_rgb_B8G8R8A8;
+         rb->Base.PutMonoRow = put_mono_row_B8G8R8A8;
+         rb->Base.PutValues = put_values_B8G8R8A8;
+         rb->Base.PutMonoValues = put_mono_values_B8G8R8A8;
+      }
+      else if (pixelFormat == PF_B5G6R5) {
+         rb->Base.GetRow = get_row_B5G6R5;
+         rb->Base.GetValues = get_values_B5G6R5;
+         rb->Base.PutRow = put_row_B5G6R5;
+         rb->Base.PutRowRGB = put_row_rgb_B5G6R5;
+         rb->Base.PutMonoRow = put_mono_row_B5G6R5;
+         rb->Base.PutValues = put_values_B5G6R5;
+         rb->Base.PutMonoValues = put_mono_values_B5G6R5;
+      }
+      else if (pixelFormat == PF_B5G5R5) {
+         rb->Base.GetRow = get_row_B5G5R5;
+         rb->Base.GetValues = get_values_B5G5R5;
+         rb->Base.PutRow = put_row_B5G5R5;
+         rb->Base.PutRowRGB = put_row_rgb_B5G5R5;
+         rb->Base.PutMonoRow = put_mono_row_B5G5R5;
+         rb->Base.PutValues = put_values_B5G5R5;
+         rb->Base.PutMonoValues = put_mono_values_B5G5R5;
+      }
+      /* NOTE(review): any other pixelFormat leaves the accessors untouched */
+      rb->Base.InternalFormat = GL_RGBA;
+      rb->Base._BaseFormat = GL_RGBA;
+      rb->Base.DataType = GL_UNSIGNED_BYTE;
+      rb->Base.Data = bufferStart;
+      /* rows are addressed upward from 'bottom', the last visible row */
+      rb->rowStride = visual->var.xres_virtual * visual->var.bits_per_pixel / 8;
+      rb->bottom = (GLubyte *) bufferStart
+                 + (visual->var.yres - 1) * rb->rowStride;
+
+      rb->Base.Width = visual->var.xres;
+      rb->Base.Height = visual->var.yres;
+
+      /*
+      rb->Base.RedBits = visual->var.red.length;
+      rb->Base.GreenBits = visual->var.green.length;
+      rb->Base.BlueBits = visual->var.blue.length;
+      rb->Base.AlphaBits = visual->var.transp.length;
+      */
+      /* NOTE(review): this replaces the GL_RGBA stored in the field above */
+      rb->Base.InternalFormat = pixelFormat;
+   }
+   return rb;
+}
+
+/* Wrap the given color buffer(s), each 'size' bytes, in a new GLFBDev
+ * framebuffer.  Returns NULL on visual mismatch or when out of memory. */
+GLFBDevBufferPtr
+glFBDevCreateBuffer( const struct fb_fix_screeninfo *fixInfo,
+                     const struct fb_var_screeninfo *varInfo,
+                     const GLFBDevVisualPtr visual,
+                     void *frontBuffer, void *backBuffer, size_t size )
+{
+   struct GLFBDevRenderbufferRec *frontrb, *backrb;
+   GLFBDevBufferPtr buf;
+   int malloced = 0;   /* set when the back buffer is allocated here */
+
+   ASSERT(visual);
+   ASSERT(frontBuffer);
+   ASSERT(size > 0);
+
+   /* Compare before the visual is refreshed below; done afterwards (as it
+    * used to be) the test compares the new info with itself and never fails. */
+   if (visual->fix.visual != fixInfo->visual ||
+       visual->fix.type != fixInfo->type ||
+       visual->var.bits_per_pixel != varInfo->bits_per_pixel ||
+       visual->var.grayscale != varInfo->grayscale ||
+       visual->var.red.offset != varInfo->red.offset ||
+       visual->var.green.offset != varInfo->green.offset ||
+       visual->var.blue.offset != varInfo->blue.offset ||
+       visual->var.transp.offset != varInfo->transp.offset)
+      return NULL;   /* visual mismatch! */
+
+   /* update the visual if there was a resize and the buffer is created again */
+   visual->var = *varInfo;
+   visual->fix = *fixInfo;
+
+   buf = CALLOC_STRUCT(GLFBDevBufferRec);
+   if (!buf)
+      return NULL;
+
+   /* basic framebuffer setup, then the front renderbuffer */
+   _mesa_initialize_window_framebuffer(&buf->glframebuffer, &visual->glvisual);
+   frontrb = new_glfbdev_renderbuffer(frontBuffer, visual);
+   if (!frontrb)
+      goto fail;   /* out of mem: NULL was attached unchecked before */
+   _mesa_add_renderbuffer(&buf->glframebuffer, BUFFER_FRONT_LEFT,
+                          &frontrb->Base);
+
+   if (visual->glvisual.doubleBufferMode) {
+      malloced = !backBuffer;
+      if (malloced)
+         backBuffer = malloc(size);   /* caller gave none: make our own */
+      if (!backBuffer)
+         goto fail;
+      backrb = new_glfbdev_renderbuffer(backBuffer, visual);
+      if (!backrb)
+         goto fail;   /* out of mem */
+      backrb->mallocedBuffer = malloced;
+      _mesa_add_renderbuffer(&buf->glframebuffer, BUFFER_BACK_LEFT,
+                             &backrb->Base);
+   }
+   /* add software renderbuffers */
+   _mesa_add_soft_renderbuffers(&buf->glframebuffer,
+                                GL_FALSE, /* color */
+                                visual->glvisual.haveDepthBuffer,
+                                visual->glvisual.haveStencilBuffer,
+                                visual->glvisual.haveAccumBuffer,
+                                GL_FALSE, /* alpha */
+                                GL_FALSE /* aux bufs */);
+
+   buf->fix = *fixInfo;   /* struct assignment */
+   buf->var = *varInfo;   /* struct assignment */
+   buf->visual = visual;  /* ptr assignment */
+   buf->size = size;
+   buf->bytesPerPixel = visual->var.bits_per_pixel / 8;
+   return buf;
+
+fail:
+   if (malloced)
+      free(backBuffer);   /* never free a caller-owned back buffer */
+   _mesa_free_framebuffer_data(&buf->glframebuffer);
+   free(buf);
+   return NULL;
+}
+
+/* Release a buffer, unbinding first if it is the current draw/read buffer. */
+void
+glFBDevDestroyBuffer( GLFBDevBufferPtr buffer )
+{
+   GLFBDevBufferPtr boundDraw, boundRead;
+   struct gl_framebuffer *fb;
+
+   if (!buffer)
+      return;
+
+   boundDraw = glFBDevGetCurrentDrawBuffer();
+   boundRead = glFBDevGetCurrentReadBuffer();
+   if (boundDraw == buffer || boundRead == buffer)
+      glFBDevMakeCurrent( NULL, NULL, NULL);
+   fb = &buffer->glframebuffer;   /* the helper wants a pointer to the pointer */
+   _mesa_reference_framebuffer(&fb, NULL);
+}
+
+/* Not implemented: ignores both arguments and always returns -1. */
+int
+glFBDevGetBufferAttrib( const GLFBDevBufferPtr buffer, int attrib)
+{
+   (void) buffer;
+   (void) attrib;
+   return -1;
+}
+
+
+/* Draw buffer of the current context as recorded by glFBDevMakeCurrent(),
+ * or NULL when no context is current. */
+GLFBDevBufferPtr
+glFBDevGetCurrentDrawBuffer( void )
+{
+   GLFBDevContextPtr cur = glFBDevGetCurrentContext();
+
+   return cur ? cur->drawBuffer : NULL;
+}
+
+
+/* Read buffer of the current context as recorded by glFBDevMakeCurrent(),
+ * or NULL when no context is current. */
+GLFBDevBufferPtr
+glFBDevGetCurrentReadBuffer( void )
+{
+   GLFBDevContextPtr cur = glFBDevGetCurrentContext();
+
+   return cur ? cur->readBuffer : NULL;
+}
+
+
+void
+glFBDevSwapBuffers( GLFBDevBufferPtr buffer )
+{
+   GLFBDevContextPtr fbdevctx = glFBDevGetCurrentContext();
+   struct GLFBDevRenderbufferRec *frontrb, *backrb;
+
+   /* Copies the back color buffer to the front.  Validate first: the
+    * attachments used to be read before this NULL check. */
+   if (!buffer || !buffer->visual->glvisual.doubleBufferMode)
+      return;
+   frontrb = (struct GLFBDevRenderbufferRec *)
+      buffer->glframebuffer.Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+   backrb = (struct GLFBDevRenderbufferRec *)
+      buffer->glframebuffer.Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+
+   /* flush pending rendering if bound; there may be no current context */
+   if (fbdevctx && fbdevctx->drawBuffer == buffer)
+      _mesa_notifySwapBuffers(&fbdevctx->glcontext);
+   ASSERT(frontrb->Base.Data);
+   ASSERT(backrb->Base.Data);
+   memcpy(frontrb->Base.Data, backrb->Base.Data, buffer->size);
+}
+
+/* Create a software-rendering context for the visual; NULL on failure. */
+GLFBDevContextPtr
+glFBDevCreateContext( const GLFBDevVisualPtr visual, GLFBDevContextPtr share )
+{
+   GLFBDevContextPtr ctx;
+   GLcontext *glctx;
+   struct dd_function_table functions;
+
+   ASSERT(visual);
+
+   ctx = CALLOC_STRUCT(GLFBDevContextRec);
+   if (!ctx)
+      return NULL;
+
+   /* build table of device driver functions */
+   _mesa_init_driver_functions(&functions);
+   functions.GetString = get_string;
+   functions.UpdateState = update_state;
+   functions.GetBufferSize = get_buffer_size;
+   functions.Viewport = viewport;
+   /* 'share' is optional; when given, its GLcontext is passed as the share */
+   if (!_mesa_initialize_context(&ctx->glcontext, &visual->glvisual,
+                                 share ? &share->glcontext : NULL,
+                                 &functions, (void *) ctx)) {
+      free(ctx);
+      return NULL;
+   }
+
+   ctx->visual = visual;
+
+   /* Create module contexts */
+   glctx = (GLcontext *) &ctx->glcontext;
+   _swrast_CreateContext( glctx );
+   _vbo_CreateContext( glctx );
+   _tnl_CreateContext( glctx );
+   _swsetup_CreateContext( glctx );
+   _swsetup_Wakeup( glctx );
+
+   /* use default TCL pipeline */
+   {
+      TNLcontext *tnl = TNL_CONTEXT(glctx);
+      tnl->Driver.RunPipeline = _tnl_run_pipeline;
+   }
+   /* enable the software and the GL 1.3 through 2.1 extension sets */
+   _mesa_enable_sw_extensions(glctx);
+   _mesa_enable_1_3_extensions(glctx);
+   _mesa_enable_1_4_extensions(glctx);
+   _mesa_enable_1_5_extensions(glctx);
+   _mesa_enable_2_0_extensions(glctx);
+   _mesa_enable_2_1_extensions(glctx);
+
+   return ctx;
+}
+
+/* Destroy a context, unbinding it first if it is the current one. */
+void
+glFBDevDestroyContext( GLFBDevContextPtr context )
+{
+   GLFBDevContextPtr fbdevctx = glFBDevGetCurrentContext();
+   GLcontext *mesaCtx;
+
+   if (!context)
+      return;
+   mesaCtx = &context->glcontext;
+
+   /* Unbind before tearing down swsetup/swrast/tnl/vbo: unbinding may still
+    * flush the outgoing context, which needs those modules alive. */
+   if (fbdevctx == context)
+      _mesa_make_current(NULL, NULL, NULL);
+   _swsetup_DestroyContext( mesaCtx );
+   _swrast_DestroyContext( mesaCtx );
+   _tnl_DestroyContext( mesaCtx );
+   _vbo_DestroyContext( mesaCtx );
+   _mesa_free_context_data(mesaCtx);
+   free(context);
+}
+
+/* Not implemented: ignores both arguments and always returns -1. */
+int
+glFBDevGetContextAttrib( const GLFBDevContextPtr context, int attrib)
+{
+   (void) context;
+   (void) attrib;
+   return -1;
+}
+
+/* Return the current context; GLcontext is the first member of our record. */
+GLFBDevContextPtr
+glFBDevGetCurrentContext( void )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   return (GLFBDevContextPtr) ctx;
+}
+
+/* Bind a context to its buffers, or unbind when any argument is NULL. */
+int
+glFBDevMakeCurrent( GLFBDevContextPtr context,
+                    GLFBDevBufferPtr drawBuffer,
+                    GLFBDevBufferPtr readBuffer )
+{
+   if (!context || !drawBuffer || !readBuffer) {
+      /* unbind */
+      _mesa_make_current( NULL, NULL, NULL );
+      return 1;
+   }
+
+   /* The context and both buffers must have been created with the very
+    * same visual object; a mismatch is reported by returning 0.
+    * XXX we might do this by comparing specific fields like bits_per_pixel,
+    * visual, etc. in the future.
+    */
+   if (drawBuffer->visual != context->visual ||
+       readBuffer->visual != context->visual)
+      return 0;
+
+   _mesa_make_current( &context->glcontext,
+                       &drawBuffer->glframebuffer,
+                       &readBuffer->glframebuffer );
+   context->drawBuffer = drawBuffer;
+   context->readBuffer = readBuffer;
+   context->curBuffer = drawBuffer;
+   return 1;
+}
+
+#endif /* USE_GLFBDEV_DRIVER */
diff --git a/src/mesa/drivers/glslcompiler/Makefile b/src/mesa/drivers/glslcompiler/Makefile
new file mode 100644
index 0000000000..6da9f93f59
--- /dev/null
+++ b/src/mesa/drivers/glslcompiler/Makefile
@@ -0,0 +1,43 @@
+# Makefile for stand-alone GL-SL compiler
+
+TOP = ../../../..
+
+include $(TOP)/configs/current
+
+
+PROGRAM = glslcompiler
+
+OBJECTS = \
+	glslcompiler.o \
+	../common/driverfuncs.o \
+	../../libmesa.a \
+	$(TOP)/src/mapi/glapi/libglapi.a
+
+INCLUDES = \
+	-I$(TOP)/include \
+	-I$(TOP)/include/GL/internal \
+	-I$(TOP)/src/mapi \
+	-I$(TOP)/src/mesa \
+	-I$(TOP)/src/mesa/main \
+	-I$(TOP)/src/mesa/glapi \
+	-I$(TOP)/src/mesa/math \
+	-I$(TOP)/src/mesa/transform \
+	-I$(TOP)/src/mesa/shader \
+	-I$(TOP)/src/mesa/swrast \
+	-I$(TOP)/src/mesa/swrast_setup \
+
+
+default: $(PROGRAM)
+	$(INSTALL) $(PROGRAM) $(TOP)/bin
+
+
+glslcompiler: $(OBJECTS)
+	$(CC) $(OBJECTS) $(GL_LIB_DEPS) -o $@
+
+
+glslcompiler.o: glslcompiler.c
+	$(CC) -c $(INCLUDES) $(CFLAGS) glslcompiler.c -o $@
+
+
+clean:
+	-rm -f *.o *~ $(PROGRAM)
diff --git a/src/mesa/drivers/glslcompiler/glslcompiler.c b/src/mesa/drivers/glslcompiler/glslcompiler.c
new file mode 100644
index 0000000000..d58f32b293
--- /dev/null
+++ b/src/mesa/drivers/glslcompiler/glslcompiler.c
@@ -0,0 +1,416 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.3
+ *
+ * Copyright (C) 1999-2007  Brian Paul, Tungsten Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \mainpage
+ *
+ * Stand-alone Shading Language compiler.  
+ * Basically, a command-line program which accepts GLSL shaders and emits
+ * vertex/fragment programs (GPU instructions).
+ *
+ * This file is basically just a Mesa device driver but instead of building
+ * a shared library we build an executable.
+ *
+ * We can emit programs in three different formats:
+ *  1. ARB-style (GL_ARB_vertex/fragment_program)
+ *  2. NV-style (GL_NV_vertex/fragment_program)
+ *  3. debug-style (a slightly more sophisticated, internal format)
+ *
+ * Note that the ARB and NV program languages can't express all the
+ * features that might be used by a fragment program (examples being
+ * uniform and varying vars).  So, the ARB/NV programs that are
+ * emitted aren't always legal programs in those languages.
+ */
+
+
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/extensions.h"
+#include "main/framebuffer.h"
+#include "main/shaders.h"
+#include "shader/shader_api.h"
+#include "shader/prog_print.h"
+#include "drivers/common/driverfuncs.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+#include "swrast/swrast.h"
+#include "swrast/s_context.h"
+#include "swrast/s_triangle.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+
+static const char *Prog = "glslcompiler";
+
+
+/**
+ * Command-line settings, filled in by ParseOptions().
+ */
+struct options {
+   GLboolean LineNumbers;        /* emit line numbers (--number, -n) */
+   GLboolean Link;               /* run the linker after compiling (--link) */
+   gl_prog_print_mode Mode;      /* output format: debug (default), --arb or --nv */
+   const char *VertFile;         /* vertex shader input file (--vs), or NULL */
+   const char *FragFile;         /* fragment shader input file (--fs), or NULL */
+   const char *OutputFile;       /* output file (--output, -o), or NULL for stdout */
+   GLboolean Params;             /* also emit program parameter info (--params) */
+   struct gl_sl_pragmas Pragmas; /* pragma overrides copied into the context */
+};
+
+/* The single, file-wide instance of the parsed options. */
+static struct options Options;
+
+
+/**
+ * GLSL compiler driver context. (kind of an artificial thing for now)
+ *
+ * The Mesa context is embedded as the first member; CreateContext()
+ * allocates one of these and passes it to _mesa_initialize_context()
+ * as the driver context.
+ */
+struct compiler_context
+{
+   GLcontext MesaContext;   /* core Mesa context */
+   int foo;                 /* placeholder; not referenced in this file's visible code */
+};
+
+typedef struct compiler_context CompilerContext;
+
+
+
+/**
+ * Driver UpdateState hook (installed in CreateContext()).
+ * Forwards the dirty-state bitmask to every software module.
+ *
+ * \param ctx        the Mesa context
+ * \param new_state  state flags passed through unchanged to each module
+ */
+static void
+UpdateState(GLcontext *ctx, GLuint new_state)
+{
+   /* easy - just propagate */
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+}
+
+
+
+/**
+ * Create the compiler's Mesa context and make it current.
+ *
+ * Builds a visual and a framebuffer, allocates the CompilerContext,
+ * initializes the embedded GLcontext with the default driver functions
+ * (overriding GetString, UpdateState and GetBufferSize), creates the
+ * swrast/vbo/tnl/swsetup module contexts, applies the pragma settings
+ * from Options and finally binds the context with _mesa_make_current().
+ *
+ * \return GL_TRUE on success.  On failure the visual, framebuffer and
+ *         context memory allocated here are released and GL_FALSE is
+ *         returned.
+ */
+static GLboolean
+CreateContext(void)
+{
+   struct dd_function_table ddFuncs;
+   GLvisual *vis;
+   GLframebuffer *buf = NULL;
+   GLcontext *ctx;
+   CompilerContext *cc = NULL;
+
+   vis = _mesa_create_visual(GL_FALSE, GL_FALSE, /* RGB */
+                             8, 8, 8, 8,  /* color */
+                             0, 0,  /* z, stencil */
+                             0, 0, 0, 0, 1);  /* accum */
+   /* don't hand a NULL visual to the framebuffer constructor */
+   if (vis)
+      buf = _mesa_create_framebuffer(vis);
+
+   cc = calloc(1, sizeof(*cc));
+   if (!vis || !buf || !cc)
+      goto fail;
+
+   _mesa_init_driver_functions(&ddFuncs);
+   ddFuncs.GetString = NULL;/*get_string;*/
+   ddFuncs.UpdateState = UpdateState;
+   ddFuncs.GetBufferSize = NULL;
+
+   ctx = &cc->MesaContext;
+   if (!_mesa_initialize_context(ctx, vis, NULL, &ddFuncs, cc))
+      goto fail;
+   _mesa_enable_sw_extensions(ctx);
+
+   if (!_swrast_CreateContext( ctx ) ||
+       !_vbo_CreateContext( ctx ) ||
+       !_tnl_CreateContext( ctx ) ||
+       !_swsetup_CreateContext( ctx )) {
+      /* NOTE(review): modules created before the failing one are not
+       * torn down individually here (the original did not either).
+       */
+      _mesa_free_context_data(ctx);
+      goto fail;
+   }
+   TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+   _swsetup_Wakeup( ctx );
+
+   /* Override the context's default pragma settings */
+   ctx->Shader.DefaultPragmas = Options.Pragmas;
+
+   _mesa_make_current(ctx, buf, buf);
+
+   return GL_TRUE;
+
+fail:
+   /* release whatever was successfully allocated above */
+   if (buf)
+      _mesa_destroy_framebuffer(buf);
+   if (vis)
+      _mesa_destroy_visual(vis);
+   free(cc);
+   return GL_FALSE;
+}
+
+
+/**
+ * Attach the given source text to a shader object and compile it.
+ *
+ * Prints "Shader compiled OK" to stdout on success.  On a compile error
+ * the shader's info log is written to stderr and the program exits with
+ * status 1.
+ *
+ * \param shader  shader object handle
+ * \param text    NUL-terminated shader source
+ */
+static void
+LoadAndCompileShader(GLuint shader, const char *text)
+{
+   GLint compiled;
+
+   _mesa_ShaderSourceARB(shader, 1, (const GLchar **) &text, NULL);
+   _mesa_CompileShaderARB(shader);
+   _mesa_GetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
+
+   if (!compiled) {
+      GLchar infoLog[1000];
+      GLsizei logLen;
+
+      _mesa_GetShaderInfoLog(shader, 1000, &logLen, infoLog);
+      fprintf(stderr, "%s: problem compiling shader: %s\n", Prog, infoLog);
+      exit(1);
+   }
+
+   printf("Shader compiled OK\n");
+}
+
+
+/**
+ * Read a shader from a file and compile it.
+ *
+ * At most 100*1000 - 1 bytes of the file are read; anything beyond that
+ * is ignored.  An empty file is silently skipped (nothing is compiled).
+ * If the file cannot be opened, or the read buffer cannot be allocated,
+ * an error is printed to stderr and the program exits with status 1.
+ *
+ * \param shader    shader object handle to load the text into
+ * \param filename  path of the shader source file
+ */
+static void
+ReadShader(GLuint shader, const char *filename)
+{
+   const int max = 100*1000;
+   int n;
+   char *buffer;
+   FILE *f;
+
+   f = fopen(filename, "r");
+   if (!f) {
+      fprintf(stderr, "%s: Unable to open shader file %s\n", Prog, filename);
+      exit(1);
+   }
+
+   buffer = (char*) malloc(max);
+   if (!buffer) {
+      fprintf(stderr, "%s: Out of memory reading shader file %s\n",
+              Prog, filename);
+      fclose(f);
+      exit(1);
+   }
+
+   /* Read one byte less than the buffer size so that the terminating
+    * zero written below always lands inside the allocation.
+    */
+   n = (int) fread(buffer, 1, max - 1, f);
+   if (n > 0) {
+      buffer[n] = 0;
+      LoadAndCompileShader(shader, buffer);
+   }
+
+   fclose(f);
+   free(buffer);
+}
+
+
+/**
+ * Link a vertex and a fragment shader into a new program object and
+ * report the outcome on stderr (either the linker's info log or a
+ * success message).
+ *
+ * \param v_shader  vertex shader handle
+ * \param f_shader  fragment shader handle
+ */
+static void
+CheckLink(GLuint v_shader, GLuint f_shader)
+{
+   GLint linked;
+   const GLuint program = _mesa_CreateProgram();
+
+   _mesa_AttachShader(program, v_shader);
+   _mesa_AttachShader(program, f_shader);
+   _mesa_LinkProgramARB(program);
+
+   _mesa_GetProgramiv(program, GL_LINK_STATUS, &linked);
+   if (linked) {
+      fprintf(stderr, "Link success!\n");
+   }
+   else {
+      GLchar infoLog[1000];
+      GLsizei logLen;
+
+      _mesa_GetProgramInfoLog(program, 1000, &logLen, infoLog);
+      fprintf(stderr, "Linker error:\n%s\n", infoLog);
+   }
+}
+
+
+/**
+ * Print the compiled program of a shader.
+ *
+ * The instructions are written to \p f in the format selected by
+ * Options.Mode, with line numbers if Options.LineNumbers is set.
+ * If Options.Params is set the program's parameters are printed as well
+ * via _mesa_print_program_parameters() (that call takes no stream
+ * argument, so its destination is not controlled by \p f).
+ *
+ * If the handle does not name a shader, or the shader has no program,
+ * an error is reported on stderr and nothing is printed.
+ *
+ * \param shader  shader object handle
+ * \param f       stream to print the instructions to
+ */
+static void
+PrintShaderInstructions(GLuint shader, FILE *f)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   struct gl_shader *sh = _mesa_lookup_shader(ctx, shader);
+   struct gl_program *prog;
+
+   if (!sh || !sh->Program) {
+      fprintf(stderr, "%s: no compiled program for shader %u\n",
+              Prog, (unsigned) shader);
+      return;
+   }
+
+   prog = sh->Program;
+   _mesa_fprint_program_opt(f, prog, Options.Mode, Options.LineNumbers);
+   if (Options.Params)
+      _mesa_print_program_parameters(ctx, prog);
+}
+
+
+/**
+ * Create a shader object of the given type, then load and compile the
+ * source found in the named file.
+ *
+ * \param filename  path of the shader source file
+ * \param type      GL_VERTEX_SHADER or GL_FRAGMENT_SHADER
+ * \return handle of the new shader object
+ */
+static GLuint
+CompileShader(const char *filename, GLenum type)
+{
+   GLuint handle;
+
+   assert(type == GL_VERTEX_SHADER ||
+          type == GL_FRAGMENT_SHADER);
+
+   handle = _mesa_CreateShader(type);
+   ReadShader(handle, filename);
+   return handle;
+}
+
+
+/**
+ * Print the command-line help text to stdout.
+ */
+static void
+Usage(void)
+{
+   static const char *const helpLines[] = {
+      "Mesa GLSL stand-alone compiler\n",
+      "Usage:\n",
+      "  --vs FILE          vertex shader input filename\n",
+      "  --fs FILE          fragment shader input filename\n",
+      "  --arb              emit ARB-style instructions\n",
+      "  --nv               emit NV-style instructions\n",
+      "  --link             run linker\n",
+      "  --debug            force #pragma debug(on)\n",
+      "  --nodebug          force #pragma debug(off)\n",
+      "  --opt              force #pragma optimize(on)\n",
+      "  --noopt            force #pragma optimize(off)\n",
+      "  --number, -n       emit line numbers (if --arb or --nv)\n",
+      "  --output, -o FILE  output filename\n",
+      "  --params           also emit program parameter info\n",
+      "  --help             display this information\n"
+   };
+   unsigned int k;
+
+   for (k = 0; k < sizeof(helpLines) / sizeof(helpLines[0]); k++) {
+      fputs(helpLines[k], stdout);
+   }
+}
+
+
+/**
+ * Return the word following option argv[*i] and advance *i onto it.
+ * If the option is the last word on the command line, print an error
+ * and the usage text, then exit with status 1.
+ */
+static const char *
+NextArg(int argc, char *argv[], int *i)
+{
+   if (*i + 1 >= argc) {
+      fprintf(stderr, "%s: option %s requires an argument\n", Prog, argv[*i]);
+      Usage();
+      exit(1);
+   }
+   (*i)++;
+   return argv[*i];
+}
+
+
+/**
+ * Parse the command line into the global Options structure.
+ *
+ * All fields are first reset to their defaults (debug-style output,
+ * no files, optimization on).  With no arguments, or with --help, the
+ * usage text is printed and the program exits with status 0.  An unknown
+ * option, or a file option without its argument, prints the usage text
+ * and exits with status 1.
+ *
+ * \param argc  argument count as passed to main()
+ * \param argv  argument vector as passed to main()
+ */
+static void
+ParseOptions(int argc, char *argv[])
+{
+   int i;
+
+   Options.LineNumbers = GL_FALSE;
+   Options.Link = GL_FALSE;
+   Options.Mode = PROG_PRINT_DEBUG;
+   Options.VertFile = NULL;
+   Options.FragFile = NULL;
+   Options.OutputFile = NULL;
+   Options.Params = GL_FALSE;
+   Options.Pragmas.IgnoreOptimize = GL_FALSE;
+   Options.Pragmas.IgnoreDebug = GL_FALSE;
+   Options.Pragmas.Debug = GL_FALSE;
+   Options.Pragmas.Optimize = GL_TRUE;
+
+   if (argc == 1) {
+      Usage();
+      exit(0);
+   }
+
+   for (i = 1; i < argc; i++) {
+      if (strcmp(argv[i], "--vs") == 0) {
+         Options.VertFile = NextArg(argc, argv, &i);
+      }
+      else if (strcmp(argv[i], "--fs") == 0) {
+         Options.FragFile = NextArg(argc, argv, &i);
+      }
+      else if (strcmp(argv[i], "--arb") == 0) {
+         Options.Mode = PROG_PRINT_ARB;
+      }
+      else if (strcmp(argv[i], "--nv") == 0) {
+         Options.Mode = PROG_PRINT_NV;
+      }
+      else if (strcmp(argv[i], "--link") == 0) {
+         Options.Link = GL_TRUE;
+      }
+      else if (strcmp(argv[i], "--debug") == 0) {
+         /* "Ignore" means: ignore the shader's own #pragma, use ours */
+         Options.Pragmas.IgnoreDebug = GL_TRUE;
+         Options.Pragmas.Debug = GL_TRUE;
+      }
+      else if (strcmp(argv[i], "--nodebug") == 0) {
+         Options.Pragmas.IgnoreDebug = GL_TRUE;
+         Options.Pragmas.Debug = GL_FALSE;
+      }
+      else if (strcmp(argv[i], "--opt") == 0) {
+         Options.Pragmas.IgnoreOptimize = GL_TRUE;
+         Options.Pragmas.Optimize = GL_TRUE;
+      }
+      else if (strcmp(argv[i], "--noopt") == 0) {
+         Options.Pragmas.IgnoreOptimize = GL_TRUE;
+         Options.Pragmas.Optimize = GL_FALSE;
+      }
+      else if (strcmp(argv[i], "--number") == 0 ||
+               strcmp(argv[i], "-n") == 0) {
+         Options.LineNumbers = GL_TRUE;
+      }
+      else if (strcmp(argv[i], "--output") == 0 ||
+               strcmp(argv[i], "-o") == 0) {
+         Options.OutputFile = NextArg(argc, argv, &i);
+      }
+      else if (strcmp(argv[i], "--params") == 0) {
+         Options.Params = GL_TRUE;
+      }
+      else if (strcmp(argv[i], "--help") == 0) {
+         Usage();
+         exit(0);
+      }
+      else {
+         printf("Unknown option: %s\n", argv[i]);
+         Usage();
+         exit(1);
+      }
+   }
+
+   if (Options.Mode == PROG_PRINT_DEBUG) {
+      /* always print line numbers when emitting debug-style output */
+      Options.LineNumbers = GL_TRUE;
+   }
+}
+
+
+/**
+ * Program entry point.
+ *
+ * Parses the options, creates the compiler context, compiles the vertex
+ * and/or fragment shader named on the command line and prints their
+ * instructions to stdout, or to the --output file if one was given.
+ * With --link both shaders are then linked and the result reported.
+ *
+ * \return 0 on success; the program exits with status 1 on any error.
+ */
+int
+main(int argc, char *argv[])
+{
+   GLuint v_shader = 0, f_shader = 0;
+
+   ParseOptions(argc, argv);
+
+   if (!CreateContext()) {
+      fprintf(stderr, "%s: Failed to create compiler context\n", Prog);
+      exit(1);
+   }
+
+   if (Options.VertFile) {
+      v_shader = CompileShader(Options.VertFile, GL_VERTEX_SHADER);
+   }
+
+   if (Options.FragFile) {
+      f_shader = CompileShader(Options.FragFile, GL_FRAGMENT_SHADER);
+   }
+
+   if (v_shader || f_shader) {
+      if (Options.OutputFile) {
+         /* freopen() closes the stream's old file itself; calling
+          * fclose(stdout) first would leave us using a closed stream.
+          */
+         if (!freopen(Options.OutputFile, "w", stdout)) {
+            fprintf(stderr, "%s: Unable to open output file %s\n",
+                    Prog, Options.OutputFile);
+            exit(1);
+         }
+      }
+      if (v_shader) {
+         PrintShaderInstructions(v_shader, stdout);
+      }
+      if (f_shader) {
+         PrintShaderInstructions(f_shader, stdout);
+      }
+      if (Options.OutputFile) {
+         /* finish the output file; nothing below writes to stdout */
+         fclose(stdout);
+      }
+   }
+
+   if (Options.Link) {
+      if (!v_shader || !f_shader) {
+         fprintf(stderr,
+                 "--link option requires both a vertex and fragment shader.\n");
+         exit(1);
+      }
+
+      CheckLink(v_shader, f_shader);
+   }
+
+   return 0;
+}
diff --git a/src/mesa/drivers/osmesa/Makefile b/src/mesa/drivers/osmesa/Makefile
new file mode 100644
index 0000000000..c6b4a04085
--- /dev/null
+++ b/src/mesa/drivers/osmesa/Makefile
@@ -0,0 +1,52 @@
+# src/mesa/drivers/osmesa/Makefile for libOSMesa.so
+
+# Note that we may generate libOSMesa.so or libOSMesa16.so or libOSMesa32.so
+# with this Makefile
+
+
+TOP = ../../../..
+
+include $(TOP)/configs/current
+
+
+
+SOURCES = osmesa.c
+
+OBJECTS = $(SOURCES:.c=.o)
+
+INCLUDE_DIRS = \
+	-I$(TOP)/include \
+	-I$(TOP)/src/mapi \
+	-I$(TOP)/src/mesa \
+	-I$(TOP)/src/mesa/main
+
+CORE_MESA = \
+	$(TOP)/src/mesa/libmesa.a \
+	$(TOP)/src/mapi/glapi/libglapi.a \
+	$(TOP)/src/glsl/cl/libglslcl.a \
+	$(TOP)/src/glsl/pp/libglslpp.a
+
+.c.o:
+	$(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
+
+
+default: $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME)
+
+
+# libOSMesa can be used in conjunction with libGL or with all other Mesa
+# sources. We can also build libOSMesa16/libOSMesa32 by setting
+# -DCHAN_BITS=16/32.
+$(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME): $(OBJECTS) $(CORE_MESA)
+	$(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+		-major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \
+		-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
+		-id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \
+		$(OSMESA_LIB_DEPS) $(OBJECTS) $(CORE_MESA)
+
+
+
+clean:
+	-rm -f *.o *~
+
+
+# XXX todo install rule?
diff --git a/src/mesa/drivers/osmesa/Makefile.win b/src/mesa/drivers/osmesa/Makefile.win
new file mode 100644
index 0000000000..36d520e65d
--- /dev/null
+++ b/src/mesa/drivers/osmesa/Makefile.win
@@ -0,0 +1,39 @@
+# Makefile for Win32
+
+!include <win32.mak>
+
+TOP = ..\..
+
+OSMESA_SRCS = osmesa.c
+OSMESA_OBJS = osmesa.obj
+
+ASM_SRCS =
+CORE_SRCS =
+DRIVER_SRCS = $(OSMESA_SRCS)
+
+SRCS	= $(OSMESA_SRCS)
+
+all	: osmesadll
+
+!include "$(TOP)/mesawin32.mak"
+
+osmesadll: $(OSMESADLL)
+
+CFLAGS	= $(cvarsdll) $(CFLAGS) -D_OPENGL32_ -DBUILD_GL32 -I$(TOP)/src
+LFLAGS	= $(dlllflags) $(lcommon) $(LFLAGS)
+
+OBJS	= $(ASM_SRCS:.S=.obj) $(CORE_SRCS:.c=.obj) $(DRIVER_SRCS:.c=.obj)
+LIBS	= $(guilibsdll) $(TOP)/lib/$(MESALIB)
+
+$(OSMESADLL)	: $(OBJS) osmesa.def
+	$(link) $(LFLAGS) -def:osmesa.def -out:$(OSMESADLL) $(OBJS) $(LIBS)
+	@echo "copying OSMesa library to library directory..."
+	-copy $(OSMESALIB) $(TOP)\lib
+	@echo "copying OSMesa dll to library directory..."
+	-copy $(OSMESADLL) $(TOP)\lib
+
+install	: $(OSMESADLL)
+	@echo "copying OSMesa library to system library directory..."
+	-copy $(OSMESALIB) $(LIBINSTALL)
+	@echo "copying OSMesa dll to system library directory..."
+	-copy $(OSMESADLL) $(DLLINSTALL)
diff --git a/src/mesa/drivers/osmesa/descrip.mms b/src/mesa/drivers/osmesa/descrip.mms
new file mode 100644
index 0000000000..6c2f3226f6
--- /dev/null
+++ b/src/mesa/drivers/osmesa/descrip.mms
@@ -0,0 +1,45 @@
+# Makefile for core library for VMS
+# contributed by Jouk Jansen  joukj@hrem.nano.tudelft.nl
+# Last revision : 3 October 2007
+
+.first
+	define gl [----.include.gl]
+	define math [--.math]
+	define tnl [--.tnl]
+	define vbo [--.vbo]
+	define swrast [--.swrast]
+	define swrast_setup [--.swrast_setup]
+	define array_cache [--.array_cache]
+	define drivers [-]
+	define glapi [--.glapi]
+	define main [--.main]
+	define shader [--.shader]
+
+.include [----]mms-config.
+
+##### MACROS #####
+
+VPATH = RCS
+
+INCDIR = [----.include],[--.main],[--.glapi],[--.shader]
+LIBDIR = [----.lib]
+CFLAGS = /include=($(INCDIR),[])/define=(PTHREADS=1)/name=(as_is,short)/float=ieee/ieee=denorm
+
+SOURCES = osmesa.c
+
+OBJECTS = osmesa.obj
+
+##### RULES #####
+
+VERSION=Mesa V3.4
+
+##### TARGETS #####
+# Make the library
+$(LIBDIR)$(GL_LIB) : $(OBJECTS)
+  @ library $(LIBDIR)$(GL_LIB) $(OBJECTS)
+
+clean :
+	purge
+	delete *.obj;*
+
+osmesa.obj : osmesa.c
diff --git a/src/mesa/drivers/osmesa/osmesa.c b/src/mesa/drivers/osmesa/osmesa.c
new file mode 100644
index 0000000000..ead4050397
--- /dev/null
+++ b/src/mesa/drivers/osmesa/osmesa.c
@@ -0,0 +1,1566 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.3
+ *
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * Off-Screen Mesa rendering / Rendering into client memory space
+ *
+ * Note on thread safety:  this driver is thread safe.  All
+ * functions are reentrant.  The notion of current context is
+ * managed by the core _mesa_make_current() and _mesa_get_current_context()
+ * functions.  Those functions are thread-safe.
+ */
+
+
+#include "main/glheader.h"
+#include "GL/osmesa.h"
+#include "main/context.h"
+#include "main/extensions.h"
+#include "main/formats.h"
+#include "main/framebuffer.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/renderbuffer.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "swrast/s_context.h"
+#include "swrast/s_lines.h"
+#include "swrast/s_triangle.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
+#include "vbo/vbo.h"
+
+
+
+/**
+ * OSMesa rendering context, derived from core Mesa GLcontext.
+ *
+ * The GLcontext is embedded as the first member so that a GLcontext
+ * pointer can be cast directly to an OSMesaContext (see OSMESA_CONTEXT()).
+ */
+struct osmesa_context
+{
+   GLcontext mesa;		/**< Base class - this must be first */
+   GLvisual *gl_visual;		/**< Describes the buffers */
+   struct gl_renderbuffer *rb;  /**< The user's colorbuffer */
+   GLframebuffer *gl_buffer;	/**< The framebuffer, containing user's rb */
+   GLenum format;		/**< User-specified context format */
+   GLint userRowLength;		/**< user-specified number of pixels per row */
+   GLint rInd, gInd, bInd, aInd;/**< index offsets for RGBA formats */
+   GLvoid *rowaddr[MAX_HEIGHT];	/**< address of first pixel in each image row */
+   GLboolean yup;		/**< TRUE  -> Y increases upward */
+				/**< FALSE -> Y increases downward */
+};
+
+
+/**
+ * Convert a core Mesa context pointer to the OSMesa context that
+ * contains it.
+ */
+static INLINE OSMesaContext
+OSMESA_CONTEXT(GLcontext *ctx)
+{
+   /* The GLcontext is the first member of struct osmesa_context,
+    * so a plain cast is all that is needed.
+    */
+   OSMesaContext osmesa = (OSMesaContext) ctx;
+
+   return osmesa;
+}
+
+
+/**********************************************************************/
+/*** Private Device Driver Functions                                ***/
+/**********************************************************************/
+
+
+/**
+ * Driver GetString hook.
+ *
+ * Only GL_RENDERER is answered here; the string depends on the
+ * compile-time CHAN_BITS setting.  NULL is returned for every other name.
+ *
+ * \param ctx   the Mesa context (unused)
+ * \param name  the string being queried
+ */
+static const GLubyte *
+get_string( GLcontext *ctx, GLenum name )
+{
+   (void) ctx;
+
+   if (name != GL_RENDERER)
+      return NULL;
+
+#if CHAN_BITS == 32
+   return (const GLubyte *) "Mesa OffScreen32";
+#elif CHAN_BITS == 16
+   return (const GLubyte *) "Mesa OffScreen16";
+#else
+   return (const GLubyte *) "Mesa OffScreen";
+#endif
+}
+
+
+/**
+ * Driver UpdateState hook: forwards the dirty-state bitmask to the
+ * swrast, swsetup, tnl and vbo modules.
+ *
+ * \param ctx        the Mesa context
+ * \param new_state  state flags passed through unchanged to each module
+ */
+static void
+osmesa_update_state( GLcontext *ctx, GLuint new_state )
+{
+   /* easy - just propagate */
+   _swrast_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+}
+
+
+
+/**********************************************************************/
+/*****        Read/write spans/arrays of pixels                   *****/
+/**********************************************************************/
+
+/*
+ * RGBA pixel layout: four components per pixel, stored in the order
+ * R, G, B, A (offsets 0..3), in 8-bit, 16-bit and float variants.
+ *
+ * Each macro set is followed by an #include of swrast/s_spantemp.h.
+ * Presumably that template expands the macros into the span functions
+ * named by NAME() and #undefs them again, since they are redefined for
+ * the next format without an #undef in this file - TODO confirm.
+ *
+ * STORE_PIXEL_RGB stores a constant alpha (255, 65535 or 1.0F).
+ * Unlike the float BGRA/ARGB variants further down, the float RGBA
+ * variant clamps on store: colors to >= 0 and alpha to [0, 1].
+ */
+/* 8-bit RGBA */
+#define NAME(PREFIX) PREFIX##_RGBA8
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLubyte *P = (GLubyte *) osmesa->rowaddr[Y] + 4 * (X)
+#define INC_PIXEL_PTR(P) P += 4
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[0] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[2] = VALUE[BCOMP];  \
+   DST[3] = VALUE[ACOMP]
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   DST[0] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[2] = VALUE[BCOMP];  \
+   DST[3] = 255
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[0];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[2];  \
+   DST[ACOMP] = SRC[3]
+#include "swrast/s_spantemp.h"
+
+/* 16-bit RGBA */
+#define NAME(PREFIX) PREFIX##_RGBA16
+#define RB_TYPE GLushort
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLushort *P = (GLushort *) osmesa->rowaddr[Y] + 4 * (X)
+#define INC_PIXEL_PTR(P) P += 4
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[0] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[2] = VALUE[BCOMP];  \
+   DST[3] = VALUE[ACOMP]
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   DST[0] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[2] = VALUE[BCOMP];  \
+   DST[3] = 65535
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[0];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[2];  \
+   DST[ACOMP] = SRC[3]
+#include "swrast/s_spantemp.h"
+
+/* 32-bit RGBA */
+#define NAME(PREFIX) PREFIX##_RGBA32
+#define RB_TYPE GLfloat
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLfloat *P = (GLfloat *) osmesa->rowaddr[Y] + 4 * (X)
+#define INC_PIXEL_PTR(P) P += 4
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[0] = MAX2((VALUE[RCOMP]), 0.0F); \
+   DST[1] = MAX2((VALUE[GCOMP]), 0.0F); \
+   DST[2] = MAX2((VALUE[BCOMP]), 0.0F); \
+   DST[3] = CLAMP((VALUE[ACOMP]), 0.0F, 1.0F)
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   DST[0] = MAX2((VALUE[RCOMP]), 0.0F); \
+   DST[1] = MAX2((VALUE[GCOMP]), 0.0F); \
+   DST[2] = MAX2((VALUE[BCOMP]), 0.0F); \
+   DST[3] = 1.0F
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[0];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[2];  \
+   DST[ACOMP] = SRC[3]
+#include "swrast/s_spantemp.h"
+
+
+/*
+ * BGRA pixel layout: four components per pixel, stored in the order
+ * B, G, R, A (offsets 0..3), in 8-bit, 16-bit and float variants.
+ * The macro sets are consumed by the #include of swrast/s_spantemp.h
+ * that follows each one.  STORE_PIXEL_RGB stores a constant alpha
+ * (255, 65535 or 1.0F).
+ */
+/* 8-bit BGRA */
+#define NAME(PREFIX) PREFIX##_BGRA8
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLubyte *P = (GLubyte *) osmesa->rowaddr[Y] + 4 * (X)
+#define INC_PIXEL_PTR(P) P += 4
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[2] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[0] = VALUE[BCOMP];  \
+   DST[3] = VALUE[ACOMP]
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   DST[2] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[0] = VALUE[BCOMP];  \
+   DST[3] = 255
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[2];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[0];  \
+   DST[ACOMP] = SRC[3]
+#include "swrast/s_spantemp.h"
+
+/* 16-bit BGRA */
+#define NAME(PREFIX) PREFIX##_BGRA16
+#define RB_TYPE GLushort
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLushort *P = (GLushort *) osmesa->rowaddr[Y] + 4 * (X)
+#define INC_PIXEL_PTR(P) P += 4
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[2] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[0] = VALUE[BCOMP];  \
+   DST[3] = VALUE[ACOMP]
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   DST[2] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[0] = VALUE[BCOMP];  \
+   DST[3] = 65535
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[2];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[0];  \
+   DST[ACOMP] = SRC[3]
+#include "swrast/s_spantemp.h"
+
+/* 32-bit BGRA */
+#define NAME(PREFIX) PREFIX##_BGRA32
+#define RB_TYPE GLfloat
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLfloat *P = (GLfloat *) osmesa->rowaddr[Y] + 4 * (X)
+#define INC_PIXEL_PTR(P) P += 4
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[2] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[0] = VALUE[BCOMP];  \
+   DST[3] = VALUE[ACOMP]
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   DST[2] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[0] = VALUE[BCOMP];  \
+   DST[3] = 1.0F
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[2];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[0];  \
+   DST[ACOMP] = SRC[3]
+#include "swrast/s_spantemp.h"
+
+
+/*
+ * ARGB pixel layout: four components per pixel, stored in the order
+ * A, R, G, B (offsets 0..3), in 8-bit, 16-bit and float variants.
+ * The macro sets are consumed by the #include of swrast/s_spantemp.h
+ * that follows each one.  STORE_PIXEL_RGB stores a constant alpha
+ * (255, 65535 or 1.0F) at offset 0.
+ */
+/* 8-bit ARGB */
+#define NAME(PREFIX) PREFIX##_ARGB8
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLubyte *P = (GLubyte *) osmesa->rowaddr[Y] + 4 * (X)
+#define INC_PIXEL_PTR(P) P += 4
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[1] = VALUE[RCOMP];  \
+   DST[2] = VALUE[GCOMP];  \
+   DST[3] = VALUE[BCOMP];  \
+   DST[0] = VALUE[ACOMP]
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   DST[1] = VALUE[RCOMP];  \
+   DST[2] = VALUE[GCOMP];  \
+   DST[3] = VALUE[BCOMP];  \
+   DST[0] = 255
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[1];  \
+   DST[GCOMP] = SRC[2];  \
+   DST[BCOMP] = SRC[3];  \
+   DST[ACOMP] = SRC[0]
+#include "swrast/s_spantemp.h"
+
+/* 16-bit ARGB */
+#define NAME(PREFIX) PREFIX##_ARGB16
+#define RB_TYPE GLushort
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLushort *P = (GLushort *) osmesa->rowaddr[Y] + 4 * (X)
+#define INC_PIXEL_PTR(P) P += 4
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[1] = VALUE[RCOMP];  \
+   DST[2] = VALUE[GCOMP];  \
+   DST[3] = VALUE[BCOMP];  \
+   DST[0] = VALUE[ACOMP]
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   DST[1] = VALUE[RCOMP];  \
+   DST[2] = VALUE[GCOMP];  \
+   DST[3] = VALUE[BCOMP];  \
+   DST[0] = 65535
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[1];  \
+   DST[GCOMP] = SRC[2];  \
+   DST[BCOMP] = SRC[3];  \
+   DST[ACOMP] = SRC[0]
+#include "swrast/s_spantemp.h"
+
+/* 32-bit ARGB */
+#define NAME(PREFIX) PREFIX##_ARGB32
+#define RB_TYPE GLfloat
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLfloat *P = (GLfloat *) osmesa->rowaddr[Y] + 4 * (X)
+#define INC_PIXEL_PTR(P) P += 4
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[1] = VALUE[RCOMP];  \
+   DST[2] = VALUE[GCOMP];  \
+   DST[3] = VALUE[BCOMP];  \
+   DST[0] = VALUE[ACOMP]
+#define STORE_PIXEL_RGB(DST, X, Y, VALUE) \
+   DST[1] = VALUE[RCOMP];  \
+   DST[2] = VALUE[GCOMP];  \
+   DST[3] = VALUE[BCOMP];  \
+   DST[0] = 1.0F
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[1];  \
+   DST[GCOMP] = SRC[2];  \
+   DST[BCOMP] = SRC[3];  \
+   DST[ACOMP] = SRC[0]
+#include "swrast/s_spantemp.h"
+
+
+/*
+ * RGB pixel layout: three components per pixel, stored in the order
+ * R, G, B (offsets 0..2), in 8-bit, 16-bit and float variants.
+ * There is no stored alpha: STORE_PIXEL drops the alpha component, no
+ * STORE_PIXEL_RGB is defined, and FETCH_PIXEL reports a constant alpha
+ * (255, 65535U or 1.0F).  The macro sets are consumed by the #include
+ * of swrast/s_spantemp.h that follows each one.
+ */
+/* 8-bit RGB */
+#define NAME(PREFIX) PREFIX##_RGB8
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLubyte *P = (GLubyte *) osmesa->rowaddr[Y] + 3 * (X)
+#define INC_PIXEL_PTR(P) P += 3
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[0] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[2] = VALUE[BCOMP]
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[0];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[2];  \
+   DST[ACOMP] = 255
+#include "swrast/s_spantemp.h"
+
+/* 16-bit RGB */
+#define NAME(PREFIX) PREFIX##_RGB16
+#define RB_TYPE GLushort
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLushort *P = (GLushort *) osmesa->rowaddr[Y] + 3 * (X)
+#define INC_PIXEL_PTR(P) P += 3
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[0] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[2] = VALUE[BCOMP]
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[0];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[2];  \
+   DST[ACOMP] = 65535U
+#include "swrast/s_spantemp.h"
+
+/* 32-bit RGB */
+#define NAME(PREFIX) PREFIX##_RGB32
+#define RB_TYPE GLfloat
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLfloat *P = (GLfloat *) osmesa->rowaddr[Y] + 3 * (X)
+#define INC_PIXEL_PTR(P) P += 3
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[0] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[2] = VALUE[BCOMP]
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[0];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[2];  \
+   DST[ACOMP] = 1.0F
+#include "swrast/s_spantemp.h"
+
+
+/*
+ * BGR pixel layout: three components per pixel, stored in the order
+ * B, G, R (offsets 0..2), in 8-bit, 16-bit and float variants.
+ * There is no stored alpha: STORE_PIXEL drops the alpha component, no
+ * STORE_PIXEL_RGB is defined, and FETCH_PIXEL reports a constant alpha
+ * (255, 65535 or 1.0F).  The macro sets are consumed by the #include
+ * of swrast/s_spantemp.h that follows each one.
+ */
+/* 8-bit BGR */
+#define NAME(PREFIX) PREFIX##_BGR8
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLubyte *P = (GLubyte *) osmesa->rowaddr[Y] + 3 * (X)
+#define INC_PIXEL_PTR(P) P += 3
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[2] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[0] = VALUE[BCOMP]
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[2];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[0];  \
+   DST[ACOMP] = 255
+#include "swrast/s_spantemp.h"
+
+/* 16-bit BGR */
+#define NAME(PREFIX) PREFIX##_BGR16
+#define RB_TYPE GLushort
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLushort *P = (GLushort *) osmesa->rowaddr[Y] + 3 * (X)
+#define INC_PIXEL_PTR(P) P += 3
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[2] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[0] = VALUE[BCOMP]
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[2];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[0];  \
+   DST[ACOMP] = 65535
+#include "swrast/s_spantemp.h"
+
+/* 32-bit BGR */
+#define NAME(PREFIX) PREFIX##_BGR32
+#define RB_TYPE GLfloat
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLfloat *P = (GLfloat *) osmesa->rowaddr[Y] + 3 * (X)
+#define INC_PIXEL_PTR(P) P += 3
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   DST[2] = VALUE[RCOMP];  \
+   DST[1] = VALUE[GCOMP];  \
+   DST[0] = VALUE[BCOMP]
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = SRC[2];  \
+   DST[GCOMP] = SRC[1];  \
+   DST[BCOMP] = SRC[0];  \
+   DST[ACOMP] = 1.0F
+#include "swrast/s_spantemp.h"
+
+
+/*
+ * Packed 5/6/5 RGB: one GLushort per pixel, red in the top five bits,
+ * green in the middle six and blue in the low five.
+ *
+ * STORE_PIXEL keeps the high 5/6/5 bits of the R/G/B values (masks
+ * 0xf8 / 0xfc and a >> 3) and discards alpha.  FETCH_PIXEL expands each
+ * field back to the top of an 8-bit value, ORs extra bits of the pixel
+ * into the low positions, and reports alpha as CHAN_MAX.
+ * Note that RB_TYPE is GLubyte while the pixel pointer is a GLushort.
+ */
+/* 16-bit 5/6/5 RGB */
+#define NAME(PREFIX) PREFIX##_RGB_565
+#define RB_TYPE GLubyte
+#define SPAN_VARS \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define INIT_PIXEL_PTR(P, X, Y) \
+   GLushort *P = (GLushort *) osmesa->rowaddr[Y] + (X)
+#define INC_PIXEL_PTR(P) P += 1
+#define STORE_PIXEL(DST, X, Y, VALUE) \
+   *DST = ( (((VALUE[RCOMP]) & 0xf8) << 8) | (((VALUE[GCOMP]) & 0xfc) << 3) | ((VALUE[BCOMP]) >> 3) )
+#define FETCH_PIXEL(DST, SRC) \
+   DST[RCOMP] = ( (((*SRC) >> 8) & 0xf8) | (((*SRC) >> 11) & 0x7) ); \
+   DST[GCOMP] = ( (((*SRC) >> 3) & 0xfc) | (((*SRC) >>  5) & 0x3) ); \
+   DST[BCOMP] = ( (((*SRC) << 3) & 0xf8) | (((*SRC)      ) & 0x7) ); \
+   DST[ACOMP] = CHAN_MAX
+#include "swrast/s_spantemp.h"
+
+
+/**
+ * Macros for optimized line/triangle rendering.
+ * Only for 8-bit channel, RGBA, BGRA, ARGB formats.
+ */
+
+#define PACK_RGBA(DST, R, G, B, A)	\
+do {					\
+   (DST)[osmesa->rInd] = R;		\
+   (DST)[osmesa->gInd] = G;		\
+   (DST)[osmesa->bInd] = B;		\
+   (DST)[osmesa->aInd] = A;		\
+} while (0)
+
+#define PIXELADDR4(X,Y)  ((GLchan *) osmesa->rowaddr[Y] + 4 * (X))
+
+
+/**
+ * Draw a flat-shaded, RGB line into an osmesa buffer.
+ */
+#define NAME flat_rgba_line
+#define CLIP_HACK 1
+#define SETUP_CODE						\
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);		\
+   const GLchan *color = vert1->color;
+
+#define PLOT(X, Y)						\
+do {								\
+   GLchan *p = PIXELADDR4(X, Y);				\
+   PACK_RGBA(p, color[0], color[1], color[2], color[3]);	\
+} while (0)
+
+#ifdef WIN32
+#include "..\swrast\s_linetemp.h"
+#else
+#include "swrast/s_linetemp.h"
+#endif
+
+
+
+/**
+ * Draw a flat-shaded, Z-less, RGB line into an osmesa buffer.
+ */
+#define NAME flat_rgba_z_line
+#define CLIP_HACK 1
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define SETUP_CODE					\
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);	\
+   const GLchan *color = vert1->color;
+
+#define PLOT(X, Y)					\
+do {							\
+   if (Z < *zPtr) {					\
+      GLchan *p = PIXELADDR4(X, Y);			\
+      PACK_RGBA(p, color[RCOMP], color[GCOMP],		\
+                   color[BCOMP], color[ACOMP]);		\
+      *zPtr = Z;					\
+   }							\
+} while (0)
+
+#ifdef WIN32
+#include "..\swrast\s_linetemp.h"
+#else
+#include "swrast/s_linetemp.h"
+#endif
+
+
+
+/**
+ * Analyze context state to see if we can provide a fast line drawing
+ * function.  Otherwise, return NULL.
+ */
+static swrast_line_func
+osmesa_choose_line_function( GLcontext *ctx )
+{
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+   const SWcontext *swrast = SWRAST_CONTEXT(ctx);
+
+   /* The fast paths address pixels as 4 GLchan and need 8-bit storage. */
+   if (osmesa->rb->DataType != GL_UNSIGNED_BYTE)
+      return NULL;
+
+   /* Only plain, flat-shaded, 1-pixel-wide, untextured lines qualify. */
+   if (ctx->RenderMode != GL_RENDER)      return NULL;
+   if (ctx->Line.SmoothFlag)              return NULL;
+   if (ctx->Texture._EnabledUnits)        return NULL;
+   if (ctx->Light.ShadeModel != GL_FLAT)  return NULL;
+   if (ctx->Line.Width != 1.0F)           return NULL;
+   if (ctx->Line.StippleFlag)             return NULL;
+   if (osmesa->format != OSMESA_RGBA &&
+       osmesa->format != OSMESA_BGRA &&
+       osmesa->format != OSMESA_ARGB)     return NULL;
+
+   /* Depth-tested variant: GL_LESS, depth mask on, default Z size. */
+   if (swrast->_RasterMask==DEPTH_BIT
+       && ctx->Depth.Func==GL_LESS
+       && ctx->Depth.Mask==GL_TRUE
+       && ctx->Visual.depthBits == DEFAULT_SOFTWARE_DEPTH_BITS)
+      return (swrast_line_func) flat_rgba_z_line;
+
+   if (swrast->_RasterMask == 0)
+      return (swrast_line_func) flat_rgba_line;
+
+   return (swrast_line_func) NULL;
+}
+
+
+/**********************************************************************/
+/*****                 Optimized triangle rendering               *****/
+/**********************************************************************/
+
+
+/*
+ * Smooth-shaded, z-less triangle, RGBA color.
+ */
+#define NAME smooth_rgba_z_triangle
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_RGB 1
+#define INTERP_ALPHA 1
+#define SETUP_CODE \
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+#define RENDER_SPAN( span ) {					\
+   GLuint i;							\
+   GLchan *img = PIXELADDR4(span.x, span.y); 			\
+   for (i = 0; i < span.end; i++, img += 4) {			\
+      const GLuint z = FixedToDepth(span.z);			\
+      if (z < zRow[i]) {					\
+         PACK_RGBA(img, FixedToChan(span.red),			\
+            FixedToChan(span.green), FixedToChan(span.blue),	\
+            FixedToChan(span.alpha));				\
+         zRow[i] = z;						\
+      }								\
+      span.red += span.redStep;					\
+      span.green += span.greenStep;				\
+      span.blue += span.blueStep;				\
+      span.alpha += span.alphaStep;				\
+      span.z += span.zStep;					\
+   }                                                            \
+}
+#ifdef WIN32
+#include "..\swrast\s_tritemp.h"
+#else
+#include "swrast/s_tritemp.h"
+#endif
+
+
+
+/*
+ * Flat-shaded, z-less triangle, RGBA color.
+ */
+#define NAME flat_rgba_z_triangle
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define SETUP_CODE						\
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);		\
+   GLuint pixel;						\
+   PACK_RGBA((GLchan *) &pixel, v2->color[0], v2->color[1],	\
+                                v2->color[2], v2->color[3]);
+
+#define RENDER_SPAN( span ) {				\
+   GLuint i;						\
+   GLuint *img = (GLuint *) PIXELADDR4(span.x, span.y);	\
+   for (i = 0; i < span.end; i++) {			\
+      const GLuint z = FixedToDepth(span.z);		\
+      if (z < zRow[i]) {				\
+         img[i] = pixel;				\
+         zRow[i] = z;					\
+      }							\
+      span.z += span.zStep;				\
+   }                                                    \
+}
+#ifdef WIN32
+#include "..\swrast\s_tritemp.h"
+#else
+#include "swrast/s_tritemp.h"
+#endif
+
+
+
+/**
+ * Return pointer to an optimized triangle function if possible.
+ */
+static swrast_tri_func
+osmesa_choose_triangle_function( GLcontext *ctx )
+{
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+   const SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   const swrast_tri_func none = (swrast_tri_func) NULL;
+   /* The optimized spans address pixels through PIXELADDR4 (4 GLchan). */
+   if (osmesa->rb->DataType != GL_UNSIGNED_BYTE)
+      return none;
+
+   /* State the optimized triangles do not implement. */
+   if (ctx->RenderMode != GL_RENDER ||
+       ctx->Polygon.SmoothFlag ||
+       ctx->Polygon.StippleFlag ||
+       ctx->Texture._EnabledUnits)
+      return none;
+   if (osmesa->format != OSMESA_RGBA &&
+       osmesa->format != OSMESA_BGRA &&
+       osmesa->format != OSMESA_ARGB)
+      return none;
+   if (ctx->Polygon.CullFlag &&
+       ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+      return none;
+
+   /* Both fast paths handle exactly this depth configuration. */
+   if (swrast->_RasterMask != DEPTH_BIT || ctx->Depth.Func != GL_LESS ||
+       ctx->Depth.Mask != GL_TRUE ||
+       ctx->Visual.depthBits != DEFAULT_SOFTWARE_DEPTH_BITS)
+      return none;
+   return (ctx->Light.ShadeModel == GL_SMOOTH)
+      ? (swrast_tri_func) smooth_rgba_z_triangle
+      : (swrast_tri_func) flat_rgba_z_triangle;
+}
+
+
+
+/* swrast choose_triangle hook.  Install one of the optimized OSMesa
+ * triangle functions when the current state allows it; otherwise let
+ * swrast pick one of its standard functions.
+ */
+static void
+osmesa_choose_triangle( GLcontext *ctx )
+{
+   SWcontext *sw = SWRAST_CONTEXT(ctx);
+   const swrast_tri_func fast = osmesa_choose_triangle_function( ctx );
+   sw->Triangle = fast;
+   if (fast == NULL)
+      _swrast_choose_triangle( ctx );
+}
+
+static void
+osmesa_choose_line( GLcontext *ctx )
+{
+   SWcontext *sw = SWRAST_CONTEXT(ctx);
+   const swrast_line_func fast = osmesa_choose_line_function( ctx );
+   sw->Line = fast;   /* NULL when no OSMesa fast path applies */
+   if (fast == NULL)
+      _swrast_choose_line( ctx );   /* let swrast make its own choice */
+}
+
+
+
+/**
+ * Recompute the values of the context's rowaddr array.
+ */
+static void
+compute_row_addresses( OSMesaContext osmesa )
+{
+   GLint bytesPerPixel, bytesPerRow, i;
+   GLubyte *origin = (GLubyte *) osmesa->rb->Data;
+   GLint bpc; /* bytes per channel */
+   GLint rowlength; /* in pixels */
+   GLint height = osmesa->rb->Height;
+
+   /* A non-zero OSMESA_ROW_LENGTH overrides the renderbuffer width. */
+   if (osmesa->userRowLength)
+      rowlength = osmesa->userRowLength;
+   else
+      rowlength = osmesa->rb->Width;
+
+   if (osmesa->format == OSMESA_RGB_565) {
+      /* Packed pixel: decided before DataType is examined, because
+       * OSMesaMakeCurrent() requires GL_UNSIGNED_SHORT_5_6_5 here, which
+       * the tests below would reject, leaving rowaddr[] unset.
+       */
+      bpc = 0;
+   }
+   else if (osmesa->rb->DataType == GL_UNSIGNED_BYTE)
+      bpc = 1;
+   else if (osmesa->rb->DataType == GL_UNSIGNED_SHORT)
+      bpc = 2;
+   else if (osmesa->rb->DataType == GL_FLOAT)
+      bpc = 4;
+   else {
+      _mesa_problem(&osmesa->mesa,
+                    "Unexpected datatype in osmesa::compute_row_addresses");
+      return;
+   }
+
+   if ((osmesa->format == OSMESA_RGB) || (osmesa->format == OSMESA_BGR)) {
+      /* RGB mode */
+      bytesPerPixel = 3 * bpc;
+   }
+   else if (osmesa->format == OSMESA_RGB_565) {
+      /* 5/6/5 RGB pixel in 16 bits */
+      bytesPerPixel = 2;
+   }
+   else {
+      /* RGBA mode */
+      bytesPerPixel = 4 * bpc;
+   }
+
+   bytesPerRow = rowlength * bytesPerPixel;
+
+   /* Y up: rowaddr[0] is the first row in memory; otherwise the last. */
+   for (i = 0; i < height; i++) {
+      const GLint j = osmesa->yup ? i : height - i - 1;
+      osmesa->rowaddr[i] = (GLvoid *) ((GLubyte *) origin + j * bytesPerRow);
+   }
+}
+
+
+
+/**
+ * Renderbuffer Delete hook: releases only the gl_renderbuffer struct.
+ * Don't use _mesa_delete_renderbuffer since we can't free rb->Data.
+ */
+static void
+osmesa_delete_renderbuffer(struct gl_renderbuffer *rb)
+{
+   free(rb);
+}
+
+
+/**
+ * Allocate renderbuffer storage.  We don't actually allocate any storage
+ * since we're using a user-provided buffer.
+ * Just set up all the gl_renderbuffer methods.
+ */
+static GLboolean
+osmesa_renderbuffer_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
+                            GLenum internalFormat, GLuint width, GLuint height)
+{
+   const OSMesaContext osmesa = OSMESA_CONTEXT(ctx);
+
+   /* Note: we can ignore internalFormat for "window-system" renderbuffers */
+   (void) internalFormat;
+
+   /* Install the span read/write functions for the context's pixel format.
+    * For the per-channel formats, any rb->DataType other than
+    * GL_UNSIGNED_BYTE or GL_UNSIGNED_SHORT selects the 32-bit variants.
+    */
+   if (osmesa->format == OSMESA_RGBA) {
+      if (rb->DataType == GL_UNSIGNED_BYTE) {
+         rb->GetRow = get_row_RGBA8;
+         rb->GetValues = get_values_RGBA8;
+         rb->PutRow = put_row_RGBA8;
+         rb->PutRowRGB = put_row_rgb_RGBA8;
+         rb->PutMonoRow = put_mono_row_RGBA8;
+         rb->PutValues = put_values_RGBA8;
+         rb->PutMonoValues = put_mono_values_RGBA8;
+      }
+      else if (rb->DataType == GL_UNSIGNED_SHORT) {
+         rb->GetRow = get_row_RGBA16;
+         rb->GetValues = get_values_RGBA16;
+         rb->PutRow = put_row_RGBA16;
+         rb->PutRowRGB = put_row_rgb_RGBA16;
+         rb->PutMonoRow = put_mono_row_RGBA16;
+         rb->PutValues = put_values_RGBA16;
+         rb->PutMonoValues = put_mono_values_RGBA16;
+      }
+      else {
+         rb->GetRow = get_row_RGBA32;
+         rb->GetValues = get_values_RGBA32;
+         rb->PutRow = put_row_RGBA32;
+         rb->PutRowRGB = put_row_rgb_RGBA32;
+         rb->PutMonoRow = put_mono_row_RGBA32;
+         rb->PutValues = put_values_RGBA32;
+         rb->PutMonoValues = put_mono_values_RGBA32;
+      }
+   }
+   else if (osmesa->format == OSMESA_BGRA) {
+      if (rb->DataType == GL_UNSIGNED_BYTE) {
+         rb->GetRow = get_row_BGRA8;
+         rb->GetValues = get_values_BGRA8;
+         rb->PutRow = put_row_BGRA8;
+         rb->PutRowRGB = put_row_rgb_BGRA8;
+         rb->PutMonoRow = put_mono_row_BGRA8;
+         rb->PutValues = put_values_BGRA8;
+         rb->PutMonoValues = put_mono_values_BGRA8;
+      }
+      else if (rb->DataType == GL_UNSIGNED_SHORT) {
+         rb->GetRow = get_row_BGRA16;
+         rb->GetValues = get_values_BGRA16;
+         rb->PutRow = put_row_BGRA16;
+         rb->PutRowRGB = put_row_rgb_BGRA16;
+         rb->PutMonoRow = put_mono_row_BGRA16;
+         rb->PutValues = put_values_BGRA16;
+         rb->PutMonoValues = put_mono_values_BGRA16;
+      }
+      else {
+         rb->GetRow = get_row_BGRA32;
+         rb->GetValues = get_values_BGRA32;
+         rb->PutRow = put_row_BGRA32;
+         rb->PutRowRGB = put_row_rgb_BGRA32;
+         rb->PutMonoRow = put_mono_row_BGRA32;
+         rb->PutValues = put_values_BGRA32;
+         rb->PutMonoValues = put_mono_values_BGRA32;
+      }
+   }
+   else if (osmesa->format == OSMESA_ARGB) {
+      if (rb->DataType == GL_UNSIGNED_BYTE) {
+         rb->GetRow = get_row_ARGB8;
+         rb->GetValues = get_values_ARGB8;
+         rb->PutRow = put_row_ARGB8;
+         rb->PutRowRGB = put_row_rgb_ARGB8;
+         rb->PutMonoRow = put_mono_row_ARGB8;
+         rb->PutValues = put_values_ARGB8;
+         rb->PutMonoValues = put_mono_values_ARGB8;
+      }
+      else if (rb->DataType == GL_UNSIGNED_SHORT) {
+         rb->GetRow = get_row_ARGB16;
+         rb->GetValues = get_values_ARGB16;
+         rb->PutRow = put_row_ARGB16;
+         rb->PutRowRGB = put_row_rgb_ARGB16;
+         rb->PutMonoRow = put_mono_row_ARGB16;
+         rb->PutValues = put_values_ARGB16;
+         rb->PutMonoValues = put_mono_values_ARGB16;
+      }
+      else {
+         rb->GetRow = get_row_ARGB32;
+         rb->GetValues = get_values_ARGB32;
+         rb->PutRow = put_row_ARGB32;
+         rb->PutRowRGB = put_row_rgb_ARGB32;
+         rb->PutMonoRow = put_mono_row_ARGB32;
+         rb->PutValues = put_values_ARGB32;
+         rb->PutMonoValues = put_mono_values_ARGB32;
+      }
+   }
+   else if (osmesa->format == OSMESA_RGB) {
+      if (rb->DataType == GL_UNSIGNED_BYTE) {
+         rb->GetRow = get_row_RGB8;
+         rb->GetValues = get_values_RGB8;
+         rb->PutRow = put_row_RGB8;
+         rb->PutRowRGB = put_row_rgb_RGB8;
+         rb->PutMonoRow = put_mono_row_RGB8;
+         rb->PutValues = put_values_RGB8;
+         rb->PutMonoValues = put_mono_values_RGB8;
+      }
+      else if (rb->DataType == GL_UNSIGNED_SHORT) {
+         rb->GetRow = get_row_RGB16;
+         rb->GetValues = get_values_RGB16;
+         rb->PutRow = put_row_RGB16;
+         rb->PutRowRGB = put_row_rgb_RGB16;
+         rb->PutMonoRow = put_mono_row_RGB16;
+         rb->PutValues = put_values_RGB16;
+         rb->PutMonoValues = put_mono_values_RGB16;
+      }
+      else {
+         rb->GetRow = get_row_RGB32;
+         rb->GetValues = get_values_RGB32;
+         rb->PutRow = put_row_RGB32;
+         rb->PutRowRGB = put_row_rgb_RGB32;
+         rb->PutMonoRow = put_mono_row_RGB32;
+         rb->PutValues = put_values_RGB32;
+         rb->PutMonoValues = put_mono_values_RGB32;
+      }
+   }
+   else if (osmesa->format == OSMESA_BGR) {
+      if (rb->DataType == GL_UNSIGNED_BYTE) {
+         rb->GetRow = get_row_BGR8;
+         rb->GetValues = get_values_BGR8;
+         rb->PutRow = put_row_BGR8;
+         rb->PutRowRGB = put_row_rgb_BGR8;
+         rb->PutMonoRow = put_mono_row_BGR8;
+         rb->PutValues = put_values_BGR8;
+         rb->PutMonoValues = put_mono_values_BGR8;
+      }
+      else if (rb->DataType == GL_UNSIGNED_SHORT) {
+         rb->GetRow = get_row_BGR16;
+         rb->GetValues = get_values_BGR16;
+         rb->PutRow = put_row_BGR16;
+         rb->PutRowRGB = put_row_rgb_BGR16;
+         rb->PutMonoRow = put_mono_row_BGR16;
+         rb->PutValues = put_values_BGR16;
+         rb->PutMonoValues = put_mono_values_BGR16;
+      }
+      else {
+         rb->GetRow = get_row_BGR32;
+         rb->GetValues = get_values_BGR32;
+         rb->PutRow = put_row_BGR32;
+         rb->PutRowRGB = put_row_rgb_BGR32;
+         rb->PutMonoRow = put_mono_row_BGR32;
+         rb->PutValues = put_values_BGR32;
+         rb->PutMonoValues = put_mono_values_BGR32;
+      }
+   }
+   else if (osmesa->format == OSMESA_RGB_565) {
+      ASSERT(rb->DataType == GL_UNSIGNED_BYTE);
+      rb->GetRow = get_row_RGB_565;
+      rb->GetValues = get_values_RGB_565;
+      rb->PutRow = put_row_RGB_565;
+      rb->PutRowRGB = put_row_rgb_RGB_565;
+      rb->PutMonoRow = put_mono_row_RGB_565;
+      rb->PutValues = put_values_RGB_565;
+      rb->PutMonoValues = put_mono_values_RGB_565;
+   }
+   else {
+      /* NOTE(review): no span functions are installed on this path, yet
+       * GL_TRUE is still returned below.
+       */
+      _mesa_problem(ctx, "bad pixel format in osmesa renderbuffer_storage");
+   }
+
+   /* compute_row_addresses() reads the buffer size through osmesa->rb. */
+   rb->Width = width;
+   rb->Height = height;
+
+   compute_row_addresses( osmesa );
+
+   return GL_TRUE;
+}
+
+
+/** Allocate a new renderbuffer to describe the user-provided color buffer.
+ * Returns whatever _mesa_new_renderbuffer() returned, so possibly NULL;
+ * the 'format' argument is currently not used. */
+static struct gl_renderbuffer *
+new_osmesa_renderbuffer(GLcontext *ctx, GLenum format, GLenum type)
+{
+   const GLuint name = 0;
+   struct gl_renderbuffer *rb = _mesa_new_renderbuffer(ctx, name);
+   if (rb) {
+      rb->RefCount = 1;
+      rb->Delete = osmesa_delete_renderbuffer; /* frees rb but not rb->Data */
+      rb->AllocStorage = osmesa_renderbuffer_storage;
+
+      rb->InternalFormat = GL_RGBA;
+      switch (type) { /* the channel type selects rb->Format */
+      case GL_UNSIGNED_BYTE:
+         rb->Format = MESA_FORMAT_RGBA8888;
+         break;
+      case GL_UNSIGNED_SHORT:
+         rb->Format = MESA_FORMAT_RGBA_16;
+         break;
+      case GL_FLOAT:
+         rb->Format = MESA_FORMAT_RGBA_FLOAT32;
+         break;
+      default: /* NOTE(review): GL_UNSIGNED_SHORT_5_6_5 (RGB_565) ends up here? */
+         assert(0 && "Unexpected type in new_osmesa_renderbuffer()");
+         rb->Format = MESA_FORMAT_RGBA8888;
+      }
+      rb->_BaseFormat = GL_RGBA;
+      rb->DataType = type; /* stored as given, even after the default case */
+   }
+   return rb;
+}
+
+
+/**********************************************************************/
+/*****                    Public Functions                        *****/
+/**********************************************************************/
+
+
+/**
+ * Create an Off-Screen Mesa rendering context with the default depth buffer
+ * size (DEFAULT_SOFTWARE_DEPTH_BITS), 8 stencil bits and no accum buffer.
+ *
+ * Input:  format - pixel format accepted by OSMesaCreateContextExt()
+ *         sharelist - specifies another OSMesaContext with which to share
+ *                     display lists.  NULL indicates no sharing.
+ * Return:  an OSMesaContext or 0 if error
+ */
+GLAPI OSMesaContext GLAPIENTRY
+OSMesaCreateContext( GLenum format, OSMesaContext sharelist )
+{
+   return OSMesaCreateContextExt(format, DEFAULT_SOFTWARE_DEPTH_BITS,
+                                 8, 0, sharelist);
+}
+
+
+
+/**
+ * New in Mesa 3.5.  Create a context and specify the size of the ancillary
+ * buffers: depthBits, stencilBits and accumBits (per accum channel).
+ * Returns NULL for an unsupported format or if any setup step fails.
+ */
+GLAPI OSMesaContext GLAPIENTRY
+OSMesaCreateContextExt( GLenum format, GLint depthBits, GLint stencilBits,
+                        GLint accumBits, OSMesaContext sharelist )
+{
+   OSMesaContext osmesa;
+   struct dd_function_table functions;
+   GLint rind, gind, bind, aind;
+   GLint redBits = 0, greenBits = 0, blueBits = 0, alphaBits =0;
+   /* Map the pixel format to channel sizes and component positions. */
+   rind = gind = bind = aind = 0;
+   if (format==OSMESA_RGBA) {
+      redBits = CHAN_BITS;
+      greenBits = CHAN_BITS;
+      blueBits = CHAN_BITS;
+      alphaBits = CHAN_BITS;
+      rind = 0;
+      gind = 1;
+      bind = 2;
+      aind = 3;
+   }
+   else if (format==OSMESA_BGRA) {
+      redBits = CHAN_BITS;
+      greenBits = CHAN_BITS;
+      blueBits = CHAN_BITS;
+      alphaBits = CHAN_BITS;
+      bind = 0;
+      gind = 1;
+      rind = 2;
+      aind = 3;
+   }
+   else if (format==OSMESA_ARGB) {
+      redBits = CHAN_BITS;
+      greenBits = CHAN_BITS;
+      blueBits = CHAN_BITS;
+      alphaBits = CHAN_BITS;
+      aind = 0;
+      rind = 1;
+      gind = 2;
+      bind = 3;
+   }
+   else if (format==OSMESA_RGB) {
+      redBits = CHAN_BITS;
+      greenBits = CHAN_BITS;
+      blueBits = CHAN_BITS;
+      alphaBits = 0;
+      rind = 0;
+      gind = 1;
+      bind = 2;
+   }
+   else if (format==OSMESA_BGR) {
+      redBits = CHAN_BITS;
+      greenBits = CHAN_BITS;
+      blueBits = CHAN_BITS;
+      alphaBits = 0;
+      rind = 2;
+      gind = 1;
+      bind = 0;
+   }
+#if CHAN_TYPE == GL_UNSIGNED_BYTE
+   else if (format==OSMESA_RGB_565) {
+      redBits = 5;
+      greenBits = 6;
+      blueBits = 5;
+      alphaBits = 0;
+      rind = 0; /* not used */
+      gind = 0;
+      bind = 0;
+   }
+#endif /* CHAN_TYPE == GL_UNSIGNED_BYTE */
+   else {
+      return NULL;
+   }
+
+   osmesa = (OSMesaContext) CALLOC_STRUCT(osmesa_context);
+   if (osmesa) {
+      osmesa->gl_visual = _mesa_create_visual( GL_FALSE,    /* double buffer */
+                                               GL_FALSE,    /* stereo */
+                                               redBits,
+                                               greenBits,
+                                               blueBits,
+                                               alphaBits,
+                                               depthBits,
+                                               stencilBits,
+                                               accumBits,
+                                               accumBits,
+                                               accumBits,
+                                               alphaBits ? accumBits : 0,
+                                               1            /* num samples */
+                                               );
+      if (!osmesa->gl_visual) {
+         free(osmesa);
+         return NULL;
+      }
+
+      /* Initialize device driver function table */
+      _mesa_init_driver_functions(&functions);
+      /* override with our functions */
+      functions.GetString = get_string;
+      functions.UpdateState = osmesa_update_state;
+      functions.GetBufferSize = NULL;
+
+      if (!_mesa_initialize_context(&osmesa->mesa,
+                                    osmesa->gl_visual,
+                                    sharelist ? &sharelist->mesa
+                                              : (GLcontext *) NULL,
+                                    &functions, (void *) osmesa)) {
+         _mesa_destroy_visual( osmesa->gl_visual );
+         free(osmesa);
+         return NULL;
+      }
+
+      _mesa_enable_sw_extensions(&(osmesa->mesa));
+      _mesa_enable_1_3_extensions(&(osmesa->mesa));
+      _mesa_enable_1_4_extensions(&(osmesa->mesa));
+      _mesa_enable_1_5_extensions(&(osmesa->mesa));
+      _mesa_enable_2_0_extensions(&(osmesa->mesa));
+      _mesa_enable_2_1_extensions(&(osmesa->mesa));
+
+      osmesa->gl_buffer = _mesa_create_framebuffer(osmesa->gl_visual);
+      if (!osmesa->gl_buffer) {
+         _mesa_destroy_visual( osmesa->gl_visual );
+         _mesa_free_context_data( &osmesa->mesa );
+         free(osmesa);
+         return NULL;
+      }
+
+      /* Create depth/stencil/accum buffers.  We'll create the color
+       * buffer later in OSMesaMakeCurrent().
+       */
+      _mesa_add_soft_renderbuffers(osmesa->gl_buffer,
+                                   GL_FALSE, /* color */
+                                   osmesa->gl_visual->haveDepthBuffer,
+                                   osmesa->gl_visual->haveStencilBuffer,
+                                   osmesa->gl_visual->haveAccumBuffer,
+                                   GL_FALSE, /* alpha */
+                                   GL_FALSE /* aux */ );
+
+      osmesa->format = format;
+      osmesa->userRowLength = 0;
+      osmesa->yup = GL_TRUE;
+      osmesa->rInd = rind;
+      osmesa->gInd = gind;
+      osmesa->bInd = bind;
+      osmesa->aInd = aind;
+
+      _mesa_meta_init(&osmesa->mesa);
+
+      /* Initialize the software rasterizer and helper modules. */
+      {
+	 GLcontext *ctx = &osmesa->mesa;
+         SWcontext *swrast;
+         TNLcontext *tnl;
+
+	 if (!_swrast_CreateContext( ctx ) ||
+             !_vbo_CreateContext( ctx ) ||
+             !_tnl_CreateContext( ctx ) ||
+             !_swsetup_CreateContext( ctx )) {
+            _mesa_destroy_visual(osmesa->gl_visual);
+            _mesa_free_context_data(ctx);
+            free(osmesa); /* NOTE(review): osmesa->gl_buffer looks leaked here */
+            return NULL;
+         }
+	
+	 _swsetup_Wakeup( ctx );
+
+         /* use default TCL pipeline */
+         tnl = TNL_CONTEXT(ctx);
+         tnl->Driver.RunPipeline = _tnl_run_pipeline;
+
+         /* Extend the software rasterizer with our optimized line and triangle
+          * drawing functions.
+          */
+         swrast = SWRAST_CONTEXT( ctx );
+         swrast->choose_line = osmesa_choose_line;
+         swrast->choose_triangle = osmesa_choose_triangle;
+      }
+   }
+   return osmesa;
+}
+
+
+/**
+ * Destroy an Off-Screen Mesa rendering context.
+ *
+ * \param osmesa  the context to destroy
+ */
+GLAPI void GLAPIENTRY
+OSMesaDestroyContext( OSMesaContext osmesa )
+{
+   if (!osmesa)
+      return;   /* destroying a NULL context is a no-op */
+
+   if (osmesa->rb)
+      _mesa_reference_renderbuffer(&osmesa->rb, NULL);
+   _mesa_meta_free( &osmesa->mesa );
+
+   /* Helper modules are torn down in the reverse order of creation. */
+   _swsetup_DestroyContext( &osmesa->mesa );
+   _tnl_DestroyContext( &osmesa->mesa );
+   _vbo_DestroyContext( &osmesa->mesa );
+   _swrast_DestroyContext( &osmesa->mesa );
+
+   _mesa_destroy_visual( osmesa->gl_visual );
+   _mesa_reference_framebuffer( &osmesa->gl_buffer, NULL );
+   _mesa_free_context_data( &osmesa->mesa );
+   free( osmesa );
+}
+
+
+/**
+ * Bind an OSMesaContext to an image buffer.  The image buffer is just a
+ * block of memory which the client provides.  Its size must be at least
+ * as large as width*height*sizeof(type).  Its address should be a multiple
+ * of 4 if using RGBA mode.
+ *
+ * Image data is stored in the order of glDrawPixels:  row-major order
+ * with the lower-left image pixel stored in the first array position
+ * (ie. bottom-to-top).
+ *
+ * If the context's viewport hasn't been initialized yet, it will now be
+ * initialized to (0,0,width,height).
+ *
+ * Input:  osmesa - the rendering context
+ *         buffer - the image buffer memory
+ *         type - data type for pixel components
+ *            Normally, only GL_UNSIGNED_BYTE and GL_UNSIGNED_SHORT_5_6_5
+ *            are supported.  But if Mesa's been compiled with CHAN_BITS==16
+ *            then type may be GL_UNSIGNED_SHORT or GL_UNSIGNED_BYTE.  And if
+ *            Mesa's been built with CHAN_BITS==32 then type may be GL_FLOAT,
+ *            GL_UNSIGNED_SHORT or GL_UNSIGNED_BYTE.
+ *         width, height - size of image buffer in pixels, at least 1
+ * Return:  GL_TRUE if success, GL_FALSE if error because of invalid osmesa,
+ *          invalid buffer address, invalid type, width<1, height<1,
+ *          width/height>internal limit or renderbuffer allocation failure.
+ */
+GLAPI GLboolean GLAPIENTRY
+OSMesaMakeCurrent( OSMesaContext osmesa, void *buffer, GLenum type,
+                   GLsizei width, GLsizei height )
+{
+   if (!osmesa || !buffer ||
+       width < 1 || height < 1 ||
+       width > MAX_WIDTH || height > MAX_HEIGHT) {
+      return GL_FALSE;
+   }
+
+   if (osmesa->format == OSMESA_RGB_565 && type != GL_UNSIGNED_SHORT_5_6_5) {
+      return GL_FALSE;
+   }
+
+#if 0
+   if (!(type == GL_UNSIGNED_BYTE ||
+         (type == GL_UNSIGNED_SHORT && CHAN_BITS >= 16) ||
+         (type == GL_FLOAT && CHAN_BITS == 32))) {
+      /* i.e. is sizeof(type) * 8 > CHAN_BITS? */
+      return GL_FALSE;
+   }
+#endif
+
+   osmesa_update_state( &osmesa->mesa, 0 );
+
+   /* Call this periodically to detect when the user has begun using
+    * GL rendering from multiple threads.
+    */
+   _glapi_check_multithread();
+
+   /* Create a front/left color buffer which wraps the user-provided buffer.
+    * There is no back color buffer.  If the user tries to use a 8, 16 or
+    * 32-bit/channel buffer that doesn't match what Mesa was compiled for
+    * (CHAN_BITS) the _mesa_add_renderbuffer() function will create a
+    * "wrapper" renderbuffer that converts rendering from CHAN_BITS to the
+    * user-requested channel size.
+    * NOTE(review): a new renderbuffer is allocated on every call and the
+    * previous osmesa->rb pointer is overwritten -- confirm it is released.
+    */
+   osmesa->rb = new_osmesa_renderbuffer(&osmesa->mesa, osmesa->format, type);
+   if (!osmesa->rb)
+      return GL_FALSE;
+   _mesa_add_renderbuffer(osmesa->gl_buffer, BUFFER_FRONT_LEFT, osmesa->rb);
+   assert(osmesa->rb->RefCount == 2);
+
+   /* Set renderbuffer fields.  Set width/height = 0 to force
+    * osmesa_renderbuffer_storage() being called by _mesa_resize_framebuffer()
+    */
+   osmesa->rb->Data = buffer;
+   osmesa->rb->Width = osmesa->rb->Height = 0;
+
+   /* Set the framebuffer's size.  This causes the
+    * osmesa_renderbuffer_storage() function to get called.
+    */
+   _mesa_resize_framebuffer(&osmesa->mesa, osmesa->gl_buffer, width, height);
+   osmesa->gl_buffer->Initialized = GL_TRUE; /* XXX TEMPORARY? */
+
+   _mesa_make_current( &osmesa->mesa, osmesa->gl_buffer, osmesa->gl_buffer );
+
+   /* Remove renderbuffer attachment, then re-add.  This installs the
+    * renderbuffer adaptor/wrapper if needed (for bpp conversion).
+    */
+   _mesa_remove_renderbuffer(osmesa->gl_buffer, BUFFER_FRONT_LEFT);
+   _mesa_add_renderbuffer(osmesa->gl_buffer, BUFFER_FRONT_LEFT, osmesa->rb);
+
+   /* this updates the visual's red/green/blue/alphaBits fields */
+   _mesa_update_framebuffer_visual(osmesa->gl_buffer);
+   /* update the framebuffer size */
+   _mesa_resize_framebuffer(&osmesa->mesa, osmesa->gl_buffer, width, height);
+
+   return GL_TRUE;
+}
+
+
+
+GLAPI OSMesaContext GLAPIENTRY
+OSMesaGetCurrentContext( void )
+{
+   /* The current GLcontext pointer is reinterpreted as the OSMesaContext
+    * (presumably because the GLcontext is the struct's first member --
+    * TODO confirm).  A NULL context stays NULL through the cast.
+    */
+   return (OSMesaContext) _mesa_get_current_context();
+}
+
+
+/* Set OSMESA_ROW_LENGTH or OSMESA_Y_UP on the current context (if any). */
+GLAPI void GLAPIENTRY
+OSMesaPixelStore( GLint pname, GLint value )
+{
+   OSMesaContext osmesa = OSMesaGetCurrentContext();
+   if (!osmesa)
+      return;   /* no current context: nothing to configure */
+   switch (pname) {
+      case OSMESA_ROW_LENGTH:
+         if (value<0) {
+            _mesa_error( &osmesa->mesa, GL_INVALID_VALUE,
+                      "OSMesaPixelStore(value)" );
+            return;
+         }
+         osmesa->userRowLength = value;
+         break;
+      case OSMESA_Y_UP:
+         osmesa->yup = value ? GL_TRUE : GL_FALSE;
+         break;
+      default:
+         _mesa_error( &osmesa->mesa, GL_INVALID_ENUM, "OSMesaPixelStore(pname)" );
+         return;
+   }
+   compute_row_addresses( osmesa );   /* row length or Y direction changed */
+}
+
+/* Return the OSMesa parameter selected by pname in *value. */
+GLAPI void GLAPIENTRY
+OSMesaGetIntegerv( GLint pname, GLint *value )
+{
+   OSMesaContext osmesa = OSMesaGetCurrentContext();
+
+   /* The size limits are constants: answer them without any context. */
+   switch (pname) {
+      case OSMESA_MAX_WIDTH:
+         *value = MAX_WIDTH;
+         return;
+      case OSMESA_MAX_HEIGHT:
+         *value = MAX_HEIGHT;
+         return;
+      default:
+         break;
+   }
+
+   /* Every other query reads context state; with no current context
+    * *value is left untouched rather than dereferencing NULL. */
+   if (!osmesa)
+      return;
+
+   switch (pname) {
+      case OSMESA_WIDTH:
+         *value = osmesa->gl_buffer ? osmesa->gl_buffer->Width : 0;
+         return;
+      case OSMESA_HEIGHT:
+         *value = osmesa->gl_buffer ? osmesa->gl_buffer->Height : 0;
+         return;
+      case OSMESA_FORMAT:
+         *value = osmesa->format;
+         return;
+      case OSMESA_TYPE:
+         /* current color buffer's data type */
+         *value = osmesa->rb ? osmesa->rb->DataType : 0;
+         return;
+      case OSMESA_ROW_LENGTH:
+         *value = osmesa->userRowLength;
+         return;
+      case OSMESA_Y_UP:
+         *value = osmesa->yup;
+         return;
+      default:
+         _mesa_error(&osmesa->mesa, GL_INVALID_ENUM, "OSMesaGetIntegerv(pname)");
+         return;
+   }
+}
+
+
+/**
+ * Return the depth buffer associated with an OSMesa context.
+ * Input:  c - the OSMesa context (NULL is reported as failure)
+ * Output:  width, height - size of buffer in pixels
+ *          bytesPerValue - bytes per depth value (2 or 4)
+ *          buffer - pointer to depth buffer values
+ * Return:  GL_TRUE on success; GL_FALSE (all outputs zeroed) on failure.
+ */
+GLAPI GLboolean GLAPIENTRY
+OSMesaGetDepthBuffer( OSMesaContext c, GLint *width, GLint *height,
+                      GLint *bytesPerValue, void **buffer )
+{
+   struct gl_renderbuffer *rb = NULL;
+
+   /* Guard against a NULL context as well as a missing framebuffer. */
+   if (c && c->gl_buffer)
+      rb = c->gl_buffer->Attachment[BUFFER_DEPTH].Renderbuffer;
+
+   if (!rb || !rb->Data) {
+      *width = 0;
+      *height = 0;
+      *bytesPerValue = 0;
+      *buffer = 0;
+      return GL_FALSE;
+   }
+
+   *width = rb->Width;
+   *height = rb->Height;
+   if (c->gl_visual->depthBits <= 16)
+      *bytesPerValue = sizeof(GLushort);
+   else
+      *bytesPerValue = sizeof(GLuint);
+   *buffer = rb->Data;
+   return GL_TRUE;
+}
+
+
+/**
+ * Return the color buffer associated with an OSMesa context.
+ * Input:  osmesa - the OSMesa context (NULL is reported as failure)
+ * Output:  width, height - size of buffer in pixels
+ *          format - the pixel format (OSMESA_FORMAT)
+ *          buffer - pointer to color buffer values
+ * Return:  GL_TRUE on success; GL_FALSE (all outputs zeroed) on failure.
+ */
+GLAPI GLboolean GLAPIENTRY
+OSMesaGetColorBuffer( OSMesaContext osmesa, GLint *width,
+                      GLint *height, GLint *format, void **buffer )
+{
+   /* No context, or no color buffer bound yet by OSMesaMakeCurrent(). */
+   if (!osmesa || !osmesa->rb || !osmesa->rb->Data) {
+      *width = 0;
+      *height = 0;
+      *format = 0;
+      *buffer = 0;
+      return GL_FALSE;
+   }
+
+   *width = osmesa->rb->Width;
+   *height = osmesa->rb->Height;
+   *format = osmesa->format;
+   *buffer = osmesa->rb->Data;
+   return GL_TRUE;
+}
+
+
+struct name_function
+{
+   const char *Name;      /* exported symbol name */
+   OSMESAproc Function;   /* entry point returned for that name */
+};
+/* OSMesa entry points searched by OSMesaGetProcAddress(); NULL-terminated. */
+static struct name_function functions[] = {
+   { "OSMesaCreateContext", (OSMESAproc) OSMesaCreateContext },
+   { "OSMesaCreateContextExt", (OSMESAproc) OSMesaCreateContextExt },
+   { "OSMesaDestroyContext", (OSMESAproc) OSMesaDestroyContext },
+   { "OSMesaMakeCurrent", (OSMESAproc) OSMesaMakeCurrent },
+   { "OSMesaGetCurrentContext", (OSMESAproc) OSMesaGetCurrentContext },
+   { "OSMesaPixelStore", (OSMESAproc) OSMesaPixelStore },
+   { "OSMesaGetIntegerv", (OSMESAproc) OSMesaGetIntegerv },
+   { "OSMesaGetDepthBuffer", (OSMESAproc) OSMesaGetDepthBuffer },
+   { "OSMesaGetColorBuffer", (OSMESAproc) OSMesaGetColorBuffer },
+   { "OSMesaGetProcAddress", (OSMESAproc) OSMesaGetProcAddress },
+   { "OSMesaColorClamp", (OSMESAproc) OSMesaColorClamp },
+   { NULL, NULL }
+};
+
+
+GLAPI OSMESAproc GLAPIENTRY
+OSMesaGetProcAddress( const char *funcName )
+{
+   const struct name_function *entry;
+   for (entry = functions; entry->Name != NULL; entry++) {
+      if (strcmp(entry->Name, funcName) == 0)
+         return entry->Function;   /* one of OSMesa's own entry points */
+   }
+   return _glapi_get_proc_address(funcName);   /* not in the local table */
+}
+
+/* Set ClampFragmentColor on the current context; no-op if there is none. */
+GLAPI void GLAPIENTRY
+OSMesaColorClamp(GLboolean enable)
+{
+   OSMesaContext osmesa = OSMesaGetCurrentContext();
+
+   if (!osmesa)
+      return;
+
+   /* Exactly GL_TRUE stores GL_TRUE; any other value GL_FIXED_ONLY_ARB. */
+   osmesa->mesa.Color.ClampFragmentColor =
+      (enable == GL_TRUE) ? GL_TRUE : GL_FIXED_ONLY_ARB;
+}
+
+
diff --git a/src/mesa/drivers/osmesa/osmesa.def b/src/mesa/drivers/osmesa/osmesa.def
new file mode 100644
index 0000000000..06afab72b0
--- /dev/null
+++ b/src/mesa/drivers/osmesa/osmesa.def
@@ -0,0 +1,15 @@
+;DESCRIPTION 'Mesa OSMesa lib for Win32'
+VERSION 4.1
+
+EXPORTS
+	OSMesaColorClamp
+	OSMesaCreateContext
+	OSMesaCreateContextExt
+	OSMesaDestroyContext
+	OSMesaMakeCurrent
+	OSMesaGetCurrentContext
+	OSMesaPixelStore
+	OSMesaGetIntegerv
+	OSMesaGetDepthBuffer
+	OSMesaGetColorBuffer
+	OSMesaGetProcAddress
diff --git a/src/mesa/drivers/windows/fx/fx.rc b/src/mesa/drivers/windows/fx/fx.rc
new file mode 100644
index 0000000000..f920b8768d
--- /dev/null
+++ b/src/mesa/drivers/windows/fx/fx.rc
@@ -0,0 +1,39 @@
+#include <windows.h>
+
+#define PRODNAME                "Mesa 6.x"
+#define CONTACTSTR              "http://www.mesa3d.org"
+#define HWSTR                   "3dfx Voodoo Graphics, Voodoo Rush, Voodoo^2, Voodoo Banshee, Velocity 100/200, Voodoo3, Voodoo4, Voodoo5"
+#define COPYRIGHTSTR            "Copyright \251 Brian E. Paul"
+
+#define VERSIONSTR              "6.3.0.1"
+#define MANVERSION              6
+#define MANREVISION             3
+#define BUILD_NUMBER            1
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION MANVERSION, MANREVISION, 0, BUILD_NUMBER
+ PRODUCTVERSION MANVERSION, MANREVISION, 0, BUILD_NUMBER
+ FILEFLAGSMASK 0x0030003FL
+
+ FILEOS VOS_DOS_WINDOWS32
+ FILETYPE VFT_DRV
+ FILESUBTYPE VFT2_DRV_INSTALLABLE
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904E4"
+        BEGIN
+            VALUE "FileDescription", PRODNAME
+            VALUE "FileVersion", VERSIONSTR
+            VALUE "LegalCopyright", COPYRIGHTSTR
+            VALUE "ProductName", PRODNAME
+            VALUE "Graphics Subsystem", HWSTR
+            VALUE "Contact", CONTACTSTR
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        /* the following line should be extended for localized versions */
+        VALUE "Translation", 0x409, 1252
+    END
+END
diff --git a/src/mesa/drivers/windows/fx/fxopengl.def b/src/mesa/drivers/windows/fx/fxopengl.def
new file mode 100644
index 0000000000..d65b763d25
--- /dev/null
+++ b/src/mesa/drivers/windows/fx/fxopengl.def
@@ -0,0 +1,955 @@
+LIBRARY OpenGL32
+DESCRIPTION "Mesa 5.1"
+EXPORTS
+ glAccum
+ glActiveStencilFaceEXT
+ glActiveTexture
+ glActiveTextureARB
+ glAlphaFunc
+ glAreProgramsResidentNV
+ glAreTexturesResident
+ glAreTexturesResidentEXT
+ glArrayElement
+ glArrayElementEXT
+ glBegin
+ glBeginQueryARB
+ glBindBufferARB
+ glBindProgramARB
+ glBindProgramNV
+ glBindTexture
+ glBindTextureEXT
+ glBitmap
+;glBlendColor
+;glBlendColorEXT
+ glBlendEquation
+ glBlendEquationEXT
+ glBlendFunc
+ glBlendFuncSeparate
+ glBlendFuncSeparateEXT
+ glBlendFuncSeparateINGR
+ glBufferDataARB
+ glBufferSubDataARB
+ glCallList
+ glCallLists
+ glClear
+ glClearAccum
+ glClearColor
+ glClearDepth
+ glClearIndex
+ glClearStencil
+ glClientActiveTexture
+ glClientActiveTextureARB
+ glClipPlane
+ glColorMask
+ glColorMaterial
+ glColorPointer
+ glColorPointerEXT
+ glColorSubTable
+ glColorSubTableEXT
+ glColorTable
+ glColorTableEXT
+ glColorTableParameterfv
+ glColorTableParameterfvSGI
+ glColorTableParameteriv
+ glColorTableParameterivSGI
+ glColorTableSGI
+ glColor3b
+ glColor3bv
+ glColor3d
+ glColor3dv
+ glColor3f
+ glColor3fv
+ glColor3i
+ glColor3iv
+ glColor3s
+ glColor3sv
+ glColor3ub
+ glColor3ubv
+ glColor3ui
+ glColor3uiv
+ glColor3us
+ glColor3usv
+ glColor4b
+ glColor4bv
+ glColor4d
+ glColor4dv
+ glColor4f
+ glColor4fv
+ glColor4i
+ glColor4iv
+ glColor4s
+ glColor4sv
+ glColor4ub
+ glColor4ubv
+ glColor4ui
+ glColor4uiv
+ glColor4us
+ glColor4usv
+ glCombinerInputNV
+ glCombinerOutputNV
+ glCombinerParameterfNV
+ glCombinerParameterfvNV
+ glCombinerParameteriNV
+ glCombinerParameterivNV
+ glCompressedTexImage1D
+ glCompressedTexImage1DARB
+ glCompressedTexImage2D
+ glCompressedTexImage2DARB
+ glCompressedTexImage3D
+ glCompressedTexImage3DARB
+ glCompressedTexSubImage1D
+ glCompressedTexSubImage1DARB
+ glCompressedTexSubImage2D
+ glCompressedTexSubImage2DARB
+ glCompressedTexSubImage3D
+ glCompressedTexSubImage3DARB
+ glConvolutionFilter1D
+ glConvolutionFilter1DEXT
+ glConvolutionFilter2D
+ glConvolutionFilter2DEXT
+ glConvolutionParameterf
+ glConvolutionParameterfEXT
+ glConvolutionParameterfv
+ glConvolutionParameterfvEXT
+ glConvolutionParameteri
+ glConvolutionParameteriEXT
+ glConvolutionParameteriv
+ glConvolutionParameterivEXT
+ glCopyColorSubTable
+ glCopyColorSubTableEXT
+ glCopyColorTable
+ glCopyColorTableSGI
+ glCopyConvolutionFilter1D
+ glCopyConvolutionFilter1DEXT
+ glCopyConvolutionFilter2D
+ glCopyConvolutionFilter2DEXT
+ glCopyPixels
+ glCopyTexImage1D
+ glCopyTexImage1DEXT
+ glCopyTexImage2D
+ glCopyTexImage2DEXT
+ glCopyTexSubImage1D
+ glCopyTexSubImage1DEXT
+ glCopyTexSubImage2D
+ glCopyTexSubImage2DEXT
+ glCopyTexSubImage3D
+ glCopyTexSubImage3DEXT
+ glCullFace
+ glCullParameterdvEXT
+ glCullParameterfvEXT
+ glDeleteBuffersARB
+ glDeleteFencesNV
+ glDeleteLists
+ glDeleteProgramsARB
+ glDeleteProgramsNV
+ glDeleteQueriesARB
+ glDeleteTextures
+ glDeleteTexturesEXT
+ glDepthBoundsEXT
+ glDepthFunc
+ glDepthMask
+ glDepthRange
+ glDetailTexFuncSGIS
+ glDisable
+ glDisableClientState
+ glDisableVertexAttribArrayARB
+ glDrawArrays
+ glDrawArraysEXT
+ glDrawBuffer
+ glDrawElements
+ glDrawPixels
+ glDrawRangeElements
+ glDrawRangeElementsEXT
+ glEdgeFlag
+ glEdgeFlagPointer
+ glEdgeFlagPointerEXT
+ glEdgeFlagv
+ glEnable
+ glEnableClientState
+ glEnableVertexAttribArrayARB
+ glEnd
+ glEndList
+ glEndQueryARB
+ glEvalCoord1d
+ glEvalCoord1dv
+ glEvalCoord1f
+ glEvalCoord1fv
+ glEvalCoord2d
+ glEvalCoord2dv
+ glEvalCoord2f
+ glEvalCoord2fv
+ glEvalMesh1
+ glEvalMesh2
+ glEvalPoint1
+ glEvalPoint2
+ glExecuteProgramNV
+ glFeedbackBuffer
+ glFinalCombinerInputNV
+ glFinish
+ glFinishFenceNV
+ glFlush
+ glFlushRasterSGIX
+ glFlushVertexArrayRangeNV
+ glFogCoordd
+ glFogCoorddEXT
+ glFogCoorddv
+ glFogCoorddvEXT
+ glFogCoordf
+ glFogCoordfEXT
+ glFogCoordfv
+ glFogCoordfvEXT
+ glFogCoordPointer
+ glFogCoordPointerEXT
+ glFogf
+ glFogfv
+ glFogi
+ glFogiv
+ glFragmentColorMaterialSGIX
+ glFragmentLightfSGIX
+ glFragmentLightfvSGIX
+ glFragmentLightiSGIX
+ glFragmentLightivSGIX
+ glFragmentLightModelfSGIX
+ glFragmentLightModelfvSGIX
+ glFragmentLightModeliSGIX
+ glFragmentLightModelivSGIX
+ glFragmentMaterialfSGIX
+ glFragmentMaterialfvSGIX
+ glFragmentMaterialiSGIX
+ glFragmentMaterialivSGIX
+ glFrameZoomSGIX
+ glFrontFace
+ glFrustum
+ glGenBuffersARB
+ glGenFencesNV
+ glGenLists
+ glGenProgramsARB
+ glGenProgramsNV
+ glGenQueriesARB
+ glGenTextures
+ glGenTexturesEXT
+ glGetBooleanv
+ glGetBufferParameterivARB
+ glGetBufferPointervARB
+ glGetBufferSubDataARB
+ glGetClipPlane
+ glGetColorTable
+ glGetColorTableEXT
+ glGetColorTableParameterfv
+ glGetColorTableParameterfvEXT
+ glGetColorTableParameterfvSGI
+ glGetColorTableParameteriv
+ glGetColorTableParameterivEXT
+ glGetColorTableParameterivSGI
+ glGetColorTableSGI
+ glGetCombinerInputParameterfvNV
+ glGetCombinerInputParameterivNV
+ glGetCombinerOutputParameterfvNV
+ glGetCombinerOutputParameterivNV
+ glGetCompressedTexImage
+ glGetCompressedTexImageARB
+ glGetConvolutionFilter
+ glGetConvolutionFilterEXT
+ glGetConvolutionParameterfv
+ glGetConvolutionParameterfvEXT
+ glGetConvolutionParameteriv
+ glGetConvolutionParameterivEXT
+ glGetDetailTexFuncSGIS
+ glGetDoublev
+ glGetError
+ glGetFenceivNV
+ glGetFinalCombinerInputParameterfvNV
+ glGetFinalCombinerInputParameterivNV
+ glGetFloatv
+ glGetFragmentLightfvSGIX
+ glGetFragmentLightivSGIX
+ glGetFragmentMaterialfvSGIX
+ glGetFragmentMaterialivSGIX
+ glGetHistogram
+ glGetHistogramEXT
+ glGetHistogramParameterfv
+ glGetHistogramParameterfvEXT
+ glGetHistogramParameteriv
+ glGetHistogramParameterivEXT
+ glGetInstrumentsSGIX
+ glGetIntegerv
+ glGetLightfv
+ glGetLightiv
+ glGetListParameterfvSGIX
+ glGetListParameterivSGIX
+ glGetMapdv
+ glGetMapfv
+ glGetMapiv
+ glGetMaterialfv
+ glGetMaterialiv
+ glGetMinmax
+ glGetMinmaxEXT
+ glGetMinmaxParameterfv
+ glGetMinmaxParameterfvEXT
+ glGetMinmaxParameteriv
+ glGetMinmaxParameterivEXT
+ glGetPixelMapfv
+ glGetPixelMapuiv
+ glGetPixelMapusv
+ glGetPixelTexGenParameterfvSGIS
+ glGetPixelTexGenParameterivSGIS
+ glGetPointerv
+ glGetPointervEXT
+ glGetPolygonStipple
+ glGetProgramEnvParameterdvARB
+ glGetProgramEnvParameterfvARB
+ glGetProgramivARB
+ glGetProgramivNV
+ glGetProgramLocalParameterdvARB
+ glGetProgramLocalParameterfvARB
+ glGetProgramNamedParameterdvNV
+ glGetProgramNamedParameterfvNV
+ glGetProgramParameterdvNV
+ glGetProgramParameterfvNV
+ glGetProgramStringARB
+ glGetProgramStringNV
+ glGetQueryivARB
+ glGetQueryObjectivARB
+ glGetQueryObjectuivARB
+ glGetSeparableFilter
+ glGetSeparableFilterEXT
+ glGetSharpenTexFuncSGIS
+ glGetString
+ glGetTexEnvfv
+ glGetTexEnviv
+ glGetTexFilterFuncSGIS
+ glGetTexGendv
+ glGetTexGenfv
+ glGetTexGeniv
+ glGetTexImage
+ glGetTexLevelParameterfv
+ glGetTexLevelParameteriv
+ glGetTexParameterfv
+ glGetTexParameteriv
+ glGetTrackMatrixivNV
+ glGetVertexAttribdvARB
+ glGetVertexAttribdvNV
+ glGetVertexAttribfvARB
+ glGetVertexAttribfvNV
+ glGetVertexAttribivARB
+ glGetVertexAttribivNV
+ glGetVertexAttribPointervARB
+ glGetVertexAttribPointervNV
+ glHint
+ glHintPGI
+ glHistogram
+ glHistogramEXT
+ glIndexd
+ glIndexdv
+ glIndexf
+ glIndexFuncEXT
+ glIndexfv
+ glIndexi
+ glIndexiv
+ glIndexMask
+ glIndexMaterialEXT
+ glIndexPointer
+ glIndexPointerEXT
+ glIndexs
+ glIndexsv
+ glIndexub
+ glIndexubv
+ glInitNames
+ glInstrumentsBufferSGIX
+ glInterleavedArrays
+ glIsBufferARB
+ glIsEnabled
+ glIsFenceNV
+ glIsList
+ glIsProgramARB
+ glIsProgramNV
+ glIsQueryARB
+ glIsTexture
+ glIsTextureEXT
+ glLightEnviSGIX
+ glLightf
+ glLightfv
+ glLighti
+ glLightiv
+ glLightModelf
+ glLightModelfv
+ glLightModeli
+ glLightModeliv
+ glLineStipple
+ glLineWidth
+ glListBase
+ glListParameterfSGIX
+ glListParameterfvSGIX
+ glListParameteriSGIX
+ glListParameterivSGIX
+ glLoadIdentity
+ glLoadMatrixd
+ glLoadMatrixf
+ glLoadName
+ glLoadProgramNV
+ glLoadTransposeMatrixd
+ glLoadTransposeMatrixdARB
+ glLoadTransposeMatrixf
+ glLoadTransposeMatrixfARB
+ glLockArraysEXT
+ glLogicOp
+ glMapBufferARB
+ glMapGrid1d
+ glMapGrid1f
+ glMapGrid2d
+ glMapGrid2f
+ glMap1d
+ glMap1f
+ glMap2d
+ glMap2f
+ glMaterialf
+ glMaterialfv
+ glMateriali
+ glMaterialiv
+ glMatrixMode
+ glMinmax
+ glMinmaxEXT
+ glMultiDrawArrays
+ glMultiDrawArraysEXT
+ glMultiDrawElements
+ glMultiDrawElementsEXT
+ glMultiModeDrawArraysIBM
+ glMultiModeDrawElementsIBM
+ glMultiTexCoord1d
+ glMultiTexCoord1dARB
+ glMultiTexCoord1dv
+ glMultiTexCoord1dvARB
+ glMultiTexCoord1f
+ glMultiTexCoord1fARB
+ glMultiTexCoord1fv
+ glMultiTexCoord1fvARB
+ glMultiTexCoord1i
+ glMultiTexCoord1iARB
+ glMultiTexCoord1iv
+ glMultiTexCoord1ivARB
+ glMultiTexCoord1s
+ glMultiTexCoord1sARB
+ glMultiTexCoord1sv
+ glMultiTexCoord1svARB
+ glMultiTexCoord2d
+ glMultiTexCoord2dARB
+ glMultiTexCoord2dv
+ glMultiTexCoord2dvARB
+ glMultiTexCoord2f
+ glMultiTexCoord2fARB
+ glMultiTexCoord2fv
+ glMultiTexCoord2fvARB
+ glMultiTexCoord2i
+ glMultiTexCoord2iARB
+ glMultiTexCoord2iv
+ glMultiTexCoord2ivARB
+ glMultiTexCoord2s
+ glMultiTexCoord2sARB
+ glMultiTexCoord2sv
+ glMultiTexCoord2svARB
+ glMultiTexCoord3d
+ glMultiTexCoord3dARB
+ glMultiTexCoord3dv
+ glMultiTexCoord3dvARB
+ glMultiTexCoord3f
+ glMultiTexCoord3fARB
+ glMultiTexCoord3fv
+ glMultiTexCoord3fvARB
+ glMultiTexCoord3i
+ glMultiTexCoord3iARB
+ glMultiTexCoord3iv
+ glMultiTexCoord3ivARB
+ glMultiTexCoord3s
+ glMultiTexCoord3sARB
+ glMultiTexCoord3sv
+ glMultiTexCoord3svARB
+ glMultiTexCoord4d
+ glMultiTexCoord4dARB
+ glMultiTexCoord4dv
+ glMultiTexCoord4dvARB
+ glMultiTexCoord4f
+ glMultiTexCoord4fARB
+ glMultiTexCoord4fv
+ glMultiTexCoord4fvARB
+ glMultiTexCoord4i
+ glMultiTexCoord4iARB
+ glMultiTexCoord4iv
+ glMultiTexCoord4ivARB
+ glMultiTexCoord4s
+ glMultiTexCoord4sARB
+ glMultiTexCoord4sv
+ glMultiTexCoord4svARB
+ glMultMatrixd
+ glMultMatrixf
+ glMultTransposeMatrixd
+ glMultTransposeMatrixdARB
+ glMultTransposeMatrixf
+ glMultTransposeMatrixfARB
+ glNewList
+ glNormalPointer
+ glNormalPointerEXT
+ glNormal3b
+ glNormal3bv
+ glNormal3d
+ glNormal3dv
+ glNormal3f
+ glNormal3fv
+ glNormal3i
+ glNormal3iv
+ glNormal3s
+ glNormal3sv
+ glOrtho
+ glPassThrough
+ glPixelMapfv
+ glPixelMapuiv
+ glPixelMapusv
+ glPixelStoref
+ glPixelStorei
+ glPixelTexGenParameterfSGIS
+ glPixelTexGenParameterfvSGIS
+ glPixelTexGenParameteriSGIS
+ glPixelTexGenParameterivSGIS
+ glPixelTexGenSGIX
+ glPixelTransferf
+ glPixelTransferi
+ glPixelZoom
+ glPointParameterf
+ glPointParameterfARB
+ glPointParameterfEXT
+ glPointParameterfSGIS
+ glPointParameterfv
+ glPointParameterfvARB
+ glPointParameterfvEXT
+ glPointParameterfvSGIS
+ glPointParameteri
+ glPointParameteriNV
+ glPointParameteriv
+ glPointParameterivNV
+ glPointSize
+ glPollInstrumentsSGIX
+ glPolygonMode
+ glPolygonOffset
+ glPolygonOffsetEXT
+ glPolygonStipple
+ glPopAttrib
+ glPopClientAttrib
+ glPopMatrix
+ glPopName
+ glPrioritizeTextures
+ glPrioritizeTexturesEXT
+ glProgramEnvParameter4dARB
+ glProgramEnvParameter4dvARB
+ glProgramEnvParameter4fARB
+ glProgramEnvParameter4fvARB
+ glProgramLocalParameter4dARB
+ glProgramLocalParameter4dvARB
+ glProgramLocalParameter4fARB
+ glProgramLocalParameter4fvARB
+ glProgramNamedParameter4dNV
+ glProgramNamedParameter4dvNV
+ glProgramNamedParameter4fNV
+ glProgramNamedParameter4fvNV
+ glProgramParameters4dvNV
+ glProgramParameters4fvNV
+ glProgramParameter4dNV
+ glProgramParameter4dvNV
+ glProgramParameter4fNV
+ glProgramParameter4fvNV
+ glProgramStringARB
+ glPushAttrib
+ glPushClientAttrib
+ glPushMatrix
+ glPushName
+ glRasterPos2d
+ glRasterPos2dv
+ glRasterPos2f
+ glRasterPos2fv
+ glRasterPos2i
+ glRasterPos2iv
+ glRasterPos2s
+ glRasterPos2sv
+ glRasterPos3d
+ glRasterPos3dv
+ glRasterPos3f
+ glRasterPos3fv
+ glRasterPos3i
+ glRasterPos3iv
+ glRasterPos3s
+ glRasterPos3sv
+ glRasterPos4d
+ glRasterPos4dv
+ glRasterPos4f
+ glRasterPos4fv
+ glRasterPos4i
+ glRasterPos4iv
+ glRasterPos4s
+ glRasterPos4sv
+ glReadBuffer
+ glReadInstrumentsSGIX
+ glReadPixels
+ glRectd
+ glRectdv
+ glRectf
+ glRectfv
+ glRecti
+ glRectiv
+ glRects
+ glRectsv
+ glReferencePlaneSGIX
+ glRenderMode
+ glRequestResidentProgramsNV
+ glResetHistogram
+ glResetHistogramEXT
+ glResetMinmax
+ glResetMinmaxEXT
+ glResizeBuffersMESA
+ glRotated
+ glRotatef
+ glSampleCoverage
+ glSampleCoverageARB
+ glSampleMaskEXT
+ glSampleMaskSGIS
+ glSamplePatternEXT
+ glSamplePatternSGIS
+ glScaled
+ glScalef
+ glScissor
+ glSecondaryColorPointer
+ glSecondaryColorPointerEXT
+ glSecondaryColor3b
+ glSecondaryColor3bEXT
+ glSecondaryColor3bv
+ glSecondaryColor3bvEXT
+ glSecondaryColor3d
+ glSecondaryColor3dEXT
+ glSecondaryColor3dv
+ glSecondaryColor3dvEXT
+ glSecondaryColor3f
+ glSecondaryColor3fEXT
+ glSecondaryColor3fv
+ glSecondaryColor3fvEXT
+ glSecondaryColor3i
+ glSecondaryColor3iEXT
+ glSecondaryColor3iv
+ glSecondaryColor3ivEXT
+ glSecondaryColor3s
+ glSecondaryColor3sEXT
+ glSecondaryColor3sv
+ glSecondaryColor3svEXT
+ glSecondaryColor3ub
+ glSecondaryColor3ubEXT
+ glSecondaryColor3ubv
+ glSecondaryColor3ubvEXT
+ glSecondaryColor3ui
+ glSecondaryColor3uiEXT
+ glSecondaryColor3uiv
+ glSecondaryColor3uivEXT
+ glSecondaryColor3us
+ glSecondaryColor3usEXT
+ glSecondaryColor3usv
+ glSecondaryColor3usvEXT
+ glSelectBuffer
+ glSeparableFilter2D
+ glSeparableFilter2DEXT
+ glSetFenceNV
+ glShadeModel
+ glSharpenTexFuncSGIS
+ glSpriteParameterfSGIX
+ glSpriteParameterfvSGIX
+ glSpriteParameteriSGIX
+ glSpriteParameterivSGIX
+ glStartInstrumentsSGIX
+ glStencilFunc
+ glStencilMask
+ glStencilOp
+ glStopInstrumentsSGIX
+ glTagSampleBufferSGIX
+ glTbufferMask3DFX
+ glTestFenceNV
+ glTexCoordPointer
+ glTexCoordPointerEXT
+ glTexCoord1d
+ glTexCoord1dv
+ glTexCoord1f
+ glTexCoord1fv
+ glTexCoord1i
+ glTexCoord1iv
+ glTexCoord1s
+ glTexCoord1sv
+ glTexCoord2d
+ glTexCoord2dv
+ glTexCoord2f
+ glTexCoord2fv
+ glTexCoord2i
+ glTexCoord2iv
+ glTexCoord2s
+ glTexCoord2sv
+ glTexCoord3d
+ glTexCoord3dv
+ glTexCoord3f
+ glTexCoord3fv
+ glTexCoord3i
+ glTexCoord3iv
+ glTexCoord3s
+ glTexCoord3sv
+ glTexCoord4d
+ glTexCoord4dv
+ glTexCoord4f
+ glTexCoord4fv
+ glTexCoord4i
+ glTexCoord4iv
+ glTexCoord4s
+ glTexCoord4sv
+ glTexEnvf
+ glTexEnvfv
+ glTexEnvi
+ glTexEnviv
+ glTexFilterFuncSGIS
+ glTexGend
+ glTexGendv
+ glTexGenf
+ glTexGenfv
+ glTexGeni
+ glTexGeniv
+ glTexImage1D
+ glTexImage2D
+ glTexImage3D
+ glTexImage3DEXT
+ glTexImage4DSGIS
+ glTexParameterf
+ glTexParameterfv
+ glTexParameteri
+ glTexParameteriv
+ glTexSubImage1D
+ glTexSubImage1DEXT
+ glTexSubImage2D
+ glTexSubImage2DEXT
+ glTexSubImage3D
+ glTexSubImage3DEXT
+ glTexSubImage4DSGIS
+ glTrackMatrixNV
+ glTranslated
+ glTranslatef
+ glUnlockArraysEXT
+ glUnmapBufferARB
+ glVertexArrayRangeNV
+ glVertexAttribPointerARB
+ glVertexAttribPointerNV
+ glVertexAttribs1dvNV
+ glVertexAttribs1fvNV
+ glVertexAttribs1svNV
+ glVertexAttribs2dvNV
+ glVertexAttribs2fvNV
+ glVertexAttribs2svNV
+ glVertexAttribs3dvNV
+ glVertexAttribs3fvNV
+ glVertexAttribs3svNV
+ glVertexAttribs4dvNV
+ glVertexAttribs4fvNV
+ glVertexAttribs4svNV
+ glVertexAttribs4ubvNV
+ glVertexAttrib1dARB
+ glVertexAttrib1dNV
+ glVertexAttrib1dvARB
+ glVertexAttrib1dvNV
+ glVertexAttrib1fARB
+ glVertexAttrib1fNV
+ glVertexAttrib1fvARB
+ glVertexAttrib1fvNV
+ glVertexAttrib1sARB
+ glVertexAttrib1sNV
+ glVertexAttrib1svARB
+ glVertexAttrib1svNV
+ glVertexAttrib2dARB
+ glVertexAttrib2dNV
+ glVertexAttrib2dvARB
+ glVertexAttrib2dvNV
+ glVertexAttrib2fARB
+ glVertexAttrib2fNV
+ glVertexAttrib2fvARB
+ glVertexAttrib2fvNV
+ glVertexAttrib2sARB
+ glVertexAttrib2sNV
+ glVertexAttrib2svARB
+ glVertexAttrib2svNV
+ glVertexAttrib3dARB
+ glVertexAttrib3dNV
+ glVertexAttrib3dvARB
+ glVertexAttrib3dvNV
+ glVertexAttrib3fARB
+ glVertexAttrib3fNV
+ glVertexAttrib3fvARB
+ glVertexAttrib3fvNV
+ glVertexAttrib3sARB
+ glVertexAttrib3sNV
+ glVertexAttrib3svARB
+ glVertexAttrib3svNV
+ glVertexAttrib4bvARB
+ glVertexAttrib4dARB
+ glVertexAttrib4dNV
+ glVertexAttrib4dvARB
+ glVertexAttrib4dvNV
+ glVertexAttrib4fARB
+ glVertexAttrib4fNV
+ glVertexAttrib4fvARB
+ glVertexAttrib4fvNV
+ glVertexAttrib4ivARB
+ glVertexAttrib4NbvARB
+ glVertexAttrib4NivARB
+ glVertexAttrib4NsvARB
+ glVertexAttrib4NubARB
+ glVertexAttrib4NubvARB
+ glVertexAttrib4NuivARB
+ glVertexAttrib4NusvARB
+ glVertexAttrib4sARB
+ glVertexAttrib4sNV
+ glVertexAttrib4svARB
+ glVertexAttrib4svNV
+ glVertexAttrib4ubNV
+ glVertexAttrib4ubvARB
+ glVertexAttrib4ubvNV
+ glVertexAttrib4uivARB
+ glVertexAttrib4usvARB
+ glVertexPointer
+ glVertexPointerEXT
+ glVertexWeightfEXT
+ glVertexWeightfvEXT
+ glVertexWeightPointerEXT
+ glVertex2d
+ glVertex2dv
+ glVertex2f
+ glVertex2fv
+ glVertex2i
+ glVertex2iv
+ glVertex2s
+ glVertex2sv
+ glVertex3d
+ glVertex3dv
+ glVertex3f
+ glVertex3fv
+ glVertex3i
+ glVertex3iv
+ glVertex3s
+ glVertex3sv
+ glVertex4d
+ glVertex4dv
+ glVertex4f
+ glVertex4fv
+ glVertex4i
+ glVertex4iv
+ glVertex4s
+ glVertex4sv
+ glViewport
+ glWindowPos2d
+ glWindowPos2dARB
+ glWindowPos2dMESA
+ glWindowPos2dv
+ glWindowPos2dvARB
+ glWindowPos2dvMESA
+ glWindowPos2f
+ glWindowPos2fARB
+ glWindowPos2fMESA
+ glWindowPos2fv
+ glWindowPos2fvARB
+ glWindowPos2fvMESA
+ glWindowPos2i
+ glWindowPos2iARB
+ glWindowPos2iMESA
+ glWindowPos2iv
+ glWindowPos2ivARB
+ glWindowPos2ivMESA
+ glWindowPos2s
+ glWindowPos2sARB
+ glWindowPos2sMESA
+ glWindowPos2sv
+ glWindowPos2svARB
+ glWindowPos2svMESA
+ glWindowPos3d
+ glWindowPos3dARB
+ glWindowPos3dMESA
+ glWindowPos3dv
+ glWindowPos3dvARB
+ glWindowPos3dvMESA
+ glWindowPos3f
+ glWindowPos3fARB
+ glWindowPos3fMESA
+ glWindowPos3fv
+ glWindowPos3fvARB
+ glWindowPos3fvMESA
+ glWindowPos3i
+ glWindowPos3iARB
+ glWindowPos3iMESA
+ glWindowPos3iv
+ glWindowPos3ivARB
+ glWindowPos3ivMESA
+ glWindowPos3s
+ glWindowPos3sARB
+ glWindowPos3sMESA
+ glWindowPos3sv
+ glWindowPos3svARB
+ glWindowPos3svMESA
+ glWindowPos4dMESA
+ glWindowPos4dvMESA
+ glWindowPos4fMESA
+ glWindowPos4fvMESA
+ glWindowPos4iMESA
+ glWindowPos4ivMESA
+ glWindowPos4sMESA
+ glWindowPos4svMESA
+ fxCloseHardware
+;fxGetScreenGeometry
+ fxMesaCreateBestContext
+ fxMesaCreateContext
+ fxMesaDestroyContext
+ fxMesaGetCurrentContext
+ fxMesaMakeCurrent
+ fxMesaSelectCurrentBoard
+;fxMesaSetNearFar
+ fxMesaSwapBuffers
+ fxMesaUpdateScreenSize
+ wglChoosePixelFormat
+ wglCopyContext
+ wglCreateContext
+ wglCreateLayerContext
+ wglDeleteContext
+ wglDescribeLayerPlane
+ wglDescribePixelFormat
+ wglGetCurrentContext
+ wglGetCurrentDC
+ wglGetDefaultProcAddress
+ wglGetLayerPaletteEntries
+ wglGetPixelFormat
+ wglGetProcAddress
+ wglMakeCurrent
+ wglRealizeLayerPalette
+ wglSetLayerPaletteEntries
+ wglSetPixelFormat
+ wglShareLists
+ wglSwapBuffers
+ wglSwapLayerBuffers
+ wglUseFontBitmapsA
+ wglUseFontBitmapsW
+ wglUseFontOutlinesA
+ wglUseFontOutlinesW
+ ChoosePixelFormat
+ DescribePixelFormat
+ GetPixelFormat
+ SetPixelFormat
+ SwapBuffers
+ DrvCopyContext
+ DrvCreateContext
+ DrvCreateLayerContext
+ DrvDeleteContext
+ DrvDescribeLayerPlane
+ DrvDescribePixelFormat
+ DrvGetLayerPaletteEntries
+ DrvGetProcAddress
+ DrvReleaseContext
+ DrvRealizeLayerPalette
+ DrvSetContext
+ DrvSetLayerPaletteEntries
+ DrvSetPixelFormat
+ DrvShareLists
+ DrvSwapBuffers
+ DrvSwapLayerBuffers
+ DrvValidateVersion
diff --git a/src/mesa/drivers/windows/fx/fxwgl.c b/src/mesa/drivers/windows/fx/fxwgl.c
new file mode 100644
index 0000000000..ce76ecd156
--- /dev/null
+++ b/src/mesa/drivers/windows/fx/fxwgl.c
@@ -0,0 +1,1307 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  4.0
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* Authors:
+ *    David Bucciarelli
+ *    Brian Paul
+ *    Keith Whitwell
+ *    Hiroshi Morii
+ *    Daniel Borca
+ */
+
+/* fxwgl.c - Microsoft wgl functions emulation for
+ *           3Dfx VooDoo/Mesa interface
+ */
+
+
+#ifdef _WIN32
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <windows.h>
+#define GL_GLEXT_PROTOTYPES
+#include "GL/gl.h"
+#include "GL/glext.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#include "GL/fxmesa.h"
+#include "glheader.h"
+#include "glapi.h"
+#include "imports.h"
+#include "../../glide/fxdrv.h"
+
+#define MAX_MESA_ATTRS  20
+
+#if (_MSC_VER >= 1200)
+#pragma warning( push )
+#pragma warning( disable : 4273 )
+#endif
+
+struct __extensions__ {
+   PROC proc;     /* function address */
+   char *name;    /* presumably the entry-point name the address is looked up by; users are outside this view -- TODO confirm */
+};
+
+struct __pixelformat__ {
+   PIXELFORMATDESCRIPTOR pfd;        /* Win32 description of the format */
+   GLint mesaAttr[MAX_MESA_ATTRS];   /* FXMESA_* attribute list ended by FXMESA_NONE; passed to fxMesaCreateContext/fxMesaCreateBestContext */
+};
+
+WINGDIAPI void GLAPIENTRY gl3DfxSetPaletteEXT(GLuint *);
+static GLushort gammaTable[3 * 256];   /* copy of the ramp handled by wgl{Get,Set}DeviceGammaRamp3DFX: three 256-entry planes */
+
+struct __pixelformat__ pix[] = {   /* pixel formats offered by this driver; wglCreateContext uses pix[curPFD - 1] */
+   /* 16bit RGB565 single buffer with 16-bit depth, no stencil */
+   {
+    {sizeof(PIXELFORMATDESCRIPTOR), 1,
+     PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL,
+     PFD_TYPE_RGBA,
+     16,
+     5, 0, 6, 5, 5, 11, 0, 0,
+     0, 0, 0, 0, 0,
+     16,
+     0,
+     0,
+     PFD_MAIN_PLANE,
+     0, 0, 0, 0}
+    ,
+    {FXMESA_COLORDEPTH, 16,
+     FXMESA_ALPHA_SIZE, 0,
+     FXMESA_DEPTH_SIZE, 16,
+     FXMESA_STENCIL_SIZE, 0,
+     FXMESA_ACCUM_SIZE, 0,
+     FXMESA_NONE}
+   }
+   ,
+   /* 16bit RGB565 double buffer with 16-bit depth, no stencil */
+   {
+    {sizeof(PIXELFORMATDESCRIPTOR), 1,
+     PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL |
+     PFD_DOUBLEBUFFER | PFD_SWAP_COPY,
+     PFD_TYPE_RGBA,
+     16,
+     5, 0, 6, 5, 5, 11, 0, 0,
+     0, 0, 0, 0, 0,
+     16,
+     0,
+     0,
+     PFD_MAIN_PLANE,
+     0, 0, 0, 0}
+    ,
+    {FXMESA_COLORDEPTH, 16,
+     FXMESA_DOUBLEBUFFER,
+     FXMESA_ALPHA_SIZE, 0,
+     FXMESA_DEPTH_SIZE, 16,
+     FXMESA_STENCIL_SIZE, 0,
+     FXMESA_ACCUM_SIZE, 0,
+     FXMESA_NONE}
+   }
+   ,
+   /* 16bit ARGB1555 single buffer with 16-bit depth, no stencil */
+   {
+    {sizeof(PIXELFORMATDESCRIPTOR), 1,
+     PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL,
+     PFD_TYPE_RGBA,
+     16,
+     5, 0, 5, 5, 5, 10, 1, 15,
+     0, 0, 0, 0, 0,
+     16,
+     0,
+     0,
+     PFD_MAIN_PLANE,
+     0, 0, 0, 0}
+    ,
+    {FXMESA_COLORDEPTH, 15,
+     FXMESA_ALPHA_SIZE, 1,
+     FXMESA_DEPTH_SIZE, 16,
+     FXMESA_STENCIL_SIZE, 0,
+     FXMESA_ACCUM_SIZE, 0,
+     FXMESA_NONE}
+   }
+   ,
+   /* 16bit ARGB1555 double buffer with 16-bit depth, no stencil */
+   {
+    {sizeof(PIXELFORMATDESCRIPTOR), 1,
+     PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL |
+     PFD_DOUBLEBUFFER | PFD_SWAP_COPY,
+     PFD_TYPE_RGBA,
+     16,
+     5, 0, 5, 5, 5, 10, 1, 15,
+     0, 0, 0, 0, 0,
+     16,
+     0,
+     0,
+     PFD_MAIN_PLANE,
+     0, 0, 0, 0}
+    ,
+    {FXMESA_COLORDEPTH, 15,
+     FXMESA_DOUBLEBUFFER,
+     FXMESA_ALPHA_SIZE, 1,
+     FXMESA_DEPTH_SIZE, 16,
+     FXMESA_STENCIL_SIZE, 0,
+     FXMESA_ACCUM_SIZE, 0,
+     FXMESA_NONE}
+   }
+   ,
+   /* 32bit ARGB8888 single buffer with 24-bit depth and 8-bit stencil */
+   {
+    {sizeof(PIXELFORMATDESCRIPTOR), 1,
+     PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL,
+     PFD_TYPE_RGBA,
+     32,
+     8, 0, 8, 8, 8, 16, 8, 24,
+     0, 0, 0, 0, 0,
+     24,
+     8,
+     0,
+     PFD_MAIN_PLANE,
+     0, 0, 0, 0}
+    ,
+    {FXMESA_COLORDEPTH, 32,
+     FXMESA_ALPHA_SIZE, 8,
+     FXMESA_DEPTH_SIZE, 24,
+     FXMESA_STENCIL_SIZE, 8,
+     FXMESA_ACCUM_SIZE, 0,
+     FXMESA_NONE}
+   }
+   ,
+   /* 32bit ARGB8888 double buffer with 24-bit depth and 8-bit stencil */
+   {
+    {sizeof(PIXELFORMATDESCRIPTOR), 1,
+     PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL |
+     PFD_DOUBLEBUFFER | PFD_SWAP_COPY,
+     PFD_TYPE_RGBA,
+     32,
+     8, 0, 8, 8, 8, 16, 8, 24,
+     0, 0, 0, 0, 0,
+     24,
+     8,
+     0,
+     PFD_MAIN_PLANE,
+     0, 0, 0, 0}
+    ,
+    {FXMESA_COLORDEPTH, 32,
+     FXMESA_DOUBLEBUFFER,
+     FXMESA_ALPHA_SIZE, 8,
+     FXMESA_DEPTH_SIZE, 24,
+     FXMESA_STENCIL_SIZE, 8,
+     FXMESA_ACCUM_SIZE, 0,
+     FXMESA_NONE}
+   }
+};
+
+static fxMesaContext ctx = NULL;   /* the single rendering context; NULL while none exists */
+static WNDPROC hWNDOldProc;        /* window procedure that was installed before __wglMonitor replaced it */
+static int curPFD = 0;             /* 1-based index into pix[]; 0 means no pixel format has been chosen */
+static HDC hDC;                    /* device context given to wglCreateContext */
+static HWND hWND;                  /* window obtained from that device context */
+
+static GLboolean haveDualHead;
+
+/* For the in-window-rendering hack */
+
+#ifndef GR_CONTROL_RESIZE
+/* Apparently GR_CONTROL_RESIZE can be ignored. OK? */
+#define GR_CONTROL_RESIZE -1
+#endif
+
+static GLboolean gdiWindowHack;
+static void *dibSurfacePtr;
+static BITMAPINFO *dibBMI;
+static HBITMAP dibHBM;
+static HWND dibWnd;
+
+static int
+env_check (const char *var, int val)
+{
+   const char *value = getenv(var);   /* NULL when the variable is not set */
+   return (value != NULL) && (value[0] == val);   /* 1 iff the first character equals val */
+}
+
+static LRESULT APIENTRY
+__wglMonitor (HWND hwnd, UINT message, WPARAM wParam, LPARAM lParam)
+{
+   LRESULT ret;                 /* Now gives the resized window at the end to hWNDOldProc */
+   /* Window-procedure hook: every case below is currently a no-op (resize code is #if 0'd). */
+   if (ctx && hwnd == hWND) {
+      switch (message) {
+         case WM_PAINT:
+         case WM_MOVE:
+            break;
+         case WM_DISPLAYCHANGE:
+         case WM_SIZE:
+#if 0
+            if (wParam != SIZE_MINIMIZED) {
+               static int moving = 0;
+               if (!moving) {
+                  if (!FX_grSstControl(GR_CONTROL_RESIZE)) {
+                     moving = 1;
+                     SetWindowPos(hwnd, 0, 0, 0, 300, 300, SWP_NOMOVE | SWP_NOZORDER);
+                     moving = 0;
+                     if (!FX_grSstControl(GR_CONTROL_RESIZE)) {
+                        /*MessageBox(0,_T("Error changing windowsize"),_T("fxMESA"),MB_OK);*/
+                        PostMessage(hWND, WM_CLOSE, 0, 0);
+                     }
+                  }
+                  /* Do the clipping in the glide library */
+                  grClipWindow(0, 0, FX_grSstScreenWidth(), FX_grSstScreenHeight());
+                  /* And let the new size set in the context */
+                  fxMesaUpdateScreenSize(ctx);
+               }
+            }
+#endif
+            break;
+         case WM_ACTIVATE:
+            break;
+         case WM_SHOWWINDOW:
+            break;
+         case WM_SYSKEYDOWN:
+         case WM_SYSCHAR:
+            break;
+      }
+   }
+
+   /* Finally call the hWNDOldProc, which handles the resize with the
+    * now changed window sizes */
+   ret = CallWindowProc(hWNDOldProc, hwnd, message, wParam, lParam);
+
+   return ret;
+}
+
+static void
+wgl_error (long error)
+{
+#define WGL_INVALID_PIXELFORMAT ERROR_INVALID_PIXEL_FORMAT
+   /* 0xC0000000 = error severity, 0x00070000 = error facility (who we are) */
+   const DWORD code = 0xC0000000 | 0x00070000 | error;
+   SetLastError(code);
+}
+
+GLAPI BOOL GLAPIENTRY
+wglCopyContext (HGLRC hglrcSrc, HGLRC hglrcDst, UINT mask)
+{
+   return FALSE;   /* stub: always fails, the arguments are ignored */
+}
+
+GLAPI HGLRC GLAPIENTRY
+wglCreateContext (HDC hdc)
+{
+   HWND hWnd;
+   WNDPROC oldProc;
+   int error;
+   /* Creates the single fx context for hdc's window; returns the fake handle 1, or NULL on failure. */
+   if (ctx) {                   /* a context already exists; only one is supported */
+      SetLastError(0);
+      return NULL;
+   }
+
+   if (!(hWnd = WindowFromDC(hdc))) {   /* the DC must belong to a window */
+      SetLastError(0);
+      return NULL;
+   }
+
+   if (curPFD == 0) {           /* no pixel format has been selected yet */
+      wgl_error(WGL_INVALID_PIXELFORMAT);
+      return NULL;
+   }
+
+   if ((oldProc = (WNDPROC)GetWindowLong(hWnd, GWL_WNDPROC)) != __wglMonitor) {   /* subclass the window once */
+      hWNDOldProc = oldProc;
+      SetWindowLong(hWnd, GWL_WNDPROC, (LONG)__wglMonitor);
+   }
+
+   /* always log when debugging, or if user demands */
+   if (TDFX_DEBUG || env_check("MESA_FX_INFO", 'r')) {
+      freopen("MESA.LOG", "w", stderr);
+   }
+
+   {
+      RECT cliRect;
+      ShowWindow(hWnd, SW_SHOWNORMAL);
+      SetForegroundWindow(hWnd);
+      Sleep(100);               /* a hack for win95 */
+      if (env_check("MESA_GLX_FX", 'w') && !(GetWindowLong(hWnd, GWL_STYLE) & WS_POPUP)) {
+         /* XXX todo - windowed modes */
+         error = !(ctx = fxMesaCreateContext((GLuint) hWnd, GR_RESOLUTION_NONE, GR_REFRESH_NONE, pix[curPFD - 1].mesaAttr));
+      } else {
+         GetClientRect(hWnd, &cliRect);   /* client-area size is passed as the requested resolution */
+         error = !(ctx = fxMesaCreateBestContext((GLuint) hWnd, cliRect.right, cliRect.bottom, pix[curPFD - 1].mesaAttr));
+      }
+   }
+
+   /*if (getenv("SST_DUALHEAD"))
+      haveDualHead =
+         ((atoi(getenv("SST_DUALHEAD")) == 1) ? GL_TRUE : GL_FALSE);
+   else
+      haveDualHead = GL_FALSE;*/
+
+   if (error) {                 /* context creation returned NULL */
+      SetLastError(0);
+      return NULL;
+   }
+
+   hDC = hdc;
+   hWND = hWnd;
+
+   /* Required by the OpenGL Optimizer 1.1 (is it an Optimizer bug?) */
+   wglMakeCurrent(hdc, (HGLRC)1);
+
+   return (HGLRC)1;
+}
+
+GLAPI HGLRC GLAPIENTRY
+wglCreateLayerContext (HDC hdc, int iLayerPlane)
+{
+   SetLastError(0);
+   return NULL;   /* stub: always fails, the arguments are ignored */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglDeleteContext (HGLRC hglrc)
+{
+   /* Only the single driver context, known to callers as handle 1, can be deleted. */
+   if (!ctx || hglrc != (HGLRC)1) {
+      SetLastError(0);
+      return FALSE;
+   }
+
+   fxMesaDestroyContext(ctx);
+
+   /* Un-subclass the window: reinstall the saved window procedure. */
+   SetWindowLong(WindowFromDC(hDC), GWL_WNDPROC, (LONG) hWNDOldProc);
+
+   ctx = NULL;
+   hDC = 0;
+   return TRUE;
+}
+
+GLAPI HGLRC GLAPIENTRY
+wglGetCurrentContext (VOID)
+{
+   if (ctx == NULL) {
+      SetLastError(0);
+      return NULL;
+   }
+   return (HGLRC)1;   /* the single context is always exposed as handle 1 */
+}
+
+GLAPI HDC GLAPIENTRY
+wglGetCurrentDC (VOID)
+{
+   if (ctx == NULL) {
+      SetLastError(0);
+      return NULL;
+   }
+   return hDC;   /* device context recorded by wglCreateContext */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglSwapIntervalEXT (int interval)
+{
+   int clamped;                 /* requested interval limited to 0..3 */
+
+   if (!ctx) {
+      return FALSE;             /* nothing to configure without a context */
+   }
+
+   clamped = (interval < 0) ? 0 : interval;
+   clamped = (clamped > 3) ? 3 : clamped;
+   ctx->swapInterval = clamped;
+   return TRUE;
+}
+
+GLAPI int GLAPIENTRY
+wglGetSwapIntervalEXT (void)
+{
+   return ctx ? ctx->swapInterval : -1;   /* -1 means there is no context */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglGetDeviceGammaRamp3DFX (HDC hdc, LPVOID arrays)
+{
+   /* gammaTable should be per-context */
+   memcpy(arrays, gammaTable, 3 * 256 * sizeof(GLushort));   /* arrays receives 3 * 256 GLushort values */
+   return TRUE;   /* hdc is ignored; the call never fails */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglSetDeviceGammaRamp3DFX (HDC hdc, LPVOID arrays)
+{
+   GLint i, tableSize, inc, index;
+   GLushort *red, *green, *blue;
+   FxU32 gammaTableR[256], gammaTableG[256], gammaTableB[256];
+   /* Reject NULL input and table sizes that would divide by zero or overrun the arrays. */
+   tableSize = FX_grGetInteger(GR_GAMMA_TABLE_ENTRIES);
+   if (arrays == NULL || tableSize < 1 || tableSize > 256) {
+      SetLastError(0);
+      return FALSE;
+   }
+   memcpy(gammaTable, arrays, 3 * 256 * sizeof(GLushort));   /* gammaTable should be per-context */
+   inc = 256 / tableSize;
+   red = (GLushort *)arrays;
+   green = red + 256;
+   blue = red + 512;
+   for (i = 0, index = 0; i < tableSize; i++, index += inc) {   /* high byte of every inc-th entry */
+      gammaTableR[i] = red[index] >> 8;
+      gammaTableG[i] = green[index] >> 8;
+      gammaTableB[i] = blue[index] >> 8;
+   }
+   grLoadGammaTable(tableSize, gammaTableR, gammaTableG, gammaTableB);
+   return TRUE;
+}
+
+typedef void *HPBUFFERARB;   /* opaque pbuffer handle type used by the WGL_ARB stubs in this file */
+
+/* WGL_ARB_pixel_format */
+GLAPI BOOL GLAPIENTRY
+wglGetPixelFormatAttribivARB (HDC hdc,
+                              int iPixelFormat,
+                              int iLayerPlane,
+                              UINT nAttributes,
+                              const int *piAttributes,
+                              int *piValues)
+{
+   SetLastError(0);   /* stub: no argument is read, piValues is not written */
+   return FALSE;      /* always reports failure */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglGetPixelFormatAttribfvARB (HDC hdc,
+                              int iPixelFormat,
+                              int iLayerPlane,
+                              UINT nAttributes,
+                              const int *piAttributes,
+                              FLOAT *pfValues)
+{
+   SetLastError(0);   /* stub: no argument is read, pfValues is not written */
+   return FALSE;      /* always reports failure */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglChoosePixelFormatARB (HDC hdc,
+                         const int *piAttribIList,
+                         const FLOAT *pfAttribFList,
+                         UINT nMaxFormats,
+                         int *piFormats,
+                         UINT *nNumFormats)
+{
+   SetLastError(0);   /* stub: piFormats and nNumFormats are not written */
+   return FALSE;      /* always reports failure */
+}
+
+/* WGL_ARB_render_texture */
+GLAPI BOOL GLAPIENTRY
+wglBindTexImageARB (HPBUFFERARB hPbuffer, int iBuffer)
+{
+   SetLastError(0);   /* stub: arguments are ignored */
+   return FALSE;      /* always reports failure */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglReleaseTexImageARB (HPBUFFERARB hPbuffer, int iBuffer)
+{
+   SetLastError(0);   /* stub: arguments are ignored */
+   return FALSE;      /* always reports failure */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglSetPbufferAttribARB (HPBUFFERARB hPbuffer,
+                        const int *piAttribList)
+{
+   SetLastError(0);   /* stub: arguments are ignored */
+   return FALSE;      /* always reports failure */
+}
+
+/* WGL_ARB_pbuffer */
+GLAPI HPBUFFERARB GLAPIENTRY
+wglCreatePbufferARB (HDC hDC,
+                     int iPixelFormat,
+                     int iWidth,
+                     int iHeight,
+                     const int *piAttribList)
+{
+   SetLastError(0);   /* stub: no pbuffer is ever created (the hDC parameter shadows the file-level hDC) */
+   return NULL;       /* always reports failure */
+}
+
+GLAPI HDC GLAPIENTRY
+wglGetPbufferDCARB (HPBUFFERARB hPbuffer)
+{
+   SetLastError(0);   /* stub: hPbuffer is ignored */
+   return NULL;       /* always reports failure */
+}
+
+GLAPI int GLAPIENTRY
+wglReleasePbufferDCARB (HPBUFFERARB hPbuffer, HDC hDC)
+{
+   SetLastError(0);   /* stub: arguments are ignored */
+   return -1;         /* constant result; nothing is released */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglDestroyPbufferARB (HPBUFFERARB hPbuffer)
+{
+   SetLastError(0);   /* stub: hPbuffer is ignored */
+   return FALSE;      /* always reports failure */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglQueryPbufferARB (HPBUFFERARB hPbuffer,
+                    int iAttribute,
+                    int *piValue)
+{
+   SetLastError(0);   /* stub: piValue is not written */
+   return FALSE;      /* always reports failure */
+}
+
+GLAPI const char * GLAPIENTRY
+wglGetExtensionsStringEXT (void)
+{
+   static const char extensions[] =
+      "WGL_3DFX_gamma_control WGL_EXT_swap_control "
+      "WGL_EXT_extensions_string WGL_ARB_extensions_string";
+   return extensions;   /* WGL_ARB_pixel_format, _render_texture and _pbuffer are not advertised */
+}
+
+GLAPI const char * GLAPIENTRY
+wglGetExtensionsStringARB (HDC hdc)
+{
+   return wglGetExtensionsStringEXT();   /* hdc is ignored: same string as the EXT variant */
+}
+
+static struct {
+   const char *name;   /* exported function name */
+   PROC func;          /* matching entry point in this file */
+} wgl_ext[] = {        /* searched linearly by wglGetProcAddress */
+       {"wglGetExtensionsStringARB",    (PROC)wglGetExtensionsStringARB},
+       {"wglGetExtensionsStringEXT",    (PROC)wglGetExtensionsStringEXT},
+       {"wglSwapIntervalEXT",           (PROC)wglSwapIntervalEXT},
+       {"wglGetSwapIntervalEXT",        (PROC)wglGetSwapIntervalEXT},
+       {"wglGetDeviceGammaRamp3DFX",    (PROC)wglGetDeviceGammaRamp3DFX},
+       {"wglSetDeviceGammaRamp3DFX",    (PROC)wglSetDeviceGammaRamp3DFX},
+       /* WGL_ARB_pixel_format */
+       {"wglGetPixelFormatAttribivARB", (PROC)wglGetPixelFormatAttribivARB},
+       {"wglGetPixelFormatAttribfvARB", (PROC)wglGetPixelFormatAttribfvARB},
+       {"wglChoosePixelFormatARB",      (PROC)wglChoosePixelFormatARB},
+       /* WGL_ARB_render_texture */
+       {"wglBindTexImageARB",           (PROC)wglBindTexImageARB},
+       {"wglReleaseTexImageARB",        (PROC)wglReleaseTexImageARB},
+       {"wglSetPbufferAttribARB",       (PROC)wglSetPbufferAttribARB},
+       /* WGL_ARB_pbuffer */
+       {"wglCreatePbufferARB",          (PROC)wglCreatePbufferARB},
+       {"wglGetPbufferDCARB",           (PROC)wglGetPbufferDCARB},
+       {"wglReleasePbufferDCARB",       (PROC)wglReleasePbufferDCARB},
+       {"wglDestroyPbufferARB",         (PROC)wglDestroyPbufferARB},
+       {"wglQueryPbufferARB",           (PROC)wglQueryPbufferARB},
+       {NULL, NULL}     /* end-of-table marker: the lookup loop stops at a NULL name */
+};
+
+GLAPI PROC GLAPIENTRY
+wglGetProcAddress (LPCSTR lpszProc)
+{
+   int idx = 0;
+   PROC glProc = (PROC)_glapi_get_proc_address((const char *)lpszProc);
+
+   /* we can't BlendColor. work around buggy applications */
+   if (glProc != NULL && strcmp(lpszProc, "glBlendColor") != 0
+                      && strcmp(lpszProc, "glBlendColorEXT") != 0)
+      return glProc;
+
+   /* Otherwise look the name up in the NULL-terminated wgl_ext table. */
+   while (wgl_ext[idx].name != NULL) {
+      if (strcmp(lpszProc, wgl_ext[idx].name) == 0)
+         return wgl_ext[idx].func;
+      idx++;
+   }
+   SetLastError(0);
+   return NULL;
+}
+
+GLAPI PROC GLAPIENTRY
+wglGetDefaultProcAddress (LPCSTR lpszProc)
+{
+   SetLastError(0);   /* stub: lpszProc is ignored */
+   return NULL;       /* no address is ever returned */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglMakeCurrent (HDC hdc, HGLRC hglrc)
+{
+   /* A request with both handles NULL is accepted without doing anything. */
+   if (!hdc && !hglrc)
+      return TRUE;
+
+   /* Accept only the single context (handle 1) on the window stored in hWND. */
+   if (ctx && hglrc == (HGLRC)1 && WindowFromDC(hdc) == hWND) {
+      hDC = hdc;
+      fxMesaMakeCurrent(ctx);
+      return TRUE;
+   }
+
+   SetLastError(0);
+   return FALSE;
+}
+
+GLAPI BOOL GLAPIENTRY
+wglShareLists (HGLRC hglrc1, HGLRC hglrc2)
+{
+   /* Succeeds only when both handles name the single existing context. */
+   if (ctx && hglrc1 == (HGLRC)1 && hglrc2 == hglrc1)
+      return TRUE;
+
+   SetLastError(0);
+   return FALSE;
+}
+
+/* Fallback for wglUseFontBitmapsA: draws numChars characters of fontDevice's
+ * font, starting at firstChar, into display lists listBase, listBase + 1, ... */
+static BOOL
+wglUseFontBitmaps_FX (HDC fontDevice, DWORD firstChar, DWORD numChars,
+                      DWORD listBase)
+{
+   TEXTMETRIC metric;
+   BITMAPINFO *dibInfo;
+   HDC bitDevice;
+   COLORREF tempColor;
+   int i;
+
+   GetTextMetrics(fontDevice, &metric);
+
+   dibInfo = (BITMAPINFO *)calloc(sizeof(BITMAPINFO) + sizeof(RGBQUAD), 1);
+   if (dibInfo == NULL)
+      return FALSE;           /* no memory for the DIB header */
+   dibInfo->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
+   dibInfo->bmiHeader.biPlanes = 1;
+   dibInfo->bmiHeader.biBitCount = 1;
+   dibInfo->bmiHeader.biCompression = BI_RGB;
+
+   bitDevice = CreateCompatibleDC(fontDevice);
+   if (bitDevice == NULL) {
+      FREE(dibInfo);
+      return FALSE;
+   }
+
+   /* Swap fore and back colors so the bitmap has the right polarity */
+   tempColor = GetBkColor(bitDevice);
+   SetBkColor(bitDevice, GetTextColor(bitDevice));
+   SetTextColor(bitDevice, tempColor);
+
+   /* Place chars based on base line */
+   SetTextAlign(bitDevice, TA_BASELINE);
+
+   /* Use our source font on the device; selected up front so that even the first character is measured with it */
+   SelectObject(bitDevice, GetCurrentObject(fontDevice, OBJ_FONT));
+
+   for (i = 0; i < (int)numChars; i++) {
+      SIZE size;
+      char curChar;
+      int charWidth, charHeight, bmapWidth, bmapHeight, numBytes;
+      HBITMAP bitObject;
+      HGDIOBJ origBmap;
+      unsigned char *bmap;
+
+      curChar = (char)(i + firstChar); /* [koolsmoky] explicit cast */
+
+      /* Find how high/wide this character is */
+      GetTextExtentPoint32(bitDevice, &curChar, 1, &size);
+
+      /* Create the output bitmap */
+      charWidth = size.cx;
+      charHeight = size.cy;
+      bmapWidth = ((charWidth + 31) / 32) * 32; /* Round up to the next multiple of 32 bits */
+      bmapHeight = charHeight;
+      bitObject = CreateCompatibleBitmap(bitDevice, bmapWidth, bmapHeight);
+
+      /* Assign the output bitmap to the device */
+      origBmap = SelectObject(bitDevice, bitObject);
+
+      PatBlt(bitDevice, 0, 0, bmapWidth, bmapHeight, BLACKNESS);
+
+      /* Draw the character */
+      TextOut(bitDevice, 0, metric.tmAscent, &curChar, 1);
+
+      /* Unselect our bmap object */
+      SelectObject(bitDevice, origBmap);
+
+      /* Convert the display dependent representation to a 1 bit deep DIB */
+      numBytes = (bmapWidth * bmapHeight) / 8;
+      bmap = MALLOC(numBytes);
+      dibInfo->bmiHeader.biWidth = bmapWidth;
+      dibInfo->bmiHeader.biHeight = bmapHeight;
+      GetDIBits(bitDevice, bitObject, 0, bmapHeight, bmap, dibInfo, DIB_RGB_COLORS);
+
+      /* Create the GL object */
+      glNewList(i + listBase, GL_COMPILE);
+      glBitmap(bmapWidth, bmapHeight, 0.0, metric.tmDescent, charWidth, 0.0, bmap);
+      glEndList();
+
+      /* Destroy the bmap object and deallocate the bitmap data */
+      DeleteObject(bitObject);
+      FREE(bmap);
+   }
+
+   /* Destroy the DC */
+   DeleteDC(bitDevice);
+   FREE(dibInfo);
+   return TRUE;
+}
+
+GLAPI BOOL GLAPIENTRY
+wglUseFontBitmapsW (HDC hdc, DWORD first, DWORD count, DWORD listBase)
+{
+   return FALSE;   /* stub: always fails; unlike its sibling stubs it does not call SetLastError */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglUseFontOutlinesA (HDC hdc, DWORD first, DWORD count,
+                     DWORD listBase, FLOAT deviation,
+                     FLOAT extrusion, int format, LPGLYPHMETRICSFLOAT lpgmf)
+{
+   SetLastError(0);   /* stub: no display lists are built, lpgmf is not written */
+   return FALSE;      /* always reports failure */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglUseFontOutlinesW (HDC hdc, DWORD first, DWORD count,
+                     DWORD listBase, FLOAT deviation,
+                     FLOAT extrusion, int format, LPGLYPHMETRICSFLOAT lpgmf)
+{
+   SetLastError(0);   /* stub: no display lists are built, lpgmf is not written */
+   return FALSE;      /* always reports failure */
+}
+
+
+GLAPI BOOL GLAPIENTRY
+wglSwapLayerBuffers (HDC hdc, UINT fuPlanes)
+{
+   /* fuPlanes is ignored; hdc must belong to the window stored in hWND. */
+   if (!ctx || WindowFromDC(hdc) != hWND) {
+      SetLastError(0);
+      return FALSE;
+   }
+
+   fxMesaSwapBuffers();
+   return TRUE;
+}
+
+static int
+pfd_tablen (void)
+{
+   /* we should take an envvar for `fxMesaSelectCurrentBoard' */
+   if (fxMesaSelectCurrentBoard(0) < GR_SSTTYPE_Voodoo4)
+      return 2;                            /* only 16bit entries */
+
+   return sizeof(pix) / sizeof(pix[0]);    /* full table */
+}
+
+/* Returns the 1-based index of the first pix[] entry that satisfies *ppfd, or 0 if there is none. */
+GLAPI int GLAPIENTRY
+wglChoosePixelFormat (HDC hdc, const PIXELFORMATDESCRIPTOR *ppfd)
+{
+   int i, best = -1, qt_valid_pix;
+   PIXELFORMATDESCRIPTOR pfd;
+
+   qt_valid_pix = pfd_tablen();
+   /* Reject a missing descriptor, or one of unexpected size/version, before copying it. */
+   if (ppfd == NULL || ppfd->nSize != sizeof(PIXELFORMATDESCRIPTOR) || ppfd->nVersion != 1) {
+      SetLastError(0);
+      return 0;
+   }
+   pfd = *ppfd;   /* the request is adjusted below, so work on a copy */
+
+#if 1 || QUAKE2 || GORE
+   /* QUAKE2: 24+32 */
+   /* GORE  : 24+16 */
+   if ((pfd.cColorBits == 24) || (pfd.cColorBits == 32)) {
+      /* the first 2 entries are 16bit */
+      pfd.cColorBits = (qt_valid_pix > 2) ? 32 : 16;
+   }
+   if (pfd.cColorBits == 32) {
+      pfd.cDepthBits = 24;
+   } else if (pfd.cColorBits == 16) {
+      pfd.cDepthBits = 16;
+   }
+#endif
+
+   for (i = 0; i < qt_valid_pix; i++) {
+      if (pfd.cColorBits > 0 && pix[i].pfd.cColorBits != pfd.cColorBits)
+         continue;
+
+      if ((pfd.dwFlags & PFD_DRAW_TO_WINDOW)
+          && !(pix[i].pfd.dwFlags & PFD_DRAW_TO_WINDOW)) continue;
+      if ((pfd.dwFlags & PFD_DRAW_TO_BITMAP)
+          && !(pix[i].pfd.dwFlags & PFD_DRAW_TO_BITMAP)) continue;
+      if ((pfd.dwFlags & PFD_SUPPORT_GDI)
+          && !(pix[i].pfd.dwFlags & PFD_SUPPORT_GDI)) continue;
+      if ((pfd.dwFlags & PFD_SUPPORT_OPENGL)
+          && !(pix[i].pfd.dwFlags & PFD_SUPPORT_OPENGL)) continue;
+      if (!(pfd.dwFlags & PFD_DOUBLEBUFFER_DONTCARE)
+          && ((pfd.dwFlags & PFD_DOUBLEBUFFER) !=
+              (pix[i].pfd.dwFlags & PFD_DOUBLEBUFFER))) continue;
+#if 1 /* Doom3 fails here! */
+      if (!(pfd.dwFlags & PFD_STEREO_DONTCARE)
+          && ((pfd.dwFlags & PFD_STEREO) !=
+              (pix[i].pfd.dwFlags & PFD_STEREO))) continue;
+#endif
+      if (pfd.cDepthBits > 0 && pix[i].pfd.cDepthBits == 0)
+         continue;              /* need depth buffer */
+
+      if (pfd.cAlphaBits > 0 && pix[i].pfd.cAlphaBits == 0)
+         continue;              /* need alpha buffer */
+
+#if 0                           /* regression bug? */
+      if (pfd.cStencilBits > 0 && pix[i].pfd.cStencilBits == 0)
+         continue;              /* need stencil buffer */
+#endif
+
+      if (pfd.iPixelType == pix[i].pfd.iPixelType) {
+         best = i + 1;
+         break;
+      }
+   }
+
+   if (best == -1) {
+      FILE *err = fopen("MESA.LOG", "w");
+      if (err != NULL) {
+         fprintf(err, "wglChoosePixelFormat failed\n");
+         fprintf(err, "\tnSize           = %d\n", ppfd->nSize);
+         fprintf(err, "\tnVersion        = %d\n", ppfd->nVersion);
+         fprintf(err, "\tdwFlags         = %lu\n", ppfd->dwFlags);
+         fprintf(err, "\tiPixelType      = %d\n", ppfd->iPixelType);
+         fprintf(err, "\tcColorBits      = %d\n", ppfd->cColorBits);
+         fprintf(err, "\tcRedBits        = %d\n", ppfd->cRedBits);
+         fprintf(err, "\tcRedShift       = %d\n", ppfd->cRedShift);
+         fprintf(err, "\tcGreenBits      = %d\n", ppfd->cGreenBits);
+         fprintf(err, "\tcGreenShift     = %d\n", ppfd->cGreenShift);
+         fprintf(err, "\tcBlueBits       = %d\n", ppfd->cBlueBits);
+         fprintf(err, "\tcBlueShift      = %d\n", ppfd->cBlueShift);
+         fprintf(err, "\tcAlphaBits      = %d\n", ppfd->cAlphaBits);
+         fprintf(err, "\tcAlphaShift     = %d\n", ppfd->cAlphaShift);
+         fprintf(err, "\tcAccumBits      = %d\n", ppfd->cAccumBits);
+         fprintf(err, "\tcAccumRedBits   = %d\n", ppfd->cAccumRedBits);
+         fprintf(err, "\tcAccumGreenBits = %d\n", ppfd->cAccumGreenBits);
+         fprintf(err, "\tcAccumBlueBits  = %d\n", ppfd->cAccumBlueBits);
+         fprintf(err, "\tcAccumAlphaBits = %d\n", ppfd->cAccumAlphaBits);
+         fprintf(err, "\tcDepthBits      = %d\n", ppfd->cDepthBits);
+         fprintf(err, "\tcStencilBits    = %d\n", ppfd->cStencilBits);
+         fprintf(err, "\tcAuxBuffers     = %d\n", ppfd->cAuxBuffers);
+         fprintf(err, "\tiLayerType      = %d\n", ppfd->iLayerType);
+         fprintf(err, "\tbReserved       = %d\n", ppfd->bReserved);
+         fprintf(err, "\tdwLayerMask     = %lu\n", ppfd->dwLayerMask);
+         fprintf(err, "\tdwVisibleMask   = %lu\n", ppfd->dwVisibleMask);
+         fprintf(err, "\tdwDamageMask    = %lu\n", ppfd->dwDamageMask);
+         fclose(err);
+      }
+      SetLastError(0);
+      return 0;
+   }
+
+   return best;
+}
+
+GLAPI int GLAPIENTRY
+ChoosePixelFormat (HDC hdc, const PIXELFORMATDESCRIPTOR *ppfd)
+{
+   /* Unprefixed entry point: forwards unchanged to the wgl implementation. */
+   return wglChoosePixelFormat(hdc, ppfd);
+}
+
+GLAPI int GLAPIENTRY
+wglDescribePixelFormat (HDC hdc, int iPixelFormat, UINT nBytes,
+                        LPPIXELFORMATDESCRIPTOR ppfd)
+{
+   const int qt_valid_pix = pfd_tablen();
+
+   /* The number of usable formats is returned even for a bad request. */
+   if (iPixelFormat < 1 || iPixelFormat > qt_valid_pix ||
+       ((nBytes != sizeof(PIXELFORMATDESCRIPTOR)) && (nBytes != 0))) {
+      SetLastError(0);
+      return qt_valid_pix;
+   }
+
+   /* A NULL ppfd or nBytes == 0 only queries the format count; write nothing. */
+   if (ppfd != NULL && nBytes != 0)
+      *ppfd = pix[iPixelFormat - 1].pfd;
+
+   return qt_valid_pix;
+}
+
+GLAPI int GLAPIENTRY
+DescribePixelFormat (HDC hdc, int iPixelFormat, UINT nBytes,
+                     LPPIXELFORMATDESCRIPTOR ppfd)
+{
+   return wglDescribePixelFormat(hdc, iPixelFormat, nBytes, ppfd);   /* unprefixed entry point: plain forwarder */
+}
+
+GLAPI int GLAPIENTRY
+wglGetPixelFormat (HDC hdc)
+{
+   /* hdc is ignored.  A zero curPFD is reported as failure. */
+   if (curPFD != 0)
+      return curPFD;
+
+   SetLastError(0);
+   return 0;
+}
+
+GLAPI int GLAPIENTRY
+GetPixelFormat (HDC hdc)
+{
+   return wglGetPixelFormat(hdc);   /* unprefixed entry point: plain forwarder */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglSetPixelFormat (HDC hdc, int iPixelFormat, const PIXELFORMATDESCRIPTOR *ppfd)
+{
+   int qt_valid_pix;
+
+   qt_valid_pix = pfd_tablen();
+
+   /* Only a valid 1-based index into the usable part of pix[] may become
+    * the current format: context creation reads pix[curPFD - 1], so an
+    * out-of-range value must never be stored.  The index alone identifies
+    * the format, which is why ppfd (and hdc) are not consulted. */
+   if (iPixelFormat < 1 || iPixelFormat > qt_valid_pix) {
+      SetLastError(0);
+      return FALSE;
+   }
+
+   (void)hdc;
+   (void)ppfd;
+
+   curPFD = iPixelFormat;
+
+   return TRUE;
+}
+
+GLAPI BOOL GLAPIENTRY
+wglSwapBuffers (HDC hdc)
+{
+   /* hdc is not examined: the swap happens whenever a context exists. */
+   if (ctx != NULL) {
+      fxMesaSwapBuffers();
+      return TRUE;
+   }
+
+   SetLastError(0);
+   return FALSE;
+}
+
+GLAPI BOOL GLAPIENTRY
+SetPixelFormat (HDC hdc, int iPixelFormat, const PIXELFORMATDESCRIPTOR *ppfd)
+{
+   return wglSetPixelFormat(hdc, iPixelFormat, ppfd);   /* unprefixed entry point: plain forwarder */
+}
+
+GLAPI BOOL GLAPIENTRY
+SwapBuffers(HDC hdc)
+{
+   return wglSwapBuffers(hdc);   /* unprefixed entry point: plain forwarder */
+}
+
+static FIXED
+FixedFromDouble (double d)
+{
+   /* Union members overlap, so the 16.16 fixed-point bits stored in l
+    * are what f returns; with a struct f would be left uninitialized. */
+   union { FIXED f; long l; } pun;
+
+   pun.l = (long)(d * 65536L);
+   return pun.f;
+}
+
+/*
+** This was yanked from windows/gdi/wgl.c
+*/
+/* Builds one display list per character, from listBase onwards, out of the
+ * glyph bitmaps GDI reports for the font selected in hdc.  Returns FALSE
+ * if any glyph could not be fetched or buffered; such a character still
+ * gets an (empty) display list. */
+GLAPI BOOL GLAPIENTRY
+wglUseFontBitmapsA (HDC hdc, DWORD first, DWORD count, DWORD listBase)
+{
+   DWORD i;
+   GLYPHMETRICS gm;
+   MAT2 mat;
+   BOOL success = TRUE;
+
+   /* identity transform with the Y axis flipped */
+   mat.eM11 = FixedFromDouble(1);
+   mat.eM12 = FixedFromDouble(0);
+   mat.eM21 = FixedFromDouble(0);
+   mat.eM22 = FixedFromDouble(-1);
+
+   memset(&gm, 0, sizeof(gm));
+
+   /*
+    ** If we can't get the glyph outline, it may be because this is a fixed
+    ** font.  Try processing it that way.
+    */
+   if (GetGlyphOutline(hdc, first, GGO_BITMAP, &gm, 0, NULL, &mat) == GDI_ERROR) {
+      return wglUseFontBitmaps_FX(hdc, first, count, listBase);
+   }
+
+   /*
+    ** Otherwise process all desired characters.
+    */
+   for (i = 0; i < count; i++) {
+      DWORD size;
+      HANDLE hBits;
+      LPSTR lpBits;
+
+      glNewList(listBase + i, GL_COMPILE);
+
+      /* find out how much space the bitmap needs */
+      size = GetGlyphOutline(hdc, first + i, GGO_BITMAP, &gm, 0, NULL, &mat);
+      if (size == GDI_ERROR) {
+         glEndList();
+         success = FALSE;
+         continue;
+      }
+
+      /* allocate and lock the buffer; either step can fail */
+      hBits = GlobalAlloc(GHND, size + 1);
+      lpBits = (hBits != NULL) ? GlobalLock(hBits) : NULL;
+      if (lpBits == NULL) {
+         if (hBits != NULL)
+            GlobalFree(hBits);
+         glEndList();
+         success = FALSE;
+         continue;
+      }
+
+      if (GetGlyphOutline(hdc,        /* handle to device context */
+                          first + i,  /* character to query */
+                          GGO_BITMAP, /* format of data to return */
+                          &gm,        /* pointer to structure for metrics */
+                          size,       /* size of buffer for data */
+                          lpBits,     /* pointer to buffer for data */
+                          &mat        /* pointer to transformation matrix */
+             ) == GDI_ERROR) {
+         success = FALSE;
+      } else {
+         glBitmap(gm.gmBlackBoxX, gm.gmBlackBoxY,
+                  -gm.gmptGlyphOrigin.x,
+                  gm.gmptGlyphOrigin.y,
+                  gm.gmCellIncX, gm.gmCellIncY,
+                  (const GLubyte *)lpBits);
+      }
+
+      /* release the buffer and close the list in either case */
+      GlobalUnlock(hBits);
+      GlobalFree(hBits);
+      glEndList();
+   }
+
+   return success;
+}
+
+GLAPI BOOL GLAPIENTRY
+wglDescribeLayerPlane (HDC hdc, int iPixelFormat, int iLayerPlane,
+                       UINT nBytes, LPLAYERPLANEDESCRIPTOR ppfd)
+{
+   SetLastError(0);   /* stub: ppfd is not written */
+   return FALSE;      /* always reports failure */
+}
+
+GLAPI int GLAPIENTRY
+wglGetLayerPaletteEntries (HDC hdc, int iLayerPlane, int iStart,
+                           int cEntries, COLORREF *pcr)
+{
+   SetLastError(0);   /* stub: pcr is not written */
+   return FALSE;      /* i.e. 0 is returned through the int result */
+}
+
+GLAPI BOOL GLAPIENTRY
+wglRealizeLayerPalette (HDC hdc, int iLayerPlane, BOOL bRealize)
+{
+   SetLastError(0);   /* stub: arguments are ignored */
+   return FALSE;      /* always reports failure */
+}
+
+GLAPI int GLAPIENTRY
+wglSetLayerPaletteEntries (HDC hdc, int iLayerPlane, int iStart,
+                           int cEntries, CONST COLORREF *pcr)
+{
+   SetLastError(0);   /* stub: arguments are ignored */
+   return FALSE;      /* i.e. 0 is returned through the int result */
+}
+
+
+/***************************************************************************
+ * [dBorca] simplistic ICD implementation, based on ICD code by Gregor Anich
+ */
+
+typedef struct _icdTable {
+   DWORD size;        /* icdTable initializes this to 336, the capacity of table[] */
+   PROC table[336];   /* function pointers, filled from icdlist.h in icdTable */
+} ICDTABLE, *PICDTABLE;
+
+#ifdef USE_MGL_NAMESPACE
+#define GL_FUNC(func) mgl##func   /* e.g. GL_FUNC(Begin) -> mglBegin */
+#else
+#define GL_FUNC(func) gl##func    /* e.g. GL_FUNC(Begin) -> glBegin */
+#endif
+
+static ICDTABLE icdTable = { 336, {   /* size, then the entries icdlist.h presumably lists via ICD_ENTRY() */
+#define ICD_ENTRY(func) (PROC)GL_FUNC(func),
+#include "../icd/icdlist.h"
+#undef ICD_ENTRY
+} };
+
+
+GLAPI BOOL GLAPIENTRY
+DrvCopyContext (HGLRC hglrcSrc, HGLRC hglrcDst, UINT mask)
+{
+   return wglCopyContext(hglrcSrc, hglrcDst, mask);   /* ICD entry point: plain forwarder */
+}
+
+
+GLAPI HGLRC GLAPIENTRY
+DrvCreateContext (HDC hdc)
+{
+   return wglCreateContext(hdc);   /* ICD entry point: plain forwarder */
+}
+
+
+GLAPI BOOL GLAPIENTRY
+DrvDeleteContext (HGLRC hglrc)
+{
+   return wglDeleteContext(hglrc);   /* ICD entry point: plain forwarder */
+}
+
+
+GLAPI HGLRC GLAPIENTRY
+DrvCreateLayerContext (HDC hdc, int iLayerPlane)
+{
+   return wglCreateContext(hdc);   /* iLayerPlane is ignored: an ordinary context is created (NOTE(review): wglCreateLayerContext always fails instead - confirm intent) */
+}
+
+
+GLAPI PICDTABLE GLAPIENTRY
+DrvSetContext (HDC hdc, HGLRC hglrc, void *callback)
+{
+   return wglMakeCurrent(hdc, hglrc) ? &icdTable : NULL;   /* callback is ignored; the dispatch table is returned only if making current succeeds */
+}
+
+
+GLAPI BOOL GLAPIENTRY
+DrvReleaseContext (HGLRC hglrc)
+{
+   return TRUE;   /* nothing is released; hglrc is ignored and success is always reported */
+}
+
+
+GLAPI BOOL GLAPIENTRY
+DrvShareLists (HGLRC hglrc1, HGLRC hglrc2)
+{
+   return wglShareLists(hglrc1, hglrc2);   /* ICD entry point: plain forwarder */
+}
+
+
+GLAPI BOOL GLAPIENTRY
+DrvDescribeLayerPlane (HDC hdc, int iPixelFormat,
+                       int iLayerPlane, UINT nBytes,
+                       LPLAYERPLANEDESCRIPTOR plpd)
+{
+   return wglDescribeLayerPlane(hdc, iPixelFormat, iLayerPlane, nBytes, plpd);   /* ICD entry point: plain forwarder */
+}
+
+
+GLAPI int GLAPIENTRY
+DrvSetLayerPaletteEntries (HDC hdc, int iLayerPlane,
+                           int iStart, int cEntries, CONST COLORREF *pcr)
+{
+   return wglSetLayerPaletteEntries(hdc, iLayerPlane, iStart, cEntries, pcr);   /* ICD entry point: plain forwarder */
+}
+
+
+GLAPI int GLAPIENTRY
+DrvGetLayerPaletteEntries (HDC hdc, int iLayerPlane,
+                           int iStart, int cEntries, COLORREF *pcr)
+{
+   return wglGetLayerPaletteEntries(hdc, iLayerPlane, iStart, cEntries, pcr);   /* ICD entry point: plain forwarder */
+}
+
+
+GLAPI BOOL GLAPIENTRY
+DrvRealizeLayerPalette (HDC hdc, int iLayerPlane, BOOL bRealize)
+{
+   return wglRealizeLayerPalette(hdc, iLayerPlane, bRealize);   /* ICD entry point: plain forwarder */
+}
+
+
+GLAPI BOOL GLAPIENTRY
+DrvSwapLayerBuffers (HDC hdc, UINT fuPlanes)
+{
+   return wglSwapLayerBuffers(hdc, fuPlanes);   /* ICD entry point: plain forwarder */
+}
+
+GLAPI int GLAPIENTRY
+DrvDescribePixelFormat (HDC hdc, int iPixelFormat, UINT nBytes,
+                        LPPIXELFORMATDESCRIPTOR ppfd)
+{
+   return wglDescribePixelFormat(hdc, iPixelFormat, nBytes, ppfd);   /* ICD entry point: plain forwarder */
+}
+
+
+GLAPI PROC GLAPIENTRY
+DrvGetProcAddress (LPCSTR lpszProc)
+{
+   return wglGetProcAddress(lpszProc);   /* ICD entry point: plain forwarder */
+}
+
+
+GLAPI BOOL GLAPIENTRY
+DrvSetPixelFormat (HDC hdc, int iPixelFormat)
+{
+   return wglSetPixelFormat(hdc, iPixelFormat, NULL);   /* this entry point has no descriptor argument, so NULL is passed */
+}
+
+
+GLAPI BOOL GLAPIENTRY
+DrvSwapBuffers (HDC hdc)
+{
+   return wglSwapBuffers(hdc);   /* ICD entry point: plain forwarder */
+}
+
+
+GLAPI BOOL GLAPIENTRY
+DrvValidateVersion (DWORD version)
+{
+   (void)version;   /* the reported version is not inspected */
+   return TRUE;     /* every version is accepted */
+}
+
+
+#if (_MSC_VER >= 1200)
+#pragma warning( pop )
+#endif
+
+#endif /* FX */
diff --git a/src/mesa/drivers/windows/gdi/InitCritSections.cpp b/src/mesa/drivers/windows/gdi/InitCritSections.cpp
new file mode 100644
index 0000000000..7145bffa51
--- /dev/null
+++ b/src/mesa/drivers/windows/gdi/InitCritSections.cpp
@@ -0,0 +1,32 @@
+#include "glapi.h"
+#include "glThread.h"
+
+#ifdef WIN32_THREADS
+extern "C" _glthread_Mutex OneTimeLock;
+extern "C" _glthread_Mutex GenTexturesLock;
+
+extern "C" void FreeAllTSD(void);
+
+class _CriticalSectionInit	// ties the lifetime of two global mutexes to one static object
+{
+public:
+	static _CriticalSectionInit	m_inst;	// the single instance, defined right after the class
+
+	_CriticalSectionInit()	// initialises both mutexes
+	{
+		_glthread_INIT_MUTEX(OneTimeLock);
+		_glthread_INIT_MUTEX(GenTexturesLock);
+	}
+
+	~_CriticalSectionInit()	// destroys both mutexes, then calls FreeAllTSD()
+	{
+		_glthread_DESTROY_MUTEX(OneTimeLock);
+		_glthread_DESTROY_MUTEX(GenTexturesLock);
+		FreeAllTSD();
+	}
+};
+
+_CriticalSectionInit _CriticalSectionInit::m_inst;	// static storage: constructed during static initialisation, destroyed at teardown
+
+
+#endif
diff --git a/src/mesa/drivers/windows/gdi/colors.h b/src/mesa/drivers/windows/gdi/colors.h
new file mode 100644
index 0000000000..03e512c1fa
--- /dev/null
+++ b/src/mesa/drivers/windows/gdi/colors.h
@@ -0,0 +1,29 @@
+/* Values for wmesa->pixelformat: */
+
+#define PF_8A8B8G8R	3	/* 32-bit TrueColor:  8-A, 8-B, 8-G, 8-R */
+#define PF_8R8G8B	4	/* 32-bit TrueColor:  8-R, 8-G, 8-B */
+#define PF_5R6G5B	5	/* 16-bit TrueColor:  5-R, 6-G, 5-B bits */
+#define PF_DITHER8	6	/* Dithered RGB using a lookup table */
+#define PF_LOOKUP	7	/* Undithered RGB using a lookup table */
+#define PF_GRAYSCALE	10	/* Grayscale or StaticGray */
+#define PF_BADFORMAT	11
+#define PF_INDEX8	12
+
+
+#define BGR8(r,g,b) (unsigned)(((BYTE)(((b) & 0xc0) | (((g) & 0xe0)>>2) | \
+                                      (((r) & 0xe0)>>5))))   /* bits 7-6: b, 5-3: g, 2-0: r; arguments parenthesized so expressions are safe */
+
+/* Windows uses 5,5,5 for 16-bit; each argument is parenthesized so the cast covers a whole expression */
+#define BGR16(r,g,b) (  (((unsigned short)(b)       ) >> 3) | \
+                        (((unsigned short)(g) & 0xf8) << 2) | \
+                        (((unsigned short)(r) & 0xf8) << 7) )
+
+#define BGR24(r,g,b) (unsigned long)((DWORD)(((BYTE)(b)| \
+                                    ((WORD)((BYTE)(g))<<8))| \
+                                    (((DWORD)(BYTE)(r))<<16)))   /* b: bits 0-7, g: bits 8-15, r: bits 16-23 */
+
+#define BGR32(r,g,b) (unsigned long)((DWORD)(((BYTE)(b)| \
+                                    ((WORD)((BYTE)(g))<<8))| \
+                                    (((DWORD)(BYTE)(r))<<16)))   /* same packing as BGR24; bits 24-31 stay zero */
+
+
diff --git a/src/mesa/drivers/windows/gdi/mesa.def b/src/mesa/drivers/windows/gdi/mesa.def
new file mode 100644
index 0000000000..b537b3460c
--- /dev/null
+++ b/src/mesa/drivers/windows/gdi/mesa.def
@@ -0,0 +1,963 @@
+; DO NOT EDIT - This file generated automatically by mesadef.py script
+;DESCRIPTION 'Mesa (OpenGL work-alike) for Win32'
+VERSION 6.5
+;
+; Module definition file for Mesa (OPENGL32.DLL)
+;
+; Note: The OpenGL functions use the STDCALL
+; function calling convention.  Microsoft's
+; OPENGL32 uses this convention and so must the
+; Mesa OPENGL32 so that the Mesa DLL can be used
+; as a drop-in replacement.
+;
+; The linker exports STDCALL entry points with
+; 'decorated' names; e.g., _glBegin@0, where the
+; trailing number is the number of bytes of 
+; parameter data pushed onto the stack.  The
+; callee is responsible for popping this data
+; off the stack, usually via a RETF n instruction.
+;
+; However, the Microsoft OPENGL32.DLL does not export
+; the decorated names, even though the calling convention
+; is STDCALL.  So, this module definition file is
+; needed to force the Mesa OPENGL32.DLL to export the
+; symbols in the same manner as the Microsoft DLL.
+; Were it not for this problem, this file would not
+; be needed (for the gl* functions) since the entry
+; points are compiled with dllexport declspec.
+;
+; However, this file is still needed to export "internal"
+; Mesa symbols for the benefit of the OSMESA32.DLL.
+;
+EXPORTS
+	glNewList
+	glEndList
+	glCallList
+	glCallLists
+	glDeleteLists
+	glGenLists
+	glListBase
+	glBegin
+	glBitmap
+	glColor3b
+	glColor3bv
+	glColor3d
+	glColor3dv
+	glColor3f
+	glColor3fv
+	glColor3i
+	glColor3iv
+	glColor3s
+	glColor3sv
+	glColor3ub
+	glColor3ubv
+	glColor3ui
+	glColor3uiv
+	glColor3us
+	glColor3usv
+	glColor4b
+	glColor4bv
+	glColor4d
+	glColor4dv
+	glColor4f
+	glColor4fv
+	glColor4i
+	glColor4iv
+	glColor4s
+	glColor4sv
+	glColor4ub
+	glColor4ubv
+	glColor4ui
+	glColor4uiv
+	glColor4us
+	glColor4usv
+	glEdgeFlag
+	glEdgeFlagv
+	glEnd
+	glIndexd
+	glIndexdv
+	glIndexf
+	glIndexfv
+	glIndexi
+	glIndexiv
+	glIndexs
+	glIndexsv
+	glNormal3b
+	glNormal3bv
+	glNormal3d
+	glNormal3dv
+	glNormal3f
+	glNormal3fv
+	glNormal3i
+	glNormal3iv
+	glNormal3s
+	glNormal3sv
+	glRasterPos2d
+	glRasterPos2dv
+	glRasterPos2f
+	glRasterPos2fv
+	glRasterPos2i
+	glRasterPos2iv
+	glRasterPos2s
+	glRasterPos2sv
+	glRasterPos3d
+	glRasterPos3dv
+	glRasterPos3f
+	glRasterPos3fv
+	glRasterPos3i
+	glRasterPos3iv
+	glRasterPos3s
+	glRasterPos3sv
+	glRasterPos4d
+	glRasterPos4dv
+	glRasterPos4f
+	glRasterPos4fv
+	glRasterPos4i
+	glRasterPos4iv
+	glRasterPos4s
+	glRasterPos4sv
+	glRectd
+	glRectdv
+	glRectf
+	glRectfv
+	glRecti
+	glRectiv
+	glRects
+	glRectsv
+	glTexCoord1d
+	glTexCoord1dv
+	glTexCoord1f
+	glTexCoord1fv
+	glTexCoord1i
+	glTexCoord1iv
+	glTexCoord1s
+	glTexCoord1sv
+	glTexCoord2d
+	glTexCoord2dv
+	glTexCoord2f
+	glTexCoord2fv
+	glTexCoord2i
+	glTexCoord2iv
+	glTexCoord2s
+	glTexCoord2sv
+	glTexCoord3d
+	glTexCoord3dv
+	glTexCoord3f
+	glTexCoord3fv
+	glTexCoord3i
+	glTexCoord3iv
+	glTexCoord3s
+	glTexCoord3sv
+	glTexCoord4d
+	glTexCoord4dv
+	glTexCoord4f
+	glTexCoord4fv
+	glTexCoord4i
+	glTexCoord4iv
+	glTexCoord4s
+	glTexCoord4sv
+	glVertex2d
+	glVertex2dv
+	glVertex2f
+	glVertex2fv
+	glVertex2i
+	glVertex2iv
+	glVertex2s
+	glVertex2sv
+	glVertex3d
+	glVertex3dv
+	glVertex3f
+	glVertex3fv
+	glVertex3i
+	glVertex3iv
+	glVertex3s
+	glVertex3sv
+	glVertex4d
+	glVertex4dv
+	glVertex4f
+	glVertex4fv
+	glVertex4i
+	glVertex4iv
+	glVertex4s
+	glVertex4sv
+	glClipPlane
+	glColorMaterial
+	glCullFace
+	glFogf
+	glFogfv
+	glFogi
+	glFogiv
+	glFrontFace
+	glHint
+	glLightf
+	glLightfv
+	glLighti
+	glLightiv
+	glLightModelf
+	glLightModelfv
+	glLightModeli
+	glLightModeliv
+	glLineStipple
+	glLineWidth
+	glMaterialf
+	glMaterialfv
+	glMateriali
+	glMaterialiv
+	glPointSize
+	glPolygonMode
+	glPolygonStipple
+	glScissor
+	glShadeModel
+	glTexParameterf
+	glTexParameterfv
+	glTexParameteri
+	glTexParameteriv
+	glTexImage1D
+	glTexImage2D
+	glTexEnvf
+	glTexEnvfv
+	glTexEnvi
+	glTexEnviv
+	glTexGend
+	glTexGendv
+	glTexGenf
+	glTexGenfv
+	glTexGeni
+	glTexGeniv
+	glFeedbackBuffer
+	glSelectBuffer
+	glRenderMode
+	glInitNames
+	glLoadName
+	glPassThrough
+	glPopName
+	glPushName
+	glDrawBuffer
+	glClear
+	glClearAccum
+	glClearIndex
+	glClearColor
+	glClearStencil
+	glClearDepth
+	glStencilMask
+	glColorMask
+	glDepthMask
+	glIndexMask
+	glAccum
+	glDisable
+	glEnable
+	glFinish
+	glFlush
+	glPopAttrib
+	glPushAttrib
+	glMap1d
+	glMap1f
+	glMap2d
+	glMap2f
+	glMapGrid1d
+	glMapGrid1f
+	glMapGrid2d
+	glMapGrid2f
+	glEvalCoord1d
+	glEvalCoord1dv
+	glEvalCoord1f
+	glEvalCoord1fv
+	glEvalCoord2d
+	glEvalCoord2dv
+	glEvalCoord2f
+	glEvalCoord2fv
+	glEvalMesh1
+	glEvalPoint1
+	glEvalMesh2
+	glEvalPoint2
+	glAlphaFunc
+	glBlendFunc
+	glLogicOp
+	glStencilFunc
+	glStencilOp
+	glDepthFunc
+	glPixelZoom
+	glPixelTransferf
+	glPixelTransferi
+	glPixelStoref
+	glPixelStorei
+	glPixelMapfv
+	glPixelMapuiv
+	glPixelMapusv
+	glReadBuffer
+	glCopyPixels
+	glReadPixels
+	glDrawPixels
+	glGetBooleanv
+	glGetClipPlane
+	glGetDoublev
+	glGetError
+	glGetFloatv
+	glGetIntegerv
+	glGetLightfv
+	glGetLightiv
+	glGetMapdv
+	glGetMapfv
+	glGetMapiv
+	glGetMaterialfv
+	glGetMaterialiv
+	glGetPixelMapfv
+	glGetPixelMapuiv
+	glGetPixelMapusv
+	glGetPolygonStipple
+	glGetString
+	glGetTexEnvfv
+	glGetTexEnviv
+	glGetTexGendv
+	glGetTexGenfv
+	glGetTexGeniv
+	glGetTexImage
+	glGetTexParameterfv
+	glGetTexParameteriv
+	glGetTexLevelParameterfv
+	glGetTexLevelParameteriv
+	glIsEnabled
+	glIsList
+	glDepthRange
+	glFrustum
+	glLoadIdentity
+	glLoadMatrixf
+	glLoadMatrixd
+	glMatrixMode
+	glMultMatrixf
+	glMultMatrixd
+	glOrtho
+	glPopMatrix
+	glPushMatrix
+	glRotated
+	glRotatef
+	glScaled
+	glScalef
+	glTranslated
+	glTranslatef
+	glViewport
+	glArrayElement
+	glColorPointer
+	glDisableClientState
+	glDrawArrays
+	glDrawElements
+	glEdgeFlagPointer
+	glEnableClientState
+	glGetPointerv
+	glIndexPointer
+	glInterleavedArrays
+	glNormalPointer
+	glTexCoordPointer
+	glVertexPointer
+	glPolygonOffset
+	glCopyTexImage1D
+	glCopyTexImage2D
+	glCopyTexSubImage1D
+	glCopyTexSubImage2D
+	glTexSubImage1D
+	glTexSubImage2D
+	glAreTexturesResident
+	glBindTexture
+	glDeleteTextures
+	glGenTextures
+	glIsTexture
+	glPrioritizeTextures
+	glIndexub
+	glIndexubv
+	glPopClientAttrib
+	glPushClientAttrib
+	glBlendColor
+	glBlendEquation
+	glDrawRangeElements
+	glColorTable
+	glColorTableParameterfv
+	glColorTableParameteriv
+	glCopyColorTable
+	glGetColorTable
+	glGetColorTableParameterfv
+	glGetColorTableParameteriv
+	glColorSubTable
+	glCopyColorSubTable
+	glConvolutionFilter1D
+	glConvolutionFilter2D
+	glConvolutionParameterf
+	glConvolutionParameterfv
+	glConvolutionParameteri
+	glConvolutionParameteriv
+	glCopyConvolutionFilter1D
+	glCopyConvolutionFilter2D
+	glGetConvolutionFilter
+	glGetConvolutionParameterfv
+	glGetConvolutionParameteriv
+	glGetSeparableFilter
+	glSeparableFilter2D
+	glGetHistogram
+	glGetHistogramParameterfv
+	glGetHistogramParameteriv
+	glGetMinmax
+	glGetMinmaxParameterfv
+	glGetMinmaxParameteriv
+	glHistogram
+	glMinmax
+	glResetHistogram
+	glResetMinmax
+	glTexImage3D
+	glTexSubImage3D
+	glCopyTexSubImage3D
+	glActiveTextureARB
+	glClientActiveTextureARB
+	glMultiTexCoord1dARB
+	glMultiTexCoord1dvARB
+	glMultiTexCoord1fARB
+	glMultiTexCoord1fvARB
+	glMultiTexCoord1iARB
+	glMultiTexCoord1ivARB
+	glMultiTexCoord1sARB
+	glMultiTexCoord1svARB
+	glMultiTexCoord2dARB
+	glMultiTexCoord2dvARB
+	glMultiTexCoord2fARB
+	glMultiTexCoord2fvARB
+	glMultiTexCoord2iARB
+	glMultiTexCoord2ivARB
+	glMultiTexCoord2sARB
+	glMultiTexCoord2svARB
+	glMultiTexCoord3dARB
+	glMultiTexCoord3dvARB
+	glMultiTexCoord3fARB
+	glMultiTexCoord3fvARB
+	glMultiTexCoord3iARB
+	glMultiTexCoord3ivARB
+	glMultiTexCoord3sARB
+	glMultiTexCoord3svARB
+	glMultiTexCoord4dARB
+	glMultiTexCoord4dvARB
+	glMultiTexCoord4fARB
+	glMultiTexCoord4fvARB
+	glMultiTexCoord4iARB
+	glMultiTexCoord4ivARB
+	glMultiTexCoord4sARB
+	glMultiTexCoord4svARB
+	glLoadTransposeMatrixfARB
+	glLoadTransposeMatrixdARB
+	glMultTransposeMatrixfARB
+	glMultTransposeMatrixdARB
+	glSampleCoverageARB
+	glCompressedTexImage3DARB
+	glCompressedTexImage2DARB
+	glCompressedTexImage1DARB
+	glCompressedTexSubImage3DARB
+	glCompressedTexSubImage2DARB
+	glCompressedTexSubImage1DARB
+	glGetCompressedTexImageARB
+	glActiveTexture
+	glClientActiveTexture
+	glMultiTexCoord1d
+	glMultiTexCoord1dv
+	glMultiTexCoord1f
+	glMultiTexCoord1fv
+	glMultiTexCoord1i
+	glMultiTexCoord1iv
+	glMultiTexCoord1s
+	glMultiTexCoord1sv
+	glMultiTexCoord2d
+	glMultiTexCoord2dv
+	glMultiTexCoord2f
+	glMultiTexCoord2fv
+	glMultiTexCoord2i
+	glMultiTexCoord2iv
+	glMultiTexCoord2s
+	glMultiTexCoord2sv
+	glMultiTexCoord3d
+	glMultiTexCoord3dv
+	glMultiTexCoord3f
+	glMultiTexCoord3fv
+	glMultiTexCoord3i
+	glMultiTexCoord3iv
+	glMultiTexCoord3s
+	glMultiTexCoord3sv
+	glMultiTexCoord4d
+	glMultiTexCoord4dv
+	glMultiTexCoord4f
+	glMultiTexCoord4fv
+	glMultiTexCoord4i
+	glMultiTexCoord4iv
+	glMultiTexCoord4s
+	glMultiTexCoord4sv
+	glLoadTransposeMatrixf
+	glLoadTransposeMatrixd
+	glMultTransposeMatrixf
+	glMultTransposeMatrixd
+	glSampleCoverage
+	glCompressedTexImage3D
+	glCompressedTexImage2D
+	glCompressedTexImage1D
+	glCompressedTexSubImage3D
+	glCompressedTexSubImage2D
+	glCompressedTexSubImage1D
+	glGetCompressedTexImage
+	glBlendColorEXT
+	glPolygonOffsetEXT
+	glTexImage3DEXT
+	glTexSubImage3DEXT
+	glTexSubImage1DEXT
+	glTexSubImage2DEXT
+	glCopyTexImage1DEXT
+	glCopyTexImage2DEXT
+	glCopyTexSubImage1DEXT
+	glCopyTexSubImage2DEXT
+	glCopyTexSubImage3DEXT
+	glAreTexturesResidentEXT
+	glBindTextureEXT
+	glDeleteTexturesEXT
+	glGenTexturesEXT
+	glIsTextureEXT
+	glPrioritizeTexturesEXT
+	glArrayElementEXT
+	glColorPointerEXT
+	glDrawArraysEXT
+	glEdgeFlagPointerEXT
+	glGetPointervEXT
+	glIndexPointerEXT
+	glNormalPointerEXT
+	glTexCoordPointerEXT
+	glVertexPointerEXT
+	glBlendEquationEXT
+	glPointParameterfEXT
+	glPointParameterfvEXT
+	glPointParameterfARB
+	glPointParameterfvARB
+	glColorTableEXT
+	glGetColorTableEXT
+	glGetColorTableParameterivEXT
+	glGetColorTableParameterfvEXT
+	glLockArraysEXT
+	glUnlockArraysEXT
+	glDrawRangeElementsEXT
+	glSecondaryColor3bEXT
+	glSecondaryColor3bvEXT
+	glSecondaryColor3dEXT
+	glSecondaryColor3dvEXT
+	glSecondaryColor3fEXT
+	glSecondaryColor3fvEXT
+	glSecondaryColor3iEXT
+	glSecondaryColor3ivEXT
+	glSecondaryColor3sEXT
+	glSecondaryColor3svEXT
+	glSecondaryColor3ubEXT
+	glSecondaryColor3ubvEXT
+	glSecondaryColor3uiEXT
+	glSecondaryColor3uivEXT
+	glSecondaryColor3usEXT
+	glSecondaryColor3usvEXT
+	glSecondaryColorPointerEXT
+	glMultiDrawArraysEXT
+	glMultiDrawElementsEXT
+	glFogCoordfEXT
+	glFogCoordfvEXT
+	glFogCoorddEXT
+	glFogCoorddvEXT
+	glFogCoordPointerEXT
+	glBlendFuncSeparateEXT
+	glFlushVertexArrayRangeNV
+	glVertexArrayRangeNV
+	glCombinerParameterfvNV
+	glCombinerParameterfNV
+	glCombinerParameterivNV
+	glCombinerParameteriNV
+	glCombinerInputNV
+	glCombinerOutputNV
+	glFinalCombinerInputNV
+	glGetCombinerInputParameterfvNV
+	glGetCombinerInputParameterivNV
+	glGetCombinerOutputParameterfvNV
+	glGetCombinerOutputParameterivNV
+	glGetFinalCombinerInputParameterfvNV
+	glGetFinalCombinerInputParameterivNV
+	glResizeBuffersMESA
+	glWindowPos2dMESA
+	glWindowPos2dvMESA
+	glWindowPos2fMESA
+	glWindowPos2fvMESA
+	glWindowPos2iMESA
+	glWindowPos2ivMESA
+	glWindowPos2sMESA
+	glWindowPos2svMESA
+	glWindowPos3dMESA
+	glWindowPos3dvMESA
+	glWindowPos3fMESA
+	glWindowPos3fvMESA
+	glWindowPos3iMESA
+	glWindowPos3ivMESA
+	glWindowPos3sMESA
+	glWindowPos3svMESA
+	glWindowPos4dMESA
+	glWindowPos4dvMESA
+	glWindowPos4fMESA
+	glWindowPos4fvMESA
+	glWindowPos4iMESA
+	glWindowPos4ivMESA
+	glWindowPos4sMESA
+	glWindowPos4svMESA
+	glWindowPos2dARB
+	glWindowPos2fARB
+	glWindowPos2iARB
+	glWindowPos2sARB
+	glWindowPos2dvARB
+	glWindowPos2fvARB
+	glWindowPos2ivARB
+	glWindowPos2svARB
+	glWindowPos3dARB
+	glWindowPos3fARB
+	glWindowPos3iARB
+	glWindowPos3sARB
+	glWindowPos3dvARB
+	glWindowPos3fvARB
+	glWindowPos3ivARB
+	glWindowPos3svARB
+	glAreProgramsResidentNV
+	glBindProgramNV
+	glDeleteProgramsNV
+	glExecuteProgramNV
+	glGenProgramsNV
+	glGetProgramParameterdvNV
+	glGetProgramParameterfvNV
+	glGetProgramivNV
+	glGetProgramStringNV
+	glGetTrackMatrixivNV
+	glGetVertexAttribdvNV
+	glGetVertexAttribfvNV
+	glGetVertexAttribivNV
+	glGetVertexAttribPointervNV
+	glIsProgramNV
+	glLoadProgramNV
+	glProgramParameter4dNV
+	glProgramParameter4dvNV
+	glProgramParameter4fNV
+	glProgramParameter4fvNV
+	glProgramParameters4dvNV
+	glProgramParameters4fvNV
+	glRequestResidentProgramsNV
+	glTrackMatrixNV
+	glVertexAttribPointerNV
+	glVertexAttrib1dNV
+	glVertexAttrib1dvNV
+	glVertexAttrib1fNV
+	glVertexAttrib1fvNV
+	glVertexAttrib1sNV
+	glVertexAttrib1svNV
+	glVertexAttrib2dNV
+	glVertexAttrib2dvNV
+	glVertexAttrib2fNV
+	glVertexAttrib2fvNV
+	glVertexAttrib2sNV
+	glVertexAttrib2svNV
+	glVertexAttrib3dNV
+	glVertexAttrib3dvNV
+	glVertexAttrib3fNV
+	glVertexAttrib3fvNV
+	glVertexAttrib3sNV
+	glVertexAttrib3svNV
+	glVertexAttrib4dNV
+	glVertexAttrib4dvNV
+	glVertexAttrib4fNV
+	glVertexAttrib4fvNV
+	glVertexAttrib4sNV
+	glVertexAttrib4svNV
+	glVertexAttrib4ubNV
+	glVertexAttrib4ubvNV
+	glVertexAttribs1dvNV
+	glVertexAttribs1fvNV
+	glVertexAttribs1svNV
+	glVertexAttribs2dvNV
+	glVertexAttribs2fvNV
+	glVertexAttribs2svNV
+	glVertexAttribs3dvNV
+	glVertexAttribs3fvNV
+	glVertexAttribs3svNV
+	glVertexAttribs4dvNV
+	glVertexAttribs4fvNV
+	glVertexAttribs4svNV
+	glVertexAttribs4ubvNV
+	glPointParameteriNV
+	glPointParameterivNV
+	glFogCoordf
+	glFogCoordfv
+	glFogCoordd
+	glFogCoorddv
+	glFogCoordPointer
+	glMultiDrawArrays
+	glMultiDrawElements
+	glPointParameterf
+	glPointParameterfv
+	glPointParameteri
+	glPointParameteriv
+	glSecondaryColor3b
+	glSecondaryColor3bv
+	glSecondaryColor3d
+	glSecondaryColor3dv
+	glSecondaryColor3f
+	glSecondaryColor3fv
+	glSecondaryColor3i
+	glSecondaryColor3iv
+	glSecondaryColor3s
+	glSecondaryColor3sv
+	glSecondaryColor3ub
+	glSecondaryColor3ubv
+	glSecondaryColor3ui
+	glSecondaryColor3uiv
+	glSecondaryColor3us
+	glSecondaryColor3usv
+	glSecondaryColorPointer
+	glWindowPos2d
+	glWindowPos2dv
+	glWindowPos2f
+	glWindowPos2fv
+	glWindowPos2i
+	glWindowPos2iv
+	glWindowPos2s
+	glWindowPos2sv
+	glWindowPos3d
+	glWindowPos3dv
+	glWindowPos3f
+	glWindowPos3fv
+	glWindowPos3i
+	glWindowPos3iv
+	glWindowPos3s
+	glWindowPos3sv
+	glVertexAttrib1sARB
+	glVertexAttrib1fARB
+	glVertexAttrib1dARB
+	glVertexAttrib2sARB
+	glVertexAttrib2fARB
+	glVertexAttrib2dARB
+	glVertexAttrib3sARB
+	glVertexAttrib3fARB
+	glVertexAttrib3dARB
+	glVertexAttrib4sARB
+	glVertexAttrib4fARB
+	glVertexAttrib4dARB
+	glVertexAttrib4NubARB
+	glVertexAttrib1svARB
+	glVertexAttrib1fvARB
+	glVertexAttrib1dvARB
+	glVertexAttrib2svARB
+	glVertexAttrib2fvARB
+	glVertexAttrib2dvARB
+	glVertexAttrib3svARB
+	glVertexAttrib3fvARB
+	glVertexAttrib3dvARB
+	glVertexAttrib4bvARB
+	glVertexAttrib4svARB
+	glVertexAttrib4ivARB
+	glVertexAttrib4ubvARB
+	glVertexAttrib4usvARB
+	glVertexAttrib4uivARB
+	glVertexAttrib4fvARB
+	glVertexAttrib4dvARB
+	glVertexAttrib4NbvARB
+	glVertexAttrib4NsvARB
+	glVertexAttrib4NivARB
+	glVertexAttrib4NubvARB
+	glVertexAttrib4NusvARB
+	glVertexAttrib4NuivARB
+	glVertexAttribPointerARB
+	glEnableVertexAttribArrayARB
+	glDisableVertexAttribArrayARB
+	glProgramStringARB
+	glBindProgramARB
+	glDeleteProgramsARB
+	glGenProgramsARB
+	glIsProgramARB
+	glProgramEnvParameter4dARB
+	glProgramEnvParameter4dvARB
+	glProgramEnvParameter4fARB
+	glProgramEnvParameter4fvARB
+	glProgramLocalParameter4dARB
+	glProgramLocalParameter4dvARB
+	glProgramLocalParameter4fARB
+	glProgramLocalParameter4fvARB
+	glGetProgramEnvParameterdvARB
+	glGetProgramEnvParameterfvARB
+	glGetProgramLocalParameterdvARB
+	glGetProgramLocalParameterfvARB
+	glGetProgramivARB
+	glGetProgramStringARB
+	glGetVertexAttribdvARB
+	glGetVertexAttribfvARB
+	glGetVertexAttribivARB
+	glGetVertexAttribPointervARB
+	glProgramNamedParameter4fNV
+	glProgramNamedParameter4dNV
+	glProgramNamedParameter4fvNV
+	glProgramNamedParameter4dvNV
+	glGetProgramNamedParameterfvNV
+	glGetProgramNamedParameterdvNV
+	glBindBufferARB
+	glBufferDataARB
+	glBufferSubDataARB
+	glDeleteBuffersARB
+	glGenBuffersARB
+	glGetBufferParameterivARB
+	glGetBufferPointervARB
+	glGetBufferSubDataARB
+	glIsBufferARB
+	glMapBufferARB
+	glUnmapBufferARB
+	glGenQueriesARB
+	glDeleteQueriesARB
+	glIsQueryARB
+	glBeginQueryARB
+	glEndQueryARB
+	glGetQueryivARB
+	glGetQueryObjectivARB
+	glGetQueryObjectuivARB
+	glBindBuffer
+	glBufferData
+	glBufferSubData
+	glDeleteBuffers
+	glGenBuffers
+	glGetBufferParameteriv
+	glGetBufferPointerv
+	glGetBufferSubData
+	glIsBuffer
+	glMapBuffer
+	glUnmapBuffer
+	glGenQueries
+	glDeleteQueries
+	glIsQuery
+	glBeginQuery
+	glEndQuery
+	glGetQueryiv
+	glGetQueryObjectiv
+	glGetQueryObjectuiv
+;
+; WGL API
+	wglChoosePixelFormat
+	wglCopyContext
+	wglCreateContext
+	wglCreateLayerContext
+	wglDeleteContext
+	wglDescribeLayerPlane
+	wglDescribePixelFormat
+	wglGetCurrentContext
+	wglGetCurrentDC
+	wglGetLayerPaletteEntries
+	wglGetPixelFormat
+	wglGetProcAddress
+	wglMakeCurrent
+	wglRealizeLayerPalette
+	wglSetLayerPaletteEntries
+	wglSetPixelFormat
+	wglShareLists
+	wglSwapBuffers
+	wglSwapLayerBuffers
+	wglUseFontBitmapsA
+	wglUseFontBitmapsW
+	wglUseFontOutlinesA
+	wglUseFontOutlinesW
+	wglGetExtensionsStringARB
+;
+; Mesa internals - mostly for OSMESA
+	_vbo_CreateContext
+	_vbo_DestroyContext
+	_vbo_InvalidateState
+	_glapi_check_multithread
+	_glapi_get_context
+	_glapi_get_proc_address
+	_mesa_add_soft_renderbuffers
+	_mesa_add_renderbuffer
+	_mesa_check_conditional_render
+	_mesa_choose_tex_format
+	_mesa_create_framebuffer
+	_mesa_create_visual
+	_mesa_delete_array_object
+	_mesa_delete_program
+	_mesa_delete_texture_object
+	_mesa_destroy_framebuffer
+	_mesa_destroy_visual
+	_mesa_enable_1_3_extensions
+	_mesa_enable_1_4_extensions
+	_mesa_enable_1_5_extensions
+	_mesa_enable_2_0_extensions
+	_mesa_enable_2_1_extensions
+	_mesa_enable_sw_extensions
+	_mesa_error
+	_mesa_finish_render_texture
+	_mesa_framebuffer_renderbuffer
+	_mesa_free_context_data
+	_mesa_free_texture_image_data
+	_mesa_generate_mipmap
+	_mesa_get_compressed_teximage
+	_mesa_get_current_context
+	_mesa_get_teximage
+	_mesa_init_driver_functions
+	_mesa_init_glsl_driver_functions
+	_mesa_init_renderbuffer
+	_mesa_initialize_context
+	_mesa_make_current
+	_mesa_new_array_object
+	_mesa_new_framebuffer
+	_mesa_new_program
+	_mesa_new_renderbuffer
+	_mesa_new_soft_renderbuffer
+	_mesa_new_texture_image
+	_mesa_new_texture_object
+	_mesa_problem
+	_mesa_reference_renderbuffer
+	_mesa_remove_renderbuffer
+	_mesa_render_texture
+	_mesa_ResizeBuffersMESA
+	_mesa_resize_framebuffer
+	_mesa_store_compressed_teximage1d
+	_mesa_store_compressed_teximage2d
+	_mesa_store_compressed_teximage3d
+	_mesa_store_compressed_texsubimage1d
+	_mesa_store_compressed_texsubimage2d
+	_mesa_store_compressed_texsubimage3d
+	_mesa_store_teximage1d
+	_mesa_store_teximage2d
+	_mesa_store_teximage3d
+	_mesa_store_texsubimage1d
+	_mesa_store_texsubimage2d
+	_mesa_store_texsubimage3d
+	_mesa_test_proxy_teximage
+	_mesa_reference_framebuffer
+	_mesa_update_framebuffer_visual
+	_mesa_use_program
+	_mesa_Viewport
+	_mesa_meta_init
+	_mesa_meta_free
+	_mesa_meta_CopyColorSubTable
+	_mesa_meta_CopyColorTable
+	_mesa_meta_CopyConvolutionFilter1D
+	_mesa_meta_CopyConvolutionFilter2D
+	_mesa_meta_CopyTexImage1D
+	_mesa_meta_CopyTexImage2D
+	_mesa_meta_CopyTexSubImage1D
+	_mesa_meta_CopyTexSubImage2D
+	_mesa_meta_CopyTexSubImage3D
+	_swrast_Accum
+	_swrast_Bitmap
+	_swrast_BlitFramebuffer
+	_swrast_CopyPixels
+	_swrast_DrawPixels
+	_swrast_GetDeviceDriverReference
+	_swrast_Clear
+	_swrast_choose_line
+	_swrast_choose_triangle
+	_swrast_CreateContext
+	_swrast_DestroyContext
+	_swrast_exec_fragment_program
+	_swrast_InvalidateState
+	_swrast_ReadPixels
+	_swsetup_Wakeup
+	_swsetup_CreateContext
+	_swsetup_DestroyContext
+	_swsetup_InvalidateState
+	_tnl_CreateContext
+	_tnl_DestroyContext
+	_tnl_InvalidateState
+	_tnl_run_pipeline
+	_tnl_program_string
+	_tnl_RasterPos
diff --git a/src/mesa/drivers/windows/gdi/wgl.c b/src/mesa/drivers/windows/gdi/wgl.c
new file mode 100644
index 0000000000..8d8087067f
--- /dev/null
+++ b/src/mesa/drivers/windows/gdi/wgl.c
@@ -0,0 +1,707 @@
+
+/*
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+/*
+ * File name 	: wgl.c
+ * WGL stuff. Added by Oleg Letsinsky, ajl@ultersys.ru
+ * Some things originated from the 3Dfx WGL functions
+ */
+
+/* 
+ * This file contains the implementation of the wgl* functions for
+ * Mesa on Windows.  Since these functions are provided by Windows in
+ * GDI/OpenGL, we must supply our versions that work with Mesa here.
+ */
+
+
+/* We're essentially building part of GDI here, so define this so that
+ * we get the right export linkage. */
+#ifdef __MINGW32__
+
+#include <stdarg.h>
+#include <windef.h>
+#include <wincon.h>
+#include <winbase.h>
+
+#  if defined(BUILD_GL32)
+#    define WINGDIAPI __declspec(dllexport)	
+#  else
+#    define __W32API_USE_DLLIMPORT__
+#  endif
+
+#include <wingdi.h>
+#include "GL/mesa_wgl.h"
+#include <stdlib.h>
+
+#else
+
+#define _GDI32_
+#include <windows.h>
+
+#endif
+#include "config.h"
+#include "glapi.h"
+#include "GL/wmesa.h"   /* protos for wmesa* functions */
+
+/*
+ * Pixel Format Descriptors
+ */
+
+/* Extend the PFD to include DB flag */
+struct __pixelformat__
+{
+    PIXELFORMATDESCRIPTOR pfd;	/* descriptor copied out by wglDescribePixelFormat() */
+    GLboolean doubleBuffered;	/* forwarded to WMesaCreateContext() by wglCreateContext() */
+};
+
+
+
+/* These are the PFD's supported by this driver. */
+struct __pixelformat__	pfd[] =
+{	/* WGL pixel format number N is stored at pfd[N-1] */
+#if 0 /* the two alpha-capable formats below are compiled out */
+    /* Double Buffer, alpha */
+    {	
+	{	
+	    sizeof(PIXELFORMATDESCRIPTOR),	1,
+	    PFD_DRAW_TO_WINDOW|PFD_SUPPORT_OPENGL|
+	    PFD_GENERIC_FORMAT|PFD_DOUBLEBUFFER|PFD_SWAP_COPY,
+	    PFD_TYPE_RGBA,
+	    24,	
+	    8, 0,	
+	    8, 8,	
+	    8, 16,	
+	    8, 24,
+	    0, 0, 0, 0, 0,	
+	    DEFAULT_SOFTWARE_DEPTH_BITS,	8,	
+	    0, 0, 0,	
+	    0, 0, 0 
+	},
+        GL_TRUE
+    },
+    /* Single Buffer, alpha */
+    {	
+	{	
+	    sizeof(PIXELFORMATDESCRIPTOR),	1,
+	    PFD_DRAW_TO_WINDOW|PFD_SUPPORT_OPENGL|
+	    PFD_GENERIC_FORMAT,
+	    PFD_TYPE_RGBA,
+	    24,	
+	    8, 0,	
+	    8, 8,	
+	    8, 16,	
+	    8, 24,
+	    0, 0, 0, 0,	0,	
+	    DEFAULT_SOFTWARE_DEPTH_BITS,	8,	
+	    0, 0, 0,	
+	    0, 0, 0
+	},
+        GL_FALSE
+    },
+#endif 
+    /* Double Buffer, no alpha */
+    {	
+	{	
+	    sizeof(PIXELFORMATDESCRIPTOR),	1,	/* nSize, nVersion */
+	    PFD_DRAW_TO_WINDOW|PFD_SUPPORT_OPENGL|
+	    PFD_GENERIC_FORMAT|PFD_DOUBLEBUFFER|PFD_SWAP_COPY,	/* dwFlags */
+	    PFD_TYPE_RGBA,	/* iPixelType */
+	    24,		/* cColorBits */
+	    8, 0,	/* cRedBits, cRedShift */
+	    8, 8,	/* cGreenBits, cGreenShift */
+	    8, 16,	/* cBlueBits, cBlueShift */
+	    0, 0,	/* cAlphaBits, cAlphaShift */
+	    0, 0, 0, 0,	0,	/* cAccumBits, then its red/green/blue/alpha parts */
+	    DEFAULT_SOFTWARE_DEPTH_BITS,	8,	/* cDepthBits, cStencilBits */
+	    0, 0, 0, 	/* cAuxBuffers, iLayerType, bReserved */
+	    0, 0, 0 	/* dwLayerMask, dwVisibleMask, dwDamageMask */
+	},
+        GL_TRUE	/* doubleBuffered */
+    },
+    /* Single Buffer, no alpha */
+    {	
+	{
+	    sizeof(PIXELFORMATDESCRIPTOR),	1,	/* same field order as the entry above */
+	    PFD_DRAW_TO_WINDOW|PFD_SUPPORT_OPENGL|
+	    PFD_GENERIC_FORMAT,	/* dwFlags: no PFD_DOUBLEBUFFER here */
+	    PFD_TYPE_RGBA,
+	    24,	
+	    8, 0,
+	    8, 8,
+	    8, 16,
+	    0, 0,
+	    0, 0, 0, 0,	0,
+	    DEFAULT_SOFTWARE_DEPTH_BITS,	8,	
+	    0, 0, 0,
+	    0, 0, 0 
+	},
+        GL_FALSE	/* doubleBuffered */
+    },
+};
+
+int npfd = sizeof(pfd) / sizeof(pfd[0]);	/* number of entries in pfd[] */
+
+
+/*
+ * Contexts
+ */
+
+typedef struct {
+    WMesaContext ctx;	/* NULL while the slot is unused */
+} MesaWglCtx;
+
+#define MESAWGL_CTX_MAX_COUNT 20	/* capacity of wgl_ctx[] */
+
+static MesaWglCtx wgl_ctx[MESAWGL_CTX_MAX_COUNT];	/* slots filled by wglCreateContext() */
+
+static unsigned ctx_count = 0;	/* incremented on create, decremented on delete */
+static int ctx_current = -1;	/* wgl_ctx[] index of the current context, -1 if none */
+static unsigned curPFD = 0;	/* 1-based pfd[] index from wglSetPixelFormat(); 0 = not set */
+
+static HDC CurrentHDC = 0;	/* written by wglMakeCurrent(), read by wglGetCurrentDC() */
+
+/* Create a WMesa context for hdc in a free wgl_ctx[] slot; NULL on failure. */
+WINGDIAPI HGLRC GLAPIENTRY wglCreateContext(HDC hdc)
+{
+    const struct __pixelformat__ *fmt;
+    int i;
+    /* curPFD is 1-based; 0 ("not set") would make pfd[curPFD-1] wrap around. */
+    if (curPFD == 0) {
+	SetLastError(0);
+	return(NULL);
+    }
+    fmt = &pfd[curPFD - 1];
+    /* wgl_ctx[] has static storage, so unused slots are NULL from the start. */
+    for (i = 0; i < MESAWGL_CTX_MAX_COUNT; i++) {
+	if (wgl_ctx[i].ctx != NULL)
+	    continue;
+	wgl_ctx[i].ctx = WMesaCreateContext(hdc, NULL, (GLboolean) GL_TRUE,
+			     (GLboolean) (fmt->doubleBuffered ? GL_TRUE : GL_FALSE),
+			     (GLboolean) (fmt->pfd.cAlphaBits ? GL_TRUE : GL_FALSE));
+	if (wgl_ctx[i].ctx == NULL)
+	    break;
+	ctx_count++;
+	return((HGLRC) wgl_ctx[i].ctx);
+    }
+    SetLastError(0);
+    return(NULL);
+}
+
+WINGDIAPI BOOL GLAPIENTRY wglDeleteContext(HGLRC hglrc)
+{   /* Destroys hglrc and frees its wgl_ctx[] slot; FALSE if it is not found. */
+    int i;
+    for ( i = 0; hglrc != NULL && i < MESAWGL_CTX_MAX_COUNT; i++ ) {
+	if ( wgl_ctx[i].ctx != (WMesaContext) hglrc ) continue;
+	WMesaMakeCurrent((WMesaContext) hglrc, NULL);
+	WMesaDestroyContext(wgl_ctx[i].ctx);
+	wgl_ctx[i].ctx = NULL;
+	ctx_count--;
+	if (ctx_current == i) ctx_current = -1;	/* no stale "current" slot */
+	return(TRUE);
+    }
+    SetLastError(0);	/* also reached for NULL, which must never match a free slot */
+    return(FALSE);
+}
+
+WINGDIAPI HGLRC GLAPIENTRY wglGetCurrentContext(VOID)
+{
+    /* ctx_current indexes wgl_ctx[]; a negative value means "none". */
+    if (ctx_current >= 0)
+	return (HGLRC) wgl_ctx[ctx_current].ctx;
+    return 0;
+}
+
+WINGDIAPI HDC GLAPIENTRY wglGetCurrentDC(VOID)
+{   /* Returns CurrentHDC, which wglMakeCurrent() maintains (0 initially). */
+    return CurrentHDC;
+}
+
+WINGDIAPI BOOL GLAPIENTRY wglMakeCurrent(HDC hdc, HGLRC hglrc)
+{
+    /* Binds hglrc to hdc, or unbinds when either is NULL.  CurrentHDC changes
+     * only together with ctx_current, so a failed call leaves both alone. */
+    int i;
+    if (!hdc || !hglrc) {
+	WMesaMakeCurrent(NULL, NULL);
+	ctx_current = -1;
+	CurrentHDC = 0;
+	return TRUE;
+    }
+    for ( i = 0; i < MESAWGL_CTX_MAX_COUNT; i++ ) {
+	if ( wgl_ctx[i].ctx != (WMesaContext) hglrc )
+	    continue;
+	WMesaMakeCurrent( (WMesaContext) hglrc, hdc );
+	ctx_current = i;
+	CurrentHDC = hdc;
+	return TRUE;
+    }
+    return FALSE;	/* hglrc is not in wgl_ctx[] */
+}
+
+
+WINGDIAPI int GLAPIENTRY wglChoosePixelFormat(HDC hdc,
+					      CONST 
+					      PIXELFORMATDESCRIPTOR *ppfd)
+{
+    /*
+     * Returns the 1-based index of the pfd[] entry that best matches *ppfd,
+     * or 0 if the request is malformed or no entry qualifies.
+     *
+     * An entry qualifies when it offers every capability flag the caller
+     * asked for and, unless the matching *_DONTCARE flag is set, agrees with
+     * the caller on double buffering and stereo.  Qualifying entries are
+     * ranked by how many of (pixel type, alpha bits) differ from the
+     * request; the first entry with the lowest count wins.
+     */
+    const DWORD capabilities = PFD_DRAW_TO_WINDOW | PFD_DRAW_TO_BITMAP |
+			       PFD_SUPPORT_GDI | PFD_SUPPORT_OPENGL;
+    int idx;
+    int chosen = 0;			/* 0 doubles as "nothing found" */
+    int lowest = 0x7FFFFFFF;
+
+    (void) hdc;
+
+    if (ppfd->nSize != sizeof(PIXELFORMATDESCRIPTOR) || ppfd->nVersion != 1) {
+	SetLastError(0);
+	return(0);
+    }
+    for (idx = 0; idx < npfd; idx++) {
+	const PIXELFORMATDESCRIPTOR *cand = &pfd[idx].pfd;
+	const DWORD differing = ppfd->dwFlags ^ cand->dwFlags;
+	int mismatches;
+
+	/* a capability the caller asked for but this entry lacks */
+	if ((ppfd->dwFlags & capabilities & ~cand->dwFlags) != 0)
+	    continue;
+
+	/* buffering mode must agree unless the caller doesn't care */
+	if ((differing & PFD_DOUBLEBUFFER) != 0 &&
+	    (ppfd->dwFlags & PFD_DOUBLEBUFFER_DONTCARE) == 0)
+	    continue;
+
+	/* likewise for stereo */
+	if ((differing & PFD_STEREO) != 0 &&
+	    (ppfd->dwFlags & PFD_STEREO_DONTCARE) == 0)
+	    continue;
+
+	mismatches = (ppfd->iPixelType != cand->iPixelType) +
+		     (ppfd->cAlphaBits != cand->cAlphaBits);
+	if (mismatches >= lowest)
+	    continue;
+
+	chosen = idx + 1;
+	lowest = mismatches;
+	if (lowest == 0)
+	    break;			/* nothing differs, stop searching */
+    }
+
+    if (chosen == 0)
+	SetLastError(0);
+    return(chosen);
+}
+
+WINGDIAPI int GLAPIENTRY wglDescribePixelFormat(HDC hdc,
+					        int iPixelFormat,
+					        UINT nBytes,
+					        LPPIXELFORMATDESCRIPTOR ppfd)
+{
+    /* Copies pfd[iPixelFormat-1] to *ppfd (if given); returns npfd, or 0. */
+    (void) hdc;
+    if (ppfd != NULL) {
+	const int usable = iPixelFormat >= 1 && iPixelFormat <= npfd &&
+			   nBytes == sizeof(PIXELFORMATDESCRIPTOR);
+	if (!usable) {
+	    SetLastError(0);
+	    return(0);
+	}
+	*ppfd = pfd[iPixelFormat - 1].pfd;
+    }
+    return(npfd);
+}
+
+WINGDIAPI PROC GLAPIENTRY wglGetProcAddress(LPCSTR lpszProc)
+{
+    /* Resolves the name through _glapi_get_proc_address(); NULL if that
+     * lookup comes back empty. */
+    PROC entry = (PROC) _glapi_get_proc_address((const char *) lpszProc);
+    if (!entry)
+	SetLastError(0);
+    return entry;
+}
+
+WINGDIAPI int GLAPIENTRY wglGetPixelFormat(HDC hdc)
+{
+    /* One format (curPFD) is kept for all DCs; 0 means none was set yet. */
+    (void) hdc;
+    if (curPFD != 0)
+	return(curPFD);
+    SetLastError(0);
+    return(0);
+}
+
+WINGDIAPI BOOL GLAPIENTRY wglSetPixelFormat(HDC hdc,int iPixelFormat,
+					const PIXELFORMATDESCRIPTOR *ppfd)
+{
+    /* Stores the 1-based pfd[] index in curPFD; hdc itself is ignored. */
+    const int in_range = iPixelFormat >= 1 && iPixelFormat <= npfd;
+    (void) hdc;
+    if (in_range && ppfd->nSize == sizeof(PIXELFORMATDESCRIPTOR)) {
+	curPFD = iPixelFormat;
+	return(TRUE);
+    }
+    SetLastError(0);
+    return(FALSE);
+}
+
+WINGDIAPI BOOL GLAPIENTRY wglSwapBuffers(HDC hdc)
+{   /* Delegates to WMesaSwapBuffers(); always reports TRUE. */
+    WMesaSwapBuffers(hdc);
+    return TRUE;
+}
+
+static FIXED FixedFromDouble(double d)
+{  /* Scales d by 65536 (16.16 fixed point) and returns those bits as a FIXED. */
+   long l = (long) (d * 65536L);
+   return *(FIXED *) (void *) &l;	/* NOTE(review): aliasing-unsafe cast; assumes sizeof(long) == sizeof(FIXED) */
+}
+
+
+/*
+** This is cribbed from FX/fxwgl.c, and seems to implement support
+** for bitmap fonts where the wglUseFontBitmapsA() code implements
+** support for outline fonts.  In combination they hopefully give
+** fairly generic support for fonts.
+*/
+static BOOL wglUseFontBitmaps_FX(HDC fontDevice, DWORD firstChar,
+                                 DWORD numChars, DWORD listBase)
+{
+    /* Compiles display list listBase + i as a glBitmap() of character
+     * firstChar + i, drawn with the font selected into fontDevice.  Returns
+     * FALSE if a scratch resource could not be obtained (the affected
+     * characters get no list), TRUE otherwise. */
+#define VERIFY(a) a
+    TEXTMETRIC metric;
+    BITMAPINFO *dibInfo;
+    HDC bitDevice;
+    COLORREF tempColor;
+    BOOL ok = TRUE;
+    int i;
+    
+    VERIFY(GetTextMetrics(fontDevice, &metric));
+    /* Header plus room for the second palette entry of a 1 bpp DIB */
+    dibInfo = (BITMAPINFO *) calloc(sizeof(BITMAPINFO) + sizeof(RGBQUAD), 1);
+    if (dibInfo == NULL)
+	return FALSE;
+    dibInfo->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
+    dibInfo->bmiHeader.biPlanes = 1;
+    dibInfo->bmiHeader.biBitCount = 1;
+    dibInfo->bmiHeader.biCompression = BI_RGB;
+    
+    bitDevice = CreateCompatibleDC(fontDevice);
+    if (bitDevice == NULL) {
+	free(dibInfo);
+	return FALSE;
+    }
+    
+    /* Swap fore and back colors so the bitmap has the right polarity */
+    tempColor = GetBkColor(bitDevice);
+    SetBkColor(bitDevice, GetTextColor(bitDevice));
+    SetTextColor(bitDevice, tempColor);
+    
+    /* Place chars based on base line */
+    VERIFY(SetTextAlign(bitDevice, TA_BASELINE) != GDI_ERROR ? 1 : 0);
+    
+    /* Select the source font before the loop, so that the first character
+     * is measured with it too rather than with the scratch DC's default. */
+    VERIFY(SelectObject(bitDevice, GetCurrentObject(fontDevice,OBJ_FONT)));
+    
+    for(i = 0; i < (int)numChars; i++) {
+	SIZE size;
+	char curChar;
+	int charWidth,charHeight,bmapWidth,bmapHeight,numBytes;
+	HBITMAP bitObject;
+	HGDIOBJ origBmap;
+	unsigned char *bmap;
+	
+	curChar = (char)(i + firstChar);
+	/* Find how high/wide this character is */
+	VERIFY(GetTextExtentPoint32(bitDevice, &curChar, 1, &size));
+	
+	/* Create the output bitmap, its width rounded up to 32 bits */
+	charWidth = size.cx;
+	charHeight = size.cy;
+	bmapWidth = ((charWidth + 31) / 32) * 32;   
+	bmapHeight = charHeight;
+	numBytes = (bmapWidth * bmapHeight) / 8;
+	bitObject = CreateCompatibleBitmap(bitDevice, bmapWidth, bmapHeight);
+	bmap = (unsigned char *) malloc(numBytes);
+	if (bitObject == NULL || (bmap == NULL && numBytes > 0)) {
+	    ok = FALSE;
+	} else {
+	    /* Assign the output bitmap to the device and clear it */
+	    origBmap = SelectObject(bitDevice, bitObject);
+	    VERIFY(PatBlt(bitDevice, 0, 0, bmapWidth, bmapHeight, BLACKNESS));
+	    
+	    /* Draw the character */
+	    VERIFY(TextOut(bitDevice, 0, metric.tmAscent, &curChar, 1));
+	    /* Unselect our bmap object */
+	    VERIFY(SelectObject(bitDevice, origBmap));
+	    
+	    /* Convert the display dependent representation to a 1 bit deep DIB */
+	    dibInfo->bmiHeader.biWidth = bmapWidth;
+	    dibInfo->bmiHeader.biHeight = bmapHeight;
+	    VERIFY(GetDIBits(bitDevice, bitObject, 0, bmapHeight, bmap,
+			     dibInfo, DIB_RGB_COLORS));
+	    
+	    /* Create the GL object */
+	    glNewList(i + listBase, GL_COMPILE);
+	    glBitmap(bmapWidth, bmapHeight, 0.0, (GLfloat)metric.tmDescent,
+		     (GLfloat)charWidth, 0.0, bmap);
+	    glEndList();
+	}
+	
+	/* Destroy the bmap object and deallocate the bitmap data */
+	if (bitObject != NULL)
+	    DeleteObject(bitObject);
+	free(bmap);
+    }
+    
+    /* Destroy the DC */
+    VERIFY(DeleteDC(bitDevice));
+    free(dibInfo);
+    
+    return ok;
+#undef VERIFY
+}
+
+WINGDIAPI BOOL GLAPIENTRY wglUseFontBitmapsA(HDC hdc, DWORD first,
+					     DWORD count, DWORD listBase)
+{
+    /* Compiles display list listBase + i as a glBitmap() of glyph first + i
+     * of hdc's font.  Returns FALSE if count is 0 or any glyph failed. */
+    int i;
+    GLuint font_list;
+    DWORD size;
+    GLYPHMETRICS gm;
+    HANDLE hBits;
+    LPSTR lpBits;
+    MAT2 mat;
+    int  success = TRUE;
+    
+    if (count == 0)
+	return FALSE;
+    
+    font_list = listBase;
+    
+    /* 2x2 glyph transform: identity with the Y axis negated */
+    mat.eM11 = FixedFromDouble(1);
+    mat.eM12 = FixedFromDouble(0);
+    mat.eM21 = FixedFromDouble(0);
+    mat.eM22 = FixedFromDouble(-1);
+    
+    memset(&gm,0,sizeof(gm));
+    
+    /*
+    ** If we can't get the glyph outline, it may be because this is a fixed
+    ** font.  Try processing it that way.
+    */
+    if( GetGlyphOutline(hdc, first, GGO_BITMAP, &gm, 0, NULL, &mat)
+	== GDI_ERROR ) {
+	return wglUseFontBitmaps_FX( hdc, first, count, listBase );
+    }
+    
+    /*
+    ** Otherwise process all desired characters.
+    */
+    for (i = 0; i < (int)count; i++) {
+	DWORD got;
+	
+	glNewList( font_list+i, GL_COMPILE );
+	/* ask how much space the bitmap needs */
+	size = GetGlyphOutline(hdc, first + i, GGO_BITMAP, 
+			       &gm, 0, NULL, &mat);
+	if (size == GDI_ERROR) {
+	    glEndList( );
+	    success = FALSE;
+	    continue;
+	}
+	/* allocate it; a failed allocation leaves this glyph's list empty
+	 * instead of handing a NULL buffer to GDI and glBitmap() */
+	hBits  = GlobalAlloc(GHND, size+1);
+	lpBits = hBits ? (LPSTR) GlobalLock(hBits) : NULL;
+	if (lpBits == NULL) {
+	    if (hBits)
+		GlobalFree(hBits);
+	    glEndList( );
+	    success = FALSE;
+	    continue;
+	}
+	
+	got = 
+	    GetGlyphOutline(hdc,         /* handle to device context */
+			    first + i,   /* character to query */
+			    GGO_BITMAP,  /* format of data to return */
+			    &gm,         /* ptr to structure for metrics*/
+			    size,        /* size of buffer for data */
+			    lpBits,      /* pointer to buffer for data */
+			    &mat         /* pointer to transformation */
+			    /* matrix structure */
+		);
+	if (got != GDI_ERROR) {
+	    glBitmap(gm.gmBlackBoxX,gm.gmBlackBoxY,
+		     (GLfloat)-gm.gmptGlyphOrigin.x,
+		     (GLfloat)gm.gmptGlyphOrigin.y,
+		     (GLfloat)gm.gmCellIncX,
+		     (GLfloat)gm.gmCellIncY,
+		     (const GLubyte * )lpBits);
+	} else {
+	    success = FALSE;
+	}
+	GlobalUnlock(hBits);
+	GlobalFree(hBits);
+	glEndList( );
+    }
+    
+    return success;
+}
+
+WINGDIAPI BOOL GLAPIENTRY wglShareLists(HGLRC hglrc1,
+					HGLRC hglrc2)
+{   /* Forwards both handles to WMesaShareLists(); always reports TRUE. */
+    WMesaShareLists((WMesaContext)hglrc1, (WMesaContext)hglrc2);
+    return(TRUE);
+}
+
+
+
+/* NOT IMPLEMENTED YET */
+WINGDIAPI BOOL GLAPIENTRY wglCopyContext(HGLRC hglrcSrc,
+					 HGLRC hglrcDst,
+					 UINT mask)
+{   /* Stub: nothing is copied; every argument is ignored and FALSE returned. */
+    (void) hglrcSrc; (void) hglrcDst; (void) mask;
+    return(FALSE);
+}
+
+WINGDIAPI HGLRC GLAPIENTRY wglCreateLayerContext(HDC hdc,
+						 int iLayerPlane)
+{
+    /* Only layer plane 0 is supported, and for it this is the same as
+     * wglCreateContext(); any other plane yields NULL. */
+    SetLastError(0);
+    return (iLayerPlane == 0) ? wglCreateContext( hdc ) : NULL;
+}
+
+
+WINGDIAPI BOOL GLAPIENTRY wglUseFontBitmapsW(HDC hdc,
+					     DWORD first,
+					     DWORD count,
+					     DWORD listBase)
+{   /* Stub: no lists are built; every argument is ignored and FALSE returned. */
+    (void) hdc; (void) first; (void) count; (void) listBase;
+    return FALSE;
+}
+
+WINGDIAPI BOOL GLAPIENTRY wglUseFontOutlinesA(HDC hdc,
+					      DWORD first,
+					      DWORD count,
+					      DWORD listBase,
+					      FLOAT deviation,
+					      FLOAT extrusion,
+					      int format,
+					      LPGLYPHMETRICSFLOAT lpgmf)
+{   /* Stub: ignores every argument, calls SetLastError(0), returns FALSE. */
+    (void) hdc; (void) first; (void) count;
+    (void) listBase; (void) deviation; (void) extrusion; (void) format;
+    (void) lpgmf;
+    SetLastError(0);
+    return(FALSE);
+}
+
+WINGDIAPI BOOL GLAPIENTRY wglUseFontOutlinesW(HDC hdc,
+					      DWORD first,
+					      DWORD count,
+					      DWORD listBase,
+					      FLOAT deviation,
+					      FLOAT extrusion,
+					      int format,
+					      LPGLYPHMETRICSFLOAT lpgmf)
+{   /* Stub: ignores every argument, calls SetLastError(0), returns FALSE. */
+    (void) hdc; (void) first; (void) count;
+    (void) listBase; (void) deviation; (void) extrusion; (void) format;
+    (void) lpgmf;
+    SetLastError(0);
+    return(FALSE);
+}
+
+WINGDIAPI BOOL GLAPIENTRY wglDescribeLayerPlane(HDC hdc,
+						int iPixelFormat,
+						int iLayerPlane,
+						UINT nBytes,
+						LPLAYERPLANEDESCRIPTOR plpd)
+{   /* Stub: *plpd is never written; calls SetLastError(0) and returns FALSE. */
+    (void) hdc; (void) iPixelFormat; (void) iLayerPlane; 
+    (void) nBytes; (void) plpd;
+    SetLastError(0);
+    return(FALSE);
+}
+
+WINGDIAPI int GLAPIENTRY wglSetLayerPaletteEntries(HDC hdc,
+						   int iLayerPlane,
+						   int iStart,
+						   int cEntries,
+						   CONST COLORREF *pcr)
+{   /* Stub: no entries are stored; calls SetLastError(0) and returns 0. */
+    (void) hdc; (void) iLayerPlane; (void) iStart; 
+    (void) cEntries; (void) pcr;
+    SetLastError(0);
+    return(0);
+}
+
+WINGDIAPI int GLAPIENTRY wglGetLayerPaletteEntries(HDC hdc,
+						   int iLayerPlane,
+						   int iStart,
+						   int cEntries,
+						   COLORREF *pcr)
+{   /* Stub: *pcr is never written; calls SetLastError(0) and returns 0. */
+    (void) hdc; (void) iLayerPlane; (void) iStart; (void) cEntries; (void) pcr;
+    SetLastError(0);
+    return(0);
+}
+
+WINGDIAPI BOOL GLAPIENTRY wglRealizeLayerPalette(HDC hdc,
+						 int iLayerPlane,
+						 BOOL bRealize)
+{   /* Stub: ignores every argument, calls SetLastError(0), returns FALSE. */
+    (void) hdc; (void) iLayerPlane; (void) bRealize;
+    SetLastError(0);
+    return(FALSE);
+}
+
+WINGDIAPI BOOL GLAPIENTRY wglSwapLayerBuffers(HDC hdc,
+					      UINT fuPlanes)
+{   /* Stub: nothing is swapped; calls SetLastError(0) and returns FALSE. */
+    (void) hdc; (void) fuPlanes;
+    SetLastError(0);
+    return(FALSE);
+}
+
+WINGDIAPI const char * GLAPIENTRY wglGetExtensionsStringARB(HDC hdc)
+{   /* hdc is not consulted: the same constant string is returned for every DC. */
+    return "WGL_ARB_extensions_string";
+}
diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c
new file mode 100644
index 0000000000..22b0c46b4f
--- /dev/null
+++ b/src/mesa/drivers/windows/gdi/wmesa.c
@@ -0,0 +1,1661 @@
+/*
+ * Windows (Win32/Win64) device driver for Mesa
+ *
+ */
+
+#include "wmesadef.h"
+#include "colors.h"
+#include <GL/wmesa.h>
+#include <winuser.h>
+#include "context.h"
+#include "extensions.h"
+#include "framebuffer.h"
+#include "renderbuffer.h"
+#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
+#include "vbo/vbo.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+
+/* Head of the singly linked list (via ->next) of our Framebuffers (windows) */
+static WMesaFramebuffer FirstFramebuffer = NULL;
+
+
+/**
+ * Create a zero-initialized WMesaFramebuffer for the given HDC (device
+ * context handle) and link it at the head of the list.  NULL on failure.
+ */
+WMesaFramebuffer
+wmesa_new_framebuffer(HDC hdc, GLvisual *visual)
+{
+    WMesaFramebuffer pwfb
+        = (WMesaFramebuffer) calloc(1, sizeof(struct wmesa_framebuffer));
+    if (pwfb) {
+        _mesa_initialize_window_framebuffer(&pwfb->Base, visual);
+        pwfb->hDC = hdc;
+        /* insert at head of list */
+        pwfb->next = FirstFramebuffer;
+        FirstFramebuffer = pwfb;
+    }
+    return pwfb;
+}
+
+/**
+ * Given an hdc, unlink the corresponding WMesaFramebuffer from the list
+ * and hand its gl_framebuffer to _mesa_reference_framebuffer(&fb, NULL).
+ * Does nothing when no framebuffer is registered for that hdc.
+ */
+void
+wmesa_free_framebuffer(HDC hdc)
+{
+    /* 'link' always addresses the pointer that leads to the current node:
+     * first the list head, then each node's 'next' field. */
+    WMesaFramebuffer *link = &FirstFramebuffer;
+
+    while (*link && (*link)->hDC != hdc)
+        link = &(*link)->next;
+
+    if (*link) {
+        WMesaFramebuffer found = *link;
+        struct gl_framebuffer *fb = &found->Base;
+        *link = found->next;
+        _mesa_reference_framebuffer(&fb, NULL);
+    }
+}
+
+/**
+ * Given an hdc, return the corresponding WMesaFramebuffer, or NULL if none.
+ */
+WMesaFramebuffer
+wmesa_lookup_framebuffer(HDC hdc)
+{
+    WMesaFramebuffer node = FirstFramebuffer;
+
+    /* linear scan: stop at the first node whose hDC matches, or at the end */
+    while (node && node->hDC != hdc)
+        node = node->next;
+    return node;
+}
+
+
+/**
+ * Cast a GLframebuffer pointer to WMesaFramebuffer (no lookup is performed).
+ */
+static WMesaFramebuffer wmesa_framebuffer(GLframebuffer *fb)
+{
+    return (WMesaFramebuffer) fb;
+}
+
+
+/**
+ * Cast a GLcontext pointer to WMesaContext (no lookup is performed).
+ */
+static WMesaContext wmesa_context(const GLcontext *ctx)
+{
+    return (WMesaContext) ctx;
+}
+
+
+/*
+ * GetString function of this driver: supplies the GL_RENDERER string and
+ * returns NULL for every other name.  ctx is not used.
+ */
+static const GLubyte *wmesa_get_string(GLcontext *ctx, GLenum name)
+{
+    if (name != GL_RENDERER) return NULL;
+    return (GLubyte *) "Mesa Windows GDI Driver";
+}
+
+
+/*
+ * Store hDC's color depth in pwfb and derive pwfb->pixelformat from it.
+ */
+static void wmSetPixelFormat(WMesaFramebuffer pwfb, HDC hDC)
+{
+    pwfb->cColorBits = GetDeviceCaps(hDC, BITSPIXEL);
+
+    /* Only depths of 0, 16, 24 or 32 bits pass this check */
+    assert(pwfb->cColorBits == 0 ||
+	   pwfb->cColorBits == 16 || 
+	   pwfb->cColorBits == 24 || 
+	   pwfb->cColorBits == 32);
+
+    switch(pwfb->cColorBits){
+    case 8:	/* NOTE(review): rejected by the assert above when asserts are on */
+	pwfb->pixelformat = PF_INDEX8;
+	break;
+    case 16:
+	pwfb->pixelformat = PF_5R6G5B;
+	break;
+    case 24:
+    case 32:
+	pwfb->pixelformat = PF_8R8G8B;
+	break;
+    default:	/* includes a depth of 0 */
+	pwfb->pixelformat = PF_BADFORMAT;
+    }
+}
+
+
+/**
+ * Create the DIB section used as the back buffer, plus a memory DC with the
+ * DIB selected into it.  The span routines write into pwfb->pbPixels and the
+ * result is blitted to the window.  Returns FALSE if GDI creation fails.
+ */
+BOOL wmCreateBackingStore(WMesaFramebuffer pwfb, long lxSize, long lySize)
+{
+    LPBITMAPINFO pbmi = &(pwfb->bmi);
+    HDC          hic;
+
+    pbmi->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
+    pbmi->bmiHeader.biWidth = lxSize;
+    pbmi->bmiHeader.biHeight= -lySize;	/* negative height: top-down DIB */
+    pbmi->bmiHeader.biPlanes = 1;
+    pbmi->bmiHeader.biBitCount = GetDeviceCaps(pwfb->hDC, BITSPIXEL);
+    pbmi->bmiHeader.biCompression = BI_RGB;
+    pbmi->bmiHeader.biSizeImage = 0;
+    pbmi->bmiHeader.biXPelsPerMeter = 0;
+    pbmi->bmiHeader.biYPelsPerMeter = 0;
+    pbmi->bmiHeader.biClrUsed = 0;
+    pbmi->bmiHeader.biClrImportant = 0;
+
+    pwfb->cColorBits = pbmi->bmiHeader.biBitCount;
+    pwfb->ScanWidth = (lxSize * (pwfb->cColorBits / 8) + 3) & ~3;
+    wmSetPixelFormat(pwfb, pwfb->hDC);	/* done on success and failure alike */
+
+    hic = CreateIC("display", NULL, NULL, NULL);
+    pwfb->dib_hDC = CreateCompatibleDC(hic);
+    pwfb->hbmDIB = CreateDIBSection(hic, &pwfb->bmi, DIB_RGB_COLORS,
+				    (void **)&(pwfb->pbPixels), 0, 0);
+    if (hic)
+	DeleteDC(hic);
+    if (!pwfb->dib_hDC || !pwfb->hbmDIB) {
+	/* undo the partial setup so wmDeleteBackingStore() finds no DIB */
+	if (pwfb->hbmDIB) DeleteObject(pwfb->hbmDIB);
+	if (pwfb->dib_hDC) DeleteDC(pwfb->dib_hDC);
+	pwfb->hbmDIB = NULL; pwfb->dib_hDC = NULL; pwfb->pbPixels = NULL;
+	return FALSE;
+    }
+    pwfb->hOldBitmap = SelectObject(pwfb->dib_hDC, pwfb->hbmDIB);
+    return TRUE;
+}
+
+
+/* Release the back-buffer DIB and its memory DC, if a DIB exists. */
+static void wmDeleteBackingStore(WMesaFramebuffer pwfb)
+{
+    if (!pwfb->hbmDIB) return;
+    SelectObject(pwfb->dib_hDC, pwfb->hOldBitmap);	/* deselect the DIB first */
+    DeleteDC(pwfb->dib_hDC);
+    DeleteObject(pwfb->hbmDIB);
+}
+
+
+/**
+ * Report the size of the drawable behind hdc: the client rectangle of its
+ * window if it has one, otherwise the HORZRES/VERTRES device caps.
+ */
+static void
+get_window_size(HDC hdc, GLuint *width, GLuint *height)
+{
+    HWND hwnd = WindowFromDC(hdc);
+    RECT rect;
+    if (!hwnd) {
+        /* Memory context: contributed code uses the desktop size here (?) */
+        *width = GetDeviceCaps(hdc, HORZRES);
+        *height = GetDeviceCaps(hdc, VERTRES);
+        return;
+    }
+    GetClientRect(hwnd, &rect);
+    *width = rect.right - rect.left;
+    *height = rect.bottom - rect.top;
+}
+
+
+/* Query the size of the window/DC recorded in the framebuffer 'buffer'. */
+static void
+wmesa_get_buffer_size(GLframebuffer *buffer, GLuint *width, GLuint *height)
+{
+    get_window_size(wmesa_framebuffer(buffer)->hDC, width, height);
+}
+
+
+/*
+ * Flush: for double-buffered visuals, copy the back-buffer DC onto the
+ * window DC over the framebuffer's full size.  Single buffer: nothing to do.
+ */
+static void wmesa_flush(GLcontext *ctx)
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->WinSysDrawBuffer);
+
+    if (ctx->Visual.doubleBufferMode != 1)
+        return;
+    BitBlt(pwfb->hDC, 0, 0, pwfb->Base.Width, pwfb->Base.Height,
+           pwfb->dib_hDC, 0, 0, SRCCOPY);
+}
+
+
+/**********************************************************************/
+/*****                   CLEAR Functions                          *****/
+/**********************************************************************/
+
+/* If we do not implement these, Mesa clears the buffers via the pixel
+ * span writing interface, which is very slow for a clear operation.
+ */
+
+/*
+ * Set the color used to clear the color buffer.
+ * Stores the color as a COLORREF (alpha is dropped) and recreates the GDI
+ * pen and brush that clear() selects for front-buffer clears.
+ */
+static void clear_color(GLcontext *ctx, const GLfloat color[4])
+{
+    WMesaContext pwc = wmesa_context(ctx);
+    GLubyte col[3];
+    CLAMPED_FLOAT_TO_UBYTE(col[0], color[0]);
+    CLAMPED_FLOAT_TO_UBYTE(col[1], color[1]);
+    CLAMPED_FLOAT_TO_UBYTE(col[2], color[2]);
+    pwc->clearColorRef = RGB(col[0], col[1], col[2]);
+    /* drop the previous pen/brush before replacing them */
+    DeleteObject(pwc->clearPen);
+    DeleteObject(pwc->clearBrush);
+    pwc->clearPen = CreatePen(PS_SOLID, 1, pwc->clearColorRef);
+    pwc->clearBrush = CreateSolidBrush(pwc->clearColorRef);
+}
+
+
+/*
+ * Clear the region _Xmin.._Xmax / _Ymin.._Ymax of the draw buffer using the
+ * clear color recorded by clear_color() above.
+ *
+ * This procedure clears the front and/or the back COLOR buffers.
+ * Only the "left" buffers are handled since we are not stereo.
+ * Any other bits left in the mask are passed on to swrast.
+ */
+
+static void clear(GLcontext *ctx, GLbitfield mask)
+{
+#define FLIP(Y)  (ctx->DrawBuffer->Height - (Y) - 1)
+    const GLint x = ctx->DrawBuffer->_Xmin;
+    const GLint y = ctx->DrawBuffer->_Ymin;
+    const GLint height = ctx->DrawBuffer->_Ymax - ctx->DrawBuffer->_Ymin;
+    const GLint width  = ctx->DrawBuffer->_Xmax - ctx->DrawBuffer->_Xmin;
+
+    WMesaContext pwc = wmesa_context(ctx);
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    int done = 0;	/* set once a whole-buffer memset cleared the back buffer */
+
+    /* Let swrast do all the work if the masks are not set to
+     * clear all channels. */
+    if (!ctx->Color.ColorMask[0][0] ||
+	!ctx->Color.ColorMask[0][1] ||
+	!ctx->Color.ColorMask[0][2] ||
+	!ctx->Color.ColorMask[0][3]) {
+	_swrast_Clear(ctx, mask);
+	return;
+    }
+
+    /* Back buffer */
+    if (mask & BUFFER_BIT_BACK_LEFT) { 
+	
+	int     i, rowSize; 
+	UINT    bytesPerPixel = pwfb->cColorBits / 8; 
+	LPBYTE  lpb, clearRow;
+	LPWORD  lpw;
+	BYTE    bColor; 
+	WORD    wColor; 
+	BYTE    r, g, b; 
+	DWORD   dwColor; 
+	LPDWORD lpdw; 
+	
+	/* Try for a fast clear - clearing entire buffer with a single
+	 * byte value. */
+	if (width == ctx->DrawBuffer->Width &&
+            height == ctx->DrawBuffer->Height) { /* entire buffer */
+	    /* Now check for an easy clear value, i.e. a pixel whose bytes
+	     * are all equal so that memset() can be used */
+	    switch (bytesPerPixel) {
+	    case 1:
+		bColor = BGR8(GetRValue(pwc->clearColorRef), 
+			      GetGValue(pwc->clearColorRef), 
+			      GetBValue(pwc->clearColorRef));
+		memset(pwfb->pbPixels, bColor, 
+		       pwfb->ScanWidth * height);
+		done = 1;
+		break;
+	    case 2:
+		wColor = BGR16(GetRValue(pwc->clearColorRef), 
+			       GetGValue(pwc->clearColorRef), 
+			       GetBValue(pwc->clearColorRef)); 
+		if (((wColor >> 8) & 0xff) == (wColor & 0xff)) {
+		    memset(pwfb->pbPixels, wColor & 0xff, 
+			   pwfb->ScanWidth * height);
+		    done = 1;
+		}
+		break;
+	    case 3:
+		/* fall through */
+	    case 4:
+		if (GetRValue(pwc->clearColorRef) == 
+		    GetGValue(pwc->clearColorRef) &&
+		    GetRValue(pwc->clearColorRef) == 
+		    GetBValue(pwc->clearColorRef)) {
+		    memset(pwfb->pbPixels, 
+			   GetRValue(pwc->clearColorRef), 
+			   pwfb->ScanWidth * height);
+		    done = 1;
+		}
+		break;
+	    default:
+		break;
+	    }
+	} /* all */
+
+	if (!done) {
+	    /* Need to clear a row at a time.  Begin by setting the first
+	     * row in the area to be cleared to the clear color. */
+	    
+	    clearRow = pwfb->pbPixels + 
+		pwfb->ScanWidth * FLIP(y) +
+		bytesPerPixel * x; 
+	    switch (bytesPerPixel) {
+	    case 1:
+		lpb = clearRow;
+		bColor = BGR8(GetRValue(pwc->clearColorRef), 
+			      GetGValue(pwc->clearColorRef), 
+			      GetBValue(pwc->clearColorRef));
+		memset(lpb, bColor, width);
+		break;
+	    case 2:
+		lpw = (LPWORD)clearRow;
+		wColor = BGR16(GetRValue(pwc->clearColorRef), 
+			       GetGValue(pwc->clearColorRef), 
+			       GetBValue(pwc->clearColorRef)); 
+		for (i=0; i<width; i++)
+		    *lpw++ = wColor;
+		break;
+	    case 3: 
+		lpb = clearRow;
+		r = GetRValue(pwc->clearColorRef); 
+		g = GetGValue(pwc->clearColorRef); 
+		b = GetBValue(pwc->clearColorRef); 
+		for (i=0; i<width; i++) {
+		    *lpb++ = b; 
+		    *lpb++ = g; 
+		    *lpb++ = r; 
+		} 
+		break;
+	    case 4: 
+		lpdw = (LPDWORD)clearRow; 
+		dwColor = BGR32(GetRValue(pwc->clearColorRef), 
+				GetGValue(pwc->clearColorRef), 
+				GetBValue(pwc->clearColorRef)); 
+		for (i=0; i<width; i++)
+		    *lpdw++ = dwColor;
+		break;
+	    default:
+		break;
+	    } /* switch */
+	    
+	    /* copy cleared row to the rows before it in memory (height-1 of them) */
+	    lpb = clearRow - pwfb->ScanWidth;
+	    rowSize = width * bytesPerPixel;
+	    for (i=1; i<height; i++) { 
+		memcpy(lpb, clearRow, rowSize); 
+		lpb -= pwfb->ScanWidth; 
+	    } 
+	} /* not done */
+	mask &= ~BUFFER_BIT_BACK_LEFT;
+    } /* back buffer */ 
+
+    /* front buffer: draw a filled GDI rectangle with the clear pen/brush */
+    if (mask & BUFFER_BIT_FRONT_LEFT) { 
+	HDC DC = pwc->hDC; 
+	HPEN Old_Pen = SelectObject(DC, pwc->clearPen); 
+	HBRUSH Old_Brush = SelectObject(DC, pwc->clearBrush);
+	Rectangle(DC,
+		  x,
+		  FLIP(y) + 1,
+		  x + width + 1,
+		  FLIP(y) - height + 1);
+	SelectObject(DC, Old_Pen); 
+	SelectObject(DC, Old_Brush); 
+	mask &= ~BUFFER_BIT_FRONT_LEFT;
+    } /* front buffer */ 
+    
+    /* Call swrast if there is anything left to clear (like DEPTH) */ 
+    if (mask) 
+	_swrast_Clear(ctx, mask);
+  
+#undef FLIP
+} 
+
+
+/**********************************************************************/
+/*****                   PIXEL Functions                          *****/
+/**********************************************************************/
+
+#define FLIP(Y)  (rb->Height - (Y) - 1)  /* mirror Y within rb->Height; needs a local 'rb' */
+
+
+/**
+ ** Front Buffer reading/writing
+ ** These are slow, but work with all non-indexed visual types.
+ **/
+
+/*
+ * Write a horizontal span of RGBA color pixels with a boolean mask.
+ *
+ *   n     number of pixels in the span
+ *   x, y  first pixel; y is mirrored with FLIP() before drawing
+ *   rgba  the n source colors
+ *   mask  optional; when non-NULL only pixels with mask[i] set are drawn
+ *
+ * Short or masked spans are plotted pixel by pixel with SetPixel(), which
+ * leaves masked-out pixels untouched.  Longer unmasked spans are packed
+ * into a temporary one-row 32-bit bitmap and copied with one BitBlt().
+ */
+static void write_rgba_span_front(const GLcontext *ctx, 
+				   struct gl_renderbuffer *rb, 
+				   GLuint n, GLint x, GLint y,
+				   const GLubyte rgba[][4], 
+				   const GLubyte mask[] )
+{
+   WMesaContext pwc = wmesa_context(ctx);
+   WMesaFramebuffer pwfb = wmesa_lookup_framebuffer(pwc->hDC);
+   HBITMAP bmp=0;
+   HGDIOBJ oldbmp=0;
+   HDC mdc=0;
+   typedef union
+   {
+      unsigned i;
+      struct {
+         unsigned b:8, g:8, r:8, a:8;
+      };
+   } BGRA;
+   BGRA *bgra, c;
+   GLuint i;
+
+   y=FLIP(y);
+
+   /* The blit below replaces every pixel of the span, so it is only valid
+    * without a mask.  The threshold of 16 pixels is just guessed. */
+   if (n < 16 || mask) {
+      if (mask) {
+         for (i=0; i<n; i++)
+            if (mask[i])
+               SetPixel(pwc->hDC, x+i, y,
+                        RGB(rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP]));
+      }
+      else {
+         for (i=0; i<n; i++)
+            SetPixel(pwc->hDC, x+i, y,
+                     RGB(rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP]));
+      }
+      return;
+   }
+
+   if (!pwfb) {
+      _mesa_problem(NULL, "wmesa: write_rgba_span_front on unknown hdc");
+      return;
+   }
+   bgra=malloc(n*sizeof(BGRA));
+   if (!bgra) {
+      _mesa_problem(NULL, "wmesa: write_rgba_span_front: out of memory");
+      return;
+   }
+   /* pack the colors as 32-bit b,g,r,a words for CreateBitmap() */
+   c.i=0;
+   for (i=0; i<n; i++) {
+      c.r=rgba[i][RCOMP];
+      c.g=rgba[i][GCOMP];
+      c.b=rgba[i][BCOMP];
+      c.a=rgba[i][ACOMP];
+      bgra[i]=c;
+   }
+
+   bmp=CreateBitmap(n, 1,  1, 32, bgra);
+   mdc=CreateCompatibleDC(pwfb->hDC);
+   if (bmp && mdc) {
+      /* remember the DC's original bitmap and put it back before the
+       * temporary bitmap is deleted */
+      oldbmp=SelectObject(mdc, bmp);
+      BitBlt(pwfb->hDC, x, y, n, 1, mdc, 0, 0, SRCCOPY);
+      SelectObject(mdc, oldbmp);
+   }
+   if (bmp)
+      DeleteObject(bmp);
+   if (mdc)
+      DeleteDC(mdc);
+   free(bgra);
+}
+
+/*
+ * Write a horizontal span of RGB color pixels with a boolean mask.
+ * Pixels are plotted one at a time with SetPixel(); NULL mask = all pixels.
+ */
+static void write_rgb_span_front(const GLcontext *ctx,
+				  struct gl_renderbuffer *rb,
+				  GLuint n, GLint x, GLint y,
+				  const GLubyte rgb[][3],
+				  const GLubyte mask[] )
+{
+    const HDC dc = wmesa_context(ctx)->hDC;
+    const GLint row = FLIP(y);
+    GLuint k;
+
+    if (!mask) {
+        for (k = 0; k < n; k++)
+            SetPixel(dc, x + k, row,
+                     RGB(rgb[k][RCOMP], rgb[k][GCOMP], rgb[k][BCOMP]));
+        return;
+    }
+    for (k = 0; k < n; k++)
+        if (mask[k])
+            SetPixel(dc, x + k, row,
+                     RGB(rgb[k][RCOMP], rgb[k][GCOMP], rgb[k][BCOMP]));
+}
+
+/*
+ * Write a horizontal span of pixels with a boolean mask.  The current color
+ * is used for all pixels.  A NULL mask means every pixel is written.
+ */
+static void write_mono_rgba_span_front(const GLcontext *ctx,
+					struct gl_renderbuffer *rb,
+					GLuint n, GLint x, GLint y,
+					const GLchan color[4],
+					const GLubyte mask[])
+{
+    const COLORREF fill = RGB(color[RCOMP], color[GCOMP], color[BCOMP]);
+    const HDC dc = wmesa_context(ctx)->hDC;
+    const GLint row = FLIP(y);
+    GLuint k;
+
+    if (!mask) {
+        /* unmasked: plot the whole span */
+        for (k = 0; k < n; k++)
+            SetPixel(dc, x + k, row, fill);
+        return;
+    }
+
+    /* masked: plot only where the mask byte is non-zero */
+    for (k = 0; k < n; k++)
+        if (mask[k])
+            SetPixel(dc, x + k, row, fill);
+}
+
+/* Write an array of RGBA pixels with a boolean mask (read for every pixel). */
+static void write_rgba_pixels_front(const GLcontext *ctx,
+				     struct gl_renderbuffer *rb,
+				     GLuint n,
+				     const GLint x[], const GLint y[],
+				     const GLubyte rgba[][4],
+				     const GLubyte mask[] )
+{
+    const HDC dc = wmesa_context(ctx)->hDC;
+    GLuint k;
+    for (k = 0; k < n; k++) {
+        if (!mask[k])
+            continue;
+        SetPixel(dc, x[k], FLIP(y[k]),
+                 RGB(rgba[k][RCOMP], rgba[k][GCOMP], rgba[k][BCOMP]));
+    }
+}
+
+
+
+/*
+ * Write an array of pixels with a boolean mask.  The current color
+ * is used for all pixels.  The mask is read for every pixel.
+ */
+static void write_mono_rgba_pixels_front(const GLcontext *ctx,
+					  struct gl_renderbuffer *rb,
+					  GLuint n,
+					  const GLint x[], const GLint y[],
+					  const GLchan color[4],
+					  const GLubyte mask[] )
+{
+    const COLORREF fill = RGB(color[RCOMP], color[GCOMP], color[BCOMP]);
+    const HDC dc = wmesa_context(ctx)->hDC;
+    GLuint k;
+
+    for (k = 0; k < n; k++) {
+        if (mask[k])
+            SetPixel(dc, x[k], FLIP(y[k]), fill);
+    }
+}
+
+/* Read a horizontal span of color pixels via GetPixel(); alpha reads as 255. */
+static void read_rgba_span_front(const GLcontext *ctx,
+				  struct gl_renderbuffer *rb,
+				  GLuint n, GLint x, GLint y,
+				  GLubyte rgba[][4] )
+{
+    const HDC dc = wmesa_context(ctx)->hDC;
+    const GLint row = FLIP(y);
+    GLuint k;
+
+    for (k = 0; k < n; k++) {
+        const COLORREF texel = GetPixel(dc, x + k, row);
+        rgba[k][RCOMP] = GetRValue(texel);
+        rgba[k][GCOMP] = GetGValue(texel);
+        rgba[k][BCOMP] = GetBValue(texel);
+        rgba[k][ACOMP] = 255;
+    }
+}
+
+
+/* Read an array of color pixels via GetPixel(); alpha reads as 255. */
+static void read_rgba_pixels_front(const GLcontext *ctx,
+				    struct gl_renderbuffer *rb,
+				    GLuint n, const GLint x[], const GLint y[],
+				    GLubyte rgba[][4])
+{
+    const HDC dc = wmesa_context(ctx)->hDC;
+    GLuint k;
+
+    for (k = 0; k < n; k++) {
+        const GLint row = FLIP(y[k]);
+        const COLORREF texel = GetPixel(dc, x[k], row);
+        rgba[k][RCOMP] = GetRValue(texel);
+        rgba[k][GCOMP] = GetGValue(texel);
+        rgba[k][BCOMP] = GetBValue(texel);
+        rgba[k][ACOMP] = 255;
+    }
+}
+
+/*********************************************************************/
+
+/* DOUBLE BUFFER 32-bit */
+
+#define WMSETPIXEL32(pwc, y, x, r, g, b) { \
+LPDWORD lpdw = ((LPDWORD)((pwc)->pbPixels + (pwc)->ScanWidth * (y)) + (x)); \
+*lpdw = BGR32((r),(g),(b)); }  /* 'pwc' only needs pbPixels and ScanWidth; y is used as given */
+
+
+
+/*
+ * Write a horizontal span of RGBA color pixels with a boolean mask.
+ * Each pixel is stored as a BGR32() DWORD in pwfb->pbPixels; alpha is unused.
+ */
+static void write_rgba_span_32(const GLcontext *ctx,
+			       struct gl_renderbuffer *rb,
+			       GLuint n, GLint x, GLint y,
+			       const GLubyte rgba[][4],
+			       const GLubyte mask[] )
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    const GLint row = FLIP(y);
+    LPDWORD dst = (LPDWORD)(pwfb->pbPixels + pwfb->ScanWidth * row) + x;
+    GLuint k;
+
+    if (!mask) {
+        /* unmasked: fill all n pixels */
+        for (k = 0; k < n; k++)
+            dst[k] = BGR32(rgba[k][RCOMP], rgba[k][GCOMP], rgba[k][BCOMP]);
+        return;
+    }
+
+    /* masked: touch only the pixels whose mask byte is non-zero */
+    for (k = 0; k < n; k++) {
+        if (mask[k])
+            dst[k] = BGR32(rgba[k][RCOMP], rgba[k][GCOMP], rgba[k][BCOMP]);
+    }
+}
+
+
+/*
+ * Write a horizontal span of RGB color pixels with a boolean mask.
+ * Each pixel is stored as a BGR32() DWORD in pwfb->pbPixels.
+ */
+static void write_rgb_span_32(const GLcontext *ctx,
+			      struct gl_renderbuffer *rb,
+			      GLuint n, GLint x, GLint y,
+			      const GLubyte rgb[][3],
+			      const GLubyte mask[] )
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    const GLint row = FLIP(y);
+    LPDWORD dst = (LPDWORD)(pwfb->pbPixels + pwfb->ScanWidth * row) + x;
+    GLuint k;
+
+    if (!mask) {
+        /* unmasked: fill all n pixels */
+        for (k = 0; k < n; k++)
+            dst[k] = BGR32(rgb[k][RCOMP], rgb[k][GCOMP], rgb[k][BCOMP]);
+        return;
+    }
+
+    /* masked: touch only the pixels whose mask byte is non-zero */
+    for (k = 0; k < n; k++) {
+        if (mask[k])
+            dst[k] = BGR32(rgb[k][RCOMP], rgb[k][GCOMP], rgb[k][BCOMP]);
+    }
+}
+
+/*
+ * Write a horizontal span of pixels with a boolean mask.  The current color
+ * is used for all pixels.
+ */
+static void write_mono_rgba_span_32(const GLcontext *ctx, 
+				    struct gl_renderbuffer *rb,
+				    GLuint n, GLint x, GLint y,
+				    const GLchan color[4], 
+				    const GLubyte mask[])
+{
+    LPDWORD lpdw;
+    DWORD pixel;
+    GLuint i;
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    /* Flip y BEFORE computing the row address, as write_rgba_span_32()
+     * does; otherwise the span lands on the mirrored scanline. */
+    y=FLIP(y);
+    lpdw = ((LPDWORD)(pwfb->pbPixels + pwfb->ScanWidth * y)) + x;
+    pixel = BGR32(color[RCOMP], color[GCOMP], color[BCOMP]);
+    if (mask) {
+	for (i=0; i<n; i++)
+	    if (mask[i])
+                lpdw[i] = pixel;
+    }
+    else
+	for (i=0; i<n; i++)
+                *lpdw++ = pixel;
+}
+
+/* Write an array of RGBA pixels with a boolean mask (read for every pixel). */
+static void write_rgba_pixels_32(const GLcontext *ctx,
+				 struct gl_renderbuffer *rb,
+				 GLuint n, const GLint x[], const GLint y[],
+				 const GLubyte rgba[][4],
+				 const GLubyte mask[])
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    GLuint k;
+    for (k = 0; k < n; k++) {
+        if (mask[k])
+            WMSETPIXEL32(pwfb, FLIP(y[k]), x[k],
+                         rgba[k][RCOMP], rgba[k][GCOMP], rgba[k][BCOMP]);
+    }
+}
+
+/*
+ * Write an array of pixels with a boolean mask.  The current color
+ * is used for all pixels.  The mask is read for every pixel.
+ */
+static void write_mono_rgba_pixels_32(const GLcontext *ctx,
+				      struct gl_renderbuffer *rb,
+				      GLuint n,
+				      const GLint x[], const GLint y[],
+				      const GLchan color[4],
+				      const GLubyte mask[])
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    GLuint k;
+    for (k = 0; k < n; k++) {
+        if (mask[k])
+            WMSETPIXEL32(pwfb, FLIP(y[k]), x[k],
+                         color[RCOMP], color[GCOMP], color[BCOMP]);
+    }
+}
+
+/*
+ * Read a horizontal span of color pixels from pwfb->pbPixels; alpha is 255.
+ */
+static void read_rgba_span_32(const GLcontext *ctx,
+			      struct gl_renderbuffer *rb,
+			      GLuint n, GLint x, GLint y,
+			      GLubyte rgba[][4] )
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    const GLint row = FLIP(y);
+    const DWORD *src = (LPDWORD)(pwfb->pbPixels + pwfb->ScanWidth * row) + x;
+    GLuint k;
+
+    /* each DWORD holds blue in bits 0-7, green in 8-15, red in 16-23 */
+    for (k = 0; k < n; k++) {
+        const DWORD texel = src[k];
+        rgba[k][RCOMP] = (GLubyte)((texel >> 16) & 0xff);
+        rgba[k][GCOMP] = (GLubyte)((texel >> 8) & 0xff);
+        rgba[k][BCOMP] = (GLubyte)(texel & 0xff);
+        rgba[k][ACOMP] = 255;
+    }
+}
+
+
+/*
+ * Read an array of color pixels from pwfb->pbPixels; alpha is 255.
+ */
+static void read_rgba_pixels_32(const GLcontext *ctx,
+				struct gl_renderbuffer *rb,
+				GLuint n, const GLint x[], const GLint y[],
+				GLubyte rgba[][4])
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    GLuint k;
+
+    for (k = 0; k < n; k++) {
+        const GLint row = FLIP(y[k]);
+        const DWORD *src = (LPDWORD)(pwfb->pbPixels + pwfb->ScanWidth * row) + x[k];
+        const DWORD texel = *src;
+        /* blue in bits 0-7, green in 8-15, red in 16-23 */
+        rgba[k][RCOMP] = (GLubyte)((texel >> 16) & 0xff);
+        rgba[k][GCOMP] = (GLubyte)((texel >> 8) & 0xff);
+        rgba[k][BCOMP] = (GLubyte)(texel & 0xff);
+        rgba[k][ACOMP] = 255;
+    }
+}
+
+
+/*********************************************************************/
+
+/* DOUBLE BUFFER 24-bit */
+
+#define WMSETPIXEL24(pwc, y, x, r, g, b) { \
+LPBYTE lpb = ((LPBYTE)((pwc)->pbPixels + (pwc)->ScanWidth * (y)) + (3 * (x))); \
+lpb[0] = (b); \
+lpb[1] = (g); \
+lpb[2] = (r); }  /* 3 bytes per pixel, stored blue, green, red */
+
+/*
+ * Write a horizontal span of RGBA color pixels with a boolean mask (24-bit).
+ */
+static void write_rgba_span_24(const GLcontext *ctx,
+			       struct gl_renderbuffer *rb,
+			       GLuint n, GLint x, GLint y,
+			       const GLubyte rgba[][4],
+			       const GLubyte mask[] )
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    const GLint row = FLIP(y);
+    LPBYTE dst = (LPBYTE)(pwfb->pbPixels + pwfb->ScanWidth * row) + (3 * x);
+    GLuint k;
+
+    if (!mask) {
+        /* unmasked: 3 bytes per pixel, stored blue, green, red */
+        for (k = 0; k < n; k++, dst += 3) {
+            dst[0] = rgba[k][BCOMP];
+            dst[1] = rgba[k][GCOMP];
+            dst[2] = rgba[k][RCOMP];
+        }
+        return;
+    }
+    /* masked: skip the pixels whose mask byte is zero */
+    for (k = 0; k < n; k++, dst += 3) {
+        if (mask[k]) {
+            dst[0] = rgba[k][BCOMP];
+            dst[1] = rgba[k][GCOMP];
+            dst[2] = rgba[k][RCOMP];
+        }
+    }
+}
+
+
+/*
+ * Write a horizontal span of RGB color pixels with a boolean mask (24-bit).
+ */
+static void write_rgb_span_24(const GLcontext *ctx,
+			      struct gl_renderbuffer *rb,
+			      GLuint n, GLint x, GLint y,
+			      const GLubyte rgb[][3],
+			      const GLubyte mask[] )
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    const GLint row = FLIP(y);
+    LPBYTE dst = (LPBYTE)(pwfb->pbPixels + pwfb->ScanWidth * row) + (3 * x);
+    GLuint k;
+
+    if (!mask) {
+        /* unmasked: 3 bytes per pixel, stored blue, green, red */
+        for (k = 0; k < n; k++, dst += 3) {
+            dst[0] = rgb[k][BCOMP];
+            dst[1] = rgb[k][GCOMP];
+            dst[2] = rgb[k][RCOMP];
+        }
+        return;
+    }
+    /* masked: skip the pixels whose mask byte is zero */
+    for (k = 0; k < n; k++, dst += 3) {
+        if (mask[k]) {
+            dst[0] = rgb[k][BCOMP];
+            dst[1] = rgb[k][GCOMP];
+            dst[2] = rgb[k][RCOMP];
+        }
+    }
+}
+
+/*
+ * Write a horizontal span of pixels with a boolean mask.  The current color
+ * is used for all pixels.
+ */
+static void write_mono_rgba_span_24(const GLcontext *ctx, 
+				    struct gl_renderbuffer *rb,
+				    GLuint n, GLint x, GLint y,
+				    const GLchan color[4], 
+				    const GLubyte mask[])
+{
+    LPBYTE lpb;
+    GLuint i;
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    /* y must be flipped before the row address is computed from it */
+    y=FLIP(y);
+    lpb = ((LPBYTE)(pwfb->pbPixels + pwfb->ScanWidth * y)) + (3 * x);
+    if (mask) {
+	for (i=0; i<n; i++)
+	    if (mask[i]) {
+	    	lpb[3*i] = color[BCOMP];
+	    	lpb[3*i+1] = color[GCOMP];
+	    	lpb[3*i+2] = color[RCOMP];
+	    }
+    }
+    else
+	for (i=0; i<n; i++) {
+		*lpb++ = color[BCOMP];
+		*lpb++ = color[GCOMP];
+		*lpb++ = color[RCOMP];		
+	}
+}
+
+/* Write an array of RGBA pixels with a boolean mask (read for every pixel). */
+static void write_rgba_pixels_24(const GLcontext *ctx,
+				 struct gl_renderbuffer *rb,
+				 GLuint n, const GLint x[], const GLint y[],
+				 const GLubyte rgba[][4],
+				 const GLubyte mask[])
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    GLuint k;
+    for (k = 0; k < n; k++) {
+        if (mask[k])
+            WMSETPIXEL24(pwfb, FLIP(y[k]), x[k],
+                         rgba[k][RCOMP], rgba[k][GCOMP], rgba[k][BCOMP]);
+    }
+}
+
+/*
+ * Write an array of pixels with a boolean mask.  The current color
+ * is used for all pixels.  The mask is read for every pixel.
+ */
+static void write_mono_rgba_pixels_24(const GLcontext *ctx,
+				      struct gl_renderbuffer *rb,
+				      GLuint n,
+				      const GLint x[], const GLint y[],
+				      const GLchan color[4],
+				      const GLubyte mask[])
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    GLuint k;
+    for (k = 0; k < n; k++) {
+        if (mask[k])
+            WMSETPIXEL24(pwfb, FLIP(y[k]), x[k],
+                         color[RCOMP], color[GCOMP], color[BCOMP]);
+    }
+}
+
+/*
+ * Read a horizontal span of color pixels (3 bytes each: b, g, r); alpha 255.
+ */
+static void read_rgba_span_24(const GLcontext *ctx,
+			      struct gl_renderbuffer *rb,
+			      GLuint n, GLint x, GLint y,
+			      GLubyte rgba[][4] )
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    const GLint row = FLIP(y);
+    const BYTE *src = (LPBYTE)(pwfb->pbPixels + pwfb->ScanWidth * row) + (3 * x);
+    GLuint k;
+
+    for (k = 0; k < n; k++, src += 3) {
+        rgba[k][RCOMP] = src[2];
+        rgba[k][GCOMP] = src[1];
+        rgba[k][BCOMP] = src[0];
+        rgba[k][ACOMP] = 255;
+    }
+}
+
+
+/* Read an array of color pixels (3 bytes each: b, g, r); alpha reads as 255. */
+static void read_rgba_pixels_24(const GLcontext *ctx, 
+				struct gl_renderbuffer *rb,
+				GLuint n, const GLint x[], const GLint y[],
+				GLubyte rgba[][4])
+{
+    GLuint i;
+    LPBYTE lpb;
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+
+    for (i=0; i<n; i++) {
+	GLint y2 = FLIP(y[i]);
+	/* lpb already addresses pixel (x[i], y2): index it with 0..2 only */
+	lpb = ((LPBYTE)(pwfb->pbPixels + pwfb->ScanWidth * y2)) + (3 * x[i]);
+	rgba[i][RCOMP] = lpb[2];
+	rgba[i][GCOMP] = lpb[1];
+	rgba[i][BCOMP] = lpb[0];
+	rgba[i][ACOMP] = 255;
+    }
+}
+
+
+/*********************************************************************/
+
+/* DOUBLE BUFFER 16-bit */
+
+#define WMSETPIXEL16(pwc, y, x, r, g, b) { \
+LPWORD lpw = ((LPWORD)((pwc)->pbPixels + (pwc)->ScanWidth * (y)) + (x)); \
+*lpw = BGR16((r),(g),(b)); }  /* 'pwc' only needs pbPixels and ScanWidth; y is used as given */
+
+
+
+/*
+ * Write a horizontal span of RGBA color pixels with a boolean mask.
+ * Each pixel is stored as a BGR16() WORD in pwfb->pbPixels; alpha is unused.
+ */
+static void write_rgba_span_16(const GLcontext *ctx,
+			       struct gl_renderbuffer *rb,
+			       GLuint n, GLint x, GLint y,
+			       const GLubyte rgba[][4],
+			       const GLubyte mask[] )
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    const GLint row = FLIP(y);
+    LPWORD dst = (LPWORD)(pwfb->pbPixels + pwfb->ScanWidth * row) + x;
+    GLuint k;
+
+    if (!mask) {
+        /* unmasked: fill all n pixels */
+        for (k = 0; k < n; k++)
+            dst[k] = BGR16(rgba[k][RCOMP], rgba[k][GCOMP], rgba[k][BCOMP]);
+        return;
+    }
+
+    /* masked: touch only the pixels whose mask byte is non-zero */
+    for (k = 0; k < n; k++) {
+        if (mask[k])
+            dst[k] = BGR16(rgba[k][RCOMP], rgba[k][GCOMP], rgba[k][BCOMP]);
+    }
+}
+
+
+/*
+ * Write a horizontal span of RGB color pixels with a boolean mask.
+ * Each pixel is stored as a BGR16() WORD in pwfb->pbPixels.
+ */
+static void write_rgb_span_16(const GLcontext *ctx,
+			      struct gl_renderbuffer *rb,
+			      GLuint n, GLint x, GLint y,
+			      const GLubyte rgb[][3],
+			      const GLubyte mask[] )
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    const GLint row = FLIP(y);
+    LPWORD dst = (LPWORD)(pwfb->pbPixels + pwfb->ScanWidth * row) + x;
+    GLuint k;
+
+    if (!mask) {
+        /* unmasked: fill all n pixels */
+        for (k = 0; k < n; k++)
+            dst[k] = BGR16(rgb[k][RCOMP], rgb[k][GCOMP], rgb[k][BCOMP]);
+        return;
+    }
+
+    /* masked: touch only the pixels whose mask byte is non-zero */
+    for (k = 0; k < n; k++) {
+        if (mask[k])
+            dst[k] = BGR16(rgb[k][RCOMP], rgb[k][GCOMP], rgb[k][BCOMP]);
+    }
+}
+
+/*
+ * Write a horizontal span of pixels with a boolean mask.  The current color
+ * is used for all pixels.
+ */
+static void write_mono_rgba_span_16(const GLcontext *ctx, 
+				    struct gl_renderbuffer *rb,
+				    GLuint n, GLint x, GLint y,
+				    const GLchan color[4], 
+				    const GLubyte mask[])
+{
+    LPWORD lpw;
+    WORD pixel;
+    GLuint i;
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+
+    /* Flip y BEFORE computing the row address, as write_rgba_span_16()
+     * does; otherwise the span lands on the mirrored scanline. */
+    y=FLIP(y);
+    lpw = ((LPWORD)(pwfb->pbPixels + pwfb->ScanWidth * y)) + x;
+    pixel = BGR16(color[RCOMP], color[GCOMP], color[BCOMP]);
+    if (mask) {
+	for (i=0; i<n; i++)
+	    if (mask[i])
+                lpw[i] = pixel;
+    }
+    else
+	for (i=0; i<n; i++)
+                *lpw++ = pixel;
+}
+
+/* Write an array of RGBA pixels with a boolean mask (read for every pixel). */
+static void write_rgba_pixels_16(const GLcontext *ctx,
+				 struct gl_renderbuffer *rb,
+				 GLuint n, const GLint x[], const GLint y[],
+				 const GLubyte rgba[][4],
+				 const GLubyte mask[])
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    GLuint k;
+
+    for (k = 0; k < n; k++) {
+        if (mask[k])
+            WMSETPIXEL16(pwfb, FLIP(y[k]), x[k],
+                         rgba[k][RCOMP], rgba[k][GCOMP], rgba[k][BCOMP]);
+    }
+}
+
+/*
+ * Write an array of pixels with a boolean mask.  The current color
+ * is used for all pixels.  The mask is read for every pixel.
+ */
+static void write_mono_rgba_pixels_16(const GLcontext *ctx,
+				      struct gl_renderbuffer *rb,
+				      GLuint n,
+				      const GLint x[], const GLint y[],
+				      const GLchan color[4],
+				      const GLubyte mask[])
+{
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    GLuint k;
+
+    for (k = 0; k < n; k++) {
+        if (mask[k])
+            WMSETPIXEL16(pwfb, FLIP(y[k]), x[k],
+                         color[RCOMP], color[GCOMP], color[BCOMP]);
+    }
+}
+
+/*
+ * Read a horizontal span of color pixels.
+ *
+ * Fetches n 16-bit pixels starting at window position (x, y) and expands
+ * each one into rgba[i]; alpha is always reported as 255.
+ */
+static void read_rgba_span_16(const GLcontext *ctx,
+			      struct gl_renderbuffer *rb,
+			      GLuint n, GLint x, GLint y,
+			      GLubyte rgba[][4] )
+{
+    WMesaContext pwc = wmesa_context(ctx);
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    LPWORD src;
+    GLuint k;
+
+    y = FLIP(y);
+    src = ((LPWORD)(pwfb->pbPixels + pwfb->ScanWidth * y)) + x;
+    for (k = 0; k < n; k++, src++) {
+        const GLuint packed = *src;
+        /* Windows uses 5,5,5 for 16-bit */
+        rgba[k][RCOMP] = (packed & 0x7c00) >> 7;
+        rgba[k][GCOMP] = (packed & 0x03e0) >> 2;
+        rgba[k][BCOMP] = (packed & 0x001f) << 3;
+        rgba[k][ACOMP] = 255;
+    }
+}
+
+
+/*
+ * Read an array of color pixels.
+ *
+ * For each i < n the 16-bit pixel at window position (x[i], y[i]) is
+ * fetched and expanded into rgba[i]; alpha is always reported as 255.
+ */
+static void read_rgba_pixels_16(const GLcontext *ctx,
+				struct gl_renderbuffer *rb,
+				GLuint n, const GLint x[], const GLint y[],
+				GLubyte rgba[][4])
+{
+    WMesaContext pwc = wmesa_context(ctx);
+    WMesaFramebuffer pwfb = wmesa_framebuffer(ctx->DrawBuffer);
+    GLuint k;
+
+    for (k = 0; k < n; k++) {
+        const GLint row = FLIP(y[k]);
+        const LPWORD src =
+            ((LPWORD)(pwfb->pbPixels + pwfb->ScanWidth * row)) + x[k];
+        const GLuint packed = *src;
+        /* Windows uses 5,5,5 for 16-bit */
+        rgba[k][RCOMP] = (packed & 0x7c00) >> 7;
+        rgba[k][GCOMP] = (packed & 0x03e0) >> 2;
+        rgba[k][BCOMP] = (packed & 0x001f) << 3;
+        rgba[k][ACOMP] = 255;
+    }
+}
+
+
+
+
+/**********************************************************************/
+/*****                   BUFFER Functions                         *****/
+/**********************************************************************/
+
+
+
+
+/**
+ * Renderbuffer Delete hook installed by wmesa_new_renderbuffer().
+ * Releases the gl_renderbuffer structure itself with free(); nothing else
+ * is released here.
+ */
+static void
+wmesa_delete_renderbuffer(struct gl_renderbuffer *rb)
+{
+    free(rb);
+}
+
+
+/**
+ * AllocStorage hook for the WMesa color renderbuffers.
+ * This is called by Mesa whenever it determines that the window size
+ * has changed.  No pixel storage is allocated here; the function only
+ * records the new dimensions on the renderbuffer and reports success.
+ */
+static GLboolean
+wmesa_renderbuffer_storage(GLcontext *ctx,
+			   struct gl_renderbuffer *rb,
+			   GLenum internalFormat,
+			   GLuint width,
+			   GLuint height)
+{
+    /* neither the context nor the requested format is consulted */
+    (void) ctx;
+    (void) internalFormat;
+
+    rb->Height = height;
+    rb->Width = width;
+    return GL_TRUE;
+}
+
+
+/**
+ * Plug in the Get/PutRow/Values functions for a renderbuffer depending
+ * on if we're drawing to the front or back color buffer.
+ *
+ * double_buffer == 0 selects the front-buffer functions.  Otherwise the
+ * back-buffer functions are chosen from pixelformat and, for PF_8R8G8B,
+ * from cColorBits (24 selects the *_24 set, anything else the *_32 set).
+ * A back buffer with any other pixelformat is left untouched.
+ */
+void wmesa_set_renderbuffer_funcs(struct gl_renderbuffer *rb, int pixelformat,
+                                  int cColorBits, int double_buffer)
+{
+    if (!double_buffer) {
+        /* front buffer (actual Windows window) */
+        rb->PutRow = write_rgba_span_front;
+        rb->PutRowRGB = write_rgb_span_front;
+        rb->PutMonoRow = write_mono_rgba_span_front;
+        rb->PutValues = write_rgba_pixels_front;
+        rb->PutMonoValues = write_mono_rgba_pixels_front;
+        rb->GetRow = read_rgba_span_front;
+        rb->GetValues = read_rgba_pixels_front;
+        return;
+    }
+
+    /* back buffer */
+    /* Picking the correct span functions is important because
+     * the DIB was allocated with the indicated depth. */
+    if (pixelformat == PF_5R6G5B) {
+        rb->PutRow = write_rgba_span_16;
+        rb->PutRowRGB = write_rgb_span_16;
+        rb->PutMonoRow = write_mono_rgba_span_16;
+        rb->PutValues = write_rgba_pixels_16;
+        rb->PutMonoValues = write_mono_rgba_pixels_16;
+        rb->GetRow = read_rgba_span_16;
+        rb->GetValues = read_rgba_pixels_16;
+    }
+    else if (pixelformat == PF_8R8G8B) {
+        if (cColorBits == 24) {
+            rb->PutRow = write_rgba_span_24;
+            rb->PutRowRGB = write_rgb_span_24;
+            rb->PutMonoRow = write_mono_rgba_span_24;
+            rb->PutValues = write_rgba_pixels_24;
+            rb->PutMonoValues = write_mono_rgba_pixels_24;
+            rb->GetRow = read_rgba_span_24;
+            rb->GetValues = read_rgba_pixels_24;
+        }
+        else {
+            rb->PutRow = write_rgba_span_32;
+            rb->PutRowRGB = write_rgb_span_32;
+            rb->PutMonoRow = write_mono_rgba_span_32;
+            rb->PutValues = write_rgba_pixels_32;
+            rb->PutMonoValues = write_mono_rgba_pixels_32;
+            rb->GetRow = read_rgba_span_32;
+            rb->GetValues = read_rgba_pixels_32;
+        }
+    }
+    /* any other pixel format: hooks are left as they were */
+}
+
+/**
+ * Called by ctx->Driver.ResizeBuffers()
+ * Resize the front/back colorbuffers to match the latest window size.
+ *
+ * The backing store is torn down and re-created only when the context is
+ * double buffered and the requested size differs from the framebuffer's
+ * current size; _mesa_resize_framebuffer() is called in every case.
+ */
+static void
+wmesa_resize_buffers(GLcontext *ctx, GLframebuffer *buffer,
+                     GLuint width, GLuint height)
+{
+    WMesaContext pwc = wmesa_context(ctx);
+    WMesaFramebuffer pwfb = wmesa_framebuffer(buffer);
+    const GLboolean size_changed =
+        (pwfb->Base.Width != width || pwfb->Base.Height != height);
+
+    (void) pwc;
+
+    /* Realloc back buffer */
+    if (size_changed && ctx->Visual.doubleBufferMode == 1) {
+        wmDeleteBackingStore(pwfb);
+        wmCreateBackingStore(pwfb, width, height);
+    }
+
+    _mesa_resize_framebuffer(ctx, buffer, width, height);
+}
+
+
+/**
+ * Called by glViewport.
+ * The driver has no event loop and therefore never sees conventional
+ * resize events, so this is used as the moment to poll the current
+ * window size and bring the renderbuffers in line with it.
+ * MakeCurrent also ends up making a call here, so that ensures
+ * we get the viewport set correctly, even if the app does not call
+ * glViewport and relies on the defaults.
+ *
+ * The viewport rectangle passed in is not used; the size is always
+ * queried from ctx->WinSysDrawBuffer.
+ */
+static void wmesa_viewport(GLcontext *ctx,
+			   GLint x, GLint y,
+			   GLsizei width, GLsizei height)
+{
+    WMesaContext pwc = wmesa_context(ctx);
+    GLuint win_width, win_height;
+
+    (void) pwc;
+    (void) x;
+    (void) y;
+    (void) width;
+    (void) height;
+
+    wmesa_get_buffer_size(ctx->WinSysDrawBuffer, &win_width, &win_height);
+
+    /* Resize buffers if the window size changed. */
+    wmesa_resize_buffers(ctx, ctx->WinSysDrawBuffer, win_width, win_height);
+
+    /* to update scissor / window bounds */
+    ctx->NewState |= _NEW_BUFFERS;
+}
+
+
+
+
+/**
+ * Called when the driver should update its state, based on the new_state
+ * flags.  The flags are forwarded to the swrast, swsetup, vbo and tnl
+ * modules; the driver itself keeps no derived state yet.
+ */
+static void wmesa_update_state(GLcontext *ctx, GLuint new_state)
+{
+    _swrast_InvalidateState(ctx, new_state);
+    _swsetup_InvalidateState(ctx, new_state);
+    _vbo_InvalidateState(ctx, new_state);
+    _tnl_InvalidateState(ctx, new_state);
+
+    /* TODO - This code is not complete yet because I
+     * don't know what to do for all state updates.
+     * In particular nothing is done for _NEW_BUFFERS so far.
+     */
+}
+
+
+
+
+
+/**********************************************************************/
+/*****                   WMESA Functions                          *****/
+/**********************************************************************/
+
+/**
+ * Create a WMesa rendering context.
+ *
+ * hDC        - device context to render to; the handle is stored as given.
+ * Pal        - unused.
+ * rgb_flag   - must be true: color-index mode is not supported.
+ * db_flag    - request a double-buffered visual.
+ * alpha_flag - request alpha bits in the color and accumulation buffers
+ *              (a 16-bit device never gets color alpha bits).
+ *
+ * Returns the new context, or NULL if rgb_flag is false or if any
+ * allocation or initialization step fails.
+ */
+WMesaContext WMesaCreateContext(HDC hDC,
+				HPALETTE* Pal,
+				GLboolean rgb_flag,
+				GLboolean db_flag,
+				GLboolean alpha_flag)
+{
+    WMesaContext c;
+    struct dd_function_table functions;
+    GLint red_bits, green_bits, blue_bits, alpha_bits;
+    GLcontext *ctx;
+    GLvisual *visual;
+
+    (void) Pal;
+
+    /* Indexed mode not supported */
+    if (!rgb_flag)
+	return NULL;
+
+    /* Allocate wmesa context */
+    c = CALLOC_STRUCT(wmesa_context);
+    if (!c)
+	return NULL;
+
+#if 0
+    /* I do not understand this contributed code */
+    /* Support memory and device contexts */
+    if(WindowFromDC(hDC) != NULL) {
+	c->hDC = GetDC(WindowFromDC(hDC)); /* huh ???? */
+    }
+    else {
+	c->hDC = hDC;
+    }
+#else
+    c->hDC = hDC;
+#endif
+
+    /* Get data for visual */
+    /* Dealing with this is actually a bit of overkill because Mesa will end
+     * up treating all color component size requests less than 8 by using
+     * a single byte per channel.  In addition, the interface to the span
+     * routines passes colors as an entire byte per channel anyway, so there
+     * is nothing to be saved by telling the visual to be 16 bits if the device
+     * is 16 bits.  That is, Mesa is going to compute colors down to 8 bits per
+     * channel anyway.
+     * But we go through the motions here anyway.
+     */
+    switch (GetDeviceCaps(c->hDC, BITSPIXEL)) {
+    case 16:
+	red_bits = green_bits = blue_bits = 5;
+	alpha_bits = 0;
+	break;
+    default:
+	red_bits = green_bits = blue_bits = 8;
+	alpha_bits = 8;
+	break;
+    }
+    /* Create visual based on flags */
+    visual = _mesa_create_visual(db_flag,    /* db_flag */
+                                 GL_FALSE,   /* stereo */
+                                 red_bits, green_bits, blue_bits, /* color RGB */
+                                 alpha_flag ? alpha_bits : 0, /* color A */
+                                 DEFAULT_SOFTWARE_DEPTH_BITS, /* depth_bits */
+                                 8,          /* stencil_bits */
+                                 16,16,16,   /* accum RGB */
+                                 alpha_flag ? 16 : 0, /* accum A */
+                                 1);         /* num samples */
+
+    if (!visual) {
+	free(c);
+	return NULL;
+    }
+
+    /* Set up driver functions */
+    _mesa_init_driver_functions(&functions);
+    functions.GetString = wmesa_get_string;
+    functions.UpdateState = wmesa_update_state;
+    functions.GetBufferSize = wmesa_get_buffer_size;
+    functions.Flush = wmesa_flush;
+    functions.Clear = clear;
+    functions.ClearColor = clear_color;
+    functions.ResizeBuffers = wmesa_resize_buffers;
+    functions.Viewport = wmesa_viewport;
+
+    /* initialize the Mesa context data */
+    ctx = &c->gl_ctx;
+    if (!_mesa_initialize_context(ctx, visual, NULL, &functions, (void *)c)) {
+	/* Core initialization failed: do not carry on with a half-built
+	 * context, release what has been allocated so far instead. */
+	_mesa_destroy_visual(visual);
+	free(c);
+	return NULL;
+    }
+
+    /* visual no longer needed - it was copied by _mesa_initialize_context() */
+    _mesa_destroy_visual(visual);
+
+    _mesa_enable_sw_extensions(ctx);
+    _mesa_enable_1_3_extensions(ctx);
+    _mesa_enable_1_4_extensions(ctx);
+    _mesa_enable_1_5_extensions(ctx);
+    _mesa_enable_2_0_extensions(ctx);
+    _mesa_enable_2_1_extensions(ctx);
+
+    _mesa_meta_init(ctx);
+
+    /* Initialize the software rasterizer and helper modules. */
+    if (!_swrast_CreateContext(ctx) ||
+        !_vbo_CreateContext(ctx) ||
+        !_tnl_CreateContext(ctx) ||
+	!_swsetup_CreateContext(ctx)) {
+	_mesa_free_context_data(ctx);
+	free(c);
+	return NULL;
+    }
+    _swsetup_Wakeup(ctx);
+    TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+
+    return c;
+}
+
+
+/**
+ * Destroy a context created by WMesaCreateContext().
+ *
+ * Unbinds the context if it is current, frees the framebuffer registered
+ * for the context's HDC (including its backing store when double
+ * buffered), deletes the clear pen/brush, tears down the swsetup, tnl, vbo
+ * and swrast modules and finally frees the context itself.
+ * A NULL pwc is ignored.
+ */
+void WMesaDestroyContext( WMesaContext pwc )
+{
+    GLcontext *ctx;
+    WMesaFramebuffer pwfb;
+    GET_CURRENT_CONTEXT(cur_ctx);
+
+    /* Nothing to destroy; previously this dereferenced a NULL pointer. */
+    if (!pwc)
+        return;
+
+    ctx = &pwc->gl_ctx;
+
+    if (cur_ctx == ctx) {
+        /* unbind current if deleting current context */
+        WMesaMakeCurrent(NULL, NULL);
+    }
+
+    /* clean up frame buffer resources */
+    pwfb = wmesa_lookup_framebuffer(pwc->hDC);
+    if (pwfb) {
+	if (ctx->Visual.doubleBufferMode == 1)
+	    wmDeleteBackingStore(pwfb);
+	wmesa_free_framebuffer(pwc->hDC);
+    }
+
+    /* Release for device, not memory contexts */
+    /* NOTE(review): WMesaCreateContext() stores the caller's HDC as given
+     * (its GetDC() branch is compiled out), so this releases a DC the
+     * driver did not acquire itself -- confirm callers rely on this. */
+    if (WindowFromDC(pwc->hDC) != NULL)
+    {
+      ReleaseDC(WindowFromDC(pwc->hDC), pwc->hDC);
+    }
+    DeleteObject(pwc->clearPen);
+    DeleteObject(pwc->clearBrush);
+
+    _mesa_meta_free(ctx);
+
+    _swsetup_DestroyContext(ctx);
+    _tnl_DestroyContext(ctx);
+    _vbo_DestroyContext(ctx);
+    _swrast_DestroyContext(ctx);
+
+    _mesa_free_context_data(ctx);
+    free(pwc);
+}
+
+
+/**
+ * Create a new color renderbuffer.
+ *
+ * The renderbuffer is zero-allocated, initialized with name 0, set up as
+ * GL_RGBA / CHAN_TYPE and given the WMesa Delete and AllocStorage hooks.
+ * Returns NULL if the allocation fails.
+ */
+struct gl_renderbuffer *
+wmesa_new_renderbuffer(void)
+{
+    struct gl_renderbuffer *rb = CALLOC_STRUCT(gl_renderbuffer);
+
+    if (rb) {
+        _mesa_init_renderbuffer(rb, (GLuint)0);
+
+        rb->_BaseFormat = GL_RGBA;
+        rb->InternalFormat = GL_RGBA;
+        rb->DataType = CHAN_TYPE;
+        rb->Delete = wmesa_delete_renderbuffer;
+        rb->AllocStorage = wmesa_renderbuffer_storage;
+    }
+    return rb;
+}
+
+
+/**
+ * Bind context c to the window/bitmap identified by hdc.
+ *
+ * Returns immediately if c is already current with the same hdc.  The
+ * framebuffer for hdc is created on first use (back buffer, color
+ * renderbuffers and Mesa-owned depth/stencil/accum buffers).  If c is
+ * NULL, or no framebuffer exists or could be created for hdc, the
+ * current context is unbound.
+ */
+void WMesaMakeCurrent(WMesaContext c, HDC hdc)
+{
+    WMesaFramebuffer pwfb;
+
+    {
+        /* return if already current */
+        GET_CURRENT_CONTEXT(ctx);
+        WMesaContext pwc = wmesa_context(ctx);
+        if (pwc && c == pwc && pwc->hDC == hdc)
+            return;
+    }
+
+    pwfb = wmesa_lookup_framebuffer(hdc);
+
+    /* Lazy creation of framebuffers */
+    if (c && !pwfb && hdc) {
+        struct gl_renderbuffer *rb;
+        GLvisual *visual = &c->gl_ctx.Visual;
+        GLuint width, height;
+
+        get_window_size(hdc, &width, &height);
+
+	/* The context is zero-allocated, so NULL means "not created yet".
+	 * Creating these unconditionally overwrote (and leaked) the GDI
+	 * objects whenever the same context was bound to another new hdc. */
+	if (!c->clearPen)
+	    c->clearPen = CreatePen(PS_SOLID, 1, 0);
+	if (!c->clearBrush)
+	    c->clearBrush = CreateSolidBrush(0);
+
+        pwfb = wmesa_new_framebuffer(hdc, visual);
+
+	/* Only populate the framebuffer if it could actually be created;
+	 * otherwise fall through and unbind below. */
+	if (pwfb) {
+	    /* Create back buffer if double buffered */
+	    if (visual->doubleBufferMode == 1) {
+		wmCreateBackingStore(pwfb, width, height);
+	    }
+
+	    /* make render buffers */
+	    if (visual->doubleBufferMode == 1) {
+		rb = wmesa_new_renderbuffer();
+		_mesa_add_renderbuffer(&pwfb->Base, BUFFER_BACK_LEFT, rb);
+		wmesa_set_renderbuffer_funcs(rb, pwfb->pixelformat,
+					     pwfb->cColorBits, 1);
+	    }
+	    rb = wmesa_new_renderbuffer();
+	    _mesa_add_renderbuffer(&pwfb->Base, BUFFER_FRONT_LEFT, rb);
+	    wmesa_set_renderbuffer_funcs(rb, pwfb->pixelformat,
+					 pwfb->cColorBits, 0);
+
+	    /* Let Mesa own the Depth, Stencil, and Accum buffers */
+	    _mesa_add_soft_renderbuffers(&pwfb->Base,
+					 GL_FALSE, /* color */
+					 visual->depthBits > 0,
+					 visual->stencilBits > 0,
+					 visual->accumRedBits > 0,
+					 visual->alphaBits >0,
+					 GL_FALSE);
+	}
+    }
+
+    if (c && pwfb)
+	_mesa_make_current(&c->gl_ctx, &pwfb->Base, &pwfb->Base);
+    else
+        _mesa_make_current(NULL, NULL, NULL);
+}
+
+
+/**
+ * Copy the back buffer of the framebuffer bound to hdc to its window.
+ *
+ * Only the window of the current context can be swapped.  A problem is
+ * reported through _mesa_problem() when hdc has no framebuffer, or when
+ * hdc does not belong to the current context (including the case where
+ * no context is current at all).
+ */
+void WMesaSwapBuffers( HDC hdc )
+{
+    GET_CURRENT_CONTEXT(ctx);
+    WMesaContext pwc = wmesa_context(ctx);
+    WMesaFramebuffer pwfb = wmesa_lookup_framebuffer(hdc);
+
+    if (!pwfb) {
+        _mesa_problem(NULL, "wmesa: swapbuffers on unknown hdc");
+        return;
+    }
+
+    /* If we're swapping the buffer associated with the current context
+     * we have to flush any pending rendering commands first.
+     * pwc is NULL when no context is current (WMesaMakeCurrent() guards
+     * against the same case), so it must be checked before use.
+     */
+    if (pwc && pwc->hDC == hdc) {
+	_mesa_notifySwapBuffers(ctx);
+
+	BitBlt(pwfb->hDC, 0, 0, pwfb->Base.Width, pwfb->Base.Height,
+	       pwfb->dib_hDC, 0, 0, SRCCOPY);
+    }
+    else {
+        /* XXX for now only allow swapping current window */
+        _mesa_problem(NULL, "wmesa: can't swap non-current window");
+    }
+}
+
+/**
+ * Forward to _mesa_share_state() with ctx as the first argument and
+ * ctx_to_share as the second.  Anything _mesa_share_state() returns is
+ * discarded.  Neither pointer is checked for NULL.
+ */
+void WMesaShareLists(WMesaContext ctx_to_share, WMesaContext ctx)
+{
+	_mesa_share_state(&ctx->gl_ctx, &ctx_to_share->gl_ctx);	
+}
+
diff --git a/src/mesa/drivers/windows/gdi/wmesadef.h b/src/mesa/drivers/windows/gdi/wmesadef.h
new file mode 100644
index 0000000000..1c0e245111
--- /dev/null
+++ b/src/mesa/drivers/windows/gdi/wmesadef.h
@@ -0,0 +1,43 @@
+#ifndef WMESADEF_H
+#define WMESADEF_H
+#ifdef __MINGW32__
+#include <windows.h>
+#endif
+#include "context.h"
+
+
+/**
+ * The Windows Mesa rendering context, derived from GLcontext.
+ */
+struct wmesa_context {
+    GLcontext           gl_ctx;	        /* The core GL/Mesa context */
+    HDC                 hDC;            /* device context handle passed to WMesaCreateContext() */
+    COLORREF		clearColorRef;  /* NOTE(review): presumably the clear color as a GDI COLORREF -- confirm */
+    HPEN                clearPen;       /* GDI pen; created in WMesaMakeCurrent(), deleted in WMesaDestroyContext() */
+    HBRUSH              clearBrush;     /* GDI brush; created and deleted alongside clearPen */
+};
+
+
+/**
+ * Windows framebuffer, derived from gl_framebuffer
+ */
+struct wmesa_framebuffer
+{
+    struct gl_framebuffer Base;         /* core Mesa framebuffer; &Base is what is handed to Mesa */
+    HDC                 hDC;            /* destination DC of the BitBlt in WMesaSwapBuffers() */
+    int			pixelformat;    /* PF_* value used to select the back-buffer span functions */
+    GLuint		ScanWidth;      /* byte offset between consecutive rows of pbPixels */
+    int			cColorBits;     /* color depth; 24 selects the *_24 span functions for PF_8R8G8B */
+    /* back buffer DIB fields */
+    HDC                 dib_hDC;        /* source DC of the BitBlt in WMesaSwapBuffers() */
+    BITMAPINFO          bmi;            /* NOTE(review): presumably describes the DIB section -- confirm */
+    HBITMAP             hbmDIB;         /* NOTE(review): presumably the DIB section bitmap -- confirm */
+    HBITMAP             hOldBitmap;     /* NOTE(review): presumably the bitmap previously selected into dib_hDC -- confirm */
+    PBYTE               pbPixels;       /* base address of the back-buffer pixels */
+    struct wmesa_framebuffer *next;     /* NOTE(review): presumably links the list searched by wmesa_lookup_framebuffer() -- confirm */
+};
+
+typedef struct wmesa_framebuffer *WMesaFramebuffer;
+
+
+#endif /* WMESADEF_H */
diff --git a/src/mesa/drivers/windows/gldirect/ddlog.c b/src/mesa/drivers/windows/gldirect/ddlog.c
new file mode 100644
index 0000000000..4ae79e2fda
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/ddlog.c
@@ -0,0 +1,192 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  Logging functions.
+*
+****************************************************************************/
+
+#define STRICT
+#include <windows.h>
+
+#include "ddlog.h"
+#include "gld_driver.h"
+
+// ***********************************************************************
+
+static char						ddlogbuf[256]; // Shared scratch buffer used by ddlogError/ddlogPrintf
+static FILE*					fpDDLog = NULL; // Log file pointer
+static char						szDDLogName[_MAX_PATH] = {"gldirect.log"}; 	// Filename of the log
+static DDLOG_loggingMethodType	ddlogLoggingMethod = DDLOG_NONE; 	// Default to No Logging
+static DDLOG_severityType		ddlogDebugLevel; // Messages below this severity are not written
+static BOOL						bUIWarning = FALSE;	// MessageBox warning ?
+
+// ***********************************************************************
+
+// Start logging with the given method and minimum severity.
+// With DDLOG_NORMAL the log file is created (truncated) here and kept open;
+// if that fails the function returns without writing the start banner.
+// Calling this while a log file is already open only logs a warning.
+void ddlogOpen(
+	DDLOG_loggingMethodType LoggingMethod,
+	DDLOG_severityType Severity)
+{
+	// Tried to re-open the log
+	if (fpDDLog) {
+		ddlogMessage(DDLOG_WARN, "Tried to re-open the log file\n");
+		return;
+	}
+
+	ddlogDebugLevel = Severity;
+	ddlogLoggingMethod = LoggingMethod;
+
+	// Only the "normal" method keeps the file open between messages
+	if (LoggingMethod == DDLOG_NORMAL) {
+		fpDDLog = fopen(szDDLogName, "wt");
+		if (!fpDDLog)
+			return;
+	}
+
+	ddlogMessage(DDLOG_SYSTEM, "\n");
+	ddlogMessage(DDLOG_SYSTEM, "-> Logging Started\n");
+}
+
+// ***********************************************************************
+
+// Write the end-of-log banner and, for DDLOG_NORMAL, close the log file.
+// Does nothing when DDLOG_NORMAL logging has no open file.
+void ddlogClose()
+{
+	const BOOL bKeptOpen = (ddlogLoggingMethod == DDLOG_NORMAL);
+
+	// Determine whether the log is already closed
+	if (bKeptOpen && !fpDDLog)
+		return; // Nothing to do.
+
+	ddlogMessage(DDLOG_SYSTEM, "<- Logging Ended\n");
+
+	if (!bKeptOpen)
+		return;
+
+	fclose(fpDDLog);
+	fpDDLog = NULL;
+}
+
+// ***********************************************************************
+
+// Write one message to the log file and the debugger output.
+//
+// severity - message class; messages below the level given to ddlogOpen()
+//            are not written.
+// message  - text to log (a trailing newline is not added here).
+//
+// Nothing happens when logging is disabled or the log file cannot be
+// opened.  In DDLOG_CRASHPROOF mode the file is opened in append mode and
+// closed again around every call.  If UI warnings are enabled, a
+// DDLOG_CRITICAL message is also shown in a message box.
+// Output longer than the internal 256-byte buffer is truncated.
+void ddlogMessage(
+	DDLOG_severityType severity,
+	LPSTR message)
+{
+	char buf[256];
+	BOOL bWrite;
+	BOOL bPopup;
+
+	// Bail if logging is disabled
+	if (ddlogLoggingMethod == DDLOG_NONE)
+		return;
+
+	if (ddlogLoggingMethod == DDLOG_CRASHPROOF)
+		fpDDLog = fopen(szDDLogName, "at");
+
+	if (fpDDLog == NULL)
+		return;
+
+	bWrite = (severity >= ddlogDebugLevel);
+	bPopup = (bUIWarning && severity == DDLOG_CRITICAL);
+
+	// Format once for both consumers.  The text used to be formatted only
+	// when the message passed the severity filter, so a filtered CRITICAL
+	// message handed an uninitialized buffer to MessageBox; it was also
+	// formatted with an unbounded sprintf.
+	buf[0] = '\0';
+	if (bWrite || bPopup) {
+		_snprintf(buf, sizeof(buf) - 1, "DDLog: (%s) %s",
+			ddlogSeverityMessages[severity], message);
+		buf[sizeof(buf) - 1] = '\0'; // _snprintf does not terminate on truncation
+	}
+
+	if (bWrite) {
+		fputs(buf, fpDDLog); // Write string to file
+		OutputDebugString(buf); // Echo to debugger
+	}
+
+	if (ddlogLoggingMethod == DDLOG_CRASHPROOF) {
+		fflush(fpDDLog); // Write info to disk
+		fclose(fpDDLog);
+		fpDDLog = NULL;
+	}
+
+	// Popup message box if critical error
+	if (bPopup) {
+		MessageBox(NULL, buf, "GLDirect", MB_OK | MB_ICONWARNING | MB_TASKMODAL);
+	}
+}
+
+// ***********************************************************************
+
+// Log a message together with a DirectX result code.
+//
+// severity - message class, passed on to ddlogMessage().
+// message  - text describing the failed operation.
+// hResult  - result code; when FAILED(hResult) the code and its error
+//            string are appended, otherwise only the message is logged.
+//
+// The text is built in the shared 256-byte ddlogbuf and truncated to fit.
+void ddlogError(
+	DDLOG_severityType severity,
+	LPSTR message,
+	HRESULT hResult)
+{
+#ifdef _USE_GLD3_WGL
+	char dxErrStr[1024];
+	_gldDriver.GetDXErrorString(hResult, &dxErrStr[0], sizeof(dxErrStr));
+	if (FAILED(hResult)) {
+		// dxErrStr alone can be larger than ddlogbuf, so the write is bounded
+		_snprintf(ddlogbuf, sizeof(ddlogbuf) - 1, "DDLog: %s %8lx:[ %s ]\n",
+			message, (unsigned long)hResult, dxErrStr);
+	} else
+		_snprintf(ddlogbuf, sizeof(ddlogbuf) - 1, "DDLog: %s\n", message);
+#else
+	if (FAILED(hResult)) {
+		_snprintf(ddlogbuf, sizeof(ddlogbuf) - 1, "DDLog: %s %8lx:[ %s ]\n",
+			message, (unsigned long)hResult, DDErrorToString(hResult));
+	} else
+		_snprintf(ddlogbuf, sizeof(ddlogbuf) - 1, "DDLog: %s\n", message);
+#endif
+	// _snprintf leaves the buffer unterminated when the output is truncated
+	ddlogbuf[sizeof(ddlogbuf) - 1] = '\0';
+	ddlogMessage(severity, ddlogbuf);
+}
+
+// ***********************************************************************
+
+// printf-style logging.
+//
+// severity - message class, passed on to ddlogMessage().
+// message  - printf format string, followed by its arguments.
+//
+// The formatted text is built in the shared 256-byte ddlogbuf, truncated
+// to fit, and a newline is appended before it is passed to ddlogMessage().
+void ddlogPrintf(
+	DDLOG_severityType severity,
+	LPSTR message,
+	...)
+{
+	// Keep two bytes back: one for the appended '\n', one for the terminator
+	const int nMax = (int)sizeof(ddlogbuf) - 2;
+	int nLen;
+	va_list args;
+
+	va_start(args, message);
+	nLen = _vsnprintf(ddlogbuf, nMax, message, args);
+	va_end(args);
+
+	// _vsnprintf returns a negative value when the output was truncated and
+	// writes no terminator when the output fills the whole count
+	if (nLen < 0 || nLen > nMax)
+		nLen = nMax;
+
+	ddlogbuf[nLen] = '\n';
+	ddlogbuf[nLen + 1] = '\0';
+
+	ddlogMessage(severity, ddlogbuf);
+}
+
+// ***********************************************************************
+
+// Enable or disable the message box that ddlogMessage() shows for
+// DDLOG_CRITICAL messages.
+void ddlogWarnOption(
+	BOOL bWarnOption)
+{
+	bUIWarning = bWarnOption;
+}
+
+// ***********************************************************************
+
+// Prefix the log file name with a directory.
+//
+// szPath - directory to place the log file in; a backslash is inserted
+//          between it and the current log file name.
+//
+// The log file name is left unchanged when szPath is NULL or when the
+// combined path would not fit in _MAX_PATH characters.
+// Note that every successful call prepends to the current name, so
+// calling this twice nests the paths.
+void ddlogPathOption(
+	LPSTR szPath)
+{
+	char szPathName[_MAX_PATH];
+	size_t nPathLen;
+	size_t nNameLen;
+
+	if (szPath == NULL)
+		return;
+
+	nPathLen = strlen(szPath);
+	nNameLen = strlen(szDDLogName);
+
+	// path + '\\' + name + terminator must fit; the copies below used to
+	// run unchecked and could overflow both buffers
+	if (nPathLen + 1 + nNameLen + 1 > sizeof(szPathName))
+		return;
+
+	strcpy(szPathName, szPath);
+	strcat(szPathName, "\\");
+	strcat(szPathName, szDDLogName);
+	strcpy(szDDLogName, szPathName);
+}
+
+// ***********************************************************************
diff --git a/src/mesa/drivers/windows/gldirect/ddlog.h b/src/mesa/drivers/windows/gldirect/ddlog.h
new file mode 100644
index 0000000000..d64067e224
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/ddlog.h
@@ -0,0 +1,109 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  Logging functions.
+*
+****************************************************************************/
+
+#ifndef __DDLOG_H
+#define __DDLOG_H
+
+#include <stdio.h>
+
+#ifndef _USE_GLD3_WGL
+#include "dderrstr.h" // ddraw/d3d error string
+#endif
+
+/*---------------------- Macros and type definitions ----------------------*/
+
+// Selects how the log file is handled (see ddlogOpen / ddlogMessage)
+typedef enum {
+	DDLOG_NONE					= 0,			// No log output
+	DDLOG_NORMAL				= 1,			// Log is kept open
+	DDLOG_CRASHPROOF			= 2,			// Log is closed and flushed
+	DDLOG_METHOD_FORCE_DWORD	= 0x7fffffff,	// Make enum dword
+} DDLOG_loggingMethodType;
+
+// Denotes type of message sent to the logging functions.
+// The values are compared numerically against the level passed to
+// ddlogOpen() and are used as indices into ddlogSeverityMessages[].
+typedef enum {
+	DDLOG_INFO					= 0,			// Information only
+	DDLOG_WARN					= 1,			// Warning only
+	DDLOG_ERROR					= 2,			// Notify user of an error
+	DDLOG_CRITICAL				= 3,			// Exceptionally severe error
+	DDLOG_SYSTEM				= 4,			// System message. Not an error
+												// but must always be printed.
+	DDLOG_SEVERITY_FORCE_DWORD	= 0x7fffffff,	// Make enum dword
+} DDLOG_severityType;
+
+#ifdef _USE_GLD3_WGL
+// Synonyms
+#define GLDLOG_INFO		DDLOG_INFO
+#define GLDLOG_WARN		DDLOG_WARN
+#define GLDLOG_ERROR	DDLOG_ERROR
+#define GLDLOG_CRITICAL	DDLOG_CRITICAL
+#define GLDLOG_SYSTEM	DDLOG_SYSTEM
+#endif
+
+// The message that will be output to the log.
+// Indexed by DDLOG_severityType (DDLOG_INFO .. DDLOG_SYSTEM), so the order
+// of the entries must match the enum values.
+// Being static in a header, every file that includes it gets its own copy.
+static const char *ddlogSeverityMessages[] = {
+	"INFO",
+	"WARN",
+	"ERROR",
+	"*CRITICAL*",
+	"System",
+};
+
+/*------------------------- Function Prototypes ---------------------------*/
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+// Start logging with the given method and minimum severity
+void ddlogOpen(DDLOG_loggingMethodType LoggingMethod, DDLOG_severityType Severity);
+// Write the end banner and close the log file
+void ddlogClose();
+// Log a single message string
+void ddlogMessage(DDLOG_severityType severity, LPSTR message);
+// Log a message together with an HRESULT and its error string
+void ddlogError(DDLOG_severityType severity, LPSTR message, HRESULT hResult);
+// printf-style logging; a newline is appended
+void ddlogPrintf(DDLOG_severityType severity, LPSTR message, ...);
+// Enable/disable the message box shown for critical messages
+void ddlogWarnOption(BOOL bWarnOption);
+// Prefix the log file name with a directory
+void ddlogPathOption(LPSTR szPath);
+
+#ifdef _USE_GLD3_WGL
+// Synonyms
+#define gldLogMessage	ddlogMessage
+#define gldLogError		ddlogError
+#define gldLogPrintf	ddlogPrintf
+#endif
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/mesa/drivers/windows/gldirect/dglcontext.c b/src/mesa/drivers/windows/gldirect/dglcontext.c
new file mode 100644
index 0000000000..a420b36ffb
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dglcontext.c
@@ -0,0 +1,2212 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  Context handling.
+*
+****************************************************************************/
+
+#include "dglcontext.h"
+
+// Get compile errors without this. KeithH
+//#include "scitech.h"	// ibool, etc.
+
+#ifdef _USE_GLD3_WGL
+#include "gld_driver.h"
+
+extern void _gld_mesa_warning(GLcontext *, char *);
+extern void _gld_mesa_fatal(GLcontext *, char *);
+#endif // _USE_GLD3_WGL
+
+// TODO: Clean out old DX6-specific code from GLD 2.x CAD driver
+// if it is no longer being built as part of GLDirect. (DaveM)
+
+// ***********************************************************************
+
+// Failure categories recorded in nContextError while a context is created:
+//   GLDERR_NONE  - no error (value nContextError is reset to)
+//   GLDERR_MEM   - a DirectDraw surface could not be created or attached
+//   GLDERR_DDRAW - the DirectDraw / DirectDraw4 interface could not be obtained
+//   GLDERR_D3D   - the Direct3D interface could not be obtained
+//   GLDERR_BPP   - the desktop colour depth is not supported
+#define GLDERR_NONE     0
+#define GLDERR_MEM      1
+#define GLDERR_DDRAW    2
+#define GLDERR_D3D      3
+#define GLDERR_BPP      4
+
+// User-facing explanation for running out of video memory resources.
+// NOTE(review): presumably paired with GLDERR_MEM - the code that displays
+// these strings is not visible here.
+char szResourceWarning[] =
+"GLDirect does not have enough video memory resources\n"
+"to support the requested OpenGL rendering context.\n\n"
+"You may have to reduce the current display resolution\n"
+"to obtain satisfactory OpenGL performance.\n";
+
+// User-facing explanation for a DirectDraw initialisation failure
+// (presumably paired with GLDERR_DDRAW - TODO confirm).
+char szDDrawWarning[] =
+"GLDirect is unable to initialize DirectDraw for the\n"
+"requested OpenGL rendering context.\n\n"
+"You will have to check the DirectX control panel\n"
+"for further information.\n";
+
+// User-facing explanation for a Direct3D initialisation failure
+// (presumably paired with GLDERR_D3D - TODO confirm).
+char szD3DWarning[] =
+"GLDirect is unable to initialize Direct3D for the\n"
+"requested OpenGL rendering context.\n\n"
+"You may have to change the display mode resolution\n"
+"color depth or check the DirectX control panel for\n"
+"further information.\n";
+
+// User-facing explanation for an unusable colour depth
+// (presumably paired with GLDERR_BPP - TODO confirm).
+char szBPPWarning[] =
+"GLDirect is unable to use the selected color depth for\n"
+"the requested OpenGL rendering context.\n\n"
+"You will have to change the display mode resolution\n"
+"color depth with the Display Settings control panel.\n";
+
+// Last context-creation failure category; one of the GLDERR_* values.
+int nContextError = GLDERR_NONE;
+
+// ***********************************************************************
+
+#define VENDORID_ATI 0x1002
+
+// Device ids (for vendor VENDORID_ATI) of the ATI Rage PRO family.
+// dglTestForBrokenCards() turns off glb.bUseMipmaps when the installed
+// device id appears in this list.
+static DWORD devATIRagePro[] = {
+	0x4742, // 3D RAGE PRO BGA AGP 1X/2X
+	0x4744, // 3D RAGE PRO BGA AGP 1X only
+	0x4749, // 3D RAGE PRO BGA PCI 33 MHz
+	0x4750, // 3D RAGE PRO PQFP PCI 33 MHz
+	0x4751, // 3D RAGE PRO PQFP PCI 33 MHz limited 3D
+	0x4C42, // 3D RAGE LT PRO BGA-312 AGP 133 MHz
+	0x4C44, // 3D RAGE LT PRO BGA-312 AGP 66 MHz
+	0x4C49, // 3D RAGE LT PRO BGA-312 PCI 33 MHz
+	0x4C50, // 3D RAGE LT PRO BGA-256 PCI 33 MHz
+	0x4C51, // 3D RAGE LT PRO BGA-256 PCI 33 MHz limited 3D
+};
+
+// Device ids (for vendor VENDORID_ATI) of the ATI Rage II+ family.
+// dglTestForBrokenCards() turns on glb.bEmulateAlphaTest when the installed
+// device id appears in this list.
+static DWORD devATIRageIIplus[] = {
+	0x4755, // 3D RAGE II+
+	0x4756, // 3D RAGE IIC PQFP PCI
+	0x4757, // 3D RAGE IIC BGA AGP
+	0x475A, // 3D RAGE IIC PQFP AGP
+	0x4C47, // 3D RAGE LT-G
+};
+
+// ***********************************************************************
+
+#ifndef _USE_GLD3_WGL
+extern DGL_mesaFuncs mesaFuncs;
+#endif
+
+extern DWORD dwLogging;
+
+#ifdef GLD_THREADS
+#pragma message("compiling DGLCONTEXT.C vars for multi-threaded support")
+CRITICAL_SECTION CriticalSection;		// for serialized access
+DWORD		dwTLSCurrentContext = 0xFFFFFFFF;	// TLS index for current context
+DWORD		dwTLSPixelFormat = 0xFFFFFFFF;		// TLS index for current pixel format
+#endif
+HGLRC		iCurrentContext = 0;		// Index of current context (static)
+BOOL		bContextReady = FALSE;		// Context state ready ?
+
+DGL_ctx		ctxlist[DGL_MAX_CONTEXTS];	// Context list
+
+// ***********************************************************************
+
+static BOOL bHaveWin95 = FALSE;
+static BOOL bHaveWinNT = FALSE;
+static BOOL bHaveWin2K = FALSE;
+
+/****************************************************************************
+REMARKS:
+Detect the installed OS type and record it in bHaveWin95, bHaveWinNT and
+bHaveWin2K. At most one of the flags is TRUE afterwards:
+  - VER_PLATFORM_WIN32_WINDOWS            -> bHaveWin95
+  - VER_PLATFORM_WIN32_NT, major <= 4     -> bHaveWinNT
+  - VER_PLATFORM_WIN32_NT, major >  4     -> bHaveWin2K
+  - VER_PLATFORM_WIN32s                   -> all flags FALSE
+If the version query fails, or reports a platform id not listed above, the
+flags keep whatever values they already had.
+****************************************************************************/
+static void DetectOS(void)
+{
+	OSVERSIONINFO VersionInformation;
+
+	// Start from a known state and tell the API which structure revision
+	// we are passing in.
+	ZeroMemory(&VersionInformation, sizeof(VersionInformation));
+	VersionInformation.dwOSVersionInfoSize = sizeof(VersionInformation);
+
+	// Never interpret the structure if the query did not fill it in.
+	if (!GetVersionEx(&VersionInformation))
+		return;
+
+	switch (VersionInformation.dwPlatformId) {
+		case VER_PLATFORM_WIN32_WINDOWS:
+			bHaveWin95 = TRUE;
+			bHaveWinNT = FALSE;
+			bHaveWin2K = FALSE;
+			break;
+		case VER_PLATFORM_WIN32_NT:
+			bHaveWin95 = FALSE;
+			if (VersionInformation.dwMajorVersion <= 4) {
+				bHaveWinNT = TRUE;
+				bHaveWin2K = FALSE;
+			}
+			else {
+				bHaveWinNT = FALSE;
+				bHaveWin2K = TRUE;
+			}
+			break;
+		case VER_PLATFORM_WIN32s:
+			bHaveWin95 = FALSE;
+			bHaveWinNT = FALSE;
+			bHaveWin2K = FALSE;
+			break;
+		default:
+			// Unknown platform id: leave the flags as they are.
+			break;
+	}
+}
+
+// ***********************************************************************
+
+HWND hWndEvent = NULL;					// event monitor window
+HWND hWndLastActive = NULL;				// last active client window
+// Forward declaration of the event monitor window procedure. It is declared
+// 'static' so that its linkage agrees with the static definition of
+// GLD_EventWndProc later in this file.
+static LONG __stdcall GLD_EventWndProc(HWND hwnd,UINT msg,WPARAM wParam,LPARAM lParam);
+
+// ***********************************************************************
+
+// Range check for a context handle. An HGLRC is used as a 1-based index
+// into ctxlist, so the acceptable values are 1..DGL_MAX_CONTEXTS.
+// Returns non-zero when the handle is in range, zero otherwise.
+BOOL dglIsValidContext(
+	HGLRC a)
+{
+	const int nHandle = (int)a;
+
+	if (nHandle <= 0)
+		return FALSE;
+	return (nHandle <= DGL_MAX_CONTEXTS);
+}
+
+// ***********************************************************************
+
+// Translate an HGLRC into the address of its slot in ctxlist.
+// Handle N maps to ctxlist[N-1]; a handle rejected by dglIsValidContext()
+// yields NULL.
+DGL_ctx* dglGetContextAddress(
+	const HGLRC a)
+{
+	if (!dglIsValidContext(a))
+		return NULL;
+
+	return ctxlist + ((int)a - 1);
+}
+
+// ***********************************************************************
+
+// Fetch the current HGLRC from wherever it is kept: the thread-local slot
+// when the driver is built with GLD_THREADS and glb.bMultiThreaded is set,
+// otherwise the global iCurrentContext.
+HGLRC dglGetCurrentContext(void)
+{
+#ifdef GLD_THREADS
+	HGLRC hCurrent;
+
+	// Not running multi-threaded: the global is the only store
+	if (!glb.bMultiThreaded)
+		return iCurrentContext;
+
+	// Read the per-thread value, guarding against calls from arbitrary
+	// threads; fall back to the global if the read raises an exception
+	__try {
+		hCurrent = (HGLRC)TlsGetValue(dwTLSCurrentContext);
+	}
+	__except(EXCEPTION_EXECUTE_HANDLER) {
+		hCurrent = iCurrentContext;
+	}
+	return hCurrent;
+#else
+	return iCurrentContext;
+#endif
+}
+
+// ***********************************************************************
+
+// Record hGLRC as the current context: in the thread-local slot when the
+// driver is built with GLD_THREADS and glb.bMultiThreaded is set, otherwise
+// in the global iCurrentContext.
+void dglSetCurrentContext(HGLRC hGLRC)
+{
+#ifdef GLD_THREADS
+	// Not running multi-threaded: the global is the only store
+	if (!glb.bMultiThreaded) {
+		iCurrentContext = hGLRC;
+		return;
+	}
+
+	// Write the per-thread value, guarding against calls from arbitrary
+	// threads; fall back to the global if the write raises an exception
+	__try {
+		TlsSetValue(dwTLSCurrentContext, (LPVOID)hGLRC);
+	}
+	__except(EXCEPTION_EXECUTE_HANDLER) {
+		iCurrentContext = hGLRC;
+	}
+#else
+	iCurrentContext = hGLRC;
+#endif
+}
+
+// ***********************************************************************
+
+// Return the HDC cached in the currently active HGLRC.
+// Returns 0 when there is no current context, or when the current handle
+// does not map to a slot in the context list.
+HDC dglGetCurrentDC(void)
+{
+	DGL_ctx* lpCtx;
+
+	// dglGetContextAddress() yields NULL for a zero or out-of-range handle,
+	// so a single NULL test covers both "no context" and "bad handle".
+	lpCtx = dglGetContextAddress(dglGetCurrentContext());
+	if (lpCtx == NULL)
+		return 0;
+
+	return lpCtx->hDC;
+}
+
+// ***********************************************************************
+
+// Set up the driver's context bookkeeping: TLS slots (threaded builds),
+// an empty context table, the optional critical section, and finally the
+// bContextReady flag.
+void dglInitContextState()
+{
+	DGL_ctx *lpSlot;
+	WNDCLASS wc;	// only referenced by the disabled window code below
+
+#ifdef GLD_THREADS
+	// Reserve the TLS indexes holding the per-thread context and pixel format
+	dwTLSCurrentContext = TlsAlloc();
+	dwTLSPixelFormat = TlsAlloc();
+#endif
+
+	// Start with no rendering context selected
+	dglSetCurrentContext(NULL);
+
+	// Wipe the whole context table...
+	ZeroMemory(ctxlist, sizeof(ctxlist));
+
+	// ...and explicitly flag every slot as unused
+	for (lpSlot = ctxlist; lpSlot != ctxlist + DGL_MAX_CONTEXTS; lpSlot++)
+		lpSlot->bAllocated = FALSE;
+
+	// This section of code crashes the dll in circumstances where the app
+	// creates and destroys contexts.
+/*
+	// Register the class for our event monitor window
+	wc.style = 0;
+	wc.lpfnWndProc = GLD_EventWndProc;
+	wc.cbClsExtra = 0;
+	wc.cbWndExtra = 0;
+	wc.hInstance = GetModuleHandle(NULL);
+	wc.hIcon = LoadIcon(GetModuleHandle(NULL), IDI_APPLICATION);
+	wc.hCursor = LoadCursor(NULL, IDC_ARROW);
+	wc.hbrBackground = (HBRUSH)GetStockObject(BLACK_BRUSH);
+	wc.lpszMenuName = NULL;
+	wc.lpszClassName = "GLDIRECT";
+	RegisterClass(&wc);
+
+	// Create the non-visible window to monitor all broadcast messages
+	hWndEvent = CreateWindowEx(
+		WS_EX_TOOLWINDOW,"GLDIRECT","GLDIRECT",WS_POPUP,
+		0,0,0,0,
+		NULL,NULL,GetModuleHandle(NULL),NULL);
+*/
+
+#ifdef GLD_THREADS
+	// The critical section serializes access to the DirectDraw and DDStereo
+	// create/destroy functions; it is only needed when multi-threaded
+	if (glb.bMultiThreaded) {
+		InitializeCriticalSection(&CriticalSection);
+	}
+#endif
+
+	// Everything is in place: context state may now be used
+	bContextReady = TRUE;
+}
+
+// ***********************************************************************
+
+// Tear down the driver's context bookkeeping: delete any contexts the
+// application left allocated, clear bContextReady, destroy the event monitor
+// window and (threaded builds) release the critical section and TLS indexes.
+// Only the first call does any work; later calls return immediately.
+void dglDeleteContextState()
+{
+	int i;
+	static BOOL bOnceIsEnough = FALSE;	// set on the first call, never cleared
+
+	// Only call once, from either DGL_exitDriver(), or DLL_PROCESS_DETACH
+	if (bOnceIsEnough)
+		return;
+	bOnceIsEnough = TRUE;
+
+	// Slot i corresponds to handle i+1 (handles are 1-based)
+	for (i=0; i<DGL_MAX_CONTEXTS; i++) {
+		if (ctxlist[i].bAllocated == TRUE) {
+			ddlogPrintf(DDLOG_WARN, "** Context %i not deleted - cleaning up.", (i+1));
+			dglDeleteContext((HGLRC)(i+1));
+		}
+	}
+
+	// Context state is no longer ready
+	bContextReady = FALSE;
+
+    // If executed when DLL unloads, DDraw objects may be invalid.
+    // So catch any page faults with this exception handler.
+__try {
+
+	// Release final DirectDraw interfaces
+	// (every RELEASE below is currently commented out, so this guarded
+	// region does nothing at present)
+	if (glb.bDirectDrawPersistant) {
+//		RELEASE(glb.lpGlobalPalette);
+//		RELEASE(glb.lpDepth4);
+//		RELEASE(glb.lpBack4);
+//		RELEASE(glb.lpPrimary4);
+//	    RELEASE(glb.lpDD4);
+    }
+}
+__except(EXCEPTION_EXECUTE_HANDLER) {
+    ddlogPrintf(DDLOG_WARN, "Exception raised in dglDeleteContextState.");
+}
+
+	// Destroy our event monitor window
+	if (hWndEvent) {
+		DestroyWindow(hWndEvent);
+		hWndEvent = hWndLastActive = NULL;
+	}
+
+#ifdef GLD_THREADS
+	// Destroy the critical section object
+	// (guarded by the same glb.bMultiThreaded test used when creating it)
+	if (glb.bMultiThreaded)
+		DeleteCriticalSection(&CriticalSection);
+
+	// Release thread local storage indexes for current HGLRC and pixel format
+	TlsFree(dwTLSPixelFormat);
+	TlsFree(dwTLSCurrentContext);
+#endif
+}
+
+// ***********************************************************************
+
+// Application Window message handler interception.
+//
+// Looks up the original window procedure for hwnd (either the one saved for
+// the main window in glb, or the one saved in the matching context), handles
+// a few messages itself before and after forwarding, and returns the result
+// of the original procedure. Windows that are neither the main window nor
+// owned by a context are passed straight to DefWindowProc.
+static LONG __stdcall dglWndProc(
+	HWND hwnd,
+	UINT msg,
+	WPARAM wParam,
+	LPARAM lParam)
+{
+	DGL_ctx* 	lpCtx = NULL;			// context owning hwnd, if any
+	LONG 		lpfnWndProc = 0L;		// original window procedure to forward to
+	int  		i;
+	HGLRC 		hGLRC;					// not used in this function
+	RECT 		rect;
+	PAINTSTRUCT	ps;
+    BOOL        bQuit = FALSE;			// TRUE once WM_QUIT/WM_DESTROY was seen
+    BOOL        bMain = FALSE;			// TRUE if hwnd is glb.hWndActive
+    LONG        rc;
+
+    // Get the window's message handler *before* it is unhooked in WM_DESTROY
+
+    // Is this the main window?
+    if (hwnd == glb.hWndActive) {
+        bMain = TRUE;
+        lpfnWndProc = glb.lpfnWndProc;
+    }
+    // Search for DGL context matching window handle
+    // (a match overrides the handler taken from glb above; the loop stops at
+    // the first match, so i keeps indexing that slot while lpCtx is non-NULL)
+    for (i=0; i<DGL_MAX_CONTEXTS; i++) {
+	    if (ctxlist[i].hWnd == hwnd) {
+	        lpCtx = &ctxlist[i];
+	        lpfnWndProc = lpCtx->lpfnWndProc;
+		    break;
+        }
+    }
+	// Not one of ours...
+	if (!lpfnWndProc)
+	    return DefWindowProc(hwnd, msg, wParam, lParam);
+
+    // Intercept messages and process *before* passing on to window
+	switch (msg) {
+#ifdef _USE_GLD3_WGL
+	case WM_DISPLAYCHANGE:
+		// Flag the cached pixel formats as dirty after a display change
+		glb.bPixelformatsDirty = TRUE;
+		break;
+#endif
+	case WM_ACTIVATEAPP:
+		glb.bAppActive = (BOOL)wParam;
+		ddlogPrintf(DDLOG_INFO, "Calling app has been %s", glb.bAppActive ? "activated" : "de-activated");
+		break;
+	case WM_ERASEBKGND:
+		// Eat the GDI erase event for the GL window
+		// (only once the context has been made current; otherwise forward)
+        if (!lpCtx || !lpCtx->bHasBeenCurrent)
+            break;
+		lpCtx->bGDIEraseBkgnd = TRUE;
+		return TRUE;
+	case WM_PAINT:
+		// Eat the invalidated update region if render scene is in progress
+        if (!lpCtx || !lpCtx->bHasBeenCurrent)
+            break;
+		if (lpCtx->bFrameStarted) {
+			if (GetUpdateRect(hwnd, &rect, FALSE)) {
+				// Empty BeginPaint/EndPaint pair plus ValidateRect: nothing
+				// is drawn and the message is not forwarded
+				BeginPaint(hwnd, &ps);
+				EndPaint(hwnd, &ps);
+				ValidateRect(hwnd, &rect);
+				return TRUE;
+				}
+			}
+		break;
+	}
+	// Call the appropriate window message handler
+	rc = CallWindowProc((WNDPROC)lpfnWndProc, hwnd, msg, wParam, lParam);
+
+    // Intercept messages and process *after* passing on to window
+	switch (msg) {
+    case WM_QUIT:
+	case WM_DESTROY:
+        bQuit = TRUE;
+		// The window is going away: delete the context that was bound to it
+		// (handle value is slot index + 1)
+		if (lpCtx && lpCtx->bAllocated) {
+			ddlogPrintf(DDLOG_WARN, "WM_DESTROY detected for HWND=%X, HDC=%X, HGLRC=%d", hwnd, lpCtx->hDC, i+1);
+			dglDeleteContext((HGLRC)(i+1));
+		}
+		break;
+#if 0
+	case WM_SIZE:
+		// Resize surfaces to fit window but not viewport (in case app did not bother)
+        if (!lpCtx || !lpCtx->bHasBeenCurrent)
+            break;
+		w = LOWORD(lParam);
+		h = HIWORD(lParam);
+		if (lpCtx->dwWidth < w || lpCtx->dwHeight < h) {
+			if (!dglWglResizeBuffers(lpCtx->glCtx, TRUE))
+                 dglWglResizeBuffers(lpCtx->glCtx, FALSE);
+        }
+		break;
+#endif
+    }
+
+    // If the main window is quitting, then so should we...
+    if (bMain && bQuit) {
+		ddlogPrintf(DDLOG_SYSTEM, "shutting down after WM_DESTROY detected for main HWND=%X", hwnd);
+        dglDeleteContextState();
+        dglExitDriver();
+    }
+
+    return rc;
+}
+
+// ***********************************************************************
+
+// Window procedure for the driver's own event monitor window.
+// When this window is activated (WA_ACTIVE) and a main application window is
+// known, focus is handed to that window and 0 is returned; every other
+// message goes to DefWindowProc.
+static LONG __stdcall GLD_EventWndProc(
+	HWND hwnd,
+	UINT msg,
+	WPARAM wParam,
+	LPARAM lParam)
+{
+	// WM_ACTIVATE may be sent by splash screen dialog on exit
+	if (msg == WM_ACTIVATE &&
+		LOWORD(wParam) == WA_ACTIVE &&
+		glb.hWndActive)
+	{
+		SetForegroundWindow(glb.hWndActive);
+		return 0;
+	}
+
+	return DefWindowProc(hwnd, msg, wParam, lParam);
+}
+
+// ***********************************************************************
+
+// Intercepted Keyboard handler for detecting hot keys.
+// No hot keys are handled in this version: the event is simply passed to
+// the next hook in the chain and that hook's result is returned.
+LRESULT CALLBACK dglKeyProc(
+	int code,
+	WPARAM wParam,
+	LPARAM lParam)
+{
+	return CallNextHookEx(hKeyHook, code, wParam, lParam);
+}
+
+// ***********************************************************************
+
+HWND hWndMatch;
+
+// Window handle enumeration procedure for dglFindWindowRect().
+// lParam carries a RECT* to compare against. The first child window whose
+// client rectangle equals that RECT is stored in hWndMatch and enumeration
+// stops (FALSE is returned); otherwise TRUE is returned to keep going.
+BOOL CALLBACK dglEnumChildProc(
+    HWND hWnd,
+    LPARAM lParam)
+{
+    RECT rect;
+
+    // If the client rect cannot be read there is nothing valid to compare,
+    // so skip this window instead of testing an uninitialised RECT.
+    if (!GetClientRect(hWnd, &rect))
+        return TRUE;
+
+    // Find window handle with matching client rect.
+    if (EqualRect(&rect, (RECT*)lParam)) {
+        hWndMatch = hWnd;
+        return FALSE;
+        }
+    // Continue with next child window.
+    return TRUE;
+}
+
+// ***********************************************************************
+
+// Search the children of the foreground window for one whose client
+// rectangle equals *pRect. Returns its handle, or NULL if none matched.
+// The result is passed back from dglEnumChildProc through hWndMatch.
+HWND dglFindWindowRect(
+    RECT* pRect)
+{
+    HWND hWndParent;
+
+    // Clear any result left over from a previous search
+    hWndMatch = NULL;
+
+    hWndParent = GetForegroundWindow();
+    EnumChildWindows(hWndParent, dglEnumChildProc, (LPARAM)pRect);
+
+    return hWndMatch;
+}
+
+// ***********************************************************************
+#ifndef _USE_GLD3_WGL
+// Choose the display mode for a context of lpCtx->dwWidth x lpCtx->dwHeight
+// from the list in glb.lpDisplayModes and store its size in
+// lpCtx->dwModeWidth / lpCtx->dwModeHeight.
+//
+// An exact size match is used if one exists. Otherwise the mode with the
+// smallest area that is at least as large as the window in both dimensions
+// is chosen. If no mode qualifies, a critical message is logged and the
+// context is left unchanged.
+void dglChooseDisplayMode(
+	DGL_ctx *lpCtx)
+{
+	int				i;
+	DWORD			area;
+	DWORD			bestarea	= 0;	// Area of lpBestDDSD (valid once it is set)
+	DDSURFACEDESC2	*lpDDSD		= NULL;	// Mode list pointer
+	DDSURFACEDESC2	*lpBestDDSD = NULL;	// Pointer to best
+
+	lpDDSD = glb.lpDisplayModes;
+	for (i=0; i<glb.nDisplayModeCount; i++, lpDDSD++) {
+		// An exact match always wins: record it and stop searching, so the
+		// mode dimensions are stored on this path as well.
+		if ((lpDDSD->dwWidth == lpCtx->dwWidth) &&
+			(lpDDSD->dwHeight == lpCtx->dwHeight)) {
+			lpBestDDSD = lpDDSD;
+			break;
+		}
+
+		// Ignore modes that are smaller than the window in either dimension.
+		if ((lpDDSD->dwWidth < lpCtx->dwWidth) ||
+			(lpDDSD->dwHeight < lpCtx->dwHeight))
+			continue;
+
+		// Keep the first candidate, then any later one with a smaller area.
+		area = lpDDSD->dwWidth * lpDDSD->dwHeight;
+		if ((lpBestDDSD == NULL) || (area < bestarea)) {
+			lpBestDDSD = lpDDSD;
+			bestarea = area;
+		}
+	}
+
+	// Safety check
+	if (lpBestDDSD == NULL) {
+		ddlogMessage(DDLOG_CRITICAL, "dglChooseDisplayMode");
+		return;
+	}
+
+	lpCtx->dwModeWidth = lpBestDDSD->dwWidth;
+	lpCtx->dwModeHeight = lpBestDDSD->dwHeight;
+
+	ddlogPrintf(DDLOG_INFO, "Matched (%ldx%ld) to (%ldx%ld)",
+		lpCtx->dwWidth, lpCtx->dwHeight, lpCtx->dwModeWidth, lpCtx->dwModeHeight);
+}
+#endif // _USE_GLD3_WGL
+// ***********************************************************************
+
+// Linear search of a device id table.
+//   lpDeviceIdList - table of device ids
+//   dwDeviceId     - id to look for
+//   count          - number of entries (not bytes) to examine
+// Returns TRUE if dwDeviceId is among the first 'count' entries, else FALSE.
+static BOOL IsDevice(
+	DWORD *lpDeviceIdList,
+	DWORD dwDeviceId,
+	int count)
+{
+	DWORD *lpEntry = lpDeviceIdList;
+	int nRemaining;
+
+	for (nRemaining = count; nRemaining > 0; nRemaining--, lpEntry++) {
+		if (*lpEntry == dwDeviceId)
+			return TRUE;
+	}
+
+	return FALSE;
+}
+
+// ***********************************************************************
+
+// Apply driver workarounds for specific display adapters.
+//
+// Queries the DirectDraw device identifier of lpCtx->lpDD4 and, depending on
+// the vendor/device id:
+//   - ATI Rage PRO family : clears glb.bUseMipmaps
+//   - ATI Rage II+ family : sets glb.bEmulateAlphaTest
+//   - Matrox 0x102B/0x0525: removes stencil support from the context's
+//                           pixel format and its Z-buffer format entry
+// Does nothing when built with _GLD3, when lpCtx or its DD4 interface is
+// NULL, on NT-based Windows, or when the identifier query fails.
+void dglTestForBrokenCards(
+	DGL_ctx *lpCtx)
+{
+#ifndef _GLD3
+	DDDEVICEIDENTIFIER	dddi; // DX6 device identifier
+
+	// Sanity check.
+	if (lpCtx == NULL) {
+		// Testing for broken cards is sensitive area, so we don't want
+		// anything saying "broken cards" in the error message. ;)
+		ddlogMessage(DDLOG_ERROR, "Null context passed to TFBC\n");
+		return;
+	}
+
+	if (lpCtx->lpDD4 == NULL) {
+		// Testing for broken cards is sensitive area, so we don't want
+		// anything saying "broken cards" in the error message. ;)
+		ddlogMessage(DDLOG_ERROR, "Null DD4 passed to TFBC\n");
+		return;
+	}
+
+	// GetDeviceIdentifier is unreliable on Windows 2000: it locks up with
+	// the stock drivers on the CD. Updated drivers from vendors appear to
+	// work, but we can't identify the drivers without this function!
+	// For now we skip these tests whenever the high bit of GetVersion() is
+	// clear, i.e. on NT-based systems.
+	if ((GetVersion() & 0x80000000UL) == 0)
+		return;
+
+	// Obtain device info
+	if (FAILED(IDirectDraw4_GetDeviceIdentifier(lpCtx->lpDD4, &dddi, 0)))
+		return;
+
+	// Useful info. Log it.
+	ddlogPrintf(DDLOG_INFO, "DirectDraw: VendorId=0x%x, DeviceId=0x%x", dddi.dwVendorId, dddi.dwDeviceId);
+
+	// Vendor 1: ATI
+	// IsDevice() expects an element count, so divide the table size in
+	// bytes by the size of one entry.
+	if (dddi.dwVendorId == VENDORID_ATI) {
+		// Test A: ATI Rage PRO
+		if (IsDevice(devATIRagePro, dddi.dwDeviceId,
+					 (int)(sizeof(devATIRagePro) / sizeof(devATIRagePro[0]))))
+			glb.bUseMipmaps = FALSE;
+		// Test B: ATI Rage II+
+		if (IsDevice(devATIRageIIplus, dddi.dwDeviceId,
+					 (int)(sizeof(devATIRageIIplus) / sizeof(devATIRageIIplus[0]))))
+			glb.bEmulateAlphaTest = TRUE;
+	}
+
+	// Vendor 2: Matrox
+	if (dddi.dwVendorId == 0x102B) {
+		// Test: Matrox G400 stencil buffer support does not work for AutoCAD
+		if (dddi.dwDeviceId == 0x0525) {
+			lpCtx->lpPF->pfd.cStencilBits = 0;
+			// iZBufferPF == -1 means the pixel format has no Z-buffer
+			// format entry to patch
+			if (lpCtx->lpPF->iZBufferPF != -1) {
+				glb.lpZBufferPF[lpCtx->lpPF->iZBufferPF].dwStencilBitDepth = 0;
+				glb.lpZBufferPF[lpCtx->lpPF->iZBufferPF].dwStencilBitMask = 0;
+				glb.lpZBufferPF[lpCtx->lpPF->iZBufferPF].dwFlags &= ~DDPF_STENCILBUFFER;
+			}
+		}
+	}
+#endif // _GLD3
+}
+
+// ***********************************************************************
+
+BOOL dglCreateContextBuffers(
+	HDC a,
+	DGL_ctx *lpCtx,
+	BOOL bFallback)
+{
+	HRESULT				hResult;
+
+	int					i;
+//	HGLRC				hGLRC;
+//	DGL_ctx*			lpCtx;
+
+#ifndef _USE_GLD3_WGL
+	DWORD				dwFlags;
+	DDSURFACEDESC2		ddsd2;
+	DDSCAPS2			ddscaps2;
+	LPDIRECTDRAWCLIPPER	lpddClipper;
+	D3DDEVICEDESC		D3DHWDevDesc;	// Direct3D Hardware description
+	D3DDEVICEDESC		D3DHELDevDesc;	// Direct3D Hardware Emulation Layer
+#endif // _USE_GLD3_WGL
+
+	float				inv_aspect;
+
+	GLenum				bDoubleBuffer;	// TRUE if double buffer required
+	GLenum				bDepthBuffer;	// TRUE if depth buffer required
+
+	const PIXELFORMATDESCRIPTOR	*lpPFD = &lpCtx->lpPF->pfd;
+
+	// Vars for Mesa visual
+	DWORD				dwDepthBits		= 0;
+	DWORD				dwStencilBits	= 0;
+	DWORD				dwAlphaBits		= 0;
+	DWORD				bAlphaSW		= GL_FALSE;
+	DWORD				bDouble			= GL_FALSE;
+
+	DDSURFACEDESC2 		ddsd2DisplayMode;
+	BOOL				bFullScrnWin	= FALSE;	// fullscreen-size window ?
+	DDBLTFX 			ddbltfx;
+	DWORD				dwMemoryType 	= (bFallback) ? DDSCAPS_SYSTEMMEMORY : glb.dwMemoryType;
+	BOOL				bBogusWindow	= FALSE;	// non-drawable window ?
+	DWORD               dwColorRef      = 0;        // GDI background color
+	RECT				rcDst;						// GDI window rect
+	POINT				pt;							// GDI window point
+
+	// Palette used for creating default global palette
+	PALETTEENTRY	ppe[256];
+
+#ifndef _USE_GLD3_WGL
+	// Vertex buffer description. Used for creation of vertex buffers
+	D3DVERTEXBUFFERDESC vbufdesc;
+#endif // _USE_GLD3_WGL
+
+#define DDLOG_CRITICAL_OR_WARN	(bFallback ? DDLOG_CRITICAL : DDLOG_WARN)
+
+	ddlogPrintf(DDLOG_SYSTEM, "dglCreateContextBuffers for HDC=%X", a);
+    nContextError = GLDERR_NONE;
+
+#ifdef GLD_THREADS
+	// Serialize access to DirectDraw object creation or DDS start
+	if (glb.bMultiThreaded)
+		EnterCriticalSection(&CriticalSection);
+#endif
+
+	// Check for back buffer
+	bDoubleBuffer = GL_TRUE; //(lpPFD->dwFlags & PFD_DOUBLEBUFFER) ? GL_TRUE : GL_FALSE;
+	// Since we always do back buffering, check if we emulate front buffering
+	lpCtx->EmulateSingle =
+		(lpPFD->dwFlags & PFD_DOUBLEBUFFER) ? FALSE : TRUE;
+#if 0	// Don't have to mimic MS OpenGL behavior for front-buffering (DaveM)
+	lpCtx->EmulateSingle |=
+		(lpPFD->dwFlags & PFD_SUPPORT_GDI) ? TRUE : FALSE;
+#endif
+
+	// Check for depth buffer
+	bDepthBuffer = (lpPFD->cDepthBits) ? GL_TRUE : GL_FALSE;
+
+	lpCtx->bDoubleBuffer = bDoubleBuffer;
+	lpCtx->bDepthBuffer = bDepthBuffer;
+
+	// Set the Fullscreen flag for the context.
+//	lpCtx->bFullscreen = glb.bFullscreen;
+
+	// Obtain the dimensions of the rendering window
+	lpCtx->hDC = a; // Cache DC
+	lpCtx->hWnd = WindowFromDC(lpCtx->hDC);
+	// Check for non-window DC = memory DC ?
+	if (lpCtx->hWnd == NULL) {
+        // bitmap memory contexts are always single-buffered
+        lpCtx->EmulateSingle = TRUE;
+		bBogusWindow = TRUE;
+		ddlogPrintf(DDLOG_INFO, "Non-Window Memory Device Context");
+		if (GetClipBox(lpCtx->hDC, &lpCtx->rcScreenRect) == ERROR) {
+			ddlogMessage(DDLOG_WARN, "GetClipBox failed in dglCreateContext\n");
+			SetRect(&lpCtx->rcScreenRect, 0, 0, 0, 0);
+		}
+	}
+	else if (!GetClientRect(lpCtx->hWnd, &lpCtx->rcScreenRect)) {
+		bBogusWindow = TRUE;
+		ddlogMessage(DDLOG_WARN, "GetClientRect failed in dglCreateContext\n");
+		SetRect(&lpCtx->rcScreenRect, 0, 0, 0, 0);
+	}
+	lpCtx->dwWidth = lpCtx->rcScreenRect.right - lpCtx->rcScreenRect.left;
+	lpCtx->dwHeight = lpCtx->rcScreenRect.bottom - lpCtx->rcScreenRect.top;
+
+	ddlogPrintf(DDLOG_INFO, "Input window %X: w=%i, h=%i",
+							lpCtx->hWnd, lpCtx->dwWidth, lpCtx->dwHeight);
+
+	// What if app only zeroes one dimension instead of both? (DaveM)
+	if ( (lpCtx->dwWidth == 0) || (lpCtx->dwHeight == 0) ) {
+		// Make the buffer size something sensible
+		lpCtx->dwWidth = 8;
+		lpCtx->dwHeight = 8;
+	}
+
+	// Set defaults
+	lpCtx->dwModeWidth = lpCtx->dwWidth;
+	lpCtx->dwModeHeight = lpCtx->dwHeight;
+/*
+	// Find best display mode for fullscreen
+	if (glb.bFullscreen || !glb.bPrimary) {
+		dglChooseDisplayMode(lpCtx);
+	}
+*/
+	// Misc initialisation
+	lpCtx->bCanRender = FALSE; // No rendering allowed yet
+	lpCtx->bSceneStarted = FALSE;
+	lpCtx->bFrameStarted = FALSE;
+
+	// Detect OS (specifically 'Windows 2000' or 'Windows XP')
+	DetectOS();
+
+	// NOTE: WinNT not supported
+	ddlogPrintf(DDLOG_INFO, "OS: %s", bHaveWin95 ? "Win9x" : (bHaveWin2K ? "Win2000/XP" : "Unsupported") );
+
+	// Test for Fullscreen
+	if (bHaveWin95) { // Problems with fullscreen on Win2K/XP
+		if ((GetSystemMetrics(SM_CXSCREEN) == lpCtx->dwWidth) && 
+			(GetSystemMetrics(SM_CYSCREEN) == lpCtx->dwHeight))
+		{
+			// Workaround for some apps that crash when going fullscreen.
+			//lpCtx->bFullscreen = TRUE;
+		}
+		
+	}
+
+#ifdef _USE_GLD3_WGL
+	_gldDriver.CreateDrawable(lpCtx, glb.bDirectDrawPersistant, glb.bPersistantBuffers);
+#else
+	// Check if DirectDraw has already been created by original GLRC (DaveM)
+	if (glb.bDirectDrawPersistant && glb.bDirectDraw) {
+		lpCtx->lpDD4 = glb.lpDD4;
+		IDirectDraw4_AddRef(lpCtx->lpDD4);
+		goto SkipDirectDrawCreate;
+	}
+
+	// Create DirectDraw object
+	if (glb.bPrimary)
+		hResult = DirectDrawCreate(NULL, &lpCtx->lpDD1, NULL);
+	else {
+		// A non-primary device is to be used.
+		// Force context to be Fullscreen, secondary adaptors can not
+		// be used in a window.
+		hResult = DirectDrawCreate(&glb.ddGuid, &lpCtx->lpDD1, NULL);
+		lpCtx->bFullscreen = TRUE;
+	}
+	if (FAILED(hResult)) {
+		MessageBox(NULL, "Unable to initialize DirectDraw", "GLDirect", MB_OK);
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "Unable to create DirectDraw interface", hResult);
+        nContextError = GLDERR_DDRAW;
+		goto return_with_error;
+	}
+
+	// Query for DX6 IDirectDraw4.
+	hResult = IDirectDraw_QueryInterface(lpCtx->lpDD1,
+										 &IID_IDirectDraw4,
+										 (void**)&lpCtx->lpDD4);
+	if (FAILED(hResult)) {
+		MessageBox(NULL, "GLDirect requires DirectX 6.0 or above", "GLDirect", MB_OK);
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "Unable to create DirectDraw4 interface", hResult);
+        nContextError = GLDERR_DDRAW;
+		goto return_with_error;
+	}
+
+	// Cache DirectDraw interface for subsequent GLRCs
+	if (glb.bDirectDrawPersistant && !glb.bDirectDraw) {
+		glb.lpDD4 = lpCtx->lpDD4;
+		IDirectDraw4_AddRef(glb.lpDD4);
+		glb.bDirectDraw = TRUE;
+	}
+SkipDirectDrawCreate:
+
+	// Now we have a DD4 interface we can check for broken cards
+	dglTestForBrokenCards(lpCtx);
+
+	// Test if primary device can use flipping instead of blitting
+	ZeroMemory(&ddsd2DisplayMode, sizeof(ddsd2DisplayMode));
+	ddsd2DisplayMode.dwSize = sizeof(ddsd2DisplayMode);
+	hResult = IDirectDraw4_GetDisplayMode(
+					lpCtx->lpDD4,
+					&ddsd2DisplayMode);
+	if (SUCCEEDED(hResult)) {
+		if ( (lpCtx->dwWidth == ddsd2DisplayMode.dwWidth) &&
+				 (lpCtx->dwHeight == ddsd2DisplayMode.dwHeight) ) {
+			// We have a fullscreen-size window
+			bFullScrnWin = TRUE;
+			// OK to use DirectDraw fullscreen mode ?
+			if (glb.bPrimary && !glb.bFullscreenBlit && !lpCtx->EmulateSingle && !glb.bDirectDrawPersistant) {
+				lpCtx->bFullscreen = TRUE;
+				ddlogMessage(DDLOG_INFO, "Primary upgraded to page flipping.\n");
+			}
+		}
+		// Cache the display mode dimensions
+		lpCtx->dwModeWidth = ddsd2DisplayMode.dwWidth;
+		lpCtx->dwModeHeight = ddsd2DisplayMode.dwHeight;
+	}
+
+	// Clamp the effective window dimensions to primary surface.
+	// We need to do this for D3D viewport dimensions even if wide
+	// surfaces are supported. This also is a good idea for handling
+	// whacked-out window dimensions passed for non-drawable windows
+	// like Solid Edge. (DaveM)
+	if (lpCtx->dwWidth > ddsd2DisplayMode.dwWidth)
+		lpCtx->dwWidth = ddsd2DisplayMode.dwWidth;
+	if (lpCtx->dwHeight > ddsd2DisplayMode.dwHeight)
+		lpCtx->dwHeight = ddsd2DisplayMode.dwHeight;
+
+	// Check for non-RGB desktop resolution
+	if (!lpCtx->bFullscreen && ddsd2DisplayMode.ddpfPixelFormat.dwRGBBitCount <= 8) {
+		ddlogPrintf(DDLOG_CRITICAL_OR_WARN, "Desktop color depth %d bpp not supported",
+			ddsd2DisplayMode.ddpfPixelFormat.dwRGBBitCount);
+        nContextError = GLDERR_BPP;
+		goto return_with_error;
+	}
+#endif // _USE_GLD3_WGL
+
+	ddlogPrintf(DDLOG_INFO, "Window: w=%i, h=%i (%s)",
+							lpCtx->dwWidth,
+							lpCtx->dwHeight,
+							lpCtx->bFullscreen ? "fullscreen" : "windowed");
+
+#ifndef _USE_GLD3_WGL
+	// Obtain ddraw caps
+    ZeroMemory(&lpCtx->ddCaps, sizeof(DDCAPS));
+	lpCtx->ddCaps.dwSize = sizeof(DDCAPS);
+	if (glb.bHardware) {
+		// Get HAL caps
+		IDirectDraw4_GetCaps(lpCtx->lpDD4, &lpCtx->ddCaps, NULL);
+	} else {
+		// Get HEL caps
+		IDirectDraw4_GetCaps(lpCtx->lpDD4, NULL, &lpCtx->ddCaps);
+	}
+
+	// If this flag is present then we can't default to Mesa
+	// SW rendering between BeginScene() and EndScene().
+	if (lpCtx->ddCaps.dwCaps2 & DDCAPS2_NO2DDURING3DSCENE) {
+		ddlogMessage(DDLOG_INFO,
+			"Warning          : No 2D allowed during 3D scene.\n");
+	}
+
+	// Query for DX6 Direct3D3 interface
+	hResult = IDirectDraw4_QueryInterface(lpCtx->lpDD4,
+										  &IID_IDirect3D3,
+										  (void**)&lpCtx->lpD3D3);
+	if (FAILED(hResult)) {
+		MessageBox(NULL, "Unable to initialize Direct3D", "GLDirect", MB_OK);
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "Unable to create Direct3D interface", hResult);
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+
+	// Context creation
+	if (lpCtx->bFullscreen) {
+		// FULLSCREEN
+
+        // Disable warning popups when in fullscreen mode
+        ddlogWarnOption(FALSE);
+
+		// Have to release persistant primary surface if fullscreen mode
+		if (glb.bDirectDrawPersistant && glb.bDirectDrawPrimary) {
+			RELEASE(glb.lpPrimary4);
+			glb.bDirectDrawPrimary = FALSE;
+		}
+
+		dwFlags = DDSCL_EXCLUSIVE | DDSCL_FULLSCREEN | DDSCL_ALLOWREBOOT;
+		if (glb.bFastFPU)
+			dwFlags |= DDSCL_FPUSETUP;	// fast FPU setup optional (DaveM)
+		hResult = IDirectDraw4_SetCooperativeLevel(lpCtx->lpDD4, lpCtx->hWnd, dwFlags);
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "Unable to set Exclusive Fullscreen mode", hResult);
+			goto return_with_error;
+		}
+
+		hResult = IDirectDraw4_SetDisplayMode(lpCtx->lpDD4,
+											  lpCtx->dwModeWidth,
+											  lpCtx->dwModeHeight,
+											  lpPFD->cColorBits,
+											  0,
+											  0);
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "SetDisplayMode failed", hResult);
+			goto return_with_error;
+		}
+
+		// ** The display mode has changed, so dont use MessageBox! **
+
+		ZeroMemory(&ddsd2, sizeof(ddsd2));
+		ddsd2.dwSize = sizeof(ddsd2);
+
+		if (bDoubleBuffer) {
+			// Double buffered
+			// Primary surface
+			ddsd2.dwFlags = DDSD_CAPS | DDSD_BACKBUFFERCOUNT;
+			ddsd2.ddsCaps.dwCaps = DDSCAPS_PRIMARYSURFACE |
+								   DDSCAPS_FLIP |
+								   DDSCAPS_COMPLEX |
+								   DDSCAPS_3DDEVICE |
+								   dwMemoryType;
+			ddsd2.dwBackBufferCount = 1;
+
+			hResult = IDirectDraw4_CreateSurface(lpCtx->lpDD4, &ddsd2, &lpCtx->lpFront4, NULL);
+			if (FAILED(hResult)) {
+				ddlogError(DDLOG_CRITICAL_OR_WARN, "CreateSurface (primary) failed", hResult);
+                nContextError = GLDERR_MEM;
+				goto return_with_error;
+			}
+
+			// Render target surface
+			ZeroMemory(&ddscaps2, sizeof(ddscaps2)); // Clear the entire struct.
+			ddscaps2.dwCaps = DDSCAPS_BACKBUFFER;
+			hResult = IDirectDrawSurface4_GetAttachedSurface(lpCtx->lpFront4, &ddscaps2, &lpCtx->lpBack4);
+			if (FAILED(hResult)) {
+				ddlogError(DDLOG_CRITICAL_OR_WARN, "GetAttachedSurface failed", hResult);
+                nContextError = GLDERR_MEM;
+				goto return_with_error;
+			}
+		} else {
+			// Single buffered
+			// Primary surface
+			ddsd2.dwFlags = DDSD_CAPS;
+			ddsd2.ddsCaps.dwCaps = DDSCAPS_PRIMARYSURFACE |
+								   //DDSCAPS_3DDEVICE |
+								   dwMemoryType;
+
+			hResult = IDirectDraw4_CreateSurface(lpCtx->lpDD4, &ddsd2, &lpCtx->lpFront4, NULL);
+			if (FAILED(hResult)) {
+				ddlogError(DDLOG_CRITICAL_OR_WARN, "CreateSurface (primary) failed", hResult);
+                nContextError = GLDERR_MEM;
+				goto return_with_error;
+			}
+
+			lpCtx->lpBack4 = NULL;
+		}
+	} else {
+		// WINDOWED
+
+        // OK to enable warning popups in windowed mode
+        ddlogWarnOption(glb.bMessageBoxWarnings);
+
+		dwFlags = DDSCL_NORMAL;
+		if (glb.bMultiThreaded)
+			dwFlags |= DDSCL_MULTITHREADED;
+		if (glb.bFastFPU)
+			dwFlags |= DDSCL_FPUSETUP;	// fast FPU setup optional (DaveM)
+		hResult = IDirectDraw4_SetCooperativeLevel(lpCtx->lpDD4,
+												  lpCtx->hWnd,
+												  dwFlags);
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "Unable to set Normal coop level", hResult);
+			goto return_with_error;
+		}
+		// Has Primary surface already been created for original GLRC ?
+		// Note this can only be applicable for windowed modes
+		if (glb.bDirectDrawPersistant && glb.bDirectDrawPrimary) {
+			lpCtx->lpFront4 = glb.lpPrimary4;
+			IDirectDrawSurface4_AddRef(lpCtx->lpFront4);
+			// Update the window on the default clipper
+			IDirectDrawSurface4_GetClipper(lpCtx->lpFront4, &lpddClipper);
+			IDirectDrawClipper_SetHWnd(lpddClipper, 0, lpCtx->hWnd);
+			IDirectDrawClipper_Release(lpddClipper);
+			goto SkipPrimaryCreate;
+		}
+
+		// Primary surface
+		ZeroMemory(&ddsd2, sizeof(ddsd2));
+		ddsd2.dwSize = sizeof(ddsd2);
+		ddsd2.dwFlags = DDSD_CAPS;
+		ddsd2.ddsCaps.dwCaps = DDSCAPS_PRIMARYSURFACE;
+		hResult = IDirectDraw4_CreateSurface(lpCtx->lpDD4, &ddsd2, &lpCtx->lpFront4, NULL);
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "CreateSurface (primary) failed", hResult);
+            nContextError = GLDERR_MEM;
+			goto return_with_error;
+		}
+
+		// Cache Primary surface for subsequent GLRCs
+		// Note this can only be applicable to subsequent windowed modes
+		if (glb.bDirectDrawPersistant && !glb.bDirectDrawPrimary) {
+			glb.lpPrimary4 = lpCtx->lpFront4;
+			IDirectDrawSurface4_AddRef(glb.lpPrimary4);
+			glb.bDirectDrawPrimary = TRUE;
+		}
+
+		// Clipper object
+		hResult = DirectDrawCreateClipper(0, &lpddClipper, NULL);
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "CreateClipper failed", hResult);
+			goto return_with_error;
+		}
+		hResult = IDirectDrawClipper_SetHWnd(lpddClipper, 0, lpCtx->hWnd);
+		if (FAILED(hResult)) {
+			RELEASE(lpddClipper);
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "SetHWnd failed", hResult);
+			goto return_with_error;
+		}
+		hResult = IDirectDrawSurface4_SetClipper(lpCtx->lpFront4, lpddClipper);
+		RELEASE(lpddClipper); // We have finished with it.
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "SetClipper failed", hResult);
+			goto return_with_error;
+		}
+SkipPrimaryCreate:
+
+		if (bDoubleBuffer) {
+			// Render target surface
+			ZeroMemory(&ddsd2, sizeof(ddsd2));
+			ddsd2.dwSize = sizeof(ddsd2);
+			ddsd2.dwFlags        = DDSD_CAPS | DDSD_WIDTH | DDSD_HEIGHT;
+			ddsd2.dwWidth        = lpCtx->dwWidth;
+			ddsd2.dwHeight       = lpCtx->dwHeight;
+			ddsd2.ddsCaps.dwCaps = DDSCAPS_3DDEVICE |
+								   DDSCAPS_OFFSCREENPLAIN |
+								   dwMemoryType;
+
+			// Reserve the entire desktop size for persistant buffers option
+			if (glb.bDirectDrawPersistant && glb.bPersistantBuffers) {
+				ddsd2.dwWidth = ddsd2DisplayMode.dwWidth;
+				ddsd2.dwHeight = ddsd2DisplayMode.dwHeight;
+			}
+			// Re-use original back buffer if persistant buffers exist
+			if (glb.bDirectDrawPersistant && glb.bPersistantBuffers && glb.lpBack4)
+				hResult = IDirectDrawSurface4_AddRef(lpCtx->lpBack4 = glb.lpBack4);
+			else
+				hResult = IDirectDraw4_CreateSurface(lpCtx->lpDD4, &ddsd2, &lpCtx->lpBack4, NULL);
+			if (FAILED(hResult)) {
+				ddlogError(DDLOG_CRITICAL_OR_WARN, "Create Backbuffer failed", hResult);
+                nContextError = GLDERR_MEM;
+				goto return_with_error;
+			}
+			if (glb.bDirectDrawPersistant && glb.bPersistantBuffers && !glb.lpBack4)
+				IDirectDrawSurface4_AddRef(glb.lpBack4 = lpCtx->lpBack4);
+		} else {
+			lpCtx->lpBack4 = NULL;
+		}
+	}
+
+	//
+	// Now create the Z-buffer
+	//
+	lpCtx->bStencil = FALSE; // Default to no stencil buffer
+	if (bDepthBuffer && (lpCtx->lpPF->iZBufferPF != -1)) {
+		// Get z-buffer dimensions from the render target
+		// Setup the surface desc for the z-buffer.
+		ZeroMemory(&ddsd2, sizeof(ddsd2));
+		ddsd2.dwSize = sizeof(ddsd2);
+		ddsd2.dwFlags = DDSD_CAPS | DDSD_WIDTH | DDSD_HEIGHT | DDSD_PIXELFORMAT;
+		ddsd2.ddsCaps.dwCaps = DDSCAPS_ZBUFFER | dwMemoryType;
+		ddsd2.dwWidth = lpCtx->dwWidth;
+		ddsd2.dwHeight = lpCtx->dwHeight;
+		memcpy(&ddsd2.ddpfPixelFormat,
+			&glb.lpZBufferPF[lpCtx->lpPF->iZBufferPF],
+			sizeof(DDPIXELFORMAT) );
+
+		// Reserve the entire desktop size for persistant buffers option
+		if (glb.bDirectDrawPersistant && glb.bPersistantBuffers) {
+			ddsd2.dwWidth = ddsd2DisplayMode.dwWidth;
+			ddsd2.dwHeight = ddsd2DisplayMode.dwHeight;
+		}
+
+		// Create a z-buffer
+		if (glb.bDirectDrawPersistant && glb.bPersistantBuffers && glb.lpDepth4)
+			hResult = IDirectDrawSurface4_AddRef(lpCtx->lpDepth4 = glb.lpDepth4);
+		else
+			hResult = IDirectDraw4_CreateSurface(lpCtx->lpDD4, &ddsd2, &lpCtx->lpDepth4, NULL);
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "CreateSurface (ZBuffer) failed", hResult);
+            nContextError = GLDERR_MEM;
+			goto return_with_error;
+		}
+		if (glb.bDirectDrawPersistant && glb.bPersistantBuffers && !glb.lpDepth4)
+			IDirectDrawSurface4_AddRef(glb.lpDepth4 = lpCtx->lpDepth4);
+		else if (glb.bDirectDrawPersistant && glb.bPersistantBuffers && glb.lpDepth4 && glb.lpBack4)
+			IDirectDrawSurface4_DeleteAttachedSurface(glb.lpBack4, 0, glb.lpDepth4);
+
+		// Attach Zbuffer to render target
+		TRY(IDirectDrawSurface4_AddAttachedSurface(
+			bDoubleBuffer ? lpCtx->lpBack4 : lpCtx->lpFront4,
+			lpCtx->lpDepth4),
+			"dglCreateContext: Attach Zbuffer");
+		if (glb.lpZBufferPF[lpCtx->lpPF->iZBufferPF].dwFlags & DDPF_STENCILBUFFER) {
+			lpCtx->bStencil = TRUE;
+			ddlogMessage(DDLOG_INFO, "Depth buffer has stencil\n");
+		}
+	}
+
+	// Clear all back buffers and Z-buffers in case of memory recycling.
+	ZeroMemory(&ddbltfx, sizeof(ddbltfx));
+	ddbltfx.dwSize = sizeof(ddbltfx);
+	IDirectDrawSurface4_Blt(lpCtx->lpBack4, NULL, NULL, NULL,
+		DDBLT_COLORFILL | DDBLT_WAIT, &ddbltfx);
+	if (lpCtx->lpDepth4)
+		IDirectDrawSurface4_Blt(lpCtx->lpDepth4, NULL, NULL, NULL,
+			DDBLT_COLORFILL | DDBLT_WAIT, &ddbltfx);
+
+	// Now that we have a Z-buffer we can create the 3D device
+	hResult = IDirect3D3_CreateDevice(lpCtx->lpD3D3,
+									  &glb.d3dGuid,
+									  bDoubleBuffer ? lpCtx->lpBack4 : lpCtx->lpFront4,
+									  &lpCtx->lpDev3,
+									  NULL);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "Unable to create Direct3D device", hResult);
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+
+	// We must do this as soon as the device is created
+	dglInitStateCaches(lpCtx);
+
+	// Obtain the D3D Device Description
+	D3DHWDevDesc.dwSize = D3DHELDevDesc.dwSize = sizeof(D3DDEVICEDESC);
+	TRY(IDirect3DDevice3_GetCaps(lpCtx->lpDev3,
+								 &D3DHWDevDesc,
+								 &D3DHELDevDesc),
+								 "dglCreateContext: GetCaps failed");
+
+	// Choose the relevant description and cache it in the context.
+	// We will use this description later for caps checking
+	memcpy(	&lpCtx->D3DDevDesc,
+			glb.bHardware ? &D3DHWDevDesc : &D3DHELDevDesc,
+			sizeof(D3DDEVICEDESC));
+
+	// Now we can examine the texture formats
+	if (!dglBuildTextureFormatList(lpCtx->lpDev3)) {
+		ddlogMessage(DDLOG_CRITICAL_OR_WARN, "dglBuildTextureFormatList failed\n");
+		goto return_with_error;
+	}
+
+	// Get the pixel format of the back buffer
+	lpCtx->ddpfRender.dwSize = sizeof(lpCtx->ddpfRender);
+	if (bDoubleBuffer)
+		hResult = IDirectDrawSurface4_GetPixelFormat(
+					lpCtx->lpBack4,
+					&lpCtx->ddpfRender);
+	else
+		hResult = IDirectDrawSurface4_GetPixelFormat(
+					lpCtx->lpFront4,
+					&lpCtx->ddpfRender);
+
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "GetPixelFormat failed", hResult);
+		goto return_with_error;
+	}
+	// Find a pixel packing function suitable for this surface
+	pxClassifyPixelFormat(&lpCtx->ddpfRender,
+						  &lpCtx->fnPackFunc,
+						  &lpCtx->fnUnpackFunc,
+						  &lpCtx->fnPackSpanFunc);
+
+	// Viewport
+	hResult = IDirect3D3_CreateViewport(lpCtx->lpD3D3, &lpCtx->lpViewport3, NULL);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "CreateViewport failed", hResult);
+		goto return_with_error;
+	}
+
+	hResult = IDirect3DDevice3_AddViewport(lpCtx->lpDev3, lpCtx->lpViewport3);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "AddViewport failed", hResult);
+		goto return_with_error;
+	}
+
+	// Initialise the viewport
+	// Note screen coordinates are used for viewport clipping since D3D
+	// transform operations are not used in the GLD CAD driver. (DaveM)
+	inv_aspect = (float)lpCtx->dwHeight/(float)lpCtx->dwWidth;
+
+	lpCtx->d3dViewport.dwSize = sizeof(lpCtx->d3dViewport);
+	lpCtx->d3dViewport.dwX = 0;
+	lpCtx->d3dViewport.dwY = 0;
+	lpCtx->d3dViewport.dwWidth = lpCtx->dwWidth;
+	lpCtx->d3dViewport.dwHeight = lpCtx->dwHeight;
+	lpCtx->d3dViewport.dvClipX = 0; // -1.0f;
+	lpCtx->d3dViewport.dvClipY = 0; // inv_aspect;
+	lpCtx->d3dViewport.dvClipWidth = lpCtx->dwWidth; // 2.0f;
+	lpCtx->d3dViewport.dvClipHeight = lpCtx->dwHeight; // 2.0f * inv_aspect;
+	lpCtx->d3dViewport.dvMinZ = 0.0f;
+	lpCtx->d3dViewport.dvMaxZ = 1.0f;
+	TRY(IDirect3DViewport3_SetViewport2(lpCtx->lpViewport3, &lpCtx->d3dViewport), "dglCreateContext: SetViewport2");
+
+	hResult = IDirect3DDevice3_SetCurrentViewport(lpCtx->lpDev3, lpCtx->lpViewport3);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "SetCurrentViewport failed", hResult);
+		goto return_with_error;
+	}
+
+	lpCtx->dwBPP = lpPFD->cColorBits;
+	lpCtx->iZBufferPF = lpCtx->lpPF->iZBufferPF;
+
+	// Set last texture to NULL
+	for (i=0; i<MAX_TEXTURE_UNITS; i++) {
+		lpCtx->ColorOp[i] = D3DTOP_DISABLE;
+		lpCtx->AlphaOp[i] = D3DTOP_DISABLE;
+		lpCtx->tObj[i] = NULL;
+	}
+
+	// Default to perspective correct texture mapping
+	dglSetRenderState(lpCtx, D3DRENDERSTATE_TEXTUREPERSPECTIVE, TRUE, "TexturePersp");
+
+	// Set the default culling mode
+	lpCtx->cullmode = D3DCULL_NONE;
+	dglSetRenderState(lpCtx, D3DRENDERSTATE_CULLMODE, D3DCULL_NONE, "CullMode");
+
+	// Disable specular
+	dglSetRenderState(lpCtx, D3DRENDERSTATE_SPECULARENABLE, FALSE, "SpecularEnable");
+	// Disable subpixel correction
+//	dglSetRenderState(lpCtx, D3DRENDERSTATE_SUBPIXEL, FALSE, "SubpixelEnable");
+	// Disable dithering
+	dglSetRenderState(lpCtx, D3DRENDERSTATE_DITHERENABLE, FALSE, "DitherEnable");
+
+	// Initialise the primitive caches
+//	lpCtx->dwNextLineVert	= 0;
+//	lpCtx->dwNextTriVert	= 0;
+
+	// Init the global texture palette
+	lpCtx->lpGlobalPalette = NULL;
+
+	// Init the HW/SW usage counters
+//	lpCtx->dwHWUsageCount = lpCtx->dwSWUsageCount = 0L;
+
+	//
+	// Create two D3D vertex buffers.
+	// One will hold the pre-transformed data with the other one
+	// being used to hold the post-transformed & clipped verts.
+	//
+#if 0  // never used (DaveM)
+	vbufdesc.dwSize = sizeof(D3DVERTEXBUFFERDESC);
+	vbufdesc.dwCaps = D3DVBCAPS_WRITEONLY;
+	if (glb.bHardware == FALSE)
+		vbufdesc.dwCaps = D3DVBCAPS_SYSTEMMEMORY;
+	vbufdesc.dwNumVertices = 32768; // For the time being
+
+	// Source vertex buffer
+	vbufdesc.dwFVF = DGL_LVERTEX;
+	hResult = IDirect3D3_CreateVertexBuffer(lpCtx->lpD3D3, &vbufdesc, &lpCtx->m_vbuf, 0, NULL);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "CreateVertexBuffer(src) failed", hResult);
+		goto return_with_error;
+	}
+
+	// Destination vertex buffer
+	vbufdesc.dwFVF = (glb.bMultitexture == FALSE) ? D3DFVF_TLVERTEX : (D3DFVF_XYZRHW | D3DFVF_DIFFUSE | D3DFVF_TEX2);
+	hResult = IDirect3D3_CreateVertexBuffer(lpCtx->lpD3D3, &vbufdesc, &lpCtx->m_pvbuf, 0, NULL);
+	if(FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "CreateVertexBuffer(dst) failed", hResult);
+		goto return_with_error;
+	}
+#endif
+
+#endif _USE_GLD3_WGL
+
+	//
+	//	Now create the Mesa context
+	//
+
+	// Create the Mesa visual
+	if (lpPFD->cDepthBits)
+		dwDepthBits = 16;
+	if (lpPFD->cStencilBits)
+		dwStencilBits = 8;
+	if (lpPFD->cAlphaBits) {
+		dwAlphaBits = 8;
+		bAlphaSW = GL_TRUE;
+	}
+	if (lpPFD->dwFlags & PFD_DOUBLEBUFFER)
+		bDouble = GL_TRUE;
+//	lpCtx->EmulateSingle =
+//		(lpPFD->dwFlags & PFD_DOUBLEBUFFER) ? FALSE : TRUE;
+
+#ifdef _USE_GLD3_WGL
+	lpCtx->glVis = _mesa_create_visual(
+		bDouble,    /* double buffer */
+		GL_FALSE,			// stereo
+		lpPFD->cRedBits,
+		lpPFD->cGreenBits,
+		lpPFD->cBlueBits,
+		dwAlphaBits,
+		dwDepthBits,
+		dwStencilBits,
+		lpPFD->cAccumRedBits,	// accum bits
+		lpPFD->cAccumGreenBits,	// accum bits
+		lpPFD->cAccumBlueBits,	// accum bits
+		lpPFD->cAccumAlphaBits,	// accum alpha bits
+		1				// num samples
+		);
+#else // _USE_GLD3_WGL
+	lpCtx->glVis = (*mesaFuncs.gl_create_visual)(
+		GL_TRUE,			// RGB mode
+		bAlphaSW,			// Is an alpha buffer required?
+		bDouble,			// Is an double-buffering required?
+		GL_FALSE,			// stereo
+		dwDepthBits,		// depth_size
+		dwStencilBits,		// stencil_size
+		lpPFD->cAccumBits,	// accum_size
+		0,					// colour-index bits
+		lpPFD->cRedBits,	// Red bit count
+		lpPFD->cGreenBits,	// Green bit count
+		lpPFD->cBlueBits,	// Blue bit count
+		dwAlphaBits			// Alpha bit count
+		);
+#endif // _USE_GLD3_WGL
+
+	if (lpCtx->glVis == NULL) {
+		ddlogMessage(DDLOG_CRITICAL_OR_WARN, "gl_create_visual failed\n");
+		goto return_with_error;
+	}
+
+#ifdef _USE_GLD3_WGL
+	lpCtx->glCtx = _mesa_create_context(lpCtx->glVis, NULL, (void *)lpCtx, GL_TRUE);
+#else
+	// Create the Mesa context
+	lpCtx->glCtx = (*mesaFuncs.gl_create_context)(
+					lpCtx->glVis,	// Mesa visual
+					NULL,			// share list context
+					(void *)lpCtx,	// Pointer to our driver context
+					GL_TRUE			// Direct context flag
+				   );
+#endif // _USE_GLD3_WGL
+
+	if (lpCtx->glCtx == NULL) {
+		ddlogMessage(DDLOG_CRITICAL_OR_WARN, "gl_create_context failed\n");
+		goto return_with_error;
+	}
+
+	// Create the Mesa framebuffer
+#ifdef _USE_GLD3_WGL
+	lpCtx->glBuffer = _mesa_create_framebuffer(
+		lpCtx->glVis,
+		lpCtx->glVis->depthBits > 0,
+		lpCtx->glVis->stencilBits > 0,
+		lpCtx->glVis->accumRedBits > 0,
+		GL_FALSE //swalpha
+		);
+#else
+	lpCtx->glBuffer = (*mesaFuncs.gl_create_framebuffer)(lpCtx->glVis);
+#endif // _USE_GLD3_WGL
+
+	if (lpCtx->glBuffer == NULL) {
+		ddlogMessage(DDLOG_CRITICAL_OR_WARN, "gl_create_framebuffer failed\n");
+		goto return_with_error;
+	}
+
+#ifdef _USE_GLD3_WGL
+	// Init Mesa internals
+	_swrast_CreateContext( lpCtx->glCtx );
+	_vbo_CreateContext( lpCtx->glCtx );
+	_tnl_CreateContext( lpCtx->glCtx );
+	_swsetup_CreateContext( lpCtx->glCtx );
+
+	_gldDriver.InitialiseMesa(lpCtx);
+	
+	lpCtx->glCtx->imports.warning	= _gld_mesa_warning;
+	lpCtx->glCtx->imports.fatal		= _gld_mesa_fatal;
+
+#else
+	// Tell Mesa how many texture stages we have
+	glb.wMaxSimultaneousTextures = lpCtx->D3DDevDesc.wMaxSimultaneousTextures;
+	// Only use as many Units as the spec requires
+	if (glb.wMaxSimultaneousTextures > MAX_TEXTURE_UNITS)
+		glb.wMaxSimultaneousTextures = MAX_TEXTURE_UNITS;
+	lpCtx->glCtx->Const.MaxTextureUnits = glb.wMaxSimultaneousTextures;
+	ddlogPrintf(DDLOG_INFO, "Texture stages   : %d", glb.wMaxSimultaneousTextures);
+
+	// Set the max texture size.
+	// NOTE: clamped to a max of 1024 for extra performance!
+	lpCtx->dwMaxTextureSize = (lpCtx->D3DDevDesc.dwMaxTextureWidth <= 1024) ? lpCtx->D3DDevDesc.dwMaxTextureWidth : 1024;
+
+// Texture resize takes place elsewhere. KH
+// NOTE: This was added to workaround an issue with the Intel app.
+#if 0
+	lpCtx->glCtx->Const.MaxTextureSize = lpCtx->dwMaxTextureSize;
+#else
+	lpCtx->glCtx->Const.MaxTextureSize = 1024;
+#endif
+	lpCtx->glCtx->Const.MaxDrawBuffers = 1;
+
+	// Setup the Display Driver pointers
+	dglSetupDDPointers(lpCtx->glCtx);
+
+	// Initialise all the Direct3D renderstates
+	dglInitStateD3D(lpCtx->glCtx);
+
+#if 0
+	// Signal a reload of texture state on next glBegin
+	lpCtx->m_texHandleValid = FALSE;
+	lpCtx->m_mtex = FALSE;
+	lpCtx->m_texturing = FALSE;
+#else
+	// Set default texture unit state
+//	dglSetTexture(lpCtx, 0, NULL);
+//	dglSetTexture(lpCtx, 1, NULL);
+#endif
+
+	//
+	// Set the global texture palette to default values.
+	//
+
+	// Clear the entire palette
+	ZeroMemory(ppe, sizeof(PALETTEENTRY) * 256);
+
+	// Fill the palette with a default colour.
+	// A garish colour is used to catch bugs. Here Magenta is used.
+	for (i=0; i < 256; i++) {
+		ppe[i].peRed	= 255;
+		ppe[i].peGreen	= 0;
+		ppe[i].peBlue	= 255;
+	}
+
+	RELEASE(lpCtx->lpGlobalPalette);
+
+	if (glb.bDirectDrawPersistant && glb.bPersistantBuffers && glb.lpGlobalPalette)
+		hResult = IDirectDrawPalette_AddRef(lpCtx->lpGlobalPalette = glb.lpGlobalPalette);
+	else
+		hResult = IDirectDraw4_CreatePalette(
+				lpCtx->lpDD4,
+				DDPCAPS_INITIALIZE | DDPCAPS_8BIT | DDPCAPS_ALLOW256,
+				ppe,
+				&(lpCtx->lpGlobalPalette),
+				NULL);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_ERROR, "Default CreatePalette failed\n", hResult);
+		lpCtx->lpGlobalPalette = NULL;
+		goto return_with_error;
+	}
+	if (glb.bDirectDrawPersistant && glb.bPersistantBuffers && !glb.lpGlobalPalette)
+		IDirectDrawPalette_AddRef(glb.lpGlobalPalette = lpCtx->lpGlobalPalette);
+
+#endif // _USE_GLD3_WGL
+
+	// ** If we have made it to here then we can enable rendering **
+	lpCtx->bCanRender = TRUE;
+
+//	ddlogMessage(DDLOG_SYSTEM, "dglCreateContextBuffers succeded\n");
+
+#ifdef GLD_THREADS
+	// Release serialized access
+	if (glb.bMultiThreaded)
+		LeaveCriticalSection(&CriticalSection);
+#endif
+
+	return TRUE;
+
+return_with_error:
+	// Clean up before returning.
+	// This is critical for secondary devices.
+
+	lpCtx->bCanRender = FALSE;
+
+#ifdef _USE_GLD3_WGL
+	// Destroy the Mesa context
+	if (lpCtx->glBuffer)
+		_mesa_destroy_framebuffer(lpCtx->glBuffer);
+	if (lpCtx->glCtx)
+		_mesa_destroy_context(lpCtx->glCtx);
+	if (lpCtx->glVis)
+		_mesa_destroy_visual(lpCtx->glVis);
+
+	// Destroy driver data
+	_gldDriver.DestroyDrawable(lpCtx);
+#else
+	// Destroy the Mesa context
+	if (lpCtx->glBuffer)
+		(*mesaFuncs.gl_destroy_framebuffer)(lpCtx->glBuffer);
+	if (lpCtx->glCtx)
+		(*mesaFuncs.gl_destroy_context)(lpCtx->glCtx);
+	if (lpCtx->glVis)
+		(*mesaFuncs.gl_destroy_visual)(lpCtx->glVis);
+
+	RELEASE(lpCtx->m_pvbuf); // Release D3D vertex buffer
+	RELEASE(lpCtx->m_vbuf); // Release D3D vertex buffer
+
+	if (lpCtx->lpViewport3) {
+		if (lpCtx->lpDev3) IDirect3DDevice3_DeleteViewport(lpCtx->lpDev3, lpCtx->lpViewport3);
+		RELEASE(lpCtx->lpViewport3);
+		lpCtx->lpViewport3 = NULL;
+	}
+
+	RELEASE(lpCtx->lpDev3);
+	if (lpCtx->lpDepth4) {
+		if (lpCtx->lpBack4)
+			IDirectDrawSurface4_DeleteAttachedSurface(lpCtx->lpBack4, 0L, lpCtx->lpDepth4);
+		else
+			IDirectDrawSurface4_DeleteAttachedSurface(lpCtx->lpFront4, 0L, lpCtx->lpDepth4);
+		RELEASE(lpCtx->lpDepth4);
+		lpCtx->lpDepth4 = NULL;
+	}
+	RELEASE(lpCtx->lpBack4);
+	RELEASE(lpCtx->lpFront4);
+	else
+	if (lpCtx->bFullscreen) {
+		IDirectDraw4_RestoreDisplayMode(lpCtx->lpDD4);
+		IDirectDraw4_SetCooperativeLevel(lpCtx->lpDD4, NULL, DDSCL_NORMAL);
+	}
+	RELEASE(lpCtx->lpD3D3);
+	RELEASE(lpCtx->lpDD4);
+	RELEASE(lpCtx->lpDD1);
+#endif // _USE_GLD3_WGL
+
+	lpCtx->bAllocated = FALSE;
+
+#ifdef GLD_THREADS
+	// Release serialized access
+	if (glb.bMultiThreaded)
+		LeaveCriticalSection(&CriticalSection);
+#endif
+
+	return FALSE;
+
+#undef DDLOG_CRITICAL_OR_WARN
+}
+
+// ***********************************************************************
+// Create a DirectGL context: claim a free slot in ctxlist, create its buffers, then hook the window procedure.
+HGLRC dglCreateContext(
+	HDC a,							// device context; logged and handed to dglCreateContextBuffers()
+	const DGL_pixelFormat *lpPF)	// pixel format; the pointer is cached in the context (lpCtx->lpPF)
+{	// Returns the new handle (slot index + 1); NULL if validation fails, no slot is usable, or buffer creation fails with a non-memory error.
+	int i;
+	HGLRC				hGLRC;
+	DGL_ctx*			lpCtx;
+	static BOOL			bWarnOnce = TRUE;	// cleared once the failure message box has been shown
+	DWORD				dwThreadId = GetCurrentThreadId();
+    char                szMsg[256];
+    HWND                hWnd;
+    LONG                lpfnWndProc;
+
+	// Validate license
+	if (!dglValidate())
+		return NULL;
+
+	// Is context state ready ?
+	if (!bContextReady)
+		return NULL;
+
+	ddlogPrintf(DDLOG_SYSTEM, "dglCreateContext for HDC=%X, ThreadId=%X", a, dwThreadId);
+
+	// Find next free context.
+	// Also ensure that only one Fullscreen context is created at any one time.
+	hGLRC = 0; // Default to Not Found
+	for (i=0; i<DGL_MAX_CONTEXTS; i++) {
+		if (ctxlist[i].bAllocated) {
+			if (/*glb.bFullscreen && */ctxlist[i].bFullscreen)
+				break;	// an allocated fullscreen context was met before any free slot: hGLRC stays 0
+		} else {
+			hGLRC = (HGLRC)(i+1);	// handles are 1-based slot indices, so 0 can mean "not found"
+			break;
+		}
+	}
+
+	// Bail if no GLRC was found
+	if (!hGLRC)
+		return NULL;
+
+	// Set the context pointer
+	lpCtx = dglGetContextAddress(hGLRC);
+	// Make sure that context is zeroed before we do anything.
+	// MFC and C++ apps call wglCreateContext() and wglDeleteContext() multiple times,
+	// even though only one context is ever used by the app, so keep it clean. (DaveM)
+	ZeroMemory(lpCtx, sizeof(DGL_ctx));
+	lpCtx->bAllocated = TRUE;
+	// Flag that buffers need creating on next wglMakeCurrent call.
+	lpCtx->bHasBeenCurrent = FALSE;
+	lpCtx->lpPF = (DGL_pixelFormat *)lpPF;	// cache pixel format
+	lpCtx->bCanRender = FALSE;
+
+	// Create all the internal resources here, not in dglMakeCurrent().
+	// We do a re-size check in dglMakeCurrent in case of re-allocations. (DaveM)
+	// We now try context allocations twice, first with video memory,
+	// then again with system memory. This is similar to technique
+	// used for dglWglResizeBuffers(). (DaveM)
+	if (lpCtx->bHasBeenCurrent == FALSE) {	// always true at this point: the flag was cleared just above
+		if (!dglCreateContextBuffers(a, lpCtx, FALSE)) {
+			if (glb.bMessageBoxWarnings && bWarnOnce && dwLogging) {
+				bWarnOnce = FALSE;
+                switch (nContextError) {
+                   case GLDERR_DDRAW: strcpy(szMsg, szDDrawWarning); break;
+                   case GLDERR_D3D: strcpy(szMsg, szD3DWarning); break;
+                   case GLDERR_MEM: strcpy(szMsg, szResourceWarning); break;
+                   case GLDERR_BPP: strcpy(szMsg, szBPPWarning); break;
+                   default: strcpy(szMsg, "");
+                }
+                if (strlen(szMsg))
+                    MessageBox(NULL, szMsg, "GLDirect", MB_OK | MB_ICONWARNING);
+			}
+            // A memory error is only logged and execution continues. NOTE(review): no second attempt is made in this function, despite the comment above - confirm.
+            if (nContextError == GLDERR_MEM) {
+			    ddlogPrintf(DDLOG_WARN, "dglCreateContext failed 1st time with video memory");
+            }
+            else {
+			    ddlogPrintf(DDLOG_ERROR, "dglCreateContext failed");
+                return NULL;
+            }
+		}
+	}
+	// NOTE(review): after a GLDERR_MEM failure the code below still runs and the handle is still returned - verify this is intended.
+	// Now that we have a hWnd, we can intercept the WindowProc.
+    hWnd = lpCtx->hWnd;	// zeroed above; presumably filled in by dglCreateContextBuffers() - TODO confirm
+    if (hWnd) {
+		// Only hook individual window handler once if not hooked before.
+		lpfnWndProc = GetWindowLong(hWnd, GWL_WNDPROC);
+		if (lpfnWndProc != (LONG)dglWndProc) {
+			lpCtx->lpfnWndProc = lpfnWndProc;	// remember the previous handler in the context
+			SetWindowLong(hWnd, GWL_WNDPROC, (LONG)dglWndProc);
+			}
+        // Find the parent window of the app too.
+        if (glb.hWndActive == NULL) {
+            while (hWnd != NULL) {	// follow GetParent() until it returns NULL; the last window seen is kept
+                glb.hWndActive = hWnd;
+                hWnd = GetParent(hWnd);
+            }
+            // Hook the parent window too.
+            lpfnWndProc = GetWindowLong(glb.hWndActive, GWL_WNDPROC);
+            if (glb.hWndActive == lpCtx->hWnd)
+                glb.lpfnWndProc = lpCtx->lpfnWndProc;	// same window as the context: reuse the handler saved above
+            else if (lpfnWndProc != (LONG)dglWndProc)
+                glb.lpfnWndProc = lpfnWndProc;	// different window that is not hooked yet: save its handler
+            if (glb.lpfnWndProc)
+                SetWindowLong(glb.hWndActive, GWL_WNDPROC, (LONG)dglWndProc);
+        }
+    }
+
+	ddlogPrintf(DDLOG_SYSTEM, "dglCreateContext succeeded for HGLRC=%d", (int)hGLRC);
+
+	return hGLRC;
+}
+
+// ***********************************************************************
+// Make a DirectGL context current for the given DC, or make no context current when either argument is 0.
+// Used by wgl functions and dgl functions. Returns FALSE only if validation fails, context state is not ready, or the HGLRC is out of range.
+BOOL dglMakeCurrent(
+	HDC a,		// device context to bind; cached in the context as lpCtx->hDC
+	HGLRC b)	// context handle; 0 releases the current context
+{
+	int context;
+	DGL_ctx* lpCtx;
+	HWND hWnd;
+	BOOL bNeedResize = FALSE;
+	BOOL bWindowChanged, bContextChanged;
+	LPDIRECTDRAWCLIPPER	lpddClipper;
+	DWORD dwThreadId = GetCurrentThreadId();
+	LONG lpfnWndProc;	// NOTE(review): not referenced anywhere in this function
+
+	// Validate license
+	if (!dglValidate())
+		return FALSE;
+
+	// Is context state ready ?
+	if (!bContextReady)
+		return FALSE;
+
+	context = (int)b; // This is as a result of STRICT!
+	ddlogPrintf(DDLOG_SYSTEM, "dglMakeCurrent: HDC=%X, HGLRC=%d, ThreadId=%X", a, context, dwThreadId);
+
+	// If the HGLRC is NULL then make no context current;
+	// Ditto if the HDC is NULL either. (DaveM)
+	if (context == 0 || a == 0) {
+		// Corresponding Mesa operation
+#ifdef _USE_GLD3_WGL
+		_mesa_make_current(NULL, NULL);
+#else
+		(*mesaFuncs.gl_make_current)(NULL, NULL);
+#endif
+		dglSetCurrentContext(0);
+		return TRUE;
+	}
+
+	// Make sure the HGLRC is in range
+	if ((context > DGL_MAX_CONTEXTS) || (context < 0)) {
+		ddlogMessage(DDLOG_ERROR, "dglMakeCurrent: HGLRC out of range\n");
+		return FALSE;
+	}
+
+	// Find address of context and make sure that it has been allocated
+	lpCtx = dglGetContextAddress(b);
+	if (!lpCtx->bAllocated) {
+		ddlogMessage(DDLOG_ERROR, "dglMakeCurrent: Context not allocated\n");
+//		return FALSE;
+		return TRUE; // HACK: Shuts up "WebLab Viewer Pro". KeithH
+	}
+	// All returns above happen before the critical section is entered, so none of them needs a matching Leave.
+#ifdef GLD_THREADS
+	// Serialize access to DirectDraw or DDS operations
+	if (glb.bMultiThreaded)
+		EnterCriticalSection(&CriticalSection);
+#endif
+
+	// Check if window has changed
+	hWnd = (a != lpCtx->hDC) ? WindowFromDC(a) : lpCtx->hWnd;	// only look the window up when the DC differs from the cached one
+	bWindowChanged = (hWnd != lpCtx->hWnd) ? TRUE : FALSE;
+	bContextChanged = (b != dglGetCurrentContext()) ? TRUE : FALSE;
+
+	// If the window has changed, make sure the clipper is updated. (DaveM)
+	if (glb.bDirectDrawPersistant && !lpCtx->bFullscreen && (bWindowChanged || bContextChanged)) {
+		lpCtx->hWnd = hWnd;
+#ifndef _USE_GLD3_WGL
+		IDirectDrawSurface4_GetClipper(lpCtx->lpFront4, &lpddClipper);	// NOTE(review): result is not checked before lpddClipper is used below
+		IDirectDrawClipper_SetHWnd(lpddClipper, 0, lpCtx->hWnd);
+		IDirectDrawClipper_Release(lpddClipper);
+#endif // _USE_GLD3_WGL
+	}
+
+	// Make sure hDC and hWnd is current. (DaveM)
+	// Obtain the dimensions of the rendering window
+	lpCtx->hDC = a; // Cache DC
+	lpCtx->hWnd = hWnd;
+	hWndLastActive = hWnd;
+
+	// Check for non-window DC = memory DC ?
+	if (hWnd == NULL) {
+		if (GetClipBox(a, &lpCtx->rcScreenRect) == ERROR) {
+			ddlogMessage(DDLOG_WARN, "GetClipBox failed in dglMakeCurrent\n");
+			SetRect(&lpCtx->rcScreenRect, 0, 0, 0, 0);
+		}
+	}
+	else if (!GetClientRect(lpCtx->hWnd, &lpCtx->rcScreenRect)) {
+		ddlogMessage(DDLOG_WARN, "GetClientRect failed in dglMakeCurrent\n");
+		SetRect(&lpCtx->rcScreenRect, 0, 0, 0, 0);
+	}
+	// Check if buffers need to be re-sized;
+	// If so, wait until Mesa GL stuff is setup before re-sizing;
+	if (lpCtx->dwWidth != lpCtx->rcScreenRect.right - lpCtx->rcScreenRect.left ||
+		lpCtx->dwHeight != lpCtx->rcScreenRect.bottom - lpCtx->rcScreenRect.top)
+		bNeedResize = TRUE;
+
+	// Now we can update our globals
+	dglSetCurrentContext(b);
+
+	// Corresponding Mesa operation
+#ifdef _USE_GLD3_WGL
+	_mesa_make_current(lpCtx->glCtx, lpCtx->glBuffer);
+	lpCtx->glCtx->Driver.UpdateState(lpCtx->glCtx, _NEW_ALL);
+	if (bNeedResize) {
+		// Resize buffers (Note Mesa GL needs to be setup beforehand);
+		// Resize Mesa internal buffer too via glViewport() command,
+		// which subsequently calls dglWglResizeBuffers() too.
+		lpCtx->glCtx->Driver.Viewport(lpCtx->glCtx, 0, 0, lpCtx->dwWidth, lpCtx->dwHeight);
+		lpCtx->bHasBeenCurrent = TRUE;
+	}
+#else
+	(*mesaFuncs.gl_make_current)(lpCtx->glCtx, lpCtx->glBuffer);
+
+	dglSetupDDPointers(lpCtx->glCtx);
+
+	// Insure DirectDraw surfaces fit current window DC
+	if (bNeedResize) {
+		// Resize buffers (Note Mesa GL needs to be setup beforehand);
+		// Resize Mesa internal buffer too via glViewport() command,
+		// which subsequently calls dglWglResizeBuffers() too.
+		(*mesaFuncs.gl_Viewport)(lpCtx->glCtx, 0, 0, lpCtx->dwWidth, lpCtx->dwHeight);
+		lpCtx->bHasBeenCurrent = TRUE;
+	}
+#endif // _USE_GLD3_WGL
+	ddlogPrintf(DDLOG_SYSTEM, "dglMakeCurrent: width = %d, height = %d", lpCtx->dwWidth, lpCtx->dwHeight);
+
+	// We have to clear D3D back buffer and render state if emulated front buffering
+	// for different window (but not context) like in Solid Edge.
+	if (glb.bDirectDrawPersistant && glb.bPersistantBuffers
+		&& (bWindowChanged /* || bContextChanged */) && lpCtx->EmulateSingle) {
+#ifdef _USE_GLD3_WGL
+//		IDirect3DDevice8_EndScene(lpCtx->pDev);
+//		lpCtx->bSceneStarted = FALSE;
+		lpCtx->glCtx->Driver.Clear(lpCtx->glCtx, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT,
+			GL_TRUE, 0, 0, lpCtx->dwWidth, lpCtx->dwHeight);
+#else
+		IDirect3DDevice3_EndScene(lpCtx->lpDev3);
+		lpCtx->bSceneStarted = FALSE;
+		dglClearD3D(lpCtx->glCtx, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT,
+			GL_TRUE, 0, 0, lpCtx->dwWidth, lpCtx->dwHeight);
+#endif // _USE_GLD3_WGL
+	}
+
+	// The first time we call MakeCurrent we set the initial viewport size
+	if (lpCtx->bHasBeenCurrent == FALSE)	// the body is the single Viewport call selected by the #ifdef below
+#ifdef _USE_GLD3_WGL
+		lpCtx->glCtx->Driver.Viewport(lpCtx->glCtx, 0, 0, lpCtx->dwWidth, lpCtx->dwHeight);
+#else
+		(*mesaFuncs.gl_Viewport)(lpCtx->glCtx, 0, 0, lpCtx->dwWidth, lpCtx->dwHeight);
+#endif // _USE_GLD3_WGL
+	lpCtx->bHasBeenCurrent = TRUE;	// unconditional: not part of the if above
+
+#ifdef GLD_THREADS
+	// Release serialized access
+	if (glb.bMultiThreaded)
+		LeaveCriticalSection(&CriticalSection);
+#endif
+
+	return TRUE;
+}
+
+// ***********************************************************************
+
+BOOL dglDeleteContext(
+	HGLRC a)
+{
+	DGL_ctx* lpCtx;
+	DWORD dwThreadId = GetCurrentThreadId();
+    char argstr[256];
+
+#if 0	// We have enough trouble throwing exceptions as it is... (DaveM)
+	// Validate license
+	if (!dglValidate())
+		return FALSE;
+#endif
+
+	// Is context state ready ?
+	if (!bContextReady)
+		return FALSE;
+
+	ddlogPrintf(DDLOG_SYSTEM, "dglDeleteContext: Deleting context HGLRC=%d, ThreadId=%X", (int)a, dwThreadId);
+
+	// Make sure the HGLRC is in range
+	if (((int) a> DGL_MAX_CONTEXTS) || ((int)a < 0)) {
+		ddlogMessage(DDLOG_ERROR, "dglDeleteCurrent: HGLRC out of range\n");
+		return FALSE;
+	}
+
+	// Make sure context is valid
+	lpCtx = dglGetContextAddress(a);
+	if (!lpCtx->bAllocated) {
+		ddlogPrintf(DDLOG_WARN, "Tried to delete unallocated context HGLRC=%d", (int)a);
+//		return FALSE;
+		return TRUE; // HACK: Shuts up "WebLab Viewer Pro". KeithH
+	}
+
+	// Make sure context is de-activated
+	if (a == dglGetCurrentContext()) {
+		ddlogPrintf(DDLOG_WARN, "dglDeleteContext: context HGLRC=%d still active", (int)a);
+		dglMakeCurrent(NULL, NULL);
+	}
+
+#ifdef GLD_THREADS
+	// Serialize access to DirectDraw or DDS operations
+	if (glb.bMultiThreaded)
+		EnterCriticalSection(&CriticalSection);
+#endif
+
+	// We are about to destroy all Direct3D objects.
+	// Therefore we must disable rendering
+	lpCtx->bCanRender = FALSE;
+
+	// This exception handler was installed to catch some
+	// particularly nasty apps. Console apps that call exit()
+	// fall into this catagory (i.e. Win32 Glut).
+
+    // VC cannot successfully implement multiple exception handlers
+    // if more than one exception occurs. Therefore reverting back to
+    // single exception handler as Keith originally had it. (DaveM)
+
+#define WARN_MESSAGE(p) strcpy(argstr, (#p));
+#define SAFE_RELEASE(p) WARN_MESSAGE(p); RELEASE(p);
+
+__try {
+#ifdef _USE_GLD3_WGL
+    WARN_MESSAGE(gl_destroy_framebuffer);
+	if (lpCtx->glBuffer)
+		_mesa_destroy_framebuffer(lpCtx->glBuffer);
+    WARN_MESSAGE(gl_destroy_context);
+	if (lpCtx->glCtx)
+		_mesa_destroy_context(lpCtx->glCtx);
+    WARN_MESSAGE(gl_destroy_visual);
+	if (lpCtx->glVis)
+		_mesa_destroy_visual(lpCtx->glVis);
+
+	_gldDriver.DestroyDrawable(lpCtx);
+#else
+	// Destroy the Mesa context
+    WARN_MESSAGE(gl_destroy_framebuffer);
+	if (lpCtx->glBuffer)
+		(*mesaFuncs.gl_destroy_framebuffer)(lpCtx->glBuffer);
+    WARN_MESSAGE(gl_destroy_context);
+	if (lpCtx->glCtx)
+		(*mesaFuncs.gl_destroy_context)(lpCtx->glCtx);
+    WARN_MESSAGE(gl_destroy_visual);
+	if (lpCtx->glVis)
+		(*mesaFuncs.gl_destroy_visual)(lpCtx->glVis);
+
+	SAFE_RELEASE(lpCtx->m_pvbuf); // release D3D vertex buffer
+	SAFE_RELEASE(lpCtx->m_vbuf); // release D3D vertex buffer
+
+	// Delete the global palette
+	SAFE_RELEASE(lpCtx->lpGlobalPalette);
+
+	// Clean up.
+	if (lpCtx->lpViewport3) {
+		if (lpCtx->lpDev3) IDirect3DDevice3_DeleteViewport(lpCtx->lpDev3, lpCtx->lpViewport3);
+		SAFE_RELEASE(lpCtx->lpViewport3);
+		lpCtx->lpViewport3 = NULL;
+	}
+
+	SAFE_RELEASE(lpCtx->lpDev3);
+	if (lpCtx->lpDepth4) {
+		if (lpCtx->lpBack4)
+			IDirectDrawSurface4_DeleteAttachedSurface(lpCtx->lpBack4, 0L, lpCtx->lpDepth4);
+		else
+			IDirectDrawSurface4_DeleteAttachedSurface(lpCtx->lpFront4, 0L, lpCtx->lpDepth4);
+		SAFE_RELEASE(lpCtx->lpDepth4);
+		lpCtx->lpDepth4 = NULL;
+	}
+	SAFE_RELEASE(lpCtx->lpBack4);
+	SAFE_RELEASE(lpCtx->lpFront4);
+	if (lpCtx->bFullscreen) {
+		IDirectDraw4_RestoreDisplayMode(lpCtx->lpDD4);
+		IDirectDraw4_SetCooperativeLevel(lpCtx->lpDD4, NULL, DDSCL_NORMAL);
+	}
+	SAFE_RELEASE(lpCtx->lpD3D3);
+	SAFE_RELEASE(lpCtx->lpDD4);
+	SAFE_RELEASE(lpCtx->lpDD1);
+#endif // _ULSE_GLD3_WGL
+
+}
+__except(EXCEPTION_EXECUTE_HANDLER) {
+    ddlogPrintf(DDLOG_WARN, "Exception raised in dglDeleteContext: %s", argstr);
+}
+
+	// Restore the window message handler because this context may be used
+	// again by another window with a *different* message handler. (DaveM)
+	if (lpCtx->lpfnWndProc) {
+		SetWindowLong(lpCtx->hWnd, GWL_WNDPROC, (LONG)lpCtx->lpfnWndProc);
+		lpCtx->lpfnWndProc = (LONG)NULL;
+		}
+
+	lpCtx->bAllocated = FALSE; // This context is now free for use
+
+#ifdef GLD_THREADS
+	// Release serialized access
+	if (glb.bMultiThreaded)
+		LeaveCriticalSection(&CriticalSection);
+#endif
+
+	return TRUE;
+}
+
+// ***********************************************************************
+// dglSwapBuffers: present the frame drawn with the current context (dglGetCurrentContext()). Every return path yields TRUE, including the ones that present nothing.
+BOOL dglSwapBuffers(
+	HDC hDC)	// DC to swap; when it differs from lpCtx->hDC the target window is looked up from this DC
+{
+	RECT		rSrcRect;	// Source rectangle
+	RECT		rDstRect;	// Destination rectangle
+	POINT		pt;			// Client-area origin, converted to screen coordinates
+	HRESULT		hResult;	// Assigned inside the TRY() macro
+
+	DDBLTFX		bltFX;		// Blt effects; filled in only when syncing to retrace
+	DWORD		dwBlitFlags;
+	DDBLTFX		*lpBltFX;	// &bltFX or NULL, handed straight to Blt
+
+//	DWORD		dwThreadId = GetCurrentThreadId();
+	HGLRC		hGLRC = dglGetCurrentContext();
+	DGL_ctx		*lpCtx = dglGetContextAddress(hGLRC);
+	HWND		hWnd;
+
+	HDC 		hDCAux;		// for memory DC (not referenced anywhere below)
+	int 		x,y,w,h;	// for memory DC BitBlt (assigned below but never read)
+
+#if 0	// Perhaps not a good idea. Called too often. KH
+	// Validate license
+	if (!dglValidate())
+		return FALSE;
+#endif
+
+	if (!lpCtx) {
+		return TRUE; //FALSE; // No current context
+	}
+
+	if (!lpCtx->bCanRender) {
+		// Don't return false else some apps will bail.
+		return TRUE;
+	}
+
+	hWnd = lpCtx->hWnd;
+	if (hDC != lpCtx->hDC) {
+		ddlogPrintf(DDLOG_WARN, "dglSwapBuffers: HDC=%X does not match HDC=%X for HGLRC=%d", hDC, lpCtx->hDC, hGLRC);
+		hWnd = WindowFromDC(hDC);	// Follow the caller's DC instead of the context's window
+	}
+
+#ifndef _USE_GLD3_WGL
+	// Ensure that the surfaces exist before we tell
+	// the device to render to them.
+	IDirectDraw4_RestoreAllSurfaces(lpCtx->lpDD4);
+
+	// Make sure that the vertex caches have been emptied
+//	dglStateChange(lpCtx);
+
+	// Some OpenGL programs don't issue a glFinish - check for it here.
+	if (lpCtx->bSceneStarted) {
+		IDirect3DDevice3_EndScene(lpCtx->lpDev3);
+		lpCtx->bSceneStarted = FALSE;
+	}
+#endif
+
+#if 0
+	// If the calling app is not active then we don't need to Blit/Flip.
+	// We can therefore simply return TRUE.
+	if (!glb.bAppActive)
+		return TRUE;
+	// Addendum: This is WRONG! We should bail if the app is *minimized*,
+	//           not merely if the app is just plain 'not active'.
+	//           KeithH, 27/May/2000.
+#endif
+
+	// Check for non-window DC = memory DC ?
+	if (hWnd == NULL) {
+		if (GetClipBox(hDC, &rSrcRect) == ERROR)
+			return TRUE;
+		// Use GDI BitBlt instead from compatible DirectDraw DC
+		x = rSrcRect.left;
+		y = rSrcRect.top;
+		w = rSrcRect.right - rSrcRect.left;
+		h = rSrcRect.bottom - rSrcRect.top;
+
+		// Ack. DX8 does not have a GetDC() function...
+                // TODO: Defer to DX7 or DX9 drivers... (DaveM)
+		return TRUE;	// Nothing is copied to a memory DC in this version
+	}
+
+	// Bail if window client region is not drawable, like in Solid Edge
+	if (!IsWindow(hWnd) /* || !IsWindowVisible(hWnd) */ || !GetClientRect(hWnd, &rSrcRect))
+		return TRUE;
+
+#ifdef GLD_THREADS
+	// Serialize access to DirectDraw or DDS operations
+	if (glb.bMultiThreaded)
+		EnterCriticalSection(&CriticalSection);
+#endif
+
+#ifdef _USE_GLD3_WGL
+	// Notify Mesa of impending swap, so Mesa can flush internal buffers.
+	_mesa_notifySwapBuffers(lpCtx->glCtx);
+	// Now perform driver buffer swap
+	_gldDriver.SwapBuffers(lpCtx, hDC, hWnd);
+#else
+	if (lpCtx->bFullscreen) {
+		// Sync with retrace if required
+		if (glb.bWaitForRetrace) {
+			IDirectDraw4_WaitForVerticalBlank(
+				lpCtx->lpDD4,
+				DDWAITVB_BLOCKBEGIN,
+				0);
+		}
+
+		// Perform the fullscreen flip
+		TRY(IDirectDrawSurface4_Flip(
+			lpCtx->lpFront4,
+			NULL,
+			DDFLIP_WAIT),
+			"dglSwapBuffers: Flip");
+	} else {
+		// Calculate current window position and size
+		pt.x = pt.y = 0;
+		ClientToScreen(hWnd, &pt);
+		GetClientRect(hWnd, &rDstRect);
+		if (rDstRect.right > lpCtx->dwModeWidth)	// Clamp destination to the display mode size
+			rDstRect.right = lpCtx->dwModeWidth;
+		if (rDstRect.bottom > lpCtx->dwModeHeight)
+			rDstRect.bottom = lpCtx->dwModeHeight;
+		OffsetRect(&rDstRect, pt.x, pt.y);	// Client rectangle -> screen coordinates
+		rSrcRect.left = rSrcRect.top = 0;	// Source is (0,0)..(dwWidth,dwHeight), clamped the same way
+		rSrcRect.right = lpCtx->dwWidth;
+		rSrcRect.bottom = lpCtx->dwHeight;
+		if (rSrcRect.right > lpCtx->dwModeWidth)
+			rSrcRect.right = lpCtx->dwModeWidth;
+		if (rSrcRect.bottom > lpCtx->dwModeHeight)
+			rSrcRect.bottom = lpCtx->dwModeHeight;
+
+		if (glb.bWaitForRetrace) {
+			// Sync the blit to the vertical retrace
+			ZeroMemory(&bltFX, sizeof(bltFX));
+			bltFX.dwSize = sizeof(bltFX);
+			bltFX.dwDDFX = DDBLTFX_NOTEARING;
+			dwBlitFlags = DDBLT_WAIT | DDBLT_DDFX;
+			lpBltFX = &bltFX;
+		} else {
+			dwBlitFlags = DDBLT_WAIT;
+			lpBltFX = NULL;
+		}
+
+		// Perform the actual blit
+		TRY(IDirectDrawSurface4_Blt(
+			lpCtx->lpFront4,
+			&rDstRect,
+			lpCtx->lpBack4, // Blit source
+			&rSrcRect,
+			dwBlitFlags,
+			lpBltFX),
+			"dglSwapBuffers: Blt");
+	}
+#endif // _USE_GLD3_WGL
+
+#ifdef GLD_THREADS
+	// Release serialized access
+	if (glb.bMultiThreaded)
+		LeaveCriticalSection(&CriticalSection);
+#endif
+
+    // TODO: Re-instate rendering bitmap snapshot feature??? (DaveM)
+
+	// Render frame is completed
+	ValidateRect(hWnd, NULL);
+	lpCtx->bFrameStarted = FALSE;
+
+	return TRUE;	// Flip/Blt failures are only logged by TRY(); they are not reported to the caller
+}
+
+// ***********************************************************************
diff --git a/src/mesa/drivers/windows/gldirect/dglcontext.h b/src/mesa/drivers/windows/gldirect/dglcontext.h
new file mode 100644
index 0000000000..5c433b857e
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dglcontext.h
@@ -0,0 +1,281 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  OpenGL context handling.
+*
+****************************************************************************/
+
+#ifndef __DGLCONTEXT_H
+#define __DGLCONTEXT_H
+
+// Disable compiler complaints about DLL linkage
+#pragma warning (disable:4273)
+
+// Macros to control compilation
+#ifndef STRICT
+#define STRICT
+#endif // STRICT
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <GL\gl.h>
+
+#ifdef _USE_GLD3_WGL
+	#include "dglmacros.h"
+	#include "dglglobals.h"
+	#include "pixpack.h"
+	#include "ddlog.h"
+	#include "dglpf.h"
+	#include "context.h"	// Mesa context
+#else
+	#include <ddraw.h>
+	#include <d3d.h>
+
+	#include "dglmacros.h"
+	#include "dglglobals.h"
+	#include "pixpack.h"
+	#include "ddlog.h"
+	#include "dglpf.h"
+	#include "d3dvertex.h"
+
+	#include "DirectGL.h"
+
+	#include "context.h"	// Mesa context
+	#include "vb.h"			// Mesa vertex buffer
+#endif // _USE_GLD3_WGL
+
+/*---------------------- Macros and type definitions ----------------------*/
+
+// Upper bound on HGLRC values (dglDeleteContext rejects anything above it). TODO: Use a list instead of this constant!
+#define DGL_MAX_CONTEXTS 32
+
+// Structure for describing an OpenGL context
+#ifdef _USE_GLD3_WGL
+typedef struct {	// Context record used when _USE_GLD3_WGL is defined
+	BOOL				bHasBeenCurrent;	// presumably set once the context has been made current - TODO confirm
+	DGL_pixelFormat		*lpPF;
+
+	// Pointer to private driver data (this also contains the drawable).
+	void				*glPriv;
+
+	// Mesa vars:
+	GLcontext			*glCtx;			// The core Mesa context
+	GLvisual			*glVis;			// Describes the color buffer
+	GLframebuffer		*glBuffer;		// Ancillary buffers
+
+	GLuint				ClearIndex;
+	GLuint				CurrentIndex;
+	GLubyte				ClearColor[4];
+	GLubyte				CurrentColor[4];
+
+	BOOL				EmulateSingle;	// Emulate single-buffering
+	BOOL				bDoubleBuffer;
+	BOOL				bDepthBuffer;
+
+	// Shared driver vars:
+	BOOL				bAllocated;		// Slot in use; dglDeleteContext clears it to free the slot
+    BOOL				bFullscreen;	// Is this a fullscreen context?
+    BOOL				bSceneStarted;	// Has a lpDev->BeginScene been issued?
+    BOOL				bCanRender;		// Flag: states whether rendering is OK (dglSwapBuffers does nothing while FALSE)
+	BOOL				bFrameStarted;	// Has frame update started at all? (reset by dglSwapBuffers)
+	BOOL				bStencil;		// TRUE if this context has stencil
+	BOOL				bGDIEraseBkgnd; // GDI Erase Background command
+
+	// Window information
+	HWND				hWnd;			// Window handle
+	HDC					hDC;			// Windows' Device Context of the window
+	DWORD				dwWidth;		// Window width
+	DWORD				dwHeight;		// Window height
+	DWORD				dwBPP;			// Window bits-per-pixel
+	RECT				rcScreenRect;	// Screen rectangle
+	DWORD				dwModeWidth;	// Display mode width
+	DWORD				dwModeHeight;	// Display mode height
+	float				dvClipX;
+	float				dvClipY;
+	LONG				lpfnWndProc;	// Saved window procedure; dglDeleteContext restores it via SetWindowLong(GWL_WNDPROC)
+
+} DGL_ctx;
+
+#define GLD_context			DGL_ctx	// GLD3 name for the context record
+#define GLD_GET_CONTEXT(c)	((GLD_context*)(c)->DriverCtx)	// c->DriverCtx cast to GLD_context*; fully parenthesised so GLD_GET_CONTEXT(c)->field works
+
+#else // _USE_GLD3_WGL
+
+typedef struct {	// Context record used when _USE_GLD3_WGL is NOT defined (DirectDraw4/Direct3D3 interfaces)
+	BOOL				bHasBeenCurrent;	// presumably set once the context has been made current - TODO confirm
+	DGL_pixelFormat		*lpPF;
+	//
+	// Mesa context vars:
+	//
+	GLcontext			*glCtx;			// The core Mesa context
+	GLvisual			*glVis;			// Describes the color buffer
+	GLframebuffer		*glBuffer;		// Ancillary buffers
+
+	GLuint				ClearIndex;
+	GLuint				CurrentIndex;
+	GLubyte				ClearColor[4];
+	GLubyte				CurrentColor[4];
+
+	BOOL				EmulateSingle;	// Emulate single-buffering
+	BOOL				bDoubleBuffer;
+	BOOL				bDepthBuffer;
+	int					iZBufferPF;		// Index of Zbuffer pixel format
+
+	// Vertex buffer: one-to-one correlation with Mesa's vertex buffer.
+	// This will be filled by our setup function (see d3dvsetup.c)
+	DGL_TLvertex		gWin[VB_SIZE];	// Transformed and lit vertices
+//	DGL_Lvertex			gObj[VB_SIZE];	// Lit vertices in object coordinates.
+
+	// Indices for DrawIndexedPrimitive.
+	// Clipped quads are drawn separately, so use VB_SIZE.
+	// 6 indices are needed to make 2 triangles for each possible quad
+//	WORD				wIndices[(VB_SIZE / 4) * 6];
+	WORD				wIndices[32768];	// Fixed size replaces the VB_SIZE-derived bound above
+
+	//
+	// Device driver vars:
+	//
+	BOOL				bAllocated;		// Slot in use; dglDeleteContext clears it to free the slot
+    BOOL				bFullscreen;	// Is this a fullscreen context?
+    BOOL				bSceneStarted;	// Has a lpDev->BeginScene been issued? (dglSwapBuffers ends a still-open scene)
+    BOOL				bCanRender;		// Flag: states whether rendering is OK (dglSwapBuffers does nothing while FALSE)
+	BOOL				bFrameStarted;	// Has frame update started at all? (reset by dglSwapBuffers)
+
+    // DirectX COM interfaces, postfixed with the interface number
+	IDirectDraw				*lpDD1;
+	IDirectDraw4			*lpDD4;
+	IDirect3D3				*lpD3D3;
+	IDirect3DDevice3		*lpDev3;
+	IDirect3DViewport3		*lpViewport3;
+	IDirectDrawSurface4		*lpFront4;	// Flip target / Blt destination in dglSwapBuffers
+	IDirectDrawSurface4		*lpBack4;	// Blt source in dglSwapBuffers
+	IDirectDrawSurface4		*lpDepth4;
+
+	// Vertex buffers
+	BOOL					bD3DPipeline; // True if using D3D geometry pipeline
+	IDirect3DVertexBuffer	*m_vbuf;	// Unprocessed vertices
+	IDirect3DVertexBuffer	*m_pvbuf;	// Processed vertices ready to be rendered
+
+	D3DTEXTUREOP		ColorOp[MAX_TEXTURE_UNITS]; // Used for re-enabling texturing
+	D3DTEXTUREOP		AlphaOp[MAX_TEXTURE_UNITS]; // Used for re-enabling texturing
+	struct gl_texture_object *tObj[MAX_TEXTURE_UNITS];
+
+	DDCAPS				ddCaps;			// DirectDraw caps
+	D3DDEVICEDESC		D3DDevDesc;		// Direct3D Device description
+
+	DDPIXELFORMAT		ddpfRender;		// Pixel format of the render buffer
+	DDPIXELFORMAT		ddpfDepth;		// Pixel format of the depth buffer
+
+	BOOL				bStencil;		// TRUE if this context has stencil
+
+	PX_packFunc			fnPackFunc;		// Pixel packing function for SW
+	PX_unpackFunc		fnUnpackFunc;	// Pixel unpacking function for SW
+	PX_packSpanFunc		fnPackSpanFunc;	// Pixel span packer
+
+	D3DVIEWPORT2		d3dViewport;	// D3D Viewport object
+
+	D3DCULL				cullmode;		// Direct3D cull mode
+	D3DCOLOR			curcolor;		// Current color
+	DWORD				dwColorPF;		// Current color, in format of target surface
+	D3DCOLOR			d3dClearColor;	// Clear color
+	D3DCOLOR			ConstantColor;	// For flat shading
+	DWORD				dwClearColorPF;	// Clear color, in format of target surface
+	BOOL				bGDIEraseBkgnd; // GDI Erase Background command
+
+	// Primitive caches
+//	DGL_vertex			LineCache[DGL_MAX_LINE_VERTS];
+//	DGL_vertex			TriCache[DGL_MAX_TRI_VERTS];
+//	DWORD				dwNextLineVert;
+//	DWORD				dwNextTriVert;
+
+	// Window information
+	HWND				hWnd;			// Window handle
+	HDC					hDC;			// Windows' Device Context of the window
+	DWORD				dwWidth;		// Window width
+	DWORD				dwHeight;		// Window height
+	DWORD				dwBPP;			// Window bits-per-pixel
+	RECT				rcScreenRect;	// Screen rectangle
+	DWORD				dwModeWidth;	// Display mode width
+	DWORD				dwModeHeight;	// Display mode height
+	float				dvClipX;
+	float				dvClipY;
+	LONG				lpfnWndProc;	// Saved window procedure; dglDeleteContext restores it via SetWindowLong(GWL_WNDPROC)
+
+	// Shared texture palette
+	IDirectDrawPalette	*lpGlobalPalette;
+
+	// Usage counters.
+	// One of these counters will be incremented when we choose
+	// between hardware and software rendering functions.
+//	DWORD				dwHWUsageCount;	// Hardware usage count
+//	DWORD				dwSWUsageCount;	// Software usage count
+
+	// Texture state flags.
+//	BOOL				m_texturing;		// TRUE if texturing
+//	BOOL				m_mtex;				// TRUE if multitexture
+//	BOOL				m_texHandleValid;	// TRUE if tex state valid
+
+	// Renderstate caches to ensure no redundant state changes
+	DWORD				dwRS[256];		// Renderstates
+	DWORD				dwTSS[2][24];	// Texture-stage states
+	LPDIRECT3DTEXTURE2	lpTex[2];		// Texture (1 per stage)
+
+	DWORD				dwMaxTextureSize;	// Max texture size:
+											// clamped to 1024.
+
+} DGL_ctx;
+#endif // _USE_GLD3_WGL
+
+/*------------------------- Function Prototypes ---------------------------*/
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+HHOOK	hKeyHook;	// NOTE(review): object defined in a header (no 'extern'); every includer gets its own tentative definition - confirm where it is meant to live
+LRESULT CALLBACK dglKeyProc(int code,WPARAM wParam,LPARAM lParam);
+
+void		dglInitContextState();
+void		dglDeleteContextState();
+BOOL 		dglIsValidContext(HGLRC a);
+DGL_ctx*	dglGetContextAddress(const HGLRC a);	// HGLRC -> context record; dglSwapBuffers treats a NULL result as "no current context"
+HDC 		dglGetCurrentDC(void);
+HGLRC 		dglGetCurrentContext(void);
+HGLRC		dglCreateContext(HDC a, const DGL_pixelFormat *lpPF);
+BOOL		dglMakeCurrent(HDC a, HGLRC b);	// dglDeleteContext calls this with (NULL, NULL) to deactivate a context
+BOOL		dglDeleteContext(HGLRC a);
+BOOL		dglSwapBuffers(HDC hDC);	// Returns TRUE on every path in this version
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/mesa/drivers/windows/gldirect/dglglobals.c b/src/mesa/drivers/windows/gldirect/dglglobals.c
new file mode 100644
index 0000000000..c633e3bcfa
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dglglobals.c
@@ -0,0 +1,149 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  Global variables.
+*
+****************************************************************************/
+
+#include "dglglobals.h"
+
+// =======================================================================
+// Global Variables
+// =======================================================================
+
+char szCopyright[]	= "Copyright (c) 1998 SciTech Software, Inc.";
+char szDllName[]	= "Scitech GLDirect";
+char szErrorTitle[]	= "GLDirect Error";
+
+DGL_globals glb;	// Driver-wide settings and state; filled in by dglInitGlobals()
+
+// Shared result variable (the TRY/TRY_ERR macros assign to whichever 'hResult' is in scope)
+HRESULT hResult;
+
+// ***********************************************************************
+
+// Patch function for missing function in Mesa: finite() simply forwards to the C runtime's _finite().
+int finite(
+	double x)
+{
+	return _finite(x);	// result is passed through unchanged
+}
+
+// ***********************************************************************
+// dglInitGlobals: reset the global settings block 'glb' to the built-in defaults (zero everything, then set each option explicitly).
+void dglInitGlobals()
+{
+    // Zero all fields just in case
+    memset(&glb, 0, sizeof(glb));
+
+	// Set the global defaults
+	glb.bPrimary			= FALSE;		// Not the primary device
+	glb.bHardware			= FALSE;		// Not a hardware device
+//	glb.bFullscreen			= FALSE;		// Not running fullscreen
+	glb.bSquareTextures		= FALSE;		// Device does not need sq
+	glb.bPAL8				= FALSE;		// Device cannot do 8bit
+	glb.dwMemoryType		= DDSCAPS_SYSTEMMEMORY;
+	glb.dwRendering			= DGL_RENDER_D3D;
+
+	glb.bWaitForRetrace		= TRUE;			// Sync to vertical retrace
+	glb.bFullscreenBlit		= FALSE;
+
+	glb.nPixelFormatCount	= 0;
+	glb.lpPF				= NULL;			// Pixel format list
+#ifndef _USE_GLD3_WGL
+	glb.nZBufferPFCount		= 0;
+	glb.lpZBufferPF			= NULL;
+	glb.nDisplayModeCount	= 0;
+	glb.lpDisplayModes		= NULL;
+	glb.nTextureFormatCount	= 0;
+	glb.lpTextureFormat		= NULL;
+#endif // _USE_GLD3_WGL
+
+	glb.wMaxSimultaneousTextures = 1;
+
+	// Enable support for multitexture, if available.
+	glb.bMultitexture		= TRUE;
+
+	// Enable support for mipmapping
+	glb.bUseMipmaps			= TRUE;
+
+	// Alpha emulation via chroma key
+	glb.bEmulateAlphaTest	= FALSE;
+
+	// Do not force the Mesa pipeline (TRUE would mean the D3D pipeline is never used)
+	glb.bForceMesaPipeline	= FALSE;
+
+	// Init support for multiple GLRCs
+	glb.bDirectDraw			= FALSE;
+	glb.bDirectDrawPrimary	= FALSE;
+	glb.bDirect3D			= FALSE;
+	glb.bDirect3DDevice		= FALSE;
+	glb.bDirectDrawStereo	= FALSE;
+	glb.iDirectDrawStereo	= 0;
+	glb.hWndActive			= NULL;
+	// Init DirectX COM interfaces for multiple GLRCs
+//	glb.lpDD4				= NULL;
+//	glb.lpPrimary4			= NULL;
+//	glb.lpBack4				= NULL;
+//	glb.lpDepth4			= NULL;
+//	glb.lpGlobalPalette		= NULL;
+
+	// Init special support options
+	glb.bMessageBoxWarnings = TRUE;
+	glb.bDirectDrawPersistant = FALSE;
+	glb.bPersistantBuffers	= FALSE;
+
+	// Do not assume single-precision-only FPU (for compatibility)
+	glb.bFastFPU			= FALSE;
+
+	// Allow hot-key support
+	glb.bHotKeySupport		= TRUE;
+
+	// Default to single-threaded support (for simplicity)
+	glb.bMultiThreaded		= FALSE;
+
+	// Use application-specific customizations (for end-user convenience)
+	glb.bAppCustomizations	= TRUE;
+
+#ifdef _USE_GLD3_WGL
+	// Registry/ini-file settings for GLDirect 3.x (named constants come from dglglobals.h; values are unchanged: 0, 1, 0, 2)
+	glb.dwAdapter				= 0;						// Primary DX8 adapter
+	glb.dwTnL					= GLDS_TNL_MESA;			// MesaSW TnL
+	glb.dwMultisample			= GLDS_MULTISAMPLE_NONE;	// Multisample Off
+	glb.dwDriver				= GLDS_DRIVER_HAL;			// Direct3D HW
+
+	// Signal a pixelformat list rebuild
+	glb.bPixelformatsDirty		= TRUE;
+#endif
+}
+
+// ***********************************************************************
diff --git a/src/mesa/drivers/windows/gldirect/dglglobals.h b/src/mesa/drivers/windows/gldirect/dglglobals.h
new file mode 100644
index 0000000000..995f1cd5e4
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dglglobals.h
@@ -0,0 +1,198 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  Globals.
+*
+****************************************************************************/
+
+#ifndef __DGLGLOBALS_H
+#define __DGLGLOBALS_H
+
+#include "dglcontext.h"
+#include "dglpf.h"		// Pixel format
+#ifndef _USE_GLD3_WGL
+#include "d3dtexture.h"
+#endif
+
+/*---------------------- Macros and type definitions ----------------------*/
+
+typedef enum {	// Rendering back-end selector; dglInitGlobals stores DGL_RENDER_D3D in glb.dwRendering
+	DGL_RENDER_MESASW		= 0,
+	DGL_RENDER_D3D			= 1,
+	DGL_RENDER_FORCE_DWORD	= 0x7fffffff,	// Size-forcing sentinel (was 0x7ffffff, one hex digit short of the usual DirectX value)
+} DGL_renderType;
+
+#ifdef _USE_GLD3_WGL
+
+// Same as DGL_renderType? KeithH
+typedef enum {	// Value range of glb.dwDriver (dglInitGlobals defaults it to 2, i.e. HAL)
+	GLDS_DRIVER_MESA_SW			= 0,	// Mesa SW rendering
+	GLDS_DRIVER_REF				= 1,	// Direct3D Reference Rasteriser
+	GLDS_DRIVER_HAL				= 2,	// Direct3D HW rendering
+} GLDS_DRIVER;
+
+typedef enum {	// Value range of glb.dwTnL (dglInitGlobals defaults it to 1, i.e. Mesa TnL)
+	GLDS_TNL_DEFAULT			= 0,	// Choose best TnL method
+	GLDS_TNL_MESA				= 1,	// Mesa TnL
+	GLDS_TNL_D3DSW				= 2,	// D3D Software TnL
+	GLDS_TNL_D3DHW				= 3,	// D3D Hardware TnL
+} GLDS_TNL;
+
+typedef enum {	// Value range of glb.dwMultisample (dglInitGlobals defaults it to 0, "Multisample Off")
+	GLDS_MULTISAMPLE_NONE		= 0,
+	GLDS_MULTISAMPLE_FASTEST	= 1,
+	GLDS_MULTISAMPLE_NICEST		= 2,
+} GLDS_MULTISAMPLE;
+#endif
+
+typedef struct {	// Driver-wide settings and state; the single instance is 'glb'
+	// Registry settings
+	char				szDDName[MAX_DDDEVICEID_STRING]; // DirectDraw device name
+	char				szD3DName[MAX_DDDEVICEID_STRING]; // Direct3D driver name
+	BOOL				bPrimary; // Is ddraw device the Primary device?
+	BOOL				bHardware; // Is the d3d driver a Hardware driver?
+#ifndef _USE_GLD3_WGL
+	GUID				ddGuid; // GUID of the ddraw device
+	GUID				d3dGuid; // GUID of the direct3d driver
+#endif // _USE_GLD3_WGL
+//	BOOL				bFullscreen; // Force fullscreen - only useful for primary adapters.
+	BOOL				bSquareTextures; // Does this driver require square textures?
+	DWORD               dwRendering; // Type of rendering required (a DGL_renderType value)
+
+	BOOL				bWaitForRetrace; // Sync to vertical retrace
+	BOOL				bFullscreenBlit; // Use Blt instead of Flip in fullscreen modes
+
+	// Multitexture
+	BOOL				bMultitexture;
+
+	BOOL				bUseMipmaps;
+
+	DWORD				dwMemoryType; // Sysmem or Vidmem
+
+	// Global palette
+	BOOL				bPAL8;
+	DDPIXELFORMAT		ddpfPAL8;
+
+	// Multitexture
+	WORD				wMaxSimultaneousTextures;
+
+	// Win32 internals
+	BOOL				bAppActive; // Keep track of Alt-Tab
+	LONG				lpfnWndProc; // WndProc of calling app
+
+	// Pixel Format Descriptor list.
+	int					nPixelFormatCount;
+	DGL_pixelFormat		*lpPF;
+#ifndef _USE_GLD3_WGL
+	// ZBuffer pixel formats
+	int					nZBufferPFCount; // Count of Zbuffer pixel formats
+	DDPIXELFORMAT		*lpZBufferPF; // ZBuffer pixel formats
+
+	// Display modes (for secondary devices)
+	int					nDisplayModeCount;
+	DDSURFACEDESC2		*lpDisplayModes;
+
+	// Texture pixel formats
+	int					nTextureFormatCount;
+	DGL_textureFormat	*lpTextureFormat;
+#endif // _USE_GLD3_WGL
+	// Alpha emulation via chroma key
+	BOOL				bEmulateAlphaTest;
+
+	// Geom pipeline override.
+	// If this is set TRUE then the D3D pipeline will never be used,
+	// and the Mesa pipeline will always be used.
+	BOOL				bForceMesaPipeline;
+
+#ifdef _USE_GLD3_WGL
+	BOOL				bPixelformatsDirty;	// Signal a list rebuild
+#endif
+
+	// Additional globals to support multiple GL rendering contexts, GLRCs
+	BOOL				bDirectDraw;			// DirectDraw interface exists ?
+	BOOL				bDirectDrawPrimary;		// DirectDraw primary surface exists ?
+	BOOL				bDirect3D;				// Direct3D interface exists ?
+	BOOL				bDirect3DDevice;		// Direct3D device exists ?
+
+	BOOL 				bDirectDrawStereo;		// DirectDraw Stereo driver started ?
+	int 				iDirectDrawStereo;		// DirectDraw Stereo driver reference count
+	HWND				hWndActive;				// copy of active window
+
+    // Copies of DirectX COM interfaces for re-referencing across multiple GLRCs
+//	IDirectDraw4			*lpDD4;				// copy of DirectDraw interface
+//	IDirectDrawSurface4		*lpPrimary4;		// copy of DirectDraw primary surface
+//	IDirectDrawSurface4		*lpBack4;
+//	IDirectDrawSurface4		*lpDepth4;
+//	IDirectDrawPalette		*lpGlobalPalette;
+
+	// Aids for heavy-duty MFC-windowed OGL apps, like AutoCAD
+	BOOL				bMessageBoxWarnings;	// popup message box warning
+	BOOL				bDirectDrawPersistant;  // DirectDraw is persistent
+	BOOL				bPersistantBuffers;  	// DirectDraw buffers persistent
+
+	// FPU setup option for CAD precision (AutoCAD) vs GAME speed (Quake)
+	BOOL				bFastFPU;				// single-precision-only FPU ?
+
+	// Hot-Key support, like for real-time stereo parallax adjustments
+	BOOL				bHotKeySupport;			// hot-key support ?
+
+	// Multi-threaded support, for apps like 3DStudio
+	BOOL				bMultiThreaded;			// multi-threaded ? (gates the critical section under GLD_THREADS)
+
+	// Detect and use app-specific customizations for apps like 3DStudio
+	BOOL				bAppCustomizations;		// app customizations ?
+
+#ifdef _USE_GLD3_WGL
+	DWORD				dwAdapter;				// DX8 adapter index (default 0: primary adapter)
+	DWORD				dwTnL;					// TnL method, a GLDS_TNL value (default 1: Mesa TnL)
+	DWORD				dwMultisample;			// Multisample mode, a GLDS_MULTISAMPLE value (default 0: off)
+	DWORD				dwDriver;				// Renderer, a GLDS_DRIVER value (default 2: Direct3D HW)
+	void				*pDrvPrivate;			// Driver-specific data
+#endif
+
+} DGL_globals;
+
+/*------------------------- Function Prototypes ---------------------------*/
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+extern DGL_globals	glb;	// Declaration only: the object itself is defined once, in dglglobals.c
+
+void		dglInitGlobals();	// Zeroes glb, then applies the built-in defaults
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/mesa/drivers/windows/gldirect/dglmacros.h b/src/mesa/drivers/windows/gldirect/dglmacros.h
new file mode 100644
index 0000000000..aed0f2110e
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dglmacros.h
@@ -0,0 +1,91 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  Useful generic macros.
+*
+****************************************************************************/
+
+#ifndef __DGLMACROS_H
+#define __DGLMACROS_H
+
+#include <ddraw.h>
+
+// RELEASE(x): when interface pointer x is non-NULL, call its COM Release() and set x to NULL (C or C++ flavour)
+#if !defined(__cplusplus) || defined(CINTERFACE)
+	// Standard C version (x is expanded several times: pass a side-effect-free lvalue)
+	#define RELEASE(x) if ((x)!=NULL) { (x)->lpVtbl->Release(x); (x)=NULL; }
+#else
+	// C++ version (x is expanded several times: pass a side-effect-free lvalue)
+	#define RELEASE(x) if ((x)!=NULL) { (x)->Release(); (x)=NULL; }
+#endif
+
+// Log warning (x) via ddlogError only the first time a given expansion site runs: we don't want a message *every* time we call an unsupported function
+#define UNSUPPORTED(x)												\
+	{																\
+		static BOOL bFirstTime = TRUE;								\
+		if (bFirstTime) {											\
+			bFirstTime = FALSE;										\
+			ddlogError(DDLOG_WARN, (x), DDERR_CURRENTLYNOTAVAIL);	\
+		}															\
+	}
+// Bail out when the local 'ctx' is NULL. Bare 'return;', so only for functions returning void.
+#define DGL_CHECK_CONTEXT		\
+	if (ctx == NULL) return;
+
+// Don't render if bCanRender is not TRUE. Expects a local 'dgl' context pointer; bare 'return;' as above.
+#define DGL_CHECK_RENDER		\
+	if (!dgl->bCanRender) return;
+
+#if 0
+#define TRY(a,b) (a)
+#define TRY_ERR(a,b) (a)
+#else
+// TRY(a,b): evaluate a into hResult and log message b on failure. hResult should be defined in the function
+// Return codes should be checked via SUCCEEDED and FAILED macros
+#define TRY(a,b)									\
+	{												\
+		if (FAILED(hResult=(a)))					\
+			ddlogError(DDLOG_ERROR, (b), hResult);	\
+	}
+
+// TRY_ERR(a,b): as TRY, but also jumps to exit_with_error on failure. hResult is a global
+// The label exit_with_error should be defined within the calling scope
+#define TRY_ERR(a,b)								\
+	{												\
+		if (FAILED(hResult=(a))) {					\
+			ddlogError(DDLOG_ERROR, (b), hResult);	\
+			goto exit_with_error;					\
+		}											\
+	}
+#endif // #if 0 ... #else (logging versions)
+
+#endif
diff --git a/src/mesa/drivers/windows/gldirect/dglpf.c b/src/mesa/drivers/windows/gldirect/dglpf.c
new file mode 100644
index 0000000000..4cd4d0334a
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dglpf.c
@@ -0,0 +1,620 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ========================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  Pixel Formats.
+*
+****************************************************************************/
+
+#include "dglpf.h"
+
+#ifdef _USE_GLD3_WGL
+#include "gld_driver.h"
+#endif
+
+// ***********************************************************************
+
+// Text of the message box shown when the desktop colour depth is unusable.
+char	szColorDepthWarning[] =
+	"GLDirect does not support the current desktop\n"
+	"color depth.\n\n"
+	"You may need to change the display resolution to\n"
+	"16 bits per pixel or higher color depth using\n"
+	"the Windows Display Settings control panel\n"
+	"before running this OpenGL application.\n";
+
+// ***********************************************************************
+// This pixel format will be used as a template when compiling the list
+// of pixel formats supported by the hardware. Many fields will be
+// filled in at runtime: in the non-GLD3 build dglBuildPixelFormatList()
+// overwrites the pixel type, colour bit counts and colour shifts of this
+// template from the current display mode before copying it, then sets the
+// depth/stencil fields of each copy.
+// The initializer is positional, so the values below must stay in
+// PIXELFORMATDESCRIPTOR member order.
+// PFD flag defaults are upgraded to match ChoosePixelFormat() -- DaveM
+DGL_pixelFormat pfTemplateHW =
+{
+    {
+	sizeof(PIXELFORMATDESCRIPTOR),	// nSize: size of the data structure
+		1,							// nVersion: structure version - should be 1
+									// dwFlags:
+		PFD_DRAW_TO_WINDOW |		// The buffer can draw to a window or device surface.
+		PFD_DRAW_TO_BITMAP |		// The buffer can draw to a bitmap. (DaveM)
+		PFD_SUPPORT_GDI |			// The buffer supports GDI drawing. (DaveM)
+		PFD_SUPPORT_OPENGL |		// The buffer supports OpenGL drawing.
+		PFD_DOUBLEBUFFER |			// The buffer is double-buffered.
+		0,							// Placeholder for easy commenting of above flags
+		PFD_TYPE_RGBA,				// iPixelType: pixel type RGBA.
+		16,							// cColorBits: total colour bitplanes (excluding alpha bitplanes)
+		5, 0,						// cRedBits, cRedShift
+		5, 5,						// cGreenBits, cGreenShift
+		5, 10,						// cBlueBits, cBlueShift
+		0, 0,						// cAlphaBits, cAlphaShift (destination alpha)
+		0,							// cAccumBits: accumulator bits (total)
+		0, 0, 0, 0,					// cAccumRedBits, cAccumGreenBits, cAccumBlueBits, cAccumAlphaBits
+		0,							// cDepthBits
+		0,							// cStencilBits
+		0,							// cAuxBuffers: number of auxiliary buffers
+		0,							// iLayerType
+		0,							// bReserved: number of overlay and underlay planes
+		0,							// dwLayerMask
+		0,							// dwVisibleMask: transparent color or index of an underlay plane
+		0							// dwDamageMask
+	},
+	-1,	// iZBufferPF = -1: no depth/stencil buffer (this slot is dwDriverData in _USE_GLD3_WGL builds)
+};
+
+// ***********************************************************************
+// Count the bits that are set in a bit mask (e.g. the width of a colour
+// channel described by a DirectDraw channel mask).
+int BitCount(
+	DWORD dw)
+{
+	int nBits = 0;
+
+	// A zero mask (non-RGB mode) never enters the loop and yields 0.
+	while (dw != 0) {
+		if (dw & 1)
+			nBits++;
+		dw >>= 1;
+	}
+
+	return nBits;
+}
+
+// ***********************************************************************
+
+// Return the position of the lowest set bit in dwMaskIn, i.e. the number
+// of zero bits below the mask. A zero mask (non-RGB mode) yields 0.
+DWORD BitShift(
+	DWORD dwMaskIn)
+{
+	DWORD dwShift = 0;
+
+	if (dwMaskIn != 0) {
+		// Shift the mask down until its lowest set bit reaches bit 0
+		while ((dwMaskIn & 1) == 0) {
+			dwMaskIn >>= 1;
+			dwShift++;
+		}
+	}
+
+	return dwShift;
+}
+
+// ***********************************************************************
+
+// Check that iPFD is a usable 1-based index into the global pixel format
+// list (glb.lpPF) and that the entry it names carries the size and version
+// of a version 1 PIXELFORMATDESCRIPTOR.
+// Returns TRUE if the index is valid, FALSE otherwise.
+BOOL IsValidPFD(int iPFD)
+{
+	const PIXELFORMATDESCRIPTOR *pPfd;
+
+	// Validate license
+	if (!dglValidate())
+		return FALSE;
+
+	// No list has been built, or the list is empty
+	if (glb.lpPF == NULL || glb.nPixelFormatCount == 0)
+		return FALSE;
+
+	// Pixel format indices start at 1
+	if (iPFD < 1 || iPFD > glb.nPixelFormatCount) {
+		ddlogMessage(DDLOG_ERROR, "PFD out of range\n");
+		return FALSE;
+	}
+
+	pPfd = &glb.lpPF[iPFD - 1].pfd;
+
+	if (pPfd->nSize != sizeof(PIXELFORMATDESCRIPTOR)) {
+		ddlogMessage(DDLOG_ERROR, "Bad PFD size\n");
+		return FALSE;
+	}
+
+	if (pPfd->nVersion != 1) {
+		ddlogMessage(DDLOG_ERROR, "PFD is not Version 1\n");
+		return FALSE;
+	}
+
+	return TRUE;
+}
+
+// ***********************************************************************
+
+#ifndef _USE_GLD3_WGL
+
+// State shared between dglBuildPixelFormatList() and the two enumeration
+// callbacks in this file. dglBuildPixelFormatList() resets iEnumCount
+// before each pass in which it is used as a count or array index.
+int		iEnumCount;			// Enumeration count (also the next array slot during a "save" pass)
+DWORD	dwDisplayBitDepth;	// Bit depth of current display mode
+
+// ***********************************************************************
+
+// Callback for IDirectDraw4_EnumDisplayModes().
+// Only modes whose bit depth equals dwDisplayBitDepth are considered.
+//   pvContext == NULL : counting pass; iEnumCount is incremented per match.
+//   pvContext != NULL : save pass; pvContext is an array of
+//                       glb.nDisplayModeCount DDSURFACEDESC2 entries and each
+//                       match is copied to slot iEnumCount and logged.
+// Returns DDENUMRET_OK to continue the enumeration, DDENUMRET_CANCEL to
+// stop it (NULL descriptor, or the destination array is full).
+HRESULT WINAPI EnumDisplayModesCallback(
+	DDSURFACEDESC2* pddsd,
+	void *pvContext)
+{
+	DWORD			dwModeDepth;
+	DDSURFACEDESC2	*lpDisplayMode;
+	char			buf[64];	// Worst case is 44 bytes: "Mode: ", 3 x 10 digits, separators, NUL
+
+	// Check parameters
+	if (pddsd == NULL)
+		return DDENUMRET_CANCEL;
+
+	dwModeDepth = pddsd->ddpfPixelFormat.dwRGBBitCount;
+	lpDisplayMode = (DDSURFACEDESC2 *)pvContext;
+
+	// Skip modes whose depth differs from the current display mode.
+	if (dwModeDepth != dwDisplayBitDepth)
+		return DDENUMRET_OK;
+
+	if (lpDisplayMode != NULL) {
+		// The mode list may have grown since the counting pass;
+		// never write past the array that pass sized.
+		if (iEnumCount >= (int)glb.nDisplayModeCount)
+			return DDENUMRET_CANCEL;
+
+		memcpy(&lpDisplayMode[iEnumCount], pddsd, sizeof(DDSURFACEDESC2));
+		// Plain narrow literal: sprintf() takes a char format even in
+		// UNICODE builds, where TEXT() would have produced a wide string.
+		sprintf(buf, "Mode: %lu x %lu x %lu\n",
+				(unsigned long)pddsd->dwWidth,
+				(unsigned long)pddsd->dwHeight,
+				(unsigned long)dwModeDepth);
+		ddlogMessage(DDLOG_INFO, buf);
+	}
+
+	iEnumCount++;
+
+	return DDENUMRET_OK;
+}
+
+// ***********************************************************************
+
+// Callback for IDirect3D3_EnumZBufferFormats().
+// Formats without DDPF_ZBUFFER are ignored.
+//   lpZBufferPF == NULL : pass 1; glb.nZBufferPFCount is incremented.
+//   lpZBufferPF != NULL : pass 2; the format is logged and copied into
+//                         glb.lpZBufferPF[iEnumCount], and iEnumCount advances.
+// Returns D3DENUMRET_OK to continue, D3DENUMRET_CANCEL to stop (NULL format,
+// or glb.lpZBufferPF is already full).
+HRESULT CALLBACK d3dEnumZBufferFormatsCallback(
+	DDPIXELFORMAT* pddpf,
+	VOID* lpZBufferPF )
+{
+	char	buf[64];
+	DWORD	dwStencilBits;
+
+	if(pddpf == NULL)
+		return D3DENUMRET_CANCEL;
+
+	// Not a depth buffer format: nothing to do
+	if (!(pddpf->dwFlags & DDPF_ZBUFFER))
+		return D3DENUMRET_OK;
+
+	if (lpZBufferPF == NULL) {
+		// Pass 1. Merely counting the PF
+		glb.nZBufferPFCount++;
+		return D3DENUMRET_OK;
+	}
+
+	// Pass 2. Save the PF, but never write past the array sized by pass 1
+	if (iEnumCount >= (int)glb.nZBufferPFCount)
+		return D3DENUMRET_CANCEL;
+
+	// The stencil depth is only reported when the stencil flag is set
+	dwStencilBits = (pddpf->dwFlags & DDPF_STENCILBUFFER)
+						? pddpf->dwStencilBitDepth : 0;
+
+	sprintf(buf, " %d: Z=%lu S=%lu\n",
+		iEnumCount,
+		(unsigned long)pddpf->dwZBufferBitDepth,
+		(unsigned long)dwStencilBits);
+	ddlogMessage(DDLOG_INFO, buf);
+
+	memcpy(&glb.lpZBufferPF[iEnumCount++],
+		pddpf,
+		sizeof(DDPIXELFORMAT));
+
+	return D3DENUMRET_OK;
+}
+#endif // _USE_GLD3_WGL
+
+// ***********************************************************************
+
+// Report whether the display adapter behind lpDD4 is on the list of boards
+// whose stencil buffer support is known not to work.
+// Returns FALSE whenever the adapter is not identified.
+BOOL IsStencilSupportBroken(LPDIRECTDRAW4 lpDD4)
+{
+	DDDEVICEIDENTIFIER	devId; // DX6 device identifier
+
+	// GetDeviceIdentifier() locks up with the stock drivers shipped on the
+	// Windows 2000 CD. Updated drivers from vendors appear to work, but the
+	// drivers can't be told apart without this very function. For now the
+	// test is skipped whenever the high bit of GetVersion() is clear.
+	if (!(GetVersion() & 0x80000000UL))
+		return FALSE;
+
+	// Obtain device info
+	if (FAILED(IDirectDraw4_GetDeviceIdentifier(lpDD4, &devId, 0)))
+		return FALSE;
+
+	// Matrox (vendor 0x102B) G400 (device 0x0525): stencil buffer support
+	// does not draw anything in AutoCAD, but ordinary Z buffers draw shaded
+	// models fine. (DaveM)
+	if (devId.dwVendorId == 0x102B && devId.dwDeviceId == 0x0525)
+		return TRUE;
+
+	return FALSE;
+}
+
+// ***********************************************************************
+
+// Append one fixed-width column to the pixel format log line in buf:
+// iValue formatted as "%2d " when bShow is non-zero, otherwise the " . "
+// placeholder.
+static void dglpfAppendColumn(char *buf, int iValue, int bShow)
+{
+	char cat[8];
+
+	if (bShow)
+		sprintf(cat, "%2d ", iValue);
+	else
+		sprintf(cat, " . ");
+	strcat(buf, cat);
+}
+
+// Dump glb.lpPF[0 .. glb.nPixelFormatCount-1] to the log as a table.
+// ** Based on "wglinfo" by Nate Robins **
+static void dglpfLogPixelFormatList(void)
+{
+	int						i;
+	char					buf[128];
+	char					cat[8];
+	const DGL_pixelFormat	*lpPF;
+
+	ddlogMessage(DDLOG_INFO, "\n");
+	ddlogMessage(DDLOG_INFO, "Pixel Formats:\n");
+	ddlogMessage(DDLOG_INFO,
+		"   visual  x  bf lv rg d st  r  g  b a  ax dp st accum buffs  ms\n");
+	ddlogMessage(DDLOG_INFO,
+		" id dep cl sp sz l  ci b ro sz sz sz sz bf th cl  r  g  b  a ns b\n");
+	ddlogMessage(DDLOG_INFO,
+		"-----------------------------------------------------------------\n");
+
+	for (i=0, lpPF = glb.lpPF; i<glb.nPixelFormatCount; i++, lpPF++) {
+		const PIXELFORMATDESCRIPTOR	*pfd = &lpPF->pfd;
+		const int					bRGBA = (pfd->iPixelType == PFD_TYPE_RGBA);
+
+		// id (1-based) and colour depth
+		sprintf(buf, "0x%02x ", i+1);
+		dglpfAppendColumn(buf, pfd->cColorBits, TRUE);
+
+		// Drawable class
+		if (pfd->dwFlags & PFD_DRAW_TO_WINDOW)		strcat(buf, "wn ");
+		else if (pfd->dwFlags & PFD_DRAW_TO_BITMAP)	strcat(buf, "bm ");
+		else										strcat(buf, ".  ");
+
+		/* should find transparent pixel from LAYERPLANEDESCRIPTOR */
+		strcat(buf, " . ");
+
+		// Buffer size
+		dglpfAppendColumn(buf, pfd->cColorBits, TRUE);
+
+		/* bReserved field indicates number of over/underlays */
+		if (pfd->bReserved) sprintf(cat, " %d ", pfd->bReserved);
+		else sprintf(cat, " . ");
+		strcat(buf, cat);
+
+		// RGBA or colour index, double buffering, stereo
+		strcat(buf, bRGBA ? " r " : " c ");
+		strcat(buf, (pfd->dwFlags & PFD_DOUBLEBUFFER) ? "y " : ". ");
+		strcat(buf, (pfd->dwFlags & PFD_STEREO) ? " y " : " . ");
+
+		// Colour channel sizes are only shown for RGBA formats
+		dglpfAppendColumn(buf, pfd->cRedBits,   pfd->cRedBits   && bRGBA);
+		dglpfAppendColumn(buf, pfd->cGreenBits, pfd->cGreenBits && bRGBA);
+		dglpfAppendColumn(buf, pfd->cBlueBits,  pfd->cBlueBits  && bRGBA);
+		dglpfAppendColumn(buf, pfd->cAlphaBits, pfd->cAlphaBits && bRGBA);
+
+		// Auxiliary, depth, stencil and accumulation buffers
+		dglpfAppendColumn(buf, pfd->cAuxBuffers,     pfd->cAuxBuffers);
+		dglpfAppendColumn(buf, pfd->cDepthBits,      pfd->cDepthBits);
+		dglpfAppendColumn(buf, pfd->cStencilBits,    pfd->cStencilBits);
+		dglpfAppendColumn(buf, pfd->cAccumRedBits,   pfd->cAccumRedBits);
+		dglpfAppendColumn(buf, pfd->cAccumGreenBits, pfd->cAccumGreenBits);
+		dglpfAppendColumn(buf, pfd->cAccumBlueBits,  pfd->cAccumBlueBits);
+		dglpfAppendColumn(buf, pfd->cAccumAlphaBits, pfd->cAccumAlphaBits);
+
+		/* no multisample in Win32 */
+		strcat(buf, " . .\n");
+
+		ddlogMessage(DDLOG_INFO, buf);
+	}
+
+	ddlogMessage(DDLOG_INFO,
+		"-----------------------------------------------------------------\n");
+	ddlogMessage(DDLOG_INFO, "\n");
+}
+
+// Build the global pixel format list (glb.lpPF / glb.nPixelFormatCount) and
+// dump it to the log.
+//
+// _USE_GLD3_WGL builds hand the work to _gldDriver.BuildPixelformatList().
+//
+// Otherwise DirectDraw/Direct3D are queried for:
+//   - the display modes matching the current depth (glb.lpDisplayModes),
+//   - the depth buffer formats of the device     (glb.lpZBufferPF),
+// and one pixel format is generated per depth format (plus one without a
+// depth buffer), first double-buffered, then again single-buffered.
+// On failure the function returns early with whatever was built so far;
+// everything allocated here is freed by dglReleasePixelFormatList().
+void dglBuildPixelFormatList()
+{
+#ifdef _USE_GLD3_WGL
+	_gldDriver.BuildPixelformatList();
+
+	dglpfLogPixelFormatList();
+#else
+	int				i;
+	int				nKept;				// Z formats kept by the stencil filter
+	int				nFormatsPerHalf;	// Pixel formats per buffering mode
+	int				iZBuffer;			// Index into glb.lpZBufferPF, or -1
+	BOOL			bDepthKnown = FALSE;// TRUE once dwDisplayBitDepth was read
+	DGL_pixelFormat	*lpPF;
+	HRESULT			hRes;
+	IDirectDraw		*lpDD1 = NULL;
+	IDirectDraw4	*lpDD4 = NULL;
+	IDirect3D3		*lpD3D3 = NULL;
+	DDSURFACEDESC2	ddsdDisplayMode;
+
+	DWORD			dwRb, dwGb, dwBb, dwAb; // Bit counts
+	DWORD			dwRs, dwGs, dwBs, dwAs; // Bit shifts
+	DWORD			dwPixelType;			// RGB or color index
+
+	// Set defaults
+	glb.nPixelFormatCount	= 0;
+	glb.lpPF				= NULL;
+	glb.nZBufferPFCount		= 0;
+	glb.lpZBufferPF			= NULL;
+	glb.nDisplayModeCount	= 0;
+	glb.lpDisplayModes		= NULL;
+
+	//
+	// Examine the hardware for depth and stencil
+	//
+
+	if (glb.bPrimary)
+		hRes = DirectDrawCreate(NULL, &lpDD1, NULL);
+	else
+		hRes = DirectDrawCreate(&glb.ddGuid, &lpDD1, NULL);
+
+	if (FAILED(hRes)) {
+		ddlogError(DDLOG_ERROR, "dglBPFL: DirectDrawCreate failed", hRes);
+		return;
+	}
+
+	// Query for DX6 IDirectDraw4.
+	hRes = IDirectDraw_QueryInterface(
+				lpDD1,
+				&IID_IDirectDraw4,
+				(void**)&lpDD4);
+	if (FAILED(hRes)) {
+		ddlogError(DDLOG_ERROR, "dglBPFL: QueryInterface (DD4) failed", hRes);
+		goto clean_up;
+	}
+
+	// Retrieve caps of current display mode
+	ZeroMemory(&ddsdDisplayMode, sizeof(ddsdDisplayMode));
+	ddsdDisplayMode.dwSize = sizeof(ddsdDisplayMode);
+	hRes = IDirectDraw4_GetDisplayMode(lpDD4, &ddsdDisplayMode);
+	if (FAILED(hRes))
+		goto clean_up;
+
+	dwDisplayBitDepth = ddsdDisplayMode.ddpfPixelFormat.dwRGBBitCount;
+	bDepthKnown = TRUE;
+	dwPixelType = (dwDisplayBitDepth <= 8) ? PFD_TYPE_COLORINDEX : PFD_TYPE_RGBA;
+	dwRb = BitCount(ddsdDisplayMode.ddpfPixelFormat.dwRBitMask);
+	dwGb = BitCount(ddsdDisplayMode.ddpfPixelFormat.dwGBitMask);
+	dwBb = BitCount(ddsdDisplayMode.ddpfPixelFormat.dwBBitMask);
+	dwRs = BitShift(ddsdDisplayMode.ddpfPixelFormat.dwRBitMask);
+	dwGs = BitShift(ddsdDisplayMode.ddpfPixelFormat.dwGBitMask);
+	dwBs = BitShift(ddsdDisplayMode.ddpfPixelFormat.dwBBitMask);
+	// Both helpers return 0 for an empty mask, i.e. "no alpha"
+	dwAb = BitCount(ddsdDisplayMode.ddpfPixelFormat.dwRGBAlphaBitMask);
+	dwAs = BitShift(ddsdDisplayMode.ddpfPixelFormat.dwRGBAlphaBitMask);
+
+	// Query for available display modes
+	ddlogMessage(DDLOG_INFO, "\n");
+	ddlogMessage(DDLOG_INFO, "Display Modes:\n");
+
+	// Pass 1: Determine count
+	iEnumCount = 0;
+	hRes = IDirectDraw4_EnumDisplayModes(
+				lpDD4,
+				0,
+				NULL,
+				NULL,
+				EnumDisplayModesCallback);
+	if (FAILED(hRes)) {
+		ddlogError(DDLOG_ERROR, "dglBPFL: EnumDisplayModes failed", hRes);
+		goto clean_up;
+	}
+	if (iEnumCount == 0) {
+		ddlogMessage(DDLOG_ERROR, "dglBPFL: No display modes found");
+		goto clean_up;
+	}
+	glb.lpDisplayModes = (DDSURFACEDESC2 *)calloc(iEnumCount,
+												sizeof(DDSURFACEDESC2));
+	if (glb.lpDisplayModes == NULL) {
+		ddlogMessage(DDLOG_ERROR, "dglBPFL: DDSURFACEDESC2 calloc failed");
+		goto clean_up;
+	}
+	glb.nDisplayModeCount = iEnumCount;
+
+	// Pass 2: Save modes
+	iEnumCount = 0;
+	hRes = IDirectDraw4_EnumDisplayModes(
+				lpDD4,
+				0,
+				NULL,
+				(void *)glb.lpDisplayModes,
+				EnumDisplayModesCallback);
+	if (FAILED(hRes)) {
+		ddlogError(DDLOG_ERROR, "dglBPFL: EnumDisplayModes failed", hRes);
+		goto clean_up;
+	}
+
+	// Query for IDirect3D3 interface
+	hRes = IDirectDraw4_QueryInterface(
+				lpDD4,
+				&IID_IDirect3D3,
+				(void**)&lpD3D3);
+	if (FAILED(hRes)) {
+		ddlogError(DDLOG_ERROR, "dglBPFL: QueryInterface (D3D3) failed", hRes);
+		goto clean_up;
+	}
+
+	ddlogMessage(DDLOG_INFO, "\n");
+	ddlogMessage(DDLOG_INFO, "ZBuffer formats:\n");
+
+	// Pass 1. Count the ZBuffer pixel formats
+	hRes = IDirect3D3_EnumZBufferFormats(
+				lpD3D3,
+				&glb.d3dGuid,
+				d3dEnumZBufferFormatsCallback,
+				NULL);
+	if (FAILED(hRes))
+		goto clean_up;
+
+	if (glb.nZBufferPFCount) {
+		glb.lpZBufferPF = (DDPIXELFORMAT *)calloc(glb.nZBufferPFCount,
+												sizeof(DDPIXELFORMAT));
+		if(glb.lpZBufferPF == NULL)
+			goto clean_up;
+
+		// Pass 2. Cache the ZBuffer pixel formats
+		iEnumCount = 0; // (Used by the enum function)
+		hRes = IDirect3D3_EnumZBufferFormats(
+					lpD3D3,
+					&glb.d3dGuid,
+					d3dEnumZBufferFormatsCallback,
+					glb.lpZBufferPF);
+		if (FAILED(hRes))
+			goto clean_up;
+	}
+
+	// Remove stencil support for boards which don't work for AutoCAD;
+	// Matrox G400 does not work, but NVidia TNT2 and ATI Rage128 do... (DaveM)
+	// The surviving formats are moved to the front of the array so that the
+	// first glb.nZBufferPFCount entries are exactly the usable ones. Nothing
+	// is touched when no Z formats were found (glb.lpZBufferPF is NULL then).
+	if (glb.nZBufferPFCount && IsStencilSupportBroken(lpDD4)) {
+		nKept = 0;
+		for (i=0; i<glb.nZBufferPFCount; i++) {
+			if (glb.lpZBufferPF[i].dwFlags & DDPF_STENCILBUFFER)
+				continue;
+			if (nKept != i)
+				glb.lpZBufferPF[nKept] = glb.lpZBufferPF[i];
+			nKept++;
+		}
+		glb.nZBufferPFCount = nKept;
+	}
+
+	// One each for every ZBuffer pixel format (including no depth buffer)
+	// Times-two because duplicated for single buffering (as opposed to double)
+	nFormatsPerHalf = glb.nZBufferPFCount + 1;
+	glb.nPixelFormatCount = 2 * nFormatsPerHalf;
+	glb.lpPF = (DGL_pixelFormat *)calloc(glb.nPixelFormatCount,
+										sizeof(DGL_pixelFormat));
+	if (glb.lpPF == NULL)
+		goto clean_up;
+
+	//
+	// Fill in the pixel formats
+	// Note: Depth buffer bits are really (dwZBufferBitDepth-dwStencilBitDepth)
+	//		 but this will pass weird numbers to the OpenGL app. (?)
+	//
+
+	pfTemplateHW.pfd.iPixelType		= dwPixelType;
+	pfTemplateHW.pfd.cColorBits		= dwDisplayBitDepth;
+	pfTemplateHW.pfd.cRedBits		= dwRb;
+	pfTemplateHW.pfd.cGreenBits		= dwGb;
+	pfTemplateHW.pfd.cBlueBits		= dwBb;
+	pfTemplateHW.pfd.cAlphaBits		= dwAb;
+	pfTemplateHW.pfd.cRedShift		= dwRs;
+	pfTemplateHW.pfd.cGreenShift	= dwGs;
+	pfTemplateHW.pfd.cBlueShift		= dwBs;
+	pfTemplateHW.pfd.cAlphaShift	= dwAs;
+
+	// First half: double-buffered formats. Second half: the same depth
+	// formats again, single-buffered.
+	lpPF = glb.lpPF;
+	for (i=0; i<glb.nPixelFormatCount; i++, lpPF++) {
+		iZBuffer = (i % nFormatsPerHalf) - 1;	// -1: format without depth buffer
+
+		memcpy(lpPF, &pfTemplateHW, sizeof(DGL_pixelFormat));
+		if (iZBuffer >= 0) {
+			const DDPIXELFORMAT *lpZPF = &glb.lpZBufferPF[iZBuffer];
+
+			lpPF->iZBufferPF		= iZBuffer;
+			lpPF->pfd.cDepthBits	= lpZPF->dwZBufferBitDepth;
+			// The stencil depth is only meaningful with the stencil flag
+			lpPF->pfd.cStencilBits	= (lpZPF->dwFlags & DDPF_STENCILBUFFER)
+										? lpZPF->dwStencilBitDepth : 0;
+		}
+		if (i >= nFormatsPerHalf) {
+			// Remove double-buffer flag. Could use XOR instead...
+			lpPF->pfd.dwFlags &= (~(PFD_DOUBLEBUFFER));
+			// Insert GDI flag for single buffered format only.
+			lpPF->pfd.dwFlags |= PFD_SUPPORT_GDI;
+		}
+	}
+
+	// Lets dump the list to the log
+	dglpfLogPixelFormatList();
+
+clean_up:
+	// Release COM objects
+	RELEASE(lpD3D3);
+	RELEASE(lpDD4);
+	RELEASE(lpDD1);
+
+	// Popup warning message if non RGB color mode. Only meaningful when the
+	// depth was actually read above; dwDisplayBitDepth is a global and would
+	// otherwise hold a stale (or zero) value.
+	if (bDepthKnown && dwDisplayBitDepth <= 8) {
+		ddlogPrintf(DDLOG_WARN, "Current Color Depth %d bpp is not supported", (int)dwDisplayBitDepth);
+		MessageBox(NULL, szColorDepthWarning, "GLDirect", MB_OK | MB_ICONWARNING);
+	}
+#endif // _USE_GLD3_WGL
+}
+
+// ***********************************************************************
+
+// Free the lists held in glb (pixel formats and, in non-GLD3 builds, the
+// depth buffer formats and display modes) and reset their counts and
+// pointers so the lists read as empty.
+void dglReleasePixelFormatList()
+{
+	// free(NULL) is a no-op, so the pointers need no guarding
+	free(glb.lpPF);
+	glb.lpPF = NULL;
+	glb.nPixelFormatCount = 0;
+
+#ifndef _USE_GLD3_WGL
+	free(glb.lpZBufferPF);
+	glb.lpZBufferPF = NULL;
+	glb.nZBufferPFCount = 0;
+
+	free(glb.lpDisplayModes);
+	glb.lpDisplayModes = NULL;
+	glb.nDisplayModeCount = 0;
+#endif // _USE_GLD3_WGL
+}
+
+// ***********************************************************************
diff --git a/src/mesa/drivers/windows/gldirect/dglpf.h b/src/mesa/drivers/windows/gldirect/dglpf.h
new file mode 100644
index 0000000000..8a7e38c4b3
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dglpf.h
@@ -0,0 +1,77 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  Pixel Formats.
+*
+****************************************************************************/
+
+#ifndef __DGLPF_H
+#define __DGLPF_H
+
+#ifndef STRICT
+#define STRICT
+#endif // STRICT
+#define WIN32_LEAN_AND_MEAN
+
+#include <windows.h>
+
+/*---------------------- Macros and type definitions ----------------------*/
+
+// One entry of the global pixel format list (glb.lpPF): a Win32 pixel
+// format descriptor followed by one build-specific bookkeeping member.
+// pfTemplateHW in dglpf.c initializes this struct positionally, so the
+// member order matters.
+typedef struct {
+	PIXELFORMATDESCRIPTOR	pfd;		// Win32 Pixel Format Descriptor
+#ifdef _USE_GLD3_WGL
+	// Driver-specific data.
+	// Example: The DX8 driver uses this to hold an index into a
+	// list of depth-stencil descriptions.
+	DWORD					dwDriverData;
+#else
+	// Index into glb.lpZBufferPF; the template value -1 means
+	// "no depth/stencil buffer".
+	int						iZBufferPF; // Index of depth buffer pixel format
+#endif
+} DGL_pixelFormat;
+
+#include "dglglobals.h"
+
+/*------------------------- Function Prototypes ---------------------------*/
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+// Returns TRUE if iPFD is a valid 1-based index into the pixel format list.
+BOOL	IsValidPFD(int iPFD);
+// Builds the global pixel format list (glb.lpPF) and dumps it to the log.
+void	dglBuildPixelFormatList();
+// Frees the lists held in glb and resets their counts.
+void	dglReleasePixelFormatList();
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/mesa/drivers/windows/gldirect/dglwgl.c b/src/mesa/drivers/windows/gldirect/dglwgl.c
new file mode 100644
index 0000000000..74ecb01a5b
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dglwgl.c
@@ -0,0 +1,2964 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  OpenGL window  functions (wgl*).
+*
+****************************************************************************/
+
+#include "dglwgl.h"
+#ifdef _USE_GLD3_WGL
+#include "gld_driver.h"
+#endif
+
+#include "gl/glu.h"	// MUST USE MICROSOFT'S GLU32!
+
+#ifndef _USE_GLD3_WGL
+extern DGL_mesaFuncs mesaFuncs;
+#endif
+
+// Need to export wgl* functions if using GLD3,
+// otherwise export GLD2 DGL_* functions.
+// Example: _GLD_WGL_EXPORT(ChoosePixelFormat) expands to
+// wglChoosePixelFormat (GLD3) or DGL_ChoosePixelFormat (GLD2).
+#ifdef _USE_GLD3_WGL
+#define _GLD_WGL_EXPORT(a) wgl##a
+#else
+#define _GLD_WGL_EXPORT(a) DGL_##a
+#endif
+
+// Calls into Mesa 4.x are different
+// The _GLD_gl* names below let the rest of this file issue GL calls
+// without caring which of the two routes is in use.
+#ifdef _USE_GLD3_WGL
+#include "dlist.h"
+#include "drawpix.h"
+#include "get.h"
+#include "matrix.h"
+// NOTE: All the _GLD* macros now call the gl* functions direct.
+//       This ensures that the correct internal pathway is taken. KeithH
+#define _GLD_glNewList		glNewList
+#define _GLD_glBitmap		glBitmap
+#define _GLD_glEndList		glEndList
+#define _GLD_glDeleteLists	glDeleteLists
+#define _GLD_glGetError		glGetError
+#define _GLD_glTranslatef	glTranslatef
+#define _GLD_glBegin		glBegin
+#define _GLD_glVertex2fv	glVertex2fv
+#define _GLD_glEnd			glEnd
+#define _GLD_glNormal3f		glNormal3f
+#define _GLD_glVertex3f		glVertex3f
+#define _GLD_glVertex3fv	glVertex3fv
+#else // _USE_GLD3_WGL
+// GLD2: every call goes through the function pointers held in mesaFuncs.
+#define _GLD_glNewList		(*mesaFuncs.glNewList)
+#define _GLD_glBitmap		(*mesaFuncs.glBitmap)
+#define _GLD_glEndList		(*mesaFuncs.glEndList)
+#define _GLD_glDeleteLists	(*mesaFuncs.glDeleteLists)
+#define _GLD_glGetError		(*mesaFuncs.glGetError)
+#define _GLD_glTranslatef	(*mesaFuncs.glTranslatef)
+#define _GLD_glBegin		(*mesaFuncs.glBegin)
+#define _GLD_glVertex2fv	(*mesaFuncs.glVertex2fv)
+#define _GLD_glEnd			(*mesaFuncs.glEnd)
+#define _GLD_glNormal3f		(*mesaFuncs.glNormal3f)
+#define _GLD_glVertex3f		(*mesaFuncs.glVertex3f)
+#define _GLD_glVertex3fv	(*mesaFuncs.glVertex3fv)
+#endif // _USE_GLD3_WGL
+
+// ***********************************************************************
+
+// Emulate SGI DDK calls.
+// Both map onto the Win32 global heap; GPTR requests fixed,
+// zero-initialized memory, so __wglMalloc() returns zeroed storage.
+#define __wglMalloc(a) GlobalAlloc(GPTR, (a))
+#define __wglFree(a) GlobalFree((a))
+
+// ***********************************************************************
+
+// Mesa glu.h and MS glu.h call these different things...
+//#define GLUtesselator GLUtriangulatorObj
+//#define GLU_TESS_VERTEX_DATA GLU_VERTEX_DATA
+
+// For wglFontOutlines
+
+// Function pointer types mirroring the GLU tessellator entry points, so
+// that GLU can be called through pointers instead of being linked directly.
+typedef GLUtesselator *(APIENTRY *gluNewTessProto)(void);
+typedef void (APIENTRY *gluDeleteTessProto)(GLUtesselator *tess);
+typedef void (APIENTRY *gluTessBeginPolygonProto)(GLUtesselator *tess, void *polygon_data);
+typedef void (APIENTRY *gluTessBeginContourProto)(GLUtesselator *tess);
+typedef void (APIENTRY *gluTessVertexProto)(GLUtesselator *tess, GLdouble coords[3], void *data);
+typedef void (APIENTRY *gluTessEndContourProto)(GLUtesselator *tess);
+typedef void (APIENTRY *gluTessEndPolygonProto)(GLUtesselator *tess);
+typedef void (APIENTRY *gluTessPropertyProto)(GLUtesselator *tess, GLenum which, GLdouble value);
+typedef void (APIENTRY *gluTessNormalProto)(GLUtesselator *tess, GLdouble x, GLdouble y, GLdouble z);
+typedef void (APIENTRY *gluTessCallbackProto)(GLUtesselator *tess, GLenum which, void (CALLBACK *)());
+
+// Module handle and one pointer per entry point above.
+// NOTE(review): presumably filled in by LoadGLUTesselator() and cleared by
+// UnloadGLUTesselator() -- confirm against their definitions.
+static HINSTANCE		gluModuleHandle;
+static gluNewTessProto		gluNewTessProc;
+static gluDeleteTessProto	gluDeleteTessProc;
+static gluTessBeginPolygonProto	gluTessBeginPolygonProc;
+static gluTessBeginContourProto	gluTessBeginContourProc;
+static gluTessVertexProto	gluTessVertexProc;
+static gluTessEndContourProto	gluTessEndContourProc;
+static gluTessEndPolygonProto	gluTessEndPolygonProc;
+static gluTessPropertyProto	gluTessPropertyProc;
+static gluTessNormalProto	gluTessNormalProc;
+static gluTessCallbackProto	gluTessCallbackProc;
+
+// File-scope working state for the font outline code.
+// NOTE(review): the meaning of each item is inferred from its name only;
+// verify against the functions that use them.
+static HFONT	hNewFont, hOldFont;
+static FLOAT	ScaleFactor;
+
+// Presumably the growth increment of LineBuf / VertBuf -- TODO confirm in
+// AppendToLineBuf() / AppendToVertBuf().
+#define LINE_BUF_QUANT 4000
+#define VERT_BUF_QUANT 4000
+
+static FLOAT*	LineBuf;
+static DWORD	LineBufSize;
+static DWORD	LineBufIndex;
+static FLOAT*	VertBuf;
+static DWORD	VertBufSize;
+static DWORD	VertBufIndex;
+static GLenum	TessErrorOccurred;
+
+static int AppendToLineBuf(
+	FLOAT value);
+
+static int AppendToVertBuf(
+	FLOAT value);
+
+static int DrawGlyph(
+	UCHAR*		glyphBuf,
+	DWORD		glyphSize,
+	FLOAT		chordalDeviation,
+	FLOAT		extrusion,
+	INT		format);
+
+static void FreeLineBuf(void);
+
+static void FreeVertBuf(void);
+
+static long GetWord(
+	UCHAR**		p);
+
+static long GetDWord(
+	UCHAR**		p);
+
+static double GetFixed(
+	UCHAR**		p);
+
+static int InitLineBuf(void);
+
+static int InitVertBuf(void);
+
+static HFONT CreateHighResolutionFont(
+	HDC		hDC);
+
+static int MakeDisplayListFromGlyph(
+	DWORD			listName,
+	UCHAR*			glyphBuf,
+	DWORD			glyphSize,
+	LPGLYPHMETRICSFLOAT	glyphMetricsFloat,
+	FLOAT			chordalDeviation,
+	FLOAT			extrusion,
+	INT			format);
+
+static BOOL LoadGLUTesselator(void);
+static BOOL UnloadGLUTesselator(void);
+
+static int MakeLinesFromArc(
+	FLOAT		x0,
+	FLOAT		y0,
+	FLOAT		x1,
+	FLOAT		y1,
+	FLOAT		x2,
+	FLOAT		y2,
+	DWORD		vertexCountIndex,
+	FLOAT		chordalDeviationSquared);
+
+static int MakeLinesFromGlyph(		UCHAR*		glyphBuf,
+					DWORD		glyphSize,
+					FLOAT		chordalDeviation);
+
+static int MakeLinesFromTTLine(		UCHAR**		pp,
+					DWORD		vertexCountIndex,
+					WORD		pointCount);
+
+static int MakeLinesFromTTPolycurve(	UCHAR**		pp,
+					DWORD		vertexCountIndex,
+					FLOAT		chordalDeviation);
+
+static int MakeLinesFromTTPolygon(	UCHAR**		pp,
+					FLOAT		chordalDeviation);
+
+static int MakeLinesFromTTQSpline(	UCHAR**		pp,
+					DWORD		vertexCountIndex,
+					WORD		pointCount,
+					FLOAT		chordalDeviation);
+
+static void CALLBACK TessCombine(	double		coords[3],
+					void*		vertex_data[4],
+					FLOAT		weight[4],
+					void**		outData);
+
+static void CALLBACK TessError(		GLenum		error);
+
+static void CALLBACK TessVertexOutData(	FLOAT		p[3],
+					GLfloat 	z);
+
+// ***********************************************************************
+
+// Storage behind dglGetPixelFormat() / dglSetPixelFormat().
+#ifdef GLD_THREADS
+#pragma message("compiling DGLWGL.C vars for multi-threaded support")
+extern CRITICAL_SECTION CriticalSection;
+extern DWORD dwTLSPixelFormat;			// TLS index for current pixel format
+#endif
+// Note: despite the original "(static)" remark this variable has external
+// linkage. It holds the current pixel format when thread-local storage is
+// not in use, and is the fallback when a TLS access raises an exception.
+int curPFD = 0;							// Current PFD
+
+// ***********************************************************************
+
+// Return the current pixel format index.
+// With GLD_THREADS and glb.bMultiThreaded set, the value comes from the
+// calling thread's TLS slot (dwTLSPixelFormat); if that access raises an
+// exception, or in every other configuration, the global curPFD is returned.
+int dglGetPixelFormat(void)
+{
+#ifdef GLD_THREADS
+	int iPixelFormat;
+
+	// Not multi-threaded: use the global variable
+	if (!glb.bMultiThreaded)
+		return curPFD;
+
+	// Multi-threaded: use the thread-specific instance
+	__try {
+		iPixelFormat = (int)TlsGetValue(dwTLSPixelFormat);
+	}
+	__except(EXCEPTION_EXECUTE_HANDLER) {
+		iPixelFormat = curPFD;
+	}
+	return iPixelFormat;
+#else
+	return curPFD;
+#endif
+}
+
+// ***********************************************************************
+
+// Record iPixelFormat as the current pixel format index.
+// With GLD_THREADS and glb.bMultiThreaded set, the value goes into the
+// calling thread's TLS slot (dwTLSPixelFormat); if that access raises an
+// exception, or in every other configuration, it is stored in curPFD.
+void dglSetPixelFormat(int iPixelFormat)
+{
+#ifdef GLD_THREADS
+	// Not multi-threaded: use the global variable
+	if (!glb.bMultiThreaded) {
+		curPFD = iPixelFormat;
+		return;
+	}
+
+	// Multi-threaded: use the thread-specific instance
+	__try {
+		TlsSetValue(dwTLSPixelFormat, (LPVOID)iPixelFormat);
+	}
+	__except(EXCEPTION_EXECUTE_HANDLER) {
+		curPFD = iPixelFormat;
+	}
+#else
+	curPFD = iPixelFormat;
+#endif
+}
+
+// ***********************************************************************
+
+// wglChoosePixelFormat entry point: pick the entry of the glb.lpPF list
+// that best matches the requested descriptor.
+//
+//   a     device context (not used by the selection)
+//   ppfd  requested pixel format; a NULL pointer is rejected
+//
+// Returns the 1-based index of the chosen pixel format, or 0 if the license
+// check fails, ppfd is NULL, or the pixel type / layer type is not
+// recognised. When no candidate passes the "must match" tests, the last
+// entry of the list (index glb.nPixelFormatCount) is returned instead.
+//
+// A candidate must match the request on:
+//   - iPixelType and iLayerType (exact)
+//   - every requested DRAW_TO_WINDOW / DRAW_TO_BITMAP / SUPPORT_GDI /
+//     SUPPORT_OPENGL flag
+//   - PFD_DOUBLEBUFFER (exact, unless PFD_DOUBLEBUFFER_DONTCARE is set)
+//   - PFD_STEREO (exact; the PFD_STEREO_DONTCARE test is disabled)
+//   - presence of alpha/accum (RGBA only), depth, stencil and aux buffers
+//     whenever the request asks for them
+// Surviving candidates are then ranked on colour bits, alpha bits, accum
+// bits, depth bits, stencil bits and aux buffers, in that order.
+int APIENTRY _GLD_WGL_EXPORT(ChoosePixelFormat)(
+	HDC a,
+	CONST PIXELFORMATDESCRIPTOR *ppfd)
+{
+	DGL_pixelFormat			*lpPF;
+
+	PIXELFORMATDESCRIPTOR	ppfdBest;
+	int						i;
+	int						bestIndex = -1;
+	int						numPixelFormats;
+	DWORD					dwFlags;
+
+	char					buf[128];	// one formatted log row
+	char					cat[8];		// one formatted log column
+
+	// Every dwFlags bit that we accept in a request
+	DWORD dwAllFlags = 
+					PFD_DRAW_TO_WINDOW |
+					PFD_DRAW_TO_BITMAP |
+					PFD_SUPPORT_GDI |
+					PFD_SUPPORT_OPENGL |
+					PFD_GENERIC_FORMAT |
+					PFD_NEED_PALETTE |
+					PFD_NEED_SYSTEM_PALETTE |
+					PFD_DOUBLEBUFFER |
+					PFD_STEREO |
+					/*PFD_SWAP_LAYER_BUFFERS |*/
+					PFD_DOUBLEBUFFER_DONTCARE |
+					PFD_STEREO_DONTCARE |
+					PFD_SWAP_COPY |
+					PFD_SWAP_EXCHANGE |
+					PFD_GENERIC_ACCELERATED |
+					0;
+
+	// Validate license
+	if (!dglValidate())
+		return 0;
+
+	// Everything below dereferences the request, so refuse a NULL one
+	if (ppfd == NULL) {
+		ddlogMessage(DDLOG_WARN, "ChoosePixelFormat: NULL pixel format descriptor\n");
+		return 0;
+	}
+
+	// List may not be built until dglValidate() is called! KeithH
+	lpPF = glb.lpPF;
+
+	//
+	// Lets print the input pixel format to the log
+	// ** Based on "wglinfo" by Nate Robins **
+	//
+	ddlogMessage(DDLOG_SYSTEM, "ChoosePixelFormat:\n");
+	ddlogMessage(DDLOG_INFO, "Input pixel format for ChoosePixelFormat:\n");
+	ddlogMessage(DDLOG_INFO,
+		"   visual  x  bf lv rg d st  r  g  b a  ax dp st accum buffs  ms\n");
+	ddlogMessage(DDLOG_INFO,
+		" id dep cl sp sz l  ci b ro sz sz sz sz bf th cl  r  g  b  a ns b\n");
+	ddlogMessage(DDLOG_INFO,
+		"-----------------------------------------------------------------\n");
+	sprintf(buf, "  .  ");
+
+	sprintf(cat, "%2d ", ppfd->cColorBits);
+	strcat(buf, cat);
+	if(ppfd->dwFlags & PFD_DRAW_TO_WINDOW)      sprintf(cat, "wn ");
+	else if(ppfd->dwFlags & PFD_DRAW_TO_BITMAP) sprintf(cat, "bm ");
+	else sprintf(cat, ".  ");
+	strcat(buf, cat);
+
+	/* should find transparent pixel from LAYERPLANEDESCRIPTOR */
+	sprintf(cat, " . "); 
+	strcat(buf, cat);
+
+	sprintf(cat, "%2d ", ppfd->cColorBits);
+	strcat(buf, cat);
+
+	/* bReserved field indicates number of over/underlays */
+	if(ppfd->bReserved) sprintf(cat, " %d ", ppfd->bReserved);
+	else sprintf(cat, " . "); 
+	strcat(buf, cat);
+
+	sprintf(cat, " %c ", ppfd->iPixelType == PFD_TYPE_RGBA ? 'r' : 'c');
+	strcat(buf, cat);
+
+	sprintf(cat, "%c ", ppfd->dwFlags & PFD_DOUBLEBUFFER ? 'y' : '.');
+	strcat(buf, cat);
+
+	sprintf(cat, " %c ", ppfd->dwFlags & PFD_STEREO ? 'y' : '.');
+	strcat(buf, cat);
+
+	if(ppfd->cRedBits && ppfd->iPixelType == PFD_TYPE_RGBA) 
+	    sprintf(cat, "%2d ", ppfd->cRedBits);
+	else sprintf(cat, " . ");
+	strcat(buf, cat);
+
+	if(ppfd->cGreenBits && ppfd->iPixelType == PFD_TYPE_RGBA) 
+	    sprintf(cat, "%2d ", ppfd->cGreenBits);
+	else sprintf(cat, " . ");
+	strcat(buf, cat);
+
+	if(ppfd->cBlueBits && ppfd->iPixelType == PFD_TYPE_RGBA) 
+	    sprintf(cat, "%2d ", ppfd->cBlueBits);
+	else sprintf(cat, " . ");
+	strcat(buf, cat);
+	
+	if(ppfd->cAlphaBits && ppfd->iPixelType == PFD_TYPE_RGBA) 
+		sprintf(cat, "%2d ", ppfd->cAlphaBits);
+	else sprintf(cat, " . ");
+	strcat(buf, cat);
+	
+	if(ppfd->cAuxBuffers)     sprintf(cat, "%2d ", ppfd->cAuxBuffers);
+	else sprintf(cat, " . ");
+	strcat(buf, cat);
+	
+	if(ppfd->cDepthBits)      sprintf(cat, "%2d ", ppfd->cDepthBits);
+	else sprintf(cat, " . ");
+	strcat(buf, cat);
+	
+	if(ppfd->cStencilBits)    sprintf(cat, "%2d ", ppfd->cStencilBits);
+	else sprintf(cat, " . ");
+	strcat(buf, cat);
+	
+	if(ppfd->cAccumRedBits)   sprintf(cat, "%2d ", ppfd->cAccumRedBits);
+	else sprintf(cat, " . ");
+	strcat(buf, cat);
+
+	if(ppfd->cAccumGreenBits) sprintf(cat, "%2d ", ppfd->cAccumGreenBits);
+	else sprintf(cat, " . ");
+	strcat(buf, cat);
+	
+	if(ppfd->cAccumBlueBits)  sprintf(cat, "%2d ", ppfd->cAccumBlueBits);
+	else sprintf(cat, " . ");
+	strcat(buf, cat);
+	
+	if(ppfd->cAccumAlphaBits) sprintf(cat, "%2d ", ppfd->cAccumAlphaBits);
+	else sprintf(cat, " . ");
+	strcat(buf, cat);
+	
+	/* no multisample in Win32 */
+	sprintf(cat, " . .\n");
+	strcat(buf, cat);
+
+	ddlogMessage(DDLOG_INFO, buf);
+	ddlogMessage(DDLOG_INFO,
+		"-----------------------------------------------------------------\n");
+	ddlogMessage(DDLOG_INFO, "\n");
+
+	//
+	// Examine the flags for correctness
+	//
+	dwFlags = ppfd->dwFlags;
+	if (dwFlags != (dwFlags & dwAllFlags))
+	{
+		/* error: bad dwFlags */
+		ddlogPrintf(DDLOG_WARN,
+					"ChoosePixelFormat: bad flags (0x%x)",
+					dwFlags & (~dwAllFlags));
+		// Mask illegal flags and continue
+		dwFlags = dwFlags & dwAllFlags;
+	}
+	
+	switch (ppfd->iPixelType) {
+	case PFD_TYPE_RGBA:
+	case PFD_TYPE_COLORINDEX:
+		break;
+	default:
+		/* error: bad iPixelType */
+		ddlogMessage(DDLOG_WARN, "ChoosePixelFormat: bad pixel type\n");
+		return 0;
+	}
+	
+	switch (ppfd->iLayerType) {
+	case PFD_MAIN_PLANE:
+	case PFD_OVERLAY_PLANE:
+	case PFD_UNDERLAY_PLANE:
+		break;
+	default:
+		/* error: bad iLayerType */
+		ddlogMessage(DDLOG_WARN, "ChoosePixelFormat: bad layer type\n");
+		return 0;
+	}
+	
+	numPixelFormats = glb.nPixelFormatCount;
+	
+	/* loop through candidate pixel format descriptors */
+	for (i=0; i<numPixelFormats; ++i) {
+		PIXELFORMATDESCRIPTOR ppfdCandidate;
+		
+		memcpy(&ppfdCandidate, &lpPF[i].pfd, sizeof(PIXELFORMATDESCRIPTOR));
+		
+		/*
+		** Check attributes which must match
+		*/
+		if (ppfd->iPixelType != ppfdCandidate.iPixelType) {
+			continue;
+		}
+
+		if (ppfd->iLayerType != ppfdCandidate.iLayerType) {
+			continue;
+		}
+		
+		// Any of these four flags that is requested must also be
+		// present in the candidate (extra candidate flags are fine).
+		if (((dwFlags ^ ppfdCandidate.dwFlags) & dwFlags) &
+			(PFD_DRAW_TO_WINDOW | PFD_DRAW_TO_BITMAP |
+			PFD_SUPPORT_GDI | PFD_SUPPORT_OPENGL))
+		{
+			continue;
+		}
+		
+		if (!(dwFlags & PFD_DOUBLEBUFFER_DONTCARE)) {
+			if ((dwFlags & PFD_DOUBLEBUFFER) !=
+				(ppfdCandidate.dwFlags & PFD_DOUBLEBUFFER))
+			{
+				continue;
+			}
+		}
+		
+		// The stereo flag must match exactly; the DONTCARE test is
+		// deliberately left disabled.
+//		if (!(dwFlags & PFD_STEREO_DONTCARE)) {
+			if ((dwFlags & PFD_STEREO) !=
+				(ppfdCandidate.dwFlags & PFD_STEREO))
+			{
+				continue;
+			}
+//		}
+		
+		if (ppfd->iPixelType==PFD_TYPE_RGBA
+			&& ppfd->cAlphaBits && !ppfdCandidate.cAlphaBits) {
+			continue;
+		}
+		
+		if (ppfd->iPixelType==PFD_TYPE_RGBA
+			&& ppfd->cAccumBits && !ppfdCandidate.cAccumBits) {
+			continue;
+		}
+		
+		if (ppfd->cDepthBits && !ppfdCandidate.cDepthBits) {
+			continue;
+		}
+		
+		if (ppfd->cStencilBits && !ppfdCandidate.cStencilBits) {
+			continue;
+		}
+
+		if (ppfd->cAuxBuffers && !ppfdCandidate.cAuxBuffers) {
+			continue;
+		}
+		
+		/*
+		** See if candidate is better than the previous best choice
+		*/
+		if (bestIndex == -1) {
+			// First candidate to pass the mandatory tests
+			ppfdBest = ppfdCandidate;
+			bestIndex = i;
+			continue;
+		}
+		
+		// Colour depth: prefer more bits while the best so far is below
+		// the request, otherwise prefer the smallest depth that still
+		// satisfies the request.
+		if ((ppfd->cColorBits > ppfdBest.cColorBits &&
+			ppfdCandidate.cColorBits > ppfdBest.cColorBits) ||
+			(ppfd->cColorBits <= ppfdCandidate.cColorBits &&
+			ppfdCandidate.cColorBits < ppfdBest.cColorBits))
+		{
+			ppfdBest = ppfdCandidate;
+			bestIndex = i;
+			continue;
+		}
+		
+		if (ppfd->iPixelType==PFD_TYPE_RGBA
+			&& ppfd->cAlphaBits
+			&& ppfdCandidate.cAlphaBits > ppfdBest.cAlphaBits)
+		{
+			ppfdBest = ppfdCandidate;
+			bestIndex = i;
+			continue;
+		}
+		
+		if (ppfd->iPixelType==PFD_TYPE_RGBA
+			&& ppfd->cAccumBits
+			&& ppfdCandidate.cAccumBits > ppfdBest.cAccumBits)
+		{
+			ppfdBest = ppfdCandidate;
+			bestIndex = i;
+			continue;
+		}
+		
+		// Depth bits: same closest-fit rule as for the colour depth
+		if ((ppfd->cDepthBits > ppfdBest.cDepthBits &&
+			ppfdCandidate.cDepthBits > ppfdBest.cDepthBits) ||
+			(ppfd->cDepthBits <= ppfdCandidate.cDepthBits &&
+			ppfdCandidate.cDepthBits < ppfdBest.cDepthBits))
+		{
+			ppfdBest = ppfdCandidate;
+			bestIndex = i;
+			continue;
+		}
+		
+		if (ppfd->cStencilBits &&
+			ppfdCandidate.cStencilBits > ppfdBest.cStencilBits)
+		{
+			ppfdBest = ppfdCandidate;
+			bestIndex = i;
+			continue;
+		}
+		
+		if (ppfd->cAuxBuffers &&
+			ppfdCandidate.cAuxBuffers > ppfdBest.cAuxBuffers)
+		{
+			ppfdBest = ppfdCandidate;
+			bestIndex = i;
+			continue;
+		}
+	}
+
+	if (bestIndex != -1) {
+		// Pixel format indices handed to the application are 1-based
+		ddlogPrintf(DDLOG_SYSTEM, "Pixel Format %d chosen as best match", bestIndex+1);
+		return bestIndex + 1;
+	}
+
+	// Return the pixelformat that has the most capabilities.
+	// ** NOTE: This is only possible due to the way the list
+	// of pixelformats is built. **
+	// Now picks best pixelformat. KeithH
+	bestIndex = numPixelFormats;	// most capable double buffer format
+	ddlogPrintf(DDLOG_SYSTEM, "Pixel Format %d chosen by default", bestIndex);
+	return (bestIndex);
+}
+
+// ***********************************************************************
+
+// wglCopyContext entry point. Copying state between rendering contexts is
+// not implemented: once the license check has passed the call is handed to
+// the UNSUPPORTED macro, and the function always returns FALSE.
+BOOL APIENTRY _GLD_WGL_EXPORT(CopyContext)(
+	HGLRC a,
+	HGLRC b,
+	UINT c)
+{
+	if (dglValidate()) {
+		// Licensed, but there is nothing we can do with the request
+		UNSUPPORTED("wglCopyContext")
+	}
+
+	return FALSE; // Failed
+}
+
+// ***********************************************************************
+
+// wglCreateContext entry point. Creates a rendering context for device
+// context `a` using the pixel format most recently recorded through
+// dglSetPixelFormat(). Returns a null handle if the license check fails or
+// if the recorded pixel format index is not valid.
+HGLRC APIENTRY _GLD_WGL_EXPORT(CreateContext)(
+	HDC a)
+{
+	int iCurrentPF;
+
+	if (!dglValidate())
+		return 0;
+
+	// Pixel format indices are 1-based, the glb.lpPF array is 0-based
+	iCurrentPF = dglGetPixelFormat();
+	if (IsValidPFD(iCurrentPF))
+		return dglCreateContext(a, &glb.lpPF[iCurrentPF-1]);
+
+	return (HGLRC)0;
+}
+
+// ***********************************************************************
+
+// wglCreateLayerContext entry point. Layer planes are not implemented:
+// once the license check has passed the call is handed to the UNSUPPORTED
+// macro, and the function always returns a null handle.
+HGLRC APIENTRY _GLD_WGL_EXPORT(CreateLayerContext)(
+	HDC a,
+	int b)
+{
+	if (dglValidate()) {
+		UNSUPPORTED("wglCreateLayerContext")
+	}
+
+	return NULL; // Failed
+}
+
+// ***********************************************************************
+
+// wglDeleteContext entry point. Forwards to dglDeleteContext() once the
+// license check has passed; returns FALSE otherwise.
+BOOL APIENTRY _GLD_WGL_EXPORT(DeleteContext)(
+	HGLRC a)
+{
+	if (dglValidate())
+		return dglDeleteContext(a);
+
+	return FALSE;
+}
+
+// ***********************************************************************
+
+// wglDescribeLayerPlane entry point. Layer planes are not implemented:
+// once the license check has passed the call is handed to the UNSUPPORTED
+// macro, and the function always returns FALSE. plpd is never written.
+BOOL APIENTRY _GLD_WGL_EXPORT(DescribeLayerPlane)(
+	HDC hDC,
+	int iPixelFormat,
+	int iLayerPlane,
+	UINT nBytes,
+	LPLAYERPLANEDESCRIPTOR plpd)
+{
+	if (dglValidate()) {
+		UNSUPPORTED("DGL_DescribeLayerPlane")
+	}
+
+	return FALSE;
+}
+
+// ***********************************************************************
+
+// wglDescribePixelFormat entry point.
+//
+//   a  device context (not used)
+//   b  1-based pixel format index to describe
+//   c  size in bytes of the buffer that d points to
+//   d  receives the descriptor; may be NULL to query the format count only
+//
+// Returns glb.nPixelFormatCount on success (and when d is NULL), or 0 if
+// the license check fails or b is not a valid index. For an invalid index
+// the caller's buffer is still filled with an empty descriptor (nSize and
+// nVersion set, everything else zero). At most c bytes of *d are written.
+int APIENTRY _GLD_WGL_EXPORT(DescribePixelFormat)(
+	HDC a,
+	int b,
+	UINT c,
+	LPPIXELFORMATDESCRIPTOR d)
+{
+	PIXELFORMATDESCRIPTOR	pfdEmpty;
+	UINT					nSize;
+
+	// Validate license
+	if (!dglValidate())
+		return 0;
+
+	if (d == NULL) // Calling app requires max number of PF's
+		return glb.nPixelFormatCount;
+
+	// The supplied buffer may be larger than the info that we
+	// will be copying.
+	if (c > sizeof(PIXELFORMATDESCRIPTOR))
+		nSize = sizeof(PIXELFORMATDESCRIPTOR);
+	else
+		nSize = c;
+
+	// Setup an empty PFD before doing validation check.
+	// It is built in a local and then copied so that a caller buffer
+	// shorter than the nSize/nVersion header is never overrun.
+	memset(&pfdEmpty, 0, sizeof(pfdEmpty));
+	pfdEmpty.nSize = (WORD)nSize;
+	pfdEmpty.nVersion = 1;
+	memcpy(d, &pfdEmpty, nSize);
+
+	if (!IsValidPFD(b))
+		return 0; // Bail if PFD index is invalid
+
+	// Pixel format indices are 1-based, the glb.lpPF array is 0-based
+	memcpy(d, &glb.lpPF[b-1].pfd, nSize);
+
+	return glb.nPixelFormatCount;
+}
+
+// ***********************************************************************
+
+// wglGetCurrentContext entry point. Forwards to dglGetCurrentContext()
+// once the license check has passed; returns a null handle otherwise.
+HGLRC APIENTRY _GLD_WGL_EXPORT(GetCurrentContext)(void)
+{
+	if (dglValidate())
+		return dglGetCurrentContext();
+
+	return 0;
+}
+
+// ***********************************************************************
+
+// wglGetCurrentDC entry point. Forwards to dglGetCurrentDC() once the
+// license check has passed; returns a null handle otherwise.
+HDC APIENTRY _GLD_WGL_EXPORT(GetCurrentDC)(void)
+{
+	if (dglValidate())
+		return dglGetCurrentDC();
+
+	return 0;
+}
+
+// ***********************************************************************
+
+// wglGetDefaultProcAddress entry point. Not implemented: once the license
+// check has passed the call is handed to the UNSUPPORTED macro, and the
+// function always returns NULL.
+PROC APIENTRY _GLD_WGL_EXPORT(GetDefaultProcAddress)(
+	LPCSTR a)
+{
+	if (dglValidate()) {
+		UNSUPPORTED("DGL_GetDefaultProcAddress")
+	}
+
+	return NULL;
+}
+
+// ***********************************************************************
+
+// wglGetLayerPaletteEntries entry point. Layer planes are not implemented:
+// once the license check has passed the call is handed to the UNSUPPORTED
+// macro, and the function always returns 0. The array e is never written.
+int APIENTRY _GLD_WGL_EXPORT(GetLayerPaletteEntries)(
+	HDC a,
+	int b,
+	int c,
+	int d,
+	COLORREF *e)
+{
+	if (dglValidate()) {
+		UNSUPPORTED("DGL_GetLayerPaletteEntries")
+	}
+
+	return 0;
+}
+
+// ***********************************************************************
+
+// wglGetPixelFormat entry point. Returns the pixel format index reported
+// by dglGetPixelFormat(), or 0 if the license check fails. The device
+// context argument is not consulted.
+int APIENTRY _GLD_WGL_EXPORT(GetPixelFormat)(
+	HDC a)
+{
+	if (dglValidate())
+		return dglGetPixelFormat();
+
+	return 0;
+}
+
+// ***********************************************************************
+
+// wglGetProcAddress entry point. Looks up the extension function named by
+// `a` through the driver table (_USE_GLD3_WGL builds) or through
+// dglGetProcAddressD3D() (all other builds). Returns NULL if the license
+// check fails; otherwise returns whatever the lookup returns.
+PROC APIENTRY _GLD_WGL_EXPORT(GetProcAddress)(
+	LPCSTR a)
+{
+	PROC dglGetProcAddressD3D(LPCSTR a);
+	PROC pfnResult = NULL;
+
+	if (dglValidate()) {
+#ifdef _USE_GLD3_WGL
+		pfnResult = _gldDriver.wglGetProcAddress(a);
+#else
+		pfnResult = dglGetProcAddressD3D(a);
+#endif
+	}
+
+	return pfnResult;
+}
+
+// ***********************************************************************
+
+// wglMakeCurrent entry point. Forwards to dglMakeCurrent() once the
+// license check has passed; returns FALSE otherwise.
+BOOL APIENTRY _GLD_WGL_EXPORT(MakeCurrent)(
+	HDC a,
+	HGLRC b)
+{
+	if (dglValidate())
+		return dglMakeCurrent(a, b);
+
+	return FALSE;
+}
+
+// ***********************************************************************
+
+// wglRealizeLayerPalette entry point. Layer planes are not implemented:
+// once the license check has passed the call is handed to the UNSUPPORTED
+// macro, and the function always returns FALSE.
+BOOL APIENTRY _GLD_WGL_EXPORT(RealizeLayerPalette)(
+	HDC a,
+	int b,
+	BOOL c)
+{
+	if (dglValidate()) {
+		UNSUPPORTED("DGL_RealizeLayerPalette")
+	}
+
+	return FALSE;
+}
+
+// ***********************************************************************
+
+// wglSetLayerPaletteEntries entry point. Layer planes are not implemented:
+// once the license check has passed the call is handed to the UNSUPPORTED
+// macro, and the function always returns 0.
+int APIENTRY _GLD_WGL_EXPORT(SetLayerPaletteEntries)(
+	HDC a,
+	int b,
+	int c,
+	int d,
+	CONST COLORREF *e)
+{
+	if (dglValidate()) {
+		UNSUPPORTED("DGL_SetLayerPaletteEntries")
+	}
+
+	return 0;
+}
+
+// ***********************************************************************
+
+// wglSetPixelFormat entry point. Records pixel format index `b` through
+// dglSetPixelFormat() when it is a valid index. Neither the device context
+// nor the descriptor `c` is consulted. Returns TRUE when the index was
+// recorded, FALSE if the license check fails or the index is invalid.
+BOOL APIENTRY _GLD_WGL_EXPORT(SetPixelFormat)(
+	HDC a,
+	int b,
+	CONST PIXELFORMATDESCRIPTOR *c)
+{
+	if (!dglValidate())
+		return FALSE;
+
+	// Reject anything that is not an index into the pixel format list
+	if (!IsValidPFD(b)) {
+		ddlogPrintf(DDLOG_ERROR,
+					"SetPixelFormat: PixelFormat %d is invalid and cannot be set", b);
+		return FALSE;
+	}
+
+	ddlogPrintf(DDLOG_SYSTEM, "SetPixelFormat: PixelFormat %d has been set", b);
+	dglSetPixelFormat(b);
+	return TRUE;
+}
+
+// ***********************************************************************
+/*
+ * Make ctx2 use the shared state (display lists etc.) of ctx1.
+ * This exists for WIN32 WGL support, since wglShareLists() must be
+ * called *after* wglCreateContext() with valid GLRCs. (DaveM)
+ *
+ * Returns GL_FALSE if either context, or either context's Shared
+ * pointer, is NULL; GL_TRUE once ctx2 has been re-pointed at the
+ * shared state of ctx1.
+ */
+//
+// Copied from GLD2.x. KeithH
+//
+static GLboolean _gldShareLists(
+	GLcontext *ctx1,
+	GLcontext *ctx2)
+{
+	/* Both contexts and both shared-state pointers must be present */
+	if (!ctx1 || !ctx2)
+		return GL_FALSE;
+	if (!ctx1->Shared || !ctx2->Shared)
+		return GL_FALSE;
+
+	/* ctx2 lets go of the shared state it has been using so far */
+	--ctx2->Shared->RefCount;
+#if 0	/* 3DStudio exits on this memory release */
+	if (ctx2->Shared->RefCount == 0)
+		free_shared_state(ctx2, ctx2->Shared);
+#endif
+
+	/* ctx2 now references ctx1's shared state, which gains a user */
+	ctx2->Shared = ctx1->Shared;
+	++ctx2->Shared->RefCount;
+
+	return GL_TRUE;
+}
+
+// ***********************************************************************
+
+// wglShareLists entry point. Makes context `b` share the display-list
+// state of context `a`.
+//
+// Returns FALSE if the license check fails, if either handle does not
+// resolve to a context record, or if either record is not marked as
+// allocated; otherwise returns the result of the share operation.
+BOOL APIENTRY _GLD_WGL_EXPORT(ShareLists)(
+	HGLRC a,
+	HGLRC b)
+{
+	DGL_ctx *dgl1, *dgl2;
+
+	// Validate license
+	if (!dglValidate())
+		return FALSE;
+
+	// Mesa supports shared lists, but you need to supply the shared
+	// GL context info when calling gl_create_context(). An auxiliary
+	// function gl_share_lists() has been added to update the shared
+	// list info after the GL contexts have been created. (DaveM)
+	dgl1 = dglGetContextAddress(a);
+	dgl2 = dglGetContextAddress(b);
+
+	// NOTE(review): dglGetContextAddress() is defined elsewhere; it is
+	// assumed here that it may return NULL for a handle it does not
+	// recognise, so do not dereference the results blindly.
+	if (dgl1 == NULL || dgl2 == NULL)
+		return FALSE;
+
+	if (dgl1->bAllocated && dgl2->bAllocated) {
+#ifdef _USE_GLD3_WGL
+		return _gldShareLists(dgl1->glCtx, dgl2->glCtx);
+#else
+		return (*mesaFuncs.gl_share_lists)(dgl1->glCtx, dgl2->glCtx);
+#endif
+	}
+	return FALSE;
+}
+
+// ***********************************************************************
+
+// wglSwapBuffers entry point. Forwards to dglSwapBuffers() once the
+// license check has passed; returns FALSE otherwise.
+BOOL APIENTRY _GLD_WGL_EXPORT(SwapBuffers)(
+	HDC a)
+{
+	if (dglValidate())
+		return dglSwapBuffers(a);
+
+	return FALSE;
+}
+
+// ***********************************************************************
+
+// wglSwapLayerBuffers entry point. The plane selection `b` is ignored:
+// the call is treated as an ordinary buffer swap and forwarded to
+// dglSwapBuffers(). Returns FALSE if the license check fails.
+BOOL APIENTRY _GLD_WGL_EXPORT(SwapLayerBuffers)(
+	HDC a,
+	UINT b)
+{
+	if (dglValidate())
+		return dglSwapBuffers(a);
+
+	return FALSE;
+}
+
+// ***********************************************************************
+
+// ***********************************************************************
+// Note: This ResizeBuffers() function may be called from
+// either MESA glViewport() or GLD wglMakeCurrent().
+
+// Re-create the drawable of a context after its window has changed size.
+//
+//   ctx             Mesa context; ctx->DriverCtx must hold the DGL_ctx
+//   bDefaultDriver  TRUE selects glb.dwMemoryType / glb.d3dGuid for the new
+//                   surfaces and device, FALSE selects system memory and the
+//                   RGB device. It also decides whether failures are logged
+//                   as warnings (TRUE) or as critical errors (FALSE), and
+//                   the "size unchanged" shortcut is only taken when TRUE.
+//
+// Returns TRUE when the resize succeeded or was not needed (window has no
+// area, or the size is unchanged), FALSE when the license check or sanity
+// checks fail or when any surface/device could not be re-created. On
+// failure dgl->bCanRender is left FALSE.
+//
+// In _USE_GLD3_WGL builds the actual work is delegated to
+// _gldDriver.ResizeDrawable(); otherwise the DirectDraw surfaces, Z-buffer,
+// Direct3D device, viewport and textures are rebuilt here.
+BOOL dglWglResizeBuffers(
+	GLcontext *ctx,
+	BOOL bDefaultDriver)
+{
+	DGL_ctx						*dgl = NULL;
+	RECT						rcScreenRect;
+	DWORD						dwWidth;
+	DWORD						dwHeight;
+	DDSURFACEDESC2				ddsd2;
+	DDSCAPS2					ddscaps2;
+	IDirectDrawClipper			*lpddClipper = NULL;
+	DWORD						dwFlags;
+	HRESULT						hResult;
+
+	DWORD						dwMemoryType;
+
+	int							i;
+	struct gl_texture_object	*tObj;
+	struct gl_texture_image		*image;
+
+	BOOL						bWasFullscreen;
+	BOOL						bSaveDesktop;
+	BOOL						bFullScrnWin = FALSE;
+	DDSURFACEDESC2 				ddsd2DisplayMode;
+
+	DDBLTFX						ddbltfx;
+	POINT						pt;
+	RECT						rcDst;
+#ifdef _USE_GLD3_WGL
+	GLD_displayMode				glddm;
+#endif
+
+// Log severity for failures: only a warning for the default driver
+#define DDLOG_CRITICAL_OR_WARN	(bDefaultDriver ? DDLOG_WARN : DDLOG_CRITICAL)
+
+	// Validate license
+	if (!dglValidate())
+		return FALSE;
+
+	// Sanity checks
+	if (ctx == NULL)
+		return FALSE;
+	dgl = ctx->DriverCtx;
+	if (dgl == NULL)
+		return FALSE;
+
+	// Get the window size and calculate its dimensions
+	if (dgl->hWnd == NULL) {
+		// Check for non-window DC = memory DC ?
+		if (GetClipBox(dgl->hDC, &rcScreenRect) == ERROR)
+			SetRect(&rcScreenRect, 0, 0, 0, 0);
+	}
+	else if (!GetClientRect(dgl->hWnd, &rcScreenRect))
+		SetRect(&rcScreenRect, 0, 0, 0, 0);
+	dwWidth = rcScreenRect.right - rcScreenRect.left;
+	dwHeight = rcScreenRect.bottom - rcScreenRect.top;
+	CopyRect(&dgl->rcScreenRect, &rcScreenRect);
+
+	// This will occur on Alt-Tab
+	if ((dwWidth == 0) && (dwHeight == 0)) {
+		//dgl->bCanRender = FALSE;
+		return TRUE; // No resize possible!
+	}
+
+	// Some apps zero only 1 dimension for non-visible window... (DaveM)
+	if ((dwWidth == 0) || (dwHeight == 0)) {
+		dwWidth = 8;
+		dwHeight = 8;
+	}
+
+	// Test to see if a resize is required.
+	// Note that the dimensions will be the same if a prior resize attempt failed.
+	if ((dwWidth == dgl->dwWidth) && (dwHeight == dgl->dwHeight) && bDefaultDriver) {
+		return TRUE; // No resize required
+	}
+
+	ddlogPrintf(DDLOG_SYSTEM, "dglResize: %dx%d", dwWidth, dwHeight);
+#ifndef _USE_GLD3_WGL
+	// Work out where we want our surfaces created
+	dwMemoryType = (bDefaultDriver) ? glb.dwMemoryType : DDSCAPS_SYSTEMMEMORY;
+#endif // _USE_GLD3_WGL
+
+	// Note previous fullscreen vs window display status
+	bWasFullscreen = dgl->bFullscreen;
+
+#ifdef _USE_GLD3_WGL
+	if (_gldDriver.GetDisplayMode(dgl, &glddm)) {
+		// A window exactly as large as the display mode counts as fullscreen
+		if ( (dwWidth == glddm.Width) &&
+				 (dwHeight == glddm.Height) ) {
+			bFullScrnWin = TRUE;
+		}
+		if (bFullScrnWin && glb.bPrimary && !glb.bFullscreenBlit && !glb.bDirectDrawPersistant) {
+			dgl->bFullscreen = TRUE;
+			ddlogMessage(DDLOG_INFO, "Fullscreen window after resize.\n");
+		}
+		else {
+			dgl->bFullscreen = FALSE;
+			ddlogMessage(DDLOG_INFO, "Non-Fullscreen window after resize.\n");
+		}
+		// Cache the display mode dimensions
+		dgl->dwModeWidth = glddm.Width;
+		dgl->dwModeHeight = glddm.Height;
+	}
+
+	// Clamp the effective window dimensions to primary surface.
+	// We need to do this for D3D viewport dimensions even if wide
+	// surfaces are supported. This also is a good idea for handling
+	// whacked-out window dimensions passed for non-drawable windows
+	// like Solid Edge. (DaveM)
+	// NOTE(review): glddm is read here even when GetDisplayMode() failed,
+	// and the clamped dgl->dwWidth/dwHeight are overwritten with the
+	// unclamped dwWidth/dwHeight further down - confirm the intent.
+	if (dgl->dwWidth > glddm.Width)
+		dgl->dwWidth = glddm.Width;
+	if (dgl->dwHeight > glddm.Height)
+		dgl->dwHeight = glddm.Height;
+#else // _USE_GLD3_WGL
+	// Window resize may have changed to fullscreen
+	ZeroMemory(&ddsd2DisplayMode, sizeof(ddsd2DisplayMode));
+	ddsd2DisplayMode.dwSize = sizeof(ddsd2DisplayMode);
+	hResult = IDirectDraw4_GetDisplayMode(
+					dgl->lpDD4,
+					&ddsd2DisplayMode);
+	if (SUCCEEDED(hResult)) {
+		// A window exactly as large as the display mode counts as fullscreen
+		if ( (dwWidth == ddsd2DisplayMode.dwWidth) &&
+				 (dwHeight == ddsd2DisplayMode.dwHeight) ) {
+			bFullScrnWin = TRUE;
+		}
+		if (bFullScrnWin && glb.bPrimary && !glb.bFullscreenBlit && !glb.bDirectDrawPersistant) {
+			dgl->bFullscreen = TRUE;
+			ddlogMessage(DDLOG_INFO, "Fullscreen window after resize.\n");
+		}
+		else {
+			dgl->bFullscreen = FALSE;
+			ddlogMessage(DDLOG_INFO, "Non-Fullscreen window after resize.\n");
+		}
+		// Cache the display mode dimensions
+		dgl->dwModeWidth = ddsd2DisplayMode.dwWidth;
+		dgl->dwModeHeight = ddsd2DisplayMode.dwHeight;
+	}
+
+	// Clamp the effective window dimensions to primary surface.
+	// We need to do this for D3D viewport dimensions even if wide
+	// surfaces are supported. This also is a good idea for handling
+	// whacked-out window dimensions passed for non-drawable windows
+	// like Solid Edge. (DaveM)
+	// NOTE(review): the clamped dgl->dwWidth/dwHeight are overwritten with
+	// the unclamped dwWidth/dwHeight further down - confirm the intent.
+	if (dgl->dwWidth > ddsd2DisplayMode.dwWidth)
+		dgl->dwWidth = ddsd2DisplayMode.dwWidth;
+	if (dgl->dwHeight > ddsd2DisplayMode.dwHeight)
+		dgl->dwHeight = ddsd2DisplayMode.dwHeight;
+#endif // _USE_GLD3_WGL
+
+	// Note if fullscreen vs window display has changed?
+	bSaveDesktop = (!bWasFullscreen && !dgl->bFullscreen) ? TRUE : FALSE;
+	// Save the desktop primary surface from being destroyed
+	// whenever remaining in windowed mode, since the stereo mode
+	// switches are expensive...
+
+#ifndef _USE_GLD3_WGL
+	// Don't need to re-allocate persistent buffers. (DaveM)
+	// Though we should clear the back buffers to hide artifacts.
+	if (glb.bDirectDrawPersistant && glb.bPersistantBuffers) {
+		dgl->dwWidth = dwWidth;
+		dgl->dwHeight = dwHeight;
+		ZeroMemory(&ddbltfx, sizeof(ddbltfx));
+		ddbltfx.dwSize = sizeof(ddbltfx);
+		ddbltfx.dwFillColor = dgl->dwClearColorPF;
+		// A single-buffered context has no back buffer to clear
+		if (dgl->lpBack4 != NULL) {
+			IDirectDrawSurface4_Blt(dgl->lpBack4, &rcScreenRect, NULL, NULL,
+				DDBLT_WAIT | DDBLT_COLORFILL, &ddbltfx);
+		}
+		return TRUE;
+	}
+
+	// Ensure all rendering is complete
+	if (ctx->Driver.Finish)
+		(*ctx->Driver.Finish)(ctx);
+	if (dgl->bSceneStarted == TRUE) {
+		IDirect3DDevice3_EndScene(dgl->lpDev3);
+		dgl->bSceneStarted = FALSE;
+	}
+#endif // _USE_GLD3_WGL
+	dgl->bCanRender = FALSE;
+
+#ifdef GLD_THREADS
+	// Serialize access to DirectDraw and DDS operations.
+	// Every exit path from here on must leave the critical section again.
+	if (glb.bMultiThreaded)
+		EnterCriticalSection(&CriticalSection);
+#endif
+
+#ifndef _USE_GLD3_WGL
+	// Release existing surfaces
+	RELEASE(dgl->lpDev3);
+	RELEASE(dgl->lpDepth4);
+	RELEASE(dgl->lpBack4);
+	// The persistent primary surface is kept alive across resizes
+	if (!(glb.bDirectDrawPersistant && glb.bDirectDrawPrimary)) {
+		RELEASE(dgl->lpFront4);
+	}
+#endif // _USE_GLD3_WGL
+	dgl->dwWidth = dwWidth;
+	dgl->dwHeight = dwHeight;
+
+	// Set defaults
+	dgl->dwModeWidth = dgl->dwWidth;
+	dgl->dwModeHeight = dgl->dwHeight;
+
+#ifdef _USE_GLD3_WGL
+	if (!_gldDriver.ResizeDrawable(dgl, bDefaultDriver, glb.bDirectDrawPersistant, glb.bPersistantBuffers))
+		goto cleanup_and_return_with_error;
+#else // _USE_GLD3_WGL
+
+	if (dgl->bFullscreen) {
+		//
+		// FULLSCREEN
+		//
+
+		// Disable warning popups when in fullscreen mode
+		ddlogWarnOption(FALSE);
+
+		// Have to release the persistent DirectDraw primary surface
+		// if switching to fullscreen mode. So if application wants
+		// persistent display in fullscreen mode, a fullscreen-size
+		// window should be used instead via fullscreen-blit option.
+		if (glb.bDirectDrawPersistant && glb.bDirectDrawPrimary) {
+			RELEASE(glb.lpPrimary4);
+			glb.bDirectDrawPrimary = FALSE;
+		}
+
+		dwFlags = DDSCL_EXCLUSIVE | DDSCL_FULLSCREEN | DDSCL_ALLOWREBOOT;
+		if (glb.bFastFPU)
+			dwFlags |= DDSCL_FPUSETUP;	// optional
+		hResult = IDirectDraw4_SetCooperativeLevel(dgl->lpDD4, dgl->hWnd, dwFlags);
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: Unable to set Exclusive Fullscreen mode", hResult);
+			goto cleanup_and_return_with_error;
+		}
+
+		hResult = IDirectDraw4_SetDisplayMode(dgl->lpDD4,
+											  dgl->dwModeWidth,
+											  dgl->dwModeHeight,
+											  dgl->dwBPP,
+											  0,
+											  0);
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: SetDisplayMode failed", hResult);
+			goto cleanup_and_return_with_error;
+		}
+
+		// ** The display mode has changed, so dont use MessageBox! **
+
+		ZeroMemory(&ddsd2, sizeof(ddsd2));
+		ddsd2.dwSize = sizeof(ddsd2);
+
+		if (dgl->bDoubleBuffer) {
+			// Double buffered
+			// Primary surface
+			ddsd2.dwFlags = DDSD_CAPS | DDSD_BACKBUFFERCOUNT;
+			ddsd2.ddsCaps.dwCaps = DDSCAPS_PRIMARYSURFACE |
+								   DDSCAPS_FLIP |
+								   DDSCAPS_COMPLEX |
+								   DDSCAPS_3DDEVICE |
+								   dwMemoryType;
+			ddsd2.dwBackBufferCount = 1;
+			hResult = IDirectDraw4_CreateSurface(dgl->lpDD4, &ddsd2, &dgl->lpFront4, NULL);
+			if (FAILED(hResult)) {
+				ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: CreateSurface (primary) failed", hResult);
+				goto cleanup_and_return_with_error;
+			}
+			// Render target surface
+			ZeroMemory(&ddscaps2, sizeof(ddscaps2)); // Clear the entire struct.
+			ddscaps2.dwCaps = DDSCAPS_BACKBUFFER;
+			hResult = IDirectDrawSurface4_GetAttachedSurface(dgl->lpFront4, &ddscaps2, &dgl->lpBack4);
+			if (FAILED(hResult)) {
+				ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: GetAttachedSurface failed", hResult);
+				goto cleanup_and_return_with_error;
+			}
+		} else {
+			// Single buffered
+			// Primary surface
+			ddsd2.dwFlags = DDSD_CAPS;
+			ddsd2.ddsCaps.dwCaps = DDSCAPS_PRIMARYSURFACE |
+								   //DDSCAPS_3DDEVICE |
+								   dwMemoryType;
+
+			hResult = IDirectDraw4_CreateSurface(dgl->lpDD4, &ddsd2, &dgl->lpFront4, NULL);
+			if (FAILED(hResult)) {
+				ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: CreateSurface (primary) failed", hResult);
+				goto cleanup_and_return_with_error;
+			}
+
+			dgl->lpBack4 = NULL;
+		}
+	} else {
+		// WINDOWED
+
+		// OK to enable warning popups in windowed mode
+		ddlogWarnOption(glb.bMessageBoxWarnings);
+
+		// Ditto if persistent DirectDraw primary
+		if (glb.bDirectDrawPersistant && glb.bDirectDrawPrimary)
+			goto DoClipperOnly;
+
+		// WINDOWED
+		dwFlags = DDSCL_NORMAL;
+		if (glb.bMultiThreaded)
+			dwFlags |= DDSCL_MULTITHREADED;
+		if (glb.bFastFPU)
+			dwFlags |= DDSCL_FPUSETUP;	// optional
+		hResult = IDirectDraw4_SetCooperativeLevel(dgl->lpDD4,
+												  dgl->hWnd,
+												  dwFlags);
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: Unable to set Normal coop level", hResult);
+			goto cleanup_and_return_with_error;
+		}
+		// Primary surface
+		ZeroMemory(&ddsd2, sizeof(ddsd2));
+		ddsd2.dwSize = sizeof(ddsd2);
+		ddsd2.dwFlags = DDSD_CAPS;
+		ddsd2.ddsCaps.dwCaps = DDSCAPS_PRIMARYSURFACE;
+		hResult = IDirectDraw4_CreateSurface(dgl->lpDD4, &ddsd2, &dgl->lpFront4, NULL);
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: CreateSurface (primary) failed", hResult);
+			goto cleanup_and_return_with_error;
+		}
+
+		// Cache the primary surface for persistent DirectDraw state
+		if (glb.bDirectDrawPersistant && !glb.bDirectDrawPrimary) {
+			glb.lpPrimary4 = dgl->lpFront4;
+			IDirectDrawSurface4_AddRef(glb.lpPrimary4);
+			glb.bDirectDrawPrimary = TRUE;
+		}
+
+		// Clipper object
+		hResult = DirectDrawCreateClipper(0, &lpddClipper, NULL);
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: CreateClipper failed", hResult);
+			goto cleanup_and_return_with_error;
+		}
+		hResult = IDirectDrawClipper_SetHWnd(lpddClipper, 0, dgl->hWnd);
+		if (FAILED(hResult)) {
+			RELEASE(lpddClipper);
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: SetHWnd failed", hResult);
+			goto cleanup_and_return_with_error;
+		}
+		hResult = IDirectDrawSurface4_SetClipper(dgl->lpFront4, lpddClipper);
+		RELEASE(lpddClipper); // We have finished with it.
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: SetClipper failed", hResult);
+			goto cleanup_and_return_with_error;
+		}
+DoClipperOnly:
+		// Update the window for the original clipper
+		if ((glb.bDirectDrawPersistant && glb.bDirectDrawPrimary) || bSaveDesktop) {
+			// Only touch the clipper if the surface really handed one out
+			hResult = IDirectDrawSurface4_GetClipper(dgl->lpFront4, &lpddClipper);
+			if (SUCCEEDED(hResult) && lpddClipper != NULL) {
+				IDirectDrawClipper_SetHWnd(lpddClipper, 0, dgl->hWnd);
+				RELEASE(lpddClipper);
+			}
+		}
+
+		if (dgl->bDoubleBuffer) {
+			// Render target surface
+			ZeroMemory(&ddsd2, sizeof(ddsd2));
+			ddsd2.dwSize = sizeof(ddsd2);
+			ddsd2.dwFlags        = DDSD_CAPS | DDSD_WIDTH | DDSD_HEIGHT;
+			ddsd2.dwWidth        = dgl->dwWidth;
+			ddsd2.dwHeight       = dgl->dwHeight;
+			ddsd2.ddsCaps.dwCaps = DDSCAPS_3DDEVICE |
+								   DDSCAPS_OFFSCREENPLAIN |
+								   dwMemoryType;
+			hResult = IDirectDraw4_CreateSurface(dgl->lpDD4, &ddsd2, &dgl->lpBack4, NULL);
+			if (FAILED(hResult)) {
+				ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: Create Backbuffer failed", hResult);
+				goto cleanup_and_return_with_error;
+			}
+
+		} else {
+			dgl->lpBack4 = NULL;
+		}
+	}
+
+	//
+	// Now create the Zbuffer
+	//
+	if (dgl->bDepthBuffer) {
+		// Get z-buffer dimensions from the render target
+		// Setup the surface desc for the z-buffer.
+		ZeroMemory(&ddsd2, sizeof(ddsd2));
+		ddsd2.dwSize = sizeof(ddsd2);
+		ddsd2.dwFlags = DDSD_CAPS | DDSD_WIDTH | DDSD_HEIGHT | DDSD_PIXELFORMAT;
+		ddsd2.ddsCaps.dwCaps = DDSCAPS_ZBUFFER | dwMemoryType;
+		ddsd2.dwWidth = dgl->dwWidth;
+		ddsd2.dwHeight = dgl->dwHeight;
+		memcpy(&ddsd2.ddpfPixelFormat,
+			   &glb.lpZBufferPF[dgl->iZBufferPF],
+			   sizeof(DDPIXELFORMAT) );
+
+		// Create a z-buffer
+		hResult = IDirectDraw4_CreateSurface(dgl->lpDD4, &ddsd2, &dgl->lpDepth4, NULL);
+		if (FAILED(hResult)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: CreateSurface (ZBuffer) failed", hResult);
+			goto cleanup_and_return_with_error;
+		}
+
+		// Attach Zbuffer to render target
+		TRY(IDirectDrawSurface4_AddAttachedSurface(
+			dgl->bDoubleBuffer ? dgl->lpBack4 : dgl->lpFront4,
+			dgl->lpDepth4),
+			"dglResize: Attach Zbuffer");
+
+	}
+
+	// Clear the newly resized back buffers for the window client area.
+	// A single-buffered context has no back buffer (lpBack4 is NULL).
+	ZeroMemory(&ddbltfx, sizeof(ddbltfx));
+	ddbltfx.dwSize = sizeof(ddbltfx);
+	ddbltfx.dwFillColor = dgl->dwClearColorPF;
+	if (dgl->lpBack4 != NULL) {
+		IDirectDrawSurface4_Blt(dgl->lpBack4, &rcScreenRect, NULL, NULL,
+			DDBLT_WAIT | DDBLT_COLORFILL, &ddbltfx);
+	}
+
+	//
+	// Now that we have a zbuffer we can create the 3D device
+	//
+	hResult = IDirect3D3_CreateDevice(dgl->lpD3D3,
+									  bDefaultDriver ? &glb.d3dGuid : &IID_IDirect3DRGBDevice,
+									  dgl->bDoubleBuffer ? dgl->lpBack4 : dgl->lpFront4,
+									  &dgl->lpDev3,
+									  NULL);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: Could not create Direct3D device", hResult);
+		goto cleanup_and_return_with_error;
+	}
+
+	// We must do this as soon as the device is created
+	dglInitStateCaches(dgl);
+
+	//
+	// Viewport
+	//
+	hResult = IDirect3DDevice3_AddViewport(dgl->lpDev3, dgl->lpViewport3);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: AddViewport failed", hResult);
+		goto cleanup_and_return_with_error;
+	}
+
+	// Initialise the viewport
+	dgl->d3dViewport.dwSize = sizeof(dgl->d3dViewport);
+	dgl->d3dViewport.dwX = 0;
+	dgl->d3dViewport.dwY = 0;
+	dgl->d3dViewport.dwWidth = dgl->dwWidth;
+	dgl->d3dViewport.dwHeight = dgl->dwHeight;
+	dgl->d3dViewport.dvClipX = 0;
+	dgl->d3dViewport.dvClipY = 0;
+	dgl->d3dViewport.dvClipWidth = dgl->dwWidth;
+	dgl->d3dViewport.dvClipHeight = dgl->dwHeight;
+//	dgl->d3dViewport.dvMinZ = 0.0f;
+//	dgl->d3dViewport.dvMaxZ = 1.0f;
+	TRY(IDirect3DViewport3_SetViewport2(dgl->lpViewport3, &dgl->d3dViewport),
+		"dglResize: SetViewport2");
+
+	hResult = IDirect3DDevice3_SetCurrentViewport(dgl->lpDev3, dgl->lpViewport3);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: SetCurrentViewport failed", hResult);
+		goto cleanup_and_return_with_error;
+	}
+
+	// (Re)Initialise all the Direct3D renderstates
+	dglInitStateD3D(ctx);
+
+	// Now we have to recreate all of our textures (+ mipmaps).
+	// Walk over all textures in hash table
+	// XXX what about the default texture objects (id=0)?
+	{
+		struct _mesa_HashTable *textures = ctx->Shared->TexObjects;
+		GLuint id;
+		for (id = _mesa_HashFirstEntry(textures);
+				 id;
+				 id = _mesa_HashNextEntry(textures, id)) {
+			tObj = (struct gl_texture_object *) _mesa_HashLookup(textures, id);
+			// Only textures that carry driver data are re-uploaded
+			if (tObj->DriverData) {
+				// We could call our TexImage function directly, but it's
+				// safer to use the driver pointer.
+				for (i=0; i<MAX_TEXTURE_LEVELS; i++) {
+					image = tObj->Image[i];
+					if (image) {
+						switch (tObj->Dimensions){
+						case 1:
+							if (ctx->Driver.TexImage)
+								(*ctx->Driver.TexImage)(ctx, GL_TEXTURE_1D, tObj, i, image->Format, image);
+							break;
+						case 2:
+							if (ctx->Driver.TexImage)
+								(*ctx->Driver.TexImage)(ctx, GL_TEXTURE_2D, tObj, i, image->Format, image);
+							break;
+						default:
+							break;
+						}
+					}
+				}
+			}
+		}
+	}
+
+	// Re-Bind each texture Unit
+	for (i=0; i<glb.wMaxSimultaneousTextures; i++) {
+		tObj = ctx->Texture.Unit[i].Current;
+		if (tObj) {
+			DGL_texture *lpTex = (DGL_texture *)tObj->DriverData;
+			hResult = dglSetTexture(dgl, i, lpTex ? lpTex->lpTexture : NULL);
+			if (FAILED(hResult)) {
+				// Logged only; a failed re-bind does not fail the resize
+				ddlogError(DDLOG_ERROR, "dglResize: SetTexture failed", hResult);
+			}
+		}
+	}
+#endif // _USE_GLD3_WGL
+
+	dgl->bCanRender = TRUE;
+
+#ifdef GLD_THREADS
+	// Release serialized access
+	if (glb.bMultiThreaded)
+		LeaveCriticalSection(&CriticalSection);
+#endif
+
+	// SUCCESS.
+	return TRUE;
+
+cleanup_and_return_with_error:
+	// Release all interfaces before returning.
+#ifdef _USE_GLD3_WGL
+	_gldDriver.DestroyDrawable(dgl);
+#else // _USE_GLD3_WGL
+	RELEASE(dgl->lpDev3);
+	RELEASE(dgl->lpDepth4);
+	RELEASE(dgl->lpBack4);
+	// The persistent primary surface is kept alive even on failure
+	if (!(glb.bDirectDrawPersistant && glb.bDirectDrawPrimary)) {
+		RELEASE(dgl->lpFront4);
+	}
+
+	// NOTE(review): this #undef is only reached in non-_USE_GLD3_WGL builds,
+	// so the macro stays defined for the rest of the file in GLD3 builds.
+#undef DDLOG_CRITICAL_OR_WARN
+#endif // _USE_GLD3_WGL
+
+	// Mark context as not being able to render
+	dgl->bCanRender = FALSE;
+
+#ifdef GLD_THREADS
+	// Release serialized access
+	if (glb.bMultiThreaded)
+		LeaveCriticalSection(&CriticalSection);
+#endif
+
+	return FALSE;
+}
+
+// ***********************************************************************
+// ***********************************************************************
+// Support for bitmap fonts.
+// ***********************************************************************
+// ***********************************************************************
+
+/*****************************************************************************
+**
+** InvertGlyphBitmap.
+**
+** Copy a 1-bit-per-pixel glyph bitmap with its row order reversed so that
+** it suits OpenGL's representation: source row 0 is written to destination
+** row h-1, source row 1 to row h-2, and so on.
+** Each row starts on a double word boundary, i.e. occupies (w+31)/32 DWORDs.
+**
+**   w, h  - glyph width and height in pixels.  Nothing is copied when
+**           either of them is not positive.
+**   fptr  - source bitmap (h rows).
+**   tptr  - destination bitmap; must have room for h rows.
+**
+*****************************************************************************/
+
+static void InvertGlyphBitmap(
+	int w,
+	int h,
+	DWORD *fptr,
+	DWORD *tptr)
+{
+	int dWordsInRow;
+	int i, j;
+
+	if (w <= 0 || h <= 0) {
+		return;
+	}
+
+	dWordsInRow = (w+31)/32;
+
+	// Rows are addressed by index rather than by walking the destination
+	// pointer backwards, so no pointer in front of the buffer is formed.
+	for (i = 0; i < h; i++) {
+		DWORD *src = fptr + i * dWordsInRow;
+		DWORD *dst = tptr + (h - 1 - i) * dWordsInRow;
+
+		for (j = 0; j < dWordsInRow; j++) {
+			dst[j] = src[j];
+		}
+	}
+}
+
+// ***********************************************************************
+
+/*****************************************************************************
+ * wglUseFontBitmaps
+ *
+ * Converts a subrange of the glyphs in a GDI font to OpenGL display
+ * lists.
+ *
+ * Extended to support any GDI font, not just TrueType fonts. (DaveM)
+ *
+ *   hDC      - device context; the font currently selected into it is used.
+ *   first    - code of the first glyph to convert.
+ *   count    - number of consecutive glyphs to convert.
+ *   listBase - display list that receives glyph 'first'; glyph (first + n)
+ *              goes to list (listBase + n), the same numbering that
+ *              UseFontOutlinesA uses.
+ *
+ * Returns TRUE on success.  On failure the display lists created so far
+ * are deleted and FALSE is returned.
+ *
+ * The font selected into hDC belongs to the caller and is not deleted.
+ *****************************************************************************/
+
+BOOL APIENTRY _GLD_WGL_EXPORT(UseFontBitmapsA)(
+	HDC hDC,
+	DWORD first,
+	DWORD count,
+	DWORD listBase)
+{
+	int					i, ox, oy, ix, iy;
+	int					w, h;
+	int					iBufSize, iCurBufSize = 0;
+	DWORD				*bitmapBuffer = NULL;
+	DWORD				*invertedBitmapBuffer = NULL;
+	BOOL				bSuccessOrFail = TRUE;
+	BOOL				bTrueType = FALSE;
+	TEXTMETRIC			tm;
+	GLYPHMETRICS		gm;
+	RASTERIZER_STATUS	rs;
+	MAT2				mat;
+	SIZE				size;
+	RECT				rect;
+	HDC					hDCMem;
+	HBITMAP				hBitmap;
+	HGDIOBJ				hOldBitmap;
+	HFONT				hFont;
+	// A 1bpp DIB has a two entry colour table which GetDIBits fills in;
+	// a plain BITMAPINFO only has room for one entry.
+	struct {
+		BITMAPINFOHEADER	bmiHeader;
+		RGBQUAD				bmiColors[2];
+	} bmi;
+
+	// Validate SciTech DirectGL license
+	if (!dglValidate())
+		return FALSE;
+
+	// Set up a unity matrix.
+	ZeroMemory(&mat, sizeof(mat));
+	mat.eM11.value = 1;
+	mat.eM22.value = 1;
+
+	// Test to see if selected font is TrueType or not
+	ZeroMemory(&tm, sizeof(tm));
+	if (!GetTextMetrics(hDC, &tm)) {
+		ddlogMessage(DDLOG_ERROR, "DGL_UseFontBitmaps: Font metrics error\n");
+		return (FALSE);
+	}
+	bTrueType = (tm.tmPitchAndFamily & TMPF_TRUETYPE) ? TRUE : FALSE;
+
+	// Test to see if TRUE-TYPE capabilities are installed
+	// (only necessary if TrueType font selected)
+	ZeroMemory(&rs, sizeof(rs));
+	if (bTrueType) {
+		if (!GetRasterizerCaps (&rs, sizeof (RASTERIZER_STATUS))) {
+			ddlogMessage(DDLOG_ERROR, "DGL_UseFontBitmaps: Raster caps error\n");
+			return (FALSE);
+		}
+		if (!(rs.wFlags & TT_ENABLED)) {
+			ddlogMessage(DDLOG_ERROR, "DGL_UseFontBitmaps: No TrueType caps\n");
+			return (FALSE);
+		}
+	}
+
+	// Trick to get the current font handle
+	hFont = SelectObject(hDC, GetStockObject(SYSTEM_FONT));
+	SelectObject(hDC, hFont);
+
+	// Have memory device context available for holding bitmaps of font glyphs
+	hDCMem = CreateCompatibleDC(hDC);
+	if (hDCMem == NULL) {
+		ddlogMessage(DDLOG_ERROR, "DGL_UseFontBitmaps: CreateCompatibleDC failed\n");
+		return (FALSE);
+	}
+	SelectObject(hDCMem, hFont);
+	SetTextColor(hDCMem, RGB(0xFF, 0xFF, 0xFF));
+	SetBkColor(hDCMem, 0);
+
+	for (i = first; (DWORD) i < (first + count); i++) {
+		// Find out how much space is needed for the bitmap so we can
+		// Set the buffer size correctly.
+		if (bTrueType) {
+			// Use TrueType support to get bitmap size of glyph
+			iBufSize = GetGlyphOutline(hDC, i, GGO_BITMAP, &gm,
+				0, NULL, &mat);
+			if (iBufSize == GDI_ERROR) {
+				bSuccessOrFail = FALSE;
+				break;
+			}
+		}
+		else {
+			// Use generic GDI support to compute bitmap size of glyph
+			w = tm.tmMaxCharWidth;
+			h = tm.tmHeight;
+			if (GetTextExtentPoint32(hDC, (LPCTSTR)&i, 1, &size)) {
+				w = size.cx;
+				h = size.cy;
+			}
+			// GetDIBits stores every 1bpp row padded to a DWORD boundary
+			// and InvertGlyphBitmap copies whole DWORD rows, so the buffer
+			// must hold ((w+31)/32) DWORDs per row (w*h bytes is too
+			// small for narrow glyphs).
+			if (w > 0 && h > 0)
+				iBufSize = ((w + 31) / 32) * 4 * h;
+			else
+				iBufSize = 0;
+		}
+
+		// If we need to allocate Larger Buffers, then do so - but allocate
+		// extra room (twice the size) so that we don't do too many mallocs !
+		if (iBufSize > iCurBufSize) {
+			if (bitmapBuffer) {
+				__wglFree(bitmapBuffer);
+			}
+			if (invertedBitmapBuffer) {
+				__wglFree(invertedBitmapBuffer);
+			}
+
+			iCurBufSize = iBufSize * 2;
+			bitmapBuffer = (DWORD *) __wglMalloc(iCurBufSize);
+			invertedBitmapBuffer = (DWORD *) __wglMalloc(iCurBufSize);
+
+			if (bitmapBuffer == NULL || invertedBitmapBuffer == NULL) {
+				bSuccessOrFail = FALSE;
+				break;
+			}
+		}
+
+		// If we fail to get the Glyph data, delete the display lists
+		// Created so far and return FALSE.
+		if (bTrueType) {
+			// Use TrueType support to get bitmap of glyph
+			if (GetGlyphOutline(hDC, i, GGO_BITMAP, &gm,
+					iBufSize, bitmapBuffer, &mat) == GDI_ERROR) {
+				bSuccessOrFail = FALSE;
+				break;
+			}
+
+			// Setup glBitmap parameters for current font glyph
+			w  = gm.gmBlackBoxX;
+			h  = gm.gmBlackBoxY;
+			ox = gm.gmptGlyphOrigin.x;
+			oy = gm.gmptGlyphOrigin.y;
+			ix = gm.gmCellIncX;
+			iy = gm.gmCellIncY;
+		}
+		else {
+			if (iBufSize > 0 && i >= tm.tmFirstChar && i <= tm.tmLastChar) {
+				// Use generic GDI support to create bitmap of glyph.
+				// Only create bitmaps for actual font glyphs
+				ZeroMemory(bitmapBuffer, iBufSize);
+
+				hBitmap = CreateBitmap(w, h, 1, 1, NULL);
+				// If the bitmap cannot be created the glyph is left blank
+				// (the buffer has just been zeroed).
+				if (hBitmap != NULL) {
+					hOldBitmap = SelectObject(hDCMem, hBitmap);
+					// Make bitmap of current font glyph
+					SetRect(&rect, 0, 0, w, h);
+					DrawText(hDCMem, (LPCTSTR)&i, 1, &rect,
+						DT_LEFT | DT_BOTTOM | DT_SINGLELINE | DT_NOCLIP);
+					// The bitmap must not be selected into a DC when it is
+					// read by GetDIBits or deleted, so deselect it first.
+					SelectObject(hDCMem, hOldBitmap);
+					// Make copy of bitmap in our local buffer
+					ZeroMemory(&bmi, sizeof(bmi));
+					bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
+					bmi.bmiHeader.biWidth = w;
+					bmi.bmiHeader.biHeight = -h;
+					bmi.bmiHeader.biPlanes = 1;
+					bmi.bmiHeader.biBitCount = 1;
+					bmi.bmiHeader.biCompression = BI_RGB;
+					GetDIBits(hDCMem, hBitmap, 0, h, bitmapBuffer,
+						(BITMAPINFO *) &bmi, 0);
+					DeleteObject(hBitmap);
+				}
+			}
+			else {
+				// Otherwise use empty display list for non-existing glyph
+				iBufSize = 0;
+			}
+
+			// Setup glBitmap parameters for current font glyph
+			ox = 0;
+			oy = tm.tmDescent;
+			ix = w;
+			iy = 0;
+		}
+
+		// Create an OpenGL display list.  Lists are numbered relative to
+		// the first requested glyph, starting at listBase.
+		_GLD_glNewList((listBase + (i - first)), GL_COMPILE);
+
+		// Some fonts have no data for the space character, yet advertise
+		// a non-zero size.
+		if (0 == iBufSize) {
+			_GLD_glBitmap(0, 0, 0.0f, 0.0f, (GLfloat) ix, (GLfloat) iy, NULL);
+		} else {
+			// Invert the Glyph data.
+			InvertGlyphBitmap(w, h, bitmapBuffer, invertedBitmapBuffer);
+
+			// Render an OpenGL bitmap and invert the origin.
+			_GLD_glBitmap(w, h,
+				(GLfloat) ox, (GLfloat) (h-oy),
+				(GLfloat) ix, (GLfloat) iy,
+				(GLubyte *) invertedBitmapBuffer);
+		}
+
+		// Close this display list.
+		_GLD_glEndList();
+	}
+
+	if (bSuccessOrFail == FALSE) {
+		ddlogMessage(DDLOG_ERROR, "DGL_UseFontBitmaps: Get glyph failed\n");
+		// Lists listBase .. listBase + (i - first) - 1 were completed
+		// before the failure; remove exactly those.
+		_GLD_glDeleteLists(listBase, (i-first));
+	}
+
+	// Release resources used.  hFont is the caller's font (it is still
+	// selected into hDC), so it is deliberately not deleted here.
+	DeleteDC(hDCMem);
+
+	if (bitmapBuffer)
+		__wglFree(bitmapBuffer);
+	if (invertedBitmapBuffer)
+		__wglFree(invertedBitmapBuffer);
+
+	return(bSuccessOrFail);
+}
+
+// ***********************************************************************
+
+// Wide-character entry point for bitmap fonts.  After the license check
+// the four arguments (DC, first glyph, glyph count, list base) are handed
+// unchanged to UseFontBitmapsA and its result is returned.
+BOOL APIENTRY _GLD_WGL_EXPORT(UseFontBitmapsW)(
+	HDC a,
+	DWORD b,
+	DWORD c,
+	DWORD d)
+{
+	// Only forward the call when the license validates
+	if (dglValidate()) {
+		return _GLD_WGL_EXPORT(UseFontBitmapsA)(a, b, c, d);
+	}
+
+	return FALSE;
+}
+
+// ***********************************************************************
+// ***********************************************************************
+// Support for outline TrueType fonts.
+// ***********************************************************************
+// ***********************************************************************
+
+/*
+ * __wglRealloc
+ *
+ * realloc()-like helper built on GlobalAlloc/GlobalSize/GlobalFree.
+ *
+ *   oldPtr  - block to resize, or NULL to simply allocate.
+ *   newSize - requested size in bytes.  0 frees oldPtr and returns NULL.
+ *
+ * Returns the new zero-initialised (GPTR) block holding the first
+ * min(old size, newSize) bytes of the old block, or NULL when newSize is 0
+ * or the allocation fails.  When the allocation fails oldPtr is left
+ * untouched and is still owned by the caller.
+ */
+void * __wglRealloc(
+	void *oldPtr,
+	size_t newSize)
+{
+	void *newPtr;
+	size_t oldSize;	/* not DWORD: keeps the full width GlobalSize returns */
+
+	if (newSize == 0) {
+		if (oldPtr)
+			GlobalFree(oldPtr);
+		return NULL;
+	}
+
+	newPtr = (void *) GlobalAlloc(GPTR, newSize);
+	if (newPtr == NULL)
+		return NULL;	/* XXX out of memory error */
+
+	if (oldPtr) {
+		oldSize = GlobalSize(oldPtr);
+		memcpy(newPtr, oldPtr, (oldSize <= newSize ? oldSize : newSize));
+		GlobalFree(oldPtr);
+	}
+
+	return newPtr;
+}
+
+// ***********************************************************************
+
+
+/*****************************************************************************
+ * wglUseFontOutlinesW
+ *
+ * Wide-character entry point for outline fonts.  Every argument is passed
+ * through unchanged to UseFontOutlinesA and that function's result is
+ * returned.
+ *****************************************************************************/
+
+BOOL APIENTRY _GLD_WGL_EXPORT(UseFontOutlinesW)(
+	IN	HDC			hDC,
+	IN	DWORD			first,
+	IN	DWORD			count,
+	IN	DWORD			listBase,
+	IN	FLOAT			chordalDeviation,
+	IN	FLOAT			extrusion,
+	IN	INT			format,
+	OUT	LPGLYPHMETRICSFLOAT	lpgmf)
+{
+	BOOL bResult;
+
+	bResult = _GLD_WGL_EXPORT(UseFontOutlinesA)(
+		hDC,
+		first,
+		count,
+		listBase,
+		chordalDeviation,
+		extrusion,
+		format,
+		lpgmf);
+
+	return bResult;
+}
+
+/*****************************************************************************
+ * wglUseFontOutlinesA
+ *
+ * Converts a subrange of the glyphs in a TrueType font to OpenGL display
+ * lists.
+ *
+ *   hDC                    - device context holding the font to convert.
+ *   first, count           - range of glyphs to convert.
+ *   listBase               - display list that receives glyph 'first';
+ *                            glyph (first + n) goes to list (listBase + n).
+ *   chordalDeviation       - tessellation tolerance for the splines.
+ *   extrusion              - depth of the extruded glyph (polygons only).
+ *   format                 - WGL_FONT_LINES or WGL_FONT_POLYGONS.
+ *   glyphMetricsFloatArray - receives 'count' glyph metrics; may be NULL,
+ *                            in which case no metrics are returned.
+ *
+ * Returns TRUE on success, FALSE on any GDI, memory or OpenGL error.
+ * Once the high-resolution font has been selected, every exit path
+ * restores the previously selected font, deletes the temporary
+ * high-resolution font and frees the outline buffer.
+ *****************************************************************************/
+
+BOOL APIENTRY _GLD_WGL_EXPORT(UseFontOutlinesA)(
+	IN	HDC			hDC,
+			IN	DWORD			first,
+			IN	DWORD			count,
+			IN	DWORD			listBase,
+			IN	FLOAT			chordalDeviation,
+			IN	FLOAT			extrusion,
+			IN	INT			format,
+			OUT	LPGLYPHMETRICSFLOAT	glyphMetricsFloatArray)
+	{
+	DWORD			glyphIndex;
+	UCHAR*			glyphBuf = NULL;
+	DWORD			glyphBufSize;
+	GLYPHMETRICSFLOAT	scratchMetrics;	/* used when the caller passes no array */
+	BOOL			ok = FALSE;
+
+
+	/*
+	 * Flush any previous OpenGL errors.  This allows us to check for
+	 * new errors so they can be reported via the function return value.
+	 */
+	while (_GLD_glGetError() != GL_NO_ERROR)
+		;
+
+	/*
+	 * Make sure that the current font can be sampled accurately.
+	 */
+	hNewFont = CreateHighResolutionFont(hDC);
+	if (!hNewFont)
+		return FALSE;
+
+	hOldFont = SelectObject(hDC, hNewFont);
+	if (!hOldFont)
+		{
+		/* The font never got selected, so it only has to be deleted. */
+		DeleteObject(hNewFont);
+		hNewFont = NULL;
+		return FALSE;
+		}
+
+	/*
+	 * Preallocate a buffer for the outline data, and track its size:
+	 */
+	glyphBuf = (UCHAR*) __wglMalloc(glyphBufSize = 10240);
+	if (!glyphBuf)
+		goto cleanup; /*WGL_STATUS_NOT_ENOUGH_MEMORY*/
+
+	/*
+	 * Process each glyph in the given range:
+	 */
+	for (glyphIndex = first; glyphIndex - first < count; ++glyphIndex)
+		{
+		GLYPHMETRICS	glyphMetrics;
+		DWORD		glyphSize;
+		static MAT2	matrix =
+			{
+			{0, 1},		{0, 0},
+			{0, 0},		{0, 1}
+			};
+		LPGLYPHMETRICSFLOAT glyphMetricsFloat =
+			glyphMetricsFloatArray
+				? &glyphMetricsFloatArray[glyphIndex - first]
+				: &scratchMetrics;
+
+
+		/*
+		 * Determine how much space is needed to store the glyph's
+		 * outlines.  If our glyph buffer isn't large enough,
+		 * resize it.  GetGlyphOutline returns an unsigned value and
+		 * signals failure with GDI_ERROR, so a "< 0" test can never
+		 * detect it.
+		 */
+		glyphSize = GetGlyphOutline(	hDC,
+						glyphIndex,
+						GGO_NATIVE,
+						&glyphMetrics,
+						0,
+						NULL,
+						&matrix
+						);
+		if (glyphSize == GDI_ERROR)
+			goto cleanup; /*WGL_STATUS_FAILURE*/
+		if (glyphSize > glyphBufSize)
+			{
+			__wglFree(glyphBuf);
+			glyphBuf = (UCHAR*) __wglMalloc(glyphBufSize = glyphSize);
+			if (!glyphBuf)
+				goto cleanup; /*WGL_STATUS_NOT_ENOUGH_MEMORY*/
+			}
+
+
+		/*
+		 * Get the glyph's outlines.
+		 */
+		if (GetGlyphOutline(	hDC,
+					glyphIndex,
+					GGO_NATIVE,
+					&glyphMetrics,
+					glyphBufSize,
+					glyphBuf,
+					&matrix
+					) == GDI_ERROR)
+			goto cleanup; /*WGL_STATUS_FAILURE*/
+		
+		glyphMetricsFloat->gmfBlackBoxX =
+			(FLOAT) glyphMetrics.gmBlackBoxX * ScaleFactor;
+		glyphMetricsFloat->gmfBlackBoxY =
+			(FLOAT) glyphMetrics.gmBlackBoxY * ScaleFactor;
+		glyphMetricsFloat->gmfptGlyphOrigin.x =
+			(FLOAT) glyphMetrics.gmptGlyphOrigin.x * ScaleFactor;
+		glyphMetricsFloat->gmfptGlyphOrigin.y =
+			(FLOAT) glyphMetrics.gmptGlyphOrigin.y * ScaleFactor;
+		glyphMetricsFloat->gmfCellIncX =
+			(FLOAT) glyphMetrics.gmCellIncX * ScaleFactor;
+		glyphMetricsFloat->gmfCellIncY =
+			(FLOAT) glyphMetrics.gmCellIncY * ScaleFactor;
+		
+		/*
+		 * Turn the glyph into a display list:
+		 */
+		if (!MakeDisplayListFromGlyph(	(glyphIndex - first) + listBase,
+						glyphBuf,
+						glyphSize,
+						glyphMetricsFloat,
+						chordalDeviation + ScaleFactor,
+						extrusion,
+						format))
+			goto cleanup; /*WGL_STATUS_FAILURE*/
+		}
+
+	ok = TRUE;
+
+
+	/*
+	 * Clean up temporary storage and return.  If an error occurred,
+	 * clear all OpenGL error flags and return FAILURE status;
+	 * otherwise just return SUCCESS.
+	 */
+cleanup:
+	if (glyphBuf)
+		__wglFree(glyphBuf);
+
+	/*
+	 * Give the caller its own font back and release the temporary
+	 * high-resolution font on success and failure alike.
+	 */
+	SelectObject(hDC, hOldFont);
+	DeleteObject(hNewFont);
+	hNewFont = NULL;
+
+	if (!ok)
+		return FALSE; /*WGL_STATUS_FAILURE*/
+
+	if (_GLD_glGetError() == GL_NO_ERROR)
+		return TRUE; /*WGL_STATUS_SUCCESS*/
+	else
+		{
+		while (_GLD_glGetError() != GL_NO_ERROR)
+			;
+		return FALSE; /*WGL_STATUS_FAILURE*/
+		}
+	}
+
+
+
+/*****************************************************************************
+ * CreateHighResolutionFont
+ *
+ * Gets metrics for the current font and creates an equivalent font
+ * scaled to the design units of the font.
+ *
+ * Side effects: sets the global ScaleFactor to 1 / (em-square size) and
+ * stores the new font in the global hNewFont.
+ *
+ * Returns the new font handle, or NULL when the outline metrics cannot be
+ * obtained (GetOutlineTextMetrics fails for the selected font), when the
+ * metrics are degenerate (zero height or em-square), on memory exhaustion
+ * or when the font cannot be created.  The temporary metrics buffer is
+ * released on every path.
+ *****************************************************************************/
+
+static HFONT
+CreateHighResolutionFont(HDC hDC)
+	{
+	UINT otmSize;
+	OUTLINETEXTMETRIC *otm;
+	LONG fontHeight, fontWidth, fontUnits;
+	LOGFONT logFont;
+
+	otmSize = GetOutlineTextMetrics(hDC, 0, NULL);
+	if (otmSize == 0) 
+		return NULL;
+
+	otm = (OUTLINETEXTMETRIC *) __wglMalloc(otmSize);
+	if (otm == NULL)
+		return NULL;
+
+	otm->otmSize = otmSize;
+	if (GetOutlineTextMetrics(hDC, otmSize, otm) == 0) 
+		{
+		__wglFree(otm);
+		return NULL;
+		}
+	
+	fontHeight = otm->otmTextMetrics.tmHeight -
+			otm->otmTextMetrics.tmInternalLeading;
+	fontWidth = otm->otmTextMetrics.tmAveCharWidth;
+	fontUnits = (LONG) otm->otmEMSquare;
+
+	/*
+	 * Both values are used as divisors below; refuse degenerate
+	 * metrics instead of dividing by zero.
+	 */
+	if (fontUnits <= 0 || fontHeight == 0)
+		{
+		__wglFree(otm);
+		return NULL;
+		}
+	
+	ScaleFactor = 1.0F / (FLOAT) fontUnits;
+
+	logFont.lfHeight = - ((LONG) fontUnits);
+	logFont.lfWidth = (LONG)
+		((FLOAT) (fontWidth * fontUnits) / (FLOAT) fontHeight);
+	logFont.lfEscapement = 0;
+	logFont.lfOrientation = 0;
+	logFont.lfWeight = otm->otmTextMetrics.tmWeight;
+	logFont.lfItalic = otm->otmTextMetrics.tmItalic;
+	logFont.lfUnderline = otm->otmTextMetrics.tmUnderlined;
+	logFont.lfStrikeOut = otm->otmTextMetrics.tmStruckOut;
+	logFont.lfCharSet = otm->otmTextMetrics.tmCharSet;
+	logFont.lfOutPrecision = OUT_OUTLINE_PRECIS;
+	logFont.lfClipPrecision = CLIP_DEFAULT_PRECIS;
+	logFont.lfQuality = DEFAULT_QUALITY;
+	logFont.lfPitchAndFamily =
+		otm->otmTextMetrics.tmPitchAndFamily & 0xf0;
+	/*
+	 * otmpFaceName holds the offset of the face name from the start
+	 * of the structure.  Copy at most LF_FACESIZE - 1 characters so an
+	 * over-long name cannot overrun lfFaceName, and always terminate.
+	 */
+	strncpy(logFont.lfFaceName,
+		(char *)otm + (size_t)otm->otmpFaceName,
+		LF_FACESIZE - 1);
+	logFont.lfFaceName[LF_FACESIZE - 1] = '\0';
+
+	/* hNewFont is NULL here if the font could not be created. */
+	hNewFont = CreateFontIndirect(&logFont);
+
+	__wglFree(otm);
+
+	return hNewFont;
+	}
+
+
+
+/*****************************************************************************
+ * MakeDisplayListFromGlyph
+ * 
+ * Compiles one glyph outline into the display list "listName".  The list
+ * contains whatever DrawGlyph emits, followed by a translation by the
+ * glyph's cell increment (gmfCellIncX, gmfCellIncY).
+ *
+ * Returns DrawGlyph's status: nonzero for success, zero for failure.
+ * The list is opened and closed in either case.
+ *
+ * OpenGL errors are not examined here; a caller that cares about them
+ * should call glGetError().
+ *****************************************************************************/
+
+static int
+MakeDisplayListFromGlyph(	IN  DWORD		listName,
+				IN  UCHAR*		glyphBuf,
+				IN  DWORD		glyphSize,
+				IN  LPGLYPHMETRICSFLOAT	glyphMetricsFloat,
+				IN  FLOAT		chordalDeviation,
+				IN  FLOAT		extrusion,
+				IN  INT			format)
+	{
+	int drawn;
+
+	_GLD_glNewList(listName, GL_COMPILE);
+
+	/* Emit the glyph geometry into the open list. */
+	drawn = DrawGlyph(glyphBuf, glyphSize, chordalDeviation, extrusion, format);
+
+	/* Move on by the cell increment of this glyph. */
+	_GLD_glTranslatef(
+		glyphMetricsFloat->gmfCellIncX,
+		glyphMetricsFloat->gmfCellIncY,
+		0.0F);
+
+	_GLD_glEndList();
+
+	return drawn;
+	}
+
+
+
+/*****************************************************************************
+ * DrawGlyph
+ * 
+ * Converts the outline of a glyph to OpenGL drawing primitives, tessellating
+ * as needed, and then draws the glyph.  Tessellation of the quadratic splines
+ * in the outline is controlled by "chordalDeviation", and the drawing
+ * primitives (lines or polygons) are selected by "format":
+ *
+ *   WGL_FONT_LINES    - each loop is drawn as a GL_LINE_LOOP.
+ *   WGL_FONT_POLYGONS - the loops are tessellated into a front face; when
+ *                       "extrusion" is nonzero, side walls and a back face
+ *                       at z = -extrusion are drawn as well.
+ *   any other value   - nothing is drawn and zero is returned.
+ *
+ * Return value is nonzero for success, zero for failure.
+ *
+ * Does not check for OpenGL errors, so if the caller needs to know about them,
+ * it should call glGetError().
+ *****************************************************************************/
+
+static int
+DrawGlyph(	IN  UCHAR*	glyphBuf,
+		IN  DWORD	glyphSize,
+		IN  FLOAT	chordalDeviation,
+		IN  FLOAT	extrusion,
+		IN  INT		format)
+	{
+	INT			status = 0;
+	FLOAT*			p;
+	DWORD			loop;
+	DWORD			point;
+	GLUtesselator*		tess = NULL;
+
+
+	/*
+	 * Initialize the global buffer into which we place the outlines:
+	 */
+	if (!InitLineBuf())
+		goto exit;
+
+
+	/*
+	 * Convert the glyph outlines to a set of polyline loops.
+	 * (See MakeLinesFromGlyph() for the format of the loop data
+	 * structure.)
+	 */
+	if (!MakeLinesFromGlyph(glyphBuf, glyphSize, chordalDeviation))
+		goto exit;
+	p = LineBuf;
+
+
+	/*
+	 * Now draw the loops in the appropriate format:
+	 */
+	if (format == WGL_FONT_LINES)
+		{
+		/*
+		 * This is the easy case.  Just draw the outlines.
+		 */
+		for (loop = (DWORD) *p++; loop; --loop)
+			{
+			_GLD_glBegin(GL_LINE_LOOP);
+				for (point = (DWORD) *p++; point; --point)
+					{
+					_GLD_glVertex2fv(p);
+					p += 2;
+					}
+			_GLD_glEnd();
+			}
+		status = 1;
+		}
+
+	else if (format == WGL_FONT_POLYGONS)
+		{
+		double v[3];
+		FLOAT *save_p = p;
+		GLfloat z_value;
+		
+		/*
+		 * This is the hard case.  We have to set up a tessellator
+		 * to convert the outlines into a set of polygonal
+		 * primitives, which the tessellator passes to some
+		 * auxiliary routines for drawing.
+		 */
+		if (!LoadGLUTesselator())
+			goto exit;
+		if (!InitVertBuf())
+			goto exit;
+		if (!(tess = gluNewTessProc()))
+			goto exit;
+		gluTessCallbackProc(tess,	GLU_BEGIN,	(void(CALLBACK *)()) _GLD_glBegin);
+		gluTessCallbackProc(tess,	GLU_TESS_VERTEX_DATA,
+				    (void(CALLBACK *)()) TessVertexOutData);
+		gluTessCallbackProc(tess,	GLU_END,	(void(CALLBACK *)()) _GLD_glEnd);
+		gluTessCallbackProc(tess,	GLU_ERROR,	(void(CALLBACK *)()) TessError);
+		gluTessCallbackProc(tess,	GLU_TESS_COMBINE, (void(CALLBACK *)()) TessCombine);
+		gluTessNormalProc(tess,	0.0F, 0.0F, 1.0F);
+
+		TessErrorOccurred = 0;
+		_GLD_glNormal3f(0.0f, 0.0f, 1.0f);
+		v[2] = 0.0;
+		z_value = 0.0f;
+
+		/*
+		 * The bit pattern of the z value is handed over as the
+		 * polygon data pointer; TessVertexOutData receives it as its
+		 * GLfloat argument.
+		 * NOTE(review): presumably only valid where a float argument
+		 * and a pointer argument are passed the same way - confirm
+		 * before building for other targets.
+		 */
+		gluTessBeginPolygonProc(tess, (void *)*(int *)&z_value);
+			for (loop = (DWORD) *p++; loop; --loop)
+				{
+				gluTessBeginContourProc(tess);
+				
+				for (point = (DWORD) *p++; point; --point)
+					{
+					v[0] = p[0];
+					v[1] = p[1];
+					gluTessVertexProc(tess, v, p);
+					p += 2;
+					}
+
+				gluTessEndContourProc(tess);
+				}
+		gluTessEndPolygonProc(tess);
+
+		status = !TessErrorOccurred;
+
+		/* Extrusion code */
+		if (extrusion) {
+			DWORD loops;
+			GLfloat thickness = (GLfloat) -extrusion;
+			FLOAT *vert, *vert2;
+			DWORD count;
+
+			p = save_p;
+			loops = (DWORD) *p++;
+
+			for (loop = 0; loop < loops; loop++) {
+				GLfloat dx, dy, len;
+				DWORD last;
+
+				count = (DWORD) *p++;
+
+				/* An empty loop has no vertices to look at. */
+				if (count == 0)
+					continue;
+
+				/* Check if the first and last vertex are identical
+				 * so we don't draw the same quad twice.
+				 */
+				vert = p + (count-1)*2;
+				last = (p[0] == vert[0] && p[1] == vert[1]) ? count-1 : count;
+
+				/* A loop of one vertex gives last == 0: there is no
+				 * side wall to draw and "% last" below would divide
+				 * by zero, so skip it.
+				 */
+				if (last == 0) {
+					p += count*2;
+					continue;
+				}
+
+				_GLD_glBegin(GL_QUAD_STRIP);
+
+				for (point = 0; point <= last; point++) {
+					vert  = p + 2 * (point % last);
+					vert2 = p + 2 * ((point+1) % last);
+
+					dx = vert[0] - vert2[0];
+					dy = vert[1] - vert2[1];
+					len = (GLfloat)sqrt(dx * dx + dy * dy);
+
+					/* Coincident vertices have no edge direction;
+					 * keep the previous normal instead of emitting
+					 * a NaN one.
+					 */
+					if (len > 0.0f)
+						_GLD_glNormal3f(dy / len, -dx / len, 0.0f);
+					_GLD_glVertex3f((GLfloat) vert[0],
+							   (GLfloat) vert[1], thickness);
+					_GLD_glVertex3f((GLfloat) vert[0],
+							   (GLfloat) vert[1], 0.0f);
+				}
+
+				_GLD_glEnd();
+				p += count*2;
+			}
+
+			/* Draw the back face */
+			p = save_p;
+			v[2] = thickness;
+			_GLD_glNormal3f(0.0f, 0.0f, -1.0f);
+			gluTessNormalProc(tess,	0.0F, 0.0F, -1.0F);
+
+			gluTessBeginPolygonProc(tess, (void *)*(int *)&thickness);
+
+			for (loop = (DWORD) *p++; loop; --loop)
+			{
+				count = (DWORD) *p++;
+
+				gluTessBeginContourProc(tess);
+				
+				/* Feed the vertices in reverse order so the back
+				 * face winds the other way round.
+				 */
+				for (point = 0; point < count; point++)
+				{
+					vert = p + ((count-point-1)<<1);
+					v[0] = vert[0];
+					v[1] = vert[1];
+					gluTessVertexProc(tess, v, vert);
+				}
+				p += count*2;
+
+				gluTessEndContourProc(tess);
+			}
+			gluTessEndPolygonProc(tess);
+		}
+
+#if DEBUG
+	if (TessErrorOccurred)
+		printf("Tessellation error %s\n",
+			gluErrorString(TessErrorOccurred));
+#endif
+		}
+
+
+exit:
+	FreeLineBuf();
+	if (tess)
+		gluDeleteTessProc(tess);
+	// UnloadGLUTesselator();
+	FreeVertBuf();
+	return status;
+	}
+
+
+
+/*****************************************************************************
+ * LoadGLUTesselator
+ *
+ * Maps the glu32.dll module and gets function pointers for the 
+ * tesselator functions.
+ *
+ * Returns TRUE when the module is loaded and every entry point has been
+ * resolved (or when that was already done by an earlier call).  Returns
+ * FALSE otherwise; in that case the module is unloaded again and
+ * gluModuleHandle is reset to NULL, so that a later call retries instead
+ * of reporting success with unresolved function pointers.
+ *****************************************************************************/
+
+static BOOL
+LoadGLUTesselator(void)
+	{
+	const char *gluName = "GLU32.DLL";
+
+	if (gluModuleHandle != NULL)
+		return TRUE;
+
+	if ((gluModuleHandle = LoadLibrary(gluName)) == NULL)
+		return FALSE;
+
+	if ((gluNewTessProc = (gluNewTessProto)
+		GetProcAddress(gluModuleHandle, "gluNewTess")) == NULL)
+		goto fail;
+	
+	if ((gluDeleteTessProc = (gluDeleteTessProto)
+		GetProcAddress(gluModuleHandle, "gluDeleteTess")) == NULL)
+		goto fail;
+	
+	if ((gluTessBeginPolygonProc = (gluTessBeginPolygonProto)
+		GetProcAddress(gluModuleHandle, "gluTessBeginPolygon")) == NULL)
+		goto fail;
+	
+	if ((gluTessBeginContourProc = (gluTessBeginContourProto)
+		GetProcAddress(gluModuleHandle, "gluTessBeginContour")) == NULL)
+		goto fail;
+	
+	if ((gluTessVertexProc = (gluTessVertexProto)
+		GetProcAddress(gluModuleHandle, "gluTessVertex")) == NULL)
+		goto fail;
+	
+	if ((gluTessEndContourProc = (gluTessEndContourProto)
+		GetProcAddress(gluModuleHandle, "gluTessEndContour")) == NULL)
+		goto fail;
+	
+	if ((gluTessEndPolygonProc = (gluTessEndPolygonProto)
+		GetProcAddress(gluModuleHandle, "gluTessEndPolygon")) == NULL)
+		goto fail;
+	
+	if ((gluTessPropertyProc = (gluTessPropertyProto)
+		GetProcAddress(gluModuleHandle, "gluTessProperty")) == NULL)
+		goto fail;
+
+	if ((gluTessNormalProc = (gluTessNormalProto)
+		GetProcAddress(gluModuleHandle, "gluTessNormal")) == NULL)
+		goto fail;
+	
+	if ((gluTessCallbackProc = (gluTessCallbackProto)
+		GetProcAddress(gluModuleHandle, "gluTessCallback")) == NULL)
+		goto fail;
+
+	return TRUE;
+
+fail:
+	/*
+	 * An entry point is missing.  Drop the module so that the
+	 * "already loaded" shortcut at the top cannot succeed with
+	 * unresolved function pointers.
+	 */
+	FreeLibrary(gluModuleHandle);
+	gluModuleHandle = NULL;
+	return FALSE;
+	}
+
+
+
+/*****************************************************************************
+ * UnloadGLUTesselator
+ *
+ * Unmaps the glu32.dll module.
+ *
+ * Returns FALSE when FreeLibrary fails (the handle is then kept), TRUE
+ * when the module was unloaded or was not loaded in the first place.
+ *****************************************************************************/
+
+static BOOL
+UnloadGLUTesselator(void)
+	{
+	if (gluModuleHandle != NULL)
+		{
+		if (FreeLibrary(gluModuleHandle) == FALSE)
+			return FALSE;
+		gluModuleHandle = NULL;
+		}
+
+	/* Every path of this BOOL function has to return a value. */
+	return TRUE;
+	}
+
+
+
+/*****************************************************************************
+ * TessVertexOut
+ *
+ * Tessellator vertex callback: multiplies the x and y of the vertex by
+ * the global ScaleFactor and emits the result as a 2D vertex.  p[2] is
+ * not used.
+ *****************************************************************************/
+ 
+static void CALLBACK
+TessVertexOut(FLOAT	p[3])
+	{
+	GLfloat scaled[2];
+
+	scaled[0] = p[0] * ScaleFactor;
+	scaled[1] = p[1] * ScaleFactor;
+
+	_GLD_glVertex2fv(scaled);
+	}
+
+/*
+ * TessVertexOutData
+ *
+ * Tessellator vertex callback with polygon data: emits the x and y of the
+ * vertex unchanged together with the given z as a 3D vertex.  DrawGlyph
+ * registers it for GLU_TESS_VERTEX_DATA and supplies z through the
+ * polygon data argument of gluTessBeginPolygon.
+ */
+static void CALLBACK
+TessVertexOutData(FLOAT	p[3], GLfloat z)
+{
+    GLfloat vertex[3];
+
+    vertex[0] = (GLfloat) p[0];
+    vertex[1] = (GLfloat) p[1];
+    vertex[2] = z;
+
+    _GLD_glVertex3fv(vertex);
+}
+
+
+/*****************************************************************************
+ * TessCombine
+ *
+ * Tessellator combine callback, used for self-intersecting contours and
+ * degenerate geometry.  The three coordinates of the new vertex are
+ * appended to the global VertBuf and *outData is pointed at the three
+ * most recently appended values.  If an append fails, TessErrorOccurred
+ * is set to GL_OUT_OF_MEMORY (the remaining coordinates are then not
+ * appended).  vertex_data and weight are not used.
+ *****************************************************************************/
+ 
+static void CALLBACK
+TessCombine(double	coords[3],
+	    void*	vertex_data[4],
+	    FLOAT	weight[4],
+	    void**	outData)
+	{
+	/* Stops at the first append that fails. */
+	int stored = AppendToVertBuf((FLOAT) coords[0])
+		  && AppendToVertBuf((FLOAT) coords[1])
+		  && AppendToVertBuf((FLOAT) coords[2]);
+
+	if (!stored)
+		TessErrorOccurred = GL_OUT_OF_MEMORY;
+
+	*outData = &VertBuf[VertBufIndex - 3];
+	}
+
+
+
+/*****************************************************************************
+ * TessError
+ *
+ * Tessellator error callback.  Records the reported error code in the
+ * global TessErrorOccurred, overwriting any earlier value; DrawGlyph
+ * reads it once tessellation has finished.
+ *****************************************************************************/
+ 
+static void CALLBACK
+TessError(GLenum error)
+	{
+	/* Keep only the most recent error. */
+	TessErrorOccurred = error;
+	}
+
+
+
+/*****************************************************************************
+ * MakeLinesFromGlyph
+ * 
+ * Turns the TTPOLYGON outline data of one glyph into a flat array of
+ * floating-point values in the global LineBuf.
+ *
+ * Layout of the output: element 0 holds the number of loops.  Each loop
+ * follows as a vertex count and then that many (x, y) pairs.  A single
+ * triangle therefore looks like this:
+ *
+ *	1.,	3.,	0., 0.,		1., 0.,		0., 1.
+ *       ^	 ^	 ^    ^		 ^    ^		 ^    ^
+ *     #loops	#verts	 x1   y1	 x2   y2	 x3   y3
+ *
+ * and a glyph made of two loops like this:
+ *
+ *	2.,	3.,  0.,0.,  1.,0.,  0.,1.,	3.,  .2,.2,  .4,.2,  .2,.4
+ *
+ * Straight segments of the TTPOLYGON are copied as they are; quadratic
+ * splines are replaced by runs of line segments.
+ *
+ * Returns nonzero for success, zero when a value cannot be appended or a
+ * polygon cannot be converted.
+ *****************************************************************************/
+
+static int
+MakeLinesFromGlyph(IN  UCHAR*	glyphBuf,
+		   IN  DWORD	glyphSize,
+		   IN  FLOAT	chordalDeviation)
+	{
+	UCHAR*	cursor;
+	UCHAR*	end;
+
+	/*
+	 * LineBuf[0] is the loop count; it starts at zero.
+	 */
+	if (!AppendToLineBuf(0.0F))
+		return 0;
+
+	/*
+	 * Convert one polygon (aka loop) after the other until the glyph
+	 * data is used up.  The callee advances the cursor.
+	 */
+	cursor = glyphBuf;
+	end = glyphBuf + glyphSize;
+	while (cursor < end)
+		{
+		if (!MakeLinesFromTTPolygon(&cursor, chordalDeviation))
+			return 0;
+		LineBuf[0] += 1.0F;
+		}
+
+	return 1;
+	}
+
+
+
+/*****************************************************************************
+ * MakeLinesFromTTPolygon
+ *
+ * Converts one TTPOLYGONHEADER together with the curve structures that
+ * follow it into a single polyline loop in the global LineBuf.
+ *
+ * *pp points at the header on entry and is advanced as data is read.
+ * Returns nonzero for success, zero when the polygon type is not
+ * TT_POLYGON_TYPE, a value cannot be appended or a curve cannot be
+ * converted.
+ *****************************************************************************/
+
+static int
+MakeLinesFromTTPolygon(	IN OUT	UCHAR**	pp,
+			IN	FLOAT	chordalDeviation)
+	{
+	/*
+	 * Remember where the polygon starts and which LineBuf slot will
+	 * hold the vertex count of this loop.
+	 */
+	UCHAR*	polyStart = *pp;
+	DWORD	vertexCountIndex = LineBufIndex;
+	DWORD	polySize;
+	UCHAR*	polyEnd;
+
+	/* Reserve the vertex count slot. */
+	if (!AppendToLineBuf(0.0F))
+		return 0;
+
+	/*
+	 * Header: size in bytes, polygon type, then the start point.
+	 */
+	polySize = GetDWord(pp);
+	if (GetDWord(pp) != TT_POLYGON_TYPE)
+		return 0;
+
+	/* First X coord, then first Y coord. */
+	if (!(AppendToLineBuf((FLOAT) GetFixed(pp))
+	   && AppendToLineBuf((FLOAT) GetFixed(pp))))
+		return 0;
+	LineBuf[vertexCountIndex] += 1.0F;
+
+	/*
+	 * The TTPOLYCURVE structures fill the rest of the polygon.
+	 */
+	polyEnd = polyStart + polySize;
+	while (*pp < polyEnd)
+		{
+		if (!MakeLinesFromTTPolycurve(pp, vertexCountIndex, chordalDeviation))
+			return 0;
+		}
+
+	return 1;
+	}
+
+
+
+/*****************************************************************************
+ * MakeLinesFromTTPolyCurve
+ *
+ * Reads the type and point count of one TTPOLYCURVE structure and hands
+ * the points that follow to the matching converter, which stores them in
+ * the global LineBuf.
+ *
+ * Returns the converter's status, or zero when the curve type is neither
+ * TT_PRIM_LINE nor TT_PRIM_QSPLINE.
+ *****************************************************************************/
+
+static int
+MakeLinesFromTTPolycurve(	IN OUT	UCHAR**	pp,
+				IN	DWORD	vertexCountIndex,
+				IN	FLOAT	chordalDeviation)
+	{
+	/*
+	 * The structure starts with the curve type followed by the
+	 * number of points; read them in that order.
+	 */
+	WORD curveType = (WORD) GetWord(pp);
+	WORD pointCount = (WORD) GetWord(pp);
+
+	switch (curveType)
+		{
+		case TT_PRIM_LINE:
+			/* Polyline: the points are copied. */
+			return MakeLinesFromTTLine(pp, vertexCountIndex, pointCount);
+
+		case TT_PRIM_QSPLINE:
+			/* Quadratic spline: converted to line segments. */
+			return MakeLinesFromTTQSpline(pp, vertexCountIndex,
+						      pointCount, chordalDeviation);
+
+		default:
+			return 0;
+		}
+	}
+
+
+
+/*****************************************************************************
+ * MakeLinesFromTTLine
+ *
+ * Copies the points of the polyline in a TT_PRIM_LINE structure into the
+ * global LineBuf, converting each coordinate to FLOAT.
+ *
+ * The vertex count stored at LineBuf[vertexCountIndex] is raised by
+ * pointCount before any point is copied.  Returns nonzero for success,
+ * zero as soon as a coordinate cannot be appended.
+ *****************************************************************************/
+static int
+MakeLinesFromTTLine(	IN OUT	UCHAR**	pp,
+			IN	DWORD	vertexCountIndex,
+			IN	WORD	pointCount)
+	{
+	WORD remaining;
+
+	LineBuf[vertexCountIndex] += pointCount;
+
+	for (remaining = pointCount; remaining != 0; --remaining)
+		{
+		/* X coord */
+		if (!AppendToLineBuf((FLOAT) GetFixed(pp)))
+			return 0;
+		/* Y coord */
+		if (!AppendToLineBuf((FLOAT) GetFixed(pp)))
+			return 0;
+		}
+
+	return 1;
+	}
+
+
+
+/*****************************************************************************
+ * MakeLinesFromTTQSpline
+ *
+ * Converts the points of the poly quadratic spline in a TT_PRIM_QSPLINE
+ * structure into polyline points in the global LineBuf.
+ *
+ * Returns nonzero for success, zero when an arc cannot be converted.
+ *****************************************************************************/
+
+static int
+MakeLinesFromTTQSpline(	IN OUT	UCHAR**	pp,
+			IN	DWORD	vertexCountIndex,
+			IN	WORD	pointCount,
+			IN	FLOAT	chordalDeviation)
+	{
+	FLOAT startX, startY, ctrlX, ctrlY, endX, endY;
+	int arc;
+	/* Index of the final arc; negative when there is no arc at all. */
+	int lastArc = (int) pointCount - 2;
+
+	/*
+	 * Every arc has a start point, an off-curve control point and an
+	 * end point.  The start is always the point stored most recently
+	 * in LineBuf.  The control point is read from the input.  The end
+	 * is the last point of the QSpline for the final arc, and the
+	 * midpoint between this control point and the next one otherwise.
+	 */
+	for (arc = 0; arc <= lastArc; ++arc)
+		{
+		startX = LineBuf[LineBufIndex - 2];
+		startY = LineBuf[LineBufIndex - 1];
+
+		ctrlX = (FLOAT) GetFixed(pp);
+		ctrlY = (FLOAT) GetFixed(pp);
+
+		if (arc != lastArc)
+			{
+			/*
+			 * Peek at the following point to find the end of
+			 * this arc ...
+			 */
+			endX = 0.5F * (ctrlX + (FLOAT) GetFixed(pp));
+			endY = 0.5F * (ctrlY + (FLOAT) GetFixed(pp));
+			/*
+			 * ... and step back over its 8 bytes so that it
+			 * is read again as the next control point.
+			 */
+			*pp -= 8;
+			}
+		else
+			{
+			/*
+			 * Final arc: it ends on the last point of the
+			 * QSpline.
+			 */
+			endX = (FLOAT) GetFixed(pp);
+			endY = (FLOAT) GetFixed(pp);
+			}
+
+		if (!MakeLinesFromArc(startX, startY,
+				      ctrlX, ctrlY,
+				      endX, endY,
+				      vertexCountIndex,
+				      chordalDeviation * chordalDeviation))
+			return 0;
+		}
+
+	return 1;
+	}
+
+
+
+/*****************************************************************************
+ * MakeLinesFromArc
+ *
+ * Subdivides one arc of a quadratic spline until the chordal deviation
+ * tolerance requirement is met, then places the resulting set of line
+ * segments in the global LineBuf.  (x0, y0) and (x2, y2) are the ends of
+ * the arc, (x1, y1) is its control point.
+ *
+ * Returns non-zero on success, and zero as soon as AppendToLineBuf fails,
+ * whether in this call or inside either recursive half.
+ *****************************************************************************/
+
+static int
+MakeLinesFromArc(	IN	FLOAT	x0,
+			IN	FLOAT	y0,
+			IN	FLOAT	x1,
+			IN	FLOAT	y1,
+			IN	FLOAT	x2,
+			IN	FLOAT	y2,
+			IN	DWORD	vertexCountIndex,
+			IN	FLOAT	chordalDeviationSquared)
+	{
+	FLOAT	x01, y01;		/* midpoint of (x0,y0)-(x1,y1) */
+	FLOAT	x12, y12;		/* midpoint of (x1,y1)-(x2,y2) */
+	FLOAT	midPointX, midPointY;	/* midpoint of those two midpoints */
+	FLOAT	deltaX, deltaY;
+
+	/*
+	 * Calculate midpoint of the curve by de Casteljau:
+	 */
+	x01 = 0.5F * (x0 + x1);
+	y01 = 0.5F * (y0 + y1);
+	x12 = 0.5F * (x1 + x2);
+	y12 = 0.5F * (y1 + y2);
+	midPointX = 0.5F * (x01 + x12);
+	midPointY = 0.5F * (y01 + y12);
+
+	/*
+	 * Estimate chordal deviation by the distance from the midpoint
+	 * of the curve to its non-interpolated control point.  If this
+	 * distance is greater than the specified chordal deviation
+	 * constraint, then subdivide.  Otherwise, generate polylines
+	 * from the three control points.
+	 */
+	deltaX = midPointX - x1;
+	deltaY = midPointY - y1;
+	if (deltaX * deltaX + deltaY * deltaY > chordalDeviationSquared)
+		{
+		/* Flatten both halves in order; stop at the first failure. */
+		if (!MakeLinesFromArc(	x0, y0,
+					x01, y01,
+					midPointX, midPointY,
+					vertexCountIndex,
+					chordalDeviationSquared)
+		 || !MakeLinesFromArc(	midPointX, midPointY,
+					x12, y12,
+					x2, y2,
+					vertexCountIndex,
+					chordalDeviationSquared))
+			return 0;
+		}
+	else
+		{
+		/*
+		 * The "pen" is already at (x0, y0), so we don't need to
+		 * add that point to the LineBuf.
+		 */
+		if (!AppendToLineBuf(x1)
+		 || !AppendToLineBuf(y1)
+		 || !AppendToLineBuf(x2)
+		 || !AppendToLineBuf(y2))
+			return 0;
+		LineBuf[vertexCountIndex] += 2.0F;
+		}
+
+	return 1;
+	}
+
+
+
+/*****************************************************************************
+ * InitLineBuf
+ *
+ * Allocates the global LineBuf with a capacity of LINE_BUF_QUANT FLOATs and
+ * marks it empty.  Returns non-zero on success, zero if allocation fails.
+ *****************************************************************************/
+static int
+InitLineBuf(void)
+	{
+	LineBufSize = LINE_BUF_QUANT;
+	LineBuf = (FLOAT*) __wglMalloc(LineBufSize * sizeof(FLOAT));
+	if (!LineBuf)
+		return 0;
+	LineBufIndex = 0;
+	return 1;
+	}
+
+
+
+/*****************************************************************************
+ * InitVertBuf
+ *
+ * Allocates the global VertBuf with a capacity of VERT_BUF_QUANT FLOATs and
+ * marks it empty.  Returns non-zero on success, zero if allocation fails.
+ *****************************************************************************/
+static int
+InitVertBuf(void)
+	{
+	VertBufSize = VERT_BUF_QUANT;
+	VertBuf = (FLOAT*) __wglMalloc(VertBufSize * sizeof(FLOAT));
+	if (!VertBuf)
+		return 0;
+	VertBufIndex = 0;
+	return 1;
+	}
+
+
+
+/*****************************************************************************
+ * AppendToLineBuf
+ *
+ * Appends one floating-point value to the global LineBuf array, growing it
+ * by LINE_BUF_QUANT elements when full.  Returns non-zero for success, zero
+ * for failure; a failed call leaves LineBuf and LineBufSize as they were.
+ *****************************************************************************/
+static int
+AppendToLineBuf(FLOAT value)
+	{
+	if (LineBufIndex >= LineBufSize)
+		{
+		FLOAT* f;
+		f = (FLOAT*) __wglRealloc(LineBuf,
+			(LineBufSize + LINE_BUF_QUANT) * sizeof(FLOAT));
+		if (!f)
+			return 0;
+		LineBuf = f;
+		LineBufSize += LINE_BUF_QUANT;	/* only once the memory exists */
+		}
+	LineBuf[LineBufIndex++] = value;
+	return 1;
+	}
+
+
+
+/*****************************************************************************
+ * AppendToVertBuf
+ *
+ * Stores one floating-point value in the next free slot of the global
+ * VertBuf array.  Returns non-zero for success, zero when VertBuf is full.
+ *
+ * Unlike LineBuf, this buffer is never reallocated, because the
+ * tessellator is using pointers into it.
+ *****************************************************************************/
+
+static int
+AppendToVertBuf(FLOAT value)
+	{
+	int	hasRoom = (VertBufIndex < VertBufSize);
+	if (hasRoom)
+		VertBuf[VertBufIndex++] = value;
+	return hasRoom;
+	}
+
+
+
+/*****************************************************************************
+ * FreeLineBuf
+ *
+ * Releases the global LineBuf, if one is allocated, and sets the pointer
+ * back to NULL so that a repeated call does nothing.
+ *****************************************************************************/
+
+static void
+FreeLineBuf(void)
+	{
+	if (!LineBuf)
+		return;
+	__wglFree(LineBuf);
+	LineBuf = NULL;
+	}
+
+
+
+/*****************************************************************************
+ * FreeVertBuf
+ *
+ * Releases the global VertBuf, if one is allocated, and sets the pointer
+ * back to NULL so that a repeated call does nothing.
+ *****************************************************************************/
+
+static void
+FreeVertBuf(void)
+	{
+	if (!VertBuf)
+		return;
+	__wglFree(VertBuf);
+	VertBuf = NULL;
+	}
+
+
+
+/*****************************************************************************
+ * GetWord
+ *
+ * Reads the next 16-bit word, low byte first, from a byte stream and moves
+ * the stream pointer *p on to the next unscanned byte.
+ *****************************************************************************/
+
+static long GetWord(UCHAR** p)
+	{
+	const UCHAR* bytes = *p;
+	long lo = bytes[0];
+	long hi = bytes[1];
+	*p += 2;
+	return (hi << 8) + lo;
+	}
+
+
+
+/*****************************************************************************
+ * GetDWord
+ *
+ * Fetch the next 32-bit word from a little-endian byte stream, and increment
+ * the stream pointer to the next unscanned byte.  Bytes are combined as
+ * unsigned long so a high top byte never shifts into an int's sign bit.
+ *****************************************************************************/
+static long GetDWord(UCHAR** p)
+	{
+	const UCHAR* b = *p;
+	unsigned long value = ((unsigned long) b[3] << 24) | ((unsigned long) b[2] << 16)
+			    | ((unsigned long) b[1] << 8)  |  (unsigned long) b[0];
+	*p += 4;
+	return (long) value;
+	}
+
+
+
+
+/*****************************************************************************
+ * GetFixed
+ *
+ * Fetch the next 32-bit fixed-point value from a little-endian byte stream,
+ * convert it to floating-point, and increment the stream pointer to the next
+ * unscanned byte.  The low word is a fraction in units of 1/65536, the high
+ * word a signed integer part; the sum is multiplied by ScaleFactor.
+ *****************************************************************************/
+static double GetFixed(
+	UCHAR** p)
+{
+	long	fraction = GetWord(p);		/* low word, read first */
+	long	whole = GetWord(p);		/* high word, still unsigned here */
+
+	/* Sign-extend by arithmetic: shifting left by 16 overflows a 32-bit
+	 * long for negative values and drops the sign where long is wider. */
+	if (whole >= 0x8000L)
+		whole -= 0x10000L;
+	return ((double) whole + (double) fraction / 65536.0) * ScaleFactor;
+}
+
+// ***********************************************************************
+
diff --git a/src/mesa/drivers/windows/gldirect/dglwgl.h b/src/mesa/drivers/windows/gldirect/dglwgl.h
new file mode 100644
index 0000000000..aac0410333
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dglwgl.h
@@ -0,0 +1,127 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  OpenGL window  functions (wgl*).
+*
+****************************************************************************/
+
+#ifndef __DGLWGL_H
+#define __DGLWGL_H
+
+// Disable compiler complaints about DLL linkage
+#pragma warning (disable:4273)
+
+// Macros to control compilation
+#define STRICT
+#define WIN32_LEAN_AND_MEAN
+
+#include <windows.h>
+#include <GL\gl.h>
+
+#include "dglcontext.h"
+#include "dglglobals.h"
+#include "dglmacros.h"
+#include "ddlog.h"
+#include "dglpf.h"
+
+/*---------------------- Macros and type definitions ----------------------*/
+
+typedef struct {	// One (procedure, name) pair. NOTE(review): presumably an extension lookup entry for wglGetProcAddress -- confirm
+	PROC proc;	// procedure pointer
+	char *name;	// name string paired with proc
+}  DGL_extension;
+
+#ifndef __MINGW32__
+/* XXX why is this here?
+ * It should probably be somewhere in src/mesa/drivers/windows/
+ */
+#if defined(_WIN32) && !defined(_WINGDI_) && !defined(_WINGDI_H) && !defined(_GNU_H_WINDOWS32_DEFINES) && !defined(OPENSTEP) && !defined(BUILD_FOR_SNAP) 
+#	define WGL_FONT_LINES      0
+#	define WGL_FONT_POLYGONS   1
+#ifndef _GNU_H_WINDOWS32_FUNCTIONS
+#	ifdef UNICODE
+#		define wglUseFontBitmaps  wglUseFontBitmapsW
+#		define wglUseFontOutlines  wglUseFontOutlinesW
+#	else
+#		define wglUseFontBitmaps  wglUseFontBitmapsA
+#		define wglUseFontOutlines  wglUseFontOutlinesA
+#	endif /* !UNICODE */
+#endif /* _GNU_H_WINDOWS32_FUNCTIONS */
+typedef struct tagLAYERPLANEDESCRIPTOR LAYERPLANEDESCRIPTOR, *PLAYERPLANEDESCRIPTOR, *LPLAYERPLANEDESCRIPTOR;
+typedef struct _GLYPHMETRICSFLOAT GLYPHMETRICSFLOAT, *PGLYPHMETRICSFLOAT, *LPGLYPHMETRICSFLOAT;
+typedef struct tagPIXELFORMATDESCRIPTOR PIXELFORMATDESCRIPTOR, *PPIXELFORMATDESCRIPTOR, *LPPIXELFORMATDESCRIPTOR;
+#if !defined(GLX_USE_MESA)
+#include <GL/mesa_wgl.h>
+#endif
+#endif
+#endif /* !__MINGW32__ */
+
+/*------------------------- Function Prototypes ---------------------------*/
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#ifndef _USE_GLD3_WGL
+int		APIENTRY DGL_ChoosePixelFormat(HDC a, CONST PIXELFORMATDESCRIPTOR *ppfd);
+BOOL	APIENTRY DGL_CopyContext(HGLRC a, HGLRC b, UINT c);
+HGLRC	APIENTRY DGL_CreateContext(HDC a);
+HGLRC	APIENTRY DGL_CreateLayerContext(HDC a, int b);
+BOOL	APIENTRY DGL_DeleteContext(HGLRC a);
+BOOL	APIENTRY DGL_DescribeLayerPlane(HDC a, int b, int c, UINT d, LPLAYERPLANEDESCRIPTOR e);
+int		APIENTRY DGL_DescribePixelFormat(HDC a, int b, UINT c, LPPIXELFORMATDESCRIPTOR d);
+HGLRC	APIENTRY DGL_GetCurrentContext(void);
+HDC		APIENTRY DGL_GetCurrentDC(void);
+PROC	APIENTRY DGL_GetDefaultProcAddress(LPCSTR a);
+int		APIENTRY DGL_GetLayerPaletteEntries(HDC a, int b, int c, int d, COLORREF *e);
+int		APIENTRY DGL_GetPixelFormat(HDC a);
+PROC	APIENTRY DGL_GetProcAddress(LPCSTR a);
+BOOL	APIENTRY DGL_MakeCurrent(HDC a, HGLRC b);
+BOOL	APIENTRY DGL_RealizeLayerPalette(HDC a, int b, BOOL c);
+int		APIENTRY DGL_SetLayerPaletteEntries(HDC a, int b, int c, int d, CONST COLORREF *e);
+BOOL	APIENTRY DGL_SetPixelFormat(HDC a, int b, CONST PIXELFORMATDESCRIPTOR *c);
+BOOL	APIENTRY DGL_ShareLists(HGLRC a, HGLRC b);
+BOOL	APIENTRY DGL_SwapBuffers(HDC a);
+BOOL	APIENTRY DGL_SwapLayerBuffers(HDC a, UINT b);
+BOOL	APIENTRY DGL_UseFontBitmapsA(HDC a, DWORD b, DWORD c, DWORD d);
+BOOL	APIENTRY DGL_UseFontBitmapsW(HDC a, DWORD b, DWORD c, DWORD d);
+BOOL	APIENTRY DGL_UseFontOutlinesA(HDC a, DWORD b, DWORD c, DWORD d, FLOAT e, FLOAT f, int g, LPGLYPHMETRICSFLOAT h);
+BOOL	APIENTRY DGL_UseFontOutlinesW(HDC a, DWORD b, DWORD c, DWORD d, FLOAT e, FLOAT f, int g, LPGLYPHMETRICSFLOAT h);
+#endif //_USE_GLD3_WGL
+
+BOOL	dglWglResizeBuffers(GLcontext *ctx, BOOL bDefaultDriver);
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/mesa/drivers/windows/gldirect/dll_main.c b/src/mesa/drivers/windows/gldirect/dll_main.c
new file mode 100644
index 0000000000..1d7ac64f49
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dll_main.c
@@ -0,0 +1,817 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  Win32 DllMain functions.
+*
+****************************************************************************/
+
+// INITGUID must only be defined once.
+// Don't put it in a shared header file!
+// GLD3 uses dxguid.lib, so INITGUID must *not* be used!
+#ifndef _USE_GLD3_WGL
+#define INITGUID
+#endif // _USE_GLD3_WGL
+
+#include "dllmain.h"
+
+//#include "snap/graphics.h"
+//#include "drvlib/os/os.h"
+
+#ifdef _USE_GLD3_WGL
+typedef void (APIENTRY *LPDGLSPLASHSCREEN)(int, int, char*);
+#include "gld_driver.h"
+#endif
+
+// ***********************************************************************
+
+BOOL bInitialized = FALSE;              // callback driver initialized? dglInitDriver returns at once when TRUE
+BOOL bExited = FALSE;                   // callback driver exited this instance?
+HINSTANCE hInstanceDll = NULL;          // DLL instance handle; dglInitDriver passes it to dllReadRegistry
+
+static BOOL bDriverValidated = FALSE;	// prior validation status
+static BOOL	bSplashScreen = TRUE;	    // show the splash screen? (overwritten from the INI file or registry)
+static BOOL bValidINIFound = FALSE;     // set by dllReadRegistry once settings were taken from the INI file
+
+HHOOK 	hKeyHook = NULL;				// keyboard hook installed by dglInitHotKeys, removed by dglExitHotKeys
+
+// Multi-threaded support needs to be reflected in Mesa code. (DaveM)
+int _gld_bMultiThreaded = FALSE;	// dglInitDriver copies glb.bMultiThreaded into this
+
+// ***********************************************************************
+
+DWORD dwLogging = 0; 					// Logging method: 0=off, 1=normal, 2=crash-proof
+DWORD dwDebugLevel = 0;                 // Log severity filter: 0=all, 1=warnings+errors, 2=errors only
+
+char szLogPath[_MAX_PATH] = {"\0"};		// Log file directory, handed to ddlogPathOption
+char szSNAPPath[_MAX_PATH] = {"\0"};	// SNAP driver path
+
+#ifndef _USE_GLD3_WGL
+DGL_wglFuncs wglFuncs = {	// wgl entry points; DGL_initDriver copies this table out to its caller
+	sizeof(DGL_wglFuncs),	// dwSize, which DGL_initDriver compares against the caller's table
+	DGL_ChoosePixelFormat,
+	DGL_CopyContext,
+	DGL_CreateContext,
+	DGL_CreateLayerContext,
+	DGL_DeleteContext,
+	DGL_DescribeLayerPlane,
+	DGL_DescribePixelFormat,
+	DGL_GetCurrentContext,
+	DGL_GetCurrentDC,
+	DGL_GetDefaultProcAddress,
+	DGL_GetLayerPaletteEntries,
+	DGL_GetPixelFormat,
+	DGL_GetProcAddress,
+	DGL_MakeCurrent,
+	DGL_RealizeLayerPalette,
+	DGL_SetLayerPaletteEntries,
+	DGL_SetPixelFormat,
+	DGL_ShareLists,
+	DGL_SwapBuffers,
+	DGL_SwapLayerBuffers,
+	DGL_UseFontBitmapsA,
+	DGL_UseFontBitmapsW,
+	DGL_UseFontOutlinesA,
+	DGL_UseFontOutlinesW,
+};
+
+DGL_mesaFuncs mesaFuncs = {	// overwritten with the caller's table by DGL_initDriver
+	sizeof(DGL_mesaFuncs),	// dwSize; any remaining members are zero-initialized here
+};
+#endif // _USE_GLD3_WGL
+
+// ***********************************************************************
+
+typedef struct {	// Settings as read from the "Config" section of gldirect.ini by ReadINIFile
+	DWORD	dwDriver;			// 0=SciTech SW, 1=Direct3D SW, 2=Direct3D HW
+	BOOL	bMipmapping;		// 0=off, 1=on
+	BOOL	bMultitexture;		// 0=off, 1=on
+	BOOL	bWaitForRetrace;	// 0=off, 1=on
+	BOOL	bFullscreenBlit;	// 0=off, 1=on
+	BOOL	bFastFPU;			// 0=off, 1=on
+	BOOL	bDirectDrawPersistant;// 0=off, 1=on (INI key is "bPersistantDisplay")
+	BOOL	bPersistantBuffers; // 0=off, 1=on (INI key is "bPersistantResources")
+	DWORD	dwLogging;			// 0=off, 1=normal, 2=crash-proof
+	DWORD	dwLoggingSeverity;	// 0=all, 1=warnings+errors, 2=errors only
+	BOOL	bMessageBoxWarnings;// 0=off, 1=on
+	BOOL	bMultiThreaded;		// 0=off, 1=on
+	BOOL	bAppCustomizations;	// 0=off, 1=on
+	BOOL	bHotKeySupport;		// 0=off, 1=on
+	BOOL	bSplashScreen;		// 0=off, 1=on
+
+#ifdef _USE_GLD3_WGL
+	//
+	// New for GLDirect 3.0
+	//
+	DWORD	dwAdapter;			// DX8 adapter ordinal
+	DWORD	dwTnL;				// Transform & Lighting type
+	DWORD	dwMultisample;		// DX8 multisample type
+#endif // _USE_GLD3_WGL
+} INI_settings;
+
+static INI_settings ini;	// filled in by ReadINIFile, consumed by dllReadRegistry
+
+// ***********************************************************************
+
+// Entry point used to initialise the callback driver.  In the non-GLD3
+// build the caller supplies two size-stamped function tables: its Mesa
+// table is copied into mesaFuncs and this driver's wgl table is copied
+// back out.  Returns TRUE on success, FALSE on bad arguments or when
+// dglInitDriver fails.
+BOOL APIENTRY DGL_initDriver(
+#ifdef _USE_GLD3_WGL
+	void)
+{
+#else
+	DGL_wglFuncs *lpWglFuncs,
+	DGL_mesaFuncs *lpMesaFuncs)
+{
+	// Both tables are required
+	if ((lpWglFuncs == NULL) || (lpMesaFuncs == NULL))
+		return FALSE;
+
+	// Each table must carry the size this driver was built with
+	if ((lpWglFuncs->dwSize != sizeof(DGL_wglFuncs))
+		|| (lpMesaFuncs->dwSize != sizeof(DGL_mesaFuncs)))
+		return FALSE;
+
+	// Copy the Mesa functions
+	memcpy(&mesaFuncs, lpMesaFuncs, sizeof(DGL_mesaFuncs));
+
+	// Pass back the wgl functions
+	memcpy(lpWglFuncs, &wglFuncs, sizeof(DGL_wglFuncs));
+#endif // _USE_GLD3_WGL
+
+	// Finally initialize the callback driver
+	if (!dglInitDriver())
+		return FALSE;
+
+	return TRUE;
+}
+
+// ***********************************************************************
+
+// Loads settings from "gldirect.ini" in the DLL's own directory into 'ini' and
+// points szLogPath at that directory.  Returns FALSE ("read the registry") when
+// the DLL is in the system directory or the INI path cannot be formed safely.
+BOOL ReadINIFile(
+	HINSTANCE hInstance)
+{
+	static const char	szININame[] = "gldirect.ini";
+	const char	szSectionName[] = "Config";
+	char		szModuleFilename[MAX_PATH], szSystemDirectory[MAX_PATH];
+	char		szINIFile[MAX_PATH];
+	char		*pSlash;
+	DWORD		dwLen;
+
+	// Get the dll path and filename (a NULL handle would give the process
+	// filename instead).  0 means failure, MAX_PATH means truncation.
+	dwLen = GetModuleFileName(hInstance, szModuleFilename, MAX_PATH);
+	if ((dwLen == 0) || (dwLen >= MAX_PATH))
+		return FALSE;
+	dwLen = GetSystemDirectory(szSystemDirectory, MAX_PATH);
+	if ((dwLen == 0) || (dwLen >= MAX_PATH))
+		return FALSE;
+	// A DLL in the system directory takes its settings from the registry.
+	if (strnicmp(szModuleFilename, szSystemDirectory, strlen(szSystemDirectory))==0)
+		return FALSE;
+
+	// Cut the path after its last backslash; give up if there is none or
+	// if appending the INI name would overrun szINIFile.
+	strcpy(szINIFile, szModuleFilename);
+	pSlash = strrchr(szINIFile, '\\');
+	if ((pSlash == NULL)
+		|| ((size_t)(pSlash - szINIFile) + 1 + sizeof(szININame) > sizeof(szINIFile)))
+		return FALSE;
+	pSlash[1] = '\0';
+	// Use run-time DLL path for log file too
+	strcpy(szLogPath, szINIFile);
+	szLogPath[pSlash - szINIFile] = '\0';
+	// Complete full INI file path
+	strcat(szINIFile, szININame);
+
+	// Read settings from private INI file; defaults are contained in the calls.
+	ini.dwDriver = GetPrivateProfileInt(szSectionName, "dwDriver", 2, szINIFile);
+	ini.bMipmapping = GetPrivateProfileInt(szSectionName, "bMipmapping", 1, szINIFile);
+	ini.bMultitexture = GetPrivateProfileInt(szSectionName, "bMultitexture", 1, szINIFile);
+	ini.bWaitForRetrace = GetPrivateProfileInt(szSectionName, "bWaitForRetrace", 0, szINIFile);
+	ini.bFullscreenBlit = GetPrivateProfileInt(szSectionName, "bFullscreenBlit", 0, szINIFile);
+	ini.bFastFPU = GetPrivateProfileInt(szSectionName, "bFastFPU", 1, szINIFile);
+	ini.bDirectDrawPersistant = GetPrivateProfileInt(szSectionName, "bPersistantDisplay", 0, szINIFile);
+	ini.bPersistantBuffers = GetPrivateProfileInt(szSectionName, "bPersistantResources", 0, szINIFile);
+	ini.dwLogging = GetPrivateProfileInt(szSectionName, "dwLogging", 0, szINIFile);
+	ini.dwLoggingSeverity = GetPrivateProfileInt(szSectionName, "dwLoggingSeverity", 0, szINIFile);
+	ini.bMessageBoxWarnings = GetPrivateProfileInt(szSectionName, "bMessageBoxWarnings", 0, szINIFile);
+	ini.bMultiThreaded = GetPrivateProfileInt(szSectionName, "bMultiThreaded", 0, szINIFile);
+	ini.bAppCustomizations = GetPrivateProfileInt(szSectionName, "bAppCustomizations", 1, szINIFile);
+	ini.bHotKeySupport = GetPrivateProfileInt(szSectionName, "bHotKeySupport", 0, szINIFile);
+	ini.bSplashScreen = GetPrivateProfileInt(szSectionName, "bSplashScreen", 1, szINIFile);
+
+#ifdef _USE_GLD3_WGL
+	// New for GLDirect 3.x
+	ini.dwAdapter		= GetPrivateProfileInt(szSectionName, "dwAdapter", 0, szINIFile);
+	// dwTnL now defaults to zero (chooses TnL at runtime). KeithH
+	ini.dwTnL			= GetPrivateProfileInt(szSectionName, "dwTnL", 0, szINIFile);
+	ini.dwMultisample	= GetPrivateProfileInt(szSectionName, "dwMultisample", 0, szINIFile);
+#endif
+
+	return TRUE;
+}
+
+// ***********************************************************************
+
+// Loads the driver configuration into 'glb' and the logging globals.  The
+// private INI file is used when ReadINIFile accepts it; otherwise the
+// settings come from the registry.  Returns TRUE if the settings were read
+// completely, FALSE otherwise.
+BOOL dllReadRegistry(
+	HINSTANCE hInstance)
+{
+	// Read settings from INI file, if available
+	bValidINIFound = FALSE;
+	if (ReadINIFile(hInstance)) {
+		const char *szRendering[3] = {
+			"SciTech Software Renderer",
+			"Direct3D MMX Software Renderer",
+			"Direct3D Hardware Renderer"
+		};
+		// dwDriver comes straight from the INI file and is used to
+		// index szRendering, so an out-of-range value is replaced by
+		// 2, the default ReadINIFile passes for this key.
+		const DWORD dwDriver = (ini.dwDriver < sizeof(szRendering) / sizeof(szRendering[0])) ? ini.dwDriver : 2;
+		// Set globals
+		glb.bPrimary = 1;
+		glb.bHardware = (dwDriver == 2) ? 1 : 0;
+#ifndef _USE_GLD3_WGL
+		memset(&glb.ddGuid, 0, sizeof(glb.ddGuid));
+		glb.d3dGuid = (dwDriver == 2) ? IID_IDirect3DHALDevice : IID_IDirect3DRGBDevice;
+#endif // _USE_GLD3_WGL
+		strcpy(glb.szDDName, "Primary");
+		strcpy(glb.szD3DName, szRendering[dwDriver]);
+		glb.dwRendering = dwDriver;
+		glb.bUseMipmaps = ini.bMipmapping;
+		glb.bMultitexture = ini.bMultitexture;
+		glb.bWaitForRetrace = ini.bWaitForRetrace;
+		glb.bFullscreenBlit = ini.bFullscreenBlit;
+		glb.bFastFPU = ini.bFastFPU;
+		glb.bDirectDrawPersistant = ini.bDirectDrawPersistant;
+		glb.bPersistantBuffers = ini.bPersistantBuffers;
+		dwLogging = ini.dwLogging;
+		dwDebugLevel = ini.dwLoggingSeverity;
+		glb.bMessageBoxWarnings = ini.bMessageBoxWarnings;
+		glb.bMultiThreaded = ini.bMultiThreaded;
+		glb.bAppCustomizations = ini.bAppCustomizations;
+		glb.bHotKeySupport = ini.bHotKeySupport;
+		bSplashScreen = ini.bSplashScreen;
+#ifdef _USE_GLD3_WGL
+		// New for GLDirect 3.x
+		glb.dwAdapter		= ini.dwAdapter;
+		glb.dwDriver		= dwDriver;
+		glb.dwTnL			= ini.dwTnL;
+		glb.dwMultisample	= ini.dwMultisample;
+#endif
+		bValidINIFound = TRUE;
+		return TRUE;
+	}
+	// Read settings from registry
+	else {
+	HKEY	hReg;
+	DWORD	cbValSize;
+	DWORD	dwType = REG_SZ; // Registry data type for strings
+	BOOL	bRegistryError;
+	BOOL	bSuccess;
+
+#define REG_READ_DWORD(a, b)							\
+	cbValSize = sizeof(b);								\
+	if (ERROR_SUCCESS != RegQueryValueEx( hReg, (a),	\
+		NULL, NULL, (LPBYTE)&(b), &cbValSize ))			\
+		bRegistryError = TRUE;
+
+#define REG_READ_DEVICEID(a, b)									\
+	cbValSize = MAX_DDDEVICEID_STRING;							\
+	if(ERROR_SUCCESS != RegQueryValueEx(hReg, (a), 0, &dwType,	\
+					(LPBYTE)&(b), &cbValSize))					\
+		bRegistryError = TRUE;
+
+#define REG_READ_STRING(a, b)									\
+	cbValSize = sizeof((b));									\
+	if(ERROR_SUCCESS != RegQueryValueEx(hReg, (a), 0, &dwType,	\
+					(LPBYTE)&(b), &cbValSize))					\
+		bRegistryError = TRUE;
+
+	// Open the registry key for the current user if it exists.
+	bSuccess = (ERROR_SUCCESS == RegOpenKeyEx(HKEY_CURRENT_USER,
+									  DIRECTGL_REG_SETTINGS_KEY,
+									  0,
+									  KEY_READ,
+									  &hReg));
+	// Otherwise open the registry key for the local machine.
+	if (!bSuccess)
+		bSuccess = (ERROR_SUCCESS == RegOpenKeyEx(DIRECTGL_REG_KEY_ROOT,
+									  DIRECTGL_REG_SETTINGS_KEY,
+									  0,
+									  KEY_READ,
+									  &hReg));
+	if (!bSuccess)
+		return FALSE;
+
+	bRegistryError = FALSE;
+
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_PRIMARY, glb.bPrimary);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_D3D_HW, glb.bHardware);
+#ifndef _USE_GLD3_WGL
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_DD_GUID, glb.ddGuid);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_D3D_GUID, glb.d3dGuid);
+#endif // _USE_GLD3_WGL
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_LOGGING, dwLogging);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_DEBUGLEVEL, dwDebugLevel);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_RENDERING, glb.dwRendering);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_MULTITEXTURE, glb.bMultitexture);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_WAITFORRETRACE, glb.bWaitForRetrace);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_FULLSCREENBLIT, glb.bFullscreenBlit);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_USEMIPMAPS, glb.bUseMipmaps);
+	REG_READ_DEVICEID(DIRECTGL_REG_SETTING_DD_NAME, glb.szDDName);
+	REG_READ_DEVICEID(DIRECTGL_REG_SETTING_D3D_NAME, glb.szD3DName);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_MSGBOXWARNINGS, glb.bMessageBoxWarnings);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_PERSISTDISPLAY, glb.bDirectDrawPersistant);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_PERSISTBUFFERS, glb.bPersistantBuffers);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_FASTFPU, glb.bFastFPU);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_HOTKEYS, glb.bHotKeySupport);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_MULTITHREAD, glb.bMultiThreaded);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_APPCUSTOM, glb.bAppCustomizations);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_SPLASHSCREEN, bSplashScreen);
+
+#ifdef _USE_GLD3_WGL
+	// New for GLDirect 3.x
+	glb.dwDriver = glb.dwRendering;
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_ADAPTER, glb.dwAdapter);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_TNL, glb.dwTnL);
+	REG_READ_DWORD(DIRECTGL_REG_SETTING_MULTISAMPLE, glb.dwMultisample);
+#endif
+
+	RegCloseKey(hReg);
+
+	// Open the global registry key for GLDirect
+	bSuccess = (ERROR_SUCCESS == RegOpenKeyEx(HKEY_LOCAL_MACHINE,
+									  DIRECTGL_REG_SETTINGS_KEY,
+									  0,
+									  KEY_READ,
+									  &hReg));
+	if (bSuccess) {
+		// Read the installation path for GLDirect
+		REG_READ_STRING("InstallLocation",szLogPath);
+		RegCloseKey(hReg);
+	}
+
+	return (bRegistryError || !bSuccess) ? FALSE : TRUE;
+
+#undef REG_READ_DWORD
+#undef REG_READ_DEVICEID
+#undef REG_READ_STRING
+	}
+}
+
+// ***********************************************************************
+
+BOOL dllWriteRegistry(	// Creates or opens the settings key, flushes and closes it; no values are written at present
+	void )
+{
+	HKEY 	hReg;
+	DWORD 	dwCreateDisposition, cbValSize;
+	BOOL 	bRegistryError = FALSE;	// only REG_WRITE_DWORD sets this, and the macro is never invoked below
+	// REG_WRITE_DWORD stores (b) as a REG_DWORD value named (a) under hReg and flags bRegistryError on failure.
+#define REG_WRITE_DWORD(a, b)							\
+	cbValSize = sizeof(b);								\
+	if (ERROR_SUCCESS != RegSetValueEx( hReg, (a),		\
+		0, REG_DWORD, (LPBYTE)&(b), cbValSize ))		\
+		bRegistryError = TRUE;
+
+	if (ERROR_SUCCESS == RegCreateKeyEx( DIRECTGL_REG_KEY_ROOT, DIRECTGL_REG_SETTINGS_KEY,
+										0, NULL, 0, KEY_WRITE, NULL, &hReg,
+										&dwCreateDisposition )) {
+		RegFlushKey(hReg); // Make sure keys are written to disk
+		RegCloseKey(hReg);
+		hReg = NULL;
+		}
+
+	if (bRegistryError)	// still FALSE here, so TRUE is returned even when RegCreateKeyEx failed
+		return FALSE;
+	else
+		return TRUE;
+
+#undef REG_WRITE_DWORD
+}
+
+// ***********************************************************************
+
+// Installs dglKeyProc as a WH_KEYBOARD hook and keeps the handle in
+// hKeyHook.  Does nothing unless hot-key support is enabled in 'glb'.
+void dglInitHotKeys(HINSTANCE hInstance)
+{
+	if (glb.bHotKeySupport) {
+		// Install global keyboard interceptor
+		hKeyHook = SetWindowsHookEx(WH_KEYBOARD, dglKeyProc, hInstance, 0);
+	}
+}
+
+// ***********************************************************************
+
+// Removes the keyboard hook held in hKeyHook, if there is one, and clears
+// the handle.  Does nothing unless hot-key support is enabled in 'glb'.
+void dglExitHotKeys(void)
+{
+	if (glb.bHotKeySupport) {
+		if (hKeyHook != NULL) {
+			UnhookWindowsHookEx(hKeyHook);
+		}
+		hKeyHook = NULL;
+	}
+}
+
+// ***********************************************************************
+
+// Note: This app-customization step must be performed in both the main
+// OpenGL32 driver and the callback driver DLLs for multithreading option.
+// Matches the upper-cased EXE path against known applications and adjusts 'glb'.
+void dglSetAppCustomizations(void)
+{
+	char		szModuleFileName[MAX_PATH];
+
+	// Get the currently loaded EXE filename; with no name there is nothing to match.
+	if (GetModuleFileName(NULL, szModuleFileName, MAX_PATH) == 0)	// NULL for current process
+		return;
+	szModuleFileName[MAX_PATH - 1] = '\0';	// a truncated name may lack its terminator
+	strupr(szModuleFileName);
+
+	// Check for specific EXEs and adjust global settings accordingly
+	// NOTE: In GLD3.x "bDirectDrawPersistant" corresponds to IDirect3D8 and
+	//       "bPersistantBuffers" corresponds to IDirect3DDevice8. KeithH
+
+	// Case 1: 3DStudio must be multi-threaded
+	// Added: Discreet GMAX (3DStudio MAX 4 for gamers. KeithH)
+	if (strstr(szModuleFileName, "3DSMAX.EXE")
+		|| strstr(szModuleFileName, "3DSVIZ.EXE")
+		|| strstr(szModuleFileName, "GMAX.EXE")) {
+		glb.bMultiThreaded = TRUE;
+		glb.bDirectDrawPersistant = FALSE;
+		glb.bPersistantBuffers = FALSE;
+		return;
+	}
+
+	// Case 2: Solid Edge must use pre-allocated resources for all GLRCs
+	if (strstr(szModuleFileName, "PART.EXE")
+		|| strstr(szModuleFileName, "ASSEMBL.EXE")
+		|| strstr(szModuleFileName, "DRAFT.EXE")
+		|| strstr(szModuleFileName, "SMARTVW.EXE")
+		|| strstr(szModuleFileName, "SMETAL.EXE")) {
+		glb.bMultiThreaded = FALSE;
+		glb.bDirectDrawPersistant = TRUE;
+		glb.bPersistantBuffers = FALSE;
+		return;
+	}
+
+	// Case 3: Sudden Depth creates and destroys GLRCs on paint commands
+	if (strstr(szModuleFileName, "SUDDEPTH.EXE")
+		|| strstr(szModuleFileName, "SUDDEMO.EXE")) {
+		glb.bMultiThreaded = FALSE;
+		glb.bDirectDrawPersistant = TRUE;
+		glb.bPersistantBuffers = TRUE;
+		glb.bFullscreenBlit = TRUE;
+		return;
+	}
+
+	// Case 4: StereoGraphics test apps create and destroy GLRCs on paint commands
+	if (strstr(szModuleFileName, "REDBLUE.EXE")
+		|| strstr(szModuleFileName, "DIAGNOSE.EXE")) {
+		glb.bMultiThreaded = FALSE;
+		glb.bDirectDrawPersistant = TRUE;
+		glb.bPersistantBuffers = TRUE;
+		return;
+	}
+
+	// Case 5: Pipes screen savers share multiple GLRCs for same window
+	if (strstr(szModuleFileName, "PIPES.SCR")
+		|| (strstr(szModuleFileName, "PIPES") && strstr(szModuleFileName, ".SCR"))) {
+		glb.bMultiThreaded = FALSE;
+		glb.bDirectDrawPersistant = TRUE;
+		glb.bPersistantBuffers = TRUE;
+		return;
+	}
+
+	// Case 6: AutoVue uses sub-viewport ops which are temporarily broken in stereo window
+	if (strstr(szModuleFileName, "AVWIN.EXE")) {
+		glb.bMultiThreaded = FALSE;
+		glb.bDirectDrawPersistant = TRUE;
+		glb.bPersistantBuffers = TRUE;
+		return;
+	}
+	// Case 7: Quake3 is waiting for DDraw objects to be released at exit
+	if (strstr(szModuleFileName, "QUAKE")) {
+		glb.bMultiThreaded = FALSE;
+		glb.bDirectDrawPersistant = FALSE;
+		glb.bPersistantBuffers = FALSE;
+		glb.bFullscreenBlit = FALSE;
+		return;
+	}
+	// Case 8: Reflection GLX server is unable to switch contexts at run-time
+	if (strstr(szModuleFileName, "RX.EXE")) {
+		glb.bMultiThreaded = FALSE;
+		glb.bMessageBoxWarnings = FALSE;
+		return;
+	}
+	// Case 9: Original AutoCAD 2000 must share DDraw objects across GLRCs
+	if (strstr(szModuleFileName, "ACAD.EXE")) {
+		glb.bFastFPU = FALSE;
+		if (GetModuleHandle("wopengl6.hdi") != NULL) {
+			glb.bMultiThreaded = FALSE;
+			glb.bDirectDrawPersistant = TRUE;
+			glb.bPersistantBuffers = FALSE;
+		}
+		return;
+	}
+}
+
+// ***********************************************************************
+
+// Performs the deferred driver initialization that used to live in
+// DllMain's DLL_PROCESS_ATTACH handler: resets the globals, reads the
+// registry/INI configuration, applies per-application overrides, opens the
+// log and builds the context state and pixel-format list.
+//
+// Returns TRUE if the driver is (or already was) initialized, and FALSE if
+// the configuration could not be read.
+BOOL dglInitDriver(void)
+{
+	// Display names for the log banner, indexed by glb.dwRendering.
+	static const char *const szRendering[] = {
+		"Mesa Software",
+		"Direct3D RGB SW",
+		"Direct3D HW",
+	};
+	const size_t	nRendering = sizeof(szRendering) / sizeof(szRendering[0]);
+	const char		*pszRendering;
+	// Zero-filled so a failed GetModuleFileName still leaves a valid string.
+	char			szExeName[MAX_PATH] = "";
+	static BOOL		bWarnOnce = FALSE;
+
+	// Already initialized?
+	if (bInitialized)
+		return TRUE;
+
+	// Moved from DllMain DLL_PROCESS_ATTACH:
+
+	// (Re-)Init defaults
+	dglInitGlobals();
+
+	// Read registry or INI file settings
+	if (!dllReadRegistry(hInstanceDll)) {
+		// Show the warning only once per process, but fail every time.
+		if (!bWarnOnce)
+			MessageBox( NULL, "GLDirect has not been configured.\n\n"
+							  "Please run the configuration program\n"
+							  "before using GLDirect with applications.\n",
+							  "GLDirect", MB_OK | MB_ICONWARNING);
+		bWarnOnce = TRUE;
+		return FALSE;
+	}
+
+#ifdef _USE_GLD3_WGL
+	// Must do this as early as possible.
+	// Need to read regkeys/ini-file first though.
+	gldInitDriverPointers(glb.dwDriver);
+
+	// Create private driver globals
+	_gldDriver.CreatePrivateGlobals();
+#endif
+	// Override settings with application customizations
+	if (glb.bAppCustomizations)
+		dglSetAppCustomizations();
+
+//#ifndef _USE_GLD3_WGL
+	// Set the global memory type to either sysmem or vidmem
+	glb.dwMemoryType = glb.bHardware ? DDSCAPS_VIDEOMEMORY : DDSCAPS_SYSTEMMEMORY;
+//#endif
+
+	// Multi-threaded support overrides persistent display support
+	if (glb.bMultiThreaded)
+		glb.bDirectDrawPersistant = glb.bPersistantBuffers = FALSE;
+
+	// Multi-threaded support needs to be reflected in Mesa code. (DaveM)
+	_gld_bMultiThreaded = glb.bMultiThreaded;
+
+	// Start logging
+	ddlogPathOption(szLogPath);
+	ddlogWarnOption(glb.bMessageBoxWarnings);
+	ddlogOpen((DDLOG_loggingMethodType)dwLogging,
+			  (DDLOG_severityType)dwDebugLevel);
+
+	// Obtain the name of the calling app
+	ddlogMessage(DDLOG_SYSTEM, "Driver           : SciTech GLDirect 4.0\n");
+	GetModuleFileName(NULL, szExeName, sizeof(szExeName));
+	// A truncated path is not guaranteed to be NUL-terminated.
+	szExeName[sizeof(szExeName) - 1] = '\0';
+	ddlogPrintf(DDLOG_SYSTEM, "Executable       : %s", szExeName);
+
+	ddlogPrintf(DDLOG_SYSTEM, "DirectDraw device: %s", glb.szDDName);
+	ddlogPrintf(DDLOG_SYSTEM, "Direct3D driver  : %s", glb.szD3DName);
+
+	// The rendering type comes from the configuration; never index the
+	// name table with a value outside its bounds.
+	if ((size_t)glb.dwRendering < nRendering)
+		pszRendering = szRendering[glb.dwRendering];
+	else
+		pszRendering = "Unknown";
+	ddlogPrintf(DDLOG_SYSTEM, "Rendering type   : %s", pszRendering);
+
+	ddlogPrintf(DDLOG_SYSTEM, "Multithreaded    : %s", glb.bMultiThreaded ? "Enabled" : "Disabled");
+	ddlogPrintf(DDLOG_SYSTEM, "Display resources: %s", glb.bDirectDrawPersistant ? "Persistant" : "Instanced");
+	ddlogPrintf(DDLOG_SYSTEM, "Buffer resources : %s", glb.bPersistantBuffers ? "Persistant" : "Instanced");
+
+	dglInitContextState();
+	dglBuildPixelFormatList();
+	//dglBuildTextureFormatList();
+
+	// D3D callback driver is now successfully initialized
+	bInitialized = TRUE;
+	// D3D callback driver is now ready to be exited
+	bExited = FALSE;
+
+	return TRUE;
+}
+
+// ***********************************************************************
+
+// Releases the driver's global resources and closes the log.
+// Guarded by bExited so that the clean-up sequence runs only once even
+// when it is reached both explicitly and from DLL_PROCESS_DETACH.
+void dglExitDriver(void)
+{
+
+	// Only need to clean up once per instance:
+	// May be called implicitly from DLL_PROCESS_DETACH,
+	// or explicitly from DGL_exitDriver().
+	if (bExited)
+		return;
+	bExited = TRUE;
+
+    // DDraw objects may be invalid when DLL unloads.
+    // The release calls are therefore wrapped in a structured exception
+    // handler; any exception is logged and otherwise ignored.
+__try {
+
+	// Clean-up sequence (moved from DLL_PROCESS_DETACH)
+#ifndef _USE_GLD3_WGL
+	dglReleaseTextureFormatList();
+#endif
+	dglReleasePixelFormatList();
+	dglDeleteContextState();
+
+#ifdef _USE_GLD3_WGL
+	_gldDriver.DestroyPrivateGlobals();
+#endif
+
+}
+__except(EXCEPTION_EXECUTE_HANDLER) {
+	    ddlogPrintf(DDLOG_WARN, "Exception raised in dglExitDriver.");
+}
+
+	// Close the log file
+	// (done last, after the handler above may have written to it)
+	ddlogClose();
+}
+
+// ***********************************************************************
+
+// DLL entry point.
+// On process attach only the cheap bookkeeping is done (instance handle,
+// state flags, default globals); the remaining initialization is deferred
+// to the first WGL call. On process detach the exit sequence is run.
+// Other notification reasons are ignored. Always returns TRUE.
+int WINAPI DllMain(
+	HINSTANCE hInstance,
+	DWORD fdwReason,
+	PVOID pvReserved)
+{
+	if (fdwReason == DLL_PROCESS_ATTACH) {
+		// Cache DLL instance handle
+		hInstanceDll = hInstance;
+
+		// Callback driver has neither been initialized nor exited yet
+		bExited = FALSE;
+		bInitialized = FALSE;
+
+#ifndef _USE_GLD3_WGL
+		// Clear the internal Mesa function pointer table
+		memset(&mesaFuncs, 0, sizeof(DGL_mesaFuncs));
+#endif // _USE_GLD3_WGL
+
+		// Load default settings
+		dglInitGlobals();
+	} else if (fdwReason == DLL_PROCESS_DETACH) {
+		// Run the exit clean-up sequence
+		dglExitDriver();
+	}
+
+	return TRUE;
+}
+
+// ***********************************************************************
+
+// Exported entry point that runs the driver's exit clean-up sequence.
+// Simply forwards to dglExitDriver().
+void APIENTRY DGL_exitDriver(void)
+{
+	// Call exit clean-up sequence
+	dglExitDriver();
+}
+
+// ***********************************************************************
+
+// Exported entry point that forces the initialization sequence to run
+// again by clearing the initialized/exited flags first.
+// The result of dglInitDriver() is not reported to the caller.
+void APIENTRY DGL_reinitDriver(void)
+{
+	// Pretend the driver was never started or shut down
+	bExited = FALSE;
+	bInitialized = FALSE;
+
+	dglInitDriver();
+}
+
+// ***********************************************************************
+
+// Alternative DLL entry point name; forwards all arguments to DllMain
+// and returns its result.
+int WINAPI DllInitialize(
+	HINSTANCE hInstance,
+	DWORD fdwReason,
+	PVOID pvReserved)
+{
+	// Some Watcom compiled executables require this.
+	return DllMain(hInstance, fdwReason, pvReserved);
+}
+
+// ***********************************************************************
+
+// Shows the GLDirect splash screen, at most once per process.
+//
+// piReg   - value passed through to the splash screen DLL.
+// pszUser - optional user string passed through to the splash screen DLL;
+//           may be NULL. Strings longer than the internal buffer are
+//           truncated.
+//
+// Does nothing if the splash screen is disabled (bSplashScreen) or has
+// already been shown. Failure to load gldsplash.dll is silently ignored.
+void DGL_LoadSplashScreen(int piReg, char* pszUser)
+{
+	HINSTANCE			hSplashDll = NULL;
+	LPDGLSPLASHSCREEN 	dglSplashScreen = NULL;
+	static BOOL 		bOnce = FALSE;
+	// Static copies: the splash dialog is modeless, so the data handed to
+	// the DLL is kept in storage that outlives this call.
+	static int          iReg = 0;
+	static char         szUser[255] = {"\0"};
+
+	// Display splash screen at all?
+	if (!bSplashScreen)
+		return;
+
+	// Only display splash screen once
+	if (bOnce)
+		return;
+	bOnce = TRUE;
+
+	// Make local copy of string for passing to DLL.
+	// The copy is bounded so an over-long name cannot overrun szUser.
+	if (pszUser) {
+		strncpy(szUser, pszUser, sizeof(szUser) - 1);
+		szUser[sizeof(szUser) - 1] = '\0';
+	}
+	iReg = piReg;
+
+	// Load Splash Screen DLL
+	// (If it fails to load for any reason, we don't care...)
+	hSplashDll = LoadLibrary("gldsplash.dll");
+	if (hSplashDll) {
+		// Execute the Splash Screen function
+		dglSplashScreen = (LPDGLSPLASHSCREEN)GetProcAddress(hSplashDll, "GLDSplashScreen");
+		if (dglSplashScreen)
+			(*dglSplashScreen)(1, iReg, szUser);
+		// Don't unload the DLL since splash screen dialog is modeless now
+	}
+}
+
+// ***********************************************************************
+
+// Makes sure the driver is ready before a WGL function proceeds.
+// Rebuilds the pixel-format list when it is flagged dirty (GLD3 builds
+// only) and initializes the driver on first use. If initialization fails
+// the user is told and the process is terminated, so this function only
+// ever returns TRUE.
+BOOL dglValidate()
+{
+#ifdef _USE_GLD3_WGL
+	// (Re)build pixelformat list
+	if (glb.bPixelformatsDirty)
+		_gldDriver.BuildPixelformatList();
+#endif
+
+	// Nothing more to do once initialized and validated
+	if (bInitialized && bDriverValidated)
+		return TRUE;
+
+	// Most WGL functions pass through here, so this is also where the
+	// callback driver gets completely initialized on first use.
+	if (!bInitialized && !dglInitDriver()) {
+		MessageBox(NULL,
+			"The GLDirect driver could not initialize.\n\n"
+			"Please run the configuration program to\n"
+			"properly configure the driver, or else\n"
+			"re-run the installation program.",
+			"SciTech GLDirect Driver",
+			MB_OK | MB_ICONEXCLAMATION);
+		_exit(1); // Bail
+	}
+
+	return TRUE;
+}
+
+// ***********************************************************************
+
diff --git a/src/mesa/drivers/windows/gldirect/dllmain.h b/src/mesa/drivers/windows/gldirect/dllmain.h
new file mode 100644
index 0000000000..03343ef7ad
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dllmain.h
@@ -0,0 +1,64 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  Win32 DllMain functions.
+*
+****************************************************************************/
+
+#ifndef __DLLMAIN_H
+#define __DLLMAIN_H
+
+// Macros to control compilation
+#define STRICT
+#define WIN32_LEAN_AND_MEAN
+
+#include <windows.h>
+
+#ifndef _USE_GLD3_WGL
+#include "DirectGL.h"
+#endif // _USE_GLD3_WGL
+
+//#include "gldirect/regkeys.h"
+#include "dglglobals.h"
+#include "ddlog.h"
+#ifndef _USE_GLD3_WGL
+#include "d3dtexture.h"
+#endif // _USE_GLD3_WGL
+
+#include "dglwgl.h"
+
+// TRUE once dglInitDriver() has completed successfully.
+extern BOOL bInitialized;
+
+// Deferred driver initialization; returns TRUE on success (or if already
+// initialized), FALSE otherwise.
+BOOL dglInitDriver(void);
+// Driver clean-up sequence; only the first call after initialization
+// performs any work.
+void dglExitDriver(void);
+
+#endif
diff --git a/src/mesa/drivers/windows/gldirect/dx7/gld_driver_dx7.c b/src/mesa/drivers/windows/gldirect/dx7/gld_driver_dx7.c
new file mode 100644
index 0000000000..7b202dfda7
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx7/gld_driver_dx7.c
@@ -0,0 +1,1196 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Driver interface code to Mesa
+*
+****************************************************************************/
+
+//#include <windows.h>
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx7.h"
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "depth.h"
+#include "extensions.h"
+#include "macros.h"
+#include "matrix.h"
+// #include "mem.h"
+//#include "mmath.h"
+#include "mtypes.h"
+#include "texformat.h"
+#include "teximage.h"
+#include "texstore.h"
+#include "vbo/vbo.h"
+#include "swrast_setup/swrast_setup.h"
+#include "swrast_setup/ss_context.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+extern BOOL dglSwapBuffers(HDC hDC);
+
+// HACK: Hack the _33 member of the OpenGL perspective projection matrix
+const float _fPersp_33 = 1.6f;
+
+//---------------------------------------------------------------------------
+// Internal functions
+//---------------------------------------------------------------------------
+
+// Writes a Mesa warning message to the GLDirect log at warning severity.
+// gc is not used.
+void _gld_mesa_warning(
+	__GLcontext *gc,
+	char *str)
+{
+	// Intercept Mesa's internal warning mechanism
+	gldLogPrintf(GLDLOG_WARN, "Mesa warning: %s", str);
+}
+
+//---------------------------------------------------------------------------
+
+void _gld_mesa_fatal(
+	__GLcontext *gc,
+	char *str)
+{
+	// Intercept Mesa's internal fatal-message mechanism
+	gldLogPrintf(GLDLOG_CRITICAL, "Mesa FATAL: %s", str);
+
+	// Mesa calls abort(0) here.
+	ddlogClose();
+	exit(0);
+}
+
+//---------------------------------------------------------------------------
+
+// Translates an OpenGL stencil operation (used for the pass, fail and
+// zfail operations) into its Direct3D counterpart.
+// GL_INCR/GL_DECR map to the saturating D3D operations, the
+// GL_EXT_stencil_wrap variants to the wrapping ones.
+// Unknown values are logged in debug builds and mapped to KEEP.
+D3DSTENCILOP _gldConvertStencilOp(
+	GLenum StencilOp)
+{
+	switch (StencilOp) {
+	case GL_KEEP:			return D3DSTENCILOP_KEEP;
+	case GL_ZERO:			return D3DSTENCILOP_ZERO;
+	case GL_REPLACE:		return D3DSTENCILOP_REPLACE;
+	case GL_INCR:			return D3DSTENCILOP_INCRSAT;
+	case GL_DECR:			return D3DSTENCILOP_DECRSAT;
+	case GL_INVERT:			return D3DSTENCILOP_INVERT;
+	case GL_INCR_WRAP_EXT:	return D3DSTENCILOP_INCR;	// GL_EXT_stencil_wrap
+	case GL_DECR_WRAP_EXT:	return D3DSTENCILOP_DECR;	// GL_EXT_stencil_wrap
+	default:
+		break;
+	}
+
+#ifdef _DEBUG
+	gldLogMessage(GLDLOG_ERROR, "_gldConvertStencilOp: Unknown StencilOp\n");
+#endif
+
+	// Fallback for anything not recognised above
+	return D3DSTENCILOP_KEEP;
+}
+
+//---------------------------------------------------------------------------
+
+// Translates an OpenGL comparison function (used for the alpha, depth and
+// stencil functions) into the matching Direct3D comparison.
+// Unknown values are logged in debug builds and mapped to ALWAYS.
+D3DCMPFUNC _gldConvertCompareFunc(
+	GLenum CmpFunc)
+{
+	switch (CmpFunc) {
+	case GL_NEVER:		return D3DCMP_NEVER;
+	case GL_LESS:		return D3DCMP_LESS;
+	case GL_EQUAL:		return D3DCMP_EQUAL;
+	case GL_LEQUAL:		return D3DCMP_LESSEQUAL;
+	case GL_GREATER:	return D3DCMP_GREATER;
+	case GL_NOTEQUAL:	return D3DCMP_NOTEQUAL;
+	case GL_GEQUAL:		return D3DCMP_GREATEREQUAL;
+	case GL_ALWAYS:		return D3DCMP_ALWAYS;
+	default:
+		break;
+	}
+
+#ifdef _DEBUG
+	gldLogMessage(GLDLOG_ERROR, "_gldConvertCompareFunc: Unknown CompareFunc\n");
+#endif
+
+	// Fallback for anything not recognised above
+	return D3DCMP_ALWAYS;
+}
+
+//---------------------------------------------------------------------------
+
+// Translates an OpenGL blend factor into the matching Direct3D blend
+// factor.
+//
+// blend        - OpenGL blend factor to translate.
+// DefaultBlend - value returned when 'blend' is not recognised.
+//                NOTE: it is returned as-is, without translation, so it
+//                has to be meaningful as a D3DBLEND value.
+//
+// Unrecognised factors are also logged in debug builds.
+D3DBLEND _gldConvertBlendFunc(
+	GLenum blend,
+	GLenum DefaultBlend)
+{
+	switch (blend) {
+	case GL_ZERO:					return D3DBLEND_ZERO;
+	case GL_ONE:					return D3DBLEND_ONE;
+	case GL_DST_COLOR:				return D3DBLEND_DESTCOLOR;
+	case GL_SRC_COLOR:				return D3DBLEND_SRCCOLOR;
+	case GL_ONE_MINUS_DST_COLOR:	return D3DBLEND_INVDESTCOLOR;
+	case GL_ONE_MINUS_SRC_COLOR:	return D3DBLEND_INVSRCCOLOR;
+	case GL_SRC_ALPHA:				return D3DBLEND_SRCALPHA;
+	case GL_ONE_MINUS_SRC_ALPHA:	return D3DBLEND_INVSRCALPHA;
+	case GL_DST_ALPHA:				return D3DBLEND_DESTALPHA;
+	case GL_ONE_MINUS_DST_ALPHA:	return D3DBLEND_INVDESTALPHA;
+	case GL_SRC_ALPHA_SATURATE:		return D3DBLEND_SRCALPHASAT;
+	default:
+		break;
+	}
+
+#ifdef _DEBUG
+	gldLogMessage(GLDLOG_ERROR, "_gldConvertBlendFunc: Unknown BlendFunc\n");
+#endif
+
+	return DefaultBlend;
+}
+
+//---------------------------------------------------------------------------
+// Misc. functions
+//---------------------------------------------------------------------------
+
+// Placeholder driver callback that does no work.
+// In debug builds each call is logged as an error; ctx is not used.
+void gld_Noop_DX7(
+	GLcontext *ctx)
+{
+#ifdef _DEBUG
+	gldLogMessage(GLDLOG_ERROR, "gld_Noop called!\n");
+#endif
+}
+
+//---------------------------------------------------------------------------
+
+// Error callback that intentionally does nothing: the only statement it
+// ever contained (a log call) is commented out. ctx is not used.
+void gld_Error_DX7(
+	GLcontext *ctx)
+{
+#ifdef _DEBUG
+	// Quite useless.
+//	gldLogMessage(GLDLOG_ERROR, "ctx->Driver.Error called!\n");
+#endif
+}
+
+//---------------------------------------------------------------------------
+// Required Mesa functions
+//---------------------------------------------------------------------------
+
+// Reports whether the requested draw buffer is supported.
+// Only GL_FRONT_LEFT and GL_BACK_LEFT are accepted (GL_TRUE); every other
+// mode yields GL_FALSE. No driver state is changed.
+static GLboolean gld_set_draw_buffer_DX7(
+	GLcontext *ctx,
+	GLenum mode)
+{
+   (void) ctx;
+
+   switch (mode) {
+   case GL_FRONT_LEFT:
+   case GL_BACK_LEFT:
+      return GL_TRUE;
+   default:
+      return GL_FALSE;
+   }
+}
+
+//---------------------------------------------------------------------------
+
+// Read-buffer selection callback. Intentionally empty: a separate read
+// buffer is not supported, so all arguments are ignored.
+static void gld_set_read_buffer_DX7(
+	GLcontext *ctx,
+	GLframebuffer *buffer,
+	GLenum mode)
+{
+   /* separate read buffer not supported */
+/*
+   ASSERT(buffer == ctx->DrawBuffer);
+   ASSERT(mode == GL_FRONT_LEFT);
+*/
+}
+
+//---------------------------------------------------------------------------
+
+// Clears the colour, depth and/or stencil buffers through a single
+// Direct3D Clear call.
+//
+// ctx    - Mesa context; supplies the clear colour/depth/stencil values.
+// mask   - DD_*_BIT flags selecting the buffers to clear.
+// all    - GL_TRUE to clear the whole target, otherwise only the
+//          rectangle given by x, y, width, height.
+// x, y, width, height - clear rectangle, with y measured from the bottom
+//          (it is flipped against gldCtx->dwHeight below).
+//
+// Depth and stencil requests are dropped when the visual has no such bits.
+// The accumulation buffer is not cleared (empty branch at the end).
+void gld_Clear_DX7(
+	GLcontext *ctx,
+	GLbitfield mask,
+	GLboolean all,
+	GLint x,
+	GLint y,
+	GLint width,
+	GLint height)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	DWORD		dwFlags = 0;
+	D3DCOLOR	Color = 0;
+	float		Z = 0.0f;
+	DWORD		Stencil = 0;
+	D3DRECT		d3dClearRect;
+
+	// TODO: Colourmask
+	// (colorMask is computed but not used anywhere in this function)
+	const GLuint *colorMask = (GLuint *) &ctx->Color.ColorMask[0];
+
+	// Nothing can be cleared without a device
+	if (!gld->pDev)
+		return;
+
+	if (mask & (DD_FRONT_LEFT_BIT | DD_BACK_LEFT_BIT)) {
+		// Convert the float clear colour to 8 bits per channel
+		GLubyte col[4];
+		CLAMPED_FLOAT_TO_UBYTE(col[0], ctx->Color.ClearColor[0]);
+		CLAMPED_FLOAT_TO_UBYTE(col[1], ctx->Color.ClearColor[1]);
+		CLAMPED_FLOAT_TO_UBYTE(col[2], ctx->Color.ClearColor[2]);
+		CLAMPED_FLOAT_TO_UBYTE(col[3], ctx->Color.ClearColor[3]);
+		dwFlags |= D3DCLEAR_TARGET;
+		Color = D3DCOLOR_RGBA(col[0], col[1], col[2], col[3]);
+//								ctx->Color.ClearColor[1], 
+//								ctx->Color.ClearColor[2], 
+//								ctx->Color.ClearColor[3]);
+	}
+
+	if (mask & DD_DEPTH_BIT) {
+		// D3D7 will fail the Clear call if we try and clear a
+		// depth buffer and we haven't created one.
+		// Also, some apps try and clear a depth buffer,
+		// when a depth buffer hasn't been requested by the app.
+		if (ctx->Visual.depthBits == 0) {
+			mask &= ~DD_DEPTH_BIT; // Remove depth bit from mask
+		} else {
+			dwFlags |= D3DCLEAR_ZBUFFER;
+			Z = ctx->Depth.Clear;
+		}
+	}
+
+	if (mask & DD_STENCIL_BIT) {
+		if (ctx->Visual.stencilBits == 0) {
+			// No stencil bits in depth buffer
+			mask &= ~DD_STENCIL_BIT; // Remove stencil bit from mask
+		} else {
+			dwFlags |= D3DCLEAR_STENCIL;
+			Stencil = ctx->Stencil.Clear;
+		}
+	}
+
+	// Some apps do really weird things with the rect, such as Quake3.
+	// Any negative origin or non-positive size falls back to a full clear.
+	if ((x < 0) || (y < 0) || (width <= 0) || (height <= 0)) {
+		all = GL_TRUE;
+	}
+
+	if (!all) {
+		// Calculate clear subrect
+		// (y1 is flipped because the incoming y counts from the bottom)
+		d3dClearRect.x1	= x;
+		d3dClearRect.y1	= gldCtx->dwHeight - (y + height);
+		d3dClearRect.x2	= x + width;
+		d3dClearRect.y2	= d3dClearRect.y1 + height;
+	}
+
+	// dwFlags will be zero if there's nothing to clear
+	if (dwFlags) {
+		// A rect count of 0 with a NULL rect clears the entire target
+		_GLD_DX7_DEV(Clear(
+			gld->pDev,
+			all ? 0 : 1,
+			all ? NULL : &d3dClearRect,
+			dwFlags,
+			Color, Z, Stencil));
+	}
+
+	if (mask & DD_ACCUM_BIT) {
+		// Clear accumulation buffer
+		// (not implemented)
+	}
+}
+
+//---------------------------------------------------------------------------
+
+// Mesa 5: Parameter change
+// Reports the size of a framebuffer.
+// Writes fb->Width to *width and fb->Height to *height.
+static void gld_buffer_size_DX7(
+//	GLcontext *ctx,
+	GLframebuffer *fb,
+	GLuint *width,
+	GLuint *height)
+{
+//	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+
+	*width = fb->Width; // gldCtx->dwWidth;
+	*height = fb->Height; // gldCtx->dwHeight;
+}
+
+//---------------------------------------------------------------------------
+
+// Finish callback. Intentionally empty: no work is performed.
+static void gld_Finish_DX7(
+	GLcontext *ctx)
+{
+}
+
+//---------------------------------------------------------------------------
+
+// Flush callback.
+// When the context emulates a single-buffered visual, a flush presents
+// the frame by swapping buffers; otherwise nothing happens.
+static void gld_Flush_DX7(
+	GLcontext *ctx)
+{
+	GLD_context		*gld	= GLD_GET_CONTEXT(ctx);
+
+	// TODO: Detect apps that glFlush() then SwapBuffers() ?
+
+	// Real double-buffered contexts need no work here
+	if (!gld->EmulateSingle)
+		return;
+
+	// Emulating a single-buffered context.
+	// [Direct3D doesn't allow rendering to front buffer]
+	dglSwapBuffers(gld->hDC);
+}
+
+//---------------------------------------------------------------------------
+
+// Pushes Mesa's stencil state to the Direct3D device.
+// The enable flag is always set; function, reference, masks and
+// operations are only sent while stencilling is enabled.
+// Only face 0 of Mesa's two-sided stencil state is used.
+void gld_NEW_STENCIL(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	// Two-sided stencil. New for Mesa 5
+	const GLuint		uiFace	= 0UL;
+
+	struct gl_stencil_attrib *pStencil = &ctx->Stencil;
+
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_STENCILENABLE, pStencil->Enabled ? TRUE : FALSE));
+
+	// Remaining state is irrelevant while stencilling is off
+	if (!pStencil->Enabled)
+		return;
+
+	// Test function, reference value and masks
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_STENCILFUNC, _gldConvertCompareFunc(pStencil->Function[uiFace])));
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_STENCILREF, pStencil->Ref[uiFace]));
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_STENCILMASK, pStencil->ValueMask[uiFace]));
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_STENCILWRITEMASK, pStencil->WriteMask[uiFace]));
+
+	// Operations for stencil fail, depth fail and depth pass
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_STENCILFAIL, _gldConvertStencilOp(pStencil->FailFunc[uiFace])));
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_STENCILZFAIL, _gldConvertStencilOp(pStencil->ZFailFunc[uiFace])));
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_STENCILPASS, _gldConvertStencilOp(pStencil->ZPassFunc[uiFace])));
+}
+
+//---------------------------------------------------------------------------
+
+// Pushes Mesa's alpha-test and blend state to the Direct3D device.
+// The colour write mask is not forwarded (see the disabled code at the
+// end of the function).
+void gld_NEW_COLOR(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	DWORD		dwFlags = 0;	// only referenced by the disabled colour-mask code
+	D3DBLEND	src;
+	D3DBLEND	dest;
+
+	// Alpha func
+	// NOTE(review): AlphaRef is cast straight to DWORD - confirm that its
+	// type/range in this Mesa version matches what D3D expects.
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_ALPHAFUNC, _gldConvertCompareFunc(ctx->Color.AlphaFunc)));
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_ALPHAREF, (DWORD)ctx->Color.AlphaRef));
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_ALPHATESTENABLE, ctx->Color.AlphaEnabled));
+
+	// Blend func
+	// _gldConvertBlendFunc returns its second argument untranslated when
+	// the GL factor is not recognised, so the fallbacks must already be
+	// D3DBLEND values. (The GL constants would be misread: GL_ONE has the
+	// same numeric value as D3DBLEND_ZERO, and GL_ZERO is not a valid
+	// D3DBLEND at all.)
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_ALPHABLENDENABLE, ctx->Color.BlendEnabled));
+	src		= _gldConvertBlendFunc(ctx->Color.BlendSrcRGB, D3DBLEND_ONE);
+	dest	= _gldConvertBlendFunc(ctx->Color.BlendDstRGB, D3DBLEND_ZERO);
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_SRCBLEND, src));
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_DESTBLEND, dest));
+
+/*
+	// Color mask - unsupported by DX7
+	if (ctx->Color.ColorMask[0][0]) dwFlags |= D3DCOLORWRITEENABLE_RED;
+	if (ctx->Color.ColorMask[0][1]) dwFlags |= D3DCOLORWRITEENABLE_GREEN;
+	if (ctx->Color.ColorMask[0][2]) dwFlags |= D3DCOLORWRITEENABLE_BLUE;
+	if (ctx->Color.ColorMask[0][3]) dwFlags |= D3DCOLORWRITEENABLE_ALPHA;
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_COLORWRITEENABLE, dwFlags));
+*/
+}
+
+//---------------------------------------------------------------------------
+
+// Pushes Mesa's depth-buffer state (test enable, compare function and
+// write enable) to the Direct3D device.
+void gld_NEW_DEPTH(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	const DWORD			dwZEnable	= ctx->Depth.Test ? D3DZB_TRUE : D3DZB_FALSE;
+	const DWORD			dwZWrite	= ctx->Depth.Mask ? TRUE : FALSE;
+
+	// Depth test on/off
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_ZENABLE, dwZEnable));
+	// Depth compare function
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_ZFUNC, _gldConvertCompareFunc(ctx->Depth.Func)));
+	// Depth writes on/off
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_ZWRITEENABLE, dwZWrite));
+}
+
+//---------------------------------------------------------------------------
+
+// Pushes Mesa's polygon state (fill mode, face culling and polygon
+// offset) to the Direct3D device.
+void gld_NEW_POLYGON(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	// Largest value accepted for D3DRENDERSTATE_ZBIAS
+	const int	iMaxZBias = 16;
+
+	D3DFILLMODE	d3dFillMode = D3DFILL_SOLID;
+	D3DCULL		d3dCullMode = D3DCULL_NONE;
+	int			iOffset = 0;
+
+	// Fillmode (only the front-face mode is considered)
+	switch (ctx->Polygon.FrontMode) {
+	case GL_POINT:
+		d3dFillMode = D3DFILL_POINT;
+		break;
+	case GL_LINE:
+		d3dFillMode = D3DFILL_WIREFRAME;
+		break;
+	case GL_FILL:
+		d3dFillMode = D3DFILL_SOLID;
+		break;
+	}
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_FILLMODE, d3dFillMode));
+
+	// Culling: D3D culls by winding order, so the GL cull face has to be
+	// combined with the GL front-face winding.
+	if (ctx->Polygon.CullFlag) {
+		switch (ctx->Polygon.CullFaceMode) {
+		case GL_BACK:
+			if (ctx->Polygon.FrontFace == GL_CCW)
+				d3dCullMode = D3DCULL_CW;
+			else
+				d3dCullMode = D3DCULL_CCW;
+			break;
+		case GL_FRONT:
+			if (ctx->Polygon.FrontFace == GL_CCW)
+				d3dCullMode = D3DCULL_CCW;
+			else
+				d3dCullMode = D3DCULL_CW;
+			break;
+		case GL_FRONT_AND_BACK:
+			d3dCullMode = D3DCULL_NONE;
+			break;
+		default:
+			break;
+		}
+	} else {
+		d3dCullMode = D3DCULL_NONE;
+	}
+//	d3dCullMode = D3DCULL_NONE; // TODO: DEBUGGING
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_CULLMODE, d3dCullMode));
+
+	// Polygon offset
+	// ZBIAS ranges from 0 to 16 and can only move towards the viewer
+	// Mesa5: ctx->Polygon._OffsetAny removed
+	if (ctx->Polygon.OffsetFill) {
+		GLfloat fUnits = ctx->Polygon.OffsetUnits;
+		if (fUnits < 0.0f) {
+			// Negative units pull towards the viewer. Clamp in floating
+			// point before converting so that large magnitudes neither
+			// overflow the int conversion nor exceed the ZBIAS range.
+			fUnits = -fUnits;
+			if (fUnits >= (GLfloat)iMaxZBias)
+				iOffset = iMaxZBias;
+			else
+				iOffset = (int)fUnits;
+		} else {
+			iOffset = 0; // D3D can't push away
+		}
+	}
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_ZBIAS, iOffset));
+}
+
+//---------------------------------------------------------------------------
+
+// Fog state callback.
+// As written this only disables Direct3D fog and returns: everything
+// after the first 'return' is unreachable and is kept as the would-be
+// implementation (enable, colour, density, start/end and table mode).
+void gld_NEW_FOG(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	D3DCOLOR	d3dFogColour;
+	D3DFOGMODE	d3dFogMode = D3DFOG_LINEAR;
+
+	// TODO: Fog is calculated separately in the Mesa pipeline
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_FOGENABLE, FALSE));
+	return;
+
+	// ---- Unreachable from here on (see early return above) ----
+
+	// Fog enable
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_FOGENABLE, ctx->Fog.Enabled));
+	if (!ctx->Fog.Enabled) {
+		_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_FOGTABLEMODE, D3DFOG_NONE));
+		_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_FOGVERTEXMODE, D3DFOG_NONE));
+		return; // If disabled, don't bother setting any fog state
+	}
+
+	// Fog colour
+	d3dFogColour = D3DCOLOR_COLORVALUE(	ctx->Fog.Color[0],
+								ctx->Fog.Color[1],
+								ctx->Fog.Color[2],
+								ctx->Fog.Color[3]);
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_FOGCOLOR, d3dFogColour));
+
+	// The float-valued states below are passed by reinterpreting the
+	// float's storage as a DWORD (pointer cast), not by numeric conversion.
+
+	// Fog density
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_FOGDENSITY, *((DWORD*) (&ctx->Fog.Density))));
+
+	// Fog start
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_FOGSTART, *((DWORD*) (&ctx->Fog.Start))));
+
+	// Fog end
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_FOGEND, *((DWORD*) (&ctx->Fog.End))));
+
+	// Fog mode
+	switch (ctx->Fog.Mode) {
+	case GL_LINEAR:
+		d3dFogMode = D3DFOG_LINEAR;
+		break;
+	case GL_EXP:
+		d3dFogMode = D3DFOG_EXP;
+		break;
+	case GL_EXP2:
+		d3dFogMode = D3DFOG_EXP2;
+		break;
+	}
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_FOGTABLEMODE, d3dFogMode));
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_FOGVERTEXMODE, D3DFOG_NONE));
+}
+
+//---------------------------------------------------------------------------
+
+// Pushes Mesa's lighting-related render state to the Direct3D device:
+// the shade mode, and whether the separate specular colour is applied.
+void gld_NEW_LIGHT(
+	GLcontext *ctx)
+{
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7	*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+	DWORD			dwSpecularEnable = FALSE;
+
+	// Shademode: Gouraud for GL_SMOOTH, flat for everything else
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_SHADEMODE, (ctx->Light.ShadeModel == GL_SMOOTH) ? D3DSHADE_GOURAUD : D3DSHADE_FLAT));
+
+	// Separate specular colour: only while lighting is on and Mesa has
+	// flagged separate specular in its triangle caps
+	if (ctx->Light.Enabled && (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR))
+		dwSpecularEnable = TRUE;
+	_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_SPECULARENABLE, dwSpecularEnable));
+}
+
+//---------------------------------------------------------------------------
+
+// Copies the top of Mesa's model-view matrix stack into the driver's
+// cached D3D model-view matrix (gld->matModelView).
+// The 16 floats are copied in storage order: element i of Mesa's array
+// goes to row (i / 4 + 1), column (i % 4 + 1) of the D3DMATRIX.
+void gld_NEW_MODELVIEW(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	// Mesa5: Model-view is now a stack
+	GLfloat		*src = ctx->ModelviewMatrixStack.Top->m;
+	D3DMATRIX	mat;
+
+	mat._11 = src[0];	mat._12 = src[1];	mat._13 = src[2];	mat._14 = src[3];
+	mat._21 = src[4];	mat._22 = src[5];	mat._23 = src[6];	mat._24 = src[7];
+	mat._31 = src[8];	mat._32 = src[9];	mat._33 = src[10];	mat._34 = src[11];
+	mat._41 = src[12];	mat._42 = src[13];	mat._43 = src[14];	mat._44 = src[15];
+
+	gld->matModelView = mat;
+}
+
+//---------------------------------------------------------------------------
+
+// Copies the top of Mesa's projection matrix stack into the driver's
+// cached D3D projection matrix (gld->matProjection).
+// Two elements are adjusted on the way: _33 is divided by _fPersp_33 and
+// _43 is halved; all others are copied unchanged in storage order.
+void gld_NEW_PROJECTION(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	// Mesa 5: Now a stack
+	GLfloat		*src = ctx->ProjectionMatrixStack.Top->m;
+	D3DMATRIX	mat;
+
+	mat._11 = src[0];	mat._12 = src[1];	mat._13 = src[2];	mat._14 = src[3];
+	mat._21 = src[4];	mat._22 = src[5];	mat._23 = src[6];	mat._24 = src[7];
+
+	mat._31 = src[8];
+	mat._32 = src[9];
+	mat._33 = src[10] / _fPersp_33;	// HACK: scaled by the perspective fudge factor
+	mat._34 = src[11];
+
+	mat._41 = src[12];
+	mat._42 = src[13];
+	mat._43 = src[14] / 2.0f;
+	mat._44 = src[15];
+
+	gld->matProjection = mat;
+}
+
+//---------------------------------------------------------------------------
+/*
+void gldFrustumHook_DX7(
+	GLdouble left,
+	GLdouble right,
+	GLdouble bottom,
+	GLdouble top,
+	GLdouble nearval,
+	GLdouble farval)
+{
+	GET_CURRENT_CONTEXT(ctx);
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7	*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	// Pass values on to Mesa first (in case we mess with them)
+	_mesa_Frustum(left, right, bottom, top, nearval, farval);
+
+	_fPersp_33 = farval / (nearval - farval);
+
+//	ddlogPrintf(GLDLOG_SYSTEM, "Frustum: %f", farval/nearval);
+}
+
+//---------------------------------------------------------------------------
+
+void gldOrthoHook_DX7(
+	GLdouble left,
+	GLdouble right,
+	GLdouble bottom,
+	GLdouble top,
+	GLdouble nearval,
+	GLdouble farval)
+{
+	GET_CURRENT_CONTEXT(ctx);
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7	*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	// Pass values on to Mesa first (in case we mess with them)
+	_mesa_Ortho(left, right, bottom, top, nearval, farval);
+
+	_fPersp_33 = 1.6f;
+
+//	ddlogPrintf(GLDLOG_SYSTEM, "Ortho: %f", farval/nearval);
+}
+*/
+//---------------------------------------------------------------------------
+
+// Updates the depth range of the Direct3D viewport from Mesa's viewport
+// near/far values. The current D3D viewport is read, only dvMinZ/dvMaxZ
+// are replaced, and the result is written back; position and size are
+// left as they were (the code that would set them is disabled).
+void gld_NEW_VIEWPORT(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	D3DVIEWPORT7	d3dvp;
+//	GLint			x, y;
+//	GLsizei			w, h;
+
+	// Set depth range
+	// NOTE(review): the result of GetViewport is not checked here; if it
+	// can fail, d3dvp would be passed on uninitialised - confirm.
+	_GLD_DX7_DEV(GetViewport(gld->pDev, &d3dvp));
+	// D3D can't do Quake1/Quake2 z-trick
+	// so an inverted range (near > far) is swapped into ascending order
+	if (ctx->Viewport.Near <= ctx->Viewport.Far) {
+		d3dvp.dvMinZ		= ctx->Viewport.Near;
+		d3dvp.dvMaxZ		= ctx->Viewport.Far;
+	} else {
+		d3dvp.dvMinZ		= ctx->Viewport.Far;
+		d3dvp.dvMaxZ		= ctx->Viewport.Near;
+	}
+/*	x = ctx->Viewport.X;
+	y = ctx->Viewport.Y;
+	w = ctx->Viewport.Width;
+	h = ctx->Viewport.Height;
+	if (x < 0) x = 0;
+	if (y < 0) y = 0;
+	if (w > gldCtx->dwWidth) 		w = gldCtx->dwWidth;
+	if (h > gldCtx->dwHeight) 		h = gldCtx->dwHeight;
+	// Ditto for D3D viewport dimensions
+	if (w+x > gldCtx->dwWidth) 		w = gldCtx->dwWidth-x;
+	if (h+y > gldCtx->dwHeight) 	h = gldCtx->dwHeight-y;
+	d3dvp.X			= x;
+	d3dvp.Y			= gldCtx->dwHeight - (y + h);
+	d3dvp.Width		= w;
+	d3dvp.Height	= h;*/
+	_GLD_DX7_DEV(SetViewport(gld->pDev, &d3dvp));
+
+//	gld->fFlipWindowY = (float)gldCtx->dwHeight;
+}
+
+//---------------------------------------------------------------------------
+
+// Returns TRUE if auto-normal generation or any 1-D or 2-D evaluator map
+// is enabled in the context's eval state, FALSE otherwise.
+__inline BOOL _gldAnyEvalEnabled(
+	GLcontext *ctx)
+{
+	struct gl_eval_attrib *e = &ctx->Eval;
+
+	// 1-D evaluator maps
+	if (e->Map1Color4 || e->Map1Index || e->Map1Normal ||
+		e->Map1TextureCoord1 || e->Map1TextureCoord2 ||
+		e->Map1TextureCoord3 || e->Map1TextureCoord4 ||
+		e->Map1Vertex3 || e->Map1Vertex4)
+		return TRUE;
+
+	// 2-D evaluator maps
+	if (e->Map2Color4 || e->Map2Index || e->Map2Normal ||
+		e->Map2TextureCoord1 || e->Map2TextureCoord2 ||
+		e->Map2TextureCoord3 || e->Map2TextureCoord4 ||
+		e->Map2Vertex3 || e->Map2Vertex4)
+		return TRUE;
+
+	// Automatic normal generation
+	if (e->AutoNormal)
+		return TRUE;
+
+	return FALSE;
+}
+
+//---------------------------------------------------------------------------
+
+// Chooses the transform-and-lighting (TnL) pipeline for the current state.
+//
+//   ctx  GL context whose lighting/texgen/scissor/evaluator state is tested
+//   gld  DX7 driver data; supplies bHasHWTnL and receives the usage counts
+//
+// Returns TRUE to use the Mesa TnL pipeline, FALSE to use the Direct3D
+// fixed-function pipeline. Each call increments the matching counter in
+// gld->PipelineUsage (qwMesa or qwD3DFVF).
+//
+// NOTE: the state test contains a constant-true term "(1)", so as written
+// the result is always TRUE and the Direct3D path is never selected.
+BOOL _gldChooseInternalPipeline(
+	GLcontext *ctx,
+	GLD_driver_dx7 *gld)
+{
+	BOOL bMesa;
+
+//	return TRUE;	// DEBUGGING: ALWAYS USE MESA
+//	return FALSE;	// DEBUGGING: ALWAYS USE D3D
+
+	// Force Mesa TnL: requested globally, or the device has no hardware TnL.
+	bMesa = (glb.dwTnL == GLDS_TNL_MESA) || (gld->bHasHWTnL == FALSE);
+
+	if (!bMesa) {
+		// GL state that is routed to Mesa. '||' stops at the first true
+		// term, so nothing after "(1)" is ever evaluated.
+		bMesa = (ctx->Light.Enabled) ||
+				(1) ||
+				(ctx->Texture._TexGenEnabled) ||
+				(ctx->Texture._TexMatEnabled) ||
+//				(ctx->Transform._AnyClip) ||
+				(ctx->Scissor.Enabled) ||
+				_gldAnyEvalEnabled(ctx); // Put this last so we can early-out
+	}
+
+	// Record the decision in the usage statistics and report it.
+	if (bMesa) {
+		gld->PipelineUsage.qwMesa.QuadPart++;
+		return TRUE;
+	}
+
+	// Direct3D fixed-function pipeline
+	gld->PipelineUsage.qwD3DFVF.QuadPart++;
+	return FALSE;
+
+	// Disabled design, summarised from the code that used to follow here:
+	// it returned a pipeline id rather than a BOOL.
+	//   GLD_PIPELINE_MESA
+	//       glb.dwTnL == GLDS_TNL_MESA, or ctx->Texture._GenFlags non-zero,
+	//       or two-sided lighting when the vertex shader could not be used.
+	//   GLD_PIPELINE_D3D_VS_TWOSIDE
+	//       ctx->_TriangleCaps has DD_TRI_LIGHT_TWOSIDE, a two-sided-lighting
+	//       vertex shader handle exists (gld->VStwosidelight.hShader) and
+	//       fog is disabled.
+	//   GLD_PIPELINE_D3D_FVF
+	//       everything else (Direct3D fixed-function pipeline).
+	// It counted into PipelineUsage.dwMesa / dwD3D2SVS / dwD3DFVF. The
+	// GLD_pipeline_usage struct in gld_dx7.h only has qwMesa and qwD3DFVF,
+	// and the return type here is BOOL, so that code cannot simply be
+	// re-enabled.
+}
+
+//---------------------------------------------------------------------------
+
+// Driver.UpdateState hook. new_state is a mask of _NEW_* flags; the D3D
+// handler of every flagged state group that has one is called below.
+void gld_update_state_DX7(
+	GLcontext *ctx,
+	GLuint new_state)
+{
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7	*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	// Nothing to do until the driver data and the D3D device exist.
+	if (!gld || !gld->pDev)
+		return;
+
+	// Pass the change on to the swsetup, vbo and tnl modules.
+	_swsetup_InvalidateState( ctx, new_state );
+	_vbo_InvalidateState( ctx, new_state );
+	_tnl_InvalidateState( ctx, new_state );
+
+	// SetupIndex will be used in the pipelines for choosing setup function
+	if ((ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE | DD_SEPARATE_SPECULAR)) ||
+		(ctx->Fog.Enabled))
+	{
+		if (ctx->_TriangleCaps & DD_FLATSHADE)
+			gld->iSetupFunc = GLD_SI_FLAT_EXTRAS;
+		else
+			gld->iSetupFunc = GLD_SI_SMOOTH_EXTRAS;
+	} else {
+		if (ctx->_TriangleCaps & DD_FLATSHADE)
+			gld->iSetupFunc = GLD_SI_FLAT;	// Setup flat shade + texture
+		else
+			gld->iSetupFunc = GLD_SI_SMOOTH; // Setup smooth shade + texture
+	}
+
+	// Pick the TnL pipeline. D3D clipping is switched off when Mesa does the
+	// TnL and on when Direct3D does it.
+	// DX7 does not implement D3DRS_SOFTWAREVERTEXPROCESSING, and no vertex
+	// shader is selected here, so clipping is the only render state touched.
+	gld->bUseMesaTnL = _gldChooseInternalPipeline(ctx, gld);
+	if (gld->bUseMesaTnL) {
+		_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_CLIPPING, FALSE));
+	} else {
+		_GLD_DX7_DEV(SetRenderState(gld->pDev, D3DRENDERSTATE_CLIPPING, TRUE));
+	}
+
+	// Call gld<flag>(ctx), e.g. gld_NEW_COLOR(ctx), when <flag> is set in
+	// new_state, then clear the flag.
+#define _GLD_TEST_STATE(a)		\
+	do {						\
+		if (new_state & (a)) {	\
+			gld##a(ctx);		\
+			new_state &= ~(a);	\
+		}						\
+	} while (0)
+
+	// Same, but calls gld<flag>_DX7(ctx).
+#define _GLD_TEST_STATE_DX7(a)	\
+	do {						\
+		if (new_state & (a)) {	\
+			gld##a##_DX7(ctx);	\
+			new_state &= ~(a);	\
+		}						\
+	} while (0)
+
+	// Clear <flag> without calling a handler.
+#define _GLD_IGNORE_STATE(a) do { new_state &= ~(a); } while (0)
+
+	// The matrix handlers run even when Mesa is doing the TnL: if they were
+	// skipped while gld->bUseMesaTnL is TRUE, the D3D TnL pipeline would
+	// miss the update. Therefore, don't test for gld->bUseMesaTnL here.
+	_GLD_TEST_STATE(_NEW_MODELVIEW);
+	_GLD_TEST_STATE(_NEW_PROJECTION);
+
+	_GLD_TEST_STATE_DX7(_NEW_TEXTURE); // extern, so guard with _DX7
+	_GLD_TEST_STATE(_NEW_COLOR);
+	_GLD_TEST_STATE(_NEW_DEPTH);
+	_GLD_TEST_STATE(_NEW_POLYGON);
+	_GLD_TEST_STATE(_NEW_STENCIL);
+	_GLD_TEST_STATE(_NEW_FOG);
+	_GLD_TEST_STATE(_NEW_LIGHT);
+	_GLD_TEST_STATE(_NEW_VIEWPORT);
+
+	_GLD_IGNORE_STATE(_NEW_TRANSFORM);
+
+// Stubs for future use.
+/*	_GLD_TEST_STATE(_NEW_TEXTURE_MATRIX);
+	_GLD_TEST_STATE(_NEW_COLOR_MATRIX);
+	_GLD_TEST_STATE(_NEW_ACCUM);
+	_GLD_TEST_STATE(_NEW_EVAL);
+	_GLD_TEST_STATE(_NEW_HINT);
+	_GLD_TEST_STATE(_NEW_LINE);
+	_GLD_TEST_STATE(_NEW_PIXEL);
+	_GLD_TEST_STATE(_NEW_POINT);
+	_GLD_TEST_STATE(_NEW_POLYGONSTIPPLE);
+	_GLD_TEST_STATE(_NEW_SCISSOR);
+	_GLD_TEST_STATE(_NEW_PACKUNPACK);
+	_GLD_TEST_STATE(_NEW_ARRAY);
+	_GLD_TEST_STATE(_NEW_RENDERMODE);
+	_GLD_TEST_STATE(_NEW_BUFFERS);
+	_GLD_TEST_STATE(_NEW_MULTISAMPLE);
+*/
+
+// For debugging.
+#if 0
+#define _GLD_TEST_UNHANDLED_STATE(a)									\
+	if (new_state & (a)) {									\
+		gldLogMessage(GLDLOG_ERROR, "Unhandled " #a "\n");	\
+	}
+	_GLD_TEST_UNHANDLED_STATE(_NEW_TEXTURE_MATRIX);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_COLOR_MATRIX);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_ACCUM);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_EVAL);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_HINT);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_LINE);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_PIXEL);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_POINT);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_POLYGONSTIPPLE);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_SCISSOR);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_PACKUNPACK);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_ARRAY);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_RENDERMODE);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_BUFFERS);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_MULTISAMPLE);
+#undef _GLD_TEST_UNHANDLED_STATE
+#endif
+
+	// The helper macros are local to this function; drop all of them.
+#undef _GLD_TEST_STATE
+#undef _GLD_TEST_STATE_DX7
+#undef _GLD_IGNORE_STATE
+}
+
+//---------------------------------------------------------------------------
+// Viewport
+//---------------------------------------------------------------------------
+
+// Driver.Viewport hook: sets the Direct3D viewport from the GL viewport
+// (x, y, w, h) and the current GL depth range. The rectangle is clamped to
+// the gldCtx->dwWidth x gldCtx->dwHeight render target and Y is flipped.
+// Does nothing if the driver data or device is missing, or if w == h == 1.
+void gld_Viewport_DX7(
+	GLcontext *ctx,
+	GLint x,
+	GLint y,
+	GLsizei w,
+	GLsizei h)
+{
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7	*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	D3DVIEWPORT7	d3dvp;
+
+	if (!gld || !gld->pDev)
+		return;
+
+	// This is a hack. When the app is minimized, Mesa passes
+	// w=1 and h=1 for viewport dimensions. Without this test
+	// we get a GPF in gld_wgl_resize_buffers().
+	if ((w==1) && (h==1))
+		return;
+
+	// Call ResizeBuffers. This function will early-out
+	// if no resize is needed.
+	// Mesa 5: it takes the framebuffer instead of the context.
+	ctx->Driver.ResizeBuffers(gldCtx->glBuffer);
+
+	// ** D3D viewport must not be outside the render target surface **
+	// Sanity check the GL viewport origin. The upper bound matters: without
+	// it, x > dwWidth (or y > dwHeight) makes "dwWidth-x" below go negative
+	// or wrap, which reaches Direct3D as a huge DWORD width/height.
+	if (x < 0) x = 0;
+	if (y < 0) y = 0;
+	if (x > gldCtx->dwWidth) 		x = gldCtx->dwWidth;
+	if (y > gldCtx->dwHeight) 		y = gldCtx->dwHeight;
+	// Sanity check the GL viewport dimensions
+	if (w > gldCtx->dwWidth) 		w = gldCtx->dwWidth;
+	if (h > gldCtx->dwHeight) 		h = gldCtx->dwHeight;
+	// Ditto for D3D viewport dimensions
+	if (w+x > gldCtx->dwWidth) 		w = gldCtx->dwWidth-x;
+	if (h+y > gldCtx->dwHeight) 	h = gldCtx->dwHeight-y;
+
+	// Flip Y: dwY is computed from the top of the render target.
+	d3dvp.dwX		= x;
+	d3dvp.dwY		= gldCtx->dwHeight - (y + h);
+	d3dvp.dwWidth	= w;
+	d3dvp.dwHeight	= h;
+	if (ctx->Viewport.Near <= ctx->Viewport.Far) {
+		d3dvp.dvMinZ	= ctx->Viewport.Near;
+		d3dvp.dvMaxZ	= ctx->Viewport.Far;
+	} else {
+		d3dvp.dvMinZ	= ctx->Viewport.Far;
+		d3dvp.dvMaxZ	= ctx->Viewport.Near;
+	}
+
+	_GLD_DX7_DEV(SetViewport(gld->pDev, &d3dvp));
+}
+
+//---------------------------------------------------------------------------
+
+extern BOOL dglWglResizeBuffers(GLcontext *ctx, BOOL bDefaultDriver);
+
+// Driver.ResizeBuffers hook. Since Mesa 5 the hook receives a framebuffer
+// rather than a context; fb is not used, the resize is performed on the
+// context obtained through GET_CURRENT_CONTEXT. The BOOL result is ignored.
+void gldResizeBuffers_DX7(GLframebuffer *fb)
+{
+	GET_CURRENT_CONTEXT(ctx);
+	(void)dglWglResizeBuffers(ctx, TRUE);
+}
+
+//---------------------------------------------------------------------------
+#ifdef _DEBUG
+// Debug-only tracer: logs each enable/disable as "Enable: <enum> (TRUE|FALSE)".
+// To use, plug into the ctx->Driver.Enable pointer below (NULL by default).
+void gld_Enable(GLcontext *ctx, GLenum e, GLboolean b)
+{
+	char		szMsg[1024];
+	const char	*pszName	= _mesa_lookup_enum_by_nr(e);
+	const char	*pszState	= b ? "TRUE" : "FALSE";
+
+	sprintf(szMsg, "Enable: %s (%s)\n", pszName, pszState);
+	ddlogMessage(DDLOG_SYSTEM, szMsg);
+}
+#endif
+//---------------------------------------------------------------------------
+// Driver pointer setup
+//---------------------------------------------------------------------------
+
+extern const GLubyte* _gldGetStringGeneric(GLcontext*, GLenum);
+
+// Installs this driver's callbacks: fills in ctx->Driver with the DX7
+// implementations (NULL where no driver hook is provided), calls
+// _swsetup_Wakeup() and then sets the TnL module's pipeline/render callbacks.
+void gldSetupDriverPointers_DX7(
+	GLcontext *ctx)
+{
+	TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+	// Mandatory functions
+	ctx->Driver.GetString				= _gldGetStringGeneric;
+	ctx->Driver.UpdateState				= gld_update_state_DX7;
+	ctx->Driver.Clear					= gld_Clear_DX7;
+	ctx->Driver.DrawBuffer				= gld_set_draw_buffer_DX7;
+	ctx->Driver.GetBufferSize			= gld_buffer_size_DX7;
+	ctx->Driver.Finish					= gld_Finish_DX7;
+	ctx->Driver.Flush					= gld_Flush_DX7;
+	ctx->Driver.Error					= gld_Error_DX7;
+
+	// Hardware accumulation buffer
+	ctx->Driver.Accum					= NULL; // TODO: gld_Accum;
+
+	// Bitmap functions
+	ctx->Driver.CopyPixels				= gld_CopyPixels_DX7;
+	ctx->Driver.DrawPixels				= gld_DrawPixels_DX7;
+	ctx->Driver.ReadPixels				= gld_ReadPixels_DX7;
+	ctx->Driver.Bitmap					= gld_Bitmap_DX7;
+
+	// Buffer resize
+	ctx->Driver.ResizeBuffers			= gldResizeBuffers_DX7;
+	
+	// Texture image functions (3D images use the Mesa store functions)
+	ctx->Driver.ChooseTextureFormat		= gld_ChooseTextureFormat_DX7;
+	ctx->Driver.TexImage1D				= gld_TexImage1D_DX7;
+	ctx->Driver.TexImage2D				= gld_TexImage2D_DX7;
+	ctx->Driver.TexImage3D				= _mesa_store_teximage3d;
+	ctx->Driver.TexSubImage1D			= gld_TexSubImage1D_DX7;
+	ctx->Driver.TexSubImage2D			= gld_TexSubImage2D_DX7;
+	ctx->Driver.TexSubImage3D			= _mesa_store_texsubimage3d;
+	
+	ctx->Driver.CopyTexImage1D			= gldCopyTexImage1D_DX7; //NULL;
+	ctx->Driver.CopyTexImage2D			= gldCopyTexImage2D_DX7; //NULL;
+	ctx->Driver.CopyTexSubImage1D		= gldCopyTexSubImage1D_DX7; //NULL;
+	ctx->Driver.CopyTexSubImage2D		= gldCopyTexSubImage2D_DX7; //NULL;
+	ctx->Driver.CopyTexSubImage3D		= gldCopyTexSubImage3D_DX7;
+	ctx->Driver.TestProxyTexImage		= _mesa_test_proxy_teximage;
+
+	// Texture object functions
+	ctx->Driver.BindTexture				= NULL;
+	ctx->Driver.NewTextureObject		= NULL; // Not yet implemented by Mesa!;
+	ctx->Driver.DeleteTexture			= gld_DeleteTexture_DX7;
+	ctx->Driver.PrioritizeTexture		= NULL;
+
+	// Imaging functionality
+	ctx->Driver.CopyColorTable			= NULL;
+	ctx->Driver.CopyColorSubTable		= NULL;
+	ctx->Driver.CopyConvolutionFilter1D = NULL;
+	ctx->Driver.CopyConvolutionFilter2D = NULL;
+
+	// State changing functions (only Viewport is hooked)
+	ctx->Driver.AlphaFunc				= NULL; //gld_AlphaFunc;
+	ctx->Driver.BlendFuncSeparate		= NULL; //gld_BlendFunc;
+	ctx->Driver.ClearColor				= NULL; //gld_ClearColor;
+	ctx->Driver.ClearDepth				= NULL; //gld_ClearDepth;
+	ctx->Driver.ClearStencil			= NULL; //gld_ClearStencil;
+	ctx->Driver.ColorMask				= NULL; //gld_ColorMask;
+	ctx->Driver.CullFace				= NULL; //gld_CullFace;
+	ctx->Driver.ClipPlane				= NULL; //gld_ClipPlane;
+	ctx->Driver.FrontFace				= NULL; //gld_FrontFace;
+	ctx->Driver.DepthFunc				= NULL; //gld_DepthFunc;
+	ctx->Driver.DepthMask				= NULL; //gld_DepthMask;
+	ctx->Driver.DepthRange				= NULL;
+	ctx->Driver.Enable					= NULL; //gld_Enable;
+	ctx->Driver.Fogfv					= NULL; //gld_Fogfv;
+	ctx->Driver.Hint					= NULL; //gld_Hint;
+	ctx->Driver.Lightfv					= NULL; //gld_Lightfv;
+	ctx->Driver.LightModelfv			= NULL; //gld_LightModelfv;
+	ctx->Driver.LineStipple				= NULL; //gld_LineStipple;
+	ctx->Driver.LineWidth				= NULL; //gld_LineWidth;
+	ctx->Driver.LogicOpcode				= NULL; //gld_LogicOpcode;
+	ctx->Driver.PointParameterfv		= NULL; //gld_PointParameterfv;
+	ctx->Driver.PointSize				= NULL; //gld_PointSize;
+	ctx->Driver.PolygonMode				= NULL; //gld_PolygonMode;
+	ctx->Driver.PolygonOffset			= NULL; //gld_PolygonOffset;
+	ctx->Driver.PolygonStipple			= NULL; //gld_PolygonStipple;
+	ctx->Driver.RenderMode				= NULL; //gld_RenderMode;
+	ctx->Driver.Scissor					= NULL; //gld_Scissor;
+	ctx->Driver.ShadeModel				= NULL; //gld_ShadeModel;
+	ctx->Driver.StencilFunc				= NULL; //gld_StencilFunc;
+	ctx->Driver.StencilMask				= NULL; //gld_StencilMask;
+	ctx->Driver.StencilOp				= NULL; //gld_StencilOp;
+	ctx->Driver.TexGen					= NULL; //gld_TexGen;
+	ctx->Driver.TexEnv					= NULL;
+	ctx->Driver.TexParameter			= NULL;
+	ctx->Driver.TextureMatrix			= NULL; //gld_TextureMatrix;
+	ctx->Driver.Viewport				= gld_Viewport_DX7;
+
+	_swsetup_Wakeup(ctx);
+
+	tnl->Driver.RunPipeline				= _tnl_run_pipeline;
+	tnl->Driver.Render.ResetLineStipple	= gld_ResetLineStipple_DX7;
+	tnl->Driver.Render.ClippedPolygon	= _tnl_RenderClippedPolygon;
+	tnl->Driver.Render.ClippedLine		= _tnl_RenderClippedLine;
+
+	// Hook into glFrustum() and glOrtho()
+//	ctx->Exec->Frustum					= gldFrustumHook_DX7;
+//	ctx->Exec->Ortho					= gldOrthoHook_DX7;
+
+}
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx7/gld_dx7.h b/src/mesa/drivers/windows/gldirect/dx7/gld_dx7.h
new file mode 100644
index 0000000000..b5a491e41b
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx7/gld_dx7.h
@@ -0,0 +1,292 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  GLDirect Direct3D 7.0a header file
+*
+****************************************************************************/
+
+#ifndef _GLD_DX7_H
+#define _GLD_DX7_H
+
+//---------------------------------------------------------------------------
+// Windows includes
+//---------------------------------------------------------------------------
+
+#define DIRECTDRAW_VERSION	0x0700
+#define DIRECT3D_VERSION	0x0700
+#include <d3d.h>
+#include <d3dx.h>
+
+// Typedef for obtaining function from d3d7.dll
+//typedef IDirect3D7* (WINAPI *FNDIRECT3DCREATE7) (UINT);
+
+
+//---------------------------------------------------------------------------
+// Defines
+//---------------------------------------------------------------------------
+
+#ifdef _DEBUG
+// Debug build tests the return value of D3D calls
+#define _GLD_TEST_HRESULT(h)					\
+{												\
+	HRESULT _hr = (h);							\
+	if (FAILED(_hr)) {							\
+		gldLogError(GLDLOG_ERROR, #h, _hr);		\
+	}											\
+}
+#define _GLD_DX7(func)		_GLD_TEST_HRESULT(IDirect3D7_##func##)
+#define _GLD_DX7_DEV(func)	_GLD_TEST_HRESULT(IDirect3DDevice7_##func##)
+#define _GLD_DX7_VB(func)	_GLD_TEST_HRESULT(IDirect3DVertexBuffer7_##func##)
+#define _GLD_DX7_TEX(func)	_GLD_TEST_HRESULT(IDirectDrawSurface7_##func##)
+#else
+#define _GLD_DX7(func)		IDirect3D7_##func
+#define _GLD_DX7_DEV(func)	IDirect3DDevice7_##func
+#define _GLD_DX7_VB(func)	IDirect3DVertexBuffer7_##func
+#define _GLD_DX7_TEX(func)	IDirectDrawSurface7_##func
+#endif
+// If p is non-NULL: call Release() through p's lpVtbl, then set p to NULL.
+#define SAFE_RELEASE(p)			\
+{								\
+	if (p) {					\
+		(p)->lpVtbl->Release(p);	\
+		(p) = NULL;				\
+	}							\
+}
+// Same pattern, releasing through IDirect3DVertexBuffer7_Release().
+#define SAFE_RELEASE_VB7(p)						\
+{												\
+	if (p) {									\
+		IDirect3DVertexBuffer7_Release((p));	\
+		(p) = NULL;								\
+	}											\
+}
+// Same pattern, releasing through IDirectDrawSurface7_Release().
+#define SAFE_RELEASE_SURFACE7(p)		\
+{										\
+	if (p) {							\
+		IDirectDrawSurface7_Release((p));	\
+		(p) = NULL;						\
+	}									\
+}
+
+// Emulate some DX8 defines: pack four channels, each masked to 8 bits, into a D3DCOLOR (A in bits 24-31, R 16-23, G 8-15, B 0-7); COLORVALUE first scales its arguments by 255.
+#define D3DCOLOR_ARGB(a,r,g,b) ((D3DCOLOR)((((a)&0xff)<<24)|(((r)&0xff)<<16)|(((g)&0xff)<<8)|((b)&0xff)))
+#define D3DCOLOR_RGBA(r,g,b,a) D3DCOLOR_ARGB(a,r,g,b)
+#define D3DCOLOR_COLORVALUE(r,g,b,a) D3DCOLOR_RGBA((DWORD)((r)*255.f),(DWORD)((g)*255.f),(DWORD)((b)*255.f),(DWORD)((a)*255.f))
+
+
+// Setup index: the values stored in GLD_driver_dx7.iSetupFunc ("which setup functions to use").
+enum {
+	GLD_SI_FLAT				= 0,	// Flat shade + texture
+	GLD_SI_SMOOTH			= 1,	// Smooth shade + texture
+	GLD_SI_FLAT_EXTRAS		= 2,	// Flat; chosen when two-sided lighting, separate specular or fog is on
+	GLD_SI_SMOOTH_EXTRAS	= 3,	// Smooth; chosen when two-sided lighting, separate specular or fog is on
+};
+
+//---------------------------------------------------------------------------
+// Vertex definitions for Fixed-Function pipeline
+//---------------------------------------------------------------------------
+
+//
+// NOTE: If the number of texture units is altered then most of
+//       the texture code will need to be revised.
+//       Both vertex formats below carry exactly two texture coordinate sets.
+
+#define GLD_MAX_TEXTURE_UNITS_DX7	2
+
+// 2D vertex transformed by Mesa.
+// GLD_FVF_2D_VERTEX is the FVF code for GLD_2D_VERTEX: position with RHW,
+// diffuse and specular colours, and two sets of texture coordinates.
+#define GLD_FVF_2D_VERTEX (	D3DFVF_XYZRHW |		\
+							D3DFVF_DIFFUSE |	\
+							D3DFVF_SPECULAR |	\
+							D3DFVF_TEX2)
+typedef struct {
+	FLOAT	x, y;		// 2D raster coords
+	FLOAT	sz;			// Screen Z (depth)
+	FLOAT	rhw;		// Reciprocal homogeneous W
+	DWORD	diffuse;	// Diffuse colour
+	DWORD	specular;	// For separate-specular support
+	FLOAT	t0_u, t0_v;	// 1st set of texture coords
+	FLOAT	t1_u, t1_v;	// 2nd set of texture coords
+} GLD_2D_VERTEX;
+
+
+// 3D vertex transformed by Direct3D.
+// GLD_FVF_3D_VERTEX is the FVF code for GLD_3D_VERTEX: untransformed position,
+// diffuse colour and two sets of texture coordinates (no specular member).
+#define GLD_FVF_3D_VERTEX (	D3DFVF_XYZ |				\
+							D3DFVF_DIFFUSE |			\
+							D3DFVF_TEX2)
+
+typedef struct {
+	D3DXVECTOR3		Position;		// XYZ Vector in object space
+	D3DCOLOR		Diffuse;		// Diffuse colour
+	D3DXVECTOR2		TexUnit0;		// Texture unit 0
+	D3DXVECTOR2		TexUnit1;		// Texture unit 1
+} GLD_3D_VERTEX;
+
+//---------------------------------------------------------------------------
+// Structs
+//---------------------------------------------------------------------------
+
+// Counts how many times each internal TnL pathway has been chosen. Useful for
+// seeing if a certain pathway was ever used by an app, and how much each
+// pathway is biased. Incremented by _gldChooseInternalPipeline().
+// Zero the members at context creation and dump stats at context deletion.
+typedef struct {
+	// ULARGE_INTEGER counters (updated through .QuadPart): DWORD is probably too small
+	ULARGE_INTEGER	qwMesa;		// Mesa TnL pipeline
+	ULARGE_INTEGER	qwD3DFVF;	// Direct3D Fixed-Function pipeline
+} GLD_pipeline_usage;
+
+// GLDirect Primitive Buffer: one vertex buffer holding points, lines, triangles and quads
+typedef struct {
+	// Data for IDirect3D7::CreateVertexBuffer()
+	DWORD					dwStride;		// Stride of vertex
+	DWORD					dwCreateFlags;	// Create flags
+	DWORD					dwFVF;			// Direct3D Flexible Vertex Format
+
+	IDirect3DVertexBuffer7	*pVB;			// Holds points, lines, tris and quads.
+
+	// Layout of pVB: point list is assumed to be at start of buffer
+	DWORD					iFirstLine;		// Index of start of line list
+	DWORD					iFirstTriangle;	// Index of start of triangle list
+
+	BYTE					*pPoints;		// Pointer to next free point
+	BYTE					*pLines;		// Pointer to next free line
+	BYTE					*pTriangles;	// Pointer to next free triangle
+
+	DWORD					nPoints;		// Number of points ready to render
+	DWORD					nLines;			// Number of lines ready to render
+	DWORD					nTriangles;		// Number of triangles ready to render
+} GLD_pb_dx7;
+
+// GLDirect DX7 driver data; obtained from a GLD_context with GLD_GET_DX7_DRIVER()
+typedef struct {
+	// GLDirect vars
+	BOOL					bDoublebuffer;	// Doublebuffer (otherwise single-buffered)
+	BOOL					bDepthStencil;	// Depth buffer needed (stencil optional)
+	D3DX_SURFACEFORMAT		RenderFormat;	// Format of back/front buffer
+	D3DX_SURFACEFORMAT		DepthFormat;	// Format of depth/stencil
+
+	// Direct3D vars
+	DDCAPS					ddCaps;			// DirectDraw caps structure
+	D3DDEVICEDESC7			d3dCaps;		// Direct3D7 device description
+	BOOL					bHasHWTnL;		// Device has Hardware Transform/Light?
+	ID3DXContext			*pD3DXContext;	// Base D3DX context
+	IDirectDraw7			*pDD;			// DirectDraw7 interface
+	IDirect3D7				*pD3D;			// Base Direct3D7 interface
+	IDirect3DDevice7		*pDev;			// Direct3D7 Device interface
+	GLD_pb_dx7				PB2d;			// Vertices transformed by Mesa
+	GLD_pb_dx7				PB3d;			// Vertices transformed by Direct3D
+	D3DPRIMITIVETYPE		d3dpt;			// Current Direct3D primitive type
+	D3DMATRIX				matProjection;	// Projection matrix for D3D TnL
+	D3DMATRIX				matModelView;	// Model/View matrix for D3D TnL
+	int						iSetupFunc;		// Which setup functions to use (a GLD_SI_* value)
+	BOOL					bUseMesaTnL;	// Whether to use Mesa or D3D for TnL
+
+	GLD_pipeline_usage		PipelineUsage;	// How often each TnL pipeline was chosen
+} GLD_driver_dx7;
+
+#define GLD_GET_DX7_DRIVER(c) ((GLD_driver_dx7*)(c)->glPriv)	// c->glPriv as GLD_driver_dx7*; outer parentheses keep "GLD_GET_DX7_DRIVER(c)->member" binding to the cast result
+
+//---------------------------------------------------------------------------
+// Function prototypes
+//---------------------------------------------------------------------------
+
+PROC	gldGetProcAddress_DX7(LPCSTR a);
+void	gldEnableExtensions_DX7(GLcontext *ctx);
+void	gldInstallPipeline_DX7(GLcontext *ctx);
+void	gldSetupDriverPointers_DX7(GLcontext *ctx);
+void	gldResizeBuffers_DX7(GLframebuffer *fb);
+
+
+// Texture functions
+
+void	gldCopyTexImage1D_DX7(GLcontext *ctx, GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLint border);
+void	gldCopyTexImage2D_DX7(GLcontext *ctx, GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border);
+void	gldCopyTexSubImage1D_DX7(GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width );
+void	gldCopyTexSubImage2D_DX7(GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height );
+void	gldCopyTexSubImage3D_DX7(GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height );
+
+void	gld_NEW_TEXTURE_DX7(GLcontext *ctx);
+void	gld_DrawPixels_DX7(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels);
+void	gld_ReadPixels_DX7(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *unpack, GLvoid *dest);
+void	gld_CopyPixels_DX7(GLcontext *ctx, GLint srcx, GLint srcy, GLsizei width, GLsizei height, GLint dstx, GLint dsty, GLenum type);
+void	gld_Bitmap_DX7(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap);
+const struct gl_texture_format* gld_ChooseTextureFormat_DX7(GLcontext *ctx, GLint internalFormat, GLenum srcFormat, GLenum srcType);
+void	gld_TexImage2D_DX7(GLcontext *ctx, GLenum target, GLint level, GLint internalFormat, GLint width, GLint height, GLint border, GLenum format, GLenum type, const GLvoid *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *tObj, struct gl_texture_image *texImage);
+void	gld_TexImage1D_DX7(GLcontext *ctx, GLenum target, GLint level, GLint internalFormat, GLint width, GLint border, GLenum format, GLenum type, const GLvoid *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *texObj, struct gl_texture_image *texImage );
+void	gld_TexSubImage2D_DX7( GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *texObj, struct gl_texture_image *texImage );
+void	gld_TexSubImage1D_DX7(GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const GLvoid *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *texObj, struct gl_texture_image *texImage);
+void	gld_DeleteTexture_DX7(GLcontext *ctx, struct gl_texture_object *tObj);
+void	gld_ResetLineStipple_DX7(GLcontext *ctx);
+
+// 2D primitive functions
+
+void	gld_Points2D_DX7(GLcontext *ctx, GLuint first, GLuint last);
+
+void	gld_Line2DFlat_DX7(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Line2DSmooth_DX7(GLcontext *ctx, GLuint v0, GLuint v1);
+
+void	gld_Triangle2DFlat_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Triangle2DSmooth_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Triangle2DFlatExtras_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Triangle2DSmoothExtras_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+
+void	gld_Quad2DFlat_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Quad2DSmooth_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Quad2DFlatExtras_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Quad2DSmoothExtras_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+
+// 3D primitive functions
+
+void	gld_Points3D_DX7(GLcontext *ctx, GLuint first, GLuint last);
+void	gld_Line3DFlat_DX7(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Triangle3DFlat_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Quad3DFlat_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Line3DSmooth_DX7(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Triangle3DSmooth_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Quad3DSmooth_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+
+// Primitive functions for Two-sided-lighting Vertex Shader
+
+void	gld_Points2DTwoside_DX7(GLcontext *ctx, GLuint first, GLuint last);
+void	gld_Line2DFlatTwoside_DX7(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Line2DSmoothTwoside_DX7(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Triangle2DFlatTwoside_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Triangle2DSmoothTwoside_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Quad2DFlatTwoside_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Quad2DSmoothTwoside_DX7(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+
+#endif
diff --git a/src/mesa/drivers/windows/gldirect/dx7/gld_dxerr7.h b/src/mesa/drivers/windows/gldirect/dx7/gld_dxerr7.h
new file mode 100644
index 0000000000..df6fceb43e
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx7/gld_dxerr7.h
@@ -0,0 +1,77 @@
+/*==========================================================================;
+ *
+ *
+ *  File:   gld_dxerr7.h (based on dxerr8.h)
+ *  Content:    DirectX Error Library Include File
+ *
+ ****************************************************************************/
+
+#ifndef _GLD_DXERR7_H_
+#define _GLD_DXERR7_H_
+
+
+#include <d3d.h>
+
+//
+//  DXGetErrorString8
+//  
+//  Desc:  Converts a DirectX HRESULT to a string
+//
+//  Args:  HRESULT hr   Can be any error code from
+//                      DPLAY D3D8 D3DX8 DMUSIC DSOUND
+//
+//  Return: Converted string 
+//
+const char*  __stdcall DXGetErrorString8A(HRESULT hr);
+const WCHAR* __stdcall DXGetErrorString8W(HRESULT hr);
+
+#ifdef UNICODE
+    #define DXGetErrorString8 DXGetErrorString8W
+#else
+    #define DXGetErrorString8 DXGetErrorString8A
+#endif 
+
+
+//
+//  DXTrace
+//
+//  Desc:  Outputs a formatted error message to the debug stream
+//
+//  Args:  CHAR* strFile   The current file, typically passed in using the 
+//                         __FILE__ macro.
+//         DWORD dwLine    The current line number, typically passed in using the 
+//                         __LINE__ macro.
+//         HRESULT hr      An HRESULT that will be traced to the debug stream.
+//         CHAR* strMsg    A string that will be traced to the debug stream (may be NULL)
+//         BOOL bPopMsgBox If TRUE, then a message box will popup also containing the passed info.
+//
+//  Return: The hr that was passed in.  
+//
+//HRESULT __stdcall DXTraceA( char* strFile, DWORD dwLine, HRESULT hr, char* strMsg, BOOL bPopMsgBox = FALSE );
+//HRESULT __stdcall DXTraceW( char* strFile, DWORD dwLine, HRESULT hr, WCHAR* strMsg, BOOL bPopMsgBox = FALSE );
+HRESULT __stdcall DXTraceA( char* strFile, DWORD dwLine, HRESULT hr, char* strMsg, BOOL bPopMsgBox);
+HRESULT __stdcall DXTraceW( char* strFile, DWORD dwLine, HRESULT hr, WCHAR* strMsg, BOOL bPopMsgBox);
+
+#ifdef UNICODE
+    #define DXTrace DXTraceW
+#else
+    #define DXTrace DXTraceA
+#endif 
+
+
+//
+// Helper macros: with DEBUG or _DEBUG defined they call DXTrace with __FILE__ and
+// __LINE__ (only DXTRACE_ERR asks for a message box); otherwise DXTRACE_MSG is 0L and the _ERR forms are hr.
+#if defined(DEBUG) | defined(_DEBUG)
+    #define DXTRACE_MSG(str)              DXTrace( __FILE__, (DWORD)__LINE__, 0, str, FALSE )
+    #define DXTRACE_ERR(str,hr)           DXTrace( __FILE__, (DWORD)__LINE__, hr, str, TRUE )
+    #define DXTRACE_ERR_NOMSGBOX(str,hr)  DXTrace( __FILE__, (DWORD)__LINE__, hr, str, FALSE )
+#else
+    #define DXTRACE_MSG(str)              (0L)
+    #define DXTRACE_ERR(str,hr)           (hr)
+    #define DXTRACE_ERR_NOMSGBOX(str,hr)  (hr)
+#endif
+
+
+#endif
+
diff --git a/src/mesa/drivers/windows/gldirect/dx7/gld_ext_dx7.c b/src/mesa/drivers/windows/gldirect/dx7/gld_ext_dx7.c
new file mode 100644
index 0000000000..ba60980bbe
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx7/gld_ext_dx7.c
@@ -0,0 +1,346 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  GL extensions
+*
+****************************************************************************/
+
+//#include "../GLDirect.h"
+//#include "../gld_log.h"
+//#include "../gld_settings.h"
+
+#include <windows.h>
+#define GL_GLEXT_PROTOTYPES
+#include <GL/gl.h>
+#include <GL/glext.h>
+
+//#include "ddlog.h"
+//#include "gld_dx8.h"
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "depth.h"
+#include "extensions.h"
+#include "macros.h"
+#include "matrix.h"
+// #include "mem.h"
+//#include "mmath.h"
+#include "mtypes.h"
+#include "texformat.h"
+#include "texstore.h"
+#include "vbo/vbo.h"
+#include "swrast_setup/swrast_setup.h"
+#include "swrast_setup/ss_context.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "dglcontext.h"
+#include "extensions.h"
+
+// For some reason this is not defined in an above header...
+extern void _mesa_enable_imaging_extensions(GLcontext *ctx);
+
+//---------------------------------------------------------------------------
+// Hack for the SGIS_multitexture extension that was removed from Mesa
+// NOTE: SGIS_multitexture enums also clash with GL_SGIX_async_pixel
+
+	// NOTE: Quake2 ran *slower* with this enabled, so I've
+	// disabled it for now.
+	// To enable, uncomment:
+	//  _mesa_add_extension(ctx, GL_TRUE, szGL_SGIS_multitexture, 0);
+
+//---------------------------------------------------------------------------
+
+// Token values for the SGIS multitexture interface. They are declared
+// locally because the extension is no longer part of Mesa (see the note
+// above). The gld*SGIS wrappers in this file use GL_TEXTURE0_SGIS as the
+// base when converting an SGIS texture-unit token to its ARB equivalent.
+enum {
+	/* Quake2 GL_SGIS_multitexture */
+	GL_SELECTED_TEXTURE_SGIS			= 0x835B,
+	GL_SELECTED_TEXTURE_COORD_SET_SGIS	= 0x835C,
+	GL_MAX_TEXTURES_SGIS				= 0x835D,
+	GL_TEXTURE0_SGIS					= 0x835E,
+	GL_TEXTURE1_SGIS					= 0x835F,
+	GL_TEXTURE2_SGIS					= 0x8360,
+	GL_TEXTURE3_SGIS					= 0x8361,
+	GL_TEXTURE_COORD_SET_SOURCE_SGIS	= 0x8363,
+};
+
+//---------------------------------------------------------------------------
+
+// SGIS-style "select texture" entry point, implemented on top of
+// glActiveTextureARB.
+//
+// target: an SGIS texture-unit token (GL_TEXTUREn_SGIS). Its offset from
+//         GL_TEXTURE0_SGIS is re-applied to GL_TEXTURE0_ARB.
+void APIENTRY gldSelectTextureSGIS(
+	GLenum target)
+{
+	const GLenum unitOffset = target - GL_TEXTURE0_SGIS;
+
+	glActiveTextureARB(GL_TEXTURE0_ARB + unitOffset);
+}
+
+//---------------------------------------------------------------------------
+
+// SGIS-style two-component texture coordinate call, forwarded to
+// glMultiTexCoord2fARB.
+//
+// target: an SGIS texture-unit token (GL_TEXTUREn_SGIS), rebased onto
+//         GL_TEXTURE0_ARB.
+// s, t:   texture coordinates, passed through unchanged.
+void APIENTRY gldMTexCoord2fSGIS(
+	GLenum target,
+	GLfloat s,
+	GLfloat t)
+{
+	const GLenum unitOffset = target - GL_TEXTURE0_SGIS;
+
+	glMultiTexCoord2fARB(GL_TEXTURE0_ARB + unitOffset, s, t);
+}
+
+//---------------------------------------------------------------------------
+
+// Vector form of the SGIS two-component texture coordinate call,
+// forwarded to glMultiTexCoord2fvARB.
+//
+// target: an SGIS texture-unit token (GL_TEXTUREn_SGIS), rebased onto
+//         GL_TEXTURE0_ARB.
+// v:      pointer to the coordinates, passed through unchanged.
+void APIENTRY gldMTexCoord2fvSGIS(
+	GLenum target,
+	const GLfloat *v)
+{
+	const GLenum unitOffset = target - GL_TEXTURE0_SGIS;
+
+	glMultiTexCoord2fvARB(GL_TEXTURE0_ARB + unitOffset, v);
+}
+
+//---------------------------------------------------------------------------
+// Extensions
+//---------------------------------------------------------------------------
+
+// One row of an extension lookup table: an entry point and the name it is
+// looked up by. The tables in this file end with a row whose proc is NULL.
+typedef struct {
+	PROC proc;		// Function address returned to the caller
+	char *name;		// Name compared (case-sensitively) against the request
+}  GLD_extension;
+
+// Extension entry points that are always available through
+// gldGetProcAddress_DX(). Lookup is an exact, case-sensitive string compare,
+// so each name must match the function's real spelling.
+// The table ends with a sentinel row whose proc is NULL.
+GLD_extension GLD_extList[] = {
+#ifdef GL_EXT_polygon_offset
+    {	(PROC)glPolygonOffsetEXT,		"glPolygonOffsetEXT"		},
+#endif
+    {	(PROC)glBlendEquationEXT,		"glBlendEquationEXT"		},
+    {	(PROC)glBlendColorEXT,			"glBlendColorEXT"			},
+    {	(PROC)glVertexPointerEXT,		"glVertexPointerEXT"		},
+    {	(PROC)glNormalPointerEXT,		"glNormalPointerEXT"		},
+    {	(PROC)glColorPointerEXT,		"glColorPointerEXT"			},
+    {	(PROC)glIndexPointerEXT,		"glIndexPointerEXT"			},
+    {	(PROC)glTexCoordPointerEXT,		"glTexCoordPointerEXT"		},
+    {	(PROC)glEdgeFlagPointerEXT,		"glEdgeFlagPointerEXT"		},
+    {	(PROC)glGetPointervEXT,			"glGetPointervEXT"			},
+    {	(PROC)glArrayElementEXT,		"glArrayElementEXT"			},
+    {	(PROC)glDrawArraysEXT,			"glDrawArraysEXT"			},
+    {	(PROC)glAreTexturesResidentEXT,	"glAreTexturesResidentEXT"	},
+    {	(PROC)glBindTextureEXT,			"glBindTextureEXT"			},
+    {	(PROC)glDeleteTexturesEXT,		"glDeleteTexturesEXT"		},
+    {	(PROC)glGenTexturesEXT,			"glGenTexturesEXT"			},
+    {	(PROC)glIsTextureEXT,			"glIsTextureEXT"			},
+    {	(PROC)glPrioritizeTexturesEXT,	"glPrioritizeTexturesEXT"	},
+    {	(PROC)glCopyTexSubImage3DEXT,	"glCopyTexSubImage3DEXT"	},
+    {	(PROC)glTexImage3DEXT,			"glTexImage3DEXT"			},
+    {	(PROC)glTexSubImage3DEXT,		"glTexSubImage3DEXT"		},
+    {	(PROC)glPointParameterfEXT,		"glPointParameterfEXT"		},
+    {	(PROC)glPointParameterfvEXT,	"glPointParameterfvEXT"		},
+
+    {	(PROC)glLockArraysEXT,			"glLockArraysEXT"			},
+    {	(PROC)glUnlockArraysEXT,		"glUnlockArraysEXT"			},
+
+	// Earlier versions of this table registered the three functions below
+	// under misspelled names only. The old spellings are kept as aliases so
+	// that any caller relying on them continues to resolve.
+    {	(PROC)glBlendColorEXT,			"glBlendColorExt"			},
+    {	(PROC)glTexCoordPointerEXT,		"glTexCoordPointer"			},
+    {	(PROC)glDrawArraysEXT,			"glDrawArrayEXT"			},
+
+	{	NULL,							"\0"						}
+};
+
+// Multitexture entry points. gldGetProcAddress_DX() consults this table
+// only when glb.bMultitexture is set. The large commented-out section lists
+// direct SGIS entries that are not compiled in; the live SGIS names at the
+// bottom map to the gld*SGIS wrapper functions defined in this file.
+// The table ends with a sentinel row whose proc is NULL.
+GLD_extension GLD_multitexList[] = {
+/*
+    {	(PROC)glMultiTexCoord1dSGIS,		"glMTexCoord1dSGIS"			},
+    {	(PROC)glMultiTexCoord1dvSGIS,		"glMTexCoord1dvSGIS"		},
+    {	(PROC)glMultiTexCoord1fSGIS,		"glMTexCoord1fSGIS"			},
+    {	(PROC)glMultiTexCoord1fvSGIS,		"glMTexCoord1fvSGIS"		},
+    {	(PROC)glMultiTexCoord1iSGIS,		"glMTexCoord1iSGIS"			},
+    {	(PROC)glMultiTexCoord1ivSGIS,		"glMTexCoord1ivSGIS"		},
+    {	(PROC)glMultiTexCoord1sSGIS,		"glMTexCoord1sSGIS"			},
+    {	(PROC)glMultiTexCoord1svSGIS,		"glMTexCoord1svSGIS"		},
+    {	(PROC)glMultiTexCoord2dSGIS,		"glMTexCoord2dSGIS"			},
+    {	(PROC)glMultiTexCoord2dvSGIS,		"glMTexCoord2dvSGIS"		},
+    {	(PROC)glMultiTexCoord2fSGIS,		"glMTexCoord2fSGIS"			},
+    {	(PROC)glMultiTexCoord2fvSGIS,		"glMTexCoord2fvSGIS"		},
+    {	(PROC)glMultiTexCoord2iSGIS,		"glMTexCoord2iSGIS"			},
+    {	(PROC)glMultiTexCoord2ivSGIS,		"glMTexCoord2ivSGIS"		},
+    {	(PROC)glMultiTexCoord2sSGIS,		"glMTexCoord2sSGIS"			},
+    {	(PROC)glMultiTexCoord2svSGIS,		"glMTexCoord2svSGIS"		},
+    {	(PROC)glMultiTexCoord3dSGIS,		"glMTexCoord3dSGIS"			},
+    {	(PROC)glMultiTexCoord3dvSGIS,		"glMTexCoord3dvSGIS"		},
+    {	(PROC)glMultiTexCoord3fSGIS,		"glMTexCoord3fSGIS"			},
+    {	(PROC)glMultiTexCoord3fvSGIS,		"glMTexCoord3fvSGIS"		},
+    {	(PROC)glMultiTexCoord3iSGIS,		"glMTexCoord3iSGIS"			},
+    {	(PROC)glMultiTexCoord3ivSGIS,		"glMTexCoord3ivSGIS"		},
+    {	(PROC)glMultiTexCoord3sSGIS,		"glMTexCoord3sSGIS"			},
+    {	(PROC)glMultiTexCoord3svSGIS,		"glMTexCoord3svSGIS"		},
+    {	(PROC)glMultiTexCoord4dSGIS,		"glMTexCoord4dSGIS"			},
+    {	(PROC)glMultiTexCoord4dvSGIS,		"glMTexCoord4dvSGIS"		},
+    {	(PROC)glMultiTexCoord4fSGIS,		"glMTexCoord4fSGIS"			},
+    {	(PROC)glMultiTexCoord4fvSGIS,		"glMTexCoord4fvSGIS"		},
+    {	(PROC)glMultiTexCoord4iSGIS,		"glMTexCoord4iSGIS"			},
+    {	(PROC)glMultiTexCoord4ivSGIS,		"glMTexCoord4ivSGIS"		},
+    {	(PROC)glMultiTexCoord4sSGIS,		"glMTexCoord4sSGIS"			},
+    {	(PROC)glMultiTexCoord4svSGIS,		"glMTexCoord4svSGIS"		},
+    {	(PROC)glMultiTexCoordPointerSGIS,	"glMTexCoordPointerSGIS"	},
+    {	(PROC)glSelectTextureSGIS,			"glSelectTextureSGIS"			},
+    {	(PROC)glSelectTextureCoordSetSGIS,	"glSelectTextureCoordSetSGIS"	},
+*/
+    {	(PROC)glActiveTextureARB,		"glActiveTextureARB"		},
+    {	(PROC)glClientActiveTextureARB,	"glClientActiveTextureARB"	},
+    {	(PROC)glMultiTexCoord1dARB,		"glMultiTexCoord1dARB"		},
+    {	(PROC)glMultiTexCoord1dvARB,	"glMultiTexCoord1dvARB"		},
+    {	(PROC)glMultiTexCoord1fARB,		"glMultiTexCoord1fARB"		},
+    {	(PROC)glMultiTexCoord1fvARB,	"glMultiTexCoord1fvARB"		},
+    {	(PROC)glMultiTexCoord1iARB,		"glMultiTexCoord1iARB"		},
+    {	(PROC)glMultiTexCoord1ivARB,	"glMultiTexCoord1ivARB"		},
+    {	(PROC)glMultiTexCoord1sARB,		"glMultiTexCoord1sARB"		},
+    {	(PROC)glMultiTexCoord1svARB,	"glMultiTexCoord1svARB"		},
+    {	(PROC)glMultiTexCoord2dARB,		"glMultiTexCoord2dARB"		},
+    {	(PROC)glMultiTexCoord2dvARB,	"glMultiTexCoord2dvARB"		},
+    {	(PROC)glMultiTexCoord2fARB,		"glMultiTexCoord2fARB"		},
+    {	(PROC)glMultiTexCoord2fvARB,	"glMultiTexCoord2fvARB"		},
+    {	(PROC)glMultiTexCoord2iARB,		"glMultiTexCoord2iARB"		},
+    {	(PROC)glMultiTexCoord2ivARB,	"glMultiTexCoord2ivARB"		},
+    {	(PROC)glMultiTexCoord2sARB,		"glMultiTexCoord2sARB"		},
+    {	(PROC)glMultiTexCoord2svARB,	"glMultiTexCoord2svARB"		},
+    {	(PROC)glMultiTexCoord3dARB,		"glMultiTexCoord3dARB"		},
+    {	(PROC)glMultiTexCoord3dvARB,	"glMultiTexCoord3dvARB"		},
+    {	(PROC)glMultiTexCoord3fARB,		"glMultiTexCoord3fARB"		},
+    {	(PROC)glMultiTexCoord3fvARB,	"glMultiTexCoord3fvARB"		},
+    {	(PROC)glMultiTexCoord3iARB,		"glMultiTexCoord3iARB"		},
+    {	(PROC)glMultiTexCoord3ivARB,	"glMultiTexCoord3ivARB"		},
+    {	(PROC)glMultiTexCoord3sARB,		"glMultiTexCoord3sARB"		},
+    {	(PROC)glMultiTexCoord3svARB,	"glMultiTexCoord3svARB"		},
+    {	(PROC)glMultiTexCoord4dARB,		"glMultiTexCoord4dARB"		},
+    {	(PROC)glMultiTexCoord4dvARB,	"glMultiTexCoord4dvARB"		},
+    {	(PROC)glMultiTexCoord4fARB,		"glMultiTexCoord4fARB"		},
+    {	(PROC)glMultiTexCoord4fvARB,	"glMultiTexCoord4fvARB"		},
+    {	(PROC)glMultiTexCoord4iARB,		"glMultiTexCoord4iARB"		},
+    {	(PROC)glMultiTexCoord4ivARB,	"glMultiTexCoord4ivARB"		},
+    {	(PROC)glMultiTexCoord4sARB,		"glMultiTexCoord4sARB"		},
+    {	(PROC)glMultiTexCoord4svARB,	"glMultiTexCoord4svARB"		},
+
+	// Descent3 doesn't use correct string, hence this hack
+    {	(PROC)glMultiTexCoord4fARB,		"glMultiTexCoord4f"			},
+
+	// Quake2 SGIS multitexture
+    {	(PROC)gldSelectTextureSGIS,		"glSelectTextureSGIS"		},
+    {	(PROC)gldMTexCoord2fSGIS,		"glMTexCoord2fSGIS"			},
+    {	(PROC)gldMTexCoord2fvSGIS,		"glMTexCoord2fvSGIS"		},
+
+	{	NULL,							"\0"						}
+};
+
+//---------------------------------------------------------------------------
+
+// Resolve an extension function name to its entry point.
+//
+// a: name of the requested function (exact, case-sensitive match).
+//
+// Returns the matching address from GLD_extList, or from GLD_multitexList
+// when glb.bMultitexture is set, or NULL if the name is unknown or a is
+// NULL. The outcome of every request is written to the log.
+PROC gldGetProcAddress_DX(
+	LPCSTR a)
+{
+	int		i;
+	PROC	proc = NULL;
+
+	// strcmp() on a NULL pointer is undefined, so reject it up front.
+	if (a == NULL) {
+		gldLogPrintf(GLDLOG_INFO, "GetProcAddress: %s (%s)", "(null)", "Failed");
+		return NULL;
+	}
+
+	for (i=0; GLD_extList[i].proc; i++) {
+		if (!strcmp(a, GLD_extList[i].name)) {
+			proc = GLD_extList[i].proc;
+			break;
+		}
+	}
+
+	// Only fall back to the multitexture table if nothing matched above.
+	if (!proc && glb.bMultitexture) {
+		for (i=0; GLD_multitexList[i].proc; i++) {
+			if (!strcmp(a, GLD_multitexList[i].name)) {
+				proc = GLD_multitexList[i].proc;
+				break;
+			}
+		}
+	}
+
+	gldLogPrintf(GLDLOG_INFO, "GetProcAddress: %s (%s)", a, proc ? "OK" : "Failed");
+
+	return proc;
+}
+
+//---------------------------------------------------------------------------
+
+// Adjust the extension set Mesa advertises for this context.
+//
+// Order of operations: switch off the entries in the disable table, switch
+// on the entries in the enable table, switch on the multitexture entries
+// when glb.bMultitexture is set, then enable the imaging, 1.3 and 1.4
+// extension groups.
+void gldEnableExtensions_DX7(
+	GLcontext *ctx)
+{
+	const char **ext;
+
+	// Mesa enables some extensions by default.
+	// These are the ones we switch off again.
+	// NOTE: GL_EXT_compiled_vertex_array appears broken.
+	const char *gld_disable_extensions[] = {
+//		"GL_ARB_transpose_matrix",
+//		"GL_EXT_compiled_vertex_array",
+//		"GL_EXT_polygon_offset",
+//		"GL_EXT_rescale_normal",
+		"GL_EXT_texture3D",
+//		"GL_NV_texgen_reflection",
+		"GL_EXT_abgr",
+		"GL_EXT_bgra",
+		NULL
+	};
+
+	// Enabled only when multitexturing is switched on.
+	const char *gld_multitex_extensions[] = {
+		"GL_ARB_multitexture",		// Quake 3
+		NULL
+	};
+
+	// Quake 2 engines (only referenced by the disabled call further down)
+	const char *szGL_SGIS_multitexture = "GL_SGIS_multitexture";
+
+	// Always enabled.
+	const char *gld_enable_extensions[] = {
+		"GL_EXT_texture_env_add",	// Quake 3
+		"GL_ARB_texture_env_add",	// Quake 3
+		NULL
+	};
+
+	// Each table is NULL-terminated; walk it until the sentinel.
+	for (ext = gld_disable_extensions; *ext; ext++)
+		_mesa_disable_extension(ctx, *ext);
+
+	for (ext = gld_enable_extensions; *ext; ext++)
+		_mesa_enable_extension(ctx, *ext);
+
+	if (glb.bMultitexture) {
+		for (ext = gld_multitex_extensions; *ext; ext++)
+			_mesa_enable_extension(ctx, *ext);
+
+		// GL_SGIS_multitexture
+		// NOTE: Quake2 ran *slower* with this enabled, so I've
+		// disabled it for now.
+		// Fair bit slower on GeForce256,
+		// Much slower on 3dfx Voodoo5 5500.
+//		_mesa_add_extension(ctx, GL_TRUE, szGL_SGIS_multitexture, 0);
+	}
+
+	_mesa_enable_imaging_extensions(ctx);
+	_mesa_enable_1_3_extensions(ctx);
+	_mesa_enable_1_4_extensions(ctx);
+}
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx7/gld_pipeline_dx7.c b/src/mesa/drivers/windows/gldirect/dx7/gld_pipeline_dx7.c
new file mode 100644
index 0000000000..9ccec69b98
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx7/gld_pipeline_dx7.c
@@ -0,0 +1,77 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Mesa transformation pipeline with GLDirect fastpath
+*
+****************************************************************************/
+
+//#include "../GLDirect.h"
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx7.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+//---------------------------------------------------------------------------
+
+// GLDirect render stages; their definitions are not in this file.
+extern struct tnl_pipeline_stage _gld_d3d_render_stage;
+extern struct tnl_pipeline_stage _gld_mesa_render_stage;
+
+// Stage list passed to _tnl_install_pipeline() by gldInstallPipeline_DX7().
+// The Direct3D stage comes first, followed by Mesa's transform, lighting,
+// fog, texgen, texture-matrix and point-attenuation stages, and finally the
+// GLDirect stage that renders Mesa-transformed vertices through D3D.
+// The list is terminated by a null entry.
+static const struct tnl_pipeline_stage *gld_pipeline[] = {
+	&_gld_d3d_render_stage,			// Direct3D TnL
+	&_tnl_vertex_transform_stage,
+	&_tnl_normal_transform_stage,
+	&_tnl_lighting_stage,
+	&_tnl_fog_coordinate_stage,	/* TODO: Omit fog stage. ??? */
+	&_tnl_texgen_stage,
+	&_tnl_texture_transform_stage,
+	&_tnl_point_attenuation_stage,
+	&_gld_mesa_render_stage,		// Mesa TnL, D3D rendering
+	0,
+};
+
+//---------------------------------------------------------------------------
+
+// Replace the context's current TnL pipeline with the GLDirect stage list
+// (gld_pipeline).
+void gldInstallPipeline_DX7(
+	GLcontext *ctx)
+{
+	// Tear down whatever stages are installed at the moment...
+	_tnl_destroy_pipeline(ctx);
+
+	// ...and put the GLDirect stages in their place.
+	_tnl_install_pipeline(ctx, gld_pipeline);
+}
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx7/gld_primitive_dx7.c b/src/mesa/drivers/windows/gldirect/dx7/gld_primitive_dx7.c
new file mode 100644
index 0000000000..0b373814fe
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx7/gld_primitive_dx7.c
@@ -0,0 +1,1448 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Primitive (points/lines/tris/quads) rendering
+*
+****************************************************************************/
+
+//#include "../GLDirect.h"
+
+//#include "gld_dx8.h"
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx7.h"
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "depth.h"
+#include "extensions.h"
+#include "macros.h"
+#include "matrix.h"
+// #include "mem.h"
+//#include "mmath.h"
+#include "mtypes.h"
+#include "texformat.h"
+#include "texstore.h"
+#include "vbo/vbo.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "swrast_setup/ss_context.h"
+#include "swrast/s_context.h"
+#include "swrast/s_depth.h"
+#include "swrast/s_lines.h"
+#include "swrast/s_triangle.h"
+#include "swrast/s_trispan.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+// Disable compiler complaints about unreferenced local variables
+#pragma warning (disable:4101)
+
+//---------------------------------------------------------------------------
+// Helper defines for primitives
+//---------------------------------------------------------------------------
+
+//static const float ooZ		= 1.0f / 65536.0f; // One over Z
+
+// Pack the current software vertex's colour / specular channels into a D3D
+// colour value. Both macros read the local variable `swv`.
+#define GLD_COLOUR (D3DCOLOR_RGBA(swv->color[0], swv->color[1], swv->color[2], swv->color[3]))
+#define GLD_SPECULAR (D3DCOLOR_RGBA(swv->specular[0], swv->specular[1], swv->specular[2], swv->specular[3]))
+// Mirror a window Y coordinate against gldCtx->dwHeight. Reads the local
+// variable `gldCtx`.
+#define GLD_FLIP_Y(y) (gldCtx->dwHeight - (y))
+
+//---------------------------------------------------------------------------
+// 2D vertex setup
+//---------------------------------------------------------------------------
+
+// Local-variable prologues for the 2D primitive functions. Each macro
+// expands to a run of declarations and must be followed by a semicolon.
+// All of them expect a `ctx` variable in scope and declare gldCtx, gld, ss,
+// swv, dwSpecularColour and dwFlatColour, plus `pV`, a write cursor
+// initialised from the matching gld->PB2d pointer.
+
+// Points: pV starts at gld->PB2d.pPoints.
+#define GLD_SETUP_2D_VARS_POINTS							\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx7	*gld	= GLD_GET_DX7_DRIVER(gldCtx);	\
+	GLD_2D_VERTEX	*pV		= (GLD_2D_VERTEX*)gld->PB2d.pPoints;	\
+	SScontext		*ss		= SWSETUP_CONTEXT(ctx);			\
+	SWvertex		*swv;									\
+	DWORD			dwSpecularColour;						\
+	DWORD			dwFlatColour
+
+// Lines: pV starts at gld->PB2d.pLines.
+#define GLD_SETUP_2D_VARS_LINES								\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx7	*gld	= GLD_GET_DX7_DRIVER(gldCtx);	\
+	GLD_2D_VERTEX	*pV		= (GLD_2D_VERTEX*)gld->PB2d.pLines;	\
+	SScontext		*ss		= SWSETUP_CONTEXT(ctx);			\
+	SWvertex		*swv;									\
+	DWORD			dwSpecularColour;						\
+	DWORD			dwFlatColour
+
+// Triangles: pV starts at gld->PB2d.pTriangles. Also captures the fog
+// enable flag and declares the variables used by the two-sided lighting
+// macros (facing starts at 0; VB, vbcolor and vbspec are left unassigned).
+#define GLD_SETUP_2D_VARS_TRIANGLES							\
+	BOOL			bFog = ctx->Fog.Enabled;				\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx7	*gld	= GLD_GET_DX7_DRIVER(gldCtx);	\
+	GLD_2D_VERTEX	*pV		= (GLD_2D_VERTEX*)gld->PB2d.pTriangles;	\
+	SScontext		*ss		= SWSETUP_CONTEXT(ctx);			\
+	SWvertex		*swv;									\
+	DWORD			dwSpecularColour;						\
+	DWORD			dwFlatColour;							\
+	GLuint					facing = 0;						\
+	struct vertex_buffer	*VB;							\
+	GLchan					(*vbcolor)[4];					\
+	GLchan					(*vbspec)[4]
+
+// Point `swv` at software-setup vertex number s (reads the local `ss`).
+// The index is substituted directly: pasting it onto '[' with ## does not
+// form a valid preprocessing token and is rejected by conforming compilers.
+#define GLD_SETUP_GET_SWVERT(s)					\
+	swv = &ss->verts[s]
+
+// Per-vertex field writers for 2D primitives. Each macro copies data from
+// the current software vertex `swv` (or a cached flat colour) into the
+// output vertex `pV`. None of them advances pV.
+
+// Window position: x, vertically flipped y, and win[3] stored as rhw.
+#define GLD_SETUP_2D_VERTEX						\
+	pV->x			= swv->win[0];				\
+	pV->y			= GLD_FLIP_Y(swv->win[1]);	\
+	pV->rhw			= swv->win[3]
+
+// Diffuse colour taken from this vertex.
+#define GLD_SETUP_SMOOTH_COLOUR					\
+	pV->diffuse		= GLD_COLOUR
+
+// Cache this vertex's colour (plain, or fog-blended) for flat shading.
+#define GLD_SETUP_GET_FLAT_COLOUR				\
+	dwFlatColour	= GLD_COLOUR
+#define GLD_SETUP_GET_FLAT_FOG_COLOUR			\
+	dwFlatColour	= _gldComputeFog(ctx, swv)
+
+// Diffuse colour taken from the cached flat colour.
+#define GLD_SETUP_USE_FLAT_COLOUR				\
+	pV->diffuse		= dwFlatColour
+
+// Cache this vertex's specular colour for flat shading.
+#define GLD_SETUP_GET_FLAT_SPECULAR				\
+	dwSpecularColour= GLD_SPECULAR
+
+// Specular colour taken from the cached flat specular colour.
+#define GLD_SETUP_USE_FLAT_SPECULAR				\
+	pV->specular	= dwSpecularColour
+
+// Depth: window Z divided by ctx->DepthMaxF.
+#define GLD_SETUP_DEPTH							\
+	pV->sz			= swv->win[2] / ctx->DepthMaxF
+//	pV->z			= swv->win[2] * ooZ;
+
+// Specular colour taken from this vertex.
+#define GLD_SETUP_SPECULAR						\
+	pV->specular	= GLD_SPECULAR
+
+// Diffuse colour taken from this vertex, blended with the fog colour.
+#define GLD_SETUP_FOG							\
+	pV->diffuse		= _gldComputeFog(ctx, swv)
+
+// First two texture coordinates of texture unit 0.
+#define GLD_SETUP_TEX0							\
+	pV->t0_u		= swv->texcoord[0][0];		\
+	pV->t0_v		= swv->texcoord[0][1]
+
+// First two texture coordinates of texture unit 1.
+#define GLD_SETUP_TEX1							\
+	pV->t1_u		= swv->texcoord[1][0];		\
+	pV->t1_v		= swv->texcoord[1][1]
+
+// Write diffuse/specular for vertex index v of a smooth-shaded triangle.
+// When facing == 1 the colours come from the vbcolor / vbspec arrays
+// (specular only if vbspec is non-NULL); otherwise they come from the
+// current software vertex, with fog blended into the diffuse colour when
+// bFog is set.
+// The index is substituted directly: pasting it onto '[' with ## does not
+// form a valid preprocessing token and is rejected by conforming compilers.
+#define GLD_SETUP_LIGHTING(v)			\
+	if (facing == 1) {					\
+		pV->diffuse	= D3DCOLOR_RGBA(vbcolor[v][0], vbcolor[v][1], vbcolor[v][2], vbcolor[v][3]);	\
+		if (vbspec) {																					\
+			pV->specular = D3DCOLOR_RGBA(vbspec[v][0], vbspec[v][1], vbspec[v][2], vbspec[v][3]);	\
+		}	\
+	} else {	\
+		if (bFog)						\
+			GLD_SETUP_FOG;				\
+		else							\
+			GLD_SETUP_SMOOTH_COLOUR;	\
+		GLD_SETUP_SPECULAR;				\
+	}
+
+// For flat-shaded triangles: when facing == 1, replace the cached flat
+// colours with the vbcolor / vbspec entries of vertex index v (specular
+// only if vbspec is non-NULL). Does nothing when facing != 1.
+// The index is substituted directly: pasting it onto '[' with ## does not
+// form a valid preprocessing token and is rejected by conforming compilers.
+#define GLD_SETUP_GET_FLAT_LIGHTING(v)	\
+	if (facing == 1) {					\
+		dwFlatColour = D3DCOLOR_RGBA(vbcolor[v][0], vbcolor[v][1], vbcolor[v][2], vbcolor[v][3]);	\
+		if (vbspec) {																					\
+			dwSpecularColour = D3DCOLOR_RGBA(vbspec[v][0], vbspec[v][1], vbspec[v][2], vbspec[v][3]);	\
+		}	\
+	}
+
+// Two-sided lighting prologue for triangle functions.
+// This is an object-like macro (it takes no arguments). It relies on these
+// names being in scope at the point of use: ctx, v0, v1, v2, VB, vbcolor,
+// vbspec and facing.
+// When DD_TRI_LIGHT_TWOSIDE is set in ctx->_TriangleCaps it:
+//   - points vbcolor at VB->BackfaceColorPtr->data,
+//   - points vbspec at VB->BackfaceSecondaryColorPtr->data, or sets it to
+//     NULL when that pointer is absent,
+//   - computes cc, the 2D cross product of the window-space edges
+//     (v0 - v2) and (v1 - v2),
+//   - sets facing to (cc < 0.0) XOR ctx->Polygon._FrontBit.
+// When the flag is clear nothing is assigned, so facing keeps whatever
+// value the caller gave it.
+#define GLD_SETUP_TWOSIDED_LIGHTING		\
+	/* Two-sided lighting */				\
+	if (ctx->_TriangleCaps & DD_TRI_LIGHT_TWOSIDE) {	\
+		SWvertex	*verts = SWSETUP_CONTEXT(ctx)->verts;	\
+		SWvertex	*v[3];									\
+		GLfloat		ex,ey,fx,fy,cc;							\
+		/* Get vars for later */							\
+		VB		= &TNL_CONTEXT(ctx)->vb;					\
+		vbcolor	= (GLchan (*)[4])VB->BackfaceColorPtr->data;	\
+		if (VB->BackfaceSecondaryColorPtr) {			\
+			vbspec = (GLchan (*)[4])VB->BackfaceSecondaryColorPtr->data;	\
+		} else {													\
+			vbspec = NULL;											\
+		}															\
+		v[0] = &verts[v0];											\
+		v[1] = &verts[v1];											\
+		v[2] = &verts[v2];											\
+		ex = v[0]->win[0] - v[2]->win[0];	\
+		ey = v[0]->win[1] - v[2]->win[1];	\
+		fx = v[1]->win[0] - v[2]->win[0];	\
+		fy = v[1]->win[1] - v[2]->win[1];	\
+		cc  = ex*fy - ey*fx;				\
+		facing = (cc < 0.0) ^ ctx->Polygon._FrontBit;	\
+	}
+
+//---------------------------------------------------------------------------
+// 3D vertex setup
+//---------------------------------------------------------------------------
+
+// Local-variable prologues for the 3D primitive functions. Each macro
+// expects `ctx` in scope and declares gldCtx, gld, tnl, VB (the address of
+// tnl->vb), the scratch pointers p4f and tc, dwColor, and `pV`, a write
+// cursor initialised from the matching gld->PB3d pointer.
+// NOTE: unlike the 2D prologues, these expansions already end with a
+// semicolon.
+
+// Points: pV starts at gld->PB3d.pPoints.
+#define GLD_SETUP_3D_VARS_POINTS											\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx7	*gld	= GLD_GET_DX7_DRIVER(gldCtx);	\
+	GLD_3D_VERTEX			*pV		= (GLD_3D_VERTEX*)gld->PB3d.pPoints;	\
+	TNLcontext				*tnl	= TNL_CONTEXT(ctx);				\
+	struct vertex_buffer	*VB		= &tnl->vb;						\
+	GLfloat					(*p4f)[4];								\
+	GLfloat					(*tc)[4];								\
+	DWORD					dwColor;
+
+// Lines: pV starts at gld->PB3d.pLines.
+#define GLD_SETUP_3D_VARS_LINES											\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx7	*gld	= GLD_GET_DX7_DRIVER(gldCtx);	\
+	GLD_3D_VERTEX			*pV		= (GLD_3D_VERTEX*)gld->PB3d.pLines;	\
+	TNLcontext				*tnl	= TNL_CONTEXT(ctx);				\
+	struct vertex_buffer	*VB		= &tnl->vb;						\
+	GLfloat					(*p4f)[4];								\
+	GLfloat					(*tc)[4];								\
+	DWORD					dwColor;
+
+// Triangles: pV starts at gld->PB3d.pTriangles.
+#define GLD_SETUP_3D_VARS_TRIANGLES											\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx7	*gld	= GLD_GET_DX7_DRIVER(gldCtx);	\
+	GLD_3D_VERTEX			*pV		= (GLD_3D_VERTEX*)gld->PB3d.pTriangles;	\
+	TNLcontext				*tnl	= TNL_CONTEXT(ctx);				\
+	struct vertex_buffer	*VB		= &tnl->vb;						\
+	GLfloat					(*p4f)[4];								\
+	GLfloat					(*tc)[4];								\
+	DWORD					dwColor;
+
+// Per-vertex field writers for 3D primitives. Each macro reads attribute
+// arrays from `VB` and writes into the output vertex `pV`; none of them
+// advances pV. Every expansion already ends with a semicolon or a closing
+// brace.
+// The vertex index is substituted directly: pasting it onto '[' with ##
+// does not form a valid preprocessing token and is rejected by conforming
+// compilers.
+
+// Position: x, y, z of vertex v from the _TNL_ATTRIB_POS array.
+#define GLD_SETUP_3D_VERTEX(v)					\
+	p4f = VB->AttribPtr[_TNL_ATTRIB_POS]->data;		\
+	pV->Position.x	= p4f[v][0];				\
+	pV->Position.y	= p4f[v][1];				\
+	pV->Position.z	= p4f[v][2];
+
+// Diffuse colour of vertex v from the _TNL_ATTRIB_COLOR0 array.
+#define GLD_SETUP_SMOOTH_COLOUR_3D(v)															\
+	p4f = (GLfloat (*)[4])VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data;										\
+	pV->Diffuse	= D3DCOLOR_COLORVALUE(p4f[v][0], p4f[v][1], p4f[v][2], p4f[v][3]);
+
+
+// Cache the colour of vertex v in dwColor for flat shading.
+#define GLD_SETUP_GET_FLAT_COLOUR_3D(v)													\
+	p4f = (GLfloat (*)[4])VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data;	\
+	dwColor	= D3DCOLOR_COLORVALUE(p4f[v][0], p4f[v][1], p4f[v][2], p4f[v][3]);
+
+// Diffuse colour taken from the cached flat colour.
+#define GLD_SETUP_USE_FLAT_COLOUR_3D			\
+	pV->Diffuse = dwColor;
+
+// Texture unit 0 coordinates of vertex v; skipped when the array is absent.
+#define GLD_SETUP_TEX0_3D(v)						\
+	if (VB->AttribPtr[_TNL_ATTRIB_TEX0]) {				\
+		tc = VB->AttribPtr[_TNL_ATTRIB_TEX0]->data;		\
+		pV->TexUnit0.x	= tc[v][0];				\
+		pV->TexUnit0.y	= tc[v][1];				\
+	}
+
+// Texture unit 1 coordinates of vertex v; skipped when the array is absent.
+#define GLD_SETUP_TEX1_3D(v)						\
+	if (VB->AttribPtr[_TNL_ATTRIB_TEX1]) {				\
+		tc = VB->AttribPtr[_TNL_ATTRIB_TEX1]->data;		\
+		pV->TexUnit1.x	= tc[v][0];				\
+		pV->TexUnit1.y	= tc[v][1];				\
+	}
+
+//---------------------------------------------------------------------------
+// Helper functions
+//---------------------------------------------------------------------------
+
+// Blend a software vertex's colour with the context fog colour.
+// Full fog calculation, based on Mesa code.
+//
+// ctx: supplies the fog colour (ctx->Fog.Color).
+// swv: supplies the vertex colour and the blend factor (swv->fog).
+//
+// Returns a packed D3D colour: each of R, G, B is
+// fog * vertex + (1 - fog) * fogColour; alpha is the vertex alpha.
+__inline DWORD _gldComputeFog(
+	GLcontext *ctx,
+	SWvertex *swv)
+{
+	const GLfloat	vertexWeight	= swv->fog;
+	const GLfloat	fogWeight		= 1.0 - vertexWeight;
+	GLchan			fogRed, fogGreen, fogBlue;
+	GLchan			outRed, outGreen, outBlue;
+
+	// Fog colour converted from float to channel values
+	UNCLAMPED_FLOAT_TO_CHAN(fogRed,   ctx->Fog.Color[RCOMP]);
+	UNCLAMPED_FLOAT_TO_CHAN(fogGreen, ctx->Fog.Color[GCOMP]);
+	UNCLAMPED_FLOAT_TO_CHAN(fogBlue,  ctx->Fog.Color[BCOMP]);
+
+	// Weighted mix of vertex colour and fog colour, per channel
+	outRed   = vertexWeight * swv->color[0] + fogWeight * fogRed;
+	outGreen = vertexWeight * swv->color[1] + fogWeight * fogGreen;
+	outBlue  = vertexWeight * swv->color[2] + fogWeight * fogBlue;
+
+	return D3DCOLOR_RGBA(outRed, outGreen, outBlue, swv->color[3]);
+}
+
+//---------------------------------------------------------------------------
+
+// Line-stipple reset hook. Currently an empty stub: ctx is not used and
+// nothing is changed.
+void gld_ResetLineStipple_DX7(
+	GLcontext *ctx)
+{
+	// TODO: Fake stipple with a 32x32 texture.
+}
+
+//---------------------------------------------------------------------------
+// 2D (post-transformed) primitives
+//---------------------------------------------------------------------------
+
+// Append the unclipped points in the range [first, last) to the 2D point
+// buffer (gld->PB2d).
+//
+// ctx:   GL context.
+// first: index of the first vertex to consider.
+// last:  one past the index of the last vertex to consider.
+//
+// When VB->Elts is set, the range indexes into that element list;
+// otherwise it indexes the vertices directly. A vertex whose clip mask is
+// non-zero is skipped. The buffer cursor and point count advance only for
+// vertices that were actually written, so skipped vertices leave no
+// unwritten slots behind.
+void gld_Points2D_DX7(
+	GLcontext *ctx,
+	GLuint first,
+	GLuint last)
+{
+	GLD_SETUP_2D_VARS_POINTS;
+
+	unsigned				i;
+	unsigned				nEmitted = 0;	// Vertices written by this call
+	struct vertex_buffer	*VB = &TNL_CONTEXT(ctx)->vb;
+
+	// _Size is already clamped to MaxPointSize and MinPointSize
+	// Not supported by DX7
+//	IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_POINTSIZE, *((DWORD*)&ctx->Point._Size));
+
+	if (VB->Elts) {
+		for (i=first; i<last; i++) {
+			if (VB->ClipMask[VB->Elts[i]] == 0) {
+//				_swrast_Point( ctx, &verts[VB->Elts[i]] );
+				GLD_SETUP_GET_SWVERT(VB->Elts[i]);
+				GLD_SETUP_2D_VERTEX;
+				GLD_SETUP_SMOOTH_COLOUR;
+				GLD_SETUP_DEPTH;
+				GLD_SETUP_SPECULAR;
+				GLD_SETUP_TEX0;
+				GLD_SETUP_TEX1;
+				pV++;
+				nEmitted++;
+			}
+		}
+	} else {
+		// swv walks the source vertices in step with i.
+		GLD_SETUP_GET_SWVERT(first);
+		for (i=first; i<last; i++, swv++) {
+			if (VB->ClipMask[i] == 0) {
+//				_swrast_Point( ctx, &verts[i] );
+				GLD_SETUP_2D_VERTEX;
+				GLD_SETUP_SMOOTH_COLOUR;
+				GLD_SETUP_DEPTH;
+				GLD_SETUP_SPECULAR;
+				GLD_SETUP_TEX0;
+				GLD_SETUP_TEX1;
+				pV++;
+				nEmitted++;
+			}
+		}
+	}
+
+	gld->PB2d.pPoints = (BYTE*)pV;
+	gld->PB2d.nPoints += nEmitted;
+}
+
+//---------------------------------------------------------------------------
+
+// Append one flat-shaded line to the 2D line buffer.
+//
+// ctx:    GL context.
+// v0, v1: indices of the two endpoints in the software-setup vertex array.
+//
+// v1 is written first and supplies the diffuse and specular colours used
+// for both endpoints.
+void gld_Line2DFlat_DX7(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1)
+{
+	GLD_SETUP_2D_VARS_LINES;
+
+	// Endpoint v1: capture the flat colours, then emit the vertex
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_GET_FLAT_COLOUR;
+	GLD_SETUP_GET_FLAT_SPECULAR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Endpoint v0: same colours as v1
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Publish the new write position and line count
+	gld->PB2d.nLines++;
+	gld->PB2d.pLines = (BYTE*)pV;
+}
+
+//---------------------------------------------------------------------------
+
+// Append one smooth-shaded line to the 2D line buffer.
+//
+// ctx:    GL context.
+// v0, v1: indices of the two endpoints in the software-setup vertex array.
+//
+// Each endpoint carries its own diffuse and specular colour; v0 is written
+// first.
+void gld_Line2DSmooth_DX7(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1)
+{
+	GLD_SETUP_2D_VARS_LINES;
+
+	// Endpoint v0
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_SPECULAR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Endpoint v1
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_SPECULAR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Publish the new write position and line count
+	gld->PB2d.nLines++;
+	gld->PB2d.pLines = (BYTE*)pV;
+}
+
+//---------------------------------------------------------------------------
+
+// Append one flat-shaded triangle to the 2D triangle buffer.
+//
+// ctx:        GL context.
+// v0, v1, v2: vertex indices in the software-setup vertex array.
+//
+// Vertices are written in the order v2, v0, v1; v2 supplies the diffuse
+// colour used for all three. Specular is not written by this variant.
+void gld_Triangle2DFlat_DX7(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1,
+	GLuint v2)
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+
+	// Vertex v2: capture the flat colour, then emit
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_GET_FLAT_COLOUR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Vertex v0
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Vertex v1
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Publish the new write position and triangle count
+	gld->PB2d.nTriangles++;
+	gld->PB2d.pTriangles = (BYTE*)pV;
+}
+
+//---------------------------------------------------------------------------
+
+// Append one smooth-shaded triangle to the 2D triangle buffer.
+//
+// ctx:        GL context.
+// v0, v1, v2: vertex indices in the software-setup vertex array.
+//
+// Vertices are written in the order v0, v1, v2, each with its own diffuse
+// colour. Specular is not written by this variant.
+void gld_Triangle2DSmooth_DX7(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1,
+	GLuint v2)
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+
+	// Vertex v0
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Vertex v1
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Vertex v2
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Publish the new write position and triangle count
+	gld->PB2d.nTriangles++;
+	gld->PB2d.pTriangles = (BYTE*)pV;
+}
+
+//---------------------------------------------------------------------------
+
+// Append one flat-shaded triangle to the 2D triangle buffer, with fog,
+// specular and two-sided lighting handled.
+//
+// ctx:        GL context.
+// v0, v1, v2: vertex indices in the software-setup vertex array.
+//
+// Vertices are written in the order v2, v0, v1. The flat colours come from
+// v2: the diffuse colour is fog-blended when fog is enabled, and both
+// colours are replaced by the back-face colours when the triangle is found
+// to be back-facing.
+void gld_Triangle2DFlatExtras_DX7(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1,
+	GLuint v2)
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+
+	// Works out `facing` and the back-face colour arrays (macro takes no
+	// arguments; it reads v0, v1 and v2 from this scope).
+	GLD_SETUP_TWOSIDED_LIGHTING;
+
+	// Vertex v2: capture the flat colours, then emit
+	GLD_SETUP_GET_SWVERT(v2);
+	if (bFog) {
+		GLD_SETUP_GET_FLAT_FOG_COLOUR;
+	} else {
+		GLD_SETUP_GET_FLAT_COLOUR;
+	}
+	GLD_SETUP_GET_FLAT_SPECULAR;
+	GLD_SETUP_GET_FLAT_LIGHTING(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Vertex v0
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Vertex v1
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Publish the new write position and triangle count
+	gld->PB2d.nTriangles++;
+	gld->PB2d.pTriangles = (BYTE*)pV;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Triangle2DSmoothExtras_DX7(	// Append one 2D triangle with per-vertex GLD_SETUP_LIGHTING to gld->PB2d.
+	GLcontext *ctx,	// not named again in this body; presumably consumed inside the setup macros
+	GLuint v0,	// 1st output vertex; also the argument of the two-sided lighting setup
+	GLuint v1,	// 2nd output vertex
+	GLuint v2)	// 3rd output vertex
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;	// gld and pV are not declared here; presumably this macro declares them
+	// Two-sided lighting setup is requested for v0 before any vertex is written.
+	GLD_SETUP_TWOSIDED_LIGHTING(v0);
+	// 1st output vertex (v0): position, depth, texcoords, then lighting for this vertex.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v0);
+	pV++;	// advance to the next output vertex slot
+	// 2nd output vertex (v1), same macro sequence.
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v1);
+	pV++;
+	// 3rd output vertex (v2), same macro sequence.
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v2);
+	pV++;
+	// Store the advanced write pointer back and count the triangle just written.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Quad2DFlat_DX7(	// Append a flat-shaded 2D quad to gld->PB2d as two triangles (six vertices).
+	GLcontext *ctx,	// not named again in this body; presumably consumed inside the setup macros
+	GLuint v0,	// quad corner; written once (2nd output vertex)
+	GLuint v1,	// quad corner; written twice (3rd and 4th output vertices)
+	GLuint v2,	// quad corner; written once (5th output vertex)
+	GLuint v3)	// quad corner; written twice (1st and 6th); the flat colour is fetched while building it
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;	// gld and pV are not declared here; presumably this macro declares them
+	// Triangle A = (v3, v0, v1). GLD_SETUP_GET_FLAT_COLOUR runs only for this first vertex.
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_GET_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;	// advance to the next output vertex slot
+	// Triangle A, 2nd vertex (v0).
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+	// Triangle A, 3rd vertex (v1).
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+	// Triangle B = (v1, v2, v3): v1 is written again because the vertices are not shared.
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+	// Triangle B, 2nd vertex (v2).
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+	// Triangle B, 3rd vertex (v3 again).
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+	// Store the advanced write pointer back and count both triangles.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Quad2DSmooth_DX7(	// Append a smooth-coloured 2D quad to gld->PB2d as two triangles (six vertices).
+	GLcontext *ctx,	// not named again in this body; presumably consumed inside the setup macros
+	GLuint v0,	// quad corner; written twice (1st and 6th output vertices)
+	GLuint v1,	// quad corner; written once (2nd output vertex)
+	GLuint v2,	// quad corner; written twice (3rd and 4th output vertices)
+	GLuint v3)	// quad corner; written once (5th output vertex)
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;	// gld and pV are not declared here; presumably this macro declares them
+	// Triangle A = (v0, v1, v2). Every vertex runs its own GLD_SETUP_SMOOTH_COLOUR.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;	// advance to the next output vertex slot
+	// Triangle A, 2nd vertex (v1).
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Triangle A, 3rd vertex (v2).
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Triangle B = (v2, v3, v0): v2 is written again because the vertices are not shared.
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Triangle B, 2nd vertex (v3).
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Triangle B, 3rd vertex (v0 again).
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Store the advanced write pointer back and count both triangles.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Quad2DFlatExtras_DX7(	// Append a flat-shaded 2D quad (fog/specular/two-sided variant) to gld->PB2d as two triangles.
+	GLcontext *ctx,	// not named again in this body; presumably consumed inside the setup macros
+	GLuint v0,	// quad corner; written once (2nd output vertex)
+	GLuint v1,	// quad corner; written twice (3rd and 4th output vertices)
+	GLuint v2,	// quad corner; written once (5th output vertex)
+	GLuint v3)	// quad corner; written twice (1st and 6th); the flat colour/specular/lighting macros use it
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;	// gld, pV and bFog are not declared here; presumably this macro declares them
+	// Two-sided lighting setup is requested for v3 before any vertex is written.
+	GLD_SETUP_TWOSIDED_LIGHTING(v3);
+	// Triangle A = (v3, v0, v1). The GET_FLAT_* macros run only for this first vertex.
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	if (bFog)	// unbraced if/else: relies on each macro below expanding to exactly one statement
+		GLD_SETUP_GET_FLAT_FOG_COLOUR;
+	else
+		GLD_SETUP_GET_FLAT_COLOUR;
+	GLD_SETUP_GET_FLAT_SPECULAR;
+	GLD_SETUP_GET_FLAT_LIGHTING(v3);
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;	// advance to the next output vertex slot
+	// Triangle A, 2nd vertex (v0): reuses the flat colour/specular fetched above.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// Triangle A, 3rd vertex (v1).
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// Triangle B = (v1, v2, v3): v1 is written again because the vertices are not shared.
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// Triangle B, 2nd vertex (v2).
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// Triangle B, 3rd vertex (v3 again).
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// Store the advanced write pointer back and count both triangles.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Quad2DSmoothExtras_DX7(	// Append a 2D quad with per-vertex GLD_SETUP_LIGHTING to gld->PB2d as two triangles.
+	GLcontext *ctx,	// not named again in this body; presumably consumed inside the setup macros
+	GLuint v0,	// quad corner; written twice (1st and 6th); also the argument of the two-sided lighting setup
+	GLuint v1,	// quad corner; written once (2nd output vertex)
+	GLuint v2,	// quad corner; written twice (3rd and 4th output vertices)
+	GLuint v3)	// quad corner; written once (5th output vertex)
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;	// gld and pV are not declared here; presumably this macro declares them
+	// Two-sided lighting setup is requested for v0 before any vertex is written.
+	GLD_SETUP_TWOSIDED_LIGHTING(v0);
+	// Triangle A = (v0, v1, v2). Each vertex runs GLD_SETUP_LIGHTING with its own index.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v0);
+	pV++;	// advance to the next output vertex slot
+	// Triangle A, 2nd vertex (v1).
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v1);
+	pV++;
+	// Triangle A, 3rd vertex (v2).
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v2);
+	pV++;
+	// Triangle B = (v2, v3, v0): v2 is written again because the vertices are not shared.
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v2);
+	pV++;
+	// Triangle B, 2nd vertex (v3).
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v3);
+	pV++;
+	// Triangle B, 3rd vertex (v0 again).
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v0);
+	pV++;
+	// Store the advanced write pointer back and count both triangles.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+// 3D (pre-transformed) primitives
+//---------------------------------------------------------------------------
+
+// Append the unclipped points of [first, last) to the 3D point buffer gld->PB3d.
+// A vertex is emitted only when its VB->ClipMask entry is zero. Only emitted
+// vertices advance the write pointer and are added to nPoints, so a clipped
+// point no longer leaves an unwritten slot that is still counted. When VB->Elts
+// is set, position, colour and texcoords are all looked up through the element
+// index (colour and texcoords previously used the raw loop counter).
+// gld, pV and VB are not declared here; GLD_SETUP_3D_VARS_POINTS presumably does.
+// ctx is not named in this body; presumably the setup macros read it.
+void gld_Points3D_DX7(
+	GLcontext *ctx,
+	GLuint first,
+	GLuint last)
+{
+	GLD_SETUP_3D_VARS_POINTS
+
+	unsigned				i;
+	unsigned				nEmitted = 0;	// vertices actually written by this call
+
+	// _Size is already clamped to MaxPointSize and MinPointSize, but it is
+	// not forwarded to the device: point size is not supported by DX7.
+
+	if (VB->Elts) {
+		for (i=first; i<last; i++) {
+			if (VB->ClipMask[VB->Elts[i]] == 0) {
+				GLD_SETUP_3D_VERTEX(VB->Elts[i])
+				GLD_SETUP_SMOOTH_COLOUR_3D(VB->Elts[i])
+				GLD_SETUP_TEX0_3D(VB->Elts[i])
+				GLD_SETUP_TEX1_3D(VB->Elts[i])
+				pV++;
+				nEmitted++;
+			}
+		}
+	} else {
+		for (i=first; i<last; i++) {
+			if (VB->ClipMask[i] == 0) {
+				GLD_SETUP_3D_VERTEX(i)
+				GLD_SETUP_SMOOTH_COLOUR_3D(i)
+				GLD_SETUP_TEX0_3D(i)
+				GLD_SETUP_TEX1_3D(i)
+				pV++;
+				nEmitted++;
+			}
+		}
+	}
+
+	gld->PB3d.pPoints = (BYTE*)pV;
+	gld->PB3d.nPoints += nEmitted;
+}
+
+//---------------------------------------------------------------------------
+// Line functions
+//---------------------------------------------------------------------------
+
+void gld_Line3DFlat_DX7(	// Append one flat-coloured line (two vertices) to gld->PB3d.
+	GLcontext *ctx,	// not named again in this body; presumably consumed inside the setup macros
+	GLuint v0,	// written as the 2nd output vertex
+	GLuint v1)	// written first; the flat colour is fetched for this vertex
+{
+	GLD_SETUP_3D_VARS_LINES
+	// 1st output vertex (v1). gld and pV are not declared here; presumably the VARS macro declares them.
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_GET_FLAT_COLOUR_3D(v1)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;	// advance to the next output vertex slot
+	// 2nd output vertex (v0): no GET macro, only the USE macro for the colour fetched above.
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+	// Store the advanced write pointer back and count the line just written.
+	gld->PB3d.pLines = (BYTE*)pV;
+	gld->PB3d.nLines++;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Line3DSmooth_DX7(	// Append one line with per-vertex colour (two vertices) to gld->PB3d.
+	GLcontext *ctx,	// not named again in this body; presumably consumed inside the setup macros
+	GLuint v0,	// written as the 2nd output vertex
+	GLuint v1)	// written as the 1st output vertex
+{
+	GLD_SETUP_3D_VARS_LINES
+	// 1st output vertex (v1). gld and pV are not declared here; presumably the VARS macro declares them.
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v1)
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;	// advance to the next output vertex slot
+	// 2nd output vertex (v0), same macro sequence.
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+	// Store the advanced write pointer back and count the line just written.
+	gld->PB3d.pLines = (BYTE*)pV;
+	gld->PB3d.nLines++;
+}
+
+//---------------------------------------------------------------------------
+// Triangle functions
+//---------------------------------------------------------------------------
+
+void gld_Triangle3DFlat_DX7(	// Append one flat-coloured triangle to gld->PB3d.
+	GLcontext *ctx,	// not named again in this body; presumably consumed inside the setup macros
+	GLuint v0,	// written as the 2nd output vertex
+	GLuint v1,	// written as the 3rd output vertex
+	GLuint v2)	// written first; the flat colour is fetched for this vertex
+{
+	GLD_SETUP_3D_VARS_TRIANGLES
+	// 1st output vertex (v2). gld and pV are not declared here; presumably the VARS macro declares them.
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	GLD_SETUP_GET_FLAT_COLOUR_3D(v2)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	pV++;	// advance to the next output vertex slot
+	// 2nd output vertex (v0): only the USE macro, for the colour fetched above.
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	pV++;
+	// 3rd output vertex (v1): only the USE macro, for the colour fetched above.
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	pV++;
+	// Store the advanced write pointer back and count the triangle just written.
+	gld->PB3d.pTriangles = (BYTE*)pV;
+	gld->PB3d.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Triangle3DSmooth_DX7(	// Append one triangle with per-vertex colour to gld->PB3d.
+	GLcontext *ctx,	// not named again in this body; presumably consumed inside the setup macros
+	GLuint v0,	// 1st output vertex
+	GLuint v1,	// 2nd output vertex
+	GLuint v2)	// 3rd output vertex
+{
+	GLD_SETUP_3D_VARS_TRIANGLES
+	// 1st output vertex (v0). gld and pV are not declared here; presumably the VARS macro declares them.
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;	// advance to the next output vertex slot
+	// 2nd output vertex (v1), same macro sequence.
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v1)
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+	// 3rd output vertex (v2), same macro sequence.
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v2)
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	pV++;
+	// Store the advanced write pointer back and count the triangle just written.
+	gld->PB3d.pTriangles = (BYTE*)pV;
+	gld->PB3d.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+// Quad functions
+//---------------------------------------------------------------------------
+
+void gld_Quad3DFlat_DX7(	// Append a flat-coloured quad to gld->PB3d as two triangles (six vertices).
+	GLcontext *ctx,	// not named again in this body; presumably consumed inside the setup macros
+	GLuint v0,	// quad corner; written once (2nd output vertex)
+	GLuint v1,	// quad corner; written twice (3rd and 4th output vertices)
+	GLuint v2,	// quad corner; written once (5th output vertex)
+	GLuint v3)	// quad corner; written twice (1st and 6th); the flat colour is fetched for it
+{
+	GLD_SETUP_3D_VARS_TRIANGLES
+	// Triangle A = (v3, v0, v1). GLD_SETUP_GET_FLAT_COLOUR_3D runs only for this first vertex.
+	GLD_SETUP_3D_VERTEX(v3)
+	GLD_SETUP_GET_FLAT_COLOUR_3D(v3)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v3)
+	GLD_SETUP_TEX1_3D(v3)
+	pV++;	// advance to the next output vertex slot
+	// Triangle A, 2nd vertex (v0).
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+	// Triangle A, 3rd vertex (v1).
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+	// Triangle B = (v1, v2, v3): v1 is written again because the vertices are not shared.
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+	// Triangle B, 2nd vertex (v2).
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	pV++;
+	// Triangle B, 3rd vertex (v3 again).
+	GLD_SETUP_3D_VERTEX(v3)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v3)
+	GLD_SETUP_TEX1_3D(v3)
+	pV++;
+	// Store the advanced write pointer back and count both triangles.
+	gld->PB3d.pTriangles = (BYTE*)pV;
+	gld->PB3d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Quad3DSmooth_DX7(	// Append a quad with per-vertex colour to gld->PB3d as two triangles (six vertices).
+	GLcontext *ctx,	// not named again in this body; presumably consumed inside the setup macros
+	GLuint v0,	// quad corner; written twice (1st and 6th output vertices)
+	GLuint v1,	// quad corner; written once (2nd output vertex)
+	GLuint v2,	// quad corner; written twice (3rd and 4th output vertices)
+	GLuint v3)	// quad corner; written once (5th output vertex)
+{
+	GLD_SETUP_3D_VARS_TRIANGLES
+	// Triangle A = (v0, v1, v2). gld and pV are not declared here; presumably the VARS macro declares them.
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;	// advance to the next output vertex slot
+	// Triangle A, 2nd vertex (v1).
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v1)
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+	// Triangle A, 3rd vertex (v2).
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v2)
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	pV++;
+	// Triangle B = (v2, v3, v0): v2 is written again because the vertices are not shared.
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v2)
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	pV++;
+	// Triangle B, 2nd vertex (v3).
+	GLD_SETUP_3D_VERTEX(v3)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v3)
+	GLD_SETUP_TEX0_3D(v3)
+	GLD_SETUP_TEX1_3D(v3)
+	pV++;
+	// Triangle B, 3rd vertex (v0 again).
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+	// Store the advanced write pointer back and count both triangles.
+	gld->PB3d.pTriangles = (BYTE*)pV;
+	gld->PB3d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+// Vertex setup for two-sided-lighting vertex shader
+//---------------------------------------------------------------------------
+
+/*
+
+void gld_Points2DTwoside_DX8(GLcontext *ctx, GLuint first, GLuint last)
+{
+	// NOTE: Two-sided lighting does not apply to Points
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Line2DFlatTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1)
+{
+	// NOTE: Two-sided lighting does not apply to Lines
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Line2DSmoothTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1)
+{
+	// NOTE: Two-sided lighting does not apply to Lines
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Triangle2DFlatTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2)
+{
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Triangle2DSmoothTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	GLD_TWOSIDED_VERTEX	*pV		= (GLD_TWOSIDED_VERTEX*)gld->PBtwosidelight.pTriangles;
+	SScontext			*ss		= SWSETUP_CONTEXT(ctx);
+	SWvertex			*swv;
+	DWORD				dwSpecularColour;
+	DWORD				dwFlatColour;
+	GLuint					facing = 0;
+	struct vertex_buffer	*VB;
+	GLchan					(*vbcolor)[4];
+	GLchan					(*vbspec)[4];
+
+	// Reciprocal of DepthMax
+	const float ooDepthMax = 1.0f / ctx->DepthMaxF; 
+
+	// 1st vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 2nd vert
+	swv = &ss->verts[v1];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 3rd vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	gld->PBtwosidelight.pTriangles = (BYTE*)pV;
+	gld->PBtwosidelight.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Quad2DFlatTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	GLD_TWOSIDED_VERTEX	*pV		= (GLD_TWOSIDED_VERTEX*)gld->PBtwosidelight.pTriangles;
+	SScontext			*ss		= SWSETUP_CONTEXT(ctx);
+	SWvertex			*swv;
+	DWORD				dwSpecularColour;
+	DWORD				dwFlatColour;
+	GLuint					facing = 0;
+	struct vertex_buffer	*VB;
+	GLchan					(*vbcolor)[4];
+	GLchan					(*vbspec)[4];
+
+	// Reciprocal of DepthMax
+	const float ooDepthMax = 1.0f / ctx->DepthMaxF; 
+
+	// 1st vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 2nd vert
+	swv = &ss->verts[v1];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 3rd vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 4th vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 5th vert
+	swv = &ss->verts[v3];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 6th vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	gld->PBtwosidelight.pTriangles = (BYTE*)pV;
+	gld->PBtwosidelight.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Quad2DSmoothTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	GLD_TWOSIDED_VERTEX	*pV		= (GLD_TWOSIDED_VERTEX*)gld->PBtwosidelight.pTriangles;
+	SScontext			*ss		= SWSETUP_CONTEXT(ctx);
+	SWvertex			*swv;
+	DWORD				dwSpecularColour;
+	DWORD				dwFlatColour;
+	GLuint					facing = 0;
+	struct vertex_buffer	*VB;
+	GLchan					(*vbcolor)[4];
+	GLchan					(*vbspec)[4];
+
+	// Reciprocal of DepthMax
+	const float ooDepthMax = 1.0f / ctx->DepthMaxF; 
+
+	// 1st vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 2nd vert
+	swv = &ss->verts[v1];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 3rd vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 4th vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 5th vert
+	swv = &ss->verts[v3];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 6th vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	gld->PBtwosidelight.pTriangles = (BYTE*)pV;
+	gld->PBtwosidelight.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+*/
diff --git a/src/mesa/drivers/windows/gldirect/dx7/gld_texture_dx7.c b/src/mesa/drivers/windows/gldirect/dx7/gld_texture_dx7.c
new file mode 100644
index 0000000000..bbe673516d
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx7/gld_texture_dx7.c
@@ -0,0 +1,2196 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Texture / Bitmap functions
+*
+****************************************************************************/
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx7.h"
+
+//#include <d3dx8tex.h>
+
+#include "texformat.h"
+#include "colormac.h"
+#include "texstore.h"
+#include "image.h"
+// #include "mem.h"
+
+//---------------------------------------------------------------------------
+
+#define GLD_FLIP_HEIGHT(y,h) (gldCtx->dwHeight - (y) - (h))	// dwHeight - y - h; needs a gldCtx variable in scope at the use site
+// Prototype only; the definition is not in this part of the file.
+D3DX_SURFACEFORMAT _gldD3DXFormatFromSurface(IDirectDrawSurface7 *pSurface);
+
+//---------------------------------------------------------------------------
+// 1D texture fetch
+//---------------------------------------------------------------------------
+
+// 1D texel addressing helpers. Each yields a pointer into img->Data, typed
+// as named, offset by the first coordinate only; the y and z arguments are
+// accepted for signature parity and ignored. CHAN_SRC and UBYTE_SRC scale
+// the offset by `stride` elements per texel.
+#define CHAN_SRC( img, x, y, z, stride )	((GLchan *)(img)->Data + (x) * (stride))
+#define UBYTE_SRC( img, x, y, z, stride )	((GLubyte *)(img)->Data + (x) * (stride))
+#define USHORT_SRC( img, x, y, z )	((GLushort *)(img)->Data + (x))
+#define FLOAT_SRC( img, x, y, z )	((GLfloat *)(img)->Data + (x))
+
+//---------------------------------------------------------------------------
+
+// GLchan fetch for a 1D X8R8G8B8 image: copies bytes 2/1/0 to R/G/B, alpha = CHAN_MAX.
+static void gld_fetch_1d_texel_X8R8G8B8(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLchan *pTexel = CHAN_SRC( texImage, i, j, k, 4 );	/* 4 channels per texel */
+   texel[RCOMP] = pTexel[2];
+   texel[GCOMP] = pTexel[1];
+   texel[BCOMP] = pTexel[0];
+   texel[ACOMP] = CHAN_MAX;	/* the X byte is never read */
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch for a 1D X8R8G8B8 image; channel order matches gld_fetch_1d_texel_X8R8G8B8.
+static void gld_fetch_1d_texel_f_X8R8G8B8( const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLchan *src = CHAN_SRC( texImage, i, j, k, 4 );	/* bytes stored B, G, R, X */
+   texel[RCOMP] = CHAN_TO_FLOAT(src[2]);	/* red is byte 2 (was read from byte 0) */
+   texel[GCOMP] = CHAN_TO_FLOAT(src[1]);
+   texel[BCOMP] = CHAN_TO_FLOAT(src[0]);	/* blue is byte 0 (was read from byte 2) */
+   texel[ACOMP] = 1.f;	/* no alpha stored: opaque */
+}
+
+//---------------------------------------------------------------------------
+
+// GLchan fetch for a 1D X1R5G5B5 image: red = bits 14..10, green = bits 9..5, blue = bits 4..0.
+static void gld_fetch_1d_texel_X1R5G5B5(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort s = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_CHAN( ((s >> 10) & 0x1f) * 255 / 0x1f );	/* 5-bit field scaled to 0..255 */
+   texel[GCOMP] = UBYTE_TO_CHAN( ((s >>  5) & 0x1f) * 255 / 0x1f );	/* mask 0x1f, not 0xf8: field is already at bit 0 */
+   texel[BCOMP] = UBYTE_TO_CHAN( ((s      ) & 0x1f) * 255 / 0x1f );
+   texel[ACOMP] = CHAN_MAX;	/* the X bit is ignored: opaque */
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch for a 1D X1R5G5B5 image: red = bits 14..10, green = bits 9..5, blue = bits 4..0.
+static void gld_fetch_1d_texel_f_X1R5G5B5(
+	const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort s = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((s >> 10) & 0x1f) * 255 / 0x1f );	/* 5-bit field scaled to 0..255 */
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((s >>  5) & 0x1f) * 255 / 0x1f );	/* mask 0x1f, not 0xf8: field is already at bit 0 */
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((s      ) & 0x1f) * 255 / 0x1f );
+   texel[ACOMP] = 1.f;	/* the X bit is ignored: opaque */
+}
+
+//---------------------------------------------------------------------------
+
+// GLchan fetch for a 1D X4R4G4B4 image: widens the three low nibbles to 0..255, alpha = CHAN_MAX.
+static void gld_fetch_1d_texel_X4R4G4B4(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_CHAN( ((packed >>  8) & 0xf) * 255 / 0xf );	/* bits 11..8 */
+   texel[GCOMP] = UBYTE_TO_CHAN( ((packed >>  4) & 0xf) * 255 / 0xf );	/* bits 7..4 */
+   texel[BCOMP] = UBYTE_TO_CHAN( ((packed      ) & 0xf) * 255 / 0xf );	/* bits 3..0 */
+   texel[ACOMP] = CHAN_MAX;	/* top nibble is never read */
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch for a 1D X4R4G4B4 image: widens the three low nibbles to 0..255 first, alpha = 1.
+static void gld_fetch_1d_texel_f_X4R4G4B4(
+	const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((packed >>  8) & 0xf) * 255 / 0xf );	/* bits 11..8 */
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((packed >>  4) & 0xf) * 255 / 0xf );	/* bits 7..4 */
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((packed      ) & 0xf) * 255 / 0xf );	/* bits 3..0 */
+   texel[ACOMP] = 1.f;	/* top nibble is never read */
+}
+
+//---------------------------------------------------------------------------
+
+#undef CHAN_SRC	// drop the 1D addressing macros so the 2D section below can redefine the same names
+#undef UBYTE_SRC
+#undef USHORT_SRC
+#undef FLOAT_SRC
+
+//---------------------------------------------------------------------------
+// 2D texture fetch
+//---------------------------------------------------------------------------
+
+// 2D texel addressing helpers. Each yields a pointer into img->Data, typed
+// as named, at element index (img->Width * y + x); the z argument is accepted
+// for signature parity and ignored. CHAN_SRC and UBYTE_SRC scale the index
+// by `stride` elements per texel.
+#define CHAN_SRC( img, x, y, z, stride )	((GLchan *)(img)->Data + ((img)->Width * (y) + (x)) * (stride))
+#define UBYTE_SRC( img, x, y, z, stride )	((GLubyte *)(img)->Data + ((img)->Width * (y) + (x)) * (stride))
+#define USHORT_SRC( img, x, y, z )	((GLushort *)(img)->Data + ((img)->Width * (y) + (x)))
+#define FLOAT_SRC( img, x, y, z )	((GLfloat *)(img)->Data + ((img)->Width * (y) + (x)))
+
+//---------------------------------------------------------------------------
+
+// GLchan fetch for a 2D X8R8G8B8 image: copies bytes 2/1/0 to R/G/B, alpha = CHAN_MAX.
+static void gld_fetch_2d_texel_X8R8G8B8(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLchan *pTexel = CHAN_SRC( texImage, i, j, k, 4 );	/* 4 channels per texel */
+   texel[RCOMP] = pTexel[2];
+   texel[GCOMP] = pTexel[1];
+   texel[BCOMP] = pTexel[0];
+   texel[ACOMP] = CHAN_MAX;	/* the X byte is never read */
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch for a 2D X8R8G8B8 image; channel order matches gld_fetch_2d_texel_X8R8G8B8.
+static void gld_fetch_2d_texel_f_X8R8G8B8( const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLchan *src = CHAN_SRC( texImage, i, j, k, 4 );	/* bytes stored B, G, R, X */
+   texel[RCOMP] = CHAN_TO_FLOAT(src[2]);	/* red is byte 2 (was read from byte 0) */
+   texel[GCOMP] = CHAN_TO_FLOAT(src[1]);
+   texel[BCOMP] = CHAN_TO_FLOAT(src[0]);	/* blue is byte 0 (was read from byte 2) */
+   texel[ACOMP] = 1.f;	/* no alpha stored: opaque */
+}
+
+//---------------------------------------------------------------------------
+
+// GLchan fetch for a 2D X1R5G5B5 image: red = bits 14..10, green = bits 9..5, blue = bits 4..0.
+static void gld_fetch_2d_texel_X1R5G5B5(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort s = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_CHAN( ((s >> 10) & 0x1f) * 255 / 0x1f );	/* 5-bit field scaled to 0..255 */
+   texel[GCOMP] = UBYTE_TO_CHAN( ((s >>  5) & 0x1f) * 255 / 0x1f );	/* mask 0x1f, not 0xf8: field is already at bit 0 */
+   texel[BCOMP] = UBYTE_TO_CHAN( ((s      ) & 0x1f) * 255 / 0x1f );
+   texel[ACOMP] = CHAN_MAX;	/* the X bit is ignored: opaque */
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch for a 2D X1R5G5B5 image: red = bits 14..10, green = bits 9..5, blue = bits 4..0.
+static void gld_fetch_2d_texel_f_X1R5G5B5(
+	const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort s = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((s >> 10) & 0x1f) * 255 / 0x1f );	/* 5-bit field scaled to 0..255 */
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((s >>  5) & 0x1f) * 255 / 0x1f );	/* mask 0x1f, not 0xf8: field is already at bit 0 */
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((s      ) & 0x1f) * 255 / 0x1f );
+   texel[ACOMP] = 1.f;	/* the X bit is ignored: opaque */
+}
+
+//---------------------------------------------------------------------------
+
+// GLchan fetch for a 2D X4R4G4B4 image: widens the three low nibbles to 0..255, alpha = CHAN_MAX.
+static void gld_fetch_2d_texel_X4R4G4B4(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_CHAN( ((packed >>  8) & 0xf) * 255 / 0xf );	/* bits 11..8 */
+   texel[GCOMP] = UBYTE_TO_CHAN( ((packed >>  4) & 0xf) * 255 / 0xf );	/* bits 7..4 */
+   texel[BCOMP] = UBYTE_TO_CHAN( ((packed      ) & 0xf) * 255 / 0xf );	/* bits 3..0 */
+   texel[ACOMP] = CHAN_MAX;	/* top nibble is never read */
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch for a 2D X4R4G4B4 image: widens the three low nibbles to 0..255 first, alpha = 1.
+static void gld_fetch_2d_texel_f_X4R4G4B4(
+	const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((packed >>  8) & 0xf) * 255 / 0xf );	/* bits 11..8 */
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((packed >>  4) & 0xf) * 255 / 0xf );	/* bits 7..4 */
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((packed      ) & 0xf) * 255 / 0xf );	/* bits 3..0 */
+   texel[ACOMP] = 1.f;	/* top nibble is never read */
+}
+
+//---------------------------------------------------------------------------
+
+#undef CHAN_SRC	// drop the 2D addressing macros so the 3D section below can redefine the same names
+#undef UBYTE_SRC
+#undef USHORT_SRC
+#undef FLOAT_SRC
+
+//---------------------------------------------------------------------------
+// 3D texture fetch
+//---------------------------------------------------------------------------
+
+// 3D texel addressing helpers. Each yields a pointer into t->Data, typed as
+// named, at element index ((t->Height * k + j) * t->Width + i). CHAN_SRC and
+// UBYTE_SRC scale that index by sz elements per texel. Every expansion is
+// wrapped in parentheses so it can be dereferenced or indexed at the use site
+// without precedence surprises (CHAN_SRC previously lacked the outer pair).
+#define CHAN_SRC( t, i, j, k, sz )					\
+	((GLchan *)(t)->Data + (((t)->Height * (k) + (j)) * (t)->Width + (i)) * (sz))
+#define UBYTE_SRC( t, i, j, k, sz )					\
+	((GLubyte *)(t)->Data + (((t)->Height * (k) + (j)) * (t)->Width + (i)) * (sz))
+#define USHORT_SRC( t, i, j, k )					\
+	((GLushort *)(t)->Data + (((t)->Height * (k) + (j)) * (t)->Width + (i)))
+#define FLOAT_SRC( t, i, j, k )	((GLfloat *)(t)->Data + (((t)->Height * (k) + (j)) * (t)->Width + (i)))
+
+//---------------------------------------------------------------------------
+
+// Fetch texel (i, j, k) of a 3D X8R8G8B8 image as GLchan RGBA.
+// The texel occupies four channels in memory ordered B, G, R, X; the X
+// channel is not read and alpha is reported as CHAN_MAX.
+static void gld_fetch_3d_texel_X8R8G8B8(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLchan *bgrx = CHAN_SRC( texImage, i, j, k, 4 );
+
+   texel[BCOMP] = bgrx[0];
+   texel[GCOMP] = bgrx[1];
+   texel[RCOMP] = bgrx[2];
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch texel (i, j, k) of a 3D X8R8G8B8 image as float RGBA.
+// The channel order matches the GLchan fetch for this format: the texel is
+// laid out B, G, R, X in memory, so red comes from src[2] and blue from
+// src[0]. (The previous code read red from src[0] and blue from src[2],
+// which disagreed with the GLchan variant and swapped the two channels.)
+// Alpha is always 1.0 because the X channel carries no data.
+static void gld_fetch_3d_texel_f_X8R8G8B8(
+	const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLchan *src = CHAN_SRC( texImage, i, j, k, 4 );
+   texel[RCOMP] = CHAN_TO_FLOAT(src[2]);
+   texel[GCOMP] = CHAN_TO_FLOAT(src[1]);
+   texel[BCOMP] = CHAN_TO_FLOAT(src[0]);
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch texel (i, j, k) of a 3D X1R5G5B5 image as GLchan RGBA.
+//
+// Bit layout of the 16-bit texel: X at bit 15, R in bits 14..10,
+// G in bits 9..5, B in bits 4..0. Each 5-bit field is moved into the top
+// five bits of a byte (mask 0xf8) and then rescaled so that 0xf8 maps to 255:
+//   R: s >> 7,  G: s >> 2,  B: s << 3
+// The previous shifts (10, 5, 0) combined with the 0xf8 mask discarded the
+// low bits of every field and let the X bit leak into red.
+// Alpha is always CHAN_MAX; the X bit is ignored.
+static void gld_fetch_3d_texel_X1R5G5B5(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort *src = USHORT_SRC( texImage, i, j, k );
+   GLchan *rgba = (GLchan *) texel; GLushort s = *src;
+   rgba[RCOMP] = UBYTE_TO_CHAN( ((s >>  7) & 0xf8) * 255 / 0xf8 );
+   rgba[GCOMP] = UBYTE_TO_CHAN( ((s >>  2) & 0xf8) * 255 / 0xf8 );
+   rgba[BCOMP] = UBYTE_TO_CHAN( ((s <<  3) & 0xf8) * 255 / 0xf8 );
+   rgba[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch texel (i, j, k) of a 3D X1R5G5B5 image as float RGBA.
+//
+// Bit layout of the 16-bit texel: X at bit 15, R in bits 14..10,
+// G in bits 9..5, B in bits 4..0. Each 5-bit field is moved into the top
+// five bits of a byte (mask 0xf8) and then rescaled so that 0xf8 maps to 255:
+//   R: s >> 7,  G: s >> 2,  B: s << 3
+// The previous shifts (10, 5, 0) combined with the 0xf8 mask discarded the
+// low bits of every field and let the X bit leak into red.
+// Alpha is always 1.0; the X bit is ignored.
+static void gld_fetch_3d_texel_f_X1R5G5B5(
+	const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort *src = USHORT_SRC( texImage, i, j, k );
+   GLushort s = *src;
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((s >>  7) & 0xf8) * 255 / 0xf8 );
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((s >>  2) & 0xf8) * 255 / 0xf8 );
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((s <<  3) & 0xf8) * 255 / 0xf8 );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch texel (i, j, k) of a 3D X4R4G4B4 image as GLchan RGBA.
+// Red, green and blue are the nibbles at bits 11..8, 7..4 and 3..0; each is
+// rescaled from 0..15 to 0..255. The top nibble is unused and alpha is
+// reported as CHAN_MAX.
+static void gld_fetch_3d_texel_X4R4G4B4(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort *texelPtr = USHORT_SRC( texImage, i, j, k );
+   const GLushort packed = *texelPtr;
+   const int red4   = (packed >> 8) & 0xf;
+   const int green4 = (packed >> 4) & 0xf;
+   const int blue4  = packed & 0xf;
+
+   texel[RCOMP] = UBYTE_TO_CHAN( red4 * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_CHAN( green4 * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_CHAN( blue4 * 255 / 0xf );
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch texel (i, j, k) of a 3D X4R4G4B4 image as float RGBA.
+// Red, green and blue are the nibbles at bits 11..8, 7..4 and 3..0; each is
+// rescaled from 0..15 to 0..255 and converted with UBYTE_TO_FLOAT.
+// The top nibble is unused and alpha is always 1.0.
+static void gld_fetch_3d_texel_f_X4R4G4B4(
+	const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort *texelPtr = USHORT_SRC( texImage, i, j, k );
+   const GLushort packed = *texelPtr;
+   const int red4   = (packed >> 8) & 0xf;
+   const int green4 = (packed >> 4) & 0xf;
+   const int blue4  = packed & 0xf;
+
+   texel[RCOMP] = UBYTE_TO_FLOAT( red4 * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_FLOAT( green4 * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_FLOAT( blue4 * 255 / 0xf );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// The texel-addressing macros are only needed by the fetch functions above;
+// remove them so they do not leak into the rest of the file.
+#undef CHAN_SRC
+#undef UBYTE_SRC
+#undef USHORT_SRC
+#undef FLOAT_SRC
+
+//---------------------------------------------------------------------------
+// Direct3D texture formats that have no Mesa equivalent
+//---------------------------------------------------------------------------
+
+// Texture format descriptor for Direct3D X8R8G8B8 surfaces.
+// Texels are stored with Mesa's ARGB8888 store function, but the format
+// advertises zero alpha bits and uses the gld_fetch_*_X8R8G8B8 functions,
+// which ignore the X byte when reading texels back.
+const struct gl_texture_format _gld_texformat_X8R8G8B8 = {
+   MESA_FORMAT_ARGB8888,		/* MesaFormat */
+   GL_RGBA,				/* BaseFormat */
+   GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
+   8,					/* RedBits */
+   8,					/* GreenBits */
+   8,					/* BlueBits */
+   0,					/* AlphaBits */
+   0,					/* LuminanceBits */
+   0,					/* IntensityBits */
+   0,					/* IndexBits */
+   0,					/* DepthBits */
+   4,					/* TexelBytes */
+   _mesa_texstore_argb8888,			/* StoreTexImageFunc */
+   gld_fetch_1d_texel_X8R8G8B8,		/* FetchTexel1D */
+   gld_fetch_2d_texel_X8R8G8B8,		/* FetchTexel2D */
+   gld_fetch_3d_texel_X8R8G8B8,		/* FetchTexel3D */
+   gld_fetch_1d_texel_f_X8R8G8B8,		/* FetchTexel1Df */
+   gld_fetch_2d_texel_f_X8R8G8B8,		/* FetchTexel2Df */
+   gld_fetch_3d_texel_f_X8R8G8B8,		/* FetchTexel3Df */
+};
+
+// Texture format descriptor for Direct3D 16-bit X1R5G5B5 surfaces.
+// Texels are stored with Mesa's ARGB1555 store function, but the format
+// advertises zero alpha bits and uses the gld_fetch_*_X1R5G5B5 functions
+// to read texels back.
+const struct gl_texture_format _gld_texformat_X1R5G5B5 = {
+   MESA_FORMAT_ARGB1555,		/* MesaFormat */
+   GL_RGBA,				/* BaseFormat */
+   GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
+   5,					/* RedBits */
+   5,					/* GreenBits */
+   5,					/* BlueBits */
+   0,					/* AlphaBits */
+   0,					/* LuminanceBits */
+   0,					/* IntensityBits */
+   0,					/* IndexBits */
+   0,					/* DepthBits */
+   2,					/* TexelBytes */
+   _mesa_texstore_argb1555,			/* StoreTexImageFunc */
+   gld_fetch_1d_texel_X1R5G5B5,		/* FetchTexel1D */
+   gld_fetch_2d_texel_X1R5G5B5,		/* FetchTexel2D */
+   gld_fetch_3d_texel_X1R5G5B5,		/* FetchTexel3D */
+   gld_fetch_1d_texel_f_X1R5G5B5,		/* FetchTexel1Df */
+   gld_fetch_2d_texel_f_X1R5G5B5,		/* FetchTexel2Df */
+   gld_fetch_3d_texel_f_X1R5G5B5,		/* FetchTexel3Df */
+};
+
+// Texture format descriptor for Direct3D 16-bit X4R4G4B4 surfaces.
+// Texels are stored with Mesa's ARGB4444 store function, but the format
+// advertises zero alpha bits and uses the gld_fetch_*_X4R4G4B4 functions,
+// which ignore the X nibble and report opaque alpha.
+const struct gl_texture_format _gld_texformat_X4R4G4B4 = {
+   MESA_FORMAT_ARGB4444,		/* MesaFormat */
+   GL_RGBA,				/* BaseFormat */
+   GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
+   4,					/* RedBits */
+   4,					/* GreenBits */
+   4,					/* BlueBits */
+   0,					/* AlphaBits */
+   0,					/* LuminanceBits */
+   0,					/* IntensityBits */
+   0,					/* IndexBits */
+   0,					/* DepthBits */
+   2,					/* TexelBytes */
+   _mesa_texstore_argb4444,			/* StoreTexImageFunc */
+   gld_fetch_1d_texel_X4R4G4B4,		/* FetchTexel1D */
+   gld_fetch_2d_texel_X4R4G4B4,		/* FetchTexel2D */
+   gld_fetch_3d_texel_X4R4G4B4,		/* FetchTexel3D */
+   gld_fetch_1d_texel_f_X4R4G4B4,		/* FetchTexel1Df */
+   gld_fetch_2d_texel_f_X4R4G4B4,		/* FetchTexel2Df */
+   gld_fetch_3d_texel_f_X4R4G4B4,		/* FetchTexel3Df */
+};
+
+//---------------------------------------------------------------------------
+// Texture unit constants
+//---------------------------------------------------------------------------
+
+// Every supported combination of texture environment mode and base
+// internal format, numbered consecutively from zero.
+// Example: GLD_TEXENV_MODULATE_RGBA means
+//          GL_MODULATE with a GL_RGBA base internal format.
+// The values run in the same order as the entries of gldTexEnv[]
+// (three formats per mode: RGB, RGBA, ALPHA).
+#define GLD_TEXENV_DECAL_RGB		0
+#define GLD_TEXENV_DECAL_RGBA		1
+#define GLD_TEXENV_DECAL_ALPHA		2
+#define GLD_TEXENV_REPLACE_RGB		3
+#define GLD_TEXENV_REPLACE_RGBA		4
+#define GLD_TEXENV_REPLACE_ALPHA	5
+#define GLD_TEXENV_MODULATE_RGB		6
+#define GLD_TEXENV_MODULATE_RGBA	7
+#define GLD_TEXENV_MODULATE_ALPHA	8
+#define GLD_TEXENV_BLEND_RGB		9
+#define GLD_TEXENV_BLEND_RGBA		10
+#define GLD_TEXENV_BLEND_ALPHA		11
+#define GLD_TEXENV_ADD_RGB			12
+#define GLD_TEXENV_ADD_RGBA			13
+#define GLD_TEXENV_ADD_ALPHA		14
+
+// Per-stage (i.e. per-unit) texture environment.
+// Holds the colour and alpha operation of one Direct3D texture stage
+// together with the two arguments each operation combines.
+typedef struct {
+	DWORD			ColorArg1;	// Colour argument 1
+	D3DTEXTUREOP	ColorOp;	// Colour operation
+	DWORD			ColorArg2;	// Colour argument 2
+	DWORD			AlphaArg1;	// Alpha argument 1
+	D3DTEXTUREOP	AlphaOp;	// Alpha operation
+	DWORD			AlphaArg2;	// Alpha argument 2
+} GLD_texenv;
+
+// TODO: Do we really need to set ARG1 and ARG2 every time?
+//       They seem to always be TEXTURE and CURRENT respectively.
+
+// Texture stage setup for each GLD_TEXENV_* combination.
+// Entries appear in the same order as the GLD_TEXENV_* constants.
+// In every entry Arg1 is the texture and Arg2 is the current colour, so
+// SELECTARG1 picks the texture value and SELECTARG2 the fragment value.
+//
+// Legend for the per-entry formulas:
+// C = Colour out
+// A = Alpha out
+// Ct = Colour from Texture
+// Cf = Colour from fragment (diffuse)
+// At = Alpha from Texture
+// Af = Alpha from fragment (diffuse)
+// Cc = GL_TEXTURE_ENV_COLOUR (GL_BLEND)
+const GLD_texenv gldTexEnv[] = {
+	// DECAL_RGB: C=Ct, A=Af
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// DECAL_RGBA: C=Cf(1-At)+CtAt, A=Af
+	{D3DTA_TEXTURE, D3DTOP_BLENDTEXTUREALPHA, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// DECAL_ALPHA: <undefined> use DECAL_RGB
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+
+	// REPLACE_RGB: C=Ct, A=Af
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// REPLACE_RGBA: C=Ct, A=At
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT},
+	// REPLACE_ALPHA: C=Cf, A=At
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT},
+
+	// MODULATE_RGB: C=CfCt, A=Af
+	{D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// MODULATE_RGBA: C=CfCt, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+	// MODULATE_ALPHA: C=Cf, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+
+	//
+	// DX7 does not support D3DTOP_LERP, so the LERP-based entries are
+	// compiled out and D3DTOP_ADDSMOOTH is used as an approximation.
+	// NOTE(review): ADDSMOOTH is documented as Arg1 + Arg2*(1 - Arg1),
+	// i.e. Ct + Cf(1-Ct), which ignores Cc - confirm this is acceptable.
+	//
+#if 0
+	// BLEND_RGB: C=Cf(1-Ct)+CcCt, A=Af
+	{D3DTA_TEXTURE, D3DTOP_LERP, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// BLEND_RGBA: C=Cf(1-Ct)+CcCt, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_LERP, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+#else
+	// BLEND_RGB: wanted C=Cf(1-Ct)+CcCt, A=Af
+	{D3DTA_TEXTURE, D3DTOP_ADDSMOOTH, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// BLEND_RGBA: wanted C=Cf(1-Ct)+CcCt, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_ADDSMOOTH, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+#endif
+	// BLEND_ALPHA: C=Cf, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+
+	// ADD_RGB: C=Cf+Ct, A=Af
+	{D3DTA_TEXTURE, D3DTOP_ADD, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// ADD_RGBA: C=Cf+Ct, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_ADD, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+	// ADD_ALPHA: C=Cf, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+};
+
+//---------------------------------------------------------------------------
+
+// Map an OpenGL texture wrap mode onto a Direct3D texture address mode.
+// GL_CLAMP becomes D3DTADDRESS_CLAMP; every other value (GL_REPEAT
+// included) becomes D3DTADDRESS_WRAP. No validation is performed.
+D3DTEXTUREADDRESS _gldConvertWrap(
+	GLenum wrap)
+{
+	if (wrap == GL_CLAMP)
+		return D3DTADDRESS_CLAMP;
+
+	return D3DTADDRESS_WRAP;
+}
+
+//---------------------------------------------------------------------------
+
+// Map an OpenGL magnification filter onto the Direct3D equivalent.
+// Only GL_LINEAR and GL_NEAREST are expected (asserted); anything that is
+// not GL_LINEAR is treated as point sampling.
+D3DTEXTUREMAGFILTER _gldConvertMagFilter(
+	GLenum magfilter)
+{
+	ASSERT(magfilter==GL_LINEAR || magfilter==GL_NEAREST);
+
+	if (magfilter == GL_LINEAR)
+		return D3DTFG_LINEAR;
+
+	return D3DTFG_POINT;
+}
+
+//---------------------------------------------------------------------------
+
+// Split an OpenGL minification filter into the separate Direct3D
+// minification and mipmap filters.
+//
+//   minfilter   - one of the six GL minification filter enums
+//   min_filter  - receives the filter used within a mip level
+//   mip_filter  - receives the filter used between mip levels
+//                 (D3DTFP_NONE when the GL filter does not use mipmaps)
+//
+// An unrecognised value asserts in debug builds. In builds where ASSERT
+// compiles to nothing, both outputs are still written (point sampling,
+// no mipmapping) so the caller never passes uninitialised values to D3D.
+void _gldConvertMinFilter(
+	GLenum minfilter,
+	D3DTEXTUREMINFILTER *min_filter,
+	D3DTEXTUREMIPFILTER *mip_filter)
+{
+	switch (minfilter) {
+	case GL_NEAREST:
+		*min_filter = D3DTFN_POINT;
+		*mip_filter = D3DTFP_NONE;
+		break;
+	case GL_LINEAR:
+		*min_filter = D3DTFN_LINEAR;
+		*mip_filter = D3DTFP_NONE;
+		break;
+	case GL_NEAREST_MIPMAP_NEAREST:
+		*min_filter = D3DTFN_POINT;
+		*mip_filter = D3DTFP_POINT;
+		break;
+	case GL_LINEAR_MIPMAP_NEAREST:
+		*min_filter = D3DTFN_LINEAR;
+		*mip_filter = D3DTFP_POINT;
+		break;
+	case GL_NEAREST_MIPMAP_LINEAR:
+		*min_filter = D3DTFN_POINT;
+		*mip_filter = D3DTFP_LINEAR;
+		break;
+	case GL_LINEAR_MIPMAP_LINEAR:
+		*min_filter = D3DTFN_LINEAR;
+		*mip_filter = D3DTFP_LINEAR;
+		break;
+	default:
+		ASSERT(0);
+		// Fall back to a safe, always-supported combination
+		*min_filter = D3DTFN_POINT;
+		*mip_filter = D3DTFP_NONE;
+		break;
+	}
+}
+
+//---------------------------------------------------------------------------
+
+// Choose a D3DX surface format for an OpenGL internal texture format.
+// Accepts the sized and unsized GL internal formats as well as the legacy
+// component counts 1..4. Unknown values assert and fall back to
+// D3DX_SF_A8R8G8B8.
+D3DX_SURFACEFORMAT _gldGLFormatToD3DFormat(
+	GLenum internalFormat)
+{
+	switch (internalFormat) {
+	case GL_INTENSITY:
+	case GL_INTENSITY4:
+	case GL_INTENSITY8:
+	case GL_INTENSITY12:
+	case GL_INTENSITY16:
+		// LUMINANCE != INTENSITY, but D3D doesn't have I8 textures
+		return D3DX_SF_L8;
+	case 1:
+	case GL_LUMINANCE:
+	case GL_LUMINANCE4:
+	case GL_LUMINANCE8:
+	case GL_LUMINANCE12:
+	case GL_LUMINANCE16:
+		return D3DX_SF_L8;
+	case GL_ALPHA:
+	case GL_ALPHA4:
+	case GL_ALPHA8:
+	case GL_ALPHA12:
+	case GL_ALPHA16:
+		return D3DX_SF_A8;
+	// Colour-index textures are given a 32-bit RGB surface
+	case GL_COLOR_INDEX:
+	case GL_COLOR_INDEX1_EXT:
+	case GL_COLOR_INDEX2_EXT:
+	case GL_COLOR_INDEX4_EXT:
+	case GL_COLOR_INDEX8_EXT:
+	case GL_COLOR_INDEX12_EXT:
+	case GL_COLOR_INDEX16_EXT:
+		return D3DX_SF_X8R8G8B8;
+	case 2:
+	case GL_LUMINANCE_ALPHA:
+	case GL_LUMINANCE4_ALPHA4:
+	case GL_LUMINANCE6_ALPHA2:
+	case GL_LUMINANCE8_ALPHA8:
+	case GL_LUMINANCE12_ALPHA4:
+	case GL_LUMINANCE12_ALPHA12:
+	case GL_LUMINANCE16_ALPHA16:
+		return D3DX_SF_A8L8;
+	case GL_R3_G3_B2:
+		// TODO: Mesa does not support RGB332 internally
+		return D3DX_SF_X4R4G4B4; //D3DFMT_R3G3B2;
+	case GL_RGB4:
+		return D3DX_SF_X4R4G4B4;
+	case GL_RGB5:
+		return D3DX_SF_R5G5B5;
+	// All remaining RGB formats use a 24-bit surface
+	case 3:
+	case GL_RGB:
+	case GL_RGB8:
+	case GL_RGB10:
+	case GL_RGB12:
+	case GL_RGB16:
+		return D3DX_SF_R8G8B8;
+	case GL_RGBA4:
+		return D3DX_SF_A4R4G4B4;
+	// All remaining RGBA formats use a 32-bit surface
+	case 4:
+	case GL_RGBA:
+	case GL_RGBA2:
+	case GL_RGBA8:
+	case GL_RGB10_A2:
+	case GL_RGBA12:
+	case GL_RGBA16:
+		return D3DX_SF_A8R8G8B8;
+	case GL_RGB5_A1:
+		return D3DX_SF_A1R5G5B5;
+	}
+
+	// Unknown internal format
+	ASSERT(0);
+
+	// Return an acceptable default
+	return D3DX_SF_A8R8G8B8;
+}
+
+//---------------------------------------------------------------------------
+
+// Examine a Direct3D texture surface and return the OpenGL base internal
+// format (GL_RGB, GL_RGBA or GL_ALPHA) that matches its pixel format.
+//
+//   pTex - texture surface to inspect
+//
+// Unrecognised surface formats assert and return GL_RGBA.
+GLenum _gldDecodeBaseFormat(
+	IDirectDrawSurface7 *pTex)
+{
+	// NOTE: We can't use any base format info from Mesa because D3D might have
+	// used a different texture format when we used D3DXCreateTexture().
+
+	// Base internal format is one of (Red Book p355):
+	//	GL_ALPHA,
+	//	GL_LUMINANCE,
+	//	GL_LUMINANCE_ALPHA,
+	//	GL_INTENSITY,
+	//	GL_RGB,
+	//	GL_RGBA
+
+	// NOTE: INTENSITY not used (not supported by Direct3D)
+	//       LUMINANCE has same texture functions as RGB
+	//       LUMINANCE_ALPHA has same texture functions as RGBA
+
+	// TODO: cache the format instead of querying the surface every call
+
+	D3DX_SURFACEFORMAT	sf;
+
+	sf = _gldD3DXFormatFromSurface(pTex);
+
+	switch (sf) {
+	// Formats without an alpha channel (luminance is treated as RGB)
+	case D3DX_SF_R8G8B8:
+	case D3DX_SF_X8R8G8B8:
+	case D3DX_SF_R5G6B5:
+	case D3DX_SF_R5G5B5:
+	case D3DX_SF_R3G3B2:
+	case D3DX_SF_X4R4G4B4:
+	case D3DX_SF_PALETTE8:
+	case D3DX_SF_L8:
+		return GL_RGB;
+	// Formats with an alpha channel (luminance-alpha is treated as RGBA).
+	// A8R3G3B2, A8P8 and A4L4 are unsupported by DX7 and so not listed.
+	case D3DX_SF_A8R8G8B8:
+	case D3DX_SF_A1R5G5B5:
+	case D3DX_SF_A4R4G4B4:
+	case D3DX_SF_A8L8:
+		return GL_RGBA;
+	case D3DX_SF_A8:
+		return GL_ALPHA;
+	// Compressed texture formats. Need to check these...
+	// DXT2 and DXT4 are unsupported by DX7 and so not listed.
+	case D3DX_SF_DXT1:
+	case D3DX_SF_DXT3:
+	case D3DX_SF_DXT5:
+		return GL_RGBA;
+	default:
+		break;
+	}
+
+	// Fell through. Return arbitrary default.
+	ASSERT(0); // BANG!
+	return GL_RGBA;
+}
+
+//---------------------------------------------------------------------------
+
+// Return the texture format descriptor that matches a D3DX surface format.
+// Formats with a direct Mesa counterpart use the _mesa_texformat_* tables;
+// the X-padded formats use the _gld_texformat_* tables defined in this file.
+// An unsupported format asserts and yields NULL.
+const struct gl_texture_format* _gldMesaFormatForD3DFormat(
+	D3DX_SURFACEFORMAT d3dfmt)
+{
+	const struct gl_texture_format *pFormat = NULL;
+
+	switch (d3dfmt) {
+	// Formats Mesa supports natively
+	case D3DX_SF_A8R8G8B8:	pFormat = &_mesa_texformat_argb8888;	break;
+	case D3DX_SF_R8G8B8:	pFormat = &_mesa_texformat_rgb888;		break;
+	case D3DX_SF_R5G6B5:	pFormat = &_mesa_texformat_rgb565;		break;
+	case D3DX_SF_A4R4G4B4:	pFormat = &_mesa_texformat_argb4444;	break;
+	case D3DX_SF_A1R5G5B5:	pFormat = &_mesa_texformat_argb1555;	break;
+	case D3DX_SF_A8L8:		pFormat = &_mesa_texformat_al88;		break;
+	case D3DX_SF_R3G3B2:	pFormat = &_mesa_texformat_rgb332;		break;
+	case D3DX_SF_A8:		pFormat = &_mesa_texformat_a8;			break;
+	case D3DX_SF_L8:		pFormat = &_mesa_texformat_l8;			break;
+	// Direct3D formats with no Mesa equivalent
+	case D3DX_SF_X8R8G8B8:	pFormat = &_gld_texformat_X8R8G8B8;		break;
+	case D3DX_SF_R5G5B5:	pFormat = &_gld_texformat_X1R5G5B5;		break;
+	case D3DX_SF_X4R4G4B4:	pFormat = &_gld_texformat_X4R4G4B4;		break;
+	default:
+		break;
+	}
+
+	if (pFormat == NULL) {
+		// Getting here means an unsupported format was allowed
+		// through somewhere else.
+		ASSERT(0);
+	}
+
+	return pFormat;
+}
+
+//---------------------------------------------------------------------------
+
+// Query a DirectDraw surface for its pixel format and translate the
+// result into the corresponding D3DX surface format enum.
+D3DX_SURFACEFORMAT _gldD3DXFormatFromSurface(
+	IDirectDrawSurface7	*pSurface)
+{
+	DDPIXELFORMAT pixelFormat;
+
+	// The size field is filled in before the structure is handed over
+	pixelFormat.dwSize = sizeof(DDPIXELFORMAT);
+	_GLD_DX7_TEX(GetPixelFormat(pSurface, &pixelFormat));
+
+	return D3DXMakeSurfaceFormat(&pixelFormat);
+}
+
+//---------------------------------------------------------------------------
+
+// Fill an entire surface with a single colour using a colour-fill blit.
+//
+//   pSurface - surface to clear
+//   dwColour - fill colour
+//
+// The result of the blit is not checked.
+// NOTE(review): the parameter is declared as IDirectDrawSurface* but the
+// blit goes through the IDirectDrawSurface7 macro and callers in this file
+// pass IDirectDrawSurface7* - confirm the prototype before changing it.
+void _gldClearSurface(
+	IDirectDrawSurface *pSurface,
+	D3DCOLOR dwColour)
+{
+	// Zero every field so that members unrelated to the colour fill are
+	// not handed to DirectDraw uninitialised.
+	DDBLTFX bltFX = {0};	// Used for colour fill
+
+	// Initialise struct
+	bltFX.dwSize = sizeof(bltFX);
+	// Set clear colour
+	bltFX.dwFillColor = dwColour;
+	// Clear surface. HW accelerated if available.
+	IDirectDrawSurface7_Blt(pSurface, NULL, NULL, NULL, DDBLT_COLORFILL, &bltFX);
+}
+
+//---------------------------------------------------------------------------
+// Copy* functions
+//---------------------------------------------------------------------------
+
+// DX7 placeholder for CopyTexImage1D.
+// Not implemented: the body is empty, so calling it has no effect.
+void gldCopyTexImage1D_DX7(
+	GLcontext *ctx,
+	GLenum target, GLint level,
+	GLenum internalFormat,
+	GLint x, GLint y,
+	GLsizei width, GLint border )
+{
+	// TODO
+}
+
+//---------------------------------------------------------------------------
+
+// DX7 placeholder for CopyTexImage2D.
+// Not implemented: the body is empty, so calling it has no effect.
+void gldCopyTexImage2D_DX7(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLenum internalFormat,
+	GLint x,
+	GLint y,
+	GLsizei width,
+	GLsizei height,
+	GLint border)
+{
+	// TODO
+}
+
+//---------------------------------------------------------------------------
+
+// DX7 placeholder for CopyTexSubImage1D.
+// Not implemented: the body is empty, so calling it has no effect.
+void gldCopyTexSubImage1D_DX7(
+	GLcontext *ctx,
+	GLenum target, GLint level,
+	GLint xoffset, GLint x, GLint y, GLsizei width )
+{
+	// TODO
+}
+
+//---------------------------------------------------------------------------
+
+// DX7 placeholder for CopyTexSubImage2D.
+// Not implemented: the body is empty, so calling it has no effect.
+void gldCopyTexSubImage2D_DX7(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLint xoffset,
+	GLint yoffset,
+	GLint x,
+	GLint y,
+	GLsizei width,
+	GLsizei height)
+{
+	// TODO
+}
+
+//---------------------------------------------------------------------------
+
+// DX7 placeholder for CopyTexSubImage3D.
+// Not implemented: the body is empty, so calling it has no effect.
+void gldCopyTexSubImage3D_DX7(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLint xoffset,
+	GLint yoffset,
+	GLint zoffset,
+	GLint x,
+	GLint y,
+	GLsizei width,
+	GLsizei height )
+{
+	// TODO ?
+}
+
+//---------------------------------------------------------------------------
+// Bitmap/Pixel functions
+//---------------------------------------------------------------------------
+
+// Convert a GL y coordinate into a top-down y coordinate by subtracting it
+// from the context height. Requires a variable named gldCtx to be in scope
+// at the point of use.
+#define GLD_FLIP_Y(y) (gldCtx->dwHeight - (y))
+
+// Vertex format of _GLD_IMAGE_VERTEX: pre-transformed position (x, y, z,
+// rhw) plus one set of texture coordinates.
+#define _GLD_FVF_IMAGE	(D3DFVF_XYZRHW | D3DFVF_TEX1)
+
+// One corner of the textured quad used to draw images and bitmaps.
+// The field layout must match _GLD_FVF_IMAGE.
+typedef struct {
+	FLOAT	x, y;		// 2D raster coords
+	FLOAT	z;			// depth value
+	FLOAT	rhw;		// reciprocal homogenous W (always 1.0f)
+	FLOAT	tu, tv;		// texture coords
+} _GLD_IMAGE_VERTEX;
+
+//---------------------------------------------------------------------------
+
+// Draw an image held in a texture surface as a screen-space quad,
+// implementing PixelZoom and clipping. Any fragment operations currently
+// enabled will be used.
+//
+//   ctx        - GL context
+//   bChromakey - TRUE to discard texels whose alpha is not above 0x7f
+//                (used for glBitmap() images)
+//   x, y       - GL position of the image (y is flipped internally)
+//   width      - width of the input image in texels
+//   height     - height of the input image in texels
+//   pImage     - texture surface that already contains the image; it may
+//                be larger than width x height
+//
+// Returns S_OK when there is nothing to draw, otherwise the result of
+// querying the surface or of the draw call. Previously S_OK was returned
+// unconditionally, hiding failures from callers that check the result.
+//
+// NOTE:	This DX7 version does not create a new texture in which
+//			to copy the input image, as the image is already a texture.
+HRESULT _gldDrawPixels(
+	GLcontext *ctx,
+	BOOL bChromakey,	// Alpha test for glBitmap() images
+	GLint x,			// GL x position
+	GLint y,			// GL y position (needs flipping)
+	GLsizei width,		// Width of input image
+	GLsizei height,		// Height of input image
+	IDirectDrawSurface7 *pImage)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	DDSURFACEDESC2		ddsd;
+	_GLD_IMAGE_VERTEX	v[4];
+	HRESULT				hr;
+
+	float				ZoomWidth, ZoomHeight;
+	float				ScaleWidth, ScaleHeight;
+	float				cfEpsilon;
+
+	// An empty image draws nothing. Bailing out here also protects the
+	// divisions below from a zero height.
+	if (width <= 0 || height <= 0)
+		return S_OK;
+
+	// Fixup for rasterisation rules
+	cfEpsilon = 1.0f / (float)height;
+
+	//
+	// Set up the quad like this (ascii-art ahead!)
+	//
+	// 3--2
+	// |  |
+	// 0--1
+	//
+	//
+
+	// Set depth
+	v[0].z = v[1].z = v[2].z = v[3].z = ctx->Current.RasterPos[2];
+	// Set Reciprocal Homogenous W
+	v[0].rhw = v[1].rhw = v[2].rhw = v[3].rhw = 1.0f;
+
+	// Set texcoords
+	// Examine texture size - if different to input width and height
+	// then we'll need to munge the texcoords to fit.
+	ddsd.dwSize = sizeof(DDSURFACEDESC2);
+	hr = IDirectDrawSurface7_GetSurfaceDesc(pImage, &ddsd);
+	if (FAILED(hr)) {
+		// Without the surface size the texcoords cannot be computed
+		return hr;
+	}
+	ScaleWidth	= (float)width / (float)ddsd.dwWidth;
+	ScaleHeight	= (float)height / (float)ddsd.dwHeight;
+	v[0].tu = 0.0f;			v[0].tv = 0.0f;
+	v[1].tu = ScaleWidth;	v[1].tv = 0.0f;
+	v[2].tu = ScaleWidth;	v[2].tv = ScaleHeight;
+	v[3].tu = 0.0f;			v[3].tv = ScaleHeight;
+
+	// Set raster positions
+	ZoomWidth = (float)width * ctx->Pixel.ZoomX;
+	ZoomHeight = (float)height * ctx->Pixel.ZoomY;
+
+	v[0].x = x;				v[0].y = GLD_FLIP_Y(y+cfEpsilon);
+	v[1].x = x+ZoomWidth;	v[1].y = GLD_FLIP_Y(y+cfEpsilon);
+	v[2].x = x+ZoomWidth;	v[2].y = GLD_FLIP_Y(y+ZoomHeight+cfEpsilon);
+	v[3].x = x;				v[3].y = GLD_FLIP_Y(y+ZoomHeight+cfEpsilon);
+
+	// Draw image with full HW acceleration
+	// NOTE: Be nice to use a State Block for all this state...
+	IDirect3DDevice7_SetTexture(gld->pDev, 0, pImage);
+	IDirect3DDevice7_SetRenderState(gld->pDev, D3DRENDERSTATE_CULLMODE, D3DCULL_NONE);
+	IDirect3DDevice7_SetRenderState(gld->pDev, D3DRENDERSTATE_CLIPPING, TRUE);
+	IDirect3DDevice7_SetTextureStageState(gld->pDev, 0, D3DTSS_MINFILTER, D3DTFN_POINT);
+	IDirect3DDevice7_SetTextureStageState(gld->pDev, 0, D3DTSS_MIPFILTER, D3DTFP_POINT);
+	IDirect3DDevice7_SetTextureStageState(gld->pDev, 0, D3DTSS_MAGFILTER, D3DTFG_POINT);
+	IDirect3DDevice7_SetTextureStageState(gld->pDev, 0, D3DTSS_ADDRESSU, D3DTADDRESS_CLAMP);
+	IDirect3DDevice7_SetTextureStageState(gld->pDev, 0, D3DTSS_ADDRESSV, D3DTADDRESS_CLAMP);
+	IDirect3DDevice7_SetTextureStageState(gld->pDev, 0, D3DTSS_COLOROP, D3DTOP_SELECTARG1);
+	IDirect3DDevice7_SetTextureStageState(gld->pDev, 0, D3DTSS_ALPHAOP, D3DTOP_SELECTARG1);
+	IDirect3DDevice7_SetTextureStageState(gld->pDev, 0, D3DTSS_COLORARG1, D3DTA_TEXTURE);
+	IDirect3DDevice7_SetTextureStageState(gld->pDev, 0, D3DTSS_ALPHAARG1, D3DTA_TEXTURE);
+	// Ensure texture unit 1 is disabled
+	IDirect3DDevice7_SetTextureStageState(gld->pDev, 1, D3DTSS_COLOROP, D3DTOP_DISABLE);
+	IDirect3DDevice7_SetTextureStageState(gld->pDev, 1, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
+
+	//
+	// Emulate Chromakey with an Alpha Test.
+	// [Alpha Test is more widely supported anyway]
+	//
+	if (bChromakey) {
+		// Switch on alpha testing
+		IDirect3DDevice7_SetRenderState(gld->pDev, D3DRENDERSTATE_ALPHATESTENABLE, TRUE);
+		// Fragment passes if alpha is greater than reference value
+		IDirect3DDevice7_SetRenderState(gld->pDev, D3DRENDERSTATE_ALPHAFUNC, D3DCMP_GREATER);
+		// Set alpha reference value between Bitmap alpha values of
+		// zero (transparent) and one (opaque).
+		IDirect3DDevice7_SetRenderState(gld->pDev, D3DRENDERSTATE_ALPHAREF, 0x7f);
+	}
+
+	hr = IDirect3DDevice7_DrawPrimitive(gld->pDev, D3DPT_TRIANGLEFAN, _GLD_FVF_IMAGE, v, 4, 0);
+
+	// The device state set above is not restored here; instead all GL
+	// state is flagged as changed. This must happen even if the draw failed.
+	FLUSH_VERTICES(ctx, _NEW_ALL);
+
+	return hr;
+}
+
+//---------------------------------------------------------------------------
+
+// Draw a client-memory image: upload it into a temporary A8R8G8B8 texture
+// using Mesa's texture store function (which unpacks the image and applies
+// transfer ops), then draw that texture as a quad via _gldDrawPixels().
+//
+//   ctx           - GL context
+//   x, y          - GL position of the image
+//   width, height - size of the source image in pixels
+//   format, type  - layout of 'pixels'
+//   unpack        - pixel unpacking parameters for 'pixels'
+//   pixels        - source image
+//
+// Returns silently if the texture cannot be created or locked.
+void gld_DrawPixels_DX7(
+	GLcontext *ctx,
+	GLint x, GLint y, GLsizei width, GLsizei height,
+	GLenum format, GLenum type,
+	const struct gl_pixelstore_attrib *unpack,
+	const GLvoid *pixels )
+{
+	GLD_context			*gldCtx;
+	GLD_driver_dx7		*gld;
+
+	IDirectDrawSurface7	*pImage;
+	HRESULT				hr;
+	DDSURFACEDESC2		ddsd;
+	DWORD				dwFlags;
+	D3DX_SURFACEFORMAT	sf;
+	DWORD				dwMipmaps;
+
+	// D3DXCreateTexture() may alter the dimensions it is given (e.g. pad
+	// them to a power of two), so hand it copies. 'width' and 'height'
+	// must keep describing the source image: they are used below to read
+	// 'pixels' and to size the quad.
+	DWORD				dwTexWidth;
+	DWORD				dwTexHeight;
+
+	const struct gl_texture_format	*MesaFormat;
+
+	MesaFormat = _mesa_choose_tex_format(ctx, format, format, type);
+
+	gldCtx	= GLD_GET_CONTEXT(ctx);
+	gld		= GLD_GET_DX7_DRIVER(gldCtx);
+
+	dwFlags		= D3DX_TEXTURE_NOMIPMAP;
+	sf			= D3DX_SF_A8R8G8B8;
+	dwMipmaps	= 1;
+	dwTexWidth	= (DWORD)width;
+	dwTexHeight	= (DWORD)height;
+
+	// NOTE(review): the format argument is also passed by pointer; the code
+	// below assumes the surface really is A8R8G8B8 - confirm D3DX cannot
+	// substitute a different format here.
+	hr = D3DXCreateTexture(
+		gld->pDev,
+		&dwFlags,
+		&dwTexWidth, &dwTexHeight,
+		&sf,		// format
+		NULL,		// palette
+		&pImage,	// Output texture
+		&dwMipmaps);
+	if (FAILED(hr)) {
+		return;
+	}
+
+	// D3DXCreateTexture() may not clear the texture it creates.
+	_gldClearSurface(pImage, 0);
+
+	//
+	// Use Mesa to fill in image
+	//
+
+	// Lock all of surface
+	ddsd.dwSize = sizeof(DDSURFACEDESC2);
+	dwFlags = DDLOCK_SURFACEMEMORYPTR | DDLOCK_WAIT;
+	hr = IDirectDrawSurface7_Lock(pImage, NULL, &ddsd, dwFlags, NULL);
+	if (FAILED(hr)) {
+		SAFE_RELEASE_SURFACE7(pImage);
+		return;
+	}
+
+	// unpack image, apply transfer ops and store directly in texture
+	MesaFormat->StoreImage(
+		ctx,
+		2,
+		GL_RGBA,
+		&_mesa_texformat_argb8888,
+		ddsd.lpSurface,
+		width, height, 1, 0, 0, 0,
+		ddsd.lPitch,
+		0, /* dstImageStride */
+		format, type, pixels, unpack);
+
+	IDirectDrawSurface7_Unlock(pImage, NULL);
+
+	_gldDrawPixels(ctx, FALSE, x, y, width, height, pImage);
+
+	SAFE_RELEASE_SURFACE7(pImage);
+}
+
+//---------------------------------------------------------------------------
+
+// DX7 placeholder for ReadPixels.
+// Not implemented: the whole body is compiled out with #if 0, so calling
+// this function has no effect and 'dest' is left untouched.
+// The disabled code uses Direct3D 8 interfaces (IDirect3DSurface8,
+// IDirect3DDevice8_*) and would have to be ported to DirectDraw 7 before
+// it could be enabled.
+void gld_ReadPixels_DX7(
+	GLcontext *ctx,
+	GLint x, GLint y, GLsizei width, GLsizei height,
+	GLenum format, GLenum type,
+	const struct gl_pixelstore_attrib *pack,
+	GLvoid *dest)
+{
+// TODO
+#if 0
+	GLD_context						*gldCtx;
+	GLD_driver_dx7					*gld;
+
+	IDirect3DSurface8				*pBackbuffer = NULL;
+	IDirect3DSurface8				*pNativeImage = NULL;
+	IDirect3DSurface8				*pCanonicalImage = NULL;
+
+	D3DSURFACE_DESC					d3dsd;
+	RECT							rcSrc; // Source rect
+	POINT							ptDst; // Dest point
+	HRESULT							hr;
+	D3DLOCKED_RECT					d3dLockedRect;
+	struct gl_pixelstore_attrib		srcPacking;
+	int								i;
+	GLint							DstRowStride;
+	const struct gl_texture_format	*MesaFormat;
+
+	// Only colour reads are handled
+	switch (format) {
+	case GL_STENCIL_INDEX:
+	case GL_DEPTH_COMPONENT:
+		return;
+	}
+	
+	MesaFormat = _mesa_choose_tex_format(ctx, format, format, type);
+	DstRowStride = _mesa_image_row_stride(pack, width, format, type);
+
+	gldCtx	= GLD_GET_CONTEXT(ctx);
+	gld		= GLD_GET_DX7_DRIVER(gldCtx);
+
+	// Get backbuffer
+	hr = IDirect3DDevice8_GetBackBuffer(
+		gld->pDev,
+		0, // First backbuffer
+		D3DBACKBUFFER_TYPE_MONO,
+		&pBackbuffer);
+	if (FAILED(hr))
+		return;
+
+	// Get backbuffer description
+	hr = IDirect3DSurface8_GetDesc(pBackbuffer, &d3dsd);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX7_return;
+	}
+
+	// Create a surface compatible with backbuffer
+	hr = IDirect3DDevice8_CreateImageSurface(
+		gld->pDev, 
+		width,
+		height,
+		d3dsd.Format,
+		&pNativeImage);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX7_return;
+	}
+
+	// Compute source rect and dest point
+	SetRect(&rcSrc, 0, 0, width, height);
+	OffsetRect(&rcSrc, x, GLD_FLIP_HEIGHT(y, height));
+	ptDst.x = ptDst.y = 0;
+
+	// Get source pixels.
+	//
+	// This intermediate surface ensures that we can use CopyRects()
+	// instead of relying on D3DXLoadSurfaceFromSurface(), which may
+	// try and lock the backbuffer. This way seems safer.
+	//
+	hr = IDirect3DDevice8_CopyRects(
+		gld->pDev,
+		pBackbuffer,
+		&rcSrc,
+		1,
+		pNativeImage,
+		&ptDst);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX7_return;
+	}
+
+	// Create an RGBA8888 surface
+	hr = IDirect3DDevice8_CreateImageSurface(
+		gld->pDev, 
+		width,
+		height,
+		D3DFMT_A8R8G8B8,
+		&pCanonicalImage);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX7_return;
+	}
+
+	// Convert to RGBA8888
+	hr = D3DXLoadSurfaceFromSurface(
+		pCanonicalImage,	// Dest surface
+		NULL, NULL,			// Dest palette, RECT
+		pNativeImage,		// Src surface
+		NULL, NULL,			// Src palette, RECT
+		D3DX_FILTER_NONE,	// Filter
+		0);					// Colourkey
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX7_return;
+	}
+
+	// Describe the locked surface as a tightly packed source image
+	srcPacking.Alignment	= 1;
+	srcPacking.ImageHeight	= height;
+	srcPacking.LsbFirst		= GL_FALSE;
+	srcPacking.RowLength	= 0;
+	srcPacking.SkipImages	= 0;
+	srcPacking.SkipPixels	= 0;
+	srcPacking.SkipRows		= 0;
+	srcPacking.SwapBytes	= GL_FALSE;
+
+	// Lock all of image
+	hr = IDirect3DSurface8_LockRect(pCanonicalImage, &d3dLockedRect, NULL, 0);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX7_return;
+	}
+
+	// We need to flip the data. Yuck.
+	// Perhaps Mesa has a span packer we can use in future...
+	// NOTE(review): 'texImage' is not declared in this function, so this
+	// loop would not compile as written if the block were enabled.
+	for (i=0; i<height; i++) {
+		BYTE *pDestRow = (BYTE*)_mesa_image_address(2,pack, dest, width, height, format, type, 0, i, 0);
+		BYTE *pSrcRow = (BYTE*)d3dLockedRect.pBits + (d3dLockedRect.Pitch * (height-i-1));
+		texImage->TexFormat->StoreImage(
+			ctx,
+			2,
+			GL_RGBA,				// base format
+			MesaFormat,				// dst format
+			pDestRow,				// dest addr
+			width, 1, 1, 0, 0, 0,	// src x,y,z & dst offsets x,y,z
+			DstRowStride,			// dst row stride
+			0,						// dstImageStride
+			GL_BGRA,				// src format
+			GL_UNSIGNED_BYTE,		// src type
+			pSrcRow,				// src addr
+			&srcPacking);			// packing params of source image
+	}
+
+	IDirect3DSurface8_UnlockRect(pCanonicalImage);
+
+	// Common exit: release whichever surfaces were obtained
+gld_ReadPixels_DX7_return:
+	SAFE_RELEASE_SURFACE8(pCanonicalImage);
+	SAFE_RELEASE_SURFACE8(pNativeImage);
+	SAFE_RELEASE_SURFACE8(pBackbuffer);
+#endif
+}
+
+//---------------------------------------------------------------------------
+
+// DX7 placeholder for CopyPixels.
+// Not implemented: the whole body is compiled out with #if 0, so calling
+// this function has no effect.
+// The disabled code uses Direct3D 8 interfaces (IDirect3DSurface8,
+// IDirect3DDevice8_*) and would have to be ported to DirectDraw 7 before
+// it could be enabled.
+void gld_CopyPixels_DX7(
+	GLcontext *ctx,
+	GLint srcx,
+	GLint srcy,
+	GLsizei width,
+	GLsizei height,
+	GLint dstx,
+	GLint dsty,
+	GLenum type)
+{
+// TODO
+#if 0
+	//
+	// NOTE: Not allowed to copy vidmem to vidmem!
+	//       Therefore we use an intermediate image surface.
+	//
+
+	GLD_context			*gldCtx;
+	GLD_driver_dx7		*gld;
+
+	IDirect3DSurface8	*pBackbuffer;
+	D3DSURFACE_DESC		d3dsd;
+	IDirect3DSurface8	*pImage;
+	RECT				rcSrc; // Source rect
+	POINT				ptDst; // Dest point
+	HRESULT				hr;
+
+	// Only backbuffer
+	if (type != GL_COLOR)
+		return;
+
+	gldCtx	= GLD_GET_CONTEXT(ctx);
+	gld		= GLD_GET_DX7_DRIVER(gldCtx);
+
+	// Get backbuffer
+	hr = IDirect3DDevice8_GetBackBuffer(
+		gld->pDev,
+		0, // First backbuffer
+		D3DBACKBUFFER_TYPE_MONO,
+		&pBackbuffer);
+	if (FAILED(hr))
+		return;
+
+	// Get backbuffer description
+	hr = IDirect3DSurface8_GetDesc(pBackbuffer, &d3dsd);
+	if (FAILED(hr)) {
+		IDirect3DSurface8_Release(pBackbuffer);
+		return;
+	}
+
+	// Create a surface compatible with backbuffer
+	hr = IDirect3DDevice8_CreateImageSurface(
+		gld->pDev, 
+		width,
+		height,
+		d3dsd.Format,
+		&pImage);
+	if (FAILED(hr)) {
+		IDirect3DSurface8_Release(pBackbuffer);
+		return;
+	}
+
+	// Compute source rect and dest point
+	SetRect(&rcSrc, 0, 0, width, height);
+	OffsetRect(&rcSrc, srcx, GLD_FLIP_HEIGHT(srcy, height));
+	ptDst.x = ptDst.y = 0;
+
+	// Get source pixels
+	hr = IDirect3DDevice8_CopyRects(
+		gld->pDev,
+		pBackbuffer,
+		&rcSrc,
+		1,
+		pImage,
+		&ptDst);
+	// The backbuffer is no longer needed once the copy has been attempted
+	IDirect3DSurface8_Release(pBackbuffer);
+	if (FAILED(hr)) {
+		IDirect3DSurface8_Release(pImage);
+		return;
+	}
+
+	// Draw the copied region at the destination position
+	_gldDrawPixels(ctx, FALSE, dstx, dsty, width, height, pImage);
+
+	IDirect3DSurface8_Release(pImage);
+#endif
+}
+
+//---------------------------------------------------------------------------
+
+// Draws a GL bitmap at window position (x,y) in the current raster colour.
+// Mesa unpacks the bitmap, which is then expanded into a temporary A8R8G8B8
+// texture (set bits -> opaque raster colour, clear bits -> transparent) and
+// drawn via _gldDrawPixels(). Returns silently if any step fails.
+void gld_Bitmap_DX7(
+	GLcontext *ctx,
+	GLint x,
+	GLint y,
+	GLsizei width,
+	GLsizei height,
+	const struct gl_pixelstore_attrib *unpack,
+	const GLubyte *bitmap)
+{
+	GLD_context			*gldCtx;
+	GLD_driver_dx7		*gld;
+	IDirectDrawSurface7	*pImage;		// Bitmap texture
+	HRESULT				hr;
+	BYTE				*pTempBitmap;	// Pointer to unpacked bitmap
+	D3DCOLOR			clBitmapOne;	// Opaque bitmap colour
+	D3DCOLOR			clBitmapZero;	// Transparent bitmap colour
+	D3DCOLOR			*pBits;			// Pointer to texture surface
+	const GLubyte		*src;
+	int					i, j, k;
+	DDSURFACEDESC2		ddsd;			// Surface desc returned by lock call
+	DWORD				dwFlags;
+	D3DX_SURFACEFORMAT	sf;
+	DWORD				dwMipmaps;
+
+	// Keep a copy of width/height as D3DXCreateTexture() call may alter input dimensions
+	DWORD				dwWidth = width;
+	DWORD				dwHeight = height;
+
+	gldCtx	= GLD_GET_CONTEXT(ctx);
+	gld		= GLD_GET_DX7_DRIVER(gldCtx);
+
+	// Bail if there is nothing to draw (only raster pos is updated).
+	// A missing bitmap OR an empty dimension each means no texels to produce.
+	if ((bitmap == NULL) || (width <= 0) || (height <= 0))
+		return;
+
+	// TODO:	Detect conditions when created texture (pImage) is non-pow2.
+	//			Texture coords may need to be adjusted to compensate.
+
+	clBitmapZero	= D3DCOLOR_RGBA(0,0,0,0); // NOTE: Alpha is Zero
+	clBitmapOne		= D3DCOLOR_COLORVALUE(
+		ctx->Current.RasterColor[0],
+		ctx->Current.RasterColor[1],
+		ctx->Current.RasterColor[2],
+		1.0f); // NOTE: Alpha is One
+
+	// Use Mesa to unpack bitmap into a canonical format
+	pTempBitmap = _mesa_unpack_bitmap(width, height, bitmap, unpack);
+	if (pTempBitmap == NULL)
+		return;
+
+	// Flags for texture creation
+	dwFlags		= D3DX_TEXTURE_NOMIPMAP;
+	sf			= D3DX_SF_A8R8G8B8;
+	dwMipmaps	= 1;
+
+	// Create a D3D texture to hold the bitmap
+	hr = D3DXCreateTexture(
+		gld->pDev,
+		&dwFlags,
+		&dwWidth, &dwHeight,
+		&sf,		// format
+		NULL,		// palette
+		&pImage,	// Output texture
+		&dwMipmaps);
+	if (FAILED(hr)) {
+		FREE(pTempBitmap);
+		return;
+	}
+
+	// D3DXCreateTexture may return a texture bigger than we asked for
+	// (i.e. padded to POW2) so let's clear the entire image bitmap.
+	// Additional: Looks like this is not strictly necessary.
+//	_gldClearSurface(pImage, clBitmapZero);
+
+	ddsd.dwSize = sizeof(DDSURFACEDESC2);
+	dwFlags = DDLOCK_SURFACEMEMORYPTR | DDLOCK_WAIT;
+	hr = IDirectDrawSurface7_Lock(pImage, NULL, &ddsd, dwFlags, NULL);
+	if (FAILED(hr)) {
+		FREE(pTempBitmap);
+		SAFE_RELEASE_SURFACE7(pImage);
+		return;
+	}
+
+#if 0
+	// DEBUG CODE
+	if (!(width==ddsd.dwWidth && height==ddsd.dwHeight))
+	ddlogPrintf(GLDLOG_WARN, "gld_Bitmap: In=%d,%d / Tex=%d,%d", width,height,ddsd.dwWidth,ddsd.dwHeight);
+#endif
+
+#if 0
+	// DEBUG CODE
+	ddlogPrintf(GLDLOG_SYSTEM, "gld_Bitmap: In=%d,%d / Tex=%d,%d", width,height,ddsd.dwWidth,ddsd.dwHeight);
+	ddlogPrintf(GLDLOG_SYSTEM, "gld_Bitmap: bpp=%d", ddsd.ddpfPixelFormat.dwRGBBitCount);
+#endif
+
+	// Cast texel pointer to texture surface.
+	// We can do this because we used D3DX_SF_A8R8G8B8 as the format
+	pBits = (D3DCOLOR*)ddsd.lpSurface;
+
+	// Copy from the input bitmap into the texture
+	for (i=0; i<height; i++) {
+		GLubyte byte;
+		pBits = (D3DCOLOR*)((BYTE*)ddsd.lpSurface + (i*ddsd.lPitch));
+		src = (const GLubyte *) _mesa_image_address(2,
+			&ctx->DefaultPacking, pTempBitmap, width, height, GL_COLOR_INDEX, GL_BITMAP,
+			0, i, 0);
+		for (j=0; j<(width>>3); j++) {
+			byte = *src++;
+			for (k=0; k<8; k++) {
+				*pBits++ = (byte & 128) ? clBitmapOne : clBitmapZero;
+				byte <<= 1;
+			}
+		}
+		// Fill remaining bits from bitmap
+		if (width & 7) {
+			byte = *src;
+			for (k=0; k<(width & 7); k++) {
+				*pBits++ = (byte & 128) ? clBitmapOne : clBitmapZero;
+				byte <<= 1;
+			}
+		}
+	}
+
+	// We're done with the unpacked bitmap
+	FREE(pTempBitmap);
+
+	// Finished with texture surface - unlock it
+	IDirectDrawSurface7_Unlock(pImage, NULL);
+
+	// Use internal function to draw bitmap onto rendertarget
+	_gldDrawPixels(ctx, TRUE, x, y, width, height, pImage);
+
+	// We're done with the bitmap texture - release it
+	IDirectDrawSurface7_Release(pImage);
+}
+
+//---------------------------------------------------------------------------
+// Texture functions
+//---------------------------------------------------------------------------
+
+// Ensures tObj->DriverData holds a D3D texture sized for texImage.
+// An existing texture is kept only when BOTH its width and height match;
+// otherwise it is released and a new one is created through D3DX.
+// If creation fails the error is logged and DriverData is left NULL.
+void _gldAllocateTexture(
+	GLcontext *ctx,
+	struct gl_texture_object *tObj,
+	struct gl_texture_image *texImage)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	HRESULT				hr;
+	IDirectDrawSurface7	*pTex;
+	D3DX_SURFACEFORMAT	d3dFormat;
+	DWORD				dwFlags, dwMipmaps;
+	DWORD				dwWidth, dwHeight;
+
+	if (!tObj || !texImage)
+		return;
+
+	pTex = (IDirectDrawSurface7*)tObj->DriverData;
+	if (pTex) {
+		// Decide whether we can keep existing D3D texture
+		// by examining top-level surface.
+		DDSURFACEDESC2 ddsd;
+		ddsd.dwSize = sizeof(DDSURFACEDESC2);
+		_GLD_DX7_TEX(GetSurfaceDesc(pTex, &ddsd));
+		// Compatible only if both dimensions match; with a partial match a
+		// later upload could write past the end of the smaller surface.
+		if ((ddsd.dwWidth == texImage->Width) &&
+			(ddsd.dwHeight == texImage->Height))
+		{
+			return; // Keep the existing texture
+		}
+		// Release existing texture as it is not compatible
+		tObj->DriverData = NULL;
+		_GLD_DX7_TEX(Release(pTex));
+	}
+
+	dwFlags		= (glb.bUseMipmaps) ? 0 : D3DX_TEXTURE_NOMIPMAP;
+	dwMipmaps	= (glb.bUseMipmaps) ? D3DX_DEFAULT : 1;
+	dwWidth		= texImage->Width;
+	dwHeight	= texImage->Height;
+
+	d3dFormat = _gldGLFormatToD3DFormat(texImage->IntFormat);
+	hr = D3DXCreateTexture(
+		gld->pDev, &dwFlags, &dwWidth, &dwHeight,
+		&d3dFormat, NULL, &pTex, &dwMipmaps);
+	if (FAILED(hr)) {
+		gldLogError(GLDLOG_ERROR, "AllocateTexture failed", hr);
+		pTex = NULL; // never publish a released or indeterminate pointer
+	}
+	tObj->DriverData = pTex;
+}
+
+//---------------------------------------------------------------------------
+
+// Chooses the Mesa texture format used to store a texture image, based on
+// internalFormat alone (ctx, srcFormat and srcType are not consulted).
+// [Based on mesa_choose_tex_format()]
+//
+// We will choose only texture formats that are supported
+// by Direct3D. If the hardware doesn't support a particular
+// texture format, then the D3DX texture calls that we use
+// will automatically use a HW supported format.
+// The most critical aim is to reduce copying; if we can use
+// texture-image data directly then it will be a big performance assist.
+// Returns NULL (after reporting a problem) for an unknown internalFormat.
+const struct gl_texture_format* gld_ChooseTextureFormat_DX7(
+	GLcontext *ctx,
+	GLint internalFormat,
+	GLenum srcFormat,
+	GLenum srcType)
+{
+	switch (internalFormat) {
+	case GL_INTENSITY:
+	case GL_INTENSITY4:
+	case GL_INTENSITY8:
+	case GL_INTENSITY12:
+	case GL_INTENSITY16:
+		return &_mesa_texformat_l8; // D3DFMT_L8
+	case 1:
+	case GL_LUMINANCE:
+	case GL_LUMINANCE4:
+	case GL_LUMINANCE8:
+	case GL_LUMINANCE12:
+	case GL_LUMINANCE16:
+		return &_mesa_texformat_l8; // D3DFMT_L8
+	case GL_ALPHA:
+	case GL_ALPHA4:
+	case GL_ALPHA8:
+	case GL_ALPHA12:
+	case GL_ALPHA16:
+		return &_mesa_texformat_a8; // D3DFMT_A8
+	case GL_COLOR_INDEX:
+	case GL_COLOR_INDEX1_EXT:
+	case GL_COLOR_INDEX2_EXT:
+	case GL_COLOR_INDEX4_EXT:
+	case GL_COLOR_INDEX8_EXT:
+	case GL_COLOR_INDEX12_EXT:
+	case GL_COLOR_INDEX16_EXT:
+		return &_mesa_texformat_rgb565; // D3DFMT_R5G6B5
+		// Mesa will convert this for us later...
+		//      return &_mesa_texformat_ci8; // D3DFMT_R5G6B5
+	case 2:
+	case GL_LUMINANCE_ALPHA:
+	case GL_LUMINANCE4_ALPHA4:
+	case GL_LUMINANCE6_ALPHA2:
+	case GL_LUMINANCE8_ALPHA8:
+	case GL_LUMINANCE12_ALPHA4:
+	case GL_LUMINANCE12_ALPHA12:
+	case GL_LUMINANCE16_ALPHA16:
+		return &_mesa_texformat_al88; // D3DFMT_A8L8
+	case GL_R3_G3_B2:
+		return &_mesa_texformat_rgb332; // D3DFMT_R3G3B2
+	case GL_RGB4:
+	case GL_RGBA4:
+	case GL_RGBA2:
+		return &_mesa_texformat_argb4444; // D3DFMT_A4R4G4B4
+	case 3:
+	case GL_RGB:
+	case GL_RGB5:
+	case GL_RGB8:
+	case GL_RGB10:
+	case GL_RGB12:
+	case GL_RGB16:
+		return &_mesa_texformat_rgb565;
+	case 4:
+	case GL_RGBA:
+	case GL_RGBA8:
+	case GL_RGB10_A2:
+	case GL_RGBA12:
+	case GL_RGBA16:
+		return &_mesa_texformat_argb8888;
+	case GL_RGB5_A1:
+		return &_mesa_texformat_argb1555;
+	default:
+		// Name this function in the report (the old text named another driver's)
+		_mesa_problem(NULL, "unexpected format in gld_ChooseTextureFormat_DX7");
+		return NULL;
+	}
+}
+
+//---------------------------------------------------------------------------
+
+/*
+// Safer(?), slower version.
+void gld_TexImage2D_DX7(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLint internalFormat,
+	GLint width,
+	GLint height,
+	GLint border,
+	GLenum format,
+	GLenum type,
+	const GLvoid *pixels,
+	const struct gl_pixelstore_attrib *packing,
+	struct gl_texture_object *tObj,
+	struct gl_texture_image *texImage)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	IDirect3DTexture8	*pTex;
+	IDirect3DSurface8	*pSurface;
+	RECT				rcSrcRect;
+	HRESULT				hr;
+	GLint				texelBytes = 4;
+	GLvoid				*tempImage;
+
+	if (!tObj || !texImage)
+		return;
+
+	if (level == 0) {
+		_gldAllocateTexture(ctx, tObj, texImage);
+	}
+
+	pTex = (IDirect3DTexture8*)tObj->DriverData;
+	if (!pTex)
+		return; // Texture has not been created
+	if (level >= IDirect3DTexture8_GetLevelCount(pTex))
+		return; // Level does not exist
+	hr = IDirect3DTexture8_GetSurfaceLevel(pTex, level, &pSurface);
+	if (FAILED(hr))
+		return; // Surface level doesn't exist (or just a plain error)
+
+	tempImage = MALLOC(width * height * texelBytes);
+	if (!tempImage) {
+		_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+		IDirect3DSurface8_Release(pSurface);
+		return;
+	}
+	// unpack image, apply transfer ops and store in tempImage
+	texImage->TexFormat->StoreImage(ctx, 2, texImage->Format,
+		&_mesa_texformat_argb8888, // dest format
+		tempImage,
+		width, height, 1, 0, 0, 0,
+		width * texelBytes,
+		0, // dstImageStride
+		format, type, pixels, packing);
+
+	SetRect(&rcSrcRect, 0, 0, width, height);
+	D3DXLoadSurfaceFromMemory(
+		pSurface,
+		NULL,
+		NULL,
+		tempImage,
+		D3DFMT_A8R8G8B8,
+		width * texelBytes,
+		NULL,
+		&rcSrcRect,
+		D3DX_FILTER_NONE,
+		0);
+
+	FREE(tempImage);
+	IDirect3DSurface8_Release(pSurface);
+}
+*/
+
+//---------------------------------------------------------------------------
+
+// Faster, more efficient version.
+// Unpacks the client image and stores it straight into the D3D texture
+// surface of the given mipmap level (level 0 first (re)allocates the texture).
+// Returns silently if the texture or the requested level is unavailable.
+void gld_TexImage2D_DX7(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLint internalFormat,
+	GLint width,
+	GLint height,
+	GLint border,
+	GLenum format,
+	GLenum type,
+	const GLvoid *pixels,
+	const struct gl_pixelstore_attrib *packing,
+	struct gl_texture_object *tObj,
+	struct gl_texture_image *texImage)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	IDirectDrawSurface7	*pTex = NULL;
+	IDirectDrawSurface7	*pSurface = NULL;
+	HRESULT				hr;
+	DDSURFACEDESC2		ddsd;
+	int					i;
+	DDSCAPS2			ddsCaps;
+
+	if (!tObj || !texImage)
+		return;
+
+	// GLQUAKE FIX
+	// Test for input alpha data with non-alpha internalformat
+	if (((internalFormat==3) || (internalFormat==GL_RGB)) && (format==GL_RGBA)) {
+		// Input format has alpha, but a non-alpha format has been requested.
+		texImage->IntFormat = GL_RGBA;
+		internalFormat = GL_RGBA;
+	}
+
+	if (level == 0) {
+		_gldAllocateTexture(ctx, tObj, texImage);
+	}
+
+	pTex = (IDirectDrawSurface7*)tObj->DriverData;
+	if (!pTex) {
+		ASSERT(0);
+		return; // Texture has not been created
+	}
+
+	pSurface = pTex;
+	if (level != 0) {
+		ddsd.dwSize = sizeof(ddsd);
+		_GLD_DX7_TEX(GetSurfaceDesc(pTex, &ddsd));
+		if ((level > 0) && (level >= ddsd.dwMipMapCount))
+			return; // Level does not exist
+		ZeroMemory(&ddsCaps, sizeof(ddsCaps));
+		// Walk down the mipmap chain to the requested level
+		for (i=0; i<level; i++) {
+			ddsCaps.dwCaps = DDSCAPS_TEXTURE | DDSCAPS_MIPMAP;
+			hr = IDirectDrawSurface7_GetAttachedSurface(
+				pSurface,
+				&ddsCaps,
+				&pSurface);
+			if (FAILED(hr))
+				return; // Chain is shorter than expected; no surface to fill
+			// Drop the reference GetAttachedSurface() added. NOTE(review):
+			// relies on the parent keeping the sub-level alive - confirm.
+			IDirectDrawSurface7_Release(pSurface);
+		}
+	}
+
+	// Lock all of surface
+	ddsd.dwSize = sizeof(ddsd);
+	hr = IDirectDrawSurface7_Lock(pSurface, NULL, &ddsd, 0, 0);
+	if (FAILED(hr)) {
+		// No reference is owned here (tObj->DriverData owns pTex): do not Release
+		return;
+	}
+
+	// unpack image, apply transfer ops and store directly in texture
+	texImage->TexFormat->StoreImage(
+		ctx, 2, texImage->Format,
+		_gldMesaFormatForD3DFormat(_gldD3DXFormatFromSurface(pSurface)),
+		ddsd.lpSurface,
+		width, height, 1, 0, 0, 0,
+		ddsd.lPitch,
+		0, // dstImageStride
+		format, type, pixels, packing);
+
+	IDirectDrawSurface7_Unlock(pSurface, NULL);
+}
+
+//---------------------------------------------------------------------------
+
+void gld_TexImage1D_DX7(GLcontext *ctx, GLenum target, GLint level,
+                       GLint internalFormat,
+                       GLint width, GLint border,
+                       GLenum format, GLenum type, const GLvoid *pixels,
+                       const struct gl_pixelstore_attrib *packing,
+                       struct gl_texture_object *texObj,
+                       struct gl_texture_image *texImage )
+{
+	// A 1D texture is handled as a 2D texture with a height of one
+	gld_TexImage2D_DX7(ctx, target, level, internalFormat, width, 1, border, format, type, pixels, packing, texObj, texImage);
+}
+
+//---------------------------------------------------------------------------
+
+/*
+void gld_TexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+                          GLint xoffset, GLint yoffset,
+                          GLsizei width, GLsizei height,
+                          GLenum format, GLenum type,
+                          const GLvoid *pixels,
+                          const struct gl_pixelstore_attrib *packing,
+                          struct gl_texture_object *tObj,
+                          struct gl_texture_image *texImage )
+{
+	GLD_GET_CONTEXT
+	IDirect3DTexture8	*pTex;
+	IDirect3DSurface8	*pSurface;
+	D3DFORMAT			d3dFormat;
+	HRESULT				hr;
+	GLint				texelBytes = 4;
+	GLvoid				*tempImage;
+	RECT				rcSrcRect;
+	RECT				rcDstRect;
+
+	if (!tObj || !texImage)
+		return;
+
+	pTex = (IDirect3DTexture8*)tObj->DriverData;
+	if (!pTex)
+		return; // Texture has not been created
+	if (level >= _GLD_DX8_TEX(GetLevelCount(pTex))
+		return; // Level does not exist
+	hr = _GLD_DX8_TEX(GetSurfaceLevel(pTex, level, &pSurface);
+	if (FAILED(hr))
+		return; // Surface level doesn't exist (or just a plain error)
+
+	d3dFormat = _gldGLFormatToD3DFormat(texImage->Format);
+	tempImage = MALLOC(width * height * texelBytes);
+	if (!tempImage) {
+		_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+		IDirect3DSurface8_Release(pSurface);
+		return;
+	}
+
+	// unpack image, apply transfer ops and store in tempImage
+	texImage->TexFormat->StoreImage(ctx, 2, texImage->Format,
+		&_mesa_texformat_argb8888, // dest format
+		tempImage,
+		width, height, 1, 0, 0, 0,
+		width * texelBytes,
+		0, // dstImageStride
+		format, type, pixels, packing);
+
+	// Source rectangle is whole of input image
+	SetRect(&rcSrcRect, 0, 0, width, height);
+
+	// Dest rectangle must be offset to dest image
+	SetRect(&rcDstRect, 0, 0, width, height);
+	OffsetRect(&rcDstRect, xoffset, yoffset);
+
+	D3DXLoadSurfaceFromMemory(
+		pSurface,
+		NULL,
+		&rcDstRect,
+		tempImage,
+		D3DFMT_A8R8G8B8,
+		width * texelBytes,
+		NULL,
+		&rcSrcRect,
+		D3DX_FILTER_NONE,
+		0);
+
+	FREE(tempImage);
+	IDirect3DSurface8_Release(pSurface);
+}
+*/
+
+//---------------------------------------------------------------------------
+
+// Faster, more efficient version.
+// Copies subimage straight to dest texture: the destination rectangle of
+// the requested mipmap level is locked and the client pixels are unpacked
+// into it. Returns silently if the texture/level is missing or a call fails.
+void gld_TexSubImage2D_DX7( GLcontext *ctx, GLenum target, GLint level,
+                          GLint xoffset, GLint yoffset,
+                          GLsizei width, GLsizei height,
+                          GLenum format, GLenum type,
+                          const GLvoid *pixels,
+                          const struct gl_pixelstore_attrib *packing,
+                          struct gl_texture_object *tObj,
+                          struct gl_texture_image *texImage )
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	IDirectDrawSurface7	*pTex;
+	IDirectDrawSurface7	*pSurface;
+	HRESULT				hr;
+	RECT				rcDstRect;
+	DDSURFACEDESC2		ddsd;
+	int					i;
+	DDSCAPS2			ddsCaps;
+
+	if (!tObj || !texImage)
+		return;
+
+	pTex = (IDirectDrawSurface7*)tObj->DriverData;
+	if (!pTex)
+		return; // Texture has not been created
+
+	__try {
+	ddsd.dwSize = sizeof(ddsd);
+	_GLD_DX7_TEX(GetSurfaceDesc(pTex, &ddsd));
+	if ((level > 0) && (level >= ddsd.dwMipMapCount))
+		return; // Level does not exist
+
+	ZeroMemory(&ddsCaps, sizeof(ddsCaps));
+	pSurface = pTex;
+	for (i=0; i<level; i++) {
+		ddsCaps.dwCaps = DDSCAPS_TEXTURE | DDSCAPS_MIPMAP;
+		hr = IDirectDrawSurface7_GetAttachedSurface(
+			pSurface,
+			&ddsCaps,
+			&pSurface);
+		if(SUCCEEDED(hr)) {
+			IDirectDrawSurface7_Release(pSurface);
+		} else {
+			return;
+		}
+	}
+
+	// Dest rectangle must be offset to dest image
+	SetRect(&rcDstRect, 0, 0, width, height);
+	OffsetRect(&rcDstRect, xoffset, yoffset);
+
+	// Lock sub-rect of surface
+	hr = IDirectDrawSurface7_Lock(pSurface, &rcDstRect, &ddsd, 0, 0);
+	if (FAILED(hr)) {
+		// No reference is owned here (tObj->DriverData owns pTex): do not Release
+		return;
+	}
+
+	// unpack image, apply transfer ops and store directly in texture
+	texImage->TexFormat->StoreImage(ctx, 2, texImage->Format,
+		_gldMesaFormatForD3DFormat(_gldD3DXFormatFromSurface(pSurface)),
+		ddsd.lpSurface,
+		width, height, 1,
+		0, 0, 0, // NOTE: ddsd.lpSurface is already offset!!!
+		ddsd.lPitch,
+		0, // dstImageStride
+		format, type, pixels, packing);
+
+	IDirectDrawSurface7_Unlock(pSurface, &rcDstRect);
+	}
+	__except(EXCEPTION_EXECUTE_HANDLER) {
+		;
+	}
+}
+
+//---------------------------------------------------------------------------
+
+void gld_TexSubImage1D_DX7( GLcontext *ctx, GLenum target, GLint level,
+                          GLint xoffset, GLsizei width,
+                          GLenum format, GLenum type,
+                          const GLvoid *pixels,
+                          const struct gl_pixelstore_attrib *packing,
+                          struct gl_texture_object *texObj,
+                          struct gl_texture_image *texImage )
+{
+	gld_TexSubImage2D_DX7(ctx, target, level, xoffset, 0, width, 1, format, type, pixels, packing, texObj, texImage); // forwarded as a one-row 2D sub-image at yoffset 0
+}
+
+//---------------------------------------------------------------------------
+
+// Releases the D3D texture (if any) stored in tObj->DriverData and then
+// clears the pointer. A NULL tObj is tolerated. Any structured exception
+// raised while doing so is swallowed.
+void gld_DeleteTexture_DX7(
+	GLcontext *ctx,
+	struct gl_texture_object *tObj)
+{
+	GLD_context *gld = (GLD_context*)(ctx->DriverCtx);
+	IDirectDrawSurface7 *pSurf;
+
+	__try {
+		pSurf = tObj ? (IDirectDrawSurface7*)tObj->DriverData : NULL;
+		if (pSurf != NULL) {
+			// Disabled: unbind the texture from every stage before release.
+/*			for (int i=0; i<MAX_TEXTURE_UNITS; i++) {
+				if (gld->CurrentTexture[i] == pTex) {
+					gld->pDev->SetTexture(i, NULL);
+					gld->CurrentTexture[i] = NULL;
+				}
+			}*/
+			_GLD_DX7_TEX(Release(pSurf));
+			tObj->DriverData = NULL;
+		}
+	}
+	__except(EXCEPTION_EXECUTE_HANDLER) {
+		; // nothing to recover
+	}
+}
+
+//---------------------------------------------------------------------------
+
+__inline void _gldSetColorOps(	// Sets the colour arg1/op/arg2 stage states of texture stage 'unit'
+	const GLD_driver_dx7 *gld,
+	GLuint unit,
+	DWORD ColorArg1,
+	D3DTEXTUREOP ColorOp,
+	DWORD ColorArg2)
+{
+	_GLD_DX7_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_COLORARG1, ColorArg1));
+	_GLD_DX7_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_COLOROP, ColorOp));
+	_GLD_DX7_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_COLORARG2, ColorArg2));
+}
+
+//---------------------------------------------------------------------------
+
+__inline void _gldSetAlphaOps(	// Sets the alpha arg1/op/arg2 stage states of texture stage 'unit'
+	const GLD_driver_dx7 *gld,
+	GLuint unit,
+	DWORD AlphaArg1,
+	D3DTEXTUREOP AlphaOp,
+	DWORD AlphaArg2)
+{
+	_GLD_DX7_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_ALPHAARG1, AlphaArg1));
+	_GLD_DX7_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_ALPHAOP, AlphaOp));
+	_GLD_DX7_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_ALPHAARG2, AlphaArg2));
+}
+
+//---------------------------------------------------------------------------
+
+// Programs D3D texture stage 'unit' from Mesa's state for that texture unit:
+// binds the texture, then sets filter, wrap, priority and texenv colour/alpha ops.
+void gldUpdateTextureUnit(
+	GLcontext *ctx,
+	GLuint unit,
+	BOOL bPassThrough)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	D3DTEXTUREMINFILTER	minfilter;
+	D3DTEXTUREMIPFILTER	mipfilter;
+	GLenum				BaseFormat;
+	DWORD				dwColorArg0;
+	int					iTexEnv = 0;
+	GLD_texenv			*pTexenv;
+
+	// NOTE: If bPassThrough is FALSE then texture stage can be
+	// disabled otherwise it must pass-through its current fragment.
+
+	const struct gl_texture_unit *pUnit = &ctx->Texture.Unit[unit];
+	const struct gl_texture_object *tObj = pUnit->_Current;
+
+	IDirectDrawSurface7 *pTex = NULL;
+	if (tObj) {
+		pTex = (IDirectDrawSurface7*)tObj->DriverData;
+	}
+
+	__try {
+	// Enable texturing if unit is enabled and a valid D3D texture exists
+	// Mesa 5: TEXTUREn_x altered to TEXTURE_nD_BIT
+	//if (pTex && (pUnit->Enabled & (TEXTURE0_1D | TEXTURE0_2D))) {
+	if (pTex && (pUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT))) {
+		// Enable texturing
+		_GLD_DX7_DEV(SetTexture(gld->pDev, unit, pTex));
+	} else {
+		// Disable texturing, then return
+		_GLD_DX7_DEV(SetTexture(gld->pDev, unit, NULL));
+		if (bPassThrough) {
+			_gldSetColorOps(gld, unit, D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_DIFFUSE);
+			_gldSetAlphaOps(gld, unit, D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_DIFFUSE);
+		} else {
+			_gldSetColorOps(gld, unit, D3DTA_TEXTURE, D3DTOP_DISABLE, D3DTA_DIFFUSE);
+			_gldSetAlphaOps(gld, unit, D3DTA_TEXTURE, D3DTOP_DISABLE, D3DTA_DIFFUSE);
+		}
+		return;
+	}
+
+	// Texture parameters
+	_gldConvertMinFilter(tObj->MinFilter, &minfilter, &mipfilter);
+	_GLD_DX7_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_MINFILTER, minfilter));
+	_GLD_DX7_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_MIPFILTER, mipfilter));
+	_GLD_DX7_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_MAGFILTER, _gldConvertMagFilter(tObj->MagFilter)));
+	_GLD_DX7_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_ADDRESSU, _gldConvertWrap(tObj->WrapS)));
+	_GLD_DX7_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_ADDRESSV, _gldConvertWrap(tObj->WrapT)));
+
+	// Texture priority
+	_GLD_DX7_TEX(SetPriority(pTex, (DWORD)(tObj->Priority*65535.0f)));
+
+	// Texture environment
+	// TODO: Examine input texture for alpha and use specific alpha/non-alpha ops.
+	//       See Page 355 of the Red Book.
+	BaseFormat = _gldDecodeBaseFormat(pTex);
+	// gldTexEnv index = base-format offset (0..2) + env-mode offset (a multiple of 3)
+	switch (BaseFormat) {
+	case GL_RGB:
+		iTexEnv = 0;
+		break;
+	case GL_RGBA:
+		iTexEnv = 1;
+		break;
+	case GL_ALPHA:
+		iTexEnv = 2;
+		break;
+	}
+	// (no default cases: other base formats keep the initial 0, other env modes add nothing)
+	switch (pUnit->EnvMode) {
+	case GL_DECAL:
+		iTexEnv += 0;
+		break;
+	case GL_REPLACE:
+		iTexEnv += 3;
+		break;
+	case GL_MODULATE:
+		iTexEnv += 6;
+		break;
+	case GL_BLEND:
+		// Set blend colour
+		// Unsupported by DX7
+//		dwColorArg0 = D3DCOLOR_COLORVALUE(pUnit->EnvColor[0], pUnit->EnvColor[1], pUnit->EnvColor[2], pUnit->EnvColor[3]);
+//		_GLD_DX7_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_COLORARG0, dwColorArg0));
+//		gldLogMessage(GLDLOG_WARN, "GL_BLEND\n");
+		iTexEnv += 9;
+		break;
+	case GL_ADD:
+		iTexEnv += 12;
+		break;
+	}
+	pTexenv = (GLD_texenv*)&gldTexEnv[iTexEnv];
+	_gldSetColorOps(gld, unit, pTexenv->ColorArg1, pTexenv->ColorOp, pTexenv->ColorArg2);
+	_gldSetAlphaOps(gld, unit, pTexenv->AlphaArg1, pTexenv->AlphaOp, pTexenv->AlphaArg2);
+	}
+	__except(EXCEPTION_EXECUTE_HANDLER) {
+		; // any structured exception raised above is deliberately ignored
+	}
+}
+
+//---------------------------------------------------------------------------
+
+// Re-programs the D3D texture stages from the current Mesa texture state.
+// Handles one or two texture units only; does nothing if ctx is NULL.
+void gld_NEW_TEXTURE_DX7(
+	GLcontext *ctx)
+{
+	// TODO: Support for three (ATI Radeon) or more (nVidia GeForce3) texture units
+
+	BOOL bUnit0Enabled;
+	BOOL bUnit1Enabled;
+
+	if (!ctx)
+		return; // Sanity check
+
+	if (ctx->Const.MaxTextureUnits == 1) {
+		gldUpdateTextureUnit(ctx, 0, TRUE);
+		return;
+	}
+
+	// NOTE: THE FOLLOWING RELATES TO TWO TEXTURE UNITS, AND TWO ONLY!!
+
+	// Mesa 5: Texture Units altered
+	bUnit0Enabled = (ctx->Texture.Unit[0]._ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) ? TRUE : FALSE;
+	bUnit1Enabled = (ctx->Texture.Unit[1]._ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) ? TRUE : FALSE;
+
+	// If Unit0 is disabled and Unit1 is enabled then we must pass-though
+	gldUpdateTextureUnit(ctx, 0, (!bUnit0Enabled && bUnit1Enabled) ? TRUE : FALSE);
+	// We can always disable the last texture unit
+	gldUpdateTextureUnit(ctx, 1, FALSE);
+
+#ifdef _DEBUG
+	{
+		// Find out whether device supports current renderstates
+		GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+		GLD_driver_dx7		*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+		DWORD dwPasses = 0; // defined value even if ValidateDevice fails to write it
+		_GLD_DX7_DEV(ValidateDevice(gld->pDev, &dwPasses));
+#if 0
+		if (FAILED(hr)) {
+			gldLogError(GLDLOG_ERROR, "ValidateDevice failed", hr);
+		}
+#endif
+		if (dwPasses != 1) {
+			gldLogMessage(GLDLOG_ERROR, "ValidateDevice: Can't do in one pass\n");
+		}
+	}
+#endif
+}
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx7/gld_vb_d3d_render_dx7.c b/src/mesa/drivers/windows/gldirect/dx7/gld_vb_d3d_render_dx7.c
new file mode 100644
index 0000000000..c39775cad3
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx7/gld_vb_d3d_render_dx7.c
@@ -0,0 +1,257 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  GLDirect fastpath pipeline stage
+*
+****************************************************************************/
+
+//---------------------------------------------------------------------------
+
+//#include "../GLDirect.h"
+//#include "../gld_log.h"
+//#include "gld_dx8.h"
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx7.h"
+
+//---------------------------------------------------------------------------
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+// #include "mem.h"
+#include "mtypes.h"
+//#include "mmath.h"
+
+#include "math/m_matrix.h"
+#include "math/m_xform.h"
+
+#include "tnl/t_pipeline.h"
+
+//---------------------------------------------------------------------------
+/*
+__inline void _gldSetVertexShaderConstants(
+	GLcontext *ctx,
+	GLD_driver_dx8 *gld)
+{
+	D3DXMATRIX mat, matView, matProj;
+	GLfloat		*pM;
+
+	// Mesa 5: Altered to a Stack
+	//pM = ctx->ModelView.m;
+	pM = ctx->ModelviewMatrixStack.Top->m;
+	matView._11 = pM[0];
+	matView._12 = pM[1];
+	matView._13 = pM[2];
+	matView._14 = pM[3];
+	matView._21 = pM[4];
+	matView._22 = pM[5];
+	matView._23 = pM[6];
+	matView._24 = pM[7];
+	matView._31 = pM[8];
+	matView._32 = pM[9];
+	matView._33 = pM[10];
+	matView._34 = pM[11];
+	matView._41 = pM[12];
+	matView._42 = pM[13];
+	matView._43 = pM[14];
+	matView._44 = pM[15];
+
+	// Mesa 5: Altered to a Stack
+	//pM = ctx->ProjectionMatrix.m;
+	pM = ctx->ProjectionMatrixStack.Top->m;
+	matProj._11 = pM[0];
+	matProj._12 = pM[1];
+	matProj._13 = pM[2];
+	matProj._14 = pM[3];
+	matProj._21 = pM[4];
+	matProj._22 = pM[5];
+	matProj._23 = pM[6];
+	matProj._24 = pM[7];
+	matProj._31 = pM[8];
+	matProj._32 = pM[9];
+	matProj._33 = pM[10];
+	matProj._34 = pM[11];
+	matProj._41 = pM[12];
+	matProj._42 = pM[13];
+	matProj._43 = pM[14];
+	matProj._44 = pM[15];
+
+	D3DXMatrixMultiply( &mat, &matView, &matProj );
+	D3DXMatrixTranspose( &mat, &mat );
+
+	_GLD_DX8_DEV(SetVertexShaderConstant(gld->pDev, 0, &mat, 4));
+}
+*/
+//---------------------------------------------------------------------------
+
+// TnL pipeline stage 'run' callback: unless Mesa TnL is in use, renders the
+// vertex buffer's primitives into gld->PB3d and draws them with D3D.
+// Returns GL_TRUE to continue the pipeline, GL_FALSE when it has finished it.
+static GLboolean gld_d3d_render_stage_run(
+	GLcontext *ctx,
+	struct tnl_pipeline_stage *stage)
+{
+	GLD_context				*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7			*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+
+	TNLcontext				*tnl;
+	struct vertex_buffer	*VB;
+	tnl_render_func				*tab;
+	GLint					pass;
+	GLD_pb_dx7				*gldPB = &gld->PB3d;
+	DWORD					dwFlags;
+
+/*
+	static int count = 0;
+	count++;
+	if (count != 2)
+		return GL_FALSE;
+*/
+	// The "check" function should disable this stage,
+	// but we'll test gld->bUseMesaTnL anyway.
+	if (gld->bUseMesaTnL) {
+		// Do nothing in this stage, but continue pipeline
+		return GL_TRUE;
+	}
+	
+	tnl = TNL_CONTEXT(ctx);
+	VB = &tnl->vb;
+	pass = 0;
+
+   tnl->Driver.Render.Start( ctx );
+#if 0
+   // For debugging: Useful to see if an app passes colour data in
+   // an unusual format.
+   switch (VB->AttribPtr[_TNL_ATTRIB_COLOR0]->Type) {
+   case GL_FLOAT:
+	   ddlogMessage(GLDLOG_SYSTEM, "ColorPtr: GL_FLOAT\n");
+	   break;
+   case GL_UNSIGNED_BYTE:
+	   ddlogMessage(GLDLOG_SYSTEM, "ColorPtr: GL_UNSIGNED_BYTE\n");
+	   break;
+   default:
+	   ddlogMessage(GLDLOG_SYSTEM, "ColorPtr: *?*\n");
+	   break;
+   }
+#endif
+
+   // Install the 3D render functions; flat or smooth variants per DD_FLATSHADE
+   tnl->Driver.Render.Points		= gld_Points3D_DX7;
+   if (ctx->_TriangleCaps & DD_FLATSHADE) {
+	   tnl->Driver.Render.Line		= gld_Line3DFlat_DX7;
+	   tnl->Driver.Render.Triangle	= gld_Triangle3DFlat_DX7;
+	   tnl->Driver.Render.Quad		= gld_Quad3DFlat_DX7;
+   } else {
+	   tnl->Driver.Render.Line		= gld_Line3DSmooth_DX7;
+	   tnl->Driver.Render.Triangle	= gld_Triangle3DSmooth_DX7;
+	   tnl->Driver.Render.Quad		= gld_Quad3DSmooth_DX7;
+   }
+
+//	_GLD_DX7_VB(Lock(gldPB->pVB, 0, 0, &gldPB->pPoints, D3DLOCK_DISCARD));
+	dwFlags = DDLOCK_DISCARDCONTENTS | DDLOCK_WAIT | DDLOCK_SURFACEMEMORYPTR | DDLOCK_WRITEONLY;
+	_GLD_DX7_VB(Lock(gldPB->pVB, dwFlags, &gldPB->pPoints, NULL));
+	gldPB->nPoints = gldPB->nLines = gldPB->nTriangles = 0;
+	// Allocate primitive pointers
+	// gldPB->pPoints is always first
+	gldPB->pLines		= gldPB->pPoints + (gldPB->dwStride * gldPB->iFirstLine);
+	gldPB->pTriangles	= gldPB->pPoints + (gldPB->dwStride * gldPB->iFirstTriangle);
+	ASSERT(tnl->Driver.Render.BuildVertices);
+	ASSERT(tnl->Driver.Render.PrimitiveNotify);
+	ASSERT(tnl->Driver.Render.Points);
+	ASSERT(tnl->Driver.Render.Line);
+	ASSERT(tnl->Driver.Render.Triangle);
+	ASSERT(tnl->Driver.Render.Quad);
+	ASSERT(tnl->Driver.Render.ResetLineStipple);
+	ASSERT(tnl->Driver.Render.Interp);
+	ASSERT(tnl->Driver.Render.CopyPV);
+	ASSERT(tnl->Driver.Render.ClippedLine);
+	ASSERT(tnl->Driver.Render.ClippedPolygon);
+	ASSERT(tnl->Driver.Render.Finish);
+
+	tab = (VB->Elts ? tnl->Driver.Render.PrimTabElts : tnl->Driver.Render.PrimTabVerts);
+	// Dispatch each primitive through the table; repeat while Multipass asks for another pass
+	do {
+		GLuint i, length, flags = 0;
+		for (i = 0 ; !(flags & PRIM_END) ; i += length)
+		{
+			flags = VB->Primitive[i].mode;
+			length= VB->Primitive[i].count;
+			ASSERT(length || (flags & PRIM_END));
+			ASSERT((flags & PRIM_MODE_MASK) <= GL_POLYGON+1);
+			if (length)
+				tab[flags & PRIM_MODE_MASK]( ctx, i, i + length, flags );
+		}
+	} while (tnl->Driver.Render.Multipass &&
+		tnl->Driver.Render.Multipass( ctx, ++pass ));
+	_GLD_DX7_VB(Unlock(gldPB->pVB));
+
+//	_GLD_DX7_DEV(SetStreamSource(gld->pDev, 0, gldPB->pVB, gldPB->dwStride));
+
+	_GLD_DX7_DEV(SetTransform(gld->pDev, D3DTRANSFORMSTATE_PROJECTION, &gld->matProjection));
+	_GLD_DX7_DEV(SetTransform(gld->pDev, D3DTRANSFORMSTATE_WORLD, &gld->matModelView));
+	// Draw each non-empty primitive range (counts presumably filled by the render functions above), then reset it
+	if (gldPB->nPoints) {
+//		_GLD_DX7_DEV(DrawPrimitive(gld->pDev, D3DPT_POINTLIST, 0, gldPB->nPoints));
+		_GLD_DX7_DEV(DrawPrimitiveVB(gld->pDev, D3DPT_POINTLIST, gldPB->pVB, 0, gldPB->nPoints, 0));
+		gldPB->nPoints = 0;
+	}
+
+	if (gldPB->nLines) {
+//		_GLD_DX7_DEV(DrawPrimitive(gld->pDev, D3DPT_LINELIST, gldPB->iFirstLine, gldPB->nLines));
+		_GLD_DX7_DEV(DrawPrimitiveVB(gld->pDev, D3DPT_LINELIST, gldPB->pVB, gldPB->iFirstLine, gldPB->nLines, 0));
+		gldPB->nLines = 0;
+	}
+
+	if (gldPB->nTriangles) {
+//		_GLD_DX7_DEV(DrawPrimitive(gld->pDev, D3DPT_TRIANGLELIST, gldPB->iFirstTriangle, gldPB->nTriangles));
+		_GLD_DX7_DEV(DrawPrimitiveVB(gld->pDev, D3DPT_TRIANGLELIST, gldPB->pVB, gldPB->iFirstTriangle, gldPB->nTriangles, 0));
+		gldPB->nTriangles = 0;
+	}
+
+	return GL_FALSE;		/* finished the pipe */
+}
+
+
+//---------------------------------------------------------------------------
+
+const struct tnl_pipeline_stage _gld_d3d_render_stage =	// Stage descriptor: only the run callback is supplied
+{
+   "gld_d3d_render_stage",			/* presumably the stage name - field order is set by struct tnl_pipeline_stage, not visible here */
+   NULL,
+   NULL,
+   NULL,
+   NULL,
+   gld_d3d_render_stage_run			/* run */
+};
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx7/gld_vb_mesa_render_dx7.c b/src/mesa/drivers/windows/gldirect/dx7/gld_vb_mesa_render_dx7.c
new file mode 100644
index 0000000000..72e5e1308c
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx7/gld_vb_mesa_render_dx7.c
@@ -0,0 +1,422 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keithw@valinux.com>
+ */
+
+
+/*
+ * Render whole vertex buffers, including projection of vertices from
+ * clip space and clipping of primitives.
+ *
+ * This file makes calls to project vertices and to the point, line
+ * and triangle rasterizers via the function pointers:
+ *
+ *    context->Driver.Render.*
+ *
+ */
+
+
+//---------------------------------------------------------------------------
+
+//#include "../GLDirect.h"
+//#include "../gld_log.h"
+//#include "gld_dx8.h"
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx7.h"
+
+//---------------------------------------------------------------------------
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+// #include "mem.h"
+#include "mtypes.h"
+//#include "mmath.h"
+
+#include "math/m_matrix.h"
+#include "math/m_xform.h"
+
+#include "tnl/t_pipeline.h"
+
+/**********************************************************************/
+/*                        Clip single primitives                      */
+/**********************************************************************/
+
+
+#if defined(USE_IEEE)
+#define NEGATIVE(x) (GET_FLOAT_BITS(x) & (1<<31))
+//#define DIFFERENT_SIGNS(x,y) ((GET_FLOAT_BITS(x) ^ GET_FLOAT_BITS(y)) & (1<<31))
+#else
+#define NEGATIVE(x) (x < 0)
+//#define DIFFERENT_SIGNS(x,y) (x * y <= 0 && x - y != 0)
+/* Could just use (x*y<0) except for the flatshading requirements.
+ * Maybe there's a better way?
+ */
+#endif
+
+
+#define W(i) coord[i][3]	/* W/Z/Y/X: components 3..0 of coord[i] */
+#define Z(i) coord[i][2]
+#define Y(i) coord[i][1]
+#define X(i) coord[i][0]
+#define SIZE 4
+#define TAG(x) x##_4	/* appends _4 to a name, e.g. clip_tri_4 as called further down */
+#include "tnl/t_vb_cliptmp.h"	/* NOTE(review): presumably defines clip_line_4, clip_tri_4 and clip_quad_4 from the macros above - confirm */
+
+
+
+/**********************************************************************/
+/*              Clip and render whole begin/end objects               */
+/**********************************************************************/
+
+#define NEED_EDGEFLAG_SETUP (ctx->_TriangleCaps & DD_TRI_UNFILLED)
+#define EDGEFLAG_GET(idx) VB->EdgeFlag[idx]
+#define EDGEFLAG_SET(idx, val) VB->EdgeFlag[idx] = val
+
+
+/* Vertices, with the possibility of clipping: draw directly when no clip bit is set,
+ * draw nothing when all vertices share a bit within 0x3f, otherwise go through clip_*_4. */
+#define RENDER_POINTS( start, count ) \
+   tnl->Driver.Render.Points( ctx, start, count )
+
+#define RENDER_LINE( v1, v2 )			\
+do {						\
+   GLubyte c1 = mask[v1], c2 = mask[v2];	\
+   GLubyte ormask = c1|c2;			\
+   if (!ormask)					\
+      LineFunc( ctx, v1, v2 );			\
+   else if (!(c1 & c2 & 0x3f))			\
+      clip_line_4( ctx, v1, v2, ormask );	\
+} while (0)
+
+#define RENDER_TRI( v1, v2, v3 )			\
+do {							\
+   GLubyte c1 = mask[v1], c2 = mask[v2], c3 = mask[v3];	\
+   GLubyte ormask = c1|c2|c3;				\
+   if (!ormask)						\
+      TriangleFunc( ctx, v1, v2, v3 );			\
+   else if (!(c1 & c2 & c3 & 0x3f)) 			\
+      clip_tri_4( ctx, v1, v2, v3, ormask );    	\
+} while (0)
+
+#define RENDER_QUAD( v1, v2, v3, v4 )			\
+do {							\
+   GLubyte c1 = mask[v1], c2 = mask[v2];		\
+   GLubyte c3 = mask[v3], c4 = mask[v4];		\
+   GLubyte ormask = c1|c2|c3|c4;			\
+   if (!ormask)						\
+      QuadFunc( ctx, v1, v2, v3, v4 );			\
+   else if (!(c1 & c2 & c3 & c4 & 0x3f)) 		\
+      clip_quad_4( ctx, v1, v2, v3, v4, ormask );	\
+} while (0)
+
+/* Locals the RENDER_*, ELT and RESET_STIPPLE macros refer to (mask, elt, stipple and the
+ * three driver callbacks); the (void) casts keep unused ones from raising warnings. */
+#define LOCAL_VARS						\
+   TNLcontext *tnl = TNL_CONTEXT(ctx);				\
+   struct vertex_buffer *VB = &tnl->vb;				\
+   const GLuint * const elt = VB->Elts;				\
+   const GLubyte *mask = VB->ClipMask;				\
+   const GLuint sz = VB->ClipPtr->size;				\
+   const tnl_line_func LineFunc = tnl->Driver.Render.Line;		\
+   const tnl_triangle_func TriangleFunc = tnl->Driver.Render.Triangle;	\
+   const tnl_quad_func QuadFunc = tnl->Driver.Render.Quad;		\
+   const GLboolean stipple = ctx->Line.StippleFlag;		\
+   (void) (LineFunc && TriangleFunc && QuadFunc);		\
+   (void) elt; (void) mask; (void) sz; (void) stipple;
+
+#define TAG(x) clip_##x##_verts	/* e.g. TAG(render_tab) expands to clip_render_tab_verts */
+#define INIT(x) tnl->Driver.Render.PrimitiveNotify( ctx, x )
+#define RESET_STIPPLE if (stipple) tnl->Driver.Render.ResetLineStipple( ctx )
+#define PRESERVE_VB_DEFS	/* NOTE(review): presumably makes the template keep the macros above for the next inclusion - confirm */
+#include "tnl/t_vb_rendertmp.h"
+
+
+
+/* Elts, with the possibility of clipping: the render template is included again, this
+ * time with vertex indices read through elt[] (VB->Elts) and names ending in _elts. */
+#undef ELT
+#undef TAG
+#define ELT(x) elt[x]
+#define TAG(x) clip_##x##_elts	/* e.g. TAG(render_tab) expands to clip_render_tab_elts */
+#include "tnl/t_vb_rendertmp.h"
+
+/* Render the indexed triangles elt[start..count): runs of triangles with no clip bit set
+ * go to the driver's GL_TRIANGLES elts function in one call, the rest are clipped singly.
+ * TODO: do this for all primitives, verts and elts. */
+static void clip_elt_triangles( GLcontext *ctx,
+				GLuint start,
+				GLuint count,
+				GLuint flags )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tnl_render_func render_tris = tnl->Driver.Render.PrimTabElts[GL_TRIANGLES];
+   struct vertex_buffer *VB = &tnl->vb;
+   const GLuint * const elt = VB->Elts;
+   GLubyte *mask = VB->ClipMask;
+   GLuint j;
+   (void) flags;	/* not used */
+
+   tnl->Driver.Render.PrimitiveNotify( ctx, GL_TRIANGLES );
+   /* Loop bound is "j+2 < count", not "j < count-2": the unsigned subtraction wraps for count < 2. */
+   for (j=start; j+2 < count; j+=3 ) {
+      GLubyte c1 = mask[elt[j]];
+      GLubyte c2 = mask[elt[j+1]];
+      GLubyte c3 = mask[elt[j+2]];
+      GLubyte ormask = c1|c2|c3;
+      if (ormask) {
+	 if (start < j)
+	    render_tris( ctx, start, j, 0 );	/* flush the unclipped run that precedes this triangle */
+	 if (!(c1&c2&c3&0x3f))
+	    clip_tri_4( ctx, elt[j], elt[j+1], elt[j+2], ormask );	/* skipped when all three share a bit in 0x3f */
+	 start = j+3;
+      }
+   }
+
+   if (start < j)
+      render_tris( ctx, start, j, 0 );	/* flush the trailing unclipped run */
+}
+
+/**********************************************************************/
+/*                  Render whole begin/end objects                    */
+/**********************************************************************/
+
+#define NEED_EDGEFLAG_SETUP (ctx->_TriangleCaps & DD_TRI_UNFILLED)
+#define EDGEFLAG_GET(idx) VB->EdgeFlag[idx]
+#define EDGEFLAG_SET(idx, val) VB->EdgeFlag[idx] = val
+
+
+/* Vertices, no clipping: every RENDER_* macro forwards straight to the driver callback,
+ * without looking at any clip mask. */
+#define RENDER_POINTS( start, count ) \
+   tnl->Driver.Render.Points( ctx, start, count )
+
+#define RENDER_LINE( v1, v2 ) \
+   LineFunc( ctx, v1, v2 )
+
+#define RENDER_TRI( v1, v2, v3 ) \
+   TriangleFunc( ctx, v1, v2, v3 )
+
+#define RENDER_QUAD( v1, v2, v3, v4 ) \
+   QuadFunc( ctx, v1, v2, v3, v4 )
+
+#define TAG(x) _gld_tnl_##x##_verts	/* names from this inclusion get the form _gld_tnl_<x>_verts */
+
+#define LOCAL_VARS						\
+   TNLcontext *tnl = TNL_CONTEXT(ctx);				\
+   struct vertex_buffer *VB = &tnl->vb;				\
+   const GLuint * const elt = VB->Elts;				\
+   const tnl_line_func LineFunc = tnl->Driver.Render.Line;		\
+   const tnl_triangle_func TriangleFunc = tnl->Driver.Render.Triangle;	\
+   const tnl_quad_func QuadFunc = tnl->Driver.Render.Quad;		\
+   (void) (LineFunc && TriangleFunc && QuadFunc);		\
+   (void) elt;
+
+#define RESET_STIPPLE tnl->Driver.Render.ResetLineStipple( ctx )	/* unconditional here; the clipped variant tests ctx->Line.StippleFlag first */
+#define INIT(x) tnl->Driver.Render.PrimitiveNotify( ctx, x )
+#define RENDER_TAB_QUALIFIER	/* defined empty; NOTE(review): presumably drops a storage qualifier from the generated table - confirm */
+#define PRESERVE_VB_DEFS
+#include "tnl/t_vb_rendertmp.h"
+
+
+/* Elts, no clipping: the same template once more, with vertex indices read through
+ * elt[] (VB->Elts) and names of the form _gld_tnl_<x>_elts. */
+#undef ELT
+#define TAG(x) _gld_tnl_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+/**********************************************************************/
+/*              Helper functions for drivers                  */
+/**********************************************************************/
+/*
+void _tnl_RenderClippedPolygon( GLcontext *ctx, const GLuint *elts, GLuint n )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint *tmp = VB->Elts;
+
+   VB->Elts = (GLuint *)elts;
+   tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n, PRIM_BEGIN|PRIM_END );
+   VB->Elts = tmp;
+}
+
+void _tnl_RenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tnl->Driver.Render.Line( ctx, ii, jj );
+}
+*/
+
+
+/**********************************************************************/
+/*              Clip and render whole vertex buffers                  */
+/**********************************************************************/
+/* Driver callback tables, one per primitive type; _gld_mesa_render_stage_run indexes each with gld->iSetupFunc. */
+tnl_points_func _gldSetupPoints[4] = {
+	gld_Points2D_DX7,	/* the same handler is used for every index */
+	gld_Points2D_DX7,
+	gld_Points2D_DX7,
+	gld_Points2D_DX7
+};
+tnl_line_func _gldSetupLine[4] = {
+	gld_Line2DFlat_DX7,
+	gld_Line2DSmooth_DX7,
+	gld_Line2DFlat_DX7,	/* indices 2 and 3 reuse the flat/smooth handlers; lines have no "Extras" variant */
+	gld_Line2DSmooth_DX7,
+};
+tnl_triangle_func _gldSetupTriangle[4] = {
+	gld_Triangle2DFlat_DX7,	/* order: flat, smooth, flat with extras, smooth with extras */
+	gld_Triangle2DSmooth_DX7,
+	gld_Triangle2DFlatExtras_DX7,
+	gld_Triangle2DSmoothExtras_DX7
+};
+tnl_quad_func _gldSetupQuad[4] = {
+	gld_Quad2DFlat_DX7,	/* same order as the triangle table */
+	gld_Quad2DSmooth_DX7,
+	gld_Quad2DFlatExtras_DX7,
+	gld_Quad2DSmoothExtras_DX7
+};
+
+//---------------------------------------------------------------------------
+// Run callback of _gld_mesa_render_stage: renders the TnL vertex buffer through gld->PB2d. 'stage' is not used; always returns GL_FALSE.
+static GLboolean _gld_mesa_render_stage_run(
+	GLcontext *ctx,
+	struct tnl_pipeline_stage *stage)
+{
+	GLD_context				*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx7			*gld	= GLD_GET_DX7_DRIVER(gldCtx);
+		
+	TNLcontext				*tnl = TNL_CONTEXT(ctx);
+	struct vertex_buffer	*VB = &tnl->vb;
+	tnl_render_func				*tab;
+	GLint					pass = 0;
+	GLD_pb_dx7				*gldPB;
+	DWORD					dwFlags;
+
+	/* Allow the drivers to lock before projected verts are built so
+    * that window coordinates are guaranteed not to change before
+    * rendering.
+    */
+	ASSERT(tnl->Driver.Render.Start);
+	
+	tnl->Driver.Render.Start( ctx );
+	// Target gld->PB2d and install the gld_*_DX7 callbacks selected by gld->iSetupFunc.
+	gldPB = &gld->PB2d;
+	tnl->Driver.Render.Points	= _gldSetupPoints[gld->iSetupFunc];
+	tnl->Driver.Render.Line		= _gldSetupLine[gld->iSetupFunc];
+	tnl->Driver.Render.Triangle	= _gldSetupTriangle[gld->iSetupFunc];
+	tnl->Driver.Render.Quad		= _gldSetupQuad[gld->iSetupFunc];
+	// Lock the vertex buffer; the address of its data comes back in gldPB->pPoints. NOTE(review): no result check is visible here.
+	dwFlags = DDLOCK_DISCARDCONTENTS | DDLOCK_WAIT | DDLOCK_SURFACEMEMORYPTR | DDLOCK_WRITEONLY;
+	_GLD_DX7_VB(Lock(gldPB->pVB, dwFlags, &gldPB->pPoints, NULL));
+	gldPB->nPoints = gldPB->nLines = gldPB->nTriangles = 0;
+
+	// Derive the line and triangle pointers from the locked base - gldPB->pPoints is always first
+	gldPB->pLines		= gldPB->pPoints + (gldPB->dwStride * gldPB->iFirstLine);
+	gldPB->pTriangles	= gldPB->pPoints + (gldPB->dwStride * gldPB->iFirstTriangle);
+
+	ASSERT(tnl->Driver.Render.BuildVertices);
+	ASSERT(tnl->Driver.Render.PrimitiveNotify);
+	ASSERT(tnl->Driver.Render.Points);
+	ASSERT(tnl->Driver.Render.Line);
+	ASSERT(tnl->Driver.Render.Triangle);
+	ASSERT(tnl->Driver.Render.Quad);
+	ASSERT(tnl->Driver.Render.ResetLineStipple);
+	ASSERT(tnl->Driver.Render.Interp);
+	ASSERT(tnl->Driver.Render.CopyPV);
+	ASSERT(tnl->Driver.Render.ClippedLine);
+	ASSERT(tnl->Driver.Render.ClippedPolygon);
+	ASSERT(tnl->Driver.Render.Finish);
+	
+	tnl->Driver.Render.BuildVertices( ctx, 0, VB->Count, ~0 );
+	// Choose the dispatch table: this file's clipping tables when VB->ClipOrMask is nonzero, else the driver's own.
+	if (VB->ClipOrMask) {
+		tab = VB->Elts ? clip_render_tab_elts : clip_render_tab_verts;
+		clip_render_tab_elts[GL_TRIANGLES] = clip_elt_triangles;	// table entry is overwritten on every clipped run, elts or not
+	}
+	else {
+		tab = (VB->Elts ? 
+			tnl->Driver.Render.PrimTabElts : 
+		tnl->Driver.Render.PrimTabVerts);
+	}
+	// Walk VB->Primitive until an entry carries PRIM_END; repeat the walk while Multipass (if set) returns nonzero.
+	do {
+		GLuint i, length, flags = 0;
+		for (i = 0 ; !(flags & PRIM_END) ; i += length) {
+			flags = VB->Primitive[i].mode;
+			length= VB->Primitive[i].count;
+			ASSERT(length || (flags & PRIM_END));
+			ASSERT((flags & PRIM_MODE_MASK) <= GL_POLYGON+1);
+			if (length)
+				tab[flags & PRIM_MODE_MASK]( ctx, i, i + length, flags );
+		}
+	} while (tnl->Driver.Render.Multipass &&
+		tnl->Driver.Render.Multipass( ctx, ++pass ));
+	
+	
+//	tnl->Driver.Render.Finish( ctx );
+	// Unlock, then issue one draw per primitive type with a nonzero count (counts presumably set by the callbacks - confirm).
+	_GLD_DX7_VB(Unlock(gldPB->pVB));
+
+	if (gldPB->nPoints) {
+		_GLD_DX7_DEV(DrawPrimitiveVB(gld->pDev, D3DPT_POINTLIST, gldPB->pVB, 0, gldPB->nPoints, 0));
+		gldPB->nPoints = 0;
+	}
+
+	if (gldPB->nLines) {
+		_GLD_DX7_DEV(DrawPrimitiveVB(gld->pDev, D3DPT_LINELIST, gldPB->pVB, gldPB->iFirstLine, gldPB->nLines*2, 0));	// 2 vertices per line
+		gldPB->nLines = 0;
+	}
+
+	if (gldPB->nTriangles) {
+		_GLD_DX7_DEV(DrawPrimitiveVB(gld->pDev, D3DPT_TRIANGLELIST, gldPB->pVB, gldPB->iFirstTriangle, gldPB->nTriangles*3, 0));	// 3 vertices per triangle
+		gldPB->nTriangles = 0;
+	}
+
+	return GL_FALSE;		/* finished the pipe */
+}
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+/* Stage descriptor: only the name and the run callback are set; the four slots between them are NULL. */
+const struct tnl_pipeline_stage _gld_mesa_render_stage =
+{
+   "gld_mesa_render_stage",
+   NULL,
+   NULL,
+   NULL,
+   NULL,
+   _gld_mesa_render_stage_run	/* run */
+};
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx7/gld_wgl_dx7.c b/src/mesa/drivers/windows/gldirect/dx7/gld_wgl_dx7.c
new file mode 100644
index 0000000000..fa44a952a0
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx7/gld_wgl_dx7.c
@@ -0,0 +1,1613 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  GLDirect Direct3D 7.x WGL (WindowsGL)
+*
+****************************************************************************/
+
+#include "dglcontext.h"
+#include "gld_driver.h"
+//#include "gld_dxerr8.h"
+#include "gld_dx7.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+// Copied from dglcontext.c
+#define GLDERR_NONE     0
+#define GLDERR_MEM      1
+#define GLDERR_DDRAW    2
+#define GLDERR_D3D      3
+#define GLDERR_BPP      4
+#define GLDERR_DDS      5
+// This external var keeps track of any error
+extern int nContextError;
+
+// Uncomment this for persistent resources
+//#define _GLD_PERSISTANT
+
+#define DDLOG_CRITICAL_OR_WARN	DDLOG_CRITICAL
+
+extern void _gld_mesa_warning(GLcontext *, char *);
+extern void _gld_mesa_fatal(GLcontext *, char *);
+
+//---------------------------------------------------------------------------
+
+static char	szColorDepthWarning[] =	// user-facing text: desktop colour depth unsupported, switch to 16 bpp or higher
+"GLDirect does not support the current desktop\n\
+color depth.\n\n\
+You may need to change the display resolution to\n\
+16 bits per pixel or higher color depth using\n\
+the Windows Display Settings control panel\n\
+before running this OpenGL application.\n";
+
+// The only depth-stencil formats currently supported by Direct3D
+// Surface Format	Depth	Stencil		Total Bits
+// D3DFMT_D32		32		-			32
+// D3DFMT_D15S1		15		1			16
+// D3DFMT_D24S8		24		8			32
+// D3DFMT_D16		16		-			16
+// D3DFMT_D24X8		24		-			32
+// D3DFMT_D24X4S4	24		4			32
+// NOTE(review): the table lists D3DFMT_* names, but the template below stores a D3DX_SF_* value - confirm the table still applies.
+// This pixel format will be used as a template when compiling the list
+// of pixel formats supported by the hardware. Many fields will be
+// filled in at runtime.
+// PFD flag defaults are upgraded to match ChoosePixelFormat() -- DaveM
+static DGL_pixelFormat pfTemplateHW =
+{
+    {
+	sizeof(PIXELFORMATDESCRIPTOR),	// Size of the data structure
+		1,							// Structure version - should be 1
+									// Flags:
+		PFD_DRAW_TO_WINDOW |		// The buffer can draw to a window or device surface.
+		PFD_DRAW_TO_BITMAP |		// The buffer can draw to a bitmap. (DaveM)
+		PFD_SUPPORT_GDI |			// The buffer supports GDI drawing. (DaveM)
+		PFD_SUPPORT_OPENGL |		// The buffer supports OpenGL drawing.
+		PFD_DOUBLEBUFFER |			// The buffer is double-buffered.
+		0,							// Placeholder for easy commenting of above flags
+		PFD_TYPE_RGBA,				// Pixel type RGBA.
+		16,							// Total colour bitplanes (excluding alpha bitplanes)
+		5, 0,						// Red bits, shift
+		5, 0,						// Green bits, shift
+		5, 0,						// Blue bits, shift
+		0, 0,						// Alpha bits, shift (destination alpha)
+		0,							// Accumulator bits (total)
+		0, 0, 0, 0,					// Accumulator bits: Red, Green, Blue, Alpha
+		0,							// Depth bits
+		0,							// Stencil bits
+		0,							// Number of auxiliary buffers
+		0,							// Layer type
+		0,							// Specifies the number of overlay and underlay planes.
+		0,							// Layer mask
+		0,							// Specifies the transparent color or index of an underlay plane.
+		0							// Damage mask
+	},
+	D3DX_SF_UNKNOWN,	// No depth/stencil buffer
+};
+
+//---------------------------------------------------------------------------
+// Vertex Shaders
+//---------------------------------------------------------------------------
+/*
+// Vertex Shader Declaration
+static DWORD dwTwoSidedLightingDecl[] =
+{
+	D3DVSD_STREAM(0),
+	D3DVSD_REG(0,  D3DVSDT_FLOAT3), 	 // XYZ position
+	D3DVSD_REG(1,  D3DVSDT_FLOAT3), 	 // XYZ normal
+	D3DVSD_REG(2,  D3DVSDT_D3DCOLOR),	 // Diffuse color
+	D3DVSD_REG(3,  D3DVSDT_D3DCOLOR),	 // Specular color
+	D3DVSD_REG(4,  D3DVSDT_FLOAT2), 	 // 2D texture unit 0
+	D3DVSD_REG(5,  D3DVSDT_FLOAT2), 	 // 2D texture unit 1
+	D3DVSD_END()
+};
+
+// Vertex Shader for two-sided lighting
+static char *szTwoSidedLightingVS =
+// This is a test shader!
+"vs.1.0\n"
+"m4x4 oPos,v0,c0\n"
+"mov oD0,v2\n"
+"mov oD1,v3\n"
+"mov oT0,v4\n"
+"mov oT1,v5\n"
+;
+*/
+//---------------------------------------------------------------------------
+//---------------------------------------------------------------------------
+
+typedef struct {
+//	HINSTANCE			hD3D8DLL;			// Handle to d3d8.dll
+//	FNDIRECT3DCREATE7	fnDirect3DCreate7;	// Direct3DCreate8 function prototype
+//	BOOL				bDirect3D;			// Persistant Direct3D7 exists
+//	BOOL				bDirect3DDevice;	// Persistant Direct3DDevice7 exists
+//	IDirect3D7			*pD3D;				// Persistant Direct3D7
+//	IDirect3DDevice7	*pDev;				// Persistant Direct3DDevice7
+	BOOL				bD3DXStarted;		// The only live member; presumably TRUE once D3DX has been initialised - confirm
+} GLD_dx7_globals;
+// NOTE(review): the _GLD_PERSISTANT code paths in this file use dx7Globals.pD3D, .pDev and .bDirect3D, which are commented out above.
+// These are "global" to all DX7 contexts. KeithH
+static GLD_dx7_globals dx7Globals;
+
+// Added for correct clipping of multiple open windows. (DaveM)
+LPDIRECTDRAWSURFACE7 lpDDSPrimary = NULL;
+LPDIRECTDRAWCLIPPER lpDDClipper = NULL;
+
+//---------------------------------------------------------------------------
+//---------------------------------------------------------------------------
+
+BOOL gldGetDXErrorString_DX(
+	HRESULT hr,
+	char *buf,
+	int nBufSize)
+{
+	// Write a string describing the HRESULT 'hr' into 'buf', whose capacity is
+	// 'nBufSize'. Returns TRUE; returns FALSE, writing nothing, if the buffer is unusable.
+	if (buf == NULL || nBufSize <= 0)	// a negative size would turn into a huge unsigned length
+		return FALSE;
+	D3DXGetErrorString(hr, nBufSize, buf);
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+//
+// DX7 does not support multisample
+/*
+static D3DMULTISAMPLE_TYPE _gldGetDeviceMultiSampleType(
+	IDirect3D8 *pD3D8,
+	D3DFORMAT SurfaceFormat,
+	D3DDEVTYPE d3dDevType,
+	BOOL Windowed)
+{
+	int			i;
+	HRESULT		hr;
+
+	if (glb.dwMultisample == GLDS_MULTISAMPLE_NONE)
+		return D3DMULTISAMPLE_NONE;
+
+	if (glb.dwMultisample == GLDS_MULTISAMPLE_FASTEST) {
+		// Find fastest multisample
+		for (i=2; i<17; i++) {
+			hr = IDirect3D8_CheckDeviceMultiSampleType(
+					pD3D8,
+					glb.dwAdapter,
+					d3dDevType,
+					SurfaceFormat,
+					Windowed,
+					(D3DMULTISAMPLE_TYPE)i);
+			if (SUCCEEDED(hr)) {
+				return (D3DMULTISAMPLE_TYPE)i;
+			}
+		}
+	} else {
+		// Find nicest multisample
+		for (i=16; i>1; i--) {
+			hr = IDirect3D8_CheckDeviceMultiSampleType(
+					pD3D8,
+					glb.dwAdapter,
+					d3dDevType,
+					SurfaceFormat,
+					Windowed,
+					(D3DMULTISAMPLE_TYPE)i);
+			if (SUCCEEDED(hr)) {
+				return (D3DMULTISAMPLE_TYPE)i;
+			}
+		}
+	}
+
+	// Nothing found - return default
+	return D3DMULTISAMPLE_NONE;
+}
+*/
+//---------------------------------------------------------------------------
+
+void _gldDestroyPrimitiveBuffer(
+	GLD_pb_dx7 *gldVB)
+{
+	SAFE_RELEASE(gldVB->pVB);	// let go of the vertex buffer held by this primitive buffer
+	gldVB->nTriangles = 0;		// sanity check: leave no stale primitive counts behind
+	gldVB->nPoints = 0;
+	gldVB->nLines = 0;
+}
+
+//---------------------------------------------------------------------------
+
+HRESULT _gldCreatePrimitiveBuffer(
+	GLcontext *ctx,
+	GLD_driver_dx7 *lpCtx,
+	GLD_pb_dx7 *gldVB)
+{
+	// Create the Direct3D 7 vertex buffer behind the primitive buffer 'gldVB'
+	// and reset its bookkeeping.
+	//   ctx   - not used by this function.
+	//   lpCtx - driver context; its pD3D interface creates the buffer.
+	//   gldVB - in: dwCreateFlags and dwFVF; out: pVB plus counters and offsets.
+	// Returns S_OK, or the failing HRESULT from CreateVertexBuffer (which is
+	// also logged); on failure the counters and offsets are left untouched.
+	//
+	// Buffer layout in vertices, with N = dwMaxVertices:
+	//   [0, N)    points    (N vertices:  1 per point)
+	//   [N, 3N)   lines     (2N vertices: 2 per line)
+	//   [3N, 9N)  triangles (6N vertices: 6 per quad)
+	const DWORD			dwVertsPerInput = 9;	// 1 + 2 + 6, see layout above
+	HRESULT				hResult;
+	char				*szCreateVertexBufferFailed = "CreateVertexBuffer failed";
+	DWORD				dwMaxVertices;	// Input vertices catered for (N above)
+	D3DVERTEXBUFFERDESC	vbdesc;
+
+	// If CVA (Compiled Vertex Array) is used by an OpenGL app, then we
+	// will need enough vertices to cater for Mesa::Const.MaxArrayLockSize.
+	dwMaxVertices = MAX_ARRAY_LOCK_SIZE;
+
+	// Max vertex buffer size limited in DX7. (DaveM)
+	if (dwMaxVertices * dwVertsPerInput > D3DMAXNUMVERTICES)
+		dwMaxVertices = D3DMAXNUMVERTICES / dwVertsPerInput;
+
+	vbdesc.dwSize			= sizeof(vbdesc);
+	vbdesc.dwCaps			= gldVB->dwCreateFlags;
+	vbdesc.dwFVF			= gldVB->dwFVF;
+	vbdesc.dwNumVertices	= dwMaxVertices * dwVertsPerInput;
+
+	hResult = IDirect3D7_CreateVertexBuffer(
+		lpCtx->pD3D,
+		&vbdesc,
+		&gldVB->pVB,
+		0);
+	if (FAILED(hResult)) {
+		ddlogMessage(DDLOG_CRITICAL_OR_WARN, szCreateVertexBufferFailed);
+		return hResult;
+	}
+
+	gldVB->nLines = gldVB->nPoints = gldVB->nTriangles = 0;
+	gldVB->pPoints	= gldVB->pLines = gldVB->pTriangles = NULL;
+	gldVB->iFirstLine = dwMaxVertices; // Index of first line in VB
+	gldVB->iFirstTriangle = dwMaxVertices*3; // Index of first triangle in VB
+
+	return S_OK;
+}
+
+//---------------------------------------------------------------------------
+// Function: _gldCreateVertexShaders
+// Create DX8 Vertex Shaders.
+//---------------------------------------------------------------------------
+/*
+void _gldCreateVertexShaders(
+	GLD_driver_dx8 *gld)
+{
+	DWORD			dwFlags;
+	LPD3DXBUFFER	pVSOpcodeBuffer; // Vertex Shader opcode buffer
+	HRESULT			hr;
+
+#ifdef _DEBUG
+	dwFlags = D3DXASM_DEBUG;
+#else
+	dwFlags = 0; // D3DXASM_SKIPVALIDATION;
+#endif
+
+	ddlogMessage(DDLOG_INFO, "Creating shaders...\n");
+
+	// Init the shader handle
+	gld->VStwosidelight.hShader = 0;
+
+	if (gld->d3dCaps8.MaxStreams == 0) {
+		// Lame DX8 driver doesn't support streams
+		// Not fatal, as defaults will be used
+		ddlogMessage(DDLOG_WARN, "Driver doesn't support Vertex Shaders (MaxStreams==0)\n");
+		return;
+	}
+
+	// ** THIS DISABLES VERTEX SHADER SUPPORT **
+//	return;
+	// ** THIS DISABLES VERTEX SHADER SUPPORT **
+
+	//
+	// Two-sided lighting
+	//
+
+#if 0
+	//
+	// DEBUGGING: Load shader from a text file
+	//
+	{
+	LPD3DXBUFFER	pVSErrorBuffer; // Vertex Shader error buffer
+	hr = D3DXAssembleShaderFromFile(
+			"twoside.vsh",
+			dwFlags,
+			NULL, // No constants
+			&pVSOpcodeBuffer,
+			&pVSErrorBuffer);
+	if (pVSErrorBuffer && pVSErrorBuffer->lpVtbl->GetBufferPointer(pVSErrorBuffer))
+		ddlogMessage(DDLOG_INFO, pVSErrorBuffer->lpVtbl->GetBufferPointer(pVSErrorBuffer));
+	SAFE_RELEASE(pVSErrorBuffer);
+	}
+#else
+	{
+	LPD3DXBUFFER	pVSErrorBuffer; // Vertex Shader error buffer
+	// Assemble ascii shader text into shader opcodes
+	hr = D3DXAssembleShader(
+			szTwoSidedLightingVS,
+			strlen(szTwoSidedLightingVS),
+			dwFlags,
+			NULL, // No constants
+			&pVSOpcodeBuffer,
+			&pVSErrorBuffer);
+	if (pVSErrorBuffer && pVSErrorBuffer->lpVtbl->GetBufferPointer(pVSErrorBuffer))
+		ddlogMessage(DDLOG_INFO, pVSErrorBuffer->lpVtbl->GetBufferPointer(pVSErrorBuffer));
+	SAFE_RELEASE(pVSErrorBuffer);
+	}
+#endif
+	if (FAILED(hr)) {
+		ddlogError(DDLOG_WARN, "AssembleShader failed", hr);
+		SAFE_RELEASE(pVSOpcodeBuffer);
+		return;
+	}
+
+// This is for debugging. Remove to enable vertex shaders in HW
+#define _GLD_FORCE_SW_VS 0
+
+	if (_GLD_FORCE_SW_VS) {
+		// _GLD_FORCE_SW_VS should be disabled for Final Release
+		ddlogMessage(DDLOG_SYSTEM, "[Forcing shaders in SW]\n");
+	}
+
+	// Try and create shader in hardware.
+	// NOTE: The D3D Ref device appears to succeed when trying to
+	//       create the device in hardware, but later complains
+	//       when trying to set it with SetVertexShader(). Go figure.
+	if (_GLD_FORCE_SW_VS || glb.dwDriver == GLDS_DRIVER_REF) {
+		// Don't try and create a hardware shader with the Ref device
+		hr = E_FAIL; // COM error/fail result
+	} else {
+		gld->VStwosidelight.bHardware = TRUE;
+		hr = IDirect3DDevice8_CreateVertexShader(
+			gld->pDev,
+			dwTwoSidedLightingDecl,
+			pVSOpcodeBuffer->lpVtbl->GetBufferPointer(pVSOpcodeBuffer),
+			&gld->VStwosidelight.hShader,
+			0);
+	}
+	if (FAILED(hr)) {
+		ddlogMessage(DDLOG_INFO, "... HW failed, trying SW...\n");
+		// Failed. Try and create shader for software processing
+		hr = IDirect3DDevice8_CreateVertexShader(
+			gld->pDev,
+			dwTwoSidedLightingDecl,
+			pVSOpcodeBuffer->lpVtbl->GetBufferPointer(pVSOpcodeBuffer),
+			&gld->VStwosidelight.hShader,
+			D3DUSAGE_SOFTWAREPROCESSING);
+		if (FAILED(hr)) {
+			gld->VStwosidelight.hShader = 0; // Sanity check
+			ddlogError(DDLOG_WARN, "CreateVertexShader failed", hr);
+			return;
+		}
+		// Succeeded, but for software processing
+		gld->VStwosidelight.bHardware = FALSE;
+	}
+
+	SAFE_RELEASE(pVSOpcodeBuffer);
+
+	ddlogMessage(DDLOG_INFO, "... OK\n");
+}
+
+//---------------------------------------------------------------------------
+
+void _gldDestroyVertexShaders(
+	GLD_driver_dx8 *gld)
+{
+	if (gld->VStwosidelight.hShader) {
+		IDirect3DDevice8_DeleteVertexShader(gld->pDev, gld->VStwosidelight.hShader);
+		gld->VStwosidelight.hShader = 0;
+	}
+}
+*/
+//---------------------------------------------------------------------------
+
+BOOL gldCreateDrawable_DX(
+	DGL_ctx *ctx,
+//	BOOL bDefaultDriver,
+	BOOL bDirectDrawPersistant,
+	BOOL bPersistantBuffers)
+{
+	//
+	// bDirectDrawPersistant:	applies to IDirect3D8
+	// bPersistantBuffers:		applies to IDirect3DDevice8
+	//
+
+//	D3DDEVTYPE				d3dDevType;
+//	D3DPRESENT_PARAMETERS	d3dpp;
+//	D3DDISPLAYMODE			d3ddm;
+//	DWORD					dwBehaviourFlags;
+//	D3DADAPTER_IDENTIFIER8	d3dIdent;
+
+	HRESULT				hr;
+	GLD_driver_dx7		*lpCtx = NULL;
+	D3DX_VIDMODEDESC	d3ddm;
+
+	// Parameters for D3DXCreateContextEx
+	// These will be different for fullscreen and windowed
+	DWORD				dwDeviceIndex;
+	DWORD				dwFlags;
+	HWND				hwnd;
+	HWND				hwndFocus;
+	DWORD				numColorBits;
+	DWORD				numAlphaBits;
+	DWORD				numDepthBits;
+	DWORD				numStencilBits;
+	DWORD				numBackBuffers;
+	DWORD				dwWidth;
+	DWORD				dwHeight;
+	DWORD				refreshRate;
+
+	// Error if context is NULL.
+	if (ctx == NULL)
+		return FALSE;
+
+	if (ctx->glPriv) {
+		lpCtx = ctx->glPriv;
+		// Release any existing interfaces (in reverse order)
+		SAFE_RELEASE(lpCtx->pDev);
+		SAFE_RELEASE(lpCtx->pD3D);
+		lpCtx->pD3DXContext->lpVtbl->Release(lpCtx->pD3DXContext);
+		lpCtx->pD3DXContext = NULL;
+	} else {
+		lpCtx = (GLD_driver_dx7*)malloc(sizeof(GLD_driver_dx7));
+		ZeroMemory(lpCtx, sizeof(lpCtx));
+	}
+
+//	d3dDevType = (glb.dwDriver == GLDS_DRIVER_HAL) ? D3DDEVTYPE_HAL : D3DDEVTYPE_REF;
+	// Use REF device if requested. Otherwise D3DX_DEFAULT will choose highest level
+	// of HW acceleration.
+	dwDeviceIndex = (glb.dwDriver == GLDS_DRIVER_REF) ? D3DX_HWLEVEL_REFERENCE : D3DX_DEFAULT;
+
+	// TODO: Check this
+//	if (bDefaultDriver)
+//		d3dDevType = D3DDEVTYPE_REF;
+
+#ifdef _GLD_PERSISTANT
+	// Use persistant interface if needed
+	if (bDirectDrawPersistant && dx7Globals.bDirect3D) {
+		lpCtx->pD3D = dx7Globals.pD3D;
+		IDirect3D7_AddRef(lpCtx->pD3D);
+		goto SkipDirectDrawCreate;
+	}
+#endif
+/*
+	// Create Direct3D7 object
+	lpCtx->pD3D = dx7Globals.fnDirect3DCreate8(D3D_SDK_VERSION_DX8_SUPPORT_WIN95);
+	if (lpCtx->pD3D == NULL) {
+		MessageBox(NULL, "Unable to initialize Direct3D8", "GLDirect", MB_OK);
+		ddlogMessage(DDLOG_CRITICAL_OR_WARN, "Unable to create Direct3D8 interface");
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+*/
+
+#ifdef _GLD_PERSISTANT
+	// Cache Direct3D interface for subsequent GLRCs
+	if (bDirectDrawPersistant && !dx8Globals.bDirect3D) {
+		dx7Globals.pD3D = lpCtx->pD3D;
+		IDirect3D7_AddRef(dx7Globals.pD3D);
+		dx7Globals.bDirect3D = TRUE;
+	}
+SkipDirectDrawCreate:
+#endif
+/*
+	// Get the display mode so we can make a compatible backbuffer
+	hResult = IDirect3D8_GetAdapterDisplayMode(lpCtx->pD3D, glb.dwAdapter, &d3ddm);
+	if (FAILED(hResult)) {
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+*/
+
+#if 0
+	// Get device caps
+	hResult = IDirect3D8_GetDeviceCaps(lpCtx->pD3D, glb.dwAdapter, d3dDevType, &lpCtx->d3dCaps8);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "IDirect3D8_GetDeviceCaps failed", hResult);
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+
+	// Check for hardware transform & lighting
+	lpCtx->bHasHWTnL = lpCtx->d3dCaps8.DevCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT ? TRUE : FALSE;
+
+	// If this flag is present then we can't default to Mesa
+	// SW rendering between BeginScene() and EndScene().
+	if (lpCtx->d3dCaps8.Caps2 & D3DCAPS2_NO2DDURING3DSCENE) {
+		ddlogMessage(DDLOG_WARN,
+			"Warning          : No 2D allowed during 3D scene.\n");
+	}
+#endif
+
+	//
+	//	Create the Direct3D context
+	//
+
+#ifdef _GLD_PERSISTANT
+	// Re-use original IDirect3DDevice if persistant buffers exist.
+	// Note that we test for persistant IDirect3D8 as well
+	// bDirectDrawPersistant == persistant IDirect3D8 (DirectDraw8 does not exist)
+	if (bDirectDrawPersistant && bPersistantBuffers && dx7Globals.pD3D && dx7Globals.pDev) {
+		lpCtx->pDev = dx7Globals.pDev;
+		IDirect3DDevice7_AddRef(dx7Globals.pDev);
+		goto skip_direct3ddevice_create;
+	}
+#endif
+/*
+	// Clear the presentation parameters (sets all members to zero)
+	ZeroMemory(&d3dpp, sizeof(d3dpp));
+
+	// Recommended by MS; needed for MultiSample.
+	// Be careful if altering this for FullScreenBlit
+	d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
+
+	d3dpp.BackBufferFormat	= d3ddm.Format;
+	d3dpp.BackBufferCount	= 1;
+	d3dpp.MultiSampleType	= _gldGetDeviceMultiSampleType(lpCtx->pD3D, d3ddm.Format, d3dDevType, !ctx->bFullscreen);
+	d3dpp.AutoDepthStencilFormat	= ctx->lpPF->dwDriverData;
+	d3dpp.EnableAutoDepthStencil	= (d3dpp.AutoDepthStencilFormat == D3DFMT_UNKNOWN) ? FALSE : TRUE;
+
+	if (ctx->bFullscreen) {
+		ddlogWarnOption(FALSE); // Don't popup any messages in fullscreen 
+		d3dpp.Windowed							= FALSE;
+		d3dpp.BackBufferWidth					= d3ddm.Width;
+		d3dpp.BackBufferHeight					= d3ddm.Height;
+		d3dpp.hDeviceWindow						= ctx->hWnd;
+		d3dpp.FullScreen_RefreshRateInHz		= D3DPRESENT_RATE_DEFAULT;
+
+		// Support for vertical retrace synchronisation.
+		// Set default presentation interval in case caps bits are missing
+		d3dpp.FullScreen_PresentationInterval	= D3DPRESENT_INTERVAL_DEFAULT;
+		if (glb.bWaitForRetrace) {
+			if (lpCtx->d3dCaps8.PresentationIntervals & D3DPRESENT_INTERVAL_ONE)
+				d3dpp.FullScreen_PresentationInterval = D3DPRESENT_INTERVAL_ONE;
+		} else {
+			if (lpCtx->d3dCaps8.PresentationIntervals & D3DPRESENT_INTERVAL_IMMEDIATE)
+				d3dpp.FullScreen_PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;
+		}
+	} else {
+		ddlogWarnOption(glb.bMessageBoxWarnings); // OK to popup messages
+		d3dpp.Windowed							= TRUE;
+		d3dpp.BackBufferWidth					= ctx->dwWidth;
+		d3dpp.BackBufferHeight					= ctx->dwHeight;
+		d3dpp.hDeviceWindow						= ctx->hWnd;
+		d3dpp.FullScreen_RefreshRateInHz		= 0;
+		// FullScreen_PresentationInterval must be default for Windowed mode
+		d3dpp.FullScreen_PresentationInterval	= D3DPRESENT_INTERVAL_DEFAULT;
+	}
+
+	// Decide if we can use hardware TnL
+	dwBehaviourFlags = (lpCtx->bHasHWTnL) ?
+		D3DCREATE_MIXED_VERTEXPROCESSING : D3DCREATE_SOFTWARE_VERTEXPROCESSING;
+	// Add flag to tell D3D to be thread-safe
+	if (glb.bMultiThreaded)
+		dwBehaviourFlags |= D3DCREATE_MULTITHREADED;
+	hResult = IDirect3D8_CreateDevice(lpCtx->pD3D,
+								glb.dwAdapter,
+								d3dDevType,
+								ctx->hWnd,
+								dwBehaviourFlags,
+								&d3dpp,
+								&lpCtx->pDev);
+    if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "IDirect3D8_CreateDevice failed", hResult);
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+*/
+
+	// Create D3DX context
+	if (ctx->bFullscreen) {
+		//
+		// FULLSCREEN
+		//
+
+		// Get display mode
+		D3DXGetCurrentVideoMode(D3DX_DEFAULT, &d3ddm);
+
+		// Fullscreen Parameters
+		dwFlags			= D3DX_CONTEXT_FULLSCREEN;
+		hwnd			= ctx->hWnd;
+		hwndFocus		= ctx->hWnd;
+		numColorBits	= ctx->lpPF->pfd.cColorBits;
+		numAlphaBits	= ctx->lpPF->pfd.cAlphaBits;
+		numDepthBits	= ctx->lpPF->pfd.cDepthBits + ctx->lpPF->pfd.cStencilBits;
+		numStencilBits	= ctx->lpPF->pfd.cStencilBits;
+		numBackBuffers	= D3DX_DEFAULT; // Default is 1 backbuffer
+		dwWidth			= d3ddm.width;
+		dwHeight		= d3ddm.height;
+		refreshRate		= d3ddm.refreshRate; // D3DX_DEFAULT;
+	} else {
+		//
+		// WINDOWED
+		//
+
+		// Windowed Parameters
+		dwFlags			= 0; // No flags means "windowed"
+		hwnd			= ctx->hWnd;
+		hwndFocus		= (HWND)D3DX_DEFAULT;
+		numColorBits	= D3DX_DEFAULT; // Use Desktop depth
+		numAlphaBits	= ctx->lpPF->pfd.cAlphaBits;
+		numDepthBits	= ctx->lpPF->pfd.cDepthBits + ctx->lpPF->pfd.cStencilBits;
+		numStencilBits	= ctx->lpPF->pfd.cStencilBits;
+		numBackBuffers	= D3DX_DEFAULT; // Default is 1 backbuffer
+		dwWidth			= ctx->dwWidth;
+		dwHeight		= ctx->dwHeight;
+		refreshRate		= D3DX_DEFAULT;
+	}
+	hr = D3DXCreateContextEx(dwDeviceIndex, dwFlags, hwnd, hwndFocus,
+							numColorBits, numAlphaBits, numDepthBits, numStencilBits,
+							numBackBuffers,
+							dwWidth, dwHeight, refreshRate,
+							&lpCtx->pD3DXContext);
+    if (FAILED(hr)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "D3DXCreateContextEx failed", hr);
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+
+	// Obtain D3D7 interfaces from ID3DXContext
+//	lpCtx->pDD	= ID3DXContext_GetDD(lpCtx->pD3DXContext);
+	lpCtx->pDD	= lpCtx->pD3DXContext->lpVtbl->GetDD(lpCtx->pD3DXContext);
+	if (lpCtx->pDD == NULL)
+		goto return_with_error;
+	lpCtx->pD3D	= lpCtx->pD3DXContext->lpVtbl->GetD3D(lpCtx->pD3DXContext);
+	if (lpCtx->pD3D == NULL)
+		goto return_with_error;
+	lpCtx->pDev	= lpCtx->pD3DXContext->lpVtbl->GetD3DDevice(lpCtx->pD3DXContext);
+	if (lpCtx->pDev == NULL)
+		goto return_with_error;
+
+    // Need to manage clipper manually for multiple windows
+    // since DX7 D3DX utility lib does not appear to do that. (DaveM)
+    if (!ctx->bFullscreen) {
+        // Get primary surface too
+        lpDDSPrimary = lpCtx->pD3DXContext->lpVtbl->GetPrimary(lpCtx->pD3DXContext);
+	    if (lpDDSPrimary == NULL) {
+		    ddlogPrintf(DDLOG_WARN, "GetPrimary");
+            goto return_with_error;
+	    }
+	    // Create clipper for correct window updates
+        if (IDirectDraw7_CreateClipper(lpCtx->pDD, 0, &lpDDClipper, NULL) != DD_OK) {
+		    ddlogPrintf(DDLOG_WARN, "CreateClipper");
+		    goto return_with_error;
+	    }
+        // Set the window that the clipper belongs to
+        if (IDirectDrawClipper_SetHWnd(lpDDClipper, 0, hwnd) != DD_OK) {
+		    ddlogPrintf(DDLOG_WARN, "SetHWnd");
+		    goto return_with_error;
+	    }
+        // Attach the clipper to the primary surface
+        if (IDirectDrawSurface7_SetClipper(lpDDSPrimary, lpDDClipper) != DD_OK) {
+		    ddlogPrintf(DDLOG_WARN, "SetClipper");
+            goto return_with_error;
+	    }
+    }
+
+	// Get device caps
+	IDirect3DDevice7_GetCaps(lpCtx->pDev, &lpCtx->d3dCaps);
+
+	// Determine HW TnL
+	lpCtx->bHasHWTnL = lpCtx->d3dCaps.dwDevCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT ? TRUE : FALSE;
+
+#ifdef _GLD_PERSISTANT
+	if (bDirectDrawPersistant && bPersistantBuffers && dx7Globals.pD3D) {
+		dx7Globals.pDev = lpCtx->pDev;
+		dx7Globals.bDirect3DDevice = TRUE;
+	}
+#endif
+
+#if 0
+	// Dump some useful stats
+	hResult = IDirect3D8_GetAdapterIdentifier(
+		lpCtx->pD3D,
+		glb.dwAdapter,
+		D3DENUM_NO_WHQL_LEVEL, // Avoids 1 to 2 second delay
+		&d3dIdent);
+	if (SUCCEEDED(hResult)) {
+		ddlogPrintf(DDLOG_INFO, "[Driver Description: %s]", &d3dIdent.Description);
+		ddlogPrintf(DDLOG_INFO, "[Driver file: %s %d.%d.%02d.%d]",
+			d3dIdent.Driver,
+			HIWORD(d3dIdent.DriverVersion.HighPart),
+			LOWORD(d3dIdent.DriverVersion.HighPart),
+			HIWORD(d3dIdent.DriverVersion.LowPart),
+			LOWORD(d3dIdent.DriverVersion.LowPart));
+		ddlogPrintf(DDLOG_INFO, "[VendorId: 0x%X, DeviceId: 0x%X, SubSysId: 0x%X, Revision: 0x%X]",
+			d3dIdent.VendorId, d3dIdent.DeviceId, d3dIdent.SubSysId, d3dIdent.Revision);
+	}
+#endif
+
+	// Init projection matrix for D3D TnL
+	D3DXMatrixIdentity((D3DXMATRIX*)&lpCtx->matProjection);
+	lpCtx->matModelView = lpCtx->matProjection;
+//		gld->bUseMesaProjection = TRUE;
+
+skip_direct3ddevice_create:
+
+	// Create buffers to hold primitives
+	lpCtx->PB2d.dwFVF			= GLD_FVF_2D_VERTEX;
+//	lpCtx->PB2d.dwPool			= D3DPOOL_SYSTEMMEM;
+	lpCtx->PB2d.dwStride		= sizeof(GLD_2D_VERTEX);
+	lpCtx->PB2d.dwCreateFlags	= D3DVBCAPS_DONOTCLIP |
+									D3DVBCAPS_SYSTEMMEMORY |
+									D3DVBCAPS_WRITEONLY;
+	hr = _gldCreatePrimitiveBuffer(ctx->glCtx, lpCtx, &lpCtx->PB2d);
+	if (FAILED(hr))
+		goto return_with_error;
+
+	lpCtx->PB3d.dwFVF			= GLD_FVF_3D_VERTEX;
+//	lpCtx->PB3d.dwPool			= D3DPOOL_DEFAULT;
+	lpCtx->PB3d.dwStride		= sizeof(GLD_3D_VERTEX);
+	lpCtx->PB3d.dwCreateFlags	= D3DVBCAPS_WRITEONLY;
+
+	hr = _gldCreatePrimitiveBuffer(ctx->glCtx, lpCtx, &lpCtx->PB3d);
+	if (FAILED(hr))
+		goto return_with_error;
+
+	// Zero the pipeline usage counters
+	lpCtx->PipelineUsage.qwMesa.QuadPart = 
+//	lpCtx->PipelineUsage.dwD3D2SVS.QuadPart =
+	lpCtx->PipelineUsage.qwD3DFVF.QuadPart = 0;
+
+	// Assign drawable to GL private
+	ctx->glPriv = lpCtx;
+	return TRUE;
+
+return_with_error:
+	// Clean up and bail
+	_gldDestroyPrimitiveBuffer(&lpCtx->PB3d);
+	_gldDestroyPrimitiveBuffer(&lpCtx->PB2d);
+
+	SAFE_RELEASE(lpCtx->pDev);
+	SAFE_RELEASE(lpCtx->pD3D);
+	//SAFE_RELEASE(lpCtx->pD3DXContext);
+	lpCtx->pD3DXContext->lpVtbl->Release(lpCtx->pD3DXContext);
+	return FALSE;
+}
+
+//---------------------------------------------------------------------------
+
+// Resize the drawable to the current window size (windowed) or to the
+// current video mode (fullscreen).
+//
+// Any scene in progress is ended, the primitive buffers are released, the
+// D3DX context is resized and cleared, the primitive buffers are recreated,
+// a complete Mesa state update is signalled and a new scene is begun.
+//
+// bDefaultDriver, bPersistantInterface and bPersistantBuffers are not used
+// by this DX7 implementation.
+//
+// Returns FALSE if ctx or its private driver data is NULL, or if any of the
+// D3DX/primitive-buffer calls fails. After such a failure no new scene has
+// been started (ctx->bSceneStarted is FALSE).
+BOOL gldResizeDrawable_DX(
+	DGL_ctx *ctx,
+	BOOL bDefaultDriver,
+	BOOL bPersistantInterface,
+	BOOL bPersistantBuffers)
+{
+	GLD_driver_dx7			*gld = NULL;
+	D3DX_VIDMODEDESC		d3ddm;
+	HRESULT					hr;
+	DWORD					dwWidth, dwHeight;
+
+	// Error if context is NULL.
+	if (ctx == NULL)
+		return FALSE;
+
+	gld = ctx->glPriv;
+	if (gld == NULL)
+		return FALSE;
+
+	// A scene must not be open while the context is resized
+	if (ctx->bSceneStarted) {
+		IDirect3DDevice7_EndScene(gld->pDev);
+		ctx->bSceneStarted = FALSE;
+	}
+
+	// Release objects before the context is resized
+	_gldDestroyPrimitiveBuffer(&gld->PB3d);
+	_gldDestroyPrimitiveBuffer(&gld->PB2d);
+
+	// Obtain dimensions of 'window'
+	if (ctx->bFullscreen) {
+		// Do not read d3ddm unless the query actually filled it in
+		hr = D3DXGetCurrentVideoMode(D3DX_DEFAULT, &d3ddm);
+		if (FAILED(hr)) {
+			ddlogError(DDLOG_CRITICAL_OR_WARN, "gldResizeDrawable_DX: D3DXGetCurrentVideoMode failed", hr);
+			return FALSE;
+		}
+		dwWidth = d3ddm.width;
+		dwHeight = d3ddm.height;
+	} else {
+		dwWidth = ctx->dwWidth;
+		dwHeight = ctx->dwHeight;
+	}
+
+	// Resize context
+	hr = gld->pD3DXContext->lpVtbl->Resize(gld->pD3DXContext, dwWidth, dwHeight);
+	if (FAILED(hr)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "gldResizeDrawable_DX: Resize failed", hr);
+		return FALSE;
+	}
+
+	// Clear the resized surface (DaveM)
+	{
+		// vp1 keeps the current viewport so it can be restored afterwards;
+		// vp2 is a copy widened to cover the whole resized surface.
+		D3DVIEWPORT7 vp1, vp2;
+		IDirect3DDevice7_GetViewport(gld->pDev, &vp1);
+		IDirect3DDevice7_GetViewport(gld->pDev, &vp2);
+		vp2.dwX = 0;
+		vp2.dwY = 0;
+		vp2.dwWidth = dwWidth;
+		vp2.dwHeight = dwHeight;
+		IDirect3DDevice7_SetViewport(gld->pDev, &vp2);
+		hr = gld->pD3DXContext->lpVtbl->Clear(gld->pD3DXContext, D3DCLEAR_TARGET);
+		if (FAILED(hr))
+			ddlogError(DDLOG_WARN, "gldResizeDrawable_DX: Clear failed", hr);
+		IDirect3DDevice7_SetViewport(gld->pDev, &vp1);
+	}
+
+	//
+	// Recreate objects
+	//
+	// The buffer descriptions (FVF, stride, flags) are still held in
+	// gld->PB2d / gld->PB3d. Creation failures are reported instead of
+	// being silently ignored, matching the checks made when the drawable
+	// is first created.
+	hr = _gldCreatePrimitiveBuffer(ctx->glCtx, gld, &gld->PB2d);
+	if (FAILED(hr)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "gldResizeDrawable_DX: Unable to recreate 2D primitive buffer", hr);
+		return FALSE;
+	}
+	hr = _gldCreatePrimitiveBuffer(ctx->glCtx, gld, &gld->PB3d);
+	if (FAILED(hr)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "gldResizeDrawable_DX: Unable to recreate 3D primitive buffer", hr);
+		return FALSE;
+	}
+
+	// Signal a complete state update
+	ctx->glCtx->Driver.UpdateState(ctx->glCtx, _NEW_ALL);
+
+	// Begin a new scene
+	IDirect3DDevice7_BeginScene(gld->pDev);
+	ctx->bSceneStarted = TRUE;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Destroy the DX7 drawable attached to ctx.
+//
+// Releases the primitive buffers and the DirectX interfaces held in the
+// private driver data, then frees that data and clears ctx->glPriv.
+//
+// Returns FALSE if ctx is NULL or has no drawable; TRUE otherwise.
+BOOL gldDestroyDrawable_DX(
+	DGL_ctx *ctx)
+{
+	GLD_driver_dx7			*lpCtx = NULL;
+
+	// Error if context is NULL.
+	if (!ctx)
+		return FALSE;
+
+	// Error if the drawable does not exist.
+	if (!ctx->glPriv)
+		return FALSE;
+
+	lpCtx = ctx->glPriv;
+
+#ifdef _DEBUG
+	// Dump out stats.
+	// The low DWORD is zero-padded so that the two halves read as one
+	// unambiguous 64-bit hexadecimal value.
+	ddlogPrintf(DDLOG_SYSTEM, "Usage: M:0x%X%08X, D:0x%X%08X",
+		lpCtx->PipelineUsage.qwMesa.HighPart,
+		lpCtx->PipelineUsage.qwMesa.LowPart,
+		lpCtx->PipelineUsage.qwD3DFVF.HighPart,
+		lpCtx->PipelineUsage.qwD3DFVF.LowPart);
+#endif
+
+	// Destroy Primitive Buffers
+	_gldDestroyPrimitiveBuffer(&lpCtx->PB3d);
+	_gldDestroyPrimitiveBuffer(&lpCtx->PB2d);
+
+	// Release DX interfaces (in reverse order)
+	SAFE_RELEASE(lpCtx->pDev);
+	SAFE_RELEASE(lpCtx->pD3D);
+	// Guard the D3DX context: it is released through its vtable directly,
+	// so a NULL pointer here would otherwise be dereferenced.
+	if (lpCtx->pD3DXContext != NULL) {
+		lpCtx->pD3DXContext->lpVtbl->Release(lpCtx->pD3DXContext);
+		lpCtx->pD3DXContext = NULL;
+	}
+
+	// Free the private drawable data
+	free(ctx->glPriv);
+	ctx->glPriv = NULL;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Create the driver-wide private state for the DX7 backend.
+// The only work done here is initialising the D3DX utility library.
+// Returns TRUE if D3DXInitialize() succeeds, FALSE otherwise.
+BOOL gldCreatePrivateGlobals_DX(void)
+{
+	HRESULT hrInit;
+
+	// Initialise D3DX
+	hrInit = D3DXInitialize();
+	if (FAILED(hrInit))
+		return FALSE;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Tear down the driver-wide private state for the DX7 backend.
+// The only work done here is shutting down the D3DX utility library.
+// Returns TRUE if D3DXUninitialize() succeeds, FALSE otherwise.
+BOOL gldDestroyPrivateGlobals_DX(void)
+{
+	HRESULT hrShutdown;
+
+	// Shut down D3DX
+	hrShutdown = D3DXUninitialize();
+	if (FAILED(hrShutdown))
+		return FALSE;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Report the colour-channel bit counts of a D3DX display surface format.
+//
+// fmt           Surface format to decode.
+// cColorBits    Receives the total bits per pixel.
+// cRedBits, cGreenBits, cBlueBits, cAlphaBits
+//               Receive the per-channel bit counts.
+//
+// Formats that are not recognised are reported as 32bpp, 8/8/8 RGB with
+// no alpha. All five outputs are always written.
+static void _BitsFromDisplayFormat(
+	D3DX_SURFACEFORMAT fmt,
+	BYTE *cColorBits,
+	BYTE *cRedBits,
+	BYTE *cGreenBits,
+	BYTE *cBlueBits,
+	BYTE *cAlphaBits)
+{
+	// Start from the fallback (also the X8R8G8B8 answer) and override
+	// only what differs for the other known formats.
+	BYTE nTotal = 32;
+	BYTE nRed = 8;
+	BYTE nGreen = 8;
+	BYTE nBlue = 8;
+	BYTE nAlpha = 0;
+
+	switch (fmt) {
+	case D3DX_SF_R5G5B5:
+		nTotal = 16;
+		nRed = 5;
+		nGreen = 5;
+		nBlue = 5;
+		break;
+	case D3DX_SF_R5G6B5:
+		nTotal = 16;
+		nRed = 5;
+		nGreen = 6;
+		nBlue = 5;
+		break;
+	case D3DX_SF_A8R8G8B8:
+		nAlpha = 8;
+		break;
+	case D3DX_SF_X8R8G8B8:
+	default:
+		// Should not get to 'default'; fallback values apply.
+		break;
+	}
+
+	*cColorBits = nTotal;
+	*cRedBits = nRed;
+	*cGreenBits = nGreen;
+	*cBlueBits = nBlue;
+	*cAlphaBits = nAlpha;
+}
+
+//---------------------------------------------------------------------------
+
+// Report the depth and stencil bit counts of a D3DX depth/stencil format.
+//
+// fmt           Surface format to decode.
+// cDepthBits    Receives the number of depth bits.
+// cStencilBits  Receives the number of stencil bits.
+//
+// If fmt is not one of the formats handled below, neither output is
+// written and the caller's existing values are left as they were.
+static void _BitsFromDepthStencilFormat(
+	D3DX_SURFACEFORMAT fmt,
+	BYTE *cDepthBits,
+	BYTE *cStencilBits)
+{
+	BYTE nDepth;
+	BYTE nStencil;
+
+	switch (fmt) {
+	case D3DX_SF_Z16S0:
+		nDepth = 16;
+		nStencil = 0;
+		break;
+	case D3DX_SF_Z32S0:
+		nDepth = 32;
+		nStencil = 0;
+		break;
+	// The Z/S and S/Z orderings carry the same bit counts
+	case D3DX_SF_Z15S1:
+	case D3DX_SF_S1Z15:
+		nDepth = 15;
+		nStencil = 1;
+		break;
+	case D3DX_SF_Z24S8:
+	case D3DX_SF_S8Z24:
+		nDepth = 24;
+		nStencil = 8;
+		break;
+	default:
+		// Unrecognised format: leave the outputs untouched
+		return;
+	}
+
+	*cDepthBits = nDepth;
+	*cStencilBits = nStencil;
+}
+
+//---------------------------------------------------------------------------
+/*
+BOOL GLD_CheckDepthStencilMatch(
+	DWORD dwDeviceIndex,
+	D3DX_SURFACEFORMAT sfWant)
+{
+	// Emulate function built in to DX9
+	D3DX_SURFACEFORMAT	sfFound;
+	int i;
+	int nFormats = D3DXGetMaxSurfaceFormats(dwDeviceIndex, NULL, D3DX_SC_DEPTHBUFFER);
+	if (nFormats) {
+		for (i=0; i<nFormats; i++) {
+		D3DXGetSurfaceFormat(dwDeviceIndex, NULL, D3DX_SC_DEPTHBUFFER, i, &sfFound);		}
+		if (sfFound == sfWant)
+			return TRUE;
+	}
+
+	return FALSE;
+}
+*/
+//---------------------------------------------------------------------------
+
+// Discover a depth/stencil format that D3DX itself considers compatible
+// with the given device, by creating a throw-away default context on the
+// desktop window and inspecting the depth buffer that D3DX attached to it.
+//
+// dwDeviceIndex  D3DX device index to probe.
+//
+// Returns the surface format of that depth buffer, or D3DX_SF_UNKNOWN if
+// the context could not be created, has no depth buffer, or the pixel
+// format of the depth buffer could not be queried.
+D3DX_SURFACEFORMAT _gldFindCompatibleDepthStencilFormat(
+	DWORD dwDeviceIndex)
+{
+	// Jump through some hoops...
+
+	ID3DXContext		*pD3DXContext = NULL;
+	IDirectDrawSurface7	*pZBuffer = NULL;
+	DDPIXELFORMAT		ddpf;
+	D3DX_SURFACEFORMAT	sfFound = D3DX_SF_UNKNOWN;
+	HRESULT				hr;
+	HWND				hWnd;
+
+	// Get an HWND - use Desktop's
+	hWnd = GetDesktopWindow();
+
+	// Create a fully specified default context.
+	hr = D3DXCreateContextEx(dwDeviceIndex, 0, hWnd, (HWND)D3DX_DEFAULT,
+						D3DX_DEFAULT, D3DX_DEFAULT, D3DX_DEFAULT, D3DX_DEFAULT,
+						D3DX_DEFAULT, D3DX_DEFAULT, D3DX_DEFAULT, D3DX_DEFAULT,
+						&pD3DXContext);
+	// Without a context there is nothing to inspect (and nothing to release)
+	if (FAILED(hr) || (pD3DXContext == NULL))
+		return D3DX_SF_UNKNOWN;
+
+	// Obtain depth buffer that was created in context
+	pZBuffer = pD3DXContext->lpVtbl->GetZBuffer(pD3DXContext);
+	if (pZBuffer != NULL) {
+		// Get pixel format of depth buffer
+		memset(&ddpf, 0, sizeof(ddpf));
+		ddpf.dwSize = sizeof(ddpf);
+		hr = pZBuffer->lpVtbl->GetPixelFormat(pZBuffer, &ddpf);
+		// Only convert a pixel format that was actually filled in
+		if (!FAILED(hr))
+			sfFound = D3DXMakeSurfaceFormat(&ddpf);
+
+		// Done with surface - release it
+		pZBuffer->lpVtbl->Release(pZBuffer);
+	}
+
+	// Done with D3DX context
+	pD3DXContext->lpVtbl->Release(pD3DXContext);
+
+	return sfFound;
+}
+
+//---------------------------------------------------------------------------
+
+// Build the global pixelformat list (glb.lpPF / glb.nPixelFormatCount)
+// for Direct3D rendering.
+//
+// Any existing list is freed first. Each depth-buffer format enumerated
+// by D3DX for the selected device yields two entries: a single-buffered
+// one followed (in a second run) by a double-buffered one. Colour bits
+// are taken from the current video mode; alpha bits are always zero.
+//
+// Returns FALSE (leaving an empty list) if the video mode cannot be
+// queried, no depth formats are available, or allocation fails.
+// On success glb.bPixelformatsDirty is cleared and TRUE is returned.
+BOOL gldBuildPixelformatList_DX(void)
+{
+	D3DX_DEVICEDESC		d3dxdd;
+	D3DX_VIDMODEDESC	d3ddm;
+	D3DX_SURFACEFORMAT	fmt[64]; // 64 should be enough...
+	const int			nMaxFormats = (int)(sizeof(fmt) / sizeof(fmt[0]));
+	DWORD				dwDeviceIndex;
+	HRESULT				hr;
+	int					nSupportedFormats = 0;		// Total formats
+	int					nDepthOnlyFormats = 0;
+	int					nDepthStencilFormats = 0;
+	int					nPass;
+	int					i;
+	DGL_pixelFormat		*pPF;
+	BYTE				cColorBits, cRedBits, cGreenBits, cBlueBits, cAlphaBits;
+
+	// Dump DX version
+	ddlogMessage(GLDLOG_SYSTEM, "DirectX Version  : 7.0\n");
+
+	// Release any existing pixelformat list
+	free(glb.lpPF);
+	glb.nPixelFormatCount	= 0;
+	glb.lpPF				= NULL;
+
+	//
+	// Pixelformats for Direct3D (SW or HW) rendering
+	//
+
+	dwDeviceIndex = (glb.dwDriver == GLDS_DRIVER_REF) ? D3DX_HWLEVEL_REFERENCE : D3DX_DEFAULT;
+
+	// Dump description. The descriptor is zeroed first so that an empty
+	// string (rather than stack garbage) is logged if the query fails.
+	memset(&d3dxdd, 0, sizeof(d3dxdd));
+	D3DXGetDeviceDescription(dwDeviceIndex, &d3dxdd);
+	ddlogPrintf(GLDLOG_SYSTEM, "Device: %s", d3dxdd.driverDesc);
+
+	// Get display mode; its bpp decides the colour bits of every entry
+	hr = D3DXGetCurrentVideoMode(D3DX_DEFAULT, &d3ddm);
+	if (FAILED(hr)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "gldBuildPixelformatList_DX: D3DXGetCurrentVideoMode failed", hr);
+		return FALSE;
+	}
+
+	// Count depth buffer formats WITHOUT and WITH stencil
+	nDepthOnlyFormats = D3DXGetMaxSurfaceFormats(dwDeviceIndex, NULL, D3DX_SC_DEPTHBUFFER);
+	nDepthStencilFormats = D3DXGetMaxSurfaceFormats(dwDeviceIndex, NULL,
+		D3DX_SC_DEPTHBUFFER | D3DX_SC_STENCILBUFFER);
+
+	// Work out how many formats we have in total
+	if ((nDepthOnlyFormats + nDepthStencilFormats) == 0)
+		return FALSE; // Bail: no compliant pixelformats
+
+	// Never enumerate more formats than fmt[] can hold
+	if (nDepthOnlyFormats > nMaxFormats)
+		nDepthOnlyFormats = nMaxFormats;
+
+	// Get depth buffer formats.
+	// NOTE: For some reason we already get stencil formats when only
+	// specifying D3DX_SC_DEPTHBUFFER, so the depth+stencil class is
+	// not enumerated separately.
+	for (i=0; i<nDepthOnlyFormats; i++) {
+		D3DXGetSurfaceFormat(dwDeviceIndex, NULL, D3DX_SC_DEPTHBUFFER, i, &fmt[nSupportedFormats++]);
+	}
+
+	// Nothing enumerated: avoid a zero-sized allocation
+	if (nSupportedFormats == 0)
+		return FALSE;
+
+	// Total count of pixelformats is nSupportedFormats*2
+	// (one single-buffered and one double-buffered entry per format)
+	glb.lpPF = (DGL_pixelFormat *)calloc((nSupportedFormats)*2, sizeof(DGL_pixelFormat));
+	if (glb.lpPF == NULL) {
+		glb.nPixelFormatCount = 0;
+		return FALSE;
+	}
+	glb.nPixelFormatCount = (nSupportedFormats)*2;
+
+	// Cache colour bits from display mode
+	cColorBits = (BYTE)d3ddm.bpp;
+	cAlphaBits = 0;
+	switch (d3ddm.bpp) {
+	case 15:
+		cRedBits = 5; cGreenBits = 5; cBlueBits = 5;
+		break;
+	case 16:
+		cRedBits = 5; cGreenBits = 6; cBlueBits = 5;
+		break;
+	case 24:
+	case 32:
+		cRedBits = 8; cGreenBits = 8; cBlueBits = 8;
+		break;
+	default:
+		cRedBits = 5; cGreenBits = 5; cBlueBits = 5;
+		break;
+	}
+
+	// Get a copy of pointer that we can alter
+	pPF = glb.lpPF;
+
+	// Pass 0 adds the single-buffer formats, pass 1 the double-buffer ones
+	for (nPass=0; nPass<2; nPass++) {
+		for (i=0; i<nSupportedFormats; i++, pPF++) {
+			memcpy(pPF, &pfTemplateHW, sizeof(DGL_pixelFormat));
+			if (nPass == 0)
+				pPF->pfd.dwFlags &= ~PFD_DOUBLEBUFFER; // Remove doublebuffer flag
+			pPF->pfd.cColorBits		= cColorBits;
+			pPF->pfd.cRedBits		= cRedBits;
+			pPF->pfd.cGreenBits		= cGreenBits;
+			pPF->pfd.cBlueBits		= cBlueBits;
+			pPF->pfd.cAlphaBits		= cAlphaBits;
+			// Depth/stencil bits keep the template values if fmt[i] is not recognised
+			_BitsFromDepthStencilFormat(fmt[i], &pPF->pfd.cDepthBits, &pPF->pfd.cStencilBits);
+			pPF->dwDriverData		= fmt[i];
+		}
+	}
+
+	// Popup warning message if non RGB color mode
+	{
+		// This is a hack. KeithH
+		HDC hdcDesktop = GetDC(NULL);
+		DWORD dwDisplayBitDepth = GetDeviceCaps(hdcDesktop, BITSPIXEL);
+		ReleaseDC(0, hdcDesktop);
+		if (dwDisplayBitDepth <= 8) {
+			ddlogPrintf(DDLOG_WARN, "Current Color Depth %d bpp is not supported", dwDisplayBitDepth);
+			MessageBox(NULL, szColorDepthWarning, "GLDirect", MB_OK | MB_ICONWARNING);
+		}
+	}
+
+	// Mark list as 'current'
+	glb.bPixelformatsDirty = FALSE;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Connect a freshly created DX7 drawable to Mesa.
+//
+// Fills in the Mesa context limits (texture units, draw buffers, texture
+// levels) from the cached D3D device caps, applies the default D3D render
+// and texture-stage states, installs the DX7 extensions, pipeline and
+// driver function pointers, signals a complete state update and begins
+// the first scene.
+//
+// Returns FALSE if lpCtx or its private driver data is NULL; TRUE otherwise.
+BOOL gldInitialiseMesa_DX(
+	DGL_ctx *lpCtx)
+{
+	GLD_driver_dx7	*gld = NULL;
+	int				nMaxSize, nLevels;
+	DWORD			dwZEnable;
+
+	if (lpCtx == NULL)
+		return FALSE;
+
+	gld = lpCtx->glPriv;
+	if (gld == NULL)
+		return FALSE;
+
+	if (!glb.bMultitexture) {
+		// Multitexture override
+		lpCtx->glCtx->Const.MaxTextureUnits = 1;
+	} else {
+		lpCtx->glCtx->Const.MaxTextureUnits = gld->d3dCaps.wMaxSimultaneousTextures;
+		// Only support MAX_TEXTURE_UNITS texture units.
+		// ** If this is altered then the FVF formats must be reviewed **.
+		if (lpCtx->glCtx->Const.MaxTextureUnits > GLD_MAX_TEXTURE_UNITS_DX7)
+			lpCtx->glCtx->Const.MaxTextureUnits = GLD_MAX_TEXTURE_UNITS_DX7;
+	}
+
+	lpCtx->glCtx->Const.MaxDrawBuffers = 1;
+
+	// Max texture size: the smaller of the device's width and height limits
+	nMaxSize = min(gld->d3dCaps.dwMaxTextureHeight, gld->d3dCaps.dwMaxTextureWidth);
+	if (nMaxSize == 0)
+		nMaxSize = 256; // Sanity check
+	else if (nMaxSize > 1024)
+		nMaxSize = 1024; // HACK - CLAMP TO 1024
+
+	// TODO: Check this again for Mesa 5
+	// Mesa wants the limit expressed as a number of levels, so count how
+	// many times the size can be halved before it reaches zero.
+	for (nLevels = 0; nMaxSize != 0; nMaxSize >>= 1)
+		nLevels++;
+	if (nLevels != 0)
+		lpCtx->glCtx->Const.MaxTextureLevels = nLevels;
+	else
+		lpCtx->glCtx->Const.MaxTextureLevels = 8;
+
+	// Defaults
+	IDirect3DDevice7_SetRenderState(gld->pDev, D3DRENDERSTATE_LIGHTING, FALSE);
+	IDirect3DDevice7_SetRenderState(gld->pDev, D3DRENDERSTATE_CULLMODE, D3DCULL_NONE);
+	IDirect3DDevice7_SetRenderState(gld->pDev, D3DRENDERSTATE_DITHERENABLE, TRUE);
+	IDirect3DDevice7_SetRenderState(gld->pDev, D3DRENDERSTATE_SHADEMODE, D3DSHADE_GOURAUD);
+
+	// Set texture coord set to be used with each stage
+	IDirect3DDevice7_SetTextureStageState(gld->pDev, 0, D3DTSS_TEXCOORDINDEX, 0);
+	IDirect3DDevice7_SetTextureStageState(gld->pDev, 1, D3DTSS_TEXCOORDINDEX, 1);
+
+	// Set up Depth buffer: enabled unless the pixelformat has no depth format
+	dwZEnable = D3DZB_FALSE;
+	if (lpCtx->lpPF->dwDriverData != D3DX_SF_UNKNOWN)
+		dwZEnable = D3DZB_TRUE;
+	IDirect3DDevice7_SetRenderState(gld->pDev, D3DRENDERSTATE_ZENABLE, dwZEnable);
+
+	// Set the view matrix (identity)
+	{
+		D3DXMATRIX	matView;
+		D3DXMatrixIdentity(&matView);
+		IDirect3DDevice7_SetTransform(gld->pDev, D3DTRANSFORMSTATE_VIEW, &matView);
+	}
+
+	// DX7 does not support D3DRS_SOFTWAREVERTEXPROCESSING, so there is no
+	// software/hardware TnL switch to set here; just report availability.
+	// Dump this in a Release build as well, now.
+	ddlogPrintf(DDLOG_INFO, "HW TnL: %s",
+		gld->bHasHWTnL ? "Enabled" : "Unavailable");
+
+	// Set up interfaces to Mesa
+	gldEnableExtensions_DX7(lpCtx->glCtx);
+	gldInstallPipeline_DX7(lpCtx->glCtx);
+	gldSetupDriverPointers_DX7(lpCtx->glCtx);
+
+	// Signal a complete state update
+	lpCtx->glCtx->Driver.UpdateState(lpCtx->glCtx, _NEW_ALL);
+
+	// Start a scene
+	IDirect3DDevice7_BeginScene(gld->pDev);
+	lpCtx->bSceneStarted = TRUE;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Present the rendered frame for ctx.
+//
+// Ends the scene in progress (if any), points the shared clipper at hWnd,
+// asks the D3DX context to update the frame, attempts to restore the
+// surfaces if they were lost, and finally begins a new scene.
+//
+// hDC is not used by this implementation.
+//
+// Returns FALSE if ctx or its private driver data is NULL, or if the last
+// D3DX call made (UpdateFrame, or RestoreSurfaces after a lost surface)
+// failed; TRUE otherwise.
+BOOL gldSwapBuffers_DX(
+	DGL_ctx *ctx,
+	HDC hDC,
+	HWND hWnd)
+{
+	GLD_driver_dx7	*gld = NULL;
+	HRESULT			hrFrame;
+	DWORD			dwUpdateFlags;
+
+	if (ctx == NULL)
+		return FALSE;
+
+	gld = ctx->glPriv;
+	if (gld == NULL)
+		return FALSE;
+
+	// End the scene if one is started
+	if (ctx->bSceneStarted) {
+		IDirect3DDevice7_EndScene(gld->pDev);
+		ctx->bSceneStarted = FALSE;
+	}
+
+	// Needed by D3DX for MDI multi-window apps (DaveM)
+	if (lpDDClipper)
+		IDirectDrawClipper_SetHWnd(lpDDClipper, 0, hWnd);
+
+	// Set refresh sync flag: no flag means wait for the vertical retrace
+	if (glb.bWaitForRetrace)
+		dwUpdateFlags = 0;
+	else
+		dwUpdateFlags = D3DX_UPDATE_NOVSYNC;
+
+	// Render and show frame
+	hrFrame = gld->pD3DXContext->lpVtbl->UpdateFrame(gld->pD3DXContext, dwUpdateFlags);
+	if (FAILED(hrFrame))
+		ddlogError(DDLOG_WARN, "gldSwapBuffers_DX: UpdateFrame", hrFrame);
+
+	// Lost surfaces: try to get them back; the result of the restore
+	// replaces the UpdateFrame result for the return value.
+	if (hrFrame == DDERR_SURFACELOST) {
+		hrFrame = gld->pD3DXContext->lpVtbl->RestoreSurfaces(gld->pD3DXContext);
+		if (FAILED(hrFrame))
+			ddlogError(DDLOG_WARN, "gldSwapBuffers_DX: RestoreSurfaces", hrFrame);
+	}
+
+	// Begin a new scene
+	IDirect3DDevice7_BeginScene(gld->pDev);
+	ctx->bSceneStarted = TRUE;
+
+	if (FAILED(hrFrame))
+		return FALSE;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Report the current video mode through glddm.
+//
+// ctx    Context whose drawable must exist and hold a Direct3D interface.
+// glddm  Receives the width, height, bits-per-pixel and refresh rate
+//        returned by D3DXGetCurrentVideoMode().
+//
+// Returns FALSE if either argument is NULL, the context has no private
+// driver data or no Direct3D interface, or the video mode query fails.
+// glddm is only written on success.
+BOOL gldGetDisplayMode_DX(
+	DGL_ctx *ctx,
+	GLD_displayMode *glddm)
+{
+	D3DX_VIDMODEDESC	vidMode;
+	GLD_driver_dx7		*gld = NULL;
+
+	if ((ctx == NULL) || (glddm == NULL))
+		return FALSE;
+
+	gld = ctx->glPriv;
+	if ((gld == NULL) || (gld->pD3D == NULL))
+		return FALSE;
+
+	if (FAILED(D3DXGetCurrentVideoMode(D3DX_DEFAULT, &vidMode)))
+		return FALSE;
+
+	// Copy the mode description across field by field
+	glddm->Width	= vidMode.width;
+	glddm->Height	= vidMode.height;
+	glddm->BPP		= vidMode.bpp;
+	glddm->Refresh	= vidMode.refreshRate;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
diff --git a/src/mesa/drivers/windows/gldirect/dx8/gld_driver_dx8.c b/src/mesa/drivers/windows/gldirect/dx8/gld_driver_dx8.c
new file mode 100644
index 0000000000..7eeb9db2d1
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx8/gld_driver_dx8.c
@@ -0,0 +1,1176 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Driver interface code to Mesa
+*
+****************************************************************************/
+
+//#include <windows.h>
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx8.h"
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "depth.h"
+#include "extensions.h"
+#include "macros.h"
+#include "matrix.h"
+// #include "mem.h"
+//#include "mmath.h"
+#include "mtypes.h"
+#include "texformat.h"
+#include "teximage.h"
+#include "texstore.h"
+#include "vbo/vbo.h"
+#include "swrast_setup/swrast_setup.h"
+#include "swrast_setup/ss_context.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+extern BOOL dglSwapBuffers(HDC hDC);
+
+// HACK: gld_NEW_PROJECTION() divides the _33 member of the OpenGL perspective projection matrix by this value
+const float _fPersp_33 = 1.6f;
+
+//---------------------------------------------------------------------------
+// Internal functions
+//---------------------------------------------------------------------------
+
+// Mesa warning hook: forwards Mesa's internal warning text to the GLDirect log.
+void _gld_mesa_warning(__GLcontext *gc, char *str)
+{
+	(void) gc;	// The context is not needed for logging
+
+	gldLogPrintf(GLDLOG_WARN, "Mesa warning: %s", str);
+}
+
+//---------------------------------------------------------------------------
+
+void _gld_mesa_fatal(
+	__GLcontext *gc,
+	char *str)
+{
+	// Intercept Mesa's internal fatal-message mechanism
+	gldLogPrintf(GLDLOG_CRITICAL, "Mesa FATAL: %s", str);
+
+	// Mesa calls abort(0) here.
+	ddlogClose();
+	exit(0);
+}
+
+//---------------------------------------------------------------------------
+
+// Translates an OpenGL stencil operation into its Direct3D equivalent.
+// Used for the stencil fail, z-fail and z-pass operations.
+// Unrecognised values yield D3DSTENCILOP_KEEP (and are logged in debug builds).
+D3DSTENCILOP _gldConvertStencilOp(GLenum StencilOp)
+{
+	// GL operation -> D3D operation
+	static const struct { GLenum gl; D3DSTENCILOP d3d; } opMap[] = {
+		{ GL_KEEP,			D3DSTENCILOP_KEEP },
+		{ GL_ZERO,			D3DSTENCILOP_ZERO },
+		{ GL_REPLACE,		D3DSTENCILOP_REPLACE },
+		{ GL_INCR,			D3DSTENCILOP_INCRSAT },
+		{ GL_DECR,			D3DSTENCILOP_DECRSAT },
+		{ GL_INVERT,		D3DSTENCILOP_INVERT },
+		{ GL_INCR_WRAP_EXT,	D3DSTENCILOP_INCR },	// GL_EXT_stencil_wrap
+		{ GL_DECR_WRAP_EXT,	D3DSTENCILOP_DECR },	// GL_EXT_stencil_wrap
+	};
+	int i;
+
+	for (i = 0; i < (int)(sizeof(opMap) / sizeof(opMap[0])); i++) {
+		if (opMap[i].gl == StencilOp)
+			return opMap[i].d3d;
+	}
+
+#ifdef _DEBUG
+	gldLogMessage(GLDLOG_ERROR, "_gldConvertStencilOp: Unknown StencilOp\n");
+#endif
+
+	// No match: fall back to leaving the stencil value unchanged
+	return D3DSTENCILOP_KEEP;
+}
+
+//---------------------------------------------------------------------------
+
+// Translates an OpenGL comparison function into its Direct3D equivalent.
+// Used for the alpha, depth and stencil functions.
+// Unrecognised values yield D3DCMP_ALWAYS (and are logged in debug builds).
+D3DCMPFUNC _gldConvertCompareFunc(GLenum CmpFunc)
+{
+	// GL comparison -> D3D comparison
+	static const struct { GLenum gl; D3DCMPFUNC d3d; } cmpMap[] = {
+		{ GL_NEVER,		D3DCMP_NEVER },
+		{ GL_LESS,		D3DCMP_LESS },
+		{ GL_EQUAL,		D3DCMP_EQUAL },
+		{ GL_LEQUAL,	D3DCMP_LESSEQUAL },
+		{ GL_GREATER,	D3DCMP_GREATER },
+		{ GL_NOTEQUAL,	D3DCMP_NOTEQUAL },
+		{ GL_GEQUAL,	D3DCMP_GREATEREQUAL },
+		{ GL_ALWAYS,	D3DCMP_ALWAYS },
+	};
+	int i;
+
+	for (i = 0; i < (int)(sizeof(cmpMap) / sizeof(cmpMap[0])); i++) {
+		if (cmpMap[i].gl == CmpFunc)
+			return cmpMap[i].d3d;
+	}
+
+#ifdef _DEBUG
+	gldLogMessage(GLDLOG_ERROR, "_gldConvertCompareFunc: Unknown CompareFunc\n");
+#endif
+
+	// No match: fall back to a comparison that always passes
+	return D3DCMP_ALWAYS;
+}
+
+//---------------------------------------------------------------------------
+
+// Translates an OpenGL blend factor into a Direct3D D3DBLEND value.
+//   blend        - GL blend factor to translate.
+//   DefaultBlend - GL blend factor to use when 'blend' is not recognised.
+// The fallback is translated as well: GL_ONE (1) and GL_ZERO (0) do not
+// share numeric values with D3DBLEND_ONE (2) and D3DBLEND_ZERO (1), so the
+// fallback cannot simply be returned unconverted.
+// If DefaultBlend is not a GL blend factor either, it is returned as-is.
+D3DBLEND _gldConvertBlendFunc(GLenum blend, GLenum DefaultBlend)
+{
+	GLenum	factor = blend;
+	int		iPass;
+	// Pass 0 translates 'blend'; pass 1 translates the fallback.
+	for (iPass = 0; iPass < 2; iPass++) {
+		switch (factor) {
+		case GL_ZERO:					return D3DBLEND_ZERO;
+		case GL_ONE:					return D3DBLEND_ONE;
+		case GL_DST_COLOR:				return D3DBLEND_DESTCOLOR;
+		case GL_SRC_COLOR:				return D3DBLEND_SRCCOLOR;
+		case GL_ONE_MINUS_DST_COLOR:	return D3DBLEND_INVDESTCOLOR;
+		case GL_ONE_MINUS_SRC_COLOR:	return D3DBLEND_INVSRCCOLOR;
+		case GL_SRC_ALPHA:				return D3DBLEND_SRCALPHA;
+		case GL_ONE_MINUS_SRC_ALPHA:	return D3DBLEND_INVSRCALPHA;
+		case GL_DST_ALPHA:				return D3DBLEND_DESTALPHA;
+		case GL_ONE_MINUS_DST_ALPHA:	return D3DBLEND_INVDESTALPHA;
+		case GL_SRC_ALPHA_SATURATE:		return D3DBLEND_SRCALPHASAT;
+		}
+#ifdef _DEBUG
+		if (iPass == 0)
+			gldLogMessage(GLDLOG_ERROR, "_gldConvertBlendFunc: Unknown BlendFunc\n");
+#endif
+		factor = DefaultBlend;
+	}
+
+	return (D3DBLEND)DefaultBlend;
+}
+
+//---------------------------------------------------------------------------
+// Misc. functions
+//---------------------------------------------------------------------------
+
+void gld_Noop_DX8(
+	GLcontext *ctx)
+{
+#ifdef _DEBUG
+	gldLogMessage(GLDLOG_ERROR, "gld_Noop called!\n");
+#endif
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Error_DX8(
+	GLcontext *ctx)
+{
+#ifdef _DEBUG
+	// Quite useless.
+//	gldLogMessage(GLDLOG_ERROR, "ctx->Driver.Error called!\n");
+#endif
+}
+
+//---------------------------------------------------------------------------
+// Required Mesa functions
+//---------------------------------------------------------------------------
+
+// ctx->Driver.DrawBuffer hook.
+// Only the front-left and back-left colour buffers can be selected.
+//   mode - draw buffer being requested.
+// Returns GL_TRUE if 'mode' is GL_FRONT_LEFT or GL_BACK_LEFT, else GL_FALSE.
+static GLboolean gld_set_draw_buffer_DX8(GLcontext *ctx, GLenum mode)
+{
+	const GLboolean bSupported =
+		((mode == GL_FRONT_LEFT) || (mode == GL_BACK_LEFT)) ? GL_TRUE : GL_FALSE;
+
+	(void) ctx;
+	return bSupported;
+}
+
+//---------------------------------------------------------------------------
+
+// Read-buffer selection hook. A separate read buffer is not supported,
+// so the request is ignored. The original sanity checks are kept below
+// for reference but remain disabled.
+static void gld_set_read_buffer_DX8(GLcontext *ctx, GLframebuffer *buffer, GLenum mode)
+{
+	(void) ctx;
+	(void) buffer;
+	(void) mode;
+	/* ASSERT(buffer == ctx->DrawBuffer); */
+	/* ASSERT(mode == GL_FRONT_LEFT); */
+}
+
+//---------------------------------------------------------------------------
+
+// ctx->Driver.Clear hook: clears the colour, depth and/or stencil buffers
+// through the Direct3D device.
+//   mask - DD_*_BIT flags naming the buffers to clear.
+//   all  - GL_TRUE clears the whole target; otherwise only the rectangle
+//          (x, y, width, height) is cleared, with y flipped for D3D.
+// Colour write masks and the accumulation buffer are not handled here.
+void gld_Clear_DX8(
+	GLcontext *ctx,
+	GLbitfield mask,
+	GLboolean all,
+	GLint x,
+	GLint y,
+	GLint width,
+	GLint height)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+
+	DWORD		dwFlags = 0;
+	D3DCOLOR	Color = 0;
+	float		Z = 0.0f;
+	DWORD		Stencil = 0;
+	D3DRECT		d3dClearRect;
+
+	// Nothing can be cleared without a driver and its device
+	if (!gld || !gld->pDev)
+		return;
+
+	// Colour buffer: convert the float clear colour to a packed D3DCOLOR
+	if (mask & (DD_FRONT_LEFT_BIT | DD_BACK_LEFT_BIT)) {
+		GLubyte col[4];
+		CLAMPED_FLOAT_TO_UBYTE(col[0], ctx->Color.ClearColor[0]);
+		CLAMPED_FLOAT_TO_UBYTE(col[1], ctx->Color.ClearColor[1]);
+		CLAMPED_FLOAT_TO_UBYTE(col[2], ctx->Color.ClearColor[2]);
+		CLAMPED_FLOAT_TO_UBYTE(col[3], ctx->Color.ClearColor[3]);
+		dwFlags |= D3DCLEAR_TARGET;
+		Color = D3DCOLOR_RGBA(col[0], col[1], col[2], col[3]);
+	}
+
+	if (mask & DD_DEPTH_BIT) {
+		// D3D8 will fail the Clear call if we try and clear a
+		// depth buffer and we haven't created one.
+		// Also, some apps try and clear a depth buffer,
+		// when a depth buffer hasn't been requested by the app.
+		if (ctx->Visual.depthBits == 0) {
+			mask &= ~DD_DEPTH_BIT; // Remove depth bit from mask
+		} else {
+			dwFlags |= D3DCLEAR_ZBUFFER;
+			Z = ctx->Depth.Clear;
+		}
+	}
+
+	if (mask & DD_STENCIL_BIT) {
+		if (ctx->Visual.stencilBits == 0) {
+			// No stencil bits in depth buffer
+			mask &= ~DD_STENCIL_BIT; // Remove stencil bit from mask
+		} else {
+			dwFlags |= D3DCLEAR_STENCIL;
+			Stencil = ctx->Stencil.Clear;
+		}
+	}
+
+	// Some apps do really weird things with the rect, such as Quake3.
+	if ((x < 0) || (y < 0) || (width <= 0) || (height <= 0)) {
+		all = GL_TRUE;
+	}
+
+	if (!all) {
+		// Calculate clear subrect (GL's y axis is flipped for D3D)
+		d3dClearRect.x1	= x;
+		d3dClearRect.y1	= gldCtx->dwHeight - (y + height);
+		d3dClearRect.x2	= x + width;
+		d3dClearRect.y2	= d3dClearRect.y1 + height;
+	}
+
+	// dwFlags will be zero if there's nothing to clear
+	if (dwFlags) {
+		_GLD_DX8_DEV(Clear(
+			gld->pDev,
+			all ? 0 : 1,
+			all ? NULL : &d3dClearRect,
+			dwFlags,
+			Color, Z, Stencil));
+	}
+
+	// TODO: DD_ACCUM_BIT (accumulation buffer) is not cleared here.
+}
+
+//---------------------------------------------------------------------------
+
+// Mesa 5: Parameter change
+// ctx->Driver.GetBufferSize hook: reports the framebuffer's dimensions.
+//   fb     - framebuffer whose size is queried.
+//   width  - receives fb->Width.
+//   height - receives fb->Height.
+static void gld_buffer_size_DX8(GLframebuffer *fb, GLuint *width, GLuint *height)
+{
+	// The size is taken from the Mesa framebuffer itself rather than
+	// from the GLDirect context.
+	*width	= fb->Width;
+	*height	= fb->Height;
+}
+
+//---------------------------------------------------------------------------
+
+// ctx->Driver.Finish hook; currently a no-op.
+static void gld_Finish_DX8(GLcontext *ctx)
+{
+}
+
+//---------------------------------------------------------------------------
+
+// ctx->Driver.Flush hook.
+// Direct3D doesn't allow rendering to the front buffer, so a
+// single-buffered context is emulated by swapping buffers on flush.
+// TODO: Detect apps that glFlush() then SwapBuffers() ?
+static void gld_Flush_DX8(GLcontext *ctx)
+{
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+
+	if (!gldCtx->EmulateSingle)
+		return;		// Not emulating a single-buffered context
+
+	dglSwapBuffers(gldCtx->hDC);
+}
+
+//---------------------------------------------------------------------------
+
+// Pushes Mesa's stencil state to the D3D device. Only index 0 of Mesa's
+// two-sided stencil arrays is used. The function, reference, masks and
+// operations are sent only while the stencil test is enabled.
+void gld_NEW_STENCIL(GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	struct gl_stencil_attrib *pState = &ctx->Stencil;
+	const GLuint		face	= 0UL;	// Two-sided stencil. New for Mesa 5
+
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_STENCILENABLE, pState->Enabled ? TRUE : FALSE));
+	if (!pState->Enabled)
+		return;
+
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_STENCILFUNC, _gldConvertCompareFunc(pState->Function[face])));
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_STENCILREF, pState->Ref[face]));
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_STENCILMASK, pState->ValueMask[face]));
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_STENCILWRITEMASK, pState->WriteMask[face]));
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_STENCILFAIL, _gldConvertStencilOp(pState->FailFunc[face])));
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_STENCILZFAIL, _gldConvertStencilOp(pState->ZFailFunc[face])));
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_STENCILPASS, _gldConvertStencilOp(pState->ZPassFunc[face])));
+}
+
+//---------------------------------------------------------------------------
+
+// Pushes Mesa's alpha test, blending and colour write mask state to the D3D device.
+void gld_NEW_COLOR(GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	DWORD		dwFlags = 0;
+	D3DBLEND	src, dest;
+	GLubyte		alphaRef;
+	// Alpha func. D3DRS_ALPHAREF takes 0x00-0xFF, so scale the reference rather than cast it.
+	// NOTE(review): assumes ctx->Color.AlphaRef is a float clamped to [0,1] - confirm.
+	CLAMPED_FLOAT_TO_UBYTE(alphaRef, ctx->Color.AlphaRef);
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_ALPHAFUNC, _gldConvertCompareFunc(ctx->Color.AlphaFunc)));
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_ALPHAREF, (DWORD)alphaRef));
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_ALPHATESTENABLE, ctx->Color.AlphaEnabled));
+
+	// Blend func
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_ALPHABLENDENABLE, ctx->Color.BlendEnabled));
+	src		= _gldConvertBlendFunc(ctx->Color.BlendSrcRGB, GL_ONE);
+	dest	= _gldConvertBlendFunc(ctx->Color.BlendDstRGB, GL_ZERO);
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_SRCBLEND, src));
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_DESTBLEND, dest));
+
+	// Color mask
+	if (ctx->Color.ColorMask[0][0]) dwFlags |= D3DCOLORWRITEENABLE_RED;
+	if (ctx->Color.ColorMask[0][1]) dwFlags |= D3DCOLORWRITEENABLE_GREEN;
+	if (ctx->Color.ColorMask[0][2]) dwFlags |= D3DCOLORWRITEENABLE_BLUE;
+	if (ctx->Color.ColorMask[0][3]) dwFlags |= D3DCOLORWRITEENABLE_ALPHA;
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_COLORWRITEENABLE, dwFlags));
+}
+
+//---------------------------------------------------------------------------
+
+// Pushes Mesa's depth test enable, compare function and write mask to the D3D device.
+void gld_NEW_DEPTH(GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_ZENABLE, ctx->Depth.Test ? D3DZB_TRUE : D3DZB_FALSE));
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_ZFUNC, _gldConvertCompareFunc(ctx->Depth.Func)));
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_ZWRITEENABLE, ctx->Depth.Mask ? TRUE : FALSE));
+}
+
+//---------------------------------------------------------------------------
+
+// Pushes Mesa's polygon state (fill mode, face culling and polygon offset)
+// to the D3D device.
+// Limitations visible in the code: only ctx->Polygon.FrontMode selects the
+// fill mode, GL_FRONT_AND_BACK culling maps to "no culling", and polygon
+// offset can only pull geometry towards the viewer.
+void gld_NEW_POLYGON(GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+
+	D3DFILLMODE	d3dFillMode = D3DFILL_SOLID;
+	D3DCULL		d3dCullMode = D3DCULL_NONE;
+	int			iOffset = 0;
+
+	// Fillmode
+	switch (ctx->Polygon.FrontMode) {
+	case GL_POINT:
+		d3dFillMode = D3DFILL_POINT;
+		break;
+	case GL_LINE:
+		d3dFillMode = D3DFILL_WIREFRAME;
+		break;
+	case GL_FILL:
+	default:
+		d3dFillMode = D3DFILL_SOLID;
+		break;
+	}
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_FILLMODE, d3dFillMode));
+
+	// Culling. D3D names the winding to cull, so the GL front-face
+	// winding decides which D3D mode removes the requested face.
+	if (ctx->Polygon.CullFlag) {
+		const BOOL bFrontIsCCW = (ctx->Polygon.FrontFace == GL_CCW);
+
+		switch (ctx->Polygon.CullFaceMode) {
+		case GL_BACK:
+			d3dCullMode = bFrontIsCCW ? D3DCULL_CW : D3DCULL_CCW;
+			break;
+		case GL_FRONT:
+			d3dCullMode = bFrontIsCCW ? D3DCULL_CCW : D3DCULL_CW;
+			break;
+		default:	// GL_FRONT_AND_BACK (or unknown): no single D3D mode
+			d3dCullMode = D3DCULL_NONE;
+			break;
+		}
+	}
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_CULLMODE, d3dCullMode));
+
+	// Polygon offset
+	// ZBIAS ranges from 0 to 16 and can only move towards the viewer
+	// Mesa5: ctx->Polygon._OffsetAny removed
+	// D3D can't push away: zero or positive units leave the bias at 0.
+	// Large negative units are clamped before the integer conversion so
+	// the bias never leaves the valid range.
+	if (ctx->Polygon.OffsetFill && (ctx->Polygon.OffsetUnits < 0)) {
+		if (ctx->Polygon.OffsetUnits <= -16.0f)
+			iOffset = 16;
+		else
+			iOffset = -(int)ctx->Polygon.OffsetUnits;
+	}
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_ZBIAS, iOffset));
+}
+
+//---------------------------------------------------------------------------
+
+void gld_NEW_FOG(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	// Fog state handler. As written it only switches D3D fog off and returns.
+	D3DCOLOR	d3dFogColour;
+	D3DFOGMODE	d3dFogMode = D3DFOG_LINEAR;
+
+	// TODO: Fog is calculated separately in the Mesa pipeline
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_FOGENABLE, FALSE));
+	return;
+	// NOTE: everything below is unreachable because of the return above.
+	// Fog enable
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_FOGENABLE, ctx->Fog.Enabled));
+	if (!ctx->Fog.Enabled) {
+		_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_FOGTABLEMODE, D3DFOG_NONE));
+		_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_FOGVERTEXMODE, D3DFOG_NONE));
+		return; // If disabled, don't bother setting any fog state
+	}
+
+	// Fog colour
+	d3dFogColour = D3DCOLOR_COLORVALUE(	ctx->Fog.Color[0],
+								ctx->Fog.Color[1],
+								ctx->Fog.Color[2],
+								ctx->Fog.Color[3]);
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_FOGCOLOR, d3dFogColour));
+
+	// Fog density (the float's bit pattern is passed through the DWORD parameter)
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_FOGDENSITY, *((DWORD*) (&ctx->Fog.Density))));
+
+	// Fog start (float bits passed as DWORD, as above)
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_FOGSTART, *((DWORD*) (&ctx->Fog.Start))));
+
+	// Fog end (float bits passed as DWORD, as above)
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_FOGEND, *((DWORD*) (&ctx->Fog.End))));
+
+	// Fog mode (any other value keeps the D3DFOG_LINEAR initialiser)
+	switch (ctx->Fog.Mode) {
+	case GL_LINEAR:
+		d3dFogMode = D3DFOG_LINEAR;
+		break;
+	case GL_EXP:
+		d3dFogMode = D3DFOG_EXP;
+		break;
+	case GL_EXP2:
+		d3dFogMode = D3DFOG_EXP2;
+		break;
+	}
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_FOGTABLEMODE, d3dFogMode));
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_FOGVERTEXMODE, D3DFOG_NONE));
+}
+
+//---------------------------------------------------------------------------
+
+// Pushes Mesa's lighting-related render state to the D3D device: the shade
+// model, and the specular-enable flag (TRUE only when lighting is enabled
+// and DD_SEPARATE_SPECULAR is set in ctx->_TriangleCaps).
+void gld_NEW_LIGHT(GLcontext *ctx)
+{
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8	*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	DWORD			dwSpecular = FALSE;
+
+	// Shademode
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_SHADEMODE, (ctx->Light.ShadeModel == GL_SMOOTH) ? D3DSHADE_GOURAUD : D3DSHADE_FLAT));
+
+	// Separate specular colour
+	if (ctx->Light.Enabled && (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR))
+		dwSpecular = TRUE;
+	_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_SPECULARENABLE, dwSpecular));
+}
+
+//---------------------------------------------------------------------------
+
+// Caches the top of Mesa's modelview matrix stack in gld->matModelView.
+// Elements are copied in storage order: pSrc[0..3] fill _11.._14, and so on.
+void gld_NEW_MODELVIEW(GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	// Mesa5: Model-view is now a stack
+	const GLfloat		*pSrc	= ctx->ModelviewMatrixStack.Top->m;
+	D3DMATRIX			mat;
+
+	mat._11 = pSrc[0];
+	mat._12 = pSrc[1];
+	mat._13 = pSrc[2];
+	mat._14 = pSrc[3];
+	mat._21 = pSrc[4];
+	mat._22 = pSrc[5];
+	mat._23 = pSrc[6];
+	mat._24 = pSrc[7];
+	mat._31 = pSrc[8];
+	mat._32 = pSrc[9];
+	mat._33 = pSrc[10];
+	mat._34 = pSrc[11];
+	mat._41 = pSrc[12];
+	mat._42 = pSrc[13];
+	mat._43 = pSrc[14];
+	mat._44 = pSrc[15];
+
+	gld->matModelView = mat;
+}
+
+//---------------------------------------------------------------------------
+
+// Caches the top of Mesa's projection matrix stack in gld->matProjection,
+// with two depth adjustments: _33 is divided by _fPersp_33 and _43 is halved
+// (presumably to adapt GL's depth convention to D3D's - TODO confirm).
+void gld_NEW_PROJECTION(GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	// Mesa 5: Now a stack
+	const GLfloat		*pSrc	= ctx->ProjectionMatrixStack.Top->m;
+	D3DMATRIX			mat;
+
+	mat._11 = pSrc[0];
+	mat._12 = pSrc[1];
+	mat._13 = pSrc[2];
+	mat._14 = pSrc[3];
+	mat._21 = pSrc[4];
+	mat._22 = pSrc[5];
+	mat._23 = pSrc[6];
+	mat._24 = pSrc[7];
+	mat._31 = pSrc[8];
+	mat._32 = pSrc[9];
+	// HACK: scale the _33 member by the global fudge factor
+	mat._33 = pSrc[10] / _fPersp_33; // / 1.6f;
+	mat._34 = pSrc[11];
+	mat._41 = pSrc[12];
+	mat._42 = pSrc[13];
+	// The _43 member is halved
+	mat._43 = pSrc[14] / 2.0f;
+	mat._44 = pSrc[15];
+
+	gld->matProjection = mat;
+}
+
+//---------------------------------------------------------------------------
+/*
+void gldFrustumHook_DX8(
+	GLdouble left,
+	GLdouble right,
+	GLdouble bottom,
+	GLdouble top,
+	GLdouble nearval,
+	GLdouble farval)
+{
+	GET_CURRENT_CONTEXT(ctx);
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8	*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+
+	// Pass values on to Mesa first (in case we mess with them)
+	_mesa_Frustum(left, right, bottom, top, nearval, farval);
+
+	_fPersp_33 = farval / (nearval - farval);
+
+//	ddlogPrintf(GLDLOG_SYSTEM, "Frustum: %f", farval/nearval);
+}
+
+//---------------------------------------------------------------------------
+
+void gldOrthoHook_DX8(
+	GLdouble left,
+	GLdouble right,
+	GLdouble bottom,
+	GLdouble top,
+	GLdouble nearval,
+	GLdouble farval)
+{
+	GET_CURRENT_CONTEXT(ctx);
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8	*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+
+	// Pass values on to Mesa first (in case we mess with them)
+	_mesa_Ortho(left, right, bottom, top, nearval, farval);
+
+	_fPersp_33 = 1.6f;
+
+//	ddlogPrintf(GLDLOG_SYSTEM, "Ortho: %f", farval/nearval);
+}
+*/
+//---------------------------------------------------------------------------
+
+// Updates only the depth range (MinZ/MaxZ) of the device's current D3D
+// viewport from ctx->Viewport.Near/Far. Position and size are written
+// back exactly as GetViewport() returned them.
+void gld_NEW_VIEWPORT(GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+
+	D3DVIEWPORT8	d3dvp;
+	const BOOL		bOrdered = (ctx->Viewport.Near <= ctx->Viewport.Far);
+
+	// Start from the viewport currently set on the device
+	_GLD_DX8_DEV(GetViewport(gld->pDev, &d3dvp));
+
+	// D3D can't do Quake1/Quake2 z-trick (a reversed depth range),
+	// so the smaller value always goes into MinZ.
+	d3dvp.MinZ = bOrdered ? ctx->Viewport.Near : ctx->Viewport.Far;
+	d3dvp.MaxZ = bOrdered ? ctx->Viewport.Far : ctx->Viewport.Near;
+
+	// Position and size are not touched here (disabled code kept for reference):
+/*	x = ctx->Viewport.X;
+	y = ctx->Viewport.Y;
+	w = ctx->Viewport.Width;
+	h = ctx->Viewport.Height;
+	if (x < 0) x = 0;
+	if (y < 0) y = 0;
+	if (w > gldCtx->dwWidth) 		w = gldCtx->dwWidth;
+	if (h > gldCtx->dwHeight) 		h = gldCtx->dwHeight;
+	// Ditto for D3D viewport dimensions
+	if (w+x > gldCtx->dwWidth) 		w = gldCtx->dwWidth-x;
+	if (h+y > gldCtx->dwHeight) 	h = gldCtx->dwHeight-y;
+	d3dvp.X			= x;
+	d3dvp.Y			= gldCtx->dwHeight - (y + h);
+	d3dvp.Width		= w;
+	d3dvp.Height	= h;*/
+	_GLD_DX8_DEV(SetViewport(gld->pDev, &d3dvp));
+
+//	gld->fFlipWindowY = (float)gldCtx->dwHeight;
+}
+
+//---------------------------------------------------------------------------
+
+// Returns TRUE if auto-normal generation or any 1D/2D evaluator map is
+// enabled in ctx->Eval, FALSE otherwise.
+__inline BOOL _gldAnyEvalEnabled(GLcontext *ctx)
+{
+	const struct gl_eval_attrib *pEval = &ctx->Eval;
+
+	const BOOL bMap1 =
+		(pEval->Map1Color4) ||
+		(pEval->Map1Index) ||
+		(pEval->Map1Normal) ||
+		(pEval->Map1TextureCoord1) ||
+		(pEval->Map1TextureCoord2) ||
+		(pEval->Map1TextureCoord3) ||
+		(pEval->Map1TextureCoord4) ||
+		(pEval->Map1Vertex3) ||
+		(pEval->Map1Vertex4);
+	const BOOL bMap2 =
+		(pEval->Map2Color4) ||
+		(pEval->Map2Index) ||
+		(pEval->Map2Normal) ||
+		(pEval->Map2TextureCoord1) ||
+		(pEval->Map2TextureCoord2) ||
+		(pEval->Map2TextureCoord3) ||
+		(pEval->Map2TextureCoord4) ||
+		(pEval->Map2Vertex3) ||
+		(pEval->Map2Vertex4);
+
+	return ((pEval->AutoNormal) || bMap1 || bMap2) ? TRUE : FALSE;
+}
+
+//---------------------------------------------------------------------------
+
+BOOL _gldChooseInternalPipeline(
+	GLcontext *ctx,
+	GLD_driver_dx8 *gld)
+{
+//	return TRUE;	// DEBUGGING: ALWAYS USE MESA
+//	return FALSE;	// DEBUGGING: ALWAYS USE D3D
+	// Returns TRUE to use Mesa's TnL pipeline, FALSE to use the D3D pipeline.
+	if ((glb.dwTnL == GLDS_TNL_MESA) || (gld->bHasHWTnL == FALSE))
+	{
+		gld->PipelineUsage.qwMesa.QuadPart++;
+		return TRUE; // Force Mesa TnL
+	}
+	// Each decision increments the matching gld->PipelineUsage counter.
+	if ((ctx->Light.Enabled) ||
+		(1) ||	// NOTE: always true, so this test always selects Mesa TnL
+		(ctx->Texture._TexGenEnabled) ||
+		(ctx->Texture._TexMatEnabled) ||
+//		(ctx->Transform._AnyClip) ||
+		(ctx->Scissor.Enabled) ||
+		_gldAnyEvalEnabled(ctx) // Put this last so we can early-out
+		)
+	{
+		gld->PipelineUsage.qwMesa.QuadPart++;
+		return TRUE;
+	}
+	// Unreachable while the (1) term above is present.
+	gld->PipelineUsage.qwD3DFVF.QuadPart++;
+	return FALSE;
+
+/*	// Force Mesa pipeline?
+	if (glb.dwTnL == GLDS_TNL_MESA) {
+		gld->PipelineUsage.dwMesa.QuadPart++;
+		return GLD_PIPELINE_MESA;
+	}
+
+	// Test for functionality not exposed in the D3D pathways
+	if ((ctx->Texture._GenFlags)) {
+		gld->PipelineUsage.dwMesa.QuadPart++;
+		return GLD_PIPELINE_MESA;
+	}
+
+	// Now decide if vertex shader can be used.
+	// If two sided lighting is enabled then we must either
+	// use Mesa TnL or the vertex shader
+	if (ctx->_TriangleCaps & DD_TRI_LIGHT_TWOSIDE) {
+		if (gld->VStwosidelight.hShader && !ctx->Fog.Enabled) {
+			// Use Vertex Shader
+			gld->PipelineUsage.dwD3D2SVS.QuadPart++;
+			return GLD_PIPELINE_D3D_VS_TWOSIDE;
+		} else {
+			// Use Mesa TnL
+			gld->PipelineUsage.dwMesa.QuadPart++;
+			return GLD_PIPELINE_MESA;
+		}
+	}
+
+	// Must be D3D fixed-function pipeline
+	gld->PipelineUsage.dwD3DFVF.QuadPart++;
+	return GLD_PIPELINE_D3D_FVF;
+*/
+}
+
+//---------------------------------------------------------------------------
+
+// ctx->Driver.UpdateState hook: pushes the Mesa state groups flagged in
+// new_state (a mask of _NEW_* bits) to the D3D device and selects the TnL
+// pipeline. Does nothing if the driver or its device does not exist.
+void gld_update_state_DX8(
+	GLcontext *ctx,
+	GLuint new_state)
+{
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8	*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	GLD_pb_dx8		*gldPB;
+
+	if (!gld || !gld->pDev)
+		return;
+
+	_swsetup_InvalidateState( ctx, new_state );
+	_vbo_InvalidateState( ctx, new_state );
+	_tnl_InvalidateState( ctx, new_state );
+
+	// SetupIndex will be used in the pipelines for choosing setup function
+	if ((ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE | DD_SEPARATE_SPECULAR)) ||
+		(ctx->Fog.Enabled))
+	{
+		if (ctx->_TriangleCaps & DD_FLATSHADE)
+			gld->iSetupFunc = GLD_SI_FLAT_EXTRAS;
+		else
+			gld->iSetupFunc = GLD_SI_SMOOTH_EXTRAS;
+	} else {
+		if (ctx->_TriangleCaps & DD_FLATSHADE)
+			gld->iSetupFunc = GLD_SI_FLAT;	// Setup flat shade + texture
+		else
+			gld->iSetupFunc = GLD_SI_SMOOTH; // Setup smooth shade + texture
+	}
+
+	gld->bUseMesaTnL = _gldChooseInternalPipeline(ctx, gld);
+	if (gld->bUseMesaTnL) {
+		gldPB = &gld->PB2d;
+		_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_SOFTWAREVERTEXPROCESSING, TRUE));
+		_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_CLIPPING, FALSE));
+		_GLD_DX8_DEV(SetVertexShader(gld->pDev, gldPB->dwFVF));
+	} else {
+		gldPB = &gld->PB3d;
+		_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_CLIPPING, TRUE));
+//		if (gld->TnLPipeline == GLD_PIPELINE_D3D_VS_TWOSIDE) {
+//			_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_SOFTWAREVERTEXPROCESSING, !gld->VStwosidelight.bHardware));
+//			_GLD_DX8_DEV(SetVertexShader(gld->pDev, gld->VStwosidelight.hShader));
+//		} else {
+			_GLD_DX8_DEV(SetRenderState(gld->pDev, D3DRS_SOFTWAREVERTEXPROCESSING, !gld->bHasHWTnL));
+			_GLD_DX8_DEV(SetVertexShader(gld->pDev, gldPB->dwFVF));
+//		}
+	}
+
+#define _GLD_TEST_STATE(a)		\
+	do { if (new_state & (a)) {	\
+		gld##a(ctx);			\
+		new_state &= ~(a);		\
+	} } while (0)
+#define _GLD_TEST_STATE_DX8(a)	\
+	do { if (new_state & (a)) {	\
+		gld##a##_DX8(ctx);		\
+		new_state &= ~(a);		\
+	} } while (0)
+#define _GLD_IGNORE_STATE(a) new_state &= ~(a)
+
+//	if (!gld->bUseMesaTnL) {
+		// Not required if Mesa is doing the TnL.
+	// Problem: If gld->bUseMesaTnL is TRUE when these are signaled,
+	// then we'll miss updating the D3D TnL pipeline.
+	// Therefore, don't test for gld->bUseMesaTnL
+	_GLD_TEST_STATE(_NEW_MODELVIEW);
+	_GLD_TEST_STATE(_NEW_PROJECTION);
+//	}
+
+	_GLD_TEST_STATE_DX8(_NEW_TEXTURE); // extern, so guard with _DX8
+	_GLD_TEST_STATE(_NEW_COLOR);
+	_GLD_TEST_STATE(_NEW_DEPTH);
+	_GLD_TEST_STATE(_NEW_POLYGON);
+	_GLD_TEST_STATE(_NEW_STENCIL);
+	_GLD_TEST_STATE(_NEW_FOG);
+	_GLD_TEST_STATE(_NEW_LIGHT);
+	_GLD_TEST_STATE(_NEW_VIEWPORT);
+
+	_GLD_IGNORE_STATE(_NEW_TRANSFORM);
+
+// Stubs for future use.
+/*	_GLD_TEST_STATE(_NEW_TEXTURE_MATRIX);
+	_GLD_TEST_STATE(_NEW_COLOR_MATRIX);
+	_GLD_TEST_STATE(_NEW_ACCUM);
+	_GLD_TEST_STATE(_NEW_EVAL);
+	_GLD_TEST_STATE(_NEW_HINT);
+	_GLD_TEST_STATE(_NEW_LINE);
+	_GLD_TEST_STATE(_NEW_PIXEL);
+	_GLD_TEST_STATE(_NEW_POINT);
+	_GLD_TEST_STATE(_NEW_POLYGONSTIPPLE);
+	_GLD_TEST_STATE(_NEW_SCISSOR);
+	_GLD_TEST_STATE(_NEW_PACKUNPACK);
+	_GLD_TEST_STATE(_NEW_ARRAY);
+	_GLD_TEST_STATE(_NEW_RENDERMODE);
+	_GLD_TEST_STATE(_NEW_BUFFERS);
+	_GLD_TEST_STATE(_NEW_MULTISAMPLE);
+*/
+
+// For debugging.
+#if 0
+#define _GLD_TEST_UNHANDLED_STATE(a)									\
+	if (new_state & (a)) {									\
+		gldLogMessage(GLDLOG_ERROR, "Unhandled " #a "\n");	\
+	}
+	_GLD_TEST_UNHANDLED_STATE(_NEW_TEXTURE_MATRIX);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_COLOR_MATRIX);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_ACCUM);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_EVAL);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_HINT);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_LINE);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_PIXEL);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_POINT);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_POLYGONSTIPPLE);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_SCISSOR);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_PACKUNPACK);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_ARRAY);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_RENDERMODE);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_BUFFERS);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_MULTISAMPLE);
+#undef _GLD_TEST_UNHANDLED_STATE
+#endif
+#undef _GLD_TEST_STATE
+#undef _GLD_TEST_STATE_DX8
+#undef _GLD_IGNORE_STATE
+}
+
+//---------------------------------------------------------------------------
+// Viewport
+//---------------------------------------------------------------------------
+
+// ctx->Driver.Viewport hook: resizes the buffers if needed, then sets the
+// D3D viewport from the GL viewport (x, y, w, h), flipping y for D3D.
+// The rectangle is clamped so that it never extends outside the render
+// target surface. The depth range comes from ctx->Viewport.Near/Far.
+void gld_Viewport_DX8(
+	GLcontext *ctx,
+	GLint x,
+	GLint y,
+	GLsizei w,
+	GLsizei h)
+{
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8	*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+
+	D3DVIEWPORT8	d3dvp;
+	GLint			iMaxW, iMaxH;
+
+	if (!gld || !gld->pDev)
+		return;
+
+	// This is a hack. When the app is minimized, Mesa passes
+	// w=1 and h=1 for viewport dimensions. Without this test
+	// we get a GPF in gld_wgl_resize_buffers().
+	if ((w==1) && (h==1))
+		return;
+
+	// Call ResizeBuffers. This function will early-out
+	// if no resize is needed.
+	ctx->Driver.ResizeBuffers(gldCtx->glBuffer);
+
+	// ** D3D viewport must not be outside the render target surface **
+	// Read the surface size after the resize, as signed values.
+	iMaxW = (GLint)gldCtx->dwWidth;
+	iMaxH = (GLint)gldCtx->dwHeight;
+
+	// Clamp the origin into the surface first so the sizes below cannot go negative
+	if (x < 0) x = 0;
+	if (y < 0) y = 0;
+	if (x > iMaxW) x = iMaxW;
+	if (y > iMaxH) y = iMaxH;
+	// Then clamp the size to what is left of the surface
+	if (w < 0) w = 0;
+	if (h < 0) h = 0;
+	if (w > iMaxW - x) w = iMaxW - x;
+	if (h > iMaxH - y) h = iMaxH - y;
+
+	d3dvp.X			= x;
+	d3dvp.Y			= iMaxH - (y + h);
+	d3dvp.Width		= w;
+	d3dvp.Height	= h;
+	if (ctx->Viewport.Near <= ctx->Viewport.Far) {
+		d3dvp.MinZ		= ctx->Viewport.Near;
+		d3dvp.MaxZ		= ctx->Viewport.Far;
+	} else {
+		d3dvp.MinZ		= ctx->Viewport.Far;
+		d3dvp.MaxZ		= ctx->Viewport.Near;
+	}
+
+	_GLD_DX8_DEV(SetViewport(gld->pDev, &d3dvp));
+}
+
+//---------------------------------------------------------------------------
+
+extern BOOL dglWglResizeBuffers(GLcontext *ctx, BOOL bDefaultDriver);
+
+// Mesa 5: Parameter change
+// ctx->Driver.ResizeBuffers hook. The framebuffer argument is not used; the
+// resize is delegated to dglWglResizeBuffers() with the context obtained via GET_CURRENT_CONTEXT.
+void gldResizeBuffers_DX8(GLframebuffer *fb)
+{
+	GET_CURRENT_CONTEXT(ctx);
+	dglWglResizeBuffers(ctx, TRUE);
+}
+
+//---------------------------------------------------------------------------
+#ifdef _DEBUG
+// This is only for debugging.
+// To use, plug into ctx->Driver.Enable pointer below.
+// Debug-only hook for ctx->Driver.Enable: logs the capability name and its new state.
+void gld_Enable(GLcontext *ctx, GLenum e, GLboolean b)
+{
+	const char	*pszCap		= _mesa_lookup_enum_by_nr(e);
+	const char	*pszState	= b ? "TRUE" : "FALSE";
+	char		buf[1024];
+	sprintf(buf, "Enable: %s (%s)\n", pszCap, pszState);
+	ddlogMessage(DDLOG_SYSTEM, buf);
+}
+#endif
+//---------------------------------------------------------------------------
+// Driver pointer setup
+//---------------------------------------------------------------------------
+
+extern const GLubyte* _gldGetStringGeneric(GLcontext*, GLenum);
+
+void gldSetupDriverPointers_DX8(
+	GLcontext *ctx)
+{
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8	*gld	= GLD_GET_DX8_DRIVER(gldCtx); // NOTE(review): not used in this function
+	// Installs this driver's hooks in ctx->Driver and in the TnL module's driver table.
+	TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+	// Mandatory functions
+	ctx->Driver.GetString				= _gldGetStringGeneric;
+	ctx->Driver.UpdateState				= gld_update_state_DX8;
+	ctx->Driver.Clear					= gld_Clear_DX8;
+	ctx->Driver.DrawBuffer				= gld_set_draw_buffer_DX8;
+	ctx->Driver.GetBufferSize			= gld_buffer_size_DX8;
+	ctx->Driver.Finish					= gld_Finish_DX8;
+	ctx->Driver.Flush					= gld_Flush_DX8;
+	ctx->Driver.Error					= gld_Error_DX8;
+
+	// Hardware accumulation buffer
+	ctx->Driver.Accum					= NULL; // TODO: gld_Accum;
+
+	// Bitmap functions
+	ctx->Driver.CopyPixels				= gld_CopyPixels_DX8;
+	ctx->Driver.DrawPixels				= gld_DrawPixels_DX8;
+	ctx->Driver.ReadPixels				= gld_ReadPixels_DX8;
+	ctx->Driver.Bitmap					= gld_Bitmap_DX8;
+
+	// Buffer resize
+	ctx->Driver.ResizeBuffers			= gldResizeBuffers_DX8;
+	
+	// Texture image functions (3D images use Mesa's own store functions)
+	ctx->Driver.ChooseTextureFormat		= gld_ChooseTextureFormat_DX8;
+	ctx->Driver.TexImage1D				= gld_TexImage1D_DX8;
+	ctx->Driver.TexImage2D				= gld_TexImage2D_DX8;
+	ctx->Driver.TexImage3D				= _mesa_store_teximage3d;
+	ctx->Driver.TexSubImage1D			= gld_TexSubImage1D_DX8;
+	ctx->Driver.TexSubImage2D			= gld_TexSubImage2D_DX8;
+	ctx->Driver.TexSubImage3D			= _mesa_store_texsubimage3d;
+	
+	ctx->Driver.CopyTexImage1D			= gldCopyTexImage1D_DX8; //NULL;
+	ctx->Driver.CopyTexImage2D			= gldCopyTexImage2D_DX8; //NULL;
+	ctx->Driver.CopyTexSubImage1D		= gldCopyTexSubImage1D_DX8; //NULL;
+	ctx->Driver.CopyTexSubImage2D		= gldCopyTexSubImage2D_DX8; //NULL;
+	ctx->Driver.CopyTexSubImage3D		= gldCopyTexSubImage3D_DX8;
+	ctx->Driver.TestProxyTexImage		= _mesa_test_proxy_teximage;
+
+	// Texture object functions
+	ctx->Driver.BindTexture				= NULL;
+	ctx->Driver.NewTextureObject		= NULL; // Not yet implemented by Mesa!;
+	ctx->Driver.DeleteTexture			= gld_DeleteTexture_DX8;
+	ctx->Driver.PrioritizeTexture		= NULL;
+
+	// Imaging functionality
+	ctx->Driver.CopyColorTable			= NULL;
+	ctx->Driver.CopyColorSubTable		= NULL;
+	ctx->Driver.CopyConvolutionFilter1D = NULL;
+	ctx->Driver.CopyConvolutionFilter2D = NULL;
+
+	// State changing functions (all NULL except Viewport; state is pushed by gld_update_state_DX8)
+	ctx->Driver.AlphaFunc				= NULL; //gld_AlphaFunc;
+	ctx->Driver.BlendFuncSeparate		= NULL; //gld_BlendFunc;
+	ctx->Driver.ClearColor				= NULL; //gld_ClearColor;
+	ctx->Driver.ClearDepth				= NULL; //gld_ClearDepth;
+	ctx->Driver.ClearStencil			= NULL; //gld_ClearStencil;
+	ctx->Driver.ColorMask				= NULL; //gld_ColorMask;
+	ctx->Driver.CullFace				= NULL; //gld_CullFace;
+	ctx->Driver.ClipPlane				= NULL; //gld_ClipPlane;
+	ctx->Driver.FrontFace				= NULL; //gld_FrontFace;
+	ctx->Driver.DepthFunc				= NULL; //gld_DepthFunc;
+	ctx->Driver.DepthMask				= NULL; //gld_DepthMask;
+	ctx->Driver.DepthRange				= NULL;
+	ctx->Driver.Enable					= NULL; //gld_Enable;
+	ctx->Driver.Fogfv					= NULL; //gld_Fogfv;
+	ctx->Driver.Hint					= NULL; //gld_Hint;
+	ctx->Driver.Lightfv					= NULL; //gld_Lightfv;
+	ctx->Driver.LightModelfv			= NULL; //gld_LightModelfv;
+	ctx->Driver.LineStipple				= NULL; //gld_LineStipple;
+	ctx->Driver.LineWidth				= NULL; //gld_LineWidth;
+	ctx->Driver.LogicOpcode				= NULL; //gld_LogicOpcode;
+	ctx->Driver.PointParameterfv		= NULL; //gld_PointParameterfv;
+	ctx->Driver.PointSize				= NULL; //gld_PointSize;
+	ctx->Driver.PolygonMode				= NULL; //gld_PolygonMode;
+	ctx->Driver.PolygonOffset			= NULL; //gld_PolygonOffset;
+	ctx->Driver.PolygonStipple			= NULL; //gld_PolygonStipple;
+	ctx->Driver.RenderMode				= NULL; //gld_RenderMode;
+	ctx->Driver.Scissor					= NULL; //gld_Scissor;
+	ctx->Driver.ShadeModel				= NULL; //gld_ShadeModel;
+	ctx->Driver.StencilFunc				= NULL; //gld_StencilFunc;
+	ctx->Driver.StencilMask				= NULL; //gld_StencilMask;
+	ctx->Driver.StencilOp				= NULL; //gld_StencilOp;
+	ctx->Driver.TexGen					= NULL; //gld_TexGen;
+	ctx->Driver.TexEnv					= NULL;
+	ctx->Driver.TexParameter			= NULL;
+	ctx->Driver.TextureMatrix			= NULL; //gld_TextureMatrix;
+	ctx->Driver.Viewport				= gld_Viewport_DX8;
+
+	_swsetup_Wakeup(ctx);
+	// TnL module hooks: Mesa's pipeline runner and clipped-primitive renderers, plus our line-stipple reset
+	tnl->Driver.RunPipeline				= _tnl_run_pipeline;
+	tnl->Driver.Render.ResetLineStipple	= gld_ResetLineStipple_DX8;
+	tnl->Driver.Render.ClippedPolygon	= _tnl_RenderClippedPolygon;
+	tnl->Driver.Render.ClippedLine		= _tnl_RenderClippedLine;
+
+	// Hook into glFrustum() and glOrtho() (currently disabled)
+//	ctx->Exec->Frustum					= gldFrustumHook_DX8;
+//	ctx->Exec->Ortho					= gldOrthoHook_DX8;
+
+}
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx8/gld_dx8.h b/src/mesa/drivers/windows/gldirect/dx8/gld_dx8.h
new file mode 100644
index 0000000000..7efec7cae8
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx8/gld_dx8.h
@@ -0,0 +1,324 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  GLDirect Direct3D 8.0 header file
+*
+****************************************************************************/
+
+#ifndef _GLD_DX8_H
+#define _GLD_DX8_H
+
+//---------------------------------------------------------------------------
+// Windows includes
+//---------------------------------------------------------------------------
+
+//#ifndef STRICT
+//#define STRICT
+//#endif
+
+//#define WIN32_LEAN_AND_MEAN
+//#include <windows.h>
+#include <d3d8.h>
+#include <d3dx8.h>
+
+// MS screwed up with the DX8.1 SDK - there's no compile-time
+// method of compiling for 8.0 via the 8.1 SDK unless you
+// "make sure you don't use any 8.1 interfaces".
+// We CAN use 8.1 D3DX static functions, though - just not new 8.1 interfaces.
+//
+// D3D_SDK_VERSION is 120 for 8.0 (supported by Windows 95).
+// D3D_SDK_VERSION is 220 for 8.1 (NOT supported by Windows 95).
+//
+#define D3D_SDK_VERSION_DX8_SUPPORT_WIN95 120
+
+// Typedef for obtaining function from d3d8.dll.
+// Pointer to a WINAPI function taking one UINT and returning an IDirect3D8*.
+// NOTE(review): presumably this is Direct3DCreate8 with the UINT being the
+// SDK version (see D3D_SDK_VERSION_DX8_SUPPORT_WIN95) - confirm at the call site.
+typedef IDirect3D8* (WINAPI *FNDIRECT3DCREATE8) (UINT);
+
+
+//---------------------------------------------------------------------------
+// Defines
+//---------------------------------------------------------------------------
+
+// HRESULT-checking wrappers around the Direct3D 8 C-style interface macros.
+//
+// Each _GLD_DX8*(func) takes a complete call including its argument list,
+// e.g. _GLD_DX8_DEV(SetRenderState(pDev, state, value)), and pastes the
+// matching interface prefix (IDirect3DDevice8_ etc.) onto its first token.
+//
+// _DEBUG builds:  the call is evaluated once; if FAILED() the HRESULT is passed
+//                 to gldLogError() together with the stringified call text.
+//                 The expansion is a braced block, so it is statement-only.
+// Other builds:   the wrapper expands to the bare call expression.
+//
+// NOTE: the debug variants used to end in a dangling '##'. Standard C does not
+// allow '##' at either end of a replacement list (only MSVC tolerated it), so
+// the trailing operator has been removed; the expansion is otherwise unchanged.
+#ifdef _DEBUG
+#define _GLD_TEST_HRESULT(h)					\
+{												\
+	HRESULT _hr = (h);							\
+	if (FAILED(_hr)) {							\
+		gldLogError(GLDLOG_ERROR, #h, _hr);		\
+	}											\
+}
+#define _GLD_DX8(func)		_GLD_TEST_HRESULT(IDirect3D8_##func)
+#define _GLD_DX8_DEV(func)	_GLD_TEST_HRESULT(IDirect3DDevice8_##func)
+#define _GLD_DX8_VB(func)	_GLD_TEST_HRESULT(IDirect3DVertexBuffer8_##func)
+#define _GLD_DX8_TEX(func)	_GLD_TEST_HRESULT(IDirect3DTexture8_##func)
+#else
+#define _GLD_DX8(func)		IDirect3D8_##func
+#define _GLD_DX8_DEV(func)	IDirect3DDevice8_##func
+#define _GLD_DX8_VB(func)	IDirect3DVertexBuffer8_##func
+#define _GLD_DX8_TEX(func)	IDirect3DTexture8_##func
+#endif
+
+// Release a COM-style interface through its C vtable and clear the pointer.
+// Does nothing when p is already NULL. 'p' is evaluated several times, so it
+// must be a side-effect-free lvalue. Expands to a braced block (statement use
+// only).
+#define SAFE_RELEASE(p)			\
+{								\
+	if (p) {					\
+		(p)->lpVtbl->Release(p);	\
+		(p) = NULL;				\
+	}							\
+}
+
+// Same contract as SAFE_RELEASE, but releases via IDirect3DVertexBuffer8_Release().
+#define SAFE_RELEASE_VB8(p)						\
+{												\
+	if (p) {									\
+		IDirect3DVertexBuffer8_Release((p));	\
+		(p) = NULL;								\
+	}											\
+}
+
+// Same contract as SAFE_RELEASE, but releases via IDirect3DSurface8_Release().
+#define SAFE_RELEASE_SURFACE8(p)		\
+{										\
+	if (p) {							\
+		IDirect3DSurface8_Release((p));	\
+		(p) = NULL;						\
+	}									\
+}
+
+// Setup index.
+// Four variants: flat or smooth shading, each with or without "extras".
+// Values are consecutive starting at 0.
+// NOTE(review): presumably the value kept in GLD_driver_dx8.iSetupFunc and used
+// to pick between the *Flat / *Smooth / *FlatExtras / *SmoothExtras primitive
+// functions declared below - confirm against the driver source.
+enum {
+	GLD_SI_FLAT				= 0,
+	GLD_SI_SMOOTH			= 1,
+	GLD_SI_FLAT_EXTRAS		= 2,
+	GLD_SI_SMOOTH_EXTRAS	= 3,
+};
+/*
+// Internal pipeline
+typedef enum {
+	GLD_PIPELINE_MESA			= 0,	// Mesa pipeline
+	GLD_PIPELINE_D3D_FVF		= 1,	// Direct3D Fixed-function pipeline
+	GLD_PIPELINE_D3D_VS_TWOSIDE	= 2		// Direct3D two-sided-lighting vertex shader
+} GLD_tnl_pipeline;
+*/
+//---------------------------------------------------------------------------
+// Vertex definitions for Fixed-Function pipeline
+//---------------------------------------------------------------------------
+
+//
+// NOTE: If the number of texture units is altered then most of
+//       the texture code will need to be revised.
+//
+
+#define GLD_MAX_TEXTURE_UNITS_DX8	2
+
+//
+// 2D vertex transformed by Mesa
+//
+// GLD_FVF_2D_VERTEX is the Direct3D flexible-vertex-format code for
+// GLD_2D_VERTEX. The flags are listed in the same order as the struct members:
+// position with RHW, diffuse, specular, then two sets of texture coordinates
+// (matching GLD_MAX_TEXTURE_UNITS_DX8). Keep the two definitions in step.
+//
+#define GLD_FVF_2D_VERTEX (	D3DFVF_XYZRHW |		\
+							D3DFVF_DIFFUSE |	\
+							D3DFVF_SPECULAR |	\
+							D3DFVF_TEX2)
+typedef struct {
+	FLOAT	x, y;		// 2D raster coords
+	FLOAT	sz;			// Screen Z (depth)
+	FLOAT	rhw;		// Reciprocal homogeneous W
+	DWORD	diffuse;	// Diffuse colour
+	DWORD	specular;	// For separate-specular support
+	FLOAT	t0_u, t0_v;	// 1st set of texture coords
+	FLOAT	t1_u, t1_v;	// 2nd set of texture coords
+} GLD_2D_VERTEX;
+
+
+//
+// 3D vertex transformed by Direct3D
+//
+// GLD_FVF_3D_VERTEX is the Direct3D flexible-vertex-format code for
+// GLD_3D_VERTEX: untransformed position, diffuse colour and two sets of
+// texture coordinates, in the same order as the struct members. Unlike the
+// 2D vertex there is no RHW and no specular member.
+//
+#define GLD_FVF_3D_VERTEX (	D3DFVF_XYZ |				\
+							D3DFVF_DIFFUSE |			\
+							D3DFVF_TEX2)
+
+typedef struct {
+	D3DXVECTOR3		Position;		// XYZ Vector in object space
+	D3DCOLOR		Diffuse;		// Diffuse colour
+	D3DXVECTOR2		TexUnit0;		// Texture unit 0
+	D3DXVECTOR2		TexUnit1;		// Texture unit 1
+} GLD_3D_VERTEX;
+
+//---------------------------------------------------------------------------
+// Vertex Shaders
+//---------------------------------------------------------------------------
+/*
+// DX8 Vertex Shader
+typedef struct {
+	DWORD	hShader;	// If NULL, shader is invalid and cannot be used
+	BOOL	bHardware;	// If TRUE then shader was created for hardware,
+						// otherwise shader was created for software.
+} GLD_vertexShader;
+*/
+//---------------------------------------------------------------------------
+// Structs
+//---------------------------------------------------------------------------
+
+// This keeps a count of how many times we choose each individual internal
+// pathway. Useful for seeing if a certain pathway was ever used by an app, and
+// how much each pathway is biased.
+// Zero the members at context creation and dump stats at context deletion.
+// One counter per pathway; the two-sided vertex-shader counter is disabled
+// along with the rest of the vertex-shader support in this header.
+typedef struct {
+	// Note: DWORD is probably too small, hence the 64-bit ULARGE_INTEGER counters
+	ULARGE_INTEGER	qwMesa;		// Mesa TnL pipeline
+	ULARGE_INTEGER	qwD3DFVF;	// Direct3D Fixed-Function pipeline
+//	ULARGE_INTEGER	dwD3D2SVS;	// Direct3D Two-Sided Vertex Shader pipeline
+} GLD_pipeline_usage;
+
+// GLDirect Primitive Buffer (points, lines, triangles and quads)
+//
+// Describes one Direct3D vertex buffer that is shared by three primitive
+// lists. Points start at the beginning of the buffer; lines and triangles
+// start at the vertex indices given by iFirstLine and iFirstTriangle.
+// For each list there is a write pointer to the next free slot and a count
+// of primitives waiting to be rendered.
+typedef struct {
+	// Data for IDirect3DDevice8::CreateVertexBuffer()
+	DWORD					dwStride;		// Stride of vertex
+	DWORD					dwUsage;		// Usage flags
+	DWORD					dwFVF;			// Direct3D Flexible Vertex Format
+	DWORD					dwPool;			// Pool flags
+
+	IDirect3DVertexBuffer8	*pVB;			// Holds points, lines, tris and quads.
+
+	// Point list is assumed to be at start of buffer
+	DWORD					iFirstLine;		// Index of start of line list
+	DWORD					iFirstTriangle;	// Index of start of triangle list
+
+	BYTE					*pPoints;		// Pointer to next free point
+	BYTE					*pLines;		// Pointer to next free line
+	BYTE					*pTriangles;	// Pointer to next free triangle
+
+	DWORD					nPoints;		// Number of points ready to render
+	DWORD					nLines;			// Number of lines ready to render
+	DWORD					nTriangles;		// Number of triangles ready to render
+} GLD_pb_dx8;
+
+// GLDirect DX8 driver data
+//
+// Per-context private state of the DX8 back end; retrieved from a GLDirect
+// context with GLD_GET_DX8_DRIVER(). Holds the buffer configuration, the
+// Direct3D interfaces and caps, one primitive buffer for each vertex path
+// (Mesa-transformed 2D and Direct3D-transformed 3D), the matrices handed to
+// Direct3D TnL, and the pathway usage counters. Members belonging to the
+// two-sided-lighting vertex shader are commented out.
+typedef struct {
+	// GLDirect vars
+	BOOL					bDoublebuffer;	// Doublebuffer (otherwise single-buffered)
+	BOOL					bDepthStencil;	// Depth buffer needed (stencil optional)
+	D3DFORMAT				RenderFormat;	// Format of back/front buffer
+	D3DFORMAT				DepthFormat;	// Format of depth/stencil
+//	float					fFlipWindowY;	// Value for flipping viewport Y coord
+
+	// Direct3D vars
+	D3DCAPS8				d3dCaps8;
+	BOOL					bHasHWTnL;		// Device has Hardware Transform/Light?
+	IDirect3D8				*pD3D;			// Base Direct3D8 interface
+	IDirect3DDevice8		*pDev;			// Direct3D8 Device interface
+	GLD_pb_dx8				PB2d;			// Vertices transformed by Mesa
+	GLD_pb_dx8				PB3d;			// Vertices transformed by Direct3D
+	D3DPRIMITIVETYPE		d3dpt;			// Current Direct3D primitive type
+	D3DXMATRIX				matProjection;	// Projection matrix for D3D TnL
+	D3DXMATRIX				matModelView;	// Model/View matrix for D3D TnL
+	int						iSetupFunc;		// Which setup functions to use
+	BOOL					bUseMesaTnL;	// Whether to use Mesa or D3D for TnL
+
+	// Direct3D vars for two-sided lighting
+//	GLD_vertexShader		VStwosidelight;	// Vertex Shader for two-sided lighting
+//	D3DXMATRIX				matWorldViewProj;// World/View/Projection matrix for shaders
+
+
+//	GLD_tnl_pipeline		TnLPipeline;	// Index of current internal pipeline
+	GLD_pipeline_usage		PipelineUsage;
+} GLD_driver_dx8;
+
+// Fetch the DX8 driver-private data stored in a GLDirect context's glPriv
+// member, cast to GLD_driver_dx8*. The whole expansion is parenthesised so
+// the result can be used inside a larger expression, for example
+// GLD_GET_DX8_DRIVER(c)->pDev; without the outer parentheses the cast would
+// apply to the trailing member access instead of to glPriv.
+#define GLD_GET_DX8_DRIVER(c) ((GLD_driver_dx8*)(c)->glPriv)
+
+//---------------------------------------------------------------------------
+// Function prototypes
+//---------------------------------------------------------------------------
+
+// Driver entry points: extension lookup/enabling, pipeline installation,
+// driver function-pointer setup and buffer resizing.
+// NOTE(review): gld_ext_dx8.c defines the lookup function under the name
+// gldGetProcAddress_DX (no trailing '8') - confirm which spelling callers link
+// against.
+PROC	gldGetProcAddress_DX8(LPCSTR a);
+void	gldEnableExtensions_DX8(GLcontext *ctx);
+void	gldInstallPipeline_DX8(GLcontext *ctx);
+void	gldSetupDriverPointers_DX8(GLcontext *ctx);
+// Superseded prototype (took a GLcontext); the live one takes the framebuffer.
+//void	gldResizeBuffers_DX8(GLcontext *ctx);
+void	gldResizeBuffers_DX8(GLframebuffer *fb);
+
+
+// Texture functions
+//
+// DX8 implementations of texture copy/upload, pixel transfer (draw, read,
+// copy, bitmap), texture-format selection, texture deletion and line-stipple
+// reset. Each takes the GLcontext as its first argument.
+
+void	gldCopyTexImage1D_DX8(GLcontext *ctx, GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLint border);
+void	gldCopyTexImage2D_DX8(GLcontext *ctx, GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border);
+void	gldCopyTexSubImage1D_DX8(GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width );
+void	gldCopyTexSubImage2D_DX8(GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height );
+void	gldCopyTexSubImage3D_DX8(GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height );
+
+void	gld_NEW_TEXTURE_DX8(GLcontext *ctx);
+void	gld_DrawPixels_DX8(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels);
+void	gld_ReadPixels_DX8(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *unpack, GLvoid *dest);
+void	gld_CopyPixels_DX8(GLcontext *ctx, GLint srcx, GLint srcy, GLsizei width, GLsizei height, GLint dstx, GLint dsty, GLenum type);
+void	gld_Bitmap_DX8(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap);
+const struct gl_texture_format* gld_ChooseTextureFormat_DX8(GLcontext *ctx, GLint internalFormat, GLenum srcFormat, GLenum srcType);
+void	gld_TexImage2D_DX8(GLcontext *ctx, GLenum target, GLint level, GLint internalFormat, GLint width, GLint height, GLint border, GLenum format, GLenum type, const GLvoid *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *tObj, struct gl_texture_image *texImage);
+void	gld_TexImage1D_DX8(GLcontext *ctx, GLenum target, GLint level, GLint internalFormat, GLint width, GLint border, GLenum format, GLenum type, const GLvoid *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *texObj, struct gl_texture_image *texImage );
+void	gld_TexSubImage2D_DX8( GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *texObj, struct gl_texture_image *texImage );
+void	gld_TexSubImage1D_DX8(GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const GLvoid *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *texObj, struct gl_texture_image *texImage);
+void	gld_DeleteTexture_DX8(GLcontext *ctx, struct gl_texture_object *tObj);
+void	gld_ResetLineStipple_DX8(GLcontext *ctx);
+
+// 2D primitive functions
+//
+// Point, line, triangle and quad emitters for the 2D (Mesa-transformed)
+// vertex path. Points take a [first, last] vertex range; the others take one
+// vertex index per corner. Triangles and quads come in the four variants
+// named by the GLD_SI_* setup indices (Flat, Smooth, FlatExtras, SmoothExtras).
+
+void	gld_Points2D_DX8(GLcontext *ctx, GLuint first, GLuint last);
+
+void	gld_Line2DFlat_DX8(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Line2DSmooth_DX8(GLcontext *ctx, GLuint v0, GLuint v1);
+
+void	gld_Triangle2DFlat_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Triangle2DSmooth_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Triangle2DFlatExtras_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Triangle2DSmoothExtras_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+
+void	gld_Quad2DFlat_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Quad2DSmooth_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Quad2DFlatExtras_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Quad2DSmoothExtras_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+
+// 3D primitive functions
+//
+// Counterparts of the 2D emitters for the 3D (Direct3D-transformed) vertex
+// path. Only Flat and Smooth variants are declared; there are no "Extras"
+// versions for this path.
+
+void	gld_Points3D_DX8(GLcontext *ctx, GLuint first, GLuint last);
+void	gld_Line3DFlat_DX8(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Triangle3DFlat_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Quad3DFlat_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Line3DSmooth_DX8(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Triangle3DSmooth_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Quad3DSmooth_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+
+// Primitive functions for Two-sided-lighting Vertex Shader
+//
+// NOTE(review): the vertex-shader type and the related driver members are
+// commented out elsewhere in this header - confirm these functions are still
+// defined and used before relying on them.
+
+void	gld_Points2DTwoside_DX8(GLcontext *ctx, GLuint first, GLuint last);
+void	gld_Line2DFlatTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Line2DSmoothTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Triangle2DFlatTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Triangle2DSmoothTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Quad2DFlatTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Quad2DSmoothTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+
+#endif
diff --git a/src/mesa/drivers/windows/gldirect/dx8/gld_dxerr8.h b/src/mesa/drivers/windows/gldirect/dx8/gld_dxerr8.h
new file mode 100644
index 0000000000..f8e92b936e
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx8/gld_dxerr8.h
@@ -0,0 +1,77 @@
+/*==========================================================================;
+ *
+ *
+ *  File:   dxerr8.h
+ *  Content:    DirectX Error Library Include File
+ *
+ ****************************************************************************/
+
+#ifndef _GLD_DXERR8_H_
+#define _GLD_DXERR8_H_
+
+
+#include <d3d8.h>
+
+//
+//  DXGetErrorString8
+//
+//  Desc:  Converts a DirectX HRESULT to a string
+//
+//  Args:  HRESULT hr   Can be any error code from
+//                      DPLAY D3D8 D3DX8 DMUSIC DSOUND
+//
+//  Return: Converted string
+//
+//  Two variants are declared: the A version returns a narrow (char) string,
+//  the W version a wide (WCHAR) string. DXGetErrorString8 maps to the W
+//  version when UNICODE is defined and to the A version otherwise.
+//
+const char*  __stdcall DXGetErrorString8A(HRESULT hr);
+const WCHAR* __stdcall DXGetErrorString8W(HRESULT hr);
+
+#ifdef UNICODE
+    #define DXGetErrorString8 DXGetErrorString8W
+#else
+    #define DXGetErrorString8 DXGetErrorString8A
+#endif 
+
+
+//
+//  DXTrace
+//
+//  Desc:  Outputs a formatted error message to the debug stream
+//
+//  Args:  CHAR* strFile   The current file, typically passed in using the 
+//                         __FILE__ macro.
+//         DWORD dwLine    The current line number, typically passed in using the 
+//                         __LINE__ macro.
+//         HRESULT hr      An HRESULT that will be traced to the debug stream.
+//         CHAR* strMsg    A string that will be traced to the debug stream (may be NULL)
+//         BOOL bPopMsgBox If TRUE, then a message box will popup also containing the passed info.
+//
+//  Return: The hr that was passed in.  
+//
+//  The first pair of prototypes (kept commented out) gives bPopMsgBox a
+//  default value, which is C++-only syntax; the live C prototypes below take
+//  the argument explicitly. The W variant differs only in taking a WCHAR*
+//  message. DXTrace maps to the W version when UNICODE is defined.
+//
+//HRESULT __stdcall DXTraceA( char* strFile, DWORD dwLine, HRESULT hr, char* strMsg, BOOL bPopMsgBox = FALSE );
+//HRESULT __stdcall DXTraceW( char* strFile, DWORD dwLine, HRESULT hr, WCHAR* strMsg, BOOL bPopMsgBox = FALSE );
+HRESULT __stdcall DXTraceA( char* strFile, DWORD dwLine, HRESULT hr, char* strMsg, BOOL bPopMsgBox);
+HRESULT __stdcall DXTraceW( char* strFile, DWORD dwLine, HRESULT hr, WCHAR* strMsg, BOOL bPopMsgBox);
+
+#ifdef UNICODE
+    #define DXTrace DXTraceW
+#else
+    #define DXTrace DXTraceA
+#endif 
+
+
+//
+// Helper macros
+//
+// When neither DEBUG nor _DEBUG is defined the helpers cost nothing:
+// DXTRACE_MSG yields 0L and the two _ERR forms yield the HRESULT they were
+// given. Otherwise they forward to DXTrace() with the current file and line;
+// only DXTRACE_ERR requests the message box.
+//
+#if !defined(DEBUG) && !defined(_DEBUG)
+    #define DXTRACE_MSG(str)              (0L)
+    #define DXTRACE_ERR(str,hr)           (hr)
+    #define DXTRACE_ERR_NOMSGBOX(str,hr)  (hr)
+#else
+    #define DXTRACE_MSG(str)              DXTrace( __FILE__, (DWORD)__LINE__, 0, str, FALSE )
+    #define DXTRACE_ERR(str,hr)           DXTrace( __FILE__, (DWORD)__LINE__, hr, str, TRUE )
+    #define DXTRACE_ERR_NOMSGBOX(str,hr)  DXTrace( __FILE__, (DWORD)__LINE__, hr, str, FALSE )
+#endif
+
+
+#endif
+
diff --git a/src/mesa/drivers/windows/gldirect/dx8/gld_ext_dx8.c b/src/mesa/drivers/windows/gldirect/dx8/gld_ext_dx8.c
new file mode 100644
index 0000000000..b51bba9b3c
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx8/gld_ext_dx8.c
@@ -0,0 +1,344 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  GL extensions
+*
+****************************************************************************/
+
+//#include "../GLDirect.h"
+//#include "../gld_log.h"
+//#include "../gld_settings.h"
+
+#include <windows.h>
+#define GL_GLEXT_PROTOTYPES
+#include <GL/gl.h>
+#include <GL/glext.h>
+
+//#include "ddlog.h"
+//#include "gld_dx8.h"
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "depth.h"
+#include "extensions.h"
+#include "macros.h"
+#include "matrix.h"
+// #include "mem.h"
+//#include "mmath.h"
+#include "mtypes.h"
+#include "texformat.h"
+#include "texstore.h"
+#include "vbo/vbo.h"
+#include "swrast_setup/swrast_setup.h"
+#include "swrast_setup/ss_context.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "dglcontext.h"
+#include "extensions.h"
+
+// For some reason this is not defined in an above header...
+extern void _mesa_enable_imaging_extensions(GLcontext *ctx);
+
+//---------------------------------------------------------------------------
+// Hack for the SGIS_multitexture extension that was removed from Mesa
+// NOTE: SGIS_multitexture enums also clash with GL_SGIX_async_pixel
+
+	// NOTE: Quake2 ran *slower* with this enabled, so I've
+	// disabled it for now.
+	// To enable, uncomment:
+	//  _mesa_add_extension(ctx, GL_TRUE, szGL_SGIS_multitexture, 0);
+
+//---------------------------------------------------------------------------
+
+// Enum values of the removed GL_SGIS_multitexture extension, redefined
+// locally for the compatibility shims below.
+// GL_TEXTURE0_SGIS..GL_TEXTURE3_SGIS are consecutive; the shims rely on that
+// when they convert an SGIS unit enum to the ARB one by subtracting
+// GL_TEXTURE0_SGIS. Note that 0x8362 is not defined here.
+enum {
+	/* Quake2 GL_SGIS_multitexture */
+	GL_SELECTED_TEXTURE_SGIS			= 0x835B,
+	GL_SELECTED_TEXTURE_COORD_SET_SGIS	= 0x835C,
+	GL_MAX_TEXTURES_SGIS				= 0x835D,
+	GL_TEXTURE0_SGIS					= 0x835E,
+	GL_TEXTURE1_SGIS					= 0x835F,
+	GL_TEXTURE2_SGIS					= 0x8360,
+	GL_TEXTURE3_SGIS					= 0x8361,
+	GL_TEXTURE_COORD_SET_SOURCE_SGIS	= 0x8363,
+};
+
+//---------------------------------------------------------------------------
+
+// SGIS multitexture shim: select the active texture unit.
+// 'target' is a GL_TEXTUREn_SGIS enum; it is rebased onto GL_TEXTURE0_ARB and
+// forwarded to glActiveTextureARB(). No range check is performed.
+void APIENTRY gldSelectTextureSGIS(GLenum target)
+{
+	// Distance of the requested unit from the first SGIS unit enum.
+	const GLenum unit = target - GL_TEXTURE0_SGIS;
+
+	glActiveTextureARB(GL_TEXTURE0_ARB + unit);
+}
+
+//---------------------------------------------------------------------------
+
+// SGIS multitexture shim: set a 2-component texture coordinate.
+// 'target' is a GL_TEXTUREn_SGIS enum; it is rebased onto GL_TEXTURE0_ARB and
+// forwarded, with s and t unchanged, to glMultiTexCoord2fARB().
+void APIENTRY gldMTexCoord2fSGIS(GLenum target, GLfloat s, GLfloat t)
+{
+	// Distance of the requested unit from the first SGIS unit enum.
+	const GLenum unit = target - GL_TEXTURE0_SGIS;
+
+	glMultiTexCoord2fARB(GL_TEXTURE0_ARB + unit, s, t);
+}
+
+//---------------------------------------------------------------------------
+
+// SGIS multitexture shim: set a 2-component texture coordinate from an array.
+// 'target' is a GL_TEXTUREn_SGIS enum; it is rebased onto GL_TEXTURE0_ARB and
+// forwarded, together with the untouched pointer v, to glMultiTexCoord2fvARB().
+void APIENTRY gldMTexCoord2fvSGIS(GLenum target, const GLfloat *v)
+{
+	// Distance of the requested unit from the first SGIS unit enum.
+	const GLenum unit = target - GL_TEXTURE0_SGIS;
+
+	glMultiTexCoord2fvARB(GL_TEXTURE0_ARB + unit, v);
+}
+
+//---------------------------------------------------------------------------
+// Extensions
+//---------------------------------------------------------------------------
+
+// One row of an entry-point lookup table: the function to hand out and the
+// exact (case-sensitive) name it is looked up under. Tables are terminated
+// by a row whose proc is NULL.
+typedef struct {
+	PROC proc;		// Entry point returned to the caller
+	char *name;		// Name compared with strcmp(); initialised from string literals
+}  GLD_extension;
+
+// Entry points that are always available through gldGetProcAddress_DX().
+// Lookup is an exact, case-sensitive strcmp() on the name column, so every
+// name must be spelled exactly as the application will request it.
+// Three names were previously misspelled ("glBlendColorExt",
+// "glTexCoordPointer", "glDrawArrayEXT") and could therefore never be found
+// under their real names; they now match the function in the proc column.
+// The table is terminated by the row with a NULL proc.
+GLD_extension GLD_extList[] = {
+#ifdef GL_EXT_polygon_offset
+    {	(PROC)glPolygonOffsetEXT,		"glPolygonOffsetEXT"		},
+#endif
+    {	(PROC)glBlendEquationEXT,		"glBlendEquationEXT"		},
+    {	(PROC)glBlendColorEXT,			"glBlendColorEXT"			},
+    {	(PROC)glVertexPointerEXT,		"glVertexPointerEXT"		},
+    {	(PROC)glNormalPointerEXT,		"glNormalPointerEXT"		},
+    {	(PROC)glColorPointerEXT,		"glColorPointerEXT"			},
+    {	(PROC)glIndexPointerEXT,		"glIndexPointerEXT"			},
+    {	(PROC)glTexCoordPointerEXT,		"glTexCoordPointerEXT"		},
+    {	(PROC)glEdgeFlagPointerEXT,		"glEdgeFlagPointerEXT"		},
+    {	(PROC)glGetPointervEXT,			"glGetPointervEXT"			},
+    {	(PROC)glArrayElementEXT,		"glArrayElementEXT"			},
+    {	(PROC)glDrawArraysEXT,			"glDrawArraysEXT"			},
+    {	(PROC)glAreTexturesResidentEXT,	"glAreTexturesResidentEXT"	},
+    {	(PROC)glBindTextureEXT,			"glBindTextureEXT"			},
+    {	(PROC)glDeleteTexturesEXT,		"glDeleteTexturesEXT"		},
+    {	(PROC)glGenTexturesEXT,			"glGenTexturesEXT"			},
+    {	(PROC)glIsTextureEXT,			"glIsTextureEXT"			},
+    {	(PROC)glPrioritizeTexturesEXT,	"glPrioritizeTexturesEXT"	},
+    {	(PROC)glCopyTexSubImage3DEXT,	"glCopyTexSubImage3DEXT"	},
+    {	(PROC)glTexImage3DEXT,			"glTexImage3DEXT"			},
+    {	(PROC)glTexSubImage3DEXT,		"glTexSubImage3DEXT"		},
+    {	(PROC)glPointParameterfEXT,		"glPointParameterfEXT"		},
+    {	(PROC)glPointParameterfvEXT,	"glPointParameterfvEXT"		},
+
+    {	(PROC)glLockArraysEXT,			"glLockArraysEXT"			},
+    {	(PROC)glUnlockArraysEXT,		"glUnlockArraysEXT"			},
+	{	NULL,							"\0"						}
+};
+
+// Multitexture entry points. gldGetProcAddress_DX() only consults this table
+// when glb.bMultitexture is set. Contents:
+//  - a commented-out block of direct SGIS mappings (kept for reference),
+//  - the GL_ARB_multitexture functions,
+//  - one alias without the ARB suffix (Descent3 hack, see below),
+//  - the three Quake2 SGIS names, served by the gld*SGIS shims in this file.
+// The table is terminated by the row with a NULL proc.
+GLD_extension GLD_multitexList[] = {
+/*
+    {	(PROC)glMultiTexCoord1dSGIS,		"glMTexCoord1dSGIS"			},
+    {	(PROC)glMultiTexCoord1dvSGIS,		"glMTexCoord1dvSGIS"		},
+    {	(PROC)glMultiTexCoord1fSGIS,		"glMTexCoord1fSGIS"			},
+    {	(PROC)glMultiTexCoord1fvSGIS,		"glMTexCoord1fvSGIS"		},
+    {	(PROC)glMultiTexCoord1iSGIS,		"glMTexCoord1iSGIS"			},
+    {	(PROC)glMultiTexCoord1ivSGIS,		"glMTexCoord1ivSGIS"		},
+    {	(PROC)glMultiTexCoord1sSGIS,		"glMTexCoord1sSGIS"			},
+    {	(PROC)glMultiTexCoord1svSGIS,		"glMTexCoord1svSGIS"		},
+    {	(PROC)glMultiTexCoord2dSGIS,		"glMTexCoord2dSGIS"			},
+    {	(PROC)glMultiTexCoord2dvSGIS,		"glMTexCoord2dvSGIS"		},
+    {	(PROC)glMultiTexCoord2fSGIS,		"glMTexCoord2fSGIS"			},
+    {	(PROC)glMultiTexCoord2fvSGIS,		"glMTexCoord2fvSGIS"		},
+    {	(PROC)glMultiTexCoord2iSGIS,		"glMTexCoord2iSGIS"			},
+    {	(PROC)glMultiTexCoord2ivSGIS,		"glMTexCoord2ivSGIS"		},
+    {	(PROC)glMultiTexCoord2sSGIS,		"glMTexCoord2sSGIS"			},
+    {	(PROC)glMultiTexCoord2svSGIS,		"glMTexCoord2svSGIS"		},
+    {	(PROC)glMultiTexCoord3dSGIS,		"glMTexCoord3dSGIS"			},
+    {	(PROC)glMultiTexCoord3dvSGIS,		"glMTexCoord3dvSGIS"		},
+    {	(PROC)glMultiTexCoord3fSGIS,		"glMTexCoord3fSGIS"			},
+    {	(PROC)glMultiTexCoord3fvSGIS,		"glMTexCoord3fvSGIS"		},
+    {	(PROC)glMultiTexCoord3iSGIS,		"glMTexCoord3iSGIS"			},
+    {	(PROC)glMultiTexCoord3ivSGIS,		"glMTexCoord3ivSGIS"		},
+    {	(PROC)glMultiTexCoord3sSGIS,		"glMTexCoord3sSGIS"			},
+    {	(PROC)glMultiTexCoord3svSGIS,		"glMTexCoord3svSGIS"		},
+    {	(PROC)glMultiTexCoord4dSGIS,		"glMTexCoord4dSGIS"			},
+    {	(PROC)glMultiTexCoord4dvSGIS,		"glMTexCoord4dvSGIS"		},
+    {	(PROC)glMultiTexCoord4fSGIS,		"glMTexCoord4fSGIS"			},
+    {	(PROC)glMultiTexCoord4fvSGIS,		"glMTexCoord4fvSGIS"		},
+    {	(PROC)glMultiTexCoord4iSGIS,		"glMTexCoord4iSGIS"			},
+    {	(PROC)glMultiTexCoord4ivSGIS,		"glMTexCoord4ivSGIS"		},
+    {	(PROC)glMultiTexCoord4sSGIS,		"glMTexCoord4sSGIS"			},
+    {	(PROC)glMultiTexCoord4svSGIS,		"glMTexCoord4svSGIS"		},
+    {	(PROC)glMultiTexCoordPointerSGIS,	"glMTexCoordPointerSGIS"	},
+    {	(PROC)glSelectTextureSGIS,			"glSelectTextureSGIS"			},
+    {	(PROC)glSelectTextureCoordSetSGIS,	"glSelectTextureCoordSetSGIS"	},
+*/
+    {	(PROC)glActiveTextureARB,		"glActiveTextureARB"		},
+    {	(PROC)glClientActiveTextureARB,	"glClientActiveTextureARB"	},
+    {	(PROC)glMultiTexCoord1dARB,		"glMultiTexCoord1dARB"		},
+    {	(PROC)glMultiTexCoord1dvARB,	"glMultiTexCoord1dvARB"		},
+    {	(PROC)glMultiTexCoord1fARB,		"glMultiTexCoord1fARB"		},
+    {	(PROC)glMultiTexCoord1fvARB,	"glMultiTexCoord1fvARB"		},
+    {	(PROC)glMultiTexCoord1iARB,		"glMultiTexCoord1iARB"		},
+    {	(PROC)glMultiTexCoord1ivARB,	"glMultiTexCoord1ivARB"		},
+    {	(PROC)glMultiTexCoord1sARB,		"glMultiTexCoord1sARB"		},
+    {	(PROC)glMultiTexCoord1svARB,	"glMultiTexCoord1svARB"		},
+    {	(PROC)glMultiTexCoord2dARB,		"glMultiTexCoord2dARB"		},
+    {	(PROC)glMultiTexCoord2dvARB,	"glMultiTexCoord2dvARB"		},
+    {	(PROC)glMultiTexCoord2fARB,		"glMultiTexCoord2fARB"		},
+    {	(PROC)glMultiTexCoord2fvARB,	"glMultiTexCoord2fvARB"		},
+    {	(PROC)glMultiTexCoord2iARB,		"glMultiTexCoord2iARB"		},
+    {	(PROC)glMultiTexCoord2ivARB,	"glMultiTexCoord2ivARB"		},
+    {	(PROC)glMultiTexCoord2sARB,		"glMultiTexCoord2sARB"		},
+    {	(PROC)glMultiTexCoord2svARB,	"glMultiTexCoord2svARB"		},
+    {	(PROC)glMultiTexCoord3dARB,		"glMultiTexCoord3dARB"		},
+    {	(PROC)glMultiTexCoord3dvARB,	"glMultiTexCoord3dvARB"		},
+    {	(PROC)glMultiTexCoord3fARB,		"glMultiTexCoord3fARB"		},
+    {	(PROC)glMultiTexCoord3fvARB,	"glMultiTexCoord3fvARB"		},
+    {	(PROC)glMultiTexCoord3iARB,		"glMultiTexCoord3iARB"		},
+    {	(PROC)glMultiTexCoord3ivARB,	"glMultiTexCoord3ivARB"		},
+    {	(PROC)glMultiTexCoord3sARB,		"glMultiTexCoord3sARB"		},
+    {	(PROC)glMultiTexCoord3svARB,	"glMultiTexCoord3svARB"		},
+    {	(PROC)glMultiTexCoord4dARB,		"glMultiTexCoord4dARB"		},
+    {	(PROC)glMultiTexCoord4dvARB,	"glMultiTexCoord4dvARB"		},
+    {	(PROC)glMultiTexCoord4fARB,		"glMultiTexCoord4fARB"		},
+    {	(PROC)glMultiTexCoord4fvARB,	"glMultiTexCoord4fvARB"		},
+    {	(PROC)glMultiTexCoord4iARB,		"glMultiTexCoord4iARB"		},
+    {	(PROC)glMultiTexCoord4ivARB,	"glMultiTexCoord4ivARB"		},
+    {	(PROC)glMultiTexCoord4sARB,		"glMultiTexCoord4sARB"		},
+    {	(PROC)glMultiTexCoord4svARB,	"glMultiTexCoord4svARB"		},
+
+	// Descent3 doesn't use correct string, hence this hack
+    {	(PROC)glMultiTexCoord4fARB,		"glMultiTexCoord4f"			},
+
+	// Quake2 SGIS multitexture
+    {	(PROC)gldSelectTextureSGIS,		"glSelectTextureSGIS"		},
+    {	(PROC)gldMTexCoord2fSGIS,		"glMTexCoord2fSGIS"			},
+    {	(PROC)gldMTexCoord2fvSGIS,		"glMTexCoord2fvSGIS"		},
+
+	{	NULL,							"\0"						}
+};
+
+//---------------------------------------------------------------------------
+
+// Scan a GLD_extension table (terminated by a NULL proc) for an entry whose
+// name matches 'name' exactly (case-sensitive strcmp).
+// Returns the entry's proc, or NULL when there is no match.
+static PROC gldFindExtensionProc(
+	const GLD_extension *list,
+	LPCSTR name)
+{
+	for (; list->proc; list++) {
+		if (!strcmp(name, list->name))
+			return list->proc;
+	}
+
+	return NULL;
+}
+
+//---------------------------------------------------------------------------
+
+// Look up an extension entry point by name.
+//
+// GLD_extList is always searched; GLD_multitexList is searched only when
+// glb.bMultitexture is set and the first table had no match (no name appears
+// in both tables, so the result is the same as scanning both).
+// A NULL name is reported as not found instead of being passed to strcmp().
+// The outcome of every lookup is written to the log.
+//
+// Returns the entry point, or NULL if the name is unknown.
+PROC gldGetProcAddress_DX(
+	LPCSTR a)
+{
+	PROC	proc = NULL;
+
+	if (a == NULL) {
+		gldLogPrintf(GLDLOG_INFO, "GetProcAddress: %s (%s)", "(null)", "Failed");
+		return NULL;
+	}
+
+	proc = gldFindExtensionProc(GLD_extList, a);
+
+	if (!proc && glb.bMultitexture)
+		proc = gldFindExtensionProc(GLD_multitexList, a);
+
+	gldLogPrintf(GLDLOG_INFO, "GetProcAddress: %s (%s)", a, proc ? "OK" : "Failed");
+
+	return proc;
+}
+
+//---------------------------------------------------------------------------
+
+// Adjust the context's extension set for the DX8 driver:
+//  1. disable the extensions listed in gld_disable_extensions,
+//  2. enable the ones listed in gld_enable_extensions,
+//  3. if glb.bMultitexture is set, enable the multitexture extensions,
+//  4. enable the imaging, 1.3 and 1.4 extension groups.
+// Every list is a NULL-terminated array of extension name strings.
+void gldEnableExtensions_DX8(
+	GLcontext *ctx)
+{
+	// Mesa turns some extensions on by default; the uncommented entries here
+	// are switched off again. Commented-out entries stay enabled.
+	// NOTE: GL_EXT_compiled_vertex_array appears broken.
+	const char *gld_disable_extensions[] = {
+//		"GL_ARB_transpose_matrix",
+//		"GL_EXT_compiled_vertex_array",
+//		"GL_EXT_polygon_offset",
+//		"GL_EXT_rescale_normal",
+		"GL_EXT_texture3D",
+//		"GL_NV_texgen_reflection",
+		NULL
+	};
+
+	// Enabled only when multitexturing is switched on.
+	const char *gld_multitex_extensions[] = {
+		"GL_ARB_multitexture",		// Quake 3
+		NULL
+	};
+
+	// Quake 2 engines (only referenced by the disabled call further down)
+	const char *szGL_SGIS_multitexture = "GL_SGIS_multitexture";
+
+	// Always enabled.
+	const char *gld_enable_extensions[] = {
+		"GL_EXT_texture_env_add",	// Quake 3
+		"GL_ARB_texture_env_add",	// Quake 3
+		NULL
+	};
+
+	const char **ext;	// Cursor over the current NULL-terminated list
+
+	for (ext = gld_disable_extensions; *ext; ext++)
+		_mesa_disable_extension(ctx, *ext);
+
+	for (ext = gld_enable_extensions; *ext; ext++)
+		_mesa_enable_extension(ctx, *ext);
+
+	if (glb.bMultitexture) {
+		for (ext = gld_multitex_extensions; *ext; ext++)
+			_mesa_enable_extension(ctx, *ext);
+
+		// GL_SGIS_multitexture
+		// NOTE: Quake2 ran *slower* with this enabled, so I've
+		// disabled it for now.
+		// Fair bit slower on GeForce256,
+		// Much slower on 3dfx Voodoo5 5500.
+//		_mesa_add_extension(ctx, GL_TRUE, szGL_SGIS_multitexture, 0);
+	}
+
+	_mesa_enable_imaging_extensions(ctx);
+	_mesa_enable_1_3_extensions(ctx);
+	_mesa_enable_1_4_extensions(ctx);
+}
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx8/gld_pipeline_dx8.c b/src/mesa/drivers/windows/gldirect/dx8/gld_pipeline_dx8.c
new file mode 100644
index 0000000000..2baea57443
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx8/gld_pipeline_dx8.c
@@ -0,0 +1,77 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Mesa transformation pipeline with GLDirect fastpath
+*
+****************************************************************************/
+
+//#include "../GLDirect.h"
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx8.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+//---------------------------------------------------------------------------
+
+extern struct tnl_pipeline_stage _gld_d3d_render_stage;	// declared here, defined outside this file
+extern struct tnl_pipeline_stage _gld_mesa_render_stage;	// declared here, defined outside this file
+// Stage list handed to _tnl_install_pipeline() by gldInstallPipeline_DX8(); ends with a 0 entry.
+static const struct tnl_pipeline_stage *gld_pipeline[] = {
+	&_gld_d3d_render_stage,			// Direct3D TnL
+	&_tnl_vertex_transform_stage,
+	&_tnl_normal_transform_stage,
+	&_tnl_lighting_stage,
+	&_tnl_fog_coordinate_stage,	/* TODO: Omit fog stage. ??? */
+	&_tnl_texgen_stage,
+	&_tnl_texture_transform_stage,
+	&_tnl_point_attenuation_stage,
+	&_gld_mesa_render_stage,		// Mesa TnL, D3D rendering
+	0,
+};
+
+//---------------------------------------------------------------------------
+// Replace the context's TnL pipeline with the GLDirect stage list (gld_pipeline).
+void gldInstallPipeline_DX8(
+	GLcontext *ctx)	// context whose pipeline is destroyed and re-installed
+{
+	// Remove any existing pipeline stages,
+	// then install GLDirect pipeline stages.
+
+	_tnl_destroy_pipeline(ctx);
+	_tnl_install_pipeline(ctx, gld_pipeline);
+}
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx8/gld_primitive_dx8.c b/src/mesa/drivers/windows/gldirect/dx8/gld_primitive_dx8.c
new file mode 100644
index 0000000000..990922580a
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx8/gld_primitive_dx8.c
@@ -0,0 +1,1446 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Primitive (points/lines/tris/quads) rendering
+*
+****************************************************************************/
+
+//#include "../GLDirect.h"
+
+//#include "gld_dx8.h"
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx8.h"
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "depth.h"
+#include "extensions.h"
+#include "macros.h"
+#include "matrix.h"
+// #include "mem.h"
+//#include "mmath.h"
+#include "mtypes.h"
+#include "texformat.h"
+#include "texstore.h"
+#include "vbo/vbo.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "swrast_setup/ss_context.h"
+#include "swrast/s_context.h"
+#include "swrast/s_depth.h"
+#include "swrast/s_lines.h"
+#include "swrast/s_triangle.h"
+#include "swrast/s_trispan.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+// Disable compiler complaints about unreferenced local variables
+#pragma warning (disable:4101)
+
+//---------------------------------------------------------------------------
+// Helper defines for primitives
+//---------------------------------------------------------------------------
+
+//static const float ooZ		= 1.0f / 65536.0f; // One over Z
+// Expression helpers; they read `swv` (current SWvertex) and `gldCtx` from the expanding scope.
+#define GLD_COLOUR (D3DCOLOR_RGBA(swv->color[0], swv->color[1], swv->color[2], swv->color[3]))
+#define GLD_SPECULAR (D3DCOLOR_RGBA(swv->specular[0], swv->specular[1], swv->specular[2], swv->specular[3]))
+#define GLD_FLIP_Y(y) (gldCtx->dwHeight - (y))	// mirror y against the context height
+
+//---------------------------------------------------------------------------
+// 2D vertex setup
+//---------------------------------------------------------------------------
+// Locals for the 2D point functions; pV starts at gld->PB2d.pPoints.  No trailing ';' - the call site supplies it.
+#define GLD_SETUP_2D_VARS_POINTS							\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx8	*gld	= GLD_GET_DX8_DRIVER(gldCtx);	\
+	GLD_2D_VERTEX	*pV		= (GLD_2D_VERTEX*)gld->PB2d.pPoints;	\
+	SScontext		*ss		= SWSETUP_CONTEXT(ctx);			\
+	SWvertex		*swv;									\
+	DWORD			dwSpecularColour;						\
+	DWORD			dwFlatColour
+// Same locals for the 2D line functions; pV starts at gld->PB2d.pLines.
+#define GLD_SETUP_2D_VARS_LINES								\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx8	*gld	= GLD_GET_DX8_DRIVER(gldCtx);	\
+	GLD_2D_VERTEX	*pV		= (GLD_2D_VERTEX*)gld->PB2d.pLines;	\
+	SScontext		*ss		= SWSETUP_CONTEXT(ctx);			\
+	SWvertex		*swv;									\
+	DWORD			dwSpecularColour;						\
+	DWORD			dwFlatColour
+// Locals for 2D triangles/quads: as above (pV at PB2d.pTriangles) plus the fog flag and two-sided-lighting state.
+#define GLD_SETUP_2D_VARS_TRIANGLES							\
+	BOOL			bFog = ctx->Fog.Enabled;				\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx8	*gld	= GLD_GET_DX8_DRIVER(gldCtx);	\
+	GLD_2D_VERTEX	*pV		= (GLD_2D_VERTEX*)gld->PB2d.pTriangles;	\
+	SScontext		*ss		= SWSETUP_CONTEXT(ctx);			\
+	SWvertex		*swv;									\
+	DWORD			dwSpecularColour;						\
+	DWORD			dwFlatColour;							\
+	GLuint					facing = 0;						\
+	struct vertex_buffer	*VB;							\
+	GLchan					(*vbcolor)[4];					\
+	GLchan					(*vbspec)[4]
+// Point swv at entry s of ss->verts (both taken from the expanding scope).  s is a plain index: '[' ## s is not a valid token.
+#define GLD_SETUP_GET_SWVERT(s)					\
+	swv = &ss->verts[s]
+// Copy window x, flipped window y and win[3] (stored as rhw) from swv into the output vertex.
+#define GLD_SETUP_2D_VERTEX						\
+	pV->x			= swv->win[0];				\
+	pV->y			= GLD_FLIP_Y(swv->win[1]);	\
+	pV->rhw			= swv->win[3]
+// Per-vertex diffuse taken straight from the current software vertex.
+#define GLD_SETUP_SMOOTH_COLOUR					\
+	pV->diffuse		= GLD_COLOUR
+// Capture a flat colour from the current vertex, either plain or fog-blended.
+#define GLD_SETUP_GET_FLAT_COLOUR				\
+	dwFlatColour	= GLD_COLOUR
+#define GLD_SETUP_GET_FLAT_FOG_COLOUR			\
+	dwFlatColour	= _gldComputeFog(ctx, swv)
+// Apply the previously captured flat colour to the output vertex.
+#define GLD_SETUP_USE_FLAT_COLOUR				\
+	pV->diffuse		= dwFlatColour
+// Capture, then apply, a flat specular colour.
+#define GLD_SETUP_GET_FLAT_SPECULAR				\
+	dwSpecularColour= GLD_SPECULAR
+
+#define GLD_SETUP_USE_FLAT_SPECULAR				\
+	pV->specular	= dwSpecularColour
+// Output depth is window z divided by ctx->DepthMaxF.
+#define GLD_SETUP_DEPTH							\
+	pV->sz			= swv->win[2] / ctx->DepthMaxF
+//	pV->z			= swv->win[2] * ooZ;
+
+#define GLD_SETUP_SPECULAR						\
+	pV->specular	= GLD_SPECULAR
+// Fog-blended diffuse for the current vertex, computed by _gldComputeFog().
+#define GLD_SETUP_FOG							\
+	pV->diffuse		= _gldComputeFog(ctx, swv)
+// First two components of texture coordinate sets 0 and 1.
+#define GLD_SETUP_TEX0							\
+	pV->t0_u		= swv->texcoord[0][0];		\
+	pV->t0_v		= swv->texcoord[0][1]
+
+#define GLD_SETUP_TEX1							\
+	pV->t1_u		= swv->texcoord[1][0];		\
+	pV->t1_v		= swv->texcoord[1][1]
+// Per-vertex colours: VB back-face colours when facing == 1, else fog/smooth diffuse + specular.  v is a plain index (no '##').
+#define GLD_SETUP_LIGHTING(v)			\
+	if (facing == 1) {					\
+		pV->diffuse	= D3DCOLOR_RGBA(vbcolor[v][0], vbcolor[v][1], vbcolor[v][2], vbcolor[v][3]);	\
+		if (vbspec) {																					\
+			pV->specular = D3DCOLOR_RGBA(vbspec[v][0], vbspec[v][1], vbspec[v][2], vbspec[v][3]);	\
+		}	\
+	} else {	\
+		if (bFog)						\
+			GLD_SETUP_FOG;				\
+		else							\
+			GLD_SETUP_SMOOTH_COLOUR;	\
+		GLD_SETUP_SPECULAR;				\
+	}
+// Flat variant: when facing == 1, replace the captured flat diffuse (and specular, if present) with back-face colours.
+#define GLD_SETUP_GET_FLAT_LIGHTING(v)	\
+	if (facing == 1) {					\
+		dwFlatColour = D3DCOLOR_RGBA(vbcolor[v][0], vbcolor[v][1], vbcolor[v][2], vbcolor[v][3]);	\
+		if (vbspec) {																					\
+			dwSpecularColour = D3DCOLOR_RGBA(vbspec[v][0], vbspec[v][1], vbspec[v][2], vbspec[v][3]);	\
+		}	\
+	}
+// Object-like macro: the "(vN)" written after it at call sites is not an argument, it is left behind as a no-op expression statement.
+#define GLD_SETUP_TWOSIDED_LIGHTING		\
+	/* Two-sided lighting: fetch the back-face colour arrays and derive `facing` from v0, v1, v2 of the enclosing function */				\
+	if (ctx->_TriangleCaps & DD_TRI_LIGHT_TWOSIDE) {	\
+		SWvertex	*verts = SWSETUP_CONTEXT(ctx)->verts;	\
+		SWvertex	*v[3];									\
+		GLfloat		ex,ey,fx,fy,cc;							\
+		/* Back-face colour arrays, read later by the GLD_SETUP_*LIGHTING macros; vbspec may be NULL */							\
+		VB		= &TNL_CONTEXT(ctx)->vb;					\
+		vbcolor	= (GLchan (*)[4])VB->BackfaceColorPtr->data;	\
+		if (VB->BackfaceSecondaryColorPtr) {			\
+			vbspec = (GLchan (*)[4])VB->BackfaceSecondaryColorPtr->data;	\
+		} else {													\
+			vbspec = NULL;											\
+		}															\
+		v[0] = &verts[v0];											\
+		v[1] = &verts[v1];											\
+		v[2] = &verts[v2];											\
+		ex = v[0]->win[0] - v[2]->win[0];	\
+		ey = v[0]->win[1] - v[2]->win[1];	\
+		fx = v[1]->win[0] - v[2]->win[0];	\
+		fy = v[1]->win[1] - v[2]->win[1];	\
+		cc  = ex*fy - ey*fx;				\
+		facing = (cc < 0.0) ^ ctx->Polygon._FrontBit;	\
+	}
+
+//---------------------------------------------------------------------------
+// 3D vertex setup
+//---------------------------------------------------------------------------
+// Locals for the 3D point functions; pV starts at gld->PB3d.pPoints.  Ends with ';', so call sites add none.
+#define GLD_SETUP_3D_VARS_POINTS											\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx8	*gld	= GLD_GET_DX8_DRIVER(gldCtx);	\
+	GLD_3D_VERTEX			*pV		= (GLD_3D_VERTEX*)gld->PB3d.pPoints;	\
+	TNLcontext				*tnl	= TNL_CONTEXT(ctx);				\
+	struct vertex_buffer	*VB		= &tnl->vb;						\
+	GLfloat					(*p4f)[4];								\
+	GLfloat					(*tc)[4];								\
+	DWORD					dwColor;
+// Same locals for the 3D line functions; pV starts at gld->PB3d.pLines.
+#define GLD_SETUP_3D_VARS_LINES											\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx8	*gld	= GLD_GET_DX8_DRIVER(gldCtx);	\
+	GLD_3D_VERTEX			*pV		= (GLD_3D_VERTEX*)gld->PB3d.pLines;	\
+	TNLcontext				*tnl	= TNL_CONTEXT(ctx);				\
+	struct vertex_buffer	*VB		= &tnl->vb;						\
+	GLfloat					(*p4f)[4];								\
+	GLfloat					(*tc)[4];								\
+	DWORD					dwColor;
+// Same locals for the 3D triangle/quad functions; pV starts at gld->PB3d.pTriangles.
+#define GLD_SETUP_3D_VARS_TRIANGLES											\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx8	*gld	= GLD_GET_DX8_DRIVER(gldCtx);	\
+	GLD_3D_VERTEX			*pV		= (GLD_3D_VERTEX*)gld->PB3d.pTriangles;	\
+	TNLcontext				*tnl	= TNL_CONTEXT(ctx);				\
+	struct vertex_buffer	*VB		= &tnl->vb;						\
+	GLfloat					(*p4f)[4];								\
+	GLfloat					(*tc)[4];								\
+	DWORD					dwColor;
+// Copy x, y, z of vertex v from the position attribute array.  v is a plain index: '[' ## v is not a valid token.
+#define GLD_SETUP_3D_VERTEX(v)					\
+	p4f = VB->AttribPtr[_TNL_ATTRIB_POS]->data;		\
+	pV->Position.x	= p4f[v][0];				\
+	pV->Position.y	= p4f[v][1];				\
+	pV->Position.z	= p4f[v][2];
+// Per-vertex diffuse built from the four float components of the COLOR0 attribute of vertex v.
+#define GLD_SETUP_SMOOTH_COLOUR_3D(v)															\
+	p4f = (GLfloat (*)[4])VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data;	\
+	pV->Diffuse	= D3DCOLOR_COLORVALUE(p4f[v][0], p4f[v][1], p4f[v][2], p4f[v][3]);
+
+// Capture the COLOR0 colour of vertex v in dwColor, then apply dwColor to the output vertex.
+#define GLD_SETUP_GET_FLAT_COLOUR_3D(v)													\
+	p4f = (GLfloat (*)[4])VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data;	\
+	dwColor	= D3DCOLOR_COLORVALUE(p4f[v][0], p4f[v][1], p4f[v][2], p4f[v][3]);
+
+#define GLD_SETUP_USE_FLAT_COLOUR_3D			\
+	pV->Diffuse = dwColor;
+// Texcoord set 0 (first two components).  The pointer that is null-checked is the one that is dereferenced.
+#define GLD_SETUP_TEX0_3D(v)						\
+	if (VB->AttribPtr[_TNL_ATTRIB_TEX0]) {				\
+		tc = VB->AttribPtr[_TNL_ATTRIB_TEX0]->data;		\
+		pV->TexUnit0.x	= tc[v][0];				\
+		pV->TexUnit0.y	= tc[v][1];				\
+	}
+// NOTE(review): TEX1 reads VB->TnlAttribPtr while every macro above reads VB->AttribPtr - confirm which member is intended.
+#define GLD_SETUP_TEX1_3D(v)						\
+	if (VB->TnlAttribPtr[_TNL_ATTRIB_TEX1]) {			\
+		tc = VB->TnlAttribPtr[_TNL_ATTRIB_TEX1]->data;		\
+		pV->TexUnit1.x	= tc[v][0];				\
+		pV->TexUnit1.y	= tc[v][1];				\
+	}
+
+//---------------------------------------------------------------------------
+// Helper functions
+//---------------------------------------------------------------------------
+// Fog-blend a software vertex's RGB and pack it, with the vertex alpha, via D3DCOLOR_RGBA.
+__inline DWORD _gldComputeFog(
+	GLcontext *ctx,
+	SWvertex *swv)
+{
+	// Full fog calculation, based on Mesa code: every channel is
+	// fog * vertex colour + (1 - fog) * fog colour.
+	const GLfloat	fVertex	= swv->fog;			// weight of the vertex colour
+	const GLfloat	fFog	= 1.0f - fVertex;	// weight of the fog colour
+	GLchan			fogRed, fogGreen, fogBlue;	// ctx->Fog.Color as channel values
+	GLchan			outRed, outGreen, outBlue;	// blended result
+
+	UNCLAMPED_FLOAT_TO_CHAN(fogRed, ctx->Fog.Color[RCOMP]);
+	outRed = fVertex * swv->color[0] + fFog * fogRed;
+	UNCLAMPED_FLOAT_TO_CHAN(fogGreen, ctx->Fog.Color[GCOMP]);
+	outGreen = fVertex * swv->color[1] + fFog * fogGreen;
+	UNCLAMPED_FLOAT_TO_CHAN(fogBlue, ctx->Fog.Color[BCOMP]);
+	outBlue = fVertex * swv->color[2] + fFog * fogBlue;
+
+	return D3DCOLOR_RGBA(outRed, outGreen, outBlue, swv->color[3]);
+}
+
+//---------------------------------------------------------------------------
+// Line-stipple reset entry point.  Currently an empty stub: calling it has no effect.
+void gld_ResetLineStipple_DX8(
+	GLcontext *ctx)	// not used by the stub
+{
+	// TODO: Fake stipple with a 32x32 texture.
+}
+
+//---------------------------------------------------------------------------
+// 2D (post-transformed) primitives
+//---------------------------------------------------------------------------
+// Append points [first, last) to the 2D point buffer; clipped points take no slot and are not counted.
+void gld_Points2D_DX8(
+	GLcontext *ctx,	// Mesa context; the GLD context and swsetup vertices are derived from it
+	GLuint first,	// first vertex (or VB->Elts entry) to emit
+	GLuint last)	// one past the final vertex (or VB->Elts entry)
+{
+	GLD_SETUP_2D_VARS_POINTS;
+	GLD_2D_VERTEX			*pFirstOut = pV;	// where this call starts writing
+	unsigned				i;
+	struct vertex_buffer	*VB = &TNL_CONTEXT(ctx)->vb;
+
+	// _Size is already clamped to MaxPointSize and MinPointSize
+	IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_POINTSIZE, *((DWORD*)&ctx->Point._Size));
+
+	if (VB->Elts) {
+		for (i=first; i<last; i++) {
+			if (VB->ClipMask[VB->Elts[i]] != 0)
+				continue;	// clipped: must not leave an unwritten vertex in the buffer
+			GLD_SETUP_GET_SWVERT(VB->Elts[i]);
+			GLD_SETUP_2D_VERTEX;
+			GLD_SETUP_SMOOTH_COLOUR;
+			GLD_SETUP_DEPTH;
+			GLD_SETUP_SPECULAR;
+			GLD_SETUP_TEX0;
+			GLD_SETUP_TEX1;
+			pV++;
+		}
+	} else {
+		GLD_SETUP_GET_SWVERT(first);
+		for (i=first; i<last; i++, swv++) {
+			if (VB->ClipMask[i] != 0)
+				continue;	// swv still advances with i; only pV stays put
+			GLD_SETUP_2D_VERTEX;
+			GLD_SETUP_SMOOTH_COLOUR;
+			GLD_SETUP_DEPTH;
+			GLD_SETUP_SPECULAR;
+			GLD_SETUP_TEX0;
+			GLD_SETUP_TEX1;
+			pV++;
+		}
+	}
+
+	gld->PB2d.pPoints = (BYTE*)pV;
+	gld->PB2d.nPoints += (GLuint)(pV - pFirstOut);
+}
+
+//---------------------------------------------------------------------------
+// Append one flat-shaded line (two vertices) to the 2D line buffer.
+void gld_Line2DFlat_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into ss->verts; written second
+	GLuint v1)	// index into ss->verts; written first and supplies the flat colours
+{
+	GLD_SETUP_2D_VARS_LINES;
+	// v1 goes first; its diffuse and specular are captured for reuse on v0.
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_GET_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_GET_FLAT_SPECULAR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// v0 takes its own position, depth and texcoords but v1's colours.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// Store the advanced write pointer and count the line.
+	gld->PB2d.pLines = (BYTE*)pV;
+	gld->PB2d.nLines++;
+}
+
+//---------------------------------------------------------------------------
+// Append one smooth-shaded line (two vertices, each with its own colours) to the 2D line buffer.
+void gld_Line2DSmooth_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into ss->verts; written first
+	GLuint v1)	// index into ss->verts; written second
+{
+	GLD_SETUP_2D_VARS_LINES;
+	// First endpoint.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_SPECULAR;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Second endpoint.
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_SPECULAR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Store the advanced write pointer and count the line.
+	gld->PB2d.pLines = (BYTE*)pV;
+	gld->PB2d.nLines++;
+}
+
+//---------------------------------------------------------------------------
+// Append one flat-shaded triangle to the 2D triangle buffer; only diffuse is written, specular is left untouched.
+void gld_Triangle2DFlat_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into ss->verts; written second
+	GLuint v1,	// index into ss->verts; written third
+	GLuint v2)	// index into ss->verts; written first and supplies the flat colour
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+	// v2 goes first; its diffuse is captured for reuse on v0 and v1.
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_GET_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;;
+	// v0 with v2's colour.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+	// v1 with v2's colour.
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+	// Store the advanced write pointer and count the triangle.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+// Append one smooth-shaded triangle to the 2D triangle buffer; only diffuse is written, specular is left untouched.
+void gld_Triangle2DSmooth_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into ss->verts; written first
+	GLuint v1,	// index into ss->verts; written second
+	GLuint v2)	// index into ss->verts; written third
+{
+
+	GLD_SETUP_2D_VARS_TRIANGLES;
+	// Each vertex carries its own diffuse colour.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Store the advanced write pointer and count the triangle.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+// Flat-shaded 2D triangle with the "extras": fog, specular and two-sided lighting.
+void gld_Triangle2DFlatExtras_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into ss->verts; written second
+	GLuint v1,	// index into ss->verts; written third
+	GLuint v2)	// index into ss->verts; written first and supplies the flat colours
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+	// Sets `facing` (and the back-face colour arrays) when two-sided lighting is on; the "(v2)" is a no-op.
+	GLD_SETUP_TWOSIDED_LIGHTING(v2);
+	// v2 goes first; flat diffuse (fogged if bFog) and specular are captured, then overridden for back faces.
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	if (bFog)
+		GLD_SETUP_GET_FLAT_FOG_COLOUR;
+	else
+		GLD_SETUP_GET_FLAT_COLOUR;
+	GLD_SETUP_GET_FLAT_SPECULAR;
+	GLD_SETUP_GET_FLAT_LIGHTING(v2);
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// v0 with the captured colours.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// v1 with the captured colours.
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// Store the advanced write pointer and count the triangle.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+// Smooth-shaded 2D triangle with the "extras": fog, specular and two-sided lighting, resolved per vertex.
+void gld_Triangle2DSmoothExtras_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into ss->verts (and the back-face colour arrays); written first
+	GLuint v1,	// as v0; written second
+	GLuint v2)	// as v0; written third
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+	// Sets `facing` (and the back-face colour arrays) when two-sided lighting is on; the "(v0)" is a no-op.
+	GLD_SETUP_TWOSIDED_LIGHTING(v0);
+	// GLD_SETUP_LIGHTING picks back-face, fogged or plain colours for each vertex.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v0);
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v1);
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v2);
+	pV++;
+	// Store the advanced write pointer and count the triangle.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+// Append a flat-shaded quad to the 2D triangle buffer as two triangles: (v3, v0, v1) and (v1, v2, v3).
+void gld_Quad2DFlat_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into ss->verts
+	GLuint v1,	// index into ss->verts
+	GLuint v2,	// index into ss->verts
+	GLuint v3)	// index into ss->verts; supplies the flat colour for all six vertices
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+	// Triangle 1, vertex v3: capture the flat colour.
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_GET_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+	// Triangle 1, vertex v0.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+	// Triangle 1, vertex v1.
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+	// Triangle 2, vertex v1 (repeated).
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+	// Triangle 2, vertex v2.
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+	// Triangle 2, vertex v3 (repeated).
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+	// Store the advanced write pointer and count both triangles.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+// Append a smooth-shaded quad to the 2D triangle buffer as two triangles: (v0, v1, v2) and (v2, v3, v0).
+void gld_Quad2DSmooth_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into ss->verts
+	GLuint v1,	// index into ss->verts
+	GLuint v2,	// index into ss->verts
+	GLuint v3)	// index into ss->verts
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+	// Triangle 1, vertex v0.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Triangle 1, vertex v1.
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Triangle 1, vertex v2.
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Triangle 2, vertex v2 (repeated).
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Triangle 2, vertex v3.
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Triangle 2, vertex v0 (repeated).
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+	// Store the advanced write pointer and count both triangles.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+// Flat-shaded 2D quad with fog, specular and two-sided lighting, emitted as triangles (v3, v0, v1) and (v1, v2, v3).
+void gld_Quad2DFlatExtras_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into ss->verts
+	GLuint v1,	// index into ss->verts
+	GLuint v2,	// index into ss->verts
+	GLuint v3)	// index into ss->verts; supplies the flat colours for all six vertices
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+	// Sets `facing` from v0, v1, v2 when two-sided lighting is on; the "(v3)" is a no-op.
+	GLD_SETUP_TWOSIDED_LIGHTING(v3);
+	// Triangle 1, vertex v3: capture flat diffuse (fogged if bFog) and specular, overridden for back faces.
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	if (bFog)
+		GLD_SETUP_GET_FLAT_FOG_COLOUR;
+	else
+		GLD_SETUP_GET_FLAT_COLOUR;
+	GLD_SETUP_GET_FLAT_SPECULAR;
+	GLD_SETUP_GET_FLAT_LIGHTING(v3);
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// Triangle 1, vertex v0.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// Triangle 1, vertex v1.
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// Triangle 2, vertex v1 (repeated).
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// Triangle 2, vertex v2.
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// Triangle 2, vertex v3 (repeated).
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+	// Store the advanced write pointer and count both triangles.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+// Smooth-shaded 2D quad with fog, specular and two-sided lighting, emitted as triangles (v0, v1, v2) and (v2, v3, v0).
+void gld_Quad2DSmoothExtras_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into ss->verts (and the back-face colour arrays)
+	GLuint v1,	// as v0
+	GLuint v2,	// as v0
+	GLuint v3)	// as v0
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+	// Sets `facing` from v0, v1, v2 when two-sided lighting is on; the "(v0)" is a no-op.
+	GLD_SETUP_TWOSIDED_LIGHTING(v0);
+	// Triangle 1, vertex v0.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v0);
+	pV++;
+	// Triangle 1, vertex v1.
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v1);
+	pV++;
+	// Triangle 1, vertex v2.
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v2);
+	pV++;
+	// Triangle 2, vertex v2 (repeated).
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v2);
+	pV++;
+	// Triangle 2, vertex v3.
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v3);
+	pV++;
+	// Triangle 2, vertex v0 (repeated).
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v0);
+	pV++;
+	// Store the advanced write pointer and count both triangles.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+// 3D (pre-transformed) primitives
+//---------------------------------------------------------------------------
+// Append points [first, last) to the 3D point buffer.  Position, colour and
+// texcoords are read from the TnL vertex buffer.  Points with a non-zero clip
+// mask are skipped: they take no buffer slot and are not added to
+// PB3d.nPoints, so no unwritten vertex is ever submitted.
+void gld_Points3D_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint first,	// first vertex (or VB->Elts entry) to emit
+	GLuint last)	// one past the final vertex (or VB->Elts entry)
+{
+	GLD_SETUP_3D_VARS_POINTS
+	GLD_3D_VERTEX			*pFirstOut = pV;	// where this call starts writing
+	unsigned				i;
+
+	// _Size is already clamped to MaxPointSize and MinPointSize
+	IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_POINTSIZE, *((DWORD*)&ctx->Point._Size));
+
+	if (VB->Elts) {
+		// Indexed: every attribute is fetched through the element index,
+		// not the loop counter.
+		for (i=first; i<last; i++) {
+			const GLuint	iElt = VB->Elts[i];
+
+			if (VB->ClipMask[iElt] != 0)
+				continue;
+			GLD_SETUP_3D_VERTEX(iElt)
+			GLD_SETUP_SMOOTH_COLOUR_3D(iElt)
+			GLD_SETUP_TEX0_3D(iElt)
+			GLD_SETUP_TEX1_3D(iElt)
+			pV++;
+		}
+	} else {
+		// Non-indexed: the loop counter is the vertex index.
+		for (i=first; i<last; i++) {
+			if (VB->ClipMask[i] != 0)
+				continue;
+			GLD_SETUP_3D_VERTEX(i)
+			GLD_SETUP_SMOOTH_COLOUR_3D(i)
+			GLD_SETUP_TEX0_3D(i)
+			GLD_SETUP_TEX1_3D(i)
+			pV++;
+		}
+	}
+
+	// Advance the buffer cursor and count only the points actually written.
+	gld->PB3d.pPoints = (BYTE*)pV;
+	gld->PB3d.nPoints += (GLuint)(pV - pFirstOut);
+}
+
+//---------------------------------------------------------------------------
+// Line functions
+//---------------------------------------------------------------------------
+// Append one flat-shaded line (two vertices) to the 3D line buffer.
+void gld_Line3DFlat_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into the TnL vertex buffer arrays; written second
+	GLuint v1)	// index into the TnL vertex buffer arrays; written first and supplies the flat colour
+{
+	GLD_SETUP_3D_VARS_LINES
+	// v1 goes first; its colour is captured in dwColor for reuse on v0.
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_GET_FLAT_COLOUR_3D(v1)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+	// v0 with v1's colour.
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+	// Store the advanced write pointer and count the line.
+	gld->PB3d.pLines = (BYTE*)pV;
+	gld->PB3d.nLines++;
+}
+
+//---------------------------------------------------------------------------
+// Append one smooth-shaded line to the 3D line buffer; note that v1 is written before v0.
+void gld_Line3DSmooth_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into the TnL vertex buffer arrays; written second
+	GLuint v1)	// index into the TnL vertex buffer arrays; written first
+{
+	GLD_SETUP_3D_VARS_LINES
+	// Each endpoint carries its own colour.
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v1)
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+	// Store the advanced write pointer and count the line.
+	gld->PB3d.pLines = (BYTE*)pV;
+	gld->PB3d.nLines++;
+}
+
+//---------------------------------------------------------------------------
+// Triangle functions
+//---------------------------------------------------------------------------
+// Append one flat-shaded triangle to the 3D triangle buffer, in the order v2, v0, v1.
+void gld_Triangle3DFlat_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into the TnL vertex buffer arrays
+	GLuint v1,	// index into the TnL vertex buffer arrays
+	GLuint v2)	// index into the TnL vertex buffer arrays; supplies the flat colour
+{
+	GLD_SETUP_3D_VARS_TRIANGLES
+	// v2 goes first; its colour is captured in dwColor for reuse on v0 and v1.
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	GLD_SETUP_GET_FLAT_COLOUR_3D(v2)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	pV++;
+	// v0 with v2's colour.
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	pV++;
+	// v1 with v2's colour.
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	pV++;
+	// Store the advanced write pointer and count the triangle.
+	gld->PB3d.pTriangles = (BYTE*)pV;
+	gld->PB3d.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+// Append one smooth-shaded triangle to the 3D triangle buffer, in the order v0, v1, v2.
+void gld_Triangle3DSmooth_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into the TnL vertex buffer arrays
+	GLuint v1,	// index into the TnL vertex buffer arrays
+	GLuint v2)	// index into the TnL vertex buffer arrays
+{
+	GLD_SETUP_3D_VARS_TRIANGLES
+	// Each vertex carries its own colour.
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v1)
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v2)
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	pV++;
+	// Store the advanced write pointer and count the triangle.
+	gld->PB3d.pTriangles = (BYTE*)pV;
+	gld->PB3d.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+// Quad functions
+//---------------------------------------------------------------------------
+// Append a flat-shaded quad to the 3D triangle buffer as two triangles: (v3, v0, v1) and (v1, v2, v3).
+void gld_Quad3DFlat_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into the TnL vertex buffer arrays
+	GLuint v1,	// index into the TnL vertex buffer arrays
+	GLuint v2,	// index into the TnL vertex buffer arrays
+	GLuint v3)	// index into the TnL vertex buffer arrays; supplies the flat colour for all six vertices
+{
+	GLD_SETUP_3D_VARS_TRIANGLES
+	// Triangle 1, vertex v3: capture the flat colour in dwColor.
+	GLD_SETUP_3D_VERTEX(v3)
+	GLD_SETUP_GET_FLAT_COLOUR_3D(v3)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v3)
+	GLD_SETUP_TEX1_3D(v3)
+	pV++;
+	// Triangle 1, vertex v0.
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+	// Triangle 1, vertex v1.
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+	// Triangle 2, vertex v1 (repeated).
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+	// Triangle 2, vertex v2.
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	pV++;
+	// Triangle 2, vertex v3 (repeated).
+	GLD_SETUP_3D_VERTEX(v3)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v3)
+	GLD_SETUP_TEX1_3D(v3)
+	pV++;
+	// Store the advanced write pointer and count both triangles.
+	gld->PB3d.pTriangles = (BYTE*)pV;
+	gld->PB3d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+// Append a smooth-shaded quad to the 3D triangle buffer as two triangles: (v0, v1, v2) and (v2, v3, v0).
+void gld_Quad3DSmooth_DX8(
+	GLcontext *ctx,	// Mesa context
+	GLuint v0,	// index into the TnL vertex buffer arrays
+	GLuint v1,	// index into the TnL vertex buffer arrays
+	GLuint v2,	// index into the TnL vertex buffer arrays
+	GLuint v3)	// index into the TnL vertex buffer arrays
+{
+	GLD_SETUP_3D_VARS_TRIANGLES
+	// Triangle 1, vertex v0.
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+	// Triangle 1, vertex v1.
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v1)
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+	// Triangle 1, vertex v2.
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v2)
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	pV++;
+	// Triangle 2, vertex v2 (repeated).
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v2)
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	pV++;
+	// Triangle 2, vertex v3.
+	GLD_SETUP_3D_VERTEX(v3)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v3)
+	GLD_SETUP_TEX0_3D(v3)
+	GLD_SETUP_TEX1_3D(v3)
+	pV++;
+	// Triangle 2, vertex v0 (repeated).
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+	// Store the advanced write pointer and count both triangles.
+	gld->PB3d.pTriangles = (BYTE*)pV;
+	gld->PB3d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+// Vertex setup for two-sided-lighting vertex shader
+//---------------------------------------------------------------------------
+
+/*
+
+void gld_Points2DTwoside_DX8(GLcontext *ctx, GLuint first, GLuint last)
+{
+	// NOTE: Two-sided lighting does not apply to Points
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Line2DFlatTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1)
+{
+	// NOTE: Two-sided lighting does not apply to Lines
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Line2DSmoothTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1)
+{
+	// NOTE: Two-sided lighting does not apply to Lines
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Triangle2DFlatTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2)
+{
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Triangle2DSmoothTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	GLD_TWOSIDED_VERTEX	*pV		= (GLD_TWOSIDED_VERTEX*)gld->PBtwosidelight.pTriangles;
+	SScontext			*ss		= SWSETUP_CONTEXT(ctx);
+	SWvertex			*swv;
+	DWORD				dwSpecularColour;
+	DWORD				dwFlatColour;
+	GLuint					facing = 0;
+	struct vertex_buffer	*VB;
+	GLchan					(*vbcolor)[4];
+	GLchan					(*vbspec)[4];
+
+	// Reciprocal of DepthMax
+	const float ooDepthMax = 1.0f / ctx->DepthMaxF; 
+
+	// 1st vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 2nd vert
+	swv = &ss->verts[v1];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 3rd vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	gld->PBtwosidelight.pTriangles = (BYTE*)pV;
+	gld->PBtwosidelight.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Quad2DFlatTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	GLD_TWOSIDED_VERTEX	*pV		= (GLD_TWOSIDED_VERTEX*)gld->PBtwosidelight.pTriangles;
+	SScontext			*ss		= SWSETUP_CONTEXT(ctx);
+	SWvertex			*swv;
+	DWORD				dwSpecularColour;
+	DWORD				dwFlatColour;
+	GLuint					facing = 0;
+	struct vertex_buffer	*VB;
+	GLchan					(*vbcolor)[4];
+	GLchan					(*vbspec)[4];
+
+	// Reciprocal of DepthMax
+	const float ooDepthMax = 1.0f / ctx->DepthMaxF; 
+
+	// 1st vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 2nd vert
+	swv = &ss->verts[v1];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 3rd vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 4th vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 5th vert
+	swv = &ss->verts[v3];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 6th vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	gld->PBtwosidelight.pTriangles = (BYTE*)pV;
+	gld->PBtwosidelight.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Quad2DSmoothTwoside_DX8(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	GLD_TWOSIDED_VERTEX	*pV		= (GLD_TWOSIDED_VERTEX*)gld->PBtwosidelight.pTriangles;
+	SScontext			*ss		= SWSETUP_CONTEXT(ctx);
+	SWvertex			*swv;
+	DWORD				dwSpecularColour;
+	DWORD				dwFlatColour;
+	GLuint					facing = 0;
+	struct vertex_buffer	*VB;
+	GLchan					(*vbcolor)[4];
+	GLchan					(*vbspec)[4];
+
+	// Reciprocal of DepthMax
+	const float ooDepthMax = 1.0f / ctx->DepthMaxF; 
+
+	// 1st vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 2nd vert
+	swv = &ss->verts[v1];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 3rd vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 4th vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 5th vert
+	swv = &ss->verts[v3];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 6th vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	gld->PBtwosidelight.pTriangles = (BYTE*)pV;
+	gld->PBtwosidelight.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+*/
diff --git a/src/mesa/drivers/windows/gldirect/dx8/gld_texture_dx8.c b/src/mesa/drivers/windows/gldirect/dx8/gld_texture_dx8.c
new file mode 100644
index 0000000000..f24b3cfb74
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx8/gld_texture_dx8.c
@@ -0,0 +1,2046 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Texture / Bitmap functions
+*
+****************************************************************************/
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx8.h"
+
+#include <d3dx8tex.h>
+
+#include "texformat.h"
+#include "colormac.h"
+#include "texstore.h"
+#include "image.h"
+// #include "mem.h"
+
+//---------------------------------------------------------------------------
+
+#define GLD_FLIP_HEIGHT(y,h) (gldCtx->dwHeight - (y) - (h)) // Yields dwHeight - y - h. Expands to a reference to a variable named gldCtx, which must be in scope wherever the macro is used. NOTE(review): presumably converts a rectangle's Y origin between GL and D3D conventions - confirm against callers.
+
+//---------------------------------------------------------------------------
+// 1D texture fetch
+//---------------------------------------------------------------------------
+
+// 1D texel addressing helpers: each yields a typed pointer into img->Data for
+// column `col`. `row` and `slice` are accepted but ignored, so the call shape is
+// the same whatever the texture dimensionality. `sz` is the number of elements
+// (of the pointed-to type) per texel.
+#define CHAN_SRC( img, col, row, slice, sz )	((GLchan *)(img)->Data + (col) * (sz))
+#define UBYTE_SRC( img, col, row, slice, sz )	((GLubyte *)(img)->Data + (col) * (sz))
+#define USHORT_SRC( img, col, row, slice )	((GLushort *)(img)->Data + (col))
+#define FLOAT_SRC( img, col, row, slice )	((GLfloat *)(img)->Data + (col))
+
+//---------------------------------------------------------------------------
+
+// Fetch texel i of a 1D X8R8G8B8 image as GLchan RGBA (j and k are only passed through to CHAN_SRC).
+static void gld_fetch_1d_texel_X8R8G8B8(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLchan *bgrx = CHAN_SRC( texImage, i, j, k, 4 );
+   texel[BCOMP] = bgrx[0];	// element 0 feeds blue, element 2 feeds red
+   texel[GCOMP] = bgrx[1];
+   texel[RCOMP] = bgrx[2];
+   texel[ACOMP] = CHAN_MAX;	// element 3 (X) is never read; alpha is forced to maximum
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch of texel i from a 1D X8R8G8B8 image (j, k only passed to CHAN_SRC); alpha is always 1.0.
+static void gld_fetch_1d_texel_f_X8R8G8B8(const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLchan *src = CHAN_SRC( texImage, i, j, k, 4 );
+   texel[RCOMP] = CHAN_TO_FLOAT(src[2]);	// red is element 2, as in the GLchan fetch (was src[0])
+   texel[GCOMP] = CHAN_TO_FLOAT(src[1]);
+   texel[BCOMP] = CHAN_TO_FLOAT(src[0]);	// blue is element 0 (was src[2])
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch texel i of a 1D X1R5G5B5 image as GLchan RGBA (j, k only passed to USHORT_SRC); alpha is CHAN_MAX.
+// Red (bits 14-10), green (9-5) and blue (4-0) are each shifted into bits 7-3 before the 0xf8 mask; the former >>10, >>5, >>0 shifts did not line up with that mask.
+static void gld_fetch_1d_texel_X1R5G5B5(const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort s = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_CHAN( ((s >> 7) & 0xf8) * 255 / 0xf8 );
+   texel[GCOMP] = UBYTE_TO_CHAN( ((s >> 2) & 0xf8) * 255 / 0xf8 );
+   texel[BCOMP] = UBYTE_TO_CHAN( ((s << 3) & 0xf8) * 255 / 0xf8 );
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch of texel i from a 1D X1R5G5B5 image (j, k only passed to USHORT_SRC); alpha is always 1.0.
+// Red (bits 14-10), green (9-5) and blue (4-0) are each shifted into bits 7-3 before the 0xf8 mask; the former >>10, >>5, >>0 shifts did not line up with that mask.
+static void gld_fetch_1d_texel_f_X1R5G5B5(const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort s = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((s >> 7) & 0xf8) * 255 / 0xf8 );
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((s >> 2) & 0xf8) * 255 / 0xf8 );
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((s << 3) & 0xf8) * 255 / 0xf8 );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch texel i of a 1D X4R4G4B4 image as GLchan RGBA (j, k only passed to USHORT_SRC); alpha is CHAN_MAX.
+// Each 4-bit field (red bits 11-8, green 7-4, blue 3-0) is rescaled to 0..255; bits 15-12 are ignored.
+static void gld_fetch_1d_texel_X4R4G4B4(const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_CHAN( ((packed >>  8) & 0xf) * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_CHAN( ((packed >>  4) & 0xf) * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_CHAN( ((packed      ) & 0xf) * 255 / 0xf );
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch of texel i from a 1D X4R4G4B4 image (j, k only passed to USHORT_SRC); alpha is always 1.0.
+// Each 4-bit field (red bits 11-8, green 7-4, blue 3-0) is rescaled to 0..255 before conversion; bits 15-12 are ignored.
+static void gld_fetch_1d_texel_f_X4R4G4B4(const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((packed >>  8) & 0xf) * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((packed >>  4) & 0xf) * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((packed      ) & 0xf) * 255 / 0xf );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+#undef CHAN_SRC	/* retire the 1D addressing macros; the same four names are defined again below for 2D */
+#undef UBYTE_SRC
+#undef USHORT_SRC
+#undef FLOAT_SRC
+
+//---------------------------------------------------------------------------
+// 2D texture fetch
+//---------------------------------------------------------------------------
+
+// 2D texel addressing helpers: texel (col,row) lives at element offset
+// (Width * row + col) within img->Data, so the row stride used is exactly
+// img->Width texels. `slice` is accepted but ignored. `sz` is the number of
+// elements (of the pointed-to type) per texel.
+#define CHAN_SRC( img, col, row, slice, sz )	((GLchan *)(img)->Data + ((img)->Width * (row) + (col)) * (sz))
+#define UBYTE_SRC( img, col, row, slice, sz )	((GLubyte *)(img)->Data + ((img)->Width * (row) + (col)) * (sz))
+#define USHORT_SRC( img, col, row, slice )	((GLushort *)(img)->Data + ((img)->Width * (row) + (col)))
+#define FLOAT_SRC( img, col, row, slice )	((GLfloat *)(img)->Data + ((img)->Width * (row) + (col)))
+
+//---------------------------------------------------------------------------
+
+// Fetch texel (i,j) of a 2D X8R8G8B8 image as GLchan RGBA (k is only passed through to CHAN_SRC).
+static void gld_fetch_2d_texel_X8R8G8B8(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLchan *bgrx = CHAN_SRC( texImage, i, j, k, 4 );
+   texel[BCOMP] = bgrx[0];	// element 0 feeds blue, element 2 feeds red
+   texel[GCOMP] = bgrx[1];
+   texel[RCOMP] = bgrx[2];
+   texel[ACOMP] = CHAN_MAX;	// element 3 (X) is never read; alpha is forced to maximum
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch of texel (i,j) from a 2D X8R8G8B8 image (k only passed to CHAN_SRC); alpha is always 1.0.
+static void gld_fetch_2d_texel_f_X8R8G8B8(const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLchan *src = CHAN_SRC( texImage, i, j, k, 4 );
+   texel[RCOMP] = CHAN_TO_FLOAT(src[2]);	// red is element 2, as in the GLchan fetch (was src[0])
+   texel[GCOMP] = CHAN_TO_FLOAT(src[1]);
+   texel[BCOMP] = CHAN_TO_FLOAT(src[0]);	// blue is element 0 (was src[2])
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch texel (i,j) of a 2D X1R5G5B5 image as GLchan RGBA (k only passed to USHORT_SRC); alpha is CHAN_MAX.
+// Red (bits 14-10), green (9-5) and blue (4-0) are each shifted into bits 7-3 before the 0xf8 mask; the former >>10, >>5, >>0 shifts did not line up with that mask.
+static void gld_fetch_2d_texel_X1R5G5B5(const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort s = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_CHAN( ((s >> 7) & 0xf8) * 255 / 0xf8 );
+   texel[GCOMP] = UBYTE_TO_CHAN( ((s >> 2) & 0xf8) * 255 / 0xf8 );
+   texel[BCOMP] = UBYTE_TO_CHAN( ((s << 3) & 0xf8) * 255 / 0xf8 );
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch of texel (i,j) from a 2D X1R5G5B5 image (k only passed to USHORT_SRC); alpha is always 1.0.
+// Red (bits 14-10), green (9-5) and blue (4-0) are each shifted into bits 7-3 before the 0xf8 mask; the former >>10, >>5, >>0 shifts did not line up with that mask.
+static void gld_fetch_2d_texel_f_X1R5G5B5(const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort s = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((s >> 7) & 0xf8) * 255 / 0xf8 );
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((s >> 2) & 0xf8) * 255 / 0xf8 );
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((s << 3) & 0xf8) * 255 / 0xf8 );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch texel (i,j) of a 2D X4R4G4B4 image as GLchan RGBA (k only passed to USHORT_SRC); alpha is CHAN_MAX.
+// Each 4-bit field (red bits 11-8, green 7-4, blue 3-0) is rescaled to 0..255; bits 15-12 are ignored.
+static void gld_fetch_2d_texel_X4R4G4B4(const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_CHAN( ((packed >>  8) & 0xf) * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_CHAN( ((packed >>  4) & 0xf) * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_CHAN( ((packed      ) & 0xf) * 255 / 0xf );
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch of texel (i,j) from a 2D X4R4G4B4 image (k only passed to USHORT_SRC); alpha is always 1.0.
+// Each 4-bit field (red bits 11-8, green 7-4, blue 3-0) is rescaled to 0..255 before conversion; bits 15-12 are ignored.
+static void gld_fetch_2d_texel_f_X4R4G4B4(const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((packed >>  8) & 0xf) * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((packed >>  4) & 0xf) * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((packed      ) & 0xf) * 255 / 0xf );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+#undef CHAN_SRC	/* retire the 2D addressing macros; the same four names are defined again below for 3D */
+#undef UBYTE_SRC
+#undef USHORT_SRC
+#undef FLOAT_SRC
+
+//---------------------------------------------------------------------------
+// 3D texture fetch
+//---------------------------------------------------------------------------
+
+// 3D texel addressing helpers: texel (i,j,k) lives at element offset
+// ((Height * k + j) * Width + i) within t->Data; sz is the number of elements
+// (of the pointed-to type) per texel. CHAN_SRC used to expand without outer
+// parentheses, unlike its siblings; every macro is now one parenthesised term.
+#define CHAN_SRC( t, i, j, k, sz )					\
+	((GLchan *)(t)->Data + (((t)->Height * (k) + (j)) * (t)->Width + (i)) * (sz))
+#define UBYTE_SRC( t, i, j, k, sz )					\
+	((GLubyte *)(t)->Data + (((t)->Height * (k) + (j)) * (t)->Width + (i)) * (sz))
+#define USHORT_SRC( t, i, j, k )					\
+	((GLushort *)(t)->Data + (((t)->Height * (k) + (j)) * (t)->Width + (i)))
+#define FLOAT_SRC( t, i, j, k )						\
+	((GLfloat *)(t)->Data + (((t)->Height * (k) + (j)) * (t)->Width + (i)))
+
+//---------------------------------------------------------------------------
+
+// Fetch texel (i,j,k) of a 3D X8R8G8B8 image as GLchan RGBA.
+static void gld_fetch_3d_texel_X8R8G8B8(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLchan *bgrx = CHAN_SRC( texImage, i, j, k, 4 );
+   texel[BCOMP] = bgrx[0];	// element 0 feeds blue, element 2 feeds red
+   texel[GCOMP] = bgrx[1];
+   texel[RCOMP] = bgrx[2];
+   texel[ACOMP] = CHAN_MAX;	// element 3 (X) is never read; alpha is forced to maximum
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch of texel (i,j,k) from a 3D X8R8G8B8 image; alpha is always 1.0.
+static void gld_fetch_3d_texel_f_X8R8G8B8(const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLchan *src = CHAN_SRC( texImage, i, j, k, 4 );
+   texel[RCOMP] = CHAN_TO_FLOAT(src[2]);	// red is element 2, as in the GLchan fetch (was src[0])
+   texel[GCOMP] = CHAN_TO_FLOAT(src[1]);
+   texel[BCOMP] = CHAN_TO_FLOAT(src[0]);	// blue is element 0 (was src[2])
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch texel (i,j,k) of a 3D X1R5G5B5 image as GLchan RGBA; alpha is CHAN_MAX.
+// Red (bits 14-10), green (9-5) and blue (4-0) are each shifted into bits 7-3 before the 0xf8 mask; the former >>10, >>5, >>0 shifts did not line up with that mask.
+static void gld_fetch_3d_texel_X1R5G5B5(const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort s = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_CHAN( ((s >> 7) & 0xf8) * 255 / 0xf8 );
+   texel[GCOMP] = UBYTE_TO_CHAN( ((s >> 2) & 0xf8) * 255 / 0xf8 );
+   texel[BCOMP] = UBYTE_TO_CHAN( ((s << 3) & 0xf8) * 255 / 0xf8 );
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch of texel (i,j,k) from a 3D X1R5G5B5 image; alpha is always 1.0.
+// Red (bits 14-10), green (9-5) and blue (4-0) are each shifted into bits 7-3 before the 0xf8 mask; the former >>10, >>5, >>0 shifts did not line up with that mask.
+static void gld_fetch_3d_texel_f_X1R5G5B5(const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort s = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((s >> 7) & 0xf8) * 255 / 0xf8 );
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((s >> 2) & 0xf8) * 255 / 0xf8 );
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((s << 3) & 0xf8) * 255 / 0xf8 );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch texel (i,j,k) of a 3D X4R4G4B4 image as GLchan RGBA; alpha is CHAN_MAX.
+// Each 4-bit field (red bits 11-8, green 7-4, blue 3-0) is rescaled to 0..255; bits 15-12 are ignored.
+static void gld_fetch_3d_texel_X4R4G4B4(const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_CHAN( ((packed >>  8) & 0xf) * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_CHAN( ((packed >>  4) & 0xf) * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_CHAN( ((packed      ) & 0xf) * 255 / 0xf );
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Float fetch of texel (i,j,k) from a 3D X4R4G4B4 image; alpha is always 1.0.
+// Each 4-bit field (red bits 11-8, green 7-4, blue 3-0) is rescaled to 0..255 before conversion; bits 15-12 are ignored.
+static void gld_fetch_3d_texel_f_X4R4G4B4(const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((packed >>  8) & 0xf) * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((packed >>  4) & 0xf) * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((packed      ) & 0xf) * 255 / 0xf );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+#undef CHAN_SRC	/* retire the 3D addressing macros so the four helper names do not stay defined past the fetch functions */
+#undef UBYTE_SRC
+#undef USHORT_SRC
+#undef FLOAT_SRC
+
+//---------------------------------------------------------------------------
+// Direct3D texture formats that have no Mesa equivalent
+//---------------------------------------------------------------------------
+
+const struct gl_texture_format _gld_texformat_X8R8G8B8 = {	/* 4-byte X8R8G8B8 texels; initialisers are positional, one per field named at right */
+   MESA_FORMAT_ARGB8888,		/* MesaFormat: reuses Mesa's ARGB8888 id */
+   GL_RGBA,				/* BaseFormat */
+   GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
+   8,					/* RedBits */
+   8,					/* GreenBits */
+   8,					/* BlueBits */
+   0,					/* AlphaBits: none, the X byte is not exposed as alpha */
+   0,					/* LuminanceBits */
+   0,					/* IntensityBits */
+   0,					/* IndexBits */
+   0,					/* DepthBits */
+   4,					/* TexelBytes */
+   _mesa_texstore_argb8888,			/* StoreTexImageFunc: Mesa's stock ARGB8888 store routine */
+   gld_fetch_1d_texel_X8R8G8B8,		/* FetchTexel1D */
+   gld_fetch_2d_texel_X8R8G8B8,		/* FetchTexel2D */
+   gld_fetch_3d_texel_X8R8G8B8,		/* FetchTexel3D */
+   gld_fetch_1d_texel_f_X8R8G8B8,		/* FetchTexel1Df */
+   gld_fetch_2d_texel_f_X8R8G8B8,		/* FetchTexel2Df */
+   gld_fetch_3d_texel_f_X8R8G8B8,		/* FetchTexel3Df */
+};
+
+const struct gl_texture_format _gld_texformat_X1R5G5B5 = {	/* 2-byte X1R5G5B5 texels; initialisers are positional, one per field named at right */
+   MESA_FORMAT_ARGB1555,		/* MesaFormat: reuses Mesa's ARGB1555 id */
+   GL_RGBA,				/* BaseFormat */
+   GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
+   5,					/* RedBits */
+   5,					/* GreenBits */
+   5,					/* BlueBits */
+   0,					/* AlphaBits: none, the X bit is not exposed as alpha */
+   0,					/* LuminanceBits */
+   0,					/* IntensityBits */
+   0,					/* IndexBits */
+   0,					/* DepthBits */
+   2,					/* TexelBytes */
+   _mesa_texstore_argb1555,			/* StoreTexImageFunc: Mesa's stock ARGB1555 store routine */
+   gld_fetch_1d_texel_X1R5G5B5,		/* FetchTexel1D */
+   gld_fetch_2d_texel_X1R5G5B5,		/* FetchTexel2D */
+   gld_fetch_3d_texel_X1R5G5B5,		/* FetchTexel3D */
+   gld_fetch_1d_texel_f_X1R5G5B5,		/* FetchTexel1Df */
+   gld_fetch_2d_texel_f_X1R5G5B5,		/* FetchTexel2Df */
+   gld_fetch_3d_texel_f_X1R5G5B5,		/* FetchTexel3Df */
+};
+
+const struct gl_texture_format _gld_texformat_X4R4G4B4 = {	/* 2-byte X4R4G4B4 texels; initialisers are positional, one per field named at right */
+   MESA_FORMAT_ARGB4444,		/* MesaFormat: reuses Mesa's ARGB4444 id */
+   GL_RGBA,				/* BaseFormat */
+   GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
+   4,					/* RedBits */
+   4,					/* GreenBits */
+   4,					/* BlueBits */
+   0,					/* AlphaBits: none, the X nibble is not exposed as alpha */
+   0,					/* LuminanceBits */
+   0,					/* IntensityBits */
+   0,					/* IndexBits */
+   0,					/* DepthBits */
+   2,					/* TexelBytes */
+   _mesa_texstore_argb4444,			/* StoreTexImageFunc: Mesa's stock ARGB4444 store routine */
+   gld_fetch_1d_texel_X4R4G4B4,		/* FetchTexel1D */
+   gld_fetch_2d_texel_X4R4G4B4,		/* FetchTexel2D */
+   gld_fetch_3d_texel_X4R4G4B4,		/* FetchTexel3D */
+   gld_fetch_1d_texel_f_X4R4G4B4,		/* FetchTexel1Df */
+   gld_fetch_2d_texel_f_X4R4G4B4,		/* FetchTexel2Df */
+   gld_fetch_3d_texel_f_X4R4G4B4,		/* FetchTexel3Df */
+};
+
+//---------------------------------------------------------------------------
+// Texture unit constants
+//---------------------------------------------------------------------------
+
+// Texture environment combinations: one constant per (env mode, base format) pair.
+// Example: GLD_TEXENV_MODULATE_RGBA means GL_MODULATE with a GL_RGBA base internal format.
+// Values run 0..14 in the same order as the entries of gldTexEnv[]; presumably they index it - confirm at the use site.
+#define GLD_TEXENV_DECAL_RGB		0
+#define GLD_TEXENV_DECAL_RGBA		1
+#define GLD_TEXENV_DECAL_ALPHA		2
+#define GLD_TEXENV_REPLACE_RGB		3
+#define GLD_TEXENV_REPLACE_RGBA		4
+#define GLD_TEXENV_REPLACE_ALPHA	5
+#define GLD_TEXENV_MODULATE_RGB		6
+#define GLD_TEXENV_MODULATE_RGBA	7
+#define GLD_TEXENV_MODULATE_ALPHA	8
+#define GLD_TEXENV_BLEND_RGB		9
+#define GLD_TEXENV_BLEND_RGBA		10
+#define GLD_TEXENV_BLEND_ALPHA		11
+#define GLD_TEXENV_ADD_RGB			12
+#define GLD_TEXENV_ADD_RGBA			13
+#define GLD_TEXENV_ADD_ALPHA		14
+
+// Per-stage (i.e. per-unit) texture environment: one colour operation and one alpha operation, each with its two arguments
+typedef struct {
+	DWORD			ColorArg1;	// Colour argument 1 (a D3DTA_* value in gldTexEnv[])
+	D3DTEXTUREOP	ColorOp;	// Colour operation (a D3DTOP_* value)
+	DWORD			ColorArg2;	// Colour argument 2 (a D3DTA_* value in gldTexEnv[])
+	DWORD			AlphaArg1;	// Alpha argument 1 (a D3DTA_* value in gldTexEnv[])
+	D3DTEXTUREOP	AlphaOp;	// Alpha operation (a D3DTOP_* value)
+	DWORD			AlphaArg2;	// Alpha argument 2 (a D3DTA_* value in gldTexEnv[])
+} GLD_texenv;
+
+// TODO: Do we really need to set ARG1 and ARG2 every time?
+//       They seem to always be TEXTURE and CURRENT respectively.
+
+// C = Colour out
+// A = Alpha out
+// Ct = Colour from Texture
+// Cf = Colour from fragment (diffuse)
+// At = Alpha from Texture
+// Af = Alpha from fragment (diffuse)
+// Cc = GL_TEXTURE_ENV_COLOR, the constant environment colour (GL_BLEND)
+const GLD_texenv gldTexEnv[] = {	// One entry per GLD_TEXENV_* value, in the same order. Fields: {ColorArg1, ColorOp, ColorArg2, AlphaArg1, AlphaOp, AlphaArg2}
+	// DECAL_RGB: C=Ct, A=Af (Arg1 is always the texture and Arg2 the incoming value, so SELECTARG1 picks t and SELECTARG2 picks f)
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// DECAL_RGBA: C=Cf(1-At)+CtAt, A=Af
+	{D3DTA_TEXTURE, D3DTOP_BLENDTEXTUREALPHA, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// DECAL_ALPHA: <undefined> use DECAL_RGB (same values as the DECAL_RGB entry)
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+
+	// REPLACE_RGB: C=Ct, A=Af
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// REPLACE_RGBA: C=Ct, A=At
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT},
+	// REPLACE_ALPHA: C=Cf, A=At
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT},
+
+	// MODULATE_RGB: C=CfCt, A=Af
+	{D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// MODULATE_RGBA: C=CfCt, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+	// MODULATE_ALPHA: C=Cf, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+
+	// BLEND_RGB: C=Cf(1-Ct)+CcCt, A=Af. NOTE(review): D3DTOP_LERP takes a third argument that this struct does not carry; presumably it is set elsewhere - confirm.
+	{D3DTA_TEXTURE, D3DTOP_LERP, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// BLEND_RGBA: C=Cf(1-Ct)+CcCt, A=AfAt (same LERP caveat as BLEND_RGB)
+	{D3DTA_TEXTURE, D3DTOP_LERP, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+	// BLEND_ALPHA: C=Cf, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+
+	// ADD_RGB: C=Cf+Ct, A=Af
+	{D3DTA_TEXTURE, D3DTOP_ADD, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// ADD_RGBA: C=Cf+Ct, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_ADD, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+	// ADD_ALPHA: C=Cf, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+};
+
+//---------------------------------------------------------------------------
+
+// Map a GL wrap mode to a D3D address mode. D3D clamp addressing clamps to the edge texels, so GL_CLAMP_TO_EDGE (previously treated as wrap) now clamps along with GL_CLAMP; every other mode repeats.
+D3DTEXTUREADDRESS _gldConvertWrap(GLenum wrap)
+{
+	return (wrap == GL_CLAMP || wrap == GL_CLAMP_TO_EDGE) ? D3DTADDRESS_CLAMP : D3DTADDRESS_WRAP;
+}
+
+//---------------------------------------------------------------------------
+
+// Map a GL magnification filter to D3D: GL_LINEAR gives D3DTEXF_LINEAR, any other value gives D3DTEXF_POINT.
+D3DTEXTUREFILTERTYPE _gldConvertMagFilter(GLenum magfilter)
+{
+	return (magfilter != GL_LINEAR) ? D3DTEXF_POINT : D3DTEXF_LINEAR;
+}
+
+//---------------------------------------------------------------------------
+
+// Split a GL minification filter into the D3D minification filter (texel
+// sampling within a level) and mip filter (selection between levels), written
+// through min_filter and mip_filter. Unrecognised values previously left both
+// outputs unwritten; they now get point sampling with no mipmapping.
+void _gldConvertMinFilter(GLenum minfilter,
+	D3DTEXTUREFILTERTYPE *min_filter, D3DTEXTUREFILTERTYPE *mip_filter)
+{
+	switch (minfilter) {
+	case GL_LINEAR:
+	case GL_LINEAR_MIPMAP_NEAREST:
+	case GL_LINEAR_MIPMAP_LINEAR:
+		*min_filter = D3DTEXF_LINEAR;
+		break;
+	default:	// GL_NEAREST, GL_NEAREST_MIPMAP_*, and anything unrecognised
+		*min_filter = D3DTEXF_POINT;
+		break;
+	}
+
+	switch (minfilter) {
+	case GL_NEAREST_MIPMAP_NEAREST:
+	case GL_LINEAR_MIPMAP_NEAREST:
+		*mip_filter = D3DTEXF_POINT;
+		break;
+	case GL_NEAREST_MIPMAP_LINEAR:
+	case GL_LINEAR_MIPMAP_LINEAR:
+		*mip_filter = D3DTEXF_LINEAR;
+		break;
+	default:	// GL_NEAREST, GL_LINEAR, and anything unrecognised
+		*mip_filter = D3DTEXF_NONE;
+		break;
+	}
+}
+
+//---------------------------------------------------------------------------
+
+// Pick the Direct3D texture format to request for a GL internal format. Sized and
+// unsized GL enums and the legacy component counts 1-4 are all accepted.
+D3DFORMAT _gldGLFormatToD3DFormat(GLenum internalFormat)
+{
+	switch (internalFormat) {
+	// LUMINANCE != INTENSITY, but D3D doesn't have I8 textures, so both use L8
+	case 1:
+	case GL_LUMINANCE:
+	case GL_LUMINANCE4:
+	case GL_LUMINANCE8:
+	case GL_LUMINANCE12:
+	case GL_LUMINANCE16:
+	case GL_INTENSITY:
+	case GL_INTENSITY4:
+	case GL_INTENSITY8:
+	case GL_INTENSITY12:
+	case GL_INTENSITY16:
+		return D3DFMT_L8;
+	case 2:
+	case GL_LUMINANCE_ALPHA:
+	case GL_LUMINANCE4_ALPHA4:
+	case GL_LUMINANCE6_ALPHA2:
+	case GL_LUMINANCE8_ALPHA8:
+	case GL_LUMINANCE12_ALPHA4:
+	case GL_LUMINANCE12_ALPHA12:
+	case GL_LUMINANCE16_ALPHA16:
+		return D3DFMT_A8L8;
+	case GL_ALPHA:
+	case GL_ALPHA4:
+	case GL_ALPHA8:
+	case GL_ALPHA12:
+	case GL_ALPHA16:
+		return D3DFMT_A8;
+	// Colour-index formats are requested as 32-bit X8R8G8B8
+	case GL_COLOR_INDEX:
+	case GL_COLOR_INDEX1_EXT:
+	case GL_COLOR_INDEX2_EXT:
+	case GL_COLOR_INDEX4_EXT:
+	case GL_COLOR_INDEX8_EXT:
+	case GL_COLOR_INDEX12_EXT:
+	case GL_COLOR_INDEX16_EXT:
+		return D3DFMT_X8R8G8B8;
+	// TODO: Mesa does not support RGB332 internally, so GL_R3_G3_B2 gets
+	// the same format as GL_RGB4 rather than D3DFMT_R3G3B2
+	case GL_R3_G3_B2:
+	case GL_RGB4:
+		return D3DFMT_X4R4G4B4;
+	case GL_RGB5:
+		return D3DFMT_X1R5G5B5;
+	case 3:
+	case GL_RGB:
+	case GL_RGB8:
+	case GL_RGB10:
+	case GL_RGB12:
+	case GL_RGB16:
+		return D3DFMT_R8G8B8;
+	case GL_RGBA4:
+		return D3DFMT_A4R4G4B4;
+	case GL_RGB5_A1:
+		return D3DFMT_A1R5G5B5;
+	case 4:
+	case GL_RGBA:
+	case GL_RGBA2:
+	case GL_RGBA8:
+	case GL_RGB10_A2:
+	case GL_RGBA12:
+	case GL_RGBA16:
+	default:
+		break;
+	}
+	return D3DFMT_A8R8G8B8;	// the explicit RGBA formats, and the acceptable default for anything unrecognised
+}
+
+//---------------------------------------------------------------------------
+
+// Examine a Direct3D texture and return the base OpenGL internal texture format
+// (GL_RGB, GL_RGBA or GL_ALPHA) matching the format of its level-0 surface.
+// NOTE: We can't use any base format info from Mesa because D3D might have
+// used a different texture format when we used D3DXCreateTexture().
+//
+// Base internal format is one of (Red Book p355): GL_ALPHA, GL_LUMINANCE,
+// GL_LUMINANCE_ALPHA, GL_INTENSITY, GL_RGB, GL_RGBA. Of these:
+//   INTENSITY is not used (not supported by Direct3D)
+//   LUMINANCE has same texture functions as RGB, so L8 is reported as GL_RGB
+//   LUMINANCE_ALPHA has same texture functions as RGBA, so A8L8/A4L4 give GL_RGBA
+GLenum _gldDecodeBaseFormat(
+	IDirect3DTexture8 *pTex)
+{
+	// TODO: cache format instead of using GetLevelDesc()
+	D3DSURFACE_DESC desc;
+
+	// The result of GetLevelDesc() is not checked, so pre-set the format: if the
+	// call leaves desc untouched we reach the default return below instead of
+	// switching on an uninitialised value.
+	desc.Format = D3DFMT_UNKNOWN;
+	_GLD_DX8_TEX(GetLevelDesc(pTex, 0, &desc));
+
+	switch (desc.Format) {
+	case D3DFMT_R8G8B8:
+	case D3DFMT_X8R8G8B8:
+	case D3DFMT_R5G6B5:
+	case D3DFMT_X1R5G5B5:
+	case D3DFMT_R3G3B2:
+	case D3DFMT_X4R4G4B4:
+	case D3DFMT_P8:
+	case D3DFMT_L8:
+		return GL_RGB;
+	case D3DFMT_A8R8G8B8:
+	case D3DFMT_A1R5G5B5:
+	case D3DFMT_A4R4G4B4:
+	case D3DFMT_A8R3G3B2:
+	case D3DFMT_A8P8:
+	case D3DFMT_A8L8:
+	case D3DFMT_A4L4:
+		return GL_RGBA;
+	case D3DFMT_A8:
+		return GL_ALPHA;
+
+	// Compressed texture formats. All five DXTn layouts can carry alpha
+	// (DXT2 and DXT4 are the premultiplied-alpha forms of DXT3 and DXT5),
+	// so DXT2/DXT4 now report GL_RGBA instead of GL_RGB.
+	case D3DFMT_DXT1:
+	case D3DFMT_DXT2:
+	case D3DFMT_DXT3:
+	case D3DFMT_DXT4:
+	case D3DFMT_DXT5:
+		return GL_RGBA;
+	default:
+		break;
+	}
+
+	// Fell through (including D3DFMT_UNKNOWN). Return arbitrary default.
+	return GL_RGBA;
+}
+
+//---------------------------------------------------------------------------
+
+const struct gl_texture_format* _gldMesaFormatForD3DFormat(
+	D3DFORMAT d3dfmt)
+{
+	// Look up the texture-format descriptor that corresponds to a Direct3D
+	// surface format. Formats with an alpha channel, plus RGB888, RGB565,
+	// RGB332 and L8, resolve to Mesa's _mesa_texformat_* descriptors; the
+	// X8R8G8B8, X1R5G5B5 and X4R4G4B4 formats resolve to _gld_texformat_*.
+
+	switch (d3dfmt) {
+	// 32-bit and 24-bit formats
+	case D3DFMT_A8R8G8B8:	return &_mesa_texformat_argb8888;
+	case D3DFMT_X8R8G8B8:	return &_gld_texformat_X8R8G8B8;
+	case D3DFMT_R8G8B8:		return &_mesa_texformat_rgb888;
+
+	// 16-bit formats
+	case D3DFMT_R5G6B5:		return &_mesa_texformat_rgb565;
+	case D3DFMT_A4R4G4B4:	return &_mesa_texformat_argb4444;
+	case D3DFMT_X4R4G4B4:	return &_gld_texformat_X4R4G4B4;
+	case D3DFMT_A1R5G5B5:	return &_mesa_texformat_argb1555;
+	case D3DFMT_X1R5G5B5:	return &_gld_texformat_X1R5G5B5;
+	case D3DFMT_A8L8:		return &_mesa_texformat_al88;
+
+	// 8-bit formats
+	case D3DFMT_R3G3B2:		return &_mesa_texformat_rgb332;
+	case D3DFMT_A8:			return &_mesa_texformat_a8;
+	case D3DFMT_L8:			return &_mesa_texformat_l8;
+
+	default:
+		break;
+	}
+
+	// If we reach here then we've made an error somewhere else
+	// by allowing a format that is not supported.
+	assert(0);
+	return NULL; // Only reached when assert() is compiled out
+}
+
+//---------------------------------------------------------------------------
+// Copy* functions
+//---------------------------------------------------------------------------
+
+// Unimplemented stub: the body is empty, so a call has no effect and
+// every argument is ignored.
+void gldCopyTexImage1D_DX8(
+	GLcontext *ctx, GLenum target, GLint level,
+	GLenum internalFormat,
+	GLint x, GLint y, GLsizei width, GLint border)
+{
+	// TODO
+}
+
+//---------------------------------------------------------------------------
+
+// Unimplemented stub: the body is empty, so a call has no effect and
+// every argument is ignored.
+void gldCopyTexImage2D_DX8(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLenum internalFormat,
+	GLint x, GLint y,
+	GLsizei width, GLsizei height,
+	GLint border)
+{
+	// TODO
+}
+
+//---------------------------------------------------------------------------
+
+// Unimplemented stub: the body is empty, so calling it has no effect.
+void gldCopyTexSubImage1D_DX8(
+	GLcontext *ctx, GLenum target, GLint level,
+	GLint xoffset, GLint x, GLint y, GLsizei width)
+{
+	// TODO
+}
+
+//---------------------------------------------------------------------------
+
+// Unimplemented stub: the body is empty, so a call has no effect and
+// every argument is ignored (no pixels are copied into the texture
+// sub-region).
+void gldCopyTexSubImage2D_DX8(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLint xoffset, GLint yoffset,
+	GLint x, GLint y,
+	GLsizei width, GLsizei height)
+{
+	// TODO
+}
+
+//---------------------------------------------------------------------------
+
+// Unimplemented stub: the body is empty, so a call has no effect and
+// every argument is ignored. The "TODO ?" marker suggests it is still
+// undecided whether this entry point will be implemented at all.
+void gldCopyTexSubImage3D_DX8(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLint xoffset, GLint yoffset, GLint zoffset,
+	GLint x, GLint y,
+	GLsizei width,
+	GLsizei height)
+{
+	// TODO ?
+}
+
+//---------------------------------------------------------------------------
+// Bitmap/Pixel functions
+//---------------------------------------------------------------------------
+
+#define GLD_FLIP_Y(y) (gldCtx->dwHeight - (y))
+// ^ Flips a GL y coordinate (dwHeight - y); needs a local 'gldCtx' in scope.
+#define _GLD_FVF_IMAGE	(D3DFVF_XYZRHW | D3DFVF_TEX1)
+// Vertex submitted with _GLD_FVF_IMAGE when drawing the image quad.
+typedef struct {
+	FLOAT	x, y;		// 2D raster coords
+	FLOAT	z;			// depth value
+	FLOAT	rhw;		// reciprocal homogeneous W (always 1.0f)
+	FLOAT	tu, tv;		// texture coords
+} _GLD_IMAGE_VERTEX;
+
+//---------------------------------------------------------------------------
+
+HRESULT _gldDrawPixels(
+	GLcontext *ctx,
+	BOOL bChromakey,	// Alpha test for glBitmap() images
+	GLint x,			// GL x position
+	GLint y,			// GL y position (needs flipping)
+	GLsizei width,		// Width of input image
+	GLsizei height,		// Height of input image
+	IDirect3DSurface8 *pImage)	// Source image; not released by this function
+{
+	// Draw input image as a textured quad, implementing PixelZoom and
+	// clipping. Any fragment operations currently enabled will be used.
+	// Returns the failing HRESULT if the temporary texture cannot be
+	// created or filled, otherwise S_OK (the draw call is not checked).
+
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+
+	IDirect3DTexture8	*pTexture;
+	D3DSURFACE_DESC		d3dsd;
+	IDirect3DSurface8	*pSurface;
+	_GLD_IMAGE_VERTEX	v[4];
+	HRESULT				hr;
+
+	float				ZoomWidth, ZoomHeight;
+	float				ScaleWidth, ScaleHeight;
+
+	// Create a temporary texture to hold image (released before returning)
+	hr = D3DXCreateTexture(
+		gld->pDev,
+		width, height,
+		1, // miplevels
+		0, // usage
+		D3DFMT_A8R8G8B8, // format
+		D3DPOOL_MANAGED, // pool
+		&pTexture);
+	if (FAILED(hr))
+		return hr;
+
+	hr = IDirect3DTexture8_GetSurfaceLevel(pTexture, 0, &pSurface);
+	if (FAILED(hr)) {
+		IDirect3DTexture8_Release(pTexture);
+		return hr;
+	}
+
+	// Copy image into texture; the level surface is released straight after
+	hr = D3DXLoadSurfaceFromSurface(
+		pSurface, NULL, NULL,	// Dest surface
+		pImage, NULL, NULL,		// Src surface
+		D3DX_FILTER_NONE,
+		0);
+	IDirect3DSurface8_Release(pSurface);
+	if (FAILED(hr)) {
+		IDirect3DTexture8_Release(pTexture);
+		return hr;
+	}
+
+	//
+	// Set up the quad like this (ascii-art ahead!)
+	//
+	// 3--2
+	// |  |
+	// 0--1
+	//
+	// (the numbers are indices into v[])
+
+	// Set depth
+	v[0].z = v[1].z = v[2].z = v[3].z = ctx->Current.RasterPos[2];
+	// Set Reciprocal Homogeneous W
+	v[0].rhw = v[1].rhw = v[2].rhw = v[3].rhw = 1.0f;
+
+	// Set texcoords
+	// Examine texture size - if different to input width and height
+	// then we'll need to munge the texcoords to fit.
+	IDirect3DTexture8_GetLevelDesc(pTexture, 0, &d3dsd);
+	ScaleWidth = (float)width / (float)d3dsd.Width;
+	ScaleHeight = (float)height / (float)d3dsd.Height;
+	v[0].tu = 0.0f;			v[0].tv = 0.0f;
+	v[1].tu = ScaleWidth;	v[1].tv = 0.0f;
+	v[2].tu = ScaleWidth;	v[2].tv = ScaleHeight;
+	v[3].tu = 0.0f;			v[3].tv = ScaleHeight;
+
+	// Set raster positions (image size scaled by the GL pixel zoom)
+	ZoomWidth = (float)width * ctx->Pixel.ZoomX;
+	ZoomHeight = (float)height * ctx->Pixel.ZoomY;
+
+	v[0].x = x;				v[0].y = GLD_FLIP_Y(y);
+	v[1].x = x+ZoomWidth;	v[1].y = GLD_FLIP_Y(y);
+	v[2].x = x+ZoomWidth;	v[2].y = GLD_FLIP_Y(y+ZoomHeight);
+	v[3].x = x;				v[3].y = GLD_FLIP_Y(y+ZoomHeight);
+
+	// Draw image with full HW acceleration
+	// NOTE: Be nice to use a State Block for all this state...
+	IDirect3DDevice8_SetTexture(gld->pDev, 0, pTexture);
+	IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_CULLMODE, D3DCULL_NONE);
+	IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_CLIPPING, TRUE);
+	IDirect3DDevice8_SetTextureStageState(gld->pDev, 0, D3DTSS_MINFILTER, D3DTEXF_POINT);
+	IDirect3DDevice8_SetTextureStageState(gld->pDev, 0, D3DTSS_MIPFILTER, D3DTEXF_POINT);
+	IDirect3DDevice8_SetTextureStageState(gld->pDev, 0, D3DTSS_MAGFILTER, D3DTEXF_POINT);
+	IDirect3DDevice8_SetTextureStageState(gld->pDev, 0, D3DTSS_ADDRESSU, D3DTADDRESS_CLAMP);
+	IDirect3DDevice8_SetTextureStageState(gld->pDev, 0, D3DTSS_ADDRESSV, D3DTADDRESS_CLAMP);
+	IDirect3DDevice8_SetTextureStageState(gld->pDev, 0, D3DTSS_COLOROP, D3DTOP_SELECTARG1);
+	IDirect3DDevice8_SetTextureStageState(gld->pDev, 0, D3DTSS_ALPHAOP, D3DTOP_SELECTARG1);
+	IDirect3DDevice8_SetTextureStageState(gld->pDev, 0, D3DTSS_COLORARG1, D3DTA_TEXTURE);
+	IDirect3DDevice8_SetTextureStageState(gld->pDev, 0, D3DTSS_ALPHAARG1, D3DTA_TEXTURE);
+	IDirect3DDevice8_SetTextureStageState(gld->pDev, 1, D3DTSS_COLOROP, D3DTOP_DISABLE);
+	IDirect3DDevice8_SetTextureStageState(gld->pDev, 1, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
+	IDirect3DDevice8_SetVertexShader(gld->pDev, _GLD_FVF_IMAGE);
+
+	//
+	// Emulate Chromakey with an Alpha Test.
+	// [Alpha Test is more widely supported anyway]
+	//
+	if (bChromakey) {
+		// Switch on alpha testing
+		IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_ALPHATESTENABLE, TRUE);
+		// Fragment passes if alpha is greater than reference value
+		IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_ALPHAFUNC, D3DCMP_GREATER);
+		// Set alpha reference value between Bitmap alpha values of
+		// zero (transparent) and one (opaque).
+		IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_ALPHAREF, 0x7f);
+	}
+
+	IDirect3DDevice8_DrawPrimitiveUP(gld->pDev, D3DPT_TRIANGLEFAN, 2, &v, sizeof(_GLD_IMAGE_VERTEX));
+
+	// Unbind the temporary texture from stage 0, then release it
+	IDirect3DDevice8_SetTexture(gld->pDev, 0, NULL);
+	IDirect3DTexture8_Release(pTexture);
+
+	// Reset state to before we messed it up
+	FLUSH_VERTICES(ctx, _NEW_ALL);
+
+	return S_OK;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_DrawPixels_DX8(
+	GLcontext *ctx,
+	GLint x, GLint y, GLsizei width, GLsizei height,
+	GLenum format, GLenum type,
+	const struct gl_pixelstore_attrib *unpack,
+	const GLvoid *pixels )
+{
+	// Draw a client-memory image at GL window position (x, y):
+	//   1. create a width x height A8R8G8B8 image surface,
+	//   2. have Mesa's StoreImage unpack 'pixels' (as described by format,
+	//      type and unpack) straight into the locked surface,
+	//   3. hand the surface to _gldDrawPixels() for rendering.
+	// Nothing is drawn, and no error is reported, if any step fails.
+
+	GLD_context			*gldCtx;
+	GLD_driver_dx8		*gld;
+	IDirect3DSurface8	*pImage;
+	HRESULT				hr;
+	D3DLOCKED_RECT		d3dLockedRect;
+	const struct gl_texture_format	*MesaFormat;
+
+	// MesaFormat only selects the StoreImage routine. Stop before touching
+	// D3D if Mesa offers no format for 'format' (presumably signalled by
+	// NULL, as in gld_ChooseTextureFormat_DX8) - TODO confirm.
+	MesaFormat = _mesa_choose_tex_format(ctx, format, format, type);
+	if (MesaFormat == NULL)
+		return;
+
+	gldCtx	= GLD_GET_CONTEXT(ctx);
+	gld		= GLD_GET_DX8_DRIVER(gldCtx);
+
+	hr = IDirect3DDevice8_CreateImageSurface(
+		gld->pDev, width, height, D3DFMT_A8R8G8B8, &pImage);
+	if (FAILED(hr))
+		return;
+
+	// Lock all of surface so that Mesa can fill in the image
+	hr = IDirect3DSurface8_LockRect(pImage, &d3dLockedRect, NULL, 0);
+	if (FAILED(hr)) {
+		IDirect3DSurface8_Release(pImage);
+		return;
+	}
+
+	// unpack image, apply transfer ops and store directly in the surface
+	MesaFormat->StoreImage(
+		ctx,
+		2,
+		GL_RGBA,
+		&_mesa_texformat_argb8888,
+		d3dLockedRect.pBits,
+		width, height, 1, 0, 0, 0,
+		d3dLockedRect.Pitch,
+		0, /* dstImageStride */
+		format, type, pixels, unpack);
+
+	IDirect3DSurface8_UnlockRect(pImage);
+	// The HRESULT of the draw is not propagated (this function returns void)
+	_gldDrawPixels(ctx, FALSE, x, y, width, height, pImage);
+	IDirect3DSurface8_Release(pImage);
+}
+
+//---------------------------------------------------------------------------
+
+void gld_ReadPixels_DX8(
+	GLcontext *ctx,
+	GLint x, GLint y, GLsizei width, GLsizei height,
+	GLenum format, GLenum type,
+	const struct gl_pixelstore_attrib *pack,
+	GLvoid *dest)
+{
+	// Read a width x height block of the first backbuffer into 'dest'.
+	GLD_context						*gldCtx;
+	GLD_driver_dx8					*gld;
+
+	IDirect3DSurface8				*pBackbuffer = NULL;
+	IDirect3DSurface8				*pNativeImage = NULL;
+	IDirect3DSurface8				*pCanonicalImage = NULL;
+
+	D3DSURFACE_DESC					d3dsd;
+	RECT							rcSrc; // Source rect
+	POINT							ptDst; // Dest point
+	HRESULT							hr;
+	D3DLOCKED_RECT					d3dLockedRect;
+	struct gl_pixelstore_attrib		srcPacking;
+	int								i;
+	GLint							DstRowStride;
+	const struct gl_texture_format	*MesaFormat;
+
+	// Stencil and depth reads are not supported
+	if (format == GL_STENCIL_INDEX || format == GL_DEPTH_COMPONENT)
+		return;
+
+	// Without a Mesa format there is no StoreImage routine to call
+	MesaFormat = _mesa_choose_tex_format(ctx, format, format, type);
+	if (MesaFormat == NULL)
+		return;
+	DstRowStride = _mesa_image_row_stride(pack, width, format, type);
+
+	gldCtx	= GLD_GET_CONTEXT(ctx);
+	gld		= GLD_GET_DX8_DRIVER(gldCtx);
+
+	// Get backbuffer
+	hr = IDirect3DDevice8_GetBackBuffer(
+		gld->pDev, 0, // First backbuffer
+		D3DBACKBUFFER_TYPE_MONO,
+		&pBackbuffer);
+	if (FAILED(hr))
+		return;
+
+	// Get backbuffer description
+	hr = IDirect3DSurface8_GetDesc(pBackbuffer, &d3dsd);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX8_return;
+	}
+
+	// Create a surface compatible with backbuffer
+	hr = IDirect3DDevice8_CreateImageSurface(
+		gld->pDev, 
+		width,
+		height,
+		d3dsd.Format,
+		&pNativeImage);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX8_return;
+	}
+
+	// Compute source rect and dest point
+	SetRect(&rcSrc, 0, 0, width, height);
+	OffsetRect(&rcSrc, x, GLD_FLIP_HEIGHT(y, height));
+	ptDst.x = ptDst.y = 0;
+
+	// Get source pixels.
+	// This intermediate surface ensures that we can use CopyRects()
+	// instead of relying on D3DXLoadSurfaceFromSurface(), which may
+	// try and lock the backbuffer. This way seems safer.
+	hr = IDirect3DDevice8_CopyRects(
+		gld->pDev,
+		pBackbuffer,
+		&rcSrc,
+		1,
+		pNativeImage,
+		&ptDst);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX8_return;
+	}
+
+	// Create an RGBA8888 surface
+	hr = IDirect3DDevice8_CreateImageSurface(
+		gld->pDev, 
+		width,
+		height,
+		D3DFMT_A8R8G8B8,
+		&pCanonicalImage);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX8_return;
+	}
+
+	// Convert to RGBA8888
+	hr = D3DXLoadSurfaceFromSurface(
+		pCanonicalImage,	// Dest surface
+		NULL, NULL,			// Dest palette, RECT
+		pNativeImage,		// Src surface
+		NULL, NULL,			// Src palette, RECT
+		D3DX_FILTER_NONE,	// Filter
+		0);					// Colourkey
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX8_return;
+	}
+
+	// Start from Mesa's defaults so that no packing field is left unset
+	srcPacking				= ctx->DefaultPacking;
+	srcPacking.Alignment	= 1;
+	srcPacking.ImageHeight	= height;
+	srcPacking.LsbFirst		= GL_FALSE;
+	srcPacking.RowLength	= 0;
+	srcPacking.SkipImages	= 0;
+	srcPacking.SkipPixels	= 0;
+	srcPacking.SkipRows		= 0;
+	srcPacking.SwapBytes	= GL_FALSE;
+
+	// Lock all of image
+	hr = IDirect3DSurface8_LockRect(pCanonicalImage, &d3dLockedRect, NULL, 0);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX8_return;
+	}
+
+	// We need to flip the data. Yuck.
+	// Perhaps Mesa has a span packer we can use in future...
+	for (i=0; i<height; i++) {
+		BYTE *pDestRow = (BYTE*)_mesa_image_address(2,pack, dest, width, height, format, type, 0, i, 0);
+		BYTE *pSrcRow = (BYTE*)d3dLockedRect.pBits + (d3dLockedRect.Pitch * (height-i-1));
+		MesaFormat->StoreImage(
+			ctx,
+			2,
+			GL_RGBA,				// base format
+			MesaFormat,				// dst format
+			pDestRow,				// dest addr
+			width, 1, 1, 0, 0, 0,	// src x,y,z & dst offsets x,y,z
+			DstRowStride,			// dst row stride
+			0,						// dstImageStride
+			GL_BGRA,				// src format
+			GL_UNSIGNED_BYTE,		// src type
+			pSrcRow,				// src addr
+			&srcPacking);			// packing params of source image
+	}
+
+	IDirect3DSurface8_UnlockRect(pCanonicalImage);
+
+gld_ReadPixels_DX8_return:
+	SAFE_RELEASE_SURFACE8(pCanonicalImage);
+	SAFE_RELEASE_SURFACE8(pNativeImage);
+	SAFE_RELEASE_SURFACE8(pBackbuffer);
+}
+
+//---------------------------------------------------------------------------
+
+void gld_CopyPixels_DX8(
+	GLcontext *ctx,
+	GLint srcx,
+	GLint srcy,
+	GLsizei width,
+	GLsizei height,
+	GLint dstx,
+	GLint dsty,
+	GLenum type)
+{
+	//
+	// Copy a width x height block of the first backbuffer from
+	// (srcx, srcy) to (dstx, dsty). Only GL_COLOR copies are handled;
+	// any other 'type', and any Direct3D failure, makes this a no-op.
+	//
+	// NOTE: Not allowed to copy vidmem to vidmem!
+	//       Therefore the pixels are first copied into an intermediate
+	//       image surface, which is then drawn by _gldDrawPixels().
+	//
+
+	GLD_context			*gldCtx;
+	GLD_driver_dx8		*gld;
+
+	IDirect3DSurface8	*pColourBuffer;	// first backbuffer
+	IDirect3DSurface8	*pStaging;		// intermediate image surface
+	D3DSURFACE_DESC		bufferDesc;
+	RECT				srcRect;
+	POINT				dstOrigin;
+	HRESULT				hr;
+
+	// Only backbuffer
+	if (type != GL_COLOR)
+		return;
+
+	gldCtx	= GLD_GET_CONTEXT(ctx);
+	gld		= GLD_GET_DX8_DRIVER(gldCtx);
+
+	// Get backbuffer
+	hr = IDirect3DDevice8_GetBackBuffer(
+		gld->pDev,
+		0, // First backbuffer
+		D3DBACKBUFFER_TYPE_MONO,
+		&pColourBuffer);
+	if (FAILED(hr))
+		return;
+
+	// Get backbuffer description, then create a surface compatible with it
+	hr = IDirect3DSurface8_GetDesc(pColourBuffer, &bufferDesc);
+	if (!FAILED(hr)) {
+		hr = IDirect3DDevice8_CreateImageSurface(
+			gld->pDev,
+			width,
+			height,
+			bufferDesc.Format,
+			&pStaging);
+	}
+	if (FAILED(hr)) {
+		IDirect3DSurface8_Release(pColourBuffer);
+		return;
+	}
+
+	// Compute source rect and dest point
+	SetRect(&srcRect, 0, 0, width, height);
+	OffsetRect(&srcRect, srcx, GLD_FLIP_HEIGHT(srcy, height));
+	dstOrigin.x = 0;
+	dstOrigin.y = 0;
+
+	// Get source pixels; the backbuffer is not needed after this
+	hr = IDirect3DDevice8_CopyRects(
+		gld->pDev,
+		pColourBuffer,
+		&srcRect,
+		1,
+		pStaging, &dstOrigin);
+	IDirect3DSurface8_Release(pColourBuffer);
+
+	// Draw the copy at its destination unless reading the pixels failed
+	if (!FAILED(hr)) {
+		_gldDrawPixels(ctx, FALSE, dstx, dsty, width, height, pStaging);
+	}
+
+	IDirect3DSurface8_Release(pStaging);
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Bitmap_DX8(
+	GLcontext *ctx,
+	GLint x,
+	GLint y,
+	GLsizei width,
+	GLsizei height,
+	const struct gl_pixelstore_attrib *unpack,
+	const GLubyte *bitmap)
+{
+	//
+	// Draw a 1-bit-per-pixel bitmap at GL window position (x, y).
+	// The bitmap is expanded into an A8R8G8B8 image surface: set bits
+	// become the current raster colour with full alpha, clear bits become
+	// transparent black. _gldDrawPixels() then draws the surface with its
+	// alpha test enabled so that only the set bits reach the framebuffer.
+	// Nothing is drawn, and no error is reported, if any step fails.
+	//
+
+	GLD_context			*gldCtx;
+	GLD_driver_dx8		*gld;
+
+	IDirect3DSurface8	*pImage;
+	HRESULT				hr;
+	D3DLOCKED_RECT		d3dLockedRect;
+	BYTE				*pTempBitmap;
+	D3DCOLOR			clBitmapOne, clBitmapZero;
+	D3DCOLOR			*pBits;
+	const GLubyte		*src;
+	int					i, j, k;
+
+	gldCtx	= GLD_GET_CONTEXT(ctx);
+	gld		= GLD_GET_DX8_DRIVER(gldCtx);
+
+	clBitmapZero	= D3DCOLOR_RGBA(0,0,0,0); // NOTE: Alpha is Zero
+	clBitmapOne		= D3DCOLOR_COLORVALUE(
+		ctx->Current.RasterColor[0],
+		ctx->Current.RasterColor[1],
+		ctx->Current.RasterColor[2],
+		1.0f); // NOTE: Alpha is One
+
+	hr = IDirect3DDevice8_CreateImageSurface(
+		gld->pDev, 
+		width,
+		height,
+		D3DFMT_A8R8G8B8,
+		&pImage);
+	if (FAILED(hr)) {
+		return;
+	}
+
+	// Lock all of surface 
+	hr = IDirect3DSurface8_LockRect(pImage, &d3dLockedRect, NULL, 0);
+	if (FAILED(hr)) {
+		IDirect3DSurface8_Release(pImage);
+		return;
+	}
+
+	pTempBitmap = _mesa_unpack_bitmap(width, height, bitmap, unpack);
+	if (pTempBitmap == NULL) {
+		// The surface is still locked here: unlock it before releasing
+		IDirect3DSurface8_UnlockRect(pImage);
+		IDirect3DSurface8_Release(pImage);
+		return;
+	}
+
+	// Expand the bitmap one row at a time. pTempBitmap was unpacked by
+	// Mesa, so rows are addressed using ctx->DefaultPacking. Each source
+	// byte holds eight pixels, most significant bit first.
+	for (i=0; i<height; i++) {
+		GLubyte byte;
+		pBits = (D3DCOLOR*)((BYTE*)d3dLockedRect.pBits + (i*d3dLockedRect.Pitch));
+		src = (const GLubyte *) _mesa_image_address(2,
+			&ctx->DefaultPacking, pTempBitmap, width, height, GL_COLOR_INDEX, GL_BITMAP,
+			0, i, 0);
+		for (j=0; j<(width>>3); j++) {
+			byte = *src++;
+			for (k=0; k<8; k++) {
+				*pBits++ = (byte & 128) ? clBitmapOne : clBitmapZero;
+				byte <<= 1;
+			}
+		}
+		// Fill remaining bits from bitmap
+		if (width & 7) {
+			byte = *src;
+			for (k=0; k<(width & 7); k++) {
+				*pBits++ = (byte & 128) ? clBitmapOne : clBitmapZero;
+				byte <<= 1;
+			}
+		}
+	}
+
+	// The unpacked copy is no longer needed once the surface is filled
+	FREE(pTempBitmap);
+
+	IDirect3DSurface8_UnlockRect(pImage);
+
+	_gldDrawPixels(ctx, TRUE, x, y, width, height, pImage);
+
+	IDirect3DSurface8_Release(pImage);
+}
+
+//---------------------------------------------------------------------------
+// Texture functions
+//---------------------------------------------------------------------------
+
+void _gldAllocateTexture(
+	GLcontext *ctx,
+	struct gl_texture_object *tObj,
+	struct gl_texture_image *texImage)
+{
+	// Make sure tObj->DriverData holds a D3D texture sized for texImage.
+	// An existing texture is kept only if both its top-level width and
+	// height match; otherwise it is released and a new one is created.
+	// DriverData is left NULL if the new texture cannot be created.
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+
+	IDirect3DTexture8	*pTex;
+	D3DFORMAT			d3dFormat;
+	HRESULT				hr;
+
+	if (!tObj || !texImage)
+		return;
+
+	pTex = (IDirect3DTexture8*)tObj->DriverData;
+	if (pTex) {
+		// Keep the existing D3D texture if its top-level surface fits
+		D3DSURFACE_DESC d3dsd;
+		_GLD_DX8_TEX(GetLevelDesc(pTex, 0, &d3dsd));
+		if ((d3dsd.Width == texImage->Width) &&
+			(d3dsd.Height == texImage->Height)) {
+			return; // Keep the existing texture
+		}
+		// Release existing texture as it is not compatible
+		tObj->DriverData = NULL;
+		_GLD_DX8_TEX(Release(pTex));
+	}
+
+	d3dFormat = _gldGLFormatToD3DFormat(texImage->IntFormat);
+	hr = D3DXCreateTexture(
+		gld->pDev, texImage->Width, texImage->Height,
+		// TODO: Re-evaluate mipmapping
+		(glb.bUseMipmaps) ? D3DX_DEFAULT : 1,
+		0,				// Usage
+		d3dFormat, D3DPOOL_MANAGED, &pTex);
+	// Never publish a stale or undefined pointer when creation fails
+	tObj->DriverData = FAILED(hr) ? NULL : pTex;
+}
+
+//---------------------------------------------------------------------------
+
+const struct gl_texture_format* gld_ChooseTextureFormat_DX8(
+	GLcontext *ctx,
+	GLint internalFormat,
+	GLenum srcFormat,
+	GLenum srcType)
+{
+	// [Based on mesa_choose_tex_format()]
+	//
+	// Choose a Mesa texture format for internalFormat (srcFormat and
+	// srcType are not consulted). We choose only formats supported by
+	// Direct3D; if the hardware doesn't support one, the D3DX texture
+	// calls that we use will automatically use a HW supported format.
+	// The most critical aim is to reduce copying; using texture-image
+	// data directly is a big performance assist.
+	// Returns NULL, after reporting a Mesa problem, when internalFormat
+	// is not recognised.
+
+	switch (internalFormat) {
+	case GL_INTENSITY:
+	case GL_INTENSITY4:
+	case GL_INTENSITY8:
+	case GL_INTENSITY12:
+	case GL_INTENSITY16:
+		return &_mesa_texformat_l8; // D3DFMT_L8
+	case 1:
+	case GL_LUMINANCE:
+	case GL_LUMINANCE4:
+	case GL_LUMINANCE8:
+	case GL_LUMINANCE12:
+	case GL_LUMINANCE16:
+		return &_mesa_texformat_l8; // D3DFMT_L8
+	case GL_ALPHA:
+	case GL_ALPHA4:
+	case GL_ALPHA8:
+	case GL_ALPHA12:
+	case GL_ALPHA16:
+		return &_mesa_texformat_a8; // D3DFMT_A8
+	case GL_COLOR_INDEX:
+	case GL_COLOR_INDEX1_EXT:
+	case GL_COLOR_INDEX2_EXT:
+	case GL_COLOR_INDEX4_EXT:
+	case GL_COLOR_INDEX8_EXT:
+	case GL_COLOR_INDEX12_EXT:
+	case GL_COLOR_INDEX16_EXT:
+		return &_mesa_texformat_rgb565; // D3DFMT_R5G6B5
+		// Mesa will convert this for us later...
+		//      return &_mesa_texformat_ci8; // D3DFMT_R5G6B5
+	case 2:
+	case GL_LUMINANCE_ALPHA:
+	case GL_LUMINANCE4_ALPHA4:
+	case GL_LUMINANCE6_ALPHA2:
+	case GL_LUMINANCE8_ALPHA8:
+	case GL_LUMINANCE12_ALPHA4:
+	case GL_LUMINANCE12_ALPHA12:
+	case GL_LUMINANCE16_ALPHA16:
+		return &_mesa_texformat_al88; // D3DFMT_A8L8
+	case GL_R3_G3_B2:
+		return &_mesa_texformat_rgb332; // D3DFMT_R3G3B2
+	case GL_RGB4:
+	case GL_RGBA4:
+	case GL_RGBA2:
+		return &_mesa_texformat_argb4444; // D3DFMT_A4R4G4B4
+	case 3:
+	case GL_RGB:
+	case GL_RGB5:
+	case GL_RGB8:
+	case GL_RGB10:
+	case GL_RGB12:
+	case GL_RGB16:
+		return &_mesa_texformat_rgb565;
+	case 4:
+	case GL_RGBA:
+	case GL_RGBA8:
+	case GL_RGB10_A2:
+	case GL_RGBA12:
+	case GL_RGBA16:
+		return &_mesa_texformat_argb8888;
+	case GL_RGB5_A1:
+		return &_mesa_texformat_argb1555;
+	default:
+		_mesa_problem(NULL, "unexpected format in gld_ChooseTextureFormat_DX8");
+		return NULL;
+	}
+}
+
+//---------------------------------------------------------------------------
+
+/*
+// Safer(?), slower version.
+void gld_TexImage2D_DX8(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLint internalFormat,
+	GLint width,
+	GLint height,
+	GLint border,
+	GLenum format,
+	GLenum type,
+	const GLvoid *pixels,
+	const struct gl_pixelstore_attrib *packing,
+	struct gl_texture_object *tObj,
+	struct gl_texture_image *texImage)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+
+	IDirect3DTexture8	*pTex;
+	IDirect3DSurface8	*pSurface;
+	RECT				rcSrcRect;
+	HRESULT				hr;
+	GLint				texelBytes = 4;
+	GLvoid				*tempImage;
+
+	if (!tObj || !texImage)
+		return;
+
+	if (level == 0) {
+		_gldAllocateTexture(ctx, tObj, texImage);
+	}
+
+	pTex = (IDirect3DTexture8*)tObj->DriverData;
+	if (!pTex)
+		return; // Texture has not been created
+	if (level >= IDirect3DTexture8_GetLevelCount(pTex))
+		return; // Level does not exist
+	hr = IDirect3DTexture8_GetSurfaceLevel(pTex, level, &pSurface);
+	if (FAILED(hr))
+		return; // Surface level doesn't exist (or just a plain error)
+
+	tempImage = MALLOC(width * height * texelBytes);
+	if (!tempImage) {
+		_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+		IDirect3DSurface8_Release(pSurface);
+		return;
+	}
+	// unpack image, apply transfer ops and store in tempImage
+	texImage->TexFormat->StoreImage(ctx, 2, texImage->Format,
+		&_mesa_texformat_argb8888, // dest format
+		tempImage,
+		width, height, 1, 0, 0, 0,
+		width * texelBytes,
+		0, // dstImageStride
+		format, type, pixels, packing);
+
+	SetRect(&rcSrcRect, 0, 0, width, height);
+	D3DXLoadSurfaceFromMemory(
+		pSurface,
+		NULL,
+		NULL,
+		tempImage,
+		D3DFMT_A8R8G8B8,
+		width * texelBytes,
+		NULL,
+		&rcSrcRect,
+		D3DX_FILTER_NONE,
+		0);
+
+	FREE(tempImage);
+	IDirect3DSurface8_Release(pSurface);
+}
+*/
+
+//---------------------------------------------------------------------------
+
+// Faster, more efficient version.
+// Copies subimage straight to dest texture
+void gld_TexImage2D_DX8(
+	GLcontext *ctx,
+	GLenum target, GLint level, GLint internalFormat,
+	GLint width, GLint height, GLint border,
+	GLenum format, GLenum type,
+	const GLvoid *pixels,
+	const struct gl_pixelstore_attrib *packing,
+	struct gl_texture_object *tObj,
+	struct gl_texture_image *texImage)
+{
+	// Upload one mip level of a 2D texture: level 0 (re)allocates the D3D
+	// texture, then the client image is unpacked by Mesa straight into the
+	// locked D3D surface of 'level'. Returns silently on any failure.
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	IDirect3DTexture8	*pTex;
+	IDirect3DSurface8	*pSurface;
+	HRESULT				hr;
+	D3DLOCKED_RECT		d3dLockedRect;
+	D3DSURFACE_DESC		d3dsd;
+	const struct gl_texture_format	*pDstFormat;
+
+	if (!tObj || !texImage)
+		return;
+
+	// GLQUAKE FIX: test for input alpha data with non-alpha internalformat
+	if (((internalFormat==3) || (internalFormat==GL_RGB)) && (format==GL_RGBA)) {
+		// Input format has alpha, but a non-alpha format has been requested.
+		texImage->IntFormat = GL_RGBA;
+		internalFormat = GL_RGBA;
+	}
+
+	if (level == 0) {
+		_gldAllocateTexture(ctx, tObj, texImage);
+	}
+
+	pTex = (IDirect3DTexture8*)tObj->DriverData;
+	if (!pTex)
+		return; // Texture has not been created
+	if (level >= IDirect3DTexture8_GetLevelCount(pTex))
+		return; // Level does not exist
+	hr = IDirect3DTexture8_GetSurfaceLevel(pTex, level, &pSurface);
+	if (FAILED(hr))
+		return; // Surface level doesn't exist (or just a plain error)
+
+	// The destination layout comes from the D3D surface itself. Give up if it
+	// cannot be queried or has no Mesa equivalent (NULL when asserts are off).
+	hr = IDirect3DSurface8_GetDesc(pSurface, &d3dsd);
+	pDstFormat = FAILED(hr) ? NULL : _gldMesaFormatForD3DFormat(d3dsd.Format);
+	if (!pDstFormat) {
+		IDirect3DSurface8_Release(pSurface);
+		return;
+	}
+
+	// Lock all of surface
+	hr = IDirect3DSurface8_LockRect(pSurface, &d3dLockedRect, NULL, 0);
+	if (FAILED(hr)) {
+		IDirect3DSurface8_Release(pSurface);
+		return;
+	}
+
+	// unpack image, apply transfer ops and store directly in texture
+	texImage->TexFormat->StoreImage(
+		ctx, 2, texImage->Format,
+		pDstFormat, d3dLockedRect.pBits,
+		width, height, 1, 0, 0, 0,
+		d3dLockedRect.Pitch, 0, // dst row stride, dstImageStride
+		format, type, pixels, packing);
+
+	IDirect3DSurface8_UnlockRect(pSurface);
+	IDirect3DSurface8_Release(pSurface);
+}
+
+//---------------------------------------------------------------------------
+
+// A 1D texture is treated as a 2D texture with a height of one, so this
+// simply forwards to gld_TexImage2D_DX8().
+void gld_TexImage1D_DX8(
+	GLcontext *ctx, GLenum target, GLint level, GLint internalFormat,
+	GLint width, GLint border, GLenum format, GLenum type,
+	const GLvoid *pixels, const struct gl_pixelstore_attrib *packing,
+	struct gl_texture_object *texObj, struct gl_texture_image *texImage)
+{
+	gld_TexImage2D_DX8(ctx, target, level, internalFormat, width, 1, border,
+		format, type, pixels, packing, texObj, texImage);
+}
+
+//---------------------------------------------------------------------------
+
+/*
+void gld_TexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+                          GLint xoffset, GLint yoffset,
+                          GLsizei width, GLsizei height,
+                          GLenum format, GLenum type,
+                          const GLvoid *pixels,
+                          const struct gl_pixelstore_attrib *packing,
+                          struct gl_texture_object *tObj,
+                          struct gl_texture_image *texImage )
+{
+	GLD_GET_CONTEXT
+	IDirect3DTexture8	*pTex;
+	IDirect3DSurface8	*pSurface;
+	D3DFORMAT			d3dFormat;
+	HRESULT				hr;
+	GLint				texelBytes = 4;
+	GLvoid				*tempImage;
+	RECT				rcSrcRect;
+	RECT				rcDstRect;
+
+	if (!tObj || !texImage)
+		return;
+
+	pTex = (IDirect3DTexture8*)tObj->DriverData;
+	if (!pTex)
+		return; // Texture has not been created
+	if (level >= _GLD_DX8_TEX(GetLevelCount(pTex))
+		return; // Level does not exist
+	hr = _GLD_DX8_TEX(GetSurfaceLevel(pTex, level, &pSurface);
+	if (FAILED(hr))
+		return; // Surface level doesn't exist (or just a plain error)
+
+	d3dFormat = _gldGLFormatToD3DFormat(texImage->Format);
+	tempImage = MALLOC(width * height * texelBytes);
+	if (!tempImage) {
+		_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+		IDirect3DSurface8_Release(pSurface);
+		return;
+	}
+
+	// unpack image, apply transfer ops and store in tempImage
+	texImage->TexFormat->StoreImage(ctx, 2, texImage->Format,
+		&_mesa_texformat_argb8888, // dest format
+		tempImage,
+		width, height, 1, 0, 0, 0,
+		width * texelBytes,
+		0, // dstImageStride
+		format, type, pixels, packing);
+
+	// Source rectangle is whole of input image
+	SetRect(&rcSrcRect, 0, 0, width, height);
+
+	// Dest rectangle must be offset to dest image
+	SetRect(&rcDstRect, 0, 0, width, height);
+	OffsetRect(&rcDstRect, xoffset, yoffset);
+
+	D3DXLoadSurfaceFromMemory(
+		pSurface,
+		NULL,
+		&rcDstRect,
+		tempImage,
+		D3DFMT_A8R8G8B8,
+		width * texelBytes,
+		NULL,
+		&rcSrcRect,
+		D3DX_FILTER_NONE,
+		0);
+
+	FREE(tempImage);
+	IDirect3DSurface8_Release(pSurface);
+}
+*/
+
+//---------------------------------------------------------------------------
+
+// Faster, more efficient version.
+// Copies subimage straight to dest texture
+void gld_TexSubImage2D_DX8(
+	GLcontext *ctx, GLenum target, GLint level,
+	GLint xoffset, GLint yoffset, GLsizei width, GLsizei height,
+	GLenum format, GLenum type, const GLvoid *pixels,
+	const struct gl_pixelstore_attrib *packing,
+	struct gl_texture_object *tObj, struct gl_texture_image *texImage)
+{
+	// Overwrite the width x height region at (xoffset, yoffset) of an
+	// existing texture level. Returns silently on any failure.
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+	IDirect3DTexture8	*pTex;
+	IDirect3DSurface8	*pSurface;
+	HRESULT				hr;
+	RECT				rcDstRect;
+	D3DLOCKED_RECT		d3dLockedRect;
+	D3DSURFACE_DESC		d3dsd;
+	const struct gl_texture_format	*pDstFormat;
+
+	if (!tObj || !texImage)
+		return;
+
+	pTex = (IDirect3DTexture8*)tObj->DriverData;
+	if (!pTex)
+		return; // Texture has not been created
+	if (level >= IDirect3DTexture8_GetLevelCount(pTex))
+		return; // Level does not exist
+	hr = IDirect3DTexture8_GetSurfaceLevel(pTex, level, &pSurface);
+	if (FAILED(hr))
+		return; // Surface level doesn't exist (or just a plain error)
+
+	// Give up if the surface can't be described or has no Mesa equivalent
+	hr = IDirect3DSurface8_GetDesc(pSurface, &d3dsd);
+	pDstFormat = FAILED(hr) ? NULL : _gldMesaFormatForD3DFormat(d3dsd.Format);
+	if (!pDstFormat) {
+		IDirect3DSurface8_Release(pSurface);
+		return;
+	}
+
+	// Dest rectangle must be offset to dest image
+	SetRect(&rcDstRect, 0, 0, width, height);
+	OffsetRect(&rcDstRect, xoffset, yoffset);
+	// Lock sub-rect of surface
+	hr = IDirect3DSurface8_LockRect(pSurface, &d3dLockedRect, &rcDstRect, 0);
+	if (FAILED(hr)) {
+		IDirect3DSurface8_Release(pSurface);
+		return;
+	}
+
+	// unpack image, apply transfer ops and store directly in texture
+	texImage->TexFormat->StoreImage(ctx, 2, texImage->Format,
+		pDstFormat, d3dLockedRect.pBits,
+		width, height, 1, 0, 0, 0, // NOTE: d3dLockedRect.pBits is already offset!!!
+		d3dLockedRect.Pitch, 0, // dst row stride, dstImageStride
+		format, type, pixels, packing);
+	IDirect3DSurface8_UnlockRect(pSurface);
+	IDirect3DSurface8_Release(pSurface);
+}
+
+//---------------------------------------------------------------------------
+
+// Forwards to gld_TexSubImage2D_DX8() as a 1-texel-high row at yoffset 0.
+void gld_TexSubImage1D_DX8(
+	GLcontext *ctx, GLenum target, GLint level,
+	GLint xoffset, GLsizei width, GLenum format, GLenum type,
+	const GLvoid *pixels, const struct gl_pixelstore_attrib *packing,
+	struct gl_texture_object *texObj, struct gl_texture_image *texImage)
+{
+	gld_TexSubImage2D_DX8(ctx, target, level, xoffset, 0, width, 1,
+		format, type, pixels, packing, texObj, texImage);
+}
+
+//---------------------------------------------------------------------------
+
+void gld_DeleteTexture_DX8(
+	GLcontext *ctx,
+	struct gl_texture_object *tObj)
+{
+	// Release the D3D texture (if any) held in tObj->DriverData and clear
+	// the pointer. NOTE: the texture is not unbound from any texture stage
+	// first - the code that did so is disabled.
+
+	GLD_context			*gld = (GLD_context*)(ctx->DriverCtx);
+	IDirect3DTexture8	*pD3DTexture;
+
+	if (tObj == NULL)
+		return;
+
+	pD3DTexture = (IDirect3DTexture8*)tObj->DriverData;
+	if (pD3DTexture == NULL)
+		return;
+
+	_GLD_DX8_TEX(Release(pD3DTexture));
+	tObj->DriverData = NULL;
+}
+
+//---------------------------------------------------------------------------
+
+__inline void _gldSetColorOps(
+	const GLD_driver_dx8 *gld,
+	GLuint unit,
+	DWORD ColorArg1,
+	D3DTEXTUREOP ColorOp,
+	DWORD ColorArg2)
+{	// Sets D3DTSS_COLORARG1, D3DTSS_COLOROP and D3DTSS_COLORARG2 on texture stage 'unit' of gld->pDev.
+	_GLD_DX8_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_COLORARG1, ColorArg1));
+	_GLD_DX8_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_COLOROP, ColorOp));
+	_GLD_DX8_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_COLORARG2, ColorArg2));
+}
+
+//---------------------------------------------------------------------------
+
+__inline void _gldSetAlphaOps(
+	const GLD_driver_dx8 *gld,
+	GLuint unit,
+	DWORD AlphaArg1,
+	D3DTEXTUREOP AlphaOp,
+	DWORD AlphaArg2)
+{	// Sets D3DTSS_ALPHAARG1, D3DTSS_ALPHAOP and D3DTSS_ALPHAARG2 on texture stage 'unit' of gld->pDev.
+	_GLD_DX8_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_ALPHAARG1, AlphaArg1));
+	_GLD_DX8_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_ALPHAOP, AlphaOp));
+	_GLD_DX8_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_ALPHAARG2, AlphaArg2));
+}
+
+//---------------------------------------------------------------------------
+
+void gldUpdateTextureUnit(
+	GLcontext *ctx,
+	GLuint unit,
+	BOOL bPassThrough)
+{	// Programs D3D texture stage 'unit' from Mesa's state for that unit: binds (or unbinds) the texture, then sets filter, wrap, priority and texenv ops.
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+
+	D3DTEXTUREFILTERTYPE	minfilter;
+	D3DTEXTUREFILTERTYPE	mipfilter;
+	GLenum					BaseFormat;
+	DWORD					dwColorArg0;
+	int						iTexEnv = 0;	// Index into gldTexEnv[]: base-format offset (0..2) plus env-mode offset (0,3,6,9,12)
+	GLD_texenv				*pTexenv;
+
+	// NOTE: If bPassThrough is FALSE then the texture stage can be
+	// disabled; otherwise it must pass through its current fragment.
+
+	const struct gl_texture_unit *pUnit = &ctx->Texture.Unit[unit];
+	const struct gl_texture_object *tObj = pUnit->_Current;
+
+	IDirect3DTexture8 *pTex = NULL;
+	if (tObj) {
+		pTex = (IDirect3DTexture8*)tObj->DriverData;
+	}
+
+	// Enable texturing if unit is enabled and a valid D3D texture exists
+	// Mesa 5: TEXTUREn_x altered to TEXTURE_nD_BIT
+	//if (pTex && (pUnit->Enabled & (TEXTURE0_1D | TEXTURE0_2D))) {
+	if (pTex && (pUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT))) {
+		// Enable texturing
+		_GLD_DX8_DEV(SetTexture(gld->pDev, unit, pTex));
+	} else {
+		// Disable texturing, then return
+		_GLD_DX8_DEV(SetTexture(gld->pDev, unit, NULL));
+		if (bPassThrough) {
+			_gldSetColorOps(gld, unit, D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_DIFFUSE);	// Stage outputs arg2 (diffuse) unchanged
+			_gldSetAlphaOps(gld, unit, D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_DIFFUSE);
+		} else {
+			_gldSetColorOps(gld, unit, D3DTA_TEXTURE, D3DTOP_DISABLE, D3DTA_DIFFUSE);
+			_gldSetAlphaOps(gld, unit, D3DTA_TEXTURE, D3DTOP_DISABLE, D3DTA_DIFFUSE);
+		}
+		return;
+	}
+
+	// Texture parameters (tObj is non-NULL here because pTex was non-NULL)
+	_gldConvertMinFilter(tObj->MinFilter, &minfilter, &mipfilter);
+	_GLD_DX8_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_MINFILTER, minfilter));
+	_GLD_DX8_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_MIPFILTER, mipfilter));
+	_GLD_DX8_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_MAGFILTER, _gldConvertMagFilter(tObj->MagFilter)));
+	_GLD_DX8_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_ADDRESSU, _gldConvertWrap(tObj->WrapS)));
+	_GLD_DX8_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_ADDRESSV, _gldConvertWrap(tObj->WrapT)));
+
+	// Texture priority (GL priority scaled by 65535 and truncated to a DWORD)
+	_GLD_DX8_TEX(SetPriority(pTex, (DWORD)(tObj->Priority*65535.0f)));
+
+	// Texture environment
+	// TODO: Examine input texture for alpha and use specific alpha/non-alpha ops.
+	//       See Page 355 of the Red Book.
+	BaseFormat = _gldDecodeBaseFormat(pTex);
+
+	switch (BaseFormat) {	// NOTE(review): no default; any other base format keeps offset 0 (same as GL_RGB)
+	case GL_RGB:
+		iTexEnv = 0;
+		break;
+	case GL_RGBA:
+		iTexEnv = 1;
+		break;
+	case GL_ALPHA:
+		iTexEnv = 2;
+		break;
+	}
+
+	switch (pUnit->EnvMode) {	// NOTE(review): no default; an unlisted env mode adds 0 (same as GL_DECAL)
+	case GL_DECAL:
+		iTexEnv += 0;
+		break;
+	case GL_REPLACE:
+		iTexEnv += 3;
+		break;
+	case GL_MODULATE:
+		iTexEnv += 6;
+		break;
+	case GL_BLEND:
+		// Set blend colour
+		dwColorArg0 = D3DCOLOR_COLORVALUE(pUnit->EnvColor[0], pUnit->EnvColor[1], pUnit->EnvColor[2], pUnit->EnvColor[3]);
+		_GLD_DX8_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_COLORARG0, dwColorArg0));
+		iTexEnv += 9;
+		break;
+	case GL_ADD:
+		iTexEnv += 12;
+		break;
+	}
+	pTexenv = (GLD_texenv*)&gldTexEnv[iTexEnv];	// Highest index reachable here is 2 + 12 = 14
+	_gldSetColorOps(gld, unit, pTexenv->ColorArg1, pTexenv->ColorOp, pTexenv->ColorArg2);
+	_gldSetAlphaOps(gld, unit, pTexenv->AlphaArg1, pTexenv->AlphaOp, pTexenv->AlphaArg2);
+}
+
+//---------------------------------------------------------------------------
+
+void gld_NEW_TEXTURE_DX8(
+	GLcontext *ctx)
+{	// Re-programs the D3D texture stages from Mesa's texture state: one stage if only one unit exists, otherwise stages 0 and 1.
+	// TODO: Support for three (ATI Radeon) or more (nVidia GeForce3) texture units
+
+	BOOL bUnit0Enabled;
+	BOOL bUnit1Enabled;
+
+	if (!ctx)
+		return; // Sanity check
+
+	if (ctx->Const.MaxTextureUnits == 1) {
+		gldUpdateTextureUnit(ctx, 0, TRUE);
+		return;
+	}
+
+	//
+	// NOTE: THE FOLLOWING RELATES TO TWO TEXTURE UNITS, AND TWO ONLY!!
+	//
+
+	// Mesa 5: Texture Units altered
+	//bUnit0Enabled = (ctx->Texture._ReallyEnabled & (TEXTURE0_1D | TEXTURE0_2D)) ? TRUE : FALSE;
+	//bUnit1Enabled = (ctx->Texture._ReallyEnabled & (TEXTURE1_1D | TEXTURE1_2D)) ? TRUE : FALSE;
+	bUnit0Enabled = (ctx->Texture.Unit[0]._ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) ? TRUE : FALSE;
+	bUnit1Enabled = (ctx->Texture.Unit[1]._ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) ? TRUE : FALSE;
+
+	// If Unit0 is disabled and Unit1 is enabled then we must pass-through
+	gldUpdateTextureUnit(ctx, 0, (!bUnit0Enabled && bUnit1Enabled) ? TRUE : FALSE);
+	// We can always disable the last texture unit
+	gldUpdateTextureUnit(ctx, 1, FALSE);
+
+#ifdef _DEBUG
+	{
+		// Find out whether device supports current renderstates
+		GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+		GLD_driver_dx8		*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+//		GLD_context			*gld	= GLD_GET_CONTEXT(ctx);
+
+		DWORD dwPasses = 0;	// Initialised so the check below is well-defined even if ValidateDevice fails without writing it
+		_GLD_DX8_DEV(ValidateDevice(gld->pDev, &dwPasses));
+//		if (FAILED(hr)) {
+//			gldLogError(GLDLOG_ERROR, "ValidateDevice failed", hr);
+//		}
+		if (dwPasses != 1) {
+			gldLogMessage(GLDLOG_ERROR, "ValidateDevice: Can't do in one pass\n");
+		}
+	}
+#endif
+}
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx8/gld_vb_d3d_render_dx8.c b/src/mesa/drivers/windows/gldirect/dx8/gld_vb_d3d_render_dx8.c
new file mode 100644
index 0000000000..265c81fb4a
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx8/gld_vb_d3d_render_dx8.c
@@ -0,0 +1,249 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  GLDirect fastpath pipeline stage
+*
+****************************************************************************/
+
+//---------------------------------------------------------------------------
+
+//#include "../GLDirect.h"
+//#include "../gld_log.h"
+//#include "gld_dx8.h"
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx8.h"
+
+//---------------------------------------------------------------------------
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+// #include "mem.h"
+#include "mtypes.h"
+//#include "mmath.h"
+
+#include "math/m_matrix.h"
+#include "math/m_xform.h"
+
+#include "tnl/t_pipeline.h"
+
+//---------------------------------------------------------------------------
+
+__inline void _gldSetVertexShaderConstants(
+	GLcontext *ctx,
+	GLD_driver_dx8 *gld)
+{	// Uploads transpose(ModelView * Projection), taken from the tops of Mesa's matrix stacks, via SetVertexShaderConstant(pDev, 0, &mat, 4).
+	D3DXMATRIX mat, matView, matProj;
+	GLfloat		*pM;
+
+	// Mesa 5: Altered to a Stack
+	//pM = ctx->ModelView.m;
+	pM = ctx->ModelviewMatrixStack.Top->m;
+	matView._11 = pM[0];	// The 16 floats are copied in order: pM[0..3] -> _11.._14, pM[4..7] -> _21.._24, and so on
+	matView._12 = pM[1];
+	matView._13 = pM[2];
+	matView._14 = pM[3];
+	matView._21 = pM[4];
+	matView._22 = pM[5];
+	matView._23 = pM[6];
+	matView._24 = pM[7];
+	matView._31 = pM[8];
+	matView._32 = pM[9];
+	matView._33 = pM[10];
+	matView._34 = pM[11];
+	matView._41 = pM[12];
+	matView._42 = pM[13];
+	matView._43 = pM[14];
+	matView._44 = pM[15];
+
+	// Mesa 5: Altered to a Stack
+	//pM = ctx->ProjectionMatrix.m;
+	pM = ctx->ProjectionMatrixStack.Top->m;
+	matProj._11 = pM[0];	// Same element-for-element copy as for matView above
+	matProj._12 = pM[1];
+	matProj._13 = pM[2];
+	matProj._14 = pM[3];
+	matProj._21 = pM[4];
+	matProj._22 = pM[5];
+	matProj._23 = pM[6];
+	matProj._24 = pM[7];
+	matProj._31 = pM[8];
+	matProj._32 = pM[9];
+	matProj._33 = pM[10];
+	matProj._34 = pM[11];
+	matProj._41 = pM[12];
+	matProj._42 = pM[13];
+	matProj._43 = pM[14];
+	matProj._44 = pM[15];
+
+	D3DXMatrixMultiply( &mat, &matView, &matProj );	// mat = matView * matProj
+	D3DXMatrixTranspose( &mat, &mat );	// Transposed in place before upload
+
+	_GLD_DX8_DEV(SetVertexShaderConstant(gld->pDev, 0, &mat, 4));	// Start register 0, count 4 (presumably one register per matrix row -- confirm against the shader)
+}
+
+//---------------------------------------------------------------------------
+
+static GLboolean gld_d3d_render_stage_run(
+	GLcontext *ctx,
+	struct tnl_pipeline_stage *stage)	// 'stage' is not referenced in this function
+{	// Fastpath render stage: fills gld->PB3d's vertex buffer through the 3D point/line/triangle/quad handlers, then draws it with the device transforms set from gld.
+	GLD_context				*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8			*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+
+	TNLcontext				*tnl;
+	struct vertex_buffer	*VB;
+	tnl_render_func				*tab;
+	GLint					pass;
+	GLD_pb_dx8				*gldPB = &gld->PB3d;
+/*
+	static int count = 0;
+	count++;
+	if (count != 2)
+		return GL_FALSE;
+*/
+	// The "check" function should disable this stage,
+	// but we'll test gld->bUseMesaTnL anyway.
+	if (gld->bUseMesaTnL) {
+		// Do nothing in this stage, but continue pipeline
+		return GL_TRUE;
+	}
+	
+	tnl = TNL_CONTEXT(ctx);
+	VB = &tnl->vb;
+	pass = 0;
+
+   tnl->Driver.Render.Start( ctx );
+
+#if 0
+   // For debugging: Useful to see if an app passes colour data in
+   // an unusual format.
+   switch (VB->AttribPtr[_TNL_ATTRIB_COLOR0]->Type) {
+   case GL_FLOAT:
+	   ddlogMessage(GLDLOG_SYSTEM, "ColorPtr: GL_FLOAT\n");
+	   break;
+   case GL_UNSIGNED_BYTE:
+	   ddlogMessage(GLDLOG_SYSTEM, "ColorPtr: GL_UNSIGNED_BYTE\n");
+	   break;
+   default:
+	   ddlogMessage(GLDLOG_SYSTEM, "ColorPtr: *?*\n");
+	   break;
+   }
+#endif
+
+   tnl->Driver.Render.Points		= gld_Points3D_DX8;	// Primitive handlers chosen per call: flat vs smooth depends on DD_FLATSHADE
+   if (ctx->_TriangleCaps & DD_FLATSHADE) {
+	   tnl->Driver.Render.Line		= gld_Line3DFlat_DX8;
+	   tnl->Driver.Render.Triangle	= gld_Triangle3DFlat_DX8;
+	   tnl->Driver.Render.Quad		= gld_Quad3DFlat_DX8;
+   } else {
+	   tnl->Driver.Render.Line		= gld_Line3DSmooth_DX8;
+	   tnl->Driver.Render.Triangle	= gld_Triangle3DSmooth_DX8;
+	   tnl->Driver.Render.Quad		= gld_Quad3DSmooth_DX8;
+   }
+
+	_GLD_DX8_VB(Lock(gldPB->pVB, 0, 0, &gldPB->pPoints, D3DLOCK_DISCARD));	// NOTE(review): no failure check is visible here; pPoints is used below regardless
+	gldPB->nPoints = gldPB->nLines = gldPB->nTriangles = 0;
+	// Allocate primitive pointers
+	// gldPB->pPoints is always first
+	gldPB->pLines		= gldPB->pPoints + (gldPB->dwStride * gldPB->iFirstLine);	// Lines start iFirstLine vertices into the locked buffer
+	gldPB->pTriangles	= gldPB->pPoints + (gldPB->dwStride * gldPB->iFirstTriangle);	// Triangles start iFirstTriangle vertices in
+	
+	ASSERT(tnl->Driver.Render.BuildVertices);
+	ASSERT(tnl->Driver.Render.PrimitiveNotify);
+	ASSERT(tnl->Driver.Render.Points);
+	ASSERT(tnl->Driver.Render.Line);
+	ASSERT(tnl->Driver.Render.Triangle);
+	ASSERT(tnl->Driver.Render.Quad);
+	ASSERT(tnl->Driver.Render.ResetLineStipple);
+	ASSERT(tnl->Driver.Render.Interp);
+	ASSERT(tnl->Driver.Render.CopyPV);
+	ASSERT(tnl->Driver.Render.ClippedLine);
+	ASSERT(tnl->Driver.Render.ClippedPolygon);
+	ASSERT(tnl->Driver.Render.Finish);
+
+	tab = (VB->Elts ? tnl->Driver.Render.PrimTabElts : tnl->Driver.Render.PrimTabVerts);	// Indexed vs non-indexed dispatch table
+	
+	do {
+		GLuint i, length, flags = 0;
+		for (i = 0 ; !(flags & PRIM_END) ; i += length)	// Walk the primitive records until one carries PRIM_END
+		{
+			flags = VB->Primitive[i].mode;
+			length= VB->Primitive[i].count;
+			ASSERT(length || (flags & PRIM_END));
+			ASSERT((flags & PRIM_MODE_MASK) <= GL_POLYGON+1);
+			if (length)
+				tab[flags & PRIM_MODE_MASK]( ctx, i, i + length, flags );
+		}
+	} while (tnl->Driver.Render.Multipass &&
+		tnl->Driver.Render.Multipass( ctx, ++pass ));	// Repeat while an optional Multipass callback returns non-zero
+	
+	_GLD_DX8_VB(Unlock(gldPB->pVB));
+
+	_GLD_DX8_DEV(SetStreamSource(gld->pDev, 0, gldPB->pVB, gldPB->dwStride));
+
+	_GLD_DX8_DEV(SetTransform(gld->pDev, D3DTS_PROJECTION, &gld->matProjection));
+	_GLD_DX8_DEV(SetTransform(gld->pDev, D3DTS_WORLD, &gld->matModelView));
+
+	if (gldPB->nPoints) {	// Each primitive class is drawn from its own region of the buffer, then its counter is reset
+		_GLD_DX8_DEV(DrawPrimitive(gld->pDev, D3DPT_POINTLIST, 0, gldPB->nPoints));
+		gldPB->nPoints = 0;
+	}
+
+	if (gldPB->nLines) {
+		_GLD_DX8_DEV(DrawPrimitive(gld->pDev, D3DPT_LINELIST, gldPB->iFirstLine, gldPB->nLines));
+		gldPB->nLines = 0;
+	}
+
+	if (gldPB->nTriangles) {
+		_GLD_DX8_DEV(DrawPrimitive(gld->pDev, D3DPT_TRIANGLELIST, gldPB->iFirstTriangle, gldPB->nTriangles));
+		gldPB->nTriangles = 0;
+	}
+
+	return GL_FALSE;		/* finished the pipe */
+}
+
+//---------------------------------------------------------------------------
+
+const struct tnl_pipeline_stage _gld_d3d_render_stage =
+{	// Pipeline stage descriptor: name string, four NULL members, then the run callback.
+   "gld_d3d_render_stage",
+   NULL,
+   NULL,
+   NULL,
+   NULL,
+   gld_d3d_render_stage_run			/* run */
+};
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx8/gld_vb_mesa_render_dx8.c b/src/mesa/drivers/windows/gldirect/dx8/gld_vb_mesa_render_dx8.c
new file mode 100644
index 0000000000..9ab562010c
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx8/gld_vb_mesa_render_dx8.c
@@ -0,0 +1,448 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keithw@valinux.com>
+ */
+
+
+/*
+ * Render whole vertex buffers, including projection of vertices from
+ * clip space and clipping of primitives.
+ *
+ * This file makes calls to project vertices and to the point, line
+ * and triangle rasterizers via the function pointers:
+ *
+ *    context->Driver.Render.*
+ *
+ */
+
+
+//---------------------------------------------------------------------------
+
+//#include "../GLDirect.h"
+//#include "../gld_log.h"
+//#include "gld_dx8.h"
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx8.h"
+
+//---------------------------------------------------------------------------
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+// #include "mem.h"
+#include "mtypes.h"
+//#include "mmath.h"
+
+#include "math/m_matrix.h"
+#include "math/m_xform.h"
+
+#include "tnl/t_pipeline.h"
+
+/**********************************************************************/
+/*                        Clip single primitives                      */
+/**********************************************************************/
+
+
+#if defined(USE_IEEE) /* NEGATIVE(x): non-zero when the sign bit of float x is set */
+#define NEGATIVE(x) (GET_FLOAT_BITS(x) & (1U<<31)) /* unsigned constant: 1<<31 overflows a 32-bit signed int */
+//#define DIFFERENT_SIGNS(x,y) ((GET_FLOAT_BITS(x) ^ GET_FLOAT_BITS(y)) & (1<<31))
+#else /* portable fallback: plain comparison, argument parenthesised so expressions expand safely */
+#define NEGATIVE(x) ((x) < 0)
+//#define DIFFERENT_SIGNS(x,y) (x * y <= 0 && x - y != 0)
+/* Could just use (x*y<0) except for the flatshading requirements.
+ * Maybe there's a better way?
+ */
+#endif
+
+
+#define W(i) coord[i][3] /* W/Z/Y/X: components 3..0 of coord[i]; 'coord' is not defined in this file */
+#define Z(i) coord[i][2]
+#define Y(i) coord[i][1]
+#define X(i) coord[i][0]
+#define SIZE 4 /* presumably the number of coordinate components the template handles -- confirm in t_vb_cliptmp.h */
+#define TAG(x) x##_4 /* generated names get a _4 suffix (e.g. clip_tri_4, called further down) */
+#include "tnl/t_vb_cliptmp.h" /* template include, parameterised by the macros above */
+
+
+
+/**********************************************************************/
+/*              Clip and render whole begin/end objects               */
+/**********************************************************************/
+
+#define NEED_EDGEFLAG_SETUP (ctx->_TriangleCaps & DD_TRI_UNFILLED) /* true when DD_TRI_UNFILLED is set in _TriangleCaps */
+#define EDGEFLAG_GET(idx) VB->EdgeFlag[idx] /* read edge flag of vertex idx */
+#define EDGEFLAG_SET(idx, val) VB->EdgeFlag[idx] = val /* write edge flag of vertex idx */
+
+
+/* Vertices, with the possibility of clipping: a primitive is drawn directly when no vertex has a clip bit set, dropped when all vertices share a bit within 0x3f, and clipped otherwise.
+ */
+#define RENDER_POINTS( start, count ) \
+   tnl->Driver.Render.Points( ctx, start, count )
+
+#define RENDER_LINE( v1, v2 )			\
+do {						\
+   GLubyte c1 = mask[v1], c2 = mask[v2];	\
+   GLubyte ormask = c1|c2;			\
+   if (!ormask)					\
+      LineFunc( ctx, v1, v2 );			\
+   else if (!(c1 & c2 & 0x3f))			\
+      clip_line_4( ctx, v1, v2, ormask );	\
+} while (0)
+
+#define RENDER_TRI( v1, v2, v3 )			\
+do {							\
+   GLubyte c1 = mask[v1], c2 = mask[v2], c3 = mask[v3];	\
+   GLubyte ormask = c1|c2|c3;				\
+   if (!ormask)						\
+      TriangleFunc( ctx, v1, v2, v3 );			\
+   else if (!(c1 & c2 & c3 & 0x3f)) 			\
+      clip_tri_4( ctx, v1, v2, v3, ormask );    	\
+} while (0)
+
+#define RENDER_QUAD( v1, v2, v3, v4 )			\
+do {							\
+   GLubyte c1 = mask[v1], c2 = mask[v2];		\
+   GLubyte c3 = mask[v3], c4 = mask[v4];		\
+   GLubyte ormask = c1|c2|c3|c4;			\
+   if (!ormask)						\
+      QuadFunc( ctx, v1, v2, v3, v4 );			\
+   else if (!(c1 & c2 & c3 & c4 & 0x3f)) 		\
+      clip_quad_4( ctx, v1, v2, v3, v4, ormask );	\
+} while (0)
+
+
+#define LOCAL_VARS						\
+   TNLcontext *tnl = TNL_CONTEXT(ctx);				\
+   struct vertex_buffer *VB = &tnl->vb;				\
+   const GLuint * const elt = VB->Elts;				\
+   const GLubyte *mask = VB->ClipMask;				\
+   const GLuint sz = VB->ClipPtr->size;				\
+   const tnl_line_func LineFunc = tnl->Driver.Render.Line;		\
+   const tnl_triangle_func TriangleFunc = tnl->Driver.Render.Triangle;	\
+   const tnl_quad_func QuadFunc = tnl->Driver.Render.Quad;		\
+   const GLboolean stipple = ctx->Line.StippleFlag;		\
+   (void) (LineFunc && TriangleFunc && QuadFunc);		\
+   (void) elt; (void) mask; (void) sz; (void) stipple;
+
+#define TAG(x) clip_##x##_verts /* e.g. the clip_render_tab_verts table selected in _gld_mesa_render_stage_run */
+#define INIT(x) tnl->Driver.Render.PrimitiveNotify( ctx, x )
+#define RESET_STIPPLE if (stipple) tnl->Driver.Render.ResetLineStipple( ctx ) /* only reset when line stippling is enabled */
+#define PRESERVE_VB_DEFS /* presumably keeps the RENDER_* macros defined for the second include below -- confirm in t_vb_rendertmp.h */
+#include "tnl/t_vb_rendertmp.h"
+
+
+
+/* Elts, with the possibility of clipping: same template, with vertex indices fetched through VB->Elts.
+ */
+#undef ELT
+#undef TAG
+#define ELT(x) elt[x] /* indirect through the element (index) array */
+#define TAG(x) clip_##x##_elts /* e.g. the clip_render_tab_elts table selected in _gld_mesa_render_stage_run */
+#include "tnl/t_vb_rendertmp.h"
+
+/* TODO: do this for all primitives, verts and elts:
+ */
+static void clip_elt_triangles( GLcontext *ctx,
+				GLuint start,
+				GLuint count,
+				GLuint flags )
+{  /* Indexed GL_TRIANGLES in [start,count): unclipped runs go to PrimTabElts[GL_TRIANGLES] in batches; triangles touching a clip plane are clipped one by one. */
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tnl_render_func render_tris = tnl->Driver.Render.PrimTabElts[GL_TRIANGLES];
+   struct vertex_buffer *VB = &tnl->vb;
+   const GLuint * const elt = VB->Elts;
+   GLubyte *mask = VB->ClipMask;
+   const GLuint last = (count >= 2) ? count-2 : 0; /* guard: count-2 would wrap around for count < 2 */
+   GLuint j;
+   (void) flags;
+
+   tnl->Driver.Render.PrimitiveNotify( ctx, GL_TRIANGLES );
+
+   for (j=start; j < last; j+=3 ) {
+      GLubyte c1 = mask[elt[j]];
+      GLubyte c2 = mask[elt[j+1]];
+      GLubyte c3 = mask[elt[j+2]];
+      GLubyte ormask = c1|c2|c3;
+      if (ormask) {
+	 if (start < j)
+	    render_tris( ctx, start, j, 0 ); /* flush the unclipped run preceding this triangle */
+	 if (!(c1&c2&c3&0x3f)) /* skip triangles whose vertices all share a bit within 0x3f */
+	    clip_tri_4( ctx, elt[j], elt[j+1], elt[j+2], ormask );
+	 start = j+3;
+      }
+   }
+
+   if (start < j)
+      render_tris( ctx, start, j, 0 ); /* flush the trailing unclipped run */
+}
+
+/**********************************************************************/
+/*                  Render whole begin/end objects                    */
+/**********************************************************************/
+
+#define NEED_EDGEFLAG_SETUP (ctx->_TriangleCaps & DD_TRI_UNFILLED) /* true when DD_TRI_UNFILLED is set in _TriangleCaps */
+#define EDGEFLAG_GET(idx) VB->EdgeFlag[idx] /* read edge flag of vertex idx */
+#define EDGEFLAG_SET(idx, val) VB->EdgeFlag[idx] = val /* write edge flag of vertex idx */
+
+
+/* Vertices, no clipping: every primitive is handed straight to the current driver handler.
+ */
+#define RENDER_POINTS( start, count ) \
+   tnl->Driver.Render.Points( ctx, start, count )
+
+#define RENDER_LINE( v1, v2 ) \
+   LineFunc( ctx, v1, v2 )
+
+#define RENDER_TRI( v1, v2, v3 ) \
+   TriangleFunc( ctx, v1, v2, v3 )
+
+#define RENDER_QUAD( v1, v2, v3, v4 ) \
+   QuadFunc( ctx, v1, v2, v3, v4 )
+
+#define TAG(x) _gld_tnl_##x##_verts /* generated names have the form _gld_tnl_<x>_verts */
+
+#define LOCAL_VARS						\
+   TNLcontext *tnl = TNL_CONTEXT(ctx);				\
+   struct vertex_buffer *VB = &tnl->vb;				\
+   const GLuint * const elt = VB->Elts;				\
+   const tnl_line_func LineFunc = tnl->Driver.Render.Line;		\
+   const tnl_triangle_func TriangleFunc = tnl->Driver.Render.Triangle;	\
+   const tnl_quad_func QuadFunc = tnl->Driver.Render.Quad;		\
+   (void) (LineFunc && TriangleFunc && QuadFunc);		\
+   (void) elt;
+
+#define RESET_STIPPLE tnl->Driver.Render.ResetLineStipple( ctx ) /* unconditional here, unlike the clipped variant */
+#define INIT(x) tnl->Driver.Render.PrimitiveNotify( ctx, x )
+#define RENDER_TAB_QUALIFIER /* defined empty; NOTE(review): presumably controls the linkage of the generated table -- confirm in t_vb_rendertmp.h */
+#define PRESERVE_VB_DEFS /* presumably keeps the RENDER_* macros defined for the second include below -- confirm in t_vb_rendertmp.h */
+#include "tnl/t_vb_rendertmp.h"
+
+
+/* Elts, no clipping: same template, with vertex indices fetched through VB->Elts.
+ */
+#undef ELT
+#define TAG(x) _gld_tnl_##x##_elts /* generated names have the form _gld_tnl_<x>_elts */
+#define ELT(x) elt[x] /* indirect through the element (index) array */
+#include "tnl/t_vb_rendertmp.h"
+
+
+/**********************************************************************/
+/*              Helper functions for drivers                  */
+/**********************************************************************/
+/*
+void _tnl_RenderClippedPolygon( GLcontext *ctx, const GLuint *elts, GLuint n )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint *tmp = VB->Elts;
+
+   VB->Elts = (GLuint *)elts;
+   tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n, PRIM_BEGIN|PRIM_END );
+   VB->Elts = tmp;
+}
+
+void _tnl_RenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tnl->Driver.Render.Line( ctx, ii, jj );
+}
+*/
+
+
+/**********************************************************************/
+/*              Clip and render whole vertex buffers                  */
+/**********************************************************************/
+
+tnl_points_func _gldSetupPoints[4] = {	// Each table is indexed by gld->iSetupFunc in _gld_mesa_render_stage_run; points use one handler for all four slots
+	gld_Points2D_DX8,
+	gld_Points2D_DX8,
+	gld_Points2D_DX8,
+	gld_Points2D_DX8
+};
+tnl_line_func _gldSetupLine[4] = {	// Slots 2 and 3 repeat the flat/smooth handlers of slots 0 and 1
+	gld_Line2DFlat_DX8,
+	gld_Line2DSmooth_DX8,
+	gld_Line2DFlat_DX8,
+	gld_Line2DSmooth_DX8,
+};
+tnl_triangle_func _gldSetupTriangle[4] = {	// Slots 0/1: flat/smooth; slots 2/3: the "Extras" flat/smooth variants
+	gld_Triangle2DFlat_DX8,
+	gld_Triangle2DSmooth_DX8,
+	gld_Triangle2DFlatExtras_DX8,
+	gld_Triangle2DSmoothExtras_DX8
+};
+tnl_quad_func _gldSetupQuad[4] = {	// Same slot layout as _gldSetupTriangle
+	gld_Quad2DFlat_DX8,
+	gld_Quad2DSmooth_DX8,
+	gld_Quad2DFlatExtras_DX8,
+	gld_Quad2DSmoothExtras_DX8
+};
+
+//---------------------------------------------------------------------------
+
+static GLboolean _gld_mesa_render_stage_run(
+	GLcontext *ctx,
+	struct tnl_pipeline_stage *stage)	// 'stage' is not referenced in this function
+{	// Mesa-TnL render stage: builds vertices, fills gld->PB2d's vertex buffer through the 2D handlers (clipping where needed), then draws points, lines and triangles.
+	GLD_context				*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx8			*gld	= GLD_GET_DX8_DRIVER(gldCtx);
+		
+	TNLcontext				*tnl = TNL_CONTEXT(ctx);
+	struct vertex_buffer	*VB = &tnl->vb;
+	tnl_render_func				*tab;
+	GLint					pass = 0;
+	GLD_pb_dx8				*gldPB;
+
+	/* Allow the drivers to lock before projected verts are built so
+    * that window coordinates are guaranteed not to change before
+    * rendering.
+    */
+	ASSERT(tnl->Driver.Render.Start);
+	
+	tnl->Driver.Render.Start( ctx );
+	
+	// NOTE: Setting D3DRS_SOFTWAREVERTEXPROCESSING for a mixed-mode device resets
+	//       stream, indices and shader to default values of NULL or 0.
+/*	if ((ctx->_TriangleCaps & DD_TRI_LIGHT_TWOSIDE) &&
+		gld->VStwosidelight.hShader &&
+		!ctx->Fog.Enabled)
+	{
+		IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_SOFTWAREVERTEXPROCESSING, !gld->VStwosidelight.bHardware);
+		_GLD_DX8_DEV(SetVertexShader(gld->pDev, gld->VStwosidelight.hShader));
+		gldPB = &gld->PBtwosidelight;
+		tnl->Driver.Render.Points	= gld_Points2DTwoside_DX8;
+		if (ctx->_TriangleCaps & DD_FLATSHADE) {
+			tnl->Driver.Render.Line		= gld_Line2DFlatTwoside_DX8;
+			tnl->Driver.Render.Triangle	= gld_Triangle2DFlatTwoside_DX8;
+			tnl->Driver.Render.Quad		= gld_Quad2DFlatTwoside_DX8;
+		} else {
+			tnl->Driver.Render.Line		= gld_Line2DSmoothTwoside_DX8;
+			tnl->Driver.Render.Triangle	= gld_Triangle2DSmoothTwoside_DX8;
+			tnl->Driver.Render.Quad		= gld_Quad2DSmoothTwoside_DX8;
+		}
+	} else {*/
+		IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_SOFTWAREVERTEXPROCESSING, TRUE);	// Return value is not checked (no _GLD_DX8_DEV wrapper on this call)
+		gldPB = &gld->PB2d;
+		_GLD_DX8_DEV(SetVertexShader(gld->pDev, gldPB->dwFVF));	// Set after the render state above (see the NOTE about shader reset)
+		tnl->Driver.Render.Points	= _gldSetupPoints[gld->iSetupFunc];	// iSetupFunc selects one slot (0..3) in each handler table
+		tnl->Driver.Render.Line		= _gldSetupLine[gld->iSetupFunc];
+		tnl->Driver.Render.Triangle	= _gldSetupTriangle[gld->iSetupFunc];
+		tnl->Driver.Render.Quad		= _gldSetupQuad[gld->iSetupFunc];
+//	}
+
+	_GLD_DX8_VB(Lock(gldPB->pVB, 0, 0, &gldPB->pPoints, D3DLOCK_DISCARD));	// NOTE(review): no failure check is visible here; pPoints is used below regardless
+	gldPB->nPoints = gldPB->nLines = gldPB->nTriangles = 0;
+	// Allocate primitive pointers
+	// gldPB->pPoints is always first
+	gldPB->pLines		= gldPB->pPoints + (gldPB->dwStride * gldPB->iFirstLine);	// Lines start iFirstLine vertices into the locked buffer
+	gldPB->pTriangles	= gldPB->pPoints + (gldPB->dwStride * gldPB->iFirstTriangle);	// Triangles start iFirstTriangle vertices in
+
+	ASSERT(tnl->Driver.Render.BuildVertices);
+	ASSERT(tnl->Driver.Render.PrimitiveNotify);
+	ASSERT(tnl->Driver.Render.Points);
+	ASSERT(tnl->Driver.Render.Line);
+	ASSERT(tnl->Driver.Render.Triangle);
+	ASSERT(tnl->Driver.Render.Quad);
+	ASSERT(tnl->Driver.Render.ResetLineStipple);
+	ASSERT(tnl->Driver.Render.Interp);
+	ASSERT(tnl->Driver.Render.CopyPV);
+	ASSERT(tnl->Driver.Render.ClippedLine);
+	ASSERT(tnl->Driver.Render.ClippedPolygon);
+	ASSERT(tnl->Driver.Render.Finish);
+	
+	tnl->Driver.Render.BuildVertices( ctx, 0, VB->Count, ~0 );	// Build all VB->Count vertices with every bit of the last argument set
+	
+	if (VB->ClipOrMask) {	// Some vertex has a clip bit set: use the clipping dispatch tables
+		tab = VB->Elts ? clip_render_tab_elts : clip_render_tab_verts;
+		clip_render_tab_elts[GL_TRIANGLES] = clip_elt_triangles;	// Overrides the GL_TRIANGLES slot; written every time this branch runs
+	}
+	else {
+		tab = (VB->Elts ? 
+			tnl->Driver.Render.PrimTabElts : 
+		tnl->Driver.Render.PrimTabVerts);
+	}
+	
+	do {
+		GLuint i, length, flags = 0;
+		for (i = 0 ; !(flags & PRIM_END) ; i += length) {	// Walk the primitive records until one carries PRIM_END
+			flags = VB->Primitive[i].mode;
+			length= VB->Primitive[i].count;
+			ASSERT(length || (flags & PRIM_END));
+			ASSERT((flags & PRIM_MODE_MASK) <= GL_POLYGON+1);
+			if (length)
+				tab[flags & PRIM_MODE_MASK]( ctx, i, i + length, flags );
+		}
+	} while (tnl->Driver.Render.Multipass &&
+		tnl->Driver.Render.Multipass( ctx, ++pass ));	// Repeat while an optional Multipass callback returns non-zero
+	
+	
+//	tnl->Driver.Render.Finish( ctx );
+	
+	_GLD_DX8_VB(Unlock(gldPB->pVB));
+
+	_GLD_DX8_DEV(SetStreamSource(gld->pDev, 0, gldPB->pVB, gldPB->dwStride));
+
+	if (gldPB->nPoints) {	// Each primitive class is drawn from its own region of the buffer, then its counter is reset
+		_GLD_DX8_DEV(DrawPrimitive(gld->pDev, D3DPT_POINTLIST, 0, gldPB->nPoints));
+		gldPB->nPoints = 0;
+	}
+
+	if (gldPB->nLines) {
+		_GLD_DX8_DEV(DrawPrimitive(gld->pDev, D3DPT_LINELIST, gldPB->iFirstLine, gldPB->nLines));
+		gldPB->nLines = 0;
+	}
+
+	if (gldPB->nTriangles) {
+		_GLD_DX8_DEV(DrawPrimitive(gld->pDev, D3DPT_TRIANGLELIST, gldPB->iFirstTriangle, gldPB->nTriangles));
+		gldPB->nTriangles = 0;
+	}
+
+	return GL_FALSE;		/* finished the pipe */
+}
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+
+
+//---------------------------------------------------------------------------
+
+const struct tnl_pipeline_stage _gld_mesa_render_stage =
+{	// Pipeline stage descriptor: name string, four NULL members, then the run callback.
+   "gld_mesa_render_stage",
+   NULL,
+   NULL,
+   NULL,
+   NULL,
+   _gld_mesa_render_stage_run	/* run */
+};
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx8/gld_wgl_dx8.c b/src/mesa/drivers/windows/gldirect/dx8/gld_wgl_dx8.c
new file mode 100644
index 0000000000..011d810e97
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx8/gld_wgl_dx8.c
@@ -0,0 +1,1336 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  GLDirect Direct3D 8.x WGL (WindowsGL)
+*
+****************************************************************************/
+
+#include "dglcontext.h"
+#include "gld_driver.h"
+#include "gld_dxerr8.h"
+#include "gld_dx8.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+// Error codes stored in nContextError (copied from dglcontext.c)
+#define GLDERR_NONE     0
+#define GLDERR_MEM      1
+#define GLDERR_DDRAW    2
+#define GLDERR_D3D      3
+#define GLDERR_BPP      4
+#define GLDERR_DDS      5
+// This external var keeps track of any error
+extern int nContextError;
+// Severity used by the failure logs below; mapped to DDLOG_CRITICAL here
+#define DDLOG_CRITICAL_OR_WARN	DDLOG_CRITICAL
+
+extern void _gld_mesa_warning(GLcontext *, char *);
+extern void _gld_mesa_fatal(GLcontext *, char *);
+
+//---------------------------------------------------------------------------
+// Text of the warning about an unsupported desktop colour depth
+static char	szColorDepthWarning[] =
+"GLDirect does not support the current desktop\n\
+color depth.\n\n\
+You may need to change the display resolution to\n\
+16 bits per pixel or higher color depth using\n\
+the Windows Display Settings control panel\n\
+before running this OpenGL application.\n";
+
+// The only depth-stencil formats currently supported by Direct3D
+// Surface Format	Depth	Stencil		Total Bits
+// D3DFMT_D32		32		-			32
+// D3DFMT_D15S1		15		1			16
+// D3DFMT_D24S8		24		8			32
+// D3DFMT_D16		16		-			16
+// D3DFMT_D24X8		24		-			32
+// D3DFMT_D24X4S4	24		4			32
+
+// This pixel format will be used as a template when compiling the list
+// of pixel formats supported by the hardware. Many fields will be
+// filled in at runtime (each list entry starts as a memcpy of this).
+// PFD flag defaults are upgraded to match ChoosePixelFormat() -- DaveM
+static DGL_pixelFormat pfTemplateHW =
+{
+    {
+	sizeof(PIXELFORMATDESCRIPTOR),	// Size of the data structure
+		1,							// Structure version - should be 1
+									// Flags:
+		PFD_DRAW_TO_WINDOW |		// The buffer can draw to a window or device surface.
+		PFD_DRAW_TO_BITMAP |		// The buffer can draw to a bitmap. (DaveM)
+		PFD_SUPPORT_GDI |			// The buffer supports GDI drawing. (DaveM)
+		PFD_SUPPORT_OPENGL |		// The buffer supports OpenGL drawing.
+		PFD_DOUBLEBUFFER |			// The buffer is double-buffered.
+		0,							// Placeholder for easy commenting of above flags
+		PFD_TYPE_RGBA,				// Pixel type RGBA.
+		16,							// Total colour bitplanes (excluding alpha bitplanes)
+		5, 0,						// Red bits, shift
+		5, 0,						// Green bits, shift
+		5, 0,						// Blue bits, shift
+		0, 0,						// Alpha bits, shift (destination alpha)
+		0,							// Accumulator bits (total)
+		0, 0, 0, 0,					// Accumulator bits: Red, Green, Blue, Alpha
+		0,							// Depth bits
+		0,							// Stencil bits
+		0,							// Number of auxiliary buffers
+		0,							// Layer type
+		0,							// Specifies the number of overlay and underlay planes.
+		0,							// Layer mask
+		0,							// Specifies the transparent color or index of an underlay plane.
+		0							// Damage mask
+	},
+	D3DFMT_UNKNOWN,	// No depth/stencil buffer (presumably the dwDriverData member -- confirm)
+};
+
+//---------------------------------------------------------------------------
+// Vertex Shaders
+//---------------------------------------------------------------------------
+
+// Vertex Shader Declaration (only referenced by the commented-out _gldCreateVertexShaders below)
+static DWORD dwTwoSidedLightingDecl[] =
+{
+	D3DVSD_STREAM(0),
+	D3DVSD_REG(0,  D3DVSDT_FLOAT3), 	 // XYZ position
+	D3DVSD_REG(1,  D3DVSDT_FLOAT3), 	 // XYZ normal
+	D3DVSD_REG(2,  D3DVSDT_D3DCOLOR),	 // Diffuse color
+	D3DVSD_REG(3,  D3DVSDT_D3DCOLOR),	 // Specular color
+	D3DVSD_REG(4,  D3DVSDT_FLOAT2), 	 // 2D texture unit 0
+	D3DVSD_REG(5,  D3DVSDT_FLOAT2), 	 // 2D texture unit 1
+	D3DVSD_END()
+};
+
+// Vertex Shader for two-sided lighting (only referenced by the commented-out _gldCreateVertexShaders below)
+static char *szTwoSidedLightingVS =
+// This is a test shader!
+"vs.1.0\n"
+"m4x4 oPos,v0,c0\n"
+"mov oD0,v2\n"
+"mov oD1,v3\n"
+"mov oT0,v4\n"
+"mov oT1,v5\n"
+;
+
+//---------------------------------------------------------------------------
+//---------------------------------------------------------------------------
+// Loaded d3d8.dll entry point plus the interfaces cached for reuse
+typedef struct {
+	HINSTANCE			hD3D8DLL;			// Handle to d3d8.dll
+	FNDIRECT3DCREATE8	fnDirect3DCreate8;	// Direct3DCreate8 function pointer
+	BOOL				bDirect3D;			// Persistent Direct3D8 exists
+	BOOL				bDirect3DDevice;	// Persistent Direct3DDevice8 exists
+	IDirect3D8			*pD3D;				// Persistent Direct3D8
+	IDirect3DDevice8	*pDev;				// Persistent Direct3DDevice8
+} GLD_dx8_globals;
+
+// These are "global" to all DX8 contexts. KeithH
+static GLD_dx8_globals dx8Globals;
+
+//---------------------------------------------------------------------------
+//---------------------------------------------------------------------------
+
+BOOL gldGetDXErrorString_DX(
+	HRESULT hr,
+	char *buf,
+	int nBufSize)
+{
+	//
+	// Return a string describing the input HRESULT error code.
+	// hr, buf and nBufSize are passed straight to D3DXGetErrorString();
+	// this wrapper ignores any result from it and always returns TRUE.
+	D3DXGetErrorString(hr, buf, nBufSize);
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+static D3DMULTISAMPLE_TYPE _gldGetDeviceMultiSampleType(
+	IDirect3D8 *pD3D8,
+	D3DFORMAT SurfaceFormat,
+	D3DDEVTYPE d3dDevType,
+	BOOL Windowed)
+{
+	//
+	// Choose the multisample type to request for a device, according to
+	// the global glb.dwMultisample setting:
+	//   GLDS_MULTISAMPLE_NONE    - always D3DMULTISAMPLE_NONE
+	//   GLDS_MULTISAMPLE_FASTEST - first supported type value from 2 up to 16
+	//   anything else (nicest)   - first supported type value from 16 down to 2
+	// Support is queried with IDirect3D8_CheckDeviceMultiSampleType() for
+	// the given surface format, device type and windowed flag.
+	// Returns D3DMULTISAMPLE_NONE when no candidate is supported.
+	//
+
+	const BOOL	bAscending = (glb.dwMultisample == GLDS_MULTISAMPLE_FASTEST);
+	int			nCandidate;	// Multisample type value being probed
+	int			nProbesLeft;	// Candidates still to try
+
+	if (glb.dwMultisample == GLDS_MULTISAMPLE_NONE)
+		return D3DMULTISAMPLE_NONE;
+
+	// Fifteen candidates either way: 2..16 upwards or 16..2 downwards
+	nCandidate = bAscending ? 2 : 16;
+	for (nProbesLeft = 15; nProbesLeft > 0; nProbesLeft--) {
+		const D3DMULTISAMPLE_TYPE msType = (D3DMULTISAMPLE_TYPE)nCandidate;
+		const HRESULT hrCheck = IDirect3D8_CheckDeviceMultiSampleType(
+									pD3D8,
+									glb.dwAdapter,
+									d3dDevType,
+									SurfaceFormat,
+									Windowed,
+									msType);
+
+		if (SUCCEEDED(hrCheck))
+			return msType;
+
+		nCandidate += bAscending ? 1 : -1;
+	}
+
+	// Nothing found - return default
+	return D3DMULTISAMPLE_NONE;
+}
+
+//---------------------------------------------------------------------------
+
+void _gldDestroyPrimitiveBuffer(
+	GLD_pb_dx8 *gldVB)
+{
+	SAFE_RELEASE(gldVB->pVB);	// Let go of the vertex buffer interface
+	gldVB->nTriangles = 0;		// Then clear the primitive counts (sanity)
+	gldVB->nPoints = 0;
+	gldVB->nLines = 0;
+}
+
+//---------------------------------------------------------------------------
+
+HRESULT _gldCreatePrimitiveBuffer(
+	GLcontext *ctx,
+	GLD_driver_dx8 *lpCtx,
+	GLD_pb_dx8 *gldVB)
+{
+	// Create the Direct3D vertex buffer behind one primitive buffer, using
+	// the dwStride, dwUsage, dwFVF and dwPool already stored in gldVB, then
+	// reset its counters.  Returns S_OK, or the HRESULT of the failed
+	// CreateVertexBuffer call (which is also logged).  ctx is not used.
+	// Vertex budget, sized so that compiled vertex arrays (CVA) fit
+	const DWORD	dwVertexBudget = MAX_ARRAY_LOCK_SIZE;
+	// Total size: 1 per point, 2 per line, 6 per quad = 9 times the budget
+	const DWORD	dwBufferBytes = dwVertexBudget * 9 * gldVB->dwStride;
+	HRESULT		hrCreate;
+
+	hrCreate = IDirect3DDevice8_CreateVertexBuffer(
+					lpCtx->pDev,
+					dwBufferBytes,
+					gldVB->dwUsage,
+					gldVB->dwFVF,
+					gldVB->dwPool,
+					&gldVB->pVB);
+	if (FAILED(hrCreate)) {
+		ddlogMessage(DDLOG_CRITICAL_OR_WARN, "CreateVertexBuffer failed");
+		return hrCreate;
+	}
+
+	// Start with no primitives counted and no vertex pointers set
+	gldVB->nTriangles	= 0;
+	gldVB->nPoints		= 0;
+	gldVB->nLines		= 0;
+	gldVB->pTriangles	= NULL;
+	gldVB->pLines		= NULL;
+	gldVB->pPoints		= NULL;
+	gldVB->iFirstLine		= dwVertexBudget;		// Index of first line in VB
+	gldVB->iFirstTriangle	= dwVertexBudget * 3;	// Index of first triangle in VB
+	return S_OK;
+}
+
+//---------------------------------------------------------------------------
+// Function: _gldCreateVertexShaders
+// Create DX8 Vertex Shaders.
+//---------------------------------------------------------------------------
+/*
+void _gldCreateVertexShaders(
+	GLD_driver_dx8 *gld)
+{
+	DWORD			dwFlags;
+	LPD3DXBUFFER	pVSOpcodeBuffer; // Vertex Shader opcode buffer
+	HRESULT			hr;
+
+#ifdef _DEBUG
+	dwFlags = D3DXASM_DEBUG;
+#else
+	dwFlags = 0; // D3DXASM_SKIPVALIDATION;
+#endif
+
+	ddlogMessage(DDLOG_INFO, "Creating shaders...\n");
+
+	// Init the shader handle
+	gld->VStwosidelight.hShader = 0;
+
+	if (gld->d3dCaps8.MaxStreams == 0) {
+		// Lame DX8 driver doesn't support streams
+		// Not fatal, as defaults will be used
+		ddlogMessage(DDLOG_WARN, "Driver doesn't support Vertex Shaders (MaxStreams==0)\n");
+		return;
+	}
+
+	// ** THIS DISABLES VERTEX SHADER SUPPORT **
+//	return;
+	// ** THIS DISABLES VERTEX SHADER SUPPORT **
+
+	//
+	// Two-sided lighting
+	//
+
+#if 0
+	//
+	// DEBUGGING: Load shader from a text file
+	//
+	{
+	LPD3DXBUFFER	pVSErrorBuffer; // Vertex Shader error buffer
+	hr = D3DXAssembleShaderFromFile(
+			"twoside.vsh",
+			dwFlags,
+			NULL, // No constants
+			&pVSOpcodeBuffer,
+			&pVSErrorBuffer);
+	if (pVSErrorBuffer && pVSErrorBuffer->lpVtbl->GetBufferPointer(pVSErrorBuffer))
+		ddlogMessage(DDLOG_INFO, pVSErrorBuffer->lpVtbl->GetBufferPointer(pVSErrorBuffer));
+	SAFE_RELEASE(pVSErrorBuffer);
+	}
+#else
+	{
+	LPD3DXBUFFER	pVSErrorBuffer; // Vertex Shader error buffer
+	// Assemble ascii shader text into shader opcodes
+	hr = D3DXAssembleShader(
+			szTwoSidedLightingVS,
+			strlen(szTwoSidedLightingVS),
+			dwFlags,
+			NULL, // No constants
+			&pVSOpcodeBuffer,
+			&pVSErrorBuffer);
+	if (pVSErrorBuffer && pVSErrorBuffer->lpVtbl->GetBufferPointer(pVSErrorBuffer))
+		ddlogMessage(DDLOG_INFO, pVSErrorBuffer->lpVtbl->GetBufferPointer(pVSErrorBuffer));
+	SAFE_RELEASE(pVSErrorBuffer);
+	}
+#endif
+	if (FAILED(hr)) {
+		ddlogError(DDLOG_WARN, "AssembleShader failed", hr);
+		SAFE_RELEASE(pVSOpcodeBuffer);
+		return;
+	}
+
+// This is for debugging. Remove to enable vertex shaders in HW
+#define _GLD_FORCE_SW_VS 0
+
+	if (_GLD_FORCE_SW_VS) {
+		// _GLD_FORCE_SW_VS should be disabled for Final Release
+		ddlogMessage(DDLOG_SYSTEM, "[Forcing shaders in SW]\n");
+	}
+
+	// Try and create shader in hardware.
+	// NOTE: The D3D Ref device appears to succeed when trying to
+	//       create the device in hardware, but later complains
+	//       when trying to set it with SetVertexShader(). Go figure.
+	if (_GLD_FORCE_SW_VS || glb.dwDriver == GLDS_DRIVER_REF) {
+		// Don't try and create a hardware shader with the Ref device
+		hr = E_FAIL; // COM error/fail result
+	} else {
+		gld->VStwosidelight.bHardware = TRUE;
+		hr = IDirect3DDevice8_CreateVertexShader(
+			gld->pDev,
+			dwTwoSidedLightingDecl,
+			pVSOpcodeBuffer->lpVtbl->GetBufferPointer(pVSOpcodeBuffer),
+			&gld->VStwosidelight.hShader,
+			0);
+	}
+	if (FAILED(hr)) {
+		ddlogMessage(DDLOG_INFO, "... HW failed, trying SW...\n");
+		// Failed. Try and create shader for software processing
+		hr = IDirect3DDevice8_CreateVertexShader(
+			gld->pDev,
+			dwTwoSidedLightingDecl,
+			pVSOpcodeBuffer->lpVtbl->GetBufferPointer(pVSOpcodeBuffer),
+			&gld->VStwosidelight.hShader,
+			D3DUSAGE_SOFTWAREPROCESSING);
+		if (FAILED(hr)) {
+			gld->VStwosidelight.hShader = 0; // Sanity check
+			ddlogError(DDLOG_WARN, "CreateVertexShader failed", hr);
+			return;
+		}
+		// Succeeded, but for software processing
+		gld->VStwosidelight.bHardware = FALSE;
+	}
+
+	SAFE_RELEASE(pVSOpcodeBuffer);
+
+	ddlogMessage(DDLOG_INFO, "... OK\n");
+}
+
+//---------------------------------------------------------------------------
+
+void _gldDestroyVertexShaders(
+	GLD_driver_dx8 *gld)
+{
+	if (gld->VStwosidelight.hShader) {
+		IDirect3DDevice8_DeleteVertexShader(gld->pDev, gld->VStwosidelight.hShader);
+		gld->VStwosidelight.hShader = 0;
+	}
+}
+*/
+//---------------------------------------------------------------------------
+// In the code visible here these are only named by the commented-out DirectDraw QueryInterface block
+LPVOID lpOpaque1 = NULL;
+LPVOID lpOpaque2 = NULL;
+
+BOOL gldCreateDrawable_DX(
+	DGL_ctx *ctx,
+//	BOOL bDefaultDriver,
+	BOOL bDirectDrawPersistant,
+	BOOL bPersistantBuffers)
+{
+	// Create (or re-create) the Direct3D8 object, device and primitive
+	// buffers for a GL context and store them in ctx->glPriv.  Returns TRUE
+	// on success, FALSE on failure (nContextError is set on most paths).
+	// bDirectDrawPersistant:	share/cache the IDirect3D8 in dx8Globals
+	// bPersistantBuffers:		share/cache the IDirect3DDevice8 in dx8Globals
+	HRESULT					hResult;
+	GLD_driver_dx8			*lpCtx = NULL;
+	D3DDEVTYPE				d3dDevType;
+	D3DPRESENT_PARAMETERS	d3dpp;
+	D3DDISPLAYMODE			d3ddm;
+	DWORD					dwBehaviourFlags;
+	D3DADAPTER_IDENTIFIER8	d3dIdent;
+
+	// Error if context is NULL.
+	if (ctx == NULL)
+		return FALSE;
+
+	if (ctx->glPriv) {
+		lpCtx = ctx->glPriv;
+		// Release any existing interfaces
+		SAFE_RELEASE(lpCtx->pDev);
+		SAFE_RELEASE(lpCtx->pD3D);
+	} else {
+		lpCtx = (GLD_driver_dx8*)malloc(sizeof(GLD_driver_dx8));
+		if (lpCtx == NULL) {
+			nContextError = GLDERR_MEM;
+			return FALSE;
+		}
+		ZeroMemory(lpCtx, sizeof(*lpCtx)); // Whole struct, not sizeof(pointer)
+	}
+
+	d3dDevType = (glb.dwDriver == GLDS_DRIVER_HAL) ? D3DDEVTYPE_HAL : D3DDEVTYPE_REF;
+	// Use persistent interface if needed
+	if (bDirectDrawPersistant && dx8Globals.bDirect3D) {
+		lpCtx->pD3D = dx8Globals.pD3D;
+		IDirect3D8_AddRef(lpCtx->pD3D);
+		goto SkipDirectDrawCreate;
+	}
+
+	// Create Direct3D8 object
+	lpCtx->pD3D = dx8Globals.fnDirect3DCreate8(D3D_SDK_VERSION_DX8_SUPPORT_WIN95);
+	if (lpCtx->pD3D == NULL) {
+		MessageBox(NULL, "Unable to initialize Direct3D8", "GLDirect", MB_OK);
+		ddlogMessage(DDLOG_CRITICAL_OR_WARN, "Unable to create Direct3D8 interface");
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+
+	// Cache Direct3D interface for subsequent GLRCs
+	if (bDirectDrawPersistant && !dx8Globals.bDirect3D) {
+		dx8Globals.pD3D = lpCtx->pD3D;
+		IDirect3D8_AddRef(dx8Globals.pD3D);
+		dx8Globals.bDirect3D = TRUE;
+	}
+SkipDirectDrawCreate:
+
+	// Get the display mode so we can make a compatible backbuffer
+	hResult = IDirect3D8_GetAdapterDisplayMode(lpCtx->pD3D, glb.dwAdapter, &d3ddm);
+	if (FAILED(hResult)) {
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+
+	// Get device caps
+	hResult = IDirect3D8_GetDeviceCaps(lpCtx->pD3D, glb.dwAdapter, d3dDevType, &lpCtx->d3dCaps8);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "IDirect3D8_GetDeviceCaps failed", hResult);
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+
+	// Check for hardware transform & lighting
+	lpCtx->bHasHWTnL = lpCtx->d3dCaps8.DevCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT ? TRUE : FALSE;
+
+	// If this flag is present then we can't default to Mesa
+	// SW rendering between BeginScene() and EndScene().
+	if (lpCtx->d3dCaps8.Caps2 & D3DCAPS2_NO2DDURING3DSCENE) {
+		ddlogMessage(DDLOG_WARN,
+			"Warning          : No 2D allowed during 3D scene.\n");
+	}
+
+	//
+	//	Create the Direct3D context
+	//
+
+	// Re-use original IDirect3DDevice if persistent buffers exist.
+	// Note that we test for persistent IDirect3D8 as well
+	// bDirectDrawPersistant == persistent IDirect3D8 (DirectDraw8 does not exist)
+	if (bDirectDrawPersistant && bPersistantBuffers && dx8Globals.pD3D && dx8Globals.pDev) {
+		lpCtx->pDev = dx8Globals.pDev;
+		IDirect3DDevice8_AddRef(dx8Globals.pDev);
+		goto skip_direct3ddevice_create;
+	}
+
+	// Clear the presentation parameters (sets all members to zero)
+	ZeroMemory(&d3dpp, sizeof(d3dpp));
+
+	// Recommended by MS; needed for MultiSample.
+	// Be careful if altering this for FullScreenBlit
+	d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
+
+	d3dpp.BackBufferFormat	= d3ddm.Format;
+	d3dpp.BackBufferCount	= 1;
+	d3dpp.MultiSampleType	= _gldGetDeviceMultiSampleType(lpCtx->pD3D, d3ddm.Format, d3dDevType, !ctx->bFullscreen);
+	d3dpp.AutoDepthStencilFormat	= ctx->lpPF->dwDriverData;
+	d3dpp.EnableAutoDepthStencil	= (d3dpp.AutoDepthStencilFormat == D3DFMT_UNKNOWN) ? FALSE : TRUE;
+
+	if (ctx->bFullscreen) {
+		ddlogWarnOption(FALSE); // Don't popup any messages in fullscreen 
+		d3dpp.Windowed							= FALSE;
+		d3dpp.BackBufferWidth					= d3ddm.Width;
+		d3dpp.BackBufferHeight					= d3ddm.Height;
+		d3dpp.hDeviceWindow						= ctx->hWnd;
+		d3dpp.FullScreen_RefreshRateInHz		= D3DPRESENT_RATE_DEFAULT;
+
+		// Support for vertical retrace synchronisation.
+		// Set default presentation interval in case caps bits are missing
+		d3dpp.FullScreen_PresentationInterval	= D3DPRESENT_INTERVAL_DEFAULT;
+		if (glb.bWaitForRetrace) {
+			if (lpCtx->d3dCaps8.PresentationIntervals & D3DPRESENT_INTERVAL_ONE)
+				d3dpp.FullScreen_PresentationInterval = D3DPRESENT_INTERVAL_ONE;
+		} else {
+			if (lpCtx->d3dCaps8.PresentationIntervals & D3DPRESENT_INTERVAL_IMMEDIATE)
+				d3dpp.FullScreen_PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;
+		}
+	} else {
+		ddlogWarnOption(glb.bMessageBoxWarnings); // OK to popup messages
+		d3dpp.Windowed							= TRUE;
+		d3dpp.BackBufferWidth					= ctx->dwWidth;
+		d3dpp.BackBufferHeight					= ctx->dwHeight;
+		d3dpp.hDeviceWindow						= ctx->hWnd;
+		d3dpp.FullScreen_RefreshRateInHz		= 0;
+		// FullScreen_PresentationInterval must be default for Windowed mode
+		d3dpp.FullScreen_PresentationInterval	= D3DPRESENT_INTERVAL_DEFAULT;
+	}
+
+	// Decide if we can use hardware TnL
+	dwBehaviourFlags = (lpCtx->bHasHWTnL) ?
+		D3DCREATE_MIXED_VERTEXPROCESSING : D3DCREATE_SOFTWARE_VERTEXPROCESSING;
+	// Add flag to tell D3D to be thread-safe
+	if (glb.bMultiThreaded)
+		dwBehaviourFlags |= D3DCREATE_MULTITHREADED;
+	// Add flag to tell D3D to be FPU-safe
+	if (!glb.bFastFPU)
+		dwBehaviourFlags |= D3DCREATE_FPU_PRESERVE;
+	hResult = IDirect3D8_CreateDevice(lpCtx->pD3D,
+								glb.dwAdapter,
+								d3dDevType,
+								ctx->hWnd,
+								dwBehaviourFlags,
+								&d3dpp,
+								&lpCtx->pDev);
+    if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "IDirect3D8_CreateDevice failed", hResult);
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+	// NOTE(review): pDev is cached here without an AddRef (pD3D above gets one) -- confirm
+	if (bDirectDrawPersistant && bPersistantBuffers && dx8Globals.pD3D) {
+		dx8Globals.pDev = lpCtx->pDev;
+		dx8Globals.bDirect3DDevice = TRUE;
+	}
+
+/*
+	// See if DDraw interfaces are available (DaveM)
+	hResult = IDirect3D8_QueryInterface(lpCtx->pDev,
+		&IID_IDirectDraw7, (LPVOID*)&lpOpaque1);
+	if (FAILED(hResult) || lpOpaque1 == NULL) {
+		ddlogMessage(DDLOG_INFO, "DirectDraw QueryInterface unavailable\n");
+	}
+
+	hResult = IDirect3DDevice8_QueryInterface(lpCtx->pDev, 
+		&IID_IDirectDrawSurface7, (LPVOID*)&lpOpaque2);
+	if (FAILED(hResult) || lpOpaque2 == NULL) {
+		ddlogMessage(DDLOG_INFO, "DirectDrawSurface QueryInterface unavialable\n");
+	}
+*/	
+	// Dump some useful stats
+	hResult = IDirect3D8_GetAdapterIdentifier(
+		lpCtx->pD3D,
+		glb.dwAdapter,
+		D3DENUM_NO_WHQL_LEVEL, // Avoids 1 to 2 second delay
+		&d3dIdent);
+	if (SUCCEEDED(hResult)) {
+		ddlogPrintf(DDLOG_INFO, "[Driver Description: %s]", d3dIdent.Description);
+		ddlogPrintf(DDLOG_INFO, "[Driver file: %s %d.%d.%02d.%d]",
+			d3dIdent.Driver,
+			HIWORD(d3dIdent.DriverVersion.HighPart),
+			LOWORD(d3dIdent.DriverVersion.HighPart),
+			HIWORD(d3dIdent.DriverVersion.LowPart),
+			LOWORD(d3dIdent.DriverVersion.LowPart));
+		ddlogPrintf(DDLOG_INFO, "[VendorId: 0x%X, DeviceId: 0x%X, SubSysId: 0x%X, Revision: 0x%X]",
+			d3dIdent.VendorId, d3dIdent.DeviceId, d3dIdent.SubSysId, d3dIdent.Revision);
+	}
+
+	// Init projection matrix for D3D TnL
+	D3DXMatrixIdentity(&lpCtx->matProjection);
+	lpCtx->matModelView = lpCtx->matProjection;
+//		gld->bUseMesaProjection = TRUE;
+
+skip_direct3ddevice_create:
+
+	// Create buffers to hold primitives
+	lpCtx->PB2d.dwFVF		= GLD_FVF_2D_VERTEX;
+	lpCtx->PB2d.dwPool		= D3DPOOL_SYSTEMMEM;
+	lpCtx->PB2d.dwStride	= sizeof(GLD_2D_VERTEX);
+	lpCtx->PB2d.dwUsage		= D3DUSAGE_DONOTCLIP |
+								D3DUSAGE_DYNAMIC |
+								D3DUSAGE_SOFTWAREPROCESSING |
+								D3DUSAGE_WRITEONLY;
+	hResult = _gldCreatePrimitiveBuffer(ctx->glCtx, lpCtx, &lpCtx->PB2d);
+	if (FAILED(hResult))
+		goto return_with_error;
+
+	lpCtx->PB3d.dwFVF		= GLD_FVF_3D_VERTEX;
+	lpCtx->PB3d.dwPool		= D3DPOOL_DEFAULT;
+	lpCtx->PB3d.dwStride	= sizeof(GLD_3D_VERTEX);
+	lpCtx->PB3d.dwUsage		= D3DUSAGE_DYNAMIC |
+								D3DUSAGE_SOFTWAREPROCESSING |
+								D3DUSAGE_WRITEONLY;
+	hResult = _gldCreatePrimitiveBuffer(ctx->glCtx, lpCtx, &lpCtx->PB3d);
+	if (FAILED(hResult))
+		goto return_with_error;
+
+/*	// NOTE: A FVF code of zero indicates a non-FVF vertex buffer (for vertex shaders)
+	lpCtx->PBtwosidelight.dwFVF		= 0; //GLD_FVF_TWOSIDED_VERTEX;
+	lpCtx->PBtwosidelight.dwPool	= D3DPOOL_DEFAULT;
+	lpCtx->PBtwosidelight.dwStride	= sizeof(GLD_TWOSIDED_VERTEX);
+	lpCtx->PBtwosidelight.dwUsage	= D3DUSAGE_DONOTCLIP |
+								D3DUSAGE_DYNAMIC |
+								D3DUSAGE_SOFTWAREPROCESSING |
+								D3DUSAGE_WRITEONLY;
+	hResult = _gldCreatePrimitiveBuffer(ctx->glCtx, lpCtx, &lpCtx->PBtwosidelight);
+	if (FAILED(hResult))
+		goto return_with_error;*/
+
+	// Now try and create the DX8 Vertex Shaders
+//	_gldCreateVertexShaders(lpCtx);
+
+	// Zero the pipeline usage counters
+	lpCtx->PipelineUsage.qwMesa.QuadPart = 
+//	lpCtx->PipelineUsage.dwD3D2SVS.QuadPart =
+	lpCtx->PipelineUsage.qwD3DFVF.QuadPart = 0;
+
+	// Assign drawable to GL private
+	ctx->glPriv = lpCtx;
+	return TRUE;
+
+return_with_error:
+	// Clean up and bail
+	_gldDestroyPrimitiveBuffer(&lpCtx->PB3d);
+	_gldDestroyPrimitiveBuffer(&lpCtx->PB2d);
+	SAFE_RELEASE(lpCtx->pDev);
+	SAFE_RELEASE(lpCtx->pD3D);
+
+	// Free the private data only if this call allocated it; a structure
+	// already attached to ctx->glPriv stays owned by the context.
+	if (ctx->glPriv != lpCtx)
+		free(lpCtx);
+	return FALSE;
+}
+
+//---------------------------------------------------------------------------
+
+BOOL gldResizeDrawable_DX(
+	DGL_ctx *ctx,
+	BOOL bDefaultDriver,
+	BOOL bPersistantInterface,
+	BOOL bPersistantBuffers)
+{
+	GLD_driver_dx8			*gld = NULL;
+	D3DDEVTYPE				d3dDevType;
+	D3DPRESENT_PARAMETERS	d3dpp;
+	D3DDISPLAYMODE			d3ddm;
+	HRESULT					hResult;
+	// Reset the device to the context's current size/mode and restart the scene.
+	// Returns FALSE on failure; bPersistantInterface/bPersistantBuffers are unused.
+	if (ctx == NULL)
+		return FALSE;
+
+	gld = ctx->glPriv;
+	if (gld == NULL)
+		return FALSE;
+
+	if (ctx->bSceneStarted) {
+		IDirect3DDevice8_EndScene(gld->pDev);
+		ctx->bSceneStarted = FALSE;
+	}
+
+	d3dDevType = (glb.dwDriver == GLDS_DRIVER_HAL) ? D3DDEVTYPE_HAL : D3DDEVTYPE_REF;
+	if (!bDefaultDriver)
+		d3dDevType = D3DDEVTYPE_REF; // Force Direct3D Reference Rasterise (software)
+
+	// Get the display mode so we can make a compatible backbuffer
+	hResult = IDirect3D8_GetAdapterDisplayMode(gld->pD3D, glb.dwAdapter, &d3ddm);
+	if (FAILED(hResult)) {
+        nContextError = GLDERR_D3D;
+//		goto return_with_error;
+		return FALSE;
+	}
+
+	// Destroy DX8 Vertex Shaders before Reset()
+//	_gldDestroyVertexShaders(gld);
+
+	// Release POOL_DEFAULT objects before Reset()
+	if (gld->PB2d.dwPool == D3DPOOL_DEFAULT)
+		_gldDestroyPrimitiveBuffer(&gld->PB2d);
+	if (gld->PB3d.dwPool == D3DPOOL_DEFAULT)
+		_gldDestroyPrimitiveBuffer(&gld->PB3d);
+//	if (gld->PBtwosidelight.dwPool == D3DPOOL_DEFAULT)
+//		_gldDestroyPrimitiveBuffer(&gld->PBtwosidelight);
+
+	// Clear the presentation parameters (sets all members to zero)
+	ZeroMemory(&d3dpp, sizeof(d3dpp));
+
+	// Recommended by MS; needed for MultiSample.
+	// Be careful if altering this for FullScreenBlit
+	d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
+
+	d3dpp.BackBufferFormat	= d3ddm.Format;
+	d3dpp.BackBufferCount	= 1;
+	d3dpp.MultiSampleType	= _gldGetDeviceMultiSampleType(gld->pD3D, d3ddm.Format, d3dDevType, !ctx->bFullscreen);
+	d3dpp.AutoDepthStencilFormat	= ctx->lpPF->dwDriverData;
+	d3dpp.EnableAutoDepthStencil	= (d3dpp.AutoDepthStencilFormat == D3DFMT_UNKNOWN) ? FALSE : TRUE;
+
+	// TODO: Sync to refresh
+
+	if (ctx->bFullscreen) {
+		ddlogWarnOption(FALSE); // Don't popup any messages in fullscreen 
+		d3dpp.Windowed							= FALSE;
+		d3dpp.BackBufferWidth					= d3ddm.Width;
+		d3dpp.BackBufferHeight					= d3ddm.Height;
+		d3dpp.hDeviceWindow						= ctx->hWnd;
+		d3dpp.FullScreen_RefreshRateInHz		= D3DPRESENT_RATE_DEFAULT;
+		d3dpp.FullScreen_PresentationInterval	= D3DPRESENT_INTERVAL_DEFAULT;
+		// Get better benchmark results? KeithH
+//		d3dpp.FullScreen_RefreshRateInHz		= D3DPRESENT_RATE_UNLIMITED;
+	} else {
+		ddlogWarnOption(glb.bMessageBoxWarnings); // OK to popup messages
+		d3dpp.Windowed							= TRUE;
+		d3dpp.BackBufferWidth					= ctx->dwWidth;
+		d3dpp.BackBufferHeight					= ctx->dwHeight;
+		d3dpp.hDeviceWindow						= ctx->hWnd;
+		d3dpp.FullScreen_RefreshRateInHz		= 0;
+		d3dpp.FullScreen_PresentationInterval	= D3DPRESENT_INTERVAL_DEFAULT;
+	}
+	hResult = IDirect3DDevice8_Reset(gld->pDev, &d3dpp);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: Reset failed", hResult);
+		return FALSE;
+		//goto cleanup_and_return_with_error;
+	}
+
+    // Explicitly Clear resized surfaces (DaveM)
+	{
+		D3DVIEWPORT8 d3dvp1, d3dvp2;
+		IDirect3DDevice8_GetViewport(gld->pDev, &d3dvp1);
+		IDirect3DDevice8_GetViewport(gld->pDev, &d3dvp2);
+		d3dvp1.X = 0;
+		d3dvp1.Y = 0;
+		d3dvp1.Width = ctx->dwWidth;
+		d3dvp1.Height = ctx->dwHeight;
+		IDirect3DDevice8_SetViewport(gld->pDev, &d3dvp1);
+		IDirect3DDevice8_Clear(gld->pDev,0,NULL,D3DCLEAR_TARGET,0,0,0);
+		IDirect3DDevice8_SetViewport(gld->pDev, &d3dvp2);
+	}
+
+	// Recreate POOL_DEFAULT objects; fail the resize if a buffer cannot be rebuilt
+	if (gld->PB2d.dwPool == D3DPOOL_DEFAULT) {
+		hResult = _gldCreatePrimitiveBuffer(ctx->glCtx, gld, &gld->PB2d);
+		if (FAILED(hResult))
+			return FALSE;
+	}
+	if (gld->PB3d.dwPool == D3DPOOL_DEFAULT) {
+		hResult = _gldCreatePrimitiveBuffer(ctx->glCtx, gld, &gld->PB3d);
+		if (FAILED(hResult))
+			return FALSE;
+	}
+
+
+	// Recreate DX8 Vertex Shaders
+//	_gldCreateVertexShaders(gld);
+
+	// Signal a complete state update
+	ctx->glCtx->Driver.UpdateState(ctx->glCtx, _NEW_ALL);
+
+	// Begin a new scene
+	IDirect3DDevice8_BeginScene(gld->pDev);
+	ctx->bSceneStarted = TRUE;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+BOOL gldDestroyDrawable_DX(
+	DGL_ctx *ctx)
+{
+	//
+	// Release the Direct3D objects held in a context's private drawable
+	// data, then free that data and clear ctx->glPriv.
+	// Debug builds log the pipeline usage counters first.
+	// Returns FALSE if ctx is NULL or has no drawable, otherwise TRUE.
+	//
+	GLD_driver_dx8	*gld;
+
+	if (ctx == NULL || ctx->glPriv == NULL)
+		return FALSE;
+
+	gld = ctx->glPriv;
+
+#ifdef _DEBUG
+	// Dump the usage counters (high part then low part of each)
+	ddlogPrintf(DDLOG_SYSTEM, "Usage: M:0x%X%X, D:0x%X%X",
+		gld->PipelineUsage.qwMesa.HighPart,
+		gld->PipelineUsage.qwMesa.LowPart,
+		gld->PipelineUsage.qwD3DFVF.HighPart,
+		gld->PipelineUsage.qwD3DFVF.LowPart);
+#endif
+
+	// Primitive buffers go first...
+	_gldDestroyPrimitiveBuffer(&gld->PB3d);
+	_gldDestroyPrimitiveBuffer(&gld->PB2d);
+
+	// ...then the device, then the Direct3D8 object
+	SAFE_RELEASE(gld->pDev);
+	SAFE_RELEASE(gld->pD3D);
+
+	// Free the private drawable data (gld is the ctx->glPriv pointer)
+	free(gld);
+	ctx->glPriv = NULL;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+BOOL gldCreatePrivateGlobals_DX(void)
+{
+	// Reset dx8Globals, then load d3d8.dll and look up Direct3DCreate8.
+	// Returns FALSE (leaving no DLL handle behind) if either step fails.
+	ZeroMemory(&dx8Globals, sizeof(dx8Globals));
+	dx8Globals.hD3D8DLL = LoadLibrary("D3D8.DLL");
+	if (dx8Globals.hD3D8DLL == NULL)
+		return FALSE;
+
+	// Now try and obtain Direct3DCreate8
+	dx8Globals.fnDirect3DCreate8 = (FNDIRECT3DCREATE8)GetProcAddress(dx8Globals.hD3D8DLL, "Direct3DCreate8");
+	if (dx8Globals.fnDirect3DCreate8 == NULL) {
+		FreeLibrary(dx8Globals.hD3D8DLL);
+		dx8Globals.hD3D8DLL = NULL; // Freed: must not be unloaded a second time
+		return FALSE;
+	}
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+BOOL gldDestroyPrivateGlobals_DX(void)
+{
+	// Release the cached device and Direct3D8, unload d3d8.dll; always TRUE.
+	if (dx8Globals.bDirect3DDevice) {
+		SAFE_RELEASE(dx8Globals.pDev);
+		dx8Globals.bDirect3DDevice = FALSE;
+	}
+	if (dx8Globals.bDirect3D) {
+		SAFE_RELEASE(dx8Globals.pD3D);
+		dx8Globals.bDirect3D = FALSE;
+	}
+	if (dx8Globals.hD3D8DLL != NULL)	// Nothing to unload if the DLL never loaded
+		FreeLibrary(dx8Globals.hD3D8DLL);
+	dx8Globals.hD3D8DLL = NULL;
+	dx8Globals.fnDirect3DCreate8 = NULL;
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+static void _BitsFromDisplayFormat(
+	D3DFORMAT fmt,
+	BYTE *cColorBits,
+	BYTE *cRedBits,
+	BYTE *cGreenBits,
+	BYTE *cBlueBits,
+	BYTE *cAlphaBits)
+{
+	// Report the colour-buffer layout for a display format: total colour
+	// bits plus the red, green, blue and alpha channel widths.
+	// Any format not listed below is reported as 32 bits, 8:8:8, no alpha
+	// (the same answer as D3DFMT_X8R8G8B8).
+	BYTE	nColor	= 32;
+	BYTE	nRed	= 8;
+	BYTE	nGreen	= 8;
+	BYTE	nBlue	= 8;
+	BYTE	nAlpha	= 0;
+
+	switch (fmt) {
+	case D3DFMT_X1R5G5B5:
+		nColor	= 16;
+		nRed	= 5;
+		nGreen	= 5;
+		nBlue	= 5;
+		break;
+	case D3DFMT_R5G6B5:
+		nColor	= 16;
+		nRed	= 5;
+		nGreen	= 6;
+		nBlue	= 5;
+		break;
+	case D3DFMT_A8R8G8B8:
+		nAlpha	= 8;
+		break;
+	default:
+		// D3DFMT_X8R8G8B8 and anything unexpected keep the defaults
+		break;
+	}
+
+	// Hand the results back to the caller
+	*cColorBits	= nColor;
+	*cRedBits	= nRed;
+	*cGreenBits	= nGreen;
+	*cBlueBits	= nBlue;
+	*cAlphaBits	= nAlpha;
+}
+
+//---------------------------------------------------------------------------
+
+static void _BitsFromDepthStencilFormat(
+	D3DFORMAT fmt,
+	BYTE *cDepthBits,
+	BYTE *cStencilBits)
+{
+	// Report the depth and stencil bit counts for a depth-stencil format.
+	// NOTE: GL expects either 32 or 16 as depth bits.
+	*cDepthBits = 0;	// Formats not listed below report no depth or stencil
+	*cStencilBits = 0;	// instead of leaving the outputs unwritten
+	switch (fmt) {
+	case D3DFMT_D32:
+	case D3DFMT_D24X8:
+		*cDepthBits = 32;
+		*cStencilBits = 0;
+		return;
+	case D3DFMT_D15S1:
+		*cDepthBits = 16;
+		*cStencilBits = 1;
+		return;
+	case D3DFMT_D24S8:
+		*cDepthBits = 32;
+		*cStencilBits = 8;
+		return;
+	case D3DFMT_D16:
+		*cDepthBits = 16;
+		*cStencilBits = 0;
+		return;
+	case D3DFMT_D24X4S4:
+		*cDepthBits = 32;
+		*cStencilBits = 4;
+		return;
+	}
+}
+
+//---------------------------------------------------------------------------
+
+BOOL gldBuildPixelformatList_DX(void)
+{
+	D3DDISPLAYMODE		d3ddm;
+	D3DFORMAT			fmt[6];
+	IDirect3D8			*pD3D = NULL;
+	HRESULT				hr;
+	int					nSupportedFormats = 0;
+	int					i;
+	DGL_pixelFormat		*pPF;
+	BYTE				cColorBits, cRedBits, cGreenBits, cBlueBits, cAlphaBits;
+	// Rebuild glb.lpPF: one single- and one double-buffered pixelformat per
+	// depth-stencil format that works with the adapter's display mode.
+
+	// Direct3D (SW or HW)
+	// These are arranged so that 'best' pixelformat
+	// is higher in the list (for ChoosePixelFormat).
+	const D3DFORMAT DepthStencil[6] = {
+		D3DFMT_D15S1,
+		D3DFMT_D16,
+		D3DFMT_D24X8,
+		D3DFMT_D24X4S4,
+		D3DFMT_D24S8,
+		D3DFMT_D32,
+	};
+
+	// Dump DX version
+	ddlogMessage(GLDLOG_SYSTEM, "DirectX Version  : 8.0\n");
+
+	// Release any existing pixelformat list
+	if (glb.lpPF) {
+		free(glb.lpPF);
+	}
+
+	glb.nPixelFormatCount	= 0;
+	glb.lpPF				= NULL;
+
+	//
+	// Pixelformats for Direct3D (SW or HW) rendering
+	//
+
+	// Get a Direct3D 8.0 interface
+	pD3D = dx8Globals.fnDirect3DCreate8(D3D_SDK_VERSION_DX8_SUPPORT_WIN95);
+	if (!pD3D) {
+		return FALSE;
+	}
+
+	// We will use the display mode format when finding compliant
+	// rendertarget/depth-stencil surfaces.
+	hr = IDirect3D8_GetAdapterDisplayMode(pD3D, glb.dwAdapter, &d3ddm);
+	if (FAILED(hr)) {
+		IDirect3D8_Release(pD3D);
+		return FALSE;
+	}
+	
+	// Run through the possible formats and detect supported formats
+	for (i=0; i<6; i++) {
+		hr = IDirect3D8_CheckDeviceFormat(
+			pD3D,
+			glb.dwAdapter,
+			glb.dwDriver==GLDS_DRIVER_HAL ? D3DDEVTYPE_HAL : D3DDEVTYPE_REF,
+            d3ddm.Format,
+			D3DUSAGE_DEPTHSTENCIL,
+			D3DRTYPE_SURFACE,
+			DepthStencil[i]);
+		if (FAILED(hr))
+			// A failure here is not fatal.
+			continue;
+
+	    // Verify that the depth format is compatible.
+	    hr = IDirect3D8_CheckDepthStencilMatch(
+				pD3D,
+				glb.dwAdapter,
+                glb.dwDriver==GLDS_DRIVER_HAL ? D3DDEVTYPE_HAL : D3DDEVTYPE_REF,
+                d3ddm.Format,
+                d3ddm.Format,
+                DepthStencil[i]);
+		if (FAILED(hr))
+			// A failure here is not fatal, just means depth-stencil
+			// format is not compatible with this display mode.
+			continue;
+
+		fmt[nSupportedFormats++] = DepthStencil[i];
+	}
+
+	IDirect3D8_Release(pD3D);
+
+	if (nSupportedFormats == 0)
+		return FALSE; // Bail: no compliant pixelformats
+
+	// Total count of pixelformats is nSupportedFormats*2:
+	// one single-buffered and one double-buffered entry per format.
+	glb.lpPF = (DGL_pixelFormat *)calloc((nSupportedFormats)*2, sizeof(DGL_pixelFormat));
+	glb.nPixelFormatCount = (nSupportedFormats)*2;
+	if (glb.lpPF == NULL) {
+		glb.nPixelFormatCount = 0;
+		return FALSE;
+	}
+
+	// Get a copy of pointer that we can alter
+	pPF = glb.lpPF;
+
+	// Cache colour bits from display format
+	_BitsFromDisplayFormat(d3ddm.Format, &cColorBits, &cRedBits, &cGreenBits, &cBlueBits, &cAlphaBits);
+
+	//
+	// Add single-buffer formats
+	//
+
+	// Single-buffer, no depth-stencil buffer
+/*	memcpy(pPF, &pfTemplateHW, sizeof(DGL_pixelFormat));
+	pPF->pfd.dwFlags &= ~PFD_DOUBLEBUFFER; // Remove doublebuffer flag
+	pPF->pfd.cColorBits		= cColorBits;
+	pPF->pfd.cRedBits		= cRedBits;
+	pPF->pfd.cGreenBits		= cGreenBits;
+	pPF->pfd.cBlueBits		= cBlueBits;
+	pPF->pfd.cAlphaBits		= cAlphaBits;
+	pPF->pfd.cDepthBits		= 0;
+	pPF->pfd.cStencilBits	= 0;
+	pPF->dwDriverData		= D3DFMT_UNKNOWN;
+	pPF++;*/
+
+	for (i=0; i<nSupportedFormats; i++, pPF++) {
+		memcpy(pPF, &pfTemplateHW, sizeof(DGL_pixelFormat));
+		pPF->pfd.dwFlags &= ~PFD_DOUBLEBUFFER; // Remove doublebuffer flag
+		pPF->pfd.cColorBits		= cColorBits;
+		pPF->pfd.cRedBits		= cRedBits;
+		pPF->pfd.cGreenBits		= cGreenBits;
+		pPF->pfd.cBlueBits		= cBlueBits;
+		pPF->pfd.cAlphaBits		= cAlphaBits;
+		_BitsFromDepthStencilFormat(fmt[i], &pPF->pfd.cDepthBits, &pPF->pfd.cStencilBits);
+		pPF->dwDriverData		= fmt[i];
+	}
+
+	//
+	// Add double-buffer formats
+	//
+
+/*	memcpy(pPF, &pfTemplateHW, sizeof(DGL_pixelFormat));
+	pPF->pfd.cColorBits		= cColorBits;
+	pPF->pfd.cRedBits		= cRedBits;
+	pPF->pfd.cGreenBits		= cGreenBits;
+	pPF->pfd.cBlueBits		= cBlueBits;
+	pPF->pfd.cAlphaBits		= cAlphaBits;
+	pPF->pfd.cDepthBits		= 0;
+	pPF->pfd.cStencilBits	= 0;
+	pPF->dwDriverData		= D3DFMT_UNKNOWN;
+	pPF++;*/
+
+	for (i=0; i<nSupportedFormats; i++, pPF++) {
+		memcpy(pPF, &pfTemplateHW, sizeof(DGL_pixelFormat));
+		pPF->pfd.cColorBits		= cColorBits;
+		pPF->pfd.cRedBits		= cRedBits;
+		pPF->pfd.cGreenBits		= cGreenBits;
+		pPF->pfd.cBlueBits		= cBlueBits;
+		pPF->pfd.cAlphaBits		= cAlphaBits;
+		_BitsFromDepthStencilFormat(fmt[i], &pPF->pfd.cDepthBits, &pPF->pfd.cStencilBits);
+		pPF->dwDriverData		= fmt[i];
+	}
+
+	// Popup warning message if non RGB color mode
+	{
+		// This is a hack. KeithH
+		HDC hdcDesktop = GetDC(NULL);
+		DWORD dwDisplayBitDepth = GetDeviceCaps(hdcDesktop, BITSPIXEL);
+		ReleaseDC(0, hdcDesktop);
+		if (dwDisplayBitDepth <= 8) {
+			ddlogPrintf(DDLOG_WARN, "Current Color Depth %d bpp is not supported", dwDisplayBitDepth);
+			MessageBox(NULL, szColorDepthWarning, "GLDirect", MB_OK | MB_ICONWARNING);
+		}
+	}
+
+	// Mark list as 'current'
+	glb.bPixelformatsDirty = FALSE;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+BOOL gldInitialiseMesa_DX(
+	DGL_ctx *lpCtx)
+{
+	GLD_driver_dx8	*gld = NULL;
+	int				MaxTextureSize, TextureLevels;
+	BOOL			bSoftwareTnL;
+	// Set Mesa's limits and the initial D3D device state for this context.
+	if (lpCtx == NULL)
+		return FALSE;
+
+	gld = lpCtx->glPriv;
+	if (gld == NULL)
+		return FALSE;
+
+	if (glb.bMultitexture) {
+		lpCtx->glCtx->Const.MaxTextureUnits = gld->d3dCaps8.MaxSimultaneousTextures;
+		// Only support MAX_TEXTURE_UNITS texture units.
+		// ** If this is altered then the FVF formats must be reviewed **.
+		if (lpCtx->glCtx->Const.MaxTextureUnits > GLD_MAX_TEXTURE_UNITS_DX8)
+			lpCtx->glCtx->Const.MaxTextureUnits = GLD_MAX_TEXTURE_UNITS_DX8;
+	} else {
+		// Multitexture override
+		lpCtx->glCtx->Const.MaxTextureUnits = 1;
+	}
+
+	// max texture size
+	MaxTextureSize = min(gld->d3dCaps8.MaxTextureHeight, gld->d3dCaps8.MaxTextureWidth);
+	if (MaxTextureSize == 0)
+		MaxTextureSize = 256; // Sanity check
+
+	//
+	// HACK!!
+	if (MaxTextureSize > 1024)
+		MaxTextureSize = 1024; // HACK - CLAMP TO 1024
+	// HACK!!
+	//
+
+	// Mesa wants the limit as a number of mipmap levels rather than a size:
+	// count the bits needed to represent MaxTextureSize (1024 gives 11).
+	TextureLevels = 0;
+	// Shift right until the size reaches zero, counting the steps.
+	while (MaxTextureSize) {
+		TextureLevels++;
+		MaxTextureSize >>= 1;
+	}
+	lpCtx->glCtx->Const.MaxTextureLevels = (TextureLevels) ? TextureLevels : 8;
+	lpCtx->glCtx->Const.MaxDrawBuffers = 1;
+
+	IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_LIGHTING, FALSE);
+	IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_CULLMODE, D3DCULL_NONE);
+	IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_DITHERENABLE, TRUE);
+	IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_SHADEMODE, D3DSHADE_GOURAUD);
+
+	IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_ZENABLE,
+		(lpCtx->lpPF->dwDriverData!=D3DFMT_UNKNOWN) ? D3DZB_TRUE : D3DZB_FALSE);
+
+	// Set the view matrix
+	{
+		D3DXMATRIX	vm;
+#if 1
+		D3DXMatrixIdentity(&vm);
+#else
+		D3DXVECTOR3 Eye(0.0f, 0.0f, 0.0f);
+		D3DXVECTOR3 At(0.0f, 0.0f, -1.0f);
+		D3DXVECTOR3 Up(0.0f, 1.0f, 0.0f);
+		D3DXMatrixLookAtRH(&vm, &Eye, &At, &Up);
+		vm._31 = -vm._31;
+		vm._32 = -vm._32;
+		vm._33 = -vm._33;
+		vm._34 = -vm._34;
+#endif
+		IDirect3DDevice8_SetTransform(gld->pDev, D3DTS_VIEW, &vm);
+	}
+
+	if (gld->bHasHWTnL) {
+		if (glb.dwTnL == GLDS_TNL_DEFAULT)
+			bSoftwareTnL = FALSE; // HW TnL
+		else {
+			bSoftwareTnL = ((glb.dwTnL == GLDS_TNL_MESA) || (glb.dwTnL == GLDS_TNL_D3DSW)) ? TRUE : FALSE;
+		}
+	} else {
+		// No HW TnL, so no choice possible
+		bSoftwareTnL = TRUE;
+	}
+	IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_SOFTWAREVERTEXPROCESSING, bSoftwareTnL);
+
+// Dump this in a Release build as well, now.
+//#ifdef _DEBUG
+	ddlogPrintf(DDLOG_INFO, "HW TnL: %s",
+		gld->bHasHWTnL ? (bSoftwareTnL ? "Disabled" : "Enabled") : "Unavailable");
+//#endif
+
+	gldEnableExtensions_DX8(lpCtx->glCtx);
+	gldInstallPipeline_DX8(lpCtx->glCtx);
+	gldSetupDriverPointers_DX8(lpCtx->glCtx);
+
+	// Signal a complete state update
+	lpCtx->glCtx->Driver.UpdateState(lpCtx->glCtx, _NEW_ALL);
+
+	// Start a scene (NOTE(review): the BeginScene result is not checked)
+	IDirect3DDevice8_BeginScene(gld->pDev);
+	lpCtx->bSceneStarted = TRUE;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+BOOL gldSwapBuffers_DX(
+	DGL_ctx *ctx,
+	HDC hDC,
+	HWND hWnd)
+{
+	// Present the rendered frame: close the open scene (if any), present
+	// to hWnd, then open the next scene straight away. hDC is not used.
+	GLD_driver_dx8	*pDriver;
+	HRESULT			hrPresent;
+
+	if (!ctx)
+		return FALSE;
+	pDriver = ctx->glPriv;
+	if (!pDriver)
+		return FALSE;
+
+	// Close the scene that is currently open, if there is one.
+	if (ctx->bSceneStarted) {
+		IDirect3DDevice8_EndScene(pDriver->pDev);
+		ctx->bSceneStarted = FALSE;
+	}
+
+	// Swap the buffers. hWnd may override the hWnd used for CreateDevice()
+	hrPresent = IDirect3DDevice8_Present(pDriver->pDev, NULL, NULL, hWnd, NULL);
+	IDirect3DDevice8_BeginScene(pDriver->pDev);
+	ctx->bSceneStarted = TRUE;
+	return SUCCEEDED(hrPresent) ? TRUE : FALSE;
+}
+
+//---------------------------------------------------------------------------
+
+BOOL gldGetDisplayMode_DX(
+	DGL_ctx *ctx,
+	GLD_displayMode *glddm)
+{
+	// Report the adapter's current display mode (size, colour depth and
+	// refresh rate) through *glddm. Returns FALSE when an argument or the
+	// Direct3D interface is missing, or when the mode query fails.
+	GLD_driver_dx8	*pDriver;
+	D3DDISPLAYMODE	mode;
+	HRESULT			hrMode;
+	BYTE			cBits, cR, cG, cB, cA;
+
+	if ((ctx == NULL) || (glddm == NULL))
+		return FALSE;
+
+	pDriver = ctx->glPriv;
+	if ((pDriver == NULL) || (pDriver->pD3D == NULL))
+		return FALSE;
+
+	hrMode = IDirect3D8_GetAdapterDisplayMode(pDriver->pD3D, glb.dwAdapter, &mode);
+	if (FAILED(hrMode))
+		return FALSE;
+
+	// Only the total colour depth is reported; the per-channel bit
+	// counts are outputs of the helper that are not used here.
+	_BitsFromDisplayFormat(mode.Format, &cBits, &cR, &cG, &cB, &cA);
+
+	glddm->Width	= mode.Width;
+	glddm->Height	= mode.Height;
+	glddm->BPP		= cBits;
+	glddm->Refresh	= mode.RefreshRate;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
diff --git a/src/mesa/drivers/windows/gldirect/dx9/gld_driver_dx9.c b/src/mesa/drivers/windows/gldirect/dx9/gld_driver_dx9.c
new file mode 100644
index 0000000000..0558462dea
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx9/gld_driver_dx9.c
@@ -0,0 +1,1206 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Driver interface code to Mesa
+*
+****************************************************************************/
+
+//#include <windows.h>
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx9.h"
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "depth.h"
+#include "extensions.h"
+#include "macros.h"
+#include "matrix.h"
+// #include "mem.h"
+//#include "mmath.h"
+#include "mtypes.h"
+#include "texformat.h"
+#include "teximage.h"
+#include "texstore.h"
+#include "vbo/vbo.h"
+#include "swrast_setup/swrast_setup.h"
+#include "swrast_setup/ss_context.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+extern BOOL dglSwapBuffers(HDC hDC); // called by gld_Flush_DX9 below
+
+// HACK: divisor applied to the _33 element of the OpenGL projection matrix (see gld_NEW_PROJECTION)
+const float _fPersp_33 = 1.6f;
+
+//---------------------------------------------------------------------------
+// Internal functions
+//---------------------------------------------------------------------------
+
+void _gld_mesa_warning(
+	__GLcontext *gc,
+	char *str)
+{
+	// Intercept Mesa's internal warning mechanism: write str to the log as a warning (gc is unused)
+	gldLogPrintf(GLDLOG_WARN, "Mesa warning: %s", str);
+}
+
+//---------------------------------------------------------------------------
+
+void _gld_mesa_fatal(
+	__GLcontext *gc,
+	char *str)
+{
+	// Intercept Mesa's fatal-message mechanism: log str, close the log and
+	// terminate. Mesa itself aborts here, so exit with a failure status.
+	gldLogPrintf(GLDLOG_CRITICAL, "Mesa FATAL: %s", str);
+	// NOTE(review): the log is written via gldLog* but closed via ddlog* - confirm.
+	ddlogClose();
+	exit(1);
+}
+
+//---------------------------------------------------------------------------
+
+D3DSTENCILOP _gldConvertStencilOp(
+	GLenum StencilOp)
+{
+	// Map a GL stencil action (used for pass, fail and zfail) to its D3D
+	// equivalent. GL_INCR/GL_DECR map to the D3D *SAT operations, the
+	// GL_EXT_stencil_wrap variants to the plain D3D INCR/DECR.
+	if (StencilOp == GL_KEEP)
+		return D3DSTENCILOP_KEEP;
+	if (StencilOp == GL_ZERO)
+		return D3DSTENCILOP_ZERO;
+	if (StencilOp == GL_REPLACE)
+		return D3DSTENCILOP_REPLACE;
+	if (StencilOp == GL_INCR)
+		return D3DSTENCILOP_INCRSAT;
+	if (StencilOp == GL_DECR)
+		return D3DSTENCILOP_DECRSAT;
+	if (StencilOp == GL_INVERT)
+		return D3DSTENCILOP_INVERT;
+	if (StencilOp == GL_INCR_WRAP_EXT)
+		return D3DSTENCILOP_INCR;
+	if (StencilOp == GL_DECR_WRAP_EXT)
+		return D3DSTENCILOP_DECR;
+
+	// Unrecognised operation: report it in debug builds, then fall back to KEEP.
+#ifdef _DEBUG
+	gldLogMessage(GLDLOG_ERROR, "_gldConvertStencilOp: Unknown StencilOp\n");
+#endif
+
+	return D3DSTENCILOP_KEEP;
+}
+
+//---------------------------------------------------------------------------
+
+D3DCMPFUNC _gldConvertCompareFunc(
+	GLenum CmpFunc)
+{
+	// Map a GL comparison function to its D3D equivalent.
+	// Used for Alpha func, depth func and stencil func.
+	// An unrecognised value falls back to D3DCMP_ALWAYS.
+	if (CmpFunc == GL_NEVER)
+		return D3DCMP_NEVER;
+	if (CmpFunc == GL_LESS)
+		return D3DCMP_LESS;
+	if (CmpFunc == GL_EQUAL)
+		return D3DCMP_EQUAL;
+	if (CmpFunc == GL_LEQUAL)
+		return D3DCMP_LESSEQUAL;
+	if (CmpFunc == GL_GREATER)
+		return D3DCMP_GREATER;
+	if (CmpFunc == GL_NOTEQUAL)
+		return D3DCMP_NOTEQUAL;
+	if (CmpFunc == GL_GEQUAL)
+		return D3DCMP_GREATEREQUAL;
+	if (CmpFunc == GL_ALWAYS)
+		return D3DCMP_ALWAYS;
+
+	// Unrecognised function: only debug builds report it.
+#ifdef _DEBUG
+	gldLogMessage(GLDLOG_ERROR, "_gldConvertCompareFunc: Unknown CompareFunc\n");
+#endif
+
+	return D3DCMP_ALWAYS;
+}
+
+//---------------------------------------------------------------------------
+
+D3DBLEND _gldConvertBlendFunc(
+	GLenum blend,
+	GLenum DefaultBlend)
+{
+	// Map a GL blend factor to the matching D3DBLEND value. DefaultBlend is
+	// the GL factor to fall back on when 'blend' is not handled here.
+	switch (blend) {
+	case GL_ZERO:					return D3DBLEND_ZERO;
+	case GL_ONE:					return D3DBLEND_ONE;
+	case GL_DST_COLOR:				return D3DBLEND_DESTCOLOR;
+	case GL_SRC_COLOR:				return D3DBLEND_SRCCOLOR;
+	case GL_ONE_MINUS_DST_COLOR:	return D3DBLEND_INVDESTCOLOR;
+	case GL_ONE_MINUS_SRC_COLOR:	return D3DBLEND_INVSRCCOLOR;
+	case GL_SRC_ALPHA:				return D3DBLEND_SRCALPHA;
+	case GL_ONE_MINUS_SRC_ALPHA:	return D3DBLEND_INVSRCALPHA;
+	case GL_DST_ALPHA:				return D3DBLEND_DESTALPHA;
+	case GL_ONE_MINUS_DST_ALPHA:	return D3DBLEND_INVDESTALPHA;
+	case GL_SRC_ALPHA_SATURATE:		return D3DBLEND_SRCALPHASAT;
+	}
+
+#ifdef _DEBUG
+	gldLogMessage(GLDLOG_ERROR, "_gldConvertBlendFunc: Unknown BlendFunc\n");
+#endif
+
+	// The fallback is a GL enum and must not be returned raw: GL_ONE is 1,
+	// which D3D reads as D3DBLEND_ZERO, and GL_ZERO is 0, which is not a
+	// D3DBLEND value at all. Translate those two defaults; any other
+	// value is passed through unchanged, as before.
+	if (DefaultBlend == GL_ONE)
+		return D3DBLEND_ONE;
+	if (DefaultBlend == GL_ZERO)
+		return D3DBLEND_ZERO;
+
+	return (D3DBLEND)DefaultBlend;
+}
+
+//---------------------------------------------------------------------------
+// Misc. functions
+//---------------------------------------------------------------------------
+
+void gld_Noop_DX9(
+	GLcontext *ctx)
+{ // Does nothing; a debug build only logs that it was called.
+#ifdef _DEBUG
+	gldLogMessage(GLDLOG_ERROR, "gld_Noop called!\n");
+#endif
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Error_DX9(
+	GLcontext *ctx)
+{
+#ifdef _DEBUG
+	// Intentionally silent: the log call below is disabled ("quite useless").
+//	gldLogMessage(GLDLOG_ERROR, "ctx->Driver.Error called!\n");
+#endif
+}
+
+//---------------------------------------------------------------------------
+// Required Mesa functions
+//---------------------------------------------------------------------------
+
+static GLboolean gld_set_draw_buffer_DX9(
+	GLcontext *ctx,
+	GLenum mode)
+{
+   /* Only the left front and left back colour buffers are accepted
+    * as draw buffers; any other mode is refused. */
+   const GLboolean bSupported =
+      ((mode == GL_FRONT_LEFT) || (mode == GL_BACK_LEFT)) ? GL_TRUE : GL_FALSE;
+
+   (void) ctx;
+   return bSupported;
+}
+
+//---------------------------------------------------------------------------
+
+static void gld_set_read_buffer_DX9(
+	GLcontext *ctx,
+	GLframebuffer *buffer,
+	GLenum mode)
+{
+   /* No-op: a separate read buffer is not supported; all arguments are ignored */
+/*
+   ASSERT(buffer == ctx->DrawBuffer);
+   ASSERT(mode == GL_FRONT_LEFT);
+*/
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Clear_DX9(
+	GLcontext *ctx,
+	GLbitfield mask,
+	GLboolean all,
+	GLint x,
+	GLint y,
+	GLint width,
+	GLint height)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+
+	DWORD		dwFlags = 0;
+	D3DCOLOR	Color = 0;
+	float		Z = 0.0f;
+	DWORD		Stencil = 0;
+	D3DRECT		d3dClearRect;
+
+	// Clear the colour, depth and/or stencil buffers selected by 'mask'.
+	// TODO: Colourmask - ctx->Color.ColorMask is not honoured by this clear.
+
+	if (!gld || !gld->pDev)
+		return;
+
+	if (mask & (DD_FRONT_LEFT_BIT | DD_BACK_LEFT_BIT)) {
+		GLubyte col[4];
+		CLAMPED_FLOAT_TO_UBYTE(col[0], ctx->Color.ClearColor[0]);
+		CLAMPED_FLOAT_TO_UBYTE(col[1], ctx->Color.ClearColor[1]);
+		CLAMPED_FLOAT_TO_UBYTE(col[2], ctx->Color.ClearColor[2]);
+		CLAMPED_FLOAT_TO_UBYTE(col[3], ctx->Color.ClearColor[3]);
+		dwFlags |= D3DCLEAR_TARGET;
+		Color = D3DCOLOR_RGBA(col[0], col[1], col[2], col[3]);
+	}
+
+	if (mask & DD_DEPTH_BIT) {
+		// D3D will fail the Clear call if we try and clear a
+		// depth buffer and we haven't created one.
+		// Also, some apps try and clear a depth buffer,
+		// when a depth buffer hasn't been requested by the app.
+		if (ctx->Visual.depthBits == 0) {
+			mask &= ~DD_DEPTH_BIT; // Remove depth bit from mask
+		} else {
+			dwFlags |= D3DCLEAR_ZBUFFER;
+			Z = ctx->Depth.Clear;
+		}
+	}
+
+	if (mask & DD_STENCIL_BIT) {
+		if (ctx->Visual.stencilBits == 0) {
+			// No stencil bits in depth buffer
+			mask &= ~DD_STENCIL_BIT; // Remove stencil bit from mask
+		} else {
+			dwFlags |= D3DCLEAR_STENCIL;
+			Stencil = ctx->Stencil.Clear;
+		}
+	}
+
+	// Some apps do really weird things with the rect, such as Quake3.
+	if ((x < 0) || (y < 0) || (width <= 0) || (height <= 0)) {
+		all = GL_TRUE;
+	}
+
+	if (!all) {
+		// Calculate clear subrect (Y is flipped using the context height)
+		d3dClearRect.x1	= x;
+		d3dClearRect.y1	= gldCtx->dwHeight - (y + height);
+		d3dClearRect.x2	= x + width;
+		d3dClearRect.y2	= d3dClearRect.y1 + height;
+//		gldLogPrintf(GLDLOG_INFO, "Rect %d,%d %d,%d", x,y,width,height);
+	}
+
+	// dwFlags will be zero if there's nothing to clear
+	if (dwFlags) {
+		_GLD_DX9_DEV(Clear(
+			gld->pDev,
+			all ? 0 : 1,
+			all ? NULL : &d3dClearRect,
+			dwFlags,
+			Color, Z, Stencil));
+	}
+
+	if (mask & DD_ACCUM_BIT) {
+		// TODO: clearing the accumulation buffer is not implemented
+	}
+}
+
+//---------------------------------------------------------------------------
+
+// Report the framebuffer's stored width and height. Mesa 5: Parameter change (no ctx argument)
+static void gld_buffer_size_DX9(
+//	GLcontext *ctx,
+	GLframebuffer *fb,
+	GLuint *width,
+	GLuint *height)
+{
+//	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+
+	*width = fb->Width; // gldCtx->dwWidth;
+	*height = fb->Height; // gldCtx->dwHeight;
+}
+
+//---------------------------------------------------------------------------
+
+static void gld_Finish_DX9(
+	GLcontext *ctx)
+{ // Intentionally empty: this function does no work.
+}
+
+//---------------------------------------------------------------------------
+
+static void gld_Flush_DX9(
+	GLcontext *ctx)
+{
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+
+	// TODO: Detect apps that glFlush() then SwapBuffers() ?
+
+	// A flush only does something when a single-buffered context is emulated.
+	if (!gldCtx->EmulateSingle)
+		return;
+	// [Direct3D doesn't allow rendering to front buffer], so swap instead.
+	dglSwapBuffers(gldCtx->hDC);
+}
+
+//---------------------------------------------------------------------------
+
+void gld_NEW_STENCIL(
+	GLcontext *ctx)
+{
+	// Push Mesa's stencil state to the device. Mesa 5 made the stencil
+	// state two-sided; only face 0 is used here.
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+	const struct gl_stencil_attrib	*pState = &ctx->Stencil;
+	const GLuint		uiSide	= 0UL;
+	const BOOL			bOn		= pState->Enabled ? TRUE : FALSE;
+
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_STENCILENABLE, bOn));
+	if (!bOn)
+		return; // the other stencil states are left untouched while disabled
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_STENCILFUNC, _gldConvertCompareFunc(pState->Function[uiSide])));
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_STENCILREF, pState->Ref[uiSide]));
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_STENCILMASK, pState->ValueMask[uiSide]));
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_STENCILWRITEMASK, pState->WriteMask[uiSide]));
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_STENCILFAIL, _gldConvertStencilOp(pState->FailFunc[uiSide])));
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_STENCILZFAIL, _gldConvertStencilOp(pState->ZFailFunc[uiSide])));
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_STENCILPASS, _gldConvertStencilOp(pState->ZPassFunc[uiSide])));
+}
+
+//---------------------------------------------------------------------------
+
+void gld_NEW_COLOR(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+	// Push Mesa's alpha-test, blend and colour-mask state to the device.
+	DWORD		dwFlags = 0;
+	D3DBLEND	src;
+	D3DBLEND	dest;
+	// NOTE(review): AlphaRef is cast straight to DWORD - confirm its type and range.
+	// Alpha func
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_ALPHAFUNC, _gldConvertCompareFunc(ctx->Color.AlphaFunc)));
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_ALPHAREF, (DWORD)ctx->Color.AlphaRef));
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_ALPHATESTENABLE, ctx->Color.AlphaEnabled));
+
+	// Blend func (only the RGB source/destination factors are used)
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_ALPHABLENDENABLE, ctx->Color.BlendEnabled));
+	src		= _gldConvertBlendFunc(ctx->Color.BlendSrcRGB, GL_ONE);
+	dest	= _gldConvertBlendFunc(ctx->Color.BlendDstRGB, GL_ZERO);
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_SRCBLEND, src));
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_DESTBLEND, dest));
+
+	// Color mask (only ColorMask[0] is consulted)
+	if (ctx->Color.ColorMask[0][0]) dwFlags |= D3DCOLORWRITEENABLE_RED;
+	if (ctx->Color.ColorMask[0][1]) dwFlags |= D3DCOLORWRITEENABLE_GREEN;
+	if (ctx->Color.ColorMask[0][2]) dwFlags |= D3DCOLORWRITEENABLE_BLUE;
+	if (ctx->Color.ColorMask[0][3]) dwFlags |= D3DCOLORWRITEENABLE_ALPHA;
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_COLORWRITEENABLE, dwFlags));
+}
+
+//---------------------------------------------------------------------------
+
+void gld_NEW_DEPTH(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+	// Mirror Mesa's depth test enable, compare function and write mask on the device.
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_ZENABLE, ctx->Depth.Test ? D3DZB_TRUE : D3DZB_FALSE));
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_ZFUNC, _gldConvertCompareFunc(ctx->Depth.Func)));
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_ZWRITEENABLE, ctx->Depth.Mask ? TRUE : FALSE));
+}
+
+//---------------------------------------------------------------------------
+
+void gld_NEW_POLYGON(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+	// Push polygon fill mode, face culling and depth bias to the device.
+	D3DFILLMODE	d3dFillMode = D3DFILL_SOLID;
+	D3DCULL		d3dCullMode = D3DCULL_NONE;
+	union { float f; DWORD dw; } depthBias = { 0.0f }; // float sent as raw DWORD bits
+
+	// Fillmode (taken from the front-face polygon mode only)
+	switch (ctx->Polygon.FrontMode) {
+	case GL_POINT:
+		d3dFillMode = D3DFILL_POINT;
+		break;
+	case GL_LINE:
+		d3dFillMode = D3DFILL_WIREFRAME;
+		break;
+	case GL_FILL:
+		d3dFillMode = D3DFILL_SOLID;
+		break;
+	}
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_FILLMODE, d3dFillMode));
+
+	if (ctx->Polygon.CullFlag) {
+		switch (ctx->Polygon.CullFaceMode) {
+		case GL_BACK:
+			if (ctx->Polygon.FrontFace == GL_CCW)
+				d3dCullMode = D3DCULL_CW;
+			else
+				d3dCullMode = D3DCULL_CCW;
+			break;
+		case GL_FRONT:
+			if (ctx->Polygon.FrontFace == GL_CCW)
+				d3dCullMode = D3DCULL_CCW;
+			else
+				d3dCullMode = D3DCULL_CW;
+			break;
+		case GL_FRONT_AND_BACK:
+			d3dCullMode = D3DCULL_NONE;
+			break;
+		default:
+			break;
+		}
+	} else {
+		d3dCullMode = D3DCULL_NONE;
+	}
+//	d3dCullMode = D3DCULL_NONE; // FOR DEBUGGING
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_CULLMODE, d3dCullMode));
+
+	// Polygon offset
+	// The bias is a float for DX9 (the older ZBIAS was an integer from 0 to 16).
+	// Mesa5: ctx->Polygon._OffsetAny removed
+	if (ctx->Polygon.OffsetFill) {
+		depthBias.f = ctx->Polygon.OffsetUnits;
+		// NOTE(review): the GL offset units are forwarded unscaled and with
+		// their sign unchanged - confirm this matches D3D's depth bias.
+		// (The sign handling that used to live here was already disabled.)
+	}
+	// SetRenderState() takes a DWORD, so pass the float's bit pattern. A union
+	// is used rather than *(DWORD*)&f, which breaks C's aliasing rules.
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_DEPTHBIAS, depthBias.dw));
+}
+
+//---------------------------------------------------------------------------
+
+void gld_NEW_FOG(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+
+	D3DCOLOR	d3dFogColour;
+	D3DFOGMODE	d3dFogMode = D3DFOG_LINEAR;
+	// TODO: Fog is calculated separately in the Mesa pipeline, so D3D fog is
+	// always switched off here and the function returns immediately.
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_FOGENABLE, FALSE));
+	return;
+	// NOTE: everything below is unreachable because of the return above.
+	// Fog enable
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_FOGENABLE, ctx->Fog.Enabled));
+	if (!ctx->Fog.Enabled) {
+		_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_FOGTABLEMODE, D3DFOG_NONE));
+		_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_FOGVERTEXMODE, D3DFOG_NONE));
+		return; // If disabled, don't bother setting any fog state
+	}
+
+	// Fog colour
+	d3dFogColour = D3DCOLOR_COLORVALUE(	ctx->Fog.Color[0],
+								ctx->Fog.Color[1],
+								ctx->Fog.Color[2],
+								ctx->Fog.Color[3]);
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_FOGCOLOR, d3dFogColour));
+
+	// Fog density
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_FOGDENSITY, *((DWORD*) (&ctx->Fog.Density))));
+
+	// Fog start
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_FOGSTART, *((DWORD*) (&ctx->Fog.Start))));
+
+	// Fog end
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_FOGEND, *((DWORD*) (&ctx->Fog.End))));
+
+	// Fog mode
+	switch (ctx->Fog.Mode) {
+	case GL_LINEAR:
+		d3dFogMode = D3DFOG_LINEAR;
+		break;
+	case GL_EXP:
+		d3dFogMode = D3DFOG_EXP;
+		break;
+	case GL_EXP2:
+		d3dFogMode = D3DFOG_EXP2;
+		break;
+	}
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_FOGTABLEMODE, d3dFogMode));
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_FOGVERTEXMODE, D3DFOG_NONE));
+}
+
+//---------------------------------------------------------------------------
+
+void gld_NEW_LIGHT(
+	GLcontext *ctx)
+{
+	// Push the shade model and the separate-specular switch to the device.
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9	*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+	const DWORD		dwShade	= (ctx->Light.ShadeModel == GL_SMOOTH) ? D3DSHADE_GOURAUD : D3DSHADE_FLAT;
+	DWORD			dwSpecular = FALSE;
+
+	// Specular is enabled only when lighting is on and Mesa's triangle
+	// caps carry the separate-specular flag.
+	if (ctx->Light.Enabled && (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR))
+		dwSpecular = TRUE;
+
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_SHADEMODE, dwShade));
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_SPECULARENABLE, dwSpecular));
+}
+
+//---------------------------------------------------------------------------
+
+void gld_NEW_MODELVIEW(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+	// Mesa5: Model-view is now a stack, so read the matrix on top of it.
+	const GLfloat		*pSrc	= ctx->ModelviewMatrixStack.Top->m;
+	D3DMATRIX			*pDst	= &gld->matModelView;
+
+	// Cache the model-view matrix in the driver. The 16 floats are copied
+	// in storage order: m[0..3] go to _11.._14, m[4..7] to _21.._24, and
+	// so on.
+	pDst->_11 = pSrc[0];
+	pDst->_12 = pSrc[1];
+	pDst->_13 = pSrc[2];
+	pDst->_14 = pSrc[3];
+	pDst->_21 = pSrc[4];
+	pDst->_22 = pSrc[5];
+	pDst->_23 = pSrc[6];
+	pDst->_24 = pSrc[7];
+	pDst->_31 = pSrc[8];
+	pDst->_32 = pSrc[9];
+	pDst->_33 = pSrc[10];
+	pDst->_34 = pSrc[11];
+	pDst->_41 = pSrc[12];
+	pDst->_42 = pSrc[13];
+	pDst->_43 = pSrc[14];
+	pDst->_44 = pSrc[15];
+}
+
+//---------------------------------------------------------------------------
+
+void gld_NEW_PROJECTION(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+	// Cache Mesa's top projection matrix in the driver, adjusting _33 and _43.
+	D3DMATRIX	m;
+	//GLfloat		*pM = ctx->ProjectionMatrix.m;
+	// Mesa 5: Now a stack
+	GLfloat		*pM = ctx->ProjectionMatrixStack.Top->m;
+	m._11 = pM[0];
+	m._12 = pM[1];
+	m._13 = pM[2];
+	m._14 = pM[3];
+
+	m._21 = pM[4];
+	m._22 = pM[5];
+	m._23 = pM[6];
+	m._24 = pM[7];
+
+	m._31 = pM[8];
+	m._32 = pM[9];
+	m._33 = pM[10] / _fPersp_33; // HACK: divided by _fPersp_33 (1.6f)
+	m._34 = pM[11];
+
+	m._41 = pM[12];
+	m._42 = pM[13];
+	m._43 = pM[14] / 2.0f; // NOTE(review): halved - presumably a GL-to-D3D depth range fix; confirm
+	m._44 = pM[15];
+
+	gld->matProjection = m;
+}
+
+//---------------------------------------------------------------------------
+/*
+void gldFrustumHook_DX9(
+	GLdouble left,
+	GLdouble right,
+	GLdouble bottom,
+	GLdouble top,
+	GLdouble nearval,
+	GLdouble farval)
+{
+	GET_CURRENT_CONTEXT(ctx);
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9	*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+
+	// Pass values on to Mesa first (in case we mess with them)
+	_mesa_Frustum(left, right, bottom, top, nearval, farval);
+
+	_fPersp_33 = farval / (nearval - farval);
+
+//	ddlogPrintf(GLDLOG_SYSTEM, "Frustum: %f", farval/nearval);
+}
+
+//---------------------------------------------------------------------------
+
+void gldOrthoHook_DX9(
+	GLdouble left,
+	GLdouble right,
+	GLdouble bottom,
+	GLdouble top,
+	GLdouble nearval,
+	GLdouble farval)
+{
+	GET_CURRENT_CONTEXT(ctx);
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9	*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+
+	// Pass values on to Mesa first (in case we mess with them)
+	_mesa_Ortho(left, right, bottom, top, nearval, farval);
+
+	_fPersp_33 = 1.6f;
+
+//	ddlogPrintf(GLDLOG_SYSTEM, "Ortho: %f", farval/nearval);
+}
+*/
+//---------------------------------------------------------------------------
+
+void gld_NEW_VIEWPORT(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+
+	D3DVIEWPORT9	d3dvp;
+//	GLint			x, y;
+//	GLsizei			w, h;
+	// Only MinZ/MaxZ are changed; the rest of the viewport is written back as read.
+	// Set depth range
+	_GLD_DX9_DEV(GetViewport(gld->pDev, &d3dvp));
+	// D3D can't do Quake1/Quake2 z-trick (Near > Far), so a reversed range is swapped
+	if (ctx->Viewport.Near <= ctx->Viewport.Far) {
+		d3dvp.MinZ		= ctx->Viewport.Near;
+		d3dvp.MaxZ		= ctx->Viewport.Far;
+	} else {
+		d3dvp.MinZ		= ctx->Viewport.Far;
+		d3dvp.MaxZ		= ctx->Viewport.Near;
+	}
+/*	x = ctx->Viewport.X;
+	y = ctx->Viewport.Y;
+	w = ctx->Viewport.Width;
+	h = ctx->Viewport.Height;
+	if (x < 0) x = 0;
+	if (y < 0) y = 0;
+	if (w > gldCtx->dwWidth) 		w = gldCtx->dwWidth;
+	if (h > gldCtx->dwHeight) 		h = gldCtx->dwHeight;
+	// Ditto for D3D viewport dimensions
+	if (w+x > gldCtx->dwWidth) 		w = gldCtx->dwWidth-x;
+	if (h+y > gldCtx->dwHeight) 	h = gldCtx->dwHeight-y;
+	d3dvp.X			= x;
+	d3dvp.Y			= gldCtx->dwHeight - (y + h);
+	d3dvp.Width		= w;
+	d3dvp.Height	= h;*/
+	_GLD_DX9_DEV(SetViewport(gld->pDev, &d3dvp));
+
+//	gld->fFlipWindowY = (float)gldCtx->dwHeight;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_NEW_SCISSOR(
+	GLcontext *ctx)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+	// Mirror Mesa's scissor box and enable flag on the device.
+	// Bail if IHV driver cannot scissor
+	if (!gld->bCanScissor)
+		return;
+
+	// Set scissor rect
+	if (ctx->Scissor.Enabled) {
+		RECT rcRect;
+		// Y is flipped using the context height; right/bottom get no extra +1
+		rcRect.left		= ctx->Scissor.X;
+		rcRect.right	= ctx->Scissor.X + ctx->Scissor.Width; // + 1;
+		rcRect.top 		= gldCtx->dwHeight - (ctx->Scissor.Y + ctx->Scissor.Height);
+		rcRect.bottom 	= rcRect.top + ctx->Scissor.Height;
+		IDirect3DDevice9_SetScissorRect(gld->pDev, &rcRect);
+	}
+
+	// Enable/disable scissor as required
+	_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_SCISSORTESTENABLE, ctx->Scissor.Enabled));
+}
+
+//---------------------------------------------------------------------------
+
+__inline BOOL _gldAnyEvalEnabled(
+	GLcontext *ctx)
+{
+	// TRUE when AutoNormal or any Map1*/Map2* evaluator flag is set in
+	// the context's evaluator state, FALSE otherwise.
+	const struct gl_eval_attrib *pEval = &ctx->Eval;
+
+	const BOOL bAnyMap1 = pEval->Map1Color4 ||
+		pEval->Map1Index ||
+		pEval->Map1Normal ||
+		pEval->Map1TextureCoord1 ||
+		pEval->Map1TextureCoord2 ||
+		pEval->Map1TextureCoord3 ||
+		pEval->Map1TextureCoord4 ||
+		pEval->Map1Vertex3 ||
+		pEval->Map1Vertex4;
+
+	const BOOL bAnyMap2 = pEval->Map2Color4 ||
+		pEval->Map2Index ||
+		pEval->Map2Normal ||
+		pEval->Map2TextureCoord1 ||
+		pEval->Map2TextureCoord2 ||
+		pEval->Map2TextureCoord3 ||
+		pEval->Map2TextureCoord4 ||
+		pEval->Map2Vertex3 ||
+		pEval->Map2Vertex4;
+
+	return (pEval->AutoNormal || bAnyMap1 || bAnyMap2) ? TRUE : FALSE;
+}
+
+//---------------------------------------------------------------------------
+
+BOOL _gldChooseInternalPipeline(
+	GLcontext *ctx,
+	GLD_driver_dx9 *gld)
+{
+//	return TRUE;	// DEBUGGING: ALWAYS USE MESA
+//	return FALSE;	// DEBUGGING: ALWAYS USE D3D
+	// Returns TRUE to use Mesa's TnL, FALSE for the D3D FVF path; bumps the matching usage counter.
+	if ((glb.dwTnL == GLDS_TNL_MESA) || (gld->bHasHWTnL == FALSE))
+	{
+		gld->PipelineUsage.qwMesa.QuadPart++;
+		return TRUE; // Force Mesa TnL
+	}
+	// NOTE: the literal (1) below makes this test always true, so the D3D FVF return after it is never reached.
+	if ((ctx->Light.Enabled) ||
+		(1) ||
+		(ctx->Texture._TexGenEnabled) ||
+		(ctx->Texture._TexMatEnabled) ||
+//		(ctx->Transform._AnyClip) ||
+		(ctx->Scissor.Enabled) ||
+		_gldAnyEvalEnabled(ctx) // Put this last so we can early-out
+		)
+	{
+		gld->PipelineUsage.qwMesa.QuadPart++;
+		return TRUE;
+	}
+
+	gld->PipelineUsage.qwD3DFVF.QuadPart++;
+	return FALSE;
+
+/*	// Force Mesa pipeline?
+	if (glb.dwTnL == GLDS_TNL_MESA) {
+		gld->PipelineUsage.dwMesa.QuadPart++;
+		return GLD_PIPELINE_MESA;
+	}
+
+	// Test for functionality not exposed in the D3D pathways
+	if ((ctx->Texture._GenFlags)) {
+		gld->PipelineUsage.dwMesa.QuadPart++;
+		return GLD_PIPELINE_MESA;
+	}
+
+	// Now decide if vertex shader can be used.
+	// If two sided lighting is enabled then we must either
+	// use Mesa TnL or the vertex shader
+	if (ctx->_TriangleCaps & DD_TRI_LIGHT_TWOSIDE) {
+		if (gld->VStwosidelight.hShader && !ctx->Fog.Enabled) {
+			// Use Vertex Shader
+			gld->PipelineUsage.dwD3D2SVS.QuadPart++;
+			return GLD_PIPELINE_D3D_VS_TWOSIDE;
+		} else {
+			// Use Mesa TnL
+			gld->PipelineUsage.dwMesa.QuadPart++;
+			return GLD_PIPELINE_MESA;
+		}
+	}
+
+	// Must be D3D fixed-function pipeline
+	gld->PipelineUsage.dwD3DFVF.QuadPart++;
+	return GLD_PIPELINE_D3D_FVF;
+*/
+}
+
+//---------------------------------------------------------------------------
+
+// Mesa UpdateState driver hook for the DX9 driver.
+// Forwards the dirty-state mask to the swsetup, vbo and tnl modules, picks
+// the vertex setup variant and the TnL pipeline (Mesa or Direct3D), programs
+// the device's vertex-processing state to match, and then runs one handler
+// per _NEW_* bit that this driver tracks.
+//   ctx       - GL context being updated
+//   new_state - bitmask of _NEW_* flags describing what changed
+// Does nothing if the driver data or the Direct3D device is missing.
+void gld_update_state_DX9(
+	GLcontext *ctx,
+	GLuint new_state)
+{
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9	*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+	GLD_pb_dx9		*gldPB;
+
+	if (!gld || !gld->pDev)
+		return;
+
+	_swsetup_InvalidateState( ctx, new_state );
+	_vbo_InvalidateState( ctx, new_state );
+	_tnl_InvalidateState( ctx, new_state );
+
+	// SetupIndex will be used in the pipelines for choosing setup function
+	if ((ctx->_TriangleCaps & (DD_TRI_LIGHT_TWOSIDE | DD_SEPARATE_SPECULAR)) ||
+		(ctx->Fog.Enabled))
+	{
+		if (ctx->_TriangleCaps & DD_FLATSHADE)
+			gld->iSetupFunc = GLD_SI_FLAT_EXTRAS;
+		else
+			gld->iSetupFunc = GLD_SI_SMOOTH_EXTRAS;
+	} else {
+		if (ctx->_TriangleCaps & DD_FLATSHADE)
+			gld->iSetupFunc = GLD_SI_FLAT;	// Setup flat shade + texture
+		else
+			gld->iSetupFunc = GLD_SI_SMOOTH; // Setup smooth shade + texture
+	}
+
+	// Select the primitive buffer and device vertex-processing mode that
+	// match the chosen TnL pipeline.
+	gld->bUseMesaTnL = _gldChooseInternalPipeline(ctx, gld);
+	if (gld->bUseMesaTnL) {
+		// Mesa transforms the vertices, so D3D clipping is switched off.
+		gldPB = &gld->PB2d;
+		_GLD_DX9_DEV(SetSoftwareVertexProcessing(gld->pDev, TRUE));
+		_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_CLIPPING, FALSE));
+		_GLD_DX9_DEV(SetVertexShader(gld->pDev, NULL));
+		_GLD_DX9_DEV(SetFVF(gld->pDev, gldPB->dwFVF));
+	} else {
+		gldPB = &gld->PB3d;
+		_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_CLIPPING, TRUE));
+//		if (gld->TnLPipeline == GLD_PIPELINE_D3D_VS_TWOSIDE) {
+//			_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_SOFTWAREVERTEXPROCESSING, !gld->VStwosidelight.bHardware));
+//			_GLD_DX9_DEV(SetVertexShader(gld->pDev, gld->VStwosidelight.hShader));
+//		} else {
+//			_GLD_DX9_DEV(SetRenderState(gld->pDev, D3DRS_SOFTWAREVERTEXPROCESSING, !gld->bHasHWTnL));
+			_GLD_DX9_DEV(SetSoftwareVertexProcessing(gld->pDev, !gld->bHasHWTnL));
+			_GLD_DX9_DEV(SetVertexShader(gld->pDev, NULL));
+			_GLD_DX9_DEV(SetFVF(gld->pDev, gldPB->dwFVF));
+//		}
+	}
+
+// Call gld<flag>(ctx) when <flag> is set in new_state, then clear the bit.
+#define _GLD_TEST_STATE(a)		\
+	if (new_state & (a)) {		\
+		gld##a(ctx);			\
+		new_state &= ~(a);		\
+	}
+
+// Same as _GLD_TEST_STATE, but calls the externally visible gld<flag>_DX9().
+#define _GLD_TEST_STATE_DX9(a)	\
+	if (new_state & (a)) {		\
+		gld##a##_DX9(ctx);		\
+		new_state &= ~(a);		\
+	}
+
+// Clear a flag without running any handler.
+#define _GLD_IGNORE_STATE(a) new_state &= ~(a);
+
+//	if (!gld->bUseMesaTnL) {
+		// Not required if Mesa is doing the TnL.
+	// Problem: If gld->bUseMesaTnL is TRUE when these are signaled,
+	// then we'll miss updating the D3D TnL pipeline.
+	// Therefore, don't test for gld->bUseMesaTnL
+	_GLD_TEST_STATE(_NEW_MODELVIEW);
+	_GLD_TEST_STATE(_NEW_PROJECTION);
+//	}
+
+	_GLD_TEST_STATE_DX9(_NEW_TEXTURE); // extern, so guard with _DX9
+	_GLD_TEST_STATE(_NEW_COLOR);
+	_GLD_TEST_STATE(_NEW_DEPTH);
+	_GLD_TEST_STATE(_NEW_POLYGON);
+	_GLD_TEST_STATE(_NEW_STENCIL);
+	_GLD_TEST_STATE(_NEW_FOG);
+	_GLD_TEST_STATE(_NEW_LIGHT);
+	_GLD_TEST_STATE(_NEW_VIEWPORT);
+
+	_GLD_IGNORE_STATE(_NEW_TRANSFORM);
+
+	// Scissor Test: New for DX9
+	_GLD_TEST_STATE(_NEW_SCISSOR);
+
+// Stubs for future use.
+/*	_GLD_TEST_STATE(_NEW_TEXTURE_MATRIX);
+	_GLD_TEST_STATE(_NEW_COLOR_MATRIX);
+	_GLD_TEST_STATE(_NEW_ACCUM);
+	_GLD_TEST_STATE(_NEW_EVAL);
+	_GLD_TEST_STATE(_NEW_HINT);
+	_GLD_TEST_STATE(_NEW_LINE);
+	_GLD_TEST_STATE(_NEW_PIXEL);
+	_GLD_TEST_STATE(_NEW_POINT);
+	_GLD_TEST_STATE(_NEW_POLYGONSTIPPLE);
+	_GLD_TEST_STATE(_NEW_PACKUNPACK);
+	_GLD_TEST_STATE(_NEW_ARRAY);
+	_GLD_TEST_STATE(_NEW_RENDERMODE);
+	_GLD_TEST_STATE(_NEW_BUFFERS);
+	_GLD_TEST_STATE(_NEW_MULTISAMPLE);
+*/
+
+// For debugging.
+#if 0
+#define _GLD_TEST_UNHANDLED_STATE(a)									\
+	if (new_state & (a)) {									\
+		gldLogMessage(GLDLOG_ERROR, "Unhandled " #a "\n");	\
+	}
+	_GLD_TEST_UNHANDLED_STATE(_NEW_TEXTURE_MATRIX);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_COLOR_MATRIX);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_ACCUM);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_EVAL);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_HINT);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_LINE);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_PIXEL);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_POINT);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_POLYGONSTIPPLE);
+//	_GLD_TEST_UNHANDLED_STATE(_NEW_SCISSOR);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_PACKUNPACK);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_ARRAY);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_RENDERMODE);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_BUFFERS);
+	_GLD_TEST_UNHANDLED_STATE(_NEW_MULTISAMPLE);
+#undef _GLD_TEST_UNHANDLED_STATE
+#endif
+
+// Keep the dispatch helpers local to this function.
+#undef _GLD_TEST_STATE
+#undef _GLD_TEST_STATE_DX9
+#undef _GLD_IGNORE_STATE
+}
+
+//---------------------------------------------------------------------------
+// Viewport
+//---------------------------------------------------------------------------
+
+// Mesa Viewport driver hook: mirrors the GL viewport onto the D3D9 device.
+//   ctx  - GL context
+//   x, y - lower-left corner of the GL viewport (GL window coordinates)
+//   w, h - viewport width and height
+// The rectangle is clamped to the context's dwWidth x dwHeight area and
+// flipped vertically before being passed to SetViewport. The depth range
+// comes from ctx->Viewport.Near/Far, ordered so that MinZ <= MaxZ.
+// Does nothing if the driver data or the Direct3D device is missing.
+void gld_Viewport_DX9(
+	GLcontext *ctx,
+	GLint x,
+	GLint y,
+	GLsizei w,
+	GLsizei h)
+{
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9	*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+
+	D3DVIEWPORT9	d3dvp;
+
+	if (!gld || !gld->pDev)
+		return;
+
+	// This is a hack. When the app is minimized, Mesa passes
+	// w=1 and h=1 for viewport dimensions. Without this test
+	// we get a GPF in gld_wgl_resize_buffers().
+	// NOTE(review): this also ignores a genuine 1x1 viewport request.
+	if ((w==1) && (h==1))
+		return;
+
+	// Call ResizeBuffersMESA. This function will early-out
+	// if no resize is needed.
+	//ctx->Driver.ResizeBuffersMESA(ctx);
+	// Mesa 5: Changed parameters
+	ctx->Driver.ResizeBuffers(gldCtx->glBuffer);
+
+#if 0
+	ddlogPrintf(GLDLOG_SYSTEM, ">> Viewport x=%d y=%d w=%d h=%d", x,y,w,h);
+#endif
+
+	// ** D3D viewport must not be outside the render target surface **
+	// Sanity check the GL viewport dimensions
+	// NOTE(review): x and y are clamped at zero but not against
+	// dwWidth/dwHeight. If x is larger than dwWidth (or y larger than
+	// dwHeight), the "dwWidth-x" / "dwHeight-y" adjustment below does not
+	// produce a valid size - confirm whether callers can pass such values.
+	if (x < 0) x = 0;
+	if (y < 0) y = 0;
+	if (w > gldCtx->dwWidth) 		w = gldCtx->dwWidth;
+	if (h > gldCtx->dwHeight) 		h = gldCtx->dwHeight;
+	// Ditto for D3D viewport dimensions
+	if (w+x > gldCtx->dwWidth) 		w = gldCtx->dwWidth-x;
+	if (h+y > gldCtx->dwHeight) 	h = gldCtx->dwHeight-y;
+
+	d3dvp.X			= x;
+	// Flip vertically: Y is measured from the opposite edge of the surface.
+	d3dvp.Y			= gldCtx->dwHeight - (y + h);
+	d3dvp.Width		= w;
+	d3dvp.Height	= h;
+	// Store the depth range in ascending order, swapping Near/Far if needed.
+	if (ctx->Viewport.Near <= ctx->Viewport.Far) {
+		d3dvp.MinZ		= ctx->Viewport.Near;
+		d3dvp.MaxZ		= ctx->Viewport.Far;
+	} else {
+		d3dvp.MinZ		= ctx->Viewport.Far;
+		d3dvp.MaxZ		= ctx->Viewport.Near;
+	}
+
+	// TODO: DEBUGGING
+//	d3dvp.MinZ		= 0.0f;
+//	d3dvp.MaxZ		= 1.0f;
+
+	_GLD_DX9_DEV(SetViewport(gld->pDev, &d3dvp));
+
+}
+
+//---------------------------------------------------------------------------
+
+extern BOOL dglWglResizeBuffers(GLcontext *ctx, BOOL bDefaultDriver);
+
+// Mesa 5: Parameter change
+// ResizeBuffers driver hook. The framebuffer argument is not consulted;
+// the resize request is forwarded to dglWglResizeBuffers() for the
+// current GL context.
+void gldResizeBuffers_DX9(
+	GLframebuffer *fb)
+{
+	GET_CURRENT_CONTEXT(ctx);
+
+	(void) fb;	// unused, kept to match the hook signature
+	dglWglResizeBuffers(ctx, TRUE);
+}
+
+//---------------------------------------------------------------------------
+#ifdef _DEBUG
+// Debug-only tracer for the Enable driver hook.
+// Logs the enum's name and the requested on/off state; it changes no
+// driver state. To use, plug into ctx->Driver.Enable pointer below.
+//   ctx - GL context (unused)
+//   e   - capability being enabled or disabled
+//   b   - requested state
+void gld_Enable(
+	GLcontext *ctx,
+	GLenum e,
+	GLboolean b)
+{
+	char		szMsg[1024];
+	const char	*pszState = b ? "TRUE" : "FALSE";
+
+	sprintf(szMsg, "Enable: %s (%s)\n", _mesa_lookup_enum_by_nr(e), pszState);
+	ddlogMessage(DDLOG_SYSTEM, szMsg);
+}
+#endif
+//---------------------------------------------------------------------------
+// Driver pointer setup
+//---------------------------------------------------------------------------
+
+extern const GLubyte* _gldGetStringGeneric(GLcontext*, GLenum);
+
+// Install the DX9 driver's callbacks into a GL context.
+// Fills ctx->Driver with the hooks implemented by this driver, sets every
+// hook it does not implement to NULL, wakes up the swsetup module and
+// points the TnL module's driver callbacks at the DX9 render functions.
+//   ctx - GL context to initialise
+// NOTE(review): the local 'gld' is initialised but not referenced in this
+// function.
+void gldSetupDriverPointers_DX9(
+	GLcontext *ctx)
+{
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9	*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+
+	TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+	// Mandatory functions
+	ctx->Driver.GetString				= _gldGetStringGeneric;
+	ctx->Driver.UpdateState				= gld_update_state_DX9;
+	ctx->Driver.Clear					= gld_Clear_DX9;
+	ctx->Driver.DrawBuffer				= gld_set_draw_buffer_DX9;
+	ctx->Driver.GetBufferSize			= gld_buffer_size_DX9;
+	ctx->Driver.Finish					= gld_Finish_DX9;
+	ctx->Driver.Flush					= gld_Flush_DX9;
+	ctx->Driver.Error					= gld_Error_DX9;
+
+	// Hardware accumulation buffer
+	ctx->Driver.Accum					= NULL; // TODO: gld_Accum;
+
+	// Bitmap functions
+	ctx->Driver.CopyPixels				= gld_CopyPixels_DX9;
+	ctx->Driver.DrawPixels				= gld_DrawPixels_DX9;
+	ctx->Driver.ReadPixels				= gld_ReadPixels_DX9;
+	ctx->Driver.Bitmap					= gld_Bitmap_DX9;
+
+	// Buffer resize
+	ctx->Driver.ResizeBuffers			= gldResizeBuffers_DX9;
+	
+	// Texture image functions
+	// (the 3D image entry points use Mesa's generic store functions)
+	ctx->Driver.ChooseTextureFormat		= gld_ChooseTextureFormat_DX9;
+	ctx->Driver.TexImage1D				= gld_TexImage1D_DX9;
+	ctx->Driver.TexImage2D				= gld_TexImage2D_DX9;
+	ctx->Driver.TexImage3D				= _mesa_store_teximage3d;
+	ctx->Driver.TexSubImage1D			= gld_TexSubImage1D_DX9;
+	ctx->Driver.TexSubImage2D			= gld_TexSubImage2D_DX9;
+	ctx->Driver.TexSubImage3D			= _mesa_store_texsubimage3d;
+	
+	ctx->Driver.CopyTexImage1D			= gldCopyTexImage1D_DX9; //NULL;
+	ctx->Driver.CopyTexImage2D			= gldCopyTexImage2D_DX9; //NULL;
+	ctx->Driver.CopyTexSubImage1D		= gldCopyTexSubImage1D_DX9; //NULL;
+	ctx->Driver.CopyTexSubImage2D		= gldCopyTexSubImage2D_DX9; //NULL;
+	ctx->Driver.CopyTexSubImage3D		= gldCopyTexSubImage3D_DX9;
+	ctx->Driver.TestProxyTexImage		= _mesa_test_proxy_teximage;
+
+	// Texture object functions
+	ctx->Driver.BindTexture				= NULL;
+	ctx->Driver.NewTextureObject		= NULL; // Not yet implemented by Mesa!;
+	ctx->Driver.DeleteTexture			= gld_DeleteTexture_DX9;
+	ctx->Driver.PrioritizeTexture		= NULL;
+
+	// Imaging functionality
+	ctx->Driver.CopyColorTable			= NULL;
+	ctx->Driver.CopyColorSubTable		= NULL;
+	ctx->Driver.CopyConvolutionFilter1D = NULL;
+	ctx->Driver.CopyConvolutionFilter2D = NULL;
+
+	// State changing functions
+	// All left NULL except Viewport.
+	ctx->Driver.AlphaFunc				= NULL; //gld_AlphaFunc;
+	ctx->Driver.BlendFuncSeparate		= NULL; //gld_BlendFunc;
+	ctx->Driver.ClearColor				= NULL; //gld_ClearColor;
+	ctx->Driver.ClearDepth				= NULL; //gld_ClearDepth;
+	ctx->Driver.ClearStencil			= NULL; //gld_ClearStencil;
+	ctx->Driver.ColorMask				= NULL; //gld_ColorMask;
+	ctx->Driver.CullFace				= NULL; //gld_CullFace;
+	ctx->Driver.ClipPlane				= NULL; //gld_ClipPlane;
+	ctx->Driver.FrontFace				= NULL; //gld_FrontFace;
+	ctx->Driver.DepthFunc				= NULL; //gld_DepthFunc;
+	ctx->Driver.DepthMask				= NULL; //gld_DepthMask;
+	ctx->Driver.DepthRange				= NULL;
+	ctx->Driver.Enable					= NULL; //gld_Enable;
+	ctx->Driver.Fogfv					= NULL; //gld_Fogfv;
+	ctx->Driver.Hint					= NULL; //gld_Hint;
+	ctx->Driver.Lightfv					= NULL; //gld_Lightfv;
+	ctx->Driver.LightModelfv			= NULL; //gld_LightModelfv;
+	ctx->Driver.LineStipple				= NULL; //gld_LineStipple;
+	ctx->Driver.LineWidth				= NULL; //gld_LineWidth;
+	ctx->Driver.LogicOpcode				= NULL; //gld_LogicOpcode;
+	ctx->Driver.PointParameterfv		= NULL; //gld_PointParameterfv;
+	ctx->Driver.PointSize				= NULL; //gld_PointSize;
+	ctx->Driver.PolygonMode				= NULL; //gld_PolygonMode;
+	ctx->Driver.PolygonOffset			= NULL; //gld_PolygonOffset;
+	ctx->Driver.PolygonStipple			= NULL; //gld_PolygonStipple;
+	ctx->Driver.RenderMode				= NULL; //gld_RenderMode;
+	ctx->Driver.Scissor					= NULL; //gld_Scissor;
+	ctx->Driver.ShadeModel				= NULL; //gld_ShadeModel;
+	ctx->Driver.StencilFunc				= NULL; //gld_StencilFunc;
+	ctx->Driver.StencilMask				= NULL; //gld_StencilMask;
+	ctx->Driver.StencilOp				= NULL; //gld_StencilOp;
+	ctx->Driver.TexGen					= NULL; //gld_TexGen;
+	ctx->Driver.TexEnv					= NULL;
+	ctx->Driver.TexParameter			= NULL;
+	ctx->Driver.TextureMatrix			= NULL; //gld_TextureMatrix;
+	ctx->Driver.Viewport				= gld_Viewport_DX9;
+
+	_swsetup_Wakeup(ctx);
+
+	// TnL module callbacks
+	tnl->Driver.RunPipeline				= _tnl_run_pipeline;
+	tnl->Driver.Render.ResetLineStipple	= gld_ResetLineStipple_DX9;
+	tnl->Driver.Render.ClippedPolygon	= _tnl_RenderClippedPolygon;
+	tnl->Driver.Render.ClippedLine		= _tnl_RenderClippedLine;
+
+	// Hook into glFrustum() and glOrtho()
+//	ctx->Exec->Frustum					= gldFrustumHook_DX9;
+//	ctx->Exec->Ortho					= gldOrthoHook_DX9;
+
+}
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx9/gld_dx9.h b/src/mesa/drivers/windows/gldirect/dx9/gld_dx9.h
new file mode 100644
index 0000000000..aec40ac9dd
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx9/gld_dx9.h
@@ -0,0 +1,327 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  GLDirect Direct3D 9.0 header file
+*
+****************************************************************************/
+
+#ifndef _GLD_DX9_H
+#define _GLD_DX9_H
+
+//---------------------------------------------------------------------------
+// Windows includes
+//---------------------------------------------------------------------------
+
+//#ifndef STRICT
+//#define STRICT
+//#endif
+
+//#define WIN32_LEAN_AND_MEAN
+//#include <windows.h>
+#include <d3d9.h>
+#include <d3dx9.h>
+
+// MS screwed up with the DX8.1 SDK - there's no compile-time
+// method of compiling for 8.0 via the 8.1 SDK unless you
+// "make sure you don't use any 8.1 interfaces".
+// We CAN use 8.1 D3DX static functions, though - just not new 8.1 interfaces.
+//
+// D3D_SDK_VERSION is 120 for 8.0 (supported by Windows 95).
+// D3D_SDK_VERSION is 220 for 8.1 (NOT supported by Windows 95).
+//
+//#define D3D_SDK_VERSION_DX9_SUPPORT_WIN95 120
+//#define D3D_SDK_VERSION_DX91 220
+
+// Typedef for obtaining function from d3d9.dll
+typedef IDirect3D9* (WINAPI *FNDIRECT3DCREATE9) (UINT);
+
+
+//---------------------------------------------------------------------------
+// Defines
+//---------------------------------------------------------------------------
+
+// Wrappers around the C-style Direct3D 9 interface macros.
+// Usage: _GLD_DX9_DEV(Method(pDev, args...)); the prefix of the matching
+// IDirect3D*9_ macro is pasted onto the method name.
+// In _DEBUG builds the HRESULT is checked and failures are reported through
+// gldLogError together with the text of the call; the wrapper then expands
+// to a statement block, so it must be used as a statement, not as a value.
+// In other builds the wrapper expands to the plain call expression.
+// The token paste is applied on the left of 'func' only: a '##' at the end
+// of a replacement list is not valid C.
+#ifdef _DEBUG
+#define _GLD_TEST_HRESULT(h)					\
+{												\
+	HRESULT _hr = (h);							\
+	if (FAILED(_hr)) {							\
+		gldLogError(GLDLOG_ERROR, #h, _hr);		\
+	}											\
+}
+#define _GLD_DX9(func)		_GLD_TEST_HRESULT(IDirect3D9_##func)
+#define _GLD_DX9_DEV(func)	_GLD_TEST_HRESULT(IDirect3DDevice9_##func)
+#define _GLD_DX9_VB(func)	_GLD_TEST_HRESULT(IDirect3DVertexBuffer9_##func)
+#define _GLD_DX9_TEX(func)	_GLD_TEST_HRESULT(IDirect3DTexture9_##func)
+#else
+#define _GLD_DX9(func)		IDirect3D9_##func
+#define _GLD_DX9_DEV(func)	IDirect3DDevice9_##func
+#define _GLD_DX9_VB(func)	IDirect3DVertexBuffer9_##func
+#define _GLD_DX9_TEX(func)	IDirect3DTexture9_##func
+#endif
+
+// Release a COM interface through its C vtable and clear the pointer.
+// Does nothing when the pointer is already NULL.
+#define SAFE_RELEASE(p)					\
+{										\
+	if ((p) != NULL) {					\
+		(p)->lpVtbl->Release(p);		\
+		(p) = NULL;						\
+	}									\
+}
+
+// Release an IDirect3DVertexBuffer9 and clear the pointer.
+// Does nothing when the pointer is already NULL.
+#define SAFE_RELEASE_VB9(p)					\
+{											\
+	if ((p) != NULL) {						\
+		IDirect3DVertexBuffer9_Release(p);	\
+		(p) = NULL;							\
+	}										\
+}
+
+// Release an IDirect3DSurface9 and clear the pointer.
+// Does nothing when the pointer is already NULL.
+#define SAFE_RELEASE_SURFACE9(p)		\
+{										\
+	if ((p) != NULL) {					\
+		IDirect3DSurface9_Release(p);	\
+		(p) = NULL;						\
+	}									\
+}
+
+// Setup index.
+// Value stored in GLD_driver_dx9.iSetupFunc to select the vertex setup
+// variant. FLAT vs SMOOTH follows the DD_FLATSHADE triangle cap; the
+// *_EXTRAS variants are selected when two-sided lighting, separate
+// specular or fog is active.
+enum {
+	GLD_SI_FLAT				= 0,
+	GLD_SI_SMOOTH			= 1,
+	GLD_SI_FLAT_EXTRAS		= 2,
+	GLD_SI_SMOOTH_EXTRAS	= 3,
+};
+/*
+// Internal pipeline
+typedef enum {
+	GLD_PIPELINE_MESA			= 0,	// Mesa pipeline
+	GLD_PIPELINE_D3D_FVF		= 1,	// Direct3D Fixed-function pipeline
+	GLD_PIPELINE_D3D_VS_TWOSIDE	= 2		// Direct3D two-sided-lighting vertex shader
+} GLD_tnl_pipeline;
+*/
+//---------------------------------------------------------------------------
+// Vertex definitions for Fixed-Function pipeline
+//---------------------------------------------------------------------------
+
+//
+// Number of texture units handled by the DX9 driver.
+//
+// NOTE: If the number of texture units is altered then most of
+//       the texture code will need to be revised.
+//
+
+#define GLD_MAX_TEXTURE_UNITS_DX9	2
+
+//
+// 2D vertex transformed by Mesa
+//
+// GLD_FVF_2D_VERTEX is the Direct3D flexible vertex format describing
+// GLD_2D_VERTEX: pre-transformed position (XYZRHW), diffuse and specular
+// colours, and two sets of texture coordinates.
+//
+#define GLD_FVF_2D_VERTEX (	D3DFVF_XYZRHW |		\
+							D3DFVF_DIFFUSE |	\
+							D3DFVF_SPECULAR |	\
+							D3DFVF_TEX2)
+typedef struct {
+	FLOAT	x, y;		// 2D raster coords
+	FLOAT	sz;			// Screen Z (depth)
+	FLOAT	rhw;		// Reciprocal homogeneous W
+	DWORD	diffuse;	// Diffuse colour
+	DWORD	specular;	// For separate-specular support
+	FLOAT	t0_u, t0_v;	// 1st set of texture coords
+	FLOAT	t1_u, t1_v;	// 2nd set of texture coords
+} GLD_2D_VERTEX;
+
+
+//
+// 3D vertex transformed by Direct3D
+//
+// GLD_FVF_3D_VERTEX is the Direct3D flexible vertex format describing
+// GLD_3D_VERTEX: untransformed position (XYZ), diffuse colour and two
+// sets of texture coordinates.
+//
+#define GLD_FVF_3D_VERTEX (	D3DFVF_XYZ |				\
+							D3DFVF_DIFFUSE |			\
+							D3DFVF_TEX2)
+
+typedef struct {
+	D3DXVECTOR3		Position;		// XYZ Vector in object space
+	D3DCOLOR		Diffuse;		// Diffuse colour
+	D3DXVECTOR2		TexUnit0;		// Texture unit 0
+	D3DXVECTOR2		TexUnit1;		// Texture unit 1
+} GLD_3D_VERTEX;
+
+//---------------------------------------------------------------------------
+// Vertex Shaders
+//---------------------------------------------------------------------------
+/*
+// DX8 Vertex Shader
+typedef struct {
+	DWORD	hShader;	// If NULL, shader is invalid and cannot be used
+	BOOL	bHardware;	// If TRUE then shader was created for hardware,
+						// otherwise shader was created for software.
+} GLD_vertexShader;
+*/
+//---------------------------------------------------------------------------
+// Structs
+//---------------------------------------------------------------------------
+
+// This keeps a count of how many times we choose each individual internal
+// pathway. Useful for seeing if a certain pathway was ever used by an app, and
+// how much each pathway is biased.
+// Zero the members at context creation and dump stats at context deletion.
+typedef struct {
+	// Counters are ULARGE_INTEGER because a DWORD was considered too small.
+	ULARGE_INTEGER	qwMesa;		// Mesa TnL pipeline
+	ULARGE_INTEGER	qwD3DFVF;	// Direct3D Fixed-Function pipeline
+//	ULARGE_INTEGER	dwD3D2SVS;	// Direct3D Two-Sided Vertex Shader pipeline
+} GLD_pipeline_usage;
+
+// GLDirect Primitive Buffer (points, lines, triangles and quads)
+// One vertex buffer holding three lists in sequence: points, then lines,
+// then triangles. For each list the struct tracks a write pointer and a
+// count of primitives ready to render.
+typedef struct {
+	// Data for IDirect3DDevice9::CreateVertexBuffer()
+	DWORD					dwStride;		// Stride of vertex
+	DWORD					dwUsage;		// Usage flags
+	DWORD					dwFVF;			// Direct3D Flexible Vertex Format
+	DWORD					dwPool;			// Pool flags
+
+	IDirect3DVertexBuffer9	*pVB;			// Holds points, lines, tris and quads.
+
+	// Point list is assumed to be at start of buffer
+	DWORD					iFirstLine;		// Index of start of line list
+	DWORD					iFirstTriangle;	// Index of start of triangle list
+
+	BYTE					*pPoints;		// Pointer to next free point
+	BYTE					*pLines;		// Pointer to next free line
+	BYTE					*pTriangles;	// Pointer to next free triangle
+
+	DWORD					nPoints;		// Number of points ready to render
+	DWORD					nLines;			// Number of lines ready to render
+	DWORD					nTriangles;		// Number of triangles ready to render
+} GLD_pb_dx9;
+
+// GLDirect DX9 driver data
+// Per-context private state of the DX9 driver, retrieved from a GLD context
+// with GLD_GET_DX9_DRIVER().
+typedef struct {
+	// GLDirect vars
+	BOOL					bDoublebuffer;	// Doublebuffer (otherwise single-buffered)
+	BOOL					bDepthStencil;	// Depth buffer needed (stencil optional)
+	D3DFORMAT				RenderFormat;	// Format of back/front buffer
+	D3DFORMAT				DepthFormat;	// Format of depth/stencil
+//	float					fFlipWindowY;	// Value for flipping viewport Y coord
+
+	// Direct3D vars
+	D3DCAPS9				d3dCaps9;		// Device capabilities
+	BOOL					bHasHWTnL;		// Device has Hardware Transform/Light?
+	IDirect3D9				*pD3D;			// Base Direct3D9 interface
+	IDirect3DDevice9		*pDev;			// Direct3D9 Device interface
+	GLD_pb_dx9				PB2d;			// Vertices transformed by Mesa
+	GLD_pb_dx9				PB3d;			// Vertices transformed by Direct3D
+	D3DPRIMITIVETYPE		d3dpt;			// Current Direct3D primitive type
+	D3DXMATRIX				matProjection;	// Projection matrix for D3D TnL
+	D3DXMATRIX				matModelView;	// Model/View matrix for D3D TnL
+	int						iSetupFunc;		// Which setup functions to use (a GLD_SI_* value)
+	BOOL					bUseMesaTnL;	// Whether to use Mesa or D3D for TnL
+
+	// Direct3D vars for two-sided lighting
+//	GLD_vertexShader		VStwosidelight;	// Vertex Shader for two-sided lighting
+//	D3DXMATRIX				matWorldViewProj;// World/View/Projection matrix for shaders
+
+
+//	GLD_tnl_pipeline		TnLPipeline;	// Index of current internal pipeline
+	GLD_pipeline_usage		PipelineUsage;	// How often each TnL pathway was chosen
+
+	BOOL					bCanScissor;	// Scissor test - new for DX9
+} GLD_driver_dx9;
+
+// Fetch the DX9 driver-private data (GLD_driver_dx9*) from a GLD context.
+// The expansion is fully parenthesised so the result can be used inside a
+// larger expression, e.g. GLD_GET_DX9_DRIVER(c)->pDev, without the cast
+// binding to the wrong operand.
+#define GLD_GET_DX9_DRIVER(c) ((GLD_driver_dx9*)(c)->glPriv)
+
+//---------------------------------------------------------------------------
+// Function prototypes
+//---------------------------------------------------------------------------
+
+PROC	gldGetProcAddress_DX9(LPCSTR a);
+void	gldEnableExtensions_DX9(GLcontext *ctx);
+void	gldInstallPipeline_DX9(GLcontext *ctx);
+void	gldSetupDriverPointers_DX9(GLcontext *ctx);
+//void	gldResizeBuffers_DX9(GLcontext *ctx);
+void	gldResizeBuffers_DX9(GLframebuffer *fb);
+
+
+// Texture functions
+
+void	gldCopyTexImage1D_DX9(GLcontext *ctx, GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLint border);
+void	gldCopyTexImage2D_DX9(GLcontext *ctx, GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border);
+void	gldCopyTexSubImage1D_DX9(GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width );
+void	gldCopyTexSubImage2D_DX9(GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height );
+void	gldCopyTexSubImage3D_DX9(GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height );
+
+void	gld_NEW_TEXTURE_DX9(GLcontext *ctx);
+void	gld_DrawPixels_DX9(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels);
+void	gld_ReadPixels_DX9(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, const struct gl_pixelstore_attrib *unpack, GLvoid *dest);
+void	gld_CopyPixels_DX9(GLcontext *ctx, GLint srcx, GLint srcy, GLsizei width, GLsizei height, GLint dstx, GLint dsty, GLenum type);
+void	gld_Bitmap_DX9(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap);
+const struct gl_texture_format* gld_ChooseTextureFormat_DX9(GLcontext *ctx, GLint internalFormat, GLenum srcFormat, GLenum srcType);
+void	gld_TexImage2D_DX9(GLcontext *ctx, GLenum target, GLint level, GLint internalFormat, GLint width, GLint height, GLint border, GLenum format, GLenum type, const GLvoid *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *tObj, struct gl_texture_image *texImage);
+void	gld_TexImage1D_DX9(GLcontext *ctx, GLenum target, GLint level, GLint internalFormat, GLint width, GLint border, GLenum format, GLenum type, const GLvoid *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *texObj, struct gl_texture_image *texImage );
+void	gld_TexSubImage2D_DX9( GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *texObj, struct gl_texture_image *texImage );
+void	gld_TexSubImage1D_DX9(GLcontext *ctx, GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const GLvoid *pixels, const struct gl_pixelstore_attrib *packing, struct gl_texture_object *texObj, struct gl_texture_image *texImage);
+void	gld_DeleteTexture_DX9(GLcontext *ctx, struct gl_texture_object *tObj);
+void	gld_ResetLineStipple_DX9(GLcontext *ctx);
+
+// 2D primitive functions
+
+void	gld_Points2D_DX9(GLcontext *ctx, GLuint first, GLuint last);
+
+void	gld_Line2DFlat_DX9(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Line2DSmooth_DX9(GLcontext *ctx, GLuint v0, GLuint v1);
+
+void	gld_Triangle2DFlat_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Triangle2DSmooth_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Triangle2DFlatExtras_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Triangle2DSmoothExtras_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+
+void	gld_Quad2DFlat_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Quad2DSmooth_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Quad2DFlatExtras_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Quad2DSmoothExtras_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+
+// 3D primitive functions
+
+void	gld_Points3D_DX9(GLcontext *ctx, GLuint first, GLuint last);
+void	gld_Line3DFlat_DX9(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Triangle3DFlat_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Quad3DFlat_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Line3DSmooth_DX9(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Triangle3DSmooth_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Quad3DSmooth_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+
+// Primitive functions for Two-sided-lighting Vertex Shader
+
+void	gld_Points2DTwoside_DX9(GLcontext *ctx, GLuint first, GLuint last);
+void	gld_Line2DFlatTwoside_DX9(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Line2DSmoothTwoside_DX9(GLcontext *ctx, GLuint v0, GLuint v1);
+void	gld_Triangle2DFlatTwoside_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Triangle2DSmoothTwoside_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2);
+void	gld_Quad2DFlatTwoside_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+void	gld_Quad2DSmoothTwoside_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+
+#endif
diff --git a/src/mesa/drivers/windows/gldirect/dx9/gld_dxerr9.h b/src/mesa/drivers/windows/gldirect/dx9/gld_dxerr9.h
new file mode 100644
index 0000000000..1d6b7b1c76
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx9/gld_dxerr9.h
@@ -0,0 +1,77 @@
+/*==========================================================================;
+ *
+ *
+ *  File:   dxerr9.h
+ *  Content:    DirectX Error Library Include File
+ *
+ ****************************************************************************/
+
+#ifndef _GLD_DXERR9_H_
+#define _GLD_DXERR9_H_
+
+
+#include <d3d9.h>
+
+//
+//  DXGetErrorString9
+//  
+//  Desc:  Converts a DirectX HRESULT to a string
+//
+//  Args:  HRESULT hr   Can be any error code from
+//                      DPLAY D3D8 D3DX8 DMUSIC DSOUND
+//
+//  Return: Converted string 
+//
+const char*  __stdcall DXGetErrorString9A(HRESULT hr);
+const WCHAR* __stdcall DXGetErrorString9W(HRESULT hr);
+
+#ifdef UNICODE
+    #define DXGetErrorString9 DXGetErrorString9W
+#else
+    #define DXGetErrorString9 DXGetErrorString9A
+#endif 
+
+
+//
+//  DXTrace
+//
+//  Desc:  Outputs a formatted error message to the debug stream
+//
+//  Args:  CHAR* strFile   The current file, typically passed in using the 
+//                         __FILE__ macro.
+//         DWORD dwLine    The current line number, typically passed in using the 
+//                         __LINE__ macro.
+//         HRESULT hr      An HRESULT that will be traced to the debug stream.
+//         CHAR* strMsg    A string that will be traced to the debug stream (may be NULL)
+//         BOOL bPopMsgBox If TRUE, then a message box will popup also containing the passed info.
+//
+//  Return: The hr that was passed in.  
+//
+//HRESULT __stdcall DXTraceA( char* strFile, DWORD dwLine, HRESULT hr, char* strMsg, BOOL bPopMsgBox = FALSE );
+//HRESULT __stdcall DXTraceW( char* strFile, DWORD dwLine, HRESULT hr, WCHAR* strMsg, BOOL bPopMsgBox = FALSE );
+HRESULT __stdcall DXTraceA( char* strFile, DWORD dwLine, HRESULT hr, char* strMsg, BOOL bPopMsgBox);
+HRESULT __stdcall DXTraceW( char* strFile, DWORD dwLine, HRESULT hr, WCHAR* strMsg, BOOL bPopMsgBox);
+
+#ifdef UNICODE
+    #define DXTrace DXTraceW
+#else
+    #define DXTrace DXTraceA
+#endif 
+
+
+//
+// Helper macros
+//
+// In DEBUG/_DEBUG builds the DXTRACE_* macros forward to DXTrace with the
+// current file and line; DXTRACE_ERR additionally requests a message box.
+// In other builds they perform no call: DXTRACE_MSG yields 0L and the
+// _ERR variants yield the hr that was passed in.
+#if defined(DEBUG) || defined(_DEBUG)
+    #define DXTRACE_MSG(str)              DXTrace( __FILE__, (DWORD)__LINE__, 0, str, FALSE )
+    #define DXTRACE_ERR(str,hr)           DXTrace( __FILE__, (DWORD)__LINE__, hr, str, TRUE )
+    #define DXTRACE_ERR_NOMSGBOX(str,hr)  DXTrace( __FILE__, (DWORD)__LINE__, hr, str, FALSE )
+#else
+    #define DXTRACE_MSG(str)              (0L)
+    #define DXTRACE_ERR(str,hr)           (hr)
+    #define DXTRACE_ERR_NOMSGBOX(str,hr)  (hr)
+#endif
+
+
+#endif
+
diff --git a/src/mesa/drivers/windows/gldirect/dx9/gld_ext_dx9.c b/src/mesa/drivers/windows/gldirect/dx9/gld_ext_dx9.c
new file mode 100644
index 0000000000..e8c73a6ff8
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx9/gld_ext_dx9.c
@@ -0,0 +1,344 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  GL extensions
+*
+****************************************************************************/
+
+//#include "../GLDirect.h"
+//#include "../gld_log.h"
+//#include "../gld_settings.h"
+
+#include <windows.h>
+#define GL_GLEXT_PROTOTYPES
+#include <GL/gl.h>
+#include <GL/glext.h>
+
+//#include "ddlog.h"
+//#include "gld_dx8.h"
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "depth.h"
+#include "extensions.h"
+#include "macros.h"
+#include "matrix.h"
+// #include "mem.h"
+//#include "mmath.h"
+#include "mtypes.h"
+#include "texformat.h"
+#include "texstore.h"
+#include "vbo/vbo.h"
+#include "swrast_setup/swrast_setup.h"
+#include "swrast_setup/ss_context.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "dglcontext.h"
+#include "extensions.h"
+
+// For some reason this is not defined in an above header...
+extern void _mesa_enable_imaging_extensions(GLcontext *ctx);
+
+//---------------------------------------------------------------------------
+// Hack for the SGIS_multitexture extension that was removed from Mesa
+// NOTE: SGIS_multitexture enums also clash with GL_SGIX_async_pixel
+
+	// NOTE: Quake2 ran *slower* with this enabled, so I've
+	// disabled it for now.
+	// To enable, uncomment:
+	//  _mesa_add_extension(ctx, GL_TRUE, szGL_SGIS_multitexture, 0);
+
+//---------------------------------------------------------------------------
+
+// Enum values for the SGIS multitexture extension, declared locally because
+// the extension was removed from Mesa (see the note above). The wrappers in
+// this file only use GL_TEXTURE0_SGIS, as the base from which a texture-unit
+// index is derived.
+enum {
+	/* Quake2 GL_SGIS_multitexture */
+	GL_SELECTED_TEXTURE_SGIS			= 0x835B,
+	GL_SELECTED_TEXTURE_COORD_SET_SGIS	= 0x835C,
+	GL_MAX_TEXTURES_SGIS				= 0x835D,
+	GL_TEXTURE0_SGIS					= 0x835E,
+	GL_TEXTURE1_SGIS					= 0x835F,
+	GL_TEXTURE2_SGIS					= 0x8360,
+	GL_TEXTURE3_SGIS					= 0x8361,
+	GL_TEXTURE_COORD_SET_SOURCE_SGIS	= 0x8363,
+};
+
+//---------------------------------------------------------------------------
+
+// SGIS-style texture-unit selection: rebases the SGIS unit enum onto
+// GL_TEXTURE0_ARB and forwards to glActiveTextureARB.
+void APIENTRY gldSelectTextureSGIS(GLenum target)
+{
+	const GLenum unit = target - GL_TEXTURE0_SGIS;	// offset from SGIS unit 0
+
+	glActiveTextureARB(GL_TEXTURE0_ARB + unit);
+}
+
+//---------------------------------------------------------------------------
+
+// SGIS-style two-component texture coordinate: rebases the SGIS unit enum
+// onto GL_TEXTURE0_ARB and forwards (s, t) to glMultiTexCoord2fARB.
+void APIENTRY gldMTexCoord2fSGIS(GLenum target, GLfloat s, GLfloat t)
+{
+	const GLenum unit = target - GL_TEXTURE0_SGIS;	// offset from SGIS unit 0
+
+	glMultiTexCoord2fARB(GL_TEXTURE0_ARB + unit, s, t);
+}
+
+//---------------------------------------------------------------------------
+
+// Vector form of gldMTexCoord2fSGIS: rebases the SGIS unit enum onto
+// GL_TEXTURE0_ARB and forwards the pointer v to glMultiTexCoord2fvARB.
+void APIENTRY gldMTexCoord2fvSGIS(GLenum target, const GLfloat *v)
+{
+	const GLenum unit = target - GL_TEXTURE0_SGIS;	// offset from SGIS unit 0
+
+	glMultiTexCoord2fvARB(GL_TEXTURE0_ARB + unit, v);
+}
+
+//---------------------------------------------------------------------------
+// Extensions
+//---------------------------------------------------------------------------
+
+// One row of an extension lookup table: an entry-point address and the name
+// it is looked up by. A row whose proc is NULL terminates a table.
+typedef struct {
+	PROC proc;	// function address returned to the caller
+	char *name;	// name compared (case-sensitively, via strcmp) against the request
+}  GLD_extension;
+
+// Entry points that gldGetProcAddress_DX always hands out. Lookup is an
+// exact, case-sensitive strcmp on the name column, so each string must match
+// the real GL entry-point name character for character. The table ends at
+// the first row whose proc is NULL.
+//
+// Three names were misspelled ("glBlendColorExt", "glTexCoordPointer",
+// "glDrawArrayEXT") and could never match a request for the real entry
+// point; they now read glBlendColorEXT, glTexCoordPointerEXT and
+// glDrawArraysEXT.
+GLD_extension GLD_extList[] = {
+#ifdef GL_EXT_polygon_offset
+    {	(PROC)glPolygonOffsetEXT,		"glPolygonOffsetEXT"		},
+#endif
+    {	(PROC)glBlendEquationEXT,		"glBlendEquationEXT"		},
+    {	(PROC)glBlendColorEXT,			"glBlendColorEXT"			},
+    {	(PROC)glVertexPointerEXT,		"glVertexPointerEXT"		},
+    {	(PROC)glNormalPointerEXT,		"glNormalPointerEXT"		},
+    {	(PROC)glColorPointerEXT,		"glColorPointerEXT"			},
+    {	(PROC)glIndexPointerEXT,		"glIndexPointerEXT"			},
+    {	(PROC)glTexCoordPointerEXT,		"glTexCoordPointerEXT"		},
+    {	(PROC)glEdgeFlagPointerEXT,		"glEdgeFlagPointerEXT"		},
+    {	(PROC)glGetPointervEXT,			"glGetPointervEXT"			},
+    {	(PROC)glArrayElementEXT,		"glArrayElementEXT"			},
+    {	(PROC)glDrawArraysEXT,			"glDrawArraysEXT"			},
+    {	(PROC)glAreTexturesResidentEXT,	"glAreTexturesResidentEXT"	},
+    {	(PROC)glBindTextureEXT,			"glBindTextureEXT"			},
+    {	(PROC)glDeleteTexturesEXT,		"glDeleteTexturesEXT"		},
+    {	(PROC)glGenTexturesEXT,			"glGenTexturesEXT"			},
+    {	(PROC)glIsTextureEXT,			"glIsTextureEXT"			},
+    {	(PROC)glPrioritizeTexturesEXT,	"glPrioritizeTexturesEXT"	},
+    {	(PROC)glCopyTexSubImage3DEXT,	"glCopyTexSubImage3DEXT"	},
+    {	(PROC)glTexImage3DEXT,			"glTexImage3DEXT"			},
+    {	(PROC)glTexSubImage3DEXT,		"glTexSubImage3DEXT"		},
+    {	(PROC)glPointParameterfEXT,		"glPointParameterfEXT"		},
+    {	(PROC)glPointParameterfvEXT,	"glPointParameterfvEXT"		},
+
+    {	(PROC)glLockArraysEXT,			"glLockArraysEXT"			},
+    {	(PROC)glUnlockArraysEXT,		"glUnlockArraysEXT"			},
+	{	NULL,							"\0"						}
+};
+
+// Multitexture entry points. gldGetProcAddress_DX consults this table only
+// when glb.bMultitexture is set. Lookup is an exact strcmp on the name
+// column; the table ends at the first row whose proc is NULL.
+GLD_extension GLD_multitexList[] = {
+// The block below is disabled: it maps SGIS names onto glMultiTexCoord*SGIS
+// symbols. Only the three SGIS wrappers defined in this file are exported
+// (see the end of the table).
+/*
+    {	(PROC)glMultiTexCoord1dSGIS,		"glMTexCoord1dSGIS"			},
+    {	(PROC)glMultiTexCoord1dvSGIS,		"glMTexCoord1dvSGIS"		},
+    {	(PROC)glMultiTexCoord1fSGIS,		"glMTexCoord1fSGIS"			},
+    {	(PROC)glMultiTexCoord1fvSGIS,		"glMTexCoord1fvSGIS"		},
+    {	(PROC)glMultiTexCoord1iSGIS,		"glMTexCoord1iSGIS"			},
+    {	(PROC)glMultiTexCoord1ivSGIS,		"glMTexCoord1ivSGIS"		},
+    {	(PROC)glMultiTexCoord1sSGIS,		"glMTexCoord1sSGIS"			},
+    {	(PROC)glMultiTexCoord1svSGIS,		"glMTexCoord1svSGIS"		},
+    {	(PROC)glMultiTexCoord2dSGIS,		"glMTexCoord2dSGIS"			},
+    {	(PROC)glMultiTexCoord2dvSGIS,		"glMTexCoord2dvSGIS"		},
+    {	(PROC)glMultiTexCoord2fSGIS,		"glMTexCoord2fSGIS"			},
+    {	(PROC)glMultiTexCoord2fvSGIS,		"glMTexCoord2fvSGIS"		},
+    {	(PROC)glMultiTexCoord2iSGIS,		"glMTexCoord2iSGIS"			},
+    {	(PROC)glMultiTexCoord2ivSGIS,		"glMTexCoord2ivSGIS"		},
+    {	(PROC)glMultiTexCoord2sSGIS,		"glMTexCoord2sSGIS"			},
+    {	(PROC)glMultiTexCoord2svSGIS,		"glMTexCoord2svSGIS"		},
+    {	(PROC)glMultiTexCoord3dSGIS,		"glMTexCoord3dSGIS"			},
+    {	(PROC)glMultiTexCoord3dvSGIS,		"glMTexCoord3dvSGIS"		},
+    {	(PROC)glMultiTexCoord3fSGIS,		"glMTexCoord3fSGIS"			},
+    {	(PROC)glMultiTexCoord3fvSGIS,		"glMTexCoord3fvSGIS"		},
+    {	(PROC)glMultiTexCoord3iSGIS,		"glMTexCoord3iSGIS"			},
+    {	(PROC)glMultiTexCoord3ivSGIS,		"glMTexCoord3ivSGIS"		},
+    {	(PROC)glMultiTexCoord3sSGIS,		"glMTexCoord3sSGIS"			},
+    {	(PROC)glMultiTexCoord3svSGIS,		"glMTexCoord3svSGIS"		},
+    {	(PROC)glMultiTexCoord4dSGIS,		"glMTexCoord4dSGIS"			},
+    {	(PROC)glMultiTexCoord4dvSGIS,		"glMTexCoord4dvSGIS"		},
+    {	(PROC)glMultiTexCoord4fSGIS,		"glMTexCoord4fSGIS"			},
+    {	(PROC)glMultiTexCoord4fvSGIS,		"glMTexCoord4fvSGIS"		},
+    {	(PROC)glMultiTexCoord4iSGIS,		"glMTexCoord4iSGIS"			},
+    {	(PROC)glMultiTexCoord4ivSGIS,		"glMTexCoord4ivSGIS"		},
+    {	(PROC)glMultiTexCoord4sSGIS,		"glMTexCoord4sSGIS"			},
+    {	(PROC)glMultiTexCoord4svSGIS,		"glMTexCoord4svSGIS"		},
+    {	(PROC)glMultiTexCoordPointerSGIS,	"glMTexCoordPointerSGIS"	},
+    {	(PROC)glSelectTextureSGIS,			"glSelectTextureSGIS"			},
+    {	(PROC)glSelectTextureCoordSetSGIS,	"glSelectTextureCoordSetSGIS"	},
+*/
+    // ARB multitexture entry points, exported under their own names.
+    {	(PROC)glActiveTextureARB,		"glActiveTextureARB"		},
+    {	(PROC)glClientActiveTextureARB,	"glClientActiveTextureARB"	},
+    {	(PROC)glMultiTexCoord1dARB,		"glMultiTexCoord1dARB"		},
+    {	(PROC)glMultiTexCoord1dvARB,	"glMultiTexCoord1dvARB"		},
+    {	(PROC)glMultiTexCoord1fARB,		"glMultiTexCoord1fARB"		},
+    {	(PROC)glMultiTexCoord1fvARB,	"glMultiTexCoord1fvARB"		},
+    {	(PROC)glMultiTexCoord1iARB,		"glMultiTexCoord1iARB"		},
+    {	(PROC)glMultiTexCoord1ivARB,	"glMultiTexCoord1ivARB"		},
+    {	(PROC)glMultiTexCoord1sARB,		"glMultiTexCoord1sARB"		},
+    {	(PROC)glMultiTexCoord1svARB,	"glMultiTexCoord1svARB"		},
+    {	(PROC)glMultiTexCoord2dARB,		"glMultiTexCoord2dARB"		},
+    {	(PROC)glMultiTexCoord2dvARB,	"glMultiTexCoord2dvARB"		},
+    {	(PROC)glMultiTexCoord2fARB,		"glMultiTexCoord2fARB"		},
+    {	(PROC)glMultiTexCoord2fvARB,	"glMultiTexCoord2fvARB"		},
+    {	(PROC)glMultiTexCoord2iARB,		"glMultiTexCoord2iARB"		},
+    {	(PROC)glMultiTexCoord2ivARB,	"glMultiTexCoord2ivARB"		},
+    {	(PROC)glMultiTexCoord2sARB,		"glMultiTexCoord2sARB"		},
+    {	(PROC)glMultiTexCoord2svARB,	"glMultiTexCoord2svARB"		},
+    {	(PROC)glMultiTexCoord3dARB,		"glMultiTexCoord3dARB"		},
+    {	(PROC)glMultiTexCoord3dvARB,	"glMultiTexCoord3dvARB"		},
+    {	(PROC)glMultiTexCoord3fARB,		"glMultiTexCoord3fARB"		},
+    {	(PROC)glMultiTexCoord3fvARB,	"glMultiTexCoord3fvARB"		},
+    {	(PROC)glMultiTexCoord3iARB,		"glMultiTexCoord3iARB"		},
+    {	(PROC)glMultiTexCoord3ivARB,	"glMultiTexCoord3ivARB"		},
+    {	(PROC)glMultiTexCoord3sARB,		"glMultiTexCoord3sARB"		},
+    {	(PROC)glMultiTexCoord3svARB,	"glMultiTexCoord3svARB"		},
+    {	(PROC)glMultiTexCoord4dARB,		"glMultiTexCoord4dARB"		},
+    {	(PROC)glMultiTexCoord4dvARB,	"glMultiTexCoord4dvARB"		},
+    {	(PROC)glMultiTexCoord4fARB,		"glMultiTexCoord4fARB"		},
+    {	(PROC)glMultiTexCoord4fvARB,	"glMultiTexCoord4fvARB"		},
+    {	(PROC)glMultiTexCoord4iARB,		"glMultiTexCoord4iARB"		},
+    {	(PROC)glMultiTexCoord4ivARB,	"glMultiTexCoord4ivARB"		},
+    {	(PROC)glMultiTexCoord4sARB,		"glMultiTexCoord4sARB"		},
+    {	(PROC)glMultiTexCoord4svARB,	"glMultiTexCoord4svARB"		},
+
+	// Descent3 doesn't use correct string, hence this hack
+	// (the name deliberately lacks the ARB suffix -- do not "fix" it).
+    {	(PROC)glMultiTexCoord4fARB,		"glMultiTexCoord4f"			},
+
+	// Quake2 SGIS multitexture
+	// (served by the gld* wrappers defined earlier in this file).
+    {	(PROC)gldSelectTextureSGIS,		"glSelectTextureSGIS"		},
+    {	(PROC)gldMTexCoord2fSGIS,		"glMTexCoord2fSGIS"			},
+    {	(PROC)gldMTexCoord2fvSGIS,		"glMTexCoord2fvSGIS"		},
+
+	{	NULL,							"\0"						}
+};
+
+//---------------------------------------------------------------------------
+
+// Looks up an extension entry point by name.
+//
+// Args:   a  Entry-point name to resolve; compared case-sensitively.
+//
+// Return: The function address from GLD_extList, or from GLD_multitexList
+//         when glb.bMultitexture is set, or NULL if the name is unknown or
+//         a is NULL. Every lookup is reported through gldLogPrintf.
+//
+PROC gldGetProcAddress_DX(
+	LPCSTR a)
+{
+	int		i;
+	PROC	proc = NULL;
+
+	// strcmp must not be handed a NULL pointer; treat it as "not found".
+	if (a == NULL) {
+		gldLogPrintf(GLDLOG_INFO, "GetProcAddress: %s (%s)", "(null)", "Failed");
+		return NULL;
+	}
+
+	for (i=0; GLD_extList[i].proc; i++) {
+		if (!strcmp(a, GLD_extList[i].name)) {
+			proc = GLD_extList[i].proc;
+			break;
+		}
+	}
+
+	// The two tables share no names, so the multitexture table only needs
+	// to be searched when the general table produced no match.
+	if (proc == NULL && glb.bMultitexture) {
+		for (i=0; GLD_multitexList[i].proc; i++) {
+			if (!strcmp(a, GLD_multitexList[i].name)) {
+				proc = GLD_multitexList[i].proc;
+				break;
+			}
+		}
+	}
+
+	gldLogPrintf(GLDLOG_INFO, "GetProcAddress: %s (%s)", a, proc ? "OK" : "Failed");
+
+	return proc;
+}
+
+//---------------------------------------------------------------------------
+
+// Adjusts the extension set Mesa advertises for this context: switches off
+// the names in the disable table, switches on the enable table, adds the
+// multitexture names when glb.bMultitexture is set, then enables the
+// imaging, 1.3 and 1.4 extension groups.
+void gldEnableExtensions_DX9(
+	GLcontext *ctx)
+{
+	// Mesa enables some extensions by default; this table lists the ones
+	// to switch off again. Commented-out names are left enabled.
+	// NOTE: GL_EXT_compiled_vertex_array appears broken.
+	const char *gld_disable_extensions[] = {
+//		"GL_ARB_transpose_matrix",
+//		"GL_EXT_compiled_vertex_array",
+//		"GL_EXT_polygon_offset",
+//		"GL_EXT_rescale_normal",
+		"GL_EXT_texture3D",
+//		"GL_NV_texgen_reflection",
+		NULL
+	};
+
+	// Enabled only when multitexturing is switched on.
+	const char *gld_multitex_extensions[] = {
+		"GL_ARB_multitexture",		// Quake 3
+		NULL
+	};
+
+	// Quake 2 engines (only referenced by the disabled call further down)
+	const char *szGL_SGIS_multitexture = "GL_SGIS_multitexture";
+
+	// Always enabled.
+	const char *gld_enable_extensions[] = {
+		"GL_EXT_texture_env_add",	// Quake 3
+		"GL_ARB_texture_env_add",	// Quake 3
+		NULL
+	};
+
+	const char **ppName;	// cursor over the NULL-terminated tables
+
+	for (ppName = gld_disable_extensions; *ppName != NULL; ppName++) {
+		_mesa_disable_extension(ctx, *ppName);
+	}
+
+	for (ppName = gld_enable_extensions; *ppName != NULL; ppName++) {
+		_mesa_enable_extension(ctx, *ppName);
+	}
+
+	if (glb.bMultitexture) {
+		for (ppName = gld_multitex_extensions; *ppName != NULL; ppName++) {
+			_mesa_enable_extension(ctx, *ppName);
+		}
+
+		// GL_SGIS_multitexture
+		// NOTE: Quake2 ran *slower* with this enabled, so it is
+		// disabled for now.
+		// Fair bit slower on GeForce256,
+		// Much slower on 3dfx Voodoo5 5500.
+//		_mesa_add_extension(ctx, GL_TRUE, szGL_SGIS_multitexture, 0);
+	}
+
+	_mesa_enable_imaging_extensions(ctx);
+	_mesa_enable_1_3_extensions(ctx);
+	_mesa_enable_1_4_extensions(ctx);
+}
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx9/gld_pipeline_dx9.c b/src/mesa/drivers/windows/gldirect/dx9/gld_pipeline_dx9.c
new file mode 100644
index 0000000000..2b272aa628
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx9/gld_pipeline_dx9.c
@@ -0,0 +1,77 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Mesa transformation pipeline with GLDirect fastpath
+*
+****************************************************************************/
+
+//#include "../GLDirect.h"
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx9.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+//---------------------------------------------------------------------------
+
+// GLDirect's two render stages, defined in other translation units.
+extern struct tnl_pipeline_stage _gld_d3d_render_stage;
+extern struct tnl_pipeline_stage _gld_mesa_render_stage;
+
+// Stage list installed by gldInstallPipeline_DX9, terminated by a 0 entry.
+// The Direct3D render stage comes first, followed by Mesa's own transform,
+// lighting, fog, texgen, texture-matrix and point-attenuation stages, and
+// finally the stage that renders Mesa-transformed vertices through D3D.
+// NOTE(review): presumably the first stage short-circuits the rest when D3D
+// can handle the draw itself -- confirm against the stage implementation.
+static const struct tnl_pipeline_stage *gld_pipeline[] = {
+	&_gld_d3d_render_stage,			// Direct3D TnL
+	&_tnl_vertex_transform_stage,
+	&_tnl_normal_transform_stage,
+	&_tnl_lighting_stage,
+	&_tnl_fog_coordinate_stage,	/* TODO: Omit fog stage. ??? */
+	&_tnl_texgen_stage,
+	&_tnl_texture_transform_stage,
+	&_tnl_point_attenuation_stage,
+	&_gld_mesa_render_stage,		// Mesa TnL, D3D rendering
+	0,
+};
+
+//---------------------------------------------------------------------------
+
+// Replaces the context's current TnL pipeline with the GLDirect stage list
+// (gld_pipeline).
+void gldInstallPipeline_DX9(GLcontext *ctx)
+{
+	// Tear down whatever stages are installed at the moment...
+	_tnl_destroy_pipeline(ctx);
+
+	// ...then install the GLDirect stages in their place.
+	_tnl_install_pipeline(ctx, gld_pipeline);
+}
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c b/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c
new file mode 100644
index 0000000000..fd4dd4ed75
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c
@@ -0,0 +1,1446 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Primitive (points/lines/tris/quads) rendering
+*
+****************************************************************************/
+
+//#include "../GLDirect.h"
+
+//#include "gld_dx8.h"
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx9.h"
+
+#include "glheader.h"
+#include "context.h"
+#include "colormac.h"
+#include "depth.h"
+#include "extensions.h"
+#include "macros.h"
+#include "matrix.h"
+// #include "mem.h"
+//#include "mmath.h"
+#include "mtypes.h"
+#include "texformat.h"
+#include "texstore.h"
+#include "vbo/vbo.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "swrast_setup/ss_context.h"
+#include "swrast/s_context.h"
+#include "swrast/s_depth.h"
+#include "swrast/s_lines.h"
+#include "swrast/s_triangle.h"
+#include "swrast/s_trispan.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+// Disable compiler complaints about unreferenced local variables
+#pragma warning (disable:4101)
+
+//---------------------------------------------------------------------------
+// Helper defines for primitives
+//---------------------------------------------------------------------------
+
+//static const float ooZ		= 1.0f / 65536.0f; // One over Z
+
+// Packs swv->color[0..3] into a D3DCOLOR. Needs a local `swv` in scope.
+#define GLD_COLOUR (D3DCOLOR_RGBA(swv->color[0], swv->color[1], swv->color[2], swv->color[3]))
+// Packs swv->specular[0..3] into a D3DCOLOR. Needs a local `swv` in scope.
+#define GLD_SPECULAR (D3DCOLOR_RGBA(swv->specular[0], swv->specular[1], swv->specular[2], swv->specular[3]))
+// Mirrors a Y coordinate against the context height (gldCtx->dwHeight - y).
+// Needs a local `gldCtx` in scope.
+// NOTE(review): presumably converts GL's bottom-up window Y to D3D's
+// top-down Y -- confirm.
+#define GLD_FLIP_Y(y) (gldCtx->dwHeight - (y))
+
+//---------------------------------------------------------------------------
+// 2D vertex setup
+//---------------------------------------------------------------------------
+
+// The GLD_SETUP_2D_VARS_* macros declare the locals the 2D setup macros
+// rely on (gldCtx, gld, pV, ss, swv, dwSpecularColour, dwFlatColour). They
+// expect a `ctx` in scope and must appear where declarations are allowed.
+// None ends in a semicolon; the caller supplies it.
+
+// Points variant: pV starts at the current write position of the 2D point
+// buffer (gld->PB2d.pPoints).
+#define GLD_SETUP_2D_VARS_POINTS							\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx9	*gld	= GLD_GET_DX9_DRIVER(gldCtx);	\
+	GLD_2D_VERTEX	*pV		= (GLD_2D_VERTEX*)gld->PB2d.pPoints;	\
+	SScontext		*ss		= SWSETUP_CONTEXT(ctx);			\
+	SWvertex		*swv;									\
+	DWORD			dwSpecularColour;						\
+	DWORD			dwFlatColour
+
+// Lines variant: pV starts at gld->PB2d.pLines.
+#define GLD_SETUP_2D_VARS_LINES								\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx9	*gld	= GLD_GET_DX9_DRIVER(gldCtx);	\
+	GLD_2D_VERTEX	*pV		= (GLD_2D_VERTEX*)gld->PB2d.pLines;	\
+	SScontext		*ss		= SWSETUP_CONTEXT(ctx);			\
+	SWvertex		*swv;									\
+	DWORD			dwSpecularColour;						\
+	DWORD			dwFlatColour
+
+// Triangles variant: pV starts at gld->PB2d.pTriangles. Also declares the
+// fog flag and the two-sided lighting state (facing, VB, vbcolor, vbspec);
+// only `bFog` and `facing` are initialised here, the rest are assigned by
+// GLD_SETUP_TWOSIDED_LIGHTING when two-sided lighting is active.
+#define GLD_SETUP_2D_VARS_TRIANGLES							\
+	BOOL			bFog = ctx->Fog.Enabled;				\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx9	*gld	= GLD_GET_DX9_DRIVER(gldCtx);	\
+	GLD_2D_VERTEX	*pV		= (GLD_2D_VERTEX*)gld->PB2d.pTriangles;	\
+	SScontext		*ss		= SWSETUP_CONTEXT(ctx);			\
+	SWvertex		*swv;									\
+	DWORD			dwSpecularColour;						\
+	DWORD			dwFlatColour;							\
+	GLuint					facing = 0;						\
+	struct vertex_buffer	*VB;							\
+	GLchan					(*vbcolor)[4];					\
+	GLchan					(*vbspec)[4]
+
+// Points `swv` at software-setup vertex number s (ss->verts[s]).
+// The index is parenthesised rather than pasted with ##: pasting `[` onto
+// the argument does not form a valid preprocessing token and is rejected by
+// conforming preprocessors.
+#define GLD_SETUP_GET_SWVERT(s)					\
+	swv = &ss->verts[(s)]
+
+// Field-by-field setup of the 2D vertex at pV from the software vertex swv.
+// None of these macros ends in a semicolon. The multi-statement ones
+// (GLD_SETUP_2D_VERTEX, GLD_SETUP_TEX0, GLD_SETUP_TEX1) are not wrapped in
+// do { } while (0), so they must not be used as the unbraced body of an
+// if/else or loop.
+
+// Window position: x, Y mirrored through GLD_FLIP_Y, and rhw from win[3].
+#define GLD_SETUP_2D_VERTEX						\
+	pV->x			= swv->win[0];				\
+	pV->y			= GLD_FLIP_Y(swv->win[1]);	\
+	pV->rhw			= swv->win[3]
+
+// Per-vertex diffuse colour taken from swv.
+#define GLD_SETUP_SMOOTH_COLOUR					\
+	pV->diffuse		= GLD_COLOUR
+
+// Capture the flat colour from swv, either as-is or through _gldComputeFog.
+#define GLD_SETUP_GET_FLAT_COLOUR				\
+	dwFlatColour	= GLD_COLOUR
+#define GLD_SETUP_GET_FLAT_FOG_COLOUR			\
+	dwFlatColour	= _gldComputeFog(ctx, swv)
+
+// Apply the previously captured flat colour to the current vertex.
+#define GLD_SETUP_USE_FLAT_COLOUR				\
+	pV->diffuse		= dwFlatColour
+
+// Capture the flat specular colour from swv.
+#define GLD_SETUP_GET_FLAT_SPECULAR				\
+	dwSpecularColour= GLD_SPECULAR
+
+// Apply the previously captured flat specular colour.
+#define GLD_SETUP_USE_FLAT_SPECULAR				\
+	pV->specular	= dwSpecularColour
+
+// Depth: window Z divided by ctx->DepthMaxF.
+#define GLD_SETUP_DEPTH							\
+	pV->sz			= swv->win[2] / ctx->DepthMaxF
+//	pV->z			= swv->win[2] * ooZ;
+
+// Per-vertex specular colour taken from swv.
+#define GLD_SETUP_SPECULAR						\
+	pV->specular	= GLD_SPECULAR
+
+// Per-vertex diffuse colour with fog applied by _gldComputeFog.
+#define GLD_SETUP_FOG							\
+	pV->diffuse		= _gldComputeFog(ctx, swv)
+
+// First two components of texture coordinate set 0.
+#define GLD_SETUP_TEX0							\
+	pV->t0_u		= swv->texcoord[0][0];		\
+	pV->t0_v		= swv->texcoord[0][1]
+
+// First two components of texture coordinate set 1.
+#define GLD_SETUP_TEX1							\
+	pV->t1_u		= swv->texcoord[1][0];		\
+	pV->t1_v		= swv->texcoord[1][1]
+
+// Writes the diffuse/specular colours of the current vertex (pV).
+// When `facing` is 1 the colours come from the back-face arrays (vbcolor and,
+// if non-NULL, vbspec) at index v; otherwise they come from swv, with fog
+// applied to the diffuse colour when bFog is set.
+// Note that in the back-facing case with vbspec == NULL, pV->specular is
+// left unwritten.
+// Indices are parenthesised rather than pasted with ##: pasting `[` onto the
+// argument does not form a valid preprocessing token.
+#define GLD_SETUP_LIGHTING(v)			\
+	if (facing == 1) {					\
+		pV->diffuse	= D3DCOLOR_RGBA(vbcolor[(v)][0], vbcolor[(v)][1], vbcolor[(v)][2], vbcolor[(v)][3]);	\
+		if (vbspec) {																					\
+			pV->specular = D3DCOLOR_RGBA(vbspec[(v)][0], vbspec[(v)][1], vbspec[(v)][2], vbspec[(v)][3]);	\
+		}	\
+	} else {	\
+		if (bFog)						\
+			GLD_SETUP_FOG;				\
+		else							\
+			GLD_SETUP_SMOOTH_COLOUR;	\
+		GLD_SETUP_SPECULAR;				\
+	}
+
+// Flat-shaded counterpart: when `facing` is 1, overrides dwFlatColour (and
+// dwSpecularColour if vbspec is non-NULL) with the back-face colours at
+// index v. Does nothing otherwise.
+#define GLD_SETUP_GET_FLAT_LIGHTING(v)	\
+	if (facing == 1) {					\
+		dwFlatColour = D3DCOLOR_RGBA(vbcolor[(v)][0], vbcolor[(v)][1], vbcolor[(v)][2], vbcolor[(v)][3]);	\
+		if (vbspec) {																					\
+			dwSpecularColour = D3DCOLOR_RGBA(vbspec[(v)][0], vbspec[(v)][1], vbspec[(v)][2], vbspec[(v)][3]);	\
+		}	\
+	}
+
+// Object-like macro (takes no arguments). Expects ctx, v0, v1, v2, VB,
+// vbcolor, vbspec and facing in the expanding scope.
+// When DD_TRI_LIGHT_TWOSIDE is set in ctx->_TriangleCaps it:
+//   - points vbcolor (and vbspec, or NULL) at the back-face colour arrays;
+//   - computes the sign of the 2D cross product of edges (v0-v2) and (v1-v2)
+//     in window space and XORs it with ctx->Polygon._FrontBit to set facing.
+// When the flag is clear nothing is touched, so facing keeps its initial 0.
+#define GLD_SETUP_TWOSIDED_LIGHTING		\
+	/* Two-sided lighting */				\
+	if (ctx->_TriangleCaps & DD_TRI_LIGHT_TWOSIDE) {	\
+		SWvertex	*verts = SWSETUP_CONTEXT(ctx)->verts;	\
+		SWvertex	*v[3];									\
+		GLfloat		ex,ey,fx,fy,cc;							\
+		/* Get vars for later */							\
+		VB		= &TNL_CONTEXT(ctx)->vb;					\
+		vbcolor	= (GLchan (*)[4])VB->BackfaceColorPtr->data;	\
+		if (VB->BackfaceSecondaryColorPtr) {			\
+			vbspec = (GLchan (*)[4])VB->BackfaceSecondaryColorPtr->data;	\
+		} else {													\
+			vbspec = NULL;											\
+		}															\
+		v[0] = &verts[v0];											\
+		v[1] = &verts[v1];											\
+		v[2] = &verts[v2];											\
+		ex = v[0]->win[0] - v[2]->win[0];	\
+		ey = v[0]->win[1] - v[2]->win[1];	\
+		fx = v[1]->win[0] - v[2]->win[0];	\
+		fy = v[1]->win[1] - v[2]->win[1];	\
+		cc  = ex*fy - ey*fx;				\
+		facing = (cc < 0.0) ^ ctx->Polygon._FrontBit;	\
+	}
+
+//---------------------------------------------------------------------------
+// 3D vertex setup
+//---------------------------------------------------------------------------
+
+// The GLD_SETUP_3D_VARS_* macros declare the locals the 3D setup macros rely
+// on (gldCtx, gld, pV, tnl, VB, p4f, tc, dwColor). They expect a `ctx` in
+// scope. Unlike the 2D variants, each ends with its own semicolon.
+
+// Points variant: pV starts at gld->PB3d.pPoints.
+#define GLD_SETUP_3D_VARS_POINTS											\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx9	*gld	= GLD_GET_DX9_DRIVER(gldCtx);	\
+	GLD_3D_VERTEX			*pV		= (GLD_3D_VERTEX*)gld->PB3d.pPoints;	\
+	TNLcontext				*tnl	= TNL_CONTEXT(ctx);				\
+	struct vertex_buffer	*VB		= &tnl->vb;						\
+	GLfloat					(*p4f)[4];								\
+	GLfloat					(*tc)[4];								\
+	DWORD					dwColor;
+
+// Lines variant: pV starts at gld->PB3d.pLines.
+#define GLD_SETUP_3D_VARS_LINES											\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx9	*gld	= GLD_GET_DX9_DRIVER(gldCtx);	\
+	GLD_3D_VERTEX			*pV		= (GLD_3D_VERTEX*)gld->PB3d.pLines;	\
+	TNLcontext				*tnl	= TNL_CONTEXT(ctx);				\
+	struct vertex_buffer	*VB		= &tnl->vb;						\
+	GLfloat					(*p4f)[4];								\
+	GLfloat					(*tc)[4];								\
+	DWORD					dwColor;
+
+// Triangles variant: pV starts at gld->PB3d.pTriangles.
+#define GLD_SETUP_3D_VARS_TRIANGLES											\
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);			\
+	GLD_driver_dx9	*gld	= GLD_GET_DX9_DRIVER(gldCtx);	\
+	GLD_3D_VERTEX			*pV		= (GLD_3D_VERTEX*)gld->PB3d.pTriangles;	\
+	TNLcontext				*tnl	= TNL_CONTEXT(ctx);				\
+	struct vertex_buffer	*VB		= &tnl->vb;						\
+	GLfloat					(*p4f)[4];								\
+	GLfloat					(*tc)[4];								\
+	DWORD					dwColor;
+
+// 3D vertex setup: each macro copies one attribute of TnL vertex v from the
+// vertex buffer VB into the 3D vertex at pV. All of them keep their trailing
+// semicolon, so they may be used with or without one at the call site.
+// Indices are parenthesised rather than pasted with ##: pasting `[` onto the
+// argument does not form a valid preprocessing token.
+
+// Position (x, y, z) from the _TNL_ATTRIB_POS array.
+#define GLD_SETUP_3D_VERTEX(v)					\
+	p4f = VB->AttribPtr[_TNL_ATTRIB_POS]->data;		\
+	pV->Position.x	= p4f[(v)][0];				\
+	pV->Position.y	= p4f[(v)][1];				\
+	pV->Position.z	= p4f[(v)][2];
+
+// Per-vertex diffuse colour from the _TNL_ATTRIB_COLOR0 array.
+#define GLD_SETUP_SMOOTH_COLOUR_3D(v)															\
+	p4f = (GLfloat (*)[4])VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data;										\
+	pV->Diffuse	= D3DCOLOR_COLORVALUE(p4f[(v)][0], p4f[(v)][1], p4f[(v)][2], p4f[(v)][3]);
+
+
+// Captures the flat colour of vertex v into dwColor. Reads the same
+// _TNL_ATTRIB_COLOR0 array as the smooth variant; the attribute name was
+// previously misspelled as _TNL_ATTRIB_COLOR00.
+#define GLD_SETUP_GET_FLAT_COLOUR_3D(v)													\
+	p4f = (GLfloat (*)[4])VB->AttribPtr[_TNL_ATTRIB_COLOR0]->data;	\
+	dwColor	= D3DCOLOR_COLORVALUE(p4f[(v)][0], p4f[(v)][1], p4f[(v)][2], p4f[(v)][3]);
+
+// Applies the previously captured flat colour.
+#define GLD_SETUP_USE_FLAT_COLOUR_3D			\
+	pV->Diffuse = dwColor;
+
+// Texture unit 0 (s, t); skipped when the attribute array is absent.
+#define GLD_SETUP_TEX0_3D(v)						\
+	if (VB->AttribPtr[_TNL_ATTRIB_TEX0]) {				\
+		tc = VB->AttribPtr[_TNL_ATTRIB_TEX0]->data;		\
+		pV->TexUnit0.x	= tc[(v)][0];				\
+		pV->TexUnit0.y	= tc[(v)][1];				\
+	}
+
+// Texture unit 1 (s, t); skipped when the attribute array is absent.
+#define GLD_SETUP_TEX1_3D(v)						\
+	if (VB->AttribPtr[_TNL_ATTRIB_TEX1]) {				\
+		tc = VB->AttribPtr[_TNL_ATTRIB_TEX1]->data;		\
+		pV->TexUnit1.x	= tc[(v)][0];				\
+		pV->TexUnit1.y	= tc[(v)][1];				\
+	}
+
+//---------------------------------------------------------------------------
+// Helper functions
+//---------------------------------------------------------------------------
+
+// Blends a vertex colour with the context fog colour and packs the result.
+// Based on Mesa code.
+//
+// Args:   ctx  Context supplying Fog.Color.
+//         swv  Software vertex supplying the fog factor and colour.
+//
+// Return: D3DCOLOR with RGB = fog * colour + (1 - fog) * fogColour and the
+//         vertex's own alpha (fog is not applied to alpha).
+//
+__inline DWORD _gldComputeFog(
+	GLcontext *ctx,
+	SWvertex *swv)
+{
+	const GLfloat	vertexWeight	= swv->fog;
+	const GLfloat	fogWeight		= 1.0 - vertexWeight;
+	GLchan			fogRed, fogGreen, fogBlue;
+	GLchan			outRed, outGreen, outBlue;
+
+	// Convert the floating-point fog colour to channel values first.
+	UNCLAMPED_FLOAT_TO_CHAN(fogRed,   ctx->Fog.Color[RCOMP]);
+	UNCLAMPED_FLOAT_TO_CHAN(fogGreen, ctx->Fog.Color[GCOMP]);
+	UNCLAMPED_FLOAT_TO_CHAN(fogBlue,  ctx->Fog.Color[BCOMP]);
+
+	// Weighted mix per channel.
+	outRed   = vertexWeight * swv->color[0] + fogWeight * fogRed;
+	outGreen = vertexWeight * swv->color[1] + fogWeight * fogGreen;
+	outBlue  = vertexWeight * swv->color[2] + fogWeight * fogBlue;
+
+	return D3DCOLOR_RGBA(outRed, outGreen, outBlue, swv->color[3]);
+}
+
+//---------------------------------------------------------------------------
+
+// Line-stipple reset hook. Currently a no-op: line stipple is not
+// implemented by this driver.
+void gld_ResetLineStipple_DX9(GLcontext *ctx)
+{
+	// TODO: Fake stipple with a 32x32 texture.
+	(void)ctx;	// unused until stipple is implemented
+}
+
+//---------------------------------------------------------------------------
+// 2D (post-transformed) primitives
+//---------------------------------------------------------------------------
+
+// Appends the unclipped points in [first, last) to the 2D point buffer.
+//
+// Args:   ctx    Current context.
+//         first  Index of the first point.
+//         last   One past the index of the last point.
+//
+// Sets the D3D point size from ctx->Point._Size, then writes one 2D vertex
+// per point whose clip mask is zero. Indices go through VB->Elts when an
+// element list is present. Clipped points are skipped entirely: previously
+// they still advanced the write pointer and were counted in nPoints, leaving
+// unwritten vertex slots in the buffer.
+//
+void gld_Points2D_DX9(
+	GLcontext *ctx,
+	GLuint first,
+	GLuint last)
+{
+	GLD_SETUP_2D_VARS_POINTS;
+
+	unsigned				i;
+	unsigned				nEmitted = 0;	// vertices actually written
+	struct vertex_buffer	*VB = &TNL_CONTEXT(ctx)->vb;
+
+	// _Size is already clamped to MaxPointSize and MinPointSize.
+	// The float's bit pattern is passed as a DWORD.
+	IDirect3DDevice9_SetRenderState(gld->pDev, D3DRS_POINTSIZE, *((DWORD*)&ctx->Point._Size));
+
+	if (VB->Elts) {
+		for (i=first; i<last; i++) {
+			if (VB->ClipMask[VB->Elts[i]] == 0) {
+//				_swrast_Point( ctx, &verts[VB->Elts[i]] );
+				GLD_SETUP_GET_SWVERT(VB->Elts[i]);
+				GLD_SETUP_2D_VERTEX;
+				GLD_SETUP_SMOOTH_COLOUR;
+				GLD_SETUP_DEPTH;
+				GLD_SETUP_SPECULAR;
+				GLD_SETUP_TEX0;
+				GLD_SETUP_TEX1;
+				pV++;
+				nEmitted++;
+			}
+		}
+	} else {
+		// swv walks the vertex array in step with i, clipped or not;
+		// pV only advances when a vertex is written.
+		GLD_SETUP_GET_SWVERT(first);
+		for (i=first; i<last; i++, swv++) {
+			if (VB->ClipMask[i] == 0) {
+//				_swrast_Point( ctx, &verts[i] );
+				GLD_SETUP_2D_VERTEX;
+				GLD_SETUP_SMOOTH_COLOUR;
+				GLD_SETUP_DEPTH;
+				GLD_SETUP_SPECULAR;
+				GLD_SETUP_TEX0;
+				GLD_SETUP_TEX1;
+				pV++;
+				nEmitted++;
+			}
+		}
+	}
+
+	gld->PB2d.pPoints = (BYTE*)pV;
+	gld->PB2d.nPoints += nEmitted;
+}
+
+//---------------------------------------------------------------------------
+
+// Appends one flat-shaded line to the 2D line buffer. Vertex v1 is written
+// first and supplies the diffuse and specular colours for both endpoints.
+void gld_Line2DFlat_DX9(GLcontext *ctx, GLuint v0, GLuint v1)
+{
+	GLD_SETUP_2D_VARS_LINES;
+
+	// Endpoint v1: capture the flat colours, then fill in the vertex.
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_GET_FLAT_COLOUR;
+	GLD_SETUP_GET_FLAT_SPECULAR;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	++pV;
+
+	// Endpoint v0: own position and texcoords, colours reused from v1.
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	++pV;
+
+	// Publish the new write position and line count.
+	gld->PB2d.nLines++;
+	gld->PB2d.pLines = (BYTE*)pV;
+}
+
+//---------------------------------------------------------------------------
+
+// Appends one smooth-shaded line to the 2D line buffer; each endpoint keeps
+// its own diffuse and specular colour.
+void gld_Line2DSmooth_DX9(GLcontext *ctx, GLuint v0, GLuint v1)
+{
+	GLD_SETUP_2D_VARS_LINES;
+
+	// Endpoint v0
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_SPECULAR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	++pV;
+
+	// Endpoint v1
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_SPECULAR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	++pV;
+
+	// Publish the new write position and line count.
+	gld->PB2d.nLines++;
+	gld->PB2d.pLines = (BYTE*)pV;
+}
+
+//---------------------------------------------------------------------------
+
+// Appends one flat-shaded triangle to the 2D triangle buffer. Vertices are
+// written in the order v2, v0, v1; v2 supplies the diffuse colour for all
+// three. Specular is not written by this variant.
+void gld_Triangle2DFlat_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2)
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+
+	// Corner v2: capture the flat colour, then fill in the vertex.
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_GET_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	++pV;
+
+	// Corner v0
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	++pV;
+
+	// Corner v1
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	++pV;
+
+	// Publish the new write position and triangle count.
+	gld->PB2d.nTriangles++;
+	gld->PB2d.pTriangles = (BYTE*)pV;
+}
+
+//---------------------------------------------------------------------------
+
+// Appends one smooth-shaded triangle to the 2D triangle buffer; each corner
+// keeps its own diffuse colour. Specular is not written by this variant.
+void gld_Triangle2DSmooth_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2)
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+
+	// Corner v0
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	++pV;
+
+	// Corner v1
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	++pV;
+
+	// Corner v2
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	++pV;
+
+	// Publish the new write position and triangle count.
+	gld->PB2d.nTriangles++;
+	gld->PB2d.pTriangles = (BYTE*)pV;
+}
+
+//---------------------------------------------------------------------------
+
+// Flat-shaded 2D triangle with the "extras": fog, specular and two-sided
+// lighting. Vertices are written in the order v2, v0, v1; v2 supplies the
+// diffuse and specular colours for all three, replaced by the back-face
+// colours when the triangle is back-facing.
+void gld_Triangle2DFlatExtras_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2)
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+
+	// Object-like macro: it reads v0/v1/v2 from this scope and takes no
+	// argument list.
+	GLD_SETUP_TWOSIDED_LIGHTING;
+
+	// Corner v2: work out the flat colours first.
+	GLD_SETUP_GET_SWVERT(v2);
+	if (bFog) {
+		GLD_SETUP_GET_FLAT_FOG_COLOUR;
+	} else {
+		GLD_SETUP_GET_FLAT_COLOUR;
+	}
+	GLD_SETUP_GET_FLAT_SPECULAR;
+	GLD_SETUP_GET_FLAT_LIGHTING(v2);	// back-face override, if facing == 1
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	++pV;
+
+	// Corner v0
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	++pV;
+
+	// Corner v1
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	++pV;
+
+	// Publish the new write position and triangle count.
+	gld->PB2d.nTriangles++;
+	gld->PB2d.pTriangles = (BYTE*)pV;
+}
+
+//---------------------------------------------------------------------------
+
+// Appends one smooth-shaded triangle to the 2D primitive buffer, using the
+// "extras" path: colour is set per vertex through GLD_SETUP_LIGHTING(vN)
+// after GLD_SETUP_TWOSIDED_LIGHTING(v0) has run once for the primitive.
+// The vertices are emitted in the order v0, v1, v2; afterwards
+// gld->PB2d.pTriangles is updated and nTriangles is incremented.
+// NOTE(review): pV and gld are not declared in this function; presumably
+// GLD_SETUP_2D_VARS_TRIANGLES declares them - confirm.
+void gld_Triangle2DSmoothExtras_DX9(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1,
+	GLuint v2)
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+
+	GLD_SETUP_TWOSIDED_LIGHTING(v0);
+
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v0);
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v1);
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v2);
+	pV++;
+
+	// Publish the new end-of-data pointer and account for the triangle.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+
+// Appends one flat-shaded quad to the 2D primitive buffer as two triangles.
+// Six vertices are emitted in the order v3, v0, v1 and v1, v2, v3. The flat
+// colour is fetched once, while v3 is the current source vertex, and applied
+// to every emitted vertex.
+// Afterwards gld->PB2d.pTriangles is updated and nTriangles grows by two.
+// NOTE(review): pV and gld are not declared in this function; presumably
+// GLD_SETUP_2D_VARS_TRIANGLES declares them - confirm.
+void gld_Quad2DFlat_DX9(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1,
+	GLuint v2,
+	GLuint v3)
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+
+	// First triangle: v3, v0, v1
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_GET_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+
+	// Second triangle: v1, v2, v3
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	pV++;
+
+	// Publish the new end-of-data pointer and account for both triangles.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+// Appends one smooth-shaded quad to the 2D primitive buffer as two triangles.
+// Six vertices are emitted in the order v0, v1, v2 and v2, v3, v0, each with
+// its own colour, depth and two sets of texture coordinates.
+// Afterwards gld->PB2d.pTriangles is updated and nTriangles grows by two.
+// NOTE(review): pV and gld are not declared in this function; presumably
+// GLD_SETUP_2D_VARS_TRIANGLES declares them - confirm.
+void gld_Quad2DSmooth_DX9(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1,
+	GLuint v2,
+	GLuint v3)
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+
+	// First triangle: v0, v1, v2
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Second triangle: v2, v3, v0
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_SMOOTH_COLOUR;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	pV++;
+
+	// Publish the new end-of-data pointer and account for both triangles.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+// Appends one flat-shaded quad to the 2D primitive buffer as two triangles,
+// including the "extras" (fog, specular and two-sided lighting handling).
+// Six vertices are emitted in the order v3, v0, v1 and v1, v2, v3. The flat
+// colour and flat specular values are fetched once, while v3 is the current
+// source vertex, and applied to every emitted vertex.
+// Afterwards gld->PB2d.pTriangles is updated and nTriangles grows by two.
+// NOTE(review): pV, gld and bFog are not declared in this function;
+// presumably GLD_SETUP_2D_VARS_TRIANGLES declares them - confirm.
+void gld_Quad2DFlatExtras_DX9(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1,
+	GLuint v2,
+	GLuint v3)
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+
+	GLD_SETUP_TWOSIDED_LIGHTING(v3);
+
+	// First triangle: v3, v0, v1
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	// NOTE(review): the unbraced if/else only parses correctly if each of
+	// these macros expands to exactly one statement - confirm.
+	if (bFog)
+		GLD_SETUP_GET_FLAT_FOG_COLOUR;
+	else
+		GLD_SETUP_GET_FLAT_COLOUR;
+	GLD_SETUP_GET_FLAT_SPECULAR;
+	GLD_SETUP_GET_FLAT_LIGHTING(v3);
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+
+	// Second triangle: v1, v2, v3
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_USE_FLAT_COLOUR;
+	GLD_SETUP_USE_FLAT_SPECULAR;
+	pV++;
+
+	// Publish the new end-of-data pointer and account for both triangles.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+// Appends one smooth-shaded quad to the 2D primitive buffer as two triangles,
+// using the "extras" path: colour is set per vertex through
+// GLD_SETUP_LIGHTING(vN) after GLD_SETUP_TWOSIDED_LIGHTING(v0) has run once.
+// Six vertices are emitted in the order v0, v1, v2 and v2, v3, v0.
+// Afterwards gld->PB2d.pTriangles is updated and nTriangles grows by two.
+// NOTE(review): pV and gld are not declared in this function; presumably
+// GLD_SETUP_2D_VARS_TRIANGLES declares them - confirm.
+void gld_Quad2DSmoothExtras_DX9(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1,
+	GLuint v2,
+	GLuint v3)
+{
+	GLD_SETUP_2D_VARS_TRIANGLES;
+
+	GLD_SETUP_TWOSIDED_LIGHTING(v0);
+
+	// First triangle: v0, v1, v2
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v0);
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v1);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v1);
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v2);
+	pV++;
+
+	// Second triangle: v2, v3, v0
+	GLD_SETUP_GET_SWVERT(v2);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v2);
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v3);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v3);
+	pV++;
+
+	GLD_SETUP_GET_SWVERT(v0);
+	GLD_SETUP_2D_VERTEX;
+	GLD_SETUP_DEPTH;
+	GLD_SETUP_TEX0;
+	GLD_SETUP_TEX1;
+	GLD_SETUP_LIGHTING(v0);
+	pV++;
+
+	// Publish the new end-of-data pointer and account for both triangles.
+	gld->PB2d.pTriangles = (BYTE*)pV;
+	gld->PB2d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+// 3D (pre-transformed) primitives
+//---------------------------------------------------------------------------
+
+// Appends the unclipped points of the range [first, last) to the 3D point
+// buffer and sets the D3D point size from ctx->Point._Size.
+//
+// Points whose clip mask is non-zero are skipped. Only points that were
+// actually written advance the write pointer and are added to
+// gld->PB3d.nPoints, so the buffer never contains unwritten vertices that
+// would be counted for drawing.
+//
+// When VB->Elts is set, the vertex index is VB->Elts[i] and that same index
+// is used for position, clip mask, colour and texture coordinates.
+//
+// NOTE(review): pV, gld and VB are not declared in this function; presumably
+// GLD_SETUP_3D_VARS_POINTS declares them - confirm against its definition.
+void gld_Points3D_DX9(
+	GLcontext *ctx,
+	GLuint first,
+	GLuint last)
+{
+	GLD_SETUP_3D_VARS_POINTS
+
+	unsigned				i;
+	unsigned				nEmitted = 0;	// Points actually written
+
+	// _Size is already clamped to MaxPointSize and MinPointSize.
+	// The float is passed bit-for-bit in a DWORD, as SetRenderState expects.
+	IDirect3DDevice9_SetRenderState(gld->pDev, D3DRS_POINTSIZE, *((DWORD*)&ctx->Point._Size));
+
+	if (VB->Elts) {
+		for (i=first; i<last; i++) {
+			const GLuint elt = VB->Elts[i];
+			if (VB->ClipMask[elt] == 0) {
+				GLD_SETUP_3D_VERTEX(elt)
+				GLD_SETUP_SMOOTH_COLOUR_3D(elt)
+				GLD_SETUP_TEX0_3D(elt)
+				GLD_SETUP_TEX1_3D(elt)
+				pV++;
+				nEmitted++;
+			}
+		}
+	} else {
+		for (i=first; i<last; i++) {
+			if (VB->ClipMask[i] == 0) {
+				GLD_SETUP_3D_VERTEX(i)
+				GLD_SETUP_SMOOTH_COLOUR_3D(i)
+				GLD_SETUP_TEX0_3D(i)
+				GLD_SETUP_TEX1_3D(i)
+				pV++;
+				nEmitted++;
+			}
+		}
+	}
+
+	// Publish the new end-of-data pointer and account for the emitted points.
+	gld->PB3d.pPoints = (BYTE*)pV;
+	gld->PB3d.nPoints += nEmitted;
+}
+
+//---------------------------------------------------------------------------
+// Line functions
+//---------------------------------------------------------------------------
+
+// Appends one flat-shaded line to the 3D primitive buffer.
+// The vertices are emitted in the order v1, v0. The flat colour is fetched
+// once for v1 (GLD_SETUP_GET_FLAT_COLOUR_3D) and applied to both vertices.
+// Afterwards gld->PB3d.pLines is updated and nLines is incremented.
+// NOTE(review): pV and gld are not declared in this function; presumably
+// GLD_SETUP_3D_VARS_LINES declares them - confirm.
+void gld_Line3DFlat_DX9(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1)
+{
+	GLD_SETUP_3D_VARS_LINES
+
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_GET_FLAT_COLOUR_3D(v1)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+
+	// Publish the new end-of-data pointer and account for the line.
+	gld->PB3d.pLines = (BYTE*)pV;
+	gld->PB3d.nLines++;
+}
+
+//---------------------------------------------------------------------------
+
+// Appends one smooth-shaded line to the 3D primitive buffer.
+// The vertices are emitted in the order v1, v0, each with its own colour and
+// two sets of texture coordinates.
+// Afterwards gld->PB3d.pLines is updated and nLines is incremented.
+// NOTE(review): pV and gld are not declared in this function; presumably
+// GLD_SETUP_3D_VARS_LINES declares them - confirm.
+void gld_Line3DSmooth_DX9(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1)
+{
+	GLD_SETUP_3D_VARS_LINES
+
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v1)
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+
+	// Publish the new end-of-data pointer and account for the line.
+	gld->PB3d.pLines = (BYTE*)pV;
+	gld->PB3d.nLines++;
+}
+
+//---------------------------------------------------------------------------
+// Triangle functions
+//---------------------------------------------------------------------------
+
+// Appends one flat-shaded triangle to the 3D primitive buffer.
+// The vertices are emitted in the order v2, v0, v1. The flat colour is
+// fetched once for v2 (GLD_SETUP_GET_FLAT_COLOUR_3D) and applied to all
+// three vertices.
+// Afterwards gld->PB3d.pTriangles is updated and nTriangles is incremented.
+// NOTE(review): pV and gld are not declared in this function; presumably
+// GLD_SETUP_3D_VARS_TRIANGLES declares them - confirm.
+void gld_Triangle3DFlat_DX9(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1,
+	GLuint v2)
+{
+	GLD_SETUP_3D_VARS_TRIANGLES
+
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	GLD_SETUP_GET_FLAT_COLOUR_3D(v2)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	pV++;
+
+	// Publish the new end-of-data pointer and account for the triangle.
+	gld->PB3d.pTriangles = (BYTE*)pV;
+	gld->PB3d.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+
+// Appends one smooth-shaded triangle to the 3D primitive buffer.
+// The vertices are emitted in the order v0, v1, v2, each with its own colour
+// and two sets of texture coordinates.
+// Afterwards gld->PB3d.pTriangles is updated and nTriangles is incremented.
+// NOTE(review): pV and gld are not declared in this function; presumably
+// GLD_SETUP_3D_VARS_TRIANGLES declares them - confirm.
+void gld_Triangle3DSmooth_DX9(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1,
+	GLuint v2)
+{
+	GLD_SETUP_3D_VARS_TRIANGLES
+
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v1)
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v2)
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	pV++;
+
+	// Publish the new end-of-data pointer and account for the triangle.
+	gld->PB3d.pTriangles = (BYTE*)pV;
+	gld->PB3d.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+// Quad functions
+//---------------------------------------------------------------------------
+
+// Appends one flat-shaded quad to the 3D primitive buffer as two triangles.
+// Six vertices are emitted in the order v3, v0, v1 and v1, v2, v3. The flat
+// colour is fetched once for v3 (GLD_SETUP_GET_FLAT_COLOUR_3D) and applied
+// to every emitted vertex.
+// Afterwards gld->PB3d.pTriangles is updated and nTriangles grows by two.
+// NOTE(review): pV and gld are not declared in this function; presumably
+// GLD_SETUP_3D_VARS_TRIANGLES declares them - confirm.
+void gld_Quad3DFlat_DX9(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1,
+	GLuint v2,
+	GLuint v3)
+{
+	GLD_SETUP_3D_VARS_TRIANGLES
+
+	// First triangle: v3, v0, v1
+	GLD_SETUP_3D_VERTEX(v3)
+	GLD_SETUP_GET_FLAT_COLOUR_3D(v3)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v3)
+	GLD_SETUP_TEX1_3D(v3)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+
+	// Second triangle: v1, v2, v3
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v3)
+	GLD_SETUP_USE_FLAT_COLOUR_3D
+	GLD_SETUP_TEX0_3D(v3)
+	GLD_SETUP_TEX1_3D(v3)
+	pV++;
+
+	// Publish the new end-of-data pointer and account for both triangles.
+	gld->PB3d.pTriangles = (BYTE*)pV;
+	gld->PB3d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+// Appends one smooth-shaded quad to the 3D primitive buffer as two triangles.
+// Six vertices are emitted in the order v0, v1, v2 and v2, v3, v0, each with
+// its own colour and two sets of texture coordinates.
+// Afterwards gld->PB3d.pTriangles is updated and nTriangles grows by two.
+// NOTE(review): pV and gld are not declared in this function; presumably
+// GLD_SETUP_3D_VARS_TRIANGLES declares them - confirm.
+void gld_Quad3DSmooth_DX9(
+	GLcontext *ctx,
+	GLuint v0,
+	GLuint v1,
+	GLuint v2,
+	GLuint v3)
+{
+	GLD_SETUP_3D_VARS_TRIANGLES
+
+	// First triangle: v0, v1, v2
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v1)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v1)
+	GLD_SETUP_TEX0_3D(v1)
+	GLD_SETUP_TEX1_3D(v1)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v2)
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	pV++;
+
+	// Second triangle: v2, v3, v0
+	GLD_SETUP_3D_VERTEX(v2)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v2)
+	GLD_SETUP_TEX0_3D(v2)
+	GLD_SETUP_TEX1_3D(v2)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v3)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v3)
+	GLD_SETUP_TEX0_3D(v3)
+	GLD_SETUP_TEX1_3D(v3)
+	pV++;
+
+	GLD_SETUP_3D_VERTEX(v0)
+	GLD_SETUP_SMOOTH_COLOUR_3D(v0)
+	GLD_SETUP_TEX0_3D(v0)
+	GLD_SETUP_TEX1_3D(v0)
+	pV++;
+
+	// Publish the new end-of-data pointer and account for both triangles.
+	gld->PB3d.pTriangles = (BYTE*)pV;
+	gld->PB3d.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+// Vertex setup for two-sided-lighting vertex shader
+//---------------------------------------------------------------------------
+
+/*
+
+void gld_Points2DTwoside_DX9(GLcontext *ctx, GLuint first, GLuint last)
+{
+	// NOTE: Two-sided lighting does not apply to Points
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Line2DFlatTwoside_DX9(GLcontext *ctx, GLuint v0, GLuint v1)
+{
+	// NOTE: Two-sided lighting does not apply to Lines
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Line2DSmoothTwoside_DX9(GLcontext *ctx, GLuint v0, GLuint v1)
+{
+	// NOTE: Two-sided lighting does not apply to Lines
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Triangle2DFlatTwoside_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2)
+{
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Triangle2DSmoothTwoside_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+	GLD_TWOSIDED_VERTEX	*pV		= (GLD_TWOSIDED_VERTEX*)gld->PBtwosidelight.pTriangles;
+	SScontext			*ss		= SWSETUP_CONTEXT(ctx);
+	SWvertex			*swv;
+	DWORD				dwSpecularColour;
+	DWORD				dwFlatColour;
+	GLuint					facing = 0;
+	struct vertex_buffer	*VB;
+	GLchan					(*vbcolor)[4];
+	GLchan					(*vbspec)[4];
+
+	// Reciprocal of DepthMax
+	const float ooDepthMax = 1.0f / ctx->DepthMaxF; 
+
+	// 1st vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 2nd vert
+	swv = &ss->verts[v1];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 3rd vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	gld->PBtwosidelight.pTriangles = (BYTE*)pV;
+	gld->PBtwosidelight.nTriangles++;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Quad2DFlatTwoside_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+	GLD_TWOSIDED_VERTEX	*pV		= (GLD_TWOSIDED_VERTEX*)gld->PBtwosidelight.pTriangles;
+	SScontext			*ss		= SWSETUP_CONTEXT(ctx);
+	SWvertex			*swv;
+	DWORD				dwSpecularColour;
+	DWORD				dwFlatColour;
+	GLuint					facing = 0;
+	struct vertex_buffer	*VB;
+	GLchan					(*vbcolor)[4];
+	GLchan					(*vbspec)[4];
+
+	// Reciprocal of DepthMax
+	const float ooDepthMax = 1.0f / ctx->DepthMaxF; 
+
+	// 1st vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 2nd vert
+	swv = &ss->verts[v1];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 3rd vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 4th vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 5th vert
+	swv = &ss->verts[v3];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 6th vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	gld->PBtwosidelight.pTriangles = (BYTE*)pV;
+	gld->PBtwosidelight.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+void gld_Quad2DSmoothTwoside_DX9(GLcontext *ctx, GLuint v0, GLuint v1, GLuint v2, GLuint v3)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+	GLD_TWOSIDED_VERTEX	*pV		= (GLD_TWOSIDED_VERTEX*)gld->PBtwosidelight.pTriangles;
+	SScontext			*ss		= SWSETUP_CONTEXT(ctx);
+	SWvertex			*swv;
+	DWORD				dwSpecularColour;
+	DWORD				dwFlatColour;
+	GLuint					facing = 0;
+	struct vertex_buffer	*VB;
+	GLchan					(*vbcolor)[4];
+	GLchan					(*vbspec)[4];
+
+	// Reciprocal of DepthMax
+	const float ooDepthMax = 1.0f / ctx->DepthMaxF; 
+
+	// 1st vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 2nd vert
+	swv = &ss->verts[v1];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 3rd vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 4th vert
+	swv = &ss->verts[v2];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 5th vert
+	swv = &ss->verts[v3];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	// 6th vert
+	swv = &ss->verts[v0];
+	pV->Position.x = swv->win[0];
+	pV->Position.y = GLD_FLIP_Y(swv->win[1]);
+	pV->Position.z = swv->win[2] * ooDepthMax;
+	pV->Position.w = swv->win[3];
+	pV->TexUnit0.x = swv->texcoord[0][0];
+	pV->TexUnit0.y = swv->texcoord[0][1];
+	pV->TexUnit1.x = swv->texcoord[1][0];
+	pV->TexUnit1.y = swv->texcoord[1][1];
+	pV->FrontDiffuse = GLD_COLOUR;
+	pV->FrontSpecular = GLD_SPECULAR;
+	pV++;
+
+	gld->PBtwosidelight.pTriangles = (BYTE*)pV;
+	gld->PBtwosidelight.nTriangles += 2;
+}
+
+//---------------------------------------------------------------------------
+
+*/
diff --git a/src/mesa/drivers/windows/gldirect/dx9/gld_texture_dx9.c b/src/mesa/drivers/windows/gldirect/dx9/gld_texture_dx9.c
new file mode 100644
index 0000000000..5a82235616
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx9/gld_texture_dx9.c
@@ -0,0 +1,2104 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Texture / Bitmap functions
+*
+****************************************************************************/
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx9.h"
+
+#include <d3dx9tex.h>
+
+#include "texformat.h"
+#include "colormac.h"
+#include "texstore.h"
+#include "image.h"
+// #include "mem.h"
+
+//---------------------------------------------------------------------------
+
+// Evaluates to gldCtx->dwHeight - y - h.
+// A variable named gldCtx must be in scope wherever this macro is expanded.
+// NOTE(review): presumably converts the bottom-left-origin Y of a rectangle
+// of height h into a top-left-origin Y - confirm against the callers.
+#define GLD_FLIP_HEIGHT(y,h) (gldCtx->dwHeight - (y) - (h))
+
+//---------------------------------------------------------------------------
+// 1D texture fetch
+//---------------------------------------------------------------------------
+
+// Texel addressing for 1D images: each macro yields a pointer into
+// (t)->Data for texel i. The j and k arguments are accepted but unused.
+// For CHAN_SRC and UBYTE_SRC, sz is the number of elements (of the pointer's
+// type) per texel; USHORT_SRC and FLOAT_SRC step one element per texel.
+#define CHAN_SRC( t, i, j, k, sz )					\
+	((GLchan *)(t)->Data + (i) * (sz))
+#define UBYTE_SRC( t, i, j, k, sz )					\
+	((GLubyte *)(t)->Data + (i) * (sz))
+#define USHORT_SRC( t, i, j, k )					\
+	((GLushort *)(t)->Data + (i))
+#define FLOAT_SRC( t, i, j, k )						\
+	((GLfloat *)(t)->Data + (i))
+
+//---------------------------------------------------------------------------
+
+// Fetches texel i of a 1D X8R8G8B8 image as GLchan RGBA.
+// Red, green and blue are taken from source channels 2, 1 and 0
+// respectively; the fourth source channel is ignored and alpha is set to
+// CHAN_MAX. j and k are not used for 1D addressing.
+static void gld_fetch_1d_texel_X8R8G8B8(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLchan *bgrx = CHAN_SRC( texImage, i, j, k, 4 );
+
+   texel[RCOMP] = bgrx[2];
+   texel[GCOMP] = bgrx[1];
+   texel[BCOMP] = bgrx[0];
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetches texel i of a 1D X8R8G8B8 image as float RGBA.
+// X8R8G8B8 texels are stored blue-first (src[0]=B, src[1]=G, src[2]=R), the
+// same order gld_fetch_1d_texel_X8R8G8B8 reads, so red comes from src[2] and
+// blue from src[0]. The fourth source channel is ignored and alpha is 1.0.
+// j and k are not used for 1D addressing.
+static void gld_fetch_1d_texel_f_X8R8G8B8(
+	const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLchan *src = CHAN_SRC( texImage, i, j, k, 4 );
+   texel[RCOMP] = CHAN_TO_FLOAT(src[2]);
+   texel[GCOMP] = CHAN_TO_FLOAT(src[1]);
+   texel[BCOMP] = CHAN_TO_FLOAT(src[0]);
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetches texel i of a 1D X1R5G5B5 image as GLchan RGBA.
+// Each colour is a 5-bit field of the 16-bit texel (R = bits 14..10,
+// G = bits 9..5, B = bits 4..0). After shifting a field down to bit 0 it is
+// isolated with 0x1f and rescaled from 0..31 to 0..255. The top bit is
+// ignored and alpha is set to CHAN_MAX. j and k are not used.
+static void gld_fetch_1d_texel_X1R5G5B5(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort *src = USHORT_SRC( texImage, i, j, k );
+   GLchan *rgba = (GLchan *) texel; GLushort s = *src;
+   rgba[RCOMP] = UBYTE_TO_CHAN( ((s >> 10) & 0x1f) * 255 / 0x1f );
+   rgba[GCOMP] = UBYTE_TO_CHAN( ((s >>  5) & 0x1f) * 255 / 0x1f );
+   rgba[BCOMP] = UBYTE_TO_CHAN( ((s      ) & 0x1f) * 255 / 0x1f );
+   rgba[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetches texel i of a 1D X1R5G5B5 image as float RGBA.
+// Each colour is a 5-bit field of the 16-bit texel (R = bits 14..10,
+// G = bits 9..5, B = bits 4..0). After shifting a field down to bit 0 it is
+// isolated with 0x1f and rescaled from 0..31 to 0..255 before the float
+// conversion. The top bit is ignored and alpha is 1.0. j and k are not used.
+static void gld_fetch_1d_texel_f_X1R5G5B5(
+	const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort *src = USHORT_SRC( texImage, i, j, k );
+   GLushort s = *src;
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((s >> 10) & 0x1f) * 255 / 0x1f );
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((s >>  5) & 0x1f) * 255 / 0x1f );
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((s      ) & 0x1f) * 255 / 0x1f );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetches texel i of a 1D X4R4G4B4 image as GLchan RGBA.
+// The three low nibbles of the 16-bit texel hold R (bits 11..8), G (bits
+// 7..4) and B (bits 3..0); each is rescaled from 0..15 to 0..255. The top
+// nibble is ignored and alpha is set to CHAN_MAX. j and k are not used.
+static void gld_fetch_1d_texel_X4R4G4B4(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   const GLint r4 = (packed >> 8) & 0xf;
+   const GLint g4 = (packed >> 4) & 0xf;
+   const GLint b4 = packed & 0xf;
+
+   texel[RCOMP] = UBYTE_TO_CHAN( r4 * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_CHAN( g4 * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_CHAN( b4 * 255 / 0xf );
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetches texel i of a 1D X4R4G4B4 image as float RGBA.
+// The three low nibbles of the 16-bit texel hold R (bits 11..8), G (bits
+// 7..4) and B (bits 3..0); each is rescaled from 0..15 to 0..255 before the
+// float conversion. The top nibble is ignored and alpha is 1.0.
+// j and k are not used.
+static void gld_fetch_1d_texel_f_X4R4G4B4(
+	const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   const GLint r4 = (packed >> 8) & 0xf;
+   const GLint g4 = (packed >> 4) & 0xf;
+   const GLint b4 = packed & 0xf;
+
+   texel[RCOMP] = UBYTE_TO_FLOAT( r4 * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_FLOAT( g4 * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_FLOAT( b4 * 255 / 0xf );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Drop the 1D addressing macros so the same names can be redefined for the
+// next texture dimensionality.
+#undef CHAN_SRC
+#undef UBYTE_SRC
+#undef USHORT_SRC
+#undef FLOAT_SRC
+
+//---------------------------------------------------------------------------
+// 2D texture fetch
+//---------------------------------------------------------------------------
+
+// Texel addressing for 2D images: each macro yields a pointer into
+// (t)->Data for texel (i, j), using (t)->Width texels per row. The k
+// argument is accepted but unused.
+// For CHAN_SRC and UBYTE_SRC, sz is the number of elements (of the pointer's
+// type) per texel; USHORT_SRC and FLOAT_SRC step one element per texel.
+#define CHAN_SRC( t, i, j, k, sz )					\
+	((GLchan *)(t)->Data + ((t)->Width * (j) + (i)) * (sz))
+#define UBYTE_SRC( t, i, j, k, sz )					\
+	((GLubyte *)(t)->Data + ((t)->Width * (j) + (i)) * (sz))
+#define USHORT_SRC( t, i, j, k )					\
+	((GLushort *)(t)->Data + ((t)->Width * (j) + (i)))
+#define FLOAT_SRC( t, i, j, k )						\
+	((GLfloat *)(t)->Data + ((t)->Width * (j) + (i)))
+
+//---------------------------------------------------------------------------
+
+// Fetches texel (i, j) of a 2D X8R8G8B8 image as GLchan RGBA.
+// Red, green and blue are taken from source channels 2, 1 and 0
+// respectively; the fourth source channel is ignored and alpha is set to
+// CHAN_MAX. k is not used for 2D addressing.
+static void gld_fetch_2d_texel_X8R8G8B8(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLchan *bgrx = CHAN_SRC( texImage, i, j, k, 4 );
+
+   texel[RCOMP] = bgrx[2];
+   texel[GCOMP] = bgrx[1];
+   texel[BCOMP] = bgrx[0];
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetches texel (i, j) of a 2D X8R8G8B8 image as float RGBA.
+// X8R8G8B8 texels are stored blue-first (src[0]=B, src[1]=G, src[2]=R), the
+// same order gld_fetch_2d_texel_X8R8G8B8 reads, so red comes from src[2] and
+// blue from src[0]. The fourth source channel is ignored and alpha is 1.0.
+// k is not used for 2D addressing.
+static void gld_fetch_2d_texel_f_X8R8G8B8(
+	const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLchan *src = CHAN_SRC( texImage, i, j, k, 4 );
+   texel[RCOMP] = CHAN_TO_FLOAT(src[2]);
+   texel[GCOMP] = CHAN_TO_FLOAT(src[1]);
+   texel[BCOMP] = CHAN_TO_FLOAT(src[0]);
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetches texel (i, j) of a 2D X1R5G5B5 image as GLchan RGBA.
+// Each colour is a 5-bit field of the 16-bit texel (R = bits 14..10,
+// G = bits 9..5, B = bits 4..0). After shifting a field down to bit 0 it is
+// isolated with 0x1f and rescaled from 0..31 to 0..255. The top bit is
+// ignored and alpha is set to CHAN_MAX. k is not used.
+static void gld_fetch_2d_texel_X1R5G5B5(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort *src = USHORT_SRC( texImage, i, j, k );
+   GLchan *rgba = (GLchan *) texel; GLushort s = *src;
+   rgba[RCOMP] = UBYTE_TO_CHAN( ((s >> 10) & 0x1f) * 255 / 0x1f );
+   rgba[GCOMP] = UBYTE_TO_CHAN( ((s >>  5) & 0x1f) * 255 / 0x1f );
+   rgba[BCOMP] = UBYTE_TO_CHAN( ((s      ) & 0x1f) * 255 / 0x1f );
+   rgba[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetches texel (i, j) of a 2D X1R5G5B5 image as float RGBA.
+// Each colour is a 5-bit field of the 16-bit texel (R = bits 14..10,
+// G = bits 9..5, B = bits 4..0). After shifting a field down to bit 0 it is
+// isolated with 0x1f and rescaled from 0..31 to 0..255 before the float
+// conversion. The top bit is ignored and alpha is 1.0. k is not used.
+static void gld_fetch_2d_texel_f_X1R5G5B5(
+	const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort *src = USHORT_SRC( texImage, i, j, k );
+   GLushort s = *src;
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((s >> 10) & 0x1f) * 255 / 0x1f );
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((s >>  5) & 0x1f) * 255 / 0x1f );
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((s      ) & 0x1f) * 255 / 0x1f );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetches texel (i, j) of a 2D X4R4G4B4 image as GLchan RGBA.
+// The three low nibbles of the 16-bit texel hold R (bits 11..8), G (bits
+// 7..4) and B (bits 3..0); each is rescaled from 0..15 to 0..255. The top
+// nibble is ignored and alpha is set to CHAN_MAX. k is not used.
+static void gld_fetch_2d_texel_X4R4G4B4(
+	const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   const GLint r4 = (packed >> 8) & 0xf;
+   const GLint g4 = (packed >> 4) & 0xf;
+   const GLint b4 = packed & 0xf;
+
+   texel[RCOMP] = UBYTE_TO_CHAN( r4 * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_CHAN( g4 * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_CHAN( b4 * 255 / 0xf );
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetches texel (i, j) of a 2D X4R4G4B4 image as float RGBA.
+// The three low nibbles of the 16-bit texel hold R (bits 11..8), G (bits
+// 7..4) and B (bits 3..0); each is rescaled from 0..15 to 0..255 before the
+// float conversion. The top nibble is ignored and alpha is 1.0.
+// k is not used.
+static void gld_fetch_2d_texel_f_X4R4G4B4(
+	const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort packed = *USHORT_SRC( texImage, i, j, k );
+   const GLint r4 = (packed >> 8) & 0xf;
+   const GLint g4 = (packed >> 4) & 0xf;
+   const GLint b4 = packed & 0xf;
+
+   texel[RCOMP] = UBYTE_TO_FLOAT( r4 * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_FLOAT( g4 * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_FLOAT( b4 * 255 / 0xf );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Drop the 2D addressing macros so the same names can be redefined for the
+// next texture dimensionality.
+#undef CHAN_SRC
+#undef UBYTE_SRC
+#undef USHORT_SRC
+#undef FLOAT_SRC
+
+//---------------------------------------------------------------------------
+// 3D texture fetch
+//---------------------------------------------------------------------------
+
+// Texel addressing for 3D images: each macro yields a pointer into
+// (t)->Data for texel (i, j, k), using (t)->Width texels per row and
+// (t)->Height rows per slice.
+// For CHAN_SRC and UBYTE_SRC, sz is the number of elements (of the pointer's
+// type) per texel; USHORT_SRC and FLOAT_SRC step one element per texel.
+// Every expansion is fully parenthesized so it can be subscripted or
+// dereferenced directly at the point of use.
+#define CHAN_SRC( t, i, j, k, sz )					\
+	((GLchan *)(t)->Data + (((t)->Height * (k) + (j)) *		\
+				(t)->Width + (i)) * (sz))
+#define UBYTE_SRC( t, i, j, k, sz )					\
+	((GLubyte *)(t)->Data + (((t)->Height * (k) + (j)) *		\
+				 (t)->Width + (i)) * (sz))
+#define USHORT_SRC( t, i, j, k )					\
+	((GLushort *)(t)->Data + (((t)->Height * (k) + (j)) *		\
+				  (t)->Width + (i)))
+#define FLOAT_SRC( t, i, j, k )						\
+	((GLfloat *)(t)->Data + (((t)->Height * (k) + (j)) *		\
+				  (t)->Width + (i)))
+
+//---------------------------------------------------------------------------
+
+// Fetch 3D texel (i,j,k) of an X8R8G8B8 image (4 GLchan per texel) as GLchan RGBA.
+static void gld_fetch_3d_texel_X8R8G8B8(const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLchan *bgrx = CHAN_SRC( texImage, i, j, k, 4 );
+   // Channels are read in the order B, G, R; the fourth (X) is ignored.
+   texel[BCOMP] = bgrx[0];
+   texel[GCOMP] = bgrx[1];
+   texel[RCOMP] = bgrx[2];
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch 3D texel (i,j,k) of an X8R8G8B8 image as float RGBA; alpha is always 1.0.
+static void gld_fetch_3d_texel_f_X8R8G8B8(const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLchan *src = CHAN_SRC( texImage, i, j, k, 4 );
+   texel[RCOMP] = CHAN_TO_FLOAT(src[2]);	// red is the third channel, as in gld_fetch_3d_texel_X8R8G8B8
+   texel[GCOMP] = CHAN_TO_FLOAT(src[1]);
+   texel[BCOMP] = CHAN_TO_FLOAT(src[0]);	// blue is the first channel
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch 3D texel (i,j,k) of an X1R5G5B5 image as GLchan RGBA; alpha is CHAN_MAX.
+static void gld_fetch_3d_texel_X1R5G5B5(const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort *src = USHORT_SRC( texImage, i, j, k );
+   const GLushort s = *src;	// x rrrrr ggggg bbbbb: R = bits 14..10, G = 9..5, B = 4..0
+   texel[RCOMP] = UBYTE_TO_CHAN( ((s >>  7) & 0xf8) * 255 / 0xf8 );	// field moved to bits 7..3, then scaled
+   texel[GCOMP] = UBYTE_TO_CHAN( ((s >>  2) & 0xf8) * 255 / 0xf8 );
+   texel[BCOMP] = UBYTE_TO_CHAN( ((s <<  3) & 0xf8) * 255 / 0xf8 );
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch 3D texel (i,j,k) of an X1R5G5B5 image as float RGBA; alpha is always 1.0.
+static void gld_fetch_3d_texel_f_X1R5G5B5(const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort *src = USHORT_SRC( texImage, i, j, k );
+   const GLushort s = *src;	// x rrrrr ggggg bbbbb: R = bits 14..10, G = 9..5, B = 4..0
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((s >>  7) & 0xf8) * 255 / 0xf8 );	// field moved to bits 7..3, then scaled
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((s >>  2) & 0xf8) * 255 / 0xf8 );
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((s <<  3) & 0xf8) * 255 / 0xf8 );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch 3D texel (i,j,k) of an X4R4G4B4 image as GLchan RGBA; alpha is CHAN_MAX.
+static void gld_fetch_3d_texel_X4R4G4B4(const struct gl_texture_image *texImage,
+	GLint i, GLint j, GLint k, GLchan *texel )
+{
+   const GLushort *pTexel = USHORT_SRC( texImage, i, j, k );
+   const GLushort bits = *pTexel;	// R in bits 11..8, G in 7..4, B in 3..0
+   texel[RCOMP] = UBYTE_TO_CHAN( ((bits >>  8) & 0xf) * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_CHAN( ((bits >>  4) & 0xf) * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_CHAN( ((bits      ) & 0xf) * 255 / 0xf );
+   texel[ACOMP] = CHAN_MAX;
+}
+
+//---------------------------------------------------------------------------
+
+// Fetch 3D texel (i,j,k) of an X4R4G4B4 image as float RGBA; alpha is always 1.0.
+static void gld_fetch_3d_texel_f_X4R4G4B4(const struct gl_texture_image *texImage,
+    GLint i, GLint j, GLint k, GLfloat *texel )
+{
+   const GLushort *pTexel = USHORT_SRC( texImage, i, j, k );
+   const GLushort bits = *pTexel;	// R in bits 11..8, G in 7..4, B in 3..0
+   texel[RCOMP] = UBYTE_TO_FLOAT( ((bits >>  8) & 0xf) * 255 / 0xf );
+   texel[GCOMP] = UBYTE_TO_FLOAT( ((bits >>  4) & 0xf) * 255 / 0xf );
+   texel[BCOMP] = UBYTE_TO_FLOAT( ((bits      ) & 0xf) * 255 / 0xf );
+   texel[ACOMP] = 1.f;
+}
+
+//---------------------------------------------------------------------------
+
+#undef CHAN_SRC		// retire the 3D texel-address helpers defined above
+#undef UBYTE_SRC
+#undef USHORT_SRC
+#undef FLOAT_SRC
+
+//---------------------------------------------------------------------------
+// Direct3D texture formats that have no Mesa equivalent
+//---------------------------------------------------------------------------
+
+const struct gl_texture_format _gld_texformat_X8R8G8B8 = {	/* pairs with D3DFMT_X8R8G8B8 */
+   MESA_FORMAT_ARGB8888,		/* MesaFormat */
+   GL_RGBA,				/* BaseFormat */
+   GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
+   8,					/* RedBits */
+   8,					/* GreenBits */
+   8,					/* BlueBits */
+   0,					/* AlphaBits (no alpha bits are advertised) */
+   0,					/* LuminanceBits */
+   0,					/* IntensityBits */
+   0,					/* IndexBits */
+   0,					/* DepthBits */
+   4,					/* TexelBytes */
+   _mesa_texstore_argb8888,			/* StoreTexImageFunc (Mesa's ARGB8888 store) */
+   gld_fetch_1d_texel_X8R8G8B8,		/* FetchTexel1D */
+   gld_fetch_2d_texel_X8R8G8B8,		/* FetchTexel2D */
+   gld_fetch_3d_texel_X8R8G8B8,		/* FetchTexel3D */
+   gld_fetch_1d_texel_f_X8R8G8B8,		/* FetchTexel1Df */
+   gld_fetch_2d_texel_f_X8R8G8B8,		/* FetchTexel2Df */
+   gld_fetch_3d_texel_f_X8R8G8B8,		/* FetchTexel3Df */
+};
+
+const struct gl_texture_format _gld_texformat_X1R5G5B5 = {	/* pairs with D3DFMT_X1R5G5B5 */
+   MESA_FORMAT_ARGB1555,		/* MesaFormat */
+   GL_RGBA,				/* BaseFormat */
+   GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
+   5,					/* RedBits */
+   5,					/* GreenBits */
+   5,					/* BlueBits */
+   0,					/* AlphaBits (no alpha bits are advertised) */
+   0,					/* LuminanceBits */
+   0,					/* IntensityBits */
+   0,					/* IndexBits */
+   0,					/* DepthBits */
+   2,					/* TexelBytes */
+   _mesa_texstore_argb1555,			/* StoreTexImageFunc (Mesa's ARGB1555 store) */
+   gld_fetch_1d_texel_X1R5G5B5,		/* FetchTexel1D */
+   gld_fetch_2d_texel_X1R5G5B5,		/* FetchTexel2D */
+   gld_fetch_3d_texel_X1R5G5B5,		/* FetchTexel3D */
+   gld_fetch_1d_texel_f_X1R5G5B5,		/* FetchTexel1Df */
+   gld_fetch_2d_texel_f_X1R5G5B5,		/* FetchTexel2Df */
+   gld_fetch_3d_texel_f_X1R5G5B5,		/* FetchTexel3Df */
+};
+
+const struct gl_texture_format _gld_texformat_X4R4G4B4 = {	/* pairs with D3DFMT_X4R4G4B4 */
+   MESA_FORMAT_ARGB4444,		/* MesaFormat */
+   GL_RGBA,				/* BaseFormat */
+   GL_UNSIGNED_NORMALIZED_ARB,		/* DataType */
+   4,					/* RedBits */
+   4,					/* GreenBits */
+   4,					/* BlueBits */
+   0,					/* AlphaBits (no alpha bits are advertised) */
+   0,					/* LuminanceBits */
+   0,					/* IntensityBits */
+   0,					/* IndexBits */
+   0,					/* DepthBits */
+   2,					/* TexelBytes */
+   _mesa_texstore_argb4444,			/* StoreTexImageFunc (Mesa's ARGB4444 store) */
+   gld_fetch_1d_texel_X4R4G4B4,		/* FetchTexel1D */
+   gld_fetch_2d_texel_X4R4G4B4,		/* FetchTexel2D */
+   gld_fetch_3d_texel_X4R4G4B4,		/* FetchTexel3D */
+   gld_fetch_1d_texel_f_X4R4G4B4,		/* FetchTexel1Df */
+   gld_fetch_2d_texel_f_X4R4G4B4,		/* FetchTexel2Df */
+   gld_fetch_3d_texel_f_X4R4G4B4,		/* FetchTexel3Df */
+};
+
+//---------------------------------------------------------------------------
+// Texture unit constants
+//---------------------------------------------------------------------------
+
+// List of possible combinations of texture environments.
+// Example: GLD_TEXENV_MODULATE_RGBA means GL_MODULATE with a GL_RGBA base
+// internal format. Values follow the entry order of gldTexEnv[] (mode * 3 + format).
+#define GLD_TEXENV_DECAL_RGB		0
+#define GLD_TEXENV_DECAL_RGBA		1
+#define GLD_TEXENV_DECAL_ALPHA		2
+#define GLD_TEXENV_REPLACE_RGB		3
+#define GLD_TEXENV_REPLACE_RGBA		4
+#define GLD_TEXENV_REPLACE_ALPHA	5
+#define GLD_TEXENV_MODULATE_RGB		6
+#define GLD_TEXENV_MODULATE_RGBA	7
+#define GLD_TEXENV_MODULATE_ALPHA	8
+#define GLD_TEXENV_BLEND_RGB		9
+#define GLD_TEXENV_BLEND_RGBA		10
+#define GLD_TEXENV_BLEND_ALPHA		11
+#define GLD_TEXENV_ADD_RGB			12
+#define GLD_TEXENV_ADD_RGBA			13
+#define GLD_TEXENV_ADD_ALPHA		14
+
+// Per-stage (i.e. per-unit) texture environment, held as Direct3D stage settings
+typedef struct {
+	DWORD			ColorArg1;	// Colour argument 1 (a D3DTA_* value)
+	D3DTEXTUREOP	ColorOp;	// Colour operation
+	DWORD			ColorArg2;	// Colour argument 2 (a D3DTA_* value)
+	DWORD			AlphaArg1;	// Alpha argument 1 (a D3DTA_* value)
+	D3DTEXTUREOP	AlphaOp;	// Alpha operation
+	DWORD			AlphaArg2;	// Alpha argument 2 (a D3DTA_* value)
+} GLD_texenv;
+
+// TODO: Do we really need to set ARG1 and ARG2 every time?
+//       They seem to always be TEXTURE and CURRENT respectively.
+// Entries are in GLD_TEXENV_* order. Legend for the per-entry formulas:
+// C = Colour out
+// A = Alpha out
+// Ct = Colour from Texture
+// Cf = Colour from fragment (diffuse)
+// At = Alpha from Texture
+// Af = Alpha from fragment (diffuse)
+// Cc = GL_TEXTURE_ENV_COLOR (used by GL_BLEND)
+const GLD_texenv gldTexEnv[] = {
+	// DECAL_RGB: C=Ct, A=Af
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// DECAL_RGBA: C=Cf(1-At)+CtAt, A=Af
+	{D3DTA_TEXTURE, D3DTOP_BLENDTEXTUREALPHA, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// DECAL_ALPHA: <undefined> use DECAL_RGB
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+
+	// REPLACE_RGB: C=Ct, A=Af
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// REPLACE_RGBA: C=Ct, A=At
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT},
+	// REPLACE_ALPHA: C=Cf, A=At
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG1, D3DTA_CURRENT},
+
+	// MODULATE_RGB: C=CfCt, A=Af
+	{D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// MODULATE_RGBA: C=CfCt, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+	// MODULATE_ALPHA: C=Cf, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+	// NOTE(review): D3DTOP_LERP also reads a third argument (ARG0) that these entries do not hold - presumably set elsewhere; confirm.
+	// BLEND_RGB: C=Cf(1-Ct)+CcCt, A=Af
+	{D3DTA_TEXTURE, D3DTOP_LERP, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// BLEND_RGBA: C=Cf(1-Ct)+CcCt, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_LERP, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+	// BLEND_ALPHA: C=Cf, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+
+	// ADD_RGB: C=Cf+Ct, A=Af
+	{D3DTA_TEXTURE, D3DTOP_ADD, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT},
+	// ADD_RGBA: C=Cf+Ct, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_ADD, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+	// ADD_ALPHA: C=Cf, A=AfAt
+	{D3DTA_TEXTURE, D3DTOP_SELECTARG2, D3DTA_CURRENT,
+	D3DTA_TEXTURE, D3DTOP_MODULATE, D3DTA_CURRENT},
+};
+
+//---------------------------------------------------------------------------
+
+// GL wrap mode -> D3D address mode: both GL clamp modes use D3D's clamp, all else repeats.
+D3DTEXTUREADDRESS _gldConvertWrap(GLenum wrap)
+{
+	return (wrap == GL_CLAMP || wrap == GL_CLAMP_TO_EDGE) ? D3DTADDRESS_CLAMP : D3DTADDRESS_WRAP;
+}
+
+//---------------------------------------------------------------------------
+
+// GL magnification filter -> D3D filter: GL_LINEAR is linear, anything else is point.
+D3DTEXTUREFILTERTYPE _gldConvertMagFilter(GLenum magfilter)
+{
+	return (magfilter != GL_LINEAR) ? D3DTEXF_POINT : D3DTEXF_LINEAR;
+}
+
+//---------------------------------------------------------------------------
+
+// Split a GL minification filter into D3D min/mip filters (unknown values act as GL_NEAREST).
+void _gldConvertMinFilter(GLenum minfilter,
+	D3DTEXTUREFILTERTYPE *min_filter, D3DTEXTUREFILTERTYPE *mip_filter)
+{
+	switch (minfilter) {
+	default:	// unknown filter: still write both outputs
+	case GL_NEAREST:
+		*min_filter = D3DTEXF_POINT;
+		*mip_filter = D3DTEXF_NONE;
+		break;
+	case GL_LINEAR:
+		*min_filter = D3DTEXF_LINEAR;
+		*mip_filter = D3DTEXF_NONE;
+		break;
+	case GL_NEAREST_MIPMAP_NEAREST:
+		*min_filter = D3DTEXF_POINT;
+		*mip_filter = D3DTEXF_POINT;
+		break;
+	case GL_LINEAR_MIPMAP_NEAREST:
+		*min_filter = D3DTEXF_LINEAR;
+		*mip_filter = D3DTEXF_POINT;
+		break;
+	case GL_NEAREST_MIPMAP_LINEAR:
+		*min_filter = D3DTEXF_POINT;
+		*mip_filter = D3DTEXF_LINEAR;
+		break;
+	case GL_LINEAR_MIPMAP_LINEAR:
+		*min_filter = D3DTEXF_LINEAR;
+		*mip_filter = D3DTEXF_LINEAR;
+		break;
+	}
+}
+
+//---------------------------------------------------------------------------
+
+// Choose the Direct3D texture format to request for a GL internal format.
+// RGBA formats other than RGBA4/RGB5_A1, and anything unlisted, get A8R8G8B8.
+D3DFORMAT _gldGLFormatToD3DFormat(GLenum internalFormat)
+{
+	switch (internalFormat) {
+	// LUMINANCE != INTENSITY, but D3D doesn't have I8 textures (both use L8)
+	case GL_INTENSITY:
+	case GL_INTENSITY4:
+	case GL_INTENSITY8:
+	case GL_INTENSITY12:
+	case GL_INTENSITY16:
+	case 1:
+	case GL_LUMINANCE:
+	case GL_LUMINANCE4:
+	case GL_LUMINANCE8:
+	case GL_LUMINANCE12:
+	case GL_LUMINANCE16:
+		return D3DFMT_L8;
+	case GL_ALPHA:
+	case GL_ALPHA4:
+	case GL_ALPHA8:
+	case GL_ALPHA12:
+	case GL_ALPHA16:
+		return D3DFMT_A8;
+	case GL_COLOR_INDEX:
+	case GL_COLOR_INDEX1_EXT:
+	case GL_COLOR_INDEX2_EXT:
+	case GL_COLOR_INDEX4_EXT:
+	case GL_COLOR_INDEX8_EXT:
+	case GL_COLOR_INDEX12_EXT:
+	case GL_COLOR_INDEX16_EXT:
+		return D3DFMT_X8R8G8B8;
+	case 2:
+	case GL_LUMINANCE_ALPHA:
+	case GL_LUMINANCE4_ALPHA4:
+	case GL_LUMINANCE6_ALPHA2:
+	case GL_LUMINANCE8_ALPHA8:
+	case GL_LUMINANCE12_ALPHA4:
+	case GL_LUMINANCE12_ALPHA12:
+	case GL_LUMINANCE16_ALPHA16:
+		return D3DFMT_A8L8;
+	// TODO: Mesa does not support RGB332 internally (D3DFMT_R3G3B2 is not used)
+	case GL_R3_G3_B2:
+	case GL_RGB4:
+		return D3DFMT_X4R4G4B4;
+	case GL_RGB5:
+		return D3DFMT_X1R5G5B5;
+	case 3:
+	case GL_RGB:
+	case GL_RGB8:
+	case GL_RGB10:
+	case GL_RGB12:
+	case GL_RGB16:
+		return D3DFMT_R8G8B8;
+	case GL_RGBA4:
+		return D3DFMT_A4R4G4B4;
+	case GL_RGB5_A1:
+		return D3DFMT_A1R5G5B5;
+	case 4:
+	case GL_RGBA:
+	case GL_RGBA2:
+	case GL_RGBA8:
+	case GL_RGB10_A2:
+	case GL_RGBA12:
+	case GL_RGBA16:
+	default:
+		break;
+	}
+
+	// Return an acceptable default
+	return D3DFMT_A8R8G8B8;
+}
+
+//---------------------------------------------------------------------------
+
+// Examine Direct3D texture and return base OpenGL internal texture format
+// (GL_RGB, GL_RGBA or GL_ALPHA), decided from the format of mip level 0.
+GLenum _gldDecodeBaseFormat(IDirect3DTexture9 *pTex)
+{
+	// NOTE: We can't use any base format info from Mesa because D3D might have
+	// used a different texture format when we used D3DXCreateTexture().
+
+	// Base internal format is one of (Red Book p355):
+	//	GL_ALPHA,
+	//	GL_LUMINANCE,
+	//	GL_LUMINANCE_ALPHA,
+	//	GL_INTENSITY,
+	//	GL_RGB,
+	//	GL_RGBA
+
+	// NOTE: INTENSITY not used (not supported by Direct3D)
+	//       LUMINANCE has same texture functions as RGB
+	//       LUMINANCE_ALPHA has same texture functions as RGBA
+
+	// TODO: cache format instead of using GetLevelDesc()
+	D3DSURFACE_DESC	topLevel;
+	GLenum			baseFormat = GL_RGBA;	// Arbitrary default for unlisted formats
+	_GLD_DX9_TEX(GetLevelDesc(pTex, 0, &topLevel));
+
+	switch (topLevel.Format) {
+	// Colour-only, palettised and luminance formats
+	case D3DFMT_R8G8B8:
+	case D3DFMT_X8R8G8B8:
+	case D3DFMT_R5G6B5:
+	case D3DFMT_X1R5G5B5:
+	case D3DFMT_R3G3B2:
+	case D3DFMT_X4R4G4B4:
+	case D3DFMT_P8:
+	case D3DFMT_L8:
+	// Compressed texture formats. Need to check these...
+	case D3DFMT_DXT2:
+	case D3DFMT_DXT4:
+		baseFormat = GL_RGB;
+		break;
+	case D3DFMT_A8:
+		baseFormat = GL_ALPHA;
+		break;
+	// Formats with alpha, and the other compressed formats (also unchecked)
+	case D3DFMT_A8R8G8B8:
+	case D3DFMT_A1R5G5B5:
+	case D3DFMT_A4R4G4B4:
+	case D3DFMT_A8R3G3B2:
+	case D3DFMT_A8P8:
+	case D3DFMT_A8L8:
+	case D3DFMT_A4L4:
+	case D3DFMT_DXT1:
+	case D3DFMT_DXT3:
+	case D3DFMT_DXT5:
+	default:
+		break;	// baseFormat is already GL_RGBA
+	}
+
+	return baseFormat;
+}
+
+//---------------------------------------------------------------------------
+
+// Return the Mesa texture format descriptor paired with a Direct3D format.
+// Only the formats listed here are supported; anything else trips the
+// assert, or returns NULL where the assert compiles to nothing.
+const struct gl_texture_format* _gldMesaFormatForD3DFormat(
+	D3DFORMAT d3dfmt)
+{
+	switch (d3dfmt) {
+	// Formats that Mesa already describes
+	case D3DFMT_A8R8G8B8:	return &_mesa_texformat_argb8888;
+	case D3DFMT_R8G8B8:		return &_mesa_texformat_rgb888;
+	case D3DFMT_R5G6B5:		return &_mesa_texformat_rgb565;
+	case D3DFMT_A4R4G4B4:	return &_mesa_texformat_argb4444;
+	case D3DFMT_A1R5G5B5:	return &_mesa_texformat_argb1555;
+	case D3DFMT_A8L8:		return &_mesa_texformat_al88;
+	case D3DFMT_R3G3B2:		return &_mesa_texformat_rgb332;
+	case D3DFMT_A8:			return &_mesa_texformat_a8;
+	case D3DFMT_L8:			return &_mesa_texformat_l8;
+
+	// Direct3D formats that have no Mesa equivalent
+	case D3DFMT_X8R8G8B8:
+		return &_gld_texformat_X8R8G8B8;
+	case D3DFMT_X1R5G5B5:
+		return &_gld_texformat_X1R5G5B5;
+	case D3DFMT_X4R4G4B4:
+		return &_gld_texformat_X4R4G4B4;
+
+	default:
+		break;
+	}
+
+	// If we reach here then we've made an error somewhere else
+	// by allowing a format that is not supported.
+	assert(0);
+
+	return NULL; // Shut up compiler warning
+}
+
+//---------------------------------------------------------------------------
+// Copy* functions
+//---------------------------------------------------------------------------
+
+// TODO: CopyTexImage1D is not implemented; this stub ignores every
+// argument and returns without doing anything.
+void gldCopyTexImage1D_DX9(
+	GLcontext *ctx,
+	GLenum target, GLint level, GLenum internalFormat,
+	GLint x, GLint y, GLsizei width, GLint border )
+{
+	/* nothing to do yet */
+}
+
+//---------------------------------------------------------------------------
+
+// TODO: CopyTexImage2D is not implemented; this stub ignores every
+// argument and returns without doing anything.
+void gldCopyTexImage2D_DX9(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLenum internalFormat,
+	GLint x, GLint y,
+	GLsizei width, GLsizei height,
+	GLint border)
+{
+	/* nothing to do yet */
+}
+
+//---------------------------------------------------------------------------
+
+// TODO: CopyTexSubImage1D is not implemented; this stub ignores every
+// argument and returns without doing anything.
+void gldCopyTexSubImage1D_DX9(GLcontext *ctx, GLenum target, GLint level,
+	GLint xoffset, GLint x, GLint y, GLsizei width )
+{
+	/* nothing to do yet */
+}
+
+//---------------------------------------------------------------------------
+
+// TODO: CopyTexSubImage2D is not implemented; this stub ignores every
+// argument and returns without doing anything.
+void gldCopyTexSubImage2D_DX9(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLint xoffset,
+	GLint yoffset,
+	GLint x, GLint y,
+	GLsizei width, GLsizei height)
+{
+	/* nothing to do yet */
+}
+
+//---------------------------------------------------------------------------
+
+// TODO ? CopyTexSubImage3D is not implemented (the original note leaves open
+// whether it needs to be); this stub ignores every argument and does nothing.
+void gldCopyTexSubImage3D_DX9(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLint xoffset,
+	GLint yoffset,
+	GLint zoffset,
+	GLint x, GLint y,
+	GLsizei width, GLsizei height )
+{
+	/* nothing to do yet */
+}
+
+//---------------------------------------------------------------------------
+// Bitmap/Pixel functions
+//---------------------------------------------------------------------------
+
+#define GLD_FLIP_Y(y) (gldCtx->dwHeight - (y))	// flip a GL y coordinate; needs a local gldCtx in scope
+
+#define _GLD_FVF_IMAGE	(D3DFVF_XYZRHW | D3DFVF_TEX1)	// vertex format of _GLD_IMAGE_VERTEX
+
+typedef struct {
+	FLOAT	x, y;		// 2D raster coords
+	FLOAT	z;			// depth value
+	FLOAT	rhw;		// reciprocal homogeneous W (always 1.0f)
+	FLOAT	tu, tv;		// texture coords
+} _GLD_IMAGE_VERTEX;
+
+//---------------------------------------------------------------------------
+
+// Draw pImage at GL position (x, y) as a textured quad.
+// The image is copied into a temporary texture so that PixelZoom and
+// clipping are applied by the draw, and any fragment operations currently
+// enabled will be used. Returns the HRESULT of the first step that fails,
+// otherwise the result of the draw call.
+HRESULT _gldDrawPixels(
+	GLcontext *ctx,
+	BOOL bChromakey,	// Alpha test for glBitmap() images
+	GLint x,			// GL x position
+	GLint y,			// GL y position (needs flipping)
+	GLsizei width,		// Width of input image
+	GLsizei height,		// Height of input image
+	IDirect3DSurface9 *pImage)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+
+	IDirect3DTexture9	*pTexture;
+	D3DSURFACE_DESC		d3dsd;
+	IDirect3DSurface9	*pSurface;
+	_GLD_IMAGE_VERTEX	v[4];
+	HRESULT				hr;
+
+	float				ZoomWidth, ZoomHeight;
+	float				ScaleWidth, ScaleHeight;
+
+	// Create a texture to hold image
+	hr = D3DXCreateTexture(
+		gld->pDev,
+		width, height,
+		1, // miplevels
+		0, // usage
+		D3DFMT_A8R8G8B8, // format
+		D3DPOOL_MANAGED, // pool
+		&pTexture);
+	if (FAILED(hr))
+		return hr;
+
+	hr = IDirect3DTexture9_GetSurfaceLevel(pTexture, 0, &pSurface);
+	if (FAILED(hr)) {
+		IDirect3DTexture9_Release(pTexture);
+		return hr;
+	}
+
+	// Copy image into texture
+	hr = D3DXLoadSurfaceFromSurface(
+		pSurface, NULL, NULL,	// Dest surface
+		pImage, NULL, NULL,		// Src surface
+		D3DX_FILTER_NONE,
+		0);
+	IDirect3DSurface9_Release(pSurface);
+	if (FAILED(hr)) {
+		IDirect3DTexture9_Release(pTexture);
+		return hr;
+	}
+
+	// Examine texture size - if different to input width and height
+	// then we'll need to munge the texcoords to fit. Give up if the size
+	// cannot be read, rather than scaling by an undefined value.
+	hr = IDirect3DTexture9_GetLevelDesc(pTexture, 0, &d3dsd);
+	if (FAILED(hr)) {
+		IDirect3DTexture9_Release(pTexture);
+		return hr;
+	}
+
+	// Set up the quad like this (ascii-art ahead!)
+	//
+	// 3--2
+	// |  |
+	// 0--1
+	//
+
+	// Set depth
+	v[0].z = v[1].z = v[2].z = v[3].z = ctx->Current.RasterPos[2];
+	// Set Reciprocal Homogenous W
+	v[0].rhw = v[1].rhw = v[2].rhw = v[3].rhw = 1.0f;
+
+	// Set texcoords
+	ScaleWidth = (float)width / (float)d3dsd.Width;
+	ScaleHeight = (float)height / (float)d3dsd.Height;
+	v[0].tu = 0.0f;			v[0].tv = 0.0f;
+	v[1].tu = ScaleWidth;	v[1].tv = 0.0f;
+	v[2].tu = ScaleWidth;	v[2].tv = ScaleHeight;
+	v[3].tu = 0.0f;			v[3].tv = ScaleHeight;
+
+	// Set raster positions
+	ZoomWidth = (float)width * ctx->Pixel.ZoomX;
+	ZoomHeight = (float)height * ctx->Pixel.ZoomY;
+
+	v[0].x = x;				v[0].y = GLD_FLIP_Y(y);
+	v[1].x = x+ZoomWidth;	v[1].y = GLD_FLIP_Y(y);
+	v[2].x = x+ZoomWidth;	v[2].y = GLD_FLIP_Y(y+ZoomHeight);
+	v[3].x = x;				v[3].y = GLD_FLIP_Y(y+ZoomHeight);
+
+	// Draw image with full HW acceleration
+	// NOTE: Be nice to use a State Block for all this state...
+	IDirect3DDevice9_SetTexture(gld->pDev, 0, pTexture);
+	IDirect3DDevice9_SetRenderState(gld->pDev, D3DRS_CULLMODE, D3DCULL_NONE);
+	IDirect3DDevice9_SetRenderState(gld->pDev, D3DRS_CLIPPING, TRUE);
+
+	// Point-sample the image and clamp it at its edges
+	IDirect3DDevice9_SetSamplerState(gld->pDev, 0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
+	IDirect3DDevice9_SetSamplerState(gld->pDev, 0, D3DSAMP_MIPFILTER, D3DTEXF_POINT);
+	IDirect3DDevice9_SetSamplerState(gld->pDev, 0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
+	IDirect3DDevice9_SetSamplerState(gld->pDev, 0, D3DSAMP_ADDRESSU, D3DTADDRESS_CLAMP);
+	IDirect3DDevice9_SetSamplerState(gld->pDev, 0, D3DSAMP_ADDRESSV, D3DTADDRESS_CLAMP);
+
+	// Stage 0 passes the texture's colour and alpha straight through;
+	// stage 1 is disabled.
+	IDirect3DDevice9_SetTextureStageState(gld->pDev, 0, D3DTSS_COLOROP, D3DTOP_SELECTARG1);
+	IDirect3DDevice9_SetTextureStageState(gld->pDev, 0, D3DTSS_ALPHAOP, D3DTOP_SELECTARG1);
+	IDirect3DDevice9_SetTextureStageState(gld->pDev, 0, D3DTSS_COLORARG1, D3DTA_TEXTURE);
+	IDirect3DDevice9_SetTextureStageState(gld->pDev, 0, D3DTSS_ALPHAARG1, D3DTA_TEXTURE);
+	IDirect3DDevice9_SetTextureStageState(gld->pDev, 1, D3DTSS_COLOROP, D3DTOP_DISABLE);
+	IDirect3DDevice9_SetTextureStageState(gld->pDev, 1, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
+
+	IDirect3DDevice9_SetVertexShader(gld->pDev, NULL);
+	IDirect3DDevice9_SetFVF(gld->pDev, _GLD_FVF_IMAGE);
+
+	// Emulate Chromakey with an Alpha Test.
+	// [Alpha Test is more widely supported anyway]
+	if (bChromakey) {
+		// Switch on alpha testing
+		IDirect3DDevice9_SetRenderState(gld->pDev, D3DRS_ALPHATESTENABLE, TRUE);
+		// Fragment passes if alpha is greater than reference value
+		IDirect3DDevice9_SetRenderState(gld->pDev, D3DRS_ALPHAFUNC, D3DCMP_GREATER);
+		// Set alpha reference value between Bitmap alpha values of
+		// zero (transparent) and one (opaque).
+		IDirect3DDevice9_SetRenderState(gld->pDev, D3DRS_ALPHAREF, 0x7f);
+	}
+
+	hr = IDirect3DDevice9_DrawPrimitiveUP(gld->pDev, D3DPT_TRIANGLEFAN, 2, &v, sizeof(_GLD_IMAGE_VERTEX));
+
+	// Release texture
+	IDirect3DDevice9_SetTexture(gld->pDev, 0, NULL);
+	IDirect3DTexture9_Release(pTexture);
+
+	// Reset state to before we messed it up
+	FLUSH_VERTICES(ctx, _NEW_ALL);
+
+	return hr;
+}
+
+//---------------------------------------------------------------------------
+
+// Draw a rectangle of client pixels at GL position (x, y).
+// Mesa unpacks the pixels (applying transfer ops) into a scratch A8R8G8B8
+// surface, which _gldDrawPixels() then draws. GL_BGR input is ignored, and
+// a failure to create or lock the surface silently abandons the draw.
+void gld_DrawPixels_DX9(
+	GLcontext *ctx,
+	GLint x, GLint y, GLsizei width, GLsizei height,
+	GLenum format, GLenum type,
+	const struct gl_pixelstore_attrib *unpack,
+	const GLvoid *pixels )
+{
+	const struct gl_texture_format	*MesaFormat =
+		_mesa_choose_tex_format(ctx, format, format, type);
+
+	GLD_context			*gldCtx;
+	GLD_driver_dx9		*gld;
+	IDirect3DSurface9	*pStaging;
+	D3DLOCKED_RECT		lockedRect;
+	HRESULT				hr;
+
+	// Mesa does not currently handle this format.
+	if (format == GL_BGR)
+		return;
+
+	gldCtx	= GLD_GET_CONTEXT(ctx);
+	gld		= GLD_GET_DX9_DRIVER(gldCtx);
+
+	// Scratch surface that receives the unpacked image
+	hr = IDirect3DDevice9_CreateOffscreenPlainSurface(
+		gld->pDev, 
+		width,
+		height,
+		D3DFMT_A8R8G8B8,
+		D3DPOOL_SCRATCH,
+		&pStaging,
+		NULL);
+	if (FAILED(hr))
+		return;
+
+	// Lock all of the surface; it is only filled in and drawn if the lock
+	// worked, but it is released either way.
+	hr = IDirect3DSurface9_LockRect(pStaging, &lockedRect, NULL, 0);
+	if (!FAILED(hr)) {
+		// Use Mesa to unpack the image, apply transfer ops and store it
+		// directly in the surface.
+		// NOTE(review): the store function comes from the format chosen for
+		// the source pixels while the destination is ARGB8888 - confirm
+		// that the two always agree.
+		MesaFormat->StoreImage(
+			ctx,
+			2,
+			GL_RGBA,
+			&_mesa_texformat_argb8888,
+			lockedRect.pBits,
+			width, height, 1, 0, 0, 0,
+			lockedRect.Pitch,
+			0, /* dstImageStride */
+			format, type, pixels, unpack);
+
+		IDirect3DSurface9_UnlockRect(pStaging);
+
+		_gldDrawPixels(ctx, FALSE, x, y, width, height, pStaging);
+	}
+
+	IDirect3DSurface9_Release(pStaging);
+}
+
+//---------------------------------------------------------------------------
+
+// Read a width x height block of the backbuffer at GL position (x, y) into
+// dest, in the requested format/type and honouring the pack parameters.
+// The block is copied to a scratch surface, converted to A8R8G8B8 and then
+// stored one row at a time. Stencil and depth reads are ignored, and any
+// Direct3D failure returns before dest has been written.
+void gld_ReadPixels_DX9(
+	GLcontext *ctx,
+	GLint x, GLint y, GLsizei width, GLsizei height,
+	GLenum format, GLenum type,
+	const struct gl_pixelstore_attrib *pack,
+	GLvoid *dest)
+{
+	GLD_context						*gldCtx;
+	GLD_driver_dx9					*gld;
+
+	IDirect3DSurface9				*pBackbuffer = NULL;
+	IDirect3DSurface9				*pNativeImage = NULL;
+	IDirect3DSurface9				*pCanonicalImage = NULL;
+
+	D3DSURFACE_DESC					d3dsd;
+	RECT							rcSrc; // Source rect
+	HRESULT							hr;
+	D3DLOCKED_RECT					d3dLockedRect;
+	struct gl_pixelstore_attrib		srcPacking;
+	int								i;
+	GLint							DstRowStride;
+	const struct gl_texture_format	*MesaFormat;
+
+	switch (format) {
+	case GL_STENCIL_INDEX:
+	case GL_DEPTH_COMPONENT:
+		return;
+	}
+	
+	MesaFormat = _mesa_choose_tex_format(ctx, format, format, type);
+	DstRowStride = _mesa_image_row_stride(pack, width, format, type);
+
+	gldCtx	= GLD_GET_CONTEXT(ctx);
+	gld		= GLD_GET_DX9_DRIVER(gldCtx);
+
+	// Get backbuffer
+	hr = IDirect3DDevice9_GetBackBuffer(
+		gld->pDev,
+		0, // First swapchain
+		0, // First backbuffer
+		D3DBACKBUFFER_TYPE_MONO,
+		&pBackbuffer);
+	if (FAILED(hr))
+		return;
+
+	// Get backbuffer description
+	hr = IDirect3DSurface9_GetDesc(pBackbuffer, &d3dsd);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX9_return;
+	}
+
+	// Create a surface compatible with backbuffer
+	hr = IDirect3DDevice9_CreateOffscreenPlainSurface(
+		gld->pDev, 
+		width,
+		height,
+		d3dsd.Format,
+		D3DPOOL_SCRATCH,
+		&pNativeImage,
+		NULL);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX9_return;
+	}
+
+	// Compute source rect
+	SetRect(&rcSrc, 0, 0, width, height);
+	OffsetRect(&rcSrc, x, GLD_FLIP_HEIGHT(y, height));
+
+	// Get source pixels.
+	//
+	// The intermediate surface was meant to let CopyRects() do this copy
+	// instead of D3DXLoadSurfaceFromSurface(), which may try and lock the
+	// backbuffer. CopyRects has been removed for DX9, so D3DX is used anyway.
+	//
+	// The destination rect is NULL, i.e. all of pNativeImage, which is
+	// exactly width x height. Passing rcSrc there would address pixels
+	// outside pNativeImage unless the block starts at the top-left corner.
+	//
+	hr = D3DXLoadSurfaceFromSurface(
+			pNativeImage,		// Dest surface
+			NULL,				// Dest palette
+			NULL,				// Dest rect (entire surface)
+			pBackbuffer,		// Src surface
+			NULL,				// Src palette
+			&rcSrc,				// Src rect
+			D3DX_FILTER_NONE,	// Filter
+			0					// Colorkey (0=no colorkey)
+		);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX9_return;
+	}
+
+	// Create an RGBA8888 surface
+	hr = IDirect3DDevice9_CreateOffscreenPlainSurface(
+		gld->pDev, 
+		width,
+		height,
+		D3DFMT_A8R8G8B8,
+		D3DPOOL_SCRATCH,
+		&pCanonicalImage,
+		NULL);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX9_return;
+	}
+
+	// Convert to RGBA8888
+	hr = D3DXLoadSurfaceFromSurface(
+		pCanonicalImage,	// Dest surface
+		NULL, NULL,			// Dest palette, RECT
+		pNativeImage,		// Src surface
+		NULL, NULL,			// Src palette, RECT
+		D3DX_FILTER_NONE,	// Filter
+		0);					// Colourkey
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX9_return;
+	}
+
+	// Describe the locked rows to Mesa. Start from the context's default
+	// packing so that fields not set below still hold defined values.
+	srcPacking				= ctx->DefaultPacking;
+	srcPacking.Alignment	= 1;
+	srcPacking.ImageHeight	= height;
+	srcPacking.LsbFirst		= GL_FALSE;
+	srcPacking.RowLength	= 0;
+	srcPacking.SkipImages	= 0;
+	srcPacking.SkipPixels	= 0;
+	srcPacking.SkipRows		= 0;
+	srcPacking.SwapBytes	= GL_FALSE;
+
+	// Lock all of image
+	hr = IDirect3DSurface9_LockRect(pCanonicalImage, &d3dLockedRect, NULL, 0);
+	if (FAILED(hr)) {
+		goto gld_ReadPixels_DX9_return;
+	}
+
+	// We need to flip the data. Yuck.
+	// Perhaps Mesa has a span packer we can use in future...
+	for (i=0; i<height; i++) {
+		BYTE *pDestRow = (BYTE*)_mesa_image_address(2,pack, dest, width, height, format, type, 0, i, 0);
+		BYTE *pSrcRow = (BYTE*)d3dLockedRect.pBits + (d3dLockedRect.Pitch * (height-i-1));
+		MesaFormat->StoreImage(
+			ctx,
+			2,
+			GL_RGBA,				// base format
+			MesaFormat,				// dst format
+			pDestRow,				// dest addr
+			width, 1, 1, 0, 0, 0,	// src x,y,z & dst offsets x,y,z
+			DstRowStride,			// dst row stride
+			0,						// dstImageStride
+			GL_BGRA,				// src format
+			GL_UNSIGNED_BYTE,		// src type
+			pSrcRow,				// src addr
+			&srcPacking);			// packing params of source image
+	}
+
+	IDirect3DSurface9_UnlockRect(pCanonicalImage);
+
+gld_ReadPixels_DX9_return:
+	SAFE_RELEASE_SURFACE9(pCanonicalImage);
+	SAFE_RELEASE_SURFACE9(pNativeImage);
+	SAFE_RELEASE_SURFACE9(pBackbuffer);
+}
+
+//---------------------------------------------------------------------------
+
+// Copy a width x height block of the backbuffer from GL position
+// (srcx, srcy) to (dstx, dsty), going through a scratch surface and
+// _gldDrawPixels(). Only GL_COLOR copies are handled; any other type,
+// and any Direct3D failure, makes the call a silent no-op.
+void gld_CopyPixels_DX9(
+	GLcontext *ctx,
+	GLint srcx,
+	GLint srcy,
+	GLsizei width,
+	GLsizei height,
+	GLint dstx,
+	GLint dsty,
+	GLenum type)
+{
+	//
+	// NOTE: Not allowed to copy vidmem to vidmem!
+	//       Therefore we use an intermediate image surface.
+	//
+
+	GLD_context			*gldCtx;
+	GLD_driver_dx9		*gld;
+
+	IDirect3DSurface9	*pBackbuffer;
+	D3DSURFACE_DESC		d3dsd;
+	IDirect3DSurface9	*pImage;
+	RECT				rcSrc; // Source rect
+	HRESULT				hr;
+
+	// Only backbuffer
+	if (type != GL_COLOR)
+		return;
+
+	gldCtx	= GLD_GET_CONTEXT(ctx);
+	gld		= GLD_GET_DX9_DRIVER(gldCtx);
+
+	// Get backbuffer
+	hr = IDirect3DDevice9_GetBackBuffer(
+		gld->pDev,
+		0, // First swapchain
+		0, // First backbuffer
+		D3DBACKBUFFER_TYPE_MONO,
+		&pBackbuffer);
+	if (FAILED(hr))
+		return;
+
+	// Get backbuffer description
+	hr = IDirect3DSurface9_GetDesc(pBackbuffer, &d3dsd);
+	if (FAILED(hr)) {
+		IDirect3DSurface9_Release(pBackbuffer);
+		return;
+	}
+
+	// Create a surface compatible with backbuffer
+	hr = IDirect3DDevice9_CreateOffscreenPlainSurface(
+		gld->pDev, 
+		width,
+		height,
+		d3dsd.Format,
+		D3DPOOL_SCRATCH,
+		&pImage,
+		NULL);
+	if (FAILED(hr)) {
+		IDirect3DSurface9_Release(pBackbuffer);
+		return;
+	}
+
+	// Compute source rect
+	SetRect(&rcSrc, 0, 0, width, height);
+	OffsetRect(&rcSrc, srcx, GLD_FLIP_HEIGHT(srcy, height));
+
+	// Get source pixels.
+	// The destination rect is NULL, i.e. all of pImage, which is exactly
+	// width x height. Passing rcSrc there would address pixels outside
+	// pImage unless the block starts at the backbuffer's top-left corner.
+	// (The CopyRects() call this replaces took a destination point instead.)
+	hr = D3DXLoadSurfaceFromSurface(
+			pImage,				// Dest surface
+			NULL,				// Dest palette
+			NULL,				// Dest rect (entire surface)
+			pBackbuffer,		// Src surface
+			NULL,				// Src palette
+			&rcSrc,				// Src rect
+			D3DX_FILTER_NONE,	// Filter
+			0					// Colorkey (0=no colorkey)
+		);
+	IDirect3DSurface9_Release(pBackbuffer);
+	if (FAILED(hr)) {
+		IDirect3DSurface9_Release(pImage);
+		return;
+	}
+
+	// Draw the captured block at the destination position
+	_gldDrawPixels(ctx, FALSE, dstx, dsty, width, height, pImage);
+
+	IDirect3DSurface9_Release(pImage);
+}
+
+//---------------------------------------------------------------------------
+
+// Draw a GL bitmap at GL position (x, y).
+// Each bitmap bit becomes one A8R8G8B8 pixel of a scratch surface: set bits
+// take the current raster colour with alpha one, clear bits are fully
+// transparent. The surface is drawn by _gldDrawPixels() with its alpha
+// test enabled, so the transparent pixels are discarded.
+void gld_Bitmap_DX9(
+	GLcontext *ctx,
+	GLint x,
+	GLint y,
+	GLsizei width,
+	GLsizei height,
+	const struct gl_pixelstore_attrib *unpack,
+	const GLubyte *bitmap)
+{
+	GLD_context			*gldCtx;
+	GLD_driver_dx9		*gld;
+
+	IDirect3DSurface9	*pImage;
+	HRESULT				hr;
+	D3DLOCKED_RECT		d3dLockedRect;
+	BYTE				*pTempBitmap;
+	D3DCOLOR			clBitmapOne, clBitmapZero;
+	D3DCOLOR			*pBits;
+	const GLubyte		*src;
+	int					i, j, k;
+
+	gldCtx	= GLD_GET_CONTEXT(ctx);
+	gld		= GLD_GET_DX9_DRIVER(gldCtx);
+
+	// A NULL bitmap is valid, but merely advances the raster position
+	if ((bitmap == NULL) || (width == 0) || (height == 0))
+		return;
+
+	// Colours written for clear and set bits
+	clBitmapZero	= D3DCOLOR_RGBA(0,0,0,0); // NOTE: Alpha is Zero
+	clBitmapOne		= D3DCOLOR_COLORVALUE(
+		ctx->Current.RasterColor[0],
+		ctx->Current.RasterColor[1],
+		ctx->Current.RasterColor[2],
+		1.0f); // NOTE: Alpha is One
+
+	hr = IDirect3DDevice9_CreateOffscreenPlainSurface(
+		gld->pDev, 
+		width,
+		height,
+		D3DFMT_A8R8G8B8,
+		D3DPOOL_SCRATCH,
+		&pImage,
+		NULL);
+	if (FAILED(hr)) {
+		return;
+	}
+
+	// Lock all of surface 
+	hr = IDirect3DSurface9_LockRect(pImage, &d3dLockedRect, NULL, 0);
+	if (FAILED(hr)) {
+		IDirect3DSurface9_Release(pImage);
+		return;
+	}
+
+	// Let Mesa unpack the client bitmap into a temporary copy
+	pTempBitmap = _mesa_unpack_bitmap(width, height, bitmap, unpack);
+	if (pTempBitmap == NULL) {
+		// The surface must not be released while it is still locked
+		IDirect3DSurface9_UnlockRect(pImage);
+		IDirect3DSurface9_Release(pImage);
+		return;
+	}
+
+	// Expand the bitmap into the surface one row at a time, taking the
+	// most significant bit of each byte first.
+	for (i=0; i<height; i++) {
+		GLubyte byte;
+		// Start of this row in the surface (rows are Pitch bytes apart)
+		pBits = (D3DCOLOR*)((BYTE*)d3dLockedRect.pBits + (i*d3dLockedRect.Pitch));
+		// Start of this row in the unpacked bitmap
+		src = (const GLubyte *) _mesa_image_address(2,
+			&ctx->DefaultPacking, pTempBitmap, width, height, GL_COLOR_INDEX, GL_BITMAP,
+			0, i, 0);
+		// Whole bytes: eight pixels each
+		for (j=0; j<(width>>3); j++) {
+			byte = *src++;
+			for (k=0; k<8; k++) {
+				*pBits++ = (byte & 128) ? clBitmapOne : clBitmapZero;
+				byte <<= 1;
+			}
+		}
+		// Fill remaining bits from bitmap
+		if (width & 7) {
+			byte = *src;
+			for (k=0; k<(width & 7); k++) {
+				*pBits++ = (byte & 128) ? clBitmapOne : clBitmapZero;
+				byte <<= 1;
+			}
+		}
+	}
+
+	FREE(pTempBitmap);
+
+	IDirect3DSurface9_UnlockRect(pImage);
+
+	// Alpha-tested draw (bChromakey) so that only the set bits are visible
+	_gldDrawPixels(ctx, TRUE, x, y, width, height, pImage);
+
+	IDirect3DSurface9_Release(pImage);
+}
+
+//---------------------------------------------------------------------------
+// Texture functions
+//---------------------------------------------------------------------------
+
+// Makes sure tObj carries a Direct3D texture (kept in tObj->DriverData)
+// whose top level has the dimensions of texImage.
+//
+// An existing texture is kept only when BOTH its width and its height match
+// texImage; otherwise it is released and a new managed texture is created
+// with the D3D format derived from texImage->IntFormat. If creation fails,
+// tObj->DriverData is left NULL.
+//
+// NOTE(review): the format of an existing texture is not compared, only
+// its size - confirm whether a format change should also force a re-create.
+//
+// ctx      - current GL context
+// tObj     - texture object that owns the D3D texture; NULL is ignored
+// texImage - image describing the required size/format; NULL is ignored
+void _gldAllocateTexture(
+	GLcontext *ctx,
+	struct gl_texture_object *tObj,
+	struct gl_texture_image *texImage)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+
+	IDirect3DTexture9	*pTex;
+	D3DFORMAT			d3dFormat;
+	HRESULT				hr;
+
+	if (!tObj || !texImage)
+		return;
+
+	pTex = (IDirect3DTexture9*)tObj->DriverData;
+	if (pTex) {
+		// Decide whether we can keep existing D3D texture
+		// by examining top-level surface.
+		D3DSURFACE_DESC d3dsd;
+		_GLD_DX9_TEX(GetLevelDesc(pTex, 0, &d3dsd));
+		// Both dimensions must match; a texture that agrees in only one
+		// dimension could be too small for the image stored into it later.
+		if ((d3dsd.Width == texImage->Width) &&
+			(d3dsd.Height == texImage->Height))
+		{
+			return; // Keep the existing texture
+		}
+		// Release existing texture as it is not compatible
+		tObj->DriverData = NULL;
+		_GLD_DX9_TEX(Release(pTex));
+	}
+
+	// Never let the released pointer above survive a failed creation
+	pTex = NULL;
+
+	d3dFormat = _gldGLFormatToD3DFormat(texImage->IntFormat);
+	hr = D3DXCreateTexture(
+		gld->pDev,
+		texImage->Width,
+		texImage->Height,
+		// TODO: Re-evaluate mipmapping
+		(glb.bUseMipmaps) ? D3DX_DEFAULT : 1,
+		0,				// Usage
+		d3dFormat,
+		D3DPOOL_MANAGED,
+		&pTex);
+	if (FAILED(hr)) {
+		tObj->DriverData = NULL;
+		return;
+	}
+	tObj->DriverData = pTex;
+}
+
+//---------------------------------------------------------------------------
+
+// Chooses the Mesa texture format used to store a texture with the given
+// GL internal format.
+//
+// Only internalFormat is examined; ctx, srcFormat and srcType are unused.
+// Returns a pointer to one of Mesa's _mesa_texformat_* descriptors, or NULL
+// (after reporting a problem) when internalFormat is not recognised.
+const struct gl_texture_format* gld_ChooseTextureFormat_DX9(
+	GLcontext *ctx,
+	GLint internalFormat,
+	GLenum srcFormat,
+	GLenum srcType)
+{
+	// [Based on mesa_choose_tex_format()]
+	//
+	// We will choose only texture formats that are supported
+	// by Direct3D. If the hardware doesn't support a particular
+	// texture format, then the D3DX texture calls that we use
+	// will automatically use a HW supported format.
+	//
+	// The most critical aim is to reduce copying; if we can use
+	// texture-image data directly then it will be a big performance assist.
+	//
+
+	switch (internalFormat) {
+	// Intensity and luminance both map to the 8-bit luminance format
+	case GL_INTENSITY:
+	case GL_INTENSITY4:
+	case GL_INTENSITY8:
+	case GL_INTENSITY12:
+	case GL_INTENSITY16:
+	case 1:
+	case GL_LUMINANCE:
+	case GL_LUMINANCE4:
+	case GL_LUMINANCE8:
+	case GL_LUMINANCE12:
+	case GL_LUMINANCE16:
+		return &_mesa_texformat_l8; // D3DFMT_L8
+	case GL_ALPHA:
+	case GL_ALPHA4:
+	case GL_ALPHA8:
+	case GL_ALPHA12:
+	case GL_ALPHA16:
+		return &_mesa_texformat_a8; // D3DFMT_A8
+	case GL_COLOR_INDEX:
+	case GL_COLOR_INDEX1_EXT:
+	case GL_COLOR_INDEX2_EXT:
+	case GL_COLOR_INDEX4_EXT:
+	case GL_COLOR_INDEX8_EXT:
+	case GL_COLOR_INDEX12_EXT:
+	case GL_COLOR_INDEX16_EXT:
+		// Mesa will convert this for us later...
+		return &_mesa_texformat_rgb565; // D3DFMT_R5G6B5
+	case 2:
+	case GL_LUMINANCE_ALPHA:
+	case GL_LUMINANCE4_ALPHA4:
+	case GL_LUMINANCE6_ALPHA2:
+	case GL_LUMINANCE8_ALPHA8:
+	case GL_LUMINANCE12_ALPHA4:
+	case GL_LUMINANCE12_ALPHA12:
+	case GL_LUMINANCE16_ALPHA16:
+		return &_mesa_texformat_al88; // D3DFMT_A8L8
+	case GL_R3_G3_B2:
+		return &_mesa_texformat_rgb332; // D3DFMT_R3G3B2
+	case GL_RGB4:
+	case GL_RGBA4:
+	case GL_RGBA2:
+		return &_mesa_texformat_argb4444; // D3DFMT_A4R4G4B4
+	case 3:
+	case GL_RGB:
+	case GL_RGB5:
+	case GL_RGB8:
+	case GL_RGB10:
+	case GL_RGB12:
+	case GL_RGB16:
+		return &_mesa_texformat_rgb565;
+	case 4:
+	case GL_RGBA:
+	case GL_RGBA8:
+	case GL_RGB10_A2:
+	case GL_RGBA12:
+	case GL_RGBA16:
+		return &_mesa_texformat_argb8888;
+	case GL_RGB5_A1:
+		return &_mesa_texformat_argb1555;
+	default:
+		// Report under this function's own name so the log points here
+		_mesa_problem(NULL, "unexpected format in gld_ChooseTextureFormat_DX9");
+		return NULL;
+	}
+}
+
+//---------------------------------------------------------------------------
+
+/*
+// Safer(?), slower version.
+void gld_TexImage2D_DX9(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLint internalFormat,
+	GLint width,
+	GLint height,
+	GLint border,
+	GLenum format,
+	GLenum type,
+	const GLvoid *pixels,
+	const struct gl_pixelstore_attrib *packing,
+	struct gl_texture_object *tObj,
+	struct gl_texture_image *texImage)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+
+	IDirect3DTexture9	*pTex;
+	IDirect3DSurface9	*pSurface;
+	RECT				rcSrcRect;
+	HRESULT				hr;
+	GLint				texelBytes = 4;
+	GLvoid				*tempImage;
+
+	if (!tObj || !texImage)
+		return;
+
+	if (level == 0) {
+		_gldAllocateTexture(ctx, tObj, texImage);
+	}
+
+	pTex = (IDirect3DTexture9*)tObj->DriverData;
+	if (!pTex)
+		return; // Texture has not been created
+	if (level >= IDirect3DTexture9_GetLevelCount(pTex))
+		return; // Level does not exist
+	hr = IDirect3DTexture9_GetSurfaceLevel(pTex, level, &pSurface);
+	if (FAILED(hr))
+		return; // Surface level doesn't exist (or just a plain error)
+
+	tempImage = MALLOC(width * height * texelBytes);
+	if (!tempImage) {
+		_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+		IDirect3DSurface9_Release(pSurface);
+		return;
+	}
+	// unpack image, apply transfer ops and store in tempImage
+	texImage->TexFormat->StoreImage(ctx, 2, texImage->Format,
+		&_mesa_texformat_argb8888, // dest format
+		tempImage,
+		width, height, 1, 0, 0, 0,
+		width * texelBytes,
+		0, // dstImageStride
+		format, type, pixels, packing);
+
+	SetRect(&rcSrcRect, 0, 0, width, height);
+	D3DXLoadSurfaceFromMemory(
+		pSurface,
+		NULL,
+		NULL,
+		tempImage,
+		D3DFMT_A8R8G8B8,
+		width * texelBytes,
+		NULL,
+		&rcSrcRect,
+		D3DX_FILTER_NONE,
+		0);
+
+	FREE(tempImage);
+	IDirect3DSurface9_Release(pSurface);
+}
+*/
+
+//---------------------------------------------------------------------------
+
+// Driver hook for glTexImage2D (faster, more efficient version).
+//
+// Converts the application's image and stores it straight into the locked
+// surface of mipmap level 'level' of the Direct3D texture attached to tObj,
+// without an intermediate copy. The D3D texture is (re)allocated through
+// _gldAllocateTexture() whenever level 0 is specified.
+//
+// ctx            - current GL context
+// target         - texture target (not examined here)
+// level          - mipmap level being defined
+// internalFormat - requested internal format
+// width, height  - size of the source image in texels
+// border         - border width (not examined here)
+// format, type   - layout of the data at 'pixels'
+// pixels         - source image; when NULL the texture is allocated but
+//                  nothing is uploaded
+// packing        - pixel-store state describing 'pixels'
+// tObj, texImage - Mesa texture object / image being defined; NULL is ignored
+void gld_TexImage2D_DX9(
+	GLcontext *ctx,
+	GLenum target,
+	GLint level,
+	GLint internalFormat,
+	GLint width,
+	GLint height,
+	GLint border,
+	GLenum format,
+	GLenum type,
+	const GLvoid *pixels,
+	const struct gl_pixelstore_attrib *packing,
+	struct gl_texture_object *tObj,
+	struct gl_texture_image *texImage)
+{
+	IDirect3DTexture9	*pTex;
+	IDirect3DSurface9	*pSurface;
+	HRESULT				hr;
+	D3DLOCKED_RECT		d3dLockedRect;
+	D3DSURFACE_DESC		d3dsd;
+
+	if (!tObj || !texImage)
+		return;
+
+	// GLQUAKE FIX
+	// Test for input alpha data with non-alpha internalformat
+	if (((internalFormat==3) || (internalFormat==GL_RGB)) && (format==GL_RGBA)) {
+		// Input format has alpha, but a non-alpha format has been requested.
+		texImage->IntFormat = GL_RGBA;
+		internalFormat = GL_RGBA;
+	}
+
+	if (level == 0) {
+		_gldAllocateTexture(ctx, tObj, texImage);
+	}
+
+	// glTexImage2D may be called without image data; the texture has been
+	// allocated above and there is nothing to copy.
+	if (!pixels)
+		return;
+
+	pTex = (IDirect3DTexture9*)tObj->DriverData;
+	if (!pTex)
+		return; // Texture has not been created
+	if (level >= IDirect3DTexture9_GetLevelCount(pTex))
+		return; // Level does not exist
+	hr = IDirect3DTexture9_GetSurfaceLevel(pTex, level, &pSurface);
+	if (FAILED(hr))
+		return; // Surface level doesn't exist (or just a plain error)
+
+	// The surface format selects the destination Mesa format below, so do
+	// not continue with an unfilled description.
+	hr = IDirect3DSurface9_GetDesc(pSurface, &d3dsd);
+	if (FAILED(hr)) {
+		IDirect3DSurface9_Release(pSurface);
+		return;
+	}
+
+	// Lock all of surface 
+	hr = IDirect3DSurface9_LockRect(pSurface, &d3dLockedRect, NULL, 0);
+	if (FAILED(hr)) {
+		IDirect3DSurface9_Release(pSurface);
+		return;
+	}
+
+	// unpack image, apply transfer ops and store directly in texture
+	texImage->TexFormat->StoreImage(
+		ctx,
+		2,
+		texImage->Format,
+		_gldMesaFormatForD3DFormat(d3dsd.Format),
+		d3dLockedRect.pBits,
+		width, height, 1, 0, 0, 0,
+		d3dLockedRect.Pitch,
+		0, // dstImageStride
+		format, type, pixels, packing);
+
+	IDirect3DSurface9_UnlockRect(pSurface);
+	IDirect3DSurface9_Release(pSurface);
+}
+
+//---------------------------------------------------------------------------
+
+// Driver hook for glTexImage1D.
+// Forwards every argument to gld_TexImage2D_DX9() with the height fixed at 1.
+void gld_TexImage1D_DX9(GLcontext *ctx, GLenum target, GLint level,
+                       GLint internalFormat,
+                       GLint width, GLint border,
+                       GLenum format, GLenum type, const GLvoid *pixels,
+                       const struct gl_pixelstore_attrib *packing,
+                       struct gl_texture_object *texObj,
+                       struct gl_texture_image *texImage )
+{
+	// A 1D texture is a 2D texture with a height of one
+	gld_TexImage2D_DX9(ctx, target, level, internalFormat, width, 1, border, format, type, pixels, packing, texObj, texImage);
+}
+
+//---------------------------------------------------------------------------
+
+/*
+void gld_TexSubImage2D( GLcontext *ctx, GLenum target, GLint level,
+                          GLint xoffset, GLint yoffset,
+                          GLsizei width, GLsizei height,
+                          GLenum format, GLenum type,
+                          const GLvoid *pixels,
+                          const struct gl_pixelstore_attrib *packing,
+                          struct gl_texture_object *tObj,
+                          struct gl_texture_image *texImage )
+{
+	GLD_GET_CONTEXT
+	IDirect3DTexture9	*pTex;
+	IDirect3DSurface9	*pSurface;
+	D3DFORMAT			d3dFormat;
+	HRESULT				hr;
+	GLint				texelBytes = 4;
+	GLvoid				*tempImage;
+	RECT				rcSrcRect;
+	RECT				rcDstRect;
+
+	if (!tObj || !texImage)
+		return;
+
+	pTex = (IDirect3DTexture9*)tObj->DriverData;
+	if (!pTex)
+		return; // Texture has not been created
+	if (level >= _GLD_DX9_TEX(GetLevelCount(pTex))
+		return; // Level does not exist
+	hr = _GLD_DX9_TEX(GetSurfaceLevel(pTex, level, &pSurface);
+	if (FAILED(hr))
+		return; // Surface level doesn't exist (or just a plain error)
+
+	d3dFormat = _gldGLFormatToD3DFormat(texImage->Format);
+	tempImage = MALLOC(width * height * texelBytes);
+	if (!tempImage) {
+		_mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D");
+		IDirect3DSurface9_Release(pSurface);
+		return;
+	}
+
+	// unpack image, apply transfer ops and store in tempImage
+	texImage->TexFormat->StoreImage(ctx, 2, texImage->Format,
+		&_mesa_texformat_argb8888, // dest format
+		tempImage,
+		width, height, 1, 0, 0, 0,
+		width * texelBytes,
+		0, // dstImageStride
+		format, type, pixels, packing);
+
+	// Source rectangle is whole of input image
+	SetRect(&rcSrcRect, 0, 0, width, height);
+
+	// Dest rectangle must be offset to dest image
+	SetRect(&rcDstRect, 0, 0, width, height);
+	OffsetRect(&rcDstRect, xoffset, yoffset);
+
+	D3DXLoadSurfaceFromMemory(
+		pSurface,
+		NULL,
+		&rcDstRect,
+		tempImage,
+		D3DFMT_A8R8G8B8,
+		width * texelBytes,
+		NULL,
+		&rcSrcRect,
+		D3DX_FILTER_NONE,
+		0);
+
+	FREE(tempImage);
+	IDirect3DSurface9_Release(pSurface);
+}
+*/
+
+//---------------------------------------------------------------------------
+
+// Driver hook for glTexSubImage2D (faster, more efficient version).
+//
+// Locks the destination sub-rectangle of mipmap level 'level' of the
+// Direct3D texture attached to tObj and stores the converted source image
+// straight into it.
+//
+// ctx              - current GL context
+// target           - texture target (not examined here)
+// level            - mipmap level to update
+// xoffset, yoffset - position of the sub-image inside the level
+// width, height    - size of the sub-image in texels
+// format, type     - layout of the data at 'pixels'
+// pixels           - source image; NULL is ignored
+// packing          - pixel-store state describing 'pixels'
+// tObj, texImage   - Mesa texture object / image being updated; NULL is ignored
+void gld_TexSubImage2D_DX9( GLcontext *ctx, GLenum target, GLint level,
+                          GLint xoffset, GLint yoffset,
+                          GLsizei width, GLsizei height,
+                          GLenum format, GLenum type,
+                          const GLvoid *pixels,
+                          const struct gl_pixelstore_attrib *packing,
+                          struct gl_texture_object *tObj,
+                          struct gl_texture_image *texImage )
+{
+	IDirect3DTexture9	*pTex;
+	IDirect3DSurface9	*pSurface;
+	HRESULT				hr;
+	RECT				rcDstRect;
+	D3DLOCKED_RECT		d3dLockedRect;
+	D3DSURFACE_DESC		d3dsd;
+
+	if (!tObj || !texImage)
+		return;
+
+	// Without source data there is nothing to store
+	if (!pixels)
+		return;
+
+	pTex = (IDirect3DTexture9*)tObj->DriverData;
+	if (!pTex)
+		return; // Texture has not been created
+	if (level >= IDirect3DTexture9_GetLevelCount(pTex))
+		return; // Level does not exist
+	hr = IDirect3DTexture9_GetSurfaceLevel(pTex, level, &pSurface);
+	if (FAILED(hr))
+		return; // Surface level doesn't exist (or just a plain error)
+
+	// The surface format selects the destination Mesa format below, so do
+	// not continue with an unfilled description.
+	hr = IDirect3DSurface9_GetDesc(pSurface, &d3dsd);
+	if (FAILED(hr)) {
+		IDirect3DSurface9_Release(pSurface);
+		return;
+	}
+
+	// Dest rectangle must be offset to dest image
+	SetRect(&rcDstRect, 0, 0, width, height);
+	OffsetRect(&rcDstRect, xoffset, yoffset);
+
+	// Lock sub-rect of surface 
+	hr = IDirect3DSurface9_LockRect(pSurface, &d3dLockedRect, &rcDstRect, 0);
+	if (FAILED(hr)) {
+		IDirect3DSurface9_Release(pSurface);
+		return;
+	}
+
+	// unpack image, apply transfer ops and store directly in texture
+	texImage->TexFormat->StoreImage(ctx, 2, texImage->Format,
+		_gldMesaFormatForD3DFormat(d3dsd.Format),
+		d3dLockedRect.pBits,
+		width, height, 1,
+		0, 0, 0, // NOTE: d3dLockedRect.pBits is already offset!!!
+		d3dLockedRect.Pitch,
+		0, // dstImageStride
+		format, type, pixels, packing);
+
+	IDirect3DSurface9_UnlockRect(pSurface);
+	IDirect3DSurface9_Release(pSurface);
+}
+
+//---------------------------------------------------------------------------
+
+// Driver hook for glTexSubImage1D.
+// Handled as a one-texel-high 2D sub-image update at row 0.
+void gld_TexSubImage1D_DX9( GLcontext *ctx, GLenum target, GLint level,
+                          GLint xoffset, GLsizei width,
+                          GLenum format, GLenum type,
+                          const GLvoid *pixels,
+                          const struct gl_pixelstore_attrib *packing,
+                          struct gl_texture_object *texObj,
+                          struct gl_texture_image *texImage )
+{
+	gld_TexSubImage2D_DX9(
+		ctx, target, level,
+		xoffset, 0,		// yoffset is always zero
+		width, 1,		// height is always one
+		format, type, pixels, packing,
+		texObj, texImage);
+}
+
+//---------------------------------------------------------------------------
+
+// Releases the Direct3D texture attached to a Mesa texture object, if there
+// is one, and clears the object's DriverData pointer.
+//
+// NOTE: the texture is not unbound from any device texture stage before it
+// is released.
+//
+// ctx  - current GL context (unused)
+// tObj - texture object being deleted; NULL is ignored
+void gld_DeleteTexture_DX9(
+	GLcontext *ctx,
+	struct gl_texture_object *tObj)
+{
+	IDirect3DTexture9 *pTex;
+
+	if (!tObj)
+		return;
+
+	pTex = (IDirect3DTexture9*)tObj->DriverData;
+	if (!pTex)
+		return; // No D3D texture was ever created for this object
+
+	_GLD_DX9_TEX(Release(pTex));
+	tObj->DriverData = NULL;
+}
+
+//---------------------------------------------------------------------------
+
+// Sets the colour half of texture stage 'unit' on gld->pDev: first colour
+// argument, colour operation, then second colour argument.
+__inline void _gldSetColorOps(
+	const GLD_driver_dx9 *gld,
+	GLuint unit,
+	DWORD ColorArg1,
+	D3DTEXTUREOP ColorOp,
+	DWORD ColorArg2)
+{
+	_GLD_DX9_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_COLORARG1, ColorArg1));
+	_GLD_DX9_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_COLOROP, ColorOp));
+	_GLD_DX9_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_COLORARG2, ColorArg2));
+}
+
+//---------------------------------------------------------------------------
+
+// Sets the alpha half of texture stage 'unit' on gld->pDev: first alpha
+// argument, alpha operation, then second alpha argument.
+__inline void _gldSetAlphaOps(
+	const GLD_driver_dx9 *gld,
+	GLuint unit,
+	DWORD AlphaArg1,
+	D3DTEXTUREOP AlphaOp,
+	DWORD AlphaArg2)
+{
+	_GLD_DX9_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_ALPHAARG1, AlphaArg1));
+	_GLD_DX9_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_ALPHAOP, AlphaOp));
+	_GLD_DX9_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_ALPHAARG2, AlphaArg2));
+}
+
+//---------------------------------------------------------------------------
+
+// Programs Direct3D texture stage 'unit' from the state of the matching
+// Mesa texture unit.
+//
+// When the unit has a D3D texture and 1D or 2D texturing is really enabled,
+// the texture is bound and the sampler states, texture priority and
+// colour/alpha combiner ops (looked up in gldTexEnv[]) are set. Otherwise
+// the stage is unbound and either passes its input through or is disabled.
+//
+// ctx          - current GL context
+// unit         - index of the Mesa texture unit and of the D3D stage
+// bPassThrough - only consulted when the stage ends up untextured: TRUE
+//                makes the stage select its second (diffuse) argument,
+//                FALSE disables the stage
+void gldUpdateTextureUnit(
+	GLcontext *ctx,
+	GLuint unit,
+	BOOL bPassThrough)
+{
+	GLD_context			*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9		*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+
+	const struct gl_texture_unit	*pUnit	= &ctx->Texture.Unit[unit];
+	const struct gl_texture_object	*tObj	= pUnit->_Current;
+	IDirect3DTexture9				*pTex	= NULL;
+
+	D3DTEXTUREFILTERTYPE	minfilter;
+	D3DTEXTUREFILTERTYPE	mipfilter;
+	GLenum					BaseFormat;
+	DWORD					dwColorArg0;
+	int						iFormatRow;		// 0 = RGB (default), 1 = RGBA, 2 = ALPHA
+	int						iModeOffset;	// first gldTexEnv[] entry of the env mode
+	GLD_texenv				*pTexenv;
+
+	if (tObj) {
+		pTex = (IDirect3DTexture9*)tObj->DriverData;
+	}
+
+	// Texturing needs both a valid D3D texture and an enabled 1D/2D target.
+	// Mesa 5: TEXTUREn_x altered to TEXTURE_nD_BIT
+	if (!pTex || !(pUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT))) {
+		// Untextured stage: unbind, then pass the current fragment
+		// through or switch the stage off.
+		const D3DTEXTUREOP op = bPassThrough ? D3DTOP_SELECTARG2 : D3DTOP_DISABLE;
+
+		_GLD_DX9_DEV(SetTexture(gld->pDev, unit, NULL));
+		_gldSetColorOps(gld, unit, D3DTA_TEXTURE, op, D3DTA_DIFFUSE);
+		_gldSetAlphaOps(gld, unit, D3DTA_TEXTURE, op, D3DTA_DIFFUSE);
+		return;
+	}
+
+	// Enable texturing
+	_GLD_DX9_DEV(SetTexture(gld->pDev, unit, pTex));
+
+	// Texture parameters (filters and wrap modes are sampler states in DX9)
+	_gldConvertMinFilter(tObj->MinFilter, &minfilter, &mipfilter);
+	_GLD_DX9_DEV(SetSamplerState(gld->pDev, unit, D3DSAMP_MINFILTER, minfilter));
+	_GLD_DX9_DEV(SetSamplerState(gld->pDev, unit, D3DSAMP_MIPFILTER, mipfilter));
+	_GLD_DX9_DEV(SetSamplerState(gld->pDev, unit, D3DSAMP_MAGFILTER, _gldConvertMagFilter(tObj->MagFilter)));
+	_GLD_DX9_DEV(SetSamplerState(gld->pDev, unit, D3DSAMP_ADDRESSU, _gldConvertWrap(tObj->WrapS)));
+	_GLD_DX9_DEV(SetSamplerState(gld->pDev, unit, D3DSAMP_ADDRESSV, _gldConvertWrap(tObj->WrapT)));
+
+	// Texture priority
+	_GLD_DX9_TEX(SetPriority(pTex, (DWORD)(tObj->Priority*65535.0f)));
+
+	// Texture environment
+	// TODO: Examine input texture for alpha and use specific alpha/non-alpha ops.
+	//       See Page 355 of the Red Book.
+	BaseFormat = _gldDecodeBaseFormat(pTex);
+
+	// Row within an env-mode group; any other base format is treated as RGB
+	if (BaseFormat == GL_RGBA) {
+		iFormatRow = 1;
+	} else if (BaseFormat == GL_ALPHA) {
+		iFormatRow = 2;
+	} else {
+		iFormatRow = 0;
+	}
+
+	// Each env mode owns three consecutive entries; any other mode falls
+	// back to the first (GL_DECAL) group.
+	iModeOffset = 0;
+	if (pUnit->EnvMode == GL_REPLACE) {
+		iModeOffset = 3;
+	} else if (pUnit->EnvMode == GL_MODULATE) {
+		iModeOffset = 6;
+	} else if (pUnit->EnvMode == GL_BLEND) {
+		// Set blend colour
+		// NOTE(review): D3DTSS_COLORARG0 normally takes a D3DTA_* selector;
+		// confirm that storing a packed colour here is intended.
+		dwColorArg0 = D3DCOLOR_COLORVALUE(pUnit->EnvColor[0], pUnit->EnvColor[1], pUnit->EnvColor[2], pUnit->EnvColor[3]);
+		_GLD_DX9_DEV(SetTextureStageState(gld->pDev, unit, D3DTSS_COLORARG0, dwColorArg0));
+		iModeOffset = 9;
+	} else if (pUnit->EnvMode == GL_ADD) {
+		iModeOffset = 12;
+	}
+
+	pTexenv = (GLD_texenv*)&gldTexEnv[iFormatRow + iModeOffset];
+	_gldSetColorOps(gld, unit, pTexenv->ColorArg1, pTexenv->ColorOp, pTexenv->ColorArg2);
+	_gldSetAlphaOps(gld, unit, pTexenv->AlphaArg1, pTexenv->AlphaOp, pTexenv->AlphaArg2);
+}
+
+//---------------------------------------------------------------------------
+
+// Updates the Direct3D texture stages from the current Mesa texture unit
+// state. Only the first two texture units are handled.
+//
+// With a single texture unit, stage 0 is updated in pass-through mode.
+// With more, stage 0 passes through only when it is disabled while unit 1
+// is enabled, and stage 1 may always be disabled.
+//
+// ctx - current GL context; NULL is ignored
+void gld_NEW_TEXTURE_DX9(
+	GLcontext *ctx)
+{
+	// TODO: Support for three (ATI Radeon) or more (nVidia GeForce3) texture units
+
+	BOOL bUnit0Enabled;
+	BOOL bUnit1Enabled;
+
+	if (!ctx)
+		return; // Sanity check
+
+	if (ctx->Const.MaxTextureUnits == 1) {
+		gldUpdateTextureUnit(ctx, 0, TRUE);
+		return;
+	}
+
+	//
+	// NOTE: THE FOLLOWING RELATES TO TWO TEXTURE UNITS, AND TWO ONLY!!
+	//
+
+	// Mesa 5: Texture Units altered
+	bUnit0Enabled = (ctx->Texture.Unit[0]._ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) ? TRUE : FALSE;
+	bUnit1Enabled = (ctx->Texture.Unit[1]._ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) ? TRUE : FALSE;
+
+	// If Unit0 is disabled and Unit1 is enabled then we must pass-through
+	gldUpdateTextureUnit(ctx, 0, (!bUnit0Enabled && bUnit1Enabled) ? TRUE : FALSE);
+	// We can always disable the last texture unit
+	gldUpdateTextureUnit(ctx, 1, FALSE);
+}
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx9/gld_vb_d3d_render_dx9.c b/src/mesa/drivers/windows/gldirect/dx9/gld_vb_d3d_render_dx9.c
new file mode 100644
index 0000000000..91a68b3f2d
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx9/gld_vb_d3d_render_dx9.c
@@ -0,0 +1,263 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  GLDirect fastpath pipeline stage
+*
+****************************************************************************/
+
+//---------------------------------------------------------------------------
+
+//#include "../GLDirect.h"
+//#include "../gld_log.h"
+//#include "gld_dx8.h"
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx9.h"
+
+//---------------------------------------------------------------------------
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+// #include "mem.h"
+#include "mtypes.h"
+//#include "mmath.h"
+
+#include "math/m_matrix.h"
+#include "math/m_xform.h"
+
+#include "tnl/t_pipeline.h"
+
+//---------------------------------------------------------------------------
+
+// Uploads the combined transform to the vertex shader.
+//
+// Copies the tops of Mesa's modelview and projection matrix stacks into
+// D3DX matrices element by element, multiplies them (modelview first),
+// transposes the product and writes it to vertex shader constant
+// registers 0..3 of gld->pDev.
+__inline void _gldSetVertexShaderConstants(
+	GLcontext *ctx,
+	GLD_driver_dx9 *gld)
+{
+	D3DXMATRIX		matCombined, matView, matProj;
+	// Mesa 5: the matrices were altered to stacks
+	const GLfloat	*pView = ctx->ModelviewMatrixStack.Top->m;
+	const GLfloat	*pProj = ctx->ProjectionMatrixStack.Top->m;
+	int				i;
+
+	// Element i of the Mesa array lands in m[i / 4][i % 4],
+	// i.e. _11, _12, _13, _14, _21, ... _44 in that order.
+	for (i = 0; i < 16; i++) {
+		matView.m[i >> 2][i & 3] = pView[i];
+	}
+	for (i = 0; i < 16; i++) {
+		matProj.m[i >> 2][i & 3] = pProj[i];
+	}
+
+	D3DXMatrixMultiply( &matCombined, &matView, &matProj );
+	D3DXMatrixTranspose( &matCombined, &matCombined );
+
+	// Four float4 registers starting at register 0
+	_GLD_DX9_DEV(SetVertexShaderConstantF(gld->pDev, 0, (float*)&matCombined, 4));
+}
+
+//---------------------------------------------------------------------------
+
+// "run" callback of the GLDirect fastpath pipeline stage.
+//
+// When gld->bUseMesaTnL is set the stage does nothing and returns GL_TRUE
+// so that the pipeline continues. Otherwise it installs the 3D point/line/
+// triangle/quad handlers, locks the primitive vertex buffer, runs the TnL
+// render tables over every primitive in the vertex buffer so the handlers
+// can fill it, then unlocks it and draws the collected points, lines and
+// triangles. It then returns GL_FALSE, ending the pipeline.
+//
+// NOTE(review): Render.Start is called here but Render.Finish is only
+// asserted, never called, in this function - confirm that is intended.
+static GLboolean gld_d3d_render_stage_run(
+	GLcontext *ctx,
+	struct tnl_pipeline_stage *stage)
+{
+	GLD_context				*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9			*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+
+	TNLcontext				*tnl;
+	struct vertex_buffer	*VB;
+	tnl_render_func				*tab;
+	GLint					pass;
+	GLD_pb_dx9				*gldPB = &gld->PB3d;
+/*
+	static int count = 0;
+	count++;
+	if (count != 2)
+		return GL_FALSE;
+*/
+	// The "check" function should disable this stage,
+	// but we'll test gld->bUseMesaTnL anyway.
+	if (gld->bUseMesaTnL) {
+		// Do nothing in this stage, but continue pipeline
+		return GL_TRUE;
+	}
+	
+	tnl = TNL_CONTEXT(ctx);
+	VB = &tnl->vb;
+	pass = 0;
+
+   tnl->Driver.Render.Start( ctx );
+
+#if 0
+   // For debugging: Useful to see if an app passes colour data in
+   // an unusual format.
+   switch (VB->AttribPtr[_TNL_ATTRIB_COLOR0]->Type) {
+   case GL_FLOAT:
+	   ddlogMessage(GLDLOG_SYSTEM, "ColorPtr: GL_FLOAT\n");
+	   break;
+   case GL_UNSIGNED_BYTE:
+	   ddlogMessage(GLDLOG_SYSTEM, "ColorPtr: GL_UNSIGNED_BYTE\n");
+	   break;
+   default:
+	   ddlogMessage(GLDLOG_SYSTEM, "ColorPtr: *?*\n");
+	   break;
+   }
+#endif
+
+   // Install the 3D primitive handlers; flat or smooth variants are chosen
+   // from the DD_FLATSHADE bit of ctx->_TriangleCaps.
+   tnl->Driver.Render.Points		= gld_Points3D_DX9;
+   if (ctx->_TriangleCaps & DD_FLATSHADE) {
+	   tnl->Driver.Render.Line		= gld_Line3DFlat_DX9;
+	   tnl->Driver.Render.Triangle	= gld_Triangle3DFlat_DX9;
+	   tnl->Driver.Render.Quad		= gld_Quad3DFlat_DX9;
+   } else {
+	   tnl->Driver.Render.Line		= gld_Line3DSmooth_DX9;
+	   tnl->Driver.Render.Triangle	= gld_Triangle3DSmooth_DX9;
+	   tnl->Driver.Render.Quad		= gld_Quad3DSmooth_DX9;
+   }
+
+	// Lock the whole vertex buffer, discarding its previous contents
+	_GLD_DX9_VB(Lock(gldPB->pVB, 0, 0, &gldPB->pPoints, D3DLOCK_DISCARD));
+	gldPB->nPoints = gldPB->nLines = gldPB->nTriangles = 0;
+	// Allocate primitive pointers
+	// gldPB->pPoints is always first
+	gldPB->pLines		= gldPB->pPoints + (gldPB->dwStride * gldPB->iFirstLine);
+	gldPB->pTriangles	= gldPB->pPoints + (gldPB->dwStride * gldPB->iFirstTriangle);
+	
+	ASSERT(tnl->Driver.Render.BuildVertices);
+	ASSERT(tnl->Driver.Render.PrimitiveNotify);
+	ASSERT(tnl->Driver.Render.Points);
+	ASSERT(tnl->Driver.Render.Line);
+	ASSERT(tnl->Driver.Render.Triangle);
+	ASSERT(tnl->Driver.Render.Quad);
+	ASSERT(tnl->Driver.Render.ResetLineStipple);
+	ASSERT(tnl->Driver.Render.Interp);
+	ASSERT(tnl->Driver.Render.CopyPV);
+	ASSERT(tnl->Driver.Render.ClippedLine);
+	ASSERT(tnl->Driver.Render.ClippedPolygon);
+	ASSERT(tnl->Driver.Render.Finish);
+
+	// Use the element (indexed) render table when the VB carries indices
+	tab = (VB->Elts ? tnl->Driver.Render.PrimTabElts : tnl->Driver.Render.PrimTabVerts);
+	
+	// Walk the primitive list until the entry flagged PRIM_END; the whole
+	// walk is repeated for as long as the optional Multipass hook asks.
+	do {
+		GLuint i, length, flags = 0;
+		for (i = 0 ; !(flags & PRIM_END) ; i += length)
+		{
+			flags = VB->Primitive[i].mode;
+			length= VB->Primitive[i].count;
+			ASSERT(length || (flags & PRIM_END));
+			ASSERT((flags & PRIM_MODE_MASK) <= GL_POLYGON+1);
+			if (length)
+				tab[flags & PRIM_MODE_MASK]( ctx, i, i + length, flags );
+		}
+	} while (tnl->Driver.Render.Multipass &&
+		tnl->Driver.Render.Multipass( ctx, ++pass ));
+	
+	_GLD_DX9_VB(Unlock(gldPB->pVB));
+
+	_GLD_DX9_DEV(SetStreamSource(gld->pDev, 0, gldPB->pVB, 0, gldPB->dwStride));
+
+	_GLD_DX9_DEV(SetTransform(gld->pDev, D3DTS_PROJECTION, &gld->matProjection));
+	_GLD_DX9_DEV(SetTransform(gld->pDev, D3DTS_WORLD, &gld->matModelView));
+
+	// Draw each primitive class from its own region of the vertex buffer
+	// and reset its counter.
+	if (gldPB->nPoints) {
+		_GLD_DX9_DEV(DrawPrimitive(gld->pDev, D3DPT_POINTLIST, 0, gldPB->nPoints));
+		gldPB->nPoints = 0;
+	}
+
+	if (gldPB->nLines) {
+		_GLD_DX9_DEV(DrawPrimitive(gld->pDev, D3DPT_LINELIST, gldPB->iFirstLine, gldPB->nLines));
+		gldPB->nLines = 0;
+	}
+
+	if (gldPB->nTriangles) {
+		_GLD_DX9_DEV(DrawPrimitive(gld->pDev, D3DPT_TRIANGLELIST, gldPB->iFirstTriangle, gldPB->nTriangles));
+		gldPB->nTriangles = 0;
+	}
+
+	return GL_FALSE;		/* finished the pipe */
+}
+
+//---------------------------------------------------------------------------
+
+// Marks the stage active exactly when the driver is not using Mesa's own
+// TnL path (gld->bUseMesaTnL is clear).
+static void gld_d3d_render_stage_check(
+	GLcontext *ctx,
+	struct tnl_pipeline_stage *stage)
+{
+	GLD_context		*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9	*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+
+	// Is this thread safe?
+	if (gld->bUseMesaTnL) {
+		stage->active = GL_FALSE;
+	} else {
+		stage->active = GL_TRUE;
+	}
+}
+
+
+//---------------------------------------------------------------------------
+
+// Pipeline stage descriptor for the GLDirect Direct3D render stage.
+// Only the stage name and the run callback are supplied; the entries in
+// between are left NULL.
+const struct tnl_pipeline_stage _gld_d3d_render_stage =
+{
+   "gld_d3d_render_stage",
+   NULL,
+   NULL,
+   NULL,
+   NULL,
+   gld_d3d_render_stage_run			/* run */
+};
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx9/gld_vb_mesa_render_dx9.c b/src/mesa/drivers/windows/gldirect/dx9/gld_vb_mesa_render_dx9.c
new file mode 100644
index 0000000000..64acab2d2a
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx9/gld_vb_mesa_render_dx9.c
@@ -0,0 +1,443 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keithw@valinux.com>
+ */
+
+
+/*
+ * Render whole vertex buffers, including projection of vertices from
+ * clip space and clipping of primitives.
+ *
+ * This file makes calls to project vertices and to the point, line
+ * and triangle rasterizers via the function pointers:
+ *
+ *    context->Driver.Render.*
+ *
+ */
+
+
+//---------------------------------------------------------------------------
+
+#include "dglcontext.h"
+#include "ddlog.h"
+#include "gld_dx9.h"
+
+//---------------------------------------------------------------------------
+
+#include "glheader.h"
+#include "context.h"
+#include "macros.h"
+#include "mtypes.h"
+//#include "mmath.h"
+
+#include "math/m_matrix.h"
+#include "math/m_xform.h"
+
+#include "tnl/t_pipeline.h"
+
+/**********************************************************************/
+/*                        Clip single primitives                      */
+/**********************************************************************/
+
+
+#if defined(USE_IEEE)
+#define NEGATIVE(x) (GET_FLOAT_BITS(x) & (1<<31))
+//#define DIFFERENT_SIGNS(x,y) ((GET_FLOAT_BITS(x) ^ GET_FLOAT_BITS(y)) & (1<<31))
+#else
+#define NEGATIVE(x) (x < 0)
+//#define DIFFERENT_SIGNS(x,y) (x * y <= 0 && x - y != 0)
+/* Could just use (x*y<0) except for the flatshading requirements.
+ * Maybe there's a better way?
+ */
+#endif
+
+
+#define W(i) coord[i][3]
+#define Z(i) coord[i][2]
+#define Y(i) coord[i][1]
+#define X(i) coord[i][0]
+#define SIZE 4
+#define TAG(x) x##_4
+#include "tnl/t_vb_cliptmp.h"
+
+
+
+/**********************************************************************/
+/*              Clip and render whole begin/end objects               */
+/**********************************************************************/
+
+#define NEED_EDGEFLAG_SETUP (ctx->_TriangleCaps & DD_TRI_UNFILLED)
+#define EDGEFLAG_GET(idx) VB->EdgeFlag[idx]
+#define EDGEFLAG_SET(idx, val) VB->EdgeFlag[idx] = val
+
+
+/* Vertices, with the possibility of clipping.
+ *
+ * Each RENDER_LINE/RENDER_TRI/RENDER_QUAD macro below reads the clip
+ * mask byte of every vertex of the primitive:
+ *   - if no bit is set in any of them, the primitive goes straight to
+ *     the driver function (LineFunc / TriangleFunc / QuadFunc);
+ *   - otherwise, if the AND of all the masks has none of the low six
+ *     bits (0x3f) set, the primitive is handed to the matching
+ *     clip_*_4 routine together with the OR of the masks;
+ *   - otherwise nothing is drawn.
+ * 'mask' and the *Func pointers are the locals declared by LOCAL_VARS.
+ */
+#define RENDER_POINTS( start, count ) \
+   tnl->Driver.Render.Points( ctx, start, count )
+
+#define RENDER_LINE( v1, v2 )			\
+do {						\
+   GLubyte c1 = mask[v1], c2 = mask[v2];	\
+   GLubyte ormask = c1|c2;			\
+   if (!ormask)					\
+      LineFunc( ctx, v1, v2 );			\
+   else if (!(c1 & c2 & 0x3f))			\
+      clip_line_4( ctx, v1, v2, ormask );	\
+} while (0)
+
+#define RENDER_TRI( v1, v2, v3 )			\
+do {							\
+   GLubyte c1 = mask[v1], c2 = mask[v2], c3 = mask[v3];	\
+   GLubyte ormask = c1|c2|c3;				\
+   if (!ormask)						\
+      TriangleFunc( ctx, v1, v2, v3 );			\
+   else if (!(c1 & c2 & c3 & 0x3f)) 			\
+      clip_tri_4( ctx, v1, v2, v3, ormask );    	\
+} while (0)
+
+#define RENDER_QUAD( v1, v2, v3, v4 )			\
+do {							\
+   GLubyte c1 = mask[v1], c2 = mask[v2];		\
+   GLubyte c3 = mask[v3], c4 = mask[v4];		\
+   GLubyte ormask = c1|c2|c3|c4;			\
+   if (!ormask)						\
+      QuadFunc( ctx, v1, v2, v3, v4 );			\
+   else if (!(c1 & c2 & c3 & c4 & 0x3f)) 		\
+      clip_quad_4( ctx, v1, v2, v3, v4, ormask );	\
+} while (0)
+
+
+#define LOCAL_VARS						\
+   TNLcontext *tnl = TNL_CONTEXT(ctx);				\
+   struct vertex_buffer *VB = &tnl->vb;				\
+   const GLuint * const elt = VB->Elts;				\
+   const GLubyte *mask = VB->ClipMask;				\
+   const GLuint sz = VB->ClipPtr->size;				\
+   const tnl_line_func LineFunc = tnl->Driver.Render.Line;		\
+   const tnl_triangle_func TriangleFunc = tnl->Driver.Render.Triangle;	\
+   const tnl_quad_func QuadFunc = tnl->Driver.Render.Quad;		\
+   const GLboolean stipple = ctx->Line.StippleFlag;		\
+   (void) (LineFunc && TriangleFunc && QuadFunc);		\
+   (void) elt; (void) mask; (void) sz; (void) stipple;
+
+#define TAG(x) clip_##x##_verts
+#define INIT(x) tnl->Driver.Render.PrimitiveNotify( ctx, x )
+#define RESET_STIPPLE if (stipple) tnl->Driver.Render.ResetLineStipple( ctx )
+#define PRESERVE_VB_DEFS
+#include "tnl/t_vb_rendertmp.h"
+
+
+
+/* Elts, with the possibility of clipping.
+ */
+#undef ELT
+#undef TAG
+#define ELT(x) elt[x]
+#define TAG(x) clip_##x##_elts
+#include "tnl/t_vb_rendertmp.h"
+
+/* TODO: do this for all primitives, verts and elts:
+ */
+/*
+ * Render the indexed GL_TRIANGLES range [start, count), clipping where
+ * needed.
+ *
+ * Consecutive triangles whose three vertices all have a zero clip mask
+ * are batched and passed in one call to the driver's
+ * PrimTabElts[GL_TRIANGLES] function.  A triangle with any clip bit set
+ * first flushes the pending batch; it is then sent to clip_tri_4()
+ * unless all three vertices share one of the low six (0x3f) clip bits,
+ * in which case it is dropped.
+ *
+ *   ctx    GL context.
+ *   start  index of the first element of the range.
+ *   count  index one past the last element of the range.
+ *   flags  primitive flags; unused.
+ */
+static void clip_elt_triangles( GLcontext *ctx,
+				GLuint start,
+				GLuint count,
+				GLuint flags )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tnl_render_func render_tris = tnl->Driver.Render.PrimTabElts[GL_TRIANGLES];
+   struct vertex_buffer *VB = &tnl->vb;
+   const GLuint * const elt = VB->Elts;
+   const GLubyte *mask = VB->ClipMask;
+   GLuint j;
+   (void) flags;
+
+   tnl->Driver.Render.PrimitiveNotify( ctx, GL_TRIANGLES );
+
+   /* Test j+2 < count rather than j < count-2: count is unsigned, so
+    * count-2 wraps to a huge value when count < 2 and the loop would
+    * then index far past the end of elt[] and mask[].
+    */
+   for (j=start; j+2 < count; j+=3 ) {
+      GLubyte c1 = mask[elt[j]];
+      GLubyte c2 = mask[elt[j+1]];
+      GLubyte c3 = mask[elt[j+2]];
+      GLubyte ormask = c1|c2|c3;
+      if (ormask) {
+	 /* Flush the unclipped triangles collected so far. */
+	 if (start < j)
+	    render_tris( ctx, start, j, 0 );
+	 if (!(c1&c2&c3&0x3f))
+	    clip_tri_4( ctx, elt[j], elt[j+1], elt[j+2], ormask );
+	 /* The next batch begins after this triangle. */
+	 start = j+3;
+      }
+   }
+
+   /* Flush whatever unclipped triangles remain. */
+   if (start < j)
+      render_tris( ctx, start, j, 0 );
+}
+
+/**********************************************************************/
+/*                  Render whole begin/end objects                    */
+/**********************************************************************/
+
+#define NEED_EDGEFLAG_SETUP (ctx->_TriangleCaps & DD_TRI_UNFILLED)
+#define EDGEFLAG_GET(idx) VB->EdgeFlag[idx]
+#define EDGEFLAG_SET(idx, val) VB->EdgeFlag[idx] = val
+
+
+/* Vertices, no clipping.
+ */
+#define RENDER_POINTS( start, count ) \
+   tnl->Driver.Render.Points( ctx, start, count )
+
+#define RENDER_LINE( v1, v2 ) \
+   LineFunc( ctx, v1, v2 )
+
+#define RENDER_TRI( v1, v2, v3 ) \
+   TriangleFunc( ctx, v1, v2, v3 )
+
+#define RENDER_QUAD( v1, v2, v3, v4 ) \
+   QuadFunc( ctx, v1, v2, v3, v4 )
+
+#define TAG(x) _gld_tnl_##x##_verts
+
+#define LOCAL_VARS						\
+   TNLcontext *tnl = TNL_CONTEXT(ctx);				\
+   struct vertex_buffer *VB = &tnl->vb;				\
+   const GLuint * const elt = VB->Elts;				\
+   const tnl_line_func LineFunc = tnl->Driver.Render.Line;		\
+   const tnl_triangle_func TriangleFunc = tnl->Driver.Render.Triangle;	\
+   const tnl_quad_func QuadFunc = tnl->Driver.Render.Quad;		\
+   (void) (LineFunc && TriangleFunc && QuadFunc);		\
+   (void) elt;
+
+#define RESET_STIPPLE tnl->Driver.Render.ResetLineStipple( ctx )
+#define INIT(x) tnl->Driver.Render.PrimitiveNotify( ctx, x )
+#define RENDER_TAB_QUALIFIER
+#define PRESERVE_VB_DEFS
+#include "tnl/t_vb_rendertmp.h"
+
+
+/* Elts, no clipping.
+ */
+#undef ELT
+#define TAG(x) _gld_tnl_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+/**********************************************************************/
+/*              Helper functions for drivers                  */
+/**********************************************************************/
+/*
+void _tnl_RenderClippedPolygon( GLcontext *ctx, const GLuint *elts, GLuint n )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   struct vertex_buffer *VB = &tnl->vb;
+   GLuint *tmp = VB->Elts;
+
+   VB->Elts = (GLuint *)elts;
+   tnl->Driver.Render.PrimTabElts[GL_POLYGON]( ctx, 0, n, PRIM_BEGIN|PRIM_END );
+   VB->Elts = tmp;
+}
+
+void _tnl_RenderClippedLine( GLcontext *ctx, GLuint ii, GLuint jj )
+{
+   TNLcontext *tnl = TNL_CONTEXT(ctx);
+   tnl->Driver.Render.Line( ctx, ii, jj );
+}
+*/
+
+
+/**********************************************************************/
+/*              Clip and render whole vertex buffers                  */
+/**********************************************************************/
+
+// Rasteriser entry points.  _gld_mesa_render_stage_run() installs one
+// entry from each table into tnl->Driver.Render, indexing all four
+// tables with gld->iSetupFunc.
+// NOTE(review): judging by the function names, bit 0 of the index appears
+// to select smooth vs. flat shading and bit 1 the "Extras" variants --
+// confirm against the code that computes iSetupFunc.
+tnl_points_func _gldSetupPoints[4] = {
+	gld_Points2D_DX9,
+	gld_Points2D_DX9,
+	gld_Points2D_DX9,
+	gld_Points2D_DX9
+};
+tnl_line_func _gldSetupLine[4] = {
+	gld_Line2DFlat_DX9,
+	gld_Line2DSmooth_DX9,
+	gld_Line2DFlat_DX9,
+	gld_Line2DSmooth_DX9,
+};
+tnl_triangle_func _gldSetupTriangle[4] = {
+	gld_Triangle2DFlat_DX9,
+	gld_Triangle2DSmooth_DX9,
+	gld_Triangle2DFlatExtras_DX9,
+	gld_Triangle2DSmoothExtras_DX9
+};
+tnl_quad_func _gldSetupQuad[4] = {
+	gld_Quad2DFlat_DX9,
+	gld_Quad2DSmooth_DX9,
+	gld_Quad2DFlatExtras_DX9,
+	gld_Quad2DSmoothExtras_DX9
+};
+
+//---------------------------------------------------------------------------
+
+// Run callback of the Mesa-TnL render stage.
+//
+// Selects the 2D primitive buffer and rasteriser functions, locks the
+// vertex buffer, lets Mesa walk the primitives of the current vertex
+// buffer (with or without clipping), then unlocks the buffer and issues
+// up to three DrawPrimitive calls (points, lines, triangles).
+//
+// ctx    GL context.
+// stage  pipeline stage; not referenced in this function.
+//
+// Always returns GL_FALSE.
+static GLboolean _gld_mesa_render_stage_run(
+	GLcontext *ctx,
+	struct tnl_pipeline_stage *stage)
+{
+	GLD_context				*gldCtx	= GLD_GET_CONTEXT(ctx);
+	GLD_driver_dx9			*gld	= GLD_GET_DX9_DRIVER(gldCtx);
+		
+	TNLcontext				*tnl = TNL_CONTEXT(ctx);
+	struct vertex_buffer	*VB = &tnl->vb;
+	tnl_render_func				*tab;
+	GLint					pass = 0;
+	GLD_pb_dx9				*gldPB;
+
+	/* Allow the drivers to lock before projected verts are built so
+    * that window coordinates are guaranteed not to change before
+    * rendering.
+    */
+	ASSERT(tnl->Driver.Render.Start);
+	
+	tnl->Driver.Render.Start( ctx );
+	
+	// NOTE: Setting D3DRS_SOFTWAREVERTEXPROCESSING for a mixed-mode device resets
+	//       stream, indices and shader to default values of NULL or 0.
+/*	if ((ctx->_TriangleCaps & DD_TRI_LIGHT_TWOSIDE) &&
+		gld->VStwosidelight.hShader &&
+		!ctx->Fog.Enabled)
+	{
+		IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_SOFTWAREVERTEXPROCESSING, !gld->VStwosidelight.bHardware);
+		_GLD_DX9_DEV(SetVertexShader(gld->pDev, gld->VStwosidelight.hShader));
+		gldPB = &gld->PBtwosidelight;
+		tnl->Driver.Render.Points	= gld_Points2DTwoside_DX9;
+		if (ctx->_TriangleCaps & DD_FLATSHADE) {
+			tnl->Driver.Render.Line		= gld_Line2DFlatTwoside_DX9;
+			tnl->Driver.Render.Triangle	= gld_Triangle2DFlatTwoside_DX9;
+			tnl->Driver.Render.Quad		= gld_Quad2DFlatTwoside_DX9;
+		} else {
+			tnl->Driver.Render.Line		= gld_Line2DSmoothTwoside_DX9;
+			tnl->Driver.Render.Triangle	= gld_Triangle2DSmoothTwoside_DX9;
+			tnl->Driver.Render.Quad		= gld_Quad2DSmoothTwoside_DX9;
+		}
+	} else {*/
+//		IDirect3DDevice8_SetRenderState(gld->pDev, D3DRS_SOFTWAREVERTEXPROCESSING, TRUE);
+		// Only this path is live (the two-sided-lighting branch above is
+		// commented out): software vertex processing, no vertex shader,
+		// the FVF of the 2D primitive buffer, and rasteriser functions
+		// picked from the _gldSetup* tables by gld->iSetupFunc.
+		IDirect3DDevice9_SetSoftwareVertexProcessing(gld->pDev, TRUE);
+		gldPB = &gld->PB2d;
+		_GLD_DX9_DEV(SetVertexShader(gld->pDev, NULL));
+		_GLD_DX9_DEV(SetFVF(gld->pDev, gldPB->dwFVF));
+		tnl->Driver.Render.Points	= _gldSetupPoints[gld->iSetupFunc];
+		tnl->Driver.Render.Line		= _gldSetupLine[gld->iSetupFunc];
+		tnl->Driver.Render.Triangle	= _gldSetupTriangle[gld->iSetupFunc];
+		tnl->Driver.Render.Quad		= _gldSetupQuad[gld->iSetupFunc];
+//	}
+
+	// Lock the vertex buffer with D3DLOCK_DISCARD (offset 0, size 0);
+	// the mapped address is stored in gldPB->pPoints.
+	_GLD_DX9_VB(Lock(gldPB->pVB, 0, 0, &gldPB->pPoints, D3DLOCK_DISCARD));
+	gldPB->nPoints = gldPB->nLines = gldPB->nTriangles = 0;
+	// Allocate primitive pointers
+	// gldPB->pPoints is always first
+	// Lines and triangles live in the same buffer, offset from pPoints by
+	// dwStride * the first vertex index of their region.
+	// NOTE(review): this assumes pPoints is a byte-sized pointer type -- confirm.
+	gldPB->pLines		= gldPB->pPoints + (gldPB->dwStride * gldPB->iFirstLine);
+	gldPB->pTriangles	= gldPB->pPoints + (gldPB->dwStride * gldPB->iFirstTriangle);
+
+	ASSERT(tnl->Driver.Render.BuildVertices);
+	ASSERT(tnl->Driver.Render.PrimitiveNotify);
+	ASSERT(tnl->Driver.Render.Points);
+	ASSERT(tnl->Driver.Render.Line);
+	ASSERT(tnl->Driver.Render.Triangle);
+	ASSERT(tnl->Driver.Render.Quad);
+	ASSERT(tnl->Driver.Render.ResetLineStipple);
+	ASSERT(tnl->Driver.Render.Interp);
+	ASSERT(tnl->Driver.Render.CopyPV);
+	ASSERT(tnl->Driver.Render.ClippedLine);
+	ASSERT(tnl->Driver.Render.ClippedPolygon);
+	ASSERT(tnl->Driver.Render.Finish);
+	
+	tnl->Driver.Render.BuildVertices( ctx, 0, VB->Count, ~0 );
+	
+	// Choose the render table: the clipping tables generated in this file
+	// when any vertex has a clip bit set, otherwise the driver's own tables.
+	if (VB->ClipOrMask) {
+		tab = VB->Elts ? clip_render_tab_elts : clip_render_tab_verts;
+		// Replace the generated GL_TRIANGLES elts entry with the batching
+		// version; note this writes to the table on every clipped run.
+		clip_render_tab_elts[GL_TRIANGLES] = clip_elt_triangles;
+	}
+	else {
+		tab = (VB->Elts ? 
+			tnl->Driver.Render.PrimTabElts : 
+		tnl->Driver.Render.PrimTabVerts);
+	}
+	
+	// Walk the primitive list until an entry carrying PRIM_END has been
+	// processed; repeat the whole walk while the optional Multipass
+	// callback returns non-zero.
+	do {
+		GLuint i, length, flags = 0;
+		for (i = 0 ; !(flags & PRIM_END) ; i += length) {
+			flags = VB->Primitive[i].mode;
+			length= VB->Primitive[i].count;
+			ASSERT(length || (flags & PRIM_END));
+			ASSERT((flags & PRIM_MODE_MASK) <= GL_POLYGON+1);
+			if (length)
+				tab[flags & PRIM_MODE_MASK]( ctx, i, i + length, flags );
+		}
+	} while (tnl->Driver.Render.Multipass &&
+		tnl->Driver.Render.Multipass( ctx, ++pass ));
+	
+	
+//	tnl->Driver.Render.Finish( ctx );
+	
+	_GLD_DX9_VB(Unlock(gldPB->pVB));
+
+	_GLD_DX9_DEV(SetStreamSource(gld->pDev, 0, gldPB->pVB, 0, gldPB->dwStride));
+
+	// Draw each non-empty region and reset its counter.
+	// NOTE(review): the counters are presumably incremented by the
+	// rasteriser functions installed above -- they are not touched here.
+	if (gldPB->nPoints) {
+		_GLD_DX9_DEV(DrawPrimitive(gld->pDev, D3DPT_POINTLIST, 0, gldPB->nPoints));
+		gldPB->nPoints = 0;
+	}
+
+	if (gldPB->nLines) {
+		_GLD_DX9_DEV(DrawPrimitive(gld->pDev, D3DPT_LINELIST, gldPB->iFirstLine, gldPB->nLines));
+		gldPB->nLines = 0;
+	}
+
+	if (gldPB->nTriangles) {
+		_GLD_DX9_DEV(DrawPrimitive(gld->pDev, D3DPT_TRIANGLELIST, gldPB->iFirstTriangle, gldPB->nTriangles));
+		gldPB->nTriangles = 0;
+	}
+
+	return GL_FALSE;		/* finished the pipe */
+}
+
+
+/**********************************************************************/
+/*                          Render pipeline stage                     */
+/**********************************************************************/
+
+
+
+
+// Pipeline stage descriptor for the Mesa-TnL render stage.
+// Only the leading string and the trailing run callback are filled in;
+// the four entries in between are left NULL.
+const struct tnl_pipeline_stage _gld_mesa_render_stage =
+{
+   "gld_mesa_render_stage",
+   NULL,
+   NULL,
+   NULL,
+   NULL,
+   _gld_mesa_render_stage_run	/* run */
+};
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/dx9/gld_wgl_dx9.c b/src/mesa/drivers/windows/gldirect/dx9/gld_wgl_dx9.c
new file mode 100644
index 0000000000..a03b865bb4
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/dx9/gld_wgl_dx9.c
@@ -0,0 +1,1346 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  GLDirect Direct3D 9.x WGL (WindowsGL)
+*
+****************************************************************************/
+
+#include "dglcontext.h"
+#include "gld_driver.h"
+#include "gld_dxerr9.h"
+#include "gld_dx9.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+// Copied from dglcontext.c
+#define GLDERR_NONE     0
+#define GLDERR_MEM      1
+#define GLDERR_DDRAW    2
+#define GLDERR_D3D      3
+#define GLDERR_BPP      4
+#define GLDERR_DDS      5
+// This external var keeps track of any error
+extern int nContextError;
+
+#define DDLOG_CRITICAL_OR_WARN	DDLOG_CRITICAL
+
+extern void _gld_mesa_warning(GLcontext *, char *);
+extern void _gld_mesa_fatal(GLcontext *, char *);
+
+//---------------------------------------------------------------------------
+
+static char	szColorDepthWarning[] =
+"GLDirect does not support the current desktop\n\
+color depth.\n\n\
+You may need to change the display resolution to\n\
+16 bits per pixel or higher color depth using\n\
+the Windows Display Settings control panel\n\
+before running this OpenGL application.\n";
+
+// The only depth-stencil formats currently supported by Direct3D
+// Surface Format	Depth	Stencil		Total Bits
+// D3DFMT_D32		32		-			32
+// D3DFMT_D15S1		15		1			16
+// D3DFMT_D24S8		24		8			32
+// D3DFMT_D16		16		-			16
+// D3DFMT_D24X8		24		-			32
+// D3DFMT_D24X4S4	24		4			32
+
+// This pixel format will be used as a template when compiling the list
+// of pixel formats supported by the hardware. Many fields will be
+// filled in at runtime.
+// PFD flag defaults are upgraded to match ChoosePixelFormat() -- DaveM
+// Two members are initialised: the Win32 PIXELFORMATDESCRIPTOR and a
+// trailing Direct3D depth/stencil format.
+static DGL_pixelFormat pfTemplateHW =
+{
+    {
+	sizeof(PIXELFORMATDESCRIPTOR),	// Size of the data structure
+		1,							// Structure version - should be 1
+									// Flags:
+		PFD_DRAW_TO_WINDOW |		// The buffer can draw to a window or device surface.
+		PFD_DRAW_TO_BITMAP |		// The buffer can draw to a bitmap. (DaveM)
+		PFD_SUPPORT_GDI |			// The buffer supports GDI drawing. (DaveM)
+		PFD_SUPPORT_OPENGL |		// The buffer supports OpenGL drawing.
+		PFD_DOUBLEBUFFER |			// The buffer is double-buffered.
+		0,							// Placeholder for easy commenting of above flags
+		PFD_TYPE_RGBA,				// Pixel type RGBA.
+		16,							// Total colour bitplanes (excluding alpha bitplanes)
+		5, 0,						// Red bits, shift
+		5, 0,						// Green bits, shift
+		5, 0,						// Blue bits, shift
+		0, 0,						// Alpha bits, shift (destination alpha)
+		0,							// Accumulator bits (total)
+		0, 0, 0, 0,					// Accumulator bits: Red, Green, Blue, Alpha
+		0,							// Depth bits
+		0,							// Stencil bits
+		0,							// Number of auxiliary buffers
+		0,							// Layer type
+		0,							// Specifies the number of overlay and underlay planes.
+		0,							// Layer mask
+		0,							// Specifies the transparent color or index of an underlay plane.
+		0							// Damage mask
+	},
+	// NOTE(review): presumably the dwDriverData field that
+	// gldCreateDrawable_DX() uses as AutoDepthStencilFormat -- confirm
+	// against the DGL_pixelFormat declaration.
+	D3DFMT_UNKNOWN,	// No depth/stencil buffer
+};
+
+//---------------------------------------------------------------------------
+// Vertex Shaders
+//---------------------------------------------------------------------------
+/*
+// Vertex Shader Declaration
+static DWORD dwTwoSidedLightingDecl[] =
+{
+	D3DVSD_STREAM(0),
+	D3DVSD_REG(0,  D3DVSDT_FLOAT3), 	 // XYZ position
+	D3DVSD_REG(1,  D3DVSDT_FLOAT3), 	 // XYZ normal
+	D3DVSD_REG(2,  D3DVSDT_D3DCOLOR),	 // Diffuse color
+	D3DVSD_REG(3,  D3DVSDT_D3DCOLOR),	 // Specular color
+	D3DVSD_REG(4,  D3DVSDT_FLOAT2), 	 // 2D texture unit 0
+	D3DVSD_REG(5,  D3DVSDT_FLOAT2), 	 // 2D texture unit 1
+	D3DVSD_END()
+};
+
+// Vertex Shader for two-sided lighting
+static char *szTwoSidedLightingVS =
+// This is a test shader!
+"vs.1.0\n"
+"m4x4 oPos,v0,c0\n"
+"mov oD0,v2\n"
+"mov oD1,v3\n"
+"mov oT0,v4\n"
+"mov oT1,v5\n"
+;
+*/
+//---------------------------------------------------------------------------
+//---------------------------------------------------------------------------
+
+// State shared by every DX9 context: the loaded d3d9.dll, its
+// Direct3DCreate9 entry point, and the optionally cached ("persistent")
+// Direct3D9 object and device.
+typedef struct {
+	HINSTANCE			hD3D9DLL;			// Handle to d3d9.dll
+	FNDIRECT3DCREATE9	fnDirect3DCreate9;	// Pointer to the Direct3DCreate9 function
+	BOOL				bDirect3D;			// Persistent Direct3D9 exists
+	BOOL				bDirect3DDevice;	// Persistent Direct3DDevice9 exists
+	IDirect3D9			*pD3D;				// Persistent Direct3D9
+	IDirect3DDevice9	*pDev;				// Persistent Direct3DDevice9
+} GLD_dx9_globals;
+
+// These are "global" to all DX9 contexts. KeithH
+static GLD_dx9_globals dx9Globals;
+
+//---------------------------------------------------------------------------
+//---------------------------------------------------------------------------
+
+// Copy a textual description of a DirectX HRESULT into a caller buffer.
+//
+// hr        Result code to describe.
+// buf       Destination buffer.
+// nBufSize  Size of buf in bytes, including room for the terminating NUL.
+//
+// Returns TRUE if a (possibly truncated) NUL-terminated string was written
+// to buf; FALSE if no description is available or buf cannot hold anything.
+BOOL gldGetDXErrorString_DX(
+	HRESULT hr,
+	char *buf,
+	int nBufSize)
+{
+	const char *pStr;
+
+	// Nothing can be written to a missing or zero-sized buffer.
+	if (buf == NULL || nBufSize <= 0)
+		return FALSE;
+
+	pStr = DXGetErrorString9(hr);
+	if (pStr == NULL)
+		return FALSE;
+
+	// strncpy() leaves the destination unterminated when the source has
+	// nBufSize-1 or more characters, so copy at most nBufSize-1
+	// characters and always write the terminator ourselves.  This also
+	// avoids writing nBufSize+1 bytes when the string is exactly
+	// nBufSize characters long.
+	strncpy(buf, pStr, (size_t)nBufSize - 1);
+	buf[nBufSize - 1] = '\0';
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Pick a multisample type supported for the given surface format, device
+// type and windowed/fullscreen mode, according to glb.dwMultisample:
+//
+//   GLDS_MULTISAMPLE_NONE     D3DMULTISAMPLE_NONE, without querying D3D.
+//   GLDS_MULTISAMPLE_FASTEST  the lowest supported value in 2..16.
+//   anything else             the highest supported value in 2..16.
+//
+// Returns D3DMULTISAMPLE_NONE when no value in 2..16 is supported.
+static D3DMULTISAMPLE_TYPE _gldGetDeviceMultiSampleType(
+	IDirect3D9 *pD3D9,
+	D3DFORMAT SurfaceFormat,
+	D3DDEVTYPE d3dDevType,
+	BOOL Windowed)
+{
+	int			nCandidate;	// Value currently being probed
+	int			nStep;		// +1 when searching upwards, -1 downwards
+	int			nLeft;		// Number of candidates still to probe
+	HRESULT		hr;
+
+	if (glb.dwMultisample == GLDS_MULTISAMPLE_NONE)
+		return D3DMULTISAMPLE_NONE;
+
+	if (glb.dwMultisample == GLDS_MULTISAMPLE_FASTEST) {
+		// Fastest: start at the bottom of the range and work up
+		nCandidate	= 2;
+		nStep		= 1;
+	} else {
+		// Nicest: start at the top of the range and work down
+		nCandidate	= 16;
+		nStep		= -1;
+	}
+
+	// 2..16 inclusive is fifteen candidates in either direction
+	for (nLeft = 15; nLeft > 0; nLeft--, nCandidate += nStep) {
+		hr = IDirect3D9_CheckDeviceMultiSampleType(
+				pD3D9,
+				glb.dwAdapter,
+				d3dDevType,
+				SurfaceFormat,
+				Windowed,
+				(D3DMULTISAMPLE_TYPE)nCandidate,
+				NULL);
+		if (SUCCEEDED(hr))
+			return (D3DMULTISAMPLE_TYPE)nCandidate;
+	}
+
+	// Nothing supported - fall back to no multisampling
+	return D3DMULTISAMPLE_NONE;
+}
+
+//---------------------------------------------------------------------------
+
+// Release the vertex buffer held by a primitive buffer and zero its
+// point, line and triangle counters.
+void _gldDestroyPrimitiveBuffer(
+	GLD_pb_dx9 *gldVB)
+{
+	SAFE_RELEASE(gldVB->pVB);
+
+	// Leave no stale primitive counts behind
+	gldVB->nTriangles	= 0;
+	gldVB->nPoints		= 0;
+	gldVB->nLines		= 0;
+}
+
+//---------------------------------------------------------------------------
+
+// Create the Direct3D vertex buffer behind a primitive buffer and reset
+// its bookkeeping.
+//
+// The buffer is sized for MAX_ARRAY_LOCK_SIZE * 9 vertices of
+// gldVB->dwStride bytes each (1 per point, 2 per line, 6 per quad).
+// Lines start at vertex index MAX_ARRAY_LOCK_SIZE and triangles at
+// MAX_ARRAY_LOCK_SIZE * 3.
+//
+// ctx    GL context; not referenced in this function.
+// lpCtx  Driver context supplying the Direct3D device.
+// gldVB  Primitive buffer; dwStride, dwUsage, dwFVF and dwPool are read,
+//        pVB and the counters/pointers/indices are written.
+//
+// Returns S_OK on success, otherwise the HRESULT from CreateVertexBuffer
+// (after logging the failure).
+HRESULT _gldCreatePrimitiveBuffer(
+	GLcontext *ctx,
+	GLD_driver_dx9 *lpCtx,
+	GLD_pb_dx9 *gldVB)
+{
+	const DWORD	dwVertexBudget	= MAX_ARRAY_LOCK_SIZE;	// Vertices per point region
+	const DWORD	dwBytes			= dwVertexBudget * 9 * gldVB->dwStride;
+	HRESULT		hr;
+
+	hr = IDirect3DDevice9_CreateVertexBuffer(
+		lpCtx->pDev,
+		dwBytes,
+		gldVB->dwUsage,
+		gldVB->dwFVF,
+		gldVB->dwPool,
+		&gldVB->pVB,
+		NULL);
+	if (FAILED(hr)) {
+		ddlogMessage(DDLOG_CRITICAL_OR_WARN, "CreateVertexBuffer failed");
+		return hr;
+	}
+
+	// Nothing queued yet
+	gldVB->nTriangles	= 0;
+	gldVB->nPoints		= 0;
+	gldVB->nLines		= 0;
+
+	// Region pointers are only valid while the buffer is locked
+	gldVB->pTriangles	= NULL;
+	gldVB->pLines		= NULL;
+	gldVB->pPoints		= NULL;
+
+	// First vertex index of the line and triangle regions
+	gldVB->iFirstLine		= dwVertexBudget;
+	gldVB->iFirstTriangle	= dwVertexBudget * 3;
+
+	return S_OK;
+}
+
+//---------------------------------------------------------------------------
+// Function: _gldCreateVertexShaders
+// Create DX9 Vertex Shaders.
+//---------------------------------------------------------------------------
+/*
+void _gldCreateVertexShaders(
+	GLD_driver_dx9 *gld)
+{
+	DWORD			dwFlags;
+	LPD3DXBUFFER	pVSOpcodeBuffer; // Vertex Shader opcode buffer
+	HRESULT			hr;
+
+#ifdef _DEBUG
+	dwFlags = D3DXASM_DEBUG;
+#else
+	dwFlags = 0; // D3DXASM_SKIPVALIDATION;
+#endif
+
+	ddlogMessage(DDLOG_INFO, "Creating shaders...\n");
+
+	// Init the shader handle
+	gld->VStwosidelight.hShader = 0;
+
+	if (gld->d3dCaps8.MaxStreams == 0) {
+		// Lame DX8 driver doesn't support streams
+		// Not fatal, as defaults will be used
+		ddlogMessage(DDLOG_WARN, "Driver doesn't support Vertex Shaders (MaxStreams==0)\n");
+		return;
+	}
+
+	// ** THIS DISABLES VERTEX SHADER SUPPORT **
+//	return;
+	// ** THIS DISABLES VERTEX SHADER SUPPORT **
+
+	//
+	// Two-sided lighting
+	//
+
+#if 0
+	//
+	// DEBUGGING: Load shader from a text file
+	//
+	{
+	LPD3DXBUFFER	pVSErrorBuffer; // Vertex Shader error buffer
+	hr = D3DXAssembleShaderFromFile(
+			"twoside.vsh",
+			dwFlags,
+			NULL, // No constants
+			&pVSOpcodeBuffer,
+			&pVSErrorBuffer);
+	if (pVSErrorBuffer && pVSErrorBuffer->lpVtbl->GetBufferPointer(pVSErrorBuffer))
+		ddlogMessage(DDLOG_INFO, pVSErrorBuffer->lpVtbl->GetBufferPointer(pVSErrorBuffer));
+	SAFE_RELEASE(pVSErrorBuffer);
+	}
+#else
+	{
+	LPD3DXBUFFER	pVSErrorBuffer; // Vertex Shader error buffer
+	// Assemble ascii shader text into shader opcodes
+	hr = D3DXAssembleShader(
+			szTwoSidedLightingVS,
+			strlen(szTwoSidedLightingVS),
+			dwFlags,
+			NULL, // No constants
+			&pVSOpcodeBuffer,
+			&pVSErrorBuffer);
+	if (pVSErrorBuffer && pVSErrorBuffer->lpVtbl->GetBufferPointer(pVSErrorBuffer))
+		ddlogMessage(DDLOG_INFO, pVSErrorBuffer->lpVtbl->GetBufferPointer(pVSErrorBuffer));
+	SAFE_RELEASE(pVSErrorBuffer);
+	}
+#endif
+	if (FAILED(hr)) {
+		ddlogError(DDLOG_WARN, "AssembleShader failed", hr);
+		SAFE_RELEASE(pVSOpcodeBuffer);
+		return;
+	}
+
+// This is for debugging. Remove to enable vertex shaders in HW
+#define _GLD_FORCE_SW_VS 0
+
+	if (_GLD_FORCE_SW_VS) {
+		// _GLD_FORCE_SW_VS should be disabled for Final Release
+		ddlogMessage(DDLOG_SYSTEM, "[Forcing shaders in SW]\n");
+	}
+
+	// Try and create shader in hardware.
+	// NOTE: The D3D Ref device appears to succeed when trying to
+	//       create the device in hardware, but later complains
+	//       when trying to set it with SetVertexShader(). Go figure.
+	if (_GLD_FORCE_SW_VS || glb.dwDriver == GLDS_DRIVER_REF) {
+		// Don't try and create a hardware shader with the Ref device
+		hr = E_FAIL; // COM error/fail result
+	} else {
+		gld->VStwosidelight.bHardware = TRUE;
+		hr = IDirect3DDevice8_CreateVertexShader(
+			gld->pDev,
+			dwTwoSidedLightingDecl,
+			pVSOpcodeBuffer->lpVtbl->GetBufferPointer(pVSOpcodeBuffer),
+			&gld->VStwosidelight.hShader,
+			0);
+	}
+	if (FAILED(hr)) {
+		ddlogMessage(DDLOG_INFO, "... HW failed, trying SW...\n");
+		// Failed. Try and create shader for software processing
+		hr = IDirect3DDevice8_CreateVertexShader(
+			gld->pDev,
+			dwTwoSidedLightingDecl,
+			pVSOpcodeBuffer->lpVtbl->GetBufferPointer(pVSOpcodeBuffer),
+			&gld->VStwosidelight.hShader,
+			D3DUSAGE_SOFTWAREPROCESSING);
+		if (FAILED(hr)) {
+			gld->VStwosidelight.hShader = 0; // Sanity check
+			ddlogError(DDLOG_WARN, "CreateVertexShader failed", hr);
+			return;
+		}
+		// Succeeded, but for software processing
+		gld->VStwosidelight.bHardware = FALSE;
+	}
+
+	SAFE_RELEASE(pVSOpcodeBuffer);
+
+	ddlogMessage(DDLOG_INFO, "... OK\n");
+}
+
+//---------------------------------------------------------------------------
+
+void _gldDestroyVertexShaders(
+	GLD_driver_dx9 *gld)
+{
+	if (gld->VStwosidelight.hShader) {
+		IDirect3DDevice8_DeleteVertexShader(gld->pDev, gld->VStwosidelight.hShader);
+		gld->VStwosidelight.hShader = 0;
+	}
+}
+*/
+//---------------------------------------------------------------------------
+
+BOOL gldCreateDrawable_DX(
+	DGL_ctx *ctx,
+//	BOOL bDefaultDriver,
+	BOOL bDirectDrawPersistant,
+	BOOL bPersistantBuffers)
+{
+	//
+	// bDirectDrawPersistant:	applies to IDirect3D9
+	// bPersistantBuffers:		applies to IDirect3DDevice9
+	//
+
+	HRESULT					hResult;
+	GLD_driver_dx9			*lpCtx = NULL;
+	D3DDEVTYPE				d3dDevType;
+	D3DPRESENT_PARAMETERS	d3dpp;
+	D3DDISPLAYMODE			d3ddm;
+	DWORD					dwBehaviourFlags;
+	D3DADAPTER_IDENTIFIER9	d3dIdent;
+
+	// Error if context is NULL.
+	if (ctx == NULL)
+		return FALSE;
+
+	if (ctx->glPriv) {
+		lpCtx = ctx->glPriv;
+		// Release any existing interfaces
+		SAFE_RELEASE(lpCtx->pDev);
+		SAFE_RELEASE(lpCtx->pD3D);
+	} else {
+		lpCtx = (GLD_driver_dx9*)malloc(sizeof(GLD_driver_dx9));
+		ZeroMemory(lpCtx, sizeof(lpCtx));
+	}
+
+	d3dDevType = (glb.dwDriver == GLDS_DRIVER_HAL) ? D3DDEVTYPE_HAL : D3DDEVTYPE_REF;
+	// TODO: Check this
+//	if (bDefaultDriver)
+//		d3dDevType = D3DDEVTYPE_REF;
+
+	// Use persistant interface if needed
+	if (bDirectDrawPersistant && dx9Globals.bDirect3D) {
+		lpCtx->pD3D = dx9Globals.pD3D;
+		IDirect3D9_AddRef(lpCtx->pD3D);
+		goto SkipDirectDrawCreate;
+	}
+
+	// Create Direct3D9 object
+	lpCtx->pD3D = dx9Globals.fnDirect3DCreate9(D3D_SDK_VERSION);
+	if (lpCtx->pD3D == NULL) {
+		MessageBox(NULL, "Unable to initialize Direct3D9", "GLDirect", MB_OK);
+		ddlogMessage(DDLOG_CRITICAL_OR_WARN, "Unable to create Direct3D9 interface");
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+
+	// Cache Direct3D interface for subsequent GLRCs
+	if (bDirectDrawPersistant && !dx9Globals.bDirect3D) {
+		dx9Globals.pD3D = lpCtx->pD3D;
+		IDirect3D9_AddRef(dx9Globals.pD3D);
+		dx9Globals.bDirect3D = TRUE;
+	}
+SkipDirectDrawCreate:
+
+	// Get the display mode so we can make a compatible backbuffer
+	hResult = IDirect3D9_GetAdapterDisplayMode(lpCtx->pD3D, glb.dwAdapter, &d3ddm);
+	if (FAILED(hResult)) {
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+
+	// Get device caps
+	hResult = IDirect3D9_GetDeviceCaps(lpCtx->pD3D, glb.dwAdapter, d3dDevType, &lpCtx->d3dCaps9);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "IDirect3D9_GetDeviceCaps failed", hResult);
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+
+	// Check for hardware transform & lighting
+	lpCtx->bHasHWTnL = lpCtx->d3dCaps9.DevCaps & D3DDEVCAPS_HWTRANSFORMANDLIGHT ? TRUE : FALSE;
+
+/*
+	//
+	// GONE FOR DX9?
+	//
+	// If this flag is present then we can't default to Mesa
+	// SW rendering between BeginScene() and EndScene().
+	if (lpCtx->d3dCaps9.Caps2 & D3DCAPS2_NO2DDURING3DSCENE) {
+		ddlogMessage(DDLOG_WARN,
+			"Warning          : No 2D allowed during 3D scene.\n");
+	}
+*/
+
+	//
+	//	Create the Direct3D context
+	//
+
+	// Re-use original IDirect3DDevice if persistant buffers exist.
+	// Note that we test for persistant IDirect3D9 as well
+	// bDirectDrawPersistant == persistant IDirect3D9 (DirectDraw9 does not exist)
+	if (bDirectDrawPersistant && bPersistantBuffers && dx9Globals.pD3D && dx9Globals.pDev) {
+		lpCtx->pDev = dx9Globals.pDev;
+		IDirect3DDevice9_AddRef(dx9Globals.pDev);
+		goto skip_direct3ddevice_create;
+	}
+
+	// Clear the presentation parameters (sets all members to zero)
+	ZeroMemory(&d3dpp, sizeof(d3dpp));
+
+	// Recommended by MS; needed for MultiSample.
+	// Be careful if altering this for FullScreenBlit
+	d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
+
+	d3dpp.BackBufferFormat	= d3ddm.Format;
+	d3dpp.BackBufferCount	= 2; //1;
+	d3dpp.MultiSampleType	= _gldGetDeviceMultiSampleType(lpCtx->pD3D, d3ddm.Format, d3dDevType, !ctx->bFullscreen);
+	d3dpp.AutoDepthStencilFormat	= ctx->lpPF->dwDriverData;
+	d3dpp.EnableAutoDepthStencil	= (d3dpp.AutoDepthStencilFormat == D3DFMT_UNKNOWN) ? FALSE : TRUE;
+
+	if (ctx->bFullscreen) {
+		ddlogWarnOption(FALSE); // Don't popup any messages in fullscreen
+		d3dpp.Windowed							= FALSE;
+		d3dpp.BackBufferWidth					= d3ddm.Width;
+		d3dpp.BackBufferHeight					= d3ddm.Height;
+		d3dpp.hDeviceWindow						= ctx->hWnd;
+		d3dpp.FullScreen_RefreshRateInHz		= D3DPRESENT_RATE_DEFAULT;
+
+		// Support for vertical retrace synchronisation.
+		// Set default presentation interval in case caps bits are missing
+		d3dpp.PresentationInterval	= D3DPRESENT_INTERVAL_DEFAULT;
+		if (glb.bWaitForRetrace) {
+			if (lpCtx->d3dCaps9.PresentationIntervals & D3DPRESENT_INTERVAL_ONE)
+				d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_ONE;
+		} else {
+			if (lpCtx->d3dCaps9.PresentationIntervals & D3DPRESENT_INTERVAL_IMMEDIATE)
+				d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;
+		}
+	} else {
+		ddlogWarnOption(glb.bMessageBoxWarnings); // OK to popup messages
+		d3dpp.Windowed							= TRUE;
+		d3dpp.BackBufferWidth					= ctx->dwWidth;
+		d3dpp.BackBufferHeight					= ctx->dwHeight;
+		d3dpp.hDeviceWindow						= ctx->hWnd;
+		d3dpp.FullScreen_RefreshRateInHz		= 0;
+		// PresentationInterval Windowed mode is optional now in DX9 (DaveM)
+		d3dpp.PresentationInterval	= D3DPRESENT_INTERVAL_DEFAULT;
+		if (glb.bWaitForRetrace) {
+				d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_ONE;
+		} else {
+				d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;
+		}
+	}
+
+	// Decide if we can use hardware TnL
+	dwBehaviourFlags = (lpCtx->bHasHWTnL) ?
+		D3DCREATE_MIXED_VERTEXPROCESSING : D3DCREATE_SOFTWARE_VERTEXPROCESSING;
+	// Add flag to tell D3D to be thread-safe
+	if (glb.bMultiThreaded)
+		dwBehaviourFlags |= D3DCREATE_MULTITHREADED;
+	// Add flag to tell D3D to be FPU-safe
+	if (!glb.bFastFPU)
+		dwBehaviourFlags |= D3DCREATE_FPU_PRESERVE;
+	hResult = IDirect3D9_CreateDevice(lpCtx->pD3D,
+								glb.dwAdapter,
+								d3dDevType,
+								ctx->hWnd,
+								dwBehaviourFlags,
+								&d3dpp,
+								&lpCtx->pDev);
+    if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "IDirect3D9_CreateDevice failed", hResult);
+        nContextError = GLDERR_D3D;
+		goto return_with_error;
+	}
+
+	if (bDirectDrawPersistant && bPersistantBuffers && dx9Globals.pD3D) {
+		dx9Globals.pDev = lpCtx->pDev;
+		dx9Globals.bDirect3DDevice = TRUE;
+	}
+
+	// Dump some useful stats
+	hResult = IDirect3D9_GetAdapterIdentifier(
+		lpCtx->pD3D,
+		glb.dwAdapter,
+		0, // No WHQL detection (avoid few seconds delay)
+		&d3dIdent);
+	if (SUCCEEDED(hResult)) {
+		ddlogPrintf(DDLOG_INFO, "[Driver Description: %s]", &d3dIdent.Description);
+		ddlogPrintf(DDLOG_INFO, "[Driver file: %s %d.%d.%02d.%d]",
+			d3dIdent.Driver,
+			HIWORD(d3dIdent.DriverVersion.HighPart),
+			LOWORD(d3dIdent.DriverVersion.HighPart),
+			HIWORD(d3dIdent.DriverVersion.LowPart),
+			LOWORD(d3dIdent.DriverVersion.LowPart));
+		ddlogPrintf(DDLOG_INFO, "[VendorId: 0x%X, DeviceId: 0x%X, SubSysId: 0x%X, Revision: 0x%X]",
+			d3dIdent.VendorId, d3dIdent.DeviceId, d3dIdent.SubSysId, d3dIdent.Revision);
+	}
+
+	// Test to see if IHV driver exposes Scissor Test (new for DX9)
+	lpCtx->bCanScissor = lpCtx->d3dCaps9.RasterCaps & D3DPRASTERCAPS_SCISSORTEST;
+	ddlogPrintf(DDLOG_INFO, "Can Scissor: %s", lpCtx->bCanScissor ? "Yes" : "No");
+
+	// Init projection matrix for D3D TnL
+	D3DXMatrixIdentity(&lpCtx->matProjection);
+	lpCtx->matModelView = lpCtx->matProjection;
+//		gld->bUseMesaProjection = TRUE;
+
+skip_direct3ddevice_create:
+
+	// Create buffers to hold primitives
+	lpCtx->PB2d.dwFVF		= GLD_FVF_2D_VERTEX;
+	lpCtx->PB2d.dwPool		= D3DPOOL_SYSTEMMEM;
+	lpCtx->PB2d.dwStride	= sizeof(GLD_2D_VERTEX);
+	lpCtx->PB2d.dwUsage		= D3DUSAGE_DONOTCLIP |
+								D3DUSAGE_DYNAMIC |
+								D3DUSAGE_SOFTWAREPROCESSING |
+								D3DUSAGE_WRITEONLY;
+	hResult = _gldCreatePrimitiveBuffer(ctx->glCtx, lpCtx, &lpCtx->PB2d);
+	if (FAILED(hResult))
+		goto return_with_error;
+
+	lpCtx->PB3d.dwFVF		= GLD_FVF_3D_VERTEX;
+	lpCtx->PB3d.dwPool		= D3DPOOL_DEFAULT;
+	lpCtx->PB3d.dwStride	= sizeof(GLD_3D_VERTEX);
+	lpCtx->PB3d.dwUsage		= D3DUSAGE_DYNAMIC |
+//DaveM								D3DUSAGE_SOFTWAREPROCESSING |
+								D3DUSAGE_WRITEONLY;
+	hResult = _gldCreatePrimitiveBuffer(ctx->glCtx, lpCtx, &lpCtx->PB3d);
+	if (FAILED(hResult))
+		goto return_with_error;
+
+/*	// NOTE: A FVF code of zero indicates a non-FVF vertex buffer (for vertex shaders)
+	lpCtx->PBtwosidelight.dwFVF		= 0; //GLD_FVF_TWOSIDED_VERTEX;
+	lpCtx->PBtwosidelight.dwPool	= D3DPOOL_DEFAULT;
+	lpCtx->PBtwosidelight.dwStride	= sizeof(GLD_TWOSIDED_VERTEX);
+	lpCtx->PBtwosidelight.dwUsage	= D3DUSAGE_DONOTCLIP |
+								D3DUSAGE_DYNAMIC |
+								D3DUSAGE_SOFTWAREPROCESSING |
+								D3DUSAGE_WRITEONLY;
+	hResult = _gldCreatePrimitiveBuffer(ctx->glCtx, lpCtx, &lpCtx->PBtwosidelight);
+	if (FAILED(hResult))
+		goto return_with_error;*/
+
+	// Now try and create the DX9 Vertex Shaders
+//	_gldCreateVertexShaders(lpCtx);
+
+	// Zero the pipeline usage counters
+	lpCtx->PipelineUsage.qwMesa.QuadPart = 
+//	lpCtx->PipelineUsage.dwD3D2SVS.QuadPart =
+	lpCtx->PipelineUsage.qwD3DFVF.QuadPart = 0;
+
+	// Assign drawable to GL private
+	ctx->glPriv = lpCtx;
+	return TRUE;
+
+return_with_error:
+	// Clean up and bail
+
+//	_gldDestroyVertexShaders(lpCtx);
+
+//	_gldDestroyPrimitiveBuffer(&lpCtx->PBtwosidelight);
+	_gldDestroyPrimitiveBuffer(&lpCtx->PB3d);
+	_gldDestroyPrimitiveBuffer(&lpCtx->PB2d);
+
+	SAFE_RELEASE(lpCtx->pDev);
+	SAFE_RELEASE(lpCtx->pD3D);
+	return FALSE;
+}
+
+//---------------------------------------------------------------------------
+
+// Resize the drawable by resetting the Direct3D device with presentation
+// parameters rebuilt from the display mode (fullscreen) or the context's
+// width/height (windowed). D3DPOOL_DEFAULT primitive buffers are destroyed
+// before Reset() and recreated afterwards. bPersistantInterface and
+// bPersistantBuffers are unused. Returns FALSE if ctx or its drawable is
+// NULL, or if the display-mode query, Reset() or a buffer recreation fails.
+BOOL gldResizeDrawable_DX(
+	DGL_ctx *ctx,
+	BOOL bDefaultDriver,
+	BOOL bPersistantInterface,
+	BOOL bPersistantBuffers)
+{
+	GLD_driver_dx9			*gld = NULL;
+	D3DDEVTYPE				d3dDevType;
+	D3DPRESENT_PARAMETERS	d3dpp;
+	D3DDISPLAYMODE			d3ddm;
+	HRESULT					hResult;
+
+	if (ctx == NULL)
+		return FALSE;
+
+	gld = ctx->glPriv;
+	if (gld == NULL)
+		return FALSE;
+
+	// End any scene in progress before the device is reset
+	if (ctx->bSceneStarted) {
+		IDirect3DDevice9_EndScene(gld->pDev);
+		ctx->bSceneStarted = FALSE;
+	}
+
+	d3dDevType = (glb.dwDriver == GLDS_DRIVER_HAL) ? D3DDEVTYPE_HAL : D3DDEVTYPE_REF;
+	if (!bDefaultDriver)
+		d3dDevType = D3DDEVTYPE_REF; // Force Direct3D Reference Rasteriser (software)
+
+	// Get the display mode so we can make a compatible backbuffer
+	hResult = IDirect3D9_GetAdapterDisplayMode(gld->pD3D, glb.dwAdapter, &d3ddm);
+	if (FAILED(hResult)) {
+		nContextError = GLDERR_D3D;
+		return FALSE;
+	}
+
+	// Release POOL_DEFAULT objects before Reset()
+	if (gld->PB2d.dwPool == D3DPOOL_DEFAULT)
+		_gldDestroyPrimitiveBuffer(&gld->PB2d);
+	if (gld->PB3d.dwPool == D3DPOOL_DEFAULT)
+		_gldDestroyPrimitiveBuffer(&gld->PB3d);
+
+	// Clear the presentation parameters (sets all members to zero)
+	ZeroMemory(&d3dpp, sizeof(d3dpp));
+
+	// Recommended by MS; needed for MultiSample.
+	// Be careful if altering this for FullScreenBlit
+	d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
+
+	d3dpp.BackBufferFormat	= d3ddm.Format;
+	d3dpp.BackBufferCount	= 1;
+	d3dpp.MultiSampleType	= _gldGetDeviceMultiSampleType(gld->pD3D, d3ddm.Format, d3dDevType, !ctx->bFullscreen);
+	d3dpp.AutoDepthStencilFormat	= ctx->lpPF->dwDriverData;
+	d3dpp.EnableAutoDepthStencil	= (d3dpp.AutoDepthStencilFormat == D3DFMT_UNKNOWN) ? FALSE : TRUE;
+	d3dpp.hDeviceWindow		= ctx->hWnd;
+	// Default presentation interval, kept if the wanted caps bit is missing
+	d3dpp.PresentationInterval	= D3DPRESENT_INTERVAL_DEFAULT;
+
+	if (ctx->bFullscreen) {
+		ddlogWarnOption(FALSE); // Don't popup any messages in fullscreen
+		d3dpp.Windowed						= FALSE;
+		d3dpp.BackBufferWidth				= d3ddm.Width;
+		d3dpp.BackBufferHeight				= d3ddm.Height;
+		d3dpp.FullScreen_RefreshRateInHz	= D3DPRESENT_RATE_DEFAULT;
+		// Apply the retrace option the same way device creation does
+		if (glb.bWaitForRetrace) {
+			if (gld->d3dCaps9.PresentationIntervals & D3DPRESENT_INTERVAL_ONE)
+				d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_ONE;
+		} else {
+			if (gld->d3dCaps9.PresentationIntervals & D3DPRESENT_INTERVAL_IMMEDIATE)
+				d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;
+		}
+	} else {
+		ddlogWarnOption(glb.bMessageBoxWarnings); // OK to popup messages
+		d3dpp.Windowed						= TRUE;
+		d3dpp.BackBufferWidth				= ctx->dwWidth;
+		d3dpp.BackBufferHeight				= ctx->dwHeight;
+		d3dpp.FullScreen_RefreshRateInHz	= 0;
+		d3dpp.PresentationInterval = glb.bWaitForRetrace ?
+			D3DPRESENT_INTERVAL_ONE : D3DPRESENT_INTERVAL_IMMEDIATE;
+	}
+	hResult = IDirect3DDevice9_Reset(gld->pDev, &d3dpp);
+	if (FAILED(hResult)) {
+		ddlogError(DDLOG_CRITICAL_OR_WARN, "dglResize: Reset failed", hResult);
+		return FALSE;
+	}
+
+	// Recreate POOL_DEFAULT objects, reporting a failure to the caller
+	if (gld->PB2d.dwPool == D3DPOOL_DEFAULT) {
+		hResult = _gldCreatePrimitiveBuffer(ctx->glCtx, gld, &gld->PB2d);
+		if (FAILED(hResult))
+			return FALSE;
+	}
+	if (gld->PB3d.dwPool == D3DPOOL_DEFAULT) {
+		hResult = _gldCreatePrimitiveBuffer(ctx->glCtx, gld, &gld->PB3d);
+		if (FAILED(hResult))
+			return FALSE;
+	}
+
+	// Signal a complete state update
+	ctx->glCtx->Driver.UpdateState(ctx->glCtx, _NEW_ALL);
+
+	// Begin a new scene
+	IDirect3DDevice9_BeginScene(gld->pDev);
+	ctx->bSceneStarted = TRUE;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Destroy the DX9 drawable attached to ctx: destroys both primitive buffers,
+// hands the device and IDirect3D9 pointers to SAFE_RELEASE and frees glPriv.
+// Returns FALSE if ctx is NULL or has no drawable, TRUE otherwise.
+BOOL gldDestroyDrawable_DX(
+	DGL_ctx *ctx)
+{
+	GLD_driver_dx9			*lpCtx = NULL;
+
+	if (!ctx)
+		return FALSE;
+
+	// Error if the drawable does not exist.
+	if (!ctx->glPriv)
+		return FALSE;
+
+	lpCtx = ctx->glPriv;
+
+#ifdef _DEBUG
+	// Dump out stats. The low part is zero-padded to eight hex digits so the
+	// two halves read as one 64-bit value (assumes a 32-bit LowPart).
+	ddlogPrintf(DDLOG_SYSTEM, "Usage: M:0x%X%08X, D:0x%X%08X",
+		lpCtx->PipelineUsage.qwMesa.HighPart,
+		lpCtx->PipelineUsage.qwMesa.LowPart,
+		lpCtx->PipelineUsage.qwD3DFVF.HighPart,
+		lpCtx->PipelineUsage.qwD3DFVF.LowPart);
+#endif
+
+	_gldDestroyPrimitiveBuffer(&lpCtx->PB3d);
+	_gldDestroyPrimitiveBuffer(&lpCtx->PB2d);
+
+	SAFE_RELEASE(lpCtx->pDev);
+	SAFE_RELEASE(lpCtx->pD3D);
+
+	// Free the private drawable data
+	free(ctx->glPriv);
+	ctx->glPriv = NULL;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Load D3D9.DLL and resolve Direct3DCreate9 into dx9Globals; returns FALSE (keeping no handle) on failure.
+BOOL gldCreatePrivateGlobals_DX(void)
+{
+	ZeroMemory(&dx9Globals, sizeof(dx9Globals));
+
+	dx9Globals.hD3D9DLL = LoadLibrary("D3D9.DLL");
+	if (dx9Globals.hD3D9DLL == NULL)
+		return FALSE;
+
+	// Now try and obtain Direct3DCreate9
+	dx9Globals.fnDirect3DCreate9 = (FNDIRECT3DCREATE9)GetProcAddress(dx9Globals.hD3D9DLL, "Direct3DCreate9");
+	if (dx9Globals.fnDirect3DCreate9 == NULL) {
+		FreeLibrary(dx9Globals.hD3D9DLL);
+		dx9Globals.hD3D9DLL = NULL; // No stale handle left for a later FreeLibrary()
+		return FALSE;
+	}
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Release the persistent D3D objects held in dx9Globals and unload D3D9.DLL. Always returns TRUE.
+BOOL gldDestroyPrivateGlobals_DX(void)
+{
+	if (dx9Globals.bDirect3DDevice) {
+		SAFE_RELEASE(dx9Globals.pDev);
+		dx9Globals.bDirect3DDevice = FALSE;
+	}
+	if (dx9Globals.bDirect3D) {
+		SAFE_RELEASE(dx9Globals.pD3D);
+		dx9Globals.bDirect3D = FALSE;
+	}
+	if (dx9Globals.hD3D9DLL != NULL) // Nothing to unload when no library handle is held
+		FreeLibrary(dx9Globals.hD3D9DLL);
+	dx9Globals.hD3D9DLL = NULL;
+	dx9Globals.fnDirect3DCreate9 = NULL;
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Report the total colour depth and the per-channel bit counts for a
+// display format. Any format other than the four listed in the switch is
+// reported as 32-bit colour with 8 bits per RGB channel and no alpha.
+static void _BitsFromDisplayFormat(
+	D3DFORMAT fmt,
+	BYTE *cColorBits,
+	BYTE *cRedBits,
+	BYTE *cGreenBits,
+	BYTE *cBlueBits,
+	BYTE *cAlphaBits)
+{
+	// Start from the X8R8G8B8 layout, which is also the fallback
+	BYTE	nColour	= 32;
+	BYTE	nRed	= 8;
+	BYTE	nGreen	= 8;
+	BYTE	nBlue	= 8;
+	BYTE	nAlpha	= 0;
+
+	switch (fmt) {
+	case D3DFMT_X1R5G5B5:
+		nColour	= 16;
+		nRed	= 5;
+		nGreen	= 5;
+		nBlue	= 5;
+		break;
+	case D3DFMT_R5G6B5:
+		nColour	= 16;
+		nRed	= 5;
+		nGreen	= 6;
+		nBlue	= 5;
+		break;
+	case D3DFMT_A8R8G8B8:
+		nAlpha	= 8;
+		break;
+	case D3DFMT_X8R8G8B8:
+	default:
+		// Other formats should not get here; the fallback values apply
+		break;
+	}
+
+	*cColorBits	= nColour;
+	*cRedBits	= nRed;
+	*cGreenBits	= nGreen;
+	*cBlueBits	= nBlue;
+	*cAlphaBits	= nAlpha;
+}
+
+//---------------------------------------------------------------------------
+
+// Report the depth and stencil bit counts for a depth-stencil format.
+// For a format not listed in the switch, neither output is written.
+static void _BitsFromDepthStencilFormat(
+	D3DFORMAT fmt,
+	BYTE *cDepthBits,
+	BYTE *cStencilBits)
+{
+	BYTE	nDepth, nStencil;
+
+	// NOTE: GL expects either 32 or 16 as depth bits.
+	switch (fmt) {
+	case D3DFMT_D16:
+		nDepth = 16;	nStencil = 0;
+		break;
+	case D3DFMT_D15S1:
+		nDepth = 16;	nStencil = 1;
+		break;
+	case D3DFMT_D32:
+	case D3DFMT_D24X8:
+		nDepth = 32;	nStencil = 0;
+		break;
+	case D3DFMT_D24X4S4:
+		nDepth = 32;	nStencil = 4;
+		break;
+	case D3DFMT_D24S8:
+		nDepth = 32;	nStencil = 8;
+		break;
+	default:
+		return; // Unlisted format: leave both outputs untouched
+	}
+
+	*cDepthBits = nDepth;
+	*cStencilBits = nStencil;
+}
+
+//---------------------------------------------------------------------------
+
+// Append one pixelformat per depth-stencil format to the list at pPF. Each
+// entry is a copy of *pProto with its depth/stencil bits and driver data
+// taken from the matching entry of fmt[]. Returns the position just past
+// the last entry written.
+static DGL_pixelFormat *_gldAppendPixelformats(
+	DGL_pixelFormat *pPF,
+	const DGL_pixelFormat *pProto,
+	const D3DFORMAT *fmt,
+	int nFormats)
+{
+	int i;
+
+	for (i=0; i<nFormats; i++, pPF++) {
+		memcpy(pPF, pProto, sizeof(DGL_pixelFormat));
+		_BitsFromDepthStencilFormat(fmt[i], &pPF->pfd.cDepthBits, &pPF->pfd.cStencilBits);
+		pPF->dwDriverData		= fmt[i];
+	}
+
+	return pPF;
+}
+
+//---------------------------------------------------------------------------
+
+// Rebuild the global pixelformat list (glb.lpPF / glb.nPixelFormatCount).
+//
+// Each depth-stencil format that passes CheckDeviceFormat and
+// CheckDepthStencilMatch against the current display format yields two
+// entries: the single-buffered ones come first, then the double-buffered
+// ones. Pixelformats without a depth buffer are not offered.
+//
+// Returns TRUE on success. Returns FALSE, leaving the list empty, if
+// Direct3DCreate9 is unavailable or fails, the display mode cannot be
+// queried, no depth-stencil format is usable, or the allocation fails.
+BOOL gldBuildPixelformatList_DX(void)
+{
+	D3DDISPLAYMODE		d3ddm;
+	D3DDEVTYPE			d3dDevType;
+	D3DFORMAT			fmt[6];
+	IDirect3D9			*pD3D = NULL;
+	HRESULT				hr;
+	int					nSupportedFormats = 0;
+	int					i;
+	DGL_pixelFormat		*pPF;
+	DGL_pixelFormat		pfProto;
+	DWORD				dwTemplateFlags;
+	BYTE				cColorBits, cRedBits, cGreenBits, cBlueBits, cAlphaBits;
+
+	// Direct3D (SW or HW)
+	// These are arranged so that 'best' pixelformat
+	// is higher in the list (for ChoosePixelFormat).
+	const D3DFORMAT DepthStencil[6] = {
+// New order: increasing Z, then increasing stencil
+		D3DFMT_D15S1,
+		D3DFMT_D16,
+		D3DFMT_D24X4S4,
+		D3DFMT_D24X8,
+		D3DFMT_D24S8,
+		D3DFMT_D32,
+	};
+
+	// Dump DX version
+	ddlogMessage(GLDLOG_SYSTEM, "DirectX Version  : 9.0\n");
+
+	// Release any existing pixelformat list
+	if (glb.lpPF) {
+		free(glb.lpPF);
+	}
+
+	glb.nPixelFormatCount	= 0;
+	glb.lpPF				= NULL;
+
+	//
+	// Pixelformats for Direct3D (SW or HW) rendering
+	//
+
+	// The entry point is NULL when it was never resolved from D3D9.DLL;
+	// fail cleanly instead of calling through a NULL pointer.
+	if (dx9Globals.fnDirect3DCreate9 == NULL)
+		return FALSE;
+
+	// Get a Direct3D 9.0 interface
+	pD3D = dx9Globals.fnDirect3DCreate9(D3D_SDK_VERSION);
+	if (!pD3D) {
+		return FALSE;
+	}
+
+	// We will use the display mode format when finding compliant
+	// rendertarget/depth-stencil surfaces.
+	hr = IDirect3D9_GetAdapterDisplayMode(pD3D, glb.dwAdapter, &d3ddm);
+	if (FAILED(hr)) {
+		IDirect3D9_Release(pD3D);
+		return FALSE;
+	}
+
+	d3dDevType = (glb.dwDriver == GLDS_DRIVER_HAL) ? D3DDEVTYPE_HAL : D3DDEVTYPE_REF;
+
+	// Run through the possible formats and detect supported formats
+	for (i=0; i<6; i++) {
+		hr = IDirect3D9_CheckDeviceFormat(
+			pD3D,
+			glb.dwAdapter,
+			d3dDevType,
+			d3ddm.Format,
+			D3DUSAGE_DEPTHSTENCIL,
+			D3DRTYPE_SURFACE,
+			DepthStencil[i]);
+		if (FAILED(hr))
+			// A failure here is not fatal.
+			continue;
+
+		// Verify that the depth format is compatible.
+		hr = IDirect3D9_CheckDepthStencilMatch(
+			pD3D,
+			glb.dwAdapter,
+			d3dDevType,
+			d3ddm.Format,
+			d3ddm.Format,
+			DepthStencil[i]);
+		if (FAILED(hr))
+			// A failure here is not fatal, just means depth-stencil
+			// format is not compatible with this display mode.
+			continue;
+
+		fmt[nSupportedFormats++] = DepthStencil[i];
+	}
+
+	IDirect3D9_Release(pD3D);
+
+	if (nSupportedFormats == 0)
+		return FALSE; // Bail: no compliant pixelformats
+
+	// Total count of pixelformats is nSupportedFormats*2:
+	// one single-buffered and one double-buffered entry per format.
+	glb.lpPF = (DGL_pixelFormat *)calloc(nSupportedFormats*2, sizeof(DGL_pixelFormat));
+	if (glb.lpPF == NULL)
+		return FALSE;
+	glb.nPixelFormatCount = nSupportedFormats*2;
+
+	// Cache colour bits from display format
+	_BitsFromDisplayFormat(d3ddm.Format, &cColorBits, &cRedBits, &cGreenBits, &cBlueBits, &cAlphaBits);
+
+	// Prototype entry: the hardware template plus the display's colour bits
+	memcpy(&pfProto, &pfTemplateHW, sizeof(DGL_pixelFormat));
+	pfProto.pfd.cColorBits	= cColorBits;
+	pfProto.pfd.cRedBits	= cRedBits;
+	pfProto.pfd.cGreenBits	= cGreenBits;
+	pfProto.pfd.cBlueBits	= cBlueBits;
+	pfProto.pfd.cAlphaBits	= cAlphaBits;
+	dwTemplateFlags			= pfProto.pfd.dwFlags;
+
+	//
+	// Add single-buffer formats
+	// NOTE: No longer returning pixelformats that don't contain depth
+	//
+	pfProto.pfd.dwFlags = dwTemplateFlags & ~PFD_DOUBLEBUFFER; // Remove doublebuffer flag
+	pPF = _gldAppendPixelformats(glb.lpPF, &pfProto, fmt, nSupportedFormats);
+
+	//
+	// Add double-buffer formats
+	// NOTE: No longer returning pixelformats that don't contain depth
+	//
+	pfProto.pfd.dwFlags = dwTemplateFlags;
+	_gldAppendPixelformats(pPF, &pfProto, fmt, nSupportedFormats);
+
+	// Popup warning message if non RGB color mode
+	{
+		// This is a hack. KeithH
+		HDC hdcDesktop = GetDC(NULL);
+		DWORD dwDisplayBitDepth = GetDeviceCaps(hdcDesktop, BITSPIXEL);
+		ReleaseDC(0, hdcDesktop);
+		if (dwDisplayBitDepth <= 8) {
+			ddlogPrintf(DDLOG_WARN, "Current Color Depth %d bpp is not supported", dwDisplayBitDepth);
+			MessageBox(NULL, szColorDepthWarning, "GLDirect", MB_OK | MB_ICONWARNING);
+		}
+	}
+
+	// Mark list as 'current'
+	glb.bPixelformatsDirty = FALSE;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Configure the Mesa context and the D3D device of a drawable.
+//
+// Sets, in order: the texture unit and texture level limits, a handful of
+// initial render states, an identity view matrix, the vertex-processing
+// mode and the DX9 extension/pipeline/driver hooks. It then signals a full
+// state update and begins a scene.
+//
+// lpCtx->lpPF and lpCtx->glCtx are dereferenced without NULL checks.
+//
+// Returns FALSE if lpCtx or its drawable is NULL, TRUE otherwise.
+BOOL gldInitialiseMesa_DX(
+	DGL_ctx *lpCtx)
+{
+	GLD_driver_dx9	*gld = NULL;
+	D3DXMATRIX		matView;
+	int				nMaxSize, nLevels;
+	BOOL			bSWVertexProc;
+
+	if (lpCtx == NULL)
+		return FALSE;
+	if (lpCtx->glPriv == NULL)
+		return FALSE;
+	gld = lpCtx->glPriv;
+
+	//
+	// Texture units
+	//
+	if (!glb.bMultitexture) {
+		// Multitexture override
+		lpCtx->glCtx->Const.MaxTextureUnits = 1;
+	} else {
+		// Use what the device offers, but only support up to
+		// GLD_MAX_TEXTURE_UNITS_DX9 texture units.
+		// ** If this is altered then the FVF formats must be reviewed **.
+		lpCtx->glCtx->Const.MaxTextureUnits = gld->d3dCaps9.MaxSimultaneousTextures;
+		if (lpCtx->glCtx->Const.MaxTextureUnits > GLD_MAX_TEXTURE_UNITS_DX9)
+			lpCtx->glCtx->Const.MaxTextureUnits = GLD_MAX_TEXTURE_UNITS_DX9;
+	}
+
+	//
+	// Texture size: the smaller of the device's maximum width and height
+	//
+	nMaxSize = min(gld->d3dCaps9.MaxTextureHeight, gld->d3dCaps9.MaxTextureWidth);
+	if (nMaxSize == 0)
+		nMaxSize = 256; // Sanity check
+
+	// HACK!!
+	if (nMaxSize > 1024)
+		nMaxSize = 1024; // HACK - CLAMP TO 1024
+	// HACK!!
+
+	// Mesa takes the maximum texture size as a level count: one level
+	// per halving of the size until it reaches zero.
+	for (nLevels = 0; nMaxSize; nMaxSize >>= 1) {
+		nLevels++;
+	}
+	lpCtx->glCtx->Const.MaxTextureLevels = (nLevels) ? nLevels : 8;
+	lpCtx->glCtx->Const.MaxDrawBuffers = 1;
+
+	//
+	// Initial render states
+	//
+	IDirect3DDevice9_SetRenderState(gld->pDev, D3DRS_LIGHTING, FALSE);
+	IDirect3DDevice9_SetRenderState(gld->pDev, D3DRS_CULLMODE, D3DCULL_NONE);
+	IDirect3DDevice9_SetRenderState(gld->pDev, D3DRS_DITHERENABLE, TRUE);
+	IDirect3DDevice9_SetRenderState(gld->pDev, D3DRS_SHADEMODE, D3DSHADE_GOURAUD);
+
+	// Z-buffering only if the pixelformat carries a depth-stencil format
+	IDirect3DDevice9_SetRenderState(gld->pDev, D3DRS_ZENABLE,
+		(lpCtx->lpPF->dwDriverData!=D3DFMT_UNKNOWN) ? D3DZB_TRUE : D3DZB_FALSE);
+
+	// The view matrix is the identity
+	D3DXMatrixIdentity(&matView);
+	IDirect3DDevice9_SetTransform(gld->pDev, D3DTS_VIEW, &matView);
+
+	//
+	// Vertex processing: software when there is no hardware TnL, or when
+	// glb.dwTnL selects Mesa or D3D software TnL; hardware otherwise.
+	//
+	if (!gld->bHasHWTnL) {
+		// No HW TnL, so no choice possible
+		bSWVertexProc = TRUE;
+	} else if (glb.dwTnL == GLDS_TNL_DEFAULT) {
+		bSWVertexProc = FALSE; // HW TnL
+	} else {
+		bSWVertexProc = ((glb.dwTnL == GLDS_TNL_MESA) || (glb.dwTnL == GLDS_TNL_D3DSW)) ? TRUE : FALSE;
+	}
+	IDirect3DDevice9_SetSoftwareVertexProcessing(gld->pDev, bSWVertexProc);
+
+	// Dump this in a Release build as well, now.
+	ddlogPrintf(DDLOG_INFO, "HW TnL: %s",
+		gld->bHasHWTnL ? (bSWVertexProc ? "Disabled" : "Enabled") : "Unavailable");
+
+	gldEnableExtensions_DX9(lpCtx->glCtx);
+	gldInstallPipeline_DX9(lpCtx->glCtx);
+	gldSetupDriverPointers_DX9(lpCtx->glCtx);
+
+	// Signal a complete state update
+	lpCtx->glCtx->Driver.UpdateState(lpCtx->glCtx, _NEW_ALL);
+
+	// Start a scene
+	IDirect3DDevice9_BeginScene(gld->pDev);
+	lpCtx->bSceneStarted = TRUE;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+// Present the back buffer and start the next scene.
+//
+// Any scene in progress is ended first. hWnd is passed to Present() as the
+// destination window override; hDC is not used. A new scene is begun even
+// if Present() fails.
+//
+// Returns FALSE if ctx or its drawable is NULL or Present() fails, TRUE
+// otherwise.
+BOOL gldSwapBuffers_DX(
+	DGL_ctx *ctx,
+	HDC hDC,
+	HWND hWnd)
+{
+	GLD_driver_dx9	*gld = NULL;
+	HRESULT			hrPresent;
+
+	if ((ctx == NULL) || (ctx->glPriv == NULL))
+		return FALSE;
+	gld = ctx->glPriv;
+
+	// Close the current scene, if any, before presenting
+	if (ctx->bSceneStarted) {
+		IDirect3DDevice9_EndScene(gld->pDev);
+		ctx->bSceneStarted = FALSE;
+	}
+
+	// Swap the buffers. hWnd may override the hWnd used for CreateDevice()
+	hrPresent = IDirect3DDevice9_Present(gld->pDev, NULL, NULL, hWnd, NULL);
+
+	// Always reopen a scene, whatever Present() returned
+	IDirect3DDevice9_BeginScene(gld->pDev);
+	ctx->bSceneStarted = TRUE;
+
+	return SUCCEEDED(hrPresent) ? TRUE : FALSE;
+}
+
+//---------------------------------------------------------------------------
+
+// Fill *glddm with the adapter's current display mode (width, height,
+// refresh rate, and colour depth derived from the display format). Returns
+// FALSE if an argument, the drawable or its pD3D is NULL or the query fails.
+BOOL gldGetDisplayMode_DX(
+	DGL_ctx *ctx,
+	GLD_displayMode *glddm)
+{
+	GLD_driver_dx9	*lpCtx = NULL;
+	D3DDISPLAYMODE	d3dMode;
+	HRESULT			hrMode;
+	BYTE			nColour, nRed, nGreen, nBlue, nAlpha;
+
+	if ((glddm == NULL) || (ctx == NULL))
+		return FALSE;
+
+	lpCtx = ctx->glPriv;
+	if ((lpCtx == NULL) || (lpCtx->pD3D == NULL))
+		return FALSE;
+
+	hrMode = IDirect3D9_GetAdapterDisplayMode(lpCtx->pD3D, glb.dwAdapter, &d3dMode);
+	if (FAILED(hrMode))
+		return FALSE;
+
+	// Only the total colour depth is used; the helper needs all five outputs
+	_BitsFromDisplayFormat(d3dMode.Format,
+		&nColour, &nRed, &nGreen, &nBlue, &nAlpha);
+
+	glddm->Width	= d3dMode.Width;
+	glddm->Height	= d3dMode.Height;
+	glddm->BPP		= nColour;
+	glddm->Refresh	= d3dMode.RefreshRate;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
diff --git a/src/mesa/drivers/windows/gldirect/gld_debug_clip.c b/src/mesa/drivers/windows/gldirect/gld_debug_clip.c
new file mode 100644
index 0000000000..044d2e66f4
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/gld_debug_clip.c
@@ -0,0 +1,39 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes <gareth@valinux.com>
+ */
+
+#ifdef DEBUG  /* This code only used for debugging */
+
+// Stub to enable Mesa to build. KeithH
+#pragma message("NOTE: Using gld_debug_clip.c HACK")
+// Intentionally empty: 'description' is ignored and no clip tests are run.
+void _math_test_all_cliptest_functions( char *description )
+{
+}
+
+
+#endif /* DEBUG */
diff --git a/src/mesa/drivers/windows/gldirect/gld_debug_norm.c b/src/mesa/drivers/windows/gldirect/gld_debug_norm.c
new file mode 100644
index 0000000000..c20362bb24
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/gld_debug_norm.c
@@ -0,0 +1,39 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes <gareth@valinux.com>
+ */
+
+#ifdef DEBUG  /* This code only used for debugging */
+
+// Stub to enable Mesa to build. KeithH
+#pragma message("NOTE: Using gld_debug_norm.c HACK")
+// Intentionally empty: 'description' is ignored and no normal-transform tests are run.
+void _math_test_all_normal_transform_functions( char *description )
+{
+}
+
+
+#endif /* DEBUG */
diff --git a/src/mesa/drivers/windows/gldirect/gld_debug_xform.c b/src/mesa/drivers/windows/gldirect/gld_debug_xform.c
new file mode 100644
index 0000000000..73439dc3b6
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/gld_debug_xform.c
@@ -0,0 +1,41 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Updated for P6 architecture by Gareth Hughes.
+ */
+
+
+#ifdef DEBUG  /* This code only used for debugging */
+
+// Stub to enable Mesa to build. KeithH
+#pragma message("NOTE: Using gld_debug_xform.c HACK")
+// Intentionally empty: 'description' is ignored and no transform tests are run.
+void _math_test_all_transform_functions( char *description )
+{
+}
+
+
+#endif /* DEBUG */
diff --git a/src/mesa/drivers/windows/gldirect/gld_dispatch.c b/src/mesa/drivers/windows/gldirect/gld_dispatch.c
new file mode 100644
index 0000000000..e05d767e30
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/gld_dispatch.c
@@ -0,0 +1,73 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Thread-aware dispatch table.
+*
+****************************************************************************/
+
+#include "glheader.h"
+#include "glapi.h"
+#include "glapitable.h"
+#include "mtypes.h"
+#include "context.h"
+
+#define KEYWORD1
+#define KEYWORD2 GLAPIENTRY
+#if defined(USE_MGL_NAMESPACE)
+	#define NAME(func)  mgl##func
+#else
+	#define NAME(func)  gl##func
+#endif
+// NAME(func) pastes an "mgl" or "gl" prefix onto func, depending on USE_MGL_NAMESPACE.
+#if 0
+// Disabled variant: altered to get the dispatch table from
+// the current context of the calling thread.
+#define DISPATCH(FUNC, ARGS, MESSAGE)	\
+	GET_CURRENT_CONTEXT(gc);			\
+	(gc->CurrentDispatch->FUNC) ARGS
+#define RETURN_DISPATCH(FUNC, ARGS, MESSAGE) 	\
+	GET_CURRENT_CONTEXT(gc);			\
+	return (gc->CurrentDispatch->FUNC) ARGS
+#else // #if 0
+#define DISPATCH(FUNC, ARGS, MESSAGE)	\
+	GET_CURRENT_CONTEXT(gc);			\
+	(_glapi_Dispatch->FUNC) ARGS
+#define RETURN_DISPATCH(FUNC, ARGS, MESSAGE) 	\
+	GET_CURRENT_CONTEXT(gc);			\
+	return (_glapi_Dispatch->FUNC) ARGS
+#endif // #if 0
+// The active variant calls through _glapi_Dispatch; MESSAGE is unused and gc is not otherwise referenced.
+#ifndef GLAPIENTRY
+#define GLAPIENTRY
+#endif
+// NOTE(review): glapitemp.h presumably expands the macros above into the GL entry points - confirm.
+#include "glapitemp.h"
diff --git a/src/mesa/drivers/windows/gldirect/gld_driver.c b/src/mesa/drivers/windows/gldirect/gld_driver.c
new file mode 100644
index 0000000000..f7c575614b
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/gld_driver.c
@@ -0,0 +1,279 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Driver functions and interfaces
+*
+****************************************************************************/
+
+#define STRICT
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#include "gld_driver.h"
+#include "ddlog.h"
+#include "glheader.h"
+
+// For glGetString().
+#include "common_x86_asm.h"
+
+//---------------------------------------------------------------------------
+
+// Message logged by _gldDriverError() when a driver entry point is called
+// while _gldDriver still holds the *_ERROR placeholder functions.
+static char *szDriverError = "Driver used before initialisation!";
+
+// This holds our dynamically created OpenGL renderer string.
+// 256 chars should be plenty - remember that some apps display this.
+// Rewritten by _gldGetStringGeneric() on every GL_RENDERER query.
+static char _gldRendererString[256];
+
+// Returned by _gldGetStringGeneric() for GL_VENDOR.
+static char *szVendor = "SciTech Software, Inc.";
+
+//---------------------------------------------------------------------------
+
+// Prototypes for the backend entry points that gldInitDriverPointers()
+// installs into _gldDriver. None of them is defined in this file.
+
+// Installed for every driver selection (see gldInitDriverPointers()).
+extern BOOL gldGetDXErrorString_DX(HRESULT hr, char *buf, int nBufSize);
+
+// Backend selected when dwDriver == GLDS_DRIVER_MESA_SW.
+extern BOOL gldCreateDrawable_MesaSW(DGL_ctx *ctx, BOOL bPersistantInterface, BOOL bPersistantBuffers);
+extern BOOL gldResizeDrawable_MesaSW(DGL_ctx *ctx, BOOL bDefaultDriver, BOOL bPersistantInterface, BOOL bPersistantBuffers);
+extern BOOL gldDestroyDrawable_MesaSW(DGL_ctx *ctx);
+extern BOOL gldCreatePrivateGlobals_MesaSW(void);
+extern BOOL gldDestroyPrivateGlobals_MesaSW(void);
+extern BOOL	gldBuildPixelformatList_MesaSW(void);
+extern BOOL gldInitialiseMesa_MesaSW(DGL_ctx *ctx);
+extern BOOL	gldSwapBuffers_MesaSW(DGL_ctx *ctx, HDC hDC, HWND hWnd);
+extern PROC	gldGetProcAddress_MesaSW(LPCSTR a);
+extern BOOL	gldGetDisplayMode_MesaSW(DGL_ctx *ctx, GLD_displayMode *glddm);
+
+// Backend selected when dwDriver is GLDS_DRIVER_REF or GLDS_DRIVER_HAL.
+extern BOOL gldCreateDrawable_DX(DGL_ctx *ctx, BOOL bPersistantInterface, BOOL bPersistantBuffers);
+extern BOOL gldResizeDrawable_DX(DGL_ctx *ctx, BOOL bDefaultDriver, BOOL bPersistantInterface, BOOL bPersistantBuffers);
+extern BOOL gldDestroyDrawable_DX(DGL_ctx *ctx);
+extern BOOL gldCreatePrivateGlobals_DX(void);
+extern BOOL gldDestroyPrivateGlobals_DX(void);
+extern BOOL	gldBuildPixelformatList_DX(void);
+extern BOOL gldInitialiseMesa_DX(DGL_ctx *ctx);
+extern BOOL	gldSwapBuffers_DX(DGL_ctx *ctx, HDC hDC, HWND hWnd);
+extern PROC	gldGetProcAddress_DX(LPCSTR a);
+extern BOOL	gldGetDisplayMode_DX(DGL_ctx *ctx, GLD_displayMode *glddm);
+
+//---------------------------------------------------------------------------
+// NOP functions. Called if proper driver functions are not set.
+//---------------------------------------------------------------------------
+
+// Shared tail of every *_ERROR placeholder: logs szDriverError at
+// DDLOG_CRITICAL severity and reports failure.
+static BOOL _gldDriverError(void)
+{
+	const BOOL bFailed = FALSE;
+
+	ddlogMessage(DDLOG_CRITICAL, szDriverError);
+
+	return bFailed;
+}
+
+//---------------------------------------------------------------------------
+
+// Placeholder for the GetDXErrorString slot of _gldDriver.
+// Ignores all arguments, logs the driver error and returns FALSE.
+static BOOL _GetDXErrorString_ERROR(HRESULT hr, char *buf, int nBufSize)
+{
+	(void)hr;
+	(void)buf;
+	(void)nBufSize;
+
+	return _gldDriverError();
+}
+
+//---------------------------------------------------------------------------
+
+// Placeholder for the CreateDrawable slot of _gldDriver.
+// Ignores all arguments, logs the driver error and returns FALSE.
+static BOOL _CreateDrawable_ERROR(DGL_ctx *ctx, BOOL bPersistantInterface, BOOL bPersistantBuffers)
+{
+	(void)ctx;
+	(void)bPersistantInterface;
+	(void)bPersistantBuffers;
+
+	return _gldDriverError();
+}
+
+//---------------------------------------------------------------------------
+
+// Placeholder for the ResizeDrawable slot of _gldDriver.
+// Ignores all arguments, logs the driver error and returns FALSE.
+static BOOL _ResizeDrawable_ERROR(DGL_ctx *ctx, BOOL bDefaultDriver, BOOL bPersistantInterface, BOOL bPersistantBuffers)
+{
+	(void)ctx;
+	(void)bDefaultDriver;
+	(void)bPersistantInterface;
+	(void)bPersistantBuffers;
+
+	return _gldDriverError();
+}
+
+//---------------------------------------------------------------------------
+
+// Placeholder for the DestroyDrawable slot of _gldDriver.
+// Ignores its argument, logs the driver error and returns FALSE.
+static BOOL _DestroyDrawable_ERROR(DGL_ctx *ctx)
+{
+	(void)ctx;
+
+	return _gldDriverError();
+}
+
+//---------------------------------------------------------------------------
+
+// Placeholder for the CreatePrivateGlobals slot of _gldDriver.
+// Logs the driver error and returns FALSE.
+static BOOL _CreatePrivateGlobals_ERROR(void)
+{
+	BOOL bResult = _gldDriverError();
+
+	return bResult;
+}
+
+//---------------------------------------------------------------------------
+
+// Placeholder for the DestroyPrivateGlobals slot of _gldDriver.
+// Logs the driver error and returns FALSE.
+static BOOL _DestroyPrivateGlobals_ERROR(void)
+{
+	BOOL bResult = _gldDriverError();
+
+	return bResult;
+}
+
+//---------------------------------------------------------------------------
+
+// Placeholder for the BuildPixelformatList slot of _gldDriver.
+// Logs the driver error and returns FALSE.
+static BOOL _BuildPixelformatList_ERROR(void)
+{
+	BOOL bResult = _gldDriverError();
+
+	return bResult;
+}
+
+//---------------------------------------------------------------------------
+
+
+// Placeholder for the InitialiseMesa slot of _gldDriver.
+// Ignores its argument, logs the driver error and returns FALSE.
+static BOOL _InitialiseMesa_ERROR(DGL_ctx *ctx)
+{
+	(void)ctx;
+
+	return _gldDriverError();
+}
+
+//---------------------------------------------------------------------------
+
+// Placeholder for the SwapBuffers slot of _gldDriver.
+// Ignores all arguments, logs the driver error and returns FALSE.
+static BOOL _SwapBuffers_ERROR(DGL_ctx *ctx, HDC hDC, HWND hWnd)
+{
+	(void)ctx;
+	(void)hDC;
+	(void)hWnd;
+
+	return _gldDriverError();
+}
+
+//---------------------------------------------------------------------------
+
+// Placeholder for the wglGetProcAddress slot of _gldDriver.
+// Ignores its argument, logs the driver error and returns NULL (the
+// BOOL result of _gldDriverError() is discarded).
+static PROC _GetProcAddress_ERROR(LPCSTR a)
+{
+	(void)a;
+	(void)_gldDriverError();
+
+	return NULL;
+}
+
+//---------------------------------------------------------------------------
+
+// Placeholder for the GetDisplayMode slot of _gldDriver.
+// Ignores all arguments, logs the driver error and returns FALSE.
+static BOOL _GetDisplayMode_ERROR(DGL_ctx *ctx, GLD_displayMode *glddm)
+{
+	(void)ctx;
+	(void)glddm;
+
+	return _gldDriverError();
+}
+
+//---------------------------------------------------------------------------
+// Functions useful to all drivers
+//---------------------------------------------------------------------------
+
+// Answers the string queries that are the same for every backend.
+//   ctx  - only tested for NULL; a NULL context yields NULL.
+//   name - GL_VENDOR returns the static vendor string; GL_RENDERER formats
+//          "GLDirect 4.0", one tag per detected CPU feature and the build
+//          date/time into _gldRendererString and returns that buffer.
+// Any other name returns NULL.
+const GLubyte* _gldGetStringGeneric(
+	GLcontext *ctx,
+	GLenum name)
+{
+	if (ctx == NULL)
+		return NULL;
+
+	if (name == GL_VENDOR)
+		return (const GLubyte *) szVendor;
+
+	if (name != GL_RENDERER)
+		return NULL;
+
+	// The renderer string is rebuilt on every query.
+	sprintf(_gldRendererString, "GLDirect 4.0 %s%s%s%s (%s %s)",
+		_mesa_x86_cpu_features ? "/x86" : "",
+		cpu_has_mmx ? "/MMX" : "",
+		cpu_has_3dnow ? "/3DNow!" : "",
+		cpu_has_xmm ? "/SSE" : "",
+		__DATE__, __TIME__);
+
+	return (const GLubyte *) _gldRendererString;
+}
+
+//---------------------------------------------------------------------------
+// Global driver function pointers, initially set to functions that
+// will report an error when called.
+//---------------------------------------------------------------------------
+
+// Positional initializer: the entries must stay in the same order as the
+// members of GLD_driver (gld_driver.h). The member each entry fills is
+// noted alongside it.
+GLD_driver _gldDriver = {
+	_GetDXErrorString_ERROR,		// GetDXErrorString
+	_CreateDrawable_ERROR,			// CreateDrawable
+	_ResizeDrawable_ERROR,			// ResizeDrawable
+	_DestroyDrawable_ERROR,			// DestroyDrawable
+	_CreatePrivateGlobals_ERROR,	// CreatePrivateGlobals
+	_DestroyPrivateGlobals_ERROR,	// DestroyPrivateGlobals
+	_BuildPixelformatList_ERROR,	// BuildPixelformatList
+	_InitialiseMesa_ERROR,			// InitialiseMesa
+	_SwapBuffers_ERROR,				// SwapBuffers
+	_GetProcAddress_ERROR,			// wglGetProcAddress
+	_GetDisplayMode_ERROR			// GetDisplayMode
+};
+
+//---------------------------------------------------------------------------
+// Init function. Should be called as soon as regkeys/ini-settings are read.
+//---------------------------------------------------------------------------
+
+// Fills the global _gldDriver table with the backend chosen by dwDriver.
+//   GLDS_DRIVER_MESA_SW                 -> the *_MesaSW functions
+//   GLDS_DRIVER_REF or GLDS_DRIVER_HAL  -> the *_DX functions
+// Returns TRUE when dwDriver is one of those values, FALSE otherwise.
+// GetDXErrorString is set to the DX implementation in every case, including
+// the FALSE case, where the remaining slots are left as they were.
+BOOL gldInitDriverPointers(
+	DWORD dwDriver)
+{
+	GLD_driver	*pTable = &_gldDriver;
+	BOOL		bDirect3D;
+
+	pTable->GetDXErrorString = gldGetDXErrorString_DX;
+
+	if (dwDriver == GLDS_DRIVER_MESA_SW) {
+		// Mesa Software driver
+		pTable->CreateDrawable			= gldCreateDrawable_MesaSW;
+		pTable->ResizeDrawable			= gldResizeDrawable_MesaSW;
+		pTable->DestroyDrawable			= gldDestroyDrawable_MesaSW;
+		pTable->CreatePrivateGlobals	= gldCreatePrivateGlobals_MesaSW;
+		pTable->DestroyPrivateGlobals	= gldDestroyPrivateGlobals_MesaSW;
+		pTable->BuildPixelformatList	= gldBuildPixelformatList_MesaSW;
+		pTable->InitialiseMesa			= gldInitialiseMesa_MesaSW;
+		pTable->SwapBuffers				= gldSwapBuffers_MesaSW;
+		pTable->wglGetProcAddress		= gldGetProcAddress_MesaSW;
+		pTable->GetDisplayMode			= gldGetDisplayMode_MesaSW;
+		return TRUE;
+	}
+
+	bDirect3D = (dwDriver == GLDS_DRIVER_REF) || (dwDriver == GLDS_DRIVER_HAL);
+	if (!bDirect3D)
+		return FALSE;
+
+	// Direct3D driver, either HW or SW
+	pTable->CreateDrawable			= gldCreateDrawable_DX;
+	pTable->ResizeDrawable			= gldResizeDrawable_DX;
+	pTable->DestroyDrawable			= gldDestroyDrawable_DX;
+	pTable->CreatePrivateGlobals	= gldCreatePrivateGlobals_DX;
+	pTable->DestroyPrivateGlobals	= gldDestroyPrivateGlobals_DX;
+	pTable->BuildPixelformatList	= gldBuildPixelformatList_DX;
+	pTable->InitialiseMesa			= gldInitialiseMesa_DX;
+	pTable->SwapBuffers				= gldSwapBuffers_DX;
+	pTable->wglGetProcAddress		= gldGetProcAddress_DX;
+	pTable->GetDisplayMode			= gldGetDisplayMode_DX;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
diff --git a/src/mesa/drivers/windows/gldirect/gld_driver.h b/src/mesa/drivers/windows/gldirect/gld_driver.h
new file mode 100644
index 0000000000..01a46a8325
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/gld_driver.h
@@ -0,0 +1,90 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Driver functions and interfaces
+*
+****************************************************************************/
+
+#ifndef _GLD_DRIVER_H
+#define _GLD_DRIVER_H
+
+// This file is only useful if we're using the new GLD3 WGL code.
+#ifdef _USE_GLD3_WGL
+
+#include "dglcontext.h"
+
+// Same as DX8 D3DDISPLAYMODE
+// Display-mode description filled in through GLD_driver.GetDisplayMode.
+// NOTE(review): units are not stated here; the field names suggest pixels,
+// Hz and bits per pixel - confirm against the backend implementations.
+typedef struct {
+	DWORD	Width;
+	DWORD	Height;
+	DWORD	Refresh;
+	DWORD	BPP;
+} GLD_displayMode;
+
+// Table of backend entry points. A single instance, _gldDriver, is declared
+// below; gldInitDriverPointers() selects which backend fills it.
+// NOTE: code that initialises this struct positionally depends on the
+// member order, so do not reorder members.
+typedef struct {
+	// Returns a string for a given HRESULT error code.
+	BOOL	(*GetDXErrorString)(HRESULT hr, char *buf, int nBufSize);
+
+	// Driver functions for managing drawables.
+	// Functions must respect persistent buffers / persistent interface
+	// (spelled "Persistant" in the parameter names).
+	// NOTE: Persistent interface is: DirectDraw, pre-DX8; Direct3D, DX8 and above.
+	BOOL	(*CreateDrawable)(DGL_ctx *ctx, BOOL bPersistantInterface, BOOL bPersistantBuffers);
+	BOOL	(*ResizeDrawable)(DGL_ctx *ctx, BOOL bDefaultDriver, BOOL bPersistantInterface, BOOL bPersistantBuffers);
+	BOOL	(*DestroyDrawable)(DGL_ctx *ctx);
+
+	// Create/Destroy private globals belonging to driver
+	BOOL	(*CreatePrivateGlobals)(void);
+	BOOL	(*DestroyPrivateGlobals)(void);
+
+	// Build pixelformat list
+	BOOL	(*BuildPixelformatList)(void);
+
+	// Initialise Mesa's driver pointers
+	BOOL	(*InitialiseMesa)(DGL_ctx *ctx);
+
+	// Swap buffers
+	BOOL	(*SwapBuffers)(DGL_ctx *ctx, HDC hDC, HWND hWnd);
+
+	// wglGetProcAddress()
+	PROC	(*wglGetProcAddress)(LPCSTR a);
+
+	// Presumably fills *glddm with the current display mode - TODO confirm
+	// against the backend implementations.
+	BOOL	(*GetDisplayMode)(DGL_ctx *ctx, GLD_displayMode *glddm);
+} GLD_driver;
+
+// The global driver function table (defined in gld_driver.c).
+extern GLD_driver _gldDriver;
+
+// Selects the backend for _gldDriver from dwDriver; returns TRUE if dwDriver
+// names a known driver, FALSE otherwise.
+BOOL gldInitDriverPointers(DWORD dwDriver);
+// Returns the renderer or vendor string for GL_RENDERER / GL_VENDOR, and
+// NULL for a NULL ctx or any other name.
+const GLubyte* _gldGetStringGeneric(GLcontext *ctx, GLenum name);
+
+#endif // _USE_GLD3_WGL
+
+#endif // _GLD_DRIVER_H
diff --git a/src/mesa/drivers/windows/gldirect/gldirect.rc b/src/mesa/drivers/windows/gldirect/gldirect.rc
new file mode 100644
index 0000000000..ba09631538
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/gldirect.rc
@@ -0,0 +1,43 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:		Windows Resource Compiler
+* Environment:	Windows 95
+*
+****************************************************************************/
+
+#ifndef WORKSHOP_INVOKED
+  #include <windows.h>
+#endif
+
+// Per-module settings defined ahead of the gldver.ver include below;
+// presumably that file uses them to build the version resource (TODO
+// confirm against gldver.ver).
+#define FILE_DESCRIPTION    "SciTech GLDirect"
+#define ORIG_FILENAME       "opengl32.dll"
+#define FILE_TYPE			VFT_DLL
+
+#include "gldirect/gldver.ver"
diff --git a/src/mesa/drivers/windows/gldirect/gldlame8.c b/src/mesa/drivers/windows/gldirect/gldlame8.c
new file mode 100644
index 0000000000..5ac519c174
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/gldlame8.c
@@ -0,0 +1,181 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  GLDirect utility for determining lame boards/drivers.
+*
+****************************************************************************/
+
+#define STRICT
+#define	WIN32_LEAN_AND_MEAN
+#include <d3d8.h>
+
+/*
+Ack. Broken out from gldlame.c because of broken D3D headers. KeithH
+*/
+
+/****************************************************************************
+REMARKS:
+Scans list of DirectDraw devices for specific device IDs.
+****************************************************************************/
+
+// Vendor ID compared against D3DADAPTER_IDENTIFIER8.VendorId in
+// IsThisD3D8Lame().
+#define VENDORID_ATI 0x1002
+
+// Device IDs that IsThisD3D8Lame() rejects for "bad mipmapping".
+static DWORD devATIRagePro[] = {
+	0x4742, // 3D RAGE PRO BGA AGP 1X/2X
+	0x4744, // 3D RAGE PRO BGA AGP 1X only
+	0x4749, // 3D RAGE PRO BGA PCI 33 MHz
+	0x4750, // 3D RAGE PRO PQFP PCI 33 MHz
+	0x4751, // 3D RAGE PRO PQFP PCI 33 MHz limited 3D
+	0x4C42, // 3D RAGE LT PRO BGA-312 AGP 133 MHz
+	0x4C44, // 3D RAGE LT PRO BGA-312 AGP 66 MHz
+	0x4C49, // 3D RAGE LT PRO BGA-312 PCI 33 MHz
+	0x4C50, // 3D RAGE LT PRO BGA-256 PCI 33 MHz
+	0x4C51, // 3D RAGE LT PRO BGA-256 PCI 33 MHz limited 3D
+};
+
+// Device IDs that IsThisD3D8Lame() rejects for "bad HW alpha testing".
+static DWORD devATIRageIIplus[] = {
+	0x4755, // 3D RAGE II+
+	0x4756, // 3D RAGE IIC PQFP PCI
+	0x4757, // 3D RAGE IIC BGA AGP
+	0x475A, // 3D RAGE IIC PQFP AGP
+	0x4C47, // 3D RAGE LT-G
+};
+
+// Linear search of a device-ID table.
+//   lpDeviceIdList - first entry of the table
+//   dwDeviceId     - ID to look for
+//   count          - number of entries (elements, not bytes) to examine
+// Returns TRUE as soon as an entry equals dwDeviceId, FALSE if none does
+// (including when count is zero or negative).
+static __inline BOOL IsDevice(
+	DWORD *lpDeviceIdList,
+	DWORD dwDeviceId,
+	int count)
+{
+	DWORD	*lpEntry = lpDeviceIdList;
+	int		nRemaining;
+
+	for (nRemaining = count; nRemaining > 0; nRemaining--) {
+		if (*lpEntry == dwDeviceId)
+			return TRUE;
+		lpEntry++;
+	}
+
+	return FALSE;
+}
+
+/****************************************************************************
+REMARKS:
+Test the Direct3D8 device for "lameness" with respect to GLDirect.
+This is done on per-chipset basis, as in GLD CAD driver (DGLCONTEXT.C).
+If bTestForWHQL is set then the device is tested to see if it is
+certified, and *bIsWHQL is set to indicate TRUE or FALSE. Otherwise *bIsWHQL
+is not set. [WHQL = Windows Hardware Quality Labs]
+
+PARAMETERS:
+pD3D          - Direct3D8 interface used to query the adapter identifier.
+dwAdapter     - Adapter ordinal passed to GetAdapterIdentifier.
+bTestForWHQL  - Request and report the WHQL level.
+bIsWHQL       - Receives the WHQL result when bTestForWHQL is set. May be
+                NULL, in which case nothing is written. Also left untouched
+                if the adapter identifier cannot be obtained.
+
+RETURNS:
+TRUE if the adapter identifier cannot be obtained, or if the adapter is an
+ATI device listed in devATIRagePro or devATIRageIIplus; FALSE otherwise.
+
+NOTE: There is a one- or two-second time penalty incurred in determining
+      the WHQL certification date.
+****************************************************************************/
+BOOL IsThisD3D8Lame(
+	IDirect3D8 *pD3D,
+	DWORD dwAdapter,
+	BOOL bTestForWHQL,
+	BOOL *bIsWHQL)
+{
+	DWORD					dwFlags = bTestForWHQL ? 0 : D3DENUM_NO_WHQL_LEVEL;
+	D3DADAPTER_IDENTIFIER8	d3dai;
+	HRESULT					hr;
+
+	hr = IDirect3D8_GetAdapterIdentifier(pD3D, dwAdapter, dwFlags, &d3dai);
+	if (FAILED(hr))
+		return TRUE; // Definitely lame if adapter details can't be obtained!
+
+	// Only report WHQL status when asked to and when there is somewhere
+	// to store it.
+	if (bTestForWHQL && bIsWHQL) {
+		*bIsWHQL = d3dai.WHQLLevel ? TRUE : FALSE;
+	}
+
+	// Vendor 1: ATI
+	if (d3dai.VendorId == VENDORID_ATI) {
+		// IsDevice() expects an element count. sizeof(table) alone is the
+		// size in bytes and would make it read past the end of the table,
+		// so divide by the element size.
+
+		// Test A: ATI Rage PRO
+		if (IsDevice(devATIRagePro, d3dai.DeviceId,
+				(int)(sizeof(devATIRagePro) / sizeof(devATIRagePro[0]))))
+			return TRUE;	// bad mipmapping
+		// Test B: ATI Rage II+
+		if (IsDevice(devATIRageIIplus, d3dai.DeviceId,
+				(int)(sizeof(devATIRageIIplus) / sizeof(devATIRageIIplus[0]))))
+			return TRUE; 	// bad HW alpha testing
+	}
+
+	return FALSE;
+}
+
+/****************************************************************************
+REMARKS:
+Test the Direct3DDevice8 device for "lameness" with respect to GLDirect.
+This is done by querying for particular caps, as in GLD CPL (CPLMAIN.CPP).
+****************************************************************************/
+// Returns TRUE ("lame") if the device caps cannot be read or if any of the
+// capability tests below fails; FALSE only when every test passes.
+BOOL IsThisD3D8DeviceLame(
+	IDirect3DDevice8 *pDev)
+{
+	D3DCAPS8	caps;
+	HRESULT		hr;
+	BOOL		bAlphaBlend;
+	BOOL		bColorBlend;
+	BOOL		bMultiTexture;
+
+	hr = IDirect3DDevice8_GetDeviceCaps(pDev, &caps);
+	if (FAILED(hr))
+		return TRUE;
+
+	// Test 1: Perspective-correct textures
+	//         (a card without them is *exceptionally* lame)
+	// Test 2: Bilinear filtering
+	// Test 3: Mipmapping
+	if ((caps.TextureCaps & D3DPTEXTURECAPS_PERSPECTIVE) == 0
+		|| (caps.TextureFilterCaps & D3DPTFILTERCAPS_MINFLINEAR) == 0
+		|| (caps.TextureCaps & D3DPTEXTURECAPS_MIPMAP) == 0)
+		return TRUE; // Lame!
+
+	// Test 4: Depth-test modes (?) -- nothing is checked for this test.
+
+	// Test 5: Blend Modes -- Based on DX7 D3DIM MTEXTURE.CPP caps test
+
+	// Multipass, alpha blending
+	bAlphaBlend = (caps.DestBlendCaps & D3DPBLENDCAPS_INVSRCALPHA)
+		&& (caps.SrcBlendCaps & D3DPBLENDCAPS_SRCALPHA);
+
+	// Multipass, color blending
+	bColorBlend = (caps.DestBlendCaps & D3DPBLENDCAPS_SRCCOLOR)
+		&& (caps.SrcBlendCaps & D3DPBLENDCAPS_ZERO);
+
+	// Real support for multiple textures
+	bMultiTexture = (caps.MaxTextureBlendStages > 1)
+		&& (caps.MaxSimultaneousTextures > 1)
+		&& (caps.TextureOpCaps & D3DTEXOPCAPS_MODULATE);
+
+	if (bAlphaBlend && bColorBlend && bMultiTexture)
+		return FALSE; // Not lame
+
+	return TRUE;
+}
diff --git a/src/mesa/drivers/windows/gldirect/mesasw/colors.h b/src/mesa/drivers/windows/gldirect/mesasw/colors.h
new file mode 100644
index 0000000000..9c1f2a0540
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/mesasw/colors.h
@@ -0,0 +1,520 @@
+/*	File name	:	colors.h
+ *  Version		:	2.3
+ *
+ *  Header file for display driver for Mesa 2.3  under 
+ *	Windows95 and WindowsNT 
+ *	This file defines macros and global variables  needed
+ *	for converting color format
+ *
+ *	Copyright (C) 1996-  Li Wei
+ *  Address		:		Institute of Artificial Intelligence
+ *				:			& Robotics
+ *				:		Xi'an Jiaotong University
+ *  Email		:		liwei@aiar.xjtu.edu.cn
+ *  Web page	:		http://sun.aiar.xjtu.edu.cn
+ *
+ *  This file and its associations are partially based on the 
+ *  Windows NT driver for Mesa, written by Mark Leaming
+ *  (mark@rsinc.com).
+ */
+
+/*
+ * Macros for pixel format defined
+ */
+
+/*
+ * Revision 1.1  2004/04/20 11:13:11  alanh
+ * add SciTech's GLDirect driver for Windows.
+ *
+ * This code is donated to Mesa which allows the usage of
+ * a Direct3D layer (DX7, DX8, DX9 or complete software fallback).
+ *
+ * No build system exists for this code yet, that will come.....
+ *
+ * Revision 1.1.1.1  1999/08/19 00:55:42  jtg
+ * Imported sources
+ *
+ * Revision 1.2  1999/01/03 03:08:57  brianp
+ * Ted Jump's changes
+ *
+ * Revision 1.1  1999/01/03 03:08:12  brianp
+ * Initial revision
+ *
+ * Revision 2.0.2  1997/4/30 15:58:00  CST by Li Wei(liwei@aiar.xjtu.edu.cn)
+ * Add LUTs need for dithering
+ */
+
+/*
+ * Revision 1.1  2004/04/20 11:13:11  alanh
+ * add SciTech's GLDirect driver for Windows.
+ *
+ * This code is donated to Mesa which allows the usage of
+ * a Direct3D layer (DX7, DX8, DX9 or complete software fallback).
+ *
+ * No build system exists for this code yet, that will come.....
+ *
+ * Revision 1.1.1.1  1999/08/19 00:55:42  jtg
+ * Imported sources
+ *
+ * Revision 1.2  1999/01/03 03:08:57  brianp
+ * Ted Jump's changes
+ *
+ * Revision 1.1  1999/01/03 03:08:12  brianp
+ * Initial revision
+ *
+ * Revision 2.0.1  1997/4/29 15:52:00  CST by Li Wei(liwei@aiar.xjtu.edu.cn)
+ * Add BGR8 Macro
+ */
+ 
+/*
+ * Revision 1.1  2004/04/20 11:13:11  alanh
+ * add SciTech's GLDirect driver for Windows.
+ *
+ * This code is donated to Mesa which allows the usage of
+ * a Direct3D layer (DX7, DX8, DX9 or complete software fallback).
+ *
+ * No build system exists for this code yet, that will come.....
+ *
+ * Revision 1.1.1.1  1999/08/19 00:55:42  jtg
+ * Imported sources
+ *
+ * Revision 1.2  1999/01/03 03:08:57  brianp
+ * Ted Jump's changes
+ *
+ * Revision 1.1  1999/01/03 03:08:12  brianp
+ * Initial revision
+ *
+ * Revision 2.0  1996/11/15 10:55:00  CST by Li Wei(liwei@aiar.xjtu.edu.cn)
+ * Initial revision
+ */
+/* Values for wmesa->pixelformat: */
+/* The values are not contiguous (1, 2, 8 and 9 are not defined here). */
+
+#define PF_8A8B8G8R	3	/* 32-bit TrueColor:  8-A, 8-B, 8-G, 8-R */
+#define PF_8R8G8B	4	/* 32-bit TrueColor:  8-R, 8-G, 8-B */
+#define PF_5R6G5B	5	/* 16-bit TrueColor:  5-R, 6-G, 5-B bits */
+#define PF_DITHER8	6	/* Dithered RGB using a lookup table */
+#define PF_LOOKUP	7	/* Undithered RGB using a lookup table */
+#define PF_GRAYSCALE	10	/* Grayscale or StaticGray */
+#define PF_BADFORMAT	11	/* presumably marks an unsupported format - TODO confirm */
+#define PF_INDEX8		12	/* presumably 8-bit color index - TODO confirm */
+
+/*
+ * 256-entry lookup table: entry i holds i >> 3, i.e. an 8-bit component
+ * reduced to 5 bits (0x00-0x1F).  Indexed by the BGR16 and PACK_5R6G5B
+ * macros below.
+ * NOTE(review): this is a non-static object definition inside a header, so
+ * each translation unit that includes colors.h defines ColorMap16 - confirm
+ * the header is only included from one source file per link.
+ */
+char ColorMap16[] = {
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,
+0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03,
+0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04,
+0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,
+0x06,0x06,0x06,0x06,0x06,0x06,0x06,0x06,
+0x07,0x07,0x07,0x07,0x07,0x07,0x07,0x07,
+0x08,0x08,0x08,0x08,0x08,0x08,0x08,0x08,
+0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,
+0x0A,0x0A,0x0A,0x0A,0x0A,0x0A,0x0A,0x0A,
+0x0B,0x0B,0x0B,0x0B,0x0B,0x0B,0x0B,0x0B,
+0x0C,0x0C,0x0C,0x0C,0x0C,0x0C,0x0C,0x0C,
+0x0D,0x0D,0x0D,0x0D,0x0D,0x0D,0x0D,0x0D,
+0x0E,0x0E,0x0E,0x0E,0x0E,0x0E,0x0E,0x0E,
+0x0F,0x0F,0x0F,0x0F,0x0F,0x0F,0x0F,0x0F,
+0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
+0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,
+0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+0x13,0x13,0x13,0x13,0x13,0x13,0x13,0x13,
+0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14,
+0x15,0x15,0x15,0x15,0x15,0x15,0x15,0x15,
+0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16,
+0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,
+0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,
+0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,
+0x1A,0x1A,0x1A,0x1A,0x1A,0x1A,0x1A,0x1A,
+0x1B,0x1B,0x1B,0x1B,0x1B,0x1B,0x1B,0x1B,
+0x1C,0x1C,0x1C,0x1C,0x1C,0x1C,0x1C,0x1C,
+0x1D,0x1D,0x1D,0x1D,0x1D,0x1D,0x1D,0x1D,
+0x1E,0x1E,0x1E,0x1E,0x1E,0x1E,0x1E,0x1E,
+0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F};
+
+/*
+ * Pack 8-bit r, g, b components into a pixel value.  Every macro argument
+ * is parenthesized before it is masked, so an expression argument such as
+ * "x | y" is masked and shifted as a whole rather than being split by
+ * operator precedence.
+ *   BGR8  - bits 7-6 from b, bits 5-3 from the top of g, bits 2-0 from the
+ *           top of r.
+ *   BGR16 - 5-bit b in bits 4-0 and 5-bit r in the top field (both through
+ *           ColorMap16); with DDRAW g keeps 6 bits (r shifted by 11),
+ *           otherwise g is 5 bits (r shifted by 10).
+ *   BGR24 - the BGR32 value shifted left by 8.
+ *   BGR32 - r in bits 23-16, g in bits 15-8, b in bits 7-0.
+ * r, g and b are used as ColorMap16 indices by BGR16, so they must lie in
+ * 0..255 there.
+ */
+#define BGR8(r,g,b)		(unsigned)(((BYTE)(((b) & 0xc0) | (((g) & 0xe0)>>2) | (((r) & 0xe0)>>5))))
+#ifdef DDRAW
+#define BGR16(r,g,b)	((WORD)(((BYTE)(ColorMap16[b]) | ((BYTE)((g)&0xfc) << 3)) | (((WORD)(BYTE)(ColorMap16[r])) << 11)))
+#else
+#define BGR16(r,g,b)	((WORD)(((BYTE)(ColorMap16[b]) | ((BYTE)(ColorMap16[g]) << 5)) | (((WORD)(BYTE)(ColorMap16[r])) << 10)))
+#endif
+#define BGR24(r,g,b)	(unsigned long)(((DWORD)(((BYTE)(b)|((WORD)((BYTE)(g))<<8))|(((DWORD)(BYTE)(r))<<16))) << 8)
+#define BGR32(r,g,b)	(unsigned long)((DWORD)(((BYTE)(b)|((WORD)((BYTE)(g))<<8))|(((DWORD)(BYTE)(r))<<16)))
+
+
+
+/*
+ * If pixelformat==PF_8A8B8G8R:
+ * Packs A into bits 31-24, B into 23-16, G into 15-8 and R into 7-0.
+ * NOTE(review): the shifts are done in the arguments' own type; with a
+ * plain int A >= 128, (A) << 24 overflows a 32-bit signed int - confirm
+ * callers pass unsigned values.
+ */
+#define PACK_8A8B8G8R( R, G, B, A )	\
+	( ((A) << 24) | ((B) << 16) | ((G) << 8) | (R) )
+
+
+/*
+ * If pixelformat==PF_8R8G8B:
+ * Packs R into bits 23-16, G into 15-8 and B into 7-0.  The arguments are
+ * not masked, so values above 255 spill into the neighbouring field.
+ */
+#define PACK_8R8G8B( R, G, B)	 ( ((R) << 16) | ((G) << 8) | (B) )
+
+
+/*
+ * If pixelformat==PF_5R6G5B:
+ * Packs 8-bit R, G, B into a 16-bit WORD.  B and R are reduced to 5 bits
+ * through ColorMap16.  With DDRAW defined G keeps its top 6 bits and R is
+ * shifted by 11 (5-6-5); otherwise G is also reduced to 5 bits and R is
+ * shifted by 10 (5-5-5).  R, G and B index ColorMap16, so they must lie in
+ * 0..255.  G is parenthesized before masking so that expression arguments
+ * are masked as a whole.
+ */
+
+
+#ifdef DDRAW
+#define PACK_5R6G5B( R, G, B) ((WORD)(((BYTE)(ColorMap16[B]) | ((BYTE)((G)&0xfc) << 3)) | (((WORD)(BYTE)(ColorMap16[R])) << 11)))
+#else
+#define PACK_5R6G5B( R, G, B)	((WORD)(((BYTE)(ColorMap16[B]) | ((BYTE)(ColorMap16[G]) << 5)) | (((WORD)(BYTE)(ColorMap16[R])) << 10)))
+#endif
+/*----------------------------------------------------------------------------
+
+Division lookup tables.  These tables compute 0-255 divided by 51 and
+modulo 51.  These tables could approximate gamma correction.
+
+*/
+
+/* Entry i is i / 51 rounded to the nearest integer (0..5); the value first
+   becomes 1 at index 26. */
+char unsigned const aDividedBy51Rounded[256] =
+{
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+};
+
+/* Entry i is i / 51 truncated (0..5); only index 255 yields 5. */
+char unsigned const aDividedBy51[256] =
+{
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 
+};
+
+/* Entry i is i % 51 (0..50): the sequence 0..50 repeated five times, then a
+   final 0 for index 255. */
+char unsigned const aModulo51[256] =
+{
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+  20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
+  38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 0, 1, 2, 3, 4, 5, 6,
+  7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+  44, 45, 46, 47, 48, 49, 50, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+  13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+  49, 50, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+  18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+  36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 0, 1, 2, 3,
+  4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+  23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 0, 
+};
+
+/*----------------------------------------------------------------------------
+
+Multiplication LUTs.  These compute 0-5 times 6 and 36.
+
+*/
+
+/* Entry i is i * 6, for i in 0..5. */
+char unsigned const aTimes6[6] =
+{
+  0, 6, 12, 18, 24, 30
+};
+
+/* Entry i is i * 36, for i in 0..5. */
+char unsigned const aTimes36[6] =
+{
+  0, 36, 72, 108, 144, 180
+};
+
+
+/*----------------------------------------------------------------------------
+
+Dither matrices for 8 bit to 2.6 bit halftones.
+
+*/
+
+/* 16x16 dither matrix stored as 256 bytes, laid out here as 16 rows of 16;
+   every entry lies in 0..50.  Presumably compared against aModulo51[]
+   remainders by the dithering code - TODO confirm at the point of use. */
+char unsigned const aHalftone16x16[256] =
+{
+  0, 44, 9, 41, 3, 46, 12, 43, 1, 44, 10, 41, 3, 46, 12, 43,
+  34, 16, 25, 19, 37, 18, 28, 21, 35, 16, 26, 19, 37, 18, 28, 21,
+  38, 6, 47, 3, 40, 9, 50, 6, 38, 7, 47, 4, 40, 9, 49, 6,
+  22, 28, 13, 31, 25, 31, 15, 34, 22, 29, 13, 32, 24, 31, 15, 34,
+  2, 46, 12, 43, 1, 45, 10, 42, 2, 45, 11, 42, 1, 45, 11, 42,
+  37, 18, 27, 21, 35, 17, 26, 20, 36, 17, 27, 20, 36, 17, 26, 20,
+  40, 8, 49, 5, 38, 7, 48, 4, 39, 8, 48, 5, 39, 7, 48, 4,
+  24, 30, 15, 33, 23, 29, 13, 32, 23, 30, 14, 33, 23, 29, 14, 32,
+  2, 46, 12, 43, 0, 44, 10, 41, 3, 47, 12, 44, 0, 44, 10, 41,
+  37, 18, 27, 21, 35, 16, 25, 19, 37, 19, 28, 22, 35, 16, 25, 19,
+  40, 9, 49, 5, 38, 7, 47, 4, 40, 9, 50, 6, 38, 6, 47, 3,
+  24, 30, 15, 34, 22, 29, 13, 32, 25, 31, 15, 34, 22, 28, 13, 31,
+  1, 45, 11, 42, 2, 46, 11, 42, 1, 45, 10, 41, 2, 46, 11, 43,
+  36, 17, 26, 20, 36, 17, 27, 21, 35, 16, 26, 20, 36, 18, 27, 21,
+  39, 8, 48, 4, 39, 8, 49, 5, 38, 7, 48, 4, 39, 8, 49, 5,
+  23, 29, 14, 33, 24, 30, 14, 33, 23, 29, 13, 32, 24, 30, 14, 33,
+};
+
+/* 8x8 dither matrix stored as 64 bytes, laid out here as 8 rows of 8;
+   every entry lies in 0..50. */
+char unsigned const aHalftone8x8[64] =
+{
+   0, 38,  9, 47,  2, 40, 11, 50,
+  25, 12, 35, 22, 27, 15, 37, 24,
+   6, 44,  3, 41,  8, 47,  5, 43,
+  31, 19, 28, 15, 34, 21, 31, 18,
+   1, 39, 11, 49,  0, 39, 10, 48,
+  27, 14, 36, 23, 26, 13, 35, 23,
+   7, 46,  4, 43,  7, 45,  3, 42,
+  33, 20, 30, 17, 32, 19, 29, 16,
+};
+
+char unsigned const aHalftone4x4_1[16] =
+{
+  0, 25, 6, 31,
+  38, 12, 44, 19,
+  9, 35, 3, 28,
+  47, 22, 41, 15
+};
+
+char unsigned const aHalftone4x4_2[16] =
+{
+  41, 3, 9, 28,
+  35, 15, 22, 47,
+  6, 25, 38, 0,
+  19, 44, 31, 12
+};
+
+/***************************************************************************
+  aWinGHalftoneTranslation
+
+  Translates a 2.6 bit-per-pixel halftoned representation into the
+  slightly rearranged WinG Halftone Palette.
+*/
+
+char unsigned const aWinGHalftoneTranslation[216] =
+{
+  0,
+  29,
+  30,
+  31,
+  32,
+  249,
+  33,
+  34,
+  35,
+  36,
+  37,
+  38,
+  39,
+  40,
+  41,
+  42,
+  43,
+  44,
+  45,
+  46,
+  47,
+  48,
+  49,
+  50,
+  51,
+  52,
+  53,
+  54,
+  55,
+  56,
+  250,
+  250,
+  57,
+  58,
+  59,
+  251,
+  60,
+  61,
+  62,
+  63,
+  64,
+  65,
+  66,
+  67,
+  68,
+  69,
+  70,
+  71,
+  72,
+  73,
+  74,
+  75,
+  76,
+  77,
+  78,
+  79,
+  80,
+  81,
+  82,
+  83,
+  84,
+  85,
+  86,
+  87,
+  88,
+  89,
+  250,
+  90,
+  91,
+  92,
+  93,
+  94,
+  95,
+  96,
+  97,
+  98,
+  99,
+  100,
+  101,
+  102,
+  103,
+  104,
+  105,
+  106,
+  107,
+  108,
+  109,
+  110,
+  111,
+  227,
+  112,
+  113,
+  114,
+  115,
+  116,
+  117,
+  118,
+  119,
+  151,
+  120,
+  121,
+  122,
+  123,
+  124,
+  228,
+  125,
+  126,
+  229,
+  133,
+  162,
+  135,
+  131,
+  132,
+  137,
+  166,
+  134,
+  140,
+  130,
+  136,
+  143,
+  138,
+  139,
+  174,
+  141,
+  142,
+  177,
+  129,
+  144,
+  145,
+  146,
+  147,
+  148,
+  149,
+  150,
+  157,
+  152,
+  153,
+  154,
+  155,
+  156,
+  192,
+  158,
+  159,
+  160,
+  161,
+  196,
+  163,
+  164,
+  165,
+  127,
+  199,
+  167,
+  168,
+  169,
+  170,
+  171,
+  172,
+  173,
+  207,
+  175,
+  176,
+  210,
+  178,
+  179,
+  180,
+  181,
+  182,
+  183,
+  184,
+  185,
+  186,
+  187,
+  188,
+  189,
+  190,
+  191,
+  224,
+  193,
+  194,
+  195,
+  252,
+  252,
+  197,
+  198,
+  128,
+  253,
+  252,
+  200,
+  201,
+  202,
+  203,
+  204,
+  205,
+  206,
+  230,
+  208,
+  209,
+  231,
+  211,
+  212,
+  213,
+  214,
+  215,
+  216,
+  217,
+  218,
+  219,
+  220,
+  221,
+  222,
+  254,
+  223,
+  232,
+  225,
+  226,
+  255,
+};
diff --git a/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c b/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c
new file mode 100644
index 0000000000..f927abfa11
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c
@@ -0,0 +1,1682 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x/2000/XP/XBox (Win32)
+*
+* Description:  Mesa Software WGL (WindowsGL)
+*
+****************************************************************************/
+
+#include <windows.h>
+#define GL_GLEXT_PROTOTYPES
+#include <GL/gl.h>
+#include <GL/glext.h>
+
+#include "glheader.h"
+#include "colors.h"
+#include "context.h"
+#include "colormac.h"
+#include "dd.h"
+#include "depth.h"
+#include "extensions.h"
+#include "macros.h"
+#include "matrix.h"
+// #include "mem.h"
+//#include "mmath.h"
+#include "mtypes.h"
+#include "texformat.h"
+#include "texstore.h"
+#include "teximage.h"
+#include "vbo/vbo.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "swrast/s_context.h"
+#include "swrast/s_depth.h"
+#include "swrast/s_lines.h"
+#include "swrast/s_triangle.h"
+#include "swrast/s_trispan.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+
+#include "dglcontext.h"
+#include "gld_driver.h"
+
+//---------------------------------------------------------------------------
+//---------------------------------------------------------------------------
+
+DGL_pixelFormat pfTemplateMesaSW =
+{
+    {
+	sizeof(PIXELFORMATDESCRIPTOR),	// Size of the data structure
+		1,							// Structure version - should be 1
+									// Flags:
+		PFD_DRAW_TO_WINDOW |		// The buffer can draw to a window or device surface.
+		PFD_DRAW_TO_BITMAP |		// The buffer can draw to a bitmap. (DaveM)
+		PFD_SUPPORT_GDI |			// The buffer supports GDI drawing. (DaveM)
+		PFD_SUPPORT_OPENGL |		// The buffer supports OpenGL drawing.
+		PFD_DOUBLEBUFFER |			// The buffer is double-buffered.
+		0,							// Placeholder for easy commenting of above flags
+		PFD_TYPE_RGBA,				// Pixel type RGBA.
+		32,							// Total colour bitplanes (excluding alpha bitplanes)
+		8, 0,						// Red bits, shift
+		8, 8,						// Green bits, shift
+		8, 16,						// Blue bits, shift
+		8, 24,						// Alpha bits, shift (destination alpha)
+		64,							// Accumulator bits (total)
+		16, 16, 16, 16,				// Accumulator bits: Red, Green, Blue, Alpha
+		16,							// Depth bits
+		8,							// Stencil bits
+		0,							// Number of auxiliary buffers
+		0,							// Layer type
+		0,							// Specifies the number of overlay and underlay planes.
+		0,							// Layer mask
+		0,							// Specifies the transparent color or index of an underlay plane.
+		0							// Damage mask
+	},
+	0,	// Unused
+};
+
+//---------------------------------------------------------------------------
+// Extensions
+//---------------------------------------------------------------------------
+
+typedef struct {
+	PROC proc;	// address of the extension entry point
+	char *name;	// entry-point name string paired with proc
+}  GLD_extension;
+
+static GLD_extension GLD_extList[] = {	// each string must spell its entry point's GL name exactly
+#ifdef GL_EXT_polygon_offset
+    {	(PROC)glPolygonOffsetEXT,		"glPolygonOffsetEXT"		},
+#endif
+    {	(PROC)glBlendEquationEXT,		"glBlendEquationEXT"		},
+    {	(PROC)glBlendColorEXT,			"glBlendColorEXT"			},
+    {	(PROC)glVertexPointerEXT,		"glVertexPointerEXT"		},
+    {	(PROC)glNormalPointerEXT,		"glNormalPointerEXT"		},
+    {	(PROC)glColorPointerEXT,		"glColorPointerEXT"			},
+    {	(PROC)glIndexPointerEXT,		"glIndexPointerEXT"			},
+    {	(PROC)glTexCoordPointerEXT,		"glTexCoordPointerEXT"		},
+    {	(PROC)glEdgeFlagPointerEXT,		"glEdgeFlagPointerEXT"		},
+    {	(PROC)glGetPointervEXT,			"glGetPointervEXT"			},
+    {	(PROC)glArrayElementEXT,		"glArrayElementEXT"			},
+    {	(PROC)glDrawArraysEXT,			"glDrawArraysEXT"			},
+    {	(PROC)glAreTexturesResidentEXT,	"glAreTexturesResidentEXT"	},
+    {	(PROC)glBindTextureEXT,			"glBindTextureEXT"			},
+    {	(PROC)glDeleteTexturesEXT,		"glDeleteTexturesEXT"		},
+    {	(PROC)glGenTexturesEXT,			"glGenTexturesEXT"			},
+    {	(PROC)glIsTextureEXT,			"glIsTextureEXT"			},
+    {	(PROC)glPrioritizeTexturesEXT,	"glPrioritizeTexturesEXT"	},
+    {	(PROC)glCopyTexSubImage3DEXT,	"glCopyTexSubImage3DEXT"	},
+    {	(PROC)glTexImage3DEXT,			"glTexImage3DEXT"			},
+    {	(PROC)glTexSubImage3DEXT,		"glTexSubImage3DEXT"		},
+    {	(PROC)glPointParameterfEXT,		"glPointParameterfEXT"		},
+    {	(PROC)glPointParameterfvEXT,	"glPointParameterfvEXT"		},
+    {	(PROC)glLockArraysEXT,			"glLockArraysEXT"			},
+    {	(PROC)glUnlockArraysEXT,		"glUnlockArraysEXT"			},
+	{	NULL,							"\0"						}	// terminating entry: NULL proc, empty name
+};
+
+//---------------------------------------------------------------------------
+// WMesa Internal Functions
+//---------------------------------------------------------------------------
+
+#define PAGE_FILE	INVALID_HANDLE_VALUE	// "no file" handle; pointer-sized, unlike the literal 0xffffffff
+
+#define REDBITS		0x03	// 3:3:2 palette index layout used by wmCreatePalette(): red width
+#define REDSHIFT	0x00	// red position
+#define GREENBITS	0x03	// green width
+#define GREENSHIFT	0x03	// green position
+#define BLUEBITS	0x02	// blue width
+#define BLUESHIFT	0x06	// blue position
+
+typedef struct _dibSection {
+	HDC 	hDC;		// memory DC created in wmCreateDIBSection(); the DIB is selected into it
+	HANDLE	hFileMap;	// file-mapping object returned by CreateFileMapping()
+	BOOL	fFlushed;
+	LPVOID	base;		// address of the view returned by MapViewOfFile()
+} WMDIBSECTION, *PWMDIBSECTION;
+
+typedef struct wmesa_context {
+	HWND				Window;		// window whose client rect buffer_size() reports
+	HDC 				hDC;
+	HPALETTE			hPalette;
+	HPALETTE			hOldPalette;
+	HPEN				hPen;
+	HPEN				hOldPen;
+	HCURSOR 			hOldCursor;
+	COLORREF			crColor;
+	// 3D projection stuff
+	RECT				drawRect;
+	UINT				uiDIBoffset;
+	// OpenGL stuff
+	HPALETTE			hGLPalette;
+	GLuint				width;
+	GLuint				height;
+	GLuint				ScanWidth;	// distance between scanlines, used when addressing ScreenMem/pbPixels
+	GLboolean			db_flag;	//* double buffered?
+	GLboolean			rgb_flag;	//* RGB mode?
+	GLboolean			dither_flag;	//* use dither when 256 color mode for RGB?
+	GLuint				depth;		//* bits per pixel (1, 8, 24, etc)
+	ULONG				pixel;	// current color index or RGBA pixel value
+	ULONG				clearpixel; //* pixel for clearing the color buffers
+	PBYTE				ScreenMem; // WinG memory
+	BITMAPINFO			*IndexFormat;
+	HPALETTE			hPal; // Current Palette
+	HPALETTE			hPalHalfTone;
+	
+	// Off-screen DIB section state, filled in by wmCreateBackingStore()/wmCreateDIBSection()
+	WMDIBSECTION		dib;
+	BITMAPINFO			bmi;
+	HBITMAP 			hbmDIB;
+	HBITMAP 			hOldBitmap;
+	HBITMAP 			Old_Compat_BM;
+	HBITMAP 			Compat_BM;			  // Bitmap for double buffering
+	PBYTE				pbPixels;	// pixel memory of the DIB section
+	int 				nColors;
+	BYTE				cColorBits;	// bits per pixel of the backing store
+	int 				pixelformat;	// one of the PF_* values, see wmSetPixelFormat()
+	
+	RECT					rectOffScreen;
+	RECT					rectSurface;
+//	HWND					hwnd;
+	DWORD					pitch;
+	PBYTE					addrOffScreen;
+
+	// We always double-buffer, for performance reasons, but
+	// we need to know which of SwapBuffers() or glFlush() to
+	// handle. If we're emulating, then we update on Flush(),
+	// otherwise we update on SwapBuffers(). KeithH
+	BOOL				bEmulateSingleBuffer;
+} WMesaContext, *PWMC;
+
+#define GLD_GET_WMESA_DRIVER(c)	(WMesaContext*)(c)->glPriv
+
+// TODO:
+GLint stereo_flag = 0 ;
+
+/* If we are double-buffering, we want to get the DC for the
+ * off-screen DIB, otherwise the DC for the window.
+ */
+#define DD_GETDC ((Current->db_flag) ? Current->dib.hDC : Current->hDC )	// needs a local named Current
+#define DD_RELEASEDC	// expands to nothing
+
+#define FLIP(Y)  (Current->height-(Y)-1)	// mirror a row index vertically; needs a local named Current
+
+struct DISPLAY_OPTIONS {
+	int  stereo;
+	int  fullScreen;
+	int	 mode;
+	int	 bpp;
+};
+
+struct DISPLAY_OPTIONS displayOptions;
+
+//---------------------------------------------------------------------------
+
+static unsigned char threeto8[8] = {	// 8-bit value for each 3-bit field value (see componentFromIndex)
+  0, 0111>>1, 0222>>1, 0333>>1, 0444>>1, 0555>>1, 0666>>1, 0377	// octal literals
+};
+
+static unsigned char twoto8[4] = {	// 8-bit value for each 2-bit field value
+  0, 0x55, 0xaa, 0xff
+};
+
+static unsigned char oneto8[2] = {	// 8-bit value for each 1-bit field value
+  0, 255
+};
+
+//---------------------------------------------------------------------------
+
+/* Dither an 8-bit RGB triple to an index into the WinG halftone palette.
+ * red/green/blue are reduced to their low 8 bits before every table lookup;
+ * pixel and scanline select one cell of the 8x8 halftone matrix. */
+BYTE DITHER_RGB_2_8BIT( int red, int green, int blue, int pixel, int scanline)
+{
+  const char unsigned r = (char unsigned)red;
+  const char unsigned g = (char unsigned)green;
+  const char unsigned b = (char unsigned)blue;
+  //*** one threshold serves all three channels; "& 7" equals "% 8" for
+  //*** non-negative coordinates and stays in range for negative ones.
+  const char unsigned threshold = aHalftone8x8[(pixel & 7)*8 + (scanline & 7)];
+  char unsigned redtemp, greentemp, bluetemp, paletteindex;
+  //*** each channel becomes its aDividedBy51 entry, bumped by one when
+  //*** its aModulo51 entry exceeds the halftone threshold.
+  redtemp   = aDividedBy51[r] + (aModulo51[r] > threshold);
+  greentemp = aDividedBy51[g] + (aModulo51[g] > threshold);
+  bluetemp  = aDividedBy51[b] + (aModulo51[b] > threshold);
+
+  //*** recombine the halftoned rgb values into a palette index
+  paletteindex = redtemp + aTimes6[greentemp] + aTimes36[bluetemp];
+
+  //*** and translate through the wing halftone palette translation vector.
+  return aWinGHalftoneTranslation[paletteindex];
+}
+
+//---------------------------------------------------------------------------
+
+/* Expand one bit field of a palette index to an 8-bit intensity.
+ * The field is nbits wide and starts at bit position shift of i; it is
+ * looked up in oneto8, twoto8 or threeto8 according to its width.
+ * Widths other than 1, 2 or 3 yield 0. */
+static unsigned char componentFromIndex(UCHAR i, UINT nbits, UINT shift)
+{
+  const unsigned char field = i >> shift;
+
+  /* 1-bit field */
+  if (nbits == 1)
+    return oneto8[field & 0x1];
+
+  /* 2-bit field */
+  if (nbits == 2)
+    return twoto8[field & 0x3];
+
+  /* 3-bit field */
+  if (nbits == 3)
+    return threeto8[field & 0x7];
+
+  /* unsupported field width */
+  return 0;
+}
+
+//---------------------------------------------------------------------------
+
+
+/* Write one RGB pixel: into the off-screen DIB when double-buffered, else via SetPixel(). */
+void wmSetPixel(PWMC pwc, int iScanLine, int iPixel, BYTE r, BYTE g, BYTE b)
+{
+	WMesaContext *Current = pwc;
+
+	// Test for invalid scanline parameter. KeithH
+	if ((iScanLine < 0) || (iScanLine >= pwc->height))
+		return;
+	// Same for the column, so a stray x cannot address memory outside its scanline.
+	if ((iPixel < 0) || (iPixel >= pwc->width))
+		return;
+
+  if (Current->db_flag) {
+    UINT    nBypp = pwc->cColorBits >> 3;
+    LPBYTE  lpb = pwc->pbPixels + pwc->ScanWidth * iScanLine + iPixel * nBypp;
+    // (the unused "iPixel % nBypp" is gone: it divided by zero for cColorBits < 8)
+    if(nBypp == 1){
+      if(pwc->dither_flag)
+	*lpb = DITHER_RGB_2_8BIT(r,g,b,iScanLine,iPixel);
+      else
+	*lpb = BGR8(r,g,b);
+    }
+    else if(nBypp == 2)
+      *((LPWORD)lpb) = BGR16(r,g,b);
+    else if (nBypp == 3)
+      *((LPDWORD)lpb) = BGR24(r,g,b);	// NOTE(review): 4-byte store for a 3-byte pixel - confirm intended
+    else if (nBypp == 4)
+      *((LPDWORD)lpb) = BGR32(r,g,b);
+  }
+  else{
+    SetPixel(Current->hDC, iPixel, iScanLine, RGB(r,g,b));
+  }
+}
+
+//---------------------------------------------------------------------------
+
+/* Create the file-mapping-backed DIB section used as the off-screen buffer
+ * and select it into a new memory DC (pwc->dib.hDC). */
+void  wmCreateDIBSection(
+  HDC   hDC,   // handle of device context (not referenced by this function)
+  PWMC pwc,    // context that receives the DIB section and its bookkeeping
+  CONST BITMAPINFO *pbmi,  // bitmap size, format, and color data
+  UINT iUsage  // color data type indicator: RGB values or palette indices
+  )
+{
+  DWORD   dwSize = 0;
+  DWORD   dwScanWidth;
+  UINT    nBypp = pwc->cColorBits / 8;
+  HDC     hic;
+
+  // Bytes per scanline, rounded up to a multiple of four.
+  dwScanWidth = (((pwc->ScanWidth * nBypp)+ 3) & ~3);
+  pwc->ScanWidth =pwc->pitch = dwScanWidth;
+
+  if (stereo_flag)
+    pwc->ScanWidth = 2* pwc->pitch;
+
+  dwSize = sizeof(BITMAPINFO) + (dwScanWidth * pwc->height);
+
+  pwc->dib.hFileMap = CreateFileMapping((HANDLE)PAGE_FILE,
+					NULL,
+					PAGE_READWRITE | SEC_COMMIT,
+					0,
+					dwSize,
+					NULL);
+
+  if (!pwc->dib.hFileMap)
+    return;
+
+  pwc->dib.base = MapViewOfFile(pwc->dib.hFileMap,
+				FILE_MAP_ALL_ACCESS,
+				0,
+				0,
+				0);
+
+  if(!pwc->dib.base){
+    // Forget the handle once it is closed, so that a later
+    // wmDeleteBackingStore() does not close it a second time.
+    CloseHandle(pwc->dib.hFileMap);
+    pwc->dib.hFileMap = NULL;
+    return;
+  }
+
+  CopyMemory(pwc->dib.base, pbmi, sizeof(BITMAPINFO));
+
+  hic = CreateIC("display", NULL, NULL, NULL);
+  pwc->dib.hDC = CreateCompatibleDC(hic);
+
+  pwc->hbmDIB = CreateDIBSection(hic,
+				 &(pwc->bmi),
+				 (iUsage ? DIB_PAL_COLORS : DIB_RGB_COLORS),
+				 (VOID **)&(pwc->pbPixels),
+				 pwc->dib.hFileMap,
+				 0);
+  pwc->ScreenMem = pwc->addrOffScreen = pwc->pbPixels;
+  pwc->hOldBitmap = SelectObject(pwc->dib.hDC, pwc->hbmDIB);
+
+  DeleteDC(hic);
+}
+
+//---------------------------------------------------------------------------
+
+/* Build the 3:3:2 logical palette used for 8-bit rendering.  The handle is
+ * stored in pwdc->hGLPalette, plus a second palette in pwdc->hPalette when
+ * double-buffered.  Nothing is created if the LOGPALETTE allocation fails. */
+void wmCreatePalette( PWMC pwdc )
+{
+  /* Create a compressed and re-expanded 3:3:2 palette */
+  int            i;
+  LOGPALETTE     *pPal;
+  BYTE           rb, rs, gb, gs, bb, bs;
+  size_t         cbPal;
+
+  pwdc->nColors = 0x100;
+
+  /* Room for the LOGPALETTE itself plus nColors palette entries,
+   * the same size the block has always been given. */
+  cbPal = sizeof(LOGPALETTE) + pwdc->nColors * sizeof(PALETTEENTRY);
+  pPal = (PLOGPALETTE)malloc(cbPal);
+  if (!pPal)
+    return;  /* out of memory: memset() below must not see NULL */
+  memset( pPal, 0, cbPal );
+
+  pPal->palVersion = 0x300;
+  pPal->palNumEntries = pwdc->nColors;
+
+  rb = REDBITS;
+  rs = REDSHIFT;
+  gb = GREENBITS;
+  gs = GREENSHIFT;
+  bb = BLUEBITS;
+  bs = BLUESHIFT;
+
+  /* The entries are the same whether or not we are double-buffered:
+   * each index is split into its red, green and blue bit fields and
+   * every field is expanded to 8 bits. */
+  for (i = 0; i < pwdc->nColors; i++) {
+    pPal->palPalEntry[i].peRed = componentFromIndex( i, rb, rs );
+    pPal->palPalEntry[i].peGreen = componentFromIndex( i, gb, gs );
+    pPal->palPalEntry[i].peBlue = componentFromIndex( i, bb, bs );
+    pPal->palPalEntry[i].peFlags = 0;
+  }
+
+  /* hGLPalette is always created; hPalette only when double-buffered. */
+  pwdc->hGLPalette = CreatePalette( pPal );
+  if (pwdc->db_flag) {
+    /* Need to make two palettes: one for the screen DC and one for the DIB. */
+    pwdc->hPalette = CreatePalette( pPal );
+  }
+
+  free(pPal);
+}
+
+//---------------------------------------------------------------------------
+
+/* This function sets the color table of a DIB section
+ * to match that of the destination DC (entries come from pwc->hGLPalette).
+ * Returns FALSE if the temporary tables cannot be allocated.
+ */
+BOOL wmSetDibColors(PWMC pwc)
+{
+  RGBQUAD         *pColTab, *pRGB;
+  PALETTEENTRY    *pPal, *pPE;
+  int             i, nColors;
+  BOOL            bRet=TRUE;
+
+  /* Build a color table in the DIB that maps to the selected palette in the DC. */
+  nColors = 1 << pwc->cColorBits;
+  pPal = (PALETTEENTRY *)malloc( nColors * sizeof(PALETTEENTRY));
+  pColTab = (RGBQUAD *)malloc( nColors * sizeof(RGBQUAD));
+  if (!pPal || !pColTab) {
+    free( pColTab );
+    free( pPal );
+    return FALSE;
+  }
+  memset( pPal, 0, nColors * sizeof(PALETTEENTRY) );
+  memset( pColTab, 0, nColors * sizeof(RGBQUAD) );  /* rgbReserved stays 0 */
+  GetPaletteEntries( pwc->hGLPalette, 0, nColors, pPal );
+  for (i = 0, pRGB = pColTab, pPE = pPal; i < nColors; i++, pRGB++, pPE++) {
+    pRGB->rgbRed = pPE->peRed;
+    pRGB->rgbGreen = pPE->peGreen;
+    pRGB->rgbBlue = pPE->peBlue;
+  }
+  if(pwc->db_flag)
+    bRet = SetDIBColorTable(pwc->dib.hDC, 0, nColors, pColTab );
+
+  free( pColTab );
+  free( pPal );
+  return bRet;
+}
+
+//---------------------------------------------------------------------------
+
+/* Derive wc->cColorBits and wc->pixelformat: RGB contexts take the bit
+ * depth reported for hDC, colour-index contexts are fixed at 8 bits. */
+static void wmSetPixelFormat( PWMC wc, HDC hDC)
+{
+  wc->cColorBits = wc->rgb_flag ? GetDeviceCaps(hDC, BITSPIXEL) : 8;
+
+  switch(wc->cColorBits){
+  case 32:
+    wc->pixelformat = PF_8R8G8B;
+    break;
+  case 16:
+    wc->pixelformat = PF_5R6G5B;
+    break;
+  case 8:
+    /* 8 bits: dithered only when the dither flag is exactly GL_TRUE */
+    wc->pixelformat = (wc->dither_flag != GL_TRUE) ? PF_INDEX8 : PF_DITHER8;
+    break;
+  default:
+    /* every other depth is reported as a bad format */
+    wc->pixelformat = PF_BADFORMAT;
+    break;
+  }
+}
+
+//---------------------------------------------------------------------------
+
+/*
+ * This function creates the DIB section that is used for combined
+ * GL and GDI calls
+ */
+BOOL wmCreateBackingStore(PWMC pwc, long lxSize, long lySize)
+{
+  BITMAPINFOHEADER *hdr = &pwc->bmi.bmiHeader;
+  int usage;
+
+  /* Describe an uncompressed DIB of lxSize by lySize (height stored negated). */
+  hdr->biSize = sizeof(BITMAPINFOHEADER);
+  hdr->biWidth = lxSize;
+  hdr->biHeight = -lySize;
+  hdr->biPlanes = 1;
+  /* RGB contexts follow the depth of pwc->hDC; colour-index ones use 8 bits. */
+  hdr->biBitCount = pwc->rgb_flag ? GetDeviceCaps(pwc->hDC, BITSPIXEL) : 8;
+  hdr->biCompression = BI_RGB;
+  hdr->biSizeImage = 0;
+  hdr->biXPelsPerMeter = 0;
+  hdr->biYPelsPerMeter = 0;
+  hdr->biClrUsed = 0;
+  hdr->biClrImportant = 0;
+
+  /* Depths of 8 bits or fewer address colours through palette indices. */
+  usage = (hdr->biBitCount > 8) ? DIB_RGB_COLORS : DIB_PAL_COLORS;
+
+  /* Record the geometry; ScanWidth/pitch start out as the pixel width. */
+  pwc->cColorBits = hdr->biBitCount;
+  pwc->ScanWidth = pwc->pitch = lxSize;
+  pwc->width = lxSize;
+  pwc->height = lySize;
+
+  wmCreateDIBSection(pwc->hDC, pwc, &pwc->bmi, usage);
+  /* A paletted DIB gets its palette the first time one is needed. */
+  if (usage == DIB_PAL_COLORS && !pwc->hGLPalette) {
+    wmCreatePalette( pwc );
+    wmSetDibColors( pwc );
+  }
+  wmSetPixelFormat(pwc, pwc->hDC);
+  return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+/*
+ * Free up the dib section that was created
+ */
+BOOL wmDeleteBackingStore(PWMC pwc)
+{
+  SelectObject(pwc->dib.hDC, pwc->hOldBitmap);	// put the previously selected bitmap back into the memory DC
+  DeleteDC(pwc->dib.hDC);			// destroy the memory DC
+  DeleteObject(pwc->hbmDIB);			// destroy the DIB section bitmap
+  UnmapViewOfFile(pwc->dib.base);		// release the mapped view
+  CloseHandle(pwc->dib.hFileMap);		// close the file-mapping object
+  return TRUE;					// always TRUE; the results of the calls above are not checked
+}
+
+//---------------------------------------------------------------------------
+
+/*
+ * Blit memory DC to screen DC
+ */
+BOOL wmFlush(PWMC pwc, HDC hDC)
+{
+  // Whether a flush is wanted at all is decided by the caller, which now
+  // checks bEmulateSingleBuffer (KeithH); this function always copies.
+  const int cx = pwc->width;
+  const int cy = pwc->height;
+
+  // Copy the whole off-screen DIB to the top-left corner of hDC and
+  // hand BitBlt's result back to the caller.
+  return BitBlt(hDC,
+		0, 0, cx, cy,
+		pwc->dib.hDC,
+		0, 0,
+		SRCCOPY);
+}
+
+//---------------------------------------------------------------------------
+// Support Functions
+//---------------------------------------------------------------------------
+
+/* Flush the context.  When single buffering is being emulated the
+ * off-screen image is copied to the window here; otherwise nothing
+ * happens in this function and the copy is left to whoever swaps the
+ * buffers. */
+static void flush(GLcontext* ctx)
+{
+	GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+	WMesaContext *wmesa = GLD_GET_WMESA_DRIVER(gldCtx);
+
+	// Only flush if we're not in double-buffer mode. KeithH
+	// The demo fractal.c calls glutSwapBuffers() then glFlush()!
+	if (!wmesa->bEmulateSingleBuffer)
+		return;
+
+	// Emulated single buffering: show the rendered image right away.
+	wmFlush(wmesa, wmesa->hDC);
+}
+
+
+//---------------------------------------------------------------------------
+
+/*
+ * Set the color used to clear the color buffer.
+ */
+//static void clear_color( GLcontext* ctx, const GLchan color[4] )
+// Changed for Mesa 5.x. KeithH
+static void clear_color(
+	GLcontext* ctx,
+	const GLfloat color[4])
+{
+	GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+	WMesaContext *wmesa = GLD_GET_WMESA_DRIVER(gldCtx);
+	GLubyte red, green, blue;	// color[3] (alpha) is not part of the stored clear pixel
+	CLAMPED_FLOAT_TO_UBYTE(red, color[0]);
+	CLAMPED_FLOAT_TO_UBYTE(green, color[1]);
+	CLAMPED_FLOAT_TO_UBYTE(blue, color[2]);
+	wmesa->clearpixel = RGB(red, green, blue);
+}
+
+
+//---------------------------------------------------------------------------
+
+
+/*
+ * Clear the specified region of the color buffer using the clear color
+ * or index as specified by one of the two functions above.
+ *
+ * This procedure clears either the front and/or the back COLOR buffers.
+ * Only the "left" buffer is cleared since we are not stereo.
+ * Clearing of the other non-color buffers is left to the swrast.
+ * We also only clear the color buffers if the color masks are all 1's.
+ * Otherwise, we let swrast do it.  The back buffer is filled by whole
+ * scanlines starting at its first one; x, y and width apply to the front.
+ */
+static void clear(GLcontext* ctx, GLbitfield mask,
+	     GLboolean all, GLint x, GLint y, GLint width, GLint height)
+{
+	GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+	WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  DWORD   dwColor;
+  WORD    wColor;
+  BYTE    bColor;
+  LPDWORD lpdw = (LPDWORD)Current->pbPixels;
+  LPWORD  lpw = (LPWORD)Current->pbPixels;
+  LPBYTE  lpb = Current->pbPixels;
+  int     lines;
+  const   GLuint *colorMask = (GLuint *) &ctx->Color.ColorMask;
+  
+  if (all){
+    x=y=0;
+    width=Current->width;
+    height=Current->height;
+  }
+  
+  
+  /* sanity check - can't have right(stereo) buffers */
+  assert((mask & (DD_FRONT_RIGHT_BIT | DD_BACK_RIGHT_BIT)) == 0);
+  
+  /* clear alpha (front-left or back-left; right buffers were ruled out above) */
+  if ((mask & (DD_FRONT_LEFT_BIT | DD_BACK_LEFT_BIT)) &&
+      ctx->DrawBuffer->UseSoftwareAlphaBuffers &&
+      ctx->Color.ColorMask[ACOMP]) {
+      _swrast_clear_alpha_buffers( ctx );
+  }
+  
+  if (*colorMask == 0xffffffff && ctx->Color.IndexMask == 0xffffffff) {
+      if (mask & DD_BACK_LEFT_BIT) {
+	  /* Double-buffering - clear back buffer */
+	  UINT    nBypp = Current->cColorBits / 8;
+	  int     i = 0;
+	  int     iSize = 0;
+	  
+	  assert(Current->db_flag==GL_TRUE); /* we'd better be double buffer */
+	  if(nBypp ==1 ){
+	      iSize = Current->width/4;
+	      bColor  = BGR8(GetRValue(Current->clearpixel),
+			     GetGValue(Current->clearpixel),
+			     GetBValue(Current->clearpixel));
+	      wColor  = MAKEWORD(bColor,bColor);
+	      dwColor = MAKELONG(wColor, wColor);
+	  }
+	  if(nBypp == 2){
+	      iSize = Current->width / 2;
+	      wColor = BGR16(GetRValue(Current->clearpixel),
+			     GetGValue(Current->clearpixel),
+			     GetBValue(Current->clearpixel));
+	      dwColor = MAKELONG(wColor, wColor);
+	  }
+	  else if(nBypp == 4){
+	      iSize = Current->width;
+	      dwColor = BGR32(GetRValue(Current->clearpixel),
+			      GetGValue(Current->clearpixel),
+			      GetBValue(Current->clearpixel));
+	  }
+	  
+	  /* clear a line */
+	  while(i < iSize){
+	      *lpdw = dwColor;
+	      lpdw++;
+	      i++;
+	  }
+	  
+	  /* This is the 24bit case.  NOTE(review): only width*3/4 pixels are written here - confirm */
+	  if (nBypp == 3) {
+	      iSize = Current->width *3/4;
+	      dwColor = BGR24(GetRValue(Current->clearpixel),
+			      GetGValue(Current->clearpixel),
+			      GetBValue(Current->clearpixel));
+	      while(i < iSize){
+		  *lpdw = dwColor;
+		  lpb += nBypp;
+		  lpdw = (LPDWORD)lpb;
+		  i++;
+	      }
+	  }
+	  
+	  if (stereo_flag)
+	      lines = height /2;
+	  else
+	      lines = height;
+	  /* Copy the cleared first line to every other line of the buffer.
+	   * Start at the second scanline: the last one must be cleared too,
+	   * and memcpy() must not be asked to copy the first line onto itself. */
+	  lpb = Current->pbPixels + Current->ScanWidth;
+	  for (i = 1; i < lines; i++) {
+	      memcpy(lpb, Current->pbPixels, iSize*4);
+	      lpb += Current->ScanWidth;
+	  }
+	  mask &= ~DD_BACK_LEFT_BIT;
+      } /* double-buffer */
+      
+      if (mask & DD_FRONT_LEFT_BIT) {
+	  /* single-buffer */
+	  HDC DC=DD_GETDC;
+	  HPEN Pen=CreatePen(PS_SOLID,1,Current->clearpixel);
+	  HBRUSH Brush=CreateSolidBrush(Current->clearpixel);
+	  HPEN Old_Pen=SelectObject(DC,Pen);
+	  HBRUSH Old_Brush=SelectObject(DC,Brush);
+	  Rectangle(DC,x,y,x+width,y+height);
+	  SelectObject(DC,Old_Pen);
+	  SelectObject(DC,Old_Brush);
+	  DeleteObject(Pen);
+	  DeleteObject(Brush);
+	  DD_RELEASEDC;
+	  mask &= ~DD_FRONT_LEFT_BIT;
+      } /* single-buffer */
+  } /* if masks are all 1's */
+    
+  /* Call swrast if there is anything left to clear (like DEPTH) */
+  if (mask)
+      _swrast_Clear( ctx, mask, all, x, y, width, height );
+}
+  
+
+//---------------------------------------------------------------------------
+
+
+static void enable( GLcontext* ctx, GLenum pname, GLboolean enable )
+{
+	GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+	WMesaContext *wmesa = GLD_GET_WMESA_DRIVER(gldCtx);
+	GLboolean eightBit;
+
+	// GL_DITHER is the only capability this function reacts to.
+	if (!wmesa || pname != GL_DITHER)
+		return;
+
+	eightBit = (wmesa->cColorBits == 8);
+
+	if (enable != GL_FALSE && wmesa->rgb_flag && eightBit) {
+		// Dithering is honoured only for 8-bit RGB rendering.
+		wmesa->pixelformat = PF_DITHER8;
+		wmesa->dither_flag = GL_TRUE;
+		return;
+	}
+
+	// Disabled, or requested where it cannot apply: dithering stays off.
+	wmesa->dither_flag = GL_FALSE;
+	if (enable == GL_FALSE && eightBit)
+		wmesa->pixelformat = PF_INDEX8;
+}
+
+//---------------------------------------------------------------------------
+
+static GLboolean set_draw_buffer( GLcontext* ctx, GLenum mode )
+{
+  /* TODO: this could be better */
+  /* Only the two "left" colour buffers are accepted as draw targets; */
+  /* any other mode is refused. */
+  const GLboolean isLeftBuffer =
+    (mode == GL_BACK_LEFT) || (mode == GL_FRONT_LEFT);
+
+  return isLeftBuffer ? GL_TRUE : GL_FALSE;
+}
+
+//---------------------------------------------------------------------------
+
+
+static void set_read_buffer(GLcontext *ctx, GLframebuffer *colorBuffer,
+                            GLenum buffer )
+{
+  /* XXX todo */
+  return;	/* not implemented: the requested read buffer is ignored */
+}
+
+
+//---------------------------------------------------------------------------
+
+
+/* Return characteristics of the output buffer. */
+//static void buffer_size( GLcontext* ctx, GLuint *width, GLuint *height )
+// Altered for Mesa 5.x. KeithH
+static void buffer_size(
+	GLframebuffer *buffer,
+	GLuint *width,
+	GLuint *height)
+{
+	// For some reason the context is not passed into this function.
+	// Therefore we have to explicitly retrieve it.
+	GET_CURRENT_CONTEXT(ctx);
+
+	GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+	WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+	RECT clientRect;
+	GLuint remainder;
+
+	// Report the client area of the window (right/bottom of its rect).
+	GetClientRect(Current->Window,&clientRect);
+	*width=clientRect.right;
+	*height=clientRect.bottom;
+
+	// Nothing more to do unless the size differs from the recorded one.
+	if ((*width)==Current->width && (*height)==Current->height)
+		return;
+
+	Current->width=*width;
+	Current->height=*height;
+	// Round the scan width up to a multiple of sizeof(long).
+	Current->ScanWidth=Current->width;
+	remainder=Current->ScanWidth%sizeof(long);
+	if (remainder!=0)
+		Current->ScanWidth+=(sizeof(long)-remainder);
+
+	// Only a double-buffered, non-dithered RGB context gets a new backing store here.
+	if (Current->db_flag && Current->rgb_flag==GL_TRUE && Current->dither_flag!=GL_TRUE){
+		wmDeleteBackingStore(Current);
+		wmCreateBackingStore(Current, Current->width, Current->height);
+	}
+}
+
+
+
+/**********************************************************************/
+/*****           Accelerated point, line, polygon rendering       *****/
+/**********************************************************************/
+
+/* Accelerated routines are not implemented in 4.0. See OSMesa for ideas. */
+
+static void fast_rgb_points( GLcontext* ctx, GLuint first, GLuint last )
+{	/* empty stub: no accelerated point rendering is implemented */
+}
+
+//---------------------------------------------------------------------------
+
+/* Return pointer to accelerated points function */
+extern tnl_points_func choose_points_function( GLcontext* ctx )
+{
+  return NULL;	/* no accelerated points function is provided */
+}
+
+//---------------------------------------------------------------------------
+
+static void fast_flat_rgb_line( GLcontext* ctx, GLuint v0, 
+				GLuint v1, GLuint pv )
+{	/* empty stub: no accelerated line rendering is implemented */
+}
+
+//---------------------------------------------------------------------------
+
+static tnl_line_func choose_line_function( GLcontext* ctx )
+{
+  return NULL;	/* no accelerated line function, same answer as choose_points_function() */
+}
+
+/**********************************************************************/
+/*****                 Span-based pixel drawing                   *****/
+/**********************************************************************/
+
+
+/* Write a horizontal span of 32-bit color-index pixels with a boolean mask. */
+static void write_ci32_span( const GLcontext* ctx,
+                             GLuint n, GLint x, GLint y,
+                             const GLuint index[],
+                             const GLubyte mask[] )
+{
+	GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+	WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  PBYTE dst = Current->ScreenMem+FLIP(y)*Current->ScanWidth+x;	/* FLIP() refers to Current by name */
+  GLuint k = 0;
+  assert(Current->rgb_flag==GL_FALSE);
+  for ( ; k<n; k++, dst++)
+    if (mask[k])
+      *dst = index[k];	/* the 32-bit index is narrowed to one byte */
+}
+
+
+//---------------------------------------------------------------------------
+
+/* Write a horizontal span of 8-bit color-index pixels with a boolean mask. */
+static void write_ci8_span( const GLcontext* ctx,
+                            GLuint n, GLint x, GLint y,
+                            const GLubyte index[],
+                            const GLubyte mask[] )
+{
+	GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+	WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  /* start of the span in the buffer; FLIP() refers to Current by name */
+  PBYTE span = Current->ScreenMem + FLIP(y)*Current->ScanWidth + x;
+  GLuint k;
+  assert(Current->rgb_flag==GL_FALSE);
+  for (k = 0; k < n; k++)
+    if (mask[k]) span[k] = index[k];
+}
+
+
+//---------------------------------------------------------------------------
+
+
+/*
+ * Write a horizontal span of pixels with a boolean mask.  The current
+ * color index is used for all pixels.
+ */
+static void write_mono_ci_span(const GLcontext* ctx,
+                               GLuint n,GLint x,GLint y,
+                               GLuint colorIndex, const GLubyte mask[])
+{
+	GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+	WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  BYTE *span = Current->ScreenMem + FLIP(y)*Current->ScanWidth + x;
+  const BYTE fill = colorIndex;	/* one byte is stored per pixel */
+  GLuint k;
+  assert(Current->rgb_flag==GL_FALSE);
+  for (k = 0; k < n; k++)
+    if (mask[k]) span[k] = fill;
+}
+
+//---------------------------------------------------------------------------
+
+/*
+ * To improve the performance of this routine, frob the data into an actual
+ * scanline and call bitblt on the complete scan line instead of SetPixel.
+ */
+
+/* Write a horizontal span of RGBA color pixels with a boolean mask. */
+static void write_rgba_span( const GLcontext* ctx, GLuint n, GLint x, GLint y,
+                             const GLubyte rgba[][4], const GLubyte mask[] )
+{
+	GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+	WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  PWMC    pwc = Current;
+  GLuint  i;
+
+  /* Both paths address rows through FLIP(), so flip y once up front. */
+  y=FLIP(y);
+
+  if (pwc->rgb_flag==GL_TRUE)
+    {
+      if (mask) {
+	for (i=0; i<n; i++)
+	  if (mask[i])
+	    wmSetPixel(pwc, y, x + i,
+		       rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP]);
+      }
+      else {
+	for (i=0; i<n; i++)
+	  wmSetPixel(pwc, y, x + i,
+		     rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+      }
+    }
+  else
+    {
+      /* Build the row pointer from the flipped y, the same way the
+       * colour-index span writers (write_ci8_span etc.) address rows. */
+      BYTE *Mem=Current->ScreenMem+y*Current->ScanWidth+x;
+      if (mask) {
+	for (i=0; i<n; i++)
+	  if (mask[i])
+	    Mem[i] = GetNearestPaletteIndex(Current->hPal,
+					    RGB(rgba[i][RCOMP],
+						rgba[i][GCOMP],
+						rgba[i][BCOMP]));
+      }
+      else {
+	for (i=0; i<n; i++)
+	  Mem[i] = GetNearestPaletteIndex(Current->hPal,
+					  RGB(rgba[i][RCOMP],
+					      rgba[i][GCOMP],
+					      rgba[i][BCOMP]));
+      }
+    }
+}
+
+//---------------------------------------------------------------------------
+
+/*
+ * Write a horizontal span of RGB color pixels.  A NULL mask writes all n
+ * pixels; otherwise only pixels with a non-zero mask entry are written.
+ * In RGB mode pixels go through wmSetPixel; otherwise each color is mapped
+ * to the nearest entry of Current->hPal and stored in Current->ScreenMem.
+ */
+static void write_rgb_span( const GLcontext* ctx,
+                            GLuint n, GLint x, GLint y,
+                            const GLubyte rgb[][3], const GLubyte mask[] )
+{
+	GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+	WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  PWMC    pwc = Current;
+
+  if (pwc->rgb_flag==GL_TRUE)
+    {
+      GLuint i;
+      HDC DC=DD_GETDC;
+      y=FLIP(y);
+      for (i=0; i<n; i++)
+	if (!mask || mask[i])
+	  wmSetPixel(pwc, y, x + i,
+		     rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP]);
+      DD_RELEASEDC;
+    }
+  else
+    {
+      GLuint i;
+      /* Flip y before building the row address, matching the color-index
+       * span writers (write_ci8_span, write_mono_ci_span), so this path
+       * targets the same scanline of ScreenMem that they do. */
+      BYTE *Mem=Current->ScreenMem+FLIP(y)*Current->ScanWidth+x;
+      if (mask) {
+	for (i=0; i<n; i++)
+	  if (mask[i])
+	    Mem[i] = GetNearestPaletteIndex(Current->hPal,
+					    RGB(rgb[i][RCOMP],
+						rgb[i][GCOMP],
+						rgb[i][BCOMP]));
+      }
+      else {
+	for (i=0; i<n; i++)
+	  Mem[i] = GetNearestPaletteIndex(Current->hPal,
+					  RGB(rgb[i][RCOMP],
+					      rgb[i][GCOMP],
+					      rgb[i][BCOMP]));
+      }
+    }
+}
+
+//---------------------------------------------------------------------------
+
+/*
+ * Write a masked horizontal span of pixels, all in the single color color[].
+ */
+static void write_mono_rgba_span( const GLcontext* ctx,
+                                  GLuint n, GLint x, GLint y,
+                                  const GLchan color[4], const GLubyte mask[])
+{
+	GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+	WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  ULONG pixel =  RGB( color[RCOMP], color[GCOMP], color[BCOMP] );
+  GLuint i;
+  HDC DC=DD_GETDC;
+  PWMC pwc = Current;
+  assert(Current->rgb_flag==GL_TRUE);
+  y=FLIP(y);
+  if(Current->rgb_flag==GL_TRUE){
+    for (i=0; i<n; i++)
+      if (mask[i])
+	wmSetPixel(pwc,y,x+i,color[RCOMP], color[GCOMP], color[BCOMP]);
+  }
+  else {
+    /* Reached only if assert() is compiled out; SetPixel is (hdc, x, y, color). */
+    for (i=0; i<n; i++)
+      if (mask[i])
+	SetPixel(DC, x+i, y, pixel);
+  }
+  DD_RELEASEDC;
+}
+
+
+
+/**********************************************************************/
+/*****                   Array-based pixel drawing                *****/
+/**********************************************************************/
+
+
+/* Store index[i] at pixel (x[i], y[i]) for every i with a non-zero mask[i]. */
+static void write_ci32_pixels( const GLcontext* ctx,
+                               GLuint n, const GLint x[], const GLint y[],
+                               const GLuint index[], const GLubyte mask[] )
+{
+  GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+  WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  GLuint k;
+  assert(Current->rgb_flag == GL_FALSE);
+  for (k = 0; k < n; k++) {
+    BYTE *dst;
+    if (!mask[k]) continue;
+    dst = Current->ScreenMem + FLIP(y[k]) * Current->ScanWidth + x[k];
+    *dst = index[k];
+  }
+}
+
+
+//---------------------------------------------------------------------------
+
+
+/*
+ * Store the single value colorIndex at each pixel (x[i], y[i]) whose
+ * mask[] entry is non-zero.
+ */
+static void write_mono_ci_pixels( const GLcontext* ctx,
+                                  GLuint n,
+                                  const GLint x[], const GLint y[],
+                                  GLuint colorIndex, const GLubyte mask[] )
+{
+  GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+  WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  GLuint k;
+  assert(Current->rgb_flag == GL_FALSE);
+  for (k = 0; k < n; k++) {
+    BYTE *dst;
+    if (!mask[k]) continue;
+    dst = Current->ScreenMem + FLIP(y[k]) * Current->ScanWidth + x[k];
+    *dst = colorIndex;
+  }
+}
+
+
+//---------------------------------------------------------------------------
+
+
+/* Plot rgba[i] (alpha ignored) at (x[i], y[i]) wherever mask[i] is non-zero. */
+static void write_rgba_pixels( const GLcontext* ctx,
+                               GLuint n, const GLint x[], const GLint y[],
+                               const GLubyte rgba[][4], const GLubyte mask[] )
+{
+  GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+  WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  PWMC pwc = Current;
+  HDC DC = DD_GETDC;
+  GLuint k;
+  assert(Current->rgb_flag == GL_TRUE);
+  for (k = 0; k < n; k++)
+    if (mask[k])
+      wmSetPixel(pwc, FLIP(y[k]), x[k],
+                 rgba[k][RCOMP], rgba[k][GCOMP], rgba[k][BCOMP]);
+  DD_RELEASEDC;
+}
+
+
+//---------------------------------------------------------------------------
+
+
+/*
+ * Plot the single color color[] at each (x[i], y[i]) whose mask[] entry
+ * is non-zero.  RGB mode only.
+ */
+static void write_mono_rgba_pixels( const GLcontext* ctx,
+                                    GLuint n,
+                                    const GLint x[], const GLint y[],
+                                    const GLchan color[4],
+                                    const GLubyte mask[] )
+{
+  GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+  WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  PWMC pwc = Current;
+  HDC DC = DD_GETDC;
+  GLuint k;
+  assert(Current->rgb_flag == GL_TRUE);
+  for (k = 0; k < n; k++)
+    if (mask[k])
+      wmSetPixel(pwc, FLIP(y[k]), x[k],
+                 color[RCOMP], color[GCOMP], color[BCOMP]);
+  DD_RELEASEDC;
+}
+
+/**********************************************************************/
+/*****            Read spans/arrays of pixels                     *****/
+/**********************************************************************/
+
+/* Copy n color-index bytes from the screen buffer row for y into index[]. */
+static void read_ci32_span( const GLcontext* ctx, GLuint n, GLint x, GLint y,
+                            GLuint index[])
+{
+  GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+  WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  const BYTE *src = Current->ScreenMem + FLIP(y) * Current->ScanWidth + x;
+  GLuint k;
+  assert(Current->rgb_flag == GL_FALSE);
+  for (k = 0; k < n; k++)
+    index[k] = src[k];
+}
+
+//---------------------------------------------------------------------------
+
+/* Fetch the color index stored at each (x[i], y[i]) into indx[i]; slots
+ * whose mask[] entry is zero are left untouched. */
+static void read_ci32_pixels( const GLcontext* ctx,
+                              GLuint n, const GLint x[], const GLint y[],
+                              GLuint indx[], const GLubyte mask[] )
+{
+  GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+  WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  GLuint k;
+  assert(Current->rgb_flag == GL_FALSE);
+  for (k = 0; k < n; k++) {
+    if (!mask[k]) continue;
+    indx[k] = *(Current->ScreenMem + FLIP(y[k]) * Current->ScanWidth + x[k]);
+  }
+}
+
+//---------------------------------------------------------------------------
+
+/* Read n pixels of row y, starting at column x, through GetPixel and
+ * unpack them into rgba[]; alpha is always reported as 255. */
+static void read_rgba_span( const GLcontext* ctx,
+                            GLuint n, GLint x, GLint y,
+                            GLubyte rgba[][4] )
+{
+  GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+  WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  HDC DC = DD_GETDC;
+  UINT col;
+  assert(Current->rgb_flag == GL_TRUE);
+  y = Current->height - y - 1;
+  for (col = 0; col < n; col++) {
+    const COLORREF texel = GetPixel(DC, x + col, y);
+    rgba[col][RCOMP] = GetRValue(texel);
+    rgba[col][GCOMP] = GetGValue(texel);
+    rgba[col][BCOMP] = GetBValue(texel);
+    rgba[col][ACOMP] = 255;
+  }
+  DD_RELEASEDC;
+}
+
+//---------------------------------------------------------------------------
+
+/* Read the pixel at each masked (x[i], y[i]) via GetPixel; alpha is 255. */
+static void read_rgba_pixels( const GLcontext* ctx,
+                              GLuint n, const GLint x[], const GLint y[],
+                              GLubyte rgba[][4], const GLubyte mask[] )
+{
+  GLD_context *gldCtx = GLD_GET_CONTEXT(ctx);
+  WMesaContext *Current = GLD_GET_WMESA_DRIVER(gldCtx);
+  HDC DC = DD_GETDC;
+  GLuint k;
+  assert(Current->rgb_flag == GL_TRUE);
+  for (k = 0; k < n; k++) {
+    COLORREF texel;
+    GLint row;
+    if (!mask[k]) continue;
+    row = Current->height - y[k] - 1;
+    texel = GetPixel(DC, x[k], row);
+    rgba[k][RCOMP] = GetRValue(texel);
+    rgba[k][GCOMP] = GetGValue(texel);
+    rgba[k][BCOMP] = GetBValue(texel);
+    rgba[k][ACOMP] = 255;
+  }
+  DD_RELEASEDC;
+}
+
+//---------------------------------------------------------------------------
+
+/* Forward new_state to the swrast, swsetup, vbo and tnl InvalidateState hooks. */
+static void wmesa_update_state(GLcontext *ctx, GLuint new_state)
+{
+	/* Notify each module in turn: swrast, swsetup, vbo, then tnl. */
+	_swrast_InvalidateState(ctx, new_state);
+	_swsetup_InvalidateState(ctx, new_state);
+	_vbo_InvalidateState(ctx, new_state);
+	_tnl_InvalidateState(ctx, new_state);
+}
+
+//---------------------------------------------------------------------------
+
+/*
+ * Installed as ctx->Driver.Viewport.  Currently a no-op: the buffer-resize
+ * call in the body is commented out, so all five arguments are ignored.
+ */
+static void wmesa_viewport(GLcontext *ctx, GLint x, GLint y,
+                           GLsizei w, GLsizei h)
+{
+//	ctx->Driver.ResizeBuffersMESA(ctx);
+}
+
+//---------------------------------------------------------------------------
+
+/*
+ * One-time driver setup: initialise ctx->Driver, override the entries this
+ * driver implements, install the span/pixel callbacks in the swrast
+ * device-driver table, set the TNL pipeline runner, then apply new_state.
+ */
+static void wmesa_update_state_first_time(GLcontext *ctx, GLuint new_state)
+{
+	struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
+	TNLcontext *tnl = TNL_CONTEXT(ctx);
+
+	/* Runs first; the assignments below then overwrite single entries. */
+	_mesa_init_driver_functions(&ctx->Driver);
+
+	/*
+	 * These pointers do not depend on any state changes, so they are set
+	 * in this first-time routine (per the kws / KeithH notes).
+	 */
+	ctx->Driver.GetString     = _gldGetStringGeneric;
+	ctx->Driver.UpdateState   = wmesa_update_state;
+	ctx->Driver.Flush         = flush;
+	ctx->Driver.Enable        = enable;
+
+	/* Buffer, viewport and clear hooks. */
+	ctx->Driver.DrawBuffer    = set_draw_buffer;
+	ctx->Driver.ResizeBuffers = _swrast_alloc_buffers;
+	ctx->Driver.GetBufferSize = buffer_size;
+	ctx->Driver.Viewport      = wmesa_viewport;
+	ctx->Driver.Clear         = clear;
+	ctx->Driver.ClearColor    = clear_color;
+
+	// Does not apply for Mesa 5.x
+	//ctx->Driver.BaseCompressedTexFormat	= _mesa_base_compressed_texformat;
+	//ctx->Driver.CompressedTextureSize	= _mesa_compressed_texture_size;
+	//ctx->Driver.GetCompressedTexImage	= _mesa_get_compressed_teximage;
+
+	/* swrast buffer-selection callback. */
+	swdd->SetBuffer           = set_read_buffer;
+
+	/* swrast RGB(A) span and pixel writers. */
+	swdd->WriteRGBASpan       = write_rgba_span;
+	swdd->WriteRGBSpan        = write_rgb_span;
+	swdd->WriteMonoRGBASpan   = write_mono_rgba_span;
+	swdd->WriteRGBAPixels     = write_rgba_pixels;
+	swdd->WriteMonoRGBAPixels = write_mono_rgba_pixels;
+
+	/* swrast color-index span and pixel writers. */
+	swdd->WriteCI32Span       = write_ci32_span;
+	swdd->WriteCI8Span        = write_ci8_span;
+	swdd->WriteMonoCISpan     = write_mono_ci_span;
+	swdd->WriteCI32Pixels     = write_ci32_pixels;
+	swdd->WriteMonoCIPixels   = write_mono_ci_pixels;
+
+	/* swrast span and pixel readers. */
+	swdd->ReadRGBASpan        = read_rgba_span;
+	swdd->ReadRGBAPixels      = read_rgba_pixels;
+	swdd->ReadCI32Span        = read_ci32_span;
+	swdd->ReadCI32Pixels      = read_ci32_pixels;
+
+	tnl->Driver.RunPipeline   = _tnl_run_pipeline;
+
+	wmesa_update_state(ctx, new_state);
+}
+
+//---------------------------------------------------------------------------
+// Driver interface functions
+//---------------------------------------------------------------------------
+
+/*
+ * Allocate the per-drawable WMesa state for pCtx and attach it as
+ * pCtx->glPriv.  Returns FALSE when pCtx is NULL or the allocation fails,
+ * TRUE otherwise.  The two bPersistant* arguments are not used here.
+ */
+BOOL gldCreateDrawable_MesaSW(
+	DGL_ctx *pCtx,
+	BOOL bPersistantInterface,
+	BOOL bPersistantBuffers)
+{
+	WMesaContext *c;
+	GLboolean true_color_flag;
+	GLboolean rgb_flag = GL_TRUE;
+	GLboolean db_flag = GL_TRUE;
+
+	if (!pCtx)
+		return FALSE;
+
+	c = (struct wmesa_context * ) calloc(1,sizeof(struct wmesa_context));
+	if (c == NULL)
+		return FALSE;
+
+	/* Attach the new state to the GLDirect context and copy its handles. */
+	pCtx->glPriv = c;
+	c->hDC       = pCtx->hDC;
+	c->Window    = pCtx->hWnd;
+
+	/* More than 8 bits per pixel is treated as true color. */
+	true_color_flag = GetDeviceCaps(pCtx->hDC, BITSPIXEL) > 8;
+
+#ifdef DITHER
+	/* Dither only for RGB rendering on a display that is not true color. */
+	c->dither_flag = GL_FALSE;
+	if ((true_color_flag==GL_FALSE) && (rgb_flag == GL_TRUE)){
+		c->dither_flag = GL_TRUE;
+		c->hPalHalfTone = WinGCreateHalftonePalette();
+	}
+#else
+	c->dither_flag = GL_FALSE;
+#endif
+
+	if (rgb_flag != GL_FALSE) {
+		c->rgb_flag = GL_TRUE;
+	} else {
+		c->rgb_flag = GL_FALSE;
+#if 0
+		/* Old WinG stuff???? */
+		c->db_flag = db_flag =GL_TRUE; /* WinG requires double buffering */
+		printf("Single buffer is not supported in color index mode, ",
+			"setting to double buffer.\n");
+#endif
+	}
+
+	/* The pixel format's PFD_DOUBLEBUFFER bit is not consulted for this. */
+//	db_flag = pCtx->lpPF->pfd.dwFlags & PFD_DOUBLEBUFFER ? GL_TRUE : GL_FALSE;
+	db_flag = GL_TRUE; // Force double-buffer
+	if (!db_flag) {
+		/* Single Buffered */
+		if (c->rgb_flag)
+			c->db_flag = 0;
+	} else {
+		/* Double buffered */
+		c->db_flag = 1;
+		wmCreateBackingStore(c, pCtx->dwWidth, pCtx->dwHeight);
+	}
+
+	/* Emulate single buffering when the format lacks PFD_DOUBLEBUFFER. */
+	c->bEmulateSingleBuffer =
+		(pCtx->lpPF->pfd.dwFlags & PFD_DOUBLEBUFFER) ? FALSE : TRUE;
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+/*
+ * Refresh the drawable's DC/window handles from ctx and, when a backing
+ * store is in use, recreate it at ctx->dwWidth x ctx->dwHeight.
+ * Returns FALSE if ctx or its private WMesa state is missing.
+ */
+BOOL gldResizeDrawable_MesaSW(
+	DGL_ctx *ctx,
+	BOOL bDefaultDriver,
+	BOOL bPersistantInterface,
+	BOOL bPersistantBuffers)
+{
+	WMesaContext *wm = (ctx != NULL) ? ctx->glPriv : NULL;
+
+	if (wm == NULL)
+		return FALSE;
+
+	wm->hDC    = ctx->hDC;
+	wm->Window = ctx->hWnd;
+
+	if (!wm->db_flag)
+		return TRUE;
+
+	/* Drop the old backing store and build one at the new size. */
+	wmDeleteBackingStore(wm);
+	wmCreateBackingStore(wm, ctx->dwWidth, ctx->dwHeight);
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+/*
+ * Release the WMesa state attached to ctx: the halftone palette (if any),
+ * the backing store (if double-buffered) and the state block itself.
+ * Returns FALSE when ctx or ctx->glPriv is NULL.
+ */
+BOOL gldDestroyDrawable_MesaSW(
+	DGL_ctx *ctx)
+{
+	WMesaContext *wm;
+
+	if (!ctx || !ctx->glPriv)
+		return FALSE;
+	wm = ctx->glPriv;
+
+	if (wm->hPalHalfTone)
+		DeleteObject(wm->hPalHalfTone);
+	if (wm->db_flag)
+		wmDeleteBackingStore(wm);
+
+	/* Free the state block, then clear the owner's pointer to it. */
+	free(wm);
+	ctx->glPriv = NULL;
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+BOOL gldCreatePrivateGlobals_MesaSW(void)
+{
+	// Nothing to create: the Mesa software driver needs no private globals, so this always succeeds.
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+BOOL gldDestroyPrivateGlobals_MesaSW(void)
+{
+	// Nothing to destroy: the Mesa software driver needs no private globals, so this always succeeds.
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+/*
+ * Rebuild glb.lpPF as a two-entry list copied from pfTemplateMesaSW
+ * (entry 0 single-buffered, entry 1 double-buffered).  Returns FALSE and
+ * leaves the list empty if the allocation fails.
+ */
+BOOL gldBuildPixelformatList_MesaSW(void)
+{
+	DGL_pixelFormat *list;
+
+	/* Discard any previous list; free(NULL) is a no-op. */
+	free(glb.lpPF);
+	glb.lpPF = NULL;
+	glb.nPixelFormatCount = 0;
+
+	list = (DGL_pixelFormat *)calloc(2, sizeof(DGL_pixelFormat));
+	if (!list)
+		return FALSE;
+	memcpy(&list[0], &pfTemplateMesaSW, sizeof(DGL_pixelFormat));
+	memcpy(&list[1], &pfTemplateMesaSW, sizeof(DGL_pixelFormat));
+	list[0].pfd.dwFlags &= ~PFD_DOUBLEBUFFER; /* entry 0: single-buffered */
+	glb.lpPF = list;
+	glb.nPixelFormatCount = 2;
+	glb.bPixelformatsDirty = FALSE; /* list is now current */
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+/*
+ * Configure the Mesa context owned by gld for the software driver: set
+ * implementation limits, enable extensions, wake swsetup and install the
+ * driver callbacks.  Returns FALSE only when gld is NULL.
+ */
+BOOL gldInitialiseMesa_MesaSW(
+	DGL_ctx *gld)
+{
+	GLcontext *ctx;
+
+	if (!gld)
+		return FALSE;
+	ctx = gld->glCtx;
+
+	/* Limits.  Original note (KeithH): meant to cap texture size at 256. */
+	ctx->Const.MaxTextureLevels = 8;
+	ctx->Const.MaxDrawBuffers   = 1;
+	/* With multitexture disabled only a single texture unit is exposed. */
+	ctx->Const.MaxTextureUnits  = glb.bMultitexture ? MAX_TEXTURE_UNITS : 1;
+
+	_mesa_enable_sw_extensions(ctx);
+	_mesa_enable_imaging_extensions(ctx);
+	_mesa_enable_1_3_extensions(ctx);
+
+	/* The module CreateContext calls below are left disabled here. */
+//	_swrast_CreateContext( ctx );
+//	_vbo_CreateContext( ctx );
+//	_tnl_CreateContext( ctx );
+//	_swsetup_CreateContext( ctx );
+
+	_swsetup_Wakeup(ctx);
+
+	/* ~0 passes every bit set as new_state for the first update. */
+	wmesa_update_state_first_time(ctx, ~0);
+
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+/*
+ * Present the drawable: unless single-buffer emulation is active, hand
+ * the WMesa state and hDC to wmFlush().  hWnd is not used.  Returns FALSE
+ * when ctx or its private WMesa state is missing.
+ */
+BOOL gldSwapBuffers_MesaSW(
+	DGL_ctx *ctx,
+	HDC hDC,
+	HWND hWnd)
+{
+	WMesaContext *wm;
+
+	if (!ctx)
+		return FALSE;
+
+	wm = ctx->glPriv;
+	if (!wm)
+		return FALSE;
+
+	/* KeithH: gated on bEmulateSingleBuffer instead of db_flag. */
+	if (wm->bEmulateSingleBuffer)
+		return TRUE;
+	wmFlush(wm, hDC);
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
+/* Look up the entry point named `a` in GLD_extList and log the outcome.
+ * Returns the matching proc, or NULL when the name is not listed. */
+PROC gldGetProcAddress_MesaSW(
+	LPCSTR a)
+{
+	PROC	found = NULL;
+	int		idx;
+
+	/* The table ends at the first entry whose proc is NULL. */
+	for (idx = 0; !found && GLD_extList[idx].proc; idx++)
+		if (strcmp(a, GLD_extList[idx].name) == 0)
+			found = GLD_extList[idx].proc;
+
+	gldLogPrintf(GLDLOG_INFO, "GetProcAddress: %s (%s)", a, found ? "OK" : "Failed");
+
+	return found;
+}
+
+//---------------------------------------------------------------------------
+
+/* Report the desktop resolution and color depth in *glddm (Refresh = 0).
+ * Returns FALSE if glddm is NULL or the screen DC cannot be obtained. */
+BOOL gldGetDisplayMode_MesaSW(
+	DGL_ctx *ctx,
+	GLD_displayMode *glddm)
+{
+	HDC hdcDesktop;
+
+	if (glddm == NULL)
+		return FALSE;
+
+	/* GetDC(NULL) asks for the whole-screen DC; NULL means it failed. */
+	hdcDesktop = GetDC(NULL);
+	if (hdcDesktop == NULL)
+		return FALSE;
+	glddm->Width	= GetDeviceCaps(hdcDesktop, HORZRES);
+	glddm->Height	= GetDeviceCaps(hdcDesktop, VERTRES);
+	glddm->BPP		= GetDeviceCaps(hdcDesktop, BITSPIXEL);
+	glddm->Refresh	= 0;
+	ReleaseDC(NULL, hdcDesktop);
+	return TRUE;
+}
+
+//---------------------------------------------------------------------------
+
diff --git a/src/mesa/drivers/windows/gldirect/opengl32.def b/src/mesa/drivers/windows/gldirect/opengl32.def
new file mode 100644
index 0000000000..b213b6e047
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/opengl32.def
@@ -0,0 +1,488 @@
+;****************************************************************************
+;*
+;*                        Mesa 3-D graphics library
+;*                        Direct3D Driver Interface
+;*
+;*  ========================================================================
+;*
+;*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+;*
+;*   Permission is hereby granted, free of charge, to any person obtaining a
+;*   copy of this software and associated documentation files (the "Software"),
+;*   to deal in the Software without restriction, including without limitation
+;*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+;*   and/or sell copies of the Software, and to permit persons to whom the
+;*   Software is furnished to do so, subject to the following conditions:
+;*
+;*   The above copyright notice and this permission notice shall be included
+;*   in all copies or substantial portions of the Software.
+;*
+;*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+;*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+;*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+;*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+;*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+;*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+;*   SOFTWARE.
+;*
+;*  ======================================================================
+;*
+;* Language:     ANSI C
+;* Environment:  Windows 9x/2000/XP/XBox (Win32)
+;*
+;* Description:  DLL Module definition file
+;*
+;****************************************************************************/
+
+DESCRIPTION 'GLDirect'
+
+VERSION 3.0
+
+EXPORTS
+    glAccum=glAccum @1
+    glAlphaFunc=glAlphaFunc @2
+    glAreTexturesResident=glAreTexturesResident @3
+    glArrayElement=glArrayElement @4
+    glBegin=glBegin @5
+    glBindTexture=glBindTexture @6
+    glBitmap=glBitmap @7
+    glBlendFunc=glBlendFunc @8
+    glCallList=glCallList @9
+    glCallLists=glCallLists @10
+    glClear=glClear @11
+    glClearAccum=glClearAccum @12
+    glClearIndex=glClearIndex @13
+    glClearColor=glClearColor @14
+    glClearDepth=glClearDepth @15
+    glClearStencil=glClearStencil @16
+    glClipPlane=glClipPlane @17
+    glColor3b=glColor3b @18
+    glColor3d=glColor3d @19
+    glColor3f=glColor3f @20
+    glColor3i=glColor3i @21
+    glColor3s=glColor3s @22
+    glColor3ub=glColor3ub @23
+    glColor3ui=glColor3ui @24
+    glColor3us=glColor3us @25
+    glColor4b=glColor4b @26
+    glColor4d=glColor4d @27
+    glColor4f=glColor4f @28
+    glColor4i=glColor4i @29
+    glColor4s=glColor4s @30
+    glColor4ub=glColor4ub @31
+    glColor4ui=glColor4ui @32
+    glColor4us=glColor4us @33
+    glColor3bv=glColor3bv @34
+    glColor3dv=glColor3dv @35
+    glColor3fv=glColor3fv @36
+    glColor3iv=glColor3iv @37
+    glColor3sv=glColor3sv @38
+    glColor3ubv=glColor3ubv @39
+    glColor3uiv=glColor3uiv @40
+    glColor3usv=glColor3usv @41
+    glColor4bv=glColor4bv @42
+    glColor4dv=glColor4dv @43
+    glColor4fv=glColor4fv @44
+    glColor4iv=glColor4iv @45
+    glColor4sv=glColor4sv @46
+    glColor4ubv=glColor4ubv @47
+    glColor4uiv=glColor4uiv @48
+    glColor4usv=glColor4usv @49
+    glColorMask=glColorMask @50
+    glColorMaterial=glColorMaterial @51
+    glColorPointer=glColorPointer @52
+    glColorTableEXT=glColorTableEXT @53
+    glColorSubTableEXT=glColorSubTableEXT @54
+    glCopyPixels=glCopyPixels @55
+    glCopyTexImage1D=glCopyTexImage1D @56
+    glCopyTexImage2D=glCopyTexImage2D @57
+    glCopyTexSubImage1D=glCopyTexSubImage1D @58
+    glCopyTexSubImage2D=glCopyTexSubImage2D @59
+    glCullFace=glCullFace @60
+    glDepthFunc=glDepthFunc @61
+    glDepthMask=glDepthMask @62
+    glDepthRange=glDepthRange @63
+    glDeleteLists=glDeleteLists @64
+    glDeleteTextures=glDeleteTextures @65
+    glDisable=glDisable @66
+    glDisableClientState=glDisableClientState @67
+    glDrawArrays=glDrawArrays @68
+    glDrawBuffer=glDrawBuffer @69
+    glDrawElements=glDrawElements @70
+    glDrawPixels=glDrawPixels @71
+    glEnable=glEnable @72
+    glEnableClientState=glEnableClientState @73
+    glEnd=glEnd @74
+    glEndList=glEndList @75
+    glEvalCoord1d=glEvalCoord1d @76
+    glEvalCoord1f=glEvalCoord1f @77
+    glEvalCoord1dv=glEvalCoord1dv @78
+    glEvalCoord1fv=glEvalCoord1fv @79
+    glEvalCoord2d=glEvalCoord2d @80
+    glEvalCoord2f=glEvalCoord2f @81
+    glEvalCoord2dv=glEvalCoord2dv @82
+    glEvalCoord2fv=glEvalCoord2fv @83
+    glEvalPoint1=glEvalPoint1 @84
+    glEvalPoint2=glEvalPoint2 @85
+    glEvalMesh1=glEvalMesh1 @86
+    glEdgeFlag=glEdgeFlag @87
+    glEdgeFlagv=glEdgeFlagv @88
+    glEdgeFlagPointer=glEdgeFlagPointer @89
+    glEvalMesh2=glEvalMesh2 @90
+    glFeedbackBuffer=glFeedbackBuffer @91
+    glFinish=glFinish @92
+    glFlush=glFlush @93
+    glFogf=glFogf @94
+    glFogi=glFogi @95
+    glFogfv=glFogfv @96
+    glFogiv=glFogiv @97
+    glFrontFace=glFrontFace @98
+    glFrustum=glFrustum @99
+    glGenLists=glGenLists @100
+    glGenTextures=glGenTextures @101
+    glGetBooleanv=glGetBooleanv @102
+    glGetClipPlane=glGetClipPlane @103
+    glGetColorTableEXT=glGetColorTableEXT @104
+    glGetColorTableParameterivEXT=glGetColorTableParameterivEXT @105
+    glGetColorTableParameterfvEXT=glGetColorTableParameterfvEXT @106
+    glGetDoublev=glGetDoublev @107
+    glGetError=glGetError @108
+    glGetFloatv=glGetFloatv @109
+    glGetIntegerv=glGetIntegerv @110
+    glGetLightfv=glGetLightfv @111
+    glGetLightiv=glGetLightiv @112
+    glGetMapdv=glGetMapdv @113
+    glGetMapfv=glGetMapfv @114
+    glGetMapiv=glGetMapiv @115
+    glGetMaterialfv=glGetMaterialfv @116
+    glGetMaterialiv=glGetMaterialiv @117
+    glGetPixelMapfv=glGetPixelMapfv @118
+    glGetPixelMapuiv=glGetPixelMapuiv @119
+    glGetPixelMapusv=glGetPixelMapusv @120
+    glGetPointerv=glGetPointerv @121
+    glGetPolygonStipple=glGetPolygonStipple @122
+    glGetString=glGetString @123
+    glGetTexEnvfv=glGetTexEnvfv @124
+    glGetTexEnviv=glGetTexEnviv @125
+    glGetTexGeniv=glGetTexGeniv @126
+    glGetTexGendv=glGetTexGendv @127
+    glGetTexGenfv=glGetTexGenfv @128
+    glGetTexImage=glGetTexImage @129
+    glGetTexLevelParameterfv=glGetTexLevelParameterfv @130
+    glGetTexLevelParameteriv=glGetTexLevelParameteriv @131
+    glGetTexParameterfv=glGetTexParameterfv @132
+    glGetTexParameteriv=glGetTexParameteriv @133
+    glHint=glHint @134
+    glIndexd=glIndexd @135
+    glIndexf=glIndexf @136
+    glIndexi=glIndexi @137
+    glIndexs=glIndexs @138
+    glIndexub=glIndexub @139
+    glIndexdv=glIndexdv @140
+    glIndexfv=glIndexfv @141
+    glIndexiv=glIndexiv @142
+    glIndexsv=glIndexsv @143
+    glIndexubv=glIndexubv @144
+    glIndexMask=glIndexMask @145
+    glIndexPointer=glIndexPointer @146
+    glInterleavedArrays=glInterleavedArrays @147
+    glInitNames=glInitNames @148
+    glIsList=glIsList @149
+    glIsTexture=glIsTexture @150
+    glLightf=glLightf @151
+    glLighti=glLighti @152
+    glLightfv=glLightfv @153
+    glLightiv=glLightiv @154
+    glLightModelf=glLightModelf @155
+    glLightModeli=glLightModeli @156
+    glLightModelfv=glLightModelfv @157
+    glLightModeliv=glLightModeliv @158
+    glLineWidth=glLineWidth @159
+    glLineStipple=glLineStipple @160
+    glListBase=glListBase @161
+    glLoadIdentity=glLoadIdentity @162
+    glLoadMatrixd=glLoadMatrixd @163
+    glLoadMatrixf=glLoadMatrixf @164
+    glLoadName=glLoadName @165
+    glLogicOp=glLogicOp @166
+    glMap1d=glMap1d @167
+    glMap1f=glMap1f @168
+    glMap2d=glMap2d @169
+    glMap2f=glMap2f @170
+    glMapGrid1d=glMapGrid1d @171
+    glMapGrid1f=glMapGrid1f @172
+    glMapGrid2d=glMapGrid2d @173
+    glMapGrid2f=glMapGrid2f @174
+    glMaterialf=glMaterialf @175
+    glMateriali=glMateriali @176
+    glMaterialfv=glMaterialfv @177
+    glMaterialiv=glMaterialiv @178
+    glMatrixMode=glMatrixMode @179
+    glMultMatrixd=glMultMatrixd @180
+    glMultMatrixf=glMultMatrixf @181
+    glNewList=glNewList @182
+    glNormal3b=glNormal3b @183
+    glNormal3d=glNormal3d @184
+    glNormal3f=glNormal3f @185
+    glNormal3i=glNormal3i @186
+    glNormal3s=glNormal3s @187
+    glNormal3bv=glNormal3bv @188
+    glNormal3dv=glNormal3dv @189
+    glNormal3fv=glNormal3fv @190
+    glNormal3iv=glNormal3iv @191
+    glNormal3sv=glNormal3sv @192
+    glNormalPointer=glNormalPointer @193
+    glOrtho=glOrtho @194
+    glPassThrough=glPassThrough @195
+    glPixelMapfv=glPixelMapfv @196
+    glPixelMapuiv=glPixelMapuiv @197
+    glPixelMapusv=glPixelMapusv @198
+    glPixelStoref=glPixelStoref @199
+    glPixelStorei=glPixelStorei @200
+    glPixelTransferf=glPixelTransferf @201
+    glPixelTransferi=glPixelTransferi @202
+    glPixelZoom=glPixelZoom @203
+    glPointSize=glPointSize @204
+    glPolygonMode=glPolygonMode @205
+    glPolygonOffset=glPolygonOffset @206
+    glPolygonOffsetEXT=glPolygonOffsetEXT @207
+    glPolygonStipple=glPolygonStipple @208
+    glPopAttrib=glPopAttrib @209
+    glPopClientAttrib=glPopClientAttrib @210
+    glPopMatrix=glPopMatrix @211
+    glPopName=glPopName @212
+    glPrioritizeTextures=glPrioritizeTextures @213
+    glPushMatrix=glPushMatrix @214
+    glRasterPos2d=glRasterPos2d @215
+    glRasterPos2f=glRasterPos2f @216
+    glRasterPos2i=glRasterPos2i @217
+    glRasterPos2s=glRasterPos2s @218
+    glRasterPos3d=glRasterPos3d @219
+    glRasterPos3f=glRasterPos3f @220
+    glRasterPos3i=glRasterPos3i @221
+    glRasterPos3s=glRasterPos3s @222
+    glRasterPos4d=glRasterPos4d @223
+    glRasterPos4f=glRasterPos4f @224
+    glRasterPos4i=glRasterPos4i @225
+    glRasterPos4s=glRasterPos4s @226
+    glRasterPos2dv=glRasterPos2dv @227
+    glRasterPos2fv=glRasterPos2fv @228
+    glRasterPos2iv=glRasterPos2iv @229
+    glRasterPos2sv=glRasterPos2sv @230
+    glRasterPos3dv=glRasterPos3dv @231
+    glRasterPos3fv=glRasterPos3fv @232
+    glRasterPos3iv=glRasterPos3iv @233
+    glRasterPos3sv=glRasterPos3sv @234
+    glRasterPos4dv=glRasterPos4dv @235
+    glRasterPos4fv=glRasterPos4fv @236
+    glRasterPos4iv=glRasterPos4iv @237
+    glRasterPos4sv=glRasterPos4sv @238
+    glReadBuffer=glReadBuffer @239
+    glReadPixels=glReadPixels @240
+    glRectd=glRectd @241
+    glRectf=glRectf @242
+    glRecti=glRecti @243
+    glRects=glRects @244
+    glRectdv=glRectdv @245
+    glRectfv=glRectfv @246
+    glRectiv=glRectiv @247
+    glRectsv=glRectsv @248
+    glScissor=glScissor @249
+    glIsEnabled=glIsEnabled @250
+    glPushAttrib=glPushAttrib @251
+    glPushClientAttrib=glPushClientAttrib @252
+    glPushName=glPushName @253
+    glRenderMode=glRenderMode @254
+    glRotated=glRotated @255
+    glRotatef=glRotatef @256
+    glSelectBuffer=glSelectBuffer @257
+    glScaled=glScaled @258
+    glScalef=glScalef @259
+    glShadeModel=glShadeModel @260
+    glStencilFunc=glStencilFunc @261
+    glStencilMask=glStencilMask @262
+    glStencilOp=glStencilOp @263
+    glTexCoord1d=glTexCoord1d @264
+    glTexCoord1f=glTexCoord1f @265
+    glTexCoord1i=glTexCoord1i @266
+    glTexCoord1s=glTexCoord1s @267
+    glTexCoord2d=glTexCoord2d @268
+    glTexCoord2f=glTexCoord2f @269
+    glTexCoord2i=glTexCoord2i @270
+    glTexCoord2s=glTexCoord2s @271
+    glTexCoord3d=glTexCoord3d @272
+    glTexCoord3f=glTexCoord3f @273
+    glTexCoord3i=glTexCoord3i @274
+    glTexCoord3s=glTexCoord3s @275
+    glTexCoord4d=glTexCoord4d @276
+    glTexCoord4f=glTexCoord4f @277
+    glTexCoord4i=glTexCoord4i @278
+    glTexCoord4s=glTexCoord4s @279
+    glTexCoord1dv=glTexCoord1dv @280
+    glTexCoord1fv=glTexCoord1fv @281
+    glTexCoord1iv=glTexCoord1iv @282
+    glTexCoord1sv=glTexCoord1sv @283
+    glTexCoord2dv=glTexCoord2dv @284
+    glTexCoord2fv=glTexCoord2fv @285
+    glTexCoord2iv=glTexCoord2iv @286
+    glTexCoord2sv=glTexCoord2sv @287
+    glTexCoord3dv=glTexCoord3dv @288
+    glTexCoord3fv=glTexCoord3fv @289
+    glTexCoord3iv=glTexCoord3iv @290
+    glTexCoord3sv=glTexCoord3sv @291
+    glTexCoord4dv=glTexCoord4dv @292
+    glTexCoord4fv=glTexCoord4fv @293
+    glTexCoord4iv=glTexCoord4iv @294
+    glTexCoord4sv=glTexCoord4sv @295
+    glTexCoordPointer=glTexCoordPointer @296
+    glTexGend=glTexGend @297
+    glTexGenf=glTexGenf @298
+    glTexGeni=glTexGeni @299
+    glTexGendv=glTexGendv @300
+    glTexGeniv=glTexGeniv @301
+    glTexGenfv=glTexGenfv @302
+    glTexEnvf=glTexEnvf @303
+    glTexEnvi=glTexEnvi @304
+    glTexEnvfv=glTexEnvfv @305
+    glTexEnviv=glTexEnviv @306
+    glTexImage1D=glTexImage1D @307
+    glTexImage2D=glTexImage2D @308
+    glTexParameterf=glTexParameterf @309
+    glTexParameteri=glTexParameteri @310
+    glTexParameterfv=glTexParameterfv @311
+    glTexParameteriv=glTexParameteriv @312
+    glTexSubImage1D=glTexSubImage1D @313
+    glTexSubImage2D=glTexSubImage2D @314
+    glTranslated=glTranslated @315
+    glTranslatef=glTranslatef @316
+    glVertex2d=glVertex2d @317
+    glVertex2f=glVertex2f @318
+    glVertex2i=glVertex2i @319
+    glVertex2s=glVertex2s @320
+    glVertex3d=glVertex3d @321
+    glVertex3f=glVertex3f @322
+    glVertex3i=glVertex3i @323
+    glVertex3s=glVertex3s @324
+    glVertex4d=glVertex4d @325
+    glVertex4f=glVertex4f @326
+    glVertex4i=glVertex4i @327
+    glVertex4s=glVertex4s @328
+    glVertex2dv=glVertex2dv @329
+    glVertex2fv=glVertex2fv @330
+    glVertex2iv=glVertex2iv @331
+    glVertex2sv=glVertex2sv @332
+    glVertex3dv=glVertex3dv @333
+    glVertex3fv=glVertex3fv @334
+    glVertex3iv=glVertex3iv @335
+    glVertex3sv=glVertex3sv @336
+    glVertex4dv=glVertex4dv @337
+    glVertex4fv=glVertex4fv @338
+    glVertex4iv=glVertex4iv @339
+    glVertex4sv=glVertex4sv @340
+    glVertexPointer=glVertexPointer @341
+    glViewport=glViewport @342
+    glBlendEquationEXT=glBlendEquationEXT @343
+    glBlendColorEXT=glBlendColorEXT @344
+    glVertexPointerEXT=glVertexPointerEXT @345
+    glNormalPointerEXT=glNormalPointerEXT @346
+    glColorPointerEXT=glColorPointerEXT @347
+    glIndexPointerEXT=glIndexPointerEXT @348
+    glTexCoordPointerEXT=glTexCoordPointerEXT @349
+    glEdgeFlagPointerEXT=glEdgeFlagPointerEXT @350
+    glGetPointervEXT=glGetPointervEXT @351
+    glArrayElementEXT=glArrayElementEXT @352
+    glDrawArraysEXT=glDrawArraysEXT @353
+    glBindTextureEXT=glBindTextureEXT @354
+    glDeleteTexturesEXT=glDeleteTexturesEXT @355
+    glGenTexturesEXT=glGenTexturesEXT @356
+    glPrioritizeTexturesEXT=glPrioritizeTexturesEXT @357
+    glCopyTexSubImage3DEXT=glCopyTexSubImage3DEXT @358
+    glTexImage3DEXT=glTexImage3DEXT @359
+    glTexSubImage3DEXT=glTexSubImage3DEXT @360
+    glWindowPos4fMESA=glWindowPos4fMESA @361
+    glWindowPos2iMESA=glWindowPos2iMESA @362
+    glWindowPos2sMESA=glWindowPos2sMESA @363
+    glWindowPos2fMESA=glWindowPos2fMESA @364
+    glWindowPos2dMESA=glWindowPos2dMESA @365
+    glWindowPos2ivMESA=glWindowPos2ivMESA @366
+    glWindowPos2svMESA=glWindowPos2svMESA @367
+    glWindowPos2fvMESA=glWindowPos2fvMESA @368
+    glWindowPos2dvMESA=glWindowPos2dvMESA @369
+    glWindowPos3iMESA=glWindowPos3iMESA @370
+    glWindowPos3sMESA=glWindowPos3sMESA @371
+    glWindowPos3fMESA=glWindowPos3fMESA @372
+    glWindowPos3dMESA=glWindowPos3dMESA @373
+    glWindowPos3ivMESA=glWindowPos3ivMESA @374
+    glWindowPos3svMESA=glWindowPos3svMESA @375
+    glWindowPos3fvMESA=glWindowPos3fvMESA @376
+    glWindowPos3dvMESA=glWindowPos3dvMESA @377
+    glWindowPos4iMESA=glWindowPos4iMESA @378
+    glWindowPos4sMESA=glWindowPos4sMESA @379
+    glWindowPos4dMESA=glWindowPos4dMESA @380
+    glWindowPos4ivMESA=glWindowPos4ivMESA @381
+    glWindowPos4svMESA=glWindowPos4svMESA @382
+    glWindowPos4fvMESA=glWindowPos4fvMESA @383
+    glWindowPos4dvMESA=glWindowPos4dvMESA @384
+    glResizeBuffersMESA=glResizeBuffersMESA @385
+    wglCopyContext=wglCopyContext @386
+    wglCreateContext=wglCreateContext @387
+    wglCreateLayerContext=wglCreateLayerContext @388
+    wglDeleteContext=wglDeleteContext @389
+    wglDescribeLayerPlane=wglDescribeLayerPlane @390
+    wglGetCurrentContext=wglGetCurrentContext @391
+    wglGetCurrentDC=wglGetCurrentDC @392
+    wglGetLayerPaletteEntries=wglGetLayerPaletteEntries @393
+    wglGetProcAddress=wglGetProcAddress @394
+    wglMakeCurrent=wglMakeCurrent @395
+    wglRealizeLayerPalette=wglRealizeLayerPalette @396
+    wglSetLayerPaletteEntries=wglSetLayerPaletteEntries @397
+    wglShareLists=wglShareLists @398
+    wglSwapLayerBuffers=wglSwapLayerBuffers @399
+    wglUseFontBitmapsA=wglUseFontBitmapsA @400
+    wglUseFontBitmapsW=wglUseFontBitmapsW @401
+    wglUseFontOutlinesA=wglUseFontOutlinesA @402
+    wglUseFontOutlinesW=wglUseFontOutlinesW @403
+    ChoosePixelFormat=ChoosePixelFormat @404
+    DescribePixelFormat=DescribePixelFormat @405
+    GetPixelFormat=GetPixelFormat @406
+    SetPixelFormat=SetPixelFormat @407
+    SwapBuffers=SwapBuffers @408
+    wglChoosePixelFormat=wglChoosePixelFormat @409
+    wglDescribePixelFormat=wglDescribePixelFormat @410
+    wglGetPixelFormat=wglGetPixelFormat @411
+    wglSetPixelFormat=wglSetPixelFormat @412
+    wglSwapBuffers=wglSwapBuffers @413
+    glActiveTextureARB=glActiveTextureARB @414
+    glClientActiveTextureARB=glClientActiveTextureARB @415
+    glMultiTexCoord1dARB=glMultiTexCoord1dARB @416
+    glMultiTexCoord1dvARB=glMultiTexCoord1dvARB @417
+    glMultiTexCoord1fARB=glMultiTexCoord1fARB @418
+    glMultiTexCoord1fvARB=glMultiTexCoord1fvARB @419
+    glMultiTexCoord1iARB=glMultiTexCoord1iARB @420
+    glMultiTexCoord1ivARB=glMultiTexCoord1ivARB @421
+    glMultiTexCoord1sARB=glMultiTexCoord1sARB @422
+    glMultiTexCoord1svARB=glMultiTexCoord1svARB @423
+    glMultiTexCoord2dARB=glMultiTexCoord2dARB @424
+    glMultiTexCoord2dvARB=glMultiTexCoord2dvARB @425
+    glMultiTexCoord2fARB=glMultiTexCoord2fARB @426
+    glMultiTexCoord2fvARB=glMultiTexCoord2fvARB @427
+    glMultiTexCoord2iARB=glMultiTexCoord2iARB @428
+    glMultiTexCoord2ivARB=glMultiTexCoord2ivARB @429
+    glMultiTexCoord2sARB=glMultiTexCoord2sARB @430
+    glMultiTexCoord2svARB=glMultiTexCoord2svARB @431
+    glMultiTexCoord3dARB=glMultiTexCoord3dARB @432
+    glMultiTexCoord3dvARB=glMultiTexCoord3dvARB @433
+    glMultiTexCoord3fARB=glMultiTexCoord3fARB @434
+    glMultiTexCoord3fvARB=glMultiTexCoord3fvARB @435
+    glMultiTexCoord3iARB=glMultiTexCoord3iARB @436
+    glMultiTexCoord3ivARB=glMultiTexCoord3ivARB @437
+    glMultiTexCoord3sARB=glMultiTexCoord3sARB @438
+    glMultiTexCoord3svARB=glMultiTexCoord3svARB @439
+    glMultiTexCoord4dARB=glMultiTexCoord4dARB @440
+    glMultiTexCoord4dvARB=glMultiTexCoord4dvARB @441
+    glMultiTexCoord4fARB=glMultiTexCoord4fARB @442
+    glMultiTexCoord4fvARB=glMultiTexCoord4fvARB @443
+    glMultiTexCoord4iARB=glMultiTexCoord4iARB @444
+    glMultiTexCoord4ivARB=glMultiTexCoord4ivARB @445
+    glMultiTexCoord4sARB=glMultiTexCoord4sARB @446
+    glMultiTexCoord4svARB=glMultiTexCoord4svARB @447
diff --git a/src/mesa/drivers/windows/gldirect/opengl32.ref b/src/mesa/drivers/windows/gldirect/opengl32.ref
new file mode 100644
index 0000000000..2f71faf216
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/opengl32.ref
@@ -0,0 +1,495 @@
+;****************************************************************************
+;*
+;*                        Mesa 3-D graphics library
+;*                        Direct3D Driver Interface
+;*
+;*  ========================================================================
+;*
+;*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+;*
+;*   Permission is hereby granted, free of charge, to any person obtaining a
+;*   copy of this software and associated documentation files (the "Software"),
+;*   to deal in the Software without restriction, including without limitation
+;*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+;*   and/or sell copies of the Software, and to permit persons to whom the
+;*   Software is furnished to do so, subject to the following conditions:
+;*
+;*   The above copyright notice and this permission notice shall be included
+;*   in all copies or substantial portions of the Software.
+;*
+;*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+;*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+;*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+;*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+;*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+;*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+;*   SOFTWARE.
+;*
+;*  ======================================================================
+;*
+;* Language:     ANSI C
+;* Environment:  Windows 9x/2000/XP/XBox (Win32)
+;*
+;* Description:  DLL Module definition file
+;*
+;****************************************************************************/
+
+DESCRIPTION 'GLDirect'
+
+VERSION 3.0
+
+EXPORTS
+ glAccum
+ glAlphaFunc
+ glAreTexturesResident
+ glArrayElement
+ glBegin
+ glBindTexture
+ glBitmap
+ glBlendFunc
+ glCallList
+ glCallLists
+ glClear
+ glClearAccum
+ glClearIndex
+ glClearColor
+ glClearDepth
+ glClearStencil
+ glClipPlane
+ glColor3b
+ glColor3d
+ glColor3f
+ glColor3i
+ glColor3s
+ glColor3ub
+ glColor3ui
+ glColor3us
+ glColor4b
+ glColor4d
+ glColor4f
+ glColor4i
+ glColor4s
+ glColor4ub
+ glColor4ui
+ glColor4us
+ glColor3bv
+ glColor3dv
+ glColor3fv
+ glColor3iv
+ glColor3sv
+ glColor3ubv
+ glColor3uiv
+ glColor3usv
+ glColor4bv
+ glColor4dv
+ glColor4fv
+ glColor4iv
+ glColor4sv
+ glColor4ubv
+ glColor4uiv
+ glColor4usv
+ glColorMask
+ glColorMaterial
+ glColorPointer
+ glColorTableEXT
+ glColorSubTableEXT
+ glCopyPixels
+ glCopyTexImage1D
+ glCopyTexImage2D
+ glCopyTexSubImage1D
+ glCopyTexSubImage2D
+ glCullFace
+ glDepthFunc
+ glDepthMask
+ glDepthRange
+ glDeleteLists
+ glDeleteTextures
+ glDisable
+ glDisableClientState
+ glDrawArrays
+ glDrawBuffer
+ glDrawElements
+ glDrawPixels
+ glEnable
+ glEnableClientState
+ glEnd
+ glEndList
+ glEvalCoord1d
+ glEvalCoord1f
+ glEvalCoord1dv
+ glEvalCoord1fv
+ glEvalCoord2d
+ glEvalCoord2f
+ glEvalCoord2dv
+ glEvalCoord2fv
+ glEvalPoint1
+ glEvalPoint2
+ glEvalMesh1
+ glEdgeFlag
+ glEdgeFlagv
+ glEdgeFlagPointer
+ glEvalMesh2
+ glFeedbackBuffer
+ glFinish
+ glFlush
+ glFogf
+ glFogi
+ glFogfv
+ glFogiv
+ glFrontFace
+ glFrustum
+ glGenLists
+ glGenTextures
+ glGetBooleanv
+ glGetClipPlane
+ glGetColorTableEXT
+ glGetColorTableParameterivEXT
+ glGetColorTableParameterfvEXT
+ glGetDoublev
+ glGetError
+ glGetFloatv
+ glGetIntegerv
+ glGetLightfv
+ glGetLightiv
+ glGetMapdv
+ glGetMapfv
+ glGetMapiv
+ glGetMaterialfv
+ glGetMaterialiv
+ glGetPixelMapfv
+ glGetPixelMapuiv
+ glGetPixelMapusv
+ glGetPointerv
+ glGetPolygonStipple
+ glGetString
+ glGetTexEnvfv
+ glGetTexEnviv
+ glGetTexGeniv
+ glGetTexGendv
+ glGetTexGenfv
+ glGetTexImage
+ glGetTexLevelParameterfv
+ glGetTexLevelParameteriv
+ glGetTexParameterfv
+ glGetTexParameteriv
+ glHint
+ glIndexd
+ glIndexf
+ glIndexi
+ glIndexs
+ glIndexub
+ glIndexdv
+ glIndexfv
+ glIndexiv
+ glIndexsv
+ glIndexubv
+ glIndexMask
+ glIndexPointer
+ glInterleavedArrays
+ glInitNames
+ glIsList
+ glIsTexture
+ glLightf
+ glLighti
+ glLightfv
+ glLightiv
+ glLightModelf
+ glLightModeli
+ glLightModelfv
+ glLightModeliv
+ glLineWidth
+ glLineStipple
+ glListBase
+ glLoadIdentity
+ glLoadMatrixd
+ glLoadMatrixf
+ glLoadName
+ glLogicOp
+ glMap1d
+ glMap1f
+ glMap2d
+ glMap2f
+ glMapGrid1d
+ glMapGrid1f
+ glMapGrid2d
+ glMapGrid2f
+ glMaterialf
+ glMateriali
+ glMaterialfv
+ glMaterialiv
+ glMatrixMode
+ glMultMatrixd
+ glMultMatrixf
+ glNewList
+ glNormal3b
+ glNormal3d
+ glNormal3f
+ glNormal3i
+ glNormal3s
+ glNormal3bv
+ glNormal3dv
+ glNormal3fv
+ glNormal3iv
+ glNormal3sv
+ glNormalPointer
+ glOrtho
+ glPassThrough
+ glPixelMapfv
+ glPixelMapuiv
+ glPixelMapusv
+ glPixelStoref
+ glPixelStorei
+ glPixelTransferf
+ glPixelTransferi
+ glPixelZoom
+ glPointSize
+ glPolygonMode
+ glPolygonOffset
+ glPolygonOffsetEXT
+ glPolygonStipple
+ glPopAttrib
+ glPopClientAttrib
+ glPopMatrix
+ glPopName
+ glPrioritizeTextures
+ glPushMatrix
+ glRasterPos2d
+ glRasterPos2f
+ glRasterPos2i
+ glRasterPos2s
+ glRasterPos3d
+ glRasterPos3f
+ glRasterPos3i
+ glRasterPos3s
+ glRasterPos4d
+ glRasterPos4f
+ glRasterPos4i
+ glRasterPos4s
+ glRasterPos2dv
+ glRasterPos2fv
+ glRasterPos2iv
+ glRasterPos2sv
+ glRasterPos3dv
+ glRasterPos3fv
+ glRasterPos3iv
+ glRasterPos3sv
+ glRasterPos4dv
+ glRasterPos4fv
+ glRasterPos4iv
+ glRasterPos4sv
+ glReadBuffer
+ glReadPixels
+ glRectd
+ glRectf
+ glRecti
+ glRects
+ glRectdv
+ glRectfv
+ glRectiv
+ glRectsv
+ glScissor
+ glIsEnabled
+ glPushAttrib
+ glPushClientAttrib
+ glPushName
+ glRenderMode
+ glRotated
+ glRotatef
+ glSelectBuffer
+ glScaled
+ glScalef
+ glShadeModel
+ glStencilFunc
+ glStencilMask
+ glStencilOp
+ glTexCoord1d
+ glTexCoord1f
+ glTexCoord1i
+ glTexCoord1s
+ glTexCoord2d
+ glTexCoord2f
+ glTexCoord2i
+ glTexCoord2s
+ glTexCoord3d
+ glTexCoord3f
+ glTexCoord3i
+ glTexCoord3s
+ glTexCoord4d
+ glTexCoord4f
+ glTexCoord4i
+ glTexCoord4s
+ glTexCoord1dv
+ glTexCoord1fv
+ glTexCoord1iv
+ glTexCoord1sv
+ glTexCoord2dv
+ glTexCoord2fv
+ glTexCoord2iv
+ glTexCoord2sv
+ glTexCoord3dv
+ glTexCoord3fv
+ glTexCoord3iv
+ glTexCoord3sv
+ glTexCoord4dv
+ glTexCoord4fv
+ glTexCoord4iv
+ glTexCoord4sv
+ glTexCoordPointer
+ glTexGend
+ glTexGenf
+ glTexGeni
+ glTexGendv
+ glTexGeniv
+ glTexGenfv
+ glTexEnvf
+ glTexEnvi
+ glTexEnvfv
+ glTexEnviv
+ glTexImage1D
+ glTexImage2D
+ glTexParameterf
+ glTexParameteri
+ glTexParameterfv
+ glTexParameteriv
+ glTexSubImage1D
+ glTexSubImage2D
+ glTranslated
+ glTranslatef
+ glVertex2d
+ glVertex2f
+ glVertex2i
+ glVertex2s
+ glVertex3d
+ glVertex3f
+ glVertex3i
+ glVertex3s
+ glVertex4d
+ glVertex4f
+ glVertex4i
+ glVertex4s
+ glVertex2dv
+ glVertex2fv
+ glVertex2iv
+ glVertex2sv
+ glVertex3dv
+ glVertex3fv
+ glVertex3iv
+ glVertex3sv
+ glVertex4dv
+ glVertex4fv
+ glVertex4iv
+ glVertex4sv
+ glVertexPointer
+ glViewport
+
+ glBlendEquationEXT
+ glBlendColorEXT
+ glVertexPointerEXT
+ glNormalPointerEXT
+ glColorPointerEXT
+ glIndexPointerEXT
+ glTexCoordPointerEXT
+ glEdgeFlagPointerEXT
+ glGetPointervEXT
+ glArrayElementEXT
+ glDrawArraysEXT
+ glBindTextureEXT
+ glDeleteTexturesEXT
+ glGenTexturesEXT
+ glPrioritizeTexturesEXT
+ glCopyTexSubImage3DEXT
+ glTexImage3DEXT
+ glTexSubImage3DEXT
+
+ glWindowPos4fMESA
+ glWindowPos2iMESA
+ glWindowPos2sMESA
+ glWindowPos2fMESA
+ glWindowPos2dMESA
+ glWindowPos2ivMESA
+ glWindowPos2svMESA
+ glWindowPos2fvMESA
+ glWindowPos2dvMESA
+ glWindowPos3iMESA
+ glWindowPos3sMESA
+ glWindowPos3fMESA
+ glWindowPos3dMESA
+ glWindowPos3ivMESA
+ glWindowPos3svMESA
+ glWindowPos3fvMESA
+ glWindowPos3dvMESA
+ glWindowPos4iMESA
+ glWindowPos4sMESA
+ glWindowPos4dMESA
+ glWindowPos4ivMESA
+ glWindowPos4svMESA
+ glWindowPos4fvMESA
+ glWindowPos4dvMESA
+ glResizeBuffersMESA
+
+ wglCopyContext
+ wglCreateContext
+ wglCreateLayerContext
+ wglDeleteContext
+ wglDescribeLayerPlane
+ wglGetCurrentContext
+ wglGetCurrentDC
+ wglGetLayerPaletteEntries
+ wglGetProcAddress
+ wglMakeCurrent
+ wglRealizeLayerPalette
+ wglSetLayerPaletteEntries
+ wglShareLists
+ wglSwapLayerBuffers
+ wglUseFontBitmapsA
+ wglUseFontBitmapsW
+ wglUseFontOutlinesA
+ wglUseFontOutlinesW
+
+; The GDI-style names below are exported as aliases of the wgl* functions, so each pair shares one address
+ ChoosePixelFormat = wglChoosePixelFormat
+ DescribePixelFormat = wglDescribePixelFormat
+ GetPixelFormat = wglGetPixelFormat
+ SetPixelFormat = wglSetPixelFormat 
+ SwapBuffers = wglSwapBuffers
+
+ wglChoosePixelFormat
+ wglDescribePixelFormat
+ wglGetPixelFormat
+ wglSetPixelFormat
+ wglSwapBuffers
+
+ glActiveTextureARB
+ glClientActiveTextureARB
+ glMultiTexCoord1dARB
+ glMultiTexCoord1dvARB
+ glMultiTexCoord1fARB
+ glMultiTexCoord1fvARB
+ glMultiTexCoord1iARB
+ glMultiTexCoord1ivARB
+ glMultiTexCoord1sARB
+ glMultiTexCoord1svARB
+ glMultiTexCoord2dARB
+ glMultiTexCoord2dvARB
+ glMultiTexCoord2fARB
+ glMultiTexCoord2fvARB
+ glMultiTexCoord2iARB
+ glMultiTexCoord2ivARB
+ glMultiTexCoord2sARB
+ glMultiTexCoord2svARB
+ glMultiTexCoord3dARB
+ glMultiTexCoord3dvARB
+ glMultiTexCoord3fARB
+ glMultiTexCoord3fvARB
+ glMultiTexCoord3iARB
+ glMultiTexCoord3ivARB
+ glMultiTexCoord3sARB
+ glMultiTexCoord3svARB
+ glMultiTexCoord4dARB
+ glMultiTexCoord4dvARB
+ glMultiTexCoord4fARB
+ glMultiTexCoord4fvARB
+ glMultiTexCoord4iARB
+ glMultiTexCoord4ivARB
+ glMultiTexCoord4sARB
+ glMultiTexCoord4svARB
diff --git a/src/mesa/drivers/windows/gldirect/pixpack.h b/src/mesa/drivers/windows/gldirect/pixpack.h
new file mode 100644
index 0000000000..ec848d455b
--- /dev/null
+++ b/src/mesa/drivers/windows/gldirect/pixpack.h
@@ -0,0 +1,108 @@
+/****************************************************************************
+*
+*                        Mesa 3-D graphics library
+*                        Direct3D Driver Interface
+*
+*  ========================================================================
+*
+*   Copyright (C) 1991-2004 SciTech Software, Inc. All rights reserved.
+*
+*   Permission is hereby granted, free of charge, to any person obtaining a
+*   copy of this software and associated documentation files (the "Software"),
+*   to deal in the Software without restriction, including without limitation
+*   the rights to use, copy, modify, merge, publish, distribute, sublicense,
+*   and/or sell copies of the Software, and to permit persons to whom the
+*   Software is furnished to do so, subject to the following conditions:
+*
+*   The above copyright notice and this permission notice shall be included
+*   in all copies or substantial portions of the Software.
+*
+*   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+*   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+*   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+*   SCITECH SOFTWARE INC BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+*   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+*   OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+*   SOFTWARE.
+*
+*  ======================================================================
+*
+* Language:     ANSI C
+* Environment:  Windows 9x (Win32)
+*
+* Description:  Pixel packing functions.
+*
+****************************************************************************/
+
+#ifndef __PIXPACK_H
+#define __PIXPACK_H
+
+#include <GL/gl.h>   /* forward slash: a backslash in a header name is not portable C */
+#include <ddraw.h>
+
+#include "ddlog.h"
+
+/*---------------------- Macros and type definitions ----------------------*/
+
+#define PXAPI
+
+// PX_PACK_FUNC(a) declares a pixel packing function named a; PX_packFunc is the matching function-pointer type.
+#define PX_PACK_FUNC(a) void PXAPI (a)(unsigned char *pixdata, void *dst, GLenum Format, const LPDDSURFACEDESC2 lpDDSD2)
+typedef void (PXAPI *PX_packFunc)(unsigned char *pixdata, void *dst, GLenum Format, const LPDDSURFACEDESC2 lpDDSD2);
+
+// PX_UNPACK_FUNC(a) declares a pixel unpacking function named a; PX_unpackFunc is the matching function-pointer type.
+#define PX_UNPACK_FUNC(a) void PXAPI (a)(unsigned char *pixdata, void *src, GLenum Format, const LPDDSURFACEDESC2 lpDDSD2)
+typedef void (PXAPI *PX_unpackFunc)(unsigned char *pixdata, void *src, GLenum Format, const LPDDSURFACEDESC2 lpDDSD2);
+
+// PX_PACK_SPAN_FUNC(a) declares a span (n pixels) packing function named a; PX_packSpanFunc is the matching function-pointer type.
+#define PX_PACK_SPAN_FUNC(a) void PXAPI (a)(GLuint n, unsigned char *pixdata, unsigned char *dst, GLenum Format, const LPDDSURFACEDESC2 lpDDSD2)
+typedef void (PXAPI *PX_packSpanFunc)(GLuint n, unsigned char *pixdata, unsigned char *dst, GLenum Format, const LPDDSURFACEDESC2 lpDDSD2);
+
+/*------------------------- Function Prototypes ---------------------------*/
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+// Function that examines a pixel format and returns the relevant pixel-packing
+// functions through the lpPackFn, lpUnpackFn and lpPackSpanFn pointers.
+void PXAPI pxClassifyPixelFormat(const LPDDPIXELFORMAT lpddpf, PX_packFunc *lpPackFn ,PX_unpackFunc *lpUnpackFn, PX_packSpanFunc *lpPackSpanFn);
+
+// Packing functions
+PX_PACK_FUNC(pxPackGeneric);
+PX_PACK_FUNC(pxPackRGB555);
+PX_PACK_FUNC(pxPackARGB4444);
+PX_PACK_FUNC(pxPackARGB1555);
+PX_PACK_FUNC(pxPackRGB565);
+PX_PACK_FUNC(pxPackRGB332);
+PX_PACK_FUNC(pxPackRGB888);
+PX_PACK_FUNC(pxPackARGB8888);
+PX_PACK_FUNC(pxPackPAL8);
+
+// Unpacking functions
+PX_UNPACK_FUNC(pxUnpackGeneric);
+PX_UNPACK_FUNC(pxUnpackRGB555);
+PX_UNPACK_FUNC(pxUnpackARGB4444);
+PX_UNPACK_FUNC(pxUnpackARGB1555);
+PX_UNPACK_FUNC(pxUnpackRGB565);
+PX_UNPACK_FUNC(pxUnpackRGB332);
+PX_UNPACK_FUNC(pxUnpackRGB888);
+PX_UNPACK_FUNC(pxUnpackARGB8888);
+PX_UNPACK_FUNC(pxUnpackPAL8);
+
+// Span Packing functions
+PX_PACK_SPAN_FUNC(pxPackSpanGeneric);
+PX_PACK_SPAN_FUNC(pxPackSpanRGB555);
+PX_PACK_SPAN_FUNC(pxPackSpanARGB4444);
+PX_PACK_SPAN_FUNC(pxPackSpanARGB1555);
+PX_PACK_SPAN_FUNC(pxPackSpanRGB565);
+PX_PACK_SPAN_FUNC(pxPackSpanRGB332);
+PX_PACK_SPAN_FUNC(pxPackSpanRGB888);
+PX_PACK_SPAN_FUNC(pxPackSpanARGB8888);
+PX_PACK_SPAN_FUNC(pxPackSpanPAL8);
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/mesa/drivers/windows/icd/icd.c b/src/mesa/drivers/windows/icd/icd.c
new file mode 100644
index 0000000000..4bc6176b1a
--- /dev/null
+++ b/src/mesa/drivers/windows/icd/icd.c
@@ -0,0 +1,347 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.1
+ *
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * File name: icd.c
+ * Author:    Gregor Anich
+ *
+ * ICD (Installable Client Driver) interface.
+ * Based on the windows GDI/WGL driver.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <windows.h>
+#define GL_GLEXT_PROTOTYPES
+#include "GL/gl.h"
+#include "GL/glext.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#include <stdio.h>
+#include <tchar.h>
+#include "GL/wmesa.h"
+#include "mtypes.h"
+#include "glapi.h"
+
+#define MAX_MESA_ATTRS	20
+
+typedef struct wmesa_context *PWMC;   /* pointer to a WMesa context */
+
+typedef struct _icdTable {
+    DWORD size;          /* entry count; icdTable initializes it to 336 */
+    PROC  table[336];    /* filled from the ICD_ENTRY list in icdlist.h */
+} ICDTABLE, *PICDTABLE;
+
+#ifdef USE_MGL_NAMESPACE
+# define GL_FUNC(func) mgl##func   /* mgl-prefixed names when USE_MGL_NAMESPACE is defined */
+#else
+# define GL_FUNC(func) gl##func
+#endif
+
+static ICDTABLE icdTable = { 336, {   /* DrvSetContext returns a pointer to this table */
+#define ICD_ENTRY(func) (PROC)GL_FUNC(func),
+#include "icdlist.h"
+#undef ICD_ENTRY
+} };
+
+struct __pixelformat__
+{
+    PIXELFORMATDESCRIPTOR	pfd;    /* copied out by DrvDescribePixelFormat */
+    GLboolean doubleBuffered;       /* passed to WMesaCreateContext */
+};
+
+struct __pixelformat__	pix[] =
+{
+    /* Format 1: double buffered, 8 alpha bits (24 color, 16 depth, 8 stencil) */
+    {	{	sizeof(PIXELFORMATDESCRIPTOR),	1,
+        PFD_DRAW_TO_WINDOW|PFD_SUPPORT_OPENGL|PFD_DOUBLEBUFFER|PFD_SWAP_COPY,
+        PFD_TYPE_RGBA,
+        24,	8,	0,	8,	8,	8,	16,	8,	24,
+        0,	0,	0,	0,	0,	16,	8,	0,	0,	0,	0,	0,	0 },
+        GL_TRUE
+    },
+    /* Format 2: single buffered, 8 alpha bits (24 color, 16 depth, 8 stencil) */
+    {	{	sizeof(PIXELFORMATDESCRIPTOR),	1,
+        PFD_DRAW_TO_WINDOW|PFD_SUPPORT_OPENGL, /* | PFD_SUPPORT_GDI ? */
+        PFD_TYPE_RGBA,
+        24,	8,	0,	8,	8,	8,	16,	8,	24,
+        0,	0,	0,	0,	0,	16,	8,	0,	0,	0,	0,	0,	0 },
+        GL_FALSE
+    },
+    /* Format 3: double buffered, no alpha bits (24 color, 16 depth, 8 stencil) */
+    {	{	sizeof(PIXELFORMATDESCRIPTOR),	1,
+        PFD_DRAW_TO_WINDOW|PFD_SUPPORT_OPENGL|PFD_DOUBLEBUFFER|PFD_SWAP_COPY,
+        PFD_TYPE_RGBA,
+        24,	8,	0,	8,	8,	8,	16,	0,	0,
+        0,	0,	0,	0,	0,	16,	8,	0,	0,	0,	0,	0,	0 },
+        GL_TRUE
+    },
+    /* Format 4: single buffered, no alpha bits (24 color, 16 depth, 8 stencil) */
+    {	{	sizeof(PIXELFORMATDESCRIPTOR),	1,
+        PFD_DRAW_TO_WINDOW|PFD_SUPPORT_OPENGL, /* | PFD_SUPPORT_GDI ? */
+        PFD_TYPE_RGBA,
+        24,	8,	0,	8,	8,	8,	16,	0,	0,
+        0,	0,	0,	0,	0,	16,	8,	0,	0,	0,	0,	0,	0 },
+        GL_FALSE
+    },
+};
+
+int qt_pix = sizeof(pix) / sizeof(pix[0]);   /* number of entries in pix[] */
+
+typedef struct {
+    WMesaContext ctx;   /* Mesa context; NULL marks a free slot */
+    HDC hdc;            /* DC recorded by DrvCreateContext / DrvSetContext */
+} MesaWglCtx;
+
+#define MESAWGL_CTX_MAX_COUNT 20
+
+static MesaWglCtx wgl_ctx[MESAWGL_CTX_MAX_COUNT];
+
+static unsigned ctx_count = 0;   /* number of occupied wgl_ctx slots */
+static int ctx_current = -1;     /* index of the current slot, -1 when none */
+static unsigned curPFD = 0;      /* selected 1-based pix[] index; 0 until DrvSetPixelFormat succeeds */
+
+/* Copying state between contexts is not supported; always returns FALSE. */
+WGLAPI BOOL GLAPIENTRY DrvCopyContext(HGLRC hglrcSrc, HGLRC hglrcDst, UINT mask) {
+    (void) mask; (void) hglrcDst; (void) hglrcSrc;
+    return FALSE;
+}
+
+/* Create a rendering context for hdc's window in the first free wgl_ctx slot.
+ * Returns the new context as an HGLRC, or NULL if hdc has no window, no valid
+ * pixel format has been selected, no slot is free, or WMesa creation fails. */
+WGLAPI HGLRC GLAPIENTRY DrvCreateContext(HDC hdc)
+{
+    HWND hWnd = WindowFromDC(hdc);
+    int i;
+
+    /* curPFD is 1-based and is 0 until DrvSetPixelFormat succeeds; unchecked,
+     * pix[curPFD-1] would wrap around and index far outside pix[]. */
+    if (!hWnd || curPFD < 1 || curPFD > (unsigned) qt_pix) {
+        SetLastError(0);
+        return NULL;
+    }
+    if (!ctx_count) {   /* no live contexts: reset every slot */
+        for (i = 0; i < MESAWGL_CTX_MAX_COUNT; i++) {
+            wgl_ctx[i].ctx = NULL;
+            wgl_ctx[i].hdc = NULL;
+        }
+    }
+    for (i = 0; i < MESAWGL_CTX_MAX_COUNT; i++) {
+        if (wgl_ctx[i].ctx != NULL)
+            continue;
+        wgl_ctx[i].ctx = WMesaCreateContext(hWnd, NULL, GL_TRUE,
+            pix[curPFD-1].doubleBuffered,
+            pix[curPFD-1].pfd.cAlphaBits ? GL_TRUE : GL_FALSE);
+        if (wgl_ctx[i].ctx == NULL)
+            break;      /* creation failed: report the error below */
+        wgl_ctx[i].hdc = hdc;
+        ctx_count++;
+        return (HGLRC) wgl_ctx[i].ctx;
+    }
+    SetLastError(0);
+    return NULL;
+}
+
+/* Destroy hglrc and free its slot; NULL (the empty-slot marker) or unknown handles fail. */
+WGLAPI BOOL GLAPIENTRY DrvDeleteContext(HGLRC hglrc)
+{
+    int i;
+    for (i = 0; hglrc != NULL && i < MESAWGL_CTX_MAX_COUNT; i++) {
+        if (wgl_ctx[i].ctx != (PWMC) hglrc)
+            continue;
+        WMesaMakeCurrent((PWMC) hglrc);
+        WMesaDestroyContext();
+        wgl_ctx[i].ctx = NULL;
+        wgl_ctx[i].hdc = NULL;
+        ctx_count--;
+        if (ctx_current == i) ctx_current = -1;  /* freed slot is no longer current */
+        return TRUE;
+    }
+    SetLastError(0);
+    return FALSE;
+}
+
+/* Layer plane 0 is handled by DrvCreateContext; other planes fail with NULL. */
+WGLAPI HGLRC GLAPIENTRY DrvCreateLayerContext(HDC hdc, int iLayerPlane) {
+    switch (iLayerPlane) {
+    case 0:  return DrvCreateContext(hdc);
+    default: SetLastError(0); return NULL;
+    }
+}
+
+/* Make hglrc the current context for hdc and return the ICD dispatch table.
+ * A NULL hdc or hglrc releases the current context; NULL is returned then,
+ * and also when hglrc is not found in wgl_ctx. */
+WGLAPI PICDTABLE GLAPIENTRY DrvSetContext(HDC hdc, HGLRC hglrc, void *callback) {
+    int slot;
+    (void) callback;
+
+    /* new code suggested by Andy Sy */
+    if (hdc == NULL || hglrc == NULL) {
+        WMesaMakeCurrent(NULL);
+        ctx_current = -1;
+        return NULL;
+    }
+
+    for (slot = 0; slot < MESAWGL_CTX_MAX_COUNT; slot++) {
+        if (wgl_ctx[slot].ctx != (PWMC) hglrc)
+            continue;
+        wgl_ctx[slot].hdc = hdc;
+        WMesaMakeCurrent((PWMC) hglrc);
+        ctx_current = slot;
+        return &icdTable;
+    }
+    return NULL;
+}
+
+/* Release whichever context is current; the hglrc argument is ignored. */
+WGLAPI void GLAPIENTRY DrvReleaseContext(HGLRC hglrc) {
+    WMesaMakeCurrent(NULL);
+    ctx_current = -1;
+    (void) hglrc;
+}
+
+/* Stub: reports success without touching either context. */
+WGLAPI BOOL GLAPIENTRY DrvShareLists(HGLRC hglrc1, HGLRC hglrc2) {
+    (void) hglrc2; (void) hglrc1;
+    return TRUE;
+}
+
+/* Layer planes are not supported: sets last-error to 0 and returns FALSE. */
+WGLAPI BOOL GLAPIENTRY DrvDescribeLayerPlane(HDC hdc, int iPixelFormat,
+                                             int iLayerPlane, UINT nBytes,
+                                             LPLAYERPLANEDESCRIPTOR plpd) {
+    (void) plpd; (void) nBytes; (void) iLayerPlane; (void) iPixelFormat; (void) hdc;
+    SetLastError(0);
+    return FALSE;
+}
+
+/* Layer palettes are not supported: sets last-error to 0 and returns 0. */
+WGLAPI int GLAPIENTRY DrvSetLayerPaletteEntries(HDC hdc, int iLayerPlane,
+                                                int iStart, int cEntries,
+                                                CONST COLORREF *pcr) {
+    (void) pcr; (void) cEntries; (void) iStart; (void) iLayerPlane; (void) hdc;
+    SetLastError(0);
+    return 0;
+}
+
+/* Layer palettes are not supported: sets last-error to 0 and returns 0. */
+WGLAPI int GLAPIENTRY DrvGetLayerPaletteEntries(HDC hdc, int iLayerPlane,
+                                                int iStart, int cEntries,
+                                                COLORREF *pcr) {
+    (void) pcr; (void) cEntries; (void) iStart; (void) iLayerPlane; (void) hdc;
+    SetLastError(0);
+    return 0;
+}
+
+/* Layer palettes are not supported: sets last-error to 0 and returns FALSE. */
+WGLAPI BOOL GLAPIENTRY DrvRealizeLayerPalette(HDC hdc, int iLayerPlane, BOOL bRealize) {
+    (void) bRealize; (void) iLayerPlane; (void) hdc;
+    SetLastError(0);
+    return FALSE;
+}
+
+/* Swaps via WMesaSwapBuffers() only when hdc is NULL; any other hdc fails.
+ * NOTE(review): the NULL test looks inverted -- confirm the intent. */
+WGLAPI BOOL GLAPIENTRY DrvSwapLayerBuffers(HDC hdc, UINT fuPlanes) {
+    (void) fuPlanes;
+    if (hdc != NULL) {
+        SetLastError(0);
+        return FALSE;
+    }
+    WMesaSwapBuffers();
+    return TRUE;
+}
+
+/* Copy the 1-based pixel format iPixelFormat into *ppfd.  Returns the number
+ * of formats in pix[] (also when ppfd is NULL, as a pure count query), or 0
+ * if the index is out of range or nBytes is not sizeof(PIXELFORMATDESCRIPTOR). */
+WGLAPI int GLAPIENTRY DrvDescribePixelFormat(HDC hdc, int iPixelFormat, UINT nBytes,
+                                             LPPIXELFORMATDESCRIPTOR ppfd) {
+    const int count = qt_pix;
+    (void) hdc;
+    if (ppfd != NULL) {
+        if (iPixelFormat < 1 || iPixelFormat > count
+            || nBytes != sizeof(PIXELFORMATDESCRIPTOR)) {
+            SetLastError(0);
+            return 0;
+        }
+        *ppfd = pix[iPixelFormat - 1].pfd;
+    }
+    return count;
+}
+
+/*
+ * Return the address of GL entry point lpszProc, or NULL if glapi lacks it.
+ */
+WGLAPI PROC GLAPIENTRY DrvGetProcAddress(LPCSTR lpszProc)
+{
+   /* Convert through INT_PTR, not int: int would drop the upper half of the
+    * address wherever pointers are wider than 32 bits (e.g. Win64). */
+   PROC p = (PROC) (INT_PTR) _glapi_get_proc_address((const char *) lpszProc);
+   if (!p)
+      SetLastError(0);
+   return p;
+}
+
+/* Select the 1-based pixel format used by later DrvCreateContext calls.
+ * Returns FALSE (leaving curPFD unchanged) if the index is outside pix[].
+ * hdc is ignored: the selection is kept in one file-level variable. */
+WGLAPI BOOL GLAPIENTRY DrvSetPixelFormat(HDC hdc, int iPixelFormat) {
+    const BOOL in_range = (iPixelFormat >= 1 && iPixelFormat <= qt_pix);
+    (void) hdc;
+
+    if (in_range) {
+        curPFD = iPixelFormat;
+    } else {
+        SetLastError(0);
+    }
+    return in_range;
+}
+
+/* Swap buffers of the current context.  Fails if no context is current, or
+ * (with last-error set to 0) if the current slot no longer holds a context. */
+WGLAPI BOOL GLAPIENTRY DrvSwapBuffers(HDC hdc) {
+    (void) hdc;
+    if (ctx_current >= 0) {
+        if (wgl_ctx[ctx_current].ctx != NULL) {
+            WMesaSwapBuffers();
+            return TRUE;
+        }
+        SetLastError(0);
+    }
+    return FALSE;
+}
+
+/* Accepts every version: the argument is not inspected. */
+WGLAPI BOOL GLAPIENTRY DrvValidateVersion(DWORD version) {
+    (void) version;
+    return TRUE;
+}
diff --git a/src/mesa/drivers/windows/icd/icdlist.h b/src/mesa/drivers/windows/icd/icdlist.h
new file mode 100644
index 0000000000..1318804cf4
--- /dev/null
+++ b/src/mesa/drivers/windows/icd/icdlist.h
@@ -0,0 +1,336 @@
+ICD_ENTRY(NewList)				/* 0 */
+ICD_ENTRY(EndList)				/* 1 */
+ICD_ENTRY(CallList)				/* 2 */
+ICD_ENTRY(CallLists)				/* 3 */
+ICD_ENTRY(DeleteLists)			/* 4 */
+ICD_ENTRY(GenLists)				/* 5 */
+ICD_ENTRY(ListBase)				/* 6 */
+ICD_ENTRY(Begin)					/* 7 */
+ICD_ENTRY(Bitmap)					/* 8 */
+ICD_ENTRY(Color3b)				/* 9 */
+ICD_ENTRY(Color3bv)				/* 10 */
+ICD_ENTRY(Color3d)				/* 11 */
+ICD_ENTRY(Color3dv)				/* 12 */
+ICD_ENTRY(Color3f)				/* 13 */
+ICD_ENTRY(Color3fv)				/* 14 */
+ICD_ENTRY(Color3i)				/* 15 */
+ICD_ENTRY(Color3iv)				/* 16 */
+ICD_ENTRY(Color3s)				/* 17 */
+ICD_ENTRY(Color3sv)				/* 18 */
+ICD_ENTRY(Color3ub)				/* 19 */
+ICD_ENTRY(Color3ubv)				/* 20 */
+ICD_ENTRY(Color3ui)				/* 21 */
+ICD_ENTRY(Color3uiv)				/* 22 */
+ICD_ENTRY(Color3us)				/* 23 */
+ICD_ENTRY(Color3usv)				/* 24 */
+ICD_ENTRY(Color4b)				/* 25 */
+ICD_ENTRY(Color4bv)				/* 26 */
+ICD_ENTRY(Color4d)				/* 27 */
+ICD_ENTRY(Color4dv)				/* 28 */
+ICD_ENTRY(Color4f)				/* 29 */
+ICD_ENTRY(Color4fv)				/* 30 */
+ICD_ENTRY(Color4i)				/* 31 */
+ICD_ENTRY(Color4iv)				/* 32 */
+ICD_ENTRY(Color4s)				/* 33 */
+ICD_ENTRY(Color4sv)				/* 34 */
+ICD_ENTRY(Color4ub)				/* 35 */
+ICD_ENTRY(Color4ubv)				/* 36 */
+ICD_ENTRY(Color4ui)				/* 37 */
+ICD_ENTRY(Color4uiv)				/* 38 */
+ICD_ENTRY(Color4us)				/* 39 */
+ICD_ENTRY(Color4usv)				/* 40 */
+ICD_ENTRY(EdgeFlag)				/* 41 */
+ICD_ENTRY(EdgeFlagv)				/* 42 */
+ICD_ENTRY(End)					/* 43 */
+ICD_ENTRY(Indexd)					/* 44 */
+ICD_ENTRY(Indexdv)				/* 45 */
+ICD_ENTRY(Indexf)					/* 46 */
+ICD_ENTRY(Indexfv)				/* 47 */
+ICD_ENTRY(Indexi)					/* 48 */
+ICD_ENTRY(Indexiv)				/* 49 */
+ICD_ENTRY(Indexs)					/* 50 */
+ICD_ENTRY(Indexsv)				/* 51 */
+ICD_ENTRY(Normal3b)				/* 52 */
+ICD_ENTRY(Normal3bv)				/* 53 */
+ICD_ENTRY(Normal3d)				/* 54 */
+ICD_ENTRY(Normal3dv)				/* 55 */
+ICD_ENTRY(Normal3f)				/* 56 */
+ICD_ENTRY(Normal3fv)				/* 57 */
+ICD_ENTRY(Normal3i)				/* 58 */
+ICD_ENTRY(Normal3iv)				/* 59 */
+ICD_ENTRY(Normal3s)				/* 60 */
+ICD_ENTRY(Normal3sv)				/* 61 */
+ICD_ENTRY(RasterPos2d)			/* 62 */
+ICD_ENTRY(RasterPos2dv)			/* 63 */
+ICD_ENTRY(RasterPos2f)			/* 64 */
+ICD_ENTRY(RasterPos2fv)			/* 65 */
+ICD_ENTRY(RasterPos2i)			/* 66 */
+ICD_ENTRY(RasterPos2iv)			/* 67 */
+ICD_ENTRY(RasterPos2s)			/* 68 */
+ICD_ENTRY(RasterPos2sv)			/* 69 */
+ICD_ENTRY(RasterPos3d)			/* 70 */
+ICD_ENTRY(RasterPos3dv)			/* 71 */
+ICD_ENTRY(RasterPos3f)			/* 72 */
+ICD_ENTRY(RasterPos3fv)			/* 73 */
+ICD_ENTRY(RasterPos3i)			/* 74 */
+ICD_ENTRY(RasterPos3iv)			/* 75 */
+ICD_ENTRY(RasterPos3s)			/* 76 */
+ICD_ENTRY(RasterPos3sv)			/* 77 */
+ICD_ENTRY(RasterPos4d)			/* 78 */
+ICD_ENTRY(RasterPos4dv)			/* 79 */
+ICD_ENTRY(RasterPos4f)			/* 80 */
+ICD_ENTRY(RasterPos4fv)			/* 81 */
+ICD_ENTRY(RasterPos4i)			/* 82 */
+ICD_ENTRY(RasterPos4iv)			/* 83 */
+ICD_ENTRY(RasterPos4s)			/* 84 */
+ICD_ENTRY(RasterPos4sv)			/* 85 */
+ICD_ENTRY(Rectd)					/* 86 */
+ICD_ENTRY(Rectdv)					/* 87 */
+ICD_ENTRY(Rectf)					/* 88 */
+ICD_ENTRY(Rectfv)					/* 89 */
+ICD_ENTRY(Recti)					/* 90 */
+ICD_ENTRY(Rectiv)					/* 91 */
+ICD_ENTRY(Rects)					/* 92 */
+ICD_ENTRY(Rectsv)					/* 93 */
+ICD_ENTRY(TexCoord1d)				/* 94 */
+ICD_ENTRY(TexCoord1dv)			/* 95 */
+ICD_ENTRY(TexCoord1f)				/* 96 */
+ICD_ENTRY(TexCoord1fv)			/* 97 */
+ICD_ENTRY(TexCoord1i)				/* 98 */
+ICD_ENTRY(TexCoord1iv)			/* 99 */
+ICD_ENTRY(TexCoord1s)				/* 100 */
+ICD_ENTRY(TexCoord1sv)			/* 101 */
+ICD_ENTRY(TexCoord2d)				/* 102 */
+ICD_ENTRY(TexCoord2dv)			/* 103 */
+ICD_ENTRY(TexCoord2f)				/* 104 */
+ICD_ENTRY(TexCoord2fv)			/* 105 */
+ICD_ENTRY(TexCoord2i)				/* 106 */
+ICD_ENTRY(TexCoord2iv)			/* 107 */
+ICD_ENTRY(TexCoord2s)				/* 108 */
+ICD_ENTRY(TexCoord2sv)			/* 109 */
+ICD_ENTRY(TexCoord3d)				/* 110 */
+ICD_ENTRY(TexCoord3dv)			/* 111 */
+ICD_ENTRY(TexCoord3f)				/* 112 */
+ICD_ENTRY(TexCoord3fv)			/* 113 */
+ICD_ENTRY(TexCoord3i)				/* 114 */
+ICD_ENTRY(TexCoord3iv)			/* 115 */
+ICD_ENTRY(TexCoord3s)				/* 116 */
+ICD_ENTRY(TexCoord3sv)			/* 117 */
+ICD_ENTRY(TexCoord4d)				/* 118 */
+ICD_ENTRY(TexCoord4dv)			/* 119 */
+ICD_ENTRY(TexCoord4f)				/* 120 */
+ICD_ENTRY(TexCoord4fv)			/* 121 */
+ICD_ENTRY(TexCoord4i)				/* 122 */
+ICD_ENTRY(TexCoord4iv)			/* 123 */
+ICD_ENTRY(TexCoord4s)				/* 124 */
+ICD_ENTRY(TexCoord4sv)			/* 125 */
+ICD_ENTRY(Vertex2d)				/* 126 */
+ICD_ENTRY(Vertex2dv)				/* 127 */
+ICD_ENTRY(Vertex2f)				/* 128 */
+ICD_ENTRY(Vertex2fv)				/* 129 */
+ICD_ENTRY(Vertex2i)				/* 130 */
+ICD_ENTRY(Vertex2iv)				/* 131 */
+ICD_ENTRY(Vertex2s)				/* 132 */
+ICD_ENTRY(Vertex2sv)				/* 133 */
+ICD_ENTRY(Vertex3d)				/* 134 */
+ICD_ENTRY(Vertex3dv)				/* 135 */
+ICD_ENTRY(Vertex3f)				/* 136 */
+ICD_ENTRY(Vertex3fv)				/* 137 */
+ICD_ENTRY(Vertex3i)				/* 138 */
+ICD_ENTRY(Vertex3iv)				/* 139 */
+ICD_ENTRY(Vertex3s)				/* 140 */
+ICD_ENTRY(Vertex3sv)				/* 141 */
+ICD_ENTRY(Vertex4d)				/* 142 */
+ICD_ENTRY(Vertex4dv)				/* 143 */
+ICD_ENTRY(Vertex4f)				/* 144 */
+ICD_ENTRY(Vertex4fv)				/* 145 */
+ICD_ENTRY(Vertex4i)				/* 146 */
+ICD_ENTRY(Vertex4iv)				/* 147 */
+ICD_ENTRY(Vertex4s)				/* 148 */
+ICD_ENTRY(Vertex4sv)				/* 149 */
+ICD_ENTRY(ClipPlane)				/* 150 */
+ICD_ENTRY(ColorMaterial)			/* 151 */
+ICD_ENTRY(CullFace)				/* 152 */
+ICD_ENTRY(Fogf)					/* 153 */
+ICD_ENTRY(Fogfv)					/* 154 */
+ICD_ENTRY(Fogi)					/* 155 */
+ICD_ENTRY(Fogiv)					/* 156 */
+ICD_ENTRY(FrontFace)				/* 157 */
+ICD_ENTRY(Hint)					/* 158 */
+ICD_ENTRY(Lightf)					/* 159 */
+ICD_ENTRY(Lightfv)				/* 160 */
+ICD_ENTRY(Lighti)					/* 161 */
+ICD_ENTRY(Lightiv)				/* 162 */
+ICD_ENTRY(LightModelf)			/* 163 */
+ICD_ENTRY(LightModelfv)			/* 164 */
+ICD_ENTRY(LightModeli)			/* 165 */
+ICD_ENTRY(LightModeliv)			/* 166 */
+ICD_ENTRY(LineStipple)			/* 167 */
+ICD_ENTRY(LineWidth)				/* 168 */
+ICD_ENTRY(Materialf)				/* 169 */
+ICD_ENTRY(Materialfv)				/* 170 */
+ICD_ENTRY(Materiali)				/* 171 */
+ICD_ENTRY(Materialiv)				/* 172 */
+ICD_ENTRY(PointSize)				/* 173 */
+ICD_ENTRY(PolygonMode)			/* 174 */
+ICD_ENTRY(PolygonStipple)			/* 175 */
+ICD_ENTRY(Scissor)				/* 176 */
+ICD_ENTRY(ShadeModel)				/* 177 */
+ICD_ENTRY(TexParameterf)			/* 178 */
+ICD_ENTRY(TexParameterfv)			/* 179 */
+ICD_ENTRY(TexParameteri)			/* 180 */
+ICD_ENTRY(TexParameteriv)			/* 181 */
+ICD_ENTRY(TexImage1D)				/* 182 */
+ICD_ENTRY(TexImage2D)				/* 183 */
+ICD_ENTRY(TexEnvf)				/* 184 */
+ICD_ENTRY(TexEnvfv)				/* 185 */
+ICD_ENTRY(TexEnvi)				/* 186 */
+ICD_ENTRY(TexEnviv)				/* 187 */
+ICD_ENTRY(TexGend)				/* 188 */
+ICD_ENTRY(TexGendv)				/* 189 */
+ICD_ENTRY(TexGenf)				/* 190 */
+ICD_ENTRY(TexGenfv)				/* 191 */
+ICD_ENTRY(TexGeni)				/* 192 */
+ICD_ENTRY(TexGeniv)				/* 193 */
+ICD_ENTRY(FeedbackBuffer)			/* 194 */
+ICD_ENTRY(SelectBuffer)			/* 195 */
+ICD_ENTRY(RenderMode)				/* 196 */
+ICD_ENTRY(InitNames)				/* 197 */
+ICD_ENTRY(LoadName)				/* 198 */
+ICD_ENTRY(PassThrough)			/* 199 */
+ICD_ENTRY(PopName)				/* 200 */
+ICD_ENTRY(PushName)				/* 201 */
+ICD_ENTRY(DrawBuffer)				/* 202 */
+ICD_ENTRY(Clear)					/* 203 */
+ICD_ENTRY(ClearAccum)				/* 204 */
+ICD_ENTRY(ClearIndex)				/* 205 */
+ICD_ENTRY(ClearColor)				/* 206 */
+ICD_ENTRY(ClearStencil)			/* 207 */
+ICD_ENTRY(ClearDepth)				/* 208 */
+ICD_ENTRY(StencilMask)			/* 209 */
+ICD_ENTRY(ColorMask)				/* 210 */
+ICD_ENTRY(DepthMask)				/* 211 */
+ICD_ENTRY(IndexMask)				/* 212 */
+ICD_ENTRY(Accum)					/* 213 */
+ICD_ENTRY(Disable)				/* 214 */
+ICD_ENTRY(Enable)					/* 215 */
+ICD_ENTRY(Finish)					/* 216 */
+ICD_ENTRY(Flush)					/* 217 */
+ICD_ENTRY(PopAttrib)				/* 218 */
+ICD_ENTRY(PushAttrib)				/* 219 */
+ICD_ENTRY(Map1d)					/* 220 */
+ICD_ENTRY(Map1f)					/* 221 */
+ICD_ENTRY(Map2d)					/* 222 */
+ICD_ENTRY(Map2f)					/* 223 */
+ICD_ENTRY(MapGrid1d)				/* 224 */
+ICD_ENTRY(MapGrid1f)				/* 225 */
+ICD_ENTRY(MapGrid2d)				/* 226 */
+ICD_ENTRY(MapGrid2f)				/* 227 */
+ICD_ENTRY(EvalCoord1d)			/* 228 */
+ICD_ENTRY(EvalCoord1dv)			/* 229 */
+ICD_ENTRY(EvalCoord1f)			/* 230 */
+ICD_ENTRY(EvalCoord1fv)			/* 231 */
+ICD_ENTRY(EvalCoord2d)			/* 232 */
+ICD_ENTRY(EvalCoord2dv)			/* 233 */
+ICD_ENTRY(EvalCoord2f)			/* 234 */
+ICD_ENTRY(EvalCoord2fv)			/* 235 */
+ICD_ENTRY(EvalMesh1)				/* 236 */
+ICD_ENTRY(EvalPoint1)				/* 237 */
+ICD_ENTRY(EvalMesh2)				/* 238 */
+ICD_ENTRY(EvalPoint2)				/* 239 */
+ICD_ENTRY(AlphaFunc)				/* 240 */
+ICD_ENTRY(BlendFunc)				/* 241 */
+ICD_ENTRY(LogicOp)				/* 242 */
+ICD_ENTRY(StencilFunc)			/* 243 */
+ICD_ENTRY(StencilOp)				/* 244 */
+ICD_ENTRY(DepthFunc)				/* 245 */
+ICD_ENTRY(PixelZoom)				/* 246 */
+ICD_ENTRY(PixelTransferf)			/* 247 */
+ICD_ENTRY(PixelTransferi)			/* 248 */
+ICD_ENTRY(PixelStoref)			/* 249 */
+ICD_ENTRY(PixelStorei)			/* 250 */
+ICD_ENTRY(PixelMapfv)				/* 251 */
+ICD_ENTRY(PixelMapuiv)			/* 252 */
+ICD_ENTRY(PixelMapusv)			/* 253 */
+ICD_ENTRY(ReadBuffer)				/* 254 */
+ICD_ENTRY(CopyPixels)				/* 255 */
+ICD_ENTRY(ReadPixels)				/* 256 */
+ICD_ENTRY(DrawPixels)				/* 257 */
+ICD_ENTRY(GetBooleanv)			/* 258 */
+ICD_ENTRY(GetClipPlane)			/* 259 */
+ICD_ENTRY(GetDoublev)				/* 260 */
+ICD_ENTRY(GetError)				/* 261 */
+ICD_ENTRY(GetFloatv)				/* 262 */
+ICD_ENTRY(GetIntegerv)			/* 263 */
+ICD_ENTRY(GetLightfv)				/* 264 */
+ICD_ENTRY(GetLightiv)				/* 265 */
+ICD_ENTRY(GetMapdv)				/* 266 */
+ICD_ENTRY(GetMapfv)				/* 267 */
+ICD_ENTRY(GetMapiv)				/* 268 */
+ICD_ENTRY(GetMaterialfv)			/* 269 */
+ICD_ENTRY(GetMaterialiv)			/* 270 */
+ICD_ENTRY(GetPixelMapfv)			/* 271 */
+ICD_ENTRY(GetPixelMapuiv)			/* 272 */
+ICD_ENTRY(GetPixelMapusv)			/* 273 */
+ICD_ENTRY(GetPolygonStipple)		/* 274 */
+ICD_ENTRY(GetString)				/* 275 */
+ICD_ENTRY(GetTexEnvfv)			/* 276 */
+ICD_ENTRY(GetTexEnviv)			/* 277 */
+ICD_ENTRY(GetTexGendv)			/* 278 */
+ICD_ENTRY(GetTexGenfv)			/* 279 */
+ICD_ENTRY(GetTexGeniv)			/* 280 */
+ICD_ENTRY(GetTexImage)			/* 281 */
+ICD_ENTRY(GetTexParameterfv)		/* 282 */
+ICD_ENTRY(GetTexParameteriv)		/* 283 */
+ICD_ENTRY(GetTexLevelParameterfv)	/* 284 */
+ICD_ENTRY(GetTexLevelParameteriv)	/* 285 */
+ICD_ENTRY(IsEnabled)				/* 286 */
+ICD_ENTRY(IsList)					/* 287 */
+ICD_ENTRY(DepthRange)				/* 288 */
+ICD_ENTRY(Frustum)				/* 289 */
+ICD_ENTRY(LoadIdentity)			/* 290 */
+ICD_ENTRY(LoadMatrixf)			/* 291 */
+ICD_ENTRY(LoadMatrixd)			/* 292 */
+ICD_ENTRY(MatrixMode)				/* 293 */
+ICD_ENTRY(MultMatrixf)			/* 294 */
+ICD_ENTRY(MultMatrixd)			/* 295 */
+ICD_ENTRY(Ortho)					/* 296 */
+ICD_ENTRY(PopMatrix)				/* 297 */
+ICD_ENTRY(PushMatrix)				/* 298 */
+ICD_ENTRY(Rotated)				/* 299 */
+ICD_ENTRY(Rotatef)				/* 300 */
+ICD_ENTRY(Scaled)					/* 301 */
+ICD_ENTRY(Scalef)					/* 302 */
+ICD_ENTRY(Translated)				/* 303 */
+ICD_ENTRY(Translatef)				/* 304 */
+ICD_ENTRY(Viewport)				/* 305 */
+ICD_ENTRY(ArrayElement)			/* 306 */
+ICD_ENTRY(BindTexture)			/* 307 */
+ICD_ENTRY(ColorPointer)			/* 308 */
+ICD_ENTRY(DisableClientState)		/* 309 */
+ICD_ENTRY(DrawArrays)				/* 310 */
+ICD_ENTRY(DrawElements)			/* 311 */
+ICD_ENTRY(EdgeFlagPointer)		/* 312 */
+ICD_ENTRY(EnableClientState)		/* 313 */
+ICD_ENTRY(IndexPointer)			/* 314 */
+ICD_ENTRY(Indexub)				/* 315 */
+ICD_ENTRY(Indexubv)				/* 316 */
+ICD_ENTRY(InterleavedArrays)		/* 317 */
+ICD_ENTRY(NormalPointer)			/* 318 */
+ICD_ENTRY(PolygonOffset)			/* 319 */
+ICD_ENTRY(TexCoordPointer)		/* 320 */
+ICD_ENTRY(VertexPointer)			/* 321 */
+ICD_ENTRY(AreTexturesResident)	/* 322 */
+ICD_ENTRY(CopyTexImage1D)			/* 323 */
+ICD_ENTRY(CopyTexImage2D)			/* 324 */
+ICD_ENTRY(CopyTexSubImage1D)		/* 325 */
+ICD_ENTRY(CopyTexSubImage2D)		/* 326 */
+ICD_ENTRY(DeleteTextures)			/* 327 */
+ICD_ENTRY(GenTextures)			/* 328 */
+ICD_ENTRY(GetPointerv)			/* 329 */
+ICD_ENTRY(IsTexture)				/* 330 */
+ICD_ENTRY(PrioritizeTextures)		/* 331 */
+ICD_ENTRY(TexSubImage1D)			/* 332 */
+ICD_ENTRY(TexSubImage2D)			/* 333 */
+ICD_ENTRY(PopClientAttrib)		/* 334 */
+ICD_ENTRY(PushClientAttrib)		/* 335 */
diff --git a/src/mesa/drivers/windows/icd/mesa.def b/src/mesa/drivers/windows/icd/mesa.def
new file mode 100644
index 0000000000..6f33d170ff
--- /dev/null
+++ b/src/mesa/drivers/windows/icd/mesa.def
@@ -0,0 +1,102 @@
+DESCRIPTION 'Mesa (OpenGL driver) for Win32'
+VERSION 6.1
+
+EXPORTS
+;
+; ICD API
+	DrvCopyContext
+	DrvCreateContext
+	DrvCreateLayerContext
+	DrvDeleteContext
+	DrvDescribeLayerPlane
+	DrvDescribePixelFormat
+	DrvGetLayerPaletteEntries
+	DrvGetProcAddress
+	DrvReleaseContext
+	DrvRealizeLayerPalette
+	DrvSetContext
+	DrvSetLayerPaletteEntries
+	DrvSetPixelFormat
+	DrvShareLists
+	DrvSwapBuffers
+	DrvSwapLayerBuffers
+	DrvValidateVersion
+
+;
+; Mesa internals - mostly for OSMESA
+	_vbo_CreateContext
+	_vbo_DestroyContext
+	_vbo_InvalidateState
+	_glapi_get_context
+	_glapi_get_proc_address
+	_mesa_buffer_data
+	_mesa_buffer_map
+	_mesa_buffer_subdata
+	_mesa_choose_tex_format
+	_mesa_compressed_texture_size
+	_mesa_create_framebuffer
+	_mesa_create_visual
+	_mesa_delete_buffer_object
+	_mesa_delete_texture_object
+	_mesa_destroy_framebuffer
+	_mesa_destroy_visual
+	_mesa_enable_1_3_extensions
+	_mesa_enable_1_4_extensions
+	_mesa_enable_1_5_extensions
+	_mesa_enable_sw_extensions
+	_mesa_error
+	_mesa_free_context_data
+	_mesa_get_current_context
+	_mesa_init_default_imports
+	_mesa_init_driver_functions
+	_mesa_initialize_context
+	_mesa_make_current
+	_mesa_new_buffer_object
+	_mesa_new_texture_object
+	_mesa_problem
+	_mesa_ResizeBuffersMESA
+	_mesa_store_compressed_teximage1d
+	_mesa_store_compressed_teximage2d
+	_mesa_store_compressed_teximage3d
+	_mesa_store_compressed_texsubimage1d
+	_mesa_store_compressed_texsubimage2d
+	_mesa_store_compressed_texsubimage3d
+	_mesa_store_teximage1d
+	_mesa_store_teximage2d
+	_mesa_store_teximage3d
+	_mesa_store_texsubimage1d
+	_mesa_store_texsubimage2d
+	_mesa_store_texsubimage3d
+	_mesa_test_proxy_teximage
+	_mesa_Viewport
+	_mesa_meta_CopyColorSubTable
+	_mesa_meta_CopyColorTable
+	_mesa_meta_CopyConvolutionFilter1D
+	_mesa_meta_CopyConvolutionFilter2D
+	_mesa_meta_CopyTexImage1D
+	_mesa_meta_CopyTexImage2D
+	_mesa_meta_CopyTexSubImage1D
+	_mesa_meta_CopyTexSubImage2D
+	_mesa_meta_CopyTexSubImage3D
+	_swrast_Accum
+	_swrast_Bitmap
+	_swrast_CopyPixels
+	_swrast_DrawBuffer
+	_swrast_DrawPixels
+	_swrast_GetDeviceDriverReference
+	_swrast_Clear
+	_swrast_choose_line
+	_swrast_choose_triangle
+	_swrast_CreateContext
+	_swrast_DestroyContext
+	_swrast_InvalidateState
+	_swrast_ReadPixels
+	_swsetup_Wakeup
+	_swsetup_CreateContext
+	_swsetup_DestroyContext
+	_swsetup_InvalidateState
+	_tnl_CreateContext
+	_tnl_DestroyContext
+	_tnl_InvalidateState
+	_tnl_MakeCurrent
+	_tnl_run_pipeline
diff --git a/src/mesa/drivers/x11/Makefile b/src/mesa/drivers/x11/Makefile
new file mode 100644
index 0000000000..b5b0c1f11a
--- /dev/null
+++ b/src/mesa/drivers/x11/Makefile
@@ -0,0 +1,81 @@
+# src/mesa/drivers/x11/Makefile for libGL.so
+
+# This builds "stand-alone" Mesa, a version of libGL that does not need the
+# GLX extension.  All rendering is converted to Xlib calls.  No hardware
+# acceleration.
+
+
+TOP = ../../../..
+
+include $(TOP)/configs/current
+
+
+GL_MAJOR = 1
+GL_MINOR = 5
+GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY)
+
+
+HEADERS = \
+	glxapi.h \
+	glxheader.h \
+	xfonts.h \
+	xmesaP.h \
+	xm_glide.h \
+	xm_image.h
+
+SOURCES = \
+	fakeglx.c \
+	glxapi.c \
+	xfonts.c \
+	xm_api.c \
+	xm_buffer.c \
+	xm_dd.c \
+	xm_glide.c \
+	xm_image.c \
+	xm_line.c \
+	xm_span.c \
+	xm_tri.c
+
+OBJECTS = $(SOURCES:.c=.o)
+
+INCLUDE_DIRS = \
+	-I$(TOP)/include \
+	-I$(TOP)/src/mapi \
+	-I$(TOP)/src/mesa \
+	-I$(TOP)/src/mesa/main \
+	$(X11_INCLUDES)
+
+CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mapi/glapi/libglapi.a
+
+
+
+.c.o:
+	$(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
+
+
+default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME)
+
+
+$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(OBJECTS) $(CORE_MESA)
+	@ $(MKLIB) -o $(GL_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+		-major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \
+		-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
+		-id $(INSTALL_LIB_DIR)/lib$(GL_LIB).$(GL_MAJOR).dylib \
+		$(GL_LIB_DEPS) $(OBJECTS) $(CORE_MESA)
+
+
+
+clean:
+	-rm -f *.o *~
+	-rm -f depend depend.bak
+
+
+
+depend: $(SOURCES) $(HEADERS)
+	@ echo "running $(MKDEP)"
+	@ touch depend
+	@$(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(SOURCES) $(HEADERS) \
+		> /dev/null 2>/dev/null
+
+
+-include depend
diff --git a/src/mesa/drivers/x11/descrip.mms b/src/mesa/drivers/x11/descrip.mms
new file mode 100644
index 0000000000..648f683dfa
--- /dev/null
+++ b/src/mesa/drivers/x11/descrip.mms
@@ -0,0 +1,55 @@
+# Makefile for core library for VMS
+# contributed by Jouk Jansen  joukj@hrem.nano.tudelft.nl
+# Last revision : 3 October 2007
+
+.first
+	define gl [----.include.gl]
+	define math [--.math]
+	define tnl [--.tnl]
+	define vbo [--.vbo]
+	define swrast [--.swrast]
+	define swrast_setup [--.swrast_setup]
+	define array_cache [--.array_cache]
+	define drivers [-]
+	define glapi [--.glapi]
+	define main [--.main]
+	define shader [--.shader]
+
+.include [----]mms-config.
+
+##### MACROS #####
+
+VPATH = RCS
+
+INCDIR = [----.include],[--.main],[--.glapi],[--.shader]
+LIBDIR = [----.lib]
+CFLAGS =/include=($(INCDIR),[])/define=(PTHREADS=1)/name=(as_is,short)/float=ieee/ieee=denorm
+
+SOURCES = fakeglx.c glxapi.c xfonts.c xm_api.c xm_dd.c xm_line.c xm_span.c\
+	xm_tri.c xm_buffer.c
+
+OBJECTS =fakeglx.obj,glxapi.obj,xfonts.obj,xm_api.obj,xm_dd.obj,xm_line.obj,\
+	xm_span.obj,xm_tri.obj,xm_buffer.obj
+
+##### RULES #####
+
+VERSION=Mesa V3.4
+
+##### TARGETS #####
+# Make the library
+$(LIBDIR)$(GL_LIB) : $(OBJECTS)
+  @ library $(LIBDIR)$(GL_LIB) $(OBJECTS)
+
+clean :
+	purge
+	delete *.obj;*
+
+fakeglx.obj : fakeglx.c
+glxapi.obj : glxapi.c
+xfonts.obj : xfonts.c
+xm_api.obj : xm_api.c
+xm_buffer.obj : xm_buffer.c
+xm_dd.obj : xm_dd.c
+xm_line.obj : xm_line.c
+xm_span.obj : xm_span.c
+xm_tri.obj : xm_tri.c
diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c
new file mode 100644
index 0000000000..f1e62b6bd4
--- /dev/null
+++ b/src/mesa/drivers/x11/fakeglx.c
@@ -0,0 +1,3067 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.5
+ *
+ * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2009  VMware, Inc.   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * This is an emulation of the GLX API which allows Mesa/GLX-based programs
+ * to run on X servers which do not have the real GLX extension.
+ *
+ * Thanks to the contributors:
+ *
+ * Initial version:  Philip Brown (phil@bolthole.com)
+ * Better glXGetConfig() support: Armin Liebchen (liebchen@asylum.cs.utah.edu)
+ * Further visual-handling refinements: Wolfram Gloger
+ *    (wmglo@Dent.MED.Uni-Muenchen.DE).
+ *
+ * Notes:
+ *   Don't be fooled, stereo isn't supported yet.
+ */
+
+
+
+#include "glxheader.h"
+#include "glxapi.h"
+#include "main/context.h"
+#include "main/config.h"
+#include "main/macros.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/version.h"
+#include "xfonts.h"
+#include "xmesaP.h"
+
+#ifdef __VMS
+#define sprintf sprintf
+#endif
+
+/* This indicates the client-side GLX API and GLX encoder version. */
+#define CLIENT_MAJOR_VERSION 1
+#define CLIENT_MINOR_VERSION 4  /* but don't have 1.3's pbuffers, etc yet */
+
+/* This indicates the server-side GLX decoder version.
+ * GLX 1.4 indicates OpenGL 1.3 support
+ */
+#define SERVER_MAJOR_VERSION 1
+#define SERVER_MINOR_VERSION 4
+
+/* This is appended onto the glXGetClient/ServerString version strings. */
+#define MESA_GLX_VERSION "Mesa " MESA_VERSION_STRING
+
+/* Who implemented this GLX? */
+#define VENDOR "Brian Paul"
+
+#define EXTENSIONS \
+   "GLX_MESA_set_3dfx_mode " \
+   "GLX_MESA_copy_sub_buffer " \
+   "GLX_MESA_pixmap_colormap " \
+   "GLX_MESA_release_buffers " \
+   "GLX_ARB_get_proc_address " \
+   "GLX_EXT_texture_from_pixmap " \
+   "GLX_EXT_visual_info " \
+   "GLX_EXT_visual_rating " \
+   /*"GLX_SGI_video_sync "*/ \
+   "GLX_SGIX_fbconfig " \
+   "GLX_SGIX_pbuffer "
+
+/*
+ * Our fake GLX context contains a "real" GLX context and an XMesa context.
+ *
+ * Note that a pointer to a __GLXcontext is a pointer to a fake_glx_context,
+ * and vice versa.  This only holds because glxContext is the first member,
+ * so do not reorder the fields.
+ *
+ * We really just need this structure in order to make the libGL functions
+ * glXGetCurrentContext(), glXGetCurrentDrawable() and glXGetCurrentDisplay()
+ * work correctly.
+ */
+struct fake_glx_context {
+   __GLXcontext glxContext;   /* this MUST be first! */
+   XMesaContext xmesaContext;   /* the XMesa context paired with glxContext */
+};
+
+
+
+/**********************************************************************/
+/***                       GLX Visual Code                          ***/
+/**********************************************************************/
+
+#define DONT_CARE -1
+
+
+static XMesaVisual *VisualTable = NULL;
+static int NumVisuals = 0;
+
+
+/*
+ * This struct and some code fragments borrowed
+ * from Mark Kilgard's GLUT library.
+ *
+ * One record of the SERVER_OVERLAY_VISUALS root-window property, as
+ * returned by GetOverlayInfo().
+ */
+typedef struct _OverlayInfo {
+  /* Avoid 64-bit portability problems by being careful to use
+     longs due to the way XGetWindowProperty is specified. Note
+     that these parameters are passed as CARD32s over X
+     protocol. */
+  unsigned long overlay_visual;  /* visual ID; compared against XVisualInfo.visualid */
+  long transparent_type;         /* 0 indicates no transparency */
+  long value;                    /* the transparent pixel value */
+  long layer;                    /* level of the visual; 0 = normal planes */
+} OverlayInfo;
+
+
+
+/* Macro to handle c_class vs class field name in XVisualInfo struct */
+#if defined(__cplusplus) || defined(c_plusplus)
+#define CLASS c_class
+#else
+#define CLASS class
+#endif
+
+
+
+/*
+ * Test if the given XVisualInfo is usable for Mesa rendering.
+ * Return:  GL_TRUE for StaticGray, GrayScale, TrueColor and DirectColor
+ *          visuals, GL_FALSE for every other visual class.
+ */
+static GLboolean
+is_usable_visual( XVisualInfo *vinfo )
+{
+   const int visClass = vinfo->CLASS;
+
+   /* Gray-level visuals and TrueColor/DirectColor visuals of any depth
+    * are accepted.
+    */
+   if (visClass == StaticGray || visClass == GrayScale ||
+       visClass == TrueColor || visClass == DirectColor) {
+      return GL_TRUE;
+   }
+
+   /* StaticColor/PseudoColor would need color-index rendering, which is
+    * not supported.  Any other class value should never happen.
+    */
+   return GL_FALSE;
+}
+
+
+
+/**
+ * Get an array of OverlayInfo records for the specified screen.
+ * \param dpy  the display
+ * \param screen  screen number
+ * \param numOverlays  returns number of OverlayInfo records; set to 0
+ *                     whenever NULL is returned
+ * \return  pointer to OverlayInfo array, free with XFree(), or NULL if the
+ *          property does not exist or could not be read
+ */
+static OverlayInfo *
+GetOverlayInfo(Display *dpy, int screen, int *numOverlays)
+{
+   Atom overlayVisualsAtom;
+   Atom actualType;
+   Status status;
+   /* Start out NULL so the failure path never hands XFree() an
+    * uninitialized pointer when XGetWindowProperty() does not fill it in.
+    */
+   unsigned char *ovInfo = NULL;
+   unsigned long sizeData, bytesLeft;
+   int actualFormat;
+
+   /* Every early exit below reports "no overlays". */
+   *numOverlays = 0;
+
+   /*
+    * The SERVER_OVERLAY_VISUALS property on the root window contains
+    * a list of overlay visuals.  Get that list now.
+    */
+   overlayVisualsAtom = XInternAtom(dpy,"SERVER_OVERLAY_VISUALS", True);
+   if (overlayVisualsAtom == None) {
+      return NULL;
+   }
+
+   status = XGetWindowProperty(dpy, RootWindow(dpy, screen),
+                               overlayVisualsAtom, 0L, (long) 10000, False,
+                               overlayVisualsAtom, &actualType, &actualFormat,
+                               &sizeData, &bytesLeft,
+                               &ovInfo);
+
+   if (status != Success || actualType != overlayVisualsAtom ||
+       actualFormat != 32 || sizeData < 4) {
+      /* something went wrong */
+      if (ovInfo) {
+         XFree((void *) ovInfo);
+      }
+      return NULL;
+   }
+
+   /* sizeData counts property items; each OverlayInfo record has four. */
+   *numOverlays = sizeData / 4;
+   return (OverlayInfo *) ovInfo;
+}
+
+
+
+/**
+ * Return the level (overlay, normal, underlay) of a given XVisualInfo.
+ * Input:  dpy - the X display
+ *         vinfo - the XVisualInfo to test
+ * Return:  level of the visual:
+ *             0 = normal planes (also when the visual is not listed in
+ *                 the overlay info, or no overlay info is available)
+ *            >0 = overlay planes
+ *            <0 = underlay planes
+ */
+static int
+level_of_visual( Display *dpy, XVisualInfo *vinfo )
+{
+   OverlayInfo *overlays;
+   int count, idx;
+   int level = 0;
+
+   overlays = GetOverlayInfo(dpy, vinfo->screen, &count);
+   if (overlays == NULL) {
+      return 0;
+   }
+
+   /* The first record whose visual ID matches decides the level. */
+   for (idx = 0; idx < count; idx++) {
+      if (overlays[idx].overlay_visual == vinfo->visualid) {
+         level = overlays[idx].layer;
+         break;
+      }
+   }
+
+   XFree((void *) overlays);
+   return level;
+}
+
+
+
+
+/*
+ * Given an XVisualInfo and a requested buffer configuration, return the
+ * matching XMesaVisual from our list of GLX visuals, creating a new one and
+ * appending it to the list if no existing entry matches.
+ *
+ * Input:  dpy - the X display
+ *         vinfo - the X visual to wrap
+ *         alphaFlag - want an alpha channel (forced on when the
+ *                     MESA_GLX_FORCE_ALPHA env var is set)
+ *         dbFlag - want double buffering
+ *         stereoFlag - want stereo; not supported, so GL_TRUE yields NULL
+ *         depth_size, stencil_size - requested depth/stencil bits
+ *         accumRedSize..accumAlphaSize - requested accumulation buffer bits
+ *         level - overlay/underlay level
+ *         numAuxBuffers - number of auxiliary buffers
+ * Return:  the XMesaVisual, or NULL if stereo was requested or
+ *          XMesaCreateVisual() failed.
+ */
+static XMesaVisual
+save_glx_visual( Display *dpy, XVisualInfo *vinfo,
+                 GLboolean alphaFlag, GLboolean dbFlag,
+                 GLboolean stereoFlag,
+                 GLint depth_size, GLint stencil_size,
+                 GLint accumRedSize, GLint accumGreenSize,
+                 GLint accumBlueSize, GLint accumAlphaSize,
+                 GLint level, GLint numAuxBuffers )
+{
+   GLboolean ximageFlag = GL_TRUE;
+   XMesaVisual xmvis;
+   GLint i;
+   GLboolean comparePointers;
+
+   if (dbFlag) {
+      /* Check if the MESA_BACK_BUFFER env var is set:
+       * a leading 'p'/'P' clears ximageFlag, a leading 'x'/'X' sets it.
+       */
+      char *backbuffer = _mesa_getenv("MESA_BACK_BUFFER");
+      if (backbuffer) {
+         if (backbuffer[0]=='p' || backbuffer[0]=='P') {
+            ximageFlag = GL_FALSE;
+         }
+         else if (backbuffer[0]=='x' || backbuffer[0]=='X') {
+            ximageFlag = GL_TRUE;
+         }
+         else {
+            _mesa_warning(NULL, "Mesa: invalid value for MESA_BACK_BUFFER environment variable, using an XImage.");
+         }
+      }
+   }
+
+   if (stereoFlag) {
+      /* stereo not supported */
+      return NULL;
+   }
+
+   /* Comparing IDs uses less memory but sometimes fails. */
+   /* XXX revisit this after 3.0 is finished. */
+   if (_mesa_getenv("MESA_GLX_VISUAL_HACK"))
+      comparePointers = GL_TRUE;
+   else
+      comparePointers = GL_FALSE;
+
+   /* Force the visual to have an alpha channel */
+   if (_mesa_getenv("MESA_GLX_FORCE_ALPHA"))
+      alphaFlag = GL_TRUE;
+
+   /* First check if a matching visual is already in the list.
+    * For the depth/stencil/accum sizes an existing visual matches when it
+    * has at least the requested number of bits, or when zero bits were
+    * requested.
+    */
+   for (i=0; i<NumVisuals; i++) {
+      XMesaVisual v = VisualTable[i];
+      if (v->display == dpy
+          && v->mesa_visual.level == level
+          && v->mesa_visual.numAuxBuffers == numAuxBuffers
+          && v->ximage_flag == ximageFlag
+          && v->mesa_visual.doubleBufferMode == dbFlag
+          && v->mesa_visual.stereoMode == stereoFlag
+          && (v->mesa_visual.alphaBits > 0) == alphaFlag
+          && (v->mesa_visual.depthBits >= depth_size || depth_size == 0)
+          && (v->mesa_visual.stencilBits >= stencil_size || stencil_size == 0)
+          && (v->mesa_visual.accumRedBits >= accumRedSize || accumRedSize == 0)
+          && (v->mesa_visual.accumGreenBits >= accumGreenSize || accumGreenSize == 0)
+          && (v->mesa_visual.accumBlueBits >= accumBlueSize || accumBlueSize == 0)
+          && (v->mesa_visual.accumAlphaBits >= accumAlphaSize || accumAlphaSize == 0)) {
+         /* now either compare XVisualInfo pointers or visual IDs */
+         if ((!comparePointers && v->visinfo->visualid == vinfo->visualid)
+             || (comparePointers && v->vishandle == vinfo)) {
+            return v;
+         }
+      }
+   }
+
+   /* Create a new visual and add it to the list. */
+
+   /* Pass the accum sizes in red, green, blue, alpha order; the green
+    * size must be the caller's accumGreenSize, not the blue size twice.
+    */
+   xmvis = XMesaCreateVisual( dpy, vinfo, GL_TRUE, alphaFlag, dbFlag,
+                              stereoFlag, ximageFlag,
+                              depth_size, stencil_size,
+                              accumRedSize, accumGreenSize,
+                              accumBlueSize, accumAlphaSize, 0, level,
+                              GLX_NONE_EXT );
+   if (xmvis) {
+      /* Save a copy of the pointer now so we can find this visual again
+       * if we need to search for it in find_glx_visual().
+       */
+      xmvis->vishandle = vinfo;
+      /* Allocate more space for additional visual */
+      /* NOTE(review): the result of _mesa_realloc() is not checked; a
+       * failed allocation would be dereferenced just below -- confirm the
+       * allocator's failure behavior before adding recovery code here.
+       */
+      VisualTable = (XMesaVisual *) _mesa_realloc( VisualTable,
+                                   sizeof(XMesaVisual) * NumVisuals,
+                                   sizeof(XMesaVisual) * (NumVisuals + 1));
+      /* add xmvis to the list */
+      VisualTable[NumVisuals] = xmvis;
+      NumVisuals++;
+      /* XXX minor hack, because XMesaCreateVisual doesn't support an
+       * aux buffers parameter.
+       */
+      xmvis->mesa_visual.numAuxBuffers = numAuxBuffers;
+   }
+   return xmvis;
+}
+
+
+/**
+ * Return the default number of bits for the Z buffer.
+ * The MESA_GLX_DEPTH_BITS env var, when set, is parsed with atoi() and
+ * takes precedence; otherwise DEFAULT_SOFTWARE_DEPTH_BITS is returned.
+ * XXX probably do the same thing for stencil, accum, etc.
+ */
+static GLint
+default_depth_bits(void)
+{
+   const char *envValue = _mesa_getenv("MESA_GLX_DEPTH_BITS");
+
+   if (!envValue)
+      return DEFAULT_SOFTWARE_DEPTH_BITS;
+
+   return atoi(envValue);
+}
+
+/**
+ * Return the default number of alpha bits.
+ * The MESA_GLX_ALPHA_BITS env var, when set, is parsed with atoi();
+ * otherwise the default is 0.
+ */
+static GLint
+default_alpha_bits(void)
+{
+   const char *envValue = _mesa_getenv("MESA_GLX_ALPHA_BITS");
+
+   if (!envValue)
+      return 0;
+
+   return atoi(envValue);
+}
+
+/**
+ * Return the default number of bits per accumulation buffer channel.
+ */
+static GLint
+default_accum_bits(void)
+{
+   const GLint accumBits = 16;
+
+   return accumBits;
+}
+
+
+
+/*
+ * Create a GLX visual from a regular XVisualInfo.
+ * This is called when Fake GLX is given an XVisualInfo which wasn't
+ * returned by glXChooseVisual.  Since this is the first time we're
+ * considering this visual we take a guess at reasonable values for the
+ * depth buffer size, stencil size, accum size, etc.
+ * This is the best we can do with a client-side emulation of GLX.
+ *
+ * Return:  the XMesaVisual from save_glx_visual(), or NULL if the visual
+ *          is on a non-zero level or is not usable for Mesa rendering.
+ */
+static XMesaVisual
+create_glx_visual( Display *dpy, XVisualInfo *visinfo )
+{
+   const GLint depthBits = default_depth_bits();
+   const GLint accumBits = default_accum_bits();
+   const GLboolean wantAlpha = default_alpha_bits() > 0;
+
+   if (level_of_visual( dpy, visinfo ) != 0) {
+      /* Color-index rendering to overlays is not supported. */
+      return NULL;
+   }
+
+   if (!is_usable_visual( visinfo )) {
+      _mesa_warning(NULL, "Mesa: error in glXCreateContext: bad visual\n");
+      return NULL;
+   }
+
+   /* Configure this visual as RGB, double-buffered, depth-buffered.
+    * This is surely wrong for some people's needs but what else
+    * can be done?  They should use glXChooseVisual().
+    */
+   return save_glx_visual( dpy, visinfo,
+                           wantAlpha,    /* alpha */
+                           GL_TRUE,      /* double */
+                           GL_FALSE,     /* stereo */
+                           depthBits,
+                           STENCIL_BITS,
+                           accumBits,    /* r */
+                           accumBits,    /* g */
+                           accumBits,    /* b */
+                           accumBits,    /* a */
+                           0,            /* level */
+                           0             /* numAux */
+                           );
+}
+
+
+
+/*
+ * Find the GLX visual associated with an XVisualInfo.
+ * The whole table is searched for a visual ID match first; only if that
+ * finds nothing is it searched again for a matching XVisualInfo pointer.
+ * Return:  the XMesaVisual, or NULL if there is no match.
+ */
+static XMesaVisual
+find_glx_visual( Display *dpy, XVisualInfo *vinfo )
+{
+   int idx;
+
+   /* first pass: match on visual id */
+   for (idx = 0; idx < NumVisuals; idx++) {
+      XMesaVisual candidate = VisualTable[idx];
+      if (candidate->display == dpy
+          && candidate->visinfo->visualid == vinfo->visualid) {
+         return candidate;
+      }
+   }
+
+   /* second pass: match on the saved XVisualInfo pointer */
+   for (idx = 0; idx < NumVisuals; idx++) {
+      XMesaVisual candidate = VisualTable[idx];
+      if (candidate->display == dpy && candidate->vishandle == vinfo) {
+         return candidate;
+      }
+   }
+
+   return NULL;
+}
+
+
+
+/**
+ * Return the transparent pixel value for a GLX visual.
+ * Input:  glxvis - the glx_visual
+ * Return:  a pixel value or -1 if no transparent pixel
+ *
+ * The overlay list obtained from GetOverlayInfo() is released with XFree()
+ * before returning; the pixel value is copied out of the list first so
+ * that no freed memory is read.
+ */
+static int
+transparent_pixel( XMesaVisual glxvis )
+{
+   Display *dpy = glxvis->display;
+   XVisualInfo *vinfo = glxvis->visinfo;
+   OverlayInfo *overlay_info;
+   int numOverlaysPerScreen, i;
+   int pixel = -1;   /* -1: visual not in the list, or not transparent */
+
+   overlay_info = GetOverlayInfo(dpy, vinfo->screen, &numOverlaysPerScreen);
+   if (!overlay_info) {
+      return -1;
+   }
+
+   for (i = 0; i < numOverlaysPerScreen; i++) {
+      const OverlayInfo *ov = overlay_info + i;
+      if (ov->overlay_visual == vinfo->visualid) {
+         /* found it!  type 0 indicates no transparency, otherwise
+          * ov->value is the transparent pixel.
+          */
+         if (ov->transparent_type != 0) {
+            pixel = ov->value;
+         }
+         break;
+      }
+   }
+
+   /* 'ov' pointed into this array, so free it only after reading. */
+   XFree((void *) overlay_info);
+   return pixel;
+}
+
+
+
+/**
+ * Ask the X server for a visual on screen 'scr' with exactly the given
+ * depth and class.  When depth and class equal those of the screen's
+ * default visual, the query is additionally narrowed to that visual's ID.
+ * For TrueColor/DirectColor visuals deeper than 24 bits the result is
+ * only accepted if each of the red, green and blue masks has at most
+ * 8 bits set; otherwise it is freed and NULL is returned.
+ * Return:  the XGetVisualInfo() result, or NULL.
+ */
+static XVisualInfo *
+get_visual( Display *dpy, int scr, unsigned int depth, int xclass )
+{
+   XVisualInfo tmpl, *found;
+   long tmplMask = VisualScreenMask | VisualDepthMask | VisualClassMask;
+   int numFound;
+   const unsigned int rootDepth = DefaultDepth(dpy,scr);
+   const int rootClass = DefaultVisual(dpy,scr)->CLASS;
+
+   tmpl.screen = scr;
+   tmpl.depth = depth;
+   tmpl.CLASS = xclass;
+
+   if (depth == rootDepth && xclass == rootClass) {
+      /* try to get root window's visual */
+      tmpl.visualid = DefaultVisual(dpy,scr)->visualid;
+      tmplMask |= VisualIDMask;
+   }
+
+   found = XGetVisualInfo( dpy, tmplMask, &tmpl, &numFound );
+   if (!found) {
+      return NULL;
+   }
+
+   /* Only deep TrueColor/DirectColor visuals need the channel check. */
+   if (depth <= 24 || (xclass != TrueColor && xclass != DirectColor)) {
+      return found;
+   }
+
+   /* In case bits/pixel > 24, make sure color channels are still <=8 bits.
+    * An SGI Infinite Reality system, for example, can have 30bpp pixels:
+    * 10 bits per color channel.  Mesa's limited to a max of 8 bits/channel.
+    */
+   if (_mesa_bitcount((GLuint) found->red_mask  ) > 8 ||
+       _mesa_bitcount((GLuint) found->green_mask) > 8 ||
+       _mesa_bitcount((GLuint) found->blue_mask ) > 8) {
+      XFree((void *) found);
+      return NULL;
+   }
+
+   return found;
+}
+
+
+
+/*
+ * Retrieve the value of the given environment variable and find
+ * the X visual which matches it.  The value is parsed as a visual class
+ * name followed by a depth, e.g. "TrueColor 24"; the recognized class
+ * names are TrueColor, DirectColor, GrayScale and StaticGray.
+ * Input:  dpy - the display
+ *         scr - the screen number
+ *         varname - the name of the environment variable
+ * Return:  an XVisualInfo pointer, or NULL if the variable is not set,
+ *          cannot be parsed, or no such visual exists (the latter two
+ *          cases also emit a warning).
+ */
+static XVisualInfo *
+get_env_visual(Display *dpy, int scr, const char *varname)
+{
+   char value[100], type[100];
+   int depth = 0, xclass = -1;
+   XVisualInfo *vis;
+   const char *env = _mesa_getenv( varname );
+
+   if (!env) {
+      return NULL;
+   }
+
+   /* work on a bounded, always-terminated copy */
+   strncpy( value, env, sizeof(value) - 1 );
+   value[sizeof(value) - 1] = 0;
+
+   /* sscanf() may match fewer than two items (empty string, missing or
+    * non-numeric depth).  'type' and 'depth' are pre-set so that they
+    * hold defined values for the checks and the warning below.
+    */
+   type[0] = 0;
+   (void) sscanf( value, "%99s %d", type, &depth );
+
+   if (strcmp(type,"TrueColor")==0)          xclass = TrueColor;
+   else if (strcmp(type,"DirectColor")==0)   xclass = DirectColor;
+   else if (strcmp(type,"GrayScale")==0)     xclass = GrayScale;
+   else if (strcmp(type,"StaticGray")==0)    xclass = StaticGray;
+
+   if (xclass>-1 && depth>0) {
+      vis = get_visual( dpy, scr, depth, xclass );
+      if (vis) {
+         return vis;
+      }
+   }
+
+   _mesa_warning(NULL, "GLX unable to find visual class=%s, depth=%d.",
+                 type, depth);
+
+   return NULL;
+}
+
+
+
+/*
+ * Select an X visual of at least the given depth.
+ * A visual named by the MESA_RGB_VISUAL environment variable wins over
+ * everything else.  Otherwise the candidate classes are tried in order
+ * (TrueColor, DirectColor, GrayScale, StaticGray when preferred_class is
+ * DONT_CARE; just the one requested class otherwise) and, per class,
+ * depths are probed from 0 up to 32 when min_depth is 0 or from 32 down
+ * to min_depth otherwise.  The first hit is returned.
+ * Input:  dpy, screen - X display and screen number
+ *         min_depth - minimum visual depth
+ *         preferred_class - preferred GLX visual class or DONT_CARE
+ * Return:  pointer to an XVisualInfo or NULL (also for the pseudo-color,
+ *          static-color and unrecognized class requests).
+ */
+static XVisualInfo *
+choose_x_visual(Display *dpy, int screen, int min_depth, int preferred_class)
+{
+   int candidates[4];
+   int numCandidates;
+   int c, d;
+   XVisualInfo *found;
+
+   /* First see if the MESA_RGB_VISUAL env var is defined */
+   found = get_env_visual( dpy, screen, "MESA_RGB_VISUAL" );
+   if (found) {
+      return found;
+   }
+
+   /* Otherwise, work out which X visual classes to search */
+   if (preferred_class == DONT_CARE) {
+      candidates[0] = TrueColor;
+      candidates[1] = DirectColor;
+      candidates[2] = GrayScale;
+      candidates[3] = StaticGray;
+      numCandidates = 4;
+   }
+   else {
+      switch (preferred_class) {
+      case GLX_TRUE_COLOR_EXT:    candidates[0] = TrueColor;    break;
+      case GLX_DIRECT_COLOR_EXT:  candidates[0] = DirectColor;  break;
+      case GLX_GRAY_SCALE_EXT:    candidates[0] = GrayScale;    break;
+      case GLX_STATIC_GRAY_EXT:   candidates[0] = StaticGray;   break;
+      case GLX_PSEUDO_COLOR_EXT:
+      case GLX_STATIC_COLOR_EXT:
+      default:
+         return NULL;
+      }
+      numCandidates = 1;
+   }
+
+   for (c = 0; c < numCandidates; c++) {
+      if (min_depth == 0) {
+         /* start with shallowest */
+         for (d = 0; d <= 32; d++) {
+            found = get_visual( dpy, screen, d, candidates[c] );
+            if (found) {
+               return found;
+            }
+         }
+      }
+      else {
+         /* start with deepest */
+         for (d = 32; d >= min_depth; d--) {
+            found = get_visual( dpy, screen, d, candidates[c] );
+            if (found) {
+               return found;
+            }
+         }
+      }
+   }
+
+   /* didn't find a visual */
+   return NULL;
+}
+
+
+
+/*
+ * Find the deepest X over/underlay visual matching the criteria.
+ * Input:  dpy, scr - X display and screen number
+ *         level - the over/underlay level
+ *         trans_type - transparent pixel type: GLX_NONE_EXT,
+ *                      GLX_TRANSPARENT_RGB_EXT, GLX_TRANSPARENT_INDEX_EXT,
+ *                      or DONT_CARE
+ *         trans_value - transparent pixel value or DONT_CARE
+ *         min_depth - minimum visual depth
+ *         preferred_class - preferred GLX visual class or DONT_CARE
+ * Return:  pointer to an XVisualInfo (to be released by the caller with
+ *          XFree) or NULL.
+ *
+ * Only TrueColor and DirectColor overlays are ever returned.  The overlay
+ * list and every XVisualInfo that is not returned are freed here.
+ *
+ * NOTE(review): the first visual passing the other criteria is accepted
+ * even if it is shallower than min_depth; min_depth only affects which
+ * later candidates replace it.  Behavior kept as is - confirm intent.
+ */
+static XVisualInfo *
+choose_x_overlay_visual( Display *dpy, int scr,
+                         int level, int trans_type, int trans_value,
+                         int min_depth, int preferred_class )
+{
+   OverlayInfo *overlay_info;
+   int numOverlaysPerScreen;
+   int i;
+   XVisualInfo *deepvis;
+   int deepest;
+
+   /* translate the GLX class token into the X visual class */
+   switch (preferred_class) {
+      case GLX_TRUE_COLOR_EXT:    preferred_class = TrueColor;    break;
+      case GLX_DIRECT_COLOR_EXT:  preferred_class = DirectColor;  break;
+      case GLX_PSEUDO_COLOR_EXT:  preferred_class = PseudoColor;  break;
+      case GLX_STATIC_COLOR_EXT:  preferred_class = StaticColor;  break;
+      case GLX_GRAY_SCALE_EXT:    preferred_class = GrayScale;    break;
+      case GLX_STATIC_GRAY_EXT:   preferred_class = StaticGray;   break;
+      default:                    preferred_class = DONT_CARE;
+   }
+
+   overlay_info = GetOverlayInfo(dpy, scr, &numOverlaysPerScreen);
+   if (!overlay_info) {
+      return NULL;
+   }
+
+   /* Search for the deepest overlay which satisfies all criteria. */
+   deepest = min_depth;
+   deepvis = NULL;
+
+   for (i = 0; i < numOverlaysPerScreen; i++) {
+      const OverlayInfo *ov = overlay_info + i;
+      XVisualInfo *vislist, vistemplate;
+      int count = 0;
+
+      if (ov->layer!=level) {
+         /* failed overlay level criteria */
+         continue;
+      }
+      if (!(trans_type==DONT_CARE
+            || (trans_type==GLX_TRANSPARENT_INDEX_EXT
+                && ov->transparent_type>0)
+            || (trans_type==GLX_NONE_EXT && ov->transparent_type==0))) {
+         /* failed transparent pixel type criteria */
+         continue;
+      }
+      if (trans_value!=DONT_CARE && trans_value!=ov->value) {
+         /* failed transparent pixel value criteria */
+         continue;
+      }
+
+      /* get XVisualInfo and check the depth */
+      vistemplate.visualid = ov->overlay_visual;
+      vistemplate.screen = scr;
+      vislist = XGetVisualInfo( dpy, VisualIDMask | VisualScreenMask,
+                                &vistemplate, &count );
+      if (!vislist) {
+         /* nothing returned, nothing to free */
+         continue;
+      }
+
+      /* Accept only an unambiguous match (count == 1) of the wanted
+       * class.  Color-index rendering is not supported, so the visual
+       * must also be True/DirectColor.
+       */
+      if (count == 1
+          && (preferred_class == DONT_CARE
+              || preferred_class == vislist->CLASS)
+          && (vislist->CLASS == TrueColor || vislist->CLASS == DirectColor)
+          && (deepvis == NULL || vislist->depth > deepest)) {
+         /* YES!  found a satisfactory visual; drop the previous best */
+         if (deepvis) {
+            XFree( deepvis );
+         }
+         deepest = vislist->depth;
+         deepvis = vislist;
+      }
+      else {
+         /* rejected candidate: release it instead of leaking it */
+         XFree( vislist );
+      }
+   }
+
+   XFree((void *) overlay_info);
+   return deepvis;
+}
+
+
+/**********************************************************************/
+/***             Display-related functions                          ***/
+/**********************************************************************/
+
+
+/**
+ * Free all XMesaVisuals which are associated with the given display and
+ * remove them from VisualTable[].
+ *
+ * The table is compacted in a single pass: entries for other displays
+ * are moved down to the next free slot, so every entry is examined
+ * exactly once (including entries that directly follow a removed one)
+ * and the relative order of the survivors is kept.
+ */
+static void
+destroy_visuals_on_display(Display *dpy)
+{
+   int i;
+   int kept = 0;   /* number of entries retained so far */
+
+   for (i = 0; i < NumVisuals; i++) {
+      if (VisualTable[i]->display == dpy) {
+         /* remove this visual */
+         free(VisualTable[i]);
+      }
+      else {
+         VisualTable[kept++] = VisualTable[i];
+      }
+   }
+   NumVisuals = kept;
+}
+
+
+/**
+ * Display-close hook (installed by register_with_display) that releases
+ * the per-display data kept by this file: first the visuals, then the
+ * XMesa buffers.  Always returns 0.
+ */
+static int
+close_display_callback(Display *dpy, XExtCodes *codes)
+{
+   (void) codes;   /* not needed here */
+
+   destroy_visuals_on_display(dpy);
+   xmesa_destroy_buffers_on_display(dpy);
+
+   return 0;
+}
+
+
+/**
+ * Walk the display's ext_procs list and return the first _XExtension
+ * whose name equals extName, or NULL if there is none.  Entries without
+ * a name are skipped.
+ */
+static _XExtension *
+lookup_extension(Display *dpy, const char *extName)
+{
+   _XExtension *cur = dpy->ext_procs;
+
+   while (cur) {
+      if (cur->name && strcmp(cur->name, extName) == 0) {
+         break;
+      }
+      cur = cur->next;
+   }
+
+   /* either the matching entry or NULL at end of list */
+   return cur;
+}
+
+
+/**
+ * Whenever we're given a new Display pointer, call this function to
+ * register our close_display_callback function.
+ *
+ * The registration is keyed on an extension entry named "MesaGLX": if the
+ * display already has one, nothing is done.  If XAddExtension() fails,
+ * the function returns without registering anything.
+ */
+static void
+register_with_display(Display *dpy)
+{
+   const char *extName = "MesaGLX";
+   _XExtension *ext;
+   XExtCodes *c;
+
+   if (lookup_extension(dpy, extName)) {
+      /* already registered on this display */
+      return;
+   }
+
+   c = XAddExtension(dpy);
+   if (!c) {
+      /* could not add the extension record; nothing to hook into */
+      return;
+   }
+
+   ext = dpy->ext_procs;  /* new extension is at head of list */
+   assert(c->extension == ext->codes.extension);
+   (void) c;  /* only read by the assert; silences NDEBUG builds */
+   ext->name = _mesa_strdup(extName);
+   ext->close_display = close_display_callback;
+}
+
+
+/**********************************************************************/
+/***                  Begin Fake GLX API Functions                  ***/
+/**********************************************************************/
+
+
+/**
+ * Helper used by glXChooseVisual and glXChooseFBConfig.
+ * The fbConfig parameter must be GL_FALSE for the former and GL_TRUE for
+ * the latter.
+ * In either case, the attribute list is terminated with the value 'None'.
+ *
+ * The list is parsed into a set of requested properties, an X visual is
+ * picked (by ID when GLX_FBCONFIG_ID was given, otherwise through
+ * choose_x_visual() / choose_x_overlay_visual()), and the result is
+ * recorded with save_glx_visual().
+ *
+ * Return:  the XMesaVisual, or NULL when the list contains an unknown or
+ *          unsupported attribute, when RGB mode is not selected, or when
+ *          no suitable X visual is found.
+ */
+static XMesaVisual
+choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig )
+{
+   const GLboolean rgbModeDefault = fbConfig;
+   const int *parselist;
+   XVisualInfo *vis = NULL;
+   int min_ci = 0;
+   int min_red=0, min_green=0, min_blue=0;
+   GLboolean rgb_flag = rgbModeDefault;
+   GLboolean alpha_flag = GL_FALSE;
+   GLboolean double_flag = GL_FALSE;
+   GLboolean stereo_flag = GL_FALSE;
+   GLint depth_size = 0;
+   GLint stencil_size = 0;
+   GLint accumRedSize = 0;
+   GLint accumGreenSize = 0;
+   GLint accumBlueSize = 0;
+   GLint accumAlphaSize = 0;
+   int level = 0;
+   int visual_type = DONT_CARE;
+   int trans_type = DONT_CARE;
+   int trans_value = DONT_CARE;
+   GLint caveat = DONT_CARE;
+   XMesaVisual xmvis = NULL;
+   int desiredVisualID = -1;
+   int numAux = 0;
+
+   parselist = list;
+
+   while (*parselist) {
+
+      switch (*parselist) {
+         case GLX_USE_GL:
+            if (fbConfig) {
+               /* invalid token */
+               return NULL;
+            }
+            else {
+               /* skip */
+               parselist++;
+            }
+            break;
+         case GLX_BUFFER_SIZE:
+            parselist++;
+            min_ci = *parselist++;
+            break;
+         case GLX_LEVEL:
+            parselist++;
+            level = *parselist++;
+            break;
+         case GLX_RGBA:
+            if (fbConfig) {
+               /* invalid token */
+               return NULL;
+            }
+            else {
+               rgb_flag = GL_TRUE;
+               parselist++;
+            }
+            break;
+         case GLX_DOUBLEBUFFER:
+            /* boolean token for visuals, token/value pair for FBConfigs */
+            parselist++;
+            if (fbConfig) {
+               double_flag = *parselist++;
+            }
+            else {
+               double_flag = GL_TRUE;
+            }
+            break;
+         case GLX_STEREO:
+            /* boolean token for visuals, token/value pair for FBConfigs */
+            parselist++;
+            if (fbConfig) {
+               stereo_flag = *parselist++;
+            }
+            else {
+               stereo_flag = GL_TRUE;
+            }
+            break;
+         case GLX_AUX_BUFFERS:
+            parselist++;
+            numAux = *parselist++;
+            if (numAux > MAX_AUX_BUFFERS)
+               return NULL;
+            break;
+         case GLX_RED_SIZE:
+            parselist++;
+            min_red = *parselist++;
+            break;
+         case GLX_GREEN_SIZE:
+            parselist++;
+            min_green = *parselist++;
+            break;
+         case GLX_BLUE_SIZE:
+            parselist++;
+            min_blue = *parselist++;
+            break;
+         case GLX_ALPHA_SIZE:
+            parselist++;
+            {
+               GLint size = *parselist++;
+               alpha_flag = size ? GL_TRUE : GL_FALSE;
+            }
+            break;
+         case GLX_DEPTH_SIZE:
+            parselist++;
+            depth_size = *parselist++;
+            break;
+         case GLX_STENCIL_SIZE:
+            parselist++;
+            stencil_size = *parselist++;
+            break;
+         case GLX_ACCUM_RED_SIZE:
+            parselist++;
+            {
+               GLint size = *parselist++;
+               accumRedSize = MAX2( accumRedSize, size );
+            }
+            break;
+         case GLX_ACCUM_GREEN_SIZE:
+            parselist++;
+            {
+               GLint size = *parselist++;
+               accumGreenSize = MAX2( accumGreenSize, size );
+            }
+            break;
+         case GLX_ACCUM_BLUE_SIZE:
+            parselist++;
+            {
+               GLint size = *parselist++;
+               accumBlueSize = MAX2( accumBlueSize, size );
+            }
+            break;
+         case GLX_ACCUM_ALPHA_SIZE:
+            parselist++;
+            {
+               GLint size = *parselist++;
+               accumAlphaSize = MAX2( accumAlphaSize, size );
+            }
+            break;
+
+         /*
+          * GLX_EXT_visual_info extension
+          */
+         case GLX_X_VISUAL_TYPE_EXT:
+            parselist++;
+            visual_type = *parselist++;
+            break;
+         case GLX_TRANSPARENT_TYPE_EXT:
+            parselist++;
+            trans_type = *parselist++;
+            break;
+         case GLX_TRANSPARENT_INDEX_VALUE_EXT:
+            parselist++;
+            trans_value = *parselist++;
+            break;
+         case GLX_TRANSPARENT_RED_VALUE_EXT:
+         case GLX_TRANSPARENT_GREEN_VALUE_EXT:
+         case GLX_TRANSPARENT_BLUE_VALUE_EXT:
+         case GLX_TRANSPARENT_ALPHA_VALUE_EXT:
+            /* ignore */
+            parselist++;
+            parselist++;
+            break;
+
+         /*
+          * GLX_EXT_visual_rating extension
+          */
+         case GLX_VISUAL_CAVEAT_EXT:
+            parselist++;
+            caveat = *parselist++; /* ignored for now */
+            break;
+
+         /*
+          * GLX_ARB_multisample
+          */
+         case GLX_SAMPLE_BUFFERS_ARB:
+         case GLX_SAMPLES_ARB:
+            parselist++;
+            if (*parselist++ != 0)
+               /* ms not supported */
+               return NULL;
+            break;
+
+         /*
+          * FBConfig attribs.
+          */
+         case GLX_RENDER_TYPE:
+            if (!fbConfig)
+               return NULL;
+            parselist++;
+            if (*parselist & GLX_RGBA_BIT) {
+               rgb_flag = GL_TRUE;
+            }
+            else if (*parselist & GLX_COLOR_INDEX_BIT) {
+               rgb_flag = GL_FALSE;
+            }
+            else if (*parselist == 0) {
+               rgb_flag = GL_TRUE;
+            }
+            parselist++;
+            break;
+         case GLX_DRAWABLE_TYPE:
+            if (!fbConfig)
+               return NULL;
+            parselist++;
+            if (*parselist & ~(GLX_WINDOW_BIT | GLX_PIXMAP_BIT | GLX_PBUFFER_BIT)) {
+               return NULL; /* bad bit */
+            }
+            parselist++;
+            break;
+         case GLX_FBCONFIG_ID:
+            if (!fbConfig)
+               return NULL;
+            parselist++;
+            desiredVisualID = *parselist++;
+            break;
+         case GLX_X_RENDERABLE:
+            if (!fbConfig)
+               return NULL;
+            parselist += 2;
+            /* ignore */
+            break;
+
+#ifdef GLX_EXT_texture_from_pixmap
+         /* In an FBConfig list each of these tokens is followed by a
+          * value which has to be consumed too; otherwise the value would
+          * be parsed as the next attribute (or, when 0, end the list).
+          * For the non-FBConfig case the previous one-token behavior is
+          * kept.
+          */
+         case GLX_BIND_TO_TEXTURE_RGB_EXT:
+         case GLX_BIND_TO_TEXTURE_RGBA_EXT:
+         case GLX_BIND_TO_MIPMAP_TEXTURE_EXT:
+         case GLX_Y_INVERTED_EXT:
+            parselist += fbConfig ? 2 : 1; /*skip*/
+            break;
+         case GLX_BIND_TO_TEXTURE_TARGETS_EXT:
+            parselist++;
+            if (*parselist & ~(GLX_TEXTURE_1D_BIT_EXT |
+                               GLX_TEXTURE_2D_BIT_EXT |
+                               GLX_TEXTURE_RECTANGLE_BIT_EXT)) {
+               /* invalid bit */
+               return NULL;
+            }
+            if (fbConfig)
+               parselist++; /* consume the bitmask value */
+            break;
+#endif
+
+         case None:
+            /* end of list */
+            break;
+
+         default:
+            /* undefined attribute */
+            _mesa_warning(NULL, "unexpected attrib 0x%x in choose_visual()",
+                          *parselist);
+            return NULL;
+      }
+   }
+
+   /* color-index mode is not supported */
+   if (!rgb_flag)
+      return NULL;
+
+   (void) caveat;
+
+   /*
+    * Since we're only simulating the GLX extension this function will never
+    * find any real GL visuals.  Instead, all we can do is try to find an RGB
+    * or CI visual of appropriate depth.  Other requested attributes such as
+    * double buffering, depth buffer, etc. will be associated with the X
+    * visual and stored in the VisualTable[].
+    */
+   if (desiredVisualID != -1) {
+      /* try to get a specific visual, by visualID */
+      XVisualInfo temp;
+      int n;
+      temp.visualid = desiredVisualID;
+      temp.screen = screen;
+      vis = XGetVisualInfo(dpy, VisualIDMask | VisualScreenMask, &temp, &n);
+      if (vis) {
+         /* give the visual some useful GLX attributes */
+         double_flag = GL_TRUE;
+         if (vis->depth <= 8) {
+            /* too shallow; release the query result before bailing out */
+            XFree((void *) vis);
+            return NULL;
+         }
+         depth_size = default_depth_bits();
+         stencil_size = STENCIL_BITS;
+         /* XXX accum??? */
+      }
+   }
+   else {
+      /* RGB visual */
+      int min_rgb = min_red + min_green + min_blue;
+      if (min_rgb>1 && min_rgb<8) {
+         /* a special case to be sure we can get a monochrome visual */
+         min_rgb = 1;
+      }
+
+      if (level==0) {
+         vis = choose_x_visual(dpy, screen, min_rgb, visual_type);
+      }
+      else {
+         vis = choose_x_overlay_visual(dpy, screen, level,
+                                       trans_type, trans_value, min_rgb, visual_type);
+      }
+   }
+
+   if (vis) {
+      /* Note: we're not exactly obeying the glXChooseVisual rules here.
+       * When GLX_DEPTH_SIZE = 1 is specified we're supposed to choose the
+       * largest depth buffer size, which is 32bits/value.  Instead, we
+       * return 16 to maintain performance with earlier versions of Mesa.
+       */
+      if (depth_size > 24)
+         depth_size = 32;
+      else if (depth_size > 16)
+         depth_size = 24;
+      else if (depth_size > 0) {
+         depth_size = default_depth_bits();
+      }
+
+      if (!alpha_flag) {
+         alpha_flag = default_alpha_bits() > 0;
+      }
+
+      /* we only support one size of stencil and accum buffers. */
+      if (stencil_size > 0)
+         stencil_size = STENCIL_BITS;
+      if (accumRedSize > 0 || accumGreenSize > 0 || accumBlueSize > 0 ||
+          accumAlphaSize > 0) {
+         accumRedSize =
+         accumGreenSize =
+         accumBlueSize = default_accum_bits();
+         accumAlphaSize = alpha_flag ? accumRedSize : 0;
+      }
+
+      xmvis = save_glx_visual( dpy, vis, alpha_flag, double_flag,
+                               stereo_flag, depth_size, stencil_size,
+                               accumRedSize, accumGreenSize,
+                               accumBlueSize, accumAlphaSize, level, numAux );
+   }
+
+   return xmvis;
+}
+
+
+/**
+ * glXChooseVisual() entry point.  Registers this file's display-close
+ * hook, resolves the attribute list through choose_visual() and hands
+ * back a freshly malloc'ed copy of the chosen XVisualInfo.  The copy is
+ * also stored in the visual's vishandle field.  Returns NULL when no
+ * visual matches or the copy cannot be allocated.
+ */
+static XVisualInfo *
+Fake_glXChooseVisual( Display *dpy, int screen, int *list )
+{
+   XMesaVisual xmvis;
+   XVisualInfo *handle;
+
+   /* register ourselves as an extension on this display */
+   register_with_display(dpy);
+
+   xmvis = choose_visual(dpy, screen, list, GL_FALSE);
+   if (!xmvis) {
+      return NULL;
+   }
+
+   /* create a new vishandle - the cached one may be stale */
+   handle = (XVisualInfo *) malloc(sizeof(XVisualInfo));
+   if (handle) {
+      memcpy(handle, xmvis->visinfo, sizeof(XVisualInfo));
+   }
+   xmvis->vishandle = handle;
+
+   return handle;
+}
+
+
+/**
+ * Fill in the basic fields of a new fake_glx_context: both "direct"
+ * flags are set to true, the display is recorded as the current one and
+ * the context's XID is set to the context's own address.
+ * glxCtx->xmesaContext must already be valid.
+ */
+static void
+init_glx_context(struct fake_glx_context *glxCtx, Display *dpy)
+{
+   /* The glxContext member must sit at offset 0 of the wrapper. */
+   assert((void *) glxCtx == (void *) &(glxCtx->glxContext));
+
+   /* Always report direct rendering.  See if anyone's confused... */
+   glxCtx->xmesaContext->direct = GL_TRUE;
+   glxCtx->glxContext.isDirect = GL_TRUE;
+
+   glxCtx->glxContext.currentDpy = dpy;
+   glxCtx->glxContext.xid = (XID) glxCtx;  /* self pointer */
+}
+
+
+
+/**
+ * glXCreateContext() entry point.  Allocates a fake_glx_context, finds
+ * (or, for visuals not obtained via glXChooseVisual, creates) the
+ * matching XMesaVisual and builds an XMesa context on it, optionally
+ * sharing with share_list.  The 'direct' argument is not consulted.
+ * Returns NULL on bad arguments, allocation failure, an unusable visual
+ * or XMesaCreateContext() failure.
+ */
+static GLXContext
+Fake_glXCreateContext( Display *dpy, XVisualInfo *visinfo,
+                       GLXContext share_list, Bool direct )
+{
+   struct fake_glx_context *shareCtx = (struct fake_glx_context *) share_list;
+   struct fake_glx_context *newCtx;
+   XMesaContext shareXm;
+   XMesaVisual xmvis;
+
+   (void) direct;
+
+   if (!dpy || !visinfo) {
+      return NULL;
+   }
+
+   newCtx = CALLOC_STRUCT(fake_glx_context);
+   if (!newCtx) {
+      return NULL;
+   }
+
+   xmvis = find_glx_visual( dpy, visinfo );
+   if (!xmvis) {
+      /* This visual wasn't found with glXChooseVisual() */
+      xmvis = create_glx_visual( dpy, visinfo );
+   }
+   if (!xmvis) {
+      /* unusable visual */
+      free(newCtx);
+      return NULL;
+   }
+
+   shareXm = shareCtx ? shareCtx->xmesaContext : NULL;
+   newCtx->xmesaContext = XMesaCreateContext(xmvis, shareXm);
+   if (!newCtx->xmesaContext) {
+      free(newCtx);
+      return NULL;
+   }
+
+   init_glx_context(newCtx, dpy);
+
+   return (GLXContext) newCtx;
+}
+
+
+/* XXX these may have to be removed due to thread-safety issues.
+ *
+ * They remember the arguments of the latest Fake_glXMakeContextCurrent()
+ * call that got as far as resolving both drawables, together with the
+ * XMesaBuffers found or created for them, so that a repeated call with
+ * the same context/drawable pair can skip XMesaFindBuffer().  All five
+ * are reset to 0 when the current context is released and whenever
+ * Fake_glXDestroyContext() runs.
+ */
+static GLXContext MakeCurrent_PrevContext = 0;
+static GLXDrawable MakeCurrent_PrevDrawable = 0;
+static GLXDrawable MakeCurrent_PrevReadable = 0;
+static XMesaBuffer MakeCurrent_PrevDrawBuffer = 0;
+static XMesaBuffer MakeCurrent_PrevReadBuffer = 0;
+
+
+/* GLX 1.3 and later */
+/**
+ * Bind 'ctx' to the given draw and read drawables, or release the
+ * current context when all three are zero.  Any other combination of
+ * zero/non-zero arguments is an error.  A drawable with no XMesaBuffer
+ * yet is treated as a new window and gets one created on the context's
+ * visual.  Returns True on success, False otherwise.
+ */
+static Bool
+Fake_glXMakeContextCurrent( Display *dpy, GLXDrawable draw,
+                            GLXDrawable read, GLXContext ctx )
+{
+   struct fake_glx_context *glxCtx = (struct fake_glx_context *) ctx;
+   XMesaBuffer drawBuf, readBuf;
+   XMesaContext xm;
+
+   if (!ctx && !draw && !read) {
+      /* release current context w/out assigning new one. */
+      XMesaMakeCurrent( NULL, NULL );
+      MakeCurrent_PrevContext = 0;
+      MakeCurrent_PrevDrawable = 0;
+      MakeCurrent_PrevReadable = 0;
+      MakeCurrent_PrevDrawBuffer = 0;
+      MakeCurrent_PrevReadBuffer = 0;
+      return True;
+   }
+
+   if (!ctx || !draw || !read) {
+      /* The args must either all be non-zero or all zero.
+       * This is an error.
+       */
+      return False;
+   }
+
+   xm = glxCtx->xmesaContext;
+
+   /* Resolve the XMesaBuffer for 'draw', reusing the cached one if the
+    * context and drawable are the same as last time.
+    */
+   if (ctx == MakeCurrent_PrevContext && draw == MakeCurrent_PrevDrawable)
+      drawBuf = MakeCurrent_PrevDrawBuffer;
+   else
+      drawBuf = XMesaFindBuffer( dpy, draw );
+
+   if (!drawBuf) {
+      /* drawable must be a new window! */
+      drawBuf = XMesaCreateWindowBuffer( xm->xm_visual, draw );
+      if (!drawBuf) {
+         /* Out of memory, or context/drawable depth mismatch */
+         return False;
+      }
+#ifdef FX
+      FXcreateContext( xm->xm_visual, draw, xm, drawBuf );
+#endif
+   }
+
+   /* Same again for 'read'. */
+   if (ctx == MakeCurrent_PrevContext && read == MakeCurrent_PrevReadable)
+      readBuf = MakeCurrent_PrevReadBuffer;
+   else
+      readBuf = XMesaFindBuffer( dpy, read );
+
+   if (!readBuf) {
+      /* drawable must be a new window! */
+      readBuf = XMesaCreateWindowBuffer( xm->xm_visual, read );
+      if (!readBuf) {
+         /* Out of memory, or context/drawable depth mismatch */
+         return False;
+      }
+#ifdef FX
+      FXcreateContext( xm->xm_visual, read, xm, readBuf );
+#endif
+   }
+
+   /* remember this binding for the next call */
+   MakeCurrent_PrevContext = ctx;
+   MakeCurrent_PrevDrawable = draw;
+   MakeCurrent_PrevReadable = read;
+   MakeCurrent_PrevDrawBuffer = drawBuf;
+   MakeCurrent_PrevReadBuffer = readBuf;
+
+   /* Now make current! */
+   if (!XMesaMakeCurrent2(xm, drawBuf, readBuf)) {
+      return False;
+   }
+
+   ((__GLXcontext *) ctx)->currentDpy = dpy;
+   ((__GLXcontext *) ctx)->currentDrawable = draw;
+   ((__GLXcontext *) ctx)->currentReadable = read;
+   return True;
+}
+
+
+/**
+ * glXMakeCurrent() entry point: forwards to Fake_glXMakeContextCurrent()
+ * with the one drawable used for both drawing and reading.
+ */
+static Bool
+Fake_glXMakeCurrent( Display *dpy, GLXDrawable drawable, GLXContext ctx )
+{
+   const GLXDrawable draw = drawable;
+   const GLXDrawable read = drawable;
+
+   return Fake_glXMakeContextCurrent( dpy, draw, read, ctx );
+}
+
+
+/**
+ * glXCreateGLXPixmap() entry point.  Finds or creates the XMesaVisual
+ * for 'visinfo' and wraps 'pixmap' in an XMesa pixmap buffer, passing 0
+ * as the colormap.  Returns the pixmap recorded in the new buffer's
+ * front renderbuffer, or 0 on failure.
+ */
+static GLXPixmap
+Fake_glXCreateGLXPixmap( Display *dpy, XVisualInfo *visinfo, Pixmap pixmap )
+{
+   XMesaVisual xmvis = find_glx_visual( dpy, visinfo );
+   XMesaBuffer pixbuf;
+
+   if (!xmvis) {
+      xmvis = create_glx_visual( dpy, visinfo );
+   }
+   if (!xmvis) {
+      /* unusable visual */
+      return 0;
+   }
+
+   pixbuf = XMesaCreatePixmapBuffer( xmvis, pixmap, 0 );
+   if (pixbuf) {
+      return pixbuf->frontxrb->pixmap;
+   }
+   return 0;
+}
+
+
+/*** GLX_MESA_pixmap_colormap ***/
+
+/**
+ * glXCreateGLXPixmapMESA() entry point.  Same as Fake_glXCreateGLXPixmap()
+ * except that the caller-supplied colormap is handed on to
+ * XMesaCreatePixmapBuffer().  Returns the pixmap recorded in the new
+ * buffer's front renderbuffer, or 0 on failure.
+ */
+static GLXPixmap
+Fake_glXCreateGLXPixmapMESA( Display *dpy, XVisualInfo *visinfo,
+                             Pixmap pixmap, Colormap cmap )
+{
+   XMesaVisual xmvis = find_glx_visual( dpy, visinfo );
+   XMesaBuffer pixbuf;
+
+   if (!xmvis) {
+      xmvis = create_glx_visual( dpy, visinfo );
+   }
+   if (!xmvis) {
+      /* unusable visual */
+      return 0;
+   }
+
+   pixbuf = XMesaCreatePixmapBuffer( xmvis, pixmap, cmap );
+   if (pixbuf) {
+      return pixbuf->frontxrb->pixmap;
+   }
+   return 0;
+}
+
+
+/**
+ * glXDestroyGLXPixmap() entry point.  Destroys the XMesaBuffer that
+ * belongs to 'pixmap'; an unknown pixmap is only reported when the
+ * MESA_DEBUG environment variable is set.
+ */
+static void
+Fake_glXDestroyGLXPixmap( Display *dpy, GLXPixmap pixmap )
+{
+   XMesaBuffer pixbuf = XMesaFindBuffer(dpy, pixmap);
+
+   if (pixbuf) {
+      XMesaDestroyBuffer(pixbuf);
+      return;
+   }
+
+   if (_mesa_getenv("MESA_DEBUG")) {
+      _mesa_warning(NULL, "Mesa: glXDestroyGLXPixmap: invalid pixmap\n");
+   }
+}
+
+
+/**
+ * glXCopyContext() entry point.  Copies the state groups selected by
+ * 'mask' from the Mesa context behind 'src' to the one behind 'dst'.
+ * If 'src' is the context remembered by the last make-current call,
+ * _mesa_Flush() is called first.
+ */
+static void
+Fake_glXCopyContext( Display *dpy, GLXContext src, GLXContext dst,
+                     unsigned long mask )
+{
+   XMesaContext from = ((struct fake_glx_context *) src)->xmesaContext;
+   XMesaContext to = ((struct fake_glx_context *) dst)->xmesaContext;
+
+   (void) dpy;
+
+   if (src == MakeCurrent_PrevContext) {
+      _mesa_Flush();
+   }
+
+   _mesa_copy_context( &(from->mesa), &(to->mesa), (GLuint) mask );
+}
+
+
+/**
+ * glXQueryExtension() entry point.  Reports the error and event bases
+ * of the server's GLX extension if XQueryExtension() finds one, and 0
+ * for both otherwise.  Either output pointer may be NULL.  Always
+ * returns True.
+ */
+static Bool
+Fake_glXQueryExtension( Display *dpy, int *errorBase, int *eventBase )
+{
+   int opcode, firstEvent, firstError;
+
+   /* Mesa's GLX isn't really an X extension but we try to act like one. */
+   if (!XQueryExtension(dpy, GLX_EXTENSION_NAME,
+                        &opcode, &firstEvent, &firstError)) {
+      firstEvent = 0;
+      firstError = 0;
+   }
+
+   if (errorBase) {
+      *errorBase = firstError;
+   }
+   if (eventBase) {
+      *eventBase = firstEvent;
+   }
+
+   /* we're faking GLX so always return success */
+   return True;
+}
+
+
+/* Release the pointer grab and then the keyboard grab on the display. */
+extern void _kw_ungrab_all( Display *dpy );
+
+void
+_kw_ungrab_all( Display *dpy )
+{
+   const Time when = CurrentTime;
+
+   XUngrabPointer( dpy, when );
+   XUngrabKeyboard( dpy, when );
+}
+
+
+/**
+ * glXDestroyContext() entry point.  Clears the make-current cache,
+ * destroys the wrapped XMesa context, runs XMesaGarbageCollect() and
+ * finally frees the fake_glx_context wrapper itself.
+ */
+static void
+Fake_glXDestroyContext( Display *dpy, GLXContext ctx )
+{
+   struct fake_glx_context *doomed = (struct fake_glx_context *) ctx;
+
+   (void) dpy;
+
+   /* forget any cached binding; it may refer to this context */
+   MakeCurrent_PrevDrawBuffer = 0;
+   MakeCurrent_PrevReadBuffer = 0;
+   MakeCurrent_PrevDrawable = 0;
+   MakeCurrent_PrevReadable = 0;
+   MakeCurrent_PrevContext = 0;
+
+   XMesaDestroyContext( doomed->xmesaContext );
+   XMesaGarbageCollect();
+   free(doomed);
+}
+
+
+/**
+ * glXIsDirect() entry point: returns the 'direct' flag stored in the
+ * XMesa context wrapped by 'ctx'.
+ */
+static Bool
+Fake_glXIsDirect( Display *dpy, GLXContext ctx )
+{
+   const struct fake_glx_context *fake =
+      (const struct fake_glx_context *) ctx;
+
+   (void) dpy;
+
+   return fake->xmesaContext->direct;
+}
+
+
+
+/**
+ * glXSwapBuffers() entry point.  Swaps the XMesaBuffer belonging to
+ * 'drawable'; an unknown drawable is only reported when the MESA_DEBUG
+ * environment variable is set.
+ */
+static void
+Fake_glXSwapBuffers( Display *dpy, GLXDrawable drawable )
+{
+   XMesaBuffer target = XMesaFindBuffer( dpy, drawable );
+
+   if (target) {
+      XMesaSwapBuffers(target);
+      return;
+   }
+
+   if (_mesa_getenv("MESA_DEBUG")) {
+      _mesa_warning(NULL, "glXSwapBuffers: invalid drawable 0x%x\n",
+                    (int) drawable);
+   }
+}
+
+
+
+/*** GLX_MESA_copy_sub_buffer ***/
+
+/**
+ * glXCopySubBufferMESA() entry point.  Passes the rectangle on to
+ * XMesaCopySubBuffer() for the XMesaBuffer belonging to 'drawable'; an
+ * unknown drawable is only reported when the MESA_DEBUG environment
+ * variable is set.
+ */
+static void
+Fake_glXCopySubBufferMESA( Display *dpy, GLXDrawable drawable,
+                           int x, int y, int width, int height )
+{
+   XMesaBuffer target = XMesaFindBuffer( dpy, drawable );
+
+   if (target) {
+      XMesaCopySubBuffer(target, x, y, width, height);
+      return;
+   }
+
+   if (_mesa_getenv("MESA_DEBUG")) {
+      _mesa_warning(NULL, "Mesa: glXCopySubBufferMESA: invalid drawable\n");
+   }
+}
+
+
+/**
+ * glXQueryVersion() entry point.  Reports the GLX (not Mesa) version:
+ * the client major version and the smaller of the client and server
+ * minor versions.  Always returns True.
+ */
+static Bool
+Fake_glXQueryVersion( Display *dpy, int *maj, int *min )
+{
+   const int major = CLIENT_MAJOR_VERSION;
+   const int minor = MIN2( CLIENT_MINOR_VERSION, SERVER_MINOR_VERSION );
+
+   (void) dpy;
+
+   /* client and server are expected to agree on the major version */
+   assert(CLIENT_MAJOR_VERSION == SERVER_MAJOR_VERSION);
+
+   *maj = major;
+   *min = minor;
+   return True;
+}
+
+
+/**
+ * Query one GLX attribute of the given XMesa visual.
+ *
+ * \param xmvis     visual to query; must be non-NULL (asserted)
+ * \param attrib    GLX attribute token
+ * \param value     receives the attribute value; left untouched for the
+ *                  attributes marked "undefined" below
+ * \param fbconfig  GL_TRUE when the query is made for a GLXFBConfig,
+ *                  GL_FALSE when it is made for a plain visual
+ * \return 0 / Success when the attribute is recognized, GLX_BAD_ATTRIBUTE
+ *         when it is unknown or not valid for the kind of query selected
+ *         by \p fbconfig.
+ */
+static int
+get_config( XMesaVisual xmvis, int attrib, int *value, GLboolean fbconfig )
+{
+   ASSERT(xmvis);
+   switch(attrib) {
+      case GLX_USE_GL:
+         /* visual-only attribute */
+         if (fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = (int) True;
+         return 0;
+      case GLX_BUFFER_SIZE:
+         *value = xmvis->visinfo->depth;
+         return 0;
+      case GLX_LEVEL:
+         *value = xmvis->mesa_visual.level;
+         return 0;
+      case GLX_RGBA:
+         /* visual-only attribute */
+         if (fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = xmvis->mesa_visual.rgbMode ? True : False;
+         return 0;
+      case GLX_DOUBLEBUFFER:
+         *value = (int) xmvis->mesa_visual.doubleBufferMode;
+         return 0;
+      case GLX_STEREO:
+         *value = (int) xmvis->mesa_visual.stereoMode;
+         return 0;
+      case GLX_AUX_BUFFERS:
+         *value = xmvis->mesa_visual.numAuxBuffers;
+         return 0;
+      case GLX_RED_SIZE:
+         *value = xmvis->mesa_visual.redBits;
+         return 0;
+      case GLX_GREEN_SIZE:
+         *value = xmvis->mesa_visual.greenBits;
+         return 0;
+      case GLX_BLUE_SIZE:
+         *value = xmvis->mesa_visual.blueBits;
+         return 0;
+      case GLX_ALPHA_SIZE:
+         *value = xmvis->mesa_visual.alphaBits;
+         return 0;
+      case GLX_DEPTH_SIZE:
+         *value = xmvis->mesa_visual.depthBits;
+         return 0;
+      case GLX_STENCIL_SIZE:
+         *value = xmvis->mesa_visual.stencilBits;
+         return 0;
+      case GLX_ACCUM_RED_SIZE:
+         *value = xmvis->mesa_visual.accumRedBits;
+         return 0;
+      case GLX_ACCUM_GREEN_SIZE:
+         *value = xmvis->mesa_visual.accumGreenBits;
+         return 0;
+      case GLX_ACCUM_BLUE_SIZE:
+         *value = xmvis->mesa_visual.accumBlueBits;
+         return 0;
+      case GLX_ACCUM_ALPHA_SIZE:
+         *value = xmvis->mesa_visual.accumAlphaBits;
+         return 0;
+
+      /*
+       * GLX_EXT_visual_info extension
+       */
+      case GLX_X_VISUAL_TYPE_EXT:
+         /* Translate the X visual class; *value is left untouched for a
+          * class not listed here.
+          */
+         switch (xmvis->visinfo->CLASS) {
+            case StaticGray:   *value = GLX_STATIC_GRAY_EXT;   return 0;
+            case GrayScale:    *value = GLX_GRAY_SCALE_EXT;    return 0;
+            case StaticColor:  *value = GLX_STATIC_COLOR_EXT;  return 0;
+            case PseudoColor:  *value = GLX_PSEUDO_COLOR_EXT;  return 0;
+            case TrueColor:    *value = GLX_TRUE_COLOR_EXT;    return 0;
+            case DirectColor:  *value = GLX_DIRECT_COLOR_EXT;  return 0;
+         }
+         return 0;
+      case GLX_TRANSPARENT_TYPE_EXT:
+         if (xmvis->mesa_visual.level==0) {
+            /* normal planes */
+            *value = GLX_NONE_EXT;
+         }
+         else if (xmvis->mesa_visual.level>0) {
+            /* overlay */
+            if (xmvis->mesa_visual.rgbMode) {
+               *value = GLX_TRANSPARENT_RGB_EXT;
+            }
+            else {
+               *value = GLX_TRANSPARENT_INDEX_EXT;
+            }
+         }
+         else if (xmvis->mesa_visual.level<0) {
+            /* underlay */
+            *value = GLX_NONE_EXT;
+         }
+         return 0;
+      case GLX_TRANSPARENT_INDEX_VALUE_EXT:
+         {
+            int pixel = transparent_pixel( xmvis );
+            if (pixel>=0) {
+               *value = pixel;
+            }
+            /* else undefined */
+         }
+         return 0;
+      case GLX_TRANSPARENT_RED_VALUE_EXT:
+         /* undefined */
+         return 0;
+      case GLX_TRANSPARENT_GREEN_VALUE_EXT:
+         /* undefined */
+         return 0;
+      case GLX_TRANSPARENT_BLUE_VALUE_EXT:
+         /* undefined */
+         return 0;
+      case GLX_TRANSPARENT_ALPHA_VALUE_EXT:
+         /* undefined */
+         return 0;
+
+      /*
+       * GLX_EXT_visual_rating extension
+       */
+      case GLX_VISUAL_CAVEAT_EXT:
+         /* test for zero, just in case */
+         if (xmvis->mesa_visual.visualRating > 0)
+            *value = xmvis->mesa_visual.visualRating;
+         else
+            *value = GLX_NONE_EXT;
+         return 0;
+
+      /*
+       * GLX_ARB_multisample
+       */
+      case GLX_SAMPLE_BUFFERS_ARB:
+         *value = 0;
+         return 0;
+      case GLX_SAMPLES_ARB:
+         *value = 0;
+         return 0;
+
+      /*
+       * For FBConfigs:
+       */
+      case GLX_SCREEN_EXT:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = xmvis->visinfo->screen;
+         break;
+      case GLX_DRAWABLE_TYPE: /*SGIX too */
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = GLX_WINDOW_BIT | GLX_PIXMAP_BIT | GLX_PBUFFER_BIT;
+         break;
+      case GLX_RENDER_TYPE_SGIX:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         if (xmvis->mesa_visual.rgbMode)
+            *value = GLX_RGBA_BIT;
+         else
+            *value = GLX_COLOR_INDEX_BIT;
+         break;
+      case GLX_X_RENDERABLE_SGIX:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = True; /* XXX really? */
+         break;
+      case GLX_FBCONFIG_ID_SGIX:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         /* the X visual ID doubles as the FBConfig ID */
+         *value = xmvis->visinfo->visualid;
+         break;
+      case GLX_MAX_PBUFFER_WIDTH:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         /* XXX or MAX_WIDTH? */
+         *value = DisplayWidth(xmvis->display, xmvis->visinfo->screen);
+         break;
+      case GLX_MAX_PBUFFER_HEIGHT:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = DisplayHeight(xmvis->display, xmvis->visinfo->screen);
+         break;
+      case GLX_MAX_PBUFFER_PIXELS:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = DisplayWidth(xmvis->display, xmvis->visinfo->screen) *
+                  DisplayHeight(xmvis->display, xmvis->visinfo->screen);
+         break;
+      case GLX_VISUAL_ID:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = xmvis->visinfo->visualid;
+         break;
+
+#ifdef GLX_EXT_texture_from_pixmap
+      case GLX_BIND_TO_TEXTURE_RGB_EXT:
+         *value = True; /*XXX*/
+         break;
+      case GLX_BIND_TO_TEXTURE_RGBA_EXT:
+         /* XXX review */
+         *value = xmvis->mesa_visual.alphaBits > 0 ? True : False;
+         break;
+      case GLX_BIND_TO_MIPMAP_TEXTURE_EXT:
+         *value = True; /*XXX*/
+         break;
+      case GLX_BIND_TO_TEXTURE_TARGETS_EXT:
+         *value = (GLX_TEXTURE_1D_BIT_EXT |
+                   GLX_TEXTURE_2D_BIT_EXT |
+                   GLX_TEXTURE_RECTANGLE_BIT_EXT); /*XXX*/
+         break;
+      case GLX_Y_INVERTED_EXT:
+         *value = True; /*XXX*/
+         break;
+#endif
+
+      default:
+         return GLX_BAD_ATTRIBUTE;
+   }
+   return Success;
+}
+
+
+/**
+ * glXGetConfig: query a GLX attribute of an X visual.
+ *
+ * Looks the visual up with find_glx_visual() and, if that fails, tries
+ * create_glx_visual().  When neither yields a visual, GLX_USE_GL reports
+ * False and any other attribute returns GLX_BAD_VISUAL.
+ */
+static int
+Fake_glXGetConfig( Display *dpy, XVisualInfo *visinfo,
+                   int attrib, int *value )
+{
+   XMesaVisual xmvis;
+
+   if (dpy == NULL || visinfo == NULL)
+      return GLX_BAD_ATTRIBUTE;
+
+   xmvis = find_glx_visual( dpy, visinfo );
+   if (!xmvis) {
+      /* this visual wasn't obtained with glXChooseVisual */
+      xmvis = create_glx_visual( dpy, visinfo );
+   }
+
+   if (!xmvis) {
+      /* this visual can't be used for GL rendering */
+      if (attrib != GLX_USE_GL)
+         return GLX_BAD_VISUAL;
+      *value = (int) False;
+      return 0;
+   }
+
+   return get_config(xmvis, attrib, value, GL_FALSE);
+}
+
+
+/** glXWaitGL: flush the current XMesa context. */
+static void
+Fake_glXWaitGL( void )
+{
+   XMesaFlush( XMesaGetCurrentContext() );
+}
+
+
+
+/** glXWaitX: implemented, like glXWaitGL, as a flush of the current context. */
+static void
+Fake_glXWaitX( void )
+{
+   XMesaFlush( XMesaGetCurrentContext() );
+}
+
+
+/**
+ * Return the GLX extensions string.
+ *
+ * By default the first 23 characters of EXTENSIONS are skipped.  In FX
+ * builds the whole string is returned when the MESA_GLX_FX environment
+ * variable is set and does not start with 'd'.
+ */
+static const char *
+get_extensions( void )
+{
+   const char *ext = EXTENSIONS + 23; /* skip "GLX_MESA_set_3dfx_mode" */
+#ifdef FX
+   const char *fx = _mesa_getenv("MESA_GLX_FX");
+   if (fx && fx[0] != 'd') {
+      ext = EXTENSIONS;
+   }
+#endif
+   return ext;
+}
+
+
+
+/**
+ * GLX 1.1 and later.
+ * Return the extensions string; dpy and screen are ignored.
+ */
+static const char *
+Fake_glXQueryExtensionsString( Display *dpy, int screen )
+{
+   const char *ext = get_extensions();
+
+   (void) dpy;
+   (void) screen;
+
+   return ext;
+}
+
+
+
+/**
+ * GLX 1.1 and later.
+ * Return the server-side extensions, vendor or version string, or NULL
+ * for any other name.  The version string lives in a static buffer that
+ * is re-formatted on every call.
+ */
+static const char *
+Fake_glXQueryServerString( Display *dpy, int screen, int name )
+{
+   static char version[1000];
+
+   (void) dpy;
+   (void) screen;
+
+   sprintf(version, "%d.%d %s",
+           SERVER_MAJOR_VERSION, SERVER_MINOR_VERSION, MESA_GLX_VERSION);
+
+   if (name == GLX_EXTENSIONS)
+      return get_extensions();
+   if (name == GLX_VENDOR)
+      return VENDOR;
+   if (name == GLX_VERSION)
+      return version;
+
+   return NULL;
+}
+
+
+
+/**
+ * GLX 1.1 and later.
+ * Return the client-side extensions, vendor or version string, or NULL
+ * for any other name.  The version string lives in a static buffer that
+ * is re-formatted on every call.
+ */
+static const char *
+Fake_glXGetClientString( Display *dpy, int name )
+{
+   static char version[1000];
+
+   (void) dpy;
+
+   sprintf(version, "%d.%d %s", CLIENT_MAJOR_VERSION,
+           CLIENT_MINOR_VERSION, MESA_GLX_VERSION);
+
+   if (name == GLX_EXTENSIONS)
+      return get_extensions();
+   if (name == GLX_VENDOR)
+      return VENDOR;
+   if (name == GLX_VERSION)
+      return version;
+
+   return NULL;
+}
+
+
+
+/*
+ * GLX 1.3 and later
+ */
+
+
+/**
+ * Query an attribute of a GLXFBConfig.  The config handle is an
+ * XMesaVisual in disguise, so the work is done by get_config().
+ *
+ * \return -1 if dpy, config or value is NULL, otherwise get_config()'s
+ *         result.
+ */
+static int
+Fake_glXGetFBConfigAttrib( Display *dpy, GLXFBConfig config,
+                           int attribute, int *value )
+{
+   if (dpy == NULL || config == NULL || value == NULL)
+      return -1;
+
+   return get_config((XMesaVisual) config, attribute, value, GL_TRUE);
+}
+
+
+/**
+ * Return an array holding one FBConfig (XMesaVisual) per X visual of the
+ * given screen.
+ *
+ * \param dpy        the display
+ * \param screen     screen number whose visuals are enumerated
+ * \param nelements  receives the number of entries in the result; set to
+ *                   0 when NULL is returned
+ * \return malloc'ed array of configs, or NULL if the screen has no
+ *         visuals or memory could not be allocated.  Individual entries
+ *         hold whatever create_glx_visual() returned for that visual.
+ */
+static GLXFBConfig *
+Fake_glXGetFBConfigs( Display *dpy, int screen, int *nelements )
+{
+   XVisualInfo *visuals, visTemplate;
+   const long visMask = VisualScreenMask;
+   XMesaVisual *results;
+   int i;
+
+   /* Get list of all X visuals */
+   visTemplate.screen = screen;
+   visuals = XGetVisualInfo(dpy, visMask, &visTemplate, nelements);
+   if (!visuals || *nelements <= 0) {
+      *nelements = 0;
+      return NULL;
+   }
+
+   results = (XMesaVisual *) malloc(*nelements * sizeof(XMesaVisual));
+   if (!results) {
+      /* nothing refers to the visual list yet, so release it */
+      XFree(visuals);
+      *nelements = 0;
+      return NULL;
+   }
+
+   for (i = 0; i < *nelements; i++) {
+      results[i] = create_glx_visual(dpy, visuals + i);
+   }
+
+   /* NOTE(review): 'visuals' is not freed on success because
+    * create_glx_visual() may keep pointers into it - confirm.
+    */
+   return (GLXFBConfig *) results;
+}
+
+
+/**
+ * Choose FBConfigs matching an attribute list.
+ *
+ * A NULL or empty list returns every config of the screen.  Otherwise at
+ * most one config - the visual picked by choose_visual() - is returned in
+ * a malloc'ed one-element array.  *nitems is 0 whenever NULL is returned
+ * from the single-config path.
+ */
+static GLXFBConfig *
+Fake_glXChooseFBConfig( Display *dpy, int screen,
+                        const int *attribList, int *nitems )
+{
+   XMesaVisual xmvis;
+   GLXFBConfig *configs;
+
+   if (!attribList || !attribList[0]) {
+      /* return list of all configs (per GLX_SGIX_fbconfig spec) */
+      return Fake_glXGetFBConfigs(dpy, screen, nitems);
+   }
+
+   xmvis = choose_visual(dpy, screen, attribList, GL_TRUE);
+   if (!xmvis) {
+      *nitems = 0;
+      return NULL;
+   }
+
+   configs = (GLXFBConfig *) malloc(sizeof(XMesaVisual));
+   if (!configs) {
+      *nitems = 0;
+      return NULL;
+   }
+
+   configs[0] = (GLXFBConfig) xmvis;
+   *nitems = 1;
+   return configs;
+}
+
+
+/**
+ * Return a freshly malloc'ed copy of the config's XVisualInfo.
+ *
+ * The copy is also stored in the visual's 'vishandle' field, replacing
+ * the previous handle.  Returns NULL if dpy or config is NULL, or if the
+ * allocation fails.
+ */
+static XVisualInfo *
+Fake_glXGetVisualFromFBConfig( Display *dpy, GLXFBConfig config )
+{
+   XMesaVisual xmvis;
+
+   if (!dpy || !config)
+      return NULL;
+
+   xmvis = (XMesaVisual) config;
+
+   /* create a new vishandle - the cached one may be stale */
+   xmvis->vishandle = (XVisualInfo *) malloc(sizeof(XVisualInfo));
+   if (xmvis->vishandle) {
+      memcpy(xmvis->vishandle, xmvis->visinfo, sizeof(XVisualInfo));
+   }
+   return xmvis->vishandle;
+}
+
+
+/**
+ * Create the XMesa window buffer for win and hand back win itself as the
+ * GLXWindow handle.  Returns 0 if config is NULL or the buffer cannot be
+ * created.  dpy and attribList are ignored.
+ */
+static GLXWindow
+Fake_glXCreateWindow( Display *dpy, GLXFBConfig config, Window win,
+                      const int *attribList )
+{
+   XMesaVisual xmvis = (XMesaVisual) config;
+   XMesaBuffer xmbuf;
+
+   (void) dpy;
+   (void) attribList;  /* Ignored in GLX 1.3 */
+
+   if (xmvis == NULL)
+      return 0;
+
+   xmbuf = XMesaCreateWindowBuffer(xmvis, win);
+   if (xmbuf == NULL)
+      return 0;
+
+#ifdef FX
+   /* XXX this will segfault if actually called */
+   FXcreateContext(xmvis, win, NULL, xmbuf);
+#endif
+
+   return win;  /* A hack for now */
+}
+
+
+/**
+ * Destroy the XMesa buffer attached to a GLXWindow, if there is one.
+ * The X window itself is left alone.
+ */
+static void
+Fake_glXDestroyWindow( Display *dpy, GLXWindow window )
+{
+   XMesaBuffer xmbuf = XMesaFindBuffer(dpy, (XMesaDrawable) window);
+
+   if (xmbuf) {
+      XMesaDestroyBuffer(xmbuf);
+   }
+   /* don't destroy X window */
+}
+
+
+/**
+ * Helper for Fake_glXCreatePixmap(): query an FBConfig attribute and
+ * report whether any of the bits in \p mask are set in its value.
+ * Returns GL_FALSE if the query itself fails.
+ */
+static GLboolean
+pixmap_config_allows( XMesaVisual v, int attrib, int mask )
+{
+   int value;
+
+   if (get_config(v, attrib, &value, GL_TRUE) != Success)
+      return GL_FALSE;
+   return (value & mask) != 0 ? GL_TRUE : GL_FALSE;
+}
+
+
+/* XXX untested */
+/**
+ * Create the XMesa buffer for an X pixmap and return the pixmap itself
+ * as the GLXPixmap handle.
+ *
+ * attribList is a zero-terminated list of (name, value) pairs; only the
+ * texture format, texture target and mipmap attributes are accepted.
+ * When any of them is given, a texture-from-pixmap buffer is created,
+ * provided the config allows the requested combination.
+ *
+ * \return the pixmap, or 0 on a NULL argument, an unknown attribute or
+ *         value, an unsupported request, or buffer creation failure.
+ */
+static GLXPixmap
+Fake_glXCreatePixmap( Display *dpy, GLXFBConfig config, Pixmap pixmap,
+                      const int *attribList )
+{
+   XMesaVisual v = (XMesaVisual) config;
+   XMesaBuffer b;
+   const int *attr;
+   int target = 0, format = 0, mipmap = 0;
+
+   if (!dpy || !config || !pixmap)
+      return 0;
+
+   for (attr = attribList; attr && *attr; attr++) {
+      if (*attr == GLX_TEXTURE_FORMAT_EXT) {
+         format = *++attr;
+         if (format != GLX_TEXTURE_FORMAT_NONE_EXT &&
+             format != GLX_TEXTURE_FORMAT_RGB_EXT &&
+             format != GLX_TEXTURE_FORMAT_RGBA_EXT) {
+            /* error */
+            return 0;
+         }
+      }
+      else if (*attr == GLX_TEXTURE_TARGET_EXT) {
+         target = *++attr;
+         if (target != GLX_TEXTURE_1D_EXT &&
+             target != GLX_TEXTURE_2D_EXT &&
+             target != GLX_TEXTURE_RECTANGLE_EXT) {
+            /* error */
+            return 0;
+         }
+      }
+      else if (*attr == GLX_MIPMAP_TEXTURE_EXT) {
+         if (*++attr)
+            mipmap = 1;
+      }
+      else {
+         /* error */
+         return 0;
+      }
+   }
+
+   /* the config must be bindable with the requested texture format ... */
+   if (format == GLX_TEXTURE_FORMAT_RGB_EXT &&
+       !pixmap_config_allows(v, GLX_BIND_TO_TEXTURE_RGB_EXT, ~0))
+      return 0; /* error! */
+   if (format == GLX_TEXTURE_FORMAT_RGBA_EXT &&
+       !pixmap_config_allows(v, GLX_BIND_TO_TEXTURE_RGBA_EXT, ~0))
+      return 0; /* error! */
+
+   /* ... with mipmaps, if requested ... */
+   if (mipmap &&
+       !pixmap_config_allows(v, GLX_BIND_TO_MIPMAP_TEXTURE_EXT, ~0))
+      return 0; /* error! */
+
+   /* ... and to the requested texture target */
+   if (target == GLX_TEXTURE_1D_EXT &&
+       !pixmap_config_allows(v, GLX_BIND_TO_TEXTURE_TARGETS_EXT,
+                             GLX_TEXTURE_1D_BIT_EXT))
+      return 0; /* error! */
+   if (target == GLX_TEXTURE_2D_EXT &&
+       !pixmap_config_allows(v, GLX_BIND_TO_TEXTURE_TARGETS_EXT,
+                             GLX_TEXTURE_2D_BIT_EXT))
+      return 0; /* error! */
+   if (target == GLX_TEXTURE_RECTANGLE_EXT &&
+       !pixmap_config_allows(v, GLX_BIND_TO_TEXTURE_TARGETS_EXT,
+                             GLX_TEXTURE_RECTANGLE_BIT_EXT))
+      return 0; /* error! */
+
+   if (format || target || mipmap) {
+      /* texture from pixmap */
+      b = XMesaCreatePixmapTextureBuffer(v, pixmap, 0, format, target, mipmap);
+   }
+   else {
+      b = XMesaCreatePixmapBuffer( v, pixmap, 0 );
+   }
+
+   return b ? pixmap : 0;
+}
+
+
+/**
+ * Destroy the XMesa buffer attached to a GLXPixmap, if there is one.
+ * The X pixmap itself is left alone.
+ */
+static void
+Fake_glXDestroyPixmap( Display *dpy, GLXPixmap pixmap )
+{
+   XMesaBuffer xmbuf = XMesaFindBuffer(dpy, (XMesaDrawable)pixmap);
+
+   if (xmbuf) {
+      XMesaDestroyBuffer(xmbuf);
+   }
+   /* don't destroy X pixmap */
+}
+
+
+/**
+ * Create a pbuffer for the given FBConfig.
+ *
+ * \param dpy         unused
+ * \param config      FBConfig (an XMesaVisual)
+ * \param attribList  zero-terminated list of (name, value) pairs; may be
+ *                    NULL.  Accepted names: GLX_PBUFFER_WIDTH,
+ *                    GLX_PBUFFER_HEIGHT, GLX_PRESERVED_CONTENTS and
+ *                    GLX_LARGEST_PBUFFER.
+ * \return the X pixmap ID of the new buffer's front renderbuffer, or 0
+ *         if an unknown attribute is given, width or height is zero (or
+ *         missing), or buffer creation fails.
+ */
+static GLXPbuffer
+Fake_glXCreatePbuffer( Display *dpy, GLXFBConfig config,
+                       const int *attribList )
+{
+   XMesaVisual xmvis = (XMesaVisual) config;
+   XMesaBuffer xmbuf;
+   const int *attrib;
+   int width = 0, height = 0;
+   GLboolean useLargest = GL_FALSE, preserveContents = GL_FALSE;
+
+   (void) dpy;
+
+   /* A NULL list is treated like an empty one (and then rejected below
+    * because no size was given) instead of being dereferenced.
+    */
+   for (attrib = attribList; attrib && *attrib; attrib++) {
+      switch (*attrib) {
+         case GLX_PBUFFER_WIDTH:
+            attrib++;
+            width = *attrib;
+            break;
+         case GLX_PBUFFER_HEIGHT:
+            attrib++;
+            height = *attrib;
+            break;
+         case GLX_PRESERVED_CONTENTS:
+            attrib++;
+            preserveContents = *attrib;
+            break;
+         case GLX_LARGEST_PBUFFER:
+            attrib++;
+            useLargest = *attrib;
+            break;
+         default:
+            return 0;
+      }
+   }
+
+   if (width == 0 || height == 0)
+      return 0;
+
+   if (width > MAX_WIDTH || height > MAX_HEIGHT) {
+      /* If allocation would have failed and GLX_LARGEST_PBUFFER is set,
+       * allocate the largest possible buffer.
+       */
+      if (useLargest) {
+         width = MAX_WIDTH;
+         height = MAX_HEIGHT;
+      }
+   }
+
+   xmbuf = XMesaCreatePBuffer( xmvis, 0, width, height);
+   /* A GLXPbuffer handle must be an X Drawable because that's what
+    * glXMakeCurrent takes.
+    */
+   if (xmbuf) {
+      /* remember the creation flags so they can be queried later */
+      xmbuf->largestPbuffer = useLargest;
+      xmbuf->preservedContents = preserveContents;
+      return (GLXPbuffer) xmbuf->frontxrb->pixmap;
+   }
+   else {
+      return 0;
+   }
+}
+
+
+/** Destroy the XMesa buffer behind a GLXPbuffer handle, if one exists. */
+static void
+Fake_glXDestroyPbuffer( Display *dpy, GLXPbuffer pbuf )
+{
+   XMesaBuffer xmbuf = XMesaFindBuffer(dpy, pbuf);
+
+   if (!xmbuf)
+      return;
+
+   XMesaDestroyBuffer(xmbuf);
+}
+
+
+/**
+ * Query an attribute of a GLX drawable.
+ *
+ * Nothing is written to *value when the drawable has no XMesa buffer or
+ * the attribute is not recognized.
+ */
+static void
+Fake_glXQueryDrawable( Display *dpy, GLXDrawable draw, int attribute,
+                       unsigned int *value )
+{
+   XMesaBuffer buf = XMesaFindBuffer(dpy, draw);
+
+   if (buf == NULL)
+      return;
+
+   /* make sure buffer's dimensions are up to date */
+   xmesa_check_and_update_buffer_size(NULL, buf);
+
+   switch (attribute) {
+      case GLX_WIDTH:
+         *value = buf->mesa_buffer.Width;
+         break;
+      case GLX_HEIGHT:
+         *value = buf->mesa_buffer.Height;
+         break;
+      case GLX_PRESERVED_CONTENTS:
+         *value = buf->preservedContents;
+         break;
+      case GLX_LARGEST_PBUFFER:
+         *value = buf->largestPbuffer;
+         break;
+      case GLX_FBCONFIG_ID:
+         /* the X visual ID is used as the FBConfig ID */
+         *value = buf->xm_visual->visinfo->visualid;
+         break;
+#ifdef GLX_EXT_texture_from_pixmap
+      case GLX_TEXTURE_FORMAT_EXT:
+         *value = buf->TextureFormat;
+         break;
+      case GLX_TEXTURE_TARGET_EXT:
+         *value = buf->TextureTarget;
+         break;
+      case GLX_MIPMAP_TEXTURE_EXT:
+         *value = buf->TextureMipmap;
+         break;
+#endif
+
+      default:
+         break; /* raise BadValue error */
+   }
+}
+
+
+/**
+ * Create a fake GLX context for an FBConfig.
+ *
+ * Returns 0/NULL if dpy or config is NULL, if renderType is neither
+ * GLX_RGBA_TYPE nor GLX_COLOR_INDEX_TYPE, or if the wrapper or the
+ * underlying XMesa context cannot be created.  The 'direct' argument is
+ * not used.
+ */
+static GLXContext
+Fake_glXCreateNewContext( Display *dpy, GLXFBConfig config,
+                          int renderType, GLXContext shareList, Bool direct )
+{
+   struct fake_glx_context *shareCtx = (struct fake_glx_context *) shareList;
+   struct fake_glx_context *glxCtx;
+   XMesaContext shareXm;
+
+   if (!dpy || !config)
+      return 0;
+   if (renderType != GLX_RGBA_TYPE && renderType != GLX_COLOR_INDEX_TYPE)
+      return 0;
+
+   glxCtx = CALLOC_STRUCT(fake_glx_context);
+   if (!glxCtx)
+      return 0;
+
+   /* deallocate unused windows/buffers */
+   XMesaGarbageCollect();
+
+   shareXm = shareCtx ? shareCtx->xmesaContext : NULL;
+   glxCtx->xmesaContext = XMesaCreateContext((XMesaVisual) config, shareXm);
+   if (!glxCtx->xmesaContext) {
+      free(glxCtx);
+      return NULL;
+   }
+
+   init_glx_context(glxCtx, dpy);
+
+   return (GLXContext) glxCtx;
+}
+
+
+/**
+ * Query an attribute of a fake GLX context.
+ *
+ * \param dpy        unused
+ * \param ctx        context handle (a struct fake_glx_context pointer)
+ * \param attribute  GLX_FBCONFIG_ID, GLX_RENDER_TYPE or GLX_SCREEN
+ * \param value      receives the result
+ * \return Success, or GLX_BAD_ATTRIBUTE for any other attribute.
+ *
+ * All answers are derived from the visual the context was created with:
+ * the render type follows the visual's rgbMode and the screen is the
+ * visual's screen number.
+ */
+static int
+Fake_glXQueryContext( Display *dpy, GLXContext ctx, int attribute, int *value )
+{
+   struct fake_glx_context *glxCtx = (struct fake_glx_context *) ctx;
+   XMesaContext xmctx = glxCtx->xmesaContext;
+
+   (void) dpy;
+
+   switch (attribute) {
+   case GLX_FBCONFIG_ID:
+      /* the X visual ID is used as the FBConfig ID */
+      *value = xmctx->xm_visual->visinfo->visualid;
+      break;
+   case GLX_RENDER_TYPE:
+      if (xmctx->xm_visual->mesa_visual.rgbMode)
+         *value = GLX_RGBA_TYPE;
+      else
+         *value = GLX_COLOR_INDEX_TYPE;
+      break;
+   case GLX_SCREEN:
+      *value = xmctx->xm_visual->visinfo->screen;
+      break;
+   default:
+      return GLX_BAD_ATTRIBUTE;
+   }
+   return Success;
+}
+
+
+/**
+ * Record the event mask for a drawable in its XMesa buffer.
+ * Silently does nothing if the drawable has no buffer.
+ */
+static void
+Fake_glXSelectEvent( Display *dpy, GLXDrawable drawable, unsigned long mask )
+{
+   XMesaBuffer buf = XMesaFindBuffer(dpy, drawable);
+
+   if (buf != NULL) {
+      buf->selectedEvents = mask;
+   }
+}
+
+
+/**
+ * Return the event mask recorded for a drawable, or 0 if the drawable
+ * has no XMesa buffer.
+ */
+static void
+Fake_glXGetSelectedEvent( Display *dpy, GLXDrawable drawable,
+                          unsigned long *mask )
+{
+   XMesaBuffer buf = XMesaFindBuffer(dpy, drawable);
+
+   *mask = buf ? buf->selectedEvents : 0;
+}
+
+
+
+/*** GLX_SGI_swap_control ***/
+
+/** No-op: the swap interval is ignored and 0 is returned. */
+static int
+Fake_glXSwapIntervalSGI( int interval )
+{
+   (void) interval;
+
+   return 0;
+}
+
+
+
+/*** GLX_SGI_video_sync ***/
+
+/* Software frame counter used by the fake video-sync functions. */
+static unsigned int FrameCounter = 0;
+
+/**
+ * Store the current value of the software frame counter in *count and
+ * then advance the counter by one.  Always returns 0.
+ */
+static int
+Fake_glXGetVideoSyncSGI( unsigned int *count )
+{
+   /* this is a bogus implementation */
+   *count = FrameCounter;
+   FrameCounter++;
+   return 0;
+}
+
+/**
+ * Advance the software frame counter until it is congruent to
+ * \p remainder modulo \p divisor, and report the new counter value.
+ *
+ * \param divisor    must be > 0
+ * \param remainder  must satisfy 0 <= remainder < divisor; a larger value
+ *                   could never be matched and is rejected instead of
+ *                   spinning forever
+ * \param count      receives the new counter value; may be NULL
+ * \return 0 on success, GLX_BAD_VALUE for invalid divisor/remainder.
+ */
+static int
+Fake_glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count)
+{
+   if (divisor <= 0 || remainder < 0 || remainder >= divisor)
+      return GLX_BAD_VALUE;
+   /* this is a bogus implementation */
+   FrameCounter++;
+   while (FrameCounter % (unsigned int) divisor != (unsigned int) remainder)
+      FrameCounter++;
+   if (count)
+      *count = FrameCounter;
+   return 0;
+}
+
+
+
+/*** GLX_SGI_make_current_read ***/
+
+/** SGI alias: forwards unchanged to Fake_glXMakeContextCurrent(). */
+static Bool
+Fake_glXMakeCurrentReadSGI( Display *dpy, GLXDrawable draw,
+                            GLXDrawable read, GLXContext ctx )
+{
+   const Bool ok = Fake_glXMakeContextCurrent( dpy, draw, read, ctx );
+   return ok;
+}
+
+/* not used
+static GLXDrawable
+Fake_glXGetCurrentReadDrawableSGI(void)
+{
+   return 0;
+}
+*/
+
+
+/*** GLX_SGIX_video_source ***/
+#if defined(_VL_H)
+
+/** No-op stub: every argument is ignored and 0 is returned. */
+static GLXVideoSourceSGIX
+Fake_glXCreateGLXVideoSourceSGIX( Display *dpy, int screen,
+                                  VLServer server, VLPath path,
+                                  int nodeClass, VLNode drainNode )
+{
+   (void) dpy;
+   (void) screen;
+   (void) server;
+   (void) path;
+   (void) nodeClass;
+   (void) drainNode;
+
+   return 0;
+}
+
+/** No-op stub: both arguments are ignored. */
+static void
+Fake_glXDestroyGLXVideoSourceSGIX( Display *dpy, GLXVideoSourceSGIX src )
+{
+   (void) dpy;
+   (void) src;
+}
+
+#endif
+
+
+/*** GLX_EXT_import_context ***/
+
+/** No-op stub: both arguments are ignored. */
+static void
+Fake_glXFreeContextEXT( Display *dpy, GLXContext context )
+{
+   (void) dpy;
+   (void) context;
+}
+
+/** No-op stub: the context is ignored and 0 is returned. */
+static GLXContextID
+Fake_glXGetContextIDEXT( const GLXContext context )
+{
+   (void) context;
+
+   return 0;
+}
+
+/** No-op stub: both arguments are ignored and 0 is returned. */
+static GLXContext
+Fake_glXImportContextEXT( Display *dpy, GLXContextID contextID )
+{
+   (void) dpy;
+   (void) contextID;
+
+   return 0;
+}
+
+/**
+ * No-op stub: every argument is ignored, *value is not written and 0 is
+ * returned.
+ */
+static int
+Fake_glXQueryContextInfoEXT( Display *dpy, GLXContext context,
+                             int attribute, int *value )
+{
+   (void) dpy;
+   (void) context;
+   (void) attribute;
+   (void) value;
+
+   return 0;
+}
+
+
+
+/*** GLX_SGIX_fbconfig ***/
+
+/** SGIX alias: forwards unchanged to Fake_glXGetFBConfigAttrib(). */
+static int
+Fake_glXGetFBConfigAttribSGIX( Display *dpy, GLXFBConfigSGIX config,
+                               int attribute, int *value )
+{
+   const int result = Fake_glXGetFBConfigAttrib(dpy, config, attribute, value);
+   return result;
+}
+
+/** SGIX alias: forwards unchanged to Fake_glXChooseFBConfig(). */
+static GLXFBConfigSGIX *
+Fake_glXChooseFBConfigSGIX( Display *dpy, int screen,
+                            int *attrib_list, int *nelements )
+{
+   GLXFBConfig *configs;
+
+   configs = (GLXFBConfig *) Fake_glXChooseFBConfig(dpy, screen,
+                                                    attrib_list, nelements);
+   return configs;
+}
+
+
+/**
+ * Create an XMesa pixmap buffer for the given config and X pixmap.
+ *
+ * \param dpy     unused
+ * \param config  FBConfig (an XMesaVisual)
+ * \param pixmap  the X pixmap to render into
+ * \return the X pixmap ID of the buffer's front renderbuffer, or 0 if
+ *         the buffer could not be created.
+ */
+static GLXPixmap
+Fake_glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pixmap)
+{
+   XMesaVisual xmvis = (XMesaVisual) config;
+   XMesaBuffer xmbuf = XMesaCreatePixmapBuffer(xmvis, pixmap, 0);
+
+   (void) dpy;
+
+   /* buffer creation can fail; don't dereference a NULL buffer */
+   if (!xmbuf)
+      return 0;
+
+   return xmbuf->frontxrb->pixmap; /* need to return an X ID */
+}
+
+
+/**
+ * Create a fake GLX context for an SGIX FBConfig.
+ *
+ * Returns 0/NULL if the wrapper or the underlying XMesa context cannot
+ * be created.  render_type and direct are not used.
+ */
+static GLXContext
+Fake_glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int render_type, GLXContext share_list, Bool direct)
+{
+   struct fake_glx_context *shareCtx = (struct fake_glx_context *) share_list;
+   struct fake_glx_context *glxCtx;
+   XMesaContext shareXm;
+
+   glxCtx = CALLOC_STRUCT(fake_glx_context);
+   if (glxCtx == NULL)
+      return 0;
+
+   /* deallocate unused windows/buffers */
+   XMesaGarbageCollect();
+
+   shareXm = shareCtx ? shareCtx->xmesaContext : NULL;
+   glxCtx->xmesaContext = XMesaCreateContext((XMesaVisual) config, shareXm);
+   if (glxCtx->xmesaContext == NULL) {
+      free(glxCtx);
+      return NULL;
+   }
+
+   init_glx_context(glxCtx, dpy);
+
+   return (GLXContext) glxCtx;
+}
+
+
+/** SGIX alias: forwards unchanged to Fake_glXGetVisualFromFBConfig(). */
+static XVisualInfo *
+Fake_glXGetVisualFromFBConfigSGIX( Display *dpy, GLXFBConfigSGIX config )
+{
+   XVisualInfo *visinfo = Fake_glXGetVisualFromFBConfig(dpy, config);
+   return visinfo;
+}
+
+
+/**
+ * Return the FBConfig (XMesaVisual) for an X visual: the one found by
+ * find_glx_visual(), or else whatever create_glx_visual() returns.
+ */
+static GLXFBConfigSGIX
+Fake_glXGetFBConfigFromVisualSGIX( Display *dpy, XVisualInfo *vis )
+{
+   XMesaVisual xmvis = find_glx_visual(dpy, vis);
+
+   if (xmvis == NULL) {
+      /* This visual wasn't found with glXChooseVisual() */
+      xmvis = create_glx_visual(dpy, vis);
+   }
+
+   return (GLXFBConfigSGIX) xmvis;
+}
+
+
+
+/*** GLX_SGIX_pbuffer ***/
+
+/**
+ * Create a pbuffer of the given size for an SGIX FBConfig.
+ *
+ * \param dpy         unused
+ * \param config      FBConfig (an XMesaVisual)
+ * \param width       requested width
+ * \param height      requested height
+ * \param attribList  zero-terminated list of (name, value) pairs; may be
+ *                    NULL.  Accepted names: GLX_PRESERVED_CONTENTS_SGIX
+ *                    and GLX_LARGEST_PBUFFER_SGIX.
+ * \return the X pixmap ID of the new buffer's front renderbuffer, or 0
+ *         on an unknown attribute or if the buffer cannot be created.
+ */
+static GLXPbufferSGIX
+Fake_glXCreateGLXPbufferSGIX(Display *dpy, GLXFBConfigSGIX config,
+                             unsigned int width, unsigned int height,
+                             int *attribList)
+{
+   XMesaVisual xmvis = (XMesaVisual) config;
+   XMesaBuffer xmbuf;
+   const int *attrib;
+   GLboolean useLargest = GL_FALSE, preserveContents = GL_FALSE;
+
+   (void) dpy;
+
+   for (attrib = attribList; attrib && *attrib; attrib++) {
+      switch (*attrib) {
+         case GLX_PRESERVED_CONTENTS_SGIX:
+            attrib++;
+            preserveContents = *attrib;
+            break;
+         case GLX_LARGEST_PBUFFER_SGIX:
+            attrib++;
+            useLargest = *attrib;
+            break;
+         default:
+            return 0;
+      }
+   }
+
+   xmbuf = XMesaCreatePBuffer( xmvis, 0, width, height);
+   /* buffer creation can fail; don't dereference a NULL buffer */
+   if (!xmbuf)
+      return 0;
+
+   /* Record the flags in the buffer, since the pbuffer query function
+    * reads them back from there.
+    */
+   xmbuf->largestPbuffer = useLargest;
+   xmbuf->preservedContents = preserveContents;
+
+   /* A GLXPbuffer handle must be an X Drawable because that's what
+    * glXMakeCurrent takes.
+    */
+   return (GLXPbuffer) xmbuf->frontxrb->pixmap;
+}
+
+
+/** Destroy the XMesa buffer behind an SGIX pbuffer handle, if one exists. */
+static void
+Fake_glXDestroyGLXPbufferSGIX( Display *dpy, GLXPbufferSGIX pbuf )
+{
+   XMesaBuffer buf = XMesaFindBuffer(dpy, pbuf);
+
+   if (!buf)
+      return;
+
+   XMesaDestroyBuffer(buf);
+}
+
+
+/**
+ * Query an attribute of an SGIX pbuffer.
+ *
+ * \param dpy        the display
+ * \param pbuf       pbuffer handle
+ * \param attribute  one of the GLX_*_SGIX pbuffer attributes
+ * \param value      receives the result; set to 0 for an unrecognized
+ *                   attribute, left untouched if the pbuffer is unknown
+ * \return always 0
+ */
+static int
+Fake_glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute, unsigned int *value)
+{
+   const XMesaBuffer xmbuf = XMesaFindBuffer(dpy, pbuf);
+
+   if (!xmbuf) {
+      /* Generate GLXBadPbufferSGIX for bad pbuffer */
+      return 0;
+   }
+
+   switch (attribute) {
+      case GLX_PRESERVED_CONTENTS_SGIX:
+         *value = xmbuf->preservedContents;
+         break;
+      case GLX_LARGEST_PBUFFER_SGIX:
+         *value = xmbuf->largestPbuffer;
+         break;
+      case GLX_WIDTH_SGIX:
+         *value = xmbuf->mesa_buffer.Width;
+         break;
+      case GLX_HEIGHT_SGIX:
+         *value = xmbuf->mesa_buffer.Height;
+         break;
+      case GLX_EVENT_MASK_SGIX:
+         /* report the mask recorded by the select-event functions */
+         *value = (unsigned int) xmbuf->selectedEvents;
+         break;
+      default:
+         *value = 0;
+         break;
+   }
+   return 0;
+}
+
+
+/**
+ * Record the event mask for a drawable in its XMesa buffer.
+ * Silently does nothing if the drawable has no buffer.
+ */
+static void
+Fake_glXSelectEventSGIX( Display *dpy, GLXDrawable drawable,
+                         unsigned long mask )
+{
+   XMesaBuffer buf = XMesaFindBuffer(dpy, drawable);
+
+   if (buf == NULL)
+      return;
+
+   /* Note: we'll never generate clobber events */
+   buf->selectedEvents = mask;
+}
+
+
+/**
+ * Return the event mask recorded for a drawable, or 0 if the drawable
+ * has no XMesa buffer.
+ */
+static void
+Fake_glXGetSelectedEventSGIX( Display *dpy, GLXDrawable drawable,
+                              unsigned long *mask )
+{
+   XMesaBuffer buf = XMesaFindBuffer(dpy, drawable);
+
+   *mask = buf ? buf->selectedEvents : 0;
+}
+
+
+
+/*** GLX_SGI_cushion ***/
+
+/** No-op stub: every argument is ignored. */
+static void
+Fake_glXCushionSGI( Display *dpy, Window win, float cushion )
+{
+   (void) dpy;
+   (void) win;
+   (void) cushion;
+}
+
+
+
+/*** GLX_SGIX_video_resize ***/
+
+/** No-op stub: every argument is ignored and 0 is returned. */
+static int
+Fake_glXBindChannelToWindowSGIX( Display *dpy, int screen, int channel,
+                                 Window window )
+{
+   (void) dpy;
+   (void) screen;
+   (void) channel;
+   (void) window;
+
+   return 0;
+}
+
+/** No-op stub: every argument is ignored and 0 is returned. */
+static int
+Fake_glXChannelRectSGIX( Display *dpy, int screen, int channel,
+                         int x, int y, int w, int h )
+{
+   (void) dpy;
+   (void) screen;
+   (void) channel;
+   (void) x;
+   (void) y;
+   (void) w;
+   (void) h;
+
+   return 0;
+}
+
+/**
+ * No-op stub: every argument is ignored, none of the output pointers is
+ * written and 0 is returned.
+ */
+static int
+Fake_glXQueryChannelRectSGIX( Display *dpy, int screen, int channel,
+                              int *x, int *y, int *w, int *h )
+{
+   (void) dpy;
+   (void) screen;
+   (void) channel;
+   (void) x;
+   (void) y;
+   (void) w;
+   (void) h;
+
+   return 0;
+}
+
+/**
+ * No-op stub: every argument is ignored, none of the output pointers is
+ * written and 0 is returned.
+ */
+static int
+Fake_glXQueryChannelDeltasSGIX( Display *dpy, int screen, int channel,
+                                int *dx, int *dy, int *dw, int *dh )
+{
+   (void) dpy;
+   (void) screen;
+   (void) channel;
+   (void) dx;
+   (void) dy;
+   (void) dw;
+   (void) dh;
+
+   return 0;
+}
+
+/** No-op stub: every argument is ignored and 0 is returned. */
+static int
+Fake_glXChannelRectSyncSGIX( Display *dpy, int screen, int channel,
+                             GLenum synctype )
+{
+   (void) dpy;
+   (void) screen;
+   (void) channel;
+   (void) synctype;
+
+   return 0;
+}
+
+
+
+/*** GLX_SGIX_dmbuffer **/
+
+#if defined(_DM_BUFFER_H_)
+/** No-op stub: every argument is ignored and False is returned. */
+static Bool
+Fake_glXAssociateDMPbufferSGIX( Display *dpy, GLXPbufferSGIX pbuffer,
+                                DMparams *params, DMbuffer dmbuffer )
+{
+   (void) dpy;
+   (void) pbuffer;
+   (void) params;
+   (void) dmbuffer;
+
+   return False;
+}
+#endif
+
+
+/*** GLX_SGIX_swap_group ***/
+
+/** No-op stub: every argument is ignored. */
+static void
+Fake_glXJoinSwapGroupSGIX( Display *dpy, GLXDrawable drawable,
+                           GLXDrawable member )
+{
+   (void) dpy;
+   (void) drawable;
+   (void) member;
+}
+
+
+
+/*** GLX_SGIX_swap_barrier ***/
+
+/** No-op stub: every argument is ignored. */
+static void
+Fake_glXBindSwapBarrierSGIX( Display *dpy, GLXDrawable drawable,
+                             int barrier )
+{
+   (void) dpy;
+   (void) drawable;
+   (void) barrier;
+}
+
+/**
+ * No-op stub: every argument is ignored, *max is not written and False
+ * is returned.
+ */
+static Bool
+Fake_glXQueryMaxSwapBarriersSGIX( Display *dpy, int screen, int *max )
+{
+   (void) dpy;
+   (void) screen;
+   (void) max;
+
+   return False;
+}
+
+
+
+/*** GLX_SUN_get_transparent_index ***/
+
+/**
+ * No-op stub: every argument is ignored, *pTransparent is not written
+ * and 0 is returned.
+ */
+static Status
+Fake_glXGetTransparentIndexSUN( Display *dpy, Window overlay,
+                                Window underlay, long *pTransparent )
+{
+   (void) dpy;
+   (void) overlay;
+   (void) underlay;
+   (void) pTransparent;
+
+   return 0;
+}
+
+
+
+/*** GLX_MESA_release_buffers ***/
+
+/**
+ * Release the depth, stencil, accum buffers attached to a GLXDrawable
+ * (a window or pixmap) prior to destroying the GLXDrawable.
+ *
+ * \return True if an XMesa buffer was found for the drawable and
+ *         destroyed, False otherwise.
+ */
+static Bool
+Fake_glXReleaseBuffersMESA( Display *dpy, GLXDrawable d )
+{
+   XMesaBuffer buf = XMesaFindBuffer(dpy, d);
+
+   if (!buf)
+      return False;
+
+   XMesaDestroyBuffer(buf);
+   return True;
+}
+
+
+
+/*** GLX_MESA_set_3dfx_mode ***/
+
+/** Forward the requested mode to XMesaSetFXmode() and return its result. */
+static Bool
+Fake_glXSet3DfxModeMESA( int mode )
+{
+   const Bool result = XMesaSetFXmode( mode );
+   return result;
+}
+
+
+
+/*** GLX_NV_vertex_array_range ***/
+/** No-op stub: every argument is ignored and NULL is returned. */
+static void *
+Fake_glXAllocateMemoryNV( GLsizei size, GLfloat readFrequency,
+                          GLfloat writeFrequency, GLfloat priority )
+{
+   (void) size;
+   (void) readFrequency;
+   (void) writeFrequency;
+   (void) priority;
+
+   return NULL;
+}
+
+
+/** No-op stub: the pointer is ignored. */
+static void
+Fake_glXFreeMemoryNV( GLvoid *pointer )
+{
+   (void) pointer;
+}
+
+
+/*** GLX_MESA_agp_offset ***/
+
+/** Stub: the pointer is ignored and ~0 is returned. */
+static GLuint
+Fake_glXGetAGPOffsetMESA( const GLvoid *pointer )
+{
+   (void) pointer;
+
+   return ~0;
+}
+
+
+/*** GLX_EXT_texture_from_pixmap ***/
+
+/**
+ * Forward to XMesaBindTexImage() for the XMesa buffer of the drawable.
+ * Does nothing if the drawable has no buffer.
+ */
+static void
+Fake_glXBindTexImageEXT( Display *dpy, GLXDrawable drawable, int buffer,
+                         const int *attrib_list )
+{
+   XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable);
+
+   if (!xmbuf)
+      return;
+
+   XMesaBindTexImage(dpy, xmbuf, buffer, attrib_list);
+}
+
+/**
+ * Forward to XMesaReleaseTexImage() for the XMesa buffer of the drawable.
+ * Does nothing if the drawable has no buffer.
+ */
+static void
+Fake_glXReleaseTexImageEXT( Display *dpy, GLXDrawable drawable, int buffer )
+{
+   XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable);
+
+   if (!xmbuf)
+      return;
+
+   XMesaReleaseTexImage(dpy, xmbuf, buffer);
+}
+
+
+/* silence warning */
+extern struct _glxapi_table *_mesa_GetGLXDispatchTable(void);
+
+
+/**
+ * Create a new GLX API dispatch table with its function pointers
+ * initialized to point to Mesa's "fake" GLX API functions.
+ * Note: there's a similar function (_real_GetGLXDispatchTable) that
+ * returns a new dispatch table with all pointers initialized to point
+ * to "real" GLX functions (which understand GLX wire protocol, etc).
+ */
+struct _glxapi_table *
+_mesa_GetGLXDispatchTable(void)
+{
+   static struct _glxapi_table glx;   /* single static table; refilled on every call */
+
+   /* be sure our dispatch table size (in pointer-sized slots) <= libGL's table */
+   {
+      GLuint size = sizeof(struct _glxapi_table) / sizeof(void *);
+      (void) size;   /* keeps size "used" when assert() is compiled out */
+      assert(_glxapi_get_dispatch_table_size() >= size);
+   }
+
+   /* initialize the whole table to no-ops; must happen before the assignments below */
+   _glxapi_set_no_op_table(&glx);
+
+   /* now initialize the table with the functions I implement */
+   glx.ChooseVisual = Fake_glXChooseVisual;
+   glx.CopyContext = Fake_glXCopyContext;
+   glx.CreateContext = Fake_glXCreateContext;
+   glx.CreateGLXPixmap = Fake_glXCreateGLXPixmap;
+   glx.DestroyContext = Fake_glXDestroyContext;
+   glx.DestroyGLXPixmap = Fake_glXDestroyGLXPixmap;
+   glx.GetConfig = Fake_glXGetConfig;
+   /*glx.GetCurrentContext = Fake_glXGetCurrentContext;*/  /* the glXGetCurrent*() calls are answered in glxapi.c without the table */
+   /*glx.GetCurrentDrawable = Fake_glXGetCurrentDrawable;*/
+   glx.IsDirect = Fake_glXIsDirect;
+   glx.MakeCurrent = Fake_glXMakeCurrent;
+   glx.QueryExtension = Fake_glXQueryExtension;
+   glx.QueryVersion = Fake_glXQueryVersion;
+   glx.SwapBuffers = Fake_glXSwapBuffers;
+   glx.UseXFont = Fake_glXUseXFont;
+   glx.WaitGL = Fake_glXWaitGL;
+   glx.WaitX = Fake_glXWaitX;
+
+   /*** GLX_VERSION_1_1 ***/
+   glx.GetClientString = Fake_glXGetClientString;
+   glx.QueryExtensionsString = Fake_glXQueryExtensionsString;
+   glx.QueryServerString = Fake_glXQueryServerString;
+
+   /*** GLX_VERSION_1_2 ***/
+   /*glx.GetCurrentDisplay = Fake_glXGetCurrentDisplay;*/
+
+   /*** GLX_VERSION_1_3 ***/
+   glx.ChooseFBConfig = Fake_glXChooseFBConfig;
+   glx.CreateNewContext = Fake_glXCreateNewContext;
+   glx.CreatePbuffer = Fake_glXCreatePbuffer;
+   glx.CreatePixmap = Fake_glXCreatePixmap;
+   glx.CreateWindow = Fake_glXCreateWindow;
+   glx.DestroyPbuffer = Fake_glXDestroyPbuffer;
+   glx.DestroyPixmap = Fake_glXDestroyPixmap;
+   glx.DestroyWindow = Fake_glXDestroyWindow;
+   /*glx.GetCurrentReadDrawable = Fake_glXGetCurrentReadDrawable;*/
+   glx.GetFBConfigAttrib = Fake_glXGetFBConfigAttrib;
+   glx.GetFBConfigs = Fake_glXGetFBConfigs;
+   glx.GetSelectedEvent = Fake_glXGetSelectedEvent;
+   glx.GetVisualFromFBConfig = Fake_glXGetVisualFromFBConfig;
+   glx.MakeContextCurrent = Fake_glXMakeContextCurrent;
+   glx.QueryContext = Fake_glXQueryContext;
+   glx.QueryDrawable = Fake_glXQueryDrawable;
+   glx.SelectEvent = Fake_glXSelectEvent;
+
+   /*** GLX_SGI_swap_control ***/
+   glx.SwapIntervalSGI = Fake_glXSwapIntervalSGI;
+
+   /*** GLX_SGI_video_sync ***/
+   glx.GetVideoSyncSGI = Fake_glXGetVideoSyncSGI;
+   glx.WaitVideoSyncSGI = Fake_glXWaitVideoSyncSGI;
+
+   /*** GLX_SGI_make_current_read ***/
+   glx.MakeCurrentReadSGI = Fake_glXMakeCurrentReadSGI;
+   /*glx.GetCurrentReadDrawableSGI = Fake_glXGetCurrentReadDrawableSGI;*/
+
+   /*** GLX_SGIX_video_source (only when the _VL_H header guard is defined) ***/
+#if defined(_VL_H)
+   glx.CreateGLXVideoSourceSGIX = Fake_glXCreateGLXVideoSourceSGIX;
+   glx.DestroyGLXVideoSourceSGIX = Fake_glXDestroyGLXVideoSourceSGIX;
+#endif
+
+   /*** GLX_EXT_import_context ***/
+   glx.FreeContextEXT = Fake_glXFreeContextEXT;
+   glx.GetContextIDEXT = Fake_glXGetContextIDEXT;
+   /*glx.GetCurrentDisplayEXT = Fake_glXGetCurrentDisplayEXT;*/
+   glx.ImportContextEXT = Fake_glXImportContextEXT;
+   glx.QueryContextInfoEXT = Fake_glXQueryContextInfoEXT;
+
+   /*** GLX_SGIX_fbconfig ***/
+   glx.GetFBConfigAttribSGIX = Fake_glXGetFBConfigAttribSGIX;
+   glx.ChooseFBConfigSGIX = Fake_glXChooseFBConfigSGIX;
+   glx.CreateGLXPixmapWithConfigSGIX = Fake_glXCreateGLXPixmapWithConfigSGIX;
+   glx.CreateContextWithConfigSGIX = Fake_glXCreateContextWithConfigSGIX;
+   glx.GetVisualFromFBConfigSGIX = Fake_glXGetVisualFromFBConfigSGIX;
+   glx.GetFBConfigFromVisualSGIX = Fake_glXGetFBConfigFromVisualSGIX;
+
+   /*** GLX_SGIX_pbuffer ***/
+   glx.CreateGLXPbufferSGIX = Fake_glXCreateGLXPbufferSGIX;
+   glx.DestroyGLXPbufferSGIX = Fake_glXDestroyGLXPbufferSGIX;
+   glx.QueryGLXPbufferSGIX = Fake_glXQueryGLXPbufferSGIX;
+   glx.SelectEventSGIX = Fake_glXSelectEventSGIX;
+   glx.GetSelectedEventSGIX = Fake_glXGetSelectedEventSGIX;
+
+   /*** GLX_SGI_cushion ***/
+   glx.CushionSGI = Fake_glXCushionSGI;
+
+   /*** GLX_SGIX_video_resize ***/
+   glx.BindChannelToWindowSGIX = Fake_glXBindChannelToWindowSGIX;
+   glx.ChannelRectSGIX = Fake_glXChannelRectSGIX;
+   glx.QueryChannelRectSGIX = Fake_glXQueryChannelRectSGIX;
+   glx.QueryChannelDeltasSGIX = Fake_glXQueryChannelDeltasSGIX;
+   glx.ChannelRectSyncSGIX = Fake_glXChannelRectSyncSGIX;
+
+   /*** GLX_SGIX_dmbuffer ***/
+#if defined(_DM_BUFFER_H_)
+   glx.AssociateDMPbufferSGIX = NULL;   /* no fake implementation is installed for this entry */
+#endif
+
+   /*** GLX_SGIX_swap_group ***/
+   glx.JoinSwapGroupSGIX = Fake_glXJoinSwapGroupSGIX;
+
+   /*** GLX_SGIX_swap_barrier ***/
+   glx.BindSwapBarrierSGIX = Fake_glXBindSwapBarrierSGIX;
+   glx.QueryMaxSwapBarriersSGIX = Fake_glXQueryMaxSwapBarriersSGIX;
+
+   /*** GLX_SUN_get_transparent_index ***/
+   glx.GetTransparentIndexSUN = Fake_glXGetTransparentIndexSUN;
+
+   /*** GLX_MESA_copy_sub_buffer ***/
+   glx.CopySubBufferMESA = Fake_glXCopySubBufferMESA;
+
+   /*** GLX_MESA_release_buffers ***/
+   glx.ReleaseBuffersMESA = Fake_glXReleaseBuffersMESA;
+
+   /*** GLX_MESA_pixmap_colormap ***/
+   glx.CreateGLXPixmapMESA = Fake_glXCreateGLXPixmapMESA;
+
+   /*** GLX_MESA_set_3dfx_mode ***/
+   glx.Set3DfxModeMESA = Fake_glXSet3DfxModeMESA;
+
+   /*** GLX_NV_vertex_array_range ***/
+   glx.AllocateMemoryNV = Fake_glXAllocateMemoryNV;
+   glx.FreeMemoryNV = Fake_glXFreeMemoryNV;
+
+   /*** GLX_MESA_agp_offset ***/
+   glx.GetAGPOffsetMESA = Fake_glXGetAGPOffsetMESA;
+
+   /*** GLX_EXT_texture_from_pixmap ***/
+   glx.BindTexImageEXT = Fake_glXBindTexImageEXT;
+   glx.ReleaseTexImageEXT = Fake_glXReleaseTexImageEXT;
+
+   return &glx;   /* address of the static table above, not a fresh allocation */
+}
diff --git a/src/mesa/drivers/x11/fxmesa.h b/src/mesa/drivers/x11/fxmesa.h
new file mode 100644
index 0000000000..f8e9661f9c
--- /dev/null
+++ b/src/mesa/drivers/x11/fxmesa.h
@@ -0,0 +1,103 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  4.0
+ * Copyright (C) 1995-2001  Brian Paul
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+/*
+ * FXMesa - 3Dfx Glide driver for Mesa.  Contributed by David Bucciarelli
+ *
+ * NOTE: This version requires Glide3 (http://sourceforge.net/projects/glide)
+ */
+
+
+#ifndef FXMESA_H
+#define FXMESA_H
+
+
+#include <glide.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define FXMESA_MAJOR_VERSION 6
+#define FXMESA_MINOR_VERSION 3
+
+
+/*
+ * Values for attribList parameter to fxMesaCreateContext():
+ */
+#define FXMESA_NONE		0	/* to terminate attribList */
+#define FXMESA_DOUBLEBUFFER	10
+#define FXMESA_ALPHA_SIZE	11      /* followed by an integer */
+#define FXMESA_DEPTH_SIZE	12      /* followed by an integer */
+#define FXMESA_STENCIL_SIZE	13      /* followed by an integer */
+#define FXMESA_ACCUM_SIZE	14      /* followed by an integer */
+#define FXMESA_COLORDEPTH	20      /* followed by an integer */
+#define FXMESA_SHARE_CONTEXT 990099	/* keep in sync with xmesa1.c! */
+
+
+
+typedef struct tfxMesaContext *fxMesaContext;
+
+
+#if defined (__BEOS__)
+#pragma export on
+#endif
+
+
+GLAPI fxMesaContext GLAPIENTRY fxMesaCreateContext(GLuint win, GrScreenResolution_t,
+						  GrScreenRefresh_t,
+						  const GLint attribList[]);
+
+GLAPI fxMesaContext GLAPIENTRY fxMesaCreateBestContext(GLuint win,
+						      GLint width, GLint height,
+						      const GLint attribList[]);
+GLAPI void GLAPIENTRY fxMesaDestroyContext(fxMesaContext ctx);
+
+GLAPI GLint GLAPIENTRY fxMesaSelectCurrentBoard(int n);
+
+GLAPI void GLAPIENTRY fxMesaMakeCurrent(fxMesaContext ctx);
+
+GLAPI fxMesaContext GLAPIENTRY fxMesaGetCurrentContext(void);
+
+GLAPI void GLAPIENTRY fxMesaSwapBuffers(void);
+
+GLAPI void GLAPIENTRY fxMesaSetNearFar(GLfloat nearVal, GLfloat farVal);
+
+GLAPI void GLAPIENTRY fxMesaUpdateScreenSize(fxMesaContext ctx);
+
+GLAPI void GLAPIENTRY fxCloseHardware(void);
+
+GLAPI void GLAPIENTRY fxGetScreenGeometry (GLint *w, GLint *h);
+
+
+#if defined (__BEOS__)
+#pragma export off
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
diff --git a/src/mesa/drivers/x11/glxapi.c b/src/mesa/drivers/x11/glxapi.c
new file mode 100644
index 0000000000..955eba4e94
--- /dev/null
+++ b/src/mesa/drivers/x11/glxapi.c
@@ -0,0 +1,1449 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ * 
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * This is the GLX API dispatcher.  Calls to the glX* functions are
+ * either routed to the real GLX encoders or to Mesa's pseudo-GLX functions.
+ * See the glxapi.h file for more details.
+ */
+
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "main/glheader.h"
+#include "main/compiler.h"
+#include "glapi/glapi.h"
+#include "glxapi.h"
+
+
+extern struct _glxapi_table *_real_GetGLXDispatchTable(void);
+extern struct _glxapi_table *_mesa_GetGLXDispatchTable(void);
+
+
+struct display_dispatch {
+   Display *Dpy;                    /* display this entry was created for */
+   struct _glxapi_table *Table;     /* dispatch table used for Dpy */
+   struct display_dispatch *Next;   /* next entry of the singly linked list */
+};
+
+
+/**
+ * When GLX_INDIRECT_RENDERING is defined, some symbols are missing in
+ * libglapi.a.  We need to define them here.
+ */
+#ifdef GLX_INDIRECT_RENDERING
+
+#include "glapi/glapitable.h"
+#include "glapi/glapidispatch.h"
+
+#define KEYWORD1 PUBLIC
+
+#if defined(USE_MGL_NAMESPACE)
+#define NAME(func)  mgl##func
+#else
+#define NAME(func)  gl##func
+#endif
+
+#define DISPATCH(FUNC, ARGS, MESSAGE)		\
+   CALL_ ## FUNC(GET_DISPATCH(), ARGS);
+
+#define RETURN_DISPATCH(FUNC, ARGS, MESSAGE) 	\
+   return CALL_ ## FUNC(GET_DISPATCH(), ARGS);
+
+/* skip normal ones */
+#define _GLAPI_SKIP_NORMAL_ENTRY_POINTS
+#include "glapi/glapitemp.h"
+
+#endif /* GLX_INDIRECT_RENDERING */
+
+
+/* Head of the list of display/dispatch pairs built by get_dispatch(). */
+static struct display_dispatch *DispatchList = NULL;
+
+/* Display -> Dispatch caching: the pair most recently returned by get_dispatch() */
+static Display *prevDisplay = NULL;
+static struct _glxapi_table *prevTable = NULL;
+
+
+static struct _glxapi_table *
+get_dispatch(Display *dpy)
+{
+   /*
+    * Return the dispatch table to use for dpy, or NULL when dpy is NULL,
+    * no table is available, or the list node cannot be allocated.
+    * The display is first looked up in DispatchList; a display that is
+    * not there yet is paired with _mesa_GetGLXDispatchTable()'s table
+    * and added to the list.  On success the prevDisplay/prevTable cache
+    * is updated to this display.
+    */
+   const struct display_dispatch *entry;
+   struct display_dispatch *fresh;
+   struct _glxapi_table *table;
+
+   if (dpy == NULL)
+      return NULL;
+
+   /* First look for an existing display/dispatch pair. */
+   for (entry = DispatchList; entry != NULL; entry = entry->Next) {
+      if (entry->Dpy == dpy) {
+         /* refresh the one-entry cache */
+         prevDisplay = dpy;
+         prevTable = entry->Table;
+         return entry->Table;
+      }
+   }
+
+   /* Display not seen before: it gets Mesa's pseudo-GLX table. */
+   table = _mesa_GetGLXDispatchTable();
+   if (table == NULL)
+      return NULL;
+
+   fresh = malloc(sizeof *fresh);
+   if (fresh == NULL)
+      return NULL;   /* out of memory */
+
+   /* Fill in the new pair and push it onto the front of the list. */
+   fresh->Dpy = dpy;
+   fresh->Table = table;
+   fresh->Next = DispatchList;
+   DispatchList = fresh;
+
+   /* Remember the pair in the one-entry cache. */
+   prevDisplay = dpy;
+   prevTable = table;
+   return table;
+}
+
+
+/* Don't use the GET_DISPATCH defined in glthread.h */
+#undef GET_DISPATCH
+/* Set TABLE to DPY's table: cached one, NULL for a NULL DPY, else get_dispatch(); args are expanded repeatedly. */
+#define GET_DISPATCH(DPY, TABLE)	\
+   if ((DPY) == prevDisplay) {		\
+      (TABLE) = prevTable;		\
+   }					\
+   else if (!(DPY)) {			\
+      (TABLE) = NULL;			\
+   }					\
+   else {				\
+      (TABLE) = get_dispatch(DPY);	\
+   }
+
+   
+
+
+/**
+ * GLX API current context.
+ */
+#if defined(GLX_USE_TLS)   /* thread-local variable declared with __thread */
+PUBLIC __thread void * CurrentContext
+    __attribute__((tls_model("initial-exec")));
+#elif defined(THREADS)     /* thread-specific-data slot accessed via _glthread_*TSD() */
+static _glthread_TSD ContextTSD;         /**< Per-thread context pointer */
+#else                      /* one file-scope pointer, no per-thread storage */
+static GLXContext CurrentContext = 0;
+#endif
+
+
+static void
+SetCurrentContext(GLXContext c)
+{
+   /* Store c in whichever current-context storage this build selected:
+    * the TSD slot for THREADS without TLS, otherwise CurrentContext. */
+#if !defined(GLX_USE_TLS) && defined(THREADS)
+   _glthread_SetTSD(&ContextTSD, c);
+#else
+   CurrentContext = c;
+#endif
+}
+
+
+/*
+ * GLX API entrypoints
+ */
+
+/*** GLX_VERSION_1_0 ***/
+
+XVisualInfo PUBLIC *
+glXChooseVisual(Display *dpy, int screen, int *list)
+{
+   /* Forward to ChooseVisual in dpy's table; NULL if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->ChooseVisual)(dpy, screen, list) : NULL;
+}
+
+
+void PUBLIC
+glXCopyContext(Display *dpy, GLXContext src, GLXContext dst, unsigned long mask)
+{
+   /* Forward to CopyContext in dpy's table; does nothing without a table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->CopyContext)(dpy, src, dst, mask);
+}
+
+
+GLXContext PUBLIC
+glXCreateContext(Display *dpy, XVisualInfo *visinfo, GLXContext shareList, Bool direct)
+{
+   /* Forward to CreateContext in dpy's table; 0 if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->CreateContext)(dpy, visinfo, shareList, direct) : 0;
+}
+
+
+GLXPixmap PUBLIC
+glXCreateGLXPixmap(Display *dpy, XVisualInfo *visinfo, Pixmap pixmap)
+{
+   /* Forward to CreateGLXPixmap in dpy's table; 0 if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->CreateGLXPixmap)(dpy, visinfo, pixmap) : 0;
+}
+
+
+void PUBLIC
+glXDestroyContext(Display *dpy, GLXContext ctx)
+{
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table) {   /* without a table nothing is destroyed or cleared */
+      if (ctx == glXGetCurrentContext())
+         SetCurrentContext(NULL);   /* forget ctx before it goes away */
+      (table->DestroyContext)(dpy, ctx);
+   }
+}
+
+
+void PUBLIC
+glXDestroyGLXPixmap(Display *dpy, GLXPixmap pixmap)
+{
+   /* Forward to DestroyGLXPixmap in dpy's table; no-op without a table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->DestroyGLXPixmap)(dpy, pixmap);
+}
+
+
+int PUBLIC
+glXGetConfig(Display *dpy, XVisualInfo *visinfo, int attrib, int *value)
+{
+   /* Forward to GetConfig; GLX_NO_EXTENSION if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->GetConfig)(dpy, visinfo, attrib, value) : GLX_NO_EXTENSION;
+}
+
+
+GLXContext PUBLIC
+glXGetCurrentContext(void)
+{
+   /* Read the context back from the storage SetCurrentContext() writes:
+    * the TSD slot for THREADS without TLS, otherwise CurrentContext. */
+#if !defined(GLX_USE_TLS) && defined(THREADS)
+   return (GLXContext) _glthread_GetTSD(&ContextTSD);
+#else
+   return CurrentContext;
+#endif
+}
+
+
+GLXDrawable PUBLIC
+glXGetCurrentDrawable(void)
+{
+   __GLXcontext *cur = (__GLXcontext *) glXGetCurrentContext();
+   return !cur ? 0 : cur->currentDrawable;   /* 0 when no context is current */
+}
+
+
+Bool PUBLIC
+glXIsDirect(Display *dpy, GLXContext ctx)
+{
+   /* Forward to IsDirect in dpy's table; False if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->IsDirect)(dpy, ctx) : False;
+}
+
+
+Bool PUBLIC
+glXMakeCurrent(Display *dpy, GLXDrawable drawable, GLXContext ctx)
+{
+   /* Make ctx current via dpy's table.  Only a successful call records
+    * ctx with SetCurrentContext(); False is returned without a table.
+    */
+   struct _glxapi_table *table;
+   Bool made = False;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      made = (table->MakeCurrent)(dpy, drawable, ctx);
+   if (made)
+      SetCurrentContext(ctx);
+   return made;
+}
+
+
+Bool PUBLIC
+glXQueryExtension(Display *dpy, int *errorb, int *event)
+{
+   /* Forward to QueryExtension in dpy's table; False if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->QueryExtension)(dpy, errorb, event) : False;
+}
+
+
+Bool PUBLIC
+glXQueryVersion(Display *dpy, int *maj, int *min)
+{
+   /* Forward to QueryVersion in dpy's table; False if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->QueryVersion)(dpy, maj, min) : False;
+}
+
+
+void PUBLIC
+glXSwapBuffers(Display *dpy, GLXDrawable drawable)
+{
+   /* Forward to SwapBuffers in dpy's table; no-op without a table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->SwapBuffers)(dpy, drawable);
+}
+
+
+void PUBLIC
+glXUseXFont(Font font, int first, int count, int listBase)
+{
+   /* Dispatch via the current context's display; no-op without a table. */
+   Display *curDpy = glXGetCurrentDisplay();
+   struct _glxapi_table *table;
+   GET_DISPATCH(curDpy, table);
+   if (table)
+      (table->UseXFont)(font, first, count, listBase);
+}
+
+
+void PUBLIC
+glXWaitGL(void)
+{
+   /* Dispatch via the current context's display; no-op without a table. */
+   Display *curDpy = glXGetCurrentDisplay();
+   struct _glxapi_table *table;
+   GET_DISPATCH(curDpy, table);
+   if (table)
+      (table->WaitGL)();
+}
+
+
+void PUBLIC
+glXWaitX(void)
+{
+   /* Dispatch via the current context's display; no-op without a table. */
+   Display *curDpy = glXGetCurrentDisplay();
+   struct _glxapi_table *table;
+   GET_DISPATCH(curDpy, table);
+   if (table)
+      (table->WaitX)();
+}
+
+
+
+/*** GLX_VERSION_1_1 ***/
+
+const char PUBLIC *
+glXGetClientString(Display *dpy, int name)
+{
+   /* Forward to GetClientString in dpy's table; NULL if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->GetClientString)(dpy, name) : NULL;
+}
+
+
+const char PUBLIC *
+glXQueryExtensionsString(Display *dpy, int screen)
+{
+   /* Forward to QueryExtensionsString; NULL if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->QueryExtensionsString)(dpy, screen) : NULL;
+}
+
+
+const char PUBLIC *
+glXQueryServerString(Display *dpy, int screen, int name)
+{
+   /* Forward to QueryServerString; NULL if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->QueryServerString)(dpy, screen, name) : NULL;
+}
+
+
+/*** GLX_VERSION_1_2 ***/
+
+Display PUBLIC *
+glXGetCurrentDisplay(void)
+{
+   /* Same code as in libGL's glxext.c: the display comes from the current context. */
+   __GLXcontext *cur = (__GLXcontext *) glXGetCurrentContext();
+
+   return cur ? cur->currentDpy : NULL;
+}
+
+
+
+/*** GLX_VERSION_1_3 ***/
+
+GLXFBConfig PUBLIC *
+glXChooseFBConfig(Display *dpy, int screen, const int *attribList, int *nitems)
+{
+   /* Forward to ChooseFBConfig in dpy's table; 0 if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->ChooseFBConfig)(dpy, screen, attribList, nitems) : 0;
+}
+
+
+GLXContext PUBLIC
+glXCreateNewContext(Display *dpy, GLXFBConfig config, int renderType, GLXContext shareList, Bool direct)
+{
+   /* Forward to CreateNewContext in dpy's table; 0 if dpy has no table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->CreateNewContext)(dpy, config, renderType, shareList, direct)
+                : 0;
+}
+
+
+GLXPbuffer PUBLIC
+glXCreatePbuffer(Display *dpy, GLXFBConfig config, const int *attribList)
+{
+   /* Forward to CreatePbuffer in dpy's table; 0 if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->CreatePbuffer)(dpy, config, attribList) : 0;
+}
+
+
+GLXPixmap PUBLIC
+glXCreatePixmap(Display *dpy, GLXFBConfig config, Pixmap pixmap, const int *attribList)
+{
+   /* Forward to CreatePixmap in dpy's table; 0 if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->CreatePixmap)(dpy, config, pixmap, attribList) : 0;
+}
+
+
+GLXWindow PUBLIC
+glXCreateWindow(Display *dpy, GLXFBConfig config, Window win, const int *attribList)
+{
+   /* Forward to CreateWindow in dpy's table; 0 if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->CreateWindow)(dpy, config, win, attribList) : 0;
+}
+
+
+void PUBLIC
+glXDestroyPbuffer(Display *dpy, GLXPbuffer pbuf)
+{
+   /* Forward to DestroyPbuffer in dpy's table; no-op without a table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->DestroyPbuffer)(dpy, pbuf);
+}
+
+
+void PUBLIC
+glXDestroyPixmap(Display *dpy, GLXPixmap pixmap)
+{
+   /* Forward to DestroyPixmap in dpy's table; no-op without a table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->DestroyPixmap)(dpy, pixmap);
+}
+
+
+void PUBLIC
+glXDestroyWindow(Display *dpy, GLXWindow window)
+{
+   /* Forward to DestroyWindow in dpy's table; no-op without a table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->DestroyWindow)(dpy, window);
+}
+
+
+GLXDrawable PUBLIC
+glXGetCurrentReadDrawable(void)
+{
+   __GLXcontext *cur = (__GLXcontext *) glXGetCurrentContext();
+   return !cur ? 0 : cur->currentReadable;   /* 0 when no context is current */
+}
+
+
+int PUBLIC
+glXGetFBConfigAttrib(Display *dpy, GLXFBConfig config, int attribute, int *value)
+{
+   /* Forward to GetFBConfigAttrib; GLX_NO_EXTENSION if dpy has no table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->GetFBConfigAttrib)(dpy, config, attribute, value)
+                : GLX_NO_EXTENSION;
+}
+
+
+GLXFBConfig PUBLIC *
+glXGetFBConfigs(Display *dpy, int screen, int *nelements)
+{
+   /* Forward to GetFBConfigs in dpy's table; 0 if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->GetFBConfigs)(dpy, screen, nelements) : 0;
+}
+
+void PUBLIC
+glXGetSelectedEvent(Display *dpy, GLXDrawable drawable, unsigned long *mask)
+{
+   /* Forward to GetSelectedEvent; without a table *mask is left untouched. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->GetSelectedEvent)(dpy, drawable, mask);
+}
+
+
+XVisualInfo PUBLIC *
+glXGetVisualFromFBConfig(Display *dpy, GLXFBConfig config)
+{
+   /* Forward to GetVisualFromFBConfig; NULL if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->GetVisualFromFBConfig)(dpy, config) : NULL;
+}
+
+
+Bool PUBLIC
+glXMakeContextCurrent(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx)
+{
+   /* Like glXMakeCurrent() with a separate read drawable: ctx is recorded
+    * with SetCurrentContext() only when the table call succeeds. */
+   struct _glxapi_table *table;
+   Bool made = False;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      made = (table->MakeContextCurrent)(dpy, draw, read, ctx);
+   if (made)
+      SetCurrentContext(ctx);
+   return made;
+}
+
+
+int PUBLIC
+glXQueryContext(Display *dpy, GLXContext ctx, int attribute, int *value)
+{
+   /* Debug builds assert that dpy has a table; when assert() is compiled
+    * out a missing table yields 0 (flagged "XXX correct?" by the author). */
+   struct _glxapi_table *t;
+   GET_DISPATCH(dpy, t);
+   assert(t);
+   return t ? (t->QueryContext)(dpy, ctx, attribute, value) : 0;
+}
+
+
+void PUBLIC
+glXQueryDrawable(Display *dpy, GLXDrawable draw, int attribute, unsigned int *value)
+{
+   /* Forward to QueryDrawable; without a table *value is left untouched. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->QueryDrawable)(dpy, draw, attribute, value);
+}
+
+
+void PUBLIC
+glXSelectEvent(Display *dpy, GLXDrawable drawable, unsigned long mask)
+{
+   /* Forward to SelectEvent in dpy's table; no-op without a table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->SelectEvent)(dpy, drawable, mask);
+}
+
+
+
+/*** GLX_SGI_swap_control ***/
+
+int PUBLIC
+glXSwapIntervalSGI(int interval)
+{
+   /* Dispatch via the current context's display; 0 without a table. */
+   Display *curDpy = glXGetCurrentDisplay();
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(curDpy, table);
+   return table ? (table->SwapIntervalSGI)(interval) : 0;
+}
+
+
+
+/*** GLX_SGI_video_sync ***/
+
+int PUBLIC
+glXGetVideoSyncSGI(unsigned int *count)
+{
+   Display *curDpy = glXGetCurrentDisplay();
+   struct _glxapi_table *table;
+   GET_DISPATCH(curDpy, table);
+   if (table && glXGetCurrentContext())
+      return (table->GetVideoSyncSGI)(count);
+   return GLX_BAD_CONTEXT;   /* no dispatch table or no current context */
+}
+
+int PUBLIC
+glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count)
+{
+   Display *curDpy = glXGetCurrentDisplay();
+   struct _glxapi_table *table;
+   GET_DISPATCH(curDpy, table);
+   if (table && glXGetCurrentContext())
+      return (table->WaitVideoSyncSGI)(divisor, remainder, count);
+   return GLX_BAD_CONTEXT;   /* no dispatch table or no current context */
+}
+
+
+
+/*** GLX_SGI_make_current_read ***/
+Bool PUBLIC
+glXMakeCurrentReadSGI(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx)
+{
+   struct _glxapi_table *t;
+   GET_DISPATCH(dpy, t);
+   if (!t || !(t->MakeCurrentReadSGI)(dpy, draw, read, ctx))
+      return False;   /* no dispatch table, or the table call failed */
+   SetCurrentContext(ctx);   /* record ctx as glXMakeContextCurrent() does, so glXGetCurrentContext() stays correct */
+   return True;
+}
+
+GLXDrawable PUBLIC
+glXGetCurrentReadDrawableSGI(void)
+{
+   return glXGetCurrentReadDrawable();   /* simply forwards to glXGetCurrentReadDrawable() */
+}
+
+
+#if defined(_VL_H)
+
+GLXVideoSourceSGIX PUBLIC
+glXCreateGLXVideoSourceSGIX(Display *dpy, int screen, VLServer server, VLPath path, int nodeClass, VLNode drainNode)
+{
+   /* Forward to CreateGLXVideoSourceSGIX; 0 if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->CreateGLXVideoSourceSGIX)(dpy, screen, server, path, nodeClass, drainNode)
+                : 0;
+}
+
+void PUBLIC
+glXDestroyGLXVideoSourceSGIX(Display *dpy, GLXVideoSourceSGIX src)
+{
+   struct _glxapi_table *t;
+   GET_DISPATCH(dpy, t);
+   if (!t)
+      return;   /* void function: no value may be returned; no-op without a table */
+   (t->DestroyGLXVideoSourceSGIX)(dpy, src);
+}
+
+#endif
+
+
+/*** GLX_EXT_import_context ***/
+
+void PUBLIC
+glXFreeContextEXT(Display *dpy, GLXContext context)
+{
+   /* Forward to FreeContextEXT in dpy's table; no-op without a table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->FreeContextEXT)(dpy, context);
+}
+
+GLXContextID PUBLIC
+glXGetContextIDEXT(const GLXContext context)
+{
+   return context ? ((__GLXcontext *) context)->xid : 0;   /* xid of the context; 0 instead of dereferencing NULL */
+}
+
+Display PUBLIC *
+glXGetCurrentDisplayEXT(void)
+{
+   return glXGetCurrentDisplay();   /* simply forwards to glXGetCurrentDisplay() */
+}
+
+GLXContext PUBLIC
+glXImportContextEXT(Display *dpy, GLXContextID contextID)
+{
+   /* Forward to ImportContextEXT in dpy's table; 0 if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->ImportContextEXT)(dpy, contextID) : 0;
+}
+
+int PUBLIC
+glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute,int *value)
+{
+   /* Forward to QueryContextInfoEXT; 0 without a table ("XXX ok?" per the author). */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->QueryContextInfoEXT)(dpy, context, attribute, value)
+                : 0;
+}
+
+
+
+/*** GLX_SGIX_fbconfig ***/
+
+int PUBLIC
+glXGetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config, int attribute, int *value)
+{
+   /* Forward to GetFBConfigAttribSGIX; 0 if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->GetFBConfigAttribSGIX)(dpy, config, attribute, value)
+                : 0;
+}
+
+GLXFBConfigSGIX PUBLIC *
+glXChooseFBConfigSGIX(Display *dpy, int screen, int *attrib_list, int *nelements)
+{
+   /* Forward to ChooseFBConfigSGIX; 0 if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->ChooseFBConfigSGIX)(dpy, screen, attrib_list, nelements)
+                : 0;
+}
+
+GLXPixmap PUBLIC
+glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pixmap)
+{
+   /* Forward to CreateGLXPixmapWithConfigSGIX; 0 if dpy has no table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->CreateGLXPixmapWithConfigSGIX)(dpy, config, pixmap)
+                : 0;
+}
+
+GLXContext PUBLIC
+glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int render_type, GLXContext share_list, Bool direct)
+{
+   /* Forward to CreateContextWithConfigSGIX; 0 if dpy has no table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->CreateContextWithConfigSGIX)(dpy, config, render_type, share_list, direct)
+                : 0;
+}
+
+XVisualInfo PUBLIC *
+glXGetVisualFromFBConfigSGIX(Display *dpy, GLXFBConfigSGIX config)
+{
+   /* Forward to GetVisualFromFBConfigSGIX; 0 if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->GetVisualFromFBConfigSGIX)(dpy, config) : 0;
+}
+
+GLXFBConfigSGIX PUBLIC
+glXGetFBConfigFromVisualSGIX(Display *dpy, XVisualInfo *vis)
+{
+   /* Forward to GetFBConfigFromVisualSGIX; 0 if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->GetFBConfigFromVisualSGIX)(dpy, vis) : 0;
+}
+
+
+
+/*** GLX_SGIX_pbuffer ***/
+
+GLXPbufferSGIX PUBLIC
+glXCreateGLXPbufferSGIX(Display *dpy, GLXFBConfigSGIX config, unsigned int width, unsigned int height, int *attrib_list)
+{
+   /* Forward to CreateGLXPbufferSGIX; 0 if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->CreateGLXPbufferSGIX)(dpy, config, width, height, attrib_list)
+                : 0;
+}
+
+void PUBLIC
+glXDestroyGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf)
+{
+   /* Forward to DestroyGLXPbufferSGIX; no-op without a dispatch table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->DestroyGLXPbufferSGIX)(dpy, pbuf);
+}
+
+int PUBLIC
+glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute, unsigned int *value)
+{
+   /* Forward to QueryGLXPbufferSGIX; 0 if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->QueryGLXPbufferSGIX)(dpy, pbuf, attribute, value)
+                : 0;
+}
+
+void PUBLIC
+glXSelectEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long mask)
+{
+   /* Forward to SelectEventSGIX in dpy's table; no-op without a table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->SelectEventSGIX)(dpy, drawable, mask);
+}
+
+void PUBLIC
+glXGetSelectedEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long *mask)
+{
+   /* Forward to GetSelectedEventSGIX; without a table *mask is untouched. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->GetSelectedEventSGIX)(dpy, drawable, mask);
+}
+
+
+
+/*** GLX_SGI_cushion ***/
+
+void PUBLIC
+glXCushionSGI(Display *dpy, Window win, float cushion)
+{
+   /* Forward to CushionSGI in dpy's table; no-op without a table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->CushionSGI)(dpy, win, cushion);
+}
+
+
+
+/*** GLX_SGIX_video_resize ***/
+
+int PUBLIC
+glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel , Window window)
+{
+   /* Forward to BindChannelToWindowSGIX; 0 if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->BindChannelToWindowSGIX)(dpy, screen, channel, window)
+                : 0;
+}
+
+int PUBLIC
+glXChannelRectSGIX(Display *dpy, int screen, int channel, int x, int y, int w, int h)
+{
+   /* Forward to ChannelRectSGIX in dpy's table; 0 if dpy has no table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->ChannelRectSGIX)(dpy, screen, channel, x, y, w, h)
+                : 0;
+}
+
+int PUBLIC
+glXQueryChannelRectSGIX(Display *dpy, int screen, int channel, int *x, int *y, int *w, int *h)
+{
+   /* Forward to QueryChannelRectSGIX; 0 if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->QueryChannelRectSGIX)(dpy, screen, channel, x, y, w, h)
+                : 0;
+}
+
+int PUBLIC
+glXQueryChannelDeltasSGIX(Display *dpy, int screen, int channel, int *dx, int *dy, int *dw, int *dh)
+{
+   /* Forward to QueryChannelDeltasSGIX; 0 if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->QueryChannelDeltasSGIX)(dpy, screen, channel, dx, dy, dw, dh)
+                : 0;
+}
+
+int PUBLIC
+glXChannelRectSyncSGIX(Display *dpy, int screen, int channel, GLenum synctype)
+{
+   /* Forward to ChannelRectSyncSGIX; 0 if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->ChannelRectSyncSGIX)(dpy, screen, channel, synctype)
+                : 0;
+}
+
+
+
+#if defined(_DM_BUFFER_H_)
+
+Bool PUBLIC
+glXAssociateDMPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuffer, DMparams *params, DMbuffer dmbuffer)
+{
+   /* Forward to AssociateDMPbufferSGIX; False if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->AssociateDMPbufferSGIX)(dpy, pbuffer, params, dmbuffer)
+                : False;
+}
+
+#endif
+
+
+/*** GLX_SGIX_swap_group ***/
+
+void PUBLIC
+glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable, GLXDrawable member)
+{
+   /* Forward to JoinSwapGroupSGIX in dpy's table; no-op without a table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->JoinSwapGroupSGIX)(dpy, drawable, member);
+}
+
+
+/*** GLX_SGIX_swap_barrier ***/
+
+void PUBLIC
+glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable, int barrier)
+{
+   /* Forward to BindSwapBarrierSGIX in dpy's table; no-op without a table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->BindSwapBarrierSGIX)(dpy, drawable, barrier);
+}
+
+Bool PUBLIC
+glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen, int *max)
+{
+   /* Forward to QueryMaxSwapBarriersSGIX; False if dpy has no table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->QueryMaxSwapBarriersSGIX)(dpy, screen, max) : False;
+}
+
+
+
+/*** GLX_SUN_get_transparent_index ***/
+
+Status PUBLIC
+glXGetTransparentIndexSUN(Display *dpy, Window overlay, Window underlay, long *pTransparent)
+{
+   /* Forward to GetTransparentIndexSUN; False if dpy has no dispatch table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   return table ? (table->GetTransparentIndexSUN)(dpy, overlay, underlay, pTransparent)
+                : False;
+}
+
+
+
+/*** GLX_MESA_copy_sub_buffer ***/
+
+void PUBLIC
+glXCopySubBufferMESA(Display *dpy, GLXDrawable drawable, int x, int y, int width, int height)
+{
+   /* Forward to CopySubBufferMESA in dpy's table; no-op without a table. */
+   struct _glxapi_table *table;
+   GET_DISPATCH(dpy, table);
+   if (table)
+      (table->CopySubBufferMESA)(dpy, drawable, x, y, width, height);
+}
+
+
+
+/*** GLX_MESA_release_buffers ***/
+
+Bool PUBLIC
+glXReleaseBuffersMESA(Display *dpy, Window w)
+{
+   /* Forward to ReleaseBuffersMESA in dpy's table; False without a table. */
+   struct _glxapi_table *table;
+
+   GET_DISPATCH(dpy, table);
+   return table ? (table->ReleaseBuffersMESA)(dpy, w) : False;
+}
+
+
+
+/*** GLX_MESA_pixmap_colormap ***/
+
+/* Dispatch through the table obtained for dpy; returns 0 without one. */
+GLXPixmap PUBLIC
+glXCreateGLXPixmapMESA(Display *dpy, XVisualInfo *visinfo, Pixmap pixmap, Colormap cmap)
+{
+   struct _glxapi_table *t;
+   GET_DISPATCH(dpy, t);
+   return t ? (t->CreateGLXPixmapMESA)(dpy, visinfo, pixmap, cmap)
+            : 0;
+}
+
+
+
+/*** GLX_MESA_set_3dfx_mode ***/
+
+/* Takes no Display argument, so the table is looked up for the display
+ * returned by glXGetCurrentDisplay().  Returns False without a table. */
+Bool PUBLIC
+glXSet3DfxModeMESA(int mode)
+{
+   Display *dpy = glXGetCurrentDisplay();
+   struct _glxapi_table *t;
+   GET_DISPATCH(dpy, t);
+   return t ? (t->Set3DfxModeMESA)(mode) : False;
+}
+
+
+
+/*** GLX_NV_vertex_array_range ***/
+
+/* Dispatched via the current display's table; NULL when there is none. */
+void PUBLIC *
+glXAllocateMemoryNV( GLsizei size,
+                     GLfloat readFrequency,
+                     GLfloat writeFrequency,
+                     GLfloat priority )
+{
+   Display *dpy = glXGetCurrentDisplay();
+   struct _glxapi_table *t;
+   GET_DISPATCH(dpy, t);
+   return t ? (t->AllocateMemoryNV)(size, readFrequency, writeFrequency, priority)
+            : NULL;
+}
+
+
+/* Dispatched via the current display's table; no-op when there is none. */
+void PUBLIC
+glXFreeMemoryNV( GLvoid *pointer )
+{
+   Display *dpy = glXGetCurrentDisplay();
+   struct _glxapi_table *t;
+   GET_DISPATCH(dpy, t);
+   if (t)
+      (t->FreeMemoryNV)(pointer);
+}
+
+
+
+
+/*** GLX_MESA_agp_offset */
+
+/* Dispatched via the current display's table.
+ * Returns ~0 (all bits set) when no table is available. */
+GLuint PUBLIC
+glXGetAGPOffsetMESA( const GLvoid *pointer )
+{
+   Display *dpy = glXGetCurrentDisplay();
+   struct _glxapi_table *t;
+   GET_DISPATCH(dpy, t);
+   return t ? (t->GetAGPOffsetMESA)(pointer) : ~0;
+}
+
+
+/*** GLX_MESA_allocate_memory */
+
+void PUBLIC *
+glXAllocateMemoryMESA(Display *dpy, int scrn, size_t size,
+                      float readfreq, float writefreq, float priority)
+{
+   /* dummy: not dispatched; every argument is ignored and NULL is returned */
+   return NULL;
+}
+
+void PUBLIC
+glXFreeMemoryMESA(Display *dpy, int scrn, void *pointer)
+{
+   /* dummy: not dispatched; every argument is ignored, nothing is freed */
+}
+
+
+GLuint PUBLIC
+glXGetMemoryOffsetMESA(Display *dpy, int scrn, const void *pointer)
+{
+   /* dummy: not dispatched; every argument is ignored, result is always 0 */
+   return 0;
+}
+
+
+/*** GLX_EXT_texture_from_pixmap */
+
+void PUBLIC
+glXBindTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer,
+                   const int *attrib_list)
+{
+   struct _glxapi_table *t;   /* set by GET_DISPATCH; may end up NULL */
+   GET_DISPATCH(dpy, t);
+   if (t != NULL)
+      (*t->BindTexImageEXT)(dpy, drawable, buffer, attrib_list);
+}
+
+void PUBLIC
+glXReleaseTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer)
+{
+   struct _glxapi_table *t;   /* set by GET_DISPATCH; may end up NULL */
+   GET_DISPATCH(dpy, t);
+   if (t != NULL)
+      (*t->ReleaseTexImageEXT)(dpy, drawable, buffer);
+}
+
+
+/**********************************************************************/
+/* GLX API management functions                                       */
+/**********************************************************************/
+
+
+const char *
+_glxapi_get_version(void)
+{
+   return "1.3";   /* constant string literal; callers must not modify it */
+}
+
+
+/* Return a NULL-terminated static array of extension names; each entry is
+ * present only if its GLX_* macro was defined when this file was compiled.
+ */
+const char **
+_glxapi_get_extensions(void)
+{
+   static const char *extensions[] = {
+#ifdef GLX_EXT_import_context
+      "GLX_EXT_import_context",
+#endif
+#ifdef GLX_SGI_video_sync
+      "GLX_SGI_video_sync",
+#endif
+#ifdef GLX_MESA_copy_sub_buffer
+      "GLX_MESA_copy_sub_buffer",
+#endif
+#ifdef GLX_MESA_release_buffers
+      "GLX_MESA_release_buffers",
+#endif
+#ifdef GLX_MESA_pixmap_colormap
+      "GLX_MESA_pixmap_colormap",
+#endif
+#ifdef GLX_MESA_set_3dfx_mode
+      "GLX_MESA_set_3dfx_mode",
+#endif
+#ifdef GLX_SGIX_fbconfig
+      "GLX_SGIX_fbconfig",
+#endif
+#ifdef GLX_SGIX_pbuffer
+      "GLX_SGIX_pbuffer",
+#endif
+#ifdef GLX_EXT_texture_from_pixmap
+      "GLX_EXT_texture_from_pixmap",
+#endif
+#ifdef GLX_INTEL_swap_event
+      "GLX_INTEL_swap_event",
+#endif
+      NULL
+   };
+   return extensions;
+}
+
+
+/* Count of pointer-sized entries (not bytes) in struct _glxapi_table. */
+GLuint
+_glxapi_get_dispatch_table_size(void)
+{
+   const size_t table_bytes = sizeof(struct _glxapi_table);
+   const size_t entry_bytes = sizeof(void *);
+   return table_bytes / entry_bytes;
+}
+
+
+static int
+generic_no_op_func(void)
+{
+   return 0;   /* placeholder that _glxapi_set_no_op_table installs everywhere */
+}
+
+
+/*
+ * Point every entry of dispatch table t at generic_no_op_func.
+ */
+void
+_glxapi_set_no_op_table(struct _glxapi_table *t)
+{
+   typedef int (*nop_func)(void);
+   nop_func *slot = (nop_func *) t;
+   nop_func *const end = slot + _glxapi_get_dispatch_table_size();
+
+   while (slot != end) {
+      *slot++ = generic_no_op_func;
+   }
+}
+
+
+struct name_address_pair {
+   const char *Name;          /* glX function name; NULL marks the end of a list */
+   __GLXextFuncPtr Address;   /* the function, cast to the generic pointer type */
+};
+
+static struct name_address_pair GLX_functions[] = {   /* searched linearly by _glxapi_get_proc_address */
+   /*** GLX_VERSION_1_0 ***/
+   { "glXChooseVisual", (__GLXextFuncPtr) glXChooseVisual },
+   { "glXCopyContext", (__GLXextFuncPtr) glXCopyContext },
+   { "glXCreateContext", (__GLXextFuncPtr) glXCreateContext },
+   { "glXCreateGLXPixmap", (__GLXextFuncPtr) glXCreateGLXPixmap },
+   { "glXDestroyContext", (__GLXextFuncPtr) glXDestroyContext },
+   { "glXDestroyGLXPixmap", (__GLXextFuncPtr) glXDestroyGLXPixmap },
+   { "glXGetConfig", (__GLXextFuncPtr) glXGetConfig },
+   { "glXGetCurrentContext", (__GLXextFuncPtr) glXGetCurrentContext },
+   { "glXGetCurrentDrawable", (__GLXextFuncPtr) glXGetCurrentDrawable },
+   { "glXIsDirect", (__GLXextFuncPtr) glXIsDirect },
+   { "glXMakeCurrent", (__GLXextFuncPtr) glXMakeCurrent },
+   { "glXQueryExtension", (__GLXextFuncPtr) glXQueryExtension },
+   { "glXQueryVersion", (__GLXextFuncPtr) glXQueryVersion },
+   { "glXSwapBuffers", (__GLXextFuncPtr) glXSwapBuffers },
+   { "glXUseXFont", (__GLXextFuncPtr) glXUseXFont },
+   { "glXWaitGL", (__GLXextFuncPtr) glXWaitGL },
+   { "glXWaitX", (__GLXextFuncPtr) glXWaitX },
+
+   /*** GLX_VERSION_1_1 ***/
+   { "glXGetClientString", (__GLXextFuncPtr) glXGetClientString },
+   { "glXQueryExtensionsString", (__GLXextFuncPtr) glXQueryExtensionsString },
+   { "glXQueryServerString", (__GLXextFuncPtr) glXQueryServerString },
+
+   /*** GLX_VERSION_1_2 ***/
+   { "glXGetCurrentDisplay", (__GLXextFuncPtr) glXGetCurrentDisplay },
+
+   /*** GLX_VERSION_1_3 ***/
+   { "glXChooseFBConfig", (__GLXextFuncPtr) glXChooseFBConfig },
+   { "glXCreateNewContext", (__GLXextFuncPtr) glXCreateNewContext },
+   { "glXCreatePbuffer", (__GLXextFuncPtr) glXCreatePbuffer },
+   { "glXCreatePixmap", (__GLXextFuncPtr) glXCreatePixmap },
+   { "glXCreateWindow", (__GLXextFuncPtr) glXCreateWindow },
+   { "glXDestroyPbuffer", (__GLXextFuncPtr) glXDestroyPbuffer },
+   { "glXDestroyPixmap", (__GLXextFuncPtr) glXDestroyPixmap },
+   { "glXDestroyWindow", (__GLXextFuncPtr) glXDestroyWindow },
+   { "glXGetCurrentReadDrawable", (__GLXextFuncPtr) glXGetCurrentReadDrawable },
+   { "glXGetFBConfigAttrib", (__GLXextFuncPtr) glXGetFBConfigAttrib },
+   { "glXGetFBConfigs", (__GLXextFuncPtr) glXGetFBConfigs },
+   { "glXGetSelectedEvent", (__GLXextFuncPtr) glXGetSelectedEvent },
+   { "glXGetVisualFromFBConfig", (__GLXextFuncPtr) glXGetVisualFromFBConfig },
+   { "glXMakeContextCurrent", (__GLXextFuncPtr) glXMakeContextCurrent },
+   { "glXQueryContext", (__GLXextFuncPtr) glXQueryContext },
+   { "glXQueryDrawable", (__GLXextFuncPtr) glXQueryDrawable },
+   { "glXSelectEvent", (__GLXextFuncPtr) glXSelectEvent },
+
+   /*** GLX_VERSION_1_4 ***/
+   { "glXGetProcAddress", (__GLXextFuncPtr) glXGetProcAddress },
+
+   /*** GLX_SGI_swap_control ***/
+   { "glXSwapIntervalSGI", (__GLXextFuncPtr) glXSwapIntervalSGI },
+
+   /*** GLX_SGI_video_sync ***/
+   { "glXGetVideoSyncSGI", (__GLXextFuncPtr) glXGetVideoSyncSGI },
+   { "glXWaitVideoSyncSGI", (__GLXextFuncPtr) glXWaitVideoSyncSGI },
+
+   /*** GLX_SGI_make_current_read ***/
+   { "glXMakeCurrentReadSGI", (__GLXextFuncPtr) glXMakeCurrentReadSGI },
+   { "glXGetCurrentReadDrawableSGI", (__GLXextFuncPtr) glXGetCurrentReadDrawableSGI },
+
+   /*** GLX_SGIX_video_source ***/
+#if defined(_VL_H)   /* NOTE(review): glxapi.h tests _VL_H_ for these entries -- confirm */
+   { "glXCreateGLXVideoSourceSGIX", (__GLXextFuncPtr) glXCreateGLXVideoSourceSGIX },
+   { "glXDestroyGLXVideoSourceSGIX", (__GLXextFuncPtr) glXDestroyGLXVideoSourceSGIX },
+#endif
+
+   /*** GLX_EXT_import_context ***/
+   { "glXFreeContextEXT", (__GLXextFuncPtr) glXFreeContextEXT },
+   { "glXGetContextIDEXT", (__GLXextFuncPtr) glXGetContextIDEXT },
+   { "glXGetCurrentDisplayEXT", (__GLXextFuncPtr) glXGetCurrentDisplayEXT },
+   { "glXImportContextEXT", (__GLXextFuncPtr) glXImportContextEXT },
+   { "glXQueryContextInfoEXT", (__GLXextFuncPtr) glXQueryContextInfoEXT },
+
+   /*** GLX_SGIX_fbconfig ***/
+   { "glXGetFBConfigAttribSGIX", (__GLXextFuncPtr) glXGetFBConfigAttribSGIX },
+   { "glXChooseFBConfigSGIX", (__GLXextFuncPtr) glXChooseFBConfigSGIX },
+   { "glXCreateGLXPixmapWithConfigSGIX", (__GLXextFuncPtr) glXCreateGLXPixmapWithConfigSGIX },
+   { "glXCreateContextWithConfigSGIX", (__GLXextFuncPtr) glXCreateContextWithConfigSGIX },
+   { "glXGetVisualFromFBConfigSGIX", (__GLXextFuncPtr) glXGetVisualFromFBConfigSGIX },
+   { "glXGetFBConfigFromVisualSGIX", (__GLXextFuncPtr) glXGetFBConfigFromVisualSGIX },
+
+   /*** GLX_SGIX_pbuffer ***/
+   { "glXCreateGLXPbufferSGIX", (__GLXextFuncPtr) glXCreateGLXPbufferSGIX },
+   { "glXDestroyGLXPbufferSGIX", (__GLXextFuncPtr) glXDestroyGLXPbufferSGIX },
+   { "glXQueryGLXPbufferSGIX", (__GLXextFuncPtr) glXQueryGLXPbufferSGIX },
+   { "glXSelectEventSGIX", (__GLXextFuncPtr) glXSelectEventSGIX },
+   { "glXGetSelectedEventSGIX", (__GLXextFuncPtr) glXGetSelectedEventSGIX },
+
+   /*** GLX_SGI_cushion ***/
+   { "glXCushionSGI", (__GLXextFuncPtr) glXCushionSGI },
+
+   /*** GLX_SGIX_video_resize ***/
+   { "glXBindChannelToWindowSGIX", (__GLXextFuncPtr) glXBindChannelToWindowSGIX },
+   { "glXChannelRectSGIX", (__GLXextFuncPtr) glXChannelRectSGIX },
+   { "glXQueryChannelRectSGIX", (__GLXextFuncPtr) glXQueryChannelRectSGIX },
+   { "glXQueryChannelDeltasSGIX", (__GLXextFuncPtr) glXQueryChannelDeltasSGIX },
+   { "glXChannelRectSyncSGIX", (__GLXextFuncPtr) glXChannelRectSyncSGIX },
+
+   /*** GLX_SGIX_dmbuffer ***/
+#if defined(_DM_BUFFER_H_)
+   { "glXAssociateDMPbufferSGIX", (__GLXextFuncPtr) glXAssociateDMPbufferSGIX },
+#endif
+
+   /*** GLX_SGIX_swap_group ***/
+   { "glXJoinSwapGroupSGIX", (__GLXextFuncPtr) glXJoinSwapGroupSGIX },
+
+   /*** GLX_SGIX_swap_barrier ***/
+   { "glXBindSwapBarrierSGIX", (__GLXextFuncPtr) glXBindSwapBarrierSGIX },
+   { "glXQueryMaxSwapBarriersSGIX", (__GLXextFuncPtr) glXQueryMaxSwapBarriersSGIX },
+
+   /*** GLX_SUN_get_transparent_index ***/
+   { "glXGetTransparentIndexSUN", (__GLXextFuncPtr) glXGetTransparentIndexSUN },
+
+   /*** GLX_MESA_copy_sub_buffer ***/
+   { "glXCopySubBufferMESA", (__GLXextFuncPtr) glXCopySubBufferMESA },
+
+   /*** GLX_MESA_pixmap_colormap ***/
+   { "glXCreateGLXPixmapMESA", (__GLXextFuncPtr) glXCreateGLXPixmapMESA },
+
+   /*** GLX_MESA_release_buffers ***/
+   { "glXReleaseBuffersMESA", (__GLXextFuncPtr) glXReleaseBuffersMESA },
+
+   /*** GLX_MESA_set_3dfx_mode ***/
+   { "glXSet3DfxModeMESA", (__GLXextFuncPtr) glXSet3DfxModeMESA },
+
+   /*** GLX_ARB_get_proc_address ***/
+   { "glXGetProcAddressARB", (__GLXextFuncPtr) glXGetProcAddressARB },
+
+   /*** GLX_NV_vertex_array_range ***/
+   { "glXAllocateMemoryNV", (__GLXextFuncPtr) glXAllocateMemoryNV },
+   { "glXFreeMemoryNV", (__GLXextFuncPtr) glXFreeMemoryNV },
+
+   /*** GLX_MESA_agp_offset ***/
+   { "glXGetAGPOffsetMESA", (__GLXextFuncPtr) glXGetAGPOffsetMESA },
+
+   /*** GLX_MESA_allocate_memory ***/
+   { "glXAllocateMemoryMESA", (__GLXextFuncPtr) glXAllocateMemoryMESA },
+   { "glXFreeMemoryMESA", (__GLXextFuncPtr) glXFreeMemoryMESA },
+   { "glXGetMemoryOffsetMESA", (__GLXextFuncPtr) glXGetMemoryOffsetMESA },
+
+   /*** GLX_EXT_texture_from_pixmap ***/
+   { "glXBindTexImageEXT", (__GLXextFuncPtr) glXBindTexImageEXT },
+   { "glXReleaseTexImageEXT", (__GLXextFuncPtr) glXReleaseTexImageEXT },
+
+   { NULL, NULL }   /* end of list */
+};
+
+
+
+/* Return address of named glX function, or NULL if not found.  A NULL or
+ * empty funcName is reported as not found rather than being dereferenced.
+ */
+__GLXextFuncPtr
+_glxapi_get_proc_address(const char *funcName)
+{
+   GLuint i;
+   if (!funcName || !funcName[0])
+      return NULL;
+#ifdef MANGLE
+   funcName++;   /* skip the "m" prefix; safe, the name is non-empty here */
+#endif
+   for (i = 0; GLX_functions[i].Name; i++) {
+      if (strcmp(GLX_functions[i].Name, funcName) == 0)
+         return GLX_functions[i].Address;
+   }
+   return NULL;
+}
+
+
+
+/*
+ * A "meta" function: it is answered here directly instead of going through
+ * the dispatch table.  glX names are tried first, then the GL API lookup.
+ */
+__GLXextFuncPtr PUBLIC
+glXGetProcAddressARB(const GLubyte *procName)
+{
+   const char *name = (const char *) procName;
+   __GLXextFuncPtr f = _glxapi_get_proc_address(name);
+
+   if (!f) {
+      /* not in the glX table; fall back to _glapi_get_proc_address */
+      f = (__GLXextFuncPtr) _glapi_get_proc_address(name);
+   }
+
+   return f;
+}
+
+
+/* GLX 1.4 name; simply forwards to glXGetProcAddressARB. */
+void PUBLIC
+(*glXGetProcAddress(const GLubyte *procName))()
+{
+   return glXGetProcAddressARB(procName);
+}
diff --git a/src/mesa/drivers/x11/glxapi.h b/src/mesa/drivers/x11/glxapi.h
new file mode 100644
index 0000000000..37de81e55a
--- /dev/null
+++ b/src/mesa/drivers/x11/glxapi.h
@@ -0,0 +1,228 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ * 
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _glxapi_h_
+#define _glxapi_h_
+
+
+#define GLX_GLXEXT_PROTOTYPES
+#include "GL/glx.h"
+
+
+/* The GLX API dispatcher (i.e. this code) is being built into stand-alone
+ * Mesa.  We don't know anything about XFree86 or real GLX so we define a
+ * minimal __GLXcontextRec here so some of the functions in this file can
+ * work properly.
+ */
+typedef struct __GLXcontextRec {
+   Display *currentDpy;
+   GLboolean isDirect;
+   GLXDrawable currentDrawable;
+   GLXDrawable currentReadable;
+   XID xid;
+} __GLXcontext;
+
+
+/*
+ * Almost all the GLX API functions get routed through this dispatch table.
+ * The exceptions are the glXGetCurrentXXX() functions.
+ *
+ * This dispatch table allows multiple GLX client-side modules to coexist.
+ * Specifically, a real GLX library (like SGI's or the Utah GLX) and Mesa's
+ * pseudo-GLX can be present at the same time.  The former being used on
+ * GLX-enabled X servers and the latter on non-GLX X servers.
+ *
+ * Red Hat has been using this since Red Hat Linux 7.0 (I think).
+ * This'll be a standard feature in XFree86 4.3.  It basically allows one
+ * libGL to do both DRI-rendering and "fake GLX" rendering to X displays
+ * that lack the GLX extension.
+ */
+struct _glxapi_table {   /* every member is one pointer-sized slot; see _glxapi_set_no_op_table */
+   /*** GLX_VERSION_1_0 ***/
+   XVisualInfo *(*ChooseVisual)(Display *dpy, int screen, int *list);
+   void (*CopyContext)(Display *dpy, GLXContext src, GLXContext dst, unsigned long mask);
+   GLXContext (*CreateContext)(Display *dpy, XVisualInfo *visinfo, GLXContext shareList, Bool direct);
+   GLXPixmap (*CreateGLXPixmap)(Display *dpy, XVisualInfo *visinfo, Pixmap pixmap);
+   void (*DestroyContext)(Display *dpy, GLXContext ctx);
+   void (*DestroyGLXPixmap)(Display *dpy, GLXPixmap pixmap);
+   int (*GetConfig)(Display *dpy, XVisualInfo *visinfo, int attrib, int *value);
+   /*GLXContext (*GetCurrentContext)(void);*/
+   /*GLXDrawable (*GetCurrentDrawable)(void);*/
+   Bool (*IsDirect)(Display *dpy, GLXContext ctx);
+   Bool (*MakeCurrent)(Display *dpy, GLXDrawable drawable, GLXContext ctx);
+   Bool (*QueryExtension)(Display *dpy, int *errorb, int *event);
+   Bool (*QueryVersion)(Display *dpy, int *maj, int *min);
+   void (*SwapBuffers)(Display *dpy, GLXDrawable drawable);
+   void (*UseXFont)(Font font, int first, int count, int listBase);
+   void (*WaitGL)(void);
+   void (*WaitX)(void);
+
+   /*** GLX_VERSION_1_1 ***/
+   const char *(*GetClientString)(Display *dpy, int name);
+   const char *(*QueryExtensionsString)(Display *dpy, int screen);
+   const char *(*QueryServerString)(Display *dpy, int screen, int name);
+
+   /*** GLX_VERSION_1_2 ***/
+   /*Display *(*GetCurrentDisplay)(void);*/
+
+   /*** GLX_VERSION_1_3 ***/
+   GLXFBConfig *(*ChooseFBConfig)(Display *dpy, int screen, const int *attribList, int *nitems);
+   GLXContext (*CreateNewContext)(Display *dpy, GLXFBConfig config, int renderType, GLXContext shareList, Bool direct);
+   GLXPbuffer (*CreatePbuffer)(Display *dpy, GLXFBConfig config, const int *attribList);
+   GLXPixmap (*CreatePixmap)(Display *dpy, GLXFBConfig config, Pixmap pixmap, const int *attribList);
+   GLXWindow (*CreateWindow)(Display *dpy, GLXFBConfig config, Window win, const int *attribList);
+   void (*DestroyPbuffer)(Display *dpy, GLXPbuffer pbuf);
+   void (*DestroyPixmap)(Display *dpy, GLXPixmap pixmap);
+   void (*DestroyWindow)(Display *dpy, GLXWindow window);
+   /*GLXDrawable (*GetCurrentReadDrawable)(void);*/
+   int (*GetFBConfigAttrib)(Display *dpy, GLXFBConfig config, int attribute, int *value);
+   GLXFBConfig *(*GetFBConfigs)(Display *dpy, int screen, int *nelements);
+   void (*GetSelectedEvent)(Display *dpy, GLXDrawable drawable, unsigned long *mask);
+   XVisualInfo *(*GetVisualFromFBConfig)(Display *dpy, GLXFBConfig config);
+   Bool (*MakeContextCurrent)(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx);
+   int (*QueryContext)(Display *dpy, GLXContext ctx, int attribute, int *value);
+   void (*QueryDrawable)(Display *dpy, GLXDrawable draw, int attribute, unsigned int *value);
+   void (*SelectEvent)(Display *dpy, GLXDrawable drawable, unsigned long mask);
+
+   /*** GLX_SGI_swap_control ***/
+   int (*SwapIntervalSGI)(int);
+
+   /*** GLX_SGI_video_sync ***/
+   int (*GetVideoSyncSGI)(unsigned int *count);
+   int (*WaitVideoSyncSGI)(int divisor, int remainder, unsigned int *count);
+
+   /*** GLX_SGI_make_current_read ***/
+   Bool (*MakeCurrentReadSGI)(Display *, GLXDrawable, GLXDrawable, GLXContext);
+   /*GLXDrawable (*GetCurrentReadDrawableSGI)(void);*/
+
+   /*** GLX_SGIX_video_source (needs video library) ***/
+#if defined(_VL_H_)   /* NOTE(review): glxapi.c and realglx.c test _VL_H -- confirm which is meant */
+   GLXVideoSourceSGIX (*CreateGLXVideoSourceSGIX)(Display *, int, VLServer, VLPath, int, VLNode);
+   void (*DestroyGLXVideoSourceSGIX)(Display *, GLXVideoSourceSGIX);
+#else
+   void *CreateGLXVideoSourceSGIX;    /* placeholder slot */
+   void *DestroyGLXVideoSourceSGIX;   /* placeholder slot */
+#endif
+
+   /*** GLX_EXT_import_context ***/
+   void (*FreeContextEXT)(Display *dpy, GLXContext context);
+   GLXContextID (*GetContextIDEXT)(const GLXContext context);
+   /*Display *(*GetCurrentDisplayEXT)(void);*/
+   GLXContext (*ImportContextEXT)(Display *dpy, GLXContextID contextID);
+   int (*QueryContextInfoEXT)(Display *dpy, GLXContext context, int attribute,int *value);
+
+   /*** GLX_SGIX_fbconfig ***/
+   int (*GetFBConfigAttribSGIX)(Display *, GLXFBConfigSGIX, int, int *);
+   GLXFBConfigSGIX * (*ChooseFBConfigSGIX)(Display *, int, int *, int *);
+   GLXPixmap (*CreateGLXPixmapWithConfigSGIX)(Display *, GLXFBConfigSGIX, Pixmap);
+   GLXContext (*CreateContextWithConfigSGIX)(Display *, GLXFBConfigSGIX, int, GLXContext, Bool);
+   XVisualInfo * (*GetVisualFromFBConfigSGIX)(Display *, GLXFBConfigSGIX);
+   GLXFBConfigSGIX (*GetFBConfigFromVisualSGIX)(Display *, XVisualInfo *);
+
+   /*** GLX_SGIX_pbuffer ***/
+   GLXPbufferSGIX (*CreateGLXPbufferSGIX)(Display *, GLXFBConfigSGIX, unsigned int, unsigned int, int *);
+   void (*DestroyGLXPbufferSGIX)(Display *, GLXPbufferSGIX);
+   int (*QueryGLXPbufferSGIX)(Display *, GLXPbufferSGIX, int, unsigned int *);
+   void (*SelectEventSGIX)(Display *, GLXDrawable, unsigned long);
+   void (*GetSelectedEventSGIX)(Display *, GLXDrawable, unsigned long *);
+
+   /*** GLX_SGI_cushion ***/
+   void (*CushionSGI)(Display *, Window, float);
+
+   /*** GLX_SGIX_video_resize ***/
+   int (*BindChannelToWindowSGIX)(Display *, int, int, Window);
+   int (*ChannelRectSGIX)(Display *, int, int, int, int, int, int);
+   int (*QueryChannelRectSGIX)(Display *, int, int, int *, int *, int *, int *);
+   int (*QueryChannelDeltasSGIX)(Display *, int, int, int *, int *, int *, int *);
+   int (*ChannelRectSyncSGIX)(Display *, int, int, GLenum);
+
+   /*** GLX_SGIX_dmbuffer (needs dmedia library) ***/
+#if defined (_DM_BUFFER_H_)
+   Bool (*AssociateDMPbufferSGIX)(Display *, GLXPbufferSGIX, DMparams *, DMbuffer);
+#else
+   void *AssociateDMPbufferSGIX;   /* placeholder slot; same name as the real member */
+#endif
+
+   /*** GLX_SGIX_swap_group ***/
+   void (*JoinSwapGroupSGIX)(Display *, GLXDrawable, GLXDrawable);
+
+   /*** GLX_SGIX_swap_barrier ***/
+   void (*BindSwapBarrierSGIX)(Display *, GLXDrawable, int);
+   Bool (*QueryMaxSwapBarriersSGIX)(Display *, int, int *);
+
+   /*** GLX_SUN_get_transparent_index ***/
+   Status (*GetTransparentIndexSUN)(Display *, Window, Window, long *);
+
+   /*** GLX_MESA_copy_sub_buffer ***/
+   void (*CopySubBufferMESA)(Display *dpy, GLXDrawable drawable, int x, int y, int width, int height);
+
+   /*** GLX_MESA_release_buffers ***/
+   Bool (*ReleaseBuffersMESA)(Display *dpy, Window w);
+
+   /*** GLX_MESA_pixmap_colormap ***/
+   GLXPixmap (*CreateGLXPixmapMESA)(Display *dpy, XVisualInfo *visinfo, Pixmap pixmap, Colormap cmap);
+
+   /*** GLX_MESA_set_3dfx_mode ***/
+   Bool (*Set3DfxModeMESA)(int mode);
+
+   /*** GLX_NV_vertex_array_range ***/
+   void * (*AllocateMemoryNV)( GLsizei size,
+                               GLfloat readFrequency,
+                               GLfloat writeFrequency,
+                               GLfloat priority );
+   void (*FreeMemoryNV)( GLvoid *pointer );
+
+   /*** GLX_MESA_agp_offset ***/
+   GLuint (*GetAGPOffsetMESA)( const GLvoid *pointer );
+
+   /*** GLX_EXT_texture_from_pixmap ***/
+   void (*BindTexImageEXT)(Display *dpy, GLXDrawable drawable, int buffer,
+                           const int *attrib_list);
+   void (*ReleaseTexImageEXT)(Display *dpy, GLXDrawable drawable, int buffer);
+};
+
+
+
+extern const char *
+_glxapi_get_version(void);   /* version string ("1.3" in glxapi.c) */
+
+/* NULL-terminated array of extension name strings. */
+extern const char **
+_glxapi_get_extensions(void);
+
+/* Size of struct _glxapi_table in entries, not bytes. */
+extern GLuint
+_glxapi_get_dispatch_table_size(void);
+
+/* Set every entry of dispatch table t to a no-op function. */
+extern void
+_glxapi_set_no_op_table(struct _glxapi_table *t);
+
+/* Address of the named glX function, or NULL if not found. */
+extern __GLXextFuncPtr
+_glxapi_get_proc_address(const char *funcName);
+
+
+#endif
diff --git a/src/mesa/drivers/x11/glxheader.h b/src/mesa/drivers/x11/glxheader.h
new file mode 100644
index 0000000000..d88afba20e
--- /dev/null
+++ b/src/mesa/drivers/x11/glxheader.h
@@ -0,0 +1,63 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.1
+ * 
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef GLX_HEADER_H
+#define GLX_HEADER_H
+
+#ifdef __VMS
+#include <GL/vms_x_fix.h>
+#endif
+
+#include "main/glheader.h"
+
+#ifdef XFree86Server
+
+# include "xorg-server.h"
+# include "resource.h"
+# include "windowstr.h"
+
+#else
+
+# include <X11/Xlib.h>
+# include <X11/Xlibint.h>
+# include <X11/Xutil.h>
+# ifdef USE_XSHM  /* was SHM */
+#  include <sys/ipc.h>
+#  include <sys/shm.h>
+#  include <X11/extensions/XShm.h>
+# endif
+# include <GL/glx.h>
+# include <sys/time.h>
+
+#endif
+
+
+
+/* this silences a compiler warning on several systems */
+struct timespec;
+struct itimerspec;
+
+
+#endif /*GLX_HEADER_H*/
diff --git a/src/mesa/drivers/x11/realglx.c b/src/mesa/drivers/x11/realglx.c
new file mode 100644
index 0000000000..30adb7465b
--- /dev/null
+++ b/src/mesa/drivers/x11/realglx.c
@@ -0,0 +1,180 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  5.1
+ * 
+ * Copyright (C) 1999-2002  Brian Paul   All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include <assert.h>
+#include <GL/glx.h>
+#include "realglx.h"
+#include "glxapi.h"
+
+
+struct _glxapi_table *
+_real_GetGLXDispatchTable(void)
+{
+   static struct _glxapi_table glx;   /* single static table, refilled on every call */
+
+   /* be sure our dispatch table size <= libGL's table */
+   {
+      GLuint size = sizeof(struct _glxapi_table) / sizeof(void *);
+      (void) size;   /* keeps 'size' referenced if assert() expands to nothing */
+      assert(_glxapi_get_dispatch_table_size() >= size);
+   }
+
+   /* initialize the whole table to no-ops; entries not assigned below stay no-ops */
+   _glxapi_set_no_op_table(&glx);
+
+   /* now initialize the table with the functions I implement */
+
+   /*** GLX_VERSION_1_0 ***/
+   glx.ChooseVisual = _real_glXChooseVisual;
+   glx.CopyContext = _real_glXCopyContext;
+   glx.CreateContext = _real_glXCreateContext;
+   glx.CreateGLXPixmap = _real_glXCreateGLXPixmap;
+   glx.DestroyContext = _real_glXDestroyContext;
+   glx.DestroyGLXPixmap = _real_glXDestroyGLXPixmap;
+   glx.GetConfig = _real_glXGetConfig;
+   /*glx.GetCurrentContext = _real_glXGetCurrentContext;*/
+   /*glx.GetCurrentDrawable = _real_glXGetCurrentDrawable;*/
+   glx.IsDirect = _real_glXIsDirect;
+   glx.MakeCurrent = _real_glXMakeCurrent;
+   glx.QueryExtension = _real_glXQueryExtension;
+   glx.QueryVersion = _real_glXQueryVersion;
+   glx.SwapBuffers = _real_glXSwapBuffers;
+   glx.UseXFont = _real_glXUseXFont;
+   glx.WaitGL = _real_glXWaitGL;
+   glx.WaitX = _real_glXWaitX;
+
+   /*** GLX_VERSION_1_1 ***/
+   glx.GetClientString = _real_glXGetClientString;
+   glx.QueryExtensionsString = _real_glXQueryExtensionsString;
+   glx.QueryServerString = _real_glXQueryServerString;
+
+   /*** GLX_VERSION_1_2 ***/
+   /*glx.GetCurrentDisplay = _real_glXGetCurrentDisplay;*/
+
+   /*** GLX_VERSION_1_3 ***/
+   glx.ChooseFBConfig = _real_glXChooseFBConfig;
+   glx.CreateNewContext = _real_glXCreateNewContext;
+   glx.CreatePbuffer = _real_glXCreatePbuffer;
+   glx.CreatePixmap = _real_glXCreatePixmap;
+   glx.CreateWindow = _real_glXCreateWindow;
+   glx.DestroyPbuffer = _real_glXDestroyPbuffer;
+   glx.DestroyPixmap = _real_glXDestroyPixmap;
+   glx.DestroyWindow = _real_glXDestroyWindow;
+   /*glx.GetCurrentReadDrawable = _real_glXGetCurrentReadDrawable;*/
+   glx.GetFBConfigAttrib = _real_glXGetFBConfigAttrib;
+   glx.GetFBConfigs = _real_glXGetFBConfigs;
+   glx.GetSelectedEvent = _real_glXGetSelectedEvent;
+   glx.GetVisualFromFBConfig = _real_glXGetVisualFromFBConfig;
+   glx.MakeContextCurrent = _real_glXMakeContextCurrent;
+   glx.QueryContext = _real_glXQueryContext;
+   glx.QueryDrawable = _real_glXQueryDrawable;
+   glx.SelectEvent = _real_glXSelectEvent;
+
+   /*** GLX_SGI_swap_control ***/
+   glx.SwapIntervalSGI = _real_glXSwapIntervalSGI;
+
+   /*** GLX_SGI_video_sync ***/
+   glx.GetVideoSyncSGI = _real_glXGetVideoSyncSGI;
+   glx.WaitVideoSyncSGI = _real_glXWaitVideoSyncSGI;
+
+   /*** GLX_SGI_make_current_read ***/
+   glx.MakeCurrentReadSGI = _real_glXMakeCurrentReadSGI;
+   /*glx.GetCurrentReadDrawableSGI = _real_glXGetCurrentReadDrawableSGI;*/
+
+#if defined(_VL_H)   /* NOTE(review): glxapi.h tests _VL_H_ for these members -- confirm */
+   /*** GLX_SGIX_video_source ***/
+   glx.CreateGLXVideoSourceSGIX = _real_glXCreateGLXVideoSourceSGIX;
+   glx.DestroyGLXVideoSourceSGIX = _real_glXDestroyGLXVideoSourceSGIX;
+#endif
+
+   /*** GLX_EXT_import_context ***/
+   glx.FreeContextEXT = _real_glXFreeContextEXT;
+   /*glx.GetContextIDEXT = _real_glXGetContextIDEXT;*/
+   /*glx.GetCurrentDisplayEXT = _real_glXGetCurrentDisplayEXT;*/
+   glx.ImportContextEXT = _real_glXImportContextEXT;
+   glx.QueryContextInfoEXT = _real_glXQueryContextInfoEXT;
+
+   /*** GLX_SGIX_fbconfig ***/
+   glx.GetFBConfigAttribSGIX = _real_glXGetFBConfigAttribSGIX;
+   glx.ChooseFBConfigSGIX = _real_glXChooseFBConfigSGIX;
+   glx.CreateGLXPixmapWithConfigSGIX = _real_glXCreateGLXPixmapWithConfigSGIX;
+   glx.CreateContextWithConfigSGIX = _real_glXCreateContextWithConfigSGIX;
+   glx.GetVisualFromFBConfigSGIX = _real_glXGetVisualFromFBConfigSGIX;
+   glx.GetFBConfigFromVisualSGIX = _real_glXGetFBConfigFromVisualSGIX;
+
+   /*** GLX_SGIX_pbuffer ***/
+   glx.CreateGLXPbufferSGIX = _real_glXCreateGLXPbufferSGIX;
+   glx.DestroyGLXPbufferSGIX = _real_glXDestroyGLXPbufferSGIX;
+   glx.QueryGLXPbufferSGIX = _real_glXQueryGLXPbufferSGIX;
+   glx.SelectEventSGIX = _real_glXSelectEventSGIX;
+   glx.GetSelectedEventSGIX = _real_glXGetSelectedEventSGIX;
+
+   /*** GLX_SGI_cushion ***/
+   glx.CushionSGI = _real_glXCushionSGI;
+
+   /*** GLX_SGIX_video_resize ***/
+   glx.BindChannelToWindowSGIX = _real_glXBindChannelToWindowSGIX;
+   glx.ChannelRectSGIX = _real_glXChannelRectSGIX;
+   glx.QueryChannelRectSGIX = _real_glXQueryChannelRectSGIX;
+   glx.QueryChannelDeltasSGIX = _real_glXQueryChannelDeltasSGIX;
+   glx.ChannelRectSyncSGIX = _real_glXChannelRectSyncSGIX;
+
+#if defined(_DM_BUFFER_H_)
+   /*** GLX_SGIX_dmbuffer ***/
+   glx.AssociateDMPbufferSGIX = NULL;
+#endif
+
+   /*** GLX_SGIX_swap_group ***/
+   glx.JoinSwapGroupSGIX = _real_glXJoinSwapGroupSGIX;
+
+   /*** GLX_SGIX_swap_barrier ***/
+   glx.BindSwapBarrierSGIX = _real_glXBindSwapBarrierSGIX;
+   glx.QueryMaxSwapBarriersSGIX = _real_glXQueryMaxSwapBarriersSGIX;
+
+   /*** GLX_SUN_get_transparent_index ***/
+   glx.GetTransparentIndexSUN = _real_glXGetTransparentIndexSUN;
+
+   /*** GLX_MESA_copy_sub_buffer ***/
+   glx.CopySubBufferMESA = _real_glXCopySubBufferMESA;
+
+   /*** GLX_MESA_release_buffers ***/
+   glx.ReleaseBuffersMESA = _real_glXReleaseBuffersMESA;
+
+   /*** GLX_MESA_pixmap_colormap ***/
+   glx.CreateGLXPixmapMESA = _real_glXCreateGLXPixmapMESA;
+
+   /*** GLX_MESA_set_3dfx_mode ***/
+   glx.Set3DfxModeMESA = _real_glXSet3DfxModeMESA;
+
+   /*** GLX_NV_vertex_array_range ***/
+   glx.AllocateMemoryNV = _real_glXAllocateMemoryNV;
+   glx.FreeMemoryNV = _real_glXFreeMemoryNV;
+
+   /*** GLX_MESA_agp_offset ***/
+   glx.GetAGPOffsetMESA = _real_glXGetAGPOffsetMESA;
+
+   return &glx;
+}
diff --git a/src/mesa/drivers/x11/realglx.h b/src/mesa/drivers/x11/realglx.h
new file mode 100644
index 0000000000..150129db68
--- /dev/null
+++ b/src/mesa/drivers/x11/realglx.h
@@ -0,0 +1,326 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ * 
+ * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef REALGLX_H
+#define REALGLX_H
+
+
+extern struct _glxapi_table *
+_real_GetGLXDispatchTable(void);
+
+
+/*
+ * Prototypes for the _real_glX* functions; needed mainly to prevent compiler warnings.
+ */
+
+
+extern XVisualInfo *
+_real_glXChooseVisual( Display *dpy, int screen, int *list );
+
+extern GLXContext
+_real_glXCreateContext( Display *dpy, XVisualInfo *visinfo,
+                        GLXContext share_list, Bool direct );
+
+extern GLXPixmap
+_real_glXCreateGLXPixmap( Display *dpy, XVisualInfo *visinfo, Pixmap pixmap );
+
+extern GLXPixmap
+_real_glXCreateGLXPixmapMESA( Display *dpy, XVisualInfo *visinfo,
+                              Pixmap pixmap, Colormap cmap );
+
+extern void
+_real_glXDestroyGLXPixmap( Display *dpy, GLXPixmap pixmap );
+
+extern void
+_real_glXCopyContext( Display *dpy, GLXContext src, GLXContext dst,
+                      unsigned long mask );
+
+extern Bool
+_real_glXMakeCurrent( Display *dpy, GLXDrawable drawable, GLXContext ctx );
+
+extern Bool
+_real_glXQueryExtension( Display *dpy, int *errorb, int *event );
+
+extern void
+_real_glXDestroyContext( Display *dpy, GLXContext ctx );
+
+extern Bool
+_real_glXIsDirect( Display *dpy, GLXContext ctx );
+
+extern void
+_real_glXSwapBuffers( Display *dpy, GLXDrawable drawable );
+
+extern void
+_real_glXUseXFont( Font font, int first, int count, int listbase );
+
+extern Bool
+_real_glXQueryVersion( Display *dpy, int *maj, int *min );
+
+extern int
+_real_glXGetConfig( Display *dpy, XVisualInfo *visinfo,
+                    int attrib, int *value );
+
+extern void
+_real_glXWaitGL( void );
+
+
+extern void
+_real_glXWaitX( void );
+
+/* GLX 1.1 and later */
+extern const char *
+_real_glXQueryExtensionsString( Display *dpy, int screen );
+
+/* GLX 1.1 and later */
+extern const char *
+_real_glXQueryServerString( Display *dpy, int screen, int name );
+
+/* GLX 1.1 and later */
+extern const char *
+_real_glXGetClientString( Display *dpy, int name );
+
+
+/*
+ * GLX 1.3 and later
+ */
+
+extern GLXFBConfig *
+_real_glXChooseFBConfig( Display *dpy, int screen,
+                         const int *attribList, int *nitems );
+
+extern int
+_real_glXGetFBConfigAttrib( Display *dpy, GLXFBConfig config,
+                            int attribute, int *value );
+
+extern GLXFBConfig *
+_real_glXGetFBConfigs( Display *dpy, int screen, int *nelements );
+
+extern XVisualInfo *
+_real_glXGetVisualFromFBConfig( Display *dpy, GLXFBConfig config );
+
+extern GLXWindow
+_real_glXCreateWindow( Display *dpy, GLXFBConfig config, Window win,
+                       const int *attribList );
+
+extern void
+_real_glXDestroyWindow( Display *dpy, GLXWindow window );
+
+extern GLXPixmap
+_real_glXCreatePixmap( Display *dpy, GLXFBConfig config, Pixmap pixmap,
+                       const int *attribList );
+
+extern void
+_real_glXDestroyPixmap( Display *dpy, GLXPixmap pixmap );
+
+extern GLXPbuffer
+_real_glXCreatePbuffer( Display *dpy, GLXFBConfig config,
+                        const int *attribList );
+
+extern void
+_real_glXDestroyPbuffer( Display *dpy, GLXPbuffer pbuf );
+
+extern void
+_real_glXQueryDrawable( Display *dpy, GLXDrawable draw, int attribute,
+                        unsigned int *value );
+
+extern GLXContext
+_real_glXCreateNewContext( Display *dpy, GLXFBConfig config,
+                           int renderType, GLXContext shareList, Bool direct );
+
+
+extern Bool
+_real_glXMakeContextCurrent( Display *dpy, GLXDrawable draw,
+                             GLXDrawable read, GLXContext ctx );
+
+extern int
+_real_glXQueryContext( Display *dpy, GLXContext ctx, int attribute, int *value );
+
+extern void
+_real_glXSelectEvent( Display *dpy, GLXDrawable drawable, unsigned long mask );
+
+extern void
+_real_glXGetSelectedEvent( Display *dpy, GLXDrawable drawable,
+                           unsigned long *mask );
+
+#ifdef GLX_SGI_swap_control
+extern int
+_real_glXSwapIntervalSGI(int interval);
+#endif
+
+
+#ifdef GLX_SGI_video_sync
+extern int
+_real_glXGetVideoSyncSGI(unsigned int *count);
+
+extern int
+_real_glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count);
+#endif
+
+
+#ifdef GLX_SGI_make_current_read
+extern Bool
+_real_glXMakeCurrentReadSGI(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx);
+
+extern GLXDrawable
+_real_glXGetCurrentReadDrawableSGI(void);
+#endif
+
+#if defined(_VL_H) && defined(GLX_SGIX_video_source)
+extern GLXVideoSourceSGIX
+_real_glXCreateGLXVideoSourceSGIX(Display *dpy, int screen, VLServer server, VLPath path, int nodeClass, VLNode drainNode);
+
+extern void
+_real_glXDestroyGLXVideoSourceSGIX(Display *dpy, GLXVideoSourceSGIX src);
+#endif
+
+#ifdef GLX_EXT_import_context
+extern void
+_real_glXFreeContextEXT(Display *dpy, GLXContext context);
+
+extern GLXContextID
+_real_glXGetContextIDEXT(const GLXContext context);
+
+extern Display *
+_real_glXGetCurrentDisplayEXT(void);
+
+extern GLXContext
+_real_glXImportContextEXT(Display *dpy, GLXContextID contextID);
+
+extern int
+_real_glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute, int *value);
+#endif
+
+#ifdef GLX_SGIX_fbconfig
+extern int
+_real_glXGetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config, int attribute, int *value);
+
+extern GLXFBConfigSGIX *
+_real_glXChooseFBConfigSGIX(Display *dpy, int screen, int *attrib_list, int *nelements);
+
+extern GLXPixmap
+_real_glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pixmap);
+
+extern GLXContext
+_real_glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int render_type, GLXContext share_list, Bool direct);
+
+extern XVisualInfo *
+_real_glXGetVisualFromFBConfigSGIX(Display *dpy, GLXFBConfigSGIX config);
+
+extern GLXFBConfigSGIX
+_real_glXGetFBConfigFromVisualSGIX(Display *dpy, XVisualInfo *vis);
+#endif
+
+#ifdef GLX_SGIX_pbuffer
+extern GLXPbufferSGIX
+_real_glXCreateGLXPbufferSGIX(Display *dpy, GLXFBConfigSGIX config, unsigned int width, unsigned int height, int *attrib_list);
+
+extern void
+_real_glXDestroyGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf);
+
+extern int
+_real_glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute, unsigned int *value);
+
+extern void
+_real_glXSelectEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long mask);
+
+extern void
+_real_glXGetSelectedEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long *mask);
+#endif
+
+#ifdef GLX_SGI_cushion
+extern void
+_real_glXCushionSGI(Display *dpy, Window win, float cushion);
+#endif
+
+#ifdef GLX_SGIX_video_resize
+extern int
+_real_glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel , Window window);
+
+extern int
+_real_glXChannelRectSGIX(Display *dpy, int screen, int channel, int x, int y, int w, int h);
+
+extern int
+_real_glXQueryChannelRectSGIX(Display *dpy, int screen, int channel, int *x, int *y, int *w, int *h);
+
+extern int
+_real_glXQueryChannelDeltasSGIX(Display *dpy, int screen, int channel, int *dx, int *dy, int *dw, int *dh);
+
+extern int
+_real_glXChannelRectSyncSGIX(Display *dpy, int screen, int channel, GLenum synctype);
+#endif
+
+#if defined(_DM_BUFFER_H_) && defined(GLX_SGIX_dmbuffer)
+extern Bool
+_real_glXAssociateDMPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuffer, DMparams *params, DMbuffer dmbuffer);
+#endif
+
+#ifdef GLX_SGIX_swap_group
+extern void
+_real_glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable, GLXDrawable member);
+#endif
+
+#ifdef GLX_SGIX_swap_barrier
+extern void
+_real_glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable, int barrier);
+
+extern Bool
+_real_glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen, int *max);
+#endif
+
+#ifdef GLX_SUN_get_transparent_index
+extern Status
+_real_glXGetTransparentIndexSUN(Display *dpy, Window overlay, Window underlay, long *pTransparent);
+#endif
+
+#ifdef GLX_MESA_release_buffers
+extern Bool
+_real_glXReleaseBuffersMESA( Display *dpy, GLXDrawable d );
+#endif
+
+#ifdef GLX_MESA_set_3dfx_mode
+extern Bool
+_real_glXSet3DfxModeMESA( int mode );
+#endif
+
+#ifdef GLX_NV_vertex_array_range
+extern void *
+_real_glXAllocateMemoryNV(GLsizei size, GLfloat readfreq, GLfloat writefreq, GLfloat priority);
+extern void
+_real_glXFreeMemoryNV(GLvoid *pointer);
+#endif
+
+#ifdef GLX_MESA_agp_offset
+extern GLuint
+_real_glXGetAGPOffsetMESA(const GLvoid *pointer);
+#endif
+
+#ifdef GLX_MESA_copy_sub_buffer
+extern void
+_real_glXCopySubBufferMESA( Display *dpy, GLXDrawable drawable,
+                            int x, int y, int width, int height );
+#endif
+
+#endif /* REALGLX_H */
diff --git a/src/mesa/drivers/x11/xfonts.c b/src/mesa/drivers/x11/xfonts.c
new file mode 100644
index 0000000000..91f819b8df
--- /dev/null
+++ b/src/mesa/drivers/x11/xfonts.c
@@ -0,0 +1,377 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2000  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/* xfonts.c -- glXUseXFont() for Mesa written by
+ * Copyright (C) 1995 Thorsten.Ohl @ Physik.TH-Darmstadt.de
+ */
+
+#ifdef __VMS
+#include <GL/vms_x_fix.h>
+#endif
+
+#include "glxheader.h"
+#include "main/context.h"
+#include "main/imports.h"
+#include "xfonts.h"
+
+
+/* Some debugging info.  */
+
+#ifdef DEBUG
+#undef _R
+#undef _G
+#undef _B
+#include <ctype.h>
+
+int debug_xfonts = 0;
+
+/* Debug helper: print the metrics in CH, each output line led by PREFIX. */
+static void dump_char_struct(XCharStruct * ch, char *prefix)
+{
+   printf("%slbearing = %d, rbearing = %d, width = %d\n", prefix,
+          ch->lbearing, ch->rbearing, ch->width);
+   printf("%sascent = %d, descent = %d, attributes = %u\n", prefix,
+          ch->ascent, ch->descent, (unsigned int) ch->attributes);
+}
+
+/*
+ * Debug helper: print the font-wide fields of FONT followed by its
+ * min_bounds and max_bounds metrics.  Per-character metrics are not printed.
+ */
+static void
+dump_font_struct(XFontStruct * font)
+{
+   /* default_char is a plain unsigned value and need not fit in an unsigned
+    * char, which is all isprint() is defined for: range-check it first. */
+   const int printable =
+      font->default_char < 256 && isprint((int) font->default_char);
+   printf("ascent = %d, descent = %d\n", font->ascent, font->descent);
+   printf("char_or_byte2 = (%u,%u)\n",
+	  font->min_char_or_byte2, font->max_char_or_byte2);
+   printf("byte1 = (%u,%u)\n", font->min_byte1, font->max_byte1);
+   printf("all_chars_exist = %s\n", font->all_chars_exist ? "True" : "False");
+   printf("default_char = %c (\\%03o)\n",
+	  (char) (printable ? font->default_char : ' '), font->default_char);
+   dump_char_struct(&font->min_bounds, "min> ");
+   dump_char_struct(&font->max_bounds, "max> ");
+}
+
+/* Debug: print a WIDTH-byte x HEIGHT-row bitmap, last stored row first. */
+static void
+dump_bitmap(unsigned int width, unsigned int height, GLubyte * bitmap)
+{
+   const unsigned int nbits = 8 * width;
+   unsigned int row, col;
+   printf("    ");
+   for (col = 0; col < nbits; col++)
+      printf("%o", 7 - (col % 8));	/* bit number within each byte */
+   putchar('\n');
+   for (row = 0; row < height; row++) {
+      const GLubyte *line = bitmap + width * (height - row - 1);
+      printf("%3o:", row);
+      for (col = 0; col < nbits; col++)
+	 putchar((line[col / 8] & (0x80 >> (col % 8))) ? '*' : '.');
+      printf("   ");
+      for (col = 0; col < width; col++)
+	 printf("0x%02x, ", line[col]);
+      putchar('\n');
+   }
+}
+#endif /* DEBUG */
+
+
+/* Implementation.  */
+
+/* Fill a BITMAP with a character C from the current font
+   in the graphics context GC.  WIDTH is the width in bytes
+   and HEIGHT is the height in bits.
+
+   Note that the generated bitmaps must be used with
+
+        glPixelStorei (GL_UNPACK_SWAP_BYTES, GL_FALSE);
+        glPixelStorei (GL_UNPACK_LSB_FIRST, GL_FALSE);
+        glPixelStorei (GL_UNPACK_ROW_LENGTH, 0);
+        glPixelStorei (GL_UNPACK_SKIP_ROWS, 0);
+        glPixelStorei (GL_UNPACK_SKIP_PIXELS, 0);
+        glPixelStorei (GL_UNPACK_ALIGNMENT, 1);
+
+   Possible optimizations:
+
+     * use only one reusable pixmap with the maximum dimensions.
+     * draw the entire font into a single pixmap (careful with
+       proportional fonts!).
+*/
+
+
+/*
+ * Render glyph C of GC's font into BITMAP (WIDTH bytes by HEIGHT rows) in
+ * OpenGL row order.  Bits are only set here; the caller supplies the buffer.
+ */
+static void
+fill_bitmap(Display * dpy, Window win, GC gc,
+	    unsigned int width, unsigned int height,
+	    int x0, int y0, unsigned int c, GLubyte * bitmap)
+{
+   const unsigned int pix_width = 8 * width;
+   Pixmap scratch;
+   XImage *glyph;
+   XChar2b code;
+
+   /* Clear a 1-bit scratch pixmap, then draw the glyph with foreground 1. */
+   scratch = XCreatePixmap(dpy, win, pix_width, height, 1);
+   XSetForeground(dpy, gc, 0);
+   XFillRectangle(dpy, scratch, gc, 0, 0, pix_width, height);
+   XSetForeground(dpy, gc, 1);
+   code.byte1 = (c >> 8) & 0xff;
+   code.byte2 = (c & 0xff);
+   XDrawString16(dpy, scratch, gc, x0, y0, &code, 1);
+   glyph = XGetImage(dpy, scratch, 0, 0, pix_width, height, 1, XYPixmap);
+   if (glyph) {
+      unsigned int row, col;
+      /* X11 rows run top-down, OpenGL rows bottom-up: flip while copying. */
+      for (row = 0; row < height; row++) {
+	 GLubyte *dst = bitmap + width * (height - row - 1);
+	 for (col = 0; col < pix_width; col++)
+	    if (XGetPixel(glyph, col, row))
+	       dst[col / 8] |= (0x80 >> (col % 8));
+      }
+      XDestroyImage(glyph);
+   }
+   XFreePixmap(dpy, scratch);
+}
+
+/*
+ * Look up glyph index WHICH in font FS.
+ *
+ * fs     font description to search
+ * which  glyph index; for "matrix" fonts the high byte is the row (byte1)
+ *        and the low byte the column (byte2)
+ *
+ * A font is "linear" when min_byte1 == max_byte1, "matrix" otherwise.
+ *
+ * Returns NULL when the index lies outside the font's declared range.
+ * Otherwise returns the glyph's entry in fs->per_char, or &fs->min_bounds
+ * when the font carries no per_char table.
+ */
+static XCharStruct *
+isvalid(XFontStruct * fs, unsigned int which)
+{
+   const unsigned int nrows = fs->max_byte1 - fs->min_byte1 + 1;
+   const unsigned int ncols = fs->max_char_or_byte2 - fs->min_char_or_byte2 + 1;
+
+   if (nrows == 1) {
+      /* "linear" fonts: WHICH indexes per_char directly, relative to
+       * min_char_or_byte2. */
+      if (which < fs->min_char_or_byte2 || which > fs->max_char_or_byte2)
+	 return (NULL);
+
+      if (!fs->per_char)
+	 return (&fs->min_bounds);
+
+      return (fs->per_char + (which - fs->min_char_or_byte2));
+   }
+   else {
+      /* "matrix" fonts: the high byte of WHICH selects the row and the
+       * low byte the column of an nrows x ncols per_char grid. */
+      const unsigned int hi = which >> 8;
+      const unsigned int lo = which & 0xff;
+      int cell;
+
+      if (lo < fs->min_char_or_byte2 || lo > fs->max_char_or_byte2 ||
+	  hi < fs->min_byte1 || hi > fs->max_byte1)
+	 return (NULL);
+
+      if (!fs->per_char)
+	 return (&fs->min_bounds);
+
+      cell = ((hi - fs->min_byte1) * ncols) + (lo - fs->min_char_or_byte2);
+      return (fs->per_char + cell);
+   }
+}
+
+
+/*
+ * Build COUNT display lists, numbered from LISTBASE, one per glyph of the X
+ * font FONT starting at character index FIRST.  Each list holds a single
+ * glBitmap() call.  Indices the font does not define, and glyphs with an
+ * empty bounding box, get an empty bitmap that only advances the raster
+ * position.  The current display comes from glXGetCurrentDisplay(); the
+ * caller's GL_UNPACK_* pixel-store state is saved and restored around the
+ * work.  Returns silently if there is no current display.
+ */
+void
+Fake_glXUseXFont(Font font, int first, int count, int listbase)
+{
+   Display *dpy;
+   Window win;
+   Pixmap pixmap;
+   GC gc;
+   XGCValues values;
+   unsigned long valuemask;
+   XFontStruct *fs;
+   GLint swapbytes, lsbfirst, rowlength;
+   GLint skiprows, skippixels, alignment;
+   unsigned int max_width, max_height, max_bm_width, max_bm_height;
+   GLubyte *bm;
+   int i;
+
+   dpy = glXGetCurrentDisplay();
+   if (!dpy)
+      return;			/* I guess glXMakeCurrent wasn't called */
+   win = RootWindow(dpy, DefaultScreen(dpy));
+
+   fs = XQueryFont(dpy, font);
+   if (!fs) {
+      _mesa_error(NULL, GL_INVALID_VALUE,
+		  "Couldn't get font structure information");
+      return;
+   }
+
+   /* Allocate a bitmap that can fit all characters.  */
+   max_width = fs->max_bounds.rbearing - fs->min_bounds.lbearing;
+   max_height = fs->max_bounds.ascent + fs->max_bounds.descent;
+   max_bm_width = (max_width + 7) / 8;
+   max_bm_height = max_height;
+
+   bm = (GLubyte *) MALLOC((max_bm_width * max_bm_height) * sizeof(GLubyte));
+   if (!bm) {
+      XFreeFontInfo(NULL, fs, 1);
+      _mesa_error(NULL, GL_OUT_OF_MEMORY,
+		  "Couldn't allocate bitmap in glXUseXFont()");
+      return;
+   }
+
+   /* Save the current packing mode for bitmaps.  */
+   glGetIntegerv(GL_UNPACK_SWAP_BYTES, &swapbytes);
+   glGetIntegerv(GL_UNPACK_LSB_FIRST, &lsbfirst);
+   glGetIntegerv(GL_UNPACK_ROW_LENGTH, &rowlength);
+   glGetIntegerv(GL_UNPACK_SKIP_ROWS, &skiprows);
+   glGetIntegerv(GL_UNPACK_SKIP_PIXELS, &skippixels);
+   glGetIntegerv(GL_UNPACK_ALIGNMENT, &alignment);
+
+   /* Enforce a standard packing mode which is compatible with
+      fill_bitmap() from above.  This is actually the default mode,
+      except for the (non)alignment.  */
+   glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE);
+   glPixelStorei(GL_UNPACK_LSB_FIRST, GL_FALSE);
+   glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+   glPixelStorei(GL_UNPACK_SKIP_ROWS, 0);
+   glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
+   glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+
+   pixmap = XCreatePixmap(dpy, win, 10, 10, 1);
+   values.foreground = BlackPixel(dpy, DefaultScreen(dpy));
+   values.background = WhitePixel(dpy, DefaultScreen(dpy));
+   values.font = fs->fid;
+   valuemask = GCForeground | GCBackground | GCFont;
+   gc = XCreateGC(dpy, pixmap, valuemask, &values);
+   XFreePixmap(dpy, pixmap);
+
+#ifdef DEBUG
+   if (debug_xfonts)
+      dump_font_struct(fs);
+#endif
+
+   for (i = 0; i < count; i++) {
+      unsigned int width, height, bm_width, bm_height;
+      GLfloat x0, y0, dx, dy;
+      XCharStruct *ch;
+      int x, y;
+      unsigned int c = first + i;
+      int list = listbase + i;
+      int valid;
+
+      /* check on index validity and get the bounds */
+      ch = isvalid(fs, c);
+      if (!ch) {
+	 ch = &fs->max_bounds;
+	 valid = 0;
+      }
+      else {
+	 valid = 1;
+      }
+
+#ifdef DEBUG
+      if (debug_xfonts) {
+	 char s[16];	/* room for a backslash, 11 octal digits, "> " and NUL */
+	 sprintf(s, (c < 256 && isprint(c)) ? "%c> " : "\\%03o> ", c);
+	 dump_char_struct(ch, s);
+      }
+#endif
+
+      /* glBitmap() parameters:
+         straight from the glXUseXFont(3) manpage.  */
+      width = ch->rbearing - ch->lbearing;
+      height = ch->ascent + ch->descent;
+      x0 = -ch->lbearing;
+      y0 = ch->descent - 0;	/* XXX used to subtract 1 here */
+      /* but that caused a conformance failure */
+      dx = ch->width;
+      dy = 0;
+
+      /* X11's starting point.  */
+      x = -ch->lbearing;
+      y = ch->ascent;
+
+      /* Round the width to a multiple of eight.  We will use this also
+         for the pixmap for capturing the X11 font.  This is slightly
+         inefficient, but it makes the OpenGL part real easy.  */
+      bm_width = (width + 7) / 8;
+      bm_height = height;
+
+      glNewList(list, GL_COMPILE);
+      if (valid && (bm_width > 0) && (bm_height > 0)) {
+
+	 memset(bm, '\0', bm_width * bm_height);
+	 fill_bitmap(dpy, win, gc, bm_width, bm_height, x, y, c, bm);
+
+	 glBitmap(width, height, x0, y0, dx, dy, bm);
+#ifdef DEBUG
+	 if (debug_xfonts) {
+	    printf("width/height = %u/%u\n", width, height);
+	    printf("bm_width/bm_height = %u/%u\n", bm_width, bm_height);
+	    dump_bitmap(bm_width, bm_height, bm);
+	 }
+#endif
+      }
+      else {
+	 glBitmap(0, 0, 0.0, 0.0, dx, dy, NULL);
+      }
+      glEndList();
+   }
+
+   FREE(bm);
+   XFreeFontInfo(NULL, fs, 1);
+   XFreeGC(dpy, gc);
+
+   /* Restore saved packing modes.  */
+   glPixelStorei(GL_UNPACK_SWAP_BYTES, swapbytes);
+   glPixelStorei(GL_UNPACK_LSB_FIRST, lsbfirst);
+   glPixelStorei(GL_UNPACK_ROW_LENGTH, rowlength);
+   glPixelStorei(GL_UNPACK_SKIP_ROWS, skiprows);
+   glPixelStorei(GL_UNPACK_SKIP_PIXELS, skippixels);
+   glPixelStorei(GL_UNPACK_ALIGNMENT, alignment);
+}
diff --git a/src/mesa/drivers/x11/xfonts.h b/src/mesa/drivers/x11/xfonts.h
new file mode 100644
index 0000000000..e36f42f817
--- /dev/null
+++ b/src/mesa/drivers/x11/xfonts.h
@@ -0,0 +1,41 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.5
+ *
+ * Copyright (C) 1999-2000  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef XFONTS_H
+#define XFONTS_H
+
+#ifdef __VMS
+#include <GL/vms_x_fix.h>
+#endif
+
+#include <X11/Xlib.h>
+
+
+extern void Fake_glXUseXFont( Font font, int first, int count, int listbase );
+
+
+#endif
+
diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c
new file mode 100644
index 0000000000..dac1668cfe
--- /dev/null
+++ b/src/mesa/drivers/x11/xm_api.c
@@ -0,0 +1,2471 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file xm_api.c
+ *
+ * All the XMesa* API functions.
+ *
+ *
+ * NOTES:
+ *
+ * The window coordinate system origin (0,0) is in the lower-left corner
+ * of the window.  X11's window coordinate origin is in the upper-left
+ * corner of the window.  Therefore, most drawing functions in this
+ * file have to flip Y coordinates.
+ *
+ * Define USE_XSHM in the Makefile with -DUSE_XSHM if you want to compile
+ * in support for the MIT Shared Memory extension.  If enabled, when you
+ * use an Ximage for the back buffer in double buffered mode, the "swap"
+ * operation will be faster.  You must also link with -lXext.
+ *
+ * Byte swapping:  If the Mesa host and the X display use a different
+ * byte order then there's some trickiness to be aware of when using
+ * XImages.  The byte ordering used for the XImage is that of the X
+ * display, not the Mesa host.
+ * The color-to-pixel encoding for True/DirectColor must be done
+ * according to the display's visual red_mask, green_mask, and blue_mask.
+ * If XPutPixel is used to put a pixel into an XImage then XPutPixel will
+ * do byte swapping if needed.  If one wants to directly "poke" the pixel
+ * into the XImage's buffer then the pixel must be byte swapped first.  In
+ * Mesa, when byte swapping is needed we use the PF_TRUECOLOR pixel format
+ * and use XPutPixel everywhere except in the implementation of
+ * glClear(GL_COLOR_BUFFER_BIT).  We want this function to be fast so
+ * instead of using XPutPixel we "poke" our values after byte-swapping
+ * the clear pixel value if needed.
+ *
+ */
+
+#ifdef __CYGWIN__
+#undef WIN32
+#undef __WIN32__
+#endif
+
+#include "glxheader.h"
+#include "xmesaP.h"
+#include "main/context.h"
+#include "main/extensions.h"
+#include "main/framebuffer.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/renderbuffer.h"
+#include "main/teximage.h"
+#include "glapi/glthread.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "tnl/t_pipeline.h"
+#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
+
+/**
+ * Global X driver lock
+ */
+_glthread_Mutex _xmesa_lock;
+
+
+
+/**
+ * Lookup tables for HPCR pixel format:
+ */
+static short hpcr_rgbTbl[3][256] = {
+{
+ 16,  16,  17,  17,  18,  18,  19,  19,  20,  20,  21,  21,  22,  22,  23,  23,
+ 24,  24,  25,  25,  26,  26,  27,  27,  28,  28,  29,  29,  30,  30,  31,  31,
+ 32,  32,  33,  33,  34,  34,  35,  35,  36,  36,  37,  37,  38,  38,  39,  39,
+ 32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
+ 48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
+ 64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
+ 80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
+ 96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
+128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
+144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
+192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
+208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
+224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239
+},
+{
+ 16,  16,  17,  17,  18,  18,  19,  19,  20,  20,  21,  21,  22,  22,  23,  23,
+ 24,  24,  25,  25,  26,  26,  27,  27,  28,  28,  29,  29,  30,  30,  31,  31,
+ 32,  32,  33,  33,  34,  34,  35,  35,  36,  36,  37,  37,  38,  38,  39,  39,
+ 32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
+ 48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
+ 64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
+ 80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
+ 96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
+128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
+144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
+192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
+208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
+224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239
+},
+{
+ 32,  32,  33,  33,  34,  34,  35,  35,  36,  36,  37,  37,  38,  38,  39,  39,
+ 40,  40,  41,  41,  42,  42,  43,  43,  44,  44,  45,  45,  46,  46,  47,  47,
+ 48,  48,  49,  49,  50,  50,  51,  51,  52,  52,  53,  53,  54,  54,  55,  55,
+ 56,  56,  57,  57,  58,  58,  59,  59,  60,  60,  61,  61,  62,  62,  63,  63,
+ 64,  64,  65,  65,  66,  66,  67,  67,  68,  68,  69,  69,  70,  70,  71,  71,
+ 72,  72,  73,  73,  74,  74,  75,  75,  76,  76,  77,  77,  78,  78,  79,  79,
+ 80,  80,  81,  81,  82,  82,  83,  83,  84,  84,  85,  85,  86,  86,  87,  87,
+ 80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
+ 96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
+128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
+144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
+192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
+208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223
+}
+};
+
+
+
+/**********************************************************************/
+/*****                     X Utility Functions                    *****/
+/**********************************************************************/
+
+
+/**
+ * Report the byte order of the machine running this code, using the X11
+ * constants LSBFirst or MSBFirst.
+ */
+#ifndef XFree86Server
+static int host_byte_order( void )
+{
+   const int probe = 1;
+   return (*(const char *) &probe == 1) ? LSBFirst : MSBFirst;
+}
+#endif
+
+
+/**
+ * Check if the X Shared Memory (MIT-SHM) extension can be used on a display.
+ *
+ * The probe is only compiled in when USE_XSHM is defined and XFree86Server
+ * is not; otherwise the answer is always 0.
+ *
+ * Return:  0 = not available
+ *          1 = shared XImage support available
+ *          2 = shared Pixmap support available also
+ */
+static int check_for_xshm( XMesaDisplay *display )
+{
+#if defined(USE_XSHM) && !defined(XFree86Server)
+   int shm_major, shm_minor, unused;
+   Bool shared_pixmaps;
+
+   if (!XQueryExtension( display, "MIT-SHM", &unused, &unused, &unused ))
+      return 0;
+   /* Extension present: ask which flavours of sharing it offers. */
+   if (XShmQueryVersion( display, &shm_major, &shm_minor,
+                         &shared_pixmaps ) != True)
+      return 0;
+   return (shared_pixmaps == True) ? 2 : 1;
+#else
+   /* No  XSHM support */
+   return 0;
+#endif
+}
+
+
+/**
+ * Gamma-correct an intensity VALUE lying in [0..max] and return the result
+ * on the same scale.  A gamma of exactly 1.0 returns VALUE untouched.
+ */
+static GLint
+gamma_adjust( GLfloat gamma, GLint value, GLint max )
+{
+   double normalized;
+
+   if (gamma == 1.0)
+      return value;
+
+   normalized = (double) value / (double) max;
+   return IROUND_POS((GLfloat) max * pow(normalized, 1.0F/gamma));
+}
+
+
+
+/**
+ * Return the true number of bits per pixel for XImages.
+ * For example, if we request a 24-bit deep visual we may actually need/get
+ * 32bpp XImages.  This function returns the appropriate bpp.
+ * \param xmv  the XMesaVisual describing the visual to be used for XImages
+ * \return true number of bits per pixel for XImages.  If no matching
+ *         pixmap format is found (server build) or the temporary XImage
+ *         cannot be created (client build), the visual's depth is returned
+ *         as a fallback.
+ */
+static int
+bits_per_pixel( XMesaVisual xmv )
+{
+#ifdef XFree86Server
+   const int depth = xmv->nplanes;
+   int i;
+   assert(depth > 0);
+   for (i = 0; i < screenInfo.numPixmapFormats; i++) {
+      if (screenInfo.formats[i].depth == depth)
+         return screenInfo.formats[i].bitsPerPixel;
+   }
+   return depth;  /* should never get here, but this should be safe */
+#else
+   XMesaDisplay *dpy = xmv->display;
+   XMesaVisualInfo visinfo = xmv->visinfo;
+   XMesaImage *img;
+   char *data;
+   int bitsPerPixel;
+
+   /* Create a temporary 1x1 XImage.  The pixel storage is kept in a local
+    * so that it can still be released if the image cannot be created.
+    */
+   data = (char *) MALLOC(8);
+   img = XCreateImage( dpy, visinfo->visual, visinfo->depth,
+                       ZPixmap, 0,           /*format, offset*/
+                       data,                 /*data*/
+                       1, 1,                 /*width, height*/
+                       32,                   /*bitmap_pad*/
+                       0                     /*bytes_per_line*/
+                     );
+   if (!img) {
+      /* An assert() alone would vanish in NDEBUG builds and leave a NULL
+       * dereference below; fall back to the visual depth like the server
+       * path does.
+       */
+      free( data );
+      return visinfo->depth;
+   }
+   /* grab the bits/pixel value */
+   bitsPerPixel = img->bits_per_pixel;
+   /* free the pixel storage ourselves and clear the pointer before
+    * destroying the XImage, so the storage is not released twice
+    */
+   free( img->data );
+   img->data = NULL;
+   XMesaDestroyImage( img );
+   return bitsPerPixel;
+#endif
+}
+
+
+
+/*
+ * Determine if a given X window ID is valid (window exists).
+ * Do this by calling XGetWindowAttributes() for the window and
+ * checking if we catch an X error.
+ * Input:  dpy - the display
+ *         win - the window to check for existence
+ * Return:  GL_TRUE - window exists
+ *          GL_FALSE - window doesn't exist
+ */
+#ifndef XFree86Server
+static GLboolean WindowExistsFlag;
+
+/**
+ * X error handler installed temporarily by window_exists().
+ * Clears WindowExistsFlag when the reported error is BadWindow; any other
+ * error leaves the flag untouched.  Always returns 0.
+ */
+static int window_exists_err_handler( XMesaDisplay* dpy, XErrorEvent* xerr )
+{
+   (void) dpy;
+
+   if (xerr->error_code != BadWindow)
+      return 0;
+
+   WindowExistsFlag = GL_FALSE;
+   return 0;
+}
+
+/**
+ * Test whether an X window ID refers to an existing window.
+ * A private error handler is installed around an XGetWindowAttributes()
+ * call; the previous handler is restored before returning.
+ * \return GL_TRUE unless the handler saw a BadWindow error
+ */
+static GLboolean window_exists( XMesaDisplay *dpy, Window win )
+{
+   int (*saved_handler)( XMesaDisplay*, XErrorEvent* );
+   XWindowAttributes attribs;
+
+   /* assume the window is there until the handler says otherwise */
+   WindowExistsFlag = GL_TRUE;
+
+   saved_handler = XSetErrorHandler(window_exists_err_handler);
+   XGetWindowAttributes( dpy, win, &attribs ); /* dummy request */
+   XSetErrorHandler(saved_handler);
+
+   return WindowExistsFlag;
+}
+
+/**
+ * Query the size of an X drawable with XGetGeometry().
+ * \param dpy  the X display
+ * \param d  the drawable (window or pixmap) to query
+ * \param width  returns the width in pixels (0 if the query failed)
+ * \param height  returns the height in pixels (0 if the query failed)
+ * \return the XGetGeometry() status; callers treat zero as failure
+ */
+static Status
+get_drawable_size( XMesaDisplay *dpy, Drawable d, GLuint *width, GLuint *height )
+{
+   Window root;
+   Status stat;
+   int xpos, ypos;
+   /* Pre-set the size so that a failed query does not copy indeterminate
+    * values into the caller's outputs.
+    */
+   unsigned int w = 0, h = 0, bw, depth;
+   stat = XGetGeometry(dpy, d, &root, &xpos, &ypos, &w, &h, &bw, &depth);
+   *width = w;
+   *height = h;
+   return stat;
+}
+#endif
+
+
+/**
+ * Look up the current size of the window (or pixmap) behind the given
+ * XMesaBuffer.
+ * \param dpy  the X display (only used in the client-side build)
+ * \param b  the buffer whose front drawable is queried
+ * \param width  returns width in pixels
+ * \param height  returns height in pixels
+ */
+void
+xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b,
+                      GLuint *width, GLuint *height)
+{
+#ifdef XFree86Server
+   /* inside the server the drawable is read directly; the result is
+    * limited to MAX_WIDTH x MAX_HEIGHT
+    */
+   *width = MIN2(b->frontxrb->drawable->width, MAX_WIDTH);
+   *height = MIN2(b->frontxrb->drawable->height, MAX_HEIGHT);
+#else
+   Status queryOk;
+
+   _glthread_LOCK_MUTEX(_xmesa_lock);
+   XSync(b->xm_visual->display, 0); /* added for Chromium */
+   queryOk = get_drawable_size(dpy, b->frontxrb->pixmap, width, height);
+   _glthread_UNLOCK_MUTEX(_xmesa_lock);
+
+   if (queryOk)
+      return;
+
+   /* probably querying a window that's recently been destroyed;
+    * report a 1x1 size instead
+    */
+   _mesa_warning(NULL, "XGetGeometry failed!\n");
+   *height = 1;
+   *width = 1;
+#endif
+}
+
+
+
+/**********************************************************************/
+/*****                Linked list of XMesaBuffers                 *****/
+/**********************************************************************/
+
+XMesaBuffer XMesaBufferList = NULL;
+
+
+/**
+ * Allocate a new XMesaBuffer object which corresponds to the given drawable.
+ * Note that XMesaBuffer is derived from GLframebuffer.
+ * The new XMesaBuffer will not have any size (Width=Height=0).
+ * On success the buffer is inserted at the head of XMesaBufferList.
+ *
+ * \param d  the corresponding X drawable (window or pixmap)
+ * \param type  either WINDOW, PIXMAP or PBUFFER, describing d
+ * \param vis  the buffer's visual
+ * \param cmap  the window's colormap, if known.
+ * \return new XMesaBuffer or NULL if any problem
+ */
+static XMesaBuffer
+create_xmesa_buffer(XMesaDrawable d, BufferType type,
+                    XMesaVisual vis, XMesaColormap cmap)
+{
+   XMesaBuffer b;
+
+   ASSERT(type == WINDOW || type == PIXMAP || type == PBUFFER);
+
+   b = (XMesaBuffer) CALLOC_STRUCT(xmesa_buffer);
+   if (!b)
+      return NULL;
+
+   b->display = vis->display;
+   b->xm_visual = vis;
+   b->type = type;
+   b->cmap = cmap;
+
+   _mesa_initialize_window_framebuffer(&b->mesa_buffer, &vis->mesa_visual);
+   b->mesa_buffer.Delete = xmesa_delete_framebuffer;
+
+   /*
+    * Allocate the color renderbuffers.  Both are created before either one
+    * is attached to the framebuffer, so that a failed back buffer
+    * allocation can release the still-unattached front renderbuffer
+    * instead of leaking it.
+    */
+   b->frontxrb = xmesa_new_renderbuffer(NULL, 0, &vis->mesa_visual, GL_FALSE);
+   if (!b->frontxrb) {
+      free(b);
+      return NULL;
+   }
+   if (vis->mesa_visual.doubleBufferMode) {
+      b->backxrb = xmesa_new_renderbuffer(NULL, 0, &vis->mesa_visual, GL_TRUE);
+      if (!b->backxrb) {
+         /* destroy the front renderbuffer through its own Delete hook */
+         b->frontxrb->Base.Delete(&b->frontxrb->Base);
+         free(b);
+         return NULL;
+      }
+   }
+
+   /*
+    * Front renderbuffer
+    */
+   b->frontxrb->Parent = b;
+   b->frontxrb->drawable = d;
+   b->frontxrb->pixmap = (XMesaPixmap) d;
+   _mesa_add_renderbuffer(&b->mesa_buffer, BUFFER_FRONT_LEFT,
+                          &b->frontxrb->Base);
+
+   /*
+    * Back renderbuffer
+    */
+   if (vis->mesa_visual.doubleBufferMode) {
+      b->backxrb->Parent = b;
+      /* determine back buffer implementation */
+      b->db_mode = vis->ximage_flag ? BACK_XIMAGE : BACK_PIXMAP;
+
+      _mesa_add_renderbuffer(&b->mesa_buffer, BUFFER_BACK_LEFT,
+                             &b->backxrb->Base);
+   }
+
+   /*
+    * Software alpha planes
+    */
+   if (vis->mesa_visual.alphaBits > 0
+       && vis->undithered_pf != PF_8A8B8G8R
+       && vis->undithered_pf != PF_8A8R8G8B) {
+      /* Visual has alpha, but pixel format doesn't support it.
+       * We'll use an alpha renderbuffer wrapper.
+       */
+      b->swAlpha = GL_TRUE;
+   }
+   else {
+      b->swAlpha = GL_FALSE;
+   }
+
+   /*
+    * Other renderbuffer (depth, stencil, etc)
+    */
+   _mesa_add_soft_renderbuffers(&b->mesa_buffer,
+                                GL_FALSE,  /* color */
+                                vis->mesa_visual.haveDepthBuffer,
+                                vis->mesa_visual.haveStencilBuffer,
+                                vis->mesa_visual.haveAccumBuffer,
+                                b->swAlpha,
+                                vis->mesa_visual.numAuxBuffers > 0 );
+
+   /* GLX_EXT_texture_from_pixmap */
+   b->TextureTarget = 0;
+   b->TextureFormat = GLX_TEXTURE_FORMAT_NONE_EXT;
+   b->TextureMipmap = 0;
+
+   /* insert buffer into linked list */
+   b->Next = XMesaBufferList;
+   XMesaBufferList = b;
+
+   return b;
+}
+
+
+/**
+ * Search XMesaBufferList for a buffer that uses the given X display and
+ * colormap, skipping the \c notThis buffer.
+ * \return the first matching buffer in list order, or NULL if none matches
+ */
+XMesaBuffer
+xmesa_find_buffer(XMesaDisplay *dpy, XMesaColormap cmap, XMesaBuffer notThis)
+{
+   XMesaBuffer candidate = XMesaBufferList;
+
+   while (candidate) {
+      if (candidate != notThis
+          && candidate->display == dpy
+          && candidate->cmap == cmap) {
+         return candidate;
+      }
+      candidate = candidate->Next;
+   }
+   return NULL;
+}
+
+
+/**
+ * Take a buffer out of XMesaBufferList and drop the list's reference to
+ * its framebuffer; the buffer is really deleted once no references remain.
+ * Reports a problem via _mesa_problem() if the buffer is not in the list.
+ */
+static void
+xmesa_free_buffer(XMesaBuffer buffer)
+{
+   XMesaBuffer cur = XMesaBufferList;
+   XMesaBuffer before = NULL;
+   struct gl_framebuffer *fb;
+
+   /* walk the list, remembering the node ahead of the current one */
+   while (cur && cur != buffer) {
+      before = cur;
+      cur = cur->Next;
+   }
+
+   if (!cur) {
+      /* buffer not found in XMesaBufferList */
+      _mesa_problem(NULL,"xmesa_free_buffer() - buffer not found\n");
+      return;
+   }
+
+   /* unlink buffer from list */
+   if (before)
+      before->Next = buffer->Next;
+   else
+      XMesaBufferList = buffer->Next;
+
+   /* mark as delete pending */
+   fb = &buffer->mesa_buffer;
+   fb->DeletePending = GL_TRUE;
+
+   /* Since the X window for the XMesaBuffer is going away, we don't
+    * want to dereference this pointer in the future.
+    */
+   buffer->frontxrb->drawable = 0;
+
+   /* Unreference.  If count = zero we'll really delete the buffer */
+   _mesa_reference_framebuffer(&fb, NULL);
+}
+
+
+/**
+ * Duplicate the X color bookkeeping of one XMesaBuffer in another: the
+ * color table, the pixel-to-R/G/B tables and the list of allocated colors
+ * together with its count.
+ * \param dst  buffer that receives the copy
+ * \param src  buffer to copy from
+ */
+static void
+copy_colortable_info(XMesaBuffer dst, const XMesaBuffer src)
+{
+   /* allocated-color bookkeeping */
+   dst->num_alloced = src->num_alloced;
+   memcpy(dst->alloced_colors, src->alloced_colors,
+          sizeof src->alloced_colors);
+
+   /* RGB -> pixel table */
+   memcpy(dst->color_table, src->color_table, sizeof src->color_table);
+
+   /* pixel -> RGB tables */
+   memcpy(dst->pixel_to_r, src->pixel_to_r, sizeof src->pixel_to_r);
+   memcpy(dst->pixel_to_g, src->pixel_to_g, sizeof src->pixel_to_g);
+   memcpy(dst->pixel_to_b, src->pixel_to_b, sizeof src->pixel_to_b);
+}
+
+
+
+/**********************************************************************/
+/*****                   Misc Private Functions                   *****/
+/**********************************************************************/
+
+
+/**
+ * A replacement for XAllocColor.  This function should never
+ * fail to allocate a color.  When XAllocColor fails, we return
+ * the nearest matching color.  If we have to allocate many colors
+ * this function isn't too efficient; the XQueryColors() could be
+ * done just once.
+ * Written by Michael Pichler, Brian Paul, Mark Kilgard
+ * Input:  client - client id passed to AllocColor() in the X server build;
+ *                  not used in the client-side build
+ *         dpy - X display
+ *         cmap - X colormap
+ *         cmapSize - size of colormap
+ * In/Out: color - the XColor struct
+ * Output:  exact - 1=exact color match, 0=closest match
+ *          alloced - 1=XAlloc worked, 0=XAlloc failed
+ *
+ * NOTE(review): if cmapSize is 0 the search loop below never runs and
+ * bestmatch stays -1, so ctable[bestmatch] would be read out of bounds;
+ * confirm that callers always pass a positive colormap size.
+ */
+static void
+noFaultXAllocColor( int client,
+                    XMesaDisplay *dpy,
+                    XMesaColormap cmap,
+                    int cmapSize,
+                    XMesaColor *color,
+                    int *exact, int *alloced )
+{
+#ifdef XFree86Server
+   Pixel *ppixIn;
+   xrgb *ctable;
+#else
+   /* we'll try to cache ctable for better remote display performance */
+   static Display *prevDisplay = NULL;
+   static XMesaColormap prevCmap = 0;
+   static int prevCmapSize = 0;
+   static XMesaColor *ctable = NULL;
+#endif
+   XMesaColor subColor;
+   int i, bestmatch;
+   double mindist;       /* 3*2^16^2 exceeds long int precision. */
+
+   (void) client;
+
+   /* First try just using XAllocColor. */
+#ifdef XFree86Server
+   if (AllocColor(cmap,
+		  &color->red, &color->green, &color->blue,
+		  &color->pixel,
+		  client) == Success)
+#else
+   if (XAllocColor(dpy, cmap, color))
+#endif
+   {
+      *exact = 1;
+      *alloced = 1;
+      return;
+   }
+
+   /* Alloc failed, search for closest match */
+
+   /* Retrieve color table entries. */
+   /* XXX alloca candidate. */
+#ifdef XFree86Server
+   /* NOTE(review): neither MALLOC result is checked before use */
+   ppixIn = (Pixel *) MALLOC(cmapSize * sizeof(Pixel));
+   ctable = (xrgb *) MALLOC(cmapSize * sizeof(xrgb));
+   for (i = 0; i < cmapSize; i++) {
+      ppixIn[i] = i;
+   }
+   QueryColors(cmap, cmapSize, ppixIn, ctable);
+#else
+   /* Re-query the colormap only when the display, colormap or size differs
+    * from the cached one, or when nothing has been cached yet.
+    */
+   if (prevDisplay != dpy || prevCmap != cmap
+       || prevCmapSize != cmapSize || !ctable) {
+      /* free previously cached color table */
+      if (ctable)
+         free(ctable);
+      /* Get the color table from X */
+      ctable = (XMesaColor *) MALLOC(cmapSize * sizeof(XMesaColor));
+      assert(ctable);
+      for (i = 0; i < cmapSize; i++) {
+         ctable[i].pixel = i;
+      }
+      XQueryColors(dpy, cmap, ctable, cmapSize);
+      prevDisplay = dpy;
+      prevCmap = cmap;
+      prevCmapSize = cmapSize;
+   }
+#endif
+
+   /* Find best match: the entry with the smallest weighted squared RGB
+    * distance (weights 0.30 / 0.59 / 0.11 for red / green / blue).
+    */
+   bestmatch = -1;
+   mindist = 0.0;
+   for (i = 0; i < cmapSize; i++) {
+      double dr = 0.30 * ((double) color->red - (double) ctable[i].red);
+      double dg = 0.59 * ((double) color->green - (double) ctable[i].green);
+      double db = 0.11 * ((double) color->blue - (double) ctable[i].blue);
+      double dist = dr * dr + dg * dg + db * db;
+      if (bestmatch < 0 || dist < mindist) {
+         bestmatch = i;
+         mindist = dist;
+      }
+   }
+
+   /* Return result. */
+   subColor.red   = ctable[bestmatch].red;
+   subColor.green = ctable[bestmatch].green;
+   subColor.blue  = ctable[bestmatch].blue;
+   /* Try to allocate the closest match color.  This should only
+    * fail if the cell is read/write.  Otherwise, we're incrementing
+    * the cell's reference count.
+    */
+#ifdef XFree86Server
+   if (AllocColor(cmap,
+		  &subColor.red, &subColor.green, &subColor.blue,
+		  &subColor.pixel,
+		  client) == Success) {
+#else
+   if (XAllocColor(dpy, cmap, &subColor)) {
+#endif
+      *alloced = 1;
+   }
+   else {
+      /* do this to work around a problem reported by Frank Ortega */
+      /* the colormap index of the best match is used as the pixel value */
+      subColor.pixel = (unsigned long) bestmatch;
+      subColor.red   = ctable[bestmatch].red;
+      subColor.green = ctable[bestmatch].green;
+      subColor.blue  = ctable[bestmatch].blue;
+      subColor.flags = DoRed | DoGreen | DoBlue;
+      *alloced = 0;
+   }
+#ifdef XFree86Server
+   free(ppixIn);
+   free(ctable);
+#else
+   /* don't free table, save it for next time */
+#endif
+
+   /* hand the substitute color back to the caller; it is never "exact" */
+   *color = subColor;
+   *exact = 0;
+}
+
+
+
+/**
+ * Prepare a visual, and optionally a buffer, for the PF_Grayscale pixel
+ * format.  Only visual depths from 4 to 16 bits are accepted.
+ * When a buffer is given, its color tables are either copied from another
+ * buffer that shares the same display and colormap, or filled by
+ * allocating 256 gamma-corrected gray shades.
+ * \param client  passed through to noFaultXAllocColor()
+ * \param v  the visual to set up
+ * \param buffer  the buffer to set up, may be NULL
+ * \param cmap  the colormap; must be non-zero when buffer is given
+ * \return GL_TRUE on success, GL_FALSE for an unsupported depth or a
+ *         missing colormap
+ */
+static GLboolean
+setup_grayscale(int client, XMesaVisual v,
+                XMesaBuffer buffer, XMesaColormap cmap)
+{
+   if (!(GET_VISUAL_DEPTH(v) >= 4 && GET_VISUAL_DEPTH(v) <= 16))
+      return GL_FALSE;
+
+   if (buffer) {
+      XMesaBuffer donor;
+
+      if (!cmap)
+         return GL_FALSE;
+
+      donor = xmesa_find_buffer(v->display, cmap, buffer);
+      if (!donor) {
+         /* Nobody to borrow from: allocate 256 shades of gray */
+         int level;
+         int inexact = 0;
+
+         for (level = 0; level < 256; level++) {
+            const GLint red   = gamma_adjust( v->RedGamma,   level, 255 );
+            const GLint green = gamma_adjust( v->GreenGamma, level, 255 );
+            const GLint blue  = gamma_adjust( v->BlueGamma,  level, 255 );
+            XMesaColor shade;
+            int isExact, gotCell;
+
+            /* replicate the 8-bit value into both bytes of the 16-bit
+             * X color component
+             */
+            shade.red   = (red << 8) | red;
+            shade.green = (green << 8) | green;
+            shade.blue  = (blue << 8) | blue;
+            noFaultXAllocColor( client, v->display,
+                                cmap, GET_COLORMAP_SIZE(v),
+                                &shade, &isExact, &gotCell );
+            if (!isExact)
+               inexact++;
+
+            if (gotCell) {
+               /* remember the pixels that were allocated */
+               assert(buffer->num_alloced<256);
+               buffer->alloced_colors[buffer->num_alloced] = shade.pixel;
+               buffer->num_alloced++;
+            }
+
+            /* forward (gray -> pixel) and reverse (pixel -> gray) tables */
+            buffer->color_table[level] = shade.pixel;
+            assert(shade.pixel < 65536);
+            buffer->pixel_to_r[shade.pixel] = level;
+            buffer->pixel_to_g[shade.pixel] = level;
+            buffer->pixel_to_b[shade.pixel] = level;
+         }
+
+         if (inexact && _mesa_getenv("MESA_DEBUG")) {
+            _mesa_warning(NULL,
+                  "Note: %d out of 256 needed colors do not match exactly.\n",
+                  inexact );
+         }
+      }
+      else {
+         /* Another XMesaBuffer already uses this X colormap; reuse its
+          * tables to avoid time spent in noFaultXAllocColor.
+          */
+         copy_colortable_info(buffer, donor);
+      }
+   }
+
+   v->undithered_pf = PF_Grayscale;
+   v->dithered_pf = PF_Grayscale;
+   return GL_TRUE;
+}
+
+
+
+/**
+ * Setup RGB rendering for a window with a PseudoColor, StaticColor,
+ * or 8-bit TrueColor visual.  We try to allocate a palette of
+ * DITH_R x DITH_G x DITH_B colors and dither to approximate a 24-bit RGB
+ * color.  While this function was originally designed just for 8-bit
+ * visuals, it has also proven to work from 4-bit up to 16-bit visuals.
+ * Dithering code contributed by Bob Mercier.
+ * \param client  passed through to noFaultXAllocColor()
+ * \param v  the visual to set up
+ * \param buffer  the buffer to set up, may be NULL
+ * \param cmap  the colormap; must be non-zero when buffer is given
+ * \return GL_TRUE on success, GL_FALSE for an unsupported depth or a
+ *         missing colormap
+ */
+static GLboolean
+setup_dithered_color(int client, XMesaVisual v,
+                     XMesaBuffer buffer, XMesaColormap cmap)
+{
+   if (!(GET_VISUAL_DEPTH(v) >= 4 && GET_VISUAL_DEPTH(v) <= 16))
+      return GL_FALSE;
+
+   if (buffer) {
+      XMesaBuffer donor;
+
+      if (!cmap)
+         return GL_FALSE;
+
+      donor = xmesa_find_buffer(v->display, cmap, buffer);
+      if (!donor) {
+         /* Allocate X colors and fill color_table[] plus the reverse
+          * pixel_to_r/g/b[] tables
+          */
+         int ri, gi, bi;
+         int inexact = 0;
+
+         for (ri = 0; ri < DITH_R; ri++) {
+            for (gi = 0; gi < DITH_G; gi++) {
+               for (bi = 0; bi < DITH_B; bi++) {
+                  XMesaColor entry;
+                  int isExact, gotCell;
+                  int slot;
+
+                  /* spread each level over the 16-bit X color range */
+                  entry.red   = gamma_adjust(v->RedGamma,
+                                             ri*65535/(DITH_R-1), 65535);
+                  entry.green = gamma_adjust(v->GreenGamma,
+                                             gi*65535/(DITH_G-1), 65535);
+                  entry.blue  = gamma_adjust(v->BlueGamma,
+                                             bi*65535/(DITH_B-1), 65535);
+                  noFaultXAllocColor( client, v->display,
+                                      cmap, GET_COLORMAP_SIZE(v),
+                                      &entry, &isExact, &gotCell );
+                  if (!isExact)
+                     inexact++;
+
+                  if (gotCell) {
+                     /* remember the pixels that were allocated */
+                     assert(buffer->num_alloced<256);
+                     buffer->alloced_colors[buffer->num_alloced] = entry.pixel;
+                     buffer->num_alloced++;
+                  }
+
+                  slot = DITH_MIX( ri, gi, bi );
+                  assert(slot < 576);
+                  buffer->color_table[slot] = entry.pixel;
+                  assert(entry.pixel < 65536);
+                  buffer->pixel_to_r[entry.pixel] = ri * 255 / (DITH_R-1);
+                  buffer->pixel_to_g[entry.pixel] = gi * 255 / (DITH_G-1);
+                  buffer->pixel_to_b[entry.pixel] = bi * 255 / (DITH_B-1);
+               }
+            }
+         }
+
+         if (inexact && _mesa_getenv("MESA_DEBUG")) {
+            _mesa_warning(NULL,
+                  "Note: %d out of %d needed colors do not match exactly.\n",
+                  inexact, DITH_R * DITH_G * DITH_B );
+         }
+      }
+      else {
+         /* A matching XMesaBuffer already exists; reuse its colormap
+          * tables to avoid time spent in noFaultXAllocColor.
+          */
+         copy_colortable_info(buffer, donor);
+      }
+   }
+
+   v->undithered_pf = PF_Lookup;
+   v->dithered_pf = PF_Dither;
+   return GL_TRUE;
+}
+
+
+/**
+ * Setup for Hewlett Packard Color Recovery 8-bit TrueColor mode.
+ * HPCR simulates 24-bit color fidelity with an 8-bit frame buffer.
+ * Builds the visual's gamma-corrected dithering tables from the file-level
+ * hpcr_rgbTbl, selects PF_HPCR for both pixel formats and, if the
+ * MESA_HPCR_CLEAR environment variable is set, creates the pixmap (and in
+ * the client build the XImage) used for dithered clears.
+ */
+static void
+setup_8bit_hpcr(XMesaVisual v)
+{
+   /* HP Color Recovery contributed by:  Alex De Bruyn (ad@lms.be)
+    * To work properly, the atom _HP_RGB_SMOOTH_MAP_LIST must be defined
+    * on the root window AND the colormap obtainable by XGetRGBColormaps
+    * for that atom must be set on the window.  (see also tkInitWindow)
+    * If that colormap is not set, the output will look stripy.
+    */
+
+   /* per-channel gamma exponents */
+   const double redExp   = 1.0 / v->RedGamma;
+   const double greenExp = 1.0 / v->GreenGamma;
+   const double blueExp  = 1.0 / v->BlueGamma;
+   int idx;
+
+   /* Setup color tables with gamma correction.  Red and green are kept
+    * within [16,239], blue within [32,223].
+    */
+   for (idx = 0; idx < 256; idx++) {
+      GLint red   = IROUND_POS(255.0 * pow( hpcr_rgbTbl[0][idx]/255.0, redExp ));
+      GLint green = IROUND_POS(255.0 * pow( hpcr_rgbTbl[1][idx]/255.0, greenExp ));
+      GLint blue  = IROUND_POS(255.0 * pow( hpcr_rgbTbl[2][idx]/255.0, blueExp ));
+
+      v->hpcr_rgbTbl[0][idx] = CLAMP( red, 16, 239 );
+      v->hpcr_rgbTbl[1][idx] = CLAMP( green, 16, 239 );
+      v->hpcr_rgbTbl[2][idx] = CLAMP( blue, 32, 223 );
+   }
+
+   v->undithered_pf = PF_HPCR;  /* can't really disable dithering for now */
+   v->dithered_pf = PF_HPCR;
+
+   /* which method should be used to clear:
+    *   GL_FALSE: keep the ordinary method
+    *   GL_TRUE : clear with dither pattern
+    */
+   if (_mesa_getenv("MESA_HPCR_CLEAR"))
+      v->hpcr_clear_flag = GL_TRUE;
+   else
+      v->hpcr_clear_flag = GL_FALSE;
+
+   if (!v->hpcr_clear_flag)
+      return;
+
+   /* 16x2, 8-bit deep pixmap for the dithered clear */
+   v->hpcr_clear_pixmap = XMesaCreatePixmap(v->display,
+                                            DefaultRootWindow(v->display),
+                                            16, 2, 8);
+#ifndef XFree86Server
+   v->hpcr_clear_ximage = XGetImage(v->display, v->hpcr_clear_pixmap,
+                                    0, 0, 16, 2, AllPlanes, ZPixmap);
+#endif
+}
+
+
+/**
+ * Setup RGB rendering for a window with a True/DirectColor visual.
+ * Derives the per-channel bit shifts from the visual's color masks, fills
+ * the pixel<->RGB lookup tables and the dithering kernel, and then picks a
+ * specialized pixel format when the visual matches a known layout.
+ * \param v  the visual to set up
+ * \param buffer  unused
+ * \param cmap  unused
+ *
+ * NOTE(review): a color mask of 0 would make the corresponding shift loop
+ * below spin forever; confirm that every visual reaching this function has
+ * non-zero red, green and blue masks.
+ */
+static void
+setup_truecolor(XMesaVisual v, XMesaBuffer buffer, XMesaColormap cmap)
+{
+   unsigned long rmask, gmask, bmask;
+   (void) buffer;
+   (void) cmap;
+
+   /* Compute red multiplier (mask) and bit shift */
+   /* after the loop rmask holds the mask shifted down to bit 0 */
+   v->rshift = 0;
+   rmask = GET_REDMASK(v);
+   while ((rmask & 1)==0) {
+      v->rshift++;
+      rmask = rmask >> 1;
+   }
+
+   /* Compute green multiplier (mask) and bit shift */
+   v->gshift = 0;
+   gmask = GET_GREENMASK(v);
+   while ((gmask & 1)==0) {
+      v->gshift++;
+      gmask = gmask >> 1;
+   }
+
+   /* Compute blue multiplier (mask) and bit shift */
+   v->bshift = 0;
+   bmask = GET_BLUEMASK(v);
+   while ((bmask & 1)==0) {
+      v->bshift++;
+      bmask = bmask >> 1;
+   }
+
+   /*
+    * Compute component-to-pixel lookup tables and dithering kernel
+    */
+   {
+      static GLubyte kernel[16] = {
+          0*16,  8*16,  2*16, 10*16,
+         12*16,  4*16, 14*16,  6*16,
+          3*16, 11*16,  1*16,  9*16,
+         15*16,  7*16, 13*16,  5*16,
+      };
+      /* number of bits in each (shifted-down) channel mask */
+      GLint rBits = _mesa_bitcount(rmask);
+      GLint gBits = _mesa_bitcount(gmask);
+      GLint bBits = _mesa_bitcount(bmask);
+      GLint maxBits;
+      GLuint i;
+
+      /* convert pixel components in [0,_mask] to RGB values in [0,255] */
+      /* NOTE(review): these loops write mask+1 entries; presumably the
+       * PixelTo* tables are sized for channels of at most 8 bits - confirm
+       */
+      for (i=0; i<=rmask; i++)
+         v->PixelToR[i] = (unsigned char) ((i * 255) / rmask);
+      for (i=0; i<=gmask; i++)
+         v->PixelToG[i] = (unsigned char) ((i * 255) / gmask);
+      for (i=0; i<=bmask; i++)
+         v->PixelToB[i] = (unsigned char) ((i * 255) / bmask);
+
+      /* convert RGB values from [0,255] to pixel components */
+
+      /* NOTE(review): (8-xBits) becomes negative for channels wider than
+       * 8 bits, which would make the shift below invalid - confirm such
+       * visuals cannot reach this code
+       */
+      for (i=0;i<256;i++) {
+         GLint r = gamma_adjust(v->RedGamma,   i, 255);
+         GLint g = gamma_adjust(v->GreenGamma, i, 255);
+         GLint b = gamma_adjust(v->BlueGamma,  i, 255);
+         v->RtoPixel[i] = (r >> (8-rBits)) << v->rshift;
+         v->GtoPixel[i] = (g >> (8-gBits)) << v->gshift;
+         v->BtoPixel[i] = (b >> (8-bBits)) << v->bshift;
+      }
+      /* overflow protection */
+      /* entries 256..511 repeat the value for 255 */
+      for (i=256;i<512;i++) {
+         v->RtoPixel[i] = v->RtoPixel[255];
+         v->GtoPixel[i] = v->GtoPixel[255];
+         v->BtoPixel[i] = v->BtoPixel[255];
+      }
+
+      /* setup dithering kernel */
+      /* the kernel values are scaled down by the widest channel's bits */
+      maxBits = rBits;
+      if (gBits > maxBits)  maxBits = gBits;
+      if (bBits > maxBits)  maxBits = bBits;
+      for (i=0;i<16;i++) {
+         v->Kernel[i] = kernel[i] >> maxBits;
+      }
+
+      /* generic formats; dithering is only selected below 24 bits depth */
+      v->undithered_pf = PF_Truecolor;
+      v->dithered_pf = (GET_VISUAL_DEPTH(v)<24) ? PF_Dither_True : PF_Truecolor;
+   }
+
+   /*
+    * Now check for TrueColor visuals which we can optimize.
+    * Except for HPCR, every special case below requires all three gamma
+    * factors to be exactly 1.0.
+    */
+   if (   GET_REDMASK(v)  ==0x0000ff
+       && GET_GREENMASK(v)==0x00ff00
+       && GET_BLUEMASK(v) ==0xff0000
+       && CHECK_BYTE_ORDER(v)
+       && v->BitsPerPixel==32
+       && v->RedGamma==1.0 && v->GreenGamma==1.0 && v->BlueGamma==1.0) {
+      /* common 32 bpp config used on SGI, Sun */
+      v->undithered_pf = v->dithered_pf = PF_8A8B8G8R; /* ABGR */
+   }
+   else if (GET_REDMASK(v)  == 0xff0000
+         && GET_GREENMASK(v)== 0x00ff00
+         && GET_BLUEMASK(v) == 0x0000ff
+         && CHECK_BYTE_ORDER(v)
+         && v->RedGamma == 1.0 && v->GreenGamma == 1.0 && v->BlueGamma == 1.0){
+      /* for any other bits/pixel value the generic formats chosen above
+       * stay in effect
+       */
+      if (v->BitsPerPixel==32) {
+         /* if 32 bpp, and visual indicates 8 bpp alpha channel */
+         if (GET_VISUAL_DEPTH(v) == 32 && v->mesa_visual.alphaBits == 8)
+            v->undithered_pf = v->dithered_pf = PF_8A8R8G8B; /* ARGB */
+         else
+            v->undithered_pf = v->dithered_pf = PF_8R8G8B; /* xRGB */
+      }
+      else if (v->BitsPerPixel == 24) {
+         v->undithered_pf = v->dithered_pf = PF_8R8G8B24; /* RGB */
+      }
+   }
+   else if (GET_REDMASK(v)  ==0xf800
+       &&   GET_GREENMASK(v)==0x07e0
+       &&   GET_BLUEMASK(v) ==0x001f
+       && CHECK_BYTE_ORDER(v)
+       && v->BitsPerPixel==16
+       && v->RedGamma==1.0 && v->GreenGamma==1.0 && v->BlueGamma==1.0) {
+      /* 5-6-5 RGB */
+      v->undithered_pf = PF_5R6G5B;
+      v->dithered_pf = PF_Dither_5R6G5B;
+   }
+   else if (GET_REDMASK(v)  ==0xe0
+       &&   GET_GREENMASK(v)==0x1c
+       &&   GET_BLUEMASK(v) ==0x03
+       && CHECK_FOR_HPCR(v)) {
+      /* 8-bit HP color recovery */
+      setup_8bit_hpcr( v );
+   }
+}
+
+
+
+/**
+ * Setup RGB rendering for a window with a monochrome visual: both pixel
+ * formats become PF_1Bit and the bit-flip flag is derived from the black
+ * pixel value.
+ * \param v  the visual to set up
+ * \param b  unused
+ */
+static void
+setup_monochrome( XMesaVisual v, XMesaBuffer b )
+{
+   (void) b;
+
+   v->undithered_pf = PF_1Bit;
+   v->dithered_pf = PF_1Bit;
+
+   /* if black=1 then we must flip pixel values */
+   v->bitFlip = (GET_BLACK_PIXEL(v) != 0);
+}
+
+
+
+/**
+ * When a context is bound for the first time, we can finally finish
+ * initializing the context's visual and buffer information.
+ * The pixel formats are chosen from the visual's class and depth; when both
+ * a buffer and a window are given, the buffer's graphics contexts and its
+ * row-buffer XImage are created as well.
+ * \param v  the XMesaVisual to initialize
+ * \param b  the XMesaBuffer to initialize (may be NULL)
+ * \param window  the window/pixmap we're rendering into
+ * \param cmap  the colormap associated with the window/pixmap
+ * \return GL_TRUE=success, GL_FALSE=failure
+ */
+static GLboolean
+initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b,
+                             XMesaDrawable window,
+                             XMesaColormap cmap)
+{
+   int client = 0;
+   const int xclass = v->mesa_visual.visualType;
+
+#ifdef XFree86Server
+   client = (window) ? CLIENT_ID(window->id) : 0;
+#endif
+
+   ASSERT(!b || b->xm_visual == v);
+
+   /* Save true bits/pixel */
+   v->BitsPerPixel = bits_per_pixel(v);
+   assert(v->BitsPerPixel > 0);
+
+   /* RGB WINDOW:
+    * We support RGB rendering into almost any kind of visual.
+    */
+   if (xclass == GLX_TRUE_COLOR || xclass == GLX_DIRECT_COLOR) {
+      setup_truecolor( v, b, cmap );
+   }
+   else if (xclass == GLX_STATIC_GRAY && GET_VISUAL_DEPTH(v) == 1) {
+      setup_monochrome( v, b );
+   }
+   else if (xclass == GLX_GRAY_SCALE || xclass == GLX_STATIC_GRAY) {
+      if (!setup_grayscale( client, v, b, cmap )) {
+         return GL_FALSE;
+      }
+   }
+   else if ((xclass == GLX_PSEUDO_COLOR || xclass == GLX_STATIC_COLOR)
+            && GET_VISUAL_DEPTH(v)>=4 && GET_VISUAL_DEPTH(v)<=16) {
+      if (!setup_dithered_color( client, v, b, cmap )) {
+         return GL_FALSE;
+      }
+   }
+   else {
+      _mesa_warning(NULL, "XMesa: RGB mode rendering not supported in given visual.\n");
+      return GL_FALSE;
+   }
+   v->mesa_visual.indexBits = 0;
+
+   /* MESA_NO_DITHER forces the undithered format for both cases */
+   if (_mesa_getenv("MESA_NO_DITHER")) {
+      v->dithered_pf = v->undithered_pf;
+   }
+
+
+   /*
+    * If MESA_INFO env var is set print out some debugging info
+    * which can help Brian figure out what's going on when a user
+    * reports bugs.
+    */
+   if (_mesa_getenv("MESA_INFO")) {
+      printf("X/Mesa visual = %p\n", (void *) v);
+      printf("X/Mesa dithered pf = %u\n", v->dithered_pf);
+      printf("X/Mesa undithered pf = %u\n", v->undithered_pf);
+      printf("X/Mesa level = %d\n", v->mesa_visual.level);
+      printf("X/Mesa depth = %d\n", GET_VISUAL_DEPTH(v));
+      printf("X/Mesa bits per pixel = %d\n", v->BitsPerPixel);
+   }
+
+   if (b && window) {
+      char *data;
+
+      /* Do window-specific initializations */
+
+      /* these should have been set in create_xmesa_buffer */
+      ASSERT(b->frontxrb->drawable == window);
+      ASSERT(b->frontxrb->pixmap == (XMesaPixmap) window);
+
+      /* Setup for single/double buffering */
+      if (v->mesa_visual.doubleBufferMode) {
+         /* Double buffered */
+         b->shm = check_for_xshm( v->display );
+      }
+
+      /* X11 graphics contexts */
+#ifdef XFree86Server
+      b->gc = CreateScratchGC(v->display, window->depth);
+#else
+      b->gc = XCreateGC( v->display, window, 0, NULL );
+#endif
+      XMesaSetFunction( v->display, b->gc, GXcopy );
+
+      /* cleargc - for glClear() */
+#ifdef XFree86Server
+      b->cleargc = CreateScratchGC(v->display, window->depth);
+#else
+      b->cleargc = XCreateGC( v->display, window, 0, NULL );
+#endif
+      XMesaSetFunction( v->display, b->cleargc, GXcopy );
+
+      /*
+       * Don't generate Graphics Expose/NoExpose events in swapbuffers().
+       * Patch contributed by Michael Pichler May 15, 1995.
+       */
+#ifdef XFree86Server
+      b->swapgc = CreateScratchGC(v->display, window->depth);
+      {
+         /* note: this local array shadows the visual 'v' inside the block */
+         CARD32 v[1];
+         v[0] = FALSE;
+         dixChangeGC(NullClient, b->swapgc, GCGraphicsExposures, v, NULL);
+      }
+#else
+      {
+         XGCValues gcvalues;
+         gcvalues.graphics_exposures = False;
+         b->swapgc = XCreateGC(v->display, window,
+                               GCGraphicsExposures, &gcvalues);
+      }
+#endif
+      XMesaSetFunction( v->display, b->swapgc, GXcopy );
+      /*
+       * Set fill style and tile pixmap once for all for HPCR stuff
+       * (instead of doing it each time in clear_color_HPCR_pixmap())
+       * Initialize whole stuff
+       * Patch contributed by Jacques Leroy March 8, 1998.
+       */
+      if (v->hpcr_clear_flag && b->backxrb && b->backxrb->pixmap) {
+         int i;
+         for (i = 0; i < 16; i++) {
+            XMesaPutPixel(v->hpcr_clear_ximage, i, 0, 0);
+            XMesaPutPixel(v->hpcr_clear_ximage, i, 1, 0);
+         }
+         XMesaPutImage(b->display, (XMesaDrawable) v->hpcr_clear_pixmap,
+                       b->cleargc, v->hpcr_clear_ximage, 0, 0, 0, 0, 16, 2);
+         XMesaSetFillStyle( v->display, b->cleargc, FillTiled);
+         XMesaSetTile( v->display, b->cleargc, v->hpcr_clear_pixmap );
+      }
+
+      /* Initialize the row buffer XImage for use in write_color_span() */
+      data = (char*) MALLOC(MAX_WIDTH*4);
+      if (!data) {
+         /* without pixel storage the row image would be unusable */
+         return GL_FALSE;
+      }
+#ifdef XFree86Server
+      b->rowimage = XMesaCreateImage(GET_VISUAL_DEPTH(v), MAX_WIDTH, 1, data);
+#else
+      b->rowimage = XCreateImage( v->display,
+                                  v->visinfo->visual,
+                                  v->visinfo->depth,
+                                  ZPixmap, 0,           /*format, offset*/
+                                  data,                 /*data*/
+                                  MAX_WIDTH, 1,         /*width, height*/
+                                  32,                   /*bitmap_pad*/
+                                  0                     /*bytes_per_line*/ );
+#endif
+      if (!b->rowimage) {
+         /* no image took ownership of the storage; release it here */
+         free(data);
+         return GL_FALSE;
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
+
+/*
+ * Convert an RGBA color to a pixel value.
+ * \param ctx  the GL context; its XMesa context supplies the visual
+ * \param r, g, b  color components
+ * \param a  alpha component; only used by the PF_8A8B8G8R and PF_8A8R8G8B
+ *           cases
+ * \param pixelFormat  one of the PF_* pixel format codes
+ * \return the pixel value for the color in the given format.  0 is
+ *         returned for PF_Index and, after reporting the problem, for an
+ *         unrecognized format.
+ */
+unsigned long
+xmesa_color_to_pixel(GLcontext *ctx,
+                     GLubyte r, GLubyte g, GLubyte b, GLubyte a,
+                     GLuint pixelFormat)
+{
+   /* NOTE(review): besides the PF_1Bit case, the packing/dithering macros
+    * below presumably refer to this local by name - confirm before
+    * renaming it
+    */
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   switch (pixelFormat) {
+      case PF_Index:
+         return 0;
+      case PF_Truecolor:
+         {
+            unsigned long p;
+            PACK_TRUECOLOR( p, r, g, b );
+            return p;
+         }
+      case PF_8A8B8G8R:
+         return PACK_8A8B8G8R( r, g, b, a );
+      case PF_8A8R8G8B:
+         return PACK_8A8R8G8B( r, g, b, a );
+      case PF_8R8G8B:
+         /* fall through */
+      case PF_8R8G8B24:
+         return PACK_8R8G8B( r, g, b );
+      case PF_5R6G5B:
+         return PACK_5R6G5B( r, g, b );
+      case PF_Dither:
+         {
+            DITHER_SETUP;
+            return DITHER( 1, 0, r, g, b );
+         }
+      case PF_1Bit:
+         /* 382 = (3*255)/2 */
+         /* brighter than half intensity gives 1, inverted by bitFlip */
+         return ((r+g+b) > 382) ^ xmesa->xm_visual->bitFlip;
+      case PF_HPCR:
+         return DITHER_HPCR(1, 1, r, g, b);
+      case PF_Lookup:
+         {
+            LOOKUP_SETUP;
+            return LOOKUP( r, g, b );
+         }
+      case PF_Grayscale:
+         return GRAY_RGB( r, g, b );
+      case PF_Dither_True:
+         /* fall through */
+      case PF_Dither_5R6G5B:
+         {
+            unsigned long p;
+            PACK_TRUEDITHER(p, 1, 0, r, g, b);
+            return p;
+         }
+      default:
+         _mesa_problem(ctx, "Bad pixel format in xmesa_color_to_pixel");
+   }
+   /* only reached for an unrecognized pixel format */
+   return 0;
+}
+
+
+#define NUM_VISUAL_TYPES   6
+
+/**
+ * Map an X visual class onto the corresponding GLX visual type.
+ *
+ * \param visualType X visual class (i.e., \c TrueColor, \c StaticGray, etc.)
+ * \return The matching GLX visual type, or \c GLX_NONE when \c visualType
+ *         lies outside the table of known X visual classes.
+ *
+ * \note
+ * This code was lifted directly from lib/GL/glx/glcontextmodes.c in the
+ * DRI CVS tree.
+ */
+static GLint
+xmesa_convert_from_x_visual_type( int visualType )
+{
+   /* Indexed directly by the visualType argument. */
+   static const int glx_visual_types[ NUM_VISUAL_TYPES ] = {
+      GLX_STATIC_GRAY,
+      GLX_GRAY_SCALE,
+      GLX_STATIC_COLOR,
+      GLX_PSEUDO_COLOR,
+      GLX_TRUE_COLOR,
+      GLX_DIRECT_COLOR
+   };
+
+   /* reject negative values and values past the end of the table */
+   if (visualType < 0 || visualType >= NUM_VISUAL_TYPES) {
+      return GLX_NONE;
+   }
+
+   return glx_visual_types[ visualType ];
+}
+
+
+/**********************************************************************/
+/*****                       Public Functions                     *****/
+/**********************************************************************/
+
+
+/*
+ * Create a new X/Mesa visual.
+ * Input:  display - X11 display
+ *         visinfo - an XVisualInfo pointer
+ *         rgb_flag - GL_TRUE = RGB mode,
+ *                    GL_FALSE = color index mode
+ *         alpha_flag - alpha buffer requested?
+ *         db_flag - GL_TRUE = double-buffered,
+ *                   GL_FALSE = single buffered
+ *         stereo_flag - stereo visual?
+ *         ximage_flag - GL_TRUE = use an XImage for back buffer,
+ *                       GL_FALSE = use an off-screen pixmap for back buffer
+ *         depth_size - requested bits/depth values, or zero
+ *         stencil_size - requested bits/stencil values, or zero
+ *         accum_red_size - requested bits/red accum values, or zero
+ *         accum_green_size - requested bits/green accum values, or zero
+ *         accum_blue_size - requested bits/blue accum values, or zero
+ *         accum_alpha_size - requested bits/alpha accum values, or zero
+ *         num_samples - number of samples/pixel if multisampling, or zero
+ *         level - visual level, usually 0
+ *         visualCaveat - ala the GLX extension, usually GLX_NONE
+ * Return:  a new XMesaVisual or 0 if error.
+ */
+PUBLIC
+XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
+                               XMesaVisualInfo visinfo,
+                               GLboolean rgb_flag,
+                               GLboolean alpha_flag,
+                               GLboolean db_flag,
+                               GLboolean stereo_flag,
+                               GLboolean ximage_flag,
+                               GLint depth_size,
+                               GLint stencil_size,
+                               GLint accum_red_size,
+                               GLint accum_green_size,
+                               GLint accum_blue_size,
+                               GLint accum_alpha_size,
+                               GLint num_samples,
+                               GLint level,
+                               GLint visualCaveat )
+{
+   char *gamma;
+   XMesaVisual v;
+   GLint red_bits, green_bits, blue_bits, alpha_bits;
+
+#ifndef XFree86Server
+   /* For debugging only */
+   if (_mesa_getenv("MESA_XSYNC")) {
+      /* This makes debugging X easier.
+       * In your debugger, set a breakpoint on _XError to stop when an
+       * X protocol error is generated.
+       */
+      XSynchronize( display, 1 );
+   }
+#endif
+
+   /* Color-index rendering not supported. */
+   if (!rgb_flag)
+      return NULL;
+
+   v = (XMesaVisual) CALLOC_STRUCT(xmesa_visual);
+   if (!v) {
+      return NULL;
+   }
+
+   v->display = display;
+
+   /* Save a copy of the XVisualInfo struct because the user may Xfree()
+    * the struct but we may need some of the information contained in it
+    * at a later time.
+    */
+#ifndef XFree86Server
+   v->visinfo = (XVisualInfo *) MALLOC(sizeof(*visinfo));
+   if(!v->visinfo) {
+      free(v);
+      return NULL;
+   }
+   memcpy(v->visinfo, visinfo, sizeof(*visinfo));
+#endif
+
+   /* check for MESA_GAMMA environment variable */
+   gamma = _mesa_getenv("MESA_GAMMA");
+   if (gamma) {
+      /* Pre-zero the values so that components sscanf() does not parse
+       * are caught by the <= 0 checks below: red defaults to 1.0 and
+       * green/blue default to the red value.
+       */
+      v->RedGamma = v->GreenGamma = v->BlueGamma = 0.0;
+      sscanf( gamma, "%f %f %f", &v->RedGamma, &v->GreenGamma, &v->BlueGamma );
+      if (v->RedGamma<=0.0)    v->RedGamma = 1.0;
+      if (v->GreenGamma<=0.0)  v->GreenGamma = v->RedGamma;
+      if (v->BlueGamma<=0.0)   v->BlueGamma = v->RedGamma;
+   }
+   else {
+      v->RedGamma = v->GreenGamma = v->BlueGamma = 1.0;
+   }
+
+   v->ximage_flag = ximage_flag;
+
+#ifdef XFree86Server
+   /* We could calculate these values by ourselves.  nplanes is either the sum
+    * of the red, green, and blue bits or the number index bits.
+    * ColormapEntries is either (1U << index_bits) or
+    * (1U << max(redBits, greenBits, blueBits)).
+    */
+   assert(visinfo->nplanes > 0);
+   v->nplanes = visinfo->nplanes;
+   v->ColormapEntries = visinfo->ColormapEntries;
+
+   v->mesa_visual.redMask = visinfo->redMask;
+   v->mesa_visual.greenMask = visinfo->greenMask;
+   v->mesa_visual.blueMask = visinfo->blueMask;
+   v->mesa_visual.visualID = visinfo->vid;
+   v->mesa_visual.screen = 0; /* FIXME: What should be done here? */
+#else
+   v->mesa_visual.redMask = visinfo->red_mask;
+   v->mesa_visual.greenMask = visinfo->green_mask;
+   v->mesa_visual.blueMask = visinfo->blue_mask;
+   v->mesa_visual.visualID = visinfo->visualid;
+   v->mesa_visual.screen = visinfo->screen;
+#endif
+
+   /* The visual class field is named "class" for C and for the X server
+    * build, and "c_class" when compiling as C++.
+    */
+#if defined(XFree86Server) || !(defined(__cplusplus) || defined(c_plusplus))
+   v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->class);
+#else
+   v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->c_class);
+#endif
+
+   v->mesa_visual.visualRating = visualCaveat;
+
+   if (alpha_flag)
+      v->mesa_visual.alphaBits = 8;
+
+   /* No buffer is passed here (NULL, 0, 0); the return value is
+    * deliberately ignored.
+    */
+   (void) initialize_visual_and_buffer( v, NULL, 0, 0 );
+
+   {
+      const int xclass = v->mesa_visual.visualType;
+      if (xclass == GLX_TRUE_COLOR || xclass == GLX_DIRECT_COLOR) {
+         red_bits   = _mesa_bitcount(GET_REDMASK(v));
+         green_bits = _mesa_bitcount(GET_GREENMASK(v));
+         blue_bits  = _mesa_bitcount(GET_BLUEMASK(v));
+      }
+      else {
+         /* this is an approximation */
+         int depth;
+         depth = GET_VISUAL_DEPTH(v);
+         red_bits = depth / 3;
+         depth -= red_bits;
+         green_bits = depth / 2;
+         depth -= green_bits;
+         blue_bits = depth;
+         /* this store is overwritten unconditionally just below */
+         alpha_bits = 0;
+         assert( red_bits + green_bits + blue_bits == GET_VISUAL_DEPTH(v) );
+      }
+      alpha_bits = v->mesa_visual.alphaBits;
+   }
+
+   /* The num_samples parameter is not used by this function; a literal 0
+    * is passed as the last argument.
+    */
+   _mesa_initialize_visual( &v->mesa_visual,
+                            db_flag, stereo_flag,
+                            red_bits, green_bits,
+                            blue_bits, alpha_bits,
+                            depth_size,
+                            stencil_size,
+                            accum_red_size, accum_green_size,
+                            accum_blue_size, accum_alpha_size,
+                            0 );
+
+   /* XXX minor hack */
+   v->mesa_visual.level = level;
+   return v;
+}
+
+
+/**
+ * Free an XMesaVisual.
+ * \param v  the visual to destroy; NULL is accepted and ignored.
+ */
+PUBLIC
+void XMesaDestroyVisual( XMesaVisual v )
+{
+   /* Destroying a NULL visual is a no-op, like free(NULL). */
+   if (!v)
+      return;
+
+#ifndef XFree86Server
+   /* release the XVisualInfo copy stored in the visual */
+   free(v->visinfo);
+#endif
+   free(v);
+}
+
+
+
+/**
+ * Create a new XMesaContext.
+ * \param v  the XMesaVisual
+ * \param share_list  another XMesaContext with which to share display
+ *                    lists or NULL if no sharing is wanted.
+ * \return an XMesaContext or NULL if error (allocation failure, core
+ *         context initialization failure, or failure to create any of the
+ *         swrast/vbo/tnl/swsetup helper contexts).
+ */
+PUBLIC
+XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
+{
+   static GLboolean firstTime = GL_TRUE;
+   XMesaContext c;
+   GLcontext *mesaCtx;
+   struct dd_function_table functions;
+   TNLcontext *tnl;
+
+   /* one-time initialization of the _xmesa_lock mutex on the first call */
+   if (firstTime) {
+      _glthread_INIT_MUTEX(_xmesa_lock);
+      firstTime = GL_FALSE;
+   }
+
+   /* Note: the XMesaContext contains a Mesa GLcontext struct (inheritance) */
+   c = (XMesaContext) CALLOC_STRUCT(xmesa_context);
+   if (!c)
+      return NULL;
+
+   mesaCtx = &(c->mesa);
+
+   /* initialize with default driver functions, then plug in XMesa funcs */
+   _mesa_init_driver_functions(&functions);
+   xmesa_init_driver_functions(v, &functions);
+   if (!_mesa_initialize_context(mesaCtx, &v->mesa_visual,
+                      share_list ? &(share_list->mesa) : (GLcontext *) NULL,
+                      &functions, (void *) c)) {
+      free(c);
+      return NULL;
+   }
+
+   /* Enable this to exercise fixed function -> shader translation
+    * with software rendering.
+    */
+   if (0) {
+      mesaCtx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+      mesaCtx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+   }
+
+   _mesa_enable_sw_extensions(mesaCtx);
+   _mesa_enable_1_3_extensions(mesaCtx);
+   _mesa_enable_1_4_extensions(mesaCtx);
+   _mesa_enable_1_5_extensions(mesaCtx);
+   _mesa_enable_2_0_extensions(mesaCtx);
+   _mesa_enable_2_1_extensions(mesaCtx);
+   /* NOTE(review): the macro tested below is spelled "texure" rather than
+    * "texture".  Confirm this matches the name the build actually defines;
+    * if not, this block is never compiled.
+    */
+#if ENABLE_EXT_texure_compression_s3tc
+    if (mesaCtx->Mesa_DXTn) {
+       _mesa_enable_extension(mesaCtx, "GL_EXT_texture_compression_s3tc");
+       _mesa_enable_extension(mesaCtx, "GL_S3_s3tc");
+    }
+    _mesa_enable_extension(mesaCtx, "GL_3DFX_texture_compression_FXT1");
+#endif
+#if ENABLE_EXT_timer_query
+    _mesa_enable_extension(mesaCtx, "GL_EXT_timer_query");
+#endif
+
+#ifdef XFree86Server
+   /* If we're running in the X server, do bounds checking to prevent
+    * segfaults and server crashes!
+    */
+   mesaCtx->Const.CheckArrayBounds = GL_TRUE;
+#endif
+
+   /* finish up xmesa context initializations */
+   c->swapbytes = CHECK_BYTE_ORDER(v) ? GL_FALSE : GL_TRUE;
+   c->xm_visual = v;
+   c->xm_buffer = NULL;   /* set later by XMesaMakeCurrent */
+   c->display = v->display;
+   c->pixelformat = v->dithered_pf;      /* Dithering is enabled by default */
+
+   /* Initialize the software rasterizer and helper modules.
+    */
+   if (!_swrast_CreateContext( mesaCtx ) ||
+       !_vbo_CreateContext( mesaCtx ) ||
+       !_tnl_CreateContext( mesaCtx ) ||
+       !_swsetup_CreateContext( mesaCtx )) {
+      /* NOTE(review): helper contexts that were created before the failing
+       * one are not explicitly destroyed here - confirm whether
+       * _mesa_free_context_data() covers them.
+       */
+      _mesa_free_context_data(&c->mesa);
+      free(c);
+      return NULL;
+   }
+
+   /* tnl setup */
+   tnl = TNL_CONTEXT(mesaCtx);
+   tnl->Driver.RunPipeline = _tnl_run_pipeline;
+   /* swrast setup */
+   xmesa_register_swrast_functions( mesaCtx );
+   _swsetup_Wakeup(mesaCtx);
+
+   _mesa_meta_init(mesaCtx);
+
+   return c;
+}
+
+
+
+/**
+ * Destroy an XMesaContext: tear down the meta, swsetup, swrast, tnl and
+ * vbo modules, free the core context data and finally the context itself.
+ * \param c  the context to destroy (dereferenced unconditionally, so it
+ *           must not be NULL)
+ */
+PUBLIC
+void XMesaDestroyContext( XMesaContext c )
+{
+   GLcontext *mesaCtx = &c->mesa;
+
+#ifdef FX
+   FXdestroyContext( XMESA_BUFFER(mesaCtx->DrawBuffer) );
+#endif
+
+   _mesa_meta_free( mesaCtx );
+
+   _swsetup_DestroyContext( mesaCtx );
+   _swrast_DestroyContext( mesaCtx );
+   _tnl_DestroyContext( mesaCtx );
+   _vbo_DestroyContext( mesaCtx );
+   _mesa_free_context_data( mesaCtx );
+   free( c );
+}
+
+
+
+/**
+ * Create an XMesaBuffer which corresponds to an X window.
+ * \param v  the window's XMesaVisual
+ * \param w  the window we're wrapping
+ * \return  new XMesaBuffer or NULL if error (the window attributes could
+ *          not be queried, the window depth does not match the visual
+ *          depth, or buffer creation/initialization failed)
+ */
+PUBLIC XMesaBuffer
+XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w)
+{
+#ifndef XFree86Server
+   XWindowAttributes attr;
+#endif
+   XMesaBuffer b;
+   XMesaColormap cmap;
+   int depth;
+
+   assert(v);
+   assert(w);
+
+   /* Check that window depth matches visual depth */
+#ifdef XFree86Server
+   depth = ((XMesaDrawable)w)->depth;
+#else
+   /* XGetWindowAttributes() returns zero on failure; attr is then not
+    * filled in and must not be read.
+    */
+   if (!XGetWindowAttributes( v->display, w, &attr )) {
+      _mesa_warning(NULL, "XMesaCreateWindowBuffer: XGetWindowAttributes failed for window %u\n",
+                    (unsigned int) w);
+      return NULL;
+   }
+   depth = attr.depth;
+#endif
+   if (GET_VISUAL_DEPTH(v) != depth) {
+      _mesa_warning(NULL, "XMesaCreateWindowBuffer: depth mismatch between visual (%d) and window (%d)!\n",
+                    GET_VISUAL_DEPTH(v), depth);
+      return NULL;
+   }
+
+   /* Find colormap */
+#ifdef XFree86Server
+   cmap = (ColormapPtr)LookupIDByType(wColormap(w), RT_COLORMAP);
+#else
+   if (attr.colormap) {
+      cmap = attr.colormap;
+   }
+   else {
+      _mesa_warning(NULL, "Window %u has no colormap!\n", (unsigned int) w);
+      /* this is weird, a window w/out a colormap!? */
+      /* OK, let's just allocate a new one and hope for the best */
+      cmap = XCreateColormap(v->display, w, attr.visual, AllocNone);
+   }
+#endif
+
+   b = create_xmesa_buffer((XMesaDrawable) w, WINDOW, v, cmap);
+   if (!b)
+      return NULL;
+
+   if (!initialize_visual_and_buffer( v, b, (XMesaDrawable) w, cmap )) {
+      xmesa_free_buffer(b);
+      return NULL;
+   }
+
+   return b;
+}
+
+
+
+/**
+ * Wrap an X pixmap in a new XMesaBuffer.
+ *
+ * \param v    the XMesaVisual
+ * \param p    the pixmap
+ * \param cmap the colormap, may be 0 if using a \c GLX_TRUE_COLOR or
+ *             \c GLX_DIRECT_COLOR visual for the pixmap
+ * \returns new XMesaBuffer or NULL if error
+ */
+PUBLIC XMesaBuffer
+XMesaCreatePixmapBuffer(XMesaVisual v, XMesaPixmap p, XMesaColormap cmap)
+{
+   XMesaBuffer newBuf;
+
+   assert(v);
+
+   newBuf = create_xmesa_buffer((XMesaDrawable) p, PIXMAP, v, cmap);
+
+   /* a buffer that was created but cannot be initialized is freed again */
+   if (newBuf && !initialize_visual_and_buffer(v, newBuf, (XMesaDrawable) p, cmap)) {
+      xmesa_free_buffer(newBuf);
+      newBuf = NULL;
+   }
+
+   return newBuf;
+}
+
+
+/**
+ * For GLX_EXT_texture_from_pixmap: create an XMesaBuffer for a pixmap
+ * and record the texture target/format/mipmap settings in it.
+ *
+ * \param v       the XMesaVisual
+ * \param p       the pixmap
+ * \param cmap    the colormap
+ * \param format  stored in the buffer's TextureFormat
+ * \param target  texture target; if 0, a target is chosen from the pixmap
+ *                size and the extensions of the current context (if any)
+ * \param mipmap  stored in the buffer's TextureMipmap
+ * \return new XMesaBuffer or NULL if error
+ */
+XMesaBuffer
+XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p,
+                               XMesaColormap cmap,
+                               int format, int target, int mipmap)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   XMesaBuffer b;
+   GLuint width, height;
+
+   assert(v);
+
+   b = create_xmesa_buffer((XMesaDrawable) p, PIXMAP, v, cmap);
+   if (!b)
+      return NULL;
+
+   /* get pixmap size, update framebuffer/renderbuffer dims */
+   xmesa_get_window_size(v->display, b, &width, &height);
+   _mesa_resize_framebuffer(NULL, &(b->mesa_buffer), width, height);
+
+   if (target == 0) {
+      /* There may be no current context; do not dereference ctx in that
+       * case and simply treat both extensions as unavailable.
+       */
+      const GLboolean haveNPOT =
+         ctx && ctx->Extensions.ARB_texture_non_power_of_two;
+      const GLboolean haveRect =
+         ctx && ctx->Extensions.NV_texture_rectangle;
+
+      /* examine dims */
+      if (haveNPOT) {
+         target = GLX_TEXTURE_2D_EXT;
+      }
+      else if (   _mesa_bitcount(width)  == 1
+               && _mesa_bitcount(height) == 1) {
+         /* power of two size */
+         if (height == 1) {
+            target = GLX_TEXTURE_1D_EXT;
+         }
+         else {
+            target = GLX_TEXTURE_2D_EXT;
+         }
+      }
+      else if (haveRect) {
+         target = GLX_TEXTURE_RECTANGLE_EXT;
+      }
+      else {
+         /* non power of two textures not supported */
+         XMesaDestroyBuffer(b);
+         return NULL;
+      }
+   }
+
+   b->TextureTarget = target;
+   b->TextureFormat = format;
+   b->TextureMipmap = mipmap;
+
+   if (!initialize_visual_and_buffer(v, b, (XMesaDrawable) p, cmap)) {
+      xmesa_free_buffer(b);
+      return NULL;
+   }
+
+   return b;
+}
+
+
+
+/**
+ * Create an XMesaBuffer of type PBUFFER backed by a newly created X pixmap.
+ * Only implemented for non-XFree86Server builds; otherwise 0 is returned.
+ *
+ * \param v       the XMesaVisual
+ * \param cmap    the colormap
+ * \param width   pixmap width
+ * \param height  pixmap height
+ * \return new XMesaBuffer or NULL if error
+ */
+XMesaBuffer
+XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap,
+                   unsigned int width, unsigned int height)
+{
+#ifndef XFree86Server
+   XMesaWindow root;
+   XMesaDrawable drawable;  /* X Pixmap Drawable */
+   XMesaBuffer b;
+
+   /* allocate pixmap for front buffer */
+   root = RootWindow( v->display, v->visinfo->screen );
+   drawable = XCreatePixmap(v->display, root, width, height,
+                            v->visinfo->depth);
+   if (!drawable)
+      return NULL;
+
+   b = create_xmesa_buffer(drawable, PBUFFER, v, cmap);
+   if (!b) {
+      /* no buffer exists that could own the pixmap, so release it here */
+      XFreePixmap(v->display, drawable);
+      return NULL;
+   }
+
+   if (!initialize_visual_and_buffer(v, b, drawable, cmap)) {
+      /* NOTE(review): confirm whether xmesa_free_buffer() also releases
+       * the pixmap of a PBUFFER; if it does not, the pixmap leaks here.
+       */
+      xmesa_free_buffer(b);
+      return NULL;
+   }
+
+   return b;
+#else
+   return 0;
+#endif
+}
+
+
+
+/*
+ * Deallocate an XMesaBuffer structure and all related info.
+ * Public entry point; simply forwards to xmesa_free_buffer().
+ */
+PUBLIC void
+XMesaDestroyBuffer(XMesaBuffer b)
+{
+   xmesa_free_buffer(b);
+}
+
+
+/**
+ * Poll the drawable's current size and, if it differs from what the
+ * GLframebuffer records, resize the framebuffer and its renderbuffers.
+ * Called when:
+ *  1. the first time a buffer is bound to a context.
+ *  2. from glViewport to poll for window size changes
+ *  3. from the XMesaResizeBuffers() API function.
+ * Note: it's possible (and legal) for xmctx to be NULL.  That can happen
+ * when resizing a buffer when no rendering context is bound.
+ */
+void
+xmesa_check_and_update_buffer_size(XMesaContext xmctx, XMesaBuffer drawBuffer)
+{
+   GLuint newWidth, newHeight;
+
+   xmesa_get_window_size(drawBuffer->display, drawBuffer,
+                         &newWidth, &newHeight);
+
+   if (!(drawBuffer->mesa_buffer.Width == newWidth &&
+         drawBuffer->mesa_buffer.Height == newHeight)) {
+      /* with no XMesaContext, a NULL GLcontext is passed to the resize */
+      GLcontext *ctx = NULL;
+      if (xmctx) {
+         ctx = &xmctx->mesa;
+      }
+      _mesa_resize_framebuffer(ctx, &(drawBuffer->mesa_buffer),
+                               newWidth, newHeight);
+   }
+
+   drawBuffer->mesa_buffer.Initialized = GL_TRUE; /* XXX TEMPORARY? */
+}
+
+
+/*
+ * Bind buffer b to context c and make c the current rendering context.
+ * Equivalent to XMesaMakeCurrent2() with b used as both the draw and the
+ * read buffer; returns that function's result.
+ */
+GLboolean XMesaMakeCurrent( XMesaContext c, XMesaBuffer b )
+{
+   return XMesaMakeCurrent2( c, b, b );
+}
+
+
+/*
+ * Bind drawBuffer and readBuffer to context c and make c the current
+ * rendering context.  If c is NULL the current context is detached.
+ * Return:  GL_FALSE if c is given but either buffer is NULL,
+ *          GL_TRUE otherwise.
+ */
+PUBLIC
+GLboolean XMesaMakeCurrent2( XMesaContext c, XMesaBuffer drawBuffer,
+                             XMesaBuffer readBuffer )
+{
+   if (c) {
+      if (!drawBuffer || !readBuffer)
+         return GL_FALSE;  /* must specify buffers! */
+
+      if (&(c->mesa) == _mesa_get_current_context()
+          && c->mesa.DrawBuffer == &drawBuffer->mesa_buffer
+          && c->mesa.ReadBuffer == &readBuffer->mesa_buffer
+          && XMESA_BUFFER(c->mesa.DrawBuffer)->wasCurrent) {
+         /* same context and buffer, do nothing */
+         return GL_TRUE;
+      }
+
+      c->xm_buffer = drawBuffer;
+
+#ifdef FX
+      /* when FXmakeCurrent() succeeds, the rest of the binding is skipped */
+      if (FXmakeCurrent( drawBuffer ))
+         return GL_TRUE;
+#endif
+
+      /* Call this periodically to detect when the user has begun using
+       * GL rendering from multiple threads.
+       */
+      _glapi_check_multithread();
+
+      xmesa_check_and_update_buffer_size(c, drawBuffer);
+      if (readBuffer != drawBuffer)
+         xmesa_check_and_update_buffer_size(c, readBuffer);
+
+      _mesa_make_current(&(c->mesa),
+                         &drawBuffer->mesa_buffer,
+                         &readBuffer->mesa_buffer);
+
+      /*
+       * Must recompute and set these pixel values because colormap
+       * can be different for different windows.
+       */
+      c->clearpixel = xmesa_color_to_pixel( &c->mesa,
+					    c->clearcolor[0],
+					    c->clearcolor[1],
+					    c->clearcolor[2],
+					    c->clearcolor[3],
+					    c->xm_visual->undithered_pf);
+      XMesaSetForeground(c->display, drawBuffer->cleargc, c->clearpixel);
+
+      /* Solution to Stephane Rehel's problem with glXReleaseBuffersMESA(): */
+      /* (only the draw buffer is flagged; readBuffer is left untouched) */
+      drawBuffer->wasCurrent = GL_TRUE;
+   }
+   else {
+      /* Detach */
+      _mesa_make_current( NULL, NULL, NULL );
+   }
+   return GL_TRUE;
+}
+
+
+/*
+ * Unbind the context c from its buffer.
+ * This implementation ignores c and always returns GL_TRUE.
+ */
+GLboolean XMesaUnbindContext( XMesaContext c )
+{
+   /* A no-op for XFree86 integration purposes */
+   return GL_TRUE;
+}
+
+
+/*
+ * Return the XMesaContext of the current GL context, or 0 if no context
+ * is current.
+ */
+XMesaContext XMesaGetCurrentContext( void )
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (!ctx)
+      return 0;
+
+   return XMESA_CONTEXT(ctx);
+}
+
+
+/*
+ * Return the XMesaBuffer for the current context's DrawBuffer, or 0 if
+ * no context is current.
+ */
+XMesaBuffer XMesaGetCurrentBuffer( void )
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (!ctx)
+      return 0;
+
+   return XMESA_BUFFER(ctx->DrawBuffer);
+}
+
+
+/* New in Mesa 3.1 */
+/*
+ * Return the XMesaBuffer for the current context's ReadBuffer, or 0 if
+ * no context is current.
+ */
+XMesaBuffer XMesaGetCurrentReadBuffer( void )
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (!ctx)
+      return 0;
+
+   return XMESA_BUFFER(ctx->ReadBuffer);
+}
+
+
+#ifdef XFree86Server
+/*
+ * Install c's dispatch table and, unless c is already the current
+ * context, make it current with its existing draw/read buffers.
+ * A NULL c detaches the current context.  Always returns GL_TRUE.
+ */
+PUBLIC
+GLboolean XMesaForceCurrent(XMesaContext c)
+{
+   if (!c) {
+      _mesa_make_current(NULL, NULL, NULL);
+      return GL_TRUE;
+   }
+
+   _glapi_set_dispatch(c->mesa.CurrentDispatch);
+
+   if (_mesa_get_current_context() != &(c->mesa)) {
+      _mesa_make_current(&c->mesa, c->mesa.DrawBuffer, c->mesa.ReadBuffer);
+   }
+
+   return GL_TRUE;
+}
+
+
+/*
+ * Detach whatever context is current.  The argument c is ignored.
+ * Always returns GL_TRUE.
+ */
+PUBLIC
+GLboolean XMesaLoseCurrent(XMesaContext c)
+{
+   (void) c;
+   _mesa_make_current(NULL, NULL, NULL);
+   return GL_TRUE;
+}
+
+
+/*
+ * Copy state from xm_src to xm_dst by forwarding both contexts' GLcontext
+ * and the mask to _mesa_copy_context().  Always returns GL_TRUE.
+ */
+PUBLIC
+GLboolean XMesaCopyContext( XMesaContext xm_src, XMesaContext xm_dst, GLuint mask )
+{
+   _mesa_copy_context(&xm_src->mesa, &xm_dst->mesa, mask);
+   return GL_TRUE;
+}
+#endif /* XFree86Server */
+
+
+#ifndef FX
+/* Stub used when FX is not defined: ignores mode, always returns GL_FALSE. */
+GLboolean XMesaSetFXmode( GLint mode )
+{
+   (void) mode;
+   return GL_FALSE;
+}
+#endif
+
+
+
+/*
+ * Copy the back buffer to the front buffer.  If there's no back buffer
+ * this is a no-op.
+ * The back buffer is either an XImage (sent with XShmPutImage or
+ * XMesaPutImage) or a pixmap (copied with XMesaCopyArea).
+ */
+PUBLIC
+void XMesaSwapBuffers( XMesaBuffer b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (!b->backxrb) {
+      /* single buffered */
+      return;
+   }
+
+   /* If we're swapping the buffer associated with the current context
+    * we have to flush any pending rendering commands first.
+    */
+   if (ctx && ctx->DrawBuffer == &(b->mesa_buffer))
+      _mesa_notifySwapBuffers(ctx);
+
+   if (b->db_mode) {
+#ifdef FX
+      /* when FXswapBuffers() reports success nothing else is done */
+      if (FXswapBuffers(b))
+         return;
+#endif
+      if (b->backxrb->ximage) {
+	 /* Copy Ximage (back buf) from client memory to server window */
+#if defined(USE_XSHM) && !defined(XFree86Server)
+	 if (b->shm) {
+            /*_glthread_LOCK_MUTEX(_xmesa_lock);*/
+	    XShmPutImage( b->xm_visual->display, b->frontxrb->drawable,
+			  b->swapgc,
+			  b->backxrb->ximage, 0, 0,
+			  0, 0, b->mesa_buffer.Width, b->mesa_buffer.Height,
+                          False );
+            /*_glthread_UNLOCK_MUTEX(_xmesa_lock);*/
+	 }
+	 else
+#endif
+         {
+            /*_glthread_LOCK_MUTEX(_xmesa_lock);*/
+            XMesaPutImage( b->xm_visual->display, b->frontxrb->drawable,
+			   b->swapgc,
+			   b->backxrb->ximage, 0, 0,
+			   0, 0, b->mesa_buffer.Width, b->mesa_buffer.Height );
+            /*_glthread_UNLOCK_MUTEX(_xmesa_lock);*/
+         }
+      }
+      else if (b->backxrb->pixmap) {
+	 /* Copy pixmap (back buf) to window (front buf) on server */
+         /*_glthread_LOCK_MUTEX(_xmesa_lock);*/
+	 XMesaCopyArea( b->xm_visual->display,
+			b->backxrb->pixmap,   /* source drawable */
+			b->frontxrb->drawable,  /* dest. drawable */
+			b->swapgc,
+			0, 0, b->mesa_buffer.Width, b->mesa_buffer.Height,
+			0, 0                 /* dest region */
+		      );
+         /*_glthread_UNLOCK_MUTEX(_xmesa_lock);*/
+      }
+
+      /* NOTE(review): ctx may be NULL here (no current context) - confirm
+       * that _mesa_copy_soft_alpha_renderbuffers() tolerates that.
+       */
+      if (b->swAlpha)
+         _mesa_copy_soft_alpha_renderbuffers(ctx, &b->mesa_buffer);
+   }
+#if !defined(XFree86Server)
+   XSync( b->xm_visual->display, False );
+#endif
+}
+
+
+
+/*
+ * Copy sub-region of back buffer to front buffer
+ * Input:  b - the buffer
+ *         x, y, width, height - the region; y is flipped against the
+ *                               buffer height (see yTop) before being
+ *                               passed to X
+ * No-op if there is no back buffer.
+ */
+void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height )
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   /* If we're swapping the buffer associated with the current context
+    * we have to flush any pending rendering commands first.
+    */
+   if (ctx && ctx->DrawBuffer == &(b->mesa_buffer))
+      _mesa_notifySwapBuffers(ctx);
+
+   if (!b->backxrb) {
+      /* single buffered */
+      return; 
+   }
+
+   if (b->db_mode) {
+      /* top edge of the region, measured from the top of the buffer */
+      int yTop = b->mesa_buffer.Height - y - height;
+#ifdef FX
+      /* when FXswapBuffers() reports success nothing else is done */
+      if (FXswapBuffers(b))
+         return;
+#endif
+      if (b->backxrb->ximage) {
+         /* Copy Ximage from host's memory to server's window */
+#if defined(USE_XSHM) && !defined(XFree86Server)
+         if (b->shm) {
+            /* XXX assuming width and height aren't too large! */
+            XShmPutImage( b->xm_visual->display, b->frontxrb->drawable,
+                          b->swapgc,
+                          b->backxrb->ximage, x, yTop,
+                          x, yTop, width, height, False );
+            /* wait for finished event??? */
+         }
+         else
+#endif
+         {
+            /* XXX assuming width and height aren't too large! */
+            XMesaPutImage( b->xm_visual->display, b->frontxrb->drawable,
+			   b->swapgc,
+			   b->backxrb->ximage, x, yTop,
+			   x, yTop, width, height );
+         }
+      }
+      else {
+         /* Copy pixmap to window on server */
+         XMesaCopyArea( b->xm_visual->display,
+			b->backxrb->pixmap,           /* source drawable */
+			b->frontxrb->drawable,        /* dest. drawable */
+			b->swapgc,
+			x, yTop, width, height,  /* source region */
+			x, yTop                  /* dest region */
+                      );
+      }
+   }
+}
+
+
+/*
+ * Return a pointer to the XMesa backbuffer Pixmap or XImage.  This function
+ * is a way to get "under the hood" of X/Mesa so one can manipulate the
+ * back buffer directly.
+ * Output:  pixmap - pointer to back buffer's Pixmap, or 0
+ *          ximage - pointer to back buffer's XImage, or NULL
+ * Return:  GL_TRUE = context is double buffered
+ *          GL_FALSE = context is single buffered
+ */
+#ifndef XFree86Server
+GLboolean XMesaGetBackBuffer( XMesaBuffer b,
+                              XMesaPixmap *pixmap,
+                              XMesaImage **ximage )
+{
+   /* Both output pointers are optional: a NULL pointer means the caller
+    * does not want that value.  This holds for both return paths.
+    */
+   if (b->db_mode) {
+      if (pixmap)
+         *pixmap = b->backxrb->pixmap;
+      if (ximage)
+         *ximage = b->backxrb->ximage;
+      return GL_TRUE;
+   }
+
+   /* single buffered: report "no back buffer" through the outputs given */
+   if (pixmap)
+      *pixmap = 0;
+   if (ximage)
+      *ximage = NULL;
+   return GL_FALSE;
+}
+#endif /* XFree86Server */
+
+
+/*
+ * Expose the depth buffer attached to an XMesaBuffer.
+ * Input:  b - the XMesa buffer handle
+ * Output:  width, height - size of buffer in pixels
+ *          bytesPerValue - bytes per depth value (2 or 4)
+ *          buffer - pointer to depth buffer values
+ * Return:  GL_TRUE on success.  GL_FALSE (with all outputs zeroed) when
+ *          there is no depth renderbuffer or it has no data.
+ */
+GLboolean XMesaGetDepthBuffer( XMesaBuffer b, GLint *width, GLint *height,
+                               GLint *bytesPerValue, void **buffer )
+{
+   struct gl_renderbuffer *depthRb
+      = b->mesa_buffer.Attachment[BUFFER_DEPTH].Renderbuffer;
+
+   if (depthRb && depthRb->Data) {
+      *width = b->mesa_buffer.Width;
+      *height = b->mesa_buffer.Height;
+      if (b->mesa_buffer.Visual.depthBits <= 16) {
+         *bytesPerValue = sizeof(GLushort);
+      }
+      else {
+         *bytesPerValue = sizeof(GLuint);
+      }
+      *buffer = depthRb->Data;
+      return GL_TRUE;
+   }
+
+   /* no usable depth buffer */
+   *width = 0;
+   *height = 0;
+   *bytesPerValue = 0;
+   *buffer = 0;
+   return GL_FALSE;
+}
+
+
+/*
+ * XSync the display of the context's visual.  Does nothing if c or its
+ * visual is NULL, or when built for XFree86Server.
+ */
+void XMesaFlush( XMesaContext c )
+{
+#ifndef XFree86Server
+   if (!c || !c->xm_visual)
+      return;
+
+   XSync( c->xm_visual->display, False );
+#else
+   /* NOT_NEEDED */
+   (void) c;
+#endif
+}
+
+
+
+/*
+ * Return a static string for the given name: "5.0" for XMESA_VERSION,
+ * "" for XMESA_EXTENSIONS, NULL for anything else.  c is unused.
+ */
+const char *XMesaGetString( XMesaContext c, int name )
+{
+   (void) c;
+
+   if (name == XMESA_VERSION)
+      return "5.0";
+
+   if (name == XMESA_EXTENSIONS)
+      return "";
+
+   return NULL;
+}
+
+
+
+/*
+ * Search XMesaBufferList for the buffer whose front drawable is d on
+ * display dpy.  Returns the first match, or NULL if there is none.
+ */
+XMesaBuffer XMesaFindBuffer( XMesaDisplay *dpy, XMesaDrawable d )
+{
+   XMesaBuffer cur = XMesaBufferList;
+
+   while (cur) {
+      if (cur->frontxrb->drawable == d && cur->display == dpy)
+         return cur;
+      cur = cur->Next;
+   }
+
+   return NULL;
+}
+
+
+/**
+ * Walk XMesaBufferList and free every XMesaBuffer that belongs to the
+ * given display.
+ */
+void xmesa_destroy_buffers_on_display(XMesaDisplay *dpy)
+{
+   XMesaBuffer cur = XMesaBufferList;
+
+   while (cur) {
+      /* read the link first; cur is not touched again once it is freed */
+      XMesaBuffer following = cur->Next;
+
+      if (cur->display == dpy)
+         xmesa_free_buffer(cur);
+
+      cur = following;
+   }
+}
+
+
+/*
+ * Look for XMesaBuffers whose X window has been destroyed.
+ * Deallocate any such XMesaBuffers.
+ * Only buffers of type WINDOW with a display and a front drawable are
+ * examined; nothing is done for them in an XFree86Server build.
+ */
+void XMesaGarbageCollect( void )
+{
+   XMesaBuffer b, next;
+   for (b=XMesaBufferList; b; b=next) {
+      /* save the link now: b may be destroyed below */
+      next = b->Next;
+      if (b->display && b->frontxrb->drawable && b->type == WINDOW) {
+#ifdef XFree86Server
+	 /* NOT_NEEDED */
+#else
+         XSync(b->display, False);
+         if (!window_exists( b->display, b->frontxrb->drawable )) {
+            /* found a dead window, free the ancillary info */
+            XMesaDestroyBuffer( b );
+         }
+#endif
+      }
+   }
+}
+
+
+/*
+ * Convert a floating point RGBA color to a pixel value using the
+ * context's current pixel format, dithering at window position (x, y)
+ * for the dithered formats.
+ * Input:  xmesa - the XMesa context
+ *         x, y - pixel position used by the dithering formats
+ *         red, green, blue, alpha - color components, scaled by 255 and
+ *                                   truncated to integers (not clamped)
+ * Return:  the pixel value.  PF_Index and unrecognized formats yield 0;
+ *          an unrecognized format is also reported via _mesa_problem().
+ */
+unsigned long XMesaDitherColor( XMesaContext xmesa, GLint x, GLint y,
+                                GLfloat red, GLfloat green,
+                                GLfloat blue, GLfloat alpha )
+{
+   GLcontext *ctx = &xmesa->mesa;
+   GLint r = (GLint) (red   * 255.0F);
+   GLint g = (GLint) (green * 255.0F);
+   GLint b = (GLint) (blue  * 255.0F);
+   GLint a = (GLint) (alpha * 255.0F);
+
+   switch (xmesa->pixelformat) {
+      case PF_Index:
+         return 0;
+      case PF_Truecolor:
+         {
+            unsigned long p;
+            PACK_TRUECOLOR( p, r, g, b );
+            return p;
+         }
+      case PF_8A8B8G8R:
+         return PACK_8A8B8G8R( r, g, b, a );
+      case PF_8A8R8G8B:
+         return PACK_8A8R8G8B( r, g, b, a );
+      case PF_8R8G8B:
+         /* fall through */
+      case PF_8R8G8B24:
+         /* same packing for both, as in xmesa_color_to_pixel() */
+         return PACK_8R8G8B( r, g, b );
+      case PF_5R6G5B:
+         return PACK_5R6G5B( r, g, b );
+      case PF_Dither:
+         {
+            DITHER_SETUP;
+            return DITHER( x, y, r, g, b );
+         }
+      case PF_1Bit:
+         /* 382 = (3*255)/2 */
+         return ((r+g+b) > 382) ^ xmesa->xm_visual->bitFlip;
+      case PF_HPCR:
+         return DITHER_HPCR(x, y, r, g, b);
+      case PF_Lookup:
+         {
+            LOOKUP_SETUP;
+            return LOOKUP( r, g, b );
+         }
+      case PF_Grayscale:
+         return GRAY_RGB( r, g, b );
+      case PF_Dither_5R6G5B:
+         /* fall through */
+      case PF_Dither_True:
+         {
+            unsigned long p;
+            PACK_TRUEDITHER(p, x, y, r, g, b);
+            return p;
+         }
+      default:
+         _mesa_problem(NULL, "Bad pixel format in XMesaDitherColor");
+   }
+   return 0;
+}
+
+
+/*
+ * This is typically called when the window size changes and we need
+ * to reallocate the buffer's back/depth/stencil/accum buffers.
+ * Returns without doing anything when XMESA_CONTEXT() of the current
+ * context is NULL (presumably: no context is current - confirm).
+ */
+PUBLIC void
+XMesaResizeBuffers( XMesaBuffer b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   XMesaContext xmctx = XMESA_CONTEXT(ctx);
+   if (!xmctx)
+      return;
+   xmesa_check_and_update_buffer_size(xmctx, b);
+}
+
+
+/*
+ * Translate a GLX_*_EXT buffer token into a BUFFER_* renderbuffer index.
+ * Only the four front/back buffers and GLX_AUX0_EXT are mapped; every
+ * other value (including GLX_AUX1_EXT..GLX_AUX9_EXT) returns -1.
+ */
+static GLint
+xbuffer_to_renderbuffer(int buffer)
+{
+   assert(MAX_AUX_BUFFERS <= 4);
+
+   if (buffer == GLX_FRONT_LEFT_EXT)
+      return BUFFER_FRONT_LEFT;
+   if (buffer == GLX_FRONT_RIGHT_EXT)
+      return BUFFER_FRONT_RIGHT;
+   if (buffer == GLX_BACK_LEFT_EXT)
+      return BUFFER_BACK_LEFT;
+   if (buffer == GLX_BACK_RIGHT_EXT)
+      return BUFFER_BACK_RIGHT;
+   if (buffer == GLX_AUX0_EXT)
+      return BUFFER_AUX0;
+
+   /* BadValue error */
+   return -1;
+}
+
+
+/*
+ * Load the contents of one of a drawable's color buffers into the
+ * GL_TEXTURE_2D image at level 0 via _mesa_TexImage2D().
+ * Input:  dpy - the display, used to fetch the image from a pixmap
+ *         drawable - the XMesaBuffer to read from
+ *         buffer - GLX_*_EXT buffer token selecting the renderbuffer
+ *         attrib_list - not used
+ * Returns silently on any error (unknown buffer token, texture format
+ * NONE, missing renderbuffer, unexpected XImage depth).
+ */
+PUBLIC void
+XMesaBindTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer,
+                  const int *attrib_list)
+{
+#if 0
+   GET_CURRENT_CONTEXT(ctx);
+   const GLuint unit = ctx->Texture.CurrentUnit;
+   struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+   struct gl_texture_object *texObj;
+#endif
+   struct gl_renderbuffer *rb;
+   struct xmesa_renderbuffer *xrb;
+   GLint b;
+   XMesaImage *img = NULL;
+   GLboolean freeImg = GL_FALSE;
+
+   b = xbuffer_to_renderbuffer(buffer);
+   if (b < 0)
+      return;
+
+   if (drawable->TextureFormat == GLX_TEXTURE_FORMAT_NONE_EXT)
+      return; /* BadMatch error */
+
+   rb = drawable->mesa_buffer.Attachment[b].Renderbuffer;
+   if (!rb) {
+      /* invalid buffer */
+      return;
+   }
+   xrb = xmesa_renderbuffer(rb);
+
+#if 0
+   switch (drawable->TextureTarget) {
+   case GLX_TEXTURE_1D_EXT:
+      texObj = texUnit->CurrentTex[TEXTURE_1D_INDEX];
+      break;
+   case GLX_TEXTURE_2D_EXT:
+      texObj = texUnit->CurrentTex[TEXTURE_2D_INDEX];
+      break;
+   case GLX_TEXTURE_RECTANGLE_EXT:
+      texObj = texUnit->CurrentTex[TEXTURE_RECT_INDEX];
+      break;
+   default:
+      return; /* BadMatch error */
+   }
+#endif
+
+   /*
+    * The following is a quick and simple way to implement
+    * BindTexImage.  The better way is to write some new FetchTexel()
+    * functions which would extract texels from XImages.  We'd still
+    * need to use GetImage when texturing from a Pixmap (front buffer)
+    * but texturing from a back buffer (XImage) would avoid an image
+    * copy.
+    */
+
+   /* get XImage */
+   if (xrb->pixmap) {
+      /* image fetched here is ours and must be destroyed before returning */
+      img = XMesaGetImage(dpy, xrb->pixmap, 0, 0, rb->Width, rb->Height, ~0L,
+                          ZPixmap);
+      freeImg = GL_TRUE;
+   }
+   else if (xrb->ximage) {
+      /* borrowed from the renderbuffer; must not be destroyed */
+      img = xrb->ximage;
+   }
+
+   /* store the XImage as a new texture image */
+   if (img) {
+      GLenum format, type, intFormat;
+      if (img->bits_per_pixel == 32) {
+         format = GL_BGRA;
+         type = GL_UNSIGNED_BYTE;
+         intFormat = GL_RGBA;
+      }
+      else if (img->bits_per_pixel == 24) {
+         format = GL_BGR;
+         type = GL_UNSIGNED_BYTE;
+         intFormat = GL_RGB;
+      }
+      else if (img->bits_per_pixel == 16) {
+         /* GL_UNSIGNED_SHORT_5_6_5 is only legal with format GL_RGB */
+         format = GL_RGB;
+         type = GL_UNSIGNED_SHORT_5_6_5;
+         intFormat = GL_RGB;
+      }
+      else {
+         _mesa_problem(NULL, "Unexpected XImage format in XMesaBindTexImage");
+         /* don't leak the image fetched from the pixmap */
+         if (freeImg) {
+            XMesaDestroyImage(img);
+         }
+         return;
+      }
+      if (drawable->TextureFormat == GLX_TEXTURE_FORMAT_RGBA_EXT) {
+         intFormat = GL_RGBA;
+      }
+      else if (drawable->TextureFormat == GLX_TEXTURE_FORMAT_RGB_EXT) {
+         intFormat = GL_RGB;
+      }
+
+      _mesa_TexImage2D(GL_TEXTURE_2D, 0, intFormat, rb->Width, rb->Height, 0,
+                       format, type, img->data);
+
+      if (freeImg) {
+         XMesaDestroyImage(img);
+      }
+   }
+}
+
+
+
+/*
+ * Counterpart of XMesaBindTexImage().  Currently only validates the
+ * buffer token and otherwise does nothing; dpy and drawable are unused.
+ */
+PUBLIC void
+XMesaReleaseTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer)
+{
+   const GLint b = xbuffer_to_renderbuffer(buffer);
+   if (b < 0)
+      return;
+
+   /* no-op for now */
+}
+
diff --git a/src/mesa/drivers/x11/xm_buffer.c b/src/mesa/drivers/x11/xm_buffer.c
new file mode 100644
index 0000000000..e47949750a
--- /dev/null
+++ b/src/mesa/drivers/x11/xm_buffer.c
@@ -0,0 +1,414 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.2
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * \file xm_buffer.c
+ * Framebuffer and renderbuffer-related functions.
+ */
+
+
+#include "glxheader.h"
+#include "xmesaP.h"
+#include "main/imports.h"
+#include "main/formats.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+
+
+#if defined(USE_XSHM) && !defined(XFree86Server)
+static volatile int mesaXErrorFlag = 0;
+
+/**
+ * Xlib error handler that only records that an error was reported.
+ */
+static int
+mesaHandleXError(XMesaDisplay *dpy, XErrorEvent *event)
+{
+   mesaXErrorFlag = 1;   /* inspected by the code that installed us */
+   (void) event;         /* the error details are not examined */
+   (void) dpy;
+   return 0;
+}
+
+/**
+ * Allocate a shared memory XImage back buffer for the given XMesaBuffer.
+ * Return:  GL_TRUE if success, GL_FALSE if the size is 0 or XSHM failed.
+ */
+static GLboolean
+alloc_back_shm_ximage(XMesaBuffer b, GLuint width, GLuint height)
+{
+   /* We have to do a _lot_ of error checking here to be sure we can
+    * really use the XSHM extension.  It seems different servers trigger
+    * errors at different points if the extension won't work.  Therefore
+    * we have to be very careful...
+    */
+   GC gc;
+   int (*old_handler)(XMesaDisplay *, XErrorEvent *);
+
+   if (width == 0 || height == 0) {
+      /* this will be true the first time we're called on 'b' */
+      return GL_FALSE;
+   }
+
+   b->backxrb->ximage = XShmCreateImage(b->xm_visual->display,
+                                        b->xm_visual->visinfo->visual,
+                                        b->xm_visual->visinfo->depth,
+                                        ZPixmap, NULL, &b->shminfo,
+                                        width, height);
+   if (b->backxrb->ximage == NULL) {
+      _mesa_warning(NULL, "alloc_back_buffer: Shared memory error (XShmCreateImage), disabling.\n");
+      b->shm = 0;
+      return GL_FALSE;
+   }
+
+   /* Owner-only (0600) segment: other local users must not map our pixels */
+   b->shminfo.shmid = shmget(IPC_PRIVATE, b->backxrb->ximage->bytes_per_line
+                             * b->backxrb->ximage->height, IPC_CREAT|0600);
+   if (b->shminfo.shmid < 0) {
+      _mesa_warning(NULL, "shmget failed while allocating back buffer.\n");
+      XDestroyImage(b->backxrb->ximage);
+      b->backxrb->ximage = NULL;
+      _mesa_warning(NULL, "alloc_back_buffer: Shared memory error (shmget), disabling.\n");
+      b->shm = 0;
+      return GL_FALSE;
+   }
+
+   b->shminfo.shmaddr = b->backxrb->ximage->data
+                      = (char*)shmat(b->shminfo.shmid, 0, 0);
+   if (b->shminfo.shmaddr == (char *) -1) {
+      _mesa_warning(NULL, "shmat() failed while allocating back buffer.\n");
+      XDestroyImage(b->backxrb->ximage);
+      shmctl(b->shminfo.shmid, IPC_RMID, 0);
+      b->backxrb->ximage = NULL;
+      _mesa_warning(NULL, "alloc_back_buffer: Shared memory error (shmat), disabling.\n");
+      b->shm = 0;
+      return GL_FALSE;
+   }
+
+   b->shminfo.readOnly = False;
+   mesaXErrorFlag = 0;
+   old_handler = XSetErrorHandler(mesaHandleXError);
+   /* This may trigger the X protocol error we're ready to catch: */
+   XShmAttach(b->xm_visual->display, &b->shminfo);
+   XSync(b->xm_visual->display, False);
+
+   if (mesaXErrorFlag) {
+      /* we are on a remote display, this error is normal, don't print it */
+      XFlush(b->xm_visual->display);
+      mesaXErrorFlag = 0;
+      XDestroyImage(b->backxrb->ximage);
+      shmdt(b->shminfo.shmaddr);
+      shmctl(b->shminfo.shmid, IPC_RMID, 0);
+      b->backxrb->ximage = NULL;
+      b->shm = 0;
+      (void) XSetErrorHandler(old_handler);
+      return GL_FALSE;
+   }
+
+   shmctl(b->shminfo.shmid, IPC_RMID, 0); /* nobody else needs it */
+
+   /* Finally, try an XShmPutImage to be really sure the extension works */
+   gc = XCreateGC(b->xm_visual->display, b->frontxrb->drawable, 0, NULL);
+   XShmPutImage(b->xm_visual->display, b->frontxrb->drawable, gc,
+                b->backxrb->ximage, 0, 0, 0, 0, 1, 1 /*one pixel*/, False);
+   XSync(b->xm_visual->display, False);
+   XFreeGC(b->xm_visual->display, gc);
+   (void) XSetErrorHandler(old_handler);
+   if (mesaXErrorFlag) {
+      XFlush(b->xm_visual->display);
+      mesaXErrorFlag = 0;
+      XDestroyImage(b->backxrb->ximage);
+      shmdt(b->shminfo.shmaddr);
+      shmctl(b->shminfo.shmid, IPC_RMID, 0);
+      b->backxrb->ximage = NULL;
+      b->shm = 0;
+      return GL_FALSE;
+   }
+
+   return GL_TRUE;
+}
+#else
+static GLboolean
+alloc_back_shm_ximage(XMesaBuffer b, GLuint width, GLuint height)
+{
+   (void) b; (void) width; (void) height;   /* Can't compile XSHM support */
+   return GL_FALSE;
+}
+#endif
+
+
+
+/**
+ * (Re)create the back buffer of 'b' as an XImage or Pixmap, per b->db_mode.
+ * Any old image/pixmap is released first; a 0 width/height allocates nothing.
+ */
+static void
+alloc_back_buffer(XMesaBuffer b, GLuint width, GLuint height)
+{
+   if (b->db_mode == BACK_XIMAGE) {
+      /* Deallocate the old backxrb->ximage, if any (SHM images also detach) */
+      if (b->backxrb->ximage) {
+#if defined(USE_XSHM) && !defined(XFree86Server)
+	 if (b->shm) {
+	    XShmDetach(b->xm_visual->display, &b->shminfo);
+	    XDestroyImage(b->backxrb->ximage);
+	    shmdt(b->shminfo.shmaddr);
+	 }
+	 else
+#endif
+	   XMesaDestroyImage(b->backxrb->ximage);
+	 b->backxrb->ximage = NULL;
+      }
+
+      if (width == 0 || height == 0)
+         return;
+
+      /* Allocate new back buffer: XSHM is tried first when b->shm is set */
+      if (b->shm == 0 || !alloc_back_shm_ximage(b, width, height)) {
+	 /* Allocate a regular XImage for the back buffer. */
+#ifdef XFree86Server
+	 b->backxrb->ximage = XMesaCreateImage(b->xm_visual->BitsPerPixel,
+                                               width, height, NULL);
+#else
+	 b->backxrb->ximage = XCreateImage(b->xm_visual->display,
+                                      b->xm_visual->visinfo->visual,
+                                      GET_VISUAL_DEPTH(b->xm_visual),
+				      ZPixmap, 0,   /* format, offset */
+				      NULL,
+                                      width, height,
+				      8, 0);  /* pad, bytes_per_line */
+#endif
+	 if (!b->backxrb->ximage) {
+	    _mesa_warning(NULL, "alloc_back_buffer: XCreateImage failed.\n");
+            return;
+	 }
+         b->backxrb->ximage->data = (char *) MALLOC(b->backxrb->ximage->height
+                                        * b->backxrb->ximage->bytes_per_line);
+         if (!b->backxrb->ximage->data) {
+            _mesa_warning(NULL, "alloc_back_buffer: MALLOC failed.\n");
+            XMesaDestroyImage(b->backxrb->ximage);
+            b->backxrb->ximage = NULL;
+         }
+      }
+      b->backxrb->pixmap = None;   /* XImage mode keeps no back pixmap */
+   }
+   else if (b->db_mode == BACK_PIXMAP) {
+      /* Free the old back pixmap */
+      if (b->backxrb->pixmap) {
+         XMesaFreePixmap(b->xm_visual->display, b->backxrb->pixmap);
+         b->backxrb->pixmap = 0;
+      }
+
+      if (width > 0 && height > 0) {
+         /* Allocate new back pixmap */
+         b->backxrb->pixmap = XMesaCreatePixmap(b->xm_visual->display,
+                                                b->frontxrb->drawable,
+                                                width, height,
+                                                GET_VISUAL_DEPTH(b->xm_visual));
+      }
+
+      b->backxrb->ximage = NULL;   /* Pixmap mode keeps no back XImage */
+      b->backxrb->drawable = b->backxrb->pixmap;   /* 0 if nothing was created */
+   }
+}
+
+
+static void
+xmesa_delete_renderbuffer(struct gl_renderbuffer *rb)
+{
+   /* Only the renderbuffer struct itself is released here.  XXX Note: the
+    * ximage or Pixmap attached to it should probably get freed here too,
+    * but that's currently done in XMesaDestroyBuffer().
+    */
+   free(rb);
+}
+
+
+/**
+ * AllocStorage() hook for the front color buffer.  Nothing is allocated
+ * here; only the renderbuffer's size and format bookkeeping is updated.
+ */
+static GLboolean
+xmesa_alloc_front_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
+                          GLenum internalFormat, GLuint width, GLuint height)
+{
+   struct xmesa_renderbuffer *const front = xmesa_renderbuffer(rb);
+
+   /* record the new dimensions and format on the renderbuffer */
+   rb->InternalFormat = internalFormat;
+   rb->Height = height;
+   rb->Width = width;
+
+   front->bottom = height - 1;   /* for the FLIP macro */
+
+   /* null the origins so a stale pointer is never used by accident */
+   front->origin4 = NULL;
+   front->origin3 = NULL;
+   front->origin2 = NULL;
+   front->origin1 = NULL;
+
+   return GL_TRUE;
+}
+
+
+/**
+ * AllocStorage() hook for the back color buffer.
+ * Rebuilds the back XImage/Pixmap, then refreshes the PIXELADDR origins.
+ */
+static GLboolean
+xmesa_alloc_back_storage(GLcontext *ctx, struct gl_renderbuffer *rb,
+                         GLenum internalFormat, GLuint width, GLuint height)
+{
+   struct xmesa_renderbuffer *back = xmesa_renderbuffer(rb);
+   const GLuint lastRow = height - 1;   /* row the origins are based on */
+
+   /* resize the XImage or Pixmap owned by the parent XMesaBuffer */
+   assert(back->Parent);
+   alloc_back_buffer(back->Parent, width, height);
+
+   /* XXX why is this here?
+    * Shares the front buffer's size bookkeeping and resets the origins. */
+   (void) xmesa_alloc_front_storage(ctx, rb, internalFormat, width, height);
+
+   if (!back->ximage) {
+      /* out of memory or buffer size is 0 x 0 */
+      back->origin1 = NULL;
+      back->origin2 = NULL;
+      back->origin3 = NULL;
+      back->origin4 = NULL;
+      back->width1 = back->width2 = back->width3 = back->width4 = 0;
+      return GL_TRUE;
+   }
+
+   /* PIXELADDR1 macro: byte pointer, stride in bytes */
+   back->width1 = back->ximage->bytes_per_line;
+   back->origin1 = (GLubyte *) back->ximage->data + back->width1 * lastRow;
+
+   /* PIXELADDR2 macro: GLushort pointer, stride in 16-bit units */
+   back->width2 = back->ximage->bytes_per_line / 2;
+   back->origin2 = (GLushort *) back->ximage->data + back->width2 * lastRow;
+
+   /* PIXELADDR3 macro: byte pointer, stride in bytes */
+   back->width3 = back->ximage->bytes_per_line;
+   back->origin3 = (GLubyte *) back->ximage->data + back->width3 * lastRow;
+
+   /* PIXELADDR4 macro: GLuint pointer, stride is the image width */
+   back->width4 = back->ximage->width;
+   back->origin4 = (GLuint *) back->ximage->data + back->width4 * lastRow;
+
+   return GL_TRUE;
+}
+
+
+struct xmesa_renderbuffer *
+xmesa_new_renderbuffer(GLcontext *ctx, GLuint name, const GLvisual *visual,
+                       GLboolean backBuffer)
+{
+   struct xmesa_renderbuffer *newRb = CALLOC_STRUCT(xmesa_renderbuffer);
+   if (!newRb)
+      return newRb;
+
+   /* NOTE: the 'name' argument is ignored; name 0 is always used. */
+   _mesa_init_renderbuffer(&newRb->Base, 0);
+
+   newRb->Base.Delete = xmesa_delete_renderbuffer;
+   newRb->Base.AllocStorage = backBuffer ? xmesa_alloc_back_storage
+                                         : xmesa_alloc_front_storage;
+
+   /* only need to set Red/Green/EtcBits fields for user-created RBs */
+   newRb->Base.DataType = GL_UNSIGNED_BYTE;
+   newRb->Base._BaseFormat = GL_RGBA;
+   newRb->Base.Format = MESA_FORMAT_RGBA8888;
+   newRb->Base.InternalFormat = GL_RGBA;
+
+   return newRb;
+}
+
+
+/**
+ * gl_framebuffer::Delete() hook, run when this buffer is _really_ being
+ * deleted: frees its X colors, GCs, back buffer, row image, then 'fb'.
+ */
+void
+xmesa_delete_framebuffer(struct gl_framebuffer *fb)
+{
+   XMesaBuffer b = XMESA_BUFFER(fb);
+
+   if (b->num_alloced > 0) {
+      /* If no other buffer uses this X colormap then free the colors. */
+      if (!xmesa_find_buffer(b->display, b->cmap, b)) {
+#ifdef XFree86Server
+         int client = 0;
+         if (b->frontxrb->drawable)
+            client = CLIENT_ID(b->frontxrb->drawable->id);
+         (void)FreeColors(b->cmap, client,
+                          b->num_alloced, b->alloced_colors, 0);
+#else
+         XFreeColors(b->display, b->cmap,
+                     b->alloced_colors, b->num_alloced, 0);
+#endif
+      }
+   }
+
+   if (b->gc)   /* release each graphics context that was created */
+      XMesaFreeGC(b->display, b->gc);
+   if (b->cleargc)
+      XMesaFreeGC(b->display, b->cleargc);
+   if (b->swapgc)
+      XMesaFreeGC(b->display, b->swapgc);
+
+   if (fb->Visual.doubleBufferMode) {
+      /* free back ximage/pixmap/shmregion */
+      if (b->backxrb->ximage) {
+#if defined(USE_XSHM) && !defined(XFree86Server)
+         if (b->shm) {
+            XShmDetach( b->display, &b->shminfo );
+            XDestroyImage( b->backxrb->ximage );
+            shmdt( b->shminfo.shmaddr );
+         }
+         else
+#endif
+            XMesaDestroyImage( b->backxrb->ximage );
+         b->backxrb->ximage = NULL;
+      }
+      if (b->backxrb->pixmap) {
+         XMesaFreePixmap( b->display, b->backxrb->pixmap );
+         if (b->xm_visual->hpcr_clear_flag) {   /* HPCR clear resources */
+            XMesaFreePixmap( b->display,
+                             b->xm_visual->hpcr_clear_pixmap );
+            XMesaDestroyImage( b->xm_visual->hpcr_clear_ximage );
+         }
+      }
+   }
+
+   if (b->rowimage) {
+      free( b->rowimage->data );
+      b->rowimage->data = NULL;   /* data is gone before the image is destroyed */
+      XMesaDestroyImage( b->rowimage );
+   }
+
+   _mesa_free_framebuffer_data(fb);
+   free(fb);
+}
diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c
new file mode 100644
index 0000000000..5edafb890b
--- /dev/null
+++ b/src/mesa/drivers/x11/xm_dd.c
@@ -0,0 +1,1190 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5.2
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * \file xm_dd.c
+ * General device driver functions for Xlib driver.
+ */
+
+#include "glxheader.h"
+#include "main/bufferobj.h"
+#include "main/buffers.h"
+#include "main/context.h"
+#include "main/colormac.h"
+#include "main/depth.h"
+#include "main/drawpix.h"
+#include "main/extensions.h"
+#include "main/framebuffer.h"
+#include "main/macros.h"
+#include "main/image.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/state.h"
+#include "main/texobj.h"
+#include "main/teximage.h"
+#include "main/texstore.h"
+#include "main/texformat.h"
+#include "swrast/swrast.h"
+#include "swrast/s_context.h"
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+#include "drivers/common/meta.h"
+#include "xmesaP.h"
+
+
+
+/*
+ * Dithering kernels and lookup tables (constant data only).
+ */
+
+const int xmesa_kernel8[DITH_DY * DITH_DX] = {   /* 0..15 once each, x MAXC */
+    0 * MAXC,  8 * MAXC,  2 * MAXC, 10 * MAXC,
+   12 * MAXC,  4 * MAXC, 14 * MAXC,  6 * MAXC,
+    3 * MAXC, 11 * MAXC,  1 * MAXC,  9 * MAXC,
+   15 * MAXC,  7 * MAXC, 13 * MAXC,  5 * MAXC,
+};
+
+const short xmesa_HPCR_DRGB[3][2][16] = {   /* NOTE(review): presumably [R,G,B][row parity][x & 15] - confirm */
+   {
+      { 16, -4,  1,-11, 14, -6,  3, -9, 15, -5,  2,-10, 13, -7,  4, -8},
+      {-15,  5,  0, 12,-13,  7, -2, 10,-14,  6, -1, 11,-12,  8, -3,  9}
+   },
+   {
+      {-11, 15, -7,  3, -8, 14, -4,  2,-10, 16, -6,  4, -9, 13, -5,  1},
+      { 12,-14,  8, -2,  9,-13,  5, -1, 11,-15,  7, -3, 10,-12,  6,  0}
+   },
+   {
+      {  6,-18, 26,-14,  2,-22, 30,-10,  8,-16, 28,-12,  4,-20, 32, -8},
+      { -4, 20,-24, 16,  0, 24,-28, 12, -6, 18,-26, 14, -2, 22,-30, 10}
+   }
+};
+
+const int xmesa_kernel1[16] = {   /* 0..15 once each, x 47 */
+   0*47,  9*47,  4*47, 12*47,     /* 47 = (255*3)/16 */
+   6*47,  2*47, 14*47,  8*47,
+  10*47,  1*47,  5*47, 11*47,
+   7*47, 13*47,  3*47, 15*47
+};
+
+
+static void
+finish_or_flush( GLcontext *ctx )
+{
+#ifndef XFree86Server
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   if (!xmesa)
+      return;
+   _glthread_LOCK_MUTEX(_xmesa_lock);
+   XSync( xmesa->display, False );   /* flush and wait for the X server */
+   _glthread_UNLOCK_MUTEX(_xmesa_lock);
+#else
+   /* NOT_NEEDED */
+#endif
+}
+
+
+static void
+clear_color( GLcontext *ctx, const GLfloat color[4] )
+{
+   XMesaContext xmesa;
+   XMesaBuffer xmbuf;
+
+   /* user-created framebuffers (Name != 0) are not handled here */
+   if (ctx->DrawBuffer->Name != 0)
+      return;
+   xmbuf = XMESA_BUFFER(ctx->DrawBuffer);
+   xmesa = XMESA_CONTEXT(ctx);
+   /* remember the color as clamped ubytes, then as an undithered pixel */
+   CLAMPED_FLOAT_TO_UBYTE(xmesa->clearcolor[3], color[3]);
+   CLAMPED_FLOAT_TO_UBYTE(xmesa->clearcolor[2], color[2]);
+   CLAMPED_FLOAT_TO_UBYTE(xmesa->clearcolor[1], color[1]);
+   CLAMPED_FLOAT_TO_UBYTE(xmesa->clearcolor[0], color[0]);
+   xmesa->clearpixel = xmesa_color_to_pixel( ctx, xmesa->clearcolor[0],
+                   xmesa->clearcolor[1], xmesa->clearcolor[2],
+                   xmesa->clearcolor[3], xmesa->xm_visual->undithered_pf );
+   _glthread_LOCK_MUTEX(_xmesa_lock);
+   XMesaSetForeground( xmesa->display, xmbuf->cleargc, xmesa->clearpixel );
+   _glthread_UNLOCK_MUTEX(_xmesa_lock);
+}
+
+
+
+/* Implements glColorMask() */
+static void
+color_mask(GLcontext *ctx,
+           GLboolean rmask, GLboolean gmask, GLboolean bmask, GLboolean amask)
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   const int visClass = xmesa->xm_visual->mesa_visual.visualType;
+   unsigned long planes = 0;
+   XMesaBuffer xmbuf;
+
+   (void) amask;   /* the alpha mask is unused here */
+
+   /* only a window-system buffer on a TrueColor/DirectColor visual */
+   if (ctx->DrawBuffer->Name != 0)
+      return;
+   xmbuf = XMESA_BUFFER(ctx->DrawBuffer);
+   if (visClass != GLX_TRUE_COLOR && visClass != GLX_DIRECT_COLOR)
+      return;
+
+   if (rmask && gmask && bmask) {
+      planes = ((unsigned long)~0L);   /* every plane writable */
+   }
+   else {
+      if (rmask)   planes |= GET_REDMASK(xmesa->xm_visual);
+      if (gmask)   planes |= GET_GREENMASK(xmesa->xm_visual);
+      if (bmask)   planes |= GET_BLUEMASK(xmesa->xm_visual);
+   }
+   XMesaSetPlaneMask( xmesa->display, xmbuf->cleargc, planes );
+}
+
+
+
+/**********************************************************************/
+/*** glClear implementations                                        ***/
+/**********************************************************************/
+
+
+/**
+ * Clear a Pixmap-backed color buffer by filling with the clear GC.
+ */
+static void
+clear_pixmap(GLcontext *ctx, struct xmesa_renderbuffer *xrb,
+             GLint x, GLint y, GLint width, GLint height)
+{
+   XMesaBuffer drawBuf = XMESA_BUFFER(ctx->DrawBuffer);
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   /* GL's y counts up from the bottom, X's down from the top */
+   const GLint top = xrb->Base.Height - y - height;
+
+   assert(drawBuf);
+   assert(xrb->pixmap);
+   assert(xmesa);
+   assert(xmesa->display);
+   assert(drawBuf->cleargc);
+
+   XMesaFillRectangle( xmesa->display, xrb->pixmap, drawBuf->cleargc,
+                       x, top, width, height );
+}
+
+
+static void
+clear_8bit_ximage( GLcontext *ctx, struct xmesa_renderbuffer *xrb,
+                   GLint x, GLint y, GLint width, GLint height )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GLint row;
+
+   /* one memset per row: 'width' bytes starting at the row's first pixel */
+   for (row = 0; row < height; row++)
+      memset( PIXEL_ADDR1(xrb, x, y + row), xmesa->clearpixel, width );
+}
+
+
+static void
+clear_HPCR_ximage( GLcontext *ctx, struct xmesa_renderbuffer *xrb,
+                   GLint x, GLint y, GLint width, GLint height )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   const GLint xEnd = x + width, yEnd = y + height;
+   GLint row, col;
+
+   for (row = y; row < yEnd; row++) {
+      /* odd rows read the pattern 16 entries further in than even rows */
+      const GLubyte *pattern = xmesa->xm_visual->hpcr_clear_ximage_pattern[0]
+                             + ((row & 1) ? 16 : 0);
+      GLubyte *dst = PIXEL_ADDR1( xrb, x, row );
+
+      /* the pattern is indexed by the low four bits of the x coordinate */
+      for (col = x; col < xEnd; col++)
+         *dst++ = pattern[col & 15];
+   }
+}
+
+
+static void
+clear_16bit_ximage( GLcontext *ctx, struct xmesa_renderbuffer *xrb,
+                    GLint x, GLint y, GLint width, GLint height)
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GLuint fill = (GLuint) xmesa->clearpixel;
+   GLint row, col;
+
+   /* exchange the two low bytes when xmesa->swapbytes is set */
+   if (xmesa->swapbytes)
+      fill = ((fill << 8) & 0xff00) | ((fill >> 8) & 0x00ff);
+
+   /* store the 16-bit value into every pixel of every row of the region */
+   for (row = 0; row < height; row++) {
+      GLushort *dst = PIXEL_ADDR2(xrb, x, y + row);
+      for (col = width; col > 0; col--)
+         *dst++ = fill;
+   }
+}
+
+
+/* Optimized code provided by Nozomi Ytow <noz@xfree86.org> */
+static void
+clear_24bit_ximage(GLcontext *ctx, struct xmesa_renderbuffer *xrb,
+                   GLint x, GLint y, GLint width, GLint height)
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   const GLubyte red = xmesa->clearcolor[0];
+   const GLubyte green = xmesa->clearcolor[1];
+   const GLubyte blue = xmesa->clearcolor[2];
+   const GLboolean isGray = (red == green && green == blue);
+   GLint row, col;
+
+   /* Clear row by row; PIXEL_ADDR3 yields the first pixel of each row
+    * of the region.
+    */
+   for (row = 0; row < height; row++) {
+      bgr_t *pix = PIXEL_ADDR3(xrb, x, y + row);
+
+      if (isGray) {
+         /* same value for all three components: fill the row at once */
+         memset(pix, red, 3 * width);
+         continue;
+      }
+
+      /* non-gray clear color: store each component separately */
+      for (col = 0; col < width; col++) {
+         pix[col].r = red;
+         pix[col].g = green;
+         pix[col].b = blue;
+      }
+   }
+}
+
+
+static void
+clear_32bit_ximage(GLcontext *ctx, struct xmesa_renderbuffer *xrb,
+                   GLint x, GLint y, GLint width, GLint height)
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GLuint fill = (GLuint) xmesa->clearpixel;
+   GLuint *dst;
+
+   /* nothing to clear without a backing XImage */
+   if (!xrb->ximage)
+      return;
+
+   if (xmesa->swapbytes) {
+      /* reverse the four bytes of the pixel value */
+      fill = ((fill << 24) & 0xff000000)
+           | ((fill << 8)  & 0x00ff0000)
+           | ((fill >> 8)  & 0x0000ff00)
+           | ((fill >> 24) & 0x000000ff);
+   }
+
+   if (width != xrb->Base.Width || height != xrb->Base.Height) {
+      /* clearing scissored region, one row at a time */
+      GLint row, col;
+      for (row = 0; row < height; row++) {
+         dst = PIXEL_ADDR4(xrb, x, y + row);
+         for (col = 0; col < width; col++)
+            dst[col] = fill;
+      }
+   }
+   else {
+      /* clearing whole buffer: data is treated as Width*Height pixels */
+      const GLuint count = xrb->Base.Width * xrb->Base.Height;
+      GLuint k;
+
+      dst = (GLuint *) xrb->ximage->data;
+      if (fill == 0)
+         memset(dst, 0, 4 * count);   /* common case */
+      else
+         for (k = 0; k < count; k++)
+            dst[k] = fill;
+   }
+}
+
+
+static void
+clear_nbit_ximage(GLcontext *ctx, struct xmesa_renderbuffer *xrb,
+                  GLint x, GLint y, GLint width, GLint height)
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   XMesaImage *image = xrb->ximage;
+   const GLint firstRow = YFLIP(xrb, y);   /* y after the YFLIP macro */
+   GLint dx, dy;
+
+   /* TODO: optimize this */
+   for (dy = 0; dy < height; dy++) {
+      for (dx = 0; dx < width; dx++) {
+         XMesaPutPixel(image, x + dx, firstRow - dy, xmesa->clearpixel);
+      }
+   }
+}
+
+
+
+static void
+clear_buffers(GLcontext *ctx, GLbitfield buffers)
+{
+   if (ctx->DrawBuffer->Name == 0) {
+      /* this is a window system framebuffer */
+      XMesaBuffer xmb = XMESA_BUFFER(ctx->DrawBuffer);
+      const GLuint *rgbaMask = (GLuint *) &ctx->Color.ColorMask[0];
+      struct gl_renderbuffer *attached;
+
+      /* region to clear, from the framebuffer's _Xmin.._Xmax/_Ymin.._Ymax */
+      const GLint x = ctx->DrawBuffer->_Xmin;
+      const GLint y = ctx->DrawBuffer->_Ymin;
+      const GLint width = ctx->DrawBuffer->_Xmax - x;
+      const GLint height = ctx->DrawBuffer->_Ymax - y;
+
+      /* we can't handle color or index masking */
+      if (*rgbaMask == 0xffffffff && ctx->Color.IndexMask == 0xffffffff) {
+         /* Front color buffer: use its clearFunc unless the attachment
+          * is wrapped (we can't directly clear an alpha-wrapped buffer).
+          */
+         attached = ctx->DrawBuffer->Attachment[BUFFER_FRONT_LEFT].Renderbuffer;
+         if ((buffers & BUFFER_BIT_FRONT_LEFT) &&
+             xmb->frontxrb == xmesa_renderbuffer(attached)) {
+            xmb->frontxrb->clearFunc(ctx, xmb->frontxrb, x, y, width, height);
+            buffers &= ~BUFFER_BIT_FRONT_LEFT;
+         }
+
+         /* Back color buffer: same rule. */
+         attached = ctx->DrawBuffer->Attachment[BUFFER_BACK_LEFT].Renderbuffer;
+         if ((buffers & BUFFER_BIT_BACK_LEFT) &&
+             xmb->backxrb == xmesa_renderbuffer(attached)) {
+            xmb->backxrb->clearFunc(ctx, xmb->backxrb, x, y, width, height);
+            buffers &= ~BUFFER_BIT_BACK_LEFT;
+         }
+      }
+   }
+
+   /* Anything not cleared above (other buffers, or masked/wrapped color
+    * buffers) is left to the software rasterizer. */
+   if (buffers)
+      _swrast_Clear(ctx, buffers);
+}
+
+
+#ifndef XFree86Server
+/* XXX these functions haven't been tested in the Xserver environment */
+
+
+/**
+ * Tell whether glDrawPixels can take the optimized 8R8G8B XPutImage path.
+ */
+static GLboolean
+can_do_DrawPixels_8R8G8B(GLcontext *ctx, GLenum format, GLenum type)
+{
+   const SWcontext *swrast;
+   struct gl_renderbuffer *rb;
+   struct xmesa_renderbuffer *xrb;
+
+   /* BGRA ubytes into a window-system buffer; no zoom or pixel transfer */
+   if (format != GL_BGRA || type != GL_UNSIGNED_BYTE ||
+       !ctx->DrawBuffer || ctx->DrawBuffer->Name != 0)
+      return GL_FALSE;
+   if (ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0 ||
+       ctx->_ImageTransferState != 0)
+      return GL_FALSE;
+
+   swrast = SWRAST_CONTEXT(ctx);
+   if (swrast->NewState)
+      _swrast_validate_derived( ctx );
+   /* no blend, z-test, etc */
+   if ((swrast->_RasterMask & ~CLIP_BIT) != 0)
+      return GL_FALSE;
+   /* destination must be a pixmap or window without alpha bits */
+   rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
+   xrb = rb ? xmesa_renderbuffer(rb->Wrapped) : NULL;
+   if (!xrb || !xrb->pixmap ||
+       _mesa_get_format_bits(xrb->Base.Format, GL_ALPHA_BITS) != 0)
+      return GL_FALSE;
+   return GL_TRUE;
+}
+
+
+/**
+ * This function implements glDrawPixels() with an XPutImage call when
+ * drawing to the front buffer (X Window drawable); else swrast is used.
+ * The image format must be GL_BGRA to match the PF_8R8G8B pixel format.
+ */
+static void
+xmesa_DrawPixels_8R8G8B( GLcontext *ctx,
+                         GLint x, GLint y, GLsizei width, GLsizei height,
+                         GLenum format, GLenum type,
+                         const struct gl_pixelstore_attrib *unpack,
+                         const GLvoid *pixels )
+{
+   if (can_do_DrawPixels_8R8G8B(ctx, format, type)) {
+      const SWcontext *swrast = SWRAST_CONTEXT( ctx );
+      struct gl_pixelstore_attrib clippedUnpack = *unpack; /* copy for clipping */
+      int dstX = x;
+      int dstY = y;
+      int w = width;
+      int h = height;
+
+      if (swrast->NewState)
+         _swrast_validate_derived( ctx );
+
+      if (unpack->BufferObj->Name) {
+         /* unpack from PBO */
+         GLubyte *buf;
+         if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
+                                        format, type, pixels)) {
+            _mesa_error(ctx, GL_INVALID_OPERATION,
+                        "glDrawPixels(invalid PBO access)");
+            return;
+         }
+         buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
+                                                 GL_PIXEL_UNPACK_BUFFER_EXT,
+                                                 GL_READ_ONLY_ARB,
+                                                 unpack->BufferObj);
+         if (!buf) {
+            /* buffer is already mapped - that's an error */
+            _mesa_error(ctx, GL_INVALID_OPERATION,
+                        "glDrawPixels(PBO is mapped)");
+            return;
+         }
+         pixels = ADD_POINTERS(buf, pixels); /* 'pixels' used as PBO offset */
+      }
+
+      if (_mesa_clip_drawpixels(ctx, &dstX, &dstY, &w, &h, &clippedUnpack)) {
+         const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+         XMesaDisplay *dpy = xmesa->xm_visual->display;
+         XMesaBuffer xmbuf = XMESA_BUFFER(ctx->DrawBuffer);
+         const XMesaGC gc = xmbuf->cleargc;  /* affected by glColorMask */
+         struct xmesa_renderbuffer *xrb
+            = xmesa_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]->Wrapped);
+         const int srcX = clippedUnpack.SkipPixels;
+         const int srcY = clippedUnpack.SkipRows;
+         const int rowLength = clippedUnpack.RowLength;
+         XMesaImage ximage;
+
+         ASSERT(xmesa->xm_visual->dithered_pf == PF_8R8G8B);
+         ASSERT(xmesa->xm_visual->undithered_pf == PF_8R8G8B);
+         ASSERT(dpy);
+         ASSERT(gc);
+
+         /* This is a little tricky since all coordinates up to now have
+          * been in the OpenGL bottom-to-top orientation.  X is top-to-bottom
+          * so we have to carefully compute the Y coordinates/addresses here.
+          */
+         memset(&ximage, 0, sizeof(XMesaImage));
+         ximage.width = width;   /* NOTE(review): unclipped size; XPutImage uses w, h */
+         ximage.height = height;
+         ximage.format = ZPixmap;
+         ximage.data = (char *) pixels
+            + ((srcY + h - 1) * rowLength + srcX) * 4;   /* 4 bytes per pixel */
+         ximage.byte_order = LSBFirst;
+         ximage.bitmap_unit = 32;
+         ximage.bitmap_bit_order = LSBFirst;
+         ximage.bitmap_pad = 32;
+         ximage.depth = 32;
+         ximage.bits_per_pixel = 32;
+         ximage.bytes_per_line = -rowLength * 4; /* negative to flip image */
+         /* it seems we don't need to set the ximage.red/green/blue_mask fields */
+         /* flip Y axis for dest position */
+         dstY = YFLIP(xrb, dstY) - h + 1;
+         XPutImage(dpy, xrb->pixmap, gc, &ximage, 0, 0, dstX, dstY, w, h);
+      }
+
+      if (unpack->BufferObj->Name) {   /* undo the MapBuffer done above */
+         ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+                                 unpack->BufferObj);
+      }
+   }
+   else {
+      /* software fallback */
+      _swrast_DrawPixels(ctx, x, y, width, height,
+                         format, type, unpack, pixels);
+   }
+}
+
+
+
+/**
+ * Tell whether glDrawPixels can take the optimized 5R6G5B XPutImage path.
+ */
+static GLboolean
+can_do_DrawPixels_5R6G5B(GLcontext *ctx, GLenum format, GLenum type)
+{
+   const SWcontext *swrast;
+   struct gl_renderbuffer *rb;
+   struct xmesa_renderbuffer *xrb;
+
+   /* RGB 5_6_5 into a window-system buffer; no zoom or pixel transfer */
+   if (format != GL_RGB || type != GL_UNSIGNED_SHORT_5_6_5 ||
+       ctx->Color.DitherFlag ||   /* no dithering */
+       !ctx->DrawBuffer || ctx->DrawBuffer->Name != 0)
+      return GL_FALSE;
+   if (ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0 ||
+       ctx->_ImageTransferState != 0)
+      return GL_FALSE;
+
+   swrast = SWRAST_CONTEXT(ctx);
+   if (swrast->NewState)
+      _swrast_validate_derived( ctx );
+   /* no blend, z-test, etc */
+   if ((swrast->_RasterMask & ~CLIP_BIT) != 0)
+      return GL_FALSE;
+   /* destination must be a pixmap or window without alpha bits */
+   rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
+   xrb = rb ? xmesa_renderbuffer(rb->Wrapped) : NULL;
+   if (!xrb || !xrb->pixmap ||
+       _mesa_get_format_bits(xrb->Base.Format, GL_ALPHA_BITS) != 0)
+      return GL_FALSE;
+   return GL_TRUE;
+}
+
+
+/**
+ * This function implements glDrawPixels() with an XPutImage call when
+ * drawing to the front buffer (X Window drawable).  The image format
+ * must be GL_RGB and image type must be GL_UNSIGNED_SHORT_5_6_5 to
+ * match the PF_5R6G5B pixel format.
+ */
+static void
+xmesa_DrawPixels_5R6G5B( GLcontext *ctx,
+                         GLint x, GLint y, GLsizei width, GLsizei height,
+                         GLenum format, GLenum type,
+                         const struct gl_pixelstore_attrib *unpack,
+                         const GLvoid *pixels )
+{
+   if (can_do_DrawPixels_5R6G5B(ctx, format, type)) {
+      const SWcontext *swrast = SWRAST_CONTEXT( ctx );
+      struct gl_pixelstore_attrib clippedUnpack = *unpack;
+      int dstX = x;
+      int dstY = y;
+      int w = width;
+      int h = height;
+
+      if (swrast->NewState)
+         _swrast_validate_derived( ctx );
+      
+      if (unpack->BufferObj->Name) {
+         /* unpack from PBO: validate the access, then map the buffer */
+         GLubyte *buf;
+         if (!_mesa_validate_pbo_access(2, unpack, width, height, 1,
+                                        format, type, pixels)) {
+            _mesa_error(ctx, GL_INVALID_OPERATION,
+                        "glDrawPixels(invalid PBO access)");
+            return;
+         }
+         buf = (GLubyte *) ctx->Driver.MapBuffer(ctx,
+                                                 GL_PIXEL_UNPACK_BUFFER_EXT,
+                                                 GL_READ_ONLY_ARB,
+                                                 unpack->BufferObj);
+         if (!buf) {
+            /* buffer is already mapped - that's an error */
+            _mesa_error(ctx, GL_INVALID_OPERATION,
+                        "glDrawPixels(PBO is mapped)");
+            return;
+         }
+         pixels = ADD_POINTERS(buf, pixels);
+      }
+
+      if (_mesa_clip_drawpixels(ctx, &dstX, &dstY, &w, &h, &clippedUnpack)) {
+         const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+         XMesaDisplay *dpy = xmesa->xm_visual->display;
+         XMesaBuffer xmbuf = XMESA_BUFFER(ctx->DrawBuffer);
+         const XMesaGC gc = xmbuf->cleargc;  /* affected by glColorMask */
+         struct xmesa_renderbuffer *xrb
+            = xmesa_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]->Wrapped);
+         const int srcX = clippedUnpack.SkipPixels;
+         const int srcY = clippedUnpack.SkipRows;
+         const int rowLength = clippedUnpack.RowLength;
+         XMesaImage ximage;
+
+         ASSERT(xmesa->xm_visual->undithered_pf == PF_5R6G5B);
+         ASSERT(dpy);
+         ASSERT(gc);
+
+         /* This is a little tricky since all coordinates up to now have
+          * been in the OpenGL bottom-to-top orientation.  X is top-to-bottom
+          * so data points at row srcY+h-1 and bytes_per_line is negative.
+          */
+         memset(&ximage, 0, sizeof(XMesaImage));
+         ximage.width = width;   /* NOTE(review): unclipped size; w/h are */
+         ximage.height = height; /* the clipped ones - confirm intended */
+         ximage.format = ZPixmap;
+         ximage.data = (char *) pixels
+            + ((srcY + h - 1) * rowLength + srcX) * 2;
+         ximage.byte_order = LSBFirst;
+         ximage.bitmap_unit = 16;
+         ximage.bitmap_bit_order = LSBFirst;
+         ximage.bitmap_pad = 16;
+         ximage.depth = 16;
+         ximage.bits_per_pixel = 16;
+         ximage.bytes_per_line = -rowLength * 2; /* negative to flip image */
+         /* it seems we don't need to set the ximage.red/green/blue_mask fields */
+         /* flip Y axis for dest position */
+         dstY = YFLIP(xrb, dstY) - h + 1;
+         XPutImage(dpy, xrb->pixmap, gc, &ximage, 0, 0, dstX, dstY, w, h);
+      }
+
+      if (unpack->BufferObj->Name) {
+         ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT,
+                                 unpack->BufferObj);
+      }
+   }
+   else {
+      /* software fallback */
+      _swrast_DrawPixels(ctx, x, y, width, height,
+                         format, type, unpack, pixels);
+   }
+}
+
+
+/**
+ * Determine if we can do an optimized glCopyPixels: a GL_COLOR copy from
+ * GL_FRONT to GL_FRONT with no zoom, pixel transfer or raster ops (other
+ * than clipping), where both renderbuffers have a pixmap.
+ * \return GL_TRUE if the fast path may be used, else GL_FALSE.
+ */
+static GLboolean
+can_do_CopyPixels(GLcontext *ctx, GLenum type)
+{
+   if (type == GL_COLOR &&
+       ctx->_ImageTransferState == 0 &&  /* no color tables, scale/bias, etc */
+       ctx->Pixel.ZoomX == 1.0 &&        /* no zooming */
+       ctx->Pixel.ZoomY == 1.0 &&
+       ctx->Color.DrawBuffer[0] == GL_FRONT &&  /* copy to front buf */
+       ctx->Pixel.ReadBuffer == GL_FRONT &&    /* copy from front buf */
+       /* test the framebuffer pointers before looking inside them */
+       ctx->ReadBuffer && ctx->ReadBuffer->_ColorReadBuffer &&
+       ctx->DrawBuffer && ctx->DrawBuffer->_ColorDrawBuffers[0]) {
+      const SWcontext *swrast = SWRAST_CONTEXT( ctx );
+
+      if (swrast->NewState)
+         _swrast_validate_derived( ctx );
+
+      if ((swrast->_RasterMask & ~CLIP_BIT) == 0x0) { /* no blend, etc */
+         struct xmesa_renderbuffer *srcXrb
+            = xmesa_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer->Wrapped);
+         struct xmesa_renderbuffer *dstXrb
+            = xmesa_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]->Wrapped);
+         if (srcXrb->pixmap && dstXrb->pixmap) {
+            return GL_TRUE;
+         }
+      }
+   }
+   return GL_FALSE;
+}
+
+
+/**
+ * Implement glCopyPixels for the front color buffer (or back buffer Pixmap).
+ * Zooming, pixel transfer operations, etc. are not supported on this path.
+ * We do support copying from one window to another, ala glXMakeCurrentRead.
+ */
+static void
+xmesa_CopyPixels( GLcontext *ctx,
+                  GLint srcx, GLint srcy, GLsizei width, GLsizei height,
+                  GLint destx, GLint desty, GLenum type )
+{
+   XMesaContext xmesa;
+   XMesaDisplay *dpy;
+   XMesaGC gc;
+   struct xmesa_renderbuffer *readXrb, *drawXrb;
+
+   if (!can_do_CopyPixels(ctx, type)) {
+      /* conditions for the XCopyArea path are not met: use swrast */
+      _swrast_CopyPixels(ctx, srcx, srcy, width, height, destx, desty, type );
+      return;
+   }
+   xmesa = XMESA_CONTEXT(ctx);
+   dpy = xmesa->xm_visual->display;
+   gc = XMESA_BUFFER(ctx->DrawBuffer)->cleargc;  /* affected by glColorMask */
+   readXrb = xmesa_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer->Wrapped);
+   drawXrb = xmesa_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]->Wrapped);
+   ASSERT(dpy);
+   ASSERT(gc);
+
+   /* No special clipping work is done here; X will do it for us.  Only
+    * the Y coordinates are converted (YFLIP) before the copy. */
+   srcy = YFLIP(readXrb, srcy) - height + 1;
+   desty = YFLIP(drawXrb, desty) - height + 1;
+   XCopyArea(dpy, readXrb->pixmap, drawXrb->pixmap, gc,
+             srcx, srcy, width, height, destx, desty);
+}
+
+#endif /* XFree86Server */
+
+
+
+/*
+ * Every driver should implement a GetString function in order to
+ * return a meaningful GL_RENDERER string.
+ */
+static const GLubyte *
+get_string( GLcontext *ctx, GLenum name )
+{
+#ifdef XFree86Server
+   static const char renderer[] = "Mesa GLX Indirect";
+   static const char vendor[] = "Mesa project: www.mesa3d.org";
+#else
+   static const char renderer[] = "Mesa X11";
+#endif
+   (void) ctx;
+
+   if (name == GL_RENDERER)
+      return (const GLubyte *) renderer;
+#ifdef XFree86Server
+   if (name == GL_VENDOR)
+      return (const GLubyte *) vendor;
+#endif
+   /* every other name (and GL_VENDOR outside the XFree86Server build)
+    * yields NULL */
+   return NULL;
+}
+
+
+/*
+ * We implement the glEnable function only because we care about
+ * dither enable/disable.
+ */
+static void
+enable( GLcontext *ctx, GLenum pname, GLboolean state )
+{
+   XMesaContext xmesa;
+
+   if (pname != GL_DITHER) {
+      /* no other capability is handled here */
+      return;
+   }
+
+   /* switch to the pixel format that matches the new dither setting */
+   xmesa = XMESA_CONTEXT(ctx);
+   xmesa->pixelformat = state
+      ? xmesa->xm_visual->dithered_pf
+      : xmesa->xm_visual->undithered_pf;
+}
+
+
+/*
+ * Record the clear color and rebuild hpcr_clear_ximage_pattern from it.
+ */
+static void
+clear_color_HPCR_ximage( GLcontext *ctx, const GLfloat color[4] )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   int chan, row, col;
+
+   /* remember the clear color as four ubytes */
+   for (chan = 0; chan < 4; chan++) {
+      CLAMPED_FLOAT_TO_UBYTE(xmesa->clearcolor[chan], color[chan]);
+   }
+
+   if (color[0] != 0.0 || color[1] != 0.0 || color[2] != 0.0) {
+      /* build clear pattern: 16 columns in each of 2 rows */
+      for (row = 0; row < 2; row++) {
+         for (col = 0; col < 16; col++) {
+            xmesa->xm_visual->hpcr_clear_ximage_pattern[row][col] =
+               DITHER_HPCR(col, row,
+                           xmesa->clearcolor[0],
+                           xmesa->clearcolor[1],
+                           xmesa->clearcolor[2]);
+         }
+      }
+   }
+   else {
+      /* black is black: zero the whole pattern */
+      memset( xmesa->xm_visual->hpcr_clear_ximage_pattern, 0x0 ,
+              sizeof(xmesa->xm_visual->hpcr_clear_ximage_pattern));
+   }
+}
+
+
+/*
+ * Record the clear color, redraw the 16x2 hpcr_clear_ximage with it and
+ * copy that image into hpcr_clear_pixmap.
+ */
+static void
+clear_color_HPCR_pixmap( GLcontext *ctx, const GLfloat color[4] )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   const GLboolean black =
+      (color[0] == 0.0 && color[1] == 0.0 && color[2] == 0.0);
+   int chan, row, col;
+
+   /* remember the clear color as four ubytes */
+   for (chan = 0; chan < 4; chan++) {
+      CLAMPED_FLOAT_TO_UBYTE(xmesa->clearcolor[chan], color[chan]);
+   }
+
+   for (col = 0; col < 16; col++) {
+      for (row = 0; row < 2; row++) {
+         /* black is black; any other color is dithered per pixel */
+         unsigned long pixel = 0;
+
+         if (!black) {
+            pixel = DITHER_HPCR(col, row,
+                                xmesa->clearcolor[0],
+                                xmesa->clearcolor[1],
+                                xmesa->clearcolor[2]);
+         }
+         XMesaPutPixel(xmesa->xm_visual->hpcr_clear_ximage, col, row, pixel);
+      }
+   }
+
+   /* change tile pixmap content */
+   XMesaPutImage(xmesa->display,
+                 (XMesaDrawable)xmesa->xm_visual->hpcr_clear_pixmap,
+                 XMESA_BUFFER(ctx->DrawBuffer)->cleargc,
+                 xmesa->xm_visual->hpcr_clear_ximage, 0, 0, 0, 0, 16, 2);
+}
+
+
+/**
+ * Called when the driver should update its state, based on the new_state
+ * flags.
+ */
+void
+xmesa_update_state( GLcontext *ctx, GLbitfield new_state )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+
+   /* Propagate statechange information to swrast and swrast_setup
+    * modules.  The X11 driver has no internal GL-dependent state.
+    */
+   _swrast_InvalidateState( ctx, new_state );
+   _tnl_InvalidateState( ctx, new_state );
+   _vbo_InvalidateState( ctx, new_state );
+   _swsetup_InvalidateState( ctx, new_state );
+
+   if (ctx->DrawBuffer->Name != 0)
+      return;  /* the rest only applies when DrawBuffer->Name is 0 */
+
+   /*
+    * GL_DITHER, GL_READ/DRAW_BUFFER, buffer binding state, etc. affect
+    * renderbuffer span/clear funcs.
+    * Check _NEW_COLOR to detect dither enable/disable.
+    */
+   if (new_state & (_NEW_COLOR | _NEW_BUFFERS)) {
+      XMesaBuffer xmbuf = XMESA_BUFFER(ctx->DrawBuffer);
+      struct xmesa_renderbuffer *front_xrb, *back_xrb;
+
+      front_xrb = xmbuf->frontxrb;
+      if (front_xrb) {
+         xmesa_set_renderbuffer_funcs(front_xrb, xmesa->pixelformat,
+                                      xmesa->xm_visual->BitsPerPixel);
+         front_xrb->clearFunc = clear_pixmap;  /* front always uses this */
+      }
+
+      back_xrb = xmbuf->backxrb;
+      if (back_xrb) {
+         xmesa_set_renderbuffer_funcs(back_xrb, xmesa->pixelformat,
+                                      xmesa->xm_visual->BitsPerPixel);
+         if (xmbuf->backxrb->pixmap) {
+            back_xrb->clearFunc = clear_pixmap;
+         }
+         else {
+            /* no pixmap: pick an ximage clear function by BitsPerPixel */
+            switch (xmesa->xm_visual->BitsPerPixel) {
+            case 8:
+               if (xmesa->xm_visual->hpcr_clear_flag) {
+                  back_xrb->clearFunc = clear_HPCR_ximage;
+               }
+               else {
+                  back_xrb->clearFunc = clear_8bit_ximage;
+               }
+               break;
+            case 16:
+               back_xrb->clearFunc = clear_16bit_ximage;
+               break;
+            case 24:
+               back_xrb->clearFunc = clear_24bit_ximage;
+               break;
+            case 32:
+               back_xrb->clearFunc = clear_32bit_ximage;
+               break;
+            default:
+               back_xrb->clearFunc = clear_nbit_ximage;
+               break;
+            }
+         }
+      }
+   }
+
+   if (xmesa->xm_visual->hpcr_clear_flag) {
+      /* this depends on whether we're drawing to the front or back buffer */
+      /* XXX FIX THIS! */
+#if 0
+      if (pixmap) {
+         ctx->Driver.ClearColor = clear_color_HPCR_pixmap;
+      }
+      else {
+         ctx->Driver.ClearColor = clear_color_HPCR_ximage;
+      }
+#else
+      (void) clear_color_HPCR_pixmap;  /* only referenced so that the two */
+      (void) clear_color_HPCR_ximage;  /* functions do not appear unused */
+#endif
+   }
+}
+
+
+
+/**
+ * Called via ctx->Driver.TestProxyTeximage().  Normally, we'd just use
+ * the _mesa_test_proxy_teximage() fallback function, but we're going to
+ * special-case the 3D texture case to allow textures of up to
+ * 512 * 512 * 64 texels.
+ */
+static GLboolean
+test_proxy_teximage(GLcontext *ctx, GLenum target, GLint level,
+                    GLint internalFormat, GLenum format, GLenum type,
+                    GLint width, GLint height, GLint depth, GLint border)
+{
+   if (target == GL_PROXY_TEXTURE_3D) {
+      /* 3D special case; count texels as double, a GLint could overflow */
+      if ((GLdouble) width * height * depth > 512.0 * 512.0 * 64.0 ||
+          width  < 2 * border ||
+          (!ctx->Extensions.ARB_texture_non_power_of_two &&
+           _mesa_bitcount(width  - 2 * border) != 1) ||
+          height < 2 * border ||
+          (!ctx->Extensions.ARB_texture_non_power_of_two &&
+           _mesa_bitcount(height - 2 * border) != 1) ||
+          depth  < 2 * border ||
+          (!ctx->Extensions.ARB_texture_non_power_of_two &&
+           _mesa_bitcount(depth  - 2 * border) != 1)) {
+         /* Bad size, or too many texels */
+         return GL_FALSE;
+      }
+      return GL_TRUE;
+   }
+   else {
+      /* use the fallback routine for 1D, 2D, cube and rect targets */
+      return _mesa_test_proxy_teximage(ctx, target, level, internalFormat,
+                                       format, type, width, height, depth,
+                                       border);
+   }
+}
+
+
+/**
+ * In SW, we don't really compress GL_COMPRESSED_RGB[A] textures!
+ */
+static gl_format
+choose_tex_format( GLcontext *ctx, GLint internalFormat,
+                   GLenum format, GLenum type )
+{
+   /* the generic compressed formats map to uncompressed 8-bit formats */
+   if (internalFormat == GL_COMPRESSED_RGB_ARB)
+      return MESA_FORMAT_RGB888;
+   if (internalFormat == GL_COMPRESSED_RGBA_ARB)
+      return MESA_FORMAT_RGBA8888;
+
+   /* everything else is left to _mesa_choose_tex_format() */
+   return _mesa_choose_tex_format(ctx, internalFormat, format, type);
+}
+
+
+/**
+ * Called by glViewport.
+ * This is a good time for us to poll the current X window size and adjust
+ * our renderbuffers to match the current window size.
+ * Remember, we have no opportunity to respond to conventional
+ * X Resize/StructureNotify events since the X driver has no event loop.
+ * Thus, we poll.
+ * Note that this trick isn't fool-proof.  If the application never calls
+ * glViewport, our notion of the current window size may be incorrect.
+ * That problem led to the GLX_MESA_resize_buffers extension.
+ */
+static void
+xmesa_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei w, GLsizei h)
+{
+   XMesaContext xmctx = XMESA_CONTEXT(ctx);
+   XMesaBuffer drawBuf = XMESA_BUFFER(ctx->WinSysDrawBuffer);
+   XMesaBuffer readBuf = XMESA_BUFFER(ctx->WinSysReadBuffer);
+
+   /* the viewport rectangle itself is not used, only the notification */
+   (void) x, (void) y, (void) w, (void) h;
+
+   xmesa_check_and_update_buffer_size(xmctx, drawBuf);
+   xmesa_check_and_update_buffer_size(xmctx, readBuf);
+}
+
+
+#if ENABLE_EXT_timer_query
+
+/*
+ * The GL_EXT_timer_query extension is not enabled for the XServer
+ * indirect renderer.  Not sure about how/if wrapping of gettimeofday()
+ * is done, etc.
+ */
+
+struct xmesa_query_object
+{
+   struct gl_query_object Base;  /* first member: code casts between the two */
+   struct timeval StartTime;     /* filled in by xmesa_begin_query() */
+};
+
+/* Allocate a query object for id, marked Ready; NULL if allocation fails. */
+static struct gl_query_object *
+xmesa_new_query_object(GLcontext *ctx, GLuint id)
+{
+   struct xmesa_query_object *q = CALLOC_STRUCT(xmesa_query_object);
+   if (!q)
+      return NULL;  /* out of memory: don't form &q->Base from NULL */
+   q->Base.Id = id;
+   q->Base.Ready = GL_TRUE;
+   return &q->Base;
+}
+
+
+static void
+xmesa_begin_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct xmesa_query_object *xq = (struct xmesa_query_object *) q;
+   /* only elapsed-time queries record a start timestamp */
+   if (q->Target == GL_TIME_ELAPSED_EXT)
+      (void) gettimeofday(&xq->StartTime, NULL);
+}
+
+
+/**
+ * Return the time elapsed from t0 to t1 in nanoseconds.
+ */
+#ifdef __VMS
+#define suseconds_t unsigned int
+#endif
+static GLuint64EXT
+time_diff(const struct timeval *t0, const struct timeval *t1)
+{
+   /* Use the full tv_sec difference: masking the seconds to 8 bits gave a
+    * bogus result whenever the low byte wrapped between t0 and t1. */
+   const GLint64EXT sec = (GLint64EXT) t1->tv_sec - (GLint64EXT) t0->tv_sec;
+   const GLint64EXT usec = (GLint64EXT) t1->tv_usec - (GLint64EXT) t0->tv_usec;
+   return (GLuint64EXT) (sec * 1000000 + usec) * 1000;
+}
+
+
+static void
+xmesa_end_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct xmesa_query_object *xq = (struct xmesa_query_object *) q;
+   struct timeval now;
+
+   if (q->Target == GL_TIME_ELAPSED_EXT) {
+      (void) gettimeofday(&now, NULL);
+      q->Result = time_diff(&xq->StartTime, &now);  /* in nanoseconds! */
+   }
+   q->Ready = GL_TRUE;  /* set for every query target */
+}
+
+#endif /* ENABLE_EXT_timer_query */
+
+
+/**
+ * Initialize the device driver function table with the functions
+ * we implement in this driver.
+ */
+void
+xmesa_init_driver_functions( XMesaVisual xmvisual,
+                             struct dd_function_table *driver )
+{
+   driver->GetString = get_string;
+   driver->UpdateState = xmesa_update_state;
+   driver->GetBufferSize = NULL; /* OBSOLETE */
+   driver->Flush = finish_or_flush;
+   driver->Finish = finish_or_flush;  /* Flush and Finish share one function */
+   driver->ClearColor = clear_color;
+   driver->ColorMask = color_mask;
+   driver->Enable = enable;
+   driver->Viewport = xmesa_viewport;
+   if (TEST_META_FUNCS) {  /* use the _mesa_meta_* implementations instead */
+      driver->Clear = _mesa_meta_Clear;
+      driver->CopyPixels = _mesa_meta_CopyPixels;
+      driver->BlitFramebuffer = _mesa_meta_BlitFramebuffer;
+      driver->DrawPixels = _mesa_meta_DrawPixels;
+      driver->Bitmap = _mesa_meta_Bitmap;
+   }
+   else {
+      driver->Clear = clear_buffers;
+#ifndef XFree86Server
+      driver->CopyPixels = xmesa_CopyPixels;
+      if (xmvisual->undithered_pf == PF_8R8G8B &&
+          xmvisual->dithered_pf == PF_8R8G8B &&
+          xmvisual->BitsPerPixel == 32) {
+         driver->DrawPixels = xmesa_DrawPixels_8R8G8B;
+      }
+      else if (xmvisual->undithered_pf == PF_5R6G5B) {
+         driver->DrawPixels = xmesa_DrawPixels_5R6G5B;
+      }
+#endif
+   }
+   driver->TestProxyTexImage = test_proxy_teximage;
+#if ENABLE_EXT_texure_compression_s3tc /* NOTE(review): "texure" looks like a typo for "texture" - confirm which spelling the build defines */
+   driver->ChooseTextureFormat = choose_tex_format;
+#else
+   (void) choose_tex_format;  /* reference it so it does not appear unused */
+#endif
+
+#if ENABLE_EXT_timer_query
+   driver->NewQueryObject = xmesa_new_query_object;
+   driver->BeginQuery = xmesa_begin_query;
+   driver->EndQuery = xmesa_end_query;
+#endif
+}
+
+/* state flags OR'ed into swrast->InvalidatePointMask at registration */
+#define XMESA_NEW_POINT  (_NEW_POINT | \
+                          _NEW_RENDERMODE | \
+                          _SWRAST_NEW_RASTERMASK)
+/* state flags OR'ed into swrast->InvalidateLineMask at registration */
+#define XMESA_NEW_LINE   (_NEW_LINE | \
+                          _NEW_TEXTURE | \
+                          _NEW_LIGHT | \
+                          _NEW_DEPTH | \
+                          _NEW_RENDERMODE | \
+                          _SWRAST_NEW_RASTERMASK)
+/* state flags OR'ed into swrast->InvalidateTriangleMask at registration */
+#define XMESA_NEW_TRIANGLE (_NEW_POLYGON | \
+                            _NEW_TEXTURE | \
+                            _NEW_LIGHT | \
+                            _NEW_DEPTH | \
+                            _NEW_RENDERMODE | \
+                            _SWRAST_NEW_RASTERMASK)
+
+
+/**
+ * Extend the software rasterizer with our line/point/triangle
+ * functions.
+ * Called during context creation only.
+ */
+void xmesa_register_swrast_functions( GLcontext *ctx )
+{
+   SWcontext *swrast = SWRAST_CONTEXT( ctx );
+
+   /* XXX these lines have no net effect.  Remove??? */
+   swrast->InvalidateTriangleMask |= XMESA_NEW_TRIANGLE;
+   swrast->InvalidateLineMask |= XMESA_NEW_LINE;
+   swrast->InvalidatePointMask |= XMESA_NEW_POINT;
+   /* install our primitive chooser callbacks */
+   swrast->choose_triangle = xmesa_choose_triangle;
+   swrast->choose_line = xmesa_choose_line;
+   swrast->choose_point = xmesa_choose_point;
+}
diff --git a/src/mesa/drivers/x11/xm_glide.c b/src/mesa/drivers/x11/xm_glide.c
new file mode 100644
index 0000000000..cbd69b011a
--- /dev/null
+++ b/src/mesa/drivers/x11/xm_glide.c
@@ -0,0 +1,277 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "glxheader.h"
+#include "xmesaP.h"
+
+#ifdef FX
+#include "../glide/fxdrv.h"
+/* Create the Glide (fxMesa) context for buffer b, steered by MESA_GLX_FX. */
+void
+FXcreateContext(XMesaVisual v, XMesaWindow w, XMesaContext c, XMesaBuffer b)
+{
+   char *fxEnvVar = _mesa_getenv("MESA_GLX_FX");
+   if (fxEnvVar) {
+     if (fxEnvVar[0]!='d') {  /* a value starting with 'd' skips all setup */
+       int attribs[100];
+       int numAttribs = 0;
+       int hw;
+       if (v->mesa_visual.depthBits > 0) {
+	 attribs[numAttribs++] = FXMESA_DEPTH_SIZE;
+	 attribs[numAttribs++] = v->mesa_visual.depthBits;
+       }
+       if (v->mesa_visual.doubleBufferMode) {
+	 attribs[numAttribs++] = FXMESA_DOUBLEBUFFER;
+       }
+       if (v->mesa_visual.accumRedBits > 0) {
+	 attribs[numAttribs++] = FXMESA_ACCUM_SIZE;
+	 attribs[numAttribs++] = v->mesa_visual.accumRedBits;
+       }
+       if (v->mesa_visual.stencilBits > 0) {
+         attribs[numAttribs++] = FXMESA_STENCIL_SIZE;
+         attribs[numAttribs++] = v->mesa_visual.stencilBits;
+       }
+       if (v->mesa_visual.alphaBits > 0) {
+         attribs[numAttribs++] = FXMESA_ALPHA_SIZE;
+         attribs[numAttribs++] = v->mesa_visual.alphaBits;
+       }
+       if (1) {
+         attribs[numAttribs++] = FXMESA_SHARE_CONTEXT;
+         attribs[numAttribs++] = (int) &(c->mesa); /* NOTE(review): pointer cast to int; loses bits if pointers are wider than int */
+       }
+       attribs[numAttribs++] = FXMESA_NONE;
+
+       /* [dBorca] we should take an envvar for `fxMesaSelectCurrentBoard'!!! */
+/*       hw = fxMesaSelectCurrentBoard(0); */
+       hw = GR_SSTTYPE_Voodoo2;  /* hard-coded, so the else branch below is never taken */
+
+       /* if these fail, there's a new bug somewhere */
+       ASSERT(b->mesa_buffer.Width > 0);
+       ASSERT(b->mesa_buffer.Height > 0);
+
+       if ((hw == GR_SSTTYPE_VOODOO) || (hw == GR_SSTTYPE_Voodoo2)) {
+         b->FXctx = fxMesaCreateBestContext(0, b->mesa_buffer.Width,
+                                            b->mesa_buffer.Height, attribs);
+         /* NOTE(review): b->backxrb is dereferenced unchecked - confirm it is never NULL here */
+         if ((v->undithered_pf!=PF_Index) && (b->backxrb->ximage)) {
+	   b->FXisHackUsable = b->FXctx ? GL_TRUE : GL_FALSE;
+	   if (b->FXctx && (fxEnvVar[0]=='w' || fxEnvVar[0]=='W')) {
+	     b->FXwindowHack = GL_TRUE;
+	     FX_grSstControl(GR_CONTROL_DEACTIVATE);
+	   }
+           else {
+	     b->FXwindowHack = GL_FALSE;
+	   }
+         }
+       }
+       else {
+         if (fxEnvVar[0]=='w' || fxEnvVar[0]=='W')
+	   b->FXctx = fxMesaCreateContext(w, GR_RESOLUTION_NONE,
+					  GR_REFRESH_75Hz, attribs);
+         else
+	   b->FXctx = fxMesaCreateBestContext(0, b->mesa_buffer.Width,
+                                              b->mesa_buffer.Height, attribs);
+         b->FXisHackUsable = GL_FALSE;
+         b->FXwindowHack = GL_FALSE;
+       }
+       /*
+       fprintf(stderr,
+               "voodoo %d, wid %d height %d hack: usable %d active %d\n",
+               hw, b->mesa_buffer.Width, b->mesa_buffer.Height,
+	       b->FXisHackUsable, b->FXwindowHack);
+       */
+     }
+   }
+   else {
+      _mesa_warning(NULL, "WARNING: This Mesa Library includes the Glide driver but\n");
+      _mesa_warning(NULL, "         you have not defined the MESA_GLX_FX env. var.\n");
+      _mesa_warning(NULL, "         (check the README.3DFX file for more information).\n\n");
+      _mesa_warning(NULL, "         you can disable this message with a 'export MESA_GLX_FX=disable'.\n");
+   }
+}
+
+
+void FXdestroyContext( XMesaBuffer b )
+{
+   if (!b || !b->FXctx) return;  /* no Glide context to destroy */
+   fxMesaDestroyContext(b->FXctx);
+}
+
+
+GLboolean FXmakeCurrent( XMesaBuffer b )
+{
+   /* without a Glide context there is nothing to make current */
+   if (!b->FXctx) {
+      return GL_FALSE;
+   }
+   fxMesaMakeCurrent(b->FXctx);
+   return GL_TRUE;
+}
+
+
+/*
+ * Read image from VooDoo frame buffer into X/Mesa's back XImage
+ * (b->backxrb->ximage), after re-checking the buffer size. */
+static void FXgetImage( XMesaBuffer b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   static unsigned short pixbuf[MAX_WIDTH];  /* one row of 16-bit pixels */
+   GLuint x, y;
+   GLuint width, height;
+
+#ifdef XFree86Server
+   x = b->frontxrb->pixmap->x;
+   y = b->frontxrb->pixmap->y;
+   width = b->frontxrb->pixmap->width;
+   height = b->frontxrb->pixmap->height;
+   /* (there is no 'depth' local; the old assignment to it did not compile) */
+#else
+   xmesa_get_window_size(b->display, b, &width, &height);
+   x = y = 0;
+#endif
+   if (b->mesa_buffer.Width != width || b->mesa_buffer.Height != height) {
+      b->mesa_buffer.Width = MIN2((int)width, b->FXctx->width);
+      b->mesa_buffer.Height = MIN2((int)height, b->FXctx->height);
+      if (b->mesa_buffer.Width & 1)
+         b->mesa_buffer.Width--;  /* prevent odd width */
+   }
+
+   /* [dBorca] we're always in the right GR_COLORFORMAT... aren't we? */
+   /* grLfbWriteColorFormat(GR_COLORFORMAT_ARGB); */
+   if (b->xm_visual->undithered_pf==PF_5R6G5B) {
+      /* Special case: 16bpp RGB */
+      grLfbReadRegion( GR_BUFFER_FRONTBUFFER,       /* src buffer */
+                       0, b->FXctx->height - b->mesa_buffer.Height,  /*pos*/
+                       b->mesa_buffer.Width, b->mesa_buffer.Height,  /* size */
+                       b->mesa_buffer.Width * sizeof(GLushort), /* stride */
+                       b->backxrb->ximage->data);         /* dest buffer */
+   }
+   else if (b->xm_visual->dithered_pf==PF_Dither
+	    && GET_VISUAL_DEPTH(b->xm_visual)==8) {
+      /* Special case: 8bpp RGB */
+      for (y=0;y<b->mesa_buffer.Height;y++) {
+         GLubyte *ptr = (GLubyte*) b->backxrb->ximage->data
+                        + b->backxrb->ximage->bytes_per_line * y;
+         XDITHER_SETUP(y);
+
+         /* read row from 3Dfx frame buffer */
+         grLfbReadRegion( GR_BUFFER_FRONTBUFFER,
+                          0, b->FXctx->height-(b->mesa_buffer.Height-y),
+                          b->mesa_buffer.Width, 1,
+                          0,
+                          pixbuf );
+
+         /* write to XImage back buffer */
+         for (x=0;x<b->mesa_buffer.Width;x++) {
+            GLubyte red   = (pixbuf[x] & 0xf800) >> 8;
+            GLubyte green = (pixbuf[x] & 0x07e0) >> 3;
+            GLubyte blue  = (pixbuf[x] & 0x001f) << 3; /* don't shadow 'b' */
+            *ptr++ = XDITHER( x, red, green, blue);
+         }
+      }
+   }
+   else {
+      /* General case: slow! */
+      for (y=0;y<b->mesa_buffer.Height;y++) {
+         /* read row from 3Dfx frame buffer */
+         grLfbReadRegion( GR_BUFFER_FRONTBUFFER,
+                          0, b->FXctx->height-(b->mesa_buffer.Height-y),
+                          b->mesa_buffer.Width, 1,
+                          0,
+                          pixbuf );
+
+         /* write to XImage back buffer */
+         for (x=0;x<b->mesa_buffer.Width;x++) {
+            XMesaPutPixel(b->backxrb->ximage,x,y,
+			  xmesa_color_to_pixel(ctx,
+					       (pixbuf[x] & 0xf800) >> 8,
+					       (pixbuf[x] & 0x07e0) >> 3,
+					       (pixbuf[x] & 0x001f) << 3,
+					       0xff,
+                                               b->xm_visual->undithered_pf));
+         }
+      }
+   }
+   /* grLfbWriteColorFormat(GR_COLORFORMAT_ABGR); */
+}
+
+
+GLboolean FXswapBuffers( XMesaBuffer b )
+{
+   if (!b->FXctx)
+      return GL_FALSE;
+   fxMesaSwapBuffers();
+   if (b->FXwindowHack) {
+      /* window hack: copy the Voodoo image into the back XImage */
+      FXgetImage(b);
+      return GL_FALSE;
+   }
+   return GL_TRUE;
+}
+
+
+/*
+ * Switch 3Dfx support hack between window and full-screen mode.
+ */
+GLboolean XMesaSetFXmode( GLint mode )
+{
+   const char *envVal = _mesa_getenv("MESA_GLX_FX");
+
+   /* MESA_GLX_FX unset, or its value starts with 'd': nothing to switch */
+   if (!envVal || envVal[0] == 'd') {
+      return GL_FALSE;
+   }
+   else {
+      GET_CURRENT_CONTEXT(ctx);
+      GrHwConfiguration hw;
+      XMesaBuffer xmbuf;
+
+      /* the hardware query must succeed and report at least one SST */
+      if (!FX_grSstQueryHardware(&hw) || hw.num_sst < 1)
+         return GL_FALSE;
+      if (!ctx)
+         return GL_FALSE;
+
+      /* [dBorca] Hack alert:
+       * oh, this is sooo wrong: ctx above is
+       * really an fxMesaContext, not an XMesaContext
+       */
+      xmbuf = XMESA_BUFFER(ctx->DrawBuffer);
+      if (mode == XMESA_FX_WINDOW) {
+         if (!xmbuf->FXisHackUsable)
+            return GL_FALSE;
+         FX_grSstControl(GR_CONTROL_DEACTIVATE);
+         xmbuf->FXwindowHack = GL_TRUE;
+         return GL_TRUE;
+      }
+      if (mode == XMESA_FX_FULLSCREEN) {
+         FX_grSstControl(GR_CONTROL_ACTIVATE);
+         xmbuf->FXwindowHack = GL_FALSE;
+         return GL_TRUE;
+      }
+   }
+   /* Error: Bad mode value */
+   return GL_FALSE;
+}
+#endif
diff --git a/src/mesa/drivers/x11/xm_glide.h b/src/mesa/drivers/x11/xm_glide.h
new file mode 100644
index 0000000000..f7d0316524
--- /dev/null
+++ b/src/mesa/drivers/x11/xm_glide.h
@@ -0,0 +1,40 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _XM_GLIDE_H_
+#define _XM_GLIDE_H_
+
+extern void FXcreateContext( XMesaVisual v,
+                             XMesaWindow w,
+                             XMesaContext c,
+                             XMesaBuffer b );
+
+extern void FXdestroyContext( XMesaBuffer b );
+
+extern GLboolean FXmakeCurrent( XMesaBuffer b );
+/* GL_TRUE only if b has a Glide context (FXctx) and FXwindowHack is off. */
+extern GLboolean FXswapBuffers( XMesaBuffer b );
+
+#endif /* _XM_GLIDE_H_ */
diff --git a/src/mesa/drivers/x11/xm_image.c b/src/mesa/drivers/x11/xm_image.c
new file mode 100644
index 0000000000..087b4e4c3a
--- /dev/null
+++ b/src/mesa/drivers/x11/xm_image.c
@@ -0,0 +1,133 @@
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <kevin@precisioninsight.com>
+ *   Brian Paul <brian@precisioninsight.com>
+ */
+
+#include <stdlib.h>
+#include <X11/Xmd.h>
+
+#include "glxheader.h"
+#include "xmesaP.h"
+
+#ifdef XFree86Server
+
+#ifdef ROUNDUP
+#undef ROUNDUP
+#endif
+/* Bytes per row: pads a *bit* count (despite the name nbytes) to pad bits. */
+#define ROUNDUP(nbytes, pad) ((((nbytes) + ((pad)-1)) / (pad)) * ((pad)>>3))
+
+XMesaImage *XMesaCreateImage(int bitsPerPixel, int width, int height, char *data)
+{
+    XMesaImage *img = (XMesaImage *)xalloc(sizeof(XMesaImage));
+
+    /* A failed allocation is handed back to the caller as NULL. */
+    if (!img)
+	return NULL;
+
+    img->bits_per_pixel = bitsPerPixel;
+    img->width = width;
+    img->height = height;
+    img->data = data;	/* pixel storage is supplied by the caller */
+    /* Always pad to 32 bits */
+    img->bytes_per_line = ROUNDUP((bitsPerPixel * width), 32);
+
+    return img;
+}
+
+/* Release an image from XMesaCreateImage() together with its pixel data. */
+void XMesaDestroyImage(XMesaImage *image)
+{
+    if (!image) return;	/* XMesaCreateImage() may have returned NULL */
+    free(image->data);	/* free(NULL) is a no-op, so no guard is needed */
+    xfree(image);	/* the struct itself was obtained with xalloc() */
+}
+
+unsigned long XMesaGetPixel(XMesaImage *image, int x, int y)
+{
+    /* Fetch the pixel at column x of scanline y, widened to unsigned long. */
+    CARD8 *scanline = (CARD8 *)(image->data + y*image->bytes_per_line);
+
+    /* The storage layout depends on the image's bits per pixel. */
+    switch (image->bits_per_pixel) {
+    case 8:
+	return scanline[x];
+    case 15:
+    case 16:
+	return ((CARD16 *)scanline)[x];
+    case 24: /* WARNING: architecture specific code */
+	{
+	    /* Three bytes per pixel; the first one is the low-order byte. */
+	    const CARD8 *px = scanline + x*3;
+
+	    return (((CARD32)px[0]) |
+		    (((CARD32)px[1])<<8) |
+		    (((CARD32)px[2])<<16));
+	}
+    case 32:
+	return ((CARD32 *)scanline)[x];
+    default:
+	/* Any other depth is not handled and reads as 0. */
+	break;
+    }
+    return 0;
+}
+
+#ifndef XMESA_USE_PUTPIXEL_MACRO
+void XMesaPutPixel(XMesaImage *image, int x, int y, unsigned long pixel)
+{
+    /* Store pixel at column x, row y; the layout follows bits_per_pixel. */
+    CARD8  *row = (CARD8 *)(image->data + y*image->bytes_per_line);
+    CARD16 *i16;
+    CARD32 *i32;
+    switch (image->bits_per_pixel) {
+    case 8:
+	row[x] = (CARD8)pixel;
+	break;
+    case 15:
+    case 16:
+	i16 = (CARD16 *)row;
+	i16[x] = (CARD16)pixel;
+	break;
+    case 24: /* WARNING: architecture specific code */
+	/* Three bytes per pixel, low-order byte first (was: undeclared __row/p). */
+	row[x*3]   = (CARD8)(pixel);
+	row[x*3+1] = (CARD8)(pixel>>8);
+	row[x*3+2] = (CARD8)(pixel>>16);
+	break; /* without this, case 32 also stored a 32-bit word at row + 4*x */
+    case 32:
+	i32 = (CARD32 *)row;
+	i32[x] = (CARD32)pixel;
+	break;
+    }
+}
+#endif
+
+#endif /* XFree86Server */
diff --git a/src/mesa/drivers/x11/xm_image.h b/src/mesa/drivers/x11/xm_image.h
new file mode 100644
index 0000000000..2a5e0f3777
--- /dev/null
+++ b/src/mesa/drivers/x11/xm_image.h
@@ -0,0 +1,77 @@
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <kevin@precisioninsight.com>
+ *   Brian Paul <brian@precisioninsight.com>
+ */
+
+#ifndef _XM_IMAGE_H_
+#define _XM_IMAGE_H_
+
+#define XMESA_USE_PUTPIXEL_MACRO
+
+extern XMesaImage *XMesaCreateImage(int bitsPerPixel, int width, int height,
+				    char *data);
+extern void XMesaDestroyImage(XMesaImage *image);
+extern unsigned long XMesaGetPixel(XMesaImage *image, int x, int y);
+#ifdef XMESA_USE_PUTPIXEL_MACRO
+#define XMesaPutPixel(__i,__x,__y,__p) \
+{ \
+    /* Store pixel __p at column __x, row __y of image __i. */ \
+    /* Each argument is parenthesized; __p is evaluated exactly once. */ \
+    unsigned long __pix = (__p); \
+    CARD8  *__row = (CARD8 *)((__i)->data + (__y)*(__i)->bytes_per_line); \
+    CARD16 *__i16; \
+    CARD32 *__i32; \
+    switch ((__i)->bits_per_pixel) { \
+    case 8: \
+	__row[(__x)] = (CARD8)__pix; \
+	break; \
+    case 15: \
+    case 16: \
+	__i16 = (CARD16 *)__row; \
+	__i16[(__x)] = (CARD16)__pix; \
+	break; \
+    case 24: /* WARNING: architecture specific code */ \
+	__row[(__x)*3]   = (CARD8)(__pix); \
+	__row[(__x)*3+1] = (CARD8)(__pix>>8); \
+	__row[(__x)*3+2] = (CARD8)(__pix>>16); \
+	break; \
+    case 32: \
+	__i32 = (CARD32 *)__row; \
+	__i32[(__x)] = (CARD32)__pix; \
+	break; \
+    } \
+}
+#else
+extern void XMesaPutPixel(XMesaImage *image, int x, int y,
+			  unsigned long pixel);
+#endif
+
+#endif /* _XM_IMAGE_H_ */
diff --git a/src/mesa/drivers/x11/xm_line.c b/src/mesa/drivers/x11/xm_line.c
new file mode 100644
index 0000000000..f643b6d3a7
--- /dev/null
+++ b/src/mesa/drivers/x11/xm_line.c
@@ -0,0 +1,691 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * This file contains "accelerated" point, line, and triangle functions.
+ * It should be fairly easy to write new special-purpose point, line or
+ * triangle functions and hook them into this module.
+ */
+
+
+#include "glxheader.h"
+#include "main/depth.h"
+#include "main/macros.h"
+#include "main/mtypes.h"
+#include "xmesaP.h"
+
+/* Internal swrast includes:
+ */
+#include "swrast/s_depth.h"
+#include "swrast/s_points.h"
+#include "swrast/s_lines.h"
+#include "swrast/s_context.h"
+
+
+/**********************************************************************/
+/***                    Point rendering                             ***/
+/**********************************************************************/
+
+
+/*
+ * Render an array of points into a pixmap, any pixel format.
+ */
+#if 000
+/* XXX don't use this: it doesn't dither correctly, and `xrb` is not declared in it */
+static void draw_points_ANY_pixmap( GLcontext *ctx, const SWvertex *vert )
+{
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xmesa->xm_buffer->buffer;
+   XMesaGC gc = xmesa->xm_buffer->gc;
+
+   if (xmesa->xm_visual->mesa_visual.RGBAflag) {
+      register int x, y;
+      const GLubyte *color = vert->color;
+      unsigned long pixel = xmesa_color_to_pixel( xmesa,
+						  color[0], color[1],
+						  color[2], color[3],
+						  xmesa->pixelformat);
+      XMesaSetForeground( dpy, gc, pixel );
+      x = (GLint) vert->win[0];
+      y = YFLIP( xrb, (GLint) vert->win[1] );
+      XMesaDrawPoint( dpy, buffer, gc, x, y);
+   }
+   else {
+      /* Color index mode */
+      register int x, y;
+      XMesaSetForeground( dpy, gc, vert->index );
+      x =                         (GLint) vert->win[0];
+      y = YFLIP( xrb, (GLint) vert->win[1] );
+      XMesaDrawPoint( dpy, buffer, gc, x, y);
+   }
+}
+#endif
+
+
+/* Override the swrast point-selection function.  Try to use one of
+ * our internal point functions, otherwise fall back to the standard
+ * swrast functions.
+ */
+void xmesa_choose_point( GLcontext *ctx )
+{
+#if 0 /* disabled: would install draw_points_ANY_pixmap, itself under #if 000 */
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+
+   if (ctx->RenderMode == GL_RENDER
+       && ctx->Point.Size == 1.0F && !ctx->Point.SmoothFlag
+       && swrast->_RasterMask == 0
+       && !ctx->Texture._EnabledUnits
+       && xmesa->xm_buffer->buffer != XIMAGE) {
+      swrast->Point = draw_points_ANY_pixmap;
+   }
+   else {
+      _swrast_choose_point( ctx );
+   }
+#else /* active path: unconditionally defer to the swrast chooser */
+   _swrast_choose_point( ctx );
+#endif
+}
+
+
+
+/**********************************************************************/
+/***                      Line rendering                            ***/
+/**********************************************************************/
+
+
+#if CHAN_BITS == 8
+
+
+#define GET_XRB(XRB)  struct xmesa_renderbuffer *XRB = \
+   xmesa_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]->Wrapped)
+
+
+/*
+ * Draw a flat-shaded, PF_TRUECOLOR line into an XImage.
+ */
+#define NAME flat_TRUECOLOR_line
+#define SETUP_CODE					\
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);		\
+   GET_XRB(xrb);					\
+   const GLubyte *color = vert1->color;			\
+   unsigned long pixel;					\
+   PACK_TRUECOLOR( pixel, color[0], color[1], color[2] );
+#define CLIP_HACK 1
+#define PLOT(X,Y) XMesaPutPixel(xrb->ximage, X, YFLIP(xrb, Y), pixel );
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, PF_8A8B8G8R line into an XImage.
+ */
+#define NAME flat_8A8B8G8R_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;				\
+   GLuint pixel = PACK_8A8B8G8R(color[0], color[1], color[2], color[3]);
+#define PIXEL_TYPE GLuint
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y) *pixelPtr = pixel;
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, PF_8A8R8G8B line into an XImage.
+ */
+#define NAME flat_8A8R8G8B_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;				\
+   GLuint pixel = PACK_8A8R8G8B(color[0], color[1], color[2], color[3]);
+#define PIXEL_TYPE GLuint
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y) *pixelPtr = pixel;
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, PF_8R8G8B line into an XImage.
+ */
+#define NAME flat_8R8G8B_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;				\
+   GLuint pixel = PACK_8R8G8B( color[0], color[1], color[2] );
+#define PIXEL_TYPE GLuint
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y) *pixelPtr = pixel;
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, PF_8R8G8B24 line into an XImage.
+ */
+#define NAME flat_8R8G8B24_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;
+#define PIXEL_TYPE bgr_t
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR3(xrb, X, Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y) {			\
+      pixelPtr->r = color[RCOMP];	\
+      pixelPtr->g = color[GCOMP];	\
+      pixelPtr->b = color[BCOMP];	\
+}
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, PF_5R6G5B line into an XImage.
+ */
+#define NAME flat_5R6G5B_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;				\
+   GLushort pixel = PACK_5R6G5B( color[0], color[1], color[2] );
+#define PIXEL_TYPE GLushort
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR2(xrb, X, Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y) *pixelPtr = pixel;
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, PF_DITHER_5R6G5B line into an XImage.
+ */
+#define NAME flat_DITHER_5R6G5B_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);			\
+   const GLubyte *color = vert1->color;
+#define PIXEL_TYPE GLushort
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR2(xrb, X, Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y) PACK_TRUEDITHER( *pixelPtr, X, Y, color[0], color[1], color[2] );
+#include "swrast/s_linetemp.h"
+
+
+
+
+/*
+ * Draw a flat-shaded, PF_DITHER 8-bit line into an XImage.
+ */
+#define NAME flat_DITHER8_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;				\
+   GLint r = color[0], g = color[1], b = color[2];		\
+   DITHER_SETUP;
+#define PIXEL_TYPE GLubyte
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X, Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y) *pixelPtr = DITHER(X,Y,r,g,b);
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, PF_LOOKUP 8-bit line into an XImage.
+ */
+#define NAME flat_LOOKUP8_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;				\
+   GLubyte pixel;						\
+   LOOKUP_SETUP;						\
+   pixel = (GLubyte) LOOKUP( color[0], color[1], color[2] );
+#define PIXEL_TYPE GLubyte
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X,Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y) *pixelPtr = pixel;
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, PF_HPCR line into an XImage.
+ */
+#define NAME flat_HPCR_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);			\
+   const GLubyte *color = vert1->color;				\
+   GLint r = color[0], g = color[1], b = color[2];
+#define PIXEL_TYPE GLubyte
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X,Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y) *pixelPtr = (GLubyte) DITHER_HPCR(X,Y,r,g,b);
+#include "swrast/s_linetemp.h"
+
+
+
+
+/*
+ * Draw a flat-shaded, Z-less, PF_TRUECOLOR line into an XImage.
+ */
+#define NAME flat_TRUECOLOR_z_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);			\
+   const GLubyte *color = vert1->color;				\
+   unsigned long pixel;						\
+   PACK_TRUECOLOR( pixel, color[0], color[1], color[2] );
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define CLIP_HACK 1
+#define PLOT(X,Y)							\
+	if (Z < *zPtr) {						\
+	   *zPtr = Z;							\
+           XMesaPutPixel(xrb->ximage, X, YFLIP(xrb, Y), pixel);		\
+	}
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, Z-less, PF_8A8B8G8R line into an XImage.
+ */
+#define NAME flat_8A8B8G8R_z_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;				\
+   GLuint pixel = PACK_8A8B8G8R(color[0], color[1], color[2], color[3]);
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE GLuint
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X,Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y)		\
+	if (Z < *zPtr) {	\
+	   *zPtr = Z;		\
+	   *pixelPtr = pixel;	\
+	}
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, Z-less, PF_8A8R8G8B line into an XImage.
+ */
+#define NAME flat_8A8R8G8B_z_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;				\
+   GLuint pixel = PACK_8A8R8G8B(color[0], color[1], color[2], color[3]);
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE GLuint
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X,Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y)		\
+	if (Z < *zPtr) {	\
+	   *zPtr = Z;		\
+	   *pixelPtr = pixel;	\
+	}
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, Z-less, PF_8R8G8B line into an XImage.
+ */
+#define NAME flat_8R8G8B_z_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;				\
+   GLuint pixel = PACK_8R8G8B( color[0], color[1], color[2] );
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE GLuint
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X,Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y)		\
+	if (Z < *zPtr) {	\
+	   *zPtr = Z;		\
+	   *pixelPtr = pixel;	\
+	}
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, Z-less, PF_8R8G8B24 line into an XImage.
+ */
+#define NAME flat_8R8G8B24_z_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE bgr_t
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR3(xrb, X,Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y)			\
+	if (Z < *zPtr) {		\
+	   *zPtr = Z;			\
+           pixelPtr->r = color[RCOMP];	\
+           pixelPtr->g = color[GCOMP];	\
+           pixelPtr->b = color[BCOMP];	\
+	}
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, Z-less, PF_5R6G5B line into an XImage.
+ */
+#define NAME flat_5R6G5B_z_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;				\
+   GLushort pixel = PACK_5R6G5B( color[0], color[1], color[2] );
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE GLushort
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR2(xrb, X,Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y)		\
+	if (Z < *zPtr) {	\
+	   *zPtr = Z;		\
+	   *pixelPtr = pixel;	\
+	}
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, Z-less, PF_DITHER_5R6G5B line into an XImage.
+ */
+#define NAME flat_DITHER_5R6G5B_z_line
+#define SETUP_CODE					\
+   GET_XRB(xrb);						\
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);		\
+   const GLubyte *color = vert1->color;
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE GLushort
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR2(xrb, X,Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y)		\
+	if (Z < *zPtr) {	\
+	   *zPtr = Z;		\
+	   PACK_TRUEDITHER(*pixelPtr, X, Y, color[0], color[1], color[2]); \
+	}
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, Z-less, PF_DITHER 8-bit line into an XImage.
+ */
+#define NAME flat_DITHER8_z_line
+#define SETUP_CODE					\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;			\
+   GLint r = color[0], g = color[1], b = color[2];	\
+   DITHER_SETUP;
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE GLubyte
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X,Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y)						\
+	if (Z < *zPtr) {					\
+	   *zPtr = Z;						\
+	   *pixelPtr = (GLubyte) DITHER( X, Y, r, g, b);	\
+	}
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, Z-less, PF_LOOKUP 8-bit line into an XImage.
+ */
+#define NAME flat_LOOKUP8_z_line
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   const GLubyte *color = vert1->color;				\
+   GLubyte pixel;						\
+   LOOKUP_SETUP;						\
+   pixel = (GLubyte) LOOKUP( color[0], color[1], color[2] );
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE GLubyte
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X,Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y)		\
+	if (Z < *zPtr) {	\
+	   *zPtr = Z;		\
+	   *pixelPtr = pixel;	\
+	}
+#include "swrast/s_linetemp.h"
+
+
+
+/*
+ * Draw a flat-shaded, Z-less, PF_HPCR line into an XImage.
+ */
+#define NAME flat_HPCR_z_line
+#define SETUP_CODE 						\
+   GET_XRB(xrb);						\
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);			\
+   const GLubyte *color = vert1->color;				\
+   GLint r = color[0], g = color[1], b = color[2];
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE GLubyte
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X,Y)
+#define CLIP_HACK 1
+#define PLOT(X,Y)						\
+	if (Z < *zPtr) {					\
+	   *zPtr = Z;						\
+	   *pixelPtr = (GLubyte) DITHER_HPCR( X, Y, r, g, b);	\
+	}
+#include "swrast/s_linetemp.h"
+
+
+
+
+#ifndef XFree86Server
+/**
+ * Draw fast, XOR line with XDrawLine in front color buffer.
+ * WARNING: this isn't fully OpenGL conformant because different pixels
+ * will be hit versus using the other line functions.
+ * Don't use the code in X server GLcore module since we need a wrapper
+ * for the XSetLineAttributes() function call.
+ */
+static void
+xor_line(GLcontext *ctx, const SWvertex *vert0, const SWvertex *vert1)
+{
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaGC gc = xmesa->xm_buffer->gc;
+   GET_XRB(xrb);
+   /* The whole line takes the color of the second vertex. */
+   const GLubyte *rgba = vert1->color;
+   unsigned long fg = xmesa_color_to_pixel(ctx, rgba[0], rgba[1], rgba[2],
+                                           rgba[3], xmesa->pixelformat);
+   int xa =            (GLint) vert0->attrib[FRAG_ATTRIB_WPOS][0];
+   int ya = YFLIP(xrb, (GLint) vert0->attrib[FRAG_ATTRIB_WPOS][1]);
+   int xb =            (GLint) vert1->attrib[FRAG_ATTRIB_WPOS][0];
+   int yb = YFLIP(xrb, (GLint) vert1->attrib[FRAG_ATTRIB_WPOS][1]);
+   XMesaSetForeground(dpy, gc, fg);
+   XMesaSetFunction(dpy, gc, GXxor);
+   XSetLineAttributes(dpy, gc, (int) ctx->Line.Width,
+                      LineSolid, CapButt, JoinMiter);
+   XDrawLine(dpy, xrb->pixmap, gc, xa, ya, xb, yb);
+   XMesaSetFunction(dpy, gc, GXcopy);  /* this gc is used elsewhere */
+}
+#endif /* XFree86Server */
+
+
+#endif /* CHAN_BITS == 8 */
+
+
+/**
+ * Return pointer to line drawing function, or NULL if we should use a
+ * swrast fallback.
+ */
+static swrast_line_func
+get_line_func(GLcontext *ctx)
+{
+#if CHAN_BITS == 8
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   XMesaBuffer xmbuf = XMESA_BUFFER(ctx->DrawBuffer);
+   const int depth = GET_VISUAL_DEPTH(xmesa->xm_visual);
+   const struct xmesa_renderbuffer *xrb;
+
+   /* _ColorDrawBufferIndexes[] is tested against BUFFER_x indexes (as the XOR
+    * test further down already does), not against BUFFER_BIT_x masks. */
+   if ((ctx->DrawBuffer->_ColorDrawBufferIndexes[0] != BUFFER_FRONT_LEFT) &&
+       (ctx->DrawBuffer->_ColorDrawBufferIndexes[0] != BUFFER_BACK_LEFT))
+      return (swrast_line_func) NULL;
+   if (ctx->RenderMode != GL_RENDER)      return (swrast_line_func) NULL;
+   if (ctx->Line.SmoothFlag)              return (swrast_line_func) NULL;
+   if (ctx->Texture._EnabledUnits)        return (swrast_line_func) NULL;
+   if (ctx->Light.ShadeModel != GL_FLAT)  return (swrast_line_func) NULL;
+   if (ctx->Line.StippleFlag)             return (swrast_line_func) NULL;
+   if (swrast->_RasterMask & MULTI_DRAW_BIT) return (swrast_line_func) NULL;
+   if (xmbuf->swAlpha)                    return (swrast_line_func) NULL;
+
+   xrb = xmesa_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]->Wrapped);
+   /* XImage, depth test only: GL_LESS with depth writes on, 1-pixel lines. */
+   if (xrb->ximage
+       && swrast->_RasterMask==DEPTH_BIT
+       && ctx->Depth.Func==GL_LESS
+       && ctx->Depth.Mask==GL_TRUE
+       && ctx->Visual.depthBits == DEFAULT_SOFTWARE_DEPTH_BITS
+       && ctx->Line.Width==1.0F) {
+      switch (xmesa->pixelformat) {
+         case PF_Truecolor:
+            return flat_TRUECOLOR_z_line;
+         case PF_8A8B8G8R:
+            return flat_8A8B8G8R_z_line;
+         case PF_8A8R8G8B:
+            return flat_8A8R8G8B_z_line;
+         case PF_8R8G8B:
+            return flat_8R8G8B_z_line;
+         case PF_8R8G8B24:
+            return flat_8R8G8B24_z_line;
+         case PF_5R6G5B:
+            return flat_5R6G5B_z_line;
+         case PF_Dither_5R6G5B:
+            return flat_DITHER_5R6G5B_z_line;
+         case PF_Dither:
+            return (depth==8) ? flat_DITHER8_z_line : (swrast_line_func) NULL;
+         case PF_Lookup:
+            return (depth==8) ? flat_LOOKUP8_z_line : (swrast_line_func) NULL;
+         case PF_HPCR:
+            return flat_HPCR_z_line;
+         default:
+            return (swrast_line_func)NULL;
+      }
+   }
+   if (xrb->ximage
+       && swrast->_RasterMask==0
+       && ctx->Line.Width==1.0F) {
+      switch (xmesa->pixelformat) {
+         case PF_Truecolor:
+            return flat_TRUECOLOR_line;
+         case PF_8A8B8G8R:
+            return flat_8A8B8G8R_line;
+         case PF_8A8R8G8B:
+            return flat_8A8R8G8B_line;
+         case PF_8R8G8B:
+            return flat_8R8G8B_line;
+         case PF_8R8G8B24:
+            return flat_8R8G8B24_line;
+         case PF_5R6G5B:
+            return flat_5R6G5B_line;
+         case PF_Dither_5R6G5B:
+            return flat_DITHER_5R6G5B_line;
+         case PF_Dither:
+            return (depth==8) ? flat_DITHER8_line : (swrast_line_func) NULL;
+         case PF_Lookup:
+            return (depth==8) ? flat_LOOKUP8_line : (swrast_line_func) NULL;
+         case PF_HPCR:
+            return flat_HPCR_line;
+         default:
+            return (swrast_line_func)NULL;
+      }
+   }
+
+#ifndef XFree86Server
+   if (ctx->DrawBuffer->_NumColorDrawBuffers == 1
+       && ctx->DrawBuffer->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT
+       && swrast->_RasterMask == LOGIC_OP_BIT
+       && ctx->Color.LogicOp == GL_XOR) {  /* stipple/smooth rejected above */
+      return xor_line;
+   }
+#endif /* XFree86Server */
+
+#endif /* CHAN_BITS == 8 */
+   return (swrast_line_func) NULL;
+}
+
+
+/**
+ * Override for the swrast line-selection function.  Try to use one
+ * of our internal line functions, otherwise fall back to the
+ * standard swrast functions.
+ */
+void
+xmesa_choose_line(GLcontext *ctx)
+{
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   /* Install our own line function; NULL means none of them applies. */
+   swrast->Line = get_line_func( ctx );
+   if (!swrast->Line)
+      _swrast_choose_line( ctx );
+}
diff --git a/src/mesa/drivers/x11/xm_span.c b/src/mesa/drivers/x11/xm_span.c
new file mode 100644
index 0000000000..c39d87c451
--- /dev/null
+++ b/src/mesa/drivers/x11/xm_span.c
@@ -0,0 +1,4819 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.3
+ *
+ * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "glxheader.h"
+#include "main/colormac.h"
+#include "main/context.h"
+#include "main/depth.h"
+#include "main/drawpix.h"
+#include "main/extensions.h"
+#include "main/macros.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/state.h"
+#include "xmesaP.h"
+
+#include "swrast/swrast.h"
+
+
+/*
+ * The following functions are used to trap XGetImage() calls which
+ * generate BadMatch errors if the drawable isn't mapped.
+ */
+
+#ifndef XFree86Server
+static int caught_xgetimage_error = 0;   /* trap flag; returned by check_xgetimage_errors() */
+static int (*old_xerror_handler)( XMesaDisplay *dpy, XErrorEvent *ev );   /* handler that was replaced while the trap is armed */
+static unsigned long xgetimage_serial;   /* NextRequest() value recorded by catch_xgetimage_errors() */
+
+/*
+ * Error handler active while the XGetImage trap is armed; always returns 0.
+ */
+static int xgetimage_error_handler( XMesaDisplay *dpy, XErrorEvent *ev )
+{
+   if (ev->serial==xgetimage_serial && ev->error_code==BadMatch) {
+      /* the expected error: flag it so check_xgetimage_errors() reports 1 */
+      caught_xgetimage_error = 1;
+   }
+   else {
+      /* unrelated error: forward to the previous handler, or ignore if none */
+      if (old_xerror_handler) {
+         (*old_xerror_handler)( dpy, ev );
+      }
+   }
+   return 0;
+}
+
+
+/*
+ * Arm the error trap.  Call this immediately before XGetImage.
+ */
+static void catch_xgetimage_errors( XMesaDisplay *dpy )
+{
+   caught_xgetimage_error = 0;   /* nothing trapped yet */
+   xgetimage_serial = NextRequest( dpy );   /* serial the handler will compare against */
+   old_xerror_handler = XSetErrorHandler( xgetimage_error_handler );
+}
+
+
+/*
+ * Disarm the trap right after XGetImage; returns 0=no error, 1=error caught.
+ */
+static int check_xgetimage_errors( void )
+{
+   const int trapped = caught_xgetimage_error;
+   /* reinstall the handler that was saved when the trap was armed */
+   XSetErrorHandler( old_xerror_handler );
+   return trapped;
+}
+#endif
+
+
+/*
+ * Fetch the value of the single pixel at (x, y) of X drawable d.
+ */
+static unsigned long read_pixel( XMesaDisplay *dpy,
+                                 XMesaDrawable d, int x, int y )
+{
+   unsigned long p;
+#ifndef XFree86Server
+   XMesaImage *pixel;
+   int error;
+
+   /* bracket XGetImage with the error trap defined earlier in this file */
+   catch_xgetimage_errors( dpy );
+   pixel = XGetImage( dpy, d, x, y, 1, 1, AllPlanes, ZPixmap );
+   error = check_xgetimage_errors();
+   /* a missing image or a trapped error yields 0 */
+   p = 0;
+   if (pixel) {
+      if (!error) {
+         p = XMesaGetPixel( pixel, 0, 0 );
+      }
+      XMesaDestroyImage( pixel );
+   }
+#else
+   (*dpy->GetImage)(d, x, y, 1, 1, ZPixmap, ~0L, (pointer)&p);
+#endif
+   return p;
+}
+
+
+
+/*
+ * The Mesa library needs to be able to draw pixels in a number of ways:
+ *   1. RGB vs Color Index
+ *   2. as horizontal spans (polygons, images) vs random locations (points,
+ *      lines)
+ *   3. different color per-pixel or same color for all pixels
+ *
+ * Furthermore, the X driver needs to support rendering to 3 possible
+ * "buffers", usually one, but sometimes two at a time:
+ *   1. The front buffer as an X window
+ *   2. The back buffer as a Pixmap
+ *   3. The back buffer as an XImage
+ *
+ * Finally, if the back buffer is an XImage, we can avoid using XPutPixel and
+ * optimize common cases such as 24-bit and 8-bit modes.
+ *
+ * By multiplication, there's at least 48 possible combinations of the above.
+ *
+ * Below are implementations of the most commonly used combinations.  They are
+ * accessed through function pointers which get initialized here and are used
+ * directly from the Mesa library.  The 8 function pointers directly correspond
+ * to the first 3 cases listed above.
+ *
+ *
+ * The function naming convention is:
+ *
+ *   [put|get]_[mono]_[row|values]_[format]_[pixmap|ximage]
+ *
+ * New functions optimized for specific cases can be added without too much
+ * trouble.  An example might be the 24-bit TrueColor mode 8A8R8G8B which is
+ * found on IBM RS/6000 X servers.
+ */
+
+
+
+
+/**********************************************************************/
+/*** Write COLOR SPAN functions                                     ***/
+/**********************************************************************/
+
+/* Parameter list of the put_row_* functions visible below, which read `values` as GLubyte[4] pixels. */
+#define PUT_ROW_ARGS \
+	GLcontext *ctx,					\
+	struct gl_renderbuffer *rb,			\
+	GLuint n, GLint x, GLint y,			\
+	const void *values, const GLubyte mask[]
+/* Same parameter list, used by the put_row_rgb_* functions, which read `values` as GLubyte[3] pixels. */
+#define RGB_SPAN_ARGS \
+	GLcontext *ctx,					\
+	struct gl_renderbuffer *rb,			\
+	GLuint n, GLint x, GLint y,			\
+	const void *values, const GLubyte mask[]
+
+/* Declares a local XRB initialized from xmesa_renderbuffer(rb); a variable named rb must be in scope. */
+#define GET_XRB(XRB) \
+   struct xmesa_renderbuffer *XRB = xmesa_renderbuffer(rb)
+
+
+/*
+ * PF_TRUECOLOR: write a span of RGBA pixels to a pixmap.
+ */
+static void put_row_TRUECOLOR_pixmap( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = XMESA_BUFFER(ctx->DrawBuffer)->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: pack the whole row into the row image, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      for (i = 0; i < n; i++) {
+         unsigned long p;
+         PACK_TRUECOLOR( p, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+         XMesaPutPixel( rowimg, i, 0, p );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (mask[i]) {
+            unsigned long p;
+            PACK_TRUECOLOR( p, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+            XMesaSetForeground( dpy, gc, p );
+            XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+         }
+      }
+   }
+}
+
+
+/*
+ * PF_TRUECOLOR: write a span of RGB pixels (no alpha) to a pixmap.
+ */
+static void put_row_rgb_TRUECOLOR_pixmap( RGB_SPAN_ARGS )
+{
+   const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: pack the whole row into the row image, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      for (i = 0; i < n; i++) {
+         unsigned long p;
+         PACK_TRUECOLOR( p, rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP] );
+         XMesaPutPixel( rowimg, i, 0, p );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (mask[i]) {
+            unsigned long p;
+            PACK_TRUECOLOR( p, rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP] );
+            XMesaSetForeground( dpy, gc, p );
+            XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+         }
+      }
+   }
+}
+
+/*
+ * PF_TRUEDITHER: write a span of RGBA pixels to a pixmap.
+ */
+static void put_row_TRUEDITHER_pixmap( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {   /* no mask: build the whole row in the row image first */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      for (i = 0; i < n; i++) {
+         unsigned long p;
+         PACK_TRUEDITHER(p, x+i, y, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP]);
+         XMesaPutPixel( rowimg, i, 0, p );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (mask[i]) {
+            unsigned long p;
+            PACK_TRUEDITHER(p, x, y, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP]);
+            XMesaSetForeground( dpy, gc, p );
+            XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+         }
+      }
+   }
+}
+
+
+/*
+ * PF_TRUEDITHER: write a span of RGB pixels (no alpha) to a pixmap.
+ */
+static void put_row_rgb_TRUEDITHER_pixmap( RGB_SPAN_ARGS )
+{
+   const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {   /* no mask: build the whole row in the row image first */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      for (i = 0; i < n; i++) {
+         unsigned long p;
+         PACK_TRUEDITHER(p, x+i, y, rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP]);
+         XMesaPutPixel( rowimg, i, 0, p );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (mask[i]) {
+            unsigned long p;
+            PACK_TRUEDITHER(p, x, y, rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP]);
+            XMesaSetForeground( dpy, gc, p );
+            XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+         }
+      }
+   }
+}
+
+
+/*
+ * PF_8A8B8G8R: write a span of RGBA pixels to a pixmap.
+ */
+static void put_row_8A8B8G8R_pixmap( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: fill the row image with packed pixels, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      GLuint *ptr4 = (GLuint *) rowimg->data;
+      for (i = 0; i < n; i++) {
+         ptr4[i] = PACK_8A8B8G8R( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP], rgba[i][ACOMP] );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (!mask[i])
+            continue;
+         XMesaSetForeground( dpy, gc,
+                      PACK_8A8B8G8R(rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP], rgba[i][ACOMP]) );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+      }
+   }
+}
+
+
+/*
+ * PF_8A8B8G8R: write a span of RGB pixels (no alpha) to a pixmap.
+ */
+static void put_row_rgb_8A8B8G8R_pixmap( RGB_SPAN_ARGS )
+{
+   const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: fill the row image with packed pixels, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      GLuint *ptr4 = (GLuint *) rowimg->data;
+      for (i = 0; i < n; i++) {
+         ptr4[i] = PACK_8B8G8R(rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP]);
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (!mask[i])
+            continue;
+         XMesaSetForeground( dpy, gc,
+                PACK_8B8G8R(rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP]) );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+      }
+   }
+}
+
+/*
+ * PF_8A8R8G8B: write a span of RGBA pixels to a pixmap.
+ */
+static void put_row_8A8R8G8B_pixmap( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: fill the row image with packed pixels, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      GLuint *ptr4 = (GLuint *) rowimg->data;
+      for (i = 0; i < n; i++) {
+         ptr4[i] = PACK_8A8R8G8B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP], rgba[i][ACOMP] );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (!mask[i])
+            continue;
+         XMesaSetForeground( dpy, gc,
+                      PACK_8A8R8G8B(rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP], rgba[i][ACOMP]) );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+      }
+   }
+}
+
+
+/*
+ * PF_8A8R8G8B: write a span of RGB pixels (no alpha) to a pixmap.
+ */
+static void put_row_rgb_8A8R8G8B_pixmap( RGB_SPAN_ARGS )
+{
+   const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: fill the row image with packed pixels, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      GLuint *ptr4 = (GLuint *) rowimg->data;
+      for (i = 0; i < n; i++) {
+         ptr4[i] = PACK_8R8G8B(rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP]);
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (!mask[i])
+            continue;
+         XMesaSetForeground( dpy, gc,
+                PACK_8R8G8B(rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP]) );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+      }
+   }
+}
+
+/*
+ * PF_8R8G8B: write a span of RGBA pixels to a pixmap (alpha is not packed).
+ */
+static void put_row_8R8G8B_pixmap( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: fill the row image with packed pixels, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      GLuint *ptr4 = (GLuint *) rowimg->data;
+      for (i = 0; i < n; i++) {
+         ptr4[i] = PACK_8R8G8B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (mask[i]) {
+#if 1
+            /*
+             * XXX Unexplained behavior (historical note from the author):
+             * when drawing into a window that uses a depth 32 TrueColor
+             * visual, the pixels look right on screen, but reading them
+             * back with XGetImage() returns random colors.
+             * The disabled alternative below, which uses XPutImage()
+             * instead, seems to mostly fix the problem, but not always.
+             * Windows are not normally created with this visual, but glean
+             * does so and some failures are seen there.
+             */
+            XMesaSetForeground( dpy, gc, PACK_8R8G8B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ));
+            XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+#else
+            /* Disabled alternative: works more often, but not always */
+            XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+            GLuint *ptr4 = (GLuint *) rowimg->data;
+            *ptr4 = PACK_8R8G8B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+            XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, 1, 1 );
+#endif
+         }
+      }
+   }
+}
+
+
+/*
+ * Write a span of PF_8R8G8B24 pixels to a pixmap (row image filled with 3 bytes per pixel).
+ */
+static void put_row_8R8G8B24_pixmap( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   y = YFLIP(xrb, y);
+   if (mask) {
+      /* masked: draw each enabled pixel as a separate point */
+      register GLuint i;
+      for (i=0;i<n;i++,x++) {
+         if (mask[i]) {
+            XMesaSetForeground( dpy, gc,
+               PACK_8R8G8B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ));
+            XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+         }
+      }
+   }
+   else {
+      /* draw all pixels */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      register GLuint *ptr4 = (GLuint *) rowimg->data;
+      register GLuint pixel;
+      static const GLuint shift[4] = {0, 8, 16, 24};   /* bit offset of each byte within a GLuint */
+      register GLuint i = 0;
+      int w = n;   /* pixels still to be packed */
+      while (w > 3) {   /* four pixels (12 bytes) fill exactly three GLuint words */
+         pixel  = rgba[i][BCOMP] /* << shift[0]*/;
+         pixel |= rgba[i][GCOMP]    << shift[1];
+         pixel |= rgba[i++][RCOMP]  << shift[2];
+         pixel |= rgba[i][BCOMP]    << shift[3];
+         *ptr4++ = pixel;
+
+         pixel  = rgba[i][GCOMP] /* << shift[0]*/;
+         pixel |= rgba[i++][RCOMP]  << shift[1];
+         pixel |= rgba[i][BCOMP]    << shift[2];
+         pixel |= rgba[i][GCOMP]    << shift[3];
+         *ptr4++ = pixel;
+
+         pixel  = rgba[i++][RCOMP]/* << shift[0]*/;
+         pixel |= rgba[i][BCOMP]     << shift[1];
+         pixel |= rgba[i][GCOMP]     << shift[2];
+         pixel |= rgba[i++][RCOMP]   << shift[3];
+         *ptr4++ = pixel;
+
+         w -= 4;
+      }
+      switch (w) {   /* 0 to 3 leftover pixels */
+         case 3:   /* two full words plus the low byte of a third */
+            pixel = 0;
+            pixel |= rgba[i][BCOMP] /*<< shift[0]*/;
+            pixel |= rgba[i][GCOMP]   << shift[1];
+            pixel |= rgba[i++][RCOMP] << shift[2];
+            pixel |= rgba[i][BCOMP]   << shift[3];
+            *ptr4++ = pixel;
+            pixel = 0;
+            pixel |= rgba[i][GCOMP] /*<< shift[0]*/;
+            pixel |= rgba[i++][RCOMP] << shift[1];
+            pixel |= rgba[i][BCOMP]   << shift[2];
+            pixel |= rgba[i][GCOMP]   << shift[3];
+            *ptr4++ = pixel;
+            pixel = 0xffffff00 & *ptr4;   /* keep the upper three bytes already in the row image */
+            pixel |= rgba[i][RCOMP] /*<< shift[0]*/;
+            *ptr4 = pixel;
+            break;
+         case 2:   /* one full word plus the low two bytes of a second */
+            pixel = 0;
+            pixel |= rgba[i][BCOMP] /*<< shift[0]*/;
+            pixel |= rgba[i][GCOMP]   << shift[1];
+            pixel |= rgba[i++][RCOMP] << shift[2];
+            pixel |= rgba[i][BCOMP]   << shift[3];
+            *ptr4++ = pixel;
+            pixel = 0xffff0000 & *ptr4;   /* keep the upper two bytes already in the row image */
+            pixel |= rgba[i][GCOMP] /*<< shift[0]*/;
+            pixel |= rgba[i][RCOMP]   << shift[1];
+            *ptr4 = pixel;
+            break;
+         case 1:   /* low three bytes of a single word */
+            pixel = 0xff000000 & *ptr4;   /* keep the top byte already in the row image */
+            pixel |= rgba[i][BCOMP] /*<< shift[0]*/;
+            pixel |= rgba[i][GCOMP] << shift[1];
+            pixel |= rgba[i][RCOMP] << shift[2];
+            *ptr4 = pixel;
+            break;
+         case 0:
+            break;
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+}
+
+
+/*
+ * PF_8R8G8B: write a span of RGB pixels (no alpha) to a pixmap.
+ */
+static void put_row_rgb_8R8G8B_pixmap( RGB_SPAN_ARGS )
+{
+   const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: fill the row image with packed pixels, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      GLuint *ptr4 = (GLuint *) rowimg->data;
+      for (i = 0; i < n; i++) {
+         ptr4[i] = PACK_8R8G8B( rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP] );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (!mask[i])
+            continue;
+         XMesaSetForeground( dpy, gc, PACK_8R8G8B( rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP] ));
+         XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+      }
+   }
+}
+
+/*
+ * Write a span of PF_8R8G8B24 pixels to a pixmap (no alpha; row image filled with 3 bytes per pixel).
+ */
+static void put_row_rgb_8R8G8B24_pixmap( RGB_SPAN_ARGS )
+{
+   const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   y = YFLIP(xrb, y);
+   if (mask) {
+      /* masked: draw each enabled pixel as a separate point */
+      register GLuint i;
+      for (i=0;i<n;i++,x++) {
+         if (mask[i]) {
+            XMesaSetForeground( dpy, gc,
+                  PACK_8R8G8B( rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP] ));
+            XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+         }
+      }
+   }
+   else {
+      /* draw all pixels */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      register GLuint *ptr4 = (GLuint *) rowimg->data;
+      register GLuint pixel;
+      static const GLuint shift[4] = {0, 8, 16, 24};   /* bit offset of each byte within a GLuint */
+      unsigned w = n;   /* pixels still to be packed */
+      register GLuint i = 0;
+      while (w > 3) {   /* four pixels (12 bytes) fill exactly three GLuint words */
+         pixel = 0;
+         pixel |= rgb[i][BCOMP]/* << shift[0]*/;
+         pixel |= rgb[i][GCOMP] << shift[1];
+         pixel |= rgb[i++][RCOMP] << shift[2];
+         pixel |= rgb[i][BCOMP] <<shift[3];
+         *ptr4++ = pixel;
+
+         pixel = 0;
+         pixel |= rgb[i][GCOMP]/* << shift[0]*/;
+         pixel |= rgb[i++][RCOMP] << shift[1];
+         pixel |= rgb[i][BCOMP] << shift[2];
+         pixel |= rgb[i][GCOMP] << shift[3];
+         *ptr4++ = pixel;
+
+         pixel = 0;
+         pixel |= rgb[i++][RCOMP]/* << shift[0]*/;
+         pixel |= rgb[i][BCOMP] << shift[1];
+         pixel |= rgb[i][GCOMP] << shift[2];
+         pixel |= rgb[i++][RCOMP] << shift[3];
+         *ptr4++ = pixel;
+         w -= 4;
+      }
+      switch (w) {   /* 0 to 3 leftover pixels */
+         case 3:   /* two full words plus the low byte of a third */
+            pixel = 0;
+            pixel |= rgb[i][BCOMP]/* << shift[0]*/;
+            pixel |= rgb[i][GCOMP] << shift[1];
+            pixel |= rgb[i++][RCOMP] << shift[2];
+            pixel |= rgb[i][BCOMP] << shift[3];
+            *ptr4++ = pixel;
+            pixel = 0;
+            pixel |= rgb[i][GCOMP]/* << shift[0]*/;
+            pixel |= rgb[i++][RCOMP] << shift[1];
+            pixel |= rgb[i][BCOMP] << shift[2];
+            pixel |= rgb[i][GCOMP] << shift[3];
+            *ptr4++ = pixel;
+            pixel = *ptr4;
+            pixel &= 0xffffff00;   /* keep the upper three bytes already in the row image */
+            pixel |= rgb[i++][RCOMP]/* << shift[0]*/;
+            *ptr4++ = pixel;
+            break;
+         case 2:   /* one full word plus the low two bytes of a second */
+            pixel = 0;
+            pixel |= rgb[i][BCOMP]/* << shift[0]*/;
+            pixel |= rgb[i][GCOMP] << shift[1];
+            pixel |= rgb[i++][RCOMP] << shift[2];
+            pixel |= rgb[i][BCOMP]  << shift[3];
+            *ptr4++ = pixel;
+            pixel = *ptr4;
+            pixel &= 0xffff0000;   /* keep the upper two bytes already in the row image */
+            pixel |= rgb[i][GCOMP]/* << shift[0]*/;
+            pixel |= rgb[i++][RCOMP] << shift[1];
+            *ptr4++ = pixel;
+            break;
+         case 1:   /* low three bytes of a single word */
+            pixel = *ptr4;
+            pixel &= 0xff000000;   /* keep the top byte already in the row image */
+            pixel |= rgb[i][BCOMP]/* << shift[0]*/;
+            pixel |= rgb[i][GCOMP] << shift[1];
+            pixel |= rgb[i++][RCOMP] << shift[2];
+            *ptr4++ = pixel;
+            break;
+         case 0:
+            break;
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+}
+
+
+/*
+ * PF_5R6G5B: write a span of RGBA pixels to a pixmap.
+ */
+static void put_row_5R6G5B_pixmap( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: fill the row image with 16-bit packed pixels, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      GLushort *ptr2 = (GLushort *) rowimg->data;
+      for (i = 0; i < n; i++) {
+         ptr2[i] = PACK_5R6G5B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (!mask[i])
+            continue;
+         XMesaSetForeground( dpy, gc, PACK_5R6G5B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ));
+         XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+      }
+   }
+}
+
+
+/*
+ * PF_DITHER_5R6G5B: write a span of RGBA pixels to a pixmap.
+ */
+static void put_row_DITHER_5R6G5B_pixmap( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: fill the row image with 16-bit dithered pixels, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      GLushort *ptr2 = (GLushort *) rowimg->data;
+      for (i = 0; i < n; i++) {
+         PACK_TRUEDITHER( ptr2[i], x+i, y, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (mask[i]) {
+            unsigned long p;
+            PACK_TRUEDITHER(p, x, y, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP]);
+            XMesaSetForeground( dpy, gc, p );
+            XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+         }
+      }
+   }
+}
+
+
+/*
+ * PF_5R6G5B: write a span of RGB pixels (no alpha) to a pixmap.
+ */
+static void put_row_rgb_5R6G5B_pixmap( RGB_SPAN_ARGS )
+{
+   const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: fill the row image with 16-bit packed pixels, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      GLushort *ptr2 = (GLushort *) rowimg->data;
+      for (i = 0; i < n; i++) {
+         ptr2[i] = PACK_5R6G5B( rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP] );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (!mask[i])
+            continue;
+         XMesaSetForeground( dpy, gc, PACK_5R6G5B( rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP] ));
+         XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+      }
+   }
+}
+
+
+/*
+ * PF_DITHER_5R6G5B: write a span of RGB pixels (no alpha) to a pixmap.
+ */
+static void put_row_rgb_DITHER_5R6G5B_pixmap( RGB_SPAN_ARGS )
+{
+   const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: fill the row image with 16-bit dithered pixels, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      GLushort *ptr2 = (GLushort *) rowimg->data;
+      for (i = 0; i < n; i++) {
+         PACK_TRUEDITHER( ptr2[i], x+i, y, rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP] );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (mask[i]) {
+            unsigned long p;
+            PACK_TRUEDITHER(p, x, y, rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP]);
+            XMesaSetForeground( dpy, gc, p );
+            XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+         }
+      }
+   }
+}
+
+
+/*
+ * PF_DITHER: write a span of RGBA pixels to a pixmap via the XDITHER macro.
+ */
+static void put_row_DITHER_pixmap( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   XDITHER_SETUP(y);   /* set up with y as passed in, i.e. before the flip below */
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: dither the whole row into the row image, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      for (i = 0; i < n; i++) {
+         XMesaPutPixel( rowimg, i, 0, XDITHER(x+i, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP]) );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (!mask[i])
+            continue;
+         XMesaSetForeground( dpy, gc, XDITHER(x, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP]) );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+      }
+   }
+}
+
+
+/*
+ * PF_DITHER: write a span of RGB pixels (no alpha) to a pixmap via XDITHER.
+ */
+static void put_row_rgb_DITHER_pixmap( RGB_SPAN_ARGS )
+{
+   const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   XDITHER_SETUP(y);   /* set up with y as passed in, i.e. before the flip below */
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: dither the whole row into the row image, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      for (i = 0; i < n; i++) {
+         XMesaPutPixel( rowimg, i, 0, XDITHER(x+i, rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP]) );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (!mask[i])
+            continue;
+         XMesaSetForeground( dpy, gc, XDITHER(x, rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP]) );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+      }
+   }
+}
+
+
+/*
+ * PF_1BIT: write a span of RGBA pixels to a pixmap via the DITHER_1BIT macro.
+ */
+static void put_row_1BIT_pixmap( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   SETUP_1BIT;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* no mask: dither the whole row into the row image, then put it with one call */
+      XMesaImage *rowimg = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      for (i = 0; i < n; i++) {
+         XMesaPutPixel( rowimg, i, 0,
+                    DITHER_1BIT( x+i, y, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ) );
+      }
+      XMesaPutImage( dpy, buffer, gc, rowimg, 0, 0, x, y, n, 1 );
+   }
+   else {
+      for (i = 0; i < n; i++, x++) {
+         if (!mask[i])
+            continue;
+         XMesaSetForeground( dpy, gc,
+                         DITHER_1BIT( x, y, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ) );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+      }
+   }
+}
+
+
+/*
+ * Write a row of n PF_1BIT pixels to a pixmap (no alpha).
+ * `values` is read as RGB triplets.  With a mask, only pixels whose
+ * mask[i] is nonzero are drawn, one point at a time.  Without a mask the
+ * row is assembled in the draw buffer's rowimage and sent with a single
+ * XMesaPutImage.
+ */
+static void put_row_rgb_1BIT_pixmap( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *display = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC drawGC = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   SETUP_1BIT;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      /* draw all pixels */
+      XMesaImage *scanline = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      for (i = 0; i < n; i++) {
+         XMesaPutPixel( scanline, i, 0,
+                        DITHER_1BIT(x+i, y, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]) );
+      }
+      XMesaPutImage( display, target, drawGC, scanline, 0, 0, x, y, n, 1 );
+      return;
+   }
+   for (i = 0; i < n; i++, x++) {
+      if (!mask[i]) {
+         continue;
+      }
+      XMesaSetForeground( display, drawGC,
+                          DITHER_1BIT(x, y, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]) );
+      XMesaDrawPoint( display, target, drawGC, (int) x, (int) y );
+   }
+}
+
+
+/*
+ * Write a row of n PF_HPCR pixels to a pixmap, starting at (x, y).
+ * `values` is read as RGBA quadruplets; only R, G and B are used.
+ * With a mask, only pixels whose mask[i] is nonzero are drawn, one point
+ * at a time.  Without a mask the dithered bytes are stored directly into
+ * the rowimage's data and the row is sent with a single XMesaPutImage.
+ */
+static void put_row_HPCR_pixmap( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *display = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC drawGC = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      XMesaImage *scanline = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      GLubyte *bytes = (GLubyte *) scanline->data;
+      for (i = 0; i < n; i++) {
+         bytes[i] = DITHER_HPCR( (x+i), y, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] );
+      }
+      XMesaPutImage( display, target, drawGC, scanline, 0, 0, x, y, n, 1 );
+      return;
+   }
+   for (i = 0; i < n; i++, x++) {
+      if (!mask[i]) {
+         continue;
+      }
+      XMesaSetForeground( display, drawGC,
+                          DITHER_HPCR( x, y, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] ) );
+      XMesaDrawPoint( display, target, drawGC, (int) x, (int) y );
+   }
+}
+
+
+/*
+ * Write a row of n PF_HPCR pixels to a pixmap (no alpha).
+ * `values` is read as RGB triplets.  With a mask, only pixels whose
+ * mask[i] is nonzero are drawn, one point at a time.  Without a mask the
+ * dithered bytes are stored directly into the rowimage's data and the row
+ * is sent with a single XMesaPutImage.
+ */
+static void put_row_rgb_HPCR_pixmap( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *display = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC drawGC = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      XMesaImage *scanline = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      GLubyte *bytes = (GLubyte *) scanline->data;
+      for (i = 0; i < n; i++) {
+         bytes[i] = DITHER_HPCR( (x+i), y, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] );
+      }
+      XMesaPutImage( display, target, drawGC, scanline, 0, 0, x, y, n, 1 );
+      return;
+   }
+   for (i = 0; i < n; i++, x++) {
+      if (!mask[i]) {
+         continue;
+      }
+      XMesaSetForeground( display, drawGC,
+                          DITHER_HPCR(x, y, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]) );
+      XMesaDrawPoint( display, target, drawGC, (int) x, (int) y );
+   }
+}
+
+/*
+ * Write a row of n PF_LOOKUP pixels to a pixmap, starting at (x, y).
+ * `values` is read as RGBA quadruplets; only R, G and B are passed to
+ * LOOKUP.  With a mask, only pixels whose mask[i] is nonzero are drawn,
+ * one point at a time.  Without a mask the row is assembled in the draw
+ * buffer's rowimage and sent with a single XMesaPutImage.
+ */
+static void put_row_LOOKUP_pixmap( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *display = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC drawGC = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   LOOKUP_SETUP;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      XMesaImage *scanline = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      for (i = 0; i < n; i++) {
+         XMesaPutPixel( scanline, i, 0,
+                        LOOKUP(src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]) );
+      }
+      XMesaPutImage( display, target, drawGC, scanline, 0, 0, x, y, n, 1 );
+      return;
+   }
+   for (i = 0; i < n; i++, x++) {
+      if (!mask[i]) {
+         continue;
+      }
+      XMesaSetForeground( display, drawGC,
+                          LOOKUP( src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] ) );
+      XMesaDrawPoint( display, target, drawGC, (int) x, (int) y );
+   }
+}
+
+
+/*
+ * Write a row of n PF_LOOKUP pixels to a pixmap (no alpha).
+ * `values` is read as RGB triplets.  With a mask, only pixels whose
+ * mask[i] is nonzero are drawn, one point at a time.  Without a mask the
+ * row is assembled in the draw buffer's rowimage and sent with a single
+ * XMesaPutImage.
+ */
+static void put_row_rgb_LOOKUP_pixmap( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *display = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC drawGC = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   LOOKUP_SETUP;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      XMesaImage *scanline = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      for (i = 0; i < n; i++) {
+         XMesaPutPixel( scanline, i, 0,
+                        LOOKUP(src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]) );
+      }
+      XMesaPutImage( display, target, drawGC, scanline, 0, 0, x, y, n, 1 );
+      return;
+   }
+   for (i = 0; i < n; i++, x++) {
+      if (!mask[i]) {
+         continue;
+      }
+      XMesaSetForeground( display, drawGC,
+                          LOOKUP( src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] ) );
+      XMesaDrawPoint( display, target, drawGC, (int) x, (int) y );
+   }
+}
+
+
+/*
+ * Write a row of n PF_GRAYSCALE pixels to a pixmap, starting at (x, y).
+ * `values` is read as RGBA quadruplets; only R, G and B are passed to
+ * GRAY_RGB.  With a mask, only pixels whose mask[i] is nonzero are drawn,
+ * one point at a time.  Without a mask the row is assembled in the draw
+ * buffer's rowimage and sent with a single XMesaPutImage.
+ */
+static void put_row_GRAYSCALE_pixmap( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *display = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC drawGC = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      XMesaImage *scanline = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      for (i = 0; i < n; i++) {
+         XMesaPutPixel( scanline, i, 0,
+                        GRAY_RGB(src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]) );
+      }
+      XMesaPutImage( display, target, drawGC, scanline, 0, 0, x, y, n, 1 );
+      return;
+   }
+   for (i = 0; i < n; i++, x++) {
+      if (!mask[i]) {
+         continue;
+      }
+      XMesaSetForeground( display, drawGC,
+                          GRAY_RGB( src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] ) );
+      XMesaDrawPoint( display, target, drawGC, (int) x, (int) y );
+   }
+}
+
+
+/*
+ * Write a row of n PF_GRAYSCALE pixels to a pixmap (no alpha).
+ * `values` is read as RGB triplets.  With a mask, only pixels whose
+ * mask[i] is nonzero are drawn, one point at a time.  Without a mask the
+ * row is assembled in the draw buffer's rowimage and sent with a single
+ * XMesaPutImage.
+ */
+static void put_row_rgb_GRAYSCALE_pixmap( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *display = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC drawGC = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   if (!mask) {
+      XMesaImage *scanline = XMESA_BUFFER(ctx->DrawBuffer)->rowimage;
+      for (i = 0; i < n; i++) {
+         XMesaPutPixel( scanline, i, 0,
+                        GRAY_RGB(src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]) );
+      }
+      XMesaPutImage( display, target, drawGC, scanline, 0, 0, x, y, n, 1 );
+      return;
+   }
+   for (i = 0; i < n; i++, x++) {
+      if (!mask[i]) {
+         continue;
+      }
+      XMesaSetForeground( display, drawGC,
+                          GRAY_RGB( src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] ) );
+      XMesaDrawPoint( display, target, drawGC, (int) x, (int) y );
+   }
+}
+
+/*
+ * Write a row of n PF_TRUECOLOR pixels to an XImage, starting at (x, y).
+ * `values` is read as RGBA quadruplets; only R, G and B are packed.
+ * When mask is non-null, pixels whose mask[i] is zero are left untouched.
+ */
+static void put_row_TRUECOLOR_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaImage *image = xrb->ximage;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   for (i = 0; i < n; i++, x++) {
+      unsigned long pixel;
+      if (mask && !mask[i]) {
+         continue;
+      }
+      PACK_TRUECOLOR( pixel, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] );
+      XMesaPutPixel( image, x, y, pixel );
+   }
+}
+
+
+/*
+ * Write a row of n PF_TRUECOLOR pixels to an XImage (no alpha).
+ * `values` is read as RGB triplets.  When mask is non-null, pixels whose
+ * mask[i] is zero are left untouched.
+ */
+static void put_row_rgb_TRUECOLOR_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaImage *image = xrb->ximage;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   for (i = 0; i < n; i++, x++) {
+      unsigned long pixel;
+      if (mask && !mask[i]) {
+         continue;
+      }
+      PACK_TRUECOLOR( pixel, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] );
+      XMesaPutPixel( image, x, y, pixel );
+   }
+}
+
+
+/*
+ * Write a row of n PF_TRUEDITHER pixels to an XImage, starting at (x, y).
+ * `values` is read as RGBA quadruplets; only R, G and B are packed.
+ * The flipped y is used both for dithering and as the XImage row.
+ * When mask is non-null, pixels whose mask[i] is zero are left untouched.
+ */
+static void put_row_TRUEDITHER_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaImage *image = xrb->ximage;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   for (i = 0; i < n; i++, x++) {
+      unsigned long pixel;
+      if (mask && !mask[i]) {
+         continue;
+      }
+      PACK_TRUEDITHER(pixel, x, y, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]);
+      XMesaPutPixel( image, x, y, pixel );
+   }
+}
+
+
+/*
+ * Write a row of n PF_TRUEDITHER pixels to an XImage (no alpha).
+ * `values` is read as RGB triplets.  The flipped y is used both for
+ * dithering and as the XImage row.  When mask is non-null, pixels whose
+ * mask[i] is zero are left untouched.
+ */
+static void put_row_rgb_TRUEDITHER_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaImage *image = xrb->ximage;
+   GLuint i;
+   y = YFLIP(xrb, y);
+   for (i = 0; i < n; i++, x++) {
+      unsigned long pixel;
+      if (mask && !mask[i]) {
+         continue;
+      }
+      PACK_TRUEDITHER(pixel, x, y, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]);
+      XMesaPutPixel( image, x, y, pixel );
+   }
+}
+
+
+/*
+ * Write a row of n PF_8A8B8G8R-format pixels to an ximage.
+ * `values` is read as RGBA quadruplets and each one is packed into the
+ * 32-bit word at PIXEL_ADDR4(xrb, x, y) + i.  When mask is non-null,
+ * pixels whose mask[i] is zero are left untouched.
+ */
+static void put_row_8A8B8G8R_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLuint *dst = PIXEL_ADDR4(xrb, x, y);
+   GLuint i;
+   (void) ctx;
+   for (i = 0; i < n; i++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      dst[i] = PACK_8A8B8G8R( src[i][RCOMP], src[i][GCOMP], src[i][BCOMP], src[i][ACOMP] );
+   }
+}
+
+
+/*
+ * Write a row of n PF_8A8B8G8R-format pixels to an ximage (no alpha).
+ * `values` is read as RGB triplets; 255 is packed as the alpha value.
+ * When mask is non-null, pixels whose mask[i] is zero are left untouched.
+ */
+static void put_row_rgb_8A8B8G8R_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   GET_XRB(xrb);
+   GLuint *dst = PIXEL_ADDR4(xrb, x, y);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      dst[i] = PACK_8A8B8G8R( src[i][RCOMP], src[i][GCOMP], src[i][BCOMP], 255 );
+   }
+}
+
+/*
+ * Write a row of n PF_8A8R8G8B-format pixels to an ximage.
+ * `values` is read as RGBA quadruplets and each one is packed into the
+ * 32-bit word at PIXEL_ADDR4(xrb, x, y) + i.  When mask is non-null,
+ * pixels whose mask[i] is zero are left untouched.
+ */
+static void put_row_8A8R8G8B_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLuint *dst = PIXEL_ADDR4(xrb, x, y);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      dst[i] = PACK_8A8R8G8B( src[i][RCOMP], src[i][GCOMP], src[i][BCOMP], src[i][ACOMP] );
+   }
+}
+
+
+/*
+ * Write a row of n PF_8A8R8G8B-format pixels to an ximage (no alpha).
+ * `values` is read as RGB triplets; 255 is packed as the alpha value.
+ * When mask is non-null, pixels whose mask[i] is zero are left untouched.
+ */
+static void put_row_rgb_8A8R8G8B_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   GET_XRB(xrb);
+   GLuint *dst = PIXEL_ADDR4(xrb, x, y);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      dst[i] = PACK_8A8R8G8B( src[i][RCOMP], src[i][GCOMP], src[i][BCOMP], 255 );
+   }
+}
+
+
+/*
+ * Write a row of n PF_8R8G8B-format pixels to an ximage.
+ * `values` is read as RGBA quadruplets; only R, G and B are packed into
+ * the 32-bit word at PIXEL_ADDR4(xrb, x, y) + i.  When mask is non-null,
+ * pixels whose mask[i] is zero are left untouched.
+ */
+static void put_row_8R8G8B_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLuint *dst = PIXEL_ADDR4(xrb, x, y);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      dst[i] = PACK_8R8G8B(src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]);
+   }
+}
+
+
+/*
+ * Write a row of n PF_8R8G8B24-format pixels to an ximage.
+ * `values` is read as RGBA quadruplets; alpha is not stored.  Each pixel
+ * occupies three consecutive bytes starting at PIXEL_ADDR3(xrb, x, y),
+ * written in the order blue, green, red.  When mask is non-null, pixels
+ * whose mask[i] is zero are left untouched.
+ *
+ * The bytes are stored individually.  The previous implementation cast
+ * the byte pointer to GLuint* and derived an "alignment" from
+ * ptr - (GLubyte *) ptr4, which is always zero, so it issued unaligned
+ * 32-bit read-modify-write cycles, touched one byte beyond the last
+ * pixel, and only produced the B,G,R layout on little-endian hosts.
+ * On little-endian hosts the bytes written here are the same as before.
+ */
+static void put_row_8R8G8B24_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLubyte *ptr = (GLubyte *) PIXEL_ADDR3(xrb, x, y );
+   GLuint i;
+   /* ptr advances by one 3-byte pixel per iteration, masked or not */
+   for (i = 0; i < n; i++, ptr += 3) {
+      if (!mask || mask[i]) {
+         ptr[0] = rgba[i][BCOMP];
+         ptr[1] = rgba[i][GCOMP];
+         ptr[2] = rgba[i][RCOMP];
+      }
+   }
+}
+
+
+/*
+ * Write a row of n PF_8R8G8B-format pixels to an ximage (no alpha).
+ * `values` is read as RGB triplets, each packed into the 32-bit word at
+ * PIXEL_ADDR4(xrb, x, y) + i.  When mask is non-null, pixels whose
+ * mask[i] is zero are left untouched.
+ */
+static void put_row_rgb_8R8G8B_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   GET_XRB(xrb);
+   GLuint *dst = PIXEL_ADDR4(xrb, x, y);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      dst[i] = PACK_8R8G8B(src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]);
+   }
+}
+
+
+/*
+ * Write a row of n PF_8R8G8B24-format pixels to an ximage (no alpha).
+ * `values` is read as RGB triplets.  Each pixel occupies three
+ * consecutive bytes starting at PIXEL_ADDR3(xrb, x, y), written in the
+ * order blue, green, red.  When mask is non-null, pixels whose mask[i]
+ * is zero are skipped but still advance the destination by three bytes.
+ */
+static void put_row_rgb_8R8G8B24_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   GET_XRB(xrb);
+   GLubyte *dst = (GLubyte *) PIXEL_ADDR3(xrb, x, y);
+   GLuint i;
+   for (i = 0; i < n; i++, dst += 3) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      dst[0] = src[i][BCOMP];
+      dst[1] = src[i][GCOMP];
+      dst[2] = src[i][RCOMP];
+   }
+}
+
+
+/*
+ * Write a row of n PF_5R6G5B-format pixels to an ximage.
+ * `values` is read as RGBA quadruplets; only R, G and B are packed into
+ * the 16-bit pixel at PIXEL_ADDR2(xrb, x, y) + i.  With a mask, pixels
+ * whose mask[i] is zero are left untouched.  Without a mask, the
+ * __i386__ build stores two pixels per 32-bit word and finishes an odd
+ * count with one 16-bit store.
+ */
+static void put_row_5R6G5B_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLuint i;
+   GLushort *dst = PIXEL_ADDR2(xrb, x, y);
+   if (mask) {
+      for (i = 0; i < n; i++) {
+         if (!mask[i]) {
+            continue;
+         }
+         dst[i] = PACK_5R6G5B( src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] );
+      }
+      return;
+   }
+   /* draw all pixels */
+#if defined(__i386__) /* word stores don't have to be on 4-byte boundaries */
+   {
+      GLuint *dst32 = (GLuint *) dst;
+      const GLuint paired = n - (n & 1);   /* largest even count <= n */
+      for (i = 0; i < paired; i += 2) {
+         const GLuint lo = PACK_5R6G5B(src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]);
+         const GLuint hi = PACK_5R6G5B(src[i+1][RCOMP], src[i+1][GCOMP], src[i+1][BCOMP]);
+         dst32[i >> 1] = (hi << 16) | lo;
+      }
+      if (paired != n) {
+         dst[paired] = PACK_5R6G5B(src[paired][RCOMP], src[paired][GCOMP], src[paired][BCOMP]);
+      }
+   }
+#else
+   for (i = 0; i < n; i++) {
+      dst[i] = PACK_5R6G5B(src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]);
+   }
+#endif
+}
+
+
+/*
+ * Write a row of n PF_DITHER_5R6G5B-format pixels to an ximage.
+ * `values` is read as RGBA quadruplets; only R, G and B are packed into
+ * the 16-bit pixel at PIXEL_ADDR2(xrb, x, y) + i.  Dithering uses the
+ * pixel's x and the flipped row y2.  With a mask, pixels whose mask[i]
+ * is zero are left untouched.  Without a mask, the __i386__ build stores
+ * two pixels per 32-bit word and finishes an odd count with one 16-bit
+ * store.
+ */
+static void put_row_DITHER_5R6G5B_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   register GLuint i;
+   register GLushort *ptr = PIXEL_ADDR2(xrb, x, y);
+   const GLint y2 = YFLIP(xrb, y);
+   if (mask) {
+      for (i=0;i<n;i++,x++) {
+         if (mask[i]) {
+            PACK_TRUEDITHER( ptr[i], x, y2, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+         }
+      }
+   }
+   else {
+      /* draw all pixels */
+#if defined(__i386__) /* word stores don't have to be on 4-byte boundaries */
+      GLuint *ptr32 = (GLuint *) ptr;
+      GLuint extraPixel = (n & 1);
+      n -= extraPixel;
+      for (i = 0; i < n; i += 2, x += 2) {
+         GLuint p0, p1;
+         PACK_TRUEDITHER( p0, x, y2, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+         PACK_TRUEDITHER( p1, x+1, y2, rgba[i+1][RCOMP], rgba[i+1][GCOMP], rgba[i+1][BCOMP] );
+         *ptr32++ = (p1 << 16) | p0;
+      }
+      if (extraPixel) {
+         /* The loop above already advanced x by n, so x is the column of
+          * the trailing pixel; adding n again would dither it with the
+          * wrong column.
+          */
+         PACK_TRUEDITHER( ptr[n], x, y2, rgba[n][RCOMP], rgba[n][GCOMP], rgba[n][BCOMP]);
+      }
+#else
+      for (i = 0; i < n; i++, x++) {
+         PACK_TRUEDITHER( ptr[i], x, y2, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP]);
+      }
+#endif
+   }
+}
+
+
+/*
+ * Write a row of n PF_5R6G5B-format pixels to an ximage (no alpha).
+ * `values` is read as RGB triplets, each packed into the 16-bit pixel at
+ * PIXEL_ADDR2(xrb, x, y) + i.  With a mask, pixels whose mask[i] is zero
+ * are left untouched.  Without a mask, the __i386__ build stores two
+ * pixels per 32-bit word and finishes an odd count with one 16-bit store.
+ */
+static void put_row_rgb_5R6G5B_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   GET_XRB(xrb);
+   GLuint i;
+   GLushort *dst = PIXEL_ADDR2(xrb, x, y);
+   if (mask) {
+      for (i = 0; i < n; i++) {
+         if (!mask[i]) {
+            continue;
+         }
+         dst[i] = PACK_5R6G5B( src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] );
+      }
+      return;
+   }
+   /* draw all pixels */
+#if defined(__i386__) /* word stores don't have to be on 4-byte boundaries */
+   {
+      GLuint *dst32 = (GLuint *) dst;
+      const GLuint paired = n - (n & 1);   /* largest even count <= n */
+      for (i = 0; i < paired; i += 2) {
+         const GLuint lo = PACK_5R6G5B(src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]);
+         const GLuint hi = PACK_5R6G5B(src[i+1][RCOMP], src[i+1][GCOMP], src[i+1][BCOMP]);
+         dst32[i >> 1] = (hi << 16) | lo;
+      }
+      if (paired != n) {
+         dst[paired] = PACK_5R6G5B(src[paired][RCOMP], src[paired][GCOMP], src[paired][BCOMP]);
+      }
+   }
+#else
+   for (i = 0; i < n; i++) {
+      dst[i] = PACK_5R6G5B( src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] );
+   }
+#endif
+}
+
+
+/*
+ * Write a row of n PF_DITHER_5R6G5B-format pixels to an ximage (no alpha).
+ * `values` is read as RGB triplets, each packed into the 16-bit pixel at
+ * PIXEL_ADDR2(xrb, x, y) + i.  Dithering uses the pixel's x and the
+ * flipped row y2, the same coordinates put_row_DITHER_5R6G5B_ximage
+ * uses, so both entry points dither a given pixel identically.  With a
+ * mask, pixels whose mask[i] is zero are left untouched.  Without a
+ * mask, the __i386__ build stores two pixels per 32-bit word and
+ * finishes an odd count with one 16-bit store.
+ */
+static void put_row_rgb_DITHER_5R6G5B_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+   GET_XRB(xrb);
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   register GLuint i;
+   register GLushort *ptr = PIXEL_ADDR2(xrb, x, y );
+   const GLint y2 = YFLIP(xrb, y);
+   if (mask) {
+      for (i=0;i<n;i++,x++) {
+         if (mask[i]) {
+            PACK_TRUEDITHER( ptr[i], x, y2, rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP] );
+         }
+      }
+   }
+   else {
+      /* draw all pixels */
+#if defined(__i386__) /* word stores don't have to be on 4-byte boundaries */
+      GLuint *ptr32 = (GLuint *) ptr;
+      GLuint extraPixel = (n & 1);
+      n -= extraPixel;
+      for (i = 0; i < n; i += 2, x += 2) {
+         GLuint p0, p1;
+         PACK_TRUEDITHER( p0, x, y2, rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP] );
+         PACK_TRUEDITHER( p1, x+1, y2, rgb[i+1][RCOMP], rgb[i+1][GCOMP], rgb[i+1][BCOMP] );
+         *ptr32++ = (p1 << 16) | p0;
+      }
+      if (extraPixel) {
+         /* The loop above already advanced x by n, so x is the column of
+          * the trailing pixel; adding n again would dither it with the
+          * wrong column.
+          */
+         PACK_TRUEDITHER( ptr[n], x, y2, rgb[n][RCOMP], rgb[n][GCOMP], rgb[n][BCOMP]);
+      }
+#else
+      for (i=0;i<n;i++,x++) {
+         PACK_TRUEDITHER( ptr[i], x, y2, rgb[i][RCOMP], rgb[i][GCOMP], rgb[i][BCOMP] );
+      }
+#endif
+   }
+}
+
+
+/*
+ * Write a row of n PF_DITHER pixels to an XImage, starting at (x, y).
+ * `values` is read as RGBA quadruplets; only R, G and B are passed to
+ * XDITHER.  The flipped row is used both for XDITHER_SETUP and as the
+ * XImage row.  When mask is non-null, pixels whose mask[i] is zero are
+ * left untouched.
+ */
+static void put_row_DITHER_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   XMesaImage *image = xrb->ximage;
+   GLuint i;
+   int yy = YFLIP(xrb, y);
+   XDITHER_SETUP(yy);
+   for (i = 0; i < n; i++, x++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      XMesaPutPixel( image, x, yy,
+                     XDITHER( x, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] ) );
+   }
+}
+
+
+/*
+ * Write a row of n PF_DITHER pixels to an XImage (no alpha).
+ * `values` is read as RGB triplets.  The flipped row is used both for
+ * XDITHER_SETUP and as the XImage row.  When mask is non-null, pixels
+ * whose mask[i] is zero are left untouched.
+ */
+static void put_row_rgb_DITHER_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   GET_XRB(xrb);
+   XMesaImage *image = xrb->ximage;
+   GLuint i;
+   int yy = YFLIP(xrb, y);
+   XDITHER_SETUP(yy);
+   for (i = 0; i < n; i++, x++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      XMesaPutPixel( image, x, yy,
+                     XDITHER( x, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] ) );
+   }
+}
+
+
+
+/*
+ * Write a row of n 8-bit PF_DITHER pixels to an XImage.
+ * `values` is read as RGBA quadruplets; only R, G and B are passed to
+ * XDITHER, and the result is stored as one byte at
+ * PIXEL_ADDR1(xrb, x, y) + i.  XDITHER_SETUP is given y as passed in.
+ * When mask is non-null, pixels whose mask[i] is zero are left untouched.
+ */
+static void put_row_DITHER8_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLuint i;
+   GLubyte *dst = PIXEL_ADDR1(xrb, x, y);
+   XDITHER_SETUP(y);
+   for (i = 0; i < n; i++, x++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      dst[i] = (GLubyte) XDITHER( x, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] );
+   }
+}
+
+
+/*
+ * Write a row of n 8-bit PF_DITHER pixels to an XImage (no alpha).
+ * `values` is read as triplets of bytes; components 0, 1 and 2 of each
+ * triplet are passed to XDITHER and the result is stored as one byte at
+ * PIXEL_ADDR1(xrb, x, y) + i.  When mask is non-null, pixels whose
+ * mask[i] is zero are left untouched.
+ */
+static void put_row_rgb_DITHER8_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   GET_XRB(xrb);
+   GLuint i;
+   GLubyte *dst = PIXEL_ADDR1(xrb, x, y);
+   XDITHER_SETUP(y);
+   for (i = 0; i < n; i++, x++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      dst[i] = (GLubyte) XDITHER( x, src[i][0], src[i][1], src[i][2] );
+   }
+}
+
+
+
+/*
+ * Write a row of n PF_1BIT pixels to an XImage, starting at (x, y).
+ * `values` is read as RGBA quadruplets; only R, G and B are passed to
+ * DITHER_1BIT.  The flipped y is used both for dithering and as the
+ * XImage row.  When mask is non-null, pixels whose mask[i] is zero are
+ * left untouched.
+ */
+static void put_row_1BIT_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaImage *image = xrb->ximage;
+   GLuint i;
+   SETUP_1BIT;
+   y = YFLIP(xrb, y);
+   for (i = 0; i < n; i++, x++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      XMesaPutPixel( image, x, y,
+                     DITHER_1BIT(x, y, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]) );
+   }
+}
+
+
+/*
+ * Write a row of n PF_1BIT pixels to an XImage (no alpha).
+ * `values` is read as RGB triplets.  The flipped y is used both for
+ * dithering and as the XImage row.  When mask is non-null, pixels whose
+ * mask[i] is zero are left untouched.
+ */
+static void put_row_rgb_1BIT_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaImage *image = xrb->ximage;
+   GLuint i;
+   SETUP_1BIT;
+   y = YFLIP(xrb, y);
+   for (i = 0; i < n; i++, x++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      XMesaPutPixel( image, x, y,
+                     DITHER_1BIT(x, y, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP]) );
+   }
+}
+
+
+/*
+ * Write a row of n PF_HPCR pixels to an XImage.
+ * `values` is read as RGBA quadruplets; only R, G and B are passed to
+ * DITHER_HPCR, and the result is stored as one byte at
+ * PIXEL_ADDR1(xrb, x, y) + i.  DITHER_HPCR is given y as passed in.
+ * When mask is non-null, pixels whose mask[i] is zero are left untouched.
+ */
+static void put_row_HPCR_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GLuint i;
+   GLubyte *dst = PIXEL_ADDR1(xrb, x, y);
+   for (i = 0; i < n; i++, x++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      dst[i] = DITHER_HPCR( x, y, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] );
+   }
+}
+
+
+/*
+ * Write a row of n PF_HPCR pixels to an XImage (no alpha).
+ * `values` is read as RGB triplets; the DITHER_HPCR result is stored as
+ * one byte at PIXEL_ADDR1(xrb, x, y) + i.  DITHER_HPCR is given y as
+ * passed in.  When mask is non-null, pixels whose mask[i] is zero are
+ * left untouched.
+ */
+static void put_row_rgb_HPCR_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   GET_XRB(xrb);
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GLuint i;
+   GLubyte *dst = PIXEL_ADDR1(xrb, x, y);
+   for (i = 0; i < n; i++, x++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      dst[i] = DITHER_HPCR( x, y, src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] );
+   }
+}
+
+
+/*
+ * Write a row of n PF_LOOKUP pixels to an XImage, starting at (x, y).
+ * `values` is read as RGBA quadruplets; only R, G and B are passed to
+ * LOOKUP.  The flipped y is used as the XImage row.  When mask is
+ * non-null, pixels whose mask[i] is zero are left untouched.
+ */
+static void put_row_LOOKUP_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*src)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   XMesaImage *image = xrb->ximage;
+   GLuint i;
+   LOOKUP_SETUP;
+   y = YFLIP(xrb, y);
+   for (i = 0; i < n; i++, x++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      XMesaPutPixel( image, x, y,
+                     LOOKUP( src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] ) );
+   }
+}
+
+
+/*
+ * Write a row of n PF_LOOKUP pixels to an XImage (no alpha).
+ * `values` is read as RGB triplets.  The flipped y is used as the XImage
+ * row.  When mask is non-null, pixels whose mask[i] is zero are left
+ * untouched.
+ */
+static void put_row_rgb_LOOKUP_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*src)[3] = (const GLubyte (*)[3]) values;
+   GET_XRB(xrb);
+   XMesaImage *image = xrb->ximage;
+   GLuint i;
+   LOOKUP_SETUP;
+   y = YFLIP(xrb, y);
+   for (i = 0; i < n; i++, x++) {
+      if (mask && !mask[i]) {
+         continue;
+      }
+      XMesaPutPixel( image, x, y,
+                     LOOKUP( src[i][RCOMP], src[i][GCOMP], src[i][BCOMP] ) );
+   }
+}
+
+
+/*
+ * Write a span of 8-bit PF_LOOKUP pixels to an XImage.
+ *
+ * The destination row is addressed directly as bytes (PIXEL_ADDR1), so
+ * no XMesaPutPixel() call is needed.  `values` holds n RGBA byte
+ * quadruples; alpha is unused.  A NULL mask enables every pixel.
+ */
+static void put_row_LOOKUP8_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLubyte *dst = PIXEL_ADDR1(xrb, x, y);
+   GLuint k;
+   LOOKUP_SETUP;
+   for (k = 0; k < n; k++) {
+      if (!mask || mask[k]) {
+         dst[k] = (GLubyte) LOOKUP( rgba[k][RCOMP], rgba[k][GCOMP], rgba[k][BCOMP] );
+      }
+   }
+}
+
+
+/*
+ * Write a span of 8-bit PF_LOOKUP pixels to an XImage (no alpha).
+ *
+ * `values` holds n tightly packed RGB byte triples (components 0, 1, 2).
+ * Each enabled pixel is translated with LOOKUP() and stored as one byte.
+ * A NULL mask enables every pixel.
+ */
+static void put_row_rgb_LOOKUP8_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+   GET_XRB(xrb);
+   GLubyte *dst = PIXEL_ADDR1(xrb, x, y);
+   GLuint k;
+   LOOKUP_SETUP;
+   for (k = 0; k < n; k++) {
+      if (!mask || mask[k]) {
+         dst[k] = (GLubyte) LOOKUP( rgb[k][0], rgb[k][1], rgb[k][2] );
+      }
+   }
+}
+
+
+/*
+ * Write a span of PF_GRAYSCALE pixels to an XImage.
+ *
+ * Each enabled RGBA color is reduced with GRAY_RGB() and stored via
+ * XMesaPutPixel() on the Y-flipped row; alpha is unused.  A NULL mask
+ * enables the whole span.
+ */
+static void put_row_GRAYSCALE_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   XMesaImage *img = xrb->ximage;
+   GLuint k;
+   y = YFLIP(xrb, y);
+   for (k = 0; k < n; k++, x++) {
+      if (!mask || mask[k]) {
+         XMesaPutPixel( img, x, y, GRAY_RGB( rgba[k][RCOMP], rgba[k][GCOMP], rgba[k][BCOMP] ) );
+      }
+   }
+}
+
+
+/*
+ * Write a span of PF_GRAYSCALE pixels to an XImage (no alpha).
+ *
+ * `values` holds n RGB byte triples.  Enabled pixels are converted with
+ * GRAY_RGB() and stored via XMesaPutPixel() on the Y-flipped row.  A NULL
+ * mask enables the whole span.
+ */
+static void put_row_rgb_GRAYSCALE_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+   GET_XRB(xrb);
+   XMesaImage *img = xrb->ximage;
+   GLuint k;
+   y = YFLIP(xrb, y);
+   for (k = 0; k < n; k++, x++) {
+      if (!mask || mask[k]) {
+         XMesaPutPixel( img, x, y, GRAY_RGB( rgb[k][RCOMP], rgb[k][GCOMP], rgb[k][BCOMP] ) );
+      }
+   }
+}
+
+
+/*
+ * Write a span of 8-bit PF_GRAYSCALE pixels to an XImage.
+ *
+ * The row is written directly as bytes starting at PIXEL_ADDR1(xrb, x, y).
+ * `values` holds n RGBA byte quadruples; alpha is unused.  A NULL mask
+ * enables every pixel.
+ */
+static void put_row_GRAYSCALE8_ximage( PUT_ROW_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLubyte *dst = PIXEL_ADDR1(xrb, x, y);
+   GLuint k;
+   for (k = 0; k < n; k++) {
+      if (!mask || mask[k]) {
+         dst[k] = (GLubyte) GRAY_RGB( rgba[k][RCOMP], rgba[k][GCOMP], rgba[k][BCOMP] );
+      }
+   }
+}
+
+
+/*
+ * Write a span of 8-bit PF_GRAYSCALE pixels to an XImage (no alpha).
+ *
+ * `values` holds n RGB byte triples.  Enabled pixels are converted with
+ * GRAY_RGB() and stored as single bytes.  A NULL mask enables every pixel.
+ */
+static void put_row_rgb_GRAYSCALE8_ximage( RGB_SPAN_ARGS )
+{
+   const GLubyte (*rgb)[3] = (const GLubyte (*)[3]) values;
+   GET_XRB(xrb);
+   GLubyte *dst = PIXEL_ADDR1(xrb, x, y);
+   GLuint k;
+   for (k = 0; k < n; k++) {
+      if (!mask || mask[k]) {
+         dst[k] = (GLubyte) GRAY_RGB( rgb[k][RCOMP], rgb[k][GCOMP], rgb[k][BCOMP] );
+      }
+   }
+}
+
+
+
+
+/**********************************************************************/
+/*** Write COLOR PIXEL functions                                    ***/
+/**********************************************************************/
+
+
+#define PUT_VALUES_ARGS \
+	GLcontext *ctx, struct gl_renderbuffer *rb,	/* GL context and the renderbuffer being written */ \
+	GLuint n, const GLint x[], const GLint y[],	/* pixel count and per-pixel coordinates */ \
+	const void *values, const GLubyte mask[]	/* per-pixel colors and per-pixel write flags */
+
+
+/*
+ * Write an array of PF_TRUECOLOR pixels to a pixmap.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; y is flipped with YFLIP() before drawing
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means draw every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_TRUECOLOR_pixmap( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         unsigned long p;
+         PACK_TRUECOLOR( p, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+         /* the GC foreground carries the color of the point drawn next */
+         XMesaSetForeground( dpy, gc, p );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_TRUEDITHER pixels to a pixmap.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; the dither macro receives the unflipped
+ *           y, the draw call receives YFLIP(xrb, y)
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means draw every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_TRUEDITHER_pixmap( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         unsigned long p;
+         PACK_TRUEDITHER(p, x[i], y[i], rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP]);
+         XMesaSetForeground( dpy, gc, p );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_8A8B8G8R pixels to a pixmap.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; y is flipped with YFLIP() before drawing
+ * values  - n RGBA byte quadruples, all four packed by PACK_8A8B8G8R()
+ * mask    - per-pixel write flags; NULL means draw every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_8A8B8G8R_pixmap( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaSetForeground( dpy, gc,
+                         PACK_8A8B8G8R( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP], rgba[i][ACOMP] ));
+         XMesaDrawPoint( dpy, buffer, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+      }
+   }
+}
+
+/*
+ * Write an array of PF_8A8R8G8B pixels to a pixmap.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; y is flipped with YFLIP() before drawing
+ * values  - n RGBA byte quadruples, all four packed by PACK_8A8R8G8B()
+ * mask    - per-pixel write flags; NULL means draw every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_8A8R8G8B_pixmap( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaSetForeground( dpy, gc,
+                         PACK_8A8R8G8B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP], rgba[i][ACOMP] ));
+         XMesaDrawPoint( dpy, buffer, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+      }
+   }
+}
+
+/*
+ * Write an array of PF_8R8G8B pixels to a pixmap.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; y is flipped with YFLIP() before drawing
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means draw every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_8R8G8B_pixmap( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaSetForeground( dpy, gc, PACK_8R8G8B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ) );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_8R8G8B24 pixels to a pixmap.
+ *
+ * The foreground value is built with PACK_8R8G8B(), exactly as in the
+ * PF_8R8G8B pixmap writer.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; y is flipped with YFLIP() before drawing
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means draw every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_8R8G8B24_pixmap( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaSetForeground( dpy, gc, PACK_8R8G8B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ) );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_5R6G5B pixels to a pixmap.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; y is flipped with YFLIP() before drawing
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means draw every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_5R6G5B_pixmap( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaSetForeground( dpy, gc, PACK_5R6G5B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ) );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_DITHER_5R6G5B pixels to a pixmap.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; PACK_TRUEDITHER() receives the unflipped
+ *           y, the draw call receives YFLIP(xrb, y)
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means draw every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_DITHER_5R6G5B_pixmap( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         unsigned long p;
+         PACK_TRUEDITHER(p, x[i], y[i], rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+         XMesaSetForeground( dpy, gc, p );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_DITHER pixels to a pixmap.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; DITHER() receives the unflipped y, the
+ *           draw call receives YFLIP(xrb, y)
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means draw every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_DITHER_pixmap( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   DITHER_SETUP;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaSetForeground( dpy, gc,
+                         DITHER(x[i], y[i], rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP]) );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_1BIT pixels to a pixmap.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; DITHER_1BIT() receives the unflipped y,
+ *           the draw call receives YFLIP(xrb, y)
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means draw every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_1BIT_pixmap( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   SETUP_1BIT;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaSetForeground( dpy, gc,
+                         DITHER_1BIT( x[i], y[i], rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ));
+         XMesaDrawPoint( dpy, buffer, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_HPCR pixels to a pixmap.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; DITHER_HPCR() receives the unflipped y,
+ *           the draw call receives YFLIP(xrb, y)
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means draw every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_HPCR_pixmap( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaSetForeground( dpy, gc,
+                         DITHER_HPCR( x[i], y[i], rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ));
+         XMesaDrawPoint( dpy, buffer, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_LOOKUP pixels to a pixmap.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; y is flipped with YFLIP() before drawing
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means draw every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_LOOKUP_pixmap( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   LOOKUP_SETUP;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaSetForeground( dpy, gc, LOOKUP( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ) );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_GRAYSCALE pixels to a pixmap.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; y is flipped with YFLIP() before drawing
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means draw every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_GRAYSCALE_pixmap( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaSetForeground( dpy, gc, GRAY_RGB( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ) );
+         XMesaDrawPoint( dpy, buffer, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_TRUECOLOR pixels to an ximage.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; y is flipped with YFLIP() before storing
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_TRUECOLOR_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);  /* not named below; presumably read by PACK_TRUECOLOR */
+   GET_XRB(xrb);
+   XMesaImage *img = xrb->ximage;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         unsigned long p;
+         PACK_TRUECOLOR( p, rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+         XMesaPutPixel( img, x[i], YFLIP(xrb, y[i]), p );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_TRUEDITHER pixels to an XImage.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; PACK_TRUEDITHER() receives the unflipped
+ *           y, XMesaPutPixel() receives YFLIP(xrb, y)
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_TRUEDITHER_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);  /* not named below; presumably read by PACK_TRUEDITHER */
+   GET_XRB(xrb);
+   XMesaImage *img = xrb->ximage;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         unsigned long p;
+         PACK_TRUEDITHER(p, x[i], y[i], rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP]);
+         XMesaPutPixel( img, x[i], YFLIP(xrb, y[i]), p );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_8A8B8G8R pixels to an ximage.
+ *
+ * Each pixel is stored directly through the GLuint address returned by
+ * PIXEL_ADDR4().
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates, passed as-is to PIXEL_ADDR4()
+ * values  - n RGBA byte quadruples, all four packed by PACK_8A8B8G8R()
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_8A8B8G8R_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         GLuint *ptr = PIXEL_ADDR4(xrb, x[i], y[i] );
+         *ptr = PACK_8A8B8G8R( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP], rgba[i][ACOMP] );
+      }
+   }
+}
+
+/*
+ * Write an array of PF_8A8R8G8B pixels to an ximage.
+ *
+ * Each pixel is stored directly through the GLuint address returned by
+ * PIXEL_ADDR4().
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates, passed as-is to PIXEL_ADDR4()
+ * values  - n RGBA byte quadruples, all four packed by PACK_8A8R8G8B()
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_8A8R8G8B_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         GLuint *ptr = PIXEL_ADDR4(xrb, x[i], y[i]);
+         *ptr = PACK_8A8R8G8B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP], rgba[i][ACOMP] );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_8R8G8B pixels to an ximage.
+ *
+ * Each pixel is stored directly through the GLuint address returned by
+ * PIXEL_ADDR4().
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates, passed as-is to PIXEL_ADDR4()
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_8R8G8B_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         GLuint *ptr = PIXEL_ADDR4(xrb, x[i], y[i]);
+         *ptr = PACK_8R8G8B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_8R8G8B24 pixels to an ximage.
+ *
+ * Each pixel is stored component by component through the bgr_t address
+ * returned by PIXEL_ADDR3().
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates, passed as-is to PIXEL_ADDR3()
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_8R8G8B24_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         bgr_t *ptr = PIXEL_ADDR3(xrb, x[i], y[i] );
+         ptr->r = rgba[i][RCOMP];
+         ptr->g = rgba[i][GCOMP];
+         ptr->b = rgba[i][BCOMP];
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_5R6G5B pixels to an ximage.
+ *
+ * Each pixel is stored directly through the GLushort address returned by
+ * PIXEL_ADDR2().
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates, passed as-is to PIXEL_ADDR2()
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_5R6G5B_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         GLushort *ptr = PIXEL_ADDR2(xrb, x[i], y[i] );
+         *ptr = PACK_5R6G5B( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_DITHER_5R6G5B pixels to an ximage.
+ *
+ * PACK_TRUEDITHER() writes its result straight into the GLushort that
+ * PIXEL_ADDR2() points at.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates, used both for addressing and dithering
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_DITHER_5R6G5B_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);  /* not named below; presumably read by PACK_TRUEDITHER */
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         GLushort *ptr = PIXEL_ADDR2(xrb, x[i], y[i] );
+         PACK_TRUEDITHER( *ptr, x[i], y[i], rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_DITHER pixels to an XImage.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; DITHER() receives the unflipped y,
+ *           XMesaPutPixel() receives YFLIP(xrb, y)
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_DITHER_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   XMesaImage *img = xrb->ximage;
+   GLuint i;
+   DITHER_SETUP;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaPutPixel( img, x[i], YFLIP(xrb, y[i]),
+                    DITHER( x[i], y[i], rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of 8-bit PF_DITHER pixels to an XImage.
+ *
+ * Each pixel is stored as one byte through the address returned by
+ * PIXEL_ADDR1().
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates, used both for addressing and dithering
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_DITHER8_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLuint i;
+   DITHER_SETUP;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         GLubyte *ptr = PIXEL_ADDR1(xrb, x[i], y[i]);
+         *ptr = (GLubyte) DITHER( x[i], y[i], rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_1BIT pixels to an XImage.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; DITHER_1BIT() receives the unflipped y,
+ *           XMesaPutPixel() receives YFLIP(xrb, y)
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_1BIT_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);  /* not named below; presumably read by SETUP_1BIT */
+   GET_XRB(xrb);
+   XMesaImage *img = xrb->ximage;
+   GLuint i;
+   SETUP_1BIT;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaPutPixel( img, x[i], YFLIP(xrb, y[i]),
+                    DITHER_1BIT( x[i], y[i], rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ));
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_HPCR pixels to an XImage.
+ *
+ * Each pixel is stored as one byte through the address returned by
+ * PIXEL_ADDR1().
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates, used both for addressing and dithering
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_HPCR_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);  /* not named below; presumably read by DITHER_HPCR */
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         GLubyte *ptr = PIXEL_ADDR1(xrb, x[i], y[i]);
+         *ptr = (GLubyte) DITHER_HPCR( x[i], y[i], rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_LOOKUP pixels to an XImage.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; y is flipped with YFLIP() before storing
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_LOOKUP_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   XMesaImage *img = xrb->ximage;
+   GLuint i;
+   LOOKUP_SETUP;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaPutPixel( img, x[i], YFLIP(xrb, y[i]), LOOKUP(rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP]) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of 8-bit PF_LOOKUP pixels to an XImage.
+ *
+ * Each pixel is stored as one byte through the address returned by
+ * PIXEL_ADDR1().
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates, passed as-is to PIXEL_ADDR1()
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_LOOKUP8_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLuint i;
+   LOOKUP_SETUP;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         GLubyte *ptr = PIXEL_ADDR1(xrb, x[i], y[i]);
+         *ptr = (GLubyte) LOOKUP( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+      }
+   }
+}
+
+
+/*
+ * Write an array of PF_GRAYSCALE pixels to an XImage.
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates; y is flipped with YFLIP() before storing
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_GRAYSCALE_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   XMesaImage *img = xrb->ximage;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         XMesaPutPixel( img, x[i], YFLIP(xrb, y[i]),
+                    GRAY_RGB( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] ) );
+      }
+   }
+}
+
+
+/*
+ * Write an array of 8-bit PF_GRAYSCALE pixels to an XImage.
+ *
+ * Each pixel is stored as one byte through the address returned by
+ * PIXEL_ADDR1().
+ *
+ * ctx, rb - GL context and destination renderbuffer
+ * n       - number of pixels
+ * x, y    - per-pixel coordinates, passed as-is to PIXEL_ADDR1()
+ * values  - n RGBA byte quadruples (alpha is not used)
+ * mask    - per-pixel write flags; NULL means write every pixel, matching
+ *           the span writers in this file
+ */
+static void put_values_GRAYSCALE8_ximage( PUT_VALUES_ARGS )
+{
+   const GLubyte (*rgba)[4] = (const GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask || mask[i]) {
+         GLubyte *ptr = PIXEL_ADDR1(xrb, x[i], y[i] );
+         *ptr = (GLubyte) GRAY_RGB( rgba[i][RCOMP], rgba[i][GCOMP], rgba[i][BCOMP] );
+      }
+   }
+}
+
+
+
+
+/**********************************************************************/
+/*** Write MONO COLOR SPAN functions                                ***/
+/**********************************************************************/
+
+#define PUT_MONO_ROW_ARGS \
+	GLcontext *ctx, struct gl_renderbuffer *rb,	/* GL context and the renderbuffer being written */ \
+	GLuint n, GLint x, GLint y, const void *value,	/* span length, span start, the single color or index */ \
+	const GLubyte mask[]	/* per-pixel write flags; the writers below accept NULL as "all" */
+
+
+
+/*
+ * Write a span of identical pixels to a pixmap.
+ *
+ * The color is converted once with xmesa_color_to_pixel() and installed
+ * as the GC foreground.  The span is then scanned for runs of consecutive
+ * enabled pixels and each run is drawn as one rectangle of height 1.
+ * A NULL mask enables the whole span.
+ */
+static void put_mono_row_pixmap( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   const unsigned long pixel = xmesa_color_to_pixel(ctx, color[RCOMP],
+               color[GCOMP], color[BCOMP], color[ACOMP], xmesa->pixelformat);
+   GLuint pos = 0;
+   XMesaSetForeground( xmesa->display, gc, pixel );
+   y = YFLIP(xrb, y);
+
+   while (pos < n) {
+      const GLuint runStart = pos;
+
+      /* Advance to the end of the current run of enabled pixels. */
+      for ( ; pos < n; pos++) {
+         if (mask && !mask[pos])
+            break;
+      }
+
+      if (pos > runStart) {
+         XMesaFillRectangle( dpy, buffer, gc,
+                             (int)(x+runStart), (int) y,
+                             (int)(pos-runStart), 1);
+      }
+
+      /* Step over disabled pixels.  mask is never NULL when this loop
+       * iterates: with a NULL mask the run above always reaches n.
+       */
+      for ( ; pos < n; pos++) {
+         if (mask[pos])
+            break;
+      }
+   }
+}
+
+
+
+/*
+ * Write a span of identical color-index pixels to a pixmap.
+ *
+ * `value` points at a single GLuint index that is used directly as the
+ * GC foreground.  Runs of consecutive enabled pixels are drawn as single
+ * rectangles of height 1; a NULL mask enables the whole span.
+ */
+static void
+put_mono_row_ci_pixmap( PUT_MONO_ROW_ARGS )
+{
+   GLuint colorIndex = *((GLuint *) value);
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint pos = 0;
+   XMesaSetForeground( xmesa->display, gc, colorIndex );
+   y = YFLIP(xrb, y);
+
+   while (pos < n) {
+      const GLuint runStart = pos;
+
+      /* Advance to the end of the current run of enabled pixels. */
+      for ( ; pos < n; pos++) {
+         if (mask && !mask[pos])
+            break;
+      }
+
+      if (pos > runStart) {
+         XMesaFillRectangle( dpy, buffer, gc,
+                             (int)(x+runStart), (int) y,
+                             (int)(pos-runStart), 1);
+      }
+
+      /* Step over disabled pixels.  mask is never NULL when this loop
+       * iterates: with a NULL mask the run above always reaches n.
+       */
+      for ( ; pos < n; pos++) {
+         if (mask[pos])
+            break;
+      }
+   }
+}
+
+
+
+/*
+ * Write a span of PF_TRUEDITHER pixels to a pixmap.
+ *
+ * All pixels share one source color, but the dithered value depends on
+ * the position, so every enabled pixel is packed and drawn as its own
+ * point.  Both the dither macro and the draw call use the flipped row.
+ * A NULL mask enables the whole span.
+ */
+static void put_mono_row_TRUEDITHER_pixmap( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   const GLubyte r = color[RCOMP], g = color[GCOMP], b = color[BCOMP];
+   const int yy = YFLIP(xrb, y);
+   GLuint k;
+   for (k = 0; k < n; k++, x++) {
+      unsigned long p;
+      if (mask && !mask[k])
+         continue;
+      PACK_TRUEDITHER(p, x, yy, r, g, b);
+      XMesaSetForeground( dpy, gc, p );
+      XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) yy );
+   }
+}
+
+
+/*
+ * Write a span of PF_DITHER pixels to a pixmap.
+ *
+ * XDITHER_SETUP() is given the flipped row once; XDITHER() then only
+ * needs the x position of each pixel.  Every enabled pixel is drawn as
+ * its own point.  A NULL mask enables the whole span.
+ */
+static void put_mono_row_DITHER_pixmap( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   const GLubyte r = color[RCOMP], g = color[GCOMP], b = color[BCOMP];
+   GLuint k;
+   int yy = YFLIP(xrb, y);
+   XDITHER_SETUP(yy);
+   for (k = 0; k < n; k++, x++) {
+      if (mask && !mask[k])
+         continue;
+      XMesaSetForeground( dpy, gc, XDITHER( x, r, g, b ) );
+      XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) yy );
+   }
+}
+
+
+/*
+ * Write a span of PF_1BIT pixels to a pixmap.
+ *
+ * y is flipped first, so both DITHER_1BIT() and the draw call see the
+ * flipped row.  Every enabled pixel is drawn as its own point.  A NULL
+ * mask enables the whole span.
+ */
+static void put_mono_row_1BIT_pixmap( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable buffer = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   const GLubyte r = color[RCOMP], g = color[GCOMP], b = color[BCOMP];
+   GLuint k;
+   SETUP_1BIT;
+   y = YFLIP(xrb, y);
+   for (k = 0; k < n; k++, x++) {
+      if (mask && !mask[k])
+         continue;
+      XMesaSetForeground( dpy, gc, DITHER_1BIT( x, y, r, g, b ) );
+      XMesaDrawPoint( dpy, buffer, gc, (int) x, (int) y );
+   }
+}
+
+
+/*
+ * Write a span of identical pixels to an XImage.
+ *
+ * The color is converted once with xmesa_color_to_pixel(); the resulting
+ * value is stored with XMesaPutPixel() at every enabled position of the
+ * Y-flipped row.  A NULL mask enables the whole span.
+ */
+static void put_mono_row_ximage( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   XMesaImage *img = xrb->ximage;
+   GLuint k;
+   const unsigned long pixel = xmesa_color_to_pixel(ctx, color[RCOMP],
+               color[GCOMP], color[BCOMP], color[ACOMP], xmesa->pixelformat);
+   y = YFLIP(xrb, y);
+   for (k = 0; k < n; k++, x++) {
+      if (mask && !mask[k])
+         continue;
+      XMesaPutPixel( img, x, y, pixel );
+   }
+}
+
+
+/*
+ * Write a span of identical color-index pixels to an XImage.
+ *
+ * `value` points at a single GLuint index, which is stored unchanged with
+ * XMesaPutPixel() at every enabled position of the Y-flipped row.  A NULL
+ * mask enables the whole span.
+ */
+static void
+put_mono_row_ci_ximage( PUT_MONO_ROW_ARGS )
+{
+   const GLuint colorIndex = *((GLuint *) value);
+   GET_XRB(xrb);
+   XMesaImage *img = xrb->ximage;
+   GLuint k;
+   y = YFLIP(xrb, y);
+   for (k = 0; k < n; k++, x++) {
+      if (mask && !mask[k])
+         continue;
+      XMesaPutPixel( img, x, y, colorIndex );
+   }
+}
+
+
+/*
+ * Write a span of identical PF_TRUEDITHER pixels to an XImage.
+ *
+ * The source color is constant, but each enabled pixel is packed
+ * separately because PACK_TRUEDITHER() also takes the pixel position.
+ * The flipped row is used for both dithering and storing.  A NULL mask
+ * enables the whole span.
+ */
+static void put_mono_row_TRUEDITHER_ximage( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);  /* not named below; presumably read by PACK_TRUEDITHER */
+   XMesaImage *img = xrb->ximage;
+   const GLint r = color[RCOMP], g = color[GCOMP], b = color[BCOMP];
+   GLuint k;
+   y = YFLIP(xrb, y);
+   for (k = 0; k < n; k++) {
+      unsigned long p;
+      if (mask && !mask[k])
+         continue;
+      PACK_TRUEDITHER( p, x+k, y, r, g, b);
+      XMesaPutPixel( img, x+k, y, p );
+   }
+}
+
+
+/*
+ * Write a span of identical 8A8B8G8R pixels to an XImage.
+ *
+ * The color is converted once with xmesa_color_to_pixel() and the result
+ * is stored into consecutive GLuint slots starting at
+ * PIXEL_ADDR4(xrb, x, y).  A NULL mask enables the whole span.
+ */
+static void put_mono_row_8A8B8G8R_ximage( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   const unsigned long pixel = xmesa_color_to_pixel(ctx, color[RCOMP],
+               color[GCOMP], color[BCOMP], color[ACOMP], xmesa->pixelformat);
+   GLuint *dst = PIXEL_ADDR4(xrb, x, y );
+   GLuint k;
+   for (k = 0; k < n; k++) {
+      if (mask && !mask[k])
+         continue;
+      dst[k] = pixel;
+   }
+}
+
+/*
+ * Write a span of identical 8A8R8G8B pixels to an XImage.
+ *
+ * The color is converted once with xmesa_color_to_pixel() and the result
+ * is stored into consecutive GLuint slots starting at
+ * PIXEL_ADDR4(xrb, x, y).  A NULL mask enables the whole span.
+ */
+static void put_mono_row_8A8R8G8B_ximage( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   const unsigned long pixel = xmesa_color_to_pixel(ctx, color[RCOMP],
+               color[GCOMP], color[BCOMP], color[ACOMP], xmesa->pixelformat);
+   GLuint *dst = PIXEL_ADDR4(xrb, x, y );
+   GLuint k;
+   for (k = 0; k < n; k++) {
+      if (mask && !mask[k])
+         continue;
+      dst[k] = pixel;
+   }
+}
+
+
+/*
+ * Write a span of identical 8R8G8B pixels to an XImage.
+ *
+ * The color is packed once with PACK_8R8G8B() and stored into consecutive
+ * GLuint slots starting at PIXEL_ADDR4(xrb, x, y).  A NULL mask enables
+ * the whole span.
+ */
+static void put_mono_row_8R8G8B_ximage( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   const GLuint pixel = PACK_8R8G8B(color[RCOMP], color[GCOMP], color[BCOMP]);
+   GLuint *dst = PIXEL_ADDR4(xrb, x, y );
+   GLuint k = 0;
+   while (k < n) {
+      if (!mask || mask[k]) {
+         dst[k] = pixel;
+      }
+      k++;
+   }
+}
+
+
+/*
+ * Write a span of identical 8R8G8B24 pixels to an XImage.
+ *
+ * The destination is addressed as an array of bgr_t starting at
+ * PIXEL_ADDR3(xrb, x, y); the three color components are stored
+ * individually for each enabled pixel.  A NULL mask enables the whole span.
+ */
+static void put_mono_row_8R8G8B24_ximage( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   const GLubyte red = color[RCOMP];
+   const GLubyte green = color[GCOMP];
+   const GLubyte blue = color[BCOMP];
+   GLuint k;
+   bgr_t *dst = PIXEL_ADDR3(xrb, x, y );
+   for (k = 0; k < n; k++) {
+      if (mask && !mask[k])
+         continue;
+      dst[k].r = red;
+      dst[k].g = green;
+      dst[k].b = blue;
+   }
+}
+
+
+/*
+ * Write a span of identical DITHER pixels to an XImage.
+ *
+ * XDITHER_SETUP() is given the flipped row once; each enabled pixel is
+ * then dithered by x position with XDITHER() and stored with
+ * XMesaPutPixel() on that same flipped row.  A NULL mask enables the
+ * whole span.
+ */
+static void put_mono_row_DITHER_ximage( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   const GLubyte r = color[RCOMP], g = color[GCOMP], b = color[BCOMP];
+   XMesaImage *img = xrb->ximage;
+   int yy = YFLIP(xrb, y);
+   GLuint k;
+   XDITHER_SETUP(yy);
+   for (k = 0; k < n; k++, x++) {
+      if (mask && !mask[k])
+         continue;
+      XMesaPutPixel( img, x, yy, XDITHER( x, r, g, b ) );
+   }
+}
+
+
+/*
+ * Write a span of identical 8-bit DITHER pixels to an XImage.
+ *
+ * The row is written directly as bytes starting at PIXEL_ADDR1(xrb, x, y).
+ * XDITHER_SETUP() receives y exactly as passed in (no YFLIP here), and
+ * XDITHER() is evaluated per pixel with the advancing x.  A NULL mask
+ * enables the whole span.
+ */
+static void put_mono_row_DITHER8_ximage( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   const GLubyte r = color[RCOMP], g = color[GCOMP], b = color[BCOMP];
+   GLubyte *dst = PIXEL_ADDR1(xrb, x, y);
+   GLuint k;
+   XDITHER_SETUP(y);
+   for (k = 0; k < n; k++, x++) {
+      if (mask && !mask[k])
+         continue;
+      dst[k] = (GLubyte) XDITHER( x, r, g, b );
+   }
+}
+
+
+/*
+ * Write a span of identical 8-bit LOOKUP pixels to an XImage.
+ *
+ * The color is translated once with LOOKUP() and the resulting byte is
+ * stored at every enabled position, starting at PIXEL_ADDR1(xrb, x, y).
+ * A NULL mask enables the whole span.
+ */
+static void put_mono_row_LOOKUP8_ximage( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   GLuint k;
+   GLubyte *dst = PIXEL_ADDR1(xrb, x, y);
+   GLubyte pixel;
+   LOOKUP_SETUP;
+   pixel = LOOKUP(color[RCOMP], color[GCOMP], color[BCOMP]);
+   for (k = 0; k < n; k++) {
+      if (mask && !mask[k])
+         continue;
+      dst[k] = pixel;
+   }
+}
+
+
+/*
+ * Write a span of identical PF_1BIT pixels to an XImage.
+ *
+ * y is flipped first, so both DITHER_1BIT() and XMesaPutPixel() see the
+ * flipped row.  Each enabled pixel is dithered at its own x position.
+ * A NULL mask enables the whole span.
+ */
+static void put_mono_row_1BIT_ximage( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);  /* not named below; presumably read by SETUP_1BIT */
+   GET_XRB(xrb);
+   const GLubyte r = color[RCOMP], g = color[GCOMP], b = color[BCOMP];
+   XMesaImage *img = xrb->ximage;
+   GLuint k;
+   SETUP_1BIT;
+   y = YFLIP(xrb, y);
+   for (k = 0; k < n; k++, x++) {
+      if (mask && !mask[k])
+         continue;
+      XMesaPutPixel( img, x, y, DITHER_1BIT( x, y, r, g, b ) );
+   }
+}
+
+
+/*
+ * Write a span of identical HPCR pixels to an XImage.
+ *
+ * The row is written directly as bytes starting at PIXEL_ADDR1(xrb, x, y).
+ * DITHER_HPCR() is evaluated per enabled pixel with the advancing x and
+ * the y value as passed in.  A NULL mask enables the whole span.
+ */
+static void put_mono_row_HPCR_ximage( PUT_MONO_ROW_ARGS )
+{
+   const GLubyte *color = (const GLubyte *) value;
+   GET_XRB(xrb);
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);  /* not named below; presumably read by DITHER_HPCR */
+   const GLubyte r = color[RCOMP], g = color[GCOMP], b = color[BCOMP];
+   GLubyte *dst = PIXEL_ADDR1(xrb, x, y);
+   GLuint k;
+   for (k = 0; k < n; k++, x++) {
+      if (mask && !mask[k])
+         continue;
+      dst[k] = DITHER_HPCR( x, y, r, g, b );
+   }
+}
+
+
+/*
+ * Fill n pixels of one row of an 8-bit GRAYSCALE XImage with one gray
+ * value computed once from the color.  Entries with mask[i] == 0 are skipped.
+ */
+static void put_mono_row_GRAYSCALE8_ximage( PUT_MONO_ROW_ARGS )
+{
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const GLubyte gray = GRAY_RGB(rgb[RCOMP], rgb[GCOMP], rgb[BCOMP]);
+   GLubyte *dst = (GLubyte *) PIXEL_ADDR1(xrb, x, y);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (mask && !mask[i]) continue;   /* pixel masked out */
+      dst[i] = gray;
+   }
+}
+
+
+
+/*
+ * Fill n pixels of one row of a 16-bit XImage with a single color using
+ * PACK_TRUEDITHER per pixel.  Entries with mask[i] == 0 are skipped.
+ */
+static void put_mono_row_DITHER_5R6G5B_ximage( PUT_MONO_ROW_ARGS )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const GLint r = rgb[RCOMP], g = rgb[GCOMP], b = rgb[BCOMP];
+   GLushort *ptr = PIXEL_ADDR2(xrb, x, y );   /* taken before y is flipped */
+   GLuint i;
+   y = YFLIP(xrb, y);   /* from here on y only feeds the dither macro */
+   for (i = 0; i < n; i++) {
+      if (mask && !mask[i]) continue;   /* pixel masked out */
+      PACK_TRUEDITHER(ptr[i], x+i, y, r, g, b);
+   }
+}
+
+
+
+/**********************************************************************/
+/*** Write MONO COLOR PIXELS functions                              ***/
+/**********************************************************************/
+/* Parameter list shared by the put_mono_values_* functions below: n pixels
+ * at coordinates (x[i], y[i]), one color/index in *value, per-pixel mask. */
+#define PUT_MONO_VALUES_ARGS \
+	GLcontext *ctx, struct gl_renderbuffer *rb,	\
+	GLuint n, const GLint x[], const GLint y[],	\
+	const void *value, const GLubyte mask[]
+
+
+/*
+ * Draw one solid color at n scattered pixel positions of a pixmap.
+ * The GC foreground is set once; a point is drawn for every i with
+ * mask[i] != 0.  mask is read unconditionally, so it must not be NULL.
+ */
+static void put_mono_values_pixmap( PUT_MONO_VALUES_ARGS )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   const GLubyte *rgba = (const GLubyte *) value;
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   const unsigned long pixel = xmesa_color_to_pixel(ctx, rgba[RCOMP],
+               rgba[GCOMP], rgba[BCOMP], rgba[ACOMP], xmesa->pixelformat);
+   GLuint i;
+   XMesaSetForeground( xmesa->display, gc, pixel );
+   for (i = 0; i < n; i++) {
+      if (!mask[i]) continue;   /* pixel masked out */
+      XMesaDrawPoint( dpy, target, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+   }
+}
+
+
+/* Draw one color index at n scattered pixel positions of a pixmap.
+ * mask is read unconditionally, so it must not be NULL. */
+static void
+put_mono_values_ci_pixmap( PUT_MONO_VALUES_ARGS )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   const GLuint colorIndex = *((GLuint *) value);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   XMesaSetForeground( xmesa->display, gc, colorIndex );
+   for (i = 0; i < n; i++) {
+      if (!mask[i]) continue;   /* pixel masked out */
+      XMesaDrawPoint( dpy, target, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+   }
+}
+
+
+/*
+ * Draw one color at n scattered pixel positions of a pixmap, dithering it
+ * per pixel with PACK_TRUEDITHER.  The dither is keyed on the unflipped
+ * y[i]; only the drawn point uses YFLIP.  mask must not be NULL.
+ */
+static void put_mono_values_TRUEDITHER_pixmap( PUT_MONO_VALUES_ARGS )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const GLubyte r = rgb[RCOMP], g = rgb[GCOMP], b = rgb[BCOMP];
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      unsigned long p;
+      if (!mask[i]) continue;   /* pixel masked out */
+      PACK_TRUEDITHER(p, x[i], y[i], r, g, b);
+      XMesaSetForeground( dpy, gc, p );
+      XMesaDrawPoint( dpy, target, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+   }
+}
+
+
+/*
+ * Draw one color at n scattered pixel positions of a pixmap, choosing each
+ * pixel value with DITHER(x[i], y[i], ...).  mask must not be NULL.
+ */
+static void put_mono_values_DITHER_pixmap( PUT_MONO_VALUES_ARGS )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const GLubyte r = rgb[RCOMP], g = rgb[GCOMP], b = rgb[BCOMP];
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   DITHER_SETUP;
+   for (i = 0; i < n; i++) {
+      if (!mask[i]) continue;   /* pixel masked out */
+      XMesaSetForeground( dpy, gc, DITHER( x[i], y[i], r, g, b ) );
+      XMesaDrawPoint( dpy, target, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+   }
+}
+
+
+/*
+ * Draw one color at n scattered pixel positions of a PF_1BIT pixmap,
+ * dithering per position with DITHER_1BIT.  mask must not be NULL.
+ */
+static void put_mono_values_1BIT_pixmap( PUT_MONO_VALUES_ARGS )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const GLubyte r = rgb[RCOMP], g = rgb[GCOMP], b = rgb[BCOMP];
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   SETUP_1BIT;
+   for (i = 0; i < n; i++) {
+      if (!mask[i]) continue;   /* pixel masked out */
+      XMesaSetForeground( dpy, gc, DITHER_1BIT( x[i], y[i], r, g, b ) );
+      XMesaDrawPoint( dpy, target, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+   }
+}
+
+
+/*
+ * Store one solid color, converted once by xmesa_color_to_pixel(), at n
+ * scattered pixel positions of an XImage.  mask must not be NULL.
+ */
+static void put_mono_values_ximage( PUT_MONO_VALUES_ARGS )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   const GLubyte *rgba = (const GLubyte *) value;
+   XMesaImage *img = xrb->ximage;
+   const unsigned long pixel = xmesa_color_to_pixel(ctx, rgba[RCOMP],
+               rgba[GCOMP], rgba[BCOMP], rgba[ACOMP], xmesa->pixelformat);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask[i]) continue;   /* pixel masked out */
+      XMesaPutPixel( img, x[i], YFLIP(xrb, y[i]), pixel );
+   }
+}
+
+
+/* Store one color index at n scattered pixel positions of an XImage.
+ * mask is read unconditionally, so it must not be NULL. */
+static void put_mono_values_ci_ximage( PUT_MONO_VALUES_ARGS )
+{
+   GET_XRB(xrb);
+   const GLuint colorIndex = *((GLuint *) value);
+   XMesaImage *img = xrb->ximage;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask[i]) continue;   /* pixel masked out */
+      XMesaPutPixel( img, x[i], YFLIP(xrb, y[i]), colorIndex );
+   }
+}
+
+
+/*
+ * Store one color at n scattered pixel positions of an XImage, dithered per
+ * pixel by PACK_TRUEDITHER on (x[i], flipped y[i]).  mask must not be NULL.
+ */
+static void put_mono_values_TRUEDITHER_ximage( PUT_MONO_VALUES_ARGS )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const int r = rgb[RCOMP], g = rgb[GCOMP], b = rgb[BCOMP];
+   XMesaImage *img = xrb->ximage;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      unsigned long p;
+      if (!mask[i]) continue;   /* pixel masked out */
+      PACK_TRUEDITHER(p, x[i], YFLIP(xrb, y[i]), r, g, b);
+      XMesaPutPixel( img, x[i], YFLIP(xrb, y[i]), p );
+   }
+}
+
+
+
+/* Store one color, packed once as 8A8B8G8R, at n scattered pixel positions
+ * of an XImage addressed with PIXEL_ADDR4.  mask must not be NULL.
+ */
+static void put_mono_values_8A8B8G8R_ximage( PUT_MONO_VALUES_ARGS )
+{
+   GET_XRB(xrb);
+   const GLubyte *rgba = (const GLubyte *) value;
+   const GLuint packed = PACK_8A8B8G8R(rgba[RCOMP], rgba[GCOMP],
+                                       rgba[BCOMP], rgba[ACOMP]);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      GLuint *dst;
+      if (!mask[i]) continue;   /* pixel masked out */
+      dst = PIXEL_ADDR4(xrb, x[i], y[i] );
+      *dst = packed;
+   }
+}
+
+/* Store one color, packed once as 8A8R8G8B, at n scattered pixel positions
+ * of an XImage addressed with PIXEL_ADDR4.  mask must not be NULL.
+ */
+static void put_mono_values_8A8R8G8B_ximage( PUT_MONO_VALUES_ARGS )
+{
+   GET_XRB(xrb);
+   const GLubyte *rgba = (const GLubyte *) value;
+   const GLuint packed = PACK_8A8R8G8B(rgba[RCOMP], rgba[GCOMP],
+                                       rgba[BCOMP], rgba[ACOMP]);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      GLuint *dst;
+      if (!mask[i]) continue;   /* pixel masked out */
+      dst = PIXEL_ADDR4(xrb, x[i], y[i] );
+      *dst = packed;
+   }
+}
+
+/* Store one color, packed once as 8R8G8B, at n scattered pixel positions
+ * of an XImage addressed with PIXEL_ADDR4.  mask must not be NULL.
+ */
+static void put_mono_values_8R8G8B_ximage( PUT_MONO_VALUES_ARGS )
+{
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const GLuint packed = PACK_8R8G8B(rgb[RCOMP], rgb[GCOMP], rgb[BCOMP]);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      GLuint *dst;
+      if (!mask[i]) continue;   /* pixel masked out */
+      dst = PIXEL_ADDR4(xrb, x[i], y[i] );
+      *dst = packed;
+   }
+}
+
+
+/* Store one color at n scattered pixel positions of an XImage whose pixels
+ * are bgr_t records addressed with PIXEL_ADDR3.  mask must not be NULL.
+ */
+static void put_mono_values_8R8G8B24_ximage( PUT_MONO_VALUES_ARGS )
+{
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const GLubyte r = rgb[RCOMP], g = rgb[GCOMP], b = rgb[BCOMP];
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      bgr_t *dst;
+      if (!mask[i]) continue;   /* pixel masked out */
+      dst = PIXEL_ADDR3(xrb, x[i], y[i] );
+      dst->r = r;
+      dst->g = g;
+      dst->b = b;
+   }
+}
+
+
+/*
+ * Store one color at n scattered pixel positions of an XImage, choosing each
+ * pixel value with DITHER(x[i], y[i], ...).  mask must not be NULL.
+ */
+static void put_mono_values_DITHER_ximage( PUT_MONO_VALUES_ARGS )
+{
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const GLubyte r = rgb[RCOMP], g = rgb[GCOMP], b = rgb[BCOMP];
+   XMesaImage *img = xrb->ximage;
+   GLuint i;
+   DITHER_SETUP;
+   for (i = 0; i < n; i++) {
+      if (!mask[i]) continue;   /* pixel masked out */
+      XMesaPutPixel( img, x[i], YFLIP(xrb, y[i]), DITHER( x[i], y[i], r, g, b ) );
+   }
+}
+
+
+/* Store one color at n scattered pixel positions of an 8-bit XImage,
+ * choosing each pixel value with DITHER().  mask must not be NULL.
+ */
+static void put_mono_values_DITHER8_ximage( PUT_MONO_VALUES_ARGS )
+{
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const GLubyte r = rgb[RCOMP], g = rgb[GCOMP], b = rgb[BCOMP];
+   GLuint i;
+   DITHER_SETUP;
+   for (i = 0; i < n; i++) {
+      GLubyte *dst;
+      if (!mask[i]) continue;   /* pixel masked out */
+      dst = PIXEL_ADDR1(xrb, x[i], y[i]);
+      *dst = (GLubyte) DITHER( x[i], y[i], r, g, b );
+   }
+}
+
+
+/* Store one LOOKUP color, computed once, at n scattered pixel positions
+ * of an 8-bit XImage.  mask must not be NULL.
+ */
+static void put_mono_values_LOOKUP8_ximage( PUT_MONO_VALUES_ARGS )
+{
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   GLubyte fill;
+   GLuint i;
+   LOOKUP_SETUP;
+   fill = LOOKUP(rgb[RCOMP], rgb[GCOMP], rgb[BCOMP]);
+   for (i = 0; i < n; i++) {
+      GLubyte *dst;
+      if (!mask[i]) continue;   /* pixel masked out */
+      dst = PIXEL_ADDR1(xrb, x[i], y[i]);
+      *dst = fill;
+   }
+}
+
+
+
+/*
+ * Store one color at n scattered pixel positions of a PF_1BIT XImage,
+ * dithering per position with DITHER_1BIT.  mask must not be NULL.
+ */
+static void put_mono_values_1BIT_ximage( PUT_MONO_VALUES_ARGS )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const GLubyte r = rgb[RCOMP], g = rgb[GCOMP], b = rgb[BCOMP];
+   XMesaImage *img = xrb->ximage;
+   GLuint i;
+   SETUP_1BIT;
+   for (i = 0; i < n; i++) {
+      if (!mask[i]) continue;   /* pixel masked out */
+      XMesaPutPixel( img, x[i], YFLIP(xrb, y[i]),
+                     DITHER_1BIT( x[i], y[i], r, g, b ));
+   }
+}
+
+
+/* Store one color at n scattered pixel positions of an HPCR XImage,
+ * dithering per position with DITHER_HPCR.  mask must not be NULL.
+ */
+static void put_mono_values_HPCR_ximage( PUT_MONO_VALUES_ARGS )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const GLubyte r = rgb[RCOMP], g = rgb[GCOMP], b = rgb[BCOMP];
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      GLubyte *dst;
+      if (!mask[i]) continue;   /* pixel masked out */
+      dst = PIXEL_ADDR1(xrb, x[i], y[i]);
+      *dst = DITHER_HPCR( x[i], y[i], r, g, b );
+   }
+}
+
+
+/* Store one gray value, computed once with GRAY_RGB, at n scattered pixel
+ * positions of an 8-bit XImage.  mask must not be NULL.
+ */
+static void put_mono_values_GRAYSCALE8_ximage( PUT_MONO_VALUES_ARGS )
+{
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const GLubyte gray = GRAY_RGB(rgb[RCOMP], rgb[GCOMP], rgb[BCOMP]);
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      GLubyte *dst;
+      if (!mask[i]) continue;   /* pixel masked out */
+      dst = PIXEL_ADDR1(xrb, x[i], y[i]);
+      *dst = gray;
+   }
+}
+
+
+/* Store one color at n scattered pixel positions of a 16-bit XImage,
+ * dithered per pixel by PACK_TRUEDITHER on (x[i], y[i]).  mask must be set.
+ */
+static void put_mono_values_DITHER_5R6G5B_ximage( PUT_MONO_VALUES_ARGS )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   const GLubyte *rgb = (const GLubyte *) value;
+   const int r = rgb[RCOMP], g = rgb[GCOMP], b = rgb[BCOMP];
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      GLushort *ptr;
+      if (!mask[i]) continue;   /* pixel masked out */
+      ptr = PIXEL_ADDR2(xrb, x[i], y[i] );
+      PACK_TRUEDITHER(*ptr, x[i], y[i], r, g, b);
+   }
+}
+
+
+
+/**********************************************************************/
+/*** Write INDEX SPAN functions                                     ***/
+/**********************************************************************/
+
+/*
+ * Write a span of n color-index pixels to a Pixmap, starting at (x, y).
+ * Each written pixel sets the GC foreground to index[i] and draws one
+ * point.  When mask is non-NULL, entries with mask[i] == 0 are skipped;
+ * a NULL mask writes every pixel.
+ */
+static void put_row_ci_pixmap( PUT_ROW_ARGS )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   const GLuint *index = (GLuint *) values;
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+
+   y = YFLIP(xrb, y);   /* row is constant for the whole span */
+
+   /* The masked and unmasked cases differ only in the skip test, so a
+    * single loop serves both.
+    */
+   for (i = 0; i < n; i++, x++) {
+      if (mask && !mask[i])
+         continue;   /* pixel masked out */
+      XMesaSetForeground( dpy, gc, (unsigned long) index[i] );
+      XMesaDrawPoint( dpy, target, gc, (int) x, (int) y );
+   }
+}
+
+
+/*
+ * Write a span of n color-index pixels to an XImage, starting at (x, y).
+ * When mask is non-NULL, entries with mask[i] == 0 are skipped; a NULL
+ * mask writes every pixel.
+ */
+static void put_row_ci_ximage( PUT_ROW_ARGS )
+{
+   GET_XRB(xrb);
+   const GLuint *index = (const GLuint *) values;
+   XMesaImage *img = xrb->ximage;
+   GLuint i;
+
+   y = YFLIP(xrb, y);   /* row is constant for the whole span */
+
+   /* The masked and unmasked cases differ only in the skip test, so a
+    * single loop serves both.
+    */
+   for (i = 0; i < n; i++, x++) {
+      if (mask && !mask[i])
+         continue;   /* pixel masked out */
+      XMesaPutPixel( img, x, y, (unsigned long) index[i] );
+   }
+}
+
+
+/**********************************************************************/
+/*** Write INDEX PIXELS functions                                   ***/
+/**********************************************************************/
+
+/*
+ * Write n color-index pixels to a Pixmap at scattered positions (x[i], y[i]).
+ * mask is read unconditionally, so it must not be NULL.
+ */
+static void put_values_ci_pixmap( PUT_VALUES_ARGS )
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   const GLuint *index = (const GLuint *) values;
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaDrawable target = xrb->drawable;
+   XMesaGC gc = XMESA_BUFFER(ctx->DrawBuffer)->gc;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask[i]) continue;   /* pixel masked out */
+      XMesaSetForeground( dpy, gc, (unsigned long) index[i] );
+      XMesaDrawPoint( dpy, target, gc, (int) x[i], (int) YFLIP(xrb, y[i]) );
+   }
+}
+
+
+/*
+ * Write n color-index pixels to an XImage at scattered positions (x[i], y[i]).
+ * mask is read unconditionally, so it must not be NULL.
+ */
+static void put_values_ci_ximage( PUT_VALUES_ARGS )
+{
+   GET_XRB(xrb);
+   const GLuint *index = (const GLuint *) values;
+   XMesaImage *img = xrb->ximage;
+   GLuint i;
+   for (i = 0; i < n; i++) {
+      if (!mask[i]) continue;   /* pixel masked out */
+      XMesaPutPixel(img, x[i], YFLIP(xrb, y[i]), (unsigned long) index[i]);
+   }
+}
+
+
+
+
+/**********************************************************************/
+/*****                      Pixel reading                         *****/
+/**********************************************************************/
+
+#ifndef XFree86Server
+/**
+ * Clip a one-row read against the left and right screen edges before it is
+ * handed to XGetImage.  The span origin is translated to the root window of
+ * screen 0 with XTranslateCoordinates(); *x and *n are adjusted in place.
+ * *y is only an input to the translation and is never modified.
+ * PBUFFER and PIXMAP buffers are never clipped.
+ * \return  -1 if span is totally clipped away,
+ *          else return number of pixels to skip in the destination array.
+ */
+static int
+clip_for_xgetimage(GLcontext *ctx, XMesaPixmap pixmap, GLuint *n, GLint *x, GLint *y)
+{
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   XMesaBuffer source = XMESA_BUFFER(ctx->DrawBuffer);
+   Window rootWin = RootWindow(xmesa->display, 0);
+   GLint screenWidth = WidthOfScreen(DefaultScreenOfDisplay(xmesa->display));
+   Window child;
+   GLint dx, dy;
+
+   if (source->type == PBUFFER || source->type == PIXMAP)
+      return 0;
+
+   XTranslateCoordinates(xmesa->display, pixmap, rootWin,
+                         *x, *y, &dx, &dy, &child);
+   if (dx >= screenWidth)
+      return -1;   /* totally clipped on right */
+   if (dx < 0) {
+      /* clipped on left: skip the off-screen pixels */
+      const GLint clip = -dx;
+      if (clip >= (GLint) *n)
+         return -1;  /* totally clipped on left */
+      *x += clip;
+      *n -= clip;
+      return clip;
+   }
+   if ((GLint) (dx + *n) > screenWidth) {
+      /* clipped on right: shorten the span */
+      const GLint clip = dx + *n - screenWidth;
+      *n -= clip;
+   }
+   return 0;
+}
+#endif
+
+
+/*
+ * Read a horizontal span of n color-index pixels starting at (x, y) into
+ * values (an array of GLuint).  Pixmap-backed buffers are read through
+ * XGetImage (or the server's GetImage hook); XImage-backed buffers are read
+ * pixel by pixel.  If neither backing store is set nothing is written.
+ */
+static void
+get_row_ci(GLcontext *ctx, struct gl_renderbuffer *rb,
+           GLuint n, GLint x, GLint y, void *values)
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   GLuint *index = (GLuint *) values;
+   GLuint i;
+
+   y = YFLIP(xrb, y);
+
+   if (xrb->pixmap) {
+#ifndef XFree86Server
+      XMesaImage *span = NULL;
+      int error;
+      /* k = number of destination entries to skip after clipping */
+      int k = clip_for_xgetimage(ctx, xrb->pixmap, &n, &x, &y);
+      if (k < 0)
+         return;   /* nothing visible; destination left untouched */
+      index += k;
+
+      catch_xgetimage_errors( xmesa->display );
+      span = XGetImage( xmesa->display, xrb->pixmap,
+                        x, y, n, 1, AllPlanes, ZPixmap );
+      error = check_xgetimage_errors();
+      if (!span || error) {
+         /* read failed: report index 0 for every pixel */
+         for (i = 0; i < n; i++)
+            index[i] = 0;
+      }
+      else {
+         for (i = 0; i < n; i++)
+            index[i] = (GLuint) XMesaGetPixel( span, i, 0 );
+      }
+      if (span)
+         XMesaDestroyImage( span );
+#else
+      (*xmesa->display->GetImage)(xrb->drawable,
+                                  x, y, n, 1, ZPixmap,
+                                  ~0L, (pointer)index);
+#endif
+   }
+   else if (xrb->ximage) {
+      XMesaImage *img = xrb->ximage;
+      for (i = 0; i < n; i++, x++)
+         index[i] = (GLuint) XMesaGetPixel( img, x, y );
+   }
+}
+
+
+
+/*
+ * Read a horizontal span of n color pixels starting at (x, y) into values,
+ * an array of GLubyte[4] RGBA tuples.  Pixmap/window buffers are fetched
+ * into a temporary one-row image, which is destroyed on every path that
+ * created it; XImage back buffers are decoded in place.  Decoding depends
+ * on xmesa->pixelformat.  If the fetch fails, every pixel is returned as
+ * (0,0,0,0).  Formats without stored alpha report alpha 255.
+ */
+static void
+get_row_rgba(GLcontext *ctx, struct gl_renderbuffer *rb,
+             GLuint n, GLint x, GLint y, void *values)
+{
+   GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GET_XRB(xrb);
+   XMesaBuffer source = XMESA_BUFFER(ctx->DrawBuffer);
+
+   if (xrb->pixmap) {
+      /* Read from Pixmap or Window */
+      XMesaImage *span = NULL;
+      int error;
+#ifdef XFree86Server
+      span = XMesaCreateImage(xmesa->xm_visual->BitsPerPixel, n, 1, NULL);
+      if (span)
+         span->data = (char *)MALLOC(span->height * span->bytes_per_line);
+      error = (!span || !span->data);
+      if (!error)   /* never hand a missing buffer to GetImage */
+         (*xmesa->display->GetImage)(xrb->drawable,
+                                     x, YFLIP(xrb, y), n, 1, ZPixmap,
+                                     ~0L, (pointer)span->data);
+#else
+      int k;
+      y = YFLIP(xrb, y);
+      k = clip_for_xgetimage(ctx, xrb->pixmap, &n, &x, &y);
+      if (k < 0)
+         return;
+      rgba += k;
+      catch_xgetimage_errors( xmesa->display );
+      span = XGetImage( xmesa->display, xrb->pixmap,
+                        x, y, n, 1, AllPlanes, ZPixmap );
+      error = check_xgetimage_errors();
+#endif
+      if (span && !error) {
+         switch (xmesa->pixelformat) {
+            case PF_Truecolor:
+            case PF_Dither_True:
+               {
+                  const GLubyte *pixelToR = xmesa->xm_visual->PixelToR;
+                  const GLubyte *pixelToG = xmesa->xm_visual->PixelToG;
+                  const GLubyte *pixelToB = xmesa->xm_visual->PixelToB;
+                  unsigned long rMask = GET_REDMASK(xmesa->xm_visual);
+                  unsigned long gMask = GET_GREENMASK(xmesa->xm_visual);
+                  unsigned long bMask = GET_BLUEMASK(xmesa->xm_visual);
+                  GLint rShift = xmesa->xm_visual->rshift;
+                  GLint gShift = xmesa->xm_visual->gshift;
+                  GLint bShift = xmesa->xm_visual->bshift;
+                  GLuint i;
+                  for (i=0;i<n;i++) {
+                     unsigned long p;
+                     p = XMesaGetPixel( span, i, 0 );
+                     rgba[i][RCOMP] = pixelToR[(p & rMask) >> rShift];
+                     rgba[i][GCOMP] = pixelToG[(p & gMask) >> gShift];
+                     rgba[i][BCOMP] = pixelToB[(p & bMask) >> bShift];
+                     rgba[i][ACOMP] = 255;
+                  }
+               }
+               break;
+            case PF_5R6G5B:
+            case PF_Dither_5R6G5B:
+               {
+                  const GLubyte *pixelToR = xmesa->xm_visual->PixelToR;
+                  const GLubyte *pixelToG = xmesa->xm_visual->PixelToG;
+                  const GLubyte *pixelToB = xmesa->xm_visual->PixelToB;
+                  GLuint i;
+                  for (i=0;i<n;i++) {
+                     unsigned long p = XMesaGetPixel( span, i, 0 );
+                     /* table lookup; plain shifts would be faster but less accurate */
+                     rgba[i][RCOMP] = pixelToR[p >> 11];
+                     rgba[i][GCOMP] = pixelToG[(p >> 5) & 0x3f];
+                     rgba[i][BCOMP] = pixelToB[p & 0x1f];
+                     rgba[i][ACOMP] = 255;
+                  }
+               }
+               break;
+            case PF_8A8B8G8R:
+               {
+                  const GLuint *ptr4 = (GLuint *) span->data;
+                  GLuint i;
+                  for (i=0;i<n;i++) {
+                     GLuint p4 = *ptr4++;
+                     rgba[i][RCOMP] = (GLubyte) ( p4        & 0xff);
+                     rgba[i][GCOMP] = (GLubyte) ((p4 >> 8)  & 0xff);
+                     rgba[i][BCOMP] = (GLubyte) ((p4 >> 16) & 0xff);
+                     rgba[i][ACOMP] = (GLubyte) ((p4 >> 24) & 0xff);
+                  }
+               }
+               break;
+            case PF_8A8R8G8B:
+               {
+                  const GLuint *ptr4 = (GLuint *) span->data;
+                  GLuint i;
+                  for (i=0;i<n;i++) {
+                     GLuint p4 = *ptr4++;
+                     rgba[i][RCOMP] = (GLubyte) ((p4 >> 16) & 0xff);
+                     rgba[i][GCOMP] = (GLubyte) ((p4 >> 8)  & 0xff);
+                     rgba[i][BCOMP] = (GLubyte) ( p4        & 0xff);
+                     rgba[i][ACOMP] = (GLubyte) ((p4 >> 24) & 0xff);
+                  }
+               }
+               break;
+            case PF_8R8G8B:
+               {
+                  const GLuint *ptr4 = (GLuint *) span->data;
+                  GLuint i;
+                  for (i=0;i<n;i++) {
+                     GLuint p4 = *ptr4++;
+                     rgba[i][RCOMP] = (GLubyte) ((p4 >> 16) & 0xff);
+                     rgba[i][GCOMP] = (GLubyte) ((p4 >> 8)  & 0xff);
+                     rgba[i][BCOMP] = (GLubyte) ( p4        & 0xff);
+                     rgba[i][ACOMP] = 255;
+                  }
+               }
+               break;
+            case PF_8R8G8B24:
+               {
+                  const bgr_t *ptr3 = (bgr_t *) span->data;
+                  GLuint i;
+                  for (i=0;i<n;i++) {
+                     rgba[i][RCOMP] = ptr3[i].r;
+                     rgba[i][GCOMP] = ptr3[i].g;
+                     rgba[i][BCOMP] = ptr3[i].b;
+                     rgba[i][ACOMP] = 255;
+                  }
+               }
+               break;
+            case PF_HPCR:
+               {
+                  GLubyte *ptr1 = (GLubyte *) span->data;
+                  GLuint i;
+                  for (i=0;i<n;i++) {
+                     GLubyte p = *ptr1++;
+                     rgba[i][RCOMP] =  p & 0xE0;
+                     rgba[i][GCOMP] = (p & 0x1C) << 3;
+                     rgba[i][BCOMP] = (p & 0x03) << 6;
+                     rgba[i][ACOMP] = 255;
+                  }
+               }
+               break;
+            case PF_Dither:
+            case PF_Lookup:
+            case PF_Grayscale:
+               {
+                  GLubyte *rTable = source->pixel_to_r;
+                  GLubyte *gTable = source->pixel_to_g;
+                  GLubyte *bTable = source->pixel_to_b;
+                  if (GET_VISUAL_DEPTH(xmesa->xm_visual)==8) {
+                     const GLubyte *ptr1 = (GLubyte *) span->data;
+                     GLuint i;
+                     for (i=0;i<n;i++) {
+                        unsigned long p = *ptr1++;
+                        rgba[i][RCOMP] = rTable[p];
+                        rgba[i][GCOMP] = gTable[p];
+                        rgba[i][BCOMP] = bTable[p];
+                        rgba[i][ACOMP] = 255;
+                     }
+                  }
+                  else {
+                     GLuint i;
+                     for (i=0;i<n;i++) {
+                        unsigned long p = XMesaGetPixel( span, i, 0 );
+                        rgba[i][RCOMP] = rTable[p];
+                        rgba[i][GCOMP] = gTable[p];
+                        rgba[i][BCOMP] = bTable[p];
+                        rgba[i][ACOMP] = 255;
+                     }
+                  }
+               }
+               break;
+            case PF_1Bit:
+               {
+                  int bitFlip = xmesa->xm_visual->bitFlip;
+                  GLuint i;
+                  for (i=0;i<n;i++) {
+                     unsigned long p;
+                     p = XMesaGetPixel( span, i, 0 ) ^ bitFlip;
+                     rgba[i][RCOMP] = (GLubyte) (p * 255);
+                     rgba[i][GCOMP] = (GLubyte) (p * 255);
+                     rgba[i][BCOMP] = (GLubyte) (p * 255);
+                     rgba[i][ACOMP] = 255;
+                  }
+               }
+               break;
+            default:
+               _mesa_problem(NULL,"Problem in DD.read_color_span (1)");
+               /* break, not return: the span image below must be freed */
+               break;
+         }
+      }
+      else {
+         /* return black pixels */
+         GLuint i;
+         for (i=0;i<n;i++) {
+            rgba[i][RCOMP] = rgba[i][GCOMP] = rgba[i][BCOMP] = rgba[i][ACOMP] = 0;
+         }
+      }
+      if (span) {
+         XMesaDestroyImage( span );
+      }
+   }
+   else if (xrb->ximage) {
+      /* Read from XImage back buffer */
+      switch (xmesa->pixelformat) {
+         case PF_Truecolor:
+         case PF_Dither_True:
+            {
+               const GLubyte *pixelToR = xmesa->xm_visual->PixelToR;
+               const GLubyte *pixelToG = xmesa->xm_visual->PixelToG;
+               const GLubyte *pixelToB = xmesa->xm_visual->PixelToB;
+               unsigned long rMask = GET_REDMASK(xmesa->xm_visual);
+               unsigned long gMask = GET_GREENMASK(xmesa->xm_visual);
+               unsigned long bMask = GET_BLUEMASK(xmesa->xm_visual);
+               GLint rShift = xmesa->xm_visual->rshift;
+               GLint gShift = xmesa->xm_visual->gshift;
+               GLint bShift = xmesa->xm_visual->bshift;
+               XMesaImage *img = xrb->ximage;
+               GLuint i;
+               y = YFLIP(xrb, y);
+               for (i=0;i<n;i++) {
+                  unsigned long p;
+                  p = XMesaGetPixel( img, x+i, y );
+                  rgba[i][RCOMP] = pixelToR[(p & rMask) >> rShift];
+                  rgba[i][GCOMP] = pixelToG[(p & gMask) >> gShift];
+                  rgba[i][BCOMP] = pixelToB[(p & bMask) >> bShift];
+                  rgba[i][ACOMP] = 255;
+               }
+            }
+            break;
+         case PF_5R6G5B:
+         case PF_Dither_5R6G5B:
+            {
+               const GLubyte *pixelToR = xmesa->xm_visual->PixelToR;
+               const GLubyte *pixelToG = xmesa->xm_visual->PixelToG;
+               const GLubyte *pixelToB = xmesa->xm_visual->PixelToB;
+               const GLushort *ptr2 = PIXEL_ADDR2(xrb, x, y);
+               GLuint i;
+#if defined(__i386__) /* word stores don't have to be on 4-byte boundaries */
+               const GLuint *ptr4 = (const GLuint *) ptr2;
+               GLuint extraPixel = (n & 1);
+               n -= extraPixel;
+               for (i = 0; i < n; i += 2) {
+                  const GLuint p = *ptr4++;
+                  const GLuint p0 = p & 0xffff;
+                  const GLuint p1 = p >> 16;
+                  /* table lookup; plain shifts would be faster but less accurate */
+                  rgba[i][RCOMP] = pixelToR[p0 >> 11];
+                  rgba[i][GCOMP] = pixelToG[(p0 >> 5) & 0x3f];
+                  rgba[i][BCOMP] = pixelToB[p0 & 0x1f];
+                  rgba[i][ACOMP] = 255;
+                  rgba[i+1][RCOMP] = pixelToR[p1 >> 11];
+                  rgba[i+1][GCOMP] = pixelToG[(p1 >> 5) & 0x3f];
+                  rgba[i+1][BCOMP] = pixelToB[p1 & 0x1f];
+                  rgba[i+1][ACOMP] = 255;
+               }
+               if (extraPixel) {
+                  GLushort p = ptr2[n];
+                  rgba[n][RCOMP] = pixelToR[p >> 11];
+                  rgba[n][GCOMP] = pixelToG[(p >> 5) & 0x3f];
+                  rgba[n][BCOMP] = pixelToB[p & 0x1f];
+                  rgba[n][ACOMP] = 255;
+               }
+#else
+               for (i = 0; i < n; i++) {
+                  const GLushort p = ptr2[i];
+                  rgba[i][RCOMP] = pixelToR[p >> 11];
+                  rgba[i][GCOMP] = pixelToG[(p >> 5) & 0x3f];
+                  rgba[i][BCOMP] = pixelToB[p & 0x1f];
+                  rgba[i][ACOMP] = 255;
+               }
+#endif
+            }
+            break;
+         case PF_8A8B8G8R:
+            {
+               const GLuint *ptr4 = PIXEL_ADDR4(xrb, x, y);
+               GLuint i;
+               for (i=0;i<n;i++) {
+                  GLuint p4 = *ptr4++;
+                  rgba[i][RCOMP] = (GLubyte) ( p4        & 0xff);
+                  rgba[i][GCOMP] = (GLubyte) ((p4 >> 8)  & 0xff);
+                  rgba[i][BCOMP] = (GLubyte) ((p4 >> 16) & 0xff);
+                  rgba[i][ACOMP] = (GLubyte) ((p4 >> 24) & 0xff);
+               }
+            }
+            break;
+         case PF_8A8R8G8B:
+            {
+               const GLuint *ptr4 = PIXEL_ADDR4(xrb, x, y);
+               GLuint i;
+               for (i=0;i<n;i++) {
+                  GLuint p4 = *ptr4++;
+                  rgba[i][RCOMP] = (GLubyte) ((p4 >> 16) & 0xff);
+                  rgba[i][GCOMP] = (GLubyte) ((p4 >> 8)  & 0xff);
+                  rgba[i][BCOMP] = (GLubyte) ( p4        & 0xff);
+                  rgba[i][ACOMP] = (GLubyte) ((p4 >> 24) & 0xff);
+               }
+            }
+            break;
+         case PF_8R8G8B:
+            {
+               const GLuint *ptr4 = PIXEL_ADDR4(xrb, x, y);
+               GLuint i;
+               for (i=0;i<n;i++) {
+                  GLuint p4 = *ptr4++;
+                  rgba[i][RCOMP] = (GLubyte) ((p4 >> 16) & 0xff);
+                  rgba[i][GCOMP] = (GLubyte) ((p4 >> 8)  & 0xff);
+                  rgba[i][BCOMP] = (GLubyte) ( p4        & 0xff);
+                  rgba[i][ACOMP] = 255;
+               }
+            }
+            break;
+         case PF_8R8G8B24:
+            {
+               const bgr_t *ptr3 = PIXEL_ADDR3(xrb, x, y);
+               GLuint i;
+               for (i=0;i<n;i++) {
+                  rgba[i][RCOMP] = ptr3[i].r;
+                  rgba[i][GCOMP] = ptr3[i].g;
+                  rgba[i][BCOMP] = ptr3[i].b;
+                  rgba[i][ACOMP] = 255;
+               }
+            }
+            break;
+         case PF_HPCR:
+            {
+               const GLubyte *ptr1 = PIXEL_ADDR1(xrb, x, y);
+               GLuint i;
+               for (i=0;i<n;i++) {
+                  GLubyte p = *ptr1++;
+                  rgba[i][RCOMP] =  p & 0xE0;
+                  rgba[i][GCOMP] = (p & 0x1C) << 3;
+                  rgba[i][BCOMP] = (p & 0x03) << 6;
+                  rgba[i][ACOMP] = 255;
+               }
+            }
+            break;
+         case PF_Dither:
+         case PF_Lookup:
+         case PF_Grayscale:
+            {
+               const GLubyte *rTable = source->pixel_to_r;
+               const GLubyte *gTable = source->pixel_to_g;
+               const GLubyte *bTable = source->pixel_to_b;
+               if (GET_VISUAL_DEPTH(xmesa->xm_visual)==8) {
+                  GLubyte *ptr1 = PIXEL_ADDR1(xrb, x, y);
+                  GLuint i;
+                  for (i=0;i<n;i++) {
+                     unsigned long p = *ptr1++;
+                     rgba[i][RCOMP] = rTable[p];
+                     rgba[i][GCOMP] = gTable[p];
+                     rgba[i][BCOMP] = bTable[p];
+                     rgba[i][ACOMP] = 255;
+                  }
+               }
+               else {
+                  XMesaImage *img = xrb->ximage;
+                  GLuint i;
+                  y = YFLIP(xrb, y);
+                  for (i=0;i<n;i++,x++) {
+                     unsigned long p = XMesaGetPixel( img, x, y );
+                     rgba[i][RCOMP] = rTable[p];
+                     rgba[i][GCOMP] = gTable[p];
+                     rgba[i][BCOMP] = bTable[p];
+                     rgba[i][ACOMP] = 255;
+                  }
+               }
+            }
+            break;
+         case PF_1Bit:
+            {
+               XMesaImage *img = xrb->ximage;
+               int bitFlip = xmesa->xm_visual->bitFlip;
+               GLuint i;
+               y = YFLIP(xrb, y);
+               for (i=0;i<n;i++,x++) {
+                  unsigned long p;
+                  p = XMesaGetPixel( img, x, y ) ^ bitFlip;
+                  rgba[i][RCOMP] = (GLubyte) (p * 255);
+                  rgba[i][GCOMP] = (GLubyte) (p * 255);
+                  rgba[i][BCOMP] = (GLubyte) (p * 255);
+                  rgba[i][ACOMP] = 255;
+               }
+            }
+            break;
+         default:
+            _mesa_problem(NULL,"Problem in DD.read_color_span (2)");
+            return;
+      }
+   }
+}
+
+
+
+/*
+ * Fetch n color-index pixels from scattered (x[i], y[i]) positions.
+ */
+static void
+get_values_ci(GLcontext *ctx, struct gl_renderbuffer *rb,
+              GLuint n, const GLint x[], const GLint y[], void *values)
+{
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   GLuint *dst = (GLuint *) values;
+   GET_XRB(xrb);
+   GLuint k;
+   if (xrb->pixmap) {   /* X drawable: one read_pixel() per position */
+      for (k = 0; k < n; k++) {
+         dst[k] = (GLuint) read_pixel(xmesa->display, xrb->drawable,
+                                      x[k], YFLIP(xrb, y[k]));
+      }
+   }
+   else if (xrb->ximage) {   /* XImage back buffer */
+      XMesaImage *image = xrb->ximage;
+      for (k = 0; k < n; k++) {
+         dst[k] = (GLuint) XMesaGetPixel(image, x[k], YFLIP(xrb, y[k]));
+      }
+   }
+}
+
+/* Read n RGBA pixels from scattered positions (x[i], y[i]) into values,
+ * decoding the pixmap or XImage contents according to xmesa->pixelformat. */
+static void
+get_values_rgba(GLcontext *ctx, struct gl_renderbuffer *rb,
+                GLuint n, const GLint x[], const GLint y[], void *values)
+{
+   GLubyte (*rgba)[4] = (GLubyte (*)[4]) values;
+   GET_XRB(xrb);
+   const XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   XMesaDisplay *dpy = xmesa->xm_visual->display;
+   XMesaBuffer source = XMESA_BUFFER(ctx->DrawBuffer); /* pixel_to_[rgb] */
+   register GLuint i;
+
+   if (xrb->pixmap) {  /* Read from the X drawable with read_pixel() */
+      XMesaDrawable buffer = xrb->drawable;
+      switch (xmesa->pixelformat) {
+	 case PF_Truecolor:
+         case PF_Dither_True:
+         case PF_5R6G5B:
+         case PF_Dither_5R6G5B:
+            {
+               unsigned long rMask = GET_REDMASK(xmesa->xm_visual);
+               unsigned long gMask = GET_GREENMASK(xmesa->xm_visual);
+               unsigned long bMask = GET_BLUEMASK(xmesa->xm_visual);
+               GLubyte *pixelToR = xmesa->xm_visual->PixelToR;
+               GLubyte *pixelToG = xmesa->xm_visual->PixelToG;
+               GLubyte *pixelToB = xmesa->xm_visual->PixelToB;
+               GLint rShift = xmesa->xm_visual->rshift;
+               GLint gShift = xmesa->xm_visual->gshift;
+               GLint bShift = xmesa->xm_visual->bshift;
+               for (i=0;i<n;i++) {
+                  unsigned long p = read_pixel( dpy, buffer,
+                                                x[i], YFLIP(xrb, y[i]) );
+                  rgba[i][RCOMP] = pixelToR[(p & rMask) >> rShift];
+                  rgba[i][GCOMP] = pixelToG[(p & gMask) >> gShift];
+                  rgba[i][BCOMP] = pixelToB[(p & bMask) >> bShift];
+                  rgba[i][ACOMP] = 255;
+               }
+            }
+            break;
+	 case PF_8A8B8G8R:
+	    for (i=0;i<n;i++) {
+               unsigned long p = read_pixel( dpy, buffer,
+                                             x[i], YFLIP(xrb, y[i]) );
+               rgba[i][RCOMP] = (GLubyte) ( p        & 0xff);
+               rgba[i][GCOMP] = (GLubyte) ((p >> 8)  & 0xff);
+               rgba[i][BCOMP] = (GLubyte) ((p >> 16) & 0xff);
+               rgba[i][ACOMP] = (GLubyte) ((p >> 24) & 0xff);
+	    }
+	    break;
+	 case PF_8A8R8G8B:
+	    for (i=0;i<n;i++) {
+               unsigned long p = read_pixel( dpy, buffer,
+                                             x[i], YFLIP(xrb, y[i]) );
+               rgba[i][RCOMP] = (GLubyte) ((p >> 16) & 0xff);
+               rgba[i][GCOMP] = (GLubyte) ((p >> 8)  & 0xff);
+               rgba[i][BCOMP] = (GLubyte) ( p        & 0xff);
+               rgba[i][ACOMP] = (GLubyte) ((p >> 24) & 0xff);
+	    }
+	    break;
+	 case PF_8R8G8B:
+	    for (i=0;i<n;i++) {
+               unsigned long p = read_pixel( dpy, buffer,
+                                             x[i], YFLIP(xrb, y[i]) );
+               rgba[i][RCOMP] = (GLubyte) ((p >> 16) & 0xff);
+               rgba[i][GCOMP] = (GLubyte) ((p >> 8)  & 0xff);
+               rgba[i][BCOMP] = (GLubyte) ( p        & 0xff);
+               rgba[i][ACOMP] = 255;
+	    }
+	    break;
+	 case PF_8R8G8B24:
+	    for (i=0;i<n;i++) {
+               unsigned long p = read_pixel( dpy, buffer,
+                                             x[i], YFLIP(xrb, y[i]) );
+               rgba[i][RCOMP] = (GLubyte) ((p >> 16) & 0xff);
+               rgba[i][GCOMP] = (GLubyte) ((p >> 8)  & 0xff);
+               rgba[i][BCOMP] = (GLubyte) ( p        & 0xff);
+               rgba[i][ACOMP] = 255;
+	    }
+	    break;
+         case PF_HPCR:  /* 3 red, 3 green, 2 blue bits in the low byte */
+            for (i=0;i<n;i++) {
+               unsigned long p = read_pixel( dpy, buffer,
+                                             x[i], YFLIP(xrb, y[i]) );
+               rgba[i][RCOMP] = (GLubyte) ( p & 0xE0      );
+               rgba[i][GCOMP] = (GLubyte) ((p & 0x1C) << 3);
+               rgba[i][BCOMP] = (GLubyte) ((p & 0x03) << 6);
+               rgba[i][ACOMP] = (GLubyte) 255;
+            }
+            break;
+	 case PF_Dither:
+	 case PF_Lookup:
+	 case PF_Grayscale:
+            {
+               GLubyte *rTable = source->pixel_to_r;
+               GLubyte *gTable = source->pixel_to_g;
+               GLubyte *bTable = source->pixel_to_b;
+               for (i=0;i<n;i++) {
+                  unsigned long p = read_pixel( dpy, buffer,
+                                                x[i], YFLIP(xrb, y[i]) );
+                  rgba[i][RCOMP] = rTable[p];
+                  rgba[i][GCOMP] = gTable[p];
+                  rgba[i][BCOMP] = bTable[p];
+                  rgba[i][ACOMP] = 255;
+               }
+	    }
+	    break;
+	 case PF_1Bit:
+            {
+               int bitFlip = xmesa->xm_visual->bitFlip;
+               for (i=0;i<n;i++) {
+                  unsigned long p = read_pixel( dpy, buffer,
+                                           x[i], YFLIP(xrb, y[i])) ^ bitFlip;
+                  rgba[i][RCOMP] = (GLubyte) (p * 255);
+                  rgba[i][GCOMP] = (GLubyte) (p * 255);
+                  rgba[i][BCOMP] = (GLubyte) (p * 255);
+                  rgba[i][ACOMP] = 255;
+               }
+	    }
+	    break;
+	 default:
+	    _mesa_problem(NULL,"Problem in DD.read_color_pixels (1)");
+            return;
+      }
+   }
+   else if (xrb->ximage) {
+      /* Read from XImage back buffer */
+      switch (xmesa->pixelformat) {
+	 case PF_Truecolor:
+         case PF_Dither_True:
+         case PF_5R6G5B:
+         case PF_Dither_5R6G5B:
+            {
+               unsigned long rMask = GET_REDMASK(xmesa->xm_visual);
+               unsigned long gMask = GET_GREENMASK(xmesa->xm_visual);
+               unsigned long bMask = GET_BLUEMASK(xmesa->xm_visual);
+               GLubyte *pixelToR = xmesa->xm_visual->PixelToR;
+               GLubyte *pixelToG = xmesa->xm_visual->PixelToG;
+               GLubyte *pixelToB = xmesa->xm_visual->PixelToB;
+               GLint rShift = xmesa->xm_visual->rshift;
+               GLint gShift = xmesa->xm_visual->gshift;
+               GLint bShift = xmesa->xm_visual->bshift;
+               XMesaImage *img = xrb->ximage;
+               for (i=0;i<n;i++) {
+                  unsigned long p;
+                  p = XMesaGetPixel( img, x[i], YFLIP(xrb, y[i]) );
+                  rgba[i][RCOMP] = pixelToR[(p & rMask) >> rShift];
+                  rgba[i][GCOMP] = pixelToG[(p & gMask) >> gShift];
+                  rgba[i][BCOMP] = pixelToB[(p & bMask) >> bShift];
+                  rgba[i][ACOMP] = 255;
+               }
+            }
+            break;
+	 case PF_8A8B8G8R:
+	    for (i=0;i<n;i++) {
+               GLuint *ptr4 = PIXEL_ADDR4(xrb, x[i], y[i]);
+               GLuint p4 = *ptr4;
+               rgba[i][RCOMP] = (GLubyte) ( p4        & 0xff);
+               rgba[i][GCOMP] = (GLubyte) ((p4 >> 8)  & 0xff);
+               rgba[i][BCOMP] = (GLubyte) ((p4 >> 16) & 0xff);
+               rgba[i][ACOMP] = (GLubyte) ((p4 >> 24) & 0xff);
+	    }
+	    break;
+	 case PF_8A8R8G8B:
+	    for (i=0;i<n;i++) {
+               GLuint *ptr4 = PIXEL_ADDR4(xrb, x[i], y[i]);
+               GLuint p4 = *ptr4;
+               rgba[i][RCOMP] = (GLubyte) ((p4 >> 16) & 0xff);
+               rgba[i][GCOMP] = (GLubyte) ((p4 >> 8)  & 0xff);
+               rgba[i][BCOMP] = (GLubyte) ( p4        & 0xff);
+               rgba[i][ACOMP] = (GLubyte) ((p4 >> 24) & 0xff);
+	    }
+	    break;
+	 case PF_8R8G8B:
+	    for (i=0;i<n;i++) {
+               GLuint *ptr4 = PIXEL_ADDR4(xrb, x[i], y[i]);
+               GLuint p4 = *ptr4;
+               rgba[i][RCOMP] = (GLubyte) ((p4 >> 16) & 0xff);
+               rgba[i][GCOMP] = (GLubyte) ((p4 >> 8)  & 0xff);
+               rgba[i][BCOMP] = (GLubyte) ( p4        & 0xff);
+               rgba[i][ACOMP] = 255;
+	    }
+	    break;
+	 case PF_8R8G8B24:
+	    for (i=0;i<n;i++) {
+               bgr_t *ptr3 = PIXEL_ADDR3(xrb, x[i], y[i]);
+               rgba[i][RCOMP] = ptr3->r;
+               rgba[i][GCOMP] = ptr3->g;
+               rgba[i][BCOMP] = ptr3->b;
+               rgba[i][ACOMP] = 255;
+	    }
+	    break;
+         case PF_HPCR:
+            for (i=0;i<n;i++) {
+               GLubyte *ptr1 = PIXEL_ADDR1(xrb, x[i], y[i]);
+               GLubyte p = *ptr1;
+               rgba[i][RCOMP] =  p & 0xE0;
+               rgba[i][GCOMP] = (p & 0x1C) << 3;
+               rgba[i][BCOMP] = (p & 0x03) << 6;
+               rgba[i][ACOMP] = 255;
+            }
+            break;
+	 case PF_Dither:
+	 case PF_Lookup:
+	 case PF_Grayscale:
+            {
+               GLubyte *rTable = source->pixel_to_r;
+               GLubyte *gTable = source->pixel_to_g;
+               GLubyte *bTable = source->pixel_to_b;
+               XMesaImage *img = xrb->ximage;
+               for (i=0;i<n;i++) {
+                  unsigned long p;
+                  p = XMesaGetPixel( img, x[i], YFLIP(xrb, y[i]) );
+                  rgba[i][RCOMP] = rTable[p];
+                  rgba[i][GCOMP] = gTable[p];
+                  rgba[i][BCOMP] = bTable[p];
+                  rgba[i][ACOMP] = 255;
+               }
+	    }
+	    break;
+	 case PF_1Bit:
+            {
+               XMesaImage *img = xrb->ximage;
+               int bitFlip = xmesa->xm_visual->bitFlip;
+               for (i=0;i<n;i++) {
+                  unsigned long p;
+                  p = XMesaGetPixel( img, x[i], YFLIP(xrb, y[i]) ) ^ bitFlip;
+                  rgba[i][RCOMP] = (GLubyte) (p * 255);
+                  rgba[i][GCOMP] = (GLubyte) (p * 255);
+                  rgba[i][BCOMP] = (GLubyte) (p * 255);
+                  rgba[i][ACOMP] = 255;
+               }
+	    }
+	    break;
+	 default:
+	    _mesa_problem(NULL,"Problem in DD.read_color_pixels (2)");
+            return;
+      }
+   }
+}
+
+
+/**
+ * Initialize the renderbuffer's PutRow, GetRow, etc. functions.
+ * This would generally only need to be called once when the renderbuffer
+ * is created.  However, we can change pixel formats on the fly if dithering
+ * is enabled/disabled.  Therefore, we may call this more often than that.
+ */
+void
+xmesa_set_renderbuffer_funcs(struct xmesa_renderbuffer *xrb,
+                             enum pixel_format pixelformat, GLint depth)
+{
+   const GLboolean pixmap = xrb->pixmap ? GL_TRUE : GL_FALSE;
+
+   switch (pixelformat) {
+   case PF_Index:
+      ASSERT(xrb->Base.DataType == GL_UNSIGNED_INT);
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_ci_pixmap;
+         xrb->Base.PutRowRGB     = NULL;
+         xrb->Base.PutMonoRow    = put_mono_row_ci_pixmap;
+         xrb->Base.PutValues     = put_values_ci_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_ci_pixmap;
+      }
+      else {
+         xrb->Base.PutRow        = put_row_ci_ximage;
+         xrb->Base.PutRowRGB     = NULL;
+         xrb->Base.PutMonoRow    = put_mono_row_ci_ximage;
+         xrb->Base.PutValues     = put_values_ci_ximage;
+         xrb->Base.PutMonoValues = put_mono_values_ci_ximage;
+      }
+      break;
+   case PF_Truecolor:
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_TRUECOLOR_pixmap;
+         xrb->Base.PutRowRGB     = put_row_rgb_TRUECOLOR_pixmap;
+         xrb->Base.PutMonoRow    = put_mono_row_pixmap;
+         xrb->Base.PutValues     = put_values_TRUECOLOR_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_pixmap;
+      }
+      else {
+         xrb->Base.PutRow        = put_row_TRUECOLOR_ximage;
+         xrb->Base.PutRowRGB     = put_row_rgb_TRUECOLOR_ximage;
+         xrb->Base.PutMonoRow    = put_mono_row_ximage;
+         xrb->Base.PutValues     = put_values_TRUECOLOR_ximage;
+         xrb->Base.PutMonoValues = put_mono_values_ximage;
+      }
+      break;
+   case PF_Dither_True:
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_TRUEDITHER_pixmap;
+         xrb->Base.PutRowRGB     = put_row_rgb_TRUEDITHER_pixmap;
+         xrb->Base.PutMonoRow    = put_mono_row_TRUEDITHER_pixmap;
+         xrb->Base.PutValues     = put_values_TRUEDITHER_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_TRUEDITHER_pixmap;
+      }
+      else {
+         xrb->Base.PutRow        = put_row_TRUEDITHER_ximage;
+         xrb->Base.PutRowRGB     = put_row_rgb_TRUEDITHER_ximage;
+         xrb->Base.PutMonoRow    = put_mono_row_TRUEDITHER_ximage;
+         xrb->Base.PutValues     = put_values_TRUEDITHER_ximage;
+         xrb->Base.PutMonoValues = put_mono_values_TRUEDITHER_ximage;
+      }
+      break;
+   case PF_8A8B8G8R:
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_8A8B8G8R_pixmap;
+         xrb->Base.PutRowRGB     = put_row_rgb_8A8B8G8R_pixmap;
+         xrb->Base.PutMonoRow    = put_mono_row_pixmap;
+         xrb->Base.PutValues     = put_values_8A8B8G8R_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_pixmap;
+      }
+      else {
+         xrb->Base.PutRow        = put_row_8A8B8G8R_ximage;
+         xrb->Base.PutRowRGB     = put_row_rgb_8A8B8G8R_ximage;
+         xrb->Base.PutMonoRow    = put_mono_row_8A8B8G8R_ximage;
+         xrb->Base.PutValues     = put_values_8A8B8G8R_ximage;
+         xrb->Base.PutMonoValues = put_mono_values_8A8B8G8R_ximage;
+      }
+      break;
+   case PF_8A8R8G8B:
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_8A8R8G8B_pixmap;
+         xrb->Base.PutRowRGB     = put_row_rgb_8A8R8G8B_pixmap;
+         xrb->Base.PutMonoRow    = put_mono_row_pixmap;
+         xrb->Base.PutValues     = put_values_8A8R8G8B_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_pixmap;
+      }
+      else {
+         xrb->Base.PutRow        = put_row_8A8R8G8B_ximage;
+         xrb->Base.PutRowRGB     = put_row_rgb_8A8R8G8B_ximage;
+         xrb->Base.PutMonoRow    = put_mono_row_8A8R8G8B_ximage;
+         xrb->Base.PutValues     = put_values_8A8R8G8B_ximage;
+         xrb->Base.PutMonoValues = put_mono_values_8A8R8G8B_ximage;
+      }
+      break;
+   case PF_8R8G8B:
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_8R8G8B_pixmap;
+         xrb->Base.PutRowRGB     = put_row_rgb_8R8G8B_pixmap;
+         xrb->Base.PutMonoRow    = put_mono_row_pixmap;
+         xrb->Base.PutValues     = put_values_8R8G8B_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_pixmap;
+      }
+      else {
+         xrb->Base.PutRow        = put_row_8R8G8B_ximage;
+         xrb->Base.PutRowRGB     = put_row_rgb_8R8G8B_ximage;
+         xrb->Base.PutMonoRow    = put_mono_row_8R8G8B_ximage;
+         xrb->Base.PutValues     = put_values_8R8G8B_ximage;
+         xrb->Base.PutMonoValues = put_mono_values_8R8G8B_ximage;
+      }
+      break;
+   case PF_8R8G8B24:
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_8R8G8B24_pixmap;
+         xrb->Base.PutRowRGB     = put_row_rgb_8R8G8B24_pixmap;
+         xrb->Base.PutMonoRow    = put_mono_row_pixmap;
+         xrb->Base.PutValues     = put_values_8R8G8B24_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_pixmap;
+      }
+      else {
+         xrb->Base.PutRow        = put_row_8R8G8B24_ximage;
+         xrb->Base.PutRowRGB     = put_row_rgb_8R8G8B24_ximage;
+         xrb->Base.PutMonoRow    = put_mono_row_8R8G8B24_ximage;
+         xrb->Base.PutValues     = put_values_8R8G8B24_ximage;
+         xrb->Base.PutMonoValues = put_mono_values_8R8G8B24_ximage;
+      }
+      break;
+   case PF_5R6G5B:
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_5R6G5B_pixmap;
+         xrb->Base.PutRowRGB     = put_row_rgb_5R6G5B_pixmap;
+         xrb->Base.PutMonoRow    = put_mono_row_pixmap;
+         xrb->Base.PutValues     = put_values_5R6G5B_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_pixmap;
+      }
+      else {
+         xrb->Base.PutRow        = put_row_5R6G5B_ximage;
+         xrb->Base.PutRowRGB     = put_row_rgb_5R6G5B_ximage;
+         xrb->Base.PutMonoRow    = put_mono_row_ximage;
+         xrb->Base.PutValues     = put_values_5R6G5B_ximage;
+         xrb->Base.PutMonoValues = put_mono_values_ximage;
+      }
+      break;
+   case PF_Dither_5R6G5B:
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_DITHER_5R6G5B_pixmap;
+         xrb->Base.PutRowRGB     = put_row_rgb_DITHER_5R6G5B_pixmap;
+         xrb->Base.PutMonoRow    = put_mono_row_TRUEDITHER_pixmap;
+         xrb->Base.PutValues     = put_values_DITHER_5R6G5B_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_TRUEDITHER_pixmap;
+      }
+      else {
+         xrb->Base.PutRow        = put_row_DITHER_5R6G5B_ximage;
+         xrb->Base.PutRowRGB     = put_row_rgb_DITHER_5R6G5B_ximage;
+         xrb->Base.PutMonoRow    = put_mono_row_DITHER_5R6G5B_ximage;
+         xrb->Base.PutValues     = put_values_DITHER_5R6G5B_ximage;
+         xrb->Base.PutMonoValues = put_mono_values_DITHER_5R6G5B_ximage;
+      }
+      break;
+   case PF_Dither:
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_DITHER_pixmap;
+         xrb->Base.PutRowRGB     = put_row_rgb_DITHER_pixmap;
+         xrb->Base.PutMonoRow    = put_mono_row_DITHER_pixmap;
+         xrb->Base.PutValues     = put_values_DITHER_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_DITHER_pixmap;
+      }
+      else {
+         if (depth == 8) {
+            xrb->Base.PutRow        = put_row_DITHER8_ximage;
+            xrb->Base.PutRowRGB     = put_row_rgb_DITHER8_ximage;
+            xrb->Base.PutMonoRow    = put_mono_row_DITHER8_ximage;
+            xrb->Base.PutValues     = put_values_DITHER8_ximage;
+            xrb->Base.PutMonoValues = put_mono_values_DITHER8_ximage;
+         }
+         else {
+            xrb->Base.PutRow        = put_row_DITHER_ximage;
+            xrb->Base.PutRowRGB     = put_row_rgb_DITHER_ximage;
+            xrb->Base.PutMonoRow    = put_mono_row_DITHER_ximage;
+            xrb->Base.PutValues     = put_values_DITHER_ximage;
+            xrb->Base.PutMonoValues = put_mono_values_DITHER_ximage;
+         }
+      }
+      break;
+   case PF_1Bit:
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_1BIT_pixmap;
+         xrb->Base.PutRowRGB     = put_row_rgb_1BIT_pixmap;
+         xrb->Base.PutMonoRow    = put_mono_row_1BIT_pixmap;
+         xrb->Base.PutValues     = put_values_1BIT_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_1BIT_pixmap;
+      }
+      else {
+         xrb->Base.PutRow        = put_row_1BIT_ximage;
+         xrb->Base.PutRowRGB     = put_row_rgb_1BIT_ximage;
+         xrb->Base.PutMonoRow    = put_mono_row_1BIT_ximage;
+         xrb->Base.PutValues     = put_values_1BIT_ximage;
+         xrb->Base.PutMonoValues = put_mono_values_1BIT_ximage;
+      }
+      break;
+   case PF_HPCR:
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_HPCR_pixmap;
+         xrb->Base.PutRowRGB     = put_row_rgb_HPCR_pixmap;
+         xrb->Base.PutMonoRow    = put_mono_row_pixmap;
+         xrb->Base.PutValues     = put_values_HPCR_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_pixmap;
+      }
+      else {
+         xrb->Base.PutRow        = put_row_HPCR_ximage;
+         xrb->Base.PutRowRGB     = put_row_rgb_HPCR_ximage;
+         xrb->Base.PutMonoRow    = put_mono_row_HPCR_ximage;
+         xrb->Base.PutValues     = put_values_HPCR_ximage;
+         xrb->Base.PutMonoValues = put_mono_values_HPCR_ximage;
+      }
+      break;
+   case PF_Lookup:
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_LOOKUP_pixmap;
+         xrb->Base.PutRowRGB     = put_row_rgb_LOOKUP_pixmap;
+         xrb->Base.PutMonoRow    = put_mono_row_pixmap;
+         xrb->Base.PutValues     = put_values_LOOKUP_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_pixmap;
+      }
+      else {
+         if (depth==8) {
+            xrb->Base.PutRow        = put_row_LOOKUP8_ximage;
+            xrb->Base.PutRowRGB     = put_row_rgb_LOOKUP8_ximage;
+            xrb->Base.PutMonoRow    = put_mono_row_LOOKUP8_ximage;
+            xrb->Base.PutValues     = put_values_LOOKUP8_ximage;
+            xrb->Base.PutMonoValues = put_mono_values_LOOKUP8_ximage;
+         }
+         else {
+            xrb->Base.PutRow        = put_row_LOOKUP_ximage;
+            xrb->Base.PutRowRGB     = put_row_rgb_LOOKUP_ximage;
+            xrb->Base.PutMonoRow    = put_mono_row_ximage;
+            xrb->Base.PutValues     = put_values_LOOKUP_ximage;
+            xrb->Base.PutMonoValues = put_mono_values_ximage;
+         }
+      }
+      break;
+   case PF_Grayscale:
+      if (pixmap) {
+         xrb->Base.PutRow        = put_row_GRAYSCALE_pixmap;
+         xrb->Base.PutRowRGB     = put_row_rgb_GRAYSCALE_pixmap;
+         xrb->Base.PutMonoRow    = put_mono_row_pixmap;
+         xrb->Base.PutValues     = put_values_GRAYSCALE_pixmap;
+         xrb->Base.PutMonoValues = put_mono_values_pixmap;
+      }
+      else {
+         if (depth == 8) {
+            xrb->Base.PutRow        = put_row_GRAYSCALE8_ximage;
+            xrb->Base.PutRowRGB     = put_row_rgb_GRAYSCALE8_ximage;
+            xrb->Base.PutMonoRow    = put_mono_row_GRAYSCALE8_ximage;
+            xrb->Base.PutValues     = put_values_GRAYSCALE8_ximage;
+            xrb->Base.PutMonoValues = put_mono_values_GRAYSCALE8_ximage;
+         }
+         else {
+            xrb->Base.PutRow        = put_row_GRAYSCALE_ximage;
+            xrb->Base.PutRowRGB     = put_row_rgb_GRAYSCALE_ximage;
+            xrb->Base.PutMonoRow    = put_mono_row_ximage;
+            xrb->Base.PutValues     = put_values_GRAYSCALE_ximage;
+            xrb->Base.PutMonoValues = put_mono_values_ximage;
+         }
+      }
+      break;
+   default:
+      _mesa_problem(NULL, "Bad pixel format in xmesa_update_state (1)");
+      return;
+   }
+
+
+   /* Get functions */
+   if (pixelformat == PF_Index) {
+      xrb->Base.GetRow = get_row_ci;
+      xrb->Base.GetValues = get_values_ci;
+   }
+   else {
+      xrb->Base.GetRow = get_row_rgba;
+      xrb->Base.GetValues = get_values_rgba;
+   }
+}
+
diff --git a/src/mesa/drivers/x11/xm_tri.c b/src/mesa/drivers/x11/xm_tri.c
new file mode 100644
index 0000000000..a6efb35e3c
--- /dev/null
+++ b/src/mesa/drivers/x11/xm_tri.c
@@ -0,0 +1,1654 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  6.5
+ *
+ * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * This file contains "accelerated" triangle functions.  It should be
+ * fairly easy to write new special-purpose triangle functions and hook
+ * them into this module.
+ */
+
+
+#include "main/depth.h"
+#include "main/macros.h"
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "glxheader.h"
+#include "xmesaP.h"
+
+/* Internal swrast includes:
+ */
+#include "swrast/s_context.h"
+#include "swrast/s_depth.h"
+#include "swrast/s_triangle.h"
+
+/* Declare XRB from ctx->DrawBuffer->_ColorDrawBuffers[0]->Wrapped; needs ctx. */
+#define GET_XRB(XRB)  struct xmesa_renderbuffer *XRB = \
+   xmesa_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]->Wrapped)
+
+
+/**********************************************************************/
+/***                   Triangle rendering                           ***/
+/**********************************************************************/
+
+
+#if CHAN_BITS == 8
+
+/* XImage, smooth, depth-buffered, PF_TRUECOLOR triangle.  Pixels passing
+ * z < zRow[i] are packed with PACK_TRUECOLOR and stored via XMesaPutPixel.
+ * NOTE(review): zRow presumably comes from swrast/s_tritemp.h -- confirm. */
+#define NAME smooth_TRUECOLOR_z_triangle
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_RGB 1
+#define SETUP_CODE						\
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);			\
+   GET_XRB(xrb);
+
+#define RENDER_SPAN( span ) {					\
+   GLint x = span.x, y = YFLIP(xrb, span.y);			\
+   GLuint i;							\
+   for (i = 0; i < span.end; i++, x++) {			\
+      const DEPTH_TYPE z = FixedToDepth(span.z);		\
+      if (z < zRow[i]) {					\
+         unsigned long p;					\
+         PACK_TRUECOLOR(p, FixedToInt(span.red),		\
+            FixedToInt(span.green), FixedToInt(span.blue));	\
+         XMesaPutPixel(xrb->ximage, x, y, p);			\
+         zRow[i] = z;						\
+      }								\
+      span.red += span.redStep;					\
+      span.green += span.greenStep;				\
+      span.blue += span.blueStep;				\
+      span.z += span.zStep;					\
+   } }
+
+#include "swrast/s_tritemp.h"
+
+
+
+
+/* XImage, smooth, depth-buffered, PF_8A8B8G8R triangle.  Pixels passing
+ * z < zRow[i] are written to pRow[i] as PACK_8A8B8G8R(r, g, b, a).
+ * NOTE(review): zRow/pRow presumably come from s_tritemp.h -- confirm. */
+#define NAME smooth_8A8B8G8R_z_triangle
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_RGB 1
+#define INTERP_ALPHA 1
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define PIXEL_TYPE GLuint
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE						\
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) {					\
+   GLuint i;							\
+   for (i = 0; i < span.end; i++) {				\
+      const DEPTH_TYPE z = FixedToDepth(span.z);		\
+      if (z < zRow[i]) {					\
+         pRow[i] = PACK_8A8B8G8R(FixedToInt(span.red),		\
+            FixedToInt(span.green), FixedToInt(span.blue),	\
+            FixedToInt(span.alpha));				\
+         zRow[i] = z;						\
+      }								\
+      span.red += span.redStep;					\
+      span.green += span.greenStep;				\
+      span.blue += span.blueStep;				\
+      span.alpha += span.alphaStep;				\
+      span.z += span.zStep;					\
+   } }
+
+#include "swrast/s_tritemp.h"
+
+
+
+/* XImage, smooth, depth-buffered, PF_8A8R8G8B triangle.  Pixels passing
+ * z < zRow[i] are written to pRow[i] as PACK_8A8R8G8B(r, g, b, a).
+ * NOTE(review): zRow/pRow presumably come from s_tritemp.h -- confirm. */
+#define NAME smooth_8A8R8G8B_z_triangle
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_RGB 1
+#define INTERP_ALPHA 1
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define PIXEL_TYPE GLuint
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE						\
+   GET_XRB(xrb);
+
+#define RENDER_SPAN( span ) {					\
+   GLuint i;							\
+   for (i = 0; i < span.end; i++) {				\
+      const DEPTH_TYPE z = FixedToDepth(span.z);		\
+      if (z < zRow[i]) {					\
+         pRow[i] = PACK_8A8R8G8B(FixedToInt(span.red),		\
+            FixedToInt(span.green), FixedToInt(span.blue),	\
+            FixedToInt(span.alpha));				\
+         zRow[i] = z;						\
+      }								\
+      span.red += span.redStep;					\
+      span.green += span.greenStep;				\
+      span.blue += span.blueStep;				\
+      span.alpha += span.alphaStep;				\
+      span.z += span.zStep;					\
+   } }
+
+#include "swrast/s_tritemp.h"
+
+
+
+/* XImage, smooth, depth-buffered, PF_8R8G8B triangle.  Pixels passing
+ * z < zRow[i] are written to pRow[i] as PACK_8R8G8B(r, g, b).
+ * NOTE(review): zRow/pRow presumably come from s_tritemp.h -- confirm. */
+#define NAME smooth_8R8G8B_z_triangle
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_RGB 1
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define PIXEL_TYPE GLuint
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE						\
+   GET_XRB(xrb);
+
+#define RENDER_SPAN( span ) {					\
+   GLuint i;							\
+   for (i = 0; i < span.end; i++) {				\
+      const DEPTH_TYPE z = FixedToDepth(span.z);		\
+      if (z < zRow[i]) {					\
+         pRow[i] = PACK_8R8G8B(FixedToInt(span.red),		\
+            FixedToInt(span.green), FixedToInt(span.blue));	\
+         zRow[i] = z;						\
+      }								\
+      span.red += span.redStep;					\
+      span.green += span.greenStep;				\
+      span.blue += span.blueStep;				\
+      span.z += span.zStep;					\
+   } }
+
+#include "swrast/s_tritemp.h"
+
+
+
+/* XImage, smooth, depth-buffered, PF_8R8G8B24 triangle.  Pixels passing
+ * z < zRow[i] get the r/g/b fields of the bgr_t at pRow + i overwritten.
+ * NOTE(review): zRow/pRow presumably come from s_tritemp.h -- confirm. */
+#define NAME smooth_8R8G8B24_z_triangle
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_RGB 1
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR3(xrb, X, Y)
+#define PIXEL_TYPE bgr_t
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE						\
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) {					\
+   GLuint i;							\
+   for (i = 0; i < span.end; i++) {				\
+      const DEPTH_TYPE z = FixedToDepth(span.z);		\
+      if (z < zRow[i]) {					\
+	 PIXEL_TYPE *ptr = pRow + i;				\
+         ptr->r = FixedToInt(span.red);				\
+         ptr->g = FixedToInt(span.green);			\
+         ptr->b = FixedToInt(span.blue);			\
+         zRow[i] = z;						\
+      }								\
+      span.red += span.redStep;					\
+      span.green += span.greenStep;				\
+      span.blue += span.blueStep;				\
+      span.z += span.zStep;					\
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* XImage, smooth, depth-buffered, PF_TRUEDITHER triangle.  Pixels passing
+ * z < zRow[i] are packed by PACK_TRUEDITHER(p, x, y, ...) using the flipped
+ * y and stored via XMesaPutPixel.  NOTE(review): zRow source -- confirm. */
+#define NAME smooth_TRUEDITHER_z_triangle
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_RGB 1
+#define SETUP_CODE						\
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);			\
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) {					\
+   GLuint i;							\
+   GLint x = span.x, y = YFLIP(xrb, span.y);			\
+   for (i = 0; i < span.end; i++, x++) {			\
+      const DEPTH_TYPE z = FixedToDepth(span.z);		\
+      if (z < zRow[i]) {					\
+         unsigned long p;					\
+         PACK_TRUEDITHER(p, x, y, FixedToInt(span.red),		\
+            FixedToInt(span.green), FixedToInt(span.blue));	\
+         XMesaPutPixel(xrb->ximage, x, y, p);			\
+         zRow[i] = z;						\
+      }								\
+      span.red += span.redStep;					\
+      span.green += span.greenStep;				\
+      span.blue += span.blueStep;				\
+      span.z += span.zStep;					\
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_5R6G5B_z_triangle: smooth-shaded, depth-tested triangle written
+ * directly into a PF_5R6G5B XImage with GLushort pixels; the color of each
+ * passing fragment is packed with PACK_5R6G5B. */
+#define NAME smooth_5R6G5B_z_triangle
+#define INTERP_RGB 1
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE GLushort
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR2(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   for (col = 0; col < span.end; col++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         zRow[col] = depth; \
+         pRow[col] = PACK_5R6G5B(FixedToInt(span.red), \
+                                 FixedToInt(span.green), FixedToInt(span.blue)); \
+      } \
+      span.z += span.zStep; \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_DITHER_5R6G5B_z_triangle: smooth-shaded, depth-tested triangle for
+ * a PF_DITHER_5R6G5B XImage with GLushort pixels.  PACK_TRUEDITHER writes
+ * the color of each passing fragment straight into pRow. */
+#define NAME smooth_DITHER_5R6G5B_z_triangle
+#define INTERP_RGB 1
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE GLushort
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR2(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         PACK_TRUEDITHER(pRow[col], x, y, FixedToInt(span.red), \
+                         FixedToInt(span.green), FixedToInt(span.blue)); \
+         zRow[col] = depth; \
+      } \
+      span.z += span.zStep; \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_DITHER8_z_triangle: smooth-shaded, depth-tested triangle for an
+ * 8-bit PF_DITHER8 XImage (GLubyte pixels).  XDITHER_SETUP runs once per
+ * span with the flipped y; XDITHER yields each passing fragment's pixel. */
+#define NAME smooth_DITHER8_z_triangle
+#define INTERP_RGB 1
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE GLubyte
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   XDITHER_SETUP(y); \
+   for (col = 0; col < span.end; col++, x++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         pRow[col] = (PIXEL_TYPE) XDITHER(x, FixedToInt(span.red), \
+                        FixedToInt(span.green), FixedToInt(span.blue)); \
+         zRow[col] = depth; \
+      } \
+      span.z += span.zStep; \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_DITHER_z_triangle: smooth-shaded, depth-tested triangle for a
+ * PF_DITHER XImage of any pixel size.  Each passing fragment's XDITHER
+ * result is stored through XMesaPutPixel instead of a direct row write. */
+#define NAME smooth_DITHER_z_triangle
+#define INTERP_RGB 1
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define SETUP_CODE \
+   GET_XRB(xrb); \
+   XMesaImage *dstImage = xrb->ximage;
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   XDITHER_SETUP(y); \
+   for (col = 0; col < span.end; col++, x++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         unsigned long p = XDITHER(x, FixedToInt(span.red), \
+                              FixedToInt(span.green), FixedToInt(span.blue)); \
+         XMesaPutPixel(dstImage, x, y, p); \
+         zRow[col] = depth; \
+      } \
+      span.z += span.zStep; \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_LOOKUP8_z_triangle: smooth-shaded, depth-tested triangle for an
+ * 8-bit PF_LOOKUP XImage (GLubyte pixels).  LOOKUP_SETUP runs once per
+ * span; LOOKUP maps each passing fragment's r/g/b to the stored pixel. */
+#define NAME smooth_LOOKUP8_z_triangle
+#define INTERP_RGB 1
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE GLubyte
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   LOOKUP_SETUP; \
+   for (col = 0; col < span.end; col++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         pRow[col] = LOOKUP(FixedToInt(span.red), \
+                            FixedToInt(span.green), FixedToInt(span.blue)); \
+         zRow[col] = depth; \
+      } \
+      span.z += span.zStep; \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_HPCR_z_triangle: smooth-shaded, depth-tested triangle for an 8-bit
+ * PF_HPCR XImage (GLubyte pixels).  DITHER_HPCR computes the pixel for each
+ * passing fragment from its x, the flipped y and the interpolated color. */
+#define NAME smooth_HPCR_z_triangle
+#define INTERP_RGB 1
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_TYPE GLubyte
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         pRow[col] = DITHER_HPCR(x, y, FixedToInt(span.red), \
+                        FixedToInt(span.green), FixedToInt(span.blue)); \
+         zRow[col] = depth; \
+      } \
+      span.z += span.zStep; \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_TRUECOLOR_z_triangle: flat-shaded, depth-tested triangle for a
+ * PF_TRUECOLOR XImage.  The pixel is packed once from v2's color in the
+ * setup code and stored via XMesaPutPixel where the depth test passes. */
+#define NAME flat_TRUECOLOR_z_triangle
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_Z 1
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb); \
+   XMesaImage *dstImage = xrb->ximage; \
+   unsigned long flatPixel; \
+   PACK_TRUECOLOR(flatPixel, v2->color[0], v2->color[1], v2->color[2]);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         zRow[col] = depth; \
+         XMesaPutPixel(dstImage, x, y, flatPixel); \
+      } \
+      span.z += span.zStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_8A8B8G8R_z_triangle: flat-shaded, depth-tested triangle for a
+ * PF_8A8B8G8R XImage (GLuint pixels).  The pixel is packed once from v2's
+ * four color components and written where the fragment depth < zRow. */
+#define NAME flat_8A8B8G8R_z_triangle
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_Z 1
+#define PIXEL_TYPE GLuint
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb); \
+   GLuint flatPixel = PACK_8A8B8G8R(v2->color[0], v2->color[1], \
+                                    v2->color[2], v2->color[3]);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   for (col = 0; col < span.end; col++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         zRow[col] = depth; \
+         pRow[col] = (PIXEL_TYPE) flatPixel; \
+      } \
+      span.z += span.zStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_8A8R8G8B_z_triangle: flat-shaded, depth-tested triangle for a
+ * PF_8A8R8G8B XImage (GLuint pixels).  The pixel is packed once from v2's
+ * four color components and written where the fragment depth < zRow. */
+#define NAME flat_8A8R8G8B_z_triangle
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_Z 1
+#define PIXEL_TYPE GLuint
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb); \
+   GLuint flatPixel = PACK_8A8R8G8B(v2->color[0], v2->color[1], \
+                                    v2->color[2], v2->color[3]);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   for (col = 0; col < span.end; col++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         zRow[col] = depth; \
+         pRow[col] = (PIXEL_TYPE) flatPixel; \
+      } \
+      span.z += span.zStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_8R8G8B_z_triangle: flat-shaded, depth-tested triangle for a
+ * PF_8R8G8B XImage (GLuint pixels).  The pixel is packed once from v2's
+ * r/g/b and written wherever the fragment depth is less than zRow. */
+#define NAME flat_8R8G8B_z_triangle
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_Z 1
+#define PIXEL_TYPE GLuint
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb); \
+   GLuint flatPixel = PACK_8R8G8B(v2->color[0], v2->color[1], v2->color[2]);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   for (col = 0; col < span.end; col++) { \
+      DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         zRow[col] = depth; \
+         pRow[col] = (PIXEL_TYPE) flatPixel; \
+      } \
+      span.z += span.zStep; \
+   } }
+
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_8R8G8B24_z_triangle: flat-shaded, depth-tested triangle for a
+ * PF_8R8G8B24 XImage with bgr_t pixels.  Passing fragments copy the
+ * RCOMP/GCOMP/BCOMP entries of v2's color into the pixel's r/g/b. */
+#define NAME flat_8R8G8B24_z_triangle
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_Z 1
+#define PIXEL_TYPE bgr_t
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR3(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb); \
+   const GLubyte *flatColor = v2->color;
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   for (col = 0; col < span.end; col++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         PIXEL_TYPE *dst = &pRow[col]; \
+         dst->b = flatColor[BCOMP]; \
+         dst->g = flatColor[GCOMP]; \
+         dst->r = flatColor[RCOMP]; \
+         zRow[col] = depth; \
+      } \
+      span.z += span.zStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_TRUEDITHER_z_triangle: flat-shaded, depth-tested triangle for a
+ * PF_TRUEDITHER XImage.  v2's color is re-packed with PACK_TRUEDITHER for
+ * every passing fragment (it takes x and y) and stored by XMesaPutPixel. */
+#define NAME flat_TRUEDITHER_z_triangle
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_Z 1
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb); \
+   XMesaImage *dstImage = xrb->ximage;
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         unsigned long p; \
+         PACK_TRUEDITHER(p, x, y, v2->color[0], \
+                         v2->color[1], v2->color[2]); \
+         XMesaPutPixel(dstImage, x, y, p); \
+         zRow[col] = depth; \
+      } \
+      span.z += span.zStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_5R6G5B_z_triangle: flat-shaded, depth-tested triangle for a
+ * PF_5R6G5B XImage (GLushort pixels).  The pixel is packed once from v2's
+ * r/g/b and written wherever the fragment depth is less than zRow. */
+#define NAME flat_5R6G5B_z_triangle
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_Z 1
+#define PIXEL_TYPE GLushort
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR2(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb); \
+   GLushort flatPixel = PACK_5R6G5B(v2->color[0], v2->color[1], v2->color[2]);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   for (col = 0; col < span.end; col++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         zRow[col] = depth; \
+         pRow[col] = (PIXEL_TYPE) flatPixel; \
+      } \
+      span.z += span.zStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_DITHER_5R6G5B_z_triangle: flat-shaded, depth-tested triangle for a
+ * PF_DITHER_5R6G5B XImage (GLushort pixels).  PACK_TRUEDITHER writes v2's
+ * color into pRow for each fragment that passes the depth test. */
+#define NAME flat_DITHER_5R6G5B_z_triangle
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_Z 1
+#define PIXEL_TYPE GLushort
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR2(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb); \
+   const GLubyte *flatColor = v2->color;
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         PACK_TRUEDITHER(pRow[col], x, y, flatColor[RCOMP], \
+                         flatColor[GCOMP], flatColor[BCOMP]); \
+         zRow[col] = depth; \
+      } \
+      span.z += span.zStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_DITHER8_z_triangle: flat-shaded, depth-tested triangle for an 8-bit
+ * PF_DITHER XImage.  The dither row is chosen from the already-flipped y
+ * (as in flat_DITHER_z_triangle); y must not be passed through YFLIP twice. */
+#define NAME flat_DITHER8_z_triangle
+#define INTERP_Z 1
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X, Y)
+#define PIXEL_TYPE GLubyte
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   FLAT_DITHER_SETUP( v2->color[0], v2->color[1], v2->color[2] );
+#define RENDER_SPAN( span ) {					\
+   GLuint i;							\
+   GLint x = span.x, y = YFLIP(xrb, span.y);			\
+   FLAT_DITHER_ROW_SETUP(y);					\
+   for (i = 0; i < span.end; i++, x++) {			\
+      const DEPTH_TYPE z = FixedToDepth(span.z);		\
+      if (z < zRow[i]) {					\
+	 pRow[i] = (PIXEL_TYPE) FLAT_DITHER(x);			\
+         zRow[i] = z;						\
+      }								\
+      span.z += span.zStep;					\
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_DITHER_z_triangle: flat-shaded, depth-tested triangle for a
+ * PF_DITHER XImage.  FLAT_DITHER_SETUP is run once with v2's color; each
+ * passing fragment stores FLAT_DITHER(x) through XMesaPutPixel. */
+#define NAME flat_DITHER_z_triangle
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_Z 1
+#define SETUP_CODE \
+   GET_XRB(xrb); \
+   XMesaImage *dstImage = xrb->ximage; \
+   FLAT_DITHER_SETUP( v2->color[0], v2->color[1], v2->color[2] );
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   FLAT_DITHER_ROW_SETUP(y); \
+   for (col = 0; col < span.end; col++, x++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         unsigned long p = FLAT_DITHER(x); \
+         XMesaPutPixel(dstImage, x, y, p); \
+         zRow[col] = depth; \
+      } \
+      span.z += span.zStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_HPCR_z_triangle: flat-shaded, depth-tested triangle for an 8-bit
+ * PF_HPCR XImage (GLubyte pixels).  v2's r/g/b are fetched once; each
+ * passing fragment stores DITHER_HPCR(x, y, r, g, b). */
+#define NAME flat_HPCR_z_triangle
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_Z 1
+#define PIXEL_TYPE GLubyte
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb); \
+   GLubyte b = v2->color[2]; \
+   GLubyte g = v2->color[1]; \
+   GLubyte r = v2->color[0];
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         pRow[col] = (PIXEL_TYPE) DITHER_HPCR(x, y, r, g, b); \
+         zRow[col] = depth; \
+      } \
+      span.z += span.zStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_LOOKUP8_z_triangle: flat-shaded, depth-tested triangle for an 8-bit
+ * PF_LOOKUP XImage (GLubyte pixels).  The pixel is looked up once from
+ * v2's r/g/b in the setup code and written where the depth test passes. */
+#define NAME flat_LOOKUP8_z_triangle
+#define DEPTH_TYPE DEFAULT_SOFTWARE_DEPTH_TYPE
+#define INTERP_Z 1
+#define PIXEL_TYPE GLubyte
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb); \
+   LOOKUP_SETUP; \
+   GLubyte b = v2->color[2]; \
+   GLubyte g = v2->color[1]; \
+   GLubyte r = v2->color[0]; \
+   GLubyte flatPixel = LOOKUP(r,g,b);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   for (col = 0; col < span.end; col++) { \
+      const DEPTH_TYPE depth = FixedToDepth(span.z); \
+      if (zRow[col] > depth) { \
+         zRow[col] = depth; \
+         pRow[col] = flatPixel; \
+      } \
+      span.z += span.zStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_TRUECOLOR_triangle: smooth-shaded triangle without depth testing
+ * for a PF_TRUECOLOR XImage.  Every fragment's interpolated color is packed
+ * with PACK_TRUECOLOR and stored through XMesaPutPixel. */
+#define NAME smooth_TRUECOLOR_triangle
+#define INTERP_RGB 1
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb); \
+   XMesaImage *dstImage = xrb->ximage;
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      unsigned long p; \
+      PACK_TRUECOLOR(p, FixedToInt(span.red), \
+                     FixedToInt(span.green), FixedToInt(span.blue)); \
+      XMesaPutPixel(dstImage, x, y, p); \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_8A8B8G8R_triangle: smooth-shaded triangle without depth testing
+ * for a PF_8A8B8G8R XImage (GLuint pixels).  Red, green, blue and alpha are
+ * all interpolated and packed into every pixel of the span. */
+#define NAME smooth_8A8B8G8R_triangle
+#define INTERP_ALPHA 1
+#define INTERP_RGB 1
+#define PIXEL_TYPE GLuint
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   for (col = 0; col < span.end; col++) { \
+      pRow[col] = PACK_8A8B8G8R(FixedToInt(span.red), \
+                                FixedToInt(span.green), FixedToInt(span.blue), \
+                                FixedToInt(span.alpha)); \
+      span.alpha += span.alphaStep; \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_8A8R8G8B_triangle: smooth-shaded triangle without depth testing
+ * for a PF_8A8R8G8B XImage (GLuint pixels).  Red, green, blue and alpha are
+ * all interpolated and packed into every pixel of the span. */
+#define NAME smooth_8A8R8G8B_triangle
+#define INTERP_ALPHA 1
+#define INTERP_RGB 1
+#define PIXEL_TYPE GLuint
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   for (col = 0; col < span.end; col++) { \
+      pRow[col] = PACK_8A8R8G8B(FixedToInt(span.red), \
+                                FixedToInt(span.green), FixedToInt(span.blue), \
+                                FixedToInt(span.alpha)); \
+      span.alpha += span.alphaStep; \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_8R8G8B_triangle: smooth-shaded triangle without depth testing for
+ * a PF_8R8G8B XImage (GLuint pixels).  Each pixel of the span receives the
+ * interpolated r/g/b packed with PACK_8R8G8B. */
+#define NAME smooth_8R8G8B_triangle
+#define INTERP_RGB 1
+#define PIXEL_TYPE GLuint
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   for (col = 0; col < span.end; col++) { \
+      pRow[col] = PACK_8R8G8B(FixedToInt(span.red), \
+                              FixedToInt(span.green), FixedToInt(span.blue)); \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_8R8G8B24_triangle: smooth-shaded triangle without depth testing
+ * for a PF_8R8G8B24 XImage with bgr_t pixels.  A pixel pointer walks the
+ * span and receives the interpolated r/g/b of each fragment. */
+#define NAME smooth_8R8G8B24_triangle
+#define INTERP_RGB 1
+#define PIXEL_TYPE bgr_t
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR3(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   PIXEL_TYPE *dst = pRow; \
+   for (col = 0; col < span.end; col++, dst++) { \
+      dst->b = FixedToInt(span.blue); \
+      dst->g = FixedToInt(span.green); \
+      dst->r = FixedToInt(span.red); \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_TRUEDITHER_triangle: smooth-shaded triangle without depth testing
+ * for a PF_TRUEDITHER XImage.  Every fragment is packed by PACK_TRUEDITHER
+ * at (x, y = YFLIP(xrb, span.y)) and stored through XMesaPutPixel. */
+#define NAME smooth_TRUEDITHER_triangle
+#define INTERP_RGB 1
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb); \
+   XMesaImage *dstImage = xrb->ximage;
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      unsigned long p; \
+      PACK_TRUEDITHER(p, x, y, FixedToInt(span.red), \
+                      FixedToInt(span.green), FixedToInt(span.blue)); \
+      XMesaPutPixel(dstImage, x, y, p ); \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_5R6G5B_triangle: smooth-shaded triangle without depth testing for
+ * a PF_5R6G5B XImage (GLushort pixels).  Each pixel of the span receives
+ * the interpolated r/g/b packed with PACK_5R6G5B. */
+#define NAME smooth_5R6G5B_triangle
+#define INTERP_RGB 1
+#define PIXEL_TYPE GLushort
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR2(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   for (col = 0; col < span.end; col++) { \
+      pRow[col] = (PIXEL_TYPE) PACK_5R6G5B(FixedToInt(span.red), \
+                     FixedToInt(span.green), FixedToInt(span.blue)); \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_DITHER_5R6G5B_triangle: smooth-shaded triangle without depth
+ * testing for a PF_DITHER_5R6G5B XImage (GLushort pixels).  PACK_TRUEDITHER
+ * writes each fragment's color directly into pRow. */
+#define NAME smooth_DITHER_5R6G5B_triangle
+#define INTERP_RGB 1
+#define PIXEL_TYPE GLushort
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR2(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      PACK_TRUEDITHER(pRow[col], x, y, FixedToInt(span.red), \
+                      FixedToInt(span.green), FixedToInt(span.blue)); \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_DITHER8_triangle: smooth-shaded triangle without depth testing
+ * for an 8-bit PF_DITHER XImage (GLubyte pixels).  XDITHER_SETUP runs once
+ * per span with the flipped y; XDITHER produces every pixel of the span. */
+#define NAME smooth_DITHER8_triangle
+#define INTERP_RGB 1
+#define PIXEL_TYPE GLubyte
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   XDITHER_SETUP(y); \
+   for (col = 0; col < span.end; col++, x++) { \
+      pRow[col] = (PIXEL_TYPE) XDITHER(x, FixedToInt(span.red), \
+                     FixedToInt(span.green), FixedToInt(span.blue)); \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_DITHER_triangle: smooth-shaded triangle without depth testing for
+ * a PF_DITHER XImage.  Each fragment's XDITHER result is stored through
+ * XMesaPutPixel rather than by a direct row write. */
+#define NAME smooth_DITHER_triangle
+#define INTERP_RGB 1
+#define SETUP_CODE \
+   GET_XRB(xrb); \
+   XMesaImage *dstImage = xrb->ximage;
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   XDITHER_SETUP(y); \
+   for (col = 0; col < span.end; col++, x++) { \
+      unsigned long p = XDITHER(x, FixedToInt(span.red), \
+                           FixedToInt(span.green), FixedToInt(span.blue)); \
+      XMesaPutPixel(dstImage, x, y, p); \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_LOOKUP8_triangle: smooth-shaded triangle without depth testing
+ * for an 8-bit PF_LOOKUP XImage (GLubyte pixels).  LOOKUP_SETUP runs once
+ * per span; LOOKUP maps each fragment's r/g/b to the stored pixel. */
+#define NAME smooth_LOOKUP8_triangle
+#define INTERP_RGB 1
+#define PIXEL_TYPE GLubyte
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   LOOKUP_SETUP; \
+   for (col = 0; col < span.end; col++) { \
+      pRow[col] = LOOKUP(FixedToInt(span.red), \
+                         FixedToInt(span.green), FixedToInt(span.blue)); \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* smooth_HPCR_triangle: smooth-shaded triangle without depth testing for
+ * an 8-bit PF_HPCR XImage (GLubyte pixels).  DITHER_HPCR computes every
+ * pixel from x, the flipped y and the interpolated color. */
+#define NAME smooth_HPCR_triangle
+#define INTERP_RGB 1
+#define PIXEL_TYPE GLubyte
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      pRow[col] = DITHER_HPCR(x, y, FixedToInt(span.red), \
+                     FixedToInt(span.green), FixedToInt(span.blue)); \
+      span.blue += span.blueStep; \
+      span.green += span.greenStep; \
+      span.red += span.redStep; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_TRUECOLOR_triangle: flat-shaded triangle without depth testing for
+ * a PF_TRUECOLOR XImage.  The pixel is packed once from v2's color and
+ * stored for every fragment of the span through XMesaPutPixel. */
+#define NAME flat_TRUECOLOR_triangle
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb); \
+   XMesaImage *dstImage = xrb->ximage; \
+   unsigned long flatPixel; \
+   PACK_TRUECOLOR(flatPixel, v2->color[0], v2->color[1], v2->color[2]);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      XMesaPutPixel(dstImage, x, y, flatPixel); \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_8A8B8G8R_triangle: flat-shaded triangle without depth testing for a
+ * PF_8A8B8G8R XImage.  The fill pixel is packed once from v2's color,
+ * including alpha (color[3]) just as flat_8A8B8G8R_z_triangle does. */
+#define NAME flat_8A8B8G8R_triangle
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define PIXEL_TYPE GLuint
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE					\
+   GET_XRB(xrb);					\
+   unsigned long p = PACK_8A8B8G8R( v2->color[0],	\
+		 v2->color[1], v2->color[2], v2->color[3] );
+#define RENDER_SPAN( span ) {				\
+   GLuint i;						\
+   for (i = 0; i < span.end; i++) {			\
+      pRow[i] = (PIXEL_TYPE) p;				\
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_8A8R8G8B_triangle: flat-shaded triangle without depth testing for a
+ * PF_8A8R8G8B XImage.  The fill pixel is packed once from v2's color,
+ * including alpha (color[3]) just as flat_8A8R8G8B_z_triangle does. */
+#define NAME flat_8A8R8G8B_triangle
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define PIXEL_TYPE GLuint
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE					\
+   GET_XRB(xrb);					\
+   unsigned long p = PACK_8A8R8G8B( v2->color[0],	\
+		 v2->color[1], v2->color[2], v2->color[3] );
+#define RENDER_SPAN( span ) {				\
+   GLuint i;						\
+   for (i = 0; i < span.end; i++) {			\
+      pRow[i] = (PIXEL_TYPE) p;				\
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_8R8G8B_triangle: flat-shaded triangle without depth testing for a
+ * PF_8R8G8B XImage (GLuint pixels).  The pixel is packed once from v2's
+ * r/g/b and copied into every pixel of the span. */
+#define NAME flat_8R8G8B_triangle
+#define PIXEL_TYPE GLuint
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR4(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb); \
+   unsigned long flatPixel = PACK_8R8G8B(v2->color[0], \
+                                v2->color[1], v2->color[2]);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   for (col = 0; col < span.end; col++) { \
+      pRow[col] = (PIXEL_TYPE) flatPixel; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_8R8G8B24_triangle: flat-shaded triangle without depth testing for a
+ * PF_8R8G8B24 XImage with bgr_t pixels.  A pixel pointer walks the span and
+ * receives the RCOMP/GCOMP/BCOMP entries of v2's color. */
+#define NAME flat_8R8G8B24_triangle
+#define PIXEL_TYPE bgr_t
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR3(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb); \
+   const GLubyte *flatColor = v2->color;
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   PIXEL_TYPE *dst = pRow; \
+   for (col = 0; col < span.end; col++, dst++) { \
+      dst->b = flatColor[BCOMP]; \
+      dst->g = flatColor[GCOMP]; \
+      dst->r = flatColor[RCOMP]; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_TRUEDITHER_triangle: flat-shaded triangle without depth testing for
+ * a PF_TRUEDITHER XImage.  v2's color is re-packed by PACK_TRUEDITHER for
+ * every fragment (it takes x and y) and stored through XMesaPutPixel. */
+#define NAME flat_TRUEDITHER_triangle
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb); \
+   XMesaImage *dstImage = xrb->ximage;
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      unsigned long p; \
+      PACK_TRUEDITHER(p, x, y, v2->color[0], \
+                      v2->color[1], v2->color[2] ); \
+      XMesaPutPixel(dstImage, x, y, p); \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_5R6G5B_triangle: flat-shaded triangle without depth testing for a
+ * PF_5R6G5B XImage (GLushort pixels).  The pixel is packed once from v2's
+ * r/g/b and copied into every pixel of the span. */
+#define NAME flat_5R6G5B_triangle
+#define PIXEL_TYPE GLushort
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR2(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   GET_XRB(xrb); \
+   unsigned long flatPixel = PACK_5R6G5B(v2->color[0], \
+                                v2->color[1], v2->color[2]);
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   for (col = 0; col < span.end; col++) { \
+      pRow[col] = (PIXEL_TYPE) flatPixel; \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_DITHER_5R6G5B_triangle: flat-shaded triangle without depth testing
+ * for a PF_DITHER_5R6G5B XImage (GLushort pixels).  PACK_TRUEDITHER writes
+ * v2's color into pRow for every fragment of the span. */
+#define NAME flat_DITHER_5R6G5B_triangle
+#define PIXEL_TYPE GLushort
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR2(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb); \
+   const GLubyte *flatColor = v2->color;
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      PACK_TRUEDITHER(pRow[col], x, y, flatColor[RCOMP], \
+                      flatColor[GCOMP], flatColor[BCOMP]); \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_DITHER8_triangle: flat-shaded triangle without depth testing for an
+ * 8-bit PF_DITHER XImage.  The dither row is chosen from the already-flipped
+ * y (as in flat_DITHER_triangle); y must not go through YFLIP twice. */
+#define NAME flat_DITHER8_triangle
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X, Y)
+#define PIXEL_TYPE GLubyte
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE						\
+   GET_XRB(xrb);						\
+   FLAT_DITHER_SETUP( v2->color[0], v2->color[1], v2->color[2] );
+#define RENDER_SPAN( span ) {					\
+   GLuint i;							\
+   GLint x = span.x, y = YFLIP(xrb, span.y);	\
+   FLAT_DITHER_ROW_SETUP(y);			\
+   for (i = 0; i < span.end; i++, x++) {			\
+      pRow[i] = (PIXEL_TYPE) FLAT_DITHER(x);			\
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_DITHER_triangle: flat-shaded triangle without depth testing for a
+ * PF_DITHER XImage.  FLAT_DITHER_SETUP runs once with v2's color; each
+ * fragment stores FLAT_DITHER(x) through XMesaPutPixel. */
+#define NAME flat_DITHER_triangle
+#define SETUP_CODE \
+   GET_XRB(xrb); \
+   XMesaImage *dstImage = xrb->ximage; \
+   FLAT_DITHER_SETUP( v2->color[0], v2->color[1], v2->color[2] );
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   FLAT_DITHER_ROW_SETUP(y); \
+   for (col = 0; col < span.end; col++, x++) { \
+      unsigned long p = FLAT_DITHER(x); \
+      XMesaPutPixel(dstImage, x, y, p ); \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* flat_HPCR_triangle: flat-shaded triangle without depth testing for an
+ * 8-bit PF_HPCR XImage (GLubyte pixels).  v2's r/g/b are fetched once;
+ * every fragment stores DITHER_HPCR(x, y, r, g, b). */
+#define NAME flat_HPCR_triangle
+#define PIXEL_TYPE GLubyte
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X, Y)
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE \
+   XMesaContext xmesa = XMESA_CONTEXT(ctx); \
+   GET_XRB(xrb); \
+   GLubyte b = v2->color[2]; \
+   GLubyte g = v2->color[1]; \
+   GLubyte r = v2->color[0];
+#define RENDER_SPAN( span ) { \
+   GLuint col; \
+   GLint y = YFLIP(xrb, span.y), x = span.x; \
+   for (col = 0; col < span.end; col++, x++) { \
+      pRow[col] = (PIXEL_TYPE) DITHER_HPCR(x, y, r, g, b); \
+   } }
+#include "swrast/s_tritemp.h"
+
+
+
+/* XImage, flat, NON-depth-buffered, 8-bit PF_LOOKUP triangle.
+ * The pixel value p is looked up once from the v2 color in SETUP_CODE, so
+ * RENDER_SPAN is a constant fill of the span and uses no x/y coordinates. */
+#define NAME flat_LOOKUP8_triangle
+#define PIXEL_ADDRESS(X,Y) PIXEL_ADDR1(xrb, X, Y)
+#define PIXEL_TYPE GLubyte
+#define BYTES_PER_ROW (xrb->ximage->bytes_per_line)
+#define SETUP_CODE					\
+   GET_XRB(xrb);					\
+   LOOKUP_SETUP;					\
+   GLubyte r = v2->color[0];				\
+   GLubyte g = v2->color[1];				\
+   GLubyte b = v2->color[2];				\
+   GLubyte p = LOOKUP(r,g,b);
+#define RENDER_SPAN( span ) {         	 		\
+   GLuint i;						\
+   for (i = 0; i < span.end; i++) {			\
+      pRow[i] = (PIXEL_TYPE) p;				\
+   } }
+#include "swrast/s_tritemp.h"
+
+
+#endif /* CHAN_BITS == 8 */
+
+
+#if defined(DEBUG) && CHAN_BITS == 8
+extern void _xmesa_print_triangle_func( swrast_tri_func triFunc );
+
+/**
+ * Debugging aid: print the name of an optimized XMesa triangle function.
+ *
+ * Writes "XMesa tri func = <name>" to stdout, or "XMesa tri func = ???"
+ * if \p triFunc is not in the table below.  The table lists every function
+ * that get_triangle_func() can select, including the four PF_DITHER
+ * variants used when the visual depth is not 8.
+ *
+ * Only compiled when DEBUG is defined and CHAN_BITS == 8.
+ *
+ * \param triFunc  the triangle function to identify
+ */
+void _xmesa_print_triangle_func( swrast_tri_func triFunc )
+{
+   /* Expands to a { function pointer, "function name" } initializer. */
+#define TRI_ENTRY(f) { f, #f }
+   static const struct {
+      swrast_tri_func func;
+      const char *name;
+   } known[] = {
+      /* smooth shaded, depth tested */
+      TRI_ENTRY(smooth_TRUECOLOR_z_triangle),
+      TRI_ENTRY(smooth_8A8B8G8R_z_triangle),
+      TRI_ENTRY(smooth_8A8R8G8B_z_triangle),
+      TRI_ENTRY(smooth_8R8G8B_z_triangle),
+      TRI_ENTRY(smooth_8R8G8B24_z_triangle),
+      TRI_ENTRY(smooth_TRUEDITHER_z_triangle),
+      TRI_ENTRY(smooth_5R6G5B_z_triangle),
+      TRI_ENTRY(smooth_DITHER_5R6G5B_z_triangle),
+      TRI_ENTRY(smooth_HPCR_z_triangle),
+      TRI_ENTRY(smooth_DITHER8_z_triangle),
+      TRI_ENTRY(smooth_DITHER_z_triangle),
+      TRI_ENTRY(smooth_LOOKUP8_z_triangle),
+
+      /* flat shaded, depth tested */
+      TRI_ENTRY(flat_TRUECOLOR_z_triangle),
+      TRI_ENTRY(flat_8A8B8G8R_z_triangle),
+      TRI_ENTRY(flat_8A8R8G8B_z_triangle),
+      TRI_ENTRY(flat_8R8G8B_z_triangle),
+      TRI_ENTRY(flat_8R8G8B24_z_triangle),
+      TRI_ENTRY(flat_TRUEDITHER_z_triangle),
+      TRI_ENTRY(flat_5R6G5B_z_triangle),
+      TRI_ENTRY(flat_DITHER_5R6G5B_z_triangle),
+      TRI_ENTRY(flat_HPCR_z_triangle),
+      TRI_ENTRY(flat_DITHER8_z_triangle),
+      TRI_ENTRY(flat_DITHER_z_triangle),
+      TRI_ENTRY(flat_LOOKUP8_z_triangle),
+
+      /* smooth shaded, no depth test */
+      TRI_ENTRY(smooth_TRUECOLOR_triangle),
+      TRI_ENTRY(smooth_8A8B8G8R_triangle),
+      TRI_ENTRY(smooth_8A8R8G8B_triangle),
+      TRI_ENTRY(smooth_8R8G8B_triangle),
+      TRI_ENTRY(smooth_8R8G8B24_triangle),
+      TRI_ENTRY(smooth_TRUEDITHER_triangle),
+      TRI_ENTRY(smooth_5R6G5B_triangle),
+      TRI_ENTRY(smooth_DITHER_5R6G5B_triangle),
+      TRI_ENTRY(smooth_HPCR_triangle),
+      TRI_ENTRY(smooth_DITHER8_triangle),
+      TRI_ENTRY(smooth_DITHER_triangle),
+      TRI_ENTRY(smooth_LOOKUP8_triangle),
+
+      /* flat shaded, no depth test */
+      TRI_ENTRY(flat_TRUECOLOR_triangle),
+      TRI_ENTRY(flat_TRUEDITHER_triangle),
+      TRI_ENTRY(flat_8A8B8G8R_triangle),
+      TRI_ENTRY(flat_8A8R8G8B_triangle),
+      TRI_ENTRY(flat_8R8G8B_triangle),
+      TRI_ENTRY(flat_8R8G8B24_triangle),
+      TRI_ENTRY(flat_5R6G5B_triangle),
+      TRI_ENTRY(flat_DITHER_5R6G5B_triangle),
+      TRI_ENTRY(flat_HPCR_triangle),
+      TRI_ENTRY(flat_DITHER8_triangle),
+      TRI_ENTRY(flat_DITHER_triangle),
+      TRI_ENTRY(flat_LOOKUP8_triangle),
+   };
+#undef TRI_ENTRY
+   const GLuint count = sizeof(known) / sizeof(known[0]);
+   GLuint i;
+
+   printf("XMesa tri func = ");
+
+   /* Linear search is fine: small table, debug-only code. */
+   for (i = 0; i < count; i++) {
+      if (triFunc == known[i].func) {
+         printf("%s\n", known[i].name);
+         return;
+      }
+   }
+
+   /* No match: triFunc is not one of the functions in the table. */
+   printf("???\n");
+}
+#endif
+
+
+#ifdef DEBUG
+
+/* Name of the function most recently selected with USE(), NULL if none. */
+static const char *triFuncName = NULL;
+/* Return triFunc from the enclosing function, recording its name first. */
+#define USE(triFunc)                   \
+do {                                   \
+    triFuncName = #triFunc;            \
+    return triFunc;                    \
+} while (0)
+
+#else
+/* Non-debug build: just return triFunc from the enclosing function. */
+#define USE(triFunc)  return triFunc
+
+#endif
+
+
+/**
+ * Return pointer to an optimized triangle drawing function, or NULL if we
+ * should use a swrast fallback.  Always NULL unless CHAN_BITS == 8.
+ */
+static swrast_tri_func
+get_triangle_func(GLcontext *ctx)
+{
+#if CHAN_BITS == 8
+   SWcontext *swrast = SWRAST_CONTEXT(ctx);
+   XMesaContext xmesa = XMESA_CONTEXT(ctx);
+   XMesaBuffer xmbuf = XMESA_BUFFER(ctx->DrawBuffer);
+   const int depth = GET_VISUAL_DEPTH(xmesa->xm_visual);
+   const struct xmesa_renderbuffer *xrb;
+
+#ifdef DEBUG
+   triFuncName = NULL;
+#endif
+   /* NOTE(review): _ColorDrawBufferIndexes vs BUFFER_BIT_* masks - confirm */
+   /* trivial fallback tests */
+   if ((ctx->DrawBuffer->_ColorDrawBufferIndexes[0] != BUFFER_BIT_FRONT_LEFT) &&
+       (ctx->DrawBuffer->_ColorDrawBufferIndexes[0] != BUFFER_BIT_BACK_LEFT))
+      return (swrast_tri_func) NULL;
+   if (ctx->RenderMode != GL_RENDER)
+      return (swrast_tri_func) NULL;
+   if (ctx->Polygon.SmoothFlag)
+      return (swrast_tri_func) NULL;
+   if (ctx->Texture._EnabledUnits)
+      return (swrast_tri_func) NULL;
+   if (swrast->_RasterMask & MULTI_DRAW_BIT)
+      return (swrast_tri_func) NULL;
+   if (ctx->Polygon.CullFlag && 
+       ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+      return (swrast_tri_func) NULL;
+   if (xmbuf->swAlpha)
+      return (swrast_tri_func) NULL;
+
+   xrb = xmesa_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]->Wrapped);
+   /* Optimized functions are only offered when rendering to an XImage. */
+   if (xrb->ximage) {
+      if (   ctx->Light.ShadeModel==GL_SMOOTH   /* smooth, depth tested */
+          && swrast->_RasterMask==DEPTH_BIT
+          && ctx->Depth.Func==GL_LESS
+          && ctx->Depth.Mask==GL_TRUE
+          && ctx->Visual.depthBits == DEFAULT_SOFTWARE_DEPTH_BITS
+          && ctx->Polygon.StippleFlag==GL_FALSE) {
+         switch (xmesa->pixelformat) {
+            case PF_Truecolor:
+	       USE(smooth_TRUECOLOR_z_triangle);
+            case PF_8A8B8G8R:
+               USE(smooth_8A8B8G8R_z_triangle);
+            case PF_8A8R8G8B:
+               USE(smooth_8A8R8G8B_z_triangle);
+            case PF_8R8G8B:
+               USE(smooth_8R8G8B_z_triangle);
+            case PF_8R8G8B24:
+               USE(smooth_8R8G8B24_z_triangle);
+            case PF_Dither_True:
+               USE(smooth_TRUEDITHER_z_triangle);
+            case PF_5R6G5B:
+               USE(smooth_5R6G5B_z_triangle);
+            case PF_Dither_5R6G5B:
+               USE(smooth_DITHER_5R6G5B_z_triangle);
+            case PF_HPCR:
+	       USE(smooth_HPCR_z_triangle);
+            case PF_Dither:
+               if (depth == 8)
+                  USE(smooth_DITHER8_z_triangle);
+               else
+                  USE(smooth_DITHER_z_triangle);
+            case PF_Lookup:
+               if (depth == 8)
+                  USE(smooth_LOOKUP8_z_triangle);
+               else
+                  return (swrast_tri_func) NULL;
+            default:
+               return (swrast_tri_func) NULL;
+         }
+      }
+      if (   ctx->Light.ShadeModel==GL_FLAT   /* flat, depth tested */
+          && swrast->_RasterMask==DEPTH_BIT
+          && ctx->Depth.Func==GL_LESS
+          && ctx->Depth.Mask==GL_TRUE
+          && ctx->Visual.depthBits == DEFAULT_SOFTWARE_DEPTH_BITS
+          && ctx->Polygon.StippleFlag==GL_FALSE) {
+         switch (xmesa->pixelformat) {
+            case PF_Truecolor:
+	       USE(flat_TRUECOLOR_z_triangle);
+            case PF_8A8B8G8R:
+               USE(flat_8A8B8G8R_z_triangle);
+            case PF_8A8R8G8B:
+               USE(flat_8A8R8G8B_z_triangle);
+            case PF_8R8G8B:
+               USE(flat_8R8G8B_z_triangle);
+            case PF_8R8G8B24:
+               USE(flat_8R8G8B24_z_triangle);
+            case PF_Dither_True:
+               USE(flat_TRUEDITHER_z_triangle);
+            case PF_5R6G5B:
+               USE(flat_5R6G5B_z_triangle);
+            case PF_Dither_5R6G5B:
+               USE(flat_DITHER_5R6G5B_z_triangle);
+            case PF_HPCR:
+	       USE(flat_HPCR_z_triangle);
+            case PF_Dither:
+               if (depth == 8)
+                  USE(flat_DITHER8_z_triangle);
+               else
+                  USE(flat_DITHER_z_triangle);
+            case PF_Lookup:
+               if (depth == 8)
+                  USE(flat_LOOKUP8_z_triangle);
+               else
+                  return (swrast_tri_func) NULL;
+            default:
+               return (swrast_tri_func) NULL;
+         }
+      }
+      if (   swrast->_RasterMask==0   /* no depth test */
+          && ctx->Light.ShadeModel==GL_SMOOTH
+          && ctx->Polygon.StippleFlag==GL_FALSE) {
+         switch (xmesa->pixelformat) {
+            case PF_Truecolor:
+	       USE(smooth_TRUECOLOR_triangle);
+            case PF_8A8B8G8R:
+               USE(smooth_8A8B8G8R_triangle);
+            case PF_8A8R8G8B:
+               USE(smooth_8A8R8G8B_triangle);
+            case PF_8R8G8B:
+               USE(smooth_8R8G8B_triangle);
+            case PF_8R8G8B24:
+               USE(smooth_8R8G8B24_triangle);
+            case PF_Dither_True:
+               USE(smooth_TRUEDITHER_triangle);
+            case PF_5R6G5B:
+               USE(smooth_5R6G5B_triangle);
+            case PF_Dither_5R6G5B:
+               USE(smooth_DITHER_5R6G5B_triangle);
+            case PF_HPCR:
+	       USE(smooth_HPCR_triangle);
+            case PF_Dither:
+               if (depth == 8)
+                  USE(smooth_DITHER8_triangle);
+               else
+                  USE(smooth_DITHER_triangle);
+            case PF_Lookup:
+               if (depth == 8)
+                  USE(smooth_LOOKUP8_triangle);
+               else
+                  return (swrast_tri_func) NULL;
+            default:
+               return (swrast_tri_func) NULL;
+         }
+      }
+
+      if (   swrast->_RasterMask==0   /* no depth test */
+          && ctx->Light.ShadeModel==GL_FLAT
+          && ctx->Polygon.StippleFlag==GL_FALSE) {
+         switch (xmesa->pixelformat) {
+            case PF_Truecolor:
+	       USE(flat_TRUECOLOR_triangle);
+            case PF_Dither_True:
+	       USE(flat_TRUEDITHER_triangle);
+            case PF_8A8B8G8R:
+               USE(flat_8A8B8G8R_triangle);
+            case PF_8A8R8G8B:
+               USE(flat_8A8R8G8B_triangle);
+            case PF_8R8G8B:
+               USE(flat_8R8G8B_triangle);
+            case PF_8R8G8B24:
+               USE(flat_8R8G8B24_triangle);
+            case PF_5R6G5B:
+               USE(flat_5R6G5B_triangle);
+            case PF_Dither_5R6G5B:
+               USE(flat_DITHER_5R6G5B_triangle);
+            case PF_HPCR:
+	       USE(flat_HPCR_triangle);
+            case PF_Dither:
+               if (depth == 8)
+                  USE(flat_DITHER8_triangle);
+               else
+                  USE(flat_DITHER_triangle);
+            case PF_Lookup:
+               if (depth == 8)
+                  USE(flat_LOOKUP8_triangle);
+               else
+                  return (swrast_tri_func) NULL;
+            default:
+               return (swrast_tri_func) NULL;
+         }
+      }
+   }
+#endif /* CHAN_BITS == 8 */
+
+   return (swrast_tri_func) NULL;
+}
+
+
+/* swrast triangle-selection hook for XMesa: install one of our optimized
+ * triangle functions when get_triangle_func() offers one; otherwise let
+ * swrast choose its standard function.
+ */
+void xmesa_choose_triangle( GLcontext *ctx )
+{
+   SWcontext *sw = SWRAST_CONTEXT(ctx);
+   sw->Triangle = get_triangle_func( ctx );
+   if (sw->Triangle == NULL)
+      _swrast_choose_triangle( ctx );
+}
+
diff --git a/src/mesa/drivers/x11/xmesa.h b/src/mesa/drivers/x11/xmesa.h
new file mode 100644
index 0000000000..f63626a970
--- /dev/null
+++ b/src/mesa/drivers/x11/xmesa.h
@@ -0,0 +1,424 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ * 
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*
+ * Mesa/X11 interface.  This header file serves as the documentation for
+ * the Mesa/X11 interface functions.
+ *
+ * Note: this interface isn't intended for user programs.  It's primarily
+ * just for implementing the pseudo-GLX interface.
+ */
+
+
+/* Sample Usage:
+
+In addition to the usual X calls to select a visual, create a colormap
+and create a window, you must do the following to use the X/Mesa interface:
+
+1. Call XMesaCreateVisual() to make an XMesaVisual from an XVisualInfo.
+
+2. Call XMesaCreateContext() to create an X/Mesa rendering context, given
+   the XMesaVisual.
+
+3. Call XMesaCreateWindowBuffer() to create an XMesaBuffer from an X window
+   and XMesaVisual.
+
+4. Call XMesaMakeCurrent() to bind the XMesaBuffer to an XMesaContext and
+   to make the context the current one.
+
+5. Make gl* calls to render your graphics.
+
+6. Use XMesaSwapBuffers() when double buffering to swap front/back buffers.
+
+7. Before the X window is destroyed, call XMesaDestroyBuffer().
+
+8. Before exiting, call XMesaDestroyVisual() and XMesaDestroyContext().
+
+*/
+
+
+
+
+#ifndef XMESA_H
+#define XMESA_H
+
+#ifdef __VMS
+#include <GL/vms_x_fix.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef XFree86Server
+#include "xmesa_xf86.h"
+#else
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+#include "xmesa_x.h"
+#endif
+#include "GL/gl.h"
+
+#ifdef AMIWIN
+#include <pragmas/xlib_pragmas.h>
+extern struct Library *XLibBase;
+#endif
+
+
+#define XMESA_MAJOR_VERSION 6
+#define XMESA_MINOR_VERSION 3
+
+
+
+/*
+ * Values passed to XMesaGetString:
+ */
+#define XMESA_VERSION 1
+#define XMESA_EXTENSIONS 2
+
+
+/*
+ * Values passed to XMesaSetFXmode:
+ */
+#define XMESA_FX_WINDOW       1
+#define XMESA_FX_FULLSCREEN   2
+
+
+
+typedef struct xmesa_context *XMesaContext;
+
+typedef struct xmesa_visual *XMesaVisual;
+
+typedef struct xmesa_buffer *XMesaBuffer;
+
+
+
+/*
+ * Create a new X/Mesa visual.
+ * Input:  display - X11 display
+ *         visinfo - an XVisualInfo pointer
+ *         rgb_flag - GL_TRUE = RGB mode,
+ *                    GL_FALSE = color index mode
+ *         alpha_flag - alpha buffer requested?
+ *         db_flag - GL_TRUE = double-buffered,
+ *                   GL_FALSE = single buffered
+ *         stereo_flag - stereo visual?
+ *         ximage_flag - GL_TRUE = use an XImage for back buffer,
+ *                       GL_FALSE = use an off-screen pixmap for back buffer
+ *         depth_size - requested bits/depth values, or zero
+ *         stencil_size - requested bits/stencil values, or zero
+ *         accum_red_size - requested bits/red accum values, or zero
+ *         accum_green_size - requested bits/green accum values, or zero
+ *         accum_blue_size - requested bits/blue accum values, or zero
+ *         accum_alpha_size - requested bits/alpha accum values, or zero
+ *         num_samples - number of samples/pixel if multisampling, or zero
+ *         level - visual level, usually 0
+ *         visualCaveat - ala the GLX extension, usually GLX_NONE_EXT
+ * Return:  a new XMesaVisual or 0 if error.
+ */
+extern XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
+                                      XMesaVisualInfo visinfo,
+                                      GLboolean rgb_flag,
+                                      GLboolean alpha_flag,
+                                      GLboolean db_flag,
+                                      GLboolean stereo_flag,
+                                      GLboolean ximage_flag,
+                                      GLint depth_size,
+                                      GLint stencil_size,
+                                      GLint accum_red_size,
+                                      GLint accum_green_size,
+                                      GLint accum_blue_size,
+                                      GLint accum_alpha_size,
+                                      GLint num_samples,
+                                      GLint level,
+                                      GLint visualCaveat );
+
+/*
+ * Destroy an XMesaVisual, but not the associated XVisualInfo.
+ */
+extern void XMesaDestroyVisual( XMesaVisual v );
+
+
+
+/*
+ * Create a new XMesaContext for rendering into an X11 window.
+ *
+ * Input:  v - an XMesaVisual
+ *         share_list - another XMesaContext with which to share display
+ *                      lists or NULL if no sharing is wanted.
+ * Return:  an XMesaContext or NULL if error.
+ */
+extern XMesaContext XMesaCreateContext( XMesaVisual v,
+					XMesaContext share_list );
+
+
+/*
+ * Destroy a rendering context as returned by XMesaCreateContext()
+ */
+extern void XMesaDestroyContext( XMesaContext c );
+
+
+#ifdef XFree86Server
+/*
+ * These are the extra routines required for integration with XFree86.
+ * None of these routines should be user visible. -KEM
+ */
+extern GLboolean XMesaForceCurrent( XMesaContext c );
+
+extern GLboolean XMesaLoseCurrent( XMesaContext c );
+
+extern GLboolean XMesaCopyContext( XMesaContext src,
+				   XMesaContext dst,
+				   GLuint mask );
+#endif /* XFree86Server */
+
+
+/*
+ * Create an XMesaBuffer from an X window.
+ */
+extern XMesaBuffer XMesaCreateWindowBuffer( XMesaVisual v, XMesaWindow w );
+
+
+/*
+ * Create an XMesaBuffer from an X pixmap.
+ */
+extern XMesaBuffer XMesaCreatePixmapBuffer( XMesaVisual v,
+					    XMesaPixmap p,
+					    XMesaColormap cmap );
+
+
+/*
+ * Destroy an XMesaBuffer, but not the corresponding window or pixmap.
+ */
+extern void XMesaDestroyBuffer( XMesaBuffer b );
+
+
+/*
+ * Return the XMesaBuffer handle which corresponds to an X drawable, if any.
+ *
+ * New in Mesa 2.3.
+ */
+extern XMesaBuffer XMesaFindBuffer( XMesaDisplay *dpy,
+				    XMesaDrawable d );
+
+
+
+/*
+ * Bind a buffer to a context and make the context the current one.
+ */
+extern GLboolean XMesaMakeCurrent( XMesaContext c,
+				   XMesaBuffer b );
+
+
+/*
+ * Bind two buffers (read and draw) to a context and make the
+ * context the current one.
+ * New in Mesa 3.3
+ */
+extern GLboolean XMesaMakeCurrent2( XMesaContext c,
+                                    XMesaBuffer drawBuffer,
+                                    XMesaBuffer readBuffer );
+
+
+/*
+ * Unbind the current context from its buffer.
+ */
+extern GLboolean XMesaUnbindContext( XMesaContext c );
+
+
+/*
+ * Return a handle to the current context.
+ */
+extern XMesaContext XMesaGetCurrentContext( void );
+
+
+/*
+ * Return handle to the current (draw) buffer.
+ */
+extern XMesaBuffer XMesaGetCurrentBuffer( void );
+
+
+/*
+ * Return handle to the current read buffer.
+ * New in Mesa 3.3
+ */
+extern XMesaBuffer XMesaGetCurrentReadBuffer( void );
+
+
+/*
+ * Swap the front and back buffers for the given buffer.  No action is
+ * taken if the buffer is not double buffered.
+ */
+extern void XMesaSwapBuffers( XMesaBuffer b );
+
+
+/*
+ * Copy a sub-region of the back buffer to the front buffer.
+ *
+ * New in Mesa 2.6
+ */
+extern void XMesaCopySubBuffer( XMesaBuffer b,
+				int x,
+				int y,
+				int width,
+				int height );
+
+
+/*
+ * Return a pointer to the Pixmap or XImage being used as the back
+ * color buffer of an XMesaBuffer.  This function is a way to get "under
+ * the hood" of X/Mesa so one can manipulate the back buffer directly.
+ * Input:  b - the XMesaBuffer
+ * Output:  pixmap - pointer to back buffer's Pixmap, or 0
+ *          ximage - pointer to back buffer's XImage, or NULL
+ * Return:  GL_TRUE = context is double buffered
+ *          GL_FALSE = context is single buffered
+ */
+extern GLboolean XMesaGetBackBuffer( XMesaBuffer b,
+				     XMesaPixmap *pixmap,
+				     XMesaImage **ximage );
+
+
+
+/*
+ * Return the depth buffer associated with an XMesaBuffer.
+ * Input:  b - the XMesa buffer handle
+ * Output:  width, height - size of buffer in pixels
+ *          bytesPerValue - bytes per depth value (2 or 4)
+ *          buffer - pointer to depth buffer values
+ * Return:  GL_TRUE or GL_FALSE to indicate success or failure.
+ *
+ * New in Mesa 2.4.
+ */
+extern GLboolean XMesaGetDepthBuffer( XMesaBuffer b,
+				      GLint *width,
+				      GLint *height,
+				      GLint *bytesPerValue,
+				      void **buffer );
+
+
+
+/*
+ * Flush/sync a context
+ */
+extern void XMesaFlush( XMesaContext c );
+
+
+
+/*
+ * Get an X/Mesa-specific string.
+ * Input:  name - either XMESA_VERSION or XMESA_EXTENSIONS
+ */
+extern const char *XMesaGetString( XMesaContext c, int name );
+
+
+
+/*
+ * Scan for XMesaBuffers whose window/pixmap has been destroyed, then free
+ * any memory used by that buffer.
+ *
+ * New in Mesa 2.3.
+ */
+extern void XMesaGarbageCollect( void );
+
+
+
+/*
+ * Return a dithered pixel value.
+ * Input:  xmesa - XMesaContext
+ *         x, y - window coordinate
+ *         red, green, blue, alpha - color components in [0,1]
+ * Return:  pixel value
+ *
+ * New in Mesa 2.3.
+ */
+extern unsigned long XMesaDitherColor( XMesaContext xmesa,
+				       GLint x,
+				       GLint y,
+				       GLfloat red,
+				       GLfloat green,
+				       GLfloat blue,
+				       GLfloat alpha );
+
+
+
+/*
+ * 3Dfx Glide driver only!
+ * Set 3Dfx/Glide full-screen or window rendering mode.
+ * Input:  mode - either XMESA_FX_WINDOW (window rendering mode) or
+ *                XMESA_FX_FULLSCREEN (full-screen rendering mode)
+ * Return:  GL_TRUE if success
+ *          GL_FALSE if invalid mode or if not using 3Dfx driver
+ *
+ * New in Mesa 2.6.
+ */
+extern GLboolean XMesaSetFXmode( GLint mode );
+
+
+
+/*
+ * Reallocate the back/depth/stencil/accum/etc. buffers associated with
+ * buffer <b> if its size has changed.
+ *
+ * New in Mesa 4.0.2
+ */
+extern void XMesaResizeBuffers( XMesaBuffer b );
+
+
+
+/*
+ * Create a pbuffer.
+ * New in Mesa 4.1
+ */
+extern XMesaBuffer XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap,
+                                      unsigned int width, unsigned int height);
+
+
+
+/*
+ * Texture from Pixmap
+ * New in Mesa 7.1
+ */
+extern void
+XMesaBindTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer,
+                  const int *attrib_list);
+
+extern void
+XMesaReleaseTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer);
+
+
+extern XMesaBuffer
+XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p,
+                               XMesaColormap cmap,
+                               int format, int target, int mipmap);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
diff --git a/src/mesa/drivers/x11/xmesaP.h b/src/mesa/drivers/x11/xmesaP.h
new file mode 100644
index 0000000000..e0a6908228
--- /dev/null
+++ b/src/mesa/drivers/x11/xmesaP.h
@@ -0,0 +1,591 @@
+/*
+ * Mesa 3-D graphics library
+ * Version:  7.1
+ *
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef XMESAP_H
+#define XMESAP_H
+
+
+#include "xmesa.h"
+#include "main/mtypes.h"
+#if defined(FX)
+#include "fxmesa.h"
+#include "xm_glide.h"
+#endif
+#ifdef XFree86Server
+#include "xm_image.h"
+#endif
+
+
+extern _glthread_Mutex _xmesa_lock;
+
+extern XMesaBuffer XMesaBufferList;
+
+/* One PF_8R8G8B24 pixel: three GLubyte components declared in b, g, r order */
+typedef struct {
+   GLubyte b;
+   GLubyte g;
+   GLubyte r;
+} bgr_t;
+
+
+struct xmesa_renderbuffer;
+
+
+/* Color-buffer clearing function type; see xmesa_renderbuffer::clearFunc */
+typedef void (*ClearFunc)( GLcontext *ctx, struct xmesa_renderbuffer *xrb,
+                           GLint x, GLint y, GLint width, GLint height );
+
+
+
+
+/** Framebuffer pixel formats, as stored in xmesa_context::pixelformat */
+enum pixel_format {
+   PF_Index,		/**< Color Index mode */
+   PF_Truecolor,	/**< TrueColor or DirectColor, any depth */
+   PF_Dither_True,	/**< TrueColor with dithering */
+   PF_8A8R8G8B,		/**< 32-bit TrueColor:  8-A, 8-R, 8-G, 8-B bits */
+   PF_8A8B8G8R,		/**< 32-bit TrueColor:  8-A, 8-B, 8-G, 8-R bits */
+   PF_8R8G8B,		/**< 32-bit TrueColor:  8-R, 8-G, 8-B bits */
+   PF_8R8G8B24,		/**< 24-bit TrueColor:  8-R, 8-G, 8-B bits */
+   PF_5R6G5B,		/**< 16-bit TrueColor:  5-R, 6-G, 5-B bits */
+   PF_Dither,		/**< Color-mapped RGB with dither */
+   PF_Lookup,		/**< Color-mapped RGB without dither */
+   PF_HPCR,		/**< HP Color Recovery (ad@lms.be 30/08/95) */
+   PF_1Bit,		/**< monochrome dithering of RGB */
+   PF_Grayscale,	/**< Grayscale or StaticGray */
+   PF_Dither_5R6G5B	/**< 16-bit dithered TrueColor: 5-R, 6-G, 5-B */
+};
+
+
+/**
+ * Visual information, derived from GLvisual.
+ * Basically corresponds to an XVisualInfo.
+ */
+struct xmesa_visual {
+   GLvisual mesa_visual;	/* Device independent visual parameters */
+   XMesaDisplay *display;	/* The X11 display */
+#ifdef XFree86Server
+   GLint ColormapEntries;
+   GLint nplanes;
+#else
+   XMesaVisualInfo visinfo;	/* X's visual info (pointer to private copy) */
+   XVisualInfo *vishandle;	/* Only used in fakeglx.c */
+#endif
+   GLint BitsPerPixel;		/* True bits per pixel for XImages */
+
+   GLboolean ximage_flag;	/* Use XImage for back buffer (not pixmap)? */
+
+   enum pixel_format dithered_pf;  /* Pixel format when dithering */
+   enum pixel_format undithered_pf;/* Pixel format when not dithering */
+
+   GLfloat RedGamma;		/* Gamma values, 1.0 is default */
+   GLfloat GreenGamma;
+   GLfloat BlueGamma;
+
+   /* For PF_Truecolor */
+   GLint rshift, gshift, bshift;/* Pixel color component shifts */
+   GLubyte Kernel[16];		/* Dither kernel */
+   unsigned long RtoPixel[512];	/* RGB to pixel conversion */
+   unsigned long GtoPixel[512];
+   unsigned long BtoPixel[512];
+   GLubyte PixelToR[256];	/* Pixel to RGB conversion */
+   GLubyte PixelToG[256];
+   GLubyte PixelToB[256];
+
+   /* For PF_HPCR */
+   short       hpcr_rgbTbl[3][256];
+   GLboolean   hpcr_clear_flag;
+   GLubyte     hpcr_clear_ximage_pattern[2][16];
+   XMesaImage *hpcr_clear_ximage;
+   XMesaPixmap hpcr_clear_pixmap;
+
+   /* For PF_1Bit */
+   int bitFlip;
+};
+
+
+/**
+ * Context info, derived from GLcontext.
+ * Basically corresponds to a GLXContext.
+ */
+struct xmesa_context {
+   GLcontext mesa;		/* the core library context (containment) */
+   XMesaVisual xm_visual;	/* Describes the buffers */
+   XMesaBuffer xm_buffer;	/* current span/point/line/triangle buffer */
+
+   XMesaDisplay *display;	/* == xm_visual->display */
+   GLboolean swapbytes;		/* Host byte order != display byte order? */
+   GLboolean direct;		/* Direct rendering context? */
+
+   enum pixel_format pixelformat;	/* switched on by get_triangle_func() */
+
+   GLubyte clearcolor[4];		/* current clearing color */
+   unsigned long clearpixel;		/* current clearing pixel value */
+};
+
+
+/**
+ * Types of X/GLX drawables we might render into (see xmesa_buffer::type).
+ */
+typedef enum {
+   WINDOW,          /* An X window */
+   GLXWINDOW,       /* GLX window */
+   PIXMAP,          /* GLX pixmap */
+   PBUFFER          /* GLX Pbuffer */
+} BufferType;
+
+
+/** Values for db_mode: */
+/*@{*/
+#define BACK_PIXMAP	1
+#define BACK_XIMAGE	2
+/*@}*/
+
+
+/**
+ * An xmesa_renderbuffer represents the back or front color buffer.
+ * For the front color buffer:
+ *    <drawable> is the X window
+ * For the back color buffer:
+ *    Either <ximage> or <pixmap> will be used, never both.
+ * In any case, <drawable> always equals <pixmap>.
+ * For stand-alone Mesa, we could merge <drawable> and <pixmap> into one
+ * field.  We don't do that for the server-side GLcore module because
+ * pixmaps and drawables are different and we'd need a bunch of casts.
+ */
+struct xmesa_renderbuffer
+{
+   struct gl_renderbuffer Base;  /* Base class */
+
+   XMesaBuffer Parent;  /**< The XMesaBuffer this renderbuffer belongs to */
+   XMesaDrawable drawable;	/* Usually the X window ID */
+   XMesaPixmap pixmap;	/* Back color buffer */
+   XMesaImage *ximage;	/* The back buffer, if not using a Pixmap */
+
+   GLubyte *origin1;	/* used for PIXEL_ADDR1 macro */
+   GLint width1;	/* NOTE(review): presumably the PIXEL_ADDR1 row width - confirm */
+   GLushort *origin2;	/* used for PIXEL_ADDR2 macro */
+   GLint width2;
+   GLubyte *origin3;	/* used for PIXEL_ADDR3 macro */
+   GLint width3;
+   GLuint *origin4;	/* used for PIXEL_ADDR4 macro */
+   GLint width4;
+
+   GLint bottom;	/* used for FLIP macro, equals height - 1 */
+
+   ClearFunc clearFunc;	/* function for clearing this color buffer */
+};
+
+
+/**
+ * Framebuffer information, derived from GLframebuffer.
+ * Basically corresponds to a GLXDrawable.
+ */
+struct xmesa_buffer {
+   GLframebuffer mesa_buffer;	/* depth, stencil, accum, etc buffers */
+				/* This MUST BE FIRST! */
+				/* (XMESA_BUFFER() casts a GLframebuffer
+				 * pointer straight to XMesaBuffer) */
+   GLboolean wasCurrent;	/* was ever the current buffer? */
+   XMesaVisual xm_visual;	/* the X/Mesa visual */
+
+   XMesaDisplay *display;
+   BufferType type;             /* window, pixmap, pbuffer or glxwindow */
+
+   GLboolean largestPbuffer;    /**< for pbuffers */
+   GLboolean preservedContents; /**< for pbuffers */
+
+   struct xmesa_renderbuffer *frontxrb; /* front color renderbuffer */
+   struct xmesa_renderbuffer *backxrb;  /* back color renderbuffer */
+
+   XMesaColormap cmap;		/* the X colormap */
+
+   unsigned long selectedEvents;/* for pbuffers only */
+
+   GLint db_mode;		/* 0 = single buffered */
+				/* BACK_PIXMAP = use Pixmap for back buffer */
+				/* BACK_XIMAGE = use XImage for back buffer */
+   GLboolean swAlpha;
+
+   GLuint shm;			/* X Shared Memory extension status:	*/
+				/*    0 = not available			*/
+				/*    1 = XImage support available	*/
+				/*    2 = Pixmap support available too	*/
+#if defined(USE_XSHM) && !defined(XFree86Server)
+   /* Only present in stand-alone (non X server) builds with XSHM enabled */
+   XShmSegmentInfo shminfo;
+#endif
+
+   XMesaImage *rowimage;	/* Used for optimized span writing */
+   XMesaPixmap stipple_pixmap;	/* For polygon stippling */
+   XMesaGC stipple_gc;		/* For polygon stippling */
+
+   XMesaGC gc;			/* scratch GC for span, line, tri drawing */
+   XMesaGC cleargc;		/* GC for clearing the color buffer */
+   XMesaGC swapgc;		/* GC for swapping the color buffers */
+
+   /* The following are here instead of in the XMesaVisual
+    * because they depend on the window's colormap.
+    */
+
+   /* For PF_DITHER, PF_LOOKUP, PF_GRAYSCALE */
+   /* Indexed by DITH_MIX() in the DITHER/LOOKUP macros and by the
+    * averaged gray level in GRAY_RGB.
+    */
+   unsigned long color_table[576];	/* RGB -> pixel value */
+
+   /* For PF_DITHER, PF_LOOKUP, PF_GRAYSCALE */
+   GLubyte pixel_to_r[65536];		/* pixel value -> red */
+   GLubyte pixel_to_g[65536];		/* pixel value -> green */
+   GLubyte pixel_to_b[65536];		/* pixel value -> blue */
+
+   /* Used to do XAllocColor/XFreeColors accounting: */
+   int num_alloced;
+#if defined(XFree86Server)
+   Pixel alloced_colors[256];
+#else
+   unsigned long alloced_colors[256];
+#endif
+
+#if defined( FX )
+   /* For 3Dfx Glide only */
+   GLboolean FXisHackUsable;	/* Can we render into window? */
+   GLboolean FXwindowHack;	/* Are we rendering into a window? */
+   fxMesaContext FXctx;
+#endif
+
+   /* GLX_EXT_texture_from_pixmap */
+   GLint TextureTarget; /**< GLX_TEXTURE_1D_EXT, for example */
+   GLint TextureFormat; /**< GLX_TEXTURE_FORMAT_RGB_EXT, for example */
+   GLint TextureMipmap; /**< 0 or 1 */
+
+   struct xmesa_buffer *Next;	/* Linked list pointer: next buffer */
+};
+
+
+/**
+ * If pixelformat==PF_TRUECOLOR:
+ *
+ * Store in PIXEL the bitwise OR of the visual's per-channel lookup tables
+ * RtoPixel[R], GtoPixel[G] and BtoPixel[B].
+ *
+ * Expands to a complete assignment statement (the trailing semicolon is
+ * part of the macro) and requires a variable named 'xmesa' with an
+ * xm_visual member to be in scope at the point of use.
+ *
+ * The last line deliberately has no line-continuation backslash, so the
+ * macro ends here and cannot absorb whatever line follows it.
+ */
+#define PACK_TRUECOLOR( PIXEL, R, G, B )	\
+   PIXEL = xmesa->xm_visual->RtoPixel[R]	\
+         | xmesa->xm_visual->GtoPixel[G]	\
+         | xmesa->xm_visual->BtoPixel[B];
+
+
+/**
+ * If pixelformat==PF_TRUEDITHER:
+ *
+ * Like PACK_TRUECOLOR, but first fetches a dither offset from the visual's
+ * Kernel[] table (16 entries, indexed by the low two bits of X and of Y)
+ * and adds it to each of R, G and B before the RtoPixel/GtoPixel/BtoPixel
+ * lookups.
+ *
+ * Expands to a braced block that declares a local 'd', so the arguments
+ * must not themselves refer to a variable named 'd'.  Requires a variable
+ * named 'xmesa' in scope at the point of use.
+ */
+#define PACK_TRUEDITHER( PIXEL, X, Y, R, G, B )			\
+{								\
+   int d = xmesa->xm_visual->Kernel[((X)&3) | (((Y)&3)<<2)];	\
+   PIXEL = xmesa->xm_visual->RtoPixel[(R)+d]			\
+         | xmesa->xm_visual->GtoPixel[(G)+d]			\
+         | xmesa->xm_visual->BtoPixel[(B)+d];			\
+}
+
+
+
+/**
+ * If pixelformat==PF_8A8B8G8R:
+ *
+ * Pack four 8-bit channels into one 32-bit value with alpha in the top
+ * byte, then blue, green and red in the low byte.
+ * A is converted to unsigned before the shift: shifting a promoted signed
+ * int left by 24 is undefined for A >= 0x80, and in practice yields a
+ * negative value that sign-extends when widened to unsigned long.
+ */
+#define PACK_8A8B8G8R( R, G, B, A )	\
+	( ((unsigned)(A) << 24) | ((B) << 16) | ((G) << 8) | (R) )
+
+
+/**
+ * Like PACK_8A8B8G8R() but don't use alpha.  This is usually an acceptable
+ * shortcut.
+ */
+#define PACK_8B8G8R( R, G, B )   ( ((B) << 16) | ((G) << 8) | (R) )
+
+
+
+/**
+ * If pixelformat==PF_8R8G8B:
+ *
+ * Red in bits 23..16, green in bits 15..8, blue in bits 7..0.
+ */
+#define PACK_8R8G8B( R, G, B)	 ( ((R) << 16) | ((G) << 8) | (B) )
+
+
+/**
+ * If pixelformat==PF_5R6G5B:
+ *
+ * Keep the top 5 bits of R, top 6 bits of G and top 5 bits of B and pack
+ * them into 16 bits.  B is shifted but not masked, so it must already be
+ * limited to 8 bits.
+ */
+#define PACK_5R6G5B( R, G, B)	 ( (((R) & 0xf8) << 8) | (((G) & 0xfc) << 3) | ((B) >> 3) )
+
+
+/**
+ * If pixelformat==PF_8A8R8G8B:
+ *
+ * Alpha in the top byte, then red, green and blue in the low byte.
+ * A is shifted as unsigned for the same reason as in PACK_8A8B8G8R().
+ */
+#define PACK_8A8R8G8B( R, G, B, A )	\
+	( ((unsigned)(A) << 24) | ((R) << 16) | ((G) << 8) | (B) )
+
+
+
+/**
+ * If pixelformat==PF_DITHER:
+ *
+ * Improved 8-bit RGB dithering code contributed by Bob Mercier
+ * (mercier@hollywood.cinenet.net).  Thanks Bob!
+ *
+ * DITH_R, DITH_G and DITH_B are the number of levels per channel and
+ * DITH_MIX() combines three per-channel levels into a color_table index.
+ * Defining DITHER666 selects a 6x6x6 arrangement; the default is 5/9/5.
+ */
+#ifdef DITHER666
+# define DITH_R   6
+# define DITH_G   6
+# define DITH_B   6
+# define DITH_MIX(r,g,b)  (((r) * DITH_G + (g)) * DITH_B + (b))
+#else
+# define DITH_R	5
+# define DITH_G	9
+# define DITH_B	5
+# define DITH_MIX(r,g,b)  (((g) << 6) | ((b) << 3) | (r))
+#endif
+#define DITH_DX	4
+#define DITH_DY	4
+#define DITH_N	(DITH_DX * DITH_DY)
+
+/*
+ * Quantize channel value c to one of C levels after adding the dither
+ * offset d:  ((DITH_N * (C - 1) + 1) * c + d) >> 12.
+ * Every parameter is parenthesized so that expression arguments such as
+ * "a + b" are scaled as a whole rather than being split by precedence.
+ */
+#define _dither(C, c, d)   (((unsigned)((DITH_N * ((C) - 1) + 1) * (c) + (d))) >> 12)
+
+#define MAXC	256
+/* Dither kernel of DITH_DY * DITH_DX entries, defined elsewhere */
+extern const int xmesa_kernel8[DITH_DY * DITH_DX];
+
+/*
+ * Dither for random X,Y
+ *
+ * DITHER_SETUP declares the locals '__d' and 'ctable' that DITHER() uses;
+ * it reads the color table of XMESA_BUFFER(ctx->DrawBuffer), so a variable
+ * named 'ctx' must be in scope.  It expands to declarations that already
+ * end in a semicolon.
+ */
+#define DITHER_SETUP						\
+	int __d;						\
+	unsigned long *ctable = XMESA_BUFFER(ctx->DrawBuffer)->color_table;
+
+/*
+ * Evaluates to the color_table entry for (R,G,B) dithered at pixel (X,Y).
+ * The kernel entry is chosen from the low two bits of Y and X; '__d' is
+ * assigned as a side effect.
+ */
+#define DITHER( X, Y, R, G, B )				\
+	(__d = xmesa_kernel8[(((Y)&3)<<2) | ((X)&3)],	\
+	 ctable[DITH_MIX(_dither(DITH_R, (R), __d),	\
+		         _dither(DITH_G, (G), __d),	\
+		         _dither(DITH_B, (B), __d))])
+
+/*
+ * Dither for random X, fixed Y
+ *
+ * XDITHER_SETUP(Y) declares '__d', 'ctable' and 'kernel', where 'kernel'
+ * points at the 4-entry kernel row selected by the low two bits of Y.
+ * Requires 'ctx' in scope, like DITHER_SETUP.
+ */
+#define XDITHER_SETUP(Y)					\
+	int __d;						\
+	unsigned long *ctable = XMESA_BUFFER(ctx->DrawBuffer)->color_table;	\
+	const int *kernel = &xmesa_kernel8[ ((Y)&3) << 2 ];
+
+/* Same result as DITHER() for the row chosen by XDITHER_SETUP(Y). */
+#define XDITHER( X, R, G, B )				\
+	(__d = kernel[(X)&3],				\
+	ctable[DITH_MIX(_dither(DITH_R, (R), __d),	\
+		        _dither(DITH_G, (G), __d),	\
+		        _dither(DITH_B, (B), __d))])
+
+
+
+/*
+ * Dithering for flat-shaded triangles.  Precompute all 16 possible
+ * pixel values given the triangle's RGB color.  Contributed by Martin Shenk.
+ *
+ * FLAT_DITHER_SETUP(R, G, B) declares the local array 'ditherValues' and
+ * fills entry i with the color_table value for kernel entry i.  Each table
+ * value is explicitly narrowed to GLushort.  Requires 'ctx' in scope.
+ */
+#define FLAT_DITHER_SETUP( R, G, B )					\
+	GLushort ditherValues[16];					\
+	{								\
+	   unsigned long *ctable = XMESA_BUFFER(ctx->DrawBuffer)->color_table;	\
+	   int msdr = (DITH_N*((DITH_R)-1)+1) * (R);			\
+	   int msdg = (DITH_N*((DITH_G)-1)+1) * (G);			\
+	   int msdb = (DITH_N*((DITH_B)-1)+1) * (B);			\
+	   int i;							\
+	   for (i=0;i<16;i++) {						\
+	      int k = xmesa_kernel8[i];					\
+	      int j = DITH_MIX( (msdr+k)>>12, (msdg+k)>>12, (msdb+k)>>12 );\
+	      ditherValues[i] = (GLushort) ctable[j];			\
+	   }								\
+        }
+
+/*
+ * Declares 'ditherRow', pointing at the four precomputed values for the
+ * row selected by the low two bits of Y.  Must follow FLAT_DITHER_SETUP.
+ */
+#define FLAT_DITHER_ROW_SETUP(Y)					\
+	GLushort *ditherRow = ditherValues + ( ((Y)&3) << 2);
+
+/* Precomputed pixel value for column X of the current ditherRow. */
+#define FLAT_DITHER(X)  ditherRow[(X)&3]
+
+
+
+/**
+ * If pixelformat==PF_LOOKUP:
+ *
+ * Same quantization as the PF_DITHER path but without a dither offset:
+ * channel value c is mapped to one of C levels and the three levels are
+ * combined with DITH_MIX() into a color_table index.
+ */
+/* Parameters are parenthesized so expression arguments scale as a whole. */
+#define _dither_lookup(C, c)   (((unsigned)((DITH_N * ((C) - 1) + 1) * (c))) >> 12)
+
+/*
+ * Declares the local 'ctable' used by LOOKUP(); requires 'ctx' in scope.
+ * No trailing semicolon is included, so the caller supplies it.
+ */
+#define LOOKUP_SETUP						\
+	unsigned long *ctable = XMESA_BUFFER(ctx->DrawBuffer)->color_table
+
+/* Evaluates to the color_table entry for (R,G,B). */
+#define LOOKUP( R, G, B )				\
+	ctable[DITH_MIX(_dither_lookup(DITH_R, (R)),	\
+		        _dither_lookup(DITH_G, (G)),	\
+		        _dither_lookup(DITH_B, (B)))]
+
+
+/**
+ * If pixelformat==PF_HPCR:
+ *
+ *      HP Color Recovery dithering               (ad@lms.be 30/08/95)
+ *      HP has on its 8-bit 700-series computers, a feature called
+ *      'Color Recovery'.  This allows near 24-bit output (so they say).
+ *      It is enabled by selecting the 8-bit  TrueColor  visual AND
+ *      corresponding  colormap (see tkInitWindow) AND doing some special
+ *      dither.
+ *
+ * DITHER_HPCR(X, Y, R, G, B) maps each channel through the visual's
+ * hpcr_rgbTbl[], adds a dither term selected by the low bit of Y and the
+ * low four bits of X, and ORs the results together: red masked to bits
+ * 7..5, green masked and shifted to bits 4..2, and blue shifted right by 6
+ * (the blue term is not masked).  Requires 'xmesa' in scope.
+ */
+/* Dither offsets indexed as [channel][Y & 1][X & 15]; defined elsewhere */
+extern const short xmesa_HPCR_DRGB[3][2][16];
+
+#define DITHER_HPCR( X, Y, R, G, B )					   \
+  ( ((xmesa->xm_visual->hpcr_rgbTbl[0][R] + xmesa_HPCR_DRGB[0][(Y)&1][(X)&15]) & 0xE0)     \
+  |(((xmesa->xm_visual->hpcr_rgbTbl[1][G] + xmesa_HPCR_DRGB[1][(Y)&1][(X)&15]) & 0xE0)>>3) \
+  | ((xmesa->xm_visual->hpcr_rgbTbl[2][B] + xmesa_HPCR_DRGB[2][(Y)&1][(X)&15])>>6)	   \
+  )
+
+
+
+/**
+ * If pixelformat==PF_1BIT:
+ *
+ * SETUP_1BIT declares the local 'bitFlip' (copied from the visual) that
+ * DITHER_1BIT() reads; it requires 'xmesa' in scope and has no trailing
+ * semicolon.
+ * DITHER_1BIT() compares R+G+B against the kernel threshold selected by the
+ * low two bits of Y and X, then XORs the 0/1 result with bitFlip.
+ */
+/* Threshold kernel of 16 entries; defined elsewhere */
+extern const int xmesa_kernel1[16];
+
+#define SETUP_1BIT  int bitFlip = xmesa->xm_visual->bitFlip
+#define DITHER_1BIT( X, Y, R, G, B )	\
+	(( ((int)(R)+(int)(G)+(int)(B)) > xmesa_kernel1[(((Y)&3) << 2) | ((X)&3)] ) ^ bitFlip)
+
+
+
+/**
+ * If pixelformat==PF_GRAYSCALE:
+ *
+ * Evaluates to the draw buffer's color_table entry indexed by the integer
+ * average of R, G and B.  Requires 'ctx' in scope.
+ */
+#define GRAY_RGB( R, G, B )   XMESA_BUFFER(ctx->DrawBuffer)->color_table[((R) + (G) + (B))/3]
+
+
+
+/**
+ * Converts a GL window Y coord to an X window Y coord:
+ * the result is XRB->bottom - Y, where bottom is documented as height - 1.
+ */
+#define YFLIP(XRB, Y)  ((XRB)->bottom - (Y))
+
+
+/**
+ * Return the address of a 1, 2, 3 or 4-byte pixel in the buffer's XImage:
+ * X==0 is left, Y==0 is bottom.
+ *
+ * Each macro steps back Y rows from the matching originN pointer and
+ * forward X pixels.  For PIXEL_ADDR1/2/4 the arithmetic is in units of the
+ * originN element type; PIXEL_ADDR3 works in bytes (3 per pixel) and casts
+ * the result to bgr_t *.
+ */
+#define PIXEL_ADDR1(XRB, X, Y)  \
+   ( (XRB)->origin1 - (Y) * (XRB)->width1 + (X) )
+
+#define PIXEL_ADDR2(XRB, X, Y)  \
+   ( (XRB)->origin2 - (Y) * (XRB)->width2 + (X) )
+
+#define PIXEL_ADDR3(XRB, X, Y)  \
+   ( (bgr_t *) ( (XRB)->origin3 - (Y) * (XRB)->width3 + 3 * (X) ))
+
+#define PIXEL_ADDR4(XRB, X, Y)  \
+   ( (XRB)->origin4 - (Y) * (XRB)->width4 + (X) )
+
+
+
+/*
+ * External functions:
+ */
+
+extern struct xmesa_renderbuffer *
+xmesa_new_renderbuffer(GLcontext *ctx, GLuint name, const GLvisual *visual,
+                       GLboolean backBuffer);
+
+extern void
+xmesa_delete_framebuffer(struct gl_framebuffer *fb);
+
+extern XMesaBuffer
+xmesa_find_buffer(XMesaDisplay *dpy, XMesaColormap cmap, XMesaBuffer notThis);
+
+extern unsigned long
+xmesa_color_to_pixel( GLcontext *ctx,
+                      GLubyte r, GLubyte g, GLubyte b, GLubyte a,
+                      GLuint pixelFormat );
+
+extern void
+xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b,
+                      GLuint *width, GLuint *height);
+
+extern void
+xmesa_check_and_update_buffer_size(XMesaContext xmctx, XMesaBuffer drawBuffer);
+
+extern void
+xmesa_init_driver_functions( XMesaVisual xmvisual,
+                             struct dd_function_table *driver );
+
+extern void
+xmesa_update_state( GLcontext *ctx, GLbitfield new_state );
+
+extern void
+xmesa_set_renderbuffer_funcs(struct xmesa_renderbuffer *xrb,
+                             enum pixel_format pixelformat, GLint depth);
+
+extern void
+xmesa_destroy_buffers_on_display(XMesaDisplay *dpy);
+
+
+/**
+ * Downcast a gl_renderbuffer pointer to the xmesa_renderbuffer it is the
+ * base of.  Going through a function instead of a bare cast lets the
+ * compiler check the argument type.
+ *
+ * \param rb  renderbuffer pointer to convert
+ * \return the same address, typed as struct xmesa_renderbuffer *
+ */
+static INLINE struct xmesa_renderbuffer *
+xmesa_renderbuffer(struct gl_renderbuffer *rb)
+{
+   struct xmesa_renderbuffer *const xrb = (struct xmesa_renderbuffer *) rb;
+
+   return xrb;
+}
+
+
+/**
+ * Map a Mesa GLcontext to its XMesaContext.
+ * The conversion is a plain pointer cast and relies on structure
+ * containment.
+ *
+ * \param ctx  Mesa context pointer
+ * \return the same address, typed as XMesaContext
+ */
+static INLINE XMesaContext
+XMESA_CONTEXT(GLcontext *ctx)
+{
+   const XMesaContext xmctx = (XMesaContext) ctx;
+
+   return xmctx;
+}
+
+
+/**
+ * Map a Mesa GLframebuffer to its XMesaBuffer.
+ * The conversion is a plain pointer cast and relies on structure
+ * containment (the GLframebuffer is the first member of xmesa_buffer).
+ *
+ * \param b  Mesa framebuffer pointer
+ * \return the same address, typed as XMesaBuffer
+ */
+static INLINE XMesaBuffer
+XMESA_BUFFER(GLframebuffer *b)
+{
+   const XMesaBuffer xmbuf = (XMesaBuffer) b;
+
+   return xmbuf;
+}
+
+
+/* Plugged into the software rasterizer.  Try to use internal
+ * swrast-style point, line and triangle functions.
+ */
+extern void xmesa_choose_point( GLcontext *ctx );
+extern void xmesa_choose_line( GLcontext *ctx );
+extern void xmesa_choose_triangle( GLcontext *ctx );
+
+
+extern void xmesa_register_swrast_functions( GLcontext *ctx );
+
+
+
+/*
+ * Compile-time feature switches for this driver (1 = enabled, 0 = disabled).
+ */
+
+/* NOTE(review): "texure" looks like a misspelling of "texture"; check which
+ * spelling the code that tests this macro uses before renaming it.
+ */
+#define ENABLE_EXT_texure_compression_s3tc 0 /* SW texture compression */
+
+/* EXT_timer_query needs a 64-bit GLuint64EXT: off in the X server build,
+ * on when the compiler reports C99 or later, off otherwise.
+ */
+#ifdef XFree86Server
+#define ENABLE_EXT_timer_query 0
+#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define ENABLE_EXT_timer_query 1 /* should have 64-bit GLuint64EXT */
+#else
+#define ENABLE_EXT_timer_query 0 /* may not have 64-bit GLuint64EXT */
+#endif
+
+
+/* NOTE(review): presumably enables test code for the meta functions;
+ * confirm against the code that tests this macro.
+ */
+#define TEST_META_FUNCS 0
+
+
+#endif
diff --git a/src/mesa/drivers/x11/xmesa_x.h b/src/mesa/drivers/x11/xmesa_x.h
new file mode 100644
index 0000000000..865bab4313
--- /dev/null
+++ b/src/mesa/drivers/x11/xmesa_x.h
@@ -0,0 +1,86 @@
+
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <kevin@precisioninsight.com>
+ *
+ * When we're building the XMesa driver for stand-alone Mesa we
+ * include this file when building the xm_*.c files.
+ * We need to define some types and macros differently when building
+ * in the Xserver vs. stand-alone Mesa.
+ */
+
+#ifndef _XMESA_X_H_
+#define _XMESA_X_H_
+
+/*
+ * Stand-alone build: the XMesa* types are the corresponding Xlib types.
+ * Note that XMesaVisualInfo is a pointer type (XVisualInfo *).
+ */
+typedef Display      XMesaDisplay;
+typedef Pixmap       XMesaPixmap;
+typedef Colormap     XMesaColormap;
+typedef Drawable     XMesaDrawable;
+typedef Window       XMesaWindow;
+typedef GC           XMesaGC;
+typedef XVisualInfo *XMesaVisualInfo;
+typedef XImage       XMesaImage;
+typedef XPoint       XMesaPoint;
+typedef XColor       XMesaColor;
+
+/*
+ * Stand-alone build: each XMesa* operation is the Xlib function of the
+ * same name without the "Mesa" infix, with identical arguments.
+ */
+
+/* Image lifetime */
+#define XMesaDestroyImage      XDestroyImage
+
+/* Per-pixel image access */
+#define XMesaPutPixel          XPutPixel
+#define XMesaGetPixel          XGetPixel
+
+/* GC state */
+#define XMesaSetForeground     XSetForeground
+#define XMesaSetBackground     XSetBackground
+#define XMesaSetPlaneMask      XSetPlaneMask
+#define XMesaSetFunction       XSetFunction
+#define XMesaSetFillStyle      XSetFillStyle
+#define XMesaSetTile           XSetTile
+
+/* Drawing and image transfer */
+#define XMesaDrawPoint         XDrawPoint
+#define XMesaDrawPoints        XDrawPoints
+#define XMesaDrawLine          XDrawLine
+#define XMesaFillRectangle     XFillRectangle
+#define XMesaGetImage          XGetImage
+#define XMesaPutImage          XPutImage
+#define XMesaCopyArea          XCopyArea
+
+/* Resource creation and destruction */
+#define XMesaCreatePixmap      XCreatePixmap
+#define XMesaFreePixmap        XFreePixmap
+#define XMesaFreeGC            XFreeGC
+
+/*
+ * Visual queries for the stand-alone (Xlib) build.  __v is a pointer to a
+ * visual with visinfo, mesa_visual and display members.
+ * The argument and each expression-valued expansion are parenthesized so
+ * the macros are safe with expression arguments (e.g. "v + 1") and inside
+ * larger expressions; the two that expand to a single call need no outer
+ * parentheses.
+ */
+#define GET_COLORMAP_SIZE(__v)  ((__v)->visinfo->colormap_size)
+#define GET_REDMASK(__v)        ((__v)->mesa_visual.redMask)
+#define GET_GREENMASK(__v)      ((__v)->mesa_visual.greenMask)
+#define GET_BLUEMASK(__v)       ((__v)->mesa_visual.blueMask)
+#define GET_VISUAL_DEPTH(__v)   ((__v)->visinfo->depth)
+#define GET_BLACK_PIXEL(__v)    BlackPixel((__v)->display, (__v)->mesa_visual.screen)
+/* Non-zero when host_byte_order() equals the display's image byte order */
+#define CHECK_BYTE_ORDER(__v)   (host_byte_order()==ImageByteOrder((__v)->display))
+/* Looks up the _HP_RGB_SMOOTH_MAP_LIST atom without creating it */
+#define CHECK_FOR_HPCR(__v)     XInternAtom((__v)->display, "_HP_RGB_SMOOTH_MAP_LIST", True)
+
+#endif
diff --git a/src/mesa/drivers/x11/xmesa_xf86.h b/src/mesa/drivers/x11/xmesa_xf86.h
new file mode 100644
index 0000000000..10f93c3ab6
--- /dev/null
+++ b/src/mesa/drivers/x11/xmesa_xf86.h
@@ -0,0 +1,198 @@
+
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <kevin@precisioninsight.com>
+ *
+ * When we're building the XMesa driver for use in the X server (as the
+ * indirect render) we include this file when building the xm_*.c files.
+ * We need to define some types and macros differently when building
+ * in the Xserver vs. stand-alone Mesa.
+ */
+
+#ifndef _XMESA_XF86_H_
+#define _XMESA_XF86_H_
+
+#include "GL/glxtokens.h"
+#include "scrnintstr.h"
+#include "pixmapstr.h"
+#include "gcstruct.h"
+#include "servermd.h"
+
+
+/*
+ * X server build: minimal stand-in for Xlib's XImage, holding only the
+ * fields filled in by XMesaGetImage() and read by XMesaPutImage().
+ */
+typedef struct _XMesaImageRec {
+    int width, height;  /* image dimensions in pixels */
+    char *data;         /* pixel storage */
+    int bytes_per_line; /* Padded to 32 bits */
+    int bits_per_pixel;
+} XMesaImage;
+
+typedef ScreenRec   XMesaDisplay;
+typedef PixmapPtr   XMesaPixmap;
+typedef ColormapPtr XMesaColormap;
+typedef DrawablePtr XMesaDrawable;
+typedef WindowPtr   XMesaWindow;
+typedef GCPtr       XMesaGC;
+typedef VisualPtr   XMesaVisualInfo;
+typedef DDXPointRec XMesaPoint;
+typedef xColorItem  XMesaColor;
+
+/*
+ * Change one integer-valued GC attribute: stores __val in a one-element
+ * CARD32 array and passes it to dixChangeGC() with the attribute mask
+ * __mask.  __d is evaluated and discarded; it exists only so the macro
+ * has the same argument list as the Xlib call it replaces.
+ */
+#define XMesaSetGeneric(__d,__gc,__val,__mask) \
+do { \
+    CARD32 __v[1]; \
+    (void) __d; \
+    __v[0] = __val; \
+    dixChangeGC(NullClient, __gc, __mask, __v, NULL); \
+} while (0)
+
+/*
+ * Same as XMesaSetGeneric() for pointer-valued attributes: the value goes
+ * in the .ptr member of a ChangeGCVal passed as dixChangeGC()'s last
+ * argument.
+ */
+#define XMesaSetGenericPtr(__d,__gc,__pval,__mask) \
+do { \
+    ChangeGCVal __v[1]; \
+    (void) __d; \
+    __v[0].ptr = __pval; \
+    dixChangeGC(NullClient, __gc, __mask, NULL, __v); \
+} while (0)
+
+/* Xlib-compatible GC setters, each bound to one GC attribute mask */
+#define XMesaSetForeground(d,gc,v) XMesaSetGeneric(d,gc,v,GCForeground)
+#define XMesaSetBackground(d,gc,v) XMesaSetGeneric(d,gc,v,GCBackground)
+#define XMesaSetPlaneMask(d,gc,v)  XMesaSetGeneric(d,gc,v,GCPlaneMask)
+#define XMesaSetFunction(d,gc,v)   XMesaSetGeneric(d,gc,v,GCFunction)
+#define XMesaSetFillStyle(d,gc,v)  XMesaSetGeneric(d,gc,v,GCFillStyle)
+
+#define XMesaSetTile(d,gc,v)       XMesaSetGenericPtr(d,gc,v,GCTile)
+
+/*
+ * Draw one point at (__x, __y) on drawable __b with GC __gc.
+ * __d is evaluated and discarded (kept for Xlib argument compatibility).
+ * The GC is validated against the drawable, then its PolyPoint op is
+ * called with a single point in CoordModeOrigin.
+ * The op is looked up through the __gc parameter, not through a variable
+ * that happens to be named 'gc' at the call site.
+ */
+#define XMesaDrawPoint(__d,__b,__gc,__x,__y) \
+do { \
+    XMesaPoint __p[1]; \
+    (void) __d; \
+    __p[0].x = __x; \
+    __p[0].y = __y; \
+    ValidateGC(__b, __gc); \
+    (*__gc->ops->PolyPoint)(__b, __gc, CoordModeOrigin, 1, __p); \
+} while (0)
+
+/*
+ * Draw the __n points in array __p on drawable __b with GC __gc, using
+ * coordinate mode __m.
+ * __d is evaluated and discarded (kept for Xlib argument compatibility).
+ * The op is looked up through the __gc parameter, not through a variable
+ * that happens to be named 'gc' at the call site.
+ */
+#define XMesaDrawPoints(__d,__b,__gc,__p,__n,__m) \
+do { \
+    (void) __d; \
+    ValidateGC(__b, __gc); \
+    (*__gc->ops->PolyPoint)(__b, __gc, __m, __n, __p); \
+} while (0)
+
+/*
+ * Draw a line from (__x0, __y0) to (__x1, __y1) on drawable __b with GC
+ * __gc.
+ * __d is evaluated and discarded (kept for Xlib argument compatibility).
+ * The two end points are passed to the GC's Polylines op in
+ * CoordModeOrigin.  The op is looked up through the __gc parameter, and
+ * the member name is spelled "Polylines" as in the X server's GCOps.
+ */
+#define XMesaDrawLine(__d, __b, __gc, __x0, __y0, __x1, __y1) \
+do { \
+    XMesaPoint __p[2]; \
+    (void) __d; \
+    __p[0].x = __x0; \
+    __p[0].y = __y0; \
+    __p[1].x = __x1; \
+    __p[1].y = __y1; \
+    ValidateGC(__b, __gc); \
+    (*__gc->ops->Polylines)(__b, __gc, CoordModeOrigin, 2, __p); \
+} while (0)
+
+/*
+ * Fill the rectangle at (__x, __y) of size __w by __h on __b with GC __gc.
+ * __d is evaluated and discarded (kept for Xlib argument compatibility).
+ * __b is cast to DrawablePtr, so a pixmap pointer may be passed.  The GC
+ * is validated, then its PolyFillRect op is called with one rectangle.
+ */
+#define XMesaFillRectangle(__d,__b,__gc,__x,__y,__w,__h) \
+do { \
+    xRectangle __r[1]; \
+    (void) __d; \
+    ValidateGC((DrawablePtr)__b, __gc); \
+    __r[0].x = __x; \
+    __r[0].y = __y; \
+    __r[0].width = __w; \
+    __r[0].height = __h; \
+    (*__gc->ops->PolyFillRect)((DrawablePtr)__b, __gc, 1, __r); \
+} while (0)
+
+/*
+ * X server replacement for XGetImage(): read a width x height region at
+ * (x, y) of pixmap p into a newly allocated XMesaImage.
+ *
+ * dpy and format are unused; the image is always fetched as ZPixmap.
+ * Returns NULL if either allocation fails, in which case nothing is
+ * leaked.  On success the caller owns both the image and its data buffer.
+ *
+ * NOTE(review): img->width and img->height are taken from the pixmap, not
+ * from the requested region, while the data buffer is sized for the
+ * requested width and height - confirm this is intended.
+ */
+static _X_INLINE XMesaImage *XMesaGetImage(XMesaDisplay *dpy, PixmapPtr p, int x,
+					int y, unsigned int width,
+					unsigned int height,
+					unsigned long plane_mask, int format)
+{
+    XMesaImage *img;
+    char *data;
+    int bytes_per_line = PixmapBytePad(width, p->drawable.depth);
+
+    (void) dpy;
+    (void) format;
+
+    /* Allocate the pixel buffer first so that every failure path only has
+     * to undo a plain malloc().
+     */
+    data = malloc(height * bytes_per_line);
+    if (!data)
+        return NULL;
+
+    img = Xcalloc(sizeof(*img));
+    if (!img) {
+        free(data);
+        return NULL;
+    }
+
+    img->width = p->drawable.width;
+    img->height = p->drawable.height;
+    img->bits_per_pixel = p->drawable.bitsPerPixel;
+    img->bytes_per_line = bytes_per_line;
+    img->data = data;
+
+    /* Assumes: Images are always in ZPixmap format */
+    (*p->drawable.pScreen->GetImage)(&p->drawable, x, y, width, height,
+				     plane_mask, ZPixmap, img->data);
+
+    return img;
+}
+
+/*
+ * Write the __w by __h pixels of image __i to drawable __b at (__x, __y)
+ * with GC __gc.
+ * __d is evaluated and discarded (kept for Xlib argument compatibility).
+ * The source offsets __sx and __sy must both be zero: they are checked by
+ * ASSERT and otherwise ignored, so sub-image copies are not supported.
+ * The depth passed to the PutImage op is read from __b.
+ */
+#define XMesaPutImage(__d,__b,__gc,__i,__sx,__sy,__x,__y,__w,__h) \
+do { \
+    /* Assumes: Images are always in ZPixmap format */ \
+    (void) __d; \
+    ASSERT(!__sx && !__sy); /* The SubImage case */     \
+    ValidateGC(__b, __gc); \
+    (*__gc->ops->PutImage)(__b, __gc, ((XMesaDrawable)(__b))->depth, \
+			   __x, __y, __w, __h, 0, ZPixmap, \
+			   ((XMesaImage *)(__i))->data); \
+} while (0)
+
+/*
+ * Copy a __w by __h area from (__sx, __sy) of source __sb to (__x, __y) of
+ * destination __db with GC __gc.
+ * __d is evaluated and discarded (kept for Xlib argument compatibility).
+ * The GC is validated against the destination; the source is cast to
+ * DrawablePtr.  The value returned by the CopyArea op is ignored.
+ */
+#define XMesaCopyArea(__d,__sb,__db,__gc,__sx,__sy,__w,__h,__x,__y) \
+do { \
+    (void) __d; \
+    ValidateGC(__db, __gc); \
+    (*__gc->ops->CopyArea)((DrawablePtr)__sb, __db, __gc, \
+			   __sx, __sy, __w, __h, __x, __y); \
+} while (0)
+
+
+/* CreatePixmap returns a PixmapPtr; so, it cannot be inside braces */
+/*
+ * Create a __w by __h pixmap of the given depth through the screen's
+ * CreatePixmap hook.  __d is the screen; __b is accepted for Xlib argument
+ * compatibility and not used.  When the server headers define
+ * CREATE_PIXMAP_USAGE_SCRATCH the hook takes an extra usage-hint
+ * argument, which is passed as 0.
+ */
+#ifdef CREATE_PIXMAP_USAGE_SCRATCH
+#define XMesaCreatePixmap(__d,__b,__w,__h,__depth) \
+  (*__d->CreatePixmap)(__d, __w, __h, __depth, 0)
+#else
+#define XMesaCreatePixmap(__d,__b,__w,__h,__depth) \
+    (*__d->CreatePixmap)(__d, __w, __h, __depth)
+#endif
+
+/* Destroy pixmap __b through the DestroyPixmap hook of screen __d. */
+#define XMesaFreePixmap(__d,__b) \
+    (*__d->DestroyPixmap)(__b)
+
+/* Release scratch GC __gc; __d is evaluated and discarded. */
+#define XMesaFreeGC(__d,__gc) \
+do { \
+    (void) __d; \
+    FreeScratchGC(__gc); \
+} while (0)
+
+/*
+ * Visual queries for the X server build.  __v is a pointer to a visual
+ * with ColormapEntries, nplanes, mesa_visual and display members.
+ * The argument and each expansion are parenthesized so the macros are safe
+ * with expression arguments (e.g. "v + 1") and inside larger expressions.
+ */
+#define GET_COLORMAP_SIZE(__v)  ((__v)->ColormapEntries)
+#define GET_REDMASK(__v)        ((__v)->mesa_visual.redMask)
+#define GET_GREENMASK(__v)      ((__v)->mesa_visual.greenMask)
+#define GET_BLUEMASK(__v)       ((__v)->mesa_visual.blueMask)
+#define GET_VISUAL_DEPTH(__v)   ((__v)->nplanes)
+#define GET_BLACK_PIXEL(__v)    ((__v)->display->blackPixel)
+/* In the server build the byte order check always succeeds */
+#define CHECK_BYTE_ORDER(__v)   GL_TRUE
+/* In the server build HP Color Recovery is never reported */
+#define CHECK_FOR_HPCR(__v)     GL_FALSE
+
+#endif